diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bdab88c --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +env/ +r_env/ +out*/ +__pycache__/ +.snakemake/ +.vscode/ +.Rproj.user +.Rhistory +.Rprofile +*.html +data/classif_splits/ +data/ner_splits/ +config/ia_access_key.txt +config/ia_secret_key.txt \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0d15a64 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Chan Zuckerberg Initiative Foundation and Global Biodata Coalition + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..56d6904 --- /dev/null +++ b/Makefile @@ -0,0 +1,49 @@ +# Command-style targets; names are space-separated because a comma would become part of the target name +.PHONY: dryrun_reproduction setup setup_for_updating test train_and_predict process_manually_reviewed_original update_inventory process_manually_reviewed_update + +dryrun_reproduction: + snakemake \ + -s snakemake/train_predict.smk -np \ + --configfile config/train_predict.yml + +# Install Python dependencies, the NLTK punkt tokenizer, and the renv-managed R packages +setup: + pip install -r requirements.txt + python3 -c "import nltk; nltk.download('punkt')" + pip install --upgrade numpy + Rscript -e 'install.packages("renv", repos="http://cran.us.r-project.org")' + Rscript -e 'renv::restore()' + +# Same as setup, without the R packages +setup_for_updating: + pip install -r requirements.txt + python3 -c "import nltk; nltk.download('punkt')" + pip install --upgrade numpy + +test: + python3 -m pytest -v \ + --flake8 --mypy --pylint \ + --pylint-rcfile=config/.pylintrc \ + src/inventory_utils/*.py \ + src/*.py + +train_and_predict: + snakemake \ + -s snakemake/train_predict.smk \ + --configfile config/train_predict.yml -c1 + +process_manually_reviewed_original: + snakemake \ + -s snakemake/train_predict.smk \ + --configfile config/train_predict.yml \ + -c 1 \ + --until all_analysis + +update_inventory: + snakemake \ + -s snakemake/update_inventory.smk \ + --configfile config/update_inventory.yml -c1 + +process_manually_reviewed_update: + snakemake \ + -s snakemake/update_inventory.smk \ + --configfile config/update_inventory.yml \ + -c 1 \ + --until process_countries \ No newline at end of file diff --git a/README.md b/README.md index 82ffddb..b2ede8b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,359 @@ -# inventory_2022 -Public repository for the biodata resource inventory performed in 2022. +# GBC Inventory 2022 + +This code repository represents work done as part of a collaborative effort between the Chan Zuckerberg Initiative (CZI) and Global Biodata Coalition (GBC) to create an inventory of biodata resources found in scientific articles. 
CZI Research Scientist Ana-Maria Istrate designed the machine learning framework for the project and wrote the code to implement and evaluate the NLP models used to classify articles and extract individual resources. Ana’s code was used by GBC consultant Ken Schackart as the starting point for a pipeline to create an ML-predicted preliminary inventory, which is then further refined with code that includes steps for deduplication, processing for selective manual review, and augmentation with additional attributes to create the final inventory of biodata resources. + +## Motivation + +GBC initiated this project with the objective of gaining an understanding of the global infrastructure of biological data resources. While registries of data resources exist (such as [re3data](https://www.re3data.org/) and [FAIRsharing](https://fairsharing.org/)), their scopes are different from that intended by GBC. So this project was initiated to create an inventory of the global biodata resource infrastructure using methodologies that are reproducible so the inventory could be periodically updated. + +## Overview of methods + +EuropePMC is queried to obtain titles and abstracts of scientific articles. A BERT model is used to classify those articles as describing or not describing a biodata resource. A BERT model is also used to perform named entity recognition to extract the resource name for those articles that are predicted to describe a biodata resource. Resource URLs are extracted using a regular expression. + +This initial collection of articles is automatically deduplicated, and marked for selective manual review. A person manually reviews articles that are potential resource duplicates or false positives. + +The manually reviewed inventory is processed to further deduplicate the inventory and remove false positives. 
The HTTP statuses of the extracted URLs are checked, the IP addresses of the URLs are geolocated, and archived versions of the URLs are checked in the Internet Archive's WayBack Machine. Further article metadata is obtained from EuropePMC. + +The final inventory gives a list of biodata resources, the PMIDs of the articles describing those resources, and the metadata described above. + +Snakemake is used as a workflow manager to automate these processes. + +## Intended uses + +The code and pipelines here have been designed with a few intended use cases: + +- **Reproduction**: We intend that the results of this study are directly reproducible using the pipelines presented here. That includes fine-tuning the models on the manually curated datasets, selecting the best model, using the models for prediction, and all downstream processes. + +- **Updating**: It should be possible to get an updated inventory with minimal changes to the code. A pipeline was developed that allows the user to provide a new publication date range, and then the fine-tuned models are used to process the new data to yield an updated list of resources and their associated metadata. + +- **Generalization**: With some extra work, it should be possible for a future user to manually curate new training data, and use the existing pipelines to fine-tune the models and perform all downstream analysis. We note that while much of the existing code would be useful, some changes to the code are likely in this case. + +To help with the usability of this code, it has been tested on Google Colab. If a user would like to run the code on Colab, [this protocol](https://dx.doi.org/10.17504/protocols.io.5jyl89o36v2w/v3) provides instructions on how to set up Colab and clone this project there. Note that Google and GitHub accounts are required to follow those instructions. + +# Workflow overview + +## Data curation + +The manual curation has already been performed, using the full corpus obtained by querying EuropePMC. 
Titles and abstracts from ~1600 randomly selected papers were used for manual classification. This created the classifier training set. For those papers that were deemed to represent a biodata resource during manual curation, named entities were manually extracted from titles and abstracts, such as the resource name, URL, and description. This created the NER model training set. + +```mermaid +graph TD + query(EuropePMC Query) --> corpus[(Full corpus)] + corpus -- random subset--> manclass(Manual Classification); + manclass -- Not Biodata Resource --> neg[Negative] + manclass -- Biodata Resource --> pos[Positive] + neg --> classtrain[(Classifier training set)] + pos --> classtrain + pos --> ner(Manual named entity extraction) + ner -- Common Name --> nertrain[(NER training set)] + ner -- Full Name --> nertrain +``` + +## Classifier Training + +The manually classified subset of the corpus is split into training, validation, and test (holdout) sets. Several pretrained BERT models are provided with the same training and validation data. The final classifier model is chosen based on the highest *F*1 score on the validation set. This is the classifier used in the final inventory. Final model performance is evaluated on the held-out test set. + +```mermaid +graph TD + classset[(Classifier training set)] + classset --> split(Data Splitting) + split --> train[(train)] + split --> val[(val)] + split --> test[(test)] + subgraph Training + train --> trainer + val --> trainer + models[Pretrained models] --> trainer(training and selection) + trainer -- best model --> classifier{{Classifier}} + end + test ----> eval(Evaluation) + classifier --> eval + +``` + +## NER Model training + +The set of manually extracted named entities is split into training, validation, and test (holdout) sets. Several pretrained BERT models are provided with the same training and validation data. The final NER model is chosen based on the highest *F*1 score on the validation set. 
This is the NER model used in the final inventory. Final model performance is evaluated on the held-out test set. + +```mermaid +graph TD + nerset[(NER training set)] + nerset --> split(Data Splitting) + split --> train[(train)] + split --> val[(val)] + split --> test[(test)] + subgraph Training + train --> trainer + val --> trainer + models[Pretrained models] --> trainer(training and selection) + trainer -- best model --> ner{{NER Model}} + end + test ----> eval(Evaluation) + ner --> eval +``` + +## Automated Inventory Generation + +Once the classifier and NER models have been trained and selected, they are applied to the full corpus. Those papers that are classified as biodata resource by the trained classifier are passed to the trained NER model for extracting attributes of the resource such as resource common name and full name. Resource URLs are extracted using a regular expression. + +The predicted resources are automatically deduplicated (when the name and URL are the same), and the IDs of all articles describing resources are maintained. + +The automatically generated inventory that has been deduplicated is flagged for selective manual review. Articles that share either the resource name or URL are marked as potential duplicates. Articles with low predicted name probability are marked for review. + +```mermaid +graph TD + corpus[(Full corpus)] + corpus --> classifier{{Classifier}} + classifier --> neg[Negative] + classifier --> pos[Positive] + pos --> ner{{NER Model}} + pos --> regex(regex) + ner -- name --> attr[Resource Descriptions] + regex -- URL --> attr + attr --> dedup(Initial Deduplication) + dedup --> flag(Flagging for Selective Review) + flag --> auto_inv[Automatically Generated Inventory] +``` + +## Selective Manual Review + +The process up to this point is run without human intervention. 
As a quality control measure, the inventory must be manually reviewed for articles that are potentially duplicate descriptions of a common resource, or potential false positives based on a low name probability score. + +During manual review, the inventory is annotated to determine which potential duplicates should be merged, and which low-probability articles should be removed. Instructions for this process are available on Zenodo ([doi: 10.5281/zenodo.7768363](https://doi.org/10.5281/zenodo.7768363)). + +## Final Processing + +Once the automatically generated inventory has been manually reviewed, the directions given during manual review are executed (further deduplication, removal of false resources). Further metadata is obtained from this finalized list of resources. HTTP status of associated URLs is assessed. Various APIs are queried to geolocate the IP address associated with the URLs. EuropePMC is queried to gather metadata on the articles describing the resources, such as the authors, author affiliations, funding agencies, grant IDs, and number of citations. The affiliations are parsed to extract the countries that are mentioned in the affiliations. + +```mermaid +graph TD + manual[Manually Reviewed Inventory] + manual --> rev_process(Review Processing) + rev_process --> final_list[Final List of Resources] + final_list -- names --> final[Final Inventory] + final_list -- PMIDs --> epmc(EuropePMC) + final_list -- URL --> check(HTTP Check) + final_list -- URL --> wayback(WayBack Machine) + check -- URL status --> final + final_list -- URL --> ip_check(IP APIs) + ip_check -- IP location --> final + wayback -- Archived URL --> final + epmc -- article metadata --> final + epmc -- affiliations --> affil_parse(Parsing) + affil_parse -- affiliation countries --> final +``` + +## Final Inventory Output + +The finalized inventory has the following columns: + +Column | Type | Description +:----: | :--: | ----------- +ID | list(integer) | PMIDs associated with resource. 
If multiple, they are separated by a comma and a space +best_name | string | Predicted name with highest probability +best_name_prob | float | Probability associated with the best name (out of 1.0) +best_common | string | Predicted common name with highest probability +best_common_prob | float | Probability associated with the best common name (out of 1.0) +best_full | string | Predicted full name with highest probability +best_full_prob | float | Probability associated with the best full name (out of 1.0) +extracted_url | string | URL(s) extracted from text +extracted_url_status | integer OR string | URL HTTP status code, or error string if an exception occurred while requesting URL +extracted_url_country | string | Country code of IP address based on extracted URL, when available +extracted_url_coordinates | string | Coordinates of IP address based on extracted URL, when available. Formatted as (latitude, longitude) +wayback_url | string | Internet Archive's WayBack Machine's archived version of URL, when available +publication_date | string | Date of initial publication of newest article describing resource. Formatted as YYYY-MM-DD +affiliation | list(string) | Affiliation information from EuropePMC. Affiliation information from individual articles are joined with a space and a comma +authors | list(string) | Authors from EuropePMC. Author lists from individual articles are joined with a space and a comma. +grant_ids| list(string) | Grant IDs from EuropePMC. Grant ID lists from individual articles are joined with a space and a comma. +grant_agencies | list(string) | Grant agencies from EuropePMC. Grant agency lists from individual articles are joined with a space and a comma. +num_citations | integer | Number of citations for papers describing the resource +affiliation_countries | list(string) | Country codes of countries mentioned in affiliations + +# Repository Structure + +```sh +. 
+├── config/ # Workflow configuration files +├── data/ # Manual curation files and data splits +├── snakemake/ # Snakemake pipelines and rules +├── src/ # Python scripts and modules +├── .gitignore +├── LICENSE +├── Makefile # Make targets for easy running of steps +├── README.md +├── requirements.txt +├── running_pipeline.ipynb +└── updating_inventory.ipynb +``` + +# Systems + +The code for this project was developed using [WSL2](https://learn.microsoft.com/en-us/windows/wsl/install) connected to an [Ubuntu 20.04](https://releases.ubuntu.com/focal/) kernel. It has also been run on [Google Colaboratory](https://colab.research.google.com/). Compatibility to other systems may vary. In particular, certain functionality (like GNU Make) may not work on Windows. + +If you would like to run the code on a Windows machine, we recommend using WSL2. [This protocol](https://www.protocols.io/view/install-wsl-and-vscode-on-windows-10-q26g78e1klwz/v1) may be helpful for getting that set up. + +# Installation + +## Pip + +If installing with pip, ensure you have Python version 3.8. Older or newer versions may not work. + +```sh +$ python3 --version +Python 3.8.12 +``` + +Then you can install Python dependencies using pip. + +Additionally, ensure that R is installed. R version 3.6.3 was originally used, but newer versions should work. + +```sh +$ Rscript --version +R scripting front-end version 3.6.3 +``` + +If R is not installed, [install it](https://cran.r-project.org/) before running the following command. + +A make command is available for installing all other dependencies. 
+ +```sh +$ make setup +``` + +Alternatively, to install them manually: + +```sh +$ pip install -r requirements.txt +``` + +Then download punkt: + +```python +$ python3 +>>> import nltk +>>> nltk.download('punkt') +``` + +## Anaconda + +To create the environment in your `$HOME` directory, run: +```sh +$ conda env create -f config/environment.yml +$ conda activate inventory_env +``` + +Or you can create the environment in this repository by running: +```sh +$ conda env create -f config/environment.yml -p ./env +$ conda activate ./env +``` + +Then download punkt: + +```python +$ python3 +>>> import nltk +>>> nltk.download('punkt') +``` + +# Allow for execution + +To avoid file permission problems, run the following (on Linux) to allow for execution of the scripts: + +```sh +$ chmod +x src/*.py analysis/*.py analysis/*.R +``` + +# Running Tests + +A full test suite is included to help ensure that everything is running as expected. To run the full test suite, run: + +```sh +$ make test +``` + +# Running the workflow + +## Dry run + +To see what steps would be run in the workflow, a dry run can be run: +```sh +$ make dryrun_reproduction +``` + +## Reproducing original results + +To run the pipeline from a notebook in Colab, follow the steps in [running_pipeline.ipynb](running_pipeline.ipynb). + +Alternatively, to run the pipeline from the command-line, run: +```sh +$ make train_and_predict +``` + +If Make is unavailable, run +```sh +$ snakemake -s snakemake/train_predict.smk --configfile config/train_predict.yml -c1 +``` + +The above commands run the Snakemake pipeline. If you wish to run the steps manually, see [src/README.md](src/README.md#training-and-prediction). + +## Updating the inventory + +Before running the automated pipelines, first update the configuration file [config/update_inventory.yml](config/update_inventory.yml): + +* **Europe PMC query publication date range**: These are stored as variables `query_from_date` and `query_to_date` in that file. 
Note that the dates are inclusive. For example, to get papers published in 2022, both of those variables should be 2022. +* **Previous inventory file**: During strict deduplication and flagging for manual review, the results of the previous inventory are taken into account. Specify the location of the most recent inventory output file in the variable `previous_inventory`. + +To run the pipeline from a notebook in Colab, follow the steps in [updating_inventory.ipynb](updating_inventory.ipynb). To run from the command line, follow these steps. + +First, make sure that the files specifying the best trained classifier and NER models are present at `out/classif_train_out/best/best_checkpt.txt` and `out/ner_train_out/best/best_checkpt.txt`. Those files specify which checkpoints to use. Check that the checkpoints those files point to are on your system. + +If you do not have the trained models, and do not want to perform training, they can be downloaded with: +```sh +# Add code here for getting models! +``` + +Next, **make sure that output from previous updates has been saved elsewhere, as the old results must be deleted**. For example + +```sh +$ mv out/new_query out/update_2022 +``` + + +To remove the outputs of the previous run: +```sh +$ rm -rf out/new_query +``` + +Then the pipeline for updating results can be run: +```sh +$ make update_inventory +``` + +If Make is unavailable, run +```sh +$ snakemake -s snakemake/update_inventory.smk --configfile config/update_inventory.yml -c1 +``` + +The above commands run the Snakemake pipeline. If you wish to run the steps manually, see [src/README.md](src/README.md#updating-the-inventory). + +## Adjusting configurations + +The Snakemake pipelines are built such that they capture the workflow logic, while all configurations are stored separately. This makes it possible to adjust the workflows without changing source code or the Snakemake pipelines. 
+ +Configurations for reproducing original results are in [config/train_predict.yml](config/train_predict.yml) such as train/validation/test split ratios and output directories. Configurations for updating the inventory are in [config/update_inventory.yml](config/update_inventory.yml). + +Configurations regarding model training parameters are stored in [config/models_info.tsv](config/models_info.tsv), such as number of epochs, and convenient model names as well as official HuggingFace model names. + +The EuropePMC query string is stored in [config/query.txt](config/query.txt). + +# Associated publications + +The primary article of the biodata resource inventory can be found at https://doi.org/10.5281/zenodo.7768416. +A case study describing the efforts taken to make this project reproducible and to uphold code and data standards can be found at https://doi.org/10.5281/zenodo.7767794. + +# Authorship + +* [Dr. Heidi Imker](mailto:hjimker@gmail.com), Global Biodata Coalition +* [Dr. Kenneth Schackart](mailto:schackartk1@gmail.com), Global Biodata Coalition +* [Ana-Maria Istrate](mailto:aistrate@chanzuckerberg.com), Chan Zuckerberg Initiative diff --git a/analysis/README.md b/analysis/README.md new file mode 100644 index 0000000..4e6d8c9 --- /dev/null +++ b/analysis/README.md @@ -0,0 +1,93 @@ +# Data Analysis + +This directory contains R scripts for some analysis of the inventory conducted in 2022. They are stored here rather than [src](../src/) since their reuse is likely limited and is strictly related to analysis. However, these scripts are used in the [train and predict Snakemake pipeline](../snakemake/train_predict.smk). + +```sh +. +├── comparison.R # Retrieve life sci resources from FAIRsharing and re3data +├── epmc_metadata.R # Retrieve ePMC metadata to determine OA, full text, etc. 
+├── funders.R # Analyse funder metadata by article and biodata resource +├── funders_geo.R # Analyse top 200 funders by country +├── location_information.R # Generate maps of resource location metadata +├── metadata_analysis.R # Perform high-level metadata analysis +└── performance_metrics.R # Create plots and tables of model performances +``` + +All R scripts are command-line executable and take output files from the inventory as inputs for analysis. Usage statements are available through the `-h|--help` flag. + +## `location_information.R` + +The final inventory file is supplied as input, output directory is specified with `-o|--out-dir`, and 3 maps are generated: + +* `ip_coordinates.png`: IP host coordinates dot plot +* `ip_countries.png`: IP host countries heatmap, with country fill color scaled to country name count +* `author_countries.png`: Author affiliation countries heatmap, with country fill color scaled to country name count + +## `metadata_analysis.R` + +The final inventory file is supplied as input, and various metadata statistics are output to stdout. To easily save the output of this, simply redirect (`>`) the output to a file. For example, running from the root of the repository: + +```sh +$ Rscript analysis/metadata_analysis.R \ + data/final_inventory_2022.csv \ + > analysis/analysed_metadata.txt +``` + +In this case, no output will be seen in the terminal, but the output will be present in `analysis/analysed_metadata.txt`. + +Information included in this analysis: + +* Number of unique articles +* Number of resources with at least 1 URL returning 2XX or 3XX +* Number of resources with at least 1 WayBack URL +* Number of resources with grant agency data + +## `performance_metrics.R` + +This script conducts analysis on the model performance metrics on the validation and test sets. Output directory is specified with `-o|--out-dir`. 
Four files are needed as input: + +* `-cv|--class-train`: Classification training and validation set statistics +* `-ct|--class-test`: Classification test set statistics +* `-nv|--ner-train`: NER training and validation set statistics +* `-nt|--ner-test`: NER test set statistics + +The defaults for these arguments are the files stored in the repository, which are the results of the inventory conducted in 2022. + +Six files are output: + +* `class_val_set_performance.svg` and `class_val_set_performance.png`: Bar chart showing the performance of all article classification models on the validation set. Metrics include *F*1-score, precision, and recall. Models are in decreasing order of precision. +* `ner_val_set_performance.svg` and `ner_val_set_performance.png`: Bar chart showing the performance of all NER models on the validation set. Metrics include *F*1-score, precision, and recall. Models are in decreasing order of *F*1-score. +* `combined_classification_table.docx`: A Microsoft Word doc with a table showing the performance of all article classification models on the validation and test sets. Models are in decreasing order of precision on the validation set. +* `combined_ner_table.docx`: A Microsoft Word doc with a table showing the performance of all NER models on the validation and test sets. Models are in decreasing order of *F*1-score on the validation set. + +## `epmc_metadata.R` + +The final inventory file is supplied as input and the Europe PMC API is queried to determine if the article has a CC license, is open access, has full text available, has text mined terms, and has text mined accession numbers. Note that all but full text are found by querying the PMIDs found in the final inventory file; for full text, the original query was restricted to return only those as OA and having full text availability for the entire corpus and then those PMIDs were matched against the PMIDs found in the final inventory. 
+ +1 file is output: +* `text_mining_potential.csv`: A summary table of article counts (Y (Yes) or N (No)) and percentages + +## `comparison.R` + +Inputs are retrieved by querying the records available from the re3data.org API and the FAIRsharing API. Returns are filtered to life science resources and then compared with resources identified in the final inventory. The resources in these two repositories are compared against one another and the inventory to get a sense of the overlap. + +2 files are output: + +* `inventory_re3data_fairsharing_summary.csv`: Number of overlapping resources in the inventory, re3data, and FAIRsharing. +* `venn_diagram_set.csv`: Intersection set sizes between resources in the inventory, re3data, and FAIRsharing. + +## `funders.R` + +The final inventory file is supplied as input and the Europe PMC API is queried to retrieve "agency" metadata from individual articles (note that biodata resources in the inventory have concatenated "grantID" and "agency" values for resources with >1 article). This script retrieves "agency" for each article, when present, to analyze the supporting funding organizations identified. + +1 file is output: +* `inventory_funders.csv`: Deduplicated funder names with total unique article count, total unique biodata resource count, associated article PMIDs (list) and associated biodata resources (list). + +## `funders_geo.R` + +The output file from funders.R (inventory_funders_2023-01-20.csv) was manually curated to determine countries for funders mentioned >2 times and mapped to ISO.3166-1.alpha-3 country codes. The resulting file, funders_geo_200.csv, is used as the input for this script which groups by unique country to get summary statistics. Note that for agency names, there is some ambiguity via either unclear parent-child relationships (e.g. NIH vs. NIGMS) or inconsistent naming (e.g. National Key Research and Development Program vs. National Key Research Program of China). 
+ +2 files are output: +* `funders_geo_counts.csv`: By country summary with count unique agency names, count unique biodata resources, agency names (list) and biodata resource names (list). +* `funder_countries.png`: A (heat)map showing the number of biodata resources that were funded by +at least one agency from a given country. diff --git a/analysis/comparison.R b/analysis/comparison.R new file mode 100644 index 0000000..6f2518e --- /dev/null +++ b/analysis/comparison.R @@ -0,0 +1,561 @@ +#!/usr/bin/env Rscript + +# Author : Heidi Imker +# Kenneth Schackart +# Date : 2023-01-19 +# Purpose: Extract records for biodata resources from re3data and FAIRsharing +# APIs and compare with biodata resources found in GBC inventory +# Notes : +# re3data.org: correct schema (2.2) is here: +# https://gfzpublic.gfz-potsdam.de/pubman/faces/ViewItemOverviewPage.jsp?itemId=item_758898 +# https://www.re3data.org/api/doc +# Scripts found at: +# https://github.com/re3data/using_the_re3data_API/blob/main/re3data_API_certification_by_type.ipynb +# FAIRsharing: data is under CC-BY-SA Don't push any output files to Github! +# Run FAIRsharing login credential script first to obtain "hji_login" argument for the below. 
+# For rest, see API documentation on +# https://fairsharing.org/API_doc +# and +# https://api.fairsharing.org/model/database_schema.json + +# Imports ------------------------------------------------------------------- + +## Library calls ------------------------------------------------------------ + +library(dplyr) +library(glue) +library(httr) +library(jsonlite) +library(magrittr) +library(readr) +library(stringr) +library(tibble) +library(tidyr) +library(xml2) + +# Function Definitions ------------------------------------------------------ + +## get_args ----------------------------------------------------------------- + +#' Parse command-line arguments +#' +#' @return args list with input filenames +get_args <- function() { + parser <- argparse::ArgumentParser() + + parser$add_argument( + "inventory_file", + help = "Final inventory file", + metavar = "FILE", + type = "character", + default = "data/final_inventory_2022.csv" + ) + parser$add_argument( + "-c", + "--credentials", + help = "FAIRsharing login credentials file", + metavar = "JSON", + type = "character" + ) + parser$add_argument( + "-o", + "--out-dir", + help = "Output directory", + metavar = "DIR", + type = "character", + default = "analysis/figures" + ) + + args <- parser$parse_args() + + return(args) +} + +## extract_repository_info --------------------------------------------------- + +#' Extract re3data repository information +#' +#' Multiple values for a field are de-duplicated and joined with "_AND_". +#' +#' @param metadata Repository metadata (XML) +#' +#' @return List of repository metadata +extract_repository_info <- function(metadata) { + metadata_list <- list( + re3data_ID = xml_text(xml_find_all( + metadata, "//r3d:re3data.orgIdentifier" + )), + type = paste(unique(xml_text( + xml_find_all(metadata, "//r3d:type") + )), collapse = "_AND_"), + repositoryURL = paste(unique(xml_text( + xml_find_all(metadata, "//r3d:repositoryURL") + )), collapse = "_AND_"), + repositoryName = paste(unique(xml_text( + xml_find_all(metadata, "//r3d:repositoryName") + )), collapse = 
"_AND_"), + subject = paste(unique(xml_text( + xml_find_all(metadata, "//r3d:subject") + )), collapse = "_AND_") + ) + + return(metadata_list) +} + +## extract_re3data_info ---------------------------------------------------- + +#' Extract re3data return information +#' +#' Requests each repository URL and appends its extracted metadata as a row. +#' +#' @param re3data_return Return from re3data +#' +#' @return dataframe of re3data repositories +extract_re3data_info <- function(re3data_return) { + # One column per name assigned below; a wider frame would leave NA-named + # columns in the result when there are no repositories to append + repositories <- data.frame(matrix(ncol = 5, nrow = 0)) + colnames(repositories) <- + c("re3data_ID", + "repositoryName", + "repositoryURL", + "subject", + "type") + + for (url in re3data_return) { + repository_metadata_request <- GET(url) + + repository_metadata_XML <- read_xml(repository_metadata_request) + + results_list <- extract_repository_info(repository_metadata_XML) + + repositories <- rbind(repositories, results_list) + } + + return(repositories) +} + +## filter_re3data_contents -------------------------------------------------- + +#' Filter the contents of re3data return to only include life science +#' and not "institutional" or "other" to be consistent with FAIRsharing +#' +#' @param df re3data contents dataframe +#' +#' @return dataframe with only life science repositories +filter_re3data_contents <- function(df) { + life_sci_re3data <- df %>% + filter(grepl("Life", subject), + type != "institutional", + type != "other") + + return(life_sci_re3data) +} + +## get_re3data_contents ----------------------------------------------------- + +#' Get contents of re3data +#' +#' @return list of urls from re3data query +get_re3data_contents <- function() { + # Same HTTPS host as the per-repository URLs built below + re3data_request <- GET("https://www.re3data.org/api/v1/repositories") + re3data_IDs <- + xml_text(xml_find_all(read_xml(re3data_request), xpath = "//id")) + URLs <- + paste("https://www.re3data.org/api/v1/repository/", + re3data_IDs, + sep = "") + + return(URLs) +} + +## login_fairsharing -------------------------------------------------------- + +#' Login to FAIRsharing and get session token +#' +#' 
@param credentials_file FAIRsharing login credentials file +#' +#' @return session JSON web token to access API +login_fairsharing <- function(credentials_file) { + fair_login_url <- 'https://api.fairsharing.org/users/sign_in' + + response <- POST( + fair_login_url, + add_headers("Content-Type" = "application/json", + "Accept" = "application/json"), + body = upload_file(credentials_file) + ) + content <- fromJSON(rawToChar(response$content)) + token <- con$jwt + + return(token) +} + +## extract_fairsharing_info ------------------------------------------------- + +#' Extract repository information from FAIRsharing return list +#' +#' @param fairsharing_return List of return from FAIRsharing +#' +#' @return dataframe of extracted information +extract_fairsharing_info <- function(fairsharing_return) { + dois <- + fairsharing_return[["data"]][["attributes"]][["metadata"]][["doi"]] + names <- + fairsharing_return[["data"]][["attributes"]][["metadata"]][["name"]] + homepages <- + fairsharing_return[["data"]][["attributes"]][["metadata"]][["homepage"]] + subjects <- + as_tibble_col(fairsharing_return[["data"]][["attributes"]][["subjects"]]) + + fairsharing_repos <- + tibble( + "doi" = dois, + "name" = names, + "homepage" = homepages, + "subjects" = subjects$value + ) + + return(fairsharing_repos) +} + +## get_fairsharing_contents ------------------------------------------------- + +#' Get contents of FAIRsharing life science contents +#' +#' @note The request from FAIRsharing sometimes times out. Keep trying. 
#'
#' @param token session JSON web token to access API
#'
#' @return parsed JSON response of the FAIRsharing search query
get_fairsharing_contents <- function(token) {
  # Databases with subject "life science"; one page of up to 3600 records.
  query_url <-
    paste0(
      "https://api.fairsharing.org/search/fairsharing_records?",
      "fairsharing_registry=database&subjects=life%20science",
      "&page[number]=1&page[size]=3600"
    )

  response <- POST(
    query_url,
    add_headers(
      "Content-Type" = "application/json",
      "Accept" = "application/json",
      "Authorization" = paste0("Bearer ", token)
    )
  )

  query_return <- fromJSON(rawToChar(response$content))

  return(query_return)
}

## normalize_url -------------------------------------------------------------

#' Normalize URLs so they can be compared across sources
#'
#' Removes the http(s) scheme and an optional literal "www." prefix, drops a
#' single trailing slash and lower-cases the result.
#'
#' @param url character vector of URLs
#'
#' @return character vector of normalized URLs
normalize_url <- function(url) {
  # The dot is escaped on purpose: an unescaped "www." also matches "www2",
  # which turned "www2.example.org" into ".example.org".
  url <- str_remove(url, "^https?://(www\\.)?")
  url <- str_remove(url, "/$")

  return(str_to_lower(url))
}

## clean_re3data ------------------------------------------------------------

#' Clean re3data fields
#'
#' Keeps ID, name and URL, renames them with an `r3_` prefix, trims
#' whitespace, drops rows without a URL and normalizes the URL.
#'
#' @param df re3data repositories dataframe
#'
#' @return cleaned dataframe
clean_re3data <- function(df) {
  df %>%
    select(re3data_ID, repositoryName, repositoryURL) %>%
    rename("r3_id" = "re3data_ID",
           "r3_name" = "repositoryName",
           "r3_url" = "repositoryURL") %>%
    mutate(across(where(is.character), str_trim)) %>%
    drop_na(r3_url) %>%
    mutate(r3_url = normalize_url(r3_url))
}

## clean_fairsharing -------------------------------------------------------

#' Clean FAIRsharing fields
#'
#' Keeps doi, name and homepage, renames them with an `fs_` prefix, trims
#' whitespace, drops rows without a URL and normalizes the URL.
#'
#' @param df FAIRsharing repositories dataframe
#'
#' @return cleaned dataframe
clean_fairsharing <- function(df) {
  df %>%
    select(doi, name, homepage) %>%
    rename("fs_id" = "doi",
           "fs_name" = "name",
           "fs_url" = "homepage") %>%
    mutate(across(where(is.character), str_trim)) %>%
    drop_na(fs_url) %>%
    mutate(fs_url = normalize_url(fs_url))
}

## clean_inventory -------------------------------------------------------

#' Clean biodata inventory
#'
#' Keeps the ID, name and URL columns, renames them with an `inv_` prefix,
#' trims whitespace and normalizes the first extracted URL.
#'
#' @param df Inventory dataframe
#'
#' @return cleaned dataframe
clean_inventory <- function(df) {
  ## note that 2 URLs extracted in inventory for ~5% of inventory resources
  ## - testing for matches on first URL only
  df %>%
    select(ID, best_name, best_common, best_full, extracted_url) %>%
    rename(
      "inv_id" = "ID",
      "inv_name" = "best_name",
      "inv_comm_name" = "best_common",
      "inv_full_name" = "best_full",
      "inv_url" = "extracted_url"
    ) %>%
    mutate(across(where(is.character), str_trim)) %>%
    mutate(
      # Keep only the text before the first comma, i.e. the first URL
      inv_url = str_remove(inv_url, ",.*$"),
      inv_url = normalize_url(inv_url)
    )
}

# Main ----------------------------------------------------------------------

## Parse arguments ----------------------------------------------------------

args <- get_args()

credentials_file <- args$credentials

inventory <-
  read_csv(args$inventory_file,
           show_col_types = FALSE)

out_dir <- args$out_dir

## Query APIs ---------------------------------------------------------------

### re3data -----------------------------------------------------------------

re3data_return <- get_re3data_contents()

re3data_repos_all <- extract_re3data_info(re3data_return)

re3data_repos <- filter_re3data_contents(re3data_repos_all)

### FAIRsharing -------------------------------------------------------------

fairsharing_token <- login_fairsharing(credentials_file)

fairsharing_return <- get_fairsharing_contents(fairsharing_token)

fairsharing_repos <- extract_fairsharing_info(fairsharing_return)

## Clean data ---------------------------------------------------------------

re3data_cleaned <- clean_re3data(re3data_repos)

fairsharing_cleaned <- clean_fairsharing(fairsharing_repos)

inventory_cleaned <- clean_inventory(inventory)

## Analysis ------------------------------------------------------------------

# One row is appended per combination of sources compared below.
summary <- tibble(
  inventory = logical(),
  re3data = logical(),
  fairsharing = logical(),
  names_shared = numeric(),
  urls_shared =
numeric(),
  total_matches = numeric()
)

### inventory and re3data ----------------------------------------------------

# Inventory rows whose common name, or full name, equals a re3data name
same_comm_name_inv_re3 <-
  inner_join(inventory_cleaned,
             re3data_cleaned,
             by = c("inv_comm_name" = "r3_name"))
same_full_name_inv_re3 <-
  inner_join(inventory_cleaned,
             re3data_cleaned,
             by = c("inv_full_name" = "r3_name"))

# Distinct matched names (common and full names pooled)
same_name_inv_re3 <- tibble(
  names_found_in_re3 =
    c(
      same_comm_name_inv_re3$inv_comm_name,
      same_full_name_inv_re3$inv_full_name
    )
) %>%
  distinct(names_found_in_re3)

# Inventory rows whose normalized URL equals a re3data URL
same_url_inv_re3 <-
  inner_join(inventory_cleaned, re3data_cleaned, by = c("inv_url" = "r3_url"))

# Distinct inventory names matched by common name, full name or URL
unique_inv_re3 <- tibble(
  unique_inv_re3 = c(
    same_comm_name_inv_re3$inv_name,
    same_full_name_inv_re3$inv_name,
    same_url_inv_re3$inv_name
  )
) %>%
  distinct(unique_inv_re3)


# Summary row for the inventory / re3data pair
res <- tibble(
  inventory = T,
  re3data = T,
  fairsharing = F,
  names_shared = nrow(same_name_inv_re3),
  urls_shared = nrow(same_url_inv_re3),
  total_matches = nrow(unique_inv_re3)
)

summary <- summary %>%
  rbind(res)

# same_name_inv_re3 and same_url_inv_re3 are kept: the three-way comparison
# further down joins on them.
rm(same_comm_name_inv_re3,
   same_full_name_inv_re3,
   res)

### inventory and FAIRsharing ------------------------------------------------

# Same three comparisons as above, against FAIRsharing
same_comm_name_inv_fs <-
  inner_join(inventory_cleaned,
             fairsharing_cleaned,
             by = c("inv_comm_name" = "fs_name"))
same_full_name_inv_fs <-
  inner_join(inventory_cleaned,
             fairsharing_cleaned,
             by = c("inv_full_name" = "fs_name"))

same_name_inv_fs <- tibble(
  names_found_in_fs =
    c(
      same_comm_name_inv_fs$inv_comm_name,
      same_full_name_inv_fs$inv_full_name
    )
) %>%
  distinct(names_found_in_fs)

same_url_inv_fs <-
  inner_join(inventory_cleaned,
             fairsharing_cleaned,
             by = c("inv_url" = "fs_url"))

unique_inv_fs <- tibble(
  unique_inv_fs = c(
    same_comm_name_inv_fs$inv_name,
    same_full_name_inv_fs$inv_name,
    same_url_inv_fs$inv_name
  )
) %>%
  distinct(unique_inv_fs)

# Summary row for the inventory / FAIRsharing pair
res <- tibble(
  inventory = T,
  re3data = F,
  fairsharing = T,
names_shared = nrow(same_name_inv_fs),
  urls_shared = nrow(same_url_inv_fs),
  total_matches = nrow(unique_inv_fs)
)

summary <- summary %>%
  rbind(res)

# same_name_inv_fs and same_url_inv_fs are kept for the three-way comparison
rm(same_comm_name_inv_fs,
   same_full_name_inv_fs,
   res)

### re3data and FAIRsharing --------------------------------------------------

# Repositories with the same name, or the same normalized URL, in both
same_name_re3_fs <-
  inner_join(re3data_cleaned,
             fairsharing_cleaned,
             by = c("r3_name" = "fs_name"))

same_url_re3_fs <-
  inner_join(re3data_cleaned, fairsharing_cleaned, by = c("r3_url" = "fs_url"))

# Distinct re3data names matched by either name or URL
unique_re3_fs <- tibble(unique_re3_fs = c(same_name_re3_fs$r3_name,
                                          same_url_re3_fs$r3_name)) %>%
  distinct(unique_re3_fs)

res <- tibble(
  inventory = F,
  re3data = T,
  fairsharing = T,
  names_shared = nrow(same_name_re3_fs),
  urls_shared = nrow(same_url_re3_fs),
  total_matches = nrow(unique_re3_fs)
)

summary <- summary %>%
  rbind(res)

rm(res)

### inventory and re3data and FAIRsharing -----------------------------------

# Names that the inventory shares with re3data AND with FAIRsharing
same_name_inv_re3_fs <-
  inner_join(
    same_name_inv_re3,
    same_name_inv_fs,
    by = c("names_found_in_re3" = "names_found_in_fs")
  )

# Inventory URLs matched in both registries. Both inputs carry the inventory
# columns, so the join suffixes them (.x / .y); one row is kept per URL.
same_url_inv_re3_fs <-
  inner_join(same_url_inv_re3,
             same_url_inv_fs, by = c("inv_url" = "inv_url")) %>%
  distinct(inv_url, .keep_all = T)

# NOTE(review): this pools matched common/full names with inv_name values;
# a resource matched both ways under two different strings would be counted
# twice -- confirm this is acceptable.
unique_inv_re3_fs <- tibble(
  unique_inv_re3_fs = c(
    same_name_inv_re3_fs$names_found_in_re3,
    same_url_inv_re3_fs$inv_name.x
  )
) %>%
  distinct(unique_inv_re3_fs)

res <- tibble(
  inventory = T,
  re3data = T,
  fairsharing = T,
  names_shared = nrow(same_name_inv_re3_fs),
  urls_shared = nrow(same_url_inv_re3_fs),
  total_matches = nrow(unique_inv_re3_fs)
)

summary <- summary %>%
  rbind(res)

rm(res)

### pivoting for venn diagram -----------------------------------------------

# Label every summary row with the combination of sources it describes
venn_df <- summary %>%
  select(-names_shared, -urls_shared) %>%
  mutate(
    combo = case_when(
      inventory & re3data & !fairsharing ~ "inv_re3",
      inventory & !re3data & fairsharing ~ "inv_fs",
      !inventory &
        re3data & fairsharing ~
"re3_fs",
      inventory & re3data & fairsharing ~ "inv_re3_fs",
    )
  ) %>%
  select(combo, total_matches) %>%
  # One column per combination, holding its total match count
  pivot_wider(names_from = combo, values_from = total_matches) %>%
  mutate(
    # The pairwise totals include the three-way matches: subtract them so
    # each value is the size of one exclusive region of the diagram.
    inv_re3 = inv_re3 - inv_re3_fs,
    inv_fs = inv_fs - inv_re3_fs,
    re3_fs = re3_fs - inv_re3_fs,
    # Entries found in one source only: total rows minus every overlap
    inv = nrow(inventory_cleaned) - (inv_re3 + inv_fs + inv_re3_fs),
    re3 = nrow(re3data_cleaned) - (inv_re3 + re3_fs + inv_re3_fs),
    fs = nrow(fairsharing_cleaned) - (inv_fs + re3_fs + inv_re3_fs)
  )

## Outputs ------------------------------------------------------------------

write_csv(summary,
          file.path(out_dir, "inventory_re3data_fairsharing_summary.csv"))
write_csv(venn_df, file.path(out_dir, "venn_diagram_sets.csv"))
diff --git a/analysis/epmc_metadata.R b/analysis/epmc_metadata.R
new file mode 100644
index 0000000..82655a8
--- /dev/null
+++ b/analysis/epmc_metadata.R
@@ -0,0 +1,548 @@
#!/usr/bin/env Rscript

# Author : Heidi Imker
#          Kenneth Schackart
# Date   : 2022-12-27
# Purpose: Determine which articles associated with the biodata resource
#          inventory are Open Access, have full text available,
#          have text-mined terms, etc.
# Imports -------------------------------------------------------------------

## Library calls ------------------------------------------------------------

library(argparse)
library(dplyr)
library(europepmc)
library(ggplot2)
library(magrittr)
library(RColorBrewer)
library(readr)
library(scales)
library(stringr)
library(tidyr)

# Settings ------------------------------------------------------------------

# Light theme with centered titles for every plot in this script
theme_set(theme_light() +
            theme(
              plot.title = element_text(hjust = 0.5),
              plot.subtitle = element_text(hjust = 0.5)
            ))

# Function Definitions ------------------------------------------------------

#' Parse command-line arguments
#'
#' @return args list with `inventory_file`, `query` and `out_dir`
get_args <- function() {
  parser <- argparse::ArgumentParser()

  parser$add_argument(
    "inventory_file",
    help = "Final inventory file",
    metavar = "FILE",
    type = "character",
    # A positional argument only falls back to its default when it is
    # optional; without nargs = "?" the default below was never used.
    nargs = "?",
    default = "data/final_inventory_2022.csv"
  )
  parser$add_argument(
    "-q",
    "--query",
    help = "Original query",
    metavar = "FILE",
    type = "character",
    default = "config/query.txt"
  )
  parser$add_argument(
    "-o",
    "--out-dir",
    help = "Output directory",
    metavar = "DIR",
    type = "character",
    default = "analysis/figures"
  )

  args <- parser$parse_args()

  return(args)
}

#' Get metadata from Europe PMC
#'
#' Calls `epmc_details()` once per ID and stacks one row per article with
#' the id, open-access flag, text-mining flags and license.
#'
#' @param ids list of article IDs
#'
#' @return dataframe with article metadata
get_metadata <- function(ids) {
  out_df <- tibble()

  for (id_i in ids) {
    epmc_return <- epmc_details(id_i)
    metadata <- epmc_return[[1]]
    id <- metadata["id"]
    open_access <- metadata["isOpenAccess"]
    text_mined_terms <- metadata["hasTextMinedTerms"]
    tm_accession_nums <- metadata["hasTMAccessionNumbers"]
    # Selecting "license" can raise an error; record NA in that case
    # (presumably the column is absent for unlicensed articles -- confirm).
    # The unreachable `force(do.next)` after `return(NA)` was removed.
    license <- tryCatch(
      metadata["license"],
      error = function(cond) {
        return(NA)
      }
    )

    article_report <-
      cbind(id,
            open_access,
            text_mined_terms,
            tm_accession_nums,
            license)

    out_df <- rbind(out_df, article_report)
}

  return(out_df)
}

#' Add dates to query and restrict to full text and open access
#'
#' @param query original query string containing `{0}` and `{1}` placeholders
#'
#' @return query string with the years 2011 and 2021 filled in and the
#'   full-text / open-access restriction appended
modify_query <- function(query) {
  # Original query has place holders for date range, fill those in with years
  # then add restrictions to full text and open access
  query <- str_replace(query, "\\{0\\}", "2011") %>%
    str_replace("\\{1\\}", "2021") %>%
    paste("AND ((HAS_FT:Y AND OPEN_ACCESS:Y))")


  return(query)
}

#' Get IDs in inventory that are open access and full text
#'
#' @param inventory_ids IDs in inventory
#' @param oa_ft_ids IDs that are open access and full text
#'
#' @return dataframe with the rows present in both inputs (inner join on
#'   their shared columns, since no `by` is given)
get_oa_ft_inventory <- function(inventory_ids, oa_ft_ids) {
  inventory_oa_ft <- inner_join(inventory_ids, oa_ft_ids)

  return(inventory_oa_ft)
}

# Main ----------------------------------------------------------------------

print("Parsing command-line arguments.")

args <- get_args()

query_string <- read_file(args$query)

full_inventory <-
  read_csv(args$inventory_file,
           show_col_types = FALSE)

out_dir <- args$out_dir

## Queries ------------------------------------------------------------------

### Metadata from original inventory ---------------------------------

# One row per article: split the ", "-separated ID field, remember which
# inventory row (resource_num) each article came from, and keep each article
# ID once.
long_inventory <- full_inventory %>%
  rename("id" = "ID") %>%
  mutate(resource_num = row_number()) %>%
  mutate(id = strsplit(id, ", ")) %>%
  unnest(id) %>%
  distinct(id, .keep_all = T)

cat("Getting metadata from Europe PMC... ")

id_list <- long_inventory$id

metadata_df <- get_metadata(id_list)

long_inventory <- full_join(long_inventory, metadata_df)

# Per-resource view of the article metadata: license is reduced to
# "cc" (present) / "no" (missing) before collapsing to one row per resource.
resource_metadata <- long_inventory %>%
  select(resource_num,
         license,
         isOpenAccess,
         hasTextMinedTerms,
         hasTMAccessionNumbers) %>%
  mutate(license = case_when(!is.na(license) ~ "cc",
                             T ~ "no")) %>%
  aggregate(.
~ resource_num, ., unique) %>%
  # After aggregation a resource holds the unique values found across its
  # articles; anything that is not exactly one known value becomes "both".
  mutate(
    license = case_when(license == "cc" ~ "cc",
                        license == "no" ~ "no",
                        TRUE ~ "both"),
    isOpenAccess = case_when(isOpenAccess == "Y" ~ "Y",
                             isOpenAccess == "N" ~ "N",
                             TRUE ~ "both"),
    hasTextMinedTerms = case_when(
      hasTextMinedTerms == "Y" ~ "Y",
      hasTextMinedTerms == "N" ~ "N",
      TRUE ~ "both"
    ),
    hasTMAccessionNumbers = case_when(
      hasTMAccessionNumbers == "Y" ~ "Y",
      hasTMAccessionNumbers == "N" ~ "N",
      TRUE ~ "both"
    )
  )

cat("Done.\n")

### Open access and full text -----------------------------------------------

cat("Querying Europe PMC for articles with full text and open access... ")

# The query file holds {0}/{1} date placeholders and no access restriction;
# modify_query() fills in the years and adds HAS_FT / OPEN_ACCESS. It was
# defined but never called, so the raw query was being sent.
open_full_ids <-
  select(epmc_search(query = modify_query(query_string), limit = 25000), 1)

cat("Done.\n")

oa_ft_inventory <-
  get_oa_ft_inventory(long_inventory %>% select(id), open_full_ids)

## Analysis -----------------------------------------------------------------

# One row is appended per metadata type analysed below
summary <- tibble(
  type = character(),
  resources_yes = numeric(),
  resources_no = numeric(),
  resources_mixed = numeric(),
  articles_yes = numeric(),
  articles_no = numeric()
)

### Full text availability --------------------------------------------------

articles_w_full_text <- nrow(oa_ft_inventory)
articles_wo_full_text <- length(id_list) - nrow(oa_ft_inventory)

# Flag each article as full text ("true") or not ("false"), then collapse to
# one row per resource.
oa_ft_resources <- oa_ft_inventory %>%
  mutate(oa_ft = "true") %>%
  right_join(long_inventory) %>%
  distinct(id, .keep_all = TRUE) %>%
  select(id, oa_ft, resource_num) %>%
  mutate(oa_ft = case_when(is.na(oa_ft) ~ "false", TRUE ~ oa_ft)) %>%
  aggregate(.
~ resource_num, ., unique) %>%
  # Values that are not exactly "true" or "false" are labelled "both"
  mutate(oa_ft = case_when(oa_ft == "true" ~ "true",
                           oa_ft == "false" ~ "false",
                           T ~ "both")) %>%
  group_by(oa_ft) %>%
  summarize(count = n())

# Resource counts per full-text category
ft_resources <- oa_ft_resources %>%
  filter(oa_ft == "true") %>%
  select(count)
not_ft_resources <- oa_ft_resources %>%
  filter(oa_ft == "false") %>%
  select(count)
mixed_ft_resources <- oa_ft_resources %>%
  filter(oa_ft == "both") %>%
  select(count)

# NOTE(review): if a category has no rows its `$count` is zero-length and
# tibble() below would presumably fail -- confirm each category is populated.
summary <- summary %>%
  rbind(
    tibble(
      type = "Full Text XML Available",
      resources_yes = ft_resources$count,
      resources_no = not_ft_resources$count,
      resources_mixed = mixed_ft_resources$count,
      articles_yes = articles_w_full_text,
      articles_no = articles_wo_full_text
    )
  )

rm(
  articles_w_full_text,
  articles_wo_full_text,
  oa_ft_resources,
  ft_resources,
  not_ft_resources,
  mixed_ft_resources
)


### License availability --------------------------------------------------

# Article level: an article counts as licensed when `license` is not NA
article_licenses <- metadata_df %>%
  select(license) %>%
  mutate(has_license = case_when(!is.na(license) ~ "yes",
                                 T ~ "no")) %>%
  group_by(has_license) %>%
  summarize(count = n())

articles_w_cc_license <- article_licenses %>%
  filter(has_license == "yes") %>%
  select(count)
articles_wo_cc_license <- article_licenses %>%
  filter(has_license == "no") %>%
  select(count)

# Resource level: uses the "cc" / "no" / "both" labels in resource_metadata
resource_licenses <- resource_metadata %>%
  select(license) %>%
  group_by(license) %>%
  summarize(count = n())

resources_w_cc_license <- resource_licenses %>%
  filter(license == "cc") %>%
  select(count)
resources_wo_cc_license <- resource_licenses %>%
  filter(license == "no") %>%
  select(count)
resources_w_mixed_license <- resource_licenses %>%
  filter(license == "both") %>%
  select(count)

summary <- summary %>%
  rbind(
    tibble(
      type = "CC Licensed",
      resources_yes = resources_w_cc_license$count,
      resources_no = resources_wo_cc_license$count,
      resources_mixed = resources_w_mixed_license$count,
      articles_yes =
articles_w_cc_license$count,
      articles_no = articles_wo_cc_license$count
    )
  )

rm(
  article_licenses,
  articles_w_cc_license,
  articles_wo_cc_license,
  resource_licenses,
  resources_w_cc_license,
  resources_wo_cc_license,
  resources_w_mixed_license
)

### Open access -------------------------------------------------------------

# Article level: count articles per isOpenAccess value ("Y" / "N")
article_access <- metadata_df %>%
  select(isOpenAccess) %>%
  group_by(isOpenAccess) %>%
  summarize(count = n())

open_access_articles <- article_access %>%
  filter(isOpenAccess == "Y") %>%
  select(count)
not_open_access_articles <- article_access %>%
  filter(isOpenAccess == "N") %>%
  select(count)

# Resource level: "Y" / "N" / "both" labels from resource_metadata
resource_access <- resource_metadata %>%
  select(isOpenAccess) %>%
  group_by(isOpenAccess) %>%
  summarize(count = n())

open_access_resources <- resource_access %>%
  filter(isOpenAccess == "Y") %>%
  select(count)
not_open_access_resources <- resource_access %>%
  filter(isOpenAccess == "N") %>%
  select(count)
mixed_access_resources <- resource_access %>%
  filter(isOpenAccess == "both") %>%
  select(count)

summary <- summary %>%
  rbind(
    tibble(
      type = "Open Access",
      resources_yes = open_access_resources$count,
      resources_no = not_open_access_resources$count,
      resources_mixed = mixed_access_resources$count,
      articles_yes = open_access_articles$count,
      articles_no = not_open_access_articles$count
    )
  )

rm(
  article_access,
  open_access_articles,
  not_open_access_articles,
  resource_access,
  open_access_resources,
  not_open_access_resources,
  mixed_access_resources
)

### Text mined terms --------------------------------------------------------

# Article level: count articles per hasTextMinedTerms value
text_mined_terms <- metadata_df %>%
  select(hasTextMinedTerms) %>%
  group_by(hasTextMinedTerms) %>%
  summarize(count = n())

has_text_mined_terms <- text_mined_terms %>%
  filter(hasTextMinedTerms == "Y") %>%
  select(count)
no_text_mined_terms <- text_mined_terms %>%
  filter(hasTextMinedTerms == "N") %>%
  select(count)

# Resource level: "Y" / "N" / "both" labels from resource_metadata
res_text_mined_terms <- resource_metadata %>%
  select(hasTextMinedTerms) %>%
  group_by(hasTextMinedTerms) %>%
  summarize(count = n())

res_has_text_mined_terms <- res_text_mined_terms %>%
  filter(hasTextMinedTerms == "Y") %>%
  select(count)
res_no_text_mined_terms <- res_text_mined_terms %>%
  filter(hasTextMinedTerms == "N") %>%
  select(count)
res_mixed_text_mined_terms <- res_text_mined_terms %>%
  filter(hasTextMinedTerms == "both") %>%
  select(count)

summary <- summary %>%
  rbind(
    tibble(
      type = "Text Mined Terms",
      resources_yes = res_has_text_mined_terms$count,
      resources_no = res_no_text_mined_terms$count,
      resources_mixed = res_mixed_text_mined_terms$count,
      articles_yes = has_text_mined_terms$count,
      articles_no = no_text_mined_terms$count
    )
  )

# Also drop the resource-level temporaries, as every other section does
rm(
  text_mined_terms,
  has_text_mined_terms,
  no_text_mined_terms,
  res_text_mined_terms,
  res_has_text_mined_terms,
  res_no_text_mined_terms,
  res_mixed_text_mined_terms
)

### Text mined accession numbers --------------------------------------------

# Article level: count articles per hasTMAccessionNumbers value
text_mined_acc_nums <- metadata_df %>%
  select(hasTMAccessionNumbers) %>%
  group_by(hasTMAccessionNumbers) %>%
  summarize(count = n())

has_text_mined_acc_nums <- text_mined_acc_nums %>%
  filter(hasTMAccessionNumbers == "Y") %>%
  select(count)
no_text_mined_acc_nums <- text_mined_acc_nums %>%
  filter(hasTMAccessionNumbers == "N") %>%
  select(count)

# Resource level
res_text_mined_acc_nums <- resource_metadata %>%
  select(hasTMAccessionNumbers) %>%
  group_by(hasTMAccessionNumbers) %>%
  summarize(count = n())

# Previously misspelled `res_has_text_minedacc_nums`, which did not match
# the name passed to rm() after this section.
res_has_text_mined_acc_nums <- res_text_mined_acc_nums %>%
  filter(hasTMAccessionNumbers == "Y") %>%
  select(count)
res_no_text_mined_acc_nums <- res_text_mined_acc_nums %>%
  filter(hasTMAccessionNumbers == "N") %>%
  select(count)
res_mixed_text_mined_acc_nums <- res_text_mined_acc_nums %>%
  filter(hasTMAccessionNumbers == "both") %>%
  select(count)

summary <- summary %>%
  rbind(
    tibble(
      type = "Text Mined Accession Numbers",
      resources_yes = res_has_text_mined_acc_nums$count,
      resources_no =
res_no_text_mined_acc_nums$count,
      resources_mixed = res_mixed_text_mined_acc_nums$count,
      articles_yes = has_text_mined_acc_nums$count,
      articles_no = no_text_mined_acc_nums$count
    )
  )

rm(
  text_mined_acc_nums,
  has_text_mined_acc_nums,
  no_text_mined_acc_nums,
  res_text_mined_acc_nums,
  res_has_text_mined_acc_nums,
  res_no_text_mined_acc_nums,
  res_mixed_text_mined_acc_nums
)

### Summarization -----------------------------------------------------------

# Turn the counts into percentages of all articles / all resources
summary <- summary %>%
  mutate(
    articles_yes = as.numeric(articles_yes),
    articles_no = as.numeric(articles_no),
    resources_yes = as.numeric(resources_yes),
    resources_no = as.numeric(resources_no),
    resources_mixed = as.numeric(resources_mixed)
  ) %>%
  mutate(
    articles_percent_yes = (articles_yes / (articles_yes + articles_no)) *
      100,
    articles_percent_no = (articles_no / (articles_yes + articles_no)) *
      100,
    resources_percent_yes = (resources_yes / (
      resources_yes + resources_no + resources_mixed
    )) * 100,
    resources_percent_no = (resources_no / (
      resources_yes + resources_no + resources_mixed
    )) * 100,
    resources_percent_mixed = (
      resources_mixed / (resources_yes + resources_no + resources_mixed)
    ) * 100
  )

# Long format for plotting: "articles_percent_yes" becomes
# asset = "Articles", label = "Yes".
summary_long <- summary %>%
  select(type, contains("percent")) %>%
  pivot_longer(cols = contains("percent"),
               names_to = "asset_label",
               values_to = "percent") %>%
  mutate(asset_label = str_remove(asset_label, "_percent")) %>%
  separate(asset_label, into = c("asset", "label")) %>%
  mutate(
    # Factor levels fix the order of the bars and of the fill colours
    type = factor(
      type,
      levels = c(
        "Text Mined Accession Numbers",
        "Text Mined Terms",
        "Full Text XML Available",
        "Open Access",
        "CC Licensed"
      )
    ),
    asset = str_to_title(asset),
    label = str_to_title(label),
    label = factor(label, levels = c("No", "Mixed", "Yes"))
  )

### Visualization -----------------------------------------------------------

summary_plot <- summary_long %>%
  ggplot(aes(x = percent / 100, y = type, fill = label)) +
  facet_wrap( ~ asset) +
  geom_col(width = 0.5, alpha = 0.8) +
  scale_fill_manual(values = c("#D95F02", "#666666", "#7570B3")) +
  scale_x_continuous(labels = percent) +
  labs(x = "",
       y = "",
       fill = "") +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme(legend.position = "bottom")

summary_plot

## Output -------------------------------------------------------------------

write_csv(summary, file.path(out_dir, "text_mining_potential.csv"))

ggsave(
  file.path(out_dir, "text_mining_potential_plot.png"),
  summary_plot,
  width = 6.5,
  height = 4
)
ggsave(
  file.path(out_dir, "text_mining_potential_plot.svg"),
  summary_plot,
  width = 6.5,
  height = 4
)
diff --git a/analysis/funders.R b/analysis/funders.R
new file mode 100644
index 0000000..7789d90
--- /dev/null
+++ b/analysis/funders.R
@@ -0,0 +1,143 @@
#!/usr/bin/env Rscript

# Author : Heidi Imker
#          Kenneth Schackart
# Purpose: Retrieve and analyze funder metadata

# Imports -------------------------------------------------------------------

## Library calls ------------------------------------------------------------

library(argparse)
library(dplyr)
library(europepmc)
library(magrittr)
library(readr)
library(stringr)
# unnest() and drop_na() are called below; tidyr was not loaded here
library(tidyr)

# Function Definitions ------------------------------------------------------

#' Parse command-line arguments
#'
#' @return args list with `inventory_file` and `out_dir`
get_args <- function() {
  parser <- argparse::ArgumentParser()

  parser$add_argument(
    "inventory_file",
    help = "Final inventory file",
    metavar = "FILE",
    type = "character",
    # A positional argument only falls back to its default when it is
    # optional; without nargs = "?" the default below was never used.
    nargs = "?",
    default = "data/final_inventory_2022.csv"
  )
  parser$add_argument(
    "-o",
    "--out-dir",
    help = "Output directory",
    metavar = "DIR",
    type = "character",
    default = "analysis/figures"
  )

  args <- parser$parse_args()

  return(args)
}

#' Get metadata from Europe PMC
#'
#' Calls `epmc_details()` once per ID and stacks the id, title and funding
#' agency rows of every article.
#'
#' @param ids list of article IDs
#'
#' @return dataframe with article id, title and agency
get_metadata <- function(ids) {
  out_df <- tibble()

  for (id_i in ids) {
    epmc_return <- epmc_details(id_i)
    metadata <- epmc_return[[1]]
    id <- metadata["id"]
    title <- metadata["title"]
    # NOTE(review): element 9 of the epmc_details() result is assumed to be
    # the grants table -- confirm, and prefer selecting it by name.
    # Any error while reading it is recorded as NA. The unreachable
    # `force(do.next)` after `return(NA)` was removed.
    agency <- tryCatch(
      epmc_return[[9]]["agency"],
      error = function(cond) {
        return(NA)
      }
    )

    article_report <-
      cbind(id,
            title,
            agency)

    out_df <- rbind(out_df, article_report)
  }

  return(out_df)
}


# Main ----------------------------------------------------------------------

print("Parsing command-line arguments.")

args <- get_args()

full_inventory <-
  read_csv(args$inventory_file,
           show_col_types = FALSE)

out_dir <- args$out_dir

# One row per article: split the ", "-separated ID field and keep each
# article ID once.
long_inventory <- full_inventory %>%
  rename("id" = "ID") %>%
  mutate(resource_num = row_number()) %>%
  mutate(id = strsplit(id, ", ")) %>%
  unnest(id) %>%
  distinct(id, .keep_all = TRUE)

## Query Europe PMC ---------------------------------------------------------

cat("Getting metadata from Europe PMC... ")

id_list <- long_inventory$id

metadata_df <- get_metadata(id_list)

## Analyze funder metadata --------------------------------------------------

### Number of articles that have funder metadata ----------------------------

# An article whose only agency value is NA pastes to the string "NA"
num_articles_w_funder_info <- metadata_df %>%
  group_by(id) %>%
  summarize(agencies = paste(agency, collapse = "")) %>%
  filter(agencies != "NA") %>%
  summarize(count = n())

### Analyze funders by resource ---------------------------------------------

# Attach the resource name to every (article, agency) row, then summarise
# per agency.
funders <- long_inventory %>%
  select(id, best_name) %>%
  drop_na() %>%
  distinct(id, .keep_all = TRUE) %>%
  right_join(metadata_df %>% drop_na()) %>%
  group_by(agency) %>%
  summarize(
    count_all_article_instances = length(id),
    count_unique_articles = length(unique(id)),
    count_unique_biodata_resources = length(unique(best_name)),
    associated_PMIDs = str_c(unique(id), collapse = ", "),
    associated_biodata_resources = str_c(unique(best_name), collapse = ", ")
  )

## Output
## -------------------------------------------------------------------

# `has_funder_info` was never defined; the count is held in
# `num_articles_w_funder_info`. A newline ends each line so the four
# messages no longer run together.
cat("Number of articles with funder information:",
    num_articles_w_funder_info$count,
    "\n")
cat("Number of \"unique\" funders:", nrow(funders), "\n")
cat("Greatest # resources per funder:",
    max(funders$count_unique_biodata_resources),
    "\n")
cat("Average # resources per funder:",
    mean(funders$count_unique_biodata_resources),
    "\n")

write_csv(funders, file.path(out_dir, "inventory_funders.csv"))
diff --git a/analysis/funders_geo.R b/analysis/funders_geo.R
new file mode 100644
index 0000000..39dbb45
--- /dev/null
+++ b/analysis/funders_geo.R
@@ -0,0 +1,120 @@
#!/usr/bin/env Rscript

# Author : Heidi Imker
#          Kenneth Schackart
# Purpose: Analyze funders by country w/ associated agency and biodata resource names and counts

# Imports -------------------------------------------------------------------

## Library calls ------------------------------------------------------------

library(argparse)
library(dplyr)
library(ggmap)
library(ggplot2)
library(magrittr)
library(maps)
library(purrr)
library(readr)
library(stringr)
library(tidyr)

# Function Definitions ------------------------------------------------------

#' Parse command-line arguments
#'
#' @return args list with `curated_funders` and `out_dir`
get_args <- function() {
  parser <- argparse::ArgumentParser()

  parser$add_argument(
    "curated_funders",
    help = "Manually curated output from funders.R",
    metavar = "FILE",
    type = "character",
    # Optional, defaulting to the file this script used to hard-code, so a
    # run without a file argument reads the same data as before.
    nargs = "?",
    default = "analysis/funders_geo_200.csv"
  )
  parser$add_argument(
    "-o",
    "--out-dir",
    help = "Output directory",
    metavar = "DIR",
    type = "character",
    default = "analysis/figures"
  )

  args <- parser$parse_args()

  return(args)
}

# Main ----------------------------------------------------------------------

print("Parsing command-line arguments.")

args <- get_args()

# The file argument was previously ignored in favour of a hard-coded path
funders <-
  read_csv(args$curated_funders,
           show_col_types = FALSE)

out_dir <- args$out_dir

## Analysis -----------------------------------------------------------------

funders_by_country <- funders %>%
  select(agency,
         country,
         country_3,
         known_parent,
         associated_biodata_resources) %>%
  # Strip quotes and spaces so the resource list splits cleanly on ","
  mutate(associated_biodata_resources = gsub('[\" ]', '', associated_biodata_resources)) %>%
  group_by(country, country_3) %>%
  summarize(
    count_agencies = length(agency),
    agency_names = str_c(agency, collapse = ", "),
    resource_names = str_c(associated_biodata_resources, collapse = ",")
  ) %>%
  group_by(country) %>%
  # De-duplicate resources shared by several agencies of the same country
  mutate(
    names_split = strsplit(resource_names, ","),
    unique_names_split = map(names_split, ~ unique(.x)),
    count_resources = length(unlist(unique_names_split)),
    biodata_resource_names = str_c(flatten(unique_names_split), collapse = ", ")
  ) %>%
  select(-resource_names,-names_split,-unique_names_split)

## Plotting -----------------------------------------------------------------

# Rename countries to the region names used by map_data("world"); the right
# join keeps every map polygon, with NA counts where there is no funder.
countries_plotting <- funders_by_country %>%
  select(country, country_3, count_resources) %>%
  mutate(
    country = case_when(
      country == "US" ~ "USA",
      country == "United Kingdom" ~ "UK",
      country == "Korea" ~ "South Korea",
      TRUE ~ country
    )
  ) %>%
  right_join(map_data("world"), by = c("country" = "region"))


country_plot <- ggplot() +
  geom_polygon(data = countries_plotting, aes(
    x = long,
    y = lat,
    fill = count_resources,
    group = group
  )) +
  theme_void() +
  labs(fill = "Count")

## Output -------------------------------------------------------------------

write_csv(funders_by_country,
          file.path(out_dir, "funders_geo_counts.csv"))

ggsave(file.path(out_dir, "funder_countries.png"),
       country_plot, height = 4, width = 6.5)
diff --git a/analysis/funders_geo_200.csv b/analysis/funders_geo_200.csv
new file mode 100644
index 0000000..a53d22c
--- /dev/null
+++ b/analysis/funders_geo_200.csv
@@ -0,0 +1,201 @@
+known_parent,country,country_3,agency,count_all_article_instances,count_unique_articles,count_unique_biodata_resources,associated_PMIDs,associated_biodata_resources +,Argentina,ARG,Universidad Nacional de Quilmes,3,3,3,"34954795, 33237329, 33305318","CoDNaS-RNA, MobiDB, PED" +,Argentina,ARG,Agencia Nacional de Promoción Científica y Tecnológica,6,3,3,"32507889, 31713636, 31680160","articles.ELM, DisProt, ELM" +,Argentina,ARG,Consejo Nacional de Investigaciones Científicas y Técnicas,3,3,3,"32507889, 34954795, 31680160","articles.ELM, CoDNaS-RNA, ELM" +,Australia,AUS,National Health and Medical Research Council,14,8,8,"33137193, 28381244, 30942868, 30395284, 31161204, 31598690, 33095862, 30395310","AcrHub, DisBind, DEE2, Haemopedia, PRISMOID, ProCarbDB, ThermoMutDB, Vesiclepedia" +,Australia,AUS,Australian Research Council,14,7,7,"30548723, 32928113, 31667690, 31161204, 26434508, 30329070, 33084874","CropSNPdb, CrustyBase, Microndata, PRISMOID, ExoCarta, RaftProt, OGEE" +,Austria,AUT,Austrian Science Fund FWF,7,6,6,"21718534, 24225386, 26590402, 30357379, 21366916, 26586809","CANGS, COMMODE, EffectiveDB, EndoDB, PoPoolation, probeBase" +,Belgium,BEL,Research Foundation Flanders,3,3,3,"31713636, 31584092, 33237329","DisProt, PDBe-KB, MobiDB" +,Brazil,BRA,Coordenação de Aperfeiçoamento de Pessoal de Nível Superior,6,5,5,"33181825, 33995920, 26887375, 33388027, 33095862","CitrusKB, ExVe, PlanTE-MIR DB, Propedia, ThermoMutDB" +,Brazil,BRA,Conselho Nacional de Desenvolvimento Científico e Tecnológico,5,5,5,"33181825, 33995920, 26887375, 24273012, 33095862","CitrusKB, ExVe, PlanTE-MIR DB, SpliceProt, ThermoMutDB" +,Canada,CAN,Canadian Institutes of Health Research,39,32,29,"22135301, 31095607, 24203711, 26048563, 31016417, 33382035, 23109553, 22613085, 21492431, 23180781, 24203342, 27863956, 29206899, 31825307, 23203867, 31724725, 22009677, 26251998, 26578582, 24203708, 25332401, 32442307, 22064855, 22102575, 28158179, 24174537, 31665441, 26481353, 26531826, 
33313828, 23650175, 31701148","BacMap, CiliaCarta, DrugBank, CYCLoPs, DNAmod, DIPPER, ECMDB, HAltORF, KID, InnateDB, iRefWeb, IHEC, MOSAIC, MouseBytes, NetwoRx, oRNAment, PhenoM, PhenomeCentral, Pseudomonas Genome, SMPDB, TopFIND, SYNERGxDB, YMDB, YeTFaSCo, TrypsNetDB, DGV, CARD, JASPAR, PSORTdb" +,Canada,CAN,CIHR,16,13,13,"22718786, 31095607, 23019048, 31679514, 24229347, 31724725, 23674503, 31868683, 32442307, 33125652, 33313828, 27899612, 27789705","ChromoHub, CiliaCarta, CRCgene, MaveDB, NeuroGeM, oRNAment, PhosphoGRID, QPN, SYNERGxDB, iRefWeb, PSORTdb, YMDB, CARD" +,Canada,CAN,Natural Sciences and Engineering Research Council of Canada,13,12,12,"30893420, 33735471, 31016417, 33599246, 27863956, 31825307, 33363449, 26251998, 29377907, 33206959, 33305318, 31701148","AYbRAH, CLRP, DNAmod, Gemma, IHEC, MouseBytes, PASS, PhenomeCentral, ProtDataTherm, IntAct, PED, JASPAR" +,Canada,CAN,Genome Canada,6,6,6,"27863956, 33683131, 26251998, 32442307, 31665441, 31701148","IHEC, MRMAssayDB, PhenomeCentral, SYNERGxDB, CARD, JASPAR" +,Canada,CAN,Canada Foundation for Innovation,8,5,5,"33245771, 31733064, 30601939, 33206959, 30407591","MarkerDB, PathDIP, UbiHub, IntAct, IID" +,Canada,CAN,Canada Research Chairs,4,3,3,"33735471, 31825307, 30407591","CLRP, MouseBytes, IID" +,Canada,CAN,Ontario Research Fund,3,3,3,"26251998, 33206959, 33170273","PhenomeCentral, IntAct, Gramene" +,China,CHN,National Natural Science Foundation of China,807,294,289,"34025933, 31584087, 32986825, 31695717, 29321052, 31665428, 31843802, 32294195, 34839012, 28968841, 30239683, 32681639, 33125076, 33219693, 30215764, 31665503, 33882119, 32540200, 28605773, 29985970, 33010176, 30329142, 30329095, 31701131, 34345532, 29939204, 27635320, 35134148, 31691819, 29992323, 33471060, 30289549, 33147626, 31428785, 32608479, 33970229, 33662628, 30045691, 32345360, 34296749, 27365365, 33181824, 34856391, 33121433, 33109630, 30357356, 33693668, 31813095, 31901979, 33010163, 32436316, 33313674, 34992626, 33068433, 
33009914, 30285246, 34927675, 29178828, 26940364, 27209279, 28381244, 28575155, 29209336, 29860480, 29961819, 30016397, 30379998, 30380071, 30482172, 30665056, 31603498, 31665429, 33104791, 33320930, 33938221, 34097004, 27037912, 29126995, 30321400, 30335161, 30357379, 30364969, 30365030, 30476229, 30788500, 31277321, 31642496, 31665430, 31774482, 31887789, 32120139, 32681912, 33002112, 33203359, 33497436, 33511767, 34085038, 34954426, 35694152, 27098585, 28529078, 28549078, 29548284, 30053237, 30066211, 30266410, 30371881, 31504765, 31524396, 31566222, 31584099, 31630971, 31725863, 31783725, 32315389, 32496513, 32858223, 32941628, 33045729, 33151298, 33264402, 33406221, 33677507, 33868597, 33965348, 33984507, 34164644, 34175476, 34642750, 34791105, 23601370, 29788225, 30276831, 30476305, 31598675, 31617563, 31665439, 31713618, 31906602, 32193291, 32367112, 32512182, 32766766, 32820322, 33045741, 33045751, 33147622, 33219686, 33507270, 33906563, 34755873, 27167218, 28968812, 29433427, 30057343, 31231773, 31240103, 32833025, 32911083, 32990748, 33125077, 33126250, 33219670, 33418085, 34510194, 28961690, 29982280, 30134653, 30335176, 30445567, 31086734, 31584086, 31950190, 32103267, 32105730, 32111231, 32117995, 32122231, 32487016, 32597311, 33275967, 33304468, 33306802, 33359127, 33514746, 33581334, 33997360, 34120586, 34389843, 26211629, 30239819, 30357353, 30380102, 31584089, 31598699, 31599098, 31602478, 31620779, 31640808, 31725858, 31809863, 33003203, 33010159, 33330918, 33554247, 25640659, 28365723, 28529082, 29028888, 29309507, 29617941, 29961821, 30020436, 30371817, 30380119, 31511885, 31642484, 31713629, 31799597, 31906603, 32382747, 32709339, 32849839, 33010177, 33021671, 33045745, 33068412, 33685493, 34022814, 34496744, 27337171, 27643925, 28420402, 28974472, 30184150, 30371815, 30380087, 30913342, 32047897, 32248093, 32286817, 32351388, 32620074, 32976581, 32990749, 33074314, 33179754, 33360695, 33985427, 34273956, 34407614, 29351734, 30217145, 30223042, 
30365026, 30462313, 31021279, 32168374, 32221380, 32349124, 32738156, 33068436, 33095866, 33175170, 33993461, 34601118, 29917040, 30285109, 30335166, 30364951, 30364952, 30364956, 31161214, 31599330, 31670377, 32016318, 33175131, 33219685, 33306787, 33306800, 33693667, 34344425, 30715167, 30172046, 30407568, 30371818, 33196801, 30204897, 31942978, 30329098, 33010178, 33219661, 30407549, 26744602","AddictGene, Animal-ImputeDB, Animal-APAdb, AppleMDO, ASGDB, ASD, ASRD, ASFVdb, ASER, AtCircDB, ATD, ATdb, ATACdb, AtMAD, AWESOME, BBCancer, BC-TFdb, BGVD, BioM2MetDisease, BrainEXP, CancerImmunityQTL, CancerSEA, CancerSplicingQTL, CancerTracer, CanImmunother, CARDIO-LNCRNAS, CardioTF, CATA, CAUSALdb, CeleryDB, Cellinker, CellMarker, CellTalkDB, CFEA, CHDGKB, ChemHub, Chinese Glioma Genome Atlas, CIGene, CircAtlas, circExp, CIRCpedia, circR2Cancer, CircR2Disease, circVAR, CKTTD, CMAUP, CMBD, CMVdb, CNAdbCC, cncRNAdb, CoFly, ColorCells, CottonGVD, CovalentInDB, CoVdb, CRISPRlnc, CRPMKB, CrusTF, dbPHCC, DNetDB, DisBind, Dynamic-BM, DRDB, dbCRSR, dbLGL, dbCID, dbCPM, DSMNC, dbMPIKT, dbHDPLS, dbInDel, DrugCombDB, DrugSpaceX, CyanoPATH, D3DistalMutation, DevOmics, GAMDB, FVD, EWASdb, EVmiRNA, EndoDB, EWAS Atlas, ETCM, ENPD, EnDisease, GEDS, Gene4Denovo, ENdb, FluReassort, ETph, FRCD, ExoceRNA Atlas, EnzyMine, FishDB, FifBase, FAWMine, EyeDiseases, FertilityOnline, EnhFFL, Grape-CRISPR, HCSGD, HEROD, IDPM, HDncRNA, HAMdb, HCCDB, iDog, GMrepo, HybridMolDB, GWAS Atlas, gutMDisorder, GESUR, GRONS, HKPocket, HotSpot3D, gutMEGA, hTFtarget, IDDB, GIMICA, GRNdb, HERB, iCysMod, HIR, GPCards, GGVD, HisPhosSite, HFBD, GWH, HBFP, HFIP, LiverAtlas, LnChrom, lncRNAnet, LncACTdb, KnockTF, LnCeVar, MaGenDB, LncTarD, LLPSDB, LncSpA, MACSNVdb, IRESbase, lncR2metasta, LncAS2Cancer, LncSEA, LncExpDB, KNIndex, LnCeCell, InSexBase, M6ADD, ImmReg, MiasDB, MetSigDis, MethCNA, MCENet, MepmiRDB, MDR, MNDR, MosaicBase, miRNASNP-v3, MASI, MloDisDB, MolluscDB, MicroPhenoDB, Mollusca mitochondrial database, 
ncDR, PepBDB, PADFrag, NucMap, OncoBase, OsteoporosAtlas, PGG.Han, PCaLiStDB, PDIR, ncEP, NoncoRNA, Nc2Eye, ncRPheno, ncRI, OncotRF, ncRNAVar, PDmethDB, NPBS, NBIGV, NGD, OGP, PCPD, NUCOME, Nabe, PLNlncRbase, POSTAR2, piRTarBase, qPhos, PhaSepDB, QTLbase, PSMD, PmiREN, prokaryotic antiviral defense system, PGG.SNV, PlantCircNet, PsyMuKB, QSIdb, PROTAC-DB, piRNA-eQTL, PMI-DB, SecReT6, SilkPathDB, RED, RRDB, SEGreg, SCRIPT-MAP, SDADB, realDB, SEdb, SAGD, SNP2APA, SilkDB, SpatialDB, RNAactDrug, RNAInter, RSVdb, saponin mass spectrometry database, RIGD, SC2disease, RMVar, SilencerDB, RASP, riboCIRC, Rhododendron Plant Genome Database, RPocket, TarNet, StemCellCKB, SSER, Stress2TF, TRCirc, TransmiR, SymMap, TPIA, TRlnc, TE141K1, TeroKit, TCMIO, TeaCoN, STAB, TCRdb, TransCirc, TISCH, ToxinDB, TarDB, TCM-Blast, Tracking Air Pollution in China, EOGD, PPGD, TSNAdb, Victors, Cistrome DB, UVGD, uORFlight, WeiBI, VirusCircBase, YIR, tsRBase, VARAdb, 2019nCoVR, TUPDB, Viral Putative G-quadruplex, MPD, LncRNADisease, RPFdb, DoriC, PED, HMDD, WDSPdb, LSD, NPInter, VariBench, deepBase, Lnc2Cancer, MetaADEDB, CEG, UbiNet, SorGSD, BacWGSTdb, ICEberg, piRBase, NONCODE, AnimalTFDB, LncBook, LincSNP" +,China,CHN,National Key Research and Development Program of China,143,68,68,"31648087, 32608479, 33970229, 34296749, 33121433, 33693668, 33010163, 33313674, 32754758, 33068433, 29961819, 30335161, 30365030, 30788500, 31584095, 32120139, 34954426, 31504765, 31566222, 33045729, 33151298, 33264402, 33515030, 34164644, 34175476, 33045751, 33147622, 33219686, 34755873, 31157825, 32833025, 33084905, 33125077, 33126250, 33219670, 33418085, 34156447, 31950190, 32597311, 33275967, 33581334, 33997360, 34120586, 30055873, 31584089, 33010159, 33554247, 34111777, 34122478, 29617941, 33045745, 30380087, 32990749, 33074314, 33360695, 30223042, 30462313, 33068436, 33175170, 31599330, 33084874, 33175131, 33219685, 33306787, 33170268, 30204897, 30329098, 33219661","AdditiveChem, CHDGKB, ChemHub, circExp, 
circVAR, CMBD, cncRNAdb, ColorCells, ConoMode, CovalentInDB, dbLGL, EVmiRNA, ETCM, EnDisease, EWAS, FRCD, FertilityOnline, GMrepo, GWAS Atlas, GIMICA, GRNdb, HERB, HVIDB, HFBD, GWH, LncExpDB, KNIndex, LnCeCell, ImmReg, MENDA, MNDR, MeDAS, MASI, MloDisDB, MolluscDB, MicroPhenoDB, mPPI, PCaLiStDB, OncotRF, ncRNAVar, OGP, PCPD, NUCOME, PhoPepMass, PhaSepDB, PROTAC-DB, PMI-DB, PID, PSDX, SCRIPT-MAP, SilencerDB, SymMap, TCRdb, TransCirc, ToxinDB, TSNAdb, Cistrome DB, tsRBase, 2019nCoVR, LSD, OGEE, deepBase, Lnc2Cancer, MetaADEDB, GVM, AnimalTFDB, LncBook, LincSNP" +,China,CHN,National Science Foundation,89,54,51,"33068435, 30239679, 30371900, 31680137, 31036810, 30486838, 27391016, 32941621, 33119734, 33174603, 26800861, 27664130, 34010390, 30364992, 31509535, 31841142, 32758136, 26653323, 30535108, 31598675, 26322134, 29206899, 31612915, 32079733, 32986834, 30115014, 34241085, 25382819, 27789569, 30329093, 27188311, 31211398, 31490686, 32719467, 32882008, 33035346, 26227548, 31245720, 31598695, 32386298, 22564364, 32558264, 33021634, 33290552, 33151287, 31680153, 33290554, 28296894, 30395331, 31851420, 33156333, 33170273, 30407594, 31598706","AcrDB, AgBioData, Ancestral Genomes, ANISEED, ASNR, CHESS, D-PLACE, dbCAN-PUL, DescribePROT, Datanator, EchinoDB, ENVO, Echinobase, GPs, GutFeelingKB, GlyMDB, GSDB, iTAP, InteracDome, KnockTF, Metabolic In silico Network Expansions, MOSAIC, MIBiG, MtSSPdb, ModelSEED, PdumBase, OCELOT, ProKinO, PMKB, REDfly, Structure Surfer, SpinachBase, TMB, STAGdb, TBDB, tRFtarget, DOCKGROUND, VPGD, MirGeneDB, MGP Portal, PharmGKB, PDB, NDB, GOC, DrugCentral, Plant Reactome, PANTHER, CDD, InterPro, Gramene, Ensembl" +,China,CHN,China Postdoctoral Science Foundation,44,30,30,"30329142, 34345532, 29939204, 33471060, 30289549, 30321400, 30335161, 30380109, 31584099, 32858223, 33264402, 33984507, 29788225, 31713618, 32820322, 28968812, 32159764, 32990748, 30445567, 32111231, 32849839, 30913342, 32990749, 33360695, 34273956, 33175170, 33993461, 
31599330, 33306787, 33196801","CancerSEA, CanImmunother, CARDIO-LNCRNAS, Cellinker, CellMarker, EWASdb, EVmiRNA, iEKPD, gutMDisorder, hTFtarget, HERB, HisPhosSite, LnChrom, LncTarD, LncAS2Cancer, MetSigDis, MMHub, miRNASNP-v3, OncoBase, NoncoRNA, RIGD, TPIA, TCRdb, ToxinDB, TCM-Blast, 2019nCoVR, TUPDB, LSD, MetaADEDB, NONCODE" +,China,CHN,Fundamental Research Funds for the Central Universities,39,28,28,"32986825, 32681639, 34856391, 32754758, 33009914, 31691822, 31277321, 32008039, 34954426, 35694152, 30380109, 32510565, 33045729, 32367112, 32512182, 33507270, 31504189, 32159764, 33219670, 32105730, 33275967, 30244175, 31161204, 31809863, 33137192, 32709339, 33175170, 33175131","Animal-APAdb, ATdb, CircR2Disease, ConoMode, CoVdb, DrLLPS, GEDS, EPSD, FertilityOnline, EnhFFL, iEKPD, GreenCircRNA, GIMICA, MACSNVdb, IRESbase, InSexBase, MeLAD, MMHub, MolluscDB, ncEP, ncRNAVar, PTMD, PRISMOID, PsyMuKB, Plant-ImputeDB, saponin mass spectrometry database, 2019nCoVR, deepBase" +,China,CHN,Chinese Academy of Sciences,83,28,27,"31648087, 29209336, 33104791, 29126995, 30357418, 31584095, 33119759, 28387199, 30196115, 30371881, 31566222, 31811943, 32055858, 34175476, 33045751, 30335176, 31584086, 31620779, 28529082, 30365026, 33175170, 30364952, 31599330, 31670377, 33170268, 33196801, 30329098, 33704069","AdditiveChem, DRDB, DrugSpaceX, FVD, EDK, EWAS, gcType, GSA, HeteroMeth, iDog, GWAS Atlas, GliomaDB, GREG, GWH, LncExpDB, NucMap, PGG.Han, prokaryotic antiviral defense system, RED, Victors, 2019nCoVR, PED, LSD, NPInter, GVM, NONCODE, LncBook" +,China,CHN,National Key R&D Program of China,41,24,24,"31665503, 34345532, 30380071, 31603498, 30371881, 31811943, 32858223, 31665439, 32193291, 32512182, 29743053, 29982280, 30445567, 32122231, 30010730, 30380102, 31642469, 31799597, 30546860, 33181826, 31670377, 30407568, 30371818, 33704069","BBCancer, CanImmunother, DSMNC, dbInDel, iDog, GliomaDB, hTFtarget, MaGenDB, LncSpA, IRESbase, PDXliver, PepBDB, OncoBase, ncRPheno, RabGTD, 
qPhos, PhenoModifier, RNAactDrug, YaTCM, WGVD, NPInter, ICEberg, piRBase, 2019nCoVR" +,China,CHN,National Key Research and Development Program,20,13,13,"32294195, 33009914, 31691822, 34097004, 29961817, 32159764, 31584086, 31640808, 32620074, 31021279, 33175170, 33196801, 32406920","ASFVdb, CoVdb, DrLLPS, DevOmics, LncCeRBase, MMHub, PGG.Han, PGG.SNV, TeaCoN, UVGD, 2019nCoVR, NONCODE, AnnoLnc" +,China,CHN,Natural Science Foundation of China,35,13,13,"31691822, 31598693, 32008039, 30380109, 34907423, 28529077, 30244175, 32621601, 29157087, 30810209, 32761141, 29351734, 33094321","DrLLPS, EuRBPDB, EPSD, iEKPD, iCAV, PLMD, PTMD, SAGER, THANATOS, Tetrahymena Comparative Genomics Database, SPDB, EOGD, VPTMdb" +,China,CHN,Ministry of Science and Technology of the People's Republic of China,156,10,10,"31813095, 28595571, 34992626, 33382035, 33938221, 33984507, 30134653, 33103271, 27643925, 34344425","CMVdb, CottonFGD, CottonGVD, DIPPER, D3DistalMutation, HisPhosSite, PADFrag, NanDeSyn, StemCellCKB, SorGSD" +,China,CHN,Harbin Medical University,9,9,9,"30329142, 30289549, 31665430, 30476305, 31617563, 33045741, 32047897, 33095866, 30407549","CancerSEA, CellMarker, ENdb, LncACTdb, LnCeVar, LncSEA, TRlnc, VARAdb, Lnc2Cancer" +,China,CHN,Natural Science Foundation of Heilongjiang Province,9,8,8,"29939204, 28334239, 31665430, 31713618, 30371817, 30184150, 32047897, 30380072","CARDIO-LNCRNAS, coexpressMAP, ENdb, LncTarD, SEdb, TRCirc, TRlnc, LncRNA2Target" +,China,CHN,National Key Research Program of China,14,7,7,"30364969, 33119759, 28387199, 30371881, 30252093, 30335176, 31620779","EWAS Atlas, gcType, GSA, iDog, iProX, NucMap, prokaryotic antiviral defense system" +,China,CHN,National Science Foundation of China,13,6,6,"31648087, 31598709, 32510565, 30252093, 31231774, 32542382","AdditiveChem, DNMIVD, GreenCircRNA, iProX, Mr.Vc, PRMdb" +,China,CHN,China Scholarship Council,6,6,6,"31066443, 28549078, 31637139, 27643925, 33035346, 33051671","DrugComb, HEROD, ncRNA2MetS, 
StemCellCKB, tRFtarget, StreptomeDB" +,China,CHN,International Partnership Program of the Chinese Academy of Sciences,6,6,6,"30364969, 31584095, 28387199, 30371881, 30364952, 33170268","EWAS Atlas, EWAS, GSA, iDog, PED, GVM" +,China,CHN,13th Five-year Informatization Plan of Chinese Academy of Sciences,7,6,6,"30364969, 30365027, 31584095, 30371881, 30364952, 33170268","EWAS Atlas, gcMeta, EWAS, iDog, PED, GVM" +,China,CHN,the National Natural Science Foundation of China,10,5,5,"27623959, 34964846, 32183712, 29743053, 34154536","BmncRNAdb, COGVIC, CuAS, PDXliver, TeaAS" +,China,CHN,National High Technology Research and Development Program of China,6,5,5,"30329142, 29939204, 30289549, 28575155, 30252093","CancerSEA, CARDIO-LNCRNAS, CellMarker, Dynamic-BM, iProX" +,China,CHN,Natural Science Foundation of Guangdong Province,5,5,5,"34345532, 29178828, 31524396, 30380102, 32761141","CanImmunother, CrusTF, HybridMolDB, qPhos, SPDB" +,China,CHN,National Key R&D Program,9,5,5,"32345360, 32008039, 30380109, 29157087, 31942978","CircAtlas, EPSD, iEKPD, THANATOS, LncRNADisease" +,China,CHN,Ministry of Science and Technology,9,5,5,"27365365, 30395277, 33175170, 32898258, 31701128","CIRCpedia, PlantPAN, 2019nCoVR, EXPath, DriverDB" +,China,CHN,Shanghai Jiao Tong University,6,5,5,"31887789, 30032758, 31642469, 32221380, 30407568","ETph, HFMDB, PhenoModifier, WeiBI, ICEberg" +,China,CHN,Program for Guangdong Introducing Innovative and Entrepreneurial Teams,4,4,4,"31843802, 33406221, 34907423, 33021671","ASRD, iCysMod, iCAV, RMVar" +,China,CHN,National Program on Key Basic Research,4,4,4,"30329142, 30289549, 30476305, 30407549","CancerSEA, CellMarker, LncACTdb, Lnc2Cancer" +,China,CHN,Beijing Natural Science Foundation,5,4,4,"31428785, 30266409, 29617941, 33306800","CFEA, PlaD, SCRIPT-MAP, CEG" +,China,CHN,Shanghai Municipal Science and Technology Major Project,4,4,4,"31504765, 31584086, 31640808, 29617941","GMrepo, PGG.Han, PGG.SNV, SCRIPT-MAP" +,China,CHN,CAS,5,3,3,"31648087, 
31566222, 33264402","AdditiveChem, GWAS Atlas, HERB" +,China,CHN,Natural Science Foundation of Tianjin,3,3,3,"31648087, 31691819, 31598699","AdditiveChem, CAUSALdb, QTLbase" +,China,CHN,Huazhong Agricultural University Scientific & Technological Self-innovation Foundation,3,3,3,"31584087, 32986825, 31410488","Animal-ImputeDB, Animal-APAdb, ncRNA-eQTL" +,China,CHN,Priority Academic Program Development of Jiangsu Higher Education Institutions,3,3,3,"29321052, 29992323, 31603498","ASGDB, CeleryDB, dbInDel" +,China,CHN,Shanghai Sailing Program,3,3,3,"31665428, 33125077, 33306787","ASD, MASI, MetaADEDB" +,China,CHN,Shanghai Municipal Education Commission,3,3,3,"31665428, 32941628, 31809863","ASD, IDDB, PsyMuKB" +,China,CHN,Zhejiang Provincial Natural Science Foundation of China,3,3,3,"32681639, 34085038, 33287903","ATdb, EyeDiseases, KVarPredDB" +,China,CHN,Natural Science Foundation,3,3,3,"33125076, 32193291, 33219685","ATACdb, LncSpA, Lnc2Cancer" +,China,CHN,Shanghai Municipal Science and Technology,3,3,3,"33219693, 31647096, 33084874","AtMAD, proGenomes2, OGEE" +,China,CHN,Science and Technology Program of Guangzhou,6,3,3,"31665503, 31524396, 30380102","BBCancer, HybridMolDB, qPhos" +,China,CHN,Guangdong Introducing Innovative and Entrepreneurial Teams,3,3,3,"31665503, 32496513, 30380102","BBCancer, gutMEGA, qPhos" +,China,CHN,National Key Plan for Scientific Research and Development of China,3,3,3,"29985970, 31774482, 32349124","BrainEXP, FluReassort, VirusCircBase" +,China,CHN,National Science Fund for Distinguished Young Scholars,3,3,3,"33010176, 31584086, 31640808","CancerImmunityQTL, PGG.Han, PGG.SNV" +,China,CHN,Strategic Priority Research Program of the Chinese Academy of Sciences,4,3,3,"26040787, 30364969, 30365027","CARMO, EWAS Atlas, gcMeta" +,China,CHN,the National Key Research and Development Program of China,3,3,3,"30045691, 34791105, 34154536","CIGene, HFIP, TeaAS" +,China,CHN,Zhejiang Provincial Natural Science Foundation,3,3,3,"33068433, 33264402, 
33010159","CovalentInDB, HERB, PROTAC-DB" +,China,CHN,Chinese University of Hong Kong,12,3,3,"30380085, 30476229, 30418626","dbAMP, ENPD, dbPTM" +,China,CHN,Fundamental Research Funds,3,3,3,"30321400, 31642484, 33095866","EWASdb, SilkDB, VARAdb" +,China,CHN,National Programs for High Technology Research and Development,6,3,3,"30357418, 30364969, 30364952","EDK, EWAS Atlas, PED" +,China,CHN,Natural Science Foundation of Shanghai,3,3,3,"31504765, 27148975, 33084874","GMrepo, SpinachDB, OGEE" +,China,CHN,Beijing Nova Program,3,3,3,"31725860, 31086734, 31021279","HisgAtlas, OsteoporosAtlas, UVGD" +,China,CHN,Fundamental Research Funds for the Provincial Universities,4,3,3,"31713618, 32487016, 31799597","LncTarD, ncRI, RNAactDrug" +,China,CHN,Heilongjiang Touyan Innovation Team Program,3,3,3,"33219686, 34755873, 33219661","LnCeCell, ImmReg, LincSNP" +,China,CHN,International Science & Technology Cooperation Program of China,5,3,3,"28529077, 29617941, 29157087","PLMD, SCRIPT-MAP, THANATOS" +,China,CHN,Special Project on Precision Medicine under the National Key R&D Program,3,3,3,"30371815, 30285109, 30364956","TransmiR, LncRNADisease, HMDD" +,Czech Republic,CZE,Ministry of Education,4,3,3,"30247677, 30668638, 30587128","AmtDB, CMEP, PSRN" +,Denmark,DNK,Novo Nordisk Fonden,23,18,13,"30357390, 27924032, 33010170, 23143109, 27504778, 31612915, 32079733, 29062930, 34000890, 26476456, 27794045, 33270898, 30395294, 26531826, 29156309, 31696234, 33152079, 25723102","ALEdb, antiSMASH, BiG-FAM, HemaExplorer, GNPS, MIBiG, MtSSPdb, SMBP, TELEMED, BiGG, FANTOM5, GPCRdb, JASPAR" +,Denmark,DNK,Novo Nordisk Foundation Center for Protein Research,20,18,11,"31405382, 22058129, 25484339, 28077569, 23203871, 29617745, 33156327, 29036351, 33151287, 24297252, 24293645, 25352553, 26582926, 26590256, 27924014, 30418610, 30476243, 33237311","bio.tools, DistiLD, DISEASES, RAIN, STRING, TISSUES, TCRD, miRandola, DrugCentral, eggNOG, STITCH" +,Denmark,DNK,NNF Center for 
Biosustainability,15,11,7,"30357390, 27924032, 33010170, 27504778, 31612915, 29062930, 26476456, 30395294, 29156309, 31696234, 33152079","ALEdb, antiSMASH, BiG-FAM, GNPS, MIBiG, SMBP, BiGG" +,Denmark,DNK,Novo Nordisk Foundation,12,8,7,"33010170, 31612915, 33156327, 33270898, 30395294, 33152079, 30418610, 33237311","BiG-FAM, MIBiG, TCRD, GPCRdb, antiSMASH, eggNOG, STRING" +,Denmark,DNK,Villum Fonden,6,6,6,"31405382, 26062809, 32665542, 32976589, 27504778, 28182744","bio.tools, CMRegNet, CoVex, DIGGER, GNPS, OMDB" +,Denmark,DNK,Lundbeck Foundation,11,9,5,"23143109, 24304901, 24194598, 27794045, 33270898, 29155946, 29140473, 30664776, 25723102","HemaExplorer, GPCRDB, JASPAR, FANTOM5, GPCRdb" +,EU,INT,European Research Council,59,40,36,"30272193, 28453651, 23486613, 31066443, 33084904, 25352549, 26582924, 27664130, 28245064, 30357379, 34485385, 30967549, 24150937, 24194598, 24253300, 32618424, 22096233, 25378328, 28182744, 24198250, 24275491, 26586809, 26467481, 26339475, 28832569, 28985418, 31171447, 24082050, 31504823, 31647096, 33045721, 33206959, 24297252, 30496475, 28053165, 24234451, 29155946, 26582926, 30395289, 30664776","BACTOME, Cancer PanorOmics, CGOB, DrugComb, DualSeqDB, euL1db, enviPath, ENVO, FRED, EndoDB, Fuzzle, iFISH, LoQAtE, JASPAR, InvFEST, LymphoAtlas, MINAS, MyMpn, OMDB, POGO-DB, PhylomeDB, probeBase, SIGNOR, SwissPalm, SweGen, SysteMHC, SynGO, yApoptosis, WALTZ-DB, proGenomes2, ViruSurf, IntAct, eggNOG, PlanMine, GPCRdb, PRIDE" +,EU,INT,Horizon 2020,41,24,23,"33037820, 31713636, 32976589, 33166383, 34220930, 30357370, 32986834, 35559777, 33647438, 30321422, 31647096, 31696235, 33211851, 33237329, 33270898, 33305318, 30357350, 30395289, 33211869, 30418610, 31680160, 30395270, 31691826, 31722421","Bgee, DisProt, DIGGER, FireProtDB, FAANG, liqDB, ModelSEED, MESOCOSM, PepTherDia, RetroRules, proGenomes2, MGnify, WikiPathways, MobiDB, GPCRdb, PED, Pfam, PRIDE, Rfam, eggNOG, ELM, ENA, Ensembl" +,EU,INT,European Commission FP7,32,19,18,"22080563, 
22110040, 22096229, 22096227, 22730453, 21472892, 22139920, 23180794, 22102589, 22135296, 21520333, 22753137, 21702733, 21995777, 22102590, 22135291, 22096232, 22121220, 22086963","CADRE, ELM, InterPro, MINT, MuteinDB, PORCN, SitEx, TFClass, UniPathway, VectorBase, LOVD, ALSoD, BRAD, UniProtKB, Rhea, BioSamples, IntAct, Ensembl" +,EU,INT,European Molecular Biology Laboratory,19,18,17,"34493866, 31647096, 31696235, 30020414, 33206959, 33290552, 31701150, 33270111, 31584097, 33237286, 31680160, 33166387, 30357387, 33156333, 33170273, 31691826, 31722421, 33175160","eQTL, proGenomes2, MGnify, SKEMPI, IntAct, GOC, BioModels, GENCODE, IGSR, UniProtKB, ELM, International Nucleotide Sequence Database Collaboration, ArrayExpress, InterPro, Gramene, Ensembl, ENA" +,EU,INT,European Regional Development Fund,13,11,11,"34244700, 30639529, 32990755, 31713636, 33683131, 33822911, 31584092, 33080028, 33655207, 31263870, 26141515","2DProts, Antimicrobial Enzyme Combinations Database, CSVS, DisProt, MRMAssayDB, MENSAdb, PDBe-KB, Peryton, TMSNP, ValTrendsDB, mirEX" +,EU,INT,Seventh Framework Programme,10,8,8,"28376796, 31095607, 34782688, 27664130, 28086860, 29739837, 27749924, 31647100","AHCODA-DB, CiliaCarta, CyFi-MAP, ENVO, NaDH, NvERTx, SulfAtlas, BGD" +,EU,INT,Open Targets,5,5,5,"34493866, 33237286, 33125078, 33156333, 33170273","eQTL, UniProtKB, Pfam, InterPro, Gramene" +,EU,INT,FEDER,7,4,4,"33068420, 30593925, 31171447, 31680165","chewie-NS, SITVIT2, SynGO, DisGeNET" +,EU,INT,Innovative Medicines Initiative,5,4,4,"34782688, 32707486, 30601939, 31701150","CyFi-MAP, hPSCreg, UbiHub, BioModels" +,EU,INT,European Social Fund,4,4,4,"33119759, 32556221, 30380112, 33367605","gcType, OMEGA-NET, Translocatome, Virxicon" +,EU,INT,European Union,25,4,4,"29385404, 31171447, 29533231, 33175160","TOXsIgN, SynGO, wwPDB, ENA" +,EU,INT,European Bioinformatics Institute,4,4,3,"34493866, 31584089, 31691815, 32486891","eQTL, PhaSepDB, Reactome" +,EU,INT,ELIXIR,3,3,3,"31696235, 33156333, 
31722421","MGnify, InterPro, ENA" +,France,FRA,Agence Nationale de la Recherche,32,16,16,"30239679, 31680137, 33444113, 31605615, 32618424, 34189203, 28608363, 28791657, 30944327, 34245304, 35559777, 30380106, 31733062, 25740460, 30794542, 33305318","AgBioData, ANISEED, CALR-ETdb, LeGOO, LymphoAtlas, IsoArcH, MiSynPat, MEGALEX, monoterpene indole alkaloid database, MtExpress, MESOCOSM, OrthoInspector, ParameciumDB, PhytoREF, RESPIRE, PED" +,France,FRA,Centre National de la Recherche Scientifique,3,3,3,"33709443, 28608363, 34531327","LIMONADA, MiSynPat, T1TAdb" +,France,FRA,CNRS,3,3,3,"31680137, 31624845, 31733062","ANISEED, CRISPRCasdb, ParameciumDB" +,France,FRA,French National Research Agency,3,3,3,"25065645, 32163115, 30321422","EctoGEM, GRALL, RetroRules" +,France,FRA,Fondation pour la Recherche Médicale,3,3,3,"28608363, 33206959, 33305318","MiSynPat, IntAct, PED" +,Germany,DEU,Deutsche Forschungsgemeinschaft,16,14,14,"34738791, 29198880, 31095607, 32459338, 34699529, 33174596, 29913065, 33749993, 34559210, 27800578, 30714194, 30165582, 34656056, 30256983","AroCageDB, AureoWiki, CiliaCarta, EpiRegio, GH19ED, MitoCarta, OptoBase, OmniPath, QSDB, SCEGRAM, SDRED, Traitpedia, SuperTCM, BacDive" +,Germany,DEU,Federal Ministry of Education and Research,8,6,6,"30272193, 32976589, 23607573, 32976578, 31665479, 30256983","BACTOME, DIGGER, PID-NET, TREND-DB, ProteomicsDB, BacDive" +,Germany,DEU,Bundesministerium für Bildung und Forschung,6,5,5,"34931882, 32862462, 34699529, 30418610, 30476243","CoxBase, ExED, GH19ED, eggNOG, STRING" +,Germany,DEU,Max-Planck-Gesellschaft,3,3,3,"33169878, 35424258, 34534667","A.P.E.S, MeFSAT, TExAs" +,Germany,DEU,Saarland University,3,3,3,"30937442, 30380090, 31691816","Animal sncRNA Atlas, PLSDB, miRPathDB" +,Germany,DEU,German Research Foundation,3,3,3,"30272193, 32766702, 33051671","BACTOME, Male Fertility Gene Atlas, StreptomeDB" +,Germany,DEU,DFG,12,3,3,"31171447, 32976578, 33211869","SynGO, TREND-DB, Rfam" +,Hungary,HUN,Hungarian 
Academy of Sciences,10,6,6,"29385418, 31713636, 31686102, 29036655, 31612960, 31680160","DIBS, DisProt, FoldamerDB, MFIB, PhaSePro, ELM" +,India,IND,Council of Scientific and Industrial Research,17,13,13,"31958638, 27832200, 27472917, 32219412, 32090260, 28854643, 29129553, 30858555, 30349509, 30307523, 31796964, 29432422, 33112702","BoMiProt, Cancertope, CicerTransDB, circad, HSPMdb, MSDB, mitoepigenomeKB, MorCVD, PanGFR-HM, PVsiRNAdb, PRP, TopicalPdb, LncRBase" +,India,IND,Science and Engineering Research Board,8,8,8,"31958638, 27832200, 34791106, 34793786, 32090260, 35424258, 28961249, 33112702","BoMiProt, Cancertope, database of cancer mutant protein domains, FCCP, HSPMdb, MeFSAT, SWI/SNF Infobase, LncRBase" +,India,IND,Indian Council of Medical Research,6,5,5,"27832200, 29109711, 32345779, 25269378, 32380213","Cancertope, miPepBase, PSCRIdb, SPGDB, TrypInDB" +,India,IND,Department of Biotechnology,5,4,4,"28875065, 29939244, 32829394, 33459764","BioFuelDB, PtRFdb, TGV, SWITCHES" +,India,IND,Department of Science and Technology,5,4,4,"31958638, 32337573, 28498885, 33892308","BoMiProt, MPTherm, PROXiMATE, SAPdb" +,India,IND,University Grants Commission,4,4,4,"32360910, 27832200, 33136065, 29109711","CancerEnD, Cancertope, HPREP, miPepBase" +,India,IND,"Department of Biotechnology, Ministry of Science and Technology",5,4,4,"30703169, 29220464, 30307523, 29432422","HuVarBase, KiPho, PVsiRNAdb, TopicalPdb" +,India,IND,"Department of Biotechnology, Ministry of Science and Technology, India",3,3,3,"32895427, 34025934, 33231322","PCOSKBR2, MycoTRAP-DB, SoyTD" +,Italy,ITA,Telethon,7,7,7,"31095607, 21435384, 22804825, 24558125, 22096227, 22415763, 24234451","CiliaCarta, HOCTARdb, HuPho, MANTRA, MINT, Rett Networked Database, IntAct" +,Japan,JPN,Japan Society for the Promotion of Science,25,13,13,"22659240, 28234924, 33196844, 33511845, 30046160, 29206899, 23911837, 28481528, 33002111, 30371824, 29668970, 30407557, 29216398","CancerProView, DNApod, dbCNS, FMODB, KampoDB, 
MOSAIC, PTP-central, PubChemQC, PyDISH, ViBrism, MitoFish, FANTOM5, ATTED-II" +,Japan,JPN,Japan Agency for Medical Research and Development,13,10,10,"32026396, 33511845, 29532461, 33740463, 31640730, 33661371, 33179747, 30462320, 33166387, 33156332","BSM-Arc, FMODB, HpBase, ICSCB, NARD, OryzaGenome, jMorp, COXPRESdb, International Nucleotide Sequence Database Collaboration, DDBJ" +,Japan,JPN,Japan Science and Technology Agency,12,10,9,"31978081, 33174597, 30046160, 30295851, 33645624, 29668970, 33125071, 30357349, 33156332, 30321428","AOE, GlycoPOST, KampoDB, jPOST, KAIKObase, MitoFish, GlyTouCan, DDBJ, KEGG" +,Japan,JPN,"Ministry of Education, Culture, Sports, Science and Technology",6,6,6,"22659240, 29532461, 33002111, 33179747, 33166387, 33156332","CancerProView, HpBase, PyDISH, jMorp, International Nucleotide Sequence Database Collaboration, DDBJ" +,Japan,JPN,JSPS,12,5,5,"31680137, 29206899, 28499913, 33211864, 29216398","ANISEED, MOSAIC, Soybean Proteome Database, FANTOM5, ATTED-II" +,Japan,JPN,National Bioscience Database Center,4,4,4,"33125081, 30462302, 33125071, 33166387","KEGG, MBGD, GlyTouCan, International Nucleotide Sequence Database Collaboration" +,Korea,KOR,National Research Foundation of Korea,16,9,9,"31599923, 26272709, 30418591, 33137185, 23219992, 30733462, 30602089, 31680157, 29156309","BiomeNet, GenomewidePDB, HumanNet, iCSDB, MENT, PFDB, STADIUM, ChimerDB, SMBP" +,Netherlands,NLD,Dutch Research Council (NWO),18,14,14,"28376796, 27995664, 23774715, 31095607, 27899646, 29688353, 33439542, 31612915, 33822911, 25352545, 30395294, 26919060, 26496949, 31701148","AHCODA-DB, AraQTL, Autism Brain Imaging Data Exchange, CiliaCarta, FAIRDOMHub, FSD, HDG, MIBiG, MENSAdb, BDB, antiSMASH, MSeqDR, KLIFS, JASPAR" +,Netherlands,NLD,Graduate School for Experimental Plant Sciences,3,3,3,"33010170, 31612915, 33152079","BiG-FAM, MIBiG, antiSMASH" +,Norway,NOR,Norges Forskningsråd,3,3,3,"32632099, 34583740, 28651544","EBRAINS, FPADMET, SalmoBase" 
+,Poland,POL,Narodowe Centrum Nauki,14,4,4,"32499815, 33502860, 29624889, 26141515","LuluDB, InterMetalDB, PhyMet2, mirEX" +,Portugal,PRT,Fundação para a Ciência e a Tecnologia,11,5,5,"33068420, 31713636, 34782688, 29899596, 33822911","chewie-NS, DisProt, CyFi-MAP, LEGE, MENSAdb" +,Russia,RUS,Russian Science Foundation,11,9,7,"34158935, 29401218, 31588507, 31598695, 28011601, 28110602, 33242091, 33231677, 30759212","GEMI, MutHTP, VDJdb, MirGeneDB, CSDB_GT, SitEx, GTRD" +,Saudi Arabia,SAU,King Abdullah University of Science and Technology,4,3,3,"33929018, 31160594, 30329098","IBDDB, PathoPhenoDB, LncBook" +,Spain,ESP,Instituto de Salud Carlos III,7,5,5,"30715274, 32076423, 33252190, 30357370, 34332522","APID, FHLdb, GRINdb, liqDB, PACHIN" +,Spain,ESP,"Ministerio de Ciencia, Innovación y Universidades",4,3,3,"33942873, 33655207, 31647096","CANNUSE, TMSNP, proGenomes2" +,Spain,ESP,Ministerio de Economía y Competitividad,3,3,3,"31713636, 30335169, 28943872","DisProt, PopHumanScan, MAHMI" +,Spain,ESP,MINECO,3,3,3,"34679164, 30020414, 31680165","T-ARDIS, SKEMPI, DisGeNET" +,Spain,ESP,Generalitat de Catalunya,5,5,5,"33942873, 21491493, 30335169, 31608375, 32786900","CANNUSE, Noncoded Amino acids Database, PopHumanScan, GSAD, BCE" +,Sweden,SWE,Vetenskapsrådet,4,4,4,"31713636, 28182744, 32016318, 33539890","DisProt, OMDB, VariBench, FunCoup" +,Switzerland,CHE,Swiss National Science Foundation,71,36,30,"32449934, 33037820, 26527719, 31665454, 30357342, 26582924, 27504778, 31353404, 22096233, 33156326, 28086860, 31617559, 24225318, 27899579, 23180783, 26578555, 28985418, 23193254, 32117874, 27899580, 28053161, 31410491, 24270792, 33270111, 33196836, 24234447, 23193273, 33180112, 33174605, 23180791, 27899657, 30664776, 30418610, 30395283, 25378343, 25428351","ASAP, Bgee, CEGA, ChlamDB, CoevDB, enviPath, GNPS, MAdb, MINAS, MetaNetX/MNXref, NaDH, PolyASite, Selectome, SNP2TFBS, SwissRegulon, SugarBindDB, SysteMHC, UCNEbase, GDB17, OrthoDB, GETPrime, ABCD, Gene3D, GENCODE, 
UniCarbKB, EPD, Europe PMC, OMA, GPCRdb, eggNOG" +,Switzerland,CHE,Swiss Institute of Bioinformatics,3,3,3,"33037820, 33174605, 30476243","Bgee, OMA, STRING" +,Switzerland,CHE,Swiss Federal Government,3,3,3,"33156326, 33290552, 33237286","MetaNetX/MNXref, GOC, UniProtKB" +,Taiwan,TWN,"Ministry of Science and Technology, Taiwan",20,11,11,"28704505, 33035337, 34976312, 34266386, 34025934, 30587128, 28194231, 30846808, 27392072, 30048518, 34384382","CSmiRTar, DockCoV2, LCMD, MitoTox, MycoTRAP-DB, PSRN, SkinSensDB, TACCO, YCRD, YARG, OrchidBase" +,Taiwan,TWN,Taiwan Ministry of Science and Technology,9,3,3,"31976536, 29648583, 34285772","HBDB, LipidPedia, Yeast Phosphoinositide-Binding Proteins" +,UK,GBR,Biotechnology and Biological Sciences Research Council,528,179,117,"22682155, 22345505, 25414324, 24265221, 33176685, 28708831, 25414348, 27189610, 26794641, 29040563, 32754757, 23180789, 31095607, 25270877, 25348397, 33084893, 23161684, 23203878, 25232097, 21803806, 22080548, 22110040, 22139938, 22912585, 23203866, 25348407, 25361971, 25432969, 26590404, 27899646, 30008982, 30349118, 30357393, 32726198, 34220930, 25189782, 26314736, 26578596, 27189608, 30364992, 22096229, 22494395, 23452239, 23667450, 27484196, 27899604, 28806134, 30252093, 23060735, 24330312, 30418645, 31612915, 33502607, 22674824, 25172923, 25399418, 26476444, 31584092, 32693783, 33749993, 21801404, 21980353, 23674503, 25414340, 25558364, 30053269, 27097230, 29057095, 29228298, 30321422, 33416848, 22121217, 22140109, 23766369, 24194607, 25300487, 25414345, 26123534, 28013278, 22080546, 24304889, 25300491, 26452372, 27899279, 30398663, 31696235, 32548865, 22067447, 23193253, 23203987, 25352543, 25635527, 26673716, 27794045, 27899622, 29112718, 29145643, 30020414, 30423142, 30476227, 31642470, 31724711, 32728249, 33290552, 30395294, 31701150, 26467479, 24297252, 24270792, 31733063, 30298402, 33270111, 25428371, 23109552, 27613420, 29106550, 30445555, 26657633, 26481351, 29858801, 26582919, 24217918, 
23203882, 33237286, 24163254, 24316576, 27794554, 30357350, 29927072, 24275495, 28077563, 23193272, 26476458, 30217829, 24214989, 26578585, 27899635, 29140473, 23630246, 30395289, 29858800, 26888907, 30395267, 33125078, 33211869, 22096232, 25428363, 27450113, 30395331, 26615190, 29112716, 30398656, 31701148, 22064864, 29165610, 26527722, 31641782, 26896847, 33106848, 24288371, 25723102, 27980099, 29069413, 27899630, 33156333, 23180798, 33170273, 29155950, 29140475, 30407521, 30395270, 31691826, 31722421, 30395287, 26578574, 33175160, 29092050, 31598706, 22086963","AFFINOMICS, Arabidopsis Network Analysis Pipeline, Araport, ArchDB, AVIMM, biochem4j, BioModels, BioSharing, BRAINS, CAZypedia, CerealsDB, ChEBI, CiliaCarta, CODEX, ComPPI, crisprSQL, dcGO, D(2)P(2, DAA, Gee Fu, ENA, ELM, Gene3D, GenDR, FlyAtlas, Genome3D, GeneFriends, Ensembl Plants, FunTree, FAIRDOMHub, EMDB, GeneATLAS, GENCODE, EnteroBase, FAANG, GOBLET, GlycoMob, HPMC, HLA-ADR, GPs, InterPro, InterStoreDB, LjGEA, LipidHome, iLIR, IPD-MHC, iLIR@viral, iProX, MetaboLights, Marmal-aid, MemProtMD, MIBiG, Missense3D-DB, PCDDB, Naked Mole Rat Genome Resource, OMA, PDBe, PDBe-KB, ORDER, OmniPath, PICCOLO, pubmed2ensembl, PhosphoGRID, PHI-base, PREDICTS, PITDB, SignaFish, SalmoNet, SeedStor, RetroRules, SARS CoV-2, Stem Cell Discovery Engine, TAIR, TIMBAL, TreeFam, Super Natural II, SUPERFAMILY, The Mouse Genomes Project, ThaleMine, International Nucleotide Sequence Database Collaboration, Expression Atlas, TrypanoCyc, WaspAtlas, WormBase ParaSite, CATH, MGnify, UK Immunological Toolbox, Ensembl Genomes, LAMP, Ensembl, RNAcentral, ARN, Pfam, FANTOM5, UniProt, Rfam, MEROPS, SKEMPI, miRBase, BioGRID, WormBase, SCOP, ENCODE, GOC, antiSMASH, eggNOG, AFND, Gramene, PRIDE, UniProtKB, ArrayExpress, GlyTouCan, OAS, JASPAR, IPD, BioSamples, PDB" +,UK,GBR,Wellcome Trust,267,152,100,"26586806, 29126148, 23486613, 22718786, 25270877, 25355519, 23868908, 24319146, 22962312, 23074185, 23193291, 25505093, 28299908, 
22080548, 22116062, 22139938, 22912585, 23161689, 23175615, 25348407, 25378340, 30008982, 30357393, 32726198, 34493866, 22923302, 23193293, 26342919, 26578596, 27733501, 27976751, 29040670, 23667450, 23998809, 27638885, 21856757, 22080565, 23935057, 30418645, 24229347, 24297257, 25172923, 25399418, 26476444, 31584092, 22363733, 23674503, 24304897, 25361970, 25593348, 28748223, 31598690, 23203869, 24504151, 30481257, 33416848, 24194607, 26123534, 26249811, 26582922, 28985418, 33095862, 22080546, 24194600, 24318814, 25300491, 25348409, 30398663, 30601939, 22067447, 23203987, 23245209, 24214965, 24259432, 25352543, 26673716, 27899622, 28981707, 29145643, 29533231, 30407529, 33206959, 33211879, 33290552, 24150940, 23203883, 24270792, 29161421, 33270111, 29121237, 23087376, 31584097, 26919060, 33952332, 31680154, 26657633, 26481351, 23203882, 24163254, 24316576, 30304474, 27899562, 27794554, 30357350, 24265224, 24311564, 27899567, 27899578, 27903906, 33180112, 27899635, 29140473, 29165655, 30395289, 26888907, 30398643, 33125078, 22086950, 27789705, 27450113, 30395331, 30371878, 29112716, 29761457, 30398656, 33166387, 26527722, 26896847, 25883136, 33106848, 24288371, 25723102, 23125362, 23584835, 23193274, 30357387, 23161678, 33156333, 23180798, 33170273, 29155950, 21936816, 24157837, 25348405, 30407521, 26527717, 31691826, 31722421, 26578574, 33175160, 31598706, 22086963","BreCAN-DB, CCDS, CGOB, ChromoHub, CODEX, COSMIC, CREDO, CSA, DECIPHER, DARNED, DGVa, diXa, DrugAge, ENA, GeneDB, Gene3D, GenDR, GeneTack, EuPathDB, Genome3D, Europe PMC, EMDB, GENCODE, EnteroBase, eQTL, HSPIR, HAGR, Geroprotectors, HPMC, HipSci, Hepitopes, ICTV, LipidHome, LongevityMap, IGSR, modENCODE, modMine, metabolicMine, MemProtMD, NeuroGeM, NECTAR, Naked Mole Rat Genome Resource, OMA, PDBe, PDBe-KB, PrionHome, PhosphoGRID, PPD, PomBase, PlasmoGEM, PhenoPlasm, ProCarbDB, SIFTS, SATuRN, SkeletalVis, SARS CoV-2, TreeFam, The Mouse Genomes Project, SurvCurv, SureChEMBL, SysteMHC, ThermoMutDB, 
International Nucleotide Sequence Database Collaboration, IMPC, Electronic Mouse Atlas of Gene Expression, TrypanoCyc, Human Disease Ontology, CATH, UbiHub, Ensembl Genomes, Ensembl, HGNC, ChEMBL, RefSeq, RNAcentral, Pfam, UniProt, EPD, MEROPS, wwPDB, BioSamples, IntAct, ArrayExpress, GOC, IUPHAR-DB, MSeqDR, SANCDB, TDR Targets, Expression Atlas, PRIDE, RepeatsDB, InterPro, JASPAR, CARD, PDB, FANTOM5, Rfam, ENCODE, Gramene, UniProtKB" +,UK,GBR,Medical Research Council,127,85,62,"29040693, 34528715, 26794641, 31095607, 25270877, 23019048, 21249531, 23529715, 29385418, 31664080, 24174536, 25348407, 26590404, 30008982, 30349118, 30357393, 32726198, 24217912, 25189782, 26578596, 27976751, 29040670, 24194598, 22121219, 23060735, 25542617, 27799474, 29897419, 31504189, 24229347, 24297257, 26476444, 31584092, 22363733, 25593348, 31598690, 26746786, 34844637, 26123534, 29087479, 33095862, 24194600, 24318814, 27899279, 21447597, 24234449, 30407529, 31642470, 31724711, 32728249, 33290552, 24214962, 24270792, 25388151, 31733063, 30298402, 33270111, 31612961, 26531826, 26432830, 23109552, 26919060, 30445555, 24265223, 26935103, 24265224, 22067452, 23650175, 27899567, 29140473, 30398659, 22102590, 22127867, 24194605, 32486891, 27450113, 30395331, 30398656, 31701148, 29069413, 26555599, 23161678, 33156333, 26578574, 31598706","AmyPro, BRAIN UK, BRAINS, CiliaCarta, CODEX, CRCgene, DIADEM, Database of Instruments for Resource Use Measurement, DIBS, dendPoint, GeneProf, Genome3D, FunTree, EMDB, GeneATLAS, GENCODE, EnteroBase, HPO, GOBLET, HPMC, Hepitopes, ICTV, JASPAR, MitoMiner, MetaboLights, MSeqDR, mirDNMR, MARDy, MeLAD, NeuroGeM, NECTAR, PDBe, PDBe-KB, PrionHome, PlasmoGEM, ProCarbDB, RVS, recount3, The Mouse Genomes Project, STCRDab, ThermoMutDB, IMPC, Electronic Mouse Atlas of Gene Expression, WormBase ParaSite, UniProtKB, FlyBase, BioSamples, WormBase, SCOP, ENCODE, GOC, ELM, Gene3D, EVpedia, GWAS Central, SUPERFAMILY, CARD, Reactome, PDB, InterPro, dbNSFP, Ensembl" 
+,UK,GBR,Cancer Research UK,29,18,16,"32507889, 25270877, 23019048, 24122843, 26590264, 31504189, 31584092, 22544707, 26123534, 26826444, 24163255, 25332396, 33237329, 25414341, 29059374, 26578585, 29858800, 31680160","articles.ELM, CODEX, CRCgene, HTS-DB, MutationAligner, MeLAD, PDBe-KB, SNPnexus, The Mouse Genomes Project, IPD-IMGT/HLA, PED, BCCTBbp, MobiDB, IPD, Gene3D, ELM" +,UK,GBR,British Heart Foundation,25,22,14,"22123736, 24217912, 26314736, 22954629, 23619930, 24297257, 31584092, 22881376, 27899622, 25378336, 23087376, 33237286, 24234451, 27899567, 22102590, 30395267, 30395331, 23161681, 23161678, 24253303, 25348405, 30395287","GOA, HPO, GlycoMob, MaConDa, MRIdb, NECTAR, PDBe-KB, UCL LDLR, UniProt, IUPHAR-DB, UniProtKB, IntAct, GOC, RNAcentral" +,UK,GBR,Natural Environment Research Council,19,12,12,"26820405, 33167031, 25189782, 21707958, 29897419, 33084905, 23193267, 25558364, 25740460, 30094004, 25025376, 26578574","Biosurveillance Analytics Resource Directory, COG, GOBLET, Littorina sequence database, MARDy, MeDAS, PR(2, PREDICTS, PhytoREF, SuperbaSE, tropiTree, Ensembl" +,UK,GBR,Engineering and Physical Sciences Research Council,21,11,11,"27246819, 27899646, 30418645, 31584092, 24214988, 25414345, 29316788, 24214965, 34655133, 30395294, 31740968","BioHub, FAIRDOMHub, MemProtMD, PDBe-KB, SAbDab, SUPERFAMILY, SynBioHub, ChEMBL, OAS, antiSMASH, SEVA-DB" +,UK,GBR,Parkinson's UK,5,5,4,"27899622, 25378336, 27899567, 30395331, 25348405","UniProt, GOA, GOC, UniProtKB" +,UK,GBR,Cystic Fibrosis Trust,4,3,3,"34782688, 31598690, 33416848","CyFi-MAP, ProCarbDB, SARS CoV-2" +,UK,GBR,UK Royal Society-Newton Advanced Fellowship,3,3,3,"31584086, 31640808, 33175170","PGG.Han, PGG.SNV, 2019nCoVR" +NIH,US,USA,NIGMS NIH HHS,571,255,220,"28977551, 27507885, 32765587, 22760305, 23774715, 25378330, 25516260, 23087378, 25414348, 29637199, 24214957, 24994456, 27050421, 25392415, 29509874, 23794735, 27701074, 23185041, 24470572, 30486838, 23203874, 29092931, 23661693, 32073269, 
25333826, 32890396, 26602695, 23172289, 23264352, 24225319, 25887129, 25951377, 26019122, 26030752, 26503248, 26503254, 26946289, 27907889, 30053267, 31612957, 33119734, 33174603, 33995899, 21177656, 21994220, 22748121, 23143106, 23197660, 23245398, 23794736, 24928188, 25399415, 26780094, 27141961, 29337142, 29890119, 30008982, 30407583, 30951672, 31642488, 21450710, 22846459, 23104379, 23118488, 23504933, 23958730, 25971743, 26212453, 27504778, 30418591, 31841142, 33119754, 33125055, 33973408, 21177658, 22058127, 22096229, 22570419, 23599502, 23991755, 24002112, 24194598, 25166490, 25707505, 26173699, 26362267, 26582920, 27789704, 27841751, 28150246, 28592645, 30397019, 30535108, 32502232, 22146221, 22701463, 23044546, 23143105, 23875173, 25098325, 25378301, 25559128, 26527726, 26590264, 29126312, 29155944, 29206899, 29284660, 30268942, 31679514, 31754718, 33174596, 22712730, 22833564, 24271386, 24271398, 24406170, 24939129, 25102069, 26476444, 26615193, 27152146, 29106626, 29487113, 31584092, 33361798, 21993301, 22086960, 22559792, 23193263, 23426257, 23624946, 24163250, 24227675, 24304897, 25392411, 26112452, 27026615, 28365761, 29069441, 29575358, 30357353, 30985146, 21472436, 22127861, 22140105, 22976082, 24060102, 24194593, 24271399, 25309735, 25414355, 26138588, 26527724, 27010673, 29222504, 29733404, 30202870, 30329093, 31494246, 32345346, 32785571, 34514416, 34844637, 21253873, 23118483, 24223973, 24225317, 25392422, 27188311, 28985418, 30871473, 31240309, 21276248, 22064863, 22067444, 22135298, 23226127, 23550210, 24243849, 24285306, 25392405, 26227548, 26590254, 27899583, 33211851, 21447597, 23730305, 23868073, 24185695, 24304899, 24356117, 25428361, 26578587, 26590263, 27899622, 27924039, 28160322, 29059334, 30668832, 31665425, 32558264, 32728249, 33290552, 33305318, 34314492, 34529321, 31701150, 30407596, 23494302, 33270111, 29145615, 30417254, 31504780, 30053264, 25501940, 26546518, 25514926, 24259431, 32621232, 33237286, 25326323, 27914894, 23175613, 
24270788, 22067456, 22102576, 23042674, 26553799, 24214955, 27515742, 23476021, 27899567, 25378335, 26476458, 27899635, 27924014, 28891124, 28713666, 30395289, 22102590, 27651457, 26578592, 22127867, 27450113, 30395331, 29106616, 33125071, 30476243, 23161681, 24253303, 25348405, 23193289, 26519399, 30395287","AAgMarker, ADPriboDB, Analysis of Breast Cancer GWAS, AraPath, Autism Brain Imaging Data Exchange, Binding MOAD, Bioclock, BioLiP, BioModels, BiOnIC, bNAber, BorreliaBase, C-terminome, Cancer3D, CarbonylDB, CARLSBAD, CeNDR, ChemProt-2.0, ChEpiMod, CHESS, CIL-CCDB, Cistrome Cancer, CMAP, COLMAR Lipids, Complex Mixture Analysis by NMR, CoV3D, CRN, dictyBase, DegraBase, DNASU, dbVOR, DeTEXT, DX, DMD, dbMAE, Digital Development, dbPEC, denovo-db, dbCAN-seq, DNAproDB, DescribePROT, Datanator, DRscDB, Gene Expression Barcode, FlyExpress, Genes2FANs, EcoCyc, EcoGene, Genome-Wide Docking Database, ESCAPE, FreeSolv, GenoBase, GenomeSpace, Enrichr, Express, FlyXCDB, EMDB, FusionGDB, exRNA Atlas, ExonSkipDB, hmChIP, HINT, GFDB, HEXEvent, Human Proteinpedia, HippDB, GLASS, Histone Antibody Specificity Database, GNPS, HumanNet, GlyMDB, HeRA, HbVar, hu.MAP, IsoBase, MACiE, InterPro, LAHEDES, INstruct, IIMDB, M2SG, JASPAR, isoMETLIN, IIIDB, IMG-ABC, ImmuNet, iGNM, KERIS, IRRMC, iPTMnet, ISVdb, iProteinDB, InteracDome, LabxDB, MnM, Medicago PhosphoProtein Database, miR-EdiTar, MonarchBase, MelanomaDB, MediaDB, miRDB, MUFOLD-DB, MouseNet, MutationAligner, Met-DB, MIST, MOSAIC, ModERN, MiPanda, MaveDB, MiST, MitoCarta, PeanutDB, NESdb, OnTheFly, NeXO, PeptiSite, PeptideAtlas, Panorama, PDBe, PDBFlex, NCRO, PAMBD, Panorama Public, PDBe-KB, Open Cancer TherApeutic Discovery, Polbase, PrimerBank, PKKB, PrePPI, Planform, PMP, RADAR--a, PhosphoNetworks, PPD, PyIgClassify, PlantOrDB, PheKB, PhagesDB, PolyA_DB, ProtaBank, piRTarBase, ProteinExplorer, SBKB, RNA CoSSMos, ScerTF, RMDB, RegPrecise, SelenoDB, SFLD, SkateBase, rrnDB, SmedGD, RegulonDB, SM-TF, RNA Structurome Database, SPAR, 
SequencEnG, REDfly, SliceIt, REPIC, SELAdb, SCISSOR√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¢, recount3, TGED, Spliceosome, SWEETLEAD, TCDB, tRFdb, Structure Surfer, SysteMHC, TADKB, TCR3d, ZFNGenome, EcoliWiki, zfishbook, PhosphoSitePlus, UCLA Multimodal Connectivity Database, cBioPortal, P(3)DB, PortEco, 
non-human primate reference transcriptome resource, DOCKGROUND, UET, XTalkDB, WikiPathways, UniProtKB, SGR, PANTHER, NDB, SCOPe, dbSNP, GRASP, BioGPS, Lynx, UniProt, FlyRNAi, ATLAS, MetaCyc, DASHR, TFBSshape, PDB, ENCODE, GOC, PED, GlyTouCan, antimicrobial peptide database, GENCODE, iSyTE, VIOLIN, CTD, LNCipedia, EnhancerAtlas, PLncDB, MeT-DB, STRING, Gramene, PRIDE, FlyBase, MODOMICS" +NIH,US,USA,NCI NIH HHS,281,137,127,"20949389, 32765587, 31586392, 22760305, 33037820, 24214957, 34903605, 25392415, 25190456, 26519468, 23893318, 33086069, 24470572, 30486838, 30202990, 26450965, 29092931, 30668638, 23630576, 33095860, 23197658, 23264352, 23550061, 24122041, 25990557, 26503248, 26503254, 29485625, 30371892, 33787872, 21177656, 22110038, 23340253, 26780094, 27141961, 28453687, 30407583, 30951672, 31642488, 21591763, 22165817, 22846459, 23118488, 25971743, 26212453, 30247654, 30418591, 31509535, 22570419, 23599502, 29136207, 30535108, 34127402, 21546393, 23044546, 25332399, 26590264, 27907895, 29126312, 29155944, 29284660, 30268942, 31588509, 32986834, 33174596, 33514395, 34349127, 21491493, 22517761, 23196988, 24271386, 25102069, 25527095, 26072489, 26578589, 27152146, 29186335, 31584092, 31647099, 33245774, 33442735, 21656910, 23193263, 24227675, 24304897, 27789569, 29077937, 32358997, 24194593, 27377064, 29059366, 32345346, 34014674, 22121217, 22786849, 24013925, 25392422, 26208906, 27168721, 27733502, 28985418, 29036590, 29092939, 30053266, 33156327, 21276248, 22135298, 23066107, 23110975, 23550210, 26590259, 27789702, 30462313, 27794042, 27924039, 31691815, 32728249, 33290552, 33849445, 33151287, 26302176, 25514926, 26590405, 31308250, 29136208, 25414341, 33290554, 27899562, 24163257, 27587585, 25378335, 24285300, 25428374, 25348401, 26578600, 27899570, 29092072","ACSR, Analysis of Breast Cancer GWAS, APAatlas, AraPath, Bgee, bNAber, Cancer-Immu, Cancer3D, CCGD, ccmGDB, CDSA, CellMiner-SCLC, ChEpiMod, CHESS, ChIPprimersDB, CircNet, Cistrome Cancer, CMEP, CMS, 
CNCDatabase, DGA, DegraBase, curatedOvarianData, DGIdb, DSigDB, dbMAE, Digital Development, Datasets2Tools, DrugCentral, Drugmonizome, Gene Expression Barcode, GeneSigDB, EDR, GenomeSpace, Enrichr, GDISC, FusionGDB, exRNA Atlas, ExonSkipDB, GlycoFish, HOMER, HINT, HEXEvent, GLASS, Histone Antibody Specificity Database, HACER, HumanNet, GutFeelingKB, LAHEDES, INstruct, LinkedOmics, InteracDome, Immu-Mela, MSigDB, miR-EdiTar, MTB, MutationAligner, mutLBSgeneDB, Met-DB, MIST, ModERN, MiPanda, MutEx, ModelSEED, MitoCarta, MPM, MiREDiBase, Noncoded Amino acids Database, NGS, NURBS, OnTheFly, Panorama, NetGestalt, PAGER, ORegAnno, NCRO, OncoPPi, PDBe-KB, Pathway Commons, PAGER-CoV, O-GlcNAcAtlas, QuAD, PrePPI, PhosphoNetworks, PPD, PMKB, PICKLES, ProNetView-ccRCC, SelenoDB, RID, SBCDDB, REPIC, SistematX, Stem Cell Discovery Engine, Transcriptomine, targetHub, tRFdb, TANRIC, TCCR, SZGR, SysteMHC, TissGDB, TCPA, TC3A, TCRD, ZFNGenome, PhosphoSitePlus, TSGene, MGI, cBioPortal, UCSC Genome Browser, Cistrome, Cistrome DB, RCSB PDB, FlyRNAi, Reactome, ENCODE, GOC, TANTIGEN, MPD, IPD, PANTHER, ChEMBL, GXD, REDIportal, MeT-DB, MGD" +NIH,US,USA,NHGRI NIH HHS,422,194,120,"30371900, 30534948, 25477388, 21233089, 29637199, 29126148, 22140108, 24470572, 30486838, 23508969, 26855883, 23172289, 24122041, 27766955, 27907889, 31612957, 32227657, 33995899, 21994220, 22110038, 23161689, 23203885, 24243844, 26780094, 27664130, 30357393, 21450710, 22064851, 22123736, 24217912, 26673694, 28967693, 30445434, 34859531, 22792232, 27841751, 21856757, 22080565, 22140101, 24203705, 25542617, 26656948, 29155944, 29206899, 29284660, 31679514, 24939129, 25428349, 26578589, 29487113, 31647099, 21624156, 23378291, 24163250, 24227675, 27026615, 27799469, 28862395, 22140105, 23203869, 26097510, 29040625, 32345346, 22140109, 23197656, 26516187, 29092939, 30052772, 30053266, 34154643, 22067444, 22075990, 22080546, 22736877, 23110975, 24194600, 24267744, 25348409, 25378322, 25392405, 26590259, 30462313, 
31283070, 21447597, 22102583, 22110037, 23074187, 23203987, 23245209, 24007337, 24234449, 25214827, 25352543, 26612867, 27794045, 27899622, 28150237, 29069475, 29077884, 29112718, 29145643, 31642470, 31665425, 31691815, 32728249, 33021634, 33261662, 33290552, 34387941, 23494302, 33270111, 25378336, 31584097, 26919060, 31680153, 23175610, 26481351, 33221922, 24217918, 33237286, 23487186, 27899582, 24316576, 30304474, 33290554, 26935103, 21672956, 27794554, 33436076, 30357350, 23161672, 22067452, 24214955, 27515742, 31713622, 23193272, 21520341, 27899567, 24285300, 34698891, 25428374, 28713666, 22102590, 24265222, 28838067, 26578592, 22127867, 30395267, 23203985, 23143107, 33211869, 24194605, 32486891, 29126249, 25361974, 23843252, 30395331, 30398656, 25348401, 29165610, 23161681, 33170210, 33106848, 22135293, 25414346, 29069413, 29145629, 23193274, 26555599, 23161678, 26578600, 33170273, 24253303, 29140510, 30407545, 29155950, 30407594, 22012987, 27899570, 25348405, 26097180, 30407521, 24243840, 29092072, 26519399, 26631132, 31691826, 26087747, 30407599, 30395287, 33231642, 29761460, 22086963, 34741192","Ancestral Genomes, atSNP, BARD, BIND, BiOnIC, CCDS, CharProtDB, ChEpiMod, CHESS, CistromeFinder, CressInt, dictyBase, DGIdb, e-GRASP, denovo-db, DNAproDB, dbMTS, DRscDB, FlyExpress, GeneSigDB, GeneTack, factorbook, FireDB, GenomeSpace, ENVO, GENCODE, hmChIP, HaploReg, GOA, HPO, GtRNAdb, HUMA, GWAS, gnomAD, Integrated Microbial Genomes and Metagenomes, IRRMC, modENCODE, modMine, MethylomeDB, MetaRef, MSeqDR, MG-RAST, MIST, MOSAIC, ModERN, MaveDB, PeptideAtlas, OMIM, ORegAnno, Panorama Public, Pathway Commons, PHARE-KB, PhenoDB, RADAR--a, PhosphoNetworks, PheKB, Plant Reactome, PMS_DN, ScerTF, SIFTS, SANCDB, RISE, REPIC, TAIR, TIGRFAMs, SynLethDB, TCPA, Terabase, TC3A, TE Hub, zfishbook, MGD, International Nucleotide Sequence Database Collaboration, KB, MGI, IMPC, VTO, Human Disease Ontology, UniPROBE, non-human primate reference transcriptome resource, UCSC Genome 
Browser, Cistrome DB, VIPdb, UniProtKB, MPD, SGD, ZFIN, Ensembl, HGNC, GRASP, FlyBase, BioPortal, RNAcentral, Dfam, FANTOM5, UniProt, UniCarbKB, APPRIS, HEDD, Rfam, MEROPS, WormBase, TFBSshape, Reactome, ENCODE, NDB, dbNSFP, GOC, PharmGKB, IGSR, Expression Atlas, Gramene, PANTHER, Pfam, EnhancerAtlas, ArrayExpress, ChiTaRS, InterPro, GOLD, RGD" +NIH,US,USA,National Institutes of Health,195,126,115,"30239679, 33401309, 30371900, 32765587, 31586392, 27193158, 30534948, 29509874, 31210271, 30668638, 33095860, 32392296, 33167031, 30357367, 32890396, 31725864, 33211888, 30371892, 31612957, 33079988, 33119734, 33174603, 33787872, 29337142, 29890119, 30407583, 31642488, 28967693, 29028885, 30418591, 30445434, 31841142, 33119754, 27841751, 30535108, 31701147, 33137204, 33166392, 25542617, 26322134, 28490127, 29206899, 30764761, 30841849, 31042284, 31612915, 31679514, 31754718, 33174596, 31566225, 31647099, 33442735, 27789569, 28365761, 28862395, 30805645, 27010673, 30202870, 30329093, 31494246, 31950189, 31240309, 31490686, 33035346, 33156327, 33729437, 34522848, 26227548, 31598702, 31680168, 31837751, 32386298, 26434508, 28160322, 30476227, 31665425, 31691815, 31740966, 32558264, 33104772, 33290552, 33305318, 34366563, 34387941, 33151287, 33270111, 31114900, 31584097, 31504780, 26919060, 31680153, 33221922, 33237286, 33290554, 27914894, 30357350, 31713622, 28280852, 31777943, 30395289, 31696236, 30398643, 30395331, 33125071, 31777944, 30476243, 33166387, 33170210, 29846728, 33270901, 33106848, 33156333, 33170273, 30371825, 30407545, 30407594, 32128557, 33151290, 33068428, 30407521, 31691826, 30407599, 30395287, 33231642, 31598706, 31713623","AgBioData, ADeditome, Ancestral Genomes, Analysis of Breast Cancer GWAS, APAatlas, ASL-LEX, atSNP, CarbonylDB, Chickspress, CMEP, CNCDatabase, CoCoCoNet, COG, CORUM, CoV3D, CRAFT, CSEA-DB, DrugCentral, DNAproDB, DKK, DescribePROT, Datanator, Drugmonizome, Express, FlyXCDB, FusionGDB, ExonSkipDB, HUMA, HoTResDB, HumanNet, GWAS, GlyMDB, 
HeRA, IRRMC, InteracDome, LINCS, KinaseMD, LitCovid, MSeqDR, Metabolic In silico Network Expansions, Milk bioactive peptide database, MOSAIC, MDB, mGAP, Microbiome Learning Repo, MIBiG, MaveDB, MiST, MitoCarta, OGRDB, Pathway Commons, O-GlcNAcAtlas, PMKB, PhagesDB, PMS_DN, PKAD, SM-TF, SequencEnG, REDfly, SliceIt, RNA Characterization of Secondary Structure Motifs, TCR3d, TMB, tRFtarget, TCRD, TIE, ToppCell, DOCKGROUND, VISDB, TSEA-DB, VetCOT, MGP Portal, ExoCarta, ATLAS, BioGRID, TFBSshape, Reactome, EnhancerAtlas, PDB, GXD, GOC, PED, DSLD, PharmGKB, GENCODE, IEDB-AR, IGSR, miRDB, Plant Reactome, UCSC Genome Browser, UniProtKB, PANTHER, SCOPe, Pfam, ENCODE, TANTIGEN, ClinVar, PRIDE, MPD, ChEMBL, GlyTouCan, CDD, STRING, International Nucleotide Sequence Database Collaboration, ZFIN, CTD, RefSeq, RNAcentral, InterPro, Gramene, PubChem, SGD, Ensembl, MGD, RGD" +NIH,US,USA,Intramural NIH HHS,68,62,46,"22084196, 22139929, 23203889, 23197659, 33086069, 24016071, 28383342, 26582918, 23193260, 25428365, 26438539, 23193291, 24297256, 25010047, 22102591, 22139925, 23193258, 23193275, 24558441, 25982314, 31600197, 21873645, 25220766, 22139910, 23093593, 26048622, 22102570, 31103066, 23044550, 23284744, 32027495, 21177655, 22080546, 23180778, 24304891, 25392405, 22140110, 23696674, 24259432, 24319143, 25352543, 25414350, 25428361, 34366563, 25414356, 27008011, 26302176, 26657633, 23203872, 24198245, 26553804, 22135289, 23193287, 24931982, 29927072, 30774152, 26400175, 31851420, 23180798, 24316578, 25510495, 22121212","BGMUT, BioProject, Bookshelf, CDD, CellMiner-SCLC, CellMinerHCC, CHEAR, ClinVar, Clone, COGs, CRISPRz, DGVa, dbGaP, GBM-BioDP, IBIS, GWASdb, GEO, GTR, HTD, GermlncRNA, iCite, iRefIndex, LabeledIn, NCBI Taxonomy, PaVE, PedsDTI, ProPortal, Placental Atlas Tool, SemMedDB, StRAP, tautomeric, COMBREX, International Nucleotide Sequence Database Collaboration, ZInC, Virus Variation, non-human primate reference transcriptome resource, PubChem, CGD, RefSeq, MMDB, 
RNAcentral, GenBank, GRASP, DSLD, MTB, Rfam" +NIH,US,USA,NIAID NIH HHS,95,50,44,"30304689, 30357390, 22080559, 30534948, 24214957, 24994456, 31649674, 29997612, 23661693, 32073269, 21782820, 21760913, 26510927, 27504778, 29028885, 22260278, 26362267, 27841751, 24203705, 26656948, 33780471, 21765097, 29106626, 31566225, 22080514, 23219434, 23568467, 26602694, 27053566, 30365026, 30593617, 22139919, 34529321, 24194595, 24270792, 25388105, 31114900, 27679478, 31722416, 24259431, 25555720, 33313778, 31667520, 23110173, 33151284, 22067456, 26433228, 27903906, 25428374, 25273106","10KIP, ALEdb, AspGD, atSNP, bNAber, BorreliaBase, cAb-Rep, CDG, CMAP, COLMAR Lipids, DFRMLI, HelmCoP, HAND, GNPS, HoTResDB, IRD, ImmuNet, IRRMC, MetaRef, MG-RAST, MCPdb, PGAT, PAMBD, OGRDB, PolymiRTS, VirmugenDB, VIOLIN, APD3, Vaxar, Victors, ViPR, Nematode.net, antimicrobial peptide database, Gene3D, EuPathDB, IEDB-AR, MEGARes, VIPERdb, PATRIC, SEED, DBAASP, FlyRNAi, RPFdb, UCSC Genome Browser" +NIH,US,USA,NIDDK NIH HHS,60,35,34,"25333826, 26393351, 27766955, 33079988, 33787872, 22748121, 23794736, 24101916, 30951672, 27504778, 28212602, 30418591, 33125055, 33973408, 34859531, 30397019, 21880229, 24203705, 26322134, 33174596, 21890895, 23196988, 24839966, 26072489, 24227675, 24288368, 30674925, 22786849, 31672983, 22067444, 23180778, 24350770, 32728249, 25388151, 24910945","Complex Mixture Analysis by NMR, dkNET, e-GRASP, DKK, Drugmonizome, Genes2FANs, ESCAPE, FunGene, exRNA Atlas, GNPS, HAPPI, HumanNet, HbVar, hu.MAP, gnomAD, iProteinDB, Monogenic Diabetes Registry, MetaRef, Metabolic In silico Network Expansions, MitoCarta, OPM, NURBS, PCD, PAGER, PhosphoNetworks, RDP, Smooth Muscle Transcriptome Browser, Transcriptomine, SPP, zfishbook, ZInC, Model Organism Protein Expression Database, ENCODE, EVpedia" +NIH,US,USA,NHLBI NIH HHS,145,48,33,"23193282, 26602695, 26946289, 29485625, 33787872, 27141961, 30951672, 21982653, 23504933, 33119754, 33973408, 25707505, 31701147, 34936882, 33514395, 
21821666, 25102069, 22086960, 24304897, 24288368, 27643925, 30052772, 21296746, 22102583, 23730305, 33290552, 25388151, 25514926, 21321022, 27587585, 24234451, 21520341, 30395267, 25428363, 23843252, 23255149, 27980099, 26555599, 23161678, 23603846, 23794737, 23881287, 25355511, 27602200, 27736745, 29761460, 31713623, 34741192","Allen Brain Atlas, CRN, dbPEC, Datasets2Tools, Drugmonizome, Enrichr, exRNA Atlas, GenTAC, Human Proteinpedia, HeRA, hu.MAP, IIIDB, LINCS, Lung CellCards, MPM, NeuroPedia, Panorama, PrimerBank, PPD, RDP, StemCellCKB, Terabase, RGD, MPD, SGR, GOC, EVpedia, PhosphoSitePlus, REDIportal, IntAct, dbNSFP, RNAcentral, BioGRID" +NIH,US,USA,NLM NIH HHS,47,40,33,"27050421, 26519468, 23893318, 32392296, 33167031, 33211888, 22110038, 22748121, 26567549, 22058127, 31701147, 33137204, 33166392, 22146221, 27907895, 24406170, 26504143, 21624156, 27733502, 29036590, 23066107, 23175606, 26590254, 31598702, 31680168, 21447597, 26612867, 34314492, 26590405, 22102590, 23203985, 33211869, 33166387, 23161681, 33106848, 29069413, 24253303, 25348405, 33151290, 26519399","C-terminome, ccmGDB, CDSA, CoCoCoNet, COG, CSEA-DB, GeneSigDB, Genes2FANs, GEneSTATION, MACiE, LINCS, KinaseMD, LitCovid, MnM, mutLBSgeneDB, PeptiSite, PDID, PHARE-KB, SZGR, TissGDB, TSGene, WholeCellKB, UET, VISDB, TSEA-DB, UniProtKB, Dfam, GlyTouCan, Rfam, International Nucleotide Sequence Database Collaboration, RNAcentral, WormBase, PubChem" +NIH,US,USA,NIMH NIH HHS,100,30,29,"23193282, 23774715, 29985970, 32386544, 32392296, 26019122, 27907889, 33599246, 24217912, 24336862, 26212453, 33439542, 26362267, 28592645, 22140101, 21821666, 26048622, 26578589, 29370821, 31868683, 22171328, 26323714, 31171447, 23209562, 22102583, 26590263, 26311606, 33221922, 24270788, 27587585","Allen Brain Atlas, Autism Brain Imaging Data Exchange, BrainEXP, CCFv3, CoCoCoNet, DX, denovo-db, Gemma, HPO, GIGA, Histone Antibody Specificity Database, HDG, ImmuNet, ISVdb, MethylomeDB, NeuroPedia, PedsDTI, ORegAnno, 
PhenoDis, QPN, seeQTL, RNASeqMetaDB, SynGO, Wiki-Pi, MPD, Lynx, UCLA Multimodal Connectivity Database, UCSC Genome Browser, REDIportal" +NIH,US,USA,NCATS NIH HHS,48,30,28,"25378330, 25516260, 25190456, 23893318, 23630576, 23172289, 23197658, 25951377, 30371892, 30951672, 31509535, 31701147, 27069559, 34349127, 25102069, 33245774, 33361798, 27026615, 24223973, 27643925, 33035346, 33156327, 26578587, 26590263, 33151287, 25388151, 27899567, 25378335, 23161678, 30395287","Binding MOAD, Bioclock, CCGD, CDSA, CMS, dictyBase, DGA, DeTEXT, DrugCentral, exRNA Atlas, GutFeelingKB, LINCS, MD-CTS, MiREDiBase, Panorama, PAGER-CoV, Open Cancer TherApeutic Discovery, PheKB, SWEETLEAD, StemCellCKB, tRFtarget, TCRD, BioGPS, Lynx, EVpedia, GOC, MeT-DB, UniProtKB" +NIH,US,USA,NCRR NIH HHS,57,30,28,"23893318, 23203874, 23197658, 23264352, 26946289, 21177656, 22748121, 23104379, 23504933, 27504778, 21880229, 22146221, 21821666, 23674503, 24227675, 25309735, 22434841, 24223973, 26516187, 22135298, 23550210, 25348409, 25392405, 27053566, 21447597, 27924039, 25501940, 23203872, 24316576, 22102590","CDSA, CIL-CCDB, DGA, DegraBase, dbPEC, Gene Expression Barcode, Genes2FANs, GFDB, Human Proteinpedia, GNPS, Monogenic Diabetes Registry, MnM, NeuroPedia, PhosphoGRID, PhosphoNetworks, SkateBase, TGD, SWEETLEAD, SynLethDB, PhosphoSitePlus, cBioPortal, Human Disease Ontology, non-human primate reference transcriptome resource, Vaxar, UniProtKB, FlyRNAi, SFLD, Ensembl" +NIH,US,USA,NIH HHS,47,31,26,"34482425, 26503254, 29485625, 27664130, 24217912, 27841751, 27899569, 29284660, 30841849, 31679514, 23674503, 26251998, 28862395, 24203712, 21276248, 23175606, 25348409, 25392405, 31283070, 27924039, 30476227, 34314492, 30407596, 26919060, 31722416, 23203872, 22067456, 24285300, 25428363, 25723102, 27980099","CrePortal, Digital Development, Datasets2Tools, ENVO, HPO, IRRMC, MEGARes, ModERN, mGAP, MaveDB, PhosphoGRID, PhenomeCentral, PMS_DN, TISdb, ZFNGenome, WholeCellKB, Human Disease Ontology, 
non-human primate reference transcriptome resource, VIPdb, FlyRNAi, BioGRID, GlyTouCan, Cancer3D, MSeqDR, MGD, FANTOM5" +NIH,US,USA,NIH,60,25,25,"33037820, 29985970, 26855883, 26019122, 33995899, 30951672, 33897975, 29028885, 32502232, 29206899, 31197322, 32436932, 29186335, 31584092, 34514416, 29092939, 30052772, 31171447, 33021634, 34387941, 31722416, 33313778, 28891124, 33211869, 27450113","Bgee, BrainEXP, CressInt, DX, DRscDB, exRNA Atlas, FGDB, HoTResDB, LabxDB, MOSAIC, MetOSite, miRactDB, OncoPPi, PDBe-KB, SCISSOR√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ
Ç√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¢, TCPA, Terabase, SynGO, NDB, PharmGKB, MEGARes, VIPERdb, DOCKGROUND, Rfam, PDB" +NIH,US,USA,NICHD NIH HHS,54,26,22,"31680137, 23774715, 22916227, 34482425, 26503254, 26946289, 27907889, 23340253, 24150938, 33973408, 32502232, 28490127, 29145608, 26048622, 23110975, 23125366, 26476456, 23730305, 33104772, 34387941, 30417254, 29761459, 25313157, 24316576, 24163257, 34698891","ANISEED, Autism Brain Imaging Data Exchange, CLEARPOND, CrePortal, Digital Development, dbPEC, denovo-db, EDR, GEISHA, hu.MAP, LabxDB, Milk bioactive peptide database, MeDReaders, PedsDTI, MGI, Xenbase, BiGG, SGR, GXD, PharmGKB, iSyTE, Ensembl" +NIH,US,USA,National Human Genome Research Institute,57,25,22,"30534948, 30486838, 32227657, 30357393, 34859531, 32345346, 34154643, 32386298, 31642470, 33206959, 33261662, 33290552, 34387941, 33221922, 33237286, 30304474, 33436076, 31713622, 33211869, 30395331, 31691826, 30395287, 33231642, 31598706, 34741192","atSNP, CHESS, dbMTS, GENCODE, gnomAD, REPIC, TE Hub, MGP Portal, WormBase, IntAct, dbNSFP, GOC, PharmGKB, UCSC Genome Browser, UniProtKB, HGNC, Dfam, ENCODE, Rfam, Ensembl, MGD, RGD" +NIH,US,USA,National Institute of General Medical Sciences,34,22,20,"30486838, 32073269, 25333826, 33995899, 29337142, 33973408, 25102069, 30985146, 29733404, 31494246, 32345346, 27188311, 30871473, 33211851, 30668832, 33206959, 33290552, 34529321, 33237286, 30395331, 30476243, 30395287","CHESS, COLMAR Lipids, Complex Mixture Analysis by NMR, DRscDB, Express, hu.MAP, Panorama, ProteinExplorer, SPAR, SliceIt, REPIC, Structure Surfer, TADKB, WikiPathways, DASHR, IntAct, GOC, antimicrobial peptide database, UniProtKB, STRING" +NIH,US,USA,NIDA NIH HHS,30,19,17,"26019122, 30951672, 25166490, 22276777, 21821666, 21890895, 
25102069, 31259547, 31228159, 31868683, 27643925, 31171447, 22067444, 31598702, 22102583, 26434508, 29136208, 27587585, 31696236","DX, exRNA Atlas, isoMETLIN, miRdSNP, NeuroPedia, OPM, Panorama, PerMM, PhenoGen, QPN, StemCellCKB, SynGO, zfishbook, VISDB, MPD, ExoCarta, REDIportal" +NIH,US,USA,NINDS NIH HHS,58,20,16,"23774715, 23203874, 24991954, 21249531, 24700709, 25166490, 25542617, 25953081, 29145608, 21821666, 26048622, 26323714, 31171447, 23226127, 25392405, 26590263, 26919060, 26311606, 23203872, 24270788","Autism Brain Imaging Data Exchange, CIL-CCDB, CORTECON, DIADEM, DBDB, isoMETLIN, MSeqDR, MIsoMine, MeDReaders, NeuroPedia, PedsDTI, RNASeqMetaDB, SynGO, UCLA Multimodal Connectivity Database, non-human primate reference transcriptome resource, Lynx" +NIH,US,USA,NIA NIH HHS,35,19,16,"30357367, 24678734, 30951672, 32777102, 33174596, 33514395, 25172923, 29370821, 24194593, 29733404, 24203712, 27188311, 23226127, 22102583, 30668832, 25388151, 26311606, 26553799, 31696236","CORUM, EPSLiM, exRNA Atlas, MSK-KP, MitoCarta, MPM, Naked Mole Rat Genome Resource, PhenoDis, SelenoDB, SPAR, TISdb, Structure Surfer, UCLA Multimodal Connectivity Database, MPD, DASHR, EVpedia" +NIH,US,USA,National Institute of Allergy and Infectious Diseases,15,14,14,"30304689, 30357390, 31649674, 33276297, 27841751, 33780471, 31161204, 33206959, 33290552, 33237286, 33772585, 31667520, 33151284, 31598706","10KIP, ALEdb, cAb-Rep, DBCOVP, IRRMC, MCPdb, PRISMOID, IntAct, GOC, UniProtKB, IEDB, PATRIC, DBAASP, Ensembl" +NIH,US,USA,NIDCR NIH HHS,24,14,12,"21544197, 27504778, 33119754, 34032471, 29126312, 31588509, 31754718, 22759918, 22064862, 30417254, 31504780, 24185697, 25428374, 27738138","CORE, GNPS, HeRA, HSP, Met-DB, MutEx, MiST, SSKB, CGD, iSyTE, miRDB, UCSC Genome Browser" +NIH,US,USA,NIEHS NIH HHS,34,16,10,"31133849, 29985970, 23630576, 24101916, 28557712, 25953081, 33361798, 22171328, 24288368, 23093600, 25326323, 27651457, 29846728, 30247620, 33068428, 29351546","BioPlanet, 
BrainEXP, CMS, FunGene, ICE, MIsoMine, Open Cancer TherApeutic Discovery, seeQTL, RDP, CTD" +NIH,US,USA,NEI NIH HHS,13,9,8,"28977551, 26660198, 23661693, 25887129, 29337142, 29036527, 22276777, 30417254, 27515742","AAgMarker, CB, CMAP, dbVOR, Express, iSyTE, miRdSNP, EnhancerAtlas" +NIH,US,USA,NIAAA NIH HHS,11,7,6,"22080549, 31509535, 31228159, 24146757, 33035346, 22135298, 25514926","GeneWeaver, GutFeelingKB, PhenoGen, SIDD, tRFtarget, PhosphoSitePlus" +NIH,US,USA,NIAMS NIH HHS,6,6,6,"30407583, 31642488, 30397019, 32777102, 33174596, 29487113","FusionGDB, ExonSkipDB, iProteinDB, MSK-KP, MitoCarta, Panorama Public" +NIH,US,USA,National Institute of Mental Health,5,5,5,"32386544, 33599246, 26048622, 29370821, 33206959","CCFv3, Gemma, PedsDTI, PhenoDis, IntAct" +NIH,US,USA,CCR NIH HHS,5,5,5,"23893318, 33086069, 21491493, 27377064, 27587585","CDSA, CellMiner-SCLC, Noncoded Amino acids Database, RID, REDIportal" +NIH,US,USA,NIBIB NIH HHS,11,5,5,"22275896, 26019122, 33174603, 31868683, 27899595","COINS, DX, Datanator, QPN, PANTHER" +NIH,US,USA,National Institute on Aging,14,5,5,"29370821, 29733404, 30668832, 33206959, 31696236","PhenoDis, SPAR, DASHR, IntAct, MPD" +NIH,US,USA,National Cancer Institute,29,23,23,"33086069, 30202990, 30247654, 34127402, 31588509, 32986834, 21491493, 25102069, 27152146, 29186335, 32810235, 33245774, 33442735, 21656910, 32358997, 34014674, 32027495, 33729437, 33206959, 33849445, 33151287, 33237286, 33290554","CellMiner-SCLC, ChIPprimersDB, HACER, Immu-Mela, MutEx, ModelSEED, Noncoded Amino acids Database, Panorama, NCRO, OncoPPi, PCAT, PAGER-CoV, O-GlcNAcAtlas, QuAD, ProNetView-ccRCC, SistematX, tautomeric, TIE, IntAct, TANTIGEN, DrugCentral, UniProtKB, PANTHER" +,US,USA,Howard Hughes Medical Institute,29,26,21,"27907889, 33995899, 27113915, 24002112, 24297255, 33174596, 33780471, 22833564, 24271386, 33653882, 23193263, 28365761, 26138588, 26590259, 26705106, 26612867, 26673716, 27924039, 33221922, 22067452, 25428374, 23203985, 24194605, 
24288371, 23125362, 23193274","denovo-db, DRscDB, Hipposeq, M2SG, iPfam, MitoCarta, MCPdb, NESdb, OnTheFly, Mycobacterial Systems Resource, PrePPI, PhagesDB, SmedGD, UCSC Genome Browser, WheatExp, Dfam, Pfam, FlyRNAi, WormBase, Rfam, ENCODE" +,US,USA,PHS HHS,23,19,16,"23193282, 26438539, 26602695, 21249531, 26503248, 23175615, 22610854, 23093593, 23125372, 25414355, 22135296, 25392405, 23180793, 24225323, 24293654, 26578581, 25510499, 25414341, 23110173","Allen Brain Atlas, CRISPRz, CRN, DIADEM, dbMAE, EuPathDB, IEDB-AR, PaVE, non-B, rrnDB, VectorBase, non-human primate reference transcriptome resource, IPD, PATRIC, SEED, DBAASP" +,US,USA,Cancer Prevention and Research Institute of Texas,15,11,11,"33211888, 30418591, 32576192, 33137204, 32810235, 29092939, 31432762, 31672983, 31598702, 31680168, 33021634","CSEA-DB, HumanNet, GPSno, KinaseMD, PCAT, TCPA, tRic, SPP, VISDB, TSEA-DB, NDB" +,US,USA,U.S. Department of Energy,15,8,8,"30239679, 31665416, 32986834, 33104790, 32882008, 30208844, 33137183, 33152092","AgBioData, Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters, ModelSEED, PhycoCosm, TBDB, WoM, IMG/VR, GOLD" +NIH,US,USA,"National Heart, Lung, and Blood Institute",12,11,8,"33973408, 31701147, 25102069, 27643925, 33206959, 33290552, 33237286, 30395331, 27602200, 31713623, 34741192","hu.MAP, LINCS, Panorama, StemCellCKB, IntAct, GOC, UniProtKB, RGD" +NIH,US,USA,National Institute for Health Research (NIHR),9,6,6,"31095607, 24217912, 27976751, 24297257, 25414323, 26919060","CiliaCarta, HPO, Hepitopes, NECTAR, AFND, MSeqDR" +NIH,US,USA,National Institute of Diabetes and Digestive and Kidney Diseases,6,6,6,"25333826, 33973408, 34859531, 33206959, 33290552, 33237286","Complex Mixture Analysis by NMR, hu.MAP, gnomAD, IntAct, GOC, UniProtKB" +,US,USA,NSF,7,6,6,"29206899, 34514416, 31642487, 28891124, 27450113, 33170273","MOSAIC, 
SCISSOR√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¢, AraPheno, DOCKGROUND, PDB, Gramene" +,US,USA,United States Department of Agriculture,31,5,5,"33068435, 32941621, 30407532, 33170273, 31598706","AcrDB, dbCAN-PUL, MaizeGDB, Gramene, Ensembl" +,US,USA,National Institute of Food and Agriculture,7,5,5,"30321383, 
26705106, 30357347, 31647100, 31680153","CuGenDB, WheatExp, GDR, BGD, Plant Reactome" +NIH,US,USA,National Eye Institute,5,5,5,"29337142, 33206959, 33290552, 30417254, 33237286","Express, IntAct, GOC, iSyTE, UniProtKB" +NIH,US,USA,FIC NIH HHS,7,4,4,"27504778, 23161692, 23457042, 22116064","GNPS, SchistoDB, RCPedia, TDR Targets" +,US,USA,U.S. Department of Agriculture,15,4,4,"30239679, 31210271, 28415075, 33264401","AgBioData, Chickspress, HopBase, SoyBase" +,EU,INT,European Commission,7,4,4,"31066443, 33125055, 30020414, 32248568","DrugComb, HbVar, SKEMPI, FINDbase" +,US,USA,Welch Foundation,6,4,4,"29890119, 30418591, 33973408, 33021634","FlyXCDB, HumanNet, hu.MAP, NDB" +NIH,US,USA,National Institute of Health,5,4,4,"34010390, 32436932, 27188311, 30407596","Echinobase, miRactDB, Structure Surfer, Cancer3D" +NIH,US,USA,National Institute on Drug Abuse,4,4,4,"26048622, 31259547, 27643925, 31696236","PedsDTI, PerMM, StemCellCKB, MPD" +NIH,US,USA,NIDCD NIH HHS,10,3,3,"27193158, 23774715, 26323714","ASL-LEX, Autism Brain Imaging Data Exchange, RNASeqMetaDB" +NIH,US,USA,NIMHD NIH HHS,5,3,3,"24994456, 25190456, 25378335","BorreliaBase, CCGD, MeT-DB" +,US,USA,Gordon and Betty Moore Foundation,3,3,3,"27664130, 33166387, 31722421","ENVO, International Nucleotide Sequence Database Collaboration, ENA" +,US,USA,Alfred P. 
Sloan Foundation,3,3,3,"34158935, 30985146, 30371820","GEMI, ProteinExplorer, UNITE" +,US,USA,Oregon State University,3,3,3,"28415075, 28490127, 26973684","HopBase, Milk bioactive peptide database, FragariaCyc" +,US,USA,Foundation for the National Institutes of Health,6,3,3,"33514395, 31283070, 32486891","MPM, VIPdb, Reactome" +NIH,US,USA,National Institute of Child Health and Human Development,3,3,3,"26048622, 31103066, 34387941","PedsDTI, Placental Atlas Tool, PharmGKB" \ No newline at end of file diff --git a/analysis/funders_geo_counts_2023-02-10.csv b/analysis/funders_geo_counts_2023-02-10.csv new file mode 100644 index 0000000..638e73e --- /dev/null +++ b/analysis/funders_geo_counts_2023-02-10.csv @@ -0,0 +1,30 @@ +"country","country_3","count_agencies","count_resources","agency_names","biodata_resource_names" +"Argentina","ARG",3,6,"Universidad Nacional de Quilmes, Agencia Nacional de Promoci√≥n Cient√≠fica y Tecnol√≥gica, Consejo Nacional de Investigaciones Cient√≠ficas y T√©cnicas","CoDNaS-RNA, MobiDB, PED, articles.ELM, DisProt, ELM" +"Australia","AUS",2,14,"National Health and Medical Research Council, Australian Research Council","AcrHub, DisBind, DEE2, Haemopedia, PRISMOID, ProCarbDB, ThermoMutDB, Vesiclepedia, CropSNPdb, CrustyBase, Microndata, ExoCarta, RaftProt, OGEE" +"Austria","AUT",1,6,"Austrian Science Fund FWF","CANGS, COMMODE, EffectiveDB, EndoDB, PoPoolation, probeBase" +"Belgium","BEL",1,3,"Research Foundation Flanders","DisProt, PDBe-KB, MobiDB" +"Brazil","BRA",2,6,"Coordena√ß√£o de Aperfei√ßoamento de Pessoal de N√≠vel Superior, Conselho Nacional de Desenvolvimento Cient√≠fico e Tecnol√≥gico","CitrusKB, ExVe, PlanTE-MIRDB, Propedia, ThermoMutDB, SpliceProt" +"Canada","CAN",7,48,"Canadian Institutes of Health Research, CIHR, Natural Sciences and Engineering Research Council of Canada, Genome Canada, Canada Foundation for Innovation, Canada Research Chairs, Ontario Research Fund","BacMap, CiliaCarta, DrugBank, CYCLoPs, DNAmod, DIPPER, ECMDB, 
HAltORF, KID, InnateDB, iRefWeb, IHEC, MOSAIC, MouseBytes, NetwoRx, oRNAment, PhenoM, PhenomeCentral, PseudomonasGenome, SMPDB, TopFIND, SYNERGxDB, YMDB, YeTFaSCo, TrypsNetDB, DGV, CARD, JASPAR, PSORTdb, ChromoHub, CRCgene, MaveDB, NeuroGeM, PhosphoGRID, QPN, AYbRAH, CLRP, Gemma, PASS, ProtDataTherm, IntAct, PED, MRMAssayDB, MarkerDB, PathDIP, UbiHub, IID, Gramene" +"China","CHN",52,411,"National Natural Science Foundation of China, National Key Research and Development Program of China, National Science Foundation, China Postdoctoral Science Foundation, Fundamental Research Funds for the Central Universities, Chinese Academy of Sciences, National Key R&D Program of China, National Key Research and Development Program, Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Harbin Medical University, Natural Science Foundation of Heilongjiang Province, National Key Research Program of China, National Science Foundation of China, China Scholarship Council, International Partnership Program of the Chinese Academy of Sciences, 13th Five-year Informatization Plan of Chinese Academy of Sciences, the National Natural Science Foundation of China, National High Technology Research and Development Program of China, Natural Science Foundation of Guangdong Province, National Key R&D Program, Ministry of Science and Technology, Shanghai Jiao Tong University, Program for Guangdong Introducing Innovative and Entrepreneurial Teams, National Program on Key Basic Research, Beijing Natural Science Foundation, Shanghai Municipal Science and Technology Major Project, CAS, Natural Science Foundation of Tianjin, Huazhong Agricultural University Scientific & Technological Self-innovation Foundation, Priority Academic Program Development of Jiangsu Higher Education Institutions, Shanghai Sailing Program, Shanghai Municipal Education Commission, Zhejiang Provincial Natural Science Foundation of China, Natural Science Foundation, Shanghai 
Municipal Science and Technology, Science and Technology Program of Guangzhou, Guangdong Introducing Innovative and Entrepreneurial Teams, National Key Plan for Scientific Research and Development of China, National Science Fund for Distinguished Young Scholars, Strategic Priority Research Program of the Chinese Academy of Sciences, the National Key Research and Development Program of China, Zhejiang Provincial Natural Science Foundation, Chinese University of Hong Kong, Fundamental Research Funds, National Programs for High Technology Research and Development, Natural Science Foundation of Shanghai, Beijing Nova Program, Fundamental Research Funds for the Provincial Universities, Heilongjiang Touyan Innovation Team Program, International Science & Technology Cooperation Program of China, Special Project on Precision Medicine under the National Key R&D Program","AddictGene, Animal-ImputeDB, Animal-APAdb, AppleMDO, ASGDB, ASD, ASRD, ASFVdb, ASER, AtCircDB, ATD, ATdb, ATACdb, AtMAD, AWESOME, BBCancer, BC-TFdb, BGVD, BioM2MetDisease, BrainEXP, CancerImmunityQTL, CancerSEA, CancerSplicingQTL, CancerTracer, CanImmunother, CARDIO-LNCRNAS, CardioTF, CATA, CAUSALdb, CeleryDB, Cellinker, CellMarker, CellTalkDB, CFEA, CHDGKB, ChemHub, ChineseGliomaGenomeAtlas, CIGene, CircAtlas, circExp, CIRCpedia, circR2Cancer, CircR2Disease, circVAR, CKTTD, CMAUP, CMBD, CMVdb, CNAdbCC, cncRNAdb, CoFly, ColorCells, CottonGVD, CovalentInDB, CoVdb, CRISPRlnc, CRPMKB, CrusTF, dbPHCC, DNetDB, DisBind, Dynamic-BM, DRDB, dbCRSR, dbLGL, dbCID, dbCPM, DSMNC, dbMPIKT, dbHDPLS, dbInDel, DrugCombDB, DrugSpaceX, CyanoPATH, D3DistalMutation, DevOmics, GAMDB, FVD, EWASdb, EVmiRNA, EndoDB, EWASAtlas, ETCM, ENPD, EnDisease, GEDS, Gene4Denovo, ENdb, FluReassort, ETph, FRCD, ExoceRNAAtlas, EnzyMine, FishDB, FifBase, FAWMine, EyeDiseases, FertilityOnline, EnhFFL, Grape-CRISPR, HCSGD, HEROD, IDPM, HDncRNA, HAMdb, HCCDB, iDog, GMrepo, HybridMolDB, GWASAtlas, gutMDisorder, GESUR, GRONS, HKPocket, HotSpot3D, 
gutMEGA, hTFtarget, IDDB, GIMICA, GRNdb, HERB, iCysMod, HIR, GPCards, GGVD, HisPhosSite, HFBD, GWH, HBFP, HFIP, LiverAtlas, LnChrom, lncRNAnet, LncACTdb, KnockTF, LnCeVar, MaGenDB, LncTarD, LLPSDB, LncSpA, MACSNVdb, IRESbase, lncR2metasta, LncAS2Cancer, LncSEA, LncExpDB, KNIndex, LnCeCell, InSexBase, M6ADD, ImmReg, MiasDB, MetSigDis, MethCNA, MCENet, MepmiRDB, MDR, MNDR, MosaicBase, miRNASNP-v3, MASI, MloDisDB, MolluscDB, MicroPhenoDB, Molluscamitochondrialdatabase, ncDR, PepBDB, PADFrag, NucMap, OncoBase, OsteoporosAtlas, PGG.Han, PCaLiStDB, PDIR, ncEP, NoncoRNA, Nc2Eye, ncRPheno, ncRI, OncotRF, ncRNAVar, PDmethDB, NPBS, NBIGV, NGD, OGP, PCPD, NUCOME, Nabe, PLNlncRbase, POSTAR2, piRTarBase, qPhos, PhaSepDB, QTLbase, PSMD, PmiREN, prokaryoticantiviraldefensesystem, PGG.SNV, PlantCircNet, PsyMuKB, QSIdb, PROTAC-DB, piRNA-eQTL, PMI-DB, SecReT6, SilkPathDB, RED, RRDB, SEGreg, SCRIPT-MAP, SDADB, realDB, SEdb, SAGD, SNP2APA, SilkDB, SpatialDB, RNAactDrug, RNAInter, RSVdb, saponinmassspectrometrydatabase, RIGD, SC2disease, RMVar, SilencerDB, RASP, riboCIRC, RhododendronPlantGenomeDatabase, RPocket, TarNet, StemCellCKB, SSER, Stress2TF, TRCirc, TransmiR, SymMap, TPIA, TRlnc, TE141K1, TeroKit, TCMIO, TeaCoN, STAB, TCRdb, TransCirc, TISCH, ToxinDB, TarDB, TCM-Blast, TrackingAirPollutioninChina, EOGD, PPGD, TSNAdb, Victors, CistromeDB, UVGD, uORFlight, WeiBI, VirusCircBase, YIR, tsRBase, VARAdb, 2019nCoVR, TUPDB, ViralPutativeG-quadruplex, MPD, LncRNADisease, RPFdb, DoriC, PED, HMDD, WDSPdb, LSD, NPInter, VariBench, deepBase, Lnc2Cancer, MetaADEDB, CEG, UbiNet, SorGSD, BacWGSTdb, ICEberg, piRBase, NONCODE, AnimalTFDB, LncBook, LincSNP, AdditiveChem, ConoMode, EWAS, HVIDB, MENDA, MeDAS, mPPI, PhoPepMass, PID, PSDX, OGEE, GVM, AcrDB, AgBioData, AncestralGenomes, ANISEED, ASNR, CHESS, D-PLACE, dbCAN-PUL, DescribePROT, Datanator, EchinoDB, ENVO, Echinobase, GPs, GutFeelingKB, GlyMDB, GSDB, iTAP, InteracDome, MetabolicInsilicoNetworkExpansions, MOSAIC, MIBiG, MtSSPdb, ModelSEED, 
PdumBase, OCELOT, ProKinO, PMKB, REDfly, StructureSurfer, SpinachBase, TMB, STAGdb, TBDB, tRFtarget, DOCKGROUND, VPGD, MirGeneDB, MGPPortal, PharmGKB, PDB, NDB, GOC, DrugCentral, PlantReactome, PANTHER, CDD, InterPro, Gramene, Ensembl, iEKPD, MMHub, DrLLPS, EPSD, GreenCircRNA, MeLAD, PTMD, PRISMOID, Plant-ImputeDB, EDK, gcType, GSA, HeteroMeth, GliomaDB, GREG, PDXliver, RabGTD, PhenoModifier, YaTCM, WGVD, LncCeRBase, AnnoLnc, EuRBPDB, iCAV, PLMD, SAGER, THANATOS, TetrahymenaComparativeGenomicsDatabase, SPDB, VPTMdb, CottonFGD, DIPPER, NanDeSyn, coexpressMAP, LncRNA2Target, iProX, DNMIVD, Mr.Vc, PRMdb, DrugComb, ncRNA2MetS, StreptomeDB, gcMeta, BmncRNAdb, COGVIC, CuAS, TeaAS, PlantPAN, EXPath, DriverDB, HFMDB, PlaD, ncRNA-eQTL, KVarPredDB, proGenomes2, CARMO, dbAMP, dbPTM, SpinachDB, HisgAtlas" +"Czech Republic","CZE",1,3,"Ministry of Education","AmtDB, CMEP, PSRN" +"Denmark","DNK",6,29,"Novo Nordisk Fonden, Novo Nordisk Foundation Center for Protein Research, NNF Center for Biosustainability, Novo Nordisk Foundation, Villum Fonden, Lundbeck Foundation","ALEdb, antiSMASH, BiG-FAM, HemaExplorer, GNPS, MIBiG, MtSSPdb, SMBP, TELEMED, BiGG, FANTOM5, GPCRdb, JASPAR, bio.tools, DistiLD, DISEASES, RAIN, STRING, TISSUES, TCRD, miRandola, DrugCentral, eggNOG, STITCH, CMRegNet, CoVex, DIGGER, OMDB, GPCRDB" +"EU","INT",14,111,"European Research Council, Horizon 2020, European Commission FP7, European Molecular Biology Laboratory, European Regional Development Fund, Seventh Framework Programme, Open Targets, FEDER, Innovative Medicines Initiative, European Social Fund, European Union, European Bioinformatics Institute, ELIXIR, European Commission","BACTOME, CancerPanorOmics, CGOB, DrugComb, DualSeqDB, euL1db, enviPath, ENVO, FRED, EndoDB, Fuzzle, iFISH, LoQAtE, JASPAR, InvFEST, LymphoAtlas, MINAS, MyMpn, OMDB, POGO-DB, PhylomeDB, probeBase, SIGNOR, SwissPalm, SweGen, SysteMHC, SynGO, yApoptosis, WALTZ-DB, proGenomes2, ViruSurf, IntAct, eggNOG, PlanMine, GPCRdb, PRIDE, Bgee, 
DisProt, DIGGER, FireProtDB, FAANG, liqDB, ModelSEED, MESOCOSM, PepTherDia, RetroRules, MGnify, WikiPathways, MobiDB, PED, Pfam, Rfam, ELM, ENA, Ensembl, CADRE, InterPro, MINT, MuteinDB, PORCN, SitEx, TFClass, UniPathway, VectorBase, LOVD, ALSoD, BRAD, UniProtKB, Rhea, BioSamples, eQTL, SKEMPI, GOC, BioModels, GENCODE, IGSR, InternationalNucleotideSequenceDatabaseCollaboration, ArrayExpress, Gramene, 2DProts, AntimicrobialEnzymeCombinationsDatabase, CSVS, MRMAssayDB, MENSAdb, PDBe-KB, Peryton, TMSNP, ValTrendsDB, mirEX, AHCODA-DB, CiliaCarta, CyFi-MAP, NaDH, NvERTx, SulfAtlas, BGD, chewie-NS, SITVIT2, DisGeNET, hPSCreg, UbiHub, gcType, OMEGA-NET, Translocatome, Virxicon, TOXsIgN, wwPDB, PhaSepDB, Reactome, HbVar, FINDbase" +"France","FRA",5,23,"Agence Nationale de la Recherche, Centre National de la Recherche Scientifique, CNRS, French National Research Agency, Fondation pour la Recherche M√©dicale","AgBioData, ANISEED, CALR-ETdb, LeGOO, LymphoAtlas, IsoArcH, MiSynPat, MEGALEX, monoterpeneindolealkaloiddatabase, MtExpress, MESOCOSM, OrthoInspector, ParameciumDB, PhytoREF, RESPIRE, PED, LIMONADA, T1TAdb, CRISPRCasdb, EctoGEM, GRALL, RetroRules, IntAct" +"Germany","DEU",7,33,"Deutsche Forschungsgemeinschaft, Federal Ministry of Education and Research, Bundesministerium f√ºr Bildung und Forschung, Max-Planck-Gesellschaft, Saarland University, German Research Foundation, DFG","AroCageDB, AureoWiki, CiliaCarta, EpiRegio, GH19ED, MitoCarta, OptoBase, OmniPath, QSDB, SCEGRAM, SDRED, Traitpedia, SuperTCM, BacDive, BACTOME, DIGGER, PID-NET, TREND-DB, ProteomicsDB, CoxBase, ExED, eggNOG, STRING, A.P.E.S, MeFSAT, TExAs, AnimalsncRNAAtlas, PLSDB, miRPathDB, MaleFertilityGeneAtlas, StreptomeDB, SynGO, Rfam" +"Hungary","HUN",1,6,"Hungarian Academy of Sciences","DIBS, DisProt, FoldamerDB, MFIB, PhaSePro, ELM" +"India","IND",8,35,"Council of Scientific and Industrial Research, Science and Engineering Research Board, Indian Council of Medical Research, Department of Biotechnology, 
Department of Science and Technology, University Grants Commission, Department of Biotechnology, Ministry of Science and Technology, Department of Biotechnology, Ministry of Science and Technology, India","BoMiProt, Cancertope, CicerTransDB, circad, HSPMdb, MSDB, mitoepigenomeKB, MorCVD, PanGFR-HM, PVsiRNAdb, PRP, TopicalPdb, LncRBase, databaseofcancermutantproteindomains, FCCP, MeFSAT, SWI/SNFInfobase, miPepBase, PSCRIdb, SPGDB, TrypInDB, BioFuelDB, PtRFdb, TGV, SWITCHES, MPTherm, PROXiMATE, SAPdb, CancerEnD, HPREP, HuVarBase, KiPho, PCOSKBR2, MycoTRAP-DB, SoyTD" +"Italy","ITA",1,7,"Telethon","CiliaCarta, HOCTARdb, HuPho, MANTRA, MINT, RettNetworkedDatabase, IntAct" +"Japan","JPN",6,31,"Japan Society for the Promotion of Science, Japan Agency for Medical Research and Development, Japan Science and Technology Agency, Ministry of Education, Culture, Sports, Science and Technology, JSPS, National Bioscience Database Center","CancerProView, DNApod, dbCNS, FMODB, KampoDB, MOSAIC, PTP-central, PubChemQC, PyDISH, ViBrism, MitoFish, FANTOM5, ATTED-II, BSM-Arc, HpBase, ICSCB, NARD, OryzaGenome, jMorp, COXPRESdb, InternationalNucleotideSequenceDatabaseCollaboration, DDBJ, AOE, GlycoPOST, jPOST, KAIKObase, GlyTouCan, KEGG, ANISEED, SoybeanProteomeDatabase, MBGD" +"Korea","KOR",1,9,"National Research Foundation of Korea","BiomeNet, GenomewidePDB, HumanNet, iCSDB, MENT, PFDB, STADIUM, ChimerDB, SMBP" +"Netherlands","NLD",2,15,"Dutch Research Council (NWO), Graduate School for Experimental Plant Sciences","AHCODA-DB, AraQTL, AutismBrainImagingDataExchange, CiliaCarta, FAIRDOMHub, FSD, HDG, MIBiG, MENSAdb, BDB, antiSMASH, MSeqDR, KLIFS, JASPAR, BiG-FAM" +"Norway","NOR",1,3,"Norges Forskningsr√•d","EBRAINS, FPADMET, SalmoBase" +"Poland","POL",1,4,"Narodowe Centrum Nauki","LuluDB, InterMetalDB, PhyMet2, mirEX" +"Portugal","PRT",1,5,"Funda√ß√£o para a Ci√™ncia e a Tecnologia","chewie-NS, DisProt, CyFi-MAP, LEGE, MENSAdb" +"Russia","RUS",1,7,"Russian Science Foundation","GEMI, 
MutHTP, VDJdb, MirGeneDB, CSDB_GT, SitEx, GTRD" +"Saudi Arabia","SAU",1,3,"King Abdullah University of Science and Technology","IBDDB, PathoPhenoDB, LncBook" +"Spain","ESP",5,17,"Instituto de Salud Carlos III, Ministerio de Ciencia, Innovaci√≥n y Universidades, Ministerio de Econom√≠a y Competitividad, MINECO, Generalitat de Catalunya","APID, FHLdb, GRINdb, liqDB, PACHIN, CANNUSE, TMSNP, proGenomes2, DisProt, PopHumanScan, MAHMI, T-ARDIS, SKEMPI, DisGeNET, NoncodedAminoacidsDatabase, GSAD, BCE" +"Sweden","SWE",1,4,"Vetenskapsr√•det","DisProt, OMDB, VariBench, FunCoup" +"Switzerland","CHE",3,33,"Swiss National Science Foundation, Swiss Institute of Bioinformatics, Swiss Federal Government","ASAP, Bgee, CEGA, ChlamDB, CoevDB, enviPath, GNPS, MAdb, MINAS, MetaNetX/MNXref, NaDH, PolyASite, Selectome, SNP2TFBS, SwissRegulon, SugarBindDB, SysteMHC, UCNEbase, GDB17, OrthoDB, GETPrime, ABCD, Gene3D, GENCODE, UniCarbKB, EPD, EuropePMC, OMA, GPCRdb, eggNOG, STRING, GOC, UniProtKB" +"Taiwan","TWN",2,14,"Ministry of Science and Technology, Taiwan, Taiwan Ministry of Science and Technology","CSmiRTar, DockCoV2, LCMD, MitoTox, MycoTRAP-DB, PSRN, SkinSensDB, TACCO, YCRD, YARG, OrchidBase, HBDB, LipidPedia, YeastPhosphoinositide-BindingProteins" +"UK","GBR",10,225,"Biotechnology and Biological Sciences Research Council, Wellcome Trust, Medical Research Council, Cancer Research UK, British Heart Foundation, Natural Environment Research Council, Engineering and Physical Sciences Research Council, Parkinson's UK, Cystic Fibrosis Trust, UK Royal Society-Newton Advanced Fellowship","AFFINOMICS, ArabidopsisNetworkAnalysisPipeline, Araport, ArchDB, AVIMM, biochem4j, BioModels, BioSharing, BRAINS, CAZypedia, CerealsDB, ChEBI, CiliaCarta, CODEX, ComPPI, crisprSQL, dcGO, D(2)P(2, DAA, GeeFu, ENA, ELM, Gene3D, GenDR, FlyAtlas, Genome3D, GeneFriends, EnsemblPlants, FunTree, FAIRDOMHub, EMDB, GeneATLAS, GENCODE, EnteroBase, FAANG, GOBLET, GlycoMob, HPMC, HLA-ADR, GPs, InterPro, InterStoreDB, 
LjGEA, LipidHome, iLIR, IPD-MHC, iLIR@viral, iProX, MetaboLights, Marmal-aid, MemProtMD, MIBiG, Missense3D-DB, PCDDB, NakedMoleRatGenomeResource, OMA, PDBe, PDBe-KB, ORDER, OmniPath, PICCOLO, pubmed2ensembl, PhosphoGRID, PHI-base, PREDICTS, PITDB, SignaFish, SalmoNet, SeedStor, RetroRules, SARSCoV-2, StemCellDiscoveryEngine, TAIR, TIMBAL, TreeFam, SuperNaturalII, SUPERFAMILY, TheMouseGenomesProject, ThaleMine, InternationalNucleotideSequenceDatabaseCollaboration, ExpressionAtlas, TrypanoCyc, WaspAtlas, WormBaseParaSite, CATH, MGnify, UKImmunologicalToolbox, EnsemblGenomes, LAMP, Ensembl, RNAcentral, ARN, Pfam, FANTOM5, UniProt, Rfam, MEROPS, SKEMPI, miRBase, BioGRID, WormBase, SCOP, ENCODE, GOC, antiSMASH, eggNOG, AFND, Gramene, PRIDE, UniProtKB, ArrayExpress, GlyTouCan, OAS, JASPAR, IPD, BioSamples, PDB, BreCAN-DB, CCDS, CGOB, ChromoHub, COSMIC, CREDO, CSA, DECIPHER, DARNED, DGVa, diXa, DrugAge, GeneDB, GeneTack, EuPathDB, EuropePMC, eQTL, HSPIR, HAGR, Geroprotectors, HipSci, Hepitopes, ICTV, LongevityMap, IGSR, modENCODE, modMine, metabolicMine, NeuroGeM, NECTAR, PrionHome, PPD, PomBase, PlasmoGEM, PhenoPlasm, ProCarbDB, SIFTS, SATuRN, SkeletalVis, SurvCurv, SureChEMBL, SysteMHC, ThermoMutDB, IMPC, ElectronicMouseAtlasofGeneExpression, HumanDiseaseOntology, UbiHub, HGNC, ChEMBL, RefSeq, EPD, wwPDB, IntAct, IUPHAR-DB, MSeqDR, SANCDB, TDRTargets, RepeatsDB, CARD, AmyPro, BRAINUK, CRCgene, DIADEM, DatabaseofInstrumentsforResourceUseMeasurement, DIBS, dendPoint, GeneProf, HPO, MitoMiner, mirDNMR, MARDy, MeLAD, RVS, recount3, STCRDab, FlyBase, EVpedia, GWASCentral, Reactome, dbNSFP, articles.ELM, HTS-DB, MutationAligner, SNPnexus, IPD-IMGT/HLA, PED, BCCTBbp, MobiDB, GOA, MaConDa, MRIdb, UCLLDLR, BiosurveillanceAnalyticsResourceDirectory, COG, Littorinasequencedatabase, MeDAS, PR(2, PhytoREF, SuperbaSE, tropiTree, BioHub, SAbDab, SynBioHub, SEVA-DB, CyFi-MAP, PGG.Han, PGG.SNV, 2019nCoVR" +"US","USA",54,570,"NIGMS NIH HHS, NCI NIH HHS, NHGRI NIH HHS, National Institutes 
of Health, Intramural NIH HHS, NIAID NIH HHS, NIDDK NIH HHS, NHLBI NIH HHS, NLM NIH HHS, NIMH NIH HHS, NCATS NIH HHS, NCRR NIH HHS, NIH HHS, NIH, NICHD NIH HHS, National Human Genome Research Institute, National Institute of General Medical Sciences, NIDA NIH HHS, NINDS NIH HHS, NIA NIH HHS, National Institute of Allergy and Infectious Diseases, NIDCR NIH HHS, NIEHS NIH HHS, NEI NIH HHS, NIAAA NIH HHS, NIAMS NIH HHS, National Institute of Mental Health, CCR NIH HHS, NIBIB NIH HHS, National Institute on Aging, National Cancer Institute, Howard Hughes Medical Institute, PHS HHS, Cancer Prevention and Research Institute of Texas, U.S. Department of Energy, National Heart, Lung, and Blood Institute, National Institute for Health Research (NIHR), National Institute of Diabetes and Digestive and Kidney Diseases, NSF, United States Department of Agriculture, National Institute of Food and Agriculture, National Eye Institute, FIC NIH HHS, U.S. Department of Agriculture, Welch Foundation, National Institute of Health, National Institute on Drug Abuse, NIDCD NIH HHS, NIMHD NIH HHS, Gordon and Betty Moore Foundation, Alfred P. 
Sloan Foundation, Oregon State University, Foundation for the National Institutes of Health, National Institute of Child Health and Human Development","AAgMarker, ADPriboDB, AnalysisofBreastCancerGWAS, AraPath, AutismBrainImagingDataExchange, BindingMOAD, Bioclock, BioLiP, BioModels, BiOnIC, bNAber, BorreliaBase, C-terminome, Cancer3D, CarbonylDB, CARLSBAD, CeNDR, ChemProt-2.0, ChEpiMod, CHESS, CIL-CCDB, CistromeCancer, CMAP, COLMARLipids, ComplexMixtureAnalysisbyNMR, CoV3D, CRN, dictyBase, DegraBase, DNASU, dbVOR, DeTEXT, DX, DMD, dbMAE, DigitalDevelopment, dbPEC, denovo-db, dbCAN-seq, DNAproDB, DescribePROT, Datanator, DRscDB, GeneExpressionBarcode, FlyExpress, Genes2FANs, EcoCyc, EcoGene, Genome-WideDockingDatabase, ESCAPE, FreeSolv, GenoBase, GenomeSpace, Enrichr, Express, FlyXCDB, EMDB, FusionGDB, exRNAAtlas, ExonSkipDB, hmChIP, HINT, GFDB, HEXEvent, HumanProteinpedia, HippDB, GLASS, HistoneAntibodySpecificityDatabase, GNPS, HumanNet, GlyMDB, HeRA, HbVar, hu.MAP, IsoBase, MACiE, InterPro, LAHEDES, INstruct, IIMDB, M2SG, JASPAR, isoMETLIN, IIIDB, IMG-ABC, ImmuNet, iGNM, KERIS, IRRMC, iPTMnet, ISVdb, iProteinDB, InteracDome, LabxDB, MnM, MedicagoPhosphoProteinDatabase, miR-EdiTar, MonarchBase, MelanomaDB, MediaDB, miRDB, MUFOLD-DB, MouseNet, MutationAligner, Met-DB, MIST, MOSAIC, ModERN, MiPanda, MaveDB, MiST, MitoCarta, PeanutDB, NESdb, OnTheFly, NeXO, PeptiSite, PeptideAtlas, Panorama, PDBe, PDBFlex, NCRO, PAMBD, PanoramaPublic, PDBe-KB, OpenCancerTherApeuticDiscovery, Polbase, PrimerBank, PKKB, PrePPI, Planform, PMP, RADAR--a, PhosphoNetworks, PPD, PyIgClassify, PlantOrDB, PheKB, PhagesDB, PolyA_DB, ProtaBank, piRTarBase, ProteinExplorer, SBKB, RNACoSSMos, ScerTF, RMDB, RegPrecise, SelenoDB, SFLD, SkateBase, rrnDB, SmedGD, RegulonDB, SM-TF, RNAStructuromeDatabase, SPAR, SequencEnG, REDfly, SliceIt, REPIC, SELAdb, 
SCISSOR√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√Ǭ¢√ɬÉ√ǬÇ√ɬÇ√Ǭà√ɬÉ√ǬÇ√ɬÇ√Ǭö√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√Ǭá√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¨√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭ¢, recount3, TGED, Spliceosome, SWEETLEAD, TCDB, tRFdb, StructureSurfer, SysteMHC, TADKB, TCR3d, ZFNGenome, EcoliWiki, zfishbook, PhosphoSitePlus, UCLAMultimodalConnectivityDatabase, cBioPortal, P(3)DB, PortEco, non-humanprimatereferencetranscriptomeresource, 
DOCKGROUND, UET, XTalkDB, WikiPathways, UniProtKB, SGR, PANTHER, NDB, SCOPe, dbSNP, GRASP, BioGPS, Lynx, UniProt, FlyRNAi, ATLAS, MetaCyc, DASHR, TFBSshape, PDB, ENCODE, GOC, PED, GlyTouCan, antimicrobialpeptidedatabase, GENCODE, iSyTE, VIOLIN, CTD, LNCipedia, EnhancerAtlas, PLncDB, MeT-DB, STRING, Gramene, PRIDE, FlyBase, MODOMICS, ACSR, APAatlas, Bgee, Cancer-Immu, CCGD, ccmGDB, CDSA, CellMiner-SCLC, ChIPprimersDB, CircNet, CMEP, CMS, CNCDatabase, DGA, curatedOvarianData, DGIdb, DSigDB, Datasets2Tools, DrugCentral, Drugmonizome, GeneSigDB, EDR, GDISC, GlycoFish, HOMER, HACER, GutFeelingKB, LinkedOmics, Immu-Mela, MSigDB, MTB, mutLBSgeneDB, MutEx, ModelSEED, MPM, MiREDiBase, NoncodedAminoacidsDatabase, NGS, NURBS, NetGestalt, PAGER, ORegAnno, OncoPPi, PathwayCommons, PAGER-CoV, O-GlcNAcAtlas, QuAD, PMKB, PICKLES, ProNetView-ccRCC, RID, SBCDDB, SistematX, StemCellDiscoveryEngine, Transcriptomine, targetHub, TANRIC, TCCR, SZGR, TissGDB, TCPA, TC3A, TCRD, TSGene, MGI, UCSCGenomeBrowser, Cistrome, CistromeDB, RCSBPDB, Reactome, TANTIGEN, MPD, IPD, ChEMBL, GXD, REDIportal, MGD, AncestralGenomes, atSNP, BARD, BIND, CCDS, CharProtDB, CistromeFinder, CressInt, e-GRASP, dbMTS, GeneTack, factorbook, FireDB, ENVO, HaploReg, GOA, HPO, GtRNAdb, HUMA, GWAS, gnomAD, IntegratedMicrobialGenomesandMetagenomes, modENCODE, modMine, MethylomeDB, MetaRef, MSeqDR, MG-RAST, OMIM, PHARE-KB, PhenoDB, PlantReactome, PMS_DN, SIFTS, SANCDB, RISE, TAIR, TIGRFAMs, SynLethDB, Terabase, TEHub, InternationalNucleotideSequenceDatabaseCollaboration, KB, IMPC, VTO, HumanDiseaseOntology, UniPROBE, VIPdb, SGD, ZFIN, Ensembl, HGNC, BioPortal, RNAcentral, Dfam, FANTOM5, UniCarbKB, APPRIS, HEDD, Rfam, MEROPS, WormBase, dbNSFP, PharmGKB, IGSR, ExpressionAtlas, Pfam, ArrayExpress, ChiTaRS, GOLD, RGD, AgBioData, ADeditome, ASL-LEX, Chickspress, CoCoCoNet, COG, CORUM, CRAFT, CSEA-DB, DKK, HoTResDB, LINCS, KinaseMD, LitCovid, MetabolicInsilicoNetworkExpansions, Milkbioactivepeptidedatabase, MDB, mGAP, 
MicrobiomeLearningRepo, MIBiG, OGRDB, PKAD, RNACharacterizationofSecondaryStructureMotifs, TMB, tRFtarget, TIE, ToppCell, VISDB, TSEA-DB, VetCOT, MGPPortal, ExoCarta, BioGRID, DSLD, IEDB-AR, ClinVar, CDD, RefSeq, PubChem, BGMUT, BioProject, Bookshelf, CellMinerHCC, CHEAR, Clone, COGs, CRISPRz, DGVa, dbGaP, GBM-BioDP, IBIS, GWASdb, GEO, GTR, HTD, GermlncRNA, iCite, iRefIndex, LabeledIn, NCBITaxonomy, PaVE, PedsDTI, ProPortal, PlacentalAtlasTool, SemMedDB, StRAP, tautomeric, COMBREX, ZInC, VirusVariation, CGD, MMDB, GenBank, 10KIP, ALEdb, AspGD, cAb-Rep, CDG, DFRMLI, HelmCoP, HAND, IRD, MCPdb, PGAT, PolymiRTS, VirmugenDB, APD3, Vaxar, Victors, ViPR, Nematode.net, Gene3D, EuPathDB, MEGARes, VIPERdb, PATRIC, SEED, DBAASP, RPFdb, dkNET, FunGene, HAPPI, MonogenicDiabetesRegistry, OPM, PCD, RDP, SmoothMuscleTranscriptomeBrowser, SPP, ModelOrganismProteinExpressionDatabase, EVpedia, AllenBrainAtlas, GenTAC, LungCellCards, NeuroPedia, StemCellCKB, IntAct, GEneSTATION, PDID, WholeCellKB, BrainEXP, CCFv3, Gemma, GIGA, HDG, PhenoDis, QPN, seeQTL, RNASeqMetaDB, SynGO, Wiki-Pi, MD-CTS, PhosphoGRID, TGD, CrePortal, PhenomeCentral, TISdb, FGDB, MetOSite, miRactDB, ANISEED, CLEARPOND, GEISHA, MeDReaders, Xenbase, BiGG, miRdSNP, PerMM, PhenoGen, CORTECON, DIADEM, DBDB, MIsoMine, EPSLiM, MSK-KP, NakedMoleRatGenomeResource, DBCOVP, PRISMOID, IEDB, CORE, HSP, SSKB, BioPlanet, ICE, CB, GeneWeaver, SIDD, COINS, PCAT, Hipposeq, iPfam, MycobacterialSystemsResource, WheatExp, non-B, VectorBase, GPSno, tRic, IntegratedMicrobialGenomesAtlasofBiosyntheticgeneClusters, PhycoCosm, TBDB, WoM, IMG/VR, CiliaCarta, Hepitopes, NECTAR, AFND, AraPheno, AcrDB, dbCAN-PUL, MaizeGDB, CuGenDB, GDR, BGD, SchistoDB, RCPedia, TDRTargets, HopBase, SoyBase, Echinobase, ENA, GEMI, UNITE, FragariaCyc" diff --git a/analysis/inventory_funders_2023-01-20.csv b/analysis/inventory_funders_2023-01-20.csv new file mode 100644 index 0000000..c6690f0 --- /dev/null +++ b/analysis/inventory_funders_2023-01-20.csv @@ -0,0 
+1,1789 @@ +"agency","count_all_article_instances","count_unique_articles","count_unique_biodata_resources","associated_PMIDs","associated_biodata_resources" +"NIAID NIH HHS",95,50,44,"30304689, 30357390, 22080559, 30534948, 24214957, 24994456, 31649674, 29997612, 23661693, 32073269, 21782820, 21760913, 26510927, 27504778, 29028885, 22260278, 26362267, 27841751, 24203705, 26656948, 33780471, 21765097, 29106626, 31566225, 22080514, 23219434, 23568467, 26602694, 27053566, 30365026, 30593617, 22139919, 34529321, 24194595, 24270792, 25388105, 31114900, 27679478, 31722416, 24259431, 25555720, 33313778, 31667520, 23110173, 33151284, 22067456, 26433228, 27903906, 25428374, 25273106","10KIP, ALEdb, AspGD, atSNP, bNAber, BorreliaBase, cAb-Rep, CDG, CMAP, COLMAR Lipids, DFRMLI, HelmCoP, HAND, GNPS, HoTResDB, IRD, ImmuNet, IRRMC, MetaRef, MG-RAST, MCPdb, PGAT, PAMBD, OGRDB, PolymiRTS, VirmugenDB, VIOLIN, APD3, Vaxar, Victors, ViPR, Nematode.net, antimicrobial peptide database, Gene3D, EuPathDB, IEDB-AR, MEGARes, VIPERdb, PATRIC, SEED, DBAASP, FlyRNAi, RPFdb, UCSC Genome Browser" +"National Institute of Allergy and Infectious Diseases",15,14,14,"30304689, 30357390, 31649674, 33276297, 27841751, 33780471, 31161204, 33206959, 33290552, 33237286, 33772585, 31667520, 33151284, 31598706","10KIP, ALEdb, cAb-Rep, DBCOVP, IRRMC, MCPdb, PRISMOID, IntAct, GOC, UniProtKB, IEDB, PATRIC, DBAASP, Ensembl" +"Ministry of Education, Youth and Sports",1,1,1,"34244700","2DProts" +"European Regional Development Fund",13,11,11,"34244700, 30639529, 32990755, 31713636, 33683131, 33822911, 31584092, 33080028, 33655207, 31263870, 26141515","2DProts, Antimicrobial Enzyme Combinations Database, CSVS, DisProt, MRMAssayDB, MENSAdb, PDBe-KB, Peryton, TMSNP, ValTrendsDB, mirEX" +"NIGMS NIH HHS",571,255,220,"28977551, 27507885, 32765587, 22760305, 23774715, 25378330, 25516260, 23087378, 25414348, 29637199, 24214957, 24994456, 27050421, 25392415, 29509874, 23794735, 27701074, 23185041, 24470572, 30486838, 
23203874, 29092931, 23661693, 32073269, 25333826, 32890396, 26602695, 23172289, 23264352, 24225319, 25887129, 25951377, 26019122, 26030752, 26503248, 26503254, 26946289, 27907889, 30053267, 31612957, 33119734, 33174603, 33995899, 21177656, 21994220, 22748121, 23143106, 23197660, 23245398, 23794736, 24928188, 25399415, 26780094, 27141961, 29337142, 29890119, 30008982, 30407583, 30951672, 31642488, 21450710, 22846459, 23104379, 23118488, 23504933, 23958730, 25971743, 26212453, 27504778, 30418591, 31841142, 33119754, 33125055, 33973408, 21177658, 22058127, 22096229, 22570419, 23599502, 23991755, 24002112, 24194598, 25166490, 25707505, 26173699, 26362267, 26582920, 27789704, 27841751, 28150246, 28592645, 30397019, 30535108, 32502232, 22146221, 22701463, 23044546, 23143105, 23875173, 25098325, 25378301, 25559128, 26527726, 26590264, 29126312, 29155944, 29206899, 29284660, 30268942, 31679514, 31754718, 33174596, 22712730, 22833564, 24271386, 24271398, 24406170, 24939129, 25102069, 26476444, 26615193, 27152146, 29106626, 29487113, 31584092, 33361798, 21993301, 22086960, 22559792, 23193263, 23426257, 23624946, 24163250, 24227675, 24304897, 25392411, 26112452, 27026615, 28365761, 29069441, 29575358, 30357353, 30985146, 21472436, 22127861, 22140105, 22976082, 24060102, 24194593, 24271399, 25309735, 25414355, 26138588, 26527724, 27010673, 29222504, 29733404, 30202870, 30329093, 31494246, 32345346, 32785571, 34514416, 34844637, 21253873, 23118483, 24223973, 24225317, 25392422, 27188311, 28985418, 30871473, 31240309, 21276248, 22064863, 22067444, 22135298, 23226127, 23550210, 24243849, 24285306, 25392405, 26227548, 26590254, 27899583, 33211851, 21447597, 23730305, 23868073, 24185695, 24304899, 24356117, 25428361, 26578587, 26590263, 27899622, 27924039, 28160322, 29059334, 30668832, 31665425, 32558264, 32728249, 33290552, 33305318, 34314492, 34529321, 31701150, 30407596, 23494302, 33270111, 29145615, 30417254, 31504780, 30053264, 25501940, 26546518, 25514926, 24259431, 32621232, 
33237286, 25326323, 27914894, 23175613, 24270788, 22067456, 22102576, 23042674, 26553799, 24214955, 27515742, 23476021, 27899567, 25378335, 26476458, 27899635, 27924014, 28891124, 28713666, 30395289, 22102590, 27651457, 26578592, 22127867, 27450113, 30395331, 29106616, 33125071, 30476243, 23161681, 24253303, 25348405, 23193289, 26519399, 30395287","AAgMarker, ADPriboDB, Analysis of Breast Cancer GWAS, AraPath, Autism Brain Imaging Data Exchange, Binding MOAD, Bioclock, BioLiP, BioModels, BiOnIC, bNAber, BorreliaBase, C-terminome, Cancer3D, CarbonylDB, CARLSBAD, CeNDR, ChemProt-2.0, ChEpiMod, CHESS, CIL-CCDB, Cistrome Cancer, CMAP, COLMAR Lipids, Complex Mixture Analysis by NMR, CoV3D, CRN, dictyBase, DegraBase, DNASU, dbVOR, DeTEXT, DX, DMD, dbMAE, Digital Development, dbPEC, denovo-db, dbCAN-seq, DNAproDB, DescribePROT, Datanator, DRscDB, Gene Expression Barcode, FlyExpress, Genes2FANs, EcoCyc, EcoGene, Genome-Wide Docking Database, ESCAPE, FreeSolv, GenoBase, GenomeSpace, Enrichr, Express, FlyXCDB, EMDB, FusionGDB, exRNA Atlas, ExonSkipDB, hmChIP, HINT, GFDB, HEXEvent, Human Proteinpedia, HippDB, GLASS, Histone Antibody Specificity Database, GNPS, HumanNet, GlyMDB, HeRA, HbVar, hu.MAP, IsoBase, MACiE, InterPro, LAHEDES, INstruct, IIMDB, M2SG, JASPAR, isoMETLIN, IIIDB, IMG-ABC, ImmuNet, iGNM, KERIS, IRRMC, iPTMnet, ISVdb, iProteinDB, InteracDome, LabxDB, MnM, Medicago PhosphoProtein Database, miR-EdiTar, MonarchBase, MelanomaDB, MediaDB, miRDB, MUFOLD-DB, MouseNet, MutationAligner, Met-DB, MIST, MOSAIC, ModERN, MiPanda, MaveDB, MiST, MitoCarta, PeanutDB, NESdb, OnTheFly, NeXO, PeptiSite, PeptideAtlas, Panorama, PDBe, PDBFlex, NCRO, PAMBD, Panorama Public, PDBe-KB, Open Cancer TherApeutic Discovery, Polbase, PrimerBank, PKKB, PrePPI, Planform, PMP, RADAR--a, PhosphoNetworks, PPD, PyIgClassify, PlantOrDB, PheKB, PhagesDB, PolyA_DB, ProtaBank, piRTarBase, ProteinExplorer, SBKB, RNA CoSSMos, ScerTF, RMDB, RegPrecise, SelenoDB, SFLD, SkateBase, rrnDB, SmedGD, 
RegulonDB, SM-TF, RNA Structurome Database, SPAR, SequencEnG, REDfly, SliceIt, REPIC, SELAdb, SCISSORâ, recount3, TGED, Spliceosome, SWEETLEAD, TCDB, tRFdb, Structure Surfer, SysteMHC, TADKB, TCR3d, ZFNGenome, EcoliWiki, zfishbook, PhosphoSitePlus, UCLA Multimodal Connectivity Database, cBioPortal, P(3)DB, PortEco, non-human primate reference transcriptome resource, DOCKGROUND, UET, XTalkDB, WikiPathways, UniProtKB, SGR, PANTHER, NDB, SCOPe, dbSNP, GRASP, BioGPS, Lynx, UniProt, FlyRNAi, ATLAS, MetaCyc, DASHR, TFBSshape, PDB, ENCODE, GOC, PED, GlyTouCan, antimicrobial peptide database, GENCODE, iSyTE, VIOLIN, CTD, LNCipedia, EnhancerAtlas, PLncDB, MeT-DB, STRING, Gramene, PRIDE, FlyBase, MODOMICS" +"NEI NIH HHS",13,9,8,"28977551, 26660198, 23661693, 25887129, 29337142, 29036527, 22276777, 30417254, 27515742","AAgMarker, CB, CMAP, dbVOR, Express, iSyTE, miRdSNP, EnhancerAtlas" +"Swedish Energy Agency",1,1,1,"31832668","AcetoBase" +"Interreg Europe",1,1,1,"31832668","AcetoBase" +"Västra Götaland Region",1,1,1,"31832668","AcetoBase" +"Max-Planck-Gesellschaft",3,3,3,"33169878, 35424258, 34534667","A.P.E.S, MeFSAT, TExAs" +"Robert Bosch Stiftung",1,1,1,"33169878","A.P.E.S" +"NCI NIH HHS",281,137,127,"20949389, 32765587, 31586392, 22760305, 33037820, 24214957, 34903605, 25392415, 25190456, 26519468, 23893318, 33086069, 24470572, 30486838, 30202990, 26450965, 29092931, 30668638, 23630576, 33095860, 23197658, 23264352, 23550061, 24122041, 25990557, 26503248, 26503254, 29485625, 30371892, 33787872, 21177656, 22110038, 23340253, 26780094, 27141961, 28453687, 30407583, 30951672, 31642488, 21591763, 22165817, 22846459, 23118488, 25971743, 26212453, 30247654, 30418591, 31509535, 22570419, 23599502, 29136207, 30535108, 34127402, 21546393, 23044546, 25332399, 26590264, 27907895, 29126312, 29155944, 29284660, 30268942, 31588509, 32986834, 33174596, 33514395, 34349127, 21491493, 22517761, 23196988, 24271386, 25102069, 25527095, 26072489, 26578589, 27152146, 29186335, 31584092, 
31647099, 33245774, 33442735, 21656910, 23193263, 24227675, 24304897, 27789569, 29077937, 32358997, 24194593, 27377064, 29059366, 32345346, 34014674, 22121217, 22786849, 24013925, 25392422, 26208906, 27168721, 27733502, 28985418, 29036590, 29092939, 30053266, 33156327, 21276248, 22135298, 23066107, 23110975, 23550210, 26590259, 27789702, 30462313, 27794042, 27924039, 31691815, 32728249, 33290552, 33849445, 33151287, 26302176, 25514926, 26590405, 31308250, 29136208, 25414341, 33290554, 27899562, 24163257, 27587585, 25378335, 24285300, 25428374, 25348401, 26578600, 27899570, 29092072","ACSR, Analysis of Breast Cancer GWAS, APAatlas, AraPath, Bgee, bNAber, Cancer-Immu, Cancer3D, CCGD, ccmGDB, CDSA, CellMiner-SCLC, ChEpiMod, CHESS, ChIPprimersDB, CircNet, Cistrome Cancer, CMEP, CMS, CNCDatabase, DGA, DegraBase, curatedOvarianData, DGIdb, DSigDB, dbMAE, Digital Development, Datasets2Tools, DrugCentral, Drugmonizome, Gene Expression Barcode, GeneSigDB, EDR, GenomeSpace, Enrichr, GDISC, FusionGDB, exRNA Atlas, ExonSkipDB, GlycoFish, HOMER, HINT, HEXEvent, GLASS, Histone Antibody Specificity Database, HACER, HumanNet, GutFeelingKB, LAHEDES, INstruct, LinkedOmics, InteracDome, Immu-Mela, MSigDB, miR-EdiTar, MTB, MutationAligner, mutLBSgeneDB, Met-DB, MIST, ModERN, MiPanda, MutEx, ModelSEED, MitoCarta, MPM, MiREDiBase, Noncoded Amino acids Database, NGS, NURBS, OnTheFly, Panorama, NetGestalt, PAGER, ORegAnno, NCRO, OncoPPi, PDBe-KB, Pathway Commons, PAGER-CoV, O-GlcNAcAtlas, QuAD, PrePPI, PhosphoNetworks, PPD, PMKB, PICKLES, ProNetView-ccRCC, SelenoDB, RID, SBCDDB, REPIC, SistematX, Stem Cell Discovery Engine, Transcriptomine, targetHub, tRFdb, TANRIC, TCCR, SZGR, SysteMHC, TissGDB, TCPA, TC3A, TCRD, ZFNGenome, PhosphoSitePlus, TSGene, MGI, cBioPortal, UCSC Genome Browser, Cistrome, Cistrome DB, RCSB PDB, FlyRNAi, Reactome, ENCODE, GOC, TANTIGEN, MPD, IPD, PANTHER, ChEMBL, GXD, REDIportal, MeT-DB, MGD" +"NSF (UBM-Institutional-Collaborative: The Four-College Biomath 
Consortium)",1,1,1,"25229122","ACPro" +"FONDECYT",1,1,1,"32702093","AciDB" +"Fundación Ciencia & Vida",1,1,1,"32702093","AciDB" +"Programa de Apoyo a Centros con Financiamiento Basal",2,2,2,"32702093, 33507271","AciDB, SinEx" +"United States Department of Agriculture",31,5,5,"33068435, 32941621, 30407532, 33170273, 31598706","AcrDB, dbCAN-PUL, MaizeGDB, Gramene, Ensembl" +"National Science Foundation",89,54,51,"33068435, 30239679, 30371900, 31680137, 31036810, 30486838, 27391016, 32941621, 33119734, 33174603, 26800861, 27664130, 34010390, 30364992, 31509535, 31841142, 32758136, 26653323, 30535108, 31598675, 26322134, 29206899, 31612915, 32079733, 32986834, 30115014, 34241085, 25382819, 27789569, 30329093, 27188311, 31211398, 31490686, 32719467, 32882008, 33035346, 26227548, 31245720, 31598695, 32386298, 22564364, 32558264, 33021634, 33290552, 33151287, 31680153, 33290554, 28296894, 30395331, 31851420, 33156333, 33170273, 30407594, 31598706","AcrDB, AgBioData, Ancestral Genomes, ANISEED, ASNR, CHESS, D-PLACE, dbCAN-PUL, DescribePROT, Datanator, EchinoDB, ENVO, Echinobase, GPs, GutFeelingKB, GlyMDB, GSDB, iTAP, InteracDome, KnockTF, Metabolic In silico Network Expansions, MOSAIC, MIBiG, MtSSPdb, ModelSEED, PdumBase, OCELOT, ProKinO, PMKB, REDfly, Structure Surfer, SpinachBase, TMB, STAGdb, TBDB, tRFtarget, DOCKGROUND, VPGD, MirGeneDB, MGP Portal, PharmGKB, PDB, NDB, GOC, DrugCentral, Plant Reactome, PANTHER, CDD, InterPro, Gramene, Ensembl" +"UNL",2,2,2,"33068435, 32941621","AcrDB, dbCAN-PUL" +"National Health and Medical Research Council",14,8,8,"33137193, 28381244, 30942868, 30395284, 31161204, 31598690, 33095862, 30395310","AcrHub, DisBind, DEE2, Haemopedia, PRISMOID, ProCarbDB, ThermoMutDB, Vesiclepedia" +"National Natural Science Foundation of China",807,294,289,"34025933, 31584087, 32986825, 31695717, 29321052, 31665428, 31843802, 32294195, 34839012, 28968841, 30239683, 32681639, 33125076, 33219693, 30215764, 31665503, 33882119, 32540200, 28605773, 29985970, 
33010176, 30329142, 30329095, 31701131, 34345532, 29939204, 27635320, 35134148, 31691819, 29992323, 33471060, 30289549, 33147626, 31428785, 32608479, 33970229, 33662628, 30045691, 32345360, 34296749, 27365365, 33181824, 34856391, 33121433, 33109630, 30357356, 33693668, 31813095, 31901979, 33010163, 32436316, 33313674, 34992626, 33068433, 33009914, 30285246, 34927675, 29178828, 26940364, 27209279, 28381244, 28575155, 29209336, 29860480, 29961819, 30016397, 30379998, 30380071, 30482172, 30665056, 31603498, 31665429, 33104791, 33320930, 33938221, 34097004, 27037912, 29126995, 30321400, 30335161, 30357379, 30364969, 30365030, 30476229, 30788500, 31277321, 31642496, 31665430, 31774482, 31887789, 32120139, 32681912, 33002112, 33203359, 33497436, 33511767, 34085038, 34954426, 35694152, 27098585, 28529078, 28549078, 29548284, 30053237, 30066211, 30266410, 30371881, 31504765, 31524396, 31566222, 31584099, 31630971, 31725863, 31783725, 32315389, 32496513, 32858223, 32941628, 33045729, 33151298, 33264402, 33406221, 33677507, 33868597, 33965348, 33984507, 34164644, 34175476, 34642750, 34791105, 23601370, 29788225, 30276831, 30476305, 31598675, 31617563, 31665439, 31713618, 31906602, 32193291, 32367112, 32512182, 32766766, 32820322, 33045741, 33045751, 33147622, 33219686, 33507270, 33906563, 34755873, 27167218, 28968812, 29433427, 30057343, 31231773, 31240103, 32833025, 32911083, 32990748, 33125077, 33126250, 33219670, 33418085, 34510194, 28961690, 29982280, 30134653, 30335176, 30445567, 31086734, 31584086, 31950190, 32103267, 32105730, 32111231, 32117995, 32122231, 32487016, 32597311, 33275967, 33304468, 33306802, 33359127, 33514746, 33581334, 33997360, 34120586, 34389843, 26211629, 30239819, 30357353, 30380102, 31584089, 31598699, 31599098, 31602478, 31620779, 31640808, 31725858, 31809863, 33003203, 33010159, 33330918, 33554247, 25640659, 28365723, 28529082, 29028888, 29309507, 29617941, 29961821, 30020436, 30371817, 30380119, 31511885, 31642484, 31713629, 31799597, 31906603, 
32382747, 32709339, 32849839, 33010177, 33021671, 33045745, 33068412, 33685493, 34022814, 34496744, 27337171, 27643925, 28420402, 28974472, 30184150, 30371815, 30380087, 30913342, 32047897, 32248093, 32286817, 32351388, 32620074, 32976581, 32990749, 33074314, 33179754, 33360695, 33985427, 34273956, 34407614, 29351734, 30217145, 30223042, 30365026, 30462313, 31021279, 32168374, 32221380, 32349124, 32738156, 33068436, 33095866, 33175170, 33993461, 34601118, 29917040, 30285109, 30335166, 30364951, 30364952, 30364956, 31161214, 31599330, 31670377, 32016318, 33175131, 33219685, 33306787, 33306800, 33693667, 34344425, 30715167, 30172046, 30407568, 30371818, 33196801, 30204897, 31942978, 30329098, 33010178, 33219661, 30407549, 26744602","AddictGene, Animal-ImputeDB, Animal-APAdb, AppleMDO, ASGDB, ASD, ASRD, ASFVdb, ASER, AtCircDB, ATD, ATdb, ATACdb, AtMAD, AWESOME, BBCancer, BC-TFdb, BGVD, BioM2MetDisease, BrainEXP, CancerImmunityQTL, CancerSEA, CancerSplicingQTL, CancerTracer, CanImmunother, CARDIO-LNCRNAS, CardioTF, CATA, CAUSALdb, CeleryDB, Cellinker, CellMarker, CellTalkDB, CFEA, CHDGKB, ChemHub, Chinese Glioma Genome Atlas, CIGene, CircAtlas, circExp, CIRCpedia, circR2Cancer, CircR2Disease, circVAR, CKTTD, CMAUP, CMBD, CMVdb, CNAdbCC, cncRNAdb, CoFly, ColorCells, CottonGVD, CovalentInDB, CoVdb, CRISPRlnc, CRPMKB, CrusTF, dbPHCC, DNetDB, DisBind, Dynamic-BM, DRDB, dbCRSR, dbLGL, dbCID, dbCPM, DSMNC, dbMPIKT, dbHDPLS, dbInDel, DrugCombDB, DrugSpaceX, CyanoPATH, D3DistalMutation, DevOmics, GAMDB, FVD, EWASdb, EVmiRNA, EndoDB, EWAS Atlas, ETCM, ENPD, EnDisease, GEDS, Gene4Denovo, ENdb, FluReassort, ETph, FRCD, ExoceRNA Atlas, EnzyMine, FishDB, FifBase, FAWMine, EyeDiseases, FertilityOnline, EnhFFL, Grape-CRISPR, HCSGD, HEROD, IDPM, HDncRNA, HAMdb, HCCDB, iDog, GMrepo, HybridMolDB, GWAS Atlas, gutMDisorder, GESUR, GRONS, HKPocket, HotSpot3D, gutMEGA, hTFtarget, IDDB, GIMICA, GRNdb, HERB, iCysMod, HIR, GPCards, GGVD, HisPhosSite, HFBD, GWH, HBFP, HFIP, LiverAtlas, LnChrom, 
lncRNAnet, LncACTdb, KnockTF, LnCeVar, MaGenDB, LncTarD, LLPSDB, LncSpA, MACSNVdb, IRESbase, lncR2metasta, LncAS2Cancer, LncSEA, LncExpDB, KNIndex, LnCeCell, InSexBase, M6ADD, ImmReg, MiasDB, MetSigDis, MethCNA, MCENet, MepmiRDB, MDR, MNDR, MosaicBase, miRNASNP-v3, MASI, MloDisDB, MolluscDB, MicroPhenoDB, Mollusca mitochondrial database, ncDR, PepBDB, PADFrag, NucMap, OncoBase, OsteoporosAtlas, PGG.Han, PCaLiStDB, PDIR, ncEP, NoncoRNA, Nc2Eye, ncRPheno, ncRI, OncotRF, ncRNAVar, PDmethDB, NPBS, NBIGV, NGD, OGP, PCPD, NUCOME, Nabe, PLNlncRbase, POSTAR2, piRTarBase, qPhos, PhaSepDB, QTLbase, PSMD, PmiREN, prokaryotic antiviral defense system, PGG.SNV, PlantCircNet, PsyMuKB, QSIdb, PROTAC-DB, piRNA-eQTL, PMI-DB, SecReT6, SilkPathDB, RED, RRDB, SEGreg, SCRIPT-MAP, SDADB, realDB, SEdb, SAGD, SNP2APA, SilkDB, SpatialDB, RNAactDrug, RNAInter, RSVdb, saponin mass spectrometry database, RIGD, SC2disease, RMVar, SilencerDB, RASP, riboCIRC, Rhododendron Plant Genome Database, RPocket, TarNet, StemCellCKB, SSER, Stress2TF, TRCirc, TransmiR, SymMap, TPIA, TRlnc, TE141K1, TeroKit, TCMIO, TeaCoN, STAB, TCRdb, TransCirc, TISCH, ToxinDB, TarDB, TCM-Blast, Tracking Air Pollution in China, EOGD, PPGD, TSNAdb, Victors, Cistrome DB, UVGD, uORFlight, WeiBI, VirusCircBase, YIR, tsRBase, VARAdb, 2019nCoVR, TUPDB, Viral Putative G-quadruplex, MPD, LncRNADisease, RPFdb, DoriC, PED, HMDD, WDSPdb, LSD, NPInter, VariBench, deepBase, Lnc2Cancer, MetaADEDB, CEG, UbiNet, SorGSD, BacWGSTdb, ICEberg, piRBase, NONCODE, AnimalTFDB, LncBook, LincSNP" +"EU 7th Framework Programme",1,1,1,"22682155","AFFINOMICS" +"Biotechnology and Biological Sciences Research Council",528,179,117,"22682155, 22345505, 25414324, 24265221, 33176685, 28708831, 25414348, 27189610, 26794641, 29040563, 32754757, 23180789, 31095607, 25270877, 25348397, 33084893, 23161684, 23203878, 25232097, 21803806, 22080548, 22110040, 22139938, 22912585, 23203866, 25348407, 25361971, 25432969, 26590404, 27899646, 30008982, 30349118, 30357393, 
32726198, 34220930, 25189782, 26314736, 26578596, 27189608, 30364992, 22096229, 22494395, 23452239, 23667450, 27484196, 27899604, 28806134, 30252093, 23060735, 24330312, 30418645, 31612915, 33502607, 22674824, 25172923, 25399418, 26476444, 31584092, 32693783, 33749993, 21801404, 21980353, 23674503, 25414340, 25558364, 30053269, 27097230, 29057095, 29228298, 30321422, 33416848, 22121217, 22140109, 23766369, 24194607, 25300487, 25414345, 26123534, 28013278, 22080546, 24304889, 25300491, 26452372, 27899279, 30398663, 31696235, 32548865, 22067447, 23193253, 23203987, 25352543, 25635527, 26673716, 27794045, 27899622, 29112718, 29145643, 30020414, 30423142, 30476227, 31642470, 31724711, 32728249, 33290552, 30395294, 31701150, 26467479, 24297252, 24270792, 31733063, 30298402, 33270111, 25428371, 23109552, 27613420, 29106550, 30445555, 26657633, 26481351, 29858801, 26582919, 24217918, 23203882, 33237286, 24163254, 24316576, 27794554, 30357350, 29927072, 24275495, 28077563, 23193272, 26476458, 30217829, 24214989, 26578585, 27899635, 29140473, 23630246, 30395289, 29858800, 26888907, 30395267, 33125078, 33211869, 22096232, 25428363, 27450113, 30395331, 26615190, 29112716, 30398656, 31701148, 22064864, 29165610, 26527722, 31641782, 26896847, 33106848, 24288371, 25723102, 27980099, 29069413, 27899630, 33156333, 23180798, 33170273, 29155950, 29140475, 30407521, 30395270, 31691826, 31722421, 30395287, 26578574, 33175160, 29092050, 31598706, 22086963","AFFINOMICS, Arabidopsis Network Analysis Pipeline, Araport, ArchDB, AVIMM, biochem4j, BioModels, BioSharing, BRAINS, CAZypedia, CerealsDB, ChEBI, CiliaCarta, CODEX, ComPPI, crisprSQL, dcGO, D(2)P(2, DAA, Gee Fu, ENA, ELM, Gene3D, GenDR, FlyAtlas, Genome3D, GeneFriends, Ensembl Plants, FunTree, FAIRDOMHub, EMDB, GeneATLAS, GENCODE, EnteroBase, FAANG, GOBLET, GlycoMob, HPMC, HLA-ADR, GPs, InterPro, InterStoreDB, LjGEA, LipidHome, iLIR, IPD-MHC, iLIR@viral, iProX, MetaboLights, Marmal-aid, MemProtMD, MIBiG, Missense3D-DB, PCDDB, Naked 
Mole Rat Genome Resource, OMA, PDBe, PDBe-KB, ORDER, OmniPath, PICCOLO, pubmed2ensembl, PhosphoGRID, PHI-base, PREDICTS, PITDB, SignaFish, SalmoNet, SeedStor, RetroRules, SARS CoV-2, Stem Cell Discovery Engine, TAIR, TIMBAL, TreeFam, Super Natural II, SUPERFAMILY, The Mouse Genomes Project, ThaleMine, International Nucleotide Sequence Database Collaboration, Expression Atlas, TrypanoCyc, WaspAtlas, WormBase ParaSite, CATH, MGnify, UK Immunological Toolbox, Ensembl Genomes, LAMP, Ensembl, RNAcentral, ARN, Pfam, FANTOM5, UniProt, Rfam, MEROPS, SKEMPI, miRBase, BioGRID, WormBase, SCOP, ENCODE, GOC, antiSMASH, eggNOG, AFND, Gramene, PRIDE, UniProtKB, ArrayExpress, GlyTouCan, OAS, JASPAR, IPD, BioSamples, PDB" +"U.S. Department of Agriculture",15,4,4,"30239679, 31210271, 28415075, 33264401","AgBioData, Chickspress, HopBase, SoyBase" +"National Institutes of Health",195,126,115,"30239679, 33401309, 30371900, 32765587, 31586392, 27193158, 30534948, 29509874, 31210271, 30668638, 33095860, 32392296, 33167031, 30357367, 32890396, 31725864, 33211888, 30371892, 31612957, 33079988, 33119734, 33174603, 33787872, 29337142, 29890119, 30407583, 31642488, 28967693, 29028885, 30418591, 30445434, 31841142, 33119754, 27841751, 30535108, 31701147, 33137204, 33166392, 25542617, 26322134, 28490127, 29206899, 30764761, 30841849, 31042284, 31612915, 31679514, 31754718, 33174596, 31566225, 31647099, 33442735, 27789569, 28365761, 28862395, 30805645, 27010673, 30202870, 30329093, 31494246, 31950189, 31240309, 31490686, 33035346, 33156327, 33729437, 34522848, 26227548, 31598702, 31680168, 31837751, 32386298, 26434508, 28160322, 30476227, 31665425, 31691815, 31740966, 32558264, 33104772, 33290552, 33305318, 34366563, 34387941, 33151287, 33270111, 31114900, 31584097, 31504780, 26919060, 31680153, 33221922, 33237286, 33290554, 27914894, 30357350, 31713622, 28280852, 31777943, 30395289, 31696236, 30398643, 30395331, 33125071, 31777944, 30476243, 33166387, 33170210, 29846728, 33270901, 33106848, 
33156333, 33170273, 30371825, 30407545, 30407594, 32128557, 33151290, 33068428, 30407521, 31691826, 30407599, 30395287, 33231642, 31598706, 31713623","AgBioData, ADeditome, Ancestral Genomes, Analysis of Breast Cancer GWAS, APAatlas, ASL-LEX, atSNP, CarbonylDB, Chickspress, CMEP, CNCDatabase, CoCoCoNet, COG, CORUM, CoV3D, CRAFT, CSEA-DB, DrugCentral, DNAproDB, DKK, DescribePROT, Datanator, Drugmonizome, Express, FlyXCDB, FusionGDB, ExonSkipDB, HUMA, HoTResDB, HumanNet, GWAS, GlyMDB, HeRA, IRRMC, InteracDome, LINCS, KinaseMD, LitCovid, MSeqDR, Metabolic In silico Network Expansions, Milk bioactive peptide database, MOSAIC, MDB, mGAP, Microbiome Learning Repo, MIBiG, MaveDB, MiST, MitoCarta, OGRDB, Pathway Commons, O-GlcNAcAtlas, PMKB, PhagesDB, PMS_DN, PKAD, SM-TF, SequencEnG, REDfly, SliceIt, RNA Characterization of Secondary Structure Motifs, TCR3d, TMB, tRFtarget, TCRD, TIE, ToppCell, DOCKGROUND, VISDB, TSEA-DB, VetCOT, MGP Portal, ExoCarta, ATLAS, BioGRID, TFBSshape, Reactome, EnhancerAtlas, PDB, GXD, GOC, PED, DSLD, PharmGKB, GENCODE, IEDB-AR, IGSR, miRDB, Plant Reactome, UCSC Genome Browser, UniProtKB, PANTHER, SCOPe, Pfam, ENCODE, TANTIGEN, ClinVar, PRIDE, MPD, ChEMBL, GlyTouCan, CDD, STRING, International Nucleotide Sequence Database Collaboration, ZFIN, CTD, RefSeq, RNAcentral, InterPro, Gramene, PubChem, SGD, Ensembl, MGD, RGD" +"International Center for Tropical Agriculture",1,1,1,"30239679","AgBioData" +"The US Land Grant Universities",1,1,1,"30239679","AgBioData" +"Fondazione Edmund Mach",2,2,2,"30239679, 30576486","AgBioData, PhytoTypeDB" +"US Dry Pea and Lentil Council",1,1,1,"30239679","AgBioData" +"Washington Tree Fruit Research",1,1,1,"30239679","AgBioData" +"Bill and Melinda Gates Foundation",3,2,2,"30239679, 31598706","AgBioData, Ensembl" +"Agence Nationale de la Recherche",32,16,16,"30239679, 31680137, 33444113, 31605615, 32618424, 34189203, 28608363, 28791657, 30944327, 34245304, 35559777, 30380106, 31733062, 25740460, 30794542, 
33305318","AgBioData, ANISEED, CALR-ETdb, LeGOO, LymphoAtlas, IsoArcH, MiSynPat, MEGALEX, monoterpene indole alkaloid database, MtExpress, MESOCOSM, OrthoInspector, ParameciumDB, PhytoREF, RESPIRE, PED" +"Consultative Group for International Agricultural Research",1,1,1,"30239679","AgBioData" +"Research and Innovation Center",1,1,1,"30239679","AgBioData" +"U.S. Department of Energy",15,8,8,"30239679, 31665416, 32986834, 33104790, 32882008, 30208844, 33137183, 33152092","AgBioData, Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters, ModelSEED, PhycoCosm, TBDB, WoM, IMG/VR, GOLD" +"The Northern Pulse Growers",1,1,1,"30239679","AgBioData" +"University of Montpellier",1,1,1,"30239679","AgBioData" +"CAS",5,3,3,"31648087, 31566222, 33264402","AdditiveChem, GWAS Atlas, HERB" +"Natural Science Foundation of Tianjin",3,3,3,"31648087, 31691819, 31598699","AdditiveChem, CAUSALdb, QTLbase" +"National Key Research and Development Program of China",143,68,68,"31648087, 32608479, 33970229, 34296749, 33121433, 33693668, 33010163, 33313674, 32754758, 33068433, 29961819, 30335161, 30365030, 30788500, 31584095, 32120139, 34954426, 31504765, 31566222, 33045729, 33151298, 33264402, 33515030, 34164644, 34175476, 33045751, 33147622, 33219686, 34755873, 31157825, 32833025, 33084905, 33125077, 33126250, 33219670, 33418085, 34156447, 31950190, 32597311, 33275967, 33581334, 33997360, 34120586, 30055873, 31584089, 33010159, 33554247, 34111777, 34122478, 29617941, 33045745, 30380087, 32990749, 33074314, 33360695, 30223042, 30462313, 33068436, 33175170, 31599330, 33084874, 33175131, 33219685, 33306787, 33170268, 30204897, 30329098, 33219661","AdditiveChem, CHDGKB, ChemHub, circExp, circVAR, CMBD, cncRNAdb, ColorCells, ConoMode, CovalentInDB, dbLGL, EVmiRNA, ETCM, EnDisease, EWAS, FRCD, FertilityOnline, GMrepo, GWAS Atlas, GIMICA, GRNdb, HERB, HVIDB, HFBD, GWH, LncExpDB, KNIndex, LnCeCell, ImmReg, MENDA, MNDR, MeDAS, MASI, MloDisDB, MolluscDB, MicroPhenoDB, mPPI, PCaLiStDB, 
OncotRF, ncRNAVar, OGP, PCPD, NUCOME, PhoPepMass, PhaSepDB, PROTAC-DB, PMI-DB, PID, PSDX, SCRIPT-MAP, SilencerDB, SymMap, TCRdb, TransCirc, ToxinDB, TSNAdb, Cistrome DB, tsRBase, 2019nCoVR, LSD, OGEE, deepBase, Lnc2Cancer, MetaADEDB, GVM, AnimalTFDB, LncBook, LincSNP" +"National Science Foundation of China",13,6,6,"31648087, 31598709, 32510565, 30252093, 31231774, 32542382","AdditiveChem, DNMIVD, GreenCircRNA, iProX, Mr.Vc, PRMdb" +"Chinese Academy of Sciences",83,28,27,"31648087, 29209336, 33104791, 29126995, 30357418, 31584095, 33119759, 28387199, 30196115, 30371881, 31566222, 31811943, 32055858, 34175476, 33045751, 30335176, 31584086, 31620779, 28529082, 30365026, 33175170, 30364952, 31599330, 31670377, 33170268, 33196801, 30329098, 33704069","AdditiveChem, DRDB, DrugSpaceX, FVD, EDK, EWAS, gcType, GSA, HeteroMeth, iDog, GWAS Atlas, GliomaDB, GREG, GWH, LncExpDB, NucMap, PGG.Han, prokaryotic antiviral defense system, RED, Victors, 2019nCoVR, PED, LSD, NPInter, GVM, NONCODE, LncBook" +"Dutch Research Council (NWO)",18,14,14,"28376796, 27995664, 23774715, 31095607, 27899646, 29688353, 33439542, 31612915, 33822911, 25352545, 30395294, 26919060, 26496949, 31701148","AHCODA-DB, AraQTL, Autism Brain Imaging Data Exchange, CiliaCarta, FAIRDOMHub, FSD, HDG, MIBiG, MENSAdb, BDB, antiSMASH, MSeqDR, KLIFS, JASPAR" +"Agentschap NL",1,1,1,"28376796","AHCODA-DB" +"Seventh Framework Programme",10,8,8,"28376796, 31095607, 34782688, 27664130, 28086860, 29739837, 27749924, 31647100","AHCODA-DB, CiliaCarta, CyFi-MAP, ENVO, NaDH, NvERTx, SulfAtlas, BGD" +"The Australian National Data Service",1,1,1,"30231853","AgriSeqDB" +"Guangdong Provincial Hospital of Chinese Medicine Science and Technology Research Program",1,1,1,"31123286","AICD" +"the National Undergraduate Training Programs for Innovation and Entrepreneurship",1,1,1,"31123286","AICD" +"the start-up support for scientific research of Xinglin Young Scholar in Guangzhou University of Chinese Medicine",1,1,1,"31123286","AICD" 
+"Guangdong Science and Technology project",1,1,1,"31123286","AICD" +"“Institute for the Promotion of Innovation through Science and Technology in Flanders (IWT-Vlaanderen)”",1,1,1,"22659196","Alkamid" +"PHS HHS",23,19,16,"23193282, 26438539, 26602695, 21249531, 26503248, 23175615, 22610854, 23093593, 23125372, 25414355, 22135296, 25392405, 23180793, 24225323, 24293654, 26578581, 25510499, 25414341, 23110173","Allen Brain Atlas, CRISPRz, CRN, DIADEM, dbMAE, EuPathDB, IEDB-AR, PaVE, non-B, rrnDB, VectorBase, non-human primate reference transcriptome resource, IPD, PATRIC, SEED, DBAASP" +"NIMH NIH HHS",100,30,29,"23193282, 23774715, 29985970, 32386544, 32392296, 26019122, 27907889, 33599246, 24217912, 24336862, 26212453, 33439542, 26362267, 28592645, 22140101, 21821666, 26048622, 26578589, 29370821, 31868683, 22171328, 26323714, 31171447, 23209562, 22102583, 26590263, 26311606, 33221922, 24270788, 27587585","Allen Brain Atlas, Autism Brain Imaging Data Exchange, BrainEXP, CCFv3, CoCoCoNet, DX, denovo-db, Gemma, HPO, GIGA, Histone Antibody Specificity Database, HDG, ImmuNet, ISVdb, MethylomeDB, NeuroPedia, PedsDTI, ORegAnno, PhenoDis, QPN, seeQTL, RNASeqMetaDB, SynGO, Wiki-Pi, MPD, Lynx, UCLA Multimodal Connectivity Database, UCSC Genome Browser, REDIportal" +"NHLBI NIH HHS",145,48,33,"23193282, 26602695, 26946289, 29485625, 33787872, 27141961, 30951672, 21982653, 23504933, 33119754, 33973408, 25707505, 31701147, 34936882, 33514395, 21821666, 25102069, 22086960, 24304897, 24288368, 27643925, 30052772, 21296746, 22102583, 23730305, 33290552, 25388151, 25514926, 21321022, 27587585, 24234451, 21520341, 30395267, 25428363, 23843252, 23255149, 27980099, 26555599, 23161678, 23603846, 23794737, 23881287, 25355511, 27602200, 27736745, 29761460, 31713623, 34741192","Allen Brain Atlas, CRN, dbPEC, Datasets2Tools, Drugmonizome, Enrichr, exRNA Atlas, GenTAC, Human Proteinpedia, HeRA, hu.MAP, IIIDB, LINCS, Lung CellCards, MPM, NeuroPedia, Panorama, PrimerBank, PPD, RDP, 
StemCellCKB, Terabase, RGD, MPD, SGR, GOC, EVpedia, PhosphoSitePlus, REDIportal, IntAct, dbNSFP, RNAcentral, BioGRID" +"Novo Nordisk Fonden",23,18,13,"30357390, 27924032, 33010170, 23143109, 27504778, 31612915, 32079733, 29062930, 34000890, 26476456, 27794045, 33270898, 30395294, 26531826, 29156309, 31696234, 33152079, 25723102","ALEdb, antiSMASH, BiG-FAM, HemaExplorer, GNPS, MIBiG, MtSSPdb, SMBP, TELEMED, BiGG, FANTOM5, GPCRdb, JASPAR" +"Technical University of Denmark",1,1,1,"30357390","ALEdb" +"NNF Center for Biosustainability",15,11,7,"30357390, 27924032, 33010170, 27504778, 31612915, 29062930, 26476456, 30395294, 29156309, 31696234, 33152079","ALEdb, antiSMASH, BiG-FAM, GNPS, MIBiG, SMBP, BiGG" +"European Commission FP VI",1,1,1,"25762455","amamutdb.no" +"Biomedical Research Council",1,1,1,"30365033","AlloMAPS" +"Biotechnology Unit, AMU and DBT",1,1,1,"23317704","AMDD" +"Medical Research Council",127,85,62,"29040693, 34528715, 26794641, 31095607, 25270877, 23019048, 21249531, 23529715, 29385418, 31664080, 24174536, 25348407, 26590404, 30008982, 30349118, 30357393, 32726198, 24217912, 25189782, 26578596, 27976751, 29040670, 24194598, 22121219, 23060735, 25542617, 27799474, 29897419, 31504189, 24229347, 24297257, 26476444, 31584092, 22363733, 25593348, 31598690, 26746786, 34844637, 26123534, 29087479, 33095862, 24194600, 24318814, 27899279, 21447597, 24234449, 30407529, 31642470, 31724711, 32728249, 33290552, 24214962, 24270792, 25388151, 31733063, 30298402, 33270111, 31612961, 26531826, 26432830, 23109552, 26919060, 30445555, 24265223, 26935103, 24265224, 22067452, 23650175, 27899567, 29140473, 30398659, 22102590, 22127867, 24194605, 32486891, 27450113, 30395331, 30398656, 31701148, 29069413, 26555599, 23161678, 33156333, 26578574, 31598706","AmyPro, BRAIN UK, BRAINS, CiliaCarta, CODEX, CRCgene, DIADEM, Database of Instruments for Resource Use Measurement, DIBS, dendPoint, GeneProf, Genome3D, FunTree, EMDB, GeneATLAS, GENCODE, EnteroBase, HPO, GOBLET, HPMC, 
Hepitopes, ICTV, JASPAR, MitoMiner, MetaboLights, MSeqDR, mirDNMR, MARDy, MeLAD, NeuroGeM, NECTAR, PDBe, PDBe-KB, PrionHome, PlasmoGEM, ProCarbDB, RVS, recount3, The Mouse Genomes Project, STCRDab, ThermoMutDB, IMPC, Electronic Mouse Atlas of Gene Expression, WormBase ParaSite, UniProtKB, FlyBase, BioSamples, WormBase, SCOP, ENCODE, GOC, ELM, Gene3D, EVpedia, GWAS Central, SUPERFAMILY, CARD, Reactome, PDB, InterPro, dbNSFP, Ensembl" +"Ministry of Education",4,3,3,"30247677, 30668638, 30587128","AmtDB, CMEP, PSRN" +"Polish National Science Center",2,2,2,"30247677, 33270898","AmtDB, GPCRdb" +"NHGRI NIH HHS",422,194,120,"30371900, 30534948, 25477388, 21233089, 29637199, 29126148, 22140108, 24470572, 30486838, 23508969, 26855883, 23172289, 24122041, 27766955, 27907889, 31612957, 32227657, 33995899, 21994220, 22110038, 23161689, 23203885, 24243844, 26780094, 27664130, 30357393, 21450710, 22064851, 22123736, 24217912, 26673694, 28967693, 30445434, 34859531, 22792232, 27841751, 21856757, 22080565, 22140101, 24203705, 25542617, 26656948, 29155944, 29206899, 29284660, 31679514, 24939129, 25428349, 26578589, 29487113, 31647099, 21624156, 23378291, 24163250, 24227675, 27026615, 27799469, 28862395, 22140105, 23203869, 26097510, 29040625, 32345346, 22140109, 23197656, 26516187, 29092939, 30052772, 30053266, 34154643, 22067444, 22075990, 22080546, 22736877, 23110975, 24194600, 24267744, 25348409, 25378322, 25392405, 26590259, 30462313, 31283070, 21447597, 22102583, 22110037, 23074187, 23203987, 23245209, 24007337, 24234449, 25214827, 25352543, 26612867, 27794045, 27899622, 28150237, 29069475, 29077884, 29112718, 29145643, 31642470, 31665425, 31691815, 32728249, 33021634, 33261662, 33290552, 34387941, 23494302, 33270111, 25378336, 31584097, 26919060, 31680153, 23175610, 26481351, 33221922, 24217918, 33237286, 23487186, 27899582, 24316576, 30304474, 33290554, 26935103, 21672956, 27794554, 33436076, 30357350, 23161672, 22067452, 24214955, 27515742, 31713622, 23193272, 21520341, 
27899567, 24285300, 34698891, 25428374, 28713666, 22102590, 24265222, 28838067, 26578592, 22127867, 30395267, 23203985, 23143107, 33211869, 24194605, 32486891, 29126249, 25361974, 23843252, 30395331, 30398656, 25348401, 29165610, 23161681, 33170210, 33106848, 22135293, 25414346, 29069413, 29145629, 23193274, 26555599, 23161678, 26578600, 33170273, 24253303, 29140510, 30407545, 29155950, 30407594, 22012987, 27899570, 25348405, 26097180, 30407521, 24243840, 29092072, 26519399, 26631132, 31691826, 26087747, 30407599, 30395287, 33231642, 29761460, 22086963, 34741192","Ancestral Genomes, atSNP, BARD, BIND, BiOnIC, CCDS, CharProtDB, ChEpiMod, CHESS, CistromeFinder, CressInt, dictyBase, DGIdb, e-GRASP, denovo-db, DNAproDB, dbMTS, DRscDB, FlyExpress, GeneSigDB, GeneTack, factorbook, FireDB, GenomeSpace, ENVO, GENCODE, hmChIP, HaploReg, GOA, HPO, GtRNAdb, HUMA, GWAS, gnomAD, Integrated Microbial Genomes and Metagenomes, IRRMC, modENCODE, modMine, MethylomeDB, MetaRef, MSeqDR, MG-RAST, MIST, MOSAIC, ModERN, MaveDB, PeptideAtlas, OMIM, ORegAnno, Panorama Public, Pathway Commons, PHARE-KB, PhenoDB, RADAR--a, PhosphoNetworks, PheKB, Plant Reactome, PMS_DN, ScerTF, SIFTS, SANCDB, RISE, REPIC, TAIR, TIGRFAMs, SynLethDB, TCPA, Terabase, TC3A, TE Hub, zfishbook, MGD, International Nucleotide Sequence Database Collaboration, KB, MGI, IMPC, VTO, Human Disease Ontology, UniPROBE, non-human primate reference transcriptome resource, UCSC Genome Browser, Cistrome DB, VIPdb, UniProtKB, MPD, SGD, ZFIN, Ensembl, HGNC, GRASP, FlyBase, BioPortal, RNAcentral, Dfam, FANTOM5, UniProt, UniCarbKB, APPRIS, HEDD, Rfam, MEROPS, WormBase, TFBSshape, Reactome, ENCODE, NDB, dbNSFP, GOC, PharmGKB, IGSR, Expression Atlas, Gramene, PANTHER, Pfam, EnhancerAtlas, ArrayExpress, ChiTaRS, InterPro, GOLD, RGD" +"European Union and Greek National Funds through the Operational Program ‘Competitiveness, Entrepreneurship and Innovation’",1,1,1,"31094220","AmyCo" +"Saarland University",3,3,3,"30937442, 30380090, 
31691816","Animal sncRNA Atlas, PLSDB, miRPathDB" +"Michael J. Fox Foundation for Parkinson’s Research",1,1,1,"30937442","Animal sncRNA Atlas" +"Huazhong Agricultural University Scientific & Technological Self-innovation Foundation",3,3,3,"31584087, 32986825, 31410488","Animal-ImputeDB, Animal-APAdb, ncRNA-eQTL" +"Fundamental Research Funds for the Central University",2,2,2,"31584087, 32487016","Animal-ImputeDB, ncRI" +"MEXT",2,2,2,"31680137, 33211864","ANISEED, FANTOM5" +"Kato Memorial Research Foundation",1,1,1,"31680137","ANISEED" +"JSPS",12,5,5,"31680137, 29206899, 28499913, 33211864, 29216398","ANISEED, MOSAIC, Soybean Proteome Database, FANTOM5, ATTED-II" +"Institut Français de Bioinformatique",2,2,2,"31680137, 31624845","ANISEED, CRISPRCasdb" +"NICHD NIH HHS",54,26,22,"31680137, 23774715, 22916227, 34482425, 26503254, 26946289, 27907889, 23340253, 24150938, 33973408, 32502232, 28490127, 29145608, 26048622, 23110975, 23125366, 26476456, 23730305, 33104772, 34387941, 30417254, 29761459, 25313157, 24316576, 24163257, 34698891","ANISEED, Autism Brain Imaging Data Exchange, CLEARPOND, CrePortal, Digital Development, dbPEC, denovo-db, EDR, GEISHA, hu.MAP, LabxDB, Milk bioactive peptide database, MeDReaders, PedsDTI, MGI, Xenbase, BiGG, SGR, GXD, PharmGKB, iSyTE, Ensembl" +"CNRS",3,3,3,"31680137, 31624845, 31733062","ANISEED, CRISPRCasdb, ParameciumDB" +"Japan Foundation for Applied Enzymology",1,1,1,"31680137","ANISEED" +"Inamori Foundation",1,1,1,"31680137","ANISEED" +"Sumitomo Foundation",2,2,2,"31680137, 33002111","ANISEED, PyDISH" +"Jiangsu Agricultural Science and Technology Independent Innovation Fund",1,1,1,"32986825","Animal-APAdb" +"Fundamental Research Funds for the Central Universities",39,28,28,"32986825, 32681639, 34856391, 32754758, 33009914, 31691822, 31277321, 32008039, 34954426, 35694152, 30380109, 32510565, 33045729, 32367112, 32512182, 33507270, 31504189, 32159764, 33219670, 32105730, 33275967, 30244175, 31161204, 31809863, 33137192, 32709339, 
33175170, 33175131","Animal-APAdb, ATdb, CircR2Disease, ConoMode, CoVdb, DrLLPS, GEDS, EPSD, FertilityOnline, EnhFFL, iEKPD, GreenCircRNA, GIMICA, MACSNVdb, IRESbase, InSexBase, MeLAD, MMHub, MolluscDB, ncEP, ncRNAVar, PTMD, PRISMOID, PsyMuKB, Plant-ImputeDB, saponin mass spectrometry database, 2019nCoVR, deepBase" +"Instituto de Salud Carlos III",7,5,5,"30715274, 32076423, 33252190, 30357370, 34332522","APID, FHLdb, GRINdb, liqDB, PACHIN" +"European Project H2020",2,1,1,"30715274","APID" +"Federación Española de Enfermedades Raras",1,1,1,"30715274","APID" +"Cancer Prevention & Research Institute of Texas",2,2,2,"31586392, 30203047","APAatlas, Pancan-meQTL" +"Japan Science and Technology Agency",12,10,9,"31978081, 33174597, 30046160, 30295851, 33645624, 29668970, 33125071, 30357349, 33156332, 30321428","AOE, GlycoPOST, KampoDB, jPOST, KAIKObase, MitoFish, GlyTouCan, DDBJ, KEGG" +"Fonds Wetenschappelijk Onderzoek (BE)",1,1,1,"28095775","ARA-PEPs" +"Onderzoeksraad, KU Leuven (BE)",1,1,1,"28095775","ARA-PEPs" +"Vlaams Instituut voor Biotechnologie",1,1,1,"28095775","ARA-PEPs" +"Central Institute of Medicinal and Aromatic Plants",5,2,2,"30150996, 33685383","AromaDb, MAPslnc" +"Swiss National Science Foundation",71,36,30,"32449934, 33037820, 26527719, 31665454, 30357342, 26582924, 27504778, 31353404, 22096233, 33156326, 28086860, 31617559, 24225318, 27899579, 23180783, 26578555, 28985418, 23193254, 32117874, 27899580, 28053161, 31410491, 24270792, 33270111, 33196836, 24234447, 23193273, 33180112, 33174605, 23180791, 27899657, 30664776, 30418610, 30395283, 25378343, 25428351","ASAP, Bgee, CEGA, ChlamDB, CoevDB, enviPath, GNPS, MAdb, MINAS, MetaNetX/MNXref, NaDH, PolyASite, Selectome, SNP2TFBS, SwissRegulon, SugarBindDB, SysteMHC, UCNEbase, GDB17, OrthoDB, GETPrime, ABCD, Gene3D, GENCODE, UniCarbKB, EPD, Europe PMC, OMA, GPCRdb, eggNOG" +"Chan Zuckerberg Initiative",1,1,1,"32449934","ASAP" +"EPFL",1,1,1,"32449934","ASAP" +"Precision Health & related 
Technologies",1,1,1,"32449934","ASAP" +"Agencia Nacional de Promoción Científica y Tecnológica",6,3,3,"32507889, 31713636, 31680160","articles.ELM, DisProt, ELM" +"Cancer Research UK",29,18,16,"32507889, 25270877, 23019048, 24122843, 26590264, 31504189, 31584092, 22544707, 26123534, 26826444, 24163255, 25332396, 33237329, 25414341, 29059374, 26578585, 29858800, 31680160","articles.ELM, CODEX, CRCgene, HTS-DB, MutationAligner, MeLAD, PDBe-KB, SNPnexus, The Mouse Genomes Project, IPD-IMGT/HLA, PED, BCCTBbp, MobiDB, IPD, Gene3D, ELM" +"Consejo Nacional de Investigaciones Científicas y Técnicas",3,3,3,"32507889, 34954795, 31680160","articles.ELM, CoDNaS-RNA, ELM" +"Baden-W??rttemberg Stiftung",1,1,1,"34738791","AroCageDB" +"Deutsche Forschungsgemeinschaft",16,14,14,"34738791, 29198880, 31095607, 32459338, 34699529, 33174596, 29913065, 33749993, 34559210, 27800578, 30714194, 30165582, 34656056, 30256983","AroCageDB, AureoWiki, CiliaCarta, EpiRegio, GH19ED, MitoCarta, OptoBase, OmniPath, QSDB, SCEGRAM, SDRED, Traitpedia, SuperTCM, BacDive" +"Deutscher Akademischer Austauschdienst",2,2,2,"34738791, 28641017","AroCageDB, NANPDB" +"NIDCD NIH HHS",10,3,3,"27193158, 23774715, 26323714","ASL-LEX, Autism Brain Imaging Data Exchange, RNASeqMetaDB" +"Tufts University",2,1,1,"27193158","ASL-LEX" +"Priority Academic Program Development of Jiangsu Higher Education Institutions",3,3,3,"29321052, 29992323, 31603498","ASGDB, CeleryDB, dbInDel" +"Shanghai Sailing Program",3,3,3,"31665428, 33125077, 33306787","ASD, MASI, MetaADEDB" +"Chinese National Precise Medical Research key project",1,1,1,"31665428","ASD" +"Shanghai Health and Family Planning Commission",4,2,2,"31665428, 32941628","ASD, IDDB" +"Shanghai Municipal Education Commission",3,3,3,"31665428, 32941628, 31809863","ASD, IDDB, PsyMuKB" +"Natural Science Foundation of Shanghai Municipal Commission of Health and Family Planning",1,1,1,"31665428","ASD" +"Shanghai Natural Science Foundation",2,2,2,"31665428, 28187703","ASD, iHMS" 
+"Shanghai Science and Technology Innovation",2,2,2,"31665428, 32941628","ASD, IDDB" +"National Key R&D Program of China Grant",1,1,1,"31843802","ASRD" +"Program for Guangdong Introducing Innovative and Entrepreneurial Teams",4,4,4,"31843802, 33406221, 34907423, 33021671","ASRD, iCysMod, iCAV, RMVar" +"Shenzhen Sci-Tech Fund",1,1,1,"31843802","ASRD" +"Central Universities in China",2,2,2,"32294195, 33313674","ASFVdb, ColorCells" +"National Key Research and Development Program",20,13,13,"32294195, 33009914, 31691822, 34097004, 29961817, 32159764, 31584086, 31640808, 32620074, 31021279, 33175170, 33196801, 32406920","ASFVdb, CoVdb, DrLLPS, DevOmics, LncCeRBase, MMHub, PGG.Han, PGG.SNV, TeaCoN, UVGD, 2019nCoVR, NONCODE, AnnoLnc" +"Hubei Province Natural Science Foundation",1,1,1,"34839012","ASER" +"Natural Science Foundation of Shaanxi Province",1,1,1,"30239683","ATD" +"National Institute of Plant Genome Research, India",1,1,1,"30624648","AtFusionDB" +"Comisión Nacional de Investigación Científica y Tecnológica",1,1,1,"31535335","Atacama" +"Zhejiang Provincial Natural Science Foundation of China",3,3,3,"32681639, 34085038, 33287903","ATdb, EyeDiseases, KVarPredDB" +"WeiJian Special Foundation, Zhejiang University School of Public Health",1,1,1,"32681639","ATdb" +"Natural Science Foundation",3,3,3,"33125076, 32193291, 33219685","ATACdb, LncSpA, Lnc2Cancer" +"Autism Speaks",6,1,1,"23774715","Autism Brain Imaging Data Exchange" +"NINDS NIH HHS",58,20,16,"23774715, 23203874, 24991954, 21249531, 24700709, 25166490, 25542617, 25953081, 29145608, 21821666, 26048622, 26323714, 31171447, 23226127, 25392405, 26590263, 26919060, 26311606, 23203872, 24270788","Autism Brain Imaging Data Exchange, CIL-CCDB, CORTECON, DIADEM, DBDB, isoMETLIN, MSeqDR, MIsoMine, MeDReaders, NeuroPedia, PedsDTI, RNASeqMetaDB, SynGO, UCLA Multimodal Connectivity Database, non-human primate reference transcriptome resource, Lynx" +"National Human Genome Research Institute",57,25,22,"30534948, 30486838, 
32227657, 30357393, 34859531, 32345346, 34154643, 32386298, 31642470, 33206959, 33261662, 33290552, 34387941, 33221922, 33237286, 30304474, 33436076, 31713622, 33211869, 30395331, 31691826, 30395287, 33231642, 31598706, 34741192","atSNP, CHESS, dbMTS, GENCODE, gnomAD, REPIC, TE Hub, MGP Portal, WormBase, IntAct, dbNSFP, GOC, PharmGKB, UCSC Genome Browser, UniProtKB, HGNC, Dfam, ENCODE, Rfam, Ensembl, MGD, RGD" +"National Institutes of Health BD2K",1,1,1,"30534948","atSNP" +"Beihang University & Capital Medical University Plan",2,1,1,"33219693","AtMAD" +"Shanghai Municipal Science and Technology",3,3,3,"33219693, 31647096, 33084874","AtMAD, proGenomes2, OGEE" +"National Key Research and Development Plan Program",2,2,2,"30215764, 30329095","AWESOME, CancerSplicingQTL" +"Department of Biotechnology, Ministry of Science and Technology, National Bioscience Award project<Q4/>",1,1,1,"30669929","AutophagySMDB" +"Council of Scientific and Industrial Research (CSIR) 12th Plan Network project Genesis",1,1,1,"30669929","AutophagySMDB" +"Natural Sciences and Engineering Research Council of Canada",13,12,12,"30893420, 33735471, 31016417, 33599246, 27863956, 31825307, 33363449, 26251998, 29377907, 33206959, 33305318, 31701148","AYbRAH, CLRP, DNAmod, Gemma, IHEC, MouseBytes, PASS, PhenomeCentral, ProtDataTherm, IntAct, PED, JASPAR" +"Ministry of Science, Research and the Arts of the State of Baden-Württemberg",1,1,1,"33176685","AVIMM" +"Projekt DEAL",2,2,2,"33176685, 33423696","AVIMM, COCONUT" +"Canadian Institutes of Health Research",39,32,29,"22135301, 31095607, 24203711, 26048563, 31016417, 33382035, 23109553, 22613085, 21492431, 23180781, 24203342, 27863956, 29206899, 31825307, 23203867, 31724725, 22009677, 26251998, 26578582, 24203708, 25332401, 32442307, 22064855, 22102575, 28158179, 24174537, 31665441, 26481353, 26531826, 33313828, 23650175, 31701148","BacMap, CiliaCarta, DrugBank, CYCLoPs, DNAmod, DIPPER, ECMDB, HAltORF, KID, InnateDB, iRefWeb, IHEC, MOSAIC, MouseBytes, 
NetwoRx, oRNAment, PhenoM, PhenomeCentral, Pseudomonas Genome, SMPDB, TopFIND, SYNERGxDB, YMDB, YeTFaSCo, TrypsNetDB, DGV, CARD, JASPAR, PSORTdb" +"Federal Ministry of Education and Research",8,6,6,"30272193, 32976589, 23607573, 32976578, 31665479, 30256983","BACTOME, DIGGER, PID-NET, TREND-DB, ProteomicsDB, BacDive" +"European Research council",2,1,1,"30272193","BACTOME" +"European Research Council",59,40,36,"30272193, 28453651, 23486613, 31066443, 33084904, 25352549, 26582924, 27664130, 28245064, 30357379, 34485385, 30967549, 24150937, 24194598, 24253300, 32618424, 22096233, 25378328, 28182744, 24198250, 24275491, 26586809, 26467481, 26339475, 28832569, 28985418, 31171447, 24082050, 31504823, 31647096, 33045721, 33206959, 24297252, 30496475, 28053165, 24234451, 29155946, 26582926, 30395289, 30664776","BACTOME, Cancer PanorOmics, CGOB, DrugComb, DualSeqDB, euL1db, enviPath, ENVO, FRED, EndoDB, Fuzzle, iFISH, LoQAtE, JASPAR, InvFEST, LymphoAtlas, MINAS, MyMpn, OMDB, POGO-DB, PhylomeDB, probeBase, SIGNOR, SwissPalm, SweGen, SysteMHC, SynGO, yApoptosis, WALTZ-DB, proGenomes2, ViruSurf, IntAct, eggNOG, PlanMine, GPCRdb, PRIDE" +"German Research Foundation",3,3,3,"30272193, 32766702, 33051671","BACTOME, Male Fertility Gene Atlas, StreptomeDB" +"Nanyang Technological University - Jurong Campus",1,1,1,"34838806","bacteria.guru" +"Alzheimer's Society",1,1,1,"24077841","BBGRE" +"National Key R&D Program of China",41,24,24,"31665503, 34345532, 30380071, 31603498, 30371881, 31811943, 32858223, 31665439, 32193291, 32512182, 29743053, 29982280, 30445567, 32122231, 30010730, 30380102, 31642469, 31799597, 30546860, 33181826, 31670377, 30407568, 30371818, 33704069","BBCancer, CanImmunother, DSMNC, dbInDel, iDog, GliomaDB, hTFtarget, MaGenDB, LncSpA, IRESbase, PDXliver, PepBDB, OncoBase, ncRPheno, RabGTD, qPhos, PhenoModifier, RNAactDrug, YaTCM, WGVD, NPInter, ICEberg, piRBase, 2019nCoVR" +"Science and Technology Program of Guangzhou",6,3,3,"31665503, 31524396, 
30380102","BBCancer, HybridMolDB, qPhos" +"Guangdong Natural Science Foundation",1,1,1,"31665503","BBCancer" +"Guangdong Introducing Innovative and Entrepreneurial Teams",3,3,3,"31665503, 32496513, 30380102","BBCancer, gutMEGA, qPhos" +"Australian Grain Research and Development Corporation",1,1,1,"33247932","BarleyVarDB" +"Key-Area Research and Development Program of Guangdong Province",1,1,1,"34736471","BDdb" +"Science, Technology and Innovation Commission of Shenzhen Municipality",3,2,2,"34736471, 31373607","BDdb, Pan Immune Repertoire Database" +"Guangdong Provincial Key Laboratory of Genome Read and Write",2,2,2,"34736471, 32705130","BDdb, CNSA" +"Stiftung für Pathobiochemie und Molekulare Diagnostik",1,1,1,"34736471","BDdb" +"Shenzhen Municipal Government of China",2,2,2,"34736471, 31373607","BDdb, Pan Immune Repertoire Database" +"Swiss Institute of Bioinformatics",3,3,3,"33037820, 33174605, 30476243","Bgee, OMA, STRING" +"Horizon 2020",41,24,23,"33037820, 31713636, 32976589, 33166383, 34220930, 30357370, 32986834, 35559777, 33647438, 30321422, 31647096, 31696235, 33211851, 33237329, 33270898, 33305318, 30357350, 30395289, 33211869, 30418610, 31680160, 30395270, 31691826, 31722421","Bgee, DisProt, DIGGER, FireProtDB, FAANG, liqDB, ModelSEED, MESOCOSM, PepTherDia, RetroRules, proGenomes2, MGnify, WikiPathways, MobiDB, GPCRdb, PED, Pfam, PRIDE, Rfam, eggNOG, ELM, ENA, Ensembl" +"NIH",60,25,25,"33037820, 29985970, 26855883, 26019122, 33995899, 30951672, 33897975, 29028885, 32502232, 29206899, 31197322, 32436932, 29186335, 31584092, 34514416, 29092939, 30052772, 31171447, 33021634, 34387941, 31722416, 33313778, 28891124, 33211869, 27450113","Bgee, BrainEXP, CressInt, DX, DRscDB, exRNA Atlas, FGDB, HoTResDB, LabxDB, MOSAIC, MetOSite, miRactDB, OncoPPi, PDBe-KB, SCISSORâ, TCPA, Terabase, SynGO, NDB, PharmGKB, MEGARes, VIPERdb, DOCKGROUND, Rfam, PDB" +"Canton de Vaud",1,1,1,"33037820","Bgee" +"Intramural NIH HHS",68,62,46,"22084196, 22139929, 23203889, 23197659, 
33086069, 24016071, 28383342, 26582918, 23193260, 25428365, 26438539, 23193291, 24297256, 25010047, 22102591, 22139925, 23193258, 23193275, 24558441, 25982314, 31600197, 21873645, 25220766, 22139910, 23093593, 26048622, 22102570, 31103066, 23044550, 23284744, 32027495, 21177655, 22080546, 23180778, 24304891, 25392405, 22140110, 23696674, 24259432, 24319143, 25352543, 25414350, 25428361, 34366563, 25414356, 27008011, 26302176, 26657633, 23203872, 24198245, 26553804, 22135289, 23193287, 24931982, 29927072, 30774152, 26400175, 31851420, 23180798, 24316578, 25510495, 22121212","BGMUT, BioProject, Bookshelf, CDD, CellMiner-SCLC, CellMinerHCC, CHEAR, ClinVar, Clone, COGs, CRISPRz, DGVa, dbGaP, GBM-BioDP, IBIS, GWASdb, GEO, GTR, HTD, GermlncRNA, iCite, iRefIndex, LabeledIn, NCBI Taxonomy, PaVE, PedsDTI, ProPortal, Placental Atlas Tool, SemMedDB, StRAP, tautomeric, COMBREX, International Nucleotide Sequence Database Collaboration, ZInC, Virus Variation, non-human primate reference transcriptome resource, PubChem, CGD, RefSeq, MMDB, RNAcentral, GenBank, GRASP, DSLD, MTB, Rfam" +"The Council of Scientific and Industrial Research, India",1,1,1,"34897852","BGvar" +"National Beef Cattle and Yak Industrial Technology System, China",1,1,1,"32540200","BGVD" +"National Thousand Youth Talents Plan, China",1,1,1,"32540200","BGVD" +"Danish National Research Foundation",2,2,2,"33010170, 33152079","BiG-FAM, antiSMASH" +"Novo Nordisk Foundation",12,8,7,"33010170, 31612915, 33156327, 33270898, 30395294, 33152079, 30418610, 33237311","BiG-FAM, MIBiG, TCRD, GPCRdb, antiSMASH, eggNOG, STRING" +"Graduate School for Experimental Plant Sciences",3,3,3,"33010170, 31612915, 33152079","BiG-FAM, MIBiG, antiSMASH" +"NCATS NIH HHS",48,30,28,"25378330, 25516260, 25190456, 23893318, 23630576, 23172289, 23197658, 25951377, 30371892, 30951672, 31509535, 31701147, 27069559, 34349127, 25102069, 33245774, 33361798, 27026615, 24223973, 27643925, 33035346, 33156327, 26578587, 26590263, 33151287, 25388151, 
27899567, 25378335, 23161678, 30395287","Binding MOAD, Bioclock, CCGD, CDSA, CMS, dictyBase, DGA, DeTEXT, DrugCentral, exRNA Atlas, GutFeelingKB, LINCS, MD-CTS, MiREDiBase, Panorama, PAGER-CoV, Open Cancer TherApeutic Discovery, PheKB, SWEETLEAD, StemCellCKB, tRFtarget, TCRD, BioGPS, Lynx, EVpedia, GOC, MeT-DB, UniProtKB" +"The Danish Ministry of Higher Education and Science",1,1,1,"31405382","bio.tools" +"Villum Fonden",6,6,6,"31405382, 26062809, 32665542, 32976589, 27504778, 28182744","bio.tools, CMRegNet, CoVex, DIGGER, GNPS, OMDB" +"ELIXIR-EXCELERATE under the European Union's Horizon 2020 research and innovation programme",1,1,1,"31405382","bio.tools" +"Novo Nordisk Foundation Center for Protein Research",20,18,11,"31405382, 22058129, 25484339, 28077569, 23203871, 29617745, 33156327, 29036351, 33151287, 24297252, 24293645, 25352553, 26582926, 26590256, 27924014, 30418610, 30476243, 33237311","bio.tools, DistiLD, DISEASES, RAIN, STRING, TISSUES, TCRD, miRandola, DrugCentral, eggNOG, STITCH" +"Department of Biotechnology",5,4,4,"28875065, 29939244, 32829394, 33459764","BioFuelDB, PtRFdb, TGV, SWITCHES" +"DST-INSPIRE Fellowship",1,1,1,"28875065","BioFuelDB" +"Engineering and Physical Sciences Research Council",21,11,11,"27246819, 27899646, 30418645, 31584092, 24214988, 25414345, 29316788, 24214965, 34655133, 30395294, 31740968","BioHub, FAIRDOMHub, MemProtMD, PDBe-KB, SAbDab, SUPERFAMILY, SynBioHub, ChEMBL, OAS, antiSMASH, SEVA-DB" +"Korean Government",4,1,1,"31599923","BiomeNet" +"National Research Foundation of Korea",16,9,9,"31599923, 26272709, 30418591, 33137185, 23219992, 30733462, 30602089, 31680157, 29156309","BiomeNet, GenomewidePDB, HumanNet, iCSDB, MENT, PFDB, STADIUM, ChimerDB, SMBP" +"Infinitus Co. 
Ltd",1,1,1,"29529902","BioPepDB" +"NIEHS NIH HHS",34,16,10,"31133849, 29985970, 23630576, 24101916, 28557712, 25953081, 33361798, 22171328, 24288368, 23093600, 25326323, 27651457, 29846728, 30247620, 33068428, 29351546","BioPlanet, BrainEXP, CMS, FunGene, ICE, MIsoMine, Open Cancer TherApeutic Discovery, seeQTL, RDP, CTD" +"Natural Environment Research Council",19,12,12,"26820405, 33167031, 25189782, 21707958, 29897419, 33084905, 23193267, 25558364, 25740460, 30094004, 25025376, 26578574","Biosurveillance Analytics Resource Directory, COG, GOBLET, Littorina sequence database, MARDy, MeDAS, PR(2, PREDICTS, PhytoREF, SuperbaSE, tropiTree, Ensembl" +"the National High Technology Research and Development Program of China",1,1,1,"27623959","BmncRNAdb" +"Chongqing Graduate Student Research Innovation Project",1,1,1,"27623959","BmncRNAdb" +"the National Natural Science Foundation of China",10,5,5,"27623959, 34964846, 32183712, 29743053, 34154536","BmncRNAdb, COGVIC, CuAS, PDXliver, TeaAS" +"Division of Environmental Biology",3,1,1,"28365726","BMW" +"Zhejiang Provincial Key Research Project",1,1,1,"33399824","BnaGVD" +"Jiangsu Collaborative Innovation Centre for Modern Crop Production",1,1,1,"33399824","BnaGVD" +"Department of Science and Technology",5,4,4,"31958638, 32337573, 28498885, 33892308","BoMiProt, MPTherm, PROXiMATE, SAPdb" +"National Postdoctoral Fellowship",1,1,1,"31958638","BoMiProt" +"Council of Scientific and Industrial Research",17,13,13,"31958638, 27832200, 27472917, 32219412, 32090260, 28854643, 29129553, 30858555, 30349509, 30307523, 31796964, 29432422, 33112702","BoMiProt, Cancertope, CicerTransDB, circad, HSPMdb, MSDB, mitoepigenomeKB, MorCVD, PanGFR-HM, PVsiRNAdb, PRP, TopicalPdb, LncRBase" +"Science and Engineering Research Board",8,8,8,"31958638, 27832200, 34791106, 34793786, 32090260, 35424258, 28961249, 33112702","BoMiProt, Cancertope, database of cancer mutant protein domains, FCCP, HSPMdb, MeFSAT, SWI/SNF Infobase, LncRBase" +"Ministry of Human 
Resource Development",1,1,1,"31958638","BoMiProt" +"NIMHD NIH HHS",5,3,3,"24994456, 25190456, 25378335","BorreliaBase, CCGD, MeT-DB" +"British Neuropathological Society",1,1,1,"34528715","BRAIN UK" +"Medical Research Council Canada",1,1,1,"34528715","BRAIN UK" +"Brain Tumour Research",1,1,1,"34528715","BRAIN UK" +"National Key Plan for Scientific Research and Development of China",3,3,3,"29985970, 31774482, 32349124","BrainEXP, FluReassort, VirusCircBase" +"Innovation-Driven Project of Central South University",2,1,1,"29985970","BrainEXP" +"Scottish Funding Council",2,1,1,"26794641","BRAINS" +"DBT/Wellcome Trust India Alliance",1,1,1,"26586806","BreCAN-DB" +"Wellcome Trust",267,152,100,"26586806, 29126148, 23486613, 22718786, 25270877, 25355519, 23868908, 24319146, 22962312, 23074185, 23193291, 25505093, 28299908, 22080548, 22116062, 22139938, 22912585, 23161689, 23175615, 25348407, 25378340, 30008982, 30357393, 32726198, 34493866, 22923302, 23193293, 26342919, 26578596, 27733501, 27976751, 29040670, 23667450, 23998809, 27638885, 21856757, 22080565, 23935057, 30418645, 24229347, 24297257, 25172923, 25399418, 26476444, 31584092, 22363733, 23674503, 24304897, 25361970, 25593348, 28748223, 31598690, 23203869, 24504151, 30481257, 33416848, 24194607, 26123534, 26249811, 26582922, 28985418, 33095862, 22080546, 24194600, 24318814, 25300491, 25348409, 30398663, 30601939, 22067447, 23203987, 23245209, 24214965, 24259432, 25352543, 26673716, 27899622, 28981707, 29145643, 29533231, 30407529, 33206959, 33211879, 33290552, 24150940, 23203883, 24270792, 29161421, 33270111, 29121237, 23087376, 31584097, 26919060, 33952332, 31680154, 26657633, 26481351, 23203882, 24163254, 24316576, 30304474, 27899562, 27794554, 30357350, 24265224, 24311564, 27899567, 27899578, 27903906, 33180112, 27899635, 29140473, 29165655, 30395289, 26888907, 30398643, 33125078, 22086950, 27789705, 27450113, 30395331, 30371878, 29112716, 29761457, 30398656, 33166387, 26527722, 26896847, 25883136, 33106848, 
24288371, 25723102, 23125362, 23584835, 23193274, 30357387, 23161678, 33156333, 23180798, 33170273, 29155950, 21936816, 24157837, 25348405, 30407521, 26527717, 31691826, 31722421, 26578574, 33175160, 31598706, 22086963","BreCAN-DB, CCDS, CGOB, ChromoHub, CODEX, COSMIC, CREDO, CSA, DECIPHER, DARNED, DGVa, diXa, DrugAge, ENA, GeneDB, Gene3D, GenDR, GeneTack, EuPathDB, Genome3D, Europe PMC, EMDB, GENCODE, EnteroBase, eQTL, HSPIR, HAGR, Geroprotectors, HPMC, HipSci, Hepitopes, ICTV, LipidHome, LongevityMap, IGSR, modENCODE, modMine, metabolicMine, MemProtMD, NeuroGeM, NECTAR, Naked Mole Rat Genome Resource, OMA, PDBe, PDBe-KB, PrionHome, PhosphoGRID, PPD, PomBase, PlasmoGEM, PhenoPlasm, ProCarbDB, SIFTS, SATuRN, SkeletalVis, SARS CoV-2, TreeFam, The Mouse Genomes Project, SurvCurv, SureChEMBL, SysteMHC, ThermoMutDB, International Nucleotide Sequence Database Collaboration, IMPC, Electronic Mouse Atlas of Gene Expression, TrypanoCyc, Human Disease Ontology, CATH, UbiHub, Ensembl Genomes, Ensembl, HGNC, ChEMBL, RefSeq, RNAcentral, Pfam, UniProt, EPD, MEROPS, wwPDB, BioSamples, IntAct, ArrayExpress, GOC, IUPHAR-DB, MSeqDR, SANCDB, TDR Targets, Expression Atlas, PRIDE, RepeatsDB, InterPro, JASPAR, CARD, PDB, FANTOM5, Rfam, ENCODE, Gramene, UniProtKB" +"Department of Biotechnology, Govt. 
of India",2,2,2,"27164438, 27285615","BrucellaBase, Northeast India Helminth Parasite Information Database" +"Innovationsfonden",1,1,1,"33539279","BSGatlas" +"Japan Agency for Medical Research and Development",13,10,10,"32026396, 33511845, 29532461, 33740463, 31640730, 33661371, 33179747, 30462320, 33166387, 33156332","BSM-Arc, FMODB, HpBase, ICSCB, NARD, OryzaGenome, jMorp, COXPRESdb, International Nucleotide Sequence Database Collaboration, DDBJ" +"Japan Agency for Medical Research and Development (JP)",1,1,1,"32026396","BSM-Arc" +"CAPES",1,1,1,"25656309","BtoxDB" +"CNPq",1,1,1,"25656309","BtoxDB" +"FAPESP",3,2,2,"25656309, 33221926","BtoxDB, HumanMetagenomeDB" +"NLM NIH HHS",47,40,33,"27050421, 26519468, 23893318, 32392296, 33167031, 33211888, 22110038, 22748121, 26567549, 22058127, 31701147, 33137204, 33166392, 22146221, 27907895, 24406170, 26504143, 21624156, 27733502, 29036590, 23066107, 23175606, 26590254, 31598702, 31680168, 21447597, 26612867, 34314492, 26590405, 22102590, 23203985, 33211869, 33166387, 23161681, 33106848, 29069413, 24253303, 25348405, 33151290, 26519399","C-terminome, ccmGDB, CDSA, CoCoCoNet, COG, CSEA-DB, GeneSigDB, Genes2FANs, GEneSTATION, MACiE, LINCS, KinaseMD, LitCovid, MnM, mutLBSgeneDB, PeptiSite, PDID, PHARE-KB, SZGR, TissGDB, TSGene, WholeCellKB, UET, VISDB, TSEA-DB, UniProtKB, Dfam, GlyTouCan, Rfam, International Nucleotide Sequence Database Collaboration, RNAcentral, WormBase, PubChem" +"Ministry of Education, Science and Technology",1,1,1,"22584068","CACG" +"European Commission FP7",32,19,18,"22080563, 22110040, 22096229, 22096227, 22730453, 21472892, 22139920, 23180794, 22102589, 22135296, 21520333, 22753137, 21702733, 21995777, 22102590, 22135291, 22096232, 22121220, 22086963","CADRE, ELM, InterPro, MINT, MuteinDB, PORCN, SitEx, TFClass, UniPathway, VectorBase, LOVD, ALSoD, BRAD, UniProtKB, Rhea, BioSamples, IntAct, Ensembl" +"Edward Via College of Osteopathic Medicine",1,1,1,"30329086","CAGm" +"Indo-French Centre for the 
Promotion of Advanced Research",1,1,1,"33444113","CALR-ETdb" +"Grand Equipement National de Calcul Intensif",1,1,1,"33444113","CALR-ETdb" +"Division of Graduate Education",1,1,1,"33306801","CamRegBase" +"Basic Energy Sciences",1,1,1,"33306801","CamRegBase" +"NCI",3,2,2,"34903605, 29092931","Cancer-Immu, Cistrome Cancer" +"Cancer Center Support Grant",1,1,1,"34903605","Cancer-Immu" +"SPORE in Breast Cancer",1,1,1,"34903605","Cancer-Immu" +"University Grants Commission",4,4,4,"32360910, 27832200, 33136065, 29109711","CancerEnD, Cancertope, HPREP, miPepBase" +"Council of Scientific and Industrial Research, India",2,2,2,"32360910, 34107869","CancerEnD, PINIR" +"Italian Association for Cancer Research",4,2,2,"31598703, 31665520","CancerGeneNet, SIGNOR" +"National Science Fund for Distinguished Young Scholars",3,3,3,"33010176, 31584086, 31640808","CancerImmunityQTL, PGG.Han, PGG.SNV" +"Japan Society for the Promotion of Science",25,13,13,"22659240, 28234924, 33196844, 33511845, 30046160, 29206899, 23911837, 28481528, 33002111, 30371824, 29668970, 30407557, 29216398","CancerProView, DNApod, dbCNS, FMODB, KampoDB, MOSAIC, PTP-central, PubChemQC, PyDISH, ViBrism, MitoFish, FANTOM5, ATTED-II" +"Ministry of Education, Culture, Sports, Science and Technology",6,6,6,"22659240, 29532461, 33002111, 33179747, 33166387, 33156332","CancerProView, HpBase, PyDISH, jMorp, International Nucleotide Sequence Database Collaboration, DDBJ" +"China Postdoctoral Science Foundation",44,30,30,"30329142, 34345532, 29939204, 33471060, 30289549, 30321400, 30335161, 30380109, 31584099, 32858223, 33264402, 33984507, 29788225, 31713618, 32820322, 28968812, 32159764, 32990748, 30445567, 32111231, 32849839, 30913342, 32990749, 33360695, 34273956, 33175170, 33993461, 31599330, 33306787, 33196801","CancerSEA, CanImmunother, CARDIO-LNCRNAS, Cellinker, CellMarker, EWASdb, EVmiRNA, iEKPD, gutMDisorder, hTFtarget, HERB, HisPhosSite, LnChrom, LncTarD, LncAS2Cancer, MetSigDis, MMHub, miRNASNP-v3, OncoBase, 
NoncoRNA, RIGD, TPIA, TCRdb, ToxinDB, TCM-Blast, 2019nCoVR, TUPDB, LSD, MetaADEDB, NONCODE" +"National Program on Key Basic Research",4,4,4,"30329142, 30289549, 30476305, 30407549","CancerSEA, CellMarker, LncACTdb, Lnc2Cancer" +"National High Technology Research and Development Program of China",6,5,5,"30329142, 29939204, 30289549, 28575155, 30252093","CancerSEA, CARDIO-LNCRNAS, CellMarker, Dynamic-BM, iProX" +"Higher Education in Heilongjiang Province",1,1,1,"30329142","CancerSEA" +"Heilongjiang Postdoctoral Foundation",2,2,2,"30329142, 30289549","CancerSEA, CellMarker" +"Harbin Medical University",9,9,9,"30329142, 30289549, 31665430, 30476305, 31617563, 33045741, 32047897, 33095866, 30407549","CancerSEA, CellMarker, ENdb, LncACTdb, LnCeVar, LncSEA, TRlnc, VARAdb, Lnc2Cancer" +"Indian Council of Medical Research",6,5,5,"27832200, 29109711, 32345779, 25269378, 32380213","Cancertope, miPepBase, PSCRIdb, SPGDB, TrypInDB" +"Open Source Drug Discovery",2,2,2,"27832200, 28759605","Cancertope, THPdb" +"Austrian Science Fund FWF",7,6,6,"21718534, 24225386, 26590402, 30357379, 21366916, 26586809","CANGS, COMMODE, EffectiveDB, EndoDB, PoPoolation, probeBase" +"Shenzhen Basic Research Fund",1,1,1,"34345532","CanImmunother" +"Fundamental Research Funds of the Central Universities",1,1,1,"34345532","CanImmunother" +"Natural Science Foundation of Guangdong Province",5,5,5,"34345532, 29178828, 31524396, 30380102, 32761141","CanImmunother, CrusTF, HybridMolDB, qPhos, SPDB" +"Strategic Priority CAS Project",1,1,1,"34345532","CanImmunother" +"Sanming Project of Medicine",1,1,1,"34345532","CanImmunother" +"Guangdong Project",1,1,1,"34345532","CanImmunother" +"Guangdong Basic and Applied Basic Research Foundation, China",1,1,1,"34345532","CanImmunother" +"Dongsheng Yu",1,1,1,"34345532","CanImmunother" +"Sun Yat-sen University",1,1,1,"34345532","CanImmunother" +"Support Scheme of Guangzhou for Leading Talents in Innovation and Entrepreneurship",1,1,1,"34345532","CanImmunother" 
+"Wenliang Zhang",1,1,1,"34345532","CanImmunother" +"Binghui Zeng",1,1,1,"34345532","CanImmunother" +"Weizhong Li",1,1,1,"34345532","CanImmunother" +"Ministerio de Ciencia, Innovación y Universidades",4,3,3,"33942873, 33655207, 31647096","CANNUSE, TMSNP, proGenomes2" +"Generalitat de Catalunya",5,5,5,"33942873, 21491493, 30335169, 31608375, 32786900","CANNUSE, Noncoded Amino acids Database, PopHumanScan, GSAD, BCE" +"Spanish government",1,1,1,"33942873","CANNUSE" +"Institut d’Estudis Catalans",1,1,1,"33942873","CANNUSE" +"Stavros Niarchos Foundation",2,2,2,"34174131, 33080028","CanVaS, Peryton" +"Danish Council for Independent Research–Technology and Production Sciences",1,1,1,"29509874","CarbonylDB" +"National Program on Key Basic Research Project",5,2,2,"29939204, 29617941","CARDIO-LNCRNAS, SCRIPT-MAP" +"Heilongjiang Province Youth Science and technology",2,1,1,"29939204","CARDIO-LNCRNAS" +"Natural Science Foundation of Heilongjiang Province",9,8,8,"29939204, 28334239, 31665430, 31713618, 30371817, 30184150, 32047897, 30380072","CARDIO-LNCRNAS, coexpressMAP, ENdb, LncTarD, SEdb, TRCirc, TRlnc, LncRNA2Target" +"Weihan Yu Youth Science Fund Project of Harbin Medical University",1,1,1,"29939204","CARDIO-LNCRNAS" +"Shanghai Institutes for Biological Sciences, Chinese Academy of Sciences",1,1,1,"26040787","CARMO" +"Strategic Priority Research Program of the Chinese Academy of Sciences",4,3,3,"26040787, 30364969, 30365027","CARMO, EWAS Atlas, gcMeta" +"Natural Science Foundation of Guangdong Province-Outstanding Youth Projec",1,1,1,"35134148","CATA" +"Basic & Applied Basic Research Programs of Guangdong province",1,1,1,"35134148","CATA" +"Basic & Applied Basic Research Programs of Guangdong province",1,1,1,"35134148","CATA" +"Swedish Cancer Foundation",1,1,1,"30717315","CBD" +"Svenska Forskningsrådet Formas",1,1,1,"30717315","CBD" +"Dutch Science Foundation",1,1,1,"29020642","cBiT" +"Dutch province of Limburg",1,1,1,"29020642","cBiT" +"Indian Council of Agricultural 
Research",3,2,2,"29753807, 28096778","CbLncRNAdb, PineElm_SSRdb" +"National Institute of Mental Health",5,5,5,"32386544, 33599246, 26048622, 29370821, 33206959","CCFv3, Gemma, PedsDTI, PhenoDis, IntAct" +"CCR NIH HHS",5,5,5,"23893318, 33086069, 21491493, 27377064, 27587585","CDSA, CellMiner-SCLC, Noncoded Amino acids Database, RID, REDIportal" +"NCRR NIH HHS",57,30,28,"23893318, 23203874, 23197658, 23264352, 26946289, 21177656, 22748121, 23104379, 23504933, 27504778, 21880229, 22146221, 21821666, 23674503, 24227675, 25309735, 22434841, 24223973, 26516187, 22135298, 23550210, 25348409, 25392405, 27053566, 21447597, 27924039, 25501940, 23203872, 24316576, 22102590","CDSA, CIL-CCDB, DGA, DegraBase, dbPEC, Gene Expression Barcode, Genes2FANs, GFDB, Human Proteinpedia, GNPS, Monogenic Diabetes Registry, MnM, NeuroPedia, PhosphoGRID, PhosphoNetworks, SkateBase, TGD, SWEETLEAD, SynLethDB, PhosphoSitePlus, cBioPortal, Human Disease Ontology, non-human primate reference transcriptome resource, Vaxar, UniProtKB, FlyRNAi, SFLD, Ensembl" +"Natural Science Foundation of Jiangsu Province",1,1,1,"29992323","CeleryDB" +"Program for New Century Excellent Talents in University",2,2,2,"29992323, 32510565","CeleryDB, GreenCircRNA" +"Guangzhou science and technology project key project topic",1,1,1,"33471060","Cellinker" +"Basic and Applied Basic Research Fund of Guangdong Province",4,2,2,"33471060, 32833025","Cellinker, MNDR" +"National Key Research and Development Project of China",2,2,2,"33471060, 33003203","Cellinker, QSIdb" +"Construction of Higher Education in Heilongjiang Province",1,1,1,"30289549","CellMarker" +"National Cancer Institute",29,23,23,"33086069, 30202990, 30247654, 34127402, 31588509, 32986834, 21491493, 25102069, 27152146, 29186335, 32810235, 33245774, 33442735, 21656910, 32358997, 34014674, 32027495, 33729437, 33206959, 33849445, 33151287, 33237286, 33290554","CellMiner-SCLC, ChIPprimersDB, HACER, Immu-Mela, MutEx, ModelSEED, Noncoded Amino acids Database, 
Panorama, NCRO, OncoPPi, PCAT, PAGER-CoV, O-GlcNAcAtlas, QuAD, ProNetView-ccRCC, SistematX, tautomeric, TIE, IntAct, TANTIGEN, DrugCentral, UniProtKB, PANTHER" +"Natural Science Foundation of Zhejiang Province",2,2,2,"33147626, 33507270","CellTalkDB, InSexBase" +"National Youth Top-notch Talent Support Program",1,1,1,"33147626","CellTalkDB" +"Biotechnology and Biological Sciences Research Council, UK",4,1,1,"32754757","CerealsDB" +"Beijing Natural Science Foundation",5,4,4,"31428785, 30266409, 29617941, 33306800","CFEA, PlaD, SCRIPT-MAP, CEG" +"Science Foundation of Zhejiang Province",1,1,1,"31428785","CFEA" +"Special Foundation for Key Basic Research of Wenzhou Institute of Biomaterials and Engineering",1,1,1,"31428785","CFEA" +"CAMS Innovation Fund for Medical Sciences",2,1,1,"31428785","CFEA" +"Association Vaincre la Mucoviscidose",1,1,1,"28603918","CFTR-France" +"Saigon University",1,1,1,"28651548","CHD" +"The Vietnam National Gene Fund",1,1,1,"28651548","CHD" +"Natural Science Foundation of the Jiangsu Higher Education Institutions of China",2,2,2,"32608479, 31950190","CHDGKB, PCaLiStDB" +"National Institute of General Medical Sciences",34,22,20,"30486838, 32073269, 25333826, 33995899, 29337142, 33973408, 25102069, 30985146, 29733404, 31494246, 32345346, 27188311, 30871473, 33211851, 30668832, 33206959, 33290552, 34529321, 33237286, 30395331, 30476243, 30395287","CHESS, COLMAR Lipids, Complex Mixture Analysis by NMR, DRscDB, Express, hu.MAP, Panorama, ProteinExplorer, SPAR, SliceIt, REPIC, Structure Surfer, TADKB, WikiPathways, DASHR, IntAct, GOC, antimicrobial peptide database, UniProtKB, STRING" +"Fundos Europeus Estruturais e de Investimento",1,1,1,"33068420","chewie-NS" +"Fundação para a Ciência e a Tecnologia",11,5,5,"33068420, 31713636, 34782688, 29899596, 33822911","chewie-NS, DisProt, CyFi-MAP, LEGE, MENSAdb" +"FEDER",7,4,4,"33068420, 30593925, 31171447, 31680165","chewie-NS, SITVIT2, SynGO, DisGeNET" +"FCT",2,2,2,"33068420, 30820574","chewie-NS, 
Mammalian Stress Granules Proteome" +"Department of Defense",3,2,2,"30202990, 21656910","ChIPprimersDB, QuAD" +"National Institute of Health Core",1,1,1,"30202990","ChIPprimersDB" +"Ministry of Innovation and Technology in Hungary",1,1,1,"31942977","ChIPSummitDB" +"Higher Education Institutional Excellence Programme",1,1,1,"31942977","ChIPSummitDB" +"National Research, Development and Innovation Office of Hungary",2,1,1,"31942977","ChIPSummitDB" +"CIHR",16,13,13,"22718786, 31095607, 23019048, 31679514, 24229347, 31724725, 23674503, 31868683, 32442307, 33125652, 33313828, 27899612, 27789705","ChromoHub, CiliaCarta, CRCgene, MaveDB, NeuroGeM, oRNAment, PhosphoGRID, QPN, SYNERGxDB, iRefWeb, PSORTdb, YMDB, CARD" +"University of the Sunshine Coast",1,1,1,"30045691","CIGene" +"the National Key Research and Development Program of China",3,3,3,"30045691, 34791105, 34154536","CIGene, HFIP, TeaAS" +"National Institute for Health Research (NIHR)",9,6,6,"31095607, 24217912, 27976751, 24297257, 25414323, 26919060","CiliaCarta, HPO, Hepitopes, NECTAR, AFND, MSeqDR" +"KRESCENT",1,1,1,"31095607","CiliaCarta" +"Netherlands Genomics Initiative",1,1,1,"31095607","CiliaCarta" +"NIHR Great Ormond Street Hospital Biomedical Research Center",1,1,1,"31095607","CiliaCarta" +"The Sir Jules Thorn Charitable Trust",1,1,1,"31095607","CiliaCarta" +"Telethon",7,7,7,"31095607, 21435384, 22804825, 24558125, 22096227, 22415763, 24234451","CiliaCarta, HOCTARdb, HuPho, MANTRA, MINT, Rett Networked Database, IntAct" +"Nierstichting",1,1,1,"31095607","CiliaCarta" +"Dutch Governement",1,1,1,"31095607","CiliaCarta" +"Radboud Universitair Medisch Centrum",1,1,1,"31095607","CiliaCarta" +"Radboud Universiteit",1,1,1,"31095607","CiliaCarta" +"Michael Smith Foundation for Health Research",2,2,2,"31095607, 31701148","CiliaCarta, JASPAR" +"Metakids Foundation",1,1,1,"31095607","CiliaCarta" +"National Key R&D Program",9,5,5,"32345360, 32008039, 30380109, 29157087, 31942978","CircAtlas, EPSD, iEKPD, THANATOS, 
LncRNADisease" +"DFG Graduate School",1,1,1,"25234927","circBase" +"MDC-NYU",1,1,1,"25234927","circBase" +"The research start-up fellowship of the University of the Sunshine Coast to M.Z.",1,1,1,"34296749","circExp" +"Ministry of Science and Technology",9,5,5,"27365365, 30395277, 33175170, 32898258, 31701128","CIRCpedia, PlantPAN, 2019nCoVR, EXPath, DriverDB" +"Natural Science Foundation of Guangxi Zhuang Autonomous Region",1,1,1,"33181824","circR2Cancer" +"Hunan Provincial Science and Technology Program",1,1,1,"33181824","circR2Cancer" +"the Natural Science Foundation of Yunnan Province of China",1,1,1,"33181824","circR2Cancer" +"the scientific Research Foundation of Hunan Provincial Education Department",1,1,1,"33181824","circR2Cancer" +"the foundation of Guangxi University",1,1,1,"33181824","circR2Cancer" +"Science and Technology Base and talent Special project of Guangxi",1,1,1,"33181824","circR2Cancer" +"Key Research and Development Plan of Guangxi",1,1,1,"33181824","circR2Cancer" +"the research start-up fellowship of the University of the Sunshine Coast",1,1,1,"33121433","circVAR" +"Fundação Araucária",2,2,2,"33181825, 26887375","CitrusKB, PlanTE-MIR DB" +"Coordenação de Aperfeiçoamento de Pessoal de Nível Superior",6,5,5,"33181825, 33995920, 26887375, 33388027, 33095862","CitrusKB, ExVe, PlanTE-MIR DB, Propedia, ThermoMutDB" +"Conselho Nacional de Desenvolvimento Científico e Tecnológico",5,5,5,"33181825, 33995920, 26887375, 24273012, 33095862","CitrusKB, ExVe, PlanTE-MIR DB, SpliceProt, ThermoMutDB" +"scientific initiation scholarship",1,1,1,"33181825","CitrusKB" +"Fundação de Amparo à Pesquisa do Estado de São Paulo",1,1,1,"33181825","CitrusKB" +"Technological Special Project of Liaoning Province of China",1,1,1,"33109630","CKTTD" +"the Construction of Liaoning Cancer Research Center",1,1,1,"33109630","CKTTD" +"the National Natural Science Foundation in China",1,1,1,"33109630","CKTTD" +"the Fundamental Research Fund for Central 
University",2,1,1,"33109630","CKTTD" +"National Natural Science Foundation in China",1,1,1,"33109630","CKTTD" +"RGYI",1,1,1,"25913159","ClosIndb" +"Ministry of Research and Innovation of Ontario",1,1,1,"33735471","CLRP" +"Canada Research Chairs",4,3,3,"33735471, 31825307, 30407591","CLRP, MouseBytes, IID" +"Shenzhen Municipal Government",5,2,2,"30357356, 28549078","CMAUP, HEROD" +"Zhejiang Province Ministry of Science and Technology",2,2,2,"30357356, 31231773","CMAUP, MepmiRDB" +"MOE",1,1,1,"30668638","CMEP" +"Featured Areas Research Center Program",1,1,1,"30668638","CMEP" +"Higher Education Sprout Project",2,2,2,"30668638, 33035337","CMEP, DockCoV2" +"Advanced Plant Biotechnology Center",1,1,1,"30668638","CMEP" +"National Major Scientific and Technological Special Project",2,1,1,"32986829","CMNPD" +"National Key Technology R&D Program",1,1,1,"32986829","CMNPD" +"Ministry of Science and Technology of the People's Republic of China",156,10,10,"31813095, 28595571, 34992626, 33382035, 33938221, 33984507, 30134653, 33103271, 27643925, 34344425","CMVdb, CottonFGD, CottonGVD, DIPPER, D3DistalMutation, HisPhosSite, PADFrag, NanDeSyn, StemCellCKB, SorGSD" +"The Technology Development Funding of Wuxi",1,1,1,"31813095","CMVdb" +"Ministry of Science and Technology of the People's Republic of China",147,2,2,"31813095, 34344425","CMVdb, SorGSD" +"Government of Jiangsu Province",1,1,1,"31813095","CMVdb" +"Science and Technology Department of Henan Province",1,1,1,"31813095","CMVdb" +"State Key Lab of Microbial Metabolism and Joint Research Funds for Medical and Engineering and Scientific Research at Shanghai Jiao Tong University",1,1,1,"31813095","CMVdb" +"Jiangnan University",1,1,1,"31813095","CMVdb" +"National Natural Science Foundation of China (CN)",3,2,2,"31901979, 28187703","CNAdbCC, iHMS" +"Basic and Applied Basic Research Fund",2,1,1,"33010163","cncRNAdb" +"Center for Biotechnology, National Taiwan University, Taiwan",1,1,1,"34259866","CNVIntegrate" +"Center of Genomics 
and Precision Medicine, Ministry of Science and Technology, Taiwan",1,1,1,"34259866","CNVIntegrate" +"Blood Cancer UK",1,1,1,"25270877","CODEX" +"CONICET",1,1,1,"34954795","CoDNaS-RNA" +"Agencia Nacional de Promoción de la Investigación, el Desarrollo Tecnológico y la Innovación",1,1,1,"34954795","CoDNaS-RNA" +"Universidad Nacional de Quilmes",3,3,3,"34954795, 33237329, 33305318","CoDNaS-RNA, MobiDB, PED" +"U.S. Food and Drug Administration",2,2,2,"31029701, 31982380","Codon and Codon-Pair Usage Tables, TissueCoCoPUTs" +"Open Project of Key laboratory of Loquat Germplasm Innovation and Utilization, Putian University, Fujian Province",1,1,1,"32436316","CoFly" +"the Major Science and Technology Planning Project of Guangdong Province",1,1,1,"34964846","COGVIC" +"the Research Initiative Fund of Southern Hospital 2018",1,1,1,"34964846","COGVIC" +"the Science and Technology Program of Guangzhou",1,1,1,"34964846","COGVIC" +"NIBIB NIH HHS",11,5,5,"22275896, 26019122, 33174603, 31868683, 27899595","COINS, DX, Datanator, QPN, PANTHER" +"Ohio State University",1,1,1,"32073269","COLMAR Lipids" +"Color Genomics",1,1,1,"33181822","Color Data" +"Guangdong Province",4,2,2,"33313674, 33175131","ColorCells, deepBase" +"Guangdong Province Key Laboratory of Computational Science",2,2,2,"33313674, 33175131","ColorCells, deepBase" +"Pearl River S and T Nova Program of Guangzhou",1,1,1,"33313674","ColorCells" +"Youth science and technology",1,1,1,"33313674","ColorCells" +"Guangdong Province Computational Science Innovative Research Team",2,2,2,"33313674, 33175131","ColorCells, deepBase" +"Guangzhou city",4,2,2,"33313674, 33175131","ColorCells, deepBase" +"National Institute of Diabetes and Digestive and Kidney Diseases",6,6,6,"25333826, 33973408, 34859531, 33206959, 33290552, 33237286","Complex Mixture Analysis by NMR, hu.MAP, gnomAD, IntAct, GOC, UniProtKB" +"NIDDK NIH HHS",60,35,34,"25333826, 26393351, 27766955, 33079988, 33787872, 22748121, 23794736, 24101916, 30951672, 27504778, 
28212602, 30418591, 33125055, 33973408, 34859531, 30397019, 21880229, 24203705, 26322134, 33174596, 21890895, 23196988, 24839966, 26072489, 24227675, 24288368, 30674925, 22786849, 31672983, 22067444, 23180778, 24350770, 32728249, 25388151, 24910945","Complex Mixture Analysis by NMR, dkNET, e-GRASP, DKK, Drugmonizome, Genes2FANs, ESCAPE, FunGene, exRNA Atlas, GNPS, HAPPI, HumanNet, HbVar, hu.MAP, gnomAD, iProteinDB, Monogenic Diabetes Registry, MetaRef, Metabolic In silico Network Expansions, MitoCarta, OPM, NURBS, PCD, PAGER, PhosphoNetworks, RDP, Smooth Muscle Transcriptome Browser, Transcriptomine, SPP, zfishbook, ZInC, Model Organism Protein Expression Database, ENCODE, EVpedia" +"Major Scientific Research Platform Construction Project of Shandong Province",1,1,1,"32754758","ConoMode" +"Marine S&T Fund of Shandong Province for Pilot National Laboratory for Marine Science and Technology",2,2,2,"32754758, 32621601","ConoMode, SAGER" +"National Science and Technology Major Project for Significant New Drugs Development",1,1,1,"32754758","ConoMode" +"National Laboratory Director Fund",1,1,1,"32754758","ConoMode" +"NIDCR NIH HHS",24,14,12,"21544197, 27504778, 33119754, 34032471, 29126312, 31588509, 31754718, 22759918, 22064862, 30417254, 31504780, 24185697, 25428374, 27738138","CORE, GNPS, HeRA, HSP, Met-DB, MutEx, MiST, SSKB, CGD, iSyTE, miRDB, UCSC Genome Browser" +"GREEN-IT - Bioresources for Sustainability",1,1,1,"33382885","CorkOakDB" +"BioData.pt - Infraestrutura Portuguesa de Dados Biológicos",1,1,1,"33382885","CorkOakDB" +"HHS | NIH | U.S. 
National Library of Medicine",1,1,1,"34016708","CoronaCentral" +"NIA NIH HHS",35,19,16,"30357367, 24678734, 30951672, 32777102, 33174596, 33514395, 25172923, 29370821, 24194593, 29733404, 24203712, 27188311, 23226127, 22102583, 30668832, 25388151, 26311606, 26553799, 31696236","CORUM, EPSLiM, exRNA Atlas, MSK-KP, MitoCarta, MPM, Naked Mole Rat Genome Resource, PhenoDis, SelenoDB, SPAR, TISdb, Structure Surfer, UCLA Multimodal Connectivity Database, MPD, DASHR, EVpedia" +"Ministry of Agriculture of the People's Republic of China",2,1,1,"28595571","CottonFGD" +"National Science & Technology Major Project of China",1,1,1,"33068433","CovalentInDB" +"Zhejiang Provincial Natural Science Foundation",3,3,3,"33068433, 33264402, 33010159","CovalentInDB, HERB, PROTAC-DB" +"Primary Research and Development Program of Zhejiang Province",1,1,1,"33068433","CovalentInDB" +"National Natural Science Foundation of HeBei Province",1,1,1,"33009914","CoVdb" +"EC | EU Framework Programme for Research and Innovation H2020 | H2020 Priority Societal Challenges | H2020 Health (H2020 Societal Challenges - Health, Demographic Change and Well-being)",1,1,1,"32665542","CoVex" +"Bundesministerium für Bildung und Forschung",6,5,5,"34931882, 32862462, 34699529, 30418610, 30476243","CoxBase, ExED, GH19ED, eggNOG, STRING" +"Bundesministerium für Bildung und Forschung (BMBF)",1,1,1,"34931882","CoxBase" +"Defense Advanced Research Projects Agency",1,1,1,"31725864","CRAFT" +"Chief Scientist Office",2,1,1,"23019048","CRCgene" +"national institute of child health and human development",2,2,2,"34482425, 34698891","CrePortal, MGI" +"NIH HHS",47,31,26,"34482425, 26503254, 29485625, 27664130, 24217912, 27841751, 27899569, 29284660, 30841849, 31679514, 23674503, 26251998, 28862395, 24203712, 21276248, 23175606, 25348409, 25392405, 31283070, 27924039, 30476227, 34314492, 30407596, 26919060, 31722416, 23203872, 22067456, 24285300, 25428363, 25723102, 27980099","CrePortal, Digital Development, Datasets2Tools, 
ENVO, HPO, IRRMC, MEGARes, ModERN, mGAP, MaveDB, PhosphoGRID, PhenomeCentral, PMS_DN, TISdb, ZFNGenome, WholeCellKB, Human Disease Ontology, non-human primate reference transcriptome resource, VIPdb, FlyRNAi, BioGRID, GlyTouCan, Cancer3D, MSeqDR, MGD, FANTOM5" +"nih office of the director",1,1,1,"34482425","CrePortal" +"Center of Genetic Medicine Research",1,1,1,"33010154","CRISP-view" +"Pharmaceutical Research and Manufacturers of America Foundation",1,1,1,"33010154","CRISP-view" +"W.T. Gill Fellowship",1,1,1,"33010154","CRISP-view" +"Developmental Biology of Freshwater Fish",1,1,1,"30285246","CRISPRlnc" +"Developmental Biology of Hunan Province",1,1,1,"30285246","CRISPRlnc" +"Scientific Research Fund of Hunan Provincial Education Department",1,1,1,"30285246","CRISPRlnc" +"Australian Research Council",14,7,7,"30548723, 32928113, 31667690, 31161204, 26434508, 30329070, 33084874","CropSNPdb, CrustyBase, Microndata, PRISMOID, ExoCarta, RaftProt, OGEE" +"Sichuan and Guangxi Provinces",2,2,2,"34927675, 34164644","CRPMKB, HFBD" +"Direct Grant for Research from The Chinese University of Hong Kong",1,1,1,"29178828","CrusTF" +"Collaborative Research Fund of the Research Grants Council",1,1,1,"29178828","CrusTF" +"Data Science and Informatics Core for Cancer Research",2,1,1,"33211888","CSEA-DB" +"Cancer Prevention and Research Institute of Texas",15,11,11,"33211888, 30418591, 32576192, 33137204, 32810235, 29092939, 31432762, 31672983, 31598702, 31680168, 33021634","CSEA-DB, HumanNet, GPSno, KinaseMD, PCAT, TCPA, tRic, SPP, VISDB, TSEA-DB, NDB" +"Ministry of Science and Technology, Taiwan",20,11,11,"28704505, 33035337, 34976312, 34266386, 34025934, 30587128, 28194231, 30846808, 27392072, 30048518, 34384382","CSmiRTar, DockCoV2, LCMD, MitoTox, MycoTRAP-DB, PSRN, SkinSensDB, TACCO, YCRD, YARG, OrchidBase" +"Regional Government of Madrid",2,1,1,"32990755","CSVS" +"Ministry of Economy and Competitiveness",6,1,1,"32990755","CSVS" +"Technology and 
Education",1,1,1,"32294193","ctcRbase" +"Invigorating Health Care through Science",1,1,1,"32294193","ctcRbase" +"Jiangsu Provincial Key Medical Discipline",1,1,1,"32294193","ctcRbase" +"Economic and Social Research Council",1,1,1,"23529715","Database of Instruments for Resource Use Measurement" +"Department of Science and Technology (DST)",1,1,1,"24548788","DR-GAS" +"European Union’s Seventh Framework Programme",1,1,1,"25484339","DISEASES" +"NIDA NIH HHS",30,19,17,"26019122, 30951672, 25166490, 22276777, 21821666, 21890895, 25102069, 31259547, 31228159, 31868683, 27643925, 31171447, 22067444, 31598702, 22102583, 26434508, 29136208, 27587585, 31696236","DX, exRNA Atlas, isoMETLIN, miRdSNP, NeuroPedia, OPM, Panorama, PerMM, PhenoGen, QPN, StemCellCKB, SynGO, zfishbook, VISDB, MPD, ExoCarta, REDIportal" +"Children's Tumor Foundation",1,1,1,"26144527","dasHPPboard" +"Ministerio de Ciencia e Innovación",3,2,2,"26144527, 33252190","dasHPPboard, GRINdb" +"Comunidad de Madrid",2,2,2,"26144527, 31740968","dasHPPboard, SEVA-DB" +"Chinese Human Proteome Projects",2,1,1,"26940364","dbPHCC" +"Key Infectious Disease Project",1,1,1,"26940364","dbPHCC" +"National Hi-Tech Program",2,2,2,"26940364, 30055873","dbPHCC, PhoPepMass" +"the National “973” Key Basic Research Development Program",1,1,1,"27209279","DNetDB" +"the Program of International S&T Cooperation",1,1,1,"27209279","DNetDB" +"the Fundamental Research Program of Shanghai Municipal Commission of Science and Technology",1,1,1,"27209279","DNetDB" +"Key Laboratory of Liaoning Educational Council",1,1,1,"27553277","Cysteinome" +"Central University",1,1,1,"27553277","Cysteinome" +"Howard Hughes Medical Institute",29,26,21,"27907889, 33995899, 27113915, 24002112, 24297255, 33174596, 33780471, 22833564, 24271386, 33653882, 23193263, 28365761, 26138588, 26590259, 26705106, 26612867, 26673716, 27924039, 33221922, 22067452, 25428374, 23203985, 24194605, 24288371, 23125362, 23193274","denovo-db, DRscDB, Hipposeq, M2SG, iPfam, 
MitoCarta, MCPdb, NESdb, OnTheFly, Mycobacterial Systems Resource, PrePPI, PhagesDB, SmedGD, UCSC Genome Browser, WheatExp, Dfam, Pfam, FlyRNAi, WormBase, Rfam, ENCODE" +"Japan Society for the Promotion of Science (JSPS)",2,2,2,"28234924, 30046160","DNApod, KampoDB" +"Transdisciplinary Research Integration Center Project of the Research Organization of Information and Systems",1,1,1,"28234924","DNApod" +"Japanese Ministry of Agriculture, Forestry and Fisheries",1,1,1,"28234924","DNApod" +"Israel Ministry of Science and Technology",1,1,1,"28299908","DrugAge" +"European Union's Horizon 2020 research and innovation programme",2,2,2,"28502574, 31612960","DINeR, PhaSePro" +"the National Key Research and Development Program",1,1,1,"28533016","DRodVir" +"the CAMS Innovation Fund for Medical Sciences",1,1,1,"28533016","DRodVir" +"National Major Science and Technology Project",1,1,1,"28533016","DRodVir" +"Program for Changjiang Scholars and Innovative Research Team in University",1,1,1,"28533016","DRodVir" +"National health and family planning commission of the people's republic of china",1,1,1,"28562632","DrugSig" +"Jiangxi Provincial Natural Science Foundation",1,1,1,"29145823","dbMDEGA" +"National Nature Science Foundation of China",5,2,2,"29145823, 30380072","dbMDEGA, LncRNA2Target" +"King Abdulaziz City for Science and Technology",1,1,1,"29209336","DRDB" +"Hungarian Academy of Sciences",10,6,6,"29385418, 31713636, 31686102, 29036655, 31612960, 31680160","DIBS, DisProt, FoldamerDB, MFIB, PhaSePro, ELM" +"European Molecular Biology Organization",2,2,2,"29385418, 33186585","DIBS, PolarProtDb" +"OTKA",2,1,1,"29385418","DIBS" +"US-Israel Binational Agricultural Research and Development",3,1,1,"30321383","CuGenDB" +"National Institute of Food and Agriculture",7,5,5,"30321383, 26705106, 30357347, 31647100, 31680153","CuGenDB, WheatExp, GDR, BGD, Plant Reactome" +"Anhui Provincial Outstanding Young Talent Support Plan",1,1,1,"30379998","dbCPM" +"oung Wanjiang Scholar Program 
of Anhui Province, China",1,1,1,"30379998","dbCPM" +"Chinese University of Hong Kong",12,3,3,"30380085, 30476229, 30418626","dbAMP, ENPD, dbPTM" +"Anhui Province Funds for Excellent Youth Scholars in Colleges",1,1,1,"30665056","dbHDPLS" +"Anhui Scientific Research Foundation for Returned Scholars",1,1,1,"30665056","dbHDPLS" +"European Union Collaborative Research",1,1,1,"30942868","DEE2" +"University of Toronto McLaughlin Center",1,1,1,"31016417","DNAmod" +"Ontario Institute for Cancer Research",2,2,2,"31016417, 32442307","DNAmod, SYNERGxDB" +"Princess Margaret Cancer Foundation",1,1,1,"31016417","DNAmod" +"University of Toronto",1,1,1,"31016417","DNAmod" +"Ontario Ministry of Training, Colleges and Universities",1,1,1,"31016417","DNAmod" +"Canadian Cancer Society",1,1,1,"31016417","DNAmod" +"Sciences and Engineering Research Council of Canada",1,1,1,"31016417","DNAmod" +"Ontario Ministry of Research, Innovation and Science",1,1,1,"31016417","DNAmod" +"European Commission",7,4,4,"31066443, 33125055, 30020414, 32248568","DrugComb, HbVar, SKEMPI, FINDbase" +"China Scholarship Council",6,6,6,"31066443, 28549078, 31637139, 27643925, 33035346, 33051671","DrugComb, HEROD, ncRNA2MetS, StemCellCKB, tRFtarget, StreptomeDB" +"Academy of Finland Research Fellow",1,1,1,"31066443","DrugComb" +"Finland's EDUFI Fellowship",1,1,1,"31066443","DrugComb" +"Scientific and Technological Research Council of Turkey &#x2013; TUBITAK",1,1,1,"31581093","DORMAN" +"Ministarstvo Prosvete, Nauke i Tehnološkog Razvoja",1,1,1,"31593887","Distances of Amino Acids" +"111 Project",1,1,1,"31598709","DNMIVD" +"Beihang University & Capital Medical University Advanced Innovation Center for Big Data-Based Precision Medicine Plan",1,1,1,"31598709","DNMIVD" +"China Human Proteome Project",2,1,1,"31598709","DNMIVD" +"Rose Hills Foundation",2,2,2,"31612957, 31665425","DNAproDB, TFBSshape" +"Human Frontier Science Program",2,2,2,"31612957, 31665425","DNAproDB, TFBSshape" +"Natural Science Foundation of 
China",35,13,13,"31691822, 31598693, 32008039, 30380109, 34907423, 28529077, 30244175, 32621601, 29157087, 30810209, 32761141, 29351734, 33094321","DrLLPS, EuRBPDB, EPSD, iEKPD, iCAV, PLMD, PTMD, SAGER, THANATOS, Tetrahymena Comparative Genomics Database, SPDB, EOGD, VPTMdb" +"Changjiang Scholars Program of China",2,2,2,"31691822, 32008039","DrLLPS, EPSD" +"Research Foundation Flanders",3,3,3,"31713636, 31584092, 33237329","DisProt, PDBe-KB, MobiDB" +"Elixir-GR",1,1,1,"31713636","DisProt" +"ICREA",1,1,1,"31713636","DisProt" +"Mexican National Council of Science and Technology",1,1,1,"31713636","DisProt" +"Carlsberg Distinguished Fellowship",1,1,1,"31713636","DisProt" +"Italian Ministry of Health Young Investigator Grant",1,1,1,"31713636","DisProt" +"National Research, Development and Innovation Office",2,2,2,"31713636, 33119751","DisProt, MemMoRF" +"Vetenskapsrådet",4,4,4,"31713636, 28182744, 32016318, 33539890","DisProt, OMDB, VariBench, FunCoup" +"Danmarks Grundforskningsfond",1,1,1,"31713636","DisProt" +"Hungarian National Research, Development, and Innovation Office",4,2,2,"31713636, 31612960","DisProt, PhaSePro" +"Ministry of Education, Science and Technological Development of the Republic of Serbia",1,1,1,"31713636","DisProt" +"Ministerio de Economía y Competitividad",3,3,3,"31713636, 30335169, 28943872","DisProt, PopHumanScan, MAHMI" +"Norges Forskningsråd",3,3,3,"32632099, 34583740, 28651544","EBRAINS, FPADMET, SalmoBase" +"Norges Forskningsråd (Research Council of Norway)",1,1,1,"32632099","EBRAINS" +"EC | Horizon 2020 Framework Programme (EU Framework Programme for Research and Innovation H2020)",1,1,1,"32632099","EBRAINS" +"EC | Horizon 2020 Framework Programme",2,2,2,"32632099, 34493866","EBRAINS, eQTL" +"VILLUM Young Investor",1,1,1,"32976589","DIGGER" +"UKIERI",1,1,1,"33007622","DINAX" +"Frederick National Laboratory for Cancer Research",1,1,1,"33051688","dbGuide" +"National Science Centre, Poland",4,2,2,"33053178, 33367605","DNAmoreDB, Virxicon" 
+"National Science & Technology",1,1,1,"33104791","DrugSpaceX" +"Robert J. Mattauch Endowment",1,1,1,"33119734","DescribePROT" +"Icahn Institute of Data Science and Genomic Technology",1,1,1,"33174603","Datanator" +"Grants-in-Aid for Scientific Research",1,1,1,"33196844","dbCNS" +"CSIR-Institute of Minerals and Materials Technology (CSIR-IMMT), Bhubaneswar",1,1,1,"33276297","DBCOVP" +"School of Biotechnology, Kalinga Institute of Industrial Technology",1,1,1,"33276297","DBCOVP" +"Deemed to be University, Bhubaneswar",1,1,1,"33276297","DBCOVP" +"Natural Science Foundation of Jilin Province",1,1,1,"33320930","CyanoPATH" +"Research Grants Council, University Grants Committee",3,1,1,"33382035","DIPPER" +"Council of Scientific and Industrial Research, India",1,1,1,"34015403","Ebolabase" +"NSFC",2,2,2,"34256256, 34846641","CytomegaloVirusDb, HODD" +"Natural Science Young Foundation of Anhui",1,1,1,"34314366","dbMCS" +"Natural Science Young Foundation of Anhui Agricultural University",1,1,1,"34314366","dbMCS" +"Introduction and Stabilization of Talent Project of Anhui Agricultural University",1,1,1,"34314366","dbMCS" +"National Key Research and Development",1,1,1,"34314366","dbMCS" +"Graduate Innovation Fund of Anhui Agricultural University",1,1,1,"34314366","dbMCS" +"the research project of the health and family planning commission of heilongjiang province",1,1,1,"34774049","DREAM" +"karolinska institutet research foundation grants 2020-2021",1,1,1,"34774049","DREAM" +"excellent young talents project of central government supporting local university reform and development fund",1,1,1,"34774049","DREAM" +"postdoctoral research foundation of china",2,1,1,"34774049","DREAM" +"national natural science foundation of china",5,2,2,"34774049, 34496744","DREAM, RPocket" +"heilongjiang provincial postdoctoral science foundation",1,1,1,"34774049","DREAM" +"National College Students Innovation and Entrepreneurship Training Program",1,1,1,"34774049","DREAM" +"Cystic Fibrosis 
Trust",4,3,3,"34782688, 31598690, 33416848","CyFi-MAP, ProCarbDB, SARS CoV-2" +"Innovative Medicines Initiative",5,4,4,"34782688, 32707486, 30601939, 31701150","CyFi-MAP, hPSCreg, UbiHub, BioModels" +"Dirección General de Asuntos del Personal Académico, Universidad Nacional Autónoma de México",2,2,2,"35424427, 32542109","DiaNat-DB, Abasy" +"NIAAA NIH HHS",11,7,6,"22080549, 31509535, 31228159, 24146757, 33035346, 22135298, 25514926","GeneWeaver, GutFeelingKB, PhenoGen, SIDD, tRFtarget, PhosphoSitePlus" +"Interdisciplinary Research Program of Seoul National University",1,1,1,"22140171","EzTaxon-e" +"the French National Research Agency",1,1,1,"22766416","FunGene-DB" +"Genetic Engineering laboratories in Universiti Teknologi Malaysia (UTM)",1,1,1,"24333540","EcoliOverExpressionDB" +"Universiti Sains Malaysia (USM)",1,1,1,"24333540","EcoliOverExpressionDB" +"University of Salerno - Fondi di Ateneo per la Ricerca di Base (FARB)",1,1,1,"24990533","GALT Protein Database" +"Italian Ministry of Education, University and Research and CNR",1,1,1,"24990533","GALT Protein Database" +"French National Research Agency",3,3,3,"25065645, 32163115, 30321422","EctoGEM, GRALL, RetroRules" +"Inria",1,1,1,"25065645","EctoGEM" +"University of Rennes 1",1,1,1,"25065645","EctoGEM" +"BIOTEMPO project",1,1,1,"25065645","EctoGEM" +"Administrative Department of Science, Technology and Innovation of Colombia, Colciencias",4,1,1,"25451822","EDCs DataBank" +"University of Cartagena",1,1,1,"25451822","EDCs DataBank" +"French Ministry of Labor",1,1,1,"26179317","Evalutil" +"Ministry of Health and Welfare",2,1,1,"26272709","GenomewidePDB" +"Ghent University Multidisciplinary Research Partnership",1,1,1,"26456067","FLAD" +"National 973 Basic Research Program of China",1,1,1,"27037912","GAMDB" +"Office of Science",1,1,1,"27664130","ENVO" +"Gordon and Betty Moore Foundation",3,3,3,"27664130, 33166387, 31722421","ENVO, International Nucleotide Sequence Database Collaboration, ENA" +"LifeWatchGreece 
Research Infrastructure",2,1,1,"27664130","ENVO" +"CyVerse",2,1,1,"27664130","ENVO" +"Office of Biological and Environmental Research",1,1,1,"28245064","FRED" +"Biological and Environmental Research",1,1,1,"28245064","FRED" +"National Institute of Justice",1,1,1,"29175726","FROG-kb" +"Forensic Technology Center of Excellence",1,1,1,"29175726","FROG-kb" +"National Institute of Justice, Office of Investigative Sciences",1,1,1,"29175726","FROG-kb" +"National Eye Institute",5,5,5,"29337142, 33206959, 33290552, 30417254, 33237286","Express, IntAct, GOC, iSyTE, UniProtKB" +"Intramural EPA",1,1,1,"29683130","EnviroAtlas" +"U.S. Environmental Protection Agency",1,1,1,"29683130","EnviroAtlas" +"Office of Research and Development",1,1,1,"29683130","EnviroAtlas" +"Welch Foundation",6,4,4,"29890119, 30418591, 33973408, 33021634","FlyXCDB, HumanNet, hu.MAP, NDB" +"Consellería de Cultura, Educación e Ordenación Universitaria, Xunta de Galicia",1,1,1,"30235322","GC4S" +"Heilongjiang Education Department Fund",2,1,1,"30321400","EWASdb" +"Heilongjiang Postdoctoral",1,1,1,"30321400","EWASdb" +"Fundamental Research Funds",3,3,3,"30321400, 31642484, 33095866","EWASdb, SilkDB, VARAdb" +"Foundation against Cancer",2,1,1,"30357379","EndoDB" +"Fritz Thyssen Stiftung",1,1,1,"30357379","EndoDB" +"National Institute of Neurological Disorders and Stroke",9,2,2,"30357403, 26048622","EncoMPASS, PedsDTI" +"National Programs for High Technology Research and Development",6,3,3,"30357418, 30364969, 30364952","EDK, EWAS Atlas, PED" +"Chinese Academy of Science",2,1,1,"30357418","EDK" +"National Key Research & Development Program of China",2,2,2,"30357418, 30364952","EDK, PED" +"International Partnership Program of the Chinese Academy of Sciences",6,6,6,"30364969, 31584095, 28387199, 30371881, 30364952, 33170268","EWAS Atlas, EWAS, GSA, iDog, PED, GVM" +"National Key Research Program of China",14,7,7,"30364969, 33119759, 28387199, 30371881, 30252093, 30335176, 31620779","EWAS Atlas, gcType, GSA, 
iDog, iProX, NucMap, prokaryotic antiviral defense system" +"13th Five-year Informatization Plan of Chinese Academy of Sciences",7,6,6,"30364969, 30365027, 31584095, 30371881, 30364952, 33170268","EWAS Atlas, gcMeta, EWAS, iDog, PED, GVM" +"National key Research Program of China",6,1,1,"30365027","gcMeta" +"Developing Countries Around China",1,1,1,"30365027","gcMeta" +"National Science Foundation for Young Scientists of China",3,2,2,"30365027, 33119759","gcMeta, gcType" +"Major State Basic Research Development Program",1,1,1,"30365027","gcMeta" +"Key Research Program of the Chinese Academy of Sciences",2,2,2,"30365027, 33175170","gcMeta, 2019nCoVR" +"Central public welfare research institutes",1,1,1,"30365030","ETCM" +"National Key Technology R&D Program of China",2,1,1,"30365030","ETCM" +"Program of China",1,1,1,"30365030","ETCM" +"Key project at central government",1,1,1,"30365030","ETCM" +"NIAMS NIH HHS",6,6,6,"30407583, 31642488, 30397019, 32777102, 33174596, 29487113","FusionGDB, ExonSkipDB, iProteinDB, MSK-KP, MitoCarta, Panorama Public" +"Farmer Welfare",1,1,1,"30611878","FisOmics" +"Centre for Agricultural Bioinformatics",1,1,1,"30611878","FisOmics" +"Department of Agricultural Research and Education, Ministry of Agriculture",1,1,1,"30611878","FisOmics" +"ICAR-Indian Agricultural Statistics Research Institute",1,1,1,"30611878","FisOmics" +"Tsinghua-Fuzhou Institute for Data Technology",1,1,1,"30788500","EnDisease" +"Volkswagen Foundation",1,1,1,"30864352","GenCoNet" +"International DFG Research Training Group GRK",1,1,1,"30864352","GenCoNet" +"Frank McGraw Memorial Chair in Cancer Research",1,1,1,"30951672","exRNA Atlas" +"American Cancer Society",2,2,2,"30951672, 25382819","exRNA Atlas, ProKinO" +"National Nature Scientific Foundation of China",3,2,2,"31164042, 32608478","EmExplorer, RNAWRE" +"Fund for Excellent Young Scholars of Inner Mongolia",1,1,1,"31164042","EmExplorer" +"Program for Young Talents of Science and Technology in Universities of Inner 
Mongolia Autonomous Region",1,1,1,"31164042","EmExplorer" +"Plasma Protein Therapeutics Association Europe",1,1,1,"31263866","ESID" +"EURO-POLICY-PID",1,1,1,"31263866","ESID" +"BMBF",4,1,1,"31263866","ESID" +"ESID society",1,1,1,"31263866","ESID" +"European and National Grants",1,1,1,"31263866","ESID" +"Tip-top Scientific and Technical Innovative Youth Talents of Guangdong special support program",2,1,1,"31598693","EuRBPDB" +"Guangzhou Bureau of Science and Information Technology",5,1,1,"31598693","EuRBPDB" +"CAST",2,1,1,"31642496","Gene4Denovo" +"Natural Science Foundation for Young Scientists of Hunan Province, China",1,1,1,"31642496","Gene4Denovo" +"The Fundamental Research Funds for the Provincial Universities",1,1,1,"31665430","ENdb" +"National Competitiveness and Excellence Program",1,1,1,"31686102","FoldamerDB" +"Central Public-Interest Scientific Institution Basal Research Fund",3,1,1,"31774482","FluReassort" +"Non-profit Central Research Institute Fund of Chinese Academy of Medical Sciences",1,1,1,"31774482","FluReassort" +"National Basic Research Program of China",1,1,1,"31774482","FluReassort" +"CAMS Initiative for Innovative Medicine",1,1,1,"31774482","FluReassort" +"Shanghai Jiao Tong University",6,5,5,"31887789, 30032758, 31642469, 32221380, 30407568","ETph, HFMDB, PhenoModifier, WeiBI, ICEberg" +"Interdisciplinary Program of Shanghai Jiao Tong University",2,1,1,"31887789","ETph" +"Chinese Academy of Sciences of China",1,1,1,"32120139","FRCD" +"Natural Science Foundation of Tianjin, China",1,1,1,"32120139","FRCD" +"CAS STS program",1,1,1,"32120139","FRCD" +"Scientific Research Conditions and Technical Support System Program",1,1,1,"32120139","FRCD" +"Australian Museum",1,1,1,"32123502","FrogID" +"Guangzhou Municipal Key Discipline in Medicine",1,1,1,"32219413","FerrDb" +"Key Laboratory for Innovation Platform Plan, Science and Technology Program of Guangzhou, China",1,1,1,"32219413","FerrDb" +"Guangzhou Municipal Psychiatric Disease Clinical 
Transformation Laboratory",1,1,1,"32219413","FerrDb" +"Deutsches Zentrum für Herz-Kreislaufforschung",1,1,1,"32459338","EpiRegio" +"Natural Science Foundation of Chongqing of China",2,1,1,"32591816","ExoBCD" +"Science and Technology Innovation Commission of Shenzhen",1,1,1,"32591816","ExoBCD" +"Science Innovation Program of College of Laboratory Medicine; Chongqing Medical University",1,1,1,"32591816","ExoBCD" +"Science and Technology Research Program of Chongqing Municipal Education Commission",1,1,1,"32591816","ExoBCD" +"European Union Horizon 2020",2,2,2,"32726198, 32548865","EnteroBase, UK Immunological Toolbox" +"Deutsches Zentrum für Infektionsforschung",1,1,1,"32726198","EnteroBase" +"Niedersächsische Ministerium für Wissenschaft und Kultur",1,1,1,"32726198","EnteroBase" +"the Natural Science Foundation of Tianjin",1,1,1,"33002112","EnzyMine" +"CAS STS programme",1,1,1,"33002112","EnzyMine" +"Scientific Research Conditions and Technical Support System Programme",1,1,1,"33002112","EnzyMine" +"International Partnership Programme of Chinese Academy of Sciences of China",1,1,1,"33002112","EnzyMine" +"National Key Research and Development Programme of China",1,1,1,"33002112","EnzyMine" +"European Social Fund",4,4,4,"33119759, 32556221, 30380112, 33367605","gcType, OMEGA-NET, Translocatome, Virxicon" +"Czech Science Foundation",2,2,2,"33166383, 31584092","FireProtDB, PDBe-KB" +"The Ministry of Education, Youth and Sports",4,1,1,"33166383","FireProtDB" +"Brno University of Technology",1,1,1,"33166383","FireProtDB" +"Operational Programme Research, Development and Education",1,1,1,"33166383","FireProtDB" +"China National Basic Research Program",1,1,1,"33497436","FifBase" +"Program of Shaanxi Province Science and Technology Innovation Team",1,1,1,"33497436","FifBase" +"Mathematical Tianyuan Fund",1,1,1,"33497436","FifBase" +"The State Key Laboratory of Integrated Management of Pest Insects and Rodents",1,1,1,"33511767","FAWMine" +"Precursory Research for Embryonic 
Science and Technology",1,1,1,"33511845","FMODB" +"University of British Columbia Four–Year Doctoral Fellowship",1,1,1,"33599246","Gemma" +"FIOCRUZ",1,1,1,"33995920","ExVe" +"National Institute of Health",5,4,4,"34010390, 32436932, 27188311, 30407596","Echinobase, miRactDB, Structure Surfer, Cancer3D" +"Binational Science Foundation",1,1,1,"34010390","Echinobase" +"Russian Science Foundation",11,9,7,"34158935, 29401218, 31588507, 31598695, 28011601, 28110602, 33242091, 33231677, 30759212","GEMI, MutHTP, VDJdb, MirGeneDB, CSDB_GT, SitEx, GTRD" +"Alfred P. Sloan Foundation",3,3,3,"34158935, 30985146, 30371820","GEMI, ProteinExplorer, UNITE" +"Intramural NASA",1,1,1,"34158935","GEMI" +"John Templeton Foundation",1,1,1,"34158935","GEMI" +"W. M. Keck Foundation",1,1,1,"34158935","GEMI" +"Eesti Teadusagentuur",4,1,1,"34493866","eQTL" +"European Molecular Biology Laboratory",19,18,17,"34493866, 31647096, 31696235, 30020414, 33206959, 33290552, 31701150, 33270111, 31584097, 33237286, 31680160, 33166387, 30357387, 33156333, 33170273, 31691826, 31722421, 33175160","eQTL, proGenomes2, MGnify, SKEMPI, IntAct, GOC, BioModels, GENCODE, IGSR, UniProtKB, ELM, International Nucleotide Sequence Database Collaboration, ArrayExpress, InterPro, Gramene, Ensembl, ENA" +"EC | European Regional Development Fund",1,1,1,"34493866","eQTL" +"European Bioinformatics Institute",4,4,3,"34493866, 31584089, 31691815, 32486891","eQTL, PhaSepDB, Reactome" +"Open Targets",5,5,5,"34493866, 33237286, 33125078, 33156333, 33170273","eQTL, UniProtKB, Pfam, InterPro, Gramene" +"Health Food Chain Safety and Environment",1,1,1,"34626475","FEDA" +"General Secretariat for Research and Technology",2,2,2,"34741074, 32248568","Fibromine, FINDbase" +"Max Planck Society",1,1,1,"34793786","FCCP" +"Sichuan Science and Technology Program",2,2,2,"35694152, 31504189","EnhFFL, MeLAD" +"Institute of Cancer Research, Medical University of Vienna",1,1,1,"21337704","GPDE" +"Christian Doppler Research Association, Austria and 
the Austrian “Krebshilfe”",1,1,1,"21337704","GPDE" +"GPDE at the Medical University of Vienna",1,1,1,"21337704","GPDE" +"Italian Telethon Foundation",1,1,1,"21435384","HOCTARdb" +"DST/NRF Research Chair",1,1,1,"21930248","HCVpro" +"National Research Foundation (South Africa)",1,1,1,"21930248","HCVpro" +"National Bioinformatics Network",1,1,1,"21930248","HCVpro" +"British Heart Foundation",25,22,14,"22123736, 24217912, 26314736, 22954629, 23619930, 24297257, 31584092, 22881376, 27899622, 25378336, 23087376, 33237286, 24234451, 27899567, 22102590, 30395267, 30395331, 23161681, 23161678, 24253303, 25348405, 30395287","GOA, HPO, GlycoMob, MaConDa, MRIdb, NECTAR, PDBe-KB, UCL LDLR, UniProt, IUPHAR-DB, UniProtKB, IntAct, GOC, RNAcentral" +"Lundbeck Foundation",11,9,5,"23143109, 24304901, 24194598, 27794045, 33270898, 29155946, 29140473, 30664776, 25723102","HemaExplorer, GPCRDB, JASPAR, FANTOM5, GPCRdb" +"Fight for Sight",1,1,1,"24217912","HPO" +"Indian Institute of Science, Bangalore",1,1,1,"25450223","HIGDB" +"Supercomputer Education and Research Centre",1,1,1,"25450223","HIGDB" +"management of VIT University",1,1,1,"25450223","HIGDB" +"Indian Council of Medical Research (ICMR)",1,1,1,"25450223","HIGDB" +"PRIN project",1,1,1,"27045824","GPKB" +"Data-Driven Genomic Computing (GenData 2020)",1,1,1,"27045824","GPKB" +"Italian Ministry of the University and Research",1,1,1,"27045824","GPKB" +"Hundred Talent of Chinese Academy of Sciences",1,1,1,"27098585","Grape-CRISPR" +"Science Foundation Ireland Strategic Research Cluster (SRC) programme to Alimentary Glycoscience Research Cluster",1,1,1,"27436239","GlycoGAIT" +"European Union FP7 programme in support of the GlycoHIT project",1,1,1,"27436239","GlycoGAIT" +"NCCIH NIH HHS",2,2,2,"27504778, 31612915","GNPS, MIBiG" +"FIC NIH HHS",7,4,4,"27504778, 23161692, 23457042, 22116064","GNPS, SchistoDB, RCPedia, TDR Targets" +"Grantová Agentura eské Republiky",1,1,1,"27527702","HCVIVdb" +"Univerzita Karlova v Praze 
()",1,1,1,"27527702","HCVIVdb" +"Research Committee of the Technological Educational Institution (T.E.I.) of Athens, Greece",1,1,1,"28083826","HICL" +"Key Program of the Chinese Academy of Sciences",1,1,1,"28387199","GSA" +"National High-tech R&D Program",2,1,1,"28387199","GSA" +"Key Technology Talent Program of the Chinese Academy of Sciences",2,2,2,"28387199, 33175170","GSA, 2019nCoVR" +"Oregon State University",3,3,3,"28415075, 28490127, 26973684","HopBase, Milk bioactive peptide database, FragariaCyc" +"Southeast University",1,1,1,"28529078","HCSGD" +"Tsinghua University",1,1,1,"28529078","HCSGD" +"Università di Catania",1,1,1,"28708269","HemeOxDB" +"NIAID",2,1,1,"29028885","HoTResDB" +"Grant-in-Aid for Scientific Research",2,1,1,"29532461","HpBase" +"Joint Usage/Educational Center",1,1,1,"29532461","HpBase" +"Grant-in-Aid for Young Scientists",1,1,1,"29532461","HpBase" +"Next-Generation BioGreen21 Program",1,1,1,"29649979","Ginseng Genome Database" +"University of Calgary",1,1,1,"30032758","HFMDB" +"The Canadian Institutes of Health Research",1,1,1,"30032758","HFMDB" +"Western Economic Diversification",1,1,1,"30032758","HFMDB" +"Alberta Innovates - Health Solutions",1,1,1,"30032758","HFMDB" +"School of Medicine",1,1,1,"30032758","HFMDB" +"Shanghai Committee of Science and Technology",2,1,1,"30053237","HDncRNA" +"Fund for Subject Pi lot Program of Tongji University to Luying Peng",1,1,1,"30053237","HDncRNA" +"Fund of the Key Laboratory of Regenerative Biology of Chinese Academy of Science",1,1,1,"30053237","HDncRNA" +"Students Innovation Training Program",1,1,1,"30053237","HDncRNA" +"National Key Basic Research Program",2,2,2,"30066211, 32761141","HAMdb, SPDB" +"National Cancer Center",1,1,1,"30247654","HACER" +"Tsinghua University Initiative Scientific Research Program",1,1,1,"30266410","HCCDB" +"Worldwide Cancer Research",2,2,2,"30371888, 27899581","HmtVar, HmtDB" +"DHOMOS Worldwide Cancer Research",1,1,1,"30371888","HmtVar" +"Department of Biotechnology, 
Ministry of Science and Technology",5,4,4,"30703169, 29220464, 30307523, 29432422","HuVarBase, KiPho, PVsiRNAdb, TopicalPdb" +"Science and Engineering Research Board (IN), JC Bose fellowship",1,1,1,"30999860","HumCFS" +"Shanghai Municipal Science and Technology Major Project",4,4,4,"31504765, 31584086, 31640808, 29617941","GMrepo, PGG.Han, PGG.SNV, SCRIPT-MAP" +"Natural Science Foundation of Shanghai",3,3,3,"31504765, 27148975, 33084874","GMrepo, SpinachDB, OGEE" +"BLRD VA",3,2,2,"31509535, 30365026","GutFeelingKB, Victors" +"Medical Scientific Research Foundation of Guangdong Province",2,1,1,"31524396","HybridMolDB" +"Ministry of Education of the People's Republic of China",2,2,2,"31524396, 33984507","HybridMolDB, HisPhosSite" +"The Youth Innovation Promotion Association of Chinese Academy of Sciences",1,1,1,"31566222","GWAS Atlas" +"K.C. Wong Education Foundation",1,1,1,"31566222","GWAS Atlas" +"The 100 Talent Program of the Chinese Academy of Sciences",1,1,1,"31566222","GWAS Atlas" +"The Tou-Yan Innovation Team Program of the Heilongjiang Province",1,1,1,"31584099","gutMDisorder" +"Heilongjiang Province Postdoctoral Fund",2,1,1,"31584099","gutMDisorder" +"Peking University",2,2,2,"31630971, 33359127","GESUR, NBIGV" +"Key Technologies R&D Program",1,1,1,"31630971","GESUR" +"Beijing Nova Program",3,3,3,"31725860, 31086734, 31021279","HisgAtlas, OsteoporosAtlas, UVGD" +"Natural Science Foundation of Hubei",1,1,1,"31783725","HKPocket" +"self-determined research funds of CCNU from the colleges' basic research and operation of MOE",1,1,1,"31783725","HKPocket" +"self-determined research funds of CCNU from the colleges’ basic research and operation of MOE",1,1,1,"31783725","HKPocket" +"Taiwan Ministry of Science and Technology",9,3,3,"31976536, 29648583, 34285772","HBDB, LipidPedia, Yeast Phosphoinositide-Binding Proteins" +"National Taiwan University",5,2,2,"31976536, 29648583","HBDB, LipidPedia" +"Joint School of Life Sciences",1,1,1,"32055858","GREG" +"Guangzhou 
Medical University",1,1,1,"32055858","GREG" +"European Union’s Horizon 2020 Framework Program for Research and Innovation",1,1,1,"32163115","GRALL" +"Ecole Doctorale des Sciences Chimiques",1,1,1,"32163115","GRALL" +"Transformation Project in Scientific and Technological Achievements",1,1,1,"32315389","HotSpot3D" +"Thomas F. and Kate Miller Jeffress Memorial Trust",1,1,1,"32330167","geoBoundaries" +"Scientific and Technical Innovative Youth Talents of Guangdong",1,1,1,"32496513","gutMEGA" +"Ministerstvo Školství, Mládeže a Tělovýchovy (Ministry of Education, Youth and Sports)",1,1,1,"32661237","GlobalFungi" +"Ministerstvo Školství, Mládeže a Tělovýchovy",4,2,2,"32661237, 30622655","GlobalFungi, REXdb" +"European Union’s Horizon 2020 Research and Innovation Programme",1,1,1,"32707486","hPSCreg" +"Two-hundred Talent",1,1,1,"32941628","IDDB" +"Chinese National Precise Medical Research",1,1,1,"32941628","IDDB" +"Shanghai Health and Family Planning System Excellent Subject Leader and Excellent Young Medical Talents Training Program",2,1,1,"32941628","IDDB" +"Clinical Rese Clinical Research Program of 9th People's Hospital",1,1,1,"32941628","IDDB" +"Key New Drug Creation and Manufacturing Program",1,1,1,"32941628","IDDB" +"Key R&D Program of Zhejiang Province",2,2,2,"33045729, 33010159","GIMICA, PROTAC-DB" +"Zhejiang University",1,1,1,"33045729","GIMICA" +"China Knowledge Centre for Engineering Sciences and Technology",1,1,1,"33045729","GIMICA" +"Technology Innovation and Application Demonstration Project of Chongqing",1,1,1,"33045729","GIMICA" +"Cancer Prevention Research Institute of Texas",3,1,1,"33119754","HeRA" +"Cancer Prevention Research Training Program",1,1,1,"33119754","HeRA" +"United States Public Health Service",2,1,1,"33125055","HbVar" +"Golden Helix Foundation",1,1,1,"33125055","HbVar" +"Korea Research Institute of Bioscience and Biotechnology",1,1,1,"33137185","iCSDB" +"Clinical Research Plan of SHDC",1,1,1,"33151298","GRNdb" +"Shanghai Municipal Health 
Commission",1,1,1,"33151298","GRNdb" +"Helmholtz Association",2,2,2,"33221926, 31728526","HumanMetagenomeDB, TerrestrialMetagenomeDB" +"Syngenta Seeds SAS",1,1,1,"33237299","GreenPhylDB" +"CGIAR Research Program, Roots, Tubers and Bananas",1,1,1,"33237299","GreenPhylDB" +"China Postdoctoral Innovative Talent Foundation",2,2,2,"33264402, 33196801","HERB, NONCODE" +"National Natural Science Foundation for Young Scholars of China",5,2,2,"33264402, 30380087","HERB, SymMap" +"BMICC of National Population Health Data Center",1,1,1,"33264402","HERB" +"Tip-top Scientific and Technical Innovative Youth Talents of Guangdong Special Support Program",1,1,1,"33406221","iCysMod" +"Key program for Department of Science and Technology of Qinghai province",1,1,1,"33406221","iCysMod" +"Digitalization of Biological Resource Project",1,1,1,"33417691","HGFDB" +"Research project of education department of zhejiang province",1,1,1,"33677507","HIR" +"Science and technology project of Taizhou City",1,1,1,"33677507","HIR" +"Humanities and Social Science Project of the Chinese Ministry of Education",1,1,1,"33677507","HIR" +"German Academic Exchange Service",4,2,2,"33740463, 33051671","ICSCB, StreptomeDB" +"King Abdullah University of Science and Technology",4,3,3,"33929018, 31160594, 30329098","IBDDB, PathoPhenoDB, LncBook" +"Eunice Kennedy Shriver National Institute of Child Health and Human Development",2,1,1,"33973408","hu.MAP" +"National Heart, Lung, and Blood Institute",12,11,8,"33973408, 31701147, 25102069, 27643925, 33206959, 33290552, 33237286, 30395331, 27602200, 31713623, 34741192","hu.MAP, LINCS, Panorama, StemCellCKB, IntAct, GOC, UniProtKB, RGD" +"Division of Loan Repayment",1,1,1,"33973408","hu.MAP" +"Jiangsu Province Department of Human Resources and Social Security",1,1,1,"33984507","HisPhosSite" +"norway research council through r&d",1,1,1,"34330336","HumGut" +"Jilin Province Key Laboratory of Big Data Intelligent Computing",1,1,1,"34642750","HBFP" +"Development Project of 
Jilin Province of China",3,1,1,"34642750","HBFP" +"Guangdong Key Project for Applied Fundamental Research",1,1,1,"34642750","HBFP" +"university of milano-bicocca",1,1,1,"34699529","GH19ED" +"Fostering Fund of Fundamental Research for Young Teachers of Zhengzhou University",1,1,1,"34907423","iCAV" +"Guangdong Esophageal Cancer Institute Science and Technology Program",1,1,1,"34907423","iCAV" +"Tip-Top Scientific and Technical Innovative Youth Talents of Guangdong special support program",1,1,1,"34907423","iCAV" +"University of Athens",1,1,1,"23262288","LepChorionDB" +"National High-Tech Research and Development Program",2,1,1,"23601370","LiverAtlas" +"Chinese National Basic Research Program",1,1,1,"23601370","LiverAtlas" +"Beijing Municipal Natural Science Foundation",1,1,1,"23601370","LiverAtlas" +"National Science Council of the Republic of China",5,1,1,"24525374","lncRNAMap" +"UST-UCSD International Center of Excellence in Advanced Bio-Engineering",3,1,1,"24525374","lncRNAMap" +"Asia University",1,1,1,"24525374","lncRNAMap" +"MOE ATU",1,1,1,"24525374","lncRNAMap" +"Universiti Malaya (MY) UM Research Grant (UMRG)",1,1,1,"26444974","ListeriaBase" +"Universiti Malaya (MY) High Impact Research Grant UM-MOHE",1,1,1,"26444974","ListeriaBase" +"Virginia Polytechnic Institute and State University",1,1,1,"26653323","iTAP" +"China postdoctoral science foundation",1,1,1,"27465544","IGDD" +"Anhui provincial Natural Science Foundation",1,1,1,"27465544","IGDD" +"the Natural Science Foundation of China",1,1,1,"27465544","IGDD" +"the Innovative Research Team of the Educational Department of China",1,1,1,"27465544","IGDD" +"the PAPD (Priority Academic Program Development) program at Nanjing Forestry University",1,1,1,"27465544","IGDD" +"the National Basic Research Project",1,1,1,"27465544","IGDD" +"CANARIE",1,1,1,"27863956","IHEC" +"Genome Québec",1,1,1,"27863956","IHEC" +"Calcul Québec",1,1,1,"27863956","IHEC" +"Genome Canada",6,6,6,"27863956, 33683131, 26251998, 32442307, 
31665441, 31701148","IHEC, MRMAssayDB, PhenomeCentral, SYNERGxDB, CARD, JASPAR" +"Compute Canada",1,1,1,"27863956","IHEC" +"Universidad Nacional de Colombia sede Medellín",1,1,1,"27888793","InverPep" +"NHLBI Intramural Program",1,1,1,"27974320","Mammalian Metabolic Enzyme Database" +"French Foundation for Research & Biodiversity (FRB)",1,1,1,"28168018","IRBAS" +"IRBAS",1,1,1,"28168018","IRBAS" +"Centre for Synthesis and Analysis of Biodiversity (CESAB)",1,1,1,"28168018","IRBAS" +"French National Agency for Water and Aquatic Environments (ONEMA)",1,1,1,"28168018","IRBAS" +"Ministry of Science and Technology of China",2,2,2,"29029599, 32911083","LiverWiki, MosaicBase" +"ReumaFonds",1,1,1,"29047407","Infevers" +"Executive Agency for Health and Consumers",1,1,1,"29047407","Infevers" +"Italian PRIN",1,1,1,"29179110","LAND-deFeND" +"Italian National Department for Civil Protection",1,1,1,"29179110","LAND-deFeND" +"PRIN",1,1,1,"29179110","LAND-deFeND" +"DPC",1,1,1,"29179110","LAND-deFeND" +"Fondazione Assicurazioni Generali",1,1,1,"29179110","LAND-deFeND" +"Resources of the Laboratory of Computational Molecular Design and Metabolomics",1,1,1,"29648583","LipidPedia" +"Department of Computer Science and Information Engineering of National Taiwan University",1,1,1,"29648583","LipidPedia" +"NORTE2020",1,1,1,"29899596","LEGE" +"H2020 Health",1,1,1,"29899596","LEGE" +"The Department of Science and Technology, Government of Gujarat",1,1,1,"29905762","LeptoDB" +"Japan Science and Technology Agency (JST)",1,1,1,"30046160","KampoDB" +"International Scientific and Technological Cooperation project of China",2,1,1,"30252093","iProX" +"Agricultural Science and Technology Innovation Program",1,1,1,"30276831","lncRNAnet" +"Key Laboratory of Shenzhen",1,1,1,"30276831","lncRNAnet" +"National Key Basic Research Program of China",2,1,1,"30276831","lncRNAnet" +"Spanish Government",1,1,1,"30357370","liqDB" +"Ministry of Education of Spain",2,1,1,"30357370","liqDB" +"Postdoctoral Science 
Foundation of China",3,2,2,"30476305, 33219686","LncACTdb, LnCeCell" +"Innovative Talents of Science and Technology Research",5,2,2,"30476305, 30407549","LncACTdb, Lnc2Cancer" +"Portuguese Science and Technology Foundation",1,1,1,"30820574","Mammalian Stress Granules Proteome" +"Ataxia UK",2,1,1,"30820574","Mammalian Stress Granules Proteome" +"French Muscular Dystrophy Association",1,1,1,"30820574","Mammalian Stress Granules Proteome" +"Vinmec Healthcare System",1,1,1,"31180159","KHV" +"Yu Weihan Outstanding Youth Training Fund of Harbin Medical University",1,1,1,"31598675","KnockTF" +"Wu Liande Youth Science Research Fund of Harbin Medical University",1,1,1,"31598675","KnockTF" +"Scientific Research Fund of Harbin Medical University",1,1,1,"31598675","KnockTF" +"TULIP",1,1,1,"31605615","LeGOO" +"Laboratoire d’Excellence",1,1,1,"31605615","LeGOO" +"LABEX",1,1,1,"31605615","LeGOO" +"Postdoctoral Foundation of Hei Long Jiang Province",2,2,2,"31617563, 33219686","LnCeVar, LnCeCell" +"University Nursing Program for Young Scholars with Creative Talents in Heilongjiang Province",1,1,1,"31617563","LnCeVar" +"USDA National Institute of Food and Agriculture",1,1,1,"31648227","MANET" +"Joint Genome Institute",1,1,1,"31665416","Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters" +"Lawrence Berkeley National Laboratory",1,1,1,"31665416","Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters" +"Wuhan University",2,2,2,"31665439, 32168374","MaGenDB, uORFlight" +"National Center for Advancing Translational Sciences",2,2,2,"31701147, 31837751","LINCS, VetCOT" +"Heilongjiang Provincial Health and Family Planning Commission of Science Foundation",2,1,1,"31713618","LncTarD" +"Hei Long Jiang Postdoctoral Foundation",2,1,1,"31713618","LncTarD" +"Fundamental Research Funds for the Provincial Universities",4,3,3,"31713618, 32487016, 31799597","LncTarD, ncRI, RNAactDrug" +"China Postdoctoral Science Special Foundation",1,1,1,"31713618","LncTarD" +"Heilongjiang 
Provincial planning office key subjects",1,1,1,"31713618","LncTarD" +"AstraZeneca",1,1,1,"31838187","International Severe Asthma Registry" +"Natural Science Foundation of Beijing Municipality",2,2,2,"32028878, 32248093","laPPISite, TE141K1" +"Post-genome Multi-ministerial Project",2,1,1,"32133509","KRGDB" +"Natural Science Foundation of Shaanxi Provincial Department of Education",1,1,1,"32228437","iMarmot" +"China Postdoctoral Science Foundation Grant",1,1,1,"32228437","iMarmot" +"Key Research Fund on Sciences and Technologies for Joint Academic Institute and Local Enterprises of Sichuan",1,1,1,"32367112","MACSNVdb" +"Narodowe Centrum Nauki",14,4,4,"32499815, 33502860, 29624889, 26141515","LuluDB, InterMetalDB, PhyMet2, mirEX" +"Scientific Research Foundation of Nanjing Medical University",1,1,1,"32512182","IRESbase" +"Program for Distinguished Talents of Six Domains in Jiangsu Province",1,1,1,"32512182","IRESbase" +"Natural Science Foundation of the Jiangsu Higher Education Institutions",1,1,1,"32512182","IRESbase" +"Fok Ying Tung Education Foundation",1,1,1,"32512182","IRESbase" +"ERC INTEGRATE",1,1,1,"32618424","LymphoAtlas" +"CNRS, INSERM",1,1,1,"32618424","LymphoAtlas" +"MSDAVENIR Fund",1,1,1,"32618424","LymphoAtlas" +"PHENOMIN",1,1,1,"32618424","LymphoAtlas" +"Investissement d'Avenir program of the French Ministry of Research ProFI",1,1,1,"32618424","LymphoAtlas" +"West China Hospital, Sichuan University",2,1,1,"32820322","LncAS2Cancer" +"Sichuan Province Science and Technology Support Program",1,1,1,"32820322","LncAS2Cancer" +"Sichuan University",3,2,2,"32820322, 33068436","LncAS2Cancer, tsRBase" +"National Natural Science Foundation of China (National Science Foundation of China)",7,2,2,"32821400, 33514746","Kiwifruit Genome Database, NGD" +"National Science Foundation (NSF)",1,1,1,"32821400","Kiwifruit Genome Database" +"National Science Foundation of Heilongjiang Province",1,1,1,"33045741","LncSEA" +"Youth Innovation Promotion Association of Chinese 
Academy of Sciences",2,2,2,"33045751, 33170268","LncExpDB, GVM" +"K. C. Wong Education Foundation",2,2,2,"33045751, 33175170","LncExpDB, 2019nCoVR" +"Council of Scientific and Industrial Research (CSIR), India",2,1,1,"33095885","IndiGenomes" +"Natural Science Foundation for Distinguished Young Scholar of Hebei Province",2,2,2,"33147622, 32608478","KNIndex, RNAWRE" +"Institute of Computing Technology, Chinese Academy of Sciences",1,1,1,"33147622","KNIndex" +"Intramural Research Program of the National Library of Medicine",1,1,1,"33166392","LitCovid" +"Agricultural Research Service",2,2,2,"33193550, 28830355","MaizeMine, porcine translational research database" +"Heilongjiang Provincial Natural Science Foundation",2,2,2,"33219686, 33219661","LnCeCell, LincSNP" +"Heilongjiang Touyan Innovation Team Program",3,3,3,"33219686, 34755873, 33219661","LnCeCell, ImmReg, LincSNP" +"Chinese National Natural Science Foundation",1,1,1,"33287903","KVarPredDB" +"Zhejiang Provincial Key Projects of Technology Research",1,1,1,"33287903","KVarPredDB" +"Japan International Cooperation Agency",1,1,1,"33645624","KAIKObase" +"Science and Technology Research Partnership for Sustainable Development",1,1,1,"33645624","KAIKObase" +"Fonds De La Recherche Scientifique - FNRS",2,1,1,"33709443","LIMONADA" +"Wallonie-Bruxelles International",1,1,1,"33709443","LIMONADA" +"Centre National de la Recherche Scientifique",3,3,3,"33709443, 28608363, 34531327","LIMONADA, MiSynPat, T1TAdb" +"Applied Technology Research and Development Plan of Heilongjiang Province",1,1,1,"33906563","M6ADD" +"Strasbourg University Hospital",1,1,1,"34023905","knotAnnotSV" +"Inserm",1,1,1,"34023905","knotAnnotSV" +"University of Strasbourg",1,1,1,"34023905","knotAnnotSV" +"Universität Mannheim",1,1,1,"34378177","LinguaPix" +"National Research Foundation of Korea grant",1,1,1,"34415996","LINPS" +"Hainan Provincial Key Laboratory of Carcinogenesis and Intervention",1,1,1,"34755873","ImmReg" +"Hainan Provincial Natural Science 
Foundation of China",1,1,1,"34755873","ImmReg" +"HMU MarshalInitiative",1,1,1,"34755873","ImmReg" +"Hainan Medical University",1,1,1,"34755873","ImmReg" +"Natural Science Foundation for Distinguished Young Scholars of Heilongjiang Province",1,1,1,"34755873","ImmReg" +"Major Science and Technology Program of Hainan Province",1,1,1,"34755873","ImmReg" +"Hainan Province Clinical Medical Center",1,1,1,"34755873","ImmReg" +"Science and Technology special fund of Hainan Province",1,1,1,"34755873","ImmReg" +"NHLBI",1,1,1,"34936882","Lung CellCards" +"National Cheng Kung University",2,2,2,"34976312, 27392072","LCMD, YCRD" +"ICMR- Biomedical Informatics and National Institute for Research in Tuberculosis (formerly Tuberculosis Research Centre)",1,1,1,"21880546","MtbSD" +"National R & D Program for Cancer Control, Ministry for Health and Welfare, Republic of Korea",1,1,1,"23219992","MENT" +"Basic Science Research Program",1,1,1,"23219992","MENT" +"Ministry of Education, Science and Technology (MOEST)",1,1,1,"23219992","MENT" +"University Grants Commission, India",1,1,1,"24561221","MitoSatPlant" +"North American Mitochondrial Disease Consortium pilot award",1,1,1,"25542617","MSeqDR" +"Great Ormond Street Hospital Childrens Charity",2,1,1,"25542617","MSeqDR" +"Netherlands Genomic Initiative (NGI)/Netherlands Organization for Scientific Research (NWO)",1,1,1,"25542617","MSeqDR" +"NIH National Center for Advancing Translational Sciences (NCATS)",1,1,1,"27069559","MD-CTS" +"Natural Science Foundation of Inner Mongolia",4,1,1,"27167218","MiasDB" +"Inner Mongolia Science & Technology Plan",1,1,1,"27167218","MiasDB" +"Israel Science Foundation",3,2,2,"28481982, 30357384","McPAS-TCR, BitterDB" +"Eunice Kennedy Shriver Institute",1,1,1,"28490127","Milk bioactive peptide database" +"Core",1,1,1,"28490127","Milk bioactive peptide database" +"University of California",1,1,1,"28490127","Milk bioactive peptide database" +"Université de Strasbourg",1,1,1,"28608363","MiSynPat" +"Fondation 
pour la Recherche Médicale",3,3,3,"28608363, 33206959, 33305318","MiSynPat, IntAct, PED" +"NSF",7,6,6,"29206899, 34514416, 31642487, 28891124, 27450113, 33170273","MOSAIC, SCISSORâ, AraPheno, DOCKGROUND, PDB, Gramene" +"University of Minnesota",2,2,2,"29206899, 31722416","MOSAIC, MEGARes" +"Department of Science and Technology, Government of India",2,2,2,"29401218, 30689843","MutHTP, ccPDB" +"Ministry of Human Resource and Development",2,2,2,"29401218, 32119071","MutHTP, ProCaff" +"Imperial College London",1,1,1,"29897419","MARDy" +"Antimicrobial Research Collaborative",1,1,1,"29897419","MARDy" +"Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning",1,1,1,"29967752","Metaxa2" +"Ohio Agricultural Research and Development Center",2,1,1,"29967752","Metaxa2" +"Costco Honey Bee Biology Fellowship",1,1,1,"29967752","Metaxa2" +"RSF",1,1,1,"30092360","MPDSDM" +"DST",1,1,1,"30092360","MPDSDM" +"Prostate Cancer Foundation",1,1,1,"30268942","MiPanda" +"Prostate Cancer Foundation Young Investigator Award",1,1,1,"30268942","MiPanda" +"Early Detection Research Network",1,1,1,"30268942","MiPanda" +"NCI Prostate SPORE",1,1,1,"30268942","MiPanda" +"Aix-Marseille University",1,1,1,"30371819","MoonDB" +"National Medical Research Council of Singapore",1,1,1,"30380113","MethMotif" +"Ministry of Education Academic Research Fund",1,1,1,"30380113","MethMotif" +"Cancer Science Institute of Singapore",1,1,1,"30380113","MethMotif" +"National Medical Research Council",1,1,1,"30380113","MethMotif" +"Singapore Ministry of Education's AcRF Tier 3",1,1,1,"30380113","MethMotif" +"Hong Kong Scholars Program",1,1,1,"30649247","miR+Pathway" +"National Key Research, Development Program, National research and development project and Hong Kong Scholars Program",3,1,1,"30649247","miR+Pathway" +"Research Grants Council of the Hong Kong Special Administrative Region, China",1,1,1,"30649247","miR+Pathway" +"SERB",1,1,1,"30738202","MtBrowse" 
+"CSIR-GENESIS",1,1,1,"30738202","MtBrowse" +"CONCYTEC FONDECYT Peruvian research agency",1,1,1,"30944327","monoterpene indole alkaloid database" +"University of Málaga",1,1,1,"31197322","MetOSite" +"Major Increase Or Decrease Program In The Central Finance Level",1,1,1,"31231773","MepmiRDB" +"Hangzhou Ministry of Science and Technology",1,1,1,"31231773","MepmiRDB" +"National Programs for Fundamental and Development",1,1,1,"31231774","Mr.Vc" +"Ministry of Science and Technology, Republic of China",1,1,1,"31404401","miRDRN" +"Sichuan University Postdoctoral Interdisciplinary Innovation Startup Foundation",1,1,1,"31504189","MeLAD" +"Scientific Research Foundation of Sichuan University",2,1,1,"31504189","MeLAD" +"National Natural Science Foundation",2,1,1,"31504189","MeLAD" +"Department of Physiology and Biophysics at the University of Illinois at Chicago",1,1,1,"31586405","MatrisomeDB" +"University of Illinois at Chicago",1,1,1,"31586405","MatrisomeDB" +"College of Pharmacy",1,1,1,"31586405","MatrisomeDB" +"Netherlands eScience Center",1,1,1,"31612915","MIBiG" +"NSF GRF",1,1,1,"31612915","MIBiG" +"NSERC",2,2,2,"31612915, 34521345","MIBiG, PathFams" +"Korea Institute of Planning and Evaluation for Technology in Food, Agriculture, Forestry and Fisheries",1,1,1,"31652812","Murine Microbiome Database" +"Australian National Health and Medical Research Council",2,1,1,"31679514","MaveDB" +"Canada First Research Excellence Fund",1,1,1,"31825307","MouseBytes" +"CIFAR",1,1,1,"31825307","MouseBytes" +"Brain Canada",2,1,1,"31825307","MouseBytes" +"Alzheimer Society",1,1,1,"31825307","MouseBytes" +"Weston Brain Institute",2,2,2,"31825307, 31701148","MouseBytes, JASPAR" +"Mitacs",1,1,1,"31825307","MouseBytes" +"Australian Research Council’s Discovery Early Career Research Award",1,1,1,"31836897","mesophotic.org" +"California Academy of Sciences’ Hope for Reefs Initiative",1,1,1,"31836897","mesophotic.org" +"Noble Research Institute",2,2,2,"32079733, 31245720","MtSSPdb, VPGD" 
+"Oklahoma Center for the Advancement of Science and Technology (OCAST",1,1,1,"32079733","MtSSPdb" +"Chongqing Research Program of Basic Research and Frontier Technology",1,1,1,"32159764","MMHub" +"Ministry of Human Resource Development and Initiative for Biological Systems Engineering Travel",1,1,1,"32337573","MPTherm" +"Orthopaedic Research Society",1,1,1,"32777102","MSK-KP" +"American Society for Bone and Mineral Research",1,1,1,"32777102","MSK-KP" +"European Calcified Tissue Society",1,1,1,"32777102","MSK-KP" +"Broad Institute",1,1,1,"32777102","MSK-KP" +"ETH Zurich Foundation",1,1,1,"32777102","MSK-KP" +"Center for Individualized Medicine, Mayo Clinic",1,1,1,"32986834","ModelSEED" +"Royal Society",3,1,1,"33084905","MeDAS" +"PAPPIT-DGAPA-UNAM",1,1,1,"33084905","MeDAS" +"Santander and Newton fund UK-China",1,1,1,"33084905","MeDAS" +"National Science Fund",1,1,1,"33084905","MeDAS" +"NERC",1,1,1,"33084905","MeDAS" +"Semmelweis University",1,1,1,"33119751","MemMoRF" +"Higher Education Institutional Excellence Programme of the Ministry for Innovation and Technology in Hungary",1,1,1,"33119751","MemMoRF" +"Shenzhen Science and Technology Innovation Commission",3,1,1,"33125077","MASI" +"Shanghai Science and Technology Funds",1,1,1,"33125077","MASI" +"Shenzhen Bay Laboratory",1,1,1,"33125077","MASI" +"Shenzhen Development and Reform Committee",2,1,1,"33125077","MASI" +"Singapore Academic Research Fund",1,1,1,"33125077","MASI" +"Swiss Federal Government",3,3,3,"33156326, 33290552, 33237286","MetaNetX/MNXref, GOC, UniProtKB" +"Dollis Huntington Endowment Fund for Cancer Research",1,1,1,"33174596","MitoCarta" +"Massachusetts General Hospital Department of Neurology",1,1,1,"33174596","MitoCarta" +"Jane Coffin Childs",1,1,1,"33174596","MitoCarta" +"Shandong Province of China",1,1,1,"33219670","MolluscDB" +"Shandong Natural Science Foundation",1,1,1,"33219670","MolluscDB" +"Canada Foundation for Innovation",8,5,5,"33245771, 31733064, 30601939, 33206959, 30407591","MarkerDB, 
PathDIP, UbiHub, IntAct, IID" +"Foundation for the National Institutes of Health",6,3,3,"33514395, 31283070, 32486891","MPM, VIPdb, Reactome" +"Genome British Columbia",7,2,2,"33683131, 31701148","MRMAssayDB, JASPAR" +"Howard Hughes Medical Institute Gilliam Fellowship",1,1,1,"33780471","MCPdb" +"Council of Scientific and Industrial Research, Government of India",1,1,1,"33909069","Monosaccharide Biosynthesis Pathways Database" +"Technische Universität München",1,1,1,"34052284","miREV" +"the Investissements Avenir French Government program managed by the French National Research Agency",1,1,1,"34156446","MetamORF" +"National Natural Sciences Foundation of China",1,1,1,"34156447","mPPI" +"Plant2Pro Carnot Institute",1,1,1,"34245304","MtExpress" +"U.S. Department of Health & Human Services | NIH | National Cancer Institute (NCI)",1,1,1,"34349127","MiREDiBase" +"U.S. Department of Health & Human Services | NIH | National Cancer Institute",1,1,1,"34349127","MiREDiBase" +"Doctoral Science Research Foundation of Yantai University",2,1,1,"34510194","Mollusca mitochondrial database" +"Shandong Provincial Natural Science Foundation, China",1,1,1,"34510194","Mollusca mitochondrial database" +"Department of Atomic Energy, Government of India",1,1,1,"35424258","MeFSAT" +"Ministerio de Ciencia e Innovación - FEDER",3,1,1,"21491493","Noncoded Amino acids Database" +"Intramural Research Program of the NIH",1,1,1,"21491493","Noncoded Amino acids Database" +"Gobierno de Aragón",1,1,1,"21491493","Noncoded Amino acids Database" +"Center for Cancer Research",1,1,1,"21491493","Noncoded Amino acids Database" +"National Cancer Institute, National Institutes of Health",1,1,1,"21491493","Noncoded Amino acids Database" +"Versus Arthritis",1,1,1,"24297257","NECTAR" +"Broad Institute of MIT and Harvard",1,1,1,"25102069","Panorama" +"National Institute of Child Health and Human Development",3,3,3,"26048622, 31103066, 34387941","PedsDTI, Placental Atlas Tool, PharmGKB" +"National Institute on 
Drug Abuse",4,4,4,"26048622, 31259547, 27643925, 31696236","PedsDTI, PerMM, StemCellCKB, MPD" +"High Impact Research (HIR)",1,1,1,"27017950","NeisseriaBase" +"University of Malaya and Ministry of Education",1,1,1,"27017950","NeisseriaBase" +"University of Malaya Research Grant",1,1,1,"27017950","NeisseriaBase" +"Department of Information Technology, Ministry of Communications and Information Technology",1,1,1,"27285615","Northeast India Helminth Parasite Information Database" +"Türkiye Bilimler Akademisi",1,1,1,"27936097","PeTMbase" +"Villum Fonden (DK)",1,1,1,"28182744","OMDB" +"Knut och Alice Wallenbergs Stiftelse (SE)",1,1,1,"28182744","OMDB" +"Basic Science Research Program through the National Research Foundation of Korea",1,1,1,"28184254","NPCARE" +"Alexander von Humboldt-Stiftung",1,1,1,"28641017","NANPDB" +"Winship Cancer Institute",1,1,1,"29186335","OncoPPi" +"HHS | NIH | National Institute of General Medical Sciences",2,1,1,"29487113","Panorama Public" +"HHS | NIH | National Human Genome Research Institute",1,1,1,"29487113","Panorama Public" +"HHS | NIH | National Institute of Arthritis and Musculoskeletal and Skin Diseases",1,1,1,"29487113","Panorama Public" +"Association pour la Recherche sur le Cancer",1,1,1,"29739837","NvERTx" +"Ligue Contre le Cancer",1,1,1,"29739837","NvERTx" +"Minist?re de l'Enseignement Sup?rieur et de la Recherche",2,2,2,"29739837, 30354114","NvERTx, NR-DBIND" +"ATIP-Avenir",1,1,1,"29739837","NvERTx" +"Fondation pour la Recherche M?dicale",1,1,1,"29739837","NvERTx" +"the Projects from the Shanghai Science and Technology Commission",2,1,1,"29743053","PDXliver" +"the State Key Program of National Natural Science of China",1,1,1,"29743053","PDXliver" +"""Strategic Priority Research Program"" of the Chinese Academy of Sciences",2,1,1,"29743053","PDXliver" +"National Key R&D Program of China",1,1,1,"29743053","PDXliver" +"“Strategic Priority Research Program” of the Chinese Academy of Sciences",2,1,1,"29743053","PDXliver" +"Huazhong 
University of Science and Technology",2,2,2,"29982280, 29028888","PepBDB, RRDB" +"Roy J. Carver Charitable Trust",1,1,1,"30115014","PdumBase" +"Foundation for the Author of National Excellent Doctoral Dissertation of the People's Republic of China",1,1,1,"30134653","PADFrag" +"Malaysian Palm Oil Board",1,1,1,"30239681","PalmXplore" +"National Development and Reform Commission of China",1,1,1,"30335176","NucMap" +"Department of Science and Technology, Ministry of Science and Technology",1,1,1,"30349509","PanGFR-HM" +"Oneida Nation Foundation",1,1,1,"30395323","pATLAS" +"National 973 Program",1,1,1,"30445567","OncoBase" +"Irish Research Council, Government of Ireland",1,1,1,"30535146","OGOB" +"Korea Science and Engineering Foundation",1,1,1,"30733462","PFDB" +"MEXT | Japan Society for the Promotion of Science",1,1,1,"30733462","PFDB" +"Innovation Project",2,2,2,"31086734, 31021279","OsteoporosAtlas, UVGD" +"State Key Laboratory of Proteomics",3,2,2,"31086734, 31021279","OsteoporosAtlas, UVGD" +"Program of Precision Medicine",1,1,1,"31086734","OsteoporosAtlas" +"King Abdullah University of Science and Technology (KAUST)",2,1,1,"31160594","PathoPhenoDB" +"Swedish Research Council",2,2,2,"31566225, 31598695","OGRDB, MirGeneDB" +"Key Research Program of Frontier Sciences",2,2,2,"31584086, 31640808","PGG.Han, PGG.SNV" +"Program of Shanghai Academic Research Leaders",1,1,1,"31584086","PGG.Han" +"Strategic Priority Research Program",2,2,2,"31584086, 31640808","PGG.Han, PGG.SNV" +"UK Royal Society-Newton Advanced Fellowship",3,3,3,"31584086, 31640808, 33175170","PGG.Han, PGG.SNV, 2019nCoVR" +"Zhangjiang Special Project of the National Innovation Demonstration Zone",1,1,1,"31584086","PGG.Han" +"Hermesfonds for ELIXIR Belgium",1,1,1,"31584092","PDBe-KB" +"Wellcome Trust Strategic Awards",6,1,1,"31584092","PDBe-KB" +"SIFTS",1,1,1,"31584092","PDBe-KB" +"India Partnering Award",1,1,1,"31584092","PDBe-KB" +"AIRC",1,1,1,"31584092","PDBe-KB" +"ELIXIR CZ Research Infrastructure 
Project",1,1,1,"31584092","PDBe-KB" +"La Ligue Contre le Cancer",1,1,1,"31612943","OHNOLOGS" +"Erasmus Mundus",1,1,1,"31612943","OHNOLOGS" +"National and Kapodistrian University of Athens",1,1,1,"31629694","PerMemDB" +"Youth Innovative Talents Training Program for Universities of Heilongjiang Province",1,1,1,"31637139","ncRNA2MetS" +"Harbin Science and Technology Innovation Talents Research Project",1,1,1,"31637139","ncRNA2MetS" +"Research Grant Council, Hong Kong",1,1,1,"31640730","NARD" +"DARPA",1,1,1,"31647099","Pathway Commons" +"Goddard Space Flight Center NASA",1,1,1,"31662803","ODIAC" +"Fonds de Recherche Québec – Santé",1,1,1,"31724725","oRNAment" +"Fonds de Recherche Québec – Nature et Technologies",1,1,1,"31724725","oRNAment" +"Kementerian Sains, Teknologi dan Inovasi",1,1,1,"31725861","PCOSBase" +"Atlantic Canada Opportunities Agency",1,1,1,"31733064","PathDIP" +"Ian Lawson Van Toch Memorial Fund",1,1,1,"31733064","PathDIP" +"Natural Sciences Research Council",2,2,2,"31733064, 30407591","PathDIP, IID" +"IBM",1,1,1,"31733064","PathDIP" +"Ontario Research Foundation",1,1,1,"31733064","PathDIP" +"Funding above to E.I. S. In addition, B.W. 
received funding from a Faculty Mobility grant from the University of Costa Rica and a grant from the Schlumberger Foundation.",1,1,1,"31831730","OPD" +"Stiftelsen Olle Engkvist Byggmästare",1,1,1,"31831730","OPD" +"Council of Scientific and Industrial Research (CSIR), Government of India",2,1,1,"32090261","NipahVR" +"Breeding program of Taizhou University",1,1,1,"32103267","PDIR" +"The Research Project of the Health and Family Planning Commission of Heilongjiang Province",1,1,1,"32111231","NoncoRNA" +"Heilongjiang Postdoctoral Science Foundation",2,2,2,"32111231, 31799597","NoncoRNA, RNAactDrug" +"The Research Project of the Chinese Society of Neuro-oncology, CACA",1,1,1,"32111231","NoncoRNA" +"Eye Hospital Wenzhou Medical University",1,1,1,"32117995","Nc2Eye" +"Key Technology Research and Development Program of Shandong",1,1,1,"32122231","ncRPheno" +"Rufford Foundation",1,1,1,"32324748","OdoBD" +"Explorers Club",1,1,1,"32324748","OdoBD" +"the DST-PURSE",1,1,1,"32404014","PDB-2-PBv3.0" +"European Cooperation in Science and Technology",3,2,2,"32556221, 30357350","OMEGA-NET, Pfam" +"Spanish Ministry of Science, Innovation and Universities",1,1,1,"32556221","OMEGA-NET" +"Ramón y Cajal",5,2,2,"32556221, 31171447","OMEGA-NET, SynGO" +"Medical Health Science and Technology Key Project of Zhejiang Provincial Health Commission",1,1,1,"32597311","OncotRF" +"Key Program of Zhejiang Provincial Natural Science Foundation of China",1,1,1,"32597311","OncotRF" +"Greehey Children’s Cancer Research Institute",1,1,1,"32810235","PCAT" +"Department of Biotechnology, Ministry of Science and Technology, India",3,3,3,"32895427, 34025934, 33231322","PCOSKBR2, MycoTRAP-DB, SoyTD" +"Department of Health Research, India",3,2,2,"32895427, 34025934","PCOSKBR2, MycoTRAP-DB" +"JDRF",1,1,1,"33045747","Open Targets Genetics" +"ELIXIR-GR: The Greek Research Infrastructure for Data Management and Analysis in Life Sciences",1,1,1,"33080028","Peryton" +"Human Resources Development, Education and 
Lifelong Learning",1,1,1,"33080028","Peryton" +"Competitiveness, Entrepreneurship and Innovation",1,1,1,"33080028","Peryton" +"National Natural Science Foundation of China-Guangdong Joint Fund",1,1,1,"33103271","NanDeSyn" +"Natural Science Foundation of Shandong Province",1,1,1,"33103271","NanDeSyn" +"Center for Clinical and Translational Science",1,1,1,"33245774","PAGER-CoV" +"The University of Alabama at Birmingham",1,1,1,"33245774","PAGER-CoV" +"the Chinese Universities Scientific Fund",1,1,1,"33247934","OGDA" +"the Natural Science Foundation of Shandong Province",1,1,1,"33247934","OGDA" +"Top Talent Program of The Yantai University",1,1,1,"33247934","OGDA" +"China-ASEAN Maritime Cooperation Fund",1,1,1,"33247934","OGDA" +"Human Islet Research Network",1,1,1,"33294866","Pancreatlas" +"Department of Veterans Affairs",1,1,1,"33294866","Pancreatlas" +"NIDDK",7,1,1,"33294866","Pancreatlas" +"Shantou University",1,1,1,"33304468","PDmethDB" +"Universidade de Macau",1,1,1,"33304468","PDmethDB" +"Li Ka Shing Foundation",1,1,1,"33304468","PDmethDB" +"SGST",1,1,1,"33306802","NPBS" +"CSDB",1,1,1,"33306802","NPBS" +"Chinese Academy of Medical Sciences",2,2,2,"33359127, 32349124","NBIGV, VirusCircBase" +"U.S. Department of Health & Human Services | NIH | National Center for Advancing Translational Sciences",1,1,1,"33361798","Open Cancer TherApeutic Discovery" +"U.S. Department of Health & Human Services | NIH | National Center for Advancing Translational Sciences (NCATS)",1,1,1,"33361798","Open Cancer TherApeutic Discovery" +"U.S. Department of Health & Human Services | NIH | National Institute of General Medical Sciences",1,1,1,"33361798","Open Cancer TherApeutic Discovery" +"U.S. Department of Health & Human Services | NIH | National Institute of Environmental Health Sciences",1,1,1,"33361798","Open Cancer TherApeutic Discovery" +"U.S. 
Department of Health & Human Services | NIH | National Institute of General Medical Sciences (NIGMS)",1,1,1,"33361798","Open Cancer TherApeutic Discovery" +"Bureau of Landscaping and Forestry of Wuhan Municipality",1,1,1,"33514746","NGD" +"Hubei Chenguang Talented Youth Develoment Foundation",1,1,1,"33514746","NGD" +"Liverpool John Moores University",1,1,1,"33647438","PepTherDia" +"Horizon 2020 Framework Programme",2,2,2,"33647438, 32508104","PepTherDia, Scop3P" +"HHS | National Institutes of Health",2,1,1,"33653882","Mycobacterial Systems Resource" +"UKRI|Biotechnology and Biological Sciences Research Council (BBSRC)",3,1,1,"33749993","OmniPath" +"UK Research and Innovation|Biotechnology and Biological Sciences Research Council, ISP grant for Gut Microbes and Health",1,1,1,"33749993","OmniPath" +"JRC COMBINE, partially funded by Bayer AG",1,1,1,"33749993","OmniPath" +"UK Research and Innovation|Biotechnology and Biological Sciences Research Council, Norwich Research Park Biosciences Doctoral Training Partnership grant",1,1,1,"33749993","OmniPath" +"Federal Ministry of Education (BMFB, Computational Life Sciences grant)",1,1,1,"33749993","OmniPath" +"Deutsche Forschungsgemeinschaft (DFG)",1,1,1,"33749993","OmniPath" +"European Union Innovative Medicines Initiative TransQST",1,1,1,"33749993","OmniPath" +"National Outstanding Youth Science Fund Project of National Natural Science Foundation of China",5,1,1,"33997360","PCPD" +"National Science Foundation Extreme Science and Engineering Discovery Environment",1,1,1,"34241085","OCELOT" +"Fondazione Cariplo",2,2,2,"34527188, 30419167","MyoData, ZINClick" +"Università degli Studi di Padova",1,1,1,"34527188","MyoData" +"European Commission VIth Framework Research and Technological Development Program, ‘SPINE2-COMPLEXES’ Project",1,1,1,"21536137","Proteopedia" +"‘Teach-SG’ Project",1,1,1,"21536137","Proteopedia" +"The Bankhead-Coley Research Program of the State of Florida",1,1,1,"21656910","QuAD" +"US Army Medical Research 
and Materiel Command under Award",1,1,1,"21656910","QuAD" +"Institutional Research Grant from the American Cancer Society",1,1,1,"21656910","QuAD" +"NIH/National Cancer Institute PSOC",1,1,1,"21656910","QuAD" +"National Cancer Institute under Award",1,1,1,"21656910","QuAD" +"National Functional Genomics Center",1,1,1,"21656910","QuAD" +"Moffitt Foundation",1,1,1,"21656910","QuAD" +"Bankhead-Coley Cancer Research program of the Florida Department of Health",1,1,1,"21656910","QuAD" +"The Melanoma Research Foundation",1,1,1,"21656910","QuAD" +"Virginia Johnson and Lawrence Dangott at the Texas A&M University Protein Chemistry Laboratory",1,1,1,"21656910","QuAD" +"University of South Florida Chemistry Department",1,1,1,"21656910","QuAD" +"University of Florida-Moffitt Collaborative Partnership, Moffitt's Hematological Oncology Program",1,1,1,"21656910","QuAD" +"NINR NIH HHS",1,1,1,"22080514","PolymiRTS" +"Ministère de l'Education Nationale, de la Recherche et de la Technologie",1,1,1,"22589183","PRDB" +"PPT",1,1,1,"23607573","PID-NET" +"MLT",1,1,1,"23911837","PTP-central" +"JST",1,1,1,"23911837","PTP-central" +"Canadian Cancer Society Research Institute",1,1,1,"23911837","PTP-central" +"Kishimoto Foundation",1,1,1,"23911837","PTP-central" +"Georgia Cancer Coalition",1,1,1,"25382819","ProKinO" +"Hans Rausing PhD Scholarship",1,1,1,"25558364","PREDICTS" +"European Union programs MicroB3",1,1,1,"25740460","PhytoREF" +"Investissements d'Avenir",1,1,1,"25740460","PhytoREF" +"EMBRC-France",1,1,1,"25740460","PhytoREF" +"MaCuMBA",1,1,1,"25740460","PhytoREF" +"French Government",1,1,1,"25740460","PhytoREF" +"Provincial Quality Engineer Fund of Anhui Education Department",1,1,1,"26211629","PLNlncRbase" +"Biostatistics Discipline Backbone Cultivated Foundation in Anhui Agricultural University",1,1,1,"26211629","PLNlncRbase" +"Biology Key Subject Construction of Anhui",1,1,1,"26211629","PLNlncRbase" +"Anhui Agricultural University",1,1,1,"26211629","PLNlncRbase" +"National 973 
Basic Research",2,1,1,"26225242","PhIN" +"NSERC/CIHR Collaborative Health Research Project",1,1,1,"26251998","PhenomeCentral" +"Hospital for Sick Children",1,1,1,"26251998","PhenomeCentral" +"Ontario Research Fund",3,3,3,"26251998, 33206959, 33170273","PhenomeCentral, IntAct, Gramene" +"Ontario Genomics Institute",2,2,2,"26251998, 30601939","PhenomeCentral, UbiHub" +"Children’s Hospital of Eastern Ontario Foundation",1,1,1,"26251998","PhenomeCentral" +"Genome Quebec",1,1,1,"26251998","PhenomeCentral" +"Hirschl Trust",1,1,1,"27789569","PMKB" +"National Basic Research Program",2,2,2,"28529077, 29157087","PLMD, THANATOS" +"International Science & Technology Cooperation Program of China",5,3,3,"28529077, 29617941, 29157087","PLMD, SCRIPT-MAP, THANATOS" +"Equip@Meso - “Investissement d’Avenir” Programme EQUIPEX",1,1,1,"28592293","Plasmobase" +"Institut Universitaire de France",1,1,1,"28592293","Plasmobase" +"CALSIMLAB - “Investissements d’Avenir” program",1,1,1,"28592293","Plasmobase" +"Amazon Web Services",1,1,1,"28862395","PMS_DN" +"Patient-Centered Outcomes Research Institute",1,1,1,"28862395","PMS_DN" +"Qatar National Research Fund",2,2,2,"29370821, 31290545","PhenoDis, GXB" +"National Institute on Aging",14,5,5,"29370821, 29733404, 30668832, 33206959, 31696236","PhenoDis, SPAR, DASHR, IntAct, MPD" +"National Institute on Aging (US)",1,1,1,"29370821","PhenoDis" +"Scientific Research on Innovative Areas",3,1,1,"29425804","Predicted Endogenous Viral Elements" +"Challenging Exploratory Research",1,1,1,"29425804","Predicted Endogenous Viral Elements" +"Ministry of Education, Culture, Science, Sports, and Technology",1,1,1,"29425804","Predicted Endogenous Viral Elements" +"Research Activity Start-up",1,1,1,"29425804","Predicted Endogenous Viral Elements" +"National Institute of General Medical Sciences of the National Institutes of Health",1,1,1,"29575358","ProtaBank" +"Région Centre Val de Loire",1,1,1,"29662024","PKIDB" +"Association Nationale de la Recherche et de la 
Technologie",1,1,1,"29662024","PKIDB" +"National Institute of Plant Genome Research",2,2,2,"29939244, 30307523","PtRFdb, PVsiRNAdb" +"Youth Innovation Promotion Association CAS",2,2,2,"30010730, 33094321","RabGTD, VPTMdb" +"Chinese Human Proteome Projects (CNHPP",2,1,1,"30055873","PhoPepMass" +"Shanghai Municipal Science and Technology Commission of China",1,1,1,"30055873","PhoPepMass" +"Pró-Reitoria de Pesquisa e Pós-Graduação",1,1,1,"30101318","PlaNC-TE" +"Foundation for Research Support of the State of São Paulo",1,1,1,"30101318","PlaNC-TE" +"Coordination for the Improvement of Higher Education Personnel",1,1,1,"30101318","PlaNC-TE" +"Universidade Federal do Paraná",1,1,1,"30101318","PlaNC-TE" +"National Council for Scientific and Technological Development",1,1,1,"30101318","PlaNC-TE" +"National Key Research and Development Plan of China",1,1,1,"30239819","POSTAR2" +"Special Project on Precision Medicine",2,1,1,"30244175","PTMD" +"Agència de Gestió d’Ajuts Universitaris i de Recerca",1,1,1,"30335169","PopHumanScan" +"Ministry of Science of Technology of Taiwan",3,1,1,"30357353","piRTarBase" +"NIH predoctoral",1,1,1,"30357353","piRTarBase" +"NIH R00",1,1,1,"30357353","piRTarBase" +"Science and Technology Program of Guangdong",1,1,1,"30380102","qPhos" +"Informatics Institute of the School of Medicine at UAB",1,1,1,"31161204","PRISMOID" +"Monash Major Inter-Disciplinary Research",1,1,1,"31161204","PRISMOID" +"Key Research and Development Program of Shaanxi Province, China",1,1,1,"31161204","PRISMOID" +"Ipsen Bioinnovation Ltd., Cambridge Studentship",1,1,1,"31598690","ProCarbDB" +"Ipsen Bioinnovation Ltd.",1,1,1,"31598690","ProCarbDB" +"Jack Brockhoff Foundation",2,2,2,"31598690, 33095862","ProCarbDB, ThermoMutDB" +"Start-up Fund of Chengdu University",1,1,1,"31599098","PSMD" +"Beijing Academy of Agricultural and Forestry Sciences",2,1,1,"31602478","PmiREN" +"New National Excellence Programme",1,1,1,"31612960","PhaSePro" +"VUB",1,1,1,"31612960","PhaSePro" 
+"National Research Council of Science and Technology",1,1,1,"31612960","PhaSePro" +"National Development and Reform Commission",1,1,1,"31620779","prokaryotic antiviral defense system" +"Program of Shanghai Academic Research Leader",1,1,1,"31640808","PGG.SNV" +"Chinese Academy of Sciences President’s International Fellowship Initiatives",1,1,1,"31640808","PGG.SNV" +"Zhangjiang Special Project of National Innovation Demonstration Zone",1,1,1,"31642469","PhenoModifier" +"Department of Science and Technology/Science and Engineering Research Board",1,1,1,"31796964","PRP" +"Jilin Province",2,1,1,"31809863","PsyMuKB" +"Program for Professor of Special Appointment (Eastern Scholar) at Shanghai Institutions of Higher Learning",1,1,1,"31809863","PsyMuKB" +"National Key R & D Program of China",1,1,1,"31809863","PsyMuKB" +"Cancerfonden",2,1,1,"31844049","ProTargetMiner" +"National Autonomous University of Mexico | Dirección General de Asuntos del Personal Académico, Universidad Nacional Autónoma de México",2,1,1,"31949184","PulmonDB" +"Consejo Nacional de Ciencia y Tecnología",2,1,1,"31949184","PulmonDB" +"Fundación Miguel Alemán, A.C.",1,1,1,"31949184","PulmonDB" +"India and the DST-INSPIRE",1,1,1,"32119071","ProCaff" +"Department of Biotechnology, Government of India",1,1,1,"32119071","ProCaff" +"Tianjin Rice Industrial Technology System of China",2,2,2,"32542382, 33985427","PRMdb, TarDB" +"Core Research for Evolutional Science and Technology",1,1,1,"33002111","PyDISH" +"New Century Outstanding Talent Support Program",1,1,1,"33003203","QSIdb" +"Education Ministry of China",1,1,1,"33003203","QSIdb" +"Creative Research Groups of China",1,1,1,"33003203","QSIdb" +"Huazhong Agricultural University Scientific & Technological Self - innovation Foundation",1,1,1,"33137192","Plant-ImputeDB" +"National Key Research and Development Plan, China",1,1,1,"33137192","Plant-ImputeDB" +"Magyar Tudományos Akadémia",1,1,1,"33186585","PolarProtDb" +"Hungarian Scientific Research 
Fund",3,2,2,"33186585, 33305318","PolarProtDb, PED" +"Indian Institute of Technology Madras",1,1,1,"33196841","ProThermDB" +"PolyKnomics BV",1,1,1,"33245779","PheLiGe" +"Russian Ministry of Education and Science",1,1,1,"33245779","PheLiGe" +"Jiangsu Higher Education Institutions",1,1,1,"33330918","piRNA-eQTL" +"Embrapa",1,1,1,"33546584","Plantannot" +"Innovation Team Project for Modern Agricultural Industrious Technology System of Shandong Province",2,1,1,"34111777","PID" +"Nvidia",1,1,1,"34319727","ProBiS-Dock" +"Javna Agencija za Raziskovalno Dejavnost RS",5,1,1,"34319727","ProBiS-Dock" +"EU FP7",1,1,1,"34559210","QSDB" +"The author(s) received no specific funding for this work.",1,1,1,"34716373","PPMdb" +"Army Research Office",1,1,1,"25640659","SecReT6" +"Specialized Research Fund for the Doctoral Program of Higher Education, China",1,1,1,"25640659","SecReT6" +"973 program, Ministry of Science and Technology, China",1,1,1,"25640659","SecReT6" +"United States Department of Homeland Security",1,1,1,"25640659","SecReT6" +"Sino-UK Higher Education Research Partnership for PhD Studies",1,1,1,"25640659","SecReT6" +"Howard Hughes Medical Insitute",1,1,1,"26138588","SmedGD" +"Stowers Institute for Medical Research",1,1,1,"26138588","SmedGD" +"NSF CAREER Award",1,1,1,"27010673","SM-TF" +"American Heart Association (Midwest Affiliate)",1,1,1,"27010673","SM-TF" +"ANR Blanc MITOZEN",1,1,1,"27297221","sHSPdb" +"University of Angers",1,1,1,"27297221","sHSPdb" +"National Health Research Institutes of Taiwan",1,1,1,"28194231","SkinSensDB" +"NSYSU-KMU Joint Research Project",1,1,1,"28194231","SkinSensDB" +"Kaohsiung Medical University Research Foundation",1,1,1,"28194231","SkinSensDB" +"Research Center for Environmental Medicine",1,1,1,"28194231","SkinSensDB" +"Ministry of Education, Culture, Sports, Science and Technology of Japan (MEXT) and the Japan Agency for Medical Research and Development (AMED)",1,1,1,"28438161","REFOLDdb" +"High Technology Research and 
Development",1,1,1,"28529082","RED" +"Centre for Agricultural Bioinformatics scheme (CABin): Indian council of Agricultural Research Indian Council of Agricultural Research (IN)",1,1,1,"28964253","RiceMetaSys" +"Novo Nordisk",1,1,1,"29062930","SMBP" +"National High-tech R&D Program of China",1,1,1,"29617941","SCRIPT-MAP" +"Consiglio Nazionale delle Ricerche",1,1,1,"29696033","REDIdb" +"Natural Science Foundation of Hunan Province",1,1,1,"29961821","SDADB" +"State Key Laboratory of Ecological Pest Control for Fujian and Taiwan Crops",1,1,1,"30020436","realDB" +"Shandong Province Natural Science foundation",1,1,1,"30020436","realDB" +"Fujian Province",1,1,1,"30020436","realDB" +"Focused Innovations Scheme B",1,1,1,"30165538","SKmDB" +"Research Grants Council",1,1,1,"30165538","SKmDB" +"Hong Kong Special Administrative Region",7,1,1,"30165538","SKmDB" +"CUHK direct",4,1,1,"30165538","SKmDB" +"RGC Collaborative Research Fund",1,1,1,"30165538","SKmDB" +"General Research Funds",1,1,1,"30165538","SKmDB" +"Huazhong Agricultural University",1,1,1,"30380119","SAGD" +"European Community’s Seventh Framework Programme",1,1,1,"30481257","SkeletalVis" +"Wellcome Centre for Cell-Matrix Research",1,1,1,"30481257","SkeletalVis" +"University of Manchester",1,1,1,"30481257","SkeletalVis" +"Programme Opérationnel FEDER-Guadeloupe-Conseil Régional",1,1,1,"30593925","SITVIT2" +"European Union and Guadeloupe Region",1,1,1,"30593925","SITVIT2" +"Akademie Věd České Republiky",1,1,1,"30622655","REXdb" +"U.S. 
Department of Health &amp; Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases",3,1,1,"30674925","Smooth Muscle Transcriptome Browser" +"University of Nevada Reno School of Medicine",1,1,1,"30674925","Smooth Muscle Transcriptome Browser" +"Dipartimenti di Eccellenza Program",1,1,1,"30721533","SiMPLOD" +"Italian Association for Cancer Research (AIRC)",1,1,1,"30721533","SiMPLOD" +"My First AIRC Grant",1,1,1,"30721533","SiMPLOD" +"Giovanni Armenise-Harvard Career Development Award, Fondazione Cariplo",1,1,1,"30721533","SiMPLOD" +"Giovanni Armenise-Harvard Foundation",1,1,1,"30721533","SiMPLOD" +"Italian Ministry of Education, University and Research (MIUR)",1,1,1,"30721533","SiMPLOD" +"Dept. of Biology and Biotechnology",1,1,1,"30721533","SiMPLOD" +"University of Pavia",1,1,1,"30721533","SiMPLOD" +"Conseil Régional, Île-de-France",1,1,1,"30794542","RESPIRE" +"INSERM",1,1,1,"30794542","RESPIRE" +"Université Paris Diderot",1,1,1,"30794542","RESPIRE" +"Université St Denis La Réunion",1,1,1,"30794542","RESPIRE" +"INTS",1,1,1,"30794542","RESPIRE" +"Huazhong Agricultural University Scientific & Technological Self-innovation Foundation",1,1,1,"31511885","SNP2APA" +"National Key Research and Development Project",1,1,1,"31713629","SpatialDB" +"Natural Science Foundation of Fujian Province (CN)",1,1,1,"31872320","RSRS" +"Program for Innovative Research Team in College of Life Sciences, FAFU",1,1,1,"31872320","RSRS" +"Innovative Foundation of FAFU",1,1,1,"31872320","RSRS" +"Natural Science Foundation of Fujian Province",1,1,1,"31872320","RSRS" +"Ministerio de Ciencia, Innovación y Universidades of Spain",1,1,1,"32096105","SANDchild" +"Consellería de Educación, Xunta de Galicia",1,1,1,"32096105","SANDchild" +"Consejería de Educación e Investigación, Comunidad de Madrid",1,1,1,"32096105","SANDchild" +"Ministerio de Economía y Competitividad of Spain and the Fondo Europeo de Desarrollo Regional",1,1,1,"32096105","SANDchild" +"Key Clinical 
Specialist Construction Programs of Shanghai Municipal Commission of Health and Family Planning",1,1,1,"32487193","SCDb" +"Fonds Wetenschappelijk Onderzoek",3,2,2,"32508104, 30485709","Scop3P, sORFs.org" +"Universiteit Gent",1,1,1,"32508104","Scop3P" +"State Key Laboratory of Marine Environmental Science",1,1,1,"32621601","SAGER" +"Youth Natural Science Foundation",1,1,1,"32709339","saponin mass spectrometry database" +"Guangdong Basic and Applied Basic Research Foundation",2,1,1,"33021671","RMVar" +"Chinese Ministry of Science and Technology",1,1,1,"33068412","RASP" +"Baden-Württemberg Stiftung",1,1,1,"33196814","RBP2GO" +"Wilhelm Sander-Stiftung",1,1,1,"33196814","RBP2GO" +"German Cancer Aid",1,1,1,"33196814","RBP2GO" +"State of Texas' Governor's University Research Initiative (GURI)",1,1,1,"33231322","SoyTD" +"United Soybean Board",1,1,1,"33231322","SoyTD" +"Fondation Botnar",1,1,1,"33416848","SARS CoV-2" +"Saudi Arabia Research Council",1,1,1,"33416848","SARS CoV-2" +"American Leprosy Missions",1,1,1,"33416848","SARS CoV-2" +"Fondo Nacional de Desarrollo Científico y Tecnológico",1,1,1,"33507271","SinEx" +"Coordena??o de Aperfei?oamento de Pessoal de N?vel Superior",1,1,1,"33553941","SARSCOVIDB" +"Agence nationale de la recherche",1,1,1,"33994075","RHeference" +"Grand Équipement National De Calcul Intensif",1,1,1,"33994075","RHeference" +"Conselho Nacional de Desenvolvimento Cient?fico e Tecnol?gico",2,1,1,"34014674","SistematX" +"Government Council on Grants, Russian Federation",1,1,1,"34014674","SistematX" +"Youth Program of National Natural Science Foundation of China",1,1,1,"34022814","Rhododendron Plant Genome Database" +"Program of Science and Technology Talents Training in Yunnan province",1,1,1,"34022814","Rhododendron Plant Genome Database" +"Construction of International Flower Technology Innovation Center and Industrialization of achievements",1,1,1,"34022814","Rhododendron Plant Genome Database" +"Ten Thousand Young Talents Plan of 
Yunnan",1,1,1,"34022814","Rhododendron Plant Genome Database" +"NIGMS",2,2,2,"34514416, 27450113","SCISSORâ, PDB" +"UofSC",1,1,1,"34514416","SCISSORâ" +"national institute of general medical sciences",3,1,1,"34844637","recount3" +"office of advanced cyberinfrastructure",1,1,1,"34844637","recount3" +"Fundação Carlos Chagas Filho de Amparo à Pesquisa do Estado do Rio de Janeiro",1,1,1,"24273012","SpliceProt" +"Coordenação de Aperfeiçoamento de Pessoal de Nivel Superior",1,1,1,"24273012","SpliceProt" +"Vice-Presidência de Ensino",1,1,1,"24273012","SpliceProt" +"Ministério da Ciência e Tecnologia/Fundo Setorial de Saúde",1,1,1,"24273012","SpliceProt" +"Marie Curie International Incoming Fellowship",1,1,1,"25805861","SRD" +"PEPFAR",1,1,1,"26249811","SurvCurv" +"Universiti Malaya",1,1,1,"27138013","StreptoBase" +"Shanghai Science and Technology Talents Project",1,1,1,"27148975","SpinachDB" +"Scientific Research Project in Public Agricultural Industry",1,1,1,"27148975","SpinachDB" +"Marie Curie CIG Grant",1,1,1,"27188311","Structure Surfer" +"National Major Scientific and Technological Special Project for “Significant New Drug Formulation”",2,1,1,"27337171","TarNet" +"Tianjin City",1,1,1,"27643925","StemCellCKB" +"Fundamental Research Funds for the Central Universities of China",2,2,2,"28420402, 30223042","SSER, TSNAdb" +"Sichuan Youth Science and Technology Foundation of China",1,1,1,"28420402","SSER" +"Intramural NIST DOC",1,1,1,"28888135","STRSeq" +"Anhui Province",1,1,1,"28974472","Stress2TF" +"MD Anderson Cancer Center",1,1,1,"29092939","TCPA" +"National Science Centre",3,2,2,"29145635, 31728519","tRex, SyntDB" +"Russian Humanitarian Foundation",1,1,1,"29218589","StimulStat" +"Google",1,1,1,"29316788","SynBioHub" +"FUJIFILM Diosynth Biotechnologies U.S.A., Inc.",1,1,1,"29316788","SynBioHub" +"Division of Computing and Communication Foundations",1,1,1,"29316788","SynBioHub" +"Division of Biological Infrastructure",2,2,2,"29316788, 33156333","SynBioHub, InterPro" 
+"French agency for food and safety",1,1,1,"29385404","TOXsIgN" +"Fondation pour la recherche médicale",1,1,1,"29385404","TOXsIgN" +"European Union",25,4,4,"29385404, 31171447, 29533231, 33175160","TOXsIgN, SynGO, wwPDB, ENA" +"Council for Scientific and Industrial Research",1,1,1,"29432422","TopicalPdb" +"Bundesministerium für Forschung und Technologie",1,1,1,"29776332","TelNet" +"Fundamental Research Funds for Central Universities of China",2,2,2,"30371815, 30364956","TransmiR, HMDD" +"Special Project on Precision Medicine under the National Key R&D Program",3,3,3,"30371815, 30285109, 30364956","TransmiR, LncRNADisease, HMDD" +"Institute of Computing Technology",2,2,2,"30380087, 33196801","SymMap, NONCODE" +"Ministry of Human Capacities in Hungary",1,1,1,"30380112","Translocatome" +"Hungarian National Research Development and Innovation Office",1,1,1,"30380112","Translocatome" +"Hungarian Ministry of Human Capacities",2,1,1,"30380112","Translocatome" +"Ministry of Science and ICT",1,1,1,"30602089","STADIUM" +"Korea Research Environment Open NETwork",1,1,1,"30602089","STADIUM" +"Wuhan Branch, Supercomputing Centre, Chinese Academy of Sciences, China",1,1,1,"30810209","Tetrahymena Comparative Genomics Database" +"Youth Innovation Promotion Association, Chinese Academy of Sciences",1,1,1,"30810209","Tetrahymena Comparative Genomics Database" +"Chang Gung Memorial Hospital, Linkou",1,1,1,"30846808","TACCO" +"National Institute of General Medical Sciences (US)",1,1,1,"30871473","TADKB" +"CONACYT",2,1,1,"30994884","starPepDB" +"USFQ",1,1,1,"30994884","starPepDB" +"Ministry of Science and Technology of Taiwan",1,1,1,"31015229","TCEA" +"Taipei Medical University",1,1,1,"31015229","TCEA" +"SYNSYS",12,1,1,"31171447","SynGO" +"DFG",12,3,3,"31171447, 32976578, 33211869","SynGO, TREND-DB, Rfam" +"Leibniz Foundation",4,1,1,"31171447","SynGO" +"The Broad Institute of MIT and Harvard",4,1,1,"31171447","SynGO" +"European FP People Marie Curie Action",4,1,1,"31171447","SynGO" 
+"German Federal Ministry of Education and Research",8,2,2,"31171447, 33211880","SynGO, BRENDA" +"EUROSPIN",4,1,1,"31171447","SynGO" +"EU-JPND",4,1,1,"31171447","SynGO" +"CERCA Program/Generalitat de Catalunya",4,1,1,"31171447","SynGO" +"The Stanley Center for Psychiatric Research",4,1,1,"31171447","SynGO" +"Shanghai Engineering Research Center of Plant Germplasm Resources",1,1,1,"31211398","SpinachBase" +"Development and Collaborative Innovation Center of Shanghai",1,1,1,"31211398","SpinachBase" +"Polish Academy of Sciences",2,2,2,"31624839, 33367605","T-psi-C, Virxicon" +"U.S. Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases",4,1,1,"31672983","SPP" +"U.S. Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases (National Institute of Diabetes & Digestive & Kidney Diseases)",4,1,1,"31672983","SPP" +"Cancer Prevention and Research Institute of Texas (Cancer Prevention Research Institute of Texas)",1,1,1,"31672983","SPP" +"KNOW Poznan RNA Centre",2,2,2,"31728519, 26141515","SyntDB, mirEX" +"Polish Ministry of Science and Higher Education",1,1,1,"31728519","SyntDB" +"National Tea Research Foundation, Tea Board, Ministry of Commerce, Govt of India, Kolkata, India",1,1,1,"32159215","TeaMiD" +"Guangdong Natural Science Founds for Distinguished Young Scholars",1,1,1,"32286817","TeroKit" +"GDAS' Project of Science and Technology Development",1,1,1,"32286817","TeroKit" +"Canadian Network for Research and Innovation in Machining Technology, Natural Sciences and Engineering Research Council of Canada",1,1,1,"32427908","The Ontario Climate Data Portal" +"CSIR-CFTRI",1,1,1,"32696292","TLPdb" +"National Oceanic and Atmospheric Administration",1,1,1,"32719467","STAGdb" +"‘Climbing plan’ supported by Guangdong University students’ Special Fund for Scientific and Technological Innovation and Cultivation",1,1,1,"32761141","SPDB" +"Guangdong Provincial Key Research and 
Development Plan Project",1,1,1,"32761141","SPDB" +"Research Fellowship of Council of Scientific and Industrial Research",1,1,1,"32829394","TGV" +"US Department of Energy",1,1,1,"32882008","TBDB" +"Dr. Hella Bühler Stiftung",1,1,1,"32976578","TREND-DB" +"Shanghai Science and Technology Innovation Fund",1,1,1,"32976581","STAB" +"Shanghai Municipal Science and Technology Commission",1,1,1,"32976581","STAB" +"SJTU-Yale Collaborative Research Seed Fund",1,1,1,"33035346","tRFtarget" +"Neil Shen's SJTU Medical Research Fund",1,1,1,"33035346","tRFtarget" +"Science and Technology Commission of Shanghai Municipality",1,1,1,"33074314","TransCirc" +"National Science and Technology Basic Resources Investigation",2,2,2,"33074314, 33175170","TransCirc, 2019nCoVR" +"Strategic Priority Research Program of Chinese Academy of Sciences",1,1,1,"33074314","TransCirc" +"Fundação de Amparo à Pesquisa do Estado de Minas Gerais",1,1,1,"33095862","ThermoMutDB" +"Intramural Research Program, Division of Preclinical Innovation, NIH NCATS",2,2,2,"33156327, 33151287","TCRD, DrugCentral" +"Khon Kaen University",1,1,1,"33258964","ThRSDB" +"Natural Science Foundation of Tianjin City",1,1,1,"33360695","ToxinDB" +"CAS-SAFEA International Partnership Program for Creative Research Teams",1,1,1,"33360695","ToxinDB" +"Chinese Academy of Science and Technology Service Network Planning",1,1,1,"33360695","ToxinDB" +"US National Science Foundation",2,1,1,"33459764","SWITCHES" +"ISCIII-Subdirección General de Evaluación",1,1,1,"33655207","TMSNP" +"Region of Southern Denmark",1,1,1,"34000890","TELEMED" +"the Open Fund of State Key Laboratory of Tea Plant Biology and Utilization",1,1,1,"34154536","TeaAS" +"the special funds for the tea germplasm resource garden",1,1,1,"34154536","TeaAS" +"the Base of Introducing Talents for Tea Plant Biology and Quality Chemistry",1,1,1,"34154536","TeaAS" +"Tianjin Institute of Environmental and Operational Medicine",1,1,1,"34244719","TIDB" +"Institut National de la Santé et 
de la Recherche Médicale",2,2,2,"34531327, 31665499","T1TAdb, ReMap" +"Bordeaux University, and Agence Nationale de la Recherche",2,1,1,"34531327","T1TAdb" +"MINECO",3,3,3,"34679164, 30020414, 31680165","T-ARDIS, SKEMPI, DisGeNET" +"MIUR (Italy)",1,1,1,"22325123","YADAMP" +"University of Salerno (Salerno, Italy)",1,1,1,"22325123","YADAMP" +"National Science Council",1,1,1,"22735743","VIP DB" +"Academia Sinica",2,2,2,"22735743, 32898258","VIP DB, EXPath" +"NCCDPHP CDC HHS",1,1,1,"23175606","WholeCellKB" +"NIH-NIAID",2,2,2,"23219434, 30365026","VirmugenDB, Victors" +"Pfizer Inc",1,1,1,"24498619","CHBMP" +"CDC Foundation",1,1,1,"24498619","CHBMP" +"Epilepsy Research UK",1,1,1,"25414323","AFND" +"Suomen Kulttuurirahasto",1,1,1,"26061870","YDHS" +"INRA DGA",4,1,1,"26573482","WIDDE" +"INRA Métaprogramme ACCAF",1,1,1,"26573482","WIDDE" +"INRA AIP Bioressources",1,1,1,"26573482","WIDDE" +"Michael Paulini",1,1,1,"27899279","WormBase ParaSite" +"Natural Sciences and Engineering Research Council of Canada (NSERC)",1,1,1,"28158179","TrypsNetDB" +"Youth Innovation Promotion Association of the Chinese Academy of Sciences",2,2,2,"29351734, 30217145","EOGD, PPGD" +"Krajowy Naukowy Osrodek Wiodacy",1,1,1,"29624889","PhyMet2" +"Knowledge Innovation Program of the Chinese Academy of Sciences",1,1,1,"30217145","PPGD" +"State Secretariat for Education, Research and Innovation",2,2,2,"30239928, 30272209","UniLectin3D, Rhea" +"Shanghai Pujiang Program",1,1,1,"30365026","Victors" +"VA Research Career Scientist",1,1,1,"30365026","Victors" +"National Program on Key Basic Research Project of China",2,1,1,"30365026","Victors" +"VA Merit",1,1,1,"30365026","Victors" +"Innovation Program of Shanghai Municipal Education Commission",1,1,1,"30365026","Victors" +"Swedish Research Council of Environment, Agricultural Sciences, and Spatial Planning",1,1,1,"30371820","UNITE" +"Australian Research Council DP",1,1,1,"30395310","Vesiclepedia" +"Australian Research Council 
FT",1,1,1,"30395310","Vesiclepedia" +"U.S. Department of Defense",2,1,1,"30407009","VIETHERB" +"National Institutes of Health of US",2,1,1,"30462313","Cistrome DB" +"Pfizer",1,1,1,"30601939","UbiHub" +"SGC",1,1,1,"30601939","UbiHub" +"Merck KGaA",1,1,1,"30601939","UbiHub" +"Structural Genomics Consortium",1,1,1,"30601939","UbiHub" +"EU/EFPIA",1,1,1,"30601939","UbiHub" +"São Paulo Research Foundation-FAPESP",1,1,1,"30601939","UbiHub" +"The Wellcome",1,1,1,"30601939","UbiHub" +"ULTRA-DD",1,1,1,"30601939","UbiHub" +"Boehringer Ingelheim",1,1,1,"30601939","UbiHub" +"Takeda",1,1,1,"30601939","UbiHub" +"Bayer Pharma AG",1,1,1,"30601939","UbiHub" +"Janssen",1,1,1,"30601939","UbiHub" +"AbbVie",1,1,1,"30601939","UbiHub" +"Eshelman Institute for Innovation",1,1,1,"30601939","UbiHub" +"Novartis Pharma AG",1,1,1,"30601939","UbiHub" +"Ontario Ministry of Research, Innovation and Science (MRIS)",1,1,1,"30601939","UbiHub" +"European Union’s Horizon 2020",2,1,1,"31263870","ValTrendsDB" +"CEITEC 2020",1,1,1,"31263870","ValTrendsDB" +"Grant Agency of Masaryk University",1,1,1,"31263870","ValTrendsDB" +"Ministry of Education, Youth and Sports of the Czech Republic",2,2,2,"31263870, 29533231","ValTrendsDB, wwPDB" +"Tata Consultancy Services",1,1,1,"31283070","VIPdb" +"Research Flanders Post-doctoral Fellowship",1,1,1,"31504823","WALTZ-DB" +"CAPES/MCTI/CNPq",1,1,1,"31512145","ZIKAVID" +"University of Nottingham",1,1,1,"31598695","MirGeneDB" +"NASA-Ames",1,1,1,"31598695","MirGeneDB" +"Southern and Eastern Norway Regional Health Authority",2,1,1,"31598695","MirGeneDB" +"Dartmouth College",1,1,1,"31598695","MirGeneDB" +"Norwegian Research Council",3,2,2,"31598695, 31701148","MirGeneDB, JASPAR" +"Fondo Social Europeo",2,2,2,"31647096, 30418610","proGenomes2, eggNOG" +"Heidelberg Center for Human Bioinformatics",1,1,1,"31647096","proGenomes2" +"Consejería de Educación, Juventud y Deporte de la Comunidad de Madrid",1,1,1,"31647096","proGenomes2" +"ETH Zürich",1,1,1,"31647096","proGenomes2" 
+"Fudan University",1,1,1,"31647096","proGenomes2" +"Helmut Horten Foundation",1,1,1,"31647096","proGenomes2" +"ZHANGJIANG LAB",1,1,1,"31647096","proGenomes2" +"Biotechnology and Biosciences Research Council",4,2,2,"31696235, 30357350","MGnify, Pfam" +"Russian Fund for Basic Research",1,1,1,"31696235","MGnify" +"ELIXIR",3,3,3,"31696235, 33156333, 31722421","MGnify, InterPro, ENA" +"Simons Foundation",1,1,1,"31705629","VariCarta" +"National Board of Science and Technology of México",1,1,1,"32055857","SymGenDB" +"Ministry of Science and Technology of the People&apos;s Republic of China",1,1,1,"32221380","WeiBI" +"Hunan Provincial Natural Science Foundation of China",1,1,1,"32349124","VirusCircBase" +"CSIR",1,1,1,"32512488","TTRMDB" +"BMGF",1,1,1,"32548865","UK Immunological Toolbox" +"Core Capability Grant awarded to the Roslin Institute.",1,1,1,"32548865","UK Immunological Toolbox" +"SG RESAS Strategic Research Programme, UKRI-BBSRC/SG/BioRad",1,1,1,"32548865","UK Immunological Toolbox" +"H2020 European Institute of Innovation and Technology",1,1,1,"33045721","ViruSurf" +"Science and Technology Program of Sichuan Province",2,1,1,"33068436","tsRBase" +"National Youth Talent Support Program of China",1,1,1,"33094321","VPTMdb" +"Natural Science Fundation for Distinguished Young Scholars of Heilongjiang Province of China",1,1,1,"33095866","VARAdb" +"Alliance Campus Rhodanien",1,1,1,"33174598","UniLectin" +"Labex ARCANE",1,1,1,"33174598","UniLectin" +"ANR",1,1,1,"33174598","UniLectin" +"Key Research Program of Frontier Sciences of the Chinese Academy of Sciences",1,1,1,"33175170","2019nCoVR" +"Genomics Data Center Construction of Chinese Academy of Sciences",2,2,2,"33175170, 33170268","2019nCoVR, GVM" +"Zhangjiang special project of national innovation demonstration zone",1,1,1,"33175170","2019nCoVR" +"China Agriculture Research System",1,1,1,"33181826","WGVD" +"Natural Science Basic Research Plan in Shaanxi Province of China",1,1,1,"33181826","WGVD" 
+"ZonMw",1,1,1,"33211851","WikiPathways" +"Agropolis Fondation",1,1,1,"33216899","WCSdb" +"National Research Foundation in the Republic of Korea",3,1,1,"33245777","3DIV" +"Institute of Bioorganic Chemistry",1,1,1,"33367605","Virxicon" +"Guizhou Science and Technology Department",3,1,1,"33993461","TUPDB" +"Guizhou University",2,1,1,"33993461","TUPDB" +"Science Foundation Arizona",1,1,1,"22564364","PharmGKB" +"Arizona State University",1,1,1,"22564364","PharmGKB" +"Fulbright International Student Program Russia",1,1,1,"22564364","PharmGKB" +"Foundation For Polish Science",3,1,1,"26141515","mirEX" +"Australian Research Council Discovery",2,1,1,"26434508","ExoCarta" +"U.S. Medical Research and Materiel Command",1,1,1,"27650316","DBSecSys" +"Defense Threat Reduction Agency",1,1,1,"27650316","DBSecSys" +"Parkinson's UK",5,5,4,"27899622, 25378336, 27899567, 30395331, 25348405","UniProt, GOA, GOC, UniProtKB" +"Fundación Científica Asociación Española Contra el Cáncer",1,1,1,"28943872","MAHMI" +"EMBL-EBI core funding",1,1,1,"29533231","wwPDB" +"Priority Project on Infectious Disease Control and Prevention",4,1,1,"29917040","MPD" +"Spanish Ministry of Economy and Competitiveness",1,1,1,"30020414","SKEMPI" +"Interreg POCTEFA",1,1,1,"30020414","SKEMPI" +"Future Leader Fellowship",1,1,1,"30020414","SKEMPI" +"Youth Innovation Promotion Association of Chinese Academy of Science",1,1,1,"30364952","PED" +"Ghent University",1,1,1,"30371849","LNCipedia" +"National Science and Technology Major Project of China",2,1,1,"30380072","LncRNA2Target" +"European Bank",1,1,1,"30407529","BioSamples" +"Russian Foundation for Basic Research",2,2,2,"30445619, 28110602","GTRD, SitEx" +"Genomics Institute Largescale Applied Proteomics",1,1,1,"30476227","BioGRID" +"National Institutes of Health Office of Research Infrastructure",1,1,1,"30476227","BioGRID" +"Shenzhen Basic Research Program",2,1,1,"31161214","WDSPdb" +"ProCare Foundation",1,1,1,"31410491","ABCD" +"Informatization Plan of Chinese 
Academy of Sciences",1,1,1,"31599330","LSD" +"Ramón y Caja",1,1,1,"31608375","GSAD" +"Dirección General de Investigación Científica y Técnica",3,1,1,"31608375","GSAD" +"Warshel Institute for Computational Biology",2,2,2,"31647101, 33270889","miRTarBase, MethHC" +"Shenzhen Ganghong Group Co.",1,1,1,"31647101","miRTarBase" +"USC-Taiwan Postdoctoral Fellowship",1,1,1,"31665425","TFBSshape" +"Cisco Research Chair in Bioinformatics",1,1,1,"31665441","CARD" +"Cisco Systems",1,1,1,"31665441","CARD" +"Ontario Graduate Scholarship",1,1,1,"31665441","CARD" +"French Ministry of Higher Education and Research",1,1,1,"31665499","ReMap" +"KRIBB",1,1,1,"31680157","ChimerDB" +"Swiss Government",1,1,1,"31680159","EPD" +"Swedish Cancer Society",1,1,1,"32016318","VariBench" +"China-Hebei 100 Scholars Supporting Project to",1,1,1,"32257241","CGDB" +"United States Department of Energy",1,1,1,"32558264","PDB" +"H2020 Marie Sklodowska-Curie Actions",1,1,1,"32786900","BCE" +"Centre for Industrial Technological Development",1,1,1,"32786900","BCE" +"Fundaci?n Bot?n",1,1,1,"32786900","BCE" +"Instituto Nacional de Bioinform?tica",1,1,1,"32786900","BCE" +"Ministerio de Ciencia e Innovaci?n",1,1,1,"32786900","BCE" +"Innovative Translational Agricultural Research Program",1,1,1,"32898258","EXPath" +"Fundação de Amparo ã Pesquisa do Estado de São Paulo",2,1,1,"33021634","NDB" +"USDA Agriculture and Food Research Initiative",1,1,1,"33021634","NDB" +"Robert A. 
Welch Postdoctoral Fellowship",1,1,1,"33021634","NDB" +"Baden-Württemberg Foundation",1,1,1,"33051671","StreptomeDB" +"Zhengzhou Tobacco Research Institute",3,1,1,"33079992","PLncDB" +"China Association for Science and Technology",1,1,1,"33079992","PLncDB" +"National Research Foundation of Singapore",1,1,1,"33079992","PLncDB" +"NHMRC",1,1,1,"33084874","OGEE" +"The Dutch Cancer Society",2,1,1,"33084889","KLIFS" +"Cancer Center Amsterdam",1,1,1,"33084889","KLIFS" +"Brain Tumour Charity",1,1,1,"33084889","KLIFS" +"The Brain Tumour Charity",1,1,1,"33084889","KLIFS" +"PRACE call 18",1,1,1,"33104797","REDIportal" +"Elixir ITA",1,1,1,"33104797","REDIportal" +"PRACE call 15",1,1,1,"33104797","REDIportal" +"Youth science and technology innovation talent of guangdong TeZhi",1,1,1,"33175131","deepBase" +"Science and Technology New Star in ZhuJiang Guangzhou city",1,1,1,"33175131","deepBase" +"Institute Français de la Bioinformatique",1,1,1,"33206959","IntAct" +"Associazione Italiana per la Ricerca sul Cancro",1,1,1,"33206959","IntAct" +"EMBL core funding, Open Targets",1,1,1,"33206959","IntAct" +"Buchan Foundation",1,1,1,"33206959","IntAct" +"International Business Machines Corporation",1,1,1,"33206959","IntAct" +"Heilongjiang Touyan Innovation",1,1,1,"33219685","Lnc2Cancer" +"Marie Skłodowska-Curie",2,2,2,"33237313, 33237329","RepeatsDB, MobiDB" +"Italian Ministry of University and Research",2,2,2,"33237329, 33305318","MobiDB, PED" +"ANPCyT",1,1,1,"33237329","MobiDB" +"Shenzhen Ganghong Group Co., Ltd.",1,1,1,"33270889","MethHC" +"Independent Research Fund Denmark",1,1,1,"33270898","GPCRdb" +"Alfred Benzon Foundation",1,1,1,"33270898","GPCRdb" +"National Research, Development and Innovation Office, Hungary",1,1,1,"33270898","GPCRdb" +"The Francis Crick Institute",2,2,1,"33290552, 30395331","GOC" +"National Institute for Health Research University College London Hospitals Biomedical Research Centre",1,1,1,"33290552","GOC" +"Alzheimer's Research UK",1,1,1,"33290552","GOC" 
+"EMBL",2,2,2,"33290552, 33170273","GOC, Gramene" +"Research Council of Norway",2,2,1,"33290552, 30395331","GOC" +"Alzheimers Research UK",6,4,2,"33290552, 27899567, 30395267, 30395331","GOC, RNAcentral" +"Ensemble Effort for the Knowledge Commons",1,1,1,"33290552","GOC" +"Gene Regulation",1,1,1,"33290552","GOC" +"German Ministry of Science and Education",1,1,1,"33305318","PED" +"Vrije Universiteit Brussel",1,1,1,"33305318","PED" +"National Agency for the Promotion of Science and Technology",1,1,1,"33305318","PED" +"LBC",1,1,1,"33305318","PED" +"Shanghai Post-doctoral Excellence Program",1,1,1,"33306787","MetaADEDB" +"the national key research and development program",1,1,1,"33306800","CEG" +"Warshel Institute of Computational Biology",1,1,1,"33693667","UbiNet" +"Ministerium f?r Kultur und Wissenschaft des Landes Nordrhein-Westfalen",1,1,1,"33724838","UniProtKB" +"Bundesministerium f?r Bildung und Forschung",1,1,1,"33724838","UniProtKB" +"Division of Cancer Epidemiology and Genetics, National Cancer Institute",1,1,1,"33849445","TANTIGEN" +"Office of Dietary Supplements",1,1,1,"34366563","DSLD" +"U.S. 
Department of Health and Human Services",2,2,2,"34366563, 31851420","DSLD, CDD" +"NHGRI",3,2,2,"34387941, 32128557","PharmGKB, SGD" +"the National Key R&D Program of China",1,1,1,"34556150","AprGPD" +"the National Key R&D Program of China",1,1,1,"34556150","AprGPD" +"the Fundamental Research Funds for the Central Non-profit Research Institution of Chinese Academy of Forestry",1,1,1,"34556150","AprGPD" +"Projects of Youth Technology New Star of Shaanxi Province",1,1,1,"34648133","SMART" +"the Hundred Talents Program of Shaanxi Province of China",1,1,1,"34648133","SMART" +"DST, India",1,1,1,"34965192","piRNAQuest" +"supported by the",1,1,1,"34965192","piRNAQuest" +"Zhejiang Province Public Welfare Technology Application Research Project",2,2,1,"30715167, 33010178","BacWGSTdb" +"NIH NCATS Clinical and Translational Science Center for UNM",1,1,1,"33151287","DrugCentral" +"Botnar Foundation",1,1,1,"31733063","Genome3D" +"Joint Programming Initiative FOODBALL",1,1,1,"31724701","Exposome-Explorer" +"World Health Organization",1,1,1,"31724701","Exposome-Explorer" +"EXPOsOMICS FP7-KBBE-2012",1,1,1,"31724701","Exposome-Explorer" +"International Agency for Research on Cancer",1,1,1,"31724701","Exposome-Explorer" +"The British Council",1,1,1,"33270111","GENCODE" +"University of Bern",1,1,1,"33270111","GENCODE" +"UKRI Innovation Fellowship",1,1,1,"31612961","GWAS Central" +"Health Data Research UK",1,1,1,"31612961","GWAS Central" +"Krembil Foundation",2,1,1,"30407591","IID" +"National Institute of Dental and Craniofacial Research",1,1,1,"30417254","iSyTE" +"the University of the Ryukyus",1,1,1,"29668970","MitoFish" +"the Canon Foundation",1,1,1,"29668970","MitoFish" +"Netherlands Genomic Initiative (NGI)",1,1,1,"26919060","MSeqDR" +"United Mitochondrial Disease Foundation",1,1,1,"26919060","MSeqDR" +"USDA NIFA",2,1,1,"31722416","MEGARes" +"College of Veterinary Medicine and Biomedical Sciences, Texas A and M University",1,1,1,"31722416","MEGARes" +"Genome Canada and Genome 
British Columbia",1,1,1,"33313828","PSORTdb" +"National Sciences and Engineering Research Council of Canada",1,1,1,"33313828","PSORTdb" +"Frederick Banting and Charles Best Canada Graduate Scholarship",1,1,1,"33313828","PSORTdb" +"German Science Foundation",3,1,1,"31665479","ProteomicsDB" +"SAP",1,1,1,"31665479","ProteomicsDB" +"Grand Challenges Africa programme",1,1,1,"33952332","SANCDB" +"DELGEME - Wellcome Trust",1,1,1,"33952332","SANCDB" +"H3ABioNet - NIH",1,1,1,"33952332","SANCDB" +"Ministry of Science, ICT and Future Planning",1,1,1,"29156309","SMBP" +"National Agency for the Promotion of Science and Technology, Argentina",1,1,1,"31680154","TDR Targets" +"Argentinian Ministry of Science and Technology",1,1,1,"31680154","TDR Targets" +"GlaxoSmithKline Argentina",1,1,1,"31680154","TDR Targets" +"Indo-Argentina Bilateral Cooperation Project",1,1,1,"31680154","TDR Targets" +"Korea Health Industry Development Institute",2,1,1,"28437484","ECG-ViEW" +"Leidos",1,1,1,"31308250","cBioPortal" +"HHS | NIH | National Cancer Institute",3,1,1,"31308250","cBioPortal" +"Silicon Valley Community Foundation",2,1,1,"33221922","UCSC Genome Browser" +"UCSC Baskin Endowed Chair Funds",1,1,1,"33221922","UCSC Genome Browser" +"California Institute for Regenerative Medicine",1,1,1,"33221922","UCSC Genome Browser" +"Center for Information Technology Research in the Interest of Society",1,1,1,"33221922","UCSC Genome Browser" +"University of California Office of the President Emergency",1,1,1,"33221922","UCSC Genome Browser" +"AMED",3,1,1,"33179747","jMorp" +"National Aeronautics and Space Administration",2,2,1,"33080015, 30329036","GeneLab" +"Natural Science Foundation for Young Scholars of China",1,1,1,"33196801","NONCODE" +"National Bioscience Database Center",4,4,4,"33125081, 30462302, 33125071, 33166387","KEGG, MBGD, GlyTouCan, International Nucleotide Sequence Database Collaboration" +"International Science and Technology Center",2,2,1,"33151284, 24888447","DBAASP" +"Department of 
Energy Joint Genome Institute",1,1,1,"30357420","GOLD" +"Swiss Institute of Bioinformatics SERI",1,1,1,"33196836","OrthoDB" +"Israel Cancer Association",1,1,1,"31747015","ChiTaRS" +"Israel Innovation Authority",1,1,1,"31747015","ChiTaRS" +"UIC",1,1,1,"33245761","MoonProt" +"National Breast Cancer Foundation",1,1,1,"25392410","RaftProt" +"Ministry of Science and Higher Education of the Russian Federation",1,1,1,"33231677","GTRD" +"University of Lille 1",1,1,1,"29082924","Norine" +"Inria-Lille Nord Europe",1,1,1,"29082924","Norine" +"bilille plateform",1,1,1,"29082924","Norine" +"The National Institutes of Health",3,1,1,"21520341","dbNSFP" +"China 863 Program",1,1,1,"32406920","AnnoLnc" +"State Key Laboratory of Protein and Plant Gene Research",1,1,1,"32406920","AnnoLnc" +"Beijing Advanced Innovation Center for Genomics",1,1,1,"32406920","AnnoLnc" +"National Program for Support of Top-notch Young Professionals",1,1,1,"32406920","AnnoLnc" +"Det Frie Forskningsråd",1,1,1,"28280852","TANTIGEN" +"ISCIII-FEDER",3,1,1,"31680165","DisGeNET" +"ISCIII",1,1,1,"31680165","DisGeNET" +"Research Programme on Biomedical Informatics",1,1,1,"31680165","DisGeNET" +"EU H2020 Programme",1,1,1,"31680165","DisGeNET" +"IMI-JU",1,1,1,"31680165","DisGeNET" +"EU-FP7",1,1,1,"31680165","DisGeNET" +"Agència de Gestió d’Ajuts Universitaris i de Recerca Generalitat de Catalunya",1,1,1,"31680165","DisGeNET" +"Spanish National Bioinformatics Institute",1,1,1,"31680165","DisGeNET" +"MRC",2,2,2,"33180112, 27450113","Europe PMC, PDB" +"France Genomique",1,1,1,"28968784","MicroScope" +"Institut Francais De Boinformatique",1,1,1,"28968784","MicroScope" +"Spanish Ministry of Science",1,1,1,"31740968","SEVA-DB" +"ELIXIR-IIB",1,1,1,"31665520","SIGNOR" +"Italian Node of the European ELIXIR infrastructure",1,1,1,"31665520","SIGNOR" +"Postdoctoral Fellows of the Research Foundation-Flanders",1,1,1,"30485709","sORFs.org" +"national human genome research institute",2,1,1,"34698891","MGI" 
+"USDA-AFRI",1,1,1,"30407520","QTLdb" +"National Animal Genome Research",1,1,1,"30407520","QTLdb" +"Directorate for Biological Sciences",1,1,1,"28891124","DOCKGROUND" +"NASA",1,1,1,"30329036","GeneLab" +"GeneLab Project",1,1,1,"30329036","GeneLab" +"SLPSRA",1,1,1,"30329036","GeneLab" +"Division of Space Life and Physical Sciences Research and Applications",1,1,1,"30329036","GeneLab" +"Ames Research Center",1,1,1,"30329036","GeneLab" +"NASA’s Space Biology Program",1,1,1,"30329036","GeneLab" +"Alliance of International Science Organizations",1,1,1,"33704069","2019nCoVR" +"KC Wong Education Foundation",1,1,1,"33704069","2019nCoVR" +"Reactome database project",2,1,1,"28713666","Gramene" +"Gramene database award",1,1,1,"28713666","Gramene" +"UK-Japan Partnership",1,1,1,"30395289","PRIDE" +"Thor Industries",1,1,1,"30395289","PRIDE" +"Innovative Medicines Initiative Joint Undertaking",1,1,1,"30398643","ChEMBL" +"European Union Seventh Framework Programme",2,1,1,"30398643","ChEMBL" +"European Molecular Biology Laboratory Core Funds",1,1,1,"33125078","Pfam" +"European Union's Horizon 2020 MSCA-RISE action",1,1,1,"33125078","Pfam" +"ROIS-DS-JOINT 2019",1,1,1,"33211864","FANTOM5" +"RIKEN Center for Life Science Technology",1,1,1,"33211864","FANTOM5" +"RIKEN",1,1,1,"33211864","FANTOM5" +"Carl Zeiss Foundation",1,1,1,"33211869","Rfam" +"Department of Biotechnology , Ministry of Science and Technology",1,1,1,"27139435","NetPath" +"Qatar Foundation",1,1,1,"31290545","GXB" +"EU",2,2,2,"27450113, 33170273","PDB, Gramene" +"DOE",1,1,1,"27450113","PDB" +"Gene Regulation Ensemble Effort for the Knowledge Commons",1,1,1,"30395331","GOC" +"Parkinson's UK",1,1,1,"30395331","GOC" +"University College London",1,1,1,"30395331","GOC" +"China Medical University",1,1,1,"31701128","DriverDB" +"Ramón y Cajal Programme",1,1,1,"30418610","eggNOG" +"Hungarian National Research",1,1,1,"31680160","ELM" +"German Academic Exchange",1,1,1,"31680160","ELM" +"Argentine Ministry of Science and 
Technology",1,1,1,"31680160","ELM" +"National Science Foundation, Division of Biological Infrastructure",1,1,1,"30398656","InterPro" +"The Danish Cancer Society",1,1,1,"31701148","JASPAR" +"Helse Sør-Øst",1,1,1,"31701148","JASPAR" +"French National Agency for Research",2,1,1,"31701148","JASPAR" +"Norwegian Cancer Society",1,1,1,"31701148","JASPAR" +"BC Children's Hospital Foundation and Research Institute",1,1,1,"31701148","JASPAR" +"University of Oslo",1,1,1,"31701148","JASPAR" +"Swiss State Secretariat for Education, Research and Innovation",1,1,1,"31724716","neXtProt" +"Danish Council for Independent Research",1,1,1,"30476243","STRING" +"European Commission Horizon 2020",1,1,1,"33166387","International Nucleotide Sequence Database Collaboration" +"United Kingdom Biotechnology, Biological Sciences Research Council",1,1,1,"33166387","International Nucleotide Sequence Database Collaboration" +"National Institute of Environmental Health Sciences",9,3,1,"29846728, 30247620, 33068428","CTD" +"Charles University",1,1,1,"33106848","RNAcentral" +"São Paulo Research Foundation",1,1,1,"30395283","OrthoDB" +"National Science Foundation of USA",1,1,1,"30357387","ArrayExpress" +"U.S. 
National Library of Medicine",1,1,1,"31851420","CDD"
+"DHHS",1,1,1,"33156333","InterPro"
+"Academy of Finland",1,1,1,"33237311","STRING"
+"United Kingdom Biotechnology and Biosciences Research Council",1,1,1,"33170273","Gramene"
+"Technology of Japan",1,1,1,"30357349","DDBJ"
+"National Human Genome Research Institute of the National Institutes of Health",1,1,1,"26097180","ZFIN"
+"ELIXIR-EXCELERATE",1,1,1,"30395270","ENA"
+"EMBRIC",1,1,1,"30395270","ENA"
+"National Library of Medicine, National Institutes of Health",1,1,1,"28346087","PubChem"
+"The Biological Sciences Research Council",5,1,1,"31722421","ENA"
+"Biological Sciences Research Council",8,1,1,"33175160","ENA"
+"UK Biosciences and Biotechnology Research Council",5,1,1,"31598706","Ensembl"
+"European Union's Horizon 2020 Research and Innovation Programme",2,1,1,"31598706","Ensembl"
+"National Science & Technology Support Program",2,1,1,"26744602","SorGSD"
+"Sino-Africa Centre of CAS International Outreach Initiatives",1,1,1,"26744602","SorGSD"
diff --git a/analysis/location_information.R b/analysis/location_information.R
new file mode 100644
index 0000000..508389c
--- /dev/null
+++ b/analysis/location_information.R
@@ -0,0 +1,202 @@
+#!/usr/bin/env Rscript
+
+# Author : Kenneth Schackart
+# Date   : 2022-12-27
+# Purpose: Create plots of inventory location metadata
+
+# Imports --------------------------------------------------------------------
+
+## Library calls -------------------------------------------------------------
+
+library(argparse)
+library(dplyr)
+library(ggmap)
+library(ggplot2)
+library(magrittr)
+library(maps)
+library(readr)
+library(stringr)
+library(tidyr)
+
+# Function Definitions -------------------------------------------------------
+
+#' Parse command-line arguments
+#'
+#' `inventory_file` is declared with `nargs = "?"` so that it may be omitted;
+#' argparse only applies `default` to a positional argument that is optional.
+#'
+#' @return args list with elements `inventory_file` and `out_dir`
+get_args <- function() {
+  parser <- argparse::ArgumentParser()
+
+  parser$add_argument(
+    "inventory_file",
+    help = "Final inventory file",
+    metavar = "FILE",
+    type = "character",
+    nargs = "?",
+    default = "data/final_inventory_2022.csv"
+  )
+  parser$add_argument(
+    "-o",
+    "--out-dir",
+    help = "Output directory",
+    metavar = "DIR",
+    type = "character",
+    default = "analysis/figures"
+  )
+
+  args <- parser$parse_args()
+
+  return(args)
+}
+
+# Main -----------------------------------------------------------------------
+
+print("Parsing command-line arguments.")
+
+args <- get_args()
+
+out_dir <- args$out_dir
+
+if (!dir.exists(out_dir)) {
+  # recursive = TRUE: --out-dir may name a nested path with missing parents
+  dir.create(out_dir, recursive = TRUE)
+}
+
+full_inventory <-
+  read_csv(args$inventory_file,
+           show_col_types = FALSE)
+
+locations <- full_inventory %>%
+  select(extracted_url_country,
+         extracted_url_coordinates,
+         affiliation_countries)
+
+## URL locations -------------------------------------------------------------
+
+print("Processing URL locations.")
+
+### Coordinates --------------------------------------------------------------
+
+print("Plotting URL coordinates.")
+
+# Unpack the coordinate strings into one lat/long pair (still text) per row.
+# NOTE(review): the parsing below assumes entries look like "(lat,long)" and
+# are joined by ", " -- confirm against the inventory file.
+url_coordinates <- locations %>%
+  select(extracted_url_coordinates) %>%
+  rename(coordinates = extracted_url_coordinates) %>%
+  na.omit() %>%
+  mutate(coordinates = str_replace(coordinates, ",$", "")) %>%
+  mutate(coordinates = strsplit(coordinates, ", ")) %>%
+  unnest(coordinates) %>%
+  mutate(coordinates = str_replace(coordinates, "\\(", "")) %>%
+  mutate(coordinates = str_replace(coordinates, "\\)", "")) %>%
+  filter(coordinates != "") %>%
+  separate(coordinates, into = c("lat", "long"), sep = ",")
+
+url_coordinate_plot <- url_coordinates %>%
+  mutate_all(as.double) %>%
+  ggplot(aes(long, lat)) +
+  geom_map(
+    data = map_data("world"),
+    map = map_data("world"),
+    aes(long, lat, map_id = region),
+    color = "white",
+    fill = "lightgray"
+  ) +
+  geom_point(
+    alpha = 0.2,
+    color = "#1b2a50",
+    size = 1.5,
+    shape = 16
+  ) +
+  theme_void()
+
+ggsave(file.path(out_dir, "ip_coordinates.png"),
+       url_coordinate_plot)
+
+### Countries ----------------------------------------------------------------
+
+print("Plotting URL countries.")
+
+url_countries <- locations %>% + select(extracted_url_country) %>% + rename(country = extracted_url_country) %>% + na.omit() %>% + mutate(country = strsplit(country, ", ")) %>% + unnest(country) %>% + group_by(country) %>% + summarize(count = n()) %>% + filter(country != "Province of China") %>% + mutate( + country = case_when( + country == "United States" ~ "USA", + country == "United Kingdom" ~ "UK", + country == "Korea" ~ "South Korea", + country == "Russian Federation" ~ "Russia", + country == "Czechia" ~ "Czech Republic", + T ~ country + ) + ) + +url_countries_joined <- + left_join(map_data("world"), url_countries, by = c("region" = "country")) + +url_country_plot <- ggplot() + + geom_polygon(data = url_countries_joined, aes( + x = long, + y = lat, + fill = count, + group = group + )) + + theme_void() + + labs(fill = "Count") + +ggsave(file.path(out_dir, "ip_countries.png"), + url_country_plot) + +## Author locations --------------------------------------------------------- + +print("Plotting author affiliation countries.") + +author_country_counts <- locations %>% + select(affiliation_countries) %>% + na.omit() %>% + mutate(affiliation_countries = strsplit(affiliation_countries, ", ")) %>% + unnest(affiliation_countries) %>% + rename(country = affiliation_countries) %>% + group_by(country) %>% + summarize(count = n()) %>% + filter(country != "Province of China") %>% + mutate( + country = case_when( + country == "United States" ~ "USA", + country == "United Kingdom" ~ "UK", + country == "Korea" ~ "South Korea", + country == "Russian Federation" ~ "Russia", + country == "Czechia" ~ "Czech Republic", + T ~ country + ) + ) + +author_countries_joined <- + left_join(map_data("world"), + author_country_counts, + by = c("region" = "country")) + +author_plot <- ggplot() + + geom_polygon(data = author_countries_joined, aes( + x = long, + y = lat, + fill = count, + group = group + )) + + theme_void() + + labs(fill = "Count") + +ggsave(file.path(out_dir, 
"author_countries.png"), + author_plot) + +print("Done. Location data processed successfully.") \ No newline at end of file diff --git a/analysis/metadata_analysis.R b/analysis/metadata_analysis.R new file mode 100644 index 0000000..a8637f6 --- /dev/null +++ b/analysis/metadata_analysis.R @@ -0,0 +1,100 @@ +#!/usr/bin/env Rscript + +# Author : Kenneth Schackart +# Date : 2022-12-27 +# Purpose: Perform simple analyses on final inventory metadata + +# Imports ------------------------------------------------------------------- + +## Library calls ------------------------------------------------------------ + +library(argparse) +library(dplyr) +library(magrittr) +library(readr) +library(stringr) +library(tidyr) + +# Function Definitions ------------------------------------------------------ + +#' Parse command-line arguments +#' +#' @return args list with input filenames +get_args <- function() { + parser <- argparse::ArgumentParser() + + parser$add_argument( + "inventory_file", + help = "Final inventory file", + metavar = "FILE", + type = "character", + default = "data/final_inventory_2022.csv" + ) + + args <- parser$parse_args() + + return(args) +} + +# Main ---------------------------------------------------------------------- + +print("Parsing command-line arguments.") + +args <- get_args() + +full_inventory <- + read_csv(args$inventory_file, + show_col_types = FALSE) + +## Articles ----------------------------------------------------------------- + +num_articles <- full_inventory %>% + mutate(ID = strsplit(ID, ", ")) %>% + unnest(ID) %>% + distinct(ID) %>% + count() + +print(paste("Number of unique articles: ", num_articles)) + +## URLs --------------------------------------------------------------------- + +### URL statuses ------------------------------------------------------------ + +num_resources_with_good_url <- full_inventory %>% + mutate(extracted_url_status = strsplit(extracted_url_status, ", ")) %>% + unnest(extracted_url_status) %>% + 
filter(str_detect(extracted_url_status, "^[23]")) %>% + distinct(ID) %>% + count() + +print( + paste( + "Number of resources with at least 1 URL returning 2XX or 3XX:", + num_resources_with_good_url + ) +) + +### WayBack URLs ------------------------------------------------------------ + +num_resources_with_wayback <- full_inventory %>% + mutate(wayback_url = strsplit(wayback_url, ", ")) %>% + unnest(wayback_url) %>% + filter(wayback_url != "no_wayback") %>% + distinct(ID) %>% + count() + +print(paste( + "Number of resources with at least 1 WayBack URL:", + num_resources_with_wayback +)) + +## Funding ------------------------------------------------------------------ + +num_with_grant_agency <- full_inventory %>% + drop_na(grant_agencies) %>% + count() + +print(paste( + "Number of resources with grant agency data:", + num_with_grant_agency +)) diff --git a/analysis/performance_metrics.R b/analysis/performance_metrics.R new file mode 100644 index 0000000..b25c4ac --- /dev/null +++ b/analysis/performance_metrics.R @@ -0,0 +1,396 @@ +#!/usr/bin/env Rscript + +# Author : Kenneth Schackart +# Date : 2022-12-27 +# Purpose: Create plots and tables of model performance metrics + +# Imports ------------------------------------------------------------------- + +## Library calls ------------------------------------------------------------ + +library(argparse) +library(dplyr) +library(forcats) +library(ggplot2) +library(gt) +library(magrittr) +library(readr) +library(tidyr) + +# Settings ------------------------------------------------------------------ + +theme_set(theme_light() + + theme( + plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5) + )) + + +# Function definitions ------------------------------------------------------ + +#' Parse command-line arguments +#' +#' @return args list with input filenames +get_args <- function() { + parser <- argparse::ArgumentParser() + + parser$add_argument( + "-cv", + "--class-train", + help = 
"Classification train/val stats", + metavar = "FILE", + type = "character", + default = "data/classif_metrics/combined_train_stats.csv" + ) + parser$add_argument( + "-ct", + "--class-test", + help = "Classification test stats", + metavar = "FILE", + type = "character", + default = "data/classif_metrics/combined_test_stats.csv" + ) + parser$add_argument( + "-nv", + "--ner-train", + help = "NER train/val stats", + metavar = "FILE", + type = "character", + default = "data/ner_metrics/combined_train_stats.csv" + ) + parser$add_argument( + "-nt", + "--ner-test", + help = "NER test stats", + metavar = "FILE", + type = "character", + default = "data/ner_metrics/combined_test_stats.csv" + ) + parser$add_argument( + "-o", + "--out-dir", + help = "Output directory", + metavar = "DIR", + type = "character", + default = "analysis/figures" + ) + + args <- parser$parse_args() + + return(args) +} + +#' Pivot metrics to longer +#' +#' @param df Wide-formatted dataframe of performance metrics +#' @return Input dataframe pivoted longer +pivot_metrics <- function(df) { + df %>% + pivot_longer(c(contains("train"), contains("val")), + names_to = "metric", + values_to = "value") %>% + separate(metric, c("dataset", "metric"), "_") %>% + pivot_wider(names_from = "metric", values_from = "value") %>% + mutate(dataset = case_when(dataset == "val" ~ "Validation", + dataset == "train" ~ "Train")) +} + +#' Add a new column with simplified model names based on HF model names +#' +#' @param df Dataframe with model_name column of HF model names +#' @return Same dataframe with new model column +relabel_models <- function(df) { + df %>% + mutate( + model = case_when( + model_name == "bert-base-uncased" ~ "BERT", + model_name == "dmis-lab/biobert-v1.1" ~ "BioBERT", + model_name == "kamalkraj/bioelectra-base-discriminator-pubmed" ~ "BioELECTRA", + model_name == "kamalkraj/bioelectra-base-discriminator-pubmed-pmc" ~ "BioELECTRA-PMC", + model_name == "allenai/biomed_roberta_base" ~ "BioMed-RoBERTa", + 
model_name == "allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169" ~ "BioMed-RoBERTa-CP", + model_name == "allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500" ~ "BioMed-RoBERTa-RCT", + model_name == "bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12" ~ "BlueBERT", + model_name == "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12" ~ "BlueBERT-MIMIC-III", + model_name == "giacomomiolo/electramed_base_scivocab_1M" ~ "ELECTRAMed", + model_name == "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract" ~ "PubMedBERT", + model_name == "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext" ~ "PubMedBERT-Full", + model_name == "cambridgeltl/SapBERT-from-PubMedBERT-fulltext" ~ "SapBERT", + model_name == "cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token" ~ "SapBERT-Mean", + model_name == "allenai/scibert_scivocab_uncased" ~ "SciBERT" + ) + ) +} + +# Main ---------------------------------------------------------------------- + +print("Parsing command-line arguments.") + +args <- get_args() + +raw_classif_train_stats <- + read_csv(args$class_train, + show_col_types = FALSE) + +raw_classif_test_stats <- + read_csv(args$class_test, + show_col_types = FALSE) + +raw_ner_train_stats <- + read_csv(args$ner_train, + show_col_types = FALSE) + +raw_ner_test_stats <- + read_csv(args$ner_test, + show_col_types = FALSE) + +out_dir <- args$out_dir + +if (!dir.exists(out_dir)) { + dir.create(out_dir) +} + +## Plots -------------------------------------------------------------------- + +print("Generating plots.") + +### Classification ---------------------------------------------------------- + +print("Plotting classification validation metrics.") + +classif_train_stats <- raw_classif_train_stats %>% + pivot_metrics() %>% + relabel_models() + +tidy_class_train_stats <- classif_train_stats %>% + filter(dataset == "Validation") %>% + group_by(model) %>% + slice(which.max(precision)) %>% + ungroup() %>% + rename("Precision" = "precision", + "Recall" = "recall", + "F1-score" 
= "f1") %>% + mutate(model = fct_reorder(model, Precision, .desc = TRUE)) %>% + pivot_longer( + names_to = "metric", + values_to = "value", + cols = c(Precision, Recall, "F1-score", loss), + ) %>% + mutate(metric = factor(metric, levels = c("Recall", "Precision", "F1-score"))) %>% + filter(metric != "loss") + +class_val_plot <- tidy_class_train_stats %>% + ggplot(aes(y = metric, x = value)) + + facet_wrap( ~ model, ncol = 3,) + + geom_col(position = "dodge", + alpha = 0.8, + fill = "#29477e") + + labs(x = "", y = "") + + scale_x_continuous(breaks = seq(0, 1, by = 0.2)) + + theme(strip.background = element_rect(fill = "#454545"), + axis.text.y = element_blank()) + +ggsave( + file.path(out_dir, "class_val_set_performances.svg"), + class_val_plot, + width = 5, + height = 6 +) +ggsave( + file.path(out_dir, "class_val_set_performances.png"), + class_val_plot, + width = 5, + height = 6 +) + +### NER --------------------------------------------------------------------- + +print("Plotting NER validation metrics.") + +ner_train_stats <- raw_ner_train_stats %>% + pivot_metrics() %>% + relabel_models() + +tidy_ner_train_stats <- ner_train_stats %>% + filter(dataset == "Validation") %>% + group_by(model) %>% + slice(which.max(f1)) %>% + ungroup() %>% + rename("Precision" = "precision", + "Recall" = "recall", + "F1-score" = "f1") %>% + mutate(model = fct_reorder(model, .[["F1-score"]], .desc = TRUE)) %>% + pivot_longer( + names_to = "metric", + values_to = "value", + cols = c(Precision, Recall, "F1-score", loss), + ) %>% + mutate(metric = factor(metric, levels = c("Recall", "Precision", "F1-score"))) %>% + filter(metric != "loss") + +ner_val_plot <- tidy_ner_train_stats %>% + ggplot(aes(y = metric, x = value)) + + facet_wrap( ~ model, ncol = 3,) + + geom_col(position = "dodge", + alpha = 0.8, + fill = "#29477e") + + labs(x = "", y = "") + + theme(strip.text = element_text(color = "#1a1a1a"), + axis.text.y = element_blank()) + +ggsave( + file.path(out_dir, 
"ner_val_set_performances.svg"), + ner_val_plot, + width = 5, + height = 6 +) +ggsave( + file.path(out_dir, "ner_val_set_performances.png"), + ner_val_plot, + width = 5, + height = 6 +) + +## Tables ------------------------------------------------------------------- + +print("Generating metrics tables.") + +### Classification ---------------------------------------------------------- + +print("Generating classification metrics table.") + +classif_test_stats <- raw_classif_test_stats %>% + rename( + "model_name" = "model", + "Precision" = "precision", + "Recall" = "recall", + "F1-score" = "f1" + ) %>% + relabel_models() %>% + select(-model_name) %>% + pivot_longer( + names_to = "metric", + values_to = "value", + cols = c(Precision, Recall, "F1-score", loss), + ) %>% + mutate(metric = factor(metric, levels = c("Recall", "Precision", "F1-score"))) + +combined_class_table <- classif_test_stats %>% + na.omit() %>% + mutate(value = signif(value, 3)) %>% + pivot_wider(names_from = "metric", values_from = "value") %>% + rename(test_precision = Precision, + test_recall = Recall, + test_f1 = "F1-score") %>% + left_join( + tidy_class_train_stats %>% + select(-model_name,-dataset,-epoch) %>% + mutate(value = signif(value, 3)) %>% + pivot_wider(names_from = "metric", values_from = "value") %>% + rename( + val_precision = Precision, + val_recall = Recall, + val_f1 = "F1-score" + ), + by = "model" + ) %>% + mutate(model = as.character(model)) %>% + ungroup() %>% + arrange(desc(val_precision)) %>% + gt(rowname_col = "model") %>% + tab_header(title = "Classification model performance on validation and test sets") %>% + cols_move_to_start(columns = c( + val_f1, + val_precision, + val_recall, + test_f1, + test_precision, + test_recall + )) %>% + tab_spanner(label = "Validation Set", + columns = c(val_f1, val_precision, val_recall)) %>% + tab_spanner(label = "Test Set", + columns = c(test_f1, test_precision, test_recall)) %>% + cols_label( + val_f1 = "F1-score", + val_precision = 
"Precision", + val_recall = "Recall", + test_f1 = "F1-score", + test_precision = "Precision", + test_recall = "Recall" + ) + + +gtsave(combined_class_table, + file.path(out_dir, "combined_classification_table.docx")) + +### NER --------------------------------------------------------------------- + +print("Generating NER metrics table.") + +ner_test_stats <- raw_ner_test_stats %>% + rename( + "model_name" = "model", + "Precision" = "precision", + "Recall" = "recall", + "F1-score" = "f1" + ) %>% + relabel_models() %>% + select(-model_name) %>% + pivot_longer( + names_to = "metric", + values_to = "value", + cols = c(Precision, Recall, "F1-score", loss), + ) %>% + filter(metric != "loss") %>% + mutate(metric = factor(metric, levels = c("Recall", "Precision", "F1-score"))) + +combined_ner_table <- ner_test_stats %>% + na.omit() %>% + mutate(value = signif(value, 3)) %>% + pivot_wider(names_from = "metric", values_from = "value") %>% + rename(test_precision = Precision, + test_recall = Recall, + test_f1 = "F1-score") %>% + left_join( + tidy_ner_train_stats %>% + select(-model_name, -dataset, -epoch) %>% + mutate(value = signif(value, 3)) %>% + pivot_wider(names_from = "metric", values_from = "value") %>% + rename( + val_precision = Precision, + val_recall = Recall, + val_f1 = "F1-score" + ), + by = "model" + ) %>% + mutate(model = as.character(model)) %>% + ungroup() %>% + arrange(desc(val_f1)) %>% + gt(rowname_col = "model") %>% + tab_header(title = "NER model performance on validation and test sets") %>% + cols_move_to_start(columns = c( + val_f1, + val_precision, + val_recall, + test_f1, + test_precision, + test_recall + )) %>% + tab_spanner(label = "Validation Set", + columns = c(val_f1, val_precision, val_recall)) %>% + tab_spanner(label = "Test Set", + columns = c(test_f1, test_precision, test_recall)) %>% + cols_label( + val_f1 = "F1-score", + val_precision = "Precision", + val_recall = "Recall", + test_f1 = "F1-score", + test_precision = "Precision", + 
test_recall = "Recall" + ) + +gtsave(combined_ner_table, + file.path(out_dir, "combined_ner_table.docx")) + +print("Done. Analysis completed successfully.") diff --git a/config/.pylintrc b/config/.pylintrc new file mode 100644 index 0000000..1f61591 --- /dev/null +++ b/config/.pylintrc @@ -0,0 +1,613 @@ +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Specify a score threshold to be exceeded before program exits with error. +fail-under=10.0 + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Files or directories matching the regex patterns are skipped. The regex +# matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. 
+suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". 
+disable=print-statement, + parameter-unpacking, + unpacking-in-except, + old-raise-syntax, + backtick, + long-suffix, + old-ne-operator, + old-octal-literal, + import-star-module-level, + non-ascii-bytes-literal, + raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + apply-builtin, + basestring-builtin, + buffer-builtin, + cmp-builtin, + coerce-builtin, + execfile-builtin, + file-builtin, + long-builtin, + raw_input-builtin, + reduce-builtin, + standarderror-builtin, + unicode-builtin, + xrange-builtin, + coerce-method, + delslice-method, + getslice-method, + setslice-method, + no-absolute-import, + old-division, + dict-iter-method, + dict-view-method, + next-method-called, + metaclass-assignment, + indexing-exception, + raising-string, + reload-builtin, + oct-method, + hex-method, + nonzero-method, + cmp-method, + input-builtin, + round-builtin, + intern-builtin, + unichr-builtin, + map-builtin-not-iterating, + zip-builtin-not-iterating, + range-builtin-not-iterating, + filter-builtin-not-iterating, + using-cmp-argument, + eq-without-hash, + div-method, + idiv-method, + rdiv-method, + exception-message-attribute, + invalid-str-codec, + sys-max-int, + bad-python3-import, + deprecated-string-function, + deprecated-str-translate-call, + deprecated-itertools-function, + deprecated-types-field, + next-method-defined, + dict-items-not-iterating, + dict-keys-not-iterating, + dict-values-not-iterating, + deprecated-operator-function, + deprecated-urllib-function, + xreadlines-attribute, + deprecated-sys-function, + exception-escape, + comprehension-escape, + invalid-name, + duplicate-code + +# Enable the message, report, category or checker with the given id(s). 
You can +# either give multiple identifiers separated by commas (,) or put this option +# multiple times (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'error', 'warning', 'refactor', and 'convention' +# which contain the number of messages in each category, as well as 'statement' +# which is the total number of statements analyzed. This score is used by the +# global evaluation report (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete names of functions that never return. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[MISCELLANEOUS] + +# List of note tags to take into consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take into consideration. +#notes-rgx= + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the 'python-enchant' package. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names.
+class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. 
+#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken into consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +#variable-rgx= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused imports in __init__ files.
+init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members=torch.* + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. 
+ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). 
+max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules=optparse,tkinter.tix + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. 
+preferred-modules= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "BaseException, Exception". +overgeneral-exceptions=BaseException, + Exception diff --git a/config/README.md b/config/README.md new file mode 100644 index 0000000..abadfbd --- /dev/null +++ b/config/README.md @@ -0,0 +1,55 @@ +# Project Configuration Files + +This directory contains configuration files for several aspects of this project. + +```sh +. +├── .pylintrc # Configurations for pylint +├── environment.yml # Conda environment description +├── fairsharing_login.json # Login info for FAIRsharing +├── models_info.tsv # Model training parameters +├── query.txt # EuropePMC query string +├── README.md +├── train_predict.yml # Configs for reproducing results +└── update_inventory.yml # Configs for updating inventory +``` + +# File Descriptions + +## `.pylintrc` + +Since the test suite includes linting of all Python files with pylint, this configuration file informs pylint about what rules to follow during linting. This helps ensure a consistent testing environment across machines. + +## `environment.yml` + +This YAML file can be used to directly create a conda environment with all of the dependencies of this project. + +## `fairsharing_login.json` + +FAIRsharing requires login credentials for using their API. 
Before running the reproduction pipeline, you must create an account at FAIRsharing, and enter your email and password into this JSON file. *This is not necessary for updating the inventory*. + +## `models_info.tsv` + +This tab-separated file contains the configurations used during model training. The columns of this file are as follows: + +| model | hf_name | batch_size | learning_rate | weight_decay | scheduler +| :-: | :-: | :-: | :-: | :-: | :-: | +unique, shortened model name used for convenience | pretrained model name as it appears in HuggingFace Hub | number of training examples used by one processor in one training step | step size at each iteration while moving toward a minimum of a loss function | weight decay (L2 penalty) | optional learning rate scheduler flag ( `-lr` or empty) + +New rows can be added to this file, and the training pipeline re-run to evaluate the new models against the others. Multiple rows for a given file can also be added to compare performance of training with certain parameters, but the model column should remain unique. + +More information about these parameters can be found on [Hugging Face 🤗](https://huggingface.co/docs/transformers/main_classes/optimizer_schedules) + +## `query.txt` + +This text file contains a single string, which is the search query sent to EuropePMC. The publication date range should contain placeholders `{0}` and `{1}`, for the from- and to-dates respectively. If placeholders are not used, the date arguments of `src/query_epmc.py` are ignored. + +## `train_predict.yml` + +This YAML file contains the majority of the configurations used in the Snakemake pipelines, such as directories and model training configurations. These are the configurations used for reproducing the original results. + +Toward the end of this file, you can choose which output files are used for data analysis. Currently it is set to use the newly generated output files. 
However, you can uncomment the file names that are stored in the repository to reproduce the figures/analyses exactly in case something changes over time, such as the metadata retrieved from EuropePMC. + +## `update_inventory.yml` + +This YAML file contains the configurations used when updating the inventory. It is mostly just directory specifications. \ No newline at end of file diff --git a/config/environment.yml b/config/environment.yml new file mode 100644 index 0000000..2522ef1 --- /dev/null +++ b/config/environment.yml @@ -0,0 +1,223 @@ +channels: + - conda-forge +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=1_gnu + - _r-mutex=1.0.1=anacondar_1 + - asttokens=2.0.5=pyhd8ed1ab_0 + - backcall=0.2.0=pyh9f0ad1d_0 + - backports=1.0=py_2 + - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 + - binutils_impl_linux-64=2.40=hf600244_0 + - bwidget=1.9.14=ha770c72_1 + - bzip2=1.0.8=h7f98852_4 + - c-ares=1.18.1=h7f98852_0 + - ca-certificates=2022.12.7=ha878542_0 + - cairo=1.16.0=ha61ee94_1014 + - curl=7.88.1=hdc1c0ab_0 + - debugpy=1.6.0=py38hfa26641_0 + - decorator=5.1.1=pyhd8ed1ab_0 + - entrypoints=0.4=pyhd8ed1ab_0 + - executing=0.8.3=pyhd8ed1ab_0 + - expat=2.5.0=h27087fc_0 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=hab24e00_0 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - freetype=2.12.1=hca18f0e_1 + - fribidi=1.0.10=h36c2ea0_0 + - gcc_impl_linux-64=12.2.0=hcc96c02_19 + - gettext=0.21.1=h27087fc_0 + - gfortran_impl_linux-64=12.2.0=h55be85b_19 + - graphite2=1.3.13=h58526e2_1001 + - gsl=2.7=he838d99_0 + - gxx_impl_linux-64=12.2.0=hcc96c02_19 + - harfbuzz=6.0.0=h8e241bc_0 + - icu=70.1=h27087fc_0 + - ipykernel=6.13.0=py38h7f3c49e_0 + - ipython=8.3.0=py38h578d9bd_0 + - jedi=0.18.1=py38h578d9bd_1 + - jpeg=9e=h0b41bf4_3 + - jupyter_client=7.3.0=pyhd8ed1ab_0 + - 
jupyter_core=4.9.2=py38h578d9bd_0 + - kernel-headers_linux-64=2.6.32=he073ed8_15 + - keyutils=1.6.1=h166bdaf_0 + - krb5=1.20.1=h81ceb04_0 + - ld_impl_linux-64=2.40=h41732ed_0 + - lerc=4.0.0=h27087fc_0 + - libblas=3.9.0=16_linux64_openblas + - libcblas=3.9.0=16_linux64_openblas + - libcurl=7.88.1=hdc1c0ab_0 + - libdeflate=1.17=h0b41bf4_0 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=h516909a_1 + - libffi=3.4.2=h7f98852_5 + - libgcc-devel_linux-64=12.2.0=h3b97bd3_19 + - libgcc-ng=12.2.0=h65d4601_19 + - libgfortran-ng=12.2.0=h69a702a_19 + - libgfortran5=12.2.0=h337968e_19 + - libglib=2.74.1=h606061b_1 + - libgomp=12.2.0=h65d4601_19 + - libiconv=1.17=h166bdaf_0 + - liblapack=3.9.0=16_linux64_openblas + - libnghttp2=1.51.0=hff17c54_0 + - libnsl=2.0.0=h7f98852_0 + - libopenblas=0.3.21=pthreads_h78a6416_3 + - libpng=1.6.39=h753d276_0 + - libsanitizer=12.2.0=h46fd767_19 + - libsodium=1.0.18=h36c2ea0_1 + - libssh2=1.10.0=hf14f497_3 + - libstdcxx-devel_linux-64=12.2.0=h3b97bd3_19 + - libstdcxx-ng=12.2.0=h46fd767_19 + - libtiff=4.5.0=h6adf6a1_2 + - libuuid=2.32.1=h7f98852_1000 + - libwebp-base=1.2.4=h166bdaf_0 + - libxcb=1.13=h7f98852_1004 + - libxml2=2.10.3=h7463322_0 + - libzlib=1.2.13=h166bdaf_4 + - make=4.3=hd18ef5c_1 + - matplotlib-inline=0.1.3=pyhd8ed1ab_0 + - ncurses=6.3=h9c3ff4c_0 + - nest-asyncio=1.5.5=pyhd8ed1ab_0 + - openssl=3.0.8=h0b41bf4_0 + - packaging=21.3=pyhd8ed1ab_0 + - pango=1.50.13=hd33c08f_0 + - parso=0.8.3=pyhd8ed1ab_0 + - pcre2=10.40=hc3806b6_0 + - pexpect=4.8.0=pyh9f0ad1d_2 + - pickleshare=0.7.5=py_1003 + - pip=22.2.2=pyhd8ed1ab_0 + - pixman=0.40.0=h36c2ea0_0 + - prompt-toolkit=3.0.29=pyha770c72_0 + - psutil=5.9.0=py38h0a891b7_1 + - pthread-stubs=0.4=h36c2ea0_1001 + - ptyprocess=0.7.0=pyhd3deb0d_0 + - pure_eval=0.2.2=pyhd8ed1ab_0 + - pygments=2.12.0=pyhd8ed1ab_0 + - pyparsing=3.0.8=pyhd8ed1ab_0 + - python=3.8.12=h0744224_3_cpython + - python-dateutil=2.8.2=pyhd8ed1ab_0 + - python_abi=3.8=2_cp38 + - pyzmq=22.3.0=py38hfc09fa9_2 + - 
r=4.2=r42hd8ed1ab_1006 + - r-base=4.2.2=ha7d60f8_3 + - r-boot=1.3_28.1=r42hc72bb7e_0 + - r-class=7.3_21=r42h133d619_0 + - r-cluster=2.1.4=r42h8da6f51_0 + - r-codetools=0.2_19=r42hc72bb7e_0 + - r-foreign=0.8_84=r42h133d619_0 + - r-kernsmooth=2.23_20=r42hd009a43_1 + - r-lattice=0.20_45=r42h06615bd_1 + - r-mass=7.3_58.2=r42h133d619_0 + - r-matrix=1.5_3=r42h5f7b363_0 + - r-mgcv=1.8_41=r42h5f7b363_0 + - r-nlme=3.1_162=r42hac0b197_0 + - r-nnet=7.3_18=r42h06615bd_1 + - r-recommended=4.2=r42hd8ed1ab_1005 + - r-rpart=4.1.19=r42h06615bd_0 + - r-spatial=7.3_16=r42h133d619_0 + - r-survival=3.5_3=r42h133d619_0 + - readline=8.1.2=h0f457ee_0 + - sed=4.8=he412f7d_0 + - setuptools=60.9.3=py38h578d9bd_0 + - six=1.16.0=pyh6c4a22f_0 + - sqlite=3.37.0=h9cd32fc_0 + - stack_data=0.2.0=pyhd8ed1ab_0 + - sysroot_linux-64=2.12=he073ed8_15 + - tk=8.6.12=h27826a3_0 + - tktable=2.10=hb7b940f_3 + - tornado=6.1=py38h0a891b7_3 + - traitlets=5.1.1=pyhd8ed1ab_0 + - tzdata=2021e=he74cb21_0 + - wcwidth=0.2.5=pyh9f0ad1d_2 + - wheel=0.37.1=pyhd8ed1ab_0 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.0.10=h7f98852_0 + - xorg-libsm=1.2.3=hd9c2040_1000 + - xorg-libx11=1.7.2=h7f98852_0 + - xorg-libxau=1.0.9=h7f98852_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxrender=0.9.10=h7f98852_1003 + - xorg-libxt=1.2.1=h7f98852_2 + - xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h7f98852_1007 + - xz=5.2.6=h166bdaf_0 + - zeromq=4.3.4=h9c3ff4c_1 + - zlib=1.2.13=h166bdaf_4 + - zstd=1.5.2=h3eb15da_6 + - pip: + - aiohttp==3.8.1 + - aiosignal==1.2.0 + - anyio==3.6.2 + - appdirs==1.4.4 + - async-timeout==4.0.2 + - beautifulsoup4==4.11.1 + - black==22.1.0 + - certifi==2021.10.8 + - charset-normalizer==2.0.12 + - citepy==0.5.0 + - configargparse==1.5.3 + - connection-pool==0.0.3 + - datasets==1.18.3 + - datrie==0.8.2 + - dill==0.3.4 + - frozenlist==1.3.0 + - fsspec==2022.2.0 + - gitdb==4.0.9 + - gitpython==3.1.27 + - h11==0.14.0 
+ - httpcore==0.16.1 + - httpx==0.23.1 + - huggingface-hub==0.4.0 + - idna==3.3 + - importlib-metadata==1.7.0 + - importlib-resources==5.4.0 + - jsonschema==4.4.0 + - kaleido==0.2.1 + - lxml==4.9.0 + - multidict==6.0.2 + - multiprocess==0.70.12.2 + - nbformat==5.2.0 + - nltk==3.6.1 + - numpy==1.19.2 + - pandas==1.4.1 + - pathspec==0.9.0 + - plac==1.3.4 + - platformdirs==2.5.1 + - plotly==5.1.0 + - pulp==2.6.0 + - pyarrow==7.0.0 + - pycountry==22.3.5 + - pyrsistent==0.18.1 + - pyyaml==6.0 + - ratelimiter==1.2.0.post0 + - reindent==3.5.1 + - requests==2.27.1 + - retry==0.9.2 + - rfc3986==1.5.0 + - scikit-learn==0.24.1 + - smart-open==5.2.1 + - smmap==5.0.0 + - snakefmt==0.6.0 + - snakemake==7.1.1 + - sniffio==1.3.0 + - soupsieve==2.3.2.post1 + - stopit==1.1.2 + - tabulate==0.8.9 + - tenacity==8.0.1 + - tokenizers==0.11.6 + - tomli==2.0.1 + - toposort==1.7 + - torch==1.9.0 + - tqdm==4.63.0 + - transformers==4.16.2 + - urllib3==1.26.8 + - xxhash==3.0.0 + - yarl==1.7.2 + - yte==1.2.0 + - zipp==3.7.0 diff --git a/config/fairsharing_login.json b/config/fairsharing_login.json new file mode 100644 index 0000000..2d4bc47 --- /dev/null +++ b/config/fairsharing_login.json @@ -0,0 +1,6 @@ +{ + "user": { + "login": "example_email@gmail.com", + "password": "example_password123!" 
+ } +} \ No newline at end of file diff --git a/config/models_info.tsv b/config/models_info.tsv new file mode 100644 index 0000000..b099d47 --- /dev/null +++ b/config/models_info.tsv @@ -0,0 +1,16 @@ +model hf_name batch_size learning_rate weight_decay scheduler +bert bert-base-uncased 16 3e-5 0 +biobert dmis-lab/biobert-v1.1 16 3e-5 0 +bioelectra kamalkraj/bioelectra-base-discriminator-pubmed 16 5e-5 0 -lr +bioelectra_pmc kamalkraj/bioelectra-base-discriminator-pubmed-pmc 32 5e-5 0 -lr +biomed_roberta allenai/biomed_roberta_base 16 2e-5 0 +biomed_roberta_chemprot allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 16 2e-5 0 +biomed_roberta_rct500 allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 16 2e-5 0 +bluebert bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 16 3e-5 0 -lr +bluebert_mimic3 bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 32 3e-5 0 +electramed giacomomiolo/electramed_base_scivocab_1M 16 5e-5 0 -lr +sapbert cambridgeltl/SapBERT-from-PubMedBERT-fulltext 16 2e-5 0.01 +sapbert_mean_token cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token 32 2e-5 0.01 +scibert allenai/scibert_scivocab_uncased 16 3e-5 0 +pubmedbert microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract 16 3e-5 0 -lr +pubmedbert_fulltext microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext 32 3e-5 0 -lr diff --git a/config/query.txt b/config/query.txt new file mode 100644 index 0000000..6c3cfef --- /dev/null +++ b/config/query.txt @@ -0,0 +1 @@ +(ABSTRACT:(www OR http*) AND ABSTRACT:(data OR resource OR database*)) NOT (TITLE:(retract* OR withdraw* OR erratum)) NOT (ABSTRACT:(retract* OR withdraw* OR erratum OR github.* OR cran.r OR youtube.com OR bitbucket.org OR links.lww.com OR osf.io OR bioconductor.org OR annualreviews.org OR creativecommons.org OR sourceforge.net OR bit.ly OR zenodo OR onlinelibrary.wiley.com OR proteomecentral.proteomexchange.org/dataset OR oxfordjournals.org/nar/database OR figshare OR mendeley OR .pdf OR "clinical trial" OR registration OR 
"trial registration" OR clinicaltrial OR "registration number" OR pre-registration OR preregistration)) AND (SRC:(MED OR PMC OR AGR OR CBA)) AND (FIRST_PDATE:[{0} TO {1}]) \ No newline at end of file diff --git a/config/train_predict.yml b/config/train_predict.yml new file mode 100644 index 0000000..db1b360 --- /dev/null +++ b/config/train_predict.yml @@ -0,0 +1,116 @@ +# Environments +project_env: './env' + +# Directories + +## Querying +query_out_dir: 'out/original_query' +last_date_dir: 'out/last_query_date' + +## Classification +classif_splits_dir: 'data/classif_splits' +classif_train_outdir: 'out/classif_train_out' +classif_benchmark_dir: 'out/benchmarks/classif' +classif_log_dir: 'out/logs/classif' +classif_out_dir: 'out/original_query/classification' + +## NER +ner_splits_dir: 'data/ner_splits' +ner_train_outdir: 'out/ner_train_out' +ner_benchmark_dir: 'out/benchmarks/ner' +ner_log_dir: 'out/logs/ner' +ner_out_dir: 'out/original_query/ner' + +## URL Extraction +extract_url_dir: 'out/original_query/url_extraction' + +## Name processing +processed_names_dir: 'out/original_query/processed_names' + +## Initial deduplication +initial_dedupe_dir: 'out/original_query/initial_deduplication' + +## For manual review +for_manual_review_dir: 'out/original_query/for_manual_review' + +## Manually reviewed +manually_reviewed_dir: 'out/original_query/manually_reviewed' + +## Processed manual review +processed_manual_review: 'out/original_query/processed_manual_review' + +## URL Checking +check_url_dir: 'out/original_query/url_checking' + +## Additional metadata from EuropePMC +epmc_meta_dir: 'out/original_query/epmc_meta' + +## Processed country codes +processed_countries: 'out/original_query/processed_countries' + +## Data analysis +analysis_dir: 'analysis/' +figures_dir: 'analysis/figures' + +# Input files +classif_data: 'data/manual_classifications.csv' +ner_data: 'data/manual_ner_extraction.csv' + +# File with configuration settings for the models +models: 
'config/models_info.tsv' + +# Dates used for initial query +initial_query_start: 2011 +initial_query_end: 2021 +query_string: 'config/query.txt' + +# Ratios used for data splitting +split_ratios: '0.7 0.15 0.15' + +# Metrics used for choosing best model/epoch +class_criteria_metric: 'precision' +ner_criteria_metric: 'f1' + +# Number of epochs +classif_epochs: 10 +ner_epochs: 10 + +# Filtering parameters +max_urls: 2 +min_best_name_prob: 0.978 + +# URL checking +chunk_size: 200 +num_tries: 3 +backoff: 0.5 + +# Getting metadata from EuropePMC +epmc_chunk_size: 20 + +# Processing country names +country_format: 'full' + +# Input files for data analysis +## Data analysis can either be run on the newly generated output files +## or on the files stored in the repository. Comment/uncomment below +## to choose which files to use. + +## Newly generated files +classification_train_stats: 'out/classif_train_out/combined_train_stats/combined_stats.csv' +classification_test_stats: 'out/classif_train_out/combined_test_stats/combined_stats.csv' +ner_train_stats: 'out/ner_train_out/combined_train_stats/combined_stats.csv' +ner_test_stats: 'out/ner_train_out/combined_test_stats/combined_stats.csv' +final_inventory_file: 'out/original_query/processed_countries/predictions.csv' + +## Stored files +# classification_train_stats: 'data/classif_metrics/combined_train_stats.csv' +# classification_test_stats: 'data/classif_metrics/combined_test_stats.csv' +# ner_train_stats: 'data/ner_metrics/combined_train_stats.csv' +# ner_test_stats: 'data/ner_metrics/combined_test_stats.csv' +# final_inventory_file: 'data/final_inventory_2022.csv' + +# Credentials file for FAIRsharing +fair_login_file: "config/fairsharing_login.json" + +# Manually curated funding agency countries +curated_funders: "analysis/funders_geo_200.csv" diff --git a/config/update_inventory.yml b/config/update_inventory.yml new file mode 100644 index 0000000..201eab5 --- /dev/null +++ b/config/update_inventory.yml @@ -0,0 +1,68 @@ 
+# Environments +project_env: './env' + +# Directories + +## Querying +query_out_dir: 'out/new_query' +last_date_dir: 'out/last_query_date' + +## Classification +classif_train_outdir: 'out/classif_train_out' +classif_out_dir: 'out/new_query/classification' + +## NER +ner_train_outdir: 'out/ner_train_out' +ner_out_dir: 'out/new_query/ner' + +## URL Extraction +extract_url_dir: 'out/new_query/url_extraction' + +## Name processing +processed_names_dir: 'out/new_query/processed_names' + +## Initial deduplication +initial_dedupe_dir: 'out/new_query/initial_deduplication' + +## For manual review +for_manual_review_dir: 'out/new_query/for_manual_review' + +## Manually reviewed +manually_reviewed_dir: 'out/new_query/manually_reviewed' + +## Processed manual review +processed_manual_review: 'out/new_query/processed_manual_review' + +## URL Checking +check_url_dir: 'out/new_query/url_checking' + +## Additional metadata from EuropePMC +epmc_meta_dir: 'out/new_query/epmc_meta' + +## Processed country codes +processed_countries: 'out/new_query/processed_countries' + +# Parameters + +## Europe PMC query +query_from_date: 2022 +query_to_date: 2022 +query_string: 'config/query.txt' + +# Previous inventory to be merged +previous_inventory: 'data/final_inventory_2022.csv' + +## Filtering and marking for manual review +max_urls: 2 +min_best_name_prob: 0.978 + +## URL checking +chunk_size: 200 +num_tries: 3 +backoff: 0.5 + +## Getting metadata from EuropePMC +epmc_chunk_size: 20 + +## Processing country names +country_format: 'full' diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..ba52a96 --- /dev/null +++ b/data/README.md @@ -0,0 +1,120 @@ +# Overview + +This directory contains the data used for model training, testing, and validation as well as several final output files. + +``` +. 
+├── classif_metrics/ # Article classification model performance metrics +| ├── combined_train_stats.csv # Performance on training and validation sets +| └── combined_test_stats.csv # Performance on withheld test set +├── ner_metrics/ # NER model performance metrics +| ├── combined_train_stats.csv # Performance on training and validation sets +| └── combined_test_stats.csv # Performance on withheld test set +├── epmc_query_results_2022.csv # EuropePMC query return used in 2022 inventory +├── final_inventory_2022.csv # Final inventory generated in 2022 +├── manual_classifications.csv # Manual article classifications +├── manual_ner_extraction.csv # Manual NER extraction +└── manually_reviewed_inventory.csv # Selectively manually reviewed inventory +``` +## `*/combined_train_stats.csv` + +The two files `classif_metrics/combined_train_stats.csv` and `ner_metrics/combined_train_stats.csv` have the same columns. They contain metrics obtained during fine-tuning of the article classification and NER models, respectively. + +### `classif_metrics/combined_train_stats.csv` + +Metrics of each model were output by [src/class_train.py](../src/class_train.py), and combined by [src/combine_stats.py](../src/combine_stats.py) to generate this file. + +### `ner_metrics/combined_train_stats.csv` + +Metrics of each model were output by [src/ner_train.py](../src/ner_train.py), and combined by [src/combine_stats.py](../src/combine_stats.py) to generate this file. 
+ +### Columns of both files: + +* **epoch**: Training epoch, beginning at 0 and going to 9 (10 epochs) +* **train_precision**: Precision on training set +* **train_recall**: Recall on training set +* **train_f1**: *F*1-score on training set +* **train_loss**: Loss on training set +* **val_precision**: Precision on validation set +* **val_recall**: Recall on validation set +* **val_f1**: *F*1-score on validation set +* **val_loss**: Loss on validation set +* **model_name**: Model name (corresponding to the **model** column of [config/models_info.tsv](../config/models_info.tsv)) + +## `*/combined_test_stats.csv` + +The two files `classif_metrics/combined_test_stats.csv` and `ner_metrics/combined_test_stats.csv` have the same columns. They contain metrics obtained during evaluation on the withheld test set of the article classification and NER models, respectively. + +### `classif_metrics/combined_test_stats.csv` + +Metrics of each model were output by [src/class_final_eval.py](../src/class_final_eval.py), and combined by [src/combine_stats.py](../src/combine_stats.py) to generate this file. + +### `ner_metrics/combined_test_stats.csv` + +Metrics of each model were output by [src/ner_final_eval.py](../src/ner_final_eval.py), and combined by [src/combine_stats.py](../src/combine_stats.py) to generate this file. + +### Columns of both files: + +* **model**: Model name (corresponding to the **model** column of [config/models_info.tsv](../config/models_info.tsv)) +* **precision**: Precision on test set +* **recall**: Recall on test set +* **f1**: *F*1-score on test set +* **loss**: Loss on test set + +## `epmc_query_results_2022.csv` + +EuropePMC query results that were used for generation of the inventory in 2022. The following columns are included: + +* **id**: article id +* **title**: article title +* **abstract**: article abstract +* **publication_date**: article first publication date + +## `final_inventory_2022.csv` + +The final output file of the inventory conducted in 2022. 
+ +Information on the contents of this file is available in the [main README](../README.md#final-inventory-output). + +## `manual_classifications.csv` + +This file contains the manual classifications of 1634 articles by kes (Kenneth Schackart) and hji (Heidi Imker). This set was split into training, validation, and testing splits for fine-tuning and evaluation of the article classification models. + +* **id**: article id +* **title**: article title +* **abstract**: article abstract +* **checked_by**: curator initials +* **kes_check**: kes determination where 0 = not an article describing data resource OR 1 = an article describing data resource +* **hji_check**: hji determination where 0 = not an article describing data resource OR 1 = an article describing data resource +* **curation_sum**: sum of curator values (iii_checks) +* **number_of_checks**: number of checks (by different curators) +* **curation_score**: curation_sum/number_of_checks (gives a "confidence score" as done in Wren 2017); note that values other than 0 or 1 indicate lack of agreement between curators +* **kes_notes**: raw notes documented by kes +* **hji_notes**: raw notes documented by hji + +## `manual_ner_extraction.csv` + +The file contains the manual NER extraction from articles manually classified to describe a biodata resource. Curation was performed by Kenneth Schackart, and validated by Heidi Imker. + +* **id**: article id +* **title**: article title. Adjacent articles were not included (*e.g.* "Protein Ensemble Database" not "The Protein Ensemble Database"). +* **abstract**: article abstract +* **name**: resource name +* **acronym**: resource acronym or shortened name, as presented in the title or abstract. This is sometimes the same as **name**. +* **url**: resource URL. Note, other URLs may have been present that were not that of the resource. These extraneous URLs were not extracted into this column. 
+* **short_description**: short description of the resource, as found in the abstract or title + +**Notes**: + +Version numbers were generally not included in **name** or **acronym** if there was white space between the element and version number (*e.g.* "CTDB" was recorded for "CTDB (v2.0)" while version number in "PDB-2-PBv3.0" was kept). + +Many articles had several of the above elements. This could be for a few reasons: + +* Multiple versions of an element, for instance when there are different **short_description**s in the title and abstract. +* Differences in case (*e.g.* "Human transporter database" vs "Human Transporter Database"). These are equivalent when case-insensitive, but case is deliberate in many titles. + +## `manually_reviewed_inventory.csv` + +After initial generation of the inventory, automated deduplication, and flagging for selective manual review, the inventory was manually reviewed by hji. This is the file resulting from that review. + +For reproducing the original results of the 2022 inventory, this file can be used for the second half of the pipeline (processing the manual review, data analysis and figures, *etc*.) 
diff --git a/data/classif_metrics/combined_test_stats.csv b/data/classif_metrics/combined_test_stats.csv new file mode 100644 index 0000000..f69ebeb --- /dev/null +++ b/data/classif_metrics/combined_test_stats.csv @@ -0,0 +1,16 @@ +model,precision,recall,f1,loss +bert-base-uncased,1.0,0.5636363636363636,0.7209302325581396,0.044670137313177 +dmis-lab/biobert-v1.1,0.975,0.7090909090909091,0.8210526315789474,0.0498374319641189 +kamalkraj/bioelectra-base-discriminator-pubmed,0.95,0.6909090909090909,0.8,0.0317919736013472 +kamalkraj/bioelectra-base-discriminator-pubmed-pmc,1.0,0.6,0.7499999999999999,0.0515625044328611 +allenai/biomed_roberta_base,1.0,0.8181818181818182,0.9,0.0402623700338954 +allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169,1.0,0.6545454545454545,0.7912087912087912,0.0763317382193035 +allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500,0.975,0.7090909090909091,0.8210526315789474,0.0509264899575907 +bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12,0.9555555555555556,0.7818181818181819,0.86,0.0440522567486191 +bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12,0.9130434782608696,0.7636363636363637,0.8316831683168316,0.0377682040521652 +giacomomiolo/electramed_base_scivocab_1M,0.9565217391304348,0.8,0.8712871287128713,0.0469235960092489 +cambridgeltl/SapBERT-from-PubMedBERT-fulltext,0.9375,0.8181818181818182,0.8737864077669902,0.0368662678904585 +cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token,0.9767441860465116,0.7636363636363637,0.8571428571428571,0.0256347813110494 +allenai/scibert_scivocab_uncased,1.0,0.6545454545454545,0.7912087912087912,0.0734124845091499 +microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract,1.0,0.7454545454545455,0.8541666666666666,0.0520174471843027 +microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext,1.0,0.7636363636363637,0.865979381443299,0.02706272118145 diff --git a/data/classif_metrics/combined_train_stats.csv b/data/classif_metrics/combined_train_stats.csv new file mode 100644 index 0000000..e1de0c6 
--- /dev/null +++ b/data/classif_metrics/combined_train_stats.csv @@ -0,0 +1,151 @@ +epoch,train_precision,train_recall,train_f1,train_loss,val_precision,val_recall,val_f1,val_loss,model_name +0,0.9784172661870504,0.7101827676240209,0.8229954614220877,0.0142745070417769,0.92,0.575,0.7076923076923077,0.0184988350894466,bert-base-uncased +1,0.9889807162534436,0.93733681462141,0.9624664879356568,0.0046247215596614,0.8611111111111112,0.775,0.8157894736842106,0.0200806143722912,bert-base-uncased +2,0.943069306930693,0.9947780678851176,0.9682337992376112,0.0036004023641858,0.7,0.875,0.7777777777777777,0.0303600696999134,bert-base-uncased +3,1.0,1.0,1.0,0.0001494112358317,0.7857142857142857,0.825,0.8048780487804876,0.0263319234305161,bert-base-uncased +4,1.0,1.0,1.0,0.0002503720688442,0.8611111111111112,0.775,0.8157894736842106,0.0247254250248976,bert-base-uncased +5,1.0,1.0,1.0,5.175392756237112e-05,0.7555555555555555,0.85,0.7999999999999998,0.0274129222021233,bert-base-uncased +6,1.0,1.0,1.0,3.330824539649548e-05,0.7727272727272727,0.85,0.8095238095238095,0.0292079951259038,bert-base-uncased +7,1.0,1.0,1.0,2.4941893463434727e-05,0.7727272727272727,0.85,0.8095238095238095,0.0306161555971177,bert-base-uncased +8,1.0,1.0,1.0,1.9725642449676057e-05,0.7727272727272727,0.85,0.8095238095238095,0.0318002053670096,bert-base-uncased +9,1.0,1.0,1.0,1.6138146942777896e-05,0.7727272727272727,0.85,0.8095238095238095,0.0328283470931957,bert-base-uncased +0,0.7239382239382239,0.97911227154047,0.832408435072142,0.0181713900591229,0.6153846153846154,1.0,0.761904761904762,0.0256008761202764,dmis-lab/biobert-v1.1 +1,0.9512820512820512,0.9686684073107048,0.9598965071151356,0.00428044002054,0.8,0.8,0.8000000000000002,0.0147630558261331,dmis-lab/biobert-v1.1 +2,1.0,0.9686684073107048,0.9840848806366048,0.0024200166264435,0.9310344827586208,0.675,0.7826086956521738,0.0266639774331366,dmis-lab/biobert-v1.1 
+3,1.0,0.9895561357702348,0.994750656167979,0.0009970445728253,0.9310344827586208,0.675,0.7826086956521738,0.023675157960926,dmis-lab/biobert-v1.1 +4,0.9973958333333334,1.0,0.998696219035202,0.0002473273032533,0.7857142857142857,0.825,0.8048780487804876,0.0235615801022229,dmis-lab/biobert-v1.1 +5,1.0,1.0,1.0,0.000106234203573,0.8205128205128205,0.8,0.810126582278481,0.0236749445504081,dmis-lab/biobert-v1.1 +6,1.0,1.0,1.0,5.139963606143498e-05,0.9090909090909092,0.75,0.821917808219178,0.0261559203792796,dmis-lab/biobert-v1.1 +7,1.0,1.0,1.0,3.8080539161169135e-05,0.8611111111111112,0.775,0.8157894736842106,0.0269533877331271,dmis-lab/biobert-v1.1 +8,1.0,1.0,1.0,3.008640652806465e-05,0.8378378378378378,0.775,0.8051948051948051,0.0277318534402487,dmis-lab/biobert-v1.1 +9,1.0,1.0,1.0,2.463689066397747e-05,0.8378378378378378,0.775,0.8051948051948051,0.028430018061113,dmis-lab/biobert-v1.1 +0,0.952802359882006,0.8433420365535248,0.8947368421052633,0.010448207451989,0.90625,0.725,0.8055555555555555,0.0163885995323927,kamalkraj/bioelectra-base-discriminator-pubmed +1,0.9150485436893204,0.9843342036553524,0.9484276729559749,0.0062379946967423,0.7291666666666666,0.875,0.7954545454545454,0.0201589698499103,kamalkraj/bioelectra-base-discriminator-pubmed +2,0.9477611940298508,0.9947780678851176,0.9707006369426752,0.0033580961518306,0.7391304347826086,0.85,0.7906976744186046,0.0240410060604225,kamalkraj/bioelectra-base-discriminator-pubmed +3,0.9455445544554456,0.9973890339425588,0.9707750952986024,0.0026517638641609,0.6666666666666666,0.85,0.7472527472527473,0.0331022254419776,kamalkraj/bioelectra-base-discriminator-pubmed +4,0.9973614775725592,0.9869451697127938,0.9921259842519684,0.0014082801262602,0.8611111111111112,0.775,0.8157894736842106,0.0228158536618985,kamalkraj/bioelectra-base-discriminator-pubmed +5,1.0,0.9973890339425588,0.9986928104575163,0.0005542292834646,0.8048780487804879,0.825,0.8148148148148149,0.0210400264265027,kamalkraj/bioelectra-base-discriminator-pubmed 
+6,1.0,0.9973890339425588,0.9986928104575163,0.000453035778515,0.8095238095238095,0.85,0.8292682926829269,0.0238880222376271,kamalkraj/bioelectra-base-discriminator-pubmed +7,1.0,0.9973890339425588,0.9986928104575163,0.0004241108929942,0.8095238095238095,0.85,0.8292682926829269,0.0248525578458354,kamalkraj/bioelectra-base-discriminator-pubmed +8,1.0,0.9973890339425588,0.9986928104575163,0.0004104754462408,0.8095238095238095,0.85,0.8292682926829269,0.0253706616847395,kamalkraj/bioelectra-base-discriminator-pubmed +9,1.0,0.9973890339425588,0.9986928104575163,0.0004062545524324,0.8095238095238095,0.85,0.8292682926829269,0.0255554964393963,kamalkraj/bioelectra-base-discriminator-pubmed +0,0.9775280898876404,0.6814621409921671,0.803076923076923,0.0098710438637305,0.9130434782608696,0.525,0.6666666666666667,0.0129476453898087,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +1,0.96448087431694,0.9216710182767625,0.9425901201602136,0.0037882313017797,0.7692307692307693,0.75,0.7594936708860761,0.0110442927985821,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +2,0.9867724867724867,0.9738903394255874,0.9802890932982918,0.0016966263130248,0.8,0.8,0.8000000000000002,0.0098878804992579,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +3,0.9947368421052633,0.9869451697127938,0.9908256880733946,0.0008505548205934,0.7804878048780488,0.8,0.7901234567901235,0.0102575201096024,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +4,1.0,0.9895561357702348,0.994750656167979,0.0004228915779948,0.723404255319149,0.85,0.7816091954022989,0.0135981246547878,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +5,1.0,0.9895561357702348,0.994750656167979,0.0002009521190435,0.8571428571428571,0.75,0.7999999999999999,0.0119985120851288,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +6,1.0,1.0,1.0,0.0001222710319632,0.8048780487804879,0.825,0.8148148148148149,0.0129293030912771,kamalkraj/bioelectra-base-discriminator-pubmed-pmc 
+7,1.0,1.0,1.0,9.563790452266604e-05,0.7857142857142857,0.825,0.8048780487804876,0.0134202971023583,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +8,1.0,1.0,1.0,8.425928776683043e-05,0.7857142857142857,0.825,0.8048780487804876,0.0136715520477894,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +9,1.0,1.0,1.0,8.080499151309143e-05,0.7857142857142857,0.825,0.8048780487804876,0.0137591594420139,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +0,0.9324324324324323,0.9007832898172324,0.9163346613545816,0.009458284904348,0.8157894736842105,0.775,0.7948717948717949,0.0189683387676874,allenai/biomed_roberta_base +1,0.9425,0.9843342036553524,0.962962962962963,0.0051140032622044,0.7391304347826086,0.85,0.7906976744186046,0.022145742702784,allenai/biomed_roberta_base +2,0.8721461187214612,0.9973890339425588,0.930572472594397,0.0075299719919526,0.660377358490566,0.875,0.7526881720430108,0.0311827708691733,allenai/biomed_roberta_base +3,0.9973544973544972,0.9843342036553524,0.990801576872536,0.0013542871350177,0.8823529411764706,0.75,0.8108108108108107,0.027826228249326,allenai/biomed_roberta_base +4,0.9973614775725592,0.9869451697127938,0.9921259842519684,0.0005968241960891,0.9117647058823528,0.775,0.8378378378378379,0.0256080028696905,allenai/biomed_roberta_base +5,0.9973684210526316,0.9895561357702348,0.9934469200524246,0.000726024541111,0.8611111111111112,0.775,0.8157894736842106,0.0270600927435008,allenai/biomed_roberta_base +6,0.9948051948051948,1.0,0.9973958333333334,0.0002773666454473,0.8,0.8,0.8000000000000002,0.0288550823519838,allenai/biomed_roberta_base +7,1.0,1.0,1.0,4.4769566775673165e-05,0.8205128205128205,0.8,0.810126582278481,0.0301240103366146,allenai/biomed_roberta_base +8,1.0,1.0,1.0,2.552156209181686e-05,0.8205128205128205,0.8,0.810126582278481,0.0317793183190507,allenai/biomed_roberta_base +9,1.0,1.0,1.0,2.049952794517368e-05,0.8205128205128205,0.8,0.810126582278481,0.0326702112429731,allenai/biomed_roberta_base 
+0,0.959375,0.8015665796344648,0.8733997155049786,0.0111719130885539,0.8484848484848485,0.7,0.7671232876712328,0.0180060282677599,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +1,0.9637305699481864,0.9712793733681462,0.9674902470741222,0.0037045829160281,0.775,0.775,0.775,0.020442303480967,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +2,0.9597989949748744,0.9973890339425588,0.9782330345710628,0.0025039945328732,0.72,0.9,0.7999999999999999,0.0314478816804271,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +3,0.9895287958115184,0.9869451697127938,0.9882352941176472,0.0014559273474825,0.8333333333333334,0.75,0.7894736842105262,0.0275555027234397,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +4,0.9695431472081218,0.9973890339425588,0.9832689832689832,0.0016717212987473,0.7555555555555555,0.85,0.7999999999999998,0.0264891188016231,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +5,1.0,0.9712793733681462,0.9854304635761588,0.0014181794299908,0.90625,0.725,0.8055555555555555,0.0304121408254968,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +6,1.0,0.9451697127937336,0.9718120805369128,0.0027025421080537,0.9333333333333332,0.7,0.8,0.0345163681176813,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +7,1.0,0.9973890339425588,0.9986928104575163,0.0001169731697609,0.825,0.825,0.825,0.029361976673951,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +8,0.9973958333333334,1.0,0.998696219035202,7.570717397341879e-05,0.7857142857142857,0.825,0.8048780487804876,0.0342164667293578,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +9,1.0,1.0,1.0,1.4980794216353532e-05,0.8461538461538461,0.825,0.8354430379746836,0.0343511277357073,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +0,0.8911392405063291,0.9190600522193212,0.9048843187660668,0.011126310176343,0.7441860465116279,0.8,0.7710843373493975,0.0160700836037314,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 
+1,0.8613636363636363,0.9895561357702348,0.9210206561360874,0.0093407184311237,0.6481481481481481,0.875,0.7446808510638299,0.022833876452356,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +2,0.992063492063492,0.97911227154047,0.985545335085414,0.0016606553812519,0.8421052631578947,0.8,0.8205128205128205,0.0250707667104243,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +3,0.9895833333333334,0.9921671018276762,0.9908735332464148,0.001136848732658,0.7857142857142857,0.825,0.8048780487804876,0.0249205354217486,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +4,0.992084432717678,0.9817232375979112,0.9868766404199476,0.0016189292562212,0.8888888888888888,0.8,0.8421052631578948,0.0258653925251473,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +5,0.9972677595628416,0.9530026109660574,0.9746328437917224,0.0027853340157098,0.9393939393939394,0.775,0.8493150684931509,0.023250331797596,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +6,0.9947916666666666,0.9973890339425588,0.9960886571056062,0.0006795547146847,0.8292682926829268,0.85,0.8395061728395061,0.0229691751894347,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +7,1.0,0.9973890339425588,0.9986928104575163,0.000164385461997,0.85,0.85,0.85,0.0262860214533928,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +8,1.0,0.9973890339425588,0.9986928104575163,0.0001107595671611,0.868421052631579,0.825,0.8461538461538461,0.0284146948449559,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +9,1.0,0.9973890339425588,0.9986928104575163,8.120022795234284e-05,0.868421052631579,0.825,0.8461538461538461,0.0291836398744109,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +0,0.9316939890710384,0.8903394255874674,0.910547396528705,0.0097204311226088,0.775,0.775,0.775,0.0184039794986352,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +1,0.942211055276382,0.97911227154047,0.9603072983354674,0.0049520657537982,0.7446808510638298,0.875,0.8045977011494252,0.0232435391758972,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 
+2,0.9868766404199476,0.9817232375979112,0.9842931937172776,0.0023055454228774,0.7435897435897436,0.725,0.7341772151898733,0.0268500688655665,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +3,1.0,0.9712793733681462,0.9854304635761588,0.001936240968279,0.8285714285714286,0.725,0.7733333333333333,0.0274028305254244,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +4,0.9973890339425588,0.9973890339425588,0.9973890339425588,0.000577839820369,0.717391304347826,0.825,0.7674418604651162,0.028982650629192,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +5,1.0,1.0,1.0,0.0002405349209418,0.7333333333333333,0.825,0.776470588235294,0.0337643453371037,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +6,1.0,1.0,1.0,0.0001342730608404,0.775,0.775,0.775,0.0346616045440072,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +7,1.0,1.0,1.0,0.0001092123188235,0.775,0.775,0.775,0.0356969334464813,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +8,1.0,1.0,1.0,9.770086264477014e-05,0.775,0.775,0.775,0.036310342414328,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +9,1.0,1.0,1.0,9.435002507283768e-05,0.775,0.775,0.775,0.0365231356374134,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +0,0.7700892857142857,0.9007832898172324,0.8303249097472922,0.0089871501528061,0.6363636363636364,0.875,0.7368421052631579,0.0114113306099513,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +1,0.8894348894348895,0.9451697127937336,0.9164556962025316,0.004715619056865,0.6875,0.825,0.75,0.0115806370411279,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +2,0.9833795013850416,0.9268929503916448,0.9543010752688172,0.0023050950810759,0.8285714285714286,0.725,0.7733333333333333,0.0103560693608889,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +3,0.9921259842519684,0.9869451697127938,0.9895287958115184,0.0009344734671291,0.8157894736842105,0.775,0.7948717948717949,0.0119206028164557,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 
+4,1.0,0.9947780678851176,0.9973821989528796,0.0003406010594562,0.6888888888888889,0.775,0.7294117647058822,0.01417401901581,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +5,1.0,0.9973890339425588,0.9986928104575163,0.0001609144364303,0.775,0.775,0.775,0.0165084103743235,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +6,1.0,0.9973890339425588,0.9986928104575163,9.81492054787405e-05,0.7111111111111111,0.8,0.7529411764705882,0.0178782761846698,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +7,1.0,0.9973890339425588,0.9986928104575163,5.37390858936729e-05,0.7619047619047619,0.8,0.7804878048780488,0.0183894345595401,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +8,1.0,1.0,1.0,2.6995710758132737e-05,0.7804878048780488,0.8,0.7901234567901235,0.0190801860401465,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +9,1.0,1.0,1.0,1.873862159961704e-05,0.7692307692307693,0.75,0.7594936708860761,0.0197369941750412,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +0,0.9182561307901907,0.8798955613577023,0.8986666666666666,0.0123936850269353,0.7631578947368421,0.725,0.7435897435897436,0.0221179312317626,giacomomiolo/electramed_base_scivocab_1M +1,0.9282178217821784,0.97911227154047,0.9529860228716646,0.0063284333263963,0.7083333333333334,0.85,0.7727272727272727,0.0195594689451486,giacomomiolo/electramed_base_scivocab_1M +2,0.992084432717678,0.9817232375979112,0.9868766404199476,0.0019137250946451,0.8285714285714286,0.725,0.7733333333333333,0.0273712528078764,giacomomiolo/electramed_base_scivocab_1M +3,0.9973614775725592,0.9869451697127938,0.9921259842519684,0.0010245115020282,0.8648648648648649,0.8,0.8311688311688312,0.0271794852645446,giacomomiolo/electramed_base_scivocab_1M +4,0.9973958333333334,1.0,0.998696219035202,0.0007772715169631,0.8421052631578947,0.8,0.8205128205128205,0.0234039964757672,giacomomiolo/electramed_base_scivocab_1M 
+5,1.0,1.0,1.0,0.0001899724463118,0.868421052631579,0.825,0.8461538461538461,0.0270650059785752,giacomomiolo/electramed_base_scivocab_1M +6,1.0,1.0,1.0,0.0002256837288002,0.8048780487804879,0.825,0.8148148148148149,0.0286338748293196,giacomomiolo/electramed_base_scivocab_1M +7,1.0,1.0,1.0,0.000119053208162,0.8461538461538461,0.825,0.8354430379746836,0.0299731507152758,giacomomiolo/electramed_base_scivocab_1M +8,1.0,1.0,1.0,0.0001050709833286,0.8461538461538461,0.825,0.8354430379746836,0.0305941727542488,giacomomiolo/electramed_base_scivocab_1M +9,1.0,1.0,1.0,0.0001009992491053,0.8461538461538461,0.825,0.8354430379746836,0.0307870977730255,giacomomiolo/electramed_base_scivocab_1M +0,0.9032258064516128,0.9503916449086162,0.926208651399491,0.0090855596171672,0.7608695652173914,0.875,0.813953488372093,0.014680154996473,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +1,0.9223300970873788,0.9921671018276762,0.9559748427672956,0.0053301974910864,0.7450980392156863,0.95,0.8351648351648352,0.0185005793687682,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +2,0.9869791666666666,0.9895561357702348,0.9882659713168188,0.001456522852677,0.868421052631579,0.825,0.8461538461538461,0.0175446086129724,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +3,1.0,0.93733681462141,0.967654986522911,0.0038747513687665,0.896551724137931,0.65,0.7536231884057972,0.0260131328876288,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +4,1.0,1.0,1.0,0.0002029667587523,0.875,0.875,0.875,0.0180330742809499,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +5,1.0,1.0,1.0,8.448641662837437e-05,0.875,0.875,0.875,0.0214966058712042,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +6,1.0,1.0,1.0,5.610443015382504e-05,0.8974358974358975,0.875,0.8860759493670887,0.022435673217087,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +7,1.0,1.0,1.0,4.216413999543657e-05,0.8974358974358975,0.875,0.8860759493670887,0.0234750265335377,cambridgeltl/SapBERT-from-PubMedBERT-fulltext 
+8,1.0,1.0,1.0,3.331892964429419e-05,0.8974358974358975,0.875,0.8860759493670887,0.0243448966271361,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +9,1.0,1.0,1.0,2.721993097965e-05,0.8974358974358975,0.875,0.8860759493670887,0.0251000898351318,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +0,0.8044943820224719,0.9347258485639688,0.8647342995169082,0.0069129740022316,0.6851851851851852,0.925,0.7872340425531915,0.0093606969564215,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +1,0.9721448467966574,0.9112271540469974,0.9407008086253368,0.0034070002491944,0.90625,0.725,0.8055555555555555,0.0072937042645688,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +2,0.9973544973544972,0.9843342036553524,0.990801576872536,0.001025950127348,0.9117647058823528,0.775,0.8378378378378379,0.0076767274606152,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +3,1.0,0.9973890339425588,0.9986928104575163,0.0003208194821162,0.8461538461538461,0.825,0.8354430379746836,0.0085242483413444,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +4,1.0,1.0,1.0,0.0001106637185461,0.8333333333333334,0.875,0.8536585365853658,0.0100440775635857,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +5,1.0,1.0,1.0,6.788832001928226e-05,0.7954545454545454,0.875,0.8333333333333334,0.0118065250088583,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +6,1.0,1.0,1.0,3.709093445744674e-05,0.8461538461538461,0.825,0.8354430379746836,0.0111954702333834,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +7,1.0,1.0,1.0,2.8715360463120577e-05,0.8292682926829268,0.85,0.8395061728395061,0.0115355234476005,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +8,1.0,1.0,1.0,2.327173428317874e-05,0.8292682926829268,0.85,0.8395061728395061,0.0117884388884658,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +9,1.0,1.0,1.0,1.942019583400518e-05,0.8292682926829268,0.85,0.8395061728395061,0.0120221741919247,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token 
+0,0.9152119700748128,0.95822454308094,0.9362244897959184,0.0079096968016397,0.7954545454545454,0.875,0.8333333333333334,0.0142115249105219,allenai/scibert_scivocab_uncased +1,0.9180722891566264,0.9947780678851176,0.9548872180451128,0.0048848107872549,0.7,0.875,0.7777777777777777,0.0190920329553151,allenai/scibert_scivocab_uncased +2,0.9921875,0.9947780678851176,0.9934810951760104,0.0009244462695167,0.7608695652173914,0.875,0.813953488372093,0.0238953338461523,allenai/scibert_scivocab_uncased +3,1.0,0.9216710182767625,0.9592391304347826,0.0051621664254456,0.9032258064516128,0.7,0.7887323943661972,0.0314924740583969,allenai/scibert_scivocab_uncased +4,1.0,0.93733681462141,0.967654986522911,0.0025737051991672,0.9333333333333332,0.7,0.8,0.0278697592116974,allenai/scibert_scivocab_uncased +5,0.9871134020618556,1.0,0.993514915693904,0.000933740126056,0.7659574468085106,0.9,0.8275862068965516,0.0300782336472715,allenai/scibert_scivocab_uncased +6,1.0,0.9660574412532638,0.9827357237715804,0.0020053387968434,0.8571428571428571,0.75,0.7999999999999999,0.032056502511347,allenai/scibert_scivocab_uncased +7,0.9921671018276762,0.9921671018276762,0.9921671018276762,0.0008296812519392,0.7857142857142857,0.825,0.8048780487804876,0.0349798135885181,allenai/scibert_scivocab_uncased +8,1.0,0.9973890339425588,0.9986928104575163,0.0004016227318717,0.8421052631578947,0.8,0.8205128205128205,0.0345200855634907,allenai/scibert_scivocab_uncased +9,0.9973544973544972,0.9843342036553524,0.990801576872536,0.0011678959125322,0.8857142857142857,0.775,0.8266666666666667,0.033289002526241,allenai/scibert_scivocab_uncased +0,0.9054726368159204,0.9503916449086162,0.9273885350318471,0.0083955201379796,0.7083333333333334,0.85,0.7727272727272727,0.0152657362837461,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +1,0.931372549019608,0.9921671018276762,0.9608091024020228,0.0043510300238668,0.72,0.9,0.7999999999999999,0.0185425912363911,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract 
+2,0.9947643979057592,0.9921671018276762,0.9934640522875816,0.0009955423226333,0.8205128205128205,0.8,0.810126582278481,0.0244050195811128,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +3,1.0,0.9608355091383812,0.9800266311584556,0.0024627792050656,0.896551724137931,0.65,0.7536231884057972,0.0292804092996162,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +4,1.0,0.9921671018276762,0.9960681520314548,0.0008218421200231,0.90625,0.725,0.8055555555555555,0.0245634458494905,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +5,1.0,1.0,1.0,0.0001054827131769,0.8292682926829268,0.85,0.8395061728395061,0.0245467224484163,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +6,1.0,1.0,1.0,5.952719475804638e-05,0.8918918918918919,0.825,0.8571428571428571,0.025383336780704,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +7,1.0,1.0,1.0,4.82193034751762e-05,0.8918918918918919,0.825,0.8571428571428571,0.0260377795512,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +8,1.0,1.0,1.0,4.306133561110795e-05,0.8918918918918919,0.825,0.8571428571428571,0.0264208167983561,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +9,1.0,1.0,1.0,4.150422162543674e-05,0.8918918918918919,0.825,0.8571428571428571,0.0265529194845651,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +0,0.8151447661469933,0.9556135770234988,0.8798076923076923,0.0069137870783971,0.6727272727272727,0.925,0.7789473684210527,0.0117349629319688,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +1,0.975609756097561,0.9399477806788512,0.9574468085106382,0.0023269604963461,0.868421052631579,0.825,0.8461538461538461,0.0076778614952129,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +2,1.0,0.9921671018276762,0.9960681520314548,0.0006114699209213,0.8536585365853658,0.875,0.8641975308641976,0.0073124838981238,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext 
+3,0.9973890339425588,0.9973890339425588,0.9973890339425588,0.0002519216271406,0.8,0.9,0.8470588235294118,0.0090778383584517,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +4,1.0,0.9973890339425588,0.9986928104575163,0.0001543998945588,0.8571428571428571,0.9,0.8780487804878048,0.0101745394614976,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +5,1.0,0.9973890339425588,0.9986928104575163,7.890069298005837e-05,0.85,0.85,0.85,0.0103295742059653,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +6,1.0,1.0,1.0,5.760939798770668e-05,0.85,0.85,0.85,0.0106064411079358,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +7,1.0,1.0,1.0,4.772710740688081e-05,0.85,0.85,0.85,0.0108314802717862,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +8,1.0,1.0,1.0,4.3163261537046015e-05,0.85,0.85,0.85,0.0109703777439939,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +9,1.0,1.0,1.0,4.183565290465693e-05,0.85,0.85,0.85,0.0110197944461174,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext diff --git a/data/epmc_query_results_2022.csv b/data/epmc_query_results_2022.csv new file mode 100644 index 0000000..34171e8 --- /dev/null +++ b/data/epmc_query_results_2022.csv @@ -0,0 +1,21678 @@ +id,title,abstract,publication_date +34599955,"Wastewater, waste, and water-based epidemiology (WWW-BE): A novel hypothesis and decision-support tool to unravel COVID-19 in low-income settings?","Traditional wastewater-based epidemiology (W-BE) relying on SARS-CoV-2 RNA detection in wastewater is attractive for understanding COVID-19. 
Yet traditional W-BE based on centralized wastewaters excludes putative SARS-CoV-2 reservoirs such as: (i) wastewaters from shared on-site sanitation facilities, (ii) solid waste including faecal sludge from non-flushing on-site sanitation systems, and COVID-19 personal protective equipment (PPE), (iii) raw/untreated water, and (iv) drinking water supply systems in low-income countries (LICs). A novel hypothesis and decision-support tool based on Wastewater (on-site sanitation, municipal sewer systems), solid Waste, and raw/untreated and drinking Water-based epidemiology (WWW-BE) is proposed for understanding COVID-19 in LICs. The WWW-BE conceptual framework, including components and principles is presented. Evidence on the presence of SARS-CoV-2 and its proxies in wastewaters, solid materials/waste (papers, metals, fabric, plastics), and raw/untreated surface water, groundwater and drinking water is discussed. Taken together, wastewaters from municipal sewer and on-site sanitation systems, solid waste such as faecal sludge and COVID-19 PPE, raw/untreated surface water and groundwater, and drinking water systems in LICs act as potential reservoirs that receive and harbour SARS-CoV-2, and then transmit it to humans. Hence, WWW-BE could serve a dual function in estimating the prevalence and potential transmission of COVID-19. Several applications of WWW-BE as a hypothesis and decision support tool in LICs are discussed. WWW-BE aggregates data from various infected persons in a spatial unit, hence, putatively requires less resources (analytical kits, personnel) than individual diagnostic testing, making it an ideal decision-support tool for LICs. The novelty, and a critique of WWW-BE versus traditional W-BE are presented. Potential challenges of WWW-BE include: (i) biohazards and biosafety risks, (ii) lack of expertise, analytical equipment, and accredited laboratories, and (iii) high uncertainties in estimates of COVID-19 cases. 
Future perspectives and research directions including key knowledge gaps and the application of novel and emerging technologies in WWW-BE are discussed.",2021-09-30 +34741192,The Rat Genome Database (RGD) facilitates genomic and phenotypic data integration across multiple species for biomedical research.,"Model organism research is essential for discovering the mechanisms of human diseases by defining biologically meaningful gene to disease relationships. The Rat Genome Database (RGD, ( https://rgd.mcw.edu )) is a cross-species knowledgebase and the premier online resource for rat genetic and physiologic data. This rich resource is enhanced by the inclusion and integration of comparative data for human and mouse, as well as other human disease models including chinchilla, dog, bonobo, pig, 13-lined ground squirrel, green monkey, and naked mole-rat. Functional information has been added to records via the assignment of annotations based on sequence similarity to human, rat, and mouse genes. RGD has also imported well-supported cross-species data from external resources. To enable use of these data, RGD has developed a robust infrastructure of standardized ontologies, data formats, and disease- and species-centric portals, complemented with a suite of innovative tools for discovery and analysis. Using examples of single-gene and polygenic human diseases, we illustrate how data from multiple species can help to identify or confirm a gene as involved in a disease and to identify model organisms that can be studied to understand the pathophysiology of a gene or pathway. 
The ultimate aim of this report is to demonstrate the utility of RGD not only as the core resource for the rat research community but also as a source of bioinformatic tools to support a wider audience, empowering the search for appropriate models for human afflictions.",2021-11-05 +34403192,Plant Metabolic Network 15: A resource of genome-wide metabolism databases for 126 plants and algae.,"To understand and engineer plant metabolism, we need a comprehensive and accurate annotation of all metabolic information across plant species. As a step towards this goal, we generated genome-scale metabolic pathway databases of 126 algal and plant genomes, ranging from model organisms to crops to medicinal plants (https://plantcyc.org). Of these, 104 have not been reported before. We systematically evaluated the quality of the databases, which revealed that our semi-automated validation pipeline dramatically improves the quality. We then compared the metabolic content across the 126 organisms using multiple correspondence analysis and found that Brassicaceae, Poaceae, and Chlorophyta appeared as metabolically distinct groups. To demonstrate the utility of this resource, we used recently published sorghum transcriptomics data to discover previously unreported trends of metabolism underlying drought tolerance. We also used single-cell transcriptomics data from the Arabidopsis root to infer cell type-specific metabolic pathways. This work shows the quality and quantity of our resource and demonstrates its wide-ranging utility in integrating metabolism with other areas of plant biology.",2021-10-27 +34839012,ASER: Animal Sex Reversal Database.,"Sex reversal, representing extraordinary sexual plasticity during the life cycle, not only triggers reproduction in animals but also affects reproductive and endocrine system-related diseases and cancers in humans. 
Sex reversal has been broadly reported in animals; however, an integrated resource hub of sex reversal information is still lacking. Here, we constructed a comprehensive database named ASER (Animal Sex Reversal) by integrating sex reversal-related data of 18 species from teleostei to mammalia. We systematically collected 40,018 published papers and mined the sex reversal-associated genes (SRGs), including their regulatory networks, from 1611 core papers. We annotated homologous genes and computed conservation scores for whole genomes across the 18 species. Furthermore, we collected available RNA-seq datasets and investigated the expression dynamics of SRGs during sex reversal or sex determination processes. In addition, we manually annotated 550 in situ hybridization (ISH), fluorescence in situ hybridization (FISH), and immunohistochemistry (IHC) images of SRGs from the literature and described their spatial expression in the gonads. Collectively, ASER provides a unique and integrated resource for researchers to query and reuse organized data to explore the mechanisms and applications of SRGs in animal breeding and human health. The ASER database is publicly available at http://aser.ihb.ac.cn/.",2021-11-25 +34911434,qtlXplorer: an online systems genetics browser in the Eucalyptus Genome Integrative Explorer (EucGenIE).,"BACKGROUND:Affordable high-throughput DNA and RNA sequencing technologies are allowing genomic analysis of plant and animal populations and as a result empowering new systems genetics approaches to study complex traits. The availability of intuitive tools to browse and analyze the resulting large-scale genetic and genomic datasets remain a significant challenge. Furthermore, these integrative genomics approaches require innovative methods to dissect the flow and interconnectedness of biological information underlying complex trait variation. 
The Plant Genome Integrative Explorer (PlantGenIE.org) is a multi-species database and domain that houses online tools for model and woody plant species including Eucalyptus. Since the Eucalyptus Genome Integrative Explorer (EucGenIE) is integrated within PlantGenIE, it shares genome and expression analysis tools previously implemented within the various subdomains (ConGenIE, PopGenIE and AtGenIE). Despite the success in setting up integrative genomics databases, online tools for systems genetics modelling and high-resolution dissection of complex trait variation in plant populations have been lacking. RESULTS:We have developed qtlXplorer ( https://eucgenie.org/QTLXplorer ) for visualizing and exploring systems genetics data from genome-wide association studies including quantitative trait loci (QTLs) and expression-based QTL (eQTL) associations. This module allows users to, for example, find co-located QTLs and eQTLs using an interactive version of Circos, or explore underlying genes using JBrowse. It provides users with a means to build systems genetics models and generate hypotheses from large-scale population genomics data. We also substantially upgraded the EucGenIE resource and show how it enables users to combine genomics and systems genetics approaches to discover candidate genes involved in biotic stress responses and wood formation by focusing on two multigene families, laccases and peroxidases. CONCLUSIONS:qtlXplorer adds a new dimension, population genomics, to the EucGenIE and PlantGenIE environment. The resource will be of interest to researchers and molecular breeders working in Eucalyptus and other woody plant species. It provides an example of how systems genetics data can be integrated with functional genetics data to provide biological insight and formulate hypotheses. 
Importantly, integration within PlantGenIE enables novel comparative genomics analyses to be performed from population-scale data.",2021-12-15 +34757121,"""ADPKD-omics"": determinants of cyclic AMP levels in renal epithelial cells.","The regulation of cyclic adenosine monophosphate (cAMP) levels in kidney epithelial cells is important in at least 2 groups of disorders, namely water balance disorders and autosomal dominant polycystic kidney disease. Focusing on the latter, we review genes that code for proteins that are determinants of cAMP levels in cells. We identify which of these determinants are expressed in the 14 kidney tubule segments using recently published RNA-sequencing and protein mass spectrometry data (""autosomal dominant polycystic kidney disease-omics""). This includes G protein-coupled receptors, adenylyl cyclases, cyclic nucleotide phosphodiesterases, cAMP transporters, cAMP-binding proteins, regulator of G protein-signaling proteins, G protein-coupled receptor kinases, arrestins, calcium transporters, and calcium-binding proteins. In addition, compartmentalized cAMP signaling in the primary cilium is discussed, and a specialized database of the proteome of the primary cilium of cultured ""IMCD3"" cells is provided as an online resource (https://esbl.nhlbi.nih.gov/Databases/CiliumProteome/). Overall, this article provides a general resource in the form of a curated list of proteins likely to play roles in determination of cAMP levels in kidney epithelial cells and, therefore, likely to be determinants of progression of autosomal dominant polycystic kidney disease.",2021-10-29 +34010390,Integration of 1:1 orthology maps and updated datasets into Echinobase. ,"Echinobase (https://echinobase.org) is a central online platform that generates, manages and hosts genomic data relevant to echinoderm research. 
While the resource primarily serves the echinoderm research community, the recent release of an excellent quality genome for the frequently studied purple sea urchin (Strongylocentrotus purpuratus genome, v5.0) has provided an opportunity to adapt to the needs of a broader research community across other model systems. To this end, establishing pipelines to identify orthologous genes between echinoderms and other species has become a priority in many contexts including nomenclature, linking to data in other model organisms, and in internal functionality where data gathered in one hosted species can be associated with genes in other hosted echinoderms. This paper describes the orthology pipelines currently employed by Echinobase and how orthology data are processed to yield 1:1 ortholog mappings between a variety of echinoderms and other model taxa. We also describe functions of interest that have recently been included on the resource, including an updated developmental time course for S.purpuratus, and additional tracks for genome browsing. These data enhancements will increase the accessibility of the resource to non-echinoderm researchers and simultaneously expand the data quality and quantity available to core Echinobase users. Database URL: https://echinobase.org.",2021-05-01 +34774049,DREAM: a database of experimentally supported protein-coding RNAs and drug associations in human cancer.,"The Drug Response Gene Expression Associated Map, also referred as ""DREAM"" ( http://bio-big-data.cn:8080/DREAM ), is a manually curated database of experimentally supported protein-coding RNAs and drugs associations in human cancers. The current version of the DREAM documents 3048 entries about scientific literatures supported drug sensitivity or drug intervention related protein-coding RNAs from PubMed database and 195 high-throughput microarray data about drug sensitivity or drug intervention related protein-coding RNAs data from GEO database. 
Each entry in DREAM database contains detailed information on protein-coding RNA, drug, cancer, and other information including title, PubMed ID, journal, publish time. The DREAM database also provides some data visualization and online analysis services such as volcano plot, GO/KEGG enrichment function analysis, and novel drug discovery analysis. We hope the DREAM database should serve as a valuable resource for clinical practice and basic research, which could help researchers better understand the effects of protein-coding RNAs on drug response in human cancers.",2021-11-13 +34273956,TCM-Blast for traditional Chinese medicine genome alignment with integrated resources.,"The traditional Chinese medicine (TCM) genome project aims to reveal the genetic information and regulatory network of herbal medicines, and to clarify their molecular mechanisms in the prevention and treatment of human diseases. Moreover, the TCM genome could provide the basis for the discovery of the functional genes of active ingredients in TCM, and for the breeding and improvement of TCM. The traditional Chinese Medicine Basic Local Alignment Search Tool (TCM-Blast) is a web interface for TCM protein and DNA sequence similarity searches. It contains approximately 40G of genome data on TCMs, including protein and DNA sequence for 36 TCMs with high medical value.The development of a publicly accessible TCM genome alignment database hosted on the TCM-Blast website ( http://viroblast.pungentdb.org.cn/TCM-Blast/viroblast.php ) has expanded to query multiple sequence databases to obtain TCM genome data, and provide user-friendly output for easy analysis and browsing of BLAST results. The genome sequencing of TCMs helps to elucidate the biosynthetic pathways of important secondary metabolites and provides an essential resource for gene discovery studies and molecular breeding. 
The TCMs genome provides a valuable resource for the investigation of novel bioactive compounds and drugs from these TCMs under the guidance of TCM clinical practice. Our database could be expanded to other TCMs after the determination of their genome data.",2021-07-17 +34378141,Global Pharmacopoeia Genome Database is an integrated and mineable genomic database for traditional medicines derived from eight international pharmacopoeias.,"Genomic data have demonstrated considerable traction in accelerating contemporary studies in traditional medicine. However, the lack of a uniform format and dispersed storage limits the full potential of herb genomic data. In this study, we developed a Global Pharmacopoeia Genome Database (GPGD). The database contains 34,346 records for 903 herb species from eight global pharmacopoeias (Brazilian, Egyptian, European, Indian, Japanese, Korean, the Pharmacopoeia of the People's Republic of China, and U.S. Pharmacopoeia's Herbal Medicines Compendium). In particular, the GPGD contains 21,872 DNA barcodes from 867 species, 2,203 organelle genomes from 674 species, 55 whole genomes from 49 species, 534 genomic sequencing datasets from 366 species, and 9,682 transcriptome datasets from 350 species. Among the organelle genomes, 534 genomes from 366 species were newly generated in this study. Whole genomes, organelle genomes, genomic fragments, transcriptomes, and DNA barcodes were uniformly formatted and arranged by species. The GPGD is publicly accessible at http://www.gpgenome.com and serves as an essential resource for species identification, decomposition of biosynthetic pathways, and molecular-assisted breeding analysis. 
Thus, the database is an invaluable resource for future studies on herbal medicine safety, drug discovery, and the protection and rational use of herbal resources.",2021-08-06 +34738791,AroCageDB: A Web-Based Resource for Aromatic Cage Binding Sites and Their Intrinsic Ligands.,"While aromatic cages have extensively been investigated in the context of structural biology, molecular recognition, and drug discovery, there exist to date no comprehensive resource for proteins sharing this conserved structural motif. To this end, we parsed the Protein Data Bank and thus constructed the Aromatic Cage Database (AroCageDB), a database for investigating the binding pocket descriptors and ligand binding space of aromatic-cage-containing proteins (ACCPs). AroCageDB contains 487 unique ACCPs bound to 890 unique ligands, for a total of 1636 complexes. This web-accessible database provides a user-friendly interface for the interactive visualization of ligand-bound ACCP structures, with a variety of search options that will open up opportunities for structural analyses and drug discovery campaigns. AroCageDB is freely available at http://www.pharmbioinf.uni-freiburg.de/arocagedb/.",2021-11-05 +,1409. Pulmonary Non-tuberculous Mycobacterium Infection (PNTMI) and COVID-19: Characterization of the National COVID Collaborative Cohort (N3C),"Abstract

Background

Establishing whether a low-prevalence clinical condition is a risk factor for COVID-19 infection, or serious adverse outcomes, is difficult due to a limited number of patients, and lack of access to patient’s data by researchers. The National COVID Collaborative Cohort (N3C), a centralized national data resource to study COVID-19, provides access to structured clinical data derived from electronic health records. As of June 2021, N3C contains data on 6,193,738 patients (2,090,138 with COVID-19, 33.7%) from 55 participating sites (Figure 1). We describe the characteristics of patients with PNTMI based on COVID-19 infection status. Figure 1 N3C Basic Demographic Data

Methods

COVID-19 is defined by positive lab result (PCR, antigen, or antibody) or COVID-19 coding diagnosis, as defined by N3C. PNTMI phenotype was built with N3C Data Enclave concept set tool, and ATLAS (https://atlas.ohdsi.org/). We limited analysis to adults (18 years-old or older). We used de-identified data sets stripped of protected health information (PHI). We used N3C Data Enclave analytical tools for exploratory data analysis, and descriptive statistics.

Results

We identified five hundred and eighty six individuals from 19 sites fulfilling the PNTMI phenotype (9.46 cases per 100,000 people). After our age limit, 555 individuals were included for analysis (Figure 2). 340 were females (61.3%), 447 of white race (80.5%), and 30 were Hispanic (5.4%). Additional descriptive statistics and statistical significance testing are provided (Table 1). The most common concepts were ""Non-tuberculous mycobacterial pneumonia"", and ""Pulmonary Mycobacterium avium complex infection"". Four sites accounted for more than 50% of identified patients (Figure 2). We identified 24 individuals with COVID-19 (4.32%), and 44 deaths in this cohort (7.9%). Deaths were unrelated to COVID-19 event. Figure 2. Basic demographic data of pulmonary non-tuberculous Mycobacterium infection phenotype in N3C Figure 3. Concepts and data sources of pulmonary non-tuberculous Mycobacterium infection phenotype in N3C

Conclusion

In N3C, the PNTMI cohort has a lower proportion of COVID-19 infection than the general population, and it was not a cause of mortality. Further analysis to study impact of comorbidities, and differences in race and geographical location are warranted. N3C is a powerful research platform to study the impact of COVID-19 in special populations with low prevalence, and it can be used to study other populations of interest.

Disclosures

All Authors: No reported disclosures",2021-11-01 +34927675,CRPMKB: a knowledge base of cancer risk prediction models for systematic comparison and personalized applications. ,"In the era of big data and precision medicine, accurate risk assessment is a prerequisite for the implementation of risk screening and preventive treatment. A large number of studies have focused on the risk of cancer, and related risk prediction models have been constructed, but there is a lack of effective resource integration for systematic comparison and personalized applications. Therefore, the establishment and analysis of the cancer risk prediction model knowledge base (CRPMKB) is of great significance. The current knowledge base contains 802 model data. The model comparison indicates that the accuracy of cancer risk prediction was greatly affected by regional differences, cancer types and model types. We divided the model variables into four categories: environment, behavioral lifestyle, biological genetics and clinical examination, and found that there are differences in the distribution of various variables among different cancer types. Taking 50 genes involved in the lung cancer risk prediction models as an example to perform pathway enrichment analyses and the results showed that these genes were significantly enriched in p53 Signaling and Aryl Hydrocarbon Receptor Signaling pathways which are associated with cancer and specific diseases. In addition, we verified the biological significance of overlapping lung cancer genes via STRING database. CRPMKB was established to provide researchers an online tool for the future personalized model application and developing. This study of CRPMKB suggests that developing more targeted models based on specific demographic characteristics and cancer types will further improve the accuracy of cancer risk model predictions. http://www.sysbio.org.cn/CRPMKB/. 
Supplementary data are available at Bioinformatics online.",2021-12-20 +34798807,Genetic analysis of pharmacogenomic VIP variants in the Wa population from Yunnan Province of China.,"

Background

The variation of drug responses and target doses among individuals is mostly determined by genes. With the development of pharmacogenetics and pharmacogenomics, the differences in drug response between different races seem to be mainly caused by the genetic diversity of pharmacodynamics and pharmacokinetics genes. Very important pharmacogenetic (VIP) variants mean that genes or variants play important and vital roles in drug response, which have been listed in pharmacogenomics databases, such as Pharmacogenomics Knowledge Base (PharmGKB). The information of Chinese ethnic minorities such as the Wa ethnic group is scarce. This study aimed to uncover the significantly different loci in the Wa population in Yunnan Province of China from the perspective of pharmacogenomics, to provide a theoretical basis for the future medication guidance, and to ultimately achieve the best treatment in the future.

Results

In this study, we recruited 200 unrelated healthy Wa adults from the Yunnan province of China, selected 52 VIP variants from the PharmGKB for genotyping. We also compared the genotype frequency and allele distribution of VIP variants between Wa population and the other 26 populations from the 1000 Genomes Project ( http://www.1000Genomes.org/ ). Next, χ2 test was used to determine the significant points between these populations. The study results showed that compared with the other 26 population groups, five variants rs776746 (CYP3A5), rs4291 (ACE), rs3093105 (CYP4F2), rs1051298 (SLC19A1), and rs1065852 (CYP2D6) had higher frequencies in the Wa population. The genotype frequencies rs4291-TA, rs3093105-CA, rs1051298-AG and rs1065852-GA were higher than those of the other populations, and the allele distributions of rs4291-T and rs3093105-C were significantly different. Additionally, the difference between the Wa ethnic group and East Asian populations, such as CDX, CHB, and CHS, was the smallest.

Conclusions

Our research results show that there is a significant difference in the distribution of VIP variants between the Wa ethnic group and the other 26 populations. The study results will have an effect on supplementing the pharmacogenomics information for the Wa population and providing a theoretical basis for individualised medication for the Wa population.",2021-11-19 +34714516,A modular map of Bradykinin-mediated inflammatory signaling network.,"Bradykinin, a member of the kallikrein-kinin system (KKS), is associated with an inflammatory response pathway with diverse vascular permeability functions, including thrombosis and blood coagulation. In majority, bradykinin signals through Bradykinin Receptor B2 (B2R). B2R is a G protein-coupled receptor (GPCR) coupled to G protein family such as Gαqs, Gαq/Gα11,i1, and Gβ1γ2. B2R stimulation leads to the activation of a signaling cascade of downstream molecules such as phospholipases, protein kinase C, Ras/Raf-1/MAPK, and PI3K/AKT and secondary messengers such as inositol-1,4,5-trisphosphate, diacylglycerol and Ca2+ ions. These secondary messengers modulate the production of nitric oxide or prostaglandins. Bradykinin-mediated signaling is implicated in inflammation, chronic pain, vasculopathy, neuropathy, obesity, diabetes, and cancer. Despite the biomedical importance of bradykinin, a resource of bradykinin-mediated signaling pathway is currently not available. Here, we developed a pathway resource of signaling events mediated by bradykinin. By employing data mining strategies in the published literature, we describe an integrated pathway reaction map of bradykinin consisting of 233 reactions. Bradykinin signaling pathway events included 25 enzyme catalysis reactions, 12 translocations, 83 activation/inhibition reactions, 11 molecular associations, 45 protein expression and 57 gene regulation events. 
The pathway map is made publicly available on the WikiPathways Database with the ID URL: https://www.wikipathways.org/index.php/Pathway:WP5132 . The bradykinin-mediated signaling pathway map will facilitate the identification of novel candidates as therapeutic targets for diseases associated with dysregulated bradykinin signaling.",2021-10-29 +34585731,COVIDium: a COVID-19 resource compendium. ,"The severe acute respiratory syndrome coronavirus 2 that causes coronavirus disease 2019 (COVID-19) disrupted the normal functioning throughout the world since early 2020 and it continues to do so. Nonetheless, the global pandemic was taken up as a challenge by researchers across the globe to discover an effective cure, either in the form of a drug or vaccine. This resulted in an unprecedented surge of experimental and computational data and publications, which often translated their findings in the form of databases (DBs) and tools. Over 160 such DBs and more than 80 software tools were developed, which are uncharacterized, unannotated, deployed at different universal resource locators and are challenging to reach out through a normal web search. Besides, most of the DBs/tools are present on preprints and are either underutilized or unrecognized because of their inability to make it to top Google search hits. Henceforth, there was a need to crawl and characterize these DBs and create a compendium for easy referencing. The current article is one such concerted effort in this direction to create a COVID-19 resource compendium (COVIDium) that would facilitate the researchers to find suitable DBs and tools for their research studies. COVIDium tries to classify the DBs and tools into 11 broad categories for quick navigation. It also provides end-users some generic hit terms to filter the DB entries for quick access to the resources. Additionally, the DB provides Tracker Dashboard, Neuro Resources, references to COVID-19 datasets and protein-protein interactions. 
This compendium will be periodically updated to accommodate new resources. Database URL: The COVIDium is accessible through http://kraza.in/covidium/.",2021-09-29 +33882120,APICURON: a database to credit and acknowledge the work of biocurators. ,"APICURON is an open and freely accessible resource that tracks and credits the work of biocurators across multiple participating knowledgebases. Biocuration is essential to extract knowledge from research data and make it available in a structured and standardized way to the scientific community. However, processing biological data-mainly from literature-requires a huge effort that is difficult to attribute and quantify. APICURON collects biocuration events from third-party resources and aggregates this information, spotlighting biocurator contributions. APICURON promotes biocurator engagement implementing gamification concepts like badges, medals and leaderboards and at the same time provides a monitoring service for registered resources and for biocurators themselves. APICURON adopts a data model that is flexible enough to represent and track the majority of biocuration activities. Biocurators are identified through their Open Researcher and Contributor ID. The definition of curation events, scoring systems and rules for assigning badges and medals are resource-specific and easily customizable. Registered resources can transfer curation activities on the fly through a secure and robust Application Programming Interface (API). Here, we show how simple and effective it is to connect a resource to APICURON, describing the DisProt database of intrinsically disordered proteins as a use case. We believe APICURON will provide biological knowledgebases with a service to recognize and credit the effort of their biocurators, monitor their activity and promote curator engagement. 
Database URL: https://apicuron.org.",2021-04-01 +34508132,"Determination and benchmarking of 27Al(d,α) and 27Al(d,p) reaction cross sections for energies and angles relevant to NRA.","The cross-sections of deuteron-induced nuclear reactions suitable for ion beam analysis, measured in different laboratories, are often significantly different. In the present work, differential cross-sections of 27Al(d,p) and 27Al(d,α) reactions were measured, and the cross sections benchmarked with thick target spectra obtained from pure aluminium for the first time in two independent laboratories. The 27Al(d,p) and (d,α) differential cross-sections were measured between 1.4 and 2 MeV at scattering angles of 165°, 150°, and 135° in the VDGT laboratory in Tehran (Iran), and the same measurements for detector angle of 150° were repeated from scratch, including target making, with independent equipment on the SAFIR platform at INSP in Paris (France). The results of these two measurements at 150° are in good agreement, and for the first time a fitted function is proposed to describe the Al-cross sections for which no suitable theoretical expression exists. The obtained differential cross-sections were validated through benchmarking, by fitting with SIMNRA deuteron-induced particle spectra obtained from a high purity bulk Al target at both labs for deuteron incident energies between 1.6 and 2 MeV. The thick target spectra are well-reproduced. The evaluated and benchmarked cross sections have been uploaded to the ion beam analysis nuclear data library database (www-nds.iaea.org/ibandl/).",2021-09-10 +34826364,Illustrative Tutorials for ProThermDB: Thermodynamic Database for Proteins and Mutants.,"ProThermDB (https://web.iitm.ac.in/bioinfo2/prothermdb/index.html) is a primary resource for protein stability, which contains experimentally determined thermodynamic data for proteins and their mutants. 
The most recent version of ProThermDB accumulates the data obtained from both high- and low-throughput experimental biophysical methods. It includes comprehensive information at four different levels, i.e.: (i) protein sequence and structure; (ii) experimental conditions; (iii) thermodynamic parameters such as Gibbs free energy, melting temperature, enthalpy, etc.; and (iv) literature. In the following protocols, we present detailed tutorials for retrieving data using different search, display and sorting options, interpretation of search results, description of each entry-level information category, data upload and download, cross-links with other databases, and visualization options. This protocol consists of six pictorial exercises, which are useful for biologists/users to understand the contents and organization of data in ProThermDB. Further, potential applications of ProThermDB in protein engineering are discussed. © 2021 Wiley Periodicals LLC. Basic Protocol 1: Retrieval of experimental thermodynamic data for wild-type and mutants of a specific protein using a simple query Basic Protocol 2: Retrieval of stabilizing point mutations, which are located at the interior of α-helical regions, and obtaining data by thermal denaturation methods Basic Protocol 3: Retrieval of destabilizing point mutations, which are in β-sheets of exposed regions, and obtaining data by chemical denaturation methods (urea and GdnHCl) Basic Protocol 4: Retrieval of stabilizing and destabilizing point mutations in a range of physiological conditions (pH: 6-9 and T: 20°C-25°C) and publication years (2010-2020) Support Protocol: Downloading the entire data of the database for academic research purposes and submission of new data in ProThermDB.",2021-11-01 +34461244,HantavirusesDB: Vaccinomics and RNA-based therapeutics database for the potentially emerging human respiratory pandemic agents.,"Hantaviruses are etiological agents of several severe respiratory illnesses in humans and their 
human-to-human transmission has been reported. To cope with any potential pandemic, this group of viruses needs further research and a data platform. Therefore, herein we developed a database ""HantavirusesDB (HVdb)"", where genomics, proteomics, immune resource, RNAi based therapeutics and information on the 3D structures of druggable targets of the Orthohantaviruses are provided on a single platform. The database allows the researchers to effectively map the therapeutic strategies by designing multi-epitopes subunit vaccine and RNA based therapeutics. Moreover, the ease of the web interface allow the users to retrieve specific information from the database. Because of the high quality and excellent functionality of the HVdb, therapeutic research of Hantaviruses can be accelerated, and data analysis might be a foundation to design better treatment strategies targeting the hantaviruses. The database is accessible at http://hvdb.dqweilab-sjtu.com/index.php.",2021-08-28 +34663591,"SysInflam HuDB, a Web Resource for Mining Human Blood Cells Transcriptomic Data Associated with Systemic Inflammatory Responses to Sepsis.","Sepsis develops after a dysregulated host inflammatory response to a systemic infection. Identification of sepsis biomarkers has been challenging because of the multifactorial causes of disease susceptibility and progression. Public transcriptomic data are a valuable resource for mechanistic discoveries and cross-studies concordance of heterogeneous diseases. Nonetheless, the approach requires structured methodologies and effective visualization tools for meaningful data interpretation. Currently, no such database exists for sepsis or systemic inflammatory diseases in human. Hence we curated SysInflam HuDB (http://sepsis.gxbsidra.org/dm3/geneBrowser/list), a unique collection of human blood transcriptomic datasets associated with systemic inflammatory responses to sepsis. 
The transcriptome collection and the associated clinical metadata are integrated onto a user-friendly and Web-based interface that allows the simultaneous exploration, visualization, and interpretation of multiple datasets stemming from different study designs. To date, the collection encompasses 62 datasets and 5719 individual profiles. Concordance of gene expression changes with the associated literature was assessed, and additional analyses are presented to showcase database utility. Combined with custom data visualization at the group and individual levels, SysInflam HuDB facilitates the identification of specific human blood gene signatures in response to infection (e.g., patients with sepsis versus healthy control subjects) and the delineation of major genetic drivers associated with inflammation onset and progression under various conditions.",2021-11-01 +34100240,Providing a Second Opinion to Dr. Google with the WWW Framework.,"While clinicians are often aware that their patients seek second opinions, they are rarely taught specific skills for how to effectively communicate with patients when they are the ones providing that second opinion. The nuances of these skills are amplified when the second opinion being provided is to the ubiquitous (and often anonymous) Dr. Google. In this perspective, the authors share an approach for discussing a patient's pre-visit health-related internet findings. After emphasizing the importance of setting the stage, they describe the WWW Framework which proposes ""waiting"" before responding with data, getting to the ""what"" of the patient's search, and ""working together"" to negotiate a plan. This stepwise approach is designed to provide psychological safety, build a therapeutic alliance, and empower collaborative treatment planning.",2021-06-07 +33778125,Foot Metastasis: Review of 38 Cases.,"Acrometastases are rare and account for approximately 0.1% of metastases. 
The most common primary cancer site is the lung, followed by colorectal and genitourinary system. We searched PubMed (www. pubmed.com), Google scholar (www.scholar.google.com), Science Direct (http://www.sciencedirect.com), and Springer (http://link.springer.com) databases, using a combination of controlled vocabulary and text word terms and reviewed the last 10 years literature in order to describe demographic trends, anatomical distribution, the most common primary sources of malignancy, and survival rates in the reports of foot metastases. In conclusion 38 cases were included in this review analysis. Lung and genitourinary system were the most frequent primary sites. Forefoot was involved in 71% of all metastases to foot either alone or in combination with other areas of the foot. Calcaneus was involved in about 23% of patients either alone or in combination with other foot bones.",2021-01-01 +34844637,recount3: summaries and queries for large-scale RNA-seq expression and splicing.,"We present recount3, a resource consisting of over 750,000 publicly available human and mouse RNA sequencing (RNA-seq) samples uniformly processed by our new Monorail analysis pipeline. To facilitate access to the data, we provide the recount3 and snapcount R/Bioconductor packages as well as complementary web resources. Using these tools, data can be downloaded as study-level summaries or queried for specific exon-exon junctions, genes, samples, or other features. Monorail can be used to process local and/or private data, allowing results to be directly compared to any study in recount3. Taken together, our tools help biologists maximize the utility of publicly available RNA-seq data, especially to improve their understanding of newly collected data. recount3 is available from http://rna.recount.bio .",2021-11-29 +34730175,JAMIR-eQTL: Japanese genome-wide identification of microRNA expression quantitative trait loci across dementia types. 
,"MicroRNAs (miRNAs) are small non-coding RNAs shown to regulate gene expression by binding to complementary transcripts. Genetic variants, including single-nucleotide polymorphisms and short insertions/deletions, contribute to traits and diseases by influencing miRNA expression. However, the association between genetic variation and miRNA expression remains to be elucidated. Here, by using genotype data and miRNA expression data from 3448 Japanese serum samples, we developed a computational pipeline to systematically identify genome-wide miRNA expression quantitative trait loci (miR-eQTLs). Not only did we identify a total of 2487 cis-miR-eQTLs and 3 155 773 trans-miR-eQTLs at a false discovery rate of <0.05 in six dementia types (Alzheimer's disease, dementia with Lewy bodies, vascular dementia, frontotemporal lobar degeneration, normal-pressure hydrocephalus and mild cognitive impairment) and all samples, including those from patients with other types of dementia, but also we examined the commonality and specificity of miR-eQTLs among dementia types. To enable data searching and downloading of these cis- and trans-eQTLs, we developed a user-friendly database named JAMIR-eQTL, publicly available at https://www.jamir-eqtl.org/. This is the first miR-eQTL database designed for dementia types. Our integrative and comprehensive resource will contribute to understanding the genetic basis of miRNA expression as well as to the discovery of deleterious mutations, particularly in dementia studies. Database URL: https://www.jamir-eqtl.org/.",2021-11-01 +36100329,NDDRF: A risk factor knowledgebase for personalized prevention of neurodegenerative diseases.,"

Introduction

Neurodegenerative diseases (NDDs) are a series of chronic diseases, which are associated with progressive loss of neuronal structure or function. The complex etiologies of the NDDs remain unclear, thus the prevention and early diagnosis of NDDs are critical to reducing the mortality and morbidity of these diseases.

Objectives

To provide a systematic understanding of the heterogeneity of the risk factors associated with different NDDs (pan-neurodegenerative diseases or pan-NDDs), the knowledgebase is established to facilitate the personalized and knowledge-guided diagnosis, prevention and prediction of NDDs.

Methods

Before data collection, the medical, life science and informatics experts as well as the potential users of the database were consulted and discussed for the scope of data and the classification of risk factors. The PubMed database was used as the resource of the data and knowledge extraction. Risk factors of NDDs were manually collected from literature published between 1975 and 2020.

Results

The comprehensive risk factors database for NDDs (NDDRF) was established including 998 single or combined risk factors, 2293 records and 1071 articles relevant to the 14 most common NDDs. The single risk factors are classified into 3 categories, i.e. epidemiological factors (469), genetic factors (324) and biochemical factors (153). Among all the factors, 179 factors are positive and protective, while 880 factors have negative influence for NDDs. The knowledgebase is available at http://sysbio.org.cn/NDDRF/.

Conclusion

NDDRF provides the structured information and knowledge resource on risk factors of NDDs. It could benefit the future systematic and personalized investigation of pan-NDDs genesis and progression. Meanwhile it may be used for the future explainable artificial intelligence modeling for smart diagnosis and prevention of NDDs.",2021-06-20 +33906563,M6ADD: a comprehensive database of m6A modifications in diseases.,"N6-methyladenosine (m6A) modification is an important regulatory factor affecting diseases, including multiple cancers and it is a developing direction for targeted disease therapy. Here, we present the M6ADD (m6A-diseases database) database, a public data resource containing manually curated data on potential m6A-disease associations for which some experimental evidence is available; the related high-throughput sequencing data are also provided and analysed by using different computational methods. To give researchers a tool to query the m6A modification data, the M6ADD was designed as a web-based comprehensive resource focusing on the collection, storage and online analysis of m6A modifications, aimed at exploring the associations between m6A modification and gene disorders and diseases. The M6ADD includes 222 experimentally confirmed m6A-disease associations, involving 59 diseases from a review of more than 2000 published papers. The M6ADD also includes 409,229 m6A-disease associations obtained by computational and statistical methods from 30 high-throughput sequencing datasets. In addition, we provide data on 5239 potential m6A regulatory proteins related to 24 cancers based on network analysis prediction methods. In addition, we have developed a tool to explore the function of m6A-modified genes through the protein-protein interaction networks. 
The M6ADD can be accessed at http://m6add.edbc.org/.",2021-04-27 +33095594,Observation of the Production of Three Massive Gauge Bosons at sqrt[s]=13  TeV.,"The first observation is reported of the combined production of three massive gauge bosons (VVV with V=W, Z) in proton-proton collisions at a center-of-mass energy of 13 TeV. The analysis is based on a data sample recorded by the CMS experiment at the CERN LHC corresponding to an integrated luminosity of 137  fb^{-1}. The searches for individual WWW, WWZ, WZZ, and ZZZ production are performed in final states with three, four, five, and six leptons (electrons or muons), or with two same-sign leptons plus one or two jets. The observed (expected) significance of the combined VVV production signal is 5.7 (5.9) standard deviations and the corresponding measured cross section relative to the standard model prediction is 1.02_{-0.23}^{+0.26}. The significances of the individual WWW and WWZ production are 3.3 and 3.4 standard deviations, respectively. Measured production cross sections for the individual triboson processes are also reported.",2020-10-01 +34656056,SuperTCM: A biocultural database combining biological pathways and historical linguistic data of Chinese Materia Medica for drug development.,"

Aim of the study

Botanicals used in Traditional Chinese Medicine (TCM) are a rich source for drug discovery and provide models for multi-component drug development. To facilitate the studies of the actions of TCM drugs and expand their applications, a comprehensive database is urgently required.

Methods

One online resource connects all the relevant data from multiple scientific sources and languages. Drug information from published TCM databases and the official Chinese Pharmacopoeia as well as specialized meta-websites such as Kew's Medicinal Plant Names Service was integrated on a higher level.

Results

Our database, SuperTCM, covers the aspects of TCM derived from medicinal plants, encompassing pharmacological recipes up to chemical compounds. It provides the information for 6516 TCM drugs (or ""herbs"") with 5372 botanical species, 55,772 active ingredients against 543 targets in 254 KEGG pathways associated with 8634 diseases. SuperTCM is freely available at http://tcm.charite.de/supertcm.",2021-10-15 +34366563,Modernization of the National Institutes of Health Dietary Supplement Label Database.,"Launched in 2008, NIH's DSLD (https://dsld.nlm.nih.gov/dsld/) currently catalogs information printed on over 125,000 (historical and current) labels of dietary supplement products sold in the U.S. The database is maintained and updated continuously, and new versions are deployed regularly. The new home page includes a prominent search bar and counter that displays the number of searchable labels in the database. The redesigned website yields near-instantaneous label retrieval, a more attractive layout of information, tailored search filters and download options, and the ability to view data in pictorial formats resulting in a much-improved user experience. The modernization of the DSLD ensures that this NIH resource has new forms of data delivery to meet the needs of App developers and data scientists, and improved performance for users. The DSLD is updated frequently to reflect the products sold in the rapidly evolving U.S. dietary supplement market.",2021-06-25 +35585947,GraphQL for the delivery of bioinformatics web APIs and application to ZincBind.,"

Motivation

Many bioinformatics resources are provided as 'web services', with large databases and analysis software stored on a central server, and clients interacting with them using the hypertext transport protocol (HTTP). While some provide only a visual HTML interface, requiring a web browser to use them, many provide programmatic access using a web application programming interface (API) which returns XML, JSON or plain text that computer programs can interpret more easily. This allows access to be automated. Initially, many bioinformatics APIs used the 'simple object access protocol' (SOAP) and, more recently, representational state transfer (REST).

Results

GraphQL is a novel, increasingly prevalent alternative to REST and SOAP that represents the available data in the form of a graph to which any conceivable query can be submitted, and which is seeing increasing adoption in industry. Here, we review the principles of GraphQL, outline its particular suitability to the delivery of bioinformatics resources and describe its implementation in our ZincBind resource.

Availability and implementation

https://api.zincbind.net.

Supplementary information

Supplementary data are available at Bioinformatics Advances online.",2021-09-29 +34958914,Chemical-damage MINE: A database of curated and predicted spontaneous metabolic reactions.,"Spontaneous reactions between metabolites are often neglected in favor of emphasizing enzyme-catalyzed chemistry because spontaneous reaction rates are assumed to be insignificant under physiological conditions. However, synthetic biology and engineering efforts can raise natural metabolites' levels or introduce unnatural ones, so that previously innocuous or nonexistent spontaneous reactions become an issue. Problems arise when spontaneous reaction rates exceed the capacity of a platform organism to dispose of toxic or chemically active reaction products. While various reliable sources list competing or toxic enzymatic pathways' side-reactions, no corresponding compilation of spontaneous side-reactions exists, nor is it possible to predict their occurrence. We addressed this deficiency by creating the Chemical Damage (CD)-MINE resource. First, we used literature data to construct a comprehensive database of metabolite reactions that occur spontaneously in physiological conditions. We then leveraged this data to construct 148 reaction rules describing the known spontaneous chemistry in a substrate-generic way. We applied these rules to all compounds in the ModelSEED database, predicting 180,891 spontaneous reactions. The resulting (CD)-MINE is available at https://minedatabase.mcs.anl.gov/cdmine/#/home and through developer tools. We also demonstrate how damage-prone intermediates and end products are widely distributed among metabolic pathways, and how predicting spontaneous chemical damage helps rationalize toxicity and carbon loss using examples from published pathways to commercial products. We explain how analyzing damage-prone areas in metabolism helps design effective engineering strategies. 
Finally, we use the CD-MINE toolset to predict the formation of the novel damage product N-carbamoyl proline, and present mass spectrometric evidence for its presence in Escherichia coli.",2021-12-25 +34927097,Preparation of mouse pancreatic tumor for single-cell RNA sequencing and analysis of the data.,"Preparation of single-cell suspension from primary tumor tissue can provide a valuable resource for functional, genetic, proteomic, and tumor microenvironment studies. Here, we describe an effective protocol for mouse pancreatic tumor dissociation with further processing of tumor suspension for single-cell RNA sequencing analysis of cellular populations. We further provide an outline of the bioinformatics processing of the data and clustering of heterogeneous cellular populations comprising pancreatic tumors using Common Workflow Language (CWL) pipelines within user-friendly Scientific Data Analysis Platform (https://SciDAP.com). For complete details on the use and execution of this protocol, please refer to Gabitova-Cornell et al. (2020).",2021-12-04 +34645978,A highly annotated database of genes associated with platinum resistance in cancer.,"Platinum-based chemotherapy, including cisplatin, carboplatin, and oxaliplatin, is prescribed to 10-20% of all cancer patients. Unfortunately, platinum resistance develops in a significant number of patients and is a determinant of clinical outcome. Extensive research has been conducted to understand and overcome platinum resistance, and mechanisms of resistance can be categorized into several broad biological processes, including (1) regulation of drug entry, exit, accumulation, sequestration, and detoxification, (2) enhanced repair and tolerance of platinum-induced DNA damage, (3) alterations in cell survival pathways, (4) alterations in pleiotropic processes and pathways, and (5) changes in the tumor microenvironment. 
As a resource to the cancer research community, we provide a comprehensive overview accompanied by a manually curated database of the >900 genes/proteins that have been associated with platinum resistance over the last 30 years of literature. The database is annotated with possible pathways through which the curated genes are related to platinum resistance, types of evidence, and hyperlinks to literature sources. The searchable, downloadable database is available online at http://ptrc-ddr.cptac-data-view.org .",2021-10-13 +34514416,SCISSOR™: a single-cell inferred site-specific omics resource for tumor microenvironment association study.,"Tumor tissues are heterogeneous with different cell types in tumor microenvironment, which play an important role in tumorigenesis and tumor progression. Several computational algorithms and tools have been developed to infer the cell composition from bulk transcriptome profiles. However, they ignore the tissue specificity and thus a new resource for tissue-specific cell transcriptomic reference is needed for inferring cell composition in tumor microenvironment and exploring their association with clinical outcomes and tumor omics. In this study, we developed SCISSOR™ (https://thecailab.com/scissor/), an online open resource to fulfill that demand by integrating five orthogonal omics data of >6031 large-scale bulk samples, patient clinical outcomes and 451 917 high-granularity tissue-specific single-cell transcriptomic profiles of 16 cancer types. SCISSOR™ provides five major analysis modules that enable flexible modeling with adjustable parameters and dynamic visualization approaches. 
SCISSOR™ is valuable as a new resource for promoting tumor heterogeneity and tumor-tumor microenvironment cell interaction research, by delineating cells in the tissue-specific tumor microenvironment and characterizing their associations with tumor omics and clinical outcomes.",2021-09-09 +34504668,Protein-gene Expression Nexus: Comprehensive characterization of human cancer cell lines with proteogenomic analysis.,"Researchers have gained new therapeutic insights using multi-omics platform approaches to study DNA, RNA, and proteins of comprehensively characterized human cancer cell lines. To improve our understanding of the molecular features associated with oncogenic modulation in cancer, we proposed a proteogenomic database for human cancer cell lines, called Protein-gene Expression Nexus (PEN). We have expanded the characterization of cancer cell lines to include genetic, mRNA, and protein data of 145 cancer cell lines from various public studies. PEN contains proteomic and phosphoproteomic data on 4,129,728 peptides, 13,862 proteins, 7,138 phosphorylation site-associated genomic variations, 117 studies, and 12 cancer. We analyzed functional characterizations along with the integrated datasets, such as cis/trans association for copy number alteration (CNA), single amino acid variation for coding genes, post-translation modification site variation for Single Amino Acid Variation, and novel peptide expression for noncoding regions and fusion genes. PEN provides a user-friendly interface for searching, browsing, and downloading data and also supports the visualization of genome-wide association between CNA and expression, novel peptide landscape, mRNA-protein abundance, and functional annotation. Together, this dataset and PEN data portal provide a resource to accelerate cancer research using model cancer cell lines. 
PEN is freely accessible at http://combio.snu.ac.kr/pen.",2021-08-17 +34561023,[Interest of the bc-GenExMiner web tool in oncology].,"We are taking advantage of the launch of the latest version (v4.6) of our web-based data mining tool ""breast cancer gene-expression miner"" (bc-GenExMiner) to take stock of its position within the oncology research landscape and to present an activity report ten years after its establishment (http://bcgenex.ico.unicancer.fr). bc-GenExMiner is an open-access, user-friendly tool for statistical mining on breast tumor transcriptomes, annotated with more than 20 clinicopathologic and molecular characteristics. The database comprises more than 16,000 patients from 64 cohorts - including TCGA, METABRIC and SCAN-B - for whom several thousands of genes have been quantified by microarrays or RNA-seq. Correlation, expression and prognostic analyses are available for targeted, exhaustive or customized explorations of queried genes. bc-GenExMiner facilitates the validation, investigation, and prioritization of discoveries and hypotheses on genes of interest. It allows users to analyse large databases, create data visualizations, and obtain robust statistical analysis, thereby accelerating biomarker discovery. Ten years after its launch, judging by the number of visits, analyses, and scientific citations of bc-GenExMiner, we conclude that this web resource serves its purpose in the international scientific community working in breast cancer research, with a never-ending rise in its use.",2021-09-21 +34718414,AOP-helpFinder webserver: a tool for comprehensive analysis of the literature to support adverse outcome pathways development. ,"Adverse Outcome Pathways (AOPs) are a conceptual framework developed to support the use of alternative toxicology approaches in the risk assessment. 
AOPs are structured linear organizations of existing knowledge illustrating causal pathways from the initial molecular perturbation triggered by various stressors, through key events (KEs) at different levels of biology, to the ultimate health or ecotoxicological adverse outcome. Artificial intelligence can be used to systematically explore available toxicological data that can be parsed in the scientific literature. Recently a tool called AOP-helpFinder was developed to identify associations between stressors and KEs supporting thus documentation of AOPs. To facilitate the utilization of this advanced bioinformatics tool by the scientific and the regulatory community, a webserver was created. The proposed AOP-helpFinder webserver uses better performing version of the tool which reduces the need for manual curation of the obtained results. As an example, the server was successfully applied to explore relationships of a set of endocrine disruptors with metabolic-related events. The AOP-helpFinder webserver assists in a rapid evaluation of existing knowledge stored in the PubMed database, a global resource of scientific information, to build AOPs and Adverse Outcome Networks (AONs) supporting the chemical risk assessment. AOP-helpFinder is available at http://aop-helpfinder.u-paris-sciences.fr/index.php.",2021-10-30 +32496513,gutMEGA: a database of the human gut MEtaGenome Atlas. ,"The gut microbiota plays important roles in human health through regulating both physiological homeostasis and disease emergence. The accumulation of metagenomic sequencing studies enables us to better understand the temporal and spatial variations of the gut microbiota under different physiological and pathological conditions. However, it is inconvenient for scientists to query and retrieve published data; thus, a comprehensive resource for the quantitative gut metagenome is urgently needed. 
In this study, we developed gut MEtaGenome Atlas (gutMEGA), a well-annotated comprehensive database, to curate and host published quantitative gut microbiota datasets from Homo sapiens. By carefully curating the gut microbiota composition, phenotypes and experimental information, gutMEGA finally integrated 59 132 quantification events for 6457 taxa at seven different levels (kingdom, phylum, class, order, family, genus and species) under 776 conditions. Moreover, with various browsing and search functions, gutMEGA provides a fast and simple way for users to obtain the relative abundances of intestinal microbes among phenotypes. Overall, gutMEGA is a convenient and comprehensive resource for gut metagenome research, which can be freely accessed at http://gutmega.omicsbio.info.",2021-05-01 +33401309,ADeditome provides the genomic landscape of A-to-I RNA editing in Alzheimer's disease. ,"A-to-I RNA editing, contributing to nearly 90% of all editing events in human, has been reported to involve in the pathogenesis of Alzheimer's disease (AD) due to its roles in brain development and immune regulation, such as the deficient editing of GluA2 Q/R related to cell death and memory loss. Currently, there are urgent needs for the systematic annotations of A-to-I RNA editing events in AD. Here, we built ADeditome, the annotation database of A-to-I RNA editing in AD available at https://ccsm.uth.edu/ADeditome, aiming to provide a resource and reference for functional annotation of A-to-I RNA editing in AD to identify therapeutically targetable genes in an individual. We detected 1676 363 editing sites in 1524 samples across nine brain regions from ROSMAP, MayoRNAseq and MSBB. 
For these editing events, we performed multiple functional annotations including identification of specific and disease stage associated editing events and the influence of editing events on gene expression, protein recoding, alternative splicing and miRNA regulation for all the genes, especially for AD-related genes in order to explore the pathology of AD. Combing all the analysis results, we found 108 010 and 26 168 editing events which may promote or inhibit AD progression, respectively. We also found 5582 brain region-specific editing events with potentially dual roles in AD across different brain regions. ADeditome will be a unique resource for AD and drug research communities to identify therapeutically targetable editing events. Significance: ADeditome is the first comprehensive resource of the functional genomics of individual A-to-I RNA editing events in AD, which will be useful for many researchers in the fields of AD pathology, precision medicine, and therapeutic researches.",2021-09-01 +34931882,"CoxBase: an Online Platform for Epidemiological Surveillance, Visualization, Analysis, and Typing of Coxiella burnetii Genomic Sequences.","Q (query) fever is an infectious zoonotic disease caused by the Gram-negative bacterium Coxiella burnetii. Although the disease has been studied for decades, it still represents a threat due to sporadic outbreaks across farms in Europe. The absence of a central platform for Coxiella typing data management is an important epidemiological gap that is relevant in the case of an outbreak. To fill this gap, we have designed and implemented an online, open-source, web-based platform called CoxBase (https://coxbase.q-gaps.de). This platform includes a database that holds genotyping information on more than 400 Coxiella isolates alongside metadata that annotate them. 
We have also implemented features for in silico genotyping of completely or minimally assembled Coxiella sequences using five different typing methods, querying of existing isolates, visualization of isolate geodata via aggregation on a world map, and submission of new isolates. We tested our in silico typing method on 50 Coxiella genomes downloaded from the RefSeq database, and we successfully genotyped all genomes except for cases where the sequence quality was poor. We identified new spacer sequences using our implementation of the multispacer sequence typing (MST) in silico typing method and established adaA gene phenotypes for all 50 genomes as well as their plasmid types. IMPORTANCE Q fever is a zoonotic disease that is a source of active epidemiological concern due to its persistent threat to public health. In this project, we have identified areas in the field of Coxiella research, especially regarding public health and genomic analysis, where there is an inadequacy of resources to monitor, organize, and analyze genomic data from C. burnetii. Subsequently, we have created an open, web-based platform that contains epidemiological information, genome typing functions comprising all the available Coxiella typing methods, and tools for isolate data discovery and visualization that could help address the above-mentioned challenges. This is the first platform to combine all disparate genotyping systems for Coxiella burnetii as well as metadata assets with tools for genomic comparison and analyses. This platform is a valuable resource for laboratory researchers as well as research epidemiologists interested in investigating the relatedness or dissimilarity among C. 
burnetii strains.",2021-12-21 +,Drought responsiveness in black pepper (Piper nigrum L.): Genes associated and development of a web‐genomic resource,"Black pepper (Piper nigrum L.; 2n = 52; Piperaceae), the king of spices, is a perennial, trailing woody flowering vine and has global importance with widespread dietary, medicinal, and preservative uses. It is an economically important germplasm cultivated for its fruit and the major cash crop in >30 tropical countries. Crop production is mainly affected by drought stress. The present study deals with the candidate gene identification from drought‐affected black pepper leaf transcriptome generated by Illumina Hiseq2000. It also aims to mine putative molecular markers (namely SSRs, SNPs, and InDels) and generate primers for them. The identification of transcription factors and pathways involved in drought tolerance is also reported here. De novo transcriptome assembly was performed with trinity assembler. In total, 4914 differential expressed genes, 2110 transcriptional factors, 786 domains and 1137 families, 20,124 putative SSR markers, and 259,236 variants were identified. At2g30105 (unidentified gene containing leucine‐rich repeats and ubiquitin‐like domain), serine threonine protein kinase, Mitogen‐activated protein kinase, Nucleotide Binding Site‐Leucine Rich Repeat, Myeloblastosis‐related proteins, basic helix–loop–helix are all found upregulated and are reported to be associated with plant tolerance against drought condition. All these information are catalogued in the Black Pepper Drought Transcriptome Database (BPDRTDb), freely accessible for academic use at http://webtom.cabgrid.res.in/bpdrtdb/. This database is a good foundation for the genetic improvement of pepper plants, breeding programmes, and mapping population of this crop. 
Putative markers can also be a reliable genomic resource to develop drought‐tolerant variety for better black pepper productivity.",2021-06-01 +34113986,Tfcancer: a manually curated database of transcription factors associated with human cancers. ,"Transcription factors (TFs) are critical regulation elements and its dysregulation can lead to a variety of cancers. However, currently, there are no such online resources for large-scale collection, storage and analysis of TF-cancer associations in those cancers. To fill this gap, we present a database called TFcancer (http://lcbb.swjtu.edu.cn/tfcancer/), which contains 3136 experimentally supported associations between 364 TFs and 33 TCGA cancers by manually curating more than 1800 literature. TFcancer mainly concentrates on four aspects: TF expression, molecular alteration, regulatory relationships between TFs and target genes, and biological processes and signaling pathways of TFs in cancers. TFcancer not only provides a user-friendly interface for browsing and searching but also allows flexible data downloading and user data submitting. It is believed that TFcancer is a helpful and valuable resource for researchers who seek to understand the functions and molecular mechanisms of TFs involved in human cancers. The TFcancer are freely available at http://lcbb.swjtu.edu.cn/tfcancer/. Supplementary data are available at Bioinformatics online.",2021-05-26 +34037703,TFcancer: a manually curated database of transcription factors associated with human cancer. ,"Transcription factors (TFs) are critical regulation elements and its dysregulation can lead to a variety of cancers. However, currently, there are no such online resources for large-scale collection, storage and analysis of TF-cancer associations in those cancers. 
To fill this gap, we present a database called TFcancer (http://lcbb.swjtu.edu.cn/tfcancer/), which contains 3,136 experimentally supported associations between 364 TFs and 33 TCGA cancers by manually curating more than 1,800 literature. TFcancer mainly concentrates on four aspects: TF expression, molecular alteration, regulatory relationships between TFs and target genes, and biological processes and signaling pathways of TFs in cancers. TFcancer not only provides a user-friendly interface for browsing and searching but also allows flexible data downloading and user data submitting. It is believed that TFcancer is a helpful and valuable resource for researchers who seek to understand the functions and molecular mechanisms of TFs involved in human cancers. The TFcancer are freely available at http://lcbb.swjtu.edu.cn/tfcancer/. Supplementary data are available at Bioinformatics online.",2021-05-26 +34048547,emiRIT: a text-mining-based resource for microRNA information. ,"microRNAs (miRNAs) are essential gene regulators, and their dysregulation often leads to diseases. Easy access to miRNA information is crucial for interpreting generated experimental data, connecting facts across publications and developing new hypotheses built on previous knowledge. Here, we present extracting miRNA Information from Text (emiRIT), a text-miningbased resource, which presents miRNA information mined from the literature through a user-friendly interface. We collected 149 ,233 miRNA -PubMed ID pairs from Medline between January 1997 and May 2020. emiRIT currently contains 'miRNA -gene regulation' (69 ,152 relations), 'miRNA disease (cancer)' (12 ,300 relations), 'miRNA -biological process and pathways' (23, 390 relations) and circulatory 'miRNAs in extracellular locations' (3782 relations). 
Biological entities and their relation to miRNAs were extracted from Medline abstracts using publicly available and in-house developed text-mining tools, and the entities were normalized to facilitate querying and integration. We built a database and an interface to store and access the integrated data, respectively. We provide an up-to-date and user-friendly resource to facilitate access to comprehensive miRNA information from the literature on a large scale, enabling users to navigate through different roles of miRNA and examine them in a context specific to their information needs. To assess our resource's information coverage, we have conducted two case studies focusing on the target and differential expression information of miRNAs in the context of cancer and a third case study to assess the usage of emiRIT in the curation of miRNA information. Database URL: https://research.bioinformatics.udel.edu/emirit/.",2021-05-01 +34174131,CanVaS: Documenting the genetic variation spectrum of Greek cancer patients.,"National genetic variation registries vastly increase the level of detail for the relevant population, while directly affecting patient management. Herein, we report CanVaS, a Cancer Variation reSource aiming to document the genetic variation of cancer patients in Greece. CanVaS comprises germline genetic data from 7,363 Greek individuals with a personal and/or family history of malignancy. The data set incorporates approximately 24,000 functionally annotated rare variants in 97 established or suspected cancer susceptibility genes. For each variant, allele frequency for the Greek population, interpretation for clinical significance, anonymized family and segregation information, as well as phenotypic traits of the carriers, are included. Moreover, information on the geographic distribution of the variants across the country is provided, enabling the study of Greek population isolates. 
Direct comparisons between Greek (sub)populations with relevant genetic resources are supported, allowing fine-grain localized adjustment of guidelines and clinical decision-making. Most importantly, anonymized data are available for download, while the Leiden Open Variation Database schema is adopted, enabling integration/interconnection with central resources. CanVaS could become a stepping-stone for a countrywide effort to characterize the cancer genetic variation landscape, concurrently supporting national and international cancer research. The database can be accessed at: http://ithaka.rrp.demokritos.gr/CanVaS.",2021-07-06 +33984507,HisPhosSite: A comprehensive database of histidine phosphorylated proteins and sites.,"Histidine phosphorylation is critically important in a variety of cellular processes including signal transduction, cell cycle, proliferation, differentiation, and apoptosis. It is estimated to account for 6% of all phosphorylated amino acids. However, due to the acid lability of the PN bond, the study of pHis lags far behind that of pSer, pThr, and pTyr. Recently, the development and use of pHis-specific antibodies and methodologies have led to a resurgence in the study of histidine phosphorylation. Although a considerable number of pHis proteins and sites have been discovered, most of them have not been manually curated and integrated to any databases. There is a lack of a data repository for pHis, and such work is expected to help further systemic studies of pHis. Thus, we present a comprehensive resource database of histidine phosphorylation (HisPhosSite) by curating experimentally validated pHis proteins and sites and compiling putative pHis sites with ortholog search. HisPhosSite contains 776 verified pHis sites and 2702 verified pHis proteins in 38 eukaryotic and prokaryotic species and 15,378 putative pHis sites and 10,816 putative pHis proteins in 1366 species. 
HisPhosSite provides rich annotations of pHis sites and proteins and multiple search engines (including motif search and BLAST search) for users to locate pHis sites of interest. HisPhosSite is available at http://reprod.njmu.edu.cn/hisphossite. SIGNIFICANCE: Histidine phosphorylation is involved in a variety of cellular processes as well as cancers, and it has been proved to be more common than previously thought. The HisPhosSite database was developed to collect pHis data from published literatures with experimental evidences. Unification of the identified pHis proteins and sites will give researchers an informative resource for histidine phosphorylation. HisPhosSite has a user-friendly interface with multiple search engines for users to locate pHis sites of interest. In addition, the database provides rich structural and functional annotations. HisPhosSite will help future studies and elucidation of the functions of histidine phosphorylation.",2021-05-10 +33704069,"The Global Landscape of SARS-CoV-2 Genomes, Variants, and Haplotypes in 2019nCoVR.","On January 22, 2020, China National Center for Bioinformation (CNCB) released the 2019 Novel Coronavirus Resource (2019nCoVR), an open-access information resource for the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). 2019nCoVR features a comprehensive integration of sequence and clinical information for all publicly available SARS-CoV-2 isolates, which are manually curated with value-added annotations and quality evaluated by an automated in-house pipeline. Of particular note, 2019nCoVR offers systematic analyses to generate a dynamic landscape of SARS-CoV-2 genomic variations at a global scale. It provides all identified variants and their detailed statistics for each virus isolate, and congregates the quality score, functional annotation, and population frequency for each variant. 
Spatiotemporal change for each variant can be visualized and historical viral haplotype network maps for the course of the outbreak are also generated based on all complete and high-quality genomes available. Moreover, 2019nCoVR provides a full collection of SARS-CoV-2 relevant literature on the coronavirus disease 2019 (COVID-19), including published papers from PubMed as well as preprints from services such as bioRxiv and medRxiv through Europe PMC. Furthermore, by linking with relevant databases in CNCB, 2019nCoVR offers data submission services for raw sequence reads and assembled genomes, and data sharing with NCBI. Collectively, SARS-CoV-2 is updated daily to collect the latest information on genome sequences, variants, haplotypes, and literature for a timely reflection, making 2019nCoVR a valuable resource for the global research community. 2019nCoVR is accessible at https://bigd.big.ac.cn/ncov/.",2020-12-28 +34456903,ImmuCellDB: An Indicative Database of Immune Cell Composition From Different Tissues and Disease Conditions in Mouse and Human.,"Immune cell composition is highly divergent across different tissues and diseases. A comprehensive resource of tissue immune cells across different conditions in mouse and human will thus provide great understanding of the immune microenvironment of many diseases. Recently, computational methods for estimating immune cell abundance from tissue transcriptome data have been developed and are now widely used. Using these computational tools, large-scale estimation of immune cell composition across tissues and conditions should be possible using gene expression data collected from public databases. In total, 266 tissue types and 706 disease types in humans, as well as 143 tissue types and 61 disease types, and 206 genotypes in mouse had been included in a database we have named ImmuCellDB (http://wap-lab.org:3200/ImmuCellDB/). 
In ImmuCellDB, users can search and browse immune cell proportions based on tissues, disease or genotype in mouse or humans. Additionally, the variation and correlation of immune cell abundance and gene expression level between different conditions can be compared and viewed in this database. We believe that ImmuCellDB provides not only an indicative view of tissue-dependent or disease-dependent immune cell profiles, but also represents an easy way to pre-determine immune cell abundance and gene expression profiles for specific situations.",2021-08-12 +33766657,An early-morning gene network controlled by phytochromes and cryptochromes regulates photomorphogenesis pathways in Arabidopsis.,"Light perception at dawn plays a key role in coordinating multiple molecular processes and in entraining the plant circadian clock. The Arabidopsis mutant lacking the main photoreceptors, however, still shows clock entrainment, indicating that the integration of light into the morning transcriptome is not well understood. In this study, we performed a high-resolution RNA-sequencing time-series experiment, sampling every 2 min beginning at dawn. In parallel experiments, we perturbed temperature, the circadian clock, photoreceptor signaling, and chloroplast-derived light signaling. We used these data to infer a gene network that describes the gene expression dynamics after light stimulus in the morning, and then validated key edges. By sampling time points at high density, we are able to identify three light- and temperature-sensitive bursts of transcription factor activity, one of which lasts for only about 8 min. Phytochrome and cryptochrome mutants cause a delay in the transcriptional bursts at dawn, and completely remove a burst of expression in key photomorphogenesis genes (HY5 and BBX family). Our complete network is available online (http://www-users.york.ac.uk/∼de656/dawnBurst/dawnBurst.html). 
Taken together, our results show that phytochrome and cryptochrome signaling is required for fine-tuning the dawn transcriptional response to light, but separate pathways can robustly activate much of the program in their absence.",2021-03-23 +34842310,The perennial fruit tree proteogenomics atlas: a spatial map of the sweet cherry proteome and transcriptome.,"Genome-wide transcriptome analysis provides systems-level insights into plant biology. Due to the limited depth of quantitative proteomics our understanding of gene-protein-complex stoichiometry is largely unknown in plants. Recently, the complexity of the proteome and its cell-/tissue-specific distribution have boosted the research community to the integration of transcriptomics and proteomics landscapes in a proteogenomic approach. Herein, we generated a quantitative proteome and transcriptome abundance atlas of 15 major sweet cherry (Prunus avium L., cv 'Tragana Edessis') tissues represented by 29 247 genes and 7584 proteins. Additionally, 199 984 alternative splicing events, particularly exon skipping and alternative 3' splicing, were identified in 23 383 transcribed regions of the analyzed tissues. Common signatures as well as differences between mRNA and protein quantities, including genes encoding transcription factors and allergens, within and across the different tissues are reported. Using our integrated dataset, we identified key putative regulators of fruit development, notably genes involved in the biosynthesis of anthocyanins and flavonoids. We also provide proteogenomic-based evidence for the involvement of ethylene signaling and pectin degradation in cherry fruit ripening. Moreover, clusters of genes and proteins with similar and different expression and suppression trends across diverse tissues and developmental stages revealed a relatively low RNA abundance-to-protein correlation. 
The present proteogenomic analysis allows us to identify 17 novel sweet cherry proteins without prior protein-level annotation evidenced in the currently available databases. To facilitate use by the community, we also developed the Sweet Cherry Atlas Database (https://grcherrydb.com/) for viewing and data mining these resources. This work provides new insights into the proteogenomics workflow in plants and a rich knowledge resource for future investigation of gene and protein functions in Prunus species.",2021-12-16 +34345532,"CanImmunother: a manually curated database for identification of cancer immunotherapies associating with biomarkers, targets, and clinical effects.","As immunotherapy is evolving into an essential armamentarium against cancers, numerous translational studies associated with relevant biomarkers, targets, and clinical effects have been reported in recent years. However, a large amount of associated experimental data remains unexplored due to the difficulty in accessibility and utilization. Here, we established a comprehensive high-quality database for cancer immunotherapy called CanImmunother (http://www.biomedical-web.com/cancerit/) through manual curation on 4515 publications. CanImmunother contains 3267 experimentally validated associations between 218 cancer sub-types across 34 body parts and 484 immunotherapies with 642 biomarkers, 108 targets, and 121 control therapies. Each association was manually curated by professional curators, incorporated with valuable annotation and cross references, and assigned with an association score for prioritization. To help clinicians and researchers in identifying and discovering better cancer immunotherapy and their respective biomarkers and targets, CanImmunother offers user-friendly web applications including search, browse, excel table, association prioritization, and network visualization. 
CanImmunother presents a landscape of experimental cancer immunotherapy association data, serving as a useful resource to improve our insight and to facilitate further discovery of advanced immunotherapy options for cancer patients.",2021-07-16 +34485275,MeiosisOnline: A Manually Curated Database for Tracking and Predicting Genes Associated With Meiosis.,"Meiosis, an essential step in gametogenesis, is the key event in sexually reproducing organisms. Thousands of genes have been reported to be involved in meiosis. Therefore, a specialist database is much needed for scientists to know about the function of these genes quickly and to search for genes with potential roles in meiosis. Here, we developed ""MeiosisOnline,"" a publicly accessible, comprehensive database of known functional genes and potential candidates in meiosis (https://mcg.ustc.edu.cn/bsc/meiosis/index.html). A total of 2,052 meiotic genes were manually curated from literature resource and were classified into different categories. Annotation information was provided for both meiotic genes and predicted candidates, including basic information, function, protein-protein interaction (PPI), and expression data. On the other hand, 165 mouse genes were predicted as potential candidates in meiosis using the ""Greed AUC Stepwise"" algorithm. Thus, MeiosisOnline provides the most updated and detailed information of experimental verified and predicted genes in meiosis. Furthermore, the searching tools and friendly interface of MeiosisOnline will greatly help researchers in studying meiosis in an easy and efficient way.",2021-08-13 +34335304,Amadis: A Comprehensive Database for Association Between Microbiota and Disease.,"The human gastrointestinal tract represents a symbiotic bioreactor that can mediate the interaction of the human host. The deployment and integration of multi-omics technologies have depicted a more complete image of the functions performed by microbial organisms. 
In addition, a large amount of data has been generated in a short time. However, researchers struggling to keep track of these mountains of information need a way to conveniently gain a comprehensive understanding of the relationship between microbiota and human diseases. To tackle this issue, we developed Amadis (http://gift2disease.net/GIFTED), a manually curated database that provides experimentally supported microbiota-disease associations and a dynamic network construction method. The current version of the Amadis database documents 20167 associations between 221 human diseases and 774 gut microbes across 17 species, curated from more than 1000 articles. By using the curated data, users can freely select and combine modules to obtain a specific microbe-based human disease network. Additionally, Amadis provides a user-friendly interface for browsing, searching and downloading. We hope it can serve as a useful and valuable resource for researchers exploring the associations between gastrointestinal microbiota and human diseases.",2021-07-14 +34907160,XDeathDB: a visualization platform for cell death molecular interactions.,"Lots of cell death initiator and effector molecules, signalling pathways and subcellular sites have been identified as key mediators in both cell death processes in cancer. The XDeathDB visualization platform provides a comprehensive cell death and their crosstalk resource for deciphering the signaling network organization of interactions among different cell death modes associated with 1461 cancer types and COVID-19, with an aim to understand the molecular mechanisms of physiological cell death in disease and facilitate systems-oriented novel drug discovery in inducing cell deaths properly. 
Apoptosis, autosis, efferocytosis, ferroptosis, immunogenic cell death, intrinsic apoptosis, lysosomal cell death, mitotic cell death, mitochondrial permeability transition, necroptosis, parthanatos, and pyroptosis related to 12 cell deaths and their crosstalk can be observed systematically by the platform. Big data for cell death gene-disease associations, gene-cell death pathway associations, pathway-cell death mode associations, and cell death-cell death associations is collected by literature review articles and public database from iRefIndex, STRING, BioGRID, Reactom, Pathway's commons, DisGeNET, DrugBank, and Therapeutic Target Database (TTD). An interactive webtool, XDeathDB, is built by web applications with R-Shiny, JavaScript (JS) and Shiny Server Iso. With this platform, users can search specific interactions from vast interdependent networks that occur in the realm of cell death. A multilayer spectral graph clustering method that performs convex layer aggregation to identify crosstalk function among cell death modes for a specific cancer. 147 hallmark genes of cell death could be observed in detail in these networks. These potential druggable targets are displayed systematically and tailoring networks to visualize specified relations is available to fulfil user-specific needs. Users can access XDeathDB for free at https://pcm2019.shinyapps.io/XDeathDB/ .",2021-12-14 +34859208,PEPATAC: an optimized pipeline for ATAC-seq data analysis with serial alignments.,"As chromatin accessibility data from ATAC-seq experiments continues to expand, there is continuing need for standardized analysis pipelines. Here, we present PEPATAC, an ATAC-seq pipeline that is easily applied to ATAC-seq projects of any size, from one-off experiments to large-scale sequencing projects. PEPATAC leverages unique features of ATAC-seq data to optimize for speed and accuracy, and it provides several unique analytical approaches. 
Output includes convenient quality control plots, summary statistics, and a variety of generally useful data formats to set the groundwork for subsequent project-specific data analysis. Downstream analysis is simplified by a standard definition format, modularity of components, and metadata APIs in R and Python. It is restartable, fault-tolerant, and can be run on local hardware, using any cluster resource manager, or in provided Linux containers. We also demonstrate the advantage of aligning to the mitochondrial genome serially, which improves the accuracy of alignment statistics and quality control metrics. PEPATAC is a robust and portable first step for any ATAC-seq project. BSD2-licensed code and documentation are available at https://pepatac.databio.org.",2021-11-23 +32052981,"No evidence of what-where-when memory in great apes (Pan troglodytes, Pan paniscus, Pongo abelii, and Gorilla gorilla).","Episodic memory is the ability to recollect specific past events belonging to our personal experience, and it is one of the most crucial human abilities, allowing us to mentally travel through time. In animals, however, evidence of what-where-when memory (hereafter, WWW memory) is limited to very few taxa, mostly reflecting the socioecological challenges faced in their environment. In this article, we aimed to replicate 2 studies previously conducted on birds and primates to find convincing evidence of WWW memory in great apes. For this purpose, we tested 12 captive great apes in 3 different tasks. In Task 1, we tested whether great apes take into account temporal information when choosing between highly preferred perishable and less-preferred nonperishable food items. In Task 2, we tested whether great apes can differentiate between similar events having happened at different times in the past. Finally, in Task 3, we tested whether great apes can use their memory flexibly, incorporating novel information in their memories. 
In none of the tasks did our subjects make the correct choice significantly above chance, with performance further declining when subjects were presented with 2 events (Task 2). Moreover, none of them could reliably integrate novel information into their memories. Overall, our study casts doubt on the existence of WWW memory in great apes, and especially calls for more caution when using WWW memory tasks and interpreting their results. (PsycInfo Database Record (c) 2020 APA, all rights reserved).",2020-02-13 +33999180,OpenAnnotate: a web server to annotate the chromatin accessibility of genomic regions.,"Chromatin accessibility, as a powerful marker of active DNA regulatory elements, provides valuable information for understanding regulatory mechanisms. The revolution in high-throughput methods has accumulated massive chromatin accessibility profiles in public repositories. Nevertheless, utilization of these data is hampered by cumbersome collection, time-consuming processing, and manual chromatin accessibility (openness) annotation of genomic regions. To fill this gap, we developed OpenAnnotate (http://health.tsinghua.edu.cn/openannotate/) as the first web server for efficiently annotating openness of massive genomic regions across various biosample types, tissues, and biological systems. In addition to the annotation resource from 2729 comprehensive profiles of 614 biosample types of human and mouse, OpenAnnotate provides user-friendly functionalities, ultra-efficient calculation, real-time browsing, intuitive visualization, and elaborate application notebooks. We show its unique advantages compared to existing databases and toolkits by effectively revealing cell type-specificity, identifying regulatory elements and 3D chromatin contacts, deciphering gene functional relationships, inferring functions of transcription factors, and unprecedentedly promoting single-cell data analyses. 
We anticipate OpenAnnotate will provide a promising avenue for researchers to construct a more holistic perspective to understand regulatory mechanisms.",2021-07-01 +34618533,Can we study episodic-like memory in preschoolers from an animal foraging model?,"Episodic-like memory (ELM) involves remembering the what, where, and when (WWW) of an event as a whole, and it can be studied behaviorally. In research regarding this type of memory with children, one experiment proposes a new task adapted from animal foraging studies. A task derived from a foraging model was presented its considers the characteristics required for ELM study in children and employs a single trial presented from an egocentric perspective to avoid memory consolidation. One study compared four-year-old children's choices after being trained with one or three trials using a hide-and-seek task. The consequence size and retention interval between training and test were manipulated. Results showed that children chose the optimal outcome after an immediate or delayed test. The children's choices were conditional on the size of the consequences and the time at retrieval according to the Temporal Weighting Rule (Devenport & Devenport, 1994). The results were similar to those of animal studies and were consistent with a foraging memory model. In discussion, the advantages and limitations of the proposed task for the study of ELM in children are described and explained. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-07-01 +34528715,BRAIN UK: Accessing NHS tissue archives for neuroscience research.,"The purpose of BRAIN UK (the UK BRain Archive Information Network) is to make the very extensive and comprehensive National Health Service (NHS) Neuropathology archives available to the national and international neuroscience research community. 
The archives comprise samples of tumours and a wide range of other neurological disorders, not only from the brain but also spinal cord, peripheral nerve, muscle, eye and other organs when relevant. BRAIN UK was founded after the recognition of the importance of this large tissue resource, which was not previously readily accessible for research use. BRAIN UK has successfully engaged the majority of the regional clinical neuroscience centres in the United Kingdom to produce a centralised database of the extensive autopsy and biopsy archive. Together with a simple application process and its broad ethical approval, BRAIN UK offers researchers easy access to most of the national archives of neurological tissues and tumours (http://www.brain-uk.org). The range of tissues available reflects the spectrum of disease in society, including many conditions not covered by disease-specific brain banks, and also allows relatively large numbers of cases of uncommon conditions to be studied. BRAIN UK has supported 141 studies (2010-2020) that have generated 70 publications employing methodology as diverse as morphometrics, genetics, proteomics and methylomics. Tissue samples that would otherwise have been unused have supported valuable neuroscience research. The importance of this unique resource will only increase as molecular techniques applicable to human tissues continue to develop and technical advances permit large-scale high-throughput studies.",2021-09-28 +34642750,HBFP: a new repository for human body fluid proteome. ,"Body fluid proteome has been intensively studied as a primary source for disease biomarker discovery. Using advanced proteomics technologies, early research success has resulted in increasingly accumulated proteins detected in different body fluids, among which many are promising biomarkers. 
However, despite a handful of small-scale and specific data resources, current research is clearly lacking effort compiling published body fluid proteins into a centralized and sustainable repository that can provide users with systematic analytic tools. In this study, we developed a new database of human body fluid proteome (HBFP) that focuses on experimentally validated proteome in 17 types of human body fluids. The current database archives 11 827 unique proteins reported by 164 scientific publications, with a maximal false discovery rate of 0.01 on both the peptide and protein levels since 2001, and enables users to query, analyze and download protein entries with respect to each body fluid. Three unique features of this new system include the following: (i) the protein annotation page includes detailed abundance information based on relative qualitative measures of peptides reported in the original references, (ii) a new score is calculated on each reported protein to indicate the discovery confidence and (iii) HBFP catalogs 7354 proteins with at least two non-nested uniquely mapping peptides of nine amino acids according to the Human Proteome Project Data Interpretation Guidelines, while the remaining 4473 proteins have more than two unique peptides without given sequence information. As an important resource for human protein secretome, we anticipate that this new HBFP database can be a powerful tool that facilitates research in clinical proteomics and biomarker discovery. Database URL: https://bmbl.bmi.osumc.edu/HBFP/.",2021-10-01 +34519429,"SARS-CoV-2 structural coverage map reveals viral protein assembly, mimicry, and hijacking mechanisms.","We modeled 3D structures of all SARS-CoV-2 proteins, generating 2,060 models that span 69% of the viral proteome and provide details not available elsewhere. 
We found that ˜6% of the proteome mimicked human proteins, while ˜7% was implicated in hijacking mechanisms that reverse post-translational modifications, block host translation, and disable host defenses; a further ˜29% self-assembled into heteromeric states that provided insight into how the viral replication and translation complex forms. To make these 3D models more accessible, we devised a structural coverage map, a novel visualization method to show what is-and is not-known about the 3D structure of the viral proteome. We integrated the coverage map into an accompanying online resource (https://aquaria.ws/covid) that can be used to find and explore models corresponding to the 79 structural states identified in this work. The resulting Aquaria-COVID resource helps scientists use emerging structural data to understand the mechanisms underlying coronavirus infection and draws attention to the 31% of the viral proteome that remains structurally unknown or dark.",2021-09-01 +34259866,CNVIntegrate: the first multi-ethnic database for identifying copy number variations associated with cancer. ,"Human copy number variations (CNVs) and copy number alterations (CNAs) are DNA segments (>1000 base pairs) of duplications or deletions with respect to the reference genome, potentially causing genomic imbalance leading to diseases such as cancer. CNVs further cause genetic diversity in healthy populations and are predominant drivers of gene/genome evolution. Initiatives have been taken by the research community to establish large-scale databases to comprehensively characterize CNVs in humans. Exome Aggregation Consortium (ExAC) is one such endeavor that catalogs CNVs, of nearly 60 000 healthy individuals across five demographic clusters. 
Furthermore, large projects such as the Catalogue of Somatic Mutations in Cancer (COSMIC) and the Cancer Cell Line Encyclopedia (CCLE) combine CNA data from cancer-affected individuals and large panels of human cancer cell lines, respectively. However, we lack a structured and comprehensive CNV/CNA resource including both healthy individuals and cancer patients across large populations. CNVIntegrate is the first web-based system that hosts CNV and CNA data from both healthy populations and cancer patients, respectively, and concomitantly provides statistical comparisons between copy number frequencies of multiple ethnic populations. It further includes, for the first time, well-cataloged CNV and CNA data from Taiwanese healthy individuals and Taiwan Breast Cancer data, respectively, along with imported resources from ExAC, COSMIC and CCLE. CNVIntegrate offers a CNV/CNA-data hub for structured information retrieval for clinicians and scientists towards important drug discoveries and precision treatments. Database URL: http://cnvintegrate.cgm.ntu.edu.tw/.",2021-07-01 +34085038,"EyeDiseases: an integrated resource for dedicating to genetic variants, gene expression and epigenetic factors of human eye diseases.","Eye diseases are remarkably common and encompass a large and diverse range of morbidities that affect different components of the visual system and visual function. With advances in omics technology of eye disorders, genome-scale datasets have been rapidly accumulated in genetics and epigenetics field. However, the efficient collection and comprehensive analysis of different kinds of omics data are lacking. Herein, we developed EyeDiseases (https://eyediseases.bio-data.cn/), the first database for multi-omics data integration and interpretation of human eyes diseases. 
It contains 1344 disease-associated genes with genetic variation, 1774 transcription files of bulk cell expression and single-cell RNA-seq, 105 epigenomics data across 185 kinds of human eye diseases. Using EyeDiseases, we investigated SARS-CoV-2 potential tropism in eye infection and found that the SARS-CoV-2 entry factors, ACE2 and TMPRSS2 are highly correlated with cornea and keratoconus, suggest that ocular surface cells are susceptible to infection by SARS-CoV-2. Additionally, integrating analysis of Age-related macular degeneration (AMD) GWAS loci and co-expression data revealed 9 associated genes involved in HIF-1 signaling pathway and voltage-gate potassium channel complex. The EyeDiseases provides a valuable resource for accelerating the discovery and validation of candidate loci and genes contributed to the molecular diagnosis and therapeutic vulnerabilities with various eyes diseases.",2021-06-01 +32597467,A comprehensive integrated drug similarity resource for in-silico drug repositioning and beyond. ,"Drug similarity studies are driven by the hypothesis that similar drugs should display similar therapeutic actions and thus can potentially treat a similar constellation of diseases. Drug-drug similarity has been derived by variety of direct and indirect sources of evidence and frequently shown high predictive power in discovering validated repositioning candidates as well as other in-silico drug development applications. Yet, existing resources either have limited coverage or rely on an individual source of evidence, overlooking the wealth and diversity of drug-related data sources. Hence, there has been an unmet need for a comprehensive resource integrating diverse drug-related information to derive multi-evidenced drug-drug similarities. 
We addressed this resource gap by compiling heterogenous information for an exhaustive set of small-molecule drugs (total of 10 367 in the current version) and systematically integrated multiple sources of evidence to derive a multi-modal drug-drug similarity network. The resulting database, 'DrugSimDB' currently includes 238 635 drug pairs with significant aggregated similarity, complemented with an interactive user-friendly web interface (http://vafaeelab.com/drugSimDB.html), which not only enables database ease of access, search, filtration and export, but also provides a variety of complementary information on queried drugs and interactions. The integration approach can flexibly incorporate further drug information into the similarity network, providing an easily extendable platform. The database compilation and construction source-code has been well-documented and semi-automated for any-time upgrade to account for new drugs and up-to-date drug information.",2021-05-01 +34358314,LipiDisease: associate lipids to diseases using literature mining. ,"Lipids exhibit an essential role in cellular assembly and signaling. Dysregulation of these functions has been linked with many complications including obesity, diabetes, metabolic disorders, cancer, and more. Investigating lipid profiles in such conditions can provide insights into cellular functions and possible interventions. Hence the field of lipidomics is expanding in recent years. Even though the role of individual lipids in diseases has been investigated, there is no resource to perform disease enrichment analysis considering the cumulative association of a lipid set. To address this, we have implemented the LipiDisease web server. The tool analyzes millions of records from the PubMed biomedical literature database discussing lipids and diseases, predicts their association, and ranks them according to false discovery rates generated by random simulations. 
The tool takes into account 4270 diseases and 4798 lipids. Since the tool extracts the information from PubMed records, the number of diseases and lipids will be expanded over time as the biomedical literature grows. The LipiDisease webserver can be freely accessed at http://cbdm-01.zdv.uni-mainz.de:3838/piyusmor/LipiDisease/. Supplementary data are available at Bioinformatics online.",2021-08-06 +34344425,SorGSD: updating and expanding the sorghum genome science database with new contents and tools.,"

Background

As the fifth major cereal crop originated from Africa, sorghum (Sorghum bicolor) has become a key C4 model organism for energy plant research. With the development of high-throughput detection technologies for various omics data, much multi-dimensional and multi-omics information has been accumulated for sorghum. Integrating this information may accelerate genetic research and improve molecular breeding for sorghum agronomic traits.

Results

We updated the Sorghum Genome SNP Database (SorGSD) by adding new data, new features and renamed it to Sorghum Genome Science Database (SorGSD). In comparison with the original version SorGSD, which contains SNPs from 48 sorghum accessions mapped to the reference genome BTx623 (v2.1), the new version was expanded to 289 sorghum lines with both single nucleotide polymorphisms (SNPs) and small insertions/deletions (INDELs), which were aligned to the newly assembled and annotated sorghum genome BTx623 (v3.1). Moreover, phenotypic data and panicle pictures of critical accessions were provided in the new version. We implemented new tools including ID Conversion, Homologue Search and Genome Browser for analysis and updated the general information related to sorghum research, such as online sorghum resources and literature references. In addition, we deployed a new database infrastructure and redesigned a new user interface as one of the Genome Variation Map databases. The new version SorGSD is freely accessible online at http://ngdc.cncb.ac.cn/sorgsd/ .

Conclusions

SorGSD is a comprehensive integration with large-scale genomic variation, phenotypic information and incorporates online data analysis tools for data mining, genome navigation and analysis. We hope that SorGSD could provide a valuable resource for sorghum researchers to find variations they are interested in and generate customized high-throughput datasets for further analysis.",2021-08-03 +33780471,MCPdb: The bacterial microcompartment database.,"Bacterial microcompartments are organelle-like structures composed entirely of proteins. They have evolved to carry out several distinct and specialized metabolic functions in a wide variety of bacteria. Their outer shell is constructed from thousands of tessellating protein subunits, encapsulating enzymes that carry out the internal metabolic reactions. The shell proteins are varied, with single, tandem and permuted versions of the PF00936 protein family domain comprising the primary structural component of their polyhedral architecture, which is reminiscent of a viral capsid. While considerable amounts of structural and biophysical data have been generated in the last 15 years, the existing functionalities of current resources have limited our ability to rapidly understand the functional and structural properties of microcompartments (MCPs) and their diversity. In order to make the remarkable structural features of bacterial microcompartments accessible to a broad community of scientists and non-specialists, we developed MCPdb: The Bacterial Microcompartment Database (https://mcpdb.mbi.ucla.edu/). MCPdb is a comprehensive resource that categorizes and organizes known microcompartment protein structures and their larger assemblies. 
To emphasize the critical roles symmetric assembly and architecture play in microcompartment function, each structure in the MCPdb is validated and annotated with respect to: (1) its predicted natural assembly state (2) tertiary structure and topology and (3) the metabolic compartment type from which it derives. The current database includes 163 structures and is available to the public with the anticipation that it will serve as a growing resource for scientists interested in understanding protein-based metabolic organelles in bacteria.",2021-03-29 +34016708,Analyzing the vast coronavirus literature with CoronaCentral. ,"The SARS-CoV-2 pandemic has caused a surge in research exploring all aspects of the virus and its effects on human health. The overwhelming publication rate means that researchers are unable to keep abreast of the literature. To ameliorate this, we present the CoronaCentral resource that uses machine learning to process the research literature on SARS-CoV-2 together with SARS-CoV and MERS-CoV. We categorize the literature into useful topics and article types and enable analysis of the contents, pace, and emphasis of research during the crisis with integration of Altmetric data. These topics include therapeutics, disease forecasting, as well as growing areas such as ""long COVID"" and studies of inequality. This resource, available at https://coronacentral.ai, is updated daily.",2021-06-01 +35028612,"Multiomic analysis identifies CPT1A as a potential therapeutic target in platinum-refractory, high-grade serous ovarian cancer.","Resistance to platinum compounds is a major determinant of patient survival in high-grade serous ovarian cancer (HGSOC). 
To understand mechanisms of platinum resistance and identify potential therapeutic targets in resistant HGSOC, we generated a data resource composed of dynamic (±carboplatin) protein, post-translational modification, and RNA sequencing (RNA-seq) profiles from intra-patient cell line pairs derived from 3 HGSOC patients before and after acquiring platinum resistance. These profiles reveal extensive responses to carboplatin that differ between sensitive and resistant cells. Higher fatty acid oxidation (FAO) pathway expression is associated with platinum resistance, and both pharmacologic inhibition and CRISPR knockout of carnitine palmitoyltransferase 1A (CPT1A), which represents a rate limiting step of FAO, sensitize HGSOC cells to platinum. The results are further validated in patient-derived xenograft models, indicating that CPT1A is a candidate therapeutic target to overcome platinum resistance. All multiomic data can be queried via an intuitive gene-query user interface (https://sites.google.com/view/ptrc-cell-line).",2021-12-21 +33174605,"OMA orthology in 2021: website overhaul, conserved isoforms, ancestral gene order and more.","OMA is an established resource to elucidate evolutionary relationships among genes from currently 2326 genomes covering all domains of life. OMA provides pairwise and groupwise orthologs, functional annotations, local and global gene order conservation (synteny) information, among many other functions. This update paper describes the reorganisation of the database into gene-, group- and genome-centric pages. Other new and improved features are detailed, such as reporting of the evolutionarily best conserved isoforms of alternatively spliced genes, the inferred local order of ancestral genes, phylogenetic profiling, better cross-references, fast genome mapping, semantic data sharing via RDF, as well as a special coronavirus OMA with 119 viruses from the Nidovirales order, including SARS-CoV-2, the agent of the COVID-19 pandemic. 
We conclude with improvements to the documentation of the resource through primers, tutorials and short videos. OMA is accessible at https://omabrowser.org.",2021-01-01 +33211864,FANTOM enters 20th year: expansion of transcriptomic atlases and functional annotation of non-coding RNAs.,"The Functional ANnoTation Of the Mammalian genome (FANTOM) Consortium has continued to provide extensive resources in the pursuit of understanding the transcriptome, and transcriptional regulation, of mammalian genomes for the last 20 years. To share these resources with the research community, the FANTOM web-interfaces and databases are being regularly updated, enhanced and expanded with new data types. In recent years, the FANTOM Consortium's efforts have been mainly focused on creating new non-coding RNA datasets and resources. The existing FANTOM5 human and mouse miRNA atlas was supplemented with rat, dog, and chicken datasets. The sixth (latest) edition of the FANTOM project was launched to assess the function of human long non-coding RNAs (lncRNAs). From its creation until 2020, FANTOM6 has contributed to the research community a large dataset generated from the knock-down of 285 lncRNAs in human dermal fibroblasts; this is followed with extensive expression profiling and cellular phenotyping. Other updates to the FANTOM resource includes the reprocessing of the miRNA and promoter atlases of human, mouse and chicken with the latest reference genome assemblies. To facilitate the use and accessibility of all above resources we further enhanced FANTOM data viewers and web interfaces. The updated FANTOM web resource is publicly available at https://fantom.gsc.riken.jp/.",2021-01-01 +32382747,RSVdb: a comprehensive database of transcriptome RNA structure. ,"RNA fulfills a crucial regulatory role in cells by folding into a complex RNA structure. To date, a chemical compound, dimethyl sulfate (DMS), has been developed to probe the RNA structure at the transcriptome level effectively. 
We proposed a database, RSVdb (https://taolab.nwafu.edu.cn/rsvdb/), for the browsing and visualization of transcriptome RNA structures. RSVdb, including 626 225 RNAs with validated DMS reactivity from 178 samples in eight species, supports four main functions: information retrieval, research overview, structure prediction and resource download. Users can search for species, studies, transcripts and genes of interest; browse the quality control of sequencing data and statistical charts of RNA structure information; preview and perform online prediction of RNA structures in silico and under DMS restraint of different experimental treatments and download RNA structure data for species and studies. Together, RSVdb provides a reference for RNA structure and will support future research on the function of RNA structure at the transcriptome level.",2021-05-01 +33237311,"The STRING database in 2021: customizable protein-protein networks, and functional characterization of user-uploaded gene/measurement sets.","Cellular life depends on a complex web of functional associations between biomolecules. Among these associations, protein-protein interactions are particularly important due to their versatility, specificity and adaptability. The STRING database aims to integrate all known and predicted associations between proteins, including both physical interactions as well as functional associations. To achieve this, STRING collects and scores evidence from a number of sources: (i) automated text mining of the scientific literature, (ii) databases of interaction experiments and annotated complexes/pathways, (iii) computational interaction predictions from co-expression and from conserved genomic context and (iv) systematic transfers of interaction evidence from one organism to another. STRING aims for wide coverage; the upcoming version 11.5 of the resource will contain more than 14 000 organisms. 
In this update paper, we describe changes to the text-mining system, a new scoring-mode for physical interactions, as well as extensive user interface features for customizing, extending and sharing protein networks. In addition, we describe how to query STRING with genome-wide, experimental data, including the automated detection of enriched functionalities and potential biases in the user's query data. The STRING resource is available online, at https://string-db.org/.",2021-01-01 +34485385,Fuzzle 2.0: Ligand Binding in Natural Protein Building Blocks.,"Modern proteins have been shown to share evolutionary relationships via subdomain-sized fragments. The assembly of such fragments through duplication and recombination events led to the complex structures and functions we observe today. We previously implemented a pipeline that identified more than 1,000 of these fragments that are shared by different protein folds and developed a web interface to analyze and search for them. This resource named Fuzzle helps structural and evolutionary biologists to identify and analyze conserved parts of a protein but it also provides protein engineers with building blocks for example to design proteins by fragment combination. Here, we describe a new version of this web resource that was extended to include ligand information. This addition is a significant asset to the database since now protein fragments that bind specific ligands can be identified and analyzed. Often the mode of ligand binding is conserved in proteins thereby supporting a common evolutionary origin. The same can now be explored for subdomain-sized fragments within this database. This ligand binding information can also be used in protein engineering to graft binding pockets into other protein scaffolds or to transfer functional sites via recombination of a specific fragment. 
Fuzzle 2.0 is freely available at https://fuzzle.uni-bayreuth.de/2.0.",2021-08-18 +33849445,TANTIGEN 2.0: a knowledge base of tumor T cell antigens and epitopes.,"We previously developed TANTIGEN, a comprehensive online database cataloging more than 1000 T cell epitopes and HLA ligands from 292 tumor antigens. In TANTIGEN 2.0, we significantly expanded coverage in both immune response targets (T cell epitopes and HLA ligands) and tumor antigens. It catalogs 4,296 antigen variants from 403 unique tumor antigens and more than 1500 T cell epitopes and HLA ligands. We also included neoantigens, a class of tumor antigens generated through mutations resulting in new amino acid sequences in tumor antigens. TANTIGEN 2.0 contains validated TCR sequences specific for cognate T cell epitopes and tumor antigen gene/mRNA/protein expression information in major human cancers extracted by Human Pathology Atlas. TANTIGEN 2.0 is a rich data resource for tumor antigens and their associated epitopes and neoepitopes. It hosts a set of tailored data analytics tools tightly integrated with the data to form meaningful analysis workflows. It is freely available at http://projects.met-hilab.org/tadb .",2021-04-14 +33685493,riboCIRC: a comprehensive database of translatable circRNAs.,"riboCIRC is a translatome data-oriented circRNA database specifically designed for hosting, exploring, analyzing, and visualizing translatable circRNAs from multi-species. The database provides a comprehensive repository of computationally predicted ribosome-associated circRNAs; a manually curated collection of experimentally verified translated circRNAs; an evaluation of cross-species conservation of translatable circRNAs; a systematic de novo annotation of putative circRNA-encoded peptides, including sequence, structure, and function; and a genome browser to visualize the context-specific occupant footprints of circRNAs. 
It represents a valuable resource for the circRNA research community and is publicly available at http://www.ribocirc.com .",2021-03-08 +34164644,HFBD: a biomarker knowledge database for heart failure heterogeneity and personalized applications. ,"Heart failure (HF) is a cardiovascular disease with a high incidence around the world. Accumulating studies have focused on the identification of biomarkers for HF precision medicine. To understand the HF heterogeneity and provide biomarker information for the personalized diagnosis and treatment of HF, a knowledge database collecting the distributed and multiple-level biomarker information is necessary. In this study, the HF biomarker knowledge database (HFBD) was established by manually collecting the data and knowledge from literature in PubMed. HFBD contains 2618 records and 868 HF biomarkers (731 single and 137 combined) extracted from 1237 original articles. The biomarkers were classified into proteins, RNAs, DNAs, and the others at molecular, image, cellular and physiological levels. The biomarkers were annotated with biological, clinical and article information as well as the experimental methods used for the biomarker discovery. With its user-friendly interface, this knowledge database provides a unique resource for the systematic understanding of HF heterogeneity and personalized diagnosis and treatment of HF in the era of precision medicine. The platform is openly available at http://sysbio.org.cn/HFBD/.",2021-06-23 +33547946,Web resource on available DNA variant tests for hereditary diseases and genetic predispositions in dogs and cats: An Update.,"Vast progress has been made in the clinical diagnosis and molecular basis of hereditary diseases and genetic predisposition in companion animals. 
The purpose of this report is to provide an update on the availability of DNA testing for hereditary diseases and genetic predispositions in dogs and cats utilizing the WSAVA-PennGen DNA Testing Database web resource (URL: http://research.vet.upenn.edu/WSAVA-LabSearch ). Information on hereditary diseases, DNA tests, genetic testing laboratories and afflicted breeds added to the web-based WSAVA-PennGen DNA Testing Database was gathered. Following verification through original research and clinical studies, searching various databases on hereditary diseases in dogs and cats, and contacting laboratories offering DNA tests, the data were compared to the resource reported on in 2013. The number of molecularly defined Mendelian inherited diseases and variants in companion animals listed in the WSAVA-PennGen DNA Testing Database in 2020 drastically increased by 112% and 141%, respectively. The number of DNA variant tests offered by each laboratory has also doubled for dogs and cats. While the overall number of laboratories has only slightly increased from 43 to 47, the number of larger corporate laboratories increased, while academic laboratories have declined. In addition, there are now several laboratories that are offering breed-specific or all-breed panel tests rather than single-DNA tests for dogs and cats. This unique regularly updated searchable web-based database allows veterinary clinicians, breeders and pet owners to readily find available DNA tests, laboratories performing these DNA tests worldwide, and canine and feline breeds afflicted and also serves as a valuable resource for comparative geneticists.",2021-02-06 +35694152,EnhFFL: A database of enhancer mediated feed-forward loops for human and mouse.,"Feed-forward loops (FFLs) are thought to be one of the most common and important classes of transcriptional network motifs involved in various diseases. 
Enhancers are cis-regulatory elements that positively regulate protein-coding genes or microRNAs (miRNAs) by recruiting DNA-binding transcription factors (TFs). However, a comprehensive resource to identify, store, and analyze the FFLs of typical enhancer and super-enhancer FFLs is not currently available. Here, we present EnhFFL, an online database to provide a data resource for users to browse and search typical enhancer and super-enhancer FFLs. The current database covers 46 280/7000 TF-enhancer-miRNA FFLs, 9997/236 enhancer-miRNA-gene FFLs, 3 561 164/3 193 182 TF-enhancer-gene FFLs, and 1259/235 TF-enhancer feed-back loops (FBLs) across 91 tissues/cell lines of human and mouse, respectively. Users can browse loops by selecting species, types of tissue/cell line, and types of FFLs. EnhFFL supports searching elements including name/ID, genomic location, and the conservation of miRNA target genes. We also developed tools for users to screen customized FFLs using the threshold of q value as well as the confidence score of miRNA target genes. Disease and functional enrichment analysis showed that master miRNAs that are widely engaged in FFLs including TF-enhancer-miRNAs and enhancer-miRNA-genes are significantly involved in tumorigenesis. Database URL:http://lcbb.swjtu.edu.cn/EnhFFL/.",2021-04-14 +34562055,A molecular-based identification resource for the arthropods of Finland.,"To associate specimens identified by molecular characters to other biological knowledge, we need reference sequences annotated by Linnaean taxonomy. In this study, we (1) report the creation of a comprehensive reference library of DNA barcodes for the arthropods of an entire country (Finland), (2) publish this library, and (3) deliver a new identification tool for insects and spiders, as based on this resource. The reference library contains mtDNA COI barcodes for 11,275 (43%) of 26,437 arthropod species known from Finland, including 10,811 (45%) of 23,956 insect species. 
To quantify the improvement in identification accuracy enabled by the current reference library, we ran 1000 Finnish insect and spider species through the Barcode of Life Data system (BOLD) identification engine. Of these, 91% were correctly assigned to a unique species when compared to the new reference library alone, 85% were correctly identified when compared to BOLD with the new material included, and 75% with the new material excluded. To capitalize on this resource, we used the new reference material to train a probabilistic taxonomic assignment tool, FinPROTAX, scoring high success. For the full-length barcode region, the accuracy of taxonomic assignments at the level of classes, orders, families, subfamilies, tribes, genera, and species reached 99.9%, 99.9%, 99.8%, 99.7%, 99.4%, 96.8%, and 88.5%, respectively. The FinBOL arthropod reference library and FinPROTAX are available through the Finnish Biodiversity Information Facility (www.laji.fi) at https://laji.fi/en/theme/protax. Overall, the FinBOL investment represents a massive capacity-transfer from the taxonomic community of Finland to all sectors of society.",2021-11-03 +33813885,MolluscDB: a genome and transcriptome database for molluscs.,"As sequencing becomes more accessible and affordable, the analysis of genomic and transcriptomic data has become a cornerstone of many research initiatives. Communities with a focus on particular taxa or ecosystems need solutions capable of aggregating genomic resources and serving them in a standardized and analysis-friendly manner. Taxon-focussed resources can be more flexible in addressing the needs of a research community than can universal or general databases. Here, we present MolluscDB, a genome and transcriptome database for molluscs. MolluscDB offers a rich ecosystem of tools, including an Ensembl browser, a BLAST server for homology searches and an HTTP server from which any dataset present in the database can be downloaded. 
To demonstrate the utility of the database and verify the quality of its data, we imported data from assembled genomes and transcriptomes of 22 species, estimated the phylogeny of Mollusca using single-copy orthologues, explored patterns of gene family size change and interrogated the data for biomineralization-associated enzymes and shell matrix proteins. MolluscDB provides an easy-to-use and openly accessible data resource for the research community. This article is part of the Theo Murphy meeting issue 'Molluscan genomics: broad insights and future directions for a neglected phylum'.",2021-04-05 +34256256,CytomegaloVirusDb: Multi-omics knowledge database for cytomegaloviruses.,"Cytomegalovirus infection is a significant health concern and need further exploration in immunologic response mechanisms during primary and reactivated CMV infection. In this work, we evaluated the whole genomes and proteomes of different CMV species and developed an integrated open-access platform, CytomegaloVirusDb, a multi-Omics knowledge database for Cytomegaloviruses. The resource is categorized into the main sections ""Genomics,"" ""Proteomics,"" ""Immune response,"" and ""Therapeutics,"". The database is annotated with the list of all CMV species included in the study, and available information is freely accessible at http://www.cmvdb.dqweilab-sjtu.com/index.php. Various parameters used in the analysis for each section were primarily based on the whole genome or proteome of each specie. The platform provided datasets are open to access for researchers to obtain CMV species-specific information. This will help further to explore the dynamics of CMV-specific immune response and therapeutics. This platform is a useful resource to aid in advancing research against Cytomegaloviruses.",2021-06-09 +32997753,BEST: a Shiny/R web-based application to easily retrieve cross-related enzyme functional parameters and information from BRENDA.,"

Motivation

BRENDA is the largest enzyme functional database, containing information of 84 000 experimentally characterized enzyme entries. This database is an invaluable resource for researchers in the biological field, which classifies enzyme-related information in categories that are very useful to obtain specific functional and protein engineering information for enzyme families. However, the BRENDA web interface, the most used by researchers with a non-informatic background, does not allow the user to cross-reference data from different categories or sub-categories in the database. Obtaining information in an easy and fast way, in a friendly web interface, without the necessity to have a deep informatics knowledge, will facilitate and improve research in the enzymology and protein engineering field.

Results

We developed the Brenda Easy Search Tool (BEST), an interactive Shiny/R application that enables querying the BRENDA database for complex cross-tabulated characteristics, and retrieving enzyme-related parameters and information readily and efficiently, which can be used for the study of enzyme function or as an input for other bioinformatics tools.

Availability and implementation

BEST and its tutorial are freely available from https://pesb2.cl/best/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-06-01 +35121907,"The Cancer Surfaceome Atlas integrates genomic, functional and drug response data to identify actionable targets.","Cell-surface proteins (SPs) are a rich source of immune and targeted therapies. By systematically integrating single-cell and bulk genomics, functional studies and target actionability, in the present study we comprehensively identify and annotate genes encoding SPs (GESPs) pan-cancer. We characterize GESP expression patterns, recurrent genomic alterations, essentiality, receptor-ligand interactions and therapeutic potential. We also find that mRNA expression of GESPs is cancer-type specific and positively correlates with protein expression, and that certain GESP subgroups function as common or specific essential genes for tumor cell growth. We also predict receptor-ligand interactions substantially deregulated in cancer and, using systems biology approaches, we identify cancer-specific GESPs with therapeutic potential. We have made this resource available through the Cancer Surfaceome Atlas ( http://fcgportal.org/TCSA ) within the Functional Cancer Genome data portal.",2021-12-13 +33125652,Navigating the Global Protein-Protein Interaction Landscape Using iRefWeb.,"iRefWeb is a resource that provides web interface to a large collection of protein-protein interactions aggregated from major primary databases. The underlying data-consolidation process, called iRefIndex, implements a rigorous methodology of identifying redundant protein sequences and integrating disparate data records that reference the same peptide sequences, despite many potential differences in data identifiers across various source databases. iRefWeb offers a unified user interface to all interaction records and associated information collected by iRefIndex, in addition to a number of data filters and visual features that present the supporting evidence. 
Users of iRefWeb can explore the consolidated landscape of protein-protein interactions, establish the provenance and reliability of each data record, and compare annotations performed by different data curator teams. The iRefWeb portal is freely available at http://wodaklab.org/iRefWeb .",2021-01-01 +,Systematic review of health workforce surge capacity during COVID-19 and other viral pandemics,"Abstract

Background

Healthcare decision-makers need comprehensive evidence to mitigate surges in the demand for human resources for health (HRH) during infectious disease outbreaks, in terms of both short- and longer-term impacts. This study aimed to assess the state of the evidence to address HRH surge capacity during COVID-19 and other outbreaks of global significance in the 21st century.

Methods

We systematically searched eight bibliographic databases to extract primary research articles published between 01/2000-06/2020, capturing temporal changes in HRH requirements and responses surrounding viral respiratory infection pandemics. A systems approach was used, considering providers in hospitals, out-of-hospital systems, emergency medical services, and public health. We narratively synthesized the evidence following the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-analyses) standard.

Results

Of the 1,155 retrieved records, 16 studies met our inclusion criteria; of these, 5 focused on COVID-19, 3 on H1N1, and 8 on a hypothetical pandemic. Different training, mobilization, and redeployment options to address pandemic-time health system capacity were assessed. Few governance scenarios drew on observational HRH data allowing for comparability across contexts. Notable evidence gaps included occupational and psychosocial factors affecting healthcare workers' absenteeism and risk of burnout, gendered considerations of HRH capacity, evaluations in low- and lower-middle income countries, and policy-actionable assessments to inform post-pandemic recovery and sustainability of services for noncommunicable disease management.

Conclusions

This research emphasized the critical need for timely, internationally comparable, and equity-informative HRH data and research to enhance preparedness, response, and recovery policies for this and future pandemics. Full paper is available at: https://doi.org/10.1002/hpm.3137

Key messages

The COVID-19 pandemic has highlighted the critical need for enhanced health workforce data and research, including better tracking of demographics, exposures, infections and deaths of health workers. Although women comprise 70% of the health workforce in many countries, gender‐blindness persists in the global literature on health workforce research and governance in public health emergencies./bodyt",2021-10-01 +34954426,FertilityOnline: A Straight Pipeline for Functional Gene Annotation and Disease Mutation Discovery.,"Exploring the genetic basis of human infertility is currently under intensive investigation. However, only a handful of genes have been validated in animal models as disease-causing genes in infertile men. Thus, to better understand the genetic basis of human spermatogenesis and bridge the knowledge gap between humans and other animal species, we construct the FertilityOnline, a database integrating the literature-curated functional genes during spermatogenesis into an existing spermatogenic database, SpermatogenesisOnline 1.0. Additional features, including the functional annotation and genetic variants of human genes, are also incorporated into FertilityOnline. By searching this database, users can browse the functional genes involved in spermatogenesis and instantly narrow down the number of candidates of genetic mutations underlying male infertility in a user-friendly web interface. Clinical application of this database was exampled by the identification of novel causative mutations in synaptonemal complex central element protein 1 (SYCE1) and stromal antigen 3 (STAG3) in azoospermic men. In conclusion, FertilityOnline is not only an integrated resource for spermatogenic genes but also a useful tool facilitating the exploration of the genetic basis of male infertility. 
FertilityOnline can be freely accessed at http://mcg.ustc.edu.cn/bsc/spermgenes2.0/index.html.",2021-12-24 +34782688,CyFi-MAP: an interactive pathway-based resource for cystic fibrosis.,"Cystic fibrosis (CF) is a life-threatening autosomal recessive disease caused by more than 2100 mutations in the CF transmembrane conductance regulator (CFTR) gene, generating variability in disease severity among individuals with CF sharing the same CFTR genotype. Systems biology can assist in the collection and visualization of CF data to extract additional biological significance and find novel therapeutic targets. Here, we present the CyFi-MAP-a disease map repository of CFTR molecular mechanisms and pathways involved in CF. Specifically, we represented the wild-type (wt-CFTR) and the F508del associated processes (F508del-CFTR) in separate submaps, with pathways related to protein biosynthesis, endoplasmic reticulum retention, export, activation/inactivation of channel function, and recycling/degradation after endocytosis. CyFi-MAP is an open-access resource with specific, curated and continuously updated information on CFTR-related pathways available online at https://cysticfibrosismap.github.io/ . This tool was developed as a reference CF pathway data repository to be continuously updated and used worldwide in CF research.",2021-11-15 +33121433,circVAR database: genome-wide archive of genetic variants for human circular RNAs.,"

Background

Circular RNAs (circRNAs) play important roles in regulating gene expression through binding miRNAs and RNA binding proteins. Genetic variation of circRNAs may affect complex traits/diseases by changing their binding efficiency to target miRNAs and proteins. There is a growing demand for investigations of the functions of genetic changes using large-scale experimental evidence. However, there is no online genetic resource for circRNA genes.

Results

We performed extensive genetic annotation of 295,526 circRNAs integrated from circBase, circNet and circRNAdb. All pre-computed genetic variants were presented at our online resource, circVAR, with data browsing and search functionality. We explored the chromosome-based distribution of circRNAs and their associated variants. We found that, based on mapping to the 1000 Genomes and ClinVAR databases, chromosome 17 has a relatively large number of circRNAs and associated common and health-related genetic variants. Following the annotation of genome wide association studies (GWAS)-based circRNA variants, we found many non-coding variants within circRNAs, suggesting novel mechanisms for common diseases reported from GWAS studies. For cancer-based somatic variants, we found that chromosome 7 has many highly complex mutations that have been overlooked in previous research.

Conclusion

We used the circVAR database to collect SNPs and small insertions and deletions (INDELs) in putative circRNA regions and to identify their potential phenotypic information. To provide a reusable resource for the circRNA research community, we have published all the pre-computed genetic data concerning circRNAs and associated genes together with data query and browsing functions at http://soft.bioinfo-minzhao.org/circvar .",2020-10-29 +34154643,"TE Hub: A community-oriented space for sharing and connecting tools, data, resources, and methods for transposable element annotation.","Transposable elements (TEs) play powerful and varied evolutionary and functional roles, and are widespread in most eukaryotic genomes. Research into their unique biology has driven the creation of a large collection of databases, software, classification systems, and annotation guidelines. The diversity of available TE-related methods and resources raises compatibility concerns and can be overwhelming to researchers and communicators seeking straightforward guidance or materials. To address these challenges, we have initiated a new resource, TE Hub, that provides a space where members of the TE community can collaborate to document and create resources and methods. The space consists of (1) a website organized with an open wiki framework,  https://tehub.org , (2) a conversation framework via a Twitter account and a Slack channel, and (3) bi-monthly Hub Update video chats on the platform's development. In addition to serving as a centralized repository and communication platform, TE Hub lays the foundation for improved integration, standardization, and effectiveness of diverse tools and protocols. 
We invite the TE community, both novices and experts in TE identification and analysis, to join us in expanding our community-oriented resource.",2021-06-21 +34538772,GenOrigin: A comprehensive protein-coding gene origination database on the evolutionary timescale of life.,"The origination of new genes contributes to the biological diversity of life. New genes may quickly build their network, exert important functions, and generate novel phenotypes. Dating gene age and inferring the origination mechanisms of new genes, like primate-specific genes, is the basis for the functional study of the genes. However, no comprehensive resource of gene age estimates across species is available. Here, we systematically date the age of 9,102,113 protein-coding genes from 565 species in the Ensembl and Ensembl Genomes databases, including 82 bacteria, 57 protists, 134 fungi, 58 plants, 56 metazoa, and 178 vertebrates, using a protein-family-based pipeline with Wagner parsimony algorithm. We also collect gene age estimate data from other studies and uniformly distribute the gene age estimates to time ranges in a million years for comparison across studies. All the data are cataloged into GenOrigin (http://genorigin.chenzxlab.cn/), a user-friendly new database of gene age estimates, where users can browse gene age estimates by species, age, and gene ontology. In GenOrigin, the information such as gene age estimates, annotation, gene ontology, ortholog, and paralog, as well as detailed gene presence/absence views for gene age inference based on the species tree with evolutionary timescale, is provided to researchers for exploring gene functions.",2021-06-14 +33406221,iCysMod: an integrative database for protein cysteine modifications in eukaryotes. ,"As important post-translational modifications, protein cysteine modifications (PCMs) occurring at cysteine thiol group play critical roles in the regulation of various biological processes in eukaryotes. 
Due to the rapid advancement of high-throughput proteomics technologies, a large number of PCM events have been identified but remain to be curated. Thus, an integrated resource of eukaryotic PCMs will be useful for the research community. In this work, we developed an integrative database for protein cysteine modifications in eukaryotes (iCysMod), which curated and hosted 108 030 PCM events for 85 747 experimentally identified sites on 31 483 proteins from 48 eukaryotes for 8 types of PCMs, including oxidation, S-nitrosylation (-SNO), S-glutathionylation (-SSG), disulfide formation (-SSR), S-sulfhydration (-SSH), S-sulfenylation (-SOH), S-sulfinylation (-SO2H) and S-palmitoylation (-S-palm). Then, browse and search options were provided for accessing the dataset, while various detailed information about the PCM events was well organized for visualization. With human dataset in iCysMod, the sequence features around the cysteine modification sites for each PCM type were analyzed, and the results indicated that various types of PCMs presented distinct sequence recognition preferences. Moreover, different PCMs can crosstalk with each other to synergistically orchestrate specific biological processes, and 37 841 PCM events involved in 119 types of PCM co-occurrences at the same cysteine residues were finally obtained. Taken together, we anticipate that the database of iCysMod would provide a useful resource for eukaryotic PCMs to facilitate related researches, while the online service is freely available at http://icysmod.omicsbio.info.",2021-09-01 +33119075,Phosphomatics: interactive interrogation of substrate-kinase networks in global phosphoproteomics datasets.,"

Motivation

Mass spectrometry-based phosphoproteomics can routinely identify and quantify thousands of phosphorylated peptides from a single experiment. However, interrogating possible upstream kinases and identifying key literature for phosphorylation sites is laborious and time-consuming.

Results

Here, we present Phosphomatics-a publicly available web resource for interrogating phosphoproteomics data. Phosphomatics allows researchers to upload phosphoproteomics data and interrogate possible relationships from a substrate-, kinase- or pathway-centric viewpoint.

Availability and implementation

Phosphomatics is freely available via the internet at: https://phosphomatics.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +36311809,CSCS: a chromatin state interface for Chinese Spring bread wheat.,"A chromosome-level genome assembly of the bread wheat variety Chinese Spring (CS) has recently been published. Genome-wide identification of regulatory elements (REs) responsible for regulating gene activity is key to further mechanistic studies. Because epigenetic activity can reflect RE activity, defining chromatin states based on epigenomic features is an effective way to detect REs. Here, we present the web-based platform Chinese Spring chromatin state (CSCS), which provides CS chromatin signature information. CSCS includes 15 recently published epigenomic data sets including open chromatin and major chromatin marks, which are further partitioned into 15 distinct chromatin states. CSCS curates detailed information about these chromatin states, with trained self-organization mapping (SOM) for segments in all chromatin states and JBrowse visualization for genomic regions or genes. Motif analysis for genomic regions or genes, GO analysis for genes and SOM analysis for new epigenomic data sets are also integrated into CSCS. In summary, the CSCS database contains the combinatorial patterns of chromatin signatures in wheat and facilitates the detection of functional elements and further clarification of regulatory activities. We illustrate how CSCS enables biological insights using one example, demonstrating that CSCS is a highly useful resource for intensive data mining. CSCS is available at http://bioinfo.cemps.ac.cn/CSCS/.

Supplementary information

The online version contains supplementary material available at 10.1007/s42994-021-00048-z.",2021-05-31 +34458863,"Predicting molecular mechanisms, pathways, and health outcomes induced by Juul e-cigarette aerosol chemicals using the Comparative Toxicogenomics Database.","There is a critical need to understand the health risks associated with vaping e-cigarettes, which has reached epidemic levels among teens. Juul is currently the most popular type of e-cigarette on the market. Using the Comparative Toxicogenomics Database (CTD; http://ctdbase.org), a public resource that integrates chemical, gene, phenotype and disease data, we aimed to analyze the potential molecular mechanisms of eight chemicals detected in the aerosols generated by heating Juul e-cigarette pods: nicotine, acetaldehyde, formaldehyde, free radicals, crotonaldehyde, acetone, pyruvaldehyde, and particulate matter. Curated content in CTD, including chemical-gene, chemical-phenotype, and chemical-disease interactions, as well as associated phenotypes and pathway enrichment, were analyzed to help identify potential molecular mechanisms and diseases associated with vaping. Nicotine shows the most direct disease associations of these chemicals, followed by particulate matter and formaldehyde. Together, these chemicals show a direct marker or mechanistic relationship with 400 unique diseases in CTD, particularly in the categories of cardiovascular diseases, nervous system diseases, respiratory tract diseases, cancers, and mental disorders. 
We chose three respiratory tract diseases to investigate further, and found that in addition to cellular processes of apoptosis and cell proliferation, prioritized phenotypes underlying Juul-associated respiratory tract disease outcomes include response to oxidative stress, inflammatory response, and several cell signaling pathways (p38MAPK, NIK/NFkappaB, calcium-mediated).",2021-08-05 +33306802,NPBS database: a chemical data resource with relational data between natural products and biological sources. ,"NPBS (Natural Products & Biological Sources) database is a chemical data resource with relational data between natural products and biological sources, manually curated from literatures of natural product researches. The relational data link a specific species and all the natural products derived from it and contrarily link a specific natural product and all the biological sources. The biological sources cover diverse species of plant, bacterial, fungal and marine organisms; the natural molecules have proper chemical structure data and computable molecular properties and all the relational data have corresponding references. NPBS database provides a wider choice of biological sources and can be used for dereplication to prevent re-isolation and re-characterization of already known natural products. Database URL: http://www.organchem.csdb.cn/scdb/NPBS.",2020-12-01 +34897852,BGvar: A comprehensive resource for blood group immunogenetics.,"

Background

Blood groups form the basis of effective and safe blood transfusion. There are about 43 well-recognised human blood group systems presently known. Blood groups are molecularly determined by the presence of specific antigens on the red blood cells and are genetically determined and inherited following Mendelian principles. The lack of a comprehensive, relevant, manually compiled and genome-ready dataset of red cell antigens limited the widespread application of genomic technologies to characterise and interpret the blood group complement of an individual from genomic datasets.

Materials and methods

A range of public datasets was used to systematically annotate the variation compendium for its functionality and allele frequencies across global populations. Details on phenotype or relevant clinical importance were collated from reported literature evidence.

Results

We have compiled the Blood Group Associated Genomic Variant Resource (BGvar), a manually curated online resource comprising all known human blood group related allelic variants including a total of 1700 International Society of Blood Transfusion approved alleles and 1706 alleles predicted and curated from literature reports. This repository includes 1682 single nucleotide variations (SNVs), 310 Insertions, Deletions (InDels) and Duplications (Copy Number Variations) and about 1360 combination mutations corresponding to 43 human blood group systems and 2 transcription factors. This compendium also encompasses gene fusion and rearrangement events occurring in human blood group genes.

Conclusion

To the best of our knowledge, BGvar is a comprehensive and a user-friendly resource with most relevant collation of blood group alleles in humans. BGvar is accessible online at URL: http://clingen.igib.res.in/bgvar/.",2021-12-13 +34220930,"The FAANG Data Portal: Global, Open-Access, ""FAIR"", and Richly Validated Genotype to Phenotype Data for High-Quality Functional Annotation of Animal Genomes.","The Functional Annotation of ANimal Genomes (FAANG) project is a worldwide coordinated action creating high-quality functional annotation of farmed and companion animal genomes. The generation of a rich genome-to-phenome resource and supporting informatic infrastructure advances the scope of comparative genomics and furthers the understanding of functional elements. The project also provides terrestrial and aquatic animal agriculture community powerful resources for supporting improvements to farmed animal production, disease resistance, and genetic diversity. The FAANG Data Portal (https://data.faang.org) ensures Findable, Accessible, Interoperable and Reusable (FAIR) open access to the wealth of sample, sequencing, and analysis data produced by an ever-growing number of FAANG consortia. It is developed and maintained by the FAANG Data Coordination Centre (DCC) at the European Molecular Biology Laboratory's European Bioinformatics Institute (EMBL-EBI). FAANG projects produce a standardised set of multi-omic assays with resulting data placed into a range of specialised open data archives. To ensure this data is easily findable and accessible by the community, the portal automatically identifies and collates all submitted FAANG data into a single easily searchable resource. The Data Portal supports direct download from the multiple underlying archives to enable seamless access to all FAANG data from within the portal itself. 
The portal provides a range of predefined filters, powerful predictive search, and a catalogue of sampling and analysis protocols and automatically identifies publications associated with any dataset. To ensure all FAANG data submissions are high-quality, the portal includes powerful contextual metadata validation and data submissions brokering to the underlying EMBL-EBI archives. The portal will incorporate extensive new technical infrastructure to effectively deliver and standardise FAANG's shift to single-cellomics, cell atlases, pangenomes, and novel phenotypic prediction models. The Data Portal plays a key role for FAANG by supporting high-quality functional annotation of animal genomes, through open FAIR sharing of data, complete with standardised rich metadata. Future Data Portal features developed by the DCC will support new technological developments for continued improvement for FAANG projects.",2021-06-17 +33367605,Virxicon: A Lexicon Of Viral Sequences. ,"Viruses are the most abundant biological entities and constitute a large reservoir of genetic diversity. In recent years, knowledge about them has increased significantly as a result of dynamic development in life sciences and rapid technological progress. This knowledge is scattered across various data repositories, making a comprehensive analysis of viral data difficult. In response to the need for gathering a comprehensive knowledge of viruses and viral sequences, we developed Virxicon, a lexicon of all experimentally-acquired sequences for RNA and DNA viruses. The ability to quickly obtain data for entire viral groups, searching sequences by levels of taxonomic hierarchy-according to the Baltimore classification and ICTV taxonomy-and tracking the distribution of viral data and its growth over time are unique features of our database compared to the other tools. Virxicon is a publicly available resource, updated weekly. 
It has an intuitive web interface and can be freely accessed at http://virxicon.cs.put.poznan.pl/. Supplementary data are available at Bioinformatics online.",2020-12-26 +34988460,Characterization of the consensus mucosal microbiome of colorectal cancer.,"Dysbiosis is an imbalance of an organ's microbiome and plays a role in colorectal cancer pathogenesis. Characterizing the bacteria in the microenvironment of a cancer through genome sequencing has advantages compared to culture-based profiling. However, there are notable technical and analytical challenges in characterizing universal features of tumor microbiomes. Colorectal tumors demonstrate microbiome variation among different studies and across individual patients. To address these issues, we conducted a computational study to determine a consensus microbiome for colorectal cancer, analyzing 924 tumors from eight independent RNA-Seq data sets. A standardized meta-transcriptomic analysis pipeline was established with quality control metrics. Microbiome profiles across different cohorts were compared and recurrently altered microbial shifts specific to colorectal cancer were determined. We identified a cancer-specific set of 114 microbial species associated with tumors that were found among all investigated studies. Firmicutes, Bacteroidetes, Proteobacteria and Actinobacteria were among the four most abundant phyla for the colorectal cancer microbiome. Member species of Clostridia were depleted and Fusobacterium nucleatum was one of the most enriched bacterial species in tumors. Associations between the consensus species and specific immune cell types were noted. Our results are available as a web data resource for other researchers to explore (https://crc-microbiome.stanford.edu).",2021-12-22 +33980298,IDSM ChemWebRDF: SPARQLing small-molecule datasets.,"The Resource Description Framework (RDF), together with well-defined ontologies, significantly increases data interoperability and usability. 
The SPARQL query language was introduced to retrieve requested RDF data and to explore links between them. Among other useful features, SPARQL supports federated queries that combine multiple independent data source endpoints. This allows users to obtain insights that are not possible using only a single data source. Owing to all of these useful features, many biological and chemical databases present their data in RDF, and support SPARQL querying. In our project, we primary focused on PubChem, ChEMBL and ChEBI small-molecule datasets. These datasets are already being exported to RDF by their creators. However, none of them has an official and currently supported SPARQL endpoint. This omission makes it difficult to construct complex or federated queries that could access all of the datasets, thus underutilising the main advantage of the availability of RDF data. Our goal is to address this gap by integrating the datasets into one database called the Integrated Database of Small Molecules (IDSM) that will be accessible through a SPARQL endpoint. Beyond that, we will also focus on increasing mutual interoperability of the datasets. To realise the endpoint, we decided to implement an in-house developed SPARQL engine based on the PostgreSQL relational database for data storage. In our approach, data are stored in the traditional relational form, and the SPARQL engine translates incoming SPARQL queries into equivalent SQL queries. An important feature of the engine is that it optimises the resulting SQL queries. Together with optimisations performed by PostgreSQL, this allows efficient evaluations of SPARQL queries. The endpoint provides not only querying in the dataset, but also the compound substructure and similarity search supported by our Sachem project. Although the endpoint is accessible from an internet browser, it is mainly intended to be used for programmatic access by other services, for example as a part of federated queries. 
For regular users, we offer a rich web application called ChemWebRDF using the endpoint. The application is publicly available at https://idsm.elixir-czech.cz/chemweb/ .",2021-05-12 +32337573,MPTherm: database for membrane protein thermodynamics for understanding folding and stability.,"The functions of membrane proteins (MPs) are attributed to their structure and stability. Factors influencing the stability of MPs differ from globular proteins due to the presence of membrane spanning regions. Thermodynamic data of MPs aid to understand the relationship among their structure, stability and function. Although a wealth of experimental data on thermodynamics of MPs are reported in the literature, there is no database available explicitly for MPs. In this work, we have developed a database for MP thermodynamics, MPTherm, which contains more than 7000 thermodynamic data from about 320 MPs. Each entry contains protein sequence and structural information, membrane topology, experimental conditions, thermodynamic parameters such as melting temperature, free energy, enthalpy etc. and literature information. MPTherm assists users to retrieve the data by using different search and display options. We have also provided the sequence and structure visualization as well as cross-links to UniProt and PDB databases. MPTherm database is freely available at http://www.iitm.ac.in/bioinfo/mptherm/. It is implemented in HTML, PHP, MySQL and JavaScript, and supports the latest versions of major browsers, such as Firefox, Chrome and Opera. MPTherm would serve as an effective resource for understanding the stability of MPs, development of prediction tools and identifying drug targets for diseases associated with MPs.",2021-03-01 +33074547,"Usage of the Sea Urchin Hemicentrotus pulcherrimus Database, HpBase.","HpBase ( http://cell-innovation.nig.ac.jp/Hpul/ ) is a database that provides genome and transcriptome resources of the sea urchin Hemicentrotus pulcherrimus. 
In addition to downloading the bulk data, several analysis tools for resource use are available: gene search, homology search, and genome browsing. HpBase also discloses the protocols for biological experiments using H. pulcherrimus that have been accumulated so far. Therefore, HpBase can assist efficient use of genome resources for researchers from various fields-evolutionary, developmental, and cell biology. In this chapter we present an overview and usage of tools in HpBase.",2021-01-01 +34724898,Genomic resources of broomcorn millet: demonstration and application of a high-throughput BAC mapping pipeline.,"

Background

With high-efficient water-use and drought tolerance, broomcorn millet has emerged as a candidate for food security. To promote its research process for molecular breeding and functional research, a comprehensive genome resource is of great importance.

Results

Herein, we constructed a BAC library for broomcorn millet, generated BAC end sequences based on the clone-array pooled shotgun sequencing strategy and Illumina sequencing technology, and integrated BAC clones into genome by a novel pipeline for BAC end profiling. The BAC library consisted of 76,023 clones with an average insert length of 123.48 Kb, covering about 9.9-fold of the 850 Mb genome. Of 9216 clones tested using our pipeline, 8262 clones were mapped on the broomcorn millet cultivar longmi4 genome. These mapped clones covered 308 of the 829 gaps left by the genome. To our knowledge, this is the only BAC resource for broomcorn millet.

Conclusions

We constructed a high-quality BAC library for broomcorn millet and designed a novel pipeline for BAC end profiling. BAC clones can be browsed and obtained from our website ( http://eightstarsbio.com/gresource/JBrowse-1.16.5/index.html ). The high-quality BAC clones mapped on genome in this study will provide a powerful genomic resource for genome gap filling, complex segment sequencing, FISH, functional research and genetic engineering of broomcorn millet.",2021-11-01 +34245304,"MtExpress, a Comprehensive and Curated RNAseq-based Gene Expression Atlas for the Model Legume Medicago truncatula.","Although RNA sequencing (RNAseq) has been becoming the main transcriptomic approach in the model legume Medicago truncatula, there is currently no genome-wide gene expression atlas covering the whole set of RNAseq data published for this species. Nowadays, such a tool is highly valuable to provide a global view of gene expression in a wide range of conditions and tissues/organs. Here, we present MtExpress, a gene expression atlas that compiles an exhaustive set of published M. truncatula RNAseq data (https://medicago.toulouse.inrae.fr/MtExpress). MtExpress makes use of recent releases of M. truncatula genome sequence and annotation, as well as up-to-date tools to perform mapping, quality control, statistical analysis and normalization of RNAseq data. MtExpress combines semi-automated pipelines with manual re-labeling and organization of samples to produce an attractive and user-friendly interface, fully integrated with other available Medicago genomic resources. Importantly, MtExpress is highly flexible, in terms of both queries, e.g. allowing searches with gene names and orthologous gene IDs from Arabidopsis and other legume species, and outputs, to customize visualization and redirect gene study to relevant Medicago webservers. Thanks to its semi-automated pipeline, MtExpress will be frequently updated to follow the rapid pace of M. 
truncatula RNAseq data publications, as well as the constant improvement of genome annotation. MtExpress also hosts legacy GeneChip expression data originally stored in the Medicago Gene Expression Atlas, as a very valuable and complementary resource.",2021-11-01 +34387544,"Scaled, high fidelity electrophysiological, morphological, and transcriptomic cell characterization. ","The Patch-seq approach is a powerful variation of the patch-clamp technique that allows for the combined electrophysiological, morphological, and transcriptomic characterization of individual neurons. To generate Patch-seq datasets at scale, we identified and refined key factors that contribute to the efficient collection of high-quality data. We developed patch-clamp electrophysiology software with analysis functions specifically designed to automate acquisition with online quality control. We recognized the importance of extracting the nucleus for transcriptomic success and maximizing membrane integrity during nucleus extraction for morphology success. The protocol is generalizable to different species and brain regions, as demonstrated by capturing multimodal data from human and macaque brain slices. The protocol, analysis and acquisition software are compiled at https://github.com/AllenInstitute/patchseqtools. This resource can be used by individual labs to generate data across diverse mammalian species and that is compatible with large publicly available Patch-seq datasets.",2021-08-13 +33147626,CellTalkDB: a manually curated database of ligand-receptor interactions in humans and mice. ,"Cell-cell communications in multicellular organisms generally involve secreted ligand-receptor (LR) interactions, which is vital for various biological phenomena. 
Recent advancements in single-cell RNA sequencing (scRNA-seq) have effectively resolved cellular phenotypic heterogeneity and the cell-type composition of complex tissues, facilitating the systematic investigation of cell-cell communications at single-cell resolution. However, assessment of chemical-signal-dependent cell-cell communication through scRNA-seq relies heavily on prior knowledge of LR interaction pairs. We constructed CellTalkDB (http://tcm.zju.edu.cn/celltalkdb), a manually curated comprehensive database of LR interaction pairs in humans and mice comprising 3398 human LR pairs and 2033 mouse LR pairs, through text mining and manual verification of known protein-protein interactions using the STRING database, with literature-supported evidence for each pair. Compared with SingleCellSignalR, the largest LR-pair resource, CellTalkDB includes not only 2033 mouse LR pairs but also 377 additional human LR pairs. In conclusion, the data on human and mouse LR pairs contained in CellTalkDB could help to further the inference and understanding of the LR-interaction-based cell-cell communications, which might provide new insights into the mechanism underlying biological processes.",2021-07-01 +34907423,iCAV: an integrative database of cancer-associated viruses. ,"To date, various studies have found that the occurrence of cancer may be related to viral infections. Therefore, it is important to explore the relationship between viruses and diseases. The International Agency for Research on Cancer has defined six types of viruses as Class 1 human carcinogens, including Epstein-Barr virus, hepatitis C virus, hepatitis B virus, human T-cell lymphotropic virus, human herpesvirus 8 and human papillomavirus, while Merkel cell polyomavirus is classified as 'probably carcinogenic to humans' (Group 2A). Therefore, in-depth research on these viruses will help clarify their relationship with diseases, and substantial efforts have been made to sequence their genomes. 
However, there is no complete database documenting these cancer-associated viruses, and researchers are not able to easily access and retrieve the published genomes. In this study, we developed iCAV, a database that integrates the genomes of cancer-related viruses and the corresponding phenotypes. We collected a total of 18 649 genome sequences from seven human disease-related viruses, and each virus was further classified by the associated disease, sample and country. iCAV is a comprehensive resource of cancer-associated viruses that provides browse and download functions for viral genomes. Database URL: http://icav.omicsbio.info/.",2021-12-01 +34511389,Helminth egg analysis platform (HEAP): An opened platform for microscopic helminth egg identification and quantification based on the integration of deep learning architectures.,"

Background

Millions of people throughout the world suffer from parasite infections. Traditionally, technicians use manual eye inspection of microscopic specimens to perform a parasite examination. However, manual operations have limitations that hinder the ability to obtain precise egg counts and cause inefficient identification of infected parasites on co-infections. The technician requirements for handling a large number of microscopic examinations in countries that have limited medical resources are substantial. We developed the helminth egg analysis platform (HEAP) as a user-friendly microscopic helminth eggs identification and quantification platform to assist medical technicians during parasite infection examination.

Methods

Multiple deep learning strategies including SSD (Single Shot MultiBox Detector), U-net, and Faster R-CNN (Faster Region-based Convolutional Neural Network) are integrated to identify the same specimen, allowing users to choose the best predictions. An image binning and egg-in-edge algorithm based on pixel density detection was developed to increase the performance. Computers with different operating systems can be gathered to lower the computation time using our easy-to-deploy software architecture.

Results

A user-friendly interface is provided to substantially increase the efficiency of manual validation. To adapt to low-cost computers, we architected a distributed computing structure with high flexibilities.

Conclusions

HEAP serves not only as a prediction service provider but also as a parasitic egg database of microscopic helminth egg image collection, labeling data and pretrained models. All images and labeling resources are free and accessible at http://heap.cgu.edu.tw. HEAP can also be an ideal education and training resource for helminth egg examination.",2021-09-02 +34289329,Sense of Coherence and COVID-19: A Longitudinal Study.,"The strong restrictive measures adopted in 2020 against the spread of the COVID-19 pandemic in Italy have deeply affected the general population's mental health. In the current longitudinal study, we specifically focus on sense of coherence (SOC), both in terms of comprehensibility/manageability and meaningfulness, among a large sample of Italian adults; SOC is a potential resource likely to foster the ability to cope with stressors. A total of 2,191 Italian participants (65.8% female) aged 18-82 completed an anonymous online self-report questionnaire at Time 1 (during the lockdown, March 2020) and at Time 2 (at the resumption of most activities, July 2020). The Repeated Measures Latent Profile Analysis (RMLPA) allowed us to identify seven different SOC profiles based on the change in both SOC dimensions, ranging from a strong ""crisis"" in terms of this resource in the face of the pandemic to a solid possibility to count on it. Interestingly, female and younger respondents were more likely to belong to those profiles characterized by lower levels of SOC, and these profiles have specific relations with fear and wellbeing. The implications of these results and the further expansion of the study are discussed. Supplemental data for this article is available online at https://doi.org/10.1080/00223980.2021.1952151 .",2021-07-21 +32436932,miRactDB characterizes miRNA-gene relation switch between normal and cancer tissues across pan-cancer. 
,"It has been increasingly accepted that microRNA (miRNA) can both activate and suppress gene expression, directly or indirectly, under particular circumstances. Yet, a systematic study on the switch in their interaction pattern between activation and suppression and between normal and cancer conditions based on multi-omics evidences is not available. We built miRactDB, a database for miRNA-gene interaction, at https://ccsm.uth.edu/miRactDB, to provide a versatile resource and platform for annotation and interpretation of miRNA-gene relations. We conducted a comprehensive investigation on miRNA-gene interactions and their biological implications across tissue types in both tumour and normal conditions, based on TCGA, CCLE and GTEx databases. We particularly explored the genetic and epigenetic mechanisms potentially contributing to the positive correlation, including identification of miRNA binding sites in the gene coding sequence (CDS) and promoter regions of partner genes. Integrative analysis based on this resource revealed that top-ranked genes derived from TCGA tumour and adjacent normal samples share an overwhelming part of biological processes, which are quite different than those from CCLE and GTEx. The most active miRNAs predicted to target CDS and promoter regions are largely overlapped. These findings corroborate that adjacent normal tissues might have undergone significant molecular transformations towards oncogenesis before phenotypic and histological change; and there probably exists a small yet critical set of miRNAs that profoundly influence various cancer hallmark processes. miRactDB provides a unique resource for the cancer and genomics communities to screen, prioritize and rationalize their candidates of miRNA-gene interactions, in both normal and cancer scenarios.",2021-05-01 +,"FoodData Central, USDA's Updated Approach to Food Composition Data Systems","Abstract

Objectives

A dynamic US food supply and need for assessment of diet on health demands transparent, easily accessible information on foods and food components and related data on production and variability for researchers, health and nutrition policymakers and professionals, and food manufacturers. USDA is to develop an integrated food data system to address these needs and provide capacity for higher resolution compositional data analysis and an increase in available food metadata.

Methods

Create an integrated database system with five unique types of data: 1) Foundation Foods - nutrient values and extensive underlying metadata on commercially available foods that are highly consumed either as a whole food or food ingredient 2) SR Legacy (2018), the final release of Standard Reference 3) The Food and Nutrient Database for Dietary Studies - nutrient values for foods and beverages reported in What We Eat in America, National Health and Nutrition Examination Survey; 4) the USDA Global Branded Food Products Database, industry-provided label data for over 350,000 foods from a public-private partnership; and 5) Experimental Foods that will include information from multiple sources about foods produced under experimental conditions.

Results

FoodData Central was first launched in April 2019 and receives major updates every 6 months. Increased resolution of metadata, such as agricultural data, allows users to investigate many factors, including geographical and agricultural practices that affect the nutritional profiles of foods and dietary intake estimates. Continually added data, foods, and sample information provide research insights on attributes that influence the variability of classic nutrients and emerging bioactive compounds of public health importance.

Conclusions

FoodData Central (https://fdc.nal.usda.gov/) is an integrated data system that provides expanded nutrient profile data and links to related agricultural and experimental research. The system's evolution includes exploring and implementing new database technologies and advanced knowledge systems to enhance searching, retrieval, and research capabilities.

Funding Sources

United States Department of Agriculture, Agricultural Research Service.",2021-06-01 +34174819,UniBind: maps of high-confidence direct TF-DNA interactions across nine species.,"

Background

Transcription factors (TFs) bind specifically to TF binding sites (TFBSs) at cis-regulatory regions to control transcription. It is critical to locate these TF-DNA interactions to understand transcriptional regulation. Efforts to predict bona fide TFBSs benefit from the availability of experimental data mapping DNA binding regions of TFs (chromatin immunoprecipitation followed by sequencing - ChIP-seq).

Results

In this study, we processed ~ 10,000 public ChIP-seq datasets from nine species to provide high-quality TFBS predictions. After quality control, it culminated with the prediction of ~ 56 million TFBSs with experimental and computational support for direct TF-DNA interactions for 644 TFs in > 1000 cell lines and tissues. These TFBSs were used to predict > 197,000 cis-regulatory modules representing clusters of binding events in the corresponding genomes. The high-quality of the TFBSs was reinforced by their evolutionary conservation, enrichment at active cis-regulatory regions, and capacity to predict combinatorial binding of TFs. Further, we confirmed that the cell type and tissue specificity of enhancer activity was correlated with the number of TFs with binding sites predicted in these regions. All the data is provided to the community through the UniBind database that can be accessed through its web-interface ( https://unibind.uio.no/ ), a dedicated RESTful API, and as genomic tracks. Finally, we provide an enrichment tool, available as a web-service and an R package, for users to find TFs with enriched TFBSs in a set of provided genomic regions.

Conclusions

UniBind is the first resource of its kind, providing the largest collection of high-confidence direct TF-DNA interactions in nine species.",2021-06-26 +33179747,jMorp updates in 2020: large enhancement of multi-omics data resources on the general Japanese population.,"In the Tohoku Medical Megabank project, genome and omics analyses of participants in two cohort studies were performed. A part of the data is available at the Japanese Multi Omics Reference Panel (jMorp; https://jmorp.megabank.tohoku.ac.jp) as a web-based database, as reported in our previous manuscript published in Nucleic Acid Research in 2018. At that time, jMorp mainly consisted of metabolome data; however, now genome, methylome, and transcriptome data have been integrated in addition to the enhancement of the number of samples for the metabolome data. For genomic data, jMorp provides a Japanese reference sequence obtained using de novo assembly of sequences from three Japanese individuals and allele frequencies obtained using whole-genome sequencing of 8,380 Japanese individuals. In addition, the omics data include methylome and transcriptome data from ∼300 samples and distribution of concentrations of more than 755 metabolites obtained using high-throughput nuclear magnetic resonance and high-sensitivity mass spectrometry. In summary, jMorp now provides four different kinds of omics data (genome, methylome, transcriptome, and metabolome), with a user-friendly web interface. This will be a useful scientific data resource on the general population for the discovery of disease biomarkers and personalized disease prevention and early diagnosis.",2021-01-01 +34321100,Distinct signatures of codon and codon pair usage in 32 primary tumor types in the novel database CancerCoCoPUTs for cancer-specific codon usage.,"

Background

Gene expression is highly variable across tissues of multi-cellular organisms, influencing the codon usage of the tissue-specific transcriptome. Cancer disrupts the gene expression pattern of healthy tissue resulting in altered codon usage preferences. The topic of codon usage changes as they relate to codon demand, and tRNA supply in cancer is of growing interest.

Methods

We analyzed transcriptome-weighted codon and codon pair usage based on The Cancer Genome Atlas (TCGA) RNA-seq data from 6427 solid tumor samples and 632 normal tissue samples. This dataset represents 32 cancer types affecting 11 distinct tissues. Our analysis focused on tissues that give rise to multiple solid tumor types and cancer types that are present in multiple tissues.

Results

We identified distinct patterns of synonymous codon usage changes for different cancer types affecting the same tissue. For example, a substantial increase in GGT-glycine was observed in invasive ductal carcinoma (IDC), invasive lobular carcinoma (ILC), and mixed invasive ductal and lobular carcinoma (IDLC) of the breast. Change in synonymous codon preference favoring GGT correlated with change in synonymous codon preference against GGC in IDC and IDLC, but not in ILC. Furthermore, we examined the codon usage changes between paired healthy/tumor tissue from the same patient. Using clinical data from TCGA, we conducted a survival analysis of patients based on the degree of change between healthy and tumor-specific codon usage, revealing an association between larger changes and increased mortality. We have also created a database that contains cancer-specific codon and codon pair usage data for cancer types derived from TCGA, which represents a comprehensive tool for codon-usage-oriented cancer research.

Conclusions

Based on data from TCGA, we have highlighted tumor type-specific signatures of codon and codon pair usage. Paired data revealed variable changes to codon usage patterns, which must be considered when designing personalized cancer treatments. The associated database, CancerCoCoPUTs, represents a comprehensive resource for codon and codon pair usage in cancer and is available at https://dnahive.fda.gov/review/cancercocoputs/ . These findings are important to understand the relationship between tRNA supply and codon demand in cancer states and could help guide the development of new cancer therapeutics.",2021-07-28 +34497528,"ACNPD: The Database for Elucidating the Relationships Between Natural Products, Compounds, Molecular Mechanisms, and Cancer Types.","Objectives: Cancer is well-known as a collection of diseases of uncontrolled proliferation of cells caused by mutated genes which are generated by external or internal factors. As the mechanisms of cancer have been constantly revealed, including cell cycle, proliferation, apoptosis and so on, a series of new emerging anti-cancer drugs acting on each stage have also been developed. It is worth noting that natural products are one of the important sources for the development of anti-cancer drugs. To the best of our knowledge, there is not any database summarizing the relationships between natural products, compounds, molecular mechanisms, and cancer types. Materials and methods: Based upon published literatures and other sources, we have constructed an anti-cancer natural product database (ACNPD) (http://www.acnpd-fu.com/). The database currently contains 521 compounds, which specifically refer to natural compounds derived from traditional Chinese medicine plants (derivatives are not considered herein). And, it includes 1,593 molecular mechanisms/signaling pathways, covering 10 common cancer types, such as breast cancer, lung cancer and cervical cancer. 
Results: Integrating existing data sources, we have obtained a large amount of information on natural anti-cancer products, including herbal sources, regulatory targets and signaling pathways. ACNPD is a valuable online resource that illustrates the complex pharmacological relationship between natural products and human cancers. Conclusion: In summary, ACNPD is crucial for better understanding of the relationships between traditional Chinese medicine (TCM) and cancer, which is not only conducive to expand the influence of TCM, but help to find more new anti-cancer drugs in the future.",2021-08-23 +34015823,COVID-19 biomarkers and their overlap with comorbidities in a disease biomarker data model. ,"In response to the COVID-19 outbreak, scientists and medical researchers are capturing a wide range of host responses, symptoms and lingering postrecovery problems within the human population. These variable clinical manifestations suggest differences in influential factors, such as innate and adaptive host immunity, existing or underlying health conditions, comorbidities, genetics and other factors-compounding the complexity of COVID-19 pathobiology and potential biomarkers associated with the disease, as they become available. The heterogeneous data pose challenges for efficient extrapolation of information into clinical applications. We have curated 145 COVID-19 biomarkers by developing a novel cross-cutting disease biomarker data model that allows integration and evaluation of biomarkers in patients with comorbidities. Most biomarkers are related to the immune (SAA, TNF-∝ and IP-10) or coagulation (D-dimer, antithrombin and VWF) cascades, suggesting complex vascular pathobiology of the disease. Furthermore, we observe commonality with established cancer biomarkers (ACE2, IL-6, IL-4 and IL-2) as well as biomarkers for metabolic syndrome and diabetes (CRP, NLR and LDL). 
We explore these trends as we put forth a COVID-19 biomarker resource (https://data.oncomx.org/covid19) that will help researchers and diagnosticians alike.",2021-11-01 +33156327,TCRD and Pharos 2021: mining the human proteome for disease biology.,"In 2014, the National Institutes of Health (NIH) initiated the Illuminating the Druggable Genome (IDG) program to identify and improve our understanding of poorly characterized proteins that can potentially be modulated using small molecules or biologics. Two resources produced from these efforts are: The Target Central Resource Database (TCRD) (http://juniper.health.unm.edu/tcrd/) and Pharos (https://pharos.nih.gov/), a web interface to browse the TCRD. The ultimate goal of these resources is to highlight and facilitate research into currently understudied proteins, by aggregating a multitude of data sources, and ranking targets based on the amount of data available, and presenting data in machine learning ready format. Since the 2017 release, both TCRD and Pharos have produced two major releases, which have incorporated or expanded an additional 25 data sources. Recently incorporated data types include human and viral-human protein-protein interactions, protein-disease and protein-phenotype associations, and drug-induced gene signatures, among others. These aggregated data have enabled us to generate new visualizations and content sections in Pharos, in order to empower users to find new areas of study in the druggable genome.",2021-01-01 +,315. A Multi-center Study to Describe Obese Pediatric Patients with COVID-19 Across the United States,"Abstract

Background

Obesity is linked to increased risk of complications and is reported to be the most common underlying condition for severely ill SARS-CoV-2 infected individuals. Therefore, we aim further to explore the clinical outcomes of obese children with COVID-19.

Methods

Data were from the Pediatric COVID-19 Case Registry, which includes any patient < 21 years of age diagnosed with COVID-19 at 170 instructions across the United States. A total of 778 COVID-19 positive non-immunocompromised hospitalized patients aged 24 months or older were included. Patients were assigned as obese or non-obese based on BMI as reported from medical records referenced to CDC BMI by gender and age classification (https://www.cdc.gov/growthcharts/clinical_charts.htm).

Results

Patients meeting inclusion criteria included 56% not obese and 44% obese. Compared to matched US population, obese children and adolescents appeared in this database at a rate of 2.3 times their frequency in the population. Obese patients were more likely to be Hispanic and older, symptomatic, have abnormal radiological findings, and require oxygen and ICU admission. Mortality, in this analysis, was similar across the groups. Demographic and clinical characteristics. NS: Not significant *within seven days of COVID diagnosis ***mild: no need for supplemental oxygen; moderate: need for supplemental oxygen and severe: need for mechanical ventilation.

Conclusion

The incidence of obesity in hospitalized COVID children is higher than that of the general population (34% vs. 19%), highlighting obesity as an important risk factor for hospitalization associated with SARS-CoV-2 infected. Therefore, obese children and adolescents with COVID should be prioritized for COVID immunization and managed aggressively, given their significant COVID morbidity.

Disclosures

All Authors: No reported disclosures",2021-11-01 +33262341,"HuskinDB, a database for skin permeation of xenobiotics.","Skin permeation is an essential biological property of small organic compounds our body is exposed to, such as drugs in topic formulations, cosmetics, and environmental toxins. Despite the limited availability of experimental data, there is a lack of systematic analysis and structure. We present a novel resource on skin permeation data that collects all measurements available in the literature and systematically structures experimental conditions. Besides the skin permeation value kp, it includes experimental protocols such as skin source site, skin layer used, preparation technique, storage conditions, as well as test conditions such as temperature, pH as well as the type of donor and acceptor solution. It is important to include these parameters in the assessment of the skin permeation data. In addition, we provide an analysis of physicochemical properties and chemical space coverage, laying the basis for applicability domain determination of insights drawn from the collected data points. The database is freely accessible under https://huskindb.drug-design.de or https://doi.org/10.7303/syn21998881 .",2020-12-01 +32990748,miRNASNP-v3: a comprehensive database for SNPs and disease-related variations in miRNAs and miRNA targets.,"MicroRNAs (miRNAs) related single-nucleotide variations (SNVs), including single-nucleotide polymorphisms (SNPs) and disease-related variations (DRVs) in miRNAs and miRNA-target binding sites, can affect miRNA functions and/or biogenesis, thus to impact on phenotypes. miRNASNP is a widely used database for miRNA-related SNPs and their effects. Here, we updated it to miRNASNP-v3 (http://bioinfo.life.hust.edu.cn/miRNASNP/) with tremendous number of SNVs and new features, especially the DRVs data. We analyzed the effects of 7 161 741 SNPs and 505 417 DRVs on 1897 pre-miRNAs (2630 mature miRNAs) and 3'UTRs of 18 152 genes. 
miRNASNP-v3 provides a one-stop resource for miRNA-related SNVs research with the following functions: (i) explore associations between miRNA-related SNPs/DRVs and diseases; (ii) browse the effects of SNPs/DRVs on miRNA-target binding; (iii) functional enrichment analysis of miRNA target gain/loss caused by SNPs/DRVs; (iv) investigate correlations between drug sensitivity and miRNA expression; (v) inquire expression profiles of miRNAs and their targets in cancers; (vi) browse the effects of SNPs/DRVs on pre-miRNA secondary structure changes; and (vii) predict the effects of user-defined variations on miRNA-target binding or pre-miRNA secondary structure. miRNASNP-v3 is a valuable and long-term supported resource in functional variation screening and miRNA function studies.",2021-01-01 +33511767,FAWMine: An integrated database and analysis platform for fall armyworm genomics.,"Fall armyworm (Spodoptera frugiperda), a native insect species in the Americas, is rapidly becoming a major agricultural pest worldwide and is causing great damage to corn, rice, soybeans, and other crops. To control this pest, scientists have accumulated a great deal of high-throughput data of fall armyworm, and nine versions of its genomes and transcriptomes have been published. However, easily accessing and performing integrated analysis of these omics data sets is challenging. Here, we developed the Fall Armyworm Genome Database (FAWMine, http://159.226.67.243:8080/fawmine/) to maintain genome sequences, structural and functional annotations, transcriptomes, co-expression, protein interactions, homologs, pathways, and single-nucleotide variations. FAWMine provides a powerful framework that helps users to perform flexible and customized searching, present integrated data sets using diverse visualization methods, output results tables in a range of file formats, analyze candidate gene lists using multiple widgets, and query data available in other InterMine systems. 
Additionally, stand-alone JBrowse and BLAST services are also established, allowing the users to visualize RNA-Seq data and search genome and annotated gene sequences. Altogether, FAWMine is a useful tool for querying, visualizing, and analyzing compiled data sets rapidly and efficiently. FAWMine will be continually updated to function as a community resource for fall armyworm genomics and pest control research.",2021-01-29 +33156332,DDBJ update: streamlining submission and access of human data.,"The Bioinformation and DDBJ Center (DDBJ Center, https://www.ddbj.nig.ac.jp) provides databases that capture, preserve and disseminate diverse biological data to support research in the life sciences. This center collects nucleotide sequences with annotations, raw sequencing data, and alignment information from high-throughput sequencing platforms, and study and sample information, in collaboration with the National Center for Biotechnology Information (NCBI) and the European Bioinformatics Institute (EBI). This collaborative framework is known as the International Nucleotide Sequence Database Collaboration (INSDC). In collaboration with the National Bioscience Database Center (NBDC), the DDBJ Center also provides a controlled-access database, the Japanese Genotype-phenotype Archive (JGA), which archives and distributes human genotype and phenotype data, requiring authorized access. The NBDC formulates guidelines and policies for sharing human data and reviews data submission and use applications. To streamline all of the processes at NBDC and JGA, we have integrated the two systems by introducing a unified login platform with a group structure in September 2020. In addition to the public databases, the DDBJ Center provides a computer resource, the NIG supercomputer, for domestic researchers to analyze large-scale genomic data. 
This report describes updates to the services of the DDBJ Center, focusing on the NBDC and JGA system enhancements.",2021-01-01 +33247931,"KiMoSys 2.0: an upgraded database for submitting, storing and accessing experimental data for kinetic modeling. ","The KiMoSys (https://kimosys.org), launched in 2014, is a public repository of published experimental data, which contains concentration data of metabolites, protein abundances and flux data. It offers a web-based interface and upload facility to share data, making it accessible in structured formats, while also integrating associated kinetic models related to the data. In addition, it also supplies tools to simplify the construction process of ODE (Ordinary Differential Equations)-based models of metabolic networks. In this release, we present an update of KiMoSys with new data and several new features, including (i) an improved web interface, (ii) a new multi-filter mechanism, (iii) introduction of data visualization tools, (iv) the addition of downloadable data in machine-readable formats, (v) an improved data submission tool, (vi) the integration of a kinetic model simulation environment and (vii) the introduction of a unique persistent identifier system. We believe that this new version will improve its role as a valuable resource for the systems biology community. Database URL:  www.kimosys.org.",2020-11-01 +34425882,INFIMA leverages multi-omics model organism data to identify effector genes of human GWAS variants.,"Genome-wide association studies reveal many non-coding variants associated with complex traits. However, model organism studies largely remain as an untapped resource for unveiling the effector genes of non-coding variants. We develop INFIMA, Integrative Fine-Mapping, to pinpoint causal SNPs for diversity outbred (DO) mice eQTL by integrating founder mice multi-omics data including ATAC-seq, RNA-seq, footprinting, and in silico mutation analysis. 
We demonstrate INFIMA's superior performance compared to alternatives with human and mouse chromatin conformation capture datasets. We apply INFIMA to identify novel effector genes for GWAS variants associated with diabetes. The results of the application are available at http://www.statlab.wisc.edu/shiny/INFIMA/ .",2021-08-23 +32915954,"PanGPCR: predictions for multiple targets, repurposing and side effects.","

Summary

Drug discovery targeting G protein-coupled receptors (GPCRs), the largest known class of therapeutic targets, is challenging. To facilitate the rapid discovery and development of GPCR drugs, we built a system, PanGPCR, to predict multiple potential GPCR targets and their expression locations in the tissues, side effects and possible repurposing of GPCR drugs. With PanGPCR, the compound of interest is docked to a library of 36 experimentally determined crystal structures comprising of 46 docking sites for human GPCRs, and a ranked list is generated from the docking studies to assess all GPCRs and their binding affinities. Users can determine a given compound's GPCR targets and its repurposing potential accordingly. Moreover, potential side effects collected from the SIDER (Side-Effect Resource) database and mapped to 45 tissues and organs are provided by linking predicted off-targets and their expressed sequence tag profiles. With PanGPCR, multiple targets, repurposing potential and side effects can be determined by simply uploading a small ligand.

Availability and implementation

PanGPCR is freely accessible at https://gpcrpanel.cmdm.tw/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +32392583,"M6A2Target: a comprehensive database for targets of m6A writers, erasers and readers. ","N6-methyladenosine (m6A) is the most abundant posttranscriptional modification in mammalian mRNA molecules and has a crucial function in the regulation of many fundamental biological processes. The m6A modification is a dynamic and reversible process regulated by a series of writers, erasers and readers (WERs). Different WERs might have different functions, and even the same WER might function differently in different conditions, which are mostly due to different downstream genes being targeted by the WERs. Therefore, identification of the targets of WERs is particularly important for elucidating this dynamic modification. However, there is still no public repository to host the known targets of WERs. Therefore, we developed the m6A WER target gene database (m6A2Target) to provide a comprehensive resource of the targets of m6A WERs. M6A2Target provides a user-friendly interface to present WER targets in two different modules: 'Validated Targets', referred to as WER targets identified from low-throughput studies, and 'Potential Targets', including WER targets analyzed from high-throughput studies. Compared to other existing m6A-associated databases, m6A2Target is the first specific resource for m6A WER target genes. M6A2Target is freely accessible at http://m6a2target.canceromics.org.",2021-05-01 +33021634,The Nucleome Data Bank: web-based resources to simulate and analyze the three-dimensional genome.,"We introduce the Nucleome Data Bank (NDB), a web-based platform to simulate and analyze the three-dimensional (3D) organization of genomes. The NDB enables physics-based simulation of chromosomal structural dynamics through the MEGABASE + MiChroM computational pipeline. 
The input of the pipeline consists of epigenetic information sourced from the Encode database; the output consists of the trajectories of chromosomal motions that accurately predict Hi-C and fluorescence insitu hybridization data, as well as multiple observations of chromosomal dynamics in vivo. As an intermediate step, users can also generate chromosomal sub-compartment annotations directly from the same epigenetic input, without the use of any DNA-DNA proximity ligation data. Additionally, the NDB freely hosts both experimental and computational structural genomics data. Besides being able to perform their own genome simulations and download the hosted data, users can also analyze and visualize the same data through custom-designed web-based tools. In particular, the one-dimensional genetic and epigenetic data can be overlaid onto accurate 3D structures of chromosomes, to study the spatial distribution of genetic and epigenetic features. The NDB aims to be a shared resource to biologists, biophysicists and all genome scientists. The NDB is available at https://ndb.rice.edu.",2021-01-01 +33211880,"BRENDA, the ELIXIR core data resource in 2021: new developments and updates.","The BRENDA enzyme database (https://www.brenda-enzymes.org), established in 1987, has evolved into the main collection of functional enzyme and metabolism data. In 2018, BRENDA was selected as an ELIXIR Core Data Resource. BRENDA provides reliable data, continuous curation and updates of classified enzymes, and the integration of newly discovered enzymes. The main part contains >5 million data for ∼90 000 enzymes from ∼13 000 organisms, manually extracted from ∼157 000 primary literature references, combined with information of text and data mining, data integration, and prediction algorithms. Supplements comprise disease-related data, protein sequences, 3D structures, genome annotations, ligand information, taxonomic, bibliographic, and kinetic data. 
BRENDA offers an easy access to enzyme information from quick to advanced searches, text- and structured-based queries for enzyme-ligand interactions, word maps, and visualization of enzyme data. The BRENDA Pathway Maps are completely revised and updated for an enhanced interactive and intuitive usability. The new design of the Enzyme Summary Page provides an improved access to each individual enzyme. A new protein structure 3D viewer was integrated. The prediction of the intracellular localization of eukaryotic enzymes has been implemented. The new EnzymeDetector combines BRENDA enzyme annotations with protein and genome databases for the detection of eukaryotic and prokaryotic enzymes.",2021-01-01 +33442735,O-GlcNAcAtlas: A database of experimentally identified O-GlcNAc sites and proteins.,"O-linked β-N-acetylglucosamine (O-GlcNAc) is a post-translational modification (i.e., O-GlcNAcylation) on the serine/threonine residues of proteins. As a unique intracellular monosaccharide modification, protein O-GlcNAcylation plays important roles in almost all biochemical processes examined. Aberrant O-GlcNAcylation underlies the etiologies of a number of chronic diseases. With the tremendous improvement of techniques, thousands of proteins along with their O-GlcNAc sites have been reported. However, until now, there are few databases dedicated to accommodate the rapid accumulation of such information. Thus, O-GlcNAcAtlas is created to integrate all experimentally identified O-GlcNAc sites and proteins. O-GlcNAcAtlas consists of two datasets (Dataset-I and Dataset-II, for unambiguously identified sites and ambiguously identified sites, respectively), representing a total number of 4571 O-GlcNAc modified proteins from all species studied from 1984 to 31 Dec 2019. For each protein, comprehensive information (including species, sample type, gene symbol, modified peptides and/or modification sites, site mapping methods and literature references) is provided. 
To solve the heterogeneity among the data collected from different sources, the sequence identity of these reported O-GlcNAc peptides are mapped to the UniProtKB protein entries. To our knowledge, O-GlcNAcAtlas is a highly comprehensive and rigorously curated database encapsulating all O-GlcNAc sites and proteins identified in the past 35 years. We expect that O-GlcNAcAtlas will be a useful resource to facilitate O-GlcNAc studies and computational analyses of protein O-GlcNAcylation. The public version of the web interface to the O-GlcNAcAtlas can be found at http://oglcnac.org/.",2021-08-01 +33270111,GENCODE 2021.,"The GENCODE project annotates human and mouse genes and transcripts supported by experimental data with high accuracy, providing a foundational resource that supports genome biology and clinical genomics. GENCODE annotation processes make use of primary data and bioinformatic tools and analysis generated both within the consortium and externally to support the creation of transcript structures and the determination of their function. Here, we present improvements to our annotation infrastructure, bioinformatics tools, and analysis, and the advances they support in the annotation of the human and mouse genomes including: the completion of first pass manual annotation for the mouse reference genome; targeted improvements to the annotation of genes associated with SARS-CoV-2 infection; collaborative projects to achieve convergence across reference annotation databases for the annotation of human and mouse protein-coding genes; and the first GENCODE manually supervised automated annotation of lncRNAs. Our annotation is accessible via Ensembl, the UCSC Genome Browser and https://www.gencodegenes.org.",2021-01-01 +34965192,piRNAQuest V.2: an updated resource for searching through the piRNAome of multiple species.,"PIWI interacting RNAs (piRNAs) have emerged as important gene regulators in recent times. 
Since the release of our first version of piRNAQuest in 2014, lots of novel piRNAs have been annotated in different species other than human, mouse and rat. Such new developments in piRNA research have led us to develop an updated database piRNAQuest V.2. It consists of 92,77,689 piRNA entries for 25 new species of different phylum along with human, mouse and rat. Besides providing primary piRNA features which include their genomic location, with further information on piRNAs overlapping with repeat elements, pseudogenes and syntenic regions, etc., the novel features of this version includes (i) density based cluster prediction, (ii) piRNA expression profile across various healthy and disease systems and (iii) piRNA target prediction. The concept of density-based piRNA cluster identification is robust as it does not consider parametric distribution in its model. The piRNA expression profile for 21 disease systems including cancer have been hosted in addition to 32 tissue specific piRNA expression profile for various species. Further, the piRNA target prediction section includes both predicted and curated piRNA targets within eight disease systems and developmental stages of mouse testis. Further, users can visualize the piRNA-target duplex structure and the ping-pong signature pattern for all the ping-pong piRNA partners in different species. Overall, piRNAQuest V.2 is an updated user-friendly database which will serve as a useful resource to survey, search and retrieve information on piRNAs for multiple species. 
This freely accessible database is available at http://dibresources.jcbose.ac.in/zhumur/pirnaquest2.",2021-12-31 +,Potential application of a knowledgebase of iron metabolism of Acidithiobacillus ferrooxidans as an alternative platform,"Acidithiobacillus ferrooxidans is a facultative anaerobe that depends on ferrous ion oxidation as well as reduced sulfur oxidation to obtain energy and is widely applied in metallurgy, environmental protection, and soil remediation. With the accumulation of experimental data, metabolic mechanisms, kinetic models, and several databases have been established. However, scattered data are not conducive to understanding A. ferrooxidans that necessitates updated information informed by systems biology.Here, we constructed a knowledgebase of iron metabolism of A. ferrooxidans (KIMAf) system by integrating public databases and reviewing the literature, including the database of bioleaching substrates (DBS), the database of bioleaching metallic ion-related proteins (MIRP), the A. ferrooxidans bioinformation database (Af-info), and the database for dynamics model of bioleaching (DDMB). The DBS and MIRP incorporate common bioleaching substrates and metal ion-related proteins. Af-info and DDMB integrate nucleotide, gene, protein, and kinetic model information. Statistical analysis was performed to elucidate the distribution of isolated A. ferrooxidans strains, evolutionary and metabolic advances, and the development of bioleaching models.This comprehensive system provides researchers with a platform of available iron metabolism-related resources of A. ferrooxidans and facilitates its application.Zhou Z, Ma W, Liu Y, et al. Potential application of a knowledgebase of iron metabolism of Acidithiobacillus ferrooxidans as an alternative platform. 
Electron J Biotechnol 2021;51; https://doi.org/10.1016/j.ejbt.2021.04.003",2021-07-01 +34048576,The COVID-19 Data Portal: accelerating SARS-CoV-2 and COVID-19 research through rapid open access data sharing.,"The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) pandemic will be remembered as one of the defining events of the 21st century. The rapid global outbreak has had significant impacts on human society and is already responsible for millions of deaths. Understanding and tackling the impact of the virus has required a worldwide mobilisation and coordination of scientific research. The COVID-19 Data Portal (https://www.covid19dataportal.org/) was first released as part of the European COVID-19 Data Platform, on April 20th 2020 to facilitate rapid and open data sharing and analysis, to accelerate global SARS-CoV-2 and COVID-19 research. The COVID-19 Data Portal has fortnightly feature releases to continue to add new data types, search options, visualisations and improvements based on user feedback and research. The open datasets and intuitive suite of search, identification and download services, represent a truly FAIR (Findable, Accessible, Interoperable and Reusable) resource that enables researchers to easily identify and quickly obtain the key datasets needed for their COVID-19 research.",2021-07-01 +,Scoring System to Triage Patients for Spine Surgery in the Setting of Limited Resources,"Abstract

INTRODUCTION

As of May 04, 2020, the COVID-19 pandemic has affected over 3.5 million people and touched every inhabited continent. Accordingly, it has stressed health systems the world over leading to the cancellation of elective surgical cases and discussions regarding healthcare resource rationing. It is expected that rationing of surgical resources will continue even after the pandemic peak, and may recur with future pandemics, creating a need for a means of triaging emergent and elective spine surgery patients.

METHODS

Using a modified Delphi technique, a cohort of 16 fellowship-trained spine surgeons from 10 academic medical centers constructed a scoring system for the triage and prioritization of emergent and elective spine surgeries. Three separate rounds of videoconferencing and written correspondence were used to reach a final scoring system. Sixteen test cases were used to optimize the scoring system so that it could categorize cases as requiring emergent, urgent, high-priority elective, or low-priority elective scheduling.

RESULTS

The devised scoring system included 8 independent components: neurological status, underlying spine stability, presentation of a highrisk post-operative complication, patient medical comorbidities, expected hospital course, expected discharge disposition, facility resource limitations, and local disease burden. The resultant calculator was deployed as a freely-available web-based calculator: https://jhuspine3.shinyapps.io/SpineUrgencyCalculator/

CONCLUSION

Here we present the first quantitative urgency scoring system for the triage and prioritizing of spine surgery cases in resource-limited settings. We believe that our scoring system, while not all-encompassing, has potential value as a guide for triaging spine surgical cases during the COVID pandemic and post-COVID period.",2020-11-16 +33104772,The mouse Gene Expression Database (GXD): 2021 update.,"The Gene Expression Database (GXD; www.informatics.jax.org/expression.shtml) is an extensive and well-curated community resource of mouse developmental gene expression information. For many years, GXD has collected and integrated data from RNA in situ hybridization, immunohistochemistry, RT-PCR, northern blot, and western blot experiments through curation of the scientific literature and by collaborations with large-scale expression projects. Since our last report in 2019, we have continued to acquire these classical types of expression data; developed a searchable index of RNA-Seq and microarray experiments that allows users to quickly and reliably find specific mouse expression studies in ArrayExpress (https://www.ebi.ac.uk/arrayexpress/) and GEO (https://www.ncbi.nlm.nih.gov/geo/); and expanded GXD to include RNA-Seq data. Uniformly processed RNA-Seq data are imported from the EBI Expression Atlas and then integrated with the other types of expression data in GXD, and with the genetic, functional, phenotypic and disease-related information in Mouse Genome Informatics (MGI). This integration has made the RNA-Seq data accessible via GXD's enhanced searching and filtering capabilities. 
Further, we have embedded the Morpheus heat map utility into the GXD user interface to provide additional tools for display and analysis of RNA-Seq data, including heat map visualization, sorting, filtering, hierarchical clustering, nearest neighbors analysis and visual enrichment.",2021-01-01 +32703790,Cell type-specific novel long non-coding RNA and circular RNA in the BLUEPRINT hematopoietic transcriptomes atlas.,"Transcriptional profiling of hematopoietic cell subpopulations has helped to characterize the developmental stages of the hematopoietic system and the molecular bases of malignant and non-malignant blood diseases. Previously, only the genes targeted by expression microarrays could be profiled genome-wide. High-throughput RNA sequencing, however, encompasses a broader repertoire of RNA molecules, without restriction to previously annotated genes. We analyzed the BLUEPRINT consortium RNA-sequencing data for mature hematopoietic cell types. The data comprised 90 total RNA-sequencing samples, each composed of one of 27 cell types, and 32 small RNA-sequencing samples, each composed of one of 11 cell types. We estimated gene and isoform expression levels for each cell type using existing annotations from Ensembl. We then used guided transcriptome assembly to discover unannotated transcripts. We identified hundreds of novel non-coding RNA genes and showed that the majority have cell type-dependent expression. We also characterized the expression of circular RNA and found that these are also cell type-specific. These analyses refine the active transcriptional landscape of mature hematopoietic cells, highlight abundant genes and transcriptional isoforms for each blood cell type, and provide a valuable resource for researchers of hematologic development and diseases. 
Finally, we made the data accessible via a web-based interface: https://blueprint.haem.cam.ac.uk/bloodatlas/.",2021-10-01 +32507889,"The articles.ELM resource: simplifying access to protein linear motif literature by annotation, text-mining and classification. ","Modern biology produces data at a staggering rate. Yet, much of these biological data is still isolated in the text, figures, tables and supplementary materials of articles. As a result, biological information created at great expense is significantly underutilised. The protein motif biology field does not have sufficient resources to curate the corpus of motif-related literature and, to date, only a fraction of the available articles have been curated. In this study, we develop a set of tools and a web resource, 'articles.ELM', to rapidly identify the motif literature articles pertinent to a researcher's interest. At the core of the resource is a manually curated set of about 8000 motif-related articles. These articles are automatically annotated with a range of relevant biological data allowing in-depth search functionality. Machine-learning article classification is used to group articles based on their similarity to manually curated motif classes in the Eukaryotic Linear Motif resource. Articles can also be manually classified within the resource. The 'articles.ELM' resource permits the rapid and accurate discovery of relevant motif articles thereby improving the visibility of motif literature and simplifying the recovery of valuable biological insights sequestered within scientific articles. Consequently, this web resource removes a critical bottleneck in scientific productivity for the motif biology field. Database URL: http://slim.icr.ac.uk/articles/.",2020-01-01 +33252190,GRIN database: A unified and manually curated repertoire of GRIN variants.,"Glutamatergic neurotransmission is crucial for brain development, wiring neuronal function, and synaptic plasticity mechanisms. 
Recent genetic studies showed the existence of autosomal dominant de novo GRIN gene variants associated with GRIN-related disorders (GRDs), a rare pediatric neurological disorder caused by N-methyl- d-aspartate receptor (NMDAR) dysfunction. Notwithstanding, GRIN variants identification is exponentially growing and their clinical, genetic, and functional annotations remain highly fragmented, representing a bottleneck in GRD patient's stratification. To shorten the gap between GRIN variant identification and patient stratification, we present the GRIN database (GRINdb), a publicly available, nonredundant, updated, and curated database gathering all available genetic, functional, and clinical data from more than 4000 GRIN variants. The manually curated GRINdb outputs on a web server, allowing query and retrieval of reported GRIN variants, and thus representing a fast and reliable bioinformatics resource for molecular clinical advice. Furthermore, the comprehensive mapping of GRIN variants' genetic and clinical information along NMDAR structure revealed important differences in GRIN variants' pathogenicity and clinical phenotypes, shedding light on GRIN-specific fingerprints. Overall, the GRINdb and web server is a resource for molecular stratification of GRIN variants, delivering clinical and investigational insights into GRDs. GRINdb is accessible at http://lmc.uab.es/grindb.",2020-11-30 +33863373,IrGO: Iranian traditional medicine General Ontology and knowledge base.,"

Background

Iranian traditional medicine, also known as Persian Medicine, is a holistic school of medicine with a long prolific history. It describes numerous concepts and the relationships between them. However, no unified language system has been proposed for the concepts of this medicine up to the present time. Considering the extensive terminology in the numerous textbooks written by the scholars over centuries, comprehending the totality of concepts is obviously a very challenging task. To resolve this issue, overcome the obstacles, and code the concepts in a reusable manner, constructing an ontology of the concepts of Iranian traditional medicine seems a necessity.

Construction and content

Makhzan al-Advieh, an encyclopedia of materia medica compiled by Mohammad Hossein Aghili Khorasani, was selected as the resource to create an ontology of the concepts used to describe medicinal substances. The steps followed to accomplish this task included (1) compiling the list of classes via examination of textbooks, and text mining the resource followed by manual review to ensure comprehensiveness of extracted terms; (2) arranging the classes in a taxonomy; (3) determining object and data properties; (4) specifying annotation properties including ID, labels (English and Persian), alternative terms, and definitions (English and Persian); (5) ontology evaluation. The ontology was created using Protégé with adherence to the principles of ontology development provided by the Open Biological and Biomedical Ontology (OBO) foundry.

Utility and discussion

The ontology was finalized with inclusion of 3521 classes, 15 properties, and 20,903 axioms in the Iranian traditional medicine General Ontology (IrGO) database, freely available at http://ir-go.net/ . An indented list and an interactive graph view using WebVOWL were used to visualize the ontology. All classes were linked to their instances in UNaProd database to create a knowledge base of ITM materia medica.

Conclusion

We constructed an ontology-based knowledge base of ITM concepts in the domain of materia medica to help offer a shared and common understanding of this concept, enable reuse of the knowledge, and make the assumptions explicit. This ontology will aid Persian medicine practitioners in clinical decision-making to select drugs. Extending IrGO will bridge the gap between traditional and conventional schools of medicine, helping guide future research in the process of drug discovery.",2021-04-16 +,"USDA, NIH and FDA Iodine Database of U.S. Foods for Estimating Iodine Intakes","Abstract

Objectives

Data on the iodine content of foods are needed to assess intake and plan dietary guidance. Iodine is central for thyroid function in human growth, reproduction, neurologic development and energy metabolism, and inadequate or excessive intakes can cause thyroid dysfunction and/or disease. Overall, U.S. iodine intake is sufficient, but some women of reproductive age and pregnant women may be at risk for deficiency, as well as people whose dietary patterns do not include iodine-rich foods.

Methods

A Special Interest Database (SID) was developed through the collaboration of the Methods and Application of Food Composition Laboratory (USDA), the Food and Drug Administration (FDA), and the Office of Dietary Supplements (NIH). Data for foods and beverages were derived from samples analyzed by USDA and the FDA Total Diet Study; foods included seaweed, fish and other seafood, dairy, iodized salt, eggs, and commercial foods; metadata were captured as varying iodine levels may arise from feed supplementation, iodophor use, and iodine-containing ingredients in processed foods. Samples were analyzed for iodine using inductively coupled plasma mass spectrometry (ICP-MS). Quality control included certified reference materials and secondary in-house controls.

Results

The Special Interest Database on Iodine (https://www.ars.usda.gov/northeast-area/beltsville-md-bhnrc/beltsville-human-nutrition-research-center/methods-and-application-of-food-composition-laboratory/mafcl-site-pages/iodine/) was released in 2020 and includes food descriptions, means, standard deviations, value ranges, sample sizes, and supporting information for 430 foods. Foods continue to be analyzed for iodine and added to the database. In addition, iodine intakes of the U.S. population are being calculated by mapping the iodine content of foods to food consumption data from the 2014 U.S. National Health and Nutrition Examination Survey (NHANES) and eventually more recent NHANES dietary data.

Conclusions

The SID on Iodine and the mapped NHANES data provide needed information to monitor iodine status and develop dietary guidance for the general U.S. population and vulnerable subgroups. Furthermore, the database can provide a valuable tool for other research programs and clinical applications in iodine nutrition.

Funding Sources

NIH Office of Dietary Supplements.",2021-06-01 +34762703,CicerSpTEdb: A web-based database for high-resolution genome-wide identification of transposable elements in Cicer species.,"Recently, Cicer species have experienced increased research interest due to their economic importance, especially in genetics, genomics, and crop improvement. The Cicer arietinum, Cicer reticulatum, and Cicer echinospermum genomes have been sequenced and provide valuable resources for trait improvement. Since the publication of the chickpea draft genome, progress has been made in genome assembly, functional annotation, and identification of polymorphic markers. However, work is still needed to identify transposable elements (TEs) and make them available for researchers. In this paper, we present CicerSpTEdb, a comprehensive TE database for Cicer species that aims to improve our understanding of the organization and structural variations of the chickpea genome. Using structure and homology-based methods, 3942 C. echinospermum, 3579 C. reticulatum, and 2240 C. arietinum TEs were identified. Comparisons between Cicer species indicate that C. echinospermum has the highest number of LTR-RT and hAT TEs. C. reticulatum has more Mutator, PIF Harbinger, Tc1 Mariner, and CACTA TEs, while C. arietinum has the highest number of Helitron. CicerSpTEdb enables users to search and visualize TEs by location and download their results. The database will provide a powerful resource that can assist in developing TE target markers for molecular breeding and answer related biological questions. Database URL: http://cicersptedb.easyomics.org/index.php.",2021-11-11 +34639237,GreeningDB: A Database of Host-Pathogen Protein-Protein Interactions and Annotation Features of the Bacteria Causing Huanglongbing HLB Disease. ,"The Citrus genus comprises some of the most important and commonly cultivated fruit plants. 
Within the last decade, citrus greening disease (also known as huanglongbing or HLB) has emerged as the biggest threat for the citrus industry. This disease does not have a cure yet and, thus, many efforts have been made to find a solution to this devastating condition. There are challenges in the generation of high-yield resistant cultivars, in part due to the limited and sparse knowledge about the mechanisms that are used by the Liberibacter bacteria to proliferate the infection in Citrus plants. Here, we present GreeningDB, a database implemented to provide the annotation of Liberibacter proteomes, as well as the host-pathogen comparactomics tool, a novel platform to compare the predicted interactomes of two HLB host-pathogen systems. GreeningDB is built to deliver a user-friendly interface, including network visualization and links to other resources. We hope that by providing these characteristics, GreeningDB can become a central resource to retrieve HLB-related protein annotations, and thus, aid the community that is pursuing the development of molecular-based strategies to mitigate this disease's impact. The database is freely available at http://bioinfo.usu.edu/GreeningDB/ (accessed on 11 August 2021).",2021-10-08 +33166149,Insights from the First Phosphopeptide Challenge of the MS Resource Pillar of the HUPO Human Proteome Project.,"Mass spectrometry has greatly improved the analysis of phosphorylation events in complex biological systems and on a large scale. Despite considerable progress, the correct identification of phosphorylated sites, their quantification, and their interpretation regarding physiological relevance remain challenging. 
The MS Resource Pillar of the Human Proteome Organization (HUPO) Human Proteome Project (HPP) initiated the Phosphopeptide Challenge as a resource to help the community evaluate methods, learn procedures and data analysis routines, and establish their own workflows by comparing results obtained from a standard set of 94 phosphopeptides (serine, threonine, tyrosine) and their nonphosphorylated counterparts mixed at different ratios in a neat sample and a yeast background. Participants analyzed both samples with their method(s) of choice to report the identification and site localization of these peptides, determine their relative abundances, and enrich for the phosphorylated peptides in the yeast background. We discuss the results from 22 laboratories that used a range of different methods, instruments, and analysis software. We reanalyzed submitted data with a single software pipeline and highlight the successes and challenges in correct phosphosite localization. All of the data from this collaborative endeavor are shared as a resource to encourage the development of even better methods and tools for diverse phosphoproteomic applications. All submitted data and search results were uploaded to MassIVE (https://massive.ucsd.edu/) as data set MSV000085932 with ProteomeXchange identifier PXD020801.",2020-11-09 +32258285,Dataset for WWW landing pages webobject retrieval performance evaluation.,"This dataset describes data obtained from a multi-day World Wide Web (WWW) measurement campaign distributed internationally across multiple Amazon Web Service (AWS) datacentres. The Chrome web browser was controlled by the Selenium framework to make repetitive requests to several popular websites; the resulting webobjects were captured by a proxy server and details about them stored in the provided SQLite3 databases. 
A Python script is provided to evaluate the webobjects with respect to their configured as well as their actual expiration times, as part of our more detailed analysis that we provide in [1]. Researchers and practitioners can readily employ this dataset in their own research endeavours with little efforts for avenues of inquiry beyond webobject expiration times we described in [1], as we provide additional information about each webobject and each website visit during the measurement campaign time horizon.",2020-03-14 +33769951,A Comprehensive Map of mRNAs and Their Isoforms across All 14 Renal Tubule Segments of Mouse. ,"The repertoire of protein expression along the renal tubule depends both on regulation of transcription and regulation of alternative splicing that can generate multiple proteins from a single gene. A full-length, small-sample RNA-seq protocol profiled transcriptomes for all 14 renal tubule segments microdissected from mouse kidneys. This study identified >34,000 transcripts, including 3709 that were expressed in a segment-specific manner. All data are provided as an online resource (https://esbl.nhlbi.nih.gov/MRECA/Nephron/). Many of the genes expressed in unique patterns along the renal tubule were solute carriers, transcription factors, or G protein-coupled receptors that account for segment-specific function. Mapping the distribution of transcripts associated with Wnk-SPAK-PKA signaling, renin-angiotensin-aldosterone signaling, and cystic diseases of the kidney illustrated the applications of the online resource. The method allowed full-length mapping of RNA-seq reads, which facilitated comprehensive, unbiased characterization of alternative exon usage along the renal tubule, including known isoforms of Cldn10, Kcnj1 (ROMK), Slc12a1 (NKCC2), Wnk1, Stk39 (SPAK), and Slc14a2 (UT-A urea transporter). It also identified many novel isoforms with segment-specific distribution. 
These included variants associated with altered protein structure (Slc9a8, Khk, Tsc22d1, and Scoc), and variants that may affect untranslated, regulatory regions of transcripts (Pth1r, Pkar1a, and Dab2). Full-length, unbiased sequencing of transcripts identified gene-expression patterns along the mouse renal tubule. The data, provided as an online resource, include both quantitative and qualitative differences in transcripts. Identification of alternative splicing along the renal tubule may prove critical to understanding renal physiology and pathophysiology.",2021-03-04 +32990749,TCRdb: a comprehensive database for T-cell receptor sequences with powerful search function.,"T cells and the T-cell receptor (TCR) repertoire play pivotal roles in immune response and immunotherapy. TCR sequencing (TCR-Seq) technology has enabled accurate profiling TCR repertoire and currently a large number of TCR-Seq data are available in public. Based on the urgent need to effectively re-use these data, we developed TCRdb, a comprehensive human TCR sequences database, by a uniform pipeline to characterize TCR sequences on TCR-Seq data. TCRdb contains more than 277 million highly reliable TCR sequences from over 8265 TCR-Seq samples across hundreds of tissues/clinical conditions/cell types. The unique features of TCRdb include: (i) comprehensive and reliable sequences for TCR repertoire in different samples generated by a strict and uniform pipeline of TCRdb; (ii) powerful search function, allowing users to identify their interested TCR sequences in different conditions; (iii) categorized sample metadata, enabling comparison of TCRs in different sample types; (iv) interactive data visualization charts, describing the TCR repertoire in TCR diversity, length distribution and V-J gene utilization. 
The TCRdb database is freely available at http://bioinfo.life.hust.edu.cn/TCRdb/ and will be a useful resource in the research and application community of T cell immunology.",2021-01-01 +,"Building a baseline for habitat-forming corals by a multi-source approach, including Web Ecological Knowledge","In the Mediterranean, habitat-forming corals often characterize essential fish habitats. While their distribution is sufficiently known for the western basin, few data are available from the Central-Eastern Mediterranean Sea (CEM). This study fills this gap supplying the largest dataset ever built on the geographical and bathymetric distribution of the most relevant habitat-forming corals (Eunicella cavolini, Eunicella verrucosa, Eunicella singularis, Leptogorgia sarmentosa, Paramuricea clavata, Corallium rubrum and Savalia savaglia) of the CEM. Information collected from different sources such as literature, citizen science, and from the World Wide Web (WWW) was combined. Videos published on the WWW provided additional information on the presence of fishing lines and signs of damage, as well as on the distribution of purple and yellow-purple colonies of Paramuricea clavata. The study highlighted the impressive amount of information that the WWW can offer to scientists, termed here as Web Ecological Knowledge (WEK). The WEK is constantly fuelled by internauts, representing a free, refreshable, long-term exploitable reservoir of information. A quick and easy method to retrieve data from the WWW was illustrated. In addition, the distribution of corals was overlapped to marine protected areas and to the distribution of environmental conditions suitable for coralligenous habitats, fragile biogenic Mediterranean structures hosting complex assemblages in need of strict protection. The collected data allowed identifying priority areas with high species diversity and sites that are impacted by fishing activities. 
Supplied data can correctly address conservation and restoration policies in the CEM, adding an important contribution to ecosystem-based marine spatial planning.",2018-04-01 +33084904,DualSeqDB: the host-pathogen dual RNA sequencing database for infection processes.,"Despite antibiotic resistance being a matter of growing concern worldwide, the bacterial mechanisms of pathogenesis remain underexplored, restraining our ability to develop new antimicrobials. The rise of high-throughput sequencing technology has made available a massive amount of transcriptomic data that could help elucidate the mechanisms underlying bacterial infection. Here, we introduce the DualSeqDB database, a resource that helps the identification of gene transcriptional changes in both pathogenic bacteria and their natural hosts upon infection. DualSeqDB comprises nearly 300 000 entries from eight different studies, with information on bacterial and host differential gene expression under in vivo and in vitro conditions. Expression data values were calculated entirely from raw data and analyzed through a standardized pipeline to ensure consistency between different studies. It includes information on seven different strains of pathogenic bacteria and a variety of cell types and tissues in Homo sapiens, Mus musculus and Macaca fascicularis at different time points. We envisage that DualSeqDB can help the research community in the systematic characterization of genes involved in host infection and help the development and tailoring of new molecules against infectious diseases. 
DualSeqDB is freely available at http://www.tartaglialab.com/dualseq.",2021-01-01 +32986834,"The ModelSEED Biochemistry Database for the integration of metabolic annotations and the reconstruction, comparison and analysis of metabolic models for plants, fungi and microbes.","For over 10 years, ModelSEED has been a primary resource for the construction of draft genome-scale metabolic models based on annotated microbial or plant genomes. Now being released, the biochemistry database serves as the foundation of biochemical data underlying ModelSEED and KBase. The biochemistry database embodies several properties that, taken together, distinguish it from other published biochemistry resources by: (i) including compartmentalization, transport reactions, charged molecules and proton balancing on reactions; (ii) being extensible by the user community, with all data stored in GitHub; and (iii) design as a biochemical 'Rosetta Stone' to facilitate comparison and integration of annotations from many different tools and databases. The database was constructed by combining chemical data from many resources, applying standard transformations, identifying redundancies and computing thermodynamic properties. The ModelSEED biochemistry is continually tested using flux balance analysis to ensure the biochemical network is modeling-ready and capable of simulating diverse phenotypes. Ontologies can be designed to aid in comparing and reconciling metabolic reconstructions that differ in how they represent various metabolic pathways. 
ModelSEED now includes 33,978 compounds and 36,645 reactions, available as a set of extensible files on GitHub, and available to search at https://modelseed.org/biochem and KBase.",2021-01-01 +33175170,"Database Resources of the National Genomics Data Center, China National Center for Bioinformation in 2021.","The National Genomics Data Center (NGDC), part of the China National Center for Bioinformation (CNCB), provides a suite of database resources to support worldwide research activities in both academia and industry. With the explosive growth of multi-omics data, CNCB-NGDC is continually expanding, updating and enriching its core database resources through big data deposition, integration and translation. In the past year, considerable efforts have been devoted to 2019nCoVR, a newly established resource providing a global landscape of SARS-CoV-2 genomic sequences, variants, and haplotypes, as well as Aging Atlas, BrainBase, GTDB (Glycosyltransferases Database), LncExpDB, and TransCirc (Translation potential for circular RNAs). Meanwhile, a series of resources have been updated and improved, including BioProject, BioSample, GWH (Genome Warehouse), GVM (Genome Variation Map), GEN (Gene Expression Nebulas) as well as several biodiversity and plant resources. Particularly, BIG Search, a scalable, one-stop, cross-database search engine, has been significantly updated by providing easy access to a large number of internal and external biological resources from CNCB-NGDC, our partners, EBI and NCBI. All of these resources along with their services are publicly accessible at https://bigd.big.ac.cn.",2021-01-01 +33237313,RepeatsDB in 2021: improved data and extended classification for protein tandem repeat structures.,"The RepeatsDB database (URL: https://repeatsdb.org/) provides annotations and classification for protein tandem repeat structures from the Protein Data Bank (PDB). Protein tandem repeats are ubiquitous in all branches of the tree of life. 
The accumulation of solved repeat structures provides new possibilities for classification and detection, but also increasing the need for annotation. Here we present RepeatsDB 3.0, which addresses these challenges and presents an extended classification scheme. The major conceptual change compared to the previous version is the hierarchical classification combining top levels based solely on structural similarity (Class > Topology > Fold) with two new levels (Clan > Family) requiring sequence similarity and describing repeat motifs in collaboration with Pfam. Data growth has been addressed with improved mechanisms for browsing the classification hierarchy. A new UniProt-centric view unifies the increasingly frequent annotation of structures from identical or similar sequences. This update of RepeatsDB aligns with our commitment to develop a resource that extracts, organizes and distributes specialized information on tandem repeat protein structures.",2021-01-01 +33877858,Developing a Flexible National Wastewater Surveillance System for COVID-19 and Beyond.,"

Background

Wastewater testing offers a cost-effective strategy for measuring population disease prevalence and health behaviors. For COVID-19, wastewater surveillance addresses testing gaps and provides an early warning for outbreaks. As U.S. federal agencies build a National Wastewater Surveillance System around the pandemic, thinking through ways to develop flexible frameworks for wastewater sampling, testing, and reporting can avoid unnecessary system overhauls for future infectious disease, chronic disease, and drug epidemics.

Objectives

We discuss ways to transform a historically academic exercise into a tool for epidemic response. We generalize lessons learned by a global network of wastewater researchers around validation and implementation for COVID-19 and opioids while also drawing on our experience with wastewater-based epidemiology in the United States.

Discussion

Sustainable wastewater surveillance requires coordination between health and safety officials, utilities, labs, and researchers. Adapting sampling frequency, type, and location to threat level, community vulnerability, biomarker properties, and decisions that wastewater data will inform can increase the practical value of the data. Marketplace instabilities, coupled with a fragmented testing landscape due to specialization, may require officials to engage multiple labs to test for known and unknown threats. Government funding can stabilize the market, balancing commercial pressures with public good, and incentivize data sharing. When reporting results, standardizing metrics and contextualizing wastewater data with health resource data can provide insights into a community's vulnerability and identify strategies to prevent health care systems from being overwhelmed. If wastewater data will inform policy decisions for an entire community, comparing characteristics of the wastewater treatment plant's service population to those of the larger community can help determine whether the wastewater data are generalizable. Ethical protocols may be needed to protect privacy and avoid stigmatization. With data-driven approaches to sample collection, analysis, and interpretation, officials can use wastewater surveillance for adaptive resource allocation, pandemic management, and program evaluation. https://doi.org/10.1289/EHP8572.",2021-04-20 +34017945,Identity and compatibility of reference genome resources.,"Genome analysis relies on reference data like sequences, feature annotations, and aligner indexes. These data can be found in many versions from many sources, making it challenging to identify and assess compatibility among them. For example, how can you determine which indexes are derived from identical raw sequence files, or which annotations share a compatible coordinate system? 
Here, we describe a novel approach to establish identity and compatibility of reference genome resources. We approach this with three advances: first, we derive unique identifiers for each resource; second, we record parent-child relationships among resources; and third, we describe recursive identifiers that determine identity as well as compatibility of coordinate systems and sequence names. These advances facilitate portability, reproducibility, and re-use of genome reference data. Available at https://refgenie.databio.org.",2021-05-14 +33031499,Identifiers.org: Compact Identifier services in the cloud.,"

Motivation

Since its launch in 2010, Identifiers.org has become an important tool for the annotation and cross-referencing of Life Science data. In 2016, we established the Compact Identifier (CID) scheme (prefix: accession) to generate globally unique identifiers for data resources using their locally assigned accession identifiers. Since then, we have developed and improved services to support the growing need to create, reference and resolve CIDs, in systems ranging from human readable text to cloud-based e-infrastructures, by providing high availability and low-latency cloud-based services, backed by a high-quality, manually curated resource.

Results

We describe a set of services that can be used to construct and resolve CIDs in Life Sciences and beyond. We have developed a new front end for accessing the Identifiers.org registry data and APIs to simplify integration of Identifiers.org CID services with third-party applications. We have also deployed the new Identifiers.org infrastructure in a commercial cloud environment, bringing our services closer to the data.

Availability and implementation

https://identifiers.org.",2021-07-01 +33079988,The Dark Kinase Knowledgebase: an online compendium of knowledge and experimental results of understudied kinases.,"Kinases form the backbone of numerous cell signaling pathways, with their dysfunction similarly implicated in multiple pathologies. Further facilitated by their druggability, kinases are a major focus of therapeutic development efforts in diseases such as cancer, infectious disease and autoimmune disorders. While their importance is clear, the role or biological function of nearly one-third of kinases is largely unknown. Here, we describe a data resource, the Dark Kinase Knowledgebase (DKK; https://darkkinome.org), that is specifically focused on providing data and reagents for these understudied kinases to the broader research community. Supported through NIH's Illuminating the Druggable Genome (IDG) Program, the DKK is focused on data and knowledge generation for 162 poorly studied or 'dark' kinases. Types of data provided through the DKK include parallel reaction monitoring (PRM) peptides for quantitative proteomics, protein interactions, NanoBRET reagents, and kinase-specific compounds. Higher-level data is similarly being generated and consolidated such as tissue gene expression profiles and, longer-term, functional relationships derived through perturbation studies. Associated web tools that help investigators interrogate both internal and external data are also provided through the site. As an evolving resource, the DKK seeks to continually support and enhance knowledge on these potentially high-impact druggable targets.",2021-01-01 +34614039,virusMED: an atlas of hotspots of viral proteins. ,"Metal binding sites, antigen epitopes and drug binding sites are the hotspots in viral proteins that control how viruses interact with their hosts. 
virusMED (virusMetal binding sites, Epitopes and Drug binding sites) is a rich internet application based on a database of atomic interactions around hotspots in 7041 experimentally determined viral protein structures. 25306 hotspots from 805 virus strains from 75 virus families were characterized, including influenza, HIV-1 and SARS-CoV-2 viruses. Just as Google Maps organizes and annotates points of interest, virusMED presents the positions of individual hotspots on each viral protein and creates an atlas upon which newly characterized functional sites can be placed as they are being discovered. virusMED contains an extensive set of annotation tags about the virus species and strains, viral hosts, viral proteins, metal ions, specific antibodies and FDA-approved drugs, which permits rapid screening of hotspots on viral proteins tailored to a particular research problem. The virusMED portal (https://virusmed.biocloud.top) can serve as a window to a valuable resource for many areas of virus research and play a critical role in the rational design of new preventative and therapeutic agents targeting viral infections.",2021-09-28 +34025934,Computational modeling and bioinformatic analyses of functional mutations in drug target genes in Mycobacterium tuberculosis.,"Tuberculosis (TB) continues to be the leading cause of deaths due to its persistent drug resistance and the consequent ineffectiveness of anti-TB treatment. Recent years witnessed huge amount of sequencing data, revealing mutations responsible for drug resistance. However, the lack of an up-to-date repository remains a barrier towards utilization of these data and identifying major mutations-associated with resistance. Amongst all mutations, non-synonymous mutations alter the amino acid sequence of a protein and have a much greater effect on pathogenicity. Hence, this type of gene mutation is of prime interest of the present study. 
The purpose of this study is to develop an updated database comprising almost all reported substitutions within the Mycobacterium tuberculosis (M.tb) drug target genes rpoB, inhA, katG, pncA, gyrA and gyrB. Various bioinformatics prediction tools were used to assess the structural and biophysical impacts of the resistance causing non-synonymous single nucleotide polymorphisms (nsSNPs) at the molecular level. This was followed by evaluating the impact of these mutations on binding affinity of the drugs to target proteins. We have developed a comprehensive online resource named MycoTRAP-DB (Mycobacterium tuberculosis Resistance Associated Polymorphisms Database) that connects mutations in genes with their structural, functional and pathogenic implications on protein. This database is accessible at http://139.59.12.92. This integrated platform would enable comprehensive analysis and prioritization of SNPs for the development of improved diagnostics and antimycobacterial medications. Moreover, our study puts forward secondary mutations that can be important for prognostic assessments of drug-resistance mechanism and actionable anti-TB drugs.",2021-04-19 +33553941,SARSCOVIDB-A New Platform for the Analysis of the Molecular Impact of SARS-CoV-2 Viral Infection.,"The COVID-19 pandemic caused by the new coronavirus (SARS-CoV-2) has become a global emergency issue for public health. This threat has led to an acceleration in related research and, consequently, an unprecedented volume of clinical and experimental data that include changes in gene expression resulting from infection. The SARS-CoV-2 infection database (SARSCOVIDB: https://sarscovidb.org/) was created to mitigate the difficulties related to this scenario. The SARSCOVIDB is an online platform that aims to integrate all differential gene expression data, at messenger RNA and protein levels, helping to speed up analysis and research on the molecular impact of COVID-19. 
The database can be searched from different experimental perspectives and presents all related information from published data, such as viral strains, hosts, methodological approaches (proteomics or transcriptomics), genes/proteins, and samples (clinical or experimental). All information was taken from 24 articles related to analyses of differential gene expression out of 5,554 COVID-19/SARS-CoV-2-related articles published so far. The database features 12,535 genes whose expression has been identified as altered due to SARS-CoV-2 infection. Thus, the SARSCOVIDB is a new resource to support the health workers and the scientific community in understanding the pathogenesis and molecular impact caused by SARS-CoV-2.",2021-01-21 +33399824,BnaGVD: A genomic variation database of rapeseed (Brassica napus). ,"Rapeseed (Brassica napus L.) is a typical polyploid crop and one of the most important oilseed crops worldwide. With the rapid progress on high-throughput sequencing technologies and the reduction of sequencing cost, large-scale genomic data of a specific crop have become available. However, raw sequence data are mostly deposited in the sequence read archive of the National Center of Biotechnology Information (NCBI) and the European Nucleotide Archive (ENA), which is freely accessible to all researchers. Extensive tools for practical purposes should be developed to efficiently utilize these large raw data. Here, we report a web-based rapeseed genomic variation database (BnaGVD, http://rapeseed.biocloud.net/home) from which genomic variations, such as single nucleotide polymorphisms (SNPs) and insertions/deletions (InDels) across a world-wide collection of rapeseed accessions, can be referred. The current release of the BnaGVD contains 34,591,899 high-quality SNPs and 12,281,923 high-quality InDels and provides search tools to retrieve genomic variations and gene annotations across 1,007 accessions of worldwide rapeseed germplasm. 
We implement a variety of built-in tools (e.g., BnaGWAS, BnaPCA, and BnaStructure) to help users perform in-depth analyses. We recommend this web resource for accelerating studies on the functional genomics and screening of molecular markers for rapeseed breeding.",2021-01-05 +33455583,Establishment and application of information resource of mutant mice in RIKEN BioResource Research Center.,"Online databases are crucial infrastructures to facilitate the wide effective and efficient use of mouse mutant resources in life sciences. The number and types of mouse resources have been rapidly growing due to the development of genetic modification technology with associated information of genomic sequence and phenotypes. Therefore, data integration technologies to improve the findability, accessibility, interoperability, and reusability of mouse strain data becomes essential for mouse strain repositories. In 2020, the RIKEN BioResource Research Center released an integrated database of bioresources including, experimental mouse strains, Arabidopsis thaliana as a laboratory plant, cell lines, microorganisms, and genetic materials using Resource Description Framework-related technologies. The integrated database shows multiple advanced features for the dissemination of bioresource information. The current version of our online catalog of mouse strains which functions as a part of the integrated database of bioresources is available from search bars on the page of the Center ( https://brc.riken.jp ) and the Experimental Animal Division ( https://mus.brc.riken.jp/ ) websites. The BioResource Research Center also released a genomic variation database of mouse strains established in Japan and Western Europe, MoG+ ( https://molossinus.brc.riken.jp/mogplus/ ), and a database for phenotype-phenotype associations across the mouse phenome using data from the International Mouse Phenotyping Platform. 
In this review, we describe features of current version of databases related to mouse strain resources in RIKEN BioResource Research Center and discuss future views.",2021-01-18 +33137192,Plant-ImputeDB: an integrated multiple plant reference panel database for genotype imputation.,"Genotype imputation is a process that estimates missing genotypes in terms of the haplotypes and genotypes in a reference panel. It can effectively increase the density of single nucleotide polymorphisms (SNPs), boost the power to identify genetic association and promote the combination of genetic studies. However, there has been a lack of high-quality reference panels for most plants, which greatly hinders the application of genotype imputation. Here, we developed Plant-ImputeDB (http://gong_lab.hzau.edu.cn/Plant_imputeDB/), a comprehensive database with reference panels of 12 plant species for online genotype imputation, SNP and block search and free download. By integrating genotype data and whole-genome resequencing data of plants from various studies and databases, the current Plant-ImputeDB provides high-quality reference panels of 12 plant species, including ∼69.9 million SNPs from 34 244 samples. It also provides an easy-to-use online tool with the option of two popular tools specifically designed for genotype imputation. In addition, Plant-ImputeDB accepts submissions of different types of genomic variations, and provides free and open access to all publicly available data in support of related research worldwide. In general, Plant-ImputeDB may serve as an important resource for plant genotype imputation and greatly facilitate the research on plant genetic research.",2021-01-01 +33119751,The MemMoRF database for recognizing disordered protein regions interacting with cellular membranes.,"Protein and lipid membrane interactions play fundamental roles in a large number of cellular processes (e.g. signalling, vesicle trafficking, or viral invasion). 
A growing number of examples indicate that such interactions can also rely on intrinsically disordered protein regions (IDRs), which can form specific reversible interactions not only with proteins but also with lipids. We named IDRs involved in such membrane lipid-induced disorder-to-order transition as MemMoRFs, in an analogy to IDRs exhibiting disorder-to-order transition upon interaction with protein partners termed Molecular Recognition Features (MoRFs). Currently, both the experimental detection and computational characterization of MemMoRFs are challenging, and information about these regions are scattered in the literature. To facilitate the related investigations we generated a comprehensive database of experimentally validated MemMoRFs based on manual curation of literature and structural data. To characterize the dynamics of MemMoRFs, secondary structure propensity and flexibility calculated from nuclear magnetic resonance chemical shifts were incorporated into the database. These data were supplemented by inclusion of sentences from papers, functional data and disease-related information. The MemMoRF database can be accessed via a user-friendly interface at https://memmorf.hegelab.org, potentially providing a central resource for the characterization of disordered regions in transmembrane and membrane-associated proteins.",2021-01-01 +34107869,PINIR: a comprehensive information resource for Pin-II type protease inhibitors.,"

Background

Serine protease inhibitors belonging to the Potato type-II Inhibitor family Protease Inhibitors (Pin-II type PIs) are essential plant defense molecules. They are characterized by multiple inhibitory repeat domains, conserved disulfide bond pattern, and a tripeptide reactive center loop. These features of Pin-II type PIs make them potential molecules for protein engineering and designing inhibitors for agricultural and therapeutic applications. However, the diversity in these PIs remains unexplored due to the lack of annotated protein sequences and their functional attributes in the available databases.

Results

We have developed a database, PINIR (Pin-II type PIs Information Resource), by systematic collection and manual annotation of 415 Pin-II type PI protein sequences. For each PI, the number and position for signature sequences are specified: 695 domains, 75 linkers, 63 reactive center loops, and 10 disulfide bond patterns are identified and mapped. Database analysis revealed novel subcategories of PIs, species-correlated occurrence of inhibitory domains, reactive center loops, and disulfide bond patterns. By analyzing linker regions, we predict that alternative processing at linker regions could generate PI variants in the Solanaceae family.

Conclusion

PINIR ( https://pinir.ncl.res.in ) provides a web interface for browsing and analyzing the protein sequences of Pin-II type PIs. Information about signature sequences, spatio-temporal expression, biochemical properties, gene sequences, and literature references are provided. Analysis of PINIR depicts conserved species-specific features of Pin-II type PI protein sequences. Diversity in the sequence of inhibitory domains and reactive loops directs potential applications to engineer Pin-II type PIs. The PINIR database will serve as a comprehensive information resource for further research into Pin-II type PIs.",2021-06-09 +33079992,PLncDB V2.0: a comprehensive encyclopedia of plant long noncoding RNAs.,"Long noncoding RNAs (lncRNAs) are transcripts longer than 200 nucleotides with little or no protein coding potential. The expanding list of lncRNAs and accumulating evidence of their functions in plants have necessitated the creation of a comprehensive database for lncRNA research. However, currently available plant lncRNA databases have some deficiencies, including the lack of lncRNA data from some model plants, uneven annotation standards, a lack of visualization for expression patterns, and the absence of epigenetic information. To overcome these problems, we upgraded our Plant Long noncoding RNA Database (PLncDB, http://plncdb.tobaccodb.org/), which was based on a uniform annotation pipeline. PLncDB V2.0 currently contains 1 246 372 lncRNAs for 80 plant species based on 13 834 RNA-Seq datasets, integrating lncRNA information from four other resources including EVLncRNAs, RNAcentral and etc. Expression patterns and epigenetic signals can be visualized using multiple tools (JBrowse, eFP Browser and EPexplorer). Targets and regulatory networks for lncRNAs are also provided for function exploration. In addition, PLncDB V2.0 is hierarchical and user-friendly and has five built-in search engines. 
We believe PLncDB V2.0 is useful for the plant lncRNA community and data mining studies and provides a comprehensive resource for data-driven lncRNA research in plants.",2021-01-01 +34175476,Genome Warehouse: A Public Repository Housing Genome-scale Data.,"The Genome Warehouse (GWH) is a public repository housing genome assembly data for a wide range of species and delivering a series of web services for genome data submission, storage, release, and sharing. As one of the core resources in the National Genomics Data Center (NGDC), part of the China National Center for Bioinformation (CNCB; https://ngdc.cncb.ac.cn), GWH accepts both full and partial (chloroplast, mitochondrion, and plasmid) genome sequences with different assembly levels, as well as an update of existing genome assemblies. For each assembly, GWH collects detailed genome-related metadata of biological project, biological sample, and genome assembly, in addition to genome sequence and annotation. To archive high-quality genome sequences and annotations, GWH is equipped with a uniform and standardized procedure for quality control. Besides basic browse and search functionalities, all released genome sequences and annotations can be visualized with JBrowse. By May 21, 2021, GWH has received 19,124 direct submissions covering a diversity of 1108 species and has released 8772 of them. Collectively, GWH serves as an important resource for genome-scale data management and provides free and publicly accessible data to support research activities throughout the world. GWH is publicly accessible at https://ngdc.cncb.ac.cn/gwh.",2021-06-24 +33685383,lncRNADetector: a bioinformatics pipeline for long non-coding RNA identification and MAPslnc: a repository of medicinal and aromatic plant lncRNAs.,"Long non-coding RNAs (lncRNAs) are an emerging class of non-coding RNAs and potent regulatory elements in the living cells. 
High throughput RNA sequencing analyses have generated a tremendous amount of transcript sequence data. A large proportion of these transcript sequences does not code for proteins and are known as non-coding RNAs. Among them, lncRNAs are a unique class of transcripts longer than 200 nucleotides with diverse biological functions and regulatory mechanisms. Recent emerging studies and next-generation sequencing technologies show a substantial amount of lncRNAs within the plant genome, which are yet to be identified. The computational identification of lncRNAs from these transcripts is a challenging task due to the involvement of a series of filtering steps. We have developed lncRNADetector, a bioinformatics pipeline for the identification of novel lncRNAs, especially from medicinal and aromatic plant (MAP) species. The lncRNADetector has been utilized to analyse and identify more than 88,459 lncRNAs from 21 species of MAPs. To provide a knowledge resource for the plant research community towards elucidating the diversity of biological roles of lncRNAs, the information generated about MAP lncRNAs (post-filtering steps) through lncRNADetector has been stored and organized in MAPslnc database (MAPslnc, https://lncrnapipe.cimap.res.in). The lncRNADetector web server and MAPslnc database have been developed in order to facilitate researchers for accurate identification of lncRNAs from the next-generation sequencing data of different organisms for downstream studies. To the best of our knowledge no such MAPslnc database is available till date.",2021-03-18 +,Data is the new oil. How Covid-19 boosted information transparency in Austria,"Abstract

Background

Austria has no tradition of sharing administrative or routine (health) data collected by public authorities with researchers and accordingly no corresponding data infrastructure exists. Triggered by the GDPR and the pandemic situation an increasing demand from scientific institutions to obtain access to health care data (e.g. use of resources, patient pathways) for research purposes emerged. The latest governmental program addressed this fundamental request even before the start of the pandemic mainly because one of the members of the current coalition, the Green Party, values information transparency very high. The abolishment of “professional secrecy” and the promotion of research by enabling a comprehensive secondary use of data are cornerstones of their political agenda.

Methods

The agenda was boosted by the outbreak of the Covid-19 pandemic that made the need for joint and real-time data-based research into SARS-CoV-2 and COVID-19 evident and eventually led to the establishment of the Covid-19 data platform https://datenplattform-covid.goeg.at in April 2020, which is run by the Austrian National Public Health Institute (GÖG). Since June 2020 national and international scientific institutions can obtain, free of cost, data from the Epidemiological reporting system which includes core information on COVID-19 infections in Austria. Institutions need to apply with GÖG for accreditation which is granted (or denied) by the Scientific Advisory Board of the Platform. Data provision is continuously expanded and includes as of now, data on hospital and ICU admissions due to COVID-19 as well as data on SARS-CoV-2 genome sequencing.

Results

In April 2021, 84 institutions (8 international) had applied for access. 60 have been accredited, in line with the rules of procedure (incl. a plausibility check of the applicant against OECD's Frascati-criteria). In total, nine articles using data from the Covid-19 platform have been submitted to scientific journals.

Key messages

On a national level, the Covid-19 data platform, commissioned by the Austrian Ministry of Health (BMSGPK) is an important step towards a modern and transparent health care administration system. Covid-19 and the data platform triggered a debate around the need for a (national) data-governance framework balancing benefits from data usage vs the right to data protection of citizens’ health data.",2021-10-01 +33953926,Rapid response to emerging biomedical challenges and threats.,"As part of the global mobilization to combat the present pandemic, almost 100 000 COVID-19-related papers have been published and nearly a thousand models of macromolecules encoded by SARS-CoV-2 have been deposited in the Protein Data Bank within less than a year. The avalanche of new structural data has given rise to multiple resources dedicated to assessing the correctness and quality of structural data and models. Here, an approach to evaluate the massive amounts of such data using the resource https://covid19.bioreproducibility.org is described, which offers a template that could be used in large-scale initiatives undertaken in response to future biomedical crises. Broader use of the described methodology could considerably curtail information noise and significantly improve the reproducibility of biomedical research.",2021-03-26 +32248568,Documentation of clinically relevant genomic biomarker allele frequencies in the next-generation FINDbase worldwide database.,"FINDbase (http://www.findbase.org) is a comprehensive data resource recording the prevalence of clinically relevant genomic variants in various populations worldwide, such as pathogenic variants underlying genetic disorders as well as pharmacogenomic biomarkers that can guide drug treatment. 
Here, we report significant new developments and technological advancements in the database architecture, leading to a completely revamped database structure, querying interface, accompanied with substantial extensions of data content and curation. In particular, the FINDbase upgrade further improves the user experience by introducing responsive features that support a wide variety of mobile and stationary devices, while enhancing computational runtime due to the use of a modern Javascript framework such as ReactJS. Data collection is significantly enriched, with the data records being divided in a Public and Private version, the latter being accessed on the basis of data contribution, according to the microattribution approach, while the front end was redesigned to support the new functionalities and querying tools. The abovementioned updates further enhance the impact of FINDbase, improve the overall user experience, facilitate further data sharing by microattribution, and strengthen the role of FINDbase as a key resource for personalized medicine applications and personalized public health.",2020-04-14 +31639440,The impact of using three-dimensional digital models of human embryos in the biomedical curriculum.,"BACKGROUND:Knowledge of embryonic development is essential to understand the positioning of organs in the human body. Unfortunately, (bio)medical students have to struggle with textbooks that use static, two-dimensional (2D) schematics to grasp the intricate three-dimensional (3D) morphogenesis of the developing human body. To facilitate embryology education on an understandable and scientific level, a 3D Atlas of Human Embryology (3D Atlas) was created (Science, 2016), encompassing 14 interactive 3D-PDFs of various stages of human embryonic development (freely available from http://www.3datlasofhumanembryology.com). This study examined whether the use of the 3D atlas has added educational value and improves the students learning experience. 
METHODS:The 3D atlas was introduced and integrated in lectures and practical classes of an existing embryology course at our university for first year biomedical students. By means of a questionnaire the use of the 3D atlas was evaluated. The outcomes in written examinations was compared between cohorts that followed the course before and after integration of the 3D atlas. RESULTS:Our results showed that the 3D Atlas significantly improves students' understanding of human embryology, reflected in significant higher test scores for new students. Furthermore, the 3D atlas also significantly improved repeaters' test scores. CONCLUSIONS:The results indicate that the 3D Atlas of Human Embryology facilitates students' learning experience as a resource to support embryology lectures. Students appreciated the use of the 3D atlas in practical classes and liked its interactive aspect. Interestingly, the students also appreciated the physical hand-painted embryological models that were used in addition to the digital 3D atlas during practical classes. The 3D Atlas of Human Embryology has proven to be a valuable resource in addition to the existing resources to teach the intricate developmental processes of human embryology, especially in a blended learning curriculum.",2019-10-19 +31189922,"2DMatPedia, an open computational database of two-dimensional materials from top-down and bottom-up approaches.","Two-dimensional (2D) materials have been a hot research topic in the last decade, due to novel fundamental physics in the reduced dimension and appealing applications. Systematic discovery of functional 2D materials has been the focus of many studies. Here, we present a large dataset of 2D materials, with more than 6,000 monolayer structures, obtained from both top-down and bottom-up discovery procedures. 
First, we screened all bulk materials in the database of Materials Project for layered structures by a topology-based algorithm and theoretically exfoliated them into monolayers. Then, we generated new 2D materials by chemical substitution of elements in known 2D materials by others from the same group in the periodic table. The structural, electronic and energetic properties of these 2D materials are consistently calculated, to provide a starting point for further material screening, data mining, data analysis and artificial intelligence applications. We present the details of computational methodology, data record and technical validation of our publicly available data ( http://www.2dmatpedia.org/ ).",2019-06-12 +34527196,Interpreting a black box predictor to gain insights into early folding mechanisms.,"Protein folding and function are closely connected, but the exact mechanisms by which proteins fold remain elusive. Early folding residues (EFRs) are amino acids within a particular protein that induce the very first stages of the folding process. High-resolution EFR data are only available for few proteins, which has previously enabled the training of a protein sequence-based machine learning 'black box' predictor (EFoldMine). Such a black box approach does not allow a direct extraction of the 'early folding rules' embedded in the protein sequence, whilst such interpretation is essential to improve our understanding of how the folding process works. We here apply and investigate a novel 'grey box' approach to the prediction of EFRs from protein sequence to gain mechanistic residue-level insights into the sequence determinants of EFRs in proteins. We interpret the rule set for three datasets, a default set comprised of natural proteins, a scrambled set comprised of the scrambled default set sequences, and a set of de novo designed proteins. 
Finally, we relate these data to the secondary structure adopted in the folded protein and provide all information online via http://xefoldmine.bio2byte.be/, as a resource to help understand and steer early protein folding.",2021-08-27 +34225788,A global overview of genetically interpretable multimorbidities among common diseases in the UK Biobank.,"

Background

Multimorbidities greatly increase the global health burdens, but the landscapes of their genetic risks have not been systematically investigated.

Methods

We used the hospital inpatient data of 385,335 patients in the UK Biobank to investigate the multimorbid relations among 439 common diseases. Post-GWAS analyses were performed to identify multimorbidity shared genetic risks at the genomic loci, network, as well as overall genetic architecture levels. We conducted network decomposition for the networks of genetically interpretable multimorbidities to detect the hub diseases and the involved molecules and functions in each module.

Results

In total, 11,285 multimorbidities among 439 common diseases were identified, and 46% of them were genetically interpretable at the loci, network, or overall genetic architecture levels. Multimorbidities affecting the same and different physiological systems displayed different patterns of the shared genetic components, with the former more likely to share loci-level genetic components while the latter more likely to share network-level genetic components. Moreover, both the loci- and network-level genetic components shared by multimorbidities converged on cell immunity, protein metabolism, and gene silencing. Furthermore, we found that the genetically interpretable multimorbidities tend to form network modules, mediated by hub diseases and featuring physiological categories. Finally, we showcased how hub diseases mediating the multimorbidity modules could help provide useful insights for the genetic contributors of multimorbidities.

Conclusions

Our results provide a systematic resource for understanding the genetic predispositions of multimorbidities and indicate that hub diseases and converged molecules and functions may be the key for treating multimorbidities. We have created an online database that facilitates researchers and physicians to browse, search, or download these multimorbidities ( https://multimorbidity.comp-sysbio.org ).",2021-07-05 +33436076,"The Dfam community resource of transposable element families, sequence models, and genome annotations.","Dfam is an open access database of repetitive DNA families, sequence models, and genome annotations. The 3.0-3.3 releases of Dfam ( https://dfam.org ) represent an evolution from a proof-of-principle collection of transposable element families in model organisms into a community resource for a broad range of species, and for both curated and uncurated datasets. In addition, releases since Dfam 3.0 provide auxiliary consensus sequence models, transposable element protein alignments, and a formalized classification system to support the growing diversity of organisms represented in the resource. The latest release includes 266,740 new de novo generated transposable element families from 336 species contributed by the EBI. This expansion demonstrates the utility of many of Dfam's new features and provides insight into the long term challenges ahead for improving de novo generated transposable element datasets.",2021-01-12 +33985427,TarDB: an online database for plant miRNA targets and miRNA-triggered phased siRNAs.,"

Background

In plants, microRNAs (miRNAs) are pivotal regulators of plant development and stress responses. Different computational tools and web servers have been developed for plant miRNA target prediction; however, in silico prediction normally contains false positive results. In addition, many plant miRNA target prediction servers lack information for miRNA-triggered phased small interfering RNAs (phasiRNAs). Creating a comprehensive and relatively high-confidence plant miRNA target database is much needed.

Results

Here, we report TarDB, an online database that collects three categories of relatively high-confidence plant miRNA targets: (i) cross-species conserved miRNA targets; (ii) degradome/PARE (Parallel Analysis of RNA Ends) sequencing supported miRNA targets; (iii) miRNA-triggered phasiRNA loci. TarDB provides a user-friendly interface that enables users to easily search, browse and retrieve miRNA targets and miRNA initiated phasiRNAs in a broad variety of plants. TarDB has a comprehensive collection of reliable plant miRNA targets containing previously unreported miRNA targets and miRNA-triggered phasiRNAs even in the well-studied model species. Most of these novel miRNA targets are relevant to lineage-specific or species-specific miRNAs. TarDB data is freely available at http://www.biosequencing.cn/TarDB .

Conclusions

In summary, TarDB serves as a useful web resource for exploring relatively high-confidence miRNA targets and miRNA-triggered phasiRNAs in plants.",2021-05-13 +32829394,Reanalysis of genome sequences of tomato accessions and its wild relatives: development of Tomato Genomic Variation (TGV) database integrating SNPs and INDELs polymorphisms.,"

Motivation

Facilitated by technological advances and expeditious decrease in the sequencing costs, whole-genome sequencing is increasingly implemented to uncover variations in cultivars/accessions of many crop plants. In tomato (Solanum lycopersicum), the availability of the genome sequence, followed by the resequencing of tomato cultivars and its wild relatives, has provided a prodigious resource for the improvement of traits. A high-quality genome resequencing of 84 tomato accessions and wild relatives generated a dataset that can be used as a resource to identify agronomically important alleles across the genome. Converting this dataset into a searchable database, including information about the influence of single-nucleotide polymorphisms (SNPs) on protein function, provides valuable information about the genetic variations. The database will assist in searching for functional variants of a gene for introgression into tomato cultivars.

Results

A recent release of better-quality tomato genome reference assembly SL3.0, and new annotation ITAG3.2 of SL3.0, dropped 3857 genes, added 4900 novel genes and updated 20 766 genes. Using the above version, we remapped the data from the tomato lines resequenced under the '100 tomato genome resequencing project' on new tomato genome assembly SL3.0 and made an online searchable Tomato Genomic Variations (TGVs) database. The TGV contains information about SNPs and insertion/deletion events and expands it by functional annotation of variants with new ITAG3.2 using SIFT4G software. This database with search function assists in inferring the influence of SNPs on the function of a target gene. This database can be used for selecting SNPs, which can be potentially deployed for improving tomato traits.

Availability and implementation

TGV is freely available at http://psd.uohyd.ac.in/tgv.",2020-12-01 +33245777,3DIV update for 2021: a comprehensive resource of 3D genome and 3D cancer genome.,"Three-dimensional (3D) genome organization is tightly coupled with gene regulation in various biological processes and diseases. In cancer, various types of large-scale genomic rearrangements can disrupt the 3D genome, leading to oncogenic gene expression. However, unraveling the pathogenicity of the 3D cancer genome remains a challenge since closer examinations have been greatly limited due to the lack of appropriate tools specialized for disorganized higher-order chromatin structure. Here, we updated a 3D-genome Interaction Viewer and database named 3DIV by uniformly processing ∼230 billion raw Hi-C reads to expand our contents to the 3D cancer genome. The updates of 3DIV are listed as follows: (i) the collection of 401 samples including 220 cancer cell line/tumor Hi-C data, 153 normal cell line/tissue Hi-C data, and 28 promoter capture Hi-C data, (ii) the live interactive manipulation of the 3D cancer genome to simulate the impact of structural variations and (iii) the reconstruction of Hi-C contact maps by user-defined chromosome order to investigate the 3D genome of the complex genomic rearrangement. In summary, the updated 3DIV will be the most comprehensive resource to explore the gene regulatory effects of both the normal and cancer 3D genome. '3DIV' is freely available at http://3div.kr.",2021-01-01 +33074314,TransCirc: an interactive database for translatable circular RNAs based on multi-omics evidence.,"TransCirc (https://www.biosino.org/transcirc/) is a specialized database that provide comprehensive evidences supporting the translation potential of circular RNAs (circRNAs). This database was generated by integrating various direct and indirect evidences to predict coding potential of each human circRNA and the putative translation products. 
Seven types of evidences for circRNA translation were included: (i) ribosome/polysome binding evidences supporting the occupancy of ribosomes onto circRNAs; (ii) experimentally mapped translation initiation sites on circRNAs; (iii) internal ribosome entry site on circRNAs; (iv) published N-6-methyladenosine modification data in circRNA that promote translation initiation; (v) lengths of the circRNA specific open reading frames; (vi) sequence composition scores from a machine learning prediction of all potential open reading frames; (vii) mass spectrometry data that directly support the circRNA encoded peptides across back-splice junctions. TransCirc provides a user-friendly searching/browsing interface and independent lines of evidences to predict how likely a circRNA can be translated. In addition, several flexible tools have been developed to aid retrieval and analysis of the data. TransCirc can serve as an important resource for investigating the translation capacity of circRNAs and the potential circRNA-encoded peptides, and can be expanded to include new evidences or additional species in the future.",2021-01-01 +33010176,CancerImmunityQTL: a database to systematically evaluate the impact of genetic variants on immune infiltration in human cancer.,"Tumor-infiltrating immune cells as integral component of the tumor microenvironment are associated with tumor progress, prognosis and responses to immunotherapy. Genetic variants have been demonstrated to impact tumor-infiltrating, underscoring the heritable character of immune landscape. Therefore, identification of immunity quantitative trait loci (immunQTLs), which evaluate the effect of genetic variants on immune cells infiltration, might present a critical step toward fully understanding the contribution of genetic variants in tumor development. 
Although emerging studies have demonstrated the determinants of germline variants on immune infiltration, no database has yet been developed to systematically analyze immunQTLs across multiple cancer types. Using genotype data from TCGA database and immune cell fractions estimated by CIBERSORT, we developed a computational pipeline to identify immunQTLs in 33 cancer types. A total of 913 immunQTLs across different cancer types were identified. Among them, 5 immunQTLs are associated with patient overall survival. Furthermore, by integrating immunQTLs with GWAS data, we identified 527 immunQTLs overlapping with known GWAS linkage disequilibrium regions. Finally, we constructed a user-friendly database, CancerImmunityQTL (http://www.cancerimmunityqtl-hust.com/) for users to browse, search and download data of interest. This database provides an informative resource to understand the germline determinants of immune infiltration in human cancer and benefit from personalized cancer immunotherapy.",2021-01-01 +34570431,NDEx: Accessing Network Models and Streamlining Network Biology Workflows.,"NDEx, the Network Data Exchange (https://www.ndexbio.org) is a web-based resource where users can find, store, share and publish network models of any type and size. NDEx is integrated with Cytoscape, the widely used desktop application for network analysis and visualization. NDEx and Cytoscape are the pillars of the Cytoscape Ecosystem, a diverse environment of resources, tools, applications and services for network biology workflows. In this article, we introduce researchers to NDEx and highlight how it can simplify common tasks in network biology workflows as well as streamline publication and access to). Finally, we show how NDEx can be used programmatically via Python with the 'ndex2' client library, and point readers to additional examples for other popular programming languages such as JavaScript and R. © 2021 The Authors. Current Protocols published by Wiley Periodicals LLC. 
Basic Protocol 1: Getting started with NDEx Basic Protocol 2: Using NDEx and Cytoscape in a publication-oriented workflow Basic Protocol 3: Manipulating networks in NDEx via Python.",2021-09-01 +34251875,Operating in a Climate Crisis: A State-of-the-Science Review of Life Cycle Assessment within Surgical and Anesthetic Care.,"

Background

Both human health and the health systems we depend on are increasingly threatened by a range of environmental crises, including climate change. Paradoxically, health care provision is a significant driver of environmental pollution, with surgical and anesthetic services among the most resource-intensive components of the health system.

Objectives

This analysis aimed to summarize the state of life cycle assessment (LCA) practice as applied to surgical and anesthetic care via review of extant literature assessing environmental impacts of related services, procedures, equipment, and pharmaceuticals.

Methods

A state-of-the-science review was undertaken following a registered protocol and a standardized, LCA-specific reporting framework. Three bibliographic databases (Scopus®, PubMed, and Embase®) and the gray literature were searched. Inclusion criteria were applied, eligible entries critically appraised, and key methodological data and results extracted.

Results

From 1,316 identified records, 44 studies were eligible for inclusion. The annual climate impact of operating surgical suites ranged between 3,200,000 and 5,200,000 kg CO2e. The climate impact of individual surgical procedures varied considerably, with estimates ranging from 6 to 1,007 kg CO2e. Anesthetic gases; single-use equipment; and heating, ventilation, and air conditioning system operation were the main emissions hot spots identified among operating room- and procedure-specific analyses. Single-use equipment used in surgical settings was generally more harmful than equivalent reusable items across a range of environmental parameters. Life cycle inventories have been assembled and associated climate impacts calculated for three anesthetic gases (2-85 kg CO2e/MAC-h) and 20 injectable anesthetic drugs (0.01-3.0 kg CO2e/gAPI).

Discussion

Despite the recent proliferation of surgical and anesthesiology-related LCAs, extant studies address a miniscule fraction of the numerous services, procedures, and products available today. Methodological heterogeneity, external validity, and a lack of background life cycle inventory data related to many essential surgical and anesthetic inputs are key limitations of the current evidence base. This review provides an indication of the spectrum of environmental impacts associated with surgical and anesthetic care at various scales. https://doi.org/10.1289/EHP8666.",2021-07-12 +34667563,C-H functionalisation tolerant to polar groups could transform fragment-based drug discovery (FBDD).,"We have analysed 131 fragment-to-lead (F2L) examples targeting a wide variety of protein families published by academic and industrial laboratories between 2015-2019. Our assessment of X-ray structural data identifies the most common polar functional groups involved in fragment-protein binding are: N-H (hydrogen bond donors on aromatic and aliphatic N-H, amides and anilines; totalling 35%), aromatic nitrogen atoms (hydrogen bond acceptors; totalling 23%), and carbonyl oxygen group atoms (hydrogen bond acceptors on amides, ureas and ketones; totalling 22%). Furthermore, the elaboration of each fragment into its corresponding lead is analysed to identify the nominal synthetic growth vectors. In ∼80% of cases, growth originates from an aromatic or aliphatic carbon on the fragment and more than 50% of the total bonds formed are carbon-carbon bonds. This analysis reveals that growth from carbocentric vectors is key and therefore robust C-H functionalisation methods that tolerate the innate polar functionality on fragments could transform fragment-based drug discovery (FBDD). 
As a further resource to the community, we have provided the full data of our analysis as well as an online overlay page of the X-ray structures of the fragment hit and leads: https://astx.com/interactive/F2L-2021/.",2021-09-01 +33793824,Analysis of a photosynthetic cyanobacterium rich in internal membrane systems via gradient profiling by sequencing (Grad-seq).,"Although regulatory small RNAs have been reported in photosynthetic cyanobacteria, the lack of clear RNA chaperones involved in their regulation poses a conundrum. Here, we analyzed the full complement of cellular RNAs and proteins using gradient profiling by sequencing (Grad-seq) in Synechocystis 6803. Complexes with overlapping subunits such as the CpcG1-type versus the CpcL-type phycobilisomes or the PsaK1 versus PsaK2 photosystem I pre(complexes) could be distinguished, supporting the high quality of this approach. Clustering of the in-gradient distribution profiles followed by several additional criteria yielded a short list of potential RNA chaperones that include an YlxR homolog and a cyanobacterial homolog of the KhpA/B complex. The data suggest previously undetected complexes between accessory proteins and CRISPR-Cas systems, such as a Csx1-Csm6 ribonucleolytic defense complex. Moreover, the exclusive association of either RpoZ or 6S RNA with the core RNA polymerase complex and the existence of a reservoir of inactive sigma-antisigma complexes is suggested. The Synechocystis Grad-seq resource is available online at https://sunshine.biologie.uni-freiburg.de/GradSeqExplorer/ providing a comprehensive resource for the functional assignment of RNA-protein complexes and multisubunit protein complexes in a photosynthetic organism.",2021-04-01 +34768782,LegumeSSRdb: A Comprehensive Microsatellite Marker Database of Legumes for Germplasm Characterization and Crop Improvement. 
,"Microsatellites, or simple sequence repeats (SSRs), are polymorphic loci that play a major role as molecular markers for genome analysis and plant breeding. The legume SSR database is a webserver which contains simple sequence repeats (SSRs) from genomes of 13 legume species. A total of 3,706,276 SSRs are present in the database, 698,509 of which are genic SSRs, and 3,007,772 are non-genic. This webserver is an integrated tool to perform end-to-end marker selection right from generating SSRs to designing and validating primers, visualizing the results and blasting the genomic sequences at one place without juggling between several resources. The user-friendly web interface allows users to browse SSRs based on the genomic region, chromosome, motif type, repeat motif sequence, frequency of motif, and advanced searches allow users to search based on chromosome location range and length of SSR. Users can give their desired flanking region around repeat and obtain the sequence, they can explore the genes in which the SSRs are present or the genes between which the SSRs are bound design custom primers, and perform in silico validation using PCR. An SSR prediction pipeline is implemented where the user can submit their genomic sequence to generate SSRs. This webserver will be frequently updated with more species, in time. We believe that legumeSSRdb would be a useful resource for marker-assisted selection and mapping quantitative trait loci (QTLs) to practice genomic selection and improve crop health. The database can be freely accessed at http://bioinfo.usu.edu/legumeSSRdb/.",2021-10-21 +34583243,Technical Note: The Forensic Anthropology Society of Europe (FASE) Map of Identified Osteological Collections.,"Identified (documented) osteological collections represent an important resource in the development of forensic anthropology standards and methods as well as a precious tool for learning and training of practitioners. 
Even though the number of papers presenting identified collections worldwide increases, many of the collections have still not been divulged to the scientific community in sufficient detail to ascertain their exact number. The Forensic Anthropology Society of Europe (FASE) therefore developed a tool that goes beyond sporadic publications: the FASE Map of Identified Osteological Collections, which is freely accessible and continuously updated and revised. The online map is available at http://forensicanthropology.eu/osteological-collections/. The map of skeletal collections was created in 2017 and currently displays information on 153 identified osteological collections (43 of them categorized as contemporary) located in 41 different countries. This article offers a short analysis of the type, geographical location and content of the collections included in the map. The aim of this article and the map as such is to provide a useful resource to facilitate research planning and teaching in forensic anthropology and related disciplines.",2021-09-10 +33739923,Towards Cross-Dataset Palmprint Recognition Via Joint Pixel and Feature Alignment.,"Deep learning-based palmprint recognition algorithms have shown great potential. Most of them are mainly focused on identifying samples from the same dataset. However, they may be not suitable for a more convenient case that the images for training and test are from different datasets, such as collected by embedded terminals and smartphones. Therefore, we propose a novel Joint Pixel and Feature Alignment (JPFA) framework for such cross-dataset palmprint recognition scenarios. Two-stage alignment is applied to obtain adaptive features in source and target datasets. 1) Deep style transfer model is adopted to convert source images into fake images to reduce the dataset gaps and perform data augmentation on pixel level. 
2) A new deep domain adaptation model is proposed to extract adaptive features by aligning the dataset-specific distributions of target-source and target-fake pairs on feature level. Adequate experiments are conducted on several benchmarks including constrained and unconstrained palmprint databases. The results demonstrate that our JPFA outperforms other models to achieve the state-of-the-arts. Compared with baseline, the accuracy of cross-dataset identification is improved by up to 28.10% and the Equal Error Rate (EER) of cross-dataset verification is reduced by up to 4.69%. To make our results reproducible, the codes are publicly available at http://gr.xjtu.edu.cn/web/bell/resource.",2021-03-23 +33046717,Protein ontology on the semantic web for knowledge discovery.,"The Protein Ontology (PRO) provides an ontological representation of protein-related entities, ranging from protein families to proteoforms to complexes. Protein Ontology Linked Open Data (LOD) exposes, shares, and connects knowledge about protein-related entities on the Semantic Web using Resource Description Framework (RDF), thus enabling integration with other Linked Open Data for biological knowledge discovery. For example, proteins (or variants thereof) can be retrieved on the basis of specific disease associations. As a community resource, we strive to follow the Findability, Accessibility, Interoperability, and Reusability (FAIR) principles, disseminate regular updates of our data, support multiple methods for accessing, querying and downloading data in various formats, and provide documentation both for scientists and programmers. PRO Linked Open Data can be browsed via faceted browser interface and queried using SPARQL via YASGUI. RDF data dumps are also available for download. Additionally, we developed RESTful APIs to support programmatic data access. We also provide W3C HCLS specification compliant metadata description for our data. 
The PRO Linked Open Data is available at https://lod.proconsortium.org/ .",2020-10-12 +32591816,ExoBCD: a comprehensive database for exosomal biomarker discovery in breast cancer. ,"Effective and safe implementation of precision oncology for breast cancer is a vital strategy to improve patient outcomes, which relies on the application of reliable biomarkers. As 'liquid biopsy' and novel resource for biomarkers, exosomes provide a promising avenue for the diagnosis and treatment of breast cancer. Although several exosome-related databases have been developed, there is still lacking of an integrated database for exosome-based biomarker discovery. To this end, a comprehensive database ExoBCD (https://exobcd.liumwei.org) was constructed with the combination of robust analysis of four high-throughput datasets, transcriptome validation of 1191 TCGA cases and manual mining of 950 studies. In ExoBCD, approximately 20 900 annotation entries were integrated from 25 external sources and 306 exosomal molecules (49 potential biomarkers and 257 biologically interesting molecules). The latter could be divided into 3 molecule types, including 121 mRNAs, 172 miRNAs and 13 lncRNAs. Thus, the well-linked information about molecular characters, experimental biology, gene expression patterns, overall survival, functional evidence, tumour stage and clinical use were fully integrated. As a data-driven and literature-based paradigm proposed of biomarker discovery, this study also demonstrated the corroborative analysis and identified 36 promising molecules, as well as the most promising prognostic biomarkers, IGF1R and FRS2. Taken together, ExoBCD is the first well-corroborated knowledge base for exosomal studies of breast cancer. 
It not only lays a foundation for subsequent studies but also strengthens the studies of probing molecular mechanisms, discovering biomarkers and developing meaningful clinical use.",2021-05-01 +33053178,"DNAmoreDB, a database of DNAzymes.","Deoxyribozymes, DNA enzymes or simply DNAzymes are single-stranded oligo-deoxyribonucleotide molecules that, like proteins and ribozymes, possess the ability to perform catalysis. Although DNAzymes have not yet been found in living organisms, they have been isolated in the laboratory through in vitro selection. The selected DNAzyme sequences have the ability to catalyze a broad range of chemical reactions, utilizing DNA, RNA, peptides or small organic compounds as substrates. DNAmoreDB is a comprehensive database resource for DNAzymes that collects and organizes the following types of information: sequences, conditions of the selection procedure, catalyzed reactions, kinetic parameters, substrates, cofactors, structural information whenever available, and literature references. Currently, DNAmoreDB contains information about DNAzymes that catalyze 20 different reactions. We included a submission form for new data, a REST-based API system that allows users to retrieve the database contents in a machine-readable format, and keyword and BLASTN search features. The database is publicly available at https://www.genesilico.pl/DNAmoreDB/.",2021-01-01 +33151287,DrugCentral 2021 supports drug discovery and repositioning.,"DrugCentral is a public resource (http://drugcentral.org) that serves the scientific community by providing up-to-date drug information, as described in previous papers. The current release includes 109 newly approved (October 2018 through March 2020) active pharmaceutical ingredients in the US, Europe, Japan and other countries; and two molecular entities (e.g. mefuparib) of interest for COVID19. 
New additions include a set of pharmacokinetic properties for ∼1000 drugs, and a sex-based separation of side effects, processed from FAERS (FDA Adverse Event Reporting System); as well as a drug repositioning prioritization scheme based on the market availability and intellectual property rights for FDA approved drugs. In the context of the COVID19 pandemic, we also incorporated REDIAL-2020, a machine learning platform that estimates anti-SARS-CoV-2 activities, as well as the 'drugs in news' feature offers a brief enumeration of the most interesting drugs at the present moment. The full database dump and data files are available for download from the DrugCentral web portal.",2021-01-01 +33010154,CRISP-view: a database of functional genetic screens spanning multiple phenotypes.,"High-throughput genetic screening based on CRISPR/Cas9 or RNA-interference (RNAi) enables the exploration of genes associated with the phenotype of interest on a large scale. The rapid accumulation of public available genetic screening data provides a wealth of knowledge about genotype-to-phenotype relationships and a valuable resource for the systematic analysis of gene functions. Here we present CRISP-view, a comprehensive database of CRISPR/Cas9 and RNAi screening datasets that span multiple phenotypes, including in vitro and in vivo cell proliferation and viability, response to cancer immunotherapy, virus response, protein expression, etc. By 22 September 2020, CRISP-view has collected 10 321 human samples and 825 mouse samples from 167 papers. All the datasets have been curated, annotated, and processed by a standard MAGeCK-VISPR analysis pipeline with quality control (QC) metrics. We also developed a user-friendly webserver to visualize, explore, and search these datasets. 
The webserver is freely available at http://crispview.weililab.org.",2021-01-01 +34793786,An atlas of fragrance chemicals in children's products.,"Exposure to environmental chemicals during early childhood is a potential health concern. At a tender age, children are exposed to fragrance chemicals used in toys and child care products. Although there are few initiatives in Europe and United States towards monitoring and regulation of fragrance chemicals in children's products, such efforts are still lacking elsewhere. Besides there has been no systematic effort to create a database compiling the surrounding knowledge on fragrance chemicals used in children's products from published literature. Here, we built a database of Fragrance Chemicals in Children's Products (FCCP) that compiles information on 153 fragrance chemicals from published literature. The fragrance chemicals in FCCP have been classified based on their chemical structure, children's product source, chemical origin and odor profile. Moreover, we have also compiled the physicochemical properties, predicted Absorption, Distribution, Metabolism, Excretion and Toxicity (ADMET) properties, molecular descriptors and human target genes for the fragrance chemicals in FCCP. After building FCCP, we performed multiple analyses of the associated fragrance chemical space. Firstly, we assessed the regulatory status of the fragrance chemicals in FCCP through a comparative analysis with 21 chemical lists reflecting current guidelines or regulations. We find that several fragrance chemicals in children's products are potential carcinogens, endocrine disruptors, neurotoxicants, phytotoxins and skin sensitizers. Secondly, we performed a similarity network based analysis of the fragrance chemicals in children's products to reveal the high structural diversity of the associated chemical space. Lastly, we identified skin sensitizing fragrance chemicals in children's products using ToxCast assays. 
In a nutshell, we present a comprehensive resource and detailed analysis of fragrance chemicals in children's products highlighting the need for their better risk assessment and regulation to deliver safer products for children. FCCP is accessible at: https://cb.imsc.res.in/fccp.",2021-11-15 +33068433,CovalentInDB: a comprehensive database facilitating the discovery of covalent inhibitors.,"Inhibitors that form covalent bonds with their targets have traditionally been considered highly adventurous due to their potential off-target effects and toxicity concerns. However, with the clinical validation and approval of many covalent inhibitors during the past decade, design and discovery of novel covalent inhibitors have attracted increasing attention. A large amount of scattered experimental data for covalent inhibitors have been reported, but a resource by integrating the experimental information for covalent inhibitor discovery is still lacking. In this study, we presented Covalent Inhibitor Database (CovalentInDB), the largest online database that provides the structural information and experimental data for covalent inhibitors. CovalentInDB contains 4511 covalent inhibitors (including 68 approved drugs) with 57 different reactive warheads for 280 protein targets. The crystal structures of some of the proteins bound with a covalent inhibitor are provided to visualize the protein-ligand interactions around the binding site. Each covalent inhibitor is annotated with the structure, warhead, experimental bioactivity, physicochemical properties, etc. Moreover, CovalentInDB provides the covalent reaction mechanism and the corresponding experimental verification methods for each inhibitor towards its target. High-quality datasets are downloadable for users to evaluate and develop computational methods for covalent drug design. 
CovalentInDB is freely accessible at http://cadd.zju.edu.cn/cidb/.",2021-01-01 +33084905,MeDAS: a Metazoan Developmental Alternative Splicing database.,"Alternative splicing is widespread throughout eukaryotic genomes and greatly increases transcriptomic diversity. Many alternative isoforms have functional roles in developmental processes and are precisely temporally regulated. To facilitate the study of alternative splicing in a developmental context, we created MeDAS, a Metazoan Developmental Alternative Splicing database. MeDAS is an added-value resource that re-analyses publicly archived RNA-seq libraries to provide quantitative data on alternative splicing events as they vary across the time course of development. It has broad temporal and taxonomic scope and is intended to assist the user in identifying trends in alternative splicing throughout development. To create MeDAS, we re-analysed a curated set of 2232 Illumina polyA+ RNA-seq libraries that chart detailed time courses of embryonic and post-natal development across 18 species with a taxonomic range spanning the major metazoan lineages from Caenorhabditis elegans to human. MeDAS is freely available at https://das.chenlulab.com both as raw data tables and as an interactive browser allowing searches by species, tissue, or genomic feature (gene, transcript or exon ID and sequence). Results will provide details on alternative splicing events identified for the queried feature and can be visualised at the gene-, transcript- and exon-level as time courses of expression and inclusion levels, respectively.",2021-01-01 +33330918,Systematic evaluation of the effects of genetic variants on PIWI-interacting RNA expression across 33 cancer types.,"PIWI-interacting RNAs (piRNAs) are an emerging class of non-coding RNAs involved in tumorigenesis. Expression quantitative trait locus (eQTL) analysis has been demonstrated to help reveal the genetic mechanism of single nucleotide polymorphisms (SNPs) in cancer etiology. 
However, there are no databases that have been constructed to provide an eQTL analysis between SNPs and piRNA expression. In this study, we collected genotyping and piRNA expression data for 10 997 samples across 33 cancer types from The Cancer Genome Atlas (TCGA). Using linear regression cis-eQTL analysis with adjustment of appropriate covariates, we identified millions of SNP-piRNA pairs in tumor (76 924 831) and normal (24 431 061) tissues. Further, we performed differential expression and survival analyses, and linked the eQTLs to genome-wide association study (GWAS) data to comprehensively decipher the functional roles of identified cis-piRNA eQTLs. Finally, we developed a user-friendly database, piRNA-eQTL (http://njmu-edu.cn:3838/piRNA-eQTL/), to help users query, browse and download corresponding eQTL results. In summary, piRNA-eQTL could serve as an important resource to assist the research community in understanding the roles of genetic variants and piRNAs in the development of cancers.",2021-01-01 +34269889,B3Pdb: an archive of blood-brain barrier-penetrating peptides.,"The blood-brain barrier poses major hurdles in the treatment of brain-related ailments. Over the past decade, interest in peptides-based therapeutics has thrived a lot because of their higher benefit to risk ratio. However, a complete knowledgebase providing a well-annotated picture of the peptide as a therapeutic molecule to cure brain-related ailments is lacking. We have built up a knowledgebase B3Pdb on blood-brain barrier (BBB)-penetrating peptides in the present study. The B3Pdb holds clinically relevant experimental information on 1225 BBB-penetrating peptides, including mode of delivery, animal model, in vitro/in vivo experiments, chemical modifications, length. Hoping that drug delivery systems can improve central nervous system disorder-related therapeutics. 
In this regard, B3Pdb is an important resource to support the rational design of therapeutics peptides for CNS-related disorders. The complete ready-to-use and updated database with a user-friendly web interface is available to the scientific community at https://webs.iiitd.edu.in/raghava/b3pdb/ .",2021-07-16 +33068435,AcrDB: a database of anti-CRISPR operons in prokaryotes and viruses.,"CRISPR-Cas is an anti-viral mechanism of prokaryotes that has been widely adopted for genome editing. To make CRISPR-Cas genome editing more controllable and safer to use, anti-CRISPR proteins have been recently exploited to prevent excessive/prolonged Cas nuclease cleavage. Anti-CRISPR (Acr) proteins are encoded by (pro)phages/(pro)viruses, and have the ability to inhibit their host's CRISPR-Cas systems. We have built an online database AcrDB (http://bcb.unl.edu/AcrDB) by scanning ∼19 000 genomes of prokaryotes and viruses with AcrFinder, a recently developed Acr-Aca (Acr-associated regulator) operon prediction program. Proteins in Acr-Aca operons were further processed by two machine learning-based programs (AcRanker and PaCRISPR) to obtain numerical scores/ranks. Compared to other anti-CRISPR databases, AcrDB has the following unique features: (i) It is a genome-scale database with the largest collection of data (39 799 Acr-Aca operons containing Aca or Acr homologs); (ii) It offers a user-friendly web interface with various functions for browsing, graphically viewing, searching, and batch downloading Acr-Aca operons; (iii) It focuses on the genomic context of Acr and Aca candidates instead of individual Acr protein family and (iv) It collects data with three independent programs each having a unique data mining algorithm for cross validation. AcrDB will be a valuable resource to the anti-CRISPR research community.",2021-01-01 +34679164,Mining drug-target and drug-adverse drug reaction databases to identify target-adverse drug reaction relationships. 
,"The level of attrition on drug discovery, particularly at advanced stages, is very high due to unexpected adverse drug reactions (ADRs) caused by drug candidates, and thus, being able to predict undesirable responses when modulating certain protein targets would contribute to the development of safer drugs and have important economic implications. On the one hand, there are a number of databases that compile information of drug-target interactions. On the other hand, there are a number of public resources that compile information on drugs and ADR. It is therefore possible to link target and ADRs using drug entities as connecting elements. Here, we present T-ARDIS (Target-Adverse Reaction Database Integrated Search) database, a resource that provides comprehensive information on proteins and associated ADRs. By combining the information from drug-protein and drug-ADR databases, we statistically identify significant associations between proteins and ADRs. Besides describing the relationship between proteins and ADRs, T-ARDIS provides detailed description about proteins along with the drug and adverse reaction information. Currently T-ARDIS contains over 3000 ADR and 248 targets for a total of more 17 000 pairwise interactions. Each entry can be retrieved through multiple search terms including target Uniprot ID, gene name, adverse effect and drug name. Ultimately, the T-ARDIS database has been created in response to the increasing interest in identifying early in the drug development pipeline potentially problematic protein targets whose modulation could result in ADRs. Database URL: http://www.bioinsilico.org/T-ARDIS.",2021-10-01 +34303324,On the border of the amyloidogenic sequences: prefix analysis of the parallel beta sheets in the PDB_Amyloid collection.,"The Protein Data Bank (PDB) today contains more than 174,000 entries with the 3-dimensional structures of biological macromolecules. 
Using the rich resources of this repository, it is possible identifying subsets with specific, interesting properties for different applications. Our research group prepared an automatically updated list of amyloid- and probably amyloidogenic molecules, the PDB_Amyloid collection, which is freely available at the address http://pitgroup.org/amyloid. This resource applies exclusively the geometric properties of the steric structures for identifying amyloids. In the present contribution, we analyze the starting (i.e., prefix) subsequences of the characteristic, parallel beta-sheets of the structures in the PDB_Amyloid collection, and identify further appearances of these length-5 prefix subsequences in the whole PDB data set. We have identified this way numerous proteins, whose normal or irregular functions involve amyloid formation, structural misfolding, or anti-coagulant properties, simply by containing these prefixes: including the T-cell receptor (TCR), bound with the major histocompatibility complexes MHC-1 and MHC-2; the p53 tumor suppressor protein; a mycobacterial RNA polymerase transcription initialization complex; the human bridging integrator protein BIN-1; and the tick anti-coagulant peptide TAP.",2021-07-26 +33546584,Plant Co-expression Annotation Resource: a web server for identifying targets for genetically modified crop breeding pipelines.,"The development of genetically modified crops (GM) includes the discovery of candidate genes through bioinformatics analysis using genomics data, gene expression, and others. Proteins of unknown function (PUFs) are interesting targets for GM crops breeding pipelines for the novelty associated with such targets and also to avoid copyright protection. One method of inferring the putative function of PUFs is by relating them to factors of interest such as abiotic stresses using orthology and co-expression networks, in a guilt-by-association manner. 
In this regard, we have downloaded, analyzed, and processed genomics data of 53 angiosperms, totaling 1,862,010 genes and 2,332,974 RNA. Diamond and InterproScan were used to discover 72,266 PUFs for all organisms. RNA-seq datasets related to abiotic stresses were downloaded from NCBI/GEO. The RNA-seq data was used as input to the LSTrAP software to construct co-expression networks. LSTrAP also created clusters of transcripts with correlated expression, whose members are more probably related to the molecular mechanisms associated with abiotic stresses in the plants. Orthologous groups were created (OrthoMCL) using all 2,332,974 proteins in order to associate PUFs to abiotic stress-related clusters of co-expression and therefore infer their function in a guilt-by-association manner. A freely available web resource named ""Plant Co-expression Annotation Resource"" ( https://www.machado.cnptia.embrapa.br/plantannot ), Plantannot, was created to provide indexed queries to search for PUF putatively associated with abiotic stresses. The web interface also allows browsing, querying, and retrieving of public genomics data from 53 plants. We hope Plantannot to be useful for researchers trying to obtain novel GM crops resistant to climate change hazards.",2021-02-05 +33095866,VARAdb: a comprehensive variation annotation database for human.,"With the study of human diseases and biological processes increasing, a large number of non-coding variants have been identified and facilitated. The rapid accumulation of genetic and epigenomic information has resulted in an urgent need to collect and process data to explore the regulation of non-coding variants. Here, we developed a comprehensive variation annotation database for human (VARAdb, http://www.licpathway.net/VARAdb/), which specifically considers non-coding variants. 
VARAdb provides annotation information for 577,283,813 variations and novel variants, prioritizes variations based on scores using nine annotation categories, and supports pathway downstream analysis. Importantly, VARAdb integrates a large amount of genetic and epigenomic data into five annotation sections, which include 'Variation information', 'Regulatory information', 'Related genes', 'Chromatin accessibility' and 'Chromatin interaction'. The detailed annotation information consists of motif changes, risk SNPs, LD SNPs, eQTLs, clinical variant-drug-gene pairs, sequence conservation, somatic mutations, enhancers, super enhancers, promoters, transcription factors, chromatin states, histone modifications, chromatin accessibility regions and chromatin interactions. This database is a user-friendly interface to query, browse and visualize variations and related annotation information. VARAdb is a useful resource for selecting potential functional variations and interpreting their effects on human diseases and biological processes.",2021-01-01 +33068428,Comparative Toxicogenomics Database (CTD): update 2021.,"The public Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is an innovative digital ecosystem that relates toxicological information for chemicals, genes, phenotypes, diseases, and exposures to advance understanding about human health. Literature-based, manually curated interactions are integrated to create a knowledgebase that harmonizes cross-species heterogeneous data for chemical exposures and their biological repercussions. In this biennial update, we report a 20% increase in CTD curated content and now provide 45 million toxicogenomic relationships for over 16 300 chemicals, 51 300 genes, 5500 phenotypes, 7200 diseases and 163 000 exposure events, from 600 comparative species. 
Furthermore, we increase the functionality of chemical-phenotype content with new data-tabs on CTD Disease pages (to help fill in knowledge gaps for environmental health) and new phenotype search parameters (for Batch Query and Venn analysis tools). As well, we introduce new CTD Anatomy pages that allow users to uniquely explore and analyze chemical-phenotype interactions from an anatomical perspective. Finally, we have enhanced CTD Chemical pages with new literature-based chemical synonyms (to improve querying) and added 1600 amino acid-based compounds (to increase chemical landscape). Together, these updates continue to augment CTD as a powerful resource for generating testable hypotheses about the etiologies and molecular mechanisms underlying environmentally influenced diseases.",2021-01-01 +34840246,"Building Capacity Through ICCPR Cardiovascular Rehabilitation Foundations Certification (CRFC): EVALUATION OF REACH, BARRIERS, AND IMPACT.","

Purpose

The International Council of Cardiovascular Prevention and Rehabilitation (ICCPR) developed an online Cardiovascular Rehabilitation Foundations Certification (CRFC; https://globalcardiacrehab.com/Certification) in October 2017, to build cardiac rehabilitation (CR) delivery capacity in low-resource settings based on their guidelines. Herein we evaluate its reach globally, barriers to its completion, as well as satisfaction and impact of the course among those completing it.

Methods

The country of origin of all applicants was tallied. An online survey was developed for learners who completed the CRFC (completers), and for those who applied but did not yet complete the program (noncompleters), administered using Google Forms.

Results

With regard to reach, 236 applications were received from 23/203 (11%) countries in the world; 51 (22%) were from low- or middle-income countries. A total of 130 (55%) have completed the CRFC; mean scores on the final examination were 88.3 ± 7.1%, with no difference by country income classification (P= .052). Sixteen (22%) noncompleters and 37 (34%) completers responded to the survey. Barriers reported by noncompleters were time constraints, cost, and technical issues. Overall satisfaction (scale 1-5) with the CRFC was high (4.49 ± 0.51); most completers would highly recommend the CRFC to others (4.30 ± 0.66), and perceived that the information provided will contribute to their work and/or the care of their patients (4.38 ± 0.89); 29 (78%) had used the information from the CRFC in their practice.

Conclusions

The reach of the CRFC still needs to be broadened, in particular in low-resource settings. Learners are highly satisfied with the certification, and its impacts on CR practice are encouraging. Input has been implemented to improve the CRFC.",2021-11-24 +33952332,SANCDB: an update on South African natural compounds and their readily available analogs.,"

Background

South African Natural Compounds Database (SANCDB; https://sancdb.rubi.ru.ac.za/ ) is the sole and a fully referenced database of natural chemical compounds of South African biodiversity. It is freely available, and since its inception in 2015, the database has become an important resource to several studies. Its content has been: used as training data for machine learning models; incorporated to larger databases; and utilized in drug discovery studies for hit identifications.

Description

Here, we report the updated version of SANCDB. The new version includes 412 additional compounds that have been reported since 2015, giving a total of 1012 compounds in the database. Further, although natural products (NPs) are an important source of unique scaffolds, they have a major drawback due to their complex structure resulting in low synthetic feasibility in the laboratory. With this in mind, SANCDB is, now, updated to provide direct links to commercially available analogs from two major chemical databases namely Mcule and MolPort. To our knowledge, this feature is not available in other NP databases. Additionally, for easier access to information by users, the database and website interface were updated. The compounds are now downloadable in many different chemical formats.

Conclusions

The drug discovery process relies heavily on NPs due to their unique chemical organization. This has inspired the establishment of numerous NP chemical databases. With the emergence of newer chemoinformatic technologies, existing chemical databases require constant updates to facilitate information accessibility and integration by users. Besides increasing the NPs compound content, the updated SANCDB allows users to access the individual compounds (if available) or their analogs from commercial databases seamlessly.",2021-05-05 +33151298,GRNdb: decoding the gene regulatory networks in diverse human and mouse conditions.,"Gene regulatory networks (GRNs) formed by transcription factors (TFs) and their downstream target genes play essential roles in gene expression regulation. Moreover, GRNs can be dynamic changing across different conditions, which are crucial for understanding the underlying mechanisms of disease pathogenesis. However, no existing database provides comprehensive GRN information for various human and mouse normal tissues and diseases at the single-cell level. Based on the known TF-target relationships and the large-scale single-cell RNA-seq data collected from public databases as well as the bulk data of The Cancer Genome Atlas and the Genotype-Tissue Expression project, we systematically predicted the GRNs of 184 different physiological and pathological conditions of human and mouse involving >633 000 cells and >27 700 bulk samples. We further developed GRNdb, a freely accessible and user-friendly database (http://www.grndb.com/) for searching, comparing, browsing, visualizing, and downloading the predicted information of 77 746 GRNs, 19 687 841 TF-target pairs, and related binding motifs at single-cell/bulk resolution. GRNdb also allows users to explore the gene expression profile, correlations, and the associations between expression levels and the patient survival of diverse cancers. 
Overall, GRNdb provides a valuable and timely resource to the scientific community to elucidate the functions and mechanisms of gene expression regulation in various conditions.",2021-01-01 +,"A Genome Resource of Setosphaeria turcica, Causal Agent of Northern Leaf Blight of Maize","The heterothallic ascomycete Setosphaeria turcica (anamorph Exserohilum turcicum) causes northern corn leaf blight, which results in devastating yield losses and a reduction in feed value. Although genome sequences of two model strains of the pathogen are available (https://mycocosm.jgi.doe.gov/mycocosm/home), previous drafts were assembled using short read technologies, making evolutionary and genetic linkage inferences difficult. Here, race 23N of S. turcica strain Et28A was sequenced again using Illumina HiSeq and PacBio Sequel technologies, and assembled to approximately 43,480,261 bp on 30 scaffolds. In all, 13,183 protein-coding genes were predicted, 13,142 of them were well annotated. This S. turcica genome resource is important for understanding the genetics behind pathogen evolution and infection mechanisms.",2020-12-01 +32833025,MNDR v3.0: mammal ncRNA-disease repository with increased coverage and annotation.,"Many studies have indicated that non-coding RNA (ncRNA) dysfunction is closely related to numerous diseases. Recently, accumulated ncRNA-disease associations have made related databases insufficient to meet the demands of biomedical research. The constant updating of ncRNA-disease resources has become essential. Here, we have updated the mammal ncRNA-disease repository (MNDR, http://www.rna-society.org/mndr/) to version 3.0, containing more than one million entries, four-fold increment in data compared to the previous version. Experimental and predicted circRNA-disease associations have been integrated, increasing the number of categories of ncRNAs to five, and the number of mammalian species to 11. 
Moreover, ncRNA-disease related drug annotations and associations, as well as ncRNA subcellular localizations and interactions, were added. In addition, three ncRNA-disease (miRNA/lncRNA/circRNA) prediction tools were provided, and the website was also optimized, making it more practical and user-friendly. In summary, MNDR v3.0 will be a valuable resource for the investigation of disease mechanisms and clinical treatment strategies.",2021-01-01 +33653882,A Mycobacterial Systems Resource for the Research Community. ,"Functional characterization of bacterial proteins lags far behind the identification of new protein families. This is especially true for bacterial species that are more difficult to grow and genetically manipulate than model systems such as Escherichia coli and Bacillus subtilis. To facilitate functional characterization of mycobacterial proteins, we have established a Mycobacterial Systems Resource (MSR) using the model organism Mycobacterium smegmatis. This resource focuses specifically on 1,153 highly conserved core genes that are common to many mycobacterial species, including Mycobacterium tuberculosis, in order to provide the most relevant information and resources for the mycobacterial research community. The MSR includes both biological and bioinformatic resources. The biological resource includes (i) an expression plasmid library of 1,116 genes fused to a fluorescent protein for determining protein localization; (ii) a library of 569 precise deletions of nonessential genes; and (iii) a set of 843 CRISPR-interference (CRISPRi) plasmids specifically targeted to silence expression of essential core genes and genes for which a precise deletion was not obtained. The bioinformatic resource includes information about individual genes and a detailed assessment of protein localization. 
We anticipate that integration of these initial functional analyses and the availability of the biological resource will facilitate studies of these core proteins in many Mycobacterium species, including the less experimentally tractable pathogens M. abscessus, M. avium, M. kansasii, M. leprae, M. marinum, M. tuberculosis, and M. ulcerans. IMPORTANCE Diseases caused by mycobacterial species result in millions of deaths per year globally, and present a substantial health and economic burden, especially in immunocompromised patients. Difficulties inherent in working with mycobacterial pathogens have hampered the development and application of high-throughput genetics that can inform genome annotations and subsequent functional assays. To facilitate mycobacterial research, we have created a biological and bioinformatic resource (https://msrdb.org/) using Mycobacterium smegmatis as a model organism. The resource focuses specifically on 1,153 proteins that are highly conserved across the mycobacterial genus and, therefore, likely perform conserved mycobacterial core functions. Thus, functional insights from the MSR will apply to all mycobacterial species. We believe that the availability of this mycobacterial systems resource will accelerate research throughout the mycobacterial research community.",2021-03-02 +31647099,"Pathway Commons 2019 Update: integration, analysis and exploration of pathway data.","Pathway Commons (https://www.pathwaycommons.org) is an integrated resource of publicly available information about biological pathways including biochemical reactions, assembly of biomolecular complexes, transport and catalysis events and physical interactions involving proteins, DNA, RNA, and small molecules (e.g. metabolites and drug compounds). Data is collected from multiple providers in standard formats, including the Biological Pathway Exchange (BioPAX) language and the Proteomics Standards Initiative Molecular Interactions format, and then integrated. 
Pathway Commons provides biologists with (i) tools to search this comprehensive resource, (ii) a download site offering integrated bulk sets of pathway data (e.g. tables of interactions and gene sets), (iii) reusable software libraries for working with pathway information in several programming languages (Java, R, Python and Javascript) and (iv) a web service for programmatically querying the entire dataset. Visualization of pathways is supported using the Systems Biological Graphical Notation (SBGN). Pathway Commons currently contains data from 22 databases with 4794 detailed human biochemical processes (i.e. pathways) and ∼2.3 million interactions. To enhance the usability of this large resource for end-users, we develop and maintain interactive web applications and training materials that enable pathway exploration and advanced analysis.",2020-01-01 +34901870,pdCSM-GPCR: predicting potent GPCR ligands with graph-based signatures.,"

Motivation

G protein-coupled receptors (GPCRs) can selectively bind to many types of ligands, ranging from light-sensitive compounds, ions, hormones, pheromones and neurotransmitters, modulating cell physiology. Considering their role in many essential cellular processes, they are one of the most targeted protein families, with over a third of all approved drugs modulating GPCR signalling. Despite this, the large diversity of receptors and their multipass transmembrane architectures make the identification and development of novel specific, and safe GPCR ligands a challenge. While computational approaches have the potential to assist GPCR drug development, they have presented limited performance and generalization capabilities. Here, we explored the use of graph-based signatures to develop pdCSM-GPCR, a method capable of rapidly and accurately screening potential GPCR ligands.

Results

Bioactivity data (IC50, EC50, Ki and Kd) for individual GPCRs were curated. After curation, we used the data for developing predictive models for 36 major GPCR targets, across 4 classes (A, B, C and F). Our models compose the most comprehensive computational resource for GPCR bioactivity prediction to date. Across stratified 10-fold cross-validation and blind tests, our approach achieved Pearson's correlations of up to 0.89, significantly outperforming previous methods. Interpreting our results, we identified common important features of potent GPCRs ligands, which tend to have bicyclic rings, leading to higher levels of aromaticity. We believe pdCSM-GPCR will be an invaluable tool to assist screening efforts, enriching compound libraries and ranking candidates for further experimental validation.

Availability and implementation

pdCSM-GPCR predictive models and datasets used have been made available via a freely accessible and easy-to-use web server at http://biosig.unimelb.edu.au/pdcsm_gpcr/.

Supplementary information

Supplementary data are available at Bioinformatics Advances online.",2021-11-10 +33837660,Upregulation of ZHX2 predicts poor prognosis and is correlated with immune infiltration in gastric cancer.,"The transcriptional repressor zinc finger homeobox 2 (ZHX2) is reported to regulate tumor progression in several human cancers, although little is known about its role in gastric cancer (GC). In the present study, we investigated the expression of ZHX2 and its relationship with the clinicopathological characteristics and prognosis of GC patients, and we also examined the effect of ZHX2 overexpression in GC cell lines. We used UALCAN (http://ualcan.path.uab.edu) and the Tumor Immune Estimation Resource (http://cistrome.org/TIMER) to examine ZHX2 mRNA expression, and also used Kaplan-Meier Plotter (https://kmplot.com) to determine whether ZHX2 expression was related to GC prognosis. Expression of ZHX2 protein was detected using immunohistochemical staining assays. Cell proliferation was evaluated using a cell counting kit-8 and colony formation assays, whereas apoptosis was examined by flow cytometry. Wound healing and transwell assays were used to detect cell migration and invasion. We also performed Gene Set Enrichment Analysis (https://www.gsea-msigdb.org) and used The Cancer Genome Atlas database (https://www.genome.gov/Funded-Programs-Projects/Cancer-Genome-Atlas) to examine the correlation of ZHX2 with immune infiltration. We report that ZHX2 is highly expressed in GC tissues and is significantly associated with clinical characteristics. Upregulation of ZHX2 predicted poor prognosis in GC. Furthermore, ZHX2 overexpression can promote the proliferation, invasion and migration, but inhibit apoptosis, of GC cells. High expression of ZHX2 in GC is correlated with the presence of infiltrating immune cells, including B cells, CD4+ T cells, macrophages and dendritic cells. Our data suggest that high expression of ZHX2 in GC predicts poor prognosis. 
In addition, ZHX2 may promote malignant behaviors of GC cells, and immune infiltration might be related to the oncogenic role of ZHX2 in GC.",2021-05-24 +,"SylvanSeeds, a seed germination database for temperate deciduous forests","Seed traits have functional significance at all levels of plant ecology, but there is a lack of germination databases of wide geographical scope. This report presents SylvanSeeds (https://efernandezpascual.github.io/home/sylvanseeds.html), a first global database of germination records for an ecologically coherent unit: temperate broad‐leaved and mixed forests. Data were gathered with a systematic literature search. A list of frequent taxa of the study area was created using 14,963 vegetation relevés from the sPlot database. The list was searched in the Web of Science. In total, 6,791 references were screened, finding 555 articles from which data were extracted. SylvanSeeds includes 4,012 germination records of 334 species from 72 families (gymnosperms and angiosperms), collected in 46 countries between 1920 and 2017. It provides raw data for meta‐analysis: proportions of seeds germinated in laboratory experiments of scarification, stratification, light/darkness, and constant/alternating temperatures. SylvanSeeds is freely distributed as a csv file. A shiny web app is also presented, to make data accessible to the public. SylvanSeeds advances functional seed ecology and brings two innovations to plant science. First, the data‐gathering methodology can be extended to other biomes. Second, database and app can be a standard in further efforts to compile germination data.",2021-01-01 +33294866,Pancreatlas: Applying an Adaptable Framework to Map the Human Pancreas in Health and Disease.,"Human tissue phenotyping generates complex spatial information from numerous imaging modalities, yet images typically become static figures for publication, and original data and metadata are rarely available. 
While comprehensive image maps exist for some organs, most resources have limited support for multiplexed imaging or have non-intuitive user interfaces. Therefore, we built a Pancreatlas resource that integrates several technologies into a unique interface, allowing users to access richly annotated web pages, drill down to individual images, and deeply explore data online. The current version of Pancreatlas contains over 800 unique images acquired by whole-slide scanning, confocal microscopy, and imaging mass cytometry, and is available at https://www.pancreatlas.org. To create this human pancreas-specific biological imaging resource, we developed a React-based web application and Python-based application programming interface, collectively called Flexible Framework for Integrating and Navigating Data (FFIND), which can be adapted beyond Pancreatlas to meet countless imaging or other structured data-management needs.",2020-10-05 +33290552,The Gene Ontology resource: enriching a GOld mine.,"The Gene Ontology Consortium (GOC) provides the most comprehensive resource currently available for computable knowledge regarding the functions of genes and gene products. Here, we report the advances of the consortium over the past two years. The new GO-CAM annotation framework was notably improved, and we formalized the model with a computational schema to check and validate the rapidly increasing repository of 2838 GO-CAMs. In addition, we describe the impacts of several collaborations to refine GO and report a 10% increase in the number of GO annotations, a 25% increase in annotated gene products, and over 9,400 new scientific articles annotated. As the project matures, we continue our efforts to review older annotations in light of newer findings, and, to maintain consistency with other ontologies. As a result, 20 000 annotations derived from experimental data were reviewed, corresponding to 2.5% of experimental GO annotations. 
The website (http://geneontology.org) was redesigned for quick access to documentation, downloads and tools. To maintain an accurate resource and support traceability and reproducibility, we have made available a historical archive covering the past 15 years of GO data with a consistent format and file structure for both the ontology and annotations.",2021-01-01 +33275967,ncRNAVar: A Manually Curated Database for Identification of Noncoding RNA Variants Associated with Human Diseases.,"While variants of noncoding RNAs (ncRNAs) have been experimentally validated as a new class of biomarkers and drug targets, the discovery and interpretation of relationships between ncRNA variants and human diseases become important and challenging. Here we present ncRNAVar (http://www.liwzlab.cn/ncrnavar/), the first database that provides association data between validated ncRNA variants and human diseases through manual curation on 2650 publications and computational annotation. ncRNAVar contains 4565 associations between 711 human disease phenotypes and 3112 variants from 2597 ncRNAs. Each association was reviewed by professional curators, incorporated with valuable annotation and cross references, and designated with an association score by our refined score model. ncRNAVar offers web applications including association prioritization, network visualization, and relationship mapping. ncRNAVar, presenting a landscape of ncRNA variants in human diseases and a useful resource for subsequent software development, will improve our insight of relationships between ncRNA variants and human health.",2020-12-01 +,CTNI-30. A SNAPSHOT OF “REAL WORLD” CURRENT NEURO-ONCOLOGY PRACTICE IN TEN UK CENTRES AND RATIONALE FOR THE TESSA JOWELL BRAIN MATRIX (TJBM) PLATFORM STUDY,"Abstract

OVERVIEW

The TJBM Platform Study (https://www.birmingham.ac.uk/research/crctu/trials/brain-matrix) is a programme of work aimed at improving the knowledge of, and treatment for, glioma. We present the feasibility data collected from the initial ten UK centres.

METHOD

The UK TJBM centres completed a multi-disciplinary feasibility questionnaire to facilitate participation and collaboration across centres. Data were collected from hospital electronic board review records, clinic letters, operative and imaging notes, MDT or personal experience.

RESULTS

Work load: Between 2016-2018 service provision redistribution reflects a trend towards higher volume centres. Overall, glioma workload within ten TJBM centres has remained stable. Imaging: All TJBM centres have good access to imaging techniques and neuroradiology expertise, including relevant ‘advanced’ imaging. All have RANO capability, although not widely used clinically. Neurosurgery: All centres have access to 5 ALA, perform awake craniotomy for language assessment and motor/sensory mapping are typically performed asleep, with subtle variation in techniques. Pathology: Despite molecular analysis advances, current practice is limited to the evaluation of formalin embedded tissue by traditional morphology/immunohistochemical staining, with limited targeted testing of specific genetic changes. Clinical oncology: Oncology treatments for glioma were as per NICE guidance with some minor local variation. A relative lack of linking treatments to detailed clinical, treatment, toxicity, and quality of life data makes communication of the significance of findings to patients challenging.

CONCLUSION

Through systematic real-world data collection the TJBM platform study will provide a detailed understanding of practice within the UK, linked to molecular tumour genotype, treatment response outcome measures, and regular quality of life assessments. This infrastructure will help establish a trial-competent network for future collaborative research. Academic and industry partners will be able to use the TJBM platform through collaboration, overseen by a strong governance framework. This will maximise the opportunities and abilities to translate advances into trials and patient benefit.",2021-11-01 +34000296,Generation of an isoform-level transcriptome atlas of macrophage activation.,"RNA-seq is routinely used to measure gene expression changes in response to cell perturbation. Genes upregulated or downregulated following some perturbation are designated as genes of interest, and their most expressed isoform(s) would then be selected for follow-up experimentation. However, because of its need to fragment RNA molecules, RNA-seq is limited in its ability to capture gene isoforms and their expression patterns. This lack of isoform-specific data means that isoforms would be selected based on annotation databases that are incomplete, not tissue specific, or do not provide key information on expression levels. As a result, minority or nonexistent isoforms might be selected for follow-up, leading to loss in valuable resources and time. There is therefore a great need to comprehensively identify gene isoforms along with their corresponding levels of expression. Using the long-read nanopore-based R2C2 method, which does not fragment RNA molecules, we generated an Isoform-level transcriptome Atlas of Macrophage Activation that identifies full-length isoforms in primary human monocyte-derived macrophages. 
Macrophages are critical innate immune cells important for recognizing pathogens through binding of pathogen-associated molecular patterns to toll-like receptors, culminating in the initiation of host defense pathways. We characterized isoforms for most moderately-to-highly expressed genes in resting and toll-like receptor-activated monocyte-derived macrophages, identified isoforms differentially expressed between conditions, and validated these isoforms by RT-qPCR. We compiled these data into a user-friendly data portal within the UCSC Genome Browser (https://genome.ucsc.edu/s/vollmers/IAMA). Our atlas represents a valuable resource for innate immune research, providing unprecedented isoform information for primary human macrophages.",2021-01-01 +35059555,Analysis of 180 Genetic Variants in a New Interactive FX Variant Database Reveals Novel Insights into FX Deficiency.,"Coagulation factor X (FX), often termed as Stuart-Prower factor, is a plasma glycoprotein composed of the γ-carboxyglutamic acid (GLA) domain, two epidermal growth factor domains (EGF-1 and EGF-2), and the serine protease (SP) domain. FX plays a pivotal role in the coagulation cascade, activating thrombin to promote platelet plug formation and prevent excess blood loss. Genetic variants in FX disrupt coagulation and lead to FX or Stuart-Prower factor deficiency. To better understand the relationship between FX deficiency and disease severity, an interactive FX variant database has been set up at https://www.factorx-db.org , based on earlier web sites for the factor-XI and -IX coagulation proteins. To date (April 2021), we report 427 case reports on FX deficiency corresponding to 180 distinct F10 genetic variants. Of these, 149 are point variants (of which 128 are missense), 22 are deletions, 3 are insertions, and 6 are polymorphisms. FX variants are phenotypically classified as being type I or II. 
Type-I variants involve the simultaneous reduction of FX coagulant activity (FX:C) and FX antigen levels (FX:Ag), whereas type-II variants involve a reduction in FX:C with normal FX:Ag plasma levels. Both types of variants were distributed throughout the FXa protein structure. Analyses based on residue surface accessibilities showed the most damaging variants to occur at residues with low accessibilities. The interactive FX web database provides a novel easy-to-use resource for clinicians and scientists to improve the understanding of FX deficiency. Guidelines are provided for clinicians who wish to use the database for diagnostic purposes.",2021-10-01 +35059554,Analysis of 272 Genetic Variants in the Upgraded Interactive FXI Web Database Reveals New Insights into FXI Deficiency.,"Coagulation Factor XI (FXI) is a plasma glycoprotein composed of four apple (Ap) domains and a serine protease (SP) domain. FXI circulates as a dimer and activates Factor IX (FIX), promoting thrombin production and preventing excess blood loss. Genetic variants that degrade FXI structure and function often lead to bleeding diatheses, commonly termed FXI deficiency. The first interactive FXI variant database underwent initial development in 2003 at https://www.factorxi.org . Here, based on a much improved FXI crystal structure, the upgraded FXI database contains information regarding 272 FXI variants (including 154 missense variants) found in 657 patients, this being a significant increase from the 183 variants identified in the 2009 update. Type I variants involve the simultaneous reduction of FXI coagulant activity (FXI:C) and FXI antigen levels (FXI:Ag), whereas Type II variants result in decreased FXI:C yet normal FXI:Ag. The database updates now highlight the predominance of Type I variants in FXI. Analysis in terms of a consensus Ap domain revealed the near-uniform distribution of 81 missense variants across the Ap domains. 
A further 66 missense variants were identified in the SP domain, showing that all regions of the FXI protein were important for function. The variants clarified the critical importance of changes in surface solvent accessibility, as well as those of cysteine residues and the dimer interface. Guidelines are provided below for clinicians who wish to use the database for diagnostic purposes. In conclusion, the updated database provides an easy-to-use web resource on FXI deficiency for clinicians.",2021-10-01 +,PSMD: An extensive database for pan‐species microsatellite investigation and marker development,"Microsatellites are widely distributed throughout nearly all genomes which have been extensively exploited as powerful genetic markers for diverse applications due to their high polymorphisms. Their length variations are involved in gene regulation and implicated in numerous genetic diseases even in cancers. Although much effort has been devoted in microsatellite database construction, the existing microsatellite databases still had some drawbacks, such as limited number of species, unfriendly export format, missing marker development, lack of compound microsatellites and absence of gene annotation, which seriously restricted researchers to perform downstream analysis. In order to overcome the above limitations, we developed PSMD (Pan‐Species Microsatellite Database, http://big.cdu.edu.cn/psmd/) as a web‐based database to facilitate researchers to easily identify microsatellites, exploit reliable molecular markers and compare microsatellite distribution pattern on genome‐wide scale. In current release, PSMD comprises 678,106,741 perfect microsatellites and 43,848,943 compound microsatellites from 18,408 organisms, which covered almost all species with available genomic data. In addition to interactive browse interface, PSMD also offers a flexible filter function for users to quickly gain desired microsatellites from large data sets. 
PSMD allows users to export GFF3 formatted file and CSV formatted statistical file for downstream analysis. We also implemented an online tool for analysing occurrence of microsatellites with user‐defined parameters. Furthermore, Primer3 was embedded to help users to design high‐quality primers with customizable settings. To our knowledge, PSMD is the most extensive resource which is likely to be adopted by scientists engaged in biological, medical, environmental and agricultural research.",2020-01-01 +33439542,"Human disease genes website series: An international, open and dynamic library for up-to-date clinical information.","Since the introduction of next-generation sequencing, an increasing number of disorders have been discovered to have genetic etiology. To address diverse clinical questions and coordinate research activities that arise with the identification of these rare disorders, we developed the Human Disease Genes website series (HDG website series): an international digital library that records detailed information on the clinical phenotype of novel genetic variants in the human genome (https://humandiseasegenes.info/). Each gene website is moderated by a dedicated team of clinicians and researchers, focused on specific genes, and provides up-to-date-including unpublished-clinical information. The HDG website series is expanding rapidly with 424 genes currently adopted by 325 moderators from across the globe. On average, a gene website has detailed phenotypic information of 14.4 patients. There are multiple examples of added value, one being the ARID1B gene website, which was recently utilized in research to collect clinical information of 81 new patients. Additionally, several gene websites have more data available than currently published in the literature. In conclusion, the HDG website series provides an easily accessible, open and up-to-date clinical data resource for patients with pathogenic variants of individual genes. 
This is a valuable resource not only for clinicians dealing with rare genetic disorders such as developmental delay and autism, but other professionals working in diagnostics and basic research. Since the HDG website series is a dynamic platform, its data also include the phenotype of yet unpublished patients curated by professionals providing higher quality clinical detail to improve management of these rare disorders.",2021-01-13 +34214659,An inferred functional impact map of genetic variants in rice.,"Interpreting the functional impacts of genetic variants (GVs) is an important challenge for functional genomic studies in crops and next-generation breeding. Previous studies in rice (Oryza sativa) have focused mainly on the identification of GVs, whereas systematic functional annotation of GVs has not yet been performed. Here, we present a functional impact map of GVs in rice. We curated haplotype information for 17 397 026 GVs from sequencing data of 4726 rice accessions. We quantitatively evaluated the effects of missense mutations in coding regions in each haplotype based on the conservation of amino acid residues and obtained the effects of 918 848 non-redundant missense GVs. Furthermore, we generated high-quality chromatin accessibility (CA) data from six representative rice tissues and used these data to train deep convolutional neural network models to predict the impacts of 5 067 405 GVs for CA in regulatory regions. We characterized the functional properties and tissue specificity of the GV effects and found that large-effect GVs in coding and regulatory regions may be subject to selection in different directions. Finally, we demonstrated how the functional impact map could be used to prioritize causal variants in mapping populations. 
This impact map will be a useful resource for accelerating gene cloning and functional studies in rice, and can be freely queried in RiceVarMap V2.0 (http://ricevarmap.ncpgr.cn).",2021-06-29 +,"Prairie and tree planting tool—PT2 (1.0): a conservation decision support tool for Iowa, USA","This article overviews the prairie and tree planting tool or PT2 (1.0), an online GIS-based decision support tool for landowners interested in exploring opportunities to plant prairie or trees in and around their farm fields for conservation or production purposes. PT2 1.0 can be found online at: https://pt2.nrem.iastate.edu/. With the PT2 (1.0) users locate farm fields of interest in an online aerial photograph and mapping geographic information system (GIS). Users explore areas they are considering for prairie or tree cover by examining different data layers: soil maps, 2-foot contour topography maps, LiDAR hillshade maps, and a map of current land values based on estimated land rent. Users then utilize scaled dimensional drawing tools to measure and delineate areas of interest for planting trees and or prairie. Once an area is delineated, users select from drop-down menus prairie seed mixes or woody species that are suitable for the soils present, and users can select basic long-term management options. PT2 (1.0) estimates total annualized costs for tree or prairie establishment, long-term management, and opportunity costs (based on area weighted expected soil rent), and factors in the potential benefit of utilizing government cost-share programming, e.g., Environmental Quality Incentive Program or the Conservation Reserve Program. Key data layers are currently functional in Iowa, likewise the financial data underlying the cost analysis are specific to Iowa. PT2 (1.0) is, however, open source and open code and guidance is provided regarding how to access and adapt the data for use in other states.

Supplementary Information

The online version contains supplementary material available at 10.1007/s10457-021-00686-8.",2021-10-12 +33106848,"RNAcentral 2021: secondary structure integration, improved sequence search and new member databases.","RNAcentral is a comprehensive database of non-coding RNA (ncRNA) sequences that provides a single access point to 44 RNA resources and >18 million ncRNA sequences from a wide range of organisms and RNA types. RNAcentral now also includes secondary (2D) structure information for >13 million sequences, making RNAcentral the world's largest RNA 2D structure database. The 2D diagrams are displayed using R2DT, a new 2D structure visualization method that uses consistent, reproducible and recognizable layouts for related RNAs. The sequence similarity search has been updated with a faster interface featuring facets for filtering search results by RNA type, organism, source database or any keyword. This sequence search tool is available as a reusable web component, and has been integrated into several RNAcentral member databases, including Rfam, miRBase and snoDB. To allow for a more fine-grained assignment of RNA types and subtypes, all RNAcentral sequences have been annotated with Sequence Ontology terms. The RNAcentral database continues to grow and provide a central data resource for the RNA community. RNAcentral is freely available at https://rnacentral.org.",2021-01-01 +33193550,MaizeMine: A Data Mining Warehouse for the Maize Genetics and Genomics Database.,"MaizeMine is the data mining resource of the Maize Genetics and Genome Database (MaizeGDB; http://maizemine.maizegdb.org). It enables researchers to create and export customized annotation datasets that can be merged with their own research data for use in downstream analyses. 
MaizeMine uses the InterMine data warehousing system to integrate genomic sequences and gene annotations from the Zea mays B73 RefGen_v3 and B73 RefGen_v4 genome assemblies, Gene Ontology annotations, single nucleotide polymorphisms, protein annotations, homologs, pathways, and precomputed gene expression levels based on RNA-seq data from the Z. mays B73 Gene Expression Atlas. MaizeMine also provides database cross references between genes of alternative gene sets from Gramene and NCBI RefSeq. MaizeMine includes several search tools, including a keyword search, built-in template queries with intuitive search menus, and a QueryBuilder tool for creating custom queries. The Genomic Regions search tool executes queries based on lists of genome coordinates, and supports both the B73 RefGen_v3 and B73 RefGen_v4 assemblies. The List tool allows you to upload identifiers to create custom lists, perform set operations such as unions and intersections, and execute template queries with lists. When used with gene identifiers, the List tool automatically provides gene set enrichment for Gene Ontology (GO) and pathways, with a choice of statistical parameters and background gene sets. With the ability to save query outputs as lists that can be input to new queries, MaizeMine provides limitless possibilities for data integration and meta-analysis.",2020-10-22 +33382886,Creating a Metabolic Syndrome Research Resource using the National Health and Nutrition Examination Survey. ,"Metabolic syndrome (MetS) is multifaceted. Risk factors include visceral adiposity, dyslipidemia, hyperglycemia, hypertension and environmental stimuli. MetS leads to an increased risk of cardiovascular disease, type 2 diabetes and stroke. Comparative studies, however, have identified heterogeneity in the pathology of MetS across groups though the etiology of these differences has yet to be elucidated. 
The Metabolic Syndrome Research Resource (MetSRR) described in this report is a curated database that provides access to MetS-associated biological and ancillary data and pools current and potential biomarkers of MetS extracted from relevant National Health and Nutrition Examination Survey (NHANES) data from 1999-2016. Each potential biomarker was selected following the review of over 100 peer-reviewed articles. MetSRR includes 28 demographics, survey and known MetS-related variables, including 9 curated categorical variables and 42 potentially novel biomarkers. All measures are captured from over 90 000 individuals. This biocuration effort provides increased access to curated MetS-related data and will serve as a hypothesis-generating tool to aid in novel biomarker discovery. In addition, MetSRR provides the ability to generate and export ethnic group-/race-, sex- and age-specific curated datasets, thus broadening participation in research efforts to identify clinically evaluative MetS biomarkers for disparate populations. Although there are other databases, such as BioM2MetDisease, designed to explore metabolic diseases through analysis of miRNAs and disease phenotypes, MetSRR is the only MetS-specific database designed to explore etiology of MetS across groups, through the biocuration of demographic, biological samples and biometric data. Database URL:  http://www.healthdisparityinformatics.com/MetSRR.",2020-12-01 +31799597,RNAactDrug: a comprehensive database of RNAs associated with drug sensitivity from multi-omics data.,"Drug sensitivity has always been at the core of individualized cancer chemotherapy. However, we have been overwhelmed by large-scale pharmacogenomic data in the era of next-generation sequencing technology, which makes it increasingly challenging for researchers, especially those without bioinformatic experience, to perform data integration, exploration and analysis. 
To bridge this gap, we developed RNAactDrug, a comprehensive database of RNAs associated with drug sensitivity from multi-omics data, which allows users to explore drug sensitivity and RNA molecule associations directly. It provides association data between drug sensitivity and RNA molecules including mRNAs, long non-coding RNAs (lncRNAs) and microRNAs (miRNAs) at four molecular levels (expression, copy number variation, mutation and methylation) from integrated analysis of three large-scale pharmacogenomic databases (GDSC, CellMiner and CCLE). RNAactDrug currently stores more than 4 924 200 associations of RNA molecules and drug sensitivity at four molecular levels covering more than 19 770 mRNAs, 11 119 lncRNAs, 438 miRNAs and 4155 drugs. A user-friendly interface enriched with various browsing sections augmented with advance search facility for querying the database is offered for users retrieving. RNAactDrug provides a comprehensive resource for RNA molecules acting in drug sensitivity, and it could be used to prioritize drug sensitivity-related RNA molecules, further promoting the identification of clinically actionable biomarkers in drug sensitivity and drug development more cost-efficiently by making this knowledge accessible to both basic researchers and clinical practitioners. Database URL: http://bio-bigdata.hrbmu.edu.cn/RNAactDrug.",2020-12-01 +34361108,alfaNET: A Database of Alfalfa-Bacterial Stem Blight Protein-Protein Interactions Revealing the Molecular Features of the Disease-causing Bacteria. ,"Alfalfa has emerged as one of the most important forage crops, owing to its wide adaptation and high biomass production worldwide. In the last decade, the emergence of bacterial stem blight (caused by Pseudomonas syringae pv. syringae ALF3) in alfalfa has caused around 50% yield losses in the United States. 
Studies are being conducted to decipher the roles of the key genes and pathways regulating the disease, but due to the sparse knowledge about the infection mechanisms of Pseudomonas, the development of resistant cultivars is hampered. The database alfaNET is an attempt to assist researchers by providing comprehensive Pseudomonas proteome annotations, as well as a host-pathogen interactome tool, which predicts the interactions between host and pathogen based on orthology. alfaNET is a user-friendly and efficient tool and includes other features such as subcellular localization annotations of pathogen proteins, gene ontology (GO) annotations, network visualization, and effector protein prediction. Users can also browse and search the database using particular keywords or proteins with a specific length. Additionally, the BLAST search tool enables the user to perform a homology sequence search against the alfalfa and Pseudomonas proteomes. With the successful implementation of these attributes, alfaNET will be a beneficial resource to the research community engaged in implementing molecular strategies to mitigate the disease. alfaNET is freely available for public use at http://bioinfo.usu.edu/alfanet/.",2021-08-03 +34414826,The Society of Toxicologic Pathology: Advances and Adventures in the First 50 Years.,"The Society of Toxicologic Pathology (STP, https://www.toxpath.org/) was founded in North America in 1971 as a nonprofit scientific and educational association to promote the professional practice of pathology as applied to pharmaceutical and environmental safety assessment. In the ensuing 50 years, the STP has become a principal global leader in the field. Society membership has expanded to include toxicologic pathologists and allied scientists (eg, toxicologists, regulatory reviewers) from many nations. 
In addition to serving membership needs for professional development and networking, major STP outreach activities include production of articles and presentations designed to optimize toxicologic pathology procedures (""best practice"" recommendations), communicate core principles of pathology evaluation and interpretation (""points to consider"" and ""opinion"" pieces), and participation in international efforts to harmonize diagnostic nomenclature. The STP has evolved into an essential resource for academic, government, and industrial organizations that employ and educate toxicologic pathologists as well as use toxicologic pathology data across a range of applications from assessing product safety (therapies, foods, etc) to monitoring and maintaining environmental and occupational health. This article recapitulates the important milestones and accomplishments of the STP during its first 50 years.",2021-08-20 +32493955,WilsonGen a comprehensive clinically annotated genomic variant resource for Wilson's Disease.,"Wilson disease (WD) is one of the most prevalent genetic diseases with an estimated global carrier frequency of 1 in 90 and a prevalence of 1 in 30,000. The disease owes its genesis to Kinnier Wilson who described the disease, and is caused by accumulation of Copper (Cu) in various organs including the liver, central nervous system, cornea, kidney, joints and cardiac muscle which contribute to the characteristic clinical features of WD. A number of studies have reported genetic variants in the ATP7B gene from diverse ethnic and geographical origins. The recent advent of next-generation sequencing approaches has also enabled the discovery of a large number of novel variants in the gene associated with the disease. 
Previous attempts have been made to compile the knowledgebase and spectrum of genetic variants from across the multitude of publications, but have been limited by the utility due to the significant differences in approaches used to qualify pathogenicity of variants in each of the publications. The recent formulation of guidelines and algorithms for assessment of the pathogenicity of variants jointly put forward by the American College of Medical Genetics and the Association of Molecular Pathologists (ACMG & AMP) has provided a framework for evidence based and systematic assessment of pathogenicity of variants. In this paper, we describe a comprehensive resource of genetic variants in ATP7B gene manually curated from literature and data resources and systematically annotated using the ACMG & AMP guidelines for assessing pathogenicity. The resource therefore serves as a central point for clinicians and geneticists working on WD and to the best of our knowledge is the most comprehensive and only clinically annotated resource for WD. The resource is available at URL http://clingen.igib.res.in/WilsonGen/. We compiled a total of 3662 genetic variants from publications and databases associated with WD. Of these variants compiled, a total of 1458 were found to be unique entries. This is the largest WD database comprising 656 pathogenic/likely pathogenic variants reported classified according to ACMG & AMP guidelines. We also mapped all the pathogenic variants corresponding to ATP7B protein from literature and other databases. In addition, geographical origin and distribution of ATP7B pathogenic variants reported are also mapped in the database.",2020-06-03 +32008039,EPSD: a well-annotated data resource of protein phosphorylation sites in eukaryotes.,"As an important post-translational modification (PTM), protein phosphorylation is involved in the regulation of almost all of biological processes in eukaryotes.
Due to the rapid progress in mass spectrometry-based phosphoproteomics, a large number of phosphorylation sites (p-sites) have been characterized but remain to be curated. Here, we briefly summarized the current progresses in the development of data resources for the collection, curation, integration and annotation of p-sites in eukaryotic proteins. Also, we designed the eukaryotic phosphorylation site database (EPSD), which contained 1 616 804 experimentally identified p-sites in 209 326 phosphoproteins from 68 eukaryotic species. In EPSD, we not only collected 1 451 629 newly identified p-sites from high-throughput (HTP) phosphoproteomic studies, but also integrated known p-sites from 13 additional databases. Moreover, we carefully annotated the phosphoproteins and p-sites of eight model organisms by integrating the knowledge from 100 additional resources that covered 15 aspects, including phosphorylation regulator, genetic variation and mutation, functional annotation, structural annotation, physicochemical property, functional domain, disease-associated information, protein-protein interaction, drug-target relation, orthologous information, biological pathway, transcriptional regulator, mRNA expression, protein expression/proteomics and subcellular localization. We anticipate that the EPSD can serve as a useful resource for further analysis of eukaryotic phosphorylation. With a data volume of 14.1 GB, EPSD is free for all users at http://epsd.biocuckoo.cn/.",2021-01-01 +33219693,AtMAD: Arabidopsis thaliana multi-omics association database.,"Integration analysis of multi-omics data provides a comprehensive landscape for understanding biological systems and mechanisms. The abundance of high-quality multi-omics data (genomics, transcriptomics, methylomics and phenomics) for the model organism Arabidopsis thaliana enables scientists to study the genetic mechanism of many biological processes. 
However, no resource is available to provide comprehensive and systematic multi-omics associations for Arabidopsis. Here, we developed an Arabidopsis thaliana Multi-omics Association Database (AtMAD, http://www.megabionet.org/atmad), a public repository for large-scale measurements of associations between genome, transcriptome, methylome, pathway and phenotype in Arabidopsis, designed for facilitating identification of eQTL, emQTL, Pathway-mQTL, Phenotype-pathway, GWAS, TWAS and EWAS. Candidate variants/methylations/genes were identified in AtMAD for specific phenotypes or biological processes, many of them are supported by experimental evidence. Based on the multi-omics association strategy, we have identified 11 796 cis-eQTLs and 10 119 trans-eQTLs. Among them, 68 837 environment-eQTL associations and 149 622 GWAS-eQTL associations were identified and stored in AtMAD. For expression-methylation quantitative trait loci (emQTL), we identified 265 776 emQTLs and 122 344 pathway-mQTLs. For TWAS and EWAS, we obtained 62 754 significant phenotype-gene associations and 3 993 379 significant phenotype-methylation associations, respectively. Overall, the multi-omics associated network in AtMAD will provide new insights into exploring biological mechanisms of plants at multi-omics levels.",2021-01-01 +,Design of water quality monitoring system for aquaculture ponds based on NB-IoT,"In order to promote the development of aquaculture informatization and monitor aquaculture ponds more accurately and conveniently, this article has developed a water quality monitoring system for aquaculture ponds based on the narrow band internet of things (NB-IoT) technology. This system realizes remote collection and data storage of multi-sensor processor information (temperature, pH, dissolved oxygen (DO) and other environmental parameters), as well as intelligent control and centralized management of breeding ponds. 
The system uses STM32L151C8 microcontroller and sensor terminal real-time acquisition, such as temperature, pH value, dissolved oxygen. It realizes data aggregation and transmission over a long distance to the Internet of things (IoT) telecom cloud platform through the technology of NB-IoT. The software called Keil implement the data format design of wireless communication module and data transmission. Java is used to develop background monitoring applications for accessing cloud platform, controlling underlying devices and local data processing. It can not only send hypertext transfer protocol (HTTP) requests to monitor cloud platform data, but also issue commands to the underlying control module to control the startup and shutdown of equipment such as aerator. The system was implemented and tested in ChangZhou, JiangSu Province, China. The experimental results showed that the system can obtain water quality parameters in time. The temperature control accuracy is maintained at ±0.12℃, the average relative error is 0.15 %, the dissolved oxygen control accuracy is maintained within ±0.55mg/L, the average relative error is 2.48 %, the pH control accuracy is maintained at ±0.09, and the average relative error is 0.21 %. The system has stable overall operation, real-time and accurate data transmission, which can meet the actual production needs and provide strong data and technical support for further water quality regulation and aquaculture production management.",2021-08-01 +33095885,IndiGenomes: a comprehensive resource of genetic variants from over 1000 Indian genomes.,"With the advent of next-generation sequencing, large-scale initiatives for mining whole genomes and exomes have been employed to better understand global or population-level genetic architecture. India encompasses more than 17% of the world population with extensive genetic diversity, but is under-represented in the global sequencing datasets. 
This gave us the impetus to perform and analyze the whole genome sequencing of 1029 healthy Indian individuals under the pilot phase of the 'IndiGen' program. We generated a compendium of 55,898,122 single allelic genetic variants from geographically distinct Indian genomes and calculated the allele frequency, allele count, allele number, along with the number of heterozygous or homozygous individuals. In the present study, these variants were systematically annotated using publicly available population databases and can be accessed through a browsable online database named as 'IndiGenomes' http://clingen.igib.res.in/indigen/. The IndiGenomes database will help clinicians and researchers in exploring the genetic component underlying medical conditions. Till date, this is the most comprehensive genetic variant resource for the Indian population and is made freely available for academic utility. The resource has also been accessed extensively by the worldwide community since it's launch.",2021-01-01 +32941628,"IDDB: a comprehensive resource featuring genes, variants and characteristics associated with infertility.","Infertility is a complex multifactorial disease that affects up to 10% of couples across the world. However, many mechanisms of infertility remain unclear due to the lack of studies based on systematic knowledge, leading to ineffective treatment and/or transmission of genetic defects to offspring. Here, we developed an infertility disease database to provide a comprehensive resource featuring various factors involved in infertility. 
Features in the current IDDB version were manually curated as follows: (i) a total of 307 infertility-associated genes in human and 1348 genes associated with reproductive disorder in 9 model organisms; (ii) a total of 202 chromosomal abnormalities leading to human infertility, including aneuploidies and structural variants; and (iii) a total of 2078 pathogenic variants from infertility patients' samples across 60 different diseases causing infertility. Additionally, the characteristics of clinically diagnosed infertility patients (i.e. causative variants, laboratory indexes and clinical manifestations) were collected. To the best of our knowledge, the IDDB is the first infertility database serving as a systematic resource for biologists to decipher infertility mechanisms and for clinicians to achieve better diagnosis/treatment of patients from disease phenotype to genetic factors. The IDDB is freely available at http://mdl.shsmu.edu.cn/IDDB/.",2021-01-01 +,P14.18 Patient and Public Involvement to define patient-centred outcomes from National Cancer Datasets,"Abstract

BACKGROUND

GlioCova uses linked national cancer data on all 51 000 adult patients with a primary brain tumour in England (2013 - 2018) to understand patterns of care, treatment, and outcomes in patients with glioma (http://wwwf.imperial.ac.uk/blog/gliocova/). A key aim is the use of patient and carer input in defining patient-centered outcomes. We have held multiple Patient & Public Involvement (PPI) sessions with patients, caregivers, and data analysts to understand what patients and caregivers want to know about brain tumours.

MATERIAL AND METHOD

We used a modified Delphi method. The online PPI sessions (Zoom) consisted of two presentations, open discussions, and Q&As. We made the sessions as interactive as possible by using Mentimeter and an interactive online white board (Explain Everything). Pre-reading material was circulated via email. Attendees (6–14 per session) covered a wide range of ages (30–75), diagnoses (GBM, recurrent gliomas, low grade gliomas, ependymoma); patients, caregivers, neuro-oncology staff, data analysts and basic scientists. Work was conducted in line with the INVOLVE PPI guidance.

RESULTS

We identified four questions that were of interest to patients and had correlates in the data: Potential symptoms experienced 3-months pre-diagnosis; Side effects, 3-months post-diagnosis; The survival following different treatments (i.e., surgery only, radiotherapy only); Demographics of patients who finished/did not finish 6 cycles of temozolomide. Patients and caregivers were also interested in the impact of diet, quality of life, social life, and exercise. However, these questions cannot be answered using the current national data.

CONCLUSION

Our PPI work has helped us to identify and prioritise questions to ask of the data. Ongoing PPI work will provide a wider perspective and identify knowledge gaps for future research. Patients and caregivers report feeling empowered, being part of a team, feeling like they had given something back and done something meaningful for the research community and other patients. Patients and caregivers also felt that they had an enriched understanding of the data that is collected. As this process is an iterative process, we will hold more PPI sessions to identify and prioritise topics to analyse.",2021-09-01 +33787284,Consensus Transcriptional Landscape of Human End-Stage Heart Failure.,"Background Transcriptomic studies have contributed to fundamental knowledge of myocardial remodeling in human heart failure (HF). However, the key HF genes reported are often inconsistent between studies, and systematic efforts to integrate evidence from multiple patient cohorts are lacking. Here, we aimed to provide a framework for comprehensive comparison and analysis of publicly available data sets resulting in an unbiased consensus transcriptional signature of human end-stage HF. Methods and Results We curated and uniformly processed 16 public transcriptomic studies of left ventricular samples from 263 healthy and 653 failing human hearts. First, we evaluated the degree of consistency between studies by using linear classifiers and overrepresentation analysis. Then, we meta-analyzed the deregulation of 14 041 genes to extract a consensus signature of HF. Finally, to functionally characterize this signature, we estimated the activities of 343 transcription factors, 14 signaling pathways, and 182 micro RNAs, as well as the enrichment of 5998 biological processes. Machine learning approaches revealed conserved disease patterns across all studies independent of technical differences. 
These consistent molecular changes were prioritized with a meta-analysis, functionally characterized and validated on external data. We provide all results in a free public resource (https://saezlab.shinyapps.io/reheat/) and exemplified usage by deciphering fetal gene reprogramming and tracing the potential myocardial origin of the plasma proteome markers in patients with HF. Conclusions Even though technical and sampling variability confound the identification of differentially expressed genes in individual studies, we demonstrated that coordinated molecular responses during end-stage HF are conserved. The presented resource is crucial to complement findings in independent studies and decipher fundamental changes in failing myocardium.",2021-03-31 +34896356,Real-World Outcomes in Cystic Fibrosis Telemedicine Clinical Care in a Time of a Global Pandemic.,"

Background

During the COVID-19 pandemic, the University of Virginia adult cystic fibrosis (CF) center transitioned from in-person clinical encounters to a model that included interdisciplinary telemedicine. The pandemic presented an unprecedented opportunity to assess the impact of the interdisciplinary telemedicine model on clinical CF outcomes.

Research question

What are the clinical outcomes of a care model that includes interdisciplinary telemedicine (IDC-TM) compared with in-person clinical care for patients with CF during the COVID-19 pandemic?

Study design and methods

Adults with CF were included. The prepandemic year was defined as March 17, 2019, through March 16, 2020, and the pandemic year (PY) was defined as March 17, 2020, through March 16, 2021. Patients were enrolled starting in the PY. Prepandemic data were gathered retrospectively. Telemedicine visits were defined as clinical encounters via secured video communication. Hybrid visits were in-person evaluations by physician, with in-clinic video communication by other team members. In-person visits were encounters with in-person providers only. All encounters included previsit screening. Outcomes were lung function, BMI, exacerbations, and antibiotic use. FEV1 percent predicted, exacerbations, and antibiotic use were adjusted for the effect of elexacaftor/tezacaftor/ivacaftor treatment.

Results

One hundred twenty-four patients participated. One hundred ten patients were analyzed (mean age, 35 years; range, 18-69 years). Ninety-five percent had access to telemedicine (n = 105). Telemedicine visits accounted for 64% of encounters (n = 260), hybrid visits with telemedicine support accounted for 28% of encounters (n = 114), and in-person visits accounted for 7% of encounters (n = 30). No difference in lung function or exacerbation rate during the PY was found. BMI increased from 25 to 26 kg/m2 (t100 = -4.72; P < .001). Antibiotic use decreased from 316 to 124 episodes (z = 8.81; P < .0001).

Interpretation

This CF care model, which includes IDC-TM, successfully monitored lung function and BMI, identified exacerbations, and followed guidelines-based care during the pandemic. A significant decrease in antibiotic use suggests that social mitigation strategies were protective.

Trial registry

ClinicalTrials.gov; No.: NCT04402801; URL: www.clinicaltrials.gov.",2021-12-10 +34534667,Network biology approach to human tissue-specific chemical exposome.,"Human exposure to environmental chemicals is a major contributor to the global disease burden. To characterize the external exposome it is important to assess its chemical components and to study their impact on human health. Biomonitoring studies measure the body burden of environmental chemicals detected in biospecimens from a wide range of the population. The detection of these chemicals in biospecimens (and, hence, human tissues) is considered an important biomarker of human exposure. However, there is no readily available resource that compiles such exposure data for human tissues from published literature, and no studies that explore the patterns in the associations between tissue-specific exposures and human diseases. We present Human Tissue-specific Exposome Atlas (TExAs), a compilation of 380 environmental chemicals detected across 27 human tissues. TExAs is accessible via a user friendly webserver: https://cb.imsc.res.in/texas. We compare the chemicals in TExAs with 55 global chemical regulations, guidelines, and inventories, which represent several categories of the external exposome of humans. Further to understand the potential implications on human health of chemicals detected across human tissues, we employ a network biology approach and explore possible chemical exposure-disease associations. 
Ensuing analyses reveal the possibilities of disease comorbidities and demonstrate the application of network biology in unraveling complex disease associations due to chemical exposure.",2021-09-15 +32632099,Database of literature derived cellular measurements from the murine basal ganglia.,"Quantitative measurements and descriptive statistics of different cellular elements in the brain are typically published in journal articles as text, tables, and example figures, and represent an important basis for the creation of biologically constrained computational models, design of intervention studies, and comparison of subject groups. Such data can be challenging to extract from publications and difficult to normalise and compare across studies, and few studies have so far attempted to integrate quantitative information available in journal articles. We here present a database of quantitative information about cellular parameters in the frequently studied murine basal ganglia. The database holds a curated and normalised selection of currently available data collected from the literature and public repositories, providing the most comprehensive collection of quantitative neuroanatomical data from the basal ganglia to date. The database is shared as a downloadable resource from the EBRAINS Knowledge Graph (https://kg.ebrains.eu), together with a workflow that allows interested researchers to update and expand the database with data from future reports.",2020-07-06 +33103271,The NanDeSyn database for Nannochloropsis systems and synthetic biology.,"Nannochloropsis species, unicellular industrial oleaginous microalgae, are model organisms for microalgal systems and synthetic biology. 
To facilitate community-based annotation and mining of the rapidly accumulating functional genomics resources, we have initiated an international consortium and present a comprehensive multi-omics resource database named Nannochloropsis Design and Synthesis (NanDeSyn; http://nandesyn.single-cell.cn). Via the Tripal toolkit, it features user-friendly interfaces hosting genomic resources with gene annotations and transcriptomic and proteomic data for six Nannochloropsis species, including two updated genomes of Nannochloropsis oceanica IMET1 and Nannochloropsis salina CCMP1776. Toolboxes for search, Blast, synteny view, enrichment analysis, metabolic pathway analysis, a genome browser, etc. are also included. In addition, functional validation of genes is indicated based on phenotypes of mutants and relevant bibliography. Furthermore, epigenomic resources are also incorporated, especially for sequencing of small RNAs including microRNAs and circular RNAs. Such comprehensive and integrated landscapes of Nannochloropsis genomics and epigenomics will promote and accelerate community efforts in systems and synthetic biology of these industrially important microalgae.",2020-11-27 +33313674,"ColorCells: a database of expression, classification and functions of lncRNAs in single cells. ","Although long noncoding RNAs (lncRNAs) have significant tissue specificity, their expression and variability in single cells remain unclear. Here, we developed ColorCells (http://rna.sysu.edu.cn/colorcells/), a resource for comparative analysis of lncRNAs expression, classification and functions in single-cell RNA-Seq data. ColorCells was applied to 167 913 publicly available scRNA-Seq datasets from six species, and identified a batch of cell-specific lncRNAs. These lncRNAs show surprising levels of expression variability between different cell clusters, and has the comparable cell classification ability as known marker genes. 
Cell-specific lncRNAs have been identified and further validated by in vitro experiments. We found that lncRNAs are typically co-expressed with the mRNAs in the same cell cluster, which can be used to uncover lncRNAs' functions. Our study emphasizes the need to uncover lncRNAs in all cell types and shows the power of lncRNAs as novel marker genes at single cell resolution.",2021-07-01 +34019656,Amino Acid Interactions (INTAA) web server v2.0: a single service for computation of energetics and conservation in biomolecular 3D structures.,"Interactions among amino acid residues are the principal contributor to the stability of the three-dimensional structure of a protein. The Amino Acid Interactions (INTAA) web server (https://bioinfo.uochb.cas.cz/INTAA/) has established itself as a unique computational resource, which enables users to calculate the contribution of individual residues in a biomolecular structure to its total energy using a molecular mechanical scoring function. In this update, we describe major additions to the web server which help solidify its position as a robust, comprehensive resource for biomolecular structure analysis. Importantly, a new continuum solvation model was introduced, allowing more accurate representation of electrostatic interactions in aqueous media. In addition, a low-overhead pipeline for the estimation of evolutionary conservation in protein chains has been added. New visualization options were introduced as well, allowing users to easily switch between and interrelate the energetic and evolutionary views of the investigated structures.",2021-07-01 +32890396,CoV3D: a database of high resolution coronavirus protein structures.,"SARS-CoV-2, the etiologic agent of COVID-19, exemplifies the general threat to global health posed by coronaviruses. 
The urgent need for effective vaccines and therapies is leading to a rapid rise in the number of high resolution structures of SARS-CoV-2 proteins that collectively reveal a map of virus vulnerabilities. To assist structure-based design of vaccines and therapeutics against SARS-CoV-2 and other coronaviruses, we have developed CoV3D, a database and resource for coronavirus protein structures, which is updated on a weekly basis. CoV3D provides users with comprehensive sets of structures of coronavirus proteins and their complexes with antibodies, receptors, and small molecules. Integrated molecular viewers allow users to visualize structures of the spike glycoprotein, which is the major target of neutralizing antibodies and vaccine design efforts, as well as sets of spike-antibody complexes, spike sequence variability, and known polymorphisms. In order to aid structure-based design and analysis of the spike glycoprotein, CoV3D permits visualization and download of spike structures with modeled N-glycosylation at known glycan sites, and contains structure-based classification of spike conformations, generated by unsupervised clustering. CoV3D can serve the research community as a centralized reference and resource for spike and other coronavirus protein structures, and is available at: https://cov3d.ibbr.umd.edu.",2021-01-01 +32111231,NoncoRNA: a database of experimentally supported non-coding RNAs and drug targets in cancer.,"NoncoRNA (http://www.ncdtcdb.cn:8080/NoncoRNA/) is a manually curated database of experimentally supported non-coding RNAs (ncRNAs) and drug target associations that aim to potentially provide a high-quality data resource for exploring drug sensitivity/resistance-related ncRNAs in various human cancers. ncRNA are RNA molecular that do not encode proteins, but are involved in gene regulation and cellular functions in variety of human diseases, including neurodegenerative diseases and cancers. 
Here, we developed NoncoRNA which contained 8233 entries between 5568 ncRNAs and 154 drugs in 134 cancers. Each entry in the NoncoRNA contains detailed information on the ncRNAs, drugs, and cancers, the ncRNA expression pattern and experimental detection techniques, drug response and other targets, literature references, and other information. NoncoRNA offers a user-friendly, open access web interface to easily browse, search, and download data. NoncoRNA also provides a submission page for researchers to submit newly validated ncRNA-drug-cancer associations. NoncoRNA might serve as an immeasurable resource for understanding the roles of ncRNAs in cancer therapy.",2020-02-28 +34349127,"MiREDiBase, a manually curated database of validated and putative editing events in microRNAs.","MicroRNAs (miRNAs) are regulatory small non-coding RNAs that function as translational repressors. MiRNAs are involved in most cellular processes, and their expression and function are presided by several factors. Amongst, miRNA editing is an epitranscriptional modification that alters the original nucleotide sequence of selected miRNAs, possibly influencing their biogenesis and target-binding ability. A-to-I and C-to-U RNA editing are recognized as the canonical types, with the A-to-I type being the predominant one. Albeit some bioinformatics resources have been implemented to collect RNA editing data, it still lacks a comprehensive resource explicitly dedicated to miRNA editing. Here, we present MiREDiBase, a manually curated catalog of editing events in miRNAs. The current version includes 3,059 unique validated and putative editing sites from 626 pre-miRNAs in humans and three primates. Editing events in mature human miRNAs are supplied with miRNA-target predictions and enrichment analysis, while minimum free energy structures are inferred for edited pre-miRNAs. 
MiREDiBase represents a valuable tool for cell biology and biomedical research and will be continuously updated and expanded at https://ncrnaome.osumc.edu/miredibase .",2021-08-04 +32444277,A systematic review of guidelines for lymphedema and the need for contemporary intersocietal guidelines for the management of lymphedema.,"

Objective

Lymphedema (LED) affects an estimated 35 million patients in the United States and a staggering 140 to 200 million people worldwide, yet LED is the forgotten vascular disease. Whereas the diagnosis and treatment of arterial and venous diseases have been strengthened by the development of clinical practice guidelines (CPGs), few CPGs are available for LED. Moreover, for CPGs to have their greatest impact, they should be both of high quality and developed using the most rigorous evidence-based methods. We performed a systematic review of the available CPGs for LED, which were assessed for breadth of content and methodologic strength.

Methods

A literature search was conducted from National Guideline Clearinghouse (www.guidelines.gov), BMJ Clinical Evidence (http://clinicalevidence.bmj.com), and National Institute for Health and Care Excellence (http://www.nice.org.uk) as well as from MEDLINE and Google, which selected 245 documents. After a horizon scan that identified 13 potential CPGs, 4 satisfied the criteria for LED. These were analyzed for inclusion of key elements of diagnosis and treatment.

Results

A horizon scan (abstract review) of the 245 documents identified 10 potential CPGs. Of the 10 documents, 6 claimed to be CPGs, but 2 were limited in scope (rehabilitation or compression only), 2 were consensus statements, 1 was a position statement, and 1 was a systematic review. This process yielded four CPGs: Lymphedema Framework Best Practice for the Management of Lymphedema; Japanese Lymphedema Study Group-A Practice Guideline for the Management of Lymphedema; Clinical Resource Efficiency Support Team Guidelines for the Diagnosis, Assessment and Management of Lymphedema; and Guidelines of the American Venous Forum. Only one of four CPGs was based on a contemporary systematic review (2016 end date of references), whereas the remainder had older systematic reviews (end dates of 2005, 2007, and 2007). Several areas of contemporary diagnosis, treatment, and monitoring of LED were absent.

Conclusions

This systematic review of available LED CPGs demonstrates a limited number of guidelines. The four CPGs identified lack contemporary references while demonstrating low overall study quality. Therefore, it is imperative for our vascular societies to develop contemporary high-quality evidence-based CPGs for LED, as they have for other vascular diseases.",2020-05-20 +33515030,HVIDB: a comprehensive database for human-virus protein-protein interactions.,"While leading to millions of people's deaths every year the treatment of viral infectious diseases remains a huge public health challenge.Therefore, an in-depth understanding of human-virus protein-protein interactions (PPIs) as the molecular interface between a virus and its host cell is of paramount importance to obtain new insights into the pathogenesis of viral infections and development of antiviral therapeutic treatments. However, current human-virus PPI database resources are incomplete, lack annotation and usually do not provide the opportunity to computationally predict human-virus PPIs. Here, we present the Human-Virus Interaction DataBase (HVIDB, http://zzdlab.com/hvidb/) that provides comprehensively annotated human-virus PPI data as well as seamlessly integrates online PPI prediction tools. Currently, HVIDB highlights 48 643 experimentally verified human-virus PPIs covering 35 virus families, 6633 virally targeted host complexes, 3572 host dependency/restriction factors as well as 911 experimentally verified/predicted 3D complex structures of human-virus PPIs. Furthermore, our database resource provides tissue-specific expression profiles of 6790 human genes that are targeted by viruses and 129 Gene Expression Omnibus series of differentially expressed genes post-viral infections. Based on these multifaceted and annotated data, our database allows the users to easily obtain reliable information about PPIs of various human viruses and conduct an in-depth analysis of their inherent biological significance. 
In particular, HVIDB also integrates well-performing machine learning models to predict interactions between the human host and viral proteins that are based on (i) sequence embedding techniques, (ii) interolog mapping and (iii) domain-domain interaction inference. We anticipate that HVIDB will serve as a one-stop knowledge base to further guide hypothesis-driven experimental efforts to investigate human-virus relationships.",2021-03-01 +,333 Animal –GRIN a platform for animal genetic information,"Abstract An information system, Animal-GRIN, has been constructed as part of the U.S., Brazilian, and Canadian livestock genetic resource programs. It is designed to provide information to gene bank managers, the research community, and livestock producers about livestock breeds and subpopulations acquired in gene bank collections. The system was developed using a range of free software tools, including: MySQL, Ruby on Rails, Java Script, etc. The system is dynamic and publically accessible (https://nrrc.ars.usda.gov/A-GRIN). Exemplary information in Animal-GRIN consists of: animal identifiers, number and type of samples in the collection, pedigrees, coefficients of genetic relationships between animals within a breed, breeding values, phenotypes, and geographic source. To meet the national need for the long term archiving of genomic information developed with public funds, Animal-GRIN was expanded to store and make publically available genomic information (SNP) from any SNP chip, including custom products. Researchers are encouraged to submit their data upon completion of their publically funded projects. With the drill down concept, users can search the database for genomic information, physical samples associated with the genomic information, and phenotypic information on specific animals. Once animals of interest are found, on-line tools enable users to request either germplasm samples or genomic data. 
Progress in meeting genetic security for a breed’s collection can also be viewed. To date the U.S. collection has 52,639 animals with almost a million samples representing 36 species, 167 breeds, and 331 subpopulations and these have been entered into Animal-GRIN. Genomic data has been acquired on 1,899 animals representing 36 breeds. The next phase of Animal-GRIN development will be development of landscape genomics components. Acquisition of germplasm samples and associated genomic information are a continuing effort.",2019-12-01 +,"Predictors of cognitive, behavioural and academic difficulties in NF1","

Aims

The aim of this study is to systematically investigate the demographic and disease predictors of cognitive and behavioural phenotype in the largest cohort of children with NF1 published to date. Based on previously published research, we examine the potential role of demographic predictors such as age, sex, SES, parental NF1 status as well as the neurological complications such as epilepsy and brain tumours in NF1 associated cognitive/ behavioural impairments.

Method

In this cross-sectional study design, participant data were drawn from two large databases which included (i) A clinical database of all patients with NF1 seen in a clinical psychological service from 2010 to 2019 and (ii) A research dataset from two previously published studies (2,8). The complex National NF1 service based within Manchester regional genetic services is set up for individuals with complex NF1 (https://www.mangen.co.uk/healthcare-professionals/clinical-genomic-services/nf1/) in the North of the UK. Children were referred to the psychological services by NF1 clinicians if psychological assessment was warranted based on parental reports. In order to reduce clinic referral bias, the clinical sample was supplemented by including participants that were seen solely for the purposes of research studies within our centre.

Result

Relative to population norms, 90% of the NF1 sample demonstrated significantly lower scores in at least one cognitive or behavioral domain. Family history of NF1 and lower SES were independently associated with poorer cognitive, behavioral and academic outcomes. Neurological problems such as epilepsy and hydrocephalus were associated with lower IQ and academic skills.

Conclusion

Cognitive and behavioural phenotypes commonly emerge via a complex interplay between genes and environmental factors, and this is true also of a monogenic condition such as NF1. Early interventions and remedial education may be targeted to risk groups such those with familial NF1, families with lower SES and those with associated neurological comorbidities.",2021-06-18 +34265261,ColBuilder: A server to build collagen fibril models.,"Type I collagen is the main structural component of many tissues in the human body. It provides excellent mechanical properties to connective tissue and acts as a protein interaction hub. There is thus a wide interest in understanding the properties and diverse functions of type I collagen at the molecular level. A precondition is an atomistic collagen I structure as it occurs in native tissue. To this end, we built full-atom models of cross-linked collagen fibrils by integrating the low-resolution structure of collagen fibril available from x-ray fiber diffraction with high-resolution structures of short collagen-like peptides from x-ray crystallography and mass spectrometry data. We created a Web resource of collagen models for 20 different species with a large variety of cross-link types and localization within the fibril to facilitate structure-based analyses and simulations of type I collagen in health and disease. To easily enable simulations, we provide parameters of the modeled cross-links for an Amber force field. The repository of collagen models is available at https://colbuilder.h-its.org.",2021-07-13 +33843105,Sequence and evolutionary analysis of bacterial ribosomal S1 proteins.,"The multi-domain bacterial S1 protein is the largest and most functionally important ribosomal protein of the 30S subunit, which interacts with both mRNA and proteins. 
The family of ribosomal S1 proteins differs in the classical sense from a protein with tandem repeats and has a ""bead-on-string"" organization, where each repeat is folded into a globular domain. Based on our recent data, the study of evolutionary relationships for the bacterial phyla will provide evidence for one of the proposed theories of the evolutionary development of proteins with structural repeats: from multiple repeats of assembles to single repeats, or vice versa. In this comparative analysis of 1333 S1 sequences that were identified in 24 different phyla, we demonstrate how such phyla can form independently/dependently during evolution. To the best of our knowledge, this work is the first study of the evolutionary history of bacterial ribosomal S1 proteins. The collected and structured data can be useful to computer biologists as a resource for determining percent identity, amino acid composition and logo motifs, as well as dN/dS ratio in bacterial S1 protein. The obtained research data indicate that the evolutionary development of bacterial ribosomal S1 proteins evolved from multiple assemblies to single repeat. The presented data are integrated into the server, which can be accessed at http://oka.protres.ru:4200.",2021-04-23 +33554247,Prediction and collection of protein-metabolite interactions. ,"Interactions between proteins and small molecule metabolites play vital roles in regulating protein functions and controlling various cellular processes. The activities of metabolic enzymes, transcription factors, transporters and membrane receptors can all be mediated through protein-metabolite interactions (PMIs). Compared with the rich knowledge of protein-protein interactions, little is known about PMIs. To the best of our knowledge, no existing database has been developed for collecting PMIs. The recent rapid development of large-scale mass spectrometry analysis of biomolecules has led to the discovery of large amounts of PMIs. 
Therefore, we developed the PMI-DB to provide a comprehensive and accurate resource of PMIs. A total of 49 785 entries were manually collected in the PMI-DB, corresponding to 23 small molecule metabolites, 9631 proteins and 4 species. Unlike other databases that only provide positive samples, the PMI-DB provides non-interaction between proteins and metabolites, which not only reduces the experimental cost for biological experimenters but also facilitates the construction of more accurate algorithms for researchers using machine learning. To show the convenience of the PMI-DB, we developed a deep learning-based method to predict PMIs in the PMI-DB and compared it with several methods. The experimental results show that the area under the curve and area under the precision-recall curve of our method are 0.88 and 0.95, respectively. Overall, the PMI-DB provides a user-friendly interface for browsing the biological functions of metabolites/proteins of interest, and experimental techniques for identifying PMIs in different species, which provides important support for furthering the understanding of cellular processes. The PMI-DB is freely accessible at http://easybioai.com/PMIDB.",2021-09-01 +,Automatic cough detection from realistic audio recordings using C-BiLSTM with boundary regression,"Automatic cough detection in the patients’ realistic audio recordings is of great significance to diagnose and monitor respiratory diseases, such as COVID-19. Many detection methods have been developed so far, but they are still unable to meet the practical requirements. In this paper, we present a deep convolutional bidirectional long short-term memory (C-BiLSTM) model with boundary regression for cough detection, where cough and non-cough parts need to be classified and located. We added convolutional layers before the LSTM to enhance the cough features and preserve the temporal information of the audio data. 
Considering the importance of the cough event integrity for subsequent analysis, the novel model includes an embedded boundary regression on the last feature map for both higher detection accuracy and more accurate boundaries. We delicately designed, collected and labelled a realistic audio dataset containing recordings of patients with respiratory diseases, named the Corp Dataset. 168 h of recordings with 9969 coughs from 42 different patients are included. The dataset is published online on the MARI Lab website (https://mari.tongji.edu.cn/info/1012/1030.htm). The results show that the system achieves a sensitivity of 84.13%, a specificity of 99.82% and an intersection-over-union (IoU) of 0.89, which is significantly superior to other related models. With the proposed method, all the criteria on cough detection significantly increased. The open source Corp Dataset provides useful material and a benchmark for researchers investigating cough detection. We propose the state-of-the-art system with boundary regression, laying the foundation for identifying cough sounds in real-world audio data.",2021-11-11 +33045751,LncExpDB: an expression database of human long non-coding RNAs.,"Expression profiles of long non-coding RNAs (lncRNAs) across diverse biological conditions provide significant insights into their biological functions, interacting targets as well as transcriptional reliability. However, there lacks a comprehensive resource that systematically characterizes the expression landscape of human lncRNAs by integrating their expression profiles across a wide range of biological conditions. 
Here, we present LncExpDB (https://bigd.big.ac.cn/lncexpdb), an expression database of human lncRNAs that is devoted to providing comprehensive expression profiles of lncRNA genes, exploring their expression features and capacities, identifying featured genes with potentially important functions, and building interactions with protein-coding genes across various biological contexts/conditions. Based on comprehensive integration and stringent curation, LncExpDB currently houses expression profiles of 101 293 high-quality human lncRNA genes derived from 1977 samples of 337 biological conditions across nine biological contexts. Consequently, LncExpDB estimates lncRNA genes' expression reliability and capacities, identifies 25 191 featured genes, and further obtains 28 443 865 lncRNA-mRNA interactions. Moreover, user-friendly web interfaces enable interactive visualization of expression profiles across various conditions and easy exploration of featured lncRNAs and their interacting partners in specific contexts. Collectively, LncExpDB features comprehensive integration and curation of lncRNA expression profiles and thus will serve as a fundamental resource for functional studies on human lncRNAs.",2021-01-01 +34468243,Prevalence and Newly Diagnosed Rates of Multimorbidity in Older Medicare Beneficiaries with COPD.,"Few studies have quantified the multimorbidity burden in older adults with chronic obstructive pulmonary disease (COPD) using large and generalizable data. Such evidence is essential to inform evidence-based research, clinical care, and resource allocation. 
This retrospective cohort study used a nationally representative sample of Medicare beneficiaries aged 65 years or older with COPD and 1:1 matched (on age, sex, and race) non-COPD beneficiaries to: (1) quantify the prevalence of multimorbidity at COPD onset and one-year later; (2) quantify the rates [per 100 person-years (PY)] of newly diagnosed multimorbidity during in the year prior to and in the year following COPD onset; and (3) compare multimorbidity prevalence in beneficiaries with and without COPD. Among 739,118 eligible beneficiaries with and without COPD, the average number of multimorbidity was 10.0 (SD = 4.7) and 1.0 (SD = 3.3), respectively. The most prevalent multimorbidity at COPD onset and at one-year after, respectively, were hypertension (70.8% and 80.2%), hyperlipidemia (52.2% and 64.8%), anemia (42.1% and 52.0%), arthritis (39.8% and 47.7%), and congestive heart failure (CHF) (31.3% and 38.8%). Conditions with the highest newly diagnosed rates before and following COPD onset, respectively, included hypertension (39.8 and 32.3 per 100 PY), hyperlipidemia (22.8 and 27.6), anemia (17.8 and 20.3), CHF (16.2 and 13.2), and arthritis (12.9 and 13.2). COPD was significantly associated with increased odds of all measured conditions relative to non-COPD controls. This study updates existing literature with more current, generalizable findings of the substantial multimorbidity burden in medically complex older adults with COPD-necessary to inform patient-centered, multidimensional care.Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.1968815 .",2021-09-01 +33749993,Integrated intra- and intercellular signaling knowledge for multicellular omics analysis.,"Molecular knowledge of biological processes is a cornerstone in omics data analysis. Applied to single-cell data, such analyses provide mechanistic insights into individual cells and their interactions. 
However, knowledge of intercellular communication is scarce, scattered across resources, and not linked to intracellular processes. To address this gap, we combined over 100 resources covering interactions and roles of proteins in inter- and intracellular signaling, as well as transcriptional and post-transcriptional regulation. We added protein complex information and annotations on function, localization, and role in diseases for each protein. The resource is available for human, and via homology translation for mouse and rat. The data are accessible via OmniPath's web service (https://omnipathdb.org/), a Cytoscape plug-in, and packages in R/Bioconductor and Python, providing access options for computational and experimental scientists. We created workflows with tutorials to facilitate the analysis of cell-cell interactions and affected downstream intracellular signaling processes. OmniPath provides a single access point to knowledge spanning intra- and intercellular processes for data analysis, as we demonstrate in applications studying SARS-CoV-2 infection and ulcerative colitis.",2021-03-01 +34967029,The flagellar germ-line hypothesis: How flagellate and ciliate gametes significantly shaped the evolution of organismal complexity.,"This essay presents a hypothesis which contends that the development of organismic complexity in the eukaryotes depended extensively on propagation via flagellated and ciliated gametes. Organisms utilizing flagellate and ciliate gametes to propagate their germ line have contributed most of the organismic complexity found in the higher animals. The genes of the flagellum and the flagellar assembly system (intraflagellar transport) have played a disproportionately important role in the construction of complex tissues and organs. The hypothesis also proposes that competition between large numbers of haploid flagellated male gametes rigorously conserved the functionality of a key set of flagellar genes for more than 700 million years. 
This in turn has insured that a large set (>600) of highly functional cytoskeletal and signal pathway genes is always present in the lineage of organisms with flagellated or ciliated gametes to act as a dependable resource, or ""toolkit,"" for organ elaboration. Also see the video abstract here: https://youtu.be/lC5nC-WOcm8.",2021-12-29 +33929905,Risk-Based Chemical Ranking and Generating a Prioritized Human Exposome Database.,"

Background

Due to the ubiquitous use of chemicals in modern society, humans are increasingly exposed to thousands of chemicals that contribute to a major portion of the human exposome. Should a comprehensive and risk-based human exposome database be created, it would be conducive to the rapid progress of human exposomics research. In addition, once a xenobiotic is biotransformed with distinct half-lives upon exposure, monitoring the parent compounds alone may not reflect the actual human exposure. To address these questions, a comprehensive and risk-prioritized human exposome database is needed.

Objectives

Our objective was to set up a comprehensive risk-prioritized human exposome database including physicochemical properties as well as risk prediction and develop a graphical user interface (GUI) that has the ability to conduct searches for content associated with chemicals in our database.

Methods

We built a comprehensive risk-prioritized human exposome database by text mining and database fusion. Subsequently, chemicals were prioritized by integrating exposure level obtained from the Systematic Empirical Evaluation of Models with toxicity data predicted by the Toxicity Estimation Software Tool and the Toxicological Priority Index calculated from the ToxCast database. The biotransformation half-lives (HLBs) of all the chemicals were assessed using the Iterative Fragment Selection approach and biotransformation products were predicted using the previously developed BioTransformer machine-learning method.

Results

We compiled a human exposome database of >20,000 chemicals, prioritized 13,441 chemicals based on probabilistic hazard quotient and 7,770 chemicals based on risk index, and provided a predicted biotransformation metabolite database of >95,000 metabolites. In addition, a user-interactive Java software (Oracle)-based search GUI was generated to enable open access to this new resource.

Discussion

Our database can be used to guide chemical management and enhance scientific understanding to rapidly and effectively prioritize chemicals for comprehensive biomonitoring in epidemiological investigations. https://doi.org/10.1289/EHP7722.",2021-04-30 +32542363,PvP01-DB: computational structural and functional characterization of soluble proteome of PvP01 strain of Plasmodium vivax. ,"Despite Plasmodium vivax being the main offender in the majority of malarial infections, very little information is available about its adaptation and development in humans. Its capability for activating relapsing infections through its dormant liver stage and resistance to antimalarial drugs makes it as one of the major challenges in eradicating malaria. Noting the immediate necessity for the availability of a comprehensive and reliable structural and functional repository for P. vivax proteome, here we developed a web resource for the new reference genome, PvP01, furnishing information on sequence, structure, functions, active sites and metabolic pathways compiled and predicted using some of the state-of-the-art methods in respective fields. The PvP01 web resource comprises organized data on the soluble proteome consisting of 3664 proteins in blood and liver stages of malarial cycle. The current public resources represent only 163 proteins of soluble proteome of PvP01, with complete information about their molecular function, biological process and cellular components. Also, only 46 proteins of P. vivax have experimentally determined structures. In this milieu of extreme scarcity of structural and functional information, PvP01 web resource offers meticulously validated structures of 3664 soluble proteins. The sequence and structure-based functional characterization led to a quantum leap from 163 proteins available presently to whole soluble proteome offered through PvP01 web resource. 
We believe PvP01 web resource will serve the researchers in identifying novel protein drug targets and in accelerating the development of structure-based new drug candidates to combat malaria. Database Availability: http://www.scfbio-iitd.res.in/PvP01.",2020-01-01 +,Street performers and donations in an online environment in the wake of COVID-19,"The spread of coronavirus (COVID-19) has meant that street performers can no longer perform on the street. This has changed the landscape for the exchange for money between a street performer and their audience. The paper uses a unique data set from the online busking platform ‘The Busking Project’ (https://busk.co) to analyse whether sign up by performers to the platform and donation by individuals to street performers through the platform has changed since the World Health Organization declared COVID-19 to be a pandemic on March 11, 2020. The results show a lift both in street performers signing up to the platform and in individuals' donations to street performers after the announcement. The recovery of cities and the cultural economy from COVID-19 will not be immediate. As we move to a post COVID-19 world our results have implications for performers, for donors and for (local) governments as street performers return to the street.",2021-12-04 +33126250,MloDisDB: a manually curated database of the relations between membraneless organelles and diseases. ,"Cells are compartmentalized by numerous membrane-bounded organelles and membraneless organelles (MLOs) to ensure temporal and spatial regulation of various biological processes. A number of MLOs, such as nucleoli, nuclear speckles and stress granules, exist as liquid droplets within the cells and arise from the condensation of proteins and RNAs via liquid-liquid phase separation (LLPS). 
By concentrating certain proteins and RNAs, MLOs accelerate biochemical reactions and protect cells during stress, and dysfunction of MLOs is associated with various pathological processes. With the development in this field, more and more relations between the MLOs and diseases have been described; however, these results have not been made available in a centralized resource. Herein, we build MloDisDB, a database which aims to gather the relations between MLOs and diseases from dispersed literature. In addition, the relations between LLPS and diseases were included as well. Currently, MloDisDB contains 771 curated entries from 607 publications; each entry in MloDisDB contains detailed information about the MLO, the disease and the functional factor in the relation. Furthermore, an efficient and user-friendly interface for users to search, browse and download all entries was provided. MloDisDB is the first comprehensive database of the relations between MLOs and diseases so far, and the database is freely accessible at http://mlodis.phasep.pro/.",2021-07-01 +33849055,"Expasy, the Swiss Bioinformatics Resource Portal, as designed by its users.","The SIB Swiss Institute of Bioinformatics (https://www.sib.swiss) creates, maintains and disseminates a portfolio of reliable and state-of-the-art bioinformatics services and resources for the storage, analysis and interpretation of biological data. Through Expasy (https://www.expasy.org), the Swiss Bioinformatics Resource Portal, the scientific community worldwide, freely accesses more than 160 SIB resources supporting a wide range of life science and biomedical research areas. In 2020, Expasy was redesigned through a user-centric approach, known as User-Centred Design (UCD), whose aim is to create user interfaces that are easy-to-use, efficient and targeting the intended community. 
This approach, widely used in other fields such as marketing, e-commerce, and design of mobile applications, is still scarcely explored in bioinformatics. In total, around 50 people were actively involved, including internal stakeholders and end-users. In addition to an optimised interface that meets users' needs and expectations, the new version of Expasy provides an up-to-date and accurate description of high-quality resources based on a standardised ontology, allowing to connect functionally-related resources.",2021-07-01 +33471060,Cellinker: a platform of ligand-receptor interactions for intercellular communication analysis. ,"Ligand-receptor (L-R) interactions mediate cell adhesion, recognition and communication and play essential roles in physiological and pathological signaling. With the rapid development of single-cell RNA sequencing (scRNA-seq) technologies, systematically decoding the intercellular communication network involving L-R interactions has become a focus of research. Therefore, construction of a comprehensive, high-confidence and well-organized resource to retrieve L-R interactions in order to study the functional effects of cell-cell communications would be of great value. In this study, we developed Cellinker, a manually curated resource of literature-supported L-R interactions that play roles in cell-cell communication. We aimed to provide a useful platform for studies on cell-cell communication mediated by L-R interactions. The current version of Cellinker documents over 3,700 human and 3,200 mouse L-R protein-protein interactions (PPIs) and embeds a practical and convenient webserver with which researchers can decode intercellular communications based on scRNA-seq data. And over 400 endogenous small molecule (sMOL) related L-R interactions were collected as well. 
Moreover, to help with research on coronavirus (CoV) infection, Cellinker collects information on 16 L-R PPIs involved in CoV-human interactions (including 12 L-R PPIs involved in SARS-CoV-2 infection). In summary, Cellinker provides a user-friendly interface for querying, browsing and visualizing L-R interactions as well as a practical and convenient web tool for inferring intercellular communications based on scRNA-seq data. We believe this platform could promote intercellular communication research and accelerate the development of related algorithms for scRNA-seq studies. Cellinker is available at http://www.rna-society.org/cellinker/. Supplementary data are available at Bioinformatics online.",2021-01-20 +35424258,MeFSAT: a curated natural product database specific to secondary metabolites of medicinal fungi.,"Fungi are a rich source of secondary metabolites which constitutes a valuable and diverse chemical space of natural products. Medicinal fungi have been used in traditional medicine to treat human ailments for centuries. To date, there is no devoted resource on secondary metabolites and therapeutic uses of medicinal fungi. Such a dedicated resource compiling dispersed information on medicinal fungi across published literature will facilitate ongoing efforts towards natural product based drug discovery. Here, we present the first comprehensive manually curated database on Medicinal Fungi Secondary metabolites And Therapeutics (MeFSAT) that compiles information on 184 medicinal fungi, 1830 secondary metabolites and 149 therapeutics uses. Importantly, MeFSAT contains a non-redundant in silico natural product library of 1830 secondary metabolites along with information on their chemical structures, computed physicochemical properties, drug-likeness properties, predicted ADMET properties, molecular descriptors and predicted human target proteins. 
By comparing the physicochemical properties of secondary metabolites in MeFSAT with other small molecules collections, we find that fungal secondary metabolites have high stereochemical complexity and shape complexity similar to other natural product libraries. Based on multiple scoring schemes, we have filtered a subset of 228 drug-like secondary metabolites in MeFSAT database. By constructing and analyzing chemical similarity networks, we show that the chemical space of secondary metabolites in MeFSAT is highly diverse. The compiled information in MeFSAT database is openly accessible at: https://cb.imsc.res.in/mefsat/.",2021-01-12 +31155677,A comprehensive overview of oncogenic pathways in human cancer.,"Alterations of biological pathways can lead to oncogenesis. An overview of these oncogenic pathways would be highly valuable for researchers to reveal the pathogenic mechanism and develop novel therapeutic approaches for cancers. Here, we reviewed approximately 8500 literatures and documented experimentally validated cancer-pathway associations as benchmarking data set. This data resource includes 4709 manually curated relationships between 1557 paths and 49 cancers with 2427 upstream regulators in 7 species. Based on this resource, we first summarized the cancer-pathway associations and revealed some commonly deregulated pathways across tumor types. Then, we systematically analyzed these oncogenic pathways by integrating TCGA pan-cancer data sets. Multi-omics analysis showed oncogenic pathways may play different roles across tumor types under different omics contexts. We also charted the survival relevance landscape of oncogenic pathways in 26 tumor types, identified dominant omics features and found survival relevance for oncogenic pathways varied in tumor types and omics levels. Moreover, we predicted upstream regulators and constructed a hierarchical network model to understand the pathogenic mechanism of human cancers underlying oncogenic pathway context. 
Finally, we developed `CPAD' (freely available at http://bio-bigdata.hrbmu.edu.cn/CPAD/), an online resource for exploring oncogenic pathways in human cancers, that integrated manually curated cancer-pathway associations, TCGA pan-cancer multi-omics data sets, drug-target data, drug sensitivity and multi-omics data for cancer cell lines. In summary, our study provides a comprehensive characterization of oncogenic pathways and also presents a valuable resource for investigating the pathogenesis of human cancer.",2020-05-01 +,MON-176 Making Informed Decisions on the Selection of Antibodies Using dkNET (NIDDK Information Network),"Abstract The NIDDK Information Network (dkNET; https://dknet.org) is an open community resource portal for basic and clinical investigators in diabetes, digestive, endocrine, metabolic, kidney, and urologic diseases [1]. dkNET provides access to a collection of diverse research resources, including data, information, materials, organisms, tools, funding opportunities, literature, services, and events that advance the mission of the National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK). dkNET also supports the use of unique identifiers for resources, the Research Resource Identifier (RRID)[2], and has developed services and tools to assist researchers in improving rigor and reproducibility. Incomplete identification of antibodies, and other reagents, contributes to the reproducibility crisis in biomedical research. The proper identification of research resource using RRIDs, while also providing detailed and updated reports about these resources can help improve reproducible research. To that end, we have developed Resource Reports, which are based on a unique integrated data set and analytics platform that combines RRIDs, text mining and data aggregation. 
The reports provide a detailed overview of each resource and associated citation metrics, provide rating or validation information, from resources such as the Human Protein Atlas, ENCODE, or resource centers and consortia, and provide alerts when there is problem with the resource. With this information and additional information, such as who else has used these resources, users can make informed decisions about the resources that they plan to use. For example, if one were looking for an anti-human phospho-akt(ser473) antibody, one would start with a search of “phospho-akt” AND ser473 within the Antibody Resource Report. From the 53 antibodies found (on 11/1/18), one may initially select 14 antibodies that had been cited with RRIDs in the Endocrinology journal. Usage and Citation Metrics provided information about the usage of antibodies (cited with RRIDs), so one may then compare the 5 most used antibodies. Additional information on rating and alerts is available for one of these antibodies - including information that it had been used by the NIDDK-funded Intestinal Stem Cell Consortium (ISCC) and that additional rating information is available. Further investigation of the collaborator network provides a list of researchers that used this antibody allowing one to inquire for additional validation information or to inquire about experience when using this antibody. References (1) Whetzel PL et al., PLoS One. 2015; 10(9):e0136206. (2) Bandrowski AE et al., Neuron. 2016; 90(3):434-6. Source of Support NIH NIDDK Grant U24DK097771",2019-04-15 +32621232,Dockground Tool for Development and Benchmarking of Protein Docking Procedures.,"Databases of protein-protein complexes are essential for the development of protein modeling/docking techniques. Such databases provide a knowledge base for docking algorithms, intermolecular potentials, search procedures, scoring functions, and refinement protocols. 
Development of docking techniques requires systematic validation of the modeling protocols on carefully curated benchmark sets of complexes. We present a description and a guide to the DOCKGROUND resource ( http://dockground.compbio.ku.edu ) for structural modeling of protein interactions. The resource integrates various datasets of protein complexes and other data for the development and testing of protein docking techniques. The sets include bound complexes, experimentally determined unbound, simulated unbound, model-model complexes, and docking decoys. The datasets are available to the user community through a Web interface.",2020-01-01 +33219686,LnCeCell: a comprehensive database of predicted lncRNA-associated ceRNA networks at single-cell resolution.,"Within the tumour microenvironment, cells exhibit different behaviours driven by fine-tuning of gene regulation. Identification of cellular-specific gene regulatory networks will deepen the understanding of disease pathology at single-cell resolution and contribute to the development of precision medicine. Here, we describe a database, LnCeCell (http://www.bio-bigdata.net/LnCeCell/ or http://bio-bigdata.hrbmu.edu.cn/LnCeCell/), which aims to document cellular-specific long non-coding RNA (lncRNA)-associated competing endogenous RNA (ceRNA) networks for personalised characterisation of diseases based on the 'One Cell, One World' theory. LnCeCell is curated with cellular-specific ceRNA regulations from >94 000 cells across 25 types of cancers and provides >9000 experimentally supported lncRNA biomarkers, associated with tumour metastasis, recurrence, prognosis, circulation, drug resistance, etc. For each cell, LnCeCell illustrates a global map of ceRNA sub-cellular locations, which have been manually curated from the literature and related data sources, and portrays a functional state atlas for a single cancer cell. 
LnCeCell also provides several flexible tools to infer ceRNA functions based on a specific cellular background. LnCeCell serves as an important resource for investigating the gene regulatory networks within a single cell and can help researchers understand the regulatory mechanisms underlying complex microbial ecosystems and individual phenotypes.",2021-01-01 +33186585,PolarProtDb: A Database of Transmembrane and Secreted Proteins showing Apical-Basal Polarity.,"Most cells in multicellular organisms are somehow asymmetric, polarized: maintaining separate membrane domains. Typical examples are the epithelial cells (apical-basal polarization), neurons (dendritic-axonal domains), or migratory cells (with a leading and a trailing edge). Here we present the most comprehensive database containing experimentally verified mammalian proteins that display polarized sorting or secretion, focusing on epithelial polarity. In addition to the source cells or tissues, homology-based inferences and transmembrane topology (if applicable) are all provided. PolarProtDb also offers a detailed interface displaying all information that may be relevant for trafficking: including post-translational modifications (glycosylations and phosphorylations), known or predicted short linear motifs conserved across orthologs, as well as potential interaction partners. Data on polarized sorting has so far been scattered across myriads of publications, hence difficult to access. This information can help researchers in several areas, such as scanning for potential entry points of viral agents like COVID-19. PolarProtDb shall be a useful resource to design future experiments as well as for comparative analyses. 
The database is available at http://polarprotdb.enzim.hu.",2020-11-10 +33245771,MarkerDB: an online database of molecular biomarkers.,"MarkerDB is a freely available electronic database that attempts to consolidate information on all known clinical and a selected set of pre-clinical molecular biomarkers into a single resource. The database includes four major types of molecular biomarkers (chemical, protein, DNA [genetic] and karyotypic) and four biomarker categories (diagnostic, predictive, prognostic and exposure). MarkerDB provides information such as: biomarker names and synonyms, associated conditions or pathologies, detailed disease descriptions, detailed biomarker descriptions, biomarker specificity, sensitivity and ROC curves, standard reference values (for protein and chemical markers), variants (for SNP or genetic markers), sequence information (for genetic and protein markers), molecular structures (for protein and chemical markers), tissue or biofluid sources (for protein and chemical markers), chromosomal location and structure (for genetic and karyotype markers), clinical approval status and relevant literature references. Users can browse the data by conditions, condition categories, biomarker types, biomarker categories or search by sequence similarity through the advanced search function. Currently, the database contains 142 protein biomarkers, 1089 chemical biomarkers, 154 karyotype biomarkers and 26 374 genetic markers. These are categorized into 25 560 diagnostic biomarkers, 102 prognostic biomarkers, 265 exposure biomarkers and 6746 predictive biomarkers or biomarker panels. Collectively, these markers can be used to detect, monitor or predict 670 specific human conditions which are grouped into 27 broad condition categories. 
MarkerDB is available at https://markerdb.ca.",2021-01-01 +33215706,Draft genome and transcriptome analyses of halophyte rice Oryza coarctata provide resources for salinity and submergence stress response factors.,"Oryza coarctata is a wild relative of rice that has adapted to diverse ecological environments, including high salinity and submergence. Thus, it can provide an important resource for discovering candidate genes/factors involved in tolerance to these stresses. Here, we report a draft genome assembly of 573 Mb comprised of 8877 scaffolds with N50 length of 205 kb. We predicted a total of 50,562 protein-coding genes, of which a significant fraction was found to be involved in secondary metabolite biosynthesis and hormone signal transduction pathways. Several salinity and submergence stress-responsive protein-coding and long noncoding RNAs involved in diverse biological processes were identified using RNA-sequencing data. Based on small RNA sequencing, we identified 168 unique miRNAs and 3219 target transcripts (coding and noncoding) involved in several biological processes, including abiotic stress responses. Further, whole genome bisulphite sequencing data analysis revealed at least 19%-48% methylcytosines in different sequence contexts and the influence of methylation status on gene expression. The genome assembly along with other datasets have been made publicly available at http://ccbb.jnu.ac.in/ory-coar. Altogether, we provide a comprehensive genomic resource for understanding the regulation of salinity and submergence stress responses and identification of candidate genes/factors involved for functional genomics studies.",2020-11-30 +34600479,DECONbench: a benchmarking platform dedicated to deconvolution methods for tumor heterogeneity quantification.,"

Background

Quantification of tumor heterogeneity is essential to better understand cancer progression and to adapt therapeutic treatments to patient specificities. Bioinformatic tools to assess the different cell populations from single-omic datasets as bulk transcriptome or methylome samples have been recently developed, including reference-based and reference-free methods. Improved methods using multi-omic datasets are yet to be developed in the future and the community would need systematic tools to perform a comparative evaluation of these algorithms on controlled data.

Results

We present DECONbench, a standardized unbiased benchmarking resource, applied to the evaluation of computational methods quantifying cell-type heterogeneity in cancer. DECONbench includes gold standard simulated benchmark datasets, consisting of transcriptome and methylome profiles mimicking pancreatic adenocarcinoma molecular heterogeneity, and a set of baseline deconvolution methods (reference-free algorithms inferring cell-type proportions). DECONbench performs a systematic performance evaluation of each new methodological contribution and provides the possibility to publicly share source code and scoring.

Conclusion

DECONbench allows continuous submission of new methods in a user-friendly fashion, each novel contribution being automatically compared to the reference baseline methods, which enables crowdsourced benchmarking. DECONbench is designed to serve as a reference platform for the benchmarking of deconvolution methods in the evaluation of cancer heterogeneity. We believe it will contribute to leverage the benchmarking practices in the biomedical and life science communities. DECONbench is hosted on the open source Codalab competition platform. It is freely available at: https://competitions.codalab.org/competitions/27453 .",2021-10-02 +32330167,geoBoundaries: A global database of political administrative boundaries.,"We present the geoBoundaries Global Administrative Database (geoBoundaries): an online, open license resource of the geographic boundaries of political administrative divisions (i.e., state, county). Contrasted to other resources geoBoundaries (1) provides detailed information on the legal open license for every boundary in the repository, and (2) focuses on provisioning highly precise boundary data to support accurate, replicable scientific inquiry. Further, all data is released in a structured form, allowing for the integration of geoBoundaries with large-scale computational workflows. Our database has records for every country around the world, with up to 5 levels of administrative hierarchy. The database is accessible at http://www.geoboundaries.org, and a static version is archived on the Harvard Dataverse.",2020-04-24 +33216893,"DPL: a comprehensive database on sequences, structures, sources and functions of peptide ligands. ","DPL (http://www.peptide-ligand.cn/) is a comprehensive database of peptide ligand (DPL). DPL1.0 holds 1044 peptide ligand entries and provides references for the study of the polypeptide platform. The data were collected from PubMed-NCBI, PDB, APD3, CAMPR3, etc. The lengths of the base sequences are varied from 3 to 78. 
DPL database has 923 linear peptides and 88 cyclic peptides. The functions of peptides collected by DPL are very wide. It includes 540 entries of antiviral peptides (including SARS-CoV-2), 55 entries of signal peptides, 48 entries of protease inhibitors, 45 entries of anti-hypertension, 37 entries of anticancer peptides, etc. There are 270 different kinds of peptide targets. All peptides in DPL have clear binding targets. Most of the peptides and receptors have 3D structures experimentally verified or predicted by CYCLOPS, I-TASSER and SWISS-MODEL. With the rapid development of the COVID-2019 epidemic, this database also collects the research progress of peptides against coronavirus. In conclusion, DPL is a unique resource, which allows users easily to explore the targets, different structures as well as properties of peptides.",2020-11-01 +32433469,Construction of a web-based nanomaterial database by big data curation and modeling friendly nanostructure annotations.,"Modern nanotechnology research has generated numerous experimental data for various nanomaterials. However, the few nanomaterial databases available are not suitable for modeling studies due to the way they are curated. Here, we report the construction of a large nanomaterial database containing annotated nanostructures suited for modeling research. The database, which is publicly available through http://www.pubvinas.com/, contains 705 unique nanomaterials covering 11 material types. Each nanomaterial has up to six physicochemical properties and/or bioactivities, resulting in more than ten endpoints in the database. All the nanostructures are annotated and transformed into protein data bank files, which are downloadable by researchers worldwide. Furthermore, the nanostructure annotation procedure generates 2142 nanodescriptors for all nanomaterials for machine learning purposes, which are also available through the portal. 
This database provides a public resource for data-driven nanoinformatics modeling research aimed at rational nanomaterial design and other areas of modern computational nanotechnology.",2020-05-20 +33174597,GlycoPOST realizes FAIR principles for glycomics mass spectrometry data.,"For the reproducibility and sustainability of scientific research, FAIRness (Findable, Accessible, Interoperable and Re-usable), with respect to the release of raw data obtained by researchers, is one of the most important principles underpinning the future of open science. In genomics and transcriptomics, the sharing of raw data from next-generation sequencers is made possible through public repositories. In addition, in proteomics, the deposition of raw data from mass spectrometry (MS) experiments into repositories is becoming standardized. However, a standard repository for such MS data had not yet been established in glycomics. With the increasing number of glycomics MS data, therefore, we have developed GlycoPOST (https://glycopost.glycosmos.org/), a repository for raw MS data generated from glycomics experiments. In just the first year since the release of GlycoPOST, 73 projects have already been registered by researchers around the world, and the number of registered projects is continuously growing, making a significant contribution to the future FAIRness of the glycomics field. GlycoPOST is a free resource to the community and accepts (and will continue to accept in the future) raw data regardless of vendor-specific formats.",2021-01-01 +31831730,"The odonate phenotypic database, a new open data resource for comparative studies of an old insect order.","We present The Odonate Phenotypic Database (OPD): an online data resource of dragonfly and damselfly phenotypes (Insecta: Odonata). Odonata is a relatively small insect order that currently consists of about 6400 species belonging to 32 families. 
The database consists of multiple morphological, life-history and behavioral traits, and biogeographical information collected from literature sources. We see taxon-specific phenotypic databases from Odonata and other organismal groups as becoming an increasingly valuable resource in comparative studies. Our database has phenotypic records for 1011 of all 6400 known odonate species. The database is accessible at http://www.odonatephenotypicdatabase.org/, and a static version with an information file about the variables in the database is archived at Dryad.",2019-12-12 +33568057,GalaxyTrakr: a distributed analysis tool for public health whole genome sequence data accessible to non-bioinformaticians.,"

Background

Processing and analyzing whole genome sequencing (WGS) is computationally intense: a single Illumina MiSeq WGS run produces ~ 1 million 250-base-pair reads for each of 24 samples. This poses significant obstacles for smaller laboratories, or laboratories not affiliated with larger projects, which may not have dedicated bioinformatics staff or computing power to effectively use genomic data to protect public health. Building on the success of the cloud-based Galaxy bioinformatics platform ( http://galaxyproject.org ), already known for its user-friendliness and powerful WGS analytical tools, the Center for Food Safety and Applied Nutrition (CFSAN) at the U.S. Food and Drug Administration (FDA) created a customized 'instance' of the Galaxy environment, called GalaxyTrakr ( https://www.galaxytrakr.org ), for use by laboratory scientists performing food-safety regulatory research. The goal was to enable laboratories outside of the FDA internal network to (1) perform quality assessments of sequence data, (2) identify links between clinical isolates and positive food/environmental samples, including those at the National Center for Biotechnology Information sequence read archive ( https://www.ncbi.nlm.nih.gov/sra/ ), and (3) explore new methodologies such as metagenomics. GalaxyTrakr hosts a variety of free and adaptable tools and provides the data storage and computing power to run the tools. These tools support coordinated analytic methods and consistent interpretation of results across laboratories. Users can create and share tools for their specific needs and use sequence data generated locally and elsewhere.

Results

In its first full year (2018), GalaxyTrakr processed over 85,000 jobs and went from 25 to 250 users, representing 53 different public and state health laboratories, academic institutions, international health laboratories, and federal organizations. By mid-2020, it has grown to 600 registered users and processed over 450,000 analytical jobs. To illustrate how laboratories are making use of this resource, we describe how six institutions use GalaxyTrakr to quickly analyze and review their data. Instructions for participating in GalaxyTrakr are provided.

Conclusions

GalaxyTrakr advances food safety by providing reliable and harmonized WGS analyses for public health laboratories and promoting collaboration across laboratories with differing resources. Anticipated enhancements to this resource will include workflows for additional foodborne pathogens, viruses, and parasites, as well as new tools and services.",2021-02-10 +33151290,PubChem in 2021: new data content and improved web interfaces.,"PubChem (https://pubchem.ncbi.nlm.nih.gov) is a popular chemical information resource that serves the scientific community as well as the general public, with millions of unique users per month. In the past two years, PubChem made substantial improvements. Data from more than 100 new data sources were added to PubChem, including chemical-literature links from Thieme Chemistry, chemical and physical property links from SpringerMaterials, and patent links from the World Intellectual Properties Organization (WIPO). PubChem's homepage and individual record pages were updated to help users find desired information faster. This update involved a data model change for the data objects used by these pages as well as by programmatic users. Several new services were introduced, including the PubChem Periodic Table and Element pages, Pathway pages, and Knowledge panels. Additionally, in response to the coronavirus disease 2019 (COVID-19) outbreak, PubChem created a special data collection that contains PubChem data related to COVID-19 and the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).",2021-01-01 +32333753,"An interactive online dashboard for tracking COVID-19 in U.S. counties, cities, and states in real time.","

Objective

The study sought to create an online resource that informs the public of coronavirus disease 2019 (COVID-19) outbreaks in their area.

Materials and methods

This R Shiny application aggregates data from multiple resources that track COVID-19 and visualizes them through an interactive, online dashboard.

Results

The Web resource, called the COVID-19 Watcher, can be accessed online (https://covid19watcher.research.cchmc.org/). It displays COVID-19 data from every county and 188 metropolitan areas in the United States. Features include rankings of the worst-affected areas and auto-generating plots that depict temporal changes in testing capacity, cases, and deaths.

Discussion

The Centers for Disease Control and Prevention does not publish COVID-19 data for local municipalities, so it is critical that academic resources fill this void so the public can stay informed. The data used have limitations and likely underestimate the scale of the outbreak.

Conclusions

The COVID-19 Watcher can provide the public with real-time updates of outbreaks in their area.",2020-07-01 +34860515,Multimodal Mass Spectrometry Imaging of Rat Brain Using IR-MALDESI and NanoPOTS-LC-MS/MS.,"Multimodal mass spectrometry imaging (MSI) is a critical technique used for deeply investigating biological systems by combining multiple MSI platforms in order to gain the maximum molecular information about a sample that would otherwise be limited by a single analytical technique. The aim of this work was to create a multimodal MSI approach that measures metabolomic and proteomic data from a single biological organ by combining infrared matrix-assisted laser desorption electrospray ionization (IR-MALDESI) for metabolomic MSI and nanodroplet processing in one pot for trace samples (nanoPOTS) LC-MS/MS for spatially resolved proteome profiling. Adjacent tissue sections of rat brain were analyzed by each platform, and each data set was individually analyzed using previously optimized workflows. IR-MALDESI data sets were annotated by accurate mass and spectral accuracy using HMDB, METLIN, and LipidMaps databases, while nanoPOTS-LC-MS/MS data sets were searched against the rat proteome using the Sequest HT algorithm and filtered with a 1% FDR. The combined data revealed complementary molecular profiles distinguishing the corpus callosum against other sampled regions of the brain. A multiomic pathway integration showed a strong correlation between the two data sets when comparing average abundances of metabolites and corresponding enzymes in each brain region. This work demonstrates the first steps in the creation of a multimodal MSI technique that combines two highly sensitive and complementary imaging platforms. Raw data files are available in METASPACE (https://metaspace2020.eu/project/pace-2021) and MassIVE (identifier: MSV000088211).",2021-12-03 +32772385,DICOM re-encoding of volumetrically annotated Lung Imaging Database Consortium (LIDC) nodules.,"

Purpose

The dataset contains annotations for lung nodules collected by the Lung Imaging Data Consortium and Image Database Resource Initiative (LIDC) stored as standard DICOM objects. The annotations accompany a collection of computed tomography (CT) scans for over 1000 subjects annotated by multiple expert readers, and correspond to ""nodules ≥ 3 mm"", defined as any lesion considered to be a nodule with greatest in-plane dimension in the range 3-30 mm regardless of presumed histology. The present dataset aims to simplify reuse of the data with the readily available tools, and is targeted towards researchers interested in the analysis of lung CT images.

Acquisition and validation methods

Open source tools were utilized to parse the project-specific XML representation of LIDC-IDRI annotations and save the result as standard DICOM objects. Validation procedures focused on establishing compliance of the resulting objects with the standard, consistency of the data between the DICOM and project-specific representation, and evaluating interoperability with the existing tools.

Data format and usage notes

The dataset utilizes DICOM Segmentation objects for storing annotations of the lung nodules, and DICOM Structured Reporting objects for communicating qualitative evaluations (nine attributes) and quantitative measurements (three attributes) associated with the nodules. The total of 875 subjects contain 6859 nodule annotations. Clustering of the neighboring annotations resulted in 2651 distinct nodules. The data are available in TCIA at https://doi.org/10.7937/TCIA.2018.h7umfurq.

Potential applications

The standardized dataset maintains the content of the original contribution of the LIDC-IDRI consortium, and should be helpful in developing automated tools for characterization of lung lesions and image phenotyping. In addition to those properties, the representation of the present dataset makes it more FAIR (Findable, Accessible, Interoperable, Reusable) for the research community, and enables its integration with other standardized data collections.",2020-09-06 +32728249,Expanded encyclopaedias of DNA elements in the human and mouse genomes.,"The human and mouse genomes contain instructions that specify RNAs and proteins and govern the timing, magnitude, and cellular context of their production. To better delineate these elements, phase III of the Encyclopedia of DNA Elements (ENCODE) Project has expanded analysis of the cell and tissue repertoires of RNA transcription, chromatin structure and modification, DNA methylation, chromatin looping, and occupancy by transcription factors and RNA-binding proteins. Here we summarize these efforts, which have produced 5,992 new experimental datasets, including systematic determinations across mouse fetal development. All data are available through the ENCODE data portal (https://www.encodeproject.org), including phase II ENCODE1 and Roadmap Epigenomics2 data. We have developed a registry of 926,535 human and 339,815 mouse candidate cis-regulatory elements, covering 7.9 and 3.4% of their respective genomes, by integrating selected datatypes associated with gene regulation, and constructed a web-based server (SCREEN; http://screen.encodeproject.org) to provide flexible, user-defined access to this resource. 
Collectively, the ENCODE data and registry provide an expansive resource for the scientific community to build a better understanding of the organization and function of the human and mouse genomes.",2020-07-29 +31642469,PhenoModifier: a genetic modifier database for elucidating the genetic basis of human phenotypic variation.,"From clinical observations to large-scale sequencing studies, the phenotypic impact of genetic modifiers is evident. To better understand the full spectrum of the genetic contribution to human disease, concerted efforts are needed to construct a useful modifier resource for interpreting the information from sequencing data. Here, we present the PhenoModifier (https://www.biosino.org/PhenoModifier), a manually curated database that provides a comprehensive overview of human genetic modifiers. By manually curating over ten thousand published articles, 3078 records of modifier information were entered into the current version of PhenoModifier, related to 288 different disorders, 2126 genetic modifier variants and 843 distinct modifier genes. To help users probe further into the mechanism of their interested modifier genes, we extended the yeast genetic interaction data and yeast quantitative trait loci to the human and we also integrated GWAS data into the PhenoModifier to assist users in evaluating all possible phenotypes associated with a modifier allele. As the first comprehensive resource of human genetic modifiers, PhenoModifier provides a more complete spectrum of genetic factors contributing to human phenotypic variation. 
The portal has a broad scientific and clinical scope, spanning activities relevant to variant interpretation for research purposes as well as clinical decision making.",2020-01-01 +33219661,"LincSNP 3.0: an updated database for linking functional variants to human long non-coding RNAs, circular RNAs and their regulatory elements.","We describe an updated comprehensive database, LincSNP 3.0 (http://bioinfo.hrbmu.edu.cn/LincSNP), which aims to document and annotate disease or phenotype-associated variants in human long non-coding RNAs (lncRNAs) and circular RNAs (circRNAs) or their regulatory elements. LincSNP 3.0 has updated with several novel features, including (i) more types of variants including single nucleotide polymorphisms (SNPs), linkage disequilibrium SNPs (LD SNPs), somatic mutations and RNA editing sites have been expanded; (ii) more regulatory elements including transcription factor binding sites (TFBSs), enhancers, DNase I hypersensitive sites (DHSs), topologically associated domains (TADs), footprints, methylations and open chromatin regions have been added; (iii) the associations among circRNAs, regulatory elements and variants have been identified; (iv) more experimentally supported variant-lncRNA/circRNA-disease/phenotype associations have been manually collected; (v) the sources of lncRNAs, circRNAs, SNPs, somatic mutations and RNA editing sites have been updated. Moreover, four flexible online tools including Genome Browser, Variant Mapper, Circos Plotter and Functional Annotation have been developed to retrieve, visualize and analyze the data. Collectively, LincSNP 3.0 provides associations among functional variants, regulatory elements, lncRNAs and circRNAs in diseases. 
It will serve as an important and continually updated resource for investigating functions and mechanisms of lncRNAs and circRNAs in diseases.",2021-01-01 +33010177,SC2disease: a manually curated database of single-cell transcriptome for human diseases.,"SC2disease (http://easybioai.com/sc2disease/) is a manually curated database that aims to provide a comprehensive and accurate resource of gene expression profiles in various cell types for different diseases. With the development of single-cell RNA sequencing (scRNA-seq) technologies, uncovering cellular heterogeneity of different tissues for different diseases has become feasible by profiling transcriptomes across cell types at the cellular level. In particular, comparing gene expression profiles between different cell types and identifying cell-type-specific genes in various diseases offers new possibilities to address biological and medical questions. However, systematic, hierarchical and vast databases of gene expression profiles in human diseases at the cellular level are lacking. Thus, we reviewed the literature prior to March 2020 for studies which used scRNA-seq to study diseases with human samples, and developed the SC2disease database to summarize all the data by different diseases, tissues and cell types. SC2disease documents 946 481 entries, corresponding to 341 cell types, 29 tissues and 25 diseases. Each entry in the SC2disease database contains comparisons of differentially expressed genes between different cell types, tissues and disease-related health status. Furthermore, we reanalyzed gene expression matrix by unified pipeline to improve the comparability between different studies. 
For each disease, we also compare cell-type-specific genes with the corresponding genes of lead single nucleotide polymorphisms (SNPs) identified in genome-wide association studies (GWAS) to implicate cell type specificity of the traits.",2021-01-01 +33219685,Lnc2Cancer 3.0: an updated resource for experimentally supported lncRNA/circRNA cancer associations and web tools based on RNA-seq and scRNA-seq data.,"An updated Lnc2Cancer 3.0 (http://www.bio-bigdata.net/lnc2cancer or http://bio-bigdata.hrbmu.edu.cn/lnc2cancer) database, which includes comprehensive data on experimentally supported long non-coding RNAs (lncRNAs) and circular RNAs (circRNAs) associated with human cancers. In addition, web tools for analyzing lncRNA expression by high-throughput RNA sequencing (RNA-seq) and single-cell RNA-seq (scRNA-seq) are described. Lnc2Cancer 3.0 was updated with several new features, including (i) Increased cancer-associated lncRNA entries over the previous version. The current release includes 9254 lncRNA-cancer associations, with 2659 lncRNAs and 216 cancer subtypes. (ii) Newly adding 1049 experimentally supported circRNA-cancer associations, with 743 circRNAs and 70 cancer subtypes. (iii) Experimentally supported regulatory mechanisms of cancer-related lncRNAs and circRNAs, involving microRNAs, transcription factors (TF), genetic variants, methylation and enhancers were included. (iv) Appending experimentally supported biological functions of cancer-related lncRNAs and circRNAs including cell growth, apoptosis, autophagy, epithelial mesenchymal transformation (EMT), immunity and coding ability. (v) Experimentally supported clinical relevance of cancer-related lncRNAs and circRNAs in metastasis, recurrence, circulation, drug resistance, and prognosis was included. Additionally, two flexible online tools, including RNA-seq and scRNA-seq web tools, were developed to enable fast and customizable analysis and visualization of lncRNAs in cancers. 
Lnc2Cancer 3.0 is a valuable resource for elucidating the associations between lncRNA, circRNA and cancer.",2021-01-01 +36303746,GeneCloudOmics: A Data Analytic Cloud Platform for High-Throughput Gene Expression Analysis.,"Gene expression profiling techniques, such as DNA microarray and RNA-Sequencing, have provided significant impact on our understanding of biological systems. They contribute to almost all aspects of biomedical research, including studying developmental biology, host-parasite relationships, disease progression and drug effects. However, the high-throughput data generations present challenges for many wet experimentalists to analyze and take full advantage of such rich and complex data. Here we present GeneCloudOmics, an easy-to-use web server for high-throughput gene expression analysis that extends the functionality of our previous ABioTrans with several new tools, including protein datasets analysis, and a web interface. GeneCloudOmics allows both microarray and RNA-Seq data analysis with a comprehensive range of data analytics tools in one package that no other current standalone software or web-based tool can do. In total, GeneCloudOmics provides the user access to 23 different data analytical and bioinformatics tasks including reads normalization, scatter plots, linear/non-linear correlations, PCA, clustering (hierarchical, k-means, t-SNE, SOM), differential expression analyses, pathway enrichments, evolutionary analyses, pathological analyses, and protein-protein interaction (PPI) identifications. Furthermore, GeneCloudOmics allows the direct import of gene expression data from the NCBI Gene Expression Omnibus database. The user can perform all tasks rapidly through an intuitive graphical user interface that overcomes the hassle of coding, installing tools/packages/libraries and dealing with operating systems compatibility and version issues, complications that make data analysis tasks challenging for biologists. 
Thus, GeneCloudOmics is a one-stop open-source tool for gene expression data analysis and visualization. It is freely available at http://combio-sifbi.org/GeneCloudOmics.",2021-11-25 +35116646,The expression and prognostic value of Src homology 2 domain-containing transforming protein C3 (SHC3) and its potential role in colorectal cancer.,"

Background

To determine the prognostic value of Src homology 2 domain-containing transforming protein C3 (SHC3) in colorectal cancer (CRC).

Methods

The pan-cancer expression of SHC3 mRNA in TCGA was analyzed using Gene_DE module in Tumor Immune Estimation Resource (TIMER) database. SHC3 mRNA expression in CRC was further analyzed by TCGA and Oncomine databases. The dataset from Kaplan-Meier Plotter (http://kmplot.com) was used to analyze the overall survival (OS) of CRC patients in relationship of SHC3 expression. SHC3 mRNA expression in the CRC HCT116 and RKO cell lines was measured by qRT-PCR. Both cell lines were transduced with shSHC3 or shCtrl lentiviruses, and the knockdown was validated by qRT-PCR and Western blotting. The effects of SHC3 knockdown were analyzed by MTT assay, Celigo-based cell counting, colony formation assay, scratch assay and Transwell migration assay.

Results

SHC3 is upregulated in tumor tissues relative to normal tissues across multiple cancer types including CRC in TCGA database, and associated with poor OS (HR =3.27, 95% CI: 1.31-8.16, log-rank P=0.0072). Consistent with this, SHC3 mRNA levels were significantly high in CRC cell lines. SHC3 knockdown in the HCT116 and RKO cells markedly reduced their proliferation and migration, and promoted apoptosis.

Conclusions

SHC3 is upregulated in CRC tissues and cell lines, and likely functions as an oncogene in CRC.",2021-07-01 +34903605,A pan-cancer immunogenomic atlas for immune checkpoint blockade immunotherapy.,"The ability to identify robust genomic signatures that predict response to immune checkpoint blockade is restricted by limited sample sizes and ungeneralizable performance across cohorts. To address these challenges, we established Cancer-Immu (http://bioinfo.vanderbilt.edu/database/Cancer-Immu/) a comprehensive platform that integrates large-scale multidimensional omics data, including genetic, bulk, and single-cell transcriptomic, proteomic, and dynamic genomic profiles, with clinical phenotypes to explore consistent and rare immunogenomic connections. Currently Cancer-Immu has incorporated data for 3,652 samples for 16 cancer types. It provides easy access to immunogenomic data and empowers researchers to translate omics datasets into biological insights and clinical applications.",2021-12-13 +,1121. Implementation and Evaluation of a Virtual Microbiology Laboratory for Pharmacy Students,"Abstract

Background

Health professions students learn microbiology concepts during in-person laboratories (labs). While highly rated by students, labs are extremely resource- and time-intensive. A virtual lab may minimize resource use while maintaining educational value. We report on the implementation and evaluation of a virtual lab designed to teach clinical microbiology to pharmacy students during an infectious diseases course.

Methods

We created a video in our clinical microbiology lab to depict the steps involved in processing and analyzing a patient sample. We also designed 2 web-based, interactive modules for students to practice lab techniques, such as virtually streaking an agar plate. Students viewed the video and completed the modules prior to attending a 2-hour in-person, case-based, small group discussion on higher-order clinical microbiology concepts. All students were invited to complete a post-session evaluation that assessed achievement of session objectives.

Results

Sixty-nine students (65%) completed the survey. Students highly rated the video, modules, and in-class cases (Table 1). Fewer students felt confident explaining the clinical microbiology process, compared to selecting antibiotics, interpreting cultures, explaining Gram stains, and interpreting an antibiogram (Table 2). Student comments highlighted the value of the video, modules, and instructor facilitation during the in-class session. Students also suggested improvements with the module user interface and reinforcement of certain topics (e.g. clinical breakpoints) during the in-class session. Table 1: Student Ratings of the Quality of Instructional Materials Table 2: Student Self-Reported Agreement with Achievement of Session Objectives

Conclusion

We demonstrated successful implementation of a virtual microbiology lab within a pharmacy course. Overall student ratings of materials were favorable. We plan to refine and re-offer the virtual micro lab next year and measure its association with student performance. To facilitate the adaptation of this virtual lab by other schools, our teaching materials are available for use via https://vimeo.com/390087512 (video) and http://tiny.ucsf.edu/atlas (modules).

Disclosures

All Authors: No reported disclosures",2020-10-01 +33156326,MetaNetX/MNXref: unified namespace for metabolites and biochemical reactions in the context of metabolic models.,"MetaNetX/MNXref is a reconciliation of metabolites and biochemical reactions providing cross-links between major public biochemistry and Genome-Scale Metabolic Network (GSMN) databases. The new release brings several improvements with respect to the quality of the reconciliation, with particular attention dedicated to preserving the intrinsic properties of GSMN models. The MetaNetX website (https://www.metanetx.org/) provides access to the full database and online services. A major improvement is for mapping of user-provided GSMNs to MNXref, which now provides diagnostic messages about model content. In addition to the website and flat files, the resource can now be accessed through a SPARQL endpoint (https://rdf.metanetx.org).",2021-01-01 +34877793,GEPSdb: The Gene Expression Database of Poplar under Stress.,"As a model tree species, poplar (Populus L.) has important economic and ecological value. Here, we constructed the GEPSdb (Gene Expression Database of Poplar under Stress; http://gepsdb.ahau-edu.cn/), which is an integrated database of poplar gene expression profiles derived from RNA-seq and microarray library data. This database provides a comprehensive collection of gene expression data from poplar exposed to 14 types of environmental stress from 11 high-quality RNA-seq experiments and 51 microarray libraries. The GEPSdb includes 56 genes from previous literature that have been examined in poplar and functionally verified. By incorporating data from numerous expression analyses, GEPSdb provides a user-friendly web interface for querying, browsing, and visualizing the expression profiles of related genes. 
Consequently, GEPSdb can be used to link transcription data with phenotypes and can enhance our understanding of important biological processes and mechanisms underlying complex agronomic traits in poplar.",2021-12-08 +34954795,CoDNaS-RNA: a database of Conformational Diversity in the Native State of RNA. ,"Conformational changes in RNA native ensembles are central to fulfill many of their biological roles. Systematic knowledge of the extent and possible modulators of this conformational diversity is desirable to better understand the relationship between RNA dynamics and function. We have developed CoDNaS-RNA as the first database of conformational diversity in RNA molecules. Known RNA structures are retrieved and clustered to identify alternative conformers of each molecule. Pairwise structural comparisons between all conformers within each cluster allows to measure the variability of the molecule. Additional annotations about structural features, molecular interactions and biological function are provided. All data in CoDNaS-RNA is free to download and available as a public website that can be of interest for researchers in computational biology and other life science disciplines. CoDNaS-RNA and the latest version of its data are available at http://ufq.unq.edu.ar/codnasrna or https://codnas-rna.bioinformatica.org/. Supplementary data are available at Bioinformatics online.",2021-12-25 +34655133,"Observed Antibody Space: A diverse database of cleaned, annotated, and translated unpaired and paired antibody sequences.","The antibody repertoires of individuals and groups have been used to explore disease states, understand vaccine responses, and drive therapeutic development. The arrival of B-cell receptor repertoire sequencing has enabled researchers to get a snapshot of these antibody repertoires, and as more data are generated, increasingly in-depth studies are possible. 
However, most publicly available data only exist as raw FASTQ files, making the data hard to access, process, and compare. The Observed Antibody Space (OAS) database was created in 2018 to offer clean, annotated, and translated repertoire data. In this paper, we describe an update to OAS that has been driven by the increasing volume of data and the appearance of paired (VH/VL) sequence data. OAS is now accessible via a new web server, with standardized search parameters and a new sequence-based search option. The new database provides both nucleotides and amino acids for every sequence, with additional sequence annotations to make the data Minimal Information about Adaptive Immune Receptor Repertoire compliant, and comments on potential problems with the sequence. OAS now contains 25 new studies, including severe acute respiratory syndrome coronavirus 2 data and paired sequencing data. The new database is accessible at http://opig.stats.ox.ac.uk/webapps/oas/, and all data are freely available for download.",2021-10-29 +34387941,"PharmGKB, an Integrated Resource of Pharmacogenomic Knowledge.","The Pharmacogenomics Knowledgebase (PharmGKB) is an integrated online knowledge resource for the understanding of how genetic variation contributes to variation in drug response. Our focus includes not only pharmacogenomic information useful for clinical implementation (e.g., drug dosing guidelines and annotated drug labels), but also information to catalyze scientific research and drug discovery (e.g., variant-drug annotations and drug-centered pathways). As of April 2021, the annotated content of PharmGKB spans 715 drugs, 1761 genes, 227 diseases, 165 clinical guidelines, and 784 drug labels. We have manually curated data from more than 9000 published papers to generate the content of PharmGKB. Recently, we have also implemented an automated natural language processing (NLP) tool to broaden our coverage of the pharmacogenomic literature. 
This article contains a basic protocol describing how to navigate the PharmGKB website to retrieve information on how genes and genetic variations affect drug efficacy and toxicity. It also includes a protocol on how to use PharmGKB to facilitate interpretation of findings for a pharmacogenomic variant genotype or metabolizer phenotype. PharmGKB is freely available at http://www.pharmgkb.org. © 2021 Wiley Periodicals LLC. Basic Protocol 1: Navigating the homepage of PharmGKB and searching by drug Basic Protocol 2: Using PharmGKB to facilitate interpretation of pharmacogenomic variant genotypes or metabolizer phenotypes.",2021-08-01 +34694049,Utilizing ClinGen gene-disease validity and dosage sensitivity curations to inform variant classification.,"Understanding whether there is enough evidence to implicate a gene's role in a given disease, as well as the mechanisms by which variants in this gene might cause this disease, is essential to determine clinical relevance. The National Institutes of Health-funded Clinical Genome Resource (ClinGen) has developed evaluation frameworks to assess both the strength of evidence supporting a relationship between a gene and disease (gene-disease validity), and whether loss (haploinsufficiency) or gain (triplosensitivity) of individual genes or genomic regions is a mechanism for disease (dosage sensitivity). ClinGen actively applies these frameworks across multiple disease domains, and makes this information publicly available via its website (https://www.clinicalgenome.org/) for use in multiple applications, including clinical variant classification. 
Here, we describe how the results of these curation processes can be utilized to inform the appropriate application of pathogenicity criteria for both sequence and copy number variants, as well as to guide test development and inform genomic filtering pipelines.",2021-11-15 +32291734,"The Auditory English Lexicon Project: A multi-talker, multi-region psycholinguistic database of 10,170 spoken words and nonwords.","The Auditory English Lexicon Project (AELP) is a multi-talker, multi-region psycholinguistic database of 10,170 spoken words and 10,170 spoken nonwords. Six tokens of each stimulus were recorded as 44.1-kHz, 16-bit, mono WAV files by native speakers of American, British, and Singapore English, with one from each gender. Intelligibility norms, as determined by average identification scores and confidence ratings from between 15 and 20 responses per token, were obtained from 561 participants. Auditory lexical decision accuracies and latencies, with between 25 and 36 responses per token, were obtained from 438 participants. The database also includes a variety of lexico-semantic variables and structural indices for the words and nonwords, as well as participants' individual difference measures such as age, gender, language background, and proficiency. Taken together, there are a total of 122,040 sound files and over 4 million behavioral data points in the AELP. We describe some of the characteristics of this database. This resource is freely available from a website ( https://inetapps.nus.edu.sg/aelp/ ) hosted by the Department of Psychology at the National University of Singapore.",2020-10-01 +,widgetcon: A website and program for quick conversion among common population genetic data formats,"One of the most tedious steps in genetic data analyses is the reformatting data generated with one program for use with other applications. 
This conversion is necessary because comprehensive evaluation of the data may be based on different algorithms included in diverse software, each requiring a distinct input format. A platform‐independent and freely available program or a web‐based tool dedicated to such reformatting can save time and efforts in data processing. Here, we report widgetcon, a website and a program which has been developed to quickly and easily convert among various molecular data formats commonly used in phylogenetic analysis, population genetics, and other fields. The web‐based service is available at https://www.widgetcon.net. The program and the website convert the major data formats in four basic steps in less than a minute. The resource will be a useful tool for the research community and can be updated to include more formats and features in the future.",2019-09-01 +,52. BrMPANEL: A PUBLIC RESOURCE OF ORGANOTROPIC CELL LINES,"Abstract Central nervous system (CNS), notably brain, metastases are most prevalent in lung cancer (20–56% of patients), breast cancer (5–20%) and melanoma (7–16%). Lesions occur in both the brain parenchyma and the meninges. To mechanistically understand CNS metastasis formation and develop preventive and therapeutic strategies, it is essential to use model systems that, as much as possible, faithfully recapitulate the clinical disease process. Furthermore, the complexities of brain metastases dictate that studies should utilize multiple model systems in various stages of brain metastases progression. To facilitate brain metastasis research, 19 laboratories around the world have compiled comprehensive information on their brain metastasis mouse models. Each lab has provided details on the cell lines that they have generated or characterized as being capable of forming metastatic colonies in the brain, as well as principle methodologies of brain metastasis research. 
This Brain Metastasis Cell Lines Panel (BrMPanel, https://apps.cnio.es/app/BrainMetastasis/CellLines) represents the first of its class and includes information about each cell line, how tropism to the brain was established, and the behavior of each model in vivo. The BrMPanel is composed of 60 cell lines, derived from patients (32 cell lines, 53%), mouse (27, 45%) or rat (1, 2%), and represent the three main cancer types that result in brain metastasis: breast cancer (38 cell lines, 63%), lung cancer (8, 13%) and melanoma (14, 23%). This resource is intended to assist investigators in choosing the most suitable model for research on brain metastasis, and is available to the entire scientific community. The ultimate goal of this effort is to facilitate research on this unmet clinical need, to improve models through a collaborative environment, and to promote the exchange of information on these valuable resources. We invite other collaborators to contribute their models to the BrMPanel to grow this resource.",2020-08-01 +33193666,HpeNet: Co-expression Network Database for de novo Transcriptome Assembly of Paeonia lactiflora Pall.,"The herbaceous peony (Paeonia lactiflora Pall.) is a well-known ornamental flowering and pharmaceutical plant found in China. Its high medicinal value has long been recognized by traditional Chinese medicine (as Radix paeoniae Alba and Radix paeoniae Rubra), and it has become economically valued for its oilseed in recent years; like other Paeonia species, it has been identified as a novel resource for the α-linolenic acid used in seed oil production. However, its genome has not yet been sequenced, and little transcriptome data on Paeonia lactiflora are available. To obtain a comprehensive transcriptome for Paeonia lactiflora, RNAs from 10 tissues of the Paeonia lactiflora Pall. cv Shaoyou17C were used for de novo assembly, and 416,062 unigenes were obtained. 
Using a homology search, it was found that 236,222 (approximately 57%) unigenes had at least one BLAST hit in one or more public data resources. The construction of co-expression networks is a feasible means for improving unigene annotation. Using in-house transcriptome data, we obtained a co-expression network covering 95.13% of the unigenes. Then we integrated co-expression network analyses and lipid-related pathway genes to study lipid metabolism in Paeonia lactiflora cultivars. Finally, we constructed the online database HpeNet (http://bioinformatics.cau.edu.cn/HpeNet) to integrate transcriptome data, gene information, the co-expression network, and so forth. The database can also be searched for gene details, gene functions, orthologous matches, and other data. Our online database may help the research community identify functional genes and perform research on Paeonia lactiflora more conveniently. We hope that de novo transcriptome assembly, combined with co-expression networks, can provide a feasible means to predict the gene function of species that do not have a reference genome.",2020-10-21 +35153418,Culture of spermatogonial stem cells and use of surrogate sires as a breeding technology to propagate superior genetics in livestock production: A systematic review.,"

Background and aim

Spermatogonial stem cells (SSCs) have previously been isolated from animals' testes, cultured in vitro, and successfully transplanted into compatible recipients. The SSC unique characteristic has potential for exploitation as a reproductive tool and this can be achieved through SSC intratesticular transplantation to surrogate sires. Here, we aimed at comprehensively analyzing published data on in vitro maintenance of SSC isolated from the testes of livestock animals and their applications.

Materials and methods

The literature search was performed in PubMed, Science Direct, and Google Scholar electronic databases. Data screening was conducted using Rayyan Intelligent Systematic Review software (https://www.rayyan.ai/). Duplicate papers were excluded from the study. Abstracts were read and relevant full papers were reviewed for data extraction.

Results

From a total of 4786 full papers screened, data were extracted from 93 relevant papers. Of these, eight papers reported on long-term culture conditions (>1 month) for SSC in different livestock species, 22 papers on short-term cultures (5-15 days), 10 papers on transfection protocols, 18 papers on transplantation using different methods of preparation of livestock recipients, and five papers on donor-derived spermatogenesis.

Conclusion

Optimization of SSC long-term culture systems has renewed the possibilities of utilization of these cells in gene-editing technologies to develop transgenic animals. Further, the development of genetically deficient recipients in the endogenous germline layer lends to a future possibility for the utilization of germ cell transplantation in livestock systems.",2021-12-31 +32556221,The OMEGA-NET International Inventory of Occupational Cohorts.,"In a recent count of cohort studies in Europe capturing information on occupation and/or occupational exposures, we estimated that there are more than 60 major studies with some type of occupational information that enrolled over 30 million persons. With few exceptions there have been no large-scale analyses systematically combining cohorts from this extraordinary resource. We present the development of an inventory of cohorts with occupational information in Europe and internationally and describe the online interactive tool with detailed information on existing cohorts. The OMEGA-NET inventory can be accessed at http://occupationalcohorts.net/ includes cohorts, case-control studies nested within cohorts and intervention studies that are active or can substantiate that their data are potentially accessible; that include data on occupation and/or industry or at least one occupational exposure; and that have at least one follow-up, either already conducted or planned. We expect that this open access inventory will be an important prerequisite for use of this resource of existing studies for research and policy development.",2020-07-01 +33276297,DBCOVP: A database of coronavirus virulent glycoproteins.,"Since the emergence of SARS-CoV-1 (2002), novel coronaviruses have emerged periodically like the MERS- CoV (2012) and now, the SARS-CoV-2 outbreak which has posed a global threat to public health. 
Although, this is the third zoonotic coronavirus breakout within the last two decades, there are only a few platforms that provide information about coronavirus genomes. None of them is specific for the virulence glycoproteins and complete sequence-structural features of these virulence factors across the betacoronavirus family including SARS-CoV-2 strains are lacking. Against this backdrop, we present DBCOVP (http://covp.immt.res.in/), the first manually-curated, web-based resource to provide extensive information on the complete repertoire of structural virulent glycoproteins from coronavirus genomes belonging to betacoronavirus genera. The database provides various sequence-structural properties in which users can browse and analyze information in different ways. Furthermore, many conserved T-cell and B-cell epitopes predicted for each protein are present that may perform a significant role in eliciting the humoral and cellular immune response. The tertiary structure of the epitopes together with the docked epitope-HLA binding-complex is made available to facilitate further analysis. DBCOVP presents an easy-to-use interface with in-built tools for similarity search, cross-genome comparison, phylogenetic, and multiple sequence alignment. DBCOVP will certainly be an important resource for experimental biologists engaged in coronavirus research studies and will aid in vaccine development.",2020-11-21 +33027504,PredHPI: an integrated web server platform for the detection and visualization of host-pathogen interactions using sequence-based methods.,"

Motivation

Understanding the mechanisms underlying infectious diseases is fundamental to develop prevention strategies. Host-pathogen interactions (HPIs) are actively studied worldwide to find potential genomic targets for the development of novel drugs, vaccines and other therapeutics. Determining which proteins are involved in the interaction system behind an infectious process is the first step to develop an efficient disease control strategy. Very few computational methods have been implemented as web services to infer novel HPIs, and there is not a single framework which combines several of those approaches to produce and visualize a comprehensive analysis of HPIs.

Results

Here, we introduce PredHPI, a powerful framework that integrates both the detection and visualization of interaction networks in a single web service, facilitating the apprehension of model and non-model host-pathogen systems to aid the biologists in building hypotheses and designing appropriate experiments. PredHPI is built on high-performance computing resources on the backend capable of handling proteome-scale sequence data from both the host as well as pathogen. Data are displayed in an information-rich and interactive visualization, which can be further customized with user-defined layouts. We believe PredHPI will serve as an invaluable resource to diverse experimental biologists and will help advance the research in the understanding of complex infectious diseases.

Availability and implementation

PredHPI tool is freely available at http://bioinfo.usu.edu/PredHPI/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +31733063,Genome3D: integrating a collaborative data pipeline to expand the depth and breadth of consensus protein structure annotation.,"Genome3D (https://www.genome3d.eu) is a freely available resource that provides consensus structural annotations for representative protein sequences taken from a selection of model organisms. Since the last NAR update in 2015, the method of data submission has been overhauled, with annotations now being 'pushed' to the database via an API. As a result, contributing groups are now able to manage their own structural annotations, making the resource more flexible and maintainable. The new submission protocol brings a number of additional benefits including: providing instant validation of data and avoiding the requirement to synchronise releases between resources. It also makes it possible to implement the submission of these structural annotations as an automated part of existing internal workflows. In turn, these improvements facilitate Genome3D being opened up to new prediction algorithms and groups. For the latest release of Genome3D (v2.1), the underlying dataset of sequences used as prediction targets has been updated using the latest reference proteomes available in UniProtKB. A number of new reference proteomes have also been added of particular interest to the wider scientific community: cow, pig, wheat and mycobacterium tuberculosis. These additions, along with improvements to the underlying predictions from contributing resources, has ensured that the number of annotations in Genome3D has nearly doubled since the last NAR update article. 
The new API has also been used to facilitate the dissemination of Genome3D data into InterPro, thereby widening the visibility of both the annotation data and annotation algorithms.",2020-01-01 +33981815,A spatiotemporal dataset for integrated assessment and modelling of crop-livestock integration with the MAELIA simulation platform.,"The general purpose of the primary and secondary data available in this article is to support an integrated assessment of scenarios of crop-livestock integration at the territorial level i.e. of exchanges between arable and livestock farms. The data is a result of a research collaboration between the scientist from INRAE, agricultural advisers from Chamber of Agriculture of Pays de la Loire (CRAPL) and a collective of five arable and two livestock farmers located in the district of Pays de Pouzauges (Vendée department, western France). All participants formed part of the DiverIMPACTS project (https://www.diverimpacts.net/) that aims to achieve the full potential of diversification of cropping systems for improved productivity, delivery of ecosystem services and resource-efficient and sustainable value chains in Europe. The first dataset corresponds to the inputs of MAELIA (http://maelia-platform.inra.fr/), a spatial agent-based simulation platform that was used to support an iterative design and assessment of scenarios to redesign cropping systems. The second dataset corresponds to the outputs of MAELIA simulations and the associated indicators at the farm, group and territory level. The data comprise multiple shape and csv files characterizing the edaphic-climatic heterogeneity of the territory and cropping systems, farmers' crop management rules (IF-THEN rules) and general information about the farms (e.g. crops, agricultural equipment, average crop yields). 
Data is reported for the baseline situation and three exchange scenarios containing different innovative cropping systems co-designed by scientists, agricultural advisers and the farmers. The data presented here can be found in the Portail Data INRA repository (https://doi.org/10.15454/3ZTCF5) and were used in the research article ""Fostering local crop-livestock integration via legume exchanges using an innovative integrated assessment and modelling approach: MAELIA"" [1].",2021-04-01 +33270889,MethHC 2.0: information repository of DNA methylation and gene expression in human cancer.,"DNA methylation is an important epigenetic regulator in gene expression and has several roles in cancer and disease progression. MethHC version 2.0 (MethHC 2.0) is an integrated and web-based resource focusing on the aberrant methylomes of human diseases, specifically cancer. This paper presents an updated implementation of MethHC 2.0 by incorporating additional DNA methylomes and transcriptomes from several public repositories, including 33 human cancers, over 50 118 microarray and RNA sequencing data from TCGA and GEO, and accumulating up to 3586 manually curated data from >7000 collected published literature with experimental evidence. MethHC 2.0 has also been equipped with enhanced data annotation functionality and a user-friendly web interface for data presentation, search, and visualization. Provided features include clinical-pathological data, mutation and copy number variation, multiplicity of information (gene regions, enhancer regions, and CGI regions), and circulating tumor DNA methylation profiles, available for research such as biomarker panel design, cancer comparison, diagnosis, prognosis, therapy study and identifying potential epigenetic biomarkers. 
MethHC 2.0 is now available at http://awi.cuhk.edu.cn/∼MethHC.",2021-01-01 +33973408,"hu.MAP 2.0: integration of over 15,000 proteomic experiments builds a global compendium of human multiprotein assemblies.","A general principle of biology is the self-assembly of proteins into functional complexes. Characterizing their composition is, therefore, required for our understanding of cellular functions. Unfortunately, we lack knowledge of the comprehensive set of identities of protein complexes in human cells. To address this gap, we developed a machine learning framework to identify protein complexes in over 15,000 mass spectrometry experiments which resulted in the identification of nearly 7,000 physical assemblies. We show our resource, hu.MAP 2.0, is more accurate and comprehensive than previous state of the art high-throughput protein complex resources and gives rise to many new hypotheses, including for 274 completely uncharacterized proteins. Further, we identify 253 promiscuous proteins that participate in multiple complexes pointing to possible moonlighting roles. We have made hu.MAP 2.0 easily searchable in a web interface (http://humap2.proteincomplexes.org/), which will be a valuable resource for researchers across a broad range of interests including systems biology, structural biology, and molecular explanations of disease.",2021-05-01 +34314492,"Enhancing the interoperability of glycan data flow between ChEBI, PubChem and GlyGen.","Glycans play a vital role in health, disease, bioenergy, biomaterials and bio-therapeutics. As a result, there is keen interest to identify and increase glycan data in bioinformatics databases like ChEBI and PubChem, and connecting them to resources at the EMBL-EBI and NCBI to facilitate access to important annotations at a global level. GlyTouCan is a comprehensive archival database that contains glycans obtained primarily through batch upload from glycan repositories, glycoprotein databases and individual laboratories. 
In many instances, the glycan structures deposited in GlyTouCan may not be fully defined or have supporting experimental evidence and citations. Databases like ChEBI and PubChem were designed to accommodate complete atomistic structures with well-defined chemical linkages. As a result, they cannot easily accommodate the structural ambiguity inherent in glycan databases. Consequently, there is a need to improve the organization of glycan data coherently to enhance connectivity across the major NCBI, EMBL-EBI and glycoscience databases. This paper outlines a workflow developed in collaboration between GlyGen, ChEBI and PubChem to improve the visibility and connectivity of glycan data across these resources. GlyGen hosts a subset of glycans (~29,000) from the GlyTouCan database and has submitted valuable glycan annotations to the PubChem database and integrated over 10,500 (including ambiguously defined) glycans into the ChEBI database. The integrated glycans were prioritized based on links to PubChem and connectivity to glycoprotein data. The pipeline provides a blueprint for how glycan data can be harmonized between different resources. The current PubChem, ChEBI and GlyTouCan mappings can be downloaded from GlyGen (https://data.glygen.org).",2021-12-01 +34795092,Intelligent Integrative Platform for Sharing Heterogenuous Stem Cell Research Data.,"Recent studies demonstrated that comparative analysis of stem cell research data sets originating from multiple studies can produce new information and help with hypotheses generation. Effective approaches for incorporating multiple diverse heterogeneous data sets collected from stem cell projects into a harmonized project-based framework have been lacking. Here, we provide an intelligent informatics solution for integrating comprehensive characterizations of stem cells with research subject and project outcome information. 
Our platform is the first to seamlessly integrate information from iPSCs and cancer stem cell research into a single platform, using a multi-modular common data element framework. Heterogeneous data is validated using predefined ontologies and stored in a relational database, to ensure data quality and ease of access. Testing was performed using 103 published, publicly-available iPSC and cancer stem cell projects conducted in clinical, preclinical and in vitro evaluations. We validated the robustness of the platform, by seamlessly harmonizing diverse data elements, and demonstrated its potential for knowledge generation through the aggregation and harmonization of data. Future aims of this project include increasing the database size using crowdsourcing and natural language processing functionalities. The platform is publicly available at https://remedy.mssm.edu/.",2021-11-01 +34819397,Q-omics: Smart Software for Assisting Oncology and Cancer Research.,"The rapid increase in collateral omics and phenotypic data has enabled data-driven studies for the fast discovery of cancer targets and biomarkers. Thus, it is necessary to develop convenient tools for general oncologists and cancer scientists to carry out customized data mining without computational expertise. For this purpose, we developed innovative software that enables user-driven analyses assisted by knowledge-based smart systems. Publicly available data on mutations, gene expression, patient survival, immune score, drug screening and RNAi screening were integrated from the TCGA, GDSC, CCLE, NCI, and DepMap databases. The optimal selection of samples and other filtering options were guided by the smart function of the software for data mining and visualization on Kaplan-Meier plots, box plots and scatter plots of publication quality. We implemented unique algorithms for both data mining and visualization, thus simplifying and accelerating user-driven discovery activities on large multiomics datasets. 
The present Q-omics software program (v0.95) is available at http://qomics.sookmyung.ac.kr.",2021-11-01 +32976589,DIGGER: exploring the functional role of alternative splicing in protein interactions.,"Alternative splicing plays a major role in regulating the functional repertoire of the proteome. However, isoform-specific effects to protein-protein interactions (PPIs) are usually overlooked, making it impossible to judge the functional role of individual exons on a systems biology level. We overcome this barrier by integrating protein-protein interactions, domain-domain interactions and residue-level interactions information to lift exon expression analysis to a network level. Our user-friendly database DIGGER is available at https://exbio.wzw.tum.de/digger and allows users to seamlessly switch between isoform and exon-centric views of the interactome and to extract sub-networks of relevant isoforms, making it an essential resource for studying mechanistic consequences of alternative splicing.",2021-01-01 +32778890,SurvivalMeth: a web server to investigate the effect of DNA methylation-related functional elements on prognosis. ,"Aberrant DNA methylation is a fundamental characterization of epigenetics for carcinogenesis. Abnormality of DNA methylation-related functional elements (DMFEs) may lead to dysfunction of regulatory genes in the progression of cancers, contributing to prognosis of many cancers. There is an urgent need to construct a tool to comprehensively assess the impact of DMFEs on prognosis. Therefore, we developed SurvivalMeth (http://bio-bigdata.hrbmu.edu.cn/survivalmeth) to explore the prognosis-related DMFEs, which documented many kinds of DMFEs, including 309,465 CpG island-related elements, 104,748 transcript-related elements, 77,634 repeat elements, as well as cell-type specific 1,689,653 super enhancers (SE) and 1,304,902 CTCF binding regions for analysis. 
SurvivalMeth is a convenient tool which collected DNA methylation profiles of 36 cancers and allowed users to query their genes of interest in different datasets for prognosis. Furthermore, SurvivalMeth not only integrated different combinations, including single DMFE, multiple DMFEs, SEs and clinical data, to perform survival analysis on preupload data but also allowed for uploading customized DNA methylation profile of DMFEs from various diseases to analyze. SurvivalMeth provided a comprehensive resource and automated analysis for prognostic DMFEs, including DMFE methylation level, correlation analysis, clinical analysis, differential analysis, DMFE annotation, survival-related detailed result and visualization of survival analysis. In summary, we believe that SurvivalMeth will facilitate prognostic research of DMFEs in diverse cancers.",2021-05-01 +32778839,Combined proximity labeling and affinity purification-mass spectrometry workflow for mapping and visualizing protein interaction networks.,"Affinity purification coupled with mass spectrometry (AP-MS) and proximity-dependent biotinylation identification (BioID) methods have made substantial contributions to interaction proteomics studies. Whereas AP-MS results in the identification of proteins that are in a stable complex, BioID labels and identifies proteins that are in close proximity to the bait, resulting in overlapping yet distinct protein identifications. Integration of AP-MS and BioID data has been shown to comprehensively characterize a protein's molecular context, but interactome analysis using both methods in parallel is still labor and resource intense with respect to cell line generation and protein purification. Therefore, we developed the Multiple Approaches Combined (MAC)-tag workflow, which allows for both AP-MS and BioID analysis with a single construct and with almost identical protein purification and mass spectrometry (MS) identification procedures. 
We have applied the MAC-tag workflow to a selection of subcellular markers to provide a global view of the cellular protein interactome landscape. This localization database is accessible via our online platform ( http://proteomics.fi ) to predict the cellular localization of a protein of interest (POI) depending on its identified interactors. In this protocol, we present the detailed three-stage procedure for the MAC-tag workflow: (1) cell line generation for the MAC-tagged POI; (2) parallel AP-MS and BioID protein purification followed by MS analysis; and (3) protein interaction data analysis, data filtration and visualization with our localization visualization platform. The entire procedure can be completed within 25 d.",2020-08-10 +32558264,Insights from 20 years of the Molecule of the Month.,"For 20 years, Molecule of the Month articles have highlighted the functional stories of 3D structures found in the Protein Data Bank (PDB). The PDB is the primary archive of atomic structures of biological molecules, currently providing open access to more than 150,000 structures studied by researchers around the world. The wealth of knowledge embodied in this resource is remarkable, with structures that allow exploration of nearly any biomolecular topic, including the basic science of genetic mechanisms, mechanisms of photosynthesis and bioenergetics, and central biomedical topics like cancer therapy and the fight against infectious disease. The central motivation behind the Molecule of the Month is to provide a user-friendly introduction to this rich body of data, charting a path for users to get started with finding and exploring the many available structures. 
The Molecule of the Month and related materials are updated regularly at the education portal PDB-101 (http://pdb101.rcsb.org/), offering an ongoing resource for molecular biology educators and students around the world.",2020-06-17 +31665503,BBCancer: an expression atlas of blood-based biomarkers in the early diagnosis of cancers.,"The early detection of cancer holds the key to combat and control the increasing global burden of cancer morbidity and mortality. Blood-based screenings using circulating DNAs (ctDNAs), circulating RNA (ctRNAs), circulating tumor cells (CTCs) and extracellular vesicles (EVs) have shown promising prospects in the early detection of cancer. Recent high-throughput gene expression profiling of blood samples from cancer patients has provided a valuable resource for developing new biomarkers for the early detection of cancer. However, a well-organized online repository for these blood-based high-throughput gene expression data is still not available. Here, we present BBCancer (http://bbcancer.renlab.org/), a web-accessible and comprehensive open resource for providing the expression landscape of six types of RNAs, including messenger RNAs (mRNAs), long noncoding RNAs (lncRNAs), microRNAs (miRNAs), circular RNAs (circRNAs), tRNA-derived fragments (tRFRNAs) and Piwi-interacting RNAs (piRNAs) in blood samples, including plasma, CTCs and EVs, from cancer patients with various cancer types. Currently, BBCancer contains expression data of the six RNA types from 5040 normal and tumor blood samples across 15 cancer types. We believe this database will serve as a powerful platform for developing blood biomarkers.",2020-01-01 +34992626,CottonGVD: A Comprehensive Genomic Variation Database for Cultivated Cottons.,"Cultivated cottons are the most important economic crop, which produce natural fiber for the textile industry. 
In recent years, the genetic basis of several essential traits for cultivated cottons has been gradually elucidated by decoding their genomic variations. Although an abundance of resequencing data is available in public, there is still a lack of a comprehensive tool to exhibit the results of genomic variations and genome-wide association study (GWAS). To assist cotton researchers in utilizing these data efficiently and conveniently, we constructed the cotton genomic variation database (CottonGVD; http://120.78.174.209/ or http://db.cngb.org/cottonGVD). This database contains the published genomic information of three cultivated cotton species, the corresponding population variations (SNP and InDel markers), and the visualized results of GWAS for major traits. Various built-in genomic tools help users retrieve, browse, and query the variations conveniently. The database also provides interactive maps (e.g., Manhattan map, scatter plot, heatmap, and linkage disequilibrium block) to exhibit GWAS and expression GWAS results. Cotton researchers could easily focus on phenotype-associated loci visualization, and they are interested in and screen for candidate genes. Moreover, CottonGVD will continue to update by adding more data and functions.",2021-12-21 +33423696,COCONUT online: Collection of Open Natural Products database.,"Natural products (NPs) are small molecules produced by living organisms with potential applications in pharmacology and other industries as many of them are bioactive. This potential raised great interest in NP research around the world and in different application fields, therefore, over the years a multiplication of generalistic and thematic NP databases has been observed. However, there is, at this moment, no online resource regrouping all known NPs in just one place, which would greatly simplify NPs research and allow computational screening and other in silico applications. 
In this manuscript we present the online version of the COlleCtion of Open Natural prodUcTs (COCONUT): an aggregated dataset of elucidated and predicted NPs collected from open sources and a web interface to browse, search and easily and quickly download NPs. COCONUT web is freely available at https://coconut.naturalproducts.net .",2021-01-10 +34710585,Cross-scanner reproducibility and harmonization of a diffusion MRI structural brain network: A traveling subject study of multi-b acquisition.,"Characterization of brain networks by diffusion MRI (dMRI) has rapidly evolved, and there are ongoing movements toward data sharing and multi-center studies. To extract meaningful information from multi-center data, methods to correct for the bias caused by scanner differences, that is, harmonization, are urgently needed. In this work, we report the cross-scanner differences in structural network analyses using data from nine traveling subjects (four males and five females, 21-49 years-old) who underwent scanning using four 3T scanners (public database available from the Brain/MINDS Beyond Human Brain MRI project (http://mriportal.umin.jp/)). The reliability and reproducibility were compared to those of data from another set of four subjects (all males, 29-42 years-old) who underwent scan-rescan (interval, 105-147 days) with the same scanner as well as scan-rescan data from the Human Connectome Project database. The results demonstrated that the reliability of the edge weights and graph theory metrics was lower for data including different scanners, compared to the scan-rescan with the same scanner. Besides, systematic differences between scanners were observed, indicating the risk of bias in comparing networks obtained from different scanners directly. 
We further demonstrate that it is feasible to reduce inter-scanner variabilities while preserving the inter-subject differences among healthy individuals by modeling the scanner effects at the level of network matrices, when traveling-subject data are available for calibration between scanners. The present data and results are expected to serve as a basis for developing and evaluating novel harmonization methods.",2021-10-26 +31713618,LncTarD: a manually-curated database of experimentally-supported functional lncRNA-target regulations in human diseases.,"Long non-coding RNAs (lncRNAs) are associated with human diseases. Although lncRNA-disease associations have received significant attention, no online repository is available to collect lncRNA-mediated regulatory mechanisms, key downstream targets, and important biological functions driven by disease-related lncRNAs in human diseases. We thus developed LncTarD (http://biocc.hrbmu.edu.cn/LncTarD/ or http://bio-bigdata.hrbmu.edu.cn/LncTarD), a manually-curated database that provides a comprehensive resource of key lncRNA-target regulations, lncRNA-influenced functions, and lncRNA-mediated regulatory mechanisms in human diseases. LncTarD offers (i) 2822 key lncRNA-target regulations involving 475 lncRNAs and 1039 targets associated with 177 human diseases; (ii) 1613 experimentally-supported functional regulations and 1209 expression associations in human diseases; (iii) important biological functions driven by disease-related lncRNAs in human diseases; (iv) lncRNA-target regulations responsible for drug resistance or sensitivity in human diseases and (v) lncRNA microarray, lncRNA sequence data and transcriptome data of an 11 373 pan-cancer patient cohort from TCGA to help characterize the functional dynamics of these lncRNA-target regulations. LncTarD also provides a user-friendly interface to conveniently browse, search, and download data. 
LncTarD will be a useful resource platform for the further understanding of functions and molecular mechanisms of lncRNA deregulation in human disease, which will help to identify novel and sensitive biomarkers and therapeutic targets.",2020-01-01 +32738156,Predicted yeast interactome and network-based interpretation of transcriptionally changed genes.,"Saccharomyces cerevisiae, budding yeast, is a widely used model organism and research tool in genetics studies. Many efforts have been directed at constructing a high-quality comprehensive molecular interaction network to elucidate the design logic of the gene circuitries in this classic model organism. In this work, we present the yeast interactome resource (YIR), which includes 22,238 putative functional gene interactions inferred from functional gene association data integrated from 10 databases focusing on diverse functional perspectives. These putative functional gene interactions are expected to cover 18.84% of yeast protein interactions, and 38.49% may represent protein interactions. Based on the YIR, a gene set linkage analysis (GSLA) web tool was developed to annotate the potential functional impacts of a set of transcriptionally changed genes. In a case study, we show that the YIR/GSLA system produced more extensive and concise annotations compared with widely used gene set annotation tools, including PANTHER and DAVID. Both YIR and GSLA are accessible through the website http://yeast.biomedtzc.cn.",2020-08-11 +34316700,On the impact of batch effect correction in TCGA isomiR expression data.,"MicroRNAs (miRNAs) are small non-coding RNAs with diverse functions in post-transcriptional regulation of gene expression. Sequence and length variants of miRNAs are called isomiRs and can exert different functions compared to their canonical counterparts. 
The Cancer Genome Atlas (TCGA) provides isomiR-level expression data for patients of various cancer entities collected in a multi-center approach over several years. However, the impact of batch effects within individual cohorts has not been systematically investigated and corrected for before. Therefore, the aim of this study was to identify relevant cohort-specific batch variables and generate batch-corrected isomiR expression data for 16 TCGA cohorts. The main batch variables included sequencing platform, plate, sample purity and sequencing depth. Platform bias was related to certain length and sequence features of individual recurrently affected isomiRs. Furthermore, significant downregulation of reported tumor suppressive isomiRs in lung tumor tissue compared to normal samples was only observed after batch correction, highlighting the importance of working with corrected data. Batch-corrected datasets for all cohorts including quality control are provided as supplement. In summary, this study reveals that batch effects present in the TCGA dataset might mask biologically relevant effects and provides a valuable resource for research on isomiRs in cancer (accessible through GEO: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE164767).",2021-03-11 +32795611,DPHL: A DIA Pan-human Protein Mass Spectrometry Library for Robust Biomarker Discovery.,"To address the increasing need for detecting and validating protein biomarkers in clinical specimens, mass spectrometry (MS)-based targeted proteomic techniques, including the selected reaction monitoring (SRM), parallel reaction monitoring (PRM), and massively parallel data-independent acquisition (DIA), have been developed. For optimal performance, they require the fragment ion spectra of targeted peptides as prior knowledge. In this report, we describe a MS pipeline and spectral resource to support targeted proteomics studies for human tissue samples. 
To build the spectral resource, we integrated common open-source MS computational tools to assemble a freely accessible computational workflow based on Docker. We then applied the workflow to generate DPHL, a comprehensive DIA pan-human library, from 1096 data-dependent acquisition (DDA) MS raw files for 16 types of cancer samples. This extensive spectral resource was then applied to a proteomic study of 17 prostate cancer (PCa) patients. Thereafter, PRM validation was applied to a larger study of 57 PCa patients and the differential expression of three proteins in prostate tumor was validated. As a second application, the DPHL spectral resource was applied to a study consisting of plasma samples from 19 diffuse large B cell lymphoma (DLBCL) patients and 18 healthy control subjects. Differentially expressed proteins between DLBCL patients and healthy control subjects were detected by DIA-MS and confirmed by PRM. These data demonstrate that the DPHL supports DIA and PRM MS pipelines for robust protein biomarker discovery. DPHL is freely accessible at https://www.iprox.org/page/project.html?id=IPX0001400000.",2020-04-01 +32883210,"""METAGENOTE: a simplified web platform for metadata annotation of genomic samples and streamlined submission to NCBI's sequence read archive"".","

Background

The improvements in genomics methods coupled with readily accessible high-throughput sequencing have contributed to our understanding of microbial species, metagenomes, infectious diseases and more. To maximize the impact of these genomics studies, it is important that data from biological samples will become publicly available with standardized metadata. The availability of data at public archives provides the hope that greater insights could be obtained through integration with multi-omics data, reproducibility of published studies, or meta-analyses of large diverse datasets. These datasets should include a description of the host, organism, environmental source of the specimen, spatial-temporal information and other relevant metadata, but unfortunately these attributes are often missing and when present, they show inconsistencies in the use of metadata standards and ontologies.

Results

METAGENOTE ( https://metagenote.niaid.nih.gov ) is a web portal that greatly facilitates the annotation of samples from genomic studies and streamlines the submission process of sequencing files and metadata to the Sequence Read Archive (SRA) (Leinonen R, et al, Nucleic Acids Res, 39:D19-21, 2011) for public access. This platform offers a wide selection of packages for different types of biological and experimental studies with a special emphasis on the standardization of metadata reporting. These packages follow the guidelines from the MIxS standards developed by the Genomics Standard Consortium (GSC) and adopted by the three partners of the International Nucleotides Sequencing Database Collaboration (INSDC) (Cochrane G, et al, Nucleic Acids Res, 44:D48-50, 2016) - National Center for Biotechnology Information (NCBI), European Bioinformatics Institute (EBI) and the DNA Data Bank of Japan (DDBJ). METAGENOTE then compiles, validates and manages the submission through an easy-to-use web interface minimizing submission errors and eliminating the need for submitting sequencing files via a separate file transfer mechanism.

Conclusions

METAGENOTE is a public resource that focuses on simplifying the annotation and submission process of data with its corresponding metadata. Users of METAGENOTE will benefit from the easy to use annotation interface but most importantly will be encouraged to publish metadata following standards and ontologies that make the public data available for reuse.",2020-09-03 +32445018,Status Quo and Analysis of the Cardiovascular Clinical Practice Guidelines/Expert Consensuses of Chinese and Integrative Medicine: A Systematic Review.,"

Objective

To describe and analyze the status quo of cardiovascular clinical practice guidelines or expert consensuses including both Chinese medicine (CM) and integrative medicine, through systematic literature searching and quality assessment.

Methods

Databases including the Chinese Biomedical Literature Database, the China National Knowledge Infrastructure, Wanfang Data, and the China Science and Technology Journal Database were searched for published CM or integrative cardiovascular clinical practice guidelines or expert consensuses. The website www.medlive.cn was also searched as a supplementary source. The clinical practice evaluation tool AGREE II was used to assess the quality of included guidelines or consensuses.

Results

A total of 31 relevant clinical practice guidelines or expert consensuses were included, covering diagnosis, treatment, Chinese patent and patient fields. Common cardiovascular diseases like coronary heart diseases, heart failure and arrhythmia were also involved. Through analysis it was found that both the quantity and quality of included guidelines have been improved year by year. A total of 4 evidence-based clinical practice guidelines were found, one of which was a guideline project plan. The remaining 27 reports were all consensus-based guidelines. The scores of each field, from highest to lowest, were clarity of presentation (58%), scope and purpose (54%), stakeholder involvement (28%), rigor of development (21%), applicability (13%) and editorial independence (8%).

Conclusions

Although clinical practice guidelines in cardiovascular domain of Chinese have gained increasing concern, with both quantity and quality improved, there is still huge gap in methodology and reporting standards between CM guidelines and international ones. On the one hand, it is essential to improve and standardize the methodology of developing CM guidelines. On the other hands, the evaluation system of evidence and recommendation with CM characters should be developed urgently.",2020-05-22 +34636883,MS2AI: Automated repurposing of public peptide LC-MS data for machine learning applications. ,"Liquid-chromatography mass-spectrometry (LC-MS) is the established standard for analyzing the proteome in biological samples by identification and quantification of thousands of proteins. Machine learning (ML) promises to considerably improve the analysis of the resulting data, however, there is yet to be any tool that mediates the path from raw data to modern ML applications. More specifically, ML applications are currently hampered by three major limitations: (1) absence of balanced training data with large sample size; (2) unclear definition of sufficiently information-rich data representations for e.g., peptide identification; (3) lack of benchmarking of ML methods on specific LC-MS problems. We created the MS2AI pipeline that automates the process of gathering vast quantities of mass spectrometry (MS) data for large scale ML applications. The software retrieves raw data from either in-house sources or from the proteomics identifications database, PRIDE. Subsequently, the raw data is stored in a standardized format amenable for ML, encompassing MS1/MS2 spectra and peptide identifications. This tool bridges the gap between MS and AI, and to this effect we also present an ML application in the form of a convolutional neural network for the identification of oxidized peptides. An open-source implementation of the software can be found at https://gitlab.com/roettgerlab/ms2ai. 
Supplementary data are available at Bioinformatics online.",2021-10-12 +32509450,VIRdb: a comprehensive database for interactive analysis of genes/proteins involved in the pathogenesis of vitiligo.,"Vitiligo is a chronic asymptomatic disorder affecting melanocytes from the basal layer of the epidermis which leads to a patchy loss of skin color. Even though it is one of the neglected disease conditions, people suffering from vitiligo are more prone to psychological disorders. As of now, various studies have been done in order to project auto-immune implications as the root cause. To understand the complexity of vitiligo, we propose the Vitiligo Information Resource (VIRdb) that integrates both the drug-target and systems approach to produce a comprehensive repository entirely devoted to vitiligo, along with curated information at both protein level and gene level along with potential therapeutics leads. These 25,041 natural compounds are curated from Natural Product Activity and Species Source Database. VIRdb is an attempt to accelerate the drug discovery process and laboratory trials for vitiligo through the computationally derived potential drugs. It is an exhaustive resource consisting of 129 differentially expressed genes, which are validated through gene ontology and pathway enrichment analysis. We also report 22 genes through enrichment analysis which are involved in the regulation of epithelial cell differentiation. At the protein level, 40 curated protein target molecules along with their natural hits that are derived through virtual screening. We also demonstrate the utility of the VIRdb by exploring the Protein-Protein Interaction Network and Gene-Gene Interaction Network of the target proteins and differentially expressed genes. For maintaining the quality and standard of the data in the VIRdb, the gold standard in bioinformatics toolkits like Cytoscape, Schrödinger's GLIDE, along with the server installation of MATLAB, are used for generating results. 
VIRdb can be accessed through ""http://www.vitiligoinfores.com/"".",2020-05-21 +32766766,LncR2metasta: a manually curated database for experimentally supported lncRNAs during various cancer metastatic events. ,"Mounting evidence has shown the involvement of long non-coding RNAs (lncRNAs) during various cancer metastatic events (abbreviated as CMEs, e.g. cancer cell invasion, intravasation, extravasation, proliferation, etc.) that may cooperatively facilitate malignant tumor spread and cause massive patient deaths. The study of lncRNA-CME associations might help understand lncRNA functions in metastasis and present reliable biomarkers for early dissemination detection and optimized treatment. Therefore, we developed a database named 'lncR2metasta' by manually compiling experimentally supported lncRNAs during various CMEs from existing studies. LncR2metasta documents 1238 associations between 304 lncRNAs and 39 CMEs across 54 human cancer subtypes. Each entry of lncR2metasta contains detailed information on a lncRNA-CME association, including lncRNA symbol, a specific CME, brief description of the association, lncRNA category, lncRNA Entrez or Ensembl ID, lncRNA genomic location and strand, lncRNA experiment, lncRNA expression pattern, detection method, target gene (or pathway) of lncRNA, lncRNA regulatory role on a CME, cancer name and the literature reference. An easy-to-use web interface was deployed in lncR2metasta for its users to easily browse, search and download as well as to submit novel lncRNA-CME associations. LncR2metasta will be a useful resource in cancer research community. It is freely available at http://lncR2metasta.wchoda.com.",2021-05-01 +35137368,SARS/CoV-2: Behavioral Host Manipulation.,"

Introduction

Though it has not been extensively studied, host manipulation has been documented for various pathogens. Examples of this phenomenon can be seen in cases of toxoplasmosis, rabies, and the influenza virus. An examination of the possible means by which SARS/CoV-2 alters the behavior of its host to spread among populations is elaborated. Indirect evidence that serves as indicators of this phenomenon is presented.

Methods

This is primarily a theoretical document. Many of the ideas raised are not amenable to direct testing due to ethical concerns. However, several indirect means by which to test the hypothesis are discussed. Primary data from cell phones regarding miles traveled, number of times leaving home, etc., are among the possible indirect measures.

Results

The rapid ability of the SARS/CoV-2 virus to spread through society suggests that it may cause behavioral changes of the host to increase its transmission. Numerous cases of super spreader events are noted that have provided meaningful measures of host manipulation.

Conclusion

In the case of SARS/CoV-2, the largest advantage of the pathogen is likely that between 50% and 70% of those infected are asymptomatic (John's Hopkins Coronavirus Resource Center, John's Hopkins University Corona Virus Resource Center. Available at https://coronavirus.jhu.edu/map.html , 2020). This component is a threat to elderly individuals and those immunocompromised who are more likely to have severe complications from the virus and die. To spread within these groups, a seemingly healthy host is necessary to carry the virus to them. The goal of the virus is not to kill the host, but to survive and reproduce.",2021-01-01 +33963869,ProteoSign v2: a faster and evolved user-friendly online tool for statistical analyses of differential proteomics.,"Bottom-up proteomics analyses have been proved over the last years to be a powerful tool in the characterization of the proteome and are crucial for understanding cellular and organism behaviour. Through differential proteomic analysis researchers can shed light on groups of proteins or individual proteins that play key roles in certain, normal or pathological conditions. However, several tools for the analysis of such complex datasets are powerful, but hard-to-use with steep learning curves. In addition, some other tools are easy to use, but are weak in terms of analytical power. Previously, we have introduced ProteoSign, a powerful, yet user-friendly open-source online platform for protein differential expression/abundance analysis designed with the end-proteomics user in mind. Part of Proteosign's power stems from the utilization of the well-established Linear Models For Microarray Data (LIMMA) methodology. Here, we present a substantial upgrade of this computational resource, called ProteoSign v2, where we introduce major improvements, also based on user feedback. 
The new version offers more plot options, supports additional experimental designs, analyzes updated input datasets and performs a gene enrichment analysis of the differentially expressed proteins. We also introduce the deployment of the Docker technology and significantly increase the speed of a full analysis. ProteoSign v2 is available at http://bioinformatics.med.uoc.gr/ProteoSign.",2021-07-01 +,92 Clinical Frailty Scoring Is Crucial For the COVID-19 Era and Beyond,"Abstract

Introduction

The COVID-19 pandemic placed a new focus on provision of clinical resources. With high mortality and limited capacity, appropriate decisions to escalate to critical care were vital for just resource allocation but also to prevent harm where interventions would not change outcomes. NICE guidance highlighted Clinical frailty scoring (CFS) as central to the decision-making process [1]. Despite initial criticism, recent evidence has confirmed increasing CFS as an independent risk factor for inpatient mortality in COVID-19 [2]. We conducted a quality improvement project with the aim of improving CFS documentation at the Royal Free Hospital.

Methods

We reviewed the notes of 71 inpatients over the age of 65 years from 6 wards on 08/05/20–12/05/20 for both CFS score documentation and a clear treatment escalation plan at time points of initial clerking, post-take and following ward admission with an audit standard of 100%. We developed teaching sessions, promoted the CFS mobile application, developed a post-take sticker and an elderly medicine ward admission proforma. We re-audited 66 inpatient notes from the same 6 wards from 25/06/20–07/07/20.

Results

Documentation of CFS improved from 7% to 17% for clerking and post-take and from 13% to 24% on the ward admission. The number of patients with treatment escalation plans was 50%.

Conclusion

CFS is crucial for the COVID-19 era and beyond. We have demonstrated that increased awareness improves use of CFS, though it is not yet being widely used in escalation decisions. 1. Covid-19 Rapid guideline: Critical Care in adults. NICE guideline [NG159]: https://www.nice.org.uk/guidance/ng159 Accessed July 2020 2. Hewitt J et al (2020): The effect of frailty on survival in patients with COVID -19 (COPE): a multicentre, European, observational cohort study; The Lancet: https://doi.org/10.1016/S2468-2667(20)30146-8.",2021-03-01 +,Insights from electronic health record data to improve mental health service delivery during the COVID-19 pandemic,"

Background

Remote consultation technology has been rapidly adopted due to the COVID-19 pandemic. However, some healthcare settings have faced barriers in implementation. We present a study to investigate changes in rates of remote consultation during the pandemic using a large electronic health record (EHR) dataset.

Methods

The Clinical Record Interactive Search tool (CRIS) was used to examine de-identified EHR data of people receiving mental healthcare in South London, UK. Data from around 37,500 patients were analysed for each week from 7th January 2019 to 20th September 2020 using linear regression and locally estimated scatterplot smoothing (LOESS) to investigate changes in the number of clinical contacts (in-person, remote or non-attended) with mental healthcare professionals and prescribing of antipsychotics and mood stabilisers. The data are presented in an interactive dashboard: http://rpatel.co.uk/TelepsychiatryDashboard.

Results

The frequency of in-person contacts was substantially reduced following the onset of the pandemic (β coefficient: -5829.6 contacts, 95% CI -6919.5 to -4739.6, p<0.001), while the frequency of remote contacts increased significantly (β coefficient: 3338.5 contacts, 95% CI 3074.4 to 3602.7, p<0.001). Rates of remote consultation were lower in older adults than in working age adults, children and adolescents. Despite the increase in remote contact, antipsychotic and mood stabiliser prescribing remained at similar levels.

Conclusions

The COVID-19 pandemic has been associated with a marked increase in remote consultation, particularly among younger patients. However, there was no evidence that this has led to changes in prescribing. Further work is needed to support older patients in accessing remote mental healthcare.

Disclosure

All authors have completed the ICMJE uniform disclosure form at www.icmje.org/coi_disclosure.pdf and declare: RS has received funding from Janssen, GSK and Takeda outside the submitted work. RP has received funding from Janssen, Induction Healthcare and H",2021-08-13 +,Characterization of AhLea-3 and its enhancement of salt tolerance in transgenic peanut plants,"Late embryogenesis abundant (LEA) proteins were reported to be related to adversity stress and drought tolerance. Lea-3 from Arachis hypogaea L. (AhLea-3) was previously found to be related to salt tolerance according to the result of transcriptome profiling and digital gene expression analysis. So, AhLea-3 was cloned and the salt tolerance was validated by transgenic peanut plants.AhLea-3 was isolated from M34, a salt-resistant mutant of peanut, with its cDNA as the template. AhLea-3 contains one intron and two extrons, and the full-length cDNA sequence contains 303 bp. AhLea-3 was ligated to pCAMBIA1301 to obtain the overexpression vector pCAMBIA1301-AhLea-3, which was then transferred into peanut variety Huayu23. The expression level of AhLea-3, as determined by qRT-PCR analysis, was >10 times higher in transgenic than in non-transgenic plants. Five days after they were irrigated with 250 mM NaCl, the transgenic plants showed less severe leaf wilting, higher activities of antioxidant enzymes (superoxide dismutase, peroxidase, and catalase), and lower malonic dialdehyde content than non-transgenic plants. Relative to non-transgenic plants, the transgenic plants had a higher photosynthetic net rate, stomatal conductance, and transpiration rate, and a lower intercellular CO₂ concentration after salt stress treatment (250 mM NaCl).These results indicate that overexpression of AhLea-3 increased the salt tolerance of transgenic peanut plants. AhLea-3 might become a useful gene resource for the variety breeding of salinity tolerance in peanut.How to cite: Qiao L, Jiang P, Tang Y, et al. 
Characterization of AhLea-3 and its enhancement of salt tolerance in transgenic peanut plants. Electron J Biotechnol 2021;49. https://doi.org/10.1016/j.ejbt.2020.10.006",2021-01-01 +33136287,A complete map of the Calcium/calmodulin-dependent protein kinase kinase 2 (CAMKK2) signaling pathway.,"Calcium/calmodulin-dependent protein kinase kinase 2 (CAMKK2) is a serine/threonine-protein kinase belonging to the Ca2+/calmodulin-dependent protein kinase subfamily. CAMKK2 has an autocatalytic site, which gets exposed when Ca2+/calmodulin (CAM) binds to it. This results in autophosphorylation and complete activation of CAMKK2. The three major known downstream targets of CAMKK2 are 5'-adenosine monophosphate (AMP)-activated protein kinase (AMPKα), calcium/calmodulin-dependent protein kinase 1 (CAMK1) and calcium/calmodulin-dependent protein kinase 4 (CAMK4). Activation of these targets by CAMKK2 is important for the maintenance of different cellular and physiological processes within the cell. CAMKK2 is found to be important in neuronal development, bone remodeling, adipogenesis, and systemic glucose homeostasis, osteoclastgensis and postnatal myogensis. CAMKK2 is reported to be involved in pathologies like Duchenne muscular dystrophy, inflammation, osteoporosis and bone remodeling and is also reported to be overexpressed in prostate cancer, hepatic cancer, ovarian and gastric cancer. CAMKK2 is involved in increased cell proliferation and migration through CAMKK2/AMPK pathway in prostate cancer and activation of AKT in ovarian cancer. Although CAMKK2 is a molecule of great importance, a public resource of the CAMKK2 signaling pathway is currently lacking. Therefore, we carried out detailed data mining and documentation of the signaling events associated with CAMKK2 from published literature and developed an integrated reaction map of CAMKK2 signaling. 
This resulted in the cataloging of 285 reactions belonging to the CAMKK2 signaling pathway, which includes 33 protein-protein interactions, 74 post-translational modifications, 7 protein translocation events, and 22 activation/inhibition events. Besides, 124 gene regulation events and 25 activator/inhibitors involved in CAMKK2 activation were also cataloged. The CAMKK2 signaling pathway map data is made freely accessible through WikiPathway database ( https://www.wikipathways.org/index.php/Pathway:WP4874 ). We expect that data on a signaling map of CAMKK2 will provide the scientific community with an improved platform to facilitate further molecular as well as biomedical investigations on CAMKK2 and its utility in the development of biomarkers and therapeutic targets.",2020-11-02 +33386221,Update of the AMSER National Medical Student Curriculum.,"Since the first steps of creating the Alliance of Medical Student Educators in Radiology (AMSER) curriculum 20 years ago, dramatic advances in medical imaging, patient care, and medical education have occurred necessitating an update of this valuable resource. The 2020 update of the AMSER curriculum aims to address as many of these changes while providing a succinct resource that will hopefully remain useful for years to come. The updated AMSER curriculum document is freely available for download via the AMSER website at https://www.aur.org/en/affinity-groups/amser/curriculum.",2020-12-29 +32324748,OdoBD: An online database for the dragonflies and damselflies of Bangladesh.,"Combining scientific data over a long-time period is necessary for generating large-scale datasets, which are an essential component of comparative analysis for understanding evolutionary processes. Furthermore, monitoring temporal and spatial distributions of animals at a global and regional scale is essential for studying climate change driven extinction risks. 
Regional and global datasets focusing on different animal groups are on the rise to meet such challenges. Although being one of the earliest and best-known insect groups, the data on Odonata remains rudimentary and dispersed, especially in the South Asian region. Bangladesh, being located within a biodiversity hotspot, possesses a large number of odonate species and many of them are endemic to the South Asian region. We have developed an online database for the Odonata of Bangladesh by compiling and digitizing data from our last four years of field studies, from previously published research articles and field guides, and also by collecting data from citizen scientists. The Odonata of Bangladesh database (accessible at http://www.odobd.org) contains phenotypic, genotypic, photographic, taxonomic, biogeographic and faunistic data of the Odonata of Bangladesh. The database will be a valuable resource for understanding diversity, distributions, extinction risks and conservation planning of the Odonata of Bangladesh. Finally, phenotypic, spatial and temporal data of Odonata of Bangladesh datasets can be integrated with other regional datasets for analyzing macroevolutionary trends and to monitor the effect of climate change on odonates.",2020-04-23 +34733322,Gene4HL: An Integrated Genetic Database for Hearing Loss.,"Hearing loss (HL) is one of the most common disabilities in the world. In industrialized countries, HL occurs in 1-2/1,000 newborns, and approximately 60% of HL is caused by genetic factors. Next generation sequencing (NGS) has been widely used to identify many candidate genes and variants in patients with HL, but the data are scattered in multitudinous studies. It is a challenge for scientists, clinicians, and biologists to easily obtain and analyze HL genes and variant data from these studies. 
Thus, we developed a one-stop database of HL-related genes and variants, Gene4HL (http://www.genemed.tech/gene4hl/), making it easy to catalog, search, browse and analyze the genetic data. Gene4HL integrates the detailed genetic and clinical data of 326 HL-related genes from 1,608 published studies, along with 62 popular genetic data sources to provide comprehensive knowledge of candidate genes and variants associated with HL. Additionally, Gene4HL supports the users to analyze their own genetic engineering network data, performs comprehensive annotation, and prioritizes candidate genes and variations using custom parameters. Thus, Gene4HL can help users explain the function of HL genes and the clinical significance of variants by correlating the genotypes and phenotypes in humans.",2021-10-18 +,Screening for Asymptomatic Coronary Artery Disease in People With Type2 Diabetes Mellitus in a Tertiary Care Center,"Abstract Cardiovascular disease is the biggest driver of mortality in people with diabetes. Cardiovascular disease and diabetes share the same risk factors, the so-called “common soil” hypothesis. Asians and more specifically Indians are predisposed to cardiovascular disease, that too at an earlier age. The cost of management of cardiovascular disease in India is prohibitive. Thus, screening for asymptomatic coronary artery disease in people with type 2 diabetes and referring them for further evaluation will go a long way in preventing cardiovascular mortality. 560 consenting previously diagnosed people with type 2 diabetes, undergoing treatment for type 2 diabetes at our center, were recruited in the study. We used the risk score model for the assessment of coronary artery disease in asymptomatic patients with type 2 diabetes (1) because it was easy to use, specific for Asian population and validated with coronary computed tomographic angiography in asymptomatic people with type 2 diabetes. 
Questions regarding smoking, past history of stroke and duration of diabetes were recorded as per the risk score and accordingly the subjects were labelled low, intermediate and high risk. Anthropometric measurements were recorded, lipid profile was measured, neuropathy assessment was done using the DNS score. Results: 48.9%subjects were females,51.1% were males, mean duration of diabetes was 3.5 years, mean HbA1c was 8.5%, mean BMI 26.5kg/m2, mean age was 51.4 years, mean CAD score was 4.1 44.2% of the subjects were in a low risk category, 44.9% were in the intermediate risk category and 10.9% in the high-risk category. The maximum people had intermediate to high risk and were in the age group of 50–60 years (21.3%), followed by 13% in the 60–70 age group. Surprisingly, 12.6% people in the 40-50year age group had an intermediate to high risk score for ASCVD. The high prevalence of intermediate to high risk in relatively younger populations with shorter duration of diabetes (mean duration of diabetes 3.5 years) mandates universal screening for asymptomatic coronary artery disease in all people with type 2 diabetes mellitus. Our study highlights the importance of identifying asymptomatic coronary artery disease using locally relevant risk models and their timely referral to prevent excessive cardiovascular mortality in people with type 2 diabetes mellitus. This would ensure optimum utilization and prioritization of scarce resources in resource crunch situations. Keywords: Screening, asymptomatic CAD, type 2 diabetes mellitus. References: 1. Park G-M, An H, Lee S-W, Cho Y-R, Gil EH, Her SH, et al. Risk Score Model for the Assessment of Coronary Artery Disease in Asymptomatic Patients With Type 2 Diabetes. Medicine [Internet]. 2015 Jan [cited 2020 Oct 14];94(4):e508. 
Available from: https://journals.lww.com/md-journal/Fulltext/2015/01040/Risk_Score_Model_for_the_Assessment_of_Coronary.44.aspx",2021-05-03 +33170268,Genome Variation Map: a worldwide collection of genome variations across multiple species.,"The Genome Variation Map (GVM; http://bigd.big.ac.cn/gvm/) is a public data repository of genome variations. It aims to collect and integrate genome variations for a wide range of species, accepts submissions of different variation types from all over the world and provides free open access to all publicly available data in support of worldwide research activities. Compared with the previous version, particularly, a total of 22 species, 115 projects, 55 935 samples, 463 429 609 variants, 66 220 associations and 56 submissions (as of 7 September 2020) were newly added in the current version of GVM. In the current release, GVM houses a total of ∼960 million variants from 41 species, including 13 animals, 25 plants and 3 viruses. Moreover, it incorporates 64 819 individual genotypes and 260 393 manually curated high-quality genotype-to-phenotype associations. Since its inception, GVM has archived genomic variation data of 43 754 samples submitted by worldwide users and served >1 million data download requests. Collectively, as a core resource in the National Genomics Data Center, GVM provides valuable genome variations for a diversity of species and thus plays an important role in both functional genomics studies and molecular breeding.",2021-01-01 +34791105,HFIP: an integrated multi-omics data and knowledge platform for the precision medicine of heart failure. ,"As the terminal clinical phenotype of almost all types of cardiovascular diseases, heart failure (HF) is a complex and heterogeneous syndrome leading to considerable morbidity and mortality. 
Existing HF-related omics studies mainly focus on case/control comparisons, small cohorts of special subtypes, etc., and a large amount of multi-omics data and knowledge have been generated. However, it is difficult for researchers to obtain biological and clinical insights from these scattered data and knowledge. In this paper, we built the Heart Failure Integrated Platform (HFIP) for data exploration, fusion analysis and visualization by collecting and curating existing multi-omics data and knowledge from various public sources and also provided an auto-updating mechanism for future integration. The developed HFIP contained 253 datasets (7842 samples), multiple analysis flow, and 14 independent tools. In addition, based on the integration of existing databases and literature, a knowledge base for HF was constructed with a scoring system for evaluating the relationship between molecular signals and HF. The knowledge base includes 1956 genes and annotation information. The literature mining module was developed to assist the researcher to overview the hotspots and contexts in basic and clinical research. HFIP can be used as a data-driven and knowledge-guided platform for the basic and clinical research of HF. Database URL: http://heartfailure.medical-bigdata.com.",2021-11-01 +34845387,A 3D structural SARS-CoV-2-human interactome to explore genetic and drug perturbations.,"Emergence of new viral agents is driven by evolution of interactions between viral proteins and host targets. For instance, increased infectivity of SARS-CoV-2 compared to SARS-CoV-1 arose in part through rapid evolution along the interface between the spike protein and its human receptor ACE2, leading to increased binding affinity. 
To facilitate broader exploration of how pathogen-host interactions might impact transmission and virulence in the ongoing COVID-19 pandemic, we performed state-of-the-art interface prediction followed by molecular docking to construct a three-dimensional structural interactome between SARS-CoV-2 and human. We additionally carried out downstream meta-analyses to investigate enrichment of sequence divergence between SARS-CoV-1 and SARS-CoV-2 or human population variants along viral-human protein-interaction interfaces, predict changes in binding affinity by these mutations/variants and further prioritize drug repurposing candidates predicted to competitively bind human targets. We believe this resource ( http://3D-SARS2.yulab.org ) will aid in development and testing of informed hypotheses for SARS-CoV-2 etiology and treatments.",2021-11-29 +32542382,PRMdb: A Repository of Predicted RNA Modifications in Plants.,"Evidence is mounting that RNA modifications play essential roles in posttranscriptional regulation of gene expression. So far, over 150 RNA modifications catalyzed by distinct enzymes have been documented. In plants, genome-wide identification of RNA modifications is largely limited to the model species Arabidopsis thaliana, while lacking in diverse non-model plants. Here, we present PRMdb, a plant RNA modification database, based on the analysis of thousands of RNA-seq, degradome-seq and small RNA-seq data from a wide range of plant species using the well-documented tool HAMR (high-throughput analysis of modified ribonucleotide). PRMdb provides a user-friendly interface that enables easy browsing and searching of the tRNA and mRNA modification data. We show that PRMdb collects high-confidence RNA modifications including novel RNA modification sites that can be validated by genomic PCR and reverse transcription PCR. 
In summary, PRMdb provides a valuable web resource for deciphering the epitranscriptomes in diverse plant species and will facilitate functional studies of RNA modifications in plants. RPMdb is available via http://www.biosequencing.cn/PRMdb/.",2020-06-01 +32508104,Scop3P: A Comprehensive Resource of Human Phosphosites within Their Full Context.,"Protein phosphorylation is a key post-translational modification in many biological processes and is associated to human diseases such as cancer and metabolic disorders. The accurate identification, annotation, and functional analysis of phosphosites are therefore crucial to understand their various roles. Phosphosites are mainly analyzed through phosphoproteomics, which has led to increasing amounts of publicly available phosphoproteomics data. Several resources have been built around the resulting phosphosite information, but these are usually restricted to the protein sequence and basic site metadata. What is often missing from these resources, however, is context, including protein structure mapping, experimental provenance information, and biophysical predictions. We therefore developed Scop3P: a comprehensive database of human phosphosites within their full context. Scop3P integrates sequences (UniProtKB/Swiss-Prot), structures (PDB), and uniformly reprocessed phosphoproteomics data (PRIDE) to annotate all known human phosphosites. Furthermore, these sites are put into biophysical context by annotating each phosphoprotein with per-residue structural propensity, solvent accessibility, disordered probability, and early folding information. Scop3P, available at https://iomics.ugent.be/scop3p, presents a unique resource for visualization and analysis of phosphosites and for understanding of phosphosite structure-function relationships.",2020-06-18 +33408242,St. 
Jude Cloud: A Pediatric Cancer Genomic Data-Sharing Ecosystem.,"Effective data sharing is key to accelerating research to improve diagnostic precision, treatment efficacy, and long-term survival in pediatric cancer and other childhood catastrophic diseases. We present St. Jude Cloud (https://www.stjude.cloud), a cloud-based data-sharing ecosystem for accessing, analyzing, and visualizing genomic data from >10,000 pediatric patients with cancer and long-term survivors, and >800 pediatric sickle cell patients. Harmonized genomic data totaling 1.25 petabytes are freely available, including 12,104 whole genomes, 7,697 whole exomes, and 2,202 transcriptomes. The resource is expanding rapidly, with regular data uploads from St. Jude's prospective clinical genomics programs. Three interconnected apps within the ecosystem-Genomics Platform, Pediatric Cancer Knowledgebase, and Visualization Community-enable simultaneously performing advanced data analysis in the cloud and enhancing the Pediatric Cancer knowledgebase. We demonstrate the value of the ecosystem through use cases that classify 135 pediatric cancer subtypes by gene expression profiling and map mutational signatures across 35 pediatric cancer subtypes. SIGNIFICANCE: To advance research and treatment of pediatric cancer, we developed St. Jude Cloud, a data-sharing ecosystem for accessing >1.2 petabytes of raw genomic data from >10,000 pediatric patients and survivors, innovative analysis workflows, integrative multiomics visualizations, and a knowledgebase of published data contributed by the global pediatric cancer community.This article is highlighted in the In This Issue feature, p. 995.",2021-01-06 +,CT-152: Application of Web-Scraping Techniques for Autonomous Massive Retrieval of Hematologic Patients' Information During SARS-CoV2 Pandemic,"

Context

Data collection involving a large number of patients is usually known as a tedious and time-consuming task by healthcare professionals. Current patient load makes collecting clinical data almost impossible even though we need that information more than ever.

Objective

We wanted to deploy a system that automatically and autonomously retrieves clinical data from our patients suffering from SARS-CoV2 that arrive at hospital admission to collect that information for further analysis.

Design

We designed a daemon in PHP programming language connected to a MySQL MariaDB database that continuously searches for new patients consulting at hospital. We collected medical history, disease records, regular medication, physical exploration, vital signs, blood chemistry and count, and finally, microbiology testing of SARS-CoV2 (both PCR and ELISA antibody testing). As we don't have access to any API service (out-of-the-box connection to the data mainframe), we took advantage of web-scraping (brute-force data extraction from webpages using HTTP protocol) applied to our hospital web interface.

Setting

Monitoring was carried out between 1st March, 2020 and 15th April, 2020 (during the worst phase of the Coronavirus outbreak in the country), using only one computer connected to the hospital network. The number of patients identified was 259, each one with 344 clinical and testing variables.

Results

Using this technique, we collected data of 259 hematologic patients without human intervention and more than 300 variables have been analyzed. Nowadays, manual revision of certain aspects of the database (e.g., comorbidities) is needed and some data needs to be manually entered due to the lack of proper codification. In the future, with the development of semantic-matching technologies, fully autonomous building of the databases will be possible. In the meantime, our technique can solve the capture of enormous amount of clinical information without effort. With that information, observational studies, even a prognosis score using machine learning, have been developed in our center.

Conclusions

Data collection for further analysis is usually a vital, but time-consuming, task in order to answer clinical questions. We developed a technique that helped our center retrieve patients' clinical information autonomously during the SARS-Cov-2 pandemic.",2020-09-01 +,Multi-sectoral impact assessment during the 1st wave of COVID-19 pandemic in West Bengal (India) for sustainable planning and management,"With the advancement of globalisation, urbanisation and environmental change, the outbreak of the Coronavirus disease 2019 (COVID-19), as an infectious disease, has become a global threat. The entire world is continuously trying to adapt to the pandemic situation due to the sudden outbreak of COVID-19 and the lockdown phase, which has not been faced before. The fear of infection by such an unknown virus and the epidemic transformed the built-up environment and impacted various sectors of lives and livelihoods, which must be assessed in spatial perspectives. The objective of this research is to assess the multi-sectoral impact due to the COVID-19 pandemic. Thus, it is designed to inspect seven essential sectors, namely, the economy, employment, education, transport, travel and tourism, health and environment sector-wise impact assessment of the West Bengal state of India. Taking the required COVID-19 data from the government website of India (http://www.covid19india.org; https://www.mygov.in/corona-data/covid19-statewise-status) and West Bengal (https://covidindia.org/west-bengal), a methodology is proposed on an integrated framework for the multi-sectoral impact assessment. The study concentrates on West Bengal, as no study exists on the multi-sectoral impact assessment due to the COVID-19 pandemic during the 1st wave, especially using the geospatial platform. 
The economy, employment, education, transport, health, tourism and environment multi-sectors of West Bengal are selected in this research, as these sectors have built the economic, sociocultural and environmental pillars of the state. All these sectors have been seriously affected, and the nature of the impact is diverse and large. Before the vaccine comes into the hands of the common people of West Bengal and in a broad sense in India, the awareness should be increased at the grass-root level to fight against the pandemic situation and even after the post-COVID era. The application of geospatial technology used for the mapping and analysis of COVID-19 affects the related database to tease out the multidimensional study, which aims to plan future road maps, search for answers and learn to add further security to overcome the future virus attack.

Supplementary Information

The online version contains supplementary material available at 10.1007/s12517-021-08836-z.",2021-01-01 +33382035,"DIPPER, a spatiotemporal proteomics atlas of human intervertebral discs for exploring ageing and degeneration dynamics. ","The spatiotemporal proteome of the intervertebral disc (IVD) underpins its integrity and function. We present DIPPER, a deep and comprehensive IVD proteomic resource comprising 94 genome-wide profiles from 17 individuals. To begin with, protein modules defining key directional trends spanning the lateral and anteroposterior axes were derived from high-resolution spatial proteomes of intact young cadaveric lumbar IVDs. They revealed novel region-specific profiles of regulatory activities and displayed potential paths of deconstruction in the level- and location-matched aged cadaveric discs. Machine learning methods predicted a 'hydration matrisome' that connects extracellular matrix with MRI intensity. Importantly, the static proteome used as point-references can be integrated with dynamic proteome (SILAC/degradome) and transcriptome data from multiple clinical samples, enhancing robustness and clinical relevance. The data, findings, and methodology, available on a web interface (http://www.sbms.hku.hk/dclab/DIPPER/), will be valuable references in the field of IVD biology and proteomic analytics.",2020-12-31 +32934277,StoneMod: a database for kidney stone modulatory proteins with experimental evidence.,"Better understanding of molecular mechanisms for kidney stone formation is required to improve management of kidney stone disease with better therapeutic outcome. Recent kidney stone research has indicated critical roles of a group of proteins, namely 'stone modulators', in promotion or inhibition of the stone formation. Nevertheless, such information is currently dispersed and difficult to obtain. 
Herein, we present the kidney stone modulator database (StoneMod), which is a curated resource by obtaining necessary information of such stone modulatory proteins, which can act as stone promoters or inhibitors, with experimental evidence from previously published studies. Currently, the StoneMod database contains 10, 16, 13, 8 modulatory proteins that affect calcium oxalate crystallization, crystal growth, crystal aggregation, and crystal adhesion on renal tubular cells, respectively. Informative details of each modulatory protein and PubMed links to the published articles are provided. Additionally, hyperlinks to other protein/gene databases (e.g., UniProtKB, Swiss-Prot, Human Protein Atlas, PeptideAtlas, and Ensembl) are made available for the users to obtain additional in-depth information of each protein. Moreover, this database provides a user-friendly web interface, in which the users can freely access to the information and/or submit their data to deposit or update. Database URL: https://www.stonemod.org .",2020-09-15 +34615485,"The bayberry database: a multiomic database for Myrica rubra, an important fruit tree with medicinal value.","

Background

Chinese bayberry (Myrica rubra Sieb. & Zucc.) is an important fruit tree in China, and has high medicinal value. At present, the genome, transcriptome and germplasm resources of bayberry have been reported. In order to make more convenient use of these data, the Bayberry Database was established.

Results

The Bayberry Database is a comprehensive and intuitive data platform for examining the diverse annotated genome and germplasm resources of this species. This database contains nine central functional domains to interact with multiomic data: home, genome, germplasm, markers, tools, map, expression, reference, and contact. All domains provide pathways to a variety of data types composed of a reference genome sequence, transcriptomic data, gene patterns, phenotypic data, fruit images of Myrica rubra varieties, gSSR data, gene maps with annotation and evolutionary analyses. The tools module includes BLAST search, keyword search, sequence fetch and enrichment analysis functions.

Conclusions

The web address of the database is as follows http://www.bayberrybase.cn/ . The Myrica rubra database is an intelligent, interactive, and user-friendly system that enables researchers, breeders and horticultural personnel to browse, search and retrieve relevant and useful information and thus facilitate genomic research and breeding efforts concerning Myrica rubra. This database will be of great help to bayberry research and breeding in the future.",2021-10-06 +34838140,Chemical toxicity prediction based on semi-supervised learning and graph convolutional neural network.,"As safety is one of the most important properties of drugs, chemical toxicology prediction has received increasing attentions in the drug discovery research. Traditionally, researchers rely on in vitro and in vivo experiments to test the toxicity of chemical compounds. However, not only are these experiments time consuming and costly, but experiments that involve animal testing are increasingly subject to ethical concerns. While traditional machine learning (ML) methods have been used in the field with some success, the limited availability of annotated toxicity data is the major hurdle for further improving model performance. Inspired by the success of semi-supervised learning (SSL) algorithms, we propose a Graph Convolution Neural Network (GCN) to predict chemical toxicity and trained the network by the Mean Teacher (MT) SSL algorithm. Using the Tox21 data, our optimal SSL-GCN models for predicting the twelve toxicological endpoints achieve an average ROC-AUC score of 0.757 in the test set, which is a 6% improvement over GCN models trained by supervised learning and conventional ML methods. Our SSL-GCN models also exhibit superior performance when compared to models constructed using the built-in DeepChem ML methods. This study demonstrates that SSL can increase the prediction power of models by learning from unannotated data. 
The optimal unannotated to annotated data ratio ranges between 1:1 and 4:1. This study demonstrates the success of SSL in chemical toxicity prediction; the same technique is expected to be beneficial to other chemical property prediction tasks by utilizing existing large chemical databases. Our optimal model SSL-GCN is hosted on an online server accessible through: https://app.cbbio.online/ssl-gcn/home .",2021-11-27 +33950237,Development and dissemination of a consumer health information website on infant and toddler sleep.,"Sleep problems are prevalent in early childhood, with the majority of caregivers desiring to change something about their child's sleep. Quality-assured education and resources are needed to be related to infant and toddler sleep. This article describes the development and dissemination of a global consumer health information website (http://www.babysleep.com) by the Pediatric Sleep Council to provide publicly accessible evidence-based information and resources for caregivers and practitioners. The website includes sleep health-related information and resources. Three phases, including the launch, social media strategy, and search engine optimization, for promotion and dissemination of the site was implemented. Analysis of dissemination indicates exponential growth of the site since its launch. With access across the globe, the site has developed from its inception into a widely-used resource, with over 800,000 users from around the world (99% of countries).",2021-09-01 +34859531,Variant interpretation using population databases: Lessons from gnomAD.,"Reference population databases are an essential tool in variant and gene interpretation. Their use guides the identification of pathogenic variants amidst the sea of benign variation present in every human genome, and supports the discovery of new disease-gene relationships. 
The Genome Aggregation Database (gnomAD) is currently the largest and most widely used publicly available collection of population variation from harmonized sequencing data. The data is available through the online gnomAD browser (https://gnomad.broadinstitute.org/) that enables rapid and intuitive variant analysis. This review provides guidance on the content of the gnomAD browser, and its usage for variant and gene interpretation. We introduce key features including allele frequency, per-base expression levels, constraint scores, and variant co-occurrence, alongside guidance on how to use these in analysis, with a focus on the interpretation of candidate variants and novel genes in rare disease.",2021-12-16 +32179762,"Hepamine - A Liver Disease Microarray Database, Visualization Platform and Data-Mining Resource.","Numerous gene expression profiling data on liver diseases were generated and stored in public databases. Only few were used for additional analyses by the hepatology research community. This may mostly be due to limited bioinformatics knowledge of most biomedical research personnel. In order to support an easy translation of bioinformatics data into translational hepatology research, we created Hepamine, a liver disease gene expression, visualization platform and data-mining resource. Microarray data were obtained from the NCBI GEO database. Pre-analysis of expression data was performed using R statistical software and the limma microarray analysis package from the Bioconductor repository. We generated Hepamine, a web-based repository of pre-analyzed microarray data for various liver diseases. At its initial release Hepamine contains 13 gene expression datasets, 20 microarray experiments and approximately 400 000 gene expression measurements. A self-explanatory website offers open and easy access to gene expression profiles. Results are furthermore visualized in simple three-color tables indicating differential expression. 
All data were linked to common functional and genetic databases particularly through the DAVID bioinformatics suite. Hepamine provides comprehensive data and easy access to hepatologic gene expression data even without in depth bioinformatics or microarray profiling experience. http://www.hepamine.de.",2020-03-16 +,"First Report of Richardia scabra as a Symptomatic Host of ‘Candidatus Phytoplasma trifolii’ (16SrVI-A Subgroup) from Bengaluru, India","The rubiaceous plant Florida pusley (Richardia scabra L.) is a naturalized annual weed found in most parts of India. During 2012 to 2018, R. scabra plants exhibiting phytoplasma disease-like symptoms such as stunting, malformed inflorescence, virescence, phyllody, and undersized leaves were found on the National Bureau of Agricultural Insect Resources Research Farm in Bengaluru as well as in neighboring fields, with a mean incidence ranging from 5 to 8%. Phytoplasma was the suspected cause of disease, because usually eggplant (Solanum melongena L.) and sesame (Sesamum indicum L.) crops—both of which suffer from phytoplasma-induced diseases—are raised for experimental purposes in that area. Negative sap-inoculation results from 10 symptomatic plants in a screenhouse experiment ruled out the involvement of a mechanically transmitted pathogen (e.g., a virus). The disease agent could be transmitted to four out of 10 healthy plants through grafting, reproducing symptoms that were milder than those in the field. No known leafhopper or planthopper vector was constantly associated with the plant, except for sporadic probing visits by adults of Hishimonus phycitis (Distant), a known phytoplasma vector. Adult H. phycitis preferred eggplant or sesame rather than R. scabra in choice tests, but when confined, the leafhopper could transmit the pathogen to 43% of plants. Genomic DNA was isolated from aboveground parts of three field-collected symptomatic plants. 
Targeting the phytoplasma 16S rDNA gene, DNA was amplified with universal primers P1/P7 and R16F2n/R16R2 in primary (Deng and Hiruki 1991; Schneider et al. 1995) and nested PCR (Gundersen and Lee 1996; Lee et al. 1993), respectively. DNA from three asymptomatic (apparently healthy) plants served as the negative control. The ∼1.2-kb amplified product of 16S rRNA gene of phytoplasma DNA was detected in all the three symptomatic plants but not in asymptomatic control plants. PCR products were gel-eluted using a gel extraction kit and sequenced with an ABI 3500 xL Genetic Analyzer using the BigDye Terminator version 3.1 Cycle Sequencing Kit (Applied Biosystems). The nucleotide sequence deposited in NCBI GenBank (accession no. MN663123) is closest (100% identity) to ‘Candidatus Phytoplasma trifolii’ isolate Tirupati (accession no. KP899062) in BLAST. The virtual restriction fragment length polymorphism pattern derived through iPhyClassifier (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi; Zhao et al. 2009) indicated that the sequence is closest to the reference pattern of the 16SrVI-D subgroup (GenBank accession no. X83431). To the best of the author’s knowledge, this is the first report of a phytoplasma-associated disease in R. scabra. Because H. phycitis was noticed to visit R. scabra, and because eggplant and sesame are also hosts of the same leafhopper, farmers need to keep their fields free from this weed.",2021-04-01 +33156333,The InterPro protein families and domains database: 20 years on.,"The InterPro database (https://www.ebi.ac.uk/interpro/) provides an integrative classification of protein sequences into families, and identifies functionally important domains and conserved sites. InterProScan is the underlying software that allows protein and nucleic acid sequences to be searched against InterPro's signatures. 
Signatures are predictive models which describe protein families, domains or sites, and are provided by multiple databases. InterPro combines signatures representing equivalent families, domains or sites, and provides additional information such as descriptions, literature references and Gene Ontology (GO) terms, to produce a comprehensive resource for protein classification. Founded in 1999, InterPro has become one of the most widely used resources for protein family annotation. Here, we report the status of InterPro (version 81.0) in its 20th year of operation, and its associated software, including updates to database content, the release of a new website and REST API, and performance improvements in InterProScan.",2021-01-01 +33465550,In-silico identification of subunit vaccine candidates against lung cancer-associated oncogenic viruses.,"Globally, ~20% of cancer malignancies are associated with virus infections. Lung cancer is the most prevalent cancer and has a 10% 5-year survival rate when diagnosed at stage IV. Cancer vaccines and oncolytic immunotherapy are promising treatment strategies for better clinical outcomes in advanced-stage cancer patients. Here, we used a reverse vaccinology approach to devise subunit vaccine candidates against lung cancer-causing oncogenic viruses. Protein components (945) from nine oncogenic virus species were systematically analyzed to identify epitope-based subunit vaccine candidates. Best vaccine candidates were identified based on their predicted ability to stimulate humoral and cell-mediated immunity and avoid self-tolerance. Using a rigorous integrative approach, we identified 125 best antigenic epitopes with predicted B-cell, T-cell, and/or MHC-binding capability and vaccine adjuvant potential. Thirty-two of these antigenic epitopes were predicted to have IL-4/IFN-gamma inducing potential and IL-10 non-inducing potential and were predicted to bind 15 MHC-type I and 49 MHC-type II alleles. 
All 32 epitopes were non-allergenic and 31 were non-toxic. The identified epitopes showed good conservancy and likely bind a broad class of human HLA alleles, indicating promiscuous potential. The majority of best antigenic epitopes were derived from Human papillomavirus and Epstein-Barr virus proteins. Of the 32 epitopes, 25 promiscuous epitopes were related to E1 and E6 envelope genes and were present in multiple viral strains/species, potentially providing heterologous immunity. Further validating our results, 38 antigenic epitopes were also present in the largest experimentally-validated epitope resource, Immune Epitope Database and Analysis Resource. We further narrowed the selection to 29 antigenic epitopes with the highest immunogenic/immune-boosting potential. These epitopes possess tremendous therapeutic potential as vaccines against lung cancer-causing viruses and should be validated in future experiments. All findings are available at https://webs.iiitd.edu.in/raghava/vlcvirus/.",2021-01-12 +33045747,Open Targets Genetics: systematic identification of trait-associated genes using large-scale genetics and functional genomics.,"Open Targets Genetics (https://genetics.opentargets.org) is an open-access integrative resource that aggregates human GWAS and functional genomics data including gene expression, protein abundance, chromatin interaction and conformation data from a wide range of cell types and tissues to make robust connections between GWAS-associated loci, variants and likely causal genes. This enables systematic identification and prioritisation of likely causal variants and genes across all published trait-associated loci. In this paper, we describe the public resources we aggregate, the technology and analyses we use, and the functionality that the portal offers. Open Targets Genetics can be searched by variant, gene or study/phenotype. 
It offers tools that enable users to prioritise causal variants and genes at disease-associated loci and access systematic cross-disease and disease-molecular trait colocalization analysis across 92 cell types and tissues including the eQTL Catalogue. Data visualizations such as Manhattan-like plots, regional plots, credible sets overlap between studies and PheWAS plots enable users to explore GWAS signals in depth. The integrated data is made available through the web portal, for bulk download and via a GraphQL API, and the software is open source. Applications of this integrated data include identification of novel targets for drug discovery and drug repurposing.",2021-01-01 +34663829,Phosphoproteome profiling uncovers a key role for CDKs in TNF signaling.,"Tumor necrosis factor (TNF) is one of the few cytokines successfully targeted by therapies against inflammatory diseases. However, blocking this well studied and pleiotropic ligand can cause dramatic side-effects. Here, we reason that a systems-level proteomic analysis of TNF signaling could dissect its diverse functions and offer a base for developing more targeted therapies. Therefore, we combine phosphoproteomics time course experiments with subcellular localization and kinase inhibitor analysis to identify functional modules of protein phosphorylation. The majority of regulated phosphorylation events can be assigned to an upstream kinase by inhibiting master kinases. Spatial proteomics reveals phosphorylation-dependent translocations of hundreds of proteins upon TNF stimulation. Phosphoproteome analysis of TNF-induced apoptosis and necroptosis uncovers a key role for transcriptional cyclin-dependent kinase activity to promote cytokine production and prevent excessive cell death downstream of the TNF signaling receptor. 
This resource of TNF-induced pathways and sites can be explored at http://tnfviewer.biochem.mpg.de/ .",2021-10-18 +,Strengthening supply chain resilience during COVID‐19: A case study of JD.com,"Abstract The coronavirus/SARS‐CoV‐2 (COVID‐19) outbreak has caused severe supply chain disruptions in practically all industries worldwide. Online e‐commerce platforms, which interact directly with various industries and service numerous consumers, have become remarkable interfaces to observe the impacts of the pandemic on supply chains. Using quantitative operational data obtained from JD.com https://www.jd.com., this study analyzes the impact of the pandemic on supply chain resilience, summarizes the challenging scenarios that retailing supply chains experienced in China, and presents the practical response of JD.com throughout the pandemic. To summarize, the pandemic caused exceptional demand and severe logistical disruptions in China, and JD.com has handled well its supply chain management in response based on its integrated supply chain structure and comprehensive intelligent platforms. In particular, the existing intelligent platforms and the delivery procedures were modified slightly but promptly to deal with specific disruptions. Moreover, the entire market scenario in China was effectively controlled through the joint efforts of multiple firms, the government, and the entire Chinese society. Our study provides an example of using practical operational indicators to analyze supply chain resilience, and suggests firms pay attention to operational flexibility and collaboration beyond supply chains to deal with a large‐scale supply chain disruption, such as the COVID‐19 outbreak.",2021-10-18 +34736471,BDdb: a comprehensive platform for exploration and utilization of birth defect multi-omics data.,"

Background

Birth defects pose a major challenge to infant health. Thus far, however, the causes of most birth defects remain cryptic. Over the past few decades, considerable effort has been expended on disclosing the underlying mechanisms related to birth defects, yielding myriad treatises and data. To meet the increasing requirements for data resources, we developed a freely accessible birth defect multi-omics database (BDdb, http://t21omics.cngb.org ) consisting of multi-omics data and potential disease biomarkers.

Results

In total, omics datasets from 136 Gene Expression Omnibus (GEO) Series records, including 5245 samples, as well as 869 biomarkers of 22 birth defects in six different species, were integrated into the BDdb. The database provides a user-friendly interface for searching, browsing, and downloading data of interest. The BDdb also enables users to explore the correlations among different sequencing methods, such as chromatin immunoprecipitation sequencing (ChIP-Seq) and RNA sequencing (RNA-Seq) from different studies, to obtain the information on gene expression patterns from diverse aspects.

Conclusion

To the best of our knowledge, the BDdb is the first comprehensive database associated with birth defects, which should benefit the diagnosis and prevention of birth defects.",2021-11-04 +33677064,Primary Coenzyme Q deficiencies: A literature review and online platform of clinical features to uncover genotype-phenotype correlations.,"Primary Coenzyme Q (CoQ) deficiencies are clinically heterogeneous conditions and lack clear genotype-phenotype correlations, complicating diagnosis and prognostic assessment. Here we present a compilation of all the symptoms and patients with primary CoQ deficiency described in the literature so far and analyse the most common clinical manifestations associated with pathogenic variants identified in the different COQ genes. In addition, we identified new associations between the age of onset of symptoms and different pathogenic variants, which could help to a better diagnosis and guided treatment. To make these results useable for clinicians, we created an online platform (https://coenzymeQbiology.github.io/clinic-CoQ-deficiency) about clinical manifestations of primary CoQ deficiency that will be periodically updated to incorporate new information published in the literature. Since CoQ primary deficiency is a rare disease, the available data are still limited, but as new patients are added over time, this tool could become a key resource for a more efficient diagnosis of this pathology.",2021-03-04 +34895148,Regulatory modules of human thermogenic adipocytes: functional genomics of large cohort and Meta-analysis derived marker-genes. ,"Recently, ProFAT and BATLAS studies identified brown and white adipocytes marker genes based on analysis of large databases. They offered scores to determine the thermogenic status of adipocytes using the gene-expression data of these markers. In this work, we investigated the functional context of these genes. 
Gene Set Enrichment Analyses (KEGG, Reactome) of the BATLAS and ProFAT marker-genes identified pathways deterministic in the formation of brown and white adipocytes. The collection of the annotated proteins of the defined pathways resulted in expanded white and brown characteristic protein-sets, which theoretically contain all functional proteins that could be involved in the formation of adipocytes. Based on our previously obtained RNA-seq data, we visualized the expression profile of these proteins coding genes and found patterns consistent with the two adipocyte phenotypes. The trajectory of the regulatory processes could be outlined by the transcriptional profile of progenitor and differentiated adipocytes, highlighting the importance of suppression processes in browning. Protein interaction network-based functional genomics by STRING, Cytoscape and R-Igraph platforms revealed that different biological processes shape the brown and white adipocytes and highlighted key regulatory elements and modules including GAPDH-CS, DECR1, SOD2, IL6, HRAS, MTOR, INS-AKT, ERBB2 and 4-NFKB, and SLIT-ROBO-MAPK. To assess the potential role of a particular protein in shaping adipocytes, we assigned interaction network location-based scores (betweenness centrality, number of bridges) to them and created a freely accessible platform, the AdipoNET ( https://adiponet.com ), to conveniently use these data. The Eukaryote Promoter Database predicted the response elements in the UCP1 promoter for the identified, potentially important transcription factors (HIF1A, MYC, REL, PPARG, TP53, AR, RUNX, and FoxO1). Our integrative approach-based results allowed us to investigate potential regulatory elements of thermogenesis in adipose tissue. The analyses revealed that some unique biological processes form the brown and white adipocyte phenotypes, which presumes the existence of the transitional states. 
The data also suggests that the two phenotypes are not mutually exclusive, and differentiation of thermogenic adipocyte requires induction of browning as well as repressions of whitening. The recognition of these simultaneous actions and the identified regulatory modules can open new direction in obesity research.",2021-12-11 +33554860,"shinyDepMap, a tool to identify targetable cancer genes and their functional connections from Cancer Dependency Map data. ","Individual cancers rely on distinct essential genes for their survival. The Cancer Dependency Map (DepMap) is an ongoing project to uncover these gene dependencies in hundreds of cancer cell lines. To make this drug discovery resource more accessible to the scientific community, we built an easy-to-use browser, shinyDepMap (https://labsyspharm.shinyapps.io/depmap). shinyDepMap combines CRISPR and shRNA data to determine, for each gene, the growth reduction caused by knockout/knockdown and the selectivity of this effect across cell lines. The tool also clusters genes with similar dependencies, revealing functional relationships. shinyDepMap can be used to (1) predict the efficacy and selectivity of drugs targeting particular genes; (2) identify maximally sensitive cell lines for testing a drug; (3) target hop, that is, navigate from an undruggable protein with the desired selectivity profile, such as an activated oncogene, to more druggable targets with a similar profile; and (4) identify novel pathways driving cancer cell growth and survival.",2021-02-08 +33045776,An online coronavirus analysis platform from the National Genomics Data Center.,"Since the first reported severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) infection in December 2019, coronavirus disease 2019 (COVID-19) has become a global pandemic, spreading to more than 200 countries and regions worldwide. 
With continued research progress and virus detection, SARS-CoV-2 genomes and sequencing data have been reported and accumulated at an unprecedented rate. To meet the need for fast analysis of these genome sequences, the National Genomics Data Center (NGDC) of the China National Center for Bioinformation (CNCB) has established an online coronavirus analysis platform, which includes de novo assembly, BLAST alignment, genome annotation, variant identification, and variant annotation modules. The online analysis platform can be freely accessed at the 2019 Novel Coronavirus Resource (2019nCoVR) (https://bigd.big.ac.cn/ncov/online/tools).",2020-11-01 +34556150,AprGPD: the apricot genomic and phenotypic database.,"

Background

Apricot is cultivated worldwide because of its high nutritive content and strong adaptability. Its flesh is delicious and has a unique and pleasant aroma. Apricot kernels are also consumed as nuts. The genome of apricot has been sequenced, and the transcriptome, resequencing, and phenotype data have been increasingly generated. However, with the emergence of new information, the data need to be integrated and disseminated.

Results

To better manage the continuous addition of new data and increase convenience, we constructed the apricot genomic and phenotypic database (AprGPD, http://apricotgpd.com ). At present, AprGPD contains three reference genomes, 1692 germplasms, 306 genome resequencing data, 90 RNA sequencing data. A set of user-friendly query, analysis, and visualization tools have been implemented in AprGPD. We have also performed a detailed analysis of 59 transcription factor families for the three genomes of apricot.

Conclusion

Six modules are displayed in AprGPD, including species, germplasm, genome, variation, product, tools. The data integrated by AprGPD will be helpful for the molecular breeding of apricot.",2021-09-23 +,Emerging Technologies to Bring Glycoproteomics Within Reach,"The NIH Common Fund Glycoscience program develops accessible and affordable new tools for carbohydrate analysis, informatics, and synthesis. The program emphasizes straightforward technologies responsive to needs expressed by end user groups from multiple fields. Validation and usability testing in partnering labs ensures that tools are accessible to non-specialists. This session will describe tools emerging from the Glycoscience program and explain how resource facilities can access those tools and bring them in-house. Our goal is for resources to broaden their impact in glycomics, glycoproteomics, and glycoscience generally, through adoption of straightforward, reliable synthetic, analytical, and informatics tools that we are developing. Three Scientific Sessions will describe tools emerging from the Glycoscience program, in three categories: (1) glycan analysis, including mass spectrometric techniques for structure determination and glycoproteomics; (2) glycan synthesis; and (3) glycoinformatics. 
Analytical technologies to be described include: high throughput permethylation of glycopeptides via one-pot for site mapping and glycan analysis, Isotope-targeted glycoproteomics (IsoTaG), a mass-independent chemical glycoproteomics technique for profiling glycopeptides, identifying both N- and O-glycan structures and sites of attachment in complex samples (www.IsoStamp.org); facile methods to ultra purify glycans; software tools for building 3D models of glycoproteins and predicting the 3D structure of glycans (https://dev.glycam.org/); a wide range of highly versatile glycan affinity reagents, including sialoglycan-recognizing probes; and new photo-crosslinking probes for discovery of the interaction partners of O-GlcNAc modified proteins. Chemical and enzymatic schemes for the facile automated synthesis of carbohydrates (N- linked, O-linked, human milk, GAGs, glycolipids) are available to core facilities to bring in-house using existing instrumentation or new low-cost modular automation platforms. Glycoinformatics tools and methods are being developed in a community-based effort, integrating them with well-established genomic and proteomic databases and tools at NCBI and EBI. The project involves 10 teams in 5 countries. Glycoinformatics tools and data are already available (http://www.glygen.org/).",2019-12-01 +34026713,An Implementation Evaluation of A Group-Based Parenting Intervention to Promote Early Childhood Development in Rural Kenya.,"Early childhood development (ECD) parenting interventions can improve child developmental outcomes in low-resource settings, but information about their implementation lags far behind evidence of their effectiveness, hindering their generalizability. 
This study presents results from an implementation evaluation of Msingi Bora (""Good Foundation"" in Swahili), a group-based responsive stimulation and nutrition education intervention recently tested in a cluster randomized controlled trial across 60 villages in rural western Kenya. Msingi Bora successfully improved child cognitive, receptive language, and socioemotional outcomes, as well as parenting practices. We conducted a mixed methods implementation evaluation of the Msingi Bora trial between April 2018 and November 2019 following the Consolidated Advice for Reporting ECD implementation research (CARE) guidelines. We collected qualitative and quantitative data on program inputs, outputs, and outcomes, with a view to examining how aspects of the program's implementation, such as program acceptance and delivery fidelity, related to observed program impacts on parents and children. We found that study areas had initially very low levels of familiarity or knowledge of ECD among parents, community delivery agents, and even supervisory staff from our partner non-governmental organization (NGO). We increased training and supervision in response, and provided a structured manual to enable local delivery agents to successfully lead the sessions. There was a high level of parental compliance, with median attendance of 13 out of 16 fortnightly sessions over 8 months. For delivery agents, all measures of delivery performance and fidelity increased with program experience. Older, more knowledable delivery agents were associated with larger impacts on parental stimulation and child outcomes, and delivery agents with higher fidelity scores were also related to improved parenting practices. We conclude that a group-based parenting intervention delivered by local delivery agents can improve multiple child and parent outcomes. 
An upfront investment in training local trainers and delivery agents, and regular supervision of delivery of a manualized program, appear key to our documented success. Our results represent a promising avenue for scaling similar interventions in low-resource rural settings to serve families in need of ECD programming. This trial is registered at ClinicalTrials.gov, NCT03548558, June 7, 2018. https://clinicaltrials.gov/ct2/show/NCT03548558.",2021-05-05 +33098359,Proteomic characteristics of bronchoalveolar lavage fluid in critical COVID-19 patients.,"Up to 10-20% of patients with coronavirus disease 2019 (COVID-19) develop a severe pulmonary disease due to immune dysfunction and cytokine dysregulation. However, the extracellular proteomic characteristics in respiratory tract of these critical COVID-19 patients still remain to be investigated. In the present study, we performed a quantitative proteomic analysis of the bronchoalveolar lavage fluid (BALF) from patients with critical COVID-19 and from non-COVID-19 controls. Our study identified 358 differentially expressed BALF proteins (P < 0.05), among which 41 were significantly changed after using the Benjamini-Hochberg correction (q < 0.05). The up-regulated signaling was found to be mainly involved in inflammatory signaling and response to oxidative stress. A series of increased extracellular factors including Tenascin-C (TNC), Mucin-1 (KL-6 or MUC1), Lipocalin-2 (LCN2), periostin (POSTN), Chitinase 3-like 1 (CHI3L1 or YKL40), and S100A12, and the antigens including lymphocyte antigen 6D/E48 antigen (LY6D), CD9 antigen, CD177 antigen, and prostate stem cell antigen (PSCA) were identified, among which the proinflammatory factors TNC and KL-6 were further validated in serum of another thirty-nine COVID-19 patients and healthy controls, showing high potentials of being biomarkers or therapeutic candidates for COVID-19. 
This BALF proteome associated with COVID-19 would also be a valuable resource for researches on anti-inflammatory medication and understanding the molecular mechanisms of host response. DATABASE: Proteomic raw data are available in ProteomeXchange (http://proteomecentral.proteomexchange.org) under the accession number PXD022085, and in iProX (www.iprox.org) under the accession number IPX0002429000.",2020-11-16 +31949184,PulmonDB: a curated lung disease gene expression database.,"Chronic Obstructive Pulmonary Disease (COPD) and Idiopathic Pulmonary Fibrosis (IPF) have contrasting clinical and pathological characteristics and interesting whole-genome transcriptomic profiles. However, data from public repositories are difficult to reprocess and reanalyze. Here, we present PulmonDB, a web-based database (http://pulmondb.liigh.unam.mx/) and R library that facilitates exploration of gene expression profiles for these diseases by integrating transcriptomic data and curated annotation from different sources. We demonstrated the value of this resource by presenting the expression of already well-known genes of COPD and IPF across multiple experiments and the results of two differential expression analyses in which we successfully identified differences and similarities. With this first version of PulmonDB, we create a new hypothesis and compare the two diseases from a transcriptomics perspective.",2020-01-16 +32765587,ABC-GWAS: Functional Annotation of Estrogen Receptor-Positive Breast Cancer Genetic Variants.,"Over the past decade, hundreds of genome-wide association studies (GWAS) have implicated genetic variants in various diseases, including cancer. However, only a few of these variants have been functionally characterized to date, mainly because the majority of the variants reside in non-coding regions of the human genome with unknown function. 
A comprehensive functional annotation of the candidate variants is thus necessary to fill the gap between the correlative findings of GWAS and the development of therapeutic strategies. By integrating large-scale multi-omics datasets such as the Cancer Genome Atlas (TCGA) and the Encyclopedia of DNA Elements (ENCODE), we performed multivariate linear regression analysis of expression quantitative trait loci, sequence permutation test of transcription factor binding perturbation, and modeling of three-dimensional chromatin interactions to analyze the potential molecular functions of 2,813 single nucleotide variants in 93 genomic loci associated with estrogen receptor-positive breast cancer. To facilitate rapid progress in functional genomics of breast cancer, we have created ""Analysis of Breast Cancer GWAS"" (ABC-GWAS), an interactive database of functional annotation of estrogen receptor-positive breast cancer GWAS variants. Our resource includes expression quantitative trait loci, long-range chromatin interaction predictions, and transcription factor binding motif analyses to prioritize putative target genes, causal variants, and transcription factors. An embedded genome browser also facilitates convenient visualization of the GWAS loci in genomic and epigenomic context. ABC-GWAS provides an interactive visual summary of comprehensive functional characterization of estrogen receptor-positive breast cancer variants. The web resource will be useful to both computational and experimental biologists who wish to generate and test their hypotheses regarding the genetic susceptibility, etiology, and carcinogenesis of breast cancer. ABC-GWAS can also be used as a user-friendly educational resource for teaching functional genomics. ABC-GWAS is available at http://education.knoweng.org/abc-gwas/.",2020-07-20 +32386298,The Mnemiopsis Genome Project Portal: integrating new gene expression resources and improving data visualization. 
,"Following the completion of the genome sequencing and gene prediction of Mnemiopsis leidyi, a lobate ctenophore that is native to the coastal waters of the western Atlantic Ocean, we developed and implemented the Mnemiopsis Genome Project Portal (MGP Portal), a comprehensive Web-based data portal for navigating the genome sequence and gene annotations. In the years following the first release of the MGP Portal, it has become evident that the inclusion of data from significant published studies on Mnemiopsis has been critical to its adoption as the centralized resource for this emerging model organism. With this most recent update, the Portal has significantly expanded to include in situ images, temporal developmental expression profiles and single-cell expression data. Recent enhancements also include implementations of an updated BLAST interface, new graphical visualization tools and updates to gene pages that integrate all new data types. Database URL: https://research.nhgri.nih.gov/mnemiopsis/.",2020-01-01 +34557572,Life cycle assessment data of French organic agricultural products.,"Environmental data on organic products are needed to assess their environmental performance. The purpose of the ACV Bio project reported here was to generate environmental data as life cycle assessment (LCA) data for a sample of French organic production systems including cropping systems (annual crops, intercrops, forages), grassland, wine grapes, cow milk, calves, beef cattle, sheep, pigs, broilers and eggs. LCA was used to estimate environmental impacts of products from these systems. Recommended uses are to characterize part of the diversity of French organic farming systems and some of their environmental impacts, identify areas for improvement, perform eco-design and sensitivity analysis, and/or make system choices in a given context. However, these data do not represent average French organic products and should not be used as such. 
The MEANS-InOut web application was used to generate life cycle inventories (LCI). Impact assessment was performed using SimaPro v9 software. The Environmental Footprint 2.0 characterisation method was used to generate LCA data. These data were supplemented with three LCA indicators: cumulative energy demand, land competition (CML-IA non-baseline) and biodiversity loss. Three non-LCA indicators were also calculated for certain systems: diversity of crop families (for cropping systems), agro-ecological infrastructure (for sheep) and pesticide treatment frequency index (for grapes). In total, 173 products were modelled. LCA and non-LCA data are available in the Microsoft® Excel file at Data INRAE (https://doi.org/10.15454/TTR25S). LCI data are available in the AGRIBALYSE database and can be accessed using SimaPro and openLCA software. Farmer-practice data are available on demand.",2021-09-09 +,Impact of COVID‐19 pandemic in an early‐onset dementia clinic in Barcelona,"Abstract

Background

The ongoing COVID‐19 pandemic and related care policies have affected dementia patients. The characteristics of early‐onset dementia (EOD, <65 years) patients in 2020 may provide insights on how to rearrange the provision of care.

Method

We retrospectively reviewed, from 2016 to 2020, the demographic and clinical data of the new referrals at our EOD clinic (Hospital Clínic Barcelona). We used Fisher’s Exact test and Mann–Whitney U test in R4.0.2 (http://www.R‐project.org/) to analyze differences between 2020 and the period 2016‐2019.

Result

In 2020, we did not visit any new referrals from 15th March to 31st May. We evaluated 104 patients in 2020 and 392 patients in 2016‐2019 (mean=98(SD=11.8) patients/year). No differences were found in age at onset (AAO), sex, diagnostic delay and MMSE score (Table1). Significant differences were found in the diagnoses obtained in each period (p<0.000005, Figure1A). In 2020, 19.2% of the patients were diagnosed with neurodegenerative diseases (ND), 48.1% with non‐neurodegenerative diseases (NND) and 32.7% with subjective cognitive decline (SCD). In contrast, in 2016‐2019, 26% of the patients were diagnosed with ND, 22.2% with NND and 51.8% with SCD. Compared to 2016‐2019, ND, but not SCD or NND, presented longer diagnostic delay in 2020 (p<0.0005, Figure1B). ND, NND and SCD did not show differences between periods in AAO, sex or MMSE. We did not find differences in the type of ND in each period (Figure1A). Compared to 2016‐2019, Frontotemporal Lobar Degeneration (FTLD) presented longer diagnostic delay in 2020 (p<0.005, Figure1B) while ND subgroups did not show differences in AAO, sex or MMSE. Cognitive disturbances in recovered COVID‐19 patients accounted for 16% of NND in 2020 [N=8, AAO 50.63(12), 63% female, MMSE 26.8(2.3)].

Conclusion

In 2020, albeit we were forced to stop our normal activity during 2.5 months, we visited a similar number of patients among which we observed an increase in NND, including cognitive disturbances in patients with recovered COVID‐19. On contrast, we found a reduction in SCD and, to a lesser extent, ND. ND showed a longer diagnostic delay in 2020 that mainly affected FTLD. Whether COVID‐19 pandemic entails a diagnostic delay in dementia patients must be confirmed in 2021.",2021-12-01 +33125078,Pfam: The protein families database in 2021.,"The Pfam database is a widely used resource for classifying protein sequences into families and domains. Since Pfam was last described in this journal, over 350 new families have been added in Pfam 33.1 and numerous improvements have been made to existing entries. To facilitate research on COVID-19, we have revised the Pfam entries that cover the SARS-CoV-2 proteome, and built new entries for regions that were not covered by Pfam. We have reintroduced Pfam-B which provides an automatically generated supplement to Pfam and contains 136 730 novel clusters of sequences that are not yet matched by a Pfam family. The new Pfam-B is based on a clustering by the MMseqs2 software. We have compared all of the regions in the RepeatsDB to those in Pfam and have started to use the results to build and refine Pfam repeat families. Pfam is freely available for browsing and download at http://pfam.xfam.org/.",2021-01-01 +31982380,TissueCoCoPUTs: Novel Human Tissue-Specific Codon and Codon-Pair Usage Tables Based on Differential Tissue Gene Expression.,"Protein expression in multicellular organisms varies widely across tissues. Codon usage in the transcriptome of each tissue is derived from genomic codon usage and the relative expression level of each gene. 
We created a comprehensive computational resource that houses tissue-specific codon, codon-pair, and dinucleotide usage data for 51 Homo sapiens tissues (TissueCoCoPUTs: https://hive.biochemistry.gwu.edu/review/tissue_codon), using transcriptome data from the Broad Institute Genotype-Tissue Expression (GTEx) portal. Distances between tissue-specific codon and codon-pair frequencies were used to generate a dendrogram based on the unique patterns of codon and codon-pair usage in each tissue that are clearly distinct from the genomic distribution. This novel resource may be useful in unraveling the relationship between codon usage and tRNA abundance, which could be critical in determining translation kinetics and efficiency across tissues. Areas of investigation such as biotherapeutic development, tissue-specific genetic engineering, and genetic disease prediction will greatly benefit from this resource.",2020-01-23 +35016569,"Prevalence, Pattern, Risks Factors and Consequences of Antibiotic Resistance in COPD: A Systematic Review.","A concern of antibiotic use in chronic obstructive pulmonary disease (COPD) is the emergence and propagation of antimicrobial resistance (AMR). A systematic review was conducted to determine prevalence, pattern, risk factors and consequences of AMR in COPD. Bibliographic databases were searched from inception to November 2020, with no language restrictions, including studies of any design that included patients with COPD and reported prevalence and pattern of AMR. 2748 unique titles and abstracts were identified, of which 63 articles, comprising 26,387 patients, met inclusion criteria. Forty-four (69.8%) studies were performed during acute exacerbation. The median prevalence of AMR ranged from 0-100% for Pseudomonas aeruginosa, Moraxella catarrhalis, Klebsiella pneumoniae and Acinetobacter baumannii. 
Median resistance rates of H influenzae and S pneumoniae were lower by comparison, with maximum rates ≤40% and ≤46%, respectively, and higher for Staphylococcus aureus. There was a trend towards higher rates of AMR in patients with poorer lung function and greater incidence of previous antibiotic exposure and hospitalisation. The impact of AMR on mortality was unclear. Data regarding antimicrobial susceptibility testing techniques and the impact of other risk factors or consequences of AMR were variable or not reported. This is the first review to systematically unify data regarding AMR in COPD. AMR is relatively common and strategies to optimise antibiotic use could be valuable to prevent the currently under-investigated potential adverse consequences of AMR.Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.2000957 .",2021-12-01 +31950986,3D-Cell-Annotator: an open-source active surface tool for single-cell segmentation in 3D microscopy images.,"SUMMARY:Segmentation of single cells in microscopy images is one of the major challenges in computational biology. It is the first step of most bioimage analysis tasks, and essential to create training sets for more advanced deep learning approaches. Here, we propose 3D-Cell-Annotator to solve this task using 3D active surfaces together with shape descriptors as prior information in a semi-automated fashion. The software uses the convenient 3D interface of the widely used Medical Imaging Interaction Toolkit (MITK). Results on 3D biological structures (e.g. spheroids, organoids and embryos) show that the precision of the segmentation reaches the level of a human expert. AVAILABILITY AND IMPLEMENTATION:3D-Cell-Annotator is implemented in CUDA/C++ as a patch for the segmentation module of MITK. The 3D-Cell-Annotator enabled MITK distribution can be downloaded at: www.3D-cell-annotator.org. 
It works under Windows 64-bit systems and recent Linux distributions even on a consumer level laptop with a CUDA-enabled video card using recent NVIDIA drivers. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +33110585,Rapid response to the COVID-19 pandemic: Vietnam government's experience and preliminary success.,"

Background

The COVID-19 pandemic has hit all corners of the world, challenging governments to act promptly in controlling the spread of the pandemic. Due to limited resources and inferior technological capacities, developing countries including Vietnam have faced many challenges in combating the pandemic. Since the first cases were detected on 23 January 2020, Vietnam has undergone a 3-month fierce battle to control the outbreak with stringent measures from the government to mitigate the adverse impacts. In this study, we aim to give insights into the Vietnamese government's progress during the first three months of the outbreak. Additionally, we relatively compare Vietnam's response with that of other Southeast Asia countries to deliver a clear and comprehensive view on disease control strategies.

Methods

The data on the number of COVID-19 confirmed and recovered cases in Vietnam was obtained from the Dashboard for COVID-19 statistics of the Ministry of Health (https://ncov.vncdc.gov.vn/). The review on Vietnam's country-level responses was conducted by searching for relevant government documents issued on the online database 'Vietnam Laws Repository' (https://thuvienphapluat.vn/en/index.aspx), with the grey literature on Google and relevant official websites. A stringency index of government policies and the countries' respective numbers of confirmed cases of nine Southeast Asian countries were adapted from the Oxford COVID-19 Government Response Tracker (https://www.bsg.ox.ac.uk/research/research-projects/coronavirus-government-response-tracker). All data was updated as of 24 April 2020.

Results

Preliminary positive results have been achieved given that the nation confirmed no new community-transmitted cases since 16 April and zero COVID-19 - related deaths throughout the 3-month pandemic period. To date, the pandemic has been successfully controlled thanks to the Vietnamese government's prompt, proactive and decisive responses including mobilization of the health care systems, security forces, economic policies, along with a creative and effective communication campaign corresponding with crucial milestones of the epidemic's progression.

Conclusions

Vietnam could be one of the role models in pandemic control for low-resource settings. As the pandemic is still ongoing in an unpredictable trajectory, disease control measures should continue to be put in place in the foreseeable short term.",2020-12-01 +,Effectiveness of internet-based cognitive behavioural therapy for binge eating disorder,"

Introduction

Binge eating disorder (BED) is the most prevalent specific eating disorder. It is characterized by recurrent episodes of binge eating and is associated with feelings of shame and a lack of control. Internet-based treatments are gaining increasing attention as a way to reach more patients with evidence-based treatments. In 2020 we conducted a preliminary analysis on the effectiveness of an internet-based cognitive behavioural therapy treatment project (Jensen ES, Linnet, J, Holmberg TT, Tarp K, Nielsen JH, Lichtenstein MB. Effectiveness of internet-based guided self-help for binge-eating disorder and characteristics of completers versus noncompleters. Int J Eat Disord. 2020;1-6. https://doi.org/10.1002/eat.23384).

Objectives

This study aims to update the analyses on treatment effect with the patients who have completed treatment in the year following the last data extraction.

Methods

The iBED treatment project is a 10-session psychologist guided internet-based self-help program based on cognitive behavioural therapy. When applying for treatment and upon completion patients respond to a survey containing, among other scales, the eating disorder examination-questionnaire (EDE-Q), binge eating disorder-questionnaire (BED-Q) and various sociodemographic questions. Data will be extracted from the treatment project in anonymized form for analyses.

Results

The preliminary analyses were conducted on 36 completers. These showed large standardized effect sizes on both the EDE-Q subscales (Cohen's d ranging from .88-1.65) and on the BED-Q (d = 1.38). The updated effectiveness analyses will be presented at the conference. We expect approximately 70-80 patients to have completed treatment at this time.

Conclusions

Results will be discussed and presented at the conference.",2021-08-13 +33892308,SAPdb: A database of short peptides and the corresponding nanostructures formed by self-assembly.,"Nanostructures generated by self-assembly of peptides yield nanomaterials that have many therapeutic applications, including drug delivery and biomedical engineering, due to their low cytotoxicity and higher uptake by targeted cells owing to their high affinity and specificity towards cell surface receptors. Despite the promising implications of this rapidly expanding field, there is no dedicated resource to study peptide nanostructures. This study endeavours to create a repository of short peptides, which may prove to be the best models to study ordered nanostructures formed by peptide self-assembly. SAPdb has a repertoire of 1049 entries of experimentally validated nanostructures formed by the self-assembly of small peptides. It consists of 328 tripeptides, 701 dipeptides, and 20 single amino acids with some conjugate partners. Each entry encompasses comprehensive information about the peptide, such as chemical modifications, the type of nanostructure formed, experimental conditions like pH, temperature, solvent required for the self-assembly, etc. Our analysis indicates that peptides containing aromatic amino acids favour the formation of self-assembling nanostructures. Additionally, we observed that these peptides form different nanostructures under different experimental conditions. SAPdb provides this comprehensive information in a hassle-free tabulated manner at a glance. User-friendly browsing, searching, and analysis modules have been integrated for easy data retrieval, data comparison, and examination of properties. We anticipate SAPdb to be a valuable repository for researchers engaged in the burgeoning arena of nanobiotechnology. 
It is freely available at https://webs.iiitd.edu.in/raghava/sapdb.",2021-04-10 +,Screening of cellulose degradation bacteria from Min pigs and optimization of its cellulase production,"Cellulose as a potential feed resource hinders its utilization because of its complex structure, and cellulase is the key to its biological effective utilization. Animal endogenous probiotics are more susceptible to colonization in the intestinal tract, and their digestive enzymes are more conducive to the digestion and absorption of feed in young animals. Min pigs are potential sources of cellulase probiotics because of the high proportion of dietary fiber in their feed. In this study, the cellulolytic bacteria in the feces of Min pigs were isolated and screened. The characteristics of enzymes and cellulase production were studied, which provided a theoretical basis for the rational utilization of cellulase and high-fiber food in animal production.In our study, 10 strains of cellulase producing strains were isolated from Min pig manure, among which the M2 strain had the best enzyme producing ability and was identified as Bacillus velezensis. The optimum production conditions of cellulase from strain M2 were: 2% inoculum, the temperature of 35°C, the pH of 5.0, and the liquid loading volume of 50 mL. The optimum temperature, pH and time for the reaction of cellulase produced by strain M2 were 55°C, 4.5 and 5 min, respectively.Min pigs can be used as a source of cellulase producing strains. The M2 strain isolated from feces was identified as Bacillus velezensis. The cellulase from M2 strain had a good activity and the potential to be used as feed additive for piglets.How to cite: Li F, Xie Y, Gao X, et al. Screening of cellulose degradation bacteria from Min Pigs and optimization of its cellulase production. Electron J Biotechnol 2020;48. 
https://doi.org/10.1016/j.ejbt.2020.09.001",2020-11-01 +34531868,Pan-Cancer Analysis of PARP1 Alterations as Biomarkers in the Prediction of Immunotherapeutic Effects and the Association of Its Expression Levels and Immunotherapy Signatures.,"

Background

Poly (ADP-ribose) polymerase-1 (PARP1) alterations are associated with PARP1 inhibitor resistance, regulating the function of Treg cells and PDL1 expression in tumor cells, and high PARP1 expression is significantly associated with aggressive behavior and chemotherapeutic resistance in several tumors. However, a comprehensive analysis of the predictive values of PARP1 alteration for immune checkpoint inhibitor (ICI) effectiveness in tumors remains unclear, and the associations between its expression and immunotherapy signatures also need to be explored further.

Methods

We performed some analyses with the cBioPortal online database (https://www.cbioportal.org), TIMER2.0 (Tumor Immune Estimation Resource 2.0, http://timer.comp-genomics.org/) and TCGA database (https://xenabrowser.net or https://portal.gdc.cancer.gov/). Survival analysis was conducted using the Kaplan-Meier method, and the associations between PARP1 transcription levels and immune checkpoint gene expression, the number of neoantigens, tumor mutation burden (TMB) levels, and microsatellite instability (MSI) event were analyzed by Spearman correlation analysis. Visualization of those mentioned above was performed using R, version 3.6.3 (http://www.r-project.org/).

Results

We found that PARP1 was altered in 1338 (2.9%) out of 45604 patients with diverse tumors, which was associated with markedly higher TMB levels in a variety of tumors (P < 0.01). Impressively, patients with PARP1 alterations in advanced tumors showed better overall survival (OS) in the ICI-treated cohort (P = 0.016). PARP1 altered group was substantially correlated with higher immune infiltrates across most tumors, including CD8+ T cells in colorectal adenocarcinoma (P = 0.0061), endometrial carcinoma (P = 0.0033), stomach cancer (P = 0.033), and cervical cancer (P = 0.026), respectively. The PARP1 altered group showed high expression in transcription (P < 0.001), and higher expression of LAG3, PDCD1, CTLA-4, and TIGIT (P < 0.05). Higher PARP1 expression was present in 27 tumor compared the corresponding normal tissues using the GTEx and TCGA databases and it had a worse OS in several tumors (P < 0.05). Further, high PARP1 expression was significantly associated with six immune cells (B cells, CD4+ T cells, CD8+ T cells, macrophages, neutrophils, and dendritic cells) in most tumors, including colon adenocarcinoma (COAD), head and neck squamous cell carcinoma (HNSC), kidney renal clear cell carcinoma (KIRC), and liver hepatocellular carcinoma (LIHC) (P < 0.05). 
In particular, CD8+T cell infiltration, was also positively correlated with high PARP1 expression in bladder urothelial carcinoma (BLCA), breast invasive carcinoma (BRCA), kidney renal papillary cell carcinoma (KIRP), brain lower grade glioma (LGG), LIHC, pancreatic adenocarcinoma (PAAD), pheochromocytoma and paraganglioma (PCPG), prostate adenocarcinoma (PRAD), rectum adenocarcinoma (READ), testicular germ cell tumors (TGCT), thymoma (THYM), uterine corpus endometrial carcinoma (UCEC), uveal melanoma (UVM) (P < 0.05, no data shown), and PARP1 expression was significantly positively correlated with the transcription levels of some of the 47 immune checkpoint genes, such as CD274, CTLA4, and PDCD1 in several tumors, including PAAD, LIHC, KIRC, HNSC, and BLCA (P < 0.05). A significant positive association between PARP1 expression and the number of immune neoantigen was found within COAD, KIRC, lung adenocarcinoma (LUAD), PAAD and THYM (P < 0.05), and there were also significantly positive correlations between PARP1 expression and TMB in many tumors like adrenocortical carcinoma (ACC), COAD, kidney chromophobe (KICH), LGG, LUAD, READ, skin cutaneous melanoma (SKCM) and stomach adenocarcinoma (STAD) (P < 0.05). In addition, high PARP1 expression was positively associated with microsatellite instability event in COAD, KIRP, BRCA, glioblastoma multiforme (GBM), lung squamous cell carcinoma (LUSC), LGG, READ, UCEC, SKCM and LUAD (P < 0.05).

Conclusions

Our results highlight the significance of PARP1 alterations as pan-cancer predictive biomarkers for ICI treatment, and its expression levels seem to be correlated with the status of immunotherapy-associated signatures, thus may be a promising biomarker for predicting ICI response in several tumors.",2021-08-31 +,First Report of Grapevine Red Globe Virus in Grapevines in Washington State,"Grapevine red globe virus (GRGV; genus Maculavirus, family Tymoviridae) has been reported in grapevines (Vitis spp.) from Italy, Greece, France, China, Spain, and Germany and in California, U.S.A. (Cretazzo et al. 2017; Fan et al. 2016; Ruiz-Garcia et al. 2018; Sabanadzovic et al. 2000). During surveys of grapevine nurseries, a total of 241 composite samples, each consisting of four petioles from mature leaves/vine from five asymptomatic grapevines, from 33 grapevine (Vitis vinifera) cultivars were collected. Total RNA isolated from these samples using a Spectrum Total RNA isolation kit (Sigma-Aldrich, St. Louis, MO) was subjected to high-throughput sequencing (HTS) on an Illumina HiSeq2500 or NovaSeq 6000 platform in paired-end mode (Genomics Core Facility, Huntsman Cancer Institute, Utah University, Salt Lake City, UT). After trimming raw reads based on quality and ambiguity, the paired-end quality reads of approximately 120 (HiSeq) or 145 (NovaSeq) base pair (bp) length were assembled de novo into a pool of contigs (CLC Genomics Workbench 12). These contigs were subjected to BLASTn analysis against the nonredundant virus database from GenBank (https://www.ncbi.nlm.nih.gov/blast). A total of 49 contig sequences, ranging from 200 to 1,645 bp in length with an average coverage ranging up to 418.7, aligning with GRGV genome, were detected in cultivars Aglianico, Cabernet franc, Pinot gris, and Riesling. BLASTn analysis of contigs greater than 500 bp in length showed sequence identity between 88.5 and 95% with corresponding GRGV sequences reported from other countries. 
These results indicated the presence of genetically distinct isolates of GRGV. HTS data also revealed coinfection of GRGV in all samples with one or more of the following viruses and/or viroids: grapevine rupestris stem pitting associated virus, grapevine rupestris vein feathering virus, hop stunt viroid, or grapevine yellow speckle viroid-1. To further confirm infection by GRGV, total RNA was extracted from two asymptomatic Pinot gris vines that previously tested positive in HTS using the Spectrum Total RNA isolation kit and subjected to reverse transcription PCR using primers specific to the replicase polyprotein gene of the virus (RG4847F, 5′-TGGTCTGTTGTTCGCATCTT-3′; RG6076R, 5′ CGGAAGGGGAAGCATTGATCT-3′ Cretazzo et al. 2017). Sequence analysis of the approximately 1,250-bp amplicons (accession no. MT749359) showed 91.2% nt sequence identity with a corresponding sequence of GRGV isolate from Brazil (KX828704.1). To our knowledge, this is the first report of GRGV in Washington State. Together with the report of the occurrence of GRGV in California (Sabanadzovic et al. 2000), these results indicate wide geographical distribution of the virus. Although GRGV can cause asymptomatic infections in grapevines (Martelli et al. 2002), the economic importance of GRGV as single or coinfections with other viruses needs to be examined to assess the potential significance of the virus to grape production and grapevine certification programs.",2021-03-01 +34964846,The first comprehensive database of germline pathogenic variants in East Asian cancer patients. ,"Pathogenic germline variants in cancer-associated genes are risk factors for cancer predisposition. However, systematic mining and summarizing of cancer pathogenic or likely pathogenic variants has not been performed for people of East Asian descent. This study aimed to investigate publicly available data to identify germline variants in East Asian cancer cohorts and compare them to variants in Caucasian cancer cohorts. 
Based on the data we retrieved, we built a comprehensive database, named COGVIC (Catalog of Germline Variants in Cancer). A total of 233 variants in the East Asian population were identified. The majority (87%) of genes with cancer-associated variants were not shared between the East Asian and Caucasian cohorts. This included pathogenic variants in BRCA2. Our study summarized the prevalence of germline variants in East Asian cancer cohorts and provides an easy-to-use online tool to explore germline mutations related to cancer susceptibility. http://www.cogvic.vip/.",2021-12-01 +32472030,"dbPSP 2.0, an updated database of protein phosphorylation sites in prokaryotes.","In prokaryotes, protein phosphorylation plays a critical role in regulating a broad spectrum of biological processes and occurs mainly on various amino acids, including serine (S), threonine (T), tyrosine (Y), arginine (R), aspartic acid (D), histidine (H) and cysteine (C) residues of protein substrates. Through literature curation and public database integration, here we reported an updated database of phosphorylation sites (p-sites) in prokaryotes (dbPSP 2.0) that contains 19,296 experimentally identified p-sites in 8,586 proteins from 200 prokaryotic organisms, which belong to 12 phyla of two kingdoms, bacteria and archaea. To carefully annotate these phosphoproteins and p-sites, we integrated the knowledge from 88 publicly available resources that covers 9 aspects, namely, taxonomy annotation, genome annotation, function annotation, transcriptional regulation, sequence and structure information, family and domain annotation, interaction, orthologous information and biological pathway. In contrast to version 1.0 (~30 MB), dbPSP 2.0 contains ~9 GB of data, with a 300-fold increased volume. We anticipate that dbPSP 2.0 can serve as a useful data resource for further investigating phosphorylation events in prokaryotes. 
dbPSP 2.0 is free for all users to access at: http://dbpsp.biocuckoo.cn.",2020-05-29 +33181824,CircR2Cancer: a manually curated database of associations between circRNAs and cancers. ,"Accumulating evidences have shown that the deregulation of circRNA has close association with many human cancers. However, these experimental verified circRNA-cancer associations are not collected in any database. Here, we develop a manually curated database (circR2Cancer) that provides experimentally supported associations between circRNAs and cancers. The current version of the circR2Cancer contains 1439 associations between 1135 circRNAs and 82 cancers by extracting data from existing literatures and databases. In addition, circR2Cancer contains the information of cancer exacted from Disease Ontology and basic biological information of circRNAs from circBase. At the same time, circR2Cancer provides a simple and friendly interface for users to conveniently browse, search and download the data. It will be a useful and valuable resource for researchers to understanding the regulation mechanism of circRNA in cancers. http://www.biobdlab.cn:8000.",2020-01-01 +32449765,HECNet: a hierarchical approach to enzyme function classification using a Siamese Triplet Network.,"

Motivation

Understanding an enzyme's function is one of the most crucial problem domains in computational biology. Enzymes are a key component in all organisms and many industrial processes as they help in fighting diseases and speed up essential chemical reactions. They have wide applications and therefore, the discovery of new enzymatic proteins can accelerate biological research and commercial productivity. Biological experiments, to determine an enzyme's function, are time-consuming and resource expensive.

Results

In this study, we propose a novel computational approach to predict an enzyme's function up to the fourth level of the Enzyme Commission (EC) Number. Many studies have attempted to predict an enzyme's function. Yet, no approach has properly tackled the fourth and final level of the EC number. The fourth level holds great significance as it gives us the most specific information of how an enzyme performs its function. Our method uses innovative deep learning approaches along with an efficient hierarchical classification scheme to predict an enzyme's precise function. On a dataset of 11 353 enzymes and 402 classes, we achieved a hierarchical accuracy and Macro-F1 score of 91.2% and 81.9%, respectively, on the 4th level. Moreover, our method can be used to predict the function of enzyme isoforms with considerable success. This methodology is broadly applicable for genome-wide prediction that can subsequently lead to automated annotation of enzyme databases and the identification of better/cheaper enzymes for commercial activities.

Availability and implementation

The web-server can be freely accessed at http://hecnet.cbrlab.org/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-11-01 +34160247,"Quantitative Microbial Risk Assessment for Contaminated Private Wells in the Fractured Dolomite Aquifer of Kewaunee County, Wisconsin.","

Background

Private wells are an important source of drinking water in Kewaunee County, Wisconsin. Due to the region's fractured dolomite aquifer, these wells are vulnerable to contamination by human and zoonotic gastrointestinal pathogens originating from land-applied cattle manure and private septic systems.

Objective

We determined the magnitude of the health burden associated with contamination of private wells in Kewaunee County by feces-borne gastrointestinal pathogens.

Methods

This study used data from a year-long countywide pathogen occurrence study as inputs into a quantitative microbial risk assessment (QMRA) to predict the total cases of acute gastrointestinal illness (AGI) caused by private well contamination in the county. Microbial source tracking was used to associate predicted cases of illness with bovine, human, or unknown fecal sources.

Results

Results suggest that private well contamination could be responsible for as many as 301 AGI cases per year in Kewaunee County, and that 230 and 12 cases per year were associated with a bovine and human fecal source, respectively. Furthermore, Cryptosporidium parvum was predicted to cause 190 cases per year, the most out of all 8 pathogens included in the QMRA.

Discussion

This study has important implications for land use and water resource management in Kewaunee County and informs the public health impacts of consuming drinking water produced in other similarly vulnerable hydrogeological settings. https://doi.org/10.1289/EHP7815.",2021-06-23 +32527260,PINOT: an intuitive resource for integrating protein-protein interactions.,"

Background

The past decade has seen the rise of omics data for the understanding of biological systems in health and disease. This wealth of information includes protein-protein interaction (PPI) data derived from both low- and high-throughput assays, which are curated into multiple databases that capture the extent of available information from the peer-reviewed literature. Although these curation efforts are extremely useful, reliably downloading and integrating PPI data from the variety of available repositories is challenging and time consuming.

Methods

We here present a novel user-friendly web-resource called PINOT (Protein Interaction Network Online Tool; available at http://www.reading.ac.uk/bioinf/PINOT/PINOT_form.html) to optimise the collection and processing of PPI data from IMEx consortium associated repositories (members and observers) and WormBase, for constructing, respectively, human and Caenorhabditis elegans PPI networks.

Results

Users submit a query containing a list of proteins of interest for which PINOT extracts data describing PPIs. At every query submission PPI data are downloaded, merged and quality assessed. Then each PPI is confidence scored based on the number of distinct methods used for interaction detection and the number of publications that report the specific interaction. Examples of how PINOT can be applied are provided to highlight the performance, ease of use and potential utility of this tool.

Conclusions

PINOT is a tool that allows users to survey the curated literature, extracting PPI data in relation to a list of proteins of interest. PINOT extracts a similar number of PPIs as other, analogous, tools and incorporates a set of innovative features. PINOT is able to process large queries, it downloads human PPIs live through PSICQUIC and it applies quality control filters on the downloaded PPI data (i.e. removing the need for manual inspection by the user). PINOT provides the user with information on detection methods and publication history for each downloaded interaction data entry and outputs the results in a table format that can be straightforwardly further customised and/or directly uploaded into network visualization software. Video abstract.",2020-06-11 +32139688,Identification of region-specific astrocyte subtypes at single cell resolution.,"Astrocytes, a major cell type found throughout the central nervous system, have general roles in the modulation of synapse formation and synaptic transmission, blood-brain barrier formation, and regulation of blood flow, as well as metabolic support of other brain resident cells. Crucially, emerging evidence shows specific adaptations and astrocyte-encoded functions in regions, such as the spinal cord and cerebellum. To investigate the true extent of astrocyte molecular diversity across forebrain regions, we used single-cell RNA sequencing. Our analysis identifies five transcriptomically distinct astrocyte subtypes in adult mouse cortex and hippocampus. Validation of our data in situ reveals distinct spatial positioning of defined subtypes, reflecting the distribution of morphologically and physiologically distinct astrocyte populations. Our findings are evidence for specialized astrocyte subtypes between and within brain regions. 
The data are available through an online database (https://holt-sc.glialab.org/), providing a resource on which to base explorations of local astrocyte diversity and function in the brain.",2020-03-05 +33175131,deepBase v3.0: expression atlas and interactive analysis of ncRNAs from thousands of deep-sequencing data.,"Eukaryotic genomes encode thousands of small and large non-coding RNAs (ncRNAs). However, the expression, functions and evolution of these ncRNAs are still largely unknown. In this study, we have updated deepBase to version 3.0 (deepBase v3.0, http://rna.sysu.edu.cn/deepbase3/index.html), an increasingly popular and openly licensed resource that facilitates integrative and interactive display and analysis of the expression, evolution, and functions of various ncRNAs by deeply mining thousands of high-throughput sequencing data from tissue, tumor and exosome samples. We updated deepBase v3.0 to provide the most comprehensive expression atlas of small RNAs and lncRNAs by integrating ∼67 620 data from 80 normal tissues and ∼50 cancer tissues. The extracellular patterns of various ncRNAs were profiled to explore their applications for discovery of noninvasive biomarkers. Moreover, we constructed survival maps of tRNA-derived RNA Fragments (tRFs), miRNAs, snoRNAs and lncRNAs by analyzing >45 000 cancer sample data and corresponding clinical information. We also developed interactive webs to analyze the differential expression and biological functions of various ncRNAs in ∼50 types of cancers. 
This update is expected to provide a variety of new modules and graphic visualizations to facilitate analyses and explorations of the functions and mechanisms of various types of ncRNAs.",2021-01-01 +31667520,The PATRIC Bioinformatics Resource Center: expanding data and analysis capabilities.,"The PathoSystems Resource Integration Center (PATRIC) is the bacterial Bioinformatics Resource Center funded by the National Institute of Allergy and Infectious Diseases (https://www.patricbrc.org). PATRIC supports bioinformatic analyses of all bacteria with a special emphasis on pathogens, offering a rich comparative analysis environment that provides users with access to over 250 000 uniformly annotated and publicly available genomes with curated metadata. PATRIC offers web-based visualization and comparative analysis tools, a private workspace in which users can analyze their own data in the context of the public collections, services that streamline complex bioinformatic workflows and command-line tools for bulk data analysis. Over the past several years, as genomic and other omics-related experiments have become more cost-effective and widespread, we have observed considerable growth in the usage of and demand for easy-to-use, publicly available bioinformatic tools and services. Here we report the recent updates to the PATRIC resource, including new web-based comparative analysis tools, eight new services and the release of a command-line interface to access, query and analyze data.",2020-01-01 +33459764,SWITCHES: Searchable web interface for topologies of CHEmical switches. ,"Bistable biochemical switches are key motifs in cellular state decisions and long-term storage of cellular 'memory'. There are a few known biological switches that have been well characterized, however these examples are insufficient for systematic surveys of properties of these important systems. 
Here we present a resource of all possible bistable biochemical reaction networks with up to 6 reactions between 3 molecules, and 3 reactions between 4 molecules. Over 35,000 reaction topologies were constructed by identifying unique combinations of reactions between a fixed number of molecules. Then, these topologies were populated with rates within a biologically realistic range. The Searchable Web Interface for Topologies of CHEmical Switches (SWITCHES, https://switches.ncbs.res.in) provides a bistability and parameter analysis of over 7 million models from this systematic survey of chemical reaction space. This database will be useful for theoreticians interested in analyzing stability in chemical systems and also experimentalists for creating robust synthetic biological switches. Freely available on the web at https://switches.ncbs.res.in. Website implemented in PHP, MariaDB, Graphviz, and Apache, with all major browsers supported.",2021-01-18 +34692807,"Tracking, Synthesizing, and Sharing Global Batrachochytrium Data at AmphibianDisease.org.","Emerging infectious diseases have been especially devastating to amphibians, the most endangered class of vertebrates. For amphibians, the greatest disease threat is chytridiomycosis, caused by one of two chytridiomycete fungal pathogens Batrachochytrium dendrobatidis (Bd) and Batrachochytrium salamandrivorans (Bsal). Research over the last two decades has shown that susceptibility to this disease varies greatly with respect to a suite of host and pathogen factors such as phylogeny, geography (including abiotic factors), host community composition, and historical exposure to pathogens; yet, despite a growing body of research, a comprehensive understanding of global chytridiomycosis incidence remains elusive. In a large collaborative effort, Bd-Maps was launched in 2007 to increase multidisciplinary investigations and understanding using compiled global Bd occurrence data (Bsal was not discovered until 2013). 
As its database functions aged and became unsustainable, we sought to address critical needs utilizing new technologies to meet the challenges of aggregating data to facilitate research on both Bd and Bsal. Here, we introduce an advanced central online repository to archive, aggregate, and share Bd and Bsal data collected from around the world. The Amphibian Disease Portal (https://amphibiandisease.org) addresses several critical community needs while also helping to build basic biological knowledge of chytridiomycosis. This portal could be useful for other amphibian diseases and could also be replicated for uses with other wildlife diseases. We show how the Amphibian Disease Portal provides: (1) a new repository for the legacy Bd-Maps data; (2) a repository for sample-level data to archive datasets and host published data with permanent DOIs; (3) a flexible framework to adapt to advances in field, laboratory, and informatics technologies; and (4) a global aggregation of Bd and Bsal infection data to enable and accelerate research and conservation. The new framework for this project is built using biodiversity informatics best practices and metadata standards to ensure scientific reproducibility and linkages across other biological and biodiversity repositories.",2021-10-04 +33010163,cncRNAdb: a manually curated resource of experimentally supported RNAs with both protein-coding and noncoding function.,"RNA endowed with both protein-coding and noncoding functions is referred to as 'dual-function RNA', 'binary functional RNA (bifunctional RNA)' or 'cncRNA (coding and noncoding RNA)'. Recently, an increasing number of cncRNAs have been identified, including both translated ncRNAs (ncRNAs with coding functions) and untranslated mRNAs (mRNAs with noncoding functions). However, an appropriate database for storing and organizing cncRNAs is still lacking. 
Here, we developed cncRNAdb, a manually curated database of experimentally supported cncRNAs, which aims to provide a resource for efficient manipulation, browsing and analysis of cncRNAs. The current version of cncRNAdb documents about 2600 manually curated entries of cncRNA functions with experimental evidence, involving more than 2,000 RNAs (including over 1300 translated ncRNAs and over 600 untranslated mRNAs) across over 20 species. In summary, we believe that cncRNAdb will help elucidate the functions and mechanisms of cncRNAs and develop new prediction methods. The database is available at http://www.rna-society.org/cncrnadb/.",2021-01-01 +33166392,LitCovid: an open database of COVID-19 literature.,"Since the outbreak of the current pandemic in 2020, there has been a rapid growth of published articles on COVID-19 and SARS-CoV-2, with about 10,000 new articles added each month. This is causing an increasingly serious information overload, making it difficult for scientists, healthcare professionals and the general public to remain up to date on the latest SARS-CoV-2 and COVID-19 research. Hence, we developed LitCovid (https://www.ncbi.nlm.nih.gov/research/coronavirus/), a curated literature hub, to track up-to-date scientific information in PubMed. LitCovid is updated daily with newly identified relevant articles organized into curated categories. To support manual curation, advanced machine-learning and deep-learning algorithms have been developed, evaluated and integrated into the curation workflow. To the best of our knowledge, LitCovid is the first-of-its-kind COVID-19-specific literature resource, with all of its collected articles and curated data freely available. Since its release, LitCovid has been widely used, with millions of accesses by users worldwide for various information needs, such as evidence synthesis, drug discovery and text and data mining, among others.",2021-01-01 +,Advancing Mental Health Provision in Pharmacy (AMPLIPHY),"Abstract

Introduction

Improvement of mental health is a priority in the NHS Long Term Plan (1), and pharmacists and their teams could provide enhanced support for people who take medicines for anxiety or depression, two of the most common mental health problems in the UK. However, a recent Cochrane review (2) identified no community pharmacy services focused on mental health.

Aim

We aimed to pilot a mental health support service in community pharmacy, Advancing Mental Health Provision in Pharmacy (AMPLIPHY), to assess its feasibility and potential benefit.

Methods

The AMPLIPHY service was codesigned through a workshop involving people with lived experience, pharmacists and researchers. The resultant programme is a series of consultations, beginning at the presentation of the qualifying prescription for an antidepressant, after a further 1–2 weeks and then as further prescriptions are presented, up to 3 months. People are eligible to enter the service if they are newly prescribed antidepressants for depression or anxiety, or have a change in medication, dose or quantity. Pharmacists and their teams identified people who met this criterion and invited them to participate. The service was intended to be patient-led, with the pharmacist helping the patient to define tangible aims and/or outcomes that they wanted to focus on, and providing sign-posting where required. Following brief one-day training, the pilot ran across ten pharmacies in Greater Manchester from November 2019 through March 2020. We triangulated results from: a) quantitative analysis of consultation data; b) content analysis of consultation records; and c) template analysis of semi-structured interviews with participating pharmacists at the start and end of the service. We aimed to obtain feedback from people on exit from the AMPLIPHY service, but this was curtailed due to the coronavirus pandemic. Consultations were recorded via the Pharmoutcomes system (a,b) and interviews were recorded and transcribed, with NVivo used to manage the interview dataset (c).

Results

Seventy-six patients participated in the service, across 9 of the 10 pharmacies. Seventy-five percent of patients had just one consultation. The median age was 39 (IQR 28–47) and 62% of patients were women. Most patients entered the service due to new prescription of antidepressant (74%), 17% due to a change in dose and the remainder due to change in medication or quantity. Sertraline was the most commonly prescribed medication (46%). The content analysis indicated that consultations centred around one of five areas: health (n=31), lifestyle (n=62), medication (n=45), support (n=37) and patients’ descriptions of their feelings (n=31).

Conclusion

AMPLIPHY was accessed by a range of people, mainly on initiation of a new antidepressant. Parallels might be drawn with the New Medicines Service in England, but this does not currently extend to antidepressants. Consultations were not restricted to health and medication, but extended to other social and lifestyle aspects thus indicating that participants felt comfortable to disclose their personal situations to the pharmacist. This could support tailored interactions. However, more work is warranted to understand why most patients did not attend multiple consultations, and the immediate/long-term impact from the patient’s perspective.

References

1. NHS. NHS Long Term Plan [online]. 2019 [cited 09 October 2020]. Available at: https://www.england.nhs.uk/long-term-plan/ 2. de Barra M, Scott CL, Scott NW, Johnston M, de Bruin M, Nkansah N, Bond CM, Matheson CI, Rackow P, Williams AJ, Watson MC. Pharmacist services for non‐hospitalised patients. Cochrane Database of Systematic Reviews 2018, Issue 9. Art. No.: CD013102. DOI: 10.1002/14651858.CD013102.",2021-03-26 +,Simple synthesis of photoluminescent carbon dots from a marine polysaccharide found in shark cartilage,"For more than a decade, water-soluble, eco-friendly, biocompatible, and low-toxicity fluorescent nanomaterials have received considerable attention for their numerous in vivo and in vitro applications in biomedical imaging, disease diagnostics, and environmental monitoring. Owing to their tunable photoluminescence properties, carbon-based luminescent nanomaterials have shown great potential in bioimaging, photocatalysis, and biosensing among other applications. Marine environments provide excellent resources for the fabrication of these nanomaterials, because many marine organisms contain interesting trigger organic compounds that can be used as precursors. Herein, we synthesize multi-color emissive carbon dots (CDs) with an intrinsic photoluminescence quantum yield of 20.46%. These nanostructures were achieved through the one-step hydrothermal treatment of marine polysaccharide chondroitin sulfate, obtained from shark cartilage, in aqueous solution. We successfully demonstrate the low toxicity of our marine resource-derived CDs in zebrafish, and provide an initial assessment of their possible use as a bioimaging agent. Notably, the newly synthesized CDs localize in the intestines of zebrafish larvae, thereby indicating their biocompatibility and potential use as in vivo dyes. How to cite: Kim KW, Choi TY, Kwon YM, et al. Simple synthesis of photoluminescent carbon dots from a marine polysaccharide found in shark cartilage. 
Electron J Biotechnol 2020;47. https://doi.org/10.1016/j.ejbt.2020.07.003.",2020-09-01 +34679165,MMV-db: vaccinomics and RNA-based therapeutics database for infectious hemorrhagic fever-causing mammarenaviruses.,"The recent viral outbreaks and the current pandemic situation urges us to timely address any emerging viral infections by designing therapeutic strategies. Multi-omics and therapeutic data are of great interest to develop early remedial interventions. This work provides a therapeutic data platform (Mammarenavirus (MMV)-db) for pathogenic mammarenaviruses with potential catastrophic effects on human health around the world. The database integrates vaccinomics and RNA-based therapeutics data for seven human pathogenic MMVs associated with severe viral hemorrhagic fever and lethality in humans. Protein-specific cytotoxic T lymphocytes, B lymphocytes, helper T-cell and interferon-inducing epitopes were mapped using a cluster of immune-omics-based algorithms and tools for the seven human pathogenic viral species. Furthermore, the physiochemical and antigenic properties were also explored to guide protein-specific multi-epitope subunit vaccine for each species. Moreover, highly efficacious RNAs (small Interfering RNA (siRNA), microRNA and single guide RNA (sgRNA)) after extensive genome-based analysis with therapeutic relevance were explored. All the therapeutic RNAs were further classified and listed on the basis of predicted higher efficacy. The online platform (http://www.mmvdb.dqweilab-sjtu.com/index.php) contains easily accessible data sets and vaccine designs with potential utility in further computational and experimental work. Conclusively, the current study provides a baseline data platform to secure better future therapeutic interventions against the hemorrhagic fever causing mammarenaviruses. 
Database URL: http://www.mmvdb.dqweilab-sjtu.com/index.php.",2021-10-01 +34755873,ImmReg: the regulon atlas of immune-related pathways across cancer types.,"Immune system gene regulation perturbation has been found to be a major cause of the development of various types of cancer. Numbers of mechanisms contribute to gene expression regulation, thus, systematically identification of potential regulons of immune-related pathways is critical to cancer immunotherapy. Here, we comprehensively chart the landscape of transcription factors, microRNAs, RNA binding proteins and long noncoding RNAs regulation in 17 immune-related pathways across 33 cancers. The potential immunology regulons are likely to exhibit higher expressions in immune cells, show expression perturbations in cancer, and are significantly correlated with immune cell infiltrations. We also identify a panel of clinically relevant immunology regulons across cancers. Moreover, the regulon atlas of immune-related pathways helps prioritizing cancer-related genes (i.e. ETV7, miR-146a-5p, ZFP36 and HCP5). We further identified two molecular subtypes of glioma (cold and hot tumour phenotypes), which were characterized by differences in immune cell infiltrations, expression of checkpoints, and prognosis. Finally, we developed a user-friendly resource, ImmReg (http://bio-bigdata.hrbmu.edu.cn/ImmReg/), with multiple modules to visualize, browse, and download immunology regulation. Our study provides a comprehensive landscape of immunology regulons, which will shed light on future development of RNA-based cancer immunotherapies.",2021-12-01 +34415997,The Breeding Information Management System (BIMS): an online resource for crop breeding. ,"In this era of big data, breeding programs are producing ever larger amounts of data. This necessitates access to efficient management systems to keep track of cross, performance, pedigree, geographical and image-based data, as well as genotyping data. 
In this article, we report the progress on the Breeding Information Management System (BIMS), a free, secure and online breeding management system that allows breeders to store, manage, archive and analyze their private breeding data. BIMS is the first publicly available database system that enables individual breeders to integrate their private phenotypic and genotypic data with public data and, at the same time, have complete control of their own breeding data along with access to tools such as data import/export, data analysis and data archiving. The integration of breeding data with publicly available genomic and genetic data enhances genetic understanding of important traits and maximizes the marker-assisted breeding utility for breeders and allied scientists. BIMS incorporates the use of the Android App Field Book, open-source phenotype data collection software for phones and tablets that allows breeders to replace hard copy field books, thus alleviating the possibility of transcription errors while providing faster access to the collected data. BIMS comes with training materials and support for individual or small group training and is currently implemented in the Genome Database for Rosaceae, CottonGEN, the Citrus Genome Database, the Pulse Crop Database, and the Genome Database for Vaccinium. Database URLs: (https://www.rosaceae.org/), (https://www.cottongen.org/), (https://www.citrusgenomedb.org/), (https://www.pulsedb.org/) and (https://www.vaccinium.org/).",2021-08-01 +34971674,CellDepot: A Unified Repository for scRNA-seq Data and Visual Exploration.,"CellDepot containing over 270 datasets from 8 species and many tissues serves as an integrated web application to empower scientists in exploring single-cell RNA-seq (scRNA-seq) datasets and comparing the datasets among various studies through a user-friendly interface with advanced visualization and analytical capabilities. 
To begin with, it provides an efficient data management system that users can upload single cell datasets and query the database by multiple attributes such as species and cell types. In addition, the graphical multi-logic, multi-condition query builder and convenient filtering tool backed by MySQL database system, allows users to quickly find the datasets of interest and compare the expression of gene(s) across these. Moreover, by embedding the cellxgene VIP tool, CellDepot enables fast exploration of individual dataset in the manner of interactivity and scalability to gain more refined insights such as cell composition, gene expression profiles, and differentially expressed genes among cell types by leveraging more than 20 frequently applied plotting functions and high-level analysis methods in single cell research. In summary, the web portal available at http://celldepot.bxgenomics.com, prompts large scale single cell data sharing, facilitates meta-analysis and visualization, and encourages scientists to contribute to the single-cell community in a tractable and collaborative way. Finally, CellDepot is released as open-source software under MIT license to motivate crowd contribution, broad adoption, and local deployment for private datasets.",2021-12-28 +,Discovering millions of plankton genomic markers from the Atlantic Ocean and the Mediterranean Sea,"Comparison of the molecular diversity in all plankton populations present in geographically distant water columns may allow for a holistic view of the connectivity, isolation and adaptation of organisms in the marine environment. In this context, a large‐scale detection and analysis of genomic variants directly in metagenomic data appeared as a powerful strategy for the identification of genetic structures and genes under natural selection in plankton. 
Here, we used discosnp++, a reference‐free variant caller, to produce genetic variants from large‐scale metagenomic data and assessed its accuracy on the copepod Oithona nana in terms of variant calling, allele frequency estimation and population genomic statistics by comparing it to the state‐of‐the‐art method. discosnp ++ produces variants leading to similar conclusions regarding the genetic structure and identification of loci under natural selection. discosnp++ was then applied to 120 metagenomic samples from four size fractions, including prokaryotes, protists and zooplankton sampled from 39 tara Oceans sampling stations located in the Atlantic Ocean and the Mediterranean Sea to produce a new set of marine genomic markers containing more than 19 million of variants. This new genomic resource can be used by the community to relocate these markers on their plankton genomes or transcriptomes of interest. This resource will be updated with new marine expeditions and the increase of metagenomic data (availability: http://bioinformatique.rennes.inria.fr/taravariants/).",2019-03-01 +33954026,ZebraShare: a new venue for rapid dissemination of zebrafish mutant data.,"

Background

In the past decade, the zebrafish community has widely embraced targeted mutagenesis technologies, resulting in an abundance of mutant lines. While many lines have proven to be useful for investigating gene function, many have also shown no apparent phenotype, or phenotypes not of interest to the originating lab. In order for labs to document and share information about these lines, we have created ZebraShare as a new resource offered within ZFIN.

Methods

ZebraShare involves a form-based submission process generated by ZFIN. The ZebraShare interface (https://zfin.org/action/zebrashare) can be accessed on ZFIN under ""Submit Data"". Users download the Submission Workbook and complete the required fields, then submit the completed workbook with associated images and captions, generating a new ZFIN publication record. ZFIN curators add the submitted phenotype and mutant information to the ZFIN database, provide mapping information about mutations, and cross reference this information across the appropriate ZFIN databases. We present here examples of ZebraShare submissions, including phf21aa, kdm1a, ctnnd1, snu13a, and snu13b mutant lines.

Results

Users can find ZebraShare submissions by searching ZFIN for specific alleles or line designations, just as for alleles submitted through the normal process. We present several potential examples of submission types to ZebraShare including aphenotypic mutants, mildly phenotypic, and early lethal mutants. Mutants for kdm1a show no apparent skeletal phenotype, and phf21aa mutants show only a mild skeletal phenotype, yet these genes have specific human disease relevance and therefore may be useful for further studies. The p120-catenin encoding gene, ctnnd1, was knocked out to investigate a potential role in brain development or function. The homozygous ctnnd1 mutant disintegrates during early somitogenesis and the heterozygote has localized defects, revealing vital roles in early development. Two snu13 genes were knocked out to investigate a role in muscle formation. The snu13a;snu13b double mutant has an early embryonic lethal phenotype, potentially related to a proposed role in the core splicing complex. In each example, the mutants submitted to ZebraShare display phenotypes that are not ideally suited to their originating lab's project directions but may be of great relevance to other researchers.

Conclusion

ZebraShare provides an opportunity for researchers to directly share information about mutant lines within ZFIN, which is widely used by the community as a central database of information about zebrafish lines. Submissions of alleles with a phenotypic or unexpected phenotypes is encouraged to promote collaborations, disseminate lines, reduce redundancy of effort and to promote efficient use of time and resources. We anticipate that as submissions to ZebraShare increase, they will help build an ultimately more complete picture of zebrafish genetics and development.",2021-04-13 +31665479,ProteomicsDB: a multi-omics and multi-organism resource for life science research.,"ProteomicsDB (https://www.ProteomicsDB.org) started as a protein-centric in-memory database for the exploration of large collections of quantitative mass spectrometry-based proteomics data. The data types and contents grew over time to include RNA-Seq expression data, drug-target interactions and cell line viability data. In this manuscript, we summarize new developments since the previous update that was published in Nucleic Acids Research in 2017. Over the past two years, we have enriched the data content by additional datasets and extended the platform to support protein turnover data. Another important new addition is that ProteomicsDB now supports the storage and visualization of data collected from other organisms, exemplified by Arabidopsis thaliana. Due to the generic design of ProteomicsDB, all analytical features available for the original human resource seamlessly transfer to other organisms. Furthermore, we introduce a new service in ProteomicsDB which allows users to upload their own expression datasets and analyze them alongside with data stored in ProteomicsDB. 
Initially, users will be able to make use of this feature in the interactive heat map functionality as well as the drug sensitivity prediction, but ultimately will be able to use all analytical features of ProteomicsDB in this way.",2020-01-01 +33501897,Fortunes of Dragons: Cohort size effects on life outcomes.,"This paper examines the long-term effects of birth cohort size on life outcomes. Using administrative data from Singapore, we study the outcomes of large birth cohorts created by the Chinese superstitious practice of zodiac birth timing, where parents prefer to give birth in the year of the Dragon. This practice is followed exclusively by the Chinese majority, with no similar patterns detected among non-Chinese minorities, allowing us to differentiate cohort size effects from confounding year-of-birth effects. Despite government efforts to increase public educational resources for these cohorts, Chinese Dragons earn lower incomes and are less likely to gain admission to national universities. There is also evidence of negative externalities on non-practising populations who happen to enter the labour market at the same time as Chinese Dragons. Our analysis suggests that the adverse life outcomes are not due to selection, but rather reflect the aggregate resource implications of birth cohort size.Supplementary material is available for this article at: https://doi.org/10.1080/00324728.2020.1864458.",2021-01-27 +31584092,PDBe-KB: a community-driven resource for structural and functional annotations.,"The Protein Data Bank in Europe-Knowledge Base (PDBe-KB, https://pdbe-kb.org) is a community-driven, collaborative resource for literature-derived, manually curated and computationally predicted structural and functional annotations of macromolecular structure data, contained in the Protein Data Bank (PDB). 
The goal of PDBe-KB is two-fold: (i) to increase the visibility and reduce the fragmentation of annotations contributed by specialist data resources, and to make these data more findable, accessible, interoperable and reusable (FAIR) and (ii) to place macromolecular structure data in their biological context, thus facilitating their use by the broader scientific community in fundamental and applied research. Here, we describe the guidelines of this collaborative effort, the current status of contributed data, and the PDBe-KB infrastructure, which includes the data exchange format, the deposition system for added value annotations, the distributable database containing the assembled data, and programmatic access endpoints. We also describe a series of novel web-pages-the PDBe-KB aggregated views of structure data-which combine information on macromolecular structures from many PDB entries. We have recently released the first set of pages in this series, which provide an overview of available structural and functional information for a protein of interest, referenced by a UniProtKB accession.",2020-01-01 +32761142,"NCBI Taxonomy: a comprehensive update on curation, resources and tools. ","The National Center for Biotechnology Information (NCBI) Taxonomy includes organism names and classifications for every sequence in the nucleotide and protein sequence databases of the International Nucleotide Sequence Database Collaboration. Since the last review of this resource in 2012, it has undergone several improvements. Most notable is the shift from a single SQL database to a series of linked databases tied to a framework of data called NameBank. This means that relations among data elements can be adjusted in more detail, resulting in expanded annotation of synonyms, the ability to flag names with specific nomenclatural properties, enhanced tracking of publications tied to names and improved annotation of scientific authorities and types. 
Additionally, practices utilized by NCBI Taxonomy curators specific to major taxonomic groups are described, terms peculiar to NCBI Taxonomy are explained, external resources are acknowledged and updates to tools and other resources are documented. Database URL: https://www.ncbi.nlm.nih.gov/taxonomy.",2020-01-01 +34838806,Bacteria.guru: Comparative Transcriptomics and Co-Expression Database for Bacterial Pathogens.,"While bacteria can be beneficial to our health, their deadly pathogenic potential has been an ever-present concern exacerbated by the emergence of drug-resistant strains. As such, there is a pressing urgency for an enhanced understanding of their gene function and regulation, which could mediate the development of novel antimicrobials. Transcriptomic analyses have been established as insightful and indispensable to the functional characterization of genes and identification of new biological pathways, but in the context of bacterial studies, they remain limited to species-specific datasets. To address this, we integrated the genomic and transcriptomic data of the 17 most notorious and researched bacterial pathogens, creating bacteria.guru, an interactive database that can identify, visualize, and compare gene expression profiles, coexpression networks, functionally enriched clusters, and gene families across species. Through illustrating antibiotic resistance mechanisms in P. aeruginosa, we demonstrate that bacteria.guru could potentially aid in discovering multi-faceted antibiotic targets and, overall, facilitate future bacterial research. AVAILABILITY: The database and coexpression networks are freely available from https://bacteria.guru/. 
Sample annotations can be found in the supplemental data.",2021-11-25 +34224351,Deep Learning for Ultrasound Image Formation: CUBDL Evaluation Framework and Open Datasets.,"Deep learning for ultrasound image formation is rapidly garnering research support and attention, quickly rising as the latest frontier in ultrasound image formation, with much promise to balance both image quality and display speed. Despite this promise, one challenge with identifying optimal solutions is the absence of unified evaluation methods and datasets that are not specific to a single research group. This article introduces the largest known international database of ultrasound channel data and describes the associated evaluation methods that were initially developed for the challenge on ultrasound beamforming with deep learning (CUBDL), which was offered as a component of the 2020 IEEE International Ultrasonics Symposium. We summarize the challenge results and present qualitative and quantitative assessments using both the initially closed CUBDL evaluation test dataset (which was crowd-sourced from multiple groups around the world) and additional in vivo breast ultrasound data contributed after the challenge was completed. As an example quantitative assessment, single plane wave images from the CUBDL Task 1 dataset produced a mean generalized contrast-to-noise ratio (gCNR) of 0.67 and a mean lateral resolution of 0.42 mm when formed with delay-and-sum beamforming, compared with a mean gCNR as high as 0.81 and a mean lateral resolution as low as 0.32 mm when formed with networks submitted by the challenge winners. We also describe contributed CUBDL data that may be used for training of future networks. The compiled database includes a total of 576 image acquisition sequences. We additionally introduce a neural-network-based global sound speed estimator implementation that was necessary to fairly evaluate the results obtained with this international database. 
The integration of CUBDL evaluation methods, evaluation code, network weights from the challenge winners, and all datasets described herein are publicly available (visit https://cubdl.jhu.edu for details).",2021-11-23 +31724722,DDBJ Database updates and computational infrastructure enhancement.,"The Bioinformation and DDBJ Center (https://www.ddbj.nig.ac.jp) in the National Institute of Genetics (NIG) maintains a primary nucleotide sequence database as a member of the International Nucleotide Sequence Database Collaboration (INSDC) in partnership with the US National Center for Biotechnology Information and the European Bioinformatics Institute. The NIG operates the NIG supercomputer as a computational basis for the construction of DDBJ databases and as a large-scale computational resource for Japanese biologists and medical researchers. In order to accommodate the rapidly growing amount of deoxyribonucleic acid (DNA) nucleotide sequence data, NIG replaced its supercomputer system, which is designed for big data analysis of genome data, in early 2019. The new system is equipped with 30 PB of DNA data archiving storage; large-scale parallel distributed file systems (13.8 PB in total) and 1.1 PFLOPS computation nodes and graphics processing units (GPUs). Moreover, as a starting point of developing multi-cloud infrastructure of bioinformatics, we have also installed an automatic file transfer system that allows users to prevent data lock-in and to achieve cost/performance balance by exploiting the most suitable environment from among the supercomputer and public clouds for different workloads.",2020-01-01 +34714871,SpeCollate: Deep cross-modal similarity network for mass spectrometry data based peptide deductions.,"Historically, the database search algorithms have been the de facto standard for inferring peptides from mass spectrometry (MS) data. 
Database search algorithms deduce peptides by transforming theoretical peptides into theoretical spectra and matching them to the experimental spectra. Heuristic similarity-scoring functions are used to match an experimental spectrum to a theoretical spectrum. However, the heuristic nature of the scoring functions and the simple transformation of the peptides into theoretical spectra, along with noisy mass spectra for the less abundant peptides, can introduce a cascade of inaccuracies. In this paper, we design and implement a Deep Cross-Modal Similarity Network called SpeCollate, which overcomes these inaccuracies by learning the similarity function between experimental spectra and peptides directly from the labeled MS data. SpeCollate transforms spectra and peptides into a shared Euclidean subspace by learning fixed size embeddings for both. Our proposed deep-learning network trains on sextuplets of positive and negative examples coupled with our custom-designed SNAP-loss function. Online hardest negative mining is used to select the appropriate negative examples for optimal training performance. We use 4.8 million sextuplets obtained from the NIST and MassIVE peptide libraries to train the network and demonstrate that for closed search, SpeCollate is able to perform better than Crux and MSFragger in terms of the number of peptide-spectrum matches (PSMs) and unique peptides identified under 1% FDR for real-world data. SpeCollate also identifies a large number of peptides not reported by either Crux or MSFragger. To the best of our knowledge, our proposed SpeCollate is the first deep-learning network that can determine the cross-modal similarity between peptides and mass-spectra for MS-based proteomics. We believe SpeCollate is significant progress towards developing machine-learning solutions for MS-based omics data analysis. SpeCollate is available at https://deepspecs.github.io/.",2021-10-29 +28185543,An Atlas of annotations of Hydra vulgaris transcriptome.,"

Background

RNA sequencing takes advantage of the Next Generation Sequencing (NGS) technologies for analyzing RNA transcript counts with excellent accuracy. Trying to interpret this huge amount of data in biological information is still a key issue, which is why the creation of web-resources useful for their analysis is highly desirable.

Results

Starting from a previous work, Transcriptator, we present the Atlas of Hydra vulgaris, an extensible web tool in which its complete transcriptome is annotated. In order to provide to the users an advantageous resource that includes the whole functional annotated transcriptome of Hydra vulgaris water polyp, we implemented the Atlas web-tool, which contains 31.988 accessible and downloadable transcripts of this non-reference model organism.

Conclusion

Atlas, as a freely available resource, can be considered a valuable tool to rapidly retrieve functional annotation for transcripts differentially expressed in Hydra vulgaris exposed to the distinct experimental treatments. WEB RESOURCE URL: http://www-labgtp.na.icar.cnr.it/Atlas .",2016-09-22 +33104802,"SMART: recent updates, new developments and status in 2020.","SMART (Simple Modular Architecture Research Tool) is a web resource (https://smart.embl.de) for the identification and annotation of protein domains and the analysis of protein domain architectures. SMART version 9 contains manually curated models for more than 1300 protein domains, with a topical set of 68 new models added since our last update article (1). All the new models are for diverse recombinase families and subfamilies and as a set they provide a comprehensive overview of mobile element recombinases namely transposase, integrase, relaxase, resolvase, cas1 casposase and Xer like cellular recombinase. Further updates include the synchronization of the underlying protein databases with UniProt (2), Ensembl (3) and STRING (4), greatly increasing the total number of annotated domains and other protein features available in architecture analysis mode. Furthermore, SMART's vector-based protein display engine has been extended and updated to use the latest web technologies and the domain architecture analysis components have been optimized to handle the increased number of protein features available.",2021-01-01 +31841142,GlyMDB: Glycan Microarray Database and analysis toolset.,"

Motivation

Glycan microarrays are capable of illuminating the interactions of glycan-binding proteins (GBPs) against hundreds of defined glycan structures, and have revolutionized the investigations of protein-carbohydrate interactions underlying numerous critical biological activities. However, it is difficult to interpret microarray data and identify structural determinants promoting glycan binding to glycan-binding proteins due to the ambiguity in microarray fluorescence intensity and complexity in branched glycan structures. To facilitate analysis of glycan microarray data alongside protein structure, we have built the Glycan Microarray Database (GlyMDB), a web-based resource including a searchable database of glycan microarray samples and a toolset for data/structure analysis.

Results

The current GlyMDB provides data visualization and glycan-binding motif discovery for 5203 glycan microarray samples collected from the Consortium for Functional Glycomics. The unique feature of GlyMDB is to link microarray data to PDB structures. The GlyMDB provides different options for database query, and allows users to upload their microarray data for analysis. After search or upload is complete, users can choose the criterion for binder versus non-binder classification. They can view the signal intensity graph including the binder/non-binder threshold followed by a list of glycan-binding motifs. One can also compare the fluorescence intensity data from two different microarray samples. A protein sequence-based search is performed using BLAST to match microarray data with all available PDB structures containing glycans. The glycan ligand information is displayed, and links are provided for structural visualization and redirection to other modules in GlycanStructure.ORG for further investigation of glycan-binding sites and glycan structures.

Availability and implementation

http://www.glycanstructure.org/glymdb.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +33306801,"CamRegBase: a gene regulation database for the biofuel crop, Camelina sativa. ","Camelina is an annual oilseed plant from the Brassicaceae family that is gaining momentum as a biofuel winter cover crop. However, a significant limitation in further enhancing its utility as a producer of oils that can be used as biofuels, jet fuels or bio-based products is the absence of a repository for all the gene expression and regulatory information that is being rapidly generated by the community. Here, we provide CamRegBase (https://camregbase.org/) as a one-stop resource to access Camelina information on gene expression and co-expression, transcription factors, lipid associated genes and genome-wide orthologs in the close-relative reference plant Arabidopsis. We envision this as a resource of curated information for users, as well as a repository of new gene regulation information.",2020-12-01 +34901042,Vaccine Hesitancy Is a Barrier to Achieving Equitable Herd Immunity Among Racial Minorities.,"Introduction: Racial minority groups have been disproportionately affected by the 2019 novel coronavirus disease (COVID-19). Vaccine hesitancy may be a major barrier to achieving equitable herd immunity and must be addressed to reduce the excess morbidity and mortality of COVID-19 in disproportionately affected communities. This study aimed to determine if COVID-19 vaccine hesitancy, and its factors vaccine complacency and confidence, are more prominent among disproportionately affected racial minority groups. Methods:We collected data from participants aged 18 years or older from the four most populous U.S. states, including New York, California, Florida, and Texas, and Canada. Data were collected using a web-based survey platform. Data are available at http://www.covid19-database.com. Results:Data from 4,434 participants were included [mean (SD) age = 48.7 (17.2) and 50.4% women]. 
Vaccine hesitancy was higher in Black, Indigenous (Native American and Indigenous People of Canada, including First Nations, Inuit and Métis), and Latinx compared to White participants, while no difference was found between East Asian and White participants. The group differences in vaccine hesitancy for Indigenous and Black compared to White participants remained after controlling for sociodemographic factors. Determinants of vaccine complacency were equivalent between disproportionately affected racial groups and white participants. Vaccine confidence (i.e., trust in vaccine benefit) was generally lower in all racial groups compared to White participants. Differences in vaccine mistrust comparing Black and East Asian to White participants remained after controlling for sociodemographic factors. Discussion:Disproportionately affected racial minorities may have higher vaccine hesitancy and lower confidence in COVID-19 vaccines. Public health and other relevant government services should address vaccine hesitancy among racial minorities using a culturally sensitive, community-centered approach to attain equitable herd immunity.",2021-11-24 +34697108,R|S Atlas: Identifying existing cohort study data resources to accelerate epidemiological research on the influence of religion and spirituality on human health.,"

Objective

Many studies have documented significant associations between religion and spirituality (R/S) and health, but relatively few prospective analyses exist that can support causal inferences. To date, there has been no systematic analysis of R/S survey items collected in US cohort studies. We conducted a systematic content analysis of all surveys ever fielded in 20 diverse US cohort studies funded by the National Institutes of Health (NIH) to identify all R/S-related items collected from each cohort's baseline survey through 2014.

Design

An R|S Ontology was developed from our systematic content analysis to categorise all R/S survey items identified into key conceptual categories. A systematic literature review was completed for each R/S item to identify any cohort publications involving these items through 2018.

Results

Our content analysis identified 319 R/S survey items, reflecting 213 unique R/S constructs and 50 R|S Ontology categories. 193 of the 319 extant R/S survey items had been analysed in at least one published paper. Using these data, we created the R|S Atlas (https://atlas.mgh.harvard.edu/), a publicly available, online relational database that allows investigators to identify R/S survey items that have been collected by US cohorts, and to further refine searches by other key data available in cohorts that may be necessary for a given study (eg, race/ethnicity, availability of DNA or geocoded data).

Conclusions

R|S Atlas not only allows researchers to identify available sources of R/S data in cohort studies but will also assist in identifying novel research questions that have yet to be explored within the context of US cohort studies.",2021-10-25 +30666329,BINOPtimal: a web tool for optimal chiral phosphoric acid catalyst selection.,"A catalyst selection program, BINOPtimal, has been developed. This interactive web tool selects the best performing chiral phosphoric acid catalysts from analysis of the starting materials, imine and nucleophile, on the basis of rules derived from the transformations within its database. This procedure has been applied to an example transformation demonstrating the potential to assist reaction design. The tool is available at www-mmm.ch.cam.ac.uk.",2019-02-01 +34517763,TnCentral: a Prokaryotic Transposable Element Database and Web Portal for Transposon Analysis.,"We describe here the structure and organization of TnCentral (https://tncentral.proteininformationresource.org/ [or the mirror link at https://tncentral.ncc.unesp.br/]), a web resource for prokaryotic transposable elements (TE). TnCentral currently contains ∼400 carefully annotated TE, including transposons from the Tn3, Tn7, Tn402, and Tn554 families; compound transposons; integrons; and associated insertion sequences (IS). These TE carry passenger genes, including genes conferring resistance to over 25 classes of antibiotics and nine types of heavy metal, as well as genes responsible for pathogenesis in plants, toxin/antitoxin gene pairs, transcription factors, and genes involved in metabolism. Each TE has its own entry page, providing details about its transposition genes, passenger genes, and other sequence features required for transposition, as well as a graphical map of all features. TnCentral content can be browsed and queried through text- and sequence-based searches with a graphic output. 
We describe three use cases, which illustrate how the search interface, results tables, and entry pages can be used to explore and compare TE. TnCentral also includes downloadable software to facilitate user-driven identification, with manual annotation, of certain types of TE in genomic sequences. Through the TnCentral homepage, users can also access TnPedia, which provides comprehensive reviews of the major TE families, including an extensive general section and specialized sections with descriptions of insertion sequence and transposon families. TnCentral and TnPedia are intuitive resources that can be used by clinicians and scientists to assess TE diversity in clinical, veterinary, and environmental samples. IMPORTANCE The ability of bacteria to undergo rapid evolution and adapt to changing environmental circumstances drives the public health crisis of multiple antibiotic resistance, as well as outbreaks of disease in economically important agricultural crops and animal husbandry. Prokaryotic transposable elements (TE) play a critical role in this. Many carry ""passenger genes"" (not required for the transposition process) conferring resistance to antibiotics or heavy metals or causing disease in plants and animals. Passenger genes are spread by normal TE transposition activities and by insertion into plasmids, which then spread via conjugation within and across bacterial populations. Thus, an understanding of TE composition and transposition mechanisms is key to developing strategies to combat bacterial pathogenesis. Toward this end, we have developed TnCentral, a bioinformatics resource dedicated to describing and exploring the structural and functional features of prokaryotic TE whose use is intuitive and accessible to users with or without bioinformatics expertise.",2021-09-14 +34097004,DevOmics: an integrated multi-omics database of human and mouse early embryo. 
,"Transcriptomic and epigenetic alterations during early embryo development have been proven to play essential roles in regulating the cell fate. Nowadays, advances in single-cell transcriptomics and epigenomics profiling techniques provide large volumes of data for understanding the molecular regulatory mechanisms in early embryos and facilitate the investigation of assisted reproductive technology as well as preimplantation genetic testing. However, the lack of integrated data collection and unified analytic procedures greatly limits their usage in scientific research and clinical application. Hence, it is necessary to establish a database integrating the regulatory information of human and mouse early embryos with unified analytic procedures. Here, we introduce DevOmics (http://devomics.cn/), which contains normalized gene expression, DNA methylation, histone modifications (H3K4me3, H3K9me3, H3K27me3, H3K27ac), chromatin accessibility and 3D chromatin architecture profiles of human and mouse early embryos spanning six developmental stages (zygote, 2cell, 4cell, 8cell, morula and blastocyst (ICM, TE)). The current version of DevOmics provides Search and Advanced Search for retrieving genes a researcher is interested in, Analysis Tools including the differentially expressed genes (DEGs) analysis for acquiring DEGs between different types of samples, allelic explorer for displaying allele-specific gene expression as well as epigenetic modifications and correlation analysis for showing the dynamic changes in different layers of data across developmental stages, as well as Genome Browser and Ortholog for visualization. 
DevOmics offers a user-friendly website for biologists and clinicians to decipher molecular regulatory mechanisms of human and mouse early embryos.",2021-11-01 +33112702,LncRBase V.2: an updated resource for multispecies lncRNAs and ClinicLSNP hosting genetic variants in lncRNAs for cancer patients.,"The recent discovery of long non-coding RNA as a regulatory molecule in the cellular system has altered the concept of the functional aptitude of the genome. Since our publication of the first version of LncRBase in 2014, there has been an enormous increase in the number of annotated lncRNAs of multiple species other than Human and Mouse. LncRBase V.2 hosts information of 549,648 lncRNAs corresponding to six additional species besides Human and Mouse, viz. Rat, Fruitfly, Zebrafish, Chicken, Cow and C.elegans. It provides additional distinct features such as (i) Transcription Factor Binding Site (TFBS) in the lncRNA promoter region, (ii) sub-cellular localization pattern of lncRNAs (iii) lnc-pri-miRNAs (iv) Possible small open reading frames (sORFs) within lncRNA. (v) Manually curated information of interacting target molecules and disease association of lncRNA genes (vi) Distribution of lncRNAs across multiple tissues of all species. Moreover, we have hosted ClinicLSNP within LncRBase V.2. ClinicLSNP has a comprehensive catalogue of lncRNA variants present within breast, ovarian, and cervical cancer inferred from 561 RNA-Seq data corresponding to these cancers. Further, we have checked whether these lncRNA variants overlap with (i)Repeat elements,(ii)CGI, (iii)TFBS within lncRNA loci (iv)SNP localization in trait-associated Linkage Disequilibrium(LD) region, (v)predicted the potentially pathogenic variants and (vi)effect of SNP on lncRNA secondary structure. Overall, LncRBaseV.2 is a user-friendly database to survey, search and retrieve information about multi-species lncRNAs. 
Further, ClinicLSNP will serve as a useful resource for cancer specific lncRNA variants and their related information. The database is freely accessible and available at http://dibresources.jcbose.ac.in/zhumur/lncrbase2/.",2020-10-28 +,Two hours in Hollywood: A manually annotated ground truth data set of eye movements during movie clip watching,"In this short article we present our manual annotation of the eye movement events in a +subset of the large-scale eye tracking data set Hollywood2. Our labels include fixations, +saccades, and smooth pursuits, as well as a noise event type (the latter representing either +blinks, loss of tracking, or physically implausible signals). In order to achieve more +consistent annotations, the gaze samples were labelled by a novice rater based on +rudimentary algorithmic suggestions, and subsequently corrected by an expert rater. +Overall, we annotated eye movement events in the recordings corresponding to 50 +randomly selected test set clips and 6 training set clips from Hollywood2, which were +viewed by 16 observers and amount to a total of approximately 130 minutes of gaze data. +In these labels, 62.4% of the samples were attributed to fixations, 9.1% – to saccades, and, +notably, 24.2% – to pursuit (the remainder marked as noise). After evaluation of 15 +published eye movement classification algorithms on our newly collected annotated data +set, we found that the most recent algorithms perform very well on average, and even +reach human-level labelling quality for fixations and saccades, but all have a much larger +room for improvement when it comes to smooth pursuit classification. 
The data set is +made available at https://gin.g-node.org/ioannis.agtzidis/hollywood2_em.",2021-04-08 +32330435,Identification and Evaluation of Controlled Trials in Pediatric Cardiology: Crowdsourced Scoping Review and Creation of Accessible Searchable Database.,"Cardiac disease in children is associated with significant morbidity and mortality as well as increased health resource utilisation. There is a perception that there is a paucity of high-quality studies, particularly randomized controlled trials (RCTs), in the field of pediatric cardiology. We sought to identify, examine, and map the range of RCTs conducted in children with cardiac conditions, including the development of a searchable open-access database. A literature search was conducted encompassing MEDLINE, EMBASE, and the Cochrane Central Register of Controlled Trials from inception to 2018. All English-language RCTs enrolling children (age 0-21 years) with cardiac conditions were included. Data extraction and risk of bias assessments were performed in duplicate via crowdsourcing for each eligible study and entered into an online database. A total of 933 RCTs met eligibility criteria. Median trial recruitment was 49 patients (interquartile range 30-86) with 18.9% of studies (n = 176) including > 100 patients. A wide variety of populations and interventions were encompassed with congenital heart disease (79.8% of RCTs) and medications (63.3% of RCTs) often studied. Just over one-half of the trials (53.4%) clearly identified a primary outcome, and fewer than half (46.6%) fully documented a robust randomization process. Trials were summarised in a searchable online database (https://pediatrics.knack.com/cardiology-rct-database#cardiology-rcts/). Contrary to a commonly held perception, there are nearly 1,000 published RCTs in pediatric cardiology. 
The open-access database created as part of this project provides a resource that facilitates an efficient comprehensive review of the literature for clinicians and researchers caring for children with cardiac issues.",2020-02-15 +34407614,Tracking Air Pollution in China: Near Real-Time PM2.5 Retrievals from Multisource Data Fusion.,"Air pollution has altered the Earth's radiation balance, disturbed the ecosystem, and increased human morbidity and mortality. Accordingly, a full-coverage high-resolution air pollutant data set with timely updates and historical long-term records is essential to support both research and environmental management. Here, for the first time, we develop a near real-time air pollutant database known as Tracking Air Pollution in China (TAP, http://tapdata.org.cn/) that combines information from multiple data sources, including ground observations, satellite aerosol optical depth (AOD), operational chemical transport model simulations, and other ancillary data such as meteorological fields, land use data, population, and elevation. Daily full-coverage PM2.5 data at a spatial resolution of 10 km is our first near real-time product. The TAP PM2.5 is estimated based on a two-stage machine learning model coupled with the synthetic minority oversampling technique and a tree-based gap-filling method. Our model has an averaged out-of-bag cross-validation R2 of 0.83 for different years, which is comparable to those of other studies, but improves its performance at high pollution levels and fills the gaps in missing AOD on daily scale. The full coverage and near real-time updates of the daily PM2.5 data allow us to track the day-to-day variations in PM2.5 concentrations over China in a timely manner. The long-term records of PM2.5 data since 2000 will also support policy assessments and health impact studies. 
The TAP PM2.5 data are publicly available through our website for sharing with the research and policy communities.",2021-08-18 +31642484,SilkDB 3.0: visualizing and exploring multiple levels of data for silkworm.,"SilkDB is an open-accessibility database and powerful platform that provides comprehensive information on the silkworm (Bombyx mori) genome. Since SilkDB 2.0 was released 10 years ago, vast quantities of data about multiple aspects of the silkworm have been generated, including genome, transcriptome, Hi-C and pangenome. To visualize data at these different biological levels, we present SilkDB 3.0 (https://silkdb.bioinfotoolkits.net), a visual analytic tool for exploring silkworm data through an interactive user interface. The database contains a high-quality chromosome-level assembly of the silkworm genome, and its coding sequences and gene sets are more accurate than those in the previous version. SilkDB 3.0 provides a view of the information for each gene at the levels of sequence, protein structure, gene family, orthology, synteny, genome organization and gives access to gene expression information, genetic variation and genome interaction map. A set of visualization tools are available to display the abundant information in the above datasets. With an improved interactive user interface for the integration of large data sets, the updated SilkDB 3.0 database will be a valuable resource for the silkworm and insect research community.",2020-01-01 +34968387,Epidemiological and ecological consequences of virus manipulation of host and vector in plant virus transmission.,"Many plant viruses are transmitted by insect vectors. Transmission can be described as persistent or non-persistent depending on rates of acquisition, retention, and inoculation of virus. Much experimental evidence has accumulated indicating vectors can prefer to settle and/or feed on infected versus noninfected host plants. 
For persistent transmission, vector preference can also be conditional, depending on the vector's own infection status. Since viruses can alter host plant quality as a resource for feeding, infection potentially also affects vector population dynamics. Here we use mathematical modelling to develop a theoretical framework addressing the effects of vector preferences for landing, settling and feeding-as well as potential effects of infection on vector population density-on plant virus epidemics. We explore the consequences of preferences that depend on the host (infected or healthy) and vector (viruliferous or nonviruliferous) phenotypes, and how this is affected by the form of transmission, persistent or non-persistent. We show how different components of vector preference have characteristic effects on both the basic reproduction number and the final incidence of disease. We also show how vector preference can induce bistability, in which the virus is able to persist even when it cannot invade from very low densities. Feedbacks between plant infection status, vector population dynamics and virus transmission potentially lead to very complex dynamics, including sustained oscillations. Our work is supported by an interactive interface https://plantdiseasevectorpreference.herokuapp.com/. Our model reiterates the importance of coupling virus infection to vector behaviour, life history and population dynamics to fully understand plant virus epidemics.",2021-12-30 +34642378,Observational study on the efficiency of Neonatal Emergency Transport in reducing mortality and morbidity indexes in Sicily.,"In these last 25 years, the Neonatal Emergency Transport (NET) service has been widely improved in Italy. To date, all National areas are covered by a NET service; 53 NET centers have been activated in all the Italian territory. 
Herein, the authors present an observational study to evaluate the rate of infantile mortality after introduction of NET in Sicily, and to study the efficiency of this service in reducing these rates of mortality in vulnerable neonates, transported from primary care birth centers to tertiary facilities to undergo specialized NICU assistance. All neonates who required an emergency transport by NETS were included. No exclusion criteria were applied. Demographic and regional infantile mortality data, expressed as infant mortality rate, were selected by the official government database (ISTAT- National Statistic Institute- http://www.istat.it ). All data were respectively divided into three groups: data concerning transport, clinical condition, and mortality of the transported patients. We transported by NET 325 neonates. The analysis of the infant mortality rate (per 1.000 live births) in Catania from 2016 to 2018 was reduced compared to the same rate calculated before NETS activation (4.41 index before 2016 vs 4.17 index after 2016). These data showed an increase in other provinces (Enna, Caltanissetta, and Agrigento). 61% of neonates showed a respiratory disease. During the study period the proportion of neonates with a Mortality Index for Neonatal Transportation-MINT < 6 has been reduced, while there was an increase of neonates with higher Transport Risk Index of Physiologic Stability-TRIPS score results. The slight decrease of infantile mortality in Catania during the first three years after introduction of NET follows the same trend of all Italian territories, showing the importance of this service in reducing infantile mortality.",2021-10-12 +34525303,ReactionDataExtractor: A Tool for Automated Extraction of Information from Chemical Reaction Schemes.,"Chemical reaction schemes are commonly used for visual encapsulation of chemical information. 
Figures of reaction schemes contain chemical transformations, the chemical species involved, as well as reaction conditions. From a data-mining point of view, they constitute rich sources, densely packed with knowledge. Yet, the challenge of automatically extracting data from them has remained largely untackled. This work presents ReactionDataExtractor, a software tool that can be used for the automatic extraction of information from multistep reaction schemes. Its capabilities include segmentation of reaction steps, regions containing reaction conditions, chemical diagrams, as well as optical character and structure recognition. A combination of rules and unsupervised machine-learning approaches is used, with bespoke detection algorithms that identify arrows, structures, labels, and conditions detection algorithms. It can be used as a low-maintenance tool for database generation capable of extracting data from large quantities of images supplied by the user. On assessment using a self-generated evaluation set, the tool achieved precision and recall metrics of between 67% and 91% in the six core areas of data extraction. The ReactionDataExtractor tool is released under the MIT license and is available to download from http://www.reactiondataextractor.org.",2021-09-15 +,DART radiative transfer modelling for sloping landscapes,"Topography is one of the key factors that impact remotely sensed data and their interpretation. Indeed, combined with the viewing geometry and neighbour effects, it strongly affects the direct, diffuse and multi-scattered scene irradiance, which in turn impacts the radiative budget and remote sensing signals of the landscapes. The increased availability of digital elevation models (DEM) and the advancement of 3D radiative transfer (RT) models allow us to better address these topographic effects. 
DART (Discrete Anisotropic Radiative Transfer) is one of the most accurate and comprehensive 3D RT models that simulate remote sensing observations of natural and urban landscapes with topography and atmosphere. It simulates environmental effects (i.e., impact of adjacent landscape on the observed landscape) using a so-called infinite slope mode that infinitely duplicates the observed landscape while ensuring the continuity of slope and altitude at the DEM edges. Up to DART version 5.7.4, this mode was slightly inaccurate and computer intensive, depending on the topography. This paper presents an innovative modelling strategy that greatly improves it in terms of accuracy, image quality and computer efficiency. For that, a fictive auxiliary oblique plane, adapted to the landscape topography, is introduced for managing the scene illumination, the Earth-Atmosphere coupling and the storage of the radiation that exits the scene before being projected onto the sensor plane. Improvements and validations are illustrated both visually and quantitatively by DART images, radiometric products and radiative budget. For example, the observed reflectance of a Lambertian slope is equal to the expected analytical value. In addition, the solar plane reflectance of a forest on a mountain slope (experimental scene) has an average error of about 0.01% relative to the reflectance of the same forest stand in the reference scene (i.e., nine duplications of the experimental scene). This new modelling is already integrated in the official DART version (https://dart.omp.eu).",2021-09-01 +33125081,KEGG: integrating viruses and cellular organisms.,"KEGG (https://www.kegg.jp/) is a manually curated resource integrating eighteen databases categorized into systems, genomic, chemical and health information. It also provides KEGG mapping tools, which enable understanding of cellular and organism-level functions from genome sequences and other molecular datasets. 
KEGG mapping is a predictive method of reconstructing molecular network systems from molecular building blocks based on the concept of functional orthologs. Since the introduction of the KEGG NETWORK database, various diseases have been associated with network variants, which are perturbed molecular networks caused by human gene variants, viruses, other pathogens and environmental factors. The network variation maps are created as aligned sets of related networks showing, for example, how different viruses inhibit or activate specific cellular signaling pathways. The KEGG pathway maps are now integrated with network variation maps in the NETWORK database, as well as with conserved functional units of KEGG modules and reaction modules in the MODULE database. The KO database for functional orthologs continues to be improved and virus KOs are being expanded for better understanding of virus-cell interactions and for enabling prediction of viral perturbations.",2021-01-01 +32025315,CitGVD: a comprehensive database of citrus genomic variations.,"Citrus is one of the most important commercial fruit crops worldwide. With the vast genomic data currently available for citrus fruit, genetic relationships, and molecular markers can be assessed for the development of molecular breeding and genomic selection strategies. In this study, to permit the ease of access to these data, a web-based database, the citrus genomic variation database (CitGVD, http://citgvd.cric.cn/home) was developed as the first citrus-specific comprehensive database dedicated to genome-wide variations including single nucleotide polymorphisms (SNPs) and insertions/deletions (INDELs). The current version (V1.0.0) of CitGVD is an open-access resource centered on 1,493,258,964 high-quality genomic variations and 84 phenotypes of 346 organisms curated from in-house projects and public resources. 
CitGVD integrates closely related information on genomic variation annotations, related gene annotations, and details regarding the organisms, incorporating a variety of built-in tools for data accession and analysis. As an example, CitGWAS can be used for genome-wide association studies (GWASs) with SNPs and phenotypic data, while CitEVOL can be used for genetic structure analysis. These features make CitGVD a comprehensive web portal and bioinformatics platform for citrus-related studies. It also provides a model for analyzing genome-wide variations for a wide range of crop varieties.",2020-02-01 +32881101,"UCSF ChimeraX: Structure visualization for researchers, educators, and developers.","UCSF ChimeraX is the next-generation interactive visualization program from the Resource for Biocomputing, Visualization, and Informatics (RBVI), following UCSF Chimera. ChimeraX brings (a) significant performance and graphics enhancements; (b) new implementations of Chimera's most highly used tools, many with further improvements; (c) several entirely new analysis features; (d) support for new areas such as virtual reality, light-sheet microscopy, and medical imaging data; (e) major ease-of-use advances, including toolbars with icons to perform actions with a single click, basic ""undo"" capabilities, and more logical and consistent commands; and (f) an app store for researchers to contribute new tools. ChimeraX includes full user documentation and is free for noncommercial use, with downloads available for Windows, Linux, and macOS from https://www.rbvi.ucsf.edu/chimerax.",2020-10-22 +30874795,mirtronDB: a mirtron knowledge base.,"

Motivation

Mirtrons arise from short introns with atypical cleavage by using the splicing mechanism. In the current literature, there is no repository centralizing and organizing the data available to the public. To fill this gap, we developed mirtronDB, the first knowledge database dedicated to mirtron, and it is available at http://mirtrondb.cp.utfpr.edu.br/. MirtronDB currently contains a total of 1407 mirtron precursors and 2426 mirtron mature sequences in 18 species.

Results

Through a user-friendly interface, users can now browse and search mirtrons by organism, organism group, type and name. MirtronDB is a specialized resource that provides free and user-friendly access to knowledge on mirtron data.

Availability and implementation

MirtronDB is available at http://mirtrondb.cp.utfpr.edu.br/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +33676930,The DBSAV Database: Predicting Deleteriousness of Single Amino Acid Variations in the Human Proteome.,"Deleterious single amino acid variation (SAV) is one of the leading causes of human diseases. Evaluating the functional impact of SAVs is crucial for diagnosis of genetic disorders. We previously developed a deep convolutional neural network predictor, DeepSAV, to evaluate the deleterious effects of SAVs on protein function based on various sequence, structural, and functional properties. DeepSAV scores of rare SAVs observed in the human population are aggregated into a gene-level score called GTS (Gene Tolerance of rare SAVs) that reflects a gene's tolerance to deleterious missense mutations and serves as a useful tool to study gene-disease associations. In this study, we aim to enhance the performance of DeepSAV by using expanded datasets of pathogenic and benign variants, more features, and neural network optimization. We found that multiple sequence alignments built from vertebrate-level orthologs yield better prediction results compared to those built from mammalian-level orthologs. For multiple sequence alignments built from BLAST searches, optimal performance was achieved with a sequence identity cutoff of 50% to remove distant homologs. The new version of DeepSAV exhibits the best performance among standalone predictors of deleterious effects of SAVs. We developed the DBSAV database (http://prodata.swmed.edu/DBSAV) that reports GTS scores of human genes and DeepSAV scores of SAVs in the human proteome, including pathogenic and benign SAVs, population-level SAVs, and all possible SAVs by single nucleotide variations. 
This database serves as a useful resource for research of human SAVs and their relationships with protein functions and human diseases.",2021-03-04 +34554878,Spoken Discourse Assessment and Analysis in Aphasia: An International Survey of Current Practices.,"Purpose Spoken discourse analysis is commonly employed in the assessment and treatment of people living with aphasia, yet there is no standardization in assessment, analysis, or reporting procedures, thereby precluding comparison/meta-analyses of data and hindering replication of findings. An important first step is to identify current practices in collecting and analyzing spoken discourse in aphasia. Thus, this study surveyed current practices, with the goal of working toward standardizing spoken discourse assessment first in research settings with subsequent implementation into clinical settings. Method A mixed-methods (quantitative and qualitative) survey was publicized to researchers and clinicians around the globe who have collected and/or analyzed spoken discourse data in aphasia. The survey data were collected between September and November 2019. Results Of the 201 individuals who consented to participate, 189 completed all mandatory questions in the survey (with fewer completing nonmandatory response questions). The majority of respondents reported barriers to utilizing discourse including transcription, coding, and analysis. The most common barrier was time (e.g., lack of time). Respondents also indicated that there was a lack of, and a need for, psychometric properties and normative data for spoken discourse use in the assessment and treatment of persons with aphasia. Quantitative and qualitative results are described in detail. Conclusions The current survey study evaluated spoken discourse methods in aphasia across research and clinical settings. 
Findings from this study will be used to guide development of process standardization in spoken discourse and for the creation of a psychometric and normative property database. Supplemental Material https://doi.org/10.23641/asha.166395100.",2021-09-23 +,The Ocean barcode atlas: A web service to explore the biodiversity and biogeography of marine organisms,"The Ocean Barcode Atlas (OBA) is a user friendly web service designed for biologists who wish to explore the biodiversity and biogeography of marine organisms locked in otherwise difficult to mine planetary scale DNA metabarcode data sets. Using just a web browser, a comprehensive picture of the diversity of a taxon or a barcode sequence is visualized graphically on world maps and interactive charts. Interactive results panels allow dynamic threshold adjustments and the display of diversity results in their environmental context measured at the time of sampling (temperature, oxygen, latitude, etc). Ecological analyses such as alpha and beta‐diversity plots are produced via publication quality vector graphics representations. Currently, the Ocean Barcode Atlas is deployed online with the (i) Tara Oceans eukaryotic 18S‐V9 rDNA metabarcodes; (ii) Tara Oceans 16S/18S rRNA ₘᵢTags; and (iii) 16S‐V4 V5 metabarcodes collected during the Malaspina‐2010 expedition. Additional prokaryotic or eukaryotic plankton barcode data sets will be added upon availability, given they provide the required complement of barcodes (including raw reads to compute barcode abundance) associated with their contextual environmental variables. Ocean Barcode Atlas is a freely‐available web service at: http://oba.mio.osupytheas.fr/ocean‐atlas/.",2021-05-01 +31679514,MaveDB: an open-source platform to distribute and interpret data from multiplexed assays of variant effect.,"Multiplex assays of variant effect (MAVEs), such as deep mutational scans and massively parallel reporter assays, test thousands of sequence variants in a single experiment. 
Despite the importance of MAVE data for basic and clinical research, there is no standard resource for their discovery and distribution. Here, we present MaveDB ( https://www.mavedb.org ), a public repository for large-scale measurements of sequence variant impact, designed for interoperability with applications to interpret these datasets. We also describe the first such application, MaveVis, which retrieves, visualizes, and contextualizes variant effect maps. Together, the database and applications will empower the community to mine these powerful datasets.",2019-11-04 +34851722,Polyphenol Utilization Proteins in the Human Gut Microbiome.,"Dietary polyphenols can significantly benefit human health, but their bioavailability is metabolically controlled by human gut microbiota. To facilitate the study of polyphenol metabolism for human gut health, we have manually curated experimentally characterized polyphenol utilization proteins (PUPs) from published literature. This resulted in 60 experimentally characterized PUPs (named seeds) with various metadata, such as species and substrate. Further database search found 107,851 homologs of the seeds from UniProt and UHGP (unified human gastrointestinal protein) databases. All PUP seeds and homologs were classified into protein classes, families, and subfamilies based on Enzyme Commission (EC) numbers, Pfam (protein family) domains, and sequence similarity networks. By locating PUP homologs in the genomes of UHGP, we have identified 1,074 physically linked PUP gene clusters (PGCs), which are potentially involved in polyphenol metabolism in the human gut. The gut microbiome of Africans was consistently ranked the top in terms of the abundance and prevalence of PUP homologs and PGCs among all geographical continents. This reflects the fact that dietary polyphenols are consumed by the African population more commonly than by other populations, such as Europeans and North Americans. 
A case study of the Hadza hunter-gatherer microbiome verified the feasibility of using dbPUP to profile metagenomic data for biologically meaningful discovery, suggesting an association between diet and PUP abundance. A Pfam domain enrichment analysis of PGCs identified a number of putatively novel PUP families. Lastly, a user-friendly web interface (https://bcb.unl.edu/dbpup/) provides all the data online to facilitate the research of polyphenol metabolism for improved human health. IMPORTANCE Long-term consumption of polyphenol-rich foods has been shown to lower the risk of various human diseases, such as cardiovascular diseases, cancers, and metabolic diseases. Raw polyphenols are often enzymatically processed by gut microbiome, which contains various polyphenol utilization proteins (PUPs) to produce metabolites with much higher bioaccessibility to gastrointestinal cells. This study delivered dbPUP as an online database for experimentally characterized PUPs and their homologs in human gut microbiome. This work also performed a systematic classification of PUPs into enzyme classes, families, and subfamilies. The signature Pfam domains were identified for PUP families, enabling conserved domain-based PUP annotation. This standardized sequence similarity-based PUP classification system offered a guideline for the future inclusion of new experimentally characterized PUPs and the creation of new PUP families. An in-depth data analysis was further conducted on PUP homologs and physically linked PUP gene clusters (PGCs) in gut microbiomes of different human populations.",2021-12-01 +31680154,TDR Targets 6: driving drug discovery for human pathogens through intensive chemogenomic data integration.,"The volume of biological, chemical and functional data deposited in the public domain is growing rapidly, thanks to next generation sequencing and highly-automated screening technologies. 
These datasets represent invaluable resources for drug discovery, particularly for less studied neglected disease pathogens. To leverage these datasets, smart and intensive data integration is required to guide computational inferences across diverse organisms. The TDR Targets chemogenomics resource integrates genomic data from human pathogens and model organisms along with information on bioactive compounds and their annotated activities. This report highlights the latest updates on the available data and functionality in TDR Targets 6. Based on chemogenomic network models providing links between inhibitors and targets, the database now incorporates network-driven target prioritizations, and novel visualizations of network subgraphs displaying chemical- and target-similarity neighborhoods along with associated target-compound bioactivity links. Available data can be browsed and queried through a new user interface that allows users to perform prioritizations of protein targets and chemical inhibitors. As such, TDR Targets now facilitates the investigation of drug repurposing against pathogen targets, which can potentially help in identifying candidate targets for bioactive compounds with previously unknown targets. TDR Targets is available at https://tdrtargets.org.",2020-01-01 +32521049,"FAIR-compliant clinical, radiomics and DICOM metadata of RIDER, interobserver, Lung1 and head-Neck1 TCIA collections.","

Purpose

One of the most frequently cited radiomics investigations showed that features automatically extracted from routine clinical images could be used in prognostic modeling. These images have been made publicly accessible via The Cancer Imaging Archive (TCIA). There have been numerous requests for additional explanatory metadata on the following datasets - RIDER, Interobserver, Lung1, and Head-Neck1. To support repeatability, reproducibility, generalizability, and transparency in radiomics research, we publish the subjects' clinical data, extracted radiomics features, and digital imaging and communications in medicine (DICOM) headers of these four datasets with descriptive metadata, in order to be more compliant with findable, accessible, interoperable, and reusable (FAIR) data management principles.

Acquisition and validation methods

Overall survival time intervals were updated using a national citizens registry after internal ethics board approval. Spatial offsets of the primary gross tumor volume (GTV) regions of interest (ROIs) associated with the Lung1 CT series were improved on the TCIA. GTV radiomics features were extracted using the open-source Ontology-Guided Radiomics Analysis Workflow (O-RAW). We reshaped the output of O-RAW to map features and extraction settings to the latest version of Radiomics Ontology, so as to be consistent with the Image Biomarker Standardization Initiative (IBSI). Digital imaging and communications in medicine metadata was extracted using a research version of Semantic DICOM (SOHARD, GmbH, Fuerth; Germany). Subjects' clinical data were described with metadata using the Radiation Oncology Ontology. All of the above were published in Resource Descriptor Format (RDF), that is, triples. Example SPARQL queries are shared with the reader to use on the online triples archive, which are intended to illustrate how to exploit this data submission.

Data format

The accumulated RDF data are publicly accessible through a SPARQL endpoint where the triples are archived. The endpoint is remotely queried through a graph database web application at http://sparql.cancerdata.org. SPARQL queries are intrinsically federated, such that we can efficiently cross-reference clinical, DICOM, and radiomics data within a single query, while being agnostic to the original data format and coding system. The federated queries work in the same way even if the RDF data were partitioned across multiple servers and dispersed physical locations.

Potential applications

The public availability of these data resources is intended to support radiomics features replication, repeatability, and reproducibility studies by the academic community. The example SPARQL queries may be freely used and modified by readers depending on their research question. Data interoperability and reusability are supported by referencing existing public ontologies. The RDF data are readily findable and accessible through the aforementioned link. Scripts used to create the RDF are made available at a code repository linked to this submission: https://gitlab.com/UM-CDS/FAIR-compliant_clinical_radiomics_and_DICOM_metadata.",2020-06-27 +33539251,Bringing together scientific disciplines for collaborative undertakings: a vision for advancing the adverse outcome pathway framework.,"

Background

Decades of research to understand the impacts of various types of environmental occupational and medical stressors on human health have produced a vast amount of data across many scientific disciplines. Organizing these data in a meaningful way to support risk assessment has been a significant challenge. To address this and other challenges in modernizing chemical health risk assessment, the Organisation for Economic Cooperation and Development (OECD) formalized the adverse outcome pathway (AOP) framework, an approach to consolidate knowledge into measurable key events (KEs) at various levels of biological organisation causally linked to disease based on the weight of scientific evidence (http://oe.cd/aops). Currently, AOPs have been considered predominantly in chemical safety but are relevant to radiation. In this context, the Nuclear Energy Agency's (NEA's) High-Level Group on Low Dose Research (HLG-LDR) is working to improve research co-ordination, including radiological research with chemical research, identify synergies between the fields and to avoid duplication of efforts and resource investments. To this end, a virtual workshop was held on 7 and 8 October 2020 with experts from the OECD AOP Programme together with the radiation and chemical research/regulation communities. The workshop was a coordinated effort of Health Canada, the Electric Power Research Institute (EPRI), and the Nuclear Energy Agency (NEA). The AOP approach was discussed including key issues to fully embrace its value and catalyze implementation in areas of radiation risk assessment.

Conclusions

A joint chemical and radiological expert group was proposed as a means to encourage cooperation between risk assessors and an initial vision was discussed on a path forward. A global survey was suggested as a way to identify priority health outcomes of regulatory interest for AOP development. Multidisciplinary teams are needed to address the challenge of producing the appropriate data for risk assessments. Data management and machine learning tools were highlighted as a way to progress from weight of evidence to computational causal inference.",2021-03-01 +34025933,AddictGene: An integrated knowledge base for differentially expressed genes associated with addictive substance.,"Addiction, a disorder of maladaptive brain plasticity, is associated with changes in numerous gene expressions. Nowadays, high-throughput sequencing data on addictive substance-induced gene expression have become widely available. A resource for comprehensive annotation of genes that show differential expression in response to commonly abused substances is necessary. So, we developed AddictGene by integrating gene expression, gene-gene interaction, gene-drug interaction and epigenetic regulatory annotation for over 70,156 items of differentially expressed genes associated with 7 commonly abused substances, including alcohol, nicotine, cocaine, morphine, heroin, methamphetamine, and amphetamine, across three species (human, mouse, rat). We also collected 1,141 addiction-related experimentally validated genes by techniques such as RT-PCR, northern blot and in situ hybridization. 
The easy-to-use web interface of AddictGene (http://159.226.67.237/sun/addictgedb/) allows users to search and browse multidimensional data on DEGs of their interest: 1) detailed gene-specific information extracted from the original studies; 2) basic information about the specific gene extracted from NCBI; 3) SNP associated with substance dependence and other psychiatry disorders; 4) expression alteration of specific gene in other psychiatric disorders; 5) expression patterns of interested gene across 31 primary and 54 secondary human tissues; 6) functional annotation of interested gene; 7) epigenetic regulators involved in the alteration of specific genes, including histone modifications and DNA methylation; 8) protein-protein interaction for functional linkage with interested gene; 9) drug-gene interaction for potential druggability. AddictGene offers a valuable repository for researchers to study the molecular mechanisms underlying addiction, and might provide valuable insights into potential therapies for drug abuse and relapse.",2021-04-19 +33080028,Peryton: a manual collection of experimentally supported microbe-disease associations.,"We present Peryton (https://dianalab.e-ce.uth.gr/peryton/), a database of experimentally supported microbe-disease associations. Its first version constitutes a novel resource hosting more than 7900 entries linking 43 diseases with 1396 microorganisms. Peryton's content is exclusively sustained by manual curation of biomedical articles. Diseases and microorganisms are provided in a systematic, standardized manner using reference resources to create database dictionaries. Information about the experimental design, study cohorts and the applied high- or low-throughput techniques is meticulously annotated and catered to users. Several functionalities are provided to enhance user experience and enable ingenious use of Peryton. One or more microorganisms and/or diseases can be queried at the same time. 
Advanced filtering options and direct text-based filtering of results enable refinement of returned information and the conducting of tailored queries suitable to different research questions. Peryton also provides interactive visualizations to effectively capture different aspects of its content and results can be directly downloaded for local storage and downstream analyses. Peryton will serve as a valuable source, enabling scientists of microbe-related disease fields to form novel hypotheses but, equally importantly, to assist in cross-validation of findings.",2021-01-01 +31452162,Accessing Cryptosporidium Omic and Isolate Data via CryptoDB.org.,"Cryptosporidium has historically been a difficult organism to work with, and molecular genomic data for this important pathogen have typically lagged behind other prominent protist pathogens. CryptoDB ( http://cryptodb.org/ ) was launched in 2004 following the appearance of draft genome sequences for both C. parvum and C. hominis. CryptoDB merged with the EuPathDB Bioinformatics Resource Center family of databases ( https://eupathdb.org ) and has been maintained and updated regularly since its establishment. These resources are freely available, are web-based, and permit users to analyze their own sequence data in the context of reference genome sequences in our user workspaces. Advances in technology have greatly facilitated Cryptosporidium research in the last several years greatly enhancing and extending the data and types of data available for this genus. Currently, 13 genome sequences are available for 9 species of Cryptosporidium as well as the distantly related Gregarina niphandrodes and two free-living alveolate outgroups of the Apicomplexa, Chromera velia and Vitrella brassicaformis. Recent years have seen several new genome sequences for both existing and new Cryptosporidium species as well as transcriptomics, proteomics, SNP, and isolate population surveys. 
This chapter introduces the extensive data mining and visualization capabilities of the EuPathDB software platform and introduces the data types and tools that are currently available for Cryptosporidium. Key features are demonstrated with Cryptosporidium-relevant examples and explanations.",2020-01-01 +32102777,The 2019 novel coronavirus resource.,"An ongoing outbreak of a novel coronavirus infection in Wuhan, China since December 2019 has led to 31,516 infected persons and 638 deaths across 25 countries (till 16:00 on February 7, 2020). The virus causing this pneumonia was then named as the 2019 novel coronavirus (2019-nCoV) by the World Health Organization. To promote the data sharing and make all relevant information of 2019-nCoV publicly available, we construct the 2019 Novel Coronavirus Resource (2019nCoVR, https://bigd.big.ac.cn/ncov). 2019nCoVR features comprehensive integration of genomic and proteomic sequences as well as their metadata information from the Global Initiative on Sharing All Influenza Data, National Center for Biotechnology Information, China National GeneBank, National Microbiology Data Center and China National Center for Bioinformation (CNCB)/National Genomics Data Center (NGDC). It also incorporates a wide range of relevant information including scientific literatures, news, and popular articles for science dissemination, and provides visualization functionalities for genome variation analysis results based on all collected 2019-nCoV strains. Moreover, by linking seamlessly with related databases in CNCB/NGDC, 2019nCoVR offers virus data submission and sharing services for raw sequence reads and assembled sequences. 
In this report, we provide comprehensive descriptions on data deposition, management, release and utility in 2019nCoVR, laying important foundations in aid of studies on virus classification and origin, genome variation and evolution, fast detection, drug development and pneumonia precision prevention and therapy.",2020-02-01 +31691799,Norine: update of the nonribosomal peptide resource.,"Norine, the unique resource dedicated to nonribosomal peptides (NRPs), is now updated with a new pipeline to automate massive sourcing and enhance annotation. External databases are mined to extract NRPs that are not yet in Norine. To maintain a high data quality, successive filters are applied to automatically validate the NRP annotations and only validated data is inserted in the database. External databases were also used to complete annotations of NRPs already in Norine. Besides, annotation consistency inside Norine and between Norine and external sources have reported annotation errors. Some can be corrected automatically, while others need manual curation. This new approach led to the insertion of 539 new NRPs and the addition or correction of annotations of nearly all Norine entries. Two new tools to analyse the chemical structures of NRPs (rBAN) and to infer a molecular formula from the mass-to-charge ratio of an NRP (Kendrick Formula Predictor) were also integrated. 
Norine is freely accessible from the following URL: https://bioinfo.cristal.univ-lille.fr/norine/.",2020-01-01 +33086069,SCLC-CellMiner: A Resource for Small Cell Lung Cancer Cell Line Genomics and Pharmacology Based on Genomic Signatures.,"CellMiner-SCLC (https://discover.nci.nih.gov/SclcCellMinerCDB/) integrates drug sensitivity and genomic data, including high-resolution methylome and transcriptome from 118 patient-derived small cell lung cancer (SCLC) cell lines, providing a resource for research into this ""recalcitrant cancer."" We demonstrate the reproducibility and stability of data from multiple sources and validate the SCLC consensus nomenclature on the basis of expression of master transcription factors NEUROD1, ASCL1, POU2F3, and YAP1. Our analyses reveal transcription networks linking SCLC subtypes with MYC and its paralogs and the NOTCH and HIPPO pathways. SCLC subsets express specific surface markers, providing potential opportunities for antibody-based targeted therapies. YAP1-driven SCLCs are notable for differential expression of the NOTCH pathway, epithelial-mesenchymal transition (EMT), and antigen-presenting machinery (APM) genes and sensitivity to mTOR and AKT inhibitors. These analyses provide insights into SCLC biology and a framework for future investigations into subtype-specific SCLC vulnerabilities.",2020-10-01 +34393586,"Kakila database: Towards a FAIR community approved database of cetacean presence in the waters of the Guadeloupe Archipelago, based on citizen science.","

Background

In the French West Indies, more than 20 species of cetaceans have been observed over the last decades. The recognition of this hotspot of biodiversity of marine mammals, observed in the French Exclusive Economic Zone of the West Indies, motivated the French government to create in 2010 a marine protected area (MPA) dedicated to the conservation of marine mammals: the Agoa Sanctuary. Threats that cetacean populations face are multiple, but well-documented. Cetacean conservation can only be achieved if relevant and reliable data are available, starting by occurrence data. In the Guadeloupe Archipelago and in addition to some data collected by the Agoa Sanctuary, occurrence data are mainly available through the contribution of citizen science and of local stakeholders (i.e. non-profit organisations (NPO) and whale-watchers). However, no observation network has been coordinated and no standards exist for cetacean presence data collection and management.

New information

In recent years, several whale watchers and NPOs regularly collected cetacean observation data around the Guadeloupe Archipelago. Our objective was to gather datasets from three Guadeloupean whale watchers, two NPOs and the Agoa Sanctuary, that agreed to share their data. These heterogeneous data went through a careful process of curation and standardisation in order to create a new extended database, using a newly-designed metadata set. This aggregated dataset contains a total of 4,704 records of 21 species collected in the Guadeloupe Archipelago from 2000 to 2019. The database was called Kakila (""who is there?"" in Guadeloupean Creole). The Kakila database was developed following the FAIR principles with the ultimate objective of ensuring sustainability. All these data were transferred into the PNDB repository (Pôle National de Données de Biodiversité, Biodiversity French Data Hub, https://www.pndb.fr). In the Agoa Sanctuary and surrounding waters, marine mammals have to interact with increasing anthropogenic pressure from growing human activities. In this context, the Kakila database fulfils the need for an organised system to structure marine mammal occurrences collected by multiple local stakeholders with a common objective: contribute to the knowledge and conservation of cetaceans living in the French Antilles waters. Much needed data analysis will enable us to identify high cetacean presence areas, to document the presence of rarer species and to determine areas of possible negative interactions with anthropogenic activities.",2021-07-22 +34877226,Cluster Headache and Associated Risk Factors: A Systemic Review and Meta-Analysis.,"Cluster headache (CH) has always been associated with several risk factors, including hereditary, environmental, and lifestyle habits. This study focuses on important risk factors, including family history, smoking, alcohol consumption, male predominance, and head trauma associated with CH. 
The present study aimed at investigating the available literature on cluster headaches and evaluating their associated risk factors. A systematic data search was designed, and scientific data were collected from renowned databases, including PubMed, Cochrane, Embase, Cumulative Index of Nursing and Allied Health Literature (CINAHL), and Google Scholar. Thirty-two studies were selected to execute a systemic review, and 26 studies, consisting of 6,065 CH patients, qualified for a meta-analysis. Statistical analyses were conducted by using MedCalc, version 16.8.4; (MedCalc Software, Ostend, Belgium; http://www.medcalc.org) and Rapidminer statistical software, version 9.6.0 (www.rapidminer.com). We conclude the evidence of family history, smoking, alcohol consumption, male predominance, and head trauma are associated with cluster headaches. However, sex discrimination in familial cases needs to be revisited because of the female predominance reported by familial history and CH association studies.",2021-11-05 +34389843,Nabe: an energetic database of amino acid mutations in protein-nucleic acid binding interfaces. ,"Protein-nucleic acid complexes play essential roles in regulating transcription, translation, DNA replication, repair and recombination, RNA processing and translocation. Site-directed mutagenesis has been extremely useful in understanding the principles of protein-DNA and protein-RNA interactions, and experimentally determined mutagenesis data are prerequisites for designing effective algorithms for predicting the binding affinity change upon mutation. However, a vital challenge in this area is the lack of sufficient public experimentally recognized mutation data, which leads to difficulties in developing computational prediction methods. 
In this article, we present Nabe, an integrated database of amino acid mutations and their effects on the binding free energy in protein-DNA and protein-RNA interactions for which binding affinities have been experimentally determined. Compared with existing databases and data sets, Nabe is the largest protein-nucleic acid mutation database, containing 2506 mutations in 473 protein-DNA and protein-RNA complexes, and of that 1751 are alanine mutations in 405 protein-nucleic acid complexes. For researchers to conveniently utilize the data, Nabe assembles protein-DNA and protein-RNA benchmark databases by adopting the data-processing procedures in the majority of models. To further facilitate users to query data, Nabe provides a searchable and graphical web page. Database URL: http://nabe.denglab.org.",2021-08-01 +34648133,SMART v1.0: A Database for Small Molecules with Functional Implications in Plants.,"We developed SMART v1.0 ( http://smart.omicstudio.cloud ), the first database for small molecules with functional implications in plants. The SMART database is devoted to providing and managing small molecules and their associated structural data, chemoinformatic data, protein targets, pathways and induced phenotype/function information. Currently, SMART v1.0 encompasses 1218 unique small molecules which are involved in multiple biological pathways. SMART v1.0 is featured with user-friendly interfaces, through which pathway-centered visualization of small molecules can be efficiently performed, and multiple types of searches (i.e., text search, structure similarity search and sequence similarity search) can be conveniently conducted. SMART v1.0 is also specifically designed to be a small molecule-sharing database, allowing users to release their newly discovered small molecules to public via the Contribute webpage. 
The SMART database will facilitate the comprehensive understanding of small molecules in complex biological processes in plants.",2021-10-14 +34185062,BioVLAB-Cancer-Pharmacogenomics: Tumor Heterogeneity and Pharmacogenomics Analysis of Multi-omics Data from Tumor on the Cloud. ,"Multi-omics data in molecular biology has accumulated rapidly over the years. Such data contains valuable information for research in medicine and drug discovery. Unfortunately, data-driven research in medicine and drug discovery is challenging for a majority of small research labs due to the large volume of data and the complexity of analysis pipeline. We present BioVLAB-Cancer-Pharmacogenomics, a bioinformatics system that facilitates analysis of multi-omics data from breast cancer to analyze and investigate intratumor heterogeneity and pharmacogenomics on Amazon Web Services. Our system takes multi-omics data as input to perform tumor heterogeneity analysis in terms of TCGA data and deconvolve-and-match the tumor gene expression to cell line data in CCLE using DNA methylation profiles. We believe that our system can help small research labs perform analysis of tumor multi-omics without worrying about computational infrastructure and maintenance of databases and tools. http://biohealth.snu.ac.kr/software/biovlab_cancer_pharmacogenomics. Supplementary data are available at Bioinformatics online.",2021-06-29 +32749460,Development of an online tool for linking behavior change techniques and mechanisms of action based on triangulation of findings from literature synthesis and expert consensus.,"Researchers, practitioners, and policymakers develop interventions to change behavior based on their understanding of how behavior change techniques (BCTs) impact the determinants of behavior. 
A transparent, systematic, and accessible method of linking BCTs with the processes through which they change behavior (i.e., their mechanisms of action [MoAs]) would advance the understanding of intervention effects and improve theory and intervention development. The purpose of this study is to triangulate evidence for hypothesized BCT-MoA links obtained in two previous studies and present the results in an interactive, online tool. Two previous studies generated evidence on links between 56 BCTs and 26 MoAs based on their frequency in literature synthesis and on expert consensus. Concordance between the findings of the two studies was examined using multilevel modeling. Uncertainties and differences between the two studies were reconciled by 16 behavior change experts using consensus development methods. The resulting evidence was used to generate an online tool. The two studies showed concordance for 25 of the 26 MoAs and agreement for 37 links and for 460 ""nonlinks."" A further 55 links were resolved by consensus (total of 92 [37 + 55] hypothesized BCT-MoA links). Full data on 1,456 possible links was incorporated into the online interactive Theory and Technique Tool (https://theoryandtechniquetool.humanbehaviourchange.org/). This triangulation of two distinct sources of evidence provides guidance on how BCTs may affect the mechanisms that change behavior and is available as a resource for behavior change intervention designers, researchers and theorists, supporting intervention design, research synthesis, and collaborative research.",2021-05-01 +,Subjectively perceived healthcare provision during the first wave of the COVID-19 pandemic,"Abstract

Background

An OECD benchmark (2020) shows that Germany has, in effect, an above-average healthcare infrastructure to face COVID-19. Nevertheless, at the beginning of the pandemic, there were restrictions and uncertainties in the use of health-related services. The following analysis examines respondents' subjective perception of their healthcare provision and whether there were sociodemographic differences.

Methods

Data from an online cross-sectional survey conducted between 29 April and 8 May 2020 were used (N = 1,570; 18-74 years). Bivariate test methods were used for analysis, stratified by age group, subjective social status (SSS) and chronic illness. The subjective perception was assessed via subjects' subjectively perceived worries, fears and difficulties of not receiving an adequate healthcare provision during the COVID-19 pandemic. Thus, an index was constructed with a range of values from 3 to 12 (Cronbach's α = 0.886). A higher index implies a poorer perception of the healthcare provision.

Results

Compared to the groups 50-59-year-olds and 60 years and older the 18-29-year-olds (MRank 431.1) had a significantly (p < 0.05) poorer perception of their healthcare provision during COVID-19. In addition, probands with low SSS were found to have a significantly (p < 0.05) worse perception (MRank 423.0) than subjects with medium SSS (MRank 374.1). Chronically ill subjects suffered significantly poorer perception (MRank 406.1; p < 0.05) compared to healthy subjects (MRank 366.0).

Conclusions

Vulnerable groups with poorer perceptions of their health care provision during COVID-19 can be identified. More investigation is needed to identify subjective or COVID-19 related predictors that cause this subjective perception. Reference OECD (2020). Beyond Containment: Health systems responses to COVID-19 in the OECD. Available: https://read.oecd-ilibrary.org/view/?ref=119_119689-ud5comtf84&title=Beyond_Containment:Health_systems_responses_to_COVID-19_in_the_OECD (Accessed: 14.04.2021)

Key messages

It was found that respondents perceived their subjective healthcare provision during COVID-19 differently, according to sociodemographic stratification. Especially younger, chronically ill, and socioeconomically deprived respondents experienced a poorer perception of their healthcare provision during the first wave of the COVID-19 pandemic.",2021-10-01 +31444973,CLASTR: The Cellosaurus STR similarity search tool - A precious help for cell line authentication.,"Despite an increased awareness of the problematic of cell line cross-contamination and misidentification, it remains nowadays a major source of erroneous experimental results in biomedical research. To prevent it, researchers are expected to frequently test the authenticity of the cell lines they are working on. STR profiling was selected as the international reference method to perform cell line authentication. While the experimental protocols and manipulations for generating a STR profile are well described, the available tools and workflows to analyze such data are lacking. The Cellosaurus knowledge resource aimed to improve the situation by compiling all the publicly available STR profiles from the literature and other databases. As a result, it grew to become the largest database in terms of human STR profiles, with 6,474 distinct cell lines having an associated STR profile (release July 31, 2019). Here we present CLASTR, the Cellosaurus STR similarity search tool enabling users to compare one or more STR profiles with those available in the Cellosaurus cell line knowledge resource. It aims to help researchers in the process of cell line authentication by providing numerous functionalities. The tool is publicly accessible on the SIB ExPASy server (https://web.expasy.org/cellosaurus-str-search) and its source code is available on GitHub under the GPL-3.0 license.",2019-10-04 +33289511,AVIA 3.0: interactive portal for genomic variant and sample level analysis.,"

Summary

The Annotation, Visualization and Impact Analysis (AVIA) is a web application combining multiple features to annotate and visualize genomic variant data. Users can investigate functional significance of their genetic alterations across samples, genes and pathways. Version 3.0 of AVIA offers filtering options through interactive charts and by linking disease relevant data sources. Newly incorporated services include gene, variant and sample level reporting, literature and functional correlations among impacted genes, comparative analysis across samples and against data sources such as TCGA and ClinVar, and cohort building. Sample and data management is now feasible through the application, which allows greater flexibility with sharing, reannotating and organizing data. Most importantly, AVIA's utility stems from its convenience for allowing users to upload and explore results without any a priori knowledge or the need to install, update and maintain software or databases. Together, these enhancements strengthen AVIA as a comprehensive, user-driven variant analysis portal.

Availability and implementation

AVIA is accessible online at https://avia-abcc.ncifcrf.gov.",2021-08-01 +34589189,Cancer DEIso: An integrative analysis platform for investigating differentially expressed gene-level and isoform-level human cancer markers.,"Transcript isoforms regulated by alternative splicing can substantially impact carcinogenesis, leading to a need to obtain clues for both gene differential expression and malfunctions of isoform distributions in cancer studies. The Cancer Genome Atlas (TCGA) project was launched in 2008 to collect cancer-related genome mutation raw data from the population. While many repositories tried to add insights into the raw data in TCGA, no existing database provides both comprehensive gene-level and isoform-level cancer stage marker investigation and survival analysis. We constructed Cancer DEIso to facilitate in-depth analyses for both gene-level and isoform-level human cancer studies. Patient RNA-seq data, sample sheets, patient clinical data, and human genome datasets were collected and processed in Cancer DEIso. And four functions to search differentially expressed genes/isoforms between cancer stages were implemented: (i) Search potential gene/isoform markers for a specified cancer type and its two stages; (ii) Search potentially induced cancer types and stages for a gene/isoform; (iii) Expression survival analysis on a given gene/isoform for some cancer; (iv) Gene/isoform stage expression comparison visualization. As an example, we demonstrate that Cancer DEIso can indicate potential colorectal cancer isoform diagnostic markers that are not easily detected when only gene-level expressions are considered. 
Cancer DEIso is available at http://cosbi4.ee.ncku.edu.tw/DEIso/.",2021-09-08 +31612961,GWAS Central: a comprehensive resource for the discovery and comparison of genotype and phenotype data from genome-wide association studies.,"The GWAS Central resource provides a toolkit for integrative access and visualization of a uniquely extensive collection of genome-wide association study data, while ensuring safe open access to prevent research participant identification. GWAS Central is the world's most comprehensive openly accessible repository of summary-level GWAS association information, providing over 70 million P-values for over 3800 studies investigating over 1400 unique phenotypes. The database content comprises direct submissions received from GWAS authors and consortia, in addition to actively gathered data sets from various public sources. GWAS data are discoverable from the perspective of genetic markers, genes, genome regions or phenotypes, via graphical visualizations and detailed downloadable data reports. Tested genetic markers and relevant genomic features can be visually interrogated across up to sixteen multiple association data sets in a single view using the integrated genome browser. The semantic standardization of phenotype descriptions with Medical Subject Headings and the Human Phenotype Ontology allows the precise identification of genetic variants associated with diseases, phenotypes and traits of interest. Harmonization of the phenotype descriptions used across several GWAS-related resources has extended the phenotype search capabilities to enable cross-database study discovery using a range of ontologies. 
GWAS Central is updated regularly and available at https://www.gwascentral.org.",2020-01-01 +33170273,Gramene 2021: harnessing the power of comparative genomics and pathways for plant research.,"Gramene (http://www.gramene.org), a knowledgebase founded on comparative functional analyses of genomic and pathway data for model plants and major crops, supports agricultural researchers worldwide. The resource is committed to open access and reproducible science based on the FAIR data principles. Since the last NAR update, we made nine releases; doubled the genome portal's content; expanded curated genes, pathways and expression sets; and implemented the Domain Informational Vocabulary Extraction (DIVE) algorithm for extracting gene function information from publications. The current release, #63 (October 2020), hosts 93 reference genomes-over 3.9 million genes in 122 947 families with orthologous and paralogous classifications. Plant Reactome portrays pathway networks using a combination of manual biocuration in rice (320 reference pathways) and orthology-based projections to 106 species. The Reactome platform facilitates comparison between reference and projected pathways, gene expression analyses and overlays of gene-gene interactions. Gramene integrates ontology-based protein structure-function annotation; information on genetic, epigenetic, expression, and phenotypic diversity; and gene functional annotations extracted from plant-focused journals using DIVE. We train plant researchers in biocuration of genes and pathways; host curated maize gene structures as tracks in the maize genome browser; and integrate curated rice genes and pathways in the Plant Reactome.",2021-01-01 +31696235,MGnify: the microbiome analysis resource in 2020.,"MGnify (http://www.ebi.ac.uk/metagenomics) provides a free to use platform for the assembly, analysis and archiving of microbiome data derived from sequencing microbial populations that are present in particular environments. 
Over the past 2 years, MGnify (formerly EBI Metagenomics) has more than doubled the number of publicly available analysed datasets held within the resource. Recently, an updated approach to data analysis has been unveiled (version 5.0), replacing the previous single pipeline with multiple analysis pipelines that are tailored according to the input data, and that are formally described using the Common Workflow Language, enabling greater provenance, reusability, and reproducibility. MGnify's new analysis pipelines offer additional approaches for taxonomic assertions based on ribosomal internal transcribed spacer regions (ITS1/2) and expanded protein functional annotations. Biochemical pathways and systems predictions have also been added for assembled contigs. MGnify's growing focus on the assembly of metagenomic data has also seen the number of datasets it has assembled and analysed increase six-fold. The non-redundant protein database constructed from the proteins encoded by these assemblies now exceeds 1 billion sequences. Meanwhile, a newly developed contig viewer provides fine-grained visualisation of the assembled contigs and their enriched annotations.",2020-01-01 +31991337,FrogAncestryCalc: A standalone batch likelihood computation tool for ancestry inference panels catalogued in FROG-kb.,"The web-based application, FROG-kb (the Forensic Resource/Reference on Genetics-knowledge base, https://frog.med.yale.edu) supports the use of Single Nucleotide Polymorphisms (SNPs) for individual identification and ancestry inference in a forensic setting. The primary functionality provided by FROG-kb on the web is computation of relative likelihoods of populations being the origin of an individual, utilizing the underlying reference population allele frequency data curated and organized in ALFRED, the ALlele FREquency Database (https://alfred.med.yale.edu/). 
Here we present a downloadable stand-alone tool, FrogAncestryCalc that can simultaneously compute population likelihoods for multiple individuals for a selected panel of SNPs. The program calculates for a given Ancestry Inference (AI) panel the probability of each individual's genotype profile arising in each of the reference populations. Five of the AI panels catalogued in FROG-kb are implemented in this version of FrogAncestryCalc.",2020-01-22 +33116744,Identification of Novel Therapeutic Molecular Targets in Inflammatory Bowel Disease by Using Genetic Databases.,"

Purpose

Utilization of genetic databases to identify genes involved in ulcerative colitis (UC), Crohn's disease (CD), and their extra-intestinal manifestations.

Methods

Protein coding genes involved in ulcerative colitis (3783 genes), Crohn's disease (3980 genes), uveitis (1043 genes), arthritis (5583 genes), primary sclerosing cholangitis (PSC) (1313 genes), and pyoderma gangrenosum (119 genes) were categorized using four genetic databases. These include Genecards: The Human Gene Database (www.genecards.org), DisGeNET (https://www.disgenet.org/), The Comparative Toxicogenomics Database (http://ctdbase.org/) and the Universal Protein Resource (https://www.uniprot.org/). NDex, Network Data Exchange (http://www.ndexbio.org/), was then utilized for mapping a unique signal pathway from the identified shared genes involved in the above disease processes.

Results

We have detected a unique array of 20 genes with the highest probability of overlay in UC, CD, uveitis, arthritis, pyoderma gangrenosum, and PSC. Figure 1 represents the interactome of these 20 protein coding genes. Of note, unique immune modulators in different disease processes are also noted. Interleukin-25 (IL-25) and monensin-resistant homolog 2 (MON-2) are only noted in UC, CD, pyoderma gangrenosum, and arthritis. Arachidonate 5-lipoxygenase (ALOX5) is involved in UC, CD, and arthritis. SLCO1B3 is exclusively involved with pyoderma gangrenosum, UC, and CD. As expected, TNF involvement is noted in CD, UC, PSC, and arthritis. Table 1 depicts the detailed result.

Conclusion

Our work has identified a distinctive set of genes involved in IBD and its associated extra-intestinal disease processes. These genes play crucial roles in mechanisms of immune response, inflammation, and apoptosis and further our understanding of this complex disease process. We postulate that these genes play a critical role at intersecting pathways involved in inflammatory bowel disease, and these novel molecules, their upstream and downstream effectors, are potential targets for future therapeutic agents.",2020-10-19 +33051688,dbGuide: a database of functionally validated guide RNAs for genome editing in human and mouse cells.,"With the technology's accessibility and ease of use, CRISPR has been employed widely in many different organisms and experimental settings. As a result, thousands of publications have used CRISPR to make specific genetic perturbations, establishing in itself a resource of validated guide RNA sequences. While numerous computational tools to assist in the design and identification of candidate guide RNAs exist, these are still just at best predictions and generally, researchers inevitably will test multiple sequences for functional activity. Here, we present dbGuide (https://sgrnascorer.cancer.gov/dbguide), a database of functionally validated guide RNA sequences for CRISPR/Cas9-based knockout in human and mouse. Our database not only contains computationally determined candidate guide RNA sequences, but of even greater value, over 4000 sequences which have been functionally validated either through direct amplicon sequencing or manual curation of literature from over 1000 publications. 
Finally, our established framework will allow for continual addition of newly published and experimentally validated guide RNA sequences for CRISPR/Cas9-based knockout as well as incorporation of sequences from different gene editing systems, additional species and other types of site-specific functionalities such as base editing, gene activation, repression and epigenetic modification.",2021-01-01 +34493866,A compendium of uniformly processed human gene expression and splicing quantitative trait loci.,"Many gene expression quantitative trait locus (eQTL) studies have published their summary statistics, which can be used to gain insight into complex human traits by downstream analyses, such as fine mapping and co-localization. However, technical differences between these datasets are a barrier to their widespread use. Consequently, target genes for most genome-wide association study (GWAS) signals have still not been identified. In the present study, we present the eQTL Catalogue ( https://www.ebi.ac.uk/eqtl ), a resource of quality-controlled, uniformly re-computed gene expression and splicing QTLs from 21 studies. We find that, for matching cell types and tissues, the eQTL effect sizes are highly reproducible between studies. Although most QTLs were shared between most bulk tissues, we identified a greater diversity of cell-type-specific QTLs from purified cell types, a subset of which also manifested as new disease co-localizations. Our summary statistics are freely available to enable the systematic interpretation of human GWAS associations across many cell types and tissues.",2021-09-06 +,A novel workflow to improve genotyping of multigene families in wildlife species: An experimental set‐up with a known model system,"Genotyping complex multigene families in novel systems is particularly challenging. Target primers frequently amplify simultaneously multiple loci leading to high PCR and sequencing artefacts such as chimeras and allele amplification bias. 
Most genotyping pipelines have been validated in nonmodel systems whereby the real genotype is unknown and the generation of artefacts may be highly repeatable. Further hindering accurate genotyping, the relationship between artefacts and genotype complexity (i.e. number of alleles per genotype) within a PCR remains poorly described. Here, we investigated the latter by experimentally combining multiple known major histocompatibility complex (MHC) haplotypes of a model organism (chicken, Gallus gallus, 43 artificial genotypes with 2–13 alleles per amplicon). In addition to well‐defined ‘optimal’ primers, we simulated a nonmodel species situation by designing ‘cross‐species’ primers based on sequence data from closely related Galliform species. We applied a novel open‐source genotyping pipeline (ACACIA; https://gitlab.com/psc_santos/ACACIA), and compared its performance with another, previously published pipeline (AmpliSAS). Allele calling accuracy was higher when using ACACIA (98.5% versus 97% and 77.8% versus 75% for the ‘optimal’ and ‘cross‐species’ data sets, respectively). Systematic allele dropout of three alleles owing to primer mismatch in the ‘cross‐species’ data set explained high allele calling repeatability (100% when using ACACIA) despite low accuracy, demonstrating that repeatability can be misleading when evaluating genotyping workflows. Genotype complexity was positively associated with nonchimeric artefacts, chimeric artefacts (nonlinearly by levelling when amplifying more than 4–6 alleles) and allele amplification bias. Our study exemplifies and demonstrates pitfalls researchers should avoid to reliably genotype complex multigene families.",2021-04-01 +33095860,CNCDatabase: a database of non-coding cancer drivers.,"Most mutations in cancer genomes occur in the non-coding regions with unknown impact on tumor development. 
Although the increase in the number of cancer whole-genome sequences has revealed numerous putative non-coding cancer drivers, their information is dispersed across multiple studies making it difficult to understand their roles in tumorigenesis of different cancer types. We have developed CNCDatabase, Cornell Non-coding Cancer driver Database (https://cncdatabase.med.cornell.edu/) that contains detailed information about predicted non-coding drivers at gene promoters, 5' and 3' UTRs (untranslated regions), enhancers, CTCF insulators and non-coding RNAs. CNCDatabase documents 1111 protein-coding genes and 90 non-coding RNAs with reported drivers in their non-coding regions from 32 cancer types by computational predictions of positive selection using whole-genome sequences; differential gene expression in samples with and without mutations; or another set of experimental validations including luciferase reporter assays and genome editing. The database can be easily modified and scaled as lists of non-coding drivers are revised in the community with larger whole-genome sequencing studies, CRISPR screens and further experimental validations. Overall, CNCDatabase provides a helpful resource for researchers to explore the pathological role of non-coding alterations in human cancers.",2021-01-01 +34273570,"Cause of death trends among adults with and without cerebral palsy in the United States, 2013-2017.","

Background

Adults with cerebral palsy (CP) in the United States die much earlier than those without CP, a health inequality likely shaped by causes of death. Existing research has not considered demographic differences in mortality patterns.

Objectives

To analyze differences in cause of death for adults who did/did not have CP reported on their death certificates and to assess sex and racial-ethnic difference in causes of death among adult decedents with CP.

Methods

Data are from the 2013-2017 US Multiple Cause of Death Mortality files (N = 13,332,871; n = 13,897 with CP). Multiple logistic regression models were used to compare differences in causes of death between adults with and without CP and to determine sex and racial-ethnic differences in causes of death among adults with CP. Adjusted odds ratios (aORs) and 95% confidence intervals (CIs) were estimated.

Results

As compared with decedents without CP, those with CP were more likely to die from pneumonitis (aOR 31.14, 95% CI 29.42-32.96), influenza/pneumonia (8.78, 8.30-9.29), respiratory failure (17.24, 15.19-18.69), and choking (20.66, 18.86-22.62) and less likely to die from heart disease (0.61, 0.58-0.65), cancer (0.12, 0.11-0.13), chronic lower respiratory diseases (0.50, 0.44-0.56), and cerebrovascular diseases (0.66, 0.59-0.75). Among adults with CP, female decedents were more likely than males to die from respiratory failure (1.21, 1.03-1.42), and non-Hispanic Black decedents were more likely than non-Hispanic White decedents to die from heart disease (1.24, 1.07-1.45) and cerebrovascular disease (1.77, 1.29-2.49).

Conclusions

In 2013-2017, heart disease was the leading cause of death for adults with and without CP. However, for people with compared to those without CP, likelihood of death from likely preventable respiratory causes of death was higher. Non-Hispanic Black adults were more likely than non-Hispanic White adults to die from heart and cerebrovascular diseases. Public health, clinical, and rehabilitation efforts must use a multifaceted approach to address respiratory and circulatory health among people with CP.

Database

United States National Vital Statistics System of the Centers for Disease Control and Prevention Multiple Cause of Death Mortality files (National Bureau of Economic Research: https://www.nber.org/research/data/vital-statistics-mortality-data-nber).",2021-11-16 +34478719,The Cost of ARDS: A Systematic Review.,"

Background

ARDS is an inflammatory condition of the lungs and is a common condition in adult ICUs. The resources required and costs of care for patients with ARDS are significant because of the severity of the illness and extended ICU lengths of stay.

Research question

What are the costs associated with ARDS?

Study design and methods

We systematically searched the literature through April 29, 2021, for articles relevant to ARDS and costs. MEDLINE, Embase, Central, and EconLit databases were searched, and articles that reported on cost data from an original publication in adult patients with ARDS were included. Two authors independently assessed articles for inclusion and extracted data elements related to costs, methodology, health care system type, economic perspective, and clinical data. Publication quality was assessed using a modified version of the Quality of Health Economic Studies Instrument.

Results

Four thousand six hundred sixty-three publications were found, of which 110 were included for full-text review (κ = 0.72). A total of 22 publications (49,483 patients) were suitable for data extraction. The publications represented a broad range of health care systems, economic perspectives, costing methodology, and time frames. Mean inpatient costs ranged from $8,476 (2021 US dollars [USD]) to $547,974 (2021 USD) and were highest in publications of lower quality and in American health systems and were associated with trauma cohorts. Outpatient costs were highest in publications with higher readmission rates, longer durations of follow-up, and in American health systems.

Interpretation

A wide range of costing data is available for ARDS. A comprehensive synthesis of this literature frames the reasons for this and allows estimates to reflect the context in which they were assessed. This information will be of value to researchers and administrators interested in the economics of caring for patients with ARDS.

Trial registry

PROSPERO; No.: CRD42020192487 https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=192487.",2021-08-31 +32893032,The Atlas of Inflammation Resolution (AIR).,"Acute inflammation is a protective reaction by the immune system in response to invading pathogens or tissue damage. Ideally, the response should be localized, self-limited, and returning to homeostasis. If not resolved, acute inflammation can result in organ pathologies leading to chronic inflammatory phenotypes. Acute inflammation and inflammation resolution are complex coordinated processes, involving a number of cell types, interacting in space and time. The biomolecular complexity and the fact that several biomedical fields are involved, make a multi- and interdisciplinary approach necessary. The Atlas of Inflammation Resolution (AIR) is a web-based resource capturing an essential part of the state-of-the-art in acute inflammation and inflammation resolution research. The AIR provides an interface for users to search thousands of interactions, arranged in inter-connected multi-layers of process diagrams, covering a wide range of clinically relevant phenotypes. By mapping experimental data onto the AIR, it can be used to elucidate drug action as well as molecular mechanisms underlying different disease phenotypes. For the visualization and exploration of information, the AIR uses the Minerva platform, which is a well-established tool for the presentation of disease maps. The molecular details of the AIR are encoded using international standards. The AIR was created as a freely accessible resource, supporting research and education in the fields of acute inflammation and inflammation resolution. The AIR connects research communities, facilitates clinical decision making, and supports research scientists in the formulation and validation of hypotheses. 
The AIR is accessible through https://air.bio.informatik.uni-rostock.de.",2020-08-01 +34252924,DIAmeter: matching peptides to data-independent acquisition mass spectrometry data.,"

Motivation

Tandem mass spectrometry data acquired using data independent acquisition (DIA) is challenging to interpret because the data exhibits complex structure along both the mass-to-charge (m/z) and time axes. The most common approach to analyzing this type of data makes use of a library of previously observed DIA data patterns (a 'spectral library'), but this approach is expensive because the libraries do not typically generalize well across laboratories.

Results

Here, we propose DIAmeter, a search engine that detects peptides in DIA data using only a peptide sequence database. Although some existing library-free DIA analysis methods (i) support data generated using both wide and narrow isolation windows, (ii) detect peptides containing post-translational modifications, (iii) analyze data from a variety of instrument platforms and (iv) are capable of detecting peptides even in the absence of detectable signal in the survey (MS1) scan, DIAmeter is the only method that offers all four capabilities in a single tool.

Availability and implementation

The open source, Apache licensed source code is available as part of the Crux mass spectrometry analysis toolkit (http://crux.ms).

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +33130899,Functional analysis of low-grade glioma genetic variants predicts key target genes and transcription factors.,"

Background

Large-scale genome-wide association studies (GWAS) have implicated thousands of germline genetic variants in modulating individuals' risk to various diseases, including cancer. At least 25 risk loci have been identified for low-grade gliomas (LGGs), but their molecular functions remain largely unknown.

Methods

We hypothesized that GWAS loci contain causal single nucleotide polymorphisms (SNPs) that reside in accessible open chromatin regions and modulate the expression of target genes by perturbing the binding affinity of transcription factors (TFs). We performed an integrative analysis of genomic and epigenomic data from The Cancer Genome Atlas and other public repositories to identify candidate causal SNPs within linkage disequilibrium blocks of LGG GWAS loci. We assessed their potential regulatory role via in silico TF binding sequence perturbations, convolutional neural network trained on TF binding data, and simulated annealing-based interpretation methods.

Results

We built an interactive website (http://education.knoweng.org/alg3/) summarizing the functional footprinting of 280 variants in 25 LGG GWAS regions, providing rich information for further computational and experimental scrutiny. We identified as case studies PHLDB1 and SLC25A26 as candidate target genes of rs12803321 and rs11706832, respectively, and predicted the GWAS variant rs648044 to be the causal SNP modulating ZBTB16, a known tumor suppressor in multiple cancers. We showed that rs648044 likely perturbed the binding affinity of the TF MAFF, as supported by RNA interference and in vitro MAFF binding experiments.

Conclusions

The identified candidate (causal SNP, target gene, TF) triplets and the accompanying resource will help accelerate our understanding of the molecular mechanisms underlying genetic risk factors for gliomas.",2021-04-01 +34741074,Fibromine is a multi-omics database and mining tool for target discovery in pulmonary fibrosis.,"Idiopathic pulmonary fibrosis is a lethal lung fibroproliferative disease with limited therapeutic options. Differential expression profiling of affected sites has been instrumental for involved pathogenetic mechanisms dissection and therapeutic targets discovery. However, there have been limited efforts to comparatively analyse/mine the numerous related publicly available datasets, to fully exploit their potential on the validation/creation of novel research hypotheses. In this context and towards that goal, we present Fibromine, an integrated database and exploration environment comprising of consistently re-analysed, manually curated transcriptomic and proteomic pulmonary fibrosis datasets covering a wide range of experimental designs in both patients and animal models. Fibromine can be accessed via an R Shiny application ( http://www.fibromine.com/Fibromine ) which offers dynamic data exploration and real-time integration functionalities. Moreover, we introduce a novel benchmarking system based on transcriptomic datasets underlying characteristics, resulting to dataset accreditation aiming to aid the user on dataset selection. Cell specificity of gene expression can be visualised and/or explored in several scRNA-seq datasets, in an effort to link legacy data with this cutting-edge methodology and paving the way to their integration. 
Several use case examples are presented, that, importantly, can be reproduced on-the-fly by a non-specialist user, the primary target and potential user of this endeavour.",2021-11-05 +32623772,EpigenCentral: Portal for DNA methylation data analysis and classification in rare diseases.,"Epigenetic processes play a key role in regulating gene expression. Genetic variants that disrupt chromatin-modifying proteins are associated with a broad range of diseases, some of which have specific epigenetic patterns, such as aberrant DNA methylation (DNAm), which may be used as disease biomarkers. While much of the epigenetic research has focused on cancer, there is a paucity of resources devoted to neurodevelopmental disorders (NDDs), which include autism spectrum disorder and many rare, clinically overlapping syndromes. To address this challenge, we created EpigenCentral, a free web resource for biomedical researchers, molecular diagnostic laboratories, and clinical practitioners to perform the interactive classification and analysis of DNAm data related to NDDs. It allows users to search for known disease-associated patterns in their DNAm data, classify genetic variants as pathogenic or benign to assist in molecular diagnostics, or analyze patterns of differential methylation in their data through a simple web form. EpigenCentral is freely available at http://epigen.ccm.sickkids.ca/.",2020-07-15 +31617559,PolyASite 2.0: a consolidated atlas of polyadenylation sites from 3' end sequencing.,"Generated by 3' end cleavage and polyadenylation at alternative polyadenylation (poly(A)) sites, alternative terminal exons account for much of the variation between human transcript isoforms. More than a dozen protocols have been developed so far for capturing and sequencing RNA 3' ends from a variety of cell types and species. In previous studies, we have used these data to uncover novel regulatory signals and cell type-specific isoforms. 
Here we present an update of the PolyASite (https://polyasite.unibas.ch) resource of poly(A) sites, constructed from publicly available human, mouse and worm 3' end sequencing datasets by enforcing uniform quality measures, including the flagging of putative internal priming sites. Through integrated processing of all data, we identified and clustered sites that are closely spaced and share polyadenylation signals, as these are likely the result of stochastic variations in processing. For each cluster, we identified the representative - most frequently processed - site and estimated the relative use in the transcriptome across all samples. We have established a modern web portal for efficient finding, exploration and export of data. Database generation is fully automated, greatly facilitating incorporation of new datasets and the updating of underlying genome resources.",2020-01-01 +31428785,CFEA: a cell-free epigenome atlas in human diseases.,"Epigenetic alterations, including 5-methylcytosine (5mC), 5-hydroxymethylcytosine (5hmC) and nucleosome positioning (NP), in cell-free DNA (cfDNA) have been widely observed in human diseases, and many available cfDNA-based epigenome-wide profiles exhibit high sensitivity and specificity in disease detection and classification. However, due to the lack of efficient collection, standardized quality control, and analysis procedures, efficiently integrating and reusing these data remain considerable challenges. Here, we introduce CFEA (http://www.bio-data.cn/CFEA), a cell-free epigenome database dedicated to three types of widely adopted epigenetic modifications (5mC, 5hmC and NP) involved in 27 human diseases. We developed bioinformatic pipelines for quality control and standard data processing and an easy-to-use web interface to facilitate the query, visualization and download of these cell-free epigenome data. 
We also manually curated related biological and clinical information for each profile, allowing users to better browse and compare cfDNA epigenomes at a specific stage (such as early- or metastasis-stage) of cancer development. CFEA provides a comprehensive and timely resource to the scientific community and supports the development of liquid biopsy-based biomarkers for various human diseases.",2020-01-01 +30137226,ImaGEO: integrative gene expression meta-analysis from GEO database.,"SUMMARY:The Gene Expression Omnibus (GEO) database provides an invaluable resource of publicly available gene expression data that can be integrated and analyzed to derive new hypothesis and knowledge. In this context, gene expression meta-analysis (geMAs) is increasingly used in several fields to improve study reproducibility and discovering robust biomarkers. Nevertheless, integrating data is not straightforward without bioinformatics expertise. Here, we present ImaGEO, a web tool for geMAs that implements a complete and comprehensive meta-analysis workflow starting from GEO dataset identifiers. The application integrates GEO datasets, applies different meta-analysis techniques and provides functional analysis results in an easy-to-use environment. ImaGEO is a powerful and useful resource that allows researchers to integrate and perform meta-analysis of GEO datasets to lead robust findings for biomarker discovery studies. AVAILABILITY AND IMPLEMENTATION:ImaGEO is accessible at http://bioinfo.genyo.es/imageo/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-03-01 +,Diagnosis of COVID‐19 using skin rashes,"Several studies have observed that patients hospitalized with COVID‐19 experienced unusual skin rashes, such as urticaria (‘nettle rash’), chickenpox‐type rash, and reddish and purplish bumps on the fingers or toes. 
Using data from about 336,000 UK users of the COVID Symptom Study app, we observed that 8.8% of people reporting a positive SARS‐CoV‐2 swab test had experienced a skin rash, compared with 5.4% of those with a negative test result. Next, we analysed data collected using an independent online survey in nearly 12,000 people with skin rashes and a suspected or confirmed SARS‐CoV‐2 infection. We observed that 17% of respondents testing positive reported a rash as the first symptom of the disease, while for 21% of them this was their only symptom. Taking these observations together, we advise that skin rashes should be considered when diagnosing COVID‐19 infection. To increase awareness regarding these symptoms we have created, in collaboration with the British Association of Dermatologists, an online catalogue of images of the most common skin manifestations of COVID‐19, available at https://covidskinsigns.com. Linked Article: Visconti et al. Br J Dermatol 2021; 184:880–887.",2021-05-01 +,Analysis of the population structure and genetic diversity of the red swamp crayfish (Procambarus clarkii) in China using SSR markers,"Procambarus clarkii produces high-quality, delicious meat that is high in protein, low in fat, and rich in calcium and phosphorus. It has become an important aquatic resource in China. Our objectives are (i) to analyze the level of genetic diversity of P. clarkii populations; (ii) to explore the genetic differentiation (Gst); and (iii) to propose appropriate strategies for the conservation. In this study, Shannon's index (I) and Nei's gene diversity index (H) for P. clarkii were high (I = 0.3462 and H = 0.2325 on average and I = 0.6264, H = 0.4377 at the species level) based on the SSR markers. The expected heterozygosity value of 17 microsatellite loci in 25 crayfish populations was 0.9317, the observed heterozygosity value was 0.9121, and the observed number of alleles per locus was 2.000; and the effective number of alleles per locus was 1.8075. 
Among the P. clarkii populations, the inbreeding coefficient within populations (Fis) was 0.2315, overall inbreeding coefficient (Fit) was 0.4438, genetic differentiation coefficient among populations (Fst) was 0.3145 and gene differentiation (Gst) was 0.4785 based on SSR analyses. The cluster analysis results obtained by unweighted pair-group method with arithmetic mean (UPGMA) analysis, principal coordinate analysis (PCoA) and STRUCTURE analysis were similar. A Mantel test showed that the isolation-by-distance pattern was not significant. The high Gst among P. clarkii populations is attributed to genetic drift and geographic isolation. The results indicated that more P. clarkii populations should be collected when formulating conservation and aquaculture strategies. Liu F, Qu Y-K, Geng C, et al. Analysis of the population structure and genetic diversity of the red swamp crayfish (Procambarus clarkii) in China using SSR markers. Electron J Biotechnol 2020;47. https://doi.org/10.1016/j.ejbt.2020.06.007.",2020-09-01 +33600011,MutSpliceDB: A database of splice sites variants with RNA-seq based evidence on effects on splicing.,"Splice site variants may lead to transcript alterations, causing exons inclusion, exclusion, truncation, or intron retention. Interpreting the consequences of a specific splice site variant is not straightforward, especially if the variant is located outside of the canonical splice sites. We developed MutSpliceDB: https://brb.nci.nih.gov/splicing, a public resource to facilitate the interpretation of splice sites variants effects on splicing based on manually reviewed RNA-seq BAM files from samples with splice site variants.",2021-03-01 +30703169,HuVarBase: A human variant database with comprehensive information at gene and protein levels.,"Human variant databases could be better exploited if the variant data available in multiple resources is integrated in a single comprehensive resource along with sequence and structural features. 
Such integration would improve the analyses of variants for disease prediction, prevention or treatment. The HuVarBase (HUmanVARiantdataBASE) assimilates publicly available human variant data at protein level and gene level into a comprehensive resource. Protein level data such as amino acid sequence, secondary structure of the mutant residue, domain, function, subcellular location and post-translational modification are integrated with gene level data such as gene name, chromosome number & genome position, DNA mutation, mutation type origin and rs ID number. Disease class has been added for the disease causing variants. The database is publicly available at https://www.iitm.ac.in/bioinfo/huvarbase. A total of 774,863 variant records, integrated in the HuVarBase, can be searched with options to display, visualize and download the results.",2019-01-31 +34928054,LINT-Web: A Web-Based Lipidomic Data Mining Tool Using Intra-Omic Integrative Correlation Strategy.,"Lipidomics is a younger member of the ""omics"" family. It aims to profile lipidome alterations occurring in biological systems. Similar to the other ""omics"", lipidomic data is highly dimensional and contains a massive amount of information awaiting deciphering and data mining. Currently, the available bioinformatic tools targeting lipidomic data processing and lipid pathway analysis are limited. A few tools designed for lipidomic analysis perform only basic statistical analyses, and lipid pathway analyses rely heavily on public databases (KEGG, Reactome, and HMDB). Due to the inadequate understanding of lipid signaling and metabolism, the use of public databases for lipid pathway analysis can be biased and misleading. Instead of using public databases to interpret lipidomic ontology, the authors introduce an intra-omic integrative correlation strategy for lipidomic data mining. 
Such an intra-omic strategy allows researchers to unscramble and predict lipid biological functions from correlated genomic ontological results using statistical approaches. To simplify and improve the lipidomic data processing experience, they designed an interactive web-based tool: LINT-web (http://www.lintwebomics.info/) to perform the intra-omic analysis strategy, and validated the functions of LINT-web using two biological systems. Users without sophisticated statistical experience can easily process lipidomic datasets and predict the potential lipid biological functions using LINT-web.",2021-07-31 +31599330,LSD 3.0: a comprehensive resource for the leaf senescence research community.,"The leaf senescence database (LSD) is a comprehensive resource of senescence-associated genes (SAGs) and their corresponding mutants. Through manual curation and extensive annotation, we updated the LSD to a new version LSD 3.0, which contains 5853 genes and 617 mutants from 68 species. To provide sustainable and reliable services for the plant research community, LSD 3.0 (https://bigd.big.ac.cn/lsd/) has been moved to and maintained by the National Genomics Data Center at Beijing Institute of Genomics, Chinese Academy of Sciences. In the current release, we added some new features: (i) Transcriptome data of leaf senescence in poplar were integrated; (ii) Leaf senescence-associated transcriptome data information in Arabidopsis, rice and soybean were included; (iii) Senescence-differentially expressed small RNAs (Sen-smRNA) in Arabidopsis were identified; (iv) Interaction pairs between Sen-smRNAs and senescence-associated transcription factors (Sen-TF) were established; (v) Senescence phenotypes of 90 natural accessions (ecotypes) and 42 images of ecotypes in Arabidopsis were incorporated; (vi) Mutant seed information of SAGs in rice obtained from Kitbase was integrated; (vii) New options of search engines for ecotypes and transcriptome data were implemented. 
Together, the updated database bears great utility to continue to provide users with useful resources for studies of leaf senescence.",2020-01-01 +31872320,Rice Stress-Resistant SNP Database.,"

Background

Rice (Oryza sativa L.) yield is limited inherently by environmental stresses, including biotic and abiotic stresses. Thus, it is of great importance to perform in-depth explorations on the genes that are closely associated with the stress-resistant traits in rice. The existing rice SNP databases have made considerable contributions to rice genomic variation information but none of them have a particular focus on integrating stress-resistant variation and related phenotype data into one web resource.

Results

Rice Stress-Resistant SNP database (http://bioinformatics.fafu.edu.cn/RSRS) mainly focuses on SNPs specific to biotic and abiotic stress-resistant ability in rice, and presents them in a unified web resource platform. The Rice Stress-Resistant SNP (RSRS) database contains over 9.5 million stress-resistant SNPs and 797 stress-resistant candidate genes in rice, which were detected from more than 400 stress-resistant rice varieties. We incorporated the SNPs function, genome annotation and phenotype information into this database. Besides, the database has a user-friendly web interface for users to query, browse and visualize a specific SNP efficiently. RSRS database allows users to query the SNP information and their relevant annotations for individual variety or more varieties. The search results can be visualized graphically in a genome browser or displayed in formatted tables. Users can also align SNPs between two or more rice accessions.

Conclusion

RSRS database shows great utility for scientists to further characterize the function of variants related to environmental stress-resistant ability in rice.",2019-12-23 +,The Relationship Between COVID-19 Cases and COVID-19 Testing: a Panel Data Analysis on OECD Countries,"Testing, one of the methods to combat the COVID-19 outbreak, is highly recommended in all countries. Empirical studies on how testing relates to the control of new cases will help highlight the importance of testing in efforts to combat the epidemic. Therefore, this study aims to investigate the relationship between COVID-19 testing and COVID-19 cases. We use panel autoregressive distributed lag analysis to test the effect of COVID-19 test number on the COVID-19 new cases. The data of the study cover the period from March 19, 2020, to May 01, 2020, for 14 OECD countries. Data were obtained from the https://ourworldindata.org/coronavirus website. According to the results, this study shows that increasing the COVID-19 test number will help to reduce new COVID-19 cases. On the other hand, increase in the test number per thousand will probably not contribute to reducing new COVID-19 cases, because countries do not already test by random selection, and even if they do, it will not contribute to detection and isolating of the new cases without identifying risky groups.",2021-04-13 +34117863,Methylation-eQTL Analysis in Cancer Research.,"

Motivation

DNA methylation is a key epigenetic factor regulating gene expression. While promoter methylation has been well studied, recent publications have revealed that functionally important methylation also occurs in intergenic and distal regions, and varies across genes and tissue types. Given the growing importance of inter-platform integrative genomic analyses, there is an urgent need to develop methods to discover and characterize gene-level relationships between methylation and expression.

Results

We introduce a novel sequential penalized regression approach to identify methylation-expression quantitative trait loci (methyl-eQTLs), a term that we have coined to represent, for each gene and tissue type, a sparse set of CpG loci best explaining gene expression and accompanying weights indicating direction and strength of association. Using TCGA and MD Anderson colorectal cohorts to build and validate our models, we demonstrate our strategy better explains expression variability than current commonly used gene-level methylation summaries. The methyl-eQTLs identified by our approach can be used to construct gene-level methylation summaries that are maximally correlated with gene expression for use in integrative models, and produce a tissue-specific summary of which genes appear to be strongly regulated by methylation. Our results introduce an important resource to the biomedical community for integrative genomics analyses involving DNA methylation.

Availability and implementation

We produce an R Shiny app (https://rstudio-prd-c1.pmacs.upenn.edu/methyl-eQTL/) that interactively presents methyl-eQTL results for colorectal, breast, and pancreatic cancer. The source R code for this work is provided in the supplement.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-06-12 +33950201,Trips-Viz: an environment for the analysis of public and user-generated ribosome profiling data.,"Trips-Viz (https://trips.ucc.ie/) is an interactive platform for the analysis and visualization of ribosome profiling (Ribo-Seq) and shotgun RNA sequencing (RNA-seq) data. This includes publicly available and user generated data, hence Trips-Viz can be classified as a database and as a server. As a database it provides access to many processed Ribo-Seq and RNA-seq data aligned to reference transcriptomes which has been expanded considerably since its inception. Here, we focus on the server functionality of Trips-viz which also has been greatly improved. Trips-viz now enables visualisation of proteomics data from a large number of processed mass spectrometry datasets. It can be used to support translation inferred from Ribo-Seq data. Users are now able to upload a custom reference transcriptome as well as data types other than Ribo-Seq/RNA-Seq. Incorporating custom data has been streamlined with RiboGalaxy (https://ribogalaxy.ucc.ie/) integration. The other new functionality is the rapid detection of translated open reading frames (ORFs) through a simple easy to use interface. The analysis of differential expression has been also improved via integration of DESeq2 and Anota2seq in addition to a number of other improvements of existing Trips-viz features.",2021-07-01 +31691826,Ensembl 2020.,"The Ensembl (https://www.ensembl.org) is a system for generating and distributing genome annotation such as genes, variation, regulation and comparative genomics across the vertebrate subphylum and key model organisms. The Ensembl annotation pipeline is capable of integrating experimental and reference data from multiple providers into a single integrated resource. Here, we present 94 newly annotated and re-annotated genomes, bringing the total number of genomes offered by Ensembl to 227. 
This represents the single largest expansion of the resource since its inception. We also detail our continued efforts to improve human annotation, developments in our epigenome analysis and display, a new tool for imputing causal genes from genome-wide association studies and visualisation of variation within a 3D protein model. Finally, we present information on our new website. Both software and data are made available without restriction via our website, online tools platform and programmatic interfaces (available under an Apache 2.0 license) and data updates made available four times a year.",2020-01-01 +31352145,"MMRdb: Measles, mumps, and rubella viruses database and analysis resource.","Measles, mumps, and rubella viruses are well known human pathogens that cause mild to severe illnesses. Despite the existence of MMR vaccines since 1971, outbreaks have been largely documented even in highly vaccinated populations. There is a pressing need to develop a resource to monitor genetic and antigenic variations among these viruses. Here, we introduced MMRdb, a web central database and analysis resource for measles, mumps, and rubella viruses. Users can search viruses at gene level and obtain sequence information based on gene product, geographic location, year, or host. The MMRdb also catalogs experimentally verified B cells and T cells antigenic epitopes data. A set of computation tools such as multiple sequence alignment, Geo Chart, and sequence similarity BLAST search has been implemented in a user-friendly database. The main features of this database will assist researchers in monitoring genetics and antigenic variations, tracking geographic spread with regards of sequence information, and facilitate the development of diagnostics, vaccines, and immunotherapeutics. 
Database URL: http://mmrdb.org.",2019-07-26 +32562744,Dual-domain cascade of U-nets for multi-channel magnetic resonance image reconstruction.,"The U-net is a deep-learning network model that has been used to solve a number of inverse problems. In this work, the concatenation of two-element U-nets, termed the W-net, operating in k-space (K) and image (I) domains, were evaluated for multi-channel magnetic resonance (MR) image reconstruction. The two-element network combinations were evaluated for the four possible image-k-space domain configurations: a) W-net II, b) W-net KK, c) W-net IK, and d) W-net KI. Selected four element (WW-nets) and six element (WWW-nets) networks were also examined. Two configurations of each network were compared: 1) each coil channel was processed independently, and 2) all channels were processed simultaneously. One hundred and eleven volumetric, T1-weighted, 12-channel coil k-space datasets were used in the experiments. Normalized root mean squared error, peak signal-to-noise ratio and visual information fidelity were used to assess the reconstructed images against the fully sampled reference images. Our results indicated that networks that operate solely in the image domain were better when independently processing individual channels of multi-channel data. Dual-domain methods were better when simultaneously reconstructing all channels of multi-channel data. In addition, the best cascade of U-nets performed better (p < 0.01) than the previously published, state-of-the-art Deep Cascade and Hybrid Cascade models in three out of four experiments.",2020-06-17 +33315308,Protein Sequence Analysis Using the MPI Bioinformatics Toolkit.,"The MPI Bioinformatics Toolkit (https://toolkit.tuebingen.mpg.de) provides interactive access to a wide range of the best-performing bioinformatics tools and databases, including the state-of-the-art protein sequence comparison methods HHblits and HHpred. 
The Toolkit currently includes 35 external and in-house tools, covering functionalities such as sequence similarity searching, prediction of sequence features, and sequence classification. Due to this breadth of functionality, the tight interconnection of its constituent tools, and its ease of use, the Toolkit has become an important resource for biomedical research and for teaching protein sequence analysis to students in the life sciences. In this article, we provide detailed information on utilizing the three most widely accessed tools within the Toolkit: HHpred for the detection of homologs, HHpred in conjunction with MODELLER for structure prediction and homology modeling, and CLANS for the visualization of relationships in large sequence datasets. © 2020 The Authors. Basic Protocol 1: Sequence similarity searching using HHpred Alternate Protocol: Pairwise sequence comparison using HHpred Support Protocol: Building a custom multiple sequence alignment using PSI-BLAST and forwarding it as input to HHpred Basic Protocol 2: Calculation of homology models using HHpred and MODELLER Basic Protocol 3: Cluster analysis using CLANS.",2020-12-01 +34506132,Special Features of COVID-19 in the FMODB: Fragment Molecular Orbital Calculations and Interaction Energy Analysis of SARS-CoV-2-Related Proteins.,"SARS-CoV-2 is the causative agent of coronavirus (known as COVID-19), the virus causing the current pandemic. There are ongoing research studies to develop effective therapeutics and vaccines against COVID-19 using various methods and many results have been published. The structure-based drug design of SARS-CoV-2-related proteins is promising, however, reliable information regarding the structural and intra- and intermolecular interactions is required. We have conducted studies based on the fragment molecular orbital (FMO) method for calculating the electronic structures of protein complexes and analyzing their quantitative molecular interactions. 
This enables us to extensively analyze the molecular interactions in residues or functional group units acting inside the protein complexes. Such precise interaction data are available in the FMO database (FMODB) (https://drugdesign.riken.jp/FMODB/). Since April 2020, we have performed several FMO calculations on the structures of SARS-CoV-2-related proteins registered in the Protein Data Bank. We have published the results of 681 structures, including three structural proteins and 11 nonstructural proteins, on the COVID-19 special page (as of June 8, 2021). In this paper, we describe the entire COVID-19 special page of the FMODB and discuss the calculation results for various proteins. These data not only aid the interpretation of experimentally determined structures but also the understanding of protein functions, which is useful for rational drug design for COVID-19.",2021-09-10 +34724150,Opioid receptors signaling network.,"Opioid receptors belong to the class A G-protein-coupled receptors and are activated by alkaloid opiates such as morphine, and endogenous ligands such as endorphins and enkephalins. Opioid receptors are widely distributed in the human body and are involved in numerous physiological processes through three major classical opioid receptor subtypes; the mu, delta and kappa along with a lesser characterized subtype, opioid receptor-like (ORL1). Opioids are the most potent analgesics and have been extensively used as a therapeutic drug for the treatment of pain and related disorders. Chronic administration of clinically used opioids is associated with adverse effects such as drug tolerance, addiction and constipation. Several investigations attempted to identify the molecular signaling networks associated with endogenous as well as synthetic opiates, however, there is a paucity of a cumulative depiction of these signaling events. 
Here, we report a systemic collection of downstream molecules pertaining to four subtypes of opioid receptors (MOR, KOR, DOR and ORL1) in the form of a signaling pathway map. We manually curated reactions induced by the activation of opioid receptors from the literature into five categories- molecular association, activation/inhibition, catalysis, transport, and gene regulation. This led to a dataset of 180 molecules, which is collectively represented in the opioid receptor signaling network following NetPath criteria. We believe that the public availability of an opioid receptor signaling pathway map can accelerate biomedical research in this area because of its high therapeutic significance. The opioid receptors signaling pathway map is uploaded to a freely available web resource, WikiPathways enabling ease of access ( https://www.wikipathways.org/index.php/Pathway:WP5093 ).",2021-11-01 +34716856,Systematic review of the receptor tyrosine kinase superfamily in neuroblastoma pathophysiology.,"

Background

Neuroblastoma is a devastating disease accounting for 15% of all childhood cancer deaths. Yet, our understanding of key molecular drivers such as receptor tyrosine kinases (RTKs) in this pathology remains poorly clarified. Here, we provide a systematic analysis of the RTK superfamily in the context of neuroblastoma pathogenesis.

Methods

Statistical correlations for all RTK family members' expression to neuroblastoma patient survival across 10 independent patient cohorts were annotated, synthesized, and ranked using the R2: Genomics Analysis and Visualization Platform. Gene expression of selected members across different cancer cell lines was further analyzed in the Cancer Cell Line Encyclopedia, part of the Cancer Dependency Map portal (depmap portal ( http://depmap.org )). Finally, we provide a detailed literature review for highly ranked candidates.

Results

Our analysis defined two subsets of RTKs showing robust associations with either better or worse survival, constituting potential novel players in neuroblastoma pathophysiology, diagnosis, and therapy. We review the available literature regarding the oncogenic functions of these RTKs, their roles in neuroblastoma pathophysiology, and potential utility as therapeutic targets.

Conclusions

Our systematic analysis and review of the RTK superfamily in neuroblastoma pathogenesis provides a new resource to guide the research community towards focused efforts investigating signaling pathways that contribute to neuroblastoma tumor establishment, growth, and/or aggressiveness and targeting these druggable molecules in novel therapeutic strategies.",2021-10-30 +32402073,AcrFinder: genome mining anti-CRISPR operons in prokaryotes and their viruses.,"Anti-CRISPR (Acr) proteins encoded by (pro)phages/(pro)viruses have a great potential to enable a more controllable genome editing. However, genome mining new Acr proteins is challenging due to the lack of a conserved functional domain and the low sequence similarity among experimentally characterized Acr proteins. We introduce here AcrFinder, a web server (http://bcb.unl.edu/AcrFinder) that combines three well-accepted ideas used by previous experimental studies to pre-screen genomic data for Acr candidates. These ideas include homology search, guilt-by-association (GBA), and CRISPR-Cas self-targeting spacers. Compared to existing bioinformatics tools, AcrFinder has the following unique functions: (i) it is the first online server specifically mining genomes for Acr-Aca operons; (ii) it provides a most comprehensive Acr and Aca (Acr-associated regulator) database (populated by GBA-based Acr and Aca datasets); (iii) it combines homology-based, GBA-based, and self-targeting approaches in one software package; and (iv) it provides a user-friendly web interface to take both nucleotide and protein sequence files as inputs, and output a result page with graphic representation of the genomic contexts of Acr-Aca operons. The leave-one-out cross-validation on experimentally characterized Acr-Aca operons showed that AcrFinder had a 100% recall. 
AcrFinder will be a valuable web resource to help experimental microbiologists discover new Anti-CRISPRs.",2020-07-01 +34296749,circExp database: an online transcriptome platform for human circRNA expressions in cancers. ,"Circular RNA (circRNA) is a highly stable, single-stranded, closed-loop RNA that works as RNA or as a protein decoy to regulate gene expression. In humans, thousands of circRNA transcriptional products precisely express in specific developmental stages, tissues and cell types. Due to their stability and specificity, circRNAs are ideal biomarkers for cancer diagnosis and prognosis. To provide an integrated and standardized circRNA expression profile for human cancers, we performed extensive data curation across 11 technical platforms, collecting 48 expression profile data sets for 18 cancer types and amassing 860 751 expression records. We also identified 189 193 differential expression signatures that are significantly different between normal and cancer samples. All the pre-calculated expression analysis results are organized into 132 plain text files for bulk download. Our online interface, circExp, provides data browsing and search functions. For each data set, a dynamic expression heatmap provides a profile overview. Based on the processed data, we found that 52 circRNAs were consistently and differentially expressed in 20 or more processed analyses. By mapping those circRNAs to their parent protein-coding genes, we found that they may have profoundly affected the survival of 10 797 patients in the The Cancer Genome Atlas pan-cancer data set. In sum, we developed circExp and demonstrated that it is useful to identify circRNAs that have potential diagnostic and prognostic significance for a variety of cancer types. In this online and reusable database, found at http://soft.bioinfo-minzhao.org/circexp, we have provided pre-calculated expression data about circRNAs and their parental genes, as well as data browsing and searching functions. 
Database URL: http://soft.bioinfominzhao.org/circexp/.",2021-07-01 +32621601,SAGER: a database of Symbiodiniaceae and Algal Genomic Resource. ,"Symbiodiniaceae dinoflagellates are essential endosymbionts of reef building corals and some other invertebrates. Information of their genome structure and function is critical for understanding coral symbiosis and bleaching. With the rapid development of sequencing technology, genome draft assemblies of several Symbiodiniaceae species and diverse marine algal genomes have become publicly available but spread in multiple separate locations. Here, we present a Symbiodiniaceae and Algal Genomic Resource Database (SAGER), a user-friendly online repository for integrating existing genomic data of Symbiodiniaceae species and diverse marine algal gene sets from MMETSP and PhyloDB databases. Relevant algal data are included to facilitate comparative analyses. The database is freely accessible at http://sampgr.org.cn. It provides comprehensive tools for studying gene function, expression and comparative genomics, including search tools to identify gene information from Symbiodiniaceae species, and BLAST tool to find orthologs from marine algae and protists. Moreover, SAGER integrates transcriptome datasets derived from diverse culture conditions of corresponding Symbiodiniaceae species. SAGER was developed with the capacity to incorporate future Symbiodiniaceae and algal genome and transcriptome data, and will serve as an open-access and sustained platform providing genomic and molecular tools that can be conveniently used to study Symbiodiniaceae and other marine algae. 
Database URL: http://sampgr.org.cn.",2020-01-01 +34752200,Mechanisms and Methods to Understand Depressive Symptoms.,"Depressive symptoms, feelings of sadness, anger, and loss that interfere with a person's daily life, are prevalent health concerns across populations that significantly result in adverse health outcomes with direct and indirect economic burdens at a national and global level. This article aims to synthesize known mechanisms of depressive symptoms and the established and emerging methodologies used to understand depressive symptoms; implications and directions for future nursing research are discussed. A comprehensive search was performed by Cumulative Index to Nursing and Allied Health Literature, MEDLINE, and PUBMED databases between 2000-2021 to examine contributing factors of depressive symptoms. Many environmental, psychological, and physiological factors are associated with the development or increased severity of depressive symptoms (anhedonia, fatigue, sleep and appetite disturbances to depressed mood). This paper discusses biological and psychological theories that guide our understanding of depressive symptoms, as well as known biomarkers (gut microbiome, specific genes, multi-cytokine, and hormones) and established and emerging methods. Disruptions within the nervous system, hormonal and neurotransmitters levels, brain structure, gut-brain axis, leaky-gut syndrome, immune and inflammatory process, and genetic variations are significant mediating mechanisms in depressive symptomology. Nursing research and practice are at the forefront of furthering depressive symptoms' mechanisms and methods. Utilizing advanced technology and measurement tools (big data, machine learning/artificial intelligence, and multi-omic approaches) can provide insight into the psychological and biological mechanisms leading to effective intervention development. 
Thus, understanding depressive symptomology provides a pathway to improve patients' health outcomes, leading to reduced morbidity and mortality and the overall nation-wide economic burden.Supplemental data for this article is available online at https://doi.org/10.1080/01612840.2021.1998261 .",2021-11-09 +,Genetic diversity among wild pomegranate (Punica granatum) in Azad Jammu and Kashmir region of Pakistan,"Pomegranate (Punica granatum L.), one of the most important tropical fruits in Azad Jammu and Kashmir regions of Pakistan, is highly valued for its nutrition and medicinal purposes. Although pomegranate is native to this region, the genetic diversity among wild pomegranate accessions is currently unknown. Such information would be vital for germplasm conservation and breeding efforts. In the current study, genetic diversity among forty-eight wild pomegranate accessions collected from different agro-ecological zones of Azad Jammu and Kashmir was assessed using 41 simple sequence repeat (SSR) markers.The markers revealed 303 alleles averaging 7.39 alleles per marker. Polymorphic information content ranged from 0.12 (PGCT093B) to 0.88 (Pom006), with a mean of 0.54. The average genetic distance (GD) across all genotypes was 0.52, and was lowest between Chattar Class and Thorar genotypes (GD = 0.27), but highest between Khun Bandway and Akhor Ban (GD = 0.74). A neighbor-joining dendrogram separated the genotypes into three major clusters, with further sub-clustering within each cluster.Overall, the results presented here show significant genetic diversity among wild pomegranate accessions in Azad Jammu and Kashmir region of Pakistan. These accessions present a valuable genetic resource to breeding and cultivar improvement programs within the region.How to cite: Aziz S, Firdous S, Rahman H, et al. Genetic diversity among wild pomegranate (Punica granatum) in Azad Jammu and Kashmir region of Pakistan. Electron J Biotechnol 2020;46. 
https://doi.org/10.1016/j.ejbt.2020.06.002.",2020-07-01 +33677507,HIR V2: a human interactome resource for the biological interpretation of differentially expressed genes via gene set linkage analysis. ,"To facilitate biomedical studies of disease mechanisms, a high-quality interactome that connects functionally related genes is needed to help investigators formulate pathway hypotheses and to interpret the biological logic of a phenotype at the biological process level. Interactions in the updated version of the human interactome resource (HIR V2) were inferred from 36 mathematical characterizations of six types of data that suggest functional associations between genes. This update of the HIR consists of 88 069 pairs of genes (23.2% functional interactions of HIR V2 are in common with the previous version of HIR), representing functional associations that are of strengths similar to those between well-studied protein interactions. Among these functional interactions, 57% may represent protein interactions, which are expected to cover 32% of the true human protein interactome. The gene set linkage analysis (GSLA) tool is developed based on the high-quality HIR V2 to identify the potential functional impacts of the observed transcriptomic changes, helping to elucidate their biological significance and complementing the currently widely used enrichment-based gene set interpretation tools. A case study shows that the annotations reported by the HIR V2/GSLA system are more comprehensive and concise compared to those obtained by the widely used gene set annotation tools such as PANTHER and DAVID. The HIR V2 and GSLA are available at http://human.biomedtzc.cn.",2021-03-01 +34651182,"https://botryosphaeriales.org/, an online platform for up-to-date classification and account of taxa of Botryosphaeriales. ","Fungi are eukaryotes that inhabit various ecosystems worldwide and have a decomposing effect that other organisms cannot replace. 
Fungi are divided into two main groups depending on how their sexual spores are formed, viz. Ascomycota and Basidiomycota. The members of Botryosphaeriales (Dothideomycetes, Ascomycota) are ubiquitous. They are pathogenic on a wide range of hosts, causing diverse diseases including dieback, canker, leaf spots and root rots and are also reported as saprobes and endophytes worldwide. As an important fungal group, of which most are plant pathogens, it is necessary to organize data and information on Botryosphaeriales so that scientific literature can be used effectively. For this purpose, a new website, https://botryosphaeriales.org is established to gather all published data together with updates on the present taxonomy of Botryosphaeriales. The website consists of an easy-to-operate searching system and provides an up-to-date classification together with accounts of Botryosphaeriales taxa, including colour illustrations, descriptions, notes and numbers of species in each genus, as well as their classification. Thus, readers will be able to obtain information on botryosphaerialean taxa through this platform. Database URL: https://botryosphaeriales.org/.",2021-10-01 +,Prediction of functional outcome in bipolar disorder: Effects of cognitive remediation and cognitive psychoeducational group therapy,"

Introduction

In bipolar patients cognitive deficits are an important feature. Persisting neurocognitive impairment is associated with low psychosocial functioning.

Objectives

The aim of this presentation is to discuss potential cognitive, clinical and treatment-dependent predictors for functional impairment in bipolar patients.

Methods

In a first study (1) at the Medical University of Vienna 43 remitted bipolar patients and 40 healthy controls were assessed testing specifically attention, memory, verbal fluency and executive functions. In a randomized controlled trial, patients were assigned to two treatment conditions as add-on to state-of-the-art pharmacotherapy: cognitive psychoeducational group therapy over 14 weeks or treatment-as-usual. At 12 months after therapy, functional impairment and severity of symptoms were assessed. In a second, ongoing study, in-patients from a defined catchment area in Vienna (12th, 13th and 23rd district) were assessed via SCIP (Purdon S. 2005. The screen for cognitive impairment in psychiatry: Administration and psychometric properties. Edmonton, Alberta, Canada: PNL Inc.). The SCIP was performed before and after cognitive remediation. The effects of treatment on functioning were measured with the clinical Global Impression Scale (CGI).

Results

Compared to controls, bipolar patients showed lower performance in executive function, sustained attention, verbal learning and verbal fluency. Cognitive psychoeducational group therapy and attention predicted occupational functioning. In the second study, SCIP and CGI values showed improvement after treatment.

Conclusions

Our data support the idea that cognition affects outcome. Bipolar patients benefit from cognitive psychoeducational group therapy in the domain of occupational life. (1) Sachs G et al. Front. Psychiatry, 23 November 2020 | https://doi.org/10.3389/fpsyt.2020.530026

Disclosure

No significant relationships.",2021-08-13 +34264745,"Benchmarking and Testing Machine Learning Approaches with BARRA:CuRDa, a Curated RNA-Seq Database for Cancer Research.","RNA-seq is gradually becoming the dominating technique employed to access the global gene expression in biological samples, allowing more flexible protocols and robust analysis. However, the nature of RNA-seq results imposes new data-handling challenges when it comes to computational analysis. With the increasing employment of machine learning (ML) techniques in biomedical sciences, databases that could provide curated data sets treated with state-of-the-art approaches already adapted to ML protocols, become essential for testing new algorithms. In this study, we present the Benchmarking of ARtificial intelligence Research: Curated RNA-seq Database (BARRA:CuRDa). BARRA:CuRDa was built exclusively for cancer research and is composed of 17 handpicked RNA-seq data sets for Homo sapiens that were gathered from the Gene Expression Omnibus, using rigorous filtering criteria. All data sets were individually submitted to sample quality analysis, removal of low-quality bases and artifacts from the experimental process, removal of ribosomal RNA, and estimation of transcript-level abundance. Moreover, all data sets were tested using standard approaches in the field, which allows them to be used as benchmark to new ML approaches. A feature selection analysis was also performed on each data set to investigate the biological accuracy of basic techniques. Results include genes already related to their specific tumoral tissue a large amount of long noncoding RNA and pseudogenes. BARRA:CuRDa is available at http://sbcb.inf.ufrgs.br/barracurda.",2021-07-14 +32345779,PSCRIdb: A database of regulatory interactions and networks of pluripotent stem cell lines. 
,"Pluripotency in stem cells is regulated by a complex network between the transcription factors, signaling molecules, mRNAs, and epigenetic regulators like non-coding RNAs. Different pluripotent stem cell (PSC) lines were isolated and characterized to study the regulatory network topology to understand the mechanism that control developmental potential of pluripotent cells. PSCRIdb is a manually curated database of regulatory interactions including protein-protein, protein-DNA, gene-gene, and miRNA-mRNA interactions in mouse and human pluripotent stem cells including embryonic stem cells and embryonic carcinoma cells. At present, 22 different mouse and human pluripotent stem-cell-line-specific regulatory interactions are compiled in the database. Detailed information of the four types of interaction data are presented in tabular format and graphical network view in Cytoscape layout. The database is available at http://bicresources.jcbose.ac.in/ ssaha4/pscridb. The database contains 3037 entries of experimentally validated molecular interactions that can be useful for systematic study of pluripotency integrating multi-omics data. In summary, the database can be a useful resource for identification of regulatory networks present in different pluripotent stem cell lines.",2020-01-01 +34213323,pdCSM-cancer: Using Graph-Based Signatures to Identify Small Molecules with Anticancer Properties.,"The development of new, effective, and safe drugs to treat cancer remains a challenging and time-consuming task due to limited hit rates, restraining subsequent development efforts. Despite the impressive progress of quantitative structure-activity relationship and machine learning-based models that have been developed to predict molecule pharmacodynamics and bioactivity, they have had mixed success at identifying compounds with anticancer properties against multiple cell lines. 
Here, we have developed a novel predictive tool, pdCSM-cancer, which uses a graph-based signature representation of the chemical structure of a small molecule in order to accurately predict molecules likely to be active against one or multiple cancer cell lines. pdCSM-cancer represents the most comprehensive anticancer bioactivity prediction platform developed till date, comprising trained and validated models on experimental data of the growth inhibition concentration (GI50%) effects, including over 18,000 compounds, on 9 tumor types and 74 distinct cancer cell lines. Across 10-fold cross-validation, it achieved Pearson's correlation coefficients of up to 0.74 and comparable performance of up to 0.67 across independent, non-redundant blind tests. Leveraging the insights from these cell line-specific models, we developed a generic predictive model to identify molecules active in at least 60 cell lines. Our final model achieved an area under the receiver operating characteristic curve (AUC) of up to 0.94 on 10-fold cross-validation and up to 0.94 on independent non-redundant blind tests, outperforming alternative approaches. We believe that our predictive tool will provide a valuable resource to optimizing and enriching screening libraries for the identification of effective and safe anticancer molecules. To provide a simple and integrated platform to rapidly screen for potential biologically active molecules with favorable anticancer properties, we made pdCSM-cancer freely available online at http://biosig.unimelb.edu.au/pdcsm_cancer.",2021-07-02 +32576192,The genetic and pharmacogenomic landscape of snoRNAs in human cancer.,"Emerging evidence has revealed significant roles for small nucleolar RNAs (snoRNAs) in tumorigenesis. However, the genetic and pharmacogenomic landscape of snoRNAs has not been characterized. 
Using the genotype and snoRNA expression data from The Cancer Genome Atlas, we characterized the effects of genetic variants on snoRNAs across 29 cancer types and further linked related alleles with patient survival as well as genome-wide association study risk loci. Furthermore, we characterized the impact of snoRNA expression on drug response in patients to facilitate the clinical utility of snoRNAs in cancer. We also developed a user-friendly data resource, GPSno (http://hanlab.uth.edu/GPSno), with multiple modules for researchers to visualize, browse, and download multi-dimensional data. Our study provides a comprehensive genetic and pharmacogenomic landscape of snoRNAs, which will shed light on future clinical considerations for the development of snoRNA-based targeted therapies.",2020-06-23 +,IoT based low cost and intelligent module for smart irrigation system,"Agriculture contributes to a major share in the Indian economy and most of its people are dependent on it for their livelihood. This makes water an important resource that needs to be preserved using the latest available technologies. Apart from being fundamental in industry 4.0, IoT also extends its capability to smart farming. Work proposed here targets to develop a low cost intelligent system for smart irrigation. It uses IoT to make devices used in the system to talk and connect on their own, with capabilities like: admin mode for user interaction, one-time setup for irrigation schedule estimation, neural based decision making for intelligent support and remote data monitoring. A sample crop test-bed has been chosen to present results of the proposed system, that include irrigation schedule, neural net decision making and remote data viewing. Neural network provides required intelligence to the device that considers current sensor input and masks the irrigation schedule for efficient irrigation. 
The system uses MQTT and HTTP to keep the user informed about the current crop situation even from a distant location. The proposed system proves beneficial with its intelligence, low cost and portability, making it suitable for greenhouse, farms, etc.",2019-07-01 +34597405,"eggNOG-mapper v2: Functional Annotation, Orthology Assignments, and Domain Prediction at the Metagenomic Scale.","Even though automated functional annotation of genes represents a fundamental step in most genomic and metagenomic workflows, it remains challenging at large scales. Here, we describe a major upgrade to eggNOG-mapper, a tool for functional annotation based on precomputed orthology assignments, now optimized for vast (meta)genomic data sets. Improvements in version 2 include a full update of both the genomes and functional databases to those from eggNOG v5, as well as several efficiency enhancements and new features. Most notably, eggNOG-mapper v2 now allows for: 1) de novo gene prediction from raw contigs, 2) built-in pairwise orthology prediction, 3) fast protein domain discovery, and 4) automated GFF decoration. eggNOG-mapper v2 is available as a standalone tool or as an online service at http://eggnog-mapper.embl.de.",2021-12-01 +31680160,ELM-the eukaryotic linear motif resource in 2020.,"The eukaryotic linear motif (ELM) resource is a repository of manually curated experimentally validated short linear motifs (SLiMs). Since the initial release almost 20 years ago, ELM has become an indispensable resource for the molecular biology community for investigating functional regions in many proteins. In this update, we have added 21 novel motif classes, made major revisions to 12 motif classes and added >400 new instances mostly focused on DNA damage, the cytoskeleton, SH2-binding phosphotyrosine motifs and motif mimicry by pathogenic bacterial effector proteins. 
The current release of the ELM database contains 289 motif classes and 3523 individual protein motif instances manually curated from 3467 scientific publications. ELM is available at: http://elm.eu.org.",2020-01-01 +34966738,Research Progress on the Treatment of Premature Ovarian Failure Using Mesenchymal Stem Cells: A Literature Review.,"Premature ovarian failure (POF) has become one of the main causes of infertility in women of childbearing age and the incidence of POF is increasing year by year, seriously affecting the physical and mental health of patients and increasing the economic burden on families and society as a whole. The etiology and pathogenesis of POF are complex and not very clear at present. Currently, hormone replacement therapy is mainly used to improve the symptoms of low estrogen, but cannot fundamentally solve the fertility problem. In recent years, stem cell (SC) transplantation has become one of the research hotspots in the treatment of POF. The results from animal experiments bring hope for the recovery of ovarian function and fertility in patients with POF. In this article, we searched the published literature between 2000 and 2020 from the PubMed database (https://pubmed.ncbi.nlm.nih.gov), and summarized the preclinical research data and possible therapeutic mechanism of mesenchymal stem cells (MSCs) in the treatment of POF. Our aim is to provide useful information for understanding POF and reference for follow-up research and treatment of POF.",2021-12-13 +32358040,Quantitative Proteomics of All 14 Renal Tubule Segments in Rat.,"

Background

Previous research has used RNA sequencing in microdissected kidney tubules or single cells isolated from the kidney to profile gene expression in each type of kidney tubule epithelial cell. However, because proteins, not mRNA molecules, mediate most cellular functions, it is desirable to know the identity and amounts of each protein species to understand function. Recent improvements in the sensitivity of mass spectrometers offered us the ability to quantify the proteins expressed in each of 14 different renal tubule segments from rat.

Methods

We manually dissected kidney tubules from rat kidneys and subjected samples to protein mass spectrometry. We used the ""proteomic ruler"" technique to estimate the number of molecules of each protein per cell.

Results

Over the 44 samples analyzed, the average number of quantified proteins per segment was 4234, accounting for at least 99% of protein molecules in each cell. We have made the data publicly available online at the Kidney Tubule Expression Atlas website (https://esbl.nhlbi.nih.gov/KTEA/). Protein abundance along the renal tubule for many commonly studied water and solute transport proteins and metabolic enzymes matched expectations from prior localization studies, demonstrating the overall reliability of the data. The site features a ""correlated protein"" function, which we used to identify cell type-specific transcription factors expressed along the renal tubule.

Conclusions

We identified and quantified proteins expressed in each of the 14 segments of rat kidney tubules and used the proteomic data that we obtained to create an online information resource, the Kidney Tubule Expression Atlas. This resource will allow users throughout the world to browse segment-specific protein expression data and download them for their own research.",2020-05-01 +36533093,"Sherlock: an open-source data platform to store, analyze and integrate Big Data for biology.","In the era of Big Data, data collection underpins biological research more so than ever before. In many cases this can be as time-consuming as the analysis itself, requiring downloading multiple different public databases, with different data structures, and in general, spending days before answering any biological questions. To solve this problem, we introduce an open-source, cloud-based big data platform, called Sherlock ( https://earlham-sherlock.github.io/). Sherlock provides a gap-filling way for biologists to store, convert, query, share and generate biology data, while ultimately streamlining bioinformatics data management. The Sherlock platform provides a simple interface to leverage big data technologies, such as Docker and PrestoDB. Sherlock is designed to analyse, process, query and extract the information from extremely complex and large data sets. Furthermore, Sherlock is capable of handling different structured data (interaction, localization, or genomic sequence) from several sources and converting them to a common optimized storage format, for example to the Optimized Row Columnar (ORC). This format facilitates Sherlock's ability to quickly and easily execute distributed analytical queries on extremely large data files as well as share datasets between teams. The Sherlock platform is freely available on Github, and contains specific loader scripts for structured data sources of genomics, interaction and expression databases. 
With these loader scripts, users are able to easily and quickly create and work with the specific file formats, such as JavaScript Object Notation (JSON) or ORC. For computational biology and large-scale bioinformatics projects, Sherlock provides an open-source platform empowering data management, data analytics, data integration and collaboration through modern big data technologies.",2021-05-21 +,"First Report of ‘Candidatus Phytoplasma trifolii’-Related Strain Associated with Flower Abortion and Necrosis in Prickly Pear Cactus in Zacatecas, Mexico","Mexico is the most important producer of prickly pear cactus (Opuntia ficus-indica) worldwide with a total of 471,637 tons on 46,555 ha in 2018. In July 2019, the presence of symptoms of flower abortion, foliar necrosis, loss of thorns, yellowing, deformation, and proliferation of cladodes was documented in approximately 8% of prickly pear plants from 20 ha of commercial fields in the municipality of Pinos in Zacatecas, Mexico (22°18′0′′ N, 101°34′0′′ W). Total DNA was extracted from 10 symptomatic and five symptomless prickly pear plants. Direct and nested PCR assays targeting the 16S rRNA gene were used to confirm the association of phytoplasma with this new symptomatology. The primers used for direct PCR were P1 (5′-AAGAGTTTGATCCTGGCTCAGGATT-3′) and Tint (5′-TCAGGCGTGTGCTCTAACCAGC-3′) (Smart et al. 1996), and for nested PCR, R16F2n (5′-GAAACGACTGCTAAGACTGG-3′) and R16R2 (5′-TGACGGGCGGTGTGTACAAACCCCG-3′) (Gundersen and Lee 1996). No PCR products were obtained from the symptomless plants. The nested PCR amplicons (1.2 kb) amplified from all symptomatic plants were cloned separately and directly sequenced. BLAST analysis of the 16S rDNA sequences revealed that they shared 100% sequence identity to each other and 99.0% sequence identity with those of the 16SrVI group, ‘Candidatus Phytoplasma trifolii’ strains. 
Computer-simulated restriction fragment length polymorphism (RFLP) analysis of the prickly pear phytoplasma sequence (GenBank accession no. MT507114) was performed using iPhyClassifier (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi), and RFLP profiles were compared with each phytoplasma group and subgroup (Zhao et al. 2013), confirming that the analyzed sequence shared 99.0% identity with those of the group 16SrVI (reference strain AY390261), and also it was classified into a new subgroup (16SrVI-K). There are reports of phytoplasmas of the 16SrI (Fucikovsky-Zak et al. 2011), II (Hernández-Pérez et al. 2009), and XIII (Suaste et al. 2012) groups associated with effects in prickly pear cactus in Mexico. However, there are no reports of 16SrVI group phytoplasmas infecting Cactaceae species. ‘Ca. P. trifolii’ has been related to other diseases in many important crops in Mexico (Reveles-Torres et al. 2018), and its proliferation in prickly pear cactus fields has increased due to application of mineral and organic fertilizers (Santiago-Lorenzo et al. 2016), which reduce the use of insecticides and increase the presence of insects that can act as possible vectors of phytoplasmas. This is the first report of ‘Ca. P. trifolii’ associated with a new disease in prickly pear cactus, and the identification of a phytoplasma sequence belongs to a new subgroup (16SrVI-K). The showed results establish the importance of implementing an integrated management program to reduce the proliferation and incidence of ‘Ca. P. trifolii’ in economically important crops in Mexico and other countries.",2020-12-01 +34120586,NUCOME: A comprehensive database of nucleosome organization referenced landscapes in mammalian genomes.,"

Background

Nucleosome organization is involved in many regulatory activities in various organisms. However, studies integrating nucleosome organization in mammalian genomes are very limited mainly due to the lack of comprehensive data quality control (QC) assessment and uneven data quality of public data sets.

Results

The NUCOME is a database focused on filtering qualified nucleosome organization referenced landscapes covering various cell types in human and mouse based on QC metrics. The filtering strategy guarantees the quality of nucleosome organization referenced landscapes and exempts users from redundant data set selection and processing. The NUCOME database provides standardized, qualified data source and informative nucleosome organization features at a whole-genome scale and on the level of individual loci.

Conclusions

The NUCOME provides valuable data resources for integrative analyses focus on nucleosome organization. The NUCOME is freely available at http://compbio-zhanglab.org/NUCOME .",2021-06-13 +31110280,"CancerMine: a literature-mined resource for drivers, oncogenes and tumor suppressors in cancer.","Tumors from individuals with cancer are frequently genetically profiled for information about the driving forces behind the disease. We present the CancerMine resource, a text-mined and routinely updated database of drivers, oncogenes and tumor suppressors in different types of cancer. All data are available online ( http://bionlp.bcgsc.ca/cancermine ) and downloadable under a Creative Commons Zero license for ease of use.",2019-05-20 +,SHERPA-city: A web application to assess the impact of traffic measures on NO2 pollution in cities,"This paper presents SHERPA-City, a web application to assess the potential of traffic measures to abate NO2 air pollution in cities. The application is developed by the Joint Research Centre. It is freely available (https://integrated-assessment.jrc.ec.europa.eu) and allows the user to perform a fast screening of possible NO2 abatement measures addressing traffic in European cities. SHERPA-City results depend on the quality of the default input data. It is therefore important to stress that the SHERPA-City default traffic flows, emission factors, fleet composition, road network topology, NO2 pollution from other sources and meteorological data are based on EU-wide datasets that may not always represent perfectly a particular local situation. This is why the SHERPA-City allows the default data to be substituted by local data, to better reflect local features. This tool must be considered as a first step in exploring options to abate NO2 air pollution through transport measures. The final decisions should be based, wherever possible, on full-scale modelling studies incorporating local knowledge. 
Graphical abstract Image 1 Highlights • A free user-friendly web application to evaluate the impact of traffic measures on NO2 concentrations in European cities.• EU wide default traffic data and emission factors are provided.• Possibility to upload own traffic data and emission factors.• A case study on Madrid Low Emission Zones demonstrates the key features of the tool.",2021-01-01 +34432001,metID: a R package for automatable compound annotation for LC-MS-based data. ,"Accurate and efficient compound annotation is a long-standing challenge for LC-MS-based data (e.g., untargeted metabolomics and exposomics). Substantial efforts have been devoted to overcoming this obstacle, whereas current tools are limited by the sources of spectral information used (in-house and public databases) and are not automated and streamlined. Therefore, we developed metID, an R package that combines information from all major databases for comprehensive and streamlined compound annotation. metID is a flexible, simple, and powerful tool that can be installed on all platforms, allowing the compound annotation process to be fully automatic and reproducible. A detailed tutorial and a case study are provided in Supplementary Materials. https://jaspershen.github.io/metID. Supplementary data are available at Bioinformatics online.",2021-08-25 +,Type 1 Familial Hypocalciuric Hypercalcemia Caused by p.M74L Variant in the Calcium Sensing Receptor (CASR) Gene,"Abstract Background: Familial hypocalciuric hypercalcemia (FHH) is a rare cause of hypercalcemia caused by inactivating mutations in specific regions of chromosome 3 and 19. Most cases are due to inactivating mutations in the Calcium sensing receptor (CASR) which is encoded by the gene located on the long arm of chromosome 3 (3q 21.1). FHH is characterized by hypercalcemia, an inappropriately normal to elevated serum PTH level, hypocalciuria, and a family history of hypercalcemia. 
Several mutations in the CASR gene have been described in literature. However, the p.M74L variant in the CASR gene has an extremely low frequency of occurrence in population databases such as the genome aggregation database (gnomAD)1. Clinical Case: A 67 years old woman with a past medical history of hypertension, dyslipidemia, type 2 diabetes mellitus, and chronic kidney disease presented for an evaluation of a long standing history of hypercalcemia. Patient reported non-specific symptoms including chronic fatigue and arthralgias. She denied a history of renal stone or chronic use of lithium. She distinctly recalled that her mother and maternal grandmother had high blood calcium levels. Review of old records showed an elevated corrected calcium level of 11.3 mg/dl, which was elevated since at least October 2013 (no medical records prior to October 2013 were available) which persisted to-date. Patient underwent work-up which revealed a high corrected serum calcium of 10.4 (8.6–10.0mg/dl), high serum PTH of 101 (15–65 pg/ml), an extremely low 24hr urine calcium of <9.2 (100–300 mg/24hr) with corresponding urine volume of 1150 cc and urine creatinine of 1392 (740–1570 mg/24hr), low 25-OH vitamin D of 20.6 (30–100 ng/ml), and a low eGFR of 47 ml/m/1.73. SPECT parathyroid gland was negative. FHH was suspected and subsequent CASR gene analysis panel showed a heterogenous DNA sequence change at nucleotide position c.220 in exon 3 of the CASR gene (c.220A>C). This nucleotide change results in an amino acid change from methionine (M) to leucine (L) at position 74 in the CASR protein (p.M74L). Conclusion: We report a case of a p.M74L variant in the CASR gene which is an extremely uncommon mutation in the CASR gene1. Our case supports the current limited evidence that p.M74L variant in the CASR gene can cause FHH. 
References: 1) Genome Aggregation Database: https://gnomad.broadinstitute.org/",2021-05-03 +32512182,IRESbase: A Comprehensive Database of Experimentally Validated Internal Ribosome Entry Sites.,"Internal ribosome entry sites (IRESs) are functional RNA elements that can directly recruit ribosomes to an internal position of the mRNA in a cap-independent manner to initiate translation. Recently, IRES elements have attracted much attention for their critical roles in various processes including translation initiation of a new type of RNA, circular RNA (circRNA), with no 5' cap to support classical cap-dependent translation. Thus, an integrative data resource of IRES elements with experimental evidence will be useful for further studies. In this study, we present IRESbase, a comprehensive database of IRESs, by curating the experimentally validated functional minimal IRES elements from literature and annotating their host linear and circular RNAs. The current version of IRESbase contains 1328 IRESs, including 774 eukaryotic IRESs and 554 viral IRESs from 11 eukaryotic organisms and 198 viruses, respectively. As IRESbase collects only IRES of minimal length with functional evidence, the median length of IRESs in IRESbase is 174 nucleotides. By mapping IRESs to human circRNAs and long non-coding RNAs (lncRNAs), 2191 circRNAs and 168 lncRNAs were found to contain at least one entire or partial IRES sequence. IRESbase is available at http://reprod.njmu.edu.cn/cgi-bin/iresbase/index.php.",2020-04-01 +34339812,PharmDE: A new expert system for drug-excipient compatibility evaluation.,"Drug-excipient compatibility study is the essential basis for excipient selection at the pre-formulation stage. 
According to the pharmaceutical Quality by Design (QbD) principles, a comprehensive understanding of the ingredients' physicochemical properties and a theoretical evaluation of the interaction risk between the drugs and excipients are required for conducting rational compatibility experimental design. Currently, there is an urgent need to establish an artificial intelligence system for researchers to easily get through the problem because it is very inconvenient and hard to utilize those drug-excipient incompatibility data scattered in scientific literature. Here, we designed a knowledge-driven expert system named PharmDE for drug-excipient incompatibility risk evaluation. PharmDE firstly developed an information-rich database to store incompatibility data, covering 532 data items from 228 selected articles. Then, 60 drug-excipient interaction rules were created based on our knowledge and formulation research experiences. Finally, the expert system was developed by organically integrating the database searching and rule-based incompatibility risk prediction, which resulted in four main functionalities: basic search of incompatibility database, data matching by similarity search, drug incompatibility risk evaluation, and formulation incompatibility risk evaluation. PharmDE is expected to be a useful tool for drug-excipient compatibility study and accelerate drug formulation design. It is now freely available at https://pharmde.computpharm.org.",2021-07-31 +31598693,"EuRBPDB: a comprehensive resource for annotation, functional and oncological investigation of eukaryotic RNA binding proteins (RBPs).","RNA binding proteins (RBPs) are a large protein family that plays important roles at almost all levels of gene regulation through interacting with RNAs, and contributes to numerous biological processes. However, the complete list of eukaryotic RBPs including human is still unavailable. 
Here, we systematically identified RBPs in 162 eukaryotic species based on both computational analysis of RNA binding domains (RBDs) and large-scale RNA binding proteomic data, and established a comprehensive eukaryotic RBP database, EuRBPDB (http://EuRBPDB.syshospital.org). We identified a total of 311 571 RBPs with RBDs (corresponding to 6368 ortholog groups) and 3,651 non-canonical RBPs without known RBDs. EuRBPDB provides detailed annotations for each RBP, including basic information and functional annotation. Moreover, we systematically investigated RBPs in the context of cancer biology based on published literatures, PPI-network and large-scale omics data. To facilitate the exploration of the clinical relevance of RBPs, we additionally designed a cancer web interface to systematically and interactively display the biological features of RBPs in various types of cancers. EuRBPDB has a user-friendly web interface with browse and search functions, as well as data downloading function. We expect that EuRBPDB will be a widely-used resource and platform for both the communities of RNA biology and cancer biology.",2020-01-01 +31584087,Animal-ImputeDB: a comprehensive database with multiple animal reference panels for genotype imputation.,"Animal-ImputeDB (http://gong_lab.hzau.edu.cn/Animal_ImputeDB/) is a public database with genomic reference panels of 13 animal species for online genotype imputation, genetic variant search, and free download. Genotype imputation is a process of estimating missing genotypes in terms of the haplotypes and genotypes in a reference panel. It can effectively increase the density of single nucleotide polymorphisms (SNPs) and thus can be widely used in large-scale genome-wide association studies (GWASs) using relatively inexpensive and low-density SNP arrays. However, most animals except humans lack high-quality reference panels, which greatly limits the application of genotype imputation in animals. 
To overcome this limitation, we developed Animal-ImputeDB, which is dedicated to collecting genotype data and whole-genome resequencing data of nonhuman animals from various studies and databases. A computational pipeline was developed to process different types of raw data to construct reference panels. Finally, 13 high-quality reference panels including ∼400 million SNPs from 2265 samples were constructed. In Animal-ImputeDB, an easy-to-use online tool consisting of two popular imputation tools was designed for the purpose of genotype imputation. Collectively, Animal-ImputeDB serves as an important resource for animal genotype imputation and will greatly facilitate research on animal genomic selection and genetic improvement.",2020-01-01 +32294193,ctcRbase: the gene expression database of circulating tumor cells and microemboli. ,"Circulating tumor cells/microemboli (CTCs/CTMs) are malignant cells that depart from cancerous lesions and shed into the bloodstream. Analysis of CTCs can allow the investigation of tumor cell biomarker expression from a non-invasive liquid biopsy. To date, high-throughput technologies have become a powerful tool to provide a genome-wide view of transcriptomic changes associated with CTCs/CTMs. These data provided us much information to understand the tumor heterogeneity, and the underlying molecular mechanism of tumor metastases. Unfortunately, these data have been deposited into various repositories, and a uniform resource for the cancer metastasis is still unavailable. To this end, we integrated previously published transcriptome datasets of CTCs/CTMs and constructed a web-accessible database. The first release of ctcRbase contains 526 CTCs/CTM samples across seven cancer types. The expression of 14 631 mRNAs and 3642 long non-coding RNAs of CTCs/CTMs were included. Experimental validations from the published literature are also included. 
Since CTCs/CTMs are considered to be precursors of metastases, ctcRbase also collected the expression data of primary tumors and metastases, which allows user to discover a unique 'circulating tumor cell gene signature' that is distinct from primary tumor and metastases. An easy-to-use database was constructed to query and browse CTCs/CTMs genes. ctcRbase can be freely accessible at http://www.origin-gene.cn/database/ctcRbase/.",2020-01-01 +31566225,OGRDB: a reference database of inferred immune receptor genes.,"High-throughput sequencing of the adaptive immune receptor repertoire (AIRR-seq) is providing unprecedented insights into the immune response to disease and into the development of immune disorders. The accurate interpretation of AIRR-seq data depends on the existence of comprehensive germline gene reference sets. Current sets are known to be incomplete and unrepresentative of the degree of polymorphism and diversity in human and animal populations. A key issue is the complexity of the genomic regions in which they lie, which, because of the presence of multiple repeats, insertions and deletions, have not proved tractable with short-read whole genome sequencing. Recently, tools and methods for inferring such gene sequences from AIRR-seq datasets have become available, and a community approach has been developed for the expert review and publication of such inferences. Here, we present OGRDB, the Open Germline Receptor Database (https://ogrdb.airr-community.org), a public resource for the submission, review and publication of previously unknown receptor germline sequences together with supporting evidence.",2020-01-01 +31679497,Using historical and palaeoecological data to inform ambitious species recovery targets.,"Historical data are a valuable resource for addressing present-day conservation issues, for example by informing the establishment of appropriate recovery targets. 
However, while the recovery of threatened species is the end goal of many conservation programmes, data made available through the efforts of palaeoecologists and historical ecologists are rarely consulted. The proposal of a 'Green List of Species' by the International Union for Conservation of Nature (IUCN) will soon change this. The Green List of Species measures recovery against historical baselines; in particular, the method requires estimates of species range and abundance in previous centuries. In this paper, we present the case for why setting species recovery against a historical baseline is necessary to produce ambitious conservation targets, and we highlight examples from palaeoecology and historical ecology where fossil and archival data have been used to establish historical species baselines. Finally, we introduce Conservation Archive (https://conservationarchive.shinyapps.io/ConservationArchive/), a database of resources that can be used to infer baseline species conditions, and invite contributions to this database. This article is part of a discussion meeting issue 'The past is a foreign country: how much can the fossil record actually inform conservation?'",2019-11-04 +32681912,ExoceRNA atlas: A database of cancer ceRNAs in human blood exosomes.,"

Aims

Competing endogenous RNAs (ceRNAs) play essential roles in cancer pathogenesis and those in exosomes have been the promising biomarkers for cancer diagnose and therapy. We aim to identify potential active ceRNA pairs in cancer blood exosomes by combining TCGA and exoRBase.

Main methods

Two strict screening criteria were implemented, including hypergeometric test on the targets predicted by RNA22 for differential miRNAs and Pearson test on the candidate mRNAs and lncRNAs for each cancer. Then 2638292, 4925485 and 70669 ceRNAs in blood exosomes are available for colorectal cancer (CRC), hepatocellular carcinoma (HCC) and pancreatic adenocarcinoma (PAAD), respectively.

Key findings

A comprehensive functional analysis on differential miRNAs in cancer blood exosomes indicates that they play important roles in development of cancer by degrading or inhibiting the post-transcription translation level of mRNA or by acting as mediators to regulate the expression of mRNA. Topological and biological functional analysis of ceRNA networks demonstrate that hub ceRNAs involve in cancer-related biological pathways and processes, so as to influence the occurrence and development of cancer and would be the potential biomarkers for three cancers. Finally, we designed a web-accessible database, ExoceRNA Atlas (https://www.exocerna-atlas.com/exoceRNA#/) as a repository of ceRNAs in blood exosomes. It can friendly search, browse and visualize ceRNA networks of the query genes along with giving the detailed functional analysis results. The entire ceRNA data can also be freely downloaded.

Significance

ExoceRNA Atlas will serve as a powerful public resource for identifying ceRNAs and greatly deepen our understanding their functions in cancer exosomes.",2020-07-15 +,A bioinformatics approach to investigating the structural and functional consequences of SNPs in TMPRSS2 for COVID‐19 infection,"SARS‐CoV‐2 is a highly infectious virus that is responsible for the COVID‐19 global pandemic that swept the world in 2020. Disease outcomes range from asymptomatic to fatal. The virus initiates entry into host cells by the binding of its spike protein to the ACE2 receptor. Entry is finalized by the activation of spike glycoprotein by proteases including transmembrane protease, serine 2 (TMPRSS2) and FURIN which cleave the spike protein of the virus. Single nucleotide polymorphisms (SNPs) in TMPRSS2 may lead to functional changes which could underlie differences in disease severity. TMPRSS2 is also known to activate different respiratory illnesses including coronaviruses and influenza A (Shen et al., 2020). Previous studies have shown that knockout TMPRSS2 mice appeared healthy, experienced a decrease in viral spread within the respiratory system, and had a less severe immune response when infected with SARS‐CoV and MERS‐CoV (Baughn et al., 2020). Thus, we asked whether genetic variations in TMPRSS2 in humans lead to differences in infection rates or severity of disease symptoms of SARS‐CoV‐2. We examined the NCBI dbSNP database to identify SNPs in the TMPRSS2 gene. As of 10 December 2020, we found there were 11,023 intron variants, 393 missense variants, 186 synonymous variants, 3 in‐frame insertion variants, 2 in‐frame deletion variants, and 1 initiator codon variant reported. To narrow these down to 23 SNPs of interest, we first searched the ClinVar database to identify SNPs with general clinical significance, followed by searching the literature to determine SNPs specifically related to SARS‐CoV‐2 severity. 
One missense variant, rs12329760, results in an amino acid substitution, V160M, which has been predicted to alter TMPRSS2 function. A subset of these SNPs show differences in frequency in world populations, and we wondered if these SNPs had structural and functional consequences for the protein. A crystal structure of TMPRSS2 is not currently available. To visualize the structural consequences of amino acid substitutions, we performed homology modeling on TMPRSS2 (UniProt O15393) using the structure prediction software HHPred, RaptorX, and SwissModel based on the ~30% similarity to hepsin. The predicted structures of TMPRSS2 with various amino acid substitutions were then docked to the SARS‐CoV‐2 spike protein using I‐TASSER and Haddock 2.4 to observe differences in binding interactions and therefore determine which sequence changes are predicted to alter binding interactions, potentially contributing to the wide variation of symptoms caused by COVID‐19. Baughn, L. B., Sharma, N., Elhaik, E., Sekulic, A., Bryce, A. H., & Fonseca, R. (2020). Targeting TMPRSS2 in SARS‐CoV‐2 Infection. Mayo Clinic proceedings, 95(9), 1989–1999. https://doi.org/10.1016/j.mayocp.2020.06.018 Shen, L.W.; Mao, H.J.;, Wu, Y.L.; Tanaka,Y.; Zhang,W. (2017) TMPRSS2: A potential target for treatment of influenza virus and coronavirus infections, Biochimie, 142, 1‐10. https://doi.org/10.1016/j.biochi.2017.07.016",2021-05-01 +33154365,Spatially and cell-type resolved quantitative proteomic atlas of healthy human skin.,"Human skin provides both physical integrity and immunological protection from the external environment using functionally distinct layers, cell types and extracellular matrix. Despite its central role in human health and disease, the constituent proteins of skin have not been systematically characterized. Here, we combine advanced tissue dissection methods, flow cytometry and state-of-the-art proteomics to describe a spatially-resolved quantitative proteomic atlas of human skin. 
We quantify 10,701 proteins as a function of their spatial location and cellular origin. The resulting protein atlas and our initial data analyses demonstrate the value of proteomics for understanding cell-type diversity within the skin. We describe the quantitative distribution of structural proteins, known and previously undescribed proteins specific to cellular subsets and those with specialized immunological functions such as cytokines and chemokines. We anticipate that this proteomic atlas of human skin will become an essential community resource for basic and translational research ( https://skin.science/ ).",2020-11-05 +34477040,A Systematic Review with Thematic Synthesis of the Experience of Hospitalization in People with Advanced Chronic Obstructive Pulmonary Disease.,"Hospital admissions are common for people with advanced chronic obstructive pulmonary disease (COPD). To provide effective, responsive care, it is important to understand how people experience hospitalization. The aim of this review was to explore the experience of hospitalization in people with advanced COPD, drawing from qualitative research data. Guided by a thematic synthesis approach, a systematic search of databases (n = 13) including PubMed, CINAHL, PsycINFO and ProQuest was undertaken from database inception to May 2020. Inclusion criteria included papers reporting qualitative research focused on any aspect of hospitalization for people with advanced COPD and reported in English language from peer reviewed journals. Following quality appraisal, relevant data were extracted, and a three-stage thematic synthesis method used to develop inductive themes. From 1935 papers, the 11 included studies focused on specific aspects of hospitalization (e.g., care and treatment), rather than the totality of the experience. Four analytical themes were identified: unpredictable hospitalization, benefits and burdens of treatment, overwhelming distress and the communicative attitude of staff. 
Hospitalization was unpredictable because of the frequent, sudden admissions required for acute breathlessness. Hospital could be perceived both as a safe place, due to immediate symptom relief, but also as a place for experiencing overwhelming distress. Breathlessness was the most difficult symptom experienced, causing physical and psychological distress. Both communication and attitudes of the staff could influence the experience. A holistic approach to the care of hospitalized individuals with advanced COPD is required to improve care.Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.1971186 .",2021-09-03 +,Tips and Tricks for PTM Analysis,"Analysis of protein post-translational modification (PTM) is critical to the understanding of cell signaling and disease. These modifications are typically carefully regulated through enzymatic control, and aberrant PTMs are often associated with disease states, but protein PTMs cannot be studied through genomic methods, making their identification and quantification key aims of many proteomics studies. Global proteomic-scale analysis of PTMs is made challenging by their low abundance, chemical properties, and/or instability. This session will introduce several classes of PTMs, including the commonly studied phosphorylation and glycosylation as well as less frequently identified modifications such as citrullination and protein myristoylation. The chemical structure and biological significance of each PTM, current methods for enrichment of modified proteins or peptides from a complex mixture, tips for effective LCMS/MS of peptides containing the PTM, and any pitfalls to watch out for in sample preparation or analysis will be described. This session will also describe tools emerging from the NIH Common Fund Glycoscience program and explain how resource facilities can access those tools and bring them in-house. 
Analytical technologies to be described include: high throughput permethylation of glycopeptides for site mapping and glycan analysis, isotope-targeted glycoproteomics (www.IsoStamp.org); facile methods to ultra-purify glycans; software tools for building 3D models of glycoproteins and predicting the 3D structure of glycans (https://dev.glycam.org/); a wide range of highly versatile glycan affinity reagents, including sialoglycan-recognizing probes; and new photo-crosslinking probes for discovery of the interaction partners of O-GlcNAc modified proteins. Glycoinformatics tools and methods are being developed in a community-based effort involving 10 teams in 5 countries (http://www.glygen.org/).",2020-08-01 +,Tips and Tricks for PTM Analysis,"Analysis of protein post-translational modification (PTM) is critical to the understanding of cell signaling and disease. These modifications are typically carefully regulated through enzymatic control, and aberrant PTMs are often associated with disease states, but protein PTMs cannot be studied through genomic methods, making their identification and quantification key aims of many proteomics studies. Global proteomic-scale analysis of PTMs is made challenging by their low abundance, chemical properties, and/or instability. This session will introduce several classes of PTMs, including the commonly studied phosphorylation and glycosylation as well as less frequently identified modifications such as citrullination and protein myristoylation. The chemical structure and biological significance of each PTM, current methods for enrichment of modified proteins or peptides from a complex mixture, tips for effective LCMS/MS of peptides containing the PTM, and any pitfalls to watch out for in sample preparation or analysis will be described. This session will also describe tools emerging from the NIH Common Fund Glycoscience program and explain how resource facilities can access those tools and bring them in-house. 
Analytical technologies to be described include: high throughput permethylation of glycopeptides for site mapping and glycan analysis, isotope-targeted glycoproteomics (www.IsoStamp.org); facile methods to ultra-purify glycans; software tools for building 3D models of glycoproteins and predicting the 3D structure of glycans (https://dev.glycam.org/); a wide range of highly versatile glycan affinity reagents, including sialoglycan-recognizing probes; and new photo-crosslinking probes for discovery of the interaction partners of O-GlcNAc modified proteins. Glycoinformatics tools and methods are being developed in a community-based effort involving 10 teams in 5 countries (http://www.glygen.org/).",2020-08-01 +32392296,CoCoCoNet: conserved and comparative co-expression across a diverse set of species.,"Co-expression analysis has provided insight into gene function in organisms from Arabidopsis to zebrafish. Comparison across species has the potential to enrich these results, for example by prioritizing among candidate human disease genes based on their network properties or by finding alternative model systems where their co-expression is conserved. Here, we present CoCoCoNet as a tool for identifying conserved gene modules and comparing co-expression networks. CoCoCoNet is a resource for both data and methods, providing gold standard networks and sophisticated tools for on-the-fly comparative analyses across 14 species. We show how CoCoCoNet can be used in two use cases. In the first, we demonstrate deep conservation of a nucleolus gene module across very divergent organisms, and in the second, we show how the heterogeneity of autism mechanisms in humans can be broken down by functional groups and translated to model organisms. 
CoCoCoNet is free to use and available to all at https://milton.cshl.edu/CoCoCoNet, with data and R scripts available at ftp://milton.cshl.edu/data.",2020-07-01 +34779026,Boosting the analysis of protein interfaces with multiple interface string alignments: Illustration on the spikes of coronaviruses.,"We introduce multiple interface string alignment (MISA), a visualization tool to display coherently various sequence and structure based statistics at protein-protein interfaces (SSE elements, buried surface area, ΔASA , B factor values, etc). The amino acids supporting these annotations are obtained from Voronoi interface models. The benefit of MISA is to collate annotated sequences of (homologous) chains found in different biological contexts, that is, bound with different partners or unbound. The aggregated views MISA/SSE, MISA/BSA, MISA/ΔASA, and so forth, make it trivial to identify commonalities and differences between chains, to infer key interface residues, and to understand where conformational changes occur upon binding. As such, they should prove of key relevance for knowledge-based annotations of protein databases such as the Protein Data Bank. Illustrations are provided on the receptor binding domain of coronaviruses, in complex with their cognate partner or (neutralizing) antibodies. MISA computed with a minimal number of structures complement and enrich findings previously reported. The corresponding package is available from the Structural Bioinformatics Library (http://sbl.inria.fr and https://sbl.inria.fr/doc/Multiple_interface_string_alignment-user-manual.html).",2021-12-16 +33500778,HPAStainR: a Bioconductor and Shiny app to query protein expression patterns in the Human Protein Atlas.,"The Human Protein Atlas is a website of protein expression in human tissues. It is an excellent resource of tissue and cell type protein localization, but only allows the query of a single protein at a time. 
We introduce HPAStainR as a new Shiny app and Bioconductor/R package used to query the scored staining patterns in the Human Protein Atlas with multiple proteins/genes of interest. This allows the user to determine if an experimentally-generated protein/gene list associates with a particular cell type. We validated the tool using the Panglao Database cell type specific marker genes and a Genotype Expression (GTEx) tissue deconvolution dataset.  HPAStainR identified 92% of the Panglao cell types in the top quartile of confidence scores limited to tissue type of origin results. It also appropriately identified the correct cell types from the GTEx dataset. HPAStainR fills a gap in available bioinformatics tools to identify cell type protein expression patterns and can assist in establishing ground truths and exploratory analysis. HPAStainR is available from: https://32tim32.shinyapps.io/HPAStainR/.",2020-10-08 +34930731,Identifying patients with psychosocial problems in general practice: a scoping review protocol.,"

Introduction

Psychosocial problems (PSPs) are common issues associated with negative health outcomes. Since general practitioners are the first point of contact for any health-related concern, understanding their options to recognise patients with PSPs plays an important role as it is essential for early intervention and can prevent serious conditions. The objective of our scoping review is to map published evidence on the usage of instruments to identify patients with PSPs in general practice.

Methods and analysis

We will follow the Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews checklist and the Joanna Briggs Institute Reviewer's Manual on scoping reviews. A systematic search of four electronic databases (Medline (Ovid), Web of Science Core Collection, PsycInfo, Cochrane Library) will be conducted for quantitative and qualitative studies published in English, Spanish, French and German. Main study characteristics as well as information on identification instruments will be extracted and visualised in structured tables to map the available evidence. The protocol has been registered with Open Science Framework, https://osf.io/c2m6z.

Ethics and dissemination

This study does not require ethical approval as we will not collect personal data. Dissemination will consist of publications, presentations and other knowledge translation activities.",2021-12-20 +34179956,The DNA methylation haplotype (mHap) format and mHapTools. ,"Bisulfite sequencing (BS-seq) is currently the gold standard for measuring genome-wide DNA methylation profiles at single-nucleotide resolution. Most analyses focus on mean CpG methylation and ignore methylation states on the same DNA fragments [DNA methylation haplotypes (mHaps)]. Here, we propose mHap, a simple DNA mHap format for storing DNA BS-seq data. This format reduces the size of a BAM file by 40- to 140-fold while retaining complete read-level CpG methylation information. It is also compatible with the Tabix tool for fast and random access. We implemented a command-line tool, mHapTools, for converting BAM/SAM files from existing platforms to mHap files as well as post-processing DNA methylation data in mHap format. With this tool, we processed all publicly available human reduced representation bisulfite sequencing data and provided these data as a comprehensive mHap database. https://jiantaoshi.github.io/mHap/index.html. Supplementary data are available at Bioinformatics online.",2021-06-19 +,Automating Areas of Interest Analysis in Mobile Eye Tracking Experiments based on Machine Learning,"For an in-depth, AOI-based analysis of mobile eye tracking data, a preceding gaze assign-ment step is inevitable. Current solutions such as manual gaze mapping or marker-based approaches are tedious and not suitable for applications manipulating tangible objects. This makes mobile eye tracking studies with several hours of recording difficult to analyse quan-titatively. We introduce a new machine learning-based algorithm, the computational Gaze-Object Mapping (cGOM), that automatically maps gaze data onto respective AOIs. 
cGOM extends state-of-the-art object detection and segmentation by mask R-CNN with a gaze mapping feature. The new algorithm’s performance is validated against a manual fixation-by-fixation mapping, which is considered as ground truth, in terms of true positive rate (TPR), true negative rate (TNR) and efficiency. Using only 72 training images with 264 labelled object representations, cGOM is able to reach a TPR of approx. 80% and a TNR of 85% compared to the manual mapping. The break-even point is reached at 2 hours of eye tracking recording for the total procedure, respectively 1 hour considering human working time only. Together with a real-time capability of the mapping process after completed training, even hours of eye tracking recording can be evaluated efficiently. (Code and video examples have been made available at: https://gitlab.ethz.ch/pdz/cgom.git)",2021-04-08 +,Soybean transporter database: A comprehensive database for identification and exploration of natural variants in soybean transporter genes,"Transporters, a class of membrane proteins that facilitate exchange of solutes including diverse molecules and ions across the cellular membrane, are vital component for the survival of all organisms. Understanding plant transporters is important to get insight of the basic cellular processes, physiology, and molecular mechanisms including nutrient uptake, signaling, response to external stress, and many more. In this regard, extensive analysis of transporters predicted in soybean and other plant species was performed. In addition, an integrated database for soybean transporter protein, SoyTD, was developed that will facilitate the identification, classification, and extensive characterization of transporter proteins by integrating expression, gene ontology, conserved domain and motifs, gene structure organization, and chromosomal distribution features. 
A comprehensive analysis was performed to identify highly confident transporters by integrating various prediction tools. Initially, 7541 transmembrane (TM) proteins were predicted in the soybean genome; out of these, 3306 non‐redundant transporter genes carrying two or more transmembrane domains were selected for further analysis. The identified transporter genes were classified according to a standard transporter classification (TC) system. Comparative analysis of transporter genes among 47 plant genomes provided insights into expansion and duplication of transporter genes in land plants. The whole genome resequencing (WGRS) and tissue‐specific transcriptome datasets of soybean were integrated to investigate the natural variants and expression profile associated with transporter(s) of interest. Overall, SoyTD provides a comprehensive interface to study genetic and molecular function of soybean transporters. SoyTD is publicly available at http://artemis.cyverse.org/soykb_dev/SoyTD/.",2021-04-01 +33237598,Single-cell transcriptomic analysis of small and large wounds reveals the distinct spatial organization of regenerative fibroblasts.,"Wound-induced hair follicle neogenesis (WIHN) has been an important model to study hair follicle regeneration during wound repair. However, the cellular and molecular components of the dermis that make large wounds more regenerative are not fully understood. Here, we compare and contrast recently published scRNA-seq data of small scarring wounds to wounds that regenerate in hope to elucidate the role of fibroblasts lineages in WIHN. Our analysis revealed an over-representation of the newly identified upper wound fibroblasts in regenerative wound conditions, which express the retinoic acid binding protein Crabp1. This regenerative cell type shares a similar gene signature to the murine papillary fibroblast lineage, which are necessary to support hair follicle morphogenesis and homeostasis. 
RNA velocity analysis comparing scarring and regenerating wounds revealed the divergent trajectories towards upper and lower wound fibroblasts and that the upper populations were closely associated with the specialized dermal papilla. We also provide analyses and explanation reconciling the inconsistency between the histological lineage tracing and the scRNA-seq data from recent reports investigating large wounds. Finally, we performed a computational test to map the spatial location of upper wound fibroblasts in large wounds which revealed that upper peripheral fibroblasts might harbour equivalent regenerative competence as those in the centre. Overall, our scRNA-seq reanalysis combining multiple samples suggests that upper wound fibroblasts are required for hair follicle regeneration and that papillary fibroblasts may migrate from the wound periphery to the centre during wound re-epithelialization. Moreover, data from this publication are made available on our searchable web resource: https://skinregeneration.org/.",2020-12-07 +33182422,Enhancements and Challenges in CoAP-A Survey. ,"The Internet of Engineering Task (IETF) developed a lighter application protocol (Constrained Application Protocol (CoAP)) for the constrained IoT devices operating in lossy environments. Based on UDP, CoAP is a lightweight and efficient protocol compared to other IoT protocols such as HTTP, MQTT, etc. CoAP also provides reliable communication among nodes in wireless sensor networks in addition to features such as resource observation, resource discovery, congestion control, etc. These capabilities of CoAP have enabled the implementation of CoAP in various domains ranging from home automation to health management systems. The use of CoAP has highlighted its shortcomings over the time. To overcome shortcomings of CoAP, numerous enhancements have been made in basic CoAP architecture. 
This survey highlights the shortcomings of basic CoAP architecture and enhancements made in it throughout the time. Furthermore, existing challenges and issue in the current CoAP architecture are also discussed. Finally, some applications with CoAP implementation are mentioned in order to realize the viability of CoAP in real world use cases.",2020-11-09 +32780488,CropPAL for discovering divergence in protein subcellular location in crops to support strategies for molecular crop breeding.,"Agriculture faces increasing demand for yield, higher plant-derived protein content and diversity while facing pressure to achieve sustainability. Although the genomes of many of the important crops have been sequenced, the subcellular locations of most of the encoded proteins remain unknown or are only predicted. Protein subcellular location is crucial in determining protein function and accumulation patterns in plants, and is critical for targeted improvements in yield and resilience. Integrating location data from over 800 studies for 12 major crop species into the cropPAL2020 data collection showed that while >80% of proteins in most species are not localised by experimental data, combining species data or integrating predictions can help bridge gaps at similar accuracy. The collation and integration of over 61 505 experimental localisations and more than 6 million predictions showed that the relative sizes of the protein catalogues located in different subcellular compartments are comparable between crops and Arabidopsis. A comprehensive cross-species comparison showed that between 50% and 80% of the subcellulomes are conserved across species and that conservation only depends to some degree on the phylogenetic relationship of the species. Protein subcellular locations in major biosynthesis pathways are more often conserved than in metabolic pathways. 
Underlying this conservation is a clear potential for subcellular diversity in protein location between species by means of gene duplication and alternative splicing. Our cropPAL data set and search platform (https://crop-pal.org) provide a comprehensive subcellular proteomics resource to drive compartmentation-based approaches for improving yield, protein composition and resilience in future crop varieties.",2020-09-16 +33057676,IDseq-An open source cloud-based pipeline and analysis service for metagenomic pathogen detection and monitoring. ,"Metagenomic next-generation sequencing (mNGS) has enabled the rapid, unbiased detection and identification of microbes without pathogen-specific reagents, culturing, or a priori knowledge of the microbial landscape. mNGS data analysis requires a series of computationally intensive processing steps to accurately determine the microbial composition of a sample. Existing mNGS data analysis tools typically require bioinformatics expertise and access to local server-class hardware resources. For many research laboratories, this presents an obstacle, especially in resource-limited environments. We present IDseq, an open source cloud-based metagenomics pipeline and service for global pathogen detection and monitoring (https://idseq.net). The IDseq Portal accepts raw mNGS data, performs host and quality filtration steps, then executes an assembly-based alignment pipeline, which results in the assignment of reads and contigs to taxonomic categories. The taxonomic relative abundances are reported and visualized in an easy-to-use web application to facilitate data interpretation and hypothesis generation. Furthermore, IDseq supports environmental background model generation and automatic internal spike-in control recognition, providing statistics that are critical for data interpretation. IDseq was designed with the specific intent of detecting novel pathogens. 
Here, we benchmark novel virus detection capability using both synthetically evolved viral sequences and real-world samples, including IDseq analysis of a nasopharyngeal swab sample acquired and processed locally in Cambodia from a tourist from Wuhan, China, infected with the recently emergent SARS-CoV-2. The IDseq Portal reduces the barrier to entry for mNGS data analysis and enables bench scientists, clinicians, and bioinformaticians to gain insight from mNGS datasets for both known and novel pathogens.",2020-10-01 +33136065,HPREP: a comprehensive database for human proteome repeats. ,"Amino acid repeats are found to play important roles in both structures and functions of the proteins. These are commonly found in all kingdoms of life, especially in eukaryotes and a larger fraction of human proteins composed of repeats. Further, the abnormal expansions of shorter repeats cause various diseases to humans. Therefore, the analysis of repeats of the entire human proteome along with functional, mutational and disease information would help to better understand their roles in proteins. To fulfill this need, we developed a web database HPREP (http://bioinfo.bdu.ac.in/hprep) for human proteome repeats using Perl and HTML programming. We identified different categories of well-characterized repeats and domain repeats that are present in the human proteome of UniProtKB/Swiss-Prot by using in-house Perl programming and novel repeats by using the repeat detection T-REKS tool as well as XSTREAM web server. Further, these proteins are annotated with functional, mutational and disease information and grouped according to specific repeat types. The developed database enables the users to search by specific repeat type in order to understand their involvement in proteins. 
Thus, the HPREP database is expected to be a useful resource to gain better insight regarding the different repeats in human proteome and their biological roles.",2020-11-03 +34491747,Searching Geometric Patterns in Protein Binding Sites and Their Application to Data Mining in Protein Kinase Structures.,"The ever-growing number of protein-ligand complex structures can give fundamental insights into protein functions and protein-ligand interactions, especially in the field of protein kinase research. The number of tools to mine this data for individually defined structural motifs is restricted due to the challenging task of developing efficient index structures for 3D data in relational databases. Herein we present GeoMine, a database system with web front-end mining of more than 900 000 binding sites. It enables database searches for geometric (interaction) patterns in protein-ligand interfaces by, for example, textual, numerical, substructure, similarity, and 3D searches. GeoMine processes reasonably selective user-defined queries within minutes. We demonstrate its usability for advancing protein kinase research with a special emphasis on unusual interactions, their use in designing selective kinase inhibitors, and the analysis of reactive cysteine residues that are amenable to covalent kinase inhibitors. GeoMine is freely available as part of our modeling support server at https://proteins.plus.",2021-09-07 +34268481,PubChem Periodic Table and Element Pages: Improving Access to Information on Chemical Elements from Authoritative Sources.,"PubChem (https://pubchem.ncbi.nlm.nih.gov) is one of the top five most visited chemistry web sites in the world, with more than five million unique users per month (as of March 2020). Many of these users are educators, undergraduate students, and graduate students at academic institutions. Therefore, PubChem has a great potential as an online resource for chemical education. 
This paper describes the PubChem Periodic Table and Element pages, which were recently introduced to celebrate the 150th anniversary of the periodic table. These services help users navigate the abundant chemical element data available within PubChem, while providing a convenient entry point to explore additional chemical content, such as biological activities and health and safety data available in PubChem Compound pages for specific elements and their isotopes. The PubChem Periodic Table and Element pages are also available as widgets, which enable web developers to display PubChem's element data on web pages they design. The elemental data can be downloaded in common file formats and imported into data analysis programs (e.g., spreadsheet software, like Microsoft Excel and Google Sheets, and computer scripts, such as python and R). Overall, the PubChem Periodic Table and Element pages improve access to chemical element data from authoritative sources.",2020-07-13 +34019655,KEA3: improved kinase enrichment analysis via data integration.,"Phosphoproteomics and proteomics experiments capture a global snapshot of the cellular signaling network, but these methods do not directly measure kinase state. Kinase Enrichment Analysis 3 (KEA3) is a webserver application that infers overrepresentation of upstream kinases whose putative substrates are in a user-inputted list of proteins. KEA3 can be applied to analyze data from phosphoproteomics and proteomics studies to predict the upstream kinases responsible for observed differential phosphorylations. The KEA3 background database contains measured and predicted kinase-substrate interactions (KSI), kinase-protein interactions (KPI), and interactions supported by co-expression and co-occurrence data. 
To benchmark the performance of KEA3, we examined whether KEA3 can predict the perturbed kinase from single-kinase perturbation followed by gene expression experiments, and phosphoproteomics data collected from kinase-targeting small molecules. We show that integrating KSIs and KPIs across data sources to produce a composite ranking improves the recovery of the expected kinase. The KEA3 webserver is available at https://maayanlab.cloud/kea3.",2021-07-01 +34697637,OBO Foundry in 2021: operationalizing open data principles to evaluate ontologies. ,"Biological ontologies are used to organize, curate and interpret the vast quantities of data arising from biological experiments. While this works well when using a single ontology, integrating multiple ontologies can be problematic, as they are developed independently, which can lead to incompatibilities. The Open Biological and Biomedical Ontologies (OBO) Foundry was created to address this by facilitating the development, harmonization, application and sharing of ontologies, guided by a set of overarching principles. One challenge in reaching these goals was that the OBO principles were not originally encoded in a precise fashion, and interpretation was subjective. Here, we show how we have addressed this by formally encoding the OBO principles as operational rules and implementing a suite of automated validation checks and a dashboard for objectively evaluating each ontology's compliance with each principle. This entailed a substantial effort to curate metadata across all ontologies and to coordinate with individual stakeholders. We have applied these checks across the full OBO suite of ontologies, revealing areas where individual ontologies require changes to conform to our principles. 
Our work demonstrates how a sizable, federated community can be organized and evaluated on objective criteria that help improve overall quality and interoperability, which is vital for the sustenance of the OBO project and towards the overall goals of making data Findable, Accessible, Interoperable, and Reusable (FAIR). Database URL http://obofoundry.org/.",2021-10-01 +34178036,RHIVDB: A Freely Accessible Database of HIV Amino Acid Sequences and Clinical Data of Infected Patients.,"Human immunodeficiency virus (HIV) infection remains one of the most severe problems for humanity, particularly due to the development of HIV resistance. To evaluate an association between viral sequence data and drug combinations and to estimate an effect of a particular drug combination on the treatment results, collection of the most representative drug combinations used to cure HIV and the biological data on amino acid sequences of HIV proteins is essential. We have created a new, freely available web database containing 1,651 amino acid sequences of HIV structural proteins [reverse transcriptase (RT), protease (PR), integrase (IN), and envelope protein (ENV)], treatment history information, and CD4+ cell count and viral load data available by the user's query. Additionally, the biological data on new HIV sequences and treatment data can be stored in the database by any user followed by an expert's verification. The database is available on the web at http://www.way2drug.com/rhivdb.",2021-06-10 +34156291,Insights into the Cultured Bacterial Fraction of Corals.,"Bacteria associated with coral hosts are diverse and abundant, with recent studies suggesting involvement of these symbionts in host resilience to anthropogenic stress. Despite their putative importance, the work dedicated to culturing coral-associated bacteria has received little attention. 
Combining published and unpublished data, here we report a comprehensive overview of the diversity and function of culturable bacteria isolated from corals originating from tropical, temperate, and cold-water habitats. A total of 3,055 isolates from 52 studies were considered by our metasurvey. Of these, 1,045 had full-length 16S rRNA gene sequences, spanning 138 formally described and 12 putatively novel bacterial genera across the Proteobacteria, Firmicutes, Bacteroidetes, and Actinobacteria phyla. We performed comparative genomic analysis using the available genomes of 74 strains and identified potential signatures of beneficial bacterium-coral symbioses among the strains. Our analysis revealed >400 biosynthetic gene clusters that underlie the biosynthesis of antioxidant, antimicrobial, cytotoxic, and other secondary metabolites. Moreover, we uncovered genomic features-not previously described for coral-bacterium symbioses-potentially involved in host colonization and host-symbiont recognition, antiviral defense mechanisms, and/or integrated metabolic interactions, which we suggest as novel targets for the screening of coral probiotics. Our results highlight the importance of bacterial cultures to elucidate coral holobiont functioning and guide the selection of probiotic candidates to promote coral resilience and improve holistic and customized reef restoration and rehabilitation efforts. IMPORTANCE Our paper is the first study to synthesize currently available but decentralized data of cultured microbes associated with corals. We were able to collate 3,055 isolates across a number of published studies and unpublished collections from various laboratories and researchers around the world. This equated to 1,045 individual isolates which had full-length 16S rRNA gene sequences, after filtering of the original 3,055. We also explored which of these had genomes available. 
Originally, only 36 were available, and as part of this study, we added a further 38-equating to 74 in total. From this, we investigated potential genetic signatures that may facilitate a host-associated lifestyle. Further, such a resource is an important step in the selection of probiotic candidates, which are being investigated for promoting coral resilience and potentially applied as a novel strategy in reef restoration and rehabilitation efforts. In the spirit of open access, we have ensured this collection is available to the wider research community through the web site http://isolates.reefgenomics.org/ with the hope many scientists across the globe will ask for access to these cultures for future studies.",2021-06-22 +32451429,"MyomirDB: A unified database and server platform for muscle atrophy myomiRs, coregulatory networks and regulons.","Muscular atrophy or muscle loss is a multifactorial clinical condition during many critical illnesses like cancer, cardiovascular diseases, diabetes, pulmonary diseases etc. leading to fatigue and weakness and contributes towards a decreased quality of life. The proportion of older adults (>65 y) in the overall population is also growing and aging is another important factor causing muscle loss. Some muscle miRNAs (myomiRs) and their target genes have even been proposed as potential diagnostic, therapeutic and predictive markers for muscular atrophy. MyomirDB (http://www.myomirdb.in/) is a unique resource that provides a comprehensive, curated, user- friendly and detailed compilation of various miRNA bio-molecular interactions; miRNA-Transcription Factor-Target Gene co-regulatory networks and ~8000 tripartite regulons associated with 247 myomiRs which have been experimentally validated to be associated with various muscular atrophy conditions. 
For each database entry, MyomirDB compiles source organism, muscle atrophic condition, experiment duration, its level of expression, fold change, tissue of expression, experimental validation, disease and drug association, tissue-specific expression level, Gene Ontology and KEGG pathway associations. The web resource is a unique server platform which uses in-house scripts to construct miRNA-Transcription Factor-Target Gene co-regulatory networks and extract tri-partite regulons also called Feed Forward Loops. These unique features helps to offer mechanistic insights in disease pathology. Hence, MyomirDB is a unique platform for researchers working in this area to explore, fetch, compare and analyse atrophy associated miRNAs, their co-regulatory networks and FFL regulons.",2020-05-25 +32603341,A simple strategy to enhance the speed of protein secondary structure prediction without sacrificing accuracy.,"The secondary structure prediction of proteins is a classic topic of computational structural biology with a variety of applications. During the past decade, the accuracy of prediction achieved by state-of-the-art algorithms has been >80%; meanwhile, the time cost of prediction increased rapidly because of the exponential growth of fundamental protein sequence data. Based on literature studies and preliminary observations on the relationships between the size/homology of the fundamental protein dataset and the speed/accuracy of predictions, we raised two hypotheses that might be helpful to determine the main influence factors of the efficiency of secondary structure prediction. Experimental results of size and homology reductions of the fundamental protein dataset supported those hypotheses. They revealed that shrinking the size of the dataset could substantially cut down the time cost of prediction with a slight decrease of accuracy, which could be increased on the contrary by homology reduction of the dataset. 
Moreover, the Shannon information entropy could be applied to explain how accuracy was influenced by the size and homology of the dataset. Based on these findings, we proposed that a proper combination of size and homology reductions of the protein dataset could speed up the secondary structure prediction while preserving the high accuracy of state-of-the-art algorithms. Testing the proposed strategy with the fundamental protein dataset of the year 2018 provided by the Universal Protein Resource, the speed of prediction was enhanced over 20 folds while all accuracy measures remained equivalently high. These findings are supposed helpful for improving the efficiency of researches and applications depending on the secondary structure prediction of proteins. To make future implementations of the proposed strategy easy, we have established a database of size and homology reduced protein datasets at http://10.life.nctu.edu.tw/UniRefNR.",2020-06-30 +33306787,MetaADEDB 2.0: a comprehensive database on adverse drug events.,"

Summary

MetaADEDB is an online database we developed to integrate comprehensive information on adverse drug events (ADEs). The first version of MetaADEDB was released in 2013 and has been widely used by researchers. However, it has not been updated for more than seven years. Here, we reported its second version by collecting more and newer data from the U.S. FDA Adverse Event Reporting System (FAERS) and Canada Vigilance Adverse Reaction Online Database, in addition to the original three sources. The new version consists of 744 709 drug-ADE associations between 8498 drugs and 13 193 ADEs, which has an over 40% increase in drug-ADE associations compared to the previous version. Meanwhile, we developed a new and user-friendly web interface for data search and analysis. We hope that MetaADEDB 2.0 could provide a useful tool for drug safety assessment and related studies in drug discovery and development.

Availability and implementation

The database is freely available at: http://lmmd.ecust.edu.cn/metaadedb/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-08-01 +34955275,Signatures of selection reveal candidate genes involved in production traits in Chinese crossbred buffaloes.,"Identification of selection signature is important for a better understanding of genetic mechanisms that affect phenotypic differentiation in livestock. However, the genome-wide selection responses have not been investigated for the production traits of Chinese crossbred buffaloes. In this study, an SNP data set of 133 buffaloes (Chinese crossbred buffalo, n = 45; Chinese local swamp buffalo, n = 88) was collected from the Dryad Digital Repository database (https://datadryad.org/stash/). Population genetics analysis showed that these buffaloes were divided into the following 2 groups: crossbred buffalo and swamp buffalo. The crossbred group had higher genetic diversity than the swamp group. Using 3 complementary statistical methods (integrated haplotype score, cross population extended haplotype homozygosity, and composite likelihood ratio), a total of 31 candidate selection regions were identified in the Chinese crossbred population. Here, within these candidate regions, 25 genes were under the putative selection. Among them, several candidate genes were reported to be associated with production traits. In addition, we identified 13 selection regions that overlapped with bovine QTLs that were mainly involved in milk production and composition traits. These results can provide useful insights regarding the selection response for production traits of Chinese crossbred buffalo, as identified candidate genes influence production performance.",2021-12-23 +31161204,PRISMOID: a comprehensive 3D structure database for post-translational modifications and mutations with functional impact.,"Post-translational modifications (PTMs) play very important roles in various cell signaling pathways and biological process. 
Due to PTMs' extremely important roles, many major PTMs have been studied, while the functional and mechanical characterization of major PTMs is well documented in several databases. However, most currently available databases mainly focus on protein sequences, while the real 3D structures of PTMs have been largely ignored. Therefore, studies of PTMs 3D structural signatures have been severely limited by the deficiency of the data. Here, we develop PRISMOID, a novel publicly available and free 3D structure database for a wide range of PTMs. PRISMOID represents an up-to-date and interactive online knowledge base with specific focus on 3D structural contexts of PTMs sites and mutations that occur on PTMs and in the close proximity of PTM sites with functional impact. The first version of PRISMOID encompasses 17 145 non-redundant modification sites on 3919 related protein 3D structure entries pertaining to 37 different types of PTMs. Our entry web page is organized in a comprehensive manner, including detailed PTM annotation on the 3D structure and biological information in terms of mutations affecting PTMs, secondary structure features and per-residue solvent accessibility features of PTM sites, domain context, predicted natively disordered regions and sequence alignments. In addition, high-definition JavaScript packages are employed to enhance information visualization in PRISMOID. PRISMOID equips a variety of interactive and customizable search options and data browsing functions; these capabilities allow users to access data via keyword, ID and advanced options combination search in an efficient and user-friendly way. A download page is also provided to enable users to download the SQL file, computational structural features and PTM sites' data. 
We anticipate PRISMOID will swiftly become an invaluable online resource, assisting both biologists and bioinformaticians to conduct experiments and develop applications supporting discovery efforts in the sequence-structural-functional relationship of PTMs and providing important insight into mutations and PTM sites interaction mechanisms. The PRISMOID database is freely accessible at http://prismoid.erc.monash.edu/. The database and web interface are implemented in MySQL, JSP, JavaScript and HTML with all major browsers supported.",2020-05-01 +34756630,Assessment of YouTube videos as an information resource for keratoconus patients.,"

Purpose

To assess the quality of YouTube™ videos on keratoconus as an information resource for patients.

Methods

The words ""Keratoconus"", ""Crosslinking"", and ""Keratoconus treatment for patients"" were used as search terms on the official YouTube™ website (http://www.youtube.com). All cookies were disabled, the search history of the web browser was deleted just prior to the search, and no additional changes were made to the standard YouTube™ search preferences. The first 180 videos were downloaded. A total of 116 videos were evaluated according to DISCERN score, Journal of the American Medical Association score, and Global Quality Score. Videos were also assessed based on the presence or absence of 10 criteria proposed by two ophthalmologists.

Results

One hundred and sixteen videos were analyzed after the exclusion of 54 videos that were either duplicates, irrelevant, or in languages other than English. The mean DISCERN, Journal of the American Medical Association and Global Quality scores were 43.25±11.52, 1.30±0.90, 3.05±0.96 respectively. Fifty-six (48.3%) videos had been uploaded by physicians, 23 (19.8%) by healthcare organizations, 23 (19.8%) by patients, and 14 (12.1%) by other entities such as independent organizations and YouTube™ health channels. The most commonly satisfied criterion was information on treatment modalities (82.7%).

Conclusion

Keratoconus is a disease of youth, therefore YouTube™ videos would seem to be an easily accessible, informative and educational source, especially for younger patients. However, these videos are not useful as information resources and overall do not offer any additional benefit to patients.",2021-10-29 +35070450,A Review of Doses for Dental Imaging in 2010-2020 and Development of a Web Dose Calculator.,"Dental imaging is one of the most common types of diagnostic radiological procedures in modern medicine. We introduce a comprehensive table of organ doses received by patients in dental imaging procedures extracted from literature and a new web application to visualize the summarized dose information. We analyzed articles, published after 2010, from PubMed on organ and effective doses delivered by dental imaging procedures, including intraoral radiography, panoramic radiography, and cone-beam computed tomography (CBCT), and summarized doses by dosimetry method, machine model, patient age, and technical parameters. Mean effective doses delivered by intraoral, 1.32 (0.60-2.56) μSv, and panoramic, 17.93 (3.47-75.00) μSv, procedures were found to be about1% and 15% of that delivered by CBCT, 121.09 (17.10-392.20) μSv, respectively. In CBCT imaging, child phantoms received about 29% more effective dose than the adult phantoms received. The effective dose of a large field of view (FOV) (>150 cm2) was about 1.6 times greater than that of a small FOV (<50 cm2). The maximum CBCT effective dose with a large FOV for children, 392.2 μSv, was about 13% of theeffective dose that a person receives on average every year from natural radiation, 3110 μSv. Monte Carlo simulations of representative cases of the three dental imaging procedures were then conducted to estimate and visualize the dose distribution within the head. 
The user-friendly interactive web application (available at http://dentaldose.org) receives user input, such as the number of intraoral radiographs taken, and displays total organ and effective doses, dose distribution maps, and a comparison with other medical and natural sources of radiation. The web dose calculator provides a practical resource for patients interested in understanding the radiation doses delivered by dental imaging procedures.",2021-12-10 +34846641,HODD: A Manually Curated Database of Human Ophthalmic Diseases with Symptom Characteristics and Genetic Variants Towards Facilitating Quick and Definite Diagnosis.,"Ophthalmic diseases are disorders that affect the eyes. Hundreds of causal genes and biological pathways have been reported to be closely correlated with ophthalmic diseases. However, these information are scattered across various resources, which has hindered a thorough and deep understanding of ophthalmic diseases. In the present work, we proposed the Human Ophthalmic Diseases Database (HODD), which currently deposits 730 ophthalmic diseases and 653 related genes and is available at http://bio-bigdata.cn/HODD/ . The disease-related information and genes related to ophthalmic diseases were collected from the several well-known databases. To comprehensively understand the ophthalmic diseases, the basic information was provided for each disease, including disease description, related genes, gene location, ocular and extraocular effect of the disease, protein-protein interaction and disease-associated pathways. All these data were reorganized and made accessible through multiple entrances. We hope that HODD will facilitate studies on ophthalmic diseases. The workflow for the construction of the HODD (Human Ophthalmic Diseases Database, http://bio-bigdata.cn/HODD/ ) database.",2021-11-30 +,SAR data for tropical forest disturbance alerts in French Guiana: Benefit over optical imagery,"French Guiana forests cover 8 million hectares. 
With 98% of emerged land covered by forests, French Guiana is the area with the highest proportion of forest cover in the world. These forests are home to an exceptionally rich and diverse wealth of biodiversity that is both vulnerable and under threat due to high levels of pressure from human activity. As part of the French territory, French Guiana benefits from determined and continuous national efforts in the preservation of biodiversity and the environmental functionalities of ecosystems. The loss and fragmentation of forest cover caused by gold mining (legal and illegal), smallholder agriculture and forest exploitation, are considered as small-scale disturbances, although representing strong effects to vulnerable natural habitats, landscapes, and local populations. To monitor forest management programs and combat illegal deforestation and forest opening near-real time alerts system based on remote sensing data are required. For this large territory under frequent cloud cover, Synthetic-Aperture Radar (SAR) data appear to be the best adapted. In this paper, a method for forest alerts in a near-real time context based on Sentinel-1 data over the whole of French Guiana (83,534 km2) was developed and evaluated. The assessment was conducted for 2 years between 2016 and 2018 and includes comparisons with reference data provided by French Guiana forest organizations and comparisons with the existing University of Maryland Global Land Analysis and Discovery Forest Alerts datasets based on Landsat data. The reference datasets include 1,867 plots covering 2,124.5 ha of gold mining, smallholder agriculture and forest exploitation. The validation results showed high user accuracies (96.2%) and producer accuracies (81.5%) for forest loss detection, with the latter much higher than for optical forest alerts (36.4%). 
The forest alerts maps were also compared in terms of detection timing, showing systematic temporal delays of up to one year in the optical method compared to the SAR method. These results highlight the benefits of SAR over optical imagery for forest alerts detection in French Guiana. Finally, the potential of the SAR method applied to tropical forests is discussed. The SAR-based map of this study is available on http://cesbiomass.net/.",2021-01-01 +32090261,NipahVR: a resource of multi-targeted putative therapeutics and epitopes for the Nipah virus. ,"Nipah virus (NiV) is an emerging and priority pathogen from the Paramyxoviridae family with a high fatality rate. It causes various diseases such as respiratory ailments and encephalitis and poses a great threat to humans and livestock. Despite various efforts, there is no approved antiviral treatment available. Therefore, to expedite and assist the research, we have developed an integrative resource NipahVR (http://bioinfo.imtech.res.in/manojk/nipahvr/) for the multi-targeted putative therapeutics and epitopes for NiV. It is structured into different sections, i.e. genomes, codon usage, phylogenomics, molecular diagnostic primers, therapeutics (siRNAs, sgRNAs, miRNAs) and vaccine epitopes (B-cell, CTL, MHC-I and -II binders). Most decisively, potentially efficient therapeutic regimens targeting different NiV proteins and genes were anticipated and projected. We hope this computational resource would be helpful in developing combating strategies against this deadly pathogen. Database URL: http://bioinfo.imtech.res.in/manojk/nipahvr/.",2020-01-01 +30365034,Database Resources of the BIG Data Center in 2019.,"The BIG Data Center at Beijing Institute of Genomics (BIG) of the Chinese Academy of Sciences provides a suite of database resources in support of worldwide research activities in both academia and industry. 
With the vast amounts of multi-omics data generated at unprecedented scales and rates, the BIG Data Center is continually expanding, updating and enriching its core database resources through big data integration and value-added curation. Resources with significant updates in the past year include BioProject (a biological project library), BioSample (a biological sample library), Genome Sequence Archive (GSA, a data repository for archiving raw sequence reads), Genome Warehouse (GWH, a centralized resource housing genome-scale data), Genome Variation Map (GVM, a public repository of genome variations), Science Wikis (a catalog of biological knowledge wikis for community annotations) and IC4R (Information Commons for Rice). Newly released resources include EWAS Atlas (a knowledgebase of epigenome-wide association studies), iDog (an integrated omics data resource for dog) and RNA editing resources (for editome-disease associations and plant RNA editosome, respectively). To promote biodiversity and health big data sharing around the world, the Open Biodiversity and Health Big Data (BHBD) initiative is introduced. All of these resources are publicly accessible at http://bigd.big.ac.cn.",2019-01-01 +32938368,Large-scale prediction and analysis of protein sub-mitochondrial localization with DeepMito.,"

Background

The prediction of protein subcellular localization is a key step of the big effort towards protein functional annotation. Many computational methods exist to identify high-level protein subcellular compartments such as nucleus, cytoplasm or organelles. However, many organelles, like mitochondria, have their own internal compartmentalization. Knowing the precise location of a protein inside mitochondria is crucial for its accurate functional characterization. We recently developed DeepMito, a new method based on a 1-Dimensional Convolutional Neural Network (1D-CNN) architecture outperforming other similar approaches available in literature.

Results

Here, we explore the adoption of DeepMito for the large-scale annotation of four sub-mitochondrial localizations on mitochondrial proteomes of five different species, including human, mouse, fly, yeast and Arabidopsis thaliana. A significant fraction of the proteins from these organisms lacked experimental information about sub-mitochondrial localization. We adopted DeepMito to fill the gap, providing complete characterization of protein localization at sub-mitochondrial level for each protein of the five proteomes. Moreover, we identified novel mitochondrial proteins fishing on the set of proteins lacking any subcellular localization annotation using available state-of-the-art subcellular localization predictors. We finally performed additional functional characterization of proteins predicted by DeepMito as localized into the four different sub-mitochondrial compartments using both available experimental and predicted GO terms. All data generated in this study were collected into a database called DeepMitoDB (available at http://busca.biocomp.unibo.it/deepmitodb ), providing complete functional characterization of 4307 mitochondrial proteins from the five species.

Conclusions

DeepMitoDB offers a comprehensive view of mitochondrial proteins, including experimental and predicted fine-grain sub-cellular localization and annotated and predicted functional annotations. The database complements other similar resources providing characterization of new proteins. Furthermore, it is also unique in including localization information at the sub-mitochondrial level. For this reason, we believe that DeepMitoDB can be a valuable resource for mitochondrial research.",2020-09-16 +32924233,Evaluation of an educational video providing key messages for doctors to counsel families following a first afebrile seizure.,"

Aim

The aim was to evaluate an educational video in educating doctors on the key messages and follow-up pathways following a first afebrile seizure presentation. A multidisciplinary expert team developed the video (http://www.pennsw.org.au/families/resources/first-seizure-pack-and-video) based on available evidence and best-practice. It contains a role-play between the parent/child and physician. It addresses: key messages to impart following a first seizure, seizure first aid, safety messages including necessary precautions post-discharge, contents of the First Seizure Pack for families, follow-up pathway and issues for discussion with the paediatrician at a later appointment.

Methods

Paediatric/Emergency department (ED) trainees across three Australian sites were recruited during terms 1 and 2, 2019. A repeated measures design was used. Multilevel modelling analyses were performed. The primary outcome was clinician knowledge. Secondary outcomes were confidence in answering questions and counselling families. Qualitative data on the utility, strengths and weaknesses of the video were evaluated.

Results

A total of 127 participants consented, one withdrew prior to commencing. A total of 126 baseline surveys, 115 follow-up surveys and 45 1-month follow-up surveys were returned. Viewing the video significantly improved knowledge of key messages at immediate follow-up (P < 0.001) and 1-month follow-up (P = 0.048). Likewise, confidence was significantly improved; 96.5% of responders found the video useful, 90.3% were likely to use the resource in the future and 82% would change their approach to counselling. Most liked aspects of the resource were clarity/conciseness of the information (n = 70) and comprehensiveness (n = 38).

Conclusion

This education video significantly improved clinician knowledge and confidence in counselling families following first seizure.",2020-09-13 +32140729,GXD's RNA-Seq and Microarray Experiment Search: using curated metadata to reliably find mouse expression studies of interest. ,"The Gene Expression Database (GXD), an extensive community resource of curated expression information for the mouse, has developed an RNA-Seq and Microarray Experiment Search (http://www.informatics.jax.org/gxd/htexp_index). This tool allows users to quickly and reliably find specific experiments in ArrayExpress and the Gene Expression Omnibus (GEO) that study endogenous gene expression in wild-type and mutant mice. Standardized metadata annotations, curated by GXD, allow users to specify the anatomical structure, developmental stage, mutated gene, strain and sex of samples of interest, as well as the study type and key parameters of the experiment. These searches, powered by controlled vocabularies and ontologies, can be combined with free text searching of experiment titles and descriptions. Search result summaries include link-outs to ArrayExpress and GEO, providing easy access to the expression data itself. Links to the PubMed entries for accompanying publications are also included. More information about this tool and GXD can be found at the GXD home page (http://www.informatics.jax.org/expression.shtml). Database URL: http://www.informatics.jax.org/expression.shtml.",2020-01-01 +34759910,Comparison of CRISPR-Cas Immune Systems in Healthcare-Related Pathogens.,"The ESKAPE pathogens (Enterococcus faecium, Staphylococcus aureus, Klebsiella pneumoniae, Acinetobacter baumannii, Pseudomonas aeruginosa, and Enterobacter species) and Clostridium difficile have been identified as the leading global cause of multidrug-resistant bacterial infections in hospitals. 
CRISPR-Cas systems are bacterial immune systems, empowering the bacteria with defense against invasive mobile genetic elements that may carry the antimicrobial resistance (AMR) genes, among others. On the other hand, the CRISPR-Cas systems are themselves mobile. In this study, we annotated and compared the CRISPR-Cas systems in these pathogens, utilizing their publicly available large numbers of sequenced genomes (e.g., there are more than 12 thousands of S. aureus genomes). The presence of CRISPR-Cas systems showed a very broad spectrum in these pathogens: S. aureus has the least tendency of obtaining the CRISPR-Cas systems with only 0.55% of its isolates containing CRISPR-Cas systems, whereas isolates of C. difficile we analyzed have CRISPR-Cas systems each having multiple CRISPRs. Statistical tests show that CRISPR-Cas containing isolates tend to have more AMRs for four of the pathogens (A. baumannii, E. faecium, P. aeruginosa, and S. aureus). We made available all the annotated CRISPR-Cas systems in these pathogens with visualization at a website (https://omics.informatics.indiana.edu/CRISPRone/pathogen), which we believe will be an important resource for studying the pathogens and their arms-race with invaders mediated through the CRISPR-Cas systems, and for developing potential clinical applications of the CRISPR-Cas systems for battles against the antibiotic resistant pathogens.",2021-10-25 +31665441,CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database.,"The Comprehensive Antibiotic Resistance Database (CARD; https://card.mcmaster.ca) is a curated resource providing reference DNA and protein sequences, detection models and bioinformatics tools on the molecular basis of bacterial antimicrobial resistance (AMR). 
CARD focuses on providing high-quality reference data and molecular sequences within a controlled vocabulary, the Antibiotic Resistance Ontology (ARO), designed by the CARD biocuration team to integrate with software development efforts for resistome analysis and prediction, such as CARD's Resistance Gene Identifier (RGI) software. Since 2017, CARD has expanded through extensive curation of reference sequences, revision of the ontological structure, curation of over 500 new AMR detection models, development of a new classification paradigm and expansion of analytical tools. Most notably, a new Resistomes & Variants module provides analysis and statistical summary of in silico predicted resistance variants from 82 pathogens and over 100 000 genomes. By adding these resistance variants to CARD, we are able to summarize predicted resistance using the information included in CARD, identify trends in AMR mobility and determine previously undescribed and novel resistance variants. Here, we describe updates and recent expansions to CARD and its biocuration process, including new resources for community biocuration of AMR molecular reference data.",2020-01-01 +,A Web Interface for Petri Nets with Transits and Petri Games,"Developing algorithms for distributed systems is an error-prone task. Formal models like Petri nets with transits and Petri games can prevent errors when developing such algorithms. Petri nets with transits allow us to follow the data flow between components in a distributed system. They can be model checked against specifications in LTL on both the local data flow and the global behavior. Petri games allow the synthesis of local controllers for distributed systems from safety specifications. Modeling problems in these formalisms requires defining extended Petri nets which can be cumbersome when performed textually. In this paper, we present a web interface (The web interface is deployed at http://adam.informatik.uni-oldenburg.de.) 
that allows an intuitive, visual definition of Petri nets with transits and Petri games. The corresponding model checking and synthesis problems are solved directly on a server. In the interface, implementations, counterexamples, and all intermediate steps can be analyzed and simulated. Stepwise simulations and interactive state space generation support the user in detecting modeling errors.",2021-02-26 +30657872,AllerCatPro-prediction of protein allergenicity potential from the protein sequence.,"

Motivation

Due to the risk of inducing an immediate Type I (IgE-mediated) allergic response, proteins intended for use in consumer products must be investigated for their allergenic potential before introduction into the marketplace. The FAO/WHO guidelines for computational assessment of allergenic potential of proteins based on short peptide hits and linear sequence window identity thresholds misclassify many proteins as allergens.

Results

We developed AllerCatPro which predicts the allergenic potential of proteins based on similarity of their 3D protein structure as well as their amino acid sequence compared with a data set of known protein allergens comprising 4180 unique allergenic protein sequences derived from the union of the major databases Food Allergy Research and Resource Program, Comprehensive Protein Allergen Resource, WHO/International Union of Immunological Societies, UniProtKB and Allergome. We extended the hexamer hit rule by removing peptides with high probability of random occurrence measured by sequence entropy as well as requiring 3 or more hexamer hits consistent with natural linear epitope patterns in known allergens. This is complemented with a Gluten-like repeat pattern detection. We also switched from a linear sequence window similarity to a B-cell epitope-like 3D surface similarity window which became possible through extensive 3D structure modeling covering the majority (74%) of allergens. In case no structure similarity is found, the decision workflow reverts to the old linear sequence window rule. The overall accuracy of AllerCatPro is 84% compared with other current methods which range from 51 to 73%. Both the FAO/WHO rules and AllerCatPro achieve highest sensitivity but AllerCatPro provides a 37-fold increase in specificity.

Availability and implementation

https://allercatpro.bii.a-star.edu.sg/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +34663425,DNA methylation-calling tools for Oxford Nanopore sequencing: a survey and human epigenome-wide evaluation.,"

Background

Nanopore long-read sequencing technology greatly expands the capacity of long-range, single-molecule DNA-modification detection. A growing number of analytical tools have been developed to detect DNA methylation from nanopore sequencing reads. Here, we assess the performance of different methylation-calling tools to provide a systematic evaluation to guide researchers performing human epigenome-wide studies.

Results

We compare seven analytic tools for detecting DNA methylation from nanopore long-read sequencing data generated from human natural DNA at a whole-genome scale. We evaluate the per-read and per-site performance of CpG methylation prediction across different genomic contexts, CpG site coverage, and computational resources consumed by each tool. The seven tools exhibit different performances across the evaluation criteria. We show that the methylation prediction at regions with discordant DNA methylation patterns, intergenic regions, low CG density regions, and repetitive regions show room for improvement across all tools. Furthermore, we demonstrate that 5hmC levels at least partly contribute to the discrepancy between bisulfite and nanopore sequencing. Lastly, we provide an online DNA methylation database ( https://nanome.jax.org ) to display the DNA methylation levels detected by nanopore sequencing and bisulfite sequencing data across different genomic contexts.

Conclusions

Our study is the first systematic benchmark of computational methods for detection of mammalian whole-genome DNA modifications in nanopore sequencing. We provide a broad foundation for cross-platform standardization and an evaluation of analytical tools designed for genome-scale modified base detection using nanopore sequencing.",2021-10-18 +34619810,Using ConSurf to Detect Functionally Important Regions in RNA.,"The ConSurf web server (https://consurf.tau.ac.il/) for using evolutionary data to detect functional regions is useful for analyzing proteins. The analysis is based on the premise that functional regions, which may for example facilitate ligand binding and catalysis, often evolve slowly. The analysis requires finding enough effective, i.e., non-redundant, sufficiently remote homologs. Indeed, the ConSurf pipeline, which is based on state-of-the-art protein sequence databases and analysis tools, is highly valuable for protein analysis. ConSurf also allows evolutionary analysis of RNA, but the analysis often fails due to insufficient data, particularly the inability of the current pipeline to detect enough effective RNA homologs. This is because the RNA search tools and databases offered are not as good as those used for protein analysis. Fortunately, ConSurf also allows importing external collections of homologs in the form of a multiple sequence alignment (MSA). Leveraging this, here we describe various protocols for constructing MSAs for successful ConSurf analysis of RNA queries. We report the level of success of these protocols on an exemplary set comprising a dozen RNA molecules of diverse structure and function. © 2021 Wiley Periodicals LLC. Basic Protocol 1: Standard ConSurf evolutionary conservation analysis of an RNA query. Basic Protocol 2: ConSurf evolutionary conservation analysis of an RNA query with external MSA. Support Protocol 1: Construction of an MSA for an RNA query using other online servers. 
Support Protocol 2: Construction of an MSA for an RNA query using nHMMER locally.",2021-10-01 +34197127,"Precise Evaluation of Spatial Characteristics of Periodically Precipitating Systems via Measurement of RGB (Red, Green, and Blue) Values of Pattern Images.","In the present study, a method is described for precise determination of spatial characteristics of Liesegang bands formed by employing a classical 1D setup using a web-based free resource (https://www.ginifab.com/feeds/pms/color_picker_from_image.php). The method involves the compartmentalization of the information on each pixel into R (red), G (green), or B (blue) values from the pattern images obtained using a simple digital camera. The values can further be converted to absorbance values by using the system blank. Each trough (or peak) in the graph of RGB values (or absorbance values) corresponds to a band in the pattern. The method is employed to determine the spacing and width of the periodically precipitating AgCl, AgBr, and Co(OH)2 in an agar gel. It is observed that AgCl shows revert banding, and AgBr shows revert banding at the top of the tube and then diverges to regular banding at the bottom of the tube, whereas the Co(OH)2 patterns explicitly show regular banding under given experimental conditions. It is also observed that minute instabilities, such as the formation of secondary bands, can also be visualized by the present method.",2021-07-01 +31514149,A Convolutional Neural Network System to Discriminate Drug-Target Interactions.,"Biological targets are most commonly proteins such as enzymes, ion channels, and receptors. They are anything within a living organism to bind with some other entities (like an endogenous ligand or a drug), resulting in change in their behaviors or functions. Exploring potential drug-target interactions (DTIs) are crucial for drug discovery and effective drug development. 
Computational methods were widely applied in drug-target interactions, since experimental methods are extremely time-consuming and resource-intensive. In this paper, we proposed a novel deep learning-based prediction system, with a new negative instance generation, to identify DTIs. As a result, our method achieved an accuracy of 0.9800 on our created dataset. Another dataset derived from DrugBank was used to further assess the generalization of the model, which yielded a good performance with accuracy of 0.8814 and AUC value of 0.9527 on the dataset. The outcome of our experimental results indicated that the proposed method, involving the credible negative generation, can be employed to discriminate the interactions between drugs and targets. Website: http://www.dlearningapp.com/web/DrugCNN.htm.",2021-07-01 +34585729,Classifying domain-specific text documents containing ambiguous keywords. ,"A keyword-based search of comprehensive databases such as PubMed may return irrelevant papers, especially if the keywords are used in multiple fields of study. In such cases, domain experts (curators) need to verify the results and remove the irrelevant articles. Automating this filtering process will save time, but it has to be done well enough to ensure few relevant papers are rejected and few irrelevant papers are accepted. A good solution would be fast, work with the limited amount of data freely available (full paper body may be missing), handle ambiguous keywords and be as domain-neutral as possible. In this paper, we evaluate a number of classification algorithms for identifying a domain-specific set of papers about echinoderm species and show that the resulting tool satisfies most of the abovementioned requirements. Echinoderms consist of a number of very different organisms, including brittle stars, sea stars (starfish), sea urchins and sea cucumbers. 
While their taxonomic identifiers are specific, the common names are used in many other contexts, creating ambiguity and making a keyword search prone to error. We try classifiers using Linear, Naïve Bayes, Nearest Neighbor, Tree, SVM, Bagging, AdaBoost and Neural Network learning models and compare their performance. We show how effective the resulting classifiers are in filtering irrelevant articles returned from PubMed. The methodology used is more dependent on the good selection of training data and is a practical solution that can be applied to other fields of study facing similar challenges. Database URL: The code and data reported in this paper are freely available at http://xenbaseturbofrog.org/pub/Text-Topic-Classifier/.",2021-09-29 +34700681,Evaluation of MET Non-Exon-14 Mutations as Biomarkers for Immunotherapy Outcomes Across Multiple Cancer Types.,"

Purpose/objective(s)

MET-inhibitors have shown promising anti-tumor activity in patients with MET exon 14 mutant cancers. However, therapeutic strategies for MET non-exon-14 (MET-non-ex14) mutant cancer are still largely unknown. We evaluated the relationship between MET-non-ex14 mutations and the efficacy of immune checkpoint inhibitors (ICIs) in patients with multiple cancer types.

Materials/methods

The clinical and genomic data of 1690 ICIs-treated cancer patients were obtained from the cBioPortal database (https://www.cbioportal.org). MET mutations were defined as any nonsynonymous mutations including missense, frameshift, nonsense and splice site mutations. We divided MET mutations into METex14 and MET-non-ex14 mutation subsets according to the mutated-position in MET exons. Kruskal-Wallis test was used to analyze the difference of tumor mutational burden (TMB) score, and χ2 test was applied for categorical variables. Log-rank test was used to analyze the differences between Kaplan-Meier survival curves. P values were 2-sided, and P < 0.05 was considered statistically significant.

Results

1690 ICIs-treated patients with various cancer types were enrolled in this study, including non-small-cell lung cancer (NSCLC) (22.8%, 385/1690), melanoma (18.9%, 320/1690), bladder cancer (12.7%, 215/1690), and so on. A total of 51 patients (3.0%) harbored MET-non-ex14 mutations in the entire cohort, including one melanoma and one NSCLC patients with METex14 commutations. No significant differences in age, sex and cancer types were observed between patients with and without MET-non-ex14 mutations (P > 0.05). However, the TMB in patients with MET-non-ex14 mutations was significantly higher than those without these mutations (P < 0.001). Across 1661 patients with available overall survival (OS) data, MET-non-ex14 mutant patients had a significantly longer OS versus MET ex14-mutant/wild-type patients (median OS, not reached vs 18 months; P = 0.001). In NSCLC subgroup, 17 patients (4.4%) had MET mutations including six with MET-non-ex14 mutations, 10 with METex14 mutations and one with MET-non-ex14/METex14 commutations. MET-non-ex14-mutant patients (7/385, 1.8%) possessed a significantly higher TMB than METex14-mutant (10/385, 2.6%) and MET wild-type (368/385, 95.6%) populations, respectively (P = 0.008; P = 0.01, respectively). In the total of 356 NSCLC patients with available OS data, the OS was significantly longer in MET-non-ex14-mutant subgroup compared with their wild-type counterparts (median OS, not reached vs 11 months; P = 0.039). Additionally, patients with MET-non-ex14 mutations exhibited relatively better survival versus METex14-mutant patients (median OS, not reached vs 18 months; P = 0.175).

Conclusion

Our results indicated that MET-non-ex14 mutations were associated with higher TMB, and could be considered as a positive prognostic biomarker for immunotherapy in different cancer types.",2021-11-01 +31722421,The European Nucleotide Archive in 2019.,"The European Nucleotide Archive (ENA, https://www.ebi.ac.uk/ena) at the European Molecular Biology Laboratory's European Bioinformatics Institute provides open and freely available data deposition and access services across the spectrum of nucleotide sequence data types. Making the world's public sequencing datasets available to the scientific community, the ENA represents a globally comprehensive nucleotide sequence resource. Here, we outline ENA services and content in 2019 and provide an insight into selected key areas of development in this period.",2020-01-01 +34242710,Introgression contributes to distribution of structural variations in cattle.,"Structural variations (SVs) are an important source of phenotypic diversity in cattle. Here, 72 whole genome sequences representing taurine and zebu cattle were used to identify SVs. Applying multiple approaches, 16,738 SVs were identified. A comparison against the Database of Genomic Variants archives revealed that 1575 SVs were novel in our data. A novel duplication covering the entire GALNT15 gene, was observed only in N'Dama. A duplication, which was previously reported only in zebu and associated with navel length, was also observed in N'Dama. Investigation of a novel deletion located upstream of CAST13 gene and identified only in Italian cattle and zebu, revealed its introgressed origin in the former. Overall, our data highlights how the SVs distribution in cattle is also shaped by forces such as demographical differences and gene flow. The cattle SVs of this study and its meta-data can be visualized on an interactive genome browser at https://tinyurl.com/svCowArs.",2021-07-07 +34236262,"The prognosis of glioblastoma: a large, multifactorial study.","

Objective

Glioblastoma is the most common and fatal primary brain tumor in adults. Even with maximal resection and a series of postoperative adjuvant treatments, the median overall survival (OS) of glioblastoma patients remains approximately 15 months. The Huashan Hospital glioma bank contains more than 2000 glioma tissue samples with long-term follow-up data; almost half of these samples are from glioblastoma patients. Several large glioma databases with long-term follow-up data have reported outcomes of glioblastoma patients from countries other than China. We investigated the prognosis of glioblastoma patients in China and compared the survival outcomes among patients from different databases.

Methods

The data for 967 glioblastoma patients who underwent surgery at Huashan Hospital and had long-term follow-up records were obtained from our glioma registry (diagnosed from 29 March 2010, through 7 June 2017). Patients were eligible for inclusion if they underwent surgical resection for newly diagnosed glioblastomas and had available data of survival and personal information. Data of 778 glioblastoma patients were collected from three separate online databases (448 patients from The Cancer Genome Atlas (TCGA, https://cancergenome.nih.gov), 191 from REpository for Molecular BRAin Neoplasia DaTa (REMBRANDT) database (GSE108476) and 132 from data set GSE16011, hereafter called the French database). We compared the prognosis of glioblastoma patients from records among the different databases and the changes in survival outcomes of glioblastoma patients from Huashan Hospital over an 8-year period.

Results

The median OS of glioblastoma patients was 16.3 (95% CI: 15.4-17.2) months for Huashan Hospital, 13.8 (95% CI: 12.9-14.9) months for TCGA, 19.3 (95% CI: 17.0-20.0) months for the REMBRANDT database, and 9.1 months for the French database. The median OS of glioblastoma patients from Huashan Hospital improved from 15.6 (2010-2013, 95% CI: 14.4-16.6) months to 18.2 (2014-2017, 95% CI: 15.8-20.6) months over the study period (2010-2017). In addition, the prognosis of glioblastoma patients with total resection was significantly better than that of glioblastoma patients with sub-total resection or biopsy.

Conclusions

Our study confirms that treatment centered around maximal surgical resection brought survival benefits to glioblastoma patients after adjusting for validated prognostic factors. In addition, an improvement in prognosis was observed among glioblastoma patients from Huashan Hospital over the course of our study. We attributed it to the adoption of a new standard of neurosurgical treatment on the basis of neurosurgical multimodal technologies. Even though the prognosis of glioblastoma patients remains poor, gradual progress is being made.",2021-07-08 +33226064,pdm_utils: a SEA-PHAGES MySQL phage database management toolkit.,"

Summary

Bacteriophages (phages) are incredibly abundant and genetically diverse. The volume of phage genomics data is rapidly increasing, driven in part by the SEA-PHAGES program, which isolates, sequences and manually annotates hundreds of phage genomes each year. With an ever-expanding genomics dataset, there are many opportunities for generating new biological insights through comparative genomic and bioinformatic analyses. As a result, there is a growing need to be able to store, update, explore and analyze phage genomics data. The package pdm_utils provides a collection of tools for MySQL phage database management designed to meet specific needs in the SEA-PHAGES program and phage genomics generally.

Availability and implementation

https://pypi.org/project/pdm-utils/.",2021-08-01 +34849207,Two-species community design of lactic acid bacteria for optimal production of lactate.,"Microbial communities that metabolise pentose and hexose sugars are useful in producing high-value chemicals, resulting in the effective conversion of raw materials to the product, a reduction in the production cost, and increased yield. Here, we present a computational analysis approach called CAMP (Co-culture/Community Analyses for Metabolite Production) that simulates and identifies appropriate communities to produce a metabolite of interest. To demonstrate this approach, we focus on the optimal production of lactate from various Lactic Acid Bacteria. We used genome-scale metabolic models (GSMMs) belonging to Lactobacillus, Leuconostoc, and Pediococcus species from the Virtual Metabolic Human (VMH; https://vmh.life/) resource and well-curated GSMMs of L. plantarum WCSF1 and L. reuteri JCM 1112. We analysed 1176 two-species communities using a constraint-based modelling method for steady-state flux-balance analysis of communities. Flux variability analysis was used to detect the maximum lactate flux in the communities. Using glucose or xylose as substrates separately or in combination resulted in either parasitism, amensalism, or mutualism being the dominant interaction behaviour in the communities. Interaction behaviour between members of the community was deduced based on variations in the predicted growth rates of monocultures and co-cultures. Acetaldehyde, ethanol, acetate, among other metabolites, were found to be cross-fed between community members. L. plantarum WCSF1 was found to be a member of communities with high lactate yields. In silico community optimisation strategies to predict reaction knock-outs for improving lactate flux were implemented. 
Reaction knock-outs of acetate kinase, phosphate acetyltransferase, and fumarate reductase in the communities were found to enhance lactate production.",2021-11-09 +34464437,lncExplore: a database of pan-cancer analysis and systematic functional annotation for lncRNAs from RNA-sequencing data. ,"Over the past few years, with the rapid growth of deep-sequencing technology and the development of computational prediction algorithms, a large number of long non-coding RNAs (lncRNAs) have been identified in various types of human cancers. Therefore, it has become critical to determine how to properly annotate the potential function of lncRNAs from RNA-sequencing (RNA-seq) data and arrange the robust information and analysis into a useful system readily accessible by biological and clinical researchers. In order to produce a collective interpretation of lncRNA functions, it is necessary to integrate different types of data regarding the important functional diversity and regulatory role of these lncRNAs. In this study, we utilized transcriptomic sequencing data to systematically observe and identify lncRNAs and their potential functions from 5034 The Cancer Genome Atlas RNA-seq datasets covering 24 cancers. Then, we constructed the 'lncExplore' database that was developed to comprehensively integrate various types of genomic annotation data for collective interpretation. The distinctive features in our lncExplore database include (i) novel lncRNAs verified by both coding potential and translation efficiency score, (ii) pan-cancer analysis for studying the significantly aberrant expression across 24 human cancers, (iii) genomic annotation of lncRNAs, such as cis-regulatory information and gene ontology, (iv) observation of the regulatory roles as enhancer RNAs and competing endogenous RNAs and (v) the findings of the potential lncRNA biomarkers for the user-interested cancers by integrating clinical information and disease specificity score. 
The lncExplore database is to our knowledge the first public lncRNA annotation database providing cancer-specific lncRNA expression profiles for not only known but also novel lncRNAs, enhancer RNAs annotation and clinical analysis based on pan-cancer analysis. lncExplore provides a more complete pathway to highly efficient, novel and more comprehensive translation of laboratory discoveries into the clinical context and will assist in reinterpreting the biological regulatory function of lncRNAs in cancer research. Database URL http://lncexplore.bmi.nycu.edu.tw.",2021-08-01 +30380090,PLSDB: a resource of complete bacterial plasmids.,"The study of bacterial isolates or communities requires the analysis of the therein included plasmids in order to provide an extensive characterization of the organisms. Plasmids harboring resistance and virulence factors are of especial interest as they contribute to the dissemination of antibiotic resistance. As the number of newly sequenced bacterial genomes is growing a comprehensive resource is required which will allow to browse and filter the available plasmids, and to perform sequence analyses. Here, we present PLSDB, a resource containing 13 789 plasmid records collected from the NCBI nucleotide database. The web server provides an interactive view of all obtained plasmids with additional meta information such as sequence characteristics, sample-related information and taxonomy. Moreover, nucleotide sequence data can be uploaded to search for short nucleotide sequences (e.g. specific genes) in the plasmids, to compare a given plasmid to the records in the collection or to determine whether a sample contains one or multiple of the known plasmids (containment analysis). 
The resource is freely accessible under https://ccb-microbe.cs.uni-saarland.de/plsdb/.",2019-01-01 +32561749,A clinically and genomically annotated nerve sheath tumor biospecimen repository.,"Nerve sheath tumors occur as a heterogeneous group of neoplasms in patients with neurofibromatosis type 1 (NF1). The malignant form represents the most common cause of death in people with NF1, and even when benign, these tumors can result in significant disfigurement, neurologic dysfunction, and a range of profound symptoms. Lack of human tissue across the peripheral nerve tumors common in NF1 has been a major limitation in the development of new therapies. To address this unmet need, we have created an annotated collection of patient tumor samples, patient-derived cell lines, and patient-derived xenografts, and carried out high-throughput genomic and transcriptomic characterization to serve as a resource for further biologic and preclinical therapeutic studies. In this work, we release genomic and transcriptomic datasets comprised of 55 tumor samples derived from 23 individuals, complete with clinical annotation. All data are publicly available through the NF Data Portal and at http://synapse.org/jhubiobank.",2020-06-19 +34729303,"MCDB: A comprehensive curated mitotic catastrophe database for retrieval, protein sequence alignment, and target prediction.","Mitotic catastrophe (MC) is a form of programmed cell death induced by mitotic process disorders, which is very important in tumor prevention, development, and drug resistance. Because rapidly increased data for MC is vigorously promoting the tumor-related biomedical and clinical study, it is urgent for us to develop a professional and comprehensive database to curate MC-related data. Mitotic Catastrophe Database (MCDB) consists of 1214 genes/proteins and 5014 compounds collected and organized from more than 8000 research articles. 
Also, MCDB defines the confidence level, classification criteria, and uniform naming rules for MC-related data, which greatly improves data reliability and retrieval convenience. Moreover, MCDB develops protein sequence alignment and target prediction functions. The former can be used to predict new potential MC-related genes and proteins, and the latter can facilitate the identification of potential target proteins of unknown MC-related compounds. In short, MCDB is such a proprietary, standard, and comprehensive database for MC-related data that will facilitate the exploration of MC from chemists to biologists in the fields of medicinal chemistry, molecular biology, bioinformatics, oncology and so on. The MCDB is distributed on http://www.combio-lezhang.online/MCDB/index_html/.",2021-06-07 +31844049,ProTargetMiner as a proteome signature library of anticancer molecules for functional discovery.,"Deconvolution of targets and action mechanisms of anticancer compounds is fundamental in drug development. Here, we report on ProTargetMiner as a publicly available expandable proteome signature library of anticancer molecules in cancer cell lines. Based on 287 A549 adenocarcinoma proteomes affected by 56 compounds, the main dataset contains 7,328 proteins and 1,307,859 refined protein-drug pairs. These proteomic signatures cluster by compound targets and action mechanisms. The targets and mechanistic proteins are deconvoluted by partial least square modeling, provided through the website http://protargetminer.genexplain.com. For 9 molecules representing the most diverse mechanisms and the common cancer cell lines MCF-7, RKO and A549, deep proteome datasets are obtained. Combining data from the three cell lines highlights common drug targets and cell-specific differences. The database can be easily extended and merged with new compound signatures. 
ProTargetMiner serves as a chemical proteomics resource for the cancer research community, and can become a valuable tool in drug discovery.",2019-12-16 +34032471,The Human Salivary Proteome Wiki: A Community-Driven Research Platform.,"Saliva has become an attractive body fluid for on-site, remote, and real-time monitoring of oral and systemic health. At the same time, the scientific community needs a saliva-centered information platform that keeps pace with the rapid accumulation of new data and knowledge by annotating, refining, and updating the salivary proteome catalog. We developed the Human Salivary Proteome (HSP) Wiki as a public data platform for researching and retrieving custom-curated data and knowledge on the saliva proteome. The HSP Wiki is dynamically compiled and updated based on published saliva proteome studies and up-to-date protein reference records. It integrates a wide range of available information by funneling in data from established external protein, genome, transcriptome, and glycome databases. In addition, the HSP Wiki incorporates data from human disease-related studies. Users can explore the proteome of saliva simply by browsing the database, querying the available data, performing comparisons of data sets, and annotating existing protein entries using a simple, intuitive interface. The annotation process includes both user feedback and curator committee review to ensure the quality and validity of each entry. Here, we present the first overview of features and functions the HSP Wiki offers. As a saliva proteome-centric, publicly accessible database, the HSP Wiki will advance the knowledge of saliva composition and function in health and disease for users across a wide range of disciplines. As a community-based data- and knowledgebase, the HSP Wiki will serve as a worldwide platform to exchange salivary proteome information, inspire novel research ideas, and foster cross-discipline collaborations. 
The HSP Wiki will pave the way for harnessing the full potential of the salivary proteome for diagnosis, risk prediction, therapy of oral and systemic diseases, and preparedness for emerging infectious diseases. Database URL: https://salivaryproteome.nidcr.nih.gov/.",2021-05-25 +34788021,Web-Based Open-Source Tool for Isotachophoresis.,"We present the development of a client-side web-based simulator for complex electrophoresis phenomena, including isotachophoresis. The simulation tool is called Client-based Application for Fast Electrophoresis Simulation (CAFES). CAFES uses the broad cross-browser compatibility of JavaScript to provide a rapid and easy-to-use tool for coupled unsteady electromigration, diffusion, and equilibrium electrolyte reactions among multiple weak electrolytes. The code uses a stationary grid (for simplicity) and an adaptive time step to provide reliable estimates of ion concentration dynamics (including pH profile evolution), requiring no prior installation nor compilation. CAFES also offers a large database of commonly used species and their relevant physicochemical properties. We present a validation of predictions from CAFES by comparing them to experimental data of peak- and plateau-mode isotachophoresis experiments. The code yields accurate estimates of interface velocity, plateau length and relative intensity, and pH variations while significantly reducing the computation time compared to existing codes. The tool is open-source and available for free at https://microfluidics.stanford.edu/cafes.",2021-11-17 +32380213,TrypInDB: A searchable online resource of small molecule inhibitors against Trypanosoma sp.,"African Trypanosomiasis and American Trypanosomiasis are the diseases affecting more than thousands of people yearly and more than twenty-five million people risk acquiring the disease. The treatment for the disease is generally expensive, and most of the available drugs are of high-toxicity and cause fatal side-effects. 
Hence, there is a constant need for finding new treatment strategies for Trypanosomiasis. Combination therapy and repurposing or redesigning of existing inhibitors for new drugs are of high importance to address these hurdles, particularly the drug resistance. Hence, here we report TrypInDB, a searchable online resource of small molecule inhibitors having a varying degree of activity towards Trypanosoma sp. Information of about >14,000 small molecules from >700 published research articles was collected and made as an easy-to-search database. Four major sets of information were made available for each collected inhibitors viz., General information (activity values; source of the inhibitors; enzyme targets; etc.,), Structural information, Toxicity information, and Literature information. More than 25 different information about each inhibitor were collected or predicted and made accessible for searching. The database is designed to be queried easily with multiple-field filters with the provisions to perform sub-structure search and similar FDA approved drug searches. The database supports the easy export of queried records and structure in multiple formats. In addition, the TrypInDB is actively integrated into LeishInDB. We believe that the scope of TrypInDB permits the research community to exploit the available data for repurposing the inhibitors as well as for the investigation of new therapeutics. Database URL: http://trypindb.biomedinformri.com/.",2020-05-05 +34029142,Landscape of GPCR expression along the mouse nephron.,"Kidney transport and other renal functions are regulated by multiple G protein-coupled receptors (GPCRs) expressed along the renal tubule. The rapid, recent appearance of comprehensive unbiased gene expression data in the various renal tubule segments, chiefly RNA sequencing and protein mass spectrometry data, has provided a means of identifying patterns of GPCR expression along the renal tubule. 
To allow for comprehensive mapping, we first curated a comprehensive list of GPCRs in the genomes of mice, rats, and humans (https://hpcwebapps.cit.nih.gov/ESBL/Database/GPCRs/) using multiple online data sources. We used this list to mine segment-specific and cell type-specific expression data from RNA-sequencing studies in microdissected mouse tubule segments to identify GPCRs that are selectively expressed in discrete tubule segments. Comparisons of these mapped mouse GPCRs with other omics datasets as well as functional data from isolated perfused tubule and micropuncture studies confirmed patterns of expression for well-known receptors and identified poorly studied GPCRs that are likely to play roles in the regulation of renal tubule function. Thus, we provide data resources for GPCR expression across the renal tubule, highlighting both well-known GPCRs and understudied receptors to provide guidance for future studies.",2021-05-24 +31602478,PmiREN: a comprehensive encyclopedia of plant miRNAs.,"MicroRNAs (miRNAs) are small non-coding RNA molecules that function as diverse endogenous gene regulators at the post-transcriptional level. In the past two decades, as research effort on miRNA identification, function and evolution has soared, so has the demand for miRNA databases. However, the current plant miRNA databases suffer from several typical drawbacks, including a lack of entries for many important species, uneven annotation standards across different species, abundant questionable entries, and limited annotation. To address these issues, we developed a knowledge-based database called Plant miRNA Encyclopedia (PmiREN, http://www.pmiren.com/), which was based on uniform processing of sequenced small RNA libraries using miRDeep-P2, followed by manual curation using newly updated plant miRNA identification criteria, and comprehensive annotation. 
PmiREN currently contains 16,422 high confidence novel miRNA loci in 88 plant species and 3,966 retrieved from miRBase. For every miRNA entry, information on precursor sequence, precursor secondary structure, expression pattern, clusters and synteny in the genome, potential targets supported by Parallel Analysis of RNA Ends (PARE) sequencing, and references is attached whenever possible. PmiREN is hierarchically accessible and has eight built-in search engines. We believe PmiREN is useful for plant miRNA cataloguing and data mining, therefore a resource for data-driven miRNA research in plants.",2020-01-01 +30841849,"mGAP: the macaque genotype and phenotype resource, a framework for accessing and interpreting macaque variant data, and identifying new models of human disease.","

Background

Non-human primates (NHPs), particularly macaques, serve as critical and highly relevant pre-clinical models of human disease. The similarity in human and macaque natural disease susceptibility, along with parallel genetic risk alleles, underscores the value of macaques in the development of effective treatment strategies. Nonetheless, there are limited genomic resources available to support the exploration and discovery of macaque models of inherited disease. Notably, there are few public databases tailored to searching NHP sequence variants, and no other database making use of centralized variant calling, or providing genotype-level data and predicted pathogenic effects for each variant.

Results

The macaque Genotype And Phenotype (mGAP) resource is the first public website providing searchable, annotated macaque variant data. The mGAP resource includes a catalog of high confidence variants, derived from whole genome sequence (WGS). The current mGAP release at time of publication (1.7) contains 17,087,212 variants based on the sequence analysis of 293 rhesus macaques. A custom pipeline was developed to enable annotation of the macaque variants, leveraging human data sources that include regulatory elements (ENCODE, RegulomeDB), known disease- or phenotype-associated variants (GRASP), predicted impact (SIFT, PolyPhen2), and sequence conservation (Phylop, PhastCons). Currently mGAP includes 2767 variants that are identical to alleles listed in the human ClinVar database, of which 276 variants, spanning 258 genes, are identified as pathogenic. An additional 12,472 variants are predicted as high impact (SnpEff) and 13,129 are predicted as damaging (PolyPhen2). In total, these variants are predicted to be associated with more than 2000 human disease or phenotype entries reported in OMIM (Online Mendelian Inheritance in Man). Importantly, mGAP also provides genotype-level data for all subjects, allowing identification of specific individuals harboring alleles of interest.

Conclusions

The mGAP resource provides variant and genotype data from hundreds of rhesus macaques, processed in a consistent manner across all subjects ( https://mgap.ohsu.edu ). Together with the extensive variant annotations, mGAP presents unprecedented opportunity to investigate potential genetic associations with currently characterized disease models, and to uncover new macaque models based on parallels with human risk alleles.",2019-03-06 +,19233 Basis profile curve identification to understand electrical stimulation effects in human brain networks,"ABSTRACT IMPACT: Brain networks can be explored by delivering brief pulses of electrical current in one area while measuring responses in other areas, and this describes an open-source novel algorithm to carry out this exploration. OBJECTIVES/GOALS: If we focus on a single brain site and observe the average effect of stimulating each of many other brain sites, visually-apparent motifs in the temporal response shape emerge from adjacent stimulation sites. There are no existing approaches to identify and quantify the spatiotemporal structure of these motifs. METHODS/STUDY POPULATION: Individual stimulation trials are correlated with one another, then a correlation-significance matrix quantifying similarity between stimulation sites is decomposed with non-negative matrix factorization, in which the inner dimension is iteratively reduced. The dimensionality reduction identifies stimulation sites that produce a common elicited temporal response, and linear kernel PCA is applied to obtain the robust profile of this response cluster. RESULTS/ANTICIPATED RESULTS: We describe and illustrate a data-driven approach to determine characteristic spatiotemporal structure in these response shapes, summarized by a set of unique ‘basis profile curves’ (BPCs). Each BPC may be mapped back to underlying anatomy in a natural way, quantifying projection strength from each stimulation site using simple metrics. 
Our technique is demonstrated for an array of implanted brain surface electrodes in a human patient, and our code is shared at https://purl.stanford.edu/rc201dv0636. DISCUSSION/SIGNIFICANCE OF FINDINGS: This framework enables straightforward interpretation of single-pulse brain stimulation data, and can be applied generically to explore the diverse milieu of interactions that comprise the connectome.",2021-03-30 +31617563,LnCeVar: a comprehensive database of genomic variations that disturb ceRNA network regulation.,"LnCeVar (http://www.bio-bigdata.net/LnCeVar/) is a comprehensive database that aims to provide genomic variations that disturb lncRNA-associated competing endogenous RNA (ceRNA) network regulation curated from the published literature and high-throughput data sets. LnCeVar curated 119 501 variation-ceRNA events from thousands of samples and cell lines, including: (i) more than 2000 experimentally supported circulating, drug-resistant and prognosis-related lncRNA biomarkers; (ii) 11 418 somatic mutation-ceRNA events from TCGA and COSMIC; (iii) 112 674 CNV-ceRNA events from TCGA; (iv) 67 066 SNP-ceRNA events from the 1000 Genomes Project. LnCeVar provides a user-friendly searching and browsing interface. In addition, as an important supplement of the database, several flexible tools have been developed to aid retrieval and analysis of the data. The LnCeVar-BLAST interface is a convenient way for users to search ceRNAs by interesting sequences. LnCeVar-Function is a tool for performing functional enrichment analysis. LnCeVar-Hallmark identifies dysregulated cancer hallmarks of variation-ceRNA events. LnCeVar-Survival performs COX regression analyses and produces survival curves for variation-ceRNA events. LnCeVar-Network identifies and creates a visualization of dysregulated variation-ceRNA networks. 
Collectively, LnCeVar will serve as an important resource for investigating the functions and mechanisms of personalized genomic variations that disturb ceRNA network regulation in human diseases.",2020-01-01 +34378177,LinguaPix database: A megastudy of picture-naming norms.,"The major aim of the present megastudy of picture-naming norms was to address the shortcomings of the available picture data sets used in psychological and linguistic research by creating a new database of normed colour images that researchers from around the world can rely upon in their investigations. In order to do this, we employed a new form of normative study, namely a megastudy, whereby 1620 colour photographs of items spanning across 42 semantic categories were named and rated by a group of German speakers. This was done to establish the following linguistic norms: speech onset times (SOT), name agreement, accuracy, familiarity, visual complexity, valence, and arousal. The data, including over 64,000 audio files, were used to create the LinguaPix database of pictures, audio recordings, and linguistic norms, which to our knowledge, is the largest available research tool of its kind ( http://linguapix.uni-mannheim.de ). In this paper, we present the tool and the analysis of the major variables.",2021-08-10 +32055858,GREG-studying transcriptional regulation using integrative graph databases. ,"A gene regulatory process is the result of the concerted action of transcription factors, co-factors, regulatory non-coding RNAs (ncRNAs) and chromatin interactions. Therefore, the combination of protein-DNA, protein-protein, ncRNA-DNA, ncRNA-protein and DNA-DNA data in a single graph database offers new possibilities regarding generation of biological hypotheses. 
GREG (The Gene Regulation Graph Database) is an integrative database and web resource that allows the user to visualize and explore the network of all above-mentioned interactions for a query transcription factor, long non-coding RNA, genomic range or DNA annotation, as well as extracting node and interaction information, identifying connected nodes and performing advanced graphical queries directly on the regulatory network, in a simple and efficient way. In this article, we introduce GREG together with some application examples (including exploratory research of Nanog's regulatory landscape and the etiology of chronic obstructive pulmonary disease), which we use as a demonstration of the advantages of using graph databases in biomedical research. Database URL: https://mora-lab.github.io/projects/greg.html, www.moralab.science/GREG/.",2020-01-01 +31598690,ProCarbDB: a database of carbohydrate-binding proteins.,"Carbohydrate-binding proteins play crucial roles across all organisms and viruses. The complexity of carbohydrate structures, together with inconsistencies in how their 3D structures are reported, has led to difficulties in characterizing the protein-carbohydrate interfaces. In order to better understand protein-carbohydrate interactions, we have developed an open-access database, ProCarbDB, which, unlike the Protein Data Bank (PDB), clearly distinguishes between the complete carbohydrate ligands and their monomeric units. ProCarbDB is a comprehensive database containing over 5200 3D X-ray crystal structures of protein-carbohydrate complexes. In ProCarbDB, the complete carbohydrate ligands are annotated and all their interactions are displayed. Users can also select any protein residue in the proximity of the ligand to inspect its interactions with the carbohydrate ligand and with other neighbouring protein residues. 
Where available, additional curated information on the binding affinity of the complex and the effects of mutations on the binding have also been provided in the database. We believe that ProCarbDB will be an invaluable resource for understanding protein-carbohydrate interfaces. The ProCarbDB web server is freely available at http://www.procarbdb.science/procarb.",2020-01-01 +31584099,gutMDisorder: a comprehensive database for dysbiosis of the gut microbiota in disorders and interventions.,"gutMDisorder (http://bio-annotation.cn/gutMDisorder), a manually curated database, aims at providing a comprehensive resource of dysbiosis of the gut microbiota in disorders and interventions. Alterations in the composition of the gut microbial community play crucial roles in the development of chronic disorders. And the beneficial effects of drugs, foods and other intervention measures on disorders could be microbially mediated. The current version of gutMDisorder documents 2263 curated associations between 579 gut microbes and 123 disorders or 77 intervention measures in Human, and 930 curated associations between 273 gut microbes and 33 disorders or 151 intervention measures in Mouse. Each entry in the gutMDisorder contains detailed information on an association, including an intestinal microbe, a disorder name, intervention measures, experimental technology and platform, characteristic of samples, web sites for downloading the sequencing data, a brief description of the association, a literature reference, and so on. gutMDisorder provides a user-friendly interface to browse, retrieve each entry using gut microbes, disorders, and intervention measures. It also offers pages for downloading all the entries and submitting new experimentally validated associations.",2020-01-01 +32075414,The First Report of the International Cartilage Regeneration and Joint Preservation Society's Global Registry.,"

Objective

The International Cartilage Regeneration and Joint Preservation Society's (ICRS's) global registry aims to be the best source of information for patients and an unbiased resource of evidence-based medicine for scientists and clinicians working to help those unfortunate enough to suffer the pain and disability associated with articular cartilage lesions. This article constitutes the scientific summary of the report's main findings.

Design

The article outlines the historical precedents in the development of orthopedic registries from the earliest tumor registries, then local arthroplasty databases that led ultimately to international collaborations between national arthroplasty and soft tissue registries. The ICRS global cartilage registry was designed from the outset as a GDPR (General Data Protection Regulation) compliant, multilingual, multinational cooperative system. It is a web-based, user-friendly system, live in 11 languages by the end of 2019, which can be accessed via https://cartilage.org/society/icrs-patient-registry/. Patients and clinicians enter data by smartphone, tablet, or computer on any knee cartilage regeneration and joint preservation treatment, including the use of focal arthroplasty. Knee Injury and Osteoarthritis Outcome Score and Kujala patient-reported outcome measures are collected preoperatively, 6 months, 12 months, and annually for ten years thereafter. EQ-5D data collection will allow cost-effectiveness analysis. Strengths, weaknesses, and future plans are discussed.

Results

Since inception the registry has 264 users across 50 countries. Major findings are presented and discussed, while the entire first ICRS global registry report is available at https://cartilage.org/society/icrs-patient-registry/registry-annual-reports/. Conclusion. A measure of the maturity of any registry is the publication of its findings in the peer reviewed literature. With the publication of its first report, the ICRS global registry has achieved that milestone.",2020-02-19 +33435202,An Experimental Analysis of Attack Classification Using Machine Learning in IoT Networks. ,"In recent years, there has been a massive increase in the amount of Internet of Things (IoT) devices as well as the data generated by such devices. The participating devices in IoT networks can be problematic due to their resource-constrained nature, and integrating security on these devices is often overlooked. This has resulted in attackers having an increased incentive to target IoT devices. As the number of attacks possible on a network increases, it becomes more difficult for traditional intrusion detection systems (IDS) to cope with these attacks efficiently. In this paper, we highlight several machine learning (ML) methods such as k-nearest neighbour (KNN), support vector machine (SVM), decision tree (DT), naive Bayes (NB), random forest (RF), artificial neural network (ANN), and logistic regression (LR) that can be used in IDS. In this work, ML algorithms are compared for both binary and multi-class classification on Bot-IoT dataset. Based on several parameters such as accuracy, precision, recall, F1 score, and log loss, we experimentally compared the aforementioned ML algorithms. In the case of HTTP distributed denial-of-service (DDoS) attack, the accuracy of RF is 99%. Furthermore, other simulation results-based precision, recall, F1 score, and log loss metric reveal that RF outperforms on all types of attacks in binary classification. 
However, in multi-class classification, KNN outperforms other ML algorithms with an accuracy of 99%, which is 4% higher than RF.",2021-01-10 +34951548,Does physical exercise improve the capacity for independent living in people with dementia or mild cognitive impairment: an overview of systematic reviews and meta-analyses.,"

Objective

To summarise existing systematic reviews which assessed the effects of physical exercise on activities of daily living, walking, balance and visual processing in people with dementia or mild cognitive impairment.

Methods

In this overview of systematic reviews and meta-analyses, seven electronic databases were searched to identify eligible reviews published between January 2015 and April 2021.

Results

A total of 30 systematic reviews were identified and included in the overview. The most frequent type of exercise for the intervention group was multimodal exercises. Mind-body exercises, exergames, dance intervention and aerobic exercise were other exercise types. Most of the reviews reported that exercise is significantly effective for improving activities of daily living (SMD 95%CI, from 0.27 to 1.44), walking (SMD 95%CI, from 0.08 to 2.23), balance (SMD 95%CI, from 0.37 to 2.24) and visuospatial function (SMD 95%CI, from 0.16 to 0.51), which are among the most leading determinants of independent living in individuals with dementia or mild cognitive impairment.

Conclusion

Evidence has shown that exercise (especially multicomponent exercise programmes including cognitive, physical and multitasking exercises) with sufficient intensity improves the activities of daily living skills. Exercise also improves walking, balance and visual processing, which can provide a more independent life for people with dementia and mild cognitive impairment. Cognitively impaired people should therefore be encouraged to exercise regularly in order to be more independent.Supplemental data for this article is available online at http://dx.doi.org/10.1080/13607863.2021.2019192.",2021-12-24 +33238002,RecipeDB: a resource for exploring recipes. ,"Cooking is the act of turning nature into the culture, which has enabled the advent of the omnivorous human diet. The cultural wisdom of processing raw ingredients into delicious dishes is embodied in their cuisines. Recipes thus are the cultural capsules that encode elaborate cooking protocols for evoking sensory satiation as well as providing nourishment. As we stand on the verge of an epidemic of diet-linked disorders, it is eminently important to investigate the culinary correlates of recipes to probe their association with sensory responses as well as consequences for nutrition and health. RecipeDB (https://cosylab.iiitd.edu.in/recipedb) is a structured compilation of recipes, ingredients and nutrition profiles interlinked with flavor profiles and health associations. The repertoire comprises of meticulous integration of 118 171 recipes from cuisines across the globe (6 continents, 26 geocultural regions and 74 countries), cooked using 268 processes (heat, cook, boil, simmer, bake, etc.), by blending over 20 262 diverse ingredients, which are further linked to their flavor molecules (FlavorDB), nutritional profiles (US Department of Agriculture) and empirical records of disease associations obtained from MEDLINE (DietRx). 
This resource is aimed at facilitating scientific explorations of the culinary space (recipe, ingredient, cooking processes/techniques, dietary styles, etc.) linked to taste (flavor profile) and health (nutrition and disease associations) attributes seeking for divergent applications. Database URL:  https://cosylab.iiitd.edu.in/recipedb.",2020-11-01 +30357387,ArrayExpress update - from bulk to single-cell expression data.,"ArrayExpress (https://www.ebi.ac.uk/arrayexpress) is an archive of functional genomics data from a variety of technologies assaying functional modalities of a genome, such as gene expression or promoter occupancy. The number of experiments based on sequencing technologies, in particular RNA-seq experiments, has been increasing over the last few years and submissions of sequencing data have overtaken microarray experiments in the last 12 months. Additionally, there is a significant increase in experiments investigating single cells, rather than bulk samples, known as single-cell RNA-seq. To accommodate these trends, we have substantially changed our submission tool Annotare which, along with raw and processed data, collects all metadata necessary to interpret these experiments. Selected datasets are re-processed and loaded into our sister resource, the value-added Expression Atlas (and its component Single Cell Expression Atlas), which not only enables users to interpret the data easily but also serves as a test for data quality. With an increasing number of studies that combine different assay modalities (multi-omics experiments), a new more general archival resource the BioStudies Database has been developed, which will eventually supersede ArrayExpress. 
Data submissions will continue unchanged; all existing ArrayExpress data will be incorporated into BioStudies and the existing accession numbers and application programming interfaces will be maintained.",2019-01-01 +34081107,PDBe Aggregated API: Programmatic access to an integrative knowledge graph of molecular structure data. ,"The PDBe aggregated API is an open-access and open-source RESTful API that provides programmatic access to a wealth of macromolecular structural data and their functional and biophysical annotations through 80+ API endpoints. The API is powered by the PDBe graph database (https://pdbe.org/graph-schema), an open-access integrative knowledge graph that can be used as a discovery tool to answer complex biological questions. The PDBe aggregated API provides up-to-date access to the PDBe graph database, which has weekly releases with the latest data from the Protein Data Bank, integrated with updated annotations from UniProt, Pfam, CATH, SCOP and the PDBe-KB partner resources. The complete list of all the available API endpoints and their descriptions are available at https://pdbe.org/graph-api. The source code of the Python 3.6+ API application is publicly available at https://gitlab.ebi.ac.uk/pdbe-kb/services/pdbe-graph-api. Supplementary data are available at Bioinformatics online.",2021-06-03 +31843280,In-House Surgeon-Led Virtual Surgical Planning for Maxillofacial Reconstruction.,"

Purpose

Virtual surgical planning (VSP) and custom fabricated cutting guides for maxillofacial reconstruction have been shown to improve the accuracy of bony reconstruction and overall surgical efficiency and decrease the ischemia time. Our aim was to describe an in-house VSP technique for maxillofacial reconstructive procedures.

Materials and methods

We used 2 free software applications. 3DSlicer (available at: http://www.3dslicer.org) was used to extract the bones of interest for the recipient and the donor sites from the computed tomography scan's DICOM (digital imaging and communications in medicine) data. The Autodesk Meshmixer (Autodesk Inc, San Rafael, CA) was used to perform VSP and fabrication of the cutting guides. A reconstructed jaw model was printed in-house using a commercially available fused deposition modeling-based desktop 3-dimensional (3D) printer (Qidi Technology, Zhejiang, China) and used to prebend the reconstruction plate. The cutting guides were printed using a commercially available resin-based stereolithography apparatus desktop 3D printer (Form 2, Dental SG Resin; Formlabs, Somerville, MA) to allow for sterilization of the guides. We performed this technique for 19 consecutive patients with maxillofacial benign or malignant tumors requiring microvascular bony reconstruction. We calculated the average time and associated costs using this in-house VSP technique.

Results

The technique was found to be simple and repeatable. The average time required for VSP was 158 minutes (2 hours, 38 minutes). The average cost for printing the reconstructed model per case was $5.21 Canadian dollars (CAD), and the average cost for printing the cutting guides per case was $12.80 CAD.

Conclusions

Using this technique, in-house VSP and 3D printing can be performed by the treating surgeon, without an engineering background, within a reasonable period.",2019-11-21 +,MON-LB9 Cyclic Progesterone Therapy in Androgenic Polycystic Ovary Syndrome (PCOS) - Person-Related 6-Month Experience Changes,"Abstract Endometrial cancer1 and oligomenorrhea2 are common risks for women living with androgenic PCOS (WLWP); cyclic progesterone therapy could prevent both. Cyclic oral micronized progesterone therapy (Cyclic OMP; 300 mg at hs/14 days/cycle) also corrects the neuroendocrine origins of PCOS3. Although vaginal progesterone is used in PCOS ovulation induction 4, and short Cyclic OMP decreases LH and/or Testosterone 5,6, no WLWP person-level prospective data with Cyclic OMP therapy are published. A WLWP, aged 31, BMI 20.1, with heavy flow and slightly irregular ~35-day cycles, was unable to tolerate birth control pills. She was prescribed Cyclic OMP (300 mg/h.s. cycle days 14-27)7. She began keeping the Menstrual Cycle Diary© (Diary), a 19-item tool (scored 0-4), during her 1st Cyclic OMP cycle and took no other therapy. This pilot study was designed to understand Cyclic OMP-related experience changes in WLWP: 1) by documenting experience changes on the 1st to the 6th complete Diary; and 2) by assessing follicular phase changes in baseline data (no Rx) vs. cycles 3 and 6. We entered data from six consecutive Diaries into an SPSS (Version 24) database. Analysis #1 used Wilcoxon Signed Ranks Tests (for within-person ordinal data) and #2 repeated measures ANOVA. Research question: What Cyclic OMP-related experience changes occurred for a WLWP? On Cyclic OMP, she spontaneously reported improvements in aching joints, sleep and GI problems. We assessed selected, potentially E2-related Diary changes: flow, fluid retention, breast tenderness, stretchy cervical mucus and anxiety. Cyclic OMP was associated with shorter cycle lengths of 28.17+/-0.8 days. 
Fluid retention (P=0.000), mucus (P=0.048), and breast tenderness (P=0.000) all decreased, but anxiety and flow were unchanged. Follicular phase only fluid retention significantly decreased (F (1.2, 14.7) = 6.7, P =0.017). Although open-label, these prospective analyses suggest that Cyclic OMP, alone, is related to short-term benefits in androgenic PCOS. Prospective studies and controlled comparative trials of this innovative “luteal phase replacement” PCOS therapy are needed. Reference:1Barry J Hum Reprod Update 2014 20:748. 2Azziz R Nat Rev Dis Primers 2016;2:16057. 3Blank S Hum Reprod Update 2006;12:351. 4Montville C Fertil Steril 2010;94:678. 5Livadas S Fertil Steril 2010;94:242. 6Bagis T J Clin Endocr Met 2002;87:4536. 7Prior J https://hellocluecom/articles/cycle-a-z/the-case-for-a-new-pcos-therapy 2018",2020-05-08 +,First Report of Alfalfa Mosaic Virus in Chayote in Italy,"Chayote (Sechium edule [Jacq.] Sw.) is a vigorous perennial and climbing cucurbit, native to Mesoamerica, and cultivated for alimentary purposes in the American continent, Australia, New Zealand, South Europe, Asia, and Africa. During spring 2019, some chayote plants showing bright yellow vein banding, rings, and lines were observed in a private garden in South Italy (Campania region). Symptoms coalesced in some leaves, covering almost the whole foliar area. Double-stranded RNA was extracted from symptomatic leaves of a single chayote plant and reverse transcribed, randomly amplified, and submitted to Illumina sequencing (Marais et al. 2018). Reads were assembled using CLC Genomics Workbench 11.1 (https://digitalinsights.qiagen.com/). Contigs were then annotated by BLASTn and BLASTx comparison with the GenBank database, which allowed the identification of eight contigs of between 380 and 980 nucleotides (nt) sharing significant identity with alfalfa mosaic virus (AMV) genomic RNAs. No other viral contigs were identified. 
Mapping of reads on AMV genomic RNAs identified 4,209 AMV reads (1.26% of total reads) and allowed the scaffolding of the contigs into three scaffolds corresponding to the three AMV genomic RNAs. To complete the sequence of the AMV chayote isolate genome (named See-1), primers were designed from the contig sequences and used to amplify rapid amplification of cDNA ends (RACE) polymerase chain reaction (PCR) products spanning the 5′ and 3′ terminal regions of the three genomic RNAs using the SMARTer RACE cDNA Amplification Kit (Clontech, China). All amplicons were cloned into the pGEM-T vector (Promega, U.S.A.) and sequenced (three clones for each amplicon) by Microsynth Seqlab (Microsynth, Switzerland). Finally, the complete genomic sequences of the three RNAs were assembled by MacVector 17.5 (MacVector, U.S.A.). The RNA1, RNA2, and RNA3 of See-1 are 3,643, 2,593, and 2,037 nt, respectively (GenBank accession nos. MT093209 to MT093211) and share the highest nt sequence identity with the RNA1 and RNA3 of AMV isolate (HZ) from tobacco (99.5% for RNA1, HQ316635; 98.7% for RNA3, HQ316637) and with the RNA2 of isolate AMV-Gym from Gynostemma pentaphyllum (98.1%, MH332898), both from China. AMV isolate See-1 was classified as belonging to subgroup I based on the presence of a BamH I and two AvaII sites in the CP open reading frame (Parrella et al. 2000). Reverse transcription PCR, using primers targeting the CP gene (Parrella et al. 2000), confirmed AMV infection in three symptomatic chayote plants including that used for Illumina sequencing, with 100% of nt sequence identity of amplicons. Three plants each of Chenopodium amaranticolor, Nicotiana benthamiana, and Solanum lycopersicon were mechanically inoculated with sap from isolate See-1 infected plant, leading to the appearance of typical AMV symptoms in all three hosts 10 days postinoculation (Jaspars and Bos 1980). 
This note describes the first detection of AMV in chayote in Italy and, to the best of our knowledge, in the world. In some areas of Southern Italy, climatic conditions are favorable enough to allow chayote development in the wild. Further studies would be desirable to determine the distribution and incidence of AMV in chayote and to understand the possibility that this species may play a role in AMV epidemiology, representing a threat to other susceptible crops.",2021-03-01 +,First Report of Fusarium proliferatum Causing Sheath Rot Disease of Rice in Eastern India,"Sheath rot is one of the most devastating diseases of rice because of its ability to reduce the yield significantly in all rice cultivating areas of the world (Bigirimana et al. 2015). Sheath rot disease is associated with various pathogens such as Sarocladium oryzae, Fusarium fujikuroi complex, and Pseudomonas fuscovaginae (Bigirimana et al. 2015). Hence, this disease has become more complex in nature and added more seriousness. From September to December 2018, plants were observed with typical sheath rot symptoms in a research farm of ICAR – National Rice Research Institute and 10 farmer’s fields of Cuttack district, Odisha, Eastern India. About 25 to 37% of sheath rot disease severity was recorded in the infected field. Diseased plants were observed with symptoms such as brownish or reddish-brown irregular lesions, which later became enlarged with grayish centers. Further, rotting of the topmost leaf sheaths that surround the young panicle was observed. At the severe stages, the young panicle was partially emerged from the sheath or completely rotted within the sheath. The white to pinkish powdery growth observed inside the infected sheath led to chaffy and discolored grains. The sheath rot symptomatic plants were collected from the infected fields. 
To isolate the causal pathogen, infected sheath tissues were surface sterilized in 1% sodium hypochlorite for 2 min, rinsed three times in sterile distilled water, and placed on potato dextrose agar medium (PDA) (HiMedia). Plates were incubated at 27 ± 1°C for 3 days. Further, fungal pathogen colonies were subcultured and purified to perform the pathogenicity test. On PDA, the colonies produced abundant white aerial mycelium with violet to pink pigmentation, and hyphae were hyaline with septation. Abundant single-celled, oval-shaped microconidia (5.5 to 9 × 1.5 to 2 μm) were produced, whereas macroconidia were not produced, and the fungal pathogen was tentatively identified as Fusarium sp. In order to characterize the pathogen at a molecular level, ITS, alpha elongation factor gene (EF1-α), RNA polymerase II largest-subunit gene (RPB2), and calmodulin gene (cld) were amplified using the primer pair ITS1/ITS4, EF1/EF2, 5F/7CR, and CLPRO1/CLPRO2, respectively, and PCR amplicons were subjected to sequencing (Chang et al. 2015; O’Donnell et al. 1998; White et al. 1990). Furthermore, a species-specific primer, Fp3-F/Fp4-R, was used to identify the pathogen (Jurado et al. 2006). The resulting sequences were confirmed by BLAST analysis and the FUSARIUM-ID database (http://isolate.fusariumdb.org/blast.php). BLASTn search showed 100% similarity between the query sequence and ITS, EF1-α, RPB2, and calmodulin gene sequences of Fusarium proliferatum available in GenBank. The following GenBank accession numbers were obtained: MT394055 for ITS, MT439867 for EF1-α, MT790774 for calmodulin, MT940224 for RPB2, and MT801050 for species-specific to F. proliferatum. To confirm the pathogenicity under glass house conditions, fungus grown on sterilized chaffy grains was placed in between boot leaf sheath and panicle and covered with moist cotton (Saravanakumar et al. 2009). After 15 days postinoculation, rotting symptoms were observed, and these were similar to field symptoms. 
Pathogen was constantly reisolated from symptomatic tissue, satisfying Koch’s postulates. Disease symptoms were not observed on uninoculated plants. Morphological characters, pathogenicity testing, and molecular characterization have identified the pathogen as F. proliferatum. To the best of our knowledge, this is the first confirmed report of F. proliferatum causing sheath rot disease on rice from Eastern India.",2021-03-01 +33109630,Checkpoint therapeutic target database (CKTTD): the first comprehensive database for checkpoint targets and their modulators in cancer immunotherapy. ,"Checkpoint targets play a key role in tumor-mediated immune escape and therefore are critical for cancer immunotherapy. Unfortunately, there is a lack of bioinformatics resource that compile all the checkpoint targets for translational research and drug discovery in immuno-oncology. To this end, we developed checkpoint therapeutic target database (CKTTD), the first comprehensive database for immune checkpoint targets (proteins, miRNAs and LncRNAs) and their modulators. A scoring system was adopted to filter more relevant targets with high confidence. In addition, a few biological databases such as Oncomine, Drugbank, miRBase and Lnc2Cancer database were integrated into CKTTD to provide an in-depth information. Moreover, we computed and provided ligand-binding site information for all the targets which may support bench scientists for drug discovery efforts. In total, CKTTD compiles 105 checkpoint protein targets, 53 modulators (small-molecules and antibody), 30 miRNAs and 18 LncRNAs in cancer immunotherapy with validated experimental evidences curated from 10 649 literatures via an enhanced text-mining system. In conclusion, the CKTTD may serve as a useful platform for the research of cancer immunotherapy and drug discovery. 
The CKTTD database is freely available to public at http://www.ckttdb.org/.",2020-10-01 +33305409,Drought responsiveness in black pepper (Piper nigrum L.): Genes associated and development of a web-genomic resource.,"Black pepper (Piper nigrum L.; 2n = 52; Piperaceae), the king of spices, is a perennial, trailing woody flowering vine and has global importance with widespread dietary, medicinal, and preservative uses. It is an economically important germplasm cultivated for its fruit and the major cash crop in >30 tropical countries. Crop production is mainly affected by drought stress. The present study deals with the candidate gene identification from drought-affected black pepper leaf transcriptome generated by Illumina Hiseq2000. It also aims to mine putative molecular markers (namely SSRs, SNPs, and InDels) and generate primers for them. The identification of transcription factors and pathways involved in drought tolerance is also reported here. De novo transcriptome assembly was performed with trinity assembler. In total, 4914 differential expressed genes, 2110 transcriptional factors, 786 domains and 1137 families, 20,124 putative SSR markers, and 259,236 variants were identified. At2g30105 (unidentified gene containing leucine-rich repeats and ubiquitin-like domain), serine threonine protein kinase, Mitogen-activated protein kinase, Nucleotide Binding Site-Leucine Rich Repeat, Myeloblastosis-related proteins, basic helix-loop-helix are all found upregulated and are reported to be associated with plant tolerance against drought condition. All these information are catalogued in the Black Pepper Drought Transcriptome Database (BPDRTDb), freely accessible for academic use at http://webtom.cabgrid.res.in/bpdrtdb/. This database is a good foundation for the genetic improvement of pepper plants, breeding programmes, and mapping population of this crop. 
Putative markers can also be a reliable genomic resource to develop drought-tolerant variety for better black pepper productivity.",2020-12-20 +34058988,A tool for analyzing and visualizing ribo-seq data at the isoform level.,"

Background

Translational regulation is one important aspect of gene expression regulation. Dysregulation of translation results in abnormal cell physiology and leads to diseases. Ribosome profiling (RP), also called ribo-seq, is a powerful experimental technique to study translational regulation. It can capture a snapshot of translation by deep sequencing of ribosome-protected mRNA fragments. Many ribosome profiling data processing tools have been developed. However, almost all tools analyze ribosome profiling data at the gene level. Since different isoforms of a gene may produce different proteins with distinct biological functions, it is advantageous to analyze ribosome profiling data at the isoform level. To meet this need, previously we developed a pipeline to analyze 610 public human ribosome profiling data at the isoform level and constructed HRPDviewer database.

Results

To allow other researchers to use our pipeline as well, here we implement our pipeline as an easy-to-use software tool called RPiso. Compared to Ribomap (a widely used tool which provides isoform-level ribosome profiling analyses), our RPiso (1) estimates isoform abundance more accurately, (2) supports analyses on more species, and (3) provides a web-based viewer for interactively visualizing ribosome profiling data on the selected mRNA isoforms.

Conclusions

In this study, we developed RPiso software tool ( http://cosbi7.ee.ncku.edu.tw/RPiso/ ) to provide isoform-level ribosome profiling analyses. RPiso is very easy to install and execute. RPiso also provides a web-based viewer for interactively visualizing ribosome profiling data on the selected mRNA isoforms. We believe that RPiso is a useful tool for researchers to analyze and visualize their own ribosome profiling data at the isoform level.",2021-05-25 +34657619,Laparoscopic nerve‑sparing radical hysterectomy for the treatment of cervical cancer: a meta-analysis of randomized controlled trials.,"

Background

The effects and safety of laparoscopic nerve‑sparing radical hysterectomy (LNSRH) and laparoscopic radical hysterectomy (LRH) in cervical cancer treatment remain unclear. This article aims to evaluate the role of LNSRH versus LRH in the treatment of cervical cancer. This is because the updated meta-analysis with synthesized data may provide more reliable evidence on the role of LNSRH and LRH.

Methods

We searched Pubmed et al. databases for randomized controlled trials (RCTs) involving laparoscopic nerve‑sparing radical hysterectomy (LNSRH) and laparoscopic radical hysterectomy (LRH) for cervical cancer treatment from the inception of databases to June 15, 2021. The RevMan 5.3 software was used for data analyses. This meta-analysis protocol had been registered online (available at: https://inplasy.com/inplasy-2021-9-0047/ ).

Results

Thirteen RCTs involving a total of 1002 cervical cancer patients were included. Synthesized results indicated that the duration of surgery of the LNSRH group was significantly longer than that of the LRH group [SMD 1.11, 95% CI (0.15 ~ 2.07), P = 0.02]. The time to intestinal function recovery [SMD -1.27, 95% CI (-1.84 ~ -0.69), P < 0.001] and the time to postoperative urinary catheter removal of the LNSRH group [SMD -1.24, 95% CI (-1.62 ~ -0.86), P < 0.001] were significantly less than that of the LRH group. There were no significant differences in the estimated blood loss [SMD 0.10, 95% CI (-0.14 ~ 0.34), P = 0.41], the length of parauterine tissue resection [SMD -0.10, 95% CI (-0.25 ~ 0.05), P = 0.19], length of vaginal excision [SMD 0.04, 95% CI (-0.26 ~ 0.34), P = 0.78], and incidence of intraoperative adverse events [RR 0.97, 95% CI (0.44 ~ 2.13), P = 0.94] between the LNSRH group and the LRH group.

Conclusions

LNSRH significantly results in earlier bladder and bowel function after surgery. Limited by sample size, LNSRH should be considered with caution in the future.",2021-10-18 +34285236,"Dating historical droughts from religious ceremonies, the international pro pluvia rogation database.","Climate proxy data are required for improved understanding of climate variability and change in the pre-instrumental period. We present the first international initiative to compile and share information on pro pluvia rogation ceremonies, which is a well-studied proxy of agricultural drought. Currently, the database has more than 3500 dates of celebration of rogation ceremonies, providing information for 153 locations across 11 countries spanning the period from 1333 to 1949. This product provides data for better understanding of the pre-instrumental drought variability, validating natural proxies and model simulations, and multi-proxy rainfall reconstructions, amongst other climatic exercises. The database is freely available and can be easily accessed and visualized via http://inpro.unizar.es/ .",2021-07-20 +34006627,DOE JGI Metagenome Workflow. ,"The DOE Joint Genome Institute (JGI) Metagenome Workflow performs metagenome data processing, including assembly; structural, functional, and taxonomic annotation; and binning of metagenomic data sets that are subsequently included into the Integrated Microbial Genomes and Microbiomes (IMG/M) (I.-M. A. Chen, K. Chu, K. Palaniappan, A. Ratner, et al., Nucleic Acids Res, 49:D751-D763, 2021, https://doi.org/10.1093/nar/gkaa939) comparative analysis system and provided for download via the JGI data portal (https://genome.jgi.doe.gov/portal/). This workflow scales to run on thousands of metagenome samples per year, which can vary by the complexity of microbial communities and sequencing depth. 
Here, we describe the different tools, databases, and parameters used at different steps of the workflow to help with the interpretation of metagenome data available in IMG and to enable researchers to apply this workflow to their own data. We use 20 publicly available sediment metagenomes to illustrate the computing requirements for the different steps and highlight the typical results of data processing. The workflow modules for read filtering and metagenome assembly are available as a workflow description language (WDL) file (https://code.jgi.doe.gov/BFoster/jgi_meta_wdl). The workflow modules for annotation and binning are provided as a service to the user community at https://img.jgi.doe.gov/submit and require filling out the project and associated metadata descriptions in the Genomes OnLine Database (GOLD) (S. Mukherjee, D. Stamatis, J. Bertsch, G. Ovchinnikova, et al., Nucleic Acids Res, 49:D723-D733, 2021, https://doi.org/10.1093/nar/gkaa983).IMPORTANCE The DOE JGI Metagenome Workflow is designed for processing metagenomic data sets starting from Illumina fastq files. It performs data preprocessing, error correction, assembly, structural and functional annotation, and binning. The results of processing are provided in several standard formats, such as fasta and gff, and can be used for subsequent integration into the Integrated Microbial Genomes and Microbiomes (IMG/M) system where they can be compared to a comprehensive set of publicly available metagenomes. As of 30 July 2020, 7,155 JGI metagenomes have been processed by the DOE JGI Metagenome Workflow. Here, we present a metagenome workflow developed at the JGI that generates rich data in standard formats and has been optimized for downstream analyses ranging from assessment of the functional and taxonomic composition of microbial communities to genome-resolved metagenomics and the identification and characterization of novel taxa. 
This workflow is currently being used to analyze thousands of metagenomic data sets in a consistent and standardized manner.",2021-05-18 +34319727,ProBiS-Dock Database: A Web Server and Interactive Web Repository of Small Ligand-Protein Binding Sites for Drug Design.,"We have developed a new system, ProBiS-Dock, which can be used to determine the different types of protein binding sites for small ligands. The binding sites identified this way are then used to construct a new binding site database, the ProBiS-Dock Database, that allows for the ranking of binding sites according to their utility for drug development. The newly constructed database currently has more than 1.4 million binding sites and offers the possibility to investigate potential drug targets originating from different biological species. The interactive ProBiS-Dock Database, a web server and repository that consists of all small-molecule ligand binding sites in all of the protein structures in the Protein Data Bank, is freely available at http://probis-dock-database.insilab.org. The ProBiS-Dock Database will be regularly updated to keep pace with the growth of the Protein Data Bank, and our anticipation is that it will be useful in drug discovery.",2021-07-28 +34551573,Development of SAB model for predicting mortality in intensive care unit after aortic aneurysm surgery.,"

Background

Aortic aneurysm (AA) patients after vascular surgery are at high risk of death, some of them need intensive care. Our aim was to develop a simplified model with baseline data within 24 hours of intensive care unit (ICU) admission to early predict mortality.

Methods

Univariate analysis and least absolute shrinkage and selection operator were used to select important variables, which were then taken into logistic regression to fit the model. Discrimination and validation were used to evaluate the performance of the model. Bootstrap method was conducted to perform internal validation. Finally, decision clinical analysis curve was used to test the clinical usefulness of the model.

Results

We obtained baseline data of 482 AA patients from Medical Information Mart for Intensive Care III database, 33 (6.8%) of whom died in ICU. Our final model contained three variables and was called SAB model based on initials of three items [Sepsis, Anion gap, Bicarbonate (SAB)]. Area under the curve of SAB was 0.904 (95% CI: 0.841-0.967) while brier score was 0.043 (95% CI: 0.028-0.057). After internal validation, corrected area under the curve was 0.898 and brier score was 0.045, which showed good prediction ability of SAB model. The model can be assessed on https://vascularmodel.shinyapps.io/AorticAneurysm/.

Conclusions

SAB model derived in this study can be easily used to predict in-ICU mortality of AA patients after surgery precisely.",2021-09-13 +31029701,Codon and Codon-Pair Usage Tables (CoCoPUTs): Facilitating Genetic Variation Analyses and Recombinant Gene Design.,"Usage of sequential codon-pairs is non-random and unique to each species. Codon-pair bias is related to but clearly distinct from individual codon usage bias. Codon-pair bias is thought to affect translational fidelity and efficiency and is presumed to be under the selective pressure. It was suggested that changes in codon-pair utilization may affect human disease more significantly than changes in single codons. Although recombinant gene technologies often take codon-pair usage bias into account, codon-pair usage data/tables are not readily available, thus potentially impeding research efforts. The present computational resource (https://hive.biochemistry.gwu.edu/review/codon2) systematically addresses this issue. Building on our recent HIVE-Codon Usage Tables, we constructed a new database to include genomic codon-pair and dinucleotide statistics of all organisms with sequenced genome, available in the GenBank. We believe that the growing understanding of the importance of codon-pair usage will make this resource an invaluable tool to many researchers in academia and pharmaceutical industry.",2019-04-26 +33290554,"PANTHER version 16: a revised family classification, tree-based classification tool, enhancer regions and extensive API.","PANTHER (Protein Analysis Through Evolutionary Relationships, http://www.pantherdb.org) is a resource for the evolutionary and functional classification of protein-coding genes from all domains of life. The evolutionary classification is based on a library of over 15,000 phylogenetic trees, and the functional classifications include Gene Ontology terms and pathways. 
Here, we analyze the current coverage of genes from genomes in different taxonomic groups, so that users can better understand what to expect when analyzing a gene list using PANTHER tools. We also describe extensive improvements to PANTHER made in the past two years. The PANTHER Protein Class ontology has been completely refactored, and 6101 PANTHER families have been manually assigned to a Protein Class, providing a high level classification of protein families and their genes. Users can access the TreeGrafter tool to add their own protein sequences to the reference phylogenetic trees in PANTHER, to infer evolutionary context as well as fine-grained annotations. We have added human enhancer-gene links that associate non-coding regions with the annotated human genes in PANTHER. We have also expanded the available services for programmatic access to PANTHER tools and data via application programming interfaces (APIs). Other improvements include additional plant genomes and an updated PANTHER GO-slim.",2021-01-01 +32953953,Shotgun metagenomic data of microbiomes on plastic fabrics exposed to harsh tropical environments.,"The development of more affordable high-throughput DNA sequencing technologies and powerful bioinformatics is making of shotgun metagenomics a common tool for effective characterization of microbiomes and robust functional genomics. A shotgun metagenomic approach was applied in the characterization of microbial communities associated with plasticized fabric materials exposed to a harsh tropical environment for 14 months. High-throughput sequencing of TruSeq paired-end libraries was conducted using a whole-genome shotgun (WGS) approach on an Illumina HiSeq2000 platform generating 100 bp reads. 
A multifaceted bioinformatics pipeline was developed and applied to conduct quality control and trimming of raw reads, microbial classification, assembly of multi-microbial genomes, binning of assembled contigs to individual genomes, and prediction of microbial genes and proteins. The bioinformatic analysis of the large 161 Gb sequence dataset generated 3,314,688 contigs and 120 microbial genomes. The raw metagenomic data and the detailed description of the bioinformatics pipeline applied in data analysis provide an important resource for the genomic characterization of microbial communities associated with biodegraded plastic fabric materials. The raw shotgun metagenomics sequence data of microbial communities on plastic fabric materials have been deposited in MG-RAST (https://www.mg-rast.org/) under accession numbers: mgm4794685.3-mgm4794690.3. The datasets and raw data presented here were associated with the main research work ""Metagenomic characterization of microbial communities on plasticized fabric materials exposed to harsh tropical environments"" (Radwan et al., 2020).",2020-08-24 +34816039,The ever-changing landscape in modern dentistry therapeutics - Enhancing the emptying quiver of the periodontist.,"

Introduction/objectives

Periodontitis comprises a wide range of inflammatory conditions of the gums leading to soft tissue damage and attachment loss. The initiation of periodontitis constitutes a rather complex disease pathogenesis which is based on pathogenic shifts of the oral microbiota combined with the host-microbiome interactions. The severity of periodontitis is multifactorial, depending on genetic, environmental, and host immunity factors.

Data and sources

To make an inclusive analysis on the periodontitis therapeutics, reading of the recent relevant literature was carried out using the MEDLINE/PubMed database, Google Scholar and the NIH public online database for clinical trials (http://www.clinicaltrials.gov).

Conclusions

Tackling the inflammation associated periodontal defects can be succeeded with conventional therapy or resective and regenerative treatment. To date, the mechanical removal of the supragingival and subgingival biofilm is considered the ""gold standard"" of periodontal therapy in combination with the use of antibacterial compounds. The antimicrobial resistance phenomenon tends to turn all the currently applied antibacterials into ""endangered species"". Ongoing efforts through the conduct of clinical trials should be focused on understanding the advantages of modern approaches in comparison to traditional therapies.",2021-11-10 +32386544,The Allen Mouse Brain Common Coordinate Framework: A 3D Reference Atlas.,"Recent large-scale collaborations are generating major surveys of cell types and connections in the mouse brain, collecting large amounts of data across modalities, spatial scales, and brain areas. Successful integration of these data requires a standard 3D reference atlas. Here, we present the Allen Mouse Brain Common Coordinate Framework (CCFv3) as such a resource. We constructed an average template brain at 10 μm voxel resolution by interpolating high resolution in-plane serial two-photon tomography images with 100 μm z-sampling from 1,675 young adult C57BL/6J mice. Then, using multimodal reference data, we parcellated the entire brain directly in 3D, labeling every voxel with a brain structure spanning 43 isocortical areas and their layers, 329 subcortical gray matter structures, 81 fiber tracts, and 8 ventricular structures. CCFv3 can be used to analyze, visualize, and integrate multimodal and multiscale datasets in 3D and is openly accessible (https://atlas.brain-map.org/).",2020-05-07 +32351388,TCMIO: A Comprehensive Database of Traditional Chinese Medicine on Immuno-Oncology.,"Advances in immuno-oncology (IO) are making immunotherapy a powerful tool for cancer treatment. 
With the discovery of an increasing number of IO targets, many herbs or ingredients from traditional Chinese medicine (TCM) have shown immunomodulatory function and antitumor effects via targeting the immune system. However, knowledge of underlying mechanisms is limited due to the complexity of TCM, which has multiple ingredients acting on multiple targets. To address this issue, we present TCMIO, a comprehensive database of Traditional Chinese Medicine on Immuno-Oncology, which can be used to explore the molecular mechanisms of TCM in modulating the cancer immune microenvironment. Over 120,000 small molecules against 400 IO targets were extracted from public databases and the literature. These ligands were further mapped to the chemical ingredients of TCM to identify herbs that interact with the IO targets. Furthermore, we applied a network inference-based approach to identify the potential IO targets of natural products in TCM. All of these data, along with cheminformatics and bioinformatics tools, were integrated into the publicly accessible database. Chemical structure mining tools are provided to explore the chemical ingredients and ligands against IO targets. Herb-ingredient-target networks can be generated online, and pathway enrichment analysis for TCM or prescription is available. This database is functional for chemical ingredient structure mining and network analysis for TCM. We believe that this database provides a comprehensive resource for further research on the exploration of the mechanisms of TCM in cancer immunity and TCM-inspired identification of novel drug leads for cancer immunotherapy. TCMIO can be publicly accessed at http://tcmio.xielab.net.",2020-04-15 +34266386,MitoTox: a comprehensive mitochondrial toxicity database.,"

Background

Mitochondria play essential roles in regulating cellular functions. Some drug treatments and molecular interventions have been reported to have off-target effects damaging mitochondria and causing severe side effects. The development of a database for the management of mitochondrial toxicity-related molecules and their targets is important for further analyses.

Results

To correlate chemical, biological and mechanistic information on clinically relevant mitochondria-related toxicity, a comprehensive mitochondrial toxicity database (MitoTox) was developed. MitoTox is an electronic repository that integrates comprehensive information about mitochondria-related toxins and their targets. Information and data related to mitochondrial toxicity originate from various sources, including scientific journals and other electronic databases. These resources were manually verified and extracted into MitoTox. The database currently contains over 1400 small-molecule compounds, 870 mitochondrial targets, and more than 4100  mitochondrial toxin-target associations. Each MitoTox data record contains over 30 fields, including biochemical properties, therapeutic classification, target proteins, toxicological data, mechanistic information, clinical side effects, and references.

Conclusions

MitoTox provides a fully searchable database with links to references and other databases. Potential applications of MitoTox include toxicity classification, prediction, reference and education. MitoTox is available online at http://www.mitotox.org .",2021-07-15 +33963857,Proteo3Dnet: a web server for the integration of structural information with interactomics data.,"Proteo3Dnet is a web server dedicated to the analysis of mass spectrometry interactomics experiments. Given a flat list of proteins, its aim is to organize it in terms of structural interactions to provide a clearer overview of the data. This is achieved using three means: (i) the search for interologs with resolved structure available in the protein data bank, including cross-species remote homology search, (ii) the search for possibly weaker interactions mediated through Short Linear Motifs as predicted by ELM-a unique feature of Proteo3Dnet, (iii) the search for protein-protein interactions physically validated in the BioGRID database. The server then compiles this information and returns a graph of the identified interactions and details about the different searches. The graph can be interactively explored to understand the way the core complexes identified could interact. It can also suggest undetected partners to the experimentalists, or specific cases of conditionally exclusive binding. The interest of Proteo3Dnet, previously demonstrated for the difficult cases of the proteasome and pragmin complexes data is, here, illustrated in the context of yeast precursors to the small ribosomal subunits and the smaller interactome of 14-3-3zeta frequent interactors. 
The Proteo3Dnet web server is accessible at http://bioserv.rpbs.univ-paris-diderot.fr/services/Proteo3Dnet/.",2021-07-01 +32803238,Integrative genomics approach identifies conserved transcriptomic networks in Alzheimer's disease.,"Alzheimer's disease (AD) is a devastating neurological disorder characterized by changes in cell-type proportions and consequently marked alterations of the transcriptome. Here we use a data-driven systems biology meta-analytical approach across three human AD cohorts, encompassing six cortical brain regions, and integrate with multi-scale datasets comprising of DNA methylation, histone acetylation, transcriptome- and genome-wide association studies and quantitative trait loci to further characterize the genetic architecture of AD. We perform co-expression network analysis across more than 1200 human brain samples, identifying robust AD-associated dysregulation of the transcriptome, unaltered in normal human aging. We assess the cell-type specificity of AD gene co-expression changes and estimate cell-type proportion changes in human AD by integrating co-expression modules with single-cell transcriptome data generated from 27 321 nuclei from human postmortem prefrontal cortical tissue. We also show that genetic variants of AD are enriched in a microglial AD-associated module and identify key transcription factors regulating co-expressed modules. Additionally, we validate our results in multiple published human AD gene expression datasets, which can be easily accessed using our online resource (https://swaruplab.bio.uci.edu/consensusAD).",2020-10-01 +30364952,Plant editosome database: a curated database of RNA editosome in plants.,"RNA editing plays an important role in plant development and growth, enlisting a number of editing factors in the editing process and accordingly revealing the diversity of plant editosomes for RNA editing. However, there is no resource available thus far that integrates editosome data for a variety of plants. 
Here, we present Plant Editosome Database (PED; http://bigd.big.ac.cn/ped), a curated database of RNA editosome in plants that is dedicated to the curation, integration and standardization of plant editosome data. Unlike extant relevant databases, PED incorporates high-quality editosome data manually curated from related publications and organelle genome annotations. In the current version, PED integrates a complete collection of 98 RNA editing factors and 20 836 RNA editing events, covering 203 organelle genes and 1621 associated species. In addition, it contains functional effects of editing factors in regulating plant phenotypes and includes detailed experimental evidence. Together, PED serves as an important resource to help researchers investigate the RNA editing process across a wide range of plants and thus would be of broad utility for the global plant research community.",2019-01-01 +30445541,"SIFTS: updated Structure Integration with Function, Taxonomy and Sequences resource allows 40-fold increase in coverage of structure-based annotations for proteins.","The Structure Integration with Function, Taxonomy and Sequences resource (SIFTS; http://pdbe.org/sifts/) was established in 2002 and continues to operate as a collaboration between the Protein Data Bank in Europe (PDBe; http://pdbe.org) and the UniProt Knowledgebase (UniProtKB; http://uniprot.org). The resource is instrumental in the transfer of annotations between protein structure and protein sequence resources through provision of up-to-date residue-level mappings between entries from the PDB and from UniProtKB. SIFTS also incorporates residue-level annotations from other biological resources, currently comprising the NCBI taxonomy database, IntEnz, GO, Pfam, InterPro, SCOP, CATH, PubMed, Ensembl, Homologene and automatic Pfam domain assignments based on HMM profiles. 
The recently released implementation of SIFTS includes support for multiple cross-references for proteins in the PDB, allowing mappings to UniProtKB isoforms and UniRef90 cluster members. This development makes structure data in the PDB readily available to over 1.8 million UniProtKB accessions.",2019-01-01 +34791104,Prototheca-ID: a web-based application for molecular identification of Prototheca species. ,"The genus Prototheca houses unicellular, achlorophyllous, yeast-like algae, widely distributed in the environment. Protothecae are the only known plants that have repeatedly been reported to infect vertebrates, including humans. Although rare, protothecosis can be clinically demanding, with an unpredictable and treatment-resistant behavior. Accurate identification of Prototheca species relies upon DNA sequence-based typing of the mitochondrially encoded CYTB gene. However, no bioinformatic tool for the processing and analyzing of protothecal sequence data exists. Moreover, currently available sequence databases suffer from a limited number of records and lack of or flawed sequence annotations, making Prototheca identification challenging and often inconclusive. This report introduces the Prototheca-ID, a user-friendly, web-based application providing fast and reliable speciation of Prototheca isolates. In addition, the application offers the users the possibility of depositing their sequences and associated metadata in a fully open Prototheca-ID database, developed to enhance research integrity and quality in the field of Protothecae and protothecosis. Database URL: The Prototheca-ID application is available at https://prototheca-id.org.",2021-11-01 +34698891,Mouse Genome Informatics (MGI): latest news from MGD and GXD.,"The Mouse Genome Informatics (MGI) database system combines multiple expertly curated community data resources into a shared knowledge management ecosystem united by common metadata annotation standards. 
MGI's mission is to facilitate the use of the mouse as an experimental model for understanding the genetic and genomic basis of human health and disease. MGI is the authoritative source for mouse gene, allele, and strain nomenclature and is the primary source of mouse phenotype annotations, functional annotations, developmental gene expression information, and annotations of mouse models with human diseases. MGI maintains mouse anatomy and phenotype ontologies and contributes to the development of the Gene Ontology and Disease Ontology and uses these ontologies as standard terminologies for annotation. The Mouse Genome Database (MGD) and the Gene Expression Database (GXD) are MGI's two major knowledgebases. Here, we highlight some of the recent changes and enhancements to MGD and GXD that have been implemented in response to changing needs of the biomedical research community and to improve the efficiency of expert curation. MGI can be accessed freely at http://www.informatics.jax.org .",2021-10-26 +34127402,Immu-Mela: An open resource for exploring immunotherapy-related multidimensional genomic profiles in melanoma.,"There are increasing studies aimed to reveal genomic hallmarks predictive of immune checkpoint blockade (ICB) treatment response, which generated a large number of data and provided an unprecedented opportunity to identify response-related features and evaluate their robustness across cohorts. However, those valuable data sets are not easily accessible to the research community. To take full advantage of existing large-scale immuno-genomic profiles, we developed Immu-Mela (http://bioinfo.vanderbilt.edu/database/Immu-Mela/), a multidimensional immuno-genomic portal that provides interactive exploration of associations between ICB responsiveness and multi-omics features in melanoma, including genetic, transcriptomics, immune cells, and single-cell populations. Immu-Mela also enables integrative analysis of any two genomic features. 
We demonstrated the value of Immu-Mela by identifying known and novel genomic features associated with ICB response. In addition, Immu-Mela allows users to upload their data sets (unrestricted to any cancer types) and co-analyze with existing data to identify and validate signatures of interest. Immu-Mela reduces barriers between researchers and complex genomic data, facilitating discoveries in cancer immunotherapy.",2021-05-14 +32891434,Handle-On-QOL: a dedicated quality of life resource following the diagnosis and treatment of head and neck cancer.,"Measuring quality of life (QOL) after head and neck cancer (HNC), is rapidly becoming the standard of care. The Head and Neck Database Listing Evidence on QOL (Handle-On-QOL) is a dedicated QOL resource that includes articles published from 1982 onwards. The aim of this study was to assess the completeness of Handle-On-QOL, when compared with other non-specific search engines. Six years were selected at random; 1982, 1990, 1998, 2003, 2006, 2016. Four search engines were used (Medline, EMBASE, CINAHL, PsycINFO). Reporting followed PRISMA methodology. A total of 595 papers were assessed, of which 200 met the inclusion criteria. 186 papers were present on Handle-On-QOL, 243 were found on Handle-On-QOL for these six years, but not identified in the other searches, and 14 were missing from Handle-On-QOL. A search using standard engines generated a large number of irrelevant papers. Handle-On-QOL provides a comprehensive and accurate reflection of articles published using questionnaires to report QOL following HNC. This web-based repository (http://www.handle-on-qol.com) acts as a quick reference point for clinicians and researchers.",2020-09-02 +31667505,IPD-IMGT/HLA Database.,"The IPD-IMGT/HLA Database, http://www.ebi.ac.uk/ipd/imgt/hla/, currently contains over 25 000 allele sequence for 45 genes, which are located within the Major Histocompatibility Complex (MHC) of the human genome. 
This region is the most polymorphic region of the human genome, and the levels of polymorphism seen exceed most other genes. Some of the genes have several thousand variants and are now termed hyperpolymorphic, rather than just simply polymorphic. The IPD-IMGT/HLA Database has provided a stable, highly accessible, user-friendly repository for this information, providing the scientific and medical community access to the many variant sequences of this gene system, that are critical for the successful outcome of transplantation. The number of currently known variants, and dramatic increase in the number of new variants being identified has necessitated a dedicated resource with custom tools for curation and publication. The challenge for the database is to continue to provide a highly curated database of sequence variants, while supporting the increased number of submissions and complexity of sequences. In order to do this, traditional methods of accessing and presenting data will be challenged, and new methods will need to be utilized to keep pace with new discoveries.",2020-01-01 +30903148,Differential proteostatic regulation of insoluble and abundant proteins.,"MOTIVATION:Despite intense effort, it has been difficult to explain chaperone dependencies of proteins from sequence or structural properties. RESULTS:We constructed a database collecting all publicly available data of experimental chaperone interaction and dependency data for the Escherichia coli proteome, and enriched it with an extensive set of protein-specific as well as cell-context-dependent proteostatic parameters. Employing this new resource, we performed a comprehensive meta-analysis of the key determinants of chaperone interaction. 
Our study confirms that GroEL client proteins are biased toward insoluble proteins of low abundance, but for client proteins of the Trigger Factor/DnaK axis, we instead find that cellular parameters such as high protein abundance, translational efficiency and mRNA turnover are key determinants. We experimentally confirmed the finding that chaperone dependence is a function of translation rate and not protein-intrinsic parameters by tuning chaperone dependence of Green Fluorescent Protein (GFP) in E.coli by synonymous mutations only. The juxtaposition of both protein-intrinsic and cell-contextual chaperone triage mechanisms explains how the E.coli proteome achieves combining reliable production of abundant and conserved proteins, while also enabling the evolution of diverging metabolic functions. AVAILABILITY AND IMPLEMENTATION:The database will be made available via http://phdb.switchlab.org. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-10-01 +34042771,Introducing a Platform for Integrating and Sharing Stem Cell Research Data.,"Advancements in regenerative medicine have highlighted the need for increased standardization and sharing of stem cell products to help drive these innovative interventions toward public availability and to increase collaboration in the scientific community. Although numerous attempts and numerous databases have been made to store this data, there is still a lack of a platform that incorporates heterogeneous stem cell information into a harmonized project-based framework. The aim of the platform described in this study, ReMeDy, is to provide an intelligent informatics solution which integrates diverse stem cell product characteristics with study subject and omics information. In the resulting platform, heterogeneous data is validated using predefined ontologies and stored in a relational database. 
In this initial feasibility study, testing of the ReMeDy functionality was performed using published, publicly-available induced pluripotent stem cell projects conducted in in vitro, preclinical and intervention evaluations. It demonstrated the robustness of ReMeDy for storing diverse iPSC data, by seamlessly harmonizing diverse common data elements, and the potential utility of this platform for driving knowledge generation from the aggregation of this shared data. Next steps include increasing the number of curated projects by developing a crowdsourcing framework for data upload and an automated pipeline for metadata abstraction. The database is publicly accessible at https://remedy.mssm.edu/.",2021-05-01 +34075103,"KAUST Metagenomic Analysis Platform (KMAP), enabling access to massive analytics of re-annotated metagenomic data.","Exponential rise of metagenomics sequencing is delivering massive functional environmental genomics data. However, this also generates a procedural bottleneck for on-going re-analysis as reference databases grow and methods improve, and analyses need to be updated for consistency, which requires access to increasingly demanding bioinformatic and computational resources. Here, we present the KAUST Metagenomic Analysis Platform (KMAP), a new integrated open web-based tool for the comprehensive exploration of shotgun metagenomic data. We illustrate the capacities KMAP provides through the re-assembly of ~ 27,000 public metagenomic samples captured in ~ 450 studies sampled across ~ 77 diverse habitats. A small subset of these metagenomic assemblies is used in this pilot study grouped into 36 new habitat-specific gene catalogs, all based on full-length (complete) genes. Extensive taxonomic and gene annotations are stored in Gene Information Tables (GITs), a simple tractable data integration format useful for analysis through command line or for database management. 
KMAP pilot study provides the exploration and comparison of microbial GITs across different habitats with over 275 million genes. KMAP access to data and analyses is available at https://www.cbrc.kaust.edu.sa/aamg/kmap.start .",2021-06-01 +33952239,Use of electronic pharmacy transaction data and website development to assess antibiotic use in nursing homes.,"

Background

In 2017, the Centers for Medicare and Medicaid Services required all long-term care facilities, including nursing homes, to have an antibiotic stewardship program. Many nursing homes lack the resources, expertise, or infrastructure to track and analyze antibiotic use measures. Here, we demonstrate that pharmacy invoices are a viable source of data to track and report antibiotic use in nursing homes.

Methods

The dispensing pharmacy working with several nursing homes in the same healthcare corporation provided pharmacy invoices from 2014 to 2016 as files formatted as comma separated values. We aggregated these files by aligning elements into a consistent set of variables and assessed the completeness of data from each nursing home over time. Data cleaning involved removing rows that did not describe systemic medications, de-duplication, consolidating prescription refills, and removing prescriptions for insulin and opioids, which are medications that were not administered at a regular dose or schedule. After merging this cleaned invoice data to nursing home census data including bed days of care and publicly available data characterizing bed allocation for each nursing home, we used the resulting database to describe several antibiotic use metrics and generated an interactive website to permit further analysis.

Results

The resultant database permitted assessment of the following antibiotic use metrics: days of antibiotic therapy, length of antibiotic therapy, rate of antibiotic starts, and the antibiotic spectrum index. Further, we created a template for summarizing data within a facility and comparing across facilities. https://sunahsong.shinyapps.io/USNursingHomes/ .

Conclusions

Lack of resources and infrastructure contributes to challenges facing nursing homes as they develop antibiotic stewardship programs. Our experience with using pharmacy invoice data may serve as a useful approach for nursing homes to track and report antibiotic use.",2021-05-05 +34332522,Pediatric In-Hospital Cardiac Arrest International Registry (PACHIN): protocol for a prospective international multicenter register of cardiac arrest in children.,"

Background and aims

Cardiac arrest (CA) in children is a major public health problem. Thanks to advances in cardiopulmonary resuscitation (CPR) guidelines and teaching skills, results in children have improved. However, pediatric CA has a very high mortality. In the treatment of in-hospital CA there are still multiple controversies. The objective of this study is to develop a multicenter and international registry of in-hospital pediatric cardiac arrest including the diversity of management in different clinical and social contexts. Participation in this register will enable the evaluation of the diagnosis of CA, CPR and post-resuscitation care and its influence in survival and neurological prognosis.

Methods

An intrahospital CA data recording protocol has been designed following the Utstein model. The database is hosted according to European legislation regarding patient data protection. It is drafted in English and Spanish. Invitations to participate have been sent to Spanish, European and Latin American hospitals. The variables included assess hospital characteristics, the resuscitation team, patients' demographics and background, CPR, post-resuscitation care, mortality, survival and long-term evolution. Survival at hospital discharge will be evaluated as a primary outcome and survival with good neurological status as a secondary outcome, analyzing the different factors involved in them. The study design is a prospective, observational registry of a cohort of pediatric CA.

Conclusions

This study represents the development of a registry of in-hospital CA in childhood. Its development will provide access to CPR data in different hospital settings and will allow the analysis of current controversies in the treatment of pediatric CA and post-resuscitation care. The results may contribute to the development of further international recommendations. Trial register: ClinicalTrials.gov Identifier: NCT04675918. Registered 19 December 2020 - Retrospectively registered, https://clinicaltrials.gov/ct2/show/record/NCT04675918?cond=pediatric+cardiac+arrest&draw=2&rank=10.",2021-07-31 +34147352,MPSBase: Comprehensive repository of differentially expressed genes for mucopolysaccharidoses.,"Mucopolysaccharidoses (MPS) are lysosomal storage diseases (LSDs) caused by the deficiency of enzymes essential for the metabolism of extracellular matrix components called glycosaminoglycans (GAGs). To understand the physiopathology and alterations due to the lysosomal accumulation resulting from enzymatic deficiencies and their secondary outcomes can improve the diagnosis and treatment of rare genetic diseases. This work presents a database for differentially expressed genes from different public MPS data. We developed our database, including 13 studies previously deposited in the GEO (https://www.ncbi.nlm.nih.gov/geo/). The website is hosted in the UFRGS data processing center (CPD) and is available at . The site was constructed in PHP, and the analyses were performed in R. The organisms represented by the datasets are Canis lupus familiaris, Homo sapiens, Mus musculus, and Rattus norvegicus. The user can search for the differentially expressed genes and ontologies by species, MPS type, or tissue type. For each comparison, a heatmap with the 50 top differentially expressed genes is available as well as dot plots for the 30 top ontologies divided by biological process, cellular component, KEGG pathways, and molecular function. This data is also fully available in tables. 
There are 54 possible comparisons involving about 5000 to 10,000 genes each. This website is the only specific database for MPS with filtering and presenting their results in a one-click approach to the best of our knowledge. The development of such analytical and automated strategies accessible to health professionals is essential for fostering MPS research. The MPSBase is a web user-friendly, comprehensive repository of differentially expressed genes and ontologies regarding the MPS data.",2021-06-15 +34516226,A Preliminary Investigation of Social Justice Perceptions Among U.S. Speech-Language Pathologists: Clinical Implications.,"Purpose The purpose of this survey research is to provide preliminary data regarding speech-language pathologists' (SLPs') perceptions of the role that social justice (SJ) plays in their work. As our professional organizations call us to advocate and communicate with regulatory agencies and legislative bodies to promote quality care for all individuals, this topic has become particularly important at this time. At present, there is a lack of data in peer-reviewed publications within the discipline of communication disorders on SJ and even less regarding the perceptions of SLPs on SJ. Method The survey was sent to American Speech-Language-Hearing Association (ASHA)-certified SLPs, identified by the ASHA ProFind database, across six U.S. geographic regions, including both urban and rural communities. Four themes were explored through the survey: (a) importance of SJ, (b) awareness of SJ, (c) current practices related to SJ, and (d) barriers to SJ implementation. Results The majority of respondents view SJ as important to the profession (91.2%) and value the work of creating equality among groups (96.0%). Many SLPs are actively involved in implementing SJ principles in their own practice by accepting Medicaid (40.7%), engaging in political outreach (55.0%), and providing transdisciplinary educational outreach (77.9%). 
Identified barriers to incorporating SJ include time (62.7%), resources (65.6%), and finances (70.0%). Conclusions Working for SJ is important to a majority of the respondents, and various efforts are implemented to create equal opportunities for service to clients. Barriers continue to exist that limit the degree to which SLPs can work toward SJ. A list of actions to be considered in order to promote SJ in the field is provided. Supplemental Material https://doi.org/10.23641/asha.16584044.",2021-09-10 +33497436,FifBase: a comprehensive fertility-associated indicators factor database for domestic animals. ,"Fertility refers to the ability of animals to maintain reproductive function and give birth to offspring, which is an important indicator to measure the productivity of animals. Fertility is affected by many factors, among which environmental factors may also play key roles. During the past years, substantial research studies have been conducted to detect the factors related to fecundity, including genetic factors and environmental factors. However, the identified genes associated with fertility from countless previous studies are randomly dispersed in the literature, whereas some other novel fertility-related genes are needed to detect from omics-based datasets. Here, we constructed a fertility index factor database FifBase based on manually curated published literature and RNA-Seq datasets. During the construction of the literature group, we obtained 3301 articles related to fecundity for 13 species from PubMed, involving 2823 genes, which are related to 75 fecundity indicators or 47 environmental factors. Eventually, 1558 genes associated with fertility were filtered in 10 species, of which 1088 and 470 were from RNA-Seq datasets and text mining data, respectively, involving 2910 fertility-gene pairs and 58 fertility-environmental factors. 
All these data were cataloged into FifBase (http://www.nwsuaflmz.com/FifBase/), where the fertility-related factor information, including gene annotation and environmental factors, can be browsed, retrieved and downloaded with the user-friendly interface.",2021-09-01 +34566452,A database and checklist of geometrid moths (Lepidoptera) from Colombia.,"

Background

Molecular DNA sequence data allow unprecedented advances in biodiversity assessments, monitoring schemes and taxonomic works, particularly in poorly-explored areas. They allow, for instance, the sorting of material rapidly into operational taxonomic units (such as BINs - Barcode Index Numbers), sequences can be subject to diverse analyses and, with linked metadata and physical vouchers, they can be examined further by experts. However, a prerequisite for their exploitation is the construction of reference libraries of DNA sequences that represent the existing biodiversity. To achieve these goals for Geometridae (Lepidoptera) moths in Colombia, expeditions were carried out to 26 localities in the northern part of the country in 2015-2019. The aim was to collect specimens and sequence their DNA barcodes and to record a fraction of the species richness and occurrences in one of the most biodiversity-rich countries. These data are the beginning of an identification guide to Colombian geometrid moths, whose identities are currently often provisional only, being morpho species or operational taxonomic units (OTUs). Prior to the current dataset, 99 Geometridae sequences forming 44 BINs from Colombia were publicly available on the Barcode of Life Data System (BOLD), covering 20 species only.

New information

We enrich the Colombian Geometridae database significantly by including DNA barcodes, two nuclear markers, photos of vouchers and georeferenced occurrences of 281 specimens of geometrid moths from different localities. These specimens are classified into 80 genera. Analytical tools on BOLD clustered 157 of the mentioned sequences to existing BINs identified to species level, identified earlier by experts. Another 115 were assigned to BINs that were identified to genus or tribe level only. Eleven specimens did not match any existing BIN on BOLD and are, therefore, new additions to the database. It is likely that many BINs represent undescribed species. Nine short sequences (< 500bp) were not assigned to BINs, but identified to the lowest taxonomic category by expert taxonomists and with comparisons of type material photos. The released new genetic information will help to further progress the systematics of Geometridae. An illustrated catalogue of all new records allows validation of our identifications; it is also the first document of this kind for Colombian Geometridae. All specimens are deposited at the Museo de Zoología of Universidad de Sucre (MZUS), North Colombia. DNA BINs are reported in this study through dx.doi.org/10.5883/DS-GEOCO, the species occurrences are available on SIB Colombia https://sibcolombia.net/ and the Global Biodiversity Information Facility (GBIF) https://www.gbif.org/ through https://doi.org/10.15472/ucfmkh.",2021-09-03 +,Paraphyly of the genus Boehmeria (Urticaceae): a response to Liang et al. ‘Relationships among Chinese Boehmeria species and the evolution of various clade’,"Boehmeria, as currently circumscribed, comprises 52 species and has a pantropical distribution. Liang et al. propose a sectional classification of Boehmeria based on the phylogenetic analysis of SNP data for 20 species and an additional 10 subspecific taxa of these at the rank of variety or form. They restrict their sampling to species documented in China. 
We found many shortcomings in the sampling and analyses which we feel have resulted in a misleading phylogeny for the genus and the economically important fibre-plant, Boehmeria nivea. By sampling only Chinese species of this genus for their in-group and using a single distantly related outgroup, Liang et al. have failed to capture the diversity of the genus and so erroneously concluded that it forms a monophyletic group. Previous published research clearly demonstrates that Boehmeria is paraphyletic and polyphyletic, comprising at least four monophyletic groupings most closely related to several genera within the Boehmerieae. For these reasons, the sections that Liang et al. (Ind Crops Prod 148:112092, 2020. https://doi.org/10.1016/j.indcrop.2020.112092) propose for Boehmeria are not effective tools for its classification. The important fibre-plant, Boehmeria nivea, should therefore not be considered as part of the genus Boehmeria for the purposes of crop breeding, but as sister to Archiboehmeria. Breeding programmes for ramie should therefore focus on populations and germplasm of Archiboehmeria atrata. We conclude that poor taxon sampling, overlooking relevant molecular and taxonomic literature, internal conflict within their SNP data and the overinterpretation of low support values has resulted in the erroneous conclusion that Boehmeria represents a monophyletic or ‘natural’ genus.",2021-02-01 +,"Genesis of Antibiotic Resistance (AR) LXVII: Inverse Correlation of morbidity and mortality rate with social distancing, stay‐home program / shelter‐in program/lockdown – A “Cogent Transmission Model” (CTM) for personal hardiness bolstering herd immunity","As of May 25, 2020, 15:38 GMT, the total number of coronavirus cases across the globe as 5,542,056, deaths: 347,381and recovery of 2,321,556 cases. (https://www.worldometers.info/coronavirus/). 
We have chosen India as a prototype for the analysis of morbidity and mortality rates to learn about the principles and practices for achieving minimum causality under the most complex practical scenario. The State of Goa where 5 cases / 100,000 with no death reported as of May 25, 2020, 8:21 A.M. At the time of the data analysis, India was on 62nd day of lockdown. Based on the data available at this time, here we present an inverse correlation of decreased morbidity and mortality rate with increased social distancing, stay‐home program / shelter‐in program administered by Indian authorities. It is intriguing that a population of approximately 1.3 billion people, reporting a minimum morbidity and mortality rate (M&M rate) could be an exceptional example of coordination of incredibly diverse cultural and complex administrative fabric. In all, our hypothesis is that it is the herd immunity conferred the protection for an en masse from the current pandemic event. Based on the lessons learned from this current SARS‐CoV‐2, here we present a “Cogent Transmission Model” (CTM) in which community living confer and/or reinforce herd immunity induced protection from the future pandemic of infectious diseases caused by antibiotic resistance bacterial pathogens (ARBP). The success in sustaining a minimum M&M rate could be due in part by the social support system which is a significant element for the development of hardiness to prevent the spread of infectious diseases. The success of the current lockdown is a reflection of the social support system which is intertwined with perceptions of personal control enabling the coping process. Such a level of hardiness is also referred to as “Internal Locus of Control” (ILC). It is our observation that a pattern of consistency on the confidence level of each and every member of the community having considerable control over the events in overcoming the illness and combating the diseases is truly flabbergasting. 
Such a level of resilience draws a corollary of the hardy personality as identified in the “Personal Hardiness” (J. Pers and Soci. Psych 37, 1–11: 1979). Traits such as a. sense of commitment to self, b. control over their life at the individual level and c. view and make adjustments as challenges arise rather than viewing it as a source of stress. Taken together we suggest that commitment to self, control of self in dynamic social environmental circumstances and readjustment to challenges separate the people with “Personal Hardiness” from the rest of the society as a remarkable trait to overcome the fear of the unknown in a pandemic event (FASEB Journal 2019 33:1_supplement, 483.16‐483.16). It would be a dream come true, should the principles of “Personal Hardiness” disseminated across the globe at all socioeconomic levels in every nook and corner at the earliest possible time, it would definitely vanquish the plausible pandemic of infectious diseases at its blossom.",2021-05-01 +30858555,MorCVD: A Unified Database for Host-Pathogen Protein-Protein Interactions of Cardiovascular Diseases Related to Microbes.,"Microbe induced cardiovascular diseases (CVDs) are less studied at present. Host-pathogen interactions (HPIs) between human proteins and microbial proteins associated with CVD can be found dispersed in existing molecular interaction databases. MorCVD database is a curated resource that combines 23,377 protein interactions between human host and 432 unique pathogens involved in CVDs in a single intuitive web application. It covers endocarditis, myocarditis, pericarditis and 16 other microbe induced CVDs. The HPI information has been compiled, curated, and presented in a freely accessible web interface ( http://morcvd.sblab-nsit.net/About ). Apart from organization, enrichment of the HPI data was done by adding hyperlinked protein ID, PubMed, gene ontology records. 
For each protein in the database, drug target and interactors (same as well as different species) information has been provided. The database can be searched by disease, protein ID, pathogen name or interaction detection method. Interactions detected by more than one method can also be listed. The information can be presented in tabular form or downloaded. A comprehensive help file has been developed to explain the various options available. Hence, MorCVD acts as a unified resource for retrieval of HPI data for researchers in CVD and microbiology.",2019-03-11 +32117432,"BedSect: An Integrated Web Server Application to Perform Intersection, Visualization, and Functional Annotation of Genomic Regions From Multiple Datasets.","A large number of genomic regions, such as transcription factor binding sites (TFBSs) captured from next generation sequencing (NGS) data analyses or those available from the public resource database ENCODE, are generally overlapped to answer a variety of biological questions. Though several command-line tools are available to perform such an analysis, there is a notable lack of an integrated webserver application with which to identify genomic region intersections, generate publication-ready plots depicting subsets of the overlapped regions, and perform functional annotation. Thus, there is an ardent need for a comprehensive and user-friendly webserver application that allows the users to either upload multiple datasets or select from the integrated Gene Transcription Regulation Database (GTRD). We thus introduce BedSect (http://imgsb.org/bedsect/.), which not only fulfils the above criteria but also performs intersection analysis along with visualization of the intersection regions as an UpSet and correlation plot using the integrated Shiny application. 
Moreover, analyses, including functional annotation, gene ontology, and biological pathways enrichment for the identified unique and intersected genomic regions, can also be performed using the integrated GREAT tool. To view the genomic regions in the genome browser, the inbuilt hyperlink for UCSC can redirect the user to visualize the results as custom tracks.",2020-02-05 +34325658,Taxallnomy: an extension of NCBI Taxonomy that produces a hierarchically complete taxonomic tree.,"

Background

NCBI Taxonomy is the main taxonomic source for several bioinformatics tools and databases since all organisms with sequence accessions deposited on INSDC are organized in its hierarchical structure. Despite the extensive use and application of this data source, an alternative representation of data as a table would facilitate the use of information for processing bioinformatics data. To do so, since some taxonomic-ranks are missing in some lineages, an algorithm might propose provisional names for all taxonomic-ranks.

Results

To address this issue, we developed an algorithm that takes the tree structure from NCBI Taxonomy and generates a hierarchically complete taxonomic table, maintaining its compatibility with the original tree. The procedures performed by the algorithm consist of attempting to assign a taxonomic-rank to an existing clade or ""no rank"" node when possible, using its name as part of the created taxonomic-rank name (e.g. Ord_Ornithischia) or interpolating parent nodes when needed (e.g. Cla_of_Ornithischia), both examples given for the dinosaur Brachylophosaurus lineage. The new hierarchical structure was named Taxallnomy because it contains names for all taxonomic-ranks, and it contains 41 hierarchical levels corresponding to the 41 taxonomic-ranks currently found in the NCBI Taxonomy database. From Taxallnomy, users can obtain the complete taxonomic lineage with 41 nodes of all taxa available in the NCBI Taxonomy database, without any hazard to the original tree information. In this work, we demonstrate its applicability by embedding taxonomic information of a specified rank into a phylogenetic tree and by producing metagenomics profiles.

Conclusion

Taxallnomy applies to any bioinformatics analyses that depend on the information from NCBI Taxonomy. Taxallnomy is updated periodically but with a distributed PERL script users can generate it locally using NCBI Taxonomy as input. All Taxallnomy resources are available at http://bioinfo.icb.ufmg.br/taxallnomy .",2021-07-29 +32858223,hTFtarget: A Comprehensive Database for Regulations of Human Transcription Factors and Their Targets.,"Transcription factors (TFs) as key regulators play crucial roles in biological processes. The identification of TF-target regulatory relationships is a key step for revealing functions of TFs and their regulations on gene expression. The accumulated data of chromatin immunoprecipitation sequencing (ChIP-seq) provide great opportunities to discover the TF-target regulations across different conditions. In this study, we constructed a database named hTFtarget, which integrated huge human TF target resources (7190 ChIP-seq samples of 659 TFs and high-confidence binding sites of 699 TFs) and epigenetic modification information to predict accurate TF-target regulations. hTFtarget offers the following functions for users to explore TF-target regulations: (1) browse or search general targets of a query TF across datasets; (2) browse TF-target regulations for a query TF in a specific dataset or tissue; (3) search potential TFs for a given target gene or non-coding RNA; (4) investigate co-association between TFs in cell lines; (5) explore potential co-regulations for given target genes or TFs; (6) predict candidate TF binding sites on given DNA sequences; (7) visualize ChIP-seq peaks for different TFs and conditions in a genome browser. hTFtarget provides a comprehensive, reliable and user-friendly resource for exploring human TF-target regulations, which will be very useful for a wide range of users in the TF and gene expression regulation community. 
hTFtarget is available at http://bioinfo.life.hust.edu.cn/hTFtarget.",2020-04-01 +33359127,NBIGV-DB: A dedicated database of non-B cell derived immunoglobulin variable region.,"Immunoglobulins (Ig) are important immune molecules that possess highly diverse variable region sequences enabling antigen recognition. According to classical immune theory, B lymphocytes have been considered the only source of Ig production (B-Igs). However, accumulating evidence have suggested that Igs are also produced by many non-B cells (non-B Igs), including epithelial cells, neurons, germ cells, as well as myeloid cells of hemopoietic system. Besides acting as bona fide antibodies, Non-B Igs have alternative cellular functions, such as promotion of cell survival, adhesion and migration. More importantly, Unlike the unlimited sequence diversity of B-Igs, the non-B Igs exhibit conserved V(D)J patterns across the same lineages. To support the analysis and comparison of variable region sequences from Igs, produced by B and non-B cells, we established a database (NBIGV) constituted by a non-B Ig variable region repertoire, which includes 727,989 VHDJH and VκJκ recombination sequences of non-B Igs sequenced from mouse samples. Upon database search, users can view, browse and investigate the variable region sequences of non-B Igs according to respective mice strains and tissues as well as Ig classes. Moreover, users can easily download selected sequences and/or compare sequences of interest with known non-B Ig sequences present in the database using NCBI-BLAST algorithms. Additionally, our database integrates a submission page and supplementary sample information. The NBIGV database may serve as a valuable resource for sequence analyses of Non-B Igs. NBIGV database is freely available at http://nbigv.org.",2020-12-23 +34742864,"Natural products for the treatment of stress-induced depression: Pharmacology, mechanism and traditional use.","

Ethnopharmacological relevance

Depression, one of the most common psychiatric disorders, is the fourth leading cause of long-term disability worldwide. A series of causes can trigger depression, including psychological stress and conflict, as well as biological derangement, among which stress has a pivotal role in the development of depression. Traditional herbal medicine has been used for the treatment of various disorders including depression for a long history with multi-targets, multi-levels and multi-ways, attracting great attention from scholars. Recently, natural products have been commercialized as antidepressants which have become increasingly popular in the world health drug markets. Major research contributions in ethnopharmacology have generated and updated a vast amount of data associated with natural products in antidepressant-like activity.

Aims of the review

This review aims to briefly discuss the pathological mechanism, animal models of stress-induced depression, traditional use of herbal medicines and especially recapitulate the natural products with antidepressant activity and their pharmacological functions and mechanism of action, which may contribute to a better understanding of potential therapeutic effects of natural products and the development of promising drugs with high efficacy and low toxicity for the treatment of stress-induced depression.

Materials and methods

The contents of this review were sourced from electronic databases including PubMed, Sci Finder, Web of Science, Science Direct, Elsevier, Google Scholar, China National Knowledge Infrastructure (CNKI), Wan Fang, Chinese Scientific and Technological Periodical Database (VIP) and Chinese Biomedical Database (CBM). Additional information was collected from Yao Zhi website (https://db.yaozh.com/). Data were obtained from April 1992 to June 2021. Only English language was applied to the search. The search terms were 'stress-induced depression', 'pathological mechanism' in the title and 'stress', 'depression', 'animal model' and 'natural products' in the whole text.

Results

Stress-induced depression is related to the monoaminergic system, hypothalamic-pituitary-adrenal (HPA) axis, neuronal plasticity and a series of inflammatory factors. Four main types of animal models of stress-induced depression were represented. Fifty-eight bioactive phytochemical compounds, fifty-six herb medicines and five formulas from traditional Chinese medicine were highlighted, which exert antidepressant effects by inhibiting monoamine oxidase (MAO) reaction, alleviating dysfunction of the HPA axis and nerve injury, and possessing anti-inflammatory activities.

Conclusions

Natural products provide a large number of compounds with antidepressant-like effects, and their therapeutic impacts have been highlighted for a long time. This review summarized the pathological mechanism and animal models of stress-induced depression, and the natural products with antidepressant activity in particular, which will shed light on the action mechanism and clinical potential of these compounds. Natural products also have been a vital and promising source for future antidepressant drug discovery.",2021-11-03 +33326321,Identification and characterization of GAL4 drivers that mark distinct cell types and regions in the Drosophila adult gut.,"The gastrointestinal tract in the adult Drosophila serves as a model system for exploring the mechanisms underlying digestion, absorption and excretion, stem cell plasticity, and inter-organ communication, particularly through the gut-brain axis. It is also useful for studying the cellular and adaptive responses to dietary changes, alterations in microbiota and immunity, and systematic and endocrine signals. Despite the various cell types and distinct regions in the gastrointestinal tract, few tools are available to target and manipulate the activity of each cell type and region, and their gene expression. Here, we report 353 GAL4 lines and several split-GAL4 lines that are expressed in enteric neurons (ENs), progenitors (ISCs and EBs), enterocytes (ECs), enteroendocrine cells (EEs), or/and other cell types that are yet to be identified in distinct regions of the gut. We had initially collected approximately 600 GAL4 lines that may be expressed in the gut based on RNA sequencing data, and then crossed them to UAS-GFP to perform immunohistochemistry to identify those that are expressed selectively in the gut. The cell types and regional expression patterns that are associated with the entire set of GAL4 drivers and split-GAL4 combinations are annotated online at http://kdrc.kr/index.php (K-Gut Project). 
This GAL4 resource can be used to target specific populations of distinct cell types in the fly gut, and therefore, should permit a more precise investigation of gut cells that regulate important biological processes.",2020-12-16 +32117926,Gene-Focused Networks Underlying Phenotypic Convergence in a Systematically Phenotyped Cohort With Heterogeneous Intellectual Disability.,"The broad spectrum of intellectual disability (ID) patients' clinical manifestations, the heterogeneity of ID genetic variation, and the diversity of the phenotypic variation represent major challenges for ID diagnosis. By exploiting a manually curated systematic phenotyping cohort of 3803 patients harboring ID, we identified 704 pathogenic genes, 3848 pathogenic sites, and 2075 standard phenotypes for underlying molecular perturbations and their phenotypic impact. We found the positive correlation between the number of phenotypes and that of patients that revealed their extreme heterogeneities, and the relative contribution of multiple determinants to the heterogeneity of ID phenotypes. Nevertheless, despite the extreme heterogeneity in phenotypes, the ID genes had a specific bias of mutation types, and the top 44 genes that ranked by the number of patients accounted for 39.9% of total patients. More interesting, enriched co-occurrent phenotypes and co-occurrent phenotype networks for each gene had the potential for prioritizing ID genes, further exhibited the convergences of ID phenotypes. Then we established a predictor called IDpred using machine learning methods for ID pathogenic genes prediction. Using 10-fold cross-validation, our evaluation shows remarkable AUC values for IDpred (auc = 0.978), demonstrating the robustness and reliability of our tool. Besides, we built the most comprehensive database of ID phenotyped cohort to date: IDminer http://218.4.234.74:3100/IDminer/, which included the curated ID data and integrated IDpred tool for both clinical and experimental researchers. 
The IDminer serves as an important resource and user-friendly interface to help researchers investigate ID data, and provide important implications for the diagnosis and pathogenesis of developmental disorders of cognition.",2020-02-07 +31231774,Mr.Vc: a database of microarray and RNA-seq of Vibrio cholerae. ,"Gram-negative bacterium Vibrio cholerae is the causative agent of cholera, a life-threatening diarrheal disease. During its infectious cycle, V. cholerae routinely switches niches between aquatic environment and host gastrointestinal tract, in which V. cholerae modulates its transcriptome pattern accordingly for better survival and proliferation. A comprehensive resource for V. cholerae transcriptome will be helpful for cholera research, including prevention, diagnosis and intervention strategies. In this study, we constructed a microarray and RNA-seq database of V. cholerae (Mr.Vc), containing gene transcriptional expression data of 145 experimental conditions of V. cholerae from various sources, covering 25 937 entries of differentially expressed genes. In addition, we collected relevant information including gene annotation, operons they may belong to and possible interaction partners of their protein products. With Mr.Vc, users can easily find transcriptome data they are interested in, such as the experimental conditions in which a gene of interest was differentially expressed in, or all genes that were differentially expressed in an experimental condition. We believe that Mr.Vc database is a comprehensive data repository dedicated to V. cholerae and could be a useful resource for all researchers in related fields. Mr.Vc is available for free at http://bioinfo.life.hust.edu.cn/mrvc.",2019-01-01 +31566222,GWAS Atlas: a curated resource of genome-wide variant-trait associations in plants and animals.,"GWAS Atlas (https://bigd.big.ac.cn/gwas/) is a manually curated resource of genome-wide variant-trait associations for a wide range of species. 
Unlike existing related resources, it features comprehensive integration of a high-quality collection of 75 467 variant-trait associations for 614 traits across 7 cultivated plants (cotton, Japanese apricot, maize, rapeseed, rice, sorghum and soybean) and two domesticated animals (goat and pig), which were manually curated from 254 publications. We integrated these associations into GWAS Atlas and presented them in terms of variants, genes, traits, studies and publications. More importantly, all associations and traits were annotated and organized based on a suite of ontologies (Plant Trait Ontology, Animal Trait Ontology for Livestock, etc.). Taken together, GWAS Atlas integrates high-quality curated GWAS associations for animals and plants and provides user-friendly web interfaces for data browsing and downloading, accordingly serving as a valuable resource for genetic research of important traits and breeding application.",2020-01-01 +31410488,ncRNA-eQTL: a database to systematically evaluate the effects of SNPs on non-coding RNA expression across cancer types.,"Numerous studies indicate that non-coding RNAs (ncRNAs) have critical functions across biological processes, and single-nucleotide polymorphisms (SNPs) could contribute to diseases or traits through influencing ncRNA expression. However, the associations between SNPs and ncRNA expression are largely unknown. Therefore, genome-wide expression quantitative trait loci (eQTL) analysis to assess the effects of SNPs on ncRNA expression, especially in multiple cancer types, will help to understand how risk alleles contribute toward tumorigenesis and cancer development. Using genotype data and expression profiles of ncRNAs of >8700 samples from The Cancer Genome Atlas (TCGA), we developed a computational pipeline to systematically identify ncRNA-related eQTLs (ncRNA-eQTLs) across 33 cancer types. We identified a total of 6 133 278 and 721 122 eQTL-ncRNA pairs in cis-eQTL and trans-eQTL analyses, respectively. 
Further survival analyses identified 8312 eQTLs associated with patient survival times. Furthermore, we linked ncRNA-eQTLs to genome-wide association study (GWAS) data and found 262 332 ncRNA-eQTLs overlapping with known disease- and trait-associated loci. Finally, a user-friendly database, ncRNA-eQTL (http://ibi.hzau.edu.cn/ncRNA-eQTL), was developed for free searching, browsing and downloading of all ncRNA-eQTLs. We anticipate that such an integrative and comprehensive resource will improve our understanding of the mechanistic basis of human complex phenotypic variation, especially for ncRNA- and cancer-related studies.",2020-01-01 +31843802,"An Online Database for Exploring Over 2,000 Arabidopsis Small RNA Libraries.","Small RNAs (sRNAs) play a wide range of important roles in plants, from maintaining genome stability and enhancing disease resistance to regulating developmental processes. Over the past decade, next-generation sequencing technologies have allowed us to explore the sRNA populations with unprecedented depth and accuracy. The community has accumulated a tremendous amount of sRNA sequencing (sRNA-seq) data from various genotypes, tissues, and treatments. However, it has become increasingly challenging to access these ""big data"" and extract useful information, particularly for researchers lacking sophisticated bioinformatics tools and expensive computational resources. Here, we constructed an online website, Arabidopsis Small RNA Database (ASRD, http://ipf.sustech.edu.cn/pub/asrd), that allows users to easily explore the information from publicly available Arabidopsis (Arabidopsis thaliana) sRNA libraries. Our database contains ∼2.3 billion sRNA reads, representing ∼250 million unique sequences from 2,024 sRNA-seq libraries. 
We downloaded the raw data for all libraries and reprocessed them with a unified pipeline so that the normalized abundance of any particular sRNA or the sum of abundances of sRNAs from a genic or transposable element region can be compared across all libraries. We also integrated an online Integrative Genomics Viewer browser into our Web site for convenient visualization. ASRD is a free, web-accessible, and user-friendly database that supports the direct query of over 2,000 Arabidopsis sRNA-seq libraries. We believe this resource will help plant researchers take advantage of the vast next-generation sequencing datasets available in the public domain.",2019-12-16 +34924988,A Toolbox and Crowdsourcing Platform for Automatic Labeling of Independent Components in Electroencephalography.,"Independent Component Analysis (ICA) is a conventional approach to exclude non-brain signals such as eye movements and muscle artifacts from electroencephalography (EEG). A rejection of independent components (ICs) is usually performed in semiautomatic mode and requires experts' involvement. As also revealed by our study, experts' opinions about the nature of a component often disagree, highlighting the need to develop a robust and sustainable automatic system for EEG ICs classification. The current article presents a toolbox and crowdsourcing platform for Automatic Labeling of Independent Components in Electroencephalography (ALICE) available via link http://alice.adase.org/. The ALICE toolbox aims to build a sustainable algorithm to remove artifacts and find specific patterns in EEG signals using ICA decomposition based on accumulated experts' knowledge. The difference from previous toolboxes is that the ALICE project will accumulate different benchmarks based on crowdsourced visual labeling of ICs collected from publicly available and in-house EEG recordings. 
The choice of labeling is based on the estimation of IC time-series, IC amplitude topography, and spectral power distribution. The platform allows supervised machine learning (ML) model training and re-training on available data subsamples for better performance in specific tasks (i.e., movement artifact detection in healthy or autistic children). Also, current research implements the novel strategy for consentient labeling of ICs by several experts. The provided baseline model could detect noisy IC and components related to the functional brain oscillations such as alpha and mu rhythm. The ALICE project implies the creation and constant replenishment of the IC database, which will improve ML algorithms for automatic labeling and extraction of non-brain signals from EEG. The toolbox and current dataset are open-source and freely available to the researcher community.",2021-12-02 +31740968,SEVA 3.0: an update of the Standard European Vector Architecture for enabling portability of genetic constructs among diverse bacterial hosts.,"The Standard European Vector Architecture 3.0 database (SEVA-DB 3.0, http://seva.cnb.csic.es) is the update of the platform launched in 2013 both as a web-based resource and as a material repository of formatted genetic tools (mostly plasmids) for analysis, construction and deployment of complex bacterial phenotypes. The period between the first version of SEVA-DB and the present time has witnessed several technical, computational and conceptual advances in genetic/genomic engineering of prokaryotes that have enabled upgrading of the utilities of the updated database. Novelties include not only a more user-friendly web interface and many more plasmid vectors, but also new links of the plasmids to advanced bioinformatic tools. These provide an intuitive visualization of the constructs at stake and a range of virtual manipulations of DNA segments that were not possible before. 
Finally, the list of canonical SEVA plasmids is available in machine-readable SBOL (Synthetic Biology Open Language) format. This ensures interoperability with other platforms and affords simulations of their behaviour under different in vivo conditions. We argue that the SEVA-DB will remain a useful resource for extending Synthetic Biology approaches towards non-standard bacterial species as well as genetically programming new prokaryotic chassis for a suite of fundamental and biotechnological endeavours.",2020-01-01 +31642488,ExonSkipDB: functional annotation of exon skipping event in human.,"Exon skipping (ES) is reported to be the most common alternative splicing event due to loss of functional domains/sites or shifting of the open reading frame (ORF), leading to a variety of human diseases and considered therapeutic targets. To date, systematic and intensive annotations of ES events based on the skipped exon units in cancer and normal tissues are not available. Here, we built ExonSkipDB, the ES annotation database available at https://ccsm.uth.edu/ExonSkipDB/, aiming to provide a resource and reference for functional annotation of ES events in multiple cancer and tissues to identify therapeutically targetable genes in individual exon units. We collected 14 272 genes that have 90 616 and 89 845 ES events across 33 cancer types and 31 normal tissues from The Cancer Genome Atlas (TCGA) and Genotype-Tissue Expression (GTEx). For the ES events, we performed multiple functional annotations. These include ORF assignment of exon skipped transcript, studies of lost protein functional features due to ES events, and studies of exon skipping events associated with mutations and methylations based on multi-omics evidence. 
ExonSkipDB will be a unique resource for cancer and drug research communities to identify therapeutically targetable exon skipping events.",2020-01-01 +34772427,Communication between cells: exosomes as a delivery system in prostate cancer.,"Despite the considerable efforts in screening and diagnostic protocols, prostate cancer still represents the second leading cause of cancer-related death in men. Many patients with localized disease and low risk of recurrence have a favourable outcome. In a substantial proportion of patients, however, the disease progresses and becomes aggressive. The mechanisms that promote prostate cancer progression remain still debated. Many findings point to the role of cross-communication between prostate tumor cells and their surrounding microenvironment during the disease progression. Such a connection fosters survival, proliferation, angiogenesis, metastatic spreading and drug-resistance of prostate cancer. Recent years have seen a profound interest in understanding the way by which prostate cancer cells communicate with the surrounding cells in the microenvironment. In this regard, direct cell-to-cell contacts and soluble factors have been identified. Increasing evidence indicates that PC cells communicate with the surrounding cells through the release of extracellular vesicles, mainly the exosomes. By directly acting in stromal or prostate cancer epithelial cells, exosomes represent a critical intercellular communication system. By querying the public database ( https://pubmed.ncbi.nlm.nih.gov ) for the past 10 years, we have found more than four hundred papers. Among them, we have extrapolated the most relevant about the role of exosomes in prostate cancer malignancy and progression. Emerging data concerning the use of these vesicles in diagnostic management and therapeutic guidance of PC patients are also presented. 
Video Abstract.",2021-11-12 +33753737,Fault2SHA Central Apennines database and structuring active fault data for seismic hazard assessment.,"We present a database of field data for active faults in the central Apennines, Italy, including trace, fault and main fault locations with activity and location certainties, and slip-rate, slip-vector and surface geometry data. As advances occur in our capability to create more detailed fault-based hazard models, depending on the availability of primary data and observations, it is desirable that such data can be organized in a way that is easily understood and incorporated into present and future models. The database structure presented herein aims to assist this process. We recommend stating what observations have led to different location and activity certainty and presenting slip-rate data with point location coordinates of where the data were collected with the time periods over which they were calculated. Such data reporting allows more complete uncertainty analyses in hazard and risk modelling. The data and maps are available as kmz, kml, and geopackage files with the data presented in spreadsheet files and the map coordinates as txt files. The files are available at: https://doi.org/10.1594/PANGAEA.922582 .",2021-03-22 +,Natural Infection of Tomatoes (Solanum lycopersicum) by Euphorbia Yellow Mosaic Virus Isolates Across Four Brazilian States,"Severe yield losses induced by a complex of whitefly-transmitted Begomovirus species (family Geminiviridae) have been reported in tomatoes in Brazil (Reis et al. 2020). 
Nine isolates were obtained from tomato plants exhibiting begomovirus-like symptoms (namely, apical and interveinal chlorosis, yellow spots, and stunting) during independent field surveys: one isolate in Sumaré, São Paulo (SP) State (isolate SP-066) in 2001, two in Serra Negra, Minas Gerais (MG) (MG-012 and MG-016) in 2002, five in Caxias do Sul, Rio Grande do Sul (RS) (RS-039, RS-045, RS-046, RS-047, and RS-058) in 2011, and one in Domingos Martins, Espírito Santo (ES) (ES-148) in 2016. Disease incidence across all sampled fields ranged from 30% (in Domingos Martins, ES) to 90% in Sumaré, SP. Total DNA extraction was done by a modified CTAB method (Boiteux et al. 1999). Begomovirus infection was confirmed in all isolates by selective amplification of viral DNA-A segments using the primer pairs PAL1v1978/PAR1c496 (Rojas et al. 1993) and BegomoAFor1/BegomoARev1 (Ha et al. 2006), which produce two large and nonoverlapping segments (≈1,120 and ≈1,205 bp, respectively). These PCR amplicons were initially characterized via direct Sanger dideoxy sequencing at Embrapa Vegetable Crops (CNPH). BLASTn analysis of the partial DNA-A genomes of these nine isolates indicated identity levels of 95 to 97% to three euphorbia yellow mosaic virus (EuYMV) reference isolates (KY559532, JF756674, and KY559583) found infecting the weed Euphorbia heterophylla L. The entire DNA-A (2,609 nt, MN746971) and DNA-B (2,579 nt, MN746970) components of the MG-016 isolate were obtained via high-throughput sequencing using an Illumina HiSeq 2500 system (Macrogen, South Korea). Sequences were assembled with CLC Genomics Workbench version 10. Contigs were validated by BLASTx and BLASTn and compared with the ssDNA virus database at NCBI (https://www.ncbi.nlm.nih.gov/). 
The fully characterized MG-016 isolate displayed identity levels ranging from 97 to 99% to the EuYMV reference isolates as well as similar genomic features such as the conserved TATA box, nonanucleotide, and iterons (that were in agreement with a cognate nature of the DNA-A and DNA-B components). A partial sequence of the DNA-B genome was also obtained for the MG-012 isolate (MT7831942). The isolates MG-012 and MG-016 were found in mixed infections with tomato severe rugose virus and tomato golden vein virus, respectively. In addition, the complete DNA-A genomes of ES-148 (MN746972) and SP-066 (MN782438) were also obtained via a combination of primer walking and Sanger dideoxy sequencing, displaying 96 to 98% identity to EuYMV isolates. To our knowledge, this is the first report of multiple and independent events of natural infection of tomatoes by EuYMV isolates. Our results confirm the natural host status of tomatoes to EuYMV isolates as indicated in previous infectivity assays using biolistic inoculation (Barreto et al. 2013). The weed E. heterophylla is widely disseminated and often present within tomato fields due to its higher levels of tolerance to the major herbicide (metribuzin) employed in this crop. Therefore, this weed may act as a persistent reservoir of tomato-infecting EuYMV isolates, which may allow the selection of viral populations potentially more adapted to this vegetable crop.",2021-02-01 +35033323,Development of the first DNA database and identification portal for identification of Unidentified bodies in India - UMID.,"Identifying missing persons and unidentified dead bodies is a well-documented global problem in recent years. To curb this issue, countries such as the USA, UK, and Australia already have well-established DNA databases. Considering the alarming number of unidentified/unclaimed dead bodies reported in India every year, it is evident that the current practices are not sufficient to establish their identities. 
Forensic medicine professionals are ethically, morally, and dutybound to collect information about missing and unidentified persons and work with the government agencies to determine their identity. Concerning the social and public interest, we have developed the first-ever identification portal and DNA database of unidentified dead bodies autopsied at the Department of Forensic Medicine and Toxicology, AIIMS, New Delhi, India. After the investigation officer's informed consent, biological samples from unidentified dead bodies and a detailed phenotypic description, anthropological data and other visual characteristics of the deceased are recorded at the time of autopsy. This information is uploaded on our database which is available for public access, and the genotypic information generated through STR analysis is only available for internal usage. Claimants (biological relatives) may browse through the URL (https://umid-aiims.icmr.org.in/), and if they wish to claim an unidentified dead body, they may approach as per the given guidelines. The DNA profiles generated include a total of 16 STRs (15 autosomal tetranucleotide microsatellite STRs and 1 Sex Chromosome Specific STR). The claimant's STR profile is run through the questioned database to look for a potential match. If positive, the investigating officer of that particular case is informed for further necessary action. Until December 2020, our database consisted of the information of 255 individuals and two unidentified cadavers were identified. This project's success can also lead to a pioneering National DNA database of unidentified and missing persons in India.",2021-12-09 +32759329,Application of Transcriptional Gene Modules to Analysis of Caenorhabditis elegans' Gene Expression Data.,"Identification of co-expressed sets of genes (gene modules) is used widely for grouping functionally related genes during transcriptomic data analysis. 
An organism-wide atlas of high-quality gene modules would provide a powerful tool for unbiased detection of biological signals from gene expression data. Here, using a method based on independent component analysis we call DEXICA, we have defined and optimized 209 modules that broadly represent transcriptional wiring of the key experimental organism C. elegans. These modules represent responses to changes in the environment (e.g., starvation, exposure to xenobiotics), genes regulated by transcription factors (e.g., ATFS-1, DAF-16), genes specific to tissues (e.g., neurons, muscle), genes that change during development, and other complex transcriptional responses to genetic, environmental and temporal perturbations. Interrogation of these modules reveals processes that are activated in long-lived mutants in cases where traditional analyses of differentially expressed genes fail to do so. Additionally, we show that modules can inform the strength of the association between a gene and an annotation (e.g., GO term). Analysis of ""module-weighted annotations"" improves on several aspects of traditional annotation-enrichment tests and can aid in functional interpretation of poorly annotated genes. We provide an online interactive resource with tutorials at http://genemodules.org/, in which users can find detailed information on each module, check genes for module-weighted annotations, and use both of these to analyze their own gene expression data (generated using any platform) or gene sets of interest.",2020-10-05 +30304689,"The 10,000 Immunomes Project: Building a Resource for Human Immunology.","There is increasing appreciation that the immune system plays critical roles not only in the traditional domains of infection and inflammation but also in many areas of biology, including tumorigenesis, metabolism, and even neurobiology. 
However, one of the major barriers for understanding human immunological mechanisms is that immune assays have not been reproducibly characterized for a sufficiently large and diverse healthy human cohort. Here, we present the 10,000 Immunomes Project (10KIP), a framework for growing a diverse human immunology reference, from ImmPort, a publicly available resource of subject-level immunology data. Although some measurement types are sparse in the presently deposited ImmPort database, the extant data allow for a diversity of robust comparisons. Using 10KIP, we describe variations in serum cytokines and leukocytes by age, race, and sex; define a baseline cell-cytokine network; and describe immunologic changes in pregnancy. All data in the resource are available for visualization and download at http://10kimmunomes.org/.",2018-10-01 +,The Ultrafast and Accurate Mapping Algorithm FANSe3: Mapping a Human Whole-Genome Sequencing Dataset Within 30 Minutes,"Aligning billions of reads generated by the next-generation sequencing (NGS) to reference sequences, termed “mapping”, is the time-consuming and computationally-intensive process in most NGS applications. A fast, accurate and robust mapping algorithm is highly needed. Therefore, we developed the FANSe3 mapping algorithm, which can map a 30 × human whole-genome sequencing (WGS) dataset within 30 min, a 50 × human whole exome sequencing (WES) dataset within 30 s, and a typical mRNA-seq dataset within seconds in a single-server node without the need for any hardware acceleration feature. Like its predecessor FANSe2, the error rate of FANSe3 can be kept as low as 10⁻⁹ in most cases, which is more robust than the Burrows–Wheeler transform-based algorithms. Error allowance hardly affected the identification of a driver somatic mutation in clinically relevant WGS data and provided robust gene expression profiles regardless of the parameter settings and sequencer used. 
The novel algorithm, designed for high-performance cloud-computing after infrastructures, will break the bottleneck of speed and accuracy in NGS data analysis and promote NGS applications in various fields. The FANSe3 algorithm can be downloaded from the website: http://www.chi-biotech.com/fanse3/.",2021-02-01 +,Stability of Weekly Intramuscular Estradiol Cypionate in a Transgender Woman,"Abstract Background: Transgender women often take estrogen with or without an antiandrogen to achieve the physical and physiological changes of estrogen. Estradiol may be administered through intramuscular (IM) injection weekly or every other week (1). It is thought that weekly IM estradiol may be more stable than every other week administration. The objective of this case was to evaluate the levels of IM estradiol cypionate when administered weekly. Clinical Case: A 38-year-old transgender woman with a past medical history of gender dysphoria, type 2 diabetes mellitus, hyperlipidemia, obstructive sleep apnea compliant with continuous positive airway pressure, class 3 severe obesity, anxiety, depression and a non-smoker, presented for evaluation for hormone replacement therapy (HRT). The patient wished to begin IM estradiol because she heard it was most effective. She was started on estradiol cypionate 0.5 mL (2.5 mg) IM every Sunday along with spironolactone 100 mg daily. Approximately one month later, her estradiol was 65.8 pg/mL on a Saturday, total testosterone by LC-MS/MS was suppressed to 7 ng/dL (male: 300-1080 ng/dL, female: 9 - 55 ng/dL), FSH <0.3 mIU/mL (1.5-12.4), LH <0.3 mIU/mL (1.7-8.6). We increased her estradiol cypionate to 0.8 mL (4 mg) IM every Sunday to achieve goal estradiol levels up to 100-200 pg/mL. Approximately 2 months later, estradiol was up to 160 pg/mL on a Thursday. FSH and LH remained suppressed. Spironolactone was stopped. Patient gave her estradiol dose every Sunday between 4:15-7 PM. She injected on the lateral thigh switching sides every week. 
At the patient’s request, blood was drawn on distinct days of the week going further from the day of injection as data collection progressed. The data we received: Monday: 153 pg/mL, Tuesday: 164 pg/mL, Wednesday: 147 pg/mL, Thursday: 122 pg/mL, Friday: 134 pg/mL, Saturday: 167 pg/mL. All labs were drawn between approximately 9:30-10:15 AM. Conclusion: Our patient wanted to see just how stable weekly IM estradiol cypionate was. We found she was able to stay within target physiologic estrogen levels, 100-200 pg/mL, throughout the week. Overall mean +/- standard deviation levels for the six samples taken between injections were 148 +/- 17 pg/mL (range: 122-167). This case provides reassurance to clinicians concerned IM estradiol may cause supraphysiologic estradiol levels. References: 1. Wylie C Hembree et al. Endocrine Treatment of Gender-Dysphoric/Gender-Incongruent Persons: An Endocrine Society Clinical Practice Guideline, The Journal of Clinical Endocrinology & Metabolism, Volume 102, Issue 11, 1 November 2017, Pages 3869-3903, https://doi.org/10.1210/jc.2017-01658",2021-05-03 +31551426,CommonMind Consortium provides transcriptomic and epigenomic data for Schizophrenia and Bipolar Disorder.,"Schizophrenia and bipolar disorder are serious mental illnesses that affect more than 2% of adults. While large-scale genetics studies have identified genomic regions associated with disease risk, less is known about the molecular mechanisms by which risk alleles with small effects lead to schizophrenia and bipolar disorder. In order to fill this gap between genetics and disease phenotype, we have undertaken a multi-cohort genomics study of postmortem brains from controls, individuals with schizophrenia and bipolar disorder. Here we present a public resource of functional genomic data from the dorsolateral prefrontal cortex (DLPFC; Brodmann areas 9 and 46) of 986 individuals from 4 separate brain banks, including 353 diagnosed with schizophrenia and 120 with bipolar disorder. 
The genomic data include RNA-seq and SNP genotypes on 980 individuals, and ATAC-seq on 269 individuals, of which 264 are a subset of individuals with RNA-seq. We have performed extensive preprocessing and quality control on these data so that the research community can take advantage of this public resource available on the Synapse platform at http://CommonMind.org .",2019-09-24 +32028878,PlaPPISite: a comprehensive resource for plant protein-protein interaction sites.,"BACKGROUND:Protein-protein interactions (PPIs) play very important roles in diverse biological processes. Experimentally validated or predicted PPI data have become increasingly available in diverse plant species. To further explore the biological functions of PPIs, understanding the interaction details of plant PPIs (e.g., the 3D structural contexts of interaction sites) is necessary. By integrating bioinformatics algorithms, interaction details can be annotated at different levels and then compiled into user-friendly databases. In our previous study, we developed AraPPISite, which aimed to provide interaction site information for PPIs in the model plant Arabidopsis thaliana. Considering that the application of AraPPISite is limited to one species, it is very natural that AraPPISite should be evolved into a new database that can provide interaction details of PPIs in multiple plants. DESCRIPTION:PlaPPISite (http://zzdlab.com/plappisite/index.php) is a comprehensive, high-coverage and interaction details-oriented database for 13 plant interactomes. In addition to collecting 121 experimentally verified structures of protein complexes, the complex structures of experimental/predicted PPIs in the 13 plants were also constructed, and the corresponding interaction sites were annotated. For the PPIs whose 3D structures could not be modelled, the associated domain-domain interactions (DDIs) and domain-motif interactions (DMIs) were inferred. 
To facilitate the reliability assessment of predicted PPIs, the source species of interolog templates, GO annotations, subcellular localizations and gene expression similarities are also provided. JavaScript packages were employed to visualize structures of protein complexes, protein interaction sites and protein interaction networks. We also developed an online tool for homology modelling and protein interaction site annotation of protein complexes. All data contained in PlaPPISite are also freely available on the Download page. CONCLUSION:PlaPPISite provides the plant research community with an easy-to-use and comprehensive data resource for the search and analysis of protein interaction details from the 13 important plant species.",2020-02-06 +35935266,Novel computational models offer alternatives to animal testing for assessing eye irritation and corrosion potential of chemicals.,"Eye irritation and corrosion are fundamental considerations in developing chemicals to be used in or near the eye, from cleaning products to ophthalmic solutions. Unfortunately, animal testing is currently the standard method to identify compounds that cause eye irritation or corrosion. Yet, there is growing pressure on the part of regulatory agencies both in the USA and abroad to develop New Approach Methodologies (NAMs) that help reduce the need for animal testing and address unmet need to modernize safety evaluation of chemical hazards. In furthering the development and applications of computational NAMs in chemical safety assessment, in this study we have collected the largest expertly curated dataset of compounds tested for eye irritation and corrosion, and employed this data to build and validate binary and multi-classification Quantitative Structure-Activity Relationships (QSAR) models that can reliably assess eye irritation/corrosion potential of novel untested compounds. 
QSAR models were generated with Random Forest (RF) and Multi-Descriptor Read Across (MuDRA) machine learning (ML) methods, and validated using a 5-fold external cross-validation protocol. These models demonstrated high balanced accuracy (CCR of 0.68-0.88), sensitivity (SE of 0.61-0.84), positive predictive value (PPV of 0.65-0.90), specificity (SP of 0.56-0.91), and negative predictive value (NPV of 0.68-0.85). Overall, MuDRA models outperformed RF models and were applied to predict compounds' irritation/corrosion potential from the Inactive Ingredient Database, which contains components present in FDA-approved drug products, and from the Cosmetic Ingredient Database, the European Commission source of information on cosmetic substances. All models built and validated in this study are publicly available at the STopTox web portal (https://stoptox.mml.unc.edu/). These models can be employed as reliable tools for identifying potential eye irritant/corrosive compounds.",2021-12-05 +34992453,Development and External Validation of a Nomogram to Predict Cancer-Specific Survival in Patients with Primary Intestinal Non-Hodgkin Lymphomas.,"

Purpose

Primary intestinal non-Hodgkin lymphoma (PINHL) is a biologically and clinically heterogeneous disease. Few individual prediction models are available to establish prognoses for PINHL patients. Herein, a novel nomogram was developed and verified to predict long-term cancer-specific survival (CSS) rates in PINHL patients, and a convenient online risk calculator was created using the nomogram.

Materials and methods

Data on PINHL patients from January 1, 2004, to December 31, 2015, obtained from the Surveillance, Epidemiology, and End Results (SEER) database (n = 2372; training cohort), were analyzed by Cox regression to identify independent prognostic parameters for CSS. The nomogram was internally and externally validated in a SEER cohort (n = 1014) and a First Affiliated Hospital of Guangzhou University of Chinese Medicine (FAHGUCM) cohort (n = 37), respectively. Area under the receiver operating characteristic curve (AUC), calibration curves, and decision curve analysis (DCA) were used to evaluate nomogram performance.

Results

Five independent predictors were identified, namely, age, marital status, Ann Arbor Stage, B symptoms, and histologic type. The nomogram showed good performance in discrimination and calibration, with C-indices of 0.772 (95% CI: 0.754-0.790), 0.763 (95% CI: 0.734-0.792), and 0.851 (95% CI: 0.755-0.947) in the training, internal validation, and external validation cohorts, respectively. The calibration curve indicated that the nomogram was accurate, and DCA showed that the nomogram had a high clinical application value. AUC values indicated that the prediction accuracy of the nomogram was higher than that of Ann Arbor Stage (training cohort: 0.804 vs 0.630; internal validation cohort: 0.800 vs 0.637; external validation cohort: 0.811 vs 0.598), and Kaplan-Meier curves indicated the same.

Conclusion

A nomogram was developed to assist clinicians in predicting the survival of PINHL patients and in making optimal treatment decisions. An online calculator based on the nomogram was made available at https://cuifenzhang.shinyapps.io/DynNomapp/.",2021-12-20 +31906603,RNAInter in 2020: RNA interactome repository with increased coverage and annotation.,"Research on RNA-associated interactions has exploded in recent years, and increasing numbers of studies are not limited to RNA-RNA and RNA-protein interactions but also include RNA-DNA/compound interactions. To facilitate the development of the interactome and promote understanding of the biological functions and molecular mechanisms of RNA, we updated RAID v2.0 to RNAInter (RNA Interactome Database), a repository for RNA-associated interactions that is freely accessible at http://www.rna-society.org/rnainter/ or http://www.rna-society.org/raid/. Compared to RAID v2.0, new features in RNAInter include (i) 8-fold more interaction data and 94 additional species; (ii) more definite annotations organized, including RNA editing/localization/modification/structure and homology interaction; (iii) advanced functions including fuzzy/batch search, interaction network and RNA dynamic expression and (iv) four embedded RNA interactome tools: RIscoper, IntaRNA, PRIdictor and DeepBind. Consequently, RNAInter contains >41 million RNA-associated interaction entries, involving more than 450 thousand unique molecules, including RNA, protein, DNA and compound. Overall, RNAInter provides a comprehensive RNA interactome resource for researchers and paves the way to investigate the regulatory landscape of cellular RNAs.",2020-01-01 +,Mental health outcomes during COVID-19: A scoping review & recommendations for geriatrics research,"

Introduction

In addition to being at greater risk for severe illness and fatality during infectious outbreaks, older adults are also vulnerable to mental and physical health risks due to increased social isolation. Studies on the mental health effects of the COVID-19 pandemic are rapidly emerging, with reviews focused on the prevalence of psychiatric symptoms in patients infected with COVID-19, healthcare workers, or mixed samples of healthcare workers, patient populations, and community-dwelling individuals. However, recent reviews of mental health during COVID-19 have failed to focus on findings in the geriatric population. This review provides a scope of the current literature on the prevalence of psychiatric symptoms in the general population, with a geriatric lens, by examining older adults' representation across studies and the pandemic's impact on older adults’ mental health.

Methods

The PRISMA Extension for Scoping Reviews Checklist was used as the methodological framework to conduct the review. A review of the literature on the topic was conducted through PubMed and https://www.medrxiv.org/ from January 1st, 2020 to June 1st, 2020. Studies were included if they (i) focused on mental health outcomes during the COVID-19 pandemic, (ii) used validated measures of psychological or psychiatric symptomatology; (iii) included samples of the general population or in which they comprised the majority; (iv) reported quantitative outcomes of prevalence (expressed as the percentage of participants that exceed the cut-off for normal on the measures). Excluded were (i) opinion papers or commentaries without prevalence data; (ii) studies that reported qualitative data or population means only; (iii) studies with samples of primarily healthcare workers (i.e., the entire or >50% of the sample were healthcare workers), (iv) studies with patient samples such as those with confirmed or probable COVID-19 infection, clinic or hospitalized patients, or pregnant women. Fifty-six full-text articles were included in the review, with 33 published articles and 23 pre-print articles.

Results

Cross-sectional designs were employed by all 56 studies that met the inclusion criteria. The results pooled across the studies showed that 1 in 3 individuals across all age groups endorses post-traumatic stress symptoms, while approximately 30% experience depression, anxiety, and overall stress. Amongst available data in older adults 60 years of age or older, anxiety rates were comparable to the overall population (30.2%), while the prevalence of depression was higher, with nearly 1 in 3 older adults experiencing clinically significant depression symptoms. Effects of age on mental health outcomes are mixed, with several studies showing that the risk or severity of psychiatric symptoms may be lower in older adults. In contrast, others show the opposite pattern or no effect of age. The current literature on mental health outcomes during the COVID-19 pandemic has two important methodological limitations. The first is that the cross-sectional design in all reviewed studies does not permit firm conclusions regarding whether the current reported prevalence rates can be attributed to the onset of the pandemic or whether they represent a change from baseline. A second limitation is that all studies reviewed typically assessed mental health outcomes using self-report measures completed by anonymous responders in online surveys. No studies included follow-up assessments, and none conducted clinical or psychiatric interviews to confirm the presence of clinically significant psychiatric symptomatology or to establish a diagnosis. Self-report measures are used as screening tools in psychological research, but there are limitations to relying on scale scores to establish clinical significance.

Conclusions

Overall, nearly 1 in 3 individuals have experienced negative effects on mental health during COVID-19, a figure that exceeds rates reported in front-line healthcare workers. Paradoxically, some studies report that older age may be a protective factor against psychiatric symptoms, but this is based on a limited number of studies. Our main recommendations for future research include: (i) the use of longitudinal study designs to permit assessment of change in mental health and to yield more definitive conclusions regarding the impact of the pandemic on mental health, and (ii) ensuring a wider representation of older adults through the use of methods, other than online survey platforms, to assess mental health. More high-quality data is needed to understand the pandemic's effects on the general population's mental health, which can inform public health and social policy decisions aimed at alleviating these burdens during the pandemic.

Funding

The study was funded by the Ontario Ministry of Health and Long-Term Care Alternative Funding Plan. The funders had no role in the design of the study, analysis, or preparation of the manuscript.",2021-03-16 +,SynBiopython: an open-source software library for Synthetic Biology,"Abstract Advances in hardware automation in synthetic biology laboratories are not yet fully matched by those of their software counterparts. Such automated laboratories, now commonly called biofoundries, require software solutions that would help with many specialized tasks such as batch DNA design, sample and data tracking, and data analysis, among others. Typically, many of the challenges facing biofoundries are shared, yet there is frequent wheel-reinvention where many labs develop similar software solutions in parallel. In this article, we present the first attempt at creating a standardized, open-source Python package. A number of tools will be integrated and developed that we envisage will become the obvious starting point for software development projects within biofoundries globally. Specifically, we describe the current state of available software, present usage scenarios and case studies for common problems, and finally describe plans for future development. SynBiopython is publicly available at the following address: http://synbiopython.org.",2021-01-01 +31612960,PhaSePro: the database of proteins driving liquid-liquid phase separation.,"Membraneless organelles (MOs) are dynamic liquid condensates that host a variety of specific cellular processes, such as ribosome biogenesis or RNA degradation. MOs form through liquid-liquid phase separation (LLPS), a process that relies on multivalent weak interactions of the constituent proteins and other macromolecules. Since the first discoveries of certain proteins being able to drive LLPS, it emerged as a general mechanism for the effective organization of cellular space that is exploited in all kingdoms of life. 
While numerous experimental studies report novel cases, the computational identification of LLPS drivers is lagging behind, and many open questions remain about the sequence determinants, composition, regulation and biological relevance of the resulting condensates. Our limited ability to overcome these issues is largely due to the lack of a dedicated LLPS database. Therefore, here we introduce PhaSePro (https://phasepro.elte.hu), an openly accessible, comprehensive, manually curated database of experimentally validated LLPS driver proteins/protein regions. It not only provides a wealth of information on such systems, but improves the standardization of data by introducing novel LLPS-specific controlled vocabularies. PhaSePro can be accessed through an appealing, user-friendly interface and thus has definite potential to become the central resource in this dynamically developing field.",2020-01-01 +34048545,"dbGENVOC: database of GENomic Variants of Oral Cancer, with special reference to India. ","Oral cancer is highly prevalent in India and is the most frequent cancer type among Indian males. It is also very common in southeast Asia. India has participated in the International Cancer Genome Consortium (ICGC) and some national initiatives to generate large-scale genomic data on oral cancer patients and analyze to identify associations and systematically catalog the associated variants. We have now created an open, web-accessible database of these variants found significantly associated with Indian oral cancer patients, with a user-friendly interface to enable easy mining. We have value added to this database by including relevant data collated from various sources on other global populations, thereby providing opportunities of comparative geographical and/or ethnic analyses. Currently, no other database of similar nature is available on oral cancer. 
We have developed Database of GENomic Variants of Oral Cancer, a browsable online database framework for storage, retrieval and analysis of large-scale data on genomic variants and make it freely accessible to the scientific community. Presently, the web-accessible database allows potential users to mine data on ∼24 million clinically relevant somatic and germline variants derived from exomes (n = 100) and whole genomes (n = 5) of Indian oral cancer patients; all generated by us. Variant data from The Cancer Genome Atlas and data manually curated from peer-reviewed publications were also incorporated into the database for comparative analyses. It allows users to query the database by a single gene, multiple genes, multiple variant sites, genomic region, patient ID and pathway identities. Database URL: http://research.nibmg.ac.in/dbcares/dbgenvoc/.",2021-05-01 +,"Using FLOSS for Storing, Processing and Linking Corpus Data","Corpus data is widely used to solve different linguistic, educational and applied problems. The Tatar corpus management system (http://tugantel.tatar) is specifically developed for Turkic languages. The functionality of our corpus management system includes a search of lexical units, morphological and lexical search, a search of syntactic units, a search of N-grams and others. The search is performed using open source tools (database management system MariaDB, Redis data store). This article describes the process of choosing FLOSS for the main components of our system and also processing a search query and building a linked open dataset based on corpus data.",2020-01-01 +29509874,CarbonylDB: a curated data-resource of protein carbonylation sites.,"Motivation:Oxidative stress and protein damage have been associated with over 200 human ailments including cancer, stroke, neuro-degenerative diseases and aging. 
Protein carbonylation, a chemically diverse oxidative post-translational modification, is widely considered as the biomarker for oxidative stress and protein damage. Despite their importance and extensive studies, no database/resource on carbonylated proteins/sites exists. As such information is very useful to research in biology/medicine, we have manually curated a data-resource (CarbonylDB) of experimentally-confirmed carbonylated proteins/sites. Results:The CarbonylDB currently contains 1495 carbonylated proteins and 3781 sites from 21 species, with human, rat and yeast as the top three species. We have made further analyses of these carbonylated proteins/sites and presented their occurrence and occupancy patterns. Carbonylation site data on serum albumin, in particular, provides a fine model system to understand the dynamics of oxidative protein modifications/damage. Availability and implementation:The CarbonylDB is available as a web-resource and for download at http://digbio.missouri.edu/CarbonylDB/. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +,First Report of a ‘Candidatus Phytoplasma fraxini’-Related Strain Associated with Potato in Colombia,"Potato is one of the most important crops in Colombia, with a total production of 2,819,020 tons in 149,060 ha in 2017 (FAOSTAT 2018). More than 10 varieties of Solanum tuberosum and S. phureja are grown in Colombia; among them, variety Superior covers 20% of the cultivated area. In the last 5 years, potato growers have reported a new disease in Cundinamarca state, and our objective was to test for the presence of phytoplasmas in affected plants. Mejia et al. (2011) associated phytoplasmas of the groups 16SrV and 16SrXII with potatoes in central Colombia. In May 2015, we sampled a crop of the variety Superior from Tausa, Cundinamarca (5°10′53.2″N, 73°51′26.1″W). 
More than 70% of the plants showed symptoms such as curly and yellowing leaves with purple margins, and abnormally short or long internodes. Five symptomatic and five nonsymptomatic plants were sampled. Total DNA was extracted from the leaves by a modified cetyltrimethylammonium bromide protocol method (Prince et al. 1993), and the extracts were tested by nested polymerase chain reaction (PCR) with phytoplasma 16SrDNA universal primers P1A/P7A followed by R16mR1/R16mF2 for sequencing, and P1A/P7A followed by R16F2n/R16R2 for restriction fragment length polymorphism (RFLP) analysis with restriction enzymes AluI, MseI, and RsaI (Gundersen and Lee 1996; Lee et al. 2004). Amplicons were obtained for all symptomatic plants, although in one sample the band was too faint for analysis. The other four amplicons produced RFLP patterns consistent with group 16SrVII with all the restriction enzymes. These amplicons were sequenced by Macrogen, Korea. Good quality sequences were obtained from two samples (GenBank nos. MK956091 and MK956092). These sequences were analyzed with the iPhyClassifier suite (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi), and both had 99.8% similarity with the reference strain ‘Candidatus Phytoplasma fraxini’ (GenBank no. AF092209). The sequence MK956091 had a similarity coefficient of 0.97, suggesting that it belongs to a new subgroup, with the most similar reference pattern that of 16SrVII-A (GenBank no. AF092209). The other sequence was not analyzed because it was too short. A phylogenetic tree was built by the neighbor-joining method with 1,000 bootstrap replicates, including representative phytoplasmas sequences, and the two sequences clustered with ‘Ca. P. fraxini’ isolates from Colombia and the United States. Phytoplasmas were not detected in the nonsymptomatic plants. Finally, leaf petioles of symptomatic plants that were positive by nested PCR were studied by transmission electron microscopy (Devonshire 2013). 
Translucent cells that resemble phytoplasmas were observed in the sieve elements of the phloem tissue. Although the prevalence of this disease has not been estimated, potato growers report these symptoms increasingly. ‘Ca. P. fraxini’ is known to infect urban tree species in Bogota such as Fraxinus uhdei, Liquidambar styraciflua, Populus nigra, and Quercus humboldtii, among others (Franco-Lara et al. 2014; unpublished results). Interestingly, the new 16SrVII subgroup detected in potatoes is also present in urban trees such as P. nigra (GenBank nos. MH795203 to MH795207) and in Q. humboldtii (MH795215 and MH795216). Bogotá city is located in Cundinamarca state; these results suggest that the pathogen is moving from the trees to the crops.",2020-10-01 +33063992,"Harnessing In Silico, In Vitro, and In Vivo Data to Understand the Toxicity Landscape of Polycyclic Aromatic Compounds (PACs).","Polycyclic aromatic compounds (PACs) are compounds with a minimum of two six-atom aromatic fused rings. PACs arise from incomplete combustion or thermal decomposition of organic matter and are ubiquitous in the environment. Within PACs, carcinogenicity is generally regarded to be the most important public health concern. However, toxicity in other systems (reproductive and developmental toxicity, immunotoxicity) has also been reported. Despite the large number of PACs identified in the environment, research attention to understand exposure and health effects of PACs has focused on a relatively limited subset, namely polycyclic aromatic hydrocarbons (PAHs), the PACs with only carbon and hydrogen atoms. To triage the rest of the vast number of PACs for more resource-intensive testing, we developed a data-driven approach to contextualize hazard characterization of PACs, by leveraging the available data from various data streams (in silico toxicity, in vitro activity, structural fingerprints, and in vivo data availability). 
The PACs were clustered on the basis of their in silico toxicity profiles containing predictions from 8 different categories (carcinogenicity, cardiotoxicity, developmental toxicity, genotoxicity, hepatotoxicity, neurotoxicity, reproductive toxicity, and urinary toxicity). We found that PACs with the same parent structure (e.g., fluorene) could have diverse in silico toxicity profiles. In contrast, PACs with similar substituted groups (e.g., alkylated-PAHs) or heterocyclics (e.g., N-PACs) with varying ring sizes could have similar in silico toxicity profiles, suggesting that these groups are better candidates for toxicity read-across analysis. The clusters/regions associated with certain in silico toxicity, in vitro activity, and structural fingerprints were identified. We found that genotoxicity/carcinogenicity (in silico toxicity) and xenobiotic homeostasis and stress response (in vitro activity), respectively, dominate the toxicity/activity variation seen in the PACs. The ""hot spots"" with enriched toxicity/activity in conjunction with availability of in vivo carcinogenicity data revealed regions of either data-poor (hydroxylated-PAHs) or data-rich (unsubstituted, parent PAHs) PACs. These regions offer potential targets for prioritization of further in vivo assessment and for chemical read-across efforts. The analysis results are searchable through an interactive web application (https://ntp.niehs.nih.gov/go/pacs_tableau), allowing for alternative hypothesis generation.",2020-10-16 +31665430,ENdb: a manually curated database of experimentally supported enhancers for human and mouse.,"Enhancers are a class of cis-regulatory elements that can increase gene transcription by forming loops in intergenic regions, introns and exons. Enhancers, as well as their associated target genes, and transcription factors (TFs) that bind to them, are highly associated with human disease and biological processes. 
Although some enhancer databases have been published, most only focus on enhancers identified by high-throughput experimental techniques. Therefore, it is highly desirable to construct a comprehensive resource of manually curated enhancers and their related information based on low-throughput experimental evidences. Here, we established a comprehensive manually-curated enhancer database for human and mouse, which provides a resource for experimentally supported enhancers, and to annotate the detailed information of enhancers. The current release of ENdb documents 737 experimentally validated enhancers and their related information, including 384 target genes, 263 TFs, 110 diseases and 153 functions in human and mouse. Moreover, the enhancer-related information was supported by experimental evidences, such as RNAi, in vitro knockdown, western blotting, qRT-PCR, luciferase reporter assay, chromatin conformation capture (3C) and chromosome conformation capture-on-chip (4C) assays. ENdb provides a user-friendly interface to query, browse and visualize the detailed information of enhancers. The database is available at http://www.licpathway.net/ENdb.",2020-01-01 +34247232,Comparative Evaluation of Shape Retrieval Methods on Macromolecular Surfaces: An Application of Computer Vision Methods in Structural Bioinformatics. ,"The investigation of the structure of biological systems at the molecular level gives insight about their functions and dynamics. Shape and surface of biomolecules are fundamental to molecular recognition events. Characterizing their geometry can lead to more adequate predictions of their interactions. In the present work, we assess the performance of reference shape retrieval methods from the computer vision community on protein shapes. Shape retrieval methods are efficient in identifying orthologous proteins and tracking large conformational changes. 
This work illustrates the interest for the protein surface shape as a higher-level representation of the protein structure that 1) abstracts the underlying protein sequence, structure or fold, 2) allows the use of shape retrieval methods to screen large database of protein structures to identify surficial homologs and possible interacting partners, 3) opens an extension of the protein structure-function paradigm towards a protein structure-surface(s)-function paradigm. All data are available online at http://datasetmachat.drugdesign.fr. Supplementary data are available at Bioinformatics online.",2021-07-10 +30204897,AnimalTFDB 3.0: a comprehensive resource for annotation and prediction of animal transcription factors.,"The Animal Transcription Factor DataBase (AnimalTFDB) is a resource aimed to provide the most comprehensive and accurate information for animal transcription factors (TFs) and cofactors. The AnimalTFDB has been maintained and updated for seven years and we will continue to improve it. Recently, we updated the AnimalTFDB to version 3.0 (http://bioinfo.life.hust.edu.cn/AnimalTFDB/) with more data and functions to improve it. AnimalTFDB contains 125,135 TF genes and 80,060 transcription cofactor genes from 97 animal genomes. Besides the expansion in data quantity, some new features and functions have been added. These new features are: (i) more accurate TF family assignment rules; (ii) classification of transcription cofactors; (iii) TF binding sites information; (iv) the GWAS phenotype related information of human TFs; (v) TF expressions in 22 animal species; (vi) a TF binding site prediction tool to identify potential binding TFs for nucleotide sequences; (vii) a separate human TF database web interface (HumanTFDB) was designed for better utilizing the human TFs. 
The new version of AnimalTFDB provides a comprehensive annotation and classification of TFs and cofactors, and will be a useful resource for studies of TF and transcription regulation.",2019-01-01 +32856859,Robust Estimation of Breast Cancer Incidence Risk in Presence of Incomplete or Inaccurate Information.,"

Purpose

To evaluate the robustness of multiple machine learning classifiers for breast cancer risk estimation in the presence of incomplete or inaccurate information.

Data and methods

Open data for this study was obtained from the BCSC Data Resource (http://breastscreening.cancer.gov/). We conducted two ablation-type experiments to compare the robustness of different classifiers where we randomly switched known information to missing with a missing probability of pm in one experiment, and randomly corrupted the existing information with a probability of pc in another experiment. We considered three prominent machine-learning classifiers such as Logistic regression (LR), Random Forests (RF) and a custom Neural Network (NN) architecture and compared their degradation of discrimination performance as a function of increasing probability of missing or inaccurate data.

Results

LR, RF and custom NN resulted in an Area Under Curve (AUC) of 0.645, 0.643 and 0.649, respectively, on a test set with 500,000 total observations. When we manipulated the data by varying probabilities pm and pc from 0 to 1, NN resulted in better performance in terms of AUC compared to RF and LR as long as less than half the data was missing/inaccurate (that is, for values of pm < 0.5 and pc < 0.5). However, for missing (pm) or corruption (pc) probabilities above 0.5, LR gave similar performance as the custom NN. RF resulted in overall poorer performance when the data had additional missing or incorrect entries.

Conclusion

In cases where the input information is missing or inaccurate, our experiments show that the proposed custom NN provides reliable risk estimates in medical datasets like BCSC. These results are particularly important in health care applications where not every attribute of the individual participant might be available.
.",2020-08-01 +,GIFT – A Global Inventory of Floras and Traits for macroecology and biogeography,"AIM: To understand how functional traits and evolutionary history shape the geographic distribution of plant life on Earth, we need to integrate high‐quality and global‐scale distribution data with functional and phylogenetic information. Large‐scale distribution data for plants are, however, often restricted to either certain taxonomic groups or geographic regions. Range maps only exist for a small subset of all plant species and digitally available point‐occurrence information is biased both geographically and taxonomically. Floras and checklists represent an alternative, yet rarely used potential source of information. They contain highly curated information about the species composition of a clearly defined area, and together virtually cover the entire global land surface. Here, we report on our recent efforts to mobilize this information for macroecological and biogeographical analyses in the GIFT database, the Global Inventory of Floras and Traits. LOCATION: Global. TAXON: Land plants (Embryophyta). METHODS: GIFT integrates plant distributions from regional Floras and checklists with functional traits, phylogenetic information, and region‐level geographic, environmental and socio‐economic data. It contains information about the floristic status (native, endemic, alien and naturalized) and takes advantage of the wealth of trait information in the regional Floras, complemented by data from global trait databases. RESULTS: GIFT 1.0 holds species lists for 2,893 regions across the whole globe including ~315,000 taxonomically standardized species names (i.e. c. 80% of all known land plant species) and ~3 million species‐by‐region occurrences. 
Based on a hierarchical and taxonomical derivation scheme, GIFT contains information for 83 functional traits and more than 2.3 million trait‐by‐species combinations and achieves unprecedented coverage in categorical traits such as woodiness (~233,000 spp.) or growth form (~213,000 spp.). MAIN CONCLUSIONS: Here, we present the structure, content and automated workflows of GIFT and a corresponding web‐interface (http://gift.uni-goettingen.de) as proof of concept for the feasibility and potential of mobilizing aggregated biodiversity data for global macroecological and biogeographical research.",2020-01-01 +31418763,AntiHIV-Pred: web-resource for in silico prediction of anti-HIV/AIDS activity.,"

Motivation

Identification of new molecules promising for treatment of HIV-infection and HIV-associated disorders remains an important task in order to provide safer and more effective therapies. Utilization of prior knowledge by application of computer-aided drug discovery approaches reduces time and financial expenses and increases the chances of positive results in anti-HIV R&D. To provide the scientific community with a tool that allows estimating of potential agents for treatment of HIV-infection and its comorbidities, we have created a freely-available web-resource for prediction of relevant biological activities based on the structural formulae of drug-like molecules.

Results

Over 50 000 experimental records for anti-retroviral agents from ChEMBL database were extracted for creating the training sets. After careful examination, about seven thousand molecules inhibiting five HIV-1 proteins were used to develop regression and classification models with the GUSAR software. The average values of R2 = 0.95 and Q2 = 0.72 in validation procedure demonstrated the reasonable accuracy and predictivity of the obtained (Q)SAR models. Prediction of 81 biological activities associated with the treatment of HIV-associated comorbidities with 92% mean accuracy was realized using the PASS program.

Availability and implementation

Freely available on the web at http://www.way2drug.com/hiv/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +31504173,pCRM1exportome: database of predicted CRM1-dependent Nuclear Export Signal (NES) motifs in cancer-related genes.,"

Motivation

The consensus pattern of Nuclear Export Signal (NES) is a short sequence motif that is commonly identified in protein sequences, whether the motif acts as an NES (true positive) or not (false positive). Finding more plausible NES functioning regions among the vast array of consensus-matching segments would provide an interesting resource for further experimental validation. Better defined NES should also allow meaningful mapping of cancer-related mutation positions, leading to plausible explanations for the relationship between nuclear export and disease.

Results

Possible NES candidate regions are extracted from the cancer-related human reference proteome. Extracted NES are scored for reliability by combining sequence-based and structure-based approaches. The confidently identified NES candidate motifs were checked for overlap with cancer-related mutation positions annotated in the COSMIC database. Among the ∼700 cancer-related sequences in the COSMIC Cancer Gene Census, 178 sequences are predicted to have possible NES motifs containing cancer-related mutations at their key positions. These lists are organized into our database (pCRM1exportome), and other protein sequences in the human reference proteome can also be retrieved by their UniProt IDs.

Availability and implementation

The database is freely available at http://prodata.swmed.edu/pCRM1exportome.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +33994075,A Review of the Literature Organized Into a New Database: RHeference.,"Hundreds of articles containing heterogeneous data describe D variants or add to the knowledge of known alleles. Data can be difficult to find despite existing online blood group resources and genetic and literature databases. We have developed a modern, elaborate database for D variants, thanks to an extensive literature search with meticulous curation of 387 peer-reviewed articles and 80 abstracts from major conferences and other sources. RHeference contains entries for 710 RHD alleles, 11 RHCE alleles, 30 phenotype descriptions (preventing data loss from historical sources), 35 partly characterized alleles, 3 haplotypes, and 16 miscellaneous entries. The entries include molecular, phenotypic, serological, alloimmunization, haplotype, geographical, and other data, detailed for each source. The main characteristics are summarized for each entry. The sources for all information are included and easily accessible through doi and PMID links. Overall, the database contains more than 10,000 individual pieces of data. We have set up the database architecture based on our previous expertise on database setup and biocuration for other topics, using modern technologies such as the Django framework, BioPython, Bootstrap, and Jquery. This architecture allows an easy access to data and enables simple and complex queries: combining multiple mutations, keywords, or any of the characteristics included in the database. RHeference provides a complement to existing resources and will continue to grow as our knowledge expands and new articles are published. 
The database url is http://www.rheference.org/.",2021-04-20 +34554585,"Impact of COVID-19 and other infectious conditions requiring isolation on the provision of and adaptations to fundamental nursing care in hospital in terms of overall patient experience, care quality, functional ability, and treatment outcomes: systematic review.","

Aim

This systematic review identifies, appraises and synthesizes the evidence on the provision of fundamental nursing care to hospitalized patients with a highly infectious virus and the effectiveness of adaptations to overcome barriers to care.

Design

Systematic review.

Data sources

In July 2020, we searched Medline, PsycINFO (OvidSP), CINAHL (EBSCOhost), BNI (ProQuest), WHO COVID-19 Database (https://search.bvsalud.org/) MedRxiv (https://www.medrxiv.org/), bioRxiv (https://www.biorxiv.org/) and also Google Scholar, TRIP database and NICE Evidence, forwards citation searching and reference checking of included papers, from 2016 onwards.

Review methods

We included quantitative and qualitative research reporting (i) the views, perceptions and experiences of patients who have received fundamental nursing care whilst in hospital with COVID-19, MERS, SARS, H1N1 or EVD or (ii) the views, perceptions and experiences of professional nurses and non-professionally registered care workers who have provided that care. We included review articles, commentaries, protocols and guidance documents. One reviewer performed data extraction and quality appraisal and was checked by another person.

Results

Of 3086 references, we included 64 articles; 19 empirical research and 45 review articles, commentaries, protocols and guidance documents spanning five pandemics. Four main themes (and 11 sub-themes) were identified. Barriers to delivering fundamental care were wearing personal protective equipment, adequate staffing, infection control procedures and emotional challenges of care. These barriers were addressed by multiple adaptations to communication, organization of care, staff support and leadership.

Conclusion

To prepare for continuation of the COVID-19 pandemic and future pandemics, evaluative studies of adaptations to fundamental healthcare delivery must be prioritized to enable evidence-based care to be provided in future.

Impact

Our review identifies the barriers nurses experience in providing fundamental care during a pandemic, highlights potential adaptations that address barriers and ensure positive healthcare experiences and draws attention to the need for evaluative research on fundamental care practices during pandemics.",2021-09-23 +33661371,OryzaGenome2.1: Database of Diverse Genotypes in Wild Oryza Species.,"

Background

OryzaGenome ( http://viewer.shigen.info/oryzagenome21detail/index.xhtml ), a feature within Oryzabase ( https://shigen.nig.ac.jp/rice/oryzabase/ ), is a genomic database for wild Oryza species that provides comparative and evolutionary genomics approaches for the rice research community.

Results

Here we release OryzaGenome2.1, the first major update of OryzaGenome. The main feature in this version is the inclusion of newly sequenced genotypes and their meta-information, giving a total of 217 accessions of 19 wild Oryza species (O. rufipogon, O. barthii, O. longistaminata, O. meridionalis, O. glumaepatula, O. punctata, O. minuta, O. officinalis, O. rhizomatis, O. eichingeri, O. latifolia, O. alta, O. grandiglumis, O. australiensis, O. brachyantha, O. granulata, O. meyeriana, O. ridleyi, and O. longiglumis). These 19 wild species belong to 9 genome types (AA, BB, CC, BBCC, CCDD, EE, FF, GG, and HHJJ), representing wide genomic diversity in the genus. Using the genotype information, we analyzed the genome diversity of Oryza species. Other features of OryzaGenome facilitate the use of information on single nucleotide polymorphisms (SNPs) between O. sativa and its wild progenitor O. rufipogon in rice research, including breeding as well as basic science. For example, we provide Variant Call Format (VCF) files for genome-wide SNPs of 33 O. rufipogon accessions against the O. sativa reference genome, IRGSP1.0. In addition, we provide a new SNP Effect Table function, allowing users to identify SNPs or small insertion/deletion polymorphisms in the 33 O. rufipogon accessions and to search for the effect of these polymorphisms on protein function if they reside in the coding region (e.g., are missense or nonsense mutations). Furthermore, the SNP Viewer for 446 O. rufipogon accessions was updated by implementing new tracks for possible selective sweep regions and highly mutated regions that were potentially exposed to selective pressures during the process of domestication.

Conclusion

OryzaGenome2.1 focuses on comparative genomic analysis of diverse wild Oryza accessions collected around the world and on the development of resources to speed up the identification of critical trait-related genes, especially from O. rufipogon. It aims to promote the use of genotype information from wild accessions in rice breeding and potential future crop improvements. Diverse genotypes will be a key resource for evolutionary studies in Oryza, including polyploid biology.",2021-03-04 +34715518,GeneCoNet: A web application server for constructing cancer patient-specific gene correlation networks with prognostic gene pairs.,"

Background and objective

Most prognostic gene signatures that have been known for cancer are either individual genes or combination of genes. Both individual genes and combination of genes do not provide information on gene-gene relations, and often have less prognostic significance than random genes associated with cell proliferation. Several methods for generating sample-specific gene networks have been proposed, but programs implementing the methods are not publicly available.

Methods

We have developed a method that builds gene correlation networks specific to individual cancer patients and derives prognostic gene correlations from the networks. A gene correlation network specific to a patient is constructed by identifying gene-gene relations that are significantly different from normal samples. Prognostic gene pairs are obtained by carrying out the Cox proportional hazards regression and the log-rank test for every gene pair.

Results

We built a web application server called GeneCoNet with thousands of tumor samples in TCGA. Given a tumor sample ID of TCGA, GeneCoNet dynamically constructs a gene correlation network specific to the sample as output. As an additional output, it provides information on prognostic gene correlations in the network. GeneCoNet found several prognostic gene correlations for six types of cancer, but there were no prognostic gene pairs common to multiple cancer types.

Conclusion

Extensive analysis of patient-specific gene correlation networks suggests that patients with a larger subnetwork of prognostic gene pairs have shorter survival time than the others and that patients with a subnetwork that contains more genes participating in prognostic gene pairs have shorter survival time than the others. GeneCoNet can be used as a valuable resource for generating gene correlation networks specific to individual patients and for identifying prognostic gene correlations. It is freely accessible at http://geneconet.inha.ac.kr.",2021-10-20 +31918660,A crustacean annotated transcriptome (CAT) database.,"BACKGROUND:Decapods are an order of crustaceans which includes shrimps, crabs, lobsters and crayfish. They occur worldwide and are of great scientific interest as well as being of ecological and economic importance in fisheries and aquaculture. However, our knowledge of their biology mainly comes from the group which is most closely related to crustaceans - insects. Here we produce a de novo transcriptome database, crustacean annotated transcriptome (CAT) database, spanning multiple tissues and the life stages of seven crustaceans. DESCRIPTION:A total of 71 transcriptome assemblies from six decapod species and a stomatopod species, including the coral shrimp Stenopus hispidus, the cherry shrimp Neocaridina davidi, the redclaw crayfish Cherax quadricarinatus, the spiny lobster Panulirus ornatus, the red king crab Paralithodes camtschaticus, the coconut crab Birgus latro, and the zebra mantis shrimp Lysiosquillina maculata, were generated. Differential gene expression analyses within species were generated as a reference and included in a graphical user interface database at http://cat.sls.cuhk.edu.hk/. Users can carry out gene name searches and also access gene sequences based on a sequence query using the BLAST search function. 
CONCLUSIONS:The data generated and deposited in this database offers a valuable resource for the further study of these crustaceans, as well as being of use in aquaculture development.",2020-01-09 +33683131,An Update on MRMAssayDB: A Comprehensive Resource for Targeted Proteomics Assays in the Community.,"Precise multiplexed quantification of proteins in biological samples can be achieved by targeted proteomics using multiple or parallel reaction monitoring (MRM/PRM). Combined with internal standards, the method achieves very good repeatability and reproducibility enabling excellent protein quantification and allowing longitudinal and cohort studies. A laborious part of performing such experiments lies in the preparation steps dedicated to the development and validation of individual protein assays. Several public repositories host information on targeted proteomics assays, including NCI's Clinical Proteomic Tumor Analysis Consortium assay portals, PeptideAtlas SRM Experiment Library, SRMAtlas, PanoramaWeb, and PeptideTracker, with all offering varying levels of details. We introduced MRMAssayDB in 2018 as an integrated resource for targeted proteomics assays. The Web-based application maps and links the assays from the repositories, includes comprehensive up-to-date protein and sequence annotations, and provides multiple visualization options on the peptide and protein level. We have extended MRMAssayDB with more assays and extensive annotations. Currently it contains >828 000 assays covering >51 000 proteins from 94 organisms, of which >17 000 proteins are present in >2400 biological pathways, and >48 000 mapping to >21 000 Gene Ontology terms. This is an increase of about four times the number of assays since introduction. We have expanded annotations of interaction, biological pathways, and disease associations. 
A newly added visualization module for coupled molecular structural annotation browsing allows the user to interactively examine peptide sequence and any known PTMs and disease mutations, and map all to available protein 3D structures. Because of its integrative approach, MRMAssayDB enables a holistic view of suitable proteotypic peptides and commonly used transitions in empirical data. Availability: http://mrmassaydb.proteincentre.com.",2021-03-08 +33616668,QMaker: Fast and Accurate Method to Estimate Empirical Models of Protein Evolution.,"Amino acid substitution models play a crucial role in phylogenetic analyses. Maximum likelihood (ML) methods have been proposed to estimate amino acid substitution models; however, they are typically complicated and slow. In this article, we propose QMaker, a new ML method to estimate a general time-reversible $Q$ matrix from a large protein data set consisting of multiple sequence alignments. QMaker combines an efficient ML tree search algorithm, a model selection for handling the model heterogeneity among alignments, and the consideration of rate mixture models among sites. We provide QMaker as a user-friendly function in the IQ-TREE software package (http://www.iqtree.org) supporting the use of multiple CPU cores so that biologists can easily estimate amino acid substitution models from their own protein alignments. We used QMaker to estimate new empirical general amino acid substitution models from the current Pfam database as well as five clade-specific models for mammals, birds, insects, yeasts, and plants. Our results show that the new models considerably improve the fit between model and data and in some cases influence the inference of phylogenetic tree topologies.[Amino acid replacement matrices; amino acid substitution models; maximum likelihood estimation; phylogenetic inferences.].",2021-08-01 +34150040,Prognostic gene biomarker identification in liver cancer by data mining.,"

Background

Liver cancer is a common cancer that enormously threatens the health of people worldwide. With the continuous advances of high-throughput gene sequencing technology and computer data mining technology, researchers can understand liver cancer based on the current accumulation of gene expression data and clinical information.

Methods

We downloaded the TCGA data of liver cancer from the cancer-related website (https://genome-cancer.ucsc.edu/proj/site/hgHeatmap/), comprising 438 patients and 20,530 genes. After removing some patients with missing survival data, we collected 397 patients' samples. Our data were collected from a public database without real patient participation. While matching the patient samples in the gene expression spectrum, we attained 330 samples with primary tumors and 50 samples with normal solid tissue.

Results

After the 330 tumor tissue samples were randomized into two equal-numbered groups (one is a training set, and the other is a test set), we selected 26 gene biomarkers from the training set and validated them in the test set. Based on the selected 26 gene biomarkers, RBM14, ALG11, MAG, SETD3, HOXD10 and other 26 genes were considered independent risk factors for the prognosis of liver cancer, and genes such as GHR significantly affect human growth hormone for liver cancer. The findings discovered that low-risk patients survived remarkably better than the high-risk patients (P<0.001), and the area under the curve (AUC) of receiver operating characteristic curve (ROC) was greater than 0.5.

Conclusion

Our numerical results showed that these 26 gene biomarkers can be used to guide the effective prognostic therapy of patients with liver cancer.",2021-05-15 +32707486,Access to stem cell data and registration of pluripotent cell lines: The Human Pluripotent Stem Cell Registry (hPSCreg).,"The value of human pluripotent stem cells (hPSC) in regenerative medicine has yet to reach its full potential. The road from basic research tool to clinically validated PSC-derived cell therapy products is a long and winding one, leading researchers, clinicians, industry and regulators alike into undiscovered territory. All stakeholders must work together to ensure the development of safe and effective cell therapies. Similarly, utilization of hPSC in meaningful and controlled disease modeling and drug screening applications requires information on the quality and suitability of the applied cell lines. Central to these common goals is the complete documentation of hPSC data, including the ethical provenance of the source material, the hPSC line derivation, culture conditions and genetic constitution of the lines. Data surrounding hPSC is scattered amongst diverse sources, including publications, supplemental data, researcher lab books, accredited lab reports, certificates of analyses and public data repositories. Not all of these data sources are publicly accessible nor associated with metadata nor stored in a standard manner, such that data can be easily found and retrieved. The Human Pluripotent Stem Cell Registry (hPSCreg; https://hpscreg.eu/) was started in 2007 to impart provenance and transparency towards hPSC research by registering and collecting standard properties of hPSC lines. 
In this chapter, we present a short primer on the history of stem cell-based products, summarize the ethical and regulatory issues introduced in the course of working with hPSC-derived products and their associated data, and finally present the Human Pluripotent Stem Cell Registry as a valuable resource for all stakeholders in therapies and disease modeling based on hPSC-derived cells.",2020-06-27 +34545834,The Security State of the German Health Web: An Exploratory Study.,"The internet has become an important resource for health information and for interactions with healthcare providers. However, information of all types can go through many servers and networks before reaching its intended destination and any of these has the potential to intercept or even manipulate the exchanged information if data's transfer is not adequately protected. As trust is a fundamental concept in healthcare relationships, it is crucial to offer a secure medical website to maintain the same level of trust as provided in a face-to-face meeting. This study provides a first analysis of the SSL/TLS security of and the security headers used within the health-related web limited to web pages in German, the German health web (GHW).

Methods

testssl.sh and TLS-Scanner were used to analyze the URLs of the 1,000 top-ranked health-related web sites (according to PageRank) for each of the country-code top-level domains: "".de"", "".at"" and "".ch"".

Results

Our study revealed that most websites in the GHW are potentially vulnerable to common SSL/TLS security vulnerabilities, offer deprecated SSL/TLS protocol versions and mostly do not implement HTTP security headers at all.

Conclusions

These findings question the concept of trust within the GHW. Website owners should reconsider the use of outdated SSL/TLS protocol versions for compatibility reasons. Additionally, HTTP security headers should be implemented more consistently to provide additional security aspects. In future work, the authors intend to repeat this study and to incorporate a website's category, i.e. governmental or public health, to get a more detailed view of the GHW's security.",2021-09-01 +33988716,EDGAR3.0: comparative genomics and phylogenomics on a scalable infrastructure.,"The EDGAR platform, a web server providing databases of precomputed orthology data for thousands of microbial genomes, is one of the most established tools in the field of comparative genomics and phylogenomics. Based on precomputed gene alignments, EDGAR allows quick identification of the differential gene content, i.e. the pan genome, the core genome, or singleton genes. Furthermore, EDGAR features a wide range of analyses and visualizations like Venn diagrams, synteny plots, phylogenetic trees, as well as Amino Acid Identity (AAI) and Average Nucleotide Identity (ANI) matrices. During the last few years, the average number of genomes analyzed in an EDGAR project increased by two orders of magnitude. To handle this massive increase, a completely new technical backend infrastructure for the EDGAR platform was designed and launched as EDGAR3.0. For the calculation of new EDGAR3.0 projects, we are now using a scalable Kubernetes cluster running in a cloud environment. A new storage infrastructure was developed using a file-based high-performance storage backend which ensures timely data handling and efficient access. The new data backend guarantees a memory efficient calculation of orthologs, and parallelization has led to drastically reduced processing times. 
Based on the advanced technical infrastructure new analysis features could be implemented including POCP and FastANI genomes similarity indices, UpSet intersecting set visualization, and circular genome plots. Also the public database section of EDGAR was largely updated and now offers access to 24,317 genomes in 749 free-to-use projects. In summary, EDGAR 3.0 provides a new, scalable infrastructure for comprehensive microbial comparative gene content analysis. The web server is accessible at http://edgar3.computational.bio.",2021-07-01 +34453697,Best clinical practice guidance for conscious sedation of children undergoing dental treatment: an EAPD policy document.,"

Background

Due to fear and/or behaviour management problems, some children are unable to cooperate for dental treatment using local anaesthesia and psychological support alone. Sedation is required for these patients in order for dentists to be able to deliver high quality, pain-free dental care. The aim of this guideline is to evaluate the efficacy and relative efficacy of conscious sedation agents and dosages for behaviour management in paediatric dentistry and to provide guidance as to which sedative agents should be used.

Methods

These guidelines were developed using a multi-step approach adapted from that outlined by the National Institute for Clinical Excellence (NICE (2020) Developing NICE Guidelines: the manual. https://www.nice.org.uk/process/pmg20/chapter/introduction#main-stages-of-guideline-development . Accessed 7 Oct 2020). Evidence for this guideline was provided from a pre-existing Cochrane review (Ashley et al. Cochrane Database Syst Rev 12:CD003877, 2018) supplemented by an updated search and data extraction up to May 2020.

Results

Studies were from 18 different countries and had recruited 4131 participants overall with an average of 70 participants per study. Ages ranged from 0 to 16 years with an average age of 5.6 years across all included studies. A wide variety of drugs or combinations of drugs (n = 38) were used and delivered orally, intranasally, intravenously, rectally, intramuscularly, submucosally, transmucosally or by inhalation sedation. Twenty-four different outcome measures for behaviour were used. The wide range of drug combinations and outcome measures used greatly complicated description and analysis of the data.

Conclusion

Oral midazolam is recommended for conscious dental sedation. Midazolam delivered via other methods or nitrous oxide/oxygen sedation could be considered, but the evidence for both was very low.",2021-08-28 +34962496,What Are the Minimally Important Changes of Four Commonly Used Patient-reported Outcome Measures for 36 Hand and Wrist Condition-Treatment Combinations?,"

Background

Patient-reported outcome measures (PROMs) are frequently used to assess treatment outcomes for hand and wrist conditions. To adequately interpret these outcomes, it is important to determine whether a statistically significant change is also clinically relevant. For this purpose, the minimally important change (MIC) was developed, representing the minimal within-person change in outcome that patients perceive as a beneficial treatment effect. Prior studies demonstrated substantial differences in MICs between condition-treatment combinations, suggesting that MICs are context-specific and cannot be reliably generalized. Hence, a study providing MICs for a wide diversity of condition-treatment combinations for hand and wrist conditions will contribute to more accurate treatment evaluations.

Questions/purposes

(1) What are the MICs of the most frequently used PROMs for common condition-treatment combinations of hand and wrist conditions? (2) Do MICs vary based on the invasiveness of the treatment (nonsurgical treatment or surgical treatment)?

Methods

This study is based on data from a longitudinally maintained database of patients with hand and wrist conditions treated in one of 26 outpatient clinics in the Netherlands between November 2013 and November 2020. Patients were invited to complete several validated PROMs before treatment and at final follow-up. All patients were invited to complete the VAS for pain and hand function. Depending on the condition, patients were also invited to complete the Michigan Hand outcomes Questionnaire (MHQ) (finger and thumb conditions), the Patient-rated Wrist/Hand Evaluation (PRWHE) (wrist conditions), or the Boston Carpal Tunnel Questionnaire (BCTQ) (nerve conditions). Additionally, patients completed the validated Satisfaction with Treatment Result Questionnaire at final follow-up. Final follow-up timepoints were 3 months for nonsurgical and minor surgical treatment (including trigger finger release) and 12 months for major surgical treatment (such as trapeziectomy). Our database included 55,651 patients, of whom we excluded 1528 who only required diagnostic management, 25,099 patients who did not complete the Satisfaction with Treatment Result Questionnaire, 3509 patients with missing data in the PROM of interest at baseline or follow-up, and 1766 patients who were part of condition-treatment combinations with less than 100 patients. The final sample represented 43% (23,749) of all patients and consisted of 36 condition-treatment combinations. In this final sample, 26% (6179) of patients were managed nonsurgically and 74% (17,570) were managed surgically. Patients had a mean ± SD age of 55 ± 14 years, and 66% (15,593) of patients were women. To estimate the MIC, we used two anchor-based methods (the anchor mean change and the MIC predict method), which were triangulated afterward to obtain a single MIC. 
Applying this method, we calculated the MIC for 36 condition-treatment combinations, comprising 22 different conditions, and calculated the MIC for combined nonsurgical and surgical treatment groups. To examine whether the MIC differs between nonsurgical and surgical treatments, we performed a Wilcoxon signed rank test to compare the MICs of all PROM scores between nonsurgical and surgical treatment.

Results

We found a large variation in triangulated MICs between the condition-treatment combinations. For example, for nonsurgical treatment of hand OA, the MICs of VAS pain during load clustered around 10 (interquartile range 8 to 11), for wrist osteotomy/carpectomy it was around 25 (IQR 24 to 27), and for nerve decompression it was 21. Additionally, the MICs of the MHQ total score ranged from 4 (nonsurgical treatment of CMC1 OA) to 15 (trapeziectomy with LRTI and bone tunnel), for the PRWHE total score it ranged from 2 (nonsurgical treatment of STT OA) to 29 (release of first extensor compartment), and for the BCTQ Symptom Severity Scale it ranged from 0.44 (nonsurgical treatment of carpal tunnel syndrome) to 0.87 (carpal tunnel release). An overview of all MIC values is available in a freely available online application at: https://analyse.equipezorgbedrijven.nl/shiny/mic-per-treatment/. In the combined treatment groups, the triangulated MIC values were lower for nonsurgical treatment than for surgical treatment (p < 0.001). The MICs for nonsurgical treatment can be approximated to be one-ninth (IQR 0.08 to 0.13) of the scale (approximately 11 on a 100-point instrument), and surgical treatment had MICs that were approximately one-fifth (IQR 0.14 to 0.24) of the scale (approximately 19 on a 100-point instrument).

Conclusion

MICs vary between condition-treatment combinations and differ depending on the invasiveness of the intervention. Patients receiving a more invasive treatment have higher treatment expectations, may experience more discomfort from their treatment, or may feel that the investment of undergoing a more invasive treatment should yield greater improvement, leading to a different perception of what constitutes a beneficial treatment effect.

Clinical relevance

Our findings indicate that the MIC is context-specific and may be misleading if applied inappropriately. Implementation of these condition-specific and treatment-specific MICs in clinical research allows for a better study design and to achieve more accurate treatment evaluations. Consequently, this could aid clinicians in better informing patients about the expected treatment results and facilitate shared decision-making in clinical practice. Future studies may focus on adaptive techniques to achieve individualized MICs, which may ultimately aid clinicians in selecting the optimal treatment for individual patients.",2021-12-27 +34961588,"Exploring the mechanism of Buxue Yimu Pill on hemorrhagic anemia through molecular docking, network pharmacology and experimental validation.","Buxue Yimu Pill (BYP) is a classic gynecological medicine in China, which is composed of Angelica sinensis (Oliv.) Diels, Leonurus japonicus Houtt, Astragalus membranaceus (Fisch.) Bunge, Colla corii asini and Citrus reticulata Blanco. It has been widely used in clinical therapy with the function of enriching Blood, nourishing Qi, and removing blood stasis. The current study was designed to determine the bioactive molecules and therapeutic mechanism of BYP against hemorrhagic anemia. Herein, GC-MS and UPLC/Q-TOF-MS/MS were employed to identify the chemical compounds from BYP. The genecards database (https://www.genecards.org/) was used to obtain the potential target proteins related to hemorrhagic anemia. Autodock/Vina was adopted to evaluate the binding ability of protein receptors and chemical ligands. Gene ontology and KEGG pathway enrichment analysis were conducted using the ClusterProfiler. As a result, a total of 62 candidate molecules were identified and 152 targets related to hemorrhagic anemia were obtained. Furthermore, 34 active molecules and 140 targets were obtained through the virtual screening experiment. 
The data of molecular-target (M-T), target-pathway (T-P), and molecular-target-pathway (M-T-P) network suggested that 32 active molecules enhanced hematopoiesis and activated the immune system by regulating 57 important targets. Pharmacological experiments showed that BYP significantly increased the counts of RBC, HGB, and HCT, and significantly down-regulated the expression of EPO, IL-6, CSF3, NOS2, VEGFA, PDGFRB, and TGFB1. The results also showed that leonurine, leonuriside B, leosibiricin, ononin, rutin, astragaloside I, riligustilide and levistolide A, were the active molecules closely related to enriching Blood. In conclusion, based on molecular docking, network pharmacology and validation experiment results, the enriching blood effect of BYP on hemorrhagic anemia may be associated with hematopoiesis, anti-inflammation, and immunity enhancement.",2021-12-01 +33338592,"Pharmacology, phytochemistry, and traditional uses of Scrophularia ningpoensis Hemsl.","

Ethnopharmacological relevance

Scrophularia ningpoensis Hemsl. (known as Xuanshen) has been used in China for centuries as a traditional medicinal plant to treat numerous diseases including inflammation, hypertension, cancer, and diabetes.

Aim of review

In this review, we provide an update on the botany, pharmacology, phytochemistry, pharmacokinetics, traditional uses, and safety of S. ningpoensis to highlight future research needs and potential uses of this plant.

Materials and methods

All information on S. ningpoensis was obtained from scientific databases including ScienceDirect, Springer, PubMed, SciFinder, China Knowledge Resource Integrated Database from the China National Knowledge Infrastructure (CNKI), Google Scholar, and Baidu Scholar. Additional information was collected from Chinese herbal medicine books, Ph.D. dissertations, and M.Sc. theses. Plant taxonomy was verified by ""The Plant List"" database (http://www.theplantlist.org).

Results

S. ningpoensis displays fever reducing, detoxifying, and nourishing 'Yin' effects in traditional Chinese medicine (TCM). More than 162 compounds have been identified and isolated from S. ningpoensis, including iridoids and iridoid glycosides, phenylpropanoid glycosides, organic acids, volatile oils, terpenoids, saccharides, flavonoids, sterols, and saponins. These compounds possess a diverse variety of pharmacological properties that affect the cardiovascular, hepatic, and nervous systems, and protect the body against inflammation, oxidation, and carcinogenesis.

Conclusions

Modern pharmacological studies have confirmed that S. ningpoensis is a valuable Chinese medicinal herb with many pharmacological uses in the treatment of cardiovascular, diabetic, and liver diseases. Most of the S. ningpoensis activity may be attributed to iridoid glycosides and phenylpropanoid glycosides; however, detailed information on the molecular mechanisms, metabolic activity, toxicology, and structure-function relationships of active components is limited. Further comprehensive research to evaluate the medicinal properties of S. ningpoensis is needed.",2020-12-16 +31851420,NCBI's Conserved Domain Database and Tools for Protein Domain Analysis.,"The Conserved Domain Database (CDD) is a freely available resource for the annotation of sequences with the locations of conserved protein domain footprints, as well as functional sites and motifs inferred from these footprints. It includes protein domain and protein family models curated in house by CDD staff, as well as imported from a variety of other sources. The latest CDD release (v3.17, April 2019) contains more than 57,000 domain models, of which almost 15,000 were curated by CDD staff. The CDD curation effort increases coverage and provides finer-grained classifications of common and widely distributed protein domain families, for which a wealth of functional and structural data have become available. The CDD maintains both live search capabilities and an archive of pre-computed domain annotations for a selected subset of sequences tracked by the NCBI's Entrez protein database. These can be retrieved or computed for a single sequence using CD-Search or in bulk using Batch CD-Search, or computed via standalone RPS-BLAST plus the rpsbproc software package. The CDD can be accessed via https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml. 
The three protocols listed here describe how to perform a CD-Search (Basic Protocol 1), a Batch CD-Search (Basic Protocol 2), and a Standalone RPS-BLAST and rpsbproc (Basic Protocol 3). © 2019 The Authors. Basic Protocol 1: CD-search Basic Protocol 2: Batch CD-search Basic Protocol 3: Standalone RPS-BLAST and rpsbproc.",2020-03-01 +,Cardiovascular risk quantification using QRISK-3 score in people with intellectual disability,"

Aims

The prevalence of cardiovascular diseases (CVD) in people with intellectual disability (ID) is around 14%, higher than the general population. However, CVD risk assessments are not consistently performed. Given the high risk of premature deaths in people with ID, it is important to identify preventable risk factors and follow evidence-based interventions. QRISK-3 is a validated risk-stratification tool, which calculates the 10-year risk of developing a heart attack or stroke (https://qrisk.org/three/index.php). There are no published studies on the use of QRISK-3 in people with ID. This project aimed to understand the use of QRISK-3 in an ID clinic and to quantify individual CVD risks to recommend appropriate management options.

Method

A cross sectional study was performed on 143 patients open to an ID psychiatry clinic. Patients and carers were sent an accessible information leaflet on this study. Basic demographic data and information on psychiatric diagnoses were collected. Patients were grouped according to the presence of severe mental illness (SMI) defined as schizophrenia, bipolar disorder and other psychotic illnesses. QRISK-3 ≥ 10% was defined as elevated risk in accordance with NICE guidelines. Patients who had a high QRISK-3 score were advised to contact their GP.

Result

Of 143 patients, 73 (51.0%) had a mild ID and the remaining had a moderate to severe ID. The mean age was 43.3 years, 53.1% were male. Overall, 28 (19.6%) participants had an elevated CVD risk, of whom 16 (57.1%) were not on statins, which is the recommended treatment. The mean QRISK-3 score was 6.31 (standard deviation [SD] 8.95), and the relative risk was 3.50 (SD 7.13). The proportion of QRISK-3 ≥ 10% and mean score were not significantly different in those with SMI, but those with SMI were more likely to be prescribed statins than those without (14 [31.1%] vs 10 [10.2%], p = 0.002). Statins were given to 24 (16.8%) participants, of whom 12 (50%) had elevated CVD risk. 89% had a blood pressure recording within the past 5 years, 87% had height and 88% had weight recorded. 73% had lipid serology results recorded.

Conclusion

Elevated CVD risk was common in this ID study population, and more than half with elevated QRISK-3 were not on the medical treatment recommended by national guidelines. QRISK-3 could feasibly be implemented in the outpatient setting. Increased routine CVD risk assessment and management should be considered as another measure to reduce morbidity and mortality.",2021-06-18 +34648221,Match_Motif: A rapid computational tool to assist in protein-protein interaction design.,"In order to generate protein assemblies with a desired function, the rational design of protein-protein binding interfaces is of significant interest. Approaches based on random mutagenesis or directed evolution may involve complex experimental selection procedures. Also, molecular modeling approaches to design entirely new proteins and interactions with partner molecules can involve large computational efforts and screening steps. In order to simplify at least the initial effort for designing a putative binding interface between two proteins the Match_Motif approach has been developed. It employs the large collection of known protein-protein complex structures to suggest interface modifications that may lead to improved binding for a desired input interaction geometry. The approach extracts interaction motifs based on the backbone structure of short (four residues) segments and the relative arrangement with respect to short segments on the partner protein. The interaction geometry is used to search through a database of such motifs in known stable bound complexes. All matches are rapidly identified (within a few seconds) and collected and can be used to guide changes in the interface that may lead to improved binding. In the output, an alternative interface structure is also proposed based on the frequency of occurrence of side chains at a given interface position in all matches and based on sterical considerations. 
Applications of the procedure to known complex structures and alternative arrangements are presented and discussed. The program, data files, and example applications can be downloaded from https://www.groups.ph.tum.de/t38/downloads/.",2021-10-26 +33279968,"Gene Circuit Explorer (GeneEx): an interactive web-app for visualizing, simulating and analyzing gene regulatory circuits.","

Summary

GeneEx is an interactive web-app that uses an ODE-based mathematical modeling approach to simulate, visualize and analyze gene regulatory circuits (GRCs) for an explicit kinetic parameter set or for a large ensemble of random parameter sets. GeneEx offers users the freedom to modify many aspects of the simulation such as the parameter ranges, the levels of gene expression noise and the GRC network topology itself. This degree of flexibility allows users to explore a variety of hypotheses by providing insight into the number and stability of attractors for a given GRC. Moreover, users have the option to upload, and subsequently compare, experimental gene expression data to simulated data generated from the analysis of a built or uploaded custom circuit. Finally, GeneEx offers a curated database that contains circuit motifs and known biological GRCs to facilitate further inquiry into these. Overall, GeneEx enables users to investigate the effects of parameter variation, stochasticity and/or topological changes on gene expression for GRCs using a systems-biology approach.

Availability and implementation

GeneEx is available at https://geneex.jax.org. This web-app is released under the MIT license and is free and open to all users and there is no mandatory login requirement.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-06-01 +34332120,Interactive Web-based Annotation of Plant MicroRNAs with iwa-miRNA.,"MicroRNAs (miRNAs) are important regulators of gene expression. The large-scale detection and profiling of miRNAs have accelerated with the development of high-throughput small RNA sequencing (sRNA-Seq) techniques and bioinformatics tools. However, generating high-quality comprehensive miRNA annotations remains challenging due to the intrinsic complexity of sRNA-Seq data and inherent limitations of existing miRNA predictions. Here, we present iwa-miRNA, a Galaxy-based framework that can facilitate miRNA annotation in plant species by combining computational analysis and manual curation. iwa-miRNA is specifically designed to generate a comprehensive list of miRNA candidates, bridging the gap between already annotated miRNAs provided by public miRNA databases and new predictions from sRNA-Seq datasets. It can also assist users in selecting promising miRNA candidates in an interactive mode, contributing to the accessibility and reproducibility of genome-wide miRNA annotation. iwa-miRNA is user-friendly and can be easily deployed as a web application for researchers without programming experience. With flexible, interactive, and easy-to-use features, iwa-miRNA is a valuable tool for the annotation of miRNAs in plant species with reference genomes. We also illustrate the application of iwa-miRNA for miRNA annotation using data from plant species with varying genomic complexity. The source codes and web server of iwa-miRNA are freely accessible at http://iwa-miRNA.omicstudio.cloud/.",2021-07-28 +34319187,Benefits of Dietary Management in Breast Cancer Patients: A Systematic Review and Meta-Analysis.,"The World Cancer Research Fund/American Institute of Cancer Research recommendations include guidance on diet, nutrition, and weight management for people with cancer. 
However, for women diagnosed with breast cancer there is a lack of comprehensive analyses on the effects of dietary interventions. The purpose of this study was to investigate the impact of changes in dietary behavior and body composition on breast cancer development. A comprehensive and systematic literature search of 12 electronic databases was undertaken on January 27, 2021 to identify randomized controlled trials (RCTs) of dietary interventions for breast cancer. The Cochrane risk bias assessment tool was used to evaluate the quality of the trials identified with the data analyzed by Review Manager 5.3 software. The results showed that dietary interventions probably did not modify servings of fruit (P = 0.08), fat intake (P = 0.10), total cholesterol level (P = 0.82), body weight (P = 0.08), waist circumference (P = 0.15), or waist-to-hip ratio (P = 0.32). However, a significant reduction in body mass index (P = 0.03), and hip circumference (P = 0.03), and improvement in energy intake (P = 0.02), vegetable servings (P < 0.0001), and fiber intake (P < 0.00001) were observed. Future studies should investigate the benefits of exercise in combination with dietary interventions in breast cancer patients.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1957129.",2021-07-28 +34976312,LCMD: Lung Cancer Metabolome Database.,"Lung cancer, one of the most common causes of cancer-related death worldwide, has been associated with high treatment cost and imposed great burdens. The 5-year postoperative survival rate of lung cancer (13%) is lower than many other leading cancers indicating the urgent needs to dissect its pathogenic mechanisms and discover specific biomarkers. Although several proteins have been proposed to be potential candidates for the diagnosis of lung cancer, they present low accuracy in clinical settings. Metabolomics has thus emerged as a very promising tool for biomarker discovery. 
To date, many lung cancer-related metabolites have been highlighted in the literature but no database is available for scientists to retrieve this information. Herein, we construct and introduce the first Lung Cancer Metabolome Database (LCMD), a freely available online database depositing 2013 lung cancer-related metabolites identified from 65 mass spectrometry-based lung cancer metabolomics studies. Researchers are able to explore LCMD via two ways. Firstly, by applying various filters in the ""Browse Metabolites"" mode, users can access a list of lung cancer-related metabolites that satisfy the filter specifications. For each metabolite, users can acquire the value of the fold change (cancer/normal), statistical significance (p-value) of the fold change, and the comparative research designs of all the mass spectrometry-based lung cancer metabolomics studies that identify this metabolite. Secondly, by applying various filters in the ""Browse Studies"" mode, users can obtain a list of mass spectrometry-based lung cancer metabolomics studies that satisfy the filter specifications. For each study, users can view the type of studied specimen, mass spectrometry (MS) method, MS data processing software, and differential analysis method, as well as all the identified lung cancer-related metabolites. Furthermore, the overview of each study is clearly illustrated by a graphical summary. The LCMD (http://cosbi7.ee.ncku.edu.tw/LCMD/) is the first database that brings together the meaningful information of lung cancer-related metabolites. 
The development of the LCMD is envisioned to promote the biomarker discovery of lung cancer.",2021-12-07 +31780760,"PedMap: a pediatric diseases map generated from clinical big data from Hangzhou, China.","Epidemiological knowledge of pediatric diseases may improve professionals' understanding of the pathophysiology of and risk factors for diseases and is also crucial for decision making related to workforce and resource planning in pediatric departments. In this study, a pediatric disease epidemiology knowledgebase called PedMap (http://pedmap.nbscn.org) was constructed from the clinical data from 5 447 202 outpatient visits of 2 189 868 unique patients at a children's hospital (Hangzhou, China) from 2013 to 2016. The top 100 most-reported pediatric diseases were identified and visualized. These common pediatric diseases were clustered into 4 age groups and 4 seasons. The prevalence, age distribution and co-occurrence diseases for each disease were also visualized. Furthermore, an online prediction tool based on Gaussian regression models was developed to predict pediatric disease incidence based on weather information. PedMap is the first comprehensive epidemiological resource to show the full view of age-related, seasonal, climate-related variations in and co-occurrence patterns of pediatric diseases.",2019-11-28 +,"112 Outcomes and Clinical Characteristics of COVID-19 Disease in the Frail, Elderly Population of Tayside","Abstract

Introduction

With advancing age, frailty, multi-morbidity and need for care, elderly patients are some of the most vulnerable to Covid-19 disease. In NHS Tayside, a dedicated Covid-19 Medicine for the Elderly (MFE) Team was formed to care for patients identified as frail and likely to benefit from comprehensive geriatric assessment.

Methods

All Covid-19 patients meeting frailty criteria [1] who were cared for by the Covid-19 MFE Team were identified. Data on outcomes and clinical characteristics for all (140) patients admitted during the first pandemic wave (March–July 2020) was collected using electronic patient records and analysed.

Results

Patients were predominantly male (58.6%). Ages ranged from 65–99 years, with 43.6% aged ≥85 years. 82.1% had one or more of cough, fever and anosmia on admission, fitting Covid-19 case definition [2]. Lymphopenia was present in 92.1%. Of note, 26.5% of patients had a normal or unchanged chest x-ray report, with only 10.2% showing bilateral peripheral infiltrates. 28-day mortality was 37.1% with Covid-19 Disease listed as primary cause of death in 90.4%.

Conclusion(s)

Entering further “waves” of infection, it is vital that we understand the clinical presentation and course of Covid-19 disease in elderly patients. Our data highlights that any Covid-19 symptom, even in isolation, should raise suspicion of disease. Chest x-rays should not be used alone as a diagnostic tool. The presence of lymphopenia should raise suspicion of Covid-19 infection. In developing an understanding of how elderly patients with Covid-19 present, we can ensure early identification and initiation of appropriate infection control measures.

References

1. Healthcare Improvement Scotland. Think Frailty. 2014. http://www.healthcareimprovementscotland.org/his/idoc.ashx?docid=8abd8530-48f3-4152-bbfb-d0918b870ec9&version=-1 2. Scottish Government. Update to Coronavirus Symptoms 2020. https://www.gov.scot/news/update-to-coronavirus-symptoms",2021-03-01 +,Identification of determinants of pollen donor fecundity using the hierarchical neighborhood model,"Individual differences in male reproductive success drive genetic drift and natural selection, altering genetic variation and phenotypic trait distributions in future generations. Therefore, identifying the determinants of reproductive success is important for understanding the ecology and evolution of plants. Here, based on the spatially explicit mating model (the neighborhood model), we develop a hierarchical probability model that links co‐dominant genotypes of offspring and candidate parents with phenotypic determinants of male reproductive success. The model accounts for pollen dispersal, genotyping errors as well as individual variation in selfing, pollen immigration, and differentiation of immigrant pollen pools. Unlike the classic neighborhood model approach, our approach is specially designed to account for excessive variation (overdispersion) in male fecundity. We implemented a Bayesian estimation method (the Windows computer program available at: https://www.ukw.edu.pl/pracownicy/plik/igor_chybicki/1806/) that, among others, allows for selecting phenotypic variables important for male fecundity and assessing the fraction of variance in fecundity (R²) explained by selected variables. Simulations showed that our method outperforms both the classic neighborhood model and the two‐step approach, where fecundities and the effects of phenotypic variables are estimated separately. The analysis of two data examples showed that in wind‐pollinated trees, male fecundity depends on both the amount of produced pollen and the ability to pollen spread. 
However, despite that the tree size was positively correlated with male fecundity, it explained only a fraction of the total variance in fecundity, indicating the presence of additional factors. Finally, case studies highlighted the importance of accounting for pollen dispersal in the estimation of fecundity determinants.",2021-04-01 +31240309,TCR3d: The T cell receptor structural repertoire database.,"

Summary

T cell receptors (TCRs) are critical molecules of the adaptive immune system, capable of recognizing diverse antigens, including peptides, lipids and small molecules, and represent a rapidly growing class of therapeutics. Determining the structural and mechanistic basis of TCR targeting of antigens is a major challenge, as each individual has a vast and diverse repertoire of TCRs. Despite shared general recognition modes, diversity in TCR sequence and recognition represents a challenge to predictive modeling and computational techniques being developed to predict antigen specificity and mechanistic basis of TCR targeting. To this end, we have developed the TCR3d database, a resource containing all known TCR structures, with a particular focus on antigen recognition. TCR3d provides key information on antigen binding mode, interface features, loop sequences and germline gene usage. Users can interactively view TCR complex structures, search sequences of interest against known structures and sequences, and download curated datasets of structurally characterized TCR complexes. This database is updated on a weekly basis, and can serve the community as a centralized resource for those studying T cell receptors and their recognition.

Availability and implementation

The TCR3d database is available at https://tcr3d.ibbr.umd.edu/.",2019-12-01 +32442307,SYNERGxDB: an integrative pharmacogenomic portal to identify synergistic drug combinations for precision oncology.,"Drug-combination data portals have recently been introduced to mine huge amounts of pharmacological data with the aim of improving current chemotherapy strategies. However, these portals have only been investigated for isolated datasets, and molecular profiles of cancer cell lines are lacking. Here we developed a cloud-based pharmacogenomics portal called SYNERGxDB (http://SYNERGxDB.ca/) that integrates multiple high-throughput drug-combination studies with molecular and pharmacological profiles of a large panel of cancer cell lines. This portal enables the identification of synergistic drug combinations through harmonization and unified computational analysis. We integrated nine of the largest drug combination datasets from both academic groups and pharmaceutical companies, resulting in 22 507 unique drug combinations (1977 unique compounds) screened against 151 cancer cell lines. This data compendium includes metabolomics, gene expression, copy number and mutation profiles of the cancer cell lines. In addition, SYNERGxDB provides analytical tools to discover effective therapeutic combinations and predictive biomarkers across cancer, including specific types. Combining molecular and pharmacological profiles, we systematically explored the large space of univariate predictors of drug synergism. SYNERGxDB constitutes a comprehensive resource that opens new avenues of research for exploring the mechanism of action for drug synergy with the potential of identifying new treatment strategies for cancer patients.",2020-07-01 +33080021,NAMS webserver: coding potential assessment and functional annotation of plant transcripts. ,"Recent advances in transcriptomics have uncovered lots of novel transcripts in plants. 
To annotate such transcripts, dissecting their coding potential is a critical step. Computational approaches have been proven fruitful in this task; however, most current tools are designed/optimized for mammals and only a few of them have been tested on a limited number of plant species. In this work, we present NAMS webserver, which contains a novel coding potential classifier, NAMS, specifically optimized for plants. We have evaluated the performance of NAMS using a comprehensive dataset containing more than 3 million transcripts from various plant species, where NAMS demonstrates high accuracy and remarkable performance improvements over state-of-the-art software. Moreover, our webserver also furnishes functional annotations, aiming to provide users informative clues to the functions of their transcripts. Considering that most plant species are poorly characterized, our NAMS webserver could serve as a valuable resource to facilitate the transcriptomic studies. The webserver with testing dataset is freely available at http://sunlab.cpy.cuhk.edu.hk/NAMS/.",2021-05-01 +34235237,"Whole genome sequence data of Bacillus australimaris strain B28A, isolated from Marine Water in India.","Bacillus genus members are dominant in the Eastern Arabian Sea and are known for producing many industrial enzymes. Bacillus australimaris B28A, isolated from seawater, had an enzymatic activity. Here, the whole genome sequence of Bacillus australimaris B28A is reported. The 3,766,107-bp genome, with a GC content of 41.6%, comprised 3936 protein-coding genes, seven ribosomal RNA, and 75 transfer RNA. Several bioactive secondary metabolite genes in the genome, including surfactin, lichenysin, bacillibactin, bacilysin, paenilamicin, fengycin, and carotenoid, were identified using antiSMASH. The 1396 proteins were predicted using RAST, including asparaginase enzyme: an anticancer enzyme. 
Sequence data have been deposited in the DDBJ/ENA/GenBank database under the accession number JAGQFH000000000. The version described in this paper is JAGQFH000000000.1. The BioProject ID in the GenBank database is PRJNA670955. The raw data is publicly available at ""https://www.ncbi.nlm.nih.gov/sra/SRR14203888"".",2021-06-21 +34259329,AniAMPpred: artificial intelligence guided discovery of novel antimicrobial peptides in animal kingdom. ,"With advancements in genomics, there has been a substantial reduction in the cost and time of genome sequencing, which has resulted in a lot of data in genome databases. Antimicrobial host defense proteins provide protection against invading microbes. But confirming the antimicrobial function of host proteins by wet-lab experiments is expensive and time consuming. Therefore, there is a need to develop an in silico tool to identify the antimicrobial function of proteins. In the current study, we developed a model AniAMPpred by considering all the available antimicrobial peptides (AMPs) of length $\in [10, 200]$ from the animal kingdom. The model utilizes a support vector machine algorithm with deep learning-based features and identifies probable antimicrobial proteins (PAPs) in the genome of animals. The results show that our proposed model outperforms other state-of-the-art classifiers, has very high confidence in its predictions, is not biased and can classify both AMPs and non-AMPs for a diverse peptide length with high accuracy. By utilizing AniAMPpred, we identified 436 PAPs in the genome of Helobdella robusta. To further confirm the functional activity of PAPs, we performed BLAST analysis against known AMPs. On detailed analysis of five selected PAPs, we could observe their similarity with antimicrobial proteins of several animal species. Thus, our proposed model can help the researchers identify PAPs in the genome of animals and provide insight into the functional identity of different proteins.
An online prediction server is also developed based on the proposed approach, which is freely accessible at https://aniamppred.anvil.app/.",2021-11-01 +34755241,A catalog of curated breast cancer genes.,"

Purpose

Decades of research have identified multiple genetic variants associated with breast cancer etiology. However, there is no database that archives breast cancer genes and variants responsible for predisposition. We set out to build a dynamic repository of curated breast cancer genes.

Methods

A comprehensive literature search was performed in PubMed and Google Scholar, followed by data extraction and harmonization for downstream analysis.

Results

Using a subset of 345 studies, we cataloged 652 breast cancer-associated loci across the genome. A majority of these were present in the non-coding region (i.e., intergenic (101) and intronic (345)), whereas only 158 were located within an exon. Using the odds ratio, we identified 429 loci to increase the disease risk and 198 to confer protection against breast cancer, whereas 25 were identified to both increase disease risk and confer protection against breast cancer. Chromosomal ideogram analysis indicated that chromosomes 17 and 19 have the highest density of breast cancer loci. We manually annotated and collated breast cancer genes in which a previous association between rare-monogenic variant and breast cancer has been documented. Finally, network and functional enrichment analysis revealed that steroid metabolism and DNA repair pathways were predominant among breast cancer genes and variants.

Conclusions

We have built an online interactive catalog of curated breast cancer genes ( https://cbcg.dk ). This will expedite clinical diagnostics and support the ongoing efforts in managing breast cancer etiology. Moreover, the database will serve as an essential repository when designing new breast cancer multigene panels.",2021-11-10 +33209300,Increased IL-10-producing regulatory T cells are characteristic of severe cases of COVID-19.,"

Objectives

The pandemic spread of the coronavirus SARS-CoV-2 is due, in part, to the immunological properties of the host-virus interaction. The clinical presentation varies from individual to individual, with asymptomatic carriers, mild-to-moderate-presenting patients and severely affected patients. Variation in immune response to SARS-CoV-2 may underlie this clinical variation.

Methods

Using a high-dimensional systems immunology platform, we have analysed the peripheral blood compartment of 6 healthy individuals, 23 mild-to-moderate and 20 severe COVID-19 patients.

Results

We identify distinct immunological signatures in the peripheral blood of the mild-to-moderate and severe COVID-19 patients, including T-cell lymphopenia, more consistent with peripheral hypo- than hyper-immune activation. Unique to the severe COVID-19 cases was a large increase in the proportion of IL-10-secreting regulatory T cells, a lineage known to possess anti-inflammatory properties in the lung.

Conclusion

As IL-10-secreting regulatory T cells are known to possess anti-inflammatory properties in the lung, their proportional increase could contribute to a more severe COVID-19 phenotype. We openly provide annotated data (https://flowrepository.org/experiments/2713) with clinical correlates as a systems immunology resource for the COVID-19 research community.",2020-11-13 +34156313,Low EVI1 expression at diagnosis predicted poor outcomes in pediatric Ph-negative B cell precursor acute lymphoblastic leukemia patients.,"Abnormally high ecotropic viral integration site 1 (EVI1) expression has been recognized as a poor prognostic factor in acute myeloid leukemia patients. However, its prognostic impact in B cell precursor acute lymphoblastic leukemia (BCP-ALL) remains unknown. A total of 176 pediatric Ph-negative BCP-ALL patients who received at least 1 course of chemotherapy and received chemotherapy only during follow-up were retrospectively tested for EVI1 transcript levels by real-time quantitative PCR at diagnosis, and survival analysis was performed. Clinical and EVI1 expression data of 129 pediatric BCP-ALL patients were downloaded from therapeutically applicable research to generate effective treatments (TARGET) database for validation. In our cohort, the median EVI1 transcript level was 0.33% (range, 0.0068-136.2%), and 0.10% was determined to be the optimal cutoff value for patient grouping by receiver operating characteristic curve analysis. Low EVI1 expression (<0.10%) was significantly related to lower 5-year relapse-free survival (RFS) and overall survival (OS) rates (P = 0.017 and 0.018, respectively). Multivariate analysis showed that EVI1 expression <0.10% was an independent adverse prognostic factor for RFS and OS. TARGET data showed that low EVI1 expression tended to be related to a lower 5-year OS rate (P = 0.066). 
In conclusion, low EVI1 expression at diagnosis could predict poor outcomes in pediatric Ph-negative BCP-ALL patients receiving chemotherapy. Supplemental data for this article is available online at https://doi.org/10.1080/08880018.2021.1939818 .",2021-06-22 +34672229,Neurodevelopmental effects of maternal folic acid supplementation: a systematic review and meta-analysis.,"Folic acid, a water-soluble vitamin B nutrient, plays an important role not only in maintaining a healthy pregnancy but also in offspring brain development and function; however, it remains unclear whether maternal folic acid (FA) supplementation is associated with the risk of different postnatal neurodevelopmental outcomes. Here, we performed a systematic review and meta-analysis on the impact of maternal FA supplementation on a wide range of postnatal neurodevelopmental outcomes which include intellectual development, risk of autistic traits, ADHD, behavior, language, and psychomotor problems, using studies extracted from the following databases, including MEDLINE, Web of Science, Cochrane Library, Scopus, EMBASE, and PsychInfo. Thirty-two cohort studies and seven case-control studies were included in this meta-analysis. In the present study, we found that prenatal FA supplementation had a positive impact on offspring's neurodevelopmental outcomes, including improved intellectual development and reduced risk of autism traits, ADHD, behavioral, and language problems. We also found that FA over-supplementation was not associated with an improvement in offspring's brain development, and may have a negative impact on offspring's neurodevelopmental outcomes. This study provides the first panoramic review on the relationship of FA supplementation with offspring's neurodevelopment.
Further studies focusing on different dosages and periods of FA supplementation are needed.Supplemental data for this article is available online at https://doi.org/10.1080/10408398.2021.1993781 .",2021-10-21 +31501752,ABCD: Alzheimer's disease Biomarkers Comprehensive Database.,"Alzheimer's disease (AD) is an age-related, non-reversible, and progressive brain disorder. Memory loss, confusion, and personality changes are major symptoms noticed. AD ultimately leads to a severe loss of mental function. Due to lack of effective biomarkers, no effective medication was available for the complete treatment of AD. There is a need to provide all AD-related essential information to the scientific community. Our resource Alzheimer's disease Biomarkers Comprehensive Database (ABCD) is being planned to accomplish this objective. ABCD is a huge collection of AD-related data of molecular markers. The web interface contains information concerning the proteins, genes, transcription factors, SNPs, miRNAs, mitochondrial genes, and expressed genes implicated in AD pathogenesis. In addition to the molecular-level data, the database has information for animal models, medicinal candidates and pathways involved in the AD and some image data for AD patients. ABCD is coupled with some major external resources where the user can retrieve additional general information about the disease. The database was designed in such a manner that user can extract meaningful information about gene, protein, pathway, and regulatory elements based search options. This database is unique in the sense that it is completely dedicated to specific neurological disorder i.e. AD. Further advance options like AD-affected brain image data of patients and structural compound level information add values to our database. Features of this database enable users to extract, analyze and display information related to a disease in many different ways. 
The database is available for academic purpose and accessible at http://www.bioinfoindia.org/abcd.",2019-09-03 +34376710,Identification of prognostic biomarkers related to the tumor microenvironment in thyroid carcinoma.,"Thyroid Carcinoma (THCA) is the most common endocrine tumor that is mainly treated using surgery and radiotherapy. In addition, immunotherapy is a recently developed treatment option that has played an essential role in the management of several types of tumors. However, few reports exist on the use of immunotherapy to treat THCA. The study downloaded the miRNA, mRNA and lncRNA data for THCA patients from the TCGA database ( https://portal.gdc.cancer.gov/ ). Thereafter, the tumor samples were divided into cold and hot tumors, based on the immune score of the tumor microenvironment. Moreover, the differentially expressed lncRNAs and miRNAs were obtained. Finally, the study jointly constructed a ceRNA network through differential analysis of the mRNA data for cold and hot tumors. The study first assessed the level of immune infiltration in the THCA tumor microenvironment then divided the samples into cold and hot tumors, based on the immune score. Additionally, a total of 568 up-regulated and 412 down-regulated DEGs were screened by analyzing the differences between hot and cold tumors. Thereafter, the study examined the differentially expressed genes for lncRNA and miRNA. The results revealed 629 differentially expressed genes related to lncRNA and 114 associated with miRNA. Finally, a ceRNA network of the differentially expressed genes was constructed. The results showed a five-miRNA hubnet, i.e., hsa-mir-204, hsa-mir-128, hsa-mir-214, hsa-mir-150 and hsa-mir-338. The present study identified the immune-related mRNA, lncRNA and miRNA in THCA then constructed a ceRNA network. These results are therefore important as they provide more insights on the immune mechanisms in THCA. 
The findings also provide additional information for possible THCA immunotherapy.",2021-08-10 +33999203,PredictProtein - Predicting Protein Structure and Function for 29 Years.,"Since 1992, PredictProtein (https://predictprotein.org) has been a one-stop online resource for protein sequence analysis with its main site hosted at the Luxembourg Centre for Systems Biomedicine (LCSB) and queried monthly by over 3,000 users in 2020. PredictProtein was the first Internet server for protein predictions. It pioneered combining evolutionary information and machine learning. Given a protein sequence as input, the server outputs multiple sequence alignments, predictions of protein structure in 1D and 2D (secondary structure, solvent accessibility, transmembrane segments, disordered regions, protein flexibility, and disulfide bridges) and predictions of protein function (functional effects of sequence variation or point mutations, Gene Ontology (GO) terms, subcellular localization, and protein-, RNA-, and DNA binding). PredictProtein's infrastructure has moved to the LCSB increasing throughput; the use of MMseqs2 sequence search reduced runtime five-fold (apparently without lowering performance of prediction methods); user interface elements improved usability, and new prediction methods were added. PredictProtein recently included predictions from deep learning embeddings (GO and secondary structure) and a method for the prediction of proteins and residues binding DNA, RNA, or other proteins. PredictProtein.org aspires to provide reliable predictions to computational and experimental biologists alike. All scripts and methods are freely available for offline execution in high-throughput settings.",2021-07-01 +,High-resolution mapping of floodplain topography from space: A case study in the Amazon,"Terrain elevation is essential for land management, navigation, and earth science applications.
Remote sensing advancements have led to an increase in the availability of a range of digital elevation models with global to quasi-global land coverage. However, the generation of these models in water bodies requires specialized approaches, such as the delimitation of the shorelines (isobaths) of lakes over time. Therefore, the processing costs are high in complex areas with many lakes. Currently, there is no systematic topographic mapping of lakes and channels in large and complex floodplains using remote sensing data. We present here the first high-resolution topographic mapping (30 m) of the non-forested portion of the middle-lower Amazon floodplain using a new method based on in-situ Amazon river water levels and a flood-frequency map derived from the Landsat Global Surface Water Dataset. Validation using locally derived bathymetry showed a root mean square error (RMSE) of 0.89 m for floodplain elevation and a good representation of spatial patterns with Pearson's correlation coefficient of 0.77. Our approach for improving topographic representation in open water areas is an alternative to SRTM3 DEM or MERIT DEM, which represents these areas as a flat surface. We also generated the Amazon River bathymetry using nautical charts from the Brazilian Navy (average RMSE of 7.5 m and bias of 5 m), and floodplain depths maps corresponding to the high- and low-water periods of the river flood wave. The results show that the storage volume in the open-water floodplain varies 104.3 km³ on average each year (from 11.9 km³ in low-water to 116.2 km³ in high-water). The method can be applied to any temporarily flooded area to provide the often missing underwater digital topographic data required for hydrological, ecological, and geomorphological studies. 
The data set developed in this study can be found at https://doi.org/10.17632/vn599y9szb.1.",2020-12-01 +34666569,Association of maternal pre-pregnancy dietary intake with adverse maternal and neonatal outcomes: A systematic review and meta-analysis of prospective studies.,"This study aimed to summarize the evidence regarding the effects of dietary intake before conception on pregnancy outcomes by performing a systematic review and meta-analysis of prospective studies. Electronic databases were searched from inception up to August 2021. Overall, 65 studies involving 831 798 participants were included and 38 studies were quantitatively pooled. With regard to maternal outcomes, pre-pregnancy intake of fried food, fast food, red and processed meat, heme iron and a low-carbohydrate dietary pattern was positively associated with the risk of gestational diabetes mellitus (GDM) (all P < 0.05). However, a high dietary fiber intake and folic acid supplementation were negatively associated with GDM risk (both P < 0.05). With regard to neonatal outcomes, maternal caffeine intake before pregnancy significantly increased the risk of spontaneous abortion, while folic acid supplementation had protective effects on total adverse neonatal outcomes, preterm birth, and small-for-gestational age (SGA, all P < 0.05). However, no significant associations were found between adverse pregnancy outcomes (i.e., GDM and SGA) and the pre-pregnancy dietary intake of sugar-sweetened beverages, potato, fish, and carbohydrates and the Healthy Eating Index. 
Our study suggests that maintaining a healthy diet before conception has significant beneficial effects on pregnancy outcomes. Supplemental data for this article is available online at https://doi.org/10.1080/10408398.2021.1989658.",2021-10-20 +34408238,Survival analysis in breast cancer using proteomic data from four independent datasets.,"Breast cancer clinical treatment selection is based on the immunohistochemical determination of four protein biomarkers: ESR1, PGR, HER2, and MKI67. Our aim was to correlate immunohistochemical results to proteome-level technologies in measuring the expression of these markers. We also aimed to integrate available proteome-level breast cancer datasets to identify and validate new prognostic biomarker candidates. We searched studies involving breast cancer patient cohorts with published survival and proteomic information. Immunohistochemistry and proteomic technologies were compared using the Mann-Whitney test. Receiver operating characteristics (ROC) curves were generated to validate discriminative power. Cox regression and Kaplan-Meier survival analysis were calculated to assess prognostic power. False Discovery Rate was computed to correct for multiple hypothesis testing. We established a database integrating protein expression data and survival information from four independent cohorts for 1229 breast cancer patients. In all four studies combined, a total of 7342 unique proteins were identified, and 1417 of these were identified in at least three datasets. ESR1, PGR, and HER2 protein expression levels determined by RPPA or LC-MS/MS methods showed a significant correlation with the levels determined by immunohistochemistry (p < 0.0001). PGR and ESR1 levels showed a moderate correlation (correlation coefficient = 0.17, p = 0.0399). 
An additional panel of candidate proteins, including apoptosis-related proteins (BCL2,), adhesion markers (CDH1, CLDN3, CLDN7) and basal markers (cytokeratins), were validated as prognostic biomarkers. Finally, we expanded our previously established web tool designed to validate survival-associated biomarkers by including the proteomic datasets analyzed in this study ( https://kmplot.com/ ). In summary, large proteomic studies now provide sufficient data enabling the validation and ranking of potential protein biomarkers.",2021-08-18 +32180108,A high-quality genome sequence of alkaligrass provides insights into halophyte stress tolerance.,"Alkaligrass (Puccinellia tenuiflora) is a monocotyledonous halophytic forage grass widely distributed in Northern China. It belongs to the Gramineae family and shares a close phylogenetic relationship with the cereal crops, wheat and barley. Here, we present a high-quality chromosome-level genome sequence of alkaligrass assembled from Illumina, PacBio and 10× Genomics reads combined with genome-wide chromosome conformation capture (Hi-C) data. The ∼1.50 Gb assembled alkaligrass genome encodes 38,387 protein-coding genes, and 54.9% of the assembly are transposable elements, with long terminal repeats being the most abundant. Comparative genomic analysis coupled with stress-treated transcriptome profiling uncovers a set of unique saline- and alkaline-responsive genes in alkaligrass. The high-quality genome assembly and the identified stress related genes in alkaligrass provide an important resource for evolutionary genomic studies in Gramineae and facilitate further understanding of molecular mechanisms underlying stress tolerance in monocotyledonous halophytes. The alkaligrass genome data is freely available at http://xhhuanglab.cn/data/alkaligrass.html .",2020-03-12 +30999839,From trash to treasure: detecting unexpected contamination in unmapped NGS data.,"

Background

Next Generation Sequencing (NGS) experiments produce millions of short sequences that, mapped to a reference genome, provide biological insights at genomic, transcriptomic and epigenomic level. Typically the amount of reads that correctly maps to the reference genome ranges between 70% and 90%, leaving in some cases a consistent fraction of unmapped sequences. This 'misalignment' can be ascribed to low quality bases or sequence differences between the sample reads and the reference genome. Investigating the source of the unmapped reads is definitely important to better assess the quality of the whole experiment and to check for possible downstream or upstream 'contamination' from exogenous nucleic acids.

Results

Here we propose DecontaMiner, a tool to unravel the presence of contaminating sequences among the unmapped reads. It uses a subtraction approach to identify bacteria, fungi and viruses genome contamination. DecontaMiner generates several output files to track all the processed reads, and to provide a complete report of their characteristics. The good quality matches on microorganism genomes are counted and compared among samples. DecontaMiner builds an offline HTML page containing summary statistics and plots. The latter are obtained using the state-of-the-art D3 javascript libraries. DecontaMiner has been mainly used to detect contamination in human RNA-Seq data. The software is freely available at http://www-labgtp.na.icar.cnr.it/decontaminer .

Conclusions

DecontaMiner is a tool designed and developed to investigate the presence of contaminating sequences in unmapped NGS data. It can suggest the presence of contaminating organisms in sequenced samples, that might derive either from laboratory contamination or from their biological source, and in both cases can be considered as worthy of further investigation and experimental validation. The novelty of DecontaMiner is mainly represented by its easy integration with the standard procedures of NGS data analysis, while providing a complete, reliable, and automatic pipeline.",2019-04-18 +34329395,PolyRound: Polytope rounding for random sampling in metabolic networks. ,"Random flux sampling is a powerful tool for the constraint-based analysis of metabolic networks. The most efficient sampling method relies on a rounding transform of the constraint polytope, but no available rounding implementation can round all relevant models. By removing redundant polytope constraints on the go, PolyRound simplifies the numerical problem and rounds all the 108 models in the BiGG database without parameter tuning, compared to about 50% for the state-of-the-art implementation. The implementation is available on gitlab: https://gitlab.com/csb.ethz/PolyRound. Supplementary data are available at Bioinformatics online.",2021-07-30 +34388468,ChAlPred: A web server for prediction of allergenicity of chemical compounds.,"

Background

Allergy is the abrupt reaction of the immune system that may occur after the exposure to allergens such as proteins, peptides, or chemicals. In the past, various methods have been generated for predicting allergenicity of proteins and peptides. In contrast, there is no method that can predict allergenic potential of chemicals. In this paper, we described a method ChAlPred developed for predicting chemical allergens as well as for designing chemical analogs with desired allergenicity.

Method

In this study, we have used 403 allergenic and 1074 non-allergenic chemical compounds obtained from IEDB database. The PaDEL software was used to compute the molecular descriptors of the chemical compounds to develop different prediction models. All the models were trained and tested on the 80% training data and evaluated on the 20% validation data using the 2D, 3D and FP descriptors.

Results

In this study, we have developed different prediction models using several machine learning approaches. It was observed that the Random Forest based model developed using hybrid descriptors performed the best, and achieved the maximum accuracy of 83.39% and AUC of 0.93 on validation dataset. The fingerprint analysis of the dataset indicates that certain chemical fingerprints are more abundant in allergens that include PubChemFP129 and GraphFP1014. We have also predicted allergenicity potential of FDA-approved drugs using our best model and identified the drugs causing allergic symptoms (e.g., Cefuroxime, Spironolactone, Tioconazole). Our results agreed with allergenicity of these drugs reported in literature.

Conclusions

To aid the research community, we developed a smart-device compatible web server ChAlPred (https://webs.iiitd.edu.in/raghava/chalpred/) that allows users to predict and design chemicals with allergenic properties.",2021-08-08 +33754892,'Necessity is the mother of invention': Specialist palliative care service innovation and practice change in response to COVID-19. Results from a multinational survey (CovPall).,"

Background

Specialist palliative care services have a key role in a whole system response to COVID-19, a disease caused by the SARS-CoV-2 virus. There is a need to understand service response to share good practice and prepare for future care.

Aim

To map and understand specialist palliative care services innovations and practice changes in response to COVID-19.

Design

Online survey of specialist palliative care providers (CovPall), disseminated via key stakeholders. Data collected on service characteristics, innovations and changes in response to COVID-19. Statistical analysis included frequencies, proportions and means, and free-text comments were analysed using a qualitative framework approach.

Setting/participants

Inpatient palliative care units, home nursing services, hospital and home palliative care teams from any country.

Results

Four hundred and fifty-eight respondents: 277 UK, 85 Europe (except UK), 95 World (except UK and Europe), 1 missing country. 54.8% provided care across 2+ settings; 47.4% hospital palliative care teams, 57% in-patient palliative care units and 57% home palliative care teams. The crisis context meant services implemented rapid changes. Changes involved streamlining, extending and increasing outreach of services, using technology to facilitate communication, and implementing staff wellbeing innovations. Barriers included fear and anxiety, duplication of effort, information overload and funding. Enablers included collaborative teamwork, staff flexibility, a pre-existing IT infrastructure and strong leadership.

Conclusions

Specialist palliative care services have been flexible, highly adaptive and have adopted low-cost solutions, also called 'frugal innovations', in response to COVID-19. In addition to financial support, greater collaboration is essential to minimise duplication of effort and optimise resource use. ISRCTN16561225 https://doi.org/10.1186/ISRCTN16561225.",2021-03-23 +,First Report of Canker and Branch Dieback of Sweet Cherry Trees Caused by Calosphaeria pulchella in Chile,"In Chile, the 2019 to 2020 sweet cherry season yielded 228,548 t produced on 38,392 ha and an average annual crop value about US$1.6 billion (http://www.iqonsulting.com/yb/). Between autumn 2019 and summer 2020, branch and limb dieback symptoms were observed in two 12-year-old sweet cherry (Prunus avium L.) orchards located in the O’Higgins region (Chile Central Valley). Furthermore, other symptoms such as wilting leaves, cankers, bark cracking, emission of gum exudates, and internal wood necrosis were detected on trees of ‘Bing’, ‘Santina’, and ‘Sweetheart’ cultivars (Cainelli et al. 2017). Wood fragments from symptomatic branches were surface sterilized with 95% ethanol, flamed, and placed onto potato dextrose agar amended with 0.5 g/liter of streptomycin sulfate (Berbegal et al. 2014). After 7 days of incubation at 25°C, pink to red colonies with white margins were isolated. Each isolate was characterized by having hyaline and oblong-ellipsoidal conidia of 5.76 ± 0.88 × 1.76 ± 0.36 μm (n = 100) (Trouillas et al. 2012). According to these morphological features, the fungus was identified as Calosphaeria pulchella (Pers.: Fr.) J. Schröt (anamorph Calosphaeriophora pulchella Réblová, L. Mostert, W. Gams & Crous) (Réblová et al. 2004). The internal transcribed spacer (ITS) region of the rDNA sequence comparison using BLAST analysis revealed a 99.48% identity and 100% query coverage between C. pulchella sequence HM237297 and the Chilean isolates. 
Moreover, the Chilean isolates were confirmed by means of phylogenetic analysis using ITS sequences of C. pulchella available in the GenBank database. The maximum-parsimony phylogenetic tree supported the cluster analysis of the Chilean C. pulchella isolates with those obtained in other regions of the world with a bootstrap value of 95% (Berbegal et al. 2014; Trouillas et al. 2012). The Chilean ITS sequences were deposited into GenBank (MT378444 to MT378447). Two-year-old sweet cherry trees (cv. Bing) were inoculated with the Chilean isolates. Six trees were used as replicates. To accomplish this goal, two punctures of 5-mm diameter were made in two branches per tree with a cork borer, and a plug of mycelium from 7-day-old colonies was laid on the wound mycelium side down. Six trees were inoculated with sterile agar plugs. Every puncture was sealed with petroleum jelly and wrapped with Parafilm. Four months after inoculation, the vascular streaking developing from the inoculated wounds was measured. The average lesion lengths on inoculated and noninoculated shoots were 43.79 and 21.79 mm, respectively, which were significantly different according Fisher’s LSD test (P < 0.05). C. pulchella was recovered from all the inoculated branches. No fungus was isolated from the controls, confirming Koch’s postulates (Trouillas et al. 2012). To our knowledge this is the first report of C. pulchella causing canker and branch dieback in sweet cherry trees in Chile. This new disease represents a serious threat to the Chilean cherry industry, and further research on disease control is needed.",2021-01-01 +34323935,An astonishing wealth of new proteasome homologs. ,"The proteasome is the main proteolytic machine for targeted protein degradation in archaea and eukaryotes. While some bacteria also possess the proteasome, most of them contain a simpler and more specialized homolog, the HslV protease. 
In recent years, three further homologs of the proteasome core subunits have been characterized in prokaryotes: Anbu, BPH, and connectase. With the inclusion of these members, the family of proteasome-like proteins now exhibits a range of architectural and functional forms, from the canonical proteasome, a barrel-shaped protease without pronounced intrinsic substrate specificity, to the monomeric connectase, a highly specific protein ligase. We employed systematic sequence searches to show that we have only seen the tip of the iceberg so far and that beyond the hitherto known proteasome homologs lies a wealth of distantly related, uncharacterized homologs. We describe a total of 22 novel proteasome homologs in bacteria and archaea. Using sequence and structure analysis, we analyze their evolutionary history and assess structural differences that may modulate their function. With this initial description, we aim to stimulate the experimental investigation of these novel proteasome-like family members. The protein sequences in this study are searchable in the MPI Bioinformatics Toolkit (https://toolkit.tuebingen.mpg.de) with ProtBLAST/PSI-BLAST and with HHpred (database ""proteasome_homologs""). The following data are available at https://data.mendeley.com/datasets/t48yhff7hs/3: (I) sequence alignments for each proteasome-like homolog, (II) the coordinates for their structural models, and (III) a cluster-map file, which can be navigated interactively in CLANS and gives direct access to all the sequences in this study. Supplementary data are available at Bioinformatics online.",2021-07-29 +32449936,CVCDAP: an integrated platform for molecular and clinical analysis of cancer virtual cohorts.,"Recent large-scale multi-omics studies resulted in quick accumulation of an overwhelming amount of cancer-related data, which provides an unprecedented resource to interrogate diverse questions. 
While certain existing web servers are valuable and widely used, analysis and visualization functions with regard to re-investigation of these data at cohort level are not adequately addressed. Here, we present CVCDAP, a web-based platform to deliver an interactive and customizable toolbox off the shelf for cohort-level analysis of TCGA and CPTAC public datasets, as well as user uploaded datasets. CVCDAP allows flexible selection of patients sharing common molecular and/or clinical characteristics across multiple studies as a virtual cohort, and provides dozens of built-in customizable tools for seamless genomic, transcriptomic, proteomic and clinical analysis of a single virtual cohort, as well as, to compare two virtual cohorts with relevance. The flexibility and analytic competence of CVCDAP empower experimental and clinical researchers to identify new molecular mechanisms and develop potential therapeutic approaches, by building and analyzing virtual cohorts for their subject of interests. We demonstrate that CVCDAP can conveniently reproduce published findings and reveal novel insights by two applications. The CVCDAP web server is freely available at https://omics.bjcancer.org/cvcdap/.",2020-07-01 +34260718,CoSMeD: a user-friendly web server to estimate 5-year survival probability of left-sided and right-sided colorectal cancer patients using molecular data. ,"Colorectal cancer is a heterogeneous disease with diverse prognoses between left-sided and right-sided patients; therefore, it is necessary to precisely evaluate the survival probability of side-specific colorectal cancer patients. Here, we collected multi-omics data from The Cancer Genome Atlas (TCGA) program, including gene expression, DNA methylation, and microRNA (miRNA) expression. Specificity measure (SPM) and robust likelihood-based survival analysis were used to identify 6 left-sided and 28 right-sided prognostic biomarkers. 
Compared to the performance of clinical prognostic models, the addition of these biomarkers could significantly improve the discriminatory ability and calibration in predicting side-specific 5-year survival for colorectal cancer. Additional dataset derived from Gene Expression Omnibus (GEO) was used to validate the prognostic value of side-specific genes. Finally, we constructed colorectal cancer side-specific molecular database (CoSMeD), a user-friendly interface for estimating side-specific colorectal cancer 5-year survival probability, which can lay the basis for personalized management of left-sided and right-sided colorectal cancer patients. CoSMeD is freely available at https://mulongdu.shinyapps.io/cosmed. Supplementary data are available at Bioinformatics online.",2021-07-14 +33889687,Environmental variables database from a Miocene marine stratigraphic section: A multivariate statistical analysis.,"The data presented here are related to the research article ""Miocene Atlantic transgressive-regressive events in northeastern and offshore Patagonia: A palynological perspective"" (Guler et al. 2021; https://doi.org/10.1016/j.jsames.2021.103239). A total of 60 drilled cutting samples from a 580 m-thick subsurface stratigraphic section (YPF.Ch.PV.es-1 borehole) in Península Valdés, Chubut Province, Argentina, collected every 10 m, were processed for palynological analysis. The quantitative data were statistically evaluated. In detail, the database contains: 1) raw palynological data - proxy data - from counting under transmitted light microscope; 2) four paleoenvironmental variables selected to conduct a multivariate analysis: terrestrial/marine ratio, acritarchs, outer neritic dinocyst taxa and warm-water dinocyst taxa; 3) transformed variables used for the Principal Component Analysis (PCA) and 4) the principal component scores obtained, stratigraphically ordered from the top to bottom of the borehole. 
Data from future studies in new sites combined with here presented data, can be useful to refine paleoenvironment models applied to basin analysis.",2021-03-18 +34108585,Development of a time-series shotgun metagenomics database for monitoring microbial communities at the Pacific coast of Japan.,"Although numerous metagenome, amplicon sequencing-based studies have been conducted to date to characterize marine microbial communities, relatively few have employed full metagenome shotgun sequencing to obtain a broader picture of the functional features of these marine microbial communities. Moreover, most of these studies only performed sporadic sampling, which is insufficient to understand an ecosystem comprehensively. In this study, we regularly conducted seawater sampling along the northeastern Pacific coast of Japan between March 2012 and May 2016. We collected 213 seawater samples and prepared size-based fractions to generate 454 subsets of samples for shotgun metagenome sequencing and analysis. We also determined the sequences of 16S rRNA (n = 111) and 18S rRNA (n = 47) gene amplicons from smaller sample subsets. We thereafter developed the Ocean Monitoring Database for time-series metagenomic data ( http://marine-meta.healthscience.sci.waseda.ac.jp/omd/ ), which provides a three-dimensional bird's-eye view of the data. This database includes results of digital DNA chip analysis, a novel method for estimating ocean characteristics such as water temperature from metagenomic data. Furthermore, we developed a novel classification method that includes more information about viruses than that acquired using BLAST. We further report the discovery of a large number of previously overlooked (TAG)n repeat sequences in the genomes of marine microbes. We predict that the availability of this time-series database will lead to major discoveries in marine microbiome research.",2021-06-09 +33759252,MS Amanda 2.0: Advancements in the standalone implementation.,"

Rationale

Database search engines are the preferred method to identify peptides in mass spectrometry data. However, valuable software is in this context not only defined by a powerful algorithm to separate correct from false identifications, but also by constant maintenance and continuous improvements.

Methods

In 2014, we presented our peptide identification algorithm MS Amanda, showing its suitability for identifying peptides in high-resolution tandem mass spectrometry data and its ability to outperform widely used tools to identify peptides. Since then, we have continuously worked on improvements to enhance its usability and to support new trends and developments in this fast-growing field, while keeping the original scoring algorithm to assess the quality of a peptide spectrum match unchanged.

Results

We present the outcome of these efforts, MS Amanda 2.0, a faster and more flexible standalone version with the original scoring algorithm. The new implementation has led to a 3-5× speedup, is able to handle new ion types and supports standard data formats. We also show that MS Amanda 2.0 works best when using only the most common ion types in a particular search instead of all possible ion types.

Conclusions

MS Amanda is available free of charge from https://ms.imp.ac.at/index.php?action=msamanda.",2021-06-01 +34705525,Cost-Effectiveness and Cost Utility of Treatment of Attention-Deficit/Hyperactivity Disorder: A Systematic Review.,"Objectives: This systematic review provides an overview of full economic evaluations of attention-deficit/hyperactivity disorder (ADHD) treatments, evaluates their outcomes, and highlights gaps in the literature. Data Sources: Electronic databases were searched for full economic evaluations of ADHD treatments for children, adolescents, or adults published in English or Dutch. Results: Twenty-nine studies met the inclusion criteria. Almost all studies that compared medication or psychosocial treatment to no treatment, placebo, or care as usual indicated that medication and psychosocial treatment were cost-effective compared to the control group. Stimulant treatment appeared to be cost-effective for the treatment of ADHD in children and adolescents. Only few studies focus on treatments in adults and psychosocial treatments and the number of studies with long time horizons and without industry funding is limited. Conclusions: Despite the rising interest in cost-effectiveness, this systematic review shows that more cost-effectiveness research of higher quality is warranted to aid in the optimal use of available treatments and resources for individuals with ADHD. Specifically, more studies should focus on treatments in adults and psychosocial treatments, and more studies with long time horizons and without industry funding are warranted. Nevertheless, we can conclude that treating ADHD is generally cost-effective compared to no treatment. PROSPERO: CRD42017060074. Available from: https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=60074.",2021-10-27 +33890853,Transcriptional profiling of mouse peripheral nerves to the single-cell level to build a sciatic nerve ATlas (SNAT). 
,"Peripheral nerves are organ-like structures containing diverse cell types to optimize function. This interactive assembly includes mostly axon-associated Schwann cells, but also endothelial cells of supporting blood vessels, immune system-associated cells, barrier-forming cells of the perineurium surrounding and protecting nerve fascicles, and connective tissue-resident cells within the intra-fascicular endoneurium and inter-fascicular epineurium. We have established transcriptional profiles of mouse sciatic nerve-inhabitant cells to foster the fundamental understanding of peripheral nerves. To achieve this goal, we have combined bulk RNA sequencing of developing sciatic nerves up to the adult with focused bulk and single-cell RNA sequencing of Schwann cells throughout postnatal development, extended by single-cell transcriptome analysis of the full sciatic nerve both perinatally and in the adult. The results were merged in the transcriptome resource Sciatic Nerve ATlas (SNAT: https://www.snat.ethz.ch). We anticipate that insights gained from our multi-layered analysis will serve as valuable interactive reference point to guide future studies.",2021-04-23 +34241085,OCELOT: An infrastructure for data-driven research to discover and design crystalline organic semiconductors.,"Materials design and discovery are often hampered by the slow pace and materials and human costs associated with Edisonian trial-and-error screening approaches. Recent advances in computational power, theoretical methods, and data science techniques, however, are being manifest in a convergence of these tools to enable in silico materials discovery. Here, we present the development and deployment of computational materials data and data analytic approaches for crystalline organic semiconductors. 
The OCELOT (Organic Crystals in Electronic and Light-Oriented Technologies) infrastructure, consisting of a Python-based OCELOT application programming interface and OCELOT database, is designed to enable rapid materials exploration. The database contains a descriptor-based schema for high-throughput calculations that have been implemented on more than 56 000 experimental crystal structures derived from 47 000 distinct molecular structures. OCELOT is open-access and accessible via a web-user interface at https://oscar.as.uky.edu.",2021-05-01 +34252246,Exploring Curated Conformational Ensembles of Intrinsically Disordered Proteins in the Protein Ensemble Database.,"The Protein Ensemble Database (PED; https://proteinensemble.org/) is the major repository of conformational ensembles of intrinsically disordered proteins (IDPs). Conformational ensembles of IDPs are primarily provided by their authors or occasionally collected from literature, and are subsequently deposited in PED along with the corresponding structured, manually curated metadata. The modeling of conformational ensembles usually relies on experimental data from small-angle X-ray scattering (SAXS), fluorescence resonance energy transfer (FRET), NMR spectroscopy, and molecular dynamics (MD) simulations, or a combination of these techniques. The growing number of scientific studies based on these data, along with the astounding and swift progress in the field of protein intrinsic disorder, has required a significant update and upgrade of PED, first published in 2014. To this end, the database was entirely renewed in 2020 and now has a dedicated team of biocurators providing manually curated descriptions of the methods and conditions applied to generate the conformational ensembles and for checking consistency of the data. Here, we present a detailed description on how to explore PED with its protein pages and experimental pages, and how to interpret entries of conformational ensembles. 
We describe how to efficiently search conformational ensembles deposited in PED by means of its web interface and API. We demonstrate how to make sense of the PED protein page and its associated experimental entry pages with reference to the yeast Sic1 use case. © 2021 The Authors. Current Protocols published by Wiley Periodicals LLC. Basic Protocol 1: Performing a search in PED Support Protocol 1: Programmatic access with the PED API Basic Protocol 2: Interpreting the protein page and the experimental entry page-the Sic1 use case Support Protocol 2: Downloading options Support Protocol 3: Understanding the validation report-the Sic1 use case Basic Protocol 3: Submitting new conformational ensembles to PED Basic Protocol 4: Providing feedback in PED.",2021-07-01 +32209698,An atlas of human metabolism. ,"Genome-scale metabolic models (GEMs) are valuable tools to study metabolism and provide a scaffold for the integrative analysis of omics data. Researchers have developed increasingly comprehensive human GEMs, but the disconnect among different model sources and versions impedes further progress. We therefore integrated and extensively curated the most recent human metabolic models to construct a consensus GEM, Human1. We demonstrated the versatility of Human1 through the generation and analysis of cell- and tissue-specific models using transcriptomic, proteomic, and kinetic data. We also present an accompanying web portal, Metabolic Atlas (https://www.metabolicatlas.org/), which facilitates further exploration and visualization of Human1 content. Human1 was created using a version-controlled, open-source model development framework to enable community-driven curation and refinement. 
This framework allows Human1 to be an evolving shared resource for future studies of human health and disease.",2020-03-24 +34527188,MyoData: An expression knowledgebase at single cell/nucleus level for the discovery of coding-noncoding RNA functional interactions in skeletal muscle.,"Non-coding RNAs represent the largest part of transcribed mammalian genomes and prevalently exert regulatory functions. Long non-coding RNAs (lncRNAs) and microRNAs (miRNAs) can modulate the activity of each other. Skeletal muscle is the most abundant tissue in mammals. It is composed of different cell types with myofibers that represent the smallest complete contractile system. Considering that lncRNAs and miRNAs are more cell type-specific than coding RNAs, to understand their function it is imperative to evaluate their expression and action within single myofibers. In this database, we collected gene expression data for coding and non-coding genes in single myofibers and used them to produce interaction networks based on expression correlations. Since biological pathways are more informative than networks based on gene expression correlation, to understand how altered genes participate in the studied phenotype, we integrated KEGG pathways with miRNAs and lncRNAs. The database also integrates single nucleus gene expression data on skeletal muscle in different patho-physiological conditions. We demonstrated that these networks can serve as a framework from which to dissect new miRNA and lncRNA functions to experimentally validate. Some interactions included in the database have been previously experimentally validated using high throughput methods. These can be the basis for further functional studies. Using database information, we demonstrate the involvement of miR-149, -214 and let-7e in mitochondria shaping; the ability of the lncRNA Pvt1 to mitigate the action of miR-27a via sponging; and the regulatory activity of miR-214 on Sox6 and Slc16a3. 
The MyoData is available at https://myodata.bio.unipd.it.",2021-07-26 +34122478,PSDX: A Comprehensive Multi-Omics Association Database of Populus trichocarpa With a Focus on the Secondary Growth in Response to Stresses.,"Populus trichocarpa (P. trichocarpa) is a model tree for the investigation of wood formation. In recent years, researchers have generated a large number of high-throughput sequencing data in P. trichocarpa. However, no comprehensive database that provides multi-omics associations for the investigation of secondary growth in response to diverse stresses has been reported. Therefore, we developed a public repository that presents comprehensive measurements of gene expression and post-transcriptional regulation by integrating 144 RNA-Seq, 33 ChIP-seq, and six single-molecule real-time (SMRT) isoform sequencing (Iso-seq) libraries prepared from tissues subjected to different stresses. All the samples from different studies were analyzed to obtain gene expression, co-expression network, and differentially expressed genes (DEG) using unified parameters, which allowed comparison of results from different studies and treatments. In addition to gene expression, we also identified and deposited pre-processed data about alternative splicing (AS), alternative polyadenylation (APA) and alternative transcription initiation (ATI). The post-transcriptional regulation, differential expression, and co-expression network datasets were integrated into a new P. trichocarpa Stem Differentiating Xylem (PSDX) database (http://forestry.fafu.edu.cn/db/SDX), which further highlights gene families of RNA-binding proteins and stress-related genes. The PSDX also provides tools for data query, visualization, a genome browser, and the BLAST option for sequence-based query. Much of the data is also available for bulk download. The availability of PSDX contributes to the research related to the secondary growth in response to stresses in P. 
trichocarpa, which will provide new insights that can be useful for the improvement of stress tolerance in woody plants.",2021-05-20 +31830689,"Variants of DNA mismatch repair genes derived from 33,998 Chinese individuals with and without cancer reveal their highly ethnic-specific nature.","

Purpose

DNA mismatch repair (MMR) genes play important roles in maintaining genome stability. Mutations in MMR genes disrupt their mismatch repair function, cause genome instability and lead to increased risk of cancer in the mutation carriers as represented by Lynch Syndrome. Studies have identified a large number of MMR variants, mostly in the Caucasian population, whereas data from non-Caucasian populations remain poorly illustrated. With the population size of 1.4 billion, knowledge of MMR variants in the Chinese population can be valuable in understanding the roles of ethnic MMR variation and cancer and to further guide clinical applications in MMR-related cancer prevention and treatment in the Chinese population. In this study, we systematically analysed the MMR variants from the Chinese population.

Experimental design

We performed a comprehensive MMR data mining and collected all the MMR variation data reported from 33,998 Chinese individuals consisting of 23,938 cancer and 10,060 non-cancer cases between January 1997 to May 2019. For the collected data, we performed standardisation following Human Genome Variation Society nomenclature and reannotated the MMR variant data following American College of Medical Genetics and Genomics guidelines and comparing with non-Chinese MMR data on various aspects.

Results

We identified a total of 540 MMR variants in the Chinese population, including 194 in MLH1, 181 in MSH2, 59 in MSH6, 53 in PMS2 single-base/indel changes and 53 large deletions/duplications in MLH1, MSH2, MSH6 and PMS2, respectively. We determined that the pathogenic/likely pathogenic carrier rate in the Chinese population was 1.6%. Comparative analysis in variant spectrum, variant types, clinical classification and founder mutations showed substantial differences of MMR variation between Chinese and non-Chinese populations and the fact that over 90% of the variants were only present in the Chinese ethnicity reveals the highly ethnic-specific nature of the Chinese MMR variation. We also developed an open-access database, dbMMR-Chinese, to host all data (https://dbMMR-chinese.fhs.um.edu.mo). The rich MMR data from a large non-Caucasian population should be valuable to study MMR variation and its relationship with cancer and provide a valuable reference resource for MMR-related cancer prevention and treatment.

Conclusion

Our study provides the largest MMR data set from a single non-Caucasian population and reveals that MMR variation in humans can be highly ethnic-specific.",2019-12-09 +,"698 Sleep quality, depression and anxiety in a community sample of Habana, Cuba during the 2020 COVID-19 pandemic","Abstract

Introduction

The 2020 Coronavirus disease 19 (COVID-19) pandemic has infected and killed millions of persons. To avoid virus spread, stay-at-home orders and social distancing measures were implemented worldwide. These measures have caused changes in work schedules and, subsequently, sleep habits. This study aims to examine sleep disturbances, anxiety and depression in a random community in Havana, Cuba during the pandemic lockdown period.

Methods

This is a descriptive cross-sectional study performed in a randomly selected neighbourhood, via direct door-to-door survey. We applied four different surveys: 1) Pittsburgh Sleep Quality Index (PSQI); 2) Insomnia Severity Index (ISI); 3) Epworth Sleepiness Scale (ESS) and 4) Hospital Anxiety and Depression Scales (HADS) questionnaire. Descriptive statistics will be applied using StatSoft, Inc. (2011) STATISTICA (data analysis software system), version 10 (http://www.statsoft.com).

Results

A total of 366 adult subjects were surveyed and abnormal values were observed in the following percentages: 60.65% in the PSQI, 34.51% in the ISI, 14.74% in the ESS and 36.61% in the HADS for depression and 40.43% in the HADS for anxiety. Poor sleep and depression were more common in women and the elderly (p<0.05 for all comparisons). Anxiety and insomnia were seen mostly in subjects with higher education and working during this period (p<0.05 for all comparisons). Poor sleep correlated with insomnia, depression and anxiety (p < 0.001 for all comparisons).

Conclusion

There were sleep quality disruption in large percentage of subjects during the COVID-19 pandemic lockdown. Poor sleep and depression were worse in women and the elderly. Insomnia and anxiety were seen more in younger subjects that continue to work during this time. Support (if any):",2021-05-01 +,First Report of Erysiphe necator Causing Powdery Mildew to Rubber Tree (Hevea brasiliensis) in Brazil,"The rubber tree (Hevea brasiliensis Muell. Arg.) is widely used for latex production, representing an important economic resource for the industry. In December 2019, a plantation located in the municipality of São Manuel, São Paulo State, Brazil (22°46′24.6″S, 48°34′32.0″W) had plants showing whitish mycelial growth of powdery aspect on the abaxial surface of mature leaflets. Microscopic analysis revealed a pathogen with hyaline hyphae, which were septate, branched, and had lobed or multilobed appressoria. Conidiophores ranged from 53.9 to 120.3 µm (X¯ = 83.5 μm, n = 100) in length and contained foot cells that were upright or had one or two slight twists, ranging from 14.07 to 51.04 µm (X¯ = 28.91 μm, n = 100). Conidia were formed singly (not catenescent), ellipsoid to cylindrical, sometimes presenting some curvatures in the middle portion, resembling an hourglass shape, without fibrosin bodies, and measured 25.7 to 44.3 μm (X¯ = 35.8 μm, n = 100) in length and 11.4 to 18.7 μm (X¯ = 14.8 μm, n = 100) in width. Germination is of the Pseudoidium type, showing a lobed or multilobed terminal conidial appressorium, which resembled the characteristics of the anamorph-typified genus Pseudoidium (Braun and Cook 2012), which is now, according to the current International Code of Nomenclature, a heterotypic synonym of Erysiphe. The presence of the sexual morph (chasmothecium) was not observed. 
To confirm pathogenicity, five healthy rubber trees were inoculated with a solution of 2 × 10⁴ conidia/ml (sprinkled until draining) and maintained in a growth chamber, at 20 ± 2°C, 80 to 100% relative humidity, and 12-h artificial photoperiod. For control, another five plants were not inoculated and were maintained under the same conditions. The first symptoms appeared after 10 days, and after 15 days all inoculated plants exhibited symptoms resembling those observed in the field. To confirm the pathogen identity, DNA extraction was performed from leaves containing lesions with sporulation (from plants both in the field and undergoing the pathogenicity test), using the CTAB protocol (Doyle and Doyle 1987). PCR analysis was conducted with a Fermentas PCR Master Mix 2× kit (Thermo Scientific) and the primer pairs ITS-1/ITS-4 (White et al. 1990) and PM3/TW14 (Mori et al. 2000; Takamatsu and Kano 2001), which amplify the ITS1/5.8S/ITS2 and 28S regions, respectively. All PCR products were bidirectionally sequenced and compared with sequences in the GenBank, using the BLASTn tool (https://blast.ncbi.nlm.nih.gov/Blast.cgi). From each primer pair, two sequences were obtained: one from plants infected in the field and one from plants infected in pathogenicity tests. rDNA/ITS sequences (GenBank nos. MT182958 and MT180480) shared 99.35% (609/613 identical bases) and 99.51% (608/611 identical bases) identity, respectively, to the isolate PTM1 of Erysiphe necator from Vitis vinifera (GenBank MK357386), whereas 28S sequences (GenBank nos. MT182732 and MT182949) shared 97.93% (569/581 identical nucleotides, GenBank LC028996) and 99.28% (823/829 identical nucleotides, GenBank MK357423) identity, respectively, to two isolates of E. necator, also infecting V. vinifera. To date, only E. quercicola was related to powdery mildew in rubber trees (Liyanage et al. 2016); therefore, this is the first report of E. necator infecting rubber trees in the world. In Brazil, E. 
necator was found infecting Anacardium occidentale (Fonseca et al. 2019) and Caryocar brasiliensis (Braun et al. 2017) and is one of the major pathogens of the Vitaceae family (Gadoury et al. 2012). The present report also shows an adaptation of this fungus to new hosts, interestingly so far only in Brazil, which adds further challenges to epidemiological studies.",2020-11-01 +32286817,TeroKit: A Database-Driven Web Server for Terpenome Research.,"Natural products are the major resource of drug discovery, and terpenoids represent the largest family of natural products. Terpenome is defined as all terpenoid-like and terpenoid-derived natural compounds, including the terpenoids, steroids, and their derivatives. Herein, aiming to navigate the chemical and biological space of terpenome, the first comprehensive database dedicated to terpenome research has been developed by collecting over 110 000 terpenome molecules from various resources, distributed in 14 351 species, belonging to 1109 families, and showing activity against 1366 biological targets. Much of the publically available information or computationally predicted properties for each terpenome molecule is annotated and integrated into TeroKit (http://terokit.qmclab.com/), serving as free Web server for academic use. Moreover, several practical toolkits, such as target profiling and conformer generation modules, are also implemented to facilitate the drug discovery of terpenome.",2020-04-20 +33118010,The Association Between Coffee Consumption and Metabolic Syndrome in Adults: A Systematic Review and Meta-Analysis.,"Previous meta-analyses that found an inverse association between coffee consumption and metabolic syndrome pooled data from cross-sectional and longitudinal studies, which could lead to potentially misleading conclusions. Hence, this work aimed to reassess this association by analyzing data from the 2 types of studies separately and including recent studies. 
Online databases including PubMed, Scopus, Embase, The Cumulative Index to Nursing and Allied Health Literature (CINAHL) Plus, and Science Direct were searched for relevant studies published up to July 2020. Both cross-sectional and longitudinal studies were included if published after 1999, reported both effect estimates and CIs, and presented results adjusted for confounding variables. Data of the highest coffee consumption level in each study, as well as those of medium consumption levels in studies with ≥3 consumption categories, were pooled using random-effect models, with sex-stratified and sex-adjusted results being analyzed separately. Results were obtained based on data from 13 cross-sectional studies involving 280,803 participants and 2 longitudinal studies involving 17,014 participants. The overall sex-adjusted association of the highest consumption level was not significant (n = 9 studies; OR: 0.88; 95% CI: 0.70, 1.10; I2: 91.5%) and the 2 longitudinal studies both yielded no association. Subgroup analysis revealed inverse associations in both males and females, as well as in Caucasians with medium coffee consumption (n = 4 studies, OR: 0.88; 95% CI: 0.84, 0.93; I2: 0%). Although residual confounding could affect the results of this meta-analysis, our findings suggested with a low certainty that coffee consumption may not be associated with metabolic syndrome, a finding that is different from those of previous meta-analyses and could be due to variation in characteristics of study participants. More longitudinal studies are also needed to further assess the temporal association between coffee consumption and metabolic syndrome. This meta-analysis was registered at https://www.crd.york.ac.uk/prospero as CRD42018110650.",2021-06-01 +,"A Spatial Web Application to Explore the Interactions between Human Mobility, Government Policies, and COVID-19 Cases","Reports of coronavirus disease 2019 (COVID-19) cases began in December 2019. 
Soon after, the virus had spread around the world and became a pandemic. Social restrictions, quarantines, and other governmental policies in response to the pandemic altered normal operations across the world. One area significantly affected is human mobility. Typical movement patterns have been hindered by the pandemic. But inversely, mobility patterns can influence patterns of the virus. With this in mind, we created an interactive web application to visualize in near-real time the relationship between the COVID-19 pandemic and human mobility, as well as the impact of governmental policies at different spatial scales. The web application allows users to select a country at the global scale or a state or county for the USA and then displays a corresponding plot that compares human mobility to COVID-19 cases across time for the location, as well as to policy data. The application is useful for quickly revealing insightful patterns. First, the initial impact of the COVID-19 pandemic was a rather sudden decrease in mobility. Second, a relationship exists between mobility and COVID-19 offset by a lag, but that lag is not consistent over space or time. Third, spatial autocorrelation of relationship is apparent, meaning locations near each other share similar patterns. Overall, the application is a useful data visualization tool that helps uncover patterns that might otherwise go unnoticed. The application is available at this link: https://chrischapin7.shinyapps.io/covid19_vs_humanmobility/",2021-01-01 +34012763,Benchmarking mass spectrometry based proteomics algorithms using a simulated database. ,"Protein sequencing algorithms process data from a variety of instruments that has been generated under diverse experimental conditions. Currently there is no way to predict the accuracy of an algorithm for a given data set. Most of the published algorithms and associated software has been evaluated on limited number of experimental data sets. 
However, these performance evaluations do not cover the complete search space the algorithm and the software might encounter in the real world. To this end, we present a database of simulated spectra that can be used to benchmark any spectra to peptide search engine. We demonstrate the usability of this database by benchmarking two popular peptide sequencing engines. We show wide variation in the accuracy of peptide deductions and a complete quality profile of a given algorithm can be useful for practitioners and algorithm developers. All benchmarking data is available at https://users.cs.fiu.edu/~fsaeed/Benchmark.html.",2021-03-26 +34318869,Functionathon: a manual data mining workflow to generate functional hypotheses for uncharacterized human proteins and its application by undergraduate students. ,"About 10% of human proteins have no annotated function in protein knowledge bases. A workflow to generate hypotheses for the function of these uncharacterized proteins has been developed, based on predicted and experimental information on protein properties, interactions, tissular expression, subcellular localization, conservation in other organisms, as well as phenotypic data in mutant model organisms. This workflow has been applied to seven uncharacterized human proteins (C6orf118, C7orf25, CXorf58, RSRP1, SMLR1, TMEM53 and TMEM232) in the frame of a course-based undergraduate research experience named Functionathon organized at the University of Geneva to teach undergraduate students how to use biological databases and bioinformatics tools and interpret the results. C6orf118, CXorf58 and TMEM232 were proposed to be involved in cilia-related functions; TMEM53 and SMLR1 were proposed to be involved in lipid metabolism and C7orf25 and RSRP1 were proposed to be involved in RNA metabolism and gene expression. Experimental strategies to test these hypotheses were also discussed. 
The results of this manual data mining study may contribute to the project recently launched by the Human Proteome Organization (HUPO) Human Proteome Project aiming to fill gaps in the functional annotation of human proteins. Database URL: http://www.nextprot.org.",2021-07-01 +34122663,Context aware benchmarking and tuning of a TByte-scale air quality database and web service.,"We present context-aware benchmarking and performance engineering of a mature TByte-scale air quality database system which was created by the Tropospheric Ozone Assessment Report (TOAR) and contains one of the world's largest collections of near-surface air quality measurements. A special feature of our data service https://join.fz-juelich.de is on-demand processing of several air quality metrics directly from the TOAR database. As a service that is used by more than 350 users of the international air quality research community, our web service must be easily accessible and functionally flexible, while delivering good performance. The current on-demand calculations of air quality metrics outside the database together with the necessary transfer of large volume raw data are identified as the major performance bottleneck. In this study, we therefore explore and benchmark in-database approaches for the statistical processing, which results in performance enhancements of up to 32%.",2021-06-07 +32632289,Dynamics in protein translation sustaining T cell preparedness.,"In response to pathogenic threats, naive T cells rapidly transition from a quiescent to an activated state, yet the underlying mechanisms are incompletely understood. Using a pulsed SILAC approach, we investigated the dynamics of mRNA translation kinetics and protein turnover in human naive and activated T cells. Our datasets uncovered that transcription factors maintaining T cell quiescence had constitutively high turnover, which facilitated their depletion following activation. 
Furthermore, naive T cells maintained a surprisingly large number of idling ribosomes as well as 242 repressed mRNA species and a reservoir of glycolytic enzymes. These components were rapidly engaged following stimulation, promoting an immediate translational and glycolytic switch to ramp up the T cell activation program. Our data elucidate new insights into how T cells maintain a prepared state to mount a rapid immune response, and provide a resource of protein turnover, absolute translation kinetics and protein synthesis rates in T cells ( https://www.immunomics.ch ).",2020-07-06 +34046592,A map of the SARS-CoV-2 RNA structurome.,"SARS-CoV-2 has exploded throughout the human population. To facilitate efforts to gain insights into SARS-CoV-2 biology and to target the virus therapeutically, it is essential to have a roadmap of likely functional regions embedded in its RNA genome. In this report, we used a bioinformatics approach, ScanFold, to deduce the local RNA structural landscape of the SARS-CoV-2 genome with the highest likelihood of being functional. We recapitulate previously-known elements of RNA structure and provide a model for the folding of an essential frameshift signal. Our results find that SARS-CoV-2 is greatly enriched in unusually stable and likely evolutionarily ordered RNA structure, which provides a large reservoir of potential drug targets for RNA-binding small molecules. Results are enhanced via the re-analyses of publicly-available genome-wide biochemical structure probing datasets that are broadly in agreement with our models. Additionally, ScanFold was updated to incorporate experimental data as constraints in the analysis to facilitate comparisons between ScanFold and other RNA modelling approaches. Ultimately, ScanFold was able to identify eight highly structured/conserved motifs in SARS-CoV-2 that agree with experimental data, without explicitly using these data. 
All results are made available via a public database (the RNAStructuromeDB: https://structurome.bb.iastate.edu/sars-cov-2) and model comparisons are readily viewable at https://structurome.bb.iastate.edu/sars-cov-2-global-model-comparisons.",2021-05-22 +34285834,ERpred: a web server for the prediction of subtype-specific estrogen receptor antagonists.,"Estrogen receptors alpha and beta (ERα and ERβ) are responsible for breast cancer metastasis through their involvement of clinical outcomes. Estradiol and hormone replacement therapy targets both ERs, but this often leads to an increased risk of breast and endometrial cancers as well as thromboembolism. A major challenge is posed for the development of compounds possessing ER subtype specificity. Herein, we present a large-scale classification structure-activity relationship (CSAR) study of inhibitors from the ChEMBL database which consisted of an initial set of 11,618 compounds for ERα and 7,810 compounds for ERβ. The IC50 was selected as the bioactivity unit for further investigation and after the data curation process, this led to a final data set of 1,593 and 1,281 compounds for ERα and ERβ, respectively. We employed the random forest (RF) algorithm for model building and of the 12 fingerprint types, models built using the PubChem fingerprint was the most robust (Ac of 94.65% and 92.25% and Matthews correlation coefficient (MCC) of 89% and 76% for ERα and ERβ, respectively) and therefore selected for feature interpretation. Results indicated the importance of features pertaining to aromatic rings, nitrogen-containing functional groups and aliphatic hydrocarbons. Finally, the model was deployed as the publicly available web server called ERpred at http://codes.bio/erpred where users can submit SMILES notation as the input query for prediction of the bioactivity against ERα and ERβ.",2021-07-09 +34022814,The Rhododendron Plant Genome Database (RPGD): a comprehensive online omics database for Rhododendron.,"

Background

The genus Rhododendron L. has been widely cultivated for hundreds of years around the world. Members of this genus are known for great ornamental and medicinal value. Owing to advances in sequencing technology, genomes and transcriptomes of members of the Rhododendron genus have been sequenced and published by various laboratories. With increasing amounts of omics data available, a centralized platform is necessary for effective storage, analysis, and integration of these large-scale datasets to ensure consistency, independence, and maintainability.

Results

Here, we report our development of the Rhododendron Plant Genome Database (RPGD; http://bioinfor.kib.ac.cn/RPGD/ ), which represents the first comprehensive database of Rhododendron genomics information. It includes large amounts of omics data, including genome sequence assemblies for R. delavayi, R. williamsianum, and R. simsii, gene expression profiles derived from public RNA-Seq data, functional annotations, gene families, transcription factor identification, gene homology, simple sequence repeats, and chloroplast genome. Additionally, many useful tools, including BLAST, JBrowse, Orthologous Groups, Genome Synteny Browser, Flanking Sequence Finder, Expression Heatmap, and Batch Download were integrated into the platform.

Conclusions

RPGD is designed to be a comprehensive and helpful platform for all Rhododendron researchers. Believe that RPGD will be an indispensable hub for Rhododendron studies.",2021-05-22 +,FastD: Fast detection of insecticide target‐site mutations and overexpressed detoxification genes in insect populations from RNA‐Seq data,"Abstract Target‐site mutations and detoxification gene overexpression are two major mechanisms conferring insecticide resistance. Molecular assays applied to detect these resistance genetic markers are time‐consuming and with high false‐positive rates. RNA‐Seq data contains information on the variations within expressed genomic regions and expression of detoxification genes. However, there is no corresponding method to detect resistance markers at present. Here, we collected 66 reported resistance mutations of four insecticide targets (AChE, VGSC, RyR, and nAChR) from 82 insect species. Next, we obtained 403 sequences of the four target genes and 12,665 sequences of three kinds of detoxification genes including P450s, GSTs, and CCEs. Then, we developed a Perl program, FastD, to detect target‐site mutations and overexpressed detoxification genes from RNA‐Seq data and constructed a web server for FastD (http://www.insect-genome.com/fastd). The estimation of FastD on simulated RNA‐Seq data showed high sensitivity and specificity. We applied FastD to detect resistant markers in 15 populations of six insects, Plutella xylostella, Aphis gossypii, Anopheles arabiensis, Musca domestica, Leptinotarsa decemlineata and Apis mellifera. Results showed that 11 RyR mutations in P. xylostella, one nAChR mutation in A. gossypii, one VGSC mutation in A. arabiensis and five VGSC mutations in M. domestica were found to be with frequency difference >40% between resistant and susceptible populations including previously confirmed mutations G4946E in RyR, R81T in nAChR and L1014F in VGSC. 
And 49 detoxification genes were found to be overexpressed in resistant populations compared with susceptible populations including previously confirmed detoxification genes CYP6BG1, CYP6CY22, CYP6CY13, CYP6P3, CYP6M2, CYP6P4 and CYP4G16. The candidate target‐site mutations and detoxification genes were worth further validation. Resistance estimates according to confirmed markers were consistent with population phenotypes, confirming the reliability of this program in predicting population resistance at omics‐level. We developed a program called FastD to detect the insecticide target‐site mutations and overexpressed detoxification genes from RNA‐Seq data and constructed a corresponding web server for FastD (http://www.insect-genome.com/fastd) in this article.",2020-11-21 +34510194,MODB: a comprehensive mitochondrial genome database for Mollusca. ,"Mollusca is the largest marine phylum, comprising about 23% of all named marine organisms, Mollusca systematics are still in flux, and an increase in human activities has affected Molluscan reproduction and development, strongly impacting diversity and classification. Therefore, it is necessary to explore the mitochondrial genome of Mollusca. The Mollusca mitochondrial database (MODB) was established for the Life and Health Big Data Center of Yantai University. This database is dedicated to collecting, sorting and sharing basic information regarding mollusks, especially their mitochondrial genome information. We also integrated a series of analysis and visualization tools, such as BLAST, MUSCLE, GENEWISE and LASTZ. In particular, a phylogenetic tree was implemented in this database to visualize the evolutionary relationships between species. The original version contains 616 species whose mitochondrial genomes have been sequenced. The database provides comprehensive information and analysis platform for researchers interested in understanding the biological characteristics of mollusks. 
Database URL: http://modb.ytu.edu.cn/.",2021-09-01 +31770586,Plant virus interaction mechanism and associated pathways in mosaic disease of small cardamom (Elettaria cardamomum Maton) by RNA-Seq approach.,"Small cardamom (Elettaria cardamomum), grown in limited coastal tropical countries is one of the costliest and widely exported agri-produce having global turnover of >10 billion USD. Mosaic/marble disease is one of the major impediments that requires understanding of disease at molecular level. Neither whole genome sequence nor any genomic resources are available, thus RNA seq approach can be a rapid and economical alternative. De novo transcriptome assembly was done with Illumina Hiseq data. A total of 5317 DEGs, 2267 TFs, 114 pathways and 175,952 genic region putative markers were obtained. Gene regulatory network analysis deciphered molecular events involved in marble disease. This is the first transcriptomic report revealing disease mechanism mediated by perturbation in auxin homeostasis and ethylene signalling leading to senescence. The web-genomic resource (SCMVTDb) catalogues putative molecular markers, candidate genes and transcript information. SCMVTDb can be used in germplasm improvement against mosaic disease in endeavour of small cardamom productivity. Availability of genomic resource, SCMVTDb: http://webtom.cabgrid.res.in/scmvtdb/.",2019-11-23 +31361490,FAME 3: Predicting the Sites of Metabolism in Synthetic Compounds and Natural Products for Phase 1 and Phase 2 Metabolic Enzymes.,"In this work we present the third generation of FAst MEtabolizer (FAME 3), a collection of extra trees classifiers for the prediction of sites of metabolism (SoMs) in small molecules such as drugs, druglike compounds, natural products, agrochemicals, and cosmetics. FAME 3 was derived from the MetaQSAR database ( Pedretti et al. J. Med. Chem. 
2018 , 61 , 1019 ), a recently published data resource on xenobiotic metabolism that contains more than 2100 substrates annotated with more than 6300 experimentally confirmed SoMs related to redox reactions, hydrolysis and other nonredox reactions, and conjugation reactions. In tests with holdout data, FAME 3 models reached competitive performance, with Matthews correlation coefficients (MCCs) ranging from 0.50 for a global model covering phase 1 and phase 2 metabolism, to 0.75 for a focused model for phase 2 metabolism. A model focused on cytochrome P450 metabolism yielded an MCC of 0.57. Results from case studies with several synthetic compounds, natural products, and natural product derivatives demonstrate the agreement between model predictions and literature data even for molecules with structural patterns clearly distinct from those present in the training data. The applicability domains of the individual models were estimated by a new, atom-based distance measure (FAMEscore) that is based on a nearest-neighbor search in the space of atom environments. FAME 3 is available via a public web service at https://nerdd.zbh.uni-hamburg.de/ and as a self-contained Java software package, free for academic and noncommercial research.",2019-08-13 +,Diversity and antimicrobial activity of culturable fungi associated with sea anemone Anthopleura xanthogrammica,"The main objective of this study was to isolate fungi associated with Anthopleura xanthogrammica and measure their antimicrobial and enzymatic activities. A total of 93 fungal strains associated with A. xanthogrammica were isolated in this study, of which 32 isolates were identified using both morphological characteristics and internal transcribed spacer (ITS) sequence analysis. The antibacterial activities of 32 fungal isolates were tested against Bacillus subtilis, Staphylococcus aureus, Escherichia coli, Edwardsiella tarda, Vibrio harveyi, Fusarium oxysporum, and Pyricularia oryzae by agar diffusion assay. 
Extracellular hydrolytic enzyme activities of the fungal isolates were determined by agar diffusion assays. Enzyme activities were detected from clear halo size. The isolated fungi belonged to 18 genera within 7 taxonomic orders of 1 phylum. The genera Aspergillaceae were the most diverse and common. The antimicrobial activities of 32 isolates were evaluated, and 19 (59.4%) of the fungal isolates displayed unique antimicrobial activities. All fungal strains displayed at least one enzyme activity. The most common enzyme activities in the fungi isolates were amylase and protease, while the least common were pectinase and xylanase. This is the first report on the sea anemone-derived fungi with antimicrobial and enzyme activities. Results indicated that sea anemone is a hot spot of fungal diversity and a rich resource of bioactive natural products. How to cite: Liu S, Ahmed S, Zhang C, et al. Diversity and antimicrobial activity of culturable fungi associated with sea anemone Anthopleura xanthogrammica. Electron J Biotechnol 2020;44. https://doi.org/10.1016/j.ejbt.2020.01.003",2020-03-01 +32117995,Nc2Eye: A Curated ncRNAomics Knowledgebase for Bridging Basic and Clinical Research in Eye Diseases.,"Eye diseases (EDs) represent a group of disorders affecting the visual system, most of which can lead to visual impairment and blindness. Accumulating evidence reveals that non-coding RNAs (ncRNAs) are closely associated with a wide variety of EDs. However, abundant associations between ncRNAs and EDs are scattered across the published literature, obstructing a global view of ncRNA-ED associations. A public resource of high-quality manually curated ncRNAomics knowledge associated with EDs remains unavailable. To address this gap, we thus developed Nc2Eye (http://nc2eye.bio-data.cn/), which is the first knowledgebase dedicated to providing a comprehensive ncRNAomics resource for bridging basic and clinical research in EDs. 
Through a comprehensive review of more than 2400 published papers, Nc2Eye catalogs 7088 manually curated ncRNA-ED associations involving 4363 ncRNAs across eight species. We also provide detailed descriptions and annotation information for each ncRNA-disease association such as ncRNA categories, experimental methods, expression pattern and related clinical drugs. To further expand the pathogenic ncRNAs, we also collected more than 90 high-throughput EDs-related transcriptome datasets. Furthermore, a user-friendly interface was constructed for convenient and flexible data browsing, querying, and retrieving. We believe that Nc2Eye is a timely and valuable knowledgebase for significantly improving and useful for discovery of new diagnostic and therapeutic biomarkers.",2020-02-14 +33981200,Gene4PD: A Comprehensive Genetic Database of Parkinson's Disease.,"Parkinson's disease (PD) is a complex neurodegenerative disorder with a strong genetic component. A growing number of variants and genes have been reported to be associated with PD; however, there is no database that integrate different type of genetic data, and support analyzing of PD-associated genes (PAGs). By systematic review and curation of multiple lines of public studies, we integrate multiple layers of genetic data (rare variants and copy-number variants identified from patients with PD, associated variants identified from genome-wide association studies, differentially expressed genes, and differential DNA methylation genes) and age at onset in PD. We integrated five layers of genetic data (8302 terms) with different levels of evidences from more than 3,000 studies and prioritized 124 PAGs with strong or suggestive evidences. These PAGs were identified to be significantly interacted with each other and formed an interconnected functional network enriched in several functional pathways involved in PD, suggesting these genes may contribute to the pathogenesis of PD. 
Furthermore, we identified 10 genes were associated with a juvenile-onset (age ≤ 30 years), 11 genes were associated with an early-onset (age of 30-50 years), whereas another 10 genes were associated with a late-onset (age > 50 years). Notably, the AAOs of patients with loss of function variants in five genes were significantly lower than that of patients with deleterious missense variants, while patients with VPS13C (P = 0.01) was opposite. Finally, we developed an online database named Gene4PD (http://genemed.tech/gene4pd) which integrated published genetic data in PD, the PAGs, and 63 popular genomic data sources, as well as an online pipeline for prioritize risk variants in PD. In conclusion, Gene4PD provides researchers and clinicians comprehensive genetic knowledge and analytic platform for PD, and would also improve the understanding of pathogenesis in PD.",2021-04-26 +31637139,ncRNA2MetS: a manually curated database for non-coding RNAs associated with metabolic syndrome.,"Metabolic syndrome is a cluster of the most dangerous heart attack risk factors (diabetes and raised fasting plasma glucose, abdominal obesity, high cholesterol and high blood pressure), and has become a major global threat to human health. A number of studies have demonstrated that hundreds of non-coding RNAs, including miRNAs and lncRNAs, are involved in metabolic syndrome-related diseases such as obesity, type 2 diabetes mellitus, hypertension, etc. However, these research results are distributed in a large number of literature, which is not conducive to analysis and use. There is an urgent need to integrate these relationship data between metabolic syndrome and non-coding RNA into a specialized database. To address this need, we developed a metabolic syndrome-associated non-coding RNA database (ncRNA2MetS) to curate the associations between metabolic syndrome and non-coding RNA. 
Currently, ncRNA2MetS contains 1,068 associations between five metabolic syndrome traits and 627 non-coding RNAs (543 miRNAs and 84 lncRNAs) in four species. Each record in ncRNA2MetS database represents a pair of disease-miRNA (lncRNA) association consisting of non-coding RNA category, miRNA (lncRNA) name, name of metabolic syndrome trait, expressive patterns of non-coding RNA, method for validation, specie involved, a brief introduction to the association, the article referenced, etc. We also developed a user-friendly website so that users can easily access and download all data. In short, ncRNA2MetS is a complete and high-quality data resource for exploring the role of non-coding RNA in the pathogenesis of metabolic syndrome and seeking new treatment options. The website is freely available at http://www.biomed-bigdata.com:50020/index.html.",2019-10-15 +32431267,"The WorldWide Antimalarial Resistance Network Clinical Trials Publication Library: A Live, Open-Access Database of Plasmodium Treatment Efficacy Trials.","Parasite resistance to antimalarial drugs poses a serious threat to malaria control. The WorldWide Antimalarial Resistance Network (WWARN) aims to provide a collaborative platform to support the global malaria research effort. Here, we describe the ""WWARN clinical trials publication library,"" an open-access, up-to-date resource to streamline the synthesis of antimalarial safety and efficacy data. A series of iteratively refined database searches were conducted to identify prospective clinical trials assessing antimalarial drug efficacy with at least 28 days of follow-up. Of approximately 45,000 articles screened, 1,221 trials published between 1946 and 2018 were identified, representing 2,339 treatment arms and 323,819 patients. 
In trials from endemic locations, 75.7% (787/1,040) recruited patients with Plasmodium falciparum, 17.0% (177/1,040) Plasmodium vivax, 6.9% (72/1,040) both, and 0.4% (4/1,040) other Plasmodium species; 57.2% (585/1,022) of trials included under-fives and 5.3% (55/1,036) included pregnant women. In Africa, there has been a marked increase in both P. falciparum and P. vivax studies over the last two decades. The WHO-recommended artemisinin-based combination therapies alone or with a gametocidal drug were assessed in 39.5% (705/1,783) of P. falciparum treatment arms and 10.5% (45/429) of P. vivax arms, increasing to 78.0% (266/341) and 22.9% (27/118), respectively, in the last five years. The library is a comprehensive, open-access tool that can be used by the malaria community to explore the collective knowledge on antimalarial efficacy (available at https://www.wwarn.org/tools-resources/literature-reviews/wwarn-clinical-trials-publication-library). It is the first of its kind in the field of global infectious diseases, and lessons learnt in its creation can be adapted to other infectious diseases.",2020-05-07 +34596084,Characterization of Androgen Receptor Complex Associated Protein (ARCAP) in hepatocellular carcinoma and liver.,"

Background

Hepatocellular carcinoma (HCC) ranks many tasks in clinical oncology due to possibly developing a general tumor in men and, usually lead to malignant to death within years. Researches had reported about major factors for being HCC was male sex and HCC associated with cirrhosis in childhood was found more common in males than females. In certain mouse strains as studied, breeding with testosterone significantly increases the development of HCC. Furthermore, castration of male mice diminished the frequency of the development of liver tumors. Meanwhile male hepatitis B virus transgenic mice have a greater occurrence of HCC than females.

Methods

We apply degenerate priming PCR to observe the expression of various steroid receptors in livers. Yeast two-hybrid screening to search for a novel RNA fragment helps to find a new full-length gene by RACE experiment. RT-PCR is applied to detect various expressions in tissues and cell lines. In situ hybridization detects DNA in Chromosome mapping. GFP-constructs transfection proves the gene localization in cells. Immunoprecipitation pulldown assay verifies protein interaction. Gene transfection followed with luciferase assay demonstrates the interaction of genes within cellular signaling. Genomic alignment analysis of sequence data was performed using the NCBI database website (http://www.ncbi.nim.nih.gov/genebank/).

Results

The androgen receptor (AR) expression level is found at the highest level among the steroid receptors families detected in liver tumors. By yeast-two hybrid screening, we cloned an Androgen Receptor Complex Associated Protein (ARCAP), of 95 Kd in molecular weight and its cDNA. ARCAP locates at Chromosome 1. Our findings indicate ARCAP is highly expressed in hepatoma cell lines and liver tumors and their adjacent tumors as observed. Yeast two-hybrid assay and in vitro immunoprecipitation assays demonstrated an interaction between AR and ARCAP.

Conclusion

We aim to search for different types and levels of steroid receptors expressed within human HCCs and in the adjacent liver tissues. To verify possible molecular mechanisms by which AR might affect hepatoma cells, we had characterized a novel protein ARCAP which functions as a coregulator to interact with AR within liver. The ligand-dependent AR with its cofactor, ARCAP, can induce a signal cascade by transactivation.",2021-12-01 +31602652,Gene expression analysis of Cyanophora paradoxa reveals conserved abiotic stress responses between basal algae and flowering plants.,"The glaucophyte Cyanophora paradoxa represents the most basal member of the kingdom Archaeplastida, but the function and expression of most of its genes are unknown. This information is needed to uncover how functional gene modules, that is groups of genes performing a given function, evolved in the plant kingdom. We have generated a gene expression atlas capturing responses of Cyanophora to various abiotic stresses. The data were included in the CoNekT-Plants database, enabling comparative transcriptomic analyses across two algae and six land plants. We demonstrate how the database can be used to study gene expression, co-expression networks and gene function in Cyanophora, and how conserved transcriptional programs can be identified. We identified gene modules involved in phycobilisome biosynthesis, response to high light and cell division. While we observed no correlation between the number of differentially expressed genes and the impact on growth of Cyanophora, we found that the response to stress involves a conserved, kingdom-wide transcriptional reprogramming, which is activated upon most stresses in algae and land plants. 
The Cyanophora stress gene expression atlas and the tools found in the https://conekt.plant.tools/ database thus provide a useful resource to reveal functionally related genes and stress responses in the plant kingdom.",2019-11-11 +28567010,"The What, the When, and the Whether of Intentional Action in the Brain: A Meta-Analytical Review.","In their attempt to define discrete subcomponents of intentionality, Brass and Haggard (2008) proposed their What, When, and Whether model (www-model) which postulates that the content, the timing and the possibility of generating an action can be partially independent both at the cognitive level and at the level of their neural implementation. The original proposal was based on a limited number of studies, which were reviewed with a discursive approach. To assess whether the model stands in front of the more recently published data, we performed a systematic review of the literature with a meta-analytic method based on a hierarchical clustering (HC) algorithm. We identified 15 PET/fMRI studies well-suited for this quest. HC revealed the existence of a rostro-caudal gradient within the medial prefrontal cortex, with the more anterior regions (the anterior cingulum) involved in more abstract decisions of whether to execute an action and the more posterior ones (the middle cingulum or the SMA) recruited in specifying the content and the timing components of actions. However, in contrast with the original www-model, this dissociation involves also brain regions well outside the median wall of the frontal lobe, in a component specific manner: the supramarginal gyrus for the what component, the pallidum and the thalamus for the when component, the putamen and the insula for the whether component. 
We then calculated co-activation maps on the three component-specific www clusters of the medial wall of the frontal/limbic lobe: to this end, we used the activation likelihood approach that we applied on the imaging studies on action contained in the BrainMap.org database. This analysis confirmed the main findings of the HC analyses. However, the BrainMap.org data analyses also showed that the aforementioned segregations are generated by paradigms in which subjects act in response to conditional stimuli rather than while driven by their own intentions. We conclude that the available data confirm that the neural underpinnings of intentionality can be fractionated in discrete components that are partially independent. We also suggest that intentionality manifests itself in discrete components through the boosting of general purpose action-related regions specialized for different aspects of action selection and inhibition.",2017-05-17 +31642470,WormBase: a modern Model Organism Information Resource.,"WormBase (https://wormbase.org/) is a mature Model Organism Information Resource supporting researchers using the nematode Caenorhabditis elegans as a model system for studies across a broad range of basic biological processes. Toward this mission, WormBase efforts are arranged in three primary facets: curation, user interface and architecture. In this update, we describe progress in each of these three areas. In particular, we discuss the status of literature curation and recently added data, detail new features of the web interface and options for users wishing to conduct data mining workflows, and discuss our efforts to build a robust and scalable architecture by leveraging commercial cloud offerings. 
We conclude with a description of WormBase's role as a founding member of the nascent Alliance of Genome Resources.",2020-01-01 +,OR31-06 Candidate Gene Variants in a Large Cohort of Women with Primary Ovarian Insufficiency,"Abstract Primary ovarian insufficiency (POI) is highly heritable. The majority of cases have no known cause. We hypothesized that mutations in previously identified genes or genes from the same pathways are the cause of POI in a recessive or dominant manner. Subjects included 294 women diagnosed with POI (amenorrhea with an elevated FSH level). All had a 46XX karyotype, and normal FMR1 repeat number. Subjects were recruited in Boston (n=95), at the NIH and Washington University (n=98), and in Pittsburgh (n=98). Controls included subjects recruited for health in old age and disorders unrelated to reproduction or cancer, and subjects from the 1000 Genomes Project (total n=587). Variants were called using the Sentieon software package (https://www.sentieon.com). Case and control samples were stratified on ethnicity, relatedness and heterozygosity. Peddy and XPAT were used to calculate quality control metrics to detect outlier samples for removal from analysis to create a homogenous dataset. The number of cases (227) and controls (458) was adjusted for downstream analysis. XPAT imposed additional quality filters and removed variants. A second filter removed variants that did not pass a Gnomad filter of <0.001 allele frequency. VAAST was used to determine a composite likelihood ratio (CLR) as the test statistic to represent the aggregate burden of variants of affected individuals in each transcript relative to a set of 458 control genomes. The significance of each transcript’s VAAST CLR score was evaluated by 1 million permutations. We screened exomes for variants in previously identified genes causing POI in humans and those demonstrating infertility in a male or female mouse model. 
We also used the American College of Medical Genetics and Genomics standards for interpretation of pathogenicity of a variant, with priority on null variants in genes with probability of loss of function intolerance based on the observed vs. expected rate in gnomAD, in vivo or in vitro functional evidence of a damaging effect, significantly increased prevalence compared to controls, i.e. not found in any controls or in fewer than 10 in the gnomAD database if the subject had a matching race/ethnicity. Thirty-four subjects were removed for poor quality exomes and relatedness. Fifty-three subjects had at least one variant in a previously identified POI gene or one in which there was a previously identified functional model. Two subjects carried recessive variants and 30 carried at least one novel heterozygous candidate variant for follow up. Analysis of genetic causes of POI in this large cohort identified candidate causal gene variants in over half of the subjects. The data demonstrate that the genetic architecture is heterogeneous. Although recessive mutations have been identified in consanguineous families, the data suggest that a dominant or oligogenic pattern of inheritance may be important.",2020-05-08 +34023905,"AnnotSV and knotAnnotSV: a web server for human structural variations annotations, ranking and analysis.","With the dramatic increase of pangenomic analysis, Human geneticists have generated large amount of genomic data including millions of small variants (SNV/indel) but also thousands of structural variations (SV) mainly from next-generation sequencing and array-based techniques. While the identification of the complete SV repertoire of a patient is getting possible, the interpretation of each SV remains challenging. 
To help identifying human pathogenic SV, we have developed a web server dedicated to their annotation and ranking (AnnotSV) as well as their visualization and interpretation (knotAnnotSV) freely available at the following address: https://www.lbgi.fr/AnnotSV/. A large amount of annotations from >20 sources is integrated in our web server including among others genes, haploinsufficiency, triplosensitivity, regulatory elements, known pathogenic or benign genomic regions, phenotypic data. An ACMG/ClinGen compliant prioritization module allows the scoring and the ranking of SV into 5 SV classes from pathogenic to benign. Finally, the visualization interface displays the annotated SV in an interactive way including popups, search fields, filtering options, advanced colouring to highlight pathogenic SV and hyperlinks to the UCSC genome browser or other public databases. This web server is designed for diagnostic and research analysis by providing important resources to the user.",2021-07-01 +34463905,"Prevalence and Impact of Treatment-Resistant Depression in Latin America: a Prospective, Observational Study.","Approximately one-third of patients with major depressive disorder (MDD) have treatment-resistant depression (TRD). The TRAL study will evaluate the prevalence and impact of TRD among patients with MDD in four Latin American countries. In this multicenter, prospective, observational study, patients with MDD were recruited from 33 reference sites in Mexico, Colombia, Brazil, and Argentina. Patients were assessed for TRD, defined as failure to respond to ≥ 2 antidepressant medications of adequate dose and duration. Demographics, previous/current treatments, depressive symptoms, functioning, healthcare resource utilization, and work impairment were also collected and evaluated using descriptive statistics, chi-square test, Fisher exact test, t-test for independent samples, or the Mann-Whitney nonparametric test, as appropriate. 
1475 patients with MDD were included in the analysis (mean age, 45.6 years; 78% women); 89% were receiving relevant psychiatric treatment. 429 patients met criteria for TRD, and a numerically higher proportion of patients with TRD was present in public versus private sites of care (31% vs 27%). The mean Montgomery-Asberg Depression Rating Scale score was 25.0 among all MDD patients and was significantly higher for patients with TRD versus non-TRD (29.4 vs 23.3; P < 0.0001). Patients with TRD, versus those with non-TRD, were significantly more likely to be older, have a longer disease duration, have more comorbidities, be symptomatic, have a higher median number of psychiatric consultations, and report greater work impairment. Patients with TRD have a disproportionate burden of disease compared to those with non-TRD. Appropriate treatment for TRD is a substantial unmet need in Latin America. https://www.ClinicalTrials.gov identifier NCT03207282, 07/02/2017.",2021-08-31 +32487193,SCDb: an integrated database of stomach cancer.,"

Background

Stomach cancer (SC) is a type of cancer, which is derived from the stomach mucous membrane. As there are non-specific symptoms or no noticeable symptoms observed at the early stage, newly diagnosed SC cases usually reach an advanced stage and are thus difficult to cure. Therefore, in this study, we aimed to develop an integrated database of SC.

Methods

SC-related genes were identified through literature mining and by analyzing the publicly available microarray datasets. Using the RNA-seq, miRNA-seq and clinical data downloaded from The Cancer Genome Atlas (TCGA), the Kaplan-Meier (KM) survival curves for all the SC-related genes were generated and analyzed. The miRNAs (miRanda, miRTarget2, PicTar, PITA and TargetScan databases), SC-related miRNAs (HMDD and miR2Disease databases), single nucleotide polymorphisms (SNPs, dbSNP database), and SC-related SNPs (ClinVar database) were also retrieved from the indicated databases. Moreover, gene_disease (OMIM and GAD databases), copy number variation (CNV, DGV database), methylation (PubMeth database), drug (WebGestalt database), and transcription factor (TF, TRANSFAC database) analyses were performed for the differentially expressed genes (DEGs).

Results

In total, 9990 SC-related genes (including 8347 up-regulated genes and 1643 down-regulated genes) were identified, among which, 65 genes were further confirmed as SC-related genes by performing enrichment analysis. Besides this, 457 miRNAs, 20 SC-related miRNAs, 1570 SNPs, 108 SC-related SNPs, 419 TFs, 44,605 CNVs, 3404 drug-associated genes, 63 genes with methylation, and KM survival curves of 20,264 genes were obtained. By integrating these datasets, an integrated database of stomach cancer, designated as SCDb, (available at http://www.stomachcancerdb.org/) was established.

Conclusions

As a comprehensive resource for human SC, SCDb database will be very useful for performing SC-related research in future, and will thus promote the understanding of the pathogenesis of SC.",2020-06-02 +33735471,Update of the CLRP eye plaque brachytherapy database for photon-emitting sources.,"

Purpose

To update and extend the Carleton Laboratory for Radiotherapy Physics (CLRP) Eye Plaque (EP) dosimetry database for low-energy photon-emitting brachytherapy sources using egs_brachy, an open-source EGSnrc application. The previous database, CLRP_EPv1, contained datasets for the Collaborative Ocular Melanoma Study (COMS) plaques (10-22 mm diameter) with ¹⁰³Pd or ¹²⁵I seeds (BrachyDose-computed, 2008). The new database, CLRP_EPv2, consists of newly calculated three-dimensional (3D) dose distributions for 17 plaques [eight COMS, five Eckert & Ziegler BEBIG, and four others representative of models used worldwide] for ¹⁰³Pd, ¹²⁵I, and ¹³¹Cs seeds.

Acquisition and validation methods

Plaque models are developed with egs_brachy, based on published/manufacturer dimensions and material data. The BEBIG plaques (modeled for the first time) are identical in dimensions to COMS plaques but differ in elemental composition and/or density. Previously benchmarked seed models are used. Eye plaques and seeds are simulated at the center of full-scatter water phantoms, scoring in (0.05 cm)³ voxels spanning the eye for scenarios: (a) ""HOMO"": simulated TG43 conditions; (b) ""HETERO"": eye plaques and seeds fully modeled; (c) ""HETsi"" (BEBIG only): one seed is active at a time with other seed geometries present but not emitting photons (inactive); summation over all i seeds in a plaque then yields ""HETsum"" (includes interseed effects). For validation, doses are compared to those from CLRP_EPv1 and published data.

Data format and access

Data are available at https://physics.carleton.ca/clrp/eye_plaque_v2, http://doi.org/10.22215/clrp/EPv2. The data consist of 3D dose distributions (text-based EGSnrc ""3ddose"" file format) and graphical presentations of the comparisons to previously published data.

Potential applications

The CLRP_EPv2 database provides accurate reference 3D dose distributions to advance ocular brachytherapy dose evaluations. The fully-benchmarked eye plaque models will be freely distributed with egs_brachy, supporting adoption of model-based dose evaluations as recommended by TG-129, TG-186, and TG-221.",2021-04-17 +,DATABASE FOR INDICES OF AGING IN NONHUMAN PRIMATES,"Abstract The Primate Aging Database (PAD) is a multi-centered, relational database of biological variables in aging, captive monkeys and apes containing approximately one million data points for body weight, blood chemistry and hematology, for male and female subjects over time (https://primatedatabase.org). More than forty species are currently represented, primarily chimpanzees, macaques and common marmosets. Metadata include housing environment, social context and diet. Life history information for each species is also provided. Data in PAD is gathered from various research facilities, sanctuaries and zoos. PAD has recently been extensively revamped to enhance ease of use. Tools for data visualization and analysis in multiple formats are included. PAD has been useful for exploring biomarkers of aging in primates and for examining physiological dysregulation in aging across primate species. It also provides age-specific normative values that are valuable in clinical veterinary medicine. New data are being added to PAD, including additional subjects and variables, and additional contributors are solicited. (Supported by contract HHSN2711201800025C from the National Institute on Aging to CleMetric Data Analytics and Management, LLC.)",2019-11-01 +34596551,Gosling: A Grammar-based Toolkit for Scalable and Interactive Genomics Data Visualization.,"The combination of diverse data types and analysis tasks in genomics has resulted in the development of a wide range of visualization techniques and tools. 
However, most existing tools are tailored to a specific problem or data type and offer limited customization, making it challenging to optimize visualizations for new analysis tasks or datasets. To address this challenge, we designed Gosling-a grammar for interactive and scalable genomics data visualization. Gosling balances expressiveness for comprehensive multi-scale genomics data visualizations with accessibility for domain scientists. Our accompanying JavaScript toolkit called Gosling.js provides scalable and interactive rendering. Gosling.js is built on top of an existing platform for web-based genomics data visualization to further simplify the visualization of common genomics data formats. We demonstrate the expressiveness of the grammar through a variety of real-world examples. Furthermore, we show how Gosling supports the design of novel genomics visualizations. An online editor and examples of Gosling.js, its source code, and documentation are available at https://gosling.js.org.",2021-12-30 +32618424,LymphoAtlas: a dynamic and integrated phosphoproteomic resource of TCR signaling in primary T cells reveals ITSN2 as a regulator of effector functions.,"T-cell receptor (TCR) ligation-mediated protein phosphorylation regulates the activation, cellular responses, and fates of T cells. Here, we used time-resolved high-resolution phosphoproteomics to identify, quantify, and characterize the phosphorylation dynamics of thousands of phosphorylation sites in primary T cells during the first 10 min after TCR stimulation. Bioinformatic analysis of the data revealed a coherent orchestration of biological processes underlying T-cell activation. In particular, functional modules associated with cytoskeletal remodeling, transcription, translation, and metabolic processes were mobilized within seconds after TCR engagement. 
Among proteins whose phosphorylation was regulated by TCR stimulation, we demonstrated, using a fast-track gene inactivation approach in primary lymphocytes, that the ITSN2 adaptor protein regulated T-cell effector functions. This resource, called LymphoAtlas, represents an integrated pipeline to further decipher the organization of the signaling network encoding T-cell activation. LymphoAtlas is accessible to the community at: https://bmm-lab.github.io/LymphoAtlas.",2020-07-01 +34265305,OdoriFy: A conglomerate of artificial intelligence-driven prediction engines for olfactory decoding.,"The molecular mechanisms of olfaction, or the sense of smell, are relatively underexplored compared with other sensory systems, primarily because of its underlying molecular complexity and the limited availability of dedicated predictive computational tools. Odorant receptors (ORs) allow the detection and discrimination of a myriad of odorant molecules and therefore mediate the first step of the olfactory signaling cascade. To date, odorant (or agonist) information for the majority of these receptors is still unknown, limiting our understanding of their functional relevance in odor-induced behavioral responses. In this study, we introduce OdoriFy, a Web server featuring powerful deep neural network-based prediction engines. OdoriFy enables (1) identification of odorant molecules for wildtype or mutant human ORs (Odor Finder); (2) classification of user-provided chemicals as odorants/nonodorants (Odorant Predictor); (3) identification of responsive ORs for a query odorant (OR Finder); and (4) interaction validation using Odorant-OR Pair Analysis. In addition, OdoriFy provides the rationale behind every prediction it makes by leveraging explainable artificial intelligence. This module highlights the basis of the prediction of odorants/nonodorants at atomic resolution and for the ORs at amino acid levels. 
A key distinguishing feature of OdoriFy is that it is built on a comprehensive repertoire of manually curated information of human ORs with their known agonists and nonagonists, making it a highly interactive and resource-enriched Web server. Moreover, comparative analysis of OdoriFy predictions with an alternative structure-based ligand interaction method revealed comparable results. OdoriFy is available freely as a web service at https://odorify.ahujalab.iiitd.edu.in/olfy/.",2021-07-12 +34480478,Targeted whole exome sequencing and Drosophila modelling to unveil the molecular basis of primary ovarian insufficiency.,"

Study question

Can a targeted whole exome sequencing (WES) on a cohort of women showing a primary ovarian insufficiency (POI) phenotype at a young age, combined with a study of copy number variations, identify variants in candidate genes confirming their deleterious effect on ovarian function?

Summary answer

This integrated approach has proved effective in identifying novel candidate genes unveiling mechanisms involved in POI pathogenesis.

What is known already

POI, a condition occurring in 1% of women under 40 years of age, affects women's fertility leading to a premature loss of ovarian reserve. The genetic causes of POI are highly heterogeneous and several determinants contributing to its prominent oligogenic inheritance pattern still need to be elucidated.

Study design, size, duration

WES screening for pathogenic variants of 41 Italian women with non-syndromic primary and early secondary amenorrhoea occurring before age 25 was replicated on another 60 POI patients, including 35 French and 25 American women, to reveal statistically significant shared variants.

Participants/materials, setting, methods

The Italian POI patients' DNA were processed by targeted WES including 542 RefSeq genes expressed or functioning during distinct reproductive or ovarian processes (e.g. DNA repair, meiosis, oocyte maturation, folliculogenesis and menopause). Extremely rare variants were filtered and selected by means of a Fisher Exact test using several publicly available datasets. A case-control Burden test was applied to highlight the most significant genes using two ad-hoc control female cohorts. To support the obtained data, the identified genes were screened on a novel cohort of 60 Caucasian POI patients and the same case-control analysis was carried out. Comparative analysis of the human identified genes was performed on mouse and Drosophila melanogaster by analysing the orthologous genes in their ovarian phenotype, and two of the selected genes were fruit fly modelled to explore their role in fertility.

Main results and the role of chance

The filtering steps applied to search for extremely rare pathogenic variants in the Italian cohort revealed 64 validated single-nucleotide variants/Indels in 59 genes in 30 out of 41 screened women. Burden test analysis highlighted 13 ovarian genes as being the most enriched and significant. To validate these findings, filtering steps and Burden analysis on the second cohort of Caucasian patients yielded 11 significantly enriched genes. Among them, AFP, DMRT3, MOV10, FYN and MYC were significant in both patient cohorts and hence were considered strong candidates for POI. Mouse and Drosophila comparative analysis evaluated a conserved role through the evolution of several candidates, and functional studies using a Drosophila model, when applicable, supported the conserved role of the MOV10 armitage and DMRT3 dmrt93B orthologues in female fertility.

Large scale data

The datasets for the Italian cohort generated during the current study are publicly available at ClinVar database (http://www.ncbi.nlm.nih.gov/clinvar/): accession numbers SCV001364312 to SCV001364375.

Limitations, reasons for caution

This is a targeted WES analysis hunting variants in candidate genes previously identified by different genomic approaches. For most of the investigated sporadic cases, we could not track the parental inheritance, due to unavailability of the parents' DNA samples; in addition, we might have overlooked additional rare variants in novel candidate POI genes extracted from the exome data. On the contrary, we might have considered some inherited variants whose clinical significance is uncertain and might not be causative for the patients' phenotype. Additionally, as regards the Drosophila model, it will be extremely important in the future to have more mutants or RNAi strains available for each candidate gene in order to validate their role in POI pathogenesis.

Wider implications of the findings

The genomic, statistical, comparative and functional approaches integrated in our study convincingly support the extremely heterogeneous oligogenic nature of POI, and confirm the maintenance across the evolution of some key genes safeguarding fertility and successful reproduction. Two principal classes of genes were identified: (i) genes primarily involved in meiosis, namely in synaptonemal complex formation, asymmetric division and oocyte maturation and (ii) genes safeguarding cell maintenance (piRNA and DNA repair pathways).

Study funding/competing interest(s)

This work was supported by Italian Ministry of Health grants 'Ricerca Corrente' (08C621_2016 and 08C924_2019) provided to IRCCS Istituto Auxologico Italiano, and by 'Piano Sostegno alla Ricerca' (PSR2020_FINELLI_LINEA_B) provided by the University of Milan; M.P.B. was supported by Telethon-Italy (grant number GG14181). There are no conflicts of interest.",2021-10-01 +,Creation of an Online Platform for Identification of Microorganisms: Peak Picking or Full-Spectrum Analysis,"Identification of microorganisms by MALDI-TOF mass spectrometry is a very efficient method with high throughput, speed, and accuracy. However, it is significantly limited by the absence of a universal database of reference mass spectra. This problem can be solved by creating an Internet platform for open databases of protein spectra of microorganisms. Choosing the optimal mathematical apparatus is the pivotal issue for this task. In our previous study we proposed the geometric approach for processing mass spectrometry data, which represented a mass spectrum as a vector in a multidimensional Euclidean space. This algorithm was implemented in a Jacob4 stand-alone package. We demonstrated its efficiency in delimiting two closely related species of the Bacillus pumilus group. In this study, the geometric approach was realized as R scripts which allowed us to design a Web-based application. We also studied the possibility of using full spectra analysis (FSA) without calculating mass peaks (PPA), which is the logical development of the method. We used 74 microbial strains from the collections of ICiG SB RAS, UNIQEM, IEGM, KMM, and VGM as the models. We demonstrated that the algorithms based on peak-picking and analysis of complete data have accuracy no less than that of Biotyper 3.1 software. We proposed a method for calculating cut-off thresholds based on averaged intraspecific distances. 
The resulting database, raw data, and the set of R scripts are available online at https://icg-test.mydisk.nsc.ru/s/qj6cfZg57g6qwzN.",2020-01-01 +33651795,Biological impact of mutually exclusive exon switching.,"Alternative splicing can expand the diversity of proteomes. Homologous mutually exclusive exons (MXEs) originate from the same ancestral exon and result in polypeptides with similar structural properties but altered sequence. Why would some genes switch homologous exons and what are their biological impact? Here, we analyse the extent of sequence, structural and functional variability in MXEs and report the first large scale, structure-based analysis of the biological impact of MXE events from different genomes. MXE-specific residues tend to map to single domains, are highly enriched in surface exposed residues and cluster at or near protein functional sites. Thus, MXE events are likely to maintain the protein fold, but alter specificity and selectivity of protein function. This comprehensive resource of MXE events and their annotations is available at: http://gene3d.biochem.ucl.ac.uk/mxemod/. These findings highlight how small, but significant changes at critical positions on a protein surface are exploited in evolution to alter function.",2021-03-02 +34038028,Using the PhenX Toolkit to Select Standard Measurement Protocols for Your Research Study.,"The goals of PhenX (consensus measures for Phenotypes and eXposures) are to promote the use of standard measurement protocols and to help investigators identify opportunities for collaborative research and cross-study analysis, thus increasing the impact of individual studies. The PhenX Toolkit (https://www.phenxtoolkit.org/) offers high-quality, well-established measurement protocols to assess phenotypes and exposures in studies with human participants. 
The Toolkit contains protocols representing 29 research domains and 6 specialty collections of protocols that add depth to the Toolkit in specific research areas (e.g., COVID-19, Social Determinants of Health [SDoH], Blood Sciences Research [BSR], Mental Health Research [MHR], Tobacco Regulatory Research [TRR], and Substance Abuse and Addiction [SAA]). Protocols are recommended for inclusion in the PhenX Toolkit by Working Groups of domain experts using a consensus process that includes input from the scientific community. For each PhenX protocol, the Toolkit provides a detailed description, the rationale for inclusion, and supporting documentation. Users can browse protocols in the Toolkit, search the Toolkit using keywords, or use Browse Protocols Tree to identify protocols of interest. The PhenX Toolkit provides data dictionaries compatible with the database of Genotypes and Phenotypes (dbGaP), Research Electronic Data Capture (REDCap) data submission compatibility, and data collection worksheets to help investigators incorporate PhenX protocols into their study design. The PhenX Toolkit provides resources to help users identify published studies that used PhenX protocols. © 2021 The Authors. Current Protocols published by Wiley Periodicals LLC. Basic Protocol: Using the PhenX Toolkit to support or extend study design.",2021-05-01 +31598706,Ensembl Genomes 2020-enabling non-vertebrate genomic research.,"Ensembl Genomes (http://www.ensemblgenomes.org) is an integrating resource for genome-scale data from non-vertebrate species, complementing the resources for vertebrate genomics developed in the context of the Ensembl project (http://www.ensembl.org). Together, the two resources provide a consistent set of interfaces to genomic data across the tree of life, including reference genome sequence, gene models, transcriptional data, genetic variation and comparative analysis. 
Data may be accessed via our website, online tools platform and programmatic interfaces, with updates made four times per year (in synchrony with Ensembl). Here, we provide an overview of Ensembl Genomes, with a focus on recent developments. These include the continued growth, more robust and reproducible sets of orthologues and paralogues, and enriched views of gene expression and gene function in plants. Finally, we report on our continued deeper integration with the Ensembl project, which forms a key part of our future strategy for dealing with the increasing quantity of available genome-scale data across the tree of life.",2020-01-01 +33135044,RNANet: an automatically built dual-source dataset integrating homologous sequences and RNA structures.,"

Motivation

Applied research in machine learning progresses faster when a clean dataset is available and ready to use. Several datasets have been proposed and released over the years for specific tasks such as image classification, speech-recognition and more recently for protein structure prediction. However, for the fundamental problem of RNA structure prediction, information is spread between several databases depending on the level we are interested in: sequence, secondary structure, 3D structure or interactions with other macromolecules. In order to speed-up advances in machine-learning based approaches for RNA secondary and/or 3D structure prediction, a dataset integrating all this information is required, to avoid spending time on data gathering and cleaning.

Results

Here, we propose the first attempt of a standardized and automatically generated dataset dedicated to RNA combining together: RNA sequences, homology information (under the form of position-specific scoring matrices) and information derived by annotation of available 3D structures (including secondary structure, canonical and non-canonical interactions and backbone torsion angles). The data are retrieved from public databases PDB, Rfam and SILVA. The paper describes the procedure to build such dataset and the RNA structure descriptors we provide. Some statistical descriptions of the resulting dataset are also provided.

Availability and implementation

The dataset is updated every month and available online (in flat-text file format) on the EvryRNA software platform (https://evryrna.ibisc.univ-evry.fr/evryrna/rnanet). An efficient parallel pipeline to build the dataset is also provided for easy reproduction or modification.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-06-01 +33821874,Fantastic databases and where to find them: Web applications for researchers in a rush.,"Public databases are essential to the development of multi-omics resources. The amount of data created by biological technologies needs a systematic and organized form of storage, that can quickly be accessed, and managed. This is the objective of a biological database. Here, we present an overview of human databases with web applications. The databases and tools allow the search of biological sequences, genes and genomes, gene expression patterns, epigenetic variation, protein-protein interactions, variant frequency, regulatory elements, and comparative analysis between human and model organisms. Our goal is to provide an opportunity for exploring large datasets and analyzing the data for users with little or no programming skills. Public user-friendly web-based databases facilitate data mining and the search for information applicable to healthcare professionals. Besides, biological databases are essential to improve biomedical search sensitivity and efficiency and merge multiple datasets needed to share data and build global initiatives for the diagnosis, prognosis, and discovery of new treatments for genetic diseases. To show the databases at work, we present a case study using ACE2 as an example of a gene to be investigated. The analysis and the complete list of databases is available in the following website .",2021-04-02 +30462313,Cistrome Data Browser: expanded datasets and new tools for gene regulatory analysis.,"The Cistrome Data Browser (DB) is a resource of human and mouse cis-regulatory information derived from ChIP-seq, DNase-seq and ATAC-seq chromatin profiling assays, which map the genome-wide locations of transcription factor binding sites, histone post-translational modifications and regions of chromatin accessible to endonuclease activity. 
Currently, the Cistrome DB contains approximately 47,000 human and mouse samples with about 24,000 newly collected datasets compared to the previous release two years ago. Furthermore, the Cistrome DB has a new Toolkit module with several features that allow users to better utilize the large-scale ChIP-seq, DNase-seq, and ATAC-seq data. First, users can query the factors which are likely to regulate a specific gene of interest. Second, the Cistrome DB Toolkit facilitates searches for factor binding, histone modifications, and chromatin accessibility in any given genomic interval shorter than 2Mb. Third, the Toolkit can determine the most similar ChIP-seq, DNase-seq, and ATAC-seq samples in terms of genomic interval overlaps with user-provided genomic interval sets. The Cistrome DB is a user-friendly, up-to-date, and well maintained resource, and the new tools will greatly benefit the biomedical research community. The database is freely available at http://cistrome.org/db, and the Toolkit is at http://dbtoolkit.cistrome.org.",2019-01-01 +32209622,"Severe Asthma Toolkit: an online resource for multidisciplinary health professionals-needs assessment, development process and user analytics with survey feedback.","

Objectives

Severe asthma imposes a significant burden on individuals, families and the healthcare system. New treatment and management approaches are emerging as effective options for severe asthma. Translating new knowledge to multidisciplinary healthcare professionals is a priority. We developed 'The Severe Asthma Toolkit' (https://toolkit.severeasthma.org.au) to increase awareness of severe asthma, provide evidence-based resources and support decision-making by healthcare providers.

Setting

Roundtable discussions and a survey of Australian clinicians were conducted to determine clinician preferences, format and content for a severe asthma resource.

Participants

A reference group from stakeholder and consumer bodies and severe asthma experts provided advice and feedback. A multidisciplinary team of international experts was engaged to develop content. Written content was based on up-to-date literature. Peer and editorial review were performed to finalise content and inform web design. Website design focused on user experience, navigation, engagement, interactivity and tailoring of content for a clinical audience.

Results

A web-based resource was developed. Roundtable discussions and a needs assessment survey identified the need for dedicated severe asthma management resources to support skills training. The end-product, which launched 26 March 2018, includes an overview of severe asthma, diagnosis and assessment, management, medications, comorbidities, living with severe asthma, establishing a clinic, paediatrics/adolescents and clinical resources. Analytics indicate access by users worldwide (32 169 users from 169 countries). User survey results (n=394) confirm access by the target audience (72% health professionals), who agreed the toolkit increased their knowledge (73%) and confidence in managing severe asthma (66%), and 75% are likely to use the resource in clinic.

Conclusions

The Severe Asthma Toolkit is a unique, evidence-based internet resource to support healthcare professionals providing optimal care for people with severe asthma. It is a comprehensive, accessible and independent resource developed by leading severe asthma experts to improve clinician knowledge and skills in severe asthma management.",2020-03-24 +30380102,qPhos: a database of protein phosphorylation dynamics in humans.,"Temporal and spatial protein phosphorylation dynamically orchestrates a broad spectrum of biological processes and plays various physiological and pathological roles in diseases and cancers. Recent advancements in high-throughput proteomics techniques greatly promoted the profiling and quantification of phosphoproteome. However, although several comprehensive databases have reserved the phosphorylated proteins and sites, a resource for phosphorylation quantification still remains to be constructed. In this study, we developed the qPhos (http://qphos.cancerbio.info) database to integrate and host the data on phosphorylation dynamics. A total of 3 537 533 quantification events for 199 071 non-redundant phosphorylation sites on 18 402 proteins under 484 conditions were collected through exhaustive curation of published literature. The experimental details, including sample materials, conditions and methods, were recorded. Various annotations, such as protein sequence and structure properties, potential upstream kinases and their inhibitors, were systematically integrated and carefully organized to present details about the quantified phosphorylation sites. Various browse and search functions were implemented for the user-defined filtering of samples, conditions and proteins. Furthermore, the qKinAct service was developed to dissect the kinase activity profile from user-submitted quantitative phosphoproteome data through annotating the kinase activity-related phosphorylation sites. 
Taken together, the qPhos database provides a comprehensive resource for protein phosphorylation dynamics to facilitate related investigations.",2019-01-01 +,The Impact of Connective Tissue Diseases on the Inpatient Outcomes of Congestive Heart Failure Patients,"Background Rheumatoid arthritis (RA) and systemic lupus erythematosus (SLE) are autoimmune diseases with chronically elevated inflammatory activity. Treatments typically have been aimed at decreasing inflammation. While RA and SLE are known to have a high incidence of congestive heart failure (HF), the mechanism behind this remains elusive. We sought to assess the outcomes of HF patients with either RA or SLE as opposed to HF patients without RA or SLE. Methods We conducted a retrospective analysis of the Healthcare Utilization Project - National Inpatient Sample Database from 2010 to 2015 (third quarter). Patients with a primary admitting diagnosis of HF were queried, and those with or without a diagnosis of either SLE or RA were separated into two groups. In-hospital mortality, total charges (TOTCHG), and length of stay (LOS) were analyzed with a multivariate regression model adjusted for demographical and comorbidity variables, using generalized linear models with family binomial, gamma, and negative-binomial, respectively. A p-value smaller than 0.05 was deemed statistically significant. All the statistical analyses were performed in R 3.5.5 (R Core Team, 2013, http://www.R-project.org/). Results  The in-hospital mortality (3.4% v/s 4.43%), mean TOTCHG ($46k v/s $51k), and mean LOS (5.79 v/s 6.12 days) were significantly lower in HF patients with RA/SLE when compared with HF patients without RA/SLE. A younger age (70.5 v/s 72.6 years) and a female preponderance (75% v/s 51%) were evident in the RA/SLE group. Both groups consistently showed a significant disparity in the rates of hospitalization, which was inversely related to household income. p-value was less than 0.001 for all the above outcomes. 
Conclusions  RA/SLE patients are associated with better in-hospital outcomes of HF. The underlying mechanism is unclear in terms of this paradox. Given the fact that the majority of RA/SLE patients are treated with agents aimed at decreasing inflammation, this may shed light on the role of inflammation being an important contributor to HF and implicate a future therapeutic direction.",2021-01-02 +34704369,Artificial-intelligence-driven discovery of prognostic biomarker for sarcopenia.,"

Background

Sarcopenia is defined as muscle wasting, characterized by a progressive loss of muscle mass and function due to ageing. Diagnosis of sarcopenia typically involves both muscle imaging and the physical performance of people exhibiting signs of muscle weakness. Despite its worldwide prevalence, a molecular method for accurately diagnosing sarcopenia has not been established.

Methods

We develop an artificial intelligence (AI) diagnosis model of sarcopenia using a published transcriptome dataset comprising patients from multiple ethnicities. For the AI model for sarcopenia diagnosis, we use a transcriptome database comprising 17 339 genes from 118 subjects. Among the 17 339 genes, we select 27 features as the model inputs. For feature selection, we use a random forest, extreme gradient boosting and adaptive boosting. Using the top 27 features, we propose a four-layer deep neural network, named DSnet-v1, for sarcopenia diagnosis.

Results

Among isolated testing datasets, DSnet-v1 provides high sensitivity (100%), specificity (94.12%), accuracy (95.83%), balanced accuracy (97.06%) and area under receiver operating characteristics (0.99). To extend the number of patient data, we develop a web application (http://sarcopeniaAI.ml/), where the model can be accessed unrestrictedly to diagnose sarcopenia if the transcriptome is available. A focused analysis of the top 27 genes for their differential or co-expression with other genes implied the potential existence of race-specific factors for sarcopenia, suggesting the possibility of identifying causal factors of sarcopenia when a more extended dataset is provided.

Conclusions

Our new AI model, DSnet-v1, accurately diagnoses sarcopenia and is currently available publicly to assist healthcare providers in diagnosing and treating sarcopenia.",2021-10-26 +,Data-driven prediction of antiviral peptides based on periodicities of amino acid properties,"With the emergence of new pathogens, e.g., methicillin-resistant Staphylococcus aureus (MRSA), and the recent novel coronavirus pandemic, there has been an ever-increasing need for novel antimicrobial therapeutics. In this work, we have developed support vector machine (SVM) models to predict antiviral peptide sequences. Oscillations in physicochemical properties in protein sequences have been shown to be predictive of protein structure and function, and in the presented work we have taken advantage of these known periodicities to develop models that predict antiviral peptide sequences. In developing the presented models, we first generated property factors by applying principal component analysis (PCA) to the AAindex dataset of 544 amino acid properties. We next converted peptide sequences into physicochemical vectors using 18 property factors resulting from the PCA. Fourier transforms were applied to the property factor vectors to measure the amplitude of the physicochemical oscillations, which served as the features to train our SVM models. To train and test the developed models we have used a publicly available database of antiviral peptides (http://crdd.osdd.net/servers/avppred/), and we have used cross-validation to train and tune models based on multiple training and testing sets. To further understand the physicochemical properties of antiviral peptides we have also applied a previously developed feature selection algorithm. 
Future work will be aimed at computationally designing novel antiviral therapeutics based on the developed machine learning models.",2021-01-01 +,A new online database on genome-related information of Indian plants,"In this paper, we present a new online comprehensive database developed for genome-related information of Indian plants (dGRIP). In strict sense, dGRIP database displays for each species and genus, its chromosome number(s) with comprehensive cytogenetic data, genome size, ploidy, systematics and molecular genetics related to Indian angiosperms, gymnosperms, pteridophytes and bryophytes. The data are described in the form of datasheets encompassing 29 parameters available for users to consolidate the knowledge with respect to comprehensive cytogenetical details. The chromosome database is developed based on object-relational database management system (Sequence Query Language) and includes references from which the information was sourced. The online database is currently available at http://sbtju.in/Dgrip/index.html consists of a main page, project information, collaborators and search tools for each group, namely angiosperms, gymnosperms pteridophytes and bryophytes. Currently, the information is available for about 1500 species (Release 1.0); however, the dGRIP continues to expand with goal to include data of more than 17,000 plant species from India.",2019-11-01 +31301205,The Generation of a Comprehensive Spectral Library for the Analysis of the Guinea Pig Proteome by SWATH-MS.,"Advances in liquid chromatography-mass spectrometry have facilitated the incorporation of proteomic studies to many biology experimental workflows. Data-independent acquisition platforms, such as sequential window acquisition of all theoretical mass spectra (SWATH-MS), offer several advantages for label-free quantitative assessment of complex proteomes over data-dependent acquisition (DDA) approaches. 
However, SWATH data interpretation requires spectral libraries as a detailed reference resource. The guinea pig (Cavia porcellus) is an excellent experimental model for translation to many aspects of human physiology and disease, yet there is limited experimental information regarding its proteome. To overcome this knowledge gap, a comprehensive spectral library of the guinea pig proteome is generated. Homogenates and tryptic digests are prepared from 16 tissues and subjected to >200 DDA runs. Analysis of >250 000 peptide-spectrum matches resulted in a library of 73 594 peptides from 7666 proteins. Library validation is provided by i) analyzing externally derived SWATH files (https://doi.org/10.1016/j.jprot.2018.03.023) and comparing peptide intensity quantifications; ii) merging of externally derived data to the base library. This furnishes the research community with a comprehensive proteomic resource that will facilitate future molecular-phenotypic studies using (re-engaging) the guinea pig as an experimental model of relevance to human biology. The spectral library and raw data are freely accessible in the MassIVE repository (MSV000083199).",2019-07-22 +32657405,Chromatin network markers of leukemia.,"

Motivation

The structure of chromatin impacts gene expression. Its alteration has been shown to coincide with the occurrence of cancer. A key challenge is in understanding the role of chromatin structure (CS) in cellular processes and its implications in diseases.

Results

We propose a comparative pipeline to analyze CSs and apply it to study chronic lymphocytic leukemia (CLL). We model the chromatin of the affected and control cells as networks and analyze the network topology by state-of-the-art methods. Our results show that CSs are a rich source of new biological and functional information about DNA elements and cells that can complement protein-protein and co-expression data. Importantly, we show the existence of structural markers of cancer-related DNA elements in the chromatin. Surprisingly, CLL driver genes are characterized by specific local wiring patterns not only in the CS network of CLL cells, but also of healthy cells. This allows us to successfully predict new CLL-related DNA elements. Importantly, this shows that we can identify cancer-related DNA elements in other cancer types by investigating the CS network of the healthy cell of origin, a key new insight paving the road to new therapeutic strategies. This gives us an opportunity to exploit chromosome conformation data in healthy cells to predict new drivers.

Availability and implementation

Our predicted CLL genes and RNAs are provided as a free resource to the community at https://life.bsc.es/iconbi/chromatin/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-07-01 +,Age and Gender Demographics Predict Compliance with COVID-19 Public Health Measures: Data from a Global Sample,"Abstract The COVID-19 global pandemic has brought far-reaching consequences on individual and societal levels. Social distancing and physical hygiene constitute effective public health measures to limit the spread of the virus. The current study investigates individual age and gender demographics, in interaction with a country’s human development index (HDI), as crucial factors influencing compliance with public health measures in a large multi-national adult lifespan sample. This report leverages data from a large-scale international collaboration (Van Bavel et al., 2020; https://psyarxiv.com/ydt95/) comprising 45,576 individuals from 66 countries/territories. Participants provided self-reports of their compliance/agreement with three public health measures (i.e., spatial distancing, physical hygiene, policy support). Older age, female gender, and lower HDI were independently associated with greater compliance with public health measures. In addition, a significant three-way interaction between participant age, participant gender, and a country’s HDI revealed that compliance was lowest in younger adults from well-developed countries, while compliance was highest among females across all ages from less-developed countries. Compliance with public health measures is crucial in effectively reducing coronavirus spread. Our findings suggest that age and gender as individual-level demographics, in tandem with HDI as a country-level predictor, affect individuals’ willingness to comply with public health measures. These results highlight the potential of data-driven, tailored (i.e., towards specific demographics) health campaigns and public policies in the fight against a global pandemic.",2021-01-01 +33757430,ATAV: a comprehensive platform for population-scale genomic analyses.,"

Background

A common approach for sequencing studies is to do joint-calling and store variants of all samples in a single file. If new samples are continually added or controls are re-used for several studies, the cost and time required to perform joint-calling for each analysis can become prohibitive.

Results

We present ATAV, an analysis platform for large-scale whole-exome and whole-genome sequencing projects. ATAV stores variant and per site coverage data for all samples in a centralized database, which is efficiently queried by ATAV to support diagnostic analyses for trios and singletons, as well as rare-variant collapsing analyses for finding disease associations in complex diseases. Runtime logs ensure full reproducibility and the modularized ATAV framework makes it extensible to continuous development. Besides helping with the identification of disease-causing variants for a range of diseases, ATAV has also enabled the discovery of disease-genes by rare-variant collapsing on datasets containing more than 20,000 samples. Analyses to date have been performed on data of more than 110,000 individuals demonstrating the scalability of the framework. To allow users to easily access variant-level data directly from the database, we provide a web-based interface, the ATAV data browser ( http://atavdb.org/ ). Through this browser, summary-level data for more than 40,000 samples can be queried by the general public representing a mix of cases and controls of diverse ancestries. Users have access to phenotype categories of variant carriers, as well as predicted ancestry, gender, and quality metrics. In contrast to many other platforms, the data browser is able to show data of newly-added samples in real-time and therefore evolves rapidly as more and more samples are sequenced.

Conclusions

Through ATAV, users have public access to one of the largest variant databases for patients sequenced at a tertiary care center and can look up any genes or variants of interest. Additionally, since the entire code is freely available on GitHub, ATAV can easily be deployed by other groups that wish to build their own platform, database, and user interface.",2021-03-23 +33662628,Chinese Glioma Genome Atlas (CGGA): A Comprehensive Resource with Functional Genomic Data from Chinese Glioma Patients.,"Gliomas are the most common and malignant intracranial tumors in adults. Recent studies have revealed the significance of functional genomics for glioma pathophysiological studies and treatments. However, access to comprehensive genomic data and analytical platforms is often limited. Here, we developed the Chinese Glioma Genome Atlas (CGGA), a user-friendly data portal for the storage and interactive exploration of cross-omics data, including nearly 2000 primary and recurrent glioma samples from Chinese cohort. Currently, open access is provided to whole-exome sequencing data (286 samples), mRNA sequencing (1018 samples) and microarray data (301 samples), DNA methylation microarray data (159 samples), and microRNA microarray data (198 samples), and to detailed clinical information (age, gender, chemoradiotherapy status, WHO grade, histological type, critical molecular pathological information, and survival data). In addition, we have developed several tools for users to analyze the mutation profiles, mRNA/microRNA expression, and DNA methylation profiles, and to perform survival and gene correlation analyses of specific glioma subtypes. This database removes the barriers for researchers, providing rapid and convenient access to high-quality functional genomic data resources for biological studies and clinical applications. 
CGGA is available at http://www.cgga.org.cn.",2021-02-01 +34759968,"Construction, Validation, and Visualization of Two Web-Based Nomograms to Predict Overall and Cancer-Specific Survival in Patients with Gastric Cancer and Lung Metastases.","

Background

The lung is one of the most common sites of metastasis in gastric cancer. Our study developed two nomograms to achieve individualized prediction of overall survival (OS) and cancer-specific survival (CSS) in patients with gastric cancer and lung metastasis (GCLM) to better guide follow-up and planning of subsequent treatment.

Methods

We reviewed data of patients diagnosed with GCLM in the Surveillance, Epidemiology, and End Results (SEER) database from 2010 to 2015. The endpoints of the study were the OS and CSS. We used the ""caret"" package to randomly divide patients into training and validation cohorts in a 7 : 3 ratio. Multivariate Cox regression analysis was performed using univariate Cox regression analysis to confirm the independent prognostic factors. Afterward, we built the OS and CSS nomograms with the ""rms"" package. Subsequently, we evaluated the two nomograms through calibration curves, receiver operating characteristic (ROC) curves, and decision curve analysis (DCA). Finally, two web-based nomograms were built on the basis of effective nomograms.

Results

The OS analysis included 640 patients, and the results of the multivariate Cox regression analysis showed that grade, chemotherapy, and liver metastasis were independent prognostic factors for patients with GCLM. The CSS analysis included 524 patients, and the results of the multivariate Cox regression analysis showed that the independent prognostic factors for patients with GCLM were chemotherapy, liver metastasis, marital status, and tumor site. The ROC curves, calibration curves, and DCA revealed favorable predictive power in the OS and CSS nomograms. We created web-based nomograms for OS (https://zhenghh.shinyapps.io/aclmos/) and CSS (https://zhenghh.shinyapps.io/aslmcss/).

Conclusions

We created two web-based nomograms to predict OS and CSS in patients with GCLM. Both web-based nomograms had satisfactory accuracy and clinical usefulness and may help clinicians make individualized treatment decisions for patients.",2021-11-01 +34267263,A hybrid computational framework for intelligent inter-continent SARS-CoV-2 sub-strains characterization and prediction.,"Whereas accelerated attention beclouded early stages of the coronavirus spread, knowledge of actual pathogenicity and origin of possible sub-strains remained unclear. By harvesting the Global initiative on Sharing All Influenza Data (GISAID) database ( https://www.gisaid.org/ ), between December 2019 and January 15, 2021, a total of 8864 human SARS-CoV-2 complete genome sequences processed by gender, across 6 continents (88 countries) of the world, Antarctica exempt, were analyzed. We hypothesized that data speak for itself and can discern true and explainable patterns of the disease. Identical genome diversity and pattern correlates analysis performed using a hybrid of biotechnology and machine learning methods corroborate the emergence of inter- and intra- SARS-CoV-2 sub-strains transmission and sustain an increase in sub-strains within the various continents, with nucleotide mutations dynamically varying between individuals in close association with the virus as it adapts to its host/environment. Interestingly, some viral sub-strain patterns progressively transformed into new sub-strain clusters indicating varying amino acid, and strong nucleotide association derived from same lineage. A novel cognitive approach to knowledge mining helped the discovery of transmission routes and seamless contact tracing protocol. Our classification results were better than state-of-the-art methods, indicating a more robust system for predicting emerging or new viral sub-strain(s). The results therefore offer explanations for the growing concerns about the virus and its next wave(s). 
A future direction of this work is a defuzzification of confusable pattern clusters for precise intra-country SARS-CoV-2 sub-strains analytics.",2021-07-15 +34869322,New Autophagy-Ferroptosis Gene Signature Predicts Survival in Glioma.,"Background: Ferroptosis plays an important role in glioma and significantly affects the prognosis, but the specific mechanism has not yet been elucidated. Recent studies suggest that autophagy regulates the process of ferroptosis. This study aimed to find potential autophagy-ferroptosis genes and explore the prognostic significance in glioma. Methods: Ferroptosis and autophagy genes were obtained from two online databases (zhounan.org/ferrdb and autophagy.lu/). The RNAseq data and clinical information were obtained from the Chinese Glioma Genome Atlas (CGGA) database (http://www.cgga.org.cn/). Univariate, multivariate, lasso and Cox regression analysis screened out prognosis-related genes, and a risk model was constructed. Receiver operating characteristic (ROC) curve analysis evaluated the predictive efficiency of the model. Finally, a nomogram was constructed to more accurately predict the prognosis of glioma. Results: We developed a Venn diagram showing 23 autophagy-ferroptosis genes. A total of 660 cases (including RNA sequences and complete clinical information) from two different cohorts (training group n = 413, verification group n = 247) of the CGGA database was acquired. Cohorts were screened to include five prognosis-related genes (MTOR, BID, HSPA5, CDKN2A, GABARAPLA2). Kaplan-Meier curves showed that the risk model was a good prognostic indicator (p < 0.001). ROC analysis showed good efficacy of the risk model. Multivariate Cox analysis also revealed that the risk model was suitable for clinical factors related to prognosis, including type of disease (primary, recurrence), grade (III-IV), age, temozolomide treatment, and 1p19q state. 
Using the five prognosis-related genes and the risk score, we constructed a nomogram assessed by C-index (0.7205) and a calibration plot that could more accurately predict glioma prognosis. Conclusion: Using a current database of autophagy and ferroptosis genes, we confirmed the prognostic significance of autophagy-ferroptosis genes in glioma, and we constructed a prognostic model to help guide treatment for high grade glioma in the future.",2021-11-15 +,"Next generation sequencing and microbiome's taxonomical characterization of frozen soil of north western Himalayas of Jammu and Kashmir, India","Traditionally, microbial genome sequencing has been restrained to the species grown in pure culture. The development of culture-independent techniques over the last decade allows scientists to sequence microbial communities directly from environmental samples. Metagenomics is the study of complex genome by the isolation of DNA of the whole community. Next generation sequencing (NGS) of metagenomic DNA gives information about the microbial and taxonomical characterization of a particular niche. The objective of the present research is to study the microbial and taxonomical characterization of the metagenomic DNA, isolated from the frozen soil sample of a glacier in the north western Himalayas through NGS.The glacier community comprised of 16 phyla with the representation of members belonging to Proteobacteria and Acidobacteria. The number of genes annotated through the Kyoto Encyclopedia of Genes and Genomes (KEGG), GO, Pfam, Clusters of Orthologous Groups of proteins (COGs), and FIG databases were generated by COGNIZER. The annotation of genes assigned in each group from the metagenomics data through COG database and the number of genes annotated in different pathways through KEGG database were reported.Results indicate that the glacier soil taken in the present study, harbors taxonomically and metabolically diverse communities. 
The major bacterial group present in the niche is Proteobacteria followed by Acidobacteria, and Actinobacteria, etc. Different genes were annotated through COG and KEGG databases that integrate genomic, chemical, and systemic functional information.How to cite: Gupta V, Singh I, Rasool S, et al. Next Generation sequencing and microbiome’s taxonomical characterization of frozen soil of North Western Himalayas of Jammu and Kashmir, India. Electron J Biotechnol 2020;45. https://doi.org/10.1016/j.ejbt.2020.03.003.",2020-05-01 +34910571,Statistical Considerations for Analyzing Ecological Momentary Assessment Data.,"

Purpose

The analysis of Ecological Momentary Assessment (EMA) data can be difficult to conceptualize due to the complexity of how the data are collected. The goal of this tutorial is to provide an overview of statistical considerations for analyzing observational data arising from EMA studies.

Method

EMA data are collected in a variety of ways, complicating the statistical analysis. We focus on fundamental statistical characteristics of the data and general purpose statistical approaches to analyzing EMA data. We implement those statistical approaches using a recent study involving EMA.

Results

The linear or generalized linear mixed-model statistical approach can adequately capture the challenges resulting from EMA collected data if properly set up. Additionally, while sample size depends on both the number of participants and the number of survey responses per participant, having more participants is more important than the number of responses per participant.

Conclusion

Using modern statistical methods when analyzing EMA data and adequately considering all of the statistical assumptions being used can lead to interesting and important findings when using EMA.

Supplemental material

https://doi.org/10.23641/asha.17155961.",2021-12-15 +33995478,Construction of Unified Human Antimicrobial and Immunomodulatory Peptide Database and Examination of Antimicrobial and Immunomodulatory Peptides in Alzheimer's Disease Using Network Analysis of Proteomics Datasets.,"The reanalysis of genomics and proteomics datasets by bioinformatics approaches is an appealing way to examine large amounts of reliable data. This can be especially true in cases such as Alzheimer's disease, where the access to biological samples, along with well-defined patient information can be challenging. Considering the inflammatory part of Alzheimer's disease, our aim was to examine the presence of antimicrobial and immunomodulatory peptides in human proteomic datasets deposited in the publicly available proteomics database ProteomeXchange (http://www.proteomexchange.org/). First, a unified, comprehensive human antimicrobial and immunomodulatory peptide database, containing all known human antimicrobial and immunomodulatory peptides was constructed and used along with the datasets containing high-quality proteomics data originating from the examination of Alzheimer's disease and control groups. A throughout network analysis was carried out, and the enriched GO functions were examined. Less than 1% of all identified proteins in the brain were antimicrobial and immunomodulatory peptides, but the alterations characteristic of Alzheimer's disease could be recapitulated with their analysis. Our data emphasize the key role of the innate immune system and blood clotting in the development of Alzheimer's disease. The central role of antimicrobial and immunomodulatory peptides suggests their utilization as potential targets for mechanistic studies and future therapies.",2021-04-28 +26106450,Metrabase: a cheminformatics and bioinformatics database for small molecule transporter data analysis and (Q)SAR modeling.,"

Abstract

Both metabolism and transport are key elements defining the bioavailability and biological activity of molecules, i.e. their adverse and therapeutic effects. Structured and high quality experimental data stored in a suitable container, such as a relational database, facilitates easy computational processing and thus allows for high quality information/knowledge to be efficiently inferred by computational analyses. Our aim was to create a freely accessible database that would provide easy access to data describing interactions between proteins involved in transport and xenobiotic metabolism and their small molecule substrates and modulators. We present Metrabase, an integrated cheminformatics and bioinformatics resource containing curated data related to human transport and metabolism of chemical compounds. Its primary content includes over 11,500 interaction records involving nearly 3,500 small molecule substrates and modulators of transport proteins and, currently to a much smaller extent, cytochrome P450 enzymes. Data was manually extracted from the published literature and supplemented with data integrated from other available resources. Metrabase version 1.0 is freely available under a CC BY-SA 4.0 license at http://www-metrabase.ch.cam.ac.uk.",2015-06-23 +33907838,Demetra Application: An integrated genotype analysis web server for clinical genomics in endometriosis. ,"Demetra Application is a holistic integrated and scalable bioinformatics web‑based tool designed to assist medical experts and researchers in the process of diagnosing endometriosis. The application identifies the most prominent gene variants and single nucleotide polymorphisms (SNPs) causing endometriosis using the genomic data provided for the patient by a medical expert. The present study analyzed >28.000 endometriosis‑related publications using data mining and semantic techniques aimed towards extracting the endometriosis‑related genes and SNPs. 
The extracted knowledge was filtered, evaluated, annotated, classified, and stored in the Demetra Application Database (DAD). Moreover, an updated gene regulatory network with the genes implements in endometriosis was established. This was followed by the design and development of the Demetra Application, in which the generated datasets and results were included. The application was tested and presented herein with whole‑exome sequencing data from seven related patients with endometriosis. Endometriosis‑related SNPs and variants identified in genome‑wide association studies (GWAS), whole‑genome (WGS), whole‑exome (WES), or targeted sequencing information were classified, annotated and analyzed in a consolidated patient profile with clinical significance information. Probable genes associated with the patient's genomic profile were visualized using several graphs, including chromosome ideograms, statistic bars and regulatory networks through data mining studies with relative publications, in an effort to obtain a representative number of the most credible candidate genes and biological pathways associated with endometriosis. An evaluation analysis was performed on seven patients from a three‑generation family with endometriosis. All the recognized gene variants that were previously considered to be associated with endometriosis were properly identified in the output profile per patient, and by comparing the results, novel findings emerged. This novel and accessible webserver tool of endometriosis to assist medical experts in the clinical genomics and precision medicine procedure is available at http://geneticslab.aua.gr/.",2021-04-28 +34330336,HumGut: a comprehensive human gut prokaryotic genomes collection filtered by metagenome data.,"

Background

A major bottleneck in the use of metagenome sequencing for human gut microbiome studies has been the lack of a comprehensive genome collection to be used as a reference database. Several recent efforts have been made to re-construct genomes from human gut metagenome data, resulting in a huge increase in the number of relevant genomes. In this work, we aimed to create a collection of the most prevalent healthy human gut prokaryotic genomes, to be used as a reference database, including both MAGs from the human gut and ordinary RefSeq genomes.

Results

We screened > 5,700 healthy human gut metagenomes for the containment of > 490,000 publicly available prokaryotic genomes sourced from RefSeq and the recently announced UHGG collection. This resulted in a pool of > 381,000 genomes that were subsequently scored and ranked based on their prevalence in the healthy human metagenomes. The genomes were then clustered at a 97.5% sequence identity resolution, and cluster representatives (30,691 in total) were retained to comprise the HumGut collection. Using the Kraken2 software for classification, we find superior performance in the assignment of metagenomic reads, classifying on average 94.5% of the reads in a metagenome, as opposed to 86% with UHGG and 44% when using standard Kraken2 database. A coarser HumGut collection, consisting of genomes dereplicated at 95% sequence identity-similar to UHGG, classified 88.25% of the reads. HumGut, half the size of standard Kraken2 database and directly comparable to the UHGG size, outperforms them both.

Conclusions

The HumGut collection contains > 30,000 genomes clustered at a 97.5% sequence identity resolution and ranked by human gut prevalence. We demonstrate how metagenomes from IBD-patients map equally well to this collection, indicating this reference is relevant also for studies well outside the metagenome reference set used to obtain HumGut. All data and metadata, as well as helpful code, are available at http://arken.nmbu.no/~larssn/humgut/ . Video Abstract.",2021-07-31 +33897975,FGDB: Database of follicle stimulating hormone glycans.,"Glycomics, the study of the entire complement of sugars of an organism has received significant attention in the recent past due to the advances made in high throughput mass spectrometry technologies. These analytical advancements have facilitated the characterization of glycans associated with the follicle-stimulating hormones (FSH), which play a central role in the human reproductive system both in males and females utilizing regulating gonadal (testicular and ovarian) functions. The irregularities in FSH activity are also directly linked with osteoporosis. The glycoanalytical studies have been tremendously helpful in understanding the biological roles of FSH. Subsequently, the increasing number of characterized FSH glycan structures and related glycoform data has thrown a challenge to the glycoinformatics community in terms of data organization, storage and access. Also, a user-friendly platform is needed for providing easy access to the database and performing integrated analysis using a high volume of experimental data to accelerate FSH-focused research. FSH Glycans DataBase (FGDB) serves as a comprehensive and unique repository of structures, features, and related information of glycans associated with FSH. Apart from providing multiple search options, the database also facilitates an integrated user-friendly interface to perform the glycan abundance and comparative analyses using experimental data. 
The automated integrated pipelines present the possible structures of glycans and variants of FSH based on the input data, and allow the user to perform various analyses. The potential application of FGDB will significantly help both glycoinformaticians as well as wet-lab researchers to stimulate the research in this area. FGDB web access: https://fgdb.unmc.edu/.",2021-03-22 +,"First Report of Erwinia rhapontici Causing Bacterial Rot on Peach, Detected in Hungary","Erwinia rhapontici is an opportunistic bacterial plant pathogen that can cause two types of symptoms: pink seed or crown, and soft, bulb, and blossom rot (Huang et al. 2003). It has been shown to cause disease in numerous plant species including kiwifruit (Wang et al. 2017), wheat, onion, cereal, pea, bean, rye, hyacinth, and tomato (Huang et al. 2003). However, E. rhapontici has not yet been reported to cause disease on the peach (Prunus persica). We observed typical bacterial rot, shriveled stems, a characteristic shepherd’s crook, and bacterial ooze on two 5-year-old cultivar Champion and one 8-year-old cultivar Hope peach trees (see photos in the supplement) in a private garden in Budakeszi, Hungary, on 30 May 2019 during wet weather conditions. The garden is located in a calm suburban setting far from bigger plantations, where only these peach trees were planted and no similar symptoms were observed on these trees earlier. These were isolated cases in this area. Nine samples were taken from the oozes, inoculated on sucrose-peptone agar, and incubated at 28°C for 24 to 30 h in order to reveal the etiological agent of the infection. Outgrown colonies produced pink pigment, and three of them were chosen for identification using MALDI-TOF MS by comparing the sample’s spectra against the VITEK MS version 3.2.0 database, using the manufacturer’s instructions. All colonies were identified as E. rhapontici, based on high confidence scores (2.03 to 2.18). 
For whole-genome sequencing, bacterial DNA was isolated (PureLink Genomic DNA Mini Kit, Thermo Fisher Scientific, U.S.A.) from the subculture of one colony according to the instructions of the manufacturer. The whole-genome sequencing and de novo assembly occurred as described in GenBank (accession GCA_012271765.1). We performed an ANIb analysis at JSpecies (http://jspecies.ribohost.com/jspeciesws/#analyse) against the GenBank reference strain of E. rhapontici BIGb0435 (accession GCA_004364855.1), confirming that the isolate was E. rhapontici (the ANI was 98.81%). To verify the etiological role of E. rhapontici, experiments were performed based on the Koch’s postulates. Ten 1-year-old Champion peach trees were treated. Trees were planted in 10-liter dishes and cultivated under artificial conditions in plant chambers (temperature 22 ± 1°C, illumination: 14 h/day). Infection was carried out when at least five leaves were present on each tree. One-third of the leaves were injured using a sterile rubber, and 20 ml of E. rhapontici-containing suspension (7 × 10⁷ CFU/ml; cells were pelleted with centrifugation at 6,000 × g and resuspended in sterile phosphate-buffered saline [PBS]) was sprayed on the injured leaves of five trees. Sterile PBS was applied to five control trees. The first symptoms of infection were detected 4 days after the treatment on injured leaves. Symptoms also spread to the uninjured leaves, indicating the systemic nature of the infection. For example, brown spots appeared first along the midrib and veins, later also on other parts of the leaves, and larger lesions could be detected as well. Fifty-four to 78% of the leaves on the E. rhapontici-infected trees showed signs of infection 14 days after treatment, whereas no leaves on the control trees showed any symptoms. Colonies were reisolated from three infected leaves, and their identities were confirmed with MALDI-TOF MS as E. rhapontici. To our knowledge, this is the first report on E. 
rhapontici causing disease in peaches.",2020-12-01 +32780568,How to Illuminate the Dark Proteome Using the Multi-omic OpenProt Resource.,"Ten of thousands of open reading frames (ORFs) are hidden within genomes. These alternative ORFs, or small ORFs, have eluded annotations because they are either small or within unsuspected locations. They are found in untranslated regions or overlap a known coding sequence in messenger RNA and anywhere in a ""non-coding"" RNA. Serendipitous discoveries have highlighted these ORFs' importance in biological functions and pathways. With their discovery came the need for deeper ORF annotation and large-scale mining of public repositories to gather supporting experimental evidence. OpenProt, accessible at https://openprot.org/, is the first proteogenomic resource enforcing a polycistronic model of annotation across an exhaustive transcriptome for 10 species. Moreover, OpenProt reports experimental evidence cumulated across a re-analysis of 114 mass spectrometry and 87 ribosome profiling datasets. The multi-omics OpenProt resource also includes the identification of predicted functional domains and evaluation of conservation for all predicted ORFs. The OpenProt web server provides two query interfaces and one genome browser. The query interfaces allow for exploration of the coding potential of genes or transcripts of interest as well as custom downloads of all information contained in OpenProt. © 2020 The Authors. Basic Protocol 1: Using the Search interface Basic Protocol 2: Using the Downloads interface.",2020-09-01 +34777861,Spatial Distribution and Determinants of Nonautonomy on Decision Regarding Contraceptive Utilization among Married Reproductive-Age Women in Ethiopia: Spatial and Bayesian Multilevel Analysis.,"

Background

Studies conducted to date in Ethiopia did not explore the spatial distribution, individual-level, and community-level factors affecting women's nonautonomy on decision to use contraceptives. Hence, this study aimed to assess the spatial distribution of women's nonautonomy on decision regarding contraceptive utilization and its determinants in Ethiopia.

Methods

Data were accessed from the Demographic Health Survey program official database website (https://dhsprogram.com). A weighted sample of 3,668 married reproductive-age women currently using contraceptives was included in this analysis. Bayesian multilevel logistic regression models were fitted to identify the determinants of women's nonautonomy on contraceptive utilization. Adjusted odds ratio with 95% credible interval was used to select variables that have a significant effect on nonautonomy on contraceptive utilization.

Results

A high proportion of women with nonautonomy on decision regarding contraceptive utilization was found in northern parts of Southern Nations, Nationalities, and People's Region, Southern parts of Oromia, and Benishangul-Gumuz regions of the country. Overall, 2876 (78.40% (95% CI: 77.0%, 79.7%)) women were nonautonomous on decision regarding contraceptive utilization. In the final model, age from 35-49 (AOR (95% CI) = 0.63 (0.54, 0.72)), living in the richer households (AOR (95% CI) = 0.12 (0.03, 0.26)), being married at 18 years or above (AOR (95% CI) = 0.33 (0.19, 0.57)), and residing in rural areas (AOR (95% CI) = 1.34 (1.01, 1.71)) and metropolitan regions (AOR (95% CI) = 0.71 (0.54, 0.91)) were associated with women's nonautonomy on decision regarding contraceptive utilization.

Conclusions

In Ethiopia, the spatial distribution of women's nonautonomy on decision about contraceptive utilization was nonrandom. More than three-fourths of married reproductive-age women in Ethiopia are nonautonomous on decision regarding contraceptive utilization. Region, residence, current age, age at marriage, and wealth index were statistically associated with women's nonautonomy on decision regarding contraceptive utilization.",2021-11-05 +34878854,"The National Health and Nutrition Examination Survey (NHANES), 2021-2022: Adapting Data Collection in a COVID-19 Environment.","The National Health and Nutrition Examination Survey (NHANES) is a unique source of national data on the health and nutritional status of the US population, collecting data through interviews, standard exams, and biospecimen collection. Because of the COVID-19 pandemic, NHANES data collection was suspended, with more than a year gap in data collection. NHANES resumed operations in 2021 with the NHANES 2021-2022 survey, which will monitor the health and nutritional status of the nation while adding to the knowledge of COVID-19 in the US population. This article describes the reshaping of the NHANES program and, specifically, the planning of NHANES 2021-2022 for data collection during the COVID-19 pandemic. Details are provided on how NHANES transformed its participant recruitment and data collection plans at home and at the mobile examination center to safely collect data in a COVID-19 environment. The potential implications for data users are also discussed. (Am J Public Health. 2021;111(12):2149-2156. https://doi.org/10.2105/AJPH.2021.306517).",2021-12-01 +31648227,MANET 3.0: Hierarchy and modularity in evolving metabolic networks.,"Enzyme recruitment is a fundamental evolutionary driver of modern metabolism. 
We see evidence of recruitment at work in the metabolic Molecular Ancestry Networks (MANET) database, an online resource that integrates data from KEGG, SCOP and structural phylogenomic reconstruction. The database, which was introduced in 2006, traces the deep history of the structural domains of enzymes in metabolic pathways. Here we release version 3.0 of MANET, which updates data from KEGG and SCOP, links enzyme and PDB information with PDBsum, and traces evolutionary information of domains defined at fold family level of SCOP classification in metabolic subnetwork diagrams. Compared to SCOP folds used in the previous versions, fold families are cohesive units of functional similarity that are highly conserved at sequence level and offer a 10-fold increase of data entries. We surveyed enzymatic, functional and catalytic site distributions among superkingdoms showing that ancient enzymatic innovations followed a biphasic temporal pattern of diversification typical of module innovation. We grouped enzymatic activities of MANET into a hierarchical system of subnetworks and mesonetworks matching KEGG classification. The evolutionary growth of these modules of metabolic activity was studied using bipartite networks and their one-mode projections at enzyme, subnetwork and mesonetwork levels of organization. Evolving metabolic networks revealed patterns of enzyme sharing that transcended mesonetwork boundaries and supported the patchwork model of metabolic evolution. We also explored the scale-freeness, randomness and small-world properties of evolving networks as possible organizing principles of network growth and diversification. The network structure shows an increase in hierarchical modularity and scale-free behavior as metabolic networks unfold in evolutionary time. Remarkably, this evolutionary constraint on structure was stronger at lower levels of metabolic organization. 
Evolving metabolic structure reveals a 'principle of granularity', an evolutionary increase of the cohesiveness of lower-level parts of a hierarchical system. MANET is available at http://manet.illinois.edu.",2019-10-24 +32445587,Systematic analysis of 1298 RNA-Seq samples and construction of a comprehensive soybean (Glycine max) expression atlas.,"Soybean (Glycine max [L.] Merr.) is a major crop in animal feed and human nutrition, mainly for its rich protein and oil contents. The remarkable rise in soybean transcriptome studies over the past 5 years generated an enormous amount of RNA-seq data, encompassing various tissues, developmental conditions and genotypes. In this study, we have collected data from 1298 publicly available soybean transcriptome samples, processed the raw sequencing reads and mapped them to the soybean reference genome in a systematic fashion. We found that 94% of the annotated genes (52 737/56 044) had detectable expression in at least one sample. Unsupervised clustering revealed three major groups, comprising samples from aerial, underground and seed/seed-related parts. We found 452 genes with uniform and constant expression levels, supporting their roles as housekeeping genes. On the other hand, 1349 genes showed heavily biased expression patterns towards particular tissues. A transcript-level analysis revealed that 95% (70 963 of 74 490) of the assembled transcripts have intron chains exactly matching those from known transcripts, whereas 3256 assembled transcripts represent potentially novel splicing isoforms. The dataset compiled here constitute a new resource for the community, which can be downloaded or accessed through a user-friendly web interface at http://venanciogroup.uenf.br/resources/. 
This comprehensive transcriptome atlas will likely accelerate research on soybean genetics and genomics.",2020-08-13 +30084000,A network map of netrin receptor UNC5B-mediated signaling.,"UNC-5 Homolog B (UNC5B) is a member of the dependence receptor family. This family of receptors can induce two opposite intracellular signaling cascades depending on the presence or absence of the ligand and is thus capable of driving two opposing processes. UNC5B signaling has been implicated in several cancers, where it induces cell death in the absence of its ligand Netrin-1 and promotes cell survival in its presence. In addition, inhibition of Netrin-1 ligand has been reported to decrease invasiveness and angiogenesis in tumors. UNC5B signaling pathway has also been reported to be involved in several processes such as neural development, developmental angiogenesis and inflammatory processes. However, literature pertaining to UNC5B signaling is scarce and scattered. Considering the importance of UNC5B signaling, we developed a resource of signaling events mediated by UNC5B. Using data mined from published literature, we compiled an integrated pathway map consisting of 88 UNC5B-mediated signaling events and 55 proteins. These signaling events include 27 protein-protein interaction events, 33 catalytic events involving various post-translational modifications, 9 events of UNC5B-mediated protein activation/inhibition, 27 gene regulation events and 2 events of translocation. This pathway resource has been made available to the research community through NetPath ( http://www.netpath.org /), a manually curated resource of signaling pathways (Database URL: http://www.netpath.org/pathways?path_id=NetPath_172 ). The current resource provides a foundation for the understanding of UNC5B-mediated cellular responses. 
The development of resource will serve researchers to explore the mechanisms of UNC-5B signaling in cancers.",2018-08-06 +33740463,"Integrated Collection of Stem Cell Bank Data, a Data Portal for Standardized Stem Cell Information.","The past decade has witnessed an extremely rapid increase in the number of newly established stem cell lines. However, due to the lack of a standardized format, data exchange among stem cell line resources has been challenging, and no system can search all stem cell lines across resources worldwide. To solve this problem, we have developed the Integrated Collection of Stem Cell Bank data (ICSCB) (http://icscb.stemcellinformatics.org/), the largest database search portal for stem cell line information, based on the standardized data items and terms of the MIACARM framework. Currently, ICSCB can retrieve >16,000 cell lines from four major data resources in Europe, Japan, and the United States. ICSCB is automatically updated to provide the latest cell line information, and its integrative search helps users collect cell line information for over 1,000 diseases, including many rare diseases worldwide, which has been a formidable task, thereby distinguishing itself from other database search portals.",2021-03-18 +30965135,A new data analysis method based on feature linear combination.,"In biological data, feature relationships are complex and diverse, they could reflect physiological and pathological changes. Defining simple and efficient classification rules based on feature relationships is helpful for discriminating different conditions and studying disease mechanism. The popular data analysis method, k top scoring pairs (k-TSP), explores the feature relationship by focusing on the difference of the relative level of two features in different groups and classifies samples based on the exploration. 
To define more efficient classification rules, we propose a new data analysis method based on the linear combination of k > 0 top scoring pairs (LC-k-TSP). LC-k-TSP applies support vector machine (SVM) to define the best linear relationship of each feature pair, scores feature pairs by the discriminative abilities of the corresponding linear combinations and selects k disjoint top scoring pairs to construct an ensemble classifier. Experiments on twelve public datasets showed the superiority of LC-k-TSP over k-TSP which evaluates the relationship of every two features in the same way. The experiment also illustrated that LC-k-TSP performed similarly to SVM and random forest (RF) in accuracy rate. LC-k-TSP studies the own unique linear combination for each feature pair and defines simple classification rules, it is easy to explore the biomedical explanation. Finally, we applied LC-k-TSP to analyze the hepatocellular carcinoma (HCC) metabolomics data and define the simple classification rules for discrimination of different liver diseases. It obtained accuracy rates of 89.76% and 89.13% in distinguishing between small HCC and hepatic cirrhosis (CIR) groups as well as between HCC and CIR groups, superior to 87.99% and 80.35% by k-TSP. Hence, defining classification rules based on feature relationships is an effective way to analyze biological data. LC-k-TSP which checks different feature pairs by their corresponding unique best linear relationship has the superiority over k-TSP which checks each pair by the same linear relationship. Availability and implementation: http://www.402.dicp.ac.cn/download_ok_4.htm.",2019-04-06 +34546290,ATPdock: a template-based method for ATP-specific protein-ligand docking. ,"Accurately identifying protein-ATP binding poses is significantly valuable for both basic structure biology and drug discovery. 
Although many docking methods have been designed, most of them require a user-defined binding site and are difficult to achieve a high-quality protein-ATP docking result. It is critical to develop a protein-ATP-specific blind docking method without user-defined binding sites. Here, we present ATPdock, a template-based method for docking ATP into protein. For each query protein, if no pocket site is given, ATPdock first identifies its most potential pocket using ATPbind, an ATP-binding site predictor; then, the template pocket, which is most similar to the given or identified pocket, is searched from the database of pocket-ligand structures using APoc, a pocket structural alignment tool; thirdly, the rough docking pose of ATP (rdATP) is generated using LS-align, a ligand structural alignment tool, to align the initial ATP pose to the template ligand corresponding to template pocket; finally, the Metropolis Monte Carlo simulation is used to fine-tune the rdATP under the guidance of AutoDock Vina energy function. Benchmark tests show that ATPdock significantly outperforms other state-of-the-art methods in docking accuracy. https://jun-csbio.github.io/atpdock/. Supplementary data are available at Bioinformatics online.",2021-09-21 +33993461,TUPDB: Target-Unrelated Peptide Data Bank.,"The isolation of target-unrelated peptides (TUPs) through biopanning remains as a major problem of phage display selection experiments. These TUPs do not have any actual affinity toward targets of interest, which tend to be mistakenly identified as target-binding peptides. Therefore, an information portal for storing TUP data is urgently needed. Here, we present a TUP data bank (TUPDB), which is a comprehensive, manually curated database of approximately 73 experimentally verified TUPs and 1963 potential TUPs collected from TUPScan, the BDB database, and public research articles. The TUPScan tool has been integrated in TUPDB to facilitate TUP analysis. 
We believe that TUPDB can help identify and remove TUPs in future reports in the biopanning community. The database is of great importance to improving the quality of phage display-based epitope mapping and promoting the development of vaccines, diagnostics, and therapeutics. The TUPDB database is available at http://i.uestc.edu.cn/tupdb .",2021-05-16 +33216795,Exploring options for reprocessing of N95 Filtering Facepiece Respirators (N95-FFRs) amidst COVID-19 pandemic: A systematic review.,"

Background

There is global shortage of Personal Protective Equipment due to COVID-19 pandemic. N95 Filtering Facepiece Respirators (N95-FFRs) provide respiratory protection against respiratory pathogens including SARS-CoV-2. There is scant literature on reprocessing methods which can enable reuse of N95-FFRs.

Aim

We conducted this study to evaluate research done, prior to COVID-19 pandemic, on various decontamination methods for reprocessing of N95-FFRs.

Methods

We searched 5 electronic databases (Pubmed, Google Scholar, Crossref, Ovid, ScienceDirect) and 1 Grey literature database (OpenGrey). We included original studies, published prior to year 2020, which had evaluated any decontamination method on FFRs. Studies had evaluated a reprocessing method against parameters namely physical changes, user acceptability, respirator fit, filter efficiency, microbicidal efficacy and presence of chemical residues post-reprocessing.

Findings and conclusions

Overall, we found 7887 records amongst which 17 original research articles were finally included for qualitative analysis. Overall, 21 different types of decontamination or reprocessing methods for N95-FFRs were evaluated. Most commonly evaluated method for reprocessing of FFRs was Ultraviolet (Type-C) irradiation (UVGI) which was evaluated in 13/17 (76%) studies. We found published literature was scant on this topic despite warning signs of pandemic of a respiratory illness over the years. Promising technologies requiring expeditious evaluation are UVGI, Microwave generated steam (MGS) and based on Hydrogen peroxide vapor. Global presence of technologies, which have been given Emergency use authorisation for N95-FFR reprocessing, is extremely limited. Reprocessing of N95-FFRs by MGS should be considered for emergency implementation in resource limited settings to tackle shortage of N95-FFRs.

Systematic review identifier

PROSPERO, PROSPERO ID: CRD42020189684, (https://www.crd.york.ac.uk/prospero/display_record.php?ID=CRD42020189684).",2020-11-20 +34922446,GraphOmics: an interactive platform to explore and integrate multi-omics data.,"

Background

An increasing number of studies now produce multiple omics measurements that require using sophisticated computational methods for analysis. While each omics data can be examined separately, jointly integrating multiple omics data allows for deeper understanding and insights to be gained from the study. In particular, data integration can be performed horizontally, where biological entities from multiple omics measurements are mapped to common reactions and pathways. However, data integration remains a challenge due to the complexity of the data and the difficulty in interpreting analysis results.

Results

Here we present GraphOmics, a user-friendly platform to explore and integrate multiple omics datasets and support hypothesis generation. Users can upload transcriptomics, proteomics and metabolomics data to GraphOmics. Relevant entities are connected based on their biochemical relationships, and mapped to reactions and pathways from Reactome. From the Data Browser in GraphOmics, mapped entities and pathways can be ranked, sorted and filtered according to their statistical significance (p values) and fold changes. Context-sensitive panels provide information on the currently selected entities, while interactive heatmaps and clustering functionalities are also available. As a case study, we demonstrated how GraphOmics was used to interactively explore multi-omics data and support hypothesis generation using two complex datasets from existing Zebrafish regeneration and Covid-19 human studies.

Conclusions

GraphOmics is fully open-sourced and freely accessible from https://graphomics.glasgowcompbio.org/ . It can be used to integrate multiple omics data horizontally by mapping entities across omics to reactions and pathways. Our demonstration showed that by using interactive explorations from GraphOmics, interesting insights and biological hypotheses could be rapidly revealed.",2021-12-18 +30371892,DrugCentral 2018: an update.,"DrugCentral is a drug information resource (http://drugcentral.org) open to the public since 2016 and previously described in the 2017 Nucleic Acids Research Database issue. Since the 2016 release, 103 new approved drugs were updated. The following new data sources have been included: Food and Drug Administration (FDA) Adverse Event Reporting System (FAERS), FDA Orange Book information, L1000 gene perturbation profile distance/similarity matrices and estimated protonation constants. New and existing entries have been updated with the latest information from scientific literature, drug labels and external databases. The web interface has been updated to display and query new data. The full database dump and data files are available for download from the DrugCentral website.",2019-01-01 +34585726,EpiSurf: metadata-driven search server for analyzing amino acid changes within epitopes of SARS-CoV-2 and other viral species. ,"EpiSurf is a Web application for selecting viral populations of interest and then analyzing how their amino acid changes are distributed along epitopes. Viral sequences are searched within ViruSurf, which stores curated metadata and amino acid changes imported from the most widely used deposition sources for viral databases (GenBank, COVID-19 Genomics UK (COG-UK) and Global initiative on sharing all influenza data (GISAID)). Epitopes are searched within the open source Immune Epitope Database or directly proposed by users by indicating their start and stop positions in the context of a given viral protein. 
Amino acid changes of selected populations are joined with epitopes of interest; a result table summarizes, for each epitope, statistics about the overlapping amino acid changes and about the sequences carrying such alterations. The results may also be inspected by the VirusViz Web application; epitope regions are highlighted within the given viral protein, and changes can be comparatively inspected. For sequences mutated within the epitope, we also offer a complete view of the distribution of amino acid changes, optionally grouped by the location, collection date or lineage. Thanks to these functionalities, EpiSurf supports the user-friendly testing of epitope conservancy within selected populations of interest, which can be of utmost relevance for designing vaccines, drugs or serological assays. EpiSurf is available at two endpoints. Database URL: http://gmql.eu/episurf/ (for searching GenBank and COG-UK sequences) and http://gmql.eu/episurf_gisaid/ (for GISAID sequences).",2021-09-29 +34013639,In silico prediction of drug-induced ototoxicity using machine learning and deep learning methods.,"Drug-induced ototoxicity has become a serious global problem, because of leading to deafness in hundreds of thousands of people every year. It always results from exposure to drugs or environmental chemicals that cause the impairment and degeneration of the inner ear. Herein, we focused on the in silico modeling of drug-induced ototoxicity of chemicals. We collected 1,102 ototoxic medications and 1,705 non-ototoxic drugs. Based on the data set, a series of computational models were developed with different traditional machine learning and deep learning algorithms implemented on an online chemical database and modeling environment. Six ML models performed best on 5-fold cross-validation and test set. A consensus model was developed with the best individual models. These models were further validated with an external validation. 
The consensus model showed best predictive ability, with high accuracy of 0.95 on test set and 0.90 on validation set. The consensus model and the data sets used for model development are available at https://ochem.eu/model/46566321. Besides, 16 structural alerts responsible for drug-induced ototoxicity were identified. We hope the results could provide meaningful knowledge and useful tools for ototoxicity evaluation in drug discovery and environmental risk assessment.",2021-06-07 +34130646,Alternative splicing associated with cancer stemness in kidney renal clear cell carcinoma.,"

Background

Cancer stemness is associated with metastases in kidney renal clear cell carcinoma (KIRC) and negatively correlates with immune infiltrates. Recent stemness evaluation methods based on the absolute expression have been proposed to reveal the relationship between stemness and cancer. However, we found that existing methods do not perform well in assessing the stemness of KIRC patients, and they overlooked the impact of alternative splicing. Alternative splicing not only progresses during the differentiation of stem cells, but also changes during the acquisition of the stemness features of cancer stem cells. There is an urgent need for a new method to predict KIRC-specific stemness more accurately, so as to provide help in selecting treatment options.

Methods

The corresponding RNA-Seq data were obtained from The Cancer Genome Atlas (TCGA) data portal. We also downloaded stem cell RNA sequence data from the Progenitor Cell Biology Consortium (PCBC) Synapse Portal. Independent validation sets with large sample size and common clinic pathological characteristics were obtained from the Gene Expression Omnibus (GEO) database. We constructed a KIRC-specific stemness prediction model using an algorithm called one-class logistic regression based on the expression and alternative splicing data to predict stemness indices of KIRC patients, and the model was externally validated. We identify stemness-associated alternative splicing events (SASEs) by analyzing different alternative splicing events between high- and low-stemness groups. Univariate Cox and multivariable logistic regression analysis was carried out to detect the prognosis-related SASEs respectively. The area under curve (AUC) of receiver operating characteristic (ROC) was performed to evaluate the predictive values of our model.

Results

Here, we constructed a KIRC-specific stemness prediction model with an AUC of 0.968, and to provide a user-friendly interface of our model for KIRC stemness analysis, we have developed KIRC Stemness Calculator and Visualization (KSCV), which is hosted on the Shiny server and can most easily be accessed via web browser and the url https://jiang-lab.shinyapps.io/kscv/ . When applied to 605 KIRC patients, our stemness indices had a higher correlation with the gender, smoking history and metastasis of the patients than the previous stemness indices, and revealed intratumor heterogeneity at the stemness level. We identified 77 novel SASEs by dividing patients into high- and low-stemness groups with significantly different outcomes and they had significant correlations with expression of 17 experimentally validated splicing factors. Both univariate and multivariate survival analysis demonstrated that SASEs closely correlated with the overall survival of patients.

Conclusions

Basing on the stemness indices, we found that not only immune infiltration but also alternative splicing events showed significant different at the stemness level. More importantly, we highlight the critical role of these differential alternative splicing events in poor prognosis, and we believe in the potential for their further translation into targets for immunotherapy.",2021-06-15 +33367130,"An adaptive, interacting, cluster-based model for predicting the transmission dynamics of COVID-19.","The SARS-CoV-2 driven disease COVID-19 is pandemic with increasing human and monetary costs. COVID-19 has put an unexpected and inordinate degree of pressure on healthcare systems of strong and fragile countries alike. To launch both containment and mitigation measures, each country requires estimates of COVID-19 incidence as such preparedness allows agencies to plan efficient resource allocation and to design control strategies. Here, we have developed a new adaptive, interacting, and cluster-based mathematical model to predict the granular trajectory of COVID-19. We have analyzed incidence data from three currently afflicted countries of Italy, the United States of America, and India. We show that our approach predicts state-wise COVID-19 spread for each country with reasonable accuracy. We show that Rt, as the effective reproduction number, exhibits significant spatial variations in these countries. However, by accounting for the spatial variation of Rt in an adaptive fashion, the predictive model provides estimates of the possible asymptomatic and undetected COVID-19 cases, both of which are key contributors in COVID-19 transmission. We have applied our methodology to make detailed predictions for COVID19 incidences at the district and state level in India. 
Finally, to make the models available to the public at large, we have developed a web-based dashboard, namely ""Predictions and Assessment of Corona Infections and Transmission in India"" (PRACRITI, see http://pracriti.iitd.ac.in), which provides the detailed Rt values and a three-week forecast of COVID cases.",2020-12-14 +34582218,Human Blood and Bird Egg Proteins Identified in Red Paint Covering a 1000-Year-Old Gold Mask from Peru.,"We analyzed a red paint sample from the surface of a gold mask excavated from a Middle Sicán elite tomb in Peru. The mask covered the face of the principal male and dates from ca. 1000 AD, a period when many painted precious metal objects were produced. The paint's inorganic pigment was identified more than 30 years ago as cinnabar (a mercuric sulfide scarlet-red to brown-red mineral), but the identity of the effective organic binder remained a mystery. Fourier transform infrared (FTIR) analysis of the sample indicated a proteinaceous composition, and no lipids were recovered from an N,O-bis(trimethylsilyl)trifluoroacetamide (BSTFA) derivatized extract of the sample analyzed by gas chromatography-mass spectrometry (GC-MS). Proteomics analysis by nanoLC-MS/MS identified unique peptides in the sample, which were matched to human blood and bird egg proteins via Uniprot database searches. These included immunoglobulin heavy chain, immunoglobulin G, serum albumin, and ovomucoid. Cinnabar-based paints were typically used in the context of social elites and ritually important items. The presence of human blood would support previous ideas that red cinnabar paint may represent ""life force"" intended to support ""rebirth"". As the red paint sample came from the first scientifically excavated Sicán gold mask, the results suggest a method to authenticate similar unprovenanced masks now in private and museum collections. 
Proteomics data set identifier https://doi.org/10.5287/bodleian:1ajYbBgQP.",2021-09-28 +32367112,MACSNVdb: a high-quality SNV database for interspecies genetic divergence investigation among macaques. ,"Macaques are the most widely used non-human primates in biomedical research. The genetic divergence between these animal models is responsible for their phenotypic differences in response to certain diseases. However, the macaque single nucleotide polymorphism resources mainly focused on rhesus macaque (Macaca mulatta), which hinders the broad research and biomedical application of other macaques. In order to overcome these limitations, we constructed a database named MACSNVdb that focuses on the interspecies genetic diversity among macaque genomes. MACSNVdb is a web-enabled database comprising ~74.51 million high-quality non-redundant single nucleotide variants (SNVs) identified among 20 macaque individuals from six species groups (muttla, fascicularis, sinica, arctoides, silenus, sylvanus). In addition to individual SNVs, MACSNVdb also allows users to browse and retrieve groups of user-defined SNVs. In particular, users can retrieve non-synonymous SNVs that may have deleterious effects on protein structure or function within macaque orthologs of human disease and drug-target genes. Besides position, alleles and flanking sequences, MACSNVdb integrated additional genomic information including SNV annotations and gene functional annotations. MACSNVdb will facilitate biomedical researchers to discover molecular mechanisms of diverse responses to diseases as well as primatologist to perform population genetic studies. We will continue updating MACSNVdb with newly available sequencing data and annotation to keep the resource up to date. 
Database URL: http://big.cdu.edu.cn/macsnvdb/.",2020-01-01 +34406119,"Multi-syndrome, multi-gene risk modeling for individuals with a family history of cancer with the novel R package PanelPRO.","Identifying individuals who are at high risk of cancer due to inherited germline mutations is critical for effective implementation of personalized prevention strategies. Most existing models focus on a few specific syndromes; however, recent evidence from multi-gene panel testing shows that many syndromes are overlapping, motivating the development of models that incorporate family history on several cancers and predict mutations for a comprehensive panel of genes.We present PanelPRO, a new, open-source R package providing a fast, flexible back-end for multi-gene, multi-cancer risk modeling with pedigree data. It includes a customizable database with default parameter values estimated from published studies and allows users to select any combinations of genes and cancers for their models, including well-established single syndrome BayesMendel models (BRCAPRO and MMRPRO). This leads to more accurate risk predictions and ultimately has a high impact on prevention strategies for cancer and clinical decision making. The package is available for download for research purposes at https://projects.iq.harvard.edu/bayesmendel/panelpro.",2021-08-18 +31598709,DNMIVD: DNA methylation interactive visualization database.,"Aberrant DNA methylation plays an important role in cancer progression. However, no resource has been available that comprehensively provides DNA methylation-based diagnostic and prognostic models, expression-methylation quantitative trait loci (emQTL), pathway activity-methylation quantitative trait loci (pathway-meQTL), differentially variable and differentially methylated CpGs, and survival analysis, as well as functional epigenetic modules for different cancers. 
These provide valuable information for researchers to explore DNA methylation profiles from different aspects in cancer. To this end, we constructed a user-friendly database named DNA Methylation Interactive Visualization Database (DNMIVD), which comprehensively provides the following important resources: (i) diagnostic and prognostic models based on DNA methylation for multiple cancer types of The Cancer Genome Atlas (TCGA); (ii) meQTL, emQTL and pathway-meQTL for diverse cancers; (iii) Functional Epigenetic Modules (FEM) constructed from Protein-Protein Interactions (PPI) and Co-Occurrence and Mutual Exclusive (COME) network by integrating DNA methylation and gene expression data of TCGA cancers; (iv) differentially variable and differentially methylated CpGs and differentially methylated genes as well as related enhancer information; (v) correlations between methylation of gene promoter and corresponding gene expression and (vi) patient survival-associated CpGs and genes with different endpoints. DNMIVD is freely available at http://www.unimd.org/dnmivd/. We believe that DNMIVD can facilitate research of diverse cancers.",2020-01-01 +,genodive version 3.0: Easy‐to‐use software for the analysis of genetic data of diploids and polyploids,"genodive version 3.0 is a user‐friendly program for the analysis of population genetic data. This version presents a major update from the previous version and now offers a wide spectrum of different types of analyses. genodive has an intuitive graphical user interface that allows direct manipulation of the data through transformation, imputation of missing data, and exclusion and inclusion of individuals, population and/or loci. Furthermore, genodive seamlessly supports 15 different file formats for importing or exporting data from or to other programs. 
One major feature of genodive is that it supports both diploid and polyploid data, up to octaploidy (2n = 8x) for some analyses, but up to hexadecaploidy (2n = 16x) for other analyses. The different types of analyses offered by genodive include multiple statistics for estimating population differentiation (φST, FST, FʹST, GST, GʹST, GʹʹST, Dₑₛₜ, RST, ρ), analysis of molecular variance‐based K‐means clustering, Hardy–Weinberg equilibrium, hybrid index, population assignment, clone assignment, Mantel test, Spatial Autocorrelation, 23 ways of calculating genetic distances, and both principal components and principal coordinates analyses. A unique feature of genodive is that it can also open data sets with nongenetic variables, for example environmental data or geographical coordinates that can be included in the analysis. In addition, genodive makes it possible to run several external programs (lfmm, structure, instruct and vegan) directly from its own user interface, avoiding the need for data reformatting and use of the command line. genodive is available for computers running Mac OS X 10.7 or higher and can be downloaded freely from: http://www.patrickmeirmans.com/software.",2020-07-01 +33985433,IPD 2.0: To derive insights from an evolving SARS-CoV-2 genome.,"

Background

Rapid analysis of SARS-CoV-2 genomic data plays a crucial role in surveillance and adoption of measures in controlling spread of Covid-19. Fast, inclusive and adaptive methods are required for the heterogenous SARS-CoV-2 sequence data generated at an unprecedented rate.

Results

We present an updated version of the SARS-CoV-2 analysis module of our automated computational pipeline, Infectious Pathogen Detector (IPD) 2.0, to perform genomic analysis to understand the variability and dynamics of the virus. It adopts the recent clade nomenclature and demonstrates the clade prediction accuracy of 92.8%. IPD 2.0 also contains a SARS-CoV-2 updater module, allowing automatic upgrading of the variant database using genome sequences from GISAID. As a proof of principle, analyzing 208,911 SARS-CoV-2 genome sequences, we generate an extensive database of 2.58 million sample-wise variants. A comparative account of lineage-specific mutations in the newer SARS-CoV-2 strains emerging in the UK, South Africa and Brazil and data reported from India identify overlapping and lineages specific acquired mutations suggesting a repetitive convergent and adaptive evolution.

Conclusions

A novel and dynamic feature of the SARS-CoV-2 module of IPD 2.0 makes it a contemporary tool to analyze the diverse and growing genomic strains of the virus and serve as a vital tool to help facilitate rapid genomic surveillance in a population to identify variants involved in breakthrough infections. IPD 2.0 is freely available from http://www.actrec.gov.in/pi-webpages/AmitDutt/IPD/IPD.html and the web-application is available at http://ipd.actrec.gov.in/ipdweb/ .",2021-05-13 +,Paracrine Signalling From SOX2-Expressing Pituitary Embryonic Cells Is Required for Terminal Differentiation of Hormone-Producing Cells,"Abstract The pituitary gland is the master regulator of the endocrine system, housing six major hormone producing cell types. This gland is derived from Rathke’s Pouch, an invagination of the oral ectoderm. Hormone-producing pituitary cell lineages are derived from a population of embryonic cells expressing SOX2. ZFP36L1/Butyrate Response Factor 1 (BRF1) is an RNA binding protein that binds and targets mRNAs of various cytokines and chemokines for degradation prior to translation, attenuating secretion of inflammatory factors (Herranz et al. 2015). Here, we show that BRF1 is a novel marker expressed in SOX2+ cells in human and mouse pituitaries, suggesting that these cells may have a secretory profile. To investigate this possibility, we have combined molecular and genetic studies in vivo. We have used a novel mouse model, R26lsl-mBRF1 that allows the expression of a mutant, constitutively active BRF1 protein upon Cre-mediated recombination, alongside our lab’s models (Hesx1Cre/+ and Sox2CreERT2/+), to express mutant BRF1 in HESX1+ and SOX2+ cells during development and postnatally. This approach results in pituitary hypoplasia and severe hypopituitarism due to a failure of cell-lineage specified cells to differentiate into hormone-producing cells. 
Hormone production in these mutant cells, however, can be rescued in vitro through co-culture with WT pituitaries and in vivo in chimeric pituitaries, highlighting a cell non-autonomous mechanism underlying the phenotype. Single cell RNA sequencing of WT and Sox2CreERT2/+;R26lsl-mBRF1 murine embryonic pituitaries, as well as use publicly available human pituitary single cell datasets, have allowed us to identify specific cytokines and chemokines secreted by SOX2+ cells, as well as downstream intracellular signalling pathways in differentiating cells (Zhang et al. 2020), which may be responsible for controlling terminal differentiation of hormone-producing cells within the developing pituitary. Together with our recently published data, these results support the notion that SOX2+ pituitary stem cells play a critical paracrine role in controlling progenitor cell proliferation and terminal differentiation (Russell et al. 2021). References: Herranz, Nicolás et al. 2015. “MTOR Regulates MAPKAPK2 Translation to Control the Senescence-Associated Secretory Phenotype.” Nature Cell Biology 17(9): 1205–17. http://www.nature.com/doifinder/10.1038/ncb3225. Russell, John P et al. 2021. “Pituitary Stem Cells Produce Paracrine WNT Signals to Control the Expansion of Their Descendant Progenitor Cells.” eLife. Zhang, Shu et al. 2020. “Single-Cell Transcriptomics Identifies Divergent Developmental Lineage Trajectories during Human Pituitary Development.” Nature Communications.",2021-05-03 +34878878,COVID-19 Pandemic Impact on the National Health Care Surveys.,"While underscoring the need for timely, nationally representative data in ambulatory, hospital, and long-term-care settings, the COVID-19 pandemic posed many challenges to traditional methods and mechanisms of data collection. 
To continue generating data from health care and long-term-care providers and establishments in the midst of the COVID-19 pandemic, the National Center for Health Statistics had to modify survey operations for several of its provider-based National Health Care Surveys, including quickly adding survey questions that captured the experiences of providing care during the pandemic. With the aim of providing information that may be useful to other health care data collection systems, this article presents some key challenges that affected data collection activities for these national provider surveys, as well as the measures taken to minimize the disruption in data collection and to optimize the likelihood of disseminating quality data in a timely manner. (Am J Public Health. 2021;111(12):2141-2148. https://doi.org/10.2105/AJPH.2021.306514).",2021-12-01 +34080131,"Preclinical Western Blot in the Era of Digital Transformation and Reproducible Research, an Eastern Perspective.","The current research is an interdisciplinary endeavor to develop a necessary tool in preclinical protein studies of diseases or disorders through western blotting. In the era of digital transformation and open access principles, an interactive cloud-based database called East-West Blot ( https://rancs-lab.shinyapps.io/WesternBlots ) is designed and developed. The online interactive subject-specific database built on the R shiny platform facilitates a systematic literature search on the specific subject matter, here set to western blot studies of protein regulation in the preclinical model of TBI. The tool summarizes the existing publicly available knowledge through a data visualization technique and easy access to the critical data elements and links to the study itself. The application compiled a relational database of PubMed-indexed western blot studies labeled under HHS public access, reporting downstream protein regulations presented by fluid percussion injury model of traumatic brain injury. 
The promises of the developed tool include progressing toward implementing the principles of 3Rs (replacement, reduction, and refinement) for humane experiments, cultivating the prerequisites of reproducible research in terms of reporting characteristics, paving the ways for a more collaborative experimental design in basic science, and rendering an up-to-date and summarized perspective of current publicly available knowledge.",2021-06-02 +,A bioinformatics approach revealed the transcription factors of Helicobacter pylori pathogenic genes and their regulatory network nodes,"Helicobacter pylori is a chronic pathogenic bacteria that causes gastric mucosal damage through various host-related and pathogen-related factors. Thus, a single gene research cannot fully explain its pathogenicity. Purpose of the study: It is necessary to establish a Helicobacter pylori pathogenic gene transcription factor regulatory network (TFRN) and study its central nodes.The expression data of Helicobacter pylori pathogenic genes were obtained through GEO Datasets of NCBI. The genes were screened using linear model-empirical Bayesian statistics in R language Limma package combined with the conventional t-test; the results identified 1231 differentially expressed genes. The functional analysis (gene ontology-analysis) and signal pathway analysis (pathway-analysis) of differentially expressed genes were performed using the DAVID and KEGG databases, respectively. The pathogenic gene regulatory network was constructed by integrating transcriptional regulatory element database (TRED); the disease-related analysis of the pathogenic genes was conducted using the DAVID annotation tool. 
Five pathogenic genes (Nos2, Il5, Colla1, Tnf, and Nfkb1) and their transcription factors (Jun, Cebpa, Egrl, Ppara, and Il6) were found to suppress the host immune function and enhance the pathogenicity of Helicobacter pylori by regulating the host immune system.This effect was largely mediated via three signaling pathways: Tnf pathway, PI3K Akt pathway, and Jak–STAT pathway. The pathogenicity of Helicobacter pylori is closely related to the body's immune and inflammatory system. A better understanding of the correlation of the pathogenic factors with the host immune and inflammatory factors may help to determine the precise pathogenic mechanism of H. pylori infection.How to cite: Bai Y, Li W, Xu G, et al. A bioinformatics approach revealed the transcription factors of Helicobacter pylori pathogenic genes and their regulatory network nodes. Electron J Biotechnol 2020;45. https://doi.org/10.1016/j.ejbt.2020.04.002.",2020-05-01 +34890448,Linking big biomedical datasets to modular analysis with Portable Encapsulated Projects. ,"Organizing and annotating biological sample data is critical in data-intensive bioinformatics. Unfortunately, metadata formats from a data provider are often incompatible with requirements of a processing tool. There is no broadly accepted standard to organize metadata across biological projects and bioinformatics tools, restricting the portability and reusability of both annotated datasets and analysis software. To address this, we present the Portable Encapsulated Project (PEP) specification, a formal specification for biological sample metadata structure. The PEP specification accommodates typical features of data-intensive bioinformatics projects with many biological samples. In addition to standardization, the PEP specification provides descriptors and modifiers for project-level and sample-level metadata, which improve portability across both computing environments and data processing tools. 
PEPs include a schema validator framework, allowing formal definition of required metadata attributes for data analysis broadly. We have implemented packages for reading PEPs in both Python and R to provide a language-agnostic interface for organizing project metadata. The PEP specification is an important step toward unifying data annotation and processing tools in data-intensive biological research projects. Links to tools and documentation are available at http://pep.databio.org/.",2021-12-01 +33721918,Novel perspectives for SARS-CoV-2 genome browsing.,"SARS-CoV-2 has spread worldwide and caused social, economic, and health turmoil. The first genome assembly of SARS-CoV-2 was produced in Wuhan, and it is widely used as a reference. Subsequently, more than a hundred additional SARS-CoV-2 genomes have been sequenced. While the genomes appear to be mostly identical, there are variations. Therefore, an alignment of all available genomes and the derived consensus sequence could be used as a reference, better serving the science community. Variations are significant, but representing them in a genome browser can become, especially if their sequences are largely identical. Here we summarize the variation in one track. Other information not currently found in genome browsers for SARS-CoV-2, such as predicted miRNAs and predicted TRS as well as secondary structure information, were also added as tracks to the consensus genome. We believe that a genome browser based on the consensus sequence is better suited when considering worldwide effects and can become a valuable resource in the combating of COVID-19. The genome browser is available at http://cov.iaba.online.",2021-03-16 +31512145,ZIKAVID-Zika virus infection database: a new platform to analyze the molecular impact of Zika virus infection.,"The recent outbreak of Zika virus (ZIKV) in Brazil and other countries globally demonstrated the relevance of ZIKV studies. 
During and after this outbreak, there was an intense increase in scientific production on ZIKV infections, especially toward alterations promoted by the infection and related to clinical outcomes. Considering this massive amount of new data, mainly thousands of genes and proteins whose expression is impacted by ZIKV infection, the ZIKA Virus Infection Database (ZIKAVID) was created. ZIKAVID is an online database that comprises all genes or proteins, and associated information, for which expression was experimentally measured and found to be altered after ZIKV infection. The database, available at https://zikavid.org, contains 16,984 entries of gene expression measurements from a total of 7348 genes. It allows users to easily perform searches for different experimental hosts (cell lines, tissues, and animal models), ZIKV strains (African, Asian, and Brazilian), and target molecules (messenger RNA [mRNA] and protein), among others, used in differential expression studies regarding ZIKV infection. In this way, the ZIKAVID will serve as an additional and important resource to improve the characterization of the molecular impact and pathogenesis associated with ZIKV infection.",2019-09-11 +,DOES GEOGRAPHICAL REGION IMPACT PARENTS’ PERCEPTIONS TOWARDS YOUTH SPORT SPECIALIZATION?,"

Background:

Two billion U.S. healthcare dollars are spent on youth sports injuries each year. This staggering figure is projected to increase as athletes are sustaining more sport-related injuries and are specializing in a single sport at younger ages than ever before. Sport specialization is a modifiable injury risk factor unique to youth athlete populations. Parents are a group of key stakeholders whose knowledge and beliefs likely impact youth sport participation, however a paucity of research exists in this area.

Purpose:

To assess the differences in perceptions of parents on youth sport participation and specialization based on geographical region.

Methods:

Parents of children, ages 8-18 years, who participate in organized sports were surveyed for this study. Electronic surveys with a total of 40 questions were circulated via team/organization mailing lists and social media. Geographic regions were described as either South or North based on divisions from the U.S. Census Bureau. Sport specialization was defined as an athlete meeting >2 of the following 3 criteria: participates in one sport to the exclusion of other sports, participates in sport-specific lessons, participates on >2 teams in a single sport. Descriptive statistics and chi-square analyses were used to compare perceptions of parents with specialized and non-specialized athletes across different geographical regions.

Results:

Three-hundred and seventy-one responses were collected (South=204, North=167). Parents from the South reported a higher number of specialized athletes as compared to parents from the North (54% vs 38%; P=0.003). Regardless of geographical region, most parents did not think specialization had a positive impact on a youth athlete’s development (P=0.307) or their future in that sport (P=0.086) (Figures 1 & 2). Parents from the South did select ‘Potential to earn a college scholarship’ as a motivator for youth sport specialization more often when compared to parents from the North (30% vs 17%; P=0.002).

Conclusion:

Regional differences exist in the percentage of specialized youth athletes participating in organized sport. Parental knowledge of the risks and benefits of youth sport specialization does not appear to drive these observed differences. Earning a college scholarship was selected more frequently as a reason for specialization in the South, indicating that societal or socioeconomic factors may be present. Further research is needed to determine the underlying factors driving youth sport specialization in the U.S.

Figures:

Figure 1.Figure 2.

References:

Bell DR PE, Trigsted SM, Hetzel S, McGuine TA and Brooks MA. Prevalence of Sport Specialization in High School Athletics: A 1-Year Observational Study. Am J Sports Med. 2016;44(6):1469-1474. Bell DR, Post EG, Trigsted SM, Schaefer DA, McGuine TA, Brooks MA. Parents’ Awareness and Perceptions of Sport Specialization and Injury Prevention Recommendations. Clin J Sport Med. 2018. Brooks MA, Post EG, Trigsted SM, et al. Knowledge, Attitudes, and Beliefs of Youth Club Athletes Toward Sport Specialization and Sport Participation. Orthop J Sports Med. 2018;6(5). Gregory S. How Kids’ Sports became a $15 Billion Industry. TIME. 2017;190(9). Jayanthi NA, LaBella CR, Fischer D, Pasulka J, Dugas LR. Sports-specialized intensive training and the risk of injury in young athletes: a clinical case-control study. Am J Sports Med. 2015;43(4):794-801. LaPrade RF, Agel J, Baker J, et al. AOSSM Early Sport Specialization Consensus Statement. Orthop J Sports Med. 2016;4(4). Malina RM. Early sport specialization: roots, effectiveness, risks. Curr Sports Med Rep. 2010;9(6):364-371. McGuine TA, Post EG, Hetzel SJ, Brooks MA, Trigsted S, Bell DR. A Prospective Study on the Effect of Sport Specialization on Lower Extremity Injury Rates in High School Athletes. Am J Sports Med. 2017;45(12): 2706-2712. Myer GD, Jayanthi N, Difiori JP, et al. Sport Specialization, Part I: Does Early Sports Specialization Increase Negative Outcomes and Reduce the Opportunity for Success in Young Athletes? Sports Health.2015;7(5):437-442. Pasulka J, Jayanthi N, McCann A, Dugas LR, LaBella C. Specialization patterns across various youth sports and relationship to injury risk. Phys Sportsmed. 2017;45(3):344-352. Post EG, Trigsted SM, Riekena JW, et al. The Association of Sport Specialization and Training Volume With Injury History in Youth Athletes. Am J Sports Med. 2017;45(6):1405-1412. U.S. Census Bureau. Census Bureau Regions and Divisions with State FIPS Codes. 
https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf. Accessed June 26, 2020.",2021-07-01 +31372596,Accelerating structure-function mapping using the ViVa webtool to mine natural variation.,"Thousands of sequenced genomes are now publicly available capturing a significant amount of natural variation within plant species; yet, much of these data remain inaccessible to researchers without significant bioinformatics experience. Here, we present a webtool called ViVa (Visualizing Variation) which aims to empower any researcher to take advantage of the amazing genetic resource collected in the Arabidopsis thaliana 1001 Genomes Project (http://1001genomes.org). ViVa facilitates data mining on the gene, gene family, or gene network level. To test the utility and accessibility of ViVa, we assembled a team with a range of expertise within biology and bioinformatics to analyze the natural variation within the well-studied nuclear auxin signaling pathway. Our analysis has provided further confirmation of existing knowledge and has also helped generate new hypotheses regarding this well-studied pathway. These results highlight how natural variation could be used to generate and test hypotheses about less-studied gene families and networks, especially when paired with biochemical and genetic characterization. ViVa is also readily extensible to databases of interspecific genetic variation in plants as well as other organisms, such as the 3,000 Rice Genomes Project ( http://snp-seek.irri.org/) and human genetic variation ( https://www.ncbi.nlm.nih.gov/clinvar/).",2019-07-26 +31599098,PSMD: An extensive database for pan-species microsatellite investigation and marker development.,"Microsatellites are widely distributed throughout nearly all genomes which have been extensively exploited as powerful genetic markers for diverse applications due to their high polymorphisms. 
Their length variations are involved in gene regulation and implicated in numerous genetic diseases even in cancers. Although much effort has been devoted in microsatellite database construction, the existing microsatellite databases still had some drawbacks, such as limited number of species, unfriendly export format, missing marker development, lack of compound microsatellites and absence of gene annotation, which seriously restricted researchers to perform downstream analysis. In order to overcome the above limitations, we developed PSMD (Pan-Species Microsatellite Database, http://big.cdu.edu.cn/psmd/) as a web-based database to facilitate researchers to easily identify microsatellites, exploit reliable molecular markers and compare microsatellite distribution pattern on genome-wide scale. In current release, PSMD comprises 678,106,741 perfect microsatellites and 43,848,943 compound microsatellites from 18,408 organisms, which covered almost all species with available genomic data. In addition to interactive browse interface, PSMD also offers a flexible filter function for users to quickly gain desired microsatellites from large data sets. PSMD allows users to export GFF3 formatted file and CSV formatted statistical file for downstream analysis. We also implemented an online tool for analysing occurrence of microsatellites with user-defined parameters. Furthermore, Primer3 was embedded to help users to design high-quality primers with customizable settings. To our knowledge, PSMD is the most extensive resource which is likely to be adopted by scientists engaged in biological, medical, environmental and agricultural research.",2019-10-28 +31091262,Mutplot: An easy-to-use online tool for plotting complex mutation data with flexibility.,"With the development of technology, an enormous amount of sequencing data is being generated rapidly. However, transforming this data into patient care is a critical challenge. 
There are two difficulties: how to integrate functional information into mutation interpretation and how to make the integration easy to apply. One solution is to visualize amino acid changes with protein structure and function in web app platform. There are multiple existing tools for plotting mutations, but the majority of them requires programming skills that are not common background for clinicians or researchers. Furthermore, the recurrent mutations are the focus and the recurrence cutoff varies. Yet, none of the current software offers customer-defined cutoff. Thus, we developed this user-friendly web-based tool, Mutplot (https://bioinformaticstools.shinyapps.io/lollipop/). Mutplot retrieves up-to-date domain information from the protein resource UniProt (https://www.uniprot.org/), integrates the submitted mutation information and produces lollipop diagrams with annotations and highlighted candidates. It offers flexible output options. For data that follows security standards, the app can also be hosted in web servers inside a firewall or computers without internet with Uniprot database stored on them. Altogether, Mutplot is an excellent tool for visualizing protein mutations, especially for clinicians or researchers without any bioinformatics background.",2019-05-15 +33926352,Exploratory research on asthma exacerbation risk factors using the Japanese claims database and machine learning: a retrospective cohort study.,"

Objective

Analytical studies of risk factor assessment using machine learning have recently been reported. We performed an exploratory detection study of asthma exacerbation-related factors using health insurance claims data and machine learning to explore risk factors that have high generalizability and can be easily obtained in daily practice.

Methods

A dataset of asthma patients during May 2014-April 2019 from the Japanese insurance claims database, MediScope® (DB) was used. Patient characteristics and disease information were extracted, and association with occurrence of asthma exacerbation was evaluated to comprehensively search for exacerbation risk factors. Asthma exacerbations were defined as the co-occurrence of emergency medical procedures, such as emergency transport and intravenous steroid injections, with asthma claims, which were recorded in the database.

Results

In total, 5,844 (13.7%) subjects had exacerbations in 42,685 eligible cases from the DB. Information on approximately 3,300 diseases was subjected to a machine learning, and 25 variables were extracted as variable importance and targeted for risk assessment. As a result, sex, days without exacerbation from cohort entry date at look-back period, Charlson Comorbidity Index, allergic rhinitis, chronic sinusitis, acute airway disease (upper airway), acute airway disease (lower airways), Chronic obstructive pulmonary disease/chronic bronchitis, gastroesophageal reflux disease, and hypertension were significantly associated with exacerbation. Dyslipidemia and periodontitis were detected as associated factors of reduced exacerbation risk.

Conclusions

A comprehensive analysis of claims data using machine learning showed asthma exacerbation risk factors mostly consistent with those in previous studies. Further examination in other fields is warranted.Supplemental data for this article is available online at https://doi.org/10.1080/02770903.2021.1923740 .",2021-05-18 +33991093,Challenges for FAIR-compliant description and comparison of crop phenotype data with standardized controlled vocabularies. ,"Crop phenotypic data underpin many pre-breeding efforts to characterize variation within germplasm collections. Although there has been an increase in the global capacity for accumulating and comparing such data, a lack of consistency in the systematic description of metadata often limits integration and sharing. We therefore aimed to understand some of the challenges facing findable, accesible, interoperable and reusable (FAIR) curation and annotation of phenotypic data from minor and underutilized crops. We used bambara groundnut (Vigna subterranea) as an exemplar underutilized crop to assess the ability of the Crop Ontology system to facilitate curation of trait datasets, so that they are accessible for comparative analysis. This involved generating a controlled vocabulary Trait Dictionary of 134 terms. Systematic quantification of syntactic and semantic cohesiveness of the full set of 28 crop-specific COs identified inconsistencies between trait descriptor names, a relative lack of cross-referencing to other ontologies and a flat ontological structure for classifying traits. We also evaluated the Minimal Information About a Phenotyping Experiment and FAIR compliance of bambara trait datasets curated within the CropStoreDB schema. We discuss specifications for a more systematic and generic approach to trait controlled vocabularies, which would benefit from representation of terms that adhere to Open Biological and Biomedical Ontologies principles. 
In particular, we focus on the benefits of reuse of existing definitions within pre- and post-composed axioms from other domains in order to facilitate the curation and comparison of datasets from a wider range of crops. Database URL: https://www.cropstoredb.org/cs_bambara.html.",2021-05-01 +34527563,The relationship between autophagy-related genes and the staging and prognosis of thyroid cancer: a bioinformatics analysis.,"

Background

The number of patients with thyroid cancer is increasing. Autophagy is closely related to thyroid cancer. This study conducted a bioinformatics analysis to examine the relationship between autophagy-related genes and the prognosis of thyroid cancer.

Methods

Based on The Cancer Genome Atlas (TCGA) database, the standardized ribonucleic acid (RNA) sequencing data and corresponding clinical records of 497 patients were obtained. The gene set of autophagy-related genes was obtained from reactom [https://reactome.org/; gene set identification: (R-HSA-1632852)]. Based on the completeness of the sequencing and prognostic data, 135 effective genes were screened to form a gene set. A cluster analysis of the genetic expression of the whole genome was conducted. Different groups and subgroups were defined according to the clustering situation. The relationship between the expression levels of different autophagy-related genes and the clinical characteristics of thyroid cancer were analyzed.

Results

Patients were divided into 2 clusters and 4 subclusters. A comparison of the clinical parameters of the 2 clusters showed that there were differences in node (N)-stage, and a comparison of the 4 subclusters showed that there were differences in age and 4 other characteristics. In relation to the survival comparison, there was a difference in the disease-free survival (DFS) between the 2 clusters, and there was a difference in overall survival (OS) and DFS between subclusters. The 2 clusters had 114 differentially expressed genes (DEGs), and the 4 subclusters had 131 DEGs. In relation to the 5 different factors in each group, there were differences in the distribution of N0N1NX in clusters and subclusters, there were differences in the distribution of M0M1MX in subclusters, and there were differences in the distribution of age and the American Joint Committee on Cancer stage in subclusters. In relation to the stage/N stage/Metastasis (M) stage-related DEGs, 5 common genes were identified: EPAS1, ATG4A, BECN1, ATG4C, and PLIN3. In relation to the stage/N stage/M stage-related DEGs and age-related DEGs 1 common gene was identified: EPAS1.

Conclusions

Autophagy-related genes are related to the staging of thyroid cancer, but have no clear relationship with long-term prognosis.",2021-08-01 +,Modeling the Nutritional Impact of Adding Mushrooms to USDA Food Patterns,"Abstract

Objectives

The objective was to assess the nutritional impact of adding a serving of mushrooms in USDA Food Patterns (Healthy US-Style Food Patterns, Healthy Mediterranean-Style Patterns and Healthy Vegetarian Patterns) using a similar approach to that used by USDA for Dietary Guidelines.

Methods

A composite of commonly consumed raw mushrooms (white, brown/crimini and portabella; at 1:1:1 ratio), and raw specialty mushrooms (oyster mushrooms) were used for modeling. USDA Food Data Central database (https://fdc.nal.usda.gov/) was used to obtain nutrient profiles of mushrooms. Nutritional profiles of USDA's Food Patterns were obtained from the Scientific Report of the 2015 Dietary Guidelines Advisory Committee, Appendix E-3 (https://health.gov/dietaryguidelines/2015-scientific-report/15-appendix-E3/) and dietary modeling was accomplished by adding nutrients from mushrooms.

Results

Addition of a serving (84 g) of raw mushrooms to USDA Food Patterns (each at 2000 kcal levels) resulted in about 1% increase in calories, less than 5% increase in macronutrients, 2–6% increase in fiber, 9–11% increase in potassium, 14–15% increase in riboflavin, 13–26% increase in niacin, and 13–22% increase in copper in USDA Food Patterns. Addition of oyster mushroom also additionally increased 9–11% vitamin D and 12–14% choline in USDA Food Patterns. Mushrooms exposed to UV light to increase vitamin D levels to 200 IU/serving also increased vitamin D by 70–90% in USDA Food Patterns. Addition of mushrooms had minimal effect on sodium (1% or less increase) and no effect on saturated fat or cholesterol in USDA's Food Patterns.

Conclusions

Addition of mushrooms to USDA Food Patterns increased several micronutrients including shortfall nutrients, and had a minimal or no impact on overall calories, sodium or saturated fat.

Funding Sources

Mushroom Council.",2020-05-29 +32360910,CancerEnD: A database of cancer associated enhancers.,"CancerEnD is an integrated resource developed for annotating 8524 unique expressed enhancers, associated genes, somatic mutations and copy number variations of 8063 cancer samples from 18 cancer types of TCGA. Somatic mutation data was taken from the COSMIC repository. To delineate the relationship of change in copy number of enhancer elements with the prognosis of cancer patients, survival analysis was done using the survival package in R. We identified 1762 overall survival associated enhancers, which can be used for prognostic purposes of cancer patients in a tissue-specific manner. CancerEnD (https://webs.iiitd.edu.in/raghava/cancerend/) is developed on a user-friendly responsive template, that enables searching, browsing and downloading of the annotated enhancer elements in terms of gene expression, copy number variation and survival association. We hope it provides a promising avenue for researchers to facilitate the understanding of enhancer deregulation in tumorigenesis, and to identify new biomarkers for therapy and disease-diagnosis.",2020-05-01 +,NAATP calls on Biden to get COVID‐19 vaccination to patients,"In a Jan. 26 open letter to President Biden, the National Association of Addiction Treatment Providers (NAATP) called for COVID‐19 vaccines to be prioritized for people with substance use disorders and their treatment providers, to ensure that treatment providers are identified as front‐line health workers with access to personal protective equipment; require data collection of all COVID‐19 testing, cases and deaths by race, ethnicity, disability status, gender identity, sexual orientation and age; and revise the Centers for Medicare and Medicaid Services' definition of telehealth in the Medicare program to authorize and allow reimbursement of audio‐only service delivery. 
Also in the letter: recommendations that the Medicaid Institutions for Mental Disease exclusion be eliminated, that there be a focus on equity, and that a better Substance Abuse and Mental Health Services Administration provider list be developed to include credentials and determine appropriate treatment referrals. Finally, the letter includes a call to enforce the SUPPORT Act for covering all Food and Drug Administration–approved medications and to reduce utilization management barriers to medications for opioid use disorders such as prior authorization. For the letter, go to https://www.naatp.org/resources/news/naatp‐delivers‐provider‐priorities‐president‐biden/jan‐27‐2021.",2021-02-05 +,A phylogeny of the subfamily Thiotrichinae (Lepidoptera: Gelechiidae) with a revision of the generic classification based on molecular and morphological analyses,"Thiotrichinae are a subfamily of Gelechiidae with approximately 180 described species in five genera: Calliprora Meyrick, Macrenches Meyrick, Palumbina Rondani, Polyhymno Chambers and Thiotricha Meyrick. The subfamily was established based on two species in a recent molecular phylogenetic study of Gelechiidae, but the monophyly of each genus and the phylogenetic relationships among the genera have not been tested. To address these questions, we performed the first phylogenetic analyses based on seven molecular markers (COI, EF‐1α, GAPDH, RpS5, CAD, Wg and MDH) and 95 morphological characters for 47 ingroup and 3 outgroup taxa. Each dataset was analysed separately and together using maximum likelihood, Bayesian inference and maximum parsimony methods. The maximum likelihood and Bayesian analyses produced nearly identical tree topologies for ingroup relationships, but the parsimony analysis yielded different results for some lineages. In all our analyses, Thiotrichinae formed a strongly supported monophyletic group with two other gelechiid genera, Cnaphostola Meyrick (Gelechiinae) and Semnostoma Meyrick (Apatetrinae). 
The genus Macrenches was recognized as the sister‐group to all other species in this clade. Polyhymno + Calliprora were found to be sister to all remaining ingroup taxa. The species‐rich group Thiotricha was non‐monophyletic, and it clustered with Palumbina, Cnaphostola and Semnostoma. The type species of Thiotricha was included in a monophyletic clade with a majority of congeneric species in the parsimony analyses, but the clade was divided into two groups in the model‐based results. The generic placement of two species Thiotricha dissobola Meyrick and T. pyrphora Meyrick was not confirmed in this study and they were placed as incertae sedis. The monophyly of Palumbina was strongly supported by both molecular data and morphological evidence. Based on genetic and morphological affinities and examination of the type specimens, four genera are synonymized with Thiotricha: Blastovalva Janse syn.n., Cnaphostola syn.n., Hierangela Meyrick syn.n. and Semnostoma syn.n. We propose two new genera for newly recognized two sub‐clades: Pulchrala gen.n. and Tenupalpa gen.n., and 15 new combinations are made: Thiotricha adamantina (Meyrick) comb.n., T. exalbida (Omelko & Omelko) comb.n., Tenupalpa angustella (Omelko) comb.n., T. biformis (Omelko) comb.n., T. flavitermina (Kyaw, Yagi & Hirowatari) comb.n., T. glenias (Meyrick) comb.n., T. nephodesma (Meyrick) comb.n., T. venustalis (Omelko) comb.n., Pulchrala clidias (Meyrick) comb.n., P. chujaensis (Park) comb.n., P. elaeocarpiella (Kyaw, Yagi & Hirowatari) comb.n., P. epiclista (Meyrick) comb.n., P. melanacma (Bradley) comb.n., P. panglycera (Turner) comb.n., and P. saulotis (Meyrick) comb.n. Revised generic concepts are provided in the study. 
This published work has been registered on ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:0E3C0EB5‐503C‐4ADC‐BE87‐A203230DE6CB.",2021-04-01 +35005139,Revisiting social vulnerability analysis in Indonesia data.,"This paper presents the dataset about the social vulnerability in Indonesia. This dataset contains several dimensions which rely on previous studies. The data was compiled mainly from the 2017 National Socioeconomic Survey (SUSENAS) done by BPS-Statistics Indonesia. We utilize the weight to obtain the estimation based on multistage sampling. We also received additional information on population, the number, and population growth from the BPS-Statistics Indonesia's 2017 Population projection. Furthermore, we provide the distance matrix as the supplementary information and the number of populations to do the Fuzzy Geographically Weighted Clustering (FGWC). This data can be utilized to do further analysis of social vulnerability to promote disaster management. The data can be accessed further at https://raw.githubusercontent.com/bmlmcmc/naspaclust/main/data/sovi_data.csv.",2021-12-23 +34878853,Advancements in the National Vital Statistics System to Meet the Real-Time Data Needs of a Pandemic.,"The National Center for Health Statistics' (NCHS's) National Vital Statistics System (NVSS) collects, processes, codes, and reviews death certificate data and disseminates the data in annual data files and reports. With the global rise of COVID-19 in early 2020, the NCHS mobilized to rapidly respond to the growing need for reliable, accurate, and complete real-time data on COVID-19 deaths. Within weeks of the first reported US cases, NCHS developed certification guidance, adjusted internal data processing systems, and stood up a surveillance system to release daily updates of COVID-19 deaths to track the impact of the COVID-19 pandemic on US mortality. 
This report describes the processes that NCHS took to produce timely mortality data in response to the COVID-19 pandemic. (Am J Public Health. 2021;111(12):2133-2140. https://doi.org/10.2105/AJPH.2021.306519).",2021-12-01 +31686107,The ProteomeXchange consortium in 2020: enabling 'big data' approaches in proteomics.,"The ProteomeXchange (PX) consortium of proteomics resources (http://www.proteomexchange.org) has standardized data submission and dissemination of mass spectrometry proteomics data worldwide since 2012. In this paper, we describe the main developments since the previous update manuscript was published in Nucleic Acids Research in 2017. Since then, in addition to the four PX existing members at the time (PRIDE, PeptideAtlas including the PASSEL resource, MassIVE and jPOST), two new resources have joined PX: iProX (China) and Panorama Public (USA). We first describe the updated submission guidelines, now expanded to include six members. Next, with current data submission statistics, we demonstrate that the proteomics field is now actively embracing public open data policies. At the end of June 2019, more than 14 100 datasets had been submitted to PX resources since 2012, and from those, more than 9 500 in just the last three years. In parallel, an unprecedented increase of data re-use activities in the field, including 'big data' approaches, is enabling novel research and new data resources. At last, we also outline some of our future plans for the coming years.",2020-01-01 +34322599,Predictors of ophthalmology career success (POCS) study.,"

Objective

Ophthalmology is the busiest outpatient specialty with demand predicted to rise over 40% in the next 20 years. A significant increase in the number of trainee ophthalmologists is required to fill currently vacant consultant posts and meet the UK's workforce demands by 2038. Our aim was to understand what determines success in ophthalmology training, in order to inform future ophthalmologists, refine recruitment and facilitate workforce planning.

Methods and analysis

This was a retrospective longitudinal cohort study using routinely collected data available from UK Medical Education Database (UKMED) (https://www.ukmed.ac.uk/). Data were analysed on 1350 candidates who had applied for ophthalmology specialty training (OST) between 2012 and 2018, as well as 495 candidates who had attempted Fellow of the Royal College of Ophthalmologists (FRCOphth) Part 1 between 2013 and 2018. Participants who had not obtained their primary medical qualification from the UK medical schools were excluded. Primary outcome measures included gaining a place on the OST programme and passing the FRCOphth Part 1 examination on first attempt.

Results

Higher education performance measure decile scores at medical school are strongly predictive in securing an OST post and passing the part 1 examination first time (p<0.001). Candidates who attempt FRCOphth Part 1 prior to their ST1 application are more likely to get a place on OST on first attempt. Socioeconomic factors, gender and ethnicity do not influence success in OST entry. Male trainees are more likely to pass FRCOphth Part 1 on their first attempt.

Conclusion

This study is the first quantitative assessment of the factors that determine success in OST recruitment and ophthalmology postgraduate examinations in the UK. Similar studies should be undertaken in all other medical and surgical specialties to understand what factors predict success.",2021-07-12 +31724716,"The neXtProt knowledgebase in 2020: data, tools and usability improvements.","The neXtProt knowledgebase (https://www.nextprot.org) is an integrative resource providing both data on human protein and the tools to explore these. In order to provide comprehensive and up-to-date data, we evaluate and add new data sets. We describe the incorporation of three new data sets that provide expression, function, protein-protein binary interaction, post-translational modifications (PTM) and variant information. New SPARQL query examples illustrating uses of the new data were added. neXtProt has continued to develop tools for proteomics. We have improved the peptide uniqueness checker and have implemented a new protein digestion tool. Together, these tools make it possible to determine which proteases can be used to identify trypsin-resistant proteins by mass spectrometry. In terms of usability, we have finished revamping our web interface and completely rewritten our API. Our SPARQL endpoint now supports federated queries. All the neXtProt data are available via our user interface, API, SPARQL endpoint and FTP site, including the new PEFF 1.0 format files. Finally, the data on our FTP site is now CC BY 4.0 to promote its reuse.",2020-01-01 +33576798,OpenContami: A web-based application for detecting microbial contaminants in next-generation sequencing data. ,"Microorganisms infect and contaminate eukaryotic cells during the course of biological experiments. Because microbes influence host cell biology and may therefore lead to erroneous conclusions, a computational platform that facilitates decontamination is indispensable. 
Recent studies show that next-generation sequencing (NGS) data can be used to identify the presence of exogenous microbial species. Previously, we proposed an algorithm to improve detection of microbes in NGS data. Here, we developed an online application, OpenContami, which allows researchers easy access to the algorithm via interactive web-based interfaces. We have designed the application by incorporating a database comprising analytical results from a large-scale public dataset and data uploaded by users. The database serves as a reference for assessing user data and provides a list of genera detected from negative blank controls as a 'blacklist', which is useful for studying human infectious diseases. OpenContami offers a comprehensive overview of exogenous species in NGS datasets; as such, it will increase our understanding of the impact of microbial contamination on biological and pathological traits. OpenContami is freely available at: https://openlooper.hgc.jp/opencontami/. Supplementary data are available at Bioinformatics online.",2021-02-12 +31976536,Human Breathomics Database. ,"Breathomics is a special branch of metabolomics that quantifies volatile organic compounds (VOCs) from collected exhaled breath samples. Understanding how breath molecules are related to diseases, mechanisms and pathways identified from experimental analytical measurements is challenging due to the lack of an organized resource describing breath molecules, related references and biomedical information embedded in the literature. To provide breath VOCs, related references and biomedical information, we aim to organize a database composed of manually curated information and automatically extracted biomedical information. First, VOCs-related disease information was manually organized from 207 literature linked to 99 VOCs and known Medical Subject Headings (MeSH) terms. Then an automated text mining algorithm was used to extract biomedical information from this literature. 
In the end, the manually curated information and auto-extracted biomedical information was combined to form a breath molecule database-the Human Breathomics Database (HBDB). We first manually curated and organized disease information including MeSH term from 207 literatures associated with 99 VOCs. Then, an automatic pipeline of text mining approach was used to collect 2766 literatures and extract biomedical information from breath researches. We combined curated information with automatically extracted biomedical information to assemble a breath molecule database, the HBDB. The HBDB is a database that includes references, VOCs and diseases associated with human breathomics. Most of these VOCs were detected in human breath samples or exhaled breath condensate samples. So far, the database contains a total of 913 VOCs in relation to human exhaled breath researches reported in 2766 publications. The HBDB is the most comprehensive HBDB of VOCs in human exhaled breath to date. It is a useful and organized resource for researchers and clinicians to identify and further investigate potential biomarkers from the breath of patients. Database URL: https://hbdb.cmdm.tw.",2020-01-01 +,Refractory Hypoglycemia Related to Gastric Bypass Complicated by Hypothyroidism,"Abstract A 29-year-old African-American female was brought to the emergency department for sudden onset nausea, dizziness, and loss of consciousness at work. She was found to have low blood sugar and was fed with juice and crackers. She had history of similar episodes in the past, migraine headache, post-thyroidectomy hypothyroidism, seizures, generalized anxiety disorder, gastroesophageal reflux disease, and Roux-en-Y gastric bypass 9 months ago for morbid obesity related refractory Pseudotumor Cerebri (failed VP Shunting). She lost 41 kg, and had intolerance to foods from dumping syndrome. Her vital signs were within normal limits upon arrival to the ER. 
Laboratory parameters were remarkable for hypoglycemia of 52 mg/dL. She was immediately given dextrose intravenously. ECG showed sinus bradycardia. Though she regained consciousness, her blood sugar levels were persistently low, the lowest recording being 43 mg/dL. She was placed on a continuous D10 infusion. Further investigations revealed negative toxicology screens for 1st and 2nd generation sulfonylureas, and normal insulin levels at the time of the hypoglycemic event. Her TSH was found to be very high (99 nIU/mL). Her thyroglobulin (2.8 ng/mL), free T4 (0.25ng/dL), thyroglobulin antibody (99.18 uIU/mL), cortisol levels in morning (14.2 ug/mL), and evening (4.6 ug/dL) were found to be within normal range. Abdominal MRI did not reveal Insulinoma or any other pathology. Repeat ECG showed normal sinus rhythm with occasional PVCs. She was treated for nausea with ondansetron, promethazine, scopolamine, and dronabinol. Dextrose infusion was titrated down with rise in blood sugar levels. She confessed non-compliance to thyroid replacement. Therefore, she was initially treated with intravenous thyroxine replacement, which was later switched to oral therapy. Psychiatry evaluation ruled out any eating disorder. She was counselled to eat small frequent meals, increase dietary proteins, and restrict high glycemic index sugars. Educational value: National data indicates an increasing trend for admissions related to morbid obesity and bariatric procedures, especially laparoscopic (vertical) sleeve gastrectomy (see Fig 1). (1) Complex alteration in the signaling pathway to pancreas owing to changing metabolism causes hypoglycemia after bariatric surgery. When compounded by severe untreated hypothyroidism, as in our case, causes severe hypoglycemia needing hospitalization. Patient education for adjustment in dietary habits would have played a key role in preventing episodes of severe hypoglycemia. Reference: 1.HCUPnet, Healthcare Cost and Utilization Project. 
Agency for Healthcare Research and Quality, Rockville, MD. https://hcupnet.ahrq.gov/. Fig 1: Trends for admissions for Morbid Obesity and Sleeve Gastrectomy",2021-05-03 +32276847,Large-scale analysis of zebrafish (Danio rerio) transcriptomes identifies functional modules associated with phenotypes.,"Zebrafish (Danio rerio) is an excellent model for biomedicine research due to its genetic accessibility and optical transparency. A large number of microarray based transcriptomes of zebrafish have been profiled in various cell types, tissues, development stages, toxicological exposures and other conditions. However, there is still no easy-to-use web tool to explore those precious data. We downloaded 1434 microarray data from National Center for Biotechnology Information Gene Expression Omnibus (NCBI GEO), constructed weighted gene co-expression network, and identified 50 modules of co-expressed genes that correspond to different cell types, tissues, development stages, and other experimental conditions. These modules were associated with experiments/traits, and may serve signature modules for phenotypes. Hub genes were screened by intra-modular connectivity. Higher-order module networks analysis suggested that nucleus and cell cycle modules are densely connected. Module-based gene function identification may help to discover novel gene function. Our web tool provides a new resource for gene function study in zebrafish (http://bioinformatics.fafu.edu.cn/zebrafish/).",2020-04-07 +33201237,AlgPred 2.0: an improved method for predicting allergenic proteins and mapping of IgE epitopes. ,"AlgPred 2.0 is a web server developed for predicting allergenic proteins and allergenic regions in a protein. It is an updated version of AlgPred developed in 2006. The dataset used for training, testing and validation consists of 10 075 allergens and 10 075 non-allergens. 
In addition, 10 451 experimentally validated immunoglobulin E (IgE) epitopes were used to identify antigenic regions in a protein. All models were trained on 80% of data called training dataset, and the performance of models was evaluated using 5-fold cross-validation technique. The performance of the final model trained on the training dataset was evaluated on 20% of data called validation dataset; no two proteins in any two sets have more than 40% similarity. First, a Basic Local Alignment Search Tool (BLAST) search has been performed against the dataset, and allergens were predicted based on the level of similarity with known allergens. Second, IgE epitopes obtained from the IEDB database were searched in the dataset to predict allergens based on their presence in a protein. Third, motif-based approaches like multiple EM for motif elicitation/motif alignment and search tool have been used to predict allergens. Fourth, allergen prediction models have been developed using a wide range of machine learning techniques. Finally, the ensemble approach has been used for predicting allergenic protein by combining prediction scores of different approaches. Our best model achieved maximum performance in terms of area under receiver operating characteristic curve 0.98 with Matthew's correlation coefficient 0.85 on the validation dataset. A web server AlgPred 2.0 has been developed that allows the prediction of allergens, mapping of IgE epitope, motif search and BLAST search (https://webs.iiitd.edu.in/raghava/algpred2/).",2021-07-01 +32079733,MtSSPdb: The Medicago truncatula Small Secreted Peptide Database.,"A growing number of small secreted peptides (SSPs) in plants are recognized as important regulatory molecules with roles in processes such as growth, development, reproduction, stress tolerance, and pathogen defense. Recent discoveries further implicate SSPs in regulating root nodule development, which is of particular significance for legumes. 
SSP-coding genes are frequently overlooked, because genome annotation pipelines generally ignore small open reading frames, which are those most likely to encode SSPs. Also, SSP-coding small open reading frames are often expressed at low levels or only under specific conditions, and thus are underrepresented in non-tissue-targeted or non-condition-optimized RNA-sequencing projects. We previously identified 4,439 SSP-encoding genes in the model legume Medicago truncatula. To support systematic characterization and annotation of these putative SSP-encoding genes, we developed the M. truncatula Small Secreted Peptide Database (MtSSPdb; https://mtsspdb.noble.org/). MtSSPdb currently hosts (1) a compendium of M. truncatula SSP candidates with putative function and family annotations; (2) a large-scale M. truncatula RNA-sequencing-based gene expression atlas integrated with various analytical tools, including differential expression, coexpression, and pathway enrichment analyses; (3) an online plant SSP prediction tool capable of analyzing protein sequences at the genome scale using the same protocol as for the identification of SSP genes; and (4) information about a library of synthetic peptides and root and nodule phenotyping data from synthetic peptide screens in planta. These datasets and analytical tools make MtSSPdb a unique and valuable resource for the plant research community. MtSSPdb also has the potential to become the most complete database of SSPs in plants.",2020-02-20 +34520300,Validity of the food frequency questionnaire for adults in nutritional epidemiological studies: A systematic review and meta-analysis.,"As the most widely used tool for assessing dietary intake, the validity of food frequency questionnaires (FFQs) should be evaluated before application. A comprehensive search of the PubMed and Web of Science databases was conducted for publications from January 2000 to April 1, 2020. 
Pooled estimates were calculated for correlation coefficients and mean differences for energy and 61 nutrients between FFQs and standard methods. The literature search identified 130 articles that included 21,494 participants. Subgroup analyses according to the number of administrations of the reference method, sample size, administration methods, FFQ items, reference periods, quality of the studies, gender, and regions were also performed. We conducted a meta-analysis by summarizing the available evidence to comprehensively assess the validity of FFQs stratified by the reference method type (24-hour recalls (24HRs) and food records (FRs)). We also performed subgroup analyses to examine the impact on the final summary estimates. After a meta-analysis of the FFQs' validity correlation coefficients of the included studies, this study showed that the range (median) of the validity coefficients of the 24HRs as reference methods was 0.220-0.770 (0.416), and for the FRs, it was 0.173-0.735 (0.373), which indicated that FFQs were suitable to assess the overall dietary intake in nutritional epidemiological studies. The results of the subgroup analysis showed that the number of administrations of the reference method, administration mode, number of items, reference periods, sample size, and gender mainly affected the validity correlation of FFQs. Supplemental data for this article is available online at https://doi.org/10.1080/10408398.2021.1966737 .",2021-09-14 +31603436,[Assessment of consumer exposure to chemical agents on the example of the ConsExpo model].,"Not only employees in industrial plants but also consumers, by using finished products, are exposed to chemical substances. Therefore, consumer exposure assessment is also important. To assess the risk for the consumer, the exposure magnitude is needed but measuring these values in residential conditions of consumers is usually impossible. 
ConsExpo has been designed to facilitate the exposure assessment to substances in consumer products. It is available in English as a free web application at www.consexpoweb.nl. The ConsExpo Web tool, developed by the Netherlands National Institute for Public Health and the Environment (Rijksinstituut voor Volksgezondheid en Milieu), contains a set of models that help in the assessment of exposure to the substances in consumer products. These are mathematical models with increasing complexity, describing exposure by inhalation, dermal and oral routes. Available models are described in this work. ConsExpo is also equipped with a products database with defined exposure scenarios and default values, which could be a starting point for the models. The aim of this work was to review the literature regarding ConsExpo and to present the application to Polish users through the description of the models contained therein and by providing assessments examples. The review was based on databases of scientific journals. ConsExpo is a commonly known tool, and one of its applications is exposure estimation in comparative studies and the development of new models. For lower-tier analyses ConsExpo can be used by less advanced users. The most favorable for Polish users would be the creation of the Polish-language version of the ConsExpo application or a detailed Polish-language instruction manual. Med Pr. 2019;70(6):747-62.",2019-10-11 +34154536,TeaAS: a comprehensive database for alternative splicing in tea plants (Camellia sinensis).,"Alternative splicing (AS) increases the diversity of transcripts and proteins through the selection of different splice sites and plays an important role in the growth, development and stress tolerance of plants. With the release of the reference genome of the tea plant (Camellia sinensis) and the development of transcriptome sequencing, researchers have reported the existence of AS in tea plants. 
However, there is a lack of a platform, centered on different RNA-seq datasets, that provides comprehensive information on AS. To facilitate access to information on AS and reveal the molecular function of AS in tea plants, we established the first comprehensive AS database for tea plants (TeaAS, http://www.teaas.cn/index.php ). In this study, 3.96 Tb reads from 66 different RNA-seq datasets were collected to identify AS events. TeaAS supports four methods of retrieval of AS information based on gene ID, gene name, annotation (non-redundant/Kyoto encyclopedia of genes and genomes/gene ontology annotation or chromosomal location) and RNA-seq data. It integrates data pertaining to genome annotation, type of AS event, transcript sequence, and isoforms expression levels from 66 RNA-seq datasets. The AS events resulting from different environmental conditions and that occurring in varied tissue types, and the expression levels of specific transcripts can be clearly identified through this online database. Moreover, it also provides two useful tools, Basic Local Alignment Search Tool and Generic Genome Browser, for sequence alignment and visualization of gene structure. The features of the TeaAS database make it a comprehensive AS bioinformatics platform for researchers, as well as a reference for studying AS events in woody crops. It could also be helpful for revealing the novel biological functions of AS in gene regulation in tea plants.",2021-06-21 +,Prediction of an Organic Compound’s Biotransformation Time: A Study Using Avermectins,"The current spread of the SARS-CoV-2 coronavirus is a challenge for the entire world. Ivermectin is a promising agent, which could be used to combat the SARS-CoV-2 coronavirus. It represents a complex of semisynthetic derivatives of natural avermectins that have been taken advantage of for a long time in medicine and agriculture as antiparasitic drugs. 
However, the experimental ecotoxicology assessment data for individual avermectins are still scarce. In relation to this, the aim of this study is to develop a mathematical model that would allow reliably predicting the biotransformation ability of natural and semisynthetic avermectins and identifying the structural fragments of avermectin molecules that have the largest impact on this biological activity. The base for the model construction was a structurally heterogeneous set including organic compounds with experimentally determined biotransformation half-life periods (KmHL). Using the OCHEM web platform (https://ochem.eu) with the implemented PyDescriptor plugin for the descriptor calculation and Random Forest and Transformer-CNN algorithms, a satisfactory (",2021-01-01 +30407557,Update of the FANTOM web resource: expansion to provide additional transcriptome atlases.,"The FANTOM web resource (http://fantom.gsc.riken.jp/) was developed to provide easy access to the data produced by the FANTOM project. It contains the most complete and comprehensive sets of actively transcribed enhancers and promoters in the human and mouse genomes. We determined the transcription activities of these regulatory elements by CAGE (Cap Analysis of Gene Expression) for both steady and dynamic cellular states in all major and some rare cell types, consecutive stages of differentiation and responses to stimuli. We have expanded the resource by employing different assays, such as RNA-seq, short RNA-seq and a paired-end protocol for CAGE (CAGEscan), to provide new angles to study the transcriptome. That yielded additional atlases of long noncoding RNAs, miRNAs and their promoters. We have also expanded the CAGE analysis to cover rat, dog, chicken, and macaque species for a limited number of cell types. The CAGE data obtained from human and mouse were reprocessed to make them available on the latest genome assemblies. 
Here, we report the recent updates of both data and interfaces in the FANTOM web resource.",2019-01-01 +31197322,MetOSite: an integrated resource for the study of methionine residues sulfoxidation.,"

Motivation

The oxidation of protein-bound methionine to form methionine sulfoxide has traditionally been regarded as an oxidative damage. However, growing evidence supports the view of this reversible reaction also as a regulatory post-translational modification. Thus, the oxidation of methionine residues has been reported to have multiple and varied implications for protein function. However, despite the importance of this modification and the abundance of reports, all these data are scattered in the literature. No database/resource on methionine sulfoxidation exists currently. Since this information is useful to gain further insights into the redox regulation of cellular proteins, we have created a primary database of experimentally confirmed sulfoxidation sites.

Results

MetOSite currently contains 7242 methionine sulfoxide sites found in 3562 different proteins from 23 species, with Homo sapiens, Arabidopsis thaliana and Bacillus cereus as the main contributors. Each collected site has been classified according to the effect of its sulfoxidation on the biological properties of the modified protein. Thus, MetOSite documents cases where the sulfoxidation of methionine leads to (i) gain of activity, (ii) loss of activity, (iii) increased protein-protein interaction susceptibility, (iv) decreased protein-protein interaction susceptibility, (v) changes in protein stability and (vi) changes in subcellular location.

Availability and implementation

MetOSite is available at https://metosite.uma.es.",2019-11-01 +33814598,Teaching and Learning Computational Drug Design: Student Investigations of 3D Quantitative Structure-Activity Relationships through Web Applications.,"The increasing use of information technology in the discovery of new molecular entities encourages the use of modern molecular-modeling tools to help teach important concepts of drug design to chemistry and pharmacy undergraduate students. In particular, statistical models such as quantitative structure-activity relationships (QSAR)-often as its 3D QSAR variant-are commonly used in the development and optimization of a leading compound. We describe how these drug discovery methods can be taught and learned by means of free and open-source web applications, specifically the online platform www.3d-qsar.com. This new suite of web applications has been integrated into a drug design teaching course, one that provides both theoretical and practical perspectives. We include the teaching protocol by which pharmaceutical biotechnology master students at Pharmacy Faculty of Sapienza Rome University are introduced to drug design. Starting with a choice among recent articles describing the potencies of a series of molecules tested against a biological target, each student is expected to build a 3D QSAR ligand-based model from their chosen publication, proceeding as follows: creating the initial data set (Py-MolEdit); generating the global minimum conformations (Py-ConfSearch); proposing a promising mutual alignment (Py-Align); and finally, building, and optimizing a robust 3D QSAR models (Py-CoMFA). These student activities also help validate these new molecular modeling tools, especially for their usability by inexperienced hands. To more fully demonstrate the effectiveness of this protocol and its tools, we include the work performed by four of these students (four of the coauthors), detailing the satisfactory 3D QSAR models they obtained. 
Such scientifically complete experiences by undergraduates, made possible by the efficiency of the 3D QSAR methodology, provide exposure to computational tools in the same spirit as traditional laboratory exercises. With the obsolescence of the classic Comparative Molecular Field Analysis Sybyl host, the 3dqsar web portal offers one of the few available means of performing this well-established 3D QSAR method.",2020-06-23 +34928943,FaDA: A web application for regular laboratory data analyses.,"Web-based data analysis and visualization tools are mostly designed for specific purposes, such as the analysis of data from whole transcriptome RNA sequencing or single-cell RNA sequencing. However, generic tools designed for the analysis of common laboratory data for noncomputational scientists are also needed. The importance of such web-based tools is emphasized by the continuing increases in the sample capacity of conventional laboratory tools such as quantitative PCR, flow cytometry or ELISA instruments. We present a web-based application FaDA, developed with the R Shiny package that provides users with the ability to perform statistical group comparisons, including parametric and nonparametric tests, with multiple testing corrections suitable for most standard wet-laboratory analyses. FaDA provides data visualizations such as heatmaps, principal component analysis (PCA) plots, correlograms and receiver operating curves (ROCs). Calculations are performed through the R language. The FaDA application provides a free and intuitive interface that allows biologists without bioinformatic skill to easily and quickly perform common laboratory data analyses. 
The application is freely accessible at https://shiny-bird.univ-nantes.fr/app/Fada.",2021-12-20 +33211879,From ArrayExpress to BioStudies.,"ArrayExpress (https://www.ebi.ac.uk/arrayexpress) is an archive of functional genomics data at EMBL-EBI, established in 2002, initially as an archive for publication-related microarray data and was later extended to accept sequencing-based data. Over the last decade an increasing share of biological experiments involve multiple technologies assaying different biological modalities, such as epigenetics, and RNA and protein expression, and thus the BioStudies database (https://www.ebi.ac.uk/biostudies) was established to deal with such multimodal data. Its central concept is a study, which typically is associated with a publication. BioStudies stores metadata describing the study, provides links to the relevant databases, such as European Nucleotide Archive (ENA), as well as hosts the types of data for which specialized databases do not exist. With BioStudies now fully functional, we are able to further harmonize the archival data infrastructure at EMBL-EBI, and ArrayExpress is being migrated to BioStudies. In future, all functional genomics data will be archived at BioStudies. The process will be seamless for the users, who will continue to submit data using the online tool Annotare and will be able to query and download data largely in the same manner as before. Nevertheless, some technical aspects, particularly programmatic access, will change. This update guides the users through these changes.",2021-01-01 +32917675,Incomplete annotation has a disproportionate impact on our understanding of Mendelian and complex neurogenetic disorders. ,"Growing evidence suggests that human gene annotation remains incomplete; however, it is unclear how this affects different tissues and our understanding of different disorders. 
Here, we detect previously unannotated transcription from Genotype-Tissue Expression RNA sequencing data across 41 human tissues. We connect this unannotated transcription to known genes, confirming that human gene annotation remains incomplete, even among well-studied genes including 63% of the Online Mendelian Inheritance in Man-morbid catalog and 317 neurodegeneration-associated genes. We find the greatest abundance of unannotated transcription in brain and genes highly expressed in brain are more likely to be reannotated. We explore examples of reannotated disease genes, such as SNCA, for which we experimentally validate a previously unidentified, brain-specific, potentially protein-coding exon. We release all tissue-specific transcriptomes through vizER: http://rytenlab.com/browser/app/vizER We anticipate that this resource will facilitate more accurate genetic analysis, with the greatest impact on our understanding of Mendelian and complex neurogenetic disorders.",2020-06-10 +34468230,Central obesity accelerates leukocyte telomere length (LTL) shortening in apparently healthy adults: A systematic review and meta-analysis.,"Shorter telomere length is associated with numerous comorbidities; central obesity might trigger leukocyte telomere shortening; in the current meta-analysis we evaluated the association of central obesity with leukocyte telomere length among adults. A systematic search from Scopus, PubMed, Embase and Proquest electronic databases up to May 2021 was done. The final screening, provided five articles to be included in final meta-analysis. Those in the highest category of telomere length had 3.72 cm lower waist circumference (WC) compared with those in the lowest category (WMD=-3.718; CI=-7.180, -0.257 P = 0.035; I2 = 95.4%). Also, those in the highest LTL category had 0.02 lower waist to hip ratio (WHR) compared with those in the lowest category, although this association was not significant (WMD: -0.02; CI=-0.04, 0.01; P = 0.19; I2= 90.7%). 
In quality assessment of included studies, all of the studies had moderate or high quality score and there was no study with poor quality. Higher leukocyte telomere length was accompanied with lower WC among adults. This association was not significant for difference in WHR. Because of the high heterogeneity values and also because of the observational design of included studies, the inference of causality of these associations needs further investigations. Supplemental data for this article is available online at https://doi.org/10.1080/10408398.2021.1971155.",2021-09-01 +34107016,VirusViz: comparative analysis and effective visualization of viral nucleotide and amino acid variants.,"Variant visualization plays an important role in supporting the viral evolution analysis, extremely valuable during the COVID-19 pandemic. VirusViz is a web-based application for comparing variants of selected viral populations and their sub-populations; it is primarily focused on SARS-CoV-2 variants, although the tool also supports other viral species (SARS-CoV, MERS-CoV, Dengue, Ebola). As input, VirusViz imports results of queries extracting variants and metadata from the large database ViruSurf, which integrates information about most SARS-CoV-2 sequences publicly deposited worldwide. Moreover, VirusViz accepts sequences of new viral populations as multi-FASTA files plus corresponding metadata in CSV format; a bioinformatic pipeline builds a suitable input for VirusViz by extracting the nucleotide and amino acid variants. Pages of VirusViz provide metadata summarization, variant descriptions, and variant visualization with rich options for zooming, highlighting variants or regions of interest, and switching from nucleotides to amino acids; sequences can be grouped, groups can be comparatively analyzed. 
For SARS-CoV-2, we manually collect mutations with known or predicted levels of severity/virulence, as indicated in linked research articles; such critical mutations are reported when observed in sequences. The system includes light-weight project management for downloading, resuming, and merging data analysis sessions. VirusViz is freely available at http://gmql.eu/virusviz/.",2021-09-01 +31797049,MS/MS in silico subtraction-based proteomic profiling as an approach to facilitate disease gene discovery: application to lens development and cataract.,"While the bioinformatics resource-tool iSyTE (integrated Systems Tool for Eye gene discovery) effectively identifies human cataract-associated genes, it is currently based on just transcriptome data, and thus, it is necessary to include protein-level information to gain greater confidence in gene prioritization. Here, we expand iSyTE through development of a novel proteome-based resource on the lens and demonstrate its utility in cataract gene discovery. We applied high-throughput tandem mass spectrometry (MS/MS) to generate a global protein expression profile of mouse lens at embryonic day (E)14.5, which identified 2371 lens-expressed proteins. A major challenge of high-throughput expression profiling is identification of high-priority candidates among the thousands of expressed proteins. To address this problem, we generated new MS/MS proteome data on mouse whole embryonic body (WB). WB proteome was then used as a reference dataset for performing ""in silico WB-subtraction"" comparative analysis with the lens proteome, which effectively identified 422 proteins with lens-enriched expression at ≥ 2.5 average spectral counts, ≥ 2.0 fold enrichment (FDR < 0.01) cut-off. These top 20% candidates represent a rich pool of high-priority proteins in the lens including known human cataract-linked genes and many new potential regulators of lens development and homeostasis. 
This rich information is made publicly accessible through iSyTE (https://research.bioinformatics.udel.edu/iSyTE/), which enables user-friendly visualization of promising candidates, thus making iSyTE a comprehensive tool for cataract gene discovery.",2019-12-03 +31584095,EWAS Data Hub: a resource of DNA methylation array data and metadata.,"Epigenome-Wide Association Study (EWAS) has become an effective strategy to explore epigenetic basis of complex traits. Over the past decade, a large amount of epigenetic data, especially those sourced from DNA methylation array, has been accumulated as the result of numerous EWAS projects. We present EWAS Data Hub (https://bigd.big.ac.cn/ewas/datahub), a resource for collecting and normalizing DNA methylation array data as well as archiving associated metadata. The current release of EWAS Data Hub integrates a comprehensive collection of DNA methylation array data from 75 344 samples and employs an effective normalization method to remove batch effects among different datasets. Accordingly, taking advantages of both massive high-quality DNA methylation data and standardized metadata, EWAS Data Hub provides reference DNA methylation profiles under different contexts, involving 81 tissues/cell types (that contain 25 brain parts and 25 blood cell types), six ancestry categories, and 67 diseases (including 39 cancers). In summary, EWAS Data Hub bears great promise to aid the retrieval and discovery of methylation-based biomarkers for phenotype characterization, clinical treatment and health care.",2020-01-01 +31680165,The DisGeNET knowledge platform for disease genomics: 2019 update.,"One of the most pressing challenges in genomic medicine is to understand the role played by genetic variation in health and disease. Thanks to the exploration of genomic variants at large scale, hundreds of thousands of disease-associated loci have been uncovered. 
However, the identification of variants of clinical relevance is a significant challenge that requires comprehensive interrogation of previous knowledge and linkage to new experimental results. To assist in this complex task, we created DisGeNET (http://www.disgenet.org/), a knowledge management platform integrating and standardizing data about disease associated genes and variants from multiple sources, including the scientific literature. DisGeNET covers the full spectrum of human diseases as well as normal and abnormal traits. The current release covers more than 24 000 diseases and traits, 17 000 genes and 117 000 genomic variants. The latest developments of DisGeNET include new sources of data, novel data attributes and prioritization metrics, a redesigned web interface and recently launched APIs. Thanks to the data standardization, the combination of expert curated information with data automatically mined from the scientific literature, and a suite of tools for accessing its publicly available data, DisGeNET is an interoperable resource supporting a variety of applications in genomic medicine and drug R&D.",2020-01-01 +31114875,GEPIA2: an enhanced web server for large-scale expression profiling and interactive analysis.,"Introduced in 2017, the GEPIA (Gene Expression Profiling Interactive Analysis) web server has been a valuable and highly cited resource for gene expression analysis based on tumor and normal samples from the TCGA and the GTEx databases. Here, we present GEPIA2, an updated and enhanced version to provide insights with higher resolution and more functionalities. Featuring 198 619 isoforms and 84 cancer subtypes, GEPIA2 has extended gene expression quantification from the gene level to the transcript level, and supports analysis of a specific cancer subtype, and comparison between subtypes. 
In addition, GEPIA2 has adopted new analysis techniques of gene signature quantification inspired by single-cell sequencing studies, and provides customized analysis where users can upload their own RNA-seq data and compare them with TCGA and GTEx samples. We also offer an API for batch process and easy retrieval of the analysis results. The updated web server is publicly accessible at http://gepia2.cancer-pku.cn/.",2019-07-01 +31351979,Preventing iatrogenic gelatin anaphylaxis.,"

Objective

To assess the iatrogenic risks of gelatin allergy and identify resources for patient management.

Data sources

A literature review was performed using PubMed and public databases provided by the National Library of Medicine.

Study selections

Reports of iatrogenic gelatin allergy associated with vaccines, hemostatic agents, intravenous colloids, medicinal capsules, and intraoperative surgical supplies.

Results

Gelatin ingredients may not be identified by electronic medical record safeguards, and an exhaustive listing of potential iatrogenic exposures is elusive. The National Library of Medicine AccessGUDID (https://accessgudid.nlm.nih.gov/) can be a useful resource in evaluating medical devices for gelatin content. Unexpected sources of iatrogenic gelatin exposure include hemostatic agents, vascular grafts, intravascular cannulas, bone replacement implants, and emergency resuscitation fluids.

Conclusion

Vigilance is important within medical systems to avoid inadvertent gelatin exposure when caring for patients with gelatin allergy. Additional safeguards are needed to remove latent health care system errors that fail to prevent gelatin administration in this at-risk population.",2019-07-26 +33758323,Weighted gene co-expression network analysis identifies specific modules and hub genes related to coronary artery disease.,"This investigation seeks to dissect coronary artery disease molecular target candidates along with its underlying molecular mechanisms. Data on patients with CAD across three separate array data sets, GSE66360, GSE19339 and GSE97320 were extracted. The gene expression profiles were obtained by normalizing and removing the differences between the three data sets, and important modules linked to coronary heart disease were identified using weighted gene co-expression network analysis (WGCNA). Gene Ontology (GO) functional and Kyoto Encyclopedia of Genes and genomes (KEGG) pathway enrichment analyses were applied in order to identify statistically significant genetic modules with the Database for Annotation, Visualization and Integrated Discovery (DAVID) online tool (version 6.8; http://david.abcc.ncifcrf.gov ). The online STRING tool was used to construct a protein-protein interaction (PPI) network, followed by the use of Molecular Complex Detection (MCODE) plug-ins in Cytoscape software to identify hub genes. Two significant modules (green-yellow and magenta) were identified in the CAD samples. Genes in the magenta module were noted to be involved in inflammatory and immune-related pathways, based on GO and KEGG enrichment analyses. After the MCODE analysis, two different MCODE complexes were identified in the magenta module, and four hub genes (ITGAM, degree = 39; CAMP, degree = 37; TYROBP, degree = 28; ICAM1, degree = 18) were uncovered to be critical players in mediating CAD. 
Independent verification data as well as our RT-qPCR results were highly consistent with the above finding. ITGAM, CAMP, TYROBP and ICAM1 are potential targets in CAD. The underlying mechanism may be related to the transendothelial migration of leukocytes and the immune response.",2021-03-23 +31605102,DRUDIT: web-based DRUgs DIscovery Tools to design small molecules as modulators of biological targets.,"

Motivation

New in silico tools to predict biological affinities for input structures are presented. The tools are implemented in the DRUDIT (DRUgs DIscovery Tools) web service. The DRUDIT biological finder module is based on molecular descriptors that are calculated by the MOLDESTO (MOLecular DEScriptors TOol) software module developed by the same authors, which is able to calculate more than one thousand molecular descriptors. At this stage, DRUDIT includes 250 biological targets, but new external targets can be added. This feature extends the application scope of DRUDIT to several fields. Moreover, two more functions are implemented: the multi- and on/off-target tasks. These tools applied to input structures allow for predicting the polypharmacology and evaluating the collateral effects.

Results

The applications described in the article show that DRUDIT is able to predict a single biological target, to identify similarities among biological targets, and to discriminate different target isoforms. The main advantages of DRUDIT for the scientific community lie in its ease of use by worldwide scientists and the possibility to be used also without specific, and often expensive, hardware and software. In fact, it is fully accessible through the WWW from any device to perform calculations. Just a click or a tap can start tasks to predict biological properties for new compounds or repurpose drugs, lead compounds, or unsuccessful compounds. To date, DRUDIT is supported by four servers each able to execute 8 jobs simultaneously.

Availability and implementation

The web service is accessible at the www.drudit.com URL and its use is free of charge.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +33826865,IsoDA: Isoform-Disease Association Prediction by Multiomics Data Fusion.,"A gene can be spliced into different isoforms by alternative splicing, which contributes to the functional diversity of protein species. Computational prediction of gene-disease associations (GDAs) has been studied for decades. However, the process of identifying the isoform-disease associations (IDAs) at a large scale is rarely explored, which can decipher the pathology at a more granular level. The main bottleneck is the lack of IDAs in current databases and the multilevel omics data fusion. To bridge this gap, we propose a computational approach called Isoform-Disease Association prediction by multiomics data fusion (IsoDA) to predict IDAs. Based on the relationship between a gene and its spliced isoforms, IsoDA first introduces a dispatch and aggregation term to dispatch gene-disease associations to individual isoforms, and reversely aggregate these dispatched associations to their hosting genes. At the same time, it fuses the genome, transcriptome, and proteome data by joint matrix factorization to improve the prediction of IDAs. Experimental results show that IsoDA significantly outperforms the related state-of-the-art methods at both the gene level and isoform level. A case study further shows that IsoDA credibly identifies three isoforms spliced from apolipoprotein E, which have individual associations with Alzheimer's disease, and two isoforms spliced from vascular endothelial growth factor A, which have different associations with coronary heart disease. The codes of IsoDA are available at http://mlda.swu.edu.cn/codes.php?name=IsoDA.",2021-04-07 +,P13 Abdominal Surgery during the COVID-19 Pandemic: A Multicentre Audit,"Abstract

Introduction

The threshold for surgery has increased during the COVID-19 pandemic. A widely cited Chinese study (n = 34) reported postoperative COVID-19 pneumonia and mortality rates of 100% and 21% respectively [1]. This audit assessed outcomes after abdominal surgery across three hospitals within Mid & South Essex NHS Foundation Trust.

Methods

Patients undergoing abdominal surgery at Basildon University Hospital, Mid Essex Hospital and Southend University Hospital between 1st March and 27th April 2020 were included. Obstetric, gynaecological, vascular, inguinal/femoral hernia, and skin operations were excluded. Electronic data collection was supplemented by telephone follow-up.

Results

306 patients were included. The median age was 57 years. 148 (48.4%) were female. 156 (51.0%) and 150 (49.0%) patients underwent elective and emergency surgery respectively. The preoperative and postoperative SARS-CoV-2 rates (based on RT-PCR or imaging) were 0.3% (n = 1) and 4.6% (n = 14) respectively. 84.6% (n = 259) did not have RT-PCR tests. All-cause 30-day mortality was 3.6% (n = 11). Amongst patients with SARS-CoV-2, mortality was 50% (7/14), occurring only after emergency surgery. Elective (vs. emergency) surgery was associated with lower postoperative SARS-CoV-2 (0.6% vs. 8.7%; p < 0.001) and mortality (0.6% vs. 6.7%; p = 0.005). At follow-up, 79.1% (242/306) of patients responded, most (85.1%; 206/242) without major clinical issue.

Conclusion

Local SARS-CoV-2 and mortality rates are lower than previously reported [1]. Perioperative COVID-19 carries a high mortality risk. We recommend perioperative SARS-CoV-2 testing for all patients and cohorting by infection status.

References

1. Lei et al., Clinical characteristics and outcomes of patients undergoing surgeries during the incubation period of COVID-19 infection, EClinicalMedicine(2020), https://doi.org/10.1016/j.eclinm.2020.100331",2021-04-01 +33772585,An immunologically friendly classification of non-peptidic ligands. ,"The Immune Epitope Database (IEDB) freely provides experimental data regarding immune epitopes to the scientific public. The main users of the IEDB are immunologists who can easily use our web interface to search for peptidic epitopes via their simple single-letter codes. For example, 'A' stands for 'alanine'. Similarly, users can easily navigate the IEDB's simplified NCBI taxonomy hierarchy to locate proteins from specific organisms. However, some epitopes are non-peptidic, such as carbohydrates, lipids, chemicals and drugs, and it is more challenging to consistently name them and search upon, making access to their data more problematic for immunologists. Therefore, we set out to improve access to non-peptidic epitope data in the IEDB through the simplification of the non-peptidic hierarchy used in our search interfaces. Here, we present these efforts and their outcomes. Database URL:  http://www.iedb.org/.",2021-03-01 +34849195,A k-mer based approach for classifying viruses without taxonomy identifies viral associations in human autism and plant microbiomes.,"Viruses are an underrepresented taxa in the study and identification of microbiome constituents; however, they play an essential role in health, microbiome regulation, and transfer of genetic material. Only a few thousand viruses have been isolated, sequenced, and assigned a taxonomy, which limits the ability to identify and quantify viruses in the microbiome. Additionally, the vast diversity of viruses represents a challenge for classification, not only in constructing a viral taxonomy, but also in identifying similarities between a virus' genotype and its phenotype. 
However, the diversity of viral sequences can be leveraged to classify their sequences in metagenomic and metatranscriptomic samples, even if they do not have a taxonomy. To identify and quantify viruses in transcriptomic and genomic samples, we developed a dynamic programming algorithm for creating a classification tree out of 715,672 metagenome viruses. To create the classification tree, we clustered proportional similarity scores generated from the k-mer profiles of each of the metagenome viruses to create a database of metagenomic viruses. The resulting Kraken2 database of the metagenomic viruses can be found here: https://www.osti.gov/biblio/1615774 and is compatible with Kraken2. We then integrated the viral classification database with databases created with genomes from NCBI for use with ParaKraken (a parallelized version of Kraken provided in Supplemental Zip 1), a metagenomic/transcriptomic classifier. To illustrate the breadth of our utility for classifying metagenome viruses, we analyzed data from a plant metagenome study identifying genotypic and compartment specific differences between two Populus genotypes in three different compartments. We also identified a significant increase in abundance of eight viral sequences in post mortem brains in a human metatranscriptome study comparing Autism Spectrum Disorder patients and controls. We also show the potential accuracy for classifying viruses by utilizing both the JGI and NCBI viral databases to identify the uniqueness of viral sequences. Finally, we validate the accuracy of viral classification with NCBI databases containing viruses with taxonomy to identify pathogenic viruses in known COVID-19 and cassava brown streak virus infection samples. Our method represents the compulsory first step in better understanding the role of viruses in the microbiome by allowing for a more complete identification of sequences without taxonomy. 
Better classification of viruses will improve identifying associations between viruses and their hosts as well as viruses and other microbiome members. Despite the lack of taxonomy, this database of metagenomic viruses can be used with any tool that utilizes a taxonomy, such as Kraken, for accurate classification of viruses.",2021-10-25 +33852582,AlnC: An extensive database of long non-coding RNAs in angiosperms.,"Long non-coding RNAs (lncRNAs) are defined as transcripts of greater than 200 nucleotides that play a crucial role in various cellular processes such as the development, differentiation and gene regulation across all eukaryotes, including plant cells. Since the last decade, there has been a significant rise in our understanding of lncRNA molecular functions in plants, resulting in an exponential increase in lncRNA transcripts, while these went unannounced from the major Angiosperm plant species despite the availability of large-scale high throughput sequencing data in public repositories. We, therefore, developed a user-friendly, open-access web interface, AlnC (Angiosperm lncRNA Catalogue) for the exploration of lncRNAs in diverse Angiosperm plant species using recent 1000 plant (1KP) transcriptome data. The current version of AlnC offers 10,855,598 annotated lncRNA transcripts across 682 Angiosperm plant species encompassing 809 tissues. To improve the user interface, we added features for browsing, searching, and downloading lncRNA data, interactive graphs, and an online BLAST service. Additionally, each lncRNA record is annotated with possible small open reading frames (sORFs) to facilitate the study of peptides encoded within lncRNAs. With this user-friendly interface, we anticipate that AlnC will provide a rich source of lncRNAs for small- and large-scale studies in a variety of flowering plants, as well as aid in the improvement of key characteristics in relevance to their economic importance. 
Database URL: http://www.nipgr.ac.in/AlnC.",2021-04-14 +33104196,Classification and review of free PCR primer design software.,"

Motivation

Polymerase chain reaction (PCR) has been a revolutionary biomedical advancement. However, for PCR to be appropriately used, one must spend a significant amount of effort on PCR primer design. Carefully designed PCR primers not only increase sensitivity and specificity, but also decrease effort spent on experimental optimization. Computer software removes the human element by performing and automating the complex and rigorous calculations required in PCR primer design. Classification and review of the available software options and their capabilities should be a valuable resource for any PCR application.

Results

This article focuses on currently available free PCR primer design software and their major functions (https://pcrprimerdesign.github.io/). The software are classified according to their PCR applications, such as Sanger sequencing, reverse transcription quantitative PCR, single nucleotide polymorphism detection, splicing variant detection, methylation detection, microsatellite detection, multiplex PCR and targeted next generation sequencing, and conserved/degenerate primers to clone orthologous genes from related species, new gene family members in the same species, or to detect a group of related pathogens. Each software is summarized to provide a technical review of their capabilities and utilities.",2021-04-01 +34878857,"National Health Interview Survey, COVID-19, and Online Data Collection Platforms: Adaptations, Tradeoffs, and New Directions.","High-quality data are accurate, relevant, and timely. Large national health surveys have always balanced the implementation of these quality dimensions to meet the needs of diverse users. The COVID-19 pandemic shifted these balances, with both disrupted survey operations and a critical need for relevant and timely health data for decision-making. The National Health Interview Survey (NHIS) responded to these challenges with several operational changes to continue production in 2020. However, data files from the 2020 NHIS were not expected to be publicly available until fall 2021. To fill the gap, the National Center for Health Statistics (NCHS) turned to 2 online data collection platforms-the Census Bureau's Household Pulse Survey (HPS) and the NCHS Research and Development Survey (RANDS)-to collect COVID-19‒related data more quickly. This article describes the adaptations of NHIS and the use of HPS and RANDS during the pandemic in the context of the recently released Framework for Data Quality from the Federal Committee on Statistical Methodology. (Am J Public Health. 2021;111(12):2167-2175. 
https://doi.org/10.2105/AJPH.2021.306516).",2021-12-01 +30335176,NucMap: a database of genome-wide nucleosome positioning map across species.,"Dynamics of nucleosome positioning affects chromatin state, transcription and all other biological processes occurring on genomic DNA. While MNase-Seq has been used to depict nucleosome positioning map in eukaryote in the past years, nucleosome positioning data is increasing dramatically. To facilitate the usage of published data across studies, we developed a database named nucleosome positioning map (NucMap, http://bigd.big.ac.cn/nucmap). NucMap includes 798 experimental data from 477 samples across 15 species. With a series of functional modules, users can search profile of nucleosome positioning at the promoter region of each gene across all samples and make enrichment analysis on nucleosome positioning data in all genomic regions. Nucleosome browser was built to visualize the profiles of nucleosome positioning. Users can also visualize multiple sources of omics data with the nucleosome browser and make side-by-side comparisons. All processed data in the database are freely available. NucMap is the first comprehensive nucleosome positioning platform and it will serve as an important resource to facilitate the understanding of chromatin regulation.",2019-01-01 +32076423,FHLdb: A Comprehensive Database on the Molecular Basis of Familial Hemophagocytic Lymphohistiocytosis.,"Background: Primary immunodeficiencies (PIDs) are a heterogeneous group of disorders. The lack of comprehensive disease-specific mutation databases may hinder or delay classification of the genetic variants found in samples from these patients. This is especially true for familial hemophagocytic lymphohistiocytosis (FHL), a life-threatening PID classically considered an autosomal recessive condition, but with increasingly demonstrated genetic heterogeneity. 
Objective: The aim of this study was to build an open-access repository to collect detailed information on the known genetic variants reported in FHL. Methods: We manually reviewed more than 120 articles to identify all reported variants related to FHL. We retrieved relevant information about the allelic status, the number of patients with the same variant, and whether functional assays were done. We stored all the data retrieved in a PostgreSQL database and then built a website on top of it, using the Django framework. Results: The database designed (FHLdb) (https://www.biotoclin.org/FHLdb) contains comprehensive information on reported variants in the 4 genes related to FHL (PRF1, UNC13D, STXBP2, STX11). It comprises 240 missense, 69 frameshift, 51 nonsense, 51 splicing, 10 in-frame indel, 7 deep intronic, and 5 large rearrangement variants together with their allelic status, carrier(s) information, and functional evidence. All genetic variants have been classified as pathogenic, likely pathogenic, uncertain significance, likely benign or benign, according to the American College of Medical Genetics guidelines. Additionally, it integrates information from other relevant databases: clinical evidence from ClinVar and UniProt, population allele frequency from ExAC and gnomAD, and pathogenicity predictions from well-recognized tools (e.g., PolyPhen-2, SIFT). Finally, a diagram depicts the location of the variant relative to the gene exon and protein domain structures. Conclusion: FHLdb includes a broad range of data on the reported genetic variants in familial HLH genes. It is a free-access and easy-to-use resource that will facilitate the interpretation of molecular results of FHL patients, and it illustrates the potential value of disease-specific databases for other PIDs.",2020-01-31 +,First Report of Fusarium proliferatum as the Causal Agent of Seed Rot of Hyssopus officinalis in Serbia,"Fusarium spp. 
are important soil- and seed-borne pathogens of many field and vegetable crops, including orchards and medicinal plants (Leslie and Summerell 2006). Fusarium proliferatum is the most common pathogen infecting numerous crop plants and occurring in various climatic zones. In Serbia, this species is well-known as a pathogen of wheat, maize, bean, and recently, garlic and onion (Lević et al. 2009; Stanković et al. 2007). Hyssop (Hyssopus officinalis L.) is grown in Serbia as a member of the Lamiaceae family for the needs of pharmaceutical companies and tea production, because of its medicinal and aromatic properties. In Serbia, production takes place on about 500 ha (no official data). During a routine quality control of hyssop seeds collected from Rumenka (Vojvodina Province), in 2018, fungal infection followed by seed rot was noticed on an average of 22%. White mycelium covered infected seeds with violet pigmentation occurring under the seeds. Microscopic observation confirmed the presence of Fusarium spp. Prior to isolation, seeds were surface disinfected in 1% NaOCl for 3 min, rinsed, dried, and plated onto potato dextrose agar (PDA). Plates were incubated at 25°C under ultraviolet light (“black light”) with a 12-h photoperiod (Mathur and Kongsdall 2003). Seven days later, 12 Fusarium spp. isolates (JBL 4003/1 to 4003/12) were single spored and subcultured on both PDA and carnation leaf agar. Pathogenicity testing was performed in vitro using a modified agar slant method in the test tube with PDA amended (Porter et al. 2015). A piece of mycelium of each isolate grown on PDA for 7 days was placed at the bottom of the tube, and 2 cm above the inoculum, dried hyssop seed was carefully placed. After 10 days, fungal mycelia of 12 isolates caused seed rot and seedling decay. All the tested isolates were reisolated and used for further analysis. The isolate JBL 4003/2 formed white, aerial, and abundant colonies, with light violet to brown pigmentation in agar. 
Within 5 days, microconidia were formed in the aerial mycelium, in long chains or cohering in false heads. Slightly curved rather straight macroconidia were formed, with a distinct foot cell, mostly three to five septate, with average dimensions of 31 to 53 × 3.4 to 4.1 µm. No chlamydospores were observed. Based on the description given by Gerlach and Nirenberg (1982), cultural and morphological characteristics indicated that the isolate JBL 4003/2 belongs to F. proliferatum. To obtain a DNA sequence-based identification, total DNA was extracted directly from the mycelium with a DNeasy Plant Mini Kit (Qiagen, Hilden, Germany). Following DNA extraction, the translation elongation factor 1-alpha region was amplified by PCR using the primer pair EF1 and EF2 (Geiser et al. 2004). Sequences were analyzed and BLAST searched against GenBank (http://blast.ncbi.nlm.nih.gov/) and FUSARIUM-ID (http://fusariumdb.org/). Isolate designated as JBL 4003/2 was deposited in the NCBI GenBank database under the accession number MK061541.1. BLASTn queries of GenBank and the Fusarium-ID database showed 100% identity to accession numbers KY801934.1 and MK507798.1, which belong to F. proliferatum (Matsushima) Nirenberg. Based on Koch’s postulates and sequence analysis, to our knowledge this is the first report of F. proliferatum as the causal agent of H. officinalis seed rot in Serbia.",2020-06-01 +34269623,The impact of disability on performance in a high-stakes postgraduate surgical examination: a retrospective cohort study.,"

Objective

Despite rising numbers of doctors in the workforce with disabilities, little is known about the impact of disabilities on postgraduate performance. To ensure all groups are treated fairly in surgical training, it is essential to know whether any attainment differences exist in markers of surgical performance. To address this gap, we assessed the impact of disabilities on performance on the Intercollegiate Membership of the Royal College of Surgeons examination (MRCS).

Design

Retrospective cohort study.

Setting

Secondary care.

Participants

All UK MRCS candidates attempting Part A (n = 9600) and Part B (n = 4560) between 2007 and 2017 with linked disability data in the UK Medical Education Database (https://www.ukmed.ac.uk) were included.

Main outcome measures

Chi-square tests and correlation coefficients established univariate associations with MRCS performance, while multiple logistic regressions identified independent predictors of success.

Results

Though MRCS Part B pass rates were similar (p = 0.339), candidates with registered disabilities had significantly lower first-attempt Part A pass rates (46.3% vs. 59.8%, p < 0.001). Candidates with disabilities also performed less well in examinations taken throughout school and medical school, and after adjusting for prior academic performance and sociodemographic predictors of success, logistic regression found that candidates with disabilities were no less likely to pass MRCS than their peers (odds ratio 1.04, 95% confidence interval 0.66 to 1.62). No significant variation was found in MRCS performance between type of disability or degree of limitations caused by disability (p > 0.05).

Conclusion

Although candidates with registered disabilities performed less well in formal, written examinations, our data indicate that they are as likely to pass MRCS at first attempt as their peers who achieved similar grades at high school and medical school. In order to enable equity in career progression, further work is needed to investigate the causes of attainment differences in early career assessments.",2021-07-16 +34240464,Ligand Binding Site Comparison - LiBiSCo - a web-based tool for analyzing interactions between proteins and ligands to explore amino acid specificity within active sites.,"Interactions between proteins and ligands are ubiquitous in a biological cell, and understanding these interactions at the atom level in protein-ligand complexes is crucial for structural bioinformatics and drug discovery. Here, we present a web-based protein-ligand interaction application named Ligand Binding Site Comparison (LiBiSCo) for comparing the amino acid residues interacting with atoms of a ligand molecule between different protein-ligand complexes available in the Protein Data Bank (PDB) database. The comparison is performed at the ligand atom level irrespectively of having binding site similarity or not between the protein structures of interest. The input used in LiBiSCo is one or several PDB IDs of protein-ligand complex(es) and the tool returns a list of identified interactions at ligand atom level including both bonded and non-bonded interactions. A sequence profile for the interaction for each ligand atom is provided as a WebLogo. The LiBiSCo is useful in understanding ligand binding specificity and structural promiscuity among families that are structurally unrelated. 
The LiBiSCo tool can be accessed through https://albiorix.bioenv.gu.se/LiBiSCo/HomePage.py.",2021-08-02 +31584097,The International Genome Sample Resource (IGSR) collection of open human genomic variation resources.,"To sustain and develop the largest fully open human genomic resources the International Genome Sample Resource (IGSR) (https://www.internationalgenome.org) was established. It is built on the foundation of the 1000 Genomes Project, which created the largest openly accessible catalogue of human genomic variation developed from samples spanning five continents. IGSR (i) maintains access to 1000 Genomes Project resources, (ii) updates 1000 Genomes Project resources to the GRCh38 human reference assembly, (iii) adds new data generated on 1000 Genomes Project cell lines, (iv) shares data from samples with a similarly open consent to increase the number of samples and populations represented in the resources and (v) provides support to users of these resources. Among recent updates are the release of variation calls from 1000 Genomes Project data calculated directly on GRCh38 and the addition of high coverage sequence data for the 2504 samples in the 1000 Genomes Project phase three panel. The data portal, which facilitates web-based exploration of the IGSR resources, has been updated to include samples which were not part of the 1000 Genomes Project and now presents a unified view of data and samples across almost 5000 samples from multiple studies. All data is fully open and publicly accessible.",2020-01-01 +34976163,Process-specific technical data used in exposure assessment of food enzymes.,"Technical data for exposure assessment of food enzymes Dietary exposure is part of the overall assessment of food enzymes. In order to develop food process-based exposure models, a number of different input data are required in tandem with technical conversion factors. 
This allows for a combination of use levels with food consumption data, which are typically reported as consumed. The use levels are expressed as total organic solids/kg raw materials. For each food process, EFSA identified a list of food groups and collated technical conversion factors. To ensure uniform application of FoodEx food categories and technical conversion factors in the assessment of food enzyme dossiers, stakeholders were consulted via open calls-for-data. Feedback was analysed. This document reports the consolidated input parameters for each food process. Regular updates have been made on a yearly basis since 2018, as further process-specific parameters were generated. The consolidated input data have been used to calculate dietary exposure during the evaluation of food enzyme applications. As well as publishing the input parameters, process-specific calculators of the food enzyme intake models (FEIM) have also been developed on the basis of summary statistics. These calculators have been deposited at https://zenodo.org/ for open access.",2021-12-20 +33080015,NASA GeneLab: interfaces for the exploration of space omics data.,"The mission of NASA's GeneLab database (https://genelab.nasa.gov/) is to collect, curate, and provide access to the genomic, transcriptomic, proteomic and metabolomic (so-called 'omics') data from biospecimens flown in space or exposed to simulated space stressors, maximizing their utilization. This large collection of data enables the exploration of molecular network responses to space environments using a systems biology approach. We review here the various components of the GeneLab platform, including the new data repository web interface, and the GeneLab Online Data Entry (GEODE) web portal, which will support the expansion of the database in the future to include companion non-omics assay data. 
We discuss our design for GEODE, particularly how it promotes investigators providing more accurate metadata, reducing the curation effort required of GeneLab staff. We also introduce here a new GeneLab Application Programming Interface (API) specifically designed to support tools for the visualization of processed omics data. We review the outreach efforts by GeneLab to utilize the spaceflight data in the repository to generate novel discoveries and develop new hypotheses, including spearheading data analysis working groups, and a high school student training program. All these efforts are aimed ultimately at supporting precision risk management for human space exploration.",2021-01-01 +34156446,MetamORF: a repository of unique short open reading frames identified by both experimental and computational approaches for gene and metagene analyses. ,"The development of high-throughput technologies revealed the existence of non-canonical short open reading frames (sORFs) on most eukaryotic ribonucleic acids. They are ubiquitous genetic elements conserved across species and suspected to be involved in numerous cellular processes. MetamORF (https://metamorf.hb.univ-amu.fr/) aims to provide a repository of unique sORFs identified in the human and mouse genomes with both experimental and computational approaches. By gathering publicly available sORF data, normalizing them and summarizing redundant information, we were able to identify a total of 1 162 675 unique sORFs. Despite the usual characterization of ORFs as short, upstream or downstream, there is currently no clear consensus regarding the definition of these categories. Thus, the data have been reprocessed using a normalized nomenclature. MetamORF enables new analyses at locus, gene, transcript and ORF levels, which should offer the possibility to address new questions regarding sORF functions in the future. 
The repository is available through a user-friendly web interface, allowing easy browsing, visualization, filtering over multiple criteria and export possibilities. sORFs can be searched starting from a gene, a transcript and an ORF ID, looking in a genome area or browsing the whole repository for a species. The database content has also been made available through track hubs at UCSC Genome Browser. Finally, we demonstrated an enrichment of genes harboring upstream ORFs among genes expressed in response to reticular stress. Database URL  https://metamorf.hb.univ-amu.fr/.",2021-06-01 +34891750,A Platform for Integrating and Sharing Cancer Stem Cell Data.,"Advancements in cancer research and treatment have highlighted the need for standardization and sharing of cancer stem cell (CSC) data to facilitate research transparency and to promote collaboration within the scientific community. Although previous applications have attempted to gather and disseminate these data, currently no platform organizes the heterogeneous CSC information into a harmonized project-based framework. The aim of our platform, ReMeDy, is to provide an intelligent informatics solution integrating diverse CSC characteristics, outcomes information, and omics data across clinical, preclinical and in vitro studies. These heterogeneous data streams are organized within a multi-modular framework, subjected to a stringent validation by using standardized ontologies, and stored in a searchable format. To test usefulness of our approach for capturing diverse data related to CSCs, we integrated data from 52 publicly-available CSC projects. We validated the robustness of the platform, by efficiently organizing diverse data elements, and demonstrated its potential for promoting future knowledge discovery driven by aggregation of published data. Next steps include expanding number of uploaded CSC projects and developing additional data visualization tools. 
The platform is accessible through https://remedy.mssm.edu/.",2021-11-01 +34004273,Klebsiella MALDI TypeR: a web-based tool for Klebsiella identification based on MALDI-TOF mass spectrometry.,"Klebsiella pathogens affect human and animal health and are widely distributed in the environment. Among these, the Klebsiella pneumoniae species complex, which includes seven phylogroups, is an important cause of community and hospital infections. The Klebsiella oxytoca species complex also causes hospital infections and antibiotic-associated haemorrhagic colitis. The unsuitability of currently used clinical microbiology methods to distinguish species within each of these species complexes leads to high rates of misidentifications that are masking the true clinical significance and potential epidemiological specificities of individual species. We developed a web-based tool, Klebsiella MALDI TypeR, a platform-independent and user-friendly application that enables uploading MALDI-TOF mass spectrometry data in order to identify Klebsiella isolates at the species complex and phylogroup levels. The tool, available at https://maldityper.pasteur.fr/, leverages a database of previously identified biomarkers that are specific for species complexes, individual phylogroups, or related phylogroups. We obtained 84%-100% identification accuracy depending on phylogroup. Identification results are obtained in a few seconds from batches of uploaded spectral data. Klebsiella MALDI TypeR enables fast and reliable identification of Klebsiella strains that are often misidentified with standard microbiological methods. This web-based identification tool may be extended in the future to other human bacterial pathogens.",2021-05-15 +33694079,Using fuzzy string matching for automated assessment of listener transcripts in speech intelligibility studies.,"Many studies of speech perception assess the intelligibility of spoken sentence stimuli by means of transcription tasks ('type out what you hear'). 
The intelligibility of a given stimulus is then often expressed in terms of percentage of words correctly reported from the target sentence. Yet scoring the participants' raw responses for words correctly identified from the target sentence is a time-consuming task, and hence resource-intensive. Moreover, there is no consensus among speech scientists about what specific protocol to use for the human scoring, limiting the reliability of human scores. The present paper evaluates various forms of fuzzy string matching between participants' responses and target sentences, as automated metrics of listener transcript accuracy. We demonstrate that one particular metric, the token sort ratio, is a consistent, highly efficient, and accurate metric for automated assessment of listener transcripts, as evidenced by high correlations with human-generated scores (best correlation: r = 0.940) and a strong relationship to acoustic markers of speech intelligibility. Thus, fuzzy string matching provides a practical tool for assessment of listener transcript accuracy in large-scale speech intelligibility studies. See https://tokensortratio.netlify.app for an online implementation.",2021-03-10 +34700680,NCI Imaging Data Commons.,"

Purpose/objective(s)

National Cancer Institute (NCI) Cancer Research Data Commons (CRDC) aims to establish a cloud-based data science infrastructure. Imaging Data Commons (IDC) is a component of CRDC supported by the Cancer Moonshot™, which aims to enable access and exploration of de-identified imaging data, and to support integrated analyses with non-imaging data. IDC will interoperate with other components of CRDC, which include repositories of other types of data, such as genomics and proteomics repositories, and computational resources to perform analysis of the data. IDC builds on the strengths of the established efforts such as The Cancer Imaging Archive (TCIA) to collect and share FAIR (Findable Accessible Interoperable Reusable) imaging data.

Materials/methods

IDC uses a combination of commercially available tools and capabilities provided by Google Cloud Platform (GCP) together with a range of open-source components. While the initial focus is to support clinical radiology and radiotherapy data, IDC aims to provide similar capabilities for brightfield microscopy, multi-channel immunofluorescence and other imaging modalities. Equally important is the ability to support the results of imaging data analysis, such as annotations of regions of interest in the images or various descriptors of image findings. The IDC search portal provides an interface for exploring the data, defining cohorts, and summarizing attributes of the cohort. Images can be viewed in the integrated browser-based viewer, which uses DICOMweb to access the IDC data. IDC data is public and contains no Protected Health Information (PHI). As CRDC grows, imaging datasets will be increasingly cross-linked to genomic, proteomic, and clinical data about the subjects.

Results

The pilot of IDC was released in October 2020, including 28 collections of the TCIA: radiology images related to The Cancer Genome Atlas (TCGA) project, and several collections prioritized to establish the capabilities of IDC in handling image-derived data. DICOM and collection-level metadata is available from the BigQuery tables, and does not require a project configured with billing. The IDC portal is available at https://portal.imagingdatacommons.cancer.gov, and integrates a customized web viewer that supports visualization of both the images and image annotations (specifically, visualization of DICOM Segmentation and Radiotherapy Structure Set is supported, including multiplanar reformatting). IDC also provides documentation and a user forum.

Conclusion

The IDC pilot available to the cancer research community explores the promise of cloud-hosted public imaging collections co-located with the compute resources and a growing number of tools to support data analysis. Production release of IDC is planned for Fall 2021, and will include all of the public TCIA collections, including those that contain imaging and annotation data from radiotherapy studies and clinical trials.",2021-11-01 +31307376,PhenPath: a tool for characterizing biological functions underlying different phenotypes.,"

Background

Many diseases are associated with complex patterns of symptoms and phenotypic manifestations. Parsimonious explanations aim at reconciling the multiplicity of phenotypic traits with the perturbation of one or few biological functions. For this, it is necessary to characterize human phenotypes at the molecular and functional levels, by exploiting gene annotations and known relations among genes, diseases and phenotypes. This characterization makes it possible to implement tools for retrieving functions shared among phenotypes, co-occurring in the same patient and facilitating the formulation of hypotheses about the molecular causes of the disease.

Results

We introduce PhenPath, a new resource consisting of two parts: PhenPathDB and PhenPathTOOL. The former is a database collecting the human genes associated with the phenotypes described in Human Phenotype Ontology (HPO) and OMIM Clinical Synopses. Phenotypes are then associated with biological functions and pathways by means of NET-GE, a network-based method for functional enrichment of sets of genes. The present version considers only phenotypes related to diseases. PhenPathDB collects information for 18 OMIM Clinical synopses and 7137 HPO phenotypes, related to 4292 diseases and 3446 genes. Enrichment of Gene Ontology annotations endows some 87.7, 86.9 and 73.6% of HPO phenotypes with Biological Process, Molecular Function and Cellular Component terms, respectively. Furthermore, 58.8 and 77.8% of HPO phenotypes are also enriched for KEGG and Reactome pathways, respectively. Based on PhenPathDB, PhenPathTOOL analyzes user-defined sets of phenotypes retrieving diseases, genes and functional terms which they share. This information can provide clues for interpreting the co-occurrence of phenotypes in a patient.

Conclusions

The resource allows finding molecular features useful to investigate diseases characterized by multiple phenotypes, and by this, it can help researchers and physicians in identifying molecular mechanisms and biological functions underlying the concomitant manifestation of phenotypes. The resource is freely available at http://phenpath.biocomp.unibo.it .",2019-07-16 +30937442,The sncRNA Zoo: a repository for circulating small noncoding RNAs in animals.,"The repertoire of small noncoding RNAs (sncRNAs), particularly miRNAs, in animals is considered to be evolutionarily conserved. Studies on sncRNAs are often largely based on homology-based information, relying on genomic sequence similarity and excluding actual expression data. To obtain information on sncRNA expression (including miRNAs, snoRNAs, YRNAs and tRNAs), we performed low-input-volume next-generation sequencing of 500 pg of RNA from 21 animals at two German zoological gardens. Notably, none of the species under investigation were previously annotated in any miRNA reference database. Sequencing was performed on blood cells as they are amongst the most accessible, stable and abundant sources of the different sncRNA classes. We evaluated and compared the composition and nature of sncRNAs across the different species by computational approaches. While the distribution of sncRNAs in the different RNA classes varied significantly, general evolutionary patterns were maintained. In particular, miRNA sequences and expression were found to be even more conserved than previously assumed. 
To make the results available for other researchers, all data, including expression profiles at the species and family levels, and different tools for viewing, filtering and searching the data are freely available in the online resource ASRA (Animal sncRNA Atlas) at https://www.ccb.uni-saarland.de/asra/.",2019-05-01 +30907069,"""The Missing Link"": The Tubulin Mutation Database Connects Over 1500 Missense Mutations With Phenotypes Across Eukaryotes.","As outlined in their recent paper (A Tubulin Mutation Database: A Resource for the Cytoskeletal Community), Catherine Pham and Naomi Morrissette from the University of California, Irvine, scoured the literature and catalogued data for 489 point mutations for 𝛂-tubulin, 729 for β-tubulin, and 343 for 𝛄, ẟ, 𝛆, and 𝛇 tubulins to create the tubulin mutation database (http://tubulinmutations.bio.uci.edu). The database is a searchable catalog of missense mutations and phenotypes that is expected to grow with biannual updates. Data entries regarding the species and isoform, as well as links to available sequences and the original study which characterized the mutant are intuitively displayed and color coded (Pham & Morrissette, 2019). This database represents a unique opportunity for clinicians and cell biologists to rapidly connect sequence data to mutant phenotypes and gather primary literature which promises to facilitate discoveries on topics including microtubule dynamics, antimitotic drug use and resistance, and evolution. We expect that many researchers will find this tool of great use to their research. This article is protected by copyright. All rights reserved.",2019-02-01 +31349169,A curated knowledgebase on endocrine disrupting chemicals and their biological systems-level perturbations.,"Human well-being can be affected by exposure to several chemicals in the environment. One such group is endocrine disrupting chemicals (EDCs) that can perturb the hormonal homeostasis leading to adverse health effects. 
In this work, we have developed a detailed workflow to identify EDCs with supporting evidence of endocrine disruption in published experiments in humans or rodents. Thereafter, this workflow was used to manually evaluate more than 16,000 published research articles and identify 686 potential EDCs with published evidence in humans or rodents. Importantly, we have compiled the observed adverse effects or endocrine-specific perturbations along with the dosage information for the potential EDCs from their supporting published experiments. Subsequently, the potential EDCs were classified based on the type of supporting evidence, their environmental source and their chemical properties. Additional compiled information for potential EDCs include their chemical structure, physicochemical properties, predicted ADMET properties and target genes. In order to enable future research based on this compiled information on potential EDCs, we have built an online knowledgebase, Database of Endocrine Disrupting Chemicals and their Toxicity profiles (DEDuCT), accessible at: https://cb.imsc.res.in/deduct/. After building this comprehensive resource, we have performed a network-centric analysis of the chemical space and the associated biological space of target genes of EDCs. Specifically, we have constructed two networks of EDCs using our resource based on similarity of chemical structures or target genes. Ensuing analysis revealed a lack of correlation between chemical structure and target genes of EDCs. 
Though our detailed results highlight potential challenges in developing predictive models for EDCs, the compiled information in our resource will undoubtedly enable future research in the field, especially, those focussed towards mechanistic understanding of the systems-level perturbations caused by EDCs.",2019-07-16 +34226524,Determination of the dynamic cellular transcriptional profiles during kidney development from birth to maturity in rats by single-cell RNA sequencing.,"Recent single-cell RNA sequencing (scRNA-seq) analyses have offered much insight into the gene expression profiles in early-stage kidney development. However, comprehensive gene expression profiles from mid- and late-stage kidney development are lacking. In the present study, by using the scRNA-seq technique, we analyzed 54,704 rat kidney cells from just after birth to adulthood (six time points: postnatal days 0, 2, 5, 10, 20, and 56) including the mid and late stages of kidney development. Twenty-five original clusters and 13 different cell types were identified during these stages. Gene expression in these 13 cell types was mapped, and single cell atlas of the rat kidney from birth to maturity ( http://youngbearlab.com ) was built to enable users to search for a gene of interest and to evaluate its expression in different cells. The variation trend of six major types of kidney cells-intercalated cells of the collecting duct (CD-ICs), principal cells of the collecting duct (CD-PCs), cells of the distal convoluted tubules (DCTs), cells of the loop of Henle (LOH), podocytes (PDs), and cells of the proximal tubules (PTs)-during six postnatal time points was demonstrated. The trajectory of rat kidney development and the order of induction of the six major types of kidney cells from just after birth to maturity were determined. In addition, features of the dynamically changing genes as well as transcription factors during postnatal rat kidney development were identified. 
The present study provides a resource for achieving a deep understanding of the molecular basis of and regulatory events in the mid and late stages of kidney development.",2021-06-24 +32479913,Scoring System to Triage Patients for Spine Surgery in the Setting of Limited Resources: Application to the Coronavirus Disease 2019 (COVID-19) Pandemic and Beyond.,"

Background

As of May 4, 2020, the coronavirus disease 2019 (COVID-19) pandemic has affected >3.5 million people and touched every inhabited continent. Accordingly, it has stressed health systems worldwide, leading to the cancellation of elective surgical cases and discussions regarding health care resource rationing. It is expected that rationing of surgical resources will continue even after the pandemic peak and may recur with future pandemics, creating a need for a means of triaging patients for emergent and elective spine surgery.

Methods

Using a modified Delphi technique, a cohort of 16 fellowship-trained spine surgeons from 10 academic medical centers constructed a scoring system for the triage and prioritization of emergent and elective spine surgeries. Three separate rounds of videoconferencing and written correspondence were used to reach a final scoring system. Sixteen test cases were used to optimize the scoring system so that it could categorize cases as requiring emergent, urgent, high-priority elective, or low-priority elective scheduling.

Results

The devised scoring system included 8 independent components: neurologic status, underlying spine stability, presentation of a high-risk postoperative complication, patient medical comorbidities, expected hospital course, expected discharge disposition, facility resource limitations, and local disease burden. The resultant calculator was deployed as a freely available Web-based calculator (https://jhuspine3.shinyapps.io/SpineUrgencyCalculator/).

Conclusions

We present the first quantitative urgency scoring system for the triage and prioritization of spine surgery cases in resource-limited settings. We believe that our scoring system, although not all encompassing, has potential value as a guide for triaging spine surgical cases during the COVID pandemic and post-COVID period.",2020-05-29 +34588260,Publication practices of sub-Saharan African Cochrane authors: a bibliometric study.,"

Introduction

Cochrane Africa (https://africa.cochrane.org/) aims to increase Cochrane reviews addressing high priority questions in sub-Saharan Africa (SSA). Researchers residing in SSA, despite often drawing on Cochrane methods, training or resources, conduct and publish systematic reviews outside of Cochrane. Our objective was to investigate the extent to which Cochrane authors from SSA publish Cochrane and non-Cochrane reviews.

Methods

We conducted a bibliometric study of systematic reviews and overviews of systematic reviews from SSA, first by identifying SSA Cochrane authors, then retrieving their first and last author systematic reviews and overviews from PubMed (2008 to April 2019) and using descriptive analyses to investigate the country of origin, types of reviews and trends in publishing Cochrane and non-Cochrane systematic reviews over time. To be eligible, a review had to have predetermined objectives, eligibility criteria, at least two databases searched, data extraction, quality assessment and a first or last author with a SSA affiliation.

Results

We identified 657 Cochrane authors and 757 eligible systematic reviews. Most authors were from South Africa (n=332; 51%), followed by Nigeria (n=126; 19%). Three-quarters of the reviews (71%) were systematic reviews of interventions. The intervention reviews were more likely to be Cochrane reviews (60.3% vs 39.7%). Conversely, the overviews (23.8% vs 76.2%), qualitative reviews (14.8% vs 85.2%), diagnostic test accuracy reviews (16.1% vs 83.9%) and the 'other' reviews (11.1% vs 88.9%) were more likely to be non-Cochrane reviews. During the study period, the number of non-Cochrane reviews increased more than the number of Cochrane reviews. About a quarter of the reviews covered infectious disease topics.

Conclusion

Cochrane authors from SSA are increasingly publishing a diverse variety of systematic reviews and overviews of systematic reviews, often opting for non-Cochrane journals.",2021-09-28 +32098967,Regulatory sites for splicing in human basal ganglia are enriched for disease-relevant information.,"Genome-wide association studies have generated an increasing number of common genetic variants associated with neurological and psychiatric disease risk. An improved understanding of the genetic control of gene expression in human brain is vital considering this is the likely modus operandum for many causal variants. However, human brain sampling complexities limit the explanatory power of brain-related expression quantitative trait loci (eQTL) and allele-specific expression (ASE) signals. We address this, using paired genomic and transcriptomic data from putamen and substantia nigra from 117 human brains, interrogating regulation at different RNA processing stages and uncovering novel transcripts. We identify disease-relevant regulatory loci, find that splicing eQTLs are enriched for regulatory information of neuron-specific genes, that ASEs provide cell-specific regulatory information with evidence for cellular specificity, and that incomplete annotation of the brain transcriptome limits interpretation of risk loci for neuropsychiatric disease. This resource of regulatory data is accessible through our web server, http://braineacv2.inf.um.es/.",2020-02-25 +33968730,The Immune-Related Gene HCST as a Novel Biomarker for the Diagnosis and Prognosis of Clear Cell Renal Cell Carcinoma.,"Clear cell renal cell carcinoma (ccRCC) is the most common type of kidney tumor worldwide. Analysis of The Cancer Genome Atlas (TCGA) and Gene Expression Omnibus (GEO) databases showed that the immune-related gene (IRG) hematopoietic cell signal transducer (HCST) could provide guidance for the diagnosis, prognosis, and treatment of ccRCC. 
The RNA-seq data of ccRCC tissues were extracted from two databases: TCGA (https://www.cancer.gov/about-nci/organization/ccg/research/structural-genomics/tcga) and GEO (https://www.ncbi.nlm.nih.gov/geo/). Corresponding clinical information was downloaded from TCGA. Immune-related gene data were extracted from the IMMPORT website (https://www.immport.org/). Differential analysis with R software (https://www.r-project.org/) was used to obtain a prognosis model of ccRCC IRGs. The differences were combined with the clinical data to assess the usefulness of the HCST as a prognostic biomarker. Based on data obtained from the Oncomine (https://www.oncomine.org/), Human Protein Atlas (https://www.proteinatlas.org/), and PubMed (https://pubmed.ncbi.nlm.nih.gov/) databases, the expression levels of the HCST in ccRCC, clinical-pathological indicators of relevance, and influence on prognosis were analyzed. Regulation of the HCST gene in ccRCC was assessed by gene set enrichment analysis (GSEA). In TCGA/GEO databases, the high HCST expression in tumor tissues was significantly correlated to the TMN stage, tumor grade, invasion depth, and lymphatic metastasis (p < 0.05). The overall survival (OS) of patients with high HCST gene expression was significantly lower than that of patients with low HCST gene expression (p < 0.001). Multivariate Cox regression analysis suggested that the HCST expression level [hazard ratio (HR) = 1.630, 95% confidence interval (CI) = 1.042-2.552], tumor cell grade (HR = 1.829, 95% CI = 1.115-3.001), and distant metastasis (HR = 2.634, 95%, CI = 1.562-4.442) were independent risk factors affecting the OS of ccRCC patients (all, p < 0.05). The GSEA study showed that there was significant enrichment in cell adhesion, tumorigenesis, and immune and inflammatory responses in HCST high expression samples. 
Hematopoietic cell signal transducer expression was closely associated with the levels of infiltrating immune cells around ccRCC tissues, especially dendritic cells (DCs). In conclusion, the present study suggested that the HCST was interrelated to the clinicopathology and poor prognosis of ccRCC. High HCST expression was also closely correlated with the levels of tumor-infiltrating immune cells, especially DCs.",2021-04-23 +,18069 WISE Indiana (Wellbeing Informed by Science and Evidence in Indiana) - A state-university partnership response to the pandemic,"ABSTRACT IMPACT: The WISE Indiana COVID-19 project facilitates rapid response and access to relevant and emerging evidence-based information for state personnel, healthcare providers and systems, managed care entities, community organizations, and all others involved in a professional capacity with the pandemic response. OBJECTIVES/GOALS: The COVID-19 project was developed to assist in responding to the Indiana Department of Health’s need for rapid and evidence-informed responses to complex questions about the pandemic and best practices for preventing, mitigating, monitoring and recovering from the COVID-19 global pandemic. METHODS/STUDY POPULATION: The WISE Indiana team was activated to assist in managing the project and immediately connected with university research librarians. Through our established networks, we were able to quickly engage academic researchers and clinicians across the state to rapidly respond to key questions about COVID-19 from government leadership. Research librarians added their expertise by conducting comprehensive searches of evidence-based clinical, public health, policy, and law literature and writing up detailed annotated bibliographies. 
Academic experts were also recruited to write daily summaries of emerging COVID-19 literature for the benefit of Indiana’s frontline responders and build and maintain an online repository of evidence-based learning materials for practitioners on the front lines. RESULTS/ANTICIPATED RESULTS: This work has informed key decision-making at many levels of Indiana’s COVID-19 response. Examples include data modeling for the IN.gov COVID-19 Dashboard, the allocation of Remdesivir, decisions about resuming elective procedures, and strategies for scaling back mitigation efforts. The WISE Indiana team has been able to engage over 40 academic experts from across the state of Indiana with expertise in pulmonary, infectious disease, law, epidemiology, mental health, public health, policy, and communications to assist in responding to key questions posed by government leadership and writing summaries of emerging COVID-19 literature which is summarized and accessible through our website: https://indianactsi.org/community/monon-collaborative/covid-19/. DISCUSSION/SIGNIFICANCE OF FINDINGS: The bidirectional exchange of information through the WISE Indiana collaborative network enable our team to quickly pivot to respond to the needs of our government leadership. Our team was able to rapidly translate the evidence-based information in order to respond to the policy and health outcomes needs of the state’s response to the global pandemic.",2021-03-30 +30810209,Tetrahymena Comparative Genomics Database (TCGD): a community resource for Tetrahymena. ,"Ciliates are a large and diverse group of unicellular organisms characterized by having the following two distinct type of nuclei within a single cell: micronucleus (MIC) and macronucleus (MAC). Although the genomes of several ciliates in different groups have been sequenced, comparative genomics data for multiple species within a ciliate genus are not yet available. 
Here we collected the genome information and comparative genomics analysis results for 10 species in the Tetrahymena genus, including the previously sequenced model organism Tetrahymena thermophila and 9 newly sequenced species, and constructed a genus-level comparative analysis platform, the Tetrahymena Comparative Genomics Database (TCGD). Genome sequences, transcriptomic data, gene models, functional annotation, ortholog groups and synteny maps were built into this database and a user-friendly interface was developed for searching, visualizing and analyzing these data. In summary, the TCGD (http://ciliate.ihb.ac.cn) will be an important and useful resource for the ciliate research community.",2019-01-01 +33313778,VIPERdb v3.0: a structure-based data analytics platform for viral capsids.,"VIrus Particle ExploreR data base (VIPERdb) (http://viperdb.scripps.edu) is a curated repository of virus capsid structures and a database of structure-derived data along with various virus specific information. VIPERdb has been continuously improved for over 20 years and contains a number of virus structure analysis tools. The release of VIPERdb v3.0 contains new structure-based data analytics tools like Multiple Structure-based and Sequence Alignment (MSSA) to identify hot-spot residues within a selected group of structures and an anomaly detection application to analyze and curate the structure-derived data within individual virus families. At the time of this writing, there are 931 virus structures from 62 different virus families in the database. Significantly, the new release also contains a standalone database called 'Virus World database' (VWdb) that comprises all the characterized viruses (∼181 000) known to date, gathered from ICTVdb and NCBI, and their capsid protein sequences, organized according to their virus taxonomy with links to known structures in VIPERdb and PDB. 
Moreover, the new release of VIPERdb includes a service-oriented data engine to handle all the data access requests and provides an interface for futuristic data analytics using machine learning applications.",2021-01-01 +33904784,Cost-Utility of Group Versus Individual Acupuncture for Cancer-Related Pain Using Quality-Adjusted Life Years in a Noninferiority Trial.,"Introduction: Individual acupuncture (AP) is the gold standard method of AP delivery for cancer-related pain; however, costs can be prohibitive. Group AP allows four to six patients to be treated in a single session. This study sought to examine the cost-utility of group AP compared with individual AP from a patient perspective. Materials and Methods: Effectiveness and cost data from a noninferiority randomized trial of group versus individual AP for cancer-related pain were used. In the trial, 74 patients were randomly assigned to individual or group AP treatments twice per week for 6 weeks. The EuroQol five-dimension five level questionnaire (EQ-5D-5L) was used to assess health-related quality of life, and the EQ-5D Utility Index was used as a composite measure constituted of five domains (mobility, self-care, usual activities, anxiety-depression, and pain-discomfort). Linear mixed models were used to compare the change in EQ-5D-5L states pre-post intervention between the two arms. A cost-utility analysis was performed in terms of the incremental costs per additional quality-adjusted life year (QALY) gained. Results: Group AP participants experienced more significant relief in the pain-discomfort subscale of the EQ-5D-5L measure compared with individual AP participants (group × time, F = 6.18; p = 0.02). The effect size on pain-discomfort for group AP (d = 0.80) was higher than that of individual AP (d = 0.34). There were no significant differences between the two study arms for other subscales of the EQ-5D-5L over time. 
QALYs at 6 weeks were slightly higher for group AP (0.020) compared with individual AP (0.007) leading to an incremental QALY gained by the group arm of 0.013, but this difference was not statistically significant (p = 0.07). The cost of delivering AP treatment for the group arm over 6 weeks ($201.25) was nearly half of the individual arm ($400). Conclusions: Group AP was superior to individual AP in cancer patients. These findings have implications for the use of group AP in low-resource settings and in health care systems where AP for cancer patients is not covered by public health insurance. ClinicalTrials.gov (NCT03641222). Registered July 10, 2018-Retrospectively registered, https://clinicaltrials.gov/ct2/show/study/NCT03641222.",2021-04-27 +34024305,"Peer review of searches for studies for health technology assessments, systematic reviews, and other evidence syntheses.","

Introduction

Peer review of searches is a process whereby both the search strategies and the search process description are reviewed, ideally using an evidence-based checklist.

Rationale

As the search strategy underpins any well-conducted evidence synthesis, its quality could affect the final result. Evidence shows, however, that search strategies are prone to error.

Findings

There is increasing awareness and use of the PRESS Evidence-Based Checklist and peer review of search strategies, at the outset of evidence syntheses, prior to the searches being run, and this is now recommended by a number of evidence synthesis organizations.

Recommendations and conclusions

Searches for evidence syntheses should be peer reviewed by a suitably qualified and experienced librarian or information specialist after being designed, ideally, by another suitably qualified and experienced librarian or information specialist. Peer review of searches should take place at two important stages in the evidence synthesis process; at the outset of the project prior to the searches being run and at the prepublication stage. There is little empirical evidence, however, to support the effectiveness of peer review of searches. Further research is required to assess this. Those wishing to stay up to date with the latest developments in information retrieval, including peer review of searches, should consult the SuRe Info resource (http://www.sure-info.org), which seeks to help information specialists and others by providing easy access to the findings from current information retrieval methods research and thus support more research-based information retrieval practice.",2021-05-24 +34419470,CovidPhy: A tool for phylogeographic analysis of SARS-CoV-2 variation.,"The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) is the pathogen responsible for the coronavirus disease 2019 (COVID-19) pandemic. SARS-CoV-2 genomes have been sequenced massively and worldwide and are now available in different public genome repositories. There is much interest in generating bioinformatic tools capable to analyze and interpret SARS-CoV-2 variation. We have designed CovidPhy (http://covidphy.eu), a web interface that can process SARS-CoV-2 genome sequences in plain fasta text format or provided through identity codes from the Global Initiative on Sharing Avian Influenza Data (GISAID) or GenBank. CovidPhy aggregates information available on the large GISAID database (>1.49 M genomes). 
Sequences are first aligned against the reference sequence and the interface provides different sources of information, including automatic classification of genomes into a pre-computed phylogeny and phylogeographic information, haplogroup/lineage frequencies, and sequencing variation, indicating also if the genome contains known variants of concern (VOC). Additionally, CovidPhy allows searching for variants and haplotypes introduced by the user and includes a list of genomes that are good candidates for being responsible for large outbreaks worldwide, most likely mediated by important superspreading events, indicating their possible geographic epicenters and their relative impact as recorded in the GISAID database.",2021-08-20 +33174603,Datanator: an integrated database of molecular data for quantitatively modeling cellular behavior.,"Integrative research about multiple biochemical subsystems has significant potential to help advance biology, bioengineering and medicine. However, it is difficult to obtain the diverse data needed for integrative research. To facilitate biochemical research, we developed Datanator (https://datanator.info), an integrated database and set of tools for finding clouds of multiple types of molecular data about specific molecules and reactions in specific organisms and environments, as well as data about chemically-similar molecules and reactions in phylogenetically-similar organisms in similar environments. Currently, Datanator includes metabolite concentrations, RNA modifications and half-lives, protein abundances and modifications, and reaction rate constants about a broad range of organisms. Going forward, we aim to launch a community initiative to curate additional data. Datanator also provides tools for filtering, visualizing and exporting these data clouds. 
We believe that Datanator can facilitate a wide range of research from integrative mechanistic models, such as whole-cell models, to comparative data-driven analyses of multiple organisms.",2021-01-01 +33750020,KnetMiner: a comprehensive approach for supporting evidence-based gene discovery and complex trait analysis across species.,"The generation of new ideas and scientific hypotheses is often the result of extensive literature and database searches, but, with the growing wealth of public and private knowledge, the process of searching diverse and interconnected data to generate new insights into genes, gene networks, traits and diseases is becoming both more complex and more time-consuming. To guide this technically challenging data integration task and to make gene discovery and hypotheses generation easier for researchers, we have developed a comprehensive software package called KnetMiner which is open-source and containerized for easy use. KnetMiner is an integrated, intelligent, interactive gene and gene network discovery platform that supports scientists explore and understand the biological stories of complex traits and diseases across species. It features fast algorithms for generating rich interactive gene networks and prioritizing candidate genes based on knowledge mining approaches. KnetMiner is used in many plant science institutions and has been adopted by several plant breeding organizations to accelerate gene discovery. The software is generic and customizable and can therefore be readily applied to new species and data types; for example, it has been applied to pest insects and fungal pathogens; and most recently repurposed to support COVID-19 research. 
Here, we give an overview of the main approaches behind KnetMiner and we report plant-centric case studies for identifying genes, gene networks and trait relationships in Triticum aestivum (bread wheat), as well as, an evidence-based approach to rank candidate genes under a large Arabidopsis thaliana QTL. KnetMiner is available at: https://knetminer.org.",2021-04-05 +34817215,The ATCC Genome Portal: Microbial Genome Reference Standards with Data Provenance.,"Lack of data provenance negatively impacts scientific reproducibility and the reliability of genomic data. The ATCC Genome Portal (https://genomes.atcc.org) addresses this by providing data provenance information for microbial whole-genome assemblies originating from authenticated biological materials. To date, we have sequenced 1,579 complete genomes, including 466 type strains and 1,156 novel genomes.",2021-11-24 +,Is Coffee Intake Associated with Obesity-Related Traits? . A Mendelian Randomization/Pleiotropy Approach Using United Kingdom Biobank (UKBB) Database,"Abstract

Objectives

Epidemiological studies suggest that coffee intake (CI) is protective against body weight gain. We explored whether genetic determinants of CI are associated with obesity-related phenotypic traits, primarily body mass index (BMI).

Methods

We leveraged information from ∼354,000 individuals in the UKBB database (https://genetics.opentargets.org/) searching for genetic variants associated with CI (cutoff P < 0.5E-8). We further explored the association of these variants with BMI and other obesity-related traits (body fat percentage-BFP, obesity or waist circumference-WC) using summarized data from Neale's lab (http://www.nealelab.is/uk-biobank/).

Results

Twenty seven variants were significantly associated with CI, including rs2472297-CYP1A1/2 (P = 3.4E-116, beta(b) = 0.047) and rs4410790-AHR- (P = 3.2E-95, b = 0.039), which were previously reported to be associated with CI. Seventeen variants showed significant associations with BMI in the same direction (i.e., rs2472297- CYP1A1/2, P = 1.2E-6, b = 0.06; rs4410790-AHR, P = 1.7E-4, b = 0.04; rs589500-SEC16B, P = 2.5E-59, b = 0.22; rs1260326-GCKR, P = 8.0E-6, b = 0.05; rs3814424-MEF2C, P = 6.3E-20, b = 0.14; rs1189470082-AL355997.1, P = 4.3E-8, b = 0.07; rs9398171-LINC00222/FOXO3, P = 6.1E-10, b = 0.08; rs370535199-KBTBD2, P = 9.2E-5, b = 0.045; rs1057868-POR, P = 9.9E-6, b = 0.055; rs56094641-FTO, P = 2.1E-219, b = 0.26). The remaining 9 variants showed no associations with any obesity-related trait. One variant (rs57918684-MED13) showed a marginal and opposite effect. Beta coefficients for CI and BMI were significantly correlated (Spearman R: 0.69, P < 0.0001), which is compatible with a significant genetic correlation between both traits (rg = 0.24 ± 0.02, P = 4.06E-23). The positive association between CI and BMI is biologically supported by genetic correlations between CI and food intake (rg = 0.26 ± 0.07, P = 1.0E-4), BFP (rg = 0.16 ± 0.02, P = 2.54E-13) and WC (rg = 0.23 ± 0.02, P = 2.0E-22).

Conclusions

Variants associated with CI present direct pleiotropic effects on obesity-related traits such as BMI, BFP, and WC. If these are not causal relations, then from a Mendelian Randomization point of view, CI has an undesirable effect.

Funding Sources

Supported by grants from the National Agency for Scientific and Technological Promotion and the National Scientific and Technical Research Council (Argentina).",2020-05-29 +,Anxiety and Fear During the Covid-19 Pandemic: A Web-Based Survey of Thyroid Cancer Survivors,"Abstract Background: The coronavirus (COVID-19) pandemic has led to rapid changes in our society and healthcare system. Cancer patients and survivors may be disproportionately affected by these changes, including decreased access to healthcare, increased infection risk, and economic challenges. We sought to determine the effects of the pandemic on thyroid cancer survivors’ quality of life. Methods: An anonymous web-based survey was administered in collaboration with ThyCa: Thyroid Cancer Survivors’ Association, consisting of questions about (1) demographics, (2) thyroid cancer clinical characteristics, (3) attitudes toward and impact of COVID-19, and (4) the Patient-Reported Outcomes Measurement Information System (PROMIS) 29-item profile. The survey was linked on the ThyCa homepage. PROMIS measures were scored using item response theory models with a T-score metric relative to U.S. reference data via the HealthMeasures Scoring Service (https://www.healthmeasures.net). T-scores were analyzed using Mann-Whitney U, Wilcoxon signed-rank, Kruskal-Wallis, and Spearman’s rank correlation tests. Results: From 5/6/2020 - 10/8/2020, 505 participants accessed the survey, and all completed surveys by U.S.-based thyroid cancer survivors were analyzed (n=378, 75%). Mean age was 53 years, 89% were female, 90% were white, 74% had papillary thyroid cancer, 97% had surgery, and 70% received radioactive iodine. The vast majority agreed or strongly agreed (83%) that their lives were very different during COVID-19, as was the way they interacted with their doctors (79%). 
Less than half (43%) agreed or strongly agreed that they were satisfied with the amount of information from their doctor’s office regarding COVID-19 changes. Compared to previously-published PROMIS data for this population, T-scores were significantly higher in the domain of anxiety/fear (57.8 vs. 56.5, p<0.01) and lower for ability to participate in social roles and activities (46.2 vs. 48.1, p<0.01). Younger age was weakly correlated with greater anxiety/fear (Spearman’s rho=-0.38, p<0.01), and greater anxiety/fear was associated with pending treatment (p<0.01), lower cancer stage (p=0.01), and female sex (p=0.02). Conclusions: During the COVID-19 pandemic, thyroid cancer survivors reported increased anxiety/fear and decreased social participation. In our efforts to care for patients both physically and mentally as the pandemic continues, we must better understand their fears and concerns and improve communication about potential changes to their care.",2021-01-01 +34386815,A nomenclature for echinoderm genes.,"Echinoderm embryos and larvae are prominent experimental model systems for studying developmental mechanisms. High-quality, assembled, annotated genome sequences are now available for several echinoderm species, including representatives from most classes. The increased availability of these data necessitates the development of a nomenclature that assigns universally interpretable gene symbols to echinoderm genes to facilitate cross-species comparisons of gene functions, both within echinoderms and across other phyla. This paper describes the implementation of an improved set of echinoderm gene nomenclature guidelines that both communicates meaningful orthology information in protein-coding gene symbols and names and establishes continuity with nomenclatures developed for major vertebrate model organisms, including humans. Differences between the echinoderm gene nomenclature guidelines and vertebrate guidelines are examined and explained. 
This nomenclature incorporates novel solutions to allow for several types of orthologous relationships, including the single echinoderm genes with multiple vertebrate co-orthologs that result from whole-genome-duplication events. The current version of the Echinoderm Gene Nomenclature Guidelines can be found at https://www.echinobase.org/gene/static/geneNomenclature.jsp Database URL https://www.echinobase.org/.",2021-08-01 +34726489,"Structure-Aware Mycobacterium tuberculosis Functional Annotation Uncloaks Resistance, Metabolic, and Virulence Genes.","Accurate and timely functional genome annotation is essential for translating basic pathogen research into clinically impactful advances. Here, through literature curation and structure-function inference, we systematically update the functional genome annotation of Mycobacterium tuberculosis virulent type strain H37Rv. First, we systematically curated annotations for 589 genes from 662 publications, including 282 gene products absent from leading databases. Second, we modeled 1,711 underannotated proteins and developed a semiautomated pipeline that captured shared function between 400 protein models and structural matches of known function on Protein Data Bank, including drug efflux proteins, metabolic enzymes, and virulence factors. In aggregate, these structure- and literature-derived annotations update 940/1,725 underannotated H37Rv genes and generate hundreds of functional hypotheses. Retrospectively applying the annotation to a recent whole-genome transposon mutant screen provided missing function for 48% (13/27) of underannotated genes altering antibiotic efficacy and 33% (23/69) required for persistence during mouse tuberculosis (TB) infection. Prospective application of the protein models enabled us to functionally interpret novel laboratory generated pyrazinamide (PZA)-resistant mutants of unknown function, which implicated the emerging coenzyme A depletion model of PZA action in the mutants' PZA resistance. 
Our findings demonstrate the functional insight gained by integrating structural modeling and systematic literature curation, even for widely studied microorganisms. Functional annotations and protein structure models are available at https://tuberculosis.sdsu.edu/H37Rv in human- and machine-readable formats. IMPORTANCE Mycobacterium tuberculosis, the primary causative agent of tuberculosis, kills more humans than any other infectious bacterium. Yet 40% of its genome is functionally uncharacterized, leaving much about the genetic basis of its resistance to antibiotics, capacity to withstand host immunity, and basic metabolism yet undiscovered. Irregular literature curation for functional annotation contributes to this gap. We systematically curated functions from literature and structural similarity for over half of poorly characterized genes, expanding the functionally annotated Mycobacterium tuberculosis proteome. Applying this updated annotation to recent in vivo functional screens added functional information to dozens of clinically pertinent proteins described as having unknown function. Integrating the annotations with a prospective functional screen identified new mutants resistant to a first-line TB drug, supporting an emerging hypothesis for its mode of action. These improvements in functional interpretation of clinically informative studies underscore the translational value of this functional knowledge. Structure-derived annotations identify hundreds of high-confidence candidates for mechanisms of antibiotic resistance, virulence factors, and basic metabolism and other functions key in clinical and basic tuberculosis research. More broadly, they provide a systematic framework for improving prokaryotic reference annotations.",2021-11-02 +33599246,Curation of over 10 000 transcriptomic studies to enable data reuse. ,"Vast amounts of transcriptomic data reside in public repositories, but effective reuse remains challenging. 
Issues include unstructured dataset metadata, inconsistent data processing and quality control, and inconsistent probe-gene mappings across microarray technologies. Thus, extensive curation and data reprocessing are necessary prior to any reuse. The Gemma bioinformatics system was created to help address these issues. Gemma consists of a database of curated transcriptomic datasets, analytical software, a web interface and web services. Here we present an update on Gemma's holdings, data processing and analysis pipelines, our curation guidelines, and software features. As of June 2020, Gemma contains 10 811 manually curated datasets (primarily human, mouse and rat), over 395 000 samples and hundreds of curated transcriptomic platforms (both microarray and RNA sequencing). Dataset topics were represented with 10 215 distinct terms from 12 ontologies, for a total of 54 316 topic annotations (mean topics/dataset = 5.2). While Gemma has broad coverage of conditions and tissues, it captures a large majority of available brain-related datasets, accounting for 34% of its holdings. Users can access the curated data and differential expression analyses through the Gemma website, RESTful service and an R package. Database URL: https://gemma.msl.ubc.ca/home.html.",2021-02-01 +34888626,HolistIC: leveraging Hi-C and whole genome shotgun sequencing for double minute chromosome discovery. ,"Double minute chromosomes are acentric extrachromosomal DNA artifacts that are frequently observed in the cells of numerous cancers. They are highly amplified and contain oncogenes and drug resistance genes, making their presence a challenge for effective cancer treatment. Algorithmic discovery of double minutes (DM) can potentially improve bench-derived therapies for cancer treatment. A hindrance to this task is that DMs evolve, yielding circular chromatin that shares segments from progenitor double minutes. This creates double minutes with overlapping amplicon coordinates. 
Existing DM discovery algorithms use whole genome shotgun sequencing in isolation, which can potentially incorrectly classify DMs that share overlapping coordinates. In this study, we describe an algorithm called ""HolistIC"" that can predict double minutes in tumor genomes by integrating whole genome shotgun sequencing (WGS) and Hi-C sequencing data. The consolidation of these sources of information resolves ambiguity in double minute amplicon prediction that exists in DM prediction with WGS data used in isolation. We implemented and tested our algorithm on the tandem Hi-C and WGS datasets of three cancer datasets and a simulated dataset. Results on the cancer datasets demonstrated HolistIC's ability to predict DMs from Hi-C and WGS data in tandem. The results on the simulated data showed the HolistIC can accurately distinguish double minutes that have overlapping amplicon coordinates, an advance over methods that predict extrachromosomal amplification using WGS data in isolation. Our software, named ""HolistIC"", is available at http://www.github.com/mhayes20/HolistIC. Supplementary data are available at Bioinformatics online.",2021-12-09 +31161214,WDSPdb: an updated resource for WD40 proteins.,"

Summary

The WD40-repeat proteins are a large family of scaffold molecules that assemble complexes in various cellular processes. Obtaining their structures is the key to understanding their interaction details. We present WDSPdb 2.0, a significantly updated resource providing accurately predicted secondary and tertiary structures and featured sites annotations. Based on an optimized pipeline, WDSPdb 2.0 contains about 600 thousand entries, an increase of 10-fold, and integrates more than 37 000 variants from sources of ClinVar, Cosmic, 1000 Genomes, ExAC, IntOGen, cBioPortal and IntAct. In addition, the web site is largely improved for visualization, exploring and data downloading.

Availability and implementation

http://www.wdspdb.com/wdsp/ or http://wu.scbb.pkusz.edu.cn/wdsp/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +31265791,Linguistic Materials and Metrics for the Creation of Well-Controlled Swedish Speech Perception Tests.,"Purpose As factors influencing human word perception are important in the construction of speech perception tests used within the speech and hearing sciences, the purposes of this study were as follows: first, to develop algorithms that can be used to calculate different types of word metrics that influence the speed and accuracy of word perception and, second, to create a database in which those word metrics were calculated for a large set of Swedish words. Method Based on a revision of a large Swedish phonetic dictionary, data and algorithms were developed by which various frequency metrics, word length metrics, semantic metrics, neighborhood metrics, phonotactic metrics, and orthographic transparency metrics were calculated for each word in the dictionary. Of the various word metric algorithms used, some were Swedish language reimplementations of previously published algorithms, and some were developed in this study. Results The results of this study have been gathered in a Swedish word metric database called the AFC-list. The AFC-list consists of 816,404 phonetically transcribed Swedish words, all supplied with the word metric data calculated. The full AFC-list has been made publicly available under the Creative Commons Attribution 4.0 International license. Conclusion The results of this study constitute an extensive linguistic resource for the process of selecting test items in new well-controlled speech perception tests in the Swedish language. 
Supplemental Material https://doi.org/10.23641/asha.8330009.",2019-07-02 +31805986,The SMART App: an interactive web application for comprehensive DNA methylation analysis and visualization.,"BACKGROUND:Data mining of The Cancer Genome Atlas (TCGA) data has significantly facilitated cancer genome research and provided unprecedented opportunities for cancer researchers. However, existing web applications for DNA methylation analysis does not adequately address the need of experimental biologists, and many additional functions are often required. RESULTS:To facilitate DNA methylation analysis, we present the SMART (Shiny Methylation Analysis Resource Tool) App, a user-friendly and easy-to-use web application for comprehensively analyzing the DNA methylation data of TCGA project. The SMART App integrates multi-omics and clinical data with DNA methylation and provides key interactive and customized functions including CpG visualization, pan-cancer methylation profile, differential methylation analysis, correlation analysis and survival analysis for users to analyze the DNA methylation in diverse cancer types in a multi-dimensional manner. CONCLUSION:The SMART App serves as a new approach for users, especially wet-bench scientists with no programming background, to analyze the scientific big data and facilitate data mining. The SMART App is available at http://www.bioinfo-zs.com/smartapp.",2019-12-05 +34403408,Single-cell RNA-sequencing reveals pre-meiotic X-chromosome dosage compensation in Drosophila testis.,"Dosage compensation equalizes X-linked expression between XY males and XX females. In male fruit flies, expression levels of the X-chromosome are increased approximately two-fold to compensate for their single X chromosome. In testis, dosage compensation is thought to cease during meiosis; however, the timing and degree of the resulting transcriptional suppression is difficult to separate from global meiotic downregulation of each chromosome. 
To address this, we analyzed testis single-cell RNA-sequencing (scRNA-seq) data from two Drosophila melanogaster strains. We found evidence that the X chromosome is equally transcriptionally active as autosomes in somatic and pre-meiotic cells, and less transcriptionally active than autosomes in meiotic and post-meiotic cells. In cells experiencing dosage compensation, close proximity to MSL (male-specific lethal) chromatin entry sites (CES) correlates with increased X chromosome transcription. We found low or undetectable levels of germline expression of most msl genes, mle, roX1 and roX2 via scRNA-seq and RNA-FISH, and no evidence of germline nuclear roX1/2 localization. Our results suggest that, although dosage compensation occurs in somatic and pre-meiotic germ cells in Drosophila testis, there might be non-canonical factors involved in the dosage compensation mechanism. The single-cell expression patterns and enrichment statistics of detected genes can be explored interactively in our database: https://zhao.labapps.rockefeller.edu/gene-expr/.",2021-08-17 +34526138,Outcomes of neonatal hypothermia among very low birth weight infants: a Meta-analysis.,"

Background

Neonatal admission hypothermia (HT) is a frequently encountered problem in neonatal intensive care units (NICUs) and it has been linked to a higher risk of mortality and morbidity. However, there is a disparity in data in the existing literature regarding the prevalence and outcomes associated with HT in very low birth weight (VLBW) infants. This review aimed to provide further summary and analyses of the association between HT and adverse clinical outcomes in VLBW infants.

Methods

In July 2020, we conducted this review according to the Preferred Reporting Items for Systematic Reviews and Meta-Analyses guidelines. A systematic database search was conducted in MEDLINE (PubMed), Google Scholar, ScienceDirect, World Health Organization Virtual Health Library, Cochrane Library databases, and System for Information on Grey Literature in Europe (SIGLE). We included studies that assessed the prevalence of HT and/or the association between HT and any adverse outcomes in VLBW infants. We calculated the pooled prevalence and Odds Ratio (OR) estimates with the corresponding 95% Confidence Interval (CI) using the Comprehensive meta-analysis software version 3.3 (Biostat, Engle-wood, NJ, USA; http://www.Meta-Analysis.com ).

Results

Eighteen studies that fulfilled the eligibility criteria were meta-analyzed. The pooled prevalence of HT among VLBW infants was 48.3% (95% CI, 42.0-54.7%). HT in VLBW infants was significantly associated with mortality (OR = 1.89; 1.72-2.09), intra-ventricular hemorrhage (OR = 1.86; 1.09-3.14), bronchopulmonary dysplasia (OR = 1.28; 1.16-1.40), neonatal sepsis (OR = 1.47; 1.09-2.49), and retinopathy of prematurity (OR = 1.45; 1.28-1.72).

Conclusion

Neonatal HT rate is high in VLBW infants and it is a risk factor for mortality and morbidity in VLBW infants. This review provides a comprehensive view of the prevalence and outcomes of HT in VLBW infants.",2021-09-15 +34554866,A Systematic Review of Interventions for Multilingual Preschoolers With Speech and Language Difficulties.,"Purpose There is a shortage of information on evidence-based interventions for supporting young multilingual children. The purpose of this review was to identify interventions that have been evaluated with preschool-age multilingual children with a speech and/or language disorder or who are at risk of poor speech, language, literacy, and/or educational outcomes. Method This review considered speech, language, and early literacy interventions evaluated with preschool-age multilingual children with a speech and/or language disorder or who have been identified as being at risk of language difficulties (PROSPERO ID: 165892). The following electronic databases were searched: EBSCO (CINAHL Plus, ERIC, PsycINFO, Medline, Education) and Linguistics, Language, and Behavior Abstracts. Data were extracted describing article, participant, methodological, and intervention variables, and effect sizes. The Council for Exceptional Children's (CEC) standards for evidence-based practice were used to examine the quality of studies. Results Fifty-six relevant studies were identified in 52 articles and these studies described 4,551 participants who had speech sound disorder (six articles), developmental language disorder (11 articles), or were considered to be at risk (36 articles). The interventions targeted speech production (seven studies), language (45 studies), and early literacy (11 studies) skills. Most studies reported positive effects. Only 15 studies met all quality indicators specified by the CEC (2014) and these described 18 interventions targeting language and literacy skills. 
The only intervention with sufficient evidence to be considered an evidence-based practice was Nuestros Niños [Our Children] for children's early literacy and phonological awareness skills. Conclusions A number of high-quality studies exist that describe speech, language and/or literacy interventions for preschool-age multilingual children with a speech and/or language disorder, or who have been identified as being at risk of language difficulties. However, there remains limited evidence for specific interventions as to their ability to inform evidence-based practices. Supplemental Material https://doi.org/10.23641/asha.16632649.",2021-09-23 +,P58 Head Injury CT scan: measuring up to NICE guidelines,"Abstract

Introduction

Head injury is the most common cause of death and disability in individuals between 1-40 years in the UK, with roughly 200,000 annual admissions. The aim is to evaluate how effectively CT scans following a head injury are being performed with reference to NICE guidelines 1-hour criteria1 as patient’s prognosis could potentially be improved with early detection.

Standards

100% of patients should be scanned within 1 hour of risk factors being identified. 100% of provisional radiology reports on CT-heads should be completed within 1 hour of the scan being performed: All requests from emergency department must have clear documentation of head injury risk factors justifying scan

Methods

retrospective audit collecting data from 01/01/2020 to 14/01/2020 for 1st cycle and from 14/10/2020 to 30/10/2020 for 2nd cycle. Requests were found on the electronic request system (ICE) to assess clinical information provided by ED clinicians only taking into account those scans following a head injury according to NICE guidelines.

Results First cycle

100 scans were examined with only 36% meeting criteria for 1 hr scan. Only 19 (52%) of those patients had received all 3 measures of the standard. In total 8 patients had positive findings with only 4 patients getting scan and report on time.

Second cycle

100 scans were examined with 45% meeting criteria for 1 hr scan. 37 (83%) had received all 3 measures of the standard. In total 13 patients had positive findings with 11 patients getting scan and report on time.

Conclusion

About only half of the patients presenting with the risk factors got their scan done in line with NICE guidelines. Following change of ICE to include criteria patients were better categorized, eliminating any unnecessary scans, reducing waiting times and cost, improving patient flow in ED and all scans are now justifiable according to criteria.

References

1-National Institute for Health and Clinical Excellence. CG176. Head Injury: assessment and early management. London. January 2014. https://www.nice.org.uk/guidance/cg176:2-irefer. The Royal College of Radiologists. Making the best use of clinical radiology services 8th edition. 2017 https://www.rcr.ac.uk/sso/irefer/v8:3-https://www.rcr.ac.uk/audit/compliance-nice-guidelines-2014-traumatic-head-injury-regard-ct",2021-04-01 +,First Report of Grapevine Pinot Gris Virus and Grapevine Rupestris Stem Pitting-Associated Virus in Grapevine in Belgium,"Grapevine Pinot gris virus belongs to the genus Trichovirus of the family Betaflexiviridae from the order Tymovirales. Grapevine Pinot gris virus (GPGV) was first discovered in Italy in 2012 (Giampetruzzi et al. 2012) and later in several countries including Germany (Reynard et al. 2016) and France (Beuve et al. 2015). Grapevine rupestris stem pitting-associated virus is a member of the genus Foveavirus of the family Betaflexiviridae, order Tymovirales (Hily et al. 2018). Because grapevine is known to be a host of a wide variety of viruses, a pilot sampling (10 symptomless plants) was carried out in May 2018 and the presence of GPGV evaluated by RT-PCR (data not shown). Subsequently, a leaf sample from a GPGV-positive Vitis vinifera ‘Regent’ from a Belgian vineyard (province of Namur) was analyzed by high-throughput sequencing (HTS). The sequencing library was prepared on the template of ribosomal-depleted total RNAs (Ribo-Zero Plant Leaf Kit, Illumina) using the TruSeq Stranded Total RNA Library Prep Kit (Illumina). The sample was sequenced (2 × 150 nt) on an Illumina Nextseq 500 platform (GIGA, Liege University, Liege, Belgium). After quality control and elimination of duplicate reads, 2,934,997 high-quality paired reads were assembled de novo using SPADES as a plugin in Geneious version 10.1.5 (https://www.geneious.com). 
BLASTn analysis of the contigs against the NCBI reference database showed homologies to two grapevine viruses, GPGV (NC_015782.2) and grapevine rupestris stem pitting-associated virus (GRSPaV) (NC_001948.1). The nearly full genome sequences (7,344 nt for GPGV and 8,711 nt for GRSPaV) of both viruses were reconstructed by de novo assembly and deposited in GenBank as MN228488 (GPGV) and MN228487 (GRSPaV). BLASTn analysis indicated that the closest sequences to the GPGV isolate were KM491305 (France), KU194413 (Canada), and KR528581 (Korea) with 98.6% identity, whereas the closest sequences to the GRSPaV isolate (98.9% identity) were MG938325, MG938334, and MG938327 (France), all belonging to the molecular clade 3 of group L of GRSPaV (Hily et al. 2018). To estimate the occurrence of GPGV and GRSPaV in the vineyard, 49 samples, all asymptomatic, were randomly selected and tested together with the sequenced sample using RT-PCR. For the detection of GPGV, the primer pair GPG-14F (5′-AATTGATCCCGTGTAGTGC-3′) and GPG-632R (5′-TCCGAGGACGATGAACCTC-3′) (Glasa et al. 2014) was applied (anticipated amplicon size: 618 bp), whereas for the detection of GRSPaV, the primers GRSPV-NGS-Be-s (5′-TCTGCATTAGGCATCATGTG-3′) and GRSPV-NGS-Be-as (5′-GGCCGTTACCAATCTTCTCG-3′) were designed based on the HTS-generated sequences and used (anticipated amplicon size: 420 bp). Among the 50 samples, eight tested positive for GPGV and 12 for GRSPaV. Interestingly, four samples, including the HTS-sequenced sample, were positive for both viruses. To confirm the identity of the PCR products, amplicons from two samples for each virus were sequenced at Starseq, Mainz, Germany. The sequences of the projected GPGV amplicons (MK533603 and MK533604) showed 98.5 and 100% identity to the HTS sequence, respectively, confirming the presence of the virus in the samples. 
The sequences of the projected GRSPaV amplicons (MK569516 and MK569517) showed 100% identity to each other and 98.5% identity to the isolate 34 clone 1 from France (MG938303), but they diverged from the HTS sequence (80% identity). The results suggest the presence of divergent isolates of GRSPaV that belong to at least two distinct clusters (clades 1 and 3 in Hily et al. 2018) in the vineyard. This is the first report of grapevine viruses in Belgium. Although no detrimental effects were observed on the original plant and the two viruses are common worldwide, GPGV can be associated with severe symptoms (Giampetruzzi et al. 2012). Unveiling the presence of the viruses in Belgium contributes to understanding the occurrence of the viruses and developing management measures should they become necessary.",2020-06-01 +34964845,https://www.fungiofpakistan.com: a continuously updated online database of fungi in Pakistan. ,"The website fungiofpakistan.com is a collection of all the available data about macro- as well as micro-fungi collected from Pakistan. This website comprises reported fungal species with isolation source or host record, locality and updated classification. The data on this website is based on old literature (library data, personal data of specific authors or books that were not easily accessible to public) and recent publications. This website is an important potential platform for researchers, government officials, industries and other users. Users can provide their inputs related to missing taxa, new genera, the new record and new data. They also have the opportunity to express their opinions on valid names, invalid names and illegitimate names, with notes published in the 'Notes' section of webpage provided following review and editing by curators and fungal taxonomists. This website plays a significant contribution to our knowledge of the rich fungal diversity of Pakistan. 
However, much more sustained and detailed research is needed to fully evaluate fungal diversity in Pakistan. Undoubtedly, that many more fungi will be discovered and added in the future. https://fungiofpakistan.com/.",2021-12-01 +33588073,ASDB: A comprehensive omics database for Anopheles sinensis.,"Anopheles sinensis is a key disease vector for human malaria and parasitic diseases such as malayan filariasis, and it is considered to be one of the most important malaria vectors in China and Southeast Asia. As high-throughput sequencing and assembly technology are widely used in An. sinensis, a lot of omics data have been generated, and abundant genome, mRNA transcriptome, miRNA transcriptome and resequencing results have been accumulated. In addition, lots of valuable morphological images and publications have been produced with the in-depth studies on An. sinensis. However, the increased quantity, variety, and structure complexity of the omics data create inconveniences for researchers to use and manage this information. We have built an An. sinensis omics database (ASDB, http://asdb.jungleran.com/) - a comprehensive and integrated database to promote scientific research on An. sinensis. Docker was used to deploy a development environment and Drupal to build ASDB. ASDB provides a Blast tool to do sequence alignment of genome sequence, gene sequence and protein sequence of An. sinensis. It also offers JBrowse (a next-generation genome visualization and analysis web platform) to facilitate researchers visualize the gene structure, non-coding RNA (include miRNA, snRNA, tRNA and so on) structure and genomic variation sites as desired. ASDB has integrated various latest omics data of An. Sinensis, including de novo genome and its annotation data, genome variation data (such as SNP and InDel), transcriptome and its expression value, miRNA expression value and miRNA-mRNA interaction, metagenomes. 
The database has also included the morphological images of different developmental stages and tissues, and important literatures associated with An. sinensis. ASDB provides a user-friendly search and displays pages. The integration of these resources will contribute to the study of basic biology and functional genome of An. sinensis.",2021-02-12 +31494368,Suicides in Greece before and during the period of austerity by sex and age group: Relationship to unemployment and economic variables.,"

Background

There is disagreement on the specific mechanism through which the economic recession increased suicides in Greece. Unemployment is considered by many authors to be the determining factor but the data are inconclusive and often negative, especially concerning the temporal relationship between onset of increase in unemployment and increase in suicides AIMS: The aim of this paper was to clarify the specific role of unemployment as well as of other socioeconomic variables on specific age-by-gender groups concerning the increase in suicides.

Methods

Data of the Hellenic Statistical Authority ELSTAT (www.

Statistics

gr) were analyzed with Linear Regression Analysis and Bonferroni correction for multiple testing RESULTS: Unemployment correlates with suicide rates only in males aged 20-24, 50-54 and 60-64 years (p < 0.001). Unemployment could held responsible for an additional 148 male deaths during the period 2009-2015, which accounts for 5.3% of the total (29 additional deaths per year). The changes in all the socioeconomic conditions could held responsible for 317 cases of suicide or 9.4% of total CONCLUSIONS: The results of the current study suggest that there was a 33% increase in deaths by suicide in Greece during the early years of recession (2009-2015); one third could be directly attributed to unemployment, one third to other consequences of recession while another third is of unknown origin. The effect of unemployment is specifically restricted to males at the beginning of their working career (20-24 years old) and to middle aged (45-49 and 55-59 years old).",2019-09-03 +33970835,"""Just Engage in It or Not, You Get Out What You Put In"": Student and Staff Experiences of Feedback and Feedforward in Workplace-Based Learning Environments.","Feedback is central to student learning in the veterinary workplace. Feedforward, a related concept, is used to describe the way information about a student's performance may be used to improve their future performance. Feedback and feedforward practices are diverse, with varied student and staff understandings of the nature and purpose of feedback (feedback literacy). This study compared the practices of feedback and feedforward in a range of programs in one institution during student transitions from the classroom to workplace-based learning environments. The study adopted a broad inter-professional approach to include health care programs and social work and theater and performance studies. Profession-specific focus groups were conducted with contribution from 28 students and 31 staff from five different professions. 
Thematic analysis revealed that students and staff shared an understanding of the feedback and feedforward concepts, and both groups recognized the importance of emotional and relational aspects of the process. Students and staff across all professions recognized the impact of time constraints on the feedback process, although this was particularly highlighted in the health science professions. Social work and theater and performance studies students demonstrated a more nuanced understanding of the emotional and relational aspects of feedback and feedforward. Overall, the approach highlights similarities and differences in practices and experiences in different workplace contexts, creating opportunities for cross-disciplinary learning, which may have relevance more widely in higher education programs with workplace-based elements. The study underpinned the development of the LeapForward feedback training resource (https://bilt.online/the-leapforward-project/).",2021-05-19 +34841018,Saccadic and manual response time data on inhibition of return during and after a visual search.,"In the present paper we present a dataset that provides data of two experiments in which we investigated the presence of Inhibition of Return (IOR) during and after a visual search. Participants either had to saccade (Experiment 1 and 2) or make a manual response (Experiment 2) to a probe during a visual search task (searching for a target letter among a set of distractors) or immediately after its completion. The data consist of the unprocessed raw data and one csv-file of the processed eye tracking data on eight (Experiment 1) and 18 (Experiment 2) participants, respectively. In total, we obtained 5,116 trials in Experiment 1 and 18,424 in Experiment 2. The data set is stored at the repository DOOR hosted by the University of Krems (https://door.donau-uni.ac.at/view/o:1014). 
Detailed information about the experiments and the interpretation of the data can be found in the paper ""Post-search IOR: Searching for inhibition of return after search"" (Höfler et al., 2019) [1].",2021-11-12 +34970355,"Fine-grained, spatiotemporal datasets measuring 200 years of land development in the United States.","The collection, processing, and analysis of remote sensing data since the early 1970s has rapidly improved our understanding of change on the Earth's surface. While satellite-based Earth observation has proven to be of vast scientific value, these data are typically confined to recent decades of observation and often lack important thematic detail. Here, we advance in this arena by constructing new spatially explicit settlement data for the United States that extend back to the early 19th century and are consistently enumerated at fine spatial and temporal granularity (i.e. 250m spatial and 5-year temporal resolution). We create these time series using a large, novel building-stock database to extract and map retrospective, fine-grained spatial distributions of built-up properties in the conterminous United States from 1810 to 2015. From our data extraction, we analyse and publish a series of gridded geospatial datasets that enable novel retrospective historical analysis of the built environment at an unprecedented spatial and temporal resolution. 
The datasets are part of the Historical Settlement Data Compilation for the United States (https://dataverse.harvard.edu/dataverse/hisdacus, last access: 25 January 2021) and are available at https://doi.org/10.7910/DVN/YSWMDR (Uhl and Leyk, 2020a), https://doi.org/10.7910/DVN/SJ213V (Uhl and Leyk, 2020b), and https://doi.org/10.7910/DVN/J6CYUJ (Uhl and Leyk, 2020c).",2021-01-27 +33037820,The Bgee suite: integrated curated expression atlas and comparative transcriptomics in animals.,"Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced by integrating multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data). It is based exclusively on curated healthy wild-type expression data (e.g., no gene knock-out, no treatment, no disease), to provide a comparable reference of normal gene expression. Curation includes very large datasets such as GTEx (re-annotation of samples as 'healthy' or not) as well as many small ones. Data are integrated and made comparable between species thanks to consistent data annotation and processing, and to calls of presence/absence of expression, along with expression scores. As a result, Bgee is capable of detecting the conditions of expression of any single gene, accommodating any data type and species. Bgee provides several tools for analyses, allowing, e.g., automated comparisons of gene expression patterns within and between species, retrieval of the prefered conditions of expression of any gene, or enrichment analyses of conditions with expression of sets of genes. Bgee release 14.1 includes 29 animal species, and is available at https://bgee.org/ and through its Bioconductor R package BgeeDB.",2021-01-01 +34484220,RAPID: A Rep-Seq Dataset Analysis Platform With an Integrated Antibody Database.,"The antibody repertoire is a critical component of the adaptive immune system and is believed to reflect an individual's immune history and current immune status. 
Delineating the antibody repertoire has advanced our understanding of humoral immunity, facilitated antibody discovery, and showed great potential for improving the diagnosis and treatment of disease. However, no tool to date has effectively integrated big Rep-seq data and prior knowledge of functional antibodies to elucidate the remarkably diverse antibody repertoire. We developed a Rep-seq dataset Analysis Platform with an Integrated antibody Database (RAPID; https://rapid.zzhlab.org/), a free and web-based tool that allows researchers to process and analyse Rep-seq datasets. RAPID consolidates 521 WHO-recognized therapeutic antibodies, 88,059 antigen- or disease-specific antibodies, and 306 million clones extracted from 2,449 human IGH Rep-seq datasets generated from individuals with 29 different health conditions. RAPID also integrates a standardized Rep-seq dataset analysis pipeline to enable users to upload and analyse their datasets. In the process, users can also select set of existing repertoires for comparison. RAPID automatically annotates clones based on integrated therapeutic and known antibodies, and users can easily query antibodies or repertoires based on sequence or optional keywords. With its powerful analysis functions and rich set of antibody and antibody repertoire information, RAPID will benefit researchers in adaptive immune studies.",2021-08-13 +33511845,FMODB: The World's First Database of Quantum Mechanical Calculations for Biomacromolecules Based on the Fragment Molecular Orbital Method.,"We developed the world's first web-based public database for the storage, management, and sharing of fragment molecular orbital (FMO) calculation data sets describing the complex interactions between biomacromolecules, named FMO Database (https://drugdesign.riken.jp/FMODB/). 
Each entry in the database contains relevant background information on how the data was compiled as well as the total energy of each molecular system and interfragment interaction energy (IFIE) and pair interaction energy decomposition analysis (PIEDA) values. Currently, the database contains more than 13 600 FMO calculation data sets, and a comprehensive search function implemented at the front-end. The procedure for selecting target proteins, preprocessing the experimental structures, construction of the database, and details of the database front-end were described. Then, we demonstrated a use of the FMODB by comparing IFIE value distributions of hydrogen bond, ion-pair, and XH/π interactions obtained by FMO method to those by molecular mechanics approach. From the comparison, the statistical analysis of the data provided standard reference values for the three types of interactions that will be useful for determining whether each interaction in a given system is relatively strong or weak compared to the interactions contained within the data in the FMODB. In the final part, we demonstrate the use of the database to examine the contribution of halogen atoms to the binding affinity between human cathepsin L and its inhibitors. We found that the electrostatic term derived by PIEDA greatly correlated with the binding affinities of the halogen containing cathepsin L inhibitors, indicating the importance of QM calculation for quantitative analysis of halogen interactions. Thus, the FMO calculation data in FMODB will be useful for conducting statistical analyses to drug discovery, for conducting molecular recognition studies in structural biology, and for other studies involving quantum mechanics-based interactions.",2021-01-29 +34583224,"A comprehensive review on the phytochemistry, pharmacokinetics, and antidiabetic effect of Ginseng.","

Background

Radix Ginseng, one of the well-known medicinal herbs, has been used in the management of diabetes and its complications for more than 1000 years.

Purpose

The aim of this review is to summarize the phytochemistry and pharmacokinetics of Ginseng, and to provide evidence for the antidiabetic effects of Ginseng and its ingredients as well as the underlying mechanisms involved.

Methods

For the purpose of this review, the following databases were consulted: the PubMed Database (https://pubmed.ncbi.nlm.nih.gov), Chinese National Knowledge Infrastructure (http://www.cnki.net), National Science and Technology Library (http://www.nstl.gov.cn/), Wanfang Data (http://www.wanfangdata.com.cn/) and the Web of Science Database (http://apps.webofknowledge.com/).

Results

Ginseng exhibits glucose-lowering effects in different diabetic animal models. In addition, Ginseng may prevent the development of diabetic complications, including liver, pancreas, adipose tissue, skeletal muscle, nephropathy, cardiomyopathy, retinopathy, atherosclerosis and others. The main ingredients of Ginseng include ginsenosides and polysaccharides. The underlying mechanisms whereby this herb exerts antidiabetic activities may be attributed to the regulation of multiple signaling pathways, including IRS1/PI3K/AKT, LKB1/AMPK/FoxO1, AGEs/RAGE, MAPK/ERK, NF-κB, PPARδ/STAT3, cAMP/PKA/CERB and HIF-1α/VEGF, etc. The pharmacokinetic profiles of ginsenosides provide valuable information on therapeutic efficacy of Ginseng in diabetes. Although Ginseng is well-tolerated, dietary consumption of this herb should follow the doctors' advice.

Conclusion

Ginseng may offer an alternative strategy in protection against diabetes and its complications through the regulations of the multi-targets via various signaling pathways. Efforts to understand the underlying mechanisms with strictly-controlled animal models, combined with well-designed clinical trials and pharmacokinetic evaluation, will be important subjects of the further investigations and weigh in translational value of this herb in diabetes management.",2021-09-10 +34458160,Forensic Analysis of Human Microbiome in Skin and Body Fluids Based on Geographic Location.,"High-throughput DNA sequencing technologies have facilitated the in silico forensic analysis of human microbiome. Specific microbial species or communities obtained from the crime scene provide evidence of human contacts and their body fluids. The microbial community is influenced by geographic, ethnic, lifestyle, and environmental factors such as urbanization. An understanding of the effects of these external stressors on the human microbiome and determination of stable and changing elements are important in selecting appropriate targets for investigation. In this study, the Forensic Microbiome Database (FMD) (http://www.fmd.jcvi.org) containing the microbiome data of various locations in the human body in 35 countries was used. We focused on skin, saliva, vaginal fluid, and stool and found that the microbiome distribution differed according to the body part as well as the geographic location. In the case of skin samples, Staphylococcus species were higher than Corynebacterium species among Asians compared with Americans. Holdemanella and Fusobacterium were specific in the saliva of Koreans and Japanese populations. Lactobacillus was found in the vaginal fluids of individuals in all countries, whereas Serratia and Enterobacter were endemic to Bolivia and Congo, respectively. 
This study is the first attempt to collate and describe the observed variation in microbiomes from the forensic microbiome database. As additional microbiome databases are reported by studies worldwide, the diversity of the applications may exceed and expand beyond the initial identification of the host.",2021-08-12 +33464891,TopSuite Web Server: A Meta-Suite for Deep-Learning-Based Protein Structure and Quality Prediction.,"Proteins carry out the most fundamental processes of life such as cellular metabolism, regulation, and communication. Understanding these processes at a molecular level requires knowledge of their three-dimensional structures. Experimental techniques such as X-ray crystallography, NMR spectroscopy, and cryogenic electron microscopy can resolve protein structures but are costly and time-consuming and do not work for all proteins. Computational protein structure prediction tries to overcome these problems by predicting the structure of a new protein using existing protein structures as a resource. Here we present TopSuite, a web server for protein model quality assessment (TopScore) and template-based protein structure prediction (TopModel). TopScore provides meta-predictions for global and residue-wise model quality estimation using deep neural networks. TopModel predicts protein structures using a top-down consensus approach to aid the template selection and subsequently uses TopScore to refine and assess the predicted structures. The TopSuite Web server is freely available at https://cpclab.uni-duesseldorf.de/topsuite/.",2021-01-19 +34507528,Mining microbe-disease interactions from literature via a transfer learning model.,"

Background

Interactions of microbes and diseases are of great importance for biomedical research. However, a large number of microbe-disease interactions are hidden in the biomedical literature. Structured databases for microbe-disease interactions are limited in number. In this paper, we aim to construct a large-scale database for microbe-disease interactions automatically. We attained this goal via applying text mining methods based on a deep learning model with a moderate curation cost. We also built a user-friendly web interface that allows researchers to navigate and query required information.

Results

Firstly, we manually constructed a golden-standard corpus and a silver-standard corpus (SSC) for microbe-disease interactions for curation. Moreover, we proposed a text mining framework for microbe-disease interaction extraction based on a pretrained model BERE. We applied named entity recognition tools to detect microbe and disease mentions from the free biomedical texts. After that, we fine-tuned the pretrained model BERE to recognize relations between targeted entities, which was originally built for drug-target interactions or drug-drug interactions. The introduction of SSC for model fine-tuning greatly improved detection performance for microbe-disease interactions, with an average reduction in error of approximately 10%. The MDIDB website offers data browsing, custom searching for specific diseases or microbes, and batch downloading.

Conclusions

Evaluation results demonstrate that our method outperforms the baseline model (rule-based PKDE4J) with an average [Formula: see text]-score of 73.81%. For further validation, we randomly sampled nearly 1000 predicted interactions by our model, and manually checked the correctness of each interaction, which gives a 73% accuracy. The MDIDB website is freely available through http://dbmdi.com/index/.",2021-09-10 +,SemanticGO: a tool for gene functional similarity analysis in Arabidopsis thaliana and rice,"Gene or pathway functional similarities are important information for researchers. However, these similarities are often described sparsely and qualitatively. The latent semantic analysis of Arabidopsis thaliana (Arabidopsis) Gene Ontology (GO) data produced a set of 200-dimension feature vectors for each gene. Pathways were represented by summing the vectors of the pathway member genes. Thus, the similarities between genes and pathways were assessed. Additionally, the gene feature vectors were correlated with external gene data, including gene expression and gene network connectivity, to elucidate the associated functions. The gene feature vectors were decoded, and their applications were demonstrated. A simple online tool, SemanticGO (http://bioinformatics.fafu.edu.cn/semanticGO/), is herein provided to enable researchers to explore the similarities between genes and pathways in both Arabidopsis and rice.",2020-08-01 +33575553,UniProt-Related Documents (UniReD): assisting wet lab biologists in their quest on finding novel counterparts in a protein network.,"The in-depth study of protein-protein interactions (PPIs) is of key importance for understanding how cells operate. Therefore, in the past few years, many experimental as well as computational approaches have been developed for the identification and discovery of such interactions. 
Here, we present UniReD, a user-friendly, computational prediction tool which analyses biomedical literature in order to extract known protein associations and suggest undocumented ones. As a proof of concept, we demonstrate its usefulness by experimentally validating six predicted interactions and by benchmarking it against public databases of experimentally validated PPIs succeeding a high coverage. We believe that UniReD can become an important and intuitive resource for experimental biologists in their quest for finding novel associations within a protein network and a useful tool to complement experimental approaches (e.g. mass spectrometry) by producing sorted lists of candidate proteins for further experimental validation. UniReD is available at http://bioinformatics.med.uoc.gr/unired/.",2020-02-11 +32459338,EpiRegio: analysis and retrieval of regulatory elements linked to genes.,"A current challenge in genomics is to interpret non-coding regions and their role in transcriptional regulation of possibly distant target genes. Genome-wide association studies show that a large part of genomic variants are found in those non-coding regions, but their mechanisms of gene regulation are often unknown. An additional challenge is to reliably identify the target genes of the regulatory regions, which is an essential step in understanding their impact on gene expression. Here we present the EpiRegio web server, a resource of regulatory elements (REMs). REMs are genomic regions that exhibit variations in their chromatin accessibility profile associated with changes in expression of their target genes. EpiRegio incorporates both epigenomic and gene expression data for various human primary cell types and tissues, providing an integrated view of REMs in the genome. Our web server allows the analysis of genes and their associated REMs, including the REM's activity and its estimated cell type-specific contribution to its target gene's expression. 
Further, it is possible to explore genomic regions for their regulatory potential, investigate overlapping REMs and by that the dissection of regions of large epigenomic complexity. EpiRegio allows programmatic access through a REST API and is freely available at https://epiregio.de/.",2020-07-01 +34906207,Flimma: a federated and privacy-aware tool for differential gene expression analysis.,"Aggregating transcriptomics data across hospitals can increase sensitivity and robustness of differential expression analyses, yielding deeper clinical insights. As data exchange is often restricted by privacy legislation, meta-analyses are frequently employed to pool local results. However, the accuracy might drop if class labels are inhomogeneously distributed among cohorts. Flimma ( https://exbio.wzw.tum.de/flimma/ ) addresses this issue by implementing the state-of-the-art workflow limma voom in a federated manner, i.e., patient data never leaves its source site. Flimma results are identical to those generated by limma voom on aggregated datasets even in imbalanced scenarios where meta-analysis approaches fail.",2021-12-14 +32510549,DenvInD: dengue virus inhibitors database for clinical and molecular research. ,"Dengue virus (DENV) researchers often face challenges with the highly time-consuming process of collecting and curating information on known inhibitors during the standard drug discovery process. To this end, however, required collective information is not yet available on a single platform. Hence, we have developed the DenvInD database for experimentally validated DENV inhibitors against its known targets presently hosted at https://webs.iiitd.edu.in/raghava/denvind/. This database provides comprehensive information, i.e. PubChem IDs, SMILES, IC50, EC50, CC50, and wherever available Ki values of the 484 compounds in vitro validated as inhibitors against respective drug targets of DENV. 
Also, the DenvInD database has been linked to the user-friendly web-based interface and accessibility features, such as simple search, advanced search and data browsing. All the required data curation was conducted manually from the reported scientific literature and PubChem. The collected information was then organized into the DenvInD database using sequence query language under user interface by hypertext markup language. DenvInD is the first useful repository of its kind which would augment the DENV drug discovery research by providing essential information on known DENV inhibitors for molecular docking, computational screening, pharmacophore modeling and quantitative structure-activity relationship modeling.",2021-05-01 +34862102,Data mining of natural hazard biomarkers and metabolites with integrated metabolomic tools.,"Data mining was one of the most important challenges in natural product analysis and biomarker discovery. In this work, we proposed an integrated data analysis protocol for natural products annotation and identification in data-dependent acquisition. Firstly, natural products and structure-related compounds could be identified by comparing mass spectrum behavior with commercial standard. Secondly, diagnostic fragmentation filtering (DFF) function in MZmine (http://mzmine.github.io/) was investigated for screening specific conjugation compounds with the same neutral loss. Thirdly, we present feature-based molecular networking (FBMN) in GNPS (https://gnps.ucsd.edu/) as a chromatographic feature detection and alignment tool. In addition, FBMN could enable natural products analysis based on molecular networks. This proposed integrated protocol should facilitate metabolomic data mining and biomarker discovery.",2021-11-26 +,CSIG-13. 
TRPM7 INDUCES TUMORIGENESIS AND STEMNESS THROUGH NOTCH ACTIVATION IN GLIOMA,"Abstract Our group found that the inhibitory effect of TRPM7 on proliferation and invasion of human glioma cell is mediated by multiple mechanisms. TRPM7 regulates miR-28-5p expression, which suppresses cell proliferation and invasion in glioma cells by targeting Ras-related protein Rap1b. In particular, our group found that TRPM7 channels regulate glioma stem cell (GSC) growth/proliferation through STAT3 and Notch signaling. However, which Notch component(s) is crucial for its activity regulated by TRPM7, and its relationship with other GSC markers, such as CD133 and ALDH1, remain unclear. In the current project, we elucidate the mechanisms of TRMP7’s regulation of Notch signaling pathway that contribute to the development and progression of glioma and maintenance of self-renewal and tumorigenicity of GSC using multiple glioma cell lines (GC) with different molecular subtypes and GSCs derived from the GC lines. 1) We first analyzed TRPM7 expression using the Oncomine database (https://www.oncomine.org) and found that the TRPM7 mRNA expression is significantly increased in anaplastic astrocytoma, diffuse astrocytoma, and GBM patients compared to that in normal brain tissue controls. 2) TRPM7 is expressed in GBM, and its channel activity is correlated with Notch1 activation. Inhibition of TRPM7 downregulates Notch1 signaling, while upregulation of TRPM7 upregulates Notch1 signaling. 3) GSC markers, CD133 and ALDH1, are correlated with TRPM7 in GBM. 4) Targeting TRPM7 suppresses the growth and proliferation of glioma cells through G1/S arrests and apoptosis of glioma cells. 5) Targeting Notch1 suppresses the TRPM7-induced growth and proliferation of glioma cells, as well as the expression of GSC markers CD133 and ALDH1. 
In summary, TRPM7 is responsible for sustained Notch signaling activation, enhanced expression of GSC markers, and regulation of glioma stemness, which contribute to malignant glioma cell growth and invasion. Notch1 and ligand DII4 are key components that contribute GSC stemness.",2020-11-01 +32693783,The oilseed rape developmental expression resource: a resource for the investigation of gene expression dynamics during the floral transition in oilseed rape.,"

Background

Transcriptome time series can be used to track the expression of genes during development, allowing the timing, intensity, and dynamics of genetic programmes to be determined. Furthermore, time series analysis can reveal causal relationships between genes, leading to an understanding of how the regulatory networks are rewired during development. Due to its impact on yield, a developmental transition of agricultural interest in crops is the switch from vegetative to floral growth. We previously reported the collection of genome-wide gene expression data during the floral transition in the allopolyploid crop Brassica napus (oilseed rape, OSR). To provide the OSR research community with easy access to this dataset, we have developed the Oilseed Rape Developmental Expression Resource (ORDER; http://order.jic.ac.uk ).

Results

ORDER enables users to search for genes of interest and plot expression patterns during the floral transition in both a winter and a spring variety of OSR. We illustrate the utility of ORDER using two case studies: the first investigating the interaction between transcription factors, the second comparing genes that mediate the vernalisation response between OSR and radish (Raphanus sativus L.). All the data is downloadable and the generic website platform underlying ORDER, called AionPlot, is made freely and openly available to facilitate the dissemination of other time series datasets.

Conclusions

ORDER provides the OSR research community with access to a dataset focused on a period of OSR development important for yield. AionPlot, the platform on which ORDER is built, will allow researchers from all fields to share similar time series datasets.",2020-07-21 +33581334,OGP: A Repository of Experimentally Characterized O-glycoproteins to Facilitate Studies on O-glycosylation.,"Numerous studies on cancers, biopharmaceuticals, and clinical trials have necessitated comprehensive and precise analysis of protein O-glycosylation. However, the lack of updated and convenient databases deters the storage of and reference to emerging O-glycoprotein data. To resolve this issue, an O-glycoprotein repository named OGP was established in this work. It was constructed with a collection of O-glycoprotein data from different sources. OGP contains 9354 O-glycosylation sites and 11,633 site-specific O-glycans mapping to 2133 O-glycoproteins, and it is the largest O-glycoprotein repository thus far. Based on the recorded O-glycosylation sites, an O-glycosylation site prediction tool was developed. Moreover, an OGP-based website is already available (https://www.oglyp.org/). The website comprises four specially designed and user-friendly modules: statistical analysis, database search, site prediction, and data submission. The first version of OGP repository and the website allow users to obtain various O-glycoprotein-related information, such as protein accession Nos., O-glycosylation sites, O-glycopeptide sequences, site-specific O-glycan structures, experimental methods, and potential O-glycosylation sites. O-glycosylation data mining can be performed efficiently on this website, which will greatly facilitate related studies. 
In addition, the database is accessible from OGP website (https://www.oglyp.org/download.php).",2021-02-10 +34512723,ACE2 Netlas: In silico Functional Characterization and Drug-Gene Interactions of ACE2 Gene Network to Understand Its Potential Involvement in COVID-19 Susceptibility.,"Angiotensin-converting enzyme-2 (ACE2) receptor has been identified as the key adhesion molecule for the transmission of the SARS-CoV-2. However, there is no evidence that human genetic variation in ACE2 is singularly responsible for COVID-19 susceptibility. Therefore, we performed an integrative multi-level characterization of genes that interact with ACE2 (ACE2-gene network) for their statistically enriched biological properties in the context of COVID-19. The phenome-wide association of 51 genes including ACE2 with 4,756 traits categorized into 26 phenotype categories, showed enrichment of immunological, respiratory, environmental, skeletal, dermatological, and metabolic domains (p < 4e-4). Transcriptomic regulation of ACE2-gene network was enriched for tissue-specificity in kidney, small intestine, and colon (p < 4.7e-4). Leveraging the drug-gene interaction database we identified 47 drugs, including dexamethasone and spironolactone, among others. Considering genetic variants within ± 10 kb of ACE2-network genes we identified miRNAs whose binding sites may be altered as a consequence of genetic variation. The identified miRNAs revealed statistical over-representation of inflammation, aging, diabetes, and heart conditions. The genetic variant associations in RORA, SLC12A6, and SLC6A19 genes were observed in genome-wide association study (GWAS) of COVID-19 susceptibility. We also report the GWAS-identified variant in 3p21.31 locus, serves as trans-QTL for RORA and RORC genes. Overall, functional characterization of ACE2-gene network highlights several potential mechanisms in COVID-19 susceptibility. 
The data can also be accessed at https://gpwhiz.github.io/ACE2Netlas/.",2021-08-27 +34174821,"Construction of a high-density linkage map and graphical representation of the arrangement of transcriptome-based unigene markers on the chromosomes of onion, Allium cepa L.","

Background

Genomic information for Allium cepa L. is limited as it is heterozygous and its genome is very large. To elucidate potential SNP markers obtained by NGS, we used a complete set of A. fistulosum L.-A. cepa monosomic addition lines (MALs) and doubled haploids (DHs). These were the parental lines of an A. cepa mapping population for transcriptome-based SNP genotyping.

Results

We mapped the transcriptome sequence reads from a series of A. fistulosum-A. cepa MALs onto the unigene sequence of the doubled haploid shallot A. cepa Aggregatum group (DHA) and compared the MAL genotype call for parental bunching onion and shallot transcriptome mapping data. We identified SNP sites with at least four reads on 25,462 unigenes. They were anchored on eight A. cepa chromosomes. A single SNP site was identified on 3,278 unigenes and multiple SNPs were identified on 22,184 unigenes. The chromosome marker information was made public via the web database Allium TDB ( http://alliumtdb.kazusa.or.jp/ ). To apply transcriptome based genotyping approach for genetic mapping, we gathered RNA sequence data from 96 lines of a DHA × doubled haploid bulb onion A. cepa common onion group (DHC) mapping population. After selecting co-dominant SNP sites, 16,872 SNPs were identified in 5,339 unigenes. Of these, at least two SNPs with identical genotypes were found in 1,435 unigenes. We developed a linkage map using genotype information from these unigenes. All unigene markers mapped onto the eight chromosomes and graphical genotyping was conducted based on the unigene order information. Another 2,963 unigenes were allocated onto the eight chromosomes. To confirm the accuracy of this transcriptome-based genetic linkage map, conventional PCR-based markers were used for linkage analysis. All SNP - and PCR-based markers were mapped onto the expected linkage groups and no inconsistency was found among these chromosomal locations.

Conclusions

Effective transcriptome analysis with unique Allium resources successfully associated numerous chromosome markers with unigene information and a high-density A. cepa linkage map. The information on these unigene markers is valuable in genome sequencing and useful trait detection in Allium.",2021-06-26 +34362451,MPDB 2.0: a large scale and integrated medicinal plant database of Bangladesh.,"

Objective

MPDB 2.0 is built to be the continuation of MPDB 1.0, to serve as a more comprehensive data repertoire for Bangladeshi medicinal plants, and to provide a user-friendly interface for researchers, health practitioners, drug developers, and students who wish to study the various medicinal & nutritive plants scattered around Bangladesh and the underlying phytochemicals contributing to their efficacy in Bangladeshi folk medicine.

Results

MPDB 2.0 database ( https://www.medicinalplantbd.com/ ) comprises a collection of more than five hundred Bangladeshi medicinal plants, alongside a record of their corresponding scientific, family, and local names together with their utilized parts, information regarding ailments, active compounds, and PubMed ID of related publications. While medicinal plants are not limited to the borders of any country, Bangladesh and its Southeast Asian neighbors do boast a huge collection of potent medicinal plants with considerable folk-medicinal history compared to most other countries in the world. Development of MPDB 2.0 has been highly focused upon human diseases, albeit many of the plants indexed here can serve in developing biofuel (e.g.: Jatropha curcas used in biofuel) or bioremediation technologies (e.g.: Amaranthus cruentus helps to reduce cadmium level in soil) or nutritive diets (Terminalia chebula can be used in nutritive diets) or cosmetics (Aloe vera used in cosmetics), etc.",2021-08-06 +29045725,StemMapper: a curated gene expression database for stem cell lineage analysis.,"Transcriptomic data have become a fundamental resource for stem cell (SC) biologists as well as for a wider research audience studying SC-related processes such as aging, embryonic development and prevalent diseases including cancer, diabetes and neurodegenerative diseases. Access and analysis of the growing amount of freely available transcriptomics datasets for SCs, however, are not trivial tasks. Here, we present StemMapper, a manually curated gene expression database and comprehensive resource for SC research, built on integrated data for different lineages of human and mouse SCs. It is based on careful selection, standardized processing and stringent quality control of relevant transcriptomics datasets to minimize artefacts, and includes currently over 960 transcriptomes covering a broad range of SC types. Each of the integrated datasets was individually inspected and manually curated. 
StemMapper's user-friendly interface enables fast querying, comparison, and interactive visualization of quality-controlled SC gene expression data in a comprehensive manner. A proof-of-principle analysis discovering novel putative astrocyte/neural SC lineage markers exemplifies the utility of the integrated data resource. We believe that StemMapper can open the way for new insights and advances in SC research by greatly simplifying the access and analysis of SC transcriptomic data. StemMapper is freely accessible at http://stemmapper.sysbiolab.eu.",2018-01-01 +29092050,Ensembl Genomes 2018: an integrated omics infrastructure for non-vertebrate species.,"Ensembl Genomes (http://www.ensemblgenomes.org) is an integrating resource for genome-scale data from non-vertebrate species, complementing the resources for vertebrate genomics developed in the Ensembl project (http://www.ensembl.org). Together, the two resources provide a consistent set of programmatic and interactive interfaces to a rich range of data including genome sequence, gene models, transcript sequence, genetic variation, and comparative analysis. This paper provides an update to the previous publications about the resource, with a focus on recent developments and expansions. These include the incorporation of almost 20 000 additional genome sequences and over 35 000 tracks of RNA-Seq data, which have been aligned to genomic sequence and made available for visualization. Other advances since 2015 include the release of the database in Resource Description Framework (RDF) format, a large increase in community-derived curation, a new high-performance protein sequence search, additional cross-references, improved annotation of non-protein-coding genes, and the launch of pre-release and archival sites. 
Collectively, these changes are part of a continuing response to the increasing quantity of publicly-available genome-scale data, and the consequent need to archive, integrate, annotate and disseminate these using automated, scalable methods.",2018-01-01 +31796060,Text-mining clinically relevant cancer biomarkers for curation into the CIViC database.,"

Background

Precision oncology involves analysis of individual cancer samples to understand the genes and pathways involved in the development and progression of a cancer. To improve patient care, knowledge of diagnostic, prognostic, predisposing, and drug response markers is essential. Several knowledgebases have been created by different groups to collate evidence for these associations. These include the open-access Clinical Interpretation of Variants in Cancer (CIViC) knowledgebase. These databases rely on time-consuming manual curation from skilled experts who read and interpret the relevant biomedical literature.

Methods

To aid in this curation and provide the greatest coverage for these databases, particularly CIViC, we propose the use of text mining approaches to extract these clinically relevant biomarkers from all available published literature. To this end, a group of cancer genomics experts annotated sentences that discussed biomarkers with their clinical associations and achieved good inter-annotator agreement. We then used a supervised learning approach to construct the CIViCmine knowledgebase.

Results

We extracted 121,589 relevant sentences from PubMed abstracts and PubMed Central Open Access full-text papers. CIViCmine contains over 87,412 biomarkers associated with 8035 genes, 337 drugs, and 572 cancer types, representing 25,818 abstracts and 39,795 full-text publications.

Conclusions

Through integration with CIVIC, we provide a prioritized list of curatable clinically relevant cancer biomarkers as well as a resource that is valuable to other knowledgebases and precision cancer analysts in general. All data is publically available and distributed with a Creative Commons Zero license. The CIViCmine knowledgebase is available at http://bionlp.bcgsc.ca/civicmine/.",2019-12-03 +,"Involving undergraduate nursing students in a multidisciplinary research project: strategy for implementation, first results and future perspectives","Introduction: Engaging undergraduate students in faculty-led research is documented as a mutually valuable experience [1,2]. The VASelfCare project started in January 2018 (http://vaselfcare.rd.ciencias.ulisboa.pt); it aims to develop and test a software prototype with a virtual assistant, to facilitate self-care of older people with type 2 diabetes. The project entails the involvement of undergraduate nursing students at the consortium’s lead Institution (ESEL). The purpose of this paper is to describe the strategy for students’ participation and present its first results. Materials and methods: The implementation strategy was informed by prior experience of the research team and relevant literature. A call for applications was launched within ESEL, targeting 2nd to 4th year students. The call provided information about the project, the required student profile and the selection process. In this early phase, participation consisted of expanding the literature review on features of virtual assistant softwares. After an initial meeting, each student was assigned a specific task. Students worked in pairs and presented their work in two meetings with the multidisciplinary research team, scheduled over the course of roughly two months. Students’ opinion was ascertained at the end of this phase by means of an anonymous questionnaire, comprised by four open questions and five closed questions. 
Results: All the 12 applications were selected. There was a predominance of 3rd year students (n = 5). Of the 11 students who completed this phase, six responded to the questionnaire and consented to the use of data in publications related to the project. Participation was described in one word using terms such as “challenging” and “interesting”. Students perceived their involvement as an opportunity to contact with research and innovation, to develop competencies and to network with students with different background. Views on less positive aspects pertained mainly to the small exposure to research. Students’ expressed willingness to be involved in more phases of the project and to be informed about its results. They unanimously agreed (6/6) that participation contributed to improving their understanding of novel situations in a different scientific area and to the development of skills in scientific literature analysis. Discussion and conclusions: Thorough recruitment, assigning specific tasks, peer support and regular meetings were an effective strategy for involving students. This is in line with previous findings [2], and may be of help to researchers in other institutions and disciplines. Overall, students’ opinion on their experience were favourable. Future perspectives include awarding course credit for students’ participation, to enable more in-depth involvement without overloading their schedule and to further stimulate their interest in research.",2021-03-13 +,Context-dependent aggression toward non-nestmates in the ant Diacamma sp. from Japan,"Aggression toward competitors is a useful measure of resource ownership and defense in animals, but aggressive behavior is costly. Therefore, it is predicted that animals will display aggression only when the expected benefit to individual fitness exceeds the expected cost. In ants, when conspecific individuals belonging to different colonies encounter each other, fighting occurs, seemingly facultatively. 
However, the context that influences the expression of ants’ aggressive behavior, especially in the field, is still largely unknown. We investigated the plasticity of aggressiveness toward non-nestmates in Diacamma sp. from Japan. Our field experiment clearly showed that the same foragers that were aggressive toward non-nestmates in the vicinity of their nest changed to be non-aggressive at greater distances from the nest. Furthermore, the size of the colony to which the foragers belonged weakly but significantly affected their aggressiveness: foragers belonging to larger colonies behaved more aggressively toward non-nestmates. We discuss the possible adaptive significance of the observed facultative aggression between conspecific non-nestmates. Digital video images related to the article are available at http://www.momo-p.com/showdetail-e.php?movieid=momo190618ds01a and http://www.momo-p.com/showdetail-e.php?movieid=momo190618ds02a.",2019-09-01 +31813964,Pathway Tools version 23.0 update: software for pathway/genome informatics and systems biology.,"

Motivation

Biological systems function through dynamic interactions among genes and their products, regulatory circuits and metabolic networks. Our development of the Pathway Tools software was motivated by the need to construct biological knowledge resources that combine these many types of data, and that enable users to find and comprehend data of interest as quickly as possible through query and visualization tools. Further, we sought to support the development of metabolic flux models from pathway databases, and to use pathway information to leverage the interpretation of high-throughput data sets.

Results

In the past 4 years we have enhanced the already extensive Pathway Tools software in several respects. It can now support metabolic-model execution through the Web, it provides a more accurate gap filler for metabolic models; it supports development of models for organism communities distributed across a spatial grid; and model results may be visualized graphically. Pathway Tools supports several new omics-data analysis tools including the Omics Dashboard, multi-pathway diagrams called pathway collages, a pathway-covering algorithm for metabolomics data analysis and an algorithm for generating mechanistic explanations of multi-omics data. We have also improved the core pathway/genome databases management capabilities of the software, providing new multi-organism search tools for organism communities, improved graphics rendering, faster performance and re-designed gene and metabolite pages.

Availability

The software is free for academic use; a fee is required for commercial use. See http://pathwaytools.com.

Contact

pkarp@ai.sri.com.

Supplementary information

Supplementary data are available at Briefings in Bioinformatics online.",2021-01-01 +,Differentially methylated CpG sites associated with the high-risk group of prostate cancer,"Abstract Prostate cancer (PC) is one of the most common and socially significant oncological diseases among men. Bioinformatic analysis of omics data allows identifying molecular genetic changes associated with the disease development, as well as markers of prognosis and response to therapy. Alterations in DNA methylation and histone modification profiles widely occur in malignant tumors. In this study, we analyzed changes in DNA methylation in three groups of PC patients based on data from The Cancer Genome Atlas project (TCGA, https://portal.gdc.cancer.gov): (1) high- and intermediate-risk of the tumor progression, (2) favorable and unfavorable prognoses within the high-risk group, and (3) TMPRSS2-ERG-positive (tumors with TMPRSS2-ERG fusion transcript) and TMPRSS2-ERG-free cases within the high-risk group. We found eight CpG sites (cg07548607, cg13533340, cg16643088, cg18467168, cg23324953, cg23753247, cg25773620, and cg27148952) hypermethylated in the high-risk group compared with the intermediate-risk group of PC. Seven differentially methylated CpG sites (cg00063748, cg06834698, cg18607127, cg25273707, cg01704198, cg02067712, and cg02157224) were associated with unfavorable prognosis within the high-risk group. Six CpG sites (cg01138171, cg14060519, cg19570244, cg24492886, cg25605277, and cg26228280) were hypomethylated in TMPRSS2-ERG-positive PC compared to TMPRSS2-ERG-negative tumors within the high-risk group. The CpG sites were localized, predominantly, in regulatory genome regions belonging to promoters of the following genes: ARHGEF4, C6orf141, C8orf86, CLASP2, CSRNP1, GDA, GSX1, IQSEC1, MYOF, OR10A3, PLCD1, PLEC1, PRDM16, PTAFR, RP11-844P9.2, SCYL3, VPS13D, WT1, and ZSWIM2. 
For these genes, analysis of differential expression and its correlation with CpG site methylation (β-value level) was also performed. In addition, STK33 and PLCD1 had similar changes in colorectal cancer. As for the CSRNP1, the ARHGEF4, and the WT1 genes, misregulated expression levels were mentioned in lung, liver, pancreatic and androgen-independent prostate cancer. The potential impact of changed methylation on the mRNA level was determined for the CSRNP1, STK33, PLCD1, ARHGEF4, WT1, SCYL3, and VPS13D genes. The above CpG sites could be considered as potential prognostic markers of the high-risk group of PC.",2020-12-01 +32759687,Evaluation of an IoT Application-Scoped Access Control Model over a Publish/Subscribe Architecture Based on FIWARE. ,"The Internet of Things (IoT) brings plenty of opportunities to enhance society's activities, from improving a factory's production chain to facilitating people's household tasks. However, it has also brought new security breaches, compromising privacy and authenticity. IoT devices are vulnerable to being accessed from the Internet; they lack sufficient resources to face cyber-attack threats. Keeping a balance between access control and the devices' resource consumption has become one of the highest priorities of IoT research. In this paper, we evaluate an access control architecture based on the IAACaaS (IoT application-Scoped Access Control as a Service) model with the aim of protecting IoT devices that communicate using the Publish/Subscribe pattern. IAACaaS is based on the OAuth 2.0 authorization framework, which externalizes the identity and access control infrastructure of applications. In our evaluation, we implement the model using FIWARE Generic Enablers and deploy them for a smart buildings use case with a wireless communication. 
Then, we compare the performance of two different approaches in the data-sharing between sensors and the Publish/Subscribe broker, using Constrained Application Protocol (CoAP) and Hypertext Transfer Protocol (HTTP) protocols. We conclude that the integration of Publish/Subscribe IoT deployments with IAACaaS adds an extra layer of security and access control without compromising the system's performance.",2020-08-04 +33965348,GGVD: A goat genome variation database for tracking the dynamic evolutionary process of selective signatures and ancient introgressions.,"Understanding the evolutionary history and adaptive process depends on the knowledge that we can acquire from both ancient and modern genomic data. With the availability of a deluge of whole-genome sequencing data from ancient and modern goat samples, a user-friendly database making efficient reuse of these important resources is needed. Here, we use the genomes of 208 modern domestic goats, 24 bezoars, 46 wild ibexes, and 82 ancient goats to present a comprehensive goat genome variation database (GGVD). GGVD hosts a total of ∼41.44 million SNPs, ∼5.14 million indels, 6,193 selected loci, and 112 introgression regions. Users can freely visualize the frequency of genomic variations in geographical maps, selective sweeps in interactive tables, Manhattan plots, or line charts, as well as the heatmap patterns of the SNP genotype. Ancient data can be shown in haplotypes to track the state of genetic variants of selection and introgression events in the early, middle, and late stages. For facilitating access to sequence features, the UCSC Genome Browser, BLAT, BLAST, LiftOver, and pcadapt are also integrated into GGVD. 
GGVD will be a convenient tool for population genetic studies and molecular marker designing in goat breeding programs, and it is publicly available at http://animal.nwsuaf.edu.cn/GoatVar.",2021-03-01 +32986829,CMNPD: a comprehensive marine natural products database towards facilitating drug discovery from the ocean.,"Marine organisms are expected to be an important source of inspiration for drug discovery after terrestrial plants and microorganisms. Despite the remarkable progress in the field of marine natural products (MNPs) chemistry, there are only a few open access databases dedicated to MNPs research. To meet the growing demand for mining and sharing for MNPs-related data resources, we developed CMNPD, a comprehensive marine natural products database based on manually curated data. CMNPD currently contains more than 31 000 chemical entities with various physicochemical and pharmacokinetic properties, standardized biological activity data, systematic taxonomy and geographical distribution of source organisms, and detailed literature citations. It is an integrated platform for structure dereplication (assessment of novelty) of (marine) natural products, discovery of lead compounds, data mining of structure-activity relationships and investigation of chemical ecology. Access is available through a user-friendly web interface at https://www.cmnpd.org. We are committed to providing a free data sharing platform for not only professional MNPs researchers but also the broader scientific community to facilitate drug discovery from the ocean.",2021-01-01 +32849449,CoronaVR: A Computational Resource and Analysis of Epitopes and Therapeutics for Severe Acute Respiratory Syndrome Coronavirus-2.,"In December 2019, the Chinese city of Wuhan was the center of origin of a pneumonia-like disease outbreak with an unknown causative pathogen. 
The CDC, China, managed to track the source of infection to a novel coronavirus (2019-nCoV; SARS-CoV-2) that shares approximately 79.6% of its genome with SARS-CoV. The World Health Organization (WHO) initially declared COVID-19 as a Public Health Emergency of International Concern (PHEIC) and later characterized it as a global pandemic on March 11, 2020. Due to the novel nature of this virus, there is an urgent need for vaccines and therapeutics to control the spread of SARS-CoV-2 and its associated disease, COVID-19. Global efforts are underway to circumvent its further spread and treat COVID-19 patients through experimental vaccine formulations and therapeutic interventions, respectively. In the absence of any effective therapeutics, we have devised bioinformatics-based approaches to accelerate global efforts in the fight against SARS-CoV-2 and to assist researchers in the initial phase of vaccine and therapeutics development. In this study, we have performed comprehensive meta-analyses and developed an integrative resource, ""CoronaVR"" (http://bioinfo.imtech.res.in/manojk/coronavr/). Predominantly, we identified potential epitope-based vaccine candidates, siRNA-based therapeutic regimens, and diagnostic primers. The resource is categorized into the main sections ""Genomes,"" ""Epitopes,"" ""Therapeutics,"" and ""Primers."" The genome section harbors different components, viz, genomes, a genome browser, phylogenetic analysis, codon usage, glycosylation sites, and structural analysis. Under the umbrella of epitopes, sub-divisions, namely cross-protective epitopes, B-cell (linear/discontinuous), T-cell (CD4+/CD8+), CTL, and MHC binders, are presented. The therapeutics section has different sub-sections like siRNA, miRNAs, and sgRNAs. Further, experimentally confirmed and designed diagnostic primers are earmarked in the primers section. 
Our study provided a set of shortlisted B-cell and T-cell (CD4+ and CD8+) epitopes that can be experimentally tested for their incorporation in vaccine formulations. The list of selected primers can be used in testing kits to identify SARS-CoV-2, while the recommended siRNAs, sgRNAs, and miRNAs can be used in therapeutic regimens. We foresee that this resource will help in advancing the research against coronaviruses.",2020-07-31 +,The Panorama Data Repository for Skyline Users,"Panorama is an open-source web-based data management system that was designed and developed for Skyline, a software tool for targeted mass spectrometry-based experiments. Panorama facilitates viewing, sharing, and disseminating targeted, quantitative results contained in Skyline documents. Panorama can be installed locally, or laboratories and organizations can sign-up for fully featured workspaces on the PanoramaWeb server (https://panoramaweb.org) hosted at the University of Washington. Workspaces on PanoramaWeb can be organized as needed by the owners and configured with fine-grained access controls to enable collaborative projects. To allow unlimited file storage Panorama projects can be set up to use cloud-backed storage such as Amazon Simple Storage Service (S3). In addition to storing and sharing Skyline results, Panorama together with Skyline is used for fully automated, longitudinal monitoring of LC-MS/MS system suitability. This is done with the Panorama AutoQC pipeline which automatically imports system suitability runs into a Skyline document as they are acquired. The document is uploaded to a Panorama server and several identification free metrics such as peak area, retention time etc. can be viewed as Levey-Jennings plots in a web-browser to track normal variation and quickly detect anomalies. 
Skyline documents and raw data on PanoramaWeb that are associated with research manuscripts can be submitted to the Panorama Public repository (https://panoramaweb.org/public.url) which is hosted on PanoramaWeb and is a member of the ProteomeXchange Consortium (http://www.proteomexchange.org/). Data on Panorama Public can be explored with a variety of graphs and annotated chromatographic peak views making it easy to evaluate quantitative results contained in the associated manuscripts. Access to data in the repository is managed as required, e.g. private access to reviewers during the manuscript review process and public access upon publication.",2020-08-01 +32646415,"ECCParaCorp: a cross-lingual parallel corpus towards cancer education, dissemination and application.","

Background

The increasing global cancer incidence corresponds to serious health impact in countries worldwide. Knowledge-powered health system in different languages would enhance clinicians' healthcare practice, patients' health management and public health literacy. High-quality corpus containing cancer information is the necessary foundation of cancer education. Massive non-structural information resources exist in clinical narratives, electronic health records (EHR), etc. They can only be used for training AI models after being transformed into structured corpus. However, the scarcity of multilingual cancer corpus limits the intelligent processing, such as machine translation in medical scenarios. Thus, we created a cancer-specific cross-lingual corpus and opened it to the public for academic use.

Methods

Aiming to build an English-Chinese cancer parallel corpus, we developed a workflow of seven steps including data retrieval, data parsing, data processing, corpus implementation, assessment verification, corpus release, and application. We applied the workflow to a cross-lingual, comprehensive and authoritative cancer information resource, PDQ (Physician Data Query). We constructed, validated and released the parallel corpus, named ECCParaCorp, and made it openly accessible online.

Results

The proposed English-Chinese Cancer Parallel Corpus (ECCParaCorp) consists of 6685 aligned text pairs in XML, Excel, and CSV formats, containing 5190 sentence pairs, 1083 phrase pairs and 412 word pairs, which involve information on 6 cancers including breast cancer, liver cancer, lung cancer, esophageal cancer, colorectal cancer, and stomach cancer, and 3 cancer themes: cancer prevention, screening, and treatment. All data in the parallel corpus are online, available for users to browse and download ( http://www.phoc.org.cn/ECCParaCorp/ ).

Conclusions

ECCParaCorp is a parallel corpus focused on cancer in a cross-lingual form, which is openly accessible. It would make up the imbalance of scarce multilingual corpus resources, bridge the gap between human readable information and machine understanding data resources, and would contribute to intelligent technology application as a preparatory data foundation e.g. cancer-related machine translation, cancer system development towards medical education, and disease-oriented knowledge extraction.",2020-07-09 +30256983,BacDive in 2019: bacterial phenotypic data for High-throughput biodiversity analysis.,"The bacterial metadatabase BacDive (http://bacdive.dsmz.de) has become a comprehensive resource for structured data on the taxonomy, morphology, physiology, cultivation, isolation and molecular data of prokaryotes. With its current release (7/2018) the database offers information for 63 669 bacterial and archaeal strains including 12 715 type strains. During recent developments of BacDive, the enrichment of information on existing strains was prioritized. This has resulted in a 146% increase of database content over the past three years. Especially rich datasets were integrated from 4782 manual annotated species descriptions in the International Journal of Systematic and Evolutionary Microbiology which yielded standardized phenotypic data for 5468 type strains. Another important improvement of content was achieved through the mobilization of 8977 Analytical Profile Index (API®) test results that constitute physiological data for the identification of 5237 strains. BacDive offers a unique API® data collection with respect to size and diversity. In addition, data on fatty acid profiles and antibiotic susceptibility tests were integrated. 
A revised graphical user interface and new search tools such as the API® test finder, the TAXplorer, or the Microbial Isolation Source Search significantly improve the user experience.",2019-01-01 +34919405,Skyline Batch: An Intuitive User Interface for Batch Processing with Skyline.,"Skyline Batch is a newly developed Windows forms application that enables the easy and consistent reprocessing of data with Skyline. Skyline has made previous advances in this direction; however, none enable seamless automated reprocessing of local and remote files. Skyline keeps a log of all of the steps that were taken in the document; however, reproducing these steps takes time and allows room for human error. Skyline also has a command-line interface, enabling it to be run from a batch script, but using the program in this way requires expertise in editing these scripts. By formalizing the workflow of a highly used set of batch scripts into an intuitive and powerful user interface, Skyline Batch can reprocess data stored in remote repositories just by opening and running a Skyline Batch configuration file. When run, a Skyline Batch configuration downloads all necessary remote files and then runs a four-step Skyline workflow. By condensing the steps needed to reprocess the data into one file, Skyline Batch gives researchers the opportunity to publish their processing along with their data and other analysis files. These easily run configuration files will greatly increase the transparency and reproducibility of published work. Skyline Batch is freely available at https://skyline.ms/batch.url.",2021-12-17 +34790743,miR-34c-5p mediates the cellular malignant behaviors of oral squamous cell carcinoma through targeted binding of TRIM29.,"

Background

This investigation examined the effects of the microRNA miR-34c-5p on the proliferation, migration, and invasion of oral squamous cell carcinoma (OSCC) and the mechanisms involved.

Methods

The Gene Expression Omnibus (GEO) database was used to filter the chips, and the GEO2R software (https://www.ncbi.nlm.nih.gov/geo/geo2r/) was used to analyze the microarray data (GSE28100 and GSE45238). Gene set enrichment analysis (GSEA) was used to study the relationship between the expression of miR-34c-5p and the distant metastasis and pathological grade of OSCC. The correlation between TRIM29 (tripartite motif containing 29) expression and the malignant clinical phenotype of OSCC was also examined. The mRNA and protein expression levels of miR-34c-5p and TRIM29 were measured by real time quantitative reverse transcription polymerase chain reaction (RT-qPCR) and Western blot analysis. The proliferation, migration, invasion and apoptosis of the human oral squamous carcinoma cell lines CAL-27 and Tca8113 were assessed by performing cell-counting kit-8 (CCK-8) assays, colony formation assays, transwell tests, wound scratch tests and flow cytometry. Luciferase reporter assays were used to predict the relationship between miR-34c-5p and TRIM29. A xenograft nude model was established and used to evaluate the effect of miR-34c-5p on tumor growth in female BALB/c mice.

Results

The expression of miR-34c-5p was significantly correlated with the proliferation, migration, and metastasis of OSCC. Overexpression of miR-34c-5p promoted the proliferation, migration, and invasion of CAL-27 and Tca8113 cells, and suppressed their apoptosis. Inversely, low expression of miR-34c-5p suppressed the proliferation, migration, and invasion of CAL-27 and Tca8113 cells, and promoted their apoptosis. Overexpression of miR-34c-5p promoted tumor growth in the xenograft nude mice model. The expression of TRIM29 was related to the malignant clinical phenotype of OSCC. Overexpression of TRIM29 inhibited the proliferation, migration and invasion of CAL-27 and Tca8113 cells, and induced their apoptosis. TRIM29 knockout had just the opposite effect. Importantly, miR-34c-5p bound to TRIM29 and inhibited TRIM29 expression.

Conclusions

MiR-34c-5p regulates the proliferation, migration, invasion, and apoptosis of OSCC through targeted binding of TRIM29. This may represent a novel therapeutic target for the treatment of patients with OSCC.",2021-10-01 +33175872,High density genotype storage for plant breeding in the Chado schema of Breedbase.,"Modern breeding programs routinely use genome-wide information for selecting individuals to advance. The large volumes of genotypic information required present a challenge for data storage and query efficiency. Major use cases require genotyping data to be linked with trait phenotyping data. In contrast to phenotyping data that are often stored in relational database schemas, next-generation genotyping data are traditionally stored in non-relational storage systems due to their extremely large scope. This study presents a novel data model implemented in Breedbase (https://breedbase.org/) for uniting relational phenotyping data and non-relational genotyping data within the open-source PostgreSQL database engine. Breedbase is an open-source, web-database designed to manage all of a breeder's informatics needs: management of field experiments, phenotypic and genotypic data collection and storage, and statistical analyses. The genotyping data is stored in a PostgreSQL data-type known as binary JavaScript Object Notation (JSONb), where the JSON structures closely follow the Variant Call Format (VCF) data model. The Breedbase genotyping data model can handle different ploidy levels, structural variants, and any genotype encoded in VCF. JSONb is both compressed and indexed, resulting in a space and time efficient system. Furthermore, file caching maximizes data retrieval performance. Integration of all breeding data within the Chado database schema retains referential integrity that may be lost when genotyping and phenotyping data are stored in separate systems. 
Benchmarking demonstrates that the system is fast enough for computation of a genomic relationship matrix (GRM) and genome wide association study (GWAS) for datasets involving 1,325 diploid Zea mays, 314 triploid Musa acuminata, and 924 diploid Manihot esculenta samples genotyped with 955,690, 142,119, and 287,952 genotype-by-sequencing (GBS) markers, respectively.",2020-11-11 +33684246,Development of machine learning model algorithm for prediction of 5-year soft tissue myxoid liposarcoma survival.,"

Background

Predicting survival in myxoid liposarcoma (MLS) patients is very challenging given its propensity to metastasize and the controversial role of adjuvant therapy. The purpose of this study was to develop a machine-learning algorithm for the prediction of survival at five years for patients with MLS and externally validate it using our institutional cohort.

Methods

Two databases, the surveillance, epidemiology, and end results program (SEER) database and an institutional database, were used in this study. Five machine learning models were created based on the SEER database and performance was rated using the TRIPOD criteria. The model that performed best on the SEER data was again tested on our institutional database.

Results

The net-elastic penalized logistic regression model was the best according to our performance indicators. This model had an area under the curve (AUC) of 0.85 when compared to the SEER testing data and an AUC of 0.76 when tested against the institutional database. An application to use this calculator is available at https://sorg-apps.shinyapps.io/myxoid_liposarcoma/.

Conclusion

MLS is a soft-tissue sarcoma with adjunct treatment options that are, in part, decided by prognostic survival. We developed the first machine-learning predictive algorithm specifically for MLS using the SEER registry that retained performance during external validation with institutional data.",2021-03-08 +31665520,"SIGNOR 2.0, the SIGnaling Network Open Resource 2.0: 2019 update.","The SIGnaling Network Open Resource 2.0 (SIGNOR 2.0) is a public repository that stores signaling information as binary causal relationships between biological entities. The captured information is represented graphically as a signed directed graph. Each signaling relationship is associated to an effect (up/down-regulation) and to the mechanism (e.g. binding, phosphorylation, transcriptional activation, etc.) causing the up/down-regulation of the target entity. Since its first release, SIGNOR has undergone a significant content increase and the number of annotated causal interactions have almost doubled. SIGNOR 2.0 now stores almost 23 000 manually-annotated causal relationships between proteins and other biologically relevant entities: chemicals, phenotypes, complexes, etc. We describe here significant changes in curation policy and a new confidence score, which is assigned to each interaction. We have also improved the compliance to the FAIR data principles by providing (i) SIGNOR stable identifiers, (ii) programmatic access through REST APIs, (iii) bioschemas and (iv) downloadable data in standard-compliant formats, such as PSI-MI CausalTAB and GMT. The data are freely accessible and downloadable at https://signor.uniroma2.it/.",2020-01-01 +34384382,OrchidBase 4.0: a database for orchid genomics and molecular biology.,"

Background

The Orchid family is one of the largest families of the monocotyledons and an economically important ornamental plant worldwide. Given the pivotal role of this plant to humans, botanical researchers and breeding communities should have access to valuable genomic and transcriptomic information of this plant. Previously, we established OrchidBase, which contains expressed sequence tags (ESTs) from different tissues and developmental stages of Phalaenopsis as well as biotic and abiotic stress-treated Phalaenopsis. The database includes floral transcriptomic sequences from 10 orchid species across all the five subfamilies of Orchidaceae.

Description

Recently, the whole-genome sequences of Apostasia shenzhenica, Dendrobium catenatum, and Phalaenopsis equestris were de novo assembled and analyzed. These datasets were used to develop OrchidBase 4.0, including genomic and transcriptomic data for these three orchid species. OrchidBase 4.0 offers information for gene annotation, gene expression with fragments per kilobase of transcript per millions mapped reads (FPKM), KEGG pathways and BLAST search. In addition, assembled genome sequences and location of genes and miRNAs could be visualized by the genome browser. The online resources in OrchidBase 4.0 can be accessed by browsing or using BLAST. Users can also download the assembled scaffold sequences and the predicted gene and protein sequences of these three orchid species.

Conclusions

OrchidBase 4.0 is the first database that contains the whole-genome sequences and annotations of multiple orchid species. OrchidBase 4.0 is available at http://orchidbase.itps.ncku.edu.tw/.",2021-08-12 +34265502,Supporting the delivery of good maternity care for parents with learning disabilities.,"

Background and objective

Despite directives to improve maternity care in general and to improve care for parents with learning disabilities, the maternity experience of parents with learning disabilities is often poor and lacking reasonable adjustments to care. The objective of this study was to develop resources - in collaboration with key stakeholders - to support the workforce in delivering good maternity care to parents with learning disabilities.

Design

A two-phase mixed-methods study.

Participants

Phase 1: 16 key stakeholders (health and social care professionals, parents with learning disabilities and their informal supporters/carers) were interviewed to understand views of best practice and inform resource development. Phase 2: 20 healthcare professionals engaged with the resources and gave feedback via online survey or discussion group to further refine them.

Findings

Thematic analysis of key stakeholder interviews indicated that good maternity care for parents with learning disabilities requires a positive and proactive approach to identifying need; reasonable adjustments to communication and providing information; and professionals working together to support and enable parents.

Key conclusions

Health and social care professionals identified barriers to the delivery of good maternity care for parents with learning disabilities, including how to identify whether a parent has learning disabilities. Professionals in maternity services require additional resources to ensure parents' needs are recognised and they are provided with personalised preparation for parenthood and sufficient support.

Implications for practice

The Together Toolkit and Maternity Passport were coproduced to support the workforce to deliver good maternity care to parents with learning disabilities; these resources are free and accessible for use [https://www.surrey.ac.uk/research-projects/together-project-supporting-delivery-good-practice-maternity-services-parents-learning-disabilities]. Further evaluation will explore acceptability and perceived impact of these resources in maternity services.",2021-06-24 +,Community Connector: The AHRQ Social Determinants of Health Data Viz Challenge Winner,"

Research Objective

Clinical care accounts for a small part of the factors that affect population health. The rest is determined by social determinants of health (SDoH)—social, behavioral, and environmental factors that interact dynamically to keep people healthy. Many tools, such as the Opportunity Atlas, the City Health Dashboard, and the Robert Wood Johnson Foundation rankings, describe the social needs of populations but do not integrate a variety of federal, state, and local data sources and summarize only one community at a time, limiting the potential for peer‐to‐peer, comprehensive learning. Thus, we sought to build a common definition of local SDoH and a way to identify communities with similar needs and demographics, particularly those that have had success in addressing social needs and improving health and well‐being.

Study Design

Our Community Connector tool (https://communityconnector.mathematica.org/) is designed to summarize a community’s social needs in one picture, or fingerprint; identify communities with similar fingerprints; and compare communities across key indicators of health care utilization and cost. This fingerprint is based on outcome‐agnostic county‐level scores for six domains of SDoH identified by the Kaiser Family Foundation. We used sparse principal component analysis to determine which variables would be used in the defined SDoH scores and assigned additional variables that were not selected by this approach based on prior knowledge and strong association with the health outcomes. Each of the domain scores is a weighted average of the selected, normalized variables. The tool also provides a comparison of a county’s fingerprint to other counties with similar demographic and nonmodifiable SDoH characteristics, where the similarity is determined using a Lasso regression model.

Population Studied

We collected open‐source federal, state, and local data for the state of Colorado and targeted health outcomes related to obesity, diabetes, and kidney disease for the prototype. We used data sources such as the Colorado Department of Public Health and Environment and the Centers for Disease Control and Prevention’s Diabetes Atlas. With additional time and funding, the tool can expand and scale nationally and present analyses on a larger set of health outcomes and utilization data.

Principal Findings

Through developing the tool, we learned there is vast heterogeneity across community needs, outcomes, and data availability. This tool can help users understand community differences and explore similarities. As we assessed the app’s usability, we incorporated feedback from a variety of stakeholders into the app. From this feedback, we learned how to display the graphics and instructions in the most comprehensible way. As a result, the Community Connector has had about 30 000 views since its launch in mid‐December.

Conclusions

In late January 2020, the Agency for Healthcare Research and Quality (AHRQ) announced this tool as the grand prize winner of the AHRQ’s Visualization Resources of Community‐Level Social Determinants of Health Challenge.

Implications for Policy or Practice

This tool provides further resources and opportunities for communities to use SDoH data when identifying intervention opportunities. In addition to expanding nationally and across health care outcomes, the Community Connector could potentially allow localities to customize their search for similar counties by uploading their own county‐level data to the tool.

Primary Funding Source

Agency for Healthcare Research and Quality.",2020-08-01 +30665056,dbHDPLS: A database of human disease-related protein-ligand structures.,"Protein-ligand complexes perform specific functions, most of which are related to human diseases. The database, called as human disease-related protein-ligand structures (dbHDPLS), collected 8833 structures which were extracted from protein data bank (PDB) and other related databases. The database is annotated with comprehensive information involving ligands and drugs, related human diseases and protein-ligand interaction information, with the information of protein structures. The database may be a reliable resource for structure-based drug target discoveries and druggability predictions of protein-ligand binding sites, drug-disease relationships based on protein-ligand complex structures. It can be publicly accessed at the website: http://DeepLearner.ahu.edu.cn/web/dbDPLS/.",2019-01-11 +33797707,A network map of apelin-mediated signaling.,"The apelin receptor (APLNR) is a class A (rhodopsin-like) G-protein coupled receptor with a wide distribution throughout the human body. Activation of the apelin/APLNR system regulates AMPK/PI3K/AKT/mTOR and RAF/ERK1/2 mediated signaling pathways. APLNR activation orchestrates several downstream signaling cascades, which play diverse roles in physiological effects, including effects upon vasoconstriction, heart muscle contractility, energy metabolism regulation, and fluid homeostasis angiogenesis. We consolidated a network map of the APLNR signaling map owing to its biomedical importance. The curation of literature data pertaining to the APLNR system was performed manually by the NetPath criteria. The described apelin receptor signaling map comprises 35 activation/inhibition events, 38 catalysis events, 4 molecular associations, 62 gene regulation events, 113 protein expression types, and 4 protein translocation events. 
The APLNR signaling pathway map data is made freely accessible through the WikiPathways Database ( https://www.wikipathways.org/index.php/Pathway:WP5067 ).",2021-04-02 +32713350,Genome-wide identification and expression analysis of YTH domain-containing RNA-binding protein family in common wheat.,"

Background

N6-Methyladenosine (m6A) is the most widespread RNA modification that plays roles in the regulation of genes and genome stability. YT521-B homology (YTH) domain-containing RNA-binding proteins are important RNA binding proteins that affect the fate of m6A-containing RNA by binding m6A. Little is known about the YTH genes in common wheat (Triticum aestivum L.), one of the most important crops for humans.

Results

A total of 39 TaYTH genes were identified in common wheat, which are comprised of 13 homologous triads, and could be mapped in 18 out of the 21 chromosomes. A phylogenetic analysis revealed that the TaYTHs could be divided into two groups: YTHDF (TaDF) and YTHDC (TaDC). The TaYTHs in the same group share similar motif distributions and domain organizations, which indicates functional similarity between the closely related TaYTHs. The TaDF proteins share only one domain, which is the YTH domain. In contrast, the TaDCs possess three C3H1-type zinc finger repeats at their N-termini in addition to their central YTH domain. In TaDFs, the predicated aromatic cage pocket that binds the methylysine residue of m6A is composed of tryptophan, tryptophan, and tryptophan (WWW). In contrast, the aromatic cage pocket in the TaDCs is composed of tryptophan, tryptophan, and tyrosine (WWY). In addition to the general aspartic acid or asparagine residue used to form a hydrogen bond with N1 of m6A, histidine might be utilized in some TaDFb proteins. An analysis of the expression using both online RNA-Seq data and quantitative real-time PCR verification revealed that the TaDFa and TaDFb genes are highly expressed in various tissues/organs compared with that of TaDFcs and TaDCs. In addition, the expression of the TaYTH genes is changed in response to various abiotic stresses.

Conclusions

In this study, we identified 39 TaYTH genes from common wheat. The phylogenetic structure, chromosome distribution, and patterns of expression of these genes and their protein structures were analyzed. Our results provide a foundation for the functional analysis of TaYTHs in the future.",2020-06-23 +32554663,Interpreter costs across clinical commissioning groups in England 2017-2018: a cross sectional survey using 'freedom of information' requests. ,"Professional interpreters are considered to be the gold standard when meeting the needs of patients with limited English proficiency (LEP) in primary care. The models by which CCGs supply interpreting services (IS) vary. Many CCGs use external commercial IS, while other CCGs commission 'not for profit' services such as the Advocacy and Interpreting Service in Tower Hamlets, the Sussex Interpreting Service, and the decommissioned Sheffield Community Access and Interpreting Service. Research on comparative costs and needs of the LEP population is lacking. To compare the costs of interpreting services between CCG's in England. A cross-sectional study involving CCGs in England. A standardised request was sent to 195 CCGs inviting comment on how much the CCG spent (2017-2018) on IS. The data were plotted against a number of demographic variables (https://fingertips.phe.org.uk) and analysed using regression analysis. Survey response rate: 86% of CCGs (n = 169). Of those CCGs who responded, 39% (n = 66) did not hold IS cost data. NHS England spent £2 951 348.16 for IS services for the year 2017-2018. A positive correlation was noted with increased cost of interpreting services when plotted against increasing percentage BME or percentage birth to non-UK parents. However, there were wide variations around correlation of best fit indicating variation in spending between CCGs for similar populations. 
Inter CCG variation in correlations between demographic variables and expenditure suggests further research is needed to determine how to optimise and resource safe and equitable IS across the UK population.",2020-06-01 +33335964,Survey data on voluntary nature conservation commitments of German businesses and their perceptions towards conservation credits.,"To preserve biodiversity and ecosystem services, company engagement is crucial. However, available data on manager views and perceptions regarding nature conservation in particular is rare. The presented survey data gives insights into current levels and forms of business commitments for nature conservation. The data contributes to understanding business attitudes towards voluntary conservation action and includes information about factors that influence their engagement. Moreover, the data informs about manager perceptions towards the concept of nature conservation credits and, as such, allows for an evaluation of a certified biodiversity and ecosystem services market. Importantly, the dataset contains essential company characteristics to put responses into greater context. The scope of the survey is limited to German companies from secondary and tertiary sectors. Companies were sampled through proportional stratified random sampling based on size and location. The data was collected through a self-administered online-survey, conducted in 2019. The database comprises responses of 747 companies that logged into the online system. The survey data were in part analysed through structural equation modelling for an investigation of factors that drive voluntary conservation commitments [1]. Related to this analysis, a subset of 618 companies is available that provided sufficiently completed questionnaires. Both datasets, i.e. the raw data as well as the first subset used for analysis, are hosted in the public repository Open Research Data of the Leibniz Centre for Agricultural Landscape Research (ZALF), Germany. 
The repository also stores all coding information as well as the questionnaire: https://www.doi.org/10.4228/ZALF.DK.149. The dataset can be used, for example, by researchers from the field of environmental business management and strategy.",2020-12-08 +34656666,Ethnodermatological use of medicinal plants in India: From ayurvedic formulations to clinical perspectives - A review.,"

Ethnopharmacological relevance

Traditional knowledge is a particular form of practice or skill set that was developed in ancient times and was sustained through generations via the passing of knowledge, essentially confined within a specific tribe, local people, or family lineages. Ethnodermatological use of medicinal plants in India is still a subject to conduct more studies to see if there is chemical, microbiological, and/or clinical evidence, from a scientific perspective, of their effectiveness for those skin disorders. Thus, this review can be the basis for further studies and may provide targets for drug development.

Aim of the study

We compile and emphasize the most important part of ethnodermatology, namely, traditional knowledge of medicinal plants and their applications for several skin diseases in India. We also include a brief review and explanation on dermatology in Ayurvedic and Unani medicine. We review the pharmacological activity of extracts derived from some of the most cited plants against problem skin diseases as well.

Materials and methods

Different kinds of key phrases such as ""Indian traditional ethnodermatology"", ""ethnodermatology"", ""ethnobotany"", ""skin diseases"", ""Ayurveda dermatology"", ""pharmacological activity"" were searched in online search servers/databases such as Google Scholar (https://scholar.google.com/), ResearchGate (https://www.researchgate.net/), PubMed (https://pubmed.ncbi.nlm.nih.gov/), NISCAIR Online Periodicals Repository (NOPR) (http://nopr.niscair.res.in/). Based upon the analyses of data obtained from 178 articles, we formulated several important findings which are summarized in the Tables. A total of 119 records of plants' uses have been found across India against 39 skin diseases. These are depicted with their localities of report, parts used, and preparation and administration methods against particular skin diseases.

Results

The knowledge and utilisation of herbal medicine in the Indian subcontinent has great potential to treat different kinds of human skin disorders. The administration of extracts from most of the plant species used is topical and few only are administrated orally. We also investigated the pharmacological activity of the extracts of the most cited plants against mice, bacterial and fungal pathogens, and human cells.

Conclusions

Complementary therapy for dermatological problems and treatment remains the main option for millions of people in the Indian subcontinent. This review on the practices of ethnobotanical dermatology in India confirms the belief that their analysis will accelerate the discovery of new, effective therapeutic agents for skin diseases. However, more studies and clinical evidence are still required to determine if the identified species may contribute to skin condition treatment, particularly in atopic eczema. Today, ethnodermatology is a well-accepted international discipline and many new practices have been initiated in numerous countries. We hope this article will further accelerate the development of this area to identify a new generation of natural human skin treatments that will help meet the growing consumer demand for safe, sustainable, and natural treatments. In this context, research on plants utilised in ethnodermatology in India and elsewhere should be intensified.",2021-10-14 +32696292,TLPdb: A Resource for Thaumatin-Like Proteins.,"Antifungal proteins and peptides have drawn the attention of numerous plant biologists and Clinicians, owing to their potential value in protecting commercial crops as well as preventing fungal infections in humans. Various proteins and peptides, such as glucanases, chitinases, chitinase-like proteins, lectins, peroxidases, defensins, and lipid transfer proteins have antifungal activities. Thaumatin is a protein from a West African plant Thaumatococcus danielli that is sweet in taste but does not exhibit antifungal activities. Despite the structural similarities between thaumatins and thaumatin-like proteins (TLPs), TLPs are not sweet in taste, unlike thaumatins. We developed a thaumatin-like protein database of various organisms. TLPs are pathogenesis-related proteins (PR) with molecular masses of 20-26 kDa. 
The amino acid residues of TLPs involved in an antifungal activity remain obscure and make it hard to receive comprehensive information on TLPs. The biggest problem in the wine industry is white haze, an undesirable feature of high-quality wine. Hence, the problem may be figured out with the easy accessibility of amino acid sequences and to generate infest resistant crops. Overall, we aimed to produce a freely accessible TLP database ( https://tlpdb.cftri.com ) that would provide substantive information in understanding the mechanistic facet of TLPs. Briefly, TLPdb contains sequences, structures, and amino acid compositions of validated, published TLP protein sequences (from the plant, fungal as well as animal sources). Thus, this work may yield valuable information that may be useful in understanding the mechanistic aspects of TLP activity and in the evolution of antifungal proteins and fungal resistant crops. TLPdb is a comprehensive thaumatin-like protein resource database of various organisms. The database can serve as a unique Bioinformatics tool for understanding the TLPs. This further may help in understanding and the development of fungal resistant crops. TLPdb is freely available at https://tlpdb.cftri.com .",2020-08-01 +33750297,Rust expression browser: an open source database for simultaneous analysis of host and pathogen gene expression profiles with expVIP.,"

Background

Transcriptomics is being increasingly applied to generate new insight into the interactions between plants and their pathogens. For the wheat yellow (stripe) rust pathogen (Puccinia striiformis f. sp. tritici, Pst) RNA-based sequencing (RNA-Seq) has proved particularly valuable, overcoming the barriers associated with its obligate biotrophic nature. This includes the application of RNA-Seq approaches to study Pst and wheat gene expression dynamics over time and the Pst population composition through the use of a novel RNA-Seq based surveillance approach called ""field pathogenomics"". As a dual RNA-Seq approach, the field pathogenomics technique also provides gene expression data from the host, giving new insight into host responses. However, this has created a wealth of data for interrogation.

Results

Here, we used the field pathogenomics approach to generate 538 new RNA-Seq datasets from Pst-infected field wheat samples, doubling the amount of transcriptomics data available for this important pathosystem. We then analysed these datasets alongside 66 RNA-Seq datasets from four Pst infection time-courses and 420 Pst-infected plant field and laboratory samples that were publicly available. A database of gene expression values for Pst and wheat was generated for each of these 1024 RNA-Seq datasets and incorporated into the development of the rust expression browser ( http://www.rust-expression.com ). This enables for the first time simultaneous 'point-and-click' access to gene expression profiles for Pst and its wheat host and represents the largest database of processed RNA-Seq datasets available for any of the three Puccinia wheat rust pathogens. We also demonstrated the utility of the browser through investigation of expression of putative Pst virulence genes over time and examined the host plants response to Pst infection.

Conclusions

The rust expression browser offers immense value to the wider community, facilitating data sharing and transparency and the underlying database can be continually expanded as more datasets become publicly available.",2021-03-09 +33264401,"A new decade and new data at SoyBase, the USDA-ARS soybean genetics and genomics database.","SoyBase, a USDA genetic and genomics database, holds professionally curated soybean genetic and genomic data, which is integrated and made accessible to researchers and breeders. The site holds several reference genome assemblies, as well as genetic maps, thousands of mapped traits, expression and epigenetic data, pedigree information, and extensive variant and genotyping data sets. SoyBase displays include genetic, genomic, and epigenetic maps of the soybean genome. Gene expression data is presented in the genome viewer as heat maps and pictorial and tabular displays in gene report pages. Millions of sequence variants have been added, representing variations across various collections of cultivars. This variant data is explorable using new interactive tools to visualize the distribution of those variants across the genome, between selected accessions. SoyBase holds several reference-quality soybean genome assemblies, accessible via various query tools and browsers, including a new visualization system for exploring the soybean pan-genome. SoyBase also serves as a nexus of announcements pertinent to the greater soybean research community. The database also includes a soybean-specific anatomic and biochemical trait ontology. 
The database can be accessed at https://soybase.org.",2021-01-01 +30357347,15 years of GDR: New data and functionality in the Genome Database for Rosaceae.,"The Genome Database for Rosaceae (GDR, https://www.rosaceae.org) is an integrated web-based community database resource providing access to publicly available genomics, genetics and breeding data and data-mining tools to facilitate basic, translational and applied research in Rosaceae. The volume of data in GDR has increased greatly over the last 5 years. The GDR now houses multiple versions of whole genome assembly and annotation data from 14 species, made available by recent advances in sequencing technology. Annotated and searchable reference transcriptomes, RefTrans, combining peer-reviewed published RNA-Seq as well as EST datasets, are newly available for major crop species. Significantly more quantitative trait loci, genetic maps and markers are available in MapViewer, a new visualization tool that better integrates with other pages in GDR. Pathways can be accessed through the new GDR Cyc Pathways databases, and synteny among the newest genome assemblies from eight species can be viewed through the new synteny browser, SynView. Collated single-nucleotide polymorphism diversity data and phenotypic data from publicly available breeding datasets are integrated with other relevant data. Also, the new Breeding Information Management System allows breeders to upload, manage and analyze their private breeding data within the secure GDR server with an option to release data publicly.",2019-01-01 +33858848,HLA Ligand Atlas: a benign reference of HLA-presented peptides to improve T-cell-based cancer immunotherapy. ,"The human leucocyte antigen (HLA) complex controls adaptive immunity by presenting defined fractions of the intracellular and extracellular protein content to immune cells. Understanding the benign HLA ligand repertoire is a prerequisite to define safe T-cell-based immunotherapies against cancer. 
Due to the poor availability of benign tissues, if available, normal tissue adjacent to the tumor has been used as a benign surrogate when defining tumor-associated antigens. However, this comparison has proven to be insufficient and even resulted in lethal outcomes. In order to match the tumor immunopeptidome with an equivalent counterpart, we created the HLA Ligand Atlas, the first extensive collection of paired HLA-I and HLA-II immunopeptidomes from 227 benign human tissue samples. This dataset facilitates a balanced comparison between tumor and benign tissues on HLA ligand level. Human tissue samples were obtained from 16 subjects at autopsy, five thymus samples and two ovary samples originating from living donors. HLA ligands were isolated via immunoaffinity purification and analyzed in over 1200 liquid chromatography mass spectrometry runs. Experimentally and computationally reproducible protocols were employed for data acquisition and processing. The initial release covers 51 HLA-I and 86 HLA-II allotypes presenting 90,428 HLA-I- and 142,625 HLA-II ligands. The HLA allotypes are representative for the world population. We observe that immunopeptidomes differ considerably between tissues and individuals on source protein and HLA-ligand level. Moreover, we discover 1407 HLA-I ligands from non-canonical genomic regions. Such peptides were previously described in tumors, peripheral blood mononuclear cells (PBMCs), healthy lung tissues and cell lines. In a case study in glioblastoma, we show that potential on-target off-tumor adverse events in immunotherapy can be avoided by comparing tumor immunopeptidomes to the provided multi-tissue reference. Given that T-cell-based immunotherapies, such as CAR-T cells, affinity-enhanced T cell transfer, cancer vaccines and immune checkpoint inhibition, have significant side effects, the HLA Ligand Atlas is the first step toward defining tumor-associated targets with an improved safety profile. 
The resource provides insights into basic and applied immune-associated questions in the context of cancer immunotherapy, infection, transplantation, allergy and autoimmunity. It is publicly available and can be browsed in an easy-to-use web interface at https://hla-ligand-atlas.org .",2021-04-01 +29059374,The Pancreatic Expression Database: 2018 update.,"The Pancreatic Expression Database (PED, http://www.pancreasexpression.org) continues to be a major resource for mining pancreatic -omics data a decade after its initial release. Here, we present recent updates to PED and describe its evolution into a comprehensive resource for extracting, analysing and integrating publicly available multi-omics datasets. A new analytical module has been implemented to run in parallel with the existing literature mining functions. This analytical module has been created using rich data content derived from pancreas-related specimens available through the major data repositories (GEO, ArrayExpress) and international initiatives (TCGA, GENIE, CCLE). Researchers have access to a host of functions to tailor analyses to meet their needs. Results are presented using interactive graphics that allow the molecular data to be visualized in a user-friendly manner. Furthermore, researchers are provided with the means to superimpose layers of molecular information to gain greater insight into alterations and the relationships between them. The literature-mining module has been improved with a redesigned web appearance, restructured query platforms and updated annotations. These updates to PED are in preparation for its integration with the Pancreatic Cancer Research Fund Tissue Bank (PCRFTB), a vital resource of pancreas cancer tissue for researchers to support and promote cutting-edge research.",2018-01-01 +33729437,Tumor IsomiR Encyclopedia (TIE): a pancancer database of miRNA isoforms. ,"MicroRNAs (miRNAs) are master regulators of gene expression in cancers. 
Their sequence variants or isoforms (isomiRs) are highly abundant and possess unique functions. Given their short sequence length and high heterogeneity, mapping isomiRs can be challenging; without adequate depth and data aggregation, low frequency events are often disregarded. To address these challenges, we present the Tumor IsomiR Encyclopedia (TIE): a dynamic database of isomiRs from over 10,000 adult and pediatric tumor samples in The Cancer Genome Atlas (TCGA) and The Therapeutically Applicable Research to Generate Effective Treatments (TARGET) projects. A key novelty of TIE is its ability to annotate heterogeneous isomiR sequences and aggregate the variants obtained across all datasets. Results can be browsed online or downloaded as spreadsheets. Here we show analysis of isomiRs of miR-21 and miR-30a to demonstrate the utility of TIE. TIE search engine and data is freely available to use at https://isomir.ccr.cancer.gov/.",2021-03-17 +33705530,Development of a biomarker database toward performing disease classification and finding disease interrelations. ,"A biomarker is a measurable indicator of a disease or abnormal state of a body that plays an important role in disease diagnosis, prognosis and treatment. The biomarker has become a significant topic due to its versatile usage in the medical field and in rapid detection of the presence or severity of some diseases. The volume of biomarker data is rapidly increasing and the identified data are scattered. To provide comprehensive information, the explosively growing data need to be recorded in a single platform. There is no open-source freely available comprehensive online biomarker database. To fulfill this purpose, we have developed a human biomarker database as part of the KNApSAcK family databases which contain a vast quantity of information on the relationships between biomarkers and diseases. 
We have classified the diseases into 18 disease classes, mostly according to the National Center for Biotechnology Information definitions. Apart from this database development, we also have performed disease classification by separately using protein and metabolite biomarkers based on the network clustering algorithm DPClusO and hierarchical clustering. Finally, we reached a conclusion about the relationships among the disease classes. The human biomarker database can be accessed online and the inter-disease relationships may be helpful in understanding the molecular mechanisms of diseases. To our knowledge, this is one of the first approaches to classify diseases based on biomarkers. Database URL:  http://www.knapsackfamily.com/Biomarker/top.php.",2021-03-01 +34244700,2DProts: Database of Family-Wide Protein Secondary Structure Diagrams. ,"Secondary structures provide a deep insight into the protein architecture. They can serve for comparison between individual protein family members. The most straightforward way how to deal with protein secondary structure is its visualization using 2 D diagrams. Several software tools for the generation of 2 D diagrams were developed. Unfortunately, they create 2 D diagrams based on only a single protein. Therefore, 2 D diagrams of two proteins from one family markedly differ. For this reason, we developed the 2DProts database, which contains secondary structure 2 D diagrams for all domains from the CATH and all proteins from PDB databases. These 2 D diagrams are generated based on a whole protein family, and they also consider information about the 3 D arrangement of secondary structure elements. Moreover, 2DProts database contains multiple 2 D diagrams, which provide an overview of a whole protein family's secondary structures. 2DProts is updated weekly and is integrated into CATH. Freely accessible at https://2dprots.ncbr.muni.cz. The web interface was implemented in JavaScript. The database was implemented in Python. 
Supplementary data are available at Bioinformatics online.",2021-07-09 +33196830,GenBank.,"GenBank® (https://www.ncbi.nlm.nih.gov/genbank/) is a comprehensive, public database that contains 9.9 trillion base pairs from over 2.1 billion nucleotide sequences for 478 000 formally described species. Daily data exchange with the European Nucleotide Archive and the DNA Data Bank of Japan ensures worldwide coverage. Recent updates include new resources for data from the SARS-CoV-2 virus, updates to the NCBI Submission Portal and associated submission wizards for dengue and SARS-CoV-2 viruses, new taxonomy queries for viruses and prokaryotes, and simplified submission processes for EST and GSS sequences.",2021-01-01 +33765325,A Bayesian spatial model for imaging genetics.,"We develop a Bayesian bivariate spatial model for multivariate regression analysis applicable to studies examining the influence of genetic variation on brain structure. Our model is motivated by an imaging genetics study of the Alzheimer's Disease Neuroimaging Initiative (ADNI), where the objective is to examine the association between images of volumetric and cortical thickness values summarizing the structure of the brain as measured by magnetic resonance imaging (MRI) and a set of 486 single nucleotide polymorphism (SNPs) from 33 Alzheimer's disease (AD) candidate genes obtained from 632 subjects. A bivariate spatial process model is developed to accommodate the correlation structures typically seen in structural brain imaging data. First, we allow for spatial correlation on a graph structure in the imaging phenotypes obtained from a neighborhood matrix for measures on the same hemisphere of the brain. Second, we allow for correlation in the same measures obtained from different hemispheres (left/right) of the brain. We develop a mean-field variational Bayes algorithm and a Gibbs sampling algorithm to fit the model. We also incorporate Bayesian false discovery rate (FDR) procedures to select SNPs. 
We implement the methodology in a new release of the R package bgsmtr. We show that the new spatial model demonstrates superior performance over a standard model in our application. Data used in the preparation of this article were obtained from the ADNI database (https://adni.loni.usc.edu).",2021-04-19 +34310736,Newly defined allergens in the WHO/IUIS Allergen Nomenclature Database during 01/2019-03/2021.,"The WHO/IUIS Allergen Nomenclature Database (http://allergen.org) provides up-to-date expert-reviewed data on newly discovered allergens and their unambiguous nomenclature to allergen researchers worldwide. This review discusses the 106 allergens that were accepted by the Allergen Nomenclature Sub-Committee between 01/2019 and 03/2021. Information about protein family membership, patient cohorts, and assays used for allergen characterization is summarized. A first allergenic fungal triosephosphate isomerase, Asp t 36, was discovered in Aspergillus terreus. Plant allergens contained 1 contact, 38 respiratory, and 16 food allergens. Can s 4 from Indian hemp was identified as the first allergenic oxygen-evolving enhancer protein 2 and Cic a 1 from chickpeas as the first allergenic group 4 late embryogenesis abundant protein. Among the animal allergens were 19 respiratory, 28 food, and 3 venom allergens. Important discoveries include Rap v 2, an allergenic paramyosin in molluscs, and Sal s 4 and Pan h 4, allergenic fish tropomyosins. Paramyosins and tropomyosins were previously known mainly as arthropod allergens. Collagens from barramundi, Lat c 6, and salmon, Sal s 6, were the first members from the collagen superfamily added to the database. In summary, the addition of 106 new allergens to the previously listed 930 allergens reflects the continuous linear growth of the allergen database. 
In addition, 17 newly described allergen sources were included.",2021-08-05 +33204420,"Risk factors, clinical outcomes and predictors of stroke mortality in Sierra Leoneans: A retrospective hospital cohort study.","

Background

Stroke data from Sierra Leone is limited, despite the increase in global burden of the disease. The aim of this study was to assess the risk factors, clinical outcomes and predictors of stroke mortality at a tertiary hospital in Freetown, Sierra Leone.

Methods

This retrospective cohort study was conducted on stroke patients admitted at the Connaught Teaching Hospital between January 1 and December 31, 2018. Clinical data related to stroke, with variables including patients' demographics, stroke subtype, vascular risk factors, modified Rankin Scale (mRS), and outcomes were documented. In-hospital mortality, associated risk factors and predictors of stroke were determined. The study was approved by the Sierra Leone Ethics and Scientific Review Committee. It was registered under Research Registry https://www.researchregistry.com/browse-the-registry#home/ with the unique identifying number researchregistry6009.

Result

We studied 178 (95 male and 83 female) patients. The mean age was 59.8 ± 14.0 years, median was 58.1 years (range: 29-88 years). The commonest risk factors were hypertension (84.3%), tobacco smoking (35.9%) and alcohol (31.4%). Ischemic stroke confirmed by CT scan was 76.3%. In-hospital mortality was 34.8% and at discharge, mean modified Rankin Score (mRS) was 3.89 ± 1.62. The independent predictors for stroke mortality were: hypertension [AOR = 2.2; C.I 95%: (1.32-3.80), p = 0.001], previous stroke [AOR = 2.31; C.I 95%: (1.43-5.74), p = 0.001], GCS < 8 [AOR = 6.06; C.I 95%: (3.17-12.79), p < 0.001], clinical diagnosis in the absence of imaging [AOR = 3.11; C.I 95%: (2.1-9.87), p = 0.001], hemorrhagic stroke [AOR = 2.96; C.I 95%: (1.96-9.54), p < 0.001], and aspiration pneumonia [AOR = 3.03; C.I 95%: (1.44-6.36), p = 0.001]. Women had poorer outcome than men.

Conclusion

This study highlights a high stroke mortality in a resource limited hospital, with some stroke patients having difficulties in accessing Computer Tomogram (CT) scan services. It illustrates the need to establish a stroke care setting to improve the quality of stroke care.",2020-11-04 +,"First genome sequence of Chilean Brucella canis SCL strain provides insights on the epidemiology and virulence factors, explaining differences between geographical origins","Brucella canis is the etiological agent of canine brucellosis, a worldwide neglected zoonosis that constitutes one of the major infectious causes of infertility and reproductive failure in dogs. Although genomic information available for this pathogen has increased in recent years, here we report the first genome sequencing of a B. canis strain in Chile, and the differences in virulence genes with other B. canis strains.Genome assembly produced a total length of 3,289,216 bp, N50 of 95,163 and GC% of 57.27, organized in 54 contigs in chromosome I, and 21 contigs in chromosome II. The genome annotation identified a total of 1981 CDS, 3 rRNA and 36 tRNA in chromosome I, and 1113 CDS and 10 tRNA in chromosome II. There is little variation between the different strains and the SCL isolate. Phylogenetic analysis showed that the Chilean SCL strain is closely related to B. canis and B. suis strains. Small differences were found when compared to the Serbian isolate, but all strains shared the same recent common ancestor. Finally, changes in the sequence of some virulence factors showed that the SCL strain is similar to other South American B. canis strains.This work sequenced and characterized the complete genome of B. canis strain SCL, evidencing the complete presence of all the genes of the virB operon, and minor changes in outer membrane proteins and in the urease operon. Our data suggest that B. 
canis was introduced from North America and then spread throughout the South American continent.How to cite: Borie C, Bravo C, Dettleff P, et al. First genome sequence of Chilean Brucella canis SCL strain provides insights on the epidemiology and virulence factors, explaining differences between geographical origins Electron J Biotechnol 2021;49. https://dx.doi.org/10.1016/j.ejbt.2020.10.002.",2021-01-01 +,"The DendroEcological Network: A cyberinfrastructure for the storage, discovery and sharing of tree-ring and associated ecological data","The DendroEcological Network (DEN; https://www.uvm.edu/femc/dendro) is an opensource repository of high quality dendrochronological and associated ecological data. Launched in 2018, the mission of the DEN is to provide a centralized, standards-driven cyberinfrastructure for data storage, exploration and sharing. Specifically, the objectives of the DEN are to, 1) act as an integrator of dendrochronological and ecological data, 2) facilitate synthetic investigation and analyses of these data, 3) uphold the scientific community’s goals of data transparency and reproducibility of results, 4) serve as a long-term data archiving platform for use by individuals, laboratories and the greater scientific, management and conservation communities and, 5) leverage and extend previous and future research. The DEN facilitates the gathering of individual studies into a larger network, expanding the scale of inquiry to address pressing ecological questions that no single study can answer alone.",2020-04-01 +,Preventing Post Traumatic Stress Disorder in the general population induced by trauma during the COVID pandemic : A simple brief intervention based on cognitive science that could be delivered digitally,"Most of the recent studies indicated the prevalence of Post-Traumatic Stress Symptoms (PTSS) are increasing after the COVID pandemic around the world. Bo et al. reported PTSS prevalence of 96.2% among the COVID-19-infected people. 
The sociocultural and individual vulnerability and protective factors may influence onset and maintenance of the symptoms. However, there is significant lack in understanding the risk factors and preventive factors that influence the maintenance of Post-Traumatic Stress symptoms that defines Post-Traumatic Stress Disorder (PTSD). The digital technology gives us the unique opportunity to assess this risk, to monitor and track this evolution longitudinally. In this research project we aimed to design and develop a smartphone application for longitudinal data collection enabling to (1) predict and follow the evolution of PTSS toward PTSD, (2) assess the relative efficacy of several methods to prevent the evolution of PTSS right after exposure to trauma (1−24 h), (3) educate people about psychological effects that can occur during and after trauma, normalize acute distress and refer to professional help if a disorder is constituted. We hope that this research project will help to understand how to maximize the self help support during the acute phase (golden hours) after trauma to prevent the transition from PTSS to PTSD. A video abstract can be found on https://www.youtube.com/watch?v=RZJehj3J8go&feature=emb_title",2020-12-30 +30945201,CANTATAdb 2.0: Expanding the Collection of Plant Long Noncoding RNAs.,"Long non-coding RNAs (lncRNAs) are a class of potent regulators of gene expression that are found in a wide array of eukaryotes; however, our knowledge about these molecules in plants is very limited. In particular, a number of plant species with important roles in biotechnology, agriculture and basic research still lack comprehensively identified and annotated sets of lncRNAs. To address these shortcomings, we previously created a database of lncRNAs in 10 model species, called CANTATAdb, and now we are expanding this online resource to encompass 39 species, including three algae. 
The lncRNAs were identified computationally using publicly available RNA sequencing (RNA-Seq) data. Expression values, coding potential calculations and other types of information were used to provide annotations for the identified lncRNAs. The data are freely available for searching, browsing and downloading from an online database called CANTATAdb 2.0 ( http://cantata.amu.edu.pl , http://yeti.amu.edu.pl/CANTATA/ ).",2019-01-01 +34878860,Using Statewide Electronic Health Record and Influenza Vaccination Data to Plan and Prioritize COVID-19 Vaccine Outreach and Communications in Wisconsin Communities.,"The University of Wisconsin Neighborhood Health Partnerships Program used electronic health record and influenza vaccination data to estimate COVID-19 relative mortality risk and potential barriers to vaccination in Wisconsin ZIP Code Tabulation Areas. Data visualization revealed four groupings to use in planning and prioritizing vaccine outreach and communication based on ZIP Code Tabulation Area characteristics. The program provided data, visualization, and guidance to health systems, health departments, nonprofits, and others to support planning targeted outreach approaches to increase COVID-19 vaccination uptake. (Am J Public Health. 2021;111(12):2111-2114. https://doi.org/10.2105/AJPH.2021.306524).",2021-12-01 +35935892,Multi-locus phylogenetic analysis of lophiostomatoid fungi motivates a broad concept of Lophiostoma and reveals nine new species.,"Recent studies on the fungal families Lophiostomataceae and Lophiotremataceae (Pleosporales) have provided varying phylogenetic and taxonomic results concerning constituent genera and species. By adding DNA sequences of 24 new strains of Lophiostomataceae and nine new strains of Lophiotremataceae to a sequence data matrix from international databases, we provide a new understanding of the relationships within these families. 
Multigene analysis of the four molecular markers ITS, LSU, TEF1-α, and RPB2 reveals that the genera within Lophio-tremataceae are phylogenetically well supported. Lophiostoma myriocarpum is recognised as a species of Lophiotrema in contrast to earlier concepts. In Lophiostomataceae, we resurrect a broad generic concept of the genus Lophiostoma and reduce 14 genera to synonymy: Alpestrisphaeria, Biappendiculispora, Capulatispora, Coelodictyosporium, Guttulispora, Lophiohelichrysum, Lophiopoacea, Neopaucispora, Neotrematosphaeria, Platystomum, Pseudocapulatispora, Pseudolophiostoma, Pseudoplatystomum, and Sigarispora. Nine new species are described based on molecular data and in most cases supported by morphological characters: Antealophiotrema populicola, Atrocalyx nordicus, Lophiostoma carpini, Lophiostoma dictyosporium, Lophiostoma erumpens, Lophiostoma fusisporum, Lophiostoma jotunheimenense, Lophiostoma plantaginis, and Lophiostoma submuriforme. Lophiostoma caespitosum and Lophiotrema myriocarpum are lecto- and epitypified to stabilise their species concepts. High intraspecific variability of several morphological traits is common within Lophiostomataceae. Citation: Andreasen M, Skrede I, Jaklitsch WM, et al. 2021. Multi-locus phylogenetic analysis of lophiostomatoid fungi motivates a broad concept of Lophiostoma and reveals nine new species. Persoonia 46: 240-271. https://doi.org/10.3767/persoonia.2021.46.09.",2021-06-12 +33959747,Updates to HCOP: the HGNC comparison of orthology predictions tool. ,"Multiple resources currently exist that predict orthologous relationships between genes. These resources differ both in the methodologies used and in the species they make predictions for. The HGNC Comparison of Orthology Predictions (HCOP) search tool integrates and displays data from multiple ortholog prediction resources for a specified human gene or set of genes. 
An indication of the reliability of a prediction is provided by the number of resources that support it. HCOP was originally designed to show orthology predictions between human and mouse but has been expanded to include data from a current total of 20 selected vertebrate and model organism species. The HCOP pipeline used to fetch and integrate the information from the disparate ortholog and nomenclature data resources has recently been rewritten, both to enable the inclusion of new data and to take advantage of modern web technologies. Data from HCOP are used extensively in our work naming genes as the Vertebrate Gene Nomenclature Committee (https://vertebrate.genenames.org).",2021-11-01 +34491769,Core Outcome Set Use in Poststroke Aphasia Treatment Research: Examining Barriers and Facilitators to Implementation Using the Theoretical Domains Framework.,"Purpose A core outcome set (COS; an agreed minimum set of outcomes) was developed to address the heterogeneous measurement of outcomes in poststroke aphasia treatment research. Successful implementation of a COS requires change in individual and collective research behavior. We used the Theoretical Domains Framework (TDF) to understand the factors influencing researchers' use and nonuse of the Research Outcome Measurement in Aphasia (ROMA) COS. Method Aphasia trialists and highly published treatment researchers were identified from the Cochrane review of speech and language therapy for aphasia following stroke and through database searches. Participants completed a theory-informed online survey that explored factors influencing COS use. Data were analyzed using descriptive statistics and qualitative content analysis. Results Sixty-four aphasia researchers from 13 countries participated. Most participants (81%) were aware of the ROMA COS, and participants identified more facilitators than barriers to its use. The TDF domain with the highest agreement (i.e., facilitator) was ""knowledge"" (84% agree/strongly agree). 
Participants had knowledge of the measures included in the ROMA COS, their associated benefits, and the existing recommendations. The TDF domains with the least agreement (i.e., barriers) were ""reinforcement"" (34% agree/strongly agree); ""social influences"" (41% agree/strongly agree); ""memory, attention, and decision processes"" (45% agree/strongly agree); and ""behavioral regulation"" (49% agree/strongly agree). Hence, participants identified a lack of external incentives, collegial encouragement, and monitoring systems as barriers to using the ROMA COS. The suitability and availability of individual measurement instruments, as well as burden associated with collecting the COS, were also identified as reasons for nonuse. Conclusions Overall, participants were aware of the benefits of using the ROMA COS and believed that its implementation would improve research quality; however, incentives for routine implementation were reported to be lacking. Findings will guide future revisions of the ROMA COS and the development of theoretically informed implementation strategies. Supplemental Material https://doi.org/10.23641/asha.16528524.",2021-09-07 +31974515,"The Rhinella arenarum transcriptome: de novo assembly, annotation and gene prediction.","The common toad Rhinella arenarum is widely distributed in Argentina, where it is utilised as an autochthonous model in ecotoxicological research and environmental toxicology. However, the lack of a reference genome makes molecular assays and gene expression studies difficult to carry out on this non-model species. To address this issue, we performed a genome-wide transcriptome analysis on R. arenarum larvae through massive RNA sequencing, followed by de novo assembly, annotation, and gene prediction. We obtained 57,407 well-annotated transcripts representing 99.4% of transcriptome completeness (available at http://rhinella.uncoma.edu.ar). 
We also defined a set of 52,800 high-confidence lncRNA transcripts and demonstrated the reliability of the transcriptome data to perform phylogenetic analysis. Our comprehensive transcriptome analysis of R. arenarum represents a valuable resource to perform functional genomic studies and to identify potential molecular biomarkers in ecotoxicological research.",2020-01-23 +33416858,The iPPI-DB initiative: A Community-centered database of Protein-Protein Interaction modulators. ,"One avenue to address the paucity of clinically testable targets is to reinvestigate the druggable genome by tackling complicated types of targets such as Protein-Protein Interactions (PPIs). Given the challenge to target those interfaces with small chemical compounds, it has become clear that learning from successful examples of PPI modulation is a powerful strategy. Freely-accessible databases of PPI modulators that provide the community with tractable chemical and pharmacological data, as well as powerful tools to query them, are therefore essential to stimulate new drug discovery projects on PPI targets. Here, we present the new version iPPI-DB, our manually curated database of PPI modulators. In this completely redesigned version of the database, we introduce a new web interface relying on crowdsourcing for the maintenance of the database. This interface was created to enable community contributions, whereby external experts can suggest new database entries. Moreover, the data model, the graphical interface, and the tools to query the database have been completely modernized and improved. We added new PPI modulators, new PPI targets, and extended our focus to stabilizers of PPIs as well. The iPPI-DB server is available at https://ippidb.pasteur.fr The source code for this server is available at https://gitlab.pasteur.fr/ippidb/ippidb-web/ and is distributed under GPL licence (http://www.gnu.org/licences/gpl). 
Queries can be shared through persistent links according to the FAIR data standards. Data can be downloaded from the website as csv files. Supplementary data are available at Bioinformatics online.",2021-01-08 +30726866,RiboD: a comprehensive database for prokaryotic riboswitches.,"SUMMARY:Riboswitches are cis-regulatory non-coding genomic segments that control the expression of downstream genes by undergoing conformational change upon ligand binding. We present a comprehensive database of prokaryotic riboswitches that allows the user to search for riboswitches using multiple criteria, extract information about riboswitch location and gene/operon it regulates. RiboD provides a very useful resource that can be utilized for the better understanding of riboswitch-based gene regulation in bacteria and archaea. AVAILABILITY AND IMPLEMENTATION:RiboD can be freely accessed on the web at http://ribod.iiserkol.ac.in/.",2019-09-01 +33270898,"GPCRdb in 2021: integrating GPCR sequence, structure and function.","G protein-coupled receptors (GPCRs) form both the largest family of membrane proteins and drug targets, mediating the action of one-third of medicines. The GPCR database, GPCRdb serves >4 000 researchers every month and offers reference data, analysis of own or literature data, experiment design and dissemination of published datasets. Here, we describe new and updated GPCRdb resources with a particular focus on integration of sequence, structure and function. GPCRdb contains all human non-olfactory GPCRs (and >27 000 orthologs), G-proteins and arrestins. It includes over 2 000 drug and in-trial agents and nearly 200 000 ligands with activity and availability data. GPCRdb annotates all published GPCR structures (updated monthly), which are also offered in a refined version (with re-modeled missing/distorted regions and reverted mutations) and provides structure models of all human non-olfactory receptors in inactive, intermediate and active states. 
Mutagenesis data in the GPCRdb spans natural genetic variants, GPCR-G protein interfaces, ligand sites and thermostabilising mutations. A new sequence signature tool for identification of functional residue determinants has been added and two data driven tools to design ligand site mutations and constructs for structure determination have been updated extending their coverage of receptors and modifications. The GPCRdb is available at https://gpcrdb.org.",2021-01-01 +30271982,Expression map of 78 brain-expressed mouse orphan GPCRs provides a translational resource for neuropsychiatric research.,"Orphan G-protein-coupled receptors (oGPCRs) possess untapped potential for drug discovery. In the brain, oGPCRs are generally expressed at low abundance and their function is understudied. Expression profiling is an essential step to position oGPCRs in brain function and disease, however public databases provide only partial information. Here, we fine-map expression of 78 brain-oGPCRs in the mouse, using customized probes in both standard and supersensitive in situ hybridization. Images are available at http://ogpcr-neuromap.douglas.qc.ca. This searchable database contains over 8000 coronal brain sections across 1350 slides, providing the first public mapping resource dedicated to oGPCRs. Analysis with public mouse (60 oGPCRs) and human (56 oGPCRs) genome-wide datasets identifies 25 oGPCRs with potential to address emotional and/or cognitive dimensions of psychiatric conditions. We probe their expression in postmortem human brains using nanoString, and included data in the resource. 
Correlating human with mouse datasets reveals excellent suitability of mouse models for oGPCRs in neuropsychiatric research.",2018-08-06 +34765707,"RNA-Seq transcriptome data of the liver of common Pekin, Muscovy, mule and Hinny ducks fed ad libitum or overfed.","Duck species are known to have different ability to fatty liver production in response to overfeeding and gene expression analyses can help to characterize mechanisms involved in these differences. This data article reports the sequencing of RNAs extracted from the liver of Pekin and Muscovy duck species and of their reciprocal hybrids, Mule and Hinny ducks fed ad libitum or overfed. Libraries were prepared by selecting polyadenylated mRNAs and RNA Sequencing (RNASeq) was performed using Illumina HiSeq2000 platform. RNASeq data presented in this article were deposited in the NCBI sequence read archive (SRA) under the accession number SRP144764 and links to these data were also indicated in the Data INRAE repository (https://doi.org/10.15454/JJZ3QQ). Transcriptome analyses of these data were published in Hérault et al. (2019) and Liu et al. (2020).",2021-10-27 +30371888,HmtVar: a new resource for human mitochondrial variations and pathogenicity data.,"Interest in human mitochondrial genetic data is constantly increasing among both clinicians and researchers, due to the involvement of mitochondrial DNA (mtDNA) in a number of physiological and pathological processes. Thanks to new sequencing technologies and modern databases, the large amount of information on mtDNA variability may be exploited to gain insights into the relationship between mtDNA variants, phenotypes and diseases. To facilitate this process, we have developed the HmtVar resource, a variant-focused database that allows the exploration of a dataset of over 40 000 human mitochondrial variants. 
Mitochondrial variation data, initially gathered from the HmtDB platform, are integrated with in-house pathogenicity assessments based on various evaluation criteria and with a set of additional annotations from third-party resources. The result is a comprehensive collection of information of crucial importance for human mitochondrial variation studies and investigation of common and rare diseases in which the mitochondrion may be involved. HmtVar is accessible at https://www.hmtvar.uniba.it and data may be retrieved using either a web interface through the Query page or a state-of-the-art API for programmatic access.",2019-01-01 +33326008,Collecting and managing taxonomic data with NCBI-taxonomist. ,"We present NCBI-taxonomist - a command-line tool written in Python that collects and manages taxonomic data from the National Center for Biotechnology Information (NCBI). NCBI-taxonomist does not depend on a pre-downloaded taxonomic database but can store data locally. NCBI-taxonomist has six commands to map, collect, extract, resolve, import and group taxonomic data that can be linked together to create powerful analytical pipelines. Because many life science databases use the same taxonomic information, the data managed by NCBI-taxonomist is not limited to NCBI and can be used to find data linked to taxonomic information present in other scientific databases. NCBI-taxonomist is implemented in Python 3 (≥3.8) and available at https://gitlab.com/janpb/ncbi-taxonomist and via PyPi (https://pypi.org/project/ncbi-taxonomist/), as a Docker container (https://gitlab.com/janpb/ncbi-taxonomist/container_registry/) and Singularity (v3.5.3) image (https://cloud.sylabs.io/library/jpb/ncbi-taxonomist). NCBI-taxonomist is licensed under the GPLv3. 
https://ncbi-taxonomist.readthedocs.io/en/latest/.",2020-12-16 +33903708,The microRNA analysis portal is a next-generation tool for exploring and analyzing miRNA-focused data in the literature.,"MicroRNAs constitute a class of noncoding small RNAs involved in the posttranscriptional regulation of many biological pathways. In recent years, microRNAs have also been associated with regulation across kingdoms, demonstrating that exogenous miRNAs can function in mammals in a fashion similar to mammalian miRNAs. The growing interest in microRNAs and the increasing amount of literature and molecular and biomedical data available make it difficult to identify records of interest and keep up to date with novel findings. For these reasons, we developed the microRNA Analysis Portal (MAP). MAP selects relevant miRNA-focused articles from PubMed, links biomedical and molecular data and applies bioinformatics modules. At the time of this writing, MAP represents the richest, most complete and integrated database focused on microRNAs. MAP also integrates an updated version of MirCompare (2.0), a computational platform used for selecting plant microRNAs on the basis of their ability to regulate mammalian genes. Both MAP and MirCompare functionalities were used to predict that microRNAs from Moringa oleifera have putative roles across kingdoms by regulating human genes coding for proteins of the immune system. Starting from a selection of 94 human microRNAs, MirCompare selected 6 Moringa oleifera functional homologs. The subsequent prediction of human targets and areas of functional enrichment highlighted the central involvement of these genes in regulating immune system processes, particularly the host-virus interaction processes in hepatitis B, cytomegalovirus, papillomavirus and coronavirus. This case of use showed how MAP can help to perform complex queries without any computational background. 
MAP is available at http://stablab.uniroma2.it/MAP .",2021-04-26 +32615035,mycoCSM: Using Graph-Based Signatures to Identify Safe Potent Hits against Mycobacteria.,"Development of new potent, safe drugs to treat Mycobacteria has proven to be challenging, with limited hit rates of initial screens restricting subsequent development efforts. Despite significant efforts and the evolution of quantitative structure-activity relationship as well as machine learning-based models for computationally predicting molecule bioactivity, there is an unmet need for efficient and reliable methods for identifying biologically active compounds against Mycobacterium that are also safe for humans. Here we developed mycoCSM, a graph-based signature approach to rapidly identify compounds likely to be active against bacteria from the genus Mycobacterium, or against specific Mycobacteria species. mycoCSM was trained and validated on eight organism-specific and for the first time a general Mycobacteria data set, achieving correlation coefficients of up to 0.89 on cross-validation and 0.88 on independent blind tests, when predicting bioactivity in terms of minimum inhibitory concentration. In addition, we also developed a predictor to identify those compounds likely to penetrate in necrotic tuberculosis foci, which achieved a correlation coefficient of 0.75. Together with a built-in estimator of the maximum tolerated dose in humans, we believe this method will provide a valuable resource to enrich screening libraries with potent, safe molecules. To provide simple guidance in the selection of libraries with favorable anti-Mycobacteria properties, we made mycoCSM freely available online at http://biosig.unimelb.edu.au/myco_csm.",2020-07-16 +33170210,The Zebrafish Information Network: major gene page and home page updates.,"The Zebrafish Information Network (ZFIN) (https://zfin.org/) is the database for the model organism, zebrafish (Danio rerio). 
ZFIN expertly curates, organizes, and provides a wide array of zebrafish genetic and genomic data, including genes, alleles, transgenic lines, gene expression, gene function, mutant phenotypes, orthology, human disease models, gene and mutant nomenclature, and reagents. New features at ZFIN include major updates to the home page and the gene page, the two most used pages at ZFIN. Data including disease models, phenotypes, expression, mutants and gene function continue to be contributed to The Alliance of Genome Resources for integration with similar data from other model organisms.",2021-01-01 +23766369,TIMBAL v2: update of a database holding small molecules modulating protein-protein interactions.,"TIMBAL is a database holding molecules of molecular weight <1200 Daltons that modulate protein-protein interactions. Since its first release, the database has been extended to cover 50 known protein-protein interactions drug targets, including protein complexes that can be stabilized by small molecules with therapeutic effect. The resource contains 14 890 data points for 6896 distinct small molecules. UniProt codes and Protein Data Bank entries are also included. Database URL: http://www-cryst.bioc.cam.ac.uk/timbal",2013-06-13 +33537385,Seismicity of the Gargano promontory (Southern Italy) after 7 years of local seismic network operation: Data release of waveforms from 2013 to 2018.,"The University of Bari (Italy), in cooperation with the National Institute of Geophysics and Volcanology (INGV) (Italy), has installed the OTRIONS micro-earthquake network to better understand the active tectonics of the Gargano promontory (Southern Italy). The OTRIONS network operates since 2013 and consists of 12 short period, 3 components, seismic stations located in the Apulian territory (Southern Italy). This data article releases the waveform database collected from 2013 to 2018 and describes the characteristics of the local network in the current configuration. 
At the end of 2018, we implemented a cloud infrastructure to make more robust the acquisition and storage system of the network through a collaboration with the RECAS-Bari computing centre of the University of Bari (Italy) and of the National Institute of Nuclear Physics (Italy). Thanks to this implementation, waveforms recorded after the beginning of 2019 and the station metadata are accessible through the European Integrated Data Archive (EIDA, https://www.orfeus-eu.org/data/eida/nodes/INGV/).",2021-01-21 +,"First Report of 16SrIV Palm Lethal Yellowing Group Phytoplasma (‘Candidatus Phytoplasma palmae’) in Palmilla de Taco (Brahea brandegeei) and Palma Colorada (Washingtonia robusta) in the State of Baja California Sur, Mexico","The Mexican state of Baja California Sur (BCS) is part of Peninsular Range Province (PRP), the arid mountainous region from Southern California to the southern tip of the Baja California Peninsula. This is the northwestern limit in the geographic range of the palms family (Arecaceae) in the Americas (Munnich et al. 2011). Brahea brandegeei (Bb) and Washingtonia robusta (Wr) are among the native ornamental fan palms of PRP most widely cultivated worldwide. In BCS, Wr grows largely throughout oases at the base of mountains and in urban areas, whereas Bb favors high-elevation oases and is rarely found in urban zones (Leon de la Luz et al. 2014). In July 2014, the symptoms of lethal yellowing (LY) diseases were observed in two Wr and one Bb palms in a private garden in El Centenario,10 km north of La Paz. Bb manifested bunchy-top-like symptoms, with midcanopy yellowing and chlorosis, necrotic tips of lateral leaves, and streaks in rachis and necrosis in some inflorescences. Wr palms had necrosis at the lower leaves and partial necrotic inflorescences. Samples from palm leaves, rachises, and inflorescences were collected and analyzed by scanning electron microscopy (SEM) and molecular techniques. 
Using SEM, phytoplasma cells were detected in phloem tissue of tested samples, with variable distribution in different specimens, and with average sizes 400 to 1,800 nm. Total DNA from eight collected samples of leaves and inflorescences from two palms was extracted (Tapia-Tussell et al. 2005), and nested polymerase chain reaction (PCR) was performed using P1/P7 and R16F2n/R16R2 primer pairs (Lee et al. 1998). Amplicons of ∼1.2 kb were obtained from all samples. Nested PCR products from three samples, two from Bb (2a and 3a) and one from Wr (Kb), were cloned into pGEM-T-easy vector (Promega, Madison, WI) and sequenced (University of California, Davis, CA, http://dnaseq.ucdavis.edu/). Analysis in NCBI BLASTn database of sequences Kb and 3a showed 99.28% (1,234/1,243 bp) and 99.35% (1,228/1,236 bp) similarity with 14 accessions from 16SrIV group, and the nearest matching sequence was MG234701, whereas sequence 2a displayed 99.36% identity (1,235/1,243 bp) to 35 GenBank accessions from 16SrIV group, such as MK421966. The analyzed three sequences, KX982666 (Kb), KX982667 (2a), and KX982668 (3a), were deposited in GenBank database. The use of iPhyClassifier tool (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi) confirmed their 99% similarity with ‘Candidatus Phytoplasma palmae’ reference strain (accession no. U18747). Virtual restriction fragment length polymorphism (RFLP) analysis (Zhao et al. 2009) of sequence 2a (KX982667) identified the phytoplasma as a variant of 16SrIV-D, based on 0.98 similarity coefficient (F) to virtual pattern from the reference strain 16SrIV-D (AF237615). Analysis of virtual RFLP of sequences 3a (KX982668) and Kb (KX982666) indicates the most similarity with the reference pattern of 16SrIV-A (AF498307), with F ≤ 0.97 (0.95 for 3a and 0.96 for Kb). Further research is required to prove if these two phytoplasmas represent new subgroups within the 16SrIV group. 
To our knowledge, this is the first report of 16SrIV palm LY phytoplasma in two native palm species in the Baja California Peninsula; worldwide, this is the first report of 16SrIV-D ‘Ca. P. palmae’ in a new species, Brahea brabdegeei. Research is in progress to investigate the presence of phytoplasmas in Bb and Wr in the mountainous region of BCS, to establish the possible source of LY phytoplasma in their native ecosystem.",2019-08-01 +33140980,Determinants and impact of physical impairment in patient-reported outcomes among older patients with type 2 diabetes mellitus in Japan.,"

Objective

To investigate the predictive factors associated with physical impairment among older patients with type 2 diabetes mellitus (T2DM) in Japan and to examine the potential impact of physical impairment on patient-reported health outcomes in this population.

Methods

A cross-sectional analysis was conducted using patient-reported data from the 2012-2014 Japan National Health and Wellness Survey. Physical impairment was measured using the Physical Component Summary (PCS) score of the Short-Form 36-Item Health Survey (SF-36) three-component model (using Japanese norms). Older T2DM patients (≥65 years old; n = 1511) were dichotomized into physically impaired (PCS ≤ 25th percentile; n = 378) and non-physically impaired (PCS > 25th percentile; n = 1133). Work productivity (absenteeism, presenteeism and overall work impairment), activity impairment and healthcare resource utilization were compared between these groups.

Results

Age, female sex, low and high body mass index (BMI), diabetes-related complications, cardiovascular events, unawareness of having hypoglycemic events in the past 3 months, and lack of regular exercise were significant factors associated with physical impairment in multivariable analysis. The physically impaired group reported significantly more regular outpatient visits (13.48 vs. 10.16, respectively, p < .001), 1% or greater absenteeism (16.7% vs. 4.1%, p = .005), greater presenteeism (27.8% vs. 12.2%, p = .001), overall work impairment (30.0% vs. 13.0%, p = .001) and overall activity impairment (39.5% vs. 17.2%, p < .001) than the non-physically-impaired group after adjusting for covariates.

Conclusions

This study identified age, BMI, diabetes-related comorbidities, history of cardiovascular events and lack of exercise as key predictors associated with physical impairment in older patients with T2DM in Japan, which predicted low work productivity as well as activity impairment. This study provides support that physical impairment in patients with T2DM may lead to low work productivity and activity impairment.Supplemental data for this article is available online at https://doi.org/10.1080/03007995.2020.1846170.",2020-11-23 +30398470,"Enabling precision medicine in neonatology, an integrated repository for preterm birth research.","Preterm birth, or the delivery of an infant prior to 37 weeks of gestation, is a significant cause of infant morbidity and mortality. In the last decade, the advent and continued development of molecular profiling technologies has enabled researchers to generate vast amount of 'omics' data, which together with integrative computational approaches, can help refine the current knowledge about disease mechanisms, diagnostics, and therapeutics. Here we describe the March of Dimes' Database for Preterm Birth Research (http://www.immport.org/resources/mod), a unique resource that contains a variety of 'omics' datasets related to preterm birth. The database is open publicly, and as of January 2018, links 13 molecular studies with data across tens of thousands of patients from 6 measurement modalities. The data in the repository are highly diverse and include genomic, transcriptomic, immunological, and microbiome data. Relevant datasets are augmented with additional molecular characterizations of almost 25,000 biological samples from public databases. 
We believe our data-sharing efforts will lead to enhanced research collaborations and coordination accelerating the overall pace of discovery in preterm birth research.",2018-11-06 +33221922,The UCSC Genome Browser database: 2021 update.,"For more than two decades, the UCSC Genome Browser database (https://genome.ucsc.edu) has provided high-quality genomics data visualization and genome annotations to the research community. As the field of genomics grows and more data become available, new modes of display are required to accommodate new technologies. New features released this past year include a Hi-C heatmap display, a phased family trio display for VCF files, and various track visualization improvements. Striving to keep data up-to-date, new updates to gene annotations include GENCODE Genes, NCBI RefSeq Genes, and Ensembl Genes. New data tracks added for human and mouse genomes include the ENCODE registry of candidate cis-regulatory elements, promoters from the Eukaryotic Promoter Database, and NCBI RefSeq Select and Matched Annotation from NCBI and EMBL-EBI (MANE). Within weeks of learning about the outbreak of coronavirus, UCSC released a genome browser, with detailed annotation tracks, for the SARS-CoV-2 RNA reference assembly.",2021-01-01 +,Study of Patients’ Characteristics and Mutual Impact Between Covid-19 and Hyperglycemia at a Community Hospital in Central Brooklyn,"Abstract Background: Studies have shown that poorly-controlled hyperglycemia worsens the outcomes in patients with COVID-19 (C-19) and C-19 may damage pancreatic islets via ACE2 receptors causing acute hyperglycemia. The major population we serve at Kingsbrook Jewish Medical Center (KJMC) are underprivileged with many of them having multiple comorbidities. Methods: This is a retrospective study wherein patients, admitted from February 2020 to April 2020 with hyperglycemia, were selected and divided into 2 groups based on presence or absence of C-19. 
Data include demographics, comorbidities, blood glucose level, serum osmolality, serum bicarbonate, anion gap, acute kidney injury (AKI), serum creatinine, ICU admission, length of stay (LOS) and mortality. Data were analyzed using descriptive study and T-test. Results: 100 patients were included in the C-19 group (CG) and 88 patients were included in the Non C-19 group (NCG). Major comorbidities were similar in both groups including HTN, DM, CKD followed by ESRD. Mean age of patients (years) was 65.68 in CG and 61.17 in NCG. 61% were male in CG and 53.41% were male in NCG. 16% and 9% developed DKA and HHS in CG, and 13.64% and 6.82% developed DKA and HHS in NCG respectively. 15% in CG had combined DKA & HHS and 3.41% had same in NCG. Mean blood glucose level (mg/dl) was 541.6 in CG and 460.0 in NCG (p=0.03). Mean serum osmolality (mOsm/kg) was 335.7 (SD±41.01) in CG and 317.1 (SD±30.54) in NCG (p=0.01). Mean serum bicarbonate (mEq/L) was 17.73 (SD±6.31) in CG and 21.46 (SD±5.94) in NCG (p<0.0001). Mean anion gap was 17.93 (SD±7.6) in CG and 13.10 (SD±7.2) in NCG (p<0.0001). 56% in CG and 37% in NCG developed AKI respectively (p=0.01). Mean serum creatinine (mg/dl) was 4.22 in CG and 1.65 in NCG (p=0.004). 55% of CG were admitted to ICU and 34% of NCG were admitted to ICU (p=0.003). Median LOS (days) in discharged patients was 8 in CG and 5 in NCG (p=0.02). Mortality was 40% in CG and 3.41% in NCG (p<0.0001). 12 patients in CG and 2 patients in NCG developed new-onset diabetes. In the subset of DKA, interestingly, mean age (years) was 61.63 (SD±17.73) in CG and 39.67 in NCG (SD±13.39) (p=0.001). Conclusion: In our study, patients in the CG carry worse laboratory parameters, unfavorable clinical outcomes and strikingly higher mortality. We discovered increased incidence of new-onset diabetes and elderly DKA in CG. In an inner city population like ours, the burden of DM with significant social and health care disparities is quite severe. 
Diabetic patients with concurrent C-19 infection can have particularly negative outcomes and C-19 possibly damages the pancreatic islets resulting in acute hyperglycemic crisis. Further research on larger population is required. References: (1)https://dx.doi.org/10.1016%2Fj.diabres.2020.108142(2) https://doi.org/10.2337/dc20-0723(3)https://www.nejm.org/doi/full/10.1056/NEJMc2018688",2021-01-01 +,Transcriptome profiling reveals differential expression of genes potentially involved in muscle and adipose tissue development of cattle,"To identify differentially expressed genes (DEGs) between muscle and adipose in cattle, we analyzed the data from the RNA sequencing of three Angus×Qinchuan crossbred cattle.Searched the Gene Expression Omnibus (GEO) for a microarray dataset of Yan yellow cattle, GSE49992. After the DEGs were identified, we used STRING and Cytoscape to construct a protein–protein interaction (PPI) network, subsequently analyzing the major modules of key genes. In total, 340 DEGs were discovered, including 21 hub genes, which were mainly enriched in muscle contraction, skeletal muscle contraction, troponin complex, lipid particle, Z disc, tropomyosin binding, and actin filament binding.In summary, these genes can be regarded as candidate biomarkers for the regulation of muscle and adipose development.How to cite: Wang S, Raza SHA, Mei C, et al. Transcriptome profiling reveals differential expression of genes potentially involved in muscle and adipose tissue development of cattle. Electron J Biotechnol 2020;48. https://doi.org/10.1016/j.ejbt.2020.09.004.",2020-11-01 +32838057,When COVID-19 will decline in India? Prediction by combination of recovery and case load rate.,"

Background

The World Health Organization (WHO) declared COVID-19 as a pandemic on March 11, 2020. There is sudden need of statistical modeling due to onset of COVID-19 pandemic across the world. But health planning and policy requirements need the estimates of disease problem from clinical data.

Objective

The present study aimed to predict the declination of COVID-19 using recovery rate and case load rate on basis of available data from India.

Methods

The reported COVID-19 cases in the country were obtained from website (https://datahub.io/core/covid-19#resource-covid-19_zip/). The confirmed cases, recovered cases and deaths were used for estimating recovery rate, case load rate and death rate till June 04, 2020.

Results

A total of 216919 confirmed cases were reported nationwide in India on June 04, 2020. It is found that the recovery rate increased to 47.99% and case load rate decreased to 49.21%. Death rate is found to be very low 2.80%. Accordingly, coincidence of the difference of case load rate and recovery rate (delta) will reveal a declination in expected COVID-19 cases.

Conclusion

The epidemic in the country was mainly caused by the movement of people from various foreign countries to India. Lockdown as restricting the migration of population and decision taken by the government to quarantine the population may greatly reduce the risk of continued spread of the epidemic in India. This study predicts that when the case load rate gets lesser than recovery rate, there after COVID-19 patients would be started to decline.",2020-06-23 +31004550,"The Plant PTM Viewer, a central resource for exploring plant protein modifications.","Post-translational modifications (PTMs) of proteins are central in any kind of cellular signaling. Modern mass spectrometry technologies enable comprehensive identification and quantification of various PTMs. Given the increased numbers and types of mapped protein modifications, a database is necessary that simultaneously integrates and compares site-specific information for different PTMs, especially in plants for which the available PTM data are poorly catalogued. Here, we present the Plant PTM Viewer (http://www.psb.ugent.be/PlantPTMViewer), an integrative PTM resource that comprises approximately 370 000 PTM sites for 19 types of protein modifications in plant proteins from five different species. The Plant PTM Viewer provides the user with a protein sequence overview in which the experimentally evidenced PTMs are highlighted together with an estimate of the confidence by which the modified peptides and, if possible, the actual modification sites were identified and with functional protein domains or active site residues. The PTM sequence search tool can query PTM combinations in specific protein sequences, whereas the PTM BLAST tool searches for modified protein sequences to detect conserved PTMs in homologous sequences. Taken together, these tools help to assume the role and potential interplay of PTMs in specific proteins or within a broader systems biology context. 
The Plant PTM Viewer is an open repository that allows the submission of mass spectrometry-based PTM data to remain at pace with future PTM plant studies.",2019-05-13 +34251213,MolGpka: A Web Server for Small Molecule pKa Prediction Using a Graph-Convolutional Neural Network.,"pKa is an important property in the lead optimization process since the charge state of a molecule in physiologic pH plays a critical role in its biological activity, solubility, membrane permeability, metabolism, and toxicity. Accurate and fast estimation of small molecule pKa is vital during the drug discovery process. We present MolGpKa, a web server for pKa prediction using a graph-convolutional neural network model. The model works by learning pKa related chemical patterns automatically and building reliable predictors with learned features. ACD/pKa data for 1.6 million compounds from the ChEMBL database was used for model training. We found that the performance of the model is better than machine learning models built with human-engineered fingerprints. Detailed analysis shows that the substitution effect on pKa is well learned by the model. MolGpKa is a handy tool for the rapid estimation of pKa during the ligand design process. The MolGpKa server is freely available to researchers and can be accessed at https://xundrug.cn/molgpka.",2021-07-12 +30590391,Curse: building expression atlases and co-expression networks from public RNA-Seq data.,"

Summary

Public RNA-Sequencing (RNA-Seq) datasets are a valuable resource for transcriptome analyses, but their accessibility is hindered by the imperfect quality and presentation of their metadata and by the complexity of processing raw sequencing data. The Curse suite was created to alleviate these problems. It consists of an online curation tool named Curse to efficiently build compendia of experiments hosted on the Sequence Read Archive, and a lightweight pipeline named Prose to download and process the RNA-Seq data into expression atlases and co-expression networks. Curse networks showed improved linking of functionally related genes compared to the state-of-the-art.

Availability and implementation

Curse, Prose and their manuals are available at http://bioinformatics.psb.ugent.be/webtools/Curse/. Prose was implemented in Java.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +34248372,"Database of summer fish fauna sampled in river estuaries in the southern part of the Boso Peninsula, Japan.","

Background

River estuaries provide various ecosystem services, such as nutrient circulation, climate change mitigation, habitats and coastal defence. Information on the various taxonomic groups is collected from large-scale estuaries; however, few studies have focused on river estuaries of small and medium-sized rivers. In particular, information on river estuaries in peninsulas and islands with complex marine environments is lacking.

New information

This paper provides basic information on summer fish fauna in the southern part of the Boso Peninsula, Japan. The Boso Peninsula is located at the northernmost point of where the warm current (Kuroshio) reaches and is considered to have highly endemic fish fauna. In total, 28 families, 51 species and 2,908 individuals were collected from the 27 river estuaries. The data are all accessible from the document ""database_fish_estuary_boso (http://ipt.pensoft.net/manage/resource.do?r=database_fish_estuary_boso)"". Further, Sicyopterus japonicus and Microphis brachyurus, which appear in estuaries that are influenced by the Kuroshio, were confirmed. However, these species were confirmed in few of the rivers studied, highlighting the importance of habitat conservation.",2021-06-29 +34157965,Genome-wide association study and its applications in the non-model crop Sesamum indicum.,"

Background

Sesame is a rare example of non-model and minor crop for which numerous genetic loci and candidate genes underlying features of interest have been disclosed at relatively high resolution. These progresses have been achieved thanks to the applications of the genome-wide association study (GWAS) approach. GWAS has benefited from the availability of high-quality genomes, re-sequencing data from thousands of genotypes, extensive transcriptome sequencing, development of haplotype map and web-based functional databases in sesame.

Results

In this paper, we reviewed the GWAS methods, the underlying statistical models and the applications for genetic discovery of important traits in sesame. A novel online database SiGeDiD ( http://sigedid.ucad.sn/ ) has been developed to provide access to all genetic and genomic discoveries through GWAS in sesame. We also tested for the first time, applications of various new GWAS multi-locus models in sesame.

Conclusions

Collectively, this work portrays steps and provides guidelines for efficient GWAS implementation in sesame, a non-model crop.",2021-06-22 +30689723,The interplay between microRNA and alternative splicing of linear and circular RNAs in eleven plant species.,"MOTIVATION:MicroRNA (miRNA) and alternative splicing (AS)-mediated post-transcriptional regulation has been extensively studied in most eukaryotes. However, the interplay between AS and miRNAs has not been explored in plants. To our knowledge, the overall profile of miRNA target sites in circular RNAs (circRNA) generated by alternative back splicing has never been reported previously. To address the challenge, we identified miRNA target sites located in alternatively spliced regions of the linear and circular splice isoforms using the up-to-date single-molecule real-time (SMRT) isoform sequencing (Iso-Seq) and Illumina sequencing data in eleven plant species. RESULTS:In total, we identified 399 401 and 114 574 AS events from linear and circular RNAs, respectively. Among them, there were 64 781 and 41 146 miRNA target sites located in linear and circular AS region, respectively. In addition, we found 38 913 circRNAs to be overlapping with 45 648 AS events of its own parent isoforms, suggesting circRNA regulation of AS of linear RNAs by forming R-loop with the genomic locus. Here, we present a comprehensive database of miRNA targets in alternatively spliced linear and circRNAs (ASmiR) and a web server for deposition and identification of miRNA target sites located in the alternatively spliced region of linear and circular RNAs. This database is accompanied by an easy-to-use web query interface for meaningful downstream analysis. Plant research community can submit user-defined datasets to the web service to search AS regions harboring small RNA target sites. In conclusion, this study provides an unprecedented resource to understand regulatory relationships between miRNAs and AS in both gymnosperms and angiosperms. 
AVAILABILITY AND IMPLEMENTATION:The readily accessible database and web-based tools are available at http://forestry.fafu.edu.cn/bioinfor/db/ASmiR. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-09-01 +32986723,Diagnostic accuracy of combined thoracic and cardiac sonography for the diagnosis of pulmonary embolism: A systematic review and meta-analysis.,"

Objectives

Computed tomography pulmonary angiography (CTPA) is the diagnostic standard for pulmonary embolism (PE), but is unavailable in many low resource settings. We evaluated the evidence for point of care ultrasound as an alternative diagnostic.

Methods

Using a PROSPERO-registered, protocol-driven strategy (https://www.crd.york.ac.uk/PROSPERO, ID = CRD42018099925), we searched MEDLINE, EMBASE, and CINHAL for observational and clinical trials of cardiopulmonary ultrasound (CPUS) for PE. We included English-language studies of adult patients with acute breathlessness, reported according to PRISMA guidelines published in the last two decades (January 2000 to February 2020). The primary outcome was diagnostic accuracy of CPUS compared to reference standard CTPA for detection of PE in acutely breathless adults.

Results

We identified 260 unique publications of which twelve met all inclusion criteria. Of these, seven studies (N = 3872) were suitable for inclusion in our meta-analysis for diagnostic accuracy (two using CTPA and five using clinically derived diagnosis criterion). Meta-analysis of data demonstrated that using cardiopulmonary ultrasound (CPUS) was 91% sensitive and 81% specific for pulmonary embolism diagnosis compared to diagnostic standard CTPA. When compared to clinically derived diagnosis criterion, CPUS was 52% sensitive and 92% specific for PE diagnosis. We observed substantial heterogeneity across studies meeting inclusion criteria (I2 = 73.5%).

Conclusions

Cardiopulmonary ultrasound may be useful in areas where CTPA is unavailable or unsuitable. Interpretation is limited by study heterogeneity. Further methodologically rigorous studies comparing CPUS and CTPA are important to inform clinical practice.",2020-09-28 +30593617,Hepatitis C Virus Database and Bioinformatics Analysis Tools in the Virus Pathogen Resource (ViPR).,"The Virus Pathogen Resource (ViPR; www.viprbrc.org ) is a US National Institute of Allergy and Infectious Diseases (NIAID)-sponsored Bioinformatics Resource Center providing bioinformatics support for major human viral pathogens. The hepatitis C virus (HCV) portal of ViPR facilitates basic research and development of diagnostics and therapeutics for HCV, by providing a comprehensive collection of HCV-related data integrated from various sources, a growing suite of analysis and visualization tools for data mining and hypothesis generation, and personal Workbench spaces for data storage and sharing. This chapter introduces the data and functionality provided by the ViPR HCV portal. It describes example workflows for (1) searching HCV genome and protein sequences, (2) conducting phylogenetic analysis, and (3) analyzing sequence variations using pattern search for amino acid substitutions in proteins, single nucleotide variation calculation, metadata-driven comparison, and sequence feature variant type analysis. All data and tools are freely available via the ViPR HCV portal at https://www.viprbrc.org/brc/home.spg?decorator=flavi_hcv .",2019-01-01 +30252093,iProX: an integrated proteome resource.,"Sharing of research data in public repositories has become best practice in academia. With the accumulation of massive data, network bandwidth and storage requirements are rapidly increasing. The ProteomeXchange (PX) consortium implements a mode of centralized metadata and distributed raw data management, which promotes effective data sharing. 
To facilitate open access of proteome data worldwide, we have developed the integrated proteome resource iProX (http://www.iprox.org) as a public platform for collecting and sharing raw data, analysis results and metadata obtained from proteomics experiments. The iProX repository employs a web-based proteome data submission process and open sharing of mass spectrometry-based proteomics datasets. Also, it deploys extensive controlled vocabularies and ontologies to annotate proteomics datasets. Users can use a GUI to provide and access data through a fast Aspera-based transfer tool. iProX is a full member of the PX consortium; all released datasets are freely accessible to the public. iProX is based on a high availability architecture and has been deployed as part of the proteomics infrastructure of China, ensuring long-term and stable resource support. iProX will facilitate worldwide data analysis and sharing of proteomics experiments.",2019-01-01 +34000890,The hospital telemedicine TELEMED database: Providing information on evidence-based telemedicine services to hospital managers and healthcare professionals.,"

Background

Increased use of telemedicine in the healthcare system is a political goal in Denmark. Although the number of hospital patients using interventions such as the video consultation has increased in recent years only a small proportion of the outpatient and inpatient visits involve telemedicine. The TELEMED database (https://telemedicine.cimt.dk/) has been launched at the Center for Innovative Medical Technology in Denmark to ensure that hospital managers and healthcare professionals have access to information about telemedicine services and their effectiveness. This article describes the development and the content of the TELEMED database.

Methods

A structured literature search was made in the PubMed Database for randomised controlled trials or observational studies with a control group that investigated the effect of telemedicine interventions for hospital patients. Data were extracted from each article on the clinical effectiveness, patient perceptions, economic effects and implementation challenges. As the database should only provide inspiration to healthcare professionals regarding possibilities for use of telemedicine, the risk of bias in the studies was not assessed.

Results

The literature search resulted in 2825 hits. Based on full text assessment, 331 articles were included for data extraction and assessment. These articles present telemedicine services used in 22 different medical specialities. Forty-eight percent of the studies found a positive, statistically significant clinical effect, while 47% showed no statistically significant difference. In 48% of the studies, patients' experiences were examined and of these 68% found positive patient experiences. Fifty-four percent of the articles included information on the economic effects and, of these, 51% found reduction in healthcare utilization. In the majority of studies between two and four types of implementation challenges were found. Conclusions and recommendations: The TELEMED database provides an easily accessible overview of existing evidence-based telemedicine services for use by hospital managers and health professionals, who wish to implement telemedicine. The database is freely available and expected to be continuously improved and broadened over time.",2021-05-18 +,A43 Translational research: NGS metagenomics into clinical diagnostics,"Abstract As research next-generation sequencing (NGS) metagenomic pipelines transition to clinical diagnostics, the user-base changes from bioinformaticians to biologists, medical doctors, and lab-technicians. Besides the obvious need for benchmarking and assessment of diagnostic outcomes of the pipelines and tools, other focus points remain: reproducibility, data immutability, user-friendliness, portability/scalability, privacy, and a clear audit trail. We have a research metagenomics pipeline that takes raw fastq files and produces annotated contigs, but it is too complicated for non-bioinformaticians. Here, we present preliminary findings in adapting this pipeline for clinical diagnostics. 
We used information available on relevant fora (www.bioinfo-core.org) and experiences and publications from colleague bioinformaticians in other institutes (COMPARE, UBC, and LUMC). From this information, a robust and user-friendly storage and analysis workflow was designed for non-bioinformaticians in a clinical setting. Via Conda [https://conda.io] and Docker containers [http://www.docker.com], we made our disparate pipeline processes self-contained and reproducible. Furthermore, we moved all pipeline settings into a separate JSON file. After every analysis, the pipeline settings and virtual-environment recipes will be archived (immutably) under a persistent unique identifier. This allows long-term precise reproducibility. Likewise, after every run the raw data and final products will be automatically archived, complying with data retention laws/guidelines. All the disparate processes in the pipeline are parallelized and automated via Snakemake1 (i.e. end-users need no coding skills). In addition, interactive web-reports such as MultiQC [http://multiqc.info] and Krona2 are generated automatically. By combining Snakemake, Conda, and containers, our pipeline is highly portable and easily scaled up for outbreak situations, or scaled down to reduce costs. Since patient privacy is a concern, our pipeline automatically removes human genetic data. Moreover, all source code will be stored on an internal Gitlab server, and, combined with the archived data, ensures a clear audit trail. Nevertheless, challenges remain: (1) reproducible reference databases, e.g. being able to revert to an older version to reproduce old analyses. (2) A user-friendly GUI. (3) Connecting the pipeline and NGS data to in-house LIMS. (4) Efficient long-term storage, e.g. lossless compression algorithms. 
Nevertheless, this work represents a step forward in making user-friendly clinical diagnostic workflows.",2019-08-01 +32091591,6mA-Finder: a novel online tool for predicting DNA N6-methyladenine sites in genomes.,"

Motivation

DNA N6-methyladenine (6 mA) has recently been found as an essential epigenetic modification, playing its roles in a variety of cellular processes. The abnormal status of DNA 6 mA modification has been reported in cancer and other disease. The annotation of 6 mA marks in genome is the first crucial step to explore the underlying molecular mechanisms including its regulatory roles.

Results

We present a novel online DNA 6 mA site tool, 6 mA-Finder, by incorporating seven sequence-derived information and three physicochemical-based features through recursive feature elimination strategy. Our multiple cross-validations indicate the promising accuracy and robustness of our model. 6 mA-Finder outperforms its peer tools in general and species-specific 6 mA site prediction, suggesting it can provide a useful resource for further experimental investigation of DNA 6 mA modification.

Availability and implementation

https://bioinfo.uth.edu/6mA_Finder.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-05-01 +31114901,Simple ClinVar: an interactive web server to explore and retrieve gene and disease variants aggregated in ClinVar database.,"Clinical genetic testing has exponentially expanded in recent years, leading to an overwhelming amount of patient variants with high variability in pathogenicity and heterogeneous phenotypes. A large part of the variant level data is aggregated in public databases such as ClinVar. However, the ability to explore this rich resource and answer general questions such as 'How many genes inside ClinVar are associated with a specific disease?' or 'In which part of the protein are patient variants located?' is limited and requires advanced bioinformatics processing. Here, we present Simple ClinVar (http://simple-clinvar.broadinstitute.org/) a web server application that is able to provide variant, gene and disease level summary statistics based on the entire ClinVar database in a dynamic and user-friendly web-interface. Overall, our web application is able to interactively answer basic questions regarding genetic variation and its known relationships to disease. By typing a disease term of interest, the user can identify in seconds the genes and phenotypes most frequently reported to ClinVar. Subsets of variants can then be further explored, filtered or mapped and visualized in the corresponding protein sequences. Our website will follow ClinVar monthly releases and provide easy access to ClinVar resources to a broader audience including basic and clinical scientists.",2019-07-01 +34847134,Clinical Applications of Meshed Multilayered Anatomical Models by Low-Cost Three-Dimensional Printer.,"

Summary

In recent years, even low-cost fused deposition modeling-type three-dimensional printers can be used to create a three-dimensional model with few errors. The authors devised a method to create a three-dimensional multilayered anatomical model at a lower cost and more easily than with established methods, by using a meshlike structure as the surface layer. Fused deposition modeling-type three-dimensional printers were used, with opaque polylactide filament for material. Using the three-dimensional data-editing software Blender (Blender Foundation, www.blender.org) and Instant Meshes (Jakob et al., https://igl.ethz.ch/projects/instant-meshes/) together, the body surface data were converted into a meshlike structure while retaining its overall shape. The meshed data were printed together with other data (nonmeshed) or printed separately. In each case, the multilayer model in which the layer of the body surface was meshed could be output without any trouble. It was possible to grasp the positional relationship between the body surface and the deep target, and it was clinically useful. The total work time for preparation and processing of three-dimensional data ranged from 1 hour to several hours, depending on the case, but the work time required for converting into a meshlike shape was about 10 minutes in all cases. The filament cost was $2 to $8. In conclusion, the authors devised a method to create a three-dimensional multilayered anatomical model to easily visualize positional relationships within the structure by converting the surface layer into a meshlike structure. This method is easy to adopt, regardless of the available facilities and economic environment, and has broad applications.",2021-12-01 +34347812,Mental health among healthcare workers and other vulnerable groups during the COVID-19 pandemic and other coronavirus outbreaks: A rapid systematic review.,"

Introduction

Although most countries and healthcare systems worldwide have been affected by the COVID-19 pandemic, some groups of the population may be more vulnerable to detrimental effects of the pandemic on mental health than others. The aim of this systematic review was to synthesise evidence currently available from systematic reviews on the impact of COVID-19 and other coronavirus outbreaks on mental health for groups of the population thought to be at increased risk of detrimental mental health impacts.

Materials and methods

We conducted a systematic review of reviews on adults and children residing in a country affected by a coronavirus outbreak and belonging to a group considered to be at risk of experiencing mental health inequalities. Data were collected on symptoms or diagnoses of any mental health condition, quality of life, suicide or attempted suicide. The protocol for this systematic review was registered in the online PROSPERO database prior to commencing the review (https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=194264).

Results

We included 25 systematic reviews. Most reviews included primary studies of hospital workers from multiple countries. Reviews reported variable estimates for the burden of symptoms of mental health problems among acute healthcare workers, COVID-19 patients with physical comorbidities, and children and adolescents. No evaluations of interventions were identified. Risk- and protective factors, mostly for healthcare workers, showed the importance of personal factors, the work environment, and social networks for mental health.

Conclusions

This review of reviews based on primary studies conducted in the early months of the COVID-19 pandemic shows a lack of evidence on mental health interventions and mental health impacts on vulnerable groups in the population.",2021-08-04 +33166383,FireProtDB: database of manually curated protein stability data.,"The majority of naturally occurring proteins have evolved to function under mild conditions inside the living organisms. One of the critical obstacles for the use of proteins in biotechnological applications is their insufficient stability at elevated temperatures or in the presence of salts. Since experimental screening for stabilizing mutations is typically laborious and expensive, in silico predictors are often used for narrowing down the mutational landscape. The recent advances in machine learning and artificial intelligence further facilitate the development of such computational tools. However, the accuracy of these predictors strongly depends on the quality and amount of data used for training and testing, which have often been reported as the current bottleneck of the approach. To address this problem, we present a novel database of experimental thermostability data for single-point mutants FireProtDB. The database combines the published datasets, data extracted manually from the recent literature, and the data collected in our laboratory. Its user interface is designed to facilitate both types of the expected use: (i) the interactive explorations of individual entries on the level of a protein or mutation and (ii) the construction of highly customized and machine learning-friendly datasets using advanced searching and filtering. 
The database is freely available at https://loschmidt.chemi.muni.cz/fireprotdb.",2021-01-01 +29309507,SEGreg: a database for human specifically expressed genes and their regulations in cancer and normal tissue.,"Human specifically expressed genes (SEGs) usually serve as potential biomarkers for disease diagnosis and treatment. However, the regulation underlying their specific expression remains to be revealed. In this study, we constructed SEG regulation database (SEGreg; available at http://bioinfo.life.hust.edu.cn/SEGreg) for showing SEGs and their transcription factors (TFs) and microRNA (miRNA) regulations under different physiological conditions, which include normal tissue, cancer tissue and cell line. In total, SEGreg collected 6387, 1451, 4506 and 5320 SEGs from expression profiles of 34 cancer types and 55 tissues of The Cancer Genome Atlas, Cancer Cell Line Encyclopedia, Human Body Map and Genotype-Tissue Expression databases/projects, respectively. The cancer or tissue corresponding expressed miRNAs and TFs were identified from miRNA and gene expression profiles, and their targets were collected from several public resources. Then the regulatory networks of all SEGs were constructed and integrated into SEGreg. Through a user-friendly interface, users can browse and search SEGreg by gene name, data source, tissue, cancer type and regulators. In summary, SEGreg is a specialized resource to explore SEGs and their regulations, which provides clues to reveal the mechanisms of carcinogenesis and biological processes.",2019-07-01 +34858446,Time-Series Growth Prediction Model Based on U-Net and Machine Learning in Arabidopsis.,"Yield prediction for crops is essential information for food security. A high-throughput phenotyping platform (HTPP) generates the data of the complete life cycle of a plant. 
However, the data are rarely used for yield prediction because of the lack of quality image analysis methods, yield data associated with HTPP, and the time-series analysis method for yield prediction. To overcome limitations, this study employed multiple deep learning (DL) networks to extract high-quality HTPP data, establish an association between HTPP data and the yield performance of crops, and select essential time intervals using machine learning (ML). The images of Arabidopsis were taken 12 times under environmentally controlled HTPP over 23 days after sowing (DAS). First, the features from images were extracted using DL network U-Net with SE-ResXt101 encoder and divided into early (15-21 DAS) and late (∼21-23 DAS) pre-flowering developmental stages using the physiological characteristics of the Arabidopsis plant. Second, the late pre-flowering stage at 23 DAS can be predicted using the ML algorithm XGBoost, based only on a portion of the early pre-flowering stage (17-21 DAS). This was confirmed using an additional biological experiment (P < 0.01). Finally, the projected area (PA) was estimated into fresh weight (FW), and the correlation coefficient between FW and predicted FW was calculated as 0.85. This was the first study that analyzed time-series data to predict the FW of related but different developmental stages and predict the PA. The results of this study were informative and enabled the understanding of the FW of Arabidopsis or yield of leafy plants and total biomass consumed in vertical farming. 
Moreover, this study highlighted the reduction of time-series data for examining interesting traits and future application of time-series analysis in various HTPPs.",2021-11-11 +31777944,CDD/SPARCLE: the conserved domain database in 2020.,"As NLM's Conserved Domain Database (CDD) enters its 20th year of operations as a publicly available resource, CDD curation staff continues to develop hierarchical classifications of widely distributed protein domain families, and to record conserved sites associated with molecular function, so that they can be mapped onto user queries in support of hypothesis-driven biomolecular research. CDD offers both an archive of pre-computed domain annotations as well as live search services for both single protein or nucleotide queries and larger sets of protein query sequences. CDD staff has continued to characterize protein families via conserved domain architectures and has built up a significant corpus of curated domain architectures in support of naming bacterial proteins in RefSeq. These architecture definitions are available via SPARCLE, the Subfamily Protein Architecture Labeling Engine. CDD can be accessed at https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml.",2020-01-01 +34263466,Prediction of overall survival in patients with Stage I esophageal cancer: A novel web-based calculator.,"

Background and aims

In this study, we aimed to develop a convenient web-based calculator to predict the overall survival (OS) of patients with Stage I esophageal cancer (EC).

Methods

Data of 1664 patients, between 2004 and 2015, were extracted from the Surveillance, Epidemiology, and End Results database. Least absolute shrinkage and selection operator regression was employed to sift variables; subsequently, Cox proportional hazards regression model was built. We applied the enhanced bootstrap validation to appraise the discrimination and calibration of the model. Clinical benefit was measured using decision curve analysis (DCA). Thereafter, a web-based calculator based on the model, which could be used to predict the 1-, 3-, and 5-year OS rates, was developed.

Results

Race, age, histologic type, grade, N stage, and therapeutic methods were selected. C-indices of the prediction model in the training and validation groups were 0.726 (95% confidence interval [CI], 0.679-0.773) and 0.724 (95% CI, 0.679-0.769), respectively. Calibration curves showed good agreement between the groups. The DCA demonstrated that the prediction model is clinically useful.

Conclusions

The prediction model we developed showed a good performance in calculating the OS rates in patients with Stage I EC. The web-based calculator is available at https://championship.shinyapps.io/dynnomapp/.",2021-07-14 +34510392,The Therapeutic Effects of Mild to Moderate Intensity Aerobic Exercise on Glycemic Control in Patients with Type 2 Diabetes Mellitus: A Meta-Analysis of Randomized Trials.,"

Introduction

It has been recommended that physical activity be a part of treatment and management regimens of type 2 diabetes mellitus (T2DM), and research has shown that regular physical exercise facilitates glycemic control in these patients. In this analysis, our aim was to systematically show the therapeutic effects of mild to moderate intensity aerobic exercise on glycemic control in patients with T2DM.

Methods

From February to April 2021, we searched the https://www.clinicaltrials.gov , EMBASE, MEDLINE, Cochrane Central Register of Controlled Trials (CENTRAL), Web of Science and Google Scholar databases for trials that showed the effects of aerobic exercise on glycemic control in patients with T2DM. Glycated hemoglobin (HbA1c) was the endpoint in the analysis. The RevMan version 5.4 statistical program was used for statistical analysis, and the mean difference (MD) and 95% confidence intervals (CI) used to represent the data following analysis.

Results

Eighteen trials involving 972 participants with T2DM were included in this meta-analysis, of whom 523 were assigned to an exercise group and 449 were assigned to a control group. A comparison pre- versus post-aerobic exercise showed that aerobic exercise significantly improved glycemic control (HbA1c) (MD 0.35, 95% CI 0.23-0.48; P = 0.00001) in these patients with T2DM. A second comparison, T2DM participants in the experimental group post-exercise versus T2DM participants from the control group at the end of the follow-up, also showed that aerobic exercise significantly improved glycemic control (MD - 0.46, 95% CI - 0.69 to - 0.22; P = 0.0001). However, a comparison of HbA1c of T2DM participants in the control group at the beginning of the study compared to those at the end of follow-up did not show any significant improvement in glycemic control (MD 0.08, 95% CI - 0.05 to 0.21; P = 0.21).

Conclusion

The current analysis showed that mild to moderate intensity aerobic exercise significantly improved glycemic control in patients with T2DM. Patients with T2DM who regularly participated in aerobic exercise activities had a better control of their disease than those who were not on a regular aerobic exercise regimen. These results lead to the recommendation that at least mild to moderate intensity aerobic exercise should be included in the treatment and management regimens of patients with T2DM.",2021-09-12 +34900235,"PlotXpress, a webtool for normalization and visualization of reporter expression data.","In molecular cell biology, reporter assays are frequently used to investigate gene expression levels. Reporter assays employ a gene that encodes a light-emitting protein, of which the luminescence is quantified as a proxy of gene expression. Commercial parties provide reporter assay kits that include protocols and specialized detection machinery. However, downstream analysis of the output data and their presentation are not standardized. We have developed plotXpress to fill this gap, providing a free, open-source platform for the semi-automated analysis and standardized visualisation of experimental gene reporter data. Users can upload raw luminescence data acquired from a reporter gene assay with an internal control. In plotXpress, the data is corrected for sample variation with the internal control and the average for each condition is calculated. When a reference condition is selected the fold change is calculated for all other conditions, based on the selected reference. The results are shown as dot plots with a statistical summary, which can be adjusted to create publication-grade plots without requiring coding skills. Altogether, plotXpress is an open-source, low-threshold, web-based tool, that promotes a standardized and reproducible analysis while providing an appealing visualization of reporter data. 
The webtool can be accessed at: https://huygens.science.uva.nl/PlotXpress/.",2021-11-08 +33995920,ExVe: The knowledge base of orthologous proteins identified in fungal extracellular vesicles.,"Extracellular vesicles (EVs) are double-membrane particles associated with intercellular communication. Since the discovery of EV production in the fungus Cryptococcus neoformans, the importance of EV release in its physiology and pathogenicity has been investigated. To date, few studies have investigated the proteomic content of EVs from multiple fungal species. Our main objective was to use an orthology approach to compare proteins identified by EV shotgun proteomics in 8 pathogenic and 1 nonpathogenic species. Using protein information from the UniProt and FungiDB databases, we integrated data for 11,433 hits in fungal EVs with an orthology perspective, resulting in 3,834 different orthologous groups. OG6_100083 (Hsp70 Pfam domain) was the unique orthologous group that was identified for all fungal species. Proteins with this protein domain are associated with the stress response, survival and morphological changes in different fungal species. Although no pathogenic orthologous group was found, we identified 5 orthologous groups exclusive to S. cerevisiae. Using the criteria of at least 7 pathogenic fungi to define a cluster, we detected the 4 unique pathogenic orthologous groups. Taken together, our data suggest that Hsp70-related proteins might play a key role in fungal EVs, regardless of the pathogenic status. Using an orthology approach, we identified at least 4 protein domains that could be novel therapeutic targets against pathogenic fungi. Our results were compiled in the herein described ExVe database, which is publicly available at http://exve.icc.fiocruz.br.",2021-04-17 +33147622,KNIndex: a comprehensive database of physicochemical properties for k-tuple nucleotides. 
,"With the development of high-throughput sequencing technology, the genomic sequences increased exponentially over the last decade. In order to decode these new genomic data, machine learning methods were introduced for genome annotation and analysis. Due to the requirement of most machines learning methods, the biological sequences must be represented as fixed-length digital vectors. In this representation procedure, the physicochemical properties of k-tuple nucleotides are important information. However, the values of the physicochemical properties of k-tuple nucleotides are scattered in different resources. To facilitate the studies on genomic sequences, we developed the first comprehensive database, namely KNIndex (https://knindex.pufengdu.org), for depositing and visualizing physicochemical properties of k-tuple nucleotides. Currently, the KNIndex database contains 182 properties including one for mononucleotide (DNA), 169 for dinucleotide (147 for DNA and 22 for RNA) and 12 for trinucleotide (DNA). KNIndex database also provides a user-friendly web-based interface for the users to browse, query, visualize and download the physicochemical properties of k-tuple nucleotides. With the built-in conversion and visualization functions, users are allowed to display DNA/RNA sequences as curves of multiple physicochemical properties. We wish that the KNIndex will facilitate the related studies in computational biology.",2021-07-01 +34848217,A machine learning model for nowcasting epidemic incidence.,"Due to delay in reporting, the daily national and statewide COVID-19 incidence counts are often unreliable and need to be estimated from recent data. This process is known in economics as nowcasting. 
We describe in this paper a simple random forest statistical model for nowcasting the COVID-19 daily new infection counts based on historic data along with a set of simple covariates, such as the currently reported infection counts, day of the week, and time since first reporting. We apply the model to adjust the daily infection counts in Ohio, and show that the predictions from this simple data-driven method compare favorably both in quality and computational burden to those obtained from the state-of-the-art hierarchical Bayesian model employing a complex statistical algorithm. The interactive notebook for performing nowcasting is available online at https://tinyurl.com/simpleMLnowcasting.",2021-11-27 +32689897,"A Genome Resource of Setosphaeria turcica, Causal Agent of Northern Leaf Blight of Maize.","The heterothallic ascomycete Setosphaeria turcica (anamorph Exserohilum turcicum) causes northern corn leaf blight, which results in devastating yield losses and a reduction in feed value. Although genome sequences of two model strains of the pathogen are available (https://mycocosm.jgi.doe.gov/mycocosm/home), previous drafts were assembled using short read technologies, making evolutionary and genetic linkage inferences difficult. Here, race 23N of S. turcica strain Et28A was sequenced again using Illumina HiSeq and PacBio Sequel technologies, and assembled to approximately 43,480,261 bp on 30 scaffolds. In all, 13,183 protein-coding genes were predicted, 13,142 of them were well annotated. This S. turcica genome resource is important for understanding the genetics behind pathogen evolution and infection mechanisms.",2020-11-04 +34013110,Identifying Information Needs of Patients With IgA Nephropathy Using an Innovative Social Media-stepped Analytical Approach.,"

Introduction

The number of people with kidney disease using social media to search for medical information and peer support is increasing. IgA nephropathy (IgAN) predominantly affects young adults, demographically the biggest users of social media. This article presents an innovative analysis of social media interactions to identify unmet education and information needs of patients with IgAN.

Methods

Following ethical approval for the study, the IgAN Support UK Facebook group (https://www.facebook.com/groups/915274415226674) granted us permission to anonymously collect and analyze 1959 posts and comments from 498 group users. An initial patient focus group and quantitative word-frequency analysis created an initial categorization matrix that was iteratively refined after serial analyses of the social media database to generate a final categorization matrix of needs. We evaluated narrative data relating to each identified category to define patient narratives relating to each area.

Results

A large number of information gaps and unanswered questions were identified relating to the following: diet, symptoms, diagnosis, treatment, and patient comorbidities. Patient-clinician communication and the presentation of information were also drawn out as cross-cutting issues. These themes differed significantly from those identified from the traditional patient focus group, highlighting the value of this novel method for interrogating social media data to understand unmet patient needs.

Conclusion

Social media data are untapped and valuable resources that can be used to better understand patient information gaps, leading to the generation of targeted materials to address unmet educational needs. This innovative approach could be replicated across other health conditions.",2021-03-02 +31328773,Tripal v3: an ontology-based toolkit for construction of FAIR biological community databases.,"Community biological databases provide an important online resource for both public and private data, analysis tools and community engagement. These sites house genomic, transcriptomic, genetic, breeding and ancillary data for specific species, families or clades. Due to the complexity and increasing quantities of these data, construction of online resources is increasingly difficult especially with limited funding and access to technical expertise. Furthermore, online repositories are expected to promote FAIR data principles (findable, accessible, interoperable and reusable) that presents additional challenges. The open-source Tripal database toolkit seeks to mitigate these challenges by creating both the software and an interactive community of developers for construction of online community databases. Additionally, through coordinated, distributed co-development, Tripal sites encourage community-wide sustainability. Here, we report the release of Tripal version 3 that improves data accessibility and data sharing through systematic use of controlled vocabularies (CVs). Tripal uses the community-developed Chado database as a default data store, but now provides tools to support other data stores, while ensuring that CVs remain the central organizational structure for the data. A new site developer can use Tripal to develop a basic site with little to no programming, with the ability to integrate other data types using extension modules and the Tripal application programming interface. 
A thorough online User's Guide and Developer's Handbook are available at http://tripal.info, providing download, installation and step-by-step setup instructions.",2019-01-01 +32911083,MosaicBase: A Knowledgebase of Postzygotic Mosaic Variants in Noncancer Disease-related and Healthy Human Individuals.,"Mosaic variants resulting from postzygotic mutations are prevalent in the human genome and play important roles in human diseases. However, except for cancer-related variants, there is no collection of postzygotic mosaic variants in noncancer disease-related and healthy individuals. Here, we present MosaicBase, a comprehensive database that includes 6698 mosaic variants related to 266 noncancer diseases and 27,991 mosaic variants identified in 422 healthy individuals. Genomic and phenotypic information of each variant was manually extracted and curated from 383 publications. MosaicBase supports the query of variants with Online Mendelian Inheritance in Man (OMIM) entries, genomic coordinates, gene symbols, or Entrez IDs. We also provide an integrated genome browser for users to easily access mosaic variants and their related annotations for any genomic region. By analyzing the variants collected in MosaicBase, we find that mosaic variants that directly contribute to disease phenotype show features distinct from those of variants in individuals with mild or no phenotypes, in terms of their genomic distribution, mutation signatures, and fraction of mutant cells. MosaicBase will not only assist clinicians in genetic counseling and diagnosis but also provide a useful resource to understand the genomic baseline of postzygotic mutations in the general human population. MosaicBase is publicly available at http://mosaicbase.com/ or http://49.4.21.8:8000.",2020-04-01 +,Are seaweeds the food of the future? Challenges for its conservation and introduction in the Portuguese diet,"Introduction: Seaweeds are well known for their nutritional value [1]. 
The ALGA4FOOD project [https://alga4food.wixsite.com/pt-pt] [2] aims: i) researching new seaweed conservation processes (e.g. lyophilization, pascalization, MAP, etc.) which optimize functional and organoleptic characteristics; ii) develop strategies for its introduction in the Portuguese diet. Materials and methods1:Raw Materials: Seven seweed species (Ulva rigida, Codium tomentosum, Undaria pinnatifida, Saccorhiza polyschides, Gracilaria gracilis, Osmundea pinnatifida, Chondracanthus teedei var. Lusitanicus) were collected in north and central Portugal. Porphyra sp. and Ulva rigida for sensorial analysis were cultivated in an IMTA system. Instrumental Analysis: Each seaweed was frozen with liquid nitrogen and ground. Samples were maintained at −20 °C until microextraction by HS-SPME, using 30 g of fresh seaweed and 104 g of milli-Q water with 30% NaCl (PA). Ulva rigida was also dried and 6.75 g of the dried seaweed and 117 g of the saline solution were used. Compounds extracted were analyzed and identified by GC-MS. Product Development and Sensory Analysis: Dried pastas enriched with U. rigida and Porphyra sp. were developed and affective acceptance tests, using as control a 100% semolina (Triticum durum) pasta, were performed. The panel was composed by 60 untrained testers, and a structured hedonic scale of 1 to 9 points was used for the evaluation of appearance, color, aroma, texture, flavour, overall impression and food suitability. Data was submitted to analysis of variance (ANOVA), and Tukey’s multiple comparison tests. Results:Instrumental analysis: GC-MS identified 57 different organic compounds (167 in total): 28 aldehydes, 4 alcohols; 10 hydrocarbons; 3 ethers; 8 ketones; 4 other functional groups (1 containing sulphur, 2 iodine and 1 bromine). For U. rigida, results showed that the main compounds contributing for its fresh flavour profile (β-ionone and 2,4-decadienal) were different from those for the dried seaweed, meaning that fresh U. 
rigida has a higher aromatic richness than the dried one. Sensory analysis: The results showed no significant differences between the average values for texture, flavour, overall impression and food suitability. However, the appearance, colour and aroma attributes were significantly different particularly for the control and Porphyra sp. formulations [3]. Discussion and conclusions: The exploratory process showed that both instrumental and sensorial analysis, as well as the concepts underlying cooking creativity [3], are of great value for the development of new seaweed food products. GC-MS technique, together with sensory analysis, showed similarities between Undaria pinnatifida and collard greens (Brassica oleracea var. Acephala). Based on this, the seaweed was introduced in the traditional soup “caldoverde”, and preliminary tests showed good acceptance. Sensory analysis of pasta showed that green seaweed, due to their subtle maritime flavour, have a greater potential for cooking applications than the red seaweed [2,3].",2021-03-13 +30818353,A large-scale pedigree resource of wheat reveals evidence for adaptation and selection by breeders.,"Information on crop pedigrees can be used to help maximise genetic gain in crop breeding and allow efficient management of genetic resources. We present a pedigree resource of 2,657 wheat (Triticum aestivum L.) genotypes originating from 38 countries, representing more than a century of breeding and variety development. Visualisation of the pedigree enables illustration of the key developments in United Kingdom wheat breeding, highlights the wide genetic background of the UK wheat gene pool, and facilitates tracing the origin of beneficial alleles. A relatively high correlation between pedigree- and marker-based kinship coefficients was found, which validated the pedigree and enabled identification of errors in the pedigree or marker data. 
Using simulations with a combination of pedigree and genotype data, we found evidence for significant effects of selection by breeders. Within crosses, genotypes are often more closely related than expected by simulations to one of the parents, which indicates selection for favourable alleles during the breeding process. Selection across the pedigree was demonstrated on a subset of the pedigree in which 110 genotyped varieties released before the year 2000 were used to simulate the distribution of marker alleles of 45 genotyped varieties released after the year 2000, in the absence of selection. Allelic diversity in the 45 varieties was found to deviate significantly from the simulated distributions at a number of loci, indicating regions under selection over this period. The identification of one of these regions as coinciding with a strong yield component quantitative trait locus (QTL) highlights both the potential of the remaining loci as wheat breeding targets for further investigation, as well as the utility of this pedigree-based methodology to identify important breeding targets in other crops. Further evidence for selection was found as greater linkage disequilibrium (LD) for observed versus simulated genotypes within all chromosomes. This difference was greater at shorter genetic distances, indicating that breeder selections have conserved beneficial linkage blocks. Collectively, this work highlights the benefits of generating detailed pedigree resources for crop species. The wheat pedigree database developed here represents a valuable community resource and will be updated as new varieties are released at https://www.niab.com/pages/id/501/UK_Wheat_varieties_Pedigree.",2019-02-28 +32090260,HSPMdb: a computational repository of heat shock protein modulators. ,"Heat shock proteins (Hsp) are among highly conserved proteins across all domains of life. 
Though originally discovered as a cellular response to stress, these proteins are also involved in a wide range of cellular functions such as protein refolding, protein trafficking and cellular signalling. A large number of potential Hsp modulators are under clinical trials against various human diseases. As the number of modulators targeting Hsps is growing, there is a need to develop a comprehensive knowledge repository of these findings which is largely scattered. We have thus developed a web-accessible database, HSPMdb, which is a first of its kind manually curated repository of experimentally validated Hsp modulators (activators and inhibitors). The data was collected from 176 research articles and current version of HSPMdb holds 10 223 entries of compounds that are known to modulate activities of five major Hsps (Hsp100, Hsp90, Hsp70, Hsp60 and Hsp40) originated from 15 different organisms (i.e. human, yeast, bacteria, virus, mouse, rat, bovine, porcine, canine, chicken, Trypanosoma brucei and Plasmodium falciparum). HSPMdb provides comprehensive information on biological activities as well as the chemical properties of Hsp modulators. The biological activities of modulators are presented as enzymatic activity and cellular activity. Under the enzymatic activity field, parameters such as IC50, EC50, DC50, Ki and KD have been provided. In the cellular activity field, complete information on cellular activities (percentage cell growth inhibition, EC50 and GI50), type of cell viability assays and cell line used has been provided. One of the important features of HSPMdb is that it allows users to screen whether or not their compound of interest has any similarity with the previously known Hsp modulators. We anticipate that HSPMdb would become a valuable resource for the broader scientific community working in the area of chaperone biology and protein misfolding diseases. 
HSPMdb is freely accessible at http://bioinfo.imtech.res.in/bvs/hspmdb/index.php.",2020-01-01 +34716395,Auto-qPCR; a python-based web app for automated and reproducible analysis of qPCR data.,"Quantifying changes in DNA and RNA levels is essential in numerous molecular biology protocols. Quantitative real time PCR (qPCR) techniques have evolved to become commonplace, however, data analysis includes many time-consuming and cumbersome steps, which can lead to mistakes and misinterpretation of data. To address these bottlenecks, we have developed an open-source Python software to automate processing of result spreadsheets from qPCR machines, employing calculations usually performed manually. Auto-qPCR is a tool that saves time when computing qPCR data, helping to ensure reproducibility of qPCR experiment analyses. Our web-based app ( https://auto-q-pcr.com/ ) is easy to use and does not require programming knowledge or software installation. Using Auto-qPCR, we provide examples of data treatment, display and statistical analyses for four different data processing modes within one program: (1) DNA quantification to identify genomic deletion or duplication events; (2) assessment of gene expression levels using an absolute model, and relative quantification (3) with or (4) without a reference sample. Our open access Auto-qPCR software saves the time of manual data analysis and provides a more systematic workflow, minimizing the risk of errors. Our program constitutes a new tool that can be incorporated into bioinformatic and molecular biology pipelines in clinical and research labs.",2021-10-29 +,PSVII-26 Detection of selection signatures in Civil and Large White pig breeds,"Abstract The detection of selection signatures can help to understand the mechanisms of artificial selection. The purpose of this work is to determine the selection signatures in Civil pig relatively Large White breed. 
The research was carried out on Large White of three breeding centers (LW_1_New=16, LW_2_New=12, LW_3_New=15) and Civilskay, bred in Russia on the basis of local pigs and Large White boars, (Civil=17). GeneSeek GGP Porcine HD BeadChip (San Diego, USA) was used. A heatmap plot of genomic relationship matrix (GRM) was used to evaluate genetic variability between populations. The signals of diversifying selection were detected using Fst and Smoothing Fst (R package Lokern). QTLs and genes were identified and annotated in the Ensembl genome browser (Sscrofa 11.1) (https://www.ensembl.org/index.html), Panther 15.0 (http://www.pantherdb.org/) and QTLdb (https://www.animalgenome.org/cgi-bin/QTLdb/index).Data visualization from heatmapplot (Fig.) showed that all individuals are grouped in relation to their groups. However, the groups LW_1_New, LW_3_New and CIVIL were separated from LW_2_New. We have selected LW_1_New, LW_3_New and CIVIL to identify the selection signatures. After smoothing of the data by moving average, top 0.1% of the observations were considered as pinpointing CIVIL-specific selection signals. As a result, genome regions with strong selection signals on SSC1:214634036-217738857, SSSC2:137533941-138048506, SSC4:24655041-30832595, SSC7:27386432-28655399 and SSC8:55806826-56299366 were found. In these areas, 535 QTLs have been identified, including 347 QTLs for Meat and Carcass Traits (the majority related to Backfat), 67 QTLs Production (Average daily gain or Body weight), 62 QTLs Health (Mean corpuscular volume, Actinobacillus pleuropneumoniae susceptibility et al.), 32 QTLs Reproduction (Teat number) and 27 QTLs Exterior (Leg conformation). In general, 32 genes are defined in these regions (SSC1-11; SSC2-5; SSC4-10; SSC7-3; SSC8-3) (Table). They are mainly involved in the following biological processes: metabolic (GO:0008152), cellular (GO:0009987) and biological regulation (GO:0065007). 
This research was supported by the Russian Scientific Foundation (RSF) within Project No. 19-16-00109.",2020-11-01 +33237286,UniProt: the universal protein knowledgebase in 2021.,"The aim of the UniProt Knowledgebase is to provide users with a comprehensive, high-quality and freely accessible set of protein sequences annotated with functional information. In this article, we describe significant updates that we have made over the last two years to the resource. The number of sequences in UniProtKB has risen to approximately 190 million, despite continued work to reduce sequence redundancy at the proteome level. We have adopted new methods of assessing proteome completeness and quality. We continue to extract detailed annotations from the literature to add to reviewed entries and supplement these in unreviewed entries with annotations provided by automated systems such as the newly implemented Association-Rule-Based Annotator (ARBA). We have developed a credit-based publication submission interface to allow the community to contribute publications and annotations to UniProt entries. We describe how UniProtKB responded to the COVID-19 pandemic through expert curation of relevant entries that were rapidly made available to the research community through a dedicated portal. UniProt resources are available under a CC-BY (4.0) license via the web at https://www.uniprot.org/.",2021-01-01 +31535335,Atacama Database: a platform of the microbiome of the Atacama Desert.,"The Atacama Desert is one of the oldest and driest places on Earth. In the last decade, microbial richness and diversity has been acknowledged as an important biological resource of this region. Owing to the value of the microbial diversity apparent in potential biotechnology applications and conservation purposes, it is necessary to catalogue these microbial communities to promote research activities and help to preserve the wide range of ecological niches of the Atacama region. 
A prototype Atacama Database has been designed and it provides a description of the rich microbial diversity of the Atacama Desert, and helps to visualise available literature resources. Data has been collected, curated, and organised into several categories to generate a single record for each organism in the database that covers classification, isolation metadata, morphology, physiology, genome and metabolism information. The current version of Atacama Database contains 2302 microorganisms and includes cultured and uncultured organisms retrieved from different environments within the desert between 1984 and 2016. These organisms are distributed in bacterial, archaeal or eukaryotic domains, along with those that are unclassified taxonomically. The initial prototype of the Atacama Database includes a basic search and taxonomic and advanced search tools to allow identification and comparison of microbial populations, and space distribution within this biome. A geolocation search was implemented to visualise the microbial diversity of the ecological niches defined by sectors and extract general information of the sampling sites. This effort will aid understanding of the microbial ecology of the desert, microbial population dynamics, seasonal behaviour, impact of climate change over time, and reveal further biotechnological applications of these microorganisms. The Atacama Database is freely available at: https://www.atacamadb.cl.",2019-09-18 +34407377,Using Nonword Repetition to Identify Developmental Language Disorder in Monolingual and Bilingual Children: A Systematic Review and Meta-Analysis.,"Purpose A wealth of studies has assessed the diagnostic value of the nonword repetition task (NWRT) for the detection of developmental language disorder (DLD) in the clinical context of speech and language therapy, first in monolingual children and, more recently, in bilingual children. 
This review article reviews this literature systematically and conducts a meta-analysis on the discriminative power of this type of task in both populations. Method Three databases were used to select articles based on keyword combinations, which were then reviewed for relevance and methodological rigor based on internationally recognized checklists. From an initial pool of 488 studies, 46 studies were selected for inclusion in the systematic review, and 35 of these studies could be included in a meta-analysis. Results Most of the articles report significant discrimination between children with and without DLD in both monolingual and bilingual contexts, and the meta-analysis shows a large mean effect size. Three factors (age of the child, linguistic status, and language specificity of the task) yielded enough quantitative data for further exploration. Subgroups analysis shows variance in effect sizes, but none of the three factors, neither their interactions, were significant in a metaregression. We discuss how other, less explored factors (e.g., nature of the stimuli, scoring methods) could also contribute to differences in results. Sensitivity and specificity analyses reported in 33 studies confirmed that, despite possible effect size differences, the diagnostic accuracy of the NWRT is generally near thresholds considered to be discriminatory. It generally increases when it is combined with other tasks (e.g., parental questionnaire). Conclusions This review indicates that the NWRT is a promising diagnostic tool to identify children with DLD in monolingual and bilingual contexts with a large mean effect size. However, it seems necessary to choose the precise NWRT materials based on the children's language background and to complement the assessment sessions with other tools in order to ensure diagnosis and to obtain complete language profile of the child. 
Supplemental Material https://doi.org/10.23641/asha.15152370.",2021-08-18 +31856735,"RNA-seq, de novo transcriptome assembly and flavonoid gene analysis in 13 wild and cultivated berry fruit species with high content of phenolics.","

Background

Flavonoids are produced in all flowering plants in a wide range of tissues including in berry fruits. These compounds are of considerable interest for their biological activities, health benefits and potential pharmacological applications. However, transcriptomic and genomic resources for wild and cultivated berry fruit species are often limited, despite their value in underpinning the in-depth study of metabolic pathways, fruit ripening as well as in the identification of genotypes rich in bioactive compounds.

Results

To access the genetic diversity of wild and cultivated berry fruit species that accumulate high levels of phenolic compounds in their fleshy berry(-like) fruits, we selected 13 species from Europe, South America and Asia representing eight genera, seven families and seven orders within three clades of the kingdom Plantae. RNA from either ripe fruits (ten species) or three ripening stages (two species) as well as leaf RNA (one species) were used to construct, assemble and analyse de novo transcriptomes. The transcriptome sequences are deposited in the BacHBerryGEN database (http://jicbio.nbi.ac.uk/berries) and were used, as a proof of concept, via its BLAST portal (http://jicbio.nbi.ac.uk/berries/blast.html) to identify candidate genes involved in the biosynthesis of phenylpropanoid compounds. Genes encoding regulatory proteins of the anthocyanin biosynthetic pathway (MYB and basic helix-loop-helix (bHLH) transcription factors and WD40 repeat proteins) were isolated using the transcriptomic resources of wild blackberry (Rubus genevieri) and cultivated red raspberry (Rubus idaeus cv. Prestige) and were shown to activate anthocyanin synthesis in Nicotiana benthamiana. Expression patterns of candidate flavonoid gene transcripts were also studied across three fruit developmental stages via the BacHBerryEXP gene expression browser (http://www.bachberryexp.com) in R. genevieri and R. idaeus cv. Prestige.

Conclusions

We report a transcriptome resource that includes data for a wide range of berry(-like) fruit species that has been developed for gene identification and functional analysis to assist in berry fruit improvement. These resources will enable investigations of metabolic processes in berries beyond the phenylpropanoid biosynthetic pathway analysed in this study. The RNA-seq data will be useful for studies of berry fruit development and to select wild plant species useful for plant breeding purposes.",2019-12-19 +33174596,MitoCarta3.0: an updated mitochondrial proteome now with sub-organelle localization and pathway annotations.,"The mammalian mitochondrial proteome is under dual genomic control, with 99% of proteins encoded by the nuclear genome and 13 originating from the mitochondrial DNA (mtDNA). We previously developed MitoCarta, a catalogue of over 1000 genes encoding the mammalian mitochondrial proteome. This catalogue was compiled using a Bayesian integration of multiple sequence features and experimental datasets, notably protein mass spectrometry of mitochondria isolated from fourteen murine tissues. Here, we introduce MitoCarta3.0. Beginning with the MitoCarta2.0 inventory, we performed manual review to remove 100 genes and introduce 78 additional genes, arriving at an updated inventory of 1136 human genes. We now include manually curated annotations of sub-mitochondrial localization (matrix, inner membrane, intermembrane space, outer membrane) as well as assignment to 149 hierarchical 'MitoPathways' spanning seven broad functional categories relevant to mitochondria. 
MitoCarta3.0, including sub-mitochondrial localization and MitoPathway annotations, is freely available at http://www.broadinstitute.org/mitocarta and should serve as a continued community resource for mitochondrial biology and medicine.",2021-01-01 +32167906,An Effective Biclustering-Based Framework for Identifying Cell Subpopulations From scRNA-seq Data.,"The advent of single-cell RNA sequencing (scRNA-seq) techniques opens up new opportunities for studying the cell-specific changes in the transcriptomic data. An important research problem related with scRNA-seq data analysis is to identify cell subpopulations with distinct functions. However, the expression profiles of individual cells are usually measured over tens of thousands of genes, and it remains a difficult problem to effectively cluster the cells based on the high-dimensional profiles. An additional challenge of performing the analysis is that, the scRNA-seq data are often noisy and sometimes extremely sparse due to technical limitations and sampling deficiencies. In this paper, we propose a biclustering-based framework called DivBiclust that effectively identifies the cell subpopulations based on the high-dimensional noisy scRNA-seq data. Compared with nine state-of-the-art methods, DivBiclust excels in identifying cell subpopulations with high accuracy as evidenced by our experiments on ten real scRNA-seq datasets with different size and diverse dropout rates. The supplemental materials of DivBiclust, including the source codes, data, and a supplementary document, are available at https://www.github.com/Qiong-Fang/DivBiclust.",2021-11-01 +35113396,Subcellular Proteomics as a Unified Approach of Experimental Localizations and Computed Prediction Data for Arabidopsis and Crop Plants.,"In eukaryotic organisms, subcellular protein location is critical in defining protein function and understanding sub-functionalization of gene families. 
Some proteins have defined locations, whereas others have low specificity targeting and complex accumulation patterns. There is no single approach that can be considered entirely adequate for defining the in vivo location of all proteins. By combining evidence from different approaches, the strengths and weaknesses of different technologies can be estimated, and a location consensus can be built. The Subcellular Location of Proteins in Arabidopsis database ( http://suba.live/ ) combines experimental data sets that have been reported in the literature and is analyzing these data to provide useful tools for biologists to interpret their own data. Foremost among these tools is a consensus classifier (SUBAcon) that computes a proposed location for all proteins based on balancing the experimental evidence and predictions. Further tools analyze sets of proteins to define the abundance of cellular structures. Extending these types of resources to plant crop species has been complex due to polyploidy, gene family expansion and contraction, and the movement of pathways and processes within cells across the plant kingdom. The Crop Proteins of Annotated Location database ( http://crop-pal.org/ ) has developed a range of subcellular location resources including a species-specific voting consensus for 12 plant crop species that offers collated evidence and filters for current crop proteomes akin to SUBA. Comprehensive cross-species comparison of these data shows that the sub-cellular proteomes (subcellulomes) depend only to some degree on phylogenetic relationship and are more conserved in major biosynthesis than in metabolic pathways. Together SUBA and cropPAL created reference subcellulomes for plants as well as species-specific subcellulomes for cross-species data mining. 
These data collections are increasingly used by the research community to provide a subcellular protein location layer, inform models of compartmented cell function and protein-protein interaction network, guide future molecular crop breeding strategies, or simply answer a specific question-where is my protein of interest inside the cell?",2021-01-01 +33281870,DRIM: A Web-Based System for Investigating Drug Response at the Molecular Level by Condition-Specific Multi-Omics Data Integration.,"Pharmacogenomics is the study of how genes affect a person's response to drugs. Thus, understanding the effect of drug at the molecular level can be helpful in both drug discovery and personalized medicine. Over the years, transcriptome data upon drug treatment has been collected and several databases compiled before drug treatment cancer cell multi-omics data with drug sensitivity (IC 50, AUC) or time-series transcriptomic data after drug treatment. However, analyzing transcriptome data upon drug treatment is challenging since more than 20,000 genes interact in complex ways. In addition, due to the difficulty of both time-series analysis and multi-omics integration, current methods can hardly perform analysis of databases with different data characteristics. One effective way is to interpret transcriptome data in terms of well-characterized biological pathways. Another way is to leverage state-of-the-art methods for multi-omics data integration. In this paper, we developed Drug Response analysis Integrating Multi-omics and time-series data (DRIM), an integrative multi-omics and time-series data analysis framework that identifies perturbed sub-pathways and regulation mechanisms upon drug treatment. The system takes drug name and cell line identification numbers or user's drug control/treat time-series gene expression data as input. Then, analysis of multi-omics data upon drug treatment is performed in two perspectives. 
For the multi-omics perspective analysis, IC 50-related multi-omics potential mediator genes are determined by embedding multi-omics data to gene-centric vector space using a tensor decomposition method and an autoencoder deep learning model. Then, perturbed pathway analysis of potential mediator genes is performed. For the time-series perspective analysis, time-varying perturbed sub-pathways upon drug treatment are constructed. Additionally, a network involving transcription factors (TFs), multi-omics potential mediator genes, and perturbed sub-pathways is constructed, and paths to perturbed pathways from TFs are determined by an influence maximization method. To demonstrate the utility of our system, we provide analysis results of sub-pathway regulatory mechanisms in breast cancer cell lines of different drug sensitivity. DRIM is available at: http://biohealth.snu.ac.kr/software/DRIM/.",2020-11-12 +34546950,Clinical Features for the Diagnosis of Pediatric Urinary Tract Infections: Systematic Review and Meta-Analysis.,"

Purpose

Accurate diagnosis of urinary tract infection in children is essential because children left untreated can experience permanent renal injury. We aimed to assess the diagnostic value of clinical features of pediatric urinary tract infection.

Methods

We performed a systematic review and meta-analysis of diagnostic test accuracy studies in ambulatory care. We searched the PubMed, Embase, Web of Science, Cumulative Index to Nursing and Allied Health Literature, Cochrane Central Register of Controlled Trials, Health Technology Assessment, and Database of Abstracts of Reviews of Effects databases from inception to January 27, 2020 for studies reporting 2 × 2 diagnostic accuracy data for clinical features compared with urine culture in children aged <18 years. For each clinical feature, we calculated likelihood ratios and posttest probabilities of urinary tract infection. To estimate summary parameters, we conducted a bivariate random effects meta-analysis and hierarchical summary receiver operating characteristic analysis.

Results

A total of 35 studies (N = 78,427 patients) of moderate to high quality were included, providing information on 58 clinical features and 6 prediction rules. Only circumcision (negative likelihood ratio [LR-] 0.24; 95% CI, 0.08-0.72; n = 8), stridor (LR- 0.20; 95% CI, 0.05-0.81; n = 1), and diaper rash (LR- 0.13; 95% CI, 0.02-0.92; n = 1) were useful for ruling out urinary tract infection. Body temperature or fever duration showed limited diagnostic value (area under the receiver operating characteristic curve 0.61; 95% CI, 0.47-0.73; n = 16). The Diagnosis of Urinary Tract Infection in Young Children score, Gorelick Scale score, and UTIcalc (https://uticalc.pitt.edu) might be useful to identify children eligible for urine sampling.

Conclusions

Few clinical signs and symptoms are useful for diagnosing or ruling out urinary tract infection in children. Clinical prediction rules might be more accurate; however, they should be validated externally. Physicians should not restrict urine sampling to children with unexplained fever or other features suggestive of urinary tract infection.",2021-09-01 +34783395,Tripeptide loop closure: A detailed study of reconstructions based on Ramachandran distributions.,"Tripeptide loop closure (TLC) is a standard procedure to reconstruct protein backbone conformations, by solving a zero-dimensional polynomial system yielding up to 16 solutions. In this work, we first show that multiprecision is required in a TLC solver to guarantee the existence and the accuracy of solutions. We then compare solutions yielded by the TLC solver against tripeptides from the Protein Data Bank. We show that these solutions are geometrically diverse (up to 3Å Root mean square deviation with respect to the data) and sound in terms of potential energy. Finally, we compare Ramachandran distributions of data and reconstructions for the three amino acids. The distribution of reconstructions in the second angular space ϕ2ψ2 stands out, with a rather uniform distribution leaving a central void. We anticipate that these insights, coupled to our robust implementation in the Structural Bioinformatics Library ( https://sbl.inria.fr/doc/Tripeptide_loop_closure-user-manual.html), will help understanding the properties of TLC reconstructions, with potential applications to the generation of conformations of flexible loops in particular.",2021-12-11 +34733938,Key immune-related gene ITGB2 as a prognostic signature for acute myeloid leukemia.,"

Background

The tumor microenvironment (TME) has an essential role in tumorigenesis, progression, and therapeutic response in many cancers. Currently, the role of TME in acute myeloid leukemia (AML) is unclear. This study investigated the correlation between immune-related genes and prognosis in AML patients.

Methods

Transcriptome RNA-Seq data for 151 AML samples were downloaded from TCGA database (https://portal.gdc.cancer.gov/), and the immune related genes (irgs) were selected from Immport database. Bioinformatics screening was used to identify irgs for AML, and genes with a critical role in the prognosis of AML were selected for further analysis. To confirm the prognostic role of irgs in AML, we undertook protein-protein interaction (PPI) network analysis of the top 30 interacting genes. We then investigated associations between immune cell infiltration and prognosis in AML patients. Immunohistochemistry was used to validate protein expression levels between AML and normal bone marrow samples. Analysis of the drug sensitivity of the selected gene was then performed.

Results

The integrin lymphocyte function-associated antigen 1 (CD11A/CD18; ITGAL/ITGB2) was identified as the key immune-related gene that significantly influenced prognosis in AML patients. Overexpression of ITGB2 indicated poor prognosis in AML patients (P=0.007). Risk modeling indicated that a high-risk score led to poor outcomes (P=3.076e-08) in AML patients. The risk model showed accuracy for predicting prognosis in AML patients, with area under curve (AUC) at 1 year, 0.816; AUC at 3 years, 0.82; and AUC at 5 years, 0.875. In addition, we found that ITGB2 had a powerful influence on immune cell infiltration into AML TME. The results of immunohistochemistry showed that AML patients had significantly higher ITGB2 protein expression than normal samples. The AML patients were divided into 2 groups based on ITGB2 risk scores. Drug sensitivity test results indicated that the high-risk group was sensitive to cytarabine, axitinib, bosutinib, and docetaxel, but resistant to cisplatin and bortezomib.

Conclusions

In the present study, we found that ITGB2 may be able to serve as a biomarker for assessing prognosis and drug sensitivity in AML patients.",2021-09-01 +32356758,Understanding the Limit of Open Search in the Identification of Peptides With Post-translational Modifications - A Simulation-Based Study.,"Peptide identification from tandem mass spectrometry data is a fundamental task in computational proteomics. Traditional algorithms perform well when facing unmodified peptides. However, when peptides have post-translational modifications (PTMs), these methods cannot provide satisfactory results. Recently, open search methods have been proposed to identify peptides with PTMs. While the performance of these new methods is promising, the identification results vary greatly with respect to the quality of tandem mass spectra and the number of PTMs in peptides. This motivates us to systematically study the relationship between the performance of open search methods and the quality parameters of tandem mass spectrometry data as well as the number of PTMs in peptides. In this paper, we have proposed an analytical model derived from simulated data to describe the relationship between the probability of obtaining correct results and the spectrum quality as well as the number of PTMs. The proposed model is verified using 1,464,146 real experimental spectra. The consistent trend observed in both simulated data and real data reveals the necessary conditions to effectively apply open search methods. Source code of our study is available at http://bioinformatics.ust.hk/PST.html.",2021-11-01 +34897649,"Discussion on ""distributional independent component analysis for diverse neuroimaging modalities"" by Ben Wu, Subhadip Pal, Jian Kang, and Ying Guo.","I applaud the authors on their innovative generalized independent component analysis (ICA) framework for neuroimaging data. 
Although ICA has enjoyed great popularity for the analysis of functional magnetic resonance imaging (fMRI) data, its applicability to other modalities has been limited because standard ICA algorithms may not be directly applicable to a diversity of data representations. This is particularly true for single-subject structural neuroimaging, where only a single measurement is collected at each location in the brain. The ingenious idea of Wu et al. (2021) is to transform the data to a vector of probabilities via a mixture distribution with K components, which (following a simple transformation to $\mathbb{R}^{K-1}$) can be directly analyzed with standard ICA algorithms, such as infomax (Bell and Sejnowski, 1995) or fastICA (Hyvarinen, 1999). The underlying distribution forming the basis of the mixture is customized to the particular modality being analyzed. This framework, termed distributional ICA (DICA), is applicable in theory to nearly any neuroimaging modality. This has substantial implications for ICA as a general tool for neuroimaging analysis, with particular promise for structural modalities and multimodal studies. This invited commentary focuses on the applicability and potential of DICA for different neuroimaging modalities, questions around details of implementation and performance, and limitations of the validation study presented in the paper.",2021-12-12 +33522913,IsomiR_Window: a system for analyzing small-RNA-seq data in an integrative and user-friendly manner.,"

Background

IsomiRs are miRNA variants that vary in length and/or sequence when compared to their canonical forms. These variants display differences in length and/or sequence, including additions or deletions of one or more nucleotides (nts) at the 5' and/or 3' end, internal editings or untemplated 3' end additions. Most available tools for small RNA-seq data analysis do not allow the identification of isomiRs and often require advanced knowledge of bioinformatics. To overcome this, we have developed IsomiR Window, a platform that supports the systematic identification, quantification and functional exploration of isomiR expression in small RNA-seq datasets, accessible to users with no computational skills.

Methods

IsomiR Window enables the discovery of isomiRs and identification of all annotated non-coding RNAs in RNA-seq datasets from animals and plants. It comprises two main components: the IsomiR Window pipeline for data processing; and the IsomiR Window Browser interface. It integrates over ten third-party softwares for the analysis of small-RNA-seq data and holds a new algorithm that allows the detection of all possible types of isomiRs. These include 3' and 5'end isomiRs, 3' end tailings, isomiRs with single nucleotide polymorphisms (SNPs) or potential RNA editings, as well as all possible fuzzy combinations. IsomiR Window includes all required databases for analysis and annotation, and is freely distributed as a Linux virtual machine, including all required software.

Results

IsomiR Window processes several datasets in an automated manner, without restrictions of input file size. It generates high quality interactive figures and tables which can be exported into different formats. The performance of isomiR detection and quantification was assessed using simulated small-RNA-seq data. For correctly mapped reads, it identified different types of isomiRs with high confidence and 100% accuracy. The analysis of a small RNA-seq data from Basal Cell Carcinomas (BCCs) using isomiR Window confirmed that miR-183-5p is up-regulated in Nodular BCCs, but revealed that this effect was predominantly due to a novel 5'end variant. This variant displays a different seed region motif and 1756 isoform-exclusive mRNA targets that are significantly associated with disease pathways, underscoring the biological relevance of isomiR-focused analysis. IsomiR Window is available at https://isomir.fc.ul.pt/ .",2021-02-01 +34792554,TDFragMapper: a visualization tool for evaluating experimental parameters in top-down proteomics. ,"We present a new software-tool allowing an easy visualization of fragment ions and thus a rapid evaluation of key experimental parameters on the sequence coverage obtained for the MS/MS analysis of intact proteins. Our tool can process data obtained from various deconvolution and fragment assignment software. We demonstrate that TDFragMapper can rapidly highlight the experimental fragmentation parameters that are critical to the characterization of intact proteins of various size using top-down proteomics. TDFragMapper, a demonstration video and user tutorial are freely available for academic use at https://msbio.pasteur.fr/tdfragmapper; all data are thus available from the ProteomeXchange consortium (identifier PXD024643). 
Supplementary data are available at Bioinformatics online.",2021-11-18 +32160557,Multi-omics Analysis of the Intermittent Fasting Response in Mice Identifies an Unexpected Role for HNF4α.,"Every-other-day fasting (EODF) is an effective intervention for the treatment of metabolic disease, including improvements in liver health. But how the liver proteome is reprogrammed by EODF is currently unknown. Here, we use EODF in mice and multi-omics analysis to identify regulated pathways. Many changes in the liver proteome are distinct after EODF and absent after a single fasting bout. Key among these is the simultaneous induction by EODF of de novo lipogenesis and fatty acid oxidation enzymes. Together with activation of oxidative stress defenses, this contributes to the improvements in glucose tolerance and lifespan after EODF. Enrichment analysis shows unexpected downregulation of HNF4α targets by EODF, and we confirm HNF4α inhibition. Suppressed HNF4α targets include bile synthetic enzymes and secreted proteins, such as α1-antitrypsin or inflammatory factors, which reflect EODF phenotypes. Interactive online access is provided to a data resource (https://www.larancelab.com/eodf), which provides a global view of fasting-induced mechanisms in mice.",2020-03-01 +29897484,ILDgenDB: integrated genetic knowledge resource for interstitial lung diseases (ILDs). ,"Interstitial lung diseases (ILDs) are a diverse group of ∼200 acute and chronic pulmonary disorders that are characterized by variable amounts of inflammation, fibrosis and architectural distortion with substantial morbidity and mortality. Inaccurate and delayed diagnoses increase the risk, especially in developing countries. Studies have indicated the significant roles of genetic elements in ILDs pathogenesis. 
Therefore, the first genetic knowledge resource, ILDgenDB, has been developed with an objective to provide ILDs genetic data and their integrated analyses for the better understanding of disease pathogenesis and identification of diagnostics-based biomarkers. This resource contains literature-curated disease candidate genes (DCGs) enriched with various regulatory elements that have been generated using an integrated bioinformatics workflow of databases searches, literature-mining and DCGs-microRNA (miRNAs)-single nucleotide polymorphisms (SNPs) association analyses. To provide statistical significance to disease-gene association, ILD-specificity index and hypergeometric test scores were also incorporated. Association analyses of miRNAs, SNPs and pathways responsible for the pathogenesis of different sub-classes of ILDs were also incorporated. Manually verified 299 DCGs and their significant associations with 1932 SNPs, 2966 miRNAs and 9170 miR-polymorphisms were also provided. Furthermore, 216 literature-mined and proposed biomarkers were identified. The ILDgenDB resource provides user-friendly browsing and extensive query-based information retrieval systems. Additionally, this resource also facilitates graphical view of predicted DCGs-SNPs/miRNAs and literature associated DCGs-ILDs interactions for each ILD to facilitate efficient data interpretation. Outcomes of analyses suggested the significant involvement of immune system and defense mechanisms in ILDs pathogenesis. This resource may potentially facilitate genetic-based disease monitoring and diagnosis. Database URL: http://14.139.240.55/ildgendb/index.php.",2018-01-01 +,Eye and Visual Health in New England: Findings from the Healthy Aging Data Reports,"Abstract Eye and visual health issues in older adults are prevalent, often undetected and untreated, but can contribute to poor physical and mental health issues, and higher mortality rates. 
The study describes state and local community rates of eye and visual health indicators (cataract, glaucoma, self-reported vision difficulty, and clinical diagnosis of blindness or visual impairment) of older adults 65+ in MA, NH, RI, and CT. Data sources used to calculate rates were: the American Community Survey (2014-2018 RI, 2012-2016 MA and NH, 2014-2018 CT) and the Medicare Current Beneficiary Summary File (2016-2017 RI, 2015 MA and NH, 2016-2017 CT). Small area estimation techniques were used to calculate age-sex adjusted community rates for more than 150 health indicators (https://healthyagingdatareports.org/). Disparities in rates were examined for 4 eye and visual health indicators: cataract, glaucoma, self-reported vision difficulty, and clinical diagnosis of blindness or visual impairment. Results showed variability in rates across states. MA had the highest rates of self-reported vision difficulty (5.8%) and blindness or visual impairment (1.5%), and the greatest differences in rates of self-reported vision difficulty (0.00-40.91%). CT had the highest rates of glaucoma (28.3%), and the greatest differences in rates of glaucoma (19.51-41.91%) and blindness or visual impairment (0.44-4.39%). RI had the highest rates of cataract (67.5%). Understanding the distribution of community rates makes disparities evident, and may help practitioners and policymakers to allocate resources to areas of highest need.",2021-01-01 +,The 12th International Food Data Conference (IFDC): From food composition to better policies and programmes in nutrition and agriculture,"The 12th International Food Data Conference (IFDC) was held in Buenos Aires, Argentina, from 11 to 13 October 2017. It was organized by Instituto Nacional de Investigaciones Biológica and International Network of Data System/FAO (INSIBIO and INFOODS/FAO) as a pre-conference of the 21st International Congress of Nutrition (ICN2017). 
The theme was “From Food Composition to Better Policies and Programmes in Nutrition and Agriculture”. A total of 152 delegates from 37 countries participated in the Conference. The programme included one keynote address, 46 oral presentations and 82 poster displays. The conference programme along with the Power Point files of the oral presentations are available on the conference website (http://www.ifdc2017.com/). The Conference was very successful, with high-quality scientific oral presentations and posters, and very satisfactory participation. The participants were able to exchange knowledge and personal experiences, and develop new ideas while discussing current research and other matters.",2020-07-01 +33119759,gcType: a high-quality type strain genome database for microbial phylogenetic and functional research.,"Taxonomic and functional research of microorganisms has increasingly relied upon genome-based data and methods. As the depository of the Global Catalogue of Microorganisms (GCM) 10K prokaryotic type strain sequencing project, Global Catalogue of Type Strain (gcType) has published 1049 type strain genomes sequenced by the GCM 10K project which are preserved in global culture collections with a valid published status. Additionally, the information provided through gcType includes >12 000 publicly available type strain genome sequences from GenBank incorporated using quality control criteria and standard data annotation pipelines to form a high-quality reference database. This database integrates type strain sequences with their phenotypic information to facilitate phenotypic and genotypic analyses. Multiple formats of cross-genome searches and interactive interfaces have allowed extensive exploration of the database's resources. In this study, we describe web-based data analysis pipelines for genomic analyses and genome-based taxonomy, which could serve as a one-stop platform for the identification of prokaryotic species. 
The number of type strain genomes that are published will continue to increase as the GCM 10K project increases its collaboration with culture collections worldwide. Data of this project is shared with the International Nucleotide Sequence Database Collaboration. Access to gcType is free at http://gctype.wdcm.org/.",2021-01-01 +33417691,HGFDB: a collective database of helmeted guinea fowl genomics. ,"As a vigorous and hardy and an almost disease-free game bird, the domestic helmeted guinea fowl (Numida meleagris, hereafter HGF) has attracted considerable attention in a large number of genetic study projects. However, none of the current/recent avian databases are related to this agriculturally and commercially important poultry species. To address this data gap, we developed Helmeted Guinea Fowl Database (HGFDB), which manages and shares HGF genomic and genetic data. By processing the data of genome assembly, sequencing reads and genetic variations, we organized them into eight modules, which correspond to 'Home', 'Genome', 'Re-sequence', 'Gene', 'Variation', 'Download', 'Tools' and 'Help', HGFDB provides the most comprehensive view of the HGF genome to date and will be relevant for future studies on HGF structural and functional genomics and genetic improvement. Database URL: http://hgfdb.ynau.edu.cn/.",2021-01-01 +34890217,Evaluation of Remote Categorical Loudness Scaling.,"

Purpose

The aims of this study were to (a) demonstrate the feasibility of administering categorical loudness scaling (CLS) tests in a remote setting, (b) assess the reliability of remote compared with laboratory CLS results, and (c) provide preliminary evidence of the validity of remote CLS testing.

Method

CLS data from 21 adult participants collected in a home setting were compared to CLS data collected in a laboratory setting from previous studies. Five participants took part in studies in both settings. Precalibrated equipment was delivered to participants who performed headphone output level checks and measured ambient noise levels. After a practice run, CLS measurements were collected for two runs at 1 and 4 kHz.

Results

Mean headphone output levels were within 1.5 dB of the target calibration level. Mean ambient noise levels were below the target level. Within-run variability was similar between the two settings, but across-run bias was smaller for data collected in the laboratory setting compared with the remote setting. Systematic differences in CLS functions were not observed for the five individuals who participated in both settings.

Conclusions

This study demonstrated that precise stimulus levels can be delivered and background noise levels can be controlled in a home environment. Across-run bias for remote CLS was larger than for in-laboratory CLS, indicating that further work is needed to improve the reliability of CLS data collected in remote settings. Supplemental Material https://doi.org/10.23641/asha.17131856.",2021-12-10 +34052284,miREV: An Online Database and Tool to Uncover Potential Reference RNAs and Biomarkers in Small-RNA Sequencing Data Sets from Extracellular Vesicles Enriched Samples.,"Extracellular vesicles (EVs) are nano-sized, membrane-enclosed vesicles released by cells for intercellular communication. EVs are involved in pathological processes and miRNAs in EVs have gained interest as easily accessible biomolecules in liquid biopsies for diagnostic purposes. To validate potential miRNA biomarker, transcriptome analyses must be carried out to detect suitable reference miRNAs. miREV is a database with over 400 miRNA sequencing data sets and helps the researcher to find suitable reference miRNAs for their individual experimental setup. The researcher can put together a specific sample set in miREV, which is similar to his own experimental concept in order to find the most suitable references. This allows to run validation experiments without having to carry out a complex and costly transcriptome analysis priorly. Additional read count tables of each generated sample set are downloadable for further analysis. miREV is freely available at https://www.physio.wzw.tum.de/mirev/.",2021-05-28 +31598083,Construction of a core collection of eggplant (Solanum melongena L.) based on genome-wide SNP and SSR genotypes.,"A core collection of eggplant (Solanum melongena L.) was developed based on a dataset of genome-wide 831 SNP and 50 SSR genotypes analyzed in 893 accessions of eggplant genetic resources collected in the NARO Genebank using the Core Hunter II program. 
The 893 accessions were collected worldwide, mainly Asia. Genetic variation and population structure among the 893 eggplant accessions were characterized. The genetic diversity of the Asian accessions, especially the South Asian and Southeast Asian accessions, forming the center of diversity in eggplant, was higher than that of the other regions. The resulting core collection, World Eggplant Core (WEC) collection consisted of 100 accessions basically collected from the high genetic diversity countries. Based on the results of the cluster and STRUCTURE analyses with SNP genotypes, the WEC collection was divided into four clusters (S1-S4). Each cluster corresponds to a geographical group as below, S1; the European, American and African countries, S2; the East Asian countries, S3; the Southeast Asian countries, S4; the South Asian and Southeast Asian countries. The genotype and phenotype data of the WEC collection are available from the VegMarks database (https://vegmarks.nivot.affrc.go.jp/resource/), and seed samples are available from the NARO Genebank (https://www.gene.affrc.go.jp/databases-core_collections.php).",2019-07-10 +33494129,Data resource profile: the allergic disease database of the Korean National Health Insurance Service.,"Researchers have been interested in probing how the environmental factors associated with allergic diseases affect the use of medical services. Considering this demand, we have constructed a database, named the Allergic Disease Database, based on the National Health Insurance Database (NHID). The NHID contains information on demographic and medical service utilization for approximately 99% of the Korean population. This study targeted 3 major allergic diseases, including allergic rhinitis, atopic dermatitis, and asthma. 
For the target diseases, our database provides daily medical service information, including the number of daily visits from 2013 and 2017, categorized by patients' characteristics such as address, sex, age, and duration of residence. We provide additional information, including yearly population, a number of patients, and averaged geocoding coordinates by eup, myeon, and dong district code (the smallest-scale administrative units in Korea). This information enables researchers to analyze how daily changes in the environmental factors of allergic diseases (e.g., particulate matter, sulfur dioxide, and ozone) in certain regions would influence patients' behavioral patterns of medical service utilization. Moreover, researchers can analyze long-term trends in allergic diseases and the health effects caused by environmental factors such as daily climate and pollution data. The advantages of this database are easy access to data, additional levels of geographic detail, time-efficient data-refining and processing, and a de-identification process that minimizes the exposure of identifiable personal information. All datasets included in the Allergic Disease Database can be downloaded by accessing the National Health Insurance Service data sharing webpage (https://nhiss.nhis.or.kr).",2021-01-21 +30217145,Pseudocohnilembus persalinus genome database - the first genome database of facultative scuticociliatosis pathogens.,"

Background

Pseudocohnilembus persalinus, a unicellular ciliated protozoan, is one of the commonest facultative pathogens. We sequenced the macronuclear genome of P. persalinus in 2015, which provided new insights into its pathogenicity.

Results

Here, we present the P. persalinus genome database (PPGD) ( http://ciliates.ihb.ac.cn/database/home/#pp ), the first genome database for the scuticociliatosis pathogens. PPGD integrates P. persalinus macronuclear genomic and transcriptomic data, including genome sequence, transcript, gene expression data, and gene annotation, as well as relevant information on its biology, morphology and taxonomy. The database also provides functions for visualizing, analyzing, and downloading the data.

Conclusion

PPGD is a useful resource for studying scuticociliates or scuticociliatosis. We will continue to update the PPGD by integrating more data and aim to integrate the PPGD with other ciliate databases to build a comprehensive ciliate genome database.",2018-09-14 +,"Predicting growth of Listeria monocytogenes at dynamic conditions during manufacturing, ripening and storage of cheeses – Evaluation and application of models","Mathematical models were evaluated to predict growth of L. monocytogenes in mould/smear-ripened cheeses with measured dynamic changes in product characteristics and storage conditions. To generate data for model evaluation three challenge tests were performed with mould-ripened cheeses produced by using milk inoculated with L. monocytogenes. Growth of L. monocytogenes and lactic acid bacteria (LAB) in the rind and in the core of cheeses were quantified together with changes in product characteristics over time (temperature, pH, NaCl/aw, lactic- and acetic acid concentrations). The performance of nine available L. monocytogenes growth models was evaluated using growth responses from the present study and from literature together with the determined or reported dynamic product characteristics and storage conditions (46 kinetics). The acceptable simulation zone (ASZ) method was used to assess model performance. A reduced version of the Martinez-Rios et al. (2019) model (https://doi.org/10.3389/fmicb.2019.01510) and the model of Østergaard et al. (2014) (https://doi.org/10.1016/j.ijfoodmicro.2014.07.012) had acceptable performance with a ASZ-score of 71-70% for L. monocytogenes growth in mould/smear-ripened cheeses. Models from Coroller et al. (2012) (https://doi.org/10.1016/j.ijfoodmicro.2011.09.023) had close to acceptable performance with ASZ-scores of 67–69%. The validated models (Martinez-Rios et al., 2019; Østergaard et al., 2014) can be used to facilitate the evaluation of time to critical L. 
monocytogenes growth for mould/smear-ripened cheeses including modification of recipes with for example reduced salt/sodium or to support exposure assessment studies for these cheeses.",2020-12-01 +,Characterization of the biosorption of fast black azo dye K salt by the bacterium Rhodopseudomonas palustris 51ATA strain,"Removal of dyes from wastewater by microorganisms through adsorption, degradation, or accumulation has been investigated. Biological methods used for dye treatment are generally always effective and environmentally friendly. In this study, biosorption of the Fast Black K salt azo dye by the bacterium Rhodopseudomonas palustris 51ATA was studied spectrophotometrically, at various pH (2–10), temperatures (25°C, 35°C, and 45°C) and dye concentrations (25–400 mg L⁻¹). The bacterial strain showed extremely good dye-removing potential at various dye concentrations. IR studies at different temperatures showed that the dye was adsorbed on the bacterial surface at lower temperatures. Characteristics of the adsorption process were investigated by Scatchard analysis at 25°C and 35°C. Scatchard analysis of the equilibrium binding data for the dye on this bacterium gave rise to linear plots, indicating that the Langmuir model could be applied. The regression coefficients obtained for the dye from the Freundlich and Langmuir models were significant and divergence from the Scatchard plot was observed. The adsorption behavior of the dye on this bacterium was expressed by the Langmuir, Freundlich, and Temkin isotherms. The adsorption data with respect to various temperatures provided an excellent fit to the Freundlich isotherm. However, when the Langmuir and Temkin isotherm models were applied to these data, a good fit was only obtained for the dye at lower temperatures, thus indicating that the biosorption ability of R. palustris 51ATA is dependent on temperature, pH, and dye concentration. How to cite: Öztürk A, Bayol E, Abdullah MI. 
Characterization of the biosorption of fast black azo dye K salt by the bacterium Rhodopseudomonas palustris 51ATA strain. Electron J Biotechnol 2020;46. https://doi.org/10.1016/j.ejbt.2020.05.002.",2020-07-01 +31177618,widgetcon: A website and program for quick conversion among common population genetic data formats.,"One of the most tedious steps in genetic data analyses is the reformatting data generated with one program for use with other applications. This conversion is necessary because comprehensive evaluation of the data may be based on different algorithms included in diverse software, each requiring a distinct input format. A platform-independent and freely available program or a web-based tool dedicated to such reformatting can save time and efforts in data processing. Here, we report widgetcon, a website and a program which has been developed to quickly and easily convert among various molecular data formats commonly used in phylogenetic analysis, population genetics, and other fields. The web-based service is available at https://www.widgetcon.net. The program and the website convert the major data formats in four basic steps in less than a minute. The resource will be a useful tool for the research community and can be updated to include more formats and features in the future.",2019-07-09 +34001366,Genomic evaluation of dairy heifer livability.,"Differences in breeds and sire lines suggest the presence of a genetic component for heifer livability (HLIV). Genomic evaluation for this trait can increase profitability and improve animal health and welfare. Evaluations for HLIV were examined from 3,362,499 calf data records from heifers of all breeds born from 2009 to 2016. Data were obtained from the national cooperator database maintained by the Council on Dairy Cattle Breeding (https://www.uscdcb.com/). The total number of deaths reported was 134,753 (4.01%), which included herds with death loss between 1.5 and 25.5%. 
Age at death was evaluated and ranged from >2 d of age until the heifer left the herd, with a maximum of 18 mo of age. Records were not included until 3 yr after the birthdate so that live status of contemporaries could be confirmed by a calving date for those animals. Deaths observed until 2 d after birth were considered to be a stillbirth rather than a failure of HLIV. The scale used for analysis of HLIV was 0 (died) or 100 (live), and the heritability estimate was 0.7% based on sire model with restricted maximum likelihood estimation. Genomic predicted transmitting abilities for Holstein ranged from -1.6% to +1.6% with a standard deviation of 0.5%, and genomic predicted transmitting abilities for Jersey ranged from -0.5% to +0.5% with a standard deviation of 0.2%. The mean overall death loss was about 4%. Reliabilities of genomic predictions for young animals averaged 46% for Holsteins and 30% for Jerseys, and corresponding traditional parent average reliabilities averaged 16% and 12%, respectively. Correlations of HLIV were 0.44 with productive life, 0.18 to 0.22 with yield traits, and 0.29 with early first calving on proven Holstein bulls. The HLIV trait had a favorable genetic trend in recent years, likely because of the indirect selection associated with the correlated traits. The trait HLIV should receive 1% of emphasis on the Lifetime Net Merit index, resulting in economic progress worth $50,000/yr. By encouraging more comprehensive recording on calf mortality, the reliabilities of genetic predictions could increase significantly.",2021-05-15 +34352185,Interventions Designed to Improve Narrative Language in School-Age Children: A Systematic Review With Meta-Analyses.,"Purpose The purpose of this systematic review with meta-analyses was to examine interventions that aimed to improve narrative language outcomes for preschool and elementary school-age children in the United States. 
Our goal was to examine peer-reviewed publications to describe the characteristics of these interventions and synthesize their overall effectiveness on narrative comprehension and production via meta-analysis. Method We searched electronic databases, examined previously published reviews, and consulted experts in the field to identify published studies that employed robust experimental and quasi-experimental designs. We included randomized controlled trials, studies with nonrandomized comparison groups, and single-case design (SCD) studies. We completed a qualitative synthesis of study factors for all identified studies and calculated meta-analyses for the studies that had sufficient data. All included studies were analyzed for risk of bias. Results Our systematic search yielded 40 studies that included one or more narrative language outcomes as part of their assessment battery. Twenty-four of the included studies were group design studies, including randomized controlled trials and quasi-experimental designs, and the other 16 were SCD studies. Effect sizes were analyzed based on narrative production and comprehension outcomes. The meta-analyses of 26 studies indicated overall positive effects of the interventions, with effect sizes of d = 0.51 and 0.54 in the group design studies and d = 1.24 in the SCD studies. Conclusions A variety of effective interventions were found that improve narrative production and comprehension outcomes in children with diverse learner characteristics. Some common characteristics across these interventions include manualized curricula, opportunities to produce narrative language, verbal and visual supports, direct instruction of story grammar, and use of authentic children's literature. 
Supplemental Material https://doi.org/10.23641/asha.15079173.",2021-08-04 +33418085,"MicroPhenoDB Associates Metagenomic Data with Pathogenic Microbes, Microbial Core Genes, and Human Disease Phenotypes.","Microbes play important roles in human health and disease. The interaction between microbes and hosts is a reciprocal relationship, which remains largely under-explored. Current computational resources lack manually and consistently curated data to connect metagenomic data to pathogenic microbes, microbial core genes, and disease phenotypes. We developed the MicroPhenoDB database by manually curating and consistently integrating microbe-disease association data. MicroPhenoDB provides 5677 non-redundant associations between 1781 microbes and 542 human disease phenotypes across more than 22 human body sites. MicroPhenoDB also provides 696,934 relationships between 27,277 unique clade-specific core genes and 685 microbes. Disease phenotypes are classified and described using the Experimental Factor Ontology (EFO). A refined score model was developed to prioritize the associations based on evidential metrics. The sequence search option in MicroPhenoDB enables rapid identification of existing pathogenic microbes in samples without running the usual metagenomic data processing and assembly. MicroPhenoDB offers data browsing, searching, and visualization through user-friendly web interfaces and web service application programming interfaces. MicroPhenoDB is the first database platform to detail the relationships between pathogenic microbes, core genes, and disease phenotypes. It will accelerate metagenomic data analysis and assist studies in decoding microbes related to human diseases. 
MicroPhenoDB is available through http://www.liwzlab.cn/microphenodb and http://lilab2.sysu.edu.cn/microphenodb.",2020-12-01 +33068436,tsRBase: a comprehensive database for expression and function of tsRNAs in multiple species.,"tRNA-derived small RNAs (tsRNAs) are a class of novel small RNAs, ubiquitously present in prokaryotes and eukaryotes. It has been reported that tsRNAs exhibit spatiotemporal expression patterns and can function as regulatory molecules in many biological processes. Current tsRNA databases only cover limited organisms and ignore tsRNA functional characteristics. Thus, integrating more relevant tsRNA information is helpful for further exploration. Here, we present a tsRNA database, named tsRBase, which integrates the expression pattern and functional information of tsRNAs in multiple species. In tsRBase, we identified 121 942 tsRNAs by analyzing more than 14 000 publicly available small RNA-seq data covering 20 species. This database collects samples from different tissues/cell-lines, or under different treatments and genetic backgrounds, thus helps depict specific expression patterns of tsRNAs under different conditions. Importantly, to enrich our understanding of biological significance, we collected tsRNAs experimentally validated from published literatures, obtained protein-binding tsRNAs from CLIP/RIP-seq data, and identified targets of tsRNAs from CLASH and CLEAR-CLIP data. Taken together, tsRBase is the most comprehensive and systematic tsRNA repository, exhibiting all-inclusive information of tsRNAs from diverse data sources of multiple species. tsRBase is freely available at http://www.tsrbase.org.",2021-01-01 +32611258,"Choosing Educational Resources to Build Interprofessional, Palliative Care Competency: A Replicable Review Methodology.","

Aim/objective

The purpose of the project was to provide information to inform the choice of educational resources available in British Columbia to support palliative care competency development for 4 disciplines: nurses, physicians, health care assistants, and social workers/counsellors. This article will describe the process of resource review. Results of the review are available at https://www.bc-cpc.ca/cpc/education-resource-review/. The objectives were to (1) identify gaps common to all educational resources, (2) provide information on content addressing competencies as well as logistics such as time required, cost, delivery method, and training requirements for instructors, and (3) develop a reproducible process for assessment of educational resources which is unbiased, transparent, and competency based.

Method

Sixteen educational resources were assessed for the percentage of competencies that were addressed. Gaps common to all resources were identified.

Results

The review process is described and can be replicated when assessing future versions of these and other palliative continuing education courses. This is a reproducible methodology for review of competency-based educational resources which could be applied for any practice-related subject.

Conclusion

This review process provided information which can inform a provincial interprofessional palliative education plan. The methodology may be used by others to assess and choose between competency-based education resources with a palliative population focus and other patient population foci.",2020-07-02 +33686370,COVID-19 in Europe: Dataset at a sub-national level.,"The COVID-19 pandemic has hit humanity, straining health care systems, economies, and governments worldwide. In one of the responses to the pandemic, a big global effort has been mounted to collect, analyze, and make data publicly available. However, many of the existing COVID-19 public datasets are (i) aggregated at country level, and (ii) tend not to bring the COVID-19-specific data coupled with socio-demographic, economic, public policy, health, pollution and environmental factors, all of which may be key elements to study the transmission of the SARS-CoV-2 and its severity. To aid the evaluation of the determinants and impact of the COVID-19 pandemic at a large scale, we present here a new dataset with socio-demographic, economic, public policy, health, pollution and environmental factors for the European Union at the small regions level (NUTS3). The database is freely accessible at http://dx.doi.org/10.17632/2ghxnrkr9p.4. This dataset can help to monitor the COVID-19 mortality and infections at the sub-national level and enable analysis that may inform future policymaking.",2021-03-03 +33326653,Functional and clinical implications of genetic structure in 1686 Italian exomes.,"To reconstruct the phenotypical and clinical implications of the Italian genetic structure, we thoroughly analyzed a whole-exome sequencing data set comprised of 1686 healthy Italian individuals. We found six previously unreported variants with remarkable frequency differences between Northern and Southern Italy in the HERC2, OR52R1, ADH1B, and THBS4 genes. 
We reported 36 clinically relevant variants (submitted as pathogenic, risk factors, or drug response in ClinVar) with significant frequency differences between Italy and Europe. We then explored putatively pathogenic variants in the Italian exome. On average, our Italian individuals carried 16.6 protein-truncating variants (PTVs), with 2.5% of the population having a PTV in one of the 59 American College of Medical Genetics (ACMG) actionable genes. Lastly, we looked for PTVs that are likely to cause Mendelian diseases. We found four heterozygous PTVs in haploinsufficient genes (KAT6A, PTCH1, and STXBP1) and three homozygous PTVs in genes causing recessive diseases (DPYD, FLG, and PYGM). Comparing frequencies from our data set to other public databases, like gnomAD, we showed the importance of population-specific databases for a more accurate assessment of variant pathogenicity. For this reason, we made aggregated frequencies from our data set publicly available as a tool for both clinicians and researchers (http://nigdb.cineca.it; NIG-ExIT).",2021-02-02 +,Developing a Global Strategy for the Control of Folate Deficiency and Folic Acid Responsive Neural Tube Defects in Low- and Middle-Income Countries (P10-107-19),"Abstract

Objectives

To develop a global action plan for the control of folate deficiency and folic acid responsive neural tube defects (anencephaly and spina bifida)

Methods

• Establish a multi- and inter-disciplinary group to develop a global action plan for folate-sensitive NTD prevention • Ensure regional lab capacity exists to assess folate status applying a harmonized microbiologic assay (MBA) to measure red blood cell (RBC) folate, establishing a global network of regional laboratories coordinated by an umbrella organization • Promote improvements of folate status in women of reproductive age (WRA) in LMIC, engaging a wide variety of national and global stakeholders • Address key knowledge gaps related to the prevention of folate-sensitive NTDs • Improve knowledge availability and sharing amongst global stakeholders in NTD prevention • Communicate and share the work of the Folate Task Team

Results

• A standing Folate Task Team comprised of a 2-member Secretariat, a 10-member Expert Advisory Group, 4 Ex-Officio members, and 4 “As Needed” advisors (see Fig. 1) • Identification of an initial list of 12 global stakeholders and partner organizations • Five resource laboratories have been trained at the Division of Laboratory Sciences - CDC, including labs in Vietnam, Sri Lanka, Philippines, Tanzania, and Pakistan • A training video supported by a manual and mini posters to illustrate specific activities of the assay has been produced by CDC. • A landscaping analysis has identified countries that have mandatory/voluntary food fortification practices, information on folate status in the population, burden of NTDs, and consumption patterns of fortified foods • Identification of alternative foods/food vehicles likely to be fortified to reach at-risk segments of WRA • Dissemination activities are shared at https://www.nutritionintl.org/what-we-do/nteam/folate-task-team/

Conclusions

• The global prevalence of NTD-affected pregnancies was estimated to be 260,100 in 2015 and has a significant emotional and economic impact on families and society, while contributing to the loss of human potential for countries • The global action plan provides a clear path forward to help direct and prioritize investments, advance resource mobilization, and garner the political will to accelerate NTD prevention in LMIC

Funding Sources

The Bill & Melinda Gates Foundation.

Supporting Tables, Images and/or Graphs

",2019-06-01 +33095870,Database resources of the National Center for Biotechnology Information.,"The National Center for Biotechnology Information (NCBI) provides a large suite of online resources for biological information and data, including the GenBank® nucleic acid sequence database and the PubMed® database of citations and abstracts published in life science journals. The Entrez system provides search and retrieval operations for most of these data from 34 distinct databases. The E-utilities serve as the programming interface for the Entrez system. Custom implementations of the BLAST program provide sequence-based searching of many specialized datasets. New resources released in the past year include a new PubMed interface and NCBI datasets. Additional resources that were updated in the past year include PMC, Bookshelf, Genome Data Viewer, SRA, ClinVar, dbSNP, dbVar, Pathogen Detection, BLAST, Primer-BLAST, IgBLAST, iCn3D and PubChem. All of these resources can be accessed through the NCBI home page at https://www.ncbi.nlm.nih.gov.",2021-01-01 +32252626,Complete genome sequence and annotation of the laboratory reference strain Shigella flexneri serotype 5a M90T and genome-wide transcriptional start site determination.,"

Background

Shigella is a Gram-negative facultative intracellular bacterium that causes bacillary dysentery in humans. Shigella invades cells of the colonic mucosa owing to its virulence plasmid-encoded Type 3 Secretion System (T3SS), and multiplies in the target cell cytosol. Although the laboratory reference strain S. flexneri serotype 5a M90T has been extensively used to understand the molecular mechanisms of pathogenesis, its complete genome sequence is not available, thereby greatly limiting studies employing high-throughput sequencing and systems biology approaches.

Results

We have sequenced, assembled, annotated and manually curated the full genome of S. flexneri 5a M90T. This yielded two complete circular contigs, the chromosome and the virulence plasmid (pWR100). To obtain the genome sequence, we have employed long-read PacBio DNA sequencing followed by polishing with Illumina RNA-seq data. This provides a new hybrid strategy to prepare gapless, highly accurate genome sequences, which also cover AT-rich tracks or repetitive sequences that are transcribed. Furthermore, we have performed genome-wide analysis of transcriptional start sites (TSS) and determined the length of 5' untranslated regions (5'-UTRs) at typical culture conditions for the inoculum of in vitro infection experiments. We identified 6723 primary TSS (pTSS) and 7328 secondary TSS (sTSS). The S. flexneri 5a M90T annotated genome sequence and the transcriptional start sites are integrated into RegulonDB (http://regulondb.ccg.unam.mx) and RSAT (http://embnet.ccg.unam.mx/rsat/) databases to use their analysis tools in the S. flexneri 5a M90T genome.

Conclusions

We provide the first complete genome for S. flexneri serotype 5a, specifically the laboratory reference strain M90T. Our work opens the possibility of employing S. flexneri M90T in high-quality systems biology studies such as transcriptomic and differential expression analyses or in genome evolution studies. Moreover, the catalogue of TSS that we report here can be used in molecular pathogenesis studies as a resource to know which genes are transcribed before infection of host cells. The genome sequence, together with the analysis of transcriptional start sites, is also a valuable tool for precise genetic manipulation of S. flexneri 5a M90T. Further, we present a new hybrid strategy to prepare gapless, highly accurate genome sequences. Unlike currently used hybrid strategies combining long- and short-read DNA sequencing technologies to maximize accuracy, our workflow using long-read DNA sequencing and short-read RNA sequencing provides the added value of using non-redundant technologies, which yield distinct, exploitable datasets.",2020-04-06 +33539890,"FunCoup 5: Functional Association Networks in All Domains of Life, Supporting Directed Links and Tissue-Specificity.","FunCoup (https://funcoup.sbc.su.se) is one of the most comprehensive functional association networks of genes/proteins available. Functional associations are inferred by integrating different types of evidence using a redundancy-weighted naïve Bayesian approach, combined with orthology transfer. FunCoup's high coverage comes from using eleven different types of evidence, and extensive transfer of information between species. Since the latest update of the database, the availability of source data has improved drastically, and user expectations on a tool for functional associations have grown. To meet these requirements, we have made a new release of FunCoup with updated source data and improved functionality. 
FunCoup 5 now includes 22 species from all domains of life, and the source data for evidences, gold standards, and genomes have been updated to the latest available versions. In this new release, directed regulatory links inferred from transcription factor binding can be visualized in the network viewer for the human interactome. Another new feature is the possibility to filter by genes expressed in a certain tissue in the network viewer. FunCoup 5 further includes the SARS-CoV-2 proteome, allowing users to visualize and analyze interactions between SARS-CoV-2 and human proteins in order to better understand COVID-19. This new release of FunCoup constitutes a major advance for the users, with updated sources, new species and improved functionality for analysis of the networks.",2021-02-02 +33166379,Each patient is a research biorepository: informatics-enabled research on surplus clinical specimens via the living BioBank.,"The ability to analyze human specimens is the pillar of modern-day translational research. To enhance the research availability of relevant clinical specimens, we developed the Living BioBank (LBB) solution, which allows for just-in-time capture and delivery of phenotyped surplus laboratory medicine specimens. The LBB is a system-of-systems integrating research feasibility databases in i2b2, a real-time clinical data warehouse, and an informatics system for institutional research services management (SPARC). LBB delivers deidentified clinical data and laboratory specimens. We further present an extension to our solution, the Living µBiome Bank, that allows the user to request and receive phenotyped specimen microbiome data. We discuss the details of the implementation of the LBB system and the necessary regulatory oversight for this solution. The conducted institutional focus group of translational investigators indicates an overall positive sentiment towards potential scientific results generated with the use of LBB. 
Reference implementation of LBB is available at https://LivingBioBank.musc.edu.",2021-01-01 +34667567,Real-time prediction of 1H and 13C chemical shifts with DFT accuracy using a 3D graph neural network.,"Nuclear magnetic resonance (NMR) is one of the primary techniques used to elucidate the chemical structure, bonding, stereochemistry, and conformation of organic compounds. The distinct chemical shifts in an NMR spectrum depend upon each atom's local chemical environment and are influenced by both through-bond and through-space interactions with other atoms and functional groups. The in silico prediction of NMR chemical shifts using quantum mechanical (QM) calculations is now commonplace in aiding organic structural assignment since spectra can be computed for several candidate structures and then compared with experimental values to find the best possible match. However, the computational demands of calculating multiple structural- and stereo-isomers, each of which may typically exist as an ensemble of rapidly-interconverting conformations, are expensive. Additionally, the QM predictions themselves may lack sufficient accuracy to identify a correct structure. In this work, we address both of these shortcomings by developing a rapid machine learning (ML) protocol to predict 1H and 13C chemical shifts through an efficient graph neural network (GNN) using 3D structures as input. Transfer learning with experimental data is used to improve the final prediction accuracy of a model trained using QM calculations. When tested on the CHESHIRE dataset, the proposed model predicts observed 13C chemical shifts with comparable accuracy to the best-performing DFT functionals (1.5 ppm) in around 1/6000 of the CPU time. An automated prediction webserver and graphical interface are accessible online at http://nova.chem.colostate.edu/cascade/. 
We further demonstrate the model in three applications: first, we use the model to decide the correct organic structure from candidates through experimental spectra, including complex stereoisomers; second, we automatically detect and revise incorrect chemical shift assignments in a popular NMR database, the NMRShiftDB; and third, we use NMR chemical shifts as descriptors for determination of the sites of electrophilic aromatic substitution.",2021-08-09 +30398663,CATH: expanding the horizons of structure-based functional annotations for genome sequences.,"This article provides an update of the latest data and developments within the CATH protein structure classification database (http://www.cathdb.info). The resource provides two levels of release: CATH-B, a daily snapshot of the latest structural domain boundaries and superfamily assignments, and CATH+, which adds layers of derived data, such as predicted sequence domains, functional annotations and functional clustering (known as Functional Families or FunFams). The most recent CATH+ release (version 4.2) provides a huge update in the coverage of structural data. This release increases the number of fully- classified domains by over 40% (from 308 999 to 434 857 structural domains), corresponding to an almost two- fold increase in sequence data (from 53 million to over 95 million predicted domains) organised into 6119 superfamilies. The coverage of high-resolution, protein PDB chains that contain at least one assigned CATH domain is now 90.2% (increased from 82.3% in the previous release). 
A number of highly requested features have also been implemented in our web pages: allowing the user to view an alignment between their query sequence and a representative FunFam structure and providing tools that make it easier to view the full structural context (multi-domain architecture) of domains and chains.",2019-01-01 +34370723,PPIDomainMiner: Inferring domain-domain interactions from multiple sources of protein-protein interactions.,"Many biological processes are mediated by protein-protein interactions (PPIs). Because protein domains are the building blocks of proteins, PPIs likely rely on domain-domain interactions (DDIs). Several attempts exist to infer DDIs from PPI networks but the produced datasets are heterogeneous and sometimes not accessible, while the PPI interactome data keeps growing. We describe a new computational approach called ""PPIDM"" (Protein-Protein Interactions Domain Miner) for inferring DDIs using multiple sources of PPIs. The approach is an extension of our previously described ""CODAC"" (Computational Discovery of Direct Associations using Common neighbors) method for inferring new edges in a tripartite graph. The PPIDM method has been applied to seven widely used PPI resources, using as ""Gold-Standard"" a set of DDIs extracted from 3D structural databases. Overall, PPIDM has produced a dataset of 84,552 non-redundant DDIs. Statistical significance (p-value) is calculated for each source of PPI and used to classify the PPIDM DDIs in Gold (9,175 DDIs), Silver (24,934 DDIs) and Bronze (50,443 DDIs) categories. Dataset comparison reveals that PPIDM has inferred from the 2017 releases of PPI sources about 46% of the DDIs present in the 2020 release of the 3did database, not counting the DDIs present in the Gold-Standard. 
The PPIDM dataset contains 10,229 DDIs that are consistent with more than 13,300 PPIs extracted from the IMEx database, and nearly 23,300 DDIs (27.5%) that are consistent with more than 214,000 human PPIs extracted from the STRING database. Examples of newly inferred DDIs covering more than 10 PPIs in the IMEx database are provided. Further exploitation of the PPIDM DDI reservoir includes the inventory of possible partners of a protein of interest and characterization of protein interactions at the domain level in combination with other methods. The result is publicly available at http://ppidm.loria.fr/.",2021-08-09 +33003203,QSIdb: quorum sensing interference molecules. ,"Quorum sensing interference (QSI), the disruption and manipulation of quorum sensing (QS) in the dynamic control of bacteria populations could be widely applied in synthetic biology to realize dynamic metabolic control and develop potential clinical therapies. Conventionally, limited QSI molecules (QSIMs) were developed based on molecular structures or for specific QS receptors, which are in short supply for various interferences and manipulations of QS systems. In this study, we developed QSIdb (http://qsidb.lbci.net/), a specialized repository of 633 reported QSIMs and 73 073 expanded QSIMs including both QS agonists and antagonists. We have collected all reported QSIMs in literatures focused on the modifications of N-acyl homoserine lactones, natural QSIMs and synthetic QS analogues. Moreover, we developed a pipeline with SMILES-based similarity assessment algorithms and docking-based validations to mine potential QSIMs from existing 138 805 608 compounds in the PubChem database. In addition, we proposed a new measure, pocketedit, for assessing the similarities of active protein pockets or QSIMs crosstalk, and obtained 273 possible potential broad-spectrum QSIMs. We provided user-friendly browsing and searching facilities for easy data retrieval and comparison. 
QSIdb could assist the scientific community in understanding QS-related therapeutics, manipulating QS-based genetic circuits in metabolic engineering, developing potential broad-spectrum QSIMs and expanding new ligands for other receptors.",2021-07-01 +,Developing a User-focused Standardised Design System for Prescription Medicine Packaging in Slovenia,"Abstract

Introduction

In my thesis I am exploring the role of pharmaceutical packaging design in relation to the user. This topic is becoming increasingly relevant as the number of issued prescriptions in Slovenia is rising every year, treatment with prescription medicine is experienced by almost everyone. Medicine packaging must therefore provide essential information effectively and efficiently.

Aim

The purpose of this thesis is to improve current heterogeneous conditions by developing a standardized design system for all prescription drugs by taking into account users’ needs at each stage of the process. The final goal is a simpler and more effective use of products for everyone involved.

Methods

Research was conducted in three stages. In the first stage, the existing condition in packaging design was analysed: information hierarchy/arrangement on 8 significant manufacturers’ products considering 1 - the type of information and 2 - different user groups. Second stage consisted of conducting surveys with 2 focus groups representing two main user groups who use the packaging differently - medicine consumers and healthcare professionals.(1) Consumer focus group consisted of 81 participants, recruited randomly from various age groups (age 10 to 89). They were asked closed-ended questions. Healthcare professionals focus group consisted of 5 pharmacists with extensive experience. They were asked open-ended questions. The collected data from both research stages was statistically and qualitatively analysed in order to define the main problems with medicine packaging design and use. Identified problems were then addressed through the design process. The third stage included development of a standardised design system in accordance with information design theory and cognitive psychology findings.(2) These helped establish the system building blocks/rules: information hierarchy and organization, use of colour, shape and typography.

Results

Analysis of existing conditions clearly exposed the heterogeneity and unsuitability of the majority of medicine packaging design. These caused similar problems to both user groups: trouble finding information (73 %), lack/redundancy of information (47 %/17 %), illegible, unreadable typography (39 %), distracting visual elements (26 %), unclear distinction between medicines (17 %). These lead to various consequences: incorrect route of administration (39 %), consuming/prescribing expired (30 %) or incorrect product (8 %), time loss (8 %). Each of these problems was addressed through establishment of new, highly precise rules in packaging design: regulating hierarchy and typography, introducing visual categorization through symbols and illustrations (information category, pharmaceutical form, ATC group) and color-coding medicine strength. The rules form a standardised system which provides unity, consistency and quality regulation, improving the everyday experience of many people.

Conclusion

The research was carried out as a part of a BA thesis. The execution of the project would require a change in the legislation on state level. It therefore serves as a speculative proposal, aiming to raise questions that are currently not being addressed properly within the industry. The possibility of implementation could be recognized through gradual transformation of individual rules/building blocks of the system into new state regulations or guidelines. Discussion with the industry and the profession has not yet been carried out due to the Covid-19 crisis.

References

1. Heinio RL, Rusko E, Van der Waarde K. Challenges to read and understand information on pharmaceutical packages [Internet]. 2012 Jun [cited 2020 May 24]; 79 - 85. Available from: https://www.vttresearch.com/sites/default/files/julkaisut/muut/2012/Rusko IAPRI_2012_Manuscript_final.pdf 2. Černe Oven P, Požar C. On Information Design [Internet]. Ljubljana: Muzej za arhitekturo in oblikovanje; 2016. 149 p. Available from: http://www.mao.si/Upload/file/Oninformation-design_e-book-spread.pdf Evaluation of the first pharmacy-led weight management programme in Greece.",2021-03-26 +34122523,Development and Validation of a Hypoxia-Related Signature for Predicting Survival Outcomes in Patients With Bladder Cancer.,"

Objectives

This study aimed to develop and validate a hypoxia signature for predicting survival outcomes in patients with bladder cancer.

Methods

We downloaded the RNA sequence and the clinicopathologic data of the patients with bladder cancer from The Cancer Genome Atlas (TCGA) (https://portal.gdc.cancer.gov/repository?facetTab=files) and the Gene Expression Omnibus (GEO) (https://www.ncbi.nlm.nih.gov/geo/) databases. Hypoxia genes were retrieved from the Molecular Signatures Database (https://www.gsea-msigdb.org/gsea/msigdb/index.jsp). Differentially expressed hypoxia-related genes were screened by univariate Cox regression analysis and Lasso regression analysis. Then, the selected genes constituted the hypoxia signature and were included in multivariate Cox regression to generate the risk scores. After that, we evaluated the predictive performance of this signature by multiple receiver operating characteristic (ROC) curves. The CIBERSORT tool was applied to investigate the relationship between the hypoxia signature and the immune cell infiltration, and the maftools package was used to summarize and analyze the mutational data. Gene-set enrichment analysis (GSEA) was used to investigate the related signaling pathways of differentially expressed genes in both risk groups. Furthermore, we developed a model and presented it with a nomogram to predict survival outcomes in patients with bladder cancer.

Results

Eight genes (AKAP12, ALDOB, CASP6, DTNA, HS3ST1, JUN, KDELR3, and STC1) were included in the hypoxia signature. The patients with higher risk scores showed worse overall survival time than the ones with lower risk scores in the training set (TCGA) and two external validation sets (GSE13507 and GSE32548). Immune infiltration analysis showed that two types of immune cells (M0 and M1 macrophages) had a significant infiltration in the high-risk group. Tumor mutation burden (TMB) analysis showed that the risk scores between the wild types and the mutation types of TP53, MUC16, RB1, and FGFR3 were significantly different. Gene-Set Enrichment Analysis (GSEA) showed that immune or cancer-associated pathways belonged to the high-risk groups and metabolism-related signal pathways were enriched into the low-risk group. Finally, we constructed a predictive model with risk score, age, and stage and validated its performance in GEO datasets.

Conclusion

We successfully constructed and validated a novel hypoxia signature in bladder cancer, which could accurately predict patients' prognosis.",2021-05-26 +,ARIMA and NAR based prediction model for time series analysis of COVID-19 cases in India,"In this paper, we have applied the univariate time series model to predict the number of COVID-19 infected cases that can be expected in upcoming days in India. We adopted an Auto-Regressive Integrated Moving Average (ARIMA) model on the data collected from 31st January 2020 to 25th March 2020 and verified it using the data collected from 26th March 2020 to 04th April 2020. A nonlinear autoregressive (NAR) neural network was developed to compare the accuracy of predicted models. The model has been used for daily prediction of COVID-19 cases for next 50 days without any additional intervention. Statistics from various sources, including the Ministry of Health and Family Welfare (MoHFW) and http://covid19india.org/ are used for the study. The results showed an increasing trend in the actual and forecasted numbers of COVID-19 cases with approximately 1500 cases per day, based on available data as on 04th April 2020. The appropriate ARIMA (1,1,0) model was selected based on the Bayesian Information Criteria (BIC) values and the overall highest R2 values of 0.95. The NAR model architecture constitutes ten neurons, which was optimized using the Levenberg-Marquardt optimization training algorithm (LM) with the overall highest R2 values of 0.97.",2020-06-29 +34400449,Does performance at medical school predict success at the Intercollegiate Membership of the Royal College of Surgeons (MRCS) examination? A retrospective cohort study.,"

Background

Identifying predictors of success in postgraduate examinations can help guide the career choices of medical students and may aid early identification of trainees requiring extra support to progress in specialty training. We assessed whether performance on the educational performance measurement (EPM) and situational judgement test (SJT) used for selection into foundation training predicted success at the Membership of the Royal College of Surgeons (MRCS) examination.

Methods

This was a longitudinal, cohort study using data from the UK Medical Education Database (https://www.ukmed.ac.uk). UK medical graduates who had attempted Part A (n=2585) and Part B (n=755) of the MRCS between 2014 and 2017 were included. χ2 and independent t-tests were used to examine the relationship between medical school performance and sociodemographic factors with first-attempt success at MRCS Part A and B. Multivariate logistic regression was employed to identify independent predictors of MRCS performance.

Results

The odds of passing MRCS increased by 55% for Part A (OR 1.55 (95% CI 1.48 to 1.61)) and 23% for Part B (1.23 (1.14 to 1.32)) for every additional EPM decile point gained. For every point awarded for additional degrees in the EPM, candidates were 20% more likely to pass MRCS Part A (1.20 (1.13 to 1.29)) and 17% more likely to pass Part B (1.17 (1.04 to 1.33)). For every point awarded for publications in the EPM, candidates were 14% more likely to pass MRCS Part A (1.14 (1.01 to 1.28)). SJT score was not a statistically significant independent predictor of MRCS success.

Conclusion

This study has demonstrated the EPM's independent predictive power and found that medical school performance deciles are the most significant measure of predicting later success in the MRCS. These findings can be used by medical schools, training boards and workforce planners to inform evidence-based and contemporary selection and assessment strategies.",2021-08-16 +35251694,DBHR: a collection of databases relevant to human research.,"

Background

The achievement of the human genome project provides a basis for the systematic study of the human genome from evolutionary history to disease-specific medicine. With the explosive growth of biological data, a growing number of biological databases are being established to support human-related research.

Objective

The main objective of our study is to store, organize and share data in a structured and searchable manner. In short, we have planned the future development of new features in the database research area.

Materials & methods

In total, we collected and integrated 680 human databases from scientific published work. Multiple options are presented for accessing the data, while original links and short descriptions are also presented for each database.

Results & discussion

We have provided the latest collection of human research databases on a single platform with six categories: DNA database, RNA database, protein database, expression database, pathway database and disease database.

Conclusion

Taken together, our database will be useful for further human research study and will be modified over time. The database has been implemented in PHP, HTML, CSS and MySQL and is available freely at https://habdsk.org/database.php.",2021-01-20 +31584089,PhaSepDB: a database of liquid-liquid phase separation related proteins.,"It's widely appreciated that liquid-liquid phase separation (LLPS) underlies the formation of membraneless organelles, which function to concentrate proteins and nucleic acids. In the past few decades, major efforts have been devoted to identify the phase separation associated proteins and elucidate their functions. To better utilize the knowledge dispersed in published literature, we developed PhaSepDB (http://db.phasep.pro/), a manually curated database of phase separation associated proteins. Currently, PhaSepDB includes 2914 non-redundant proteins localized in different organelles curated from published literature and database. PhaSepDB provides protein summary, publication reference and sequence features of phase separation associated proteins. The sequence features which reflect the LLPS behavior are also available for other human protein candidates. The online database provides a convenient interface for the research community to easily browse, search and download phase separation associated proteins. As a centralized resource, we believe PhaSepDB will facilitate the future study of phase separation.",2020-01-01 +30272209,Updates in Rhea: SPARQLing biochemical reaction data.,"Rhea (http://www.rhea-db.org) is a comprehensive and non-redundant resource of over 11 000 expert-curated biochemical reactions that uses chemical entities from the ChEBI ontology to represent reaction participants. Originally designed as an annotation vocabulary for the UniProt Knowledgebase (UniProtKB), Rhea also provides reaction data for a range of other core knowledgebases and data repositories including ChEBI and MetaboLights. 
Here we describe recent developments in Rhea, focusing on a new resource description framework representation of Rhea reaction data and an SPARQL endpoint (https://sparql.rhea-db.org/sparql) that provides access to it. We demonstrate how federated queries that combine the Rhea SPARQL endpoint and other SPARQL endpoints such as that of UniProt can provide improved metabolite annotation and support integrative analyses that link the metabolome through the proteome to the transcriptome and genome. These developments will significantly boost the utility of Rhea as a means to link chemistry and biology for a more holistic understanding of biological systems and their function in health and disease.",2019-01-01 +33495705,ResiDB: An automated database manager for sequence data.,"The amount of publicly available DNA sequence data is drastically increasing, making it a tedious task to create sequence databases necessary for the design of diagnostic assays. The selection of appropriate sequences is especially challenging in genes affected by frequent point mutations such as antibiotic resistance genes. To overcome this issue, we have designed the webtool resiDB, a rapid and user-friendly sequence database manager for bacteria, fungi, viruses, protozoa, invertebrates, plants, archaea, environmental and whole genome shotgun sequence data. It automatically identifies and curates sequence clusters to create custom sequence databases based on user-defined input sequences. A collection of helpful visualization tools gives the user the opportunity to easily access, evaluate, edit, and download the newly created database. Consequently, researchers do no longer have to manually manage sequence data retrieval, deal with hardware limitations, and run multiple independent software tools, each having its own requirements, input and output formats. 
Our tool was developed within the H2020 project FAPIC aiming to develop a single diagnostic assay targeting all sepsis-relevant pathogens and antibiotic resistance mechanisms. ResiDB is freely accessible to all users through https://residb.ait.ac.at/.",2021-01-19 +33868597,GPCards: An integrated database of genotype-phenotype correlations in human genetic diseases.,"Genotype-phenotype correlations are the basis of precision medicine of human genetic diseases. However, it remains a challenge for clinicians and researchers to conveniently access detailed individual-level clinical phenotypic features of patients with various genetic variants. To address this urgent need, we manually searched for genetic studies in PubMed and catalogued 8,309 genetic variants in 1,288 genes from 17,738 patients with detailed clinical phenotypic features from 1,855 publications. Based on genotype-phenotype correlations in this dataset, we developed an user-friendly online database called GPCards (http://genemed.tech/gpcards/), which not only provided the association between genetic diseases and disease genes, but also the prevalence of various clinical phenotypes related to disease genes and the patient-level mapping between these clinical phenotypes and genetic variants. To accelerate the interpretation of genetic variants, we integrated 62 well-known variant-level and gene-level genomic data sources, including functional predictions, allele frequencies in different populations, and disease-related information. Furthermore, GPCards enables automatic analyses of users' own genetic data, comprehensive annotation, prioritization of candidate functional variants, and identification of genotype-phenotype correlations using custom parameters. 
In conclusion, GPCards is expected to accelerate the interpretation of genotype-phenotype correlations, subtype classification, and candidate gene prioritisation in human genetic diseases.",2021-03-22 +33929018,IBDDB: a manually curated and text-mining-enhanced database of genes involved in inflammatory bowel disease. ,"To date, research on inflammatory bowel disease (IBD, encompassing Crohn's disease and ulcerative colitis), a chronic complex disorder, has generated a large amount of data scattered across published literature (1 06 333) listed in PubMed on 14 October 2020, and no dedicated database currently exists that catalogues information on genes associated with IBD. We aimed to manually curate 289 genes that are experimentally validated to be linked with IBD and its known phenotypes. Furthermore, we have developed an integrated platform providing information about different aspects of these genes by incorporating several resources and an extensive text-mined knowledgebase. The curated IBD database (IBDDB) allows the selective display of collated 34 subject-specific concepts (listed as columns) exportable through a user-friendly IBDDB portal. The information embedded in concepts was acquired via text-mining of PubMed (manually cleaned and curated), accompanied by data-mining from varied resources. The user can also explore different biomedical entities and their co-occurrence with other entities (about one million) from 11 curated dictionaries in the indexed PubMed records. This functionality permits the user to generate and cross-examine a new hypothesis that is otherwise not easy to comprehend by just reading the published abstracts and papers. Users can download required information using various file formats and can display information in the form of networks. To our knowledge, no curated database of IBD-related genes is available so far. 
IBDDB is free for academic users and can be accessed at https://www.cbrc.kaust.edu.sa/ibd/.",2021-04-01 +,Prospective production of fructose and single cell protein from date palm waste,"Fructose and single cell protein are important products for the food market. Abundant amounts of low-grade dates worldwide are annually wasted. In this study, highly concentrated fructose syrups and single cell protein were obtained through selective fermentation of date extracts by Saccharomyces cerevisiae.The effect of air flow (0.1, 0.5, 0.75, 1, 1.25 and 1.5 vvm) and pH (4.5, 4.8, 5, 5.3 and 5.6) was investigated. Higher air flow led to lower fructose yield. The optimum cell mass production of 10 g/L was achieved at air flow of 1.25 vvm with the fructose yield of 91%. Similar cell mass production was obtained in the range pH of 5.0–5.6, while less cell mass was obtained at pH less than 5. Controlling the pH at 4.5, 5.0 and 5.3 failed to improve the production of cell mass which were 5.6, 5.9 and 5.4 g/L respectively; however, better fructose yield was obtained.Extension of the modified Gompertz enabled excellent predictions of the cell mass, fructose production and fructose fraction. The proposed model was also successfully validated against data from literatures. Thus, the model will be useful for wide application of biological processes.How to cite: Putra MD, Abasaeed AE, Al-Zahrani SM. Prospective production of fructose and single cell protein from date palm waste. Electron J Biotechnol 2020;48. https://doi.org/10.1016/j.ejbt.2020.09.007.",2020-11-01 +31653717,Diverse Traits Contribute to Salinity Tolerance of Wild Tomato Seedlings from the Galapagos Islands.,"Traits of modern crops have been heavily selected in agriculture, leaving commercial lines often more susceptible to harsh conditions compared with their wild relatives. Understanding the mechanisms of stress tolerance in wild relatives can enhance crop performance under stress conditions such as high salinity. 
In this study, we investigated salinity tolerance of two species of wild tomato endemic to the Galapagos Islands, Solanum cheesmaniae and Solanum galapagense Since these tomatoes grow well despite being constantly splashed with seawater, they represent a valuable genetic resource for improving salinity tolerance in commercial tomatoes. To explore their potential, we recorded over 20 traits reflecting plant growth, physiology, and ion content in 67 accessions and two commercial tomato lines of Solanum lycopersicum. Salt treatments were applied for 10 d using supported hydroponics. The Galapagos tomatoes displayed greater tolerance to salt stress than the commercial lines and showed substantial natural variation in their responses. The accessions LA0317, LA1449, and LA1403 showed particularly high salinity tolerance based on growth under salinity stress. Therefore, Galapagos tomatoes should be further explored to identify the genes underlying their high tolerance and be used as a resource for increasing the salinity tolerance of commercial tomatoes. The generated data, along with useful analysis tools, have been packaged and made publicly available via an interactive online application (https://mmjulkowska.shinyapps.io/La_isla_de_tomato/) to facilitate trait selection and the use of Galapagos tomatoes for the development of salt-tolerant commercial tomatoes.",2019-10-25 +33468221,LightCUD: a program for diagnosing IBD based on human gut microbiome data.,"

Background

The diagnosis of inflammatory bowel disease (IBD) and discrimination between the types of IBD are clinically important. IBD is associated with marked changes in the intestinal microbiota. Advances in next-generation sequencing (NGS) technology and the improved hospital bioinformatics analysis ability motivated us to develop a diagnostic method based on the gut microbiome.

Results

Using a set of whole-genome sequencing (WGS) data from 349 human gut microbiota samples with two types of IBD and healthy controls, we assembled and aligned WGS short reads to obtain feature profiles of strains and genera. The genus and strain profiles were used for the 16S-based and WGS-based diagnostic modules construction respectively. We designed a novel feature selection procedure to select those case-specific features. With these features, we built discrimination models using different machine learning algorithms. The machine learning algorithm LightGBM outperformed other algorithms in this study and thus was chosen as the core algorithm. Specially, we identified two small sets of biomarkers (strains) separately for the WGS-based health vs IBD module and ulcerative colitis vs Crohn's disease module, which contributed to the optimization of model performance during pre-training. We released LightCUD as an IBD diagnostic program built with LightGBM. The high performance has been validated through five-fold cross-validation and using an independent test data set. LightCUD was implemented in Python and packaged free for installation with customized databases. With WGS data or 16S rRNA sequencing data of gut microbiome samples as the input, LightCUD can discriminate IBD from healthy controls with high accuracy and further identify the specific type of IBD. The executable program LightCUD was released in open source with instructions at the webpage http://cqb.pku.edu.cn/ZhuLab/LightCUD/ . The identified strain biomarkers could be used to study the critical factors for disease development and recommend treatments regarding changes in the gut microbial community.

Conclusions

As the first released human gut microbiome-based IBD diagnostic tool, LightCUD demonstrates a high-performance for both WGS and 16S sequencing data. The strains that either identify healthy controls from IBD patients or distinguish the specific type of IBD are expected to be clinically important to serve as biomarkers.",2021-01-19 +33630831,A plasmid DNA-launched SARS-CoV-2 reverse genetics system and coronavirus toolkit for COVID-19 research.,"The recent emergence of Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2), the underlying cause of Coronavirus Disease 2019 (COVID-19), has led to a worldwide pandemic causing substantial morbidity, mortality, and economic devastation. In response, many laboratories have redirected attention to SARS-CoV-2, meaning there is an urgent need for tools that can be used in laboratories unaccustomed to working with coronaviruses. Here we report a range of tools for SARS-CoV-2 research. First, we describe a facile single plasmid SARS-CoV-2 reverse genetics system that is simple to genetically manipulate and can be used to rescue infectious virus through transient transfection (without in vitro transcription or additional expression plasmids). The rescue system is accompanied by our panel of SARS-CoV-2 antibodies (against nearly every viral protein), SARS-CoV-2 clinical isolates, and SARS-CoV-2 permissive cell lines, which are all openly available to the scientific community. Using these tools, we demonstrate here that the controversial ORF10 protein is expressed in infected cells. Furthermore, we show that the promising repurposed antiviral activity of apilimod is dependent on TMPRSS2 expression. 
Altogether, our SARS-CoV-2 toolkit, which can be directly accessed via our website at https://mrcppu-covid.bio/, constitutes a resource with considerable potential to advance COVID-19 vaccine design, drug testing, and discovery science.",2021-02-25 +35091331,"Imaging Mass Spectrometry (IMS) for drug discovery and development survey: Results on methods, applications and regulatory compliance.","Imaging mass spectrometry (IMS) is increasingly used for drug discovery and development to understand target enagement, tissue distribution, drug toxicity, and disease mechanisms, etc. However, this is still a relatively new technique that requires further development validation before it will be an acceptable technique to support regulated development of new drugs. Thus, best practices will need to be established to build more confidence and gain wider acceptance by the scientific community, pharmaceutical industry, and regulatory authorities. The Imaging Mass Spectrometry Society (IMSS) and the Japan Association for Imaging Mass Spectrometry (JAIMS) have conducted a thorough survey to gather information on the current state of IMS and to identify key issues. The survey was sent to researchers or managers in the position who are currently using IMS techniques in support of their drug discovery and development efforts and/or who plan to use such tools as best practices are established. The survey probes questions related to details regarding technical aspects of IMS, which includes data acquisition, data analysis and quantitation, data integrity, reporting, applications, and regulatory concerns. 
This international survey was conducted online through the Survey Monkey (https://www.surveymonkey.com) in both English and Japanese from September 14 through September 30, 2020.",2021-12-16 +33245774,"PAGER-CoV: a comprehensive collection of pathways, annotated gene-lists and gene signatures for coronavirus disease studies.","PAGER-CoV (http://discovery.informatics.uab.edu/PAGER-CoV/) is a new web-based database that can help biomedical researchers interpret coronavirus-related functional genomic study results in the context of curated knowledge of host viral infection, inflammatory response, organ damage, and tissue repair. The new database consists of 11 835 PAGs (Pathways, Annotated gene-lists, or Gene signatures) from 33 public data sources. Through the web user interface, users can search by a query gene or a query term and retrieve significantly matched PAGs with all the curated information. Users can navigate from a PAG of interest to other related PAGs through either shared PAG-to-PAG co-membership relationships or PAG-to-PAG regulatory relationships, totaling 19 996 993. Users can also retrieve enriched PAGs from an input list of COVID-19 functional study result genes, customize the search data sources, and export all results for subsequent offline data analysis. In a case study, we performed a gene set enrichment analysis (GSEA) of a COVID-19 RNA-seq data set from the Gene Expression Omnibus database. Compared with the results using the standard PAGER database, PAGER-CoV allows for more sensitive matching of known immune-related gene signatures. 
We expect PAGER-CoV to be invaluable for biomedical researchers to find molecular biology mechanisms and tailored therapeutics to treat COVID-19 patients.",2021-01-01 +35111182,The Grape Gene Reference Catalogue as a Standard Resource for Gene Selection and Genetic Improvement.,"Effective crop improvement, whether through selective breeding or biotech strategies, is largely dependent on the cumulative knowledge of a species' pangenome and its containing genes. Acquiring this knowledge is specially challenging in grapevine, one of the oldest fruit crops grown worldwide, which is known to have more than 30,000 genes. Well-established research communities studying model organisms have created and maintained, through public and private funds, a diverse range of online tools and databases serving as repositories of genomes and gene function data. The lack of such resources for the non-model, but economically important, Vitis vinifera species has driven the need for a standardised collection of genes within the grapevine community. In an effort led by the Integrape COST Action CA17111, we have recently developed the first grape gene reference catalogue, where genes are ascribed to functional data, including their accession identifiers from different genome-annotation versions (https://integrape.eu/resources/genes-genomes/). We present and discuss this gene repository together with a validation-level scheme based on varied supporting evidence found in current literature. The catalogue structure and online submission form provided permits community curation. Finally, we present the Gene Cards tool, developed within the Vitis Visualization (VitViz) platform, to visualize the data collected in the catalogue and link gene function with tissue-specific expression derived from public transcriptomic data. 
This perspective article aims to present these resources to the community as well as highlight their potential use, in particular for plant-breeding applications.",2021-01-01 +34019885,Interventions to decrease complications after shoulder dystocia: a systematic review and Bayesian meta-analysis.,"

Objective

This study aimed to evaluate the outcomes associated with the implementation of simulation exercises to reduce the sequela of shoulder dystocia.

Data sources

Electronic databases (Ovid MEDLINE, Embase, the Cumulative Index to Nursing and Allied Health Literature database, and Scopus) were initially queried in June 2020 and updated in November 2020. The following 3 concepts were introduced and refined using the controlled vocabulary of the database: vaginal birth, shoulder dystocia, and simulation training. There were no limitations to the year of publication as part of the search strategy.

Study eligibility criteria

We included all studies that reported on the frequency of shoulder dystocia and the associated complications before and after the implementation of interventional exercises to improve outcomes.

Methods

Two authors independently assessed the abstracts and full-text articles of all studies for eligibility and evaluated the quality of the included studies using the Newcastle-Ottawa Scale. Any inconsistencies related to study evaluation or data extraction were resolved by a third author. The coprimary outcomes of this systematic review and meta-analysis were neonatal brachial plexus palsy diagnosed following deliveries complicated by shoulder dystocia and persistence of brachial palsy at 12 months or later. The secondary outcomes were the frequency of shoulder dystocia and cesarean delivery. Study effects were combined using a Bayesian meta-analysis and were reported as risk ratios and 95% credible intervals (Crs).

Results

Of the 372 articles reviewed, 16 publications, which included 428,552 deliveries with 217,713 (50.8%) deliveries during the preintervention and 210,839 (49.2%) deliveries during the postinterventional period, were included in the meta-analysis. The incidence of neonatal brachial plexus palsy after shoulder dystocia decreased from 12.1% to 5.7% (risk ratio, 0.37; 95% Cr, 0.26-0.57; probability of reduction 100%). The overall proportion of neonatal brachial plexus palsy decreased, but with less precision, from 0.3% to 0.1% (risk ratio, 0.53; 95% Cr, 0.21-1.26; probability of reduction 94%). Two studies followed newborns with brachial plexus palsy for at least 12 months. One study that reported on persistent neonatal brachial plexus palsy at 12 months among 1148 shoulder dystocia cases noted a reduction in persistent neonatal brachial plexus palsy from 1.9% to 0.2% of shoulder dystocia cases (risk ratio, 0.13; 95% confidence interval, 0.04-0.49). In contrast, the study that reported on persistent neonatal brachial plexus palsy at 12 months for all deliveries noted that it did not change significantly, namely from 0.3 to 0.2 per 1000 births (risk ratio, 0.77; 95% confidence interval, 0.31-1.90). Following the implementation of shoulder dystocia interventional exercises, the diagnosis of shoulder dystocia increased significantly from 1.2% to 1.7% of vaginal deliveries (risk ratio, 1.39; 95% Cr, 1.19-1.65; probability of increase 100%). Compared with the preimplementation period, the cesarean delivery rate increased postimplementation from 21.2% to 25.9% (risk ratio, 1.22; 95% Cr, 0.93-1.59; probability of increase 93%). We created an online tool (https://ccrebm-bell.shinyapps.io/sdmeta/) that permits calculation of the absolute risk reduction and absolute risk increase attributable to the intervention vis-à-vis the incidence of shoulder dystocia, neonatal brachial plexus palsy, and cesarean deliveries.

Conclusion

Introduction of shoulder dystocia interventional exercises decreased the rate of neonatal brachial plexus palsy per shoulder dystocia case; the data on persistence of neonatal brachial plexus palsy beyond 12 months is limited and contradictory. Implementation of the interventions was associated with an increase in the diagnosis of shoulder dystocia and rate of cesarean deliveries.",2021-05-18 +30124853,Data Resource Profile: The China National Health Survey (CNHS).,"The China National Health Survey (CNHS) is the first nationwide multi-ethnic cross-sectional interview and health examination conducted from 2012 to 2017. The survey is designed to study reference intervals for physiological constants as well as determinants of noncommunicable diseases among different ethnic populations in different areas, so that the data can be used to enhance clinical diagnosis strategies and health promotion. CNHS used a stratified, multistage cluster sampling method to obtain a sample of 53 895 people aged 20-80 years in 10 ethnic groups from 11 provinces or autonomous regions all over China. Blood samples were collected from each participant for the establishment of the China Multi-Ethnic Biobank (CMEB). CNHS collected data on demographic and socioeconomic information, lifestyle factors, anthropometric measures, laboratory tests and clinical profiles. These data provide a comprehensive resource for further study on risk factors of noncommunicable disease among different ethnic groups. 
Information about the CNHS database, including publication list, introduction of the survey design and methods, and guidelines for submitting electronic forms of data application, is available at [http://www.bmicc.cn/web/share/home].",2018-12-01 +34194678,WADDAICA: A webserver for aiding protein drug design by artificial intelligence and classical algorithm.,"Artificial intelligence can train the related known drug data into deep learning models for drug design, while classical algorithms can design drugs through established and predefined procedures. Both deep learning and classical algorithms have their merits for drug design. Here, the webserver WADDAICA is built to employ the advantage of deep learning model and classical algorithms for drug design. The WADDAICA mainly contains two modules. In the first module, WADDAICA provides deep learning models for scaffold hopping of compounds to modify or design new novel drugs. The deep learning model which is used in WADDAICA shows a good scoring power based on the PDBbind database. In the second module, WADDAICA supplies functions for modifying or designing new novel drugs by classical algorithms. WADDAICA shows better Pearson and Spearman correlations of binding affinity than Autodock Vina that is considered to have the best scoring power. Besides, WADDAICA supplies a friendly and convenient web interface for users to submit drug design jobs. We believe that WADDAICA is a useful and effective tool to help researchers to modify or design novel drugs by deep learning models and classical algorithms. 
WADDAICA is free and accessible at https://bqflab.github.io or https://heisenberg.ucam.edu:5000.",2021-06-14 +34591514,"""On the role of (implicit) drinking self-identity in alcohol use and problematic drinking: A comparison of five measures."" Correction to Cummins, Lindgren, and De Houwer (2020).","Reports an error in ""On the role of (implicit) drinking self-identity in alcohol use and problematic drinking: A comparison of five measures"" by Jamie Cummins, Kristen P. Lindgren and Jan De Houwer (Psychology of Addictive Behaviors, 2021[Jun], Vol 35[4], 458-471). In the article (https://doi.org/10 .1037/adb0000643), the Open Data and Open Materials badges were omitted in error from the advance online publication version of this article. (The following abstract of the original article appeared in record 2020-80846-001.) Objective: Implicit and explicit drinking self-identity appear to be useful in predicting alcohol-related outcomes. However, there are several different implicit and explicit measures which can be used to assess drinking self-identity. Some of these implicit measures can also capture relational information (e.g., I am a drinker, I should be a drinker), which might provide unique advantages. Despite the importance of having good measures of drinking self-identity, to date there has been little direct comparison of these measures. Method: This study (N = 358) systematically compared two commonly used measures of drinking self-identity (one implicit and one explicit: the Implicit Association Test [IAT] and the Alcohol Self-Concept Scale [ASCS]) with three relational measures of implicit self-identity (the autobiographical IAT [aIAT], the Relational Responding Task [RRT], and the Propositional Concealed Information Test [pCIT]) on a range of criteria relevant to experimental and clinical alcohol researchers. Results: Overall, we found mixed performances on the implicit measures. 
Interestingly, the aIAT, which probed should-based drinking identity, performed better than the standard IAT. However, the explicit measure exhibited superior performance to all other measures across all criteria. Conclusions: Our results suggest that researchers who wish to assess drinking-related self-identity and to predict alcohol-related outcomes cross-sectionally should set their focus primarily on the use (and further development) of the ASCS, rather than any of the implicit measures. Future research focusing on the ASCS should seek to investigate the generalizability of our findings to patient populations, and incorporate relational information within that procedure to further improve upon its already-strong utility. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-09-01 +34516507,Postoperative complications observed with robotic versus laparoscopic surgery for the treatment of rectal cancer: An updated meta-analysis of recently published studies.,"

Background

This is an updated meta-analysis comparing the postoperative complications observed with robotic versus laparoscopic surgery (LS) for the treatment of rectal cancer.

Methods

Cochrane central, MEDLINE (Medical Literature Analysis and Retrieval System Online), EMBASE (Excerpta Medica dataBASE), Google Scholar, Web of Science and http://www.ClinicalTrials.gov were searched for studies (published after the year 2015), comparing robotic versus LS for the treatment of rectal cancer. The postoperative outcomes were considered as the endpoints in this analysis. RevMan 5.4 was used to carry out the statistical analysis. Risk ratio (RR) with 95% confidence intervals (CI) were used to represent the results following data analysis.

Results

A total number of 22,744 participants were included in this study whereby 9178 participants were assigned to the robotic surgery and 13,566 participants were assigned to the LS group. The time period of patients' enrollment varied from years 2007 to 2017. Our results showed that overall complications (RR: 0.91, 95% CI: 0.71-1.17; P = .45), wound complications (RR: 0.81, 95% CI: 0.64-1.04; P = .09), anastomotic leak (RR: 1.12, 95% CI: 0.88-1.42; P = .37), anastomotic bleeding (RR: 0.88, 95% CI: 0.29-2.64; P = .82), stoma-related complications (RR: 0.88, 95% CI: 0.24-3.21; P = .85), intra-abdominal abscess (RR: 0.53, 95% CI: 0.22-1.31; P = .17), urinary tract infection (RR: 0.94, 95% CI: 0.53-1.66; P = .83), enterocolitis (RR: 1.35, 95% CI: 0.38-4.71; P = .64), reoperation (RR: 0.85, 95% CI: 0.46-1.54; P = .58), and mortality (RR: 0.75, 95% CI: 0.34-1.62; P = .46) were not significantly different between robotic-assisted versus LS for rectal cancer. Postoperative ileus (RR: 1.21, 95% CI: 0.81-1.81; P = .34), readmission (RR: 1.17, 95% CI: 0.75-1.83; P = .48), and urinary retention (RR: 0.51, 95% CI: 0.21-1.23; P = .14) were also similarly manifested.

Conclusions

In this updated meta-analysis, both robotic and laparoscopic surgeries were equally effective for the treatment of rectal cancer. Similar postoperative complications were observed. However, our analysis was restricted only to postoperative outcomes, parameters such as duration of surgery were not taken into consideration.",2021-09-01 +32818254,DREIMT: a drug repositioning database and prioritization tool for immunomodulation.,"

Motivation

Drug immunomodulation modifies the response of the immune system and can be therapeutically exploited in pathologies such as cancer and autoimmune diseases.

Results

DREIMT is a new hypothesis-generation web tool, which performs drug prioritization analysis for immunomodulation. DREIMT provides significant immunomodulatory drugs targeting up to 70 immune cells subtypes through a curated database that integrates 4960 drug profiles and ∼2600 immune gene expression signatures. The tool also suggests potential immunomodulatory drugs targeting user-supplied gene expression signatures. Final output includes drug-signature association scores, FDRs and downloadable plots and results tables.

Availability and implementation

http://www.dreimt.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +31106365,Pergola-web: a web server for the visualization and analysis of longitudinal behavioral data using repurposed genomics tools and standards.,"We present a new web application to query and visualize time-series behavioral data: the Pergola web-server. This server provides a user-friendly interface for exploring longitudinal behavioral data taking advantage of the Pergola Python library. Using the server, users can process the data applying some basic operations, such as binning or grouping, while formatting the data into existing genomic formats. Thanks to this repurposing of genomics standards, the application automatically renders an interactive data visualization based on sophisticated genome visualization tools. Our tool allows behavioral scientists to share, display and navigate complex behavioral data comprising multiple individuals and multiple data types, in a scalable and flexible manner. A download option allows for further analysis using genomic tools. The server can be a great resource for the field in a time where behavioral science is entering a data-intensive cycle thanks to high-throughput behavioral phenotyping platforms. Pergola is publicly available at http://pergola.crg.eu/.",2019-07-01 +30380106,OrthoInspector 3.0: open portal for comparative genomics.,"OrthoInspector is one of the leading software suites for orthology relations inference. In this paper, we describe a major redesign of the OrthoInspector online resource along with a significant increase in the number of species: 4753 organisms are now covered across the three domains of life, making OrthoInspector the most exhaustive orthology resource to date in terms of covered species (excluding viruses). The new website integrates original data exploration and visualization tools in an ergonomic interface. 
Distributions of protein orthologs are represented by heatmaps summarizing their evolutionary histories, and proteins with similar profiles can be directly accessed. Two novel tools have been implemented for comparative genomics: a phylogenetic profile search that can be used to find proteins with a specific presence-absence profile and investigate their functions and, inversely, a GO profiling tool aimed at deciphering evolutionary histories of molecular functions, processes or cell components. In addition to the re-designed website, the OrthoInspector resource now provides a REST interface for programmatic access. OrthoInspector 3.0 is available at http://lbgi.fr/orthoinspectorv3.",2019-01-01 +34908108,RNAglib: A python package for RNA 2.5D graphs. ,"RNA 3D architectures are stabilized by sophisticated networks of (non-canonical) base pair interactions, which can be conveniently encoded as multi-relational graphs and efficiently exploited by graph theoretical approaches and recent progresses in machine learning techniques. RNAglib is a library that eases the use of this representation, by providing clean data, methods to load it in machine learning pipelines and graph-based deep learning models suited for this representation. RNAglib also offers other utilities to model RNA with 2.5D graphs, such as drawing tools, comparison functions or baseline performances on RNA applications. The method is distributed as a pip package, RNAglib. The source code, data, and documentation is available at https://rnaglib.cs.mcgill.ca.",2021-12-15 +31598703,CancerGeneNet: linking driver genes to cancer hallmarks.,"CancerGeneNet (https://signor.uniroma2.it/CancerGeneNet/) is a resource that links genes that are frequently mutated in cancers to cancer phenotypes. The resource takes advantage of a curation effort aimed at embedding a large fraction of the gene products that are found altered in cancer cells into a network of causal protein relationships. 
Graph algorithms, in turn, allow to infer likely paths of causal interactions linking cancer associated genes to cancer phenotypes thus offering a rational framework for the design of strategies to revert disease phenotypes. CancerGeneNet bridges two interaction layers by connecting proteins whose activities are affected by cancer drivers to proteins that impact on the 'hallmarks of cancer'. In addition, CancerGeneNet annotates curated pathways that are relevant to rationalize the pathological consequences of cancer driver mutations in selected common cancers and 'MiniPathways' illustrating regulatory circuits that are frequently altered in different cancers.",2020-01-01 +,The use of Facebook in a community pharmacist-led weight management programme – a London-based proof of concept study,"Abstract

Introduction

In the United Kingdom (UK), 63% of adults are overweight,(1) costing the NHS £6.1 billion/year. With the public using digital technology over healthcare professionals (HCPs) for health advice, this warrants an investigation of technology use in community pharmacy, given its previous successful use.(2)

Aim

To determine the feasibility and perceptions of a community pharmacist (CP)-led weight management programme (WMP), enhanced by a Facebook support group (FSG).

Methods

A proof of concept study was conducted between January-March 2020. Recruitment was via a pharmacy, the university and a community Facebook group. Inclusion criteria: over 18 years; overweight; no medical conditions. Participants attended face-to-face meetings (ftf) with a CP and final year pharmacy student (PS) on two occasions (0 (baseline) and 4 weeks). At baseline, participants were given the NHS weight loss programme and set weight loss goals. During ftf, participants had height, weight, and waist circumference (WC) measurements by a CP/PS and discussed eating habits, exercise and alcohol. In between ftf, participants accessed the FSG (created (December 2019) and moderated by a CP). Here, they received posts about diet, exercise and motivation. Participants were to have their measurements taken ftf at 8-weeks, however, COVID-19 meant participants had to self-declare these via video call. Following the 8-week programme, participants completed a 4-section survey about their experience (signing up to the service; comparison to previous weight loss attempts; the FSG and overall perceptions). Question types included multiple choice, Likert scale and free text comments. Data were analysed in Excel (Microsoft Corporation 2016) with changes in height, weight, waist circumference, alcohol and exercise being calculated.

Results

Fifty-five participants were recruited. 18 were lost to follow-up, most (n=12/18) citing COVID-19. Of the 37 participants remaining (70.3% female, mean age=37 years), 22 were obese, the rest overweight. Mean weight loss, mean percentage weight loss and mean WC reduction at 4-weeks was 1.6 kg (SD+/- 1.7 kg), 1.8% (SD+/- 1.9%) and 2 cm (SD+/- 1.96 cm) respectively. At week 8 measurements were self-declared. Mean weight loss at 8-weeks from baseline was 2.7 kg (SD +/- 2.6 kg) and mean percentage weight loss was 3% (SD+/- 3%). Only five participants’ self-declared WC measurements at 8-weeks with mean reduction being 3.6 cm. Five participants moved to healthier BMI classifications by week 8. All participants accessed the FSG at least weekly with 13 accessing it daily. Diet posts were the most popular (n=20/37). Participants learned about portion control and increasing fruits/vegetables intake. All participants would recommend the programme to their friends/family.

Conclusion

An 8-week CPWMP, enhanced with FSG, supported participants to lose a mean of 3% body weight. Participants accessed the page regularly and were positive about its usefulness. One limitation was that the COVID-19 lockdown prevented the 8-week ftf, therefore, self-declared measurements were used. The pandemic has highlighted the importance of pharmacy embracing technology for service delivery, particularly when in-person contact is limited. The implication of this study is that it provides proof that the concept of digital service delivery could work in practice.

References

1. GOV.UK. Tackling obesity: empowering adults and children to live healthier lives [Internet]. Department of Health and Social Care. 2020 [cited 2020 Aug 18]. Available from: https://www.gov.uk/government/publications/tackling-obesity-government-strategy/tackling-obesity-empowering-adults-and-children-to-live-healthier-lives 2. Crilly P, Kayyali R. A Systematic Review of Randomized Controlled Trials of Telehealth and Digital Technology Use by Community Pharmacists to Improve Public Health. Pharmacy 2020;8(3):137. Available from: https://www.mdpi.com/2226–4787/8/3/137",2021-03-26 +33331653,DATAMAN: A global database of nitrous oxide and ammonia emission factors for excreta deposited by livestock and land-applied manure.,"Nitrous oxide (N2 O), ammonia (NH3 ), and methane (CH4 ) emissions from the manure management chain of livestock production systems are important contributors to greenhouse gases (GHGs) and NH3 emitted by human activities. Several studies have evaluated manure-related emissions and associated key variables at regional, national, or continental scales. However, there have been few studies focusing on the drivers of these emissions using a global dataset. An international project was created (DATAMAN) to develop a global database on GHG and NH3 emissions from the manure management chain (housing, storage, and field) to identify key variables influencing emissions and ultimately to refine emission factors (EFs) for future national GHG inventories and NH3 emission reporting. This paper describes the ""field"" database that focuses on N2 O and NH3 EFs from land-applied manure and excreta deposited by grazing livestock. We collated relevant information (EFs, manure characteristics, soil properties, and climatic conditions) from published peer-reviewed research, conference papers, and existing databases. 
The database, containing 5,632 observations compiled from 184 studies, was relatively evenly split between N2 O and NH3 (56 and 44% of the EF values, respectively). The N2 O data were derived from studies conducted in 21 countries on five continents, with New Zealand, the United Kingdom, Kenya, and Brazil representing 86% of the data. The NH3 data originated from studies conducted in 17 countries on four continents, with the United Kingdom, Denmark, Canada, and The Netherlands representing 79% of the data. Wet temperate climates represented 90% of the total database. The DATAMAN field database is available at http://www.dataman.co.nz.",2021-01-22 +30813887,mtProtEvol: the resource presenting molecular evolution analysis of proteins involved in the function of Vertebrate mitochondria.,"BACKGROUND:Heterotachy is the variation in the evolutionary rate of aligned sites in different parts of the phylogenetic tree. It occurs mainly due to epistatic interactions among the substitutions, which are highly complex and make it difficult to study protein evolution. The vast majority of computational evolutionary approaches for studying these epistatic interactions or their evolutionary consequences in proteins require high computational time. However, recently, it has been shown that the evolution of residue solvent accessibility (RSA) is tightly linked with changes in protein fitness and intra-protein epistatic interactions. This provides a computationally fast alternative, based on comparison of evolutionary rates of amino acid replacements with the rates of RSA evolutionary changes in order to recognize any shifts in epistatic interaction. RESULTS:Based on RSA information, data randomization and phylogenetic approaches, we constructed a software pipeline, which can be used to analyze the evolutionary consequences of intra-protein epistatic interactions with relatively low computational time. 
We analyzed the evolution of 512 protein families tightly linked to mitochondrial function in Vertebrates and created ""mtProtEvol"", the web resource with data on protein evolution. In strict agreement with lifespan and metabolic rate data, we demonstrated that different functional categories of mitochondria-related proteins subjected to selection on accelerated and decelerated RSA rates in rodents and primates. For example, accelerated RSA evolution in rodents has been shown for Krebs cycle enzymes, respiratory chain and reactive oxygen species metabolism, while in primates these functions are stress-response, translation and mtDNA integrity. Decelerated RSA evolution in rodents has been demonstrated for translational machinery and oxidative stress response components. CONCLUSIONS:mtProtEvol is an interactive resource focused on evolutionary analysis of epistatic interactions in protein families involved in Vertebrata mitochondria function and available at http://bioinfodbs.kantiana.ru/mtProtEvol /. This resource and the devised software pipeline may be useful tool for researchers in area of protein evolution.",2019-02-26 +34709870,"Successful, Easy to Access, Online Publication of COVID-19 Data During the Pandemic, New York City, 2020.","Making public health data easier to access, understand, and use makes it more likely that the data will be influential. Throughout the COVID-19 pandemic, the New York City (NYC) Department of Health and Mental Hygiene's Web-based data communication became a cornerstone of NYC's response and allowed the public, journalists, and researchers to access and understand the data in a way that supported the pandemic response and brought attention to the deeply unequal patterns of COVID-19's morbidity and mortality in NYC. (Am J Public Health. 2021;111(S3):S193-S196. 
https://doi.org/10.2105/AJPH.2021.306446).",2021-10-01 +34510756,Advanced data preprocessing for comprehensive two-dimensional gas chromatography with vacuum ultraviolet spectroscopy detection.,"Comprehensive two-dimensional gas chromatography with vacuum ultraviolet detection results in sizable data for which noise and baseline drift ought to be corrected. As the data is acquired from multiple channels, preprocessing steps have to be applied to the data from all channels while being robust and rather fast with respect to the significant size of the data. In this study, we have described advanced data preprocessing techniques for such data which were not available in the existing commercial software solutions and which were dedicated primarily to noise and baseline correction. Noise reduction was performed on both the spectral and the time dimension. For the baseline correction, a morphological approach based on iterated convolutions and rectifier operations was proposed. On the spectral dimension, much less noisy and reliable spectra were obtained. From a quantitative point of view, mentioned preprocessing steps significantly improved the signal-to-noise ratio for the analyte detection (circa six times in this study). These preprocessing methods were integrated into the plugim! platform (https://www.plugim.fr/).",2021-09-24 +34363073,Male Infertility Knowledgebase: decoding the genetic and disease landscape. ,"Male infertility is a multifactorial condition that contributes to around one-third of cases of infertility worldwide. Several chromosomal aberrations, single-gene and polygenic associations with male factor defects have been reported. These defects manifest as sperm number or sperm quality defects leading to infertility. However, in almost 40% of cases, the genetic etiology of male infertility remains unexplained. Understanding the causal genetic factors is crucial for effective patient management and counseling. 
Integrating the vast amount of available omics data on male infertility is a first step towards understanding, delineating and prioritizing genes associated with the different male reproductive disorders. The Male Infertility Knowledgebase (MIK) is a manually curated repository developed to boost research on the elusive genetic etiology of male infertility. It integrates information on ∼17 000 genes, their associated pathways, gene ontology, diseases and gene and sequence-based analysis tools. In addition, it also incorporates information on reported chromosomal aberrations and syndromic associations with male infertility. Disease enrichment of genes in MIK indicate a shared genetic etiology between cancer, male and female infertility disorders. While the genes involved in cancer pathways were found to be common causal factors for sperm number and sperm quality defects, the interleukin pathways were found to be shared and enriched between male factor defects and non-reproductive conditions like cardiovascular diseases, metabolic diseases, etc. Disease information in MIK can be explored further to identify high-risk conditions associated with male infertility and delineate shared genetic etiology. Utility of the knowledgebase in predicting novel genes is illustrated by identification of 149 novel candidates for cryptorchidism using gene prioritization and network analysis. MIK will serve as a platform for review of genetic information on male infertility, identification pleiotropic genes, prediction of novel candidate genes for the different male infertility diseases and for portending future high-risk diseases associated with male infertility. 
Database URL: http://mik.bicnirrh.res.in/.",2021-08-01 +33426873,BBPpred: Sequence-Based Prediction of Blood-Brain Barrier Peptides with Feature Representation Learning and Logistic Regression.,"Blood-brain barrier peptides (BBPs) have a large range of biomedical applications since they can cross the blood-brain barrier based on different mechanisms. As experimental methods for the identification of BBPs are laborious and expensive, computational approaches are necessary to be developed for predicting BBPs. In this work, we describe a computational method, BBPpred (blood-brain barrier peptides prediction), that can efficiently identify BBPs using logistic regression. We investigate a wide variety of features from amino acid sequence information, and then a feature learning method is adopted to represent the informative features. To improve the prediction performance, seven informative features are selected for classification by eliminating redundant and irrelevant features. In addition, we specifically create two benchmark data sets (training and independent test), which contain a total of 119 BBPs from public databases and the literature. On the training data set, BBPpred shows promising performances with an AUC score of 0.8764 and an AUPR score of 0.8757 using the 10-fold cross-validation. We also test our new method on the independent test data set and obtain a favorable performance. We envision that BBPpred will be a useful tool for identifying, annotating, and characterizing BBPs. BBPpred is freely available at http://BBPpred.xialab.info.",2021-01-11 +34878880,The Impacts of the COVID-19 Pandemic on the Medical Expenditure Panel Survey.,"The COVID-19 pandemic caused substantial disruptions in the field operations of all 3 major components of the Medical Expenditure Panel Survey (MEPS). 
The MEPS is widely used to study how policy changes and major shocks, such as the COVID-19 pandemic, affect insurance coverage, access, and preventive and other health care utilization and how these relate to population health. We describe how the MEPS program successfully responded to these challenges by reengineering field operations, including survey modes, to complete data collection and maintain data release schedules. The impact of the pandemic on response rates varied considerably across the MEPS. Investigations to date show little effect on the quality of data collected. However, lower response rates may reduce the statistical precision of some estimates. We also describe several enhancements made to the MEPS that will allow researchers to better understand the impact of the pandemic on US residents, employers, and the US health care system. (Am J Public Health. 2021;111(12):2157-2166. https://doi.org/10.2105/AJPH.2021.306534).",2021-12-01 +31830251,FFLtool: a web server for transcription factor and miRNA feed forward loop analysis in human.,"SUMMARY:Transcription factors (TFs) and microRNAs (miRNAs) are two kinds of important regulators for transcriptional and post-transcriptional regulations. Understanding cross-talks between the two regulators and their targets is critical to reveal complex molecular regulatory mechanisms. Here, we developed FFLtool, a web server for detecting potential feed forward loop (FFL) of TF-miRNA-target regulation in human. In FFLtool, we integrated comprehensive regulations of TF-target and miRNA-target, and developed two functional modules: (i) The 'FFL Analysis' module can detect potential FFLs and internal regulatory networks in a user-defined gene set. 
FFLtool also provides three levels of evidence to illustrate the reliability for each FFL and enrichment functions for co-target genes of the same TF and miRNA; (ii) The 'Browse FFLs' module displays FFLs comprised of differentially or specifically expressed TFs and miRNAs and their target genes in cancers. FFLtool is a valuable resource for investigating gene expression regulation and mechanism study in biological processes and diseases. AVAILABILITY AND IMPLEMENTATION:FFLtool is available on http://bioinfo.life.hust.edu.cn/FFLtool/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-04-01 +34014674,The SistematX Web Portal of Natural Products: An Update.,"Natural products and their secondary metabolites are promising starting points for the development of drug prototypes and new drugs, as many current treatments for numerous diseases are directly or indirectly related to such compounds. State-of-the-art, curated, integrated, and frequently updated databases of secondary metabolites are thus highly relevant to drug discovery. The SistematX Web Portal, introduced in 2018, is undergoing development to address this need and documents crucial information about plant secondary metabolites, including the exact location of the species from which the compounds were isolated. SistematX also allows registered users to log in to the data management area and gain access to administrative pages. This study reports recent updates and modifications to the SistematX Web Portal, including a batch download option, the generation and visualization of 1H and 13C nuclear magnetic resonance spectra, and the calculation of physicochemical (drug-like and lead-like) properties and biological activity profiles. The SistematX Web Portal is freely available at http://sistematx.ufpb.br.",2021-05-20 +31210271,Chickspress: a resource for chicken gene expression. 
,"High-throughput sequencing and proteomics technologies are markedly increasing the amount of RNA and peptide data that are available to researchers, which are typically made publicly available via data repositories such as the NCBI Sequence Read Archive and proteome archives, respectively. These data sets contain valuable information about when and where gene products are expressed, but this information is not readily obtainable from archived data sets. Here we report Chickspress (http://geneatlas.arl.arizona.edu), the first publicly available gene expression resource for chicken tissues. Since there is no single source of chicken gene models, Chickspress incorporates both NCBI and Ensembl gene models and links these gene sets with experimental gene expression data and QTL information. By linking gene models from both NCBI and Ensembl gene prediction pipelines, researchers can, for the first time, easily compare gene models from each of these prediction workflows to available experimental data for these products. We use Chickspress data to show the differences between these gene annotation pipelines. Chickspress also provides rapid search, visualization and download capacity for chicken gene sets based upon tissue type, developmental stage and experiment type. This first Chickspress release contains 161 gene expression data sets, including expression of mRNAs, miRNAs, proteins and peptides. We provide several examples demonstrating how researchers may use this resource.",2019-01-01 +33382884,HeartBioPortal2.0: new developments and updates for genetic ancestry and cardiometabolic quantitative traits in diverse human populations. ,"Cardiovascular disease (CVD) is the leading cause of death worldwide for all genders and across most racial and ethnic groups. 
However, different races and ethnicities exhibit different rates of CVD and its related cardiorenal and metabolic comorbidities, suggesting differences in genetic predisposition and risk of onset, as well as socioeconomic and lifestyle factors (diet, exercise, etc.) that act upon an individual's unique underlying genetic background. Here, we present HeartBioPortal2.0, a major update to HeartBioPortal, the world's largest CVD genetics data precision medicine platform for harmonized CVD-relevant genetic variants, which now enables search and analysis of human genetic information related to heart disease across ethnically diverse populations and cardiovascular/renal/metabolic quantitative traits pertinent to CVD pathophysiology. HeartBioPortal2.0 is structured as a cloud-based computing platform and knowledge portal that consolidates a multitude of CVD-relevant genomic data modalities into a single powerful query and browsing interface between data and user via a user-friendly web application publicly available to the scientific research community. Since its initial release, HeartBioPortal2.0 has added new cardiovascular/renal/metabolic disease-relevant gene expression data as well as genetic association data from numerous large-scale genome-wide association study consortiums such as CARDIoGRAMplusC4D, TOPMed, FinnGen, AFGen, MESA, MEGASTROKE, UK Biobank, CHARGE, Biobank Japan and MyCode, among other studies. In addition, HeartBioPortal2.0 now includes support for quantitative traits and ethnically diverse populations, allowing users to investigate the shared genetic architecture of any gene or its variants across the continuous cardiometabolic spectrum from health (e.g. blood pressure traits) to disease (e.g. hypertension), facilitating the understanding of CVD trait genetics that inform health-to-disease transitions and endophenotypes. 
Custom visualizations in the new and improved user interface, including performance enhancements and new security features such as user authentication, collectively re-imagine HeartBioPortal's user experience and provide a data commons that co-locates data, storage and computing infrastructure in the context of studying the genetic basis behind the leading cause of global mortality. Database URL: https://www.heartbioportal.com/.",2020-12-01 +33683565,"Health and longevity studies in C. elegans: the ""healthy worm database"" reveals strengths, weaknesses and gaps of test compound-based studies.","Several biogerontology databases exist that focus on genetic or gene expression data linked to health as well as survival, subsequent to compound treatments or genetic manipulations in animal models. However, none of these has yet collected experimental results of compound-related health changes. Since quality of life is often regarded as more valuable than length of life, we aim to fill this gap with the ""Healthy Worm Database"" ( http://healthy-worm-database.eu ). Literature describing health-related compound studies in the aging model Caenorhabditis elegans was screened, and data for 440 compounds collected. The database considers 189 publications describing 89 different phenotypes measured in 2995 different conditions. Besides enabling a targeted search for promising compounds for further investigations, this database also offers insights into the research field of studies on healthy aging based on a frequently used model organism. Some weaknesses of C. elegans-based aging studies, like underrepresented phenotypes, especially concerning cognitive functions, as well as the convenience-based use of young worms as the starting point for compound treatment or phenotype measurement are discussed. 
In conclusion, the database provides an anchor for the search for compounds affecting health, with a link to public databases, and it further highlights some potential shortcomings in current aging research.",2021-03-08 +34210950,Exploration of the Important Role of Microfibril-Associated Protein 4 Gene in Oral Squamous Cell Carcinoma.,"BACKGROUND Oral squamous cell carcinoma (OSCC) is a common tumor of the head and neck. Its treatment usually requires multiple modalities. Currently, there are no molecular biomarkers to guide these treatment strategies. Studies have shown that microfibril-associated protein 4 (MFAP4) is potentially useful for non-invasive assessment of various diseases; however, its biological function in tumors is still unknown. In this study, we propose that MFAP4 is a new prognostic target for OSCC. MATERIAL AND METHODS First, we collected OSCC data (GSE25099 and GSE30784 datasets) from the Gene Expression Omnibus (GEO) database and compared the differential expression of MFAP4 gene between the patients (tumor) and normal (control) groups. The comparison was done with University of California Santa Cruz Xena (https://xenabrowser.net/Datapages/), and we calculated the difference in MFAP4 gene expression between normal and tumor tissues in a pan-cancer analysis. Then, we compared the 2 groups with high and low expression of MFAP4 gene in terms of tumor mutation burden (TMB), miRNA regulation, and immune cell infiltration. RESULTS We found that the expression of MFAP4 gene was significantly decreased in tumors. Our research also showed that high expression of MFAP4 was related to better prognosis of patients and may be related to tumor gene mutation, miRNA regulation, and infiltration of different immune cells. 
CONCLUSIONS Our work provides evidence that expression of MFAP4 can be used as a prognostic biomarker for risk stratification of OSCC patients and elaborates on its relation with the regulation of TMB, miRNAs, and immune cell infiltration.",2021-07-02 +34081565,Biomedical Entity Explorer: A Web Server for Biomedical Entity Exploration.,"Biomedical Entity Explorer (BEE) is a web server that can search for biomedical entities from a database of six biomedical entity types (gene, miRNA, drug, disease, single nucleotide polymorphism [SNP], pathway) and their gene associations. The search results can be explored using intersections, unions, and negations. BEE has integrated biomedical entities from 16 databases (Ensemble, PharmGKB, Genetic Home Reference, Tarbase, Mirbase, NCI Thesaurus, DisGeNET, Linked life data, UMLS, GSEA MsigDB, Reactome, KEGG, Gene Ontology, HGVD, SNPedia, and dbSNP) based on their gene associations and built a database with their synonyms, descriptions, and links containing individual details. Users can enter the keyword of one or more entities and select the type of entity for which they want to know the relationship for and by using set operations such as union, negation, and intersection, they can navigate the search results more clearly. We believe that BEE will not only be useful for biologists querying for complex associations between entities, but can also be a good starting point for general users searching for biomedical entities. BEE is accessible at (http://bike-bee.snu.ac.kr).",2021-06-02 +34400360,The Genome Sequence Archive Family: Toward Explosive Data Growth and Diverse Data Types.,"The Genome Sequence Archive (GSA) is a data repository for archiving raw sequence data, which provides data storage and sharing services for worldwide scientific communities. 
Considering explosive data growth with diverse data types, here we present the GSA family by expanding into a set of resources for raw data archive with different purposes, namely, GSA (https://ngdc.cncb.ac.cn/gsa/), GSA for Human (GSA-Human, https://ngdc.cncb.ac.cn/gsa-human/), and Open Archive for Miscellaneous Data (OMIX, https://ngdc.cncb.ac.cn/omix/). Compared with the 2017 version, GSA has been significantly updated in data model, online functionalities, and web interfaces. GSA-Human, as a new partner of GSA, is a data repository specialized in human genetics-related data with controlled access and security. OMIX, as a critical complement to the two resources mentioned above, is an open archive for miscellaneous data. Together, all these resources form a family of resources dedicated to archiving explosive data with diverse types, accepting data submissions from all over the world, and providing free open access to all publicly available data in support of worldwide research activities.",2021-08-13 +33574307,Computational scanning tunneling microscope image database.,"We introduce the systematic database of scanning tunneling microscope (STM) images obtained using density functional theory (DFT) for two-dimensional (2D) materials, calculated using the Tersoff-Hamann method. It currently contains data for 716 exfoliable 2D materials. Examples of the five possible Bravais lattice types for 2D materials and their Fourier-transforms are discussed. All the computational STM images generated in this work are made available on the JARVIS-STM website ( https://jarvis.nist.gov/jarvisstm ). We find excellent qualitative agreement between the computational and experimental STM images for selected materials. As a first example application of this database, we train a convolution neural network model to identify the Bravais lattice from the STM images. We believe the model can aid high-throughput experimental data analysis. 
These computational STM images can directly aid the identification of phases, analyzing defects and lattice-distortions in experimental STM images, as well as be incorporated in the autonomous experiment workflows.",2021-02-11 +33227814,GP4: an integrated Gram-Positive Protein Prediction Pipeline for subcellular localization mimicking bacterial sorting. ,"Subcellular localization is a critical aspect of protein function and the potential application of proteins either as drugs or drug targets, or in industrial and domestic applications. However, the experimental determination of protein localization is time consuming and expensive. Therefore, various localization predictors have been developed for particular groups of species. Intriguingly, despite their major representation amongst biotechnological cell factories and pathogens, a meta-predictor based on sorting signals and specific for Gram-positive bacteria was still lacking. Here we present GP4, a protein subcellular localization meta-predictor mainly for Firmicutes, but also Actinobacteria, based on the combination of multiple tools, each specific for different sorting signals and compartments. Novelty elements include improved cell-wall protein prediction, including differentiation of the type of interaction, prediction of non-canonical secretion pathway target proteins, separate prediction of lipoproteins and better user experience in terms of parsability and interpretability of the results. GP4 aims at mimicking protein sorting as it would happen in a bacterial cell. As GP4 is not homology based, it has a broad applicability and does not depend on annotated databases with homologous proteins. Non-canonical usage may include little studied or novel species, synthetic and engineered organisms, and even re-use of the prediction data to develop custom prediction algorithms. Our benchmark analysis highlights the improved performance of GP4 compared to other widely used subcellular protein localization predictors. 
A webserver running GP4 is available at http://gp4.hpc.rug.nl/.",2021-07-01 +34061414,COVID-19 spreading across world correlates with C677T allele of the methylenetetrahydrofolate reductase (MTHFR) gene prevalence.,"

Background

Homocysteine assessment has been proposed as a potential predictive biomarker for the severity of COVID-19 infection. The purpose of this review was to analyze the correlation between the prevalence of MTHFR C677 T gene polymorphism and COVID-19 incidence and mortality worldwide.

Methods

Data regarding MTHFR C677 T gene mutation were obtained from the interrogation of the Genome Aggregation Database (gnomAD), which is publicly available from the web at ""https://gnomad.broadinstitute.org."" COVID-19 cases, including prevalence and mortality, were obtained from ""https://www.worldometers.info/coronavirus"" on 27 August 2020.

Results

There is a clear trend toward the worldwide prevalence of MTHFR 677 T and COVID-19 incidence and mortality. The prevalence of MTHFR 677 T allele in the Latino population, and the incidence and mortality for COVID-19 was higher for this ethnic group than that reported for most other populations globally. Statistical analysis showed a relatively strong correlation between C677 T and death from coronavirus.

Conclusions

Genetic polymorphism of MTHFR C677 T may modulate the incidence and severity of COVID-19 pandemic infection.",2021-06-01 +34647461,"Mass Dynamics 1.0: A Streamlined, Web-Based Environment for Analyzing, Sharing, and Integrating Label-Free Data.","Label-free quantification (LFQ) of shotgun proteomics data is a popular and robust method for the characterization of relative protein abundance between samples. Many analytical pipelines exist for the automation of this analysis, and some tools exist for the subsequent representation and inspection of the results of these pipelines. Mass Dynamics 1.0 (MD 1.0) is a web-based analysis environment that can analyze and visualize LFQ data produced by software such as MaxQuant. Unlike other tools, MD 1.0 utilizes a cloud-based architecture to enable researchers to store their data, enabling researchers to not only automatically process and visualize their LFQ data but also annotate and share their findings with collaborators and, if chosen, to easily publish results to the community. With a view toward increased reproducibility and standardization in proteomics data analysis and streamlining collaboration between researchers, MD 1.0 requires minimal parameter choices and automatically generates quality control reports to verify experiment integrity. Here, we demonstrate that MD 1.0 provides reliable results for protein expression quantification, emulating Perseus on benchmark datasets over a wide dynamic range. 
The MD 1.0 platform is available globally via: https://app.massdynamics.com/.",2021-10-14 +33415739,Unsupervised cluster analysis of SARS-CoV-2 genomes reflects its geographic progression and identifies distinct genetic subgroups of SARS-CoV-2 virus.,"Over 10,000 viral genome sequences of the SARS-CoV-2 virus have been made readily available during the ongoing coronavirus pandemic since the initial genome sequence of the virus was released on the open access Virological website (http://virological.org/) early on January 11. We utilize the published data on the single stranded RNAs of 11,132 SARS-CoV-2 patients in the GISAID database, which contains fully or partially sequenced SARS-CoV-2 samples from laboratories around the world. Among many important research questions which are currently being investigated, one aspect pertains to the genetic characterization/classification of the virus. We analyze data on the nucleotide sequencing of the virus and geographic information of a subset of 7640 SARS-CoV-2 patients without missing entries that are available in the GISAID database. Instead of modeling the mutation rate, applying phylogenetic tree approaches, and so forth, we here utilize a model-free clustering approach that compares the viruses at a genome-wide level. We apply principal component analysis to a similarity matrix that compares all pairs of these SARS-CoV-2 nucleotide sequences at all loci simultaneously, using the Jaccard index. Our analysis results of the SARS-CoV-2 genome data illustrates the geographic and chronological progression of the virus, starting from the first cases that were observed in China to the current wave of cases in Europe and North America. This is in line with a phylogenetic analysis which we use to contrast our results. We also observe that, based on their sequence data, the SARS-CoV-2 viruses cluster in distinct genetic subgroups. 
It is the subject of ongoing research to examine whether the genetic subgroup could be related to diseases outcome and its potential implications for vaccine development.",2021-01-08 +33777034,TCRMatch: Predicting T-Cell Receptor Specificity Based on Sequence Similarity to Previously Characterized Receptors.,"The adaptive immune system in vertebrates has evolved to recognize non-self antigens, such as proteins expressed by infectious agents and mutated cancer cells. T cells play an important role in antigen recognition by expressing a diverse repertoire of antigen-specific receptors, which bind epitopes to mount targeted immune responses. Recent advances in high-throughput sequencing have enabled the routine generation of T-cell receptor (TCR) repertoire data. Identifying the specific epitopes targeted by different TCRs in these data would be valuable. To accomplish that, we took advantage of the ever-increasing number of TCRs with known epitope specificity curated in the Immune Epitope Database (IEDB) since 2004. We compared seven metrics of sequence similarity to determine their power to predict if two TCRs have the same epitope specificity. We found that a comprehensive k-mer matching approach produced the best results, which we have implemented into TCRMatch, an openly accessible tool (http://tools.iedb.org/tcrmatch/) that takes TCR β-chain CDR3 sequences as an input, identifies TCRs with a match in the IEDB, and reports the specificity of each match. We anticipate that this tool will provide new insights into T cell responses captured in receptor repertoire and single cell sequencing experiments and will facilitate the development of new strategies for monitoring and treatment of infectious, allergic, and autoimmune diseases, as well as cancer.",2021-03-11 +33608948,The Oxford Catalogue of Opioids: A systematic synthesis of opioid drug names and their pharmacology.,"

Aim

The growing demand for analgesia, coupled with an increasing need to treat opioid dependence and overdose, has escalated the development of novel opioids. We aimed to quantify the number of opioid drugs developed and to catalogue them based on their pharmacology.

Methods

We conducted a systematic search of seven sources in November 2020, including the WHO's Anatomical Therapeutic Classification index, the British National Formulary, the IUPHAR/BPS Guide to Pharmacology, the International Narcotics Control Board Index of Names of Narcotic Drugs, the WHO's International Nonproprietary Names MedNet service, Martindale's Extra Pharmacopoeia and the Merck Index, to include opioid drugs that targeted or had an effect or coeffect at one or more opioid receptors. We extracted chemical and nonproprietary names, drug stems, molecular formulas, molecular weights, receptor targets, actions at opioid receptors and classes based on their origins. We used descriptive statistics and calculated medians and interquartile ranges where appropriate.

Results

We identified 233 opioid drugs and created an online resource (https://www.catalogueofopioids.net/). There were 10 unique drug stems, and ""-fentanil"" accounted for one-fifth (20%) of all opioids. Most of the drugs (n = 133) targeted mu-opioid receptors and the majority (n = 191) were agonists at one or more receptors. Most (82%) were synthetic opioids, followed by semisynthetic opioids (16%) and alkaloids (3%).

Conclusion

This catalogue centralizes and disseminates information that could assist researchers, prescribers and the public to improve the safe use of opioids.",2021-03-20 +34377363,"Insight into membraneless organelles and their associated proteins: Drivers, Clients and Regulators.","In recent years, attention has been devoted to proteins forming immiscible liquid phases within the liquid intracellular medium, commonly referred to as membraneless organelles (MLO). These organelles enable the spatiotemporal associations of cellular components that exchange dynamically with the cellular milieu. The dysregulation of these liquid-liquid phase separation processes (LLPS) may cause various diseases including neurodegenerative pathologies and cancer, among others. Until very recently, databases containing information on proteins forming MLOs, as well as tools and resources facilitating their analysis, were missing. This has recently changed with the publication of 4 databases that focus on different types of experiments, sets of proteins, inclusion criteria, and levels of annotation or curation. In this study we integrate and analyze the information across these databases, complement their records, and produce a consolidated set of proteins that enables the investigation of the LLPS phenomenon. To gain insight into the features that characterize different types of MLOs and the roles of their associated proteins, they were grouped into categories: High Confidence MLO associated (including Drivers and reviewed proteins), Potential Clients and Regulators, according to their annotated functions. We show that none of the databases taken alone covers the data sufficiently to enable meaningful analysis, validating our integration effort as essential for gaining better understanding of phase separation and laying the foundations for the discovery of new proteins potentially involved in this important cellular process. 
Lastly, we developed a server, enabling customized selections of different sets of proteins based on MLO location, database, disorder content, among other attributes (https://forti.shinyapps.io/mlos/).",2021-06-29 +33122286,Transcriptomes of Major Proximal Tubule Cell Culture Models.,"

Background

Cultured cell lines are widely used for research in the physiology, pathophysiology, toxicology, and pharmacology of the renal proximal tubule. The lines that are most appropriate for a given use depend upon the genes expressed. New tools for transcriptomic profiling using RNA sequencing (RNA-Seq) make it possible to catalog expressed genes in each cell line.

Methods

Fourteen different proximal tubule cell lines, representing six species, were grown on permeable supports under conditions specific for the respective lines. RNA-Seq followed standard procedures.

Results

Transcripts expressed in cell lines variably matched transcripts selectively expressed in native proximal tubule. Opossum kidney (OK) cells displayed the highest percentage match (45% of proximal marker genes [TPM threshold =15]), with pig kidney cells (LLC-PK1) close behind (39%). Lower-percentage matches were seen for various human lines, including HK-2 (26%), and lines from rodent kidneys, such as NRK-52E (23%). Nominally, identical OK cells from different sources differed substantially in expression of proximal tubule markers. Mapping cell line transcriptomes to gene sets for various proximal tubule functions (sodium and water transport, protein transport, metabolic functions, endocrine functions) showed that different lines may be optimal for experimentally modeling each function. An online resource (https://esbl.nhlbi.nih.gov/JBrowse/KCT/) has been created to interrogate cell line transcriptome data. Proteomic analysis of NRK-52E cells confirmed low expression of many proximal tubule marker proteins.

Conclusions

No cell line fully matched the transcriptome of native proximal tubule cells. However, some of the lines tested are suitable for the study of particular metabolic and transport processes seen in the proximal tubule.",2020-10-29 +34427512,Multilevel Genome Typing Describes Short- and Long-Term Vibrio cholerae Molecular Epidemiology.,"Since 1817, cholera, caused by Vibrio cholerae, has been characterized by seven distinct pandemics. The ongoing seventh pandemic (7P) began in 1961. In this study, we developed a Multilevel Genome Typing (MGT) tool for classifying the V. cholerae species with a focus on the 7P. MGT is based on multilocus sequence typing (MLST), but the concept has been expanded to include a series of MLST schemes that compare population structure from broad to fine resolutions. The V. cholerae MGT consists of eight levels, with the lowest, MGT1, composed of 7 loci and the highest, MGT8, consisting of the 7P core genome (3,759 loci). We used MGT to analyze 5,771 V. cholerae genomes. The genetic relationships revealed by lower MGT levels recapitulated previous findings of large-scale 7P transmission across the globe. Furthermore, the higher MGT levels provided an increased discriminatory power to differentiate subgroups within a national outbreak. Additionally, we demonstrated the usefulness of MGT for non-7P classification. In a large non-7P MGT1 type, MGT2 and MGT3 described continental and regional distributions, respectively. Finally, MGT described trends of 7P in virulence, and MGT2 to MGT3 sequence types (STs) grouped isolates of the same ctxB, tcpA, and ctxB-tcpA genotypes and characterized their trends over the pandemic. MGT offers a range of resolutions for typing V. cholerae. The MGT nomenclature is stable, transferable, and directly comparable between investigations. The MGT database (https://mgtdb.unsw.edu.au/) can accept and process newly submitted samples. 
MGT allows tracking of existing and new isolates and will be useful for understanding future spread of cholera. IMPORTANCE In 2017, the World Health Organization launched the ""Ending Cholera"" initiative to reduce cholera-related deaths by 90% by 2030. This strategy emphasized the importance of the speed and accessibility of newer technologies to contain outbreaks. Here, we present a new tool named Multilevel Genome Typing (MGT), which classifies isolates of the cholera-causing agent, Vibrio cholerae. MGT is a freely available online database that groups genetically similar V. cholerae isolates to quickly indicate the origins of outbreaks. We validated the MGT database retrospectively in an outbreak setting, showcasing rapid confirmation of the Nepalese origins for the 2010 Haiti outbreak. In the past 5 years, thousands of V. cholerae genomes have been submitted to the NCBI database, which underscores the importance of and need for proper genome data classification for cholera epidemiology. The V. cholerae MGT database can assist in early decision making that directly impacts controlling both the local and global spread of cholera.",2021-08-24 +26504105,Differential ligand-signaling network of CCL19/CCL21-CCR7 system. ,"Chemokine (C-C motif) receptor 7 (CCR7), a class A subtype G-Protein Coupled Receptor (GPCR), is involved in the migration, activation and survival of multiple cell types including dendritic cells, T cells, eosinophils, B cells, endothelial cells and different cancer cells. Together, CCR7 signaling system has been implicated in diverse biological processes such as lymph node homeostasis, T cell activation, immune tolerance, inflammatory response and cancer metastasis. CCL19 and CCL21, the two well-characterized CCR7 ligands, have been established to be differential in their signaling through CCR7 in multiple cell types. 
Although the differential ligand signaling through single receptor have been suggested for many receptors including GPCRs, there exists no resource or platform to analyse them globally. Here, first of its kind, we present the cell-type-specific differential signaling network of CCL19/CCL21-CCR7 system for effective visualization and differential analysis of chemokine/GPCR signaling. Database URL: http://www.netpath.org/pathways?path_id=NetPath_46.",2015-10-26 +33166387,The international nucleotide sequence database collaboration.,"The International Nucleotide Sequence Database Collaboration (INSDC; http://www.insdc.org/) has been the core infrastructure for collecting and providing nucleotide sequence data and metadata for >30 years. Three partner organizations, the DNA Data Bank of Japan (DDBJ) at the National Institute of Genetics in Mishima, Japan; the European Nucleotide Archive (ENA) at the European Molecular Biology Laboratory's European Bioinformatics Institute (EMBL-EBI) in Hinxton, UK; and GenBank at National Center for Biotechnology Information (NCBI), National Library of Medicine, National Institutes of Health in Bethesda, Maryland, USA have been collaboratively maintaining the INSDC for the benefit of not only science but all types of community worldwide.",2021-01-01 +33186463,Exploring functionally annotated transcriptional consensus regulatory elements with CONREL. ,"Understanding the interaction between human genome regulatory elements and transcription factors is fundamental to elucidate the structure of gene regulatory networks. Here we present CONREL, a web application that allows for the exploration of functionally annotated transcriptional 'consensus' regulatory elements at different levels of abstraction. CONREL provides an extensive collection of consensus promoters, enhancers and active enhancers for 198 cell-lines across 38 tissue types, which are also combined to provide global consensuses. 
In addition, 1000 Genomes Project genotype data and the 'total binding affinity' of thousands of transcription factor binding motifs at genomic regulatory elements is fully combined and exploited to characterize and annotate functional properties of our collection. Comparison with other available resources highlights the strengths and advantages of CONREL. CONREL can be used to explore genomic loci, specific genes or genomic regions of interest across different cell lines and tissue types. The resource is freely available at https://bcglab.cibio.unitn.it/conrel.",2020-01-01 +31856718,Knowledge Base Commons (KBCommons) v1.1: a universal framework for multi-omics data integration and biological discoveries.,"

Background

Knowledge Base Commons (KBCommons) v1.1 is a universal and all-inclusive web-based framework providing generic functionalities for storing, sharing, analyzing, exploring, integrating and visualizing multiple organisms' genomics and integrative omics data. KBCommons is designed and developed to integrate diverse multi-level omics data and to support biological discoveries for all species via a common platform.

Methods

KBCommons has four modules including data storage, data processing, data accessing, and web interface for data management and retrieval. It provides a comprehensive framework for new plant-specific, animal-specific, virus-specific, bacteria-specific or human disease-specific knowledge base (KB) creation, for adding new genome versions and additional multi-omics data to existing KBs, and for exploring existing datasets within current KBs.

Results

KBCommons has an array of tools for data visualization and data analytics such as multiple gene/metabolite search, gene family/Pfam/Panther function annotation search, miRNA/metabolite/trait/SNP search, differential gene expression analysis, and bulk data download capacity. It contains a highly reliable data privilege management system to make users' data publicly available easily and to share private or pre-publication data with members in their collaborative groups safely and securely. It allows users to conduct data analysis using our in-house developed workflow functionalities that are linked to XSEDE high performance computing resources. Using KBCommons' intuitive web interface, users can easily retrieve genomic data, multi-omics data and analysis results from workflow according to their requirements and interests.

Conclusions

KBCommons addresses the needs of many diverse research communities to have a comprehensive multi-level OMICS web resource for data retrieval, sharing, analysis and visualization. KBCommons can be publicly accessed through a dedicated link for all organisms at http://kbcommons.org/.",2019-12-20 +34919025,Effectiveness of structured education and follow-up in the management of perceived breastmilk insufficiency: a randomized control trial.,"In this study, we examine the effectiveness of structured education and follow-up in the management of perceived milk insufficiency and in increasing the baby's amount of milk intake in breastfeeding mothers. We conducted a randomized controlled trial over the period December 2018-June 2019 at Family Health Centers in Turkey with 64 mothers (intervention group: 33 and control group: 31). We provided the intervention group with education using structured educational material. A Descriptive Information Form, a Breastfeeding Follow-up Form, and the Scoring System for Measuring a Baby's Intake of Breast Milk were the instruments we used in the data collection. Mothers in the intervention group started breastfeeding their babies in the first one hour after birth. We observed at each monitoring that a significantly greater percentage of the mothers in the intervention group believed their milk to be sufficient and that they fed their babies exclusively with breast milk in the three follow-ups. We found from the data we obtained that all three tracking times indicated that the baby's amount of milk intake in the intervention group was better than in the control group; the differences were statistically significant. 
We concluded that structured education and follow-ups increased the exclusive breastfeeding of 0-2-month-old babies, improved perceptions of sufficient milk intake, diminishing the perception of breast milk deficiency. Supplemental data for this article is available online at https://doi.org/10.1080/07399332.2021.2007249.",2021-12-17 +33968360,Dataset: local government mask orders preceding statewide orders by US states.,"We present a database listing local government mask orders for COVID-19 that were enacted between April and September, 2020, prior to the date that the governors issued statewide mask wearing mandates. We obtained data from a Google search of web pages of local and national commercial and public broadcasters and newspapers, and of the orders themselves.  In the database, we present data identifying the county, municipality or tribal council, date of the order, and the source's internet address. In the 34 states with statewide orders, local governments in 21 of these states issued mandates in 218 municipalities, 155 counties, and 1 tribal council.  The dataset can be accessed from https://doi.org/10.7939/DVN/NDFEHK.",2021-01-08 +34337141,Across the Rural-Urban Universe: Two Continuous Indices of Urbanization for U.S. Census Microdata.,"Microdata from U.S. decennial censuses and the American Community Survey are a key resource for social science and policy analysis, enabling researchers to investigate relationships among all reported characteristics for individual respondents and their households. To protect privacy, the Census Bureau restricts the detail of geographic information in public use microdata, and this complicates how researchers can investigate and account for variations across levels of urbanization when analyzing microdata. 
One option is to focus on metropolitan status, which can be determined exactly for most microdata records and approximated for others, but a binary metro/nonmetro classification is still coarse and limited on its own, emphasizing one aspect of rural-urban variation and discounting others. To address these issues, we compute two continuous indices for public use microdata-average tract density and average metro/micro-area population-using population-weighted geometric means. We show how these indices correspond to two key dimensions of urbanization-concentration and size-and we demonstrate their utility through an examination of disparities in poverty throughout the rural-urban universe. Poverty rates vary across settlement types in nonlinear ways: rates are lowest in moderately dense parts of major metro areas, and rates are higher in both low- and high-density areas, as well as in smaller commuting systems. Using the two indices also reveals that correlations between poverty and demographic characteristics vary considerably across settlement types. Both indices are now available for recent census microdata via IPUMS USA (https://usa.ipums.org).",2021-03-15 +33247936,VarStack: a web tool for data retrieval to interpret somatic variants in cancer. ,"Advances in tumor genome sequencing created an urgent need for bioinformatics tools to support the interpretation of the clinical significance of the variants detected. VarStack is a web tool which is a base to retrieve somatic variant data relating to cancer from existing databases. VarStack incorporates data from several publicly available databases and presents them with an easy-to-navigate user interface. It currently supports data from the Catalogue of Somatic Mutations in Cancer, gnomAD, cBioPortal, ClinVar, OncoKB, CiViC and UCSC Genome Browser. It retrieves the data from these databases and returns them back to the user in a fraction of the time it would take to manually navigate each site independently. 
Users submit a variant with a gene symbol, peptide change and coding sequence change. They may select a variety of tumor-specific studies in cBioPortal to search through in addition to their original query. The results from the databases are presented in tabs. Users can export the results as an Excel file. VarStack also has the batch search feature in which the user can submit a list of variants and download an Excel file with the data from the databases. With the batch search and data download options, users can easily incorporate VarStack into their workflow or tools. VarStack saves time by providing somatic variant information to the user from multiple databases in an easy-to-export and interpretable format. VarStack is freely available under https://varstack.brown.edu.",2020-11-01 +31720340,"Draft genome sequence data of Cercospora kikuchii, a causal agent of Cercospora leaf blight and purple seed stain of soybeans.","Cercospora kikuchii (Tak. Matsumoto & Tomoy.) M.W. Gardner 1927 is an ascomycete fungal pathogen that causes Cercospora leaf blight and purple seed stain on soybean. Here, we report the first draft genome sequence and assembly of this pathogen. The C. kikuchii strain ARG_18_001 was isolated from soybean purple seed collected from San Pedro, Buenos Aires, Argentina, during the 2018 harvest. The genome was sequenced using a 2 × 150 bp paired-end method by Illumina NovaSeq 6000. The C. kikuchii protein-coding genes were predicted using FunGAP (Fungal Genome Annotation Pipeline). The draft genome assembly was 33.1 Mb in size with a GC-content of 53%. The gene prediction resulted in 14,856 gene models/14,721 protein coding genes. Genomic data of C. kikuchii presented here will be a useful resource for future studies of this pathosystem. 
The data can be accessed at GenBank under the accession number VTAY00000000 https://www.ncbi.nlm.nih.gov/nuccore/VTAY00000000.",2019-10-21 +29293907,A benchmark for comparing precision medicine methods in thyroid cancer diagnosis using tissue microarrays.,"Motivation: The aim of precision medicine is to harness new knowledge and technology to optimize the timing and targeting of interventions for maximal therapeutic benefit. This study explores the possibility of building AI models without precise pixel-level annotation in prediction of the tumor size, extrathyroidal extension, lymph node metastasis, cancer stage and BRAF mutation in thyroid cancer diagnosis, providing the patients' background information, histopathological and immunohistochemical tissue images. Results: A novel framework for objective evaluation of automatic patient diagnosis algorithms has been established under the auspices of the IEEE International Symposium on Biomedical Imaging 2017- A Grand Challenge for Tissue Microarray Analysis in Thyroid Cancer Diagnosis. Here, we present the datasets, methods and results of the challenge and lay down the principles for future uses of this benchmark. The main contributions of the challenge include the creation of the data repository of tissue microarrays; the creation of the clinical diagnosis classification data repository of thyroid cancer; and the definition of objective quantitative evaluation for comparison and ranking of the algorithms. With this benchmark, three automatic methods for predictions of the five clinical outcomes have been compared, and detailed quantitative evaluation results are presented in this paper. Based on the quantitative evaluation results, we believe automatic patient diagnosis is still a challenging and unsolved problem. Availability and implementation: The datasets and the evaluation software will be made available to the research community, further encouraging future developments in this field. 
(http://www-o.ntust.edu.tw/cvmi/ISBI2017/). Contact: cweiwang@mail.ntust.edu.tw. Supplementary information: Supplementary data are available at Bioinformatics online.",2018-05-01 +33068420,Chewie Nomenclature Server (chewie-NS): a deployable nomenclature server for easy sharing of core and whole genome MLST schemas.,"Chewie Nomenclature Server (chewie-NS, https://chewbbaca.online/) allows users to share genome-based gene-by-gene typing schemas and to maintain a common nomenclature, simplifying the comparison of results. The combination between local analyses and a public repository of allelic data strikes a balance between potential confidentiality issues and the need to compare results. The possibility of deploying private instances of chewie-NS facilitates the creation of nomenclature servers with a restricted user base to allow compliance with the strictest data policies. Chewie-NS allows users to easily share their own schemas and to explore publicly available schemas, including informative statistics on schemas and loci presented in interactive charts and tables. Users can retrieve all the information necessary to run a schema locally or all the alleles identified at a particular locus. The integration with the chewBBACA suite enables users to directly upload new schemas to chewie-NS, download existing schemas and synchronize local and remote schemas from chewBBACA command line version, allowing an easier integration into high-throughput analysis pipelines. 
The same REST API linking chewie-NS and the chewBBACA suite supports the interaction of other interfaces or pipelines with the databases available at chewie-NS, facilitating the reusability of the stored data.",2021-01-01 +,eSnail: A transcriptome‐based molecular resource of the central nervous system for terrestrial gastropods,"To expand on emerging terrestrial gastropod molecular resources, we have undertaken transcriptome‐based sequencing of the central nervous system (CNS) from six ecologically invasive terrestrial gastropods. Focusing on snail species Cochlicella acuta and Helix aspersa and reticulated slugs Deroceras invadens, Deroceras reticulatum, Lehmannia nyctelia and Milax gagates, we obtained a total of 367,869,636 high‐quality reads and compared them with existing CNS transcript resources for the invasive Mediterranean snail, Theba pisana. In total, we obtained 419,289 unique transcripts (unigenes) from 1,410,569 assembled contigs, with blast search analysis of multiple protein databases leading to the annotation of 124,268 unigenes, of which 92,544 mapped to ncbi nonredundant protein databases. We found that these transcriptomes have representatives in most biological functions, based on comparison of gene ontology, kegg pathway and protein family contents, demonstrating a high range of transcripts responsible for regulating metabolic activities and molecular functions occurring within the CNS. To provide an accessible genetic resource, we also demonstrate the presence of 66,687 microsatellites and 304,693 single‐nucleotide variants, which can be used for the design of potentially thousands of unique primers for functional screening. An online “eSnail” database with a user‐friendly web interface was implemented to query all the information obtained herein (http://soft.bioinfo-minzhao.org/esnail). We demonstrate the usefulness of the database through the mining of molluscan neuropeptides. 
As the most comprehensive CNS transcriptome resource for terrestrial gastropods, eSnail may serve as a useful gateway for researchers to explore gastropod CNS function for multiple purposes, including for the development of biocontrol approaches.",2018-01-01 +,"The Quetzal Coalescence template library: A C++ programmers resource for integrating distributional, demographic and coalescent models","Genetic samples can be used to understand and predict the behaviour of species living in a fragmented and temporally changing environment. In this regard, models of coalescence conditioned to an environment through an explicit modelling of population growth and migration have been developed in recent years, and simulators implementing these models have been developed, enabling biologists to estimate parameters of interest with Approximate Bayesian Computation techniques. However, model choice remains limited, and developing new coalescence simulators is extremely time consuming because code re‐use is limited. We present Quetzal, a C++ library composed of re‐usable components, which is sufficiently general to efficiently implement a wide range of spatially explicit coalescence‐based environmental models of population genetics and to embed the simulation in an Approximate Bayesian Computation framework. Quetzal is not a simulation program, but a toolbox for programming simulators aimed at the community of scientific coders and research software engineers in molecular ecology and phylogeography. This new code resource is open‐source and available at https://becheler.github.io/pages/quetzal.html along with other documentation resources.",2019-05-01 +30418591,HumanNet v2: human gene networks for disease research.,"Human gene networks have proven useful in many aspects of disease research, with numerous network-based strategies developed for generating hypotheses about gene-disease-drug associations. 
The ability to predict and organize genes most relevant to a specific disease has proven especially important. We previously developed a human functional gene network, HumanNet, by integrating diverse types of omics data using Bayesian statistics framework and demonstrated its ability to retrieve disease genes. Here, we present HumanNet v2 (http://www.inetbio.org/humannet), a database of human gene networks, which was updated by incorporating new data types, extending data sources and improving network inference algorithms. HumanNet now comprises a hierarchy of human gene networks, allowing for more flexible incorporation of network information into studies. HumanNet performs well in ranking disease-linked gene sets with minimal literature-dependent biases. We observe that incorporating model organisms' protein-protein interactions does not markedly improve disease gene predictions, suggesting that many of the disease gene associations are now captured directly in human-derived datasets. With an improved interactive user interface for disease network analysis, we expect HumanNet will be a useful resource for network medicine.",2019-01-01 +30407573,IMG/VR v.2.0: an integrated data management and analysis system for cultivated and environmental viral genomes.,"The Integrated Microbial Genome/Virus (IMG/VR) system v.2.0 (https://img.jgi.doe.gov/vr/) is the largest publicly available data management and analysis platform dedicated to viral genomics. Since the last report published in the 2016, NAR Database Issue, the data has tripled in size and currently contains genomes of 8389 cultivated reference viruses, 12 498 previously published curated prophages derived from cultivated microbial isolates, and 735 112 viral genomic fragments computationally predicted from assembled shotgun metagenomes. Nearly 60% of the viral genomes and genome fragments are clustered into 110 384 viral Operational Taxonomic Units (vOTUs) with two or more members. 
To improve data quality and predictions of host specificity, IMG/VR v.2.0 now separates prokaryotic and eukaryotic viruses, utilizes known prophage sequences to improve taxonomic assignments, and provides viral genome quality scores based on the estimated genome completeness. New features also include enhanced BLAST search capabilities for external queries. Finally, geographic map visualization to locate user-selected viral genomes or genome fragments has been implemented and download options have been extended. All of these features make IMG/VR v.2.0 a key resource for the study of viruses.",2019-01-01 +32358997,ProNetView-ccRCC: A Web-Based Portal to Interactively Explore Clear Cell Renal Cell Carcinoma Proteogenomics Networks.,"To better understand the molecular basis of cancer, the NCI's Clinical Proteomics Tumor Analysis Consortium (CPTAC) has been performing comprehensive large-scale proteogenomic characterizations of multiple cancer types. Gene and protein regulatory networks are subsequently being derived based on these proteogenomic profiles, which serve as tools to gain systems-level understanding of the molecular regulatory factories underlying these diseases. On the other hand, it remains a challenge to effectively visualize and navigate the resulting network models, which capture higher order structures in the proteogenomic profiles. There is a pressing need to have a new open community resource tool for intuitive visual exploration, interpretation, and communication of these gene/protein regulatory networks by the cancer research community. In this work, ProNetView-ccRCC (http://ccrcc.cptac-network-view.org/), an interactive web-based network exploration portal for investigating phosphopeptide co-expression network inferred based on the CPTAC clear cell renal cell carcinoma (ccRCC) phosphoproteomics data is introduced. 
ProNetView-ccRCC enables quick, user-intuitive visual interactions with the ccRCC tumor phosphoprotein co-expression network comprised of 3614 genes, as well as 30 functional pathway-enriched network modules. Users can interact with the network portal and can conveniently query for association between abundance of each phosphopeptide in the network and clinical variables such as tumor grade.",2020-05-27 +34741046,A machine learning framework for rapid forecasting and history matching in unconventional reservoirs.,"We present a novel workflow for forecasting production in unconventional reservoirs using reduced-order models and machine-learning. Our physics-informed machine-learning workflow addresses the challenges to real-time reservoir management in unconventionals, namely the lack of data (i.e., the time-frame for which the wells have been producing), and the significant computational expense of high-fidelity modeling. We do this by applying the machine-learning paradigm of transfer learning, where we combine fast, but less accurate reduced-order models with slow, but accurate high-fidelity models. We use the Patzek model (Proc Natl Acad Sci 11:19731-19736, https://doi.org/10.1073/pnas.1313380110 , 2013) as the reduced-order model to generate synthetic production data and supplement this data with synthetic production data obtained from high-fidelity discrete fracture network simulations of the site of interest. Our results demonstrate that training with low-fidelity models is not sufficient for accurate forecasting, but transfer learning is able to augment the knowledge and perform well once trained with the small set of results from the high-fidelity model. 
Such a physics-informed machine-learning (PIML) workflow, grounded in physics, is a viable candidate for real-time history matching and production forecasting in a fractured shale gas reservoir.",2021-11-05 +35136431,Are Dimensions of Gender Inequality Uniformly Associated With Human Values?,"A previous work of Schwartz and Rubel-Lifschitz (2009, https://doi.org/10.1037/a0015546) highlighted the association between human values and gender equality. However, gender equality is not a monolith. Indeed, it is a multidimensional phenomenon. We started from this multidimensionality to understand how the relative importance of human values varies through the different dimensions of Gender Equality Index (GEI)-namely work, money, knowledge, time, power, and health. We have designed a cross-national study based on secondary data analysis from international databases (i.e., European Social Survey [ESS] and GEI). Through the Bayesian correlational analysis of 18 European countries, findings revealed that 1) universalism, benevolence and self-direction are strongly and positively correlated to gender equality; 2) security, power and achievement are strongly and negatively correlated to equality while 3) conformity, tradition, stimulation, and hedonism have weak/non-significant correlation coefficients with gender equality. Relevance to cultural values and ideologies that support social equality are discussed. Furthermore, we find that some values are related to certain specific gender equality dimensions. 
Our results provide a more fine-grained analysis compared to previous findings, by outlining a more complex scenario.",2021-05-31 +33755549,A snapshot of human leukocyte antigen (HLA) diversity using data from the Allele Frequency Net Database.,"The extensive allelic variability observed in several genes related to the immune response and its significance in different areas including transplantation, disease association studies, diversity in human populations, among many others, has led the scientific community to analyse these variants among individuals. Serving as an electronic data warehouse, the Allele Frequency Net Database (AFND, http://www.allelefrequencies.net) contains data on the frequency of immune related genes and their corresponding alleles from more than 1700 worldwide population samples covering more than ten million unrelated individuals. The collection of population data sets available in AFND encompasses different polymorphic regions including the highly-polymorphic human leukocyte antigen (HLA) system for which more than 1200 populations are available. In this article, we provide an insight of the high diversity found in the HLA region by examining population data sets stored in AFND, as well as a description of the available data sets for further analyses.",2020-10-21 +34709868,COVID-19 Pandemic and Indigenous Representation in Public Health Data.,"Public Health 3.0 calls for the inclusion of new partners and novel data to bring systemic change to the US public health landscape. The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) pandemic has illuminated significant data gaps influenced by ongoing colonial legacies of racism and erasure. American Indian and Alaska Native (AI/AN) populations and communities have been disproportionately affected by incomplete public health data and by the COVID-19 pandemic itself. Our findings indicate that only 26 US states were able to calculate COVID-19‒related death rates for AI/AN populations. 
Given that 37 states have Indian Health Service locations, we argue that public health researchers and practitioners should have a far larger data set of aggregated public health information on AI/AN populations. Despite enormous obstacles, local Tribal facilities have created effective community responses to COVID-19 testing, tracking, and vaccine administration. Their knowledge can lead the way to a healthier nation. Federal and state governments and health agencies must learn to responsibly support Tribal efforts, collect data from AI/AN persons in partnership with Indian Health Service and Tribal governments, and communicate effectively with Tribal authorities to ensure Indigenous data sovereignty. (Am J Public Health. 2021;111(S3): S208-S214. https://doi.org/10.2105/AJPH.2021.306415).",2021-10-01 +,475. Describing the impact of the COVID-19 pandemic on HIV care in Latin America,"Abstract

Background

The effects of the COVID-19 pandemic on people living with HIV (PWH) are unknown. Beyond SARS-CoV-2 co-infection, the pandemic may have devastating consequences for HIV care delivery. Understanding these is crucial as reduced antiretroviral therapy (ART) availability alone could lead to ≥500,000 AIDS-related deaths in 2020–2021. With Latin America now a focal point in the pandemic, we sought to describe the impact of COVID-19 on HIV care at Latin American clinical sites.

Methods

Caribbean, Central and South America network for HIV epidemiology (CCASAnet) and additional Brazilian HIV care sites in Argentina, Brazil, Chile, Haiti, Honduras, Mexico, and Peru were included. An electronic survey of COVID-19 effects on HIV clinic operations was administered in Spanish or English via phone and email, April 28-June 2, 2020. We also compared national COVID-19 case, mortality, and policy data from public sources.

Results

Brazil’s and Mexico’s epidemics appear most pronounced, with >10,000 confirmed COVID-19-related deaths (Figure 1); countries implemented “social distancing” policies at different times after initial cases, with Haiti earliest and Mexico latest (Figure 2). Nearly all 13 sites reported decreased hours and providers for HIV care. Twelve of 13 reported increased use of telehealth, suspension/postponements of routine HIV appointments, and/or suspension of HIV research. Eleven of 13 reported initiation of new COVID-19 research but suspension of community HIV testing, and nearly half provided additional ART supplies. Nearly 70% reported impacts on HIV viral load testing and nearly 40% reported personal protective equipment stock-outs (Table). All 13 sites experienced changes in resources/services in tandem with national policies; there was wide variation, however, in the number of economic and health supports implemented thus far (e.g., quarantines, tax deferrals, interest rate reductions, etc.), from 172 COVID-19-related policies in Brazil to only 30 in Mexico. Table Site Assessment of Impacts of the COVID-19 Pandemic on HIV services in Latin America at CCASAnet and Coorte Sites, N=13 Figure 1. Cumulative mortality due to COVID-19 in countries within which CCASAnet and Coorte sites are located Figure 1 footnote: Source for mortality counts: the WHO COVID-19 Dashboard, available at: https://covid19.who.int/ All data were up-to-date as of, and were accessed on, June 17th, 2020 Figure 2. 
Cumulative cases of COVID-19 in countries within which CCASAnet and Coorte sites are located and dates (relative to the day on which the first positive case of COVID-19 was detected) of general social distancing, public health emergency, or mass quarantine policy introduction (vertical dashed lines), 2020 Figure 2 footnote: Source for case counts: the WHO COVID-19 Dashboard, available at: https://covid19.who.int/ Source for health policy implementation: the United Nations Economic Council for Latin America & the Caribbean, available at: https://cepalstat-prod.cepal.org/forms/covid-countrysheet/index.html All data were up-to-date as of, and were accessed on, June 17th, 2020

Conclusion

The COVID-19 pandemic has already had a substantial effect on daily operations of HIV clinics in Latin America. The downstream effects of these impacts on HIV outcomes in Latin America will need to be further studied.

Disclosures

All Authors: No reported disclosures",2020-10-01 +34508155,NetTCR-2.0 enables accurate prediction of TCR-peptide binding by using paired TCRα and β sequence data.,"Prediction of T-cell receptor (TCR) interactions with MHC-peptide complexes remains highly challenging. This challenge is primarily due to three dominant factors: data accuracy, data scarceness, and problem complexity. Here, we showcase that ""shallow"" convolutional neural network (CNN) architectures are adequate to deal with the problem complexity imposed by the length variations of TCRs. We demonstrate that current public bulk CDR3β-pMHC binding data overall is of low quality and that the development of accurate prediction models is contingent on paired α/β TCR sequence data corresponding to at least 150 distinct pairs for each investigated pMHC. In comparison, models trained on CDR3α or CDR3β data alone demonstrated a variable and pMHC specific relative performance drop. Together these findings support that T-cell specificity is predictable given the availability of accurate and sufficient paired TCR sequence data. NetTCR-2.0 is publicly available at https://services.healthtech.dtu.dk/service.php?NetTCR-2.0 .",2021-09-10 +32264951,A multidimensional systems biology analysis of cellular senescence in aging and disease.,"

Background

Cellular senescence, a permanent state of replicative arrest in otherwise proliferating cells, is a hallmark of aging and has been linked to aging-related diseases. Many genes play a role in cellular senescence, yet a comprehensive understanding of its pathways is still lacking.

Results

We develop CellAge (http://genomics.senescence.info/cells), a manually curated database of 279 human genes driving cellular senescence, and perform various integrative analyses. Genes inducing cellular senescence tend to be overexpressed with age in human tissues and are significantly overrepresented in anti-longevity and tumor-suppressor genes, while genes inhibiting cellular senescence overlap with pro-longevity and oncogenes. Furthermore, cellular senescence genes are strongly conserved in mammals but not in invertebrates. We also build cellular senescence protein-protein interaction and co-expression networks. Clusters in the networks are enriched for cell cycle and immunological processes. Network topological parameters also reveal novel potential cellular senescence regulators. Using siRNAs, we observe that all 26 candidates tested induce at least one marker of senescence with 13 genes (C9orf40, CDC25A, CDCA4, CKAP2, GTF3C4, HAUS4, IMMT, MCM7, MTHFD2, MYBL2, NEK2, NIPA2, and TCEB3) decreasing cell number, activating p16/p21, and undergoing morphological changes that resemble cellular senescence.

Conclusions

Overall, our work provides a benchmark resource for researchers to study cellular senescence, and our systems biology analyses reveal new insights and gene regulators of cellular senescence.",2020-04-07 +33382885,CorkOakDB-The Cork Oak Genome Database Portal. ,"Quercus suber (cork oak) is an evergreen tree native to the Mediterranean basin, which plays a key role in the ecology and economy of this area. Over the last decades, this species has gone through an observable decline, mostly due to environmental factors. Deciphering the mechanisms of cork oak's response to the environment and getting a deep insight into its biology are crucial to counteract biotic and abiotic stresses compromising the stability of a unique ecosystem. In the light of these setbacks, the publication of the genome in 2018 was a major step towards understanding the genetic make-up of this species. In an effort to integrate this information in a comprehensive, accessible and intuitive format, we have developed The Cork Oak Genome Database Portal (CorkOakDB). The CorkOakDB is supported by the BioData.pt e-infrastructure, the Portuguese ELIXIR node for biological data. The portal gives public access to search and explore the curated genomic and transcriptomic data on this species. Moreover, CorkOakDB provides a user-friendly interface and functional tools to help the research community take advantage of the increased accessibility to genomic information. A study case is provided to highlight the functionalities of the portal. CorkOakDB guarantees the update, curation and data collection, aiming to collect data besides the genetic/genomic information, in order to become the main repository in cork oak research. 
Database URL: http://corkoakdb.org/.",2020-12-01 +,Survival of commercial probiotic strains and their effect on dark chocolate synbiotic snack with raspberry content during the storage and after simulated digestion,"A key challenge for manufacturers of pro-health food containing active probiotic microorganisms is to develop a product with attractive sensory features along with maintenance of declared number of microorganisms during storage and transfer by alimentary tract. The highest concentration of polyphenols was observed in snacks without an additive of probiotics as well as those with an additive of L. rhamnosus and B. animalis bacteria and concentration of these compounds increased by 9.5% during six months of storage. None of the products distinguished itself in the sensorial assessment although each was assessed positively. The number of microorganisms was stable and comparatively high during six months of storage at a room temperature and in cooling conditions (10⁸ cfu/g). In the digestion model, an influence of aggressive digestion conditions was examined in the alimentary tract on the number of microorganisms, which allowed to arrange strains from the most resistant (S. boulardii) to the most sensitive (B. breve). It must be noted that currently on the market there is no available snack containing probiotic yeast as well as there is no literature data on works on such formulation of food. In the newly developed snack made of chocolate, in which sugar has been replaced with maltitol, a raw material was added in the form of raspberry, prebiotic in the form of inulin and a strain of probiotic bacteria, including the unprecedented so far S. boulardii, which stands a high chance to occupy a good place on the market of functional food. How to cite: Cielecka-Piontek J, Dziedziński M, Szczepaniak O, et al. 
Survival of commercial probiotic strains and their effect on dark chocolate synbiotic snack with raspberry content during the storage and after simulated digestion. Electron J Biotechnol 2020;48. https://doi.org/10.1016/j.ejbt.2020.09.005.",2020-11-01 +33093168,Tumor Fibroblast-Derived FGF2 Regulates Expression of SPRY1 in Esophageal Tumor-Infiltrating T Cells and Plays a Role in T-cell Exhaustion.,"T-cell exhaustion was initially identified in chronic infection in mice and was subsequently described in humans with cancer. Although the distinct signature of exhausted T (TEX) cells in cancer has been well investigated, the molecular mechanism of T-cell exhaustion in cancer is not fully understood. Using single-cell RNA sequencing, we report here that TEX cells in esophageal cancer are more heterogeneous than previously clarified. Sprouty RTK signaling antagonist 1 (SPRY1) was notably enriched in two subsets of exhausted CD8+ T cells. When overexpressed, SPRY1 impaired T-cell activation by interacting with CBL, a negative regulator of ZAP-70 tyrosine phosphorylation. Data from the Tumor Immune Estimation Resource revealed a strong correlation between FGF2 and SPRY1 expression in esophageal cancer. High expression of FGF2 was evident in fibroblasts from esophageal cancer tissue and correlated with poor overall survival. In vitro administration of FGF2 significantly upregulated expression of SPRY1 in CD8+ T cells and attenuated T-cell receptor-triggered CD8+ T-cell activation. A mouse tumor model confirmed that overexpression of FGF2 in fibroblasts significantly upregulated SPRY1 expression in TEX cells, impaired T-cell cytotoxic activity, and promoted tumor growth. Thus, these findings identify FGF2 as an important regulator of SPRY1 expression involved in establishing the dysfunctional state of CD8+ T cells in esophageal cancer. 
SIGNIFICANCE: These findings reveal FGF2 as an important regulator of SPRY1 expression involved in establishing the dysfunctional state of CD8+ T cells and suggest that inhibition of FGF2 has potential clinical value in ESCC. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/24/5583/F1.large.jpg.",2020-10-22 +,COVID 19 and Hemoglobinopathies: Update of the Italian Experience,"Background. Patients with pre-existent chronic morbidities are likely to be more severely affected by SARS-Cov2 infection. In Italy, the “Società Italiana Talassemie ed Emoglobinopatie” (SITE) has recently estimated the number of patients (Pts) with Hemoglobinopathies followed by Italian Specialized Centers (SITE Network). Five thousand Transfusion-dependent beta-thalassemia (TDT), 1900 Non-Transfusion-dependent beta-thalassemia (NTDT) and 2000 Sickle Cell Disease (SCD) were registered [1]. To verify the impact of SARS-CoV-2 infection on Pts with Hemoglobinopathies, we performed a specific survey by electronic Case Report Form (eCRF). Inclusion criteria included positive swab or serology in a patient with hemoglobinopathy and at least 15 days of follow-up from either the onset of symptoms or SARS-CoV2 positivity. The survey was approved by the Ethics Committee, and eCRF was shared with the Centers of Italian Hemoglobinopathies Network. Preliminary data updated to April 10, 2020, were published [2]. Results. As of July 31, 2020, 27 cases have been reported: 18 TDT, 4 NTDT, 5 SCD. 89% of the cases were in Northern Italy, where the rate of infection was much higher than the rest of the country, reflecting the national epidemiology. The mean age of thalassemia patients (TDT and NTDT) was 43±11 years, and 55% were male; the mean age of SCD patients was 33±15 years, and 40% was male. The likely source of infection has been detected in 63% (17/27) of cases: 11 had occupational exposure, 6 had a positive relative. 
Five patients were asymptomatic: for them, the SARS-CoV-2 infection was identified by positive swab for 1 patient and by positive level of IgG for 4. Twenty patients had associated comorbidities, 14 were splenectomized, and 3 had functional asplenia. Eleven patients were hospitalized, only one in high-intensity care unit. Three patients required more intensive ventilation support with continuous positive airway pressure (CPAP), one of these has a history of diffuse large B-cell lymphoma treated with chemotherapy in the previous year. Three other patients required support by oxygen. No Pts required intubation. Two Pts increased blood requirement. Only five received supposedly specific treatment for COVID-19: two hydroxychloroquine (HCQ), one HCQ plus ritonavir/darunavir, and one HCQ plus anakinra, one HCQ plus Tocilizumab plus Lopinavir/Ritonavir. The clinical course of hospitalized patients was 18±7 days. All patients recovered. Conclusions. The prevalence of COVID-19 infection in Italian patients with Hemoglobinopathies result 0,3% while in general population the prevalence in Italy is 0,4% [3]. Considering that the thalassemia population is more strictly observed, we could postulate that the precautions suggested or self-applied by the Pts were effective. No death nor severe SARS with intubation, nor signs of cytokines storm, only one thromboembolic event was observed although most individuals had pre-existing complications. A single case with pulmonary hypertension has been described in detail [4]. In most individuals the infection has been pauci or asymptomatic and all recovered. This experience differs from what has been observed in Iran on a similar series with different severity and mortality and ask for a more in-depth comparison [5]. In conclusion, our data do not indicate increased severity of COVID-19 in Pts with Hemoglobinopathies followed in Specialized Centers. Acknowledgment. 
We would like to thank ALT (Associazione per la Lotta alla Talassemia R.Vullo - Ferrara). References 1. http://www.site-italia.org/2020/covid-19.php. SITE communication. Accessed April 1, 2020 2. Motta I, Migone De Amicis M, Pinto VM, et al. SARS-CoV-2 infection in beta thalassemia: Preliminary data from the Italian experience. Am J Hematol. 2020;95(8): E198-E199. 3. https://www.epicentro.iss.it/coronavirus/sars-cov-2-dashboard, Accessed July 31, 2020 4. Pinto VM, Derchi GE, Bacigalupo L, Pontali E, Forni GL. COVID-19 in a Patient with β-Thalassemia Major and Severe Pulmonary Arterial Hypertension. Hemoglobin. 2020;44(3):218-220. 5. Karimi M, Haghpanah S, Azarkeivan A, et al. Prevalence and mortality in β-thalassaemias due to outbreak of novel coronavirus disease (COVID-19): the nationwide Iranian experience. Br J Haematol. 2020;190(3):e137-e140.

Disclosures

Motta:Sanofi Genzyme: Honoraria. Cappellini:BMS: Honoraria; Genzyme/Sanofi: Honoraria, Membership on an entity's Board of Directors or advisory committees; CRISPR Therapeutics, Novartis, Vifor Pharma: Membership on an entity's Board of Directors or advisory committees. Piga:BMS: Research Funding; Novartis: Research Funding. Forni:Novartis: Membership on an entity's Board of Directors or advisory committees.",2020-11-05 +30357384,BitterDB: taste ligands and receptors database in 2019.,"BitterDB (http://bitterdb.agri.huji.ac.il) was introduced in 2012 as a central resource for information on bitter-tasting molecules and their receptors. The information in BitterDB is frequently used for choosing suitable ligands for experimental studies, for developing bitterness predictors, for analysis of receptors promiscuity and more. Here, we describe a major upgrade of the database, including significant increase in content as well as new features. BitterDB now holds over 1000 bitter molecules, up from the initial 550. When available, quantitative sensory data on bitterness intensity as well as toxicity information were added. For 270 molecules, at least one associated bitter taste receptor (T2R) is reported. The overall number of ligand-T2R associations is now close to 800. BitterDB was extended to several species: in addition to human, it now holds information on mouse, cat and chicken T2Rs, and the compounds that activate them. BitterDB now provides a unique platform for structure-based studies with high-quality homology models, known ligands, and for the human receptors also data from mutagenesis experiments, information on frequently occurring single nucleotide polymorphisms and links to expression levels in different tissues.",2019-01-01 +33199414,Acute Kidney Injury in a National Cohort of Hospitalized US Veterans with COVID-19.,"

Background and objectives

Coronavirus disease 2019 (COVID-19) is associated with higher risk of AKI. We aimed to describe rates and characterize predictors and health outcomes associated with AKI in a national cohort of US veterans hospitalized with COVID-19.

Design, setting, participants, & measurements

In a cohort of 5216 US veterans hospitalized with COVID-19 identified through July 23, 2020, we described changes in serum creatinine and examined predictors of AKI and the associations between AKI, health resource utilization, and death, utilizing logistic regressions. We characterized geographic and temporal variations in AKI rates and estimated variance explained by key variables utilizing Poisson regressions.

Results

In total, 1655 (32%) participants had AKI; 961 (58%), 223 (13%), and 270 (16%) met Kidney Disease Improving Global Outcomes definitions of stage 1, 2, and 3 AKI, respectively, and 201 (12%) received KRT. Eight percent of participants had AKI within 1 day of hospitalization, and 47% did not recover to baseline serum creatinine by discharge. Older age, Black race, male gender, obesity, diabetes, hypertension, and lower eGFR were significant predictors of AKI during hospitalization with COVID-19. AKI was associated with higher mechanical ventilation use (odds ratio, 6.46; 95% confidence interval, 5.52 to 7.57) and longer hospital stay (5.56 additional days; 95% confidence interval, 4.78 to 6.34). AKI was also associated with higher risk of death (odds ratio, 6.71; 95% confidence interval, 5.62 to 8.04); this association was stronger in Blacks (P value of interaction <0.001). Hospital-level rates of AKI exhibited substantial geographic variability, ranging from 10% to 56%. Between March and July 2020, AKI rates declined from 40% to 27%; proportions of AKI stage 3 and AKI requiring KRT decreased from 44% to 17%. Both geographic and temporal variabilities were predominately explained by percentages of Blacks (31% and 49%, respectively).

Conclusions

AKI is common during hospitalization with COVID-19 and associated with higher risk of health care resource utilization and death. Nearly half of patients with AKI did not recover to baseline by discharge. Substantial geographic variation and temporal decline in rates and severity of AKI were observed.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2020_11_16_CJN09610620_final.mp3.",2020-11-16 +33630388,Crystal structure of the sugar acid-binding protein CxaP from a TRAP transporter in Advenella mimigardefordensis strain DPN7T.,"Recently, CxaP, a sugar acid substrate binding protein (SBP) from Advenella mimigardefordensis strain DPN7T , was identified as part of a novel sugar uptake strategy. In the present study, the protein was successfully crystallized. Although several SBP structures of tripartite ATP-independent periplasmic transporters have already been solved, this is the first structure of an SBP accepting multiple sugar acid ligands. Protein crystals were obtained with bound d-xylonic acid, d-fuconic acid d-galactonic and d-gluconic acid, respectively. The protein shows the typical structure of an SBP of a tripartite ATP-independent periplasmic transporter consisting of two domains linked by a hinge and spanned by a long α-helix. By analysis of the structure, the substrate binding site of the protein was identified. The carboxylic group of the sugar acids interacts with Arg175, whereas the coordination of the hydroxylic groups at positions C2 and C3 is most probably realized by Arg154 and Asn151. Furthermore, it was observed that 2-keto-3-deoxy-d-gluconic acid is bound in protein crystals that were crystallized without the addition of any ligand, indicating that this molecule is prebound to the protein and is displaced by the other ligands if they are available. DATABASE: Structural data of CxaP complexes are available in the worldwide Protein Data Bank (https://www.rcsb.org) under the accession codes 7BBR (2-keto-3-deoxy-d-gluconic acid), 7BCR (d-galactonic acid), 7BCN (d-xylonic acid), 7BCO (d-fuconic acid) and 7BCP (d-gluconic acid).",2021-03-11 +34594439,Online Tools for Teaching Cancer Bioinformatics. 
,"The rise of deep molecular characterization with omics data as a standard in biological sciences has highlighted a need for expanded instruction in bioinformatics curricula. Many large biology data sets are publicly available and offer an incredible opportunity for educators to help students explore biological phenomena with computational tools, including data manipulation, visualization, and statistical assessment. However, logistical barriers to data access and integration often complicate their use in undergraduate education. Here, we present a cancer bioinformatics module that is designed to overcome these barriers through six exercises containing authentic, biologically motivated computational exercises that demonstrate how modern omics data are used in precision oncology. Upper-division undergraduate students develop advanced Python programming and data analysis skills with real-world oncology data which integrates proteomics and genomics. The module is publicly available and open source at https://paynelab.github.io/biograder/bio462. These hands-on activities include explanatory text, code demonstrations, and practice problems and are ready to implement in bioinformatics courses.",2021-08-31 +33152070,Genenames.org: the HGNC and VGNC resources in 2021.,"The HUGO Gene Nomenclature Committee (HGNC) based at EMBL's European Bioinformatics Institute (EMBL-EBI) assigns unique symbols and names to human genes. There are over 42,000 approved gene symbols in our current database of which over 19 000 are for protein-coding genes. While we still update placeholder and problematic symbols, we are working towards stabilizing symbols where possible; over 2000 symbols for disease associated genes are now marked as stable in our symbol reports. All of our data is available at the HGNC website https://www.genenames.org. 
The Vertebrate Gene Nomenclature Committee (VGNC) was established to assign standardized nomenclature in line with human for vertebrate species lacking their own nomenclature committee. In addition to the previous VGNC core species of chimpanzee, cow, horse and dog, we now name genes in cat, macaque and pig. Gene groups have been added to VGNC and currently include two complex families: olfactory receptors (ORs) and cytochrome P450s (CYPs). In collaboration with specialists we have also named CYPs in species beyond our core set. All VGNC data is available at https://vertebrate.genenames.org/. This article provides an overview of our online data and resources, focusing on updates over the last two years.",2021-01-01 +31516400,N-GlycositeAtlas: a database resource for mass spectrometry-based human N-linked glycoprotein and glycosylation site mapping.,"

Background

N-linked glycoprotein is a highly interesting class of proteins for clinical and biological research. The large-scale characterization of N-linked glycoproteins accomplished by mass spectrometry-based glycoproteomics has provided valuable insights into the interdependence of glycoprotein structure and protein function. However, these studies focused mainly on the analysis of specific sample type, and lack the integration of glycoproteomic data from different tissues, body fluids or cell types.

Methods

In this study, we collected the human glycosite-containing peptides identified through their de-glycosylated forms by mass spectrometry from over 100 publications and unpublished datasets generated from our laboratory. A database resource termed N-GlycositeAtlas was created and further used for the distribution analyses of glycoproteins among different human cells, tissues and body fluids. Finally, a web interface of N-GlycositeAtlas was created to maximize the utility and value of the database.

Results

The N-GlycositeAtlas database contains more than 30,000 glycosite-containing peptides (representing > 14,000 N-glycosylation sites) from more than 7200 N-glycoproteins from different biological sources including human-derived tissues, body fluids and cell lines from over 100 studies.

Conclusions

The entire human N-glycoproteome database as well as 22 sub-databases associated with individual tissues or body fluids can be downloaded from the N-GlycositeAtlas website at http://nglycositeatlas.biomarkercenter.org.",2019-09-07 +31861980,BISR-RNAseq: an efficient and scalable RNAseq analysis workflow with interactive report generation.,"

Background

RNA sequencing has become an increasingly affordable way to profile gene expression patterns. Here we introduce a workflow implementing several open-source software tools that can be run on a high performance computing environment.

Results

Developed as a tool by the Bioinformatics Shared Resource Group (BISR) at the Ohio State University, we have applied the pipeline to a few publicly available RNAseq datasets downloaded from GEO in order to demonstrate the feasibility of this workflow. Source code is available here: workflow: https://code.bmi.osumc.edu/gadepalli.3/BISR-RNAseq-ICIBM2019 and shiny: https://code.bmi.osumc.edu/gadepalli.3/BISR_RNASeq_ICIBM19. Example dataset is demonstrated here: https://dataportal.bmi.osumc.edu/RNA_Seq/.

Conclusion

The workflow allows for the analysis (alignment, QC, gene-wise counts generation) of raw RNAseq data and seamless integration of quality analysis and differential expression results into a configurable R shiny web application.",2019-12-20 +34647622,Prediction of in-hospital mortality with machine learning for COVID-19 patients treated with steroid and remdesivir.,"We aimed to create the prediction model of in-hospital mortality using machine learning methods for patients with coronavirus disease 2019 (COVID-19) treated with steroid and remdesivir. We reviewed 1571 hospitalized patients with laboratory confirmed COVID-19 from the Mount Sinai Health System treated with both steroids and remdesivir. The important variables associated with in-hospital mortality were identified using LASSO (least absolute shrinkage and selection operator) and SHAP (SHapley Additive exPlanations) through the light gradient boosting model (GBM). The data before February 17th, 2021 (N = 769) was randomly split into training and testing datasets; 80% versus 20%, respectively. Light GBM models were created with train data and area under the curves (AUCs) were calculated. Additionally, we calculated AUC with the data between February 17th, 2021 and March 30th, 2021 (N = 802). Of the 1571 patients admitted due to COVID-19, 331 (21.1%) died during hospitalization. Through LASSO and SHAP, we selected six important variables; age, hypertension, oxygen saturation, blood urea nitrogen, intensive care unit admission, and endotracheal intubation. AUCs using training and testing datasets derived from the data before February 17th, 2021 were 0.871/0.911. Additionally, the light GBM model has high predictability for the latest data (AUC: 0.881) (https://risk-model.herokuapp.com/covid). 
A high-value prediction model was created to estimate in-hospital mortality for COVID-19 patients treated with steroid and remdesivir.",2021-10-22 +33244000,Retrospective assessment of rat liver microsomal stability at NCATS: data and QSAR models.,"Hepatic metabolic stability is a key pharmacokinetic parameter in drug discovery. Metabolic stability is usually assessed in microsomal fractions and only the best compounds progress in the drug discovery process. A high-throughput single time point substrate depletion assay in rat liver microsomes (RLM) is employed at the National Center for Advancing Translational Sciences. Between 2012 and 2020, RLM stability data was generated for ~ 24,000 compounds from more than 250 projects that cover a wide range of pharmacological targets and cellular pathways. Although a crucial endpoint, little or no data exists in the public domain. In this study, computational models were developed for predicting RLM stability using different machine learning methods. In addition, a retrospective time-split validation was performed, and local models were built for projects that performed poorly with global models. Further analysis revealed inherent medicinal chemistry knowledge potentially useful to chemists in the pursuit of synthesizing metabolically stable compounds. In addition, we deposited experimental data for ~ 2500 compounds in the PubChem bioassay database (AID: 1508591). The global prediction models are made publicly accessible ( https://opendata.ncats.nih.gov/adme ). 
This is to the best of our knowledge, the first publicly available RLM prediction model built using high-quality data generated at a single laboratory.",2020-11-26 +29136208,Mouse Phenome Database: an integrative database and analysis suite for curated empirical phenotype data from laboratory mice.,"The Mouse Phenome Database (MPD; https://phenome.jax.org) is a widely used resource that provides access to primary experimental trait data, genotypic variation, protocols and analysis tools for mouse genetic studies. Data are contributed by investigators worldwide and represent a broad scope of phenotyping endpoints and disease-related traits in naïve mice and those exposed to drugs, environmental agents or other treatments. MPD houses individual animal data with detailed, searchable protocols, and makes these data available to other resources via API. MPD provides rigorous curation of experimental data and supporting documentation using relevant ontologies and controlled vocabularies. Most data in MPD are from inbreds and other reproducible strains such that the data are cumulative over time and across laboratories. The resource has been expanded to include the QTL Archive and other primary phenotype data from mapping crosses as well as advanced high-diversity mouse populations including the Collaborative Cross and Diversity Outbred mice. 
Furthermore, MPD provides a means of assessing replicability and reproducibility across experimental conditions and protocols, benchmarking assays in users' own laboratories, identifying sensitized backgrounds for making new mouse models with genome editing technologies, analyzing trait co-inheritance, finding the common genetic basis for multiple traits and assessing sex differences and sex-by-genotype interactions.",2018-01-01 +33521625,Planning for monitoring the introduction and effectiveness of new vaccines using real-word data and geospatial visualization: An example using rotavirus vaccines with potential application to SARS-CoV-2.,"

Background

Infectious diseases continue to cause significant impact on human health. Vaccines are instrumental in preventing infectious diseases and mitigating pandemics and epidemics. SARS-CoV-2 is the most recent example of an urgent pandemic that requires the development of vaccines. This study combined real-world data and geospatial visualization techniques to demonstrate methods to monitor and communicate the uptake and impact of existing and new vaccines.

Methods

Observational data of existing pediatric rotavirus vaccines were used as an example. A large US national insurance claims database was accessed to build an analytic dataset for a 20-year period (1996-2017). For each week and multiple geographic scales, animated spatial and non-spatial visualization techniques were applied to demonstrate changes in seasonal rotavirus epidemic curves and population-based disease rates before, during, and after vaccine introduction in 2006. The geographic scales included national, state, county and zip code tabulation areas. An online web-based digital atlas was built to display either continuous or snapshot visualizations of disease patterns, vaccine uptake, and improved health outcomes after vaccination (http://www.mapvaccines.com).

Results

Over 17 million zip code-weeks of data were available for analysis. The animations show geospatial patterns of rotavirus-related medical encounter rates peaking every year from November - February prior to vaccine availability in 2006. Visualizations showed increasing vaccination coverage rates at all geographic scales over time. Declines in medical encounter rates accelerated as vaccination coverage rapidly increased after 2010. The data maps also identified geographic hotspots with low vaccination rates and persistent disease rates.

Conclusion

This project developed novel web-based methods to communicate location and time-based vaccine uptake and the related reduction in medical visits due to viral infection. Future applications of the visualization could be used by health agencies to monitor known or novel disease patterns over time in conjunction with close assessment of current and future vaccine utilization.",2021-01-09 +,Genome analyses of the new model protist Euplotes vannus focusing on genome rearrangement and resistance to environmental stressors,"As a model organism for studies of cell and environmental biology, the free‐living and cosmopolitan ciliate Euplotes vannus shows intriguing features like dual genome architecture (i.e., separate germline and somatic nuclei in each cell/organism), “gene‐sized” chromosomes, stop codon reassignment, programmed ribosomal frameshifting (PRF) and strong resistance to environmental stressors. However, the molecular mechanisms that account for these remarkable traits remain largely unknown. Here we report a combined analysis of de novo assembled high‐quality macronuclear (MAC; i.e., somatic) and partial micronuclear (MIC; i.e., germline) genome sequences for E. vannus, and transcriptome profiling data under varying conditions. 
The results demonstrate that: (a) the MAC genome contains more than 25,000 complete “gene‐sized” nanochromosomes (~85 Mb haploid genome size) with the N50 ~2.7 kb; (b) although there is a high frequency of frameshifting at stop codons UAA and UAG, we did not observe impaired transcript abundance as a result of PRF in this species as has been reported for other euplotids; (c) the sequence motif 5′‐TA‐3′ is conserved at nearly all internally‐eliminated sequence (IES) boundaries in the MIC genome, and chromosome breakage sites (CBSs) are duplicated and retained in the MAC genome; (d) by profiling the weighted correlation network of genes in the MAC under different environmental stressors, including nutrient scarcity, extreme temperature, salinity and the presence of ammonia, we identified gene clusters that respond to these external physical or chemical stimulations, and (e) we observed a dramatic increase in HSP70 gene transcription under salinity and chemical stresses but surprisingly, not under temperature changes; we link this temperature‐resistance to the evolved loss of temperature stress‐sensitive elements in regulatory regions. Together with the genome resources generated in this study, which are available online at Euplotes vannus Genome Database (http://evan.ciliate.org), these data provide molecular evidence for understanding the unique biology of highly adaptable microorganisms.",2019-09-01 +30728743,"Photo images, 3D/CT data and mtDNA of the freshwater mussels (Bivalvia: Unionidae) in the Kyushu and Ryukyu Islands, Japan, with SEM/EDS analysis of the shell.","

Background

Freshwater mussels (Bivalvia: Unionidae), which are keystone species of freshwater ecosystems, are in global decline. In addition to ecological/genetic studies, morphological examinations are needed to help provide information for the development of additional freshwater mussel studies and eventually conservation efforts for freshwater ecosystems. The microscopic structure, which can be obtained by scanning electron microscopy (SEM) and elemental composition, which can be obtained with energy dispersive X-ray spectrometry (EDS), of mollusc shells are of interest to malacologists. However, information about freshwater mussels is still limited. Kyushu Island is the southernmost island of the four major islands of Japan. Kyushu Island is a hotspot of bitterling fishes in Japan, which simultaneously means that the island is a hotspot of freshwater mussels. The Ryukyu Islands stretch southwest from Kyushu Island to Taiwan; a freshwater mussel of unknown origin was reported from the Ryukyu Islands. Digital archiving for biology and ecology is a continuing challenge for open science. This data paper describes online published photo images, 3D/CT and mtDNA data and SEM/EDS analyses of the shell of freshwater mussels that inhabit the Kyushu and Ryukyu Islands, Japan. Our data will provide basic information regarding freshwater biology and be of public interest as open science.

New information

Photo images, 3D/CT data, mtDNA data, SEM images and EDS elemental analysis of freshwater mussels that inhabit the Kyushu and Ryukyu Islands (61 individuals, nine species/subspecies) were published online in a local database (http://ffish.asia/Unionidae3D), GBIF (http://ipt.pensoft.net/resource?r=unionidae3d) and DDBJ/EMBL/Genbank (LC431810-LC431840).",2019-01-28 +33125079,Cyanorak v2.1: a scalable information system dedicated to the visualization and expert curation of marine and brackish picocyanobacteria genomes.,"Cyanorak v2.1 (http://www.sb-roscoff.fr/cyanorak) is an information system dedicated to visualizing, comparing and curating the genomes of Prochlorococcus, Synechococcus and Cyanobium, the most abundant photosynthetic microorganisms on Earth. The database encompasses sequences from 97 genomes, covering most of the wide genetic diversity known so far within these groups, and which were split into 25,834 clusters of likely orthologous groups (CLOGs). The user interface gives access to genomic characteristics, accession numbers as well as an interactive map showing strain isolation sites. The main entry to the database is through search for a term (gene name, product, etc.), resulting in a list of CLOGs and individual genes. Each CLOG benefits from a rich functional annotation including EggNOG, EC/K numbers, GO terms, TIGR Roles, custom-designed Cyanorak Roles as well as several protein motif predictions. Cyanorak also displays a phyletic profile, indicating the genotype and pigment type for each CLOG, and a genome viewer (Jbrowse) to visualize additional data on each genome such as predicted operons, genomic islands or transcriptomic data, when available. This information system also includes a BLAST search tool, comparative genomic context as well as various data export options. 
Altogether, Cyanorak v2.1 constitutes an invaluable, scalable tool for comparative genomics of ecologically relevant marine microorganisms.",2021-01-01 +34125008,"The ""second wave"" of the COVID-19 pandemic in the Arctic: regional and temporal dynamics.","This article focuses on the ""second wave"" of the COVID-19 pandemic in the Arctic and examines spatiotemporal patterns between July 2020 and January 2021. We analyse available COVID-19 data at the regional (subnational) level to elucidate patterns and typology of Arctic regions with respect to the COVID-19 pandemic. This article builds upon our previous research that examined the early phase of the COVID-19 pandemic between February and July 2020. The pandemic's ""second wave"" observed in the Arctic between September 2020 and January 2021 was severe in terms of COVID-19 infections and fatalities, having particularly strong impacts in Alaska, Northern Russia and Northern Sweden. Based on the spatiotemporal patterns of the ""second wave"" dynamics, we identified 5 types of the pandemic across regions: Shockwaves (Iceland, Faroe Islands, Northern Norway, and Northern Finland), Protracted Waves (Northern Sweden), Tidal Waves (Northern Russia), Tsunami Waves (Alaska), and Isolated Splashes (Northern Canada and Greenland). Although data limitations and gaps persist, monitoring of COVID-19 is critical for developing a proper understanding of the pandemic in order to develop informed and effective responses to the current crisis and possible future pandemics in the Arctic. Data used in this paper are available at https://arctic.uni.edu/arctic-covid-19.",2021-12-01 +,MBRS-48. IDENTIFICATION OF NOVEL THERAPEUTIC APPROACHES FOR MYC-DRIVEN MEDULLOBLASTOMA,"Abstract Medulloblastoma (MB) is the most common malignant brain tumor in children and is frequently metastatic at diagnosis. 
Treatment with surgery, radiation and multi-agent chemotherapy may leave survivors of these brain tumors with long-term deficits as a consequence. One of the four consensus molecular subgroups of MB is the MYC-driven group 3 MB, which is the most malignant type and has a poor prognosis under current therapy. Thus, it is important to discover more effective targeted therapeutic approaches. We conducted a high-throughput drug screening to identify novel compounds showing efficiency in group 3 MB using both clinically established inhibitors (n=196) and clinically-applicable compounds (n=464). More than 20 compounds demonstrated a significantly higher anti-tumoral effect in MYChigh (n=7) compared to MYClow (n=4) MB cell models. Among these compounds, Navitoclax and Clofarabine showed the strongest effect in inducing cell cycle arrest and apoptosis in MYChigh MB models. Furthermore, we show that Navitoclax, an orally bioavailable and blood-brain barrier passing anti-cancer drug, inhibits specifically Bcl-xL proteins. In line, we found a significant correlation between BCL-xL and MYC mRNA levels in 763 primary MB patient samples (Data source: “R2 https://hgserver1.amc.nl”). In addition, Navitoclax and Clofarabine have been tested in cells obtained from MB patient-derived-xenografts, which confirmed their specific efficacy in MYChigh versus MYClow MB. In summary, our approach has identified promising new drugs that significantly reduce cell viability in MYChigh compared to MYClow MB cell models. Our findings point to novel therapeutic vulnerabilities for MB that need to be further validated in vitro and in vivo.",2020-12-01 +33502045,Genotype-Phenotype Relations for Isolated Dystonia Genes: MDSGene Systematic Review.,"This comprehensive MDSGene review is devoted to 7 genes - TOR1A, THAP1, GNAL, ANO3, PRKRA, KMT2B, and HPCA - mutations in which may cause isolated dystonia. It followed MDSGene's standardized data extraction protocol and screened a total of ~1200 citations. 
Phenotypic and genotypic data on ~1200 patients with 254 different mutations were curated and analyzed. There were differences regarding age at onset, site of onset, and distribution of symptoms across mutation carriers in all 7 genes. Although carriers of TOR1A, THAP1, PRKRA, KMT2B, or HPCA mutations mostly showed childhood and adolescent onset, patients with GNAL and ANO3 mutations often developed first symptoms in adulthood. GNAL and KMT2B mutation carriers frequently have 1 predominant site of onset, that is, the neck (GNAL) or the lower limbs (KMT2B), whereas site of onset in DYT-TOR1A, DYT-THAP1, DYT-ANO3, DYT-PRKRA, and DYT-HPCA was broader. However, in most DYT-THAP1 and DYT-ANO3 patients, dystonia first manifested in the upper half of the body (upper limb, neck, and craniofacial/laryngeal), whereas onset in DYT-TOR1A, DYT-PRKRA and DYT-HPCA was frequently observed in an extremity, including both upper and lower ones. For ANO3, a segmental/multifocal distribution was typical, whereas TOR1A, PRKRA, KMT2B, and HPCA mutation carriers commonly developed generalized dystonia. THAP1 mutation carriers presented with focal, segmental/multifocal, or generalized dystonia in almost equal proportions. GNAL mutation carriers rarely showed generalization. This review provides a comprehensive overview of the current knowledge of hereditary isolated dystonia. The data are also available in an online database (http://www.mdsgene.org), which additionally offers descriptive summary statistics. © 2021 The Authors. Movement Disorders published by Wiley Periodicals LLC on behalf of International Parkinson and Movement Disorder Society.",2021-01-27 +34878867,Considerations for Improving Reporting and Analysis of Date-Based COVID-19 Surveillance Data by Public Health Agencies.,"More than a year after the first domestic COVID-19 cases, the United States does not have national standards for COVID-19 surveillance data analysis and public reporting. 
This has led to dramatic variations in surveillance practices among public health agencies, which analyze and present newly confirmed cases by a wide variety of dates. The choice of which date to use should be guided by a balance between interpretability and epidemiological relevance. Report date is easily interpretable, generally representative of outbreak trends, and available in surveillance data sets. These features make it a preferred date for public reporting and visualization of surveillance data, although it is not appropriate for epidemiological analyses of outbreak dynamics. Symptom onset date is better suited for such analyses because of its clinical and epidemiological relevance. However, using symptom onset for public reporting of new confirmed cases can cause confusion because reporting lags result in an artificial decline in recent cases. We hope this discussion is a starting point toward a more standardized approach to date-based surveillance. Such standardization could improve public comprehension, policymaking, and outbreak response. (Am J Public Health. 2021;111(12):2127-2132. https://doi.org/10.2105/AJPH.2021.306520).",2021-12-01 +33125071,The international glycan repository GlyTouCan version 3.0.,"Glycans serve important roles in signaling events and cell-cell communication, and they are recognized by lectins, viruses and bacteria, playing a variety of roles in many biological processes. However, there was no system to organize the plethora of glycan-related data in the literature. Thus GlyTouCan (https://glytoucan.org) was developed as the international glycan repository, allowing researchers to assign accession numbers to glycans. This also aided in the integration of glycan data across various databases. GlyTouCan assigns accession numbers to glycans which are defined as sets of monosaccharides, which may or may not be characterized with linkage information. 
GlyTouCan was developed to be able to recognize any level of ambiguity in glycans and uniquely assign accession numbers to each of them, regardless of the input text format. In this manuscript, we describe the latest update to GlyTouCan in version 3.0, its usage, and plans for future development.",2021-01-01 +31932804,"Integration of epidemiologic, pharmacologic, genetic and gut microbiome data in a drug-metabolite atlas.","Progress in high-throughput metabolic profiling provides unprecedented opportunities to obtain insights into the effects of drugs on human metabolism. The Biobanking BioMolecular Research Infrastructure of the Netherlands has constructed an atlas of drug-metabolite associations for 87 commonly prescribed drugs and 150 clinically relevant plasma-based metabolites assessed by proton nuclear magnetic resonance. The atlas includes a meta-analysis of ten cohorts (18,873 persons) and uncovers 1,071 drug-metabolite associations after evaluation of confounders including co-treatment. We show that the effect estimates of statins on metabolites from the cross-sectional study are comparable to those from intervention and genetic observational studies. Further data integration links proton pump inhibitors to circulating metabolites, liver function, hepatic steatosis and the gut microbiome. Our atlas provides a tool for targeted experimental pharmaceutical research and clinical trials to improve drug efficacy, safety and repurposing. We provide a web-based resource for visualization of the atlas (http://bbmri.researchlumc.nl/atlas/).",2020-01-13 +34125979,A dynamic nomogram for predicting the risk of asthma: Development and validation in a database study.,"

Background

Asthma remains a serious health problem with increasing prevalence and incidence. This study was to develop and validate a dynamic nomogram for predicting asthma risk.

Methods

Totally 597 subjects whose age ≥18 years old with asthma, an accurate age at first cigarette, and clear smoking status were selected from the National Health and Nutrition Examination Survey (NHANES) database (2013-2018). The dataset was randomly split into the training set and the testing set at a ratio of 4:6. Simple and multiple logistic regressions were used for identifying independent predictors. Then the nomogram was developed and internally validated using data from the testing set. The receiver operator characteristic (ROC) curve was used for assessing the performance of the nomogram.

Results

According to the simple and multiple logistic regressions, smoking ≥40 years, female gender, the age for the first smoking, having close relative with asthma were independently associated with the risk of an asthma attack. The nomogram was thereby developed with the link of https://yanglifen.shinyapps.io/Dynamic_Nomogram_for_Asthma/. The ROC analyses showed an AUC of 0.726 (0.724-0.728) with a sensitivity of 0.887 (0.847-0.928) in the training set, and an AUC of 0.702 (0.700-0.703) with a sensitivity of 0.860 (0.804-0.916) in the testing set, fitting well in calibration curves. Decision curve analysis further confirmed the clinical usefulness of the nomogram.

Conclusion

Our dynamic nomogram could help clinicians to assess the individual probability of asthma attack, which was helpful for improving the treatment and prognosis of asthma.",2021-06-14 +35559777,MESOCOSM: A mesocosm database management system for environmental nanosafety.,"Engineered nanomaterials (ENMs) are intentionally designed and produced by humans to revolutionize the manufacturing sector, such as electronic goods, paints, tires, clothes, cosmetic products, and biomedicine. With the spread of these ENMs in our daily lives, scientific research have generated a huge amount of data related to their potential impacts on human and environment health. To date, these data are gathered in databases mainly focused on the (eco)toxicity and occupational exposure to ENMs. These databases are therefore not suitable to build well-informed environmental exposure scenarios covering the life cycle of ENMs. In this paper, we report the construction of one of the first centralized mesocosm database management system for environmental nanosafety (called MESOCOSM) containing experimental data collected from mesocosm experiments suited for understanding and quantifying both the environmental hazard and exposure. The database, which is publicly available through https://aliayadi.github.io/MESOCOSM-database/, contains 5200 entities covering tens of unique experiments investigating Ag, CeO2, CuO, TiO2-based ENMs as well as nano-enabled products. These entities are divided into different groups i.e. physicochemical properties of ENMS, environmental, exposure and hazard endpoints, and other general information about the mesocosm testing, resulting in more than forty parameters in the database. The MESOCOSM database is equipped with a powerful application, consisting of a graphical user interface (GUI), allowing users to manage and search data using complex queries without relying on programmers. 
MESOCOSM aims to predict and explain ENMs behavior and fate in different ecosystems as well as their potential impacts on the environment at different stages of the nanoproducts lifecycle. MESOCOSM is expected to benefit the nanosafety community by providing a continuous source of critical information and additional characterization factors for predicting ENMs interactions with the environment and their risks.",2020-12-22 +29528046,Assembled genomic and tissue-specific transcriptomic data resources for two genetically distinct lines of Cowpea ( Vigna unguiculata (L.) Walp).,"Cowpea ( Vigna unguiculata (L.) Walp) is an important legume crop for food security in areas of low-input and smallholder farming throughout Africa and Asia. Genetic improvements are required to increase yield and resilience to biotic and abiotic stress and to enhance cowpea crop performance. An integrated cowpea genomic and gene expression data resource has the potential to greatly accelerate breeding and the delivery of novel genetic traits for cowpea. Extensive genomic resources for cowpea have been absent from the public domain; however, a recent early release reference genome for IT97K-499-35 ( Vigna unguiculata v1.0, NSF, UCR, USAID, DOE-JGI, http://phytozome.jgi.doe.gov/) has now been established in a collaboration between the Joint Genome Institute (JGI) and University California (UC) Riverside. Here we release supporting genomic and transcriptomic data for two transformable cowpea varieties, IT97K-499-35 and IT86D-1010. The transcriptome resource includes six tissue-specific datasets for each variety, with particular emphasis on reproductive tissues that extend and support the V. unguiculata v1.0 reference. Annotations have been included in our resource to allow direct mapping to the v1.0 cowpea reference. 
The resource described here is supported by downloadable raw and assembled sequence data.",2018-06-18 +32637336,"Follow, select and assemble method FSAM for VE and re assembly.","We modified the methods by Ballard et al. [1]. They sought to study eye-hand coordination strategies and created both a real world and a virtual model copying task consisting of three different areas: model, source, and workspace. Participants followed a pickup and place exercise and used a mouse to control stimuli presented on a computer monitor in the virtual task. We also considered the method presented by Hayhoe et al. [2] and Aivar et al. [3] who designed a similar model copying task in a 3D virtual environment. Stimuli were displayed in a head mounted display and participants held a motion sensor to select and move virtual objects in 3D space. Moreover, Aivar et al. [3] also included extra assembly pieces and a variation in the position of the assembly pieces located in the resource area. • It proposes an assembly task designed at a 1:1 scale for two environments, real and virtual environments. • It introduces a reading sequence for the model that is being replicated and it also introduces distractor assembly blocks with similar colors and shapes as the required assembly blocks, and a change in the location for all assembly blocks in the resource area. • It modifies the interaction for the VR environment by using hand gestures to select, move and position virtual assembly blocks. This was possible by incorporating a LEAP® motion controller which although it does not provide haptic feedback, it provides a virtual representation of the participant's hand. Our VE also includes visual and auditory feedback to guide depth perception and virtual control. The software used for this research study is available at: http://virtualete.com/research/fsam.php.",2020-06-19 +34525215,Four key challenges in the open-data revolution.,"In Focus: Culina, A., Adriaensen, F., Bailey, L. D., et al. 
(2021) Connecting the data landscape of long-term ecological studies: The SPI-Birds data hub. Journal of Animal Ecology, https://doi.org/10.1111/1365-2656.13388. Long-term, individual-based datasets have been at the core of many key discoveries in ecology, and calls for the collection, curation and release of these kinds of ecological data are contributing to a flourishing open-data revolution in ecology. Birds, in particular, have been the focus of international research for decades, resulting in a number of uniquely long-term studies, but accessing these datasets has been historically challenging. Culina et al. (2021) introduce an online repository of individual-level, long-term bird records with ancillary data (e.g. genetics), which will enable key ecological questions to be answered on a global scale. As well as these opportunities, however, we argue that the ongoing open-data revolution comes with four key challenges relating to the (1) harmonisation of, (2) biases in, (3) expertise in and (4) communication of, open ecological data. Here, we discuss these challenges and how key efforts such as those by Culina et al. are using FAIR (Findable, Accessible, Interoperable and Reproducible) principles to overcome them. The open-data revolution will undoubtedly reshape our understanding of ecology, but with it the ecological community has a responsibility to ensure this revolution is ethical and effective.",2021-09-01 +32193291,LncSpA: LncRNA Spatial Atlas of Expression across Normal and Cancer Tissues.,"Long noncoding RNAs (lncRNA) play important roles in maintaining morphology and function of tissues, and their regulatory effectiveness is closely associated with spatial expression. To provide a comprehensive spatial atlas of expression for lncRNA, we propose LncSpA (http://bio-bigdata.hrbmu.edu.cn/LncSpA) to explore tissue-elevated (TE) lncRNA across human normal and adult and pediatric cancer tissues. 
In total, 71,131 and 12,007 TE lncRNAs and 634 clinical-related TE lncRNAs were identified across 38 normal and 33 adult cancer tissues. Moreover, 4,688 TE and 413 clinical-related lncRNAs were identified in pediatric cancer. By quick searching or query options, users can obtain eight major types of detailed information for lncRNA via various visualization techniques, including qualitative and quantitative spatial expression in different resources, coexpressed mRNAs, predicted function, known disease association, and the potential to serve as diagnostic or prognostic markers. LncSpA will be a valuable resource to understand lncRNA functions across tissues and cancers, leading to enhanced therapeutic strategies in precision oncology. SIGNIFICANCE: LncSpA is a new interactive resource that provides the spatial expression pattern of lncRNA across thousands of normal and cancer samples representing major tissue types.",2020-03-19 +28368827,BRANE Clust: Cluster-Assisted Gene Regulatory Network Inference Refinement.,"Discovering meaningful gene interactions is crucial for the identification of novel regulatory processes in cells. Building accurately the related graphs remains challenging due to the large number of possible solutions from available data. Nonetheless, enforcing a priori on the graph structure, such as modularity, may reduce network indeterminacy issues. BRANE Clust (Biologically-Related A priori Network Enhancement with Clustering) refines gene regulatory network (GRN) inference thanks to cluster information. It works as a post-processing tool for inference methods (i.e., CLR, GENIE3). In BRANE Clust, the clustering is based on the inversion of a system of linear equations involving a graph-Laplacian matrix promoting a modular structure. Our approach is validated on DREAM4 and DREAM5 datasets with objective measures, showing significant comparative improvements. 
We provide additional insights on the discovery of novel regulatory or co-expressed links in the inferred Escherichia coli network evaluated using the STRING database. The comparative pertinence of clustering is discussed computationally (SIMoNe, WGCNA, X-means) and biologically (RegulonDB). BRANE Clust software is available at: http://www-syscom.univ-mlv.fr/~pirayre/Codes-GRN-BRANE-clust.html.",2017-03-28 +33502860,InterMetalDB: A Database and Browser of Intermolecular Metal Binding Sites in Macromolecules with Structural Information.,"InterMetalDB is a free-of-charge database and browser of intermolecular metal binding sites that are present on the interfaces of macromolecules forming larger assemblies based on structural information deposited in Protein Data Bank (PDB). It can be found and freely used at https://intermetaldb.biotech.uni.wroc.pl/. InterMetalDB collects the interfacial binding sites with involvement of metal ions and clusters them on the basis of 50% sequence similarity and the nearest metal environment (5 Å radius). The data are available through the web interface where they can be queried, viewed, and downloaded. Complexity of the query depends on the user, because the questions in the query are connected with each other by a logical AND. InterMetalDB offers several useful options for filtering records including searching for structures by particular parameters such as structure resolution, structure description, and date of deposition. Records can be filtered by coordinated metal ion, number of bound amino acid residues, coordination sphere, and other features. InterMetalDB is regularly updated and will continue to be regularly updated with new content in the future. 
InterMetalDB is a useful tool for all researchers interested in metalloproteins, protein engineering, and metal-driven oligomerization.",2021-01-27 +29764375,The aquatic animals' transcriptome resource for comparative functional analysis.,"BACKGROUND:Aquatic animals have great economic and ecological importance. Among them, non-model organisms have been studied regarding eco-toxicity, stress biology, and environmental adaptation. Due to recent advances in next-generation sequencing techniques, large amounts of RNA-seq data for aquatic animals are publicly available. However, currently there is no comprehensive resource exist for the analysis, unification, and integration of these datasets. This study utilizes computational approaches to build a new resource of transcriptomic maps for aquatic animals. This aquatic animal transcriptome map database dbATM provides de novo assembly of transcriptome, gene annotation and comparative analysis of more than twenty aquatic organisms without draft genome. RESULTS:To improve the assembly quality, three computational tools (Trinity, Oases and SOAPdenovo-Trans) were employed to enhance individual transcriptome assembly, and CAP3 and CD-HIT-EST software were then used to merge these three assembled transcriptomes. In addition, functional annotation analysis provides valuable clues to gene characteristics, including full-length transcript coding regions, conserved domains, gene ontology and KEGG pathways. Furthermore, all aquatic animal genes are essential for comparative genomics tasks such as constructing homologous gene groups and blast databases and phylogenetic analysis. CONCLUSION:In conclusion, we establish a resource for non model organism aquatic animals, which is great economic and ecological importance and provide transcriptomic information including functional annotation and comparative transcriptome analysis. 
The database is now publically accessible through the URL http://dbATM.mbc.nctu.edu.tw/ .",2018-05-09 +34305236,The Interrelationship between Liver Function Test and the Coronavirus Disease 2019: A Systematic Review and Meta-Analysis.,"

Background

The outbreak of the coronavirus disease-2019 (COVID-19) has become a global public health challenge. Assessing the effect of COVID-19 on liver injury is of great importance. A systematic review and meta-analysis were conducted to establish the characteristics of liver function tests in COVID-19 patients.

Methods

A systematic search of publications from December 2019 up to April 2020 in Web of Science, Scopus, and Medline (via PubMed) databases was performed. Both cross-sectional and case series studies reporting an association between liver injury and COVID-19 infection were included. The data were analyzed using the STATA software (version 11.0) and the random-effects model for I2>50% was used to pool the results.

Results

In this meta-analysis, 42 articles comprising a total of 6,557 COVID-19 patients were studied. The prevalence of increase in alanine aminotransferase (ALT) and aspartate aminotransferase (AST) levels was 30% and 21% in non-severe patients and 38% and 48% in severe patients, respectively. Patients with severe COVID-19 infection were 4.22, 4.96, and 4.13 times more likely to have elevated AST, ALT, and lactate dehydrogenase (LDH) levels, respectively.

Conclusion

Elevation in liver function tests was higher in patients with severe than non-severe COVID-19 infection. Given the widespread use of drugs that increases the risk of hepatotoxicity, healthcare providers should be aware of changes in liver enzymes in COVID-19 patients. The inclusion of other studies from outside China could confirm the pattern of elevation in liver function tests in COVID-19 patients across the globe. Preprint of this article is available on medRxiv, https://www.medrxiv.org/content/10.1101/2020.05.20.20108357v1.",2021-07-01 +34565441,miRkit: R framework analyzing miRNA PCR array data.,"

Objective

The characterization of microRNAs (miRNA) in recent years is an important advance in the field of gene regulation. To this end, several approaches for miRNA expression analysis and various bioinformatics tools have been developed over the last few years. It is a common practice to analyze miRNA PCR Array data using the commercially available software, mostly due to its convenience and ease-of-use.

Results

In this work we present miRkit, an open source framework written in R, that allows for the comprehensive analysis of RT-PCR data, from the processing of raw data to a functional analysis of the produced results. The main goal of the proposed tool is to provide an assessment of the samples' quality, perform data normalization by endogenous and exogenous miRNAs, and facilitate differential and functional enrichment analysis. The tool offers fast execution times with low memory usage, and is freely available under a ΜΙΤ license from https://bio.tools/mirkit . Overall, miRkit offers the full analysis from the raw RT-PCR data to functional analysis of targeted genes, and specifically designed to support the popular miScript miRNA PCR Array (Qiagen) technology.",2021-09-26 +33094321,VPTMdb: a viral posttranslational modification database. ,"In viruses, posttranslational modifications (PTMs) are essential for their life cycle. Recognizing viral PTMs is very important for a better understanding of the mechanism of viral infections and finding potential drug targets. However, few studies have investigated the roles of viral PTMs in virus-human interactions using comprehensive viral PTM datasets. To fill this gap, we developed the first comprehensive viral posttranslational modification database (VPTMdb) for collecting systematic information of PTMs in human viruses and infected host cells. The VPTMdb contains 1240 unique viral PTM sites with 8 modification types from 43 viruses (818 experimentally verified PTM sites manually extracted from 150 publications and 422 PTMs extracted from SwissProt) as well as 13 650 infected cells' PTMs extracted from seven global proteomics experiments in six human viruses. The investigation of viral PTM sequences motifs showed that most viral PTMs have the consensus motifs with human proteins in phosphorylation and five cellular kinase families phosphorylate more than 10 viral species. 
The analysis of protein disordered regions presented that more than 50% glycosylation sites of double-strand DNA viruses are in the disordered regions, whereas single-strand RNA and retroviruses prefer ordered regions. Domain-domain interaction analysis indicating potential roles of viral PTMs play in infections. The findings should make an important contribution to the field of virus-human interaction. Moreover, we created a novel sequence-based classifier named VPTMpre to help users predict viral protein phosphorylation sites. VPTMdb online web server (http://vptmdb.com:8787/VPTMdb/) was implemented for users to download viral PTM data and predict phosphorylation sites of interest.",2021-07-01 +34851988,The Lyme and Tickborne Disease Dashboard: A map-based resource to promote public health awareness and research collaboration.,"With the incidence of Lyme and other tickborne diseases on the rise in the US and globally, there is a critical need for data-driven tools that communicate the magnitude of this problem and help guide public health responses. We present the Johns Hopkins Lyme and Tickborne Disease Dashboard (https://www.hopkinslymetracker.org/), a new tool that harnesses the power of geography to raise awareness and fuel research and scientific collaboration. The dashboard is unique in applying a geographic lens to tickborne diseases, aiming not only to become a global tracker of tickborne diseases but also to contextualize their complicated geography with a comprehensive set of maps and spatial data sets representing a One Health approach. 
We share our experience designing and implementing the dashboard, describe the main features, and discuss current limitations and future directions.",2021-12-01 +30476305,LncACTdb 2.0: an updated database of experimentally supported ceRNA interactions curated from low- and high-throughput experiments.,"We describe LncACTdb 2.0 (http://www.bio-bigdata.net/LncACTdb/), an updated and significantly expanded database which provides comprehensive information of competing endogenous RNAs (ceRNAs) in different species and diseases. We have updated LncACTdb 2.0 with more data and several new features, including (i) manually curating 2663 experimentally supported ceRNA interactions from >5000 published literatures; (ii) expanding the scope of the database up to 23 species and 213 diseases/phenotypes; (iii) curating more ceRNA types such as circular RNAs and pseudogenes; (iv) identifying and scoring candidate lncRNA-associated ceRNA interactions across 33 cancer types from TCGA data; (v) providing illustration of survival, network and cancer hallmark information for ceRNAs. Furthermore, several flexible online tools including LncACT-Get, LncACT-Function, LncACT-Survival, LncACT-Network and LncACTBrowser have been developed to perform customized analysis, functional analysis, survival analysis, network illustration and genomic visualization. LncACTdb 2.0 also provides newly designed, user-friendly web interfaces to search, browse and download all the data. The BLAST interface is convenient for users to query dataset by inputting custom sequences. The Hot points interface provides users the most studied items by others. 
LncACTdb 2.0 is a continually updated database and will serve as an important resource to explore ceRNAs in physiological and pathological processes.",2019-01-01 +30407549,Lnc2Cancer v2.0: updated database of experimentally supported long non-coding RNAs in human cancers.,"Lnc2Cancer 2.0 (http://www.bio-bigdata.net/lnc2cancer) is an updated database that provides comprehensive experimentally supported associations between lncRNAs and human cancers. In Lnc2Cancer 2.0, we have updated the database with more data and several new features, including (i) exceeding a 4-fold increase over the previous version, recruiting 4989 lncRNA-cancer associations between 1614 lncRNAs and 165 cancer subtypes. (ii) newly adding about 800 experimentally supported circulating, drug-resistant and prognostic-related lncRNAs in various cancers. (iii) appending the regulatory mechanism of lncRNA in cancer, including microRNA (miRNA), transcription factor (TF), variant and methylation regulation. (iv) increasing more than 70 high-throughput experiments (microarray and next-generation sequencing) of lncRNAs in cancers. (v) Scoring the associations between lncRNA and cancer to evaluate the correlations. (vi) updating the annotation information of lncRNAs (version 28) and containing more detailed descriptions for lncRNAs and cancers. Moreover, a newly designed, user-friendly interface was also developed to provide a convenient platform for users. In particular, the functions of browsing data by cancer primary organ, biomarker type and regulatory mechanism, advanced search following several features and filtering the data by LncRNA-Cancer score were enhanced. 
Lnc2Cancer 2.0 will be a useful resource platform for further understanding the associations between lncRNA and human cancer.",2019-01-01 +33039623,The Developmental Chronnecto-Genomics (Dev-CoG) study: A multimodal study on the developing brain.,"Brain development has largely been studied through unimodal analysis of neuroimaging data, providing independent results for structural and functional data. However, structure clearly impacts function and vice versa, pointing to the need for performing multimodal data collection and analysis to improve our understanding of brain development, and to further inform models of typical and atypical brain development across the lifespan. Ultimately, such models should also incorporate genetic and epigenetic mechanisms underlying brain structure and function, although currently this area is poorly specified. To this end, we are reporting here a multi-site, multi-modal dataset that captures cognitive function, brain structure and function, and genetic and epigenetic measures to better quantify the factors that influence brain development in children originally aged 9-14 years. Data collection for the Developmental Chronnecto-Genomics (Dev-CoG) study (http://devcog.mrn.org/) includes cognitive, emotional, and social performance scales, structural and functional MRI, diffusion MRI, magnetoencephalography (MEG), and saliva collection for DNA analysis of single nucleotide polymorphisms (SNPs) and DNA methylation patterns. Across two sites (The Mind Research Network and the University of Nebraska Medical Center), data from over 200 participants were collected and these children were re-tested annually for at least 3 years. The data collection protocol, sample demographics, and data quality measures for the dataset are presented here. 
The sample will be made freely available through the collaborative informatics and neuroimaging suite (COINS) database at the conclusion of the study.",2020-10-08 +30407599,Mouse Genome Database (MGD) 2019.,"The Mouse Genome Database (MGD; http://www.informatics.jax.org) is the community model organism genetic and genome resource for the laboratory mouse. MGD is the authoritative source for biological reference data sets related to mouse genes, gene functions, phenotypes, and mouse models of human disease. MGD is the primary outlet for official gene, allele and mouse strain nomenclature based on the guidelines set by the International Committee on Standardized Nomenclature for Mice. In this report we describe significant enhancements to MGD, including two new graphical user interfaces: (i) the Multi Genome Viewer for exploring the genomes of multiple mouse strains and (ii) the Phenotype-Gene Expression matrix which was developed in collaboration with the Gene Expression Database (GXD) and allows researchers to compare gene expression and phenotype annotations for mouse genes. Other recent improvements include enhanced efficiency of our literature curation processes and the incorporation of Transcriptional Start Site (TSS) annotations from RIKEN's FANTOM 5 initiative.",2019-01-01 +31214208,The Plant Ontology Facilitates Comparisons of Plant Development Stages Across Species.,"The Plant Ontology (PO) is a community resource consisting of standardized terms, definitions, and logical relations describing plant structures and development stages, augmented by a large database of annotations from genomic and phenomic studies. This paper describes the structure of the ontology and the design principles we used in constructing PO terms for plant development stages. It also provides details of the methodology and rationale behind our revision and expansion of the PO to cover development stages for all plants, particularly the land plants (bryophytes through angiosperms). 
As a case study to illustrate the general approach, we examine variation in gene expression across embryo development stages in Arabidopsis and maize, demonstrating how the PO can be used to compare patterns of expression across stages and in developmentally different species. Although many genes appear to be active throughout embryo development, we identified a small set of uniquely expressed genes for each stage of embryo development and also between the two species. Evaluating the different sets of genes expressed during embryo development in Arabidopsis or maize may inform future studies of the divergent developmental pathways observed in monocotyledonous versus dicotyledonous species. The PO and its annotation database (http://www.planteome.org) make plant data for any species more discoverable and accessible through common formats, thus providing support for applications in plant pathology, image analysis, and comparative development and evolution.",2019-06-04 +30476227,The BioGRID interaction database: 2019 update.,"The Biological General Repository for Interaction Datasets (BioGRID: https://thebiogrid.org) is an open access database dedicated to the curation and archival storage of protein, genetic and chemical interactions for all major model organism species and humans. As of September 2018 (build 3.4.164), BioGRID contains records for 1 598 688 biological interactions manually annotated from 55 809 publications for 71 species, as classified by an updated set of controlled vocabularies for experimental detection methods. BioGRID also houses records for >700 000 post-translational modification sites. BioGRID now captures chemical interaction data, including chemical-protein interactions for human drug targets drawn from the DrugBank database and manually curated bioactive compounds reported in the literature. A new dedicated aspect of BioGRID annotates genome-wide CRISPR/Cas9-based screens that report gene-phenotype and gene-gene relationships. 
An extension of the BioGRID resource called the Open Repository for CRISPR Screens (ORCS) database (https://orcs.thebiogrid.org) currently contains over 500 genome-wide screens carried out in human or mouse cell lines. All data in BioGRID is made freely available without restriction, is directly downloadable in standard formats and can be readily incorporated into existing applications via our web service platforms. BioGRID data are also freely distributed through partner model organism databases and meta-databases.",2019-01-01 +35003395,Transcription Factors Leading to High Expression of Neuropeptide L1CAM in Brain Metastases from Lung Adenocarcinoma and Clinical Prognostic Analysis.,"

Background

There is a lack of understanding of the development of metastasis in lung adenocarcinoma (LUAD). This study is aimed at exploring the upstream regulatory transcription factors of L1 cell adhesion molecule (L1CAM) and to construct a prognostic model to predict the risk of brain metastasis in LUAD.

Methods

Differences in gene expression between LUAD and brain metastatic LUAD were analyzed using the Wilcoxon rank-sum test. The GRNdb (http://www.grndb.com) was used to reveal the upstream regulatory transcription factors of L1CAM in LUAD. Single-cell expression profile data (GSE131907) were obtained from the transcriptome data of 10 metastatic brain tissue samples. LUAD prognostic nomogram prediction models were constructed based on the identified significant transcription factors and L1CAM.

Results

Survival analysis suggested that high L1CAM expression was negatively significantly associated with overall survival, disease-specific survival, and prognosis in the progression-free interval (p < 0.05). The box plot indicates that high expression of L1CAM was associated with distant metastases in LUAD, while ROC curves suggested that high expression of L1CAM was associated with poor prognosis. FOSL2, HOXA9, IRF4, IKZF1, STAT1, FLI1, ETS1, E2F7, and ADARB1 are potential upstream transcriptional regulators of L1CAM. Single-cell data analysis revealed that the expression of L1CAM was found significantly and positively correlated with the expression of ETS1, FOSL2, and STAT1 in brain metastases. L1CAM, ETS1, FOSL2, and STAT1 were used to construct the LUAD prognostic nomogram prediction model, and the ROC curves suggest that the constructed nomogram possesses good predictive power.

Conclusion

By bioinformatics methods, ETS1, FOSL2, and STAT1 were identified as potential transcriptional regulators of L1CAM in this study. This will help to facilitate the early identification of patients at high risk of metastasis.",2021-12-30 +30575285,Discovering millions of plankton genomic markers from the Atlantic Ocean and the Mediterranean Sea.,"Comparison of the molecular diversity in all plankton populations present in geographically distant water columns may allow for a holistic view of the connectivity, isolation and adaptation of organisms in the marine environment. In this context, a large-scale detection and analysis of genomic variants directly in metagenomic data appeared as a powerful strategy for the identification of genetic structures and genes under natural selection in plankton. Here, we used discosnp++, a reference-free variant caller, to produce genetic variants from large-scale metagenomic data and assessed its accuracy on the copepod Oithona nana in terms of variant calling, allele frequency estimation and population genomic statistics by comparing it to the state-of-the-art method. discosnp ++ produces variants leading to similar conclusions regarding the genetic structure and identification of loci under natural selection. discosnp++ was then applied to 120 metagenomic samples from four size fractions, including prokaryotes, protists and zooplankton sampled from 39 tara Oceans sampling stations located in the Atlantic Ocean and the Mediterranean Sea to produce a new set of marine genomic markers containing more than 19 million of variants. This new genomic resource can be used by the community to relocate these markers on their plankton genomes or transcriptomes of interest. 
This resource will be updated with new marine expeditions and the increase of metagenomic data (availability: http://bioinformatique.rennes.inria.fr/taravariants/).",2019-03-01 +33692157,Performance at medical school selection correlates with success in Part A of the intercollegiate Membership of the Royal College of Surgeons (MRCS) examination.,"Medical schools in the UK typically use prior academic attainment and an admissions test (University Clinical Aptitude Test (UCAT), Biomedical Admissions Test (BMAT) or the Graduate Medical School Admissions Test (GAMSAT)) to help select applicants for interview. To justify their use, more information is needed about the predictive validity of these tests. Thus, we investigated the relationship between performance in admissions tests and the Membership of the Royal College of Surgeons (MRCS) examination.The UKMED database (https://www.ukmed.ac.uk) was used to access medical school selection data for all UK graduates who attempted MRCS Part A (n=11 570) and Part B (n=5690) between 2007 and 2019. Univariate and multivariate logistic regression models identified independent predictors of MRCS success. Pearson correlation coefficients examined the linear relationship between test scores and MRCS performance.Successful MRCS Part A candidates scored higher in A-Levels, UCAT, BMAT and GAMSAT (p<0.05). No significant differences were observed for MRCS Part B. All admissions tests were found to independently predict MRCS Part A performance after adjusting for prior academic attainment (A-Level performance) (p<0.05). Admission test scores demonstrated statistically significant correlations with MRCS Part A performance (p<0.001).The utility of admissions tests is clear with respect to helping medical schools select from large numbers of applicants for a limited number of places. Additionally, these tests appear to offer incremental value above A-Level performance alone. 
We expect this data to guide medical schools' use of admissions test scores in their selection process.",2021-03-10 +33105068,Pharmacogenetic profiling of dihydropyrimidine dehydrogenase (DPYD) variants in the Indian population.,"

Background

The present study aimed to delineate the pharmacologically relevant dihydropyrimidine dehydrogenase (DPYD) variants in the Indian population.

Methods

We screened 2000 Indian subjects for DPYD variants using the Infinium Global Screening Array (GSA) (Illumina Inc., San Diego, CA, USA).

Results

The GSA analysis identified seven coding, two intronic and three synonymous DPYD variants. Level 1A alleles (rs75017182, rs3918290, P633Qfs*5 and D949V) were found to be rare (minor allele frequency: 1.889%), whereas Level 3 alleles were observed to be predominant (C29R: 24.91%, I543V: 9.047%, M166V: 8.993% and V732I: 8.44%). In silico predictions revealed that all Level 1A alleles were deleterious, whereas three (M166V, S534N and V732I) of seven Level 3 alleles were damaging. CUPSAT analysis revealed that two Level 1A (P633Qfs*, D949V) and three Level 3 (I543V, V732I and S534N) variants were thermolabile. The pooled Indian data showed that V732I, S534N and rs3918290 variants were associated with 5-FU/capecitabine toxicity, whereas C29R, I543V and M166V variants exhibited the null association. A comparison of our data with other population data from the 'Allele Frequency Aggregator' (https://www.ncbi.nlm.nih.gov/snp/docs/gsr/alfa/) database showed similarities with the South Asian data.

Conclusions

We have identified four Level 1A (non-functional/dysfunctional) and seven Level 3 variants in the DPYD gene. The pooled Indian data revealed the association of V732I, S534N and rs3918290 variants with 5-FU/capecitabine toxicity. Clustering analysis revealed the similarities in the DPYD profiles of the Indian and South Asian populations.",2020-11-20 +34951624,BATL: Bayesian annotations for targeted lipidomics. ,"Bioinformatic tools capable of annotating, rapidly and reproducibly, large, targeted lipidomic datasets are limited. Specifically, few programs enable high-throughput peak assessment of liquid chromatography-electrospray ionization tandem mass spectrometry (LC-ESI-MS/MS) data acquired in either selected or multiple reaction monitoring (SRM and MRM) modes. We present here Bayesian Annotations for Targeted Lipidomics (BATL), a Gaussian naïve Bayes classifier for targeted lipidomics that annotates peak identities according to eight features related to retention time, intensity, and peak shape. Lipid identification is achieved by modelling distributions of these eight input features across biological conditions and maximizing the joint posterior probabilities of all peak identities at a given transition. When applied to sphingolipid and glycerophosphocholine SRM datasets, we demonstrate over 95% of all peaks are rapidly and correctly identified. BATL software is freely accessible online at https://complimet.ca/batl/ and is compatible with Safari, Firefox, Chrome and Edge. Supplementary data are available at Bioinformatics online.",2021-12-24 +30738202,MtBrowse: An integrative genomics browser for human mitochondrial DNA.,"The human mitochondrion is a unique semi-autonomous organelle with a genome of its own and also requires nuclear encoded components to carry out its functions. In addition to being the powerhouse of the cell, mitochondria plays a central role in several metabolic pathways. 
It is therefore challenging to delineate the cause-effect relationship in context of mitochondrial dysfunction. Several studies implicate mutations in mitochondrial DNA (mtDNA) in various complex diseases. The human mitochondrial DNA (mtDNA) encodes a set of 37 genes, 13 protein coding, 22 tRNAs and two ribosomal RNAs, which are essential structural and functional components of the electron transport chain. As mentioned above, variations in these genes have been implicated in a broad spectrum of diseases and are extensively reported in literature and various databases. A large number of databases and prediction methods have been published to elucidate the role of human mitochondrial DNA in various disease phenotypes. However, there is no centralized resource to visualize this genotype-phenotype data. Towards this, we have developed MtBrowse: an integrative genomics browser for human mtDNA. As of now, MtBrowse has four categories - Gene, Disease, Reported variation and Variation prediction. These categories have 105 tracks and house data on mitochondrial reference genes, around 600 variants reported in literature with respect to various disease phenotypes and predictions for potential pathogenic variations in protein-coding genes. MtBrowse also hosts genomic variation data from over 5000 individuals on 22 disease phenotypes. MtBrowse may be accessed at http://ab-openlab.csir.res.in/cgi-bin/gb2/gbrowse.",2019-02-07 +31101000,Bioinformatics Resource Manager: a systems biology web tool for microRNA and omics data integration.,"

Background

The Bioinformatics Resource Manager (BRM) is a web-based tool developed to facilitate identifier conversion and data integration for Homo sapiens (human), Mus musculus (mouse), Rattus norvegicus (rat), Danio rerio (zebrafish), and Macaca mulatta (macaque), as well as perform orthologous conversions among the supported species. In addition to providing a robust means of identifier conversion, BRM also incorporates a suite of microRNA (miRNA)-target databases upon which to query target genes or to perform reverse target lookups using gene identifiers.

Results

BRM has the capability to perform cross-species identifier lookups across common identifier types, directly integrate datasets across platform or species by performing identifier retrievals in the background, and retrieve miRNA targets from multiple databases simultaneously and integrate the resulting gene targets with experimental mRNA data. Here we use workflows provided in BRM to integrate RNA sequencing data across species to identify common biomarkers of exposure after treatment of human lung cells and zebrafish to benzo[a]pyrene (BAP). We further use the miRNA Target workflow to experimentally determine the role of miRNAs as regulators of BAP toxicity and identify the predicted functional consequences of miRNA-target regulation in our system. The output from BRM can easily and directly be uploaded to freely available visualization tools for further analysis. From these examples, we were able to identify an important role for several miRNAs as potential regulators of BAP toxicity in human lung cells associated with cell migration, cell communication, cell junction assembly and regulation of cell death.

Conclusions

Overall, BRM provides bioinformatics tools to assist biologists having minimal programming skills with analysis and integration of high-content omics' data from various transcriptomic and proteomic platforms. BRM workflows were developed in Java and other open-source technologies and are served publicly using Apache Tomcat at https://cbb.pnnl.gov/brm/ .",2019-05-17 +34993287,A multi-purpose dataset of Devanagari script comprising of isolated numerals and vowels.,"This article presents handwritten isolated characters of the Devanagari script. Devanagari script contains ten numerals, 13 vowels, and 33 consonants. Devanagari Character dataset includes 23 different characters of numerals and vowels. 2400 handwritten samples are collected for each of the numerals and 1400 for each vowel. Collected samples are digitized and pre-processed. During pre-processing, images with noise are removed. In this context, a final dataset of 38,750 images were included, where 2,250 and 1,250 samples for each numeral and vowel, respectively. The data is available in images and comma-separated-values, along with attached labels. The dataset could be used for Optical Character Recognition research and deep learning. In India, the Devanagari script is the base script on which 120+ languages are evolved; hence this dataset serves as the base for Machine Learning research in these languages. The data set is publicly available at https://data.mendeley.com/datasets/pxrnvp4yy8/2.",2021-12-16 +33546314,Genome Wide Analysis of Amino Acid Transporter Superfamily in Solanum lycopersicum. ,"Amino acid transporters (AATs) are integral membrane proteins and have several functions, including transporting amino acids across cellular membranes. They are critical for plant growth and development. This study comprehensively identified AAT-encoding genes in tomato (Solanum lycopersicum), which is an important vegetable crop and serves as a model for fleshy fruit development. 
In this study, 88 genes were identified in the S. lycopersicum genome and grouped into 12 subfamilies, based on previously identified AATs in Arabidopsis, rice (Oryza sativa), and potato (Solanum tuberosum) plants. Chromosomal localization revealed that S. lycopersicum AAT (SlAAT) genes are distributed on the 12 S. lycopersicum chromosomes. Segmental duplication events contribute mainly to the expansion of SlAAT genes and about 32% (29 genes) of SlAAT genes were found to originate from this type of event. Expression profiles of SlAAT genes in various tissues of S. lycopersicum using RNA sequencing data from the Tomato Functional Genomics Database (http://ted.bti.cornell.edu/) showed that SlAAT genes exhibited tissue-specific expression patterns. Comprehensive data generated in this study will provide a platform for further studies on the SlAAT gene family and will facilitate the functional characterization of SlAAT genes.",2021-02-03 +29069473,Genome Variation Map: a data repository of genome variations in BIG Data Center.,"The Genome Variation Map (GVM; http://bigd.big.ac.cn/gvm/) is a public data repository of genome variations. As a core resource in the BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, GVM dedicates to collect, integrate and visualize genome variations for a wide range of species, accepts submissions of different types of genome variations from all over the world and provides free open access to all publicly available data in support of worldwide research activities. Unlike existing related databases, GVM features integration of a large number of genome variations for a broad diversity of species including human, cultivated plants and domesticated animals. 
Specifically, the current implementation of GVM not only houses a total of ∼4.9 billion variants for 19 species including chicken, dog, goat, human, poplar, rice and tomato, but also incorporates 8669 individual genotypes and 13 262 manually curated high-quality genotype-to-phenotype associations for non-human species. In addition, GVM provides friendly intuitive web interfaces for data submission, browse, search and visualization. Collectively, GVM serves as an important resource for archiving genomic variation data, helpful for better understanding population genetic diversity and deciphering complex mechanisms associated with different phenotypes.",2018-01-01 +33942873,"CANNUSE, a database of traditional Cannabis uses-an opportunity for new research. ","Cannabis is one of the most versatile genera in terms of plant uses and has been exploited by humans for millennia due to its medicinal properties, strong fibres, nutritious seeds and psychoactive resin. Nowadays, Cannabis is the centre of many scientific studies, which mainly focus on its chemical composition and medicinal properties. Unfortunately, while new applications of this plant are continuously being developed, some of its traditional uses are becoming rare and even disappearing altogether. Information on traditional uses of Cannabis is vast, but it is scattered across many publication sources in different formats, so synthesis and standardization of these data are increasingly important. The CANNUSE database provides an organized information source for scientists and general public interested in different aspects of Cannabis use. It contains over 2300 entries from 649 publications related to medicinal, alimentary, fibre and other uses from different geographical areas and cultures around the world. We believe this database will serve as a starting point for new research and development strategies based on the traditional knowledge. 
Database URL: http://cannusedb.csic.es.",2021-05-01 +33452079,Advancing the Patient EXperience (APEX) in COPD Registry: Study Design and Strengths.,"The Advancing the Patient Experience (APEX) in Chronic Obstructive Pulmonary Disease (COPD) registry (https://www.apexcopd.org/) is the first primary care health system-based COPD registry in the United States. While its ultimate goal is to improve the care of patients diagnosed with COPD, the registry is also designed to describe real-life experiences of people with COPD, track key outcomes longitudinally, and assess the effectiveness of interventions. It will retrospectively and prospectively collect information from 3000 patients enrolled in 5 health care organizations. Information will be obtained from electronic health records, and from extended annual and brief questionnaires completed by patients before clinic visits. Core variables to be collected into the APEX COPD registry were agreed on by Delphi consensus and fall into 3 domains: demographics, COPD monitoring, and treatment. 
Main strengths of the registry include: 1) its size and scope (in terms of patient numbers, geographic spread and use of multiple information sources including patient-reported information); 2) collection of variables which are clinically relevant and practical to collect within primary care; 3) use of electronic data capture systems to ensure high-quality data and minimization of data-entry requirements; 4) inclusion of clinical, database development, management and communication experts; 5) regular sharing of key findings, both at international/national congresses and in peer-reviewed publications; and 6) a robust organizational structure to ensure continuance of the registry, and that research outputs are ethical, relevant and continue to bring value to both patients and physicians.",2021-01-01 +33125076,ATACdb: a comprehensive human chromatin accessibility database.,"Accessible chromatin is a highly informative structural feature for identifying regulatory elements, which provides a large amount of information about transcriptional activity and gene regulatory mechanisms. Human ATAC-seq datasets are accumulating rapidly, prompting an urgent need to comprehensively collect and effectively process these data. We developed a comprehensive human chromatin accessibility database (ATACdb, http://www.licpathway.net/ATACdb), with the aim of providing a large amount of publicly available resources on human chromatin accessibility data, and to annotate and illustrate potential roles in a tissue/cell type-specific manner. The current version of ATACdb documented a total of 52 078 883 regions from over 1400 ATAC-seq samples. These samples have been manually curated from over 2200 chromatin accessibility samples from NCBI GEO/SRA. To make these datasets more accessible to the research community, ATACdb provides a quality assurance process including four quality control (QC) metrics. 
ATACdb provides detailed (epi)genetic annotations in chromatin accessibility regions, including super-enhancers, typical enhancers, transcription factors (TFs), common single-nucleotide polymorphisms (SNPs), risk SNPs, eQTLs, LD SNPs, methylations, chromatin interactions and TADs. Especially, ATACdb provides accurate inference of TF footprints within chromatin accessibility regions. ATACdb is a powerful platform that provides the most comprehensive accessible chromatin data, QC, TF footprint and various other annotations.",2021-01-01 +33231677,GTRD: an integrated view of transcription regulation.,"The Gene Transcription Regulation Database (GTRD; http://gtrd.biouml.org/) contains uniformly annotated and processed NGS data related to gene transcription regulation: ChIP-seq, ChIP-exo, DNase-seq, MNase-seq, ATAC-seq and RNA-seq. With the latest release, the database has reached a new level of data integration. All cell types (cell lines and tissues) presented in the GTRD were arranged into a dictionary and linked with different ontologies (BRENDA, Cell Ontology, Uberon, Cellosaurus and Experimental Factor Ontology) and with related experiments in specialized databases on transcription regulation (FANTOM5, ENCODE and GTEx). The updated version of the GTRD provides an integrated view of transcription regulation through a dedicated web interface with advanced browsing and search capabilities, an integrated genome browser, and table reports by cell types, transcription factors, and genes of interest.",2021-01-01 +34571013,Vesicle Viewer: Online visualization and analysis of small-angle scattering from lipid vesicles.,"Small-angle X-ray and neutron scattering are among the most powerful experimental techniques for investigating the structure of biological membranes. 
Much of the critical information contained in small-angle scattering (SAS) data is not easily accessible to researchers who have limited time to analyze results by hand or to nonexperts who may lack the necessary scientific background to process such data. Easy-to-use data visualization software can allow them to take full advantage of their SAS data and maximize the use of limited resources. To this end, we developed an internet-based application called Vesicle Viewer to visualize and analyze SAS data from unilamellar lipid bilayer vesicles. Vesicle Viewer utilizes a modified scattering density profile (SDP) analysis called EZ-SDP in which key bilayer structural parameters, such as area per lipid and bilayer thickness, are easily and robustly determined. Notably, we introduce a bilayer model that is able to describe an asymmetric bilayer, whether it be chemically or isotopically asymmetric. The application primarily uses Django, a Python package specialized for the development of robust web applications. In addition, several other libraries are used to support the more technical aspects of the project; notable examples are Matplotlib (for graphs) and NumPy (for calculations). By eliminating the barrier of downloading and installing software, this web-based application will allow scientists to analyze their own vesicle scattering data using their preferred operating system. The web-based application can be found at https://vesicleviewer.dmarquardt.ca/.",2021-09-24 +30203047,Pancan-meQTL: a database to systematically evaluate the effects of genetic variants on methylation in human cancer.,"DNA methylation is an important epigenetic mechanism for regulating gene expression. Aberrant DNA methylation has been observed in various human diseases, including cancer. 
Single-nucleotide polymorphisms can contribute to tumor initiation, progression and prognosis by influencing DNA methylation, and DNA methylation quantitative trait loci (meQTL) have been identified in physiological and pathological contexts. However, no database has been developed to systematically analyze meQTLs across multiple cancer types. Here, we present Pancan-meQTL, a database to comprehensively provide meQTLs across 23 cancer types from The Cancer Genome Atlas by integrating genome-wide genotype and DNA methylation data. In total, we identified 8 028 964 cis-meQTLs and 965 050 trans-meQTLs. Among these, 23 432 meQTLs are associated with patient overall survival times. Furthermore, we identified 2 214 458 meQTLs that overlap with known loci identified through genome-wide association studies. Pancan-meQTL provides a user-friendly web interface (http://bioinfo.life.hust.edu.cn/Pancan-meQTL/) that is convenient for browsing, searching and downloading data of interest. This database is a valuable resource for investigating the roles of genetics and epigenetics in cancer.",2019-01-01 +34223780,The Performance of Digital Monitoring Devices for Oxygen Saturation and Respiratory Rate in COPD: A Systematic Review.,"Healthcare access and delivery for individuals with chronic obstructive pulmonary disease (COPD) who live in remote areas or who are susceptible to contracting communicable diseases, such as COVID-19, may be a challenge. Telehealth and remote monitoring devices can be used to overcome this issue. However, the accuracy of these devices must be ensured before forming healthcare decisions based on their outcomes. Therefore, a systematic review was performed to synthesize the evidence on the reliability, validity and responsiveness of digital devices used for tracking oxygen saturation (SpO2) and/or respiratory rate (RR) in individuals with COPD, in remote settings. 
Three electronic databases were searched: MEDLINE (1996 to October 8, 2020), EMBASE (1996 to October 8, 2020) and CINAHL (1998 to October 8, 2020). Studies were included if they aimed to evaluate one or more measurement properties of a digital device measuring SpO2 or RR in individuals with COPD. Six-hundred and twenty-five articles were identified and after screening, 7 studies matched the inclusion criteria; covering 11 devices measuring SpO2 and/or RR. Studies reported on the reliability (n = 1), convergent validity (n = 1), concurrent validity (n = 2) and predictive validity (n = 2) of SpO2 devices and on the convergent validity (n = 1), concurrent validity (n = 1) and predictive validity (n = 1) of RR devices. SpO2 and RR devices were valid when compared against other respiration monitoring devices but were not precise in predicting exacerbation events. More well-designed measurement studies are needed to make firm conclusions about the accuracy of such devices.Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.1945021 .",2021-07-05 +30963485,miRandb: A Metadatabase of Online Resources of miRNA and miRNA Targets.,"MicroRNA (miRNA) studies deliver numerous types of information including miRNA identification, sequence of miRNAs, target prediction, roles in diseases, and interactions in signaling pathways. Considering the different types of miRNA data, the number of miRNA databases has been increasing quickly. While resources have been planned to simplify miRNA analysis, scientists are facing the challenging task of choosing the most proper tool to retrieve related information. In this chapter, we introduce the use of miRandb, a resource that we have established to present an outline of different types of miRNA online resources and to simplify finding the right miRNA information that scientists need for their research. 
miRandb offers a user-friendly platform to find related information about any miRNA data among more than 188 present miRNA databases. miRandb has an easy procedure, and information can be retrieved by miRNA category resources. Each database comprises numerous kinds of information including database activity, description, main and unique features, organism, URL, publication, category, published year, citations per year, last update, and relative popularity. miRandb provides several opportunities and facilitates access to diverse classes of microRNA resources. miRandb is available at http://miRandb.ir .",2019-01-01 +30371815,TransmiR v2.0: an updated transcription factor-microRNA regulation database.,"MicroRNAs (miRNAs) are important post-transcriptional regulators of gene expression and play vital roles in various biological processes. It has been reported that aberrant regulation of miRNAs was associated with the development and progression of various diseases, but the underlying mechanisms are not fully deciphered. Here, we described our updated TransmiR v2.0 database for more comprehensive information about transcription factor (TF)-miRNA regulations. 3730 TF-miRNA regulations among 19 species from 1349 reports were manually curated by surveying >8000 publications, and more than 1.7 million tissue-specific TF-miRNA regulations were further incorporated based on ChIP-seq data. Besides, we constructed a 'Predict' module to query the predicted TF-miRNA regulations in human based on binding motifs of TFs. To facilitate the community, we provided a 'Network' module to visualize TF-miRNA regulations for each TF and miRNA, or for a specific disease. An 'Enrichment analysis' module was also included to predict TFs that are likely to regulate a miRNA list of interest. In conclusion, with improved data coverage and webserver functionalities, TransmiR v2.0 would be a useful resource for investigating the regulation of miRNAs. 
TransmiR v2.0 is freely accessible at http://www.cuilab.cn/transmir.",2019-01-01 +34604741,Statistical Enrichment Analysis of Samples: A General-Purpose Tool to Annotate Metadata Neighborhoods of Biological Samples.,"Unsupervised learning techniques, such as clustering and embedding, have been increasingly popular to cluster biomedical samples from high-dimensional biomedical data. Extracting clinical data or sample meta-data shared in common among biomedical samples of a given biological condition remains a major challenge. Here, we describe a powerful analytical method called Statistical Enrichment Analysis of Samples (SEAS) for interpreting clustered or embedded sample data from omics studies. The method derives its power by focusing on sample sets, i.e., groups of biological samples that were constructed for various purposes, e.g., manual curation of samples sharing specific characteristics or automated clusters generated by embedding sample omic profiles from multi-dimensional omics space. The samples in the sample set share common clinical measurements, which we refer to as ""clinotypes,"" such as age group, gender, treatment status, or survival days. We demonstrate how SEAS yields insights into biological data sets using glioblastoma (GBM) samples. Notably, when analyzing the combined The Cancer Genome Atlas (TCGA)-patient-derived xenograft (PDX) data, SEAS allows approximating the different clinical outcomes of radiotherapy-treated PDX samples, which has not been solved by other tools. The result shows that SEAS may support the clinical decision. The SEAS tool is publicly available as a freely available software package at https://aimed-lab.shinyapps.io/SEAS/.",2021-09-16 +34499534,Sexual Orientation and Gender Identity Data Collection at US Health Centers: Impact of City-Level Structural Stigma in 2018.,"Objectives. 
To examine the relationship between city-level structural stigma pertaining to sexual orientation and gender identity (SOGI) and completeness of patient SOGI data collection at US federally qualified health centers (FQHCs). Methods. We used the Human Rights Campaign's Municipal Equality Index to quantify city-level structural stigma against sexual and gender minority people in 506 US cities across 49 states. We ascertained the completeness of SOGI data collection at FQHCs from the 2018 Uniform Data System, which describes FQHC patient demographics and service utilization. We included FQHCs in cities captured by the structural stigma index in multinomial generalized linear mixed models to examine the relationship between city-level structural stigma and SOGI data completeness. Results. FQHCs in cities with more protective sexual orientation nondiscrimination policies reported more complete patient sexual orientation data (adjusted odds ratio [AOR] = 1.6; 95% confidence interval [CI] = 1.2, 2.1). This association was also found for gender identity nondiscrimination policies and gender identity data collection (AOR = 1.7; 95% CI = 1.3, 2.2). Conclusions. Municipal sexual and gender minority nondiscrimination laws are associated with social and municipal environments that facilitate patient SOGI data collection.(Am J Public Health. 2021;111(11):2059-2063. https://doi.org/10.2105/AJPH.2021.306414).",2021-09-09 +34134783,An Axin2 mutation and perinatal risk factors contribute to sagittal craniosynostosis: evidence from a Chinese female monochorionic diamniotic twin family.,"

Background

Craniosynostosis, defined as premature fusion of one or more cranial sutures, affects approximately 1 in every 2000-2500 live births. Sagittal craniosynostosis (CS), the most prevalent form of isolated craniosynostosis, is caused by interplay between genetic and perinatal environmental insults. However, the underlying details remain largely unknown.

Methods

The proband (a female monochorionic twin diagnosed with CS), her healthy co-twin sister and parents were enrolled. Obstetric history was extracted from medical records. Genetic screening was performed by whole exome sequencing (WES) and confirmed by Sanger sequencing. Functional annotation, conservation and structural analysis were predicted in public database. Phenotype data of Axin2 knockout mice was downloaded from The International Mouse Phenotyping Consortium (IMPC, http://www.mousephenotype.org ).

Results

Obstetric medical records showed that, except for the shared perinatal risk factors by the twins, the proband suffered additional persistent breech presentation and intrauterine growth restriction. We identified a heterozygous mutation of Axin2 (c.1181G > A, p.R394H, rs200899695) in monochorionic twins and their father, but not in the mother. This mutation is not reported in Asian population and results in replacement of Arg at residue 394 by His (p.R394H). Arg 394 is located at the GSK3β binding domain of Axin2 protein, which is highly conserved across species. The mutation was predicted to be potentially deleterious by in silico analysis. Incomplete penetrance of Axin2 haploinsufficiency was found in female mice.

Conclusions

Axin2 (c.1181G > A, p.R394H, rs200899695) mutation confers susceptibility and perinatal risk factors trigger the occurrence of sagittal craniosynostosis. Our findings provide a new evidence for the gene-environment interplay in understanding pathogenesis of craniosynostosis in Chinese population.",2021-06-16 +33724838,MaCPepDB: A Database to Quickly Access All Tryptic Peptides of the UniProtKB.,"Protein sequence databases play a crucial role in the majority of the currently applied mass-spectrometry-based proteomics workflows. Here UniProtKB serves as one of the major sources, as it combines the information of several smaller databases and enriches the entries with additional biological information. For the identification of peptides in a sample by tandem mass spectra, as generated by data-dependent acquisition, protein sequence databases provide the basis for most spectrum identification search engines. In addition, for targeted proteomics approaches like selected reaction monitoring (SRM) and parallel reaction monitoring (PRM), knowledge of the peptide sequences, their masses, and whether they are unique for a protein is essential. Because most bottom-up proteomics approaches use trypsin to cleave the proteins in a sample, the tryptic peptides contained in a protein database are of great interest. We present a database, called MaCPepDB (mass-centric peptide database), that consists of the complete tryptic digest of the Swiss-Prot and TrEMBL parts of UniProtKB. This database is especially designed to not only allow queries of peptide sequences and return the respective information about connected proteins and thus whether a peptide is unique but also allow queries of specific masses of peptides or precursors of MS/MS spectra. Furthermore, posttranslational modifications can be considered in a query as well as different mass deviations for posttranslational modifications. 
Hence the database can be used by a sequence query not only to, for example, check in which proteins of the UniProt database a tryptic peptide can be found but also to find possibly interfering peptides in PRM/SRM experiments using the mass query. The complete database contains currently 5 939 244 990 peptides from 185 561 610 proteins (UniProt version 2020_03), for which a single query usually takes less than 1 s. For easy exploration of the data, a web interface was developed. A REST application programming interface (API) for programmatic and workflow access is also available at https://macpepdb.mpc.rub.de.",2021-03-16 +33378192,"Database Independent Automated Structure Elucidation of Organic Molecules Based on IR, 1H NMR, 13C NMR, and MS Data.","Herein, we report a computational algorithm that follows a spectroscopist-driven elucidation process of the structure of an organic molecule based on IR, 1H and 13C NMR, and MS tabular data. The algorithm is independent from database searching and is based on a bottom-up approach, building the molecular structure from small structural fragments visible in spectra. It employs an analytical combinatorial approach with a graph search technique to determine the connectivity of structural fragments that is based on the analysis of the NMR spectra, to connect the identified structural fragments into a molecular structure. After the process is completed, the interface lists the compound candidates, which are visualized by the WolframAlpha computational knowledge engine within the interface. The candidates are ranked according to the predefined rules for analyzing the spectral data. The developed elucidator has a user-friendly web interface and is publicly available (http://schmarnica.si).",2020-12-30 +31360359,Altmetric Analysis of Contemporary Iranian Medical Journals.,"

Background

Altmetrics is a newly emerging scholarly tool measuring online attention surrounding scientific research outputs. With respect to increasing demand of disseminating research findings on the World Wide Web, this study aims to analyze the altmetric statues of Iranian medical journals.

Methods

On February 27, 2019, the list of Iranian medical journals extracted from http://journals.research.ac.ir/ and consequently altmetric data token out from Altmetric database (Altmetric LLP, London, UK). The science mapping done via keyword co-occurrence, co-citation and co-authorship, network analysis using the VOSviewer. The Pearson coefficient was then employed for the correlation analysis using R.

Results

Among a total of 104 journals, 7518 articles were mentioned in Altmetric data resources (Mean: 72.28, Confidence Level (95.0%): 16.8), total mentions were 27577 (Mean: 265.16, Confidence Level (95.0%): 79.9). Considering the total mentions of articles, International Journal of Preventive Medicine achieved the first rank, followed by Journal of Research in Medical Sciences and Iranian Journal of Public Health. Notably, Twitter was the most popular altmetric resource followed by Facebook and news outlets. Tweets were generally from the United States and United Kingdom. Among top 5% popular Iranian medical articles multiple sclerosis, cancer, and anxiety was hot topics.

Conclusions

Iranian biomedical journal editors and research scientists needs to be more dynamic in World Wide Web using social media, post-publication peer review tools, Stack Exchange (Q and A) sites, research highlight tools, Wikipedia, and etc. In spite, more attention to the concept of evidence-based policymaking, by Iranian government along with the health policymakers seems necessary.",2019-06-12 +34876872,"Long-term dynamics of the abundance of earthworms and enchytraeids (Annelida, Clitellata: Lumbricidae, Enchytraeidae) in forests of the Central Urals, Russia.","

Background

Since the late 1980s, long-term monitoring of terrestrial ecosystems in metal-contaminated areas has been carried out in the Central Urals. As a part of these monitoring programmes, the data on soil macroinvertebrates in undisturbed areas as reference sites continues to be gathered. These data help study the local biodiversity and long-term dynamics of soil macroinvertebrate abundance in non-polluted areas.

New information

The dataset (available from the GBIF network at https://www.gbif.org/dataset/bf5bc7f6-71a3-4abd-8abc-861ee3cbf84a) includes information from a long-term monitoring programme for two taxa of Annelids, Lumbricidae and Enchytraeidae, which dwell in the topsoil of spruce-fir, birch, pine and floodplain forests in the Central Urals. The dataset includes information on the earthworm community structure (list of species, species abundance, number of egg cocoons, cocoon exuvia, juveniles and adults) and enchytraeid abundance. The dataset consists of 553 sampling events (= samples, corresponding to upper and lower layers of the soil monoliths) and 12739 occurrences (earthworms, mainly identified to species and earthworm cocoons and enchytraeids, identified to family) collected during 1990-1991, 2004, 2014-2016 and 2018-2020. In total, 3305 individuals of earthworms were collected, representing ten (out of twelve) species and all eight genera recorded for the fauna of the Central Urals. In addition, 7292 earthworm egg cocoons and cocoon exuvia and 6926 individuals of enchytraeids were accumulated. The presence-absence data on each of the ten earthworm species, egg cocoons, cocoon exuvia and enchytraeids are provided for each sampling event. All data were collected in undisturbed non-polluted areas and are used as a local reference for ecotoxicological monitoring. The dataset provides valuable information for estimating the composition and abundance of earthworm communities in different habitats over a long time and contributes to the study of soil fauna biodiversity in the Urals.",2021-11-26 +34709872,Supporting Health Equity Through Data-Driven Decision-Making: A Local Health Department Response to COVID-19.,"COVID-19 highlights preexisting inequities that affect health outcomes and access to care for Black and Brown Americans. 
The Marion County Public Health Department in Indiana sought to address inequities in COVID-19 testing by using surveillance data to place community testing sites in areas with the highest incidence of disease. Testing site demographic data indicated that targeted testing reached populations with the highest disease burden, suggesting that local health departments can effectively use surveillance data as a tool to address inequities. (Am J Public Health. 2021;111(S3):S197-S200. https://doi.org/10.2105/AJPH.2021.306421).",2021-10-01 +35011068,"Nutrition, Diet and Healthy Aging. ","The current increase in life expectancy is confirmed by data from different sources (i.e.,The World Population Prospects 2019 issued by the United Nations; https://population.un.org/wpp/ (accessed on 20 December 2021)), which predict that, in the near future, individ-uals who are over 65 and over 80 will be the fastest-growing portion of the population [...].",2021-12-31 +31130983,Predicting Ion Channels Genes and Their Types With Machine Learning Techniques.,"Motivation: The number of ion channels is increasing rapidly. As many of them are associated with diseases, they are the targets of more than 700 drugs. The discovery of new ion channels is facilitated by computational methods that predict ion channels and their types from protein sequences. Methods: We used the SVMProt and the k-skip-n-gram methods to extract the feature vectors of ion channels, and obtained 188- and 400-dimensional features, respectively. The 188- and 400-dimensional features were combined to obtain 588-dimensional features. We then employed the maximum-relevance-maximum-distance method to reduce the dimensions of the 588-dimensional features. Finally, the support vector machine and random forest methods were used to build the prediction models to evaluate the classification effect. 
Results: Different methods were employed to extract various feature vectors, and after effective dimensionality reduction, different classifiers were used to classify the ion channels. We extracted the ion channel data from the Universal Protein Resource (UniProt, http://www.uniprot.org/) and Ligand-Gated Ion Channel databases (http://www.ebi.ac.uk/compneur-srv/LGICdb/LGICdb.php), and then verified the performance of the classifiers after screening. The findings of this study could inform the research and development of drugs.",2019-05-03 +34390311,Machine learning-based model for predicting 1 year mortality of hospitalized patients with heart failure.,"

Aims

Individual risk stratification is a fundamental strategy in managing patients with heart failure (HF). Artificial intelligence, particularly machine learning (ML), can develop superior models for predicting the prognosis of HF patients, and administrative claim data (ACD) are suitable for ML analysis because ACD is a structured database. The objective of this study was to analyse ACD using an ML algorithm, predict the 1 year mortality of patients with HF, and finally develop an easy-to-use prediction model with high accuracy using the top predictors identified by the ML algorithm.

Methods and results

Machine learning-based prognostic prediction models were developed from the ACD on 10 175 HF patients from the Japanese Registry of Acute Decompensated Heart Failure with 17% mortality during 1 year follow-up. The top predictors for prognosis in HF were identified by the permutation feature importance technique, and an easy-to-use prediction model was developed based on these predictors. The c-statistics and Brier scores of the developed ML-based models were compared with those of conventional risk models: Seattle Heart Failure Model (SHFM) and Meta-Analysis Global Group in Chronic Heart Failure (MAGGIC). A voting classifier algorithm (ACD-VC) achieved the highest c-statistics among the six ML algorithms. The permutation feature importance technique enabled identification of the top predictors such as Barthel index, age, body mass index, duration of hospitalization, last hospitalization, renal disease, and non-loop diuretics use (feature importance values were 0.054, 0.025, 0.010, 0.005, 0.005, 0.004, and 0.004, respectively). Upon combination of some of the predictors that can be assessed from a brief interview, the Simple Model by ARTificial intelligence for HF risk stratification (SMART-HF) was established as an easy-to-use prediction model. Compared with the conventional models, SMART-HF achieved a higher c-statistic {ACD-VC: 0.777 [95% confidence interval (CI) 0.751-0.803], SMART-HF: 0.765 [95% CI 0.739-0.791], SHFM: 0.713 [95% CI 0.684-0.742], MAGGIC: 0.726 [95% CI 0.698-0.753]} and better Brier scores (ACD-VC: 0.121, SMART-HF: 0.124, SHFM: 0.139, MAGGIC: 0.130).

Conclusions

The ML model based on ACD predicted the 1 year mortality of HF patients with high accuracy, and SMART-HF along with the ML model achieved superior performance to that of the conventional risk models. The SMART-HF model has the clear merit of easy operability even by non-healthcare providers with a user-friendly online interface (https://hfriskcalculator.herokuapp.com/). Risk models developed using SMART-HF may provide a novel modality for risk stratification of patients with HF.",2021-08-13 +34553975,Correction to Burghardt (2021).,"Reports an error in ""How comparative was (is) the Journal of Comparative Psychology? A reptilian perspective"" by Gordon M. Burghardt (Journal of Comparative Psychology, Advanced Online Publication, Aug 05, 2021, np). In the article ""How Comparative Was (Is) the Journal of Comparative Psychology? A Reptilian Perspective"" by Gordon M. Burghardt (Journal of Comparative Psychology. Advance online publication. August 5, 2021. http://doi.org/10.1037/com0000290), the phrase in the introduction that includes the Dewbury (1998) citation also includes an extra word. The phrase should appear as Dewbury (1998) noted that the focus. The year of publication for the Journal of Animal Behavior that appears in the third line of the Method section should appear as (1911-1917). The last sentence in the first paragraph of the Method section should appear as The 8,911 entries over this 110-year period constituted the data analyzed here in detail. The first sentence of the Results section should appear as The Journal of Animal Behavior published 238 articles in its 7-year run. The last phrase of the first paragraph of the Results section should appear as and 8,635 published items of the JCP and JCPP.... (The following abstract of the original article appeared in record 2021-71123-001.) Comparative psychology, and particularly the Journal of Comparative Psychology, has been criticized for a lack of taxon diversity. 
The nature and consequences of the critiques are discussed and assessed by analyzing the representation of nonavian reptiles in the journal over its 100-year existence. Although reptiles are indeed rare in the journal, their representation has greatly increased in recent decades, and especially since about 1980. More interestingly, the mix among the major reptilian groups: turtles, lizards, snakes, and crocodylians, has shifted. First turtles predominated in studies, but in recent decades, snakes were far more prominent. In the last 50 years of the journal, there were 10 times the number of articles on snakes than in the first 50 years, turtles declined, and lizards increased greatly, although their totals remained less than half the number of snake articles. Crocodylians only appeared in the first several volumes in the 1920s and never again. The predominance of snakes, not known for their cognitive prowess, in a journal viewed increasingly as an outlet for work on comparative cognition, is discussed. Finally, it appears that the low representation of reptile behavioral research is not peculiar to the Journal of Comparative Psychology, but animal behavior journals more generally. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-08-01 +32783952,DStabilize: A Web Resource to Generate Mirror Images of Biomolecules.,"Peptides comprising D-amino acids have been shown to be resistant to proteolysis. This makes them potential candidates as probes of cellular interactions, notably protein-biomolecule interactions. However, the empirical conversion of the amino acids that constitute a peptide from L-forms to D-forms will result in abrogation of the normal interactions made by the L-amino acids due to side-chain orientation changes that are associated with the changes in chirality. These interactions can be preserved by reversing the sequence of the D-peptide. 
We present a web server (http://dstabilize.bii.a-star.edu.sg/) that allows users to convert between L-proteins and D-proteins and for sequence reversal of D-peptides, along with the capability of performing other empirical geometric transforms. This resource allows the user to generate structures of interest easily for subsequent in silico processing.",2020-08-11 +33391232,Creation of an Online Platform for Identification of Microorganisms: Peak Picking or Full-Spectrum Analysis.,"Identification of microorganisms by MALDI-TOF mass spectrometry is a very efficient method with high throughput, speed, and accuracy. However, it is significantly limited by the absence of a universal database of reference mass spectra. This problem can be solved by creating an Internet platform for open databases of protein spectra of microorganisms. Choosing the optimal mathematical apparatus is the pivotal issue for this task. In our previous study we proposed the geometric approach for processing mass spectrometry data, which represented a mass spectrum as a vector in a multidimensional Euclidean space. This algorithm was implemented in a Jacob4 stand-alone package. We demonstrated its efficiency in delimiting two closely related species of the Bacillus pumilus group. In this study, the geometric approach was realized as R scripts which allowed us to design a Web-based application. We also studied the possibility of using full spectra analysis (FSA) without calculating mass peaks (PPA), which is the logical development of the method. We used 74 microbial strains from the collections of ICiG SB RAS, UNIQEM, IEGM, KMM, and VGM as the models. We demonstrated that the algorithms based on peak-picking and analysis of complete data have accuracy no less than that of Biotyper 3.1 software. We proposed a method for calculating cut-off thresholds based on averaged intraspecific distances. 
The resulting database, raw data, and the set of R scripts are available online at https://icg-test.mydisk.nsc.ru/s/qj6cfZg57g6qwzN.",2020-12-18 +34825724,Quality control of 3D MRSI data in glioblastoma: Can we do without the experts?,"

Purpose

Proton magnetic resonance spectroscopic imaging (1H MRSI) is a noninvasive technique for assessing tumor metabolism. Manual inspection is still the gold standard for quality control (QC) of spectra, but it is both time-consuming and subjective. The aim of the present study was to assess automatic QC of glioblastoma MRSI data using random forest analysis.

Methods

Data for 25 patients, acquired prospectively in a preradiotherapy examination, were submitted to postprocessing with syngo.MR Spectro (VB40A; Siemens) or Java-based magnetic resonance user interface (jMRUI) software. A total of 28 features were extracted from each spectrum for the automatic QC. Three spectroscopists also performed manual inspections, labeling each spectrum as good or poor quality. All statistical analyses, with addressing unbalanced data, were conducted with R 3.6.1 (R Foundation for Statistical Computing; https://www.r-project.org).

Results

The random forest method classified the spectra with an area under the curve of 95.5%, sensitivity of 95.8%, and specificity of 81.7%. The most important feature for the classification was Residuum_Lipids_Versus_Fit, obtained with syngo.MR Spectro.

Conclusion

The automatic QC method was able to distinguish between good- and poor-quality spectra, and can be used by radiation oncologists who are not spectroscopy experts. This study revealed a novel set of MRSI signal features that are closely correlated with spectral quality.",2021-11-26 +34818955,Asthma biologic trial eligibility and real-world outcomes in the United States.,"

Objective

To compare the outcomes of real-world patients who would have been eligible for asthma biologics to those who would not have been eligible.

Methods

We used data from the OptumLabs Data Warehouse (OLDW) to categorize patients into eligible and ineligible groups based on clinical trials (n = 19 trials) used for Food and Drug Administration (FDA) approval. We then compared the change in the number of asthma exacerbations before and after biological initiation between the two groups.

Results

The percentage of people who would have been eligible for asthma biologic clinical trials ranged from 0-10.2%. The eligible group had a greater reduction in number of asthma exacerbations compared to the ineligible group based on eligibility criteria from 1 omalizumab trial (1.52, 95% CI 1.25, 1.8 in eligible vs. 0.47, 95% CI 0.43, 0.52 in ineligible) and from 1 dupilumab trial (1.6, 95% CI 0.92, 2.28 in eligible vs. 0.52, 95% CI 0.38, 0.65 ineligible). Notably, 15 of the 19 trials had fewer than 11 eligible people, limiting additional comparisons.

Conclusions

Fewer than 1 in 10 people in the United States treated with asthma biologics would have been eligible to participate in the trial for the biologic they used. Where comparisons could be made, trial eligible people have a greater reduction in exacerbations.Supplemental data for this article is available online at https://doi.org/10.1080/02770903.2021.2010749 .",2021-12-06 +34015403,Ebolabase: Zaire ebolavirus-human protein interaction database for drug-repurposing.,"Ebola Virus (EBOV) is one of the deadliest pathogenic virus which causes hemorrhagic fever. Though many Ebola-human interaction studies and databases are already reported, the unavailability of an adequate model and lack of publically accessible resources requires a comprehensive study to curate the Ebola-Human-Drug interactions. In total, 270 human proteins interacted with EBOV are collected from published experimental evidence. Then the protein-protein interaction networks are generated as EBOV-human and EBOV-Human-Drugs interaction. These results can help the researcher to find the effective repurposed drug for EBOV treatment. Further, the illustration of gene enrichment and pathway analysis would provide knowledge and insight of EBOV-human interaction describes the importance of the study. Investigating the networks may help to identify a suitable human-based drug target for ebola research community. The inclusion of an emerging concept, a human-based drug targeted therapy plays a very significant role in drug repurposing which reduces the time and effort is the highlight of the current research. An integrated database namely, Ebolabase has been developed and linked with other repositories such as Epitopes, Structures, Literature, Genomics and Proteomics. All generated networks are made to be viewed in a customized manner and the required data can be downloaded freely. The Ebolabase is available at http://ebola.bicpu.edu.in.",2021-05-17 +,First Report of Xanthomonas axonopodis pv. 
begoniae Causing Bacterial Leaf Spot on Rieger Begonias in Taiwan,"Rieger begonias (Begonia × hiemalis) are flowering ornamentals grown worldwide, often as bedding plants or houseplants. In March 2019, potted Rieger begonias exhibiting symptoms resembling bacterial leaf spot were found in a nursery in Nantun District, Taichung, Taiwan. Approximately 10% of the plants on-site exhibited notable symptoms. The lesions appeared water-soaked and were mostly found near leaf margins. Larger V-shaped, necrotic leaf lesions were also observed. Four plants were sampled, and infected leaves from all of them were cut and examined using a Nikon Optiphot bright-field microscope at 600× magnification. Bacterial streaming and rod-shaped, motile cells were consistently observed, and the bacteria were isolated by streaking the samples onto Reasoner’s 2A agar. After incubating 72 h under 25°C, circular, yellow colonies were recovered from infected tissues. At least one bacterial strain was isolated for each plant, and six isolates (Rb1 to Rb6) were obtained. When grown on yeast dextrose calcium carbonate agar, all of the strains produced yellow mucoid colonies typical of Xanthomonas species (Schaad et al. 2001). Additional biochemical and physiological tests indicated that these strains were able to degrade casein, starch, and lipid and to hydrolyze esculin (Schaad et al. 2001). Tobacco infiltration also showed that all six isolates were capable of inducing hypersensitive responses. Further identification of these strains was conducted by sequencing their fusA, gyrB, gap-1, gltA, lacF, and lepA genes and analyzing the data using a multilocus sequence analysis scheme (Almeida et al. 2010). For all six gene fragments examined, the sequences from Rb1 to Rb6 were identical. The sequences of the six genes were deposited separately in GenBank (accession nos. MK838481 to MK838486). 
They were then concatenated into a 2,745-bp fragment and compared against data retrieved from the Plant Associated and Environmental Microbes Database (PAMDB; http://genome.ppws.vt.edu/cgi-bin/MLST/home.pl). The analyses showed that the concatenated sequence of the Rb strains shared highest identity (99.7%; 2,738/2,745 bp) with the sequences of X. axonopodis pv. begoniae ICMP 194 (type strain; sequences available in PAMDB). To complete Koch’s postulates, potted Rieger begonias were inoculated with three representative strains: Rb1, Rb2, and Rb3. Three plants were tested for each strain, and three additional pots served as controls. For each plant, three leaves were pierced using a syringe needle. The bacteria were applied onto the wounds using sterile cotton swabs dipped in bacterial suspensions (with the average concentration of 2.7 × 108 CFU/ml). For the control group, cotton swabs dipped in sterile water were used instead. The plants were bagged for 2 days (to maintain humidity) and kept in an incubator (27/25°C day/night) throughout the experiment. Within 12 days, water-soaked leaf spot and V-shaped necrotic lesions developed on all of the inoculated plants. No symptoms were observed on the controls. Bacterial strains were reisolated (two for each strain tested) and identified by sequencing their gap-1 gene. All of the reisolates shared identical sequences with the original isolates. Bacterial leaf spot of Rieger begonias and closely related ornamentals has been reported in Turkey, the Yunnan Province of China, and other locations (Ornek et al. 2007; Zhou and Ji 2013). The present study is the first report of the disease occurring in Taiwan. Because many Begonia species or hybrids found in Taiwan could also be susceptible to X. axonopodis pv. 
begoniae, it is important to avoid the spread of the pathogen and the possible outbreak of this disease.",2019-11-01 +34737426,A generalized linear mixed model association tool for biobank-scale data.,"Compared with linear mixed model-based genome-wide association (GWA) methods, generalized linear mixed model (GLMM)-based methods have better statistical properties when applied to binary traits but are computationally much slower. In the present study, leveraging efficient sparse matrix-based algorithms, we developed a GLMM-based GWA tool, fastGWA-GLMM, that is severalfold to orders of magnitude faster than the state-of-the-art tools when applied to the UK Biobank (UKB) data and scalable to cohorts with millions of individuals. We show by simulation that the fastGWA-GLMM test statistics of both common and rare variants are well calibrated under the null, even for traits with extreme case-control ratios. We applied fastGWA-GLMM to the UKB data of 456,348 individuals, 11,842,647 variants and 2,989 binary traits (full summary statistics available at http://fastgwa.info/ukbimpbin ), and identified 259 rare variants associated with 75 traits, demonstrating the use of imputed genotype data in a large cohort to discover rare variants for binary complex traits.",2021-11-04 +29697364,IMPACT web portal: oncology database integrating molecular profiles with actionable therapeutics.,"BACKGROUND:With the advancement of next generation sequencing technology, researchers are now able to identify important variants and structural changes in DNA and RNA in cancer patient samples. With this information, we can now correlate specific variants and/or structural changes with actionable therapeutics known to inhibit these variants. We introduce the creation of the IMPACT Web Portal, a new online resource that connects molecular profiles of tumors to approved drugs, investigational therapeutics and pharmacogenetics associated drugs. 
RESULTS:IMPACT Web Portal contains a total of 776 drugs connected to 1326 target genes and 435 target variants, fusion, and copy number alterations. The online IMPACT Web Portal allows users to search for various genetic alterations and connects them to three levels of actionable therapeutics. The results are categorized into 3 levels: Level 1 contains approved drugs separated into two groups; Level 1A contains approved drugs with variant specific information while Level 1B contains approved drugs with gene level information. Level 2 contains drugs currently in oncology clinical trials. Level 3 provides pharmacogenetic associations between approved drugs and genes. CONCLUSION:IMPACT Web Portal allows for sequencing data to be linked to actionable therapeutics for translational and drug repurposing research. The IMPACT Web Portal online resource allows users to query genes and variants to approved and investigational drugs. We envision that this resource will be a valuable database for personalized medicine and drug repurposing. IMPACT Web Portal is freely available for non-commercial use at http://tanlab.ucdenver.edu/IMPACT .",2018-04-20 +32656017,A Predictive Model for Patient Census and Ventilator Requirements at Individual Hospitals During the Coronavirus Disease 2019 (COVID-19) Pandemic: A Preliminary Technical Report.,"During the initial wave of the coronavirus disease 2019 (COVID-19) pandemic, many hospitals struggled to forecast bed capacity and the number of mechanical ventilators they needed to have available. Numerous epidemiological models forecast regional or national peak bed and ventilator needs, but these are not suitable for predictions at the hospital level. We developed an analytical model to assist hospitals in determining their census and ventilator requirements for COVID-19 patients during future periods of the pandemic, by using their data. 
This model is based on (1) projection of future daily admissions using counts from the previous seven days, (2) lengths of stay and duration of mechanical ventilation, and (3) the percentage of inpatients requiring mechanical ventilation. The implementation is done within an Excel (Microsoft, Redmond, WA) workbook without the use of add-ins or macro programming. The model inputs for each currently hospitalized patient with COVID-19 are the duration of hospitalization, whether the patient is currently receiving or has previously received mechanical ventilation, and the duration of the current ventilation episode, if applicable. Data validity and internal consistency are checked within the workbook, and errors are identified. Durations of care (length of hospital stay and duration of mechanical ventilation) are generated by fitting a two-parameter Weibull distribution to the hospital's historical data from the initial phase of the pandemic (incorporating censoring due to ongoing care), for which we provide source code in the R programming language (R Foundation for Statistical Computing, Vienna, Austria). Conditional distributions are then calculated using the hospital's current data. The output of the model is nearly instantaneous, producing an estimate of the census and the number of ventilators required in one, three, and seven days following the date on which the simulation is run. Given that the pandemic is ongoing, and a second surge of cases is expected with the reopening of the economy, having such a tool to predict resource needs for hospital planning purposes has been useful. A major benefit to individual hospitals from such modeling has been to provide reassurance to state and local governments that the hospitals have sufficient resources available to meet anticipated needs for new COVID-19 patients without having to set aside substantially greater numbers of beds or ventilators for such care. 
Such ongoing activity is important for the economic recovery of hospitals that have been hard-hit economically by the shutdown in elective surgery and other patient care activities. The modeling software is freely available at https://FDshort.com/COVID19, and its parameters can easily be modified by end-users.",2020-06-08 +34878311,Mortality Risk from PM2.5: A Comparison of Modeling Approaches to Identify Disparities across Racial/Ethnic Groups in Policy Outcomes.,"

Background

Regulatory analyses of air pollution policies require the use of concentration-response functions and underlying health data to estimate the mortality and morbidity effects, as well as the resulting benefits, associated with policy-related changes in fine particulate matter ≤2.5μm (PM2.5). Common practice by U.S. federal agencies involves using underlying health data and concentration-response functions that are not differentiated by racial/ethnic group.

Objectives

We aim to explore the policy implications of using race/ethnicity-specific concentration-response functions and mortality data in comparison to standard approaches when estimating the impact of air pollution on non-White racial/ethnic subgroups.

Methods

Using new estimates from the epidemiological literature on race/ethnicity-specific concentration-response functions paired with race/ethnicity-specific mortality rates, we estimated the mortality impacts of air pollution from all sources from a uniform increase in concentrations and from the regulations imposed by the Mercury Air Toxics Standards.

Results

Use of race/ethnicity-specific information increased PM2.5-related premature mortality estimates in older populations by 9% and among older Black Americans by 150% for all-source pollution exposure. Under a uniform degradation of air quality and race/ethnicity-specific information, older Black Americans were found to have approximately 3 times higher mortality relative to White Americans, which is obscured under a non-race/ethnicity-specific modeling approach. Standard approaches of using non-racial/ethnic specific information underestimate the benefits of the Mercury Air Toxics Standards to older Black Americans by almost 60% and overestimate the benefits to older White Americans by 14% relative to using a race/ethnicity-specific modeling approach.

Discussion

Policy analyses incorporating race/ethnicity-specific concentration-response functions and mortality data relative to nondifferentiated inputs underestimate the overall magnitude of PM2.5 mortality burden and the disparity in impacts on older Black American populations. Based on our results, we recommend that the best available race/ethnicity-specific inputs are used in regulatory assessments to understand and reduce environmental injustices. https://doi.org/10.1289/EHP9001.",2021-12-15 +33258964,"ThRSDB: a database of Thai rice starch composition, molecular structure and functionality. ","As starch properties can affect end product quality in many ways, rice starch from Thai domesticated cultivars and landraces has been the focus of increasing research interest. Increasing knowledge in this area creates a high demand from the research community for better organized information. The Thai Rice Starch Database (ThRSDB) is an online database containing data extensively curated from original research articles on Thai rice starch composition, molecular structure and functionality. The key aim of the ThRSDB is to facilitate accessibility to dispersed rice starch information for, but not limited to, both research and industrial users. Currently, 373 samples from 191 different Thai rice cultivars have been collected from 39 published articles. The ThRSDB includes the search functions necessary for accessing data together with a user-friendly web interface and interactive visualization tools. We have also demonstrated how the collected data can be efficiently used to observe the relationships between starch parameters and rice cultivars through correlation analysis and Partial Least Squares Discriminant Analysis. 
Database URL: http://thairicestarch.kku.ac.th.",2020-12-01 +29106651,Target-Pathogen: a structural bioinformatic approach to prioritize drug targets in pathogens.,"Available genomic data for pathogens has created new opportunities for drug discovery and development to fight them, including new resistant and multiresistant strains. In particular structural data must be integrated with both, gene information and experimental results. In this sense, there is a lack of an online resource that allows genome wide-based data consolidation from diverse sources together with thorough bioinformatic analysis that allows easy filtering and scoring for fast target selection for drug discovery. Here, we present Target-Pathogen database (http://target.sbg.qb.fcen.uba.ar/patho), designed and developed as an online resource that allows the integration and weighting of protein information such as: function, metabolic role, off-targeting, structural properties including druggability, essentiality and omic experiments, to facilitate the identification and prioritization of candidate drug targets in pathogens. We include in the database 10 genomes of some of the most relevant microorganisms for human health (Mycobacterium tuberculosis, Mycobacterium leprae, Klebsiella pneumoniae, Plasmodium vivax, Toxoplasma gondii, Leishmania major, Wolbachia bancrofti, Trypanosoma brucei, Shigella dysenteriae and Schistosoma Smanosoni) and show its applicability. New genomes can be uploaded upon request.",2018-01-01 +34129933,Combining gene expression signature with clinical features for survival stratification of gastric cancer.,"The AJCC staging system is considered as the golden standard in clinical practice. However, it remains some pitfalls in assessing the prognosis of gastric cancer (GC) patients with similar clinicopathological characteristics. We aim to develop a new clinic and genetic risk score (CGRS) to improve the prognosis prediction of GC patients. 
We established genetic risk score (GRS) based on nine-gene signature including APOD, CCDC92, CYS1, GSDME, ST8SIA5, STARD3NL, TIMEM245, TSPYL5, and VAT1 based on the gene expression profiles of the training set from the Asian Cancer Research Group (ACRG) cohort by LASSO-Cox regression algorithms. CGRS was established by integrating GRS with clinical risk score (CRS) derived from Surveillance, Epidemiology, and End Results (SEER) database. GRS and CGRS dichotomized GC patients into high and low risk groups with significantly different prognosis in four independent cohorts with different data types, such as microarray, RNA sequencing and qRT-PCR (all HR > 1, all P < 0.001). Both GRS and CGRS were prognostic signatures independent of the AJCC staging system. Receiver operating characteristic (ROC) analysis showed that area under ROC curve of CGRS was larger than that of the AJCC staging system in most cohorts we studied. Nomogram and web tool (http://39.100.117.92/CGRS/) based on CGRS were developed for clinicians to conveniently assess GC prognosis in clinical practice. CGRS integrating genetic signature with clinical features shows strong robustness in predicting GC prognosis, and can be easily applied in clinical practice through the web application.",2021-06-12 +32811511,Advancing brain barriers RNA sequencing: guidelines from experimental design to publication.,"

Background

RNA sequencing (RNA-Seq) in its varied forms has become an indispensable tool for analyzing differential gene expression and thus characterization of specific tissues. Aiming to understand the brain barriers genetic signature, RNA-Seq has also been introduced in brain barriers research. This has led to availability of both, bulk and single-cell RNA-Seq datasets over the last few years. If appropriately performed, the RNA-Seq studies provide powerful datasets that allow for significant deepening of knowledge on the molecular mechanisms that establish the brain barriers. However, RNA-Seq studies comprise complex workflows that require to consider many options and variables before, during and after the proper sequencing process.

Main body

In the current manuscript, we build on the interdisciplinary experience of the European PhD Training Network BtRAIN ( https://www.btrain-2020.eu/ ) where bioinformaticians and brain barriers researchers collaborated to analyze and establish RNA-Seq datasets on vertebrate brain barriers. The obstacles BtRAIN has identified in this process have been integrated into the present manuscript. It provides guidelines along the entire workflow of brain barriers RNA-Seq studies starting from the overall experimental design to interpretation of results. Focusing on the vertebrate endothelial blood-brain barrier (BBB) and epithelial blood-cerebrospinal-fluid barrier (BCSFB) of the choroid plexus, we provide a step-by-step description of the workflow, highlighting the decisions to be made at each step of the workflow and explaining the strengths and weaknesses of individual choices made. Finally, we propose recommendations for accurate data interpretation and on the information to be included into a publication to ensure appropriate accessibility of the data and reproducibility of the observations by the scientific community.

Conclusion

Next generation transcriptomic profiling of the brain barriers provides a novel resource for understanding the development, function and pathology of these barrier cells, which is essential for understanding CNS homeostasis and disease. Continuous advancement and sophistication of RNA-Seq will require interdisciplinary approaches between brain barrier researchers and bioinformaticians as successfully performed in BtRAIN. The present guidelines are built on the BtRAIN interdisciplinary experience and aim to facilitate collaboration of brain barriers researchers with bioinformaticians to advance RNA-Seq study design in the brain barriers community.",2020-08-18 +34060398,Prognostic Significance of Prognostic Nutritional Index in Patients with Renal Cell Carcinoma: A Meta-Analysis.,"The prognostic nutrition index (PNI), based on the serum lymphocyte counts and albumin levels, has been introduced as a prognostic factor in various cancer. In the present study, we explore the prognostic significance of PNI in patients with renal cell carcinoma (RCC). A literature search of all publications was conducted using the Cochrane library, PubMed and Embase databases from inception to April 2020. A total of 12 studies consisting of 7,391 patients were enrolled in the present study. We found that low pretreatment PNI is significantly correlated to poor survival, including overall survival (OS) (P < 0.001), cancer-specific survival (CSS) (P = 0.002), progression-free survival/recurrence-free survival/disease-free survival (PFS/RFS/DFS) (P < 0.001). The age (P < 0.001), clear cell histology (P = 0.044), T3-T4 (P = 0.049), and Fuhrman grade 3-4 (P = 0.024) were significantly differed in the low and high pretreatment PNI group. In summary, low pretreatment PNI was associated with adverse clinicopathological features in patients with RCC. Besides, low pretreatment PNI was also an unfavorable factor of OS, CSS, and PFS/RFS/DFS in RCC patients, which could serve as an unfavorable factor. 
More studies with large participants are required to verify our results. Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1931702.",2021-06-01 +33304713,NeProc predicts binding segments in intrinsically disordered regions without learning binding region sequences.,"Intrinsically disordered proteins are those proteins with intrinsically disordered regions. One of the unique characteristics of intrinsically disordered proteins is the existence of functional segments in intrinsically dis-ordered regions. These segments are involved in binding to partner molecules, such as protein and DNA, and play important roles in signaling pathways and/or transcriptional regulation. Although there are databases that gather information on such disordered binding regions, data remain limited. Therefore, it is desirable to develop programs to predict the disordered binding regions without using data for the binding regions. We developed a program, NeProc, to predict the disordered binding regions, which can be regarded as intrinsically disordered regions with a structural propensity. We only used data for the structural domains and intrinsically disordered regions to detect such regions. NeProc accepts a query amino acid sequence converted into a position specific score matrix, and uses two neural networks that employ different window sizes, a neural network of short windows, and a neural network of long windows. The performance of NeProc was comparable to that of existing programs of the disordered binding region prediction. This result presents the possibility to overcome the shortage of the disordered binding region data in the development of the prediction programs for these binding regions. NeProc is available at http://flab.neproc.org/neproc/index.html.",2020-11-03 +34116617,The Continuum of Recovery from Alcohol Dependence: From Addiction Remission to Complete Mental Health.,"

Background

Few representative studies have examined optimal mental health among those with a history of alcohol dependence (AD).

Objectives

In a representative sample of Canadians with a history of AD, to determine prevalence of, and factors associated with 1) remission from AD, 2) the absence of Substance Dependence and Psychiatric Disorders (SDPD) in the past year, and 3) complete mental health (CMH).

Method

Secondary analysis of a publicly available Statistics Canada database, the 2012 Canadian Community Health Survey-Mental Health (820 adults with AD history; 19,945 without AD). Lifetime AD, past-year remission from AD, and previous 12-month absence of SDPD were determined using World Health Organisation Composite International Diagnostic Interview (WHO-CIDI) measures. Individuals are classified as being in CMH if they possessed social and psychological well-being, happiness or life satisfaction and absence of SDPD.

Results

Over 70% of those with a history of AD were in remission, 52% were without past-year SDPD, and 38% of respondents were in CMH. Positive outcomes were more common among married respondents, older individuals, those with higher level of social support, and those who had never had major depressive disorders or generalised anxiety disorders.

Conclusion

The majority of Canadians with a history of AD achieve remission and a significant proportion achieve CMH. However, targeted outreach is warranted for the most vulnerable with a history of alcohol dependence, including younger respondents and those with low levels of social support or a history of mental illness. Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1922451.",2021-06-11 +32477412,Expression Quantitative Trait Loci (eQTL) Mapping in Korean Patients With Crohn's Disease and Identification of Potential Causal Genes Through Integration With Disease Associations.,"

Background

Expression quantitative trait loci (eQTL) datasets have extensively been used to help interpret genome-wide association study signals. Most eQTL analyses have been conducted with populations of European ancestry.

Objective

To determine the most functionally relevant genes at the Crohn's disease (CD) loci identified in genome-wide association studies (GWAS) involving Asian populations and to find novel disease-associated genes, we conducted an eQTL analysis.

Methods

eQTL analysis was performed using whole-blood RNA-sequencing of 101 Korean patients with CD. FastQTL was used for a pair-wise genome analysis of ∼ 6.5 M SNPs and ∼ 22 K transcripts.

Results

We identified 135,164 cis-eQTL and 3,816 eGenes with a false discovery rate less than 0.05. A significant proportion of the genes identified in our study overlapped with those identified in previous studies. The significantly enriched pathways of these 3,816 eGenes included neutrophil degranulation and small molecule biosynthetic process. Integrated analysis of CD GWAS with Korean eQTL revealed two putative target genes, TNFSF15 and GPR35, at two previously reported loci, whereas TNFSF15 only with the whole blood data from the Genotype-Tissue Expression (GTEx) project, highlighting the utility of building a population-specific data set, even of modest size. The risk alleles of these genes were found to be associated with lower expression levels of TNFSF15 and GPR35, respectively. Our eQTL browser can be accessed at ""http://asan.crohneqtl.com/"".

Conclusion

This resource would be useful for studies that need to employ genome-wide association analyses involving Asian populations.",2020-05-14 +33095862,ThermoMutDB: a thermodynamic database for missense mutations.,"Proteins are intricate, dynamic structures, and small changes in their amino acid sequences can lead to large effects on their folding, stability and dynamics. To facilitate the further development and evaluation of methods to predict these changes, we have developed ThermoMutDB, a manually curated database containing >14,669 experimental data of thermodynamic parameters for wild type and mutant proteins. This represents an increase of 83% in unique mutations over previous databases and includes thermodynamic information on 204 new proteins. During manual curation we have also corrected annotation errors in previously curated entries. Associated with each entry, we have included information on the unfolding Gibbs free energy and melting temperature change, and have associated entries with available experimental structural information. ThermoMutDB supports users to contribute to new data points and programmatic access to the database via a RESTful API. ThermoMutDB is freely available at: http://biosig.unimelb.edu.au/thermomutdb.",2021-01-01 +34856835,Enhancing the caregiving experience of family care partners in Singapore through an arts programme for persons with dementia: an exploratory study.,"Objectives: Arts-based programmes for persons with dementia have shown promise in alleviating some of the caregiving challenges for family care partners. The present study sought to broaden the sociocultural perspectives of arts-based programmes by investigating the impact of a locally developed Arts & Dementia programme for persons with dementia on their family care partners in Singapore.Methods: Thirty-two family care partners of persons with dementia who participated in the Arts & Dementia programme were recruited. 
A mixed-methodological study was employed utilising quantitative pre- and post-programme data from the Zarit Burden Interview and Gain in Alzheimer care Instrument, and qualitative data from semi-structured group interviews.Results: Although there were no self-reported short-term changes in perceived caregiving difficulties and gains, semi-structured group interviews revealed potential caregiving benefits. Six overarching themes were identified: (1) contentment and social engagement, (2) re-connecting and developing new interests, (3) positive influence on caregiving, (4) enhancements to the programme, (5) more arts programmes, and (6) more support for families living with dementia.Conclusion: The present study highlights potential benefits of community-based arts activities in enabling caregiving to be a more positive experience for care partners.Supplemental data for this article is available online at http://dx.doi.org/10.1080/13607863.2021.2008306'I kept blaming myself for not spending enough time with him. Communication with him is different now. Watching him on the stage, he is happy and with a purpose'. (Gary, male).",2021-12-03 +34315491,Identification of the miRNA signature and key genes in colorectal cancer lymph node metastasis.,"

Background

Because its metastasis to the lymph nodes are closely related to poor prognosis, miRNAs and mRNAs can serve as biomarkers for the diagnosis, prognosis, and therapy of colorectal cancer (CRC). This study aimed to identify novel gene signatures in the lymph node metastasis of CRC.

Methods

GSE56350, GSE70574, and GSE95109 datasets were downloaded from the Gene Expression Omnibus (GEO) database, while data from 569 colorectal cancer cases were also downloaded from The Cancer Genome Atlas (TCGA) database. Differentially expressed miRNAs (DE-miRNAs) were calculated using R programming language (Version 3.6.3), while gene ontology and enrichment analysis of target mRNAs were performed using FunRich ( http://www.funrich.org ). Furthermore, the mRNA-miRNA network was constructed using Cytoscape software (Version 3.8.0). Gene expression levels were verified using the GEO datasets. Similarly, quantitative real-time PCR (qPCR) was used to examine expression profiles from 20 paired non-metastatic and metastatic lymph node tissue samples obtained from patients with CRC.

Results

In total, five DE-miRNAs were selected, and 34 mRNAs were identified after filtering the results. Moreover, two key miRNAs (hsa-miR-99a, hsa-miR-100) and one gene (heparan sulfate-glucosamine 3-sulfotransferase 2 [HS3ST2]) were identified. The GEO datasets analysis and qPCR results showed that the expression of key miRNA and genes were consistent with that obtained from the bioinformatic analysis. A novel miRNA-mRNA network capable of predicting the prognosis and confirmed experimentally, hsa-miR-99a-HS3ST2-hsa-miR-100, was found after expression analysis in metastasized lymph node tissue from CRC samples.

Conclusion

In summary, miRNAs and genes with potential as biomarkers were found and a novel miRNA-mRNA network was established for CRC lymph node metastasis by systematic bioinformatic analysis and experimental validation. This network may be used as a potential biomarker in the development of lymph node metastatic CRC.",2021-07-07 +31111606,Poplar carbohydrate-active enzymes: whole-genome annotation and functional analyses based on RNA expression data.,"Carbohydrate-active enzymes (CAZymes) catalyze the formation and modification of glycoproteins, glycolipids, starch, secondary metabolites and cell wall biopolymers. They are key enzymes for the biosynthesis of food and renewable biomass. Woody biomass is particularly important for long-term carbon storage and as an abundant renewable natural resource for many industrial applications. This study presents a re-annotation of CAZyme genes in the current Populus trichocarpa genome assembly and in silico functional characterization, based on high-resolution RNA-Seq data sets. Altogether, 1914 CAZyme and expansin genes were annotated in 101 families. About 1797 of these genes were found expressed in at least one Populus organ. We identified genes involved in the biosynthesis of different cell wall polymers and their paralogs. Whereas similar families exist in poplar and Arabidopsis thaliana (with the exception of CBM13 found only in poplar), a few families had significantly different copy numbers between the two species. To identify the transcriptional coordination and functional relatedness within the CAZymes and other proteins, we performed co-expression network analysis of CAZymes in wood-forming tissues using the AspWood database (http://aspwood.popgenie.org/aspwood-v3.0/) for Populus tremula. This provided an overview of the transcriptional changes in CAZymes during the transition from primary to secondary wall formation, and the clustering of transcripts into potential regulons. 
Candidate enzymes involved in the biosynthesis of polysaccharides were identified along with many tissue-specific uncharacterized genes and transcription factors. These collections offer a rich source of targets for the modification of secondary cell wall biosynthesis and other developmental processes in woody plants.",2019-07-01 +34450618,TPWshiny: an interactive R/Shiny app to explore cell line transcriptional responses to anti-cancer drugs. ,"The NCI Transcriptional Pharmacodynamics Workbench (Monks 2018) is an extensive compilation of directly measured transcriptional responses to anti-cancer agents across the well-characterized NCI-60 cancer cell lines. The NCI TPW data are publicly available through a web interface that allows limited user interaction with the data. We developed ""TPWshiny"" as a standalone, easy to install, R application to facilitate more interactive data exploration.With no programming skills required, TPWshiny provides an intuitive and comprehensive graphical interface to help researchers understand the response of tumor cell lines to 15 therapeutic agents. The data are presented in interactive scatter plots, heatmaps, time series and Venn diagrams. Data can be queried by drug concentration, time point, gene and tissue type. Researchers can download the data for further analysis. Users can download the ready-to-use, self-extracting package for Windows or macOS, and R source code from the project website (https://brb.nci.nih.gov/TPWshiny/). TPWshiny documentation and additional information can be found on the project website.",2021-08-27 +31620779,PADS Arsenal: a database of prokaryotic defense systems related genes.,"Defense systems are vital weapons for prokaryotes to resist heterologous DNA and survive from the constant invasion of viruses, and they are widely used in biochemistry investigation and antimicrobial drug research. 
So far, numerous types of defense systems have been discovered, but there is no comprehensive defense systems database to organize prokaryotic defense gene datasets. To fill this gap, we unveil the prokaryotic antiviral defense system (PADS) Arsenal (https://bigd.big.ac.cn/padsarsenal), a public database dedicated to gathering, storing, analyzing and visualizing prokaryotic defense gene datasets. The initial version of PADS Arsenal integrates 18 distinctive categories of defense system with the annotation of 6 600 264 genes retrieved from 63,701 genomes across 33 390 species of archaea and bacteria. PADS Arsenal provides various ways to retrieve defense systems related genes information and visualize them with multifarious function modes. Moreover, an online analysis pipeline is integrated into PADS Arsenal to facilitate annotation and evolutionary analysis of defense genes. PADS Arsenal can also visualize the dynamic variation information of defense genes from pan-genome analysis. Overall, PADS Arsenal is a state-of-the-art open comprehensive resource to accelerate the research of prokaryotic defense systems.",2020-01-01 +34183042,Vitamin D inhibits TNF-α induced apoptosis of human nucleus pulposus cells through regulation of NF-kB signaling pathway.,"

Background

To observe the effects of vitamin D on the apoptotic human nucleus pulposus cells under tumor necrosis factor-α (TNF-α) treatment.

Methods

The gene expression data was downloaded from the NCBI Gene Expression Omnibus (GEO) database ( https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE34095 ). Differentially expressed genes between degenerative disc and non-degenerative disc were performed by R software. Gene ontology (GO) and Kyoto Encyclopedia of Genes and Genome (KEGG) pathway enrichment analyses were performed using The Database for Annotation, Visualization and Integrated Discovery (DAVID). Then, the human nucleus pulposus tissue was harvested from 12 patients according to the modified Pfirrmann classification and human nucleus pulposus cells were obtained from digestion of herniated nucleus pulposus tissue. The collected nucleus pulposus cells were treated with different concentration of TNF-α, and cellular apoptosis was measured by flow cytometry. Then, human nucleus pulposus cells were divided into following groups: normal culture medium, TNF-α treated, TNF-α, and vitamin D-treated groups. Cellular apoptosis rate was quantified by flow cytometry. Protein expression of p-p65, p65, and IkBa was detected with western blot analysis.

Results

A total of 536 differentially expressed genes were identified through bioinformatic analysis. KEGG pathway revealed that NF-kB signaling pathway was involved in the process of disc degeneration. In the NP cell cultures, vitamin D significantly increased cell proliferation potency. Furthermore, vitamin D inhibited TNF-α induced apoptosis of human nucleus pulposus cells. Vitamin D reduced the phospho-NF-κB/p65 expression in the TNF-α-treated NP cells.

Conclusion

Vitamin D can attenuate TNF-α-induced NP cells apoptosis through interfering with the NF-κB pathway.",2021-06-28 +,409 Eleven Years of the Western Maryland Pasture-Based Meat Goat Performance Test.,"Abstract The US meat goat industry lags behind other livestock industries in the use of quantitative genetic evaluation. The Western Maryland Pasture-Based Meat Goat Performance Test was initiated in 2006 at the University of Maryland’s Western Maryland Research & Education Center (39º 30’ N/77º 44’ W). The purpose of the test was to identify genetically-superior meat goat bucks, especially those which exhibited resistance and resilience to internal parasites. From 2006–2016, 736 bucklings, of various breeds and crosses, were evaluated. One hundred producers from 20 states consigned one or bucks. Bi-weekly progress reports, summaries, and other information and data were shared via a blog (https://mdgoattest.blogspot.com), which has 218,346 cumulative page views from 523 blog posts. Many other programs were held in conjunction with the test, including sales, field days, youth programs, and carcass evaluation. Each year, top-performing bucks were identified and sold or retained for breeding, with 86% of consigners using the genetics from top-performing bucks in their breeding programs. Many more producers used the genetics from top-performing bucks by purchasing breeding stock and/or semen from consigners. Per survey data, consigning to the Maryland test improved the reputation of participating herds (80%), increased the demand and selling price for breeding stock (66–72%), and enabled the sale of semen from top-performers (38%). The test enabled participating producers to develop more parasite resistant herds (76%), improve the health and performance of their herds (73%), and improve their knowledge of parasite control (80%). 
The Western Maryland Pasture-Based Meat Goat Performance Test provided economic value to producers (71%), while contributing to the genetic improvement of the meat goat industry and serving as a valuable resource to producers.",2018-12-01 +34097064,ggtreeExtra: Compact Visualization of Richly Annotated Phylogenetic Data.,We present the ggtreeExtra package for visualizing heterogeneous data with a phylogenetic tree in a circular or rectangular layout (https://www.bioconductor.org/packages/ggtreeExtra). The package supports more data types and visualization methods than other tools. It supports using the grammar of graphics syntax to present data on a tree with richly annotated layers and allows evolutionary statistics inferred by commonly used software to be integrated and visualized with external data. GgtreeExtra is a universal tool for tree data visualization. It extends the applications of the phylogenetic tree in different disciplines by making more domain-specific data to be available to visualize and interpret in the evolutionary context.,2021-08-01 +33734313,Fast and sensitive taxonomic assignment to metagenomic contigs. ,"MMseqs2 taxonomy is a new tool to assign taxonomic labels to metagenomic contigs. It extracts all possible protein fragments from each contig, quickly retains those that can contribute to taxonomic annotation, assigns them with robust labels and determines the contig's taxonomic identity by weighted voting. Its fragment extraction step is suitable for the analysis of all domains of life. MMseqs2 taxonomy is 2-18x faster than state-of-the-art tools and also contains new modules for creating and manipulating taxonomic reference databases as well as reporting and visualizing taxonomic assignments. MMseqs2 taxonomy is part of the MMseqs2 free open-source software package available for Linux, macOS and Windows at https://mmseqs.com. 
Supplementary data is available at Bioinformatics online.",2021-03-17 +32967423,Spritz: A Proteogenomic Database Engine.,"Proteoforms are the workhorses of the cell, and subtle differences between their amino acid sequences or post-translational modifications (PTMs) can change their biological function. To most effectively identify and quantify proteoforms in genetically diverse samples by mass spectrometry (MS), it is advantageous to search the MS data against a sample-specific protein database that is tailored to the sample being analyzed, in that it contains the correct amino acid sequences and relevant PTMs for that sample. To this end, we have developed Spritz (https://smith-chem-wisc.github.io/Spritz/), an open-source software tool for generating protein databases annotated with sequence variations and PTMs. We provide a simple graphical user interface for Windows and scripts that can be run on any operating system. Spritz automatically sets up and executes approximately 20 tools, which enable the construction of a proteogenomic database from only raw RNA sequencing data. Sequence variations that are discovered in RNA sequencing data upon comparison to the Ensembl reference genome are annotated on proteins in these databases, and PTM annotations are transferred from UniProt. Modifications can also be discovered and added to the database using bottom-up mass spectrometry data and global PTM discovery in MetaMorpheus. 
We demonstrate that such sample-specific databases allow the identification of variant peptides, modified variant peptides, and variant proteoforms by searching bottom-up and top-down proteomic data from the Jurkat human T lymphocyte cell line and demonstrate the identification of phosphorylated variant sites with phosphoproteomic data from the U2OS human osteosarcoma cell line.",2020-10-07 +33529633,CRMarker: A manually curated comprehensive resource of cancer RNA markers.,"Biomolecular markers have extremely important value for cancer research and treatment. However, as far as we know, there are still no searchable and predictable resources focusing on multiple classes of RNA molecular markers in cancers. Herein, we developed CRMarker, a manually curated comprehensive repository of cancer RNA markers. In the current release, CRMarker v1.1 consists of 5489 ""known"" cancer RNA markers based on 8756 valid publications in PubMed, including 2878 mRNAs (genes), 1314 miRNAs, 1097 lncRNAs and 200 circRNAs, and involving two functional molecules (diagnosis and prognosis), 21 organisms and 154 cancers. The search results provided by the database are comprehensive, including 11 items such as RNA molecule expression and risk level, type of tissue or sample, cancer subtype, reference type, etc. Moreover, CRMarker also provides more than 18,000 potential cancer RNA markers, which are predicted based on ""guilt-by-association"" analysis of the above-mentioned ""known"" RNA markers and three molecular interaction networks, and survival analysis of 18 gene expression data sets with survival data. CRMarker v1.1 has a friendly interface and is freely available online at http://crmarker.hnnu.edu.cn/. We aim to build a comprehensive platform that is convenient for cancer researchers and clinicians to inquire and retrieve.",2021-01-30 +33507270,InSexBase: an annotated genomic resource of sex chromosomes and sex-biased genes in insects. 
,"Sex determination and the regulation of sexual dimorphism are among the most fascinating topics in modern biology. As the most species-rich group of sexually reproducing organisms on Earth, insects have multiple sex determination systems. Though sex chromosomes and sex-biased genes are well-studied in dozens of insects, their gene sequences are scattered in various databases. Moreover, a shortage of annotation hinders the deep mining of these data. Here, we collected the chromosome-level sex chromosome data of 49 insect species, including 34 X chromosomes, 15 Z chromosomes, 5 W chromosomes and 2 Y chromosomes. We also obtained Y-linked contigs of four insects species-Anopheles gambiae, Drosophila innubila, Drosophila yakuba and Tribolium castaneum. The unannotated chromosome-level sex chromosomes were annotated using a standard pipeline, yielding a total of 123 030 protein-coding genes, 2 159 427 repeat sequences, 894 miRNAs, 1574 rRNAs, 5105 tRNAs, 395 snoRNAs (small nucleolar RNA), 54 snRNAs (small nuclear RNA) and 5959 other ncRNAs (non-coding RNA). In addition, 36 781 sex-biased genes were identified by analyzing 62 RNA-seq (RNA sequencing) datasets. Together with 5707 sex-biased genes from the Drosophila genus collected from the Sex-Associated Gene Database, we obtained a total of 42 488 sex-biased genes from 13 insect species. All these data were deposited into InSexBase, a new user-friendly database of insect sex chromosomes and sex-biased genes. Database URL: http://www.insect-genome.com/Sexdb/.",2021-01-01 +32055857,"An update on the Symbiotic Genomes Database (SymGenDB): a collection of metadata, genomic, genetic and protein sequences, orthologs and metabolic networks of symbiotic organisms. 
","The Symbiotic Genomes Database (SymGenDB; http://symbiogenomesdb.uv.es/) is a public resource of manually curated associations between organisms involved in symbiotic relationships, maintaining a catalog of completely sequenced/finished bacterial genomes exclusively. It originally consisted of three modules where users could search for the bacteria involved in a specific symbiotic relationship, their genomes and their genes (including their orthologs). In this update, we present an additional module that includes a representation of the metabolic network of each organism included in the database, as Directed Acyclic Graphs (MetaDAGs). This module provides unique opportunities to explore the metabolism of each individual organism and/or to evaluate the shared and joint metabolic capabilities of the organisms of the same genera included in our listing, allowing users to construct predictive analyses of metabolic associations and complementation between systems. We also report a ~25% increase in manually curated content in the database, i.e. bacterial genomes and their associations, with a final count of 2328 bacterial genomes associated to 498 hosts. We describe new querying possibilities for all the modules, as well as new display features for the MetaDAGs module, providing a relevant range of content and utility. This update continues to improve SymGenDB and can help elucidate the mechanisms by which organisms depend on each other.",2020-01-01 +34462322,Statistical guidelines for quality control of next-generation sequencing techniques. ,"More and more next-generation sequencing (NGS) data are made available every day. However, the quality of this data is not always guaranteed. Available quality control tools require profound knowledge to correctly interpret the multiplicity of quality features. Moreover, it is usually difficult to know if quality features are relevant in all experimental conditions. 
Therefore, the NGS community would highly benefit from condition-specific data-driven guidelines derived from many publicly available experiments, which reflect routinely generated NGS data. In this work, we have characterized well-known quality guidelines and related features in big datasets and concluded that they are too limited for assessing the quality of a given NGS file accurately. Therefore, we present new data-driven guidelines derived from the statistical analysis of many public datasets using quality features calculated by common bioinformatics tools. Thanks to this approach, we confirm the high relevance of genome mapping statistics to assess the quality of the data, and we demonstrate the limited scope of some quality features that are not relevant in all conditions. Our guidelines are available at https://cbdm.uni-mainz.de/ngs-guidelines.",2021-08-30 +33811468,"Resourcing, annotating, and analysing synthetic peptides of SARS-CoV-2 for immunopeptidomics and other immunological studies.","SARS-CoV-2 has caused a significant ongoing pandemic worldwide. A number of studies have examined the T cell mediated immune responses against SARS-CoV-2, identifying potential T cell epitopes derived from the SARS-CoV-2 proteome. Such studies will aid in identifying targets for vaccination and immune monitoring. In this study, we applied tandem mass spectrometry and proteomic techniques to a library of ∼40,000 synthetic peptides, in order to generate a large dataset of SARS-CoV-2 derived peptide MS/MS spectra. On this basis, we built an online knowledgebase, termed virusMS (https://virusms.erc.monash.edu/), to document, annotate and analyse these synthetic peptides and their spectral information. VirusMS incorporates a user-friendly interface to facilitate searching, browsing and downloading the database content. 
Detailed annotations of the peptides, including experimental information, peptide modifications, predicted peptide-HLA (human leukocyte antigen) binding affinities, and peptide MS/MS spectral data, are provided in virusMS.",2021-04-14 +33283531,Association of Processed Meats and Alcohol Consumption with Renal Cell Carcinoma: A Worldwide Population-Based Study.,"The link between diet and renal cell carcinoma (RCC) is still unclear. The purpose of this study was to evaluate the association of diet with RCC's incidence and mortality rates worldwide. We conducted an ecological study including 170 countries, whose data on age-standardized (AS) incidence and mortality rates of RCC, dietary factors, and potentially confounding factors such as obesity, insufficient physical activity, tobacco smoking, hypertension, diabetes, and human development index (HDI) were collected and available on May 2020 from the Global Cancer Observatory, the Global Dietary Database, the Global Health Observatory data repository, the Diabetes Atlas 9th edition and the Human Development Report 2019. Univariable and multivariable linear regression analyses were performed to determine the association of dietary factors with incidence and mortality rates of RCC adjusted for the effects of population age and potentially confounding factors. Intake of processed meats and consumption of alcohol were both positively associated with AS incidence rates of RCC (β = 0.11, P < 0.001 and β = 0.1, P = 0.044, respectively). We suggest that high consumption of processed meats and/or alcohol is a risk factor for RCC. However, they were not associated with mortality. Further research is needed at an individual level.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2020.1856388.",2020-12-07 +33456216,Why do knees after total knee arthroplasty fail in different parts of the world?,"

Objective

The aim of this narrative review was to provide an overview of failure modes after total knee arthroplasty in different parts of the world based on data from worldwide representative studies and National Joint Registries.

Methods

A review of the available literature was performed using the keyword terms ""total knee arthroplasty"", ""revision"", ""failure"", ""reasons"", ""causes"", ""complications"", ""epidemiology"", ""etiology""; ""assessment"", ""painful knee"", ""registry"" and ""national"" in several combinations. The following databases were assessed: Pubmed (https://pubmed.ncbi.nlm.nih.gov), Cochrane Reviews (https://www.cochrane.org), Google Scholar (https://scholar.google.com). In addition, registry data were obtained directly from national registry archives. Due to the heterogeneity of available data it was decided to present the review in a narrative manner.

Results

Current literature report that infection has become the primary acute cause of TKA failure, while aseptic loosening and instability remain the overall most frequent reasons for revisions. Based on national registries certain tendencies can be deducted. The predominant overall failure mode of aseptic loosening is particularly found in Japan, United Kingdom, New Zealand and Switzerland. Leading early TKA failure mode represents infection with percentages of 20-30% in Sweden, Australia, New Zealand, Japan and the United States. Higher numbers could only be found in clinical studies on the Asian continent such as Korea (38%), China (53%), Iran (44%) and India (87%).

Conclusion

Although there are regional differences in TKA failure modes, TKA fails worldwide especially due to infections and aseptic loosening. It is important to diagnose these in good time and reliably using appropriate, standardized diagnostics in order to recommend the best possible therapy to the patient.",2020-12-31 +34349739,Beyond Taxonomic Identification: Integration of Ecological Responses to a Soil Bacterial 16S rRNA Gene Database.,"High-throughput sequencing 16S rRNA gene surveys have enabled new insights into the diversity of soil bacteria, and furthered understanding of the ecological drivers of abundances across landscapes. However, current analytical approaches are of limited use in formalizing syntheses of the ecological attributes of taxa discovered, because derived taxonomic units are typically unique to individual studies and sequence identification databases only characterize taxonomy. To address this, we used sequences obtained from a large nationwide soil survey (GB Countryside Survey, henceforth CS) to create a comprehensive soil specific 16S reference database, with coupled ecological information derived from survey metadata. Specifically, we modeled taxon responses to soil pH at the OTU level using hierarchical logistic regression (HOF) models, to provide information on both the shape of landscape scale pH-abundance responses, and pH optima (pH at which OTU abundance is maximal). We identify that most of the soil OTUs examined exhibited a non-flat relationship with soil pH. Further, the pH optima could not be generalized by broad taxonomy, highlighting the need for tools and databases synthesizing ecological traits at finer taxonomic resolution. We further demonstrate the utility of the database by testing against geographically dispersed query 16S datasets; evaluating efficacy by quantifying matches, and accuracy in predicting pH responses of query sequences from a separate large soil survey. 
We found that the CS database provided good coverage of dominant taxa; and that the taxa indicating soil pH in a query dataset corresponded with the pH classifications of top matches in the CS database. Furthermore we were able to predict query dataset community structure, using predicted abundances of dominant taxa based on query soil pH data and the HOF models of matched CS database taxa. The database with associated HOF model outputs is released as an online portal for querying single sequences of interest (https://shiny-apps.ceh.ac.uk/ID-TaxER/), and flat files are made available for use in bioinformatic pipelines. The further development of advanced informatics infrastructures incorporating modeled ecological attributes along with new functional genomic information will likely facilitate large scale exploration and prediction of soil microbial functional biodiversity under current and future environmental change scenarios.",2021-07-19 +33438817,Large-scale survey and database of high affinity ligands for peptide recognition modules.,"Many proteins involved in signal transduction contain peptide recognition modules (PRMs) that recognize short linear motifs (SLiMs) within their interaction partners. Here, we used large-scale peptide-phage display methods to derive optimal ligands for 163 unique PRMs representing 79 distinct structural families. We combined the new data with previous data that we collected for the large SH3, PDZ, and WW domain families to assemble a database containing 7,984 unique peptide ligands for 500 PRMs representing 82 structural families. For 74 PRMs, we acquired enough new data to map the specificity profiles in detail and derived position weight matrices and binding specificity logos based on multiple peptide ligands. 
These analyses showed that optimal peptide ligands resembled peptides observed in existing structures of PRM-ligand complexes, indicating that a large majority of the phage-derived peptides are likely to target natural peptide-binding sites and could thus act as inhibitors of natural protein-protein interactions. The complete dataset has been assembled in an online database (http://www.prm-db.org) that will enable many structural, functional, and biological studies of PRMs and SLiMs.",2020-12-01 +33308175,"ILDGDB: a manually curated database of genomics, transcriptomics, proteomics and drug information for interstitial lung diseases.","

Background

Interstitial lung diseases (ILDs), a diverse group of diffuse lung diseases, mainly affect the lung parenchyma. The low-throughput 'omics' technologies (genomics, transcriptomics, proteomics) and relative drug information have begun to reshaped our understanding of ILDs, whereas, these data are scattered among massive references and are difficult to be fully exploited. Therefore, we manually mined and summarized these data at a database (ILDGDB, http://ildgdb.org/ ) and will continue to update it in the future.

Main body

The current version of ILDGDB incorporates 2018 entries representing 20 ILDs and over 600 genes obtained from over 3000 articles in four species. Each entry contains detailed information, including species, disease type, detailed description of gene (e.g. official symbol of gene), and the original reference etc. ILDGDB is free, and provides a user-friendly web page. Users can easily search for genes of interest, view their expression pattern and detailed information, manage genes sets and submit novel ILDs-gene association.

Conclusion

The main principle behind ILDGDB's design is to provide an exploratory platform, with minimum filtering and interpretation, while making the presentation of the data very accessible, which will provide great help for researchers to decipher gene mechanisms and improve the prevention, diagnosis and therapy of ILDs.",2020-12-11 +31863748,ClusPro LigTBM: Automated Template-based Small Molecule Docking.,"The template-based approach has been essential for achieving high-quality models in the recent rounds of blind protein-protein docking competition CAPRI (Critical Assessment of Predicted Interactions). However, few such automated methods exist for protein-small molecule docking. In this paper, we present an algorithm for template-based docking of small molecules. It searches for known complexes with ligands that have partial coverage of the target ligand, performs conformational sampling and template-guided energy refinement to produce a variety of possible poses, and then scores the refined poses. The algorithm is available as the automated ClusPro LigTBM server. It allows the user to specify the target protein as a PDB file and the ligand as a SMILES string. The server then searches for templates and uses them for docking, presenting the user with top-scoring poses and their confidence scores. The method is tested on the Astex Diverse benchmark, as well as on the targets from the last round of the D3R (Drug Design Data Resource) Grand Challenge. The server is publicly available as part of the ClusPro docking server suite at https://ligtbm.cluspro.org/.",2019-12-19 +33335465,WAUC: A Multi-Modal Database for Mental Workload Assessment Under Physical Activity.,"Assessment of mental workload is crucial for applications that require sustained attention and where conditions such as mental fatigue and drowsiness must be avoided. 
Previous work that attempted to devise objective methods to model mental workload were mainly based on neurological or physiological data collected when the participants performed tasks that did not involve physical activity. While such models may be useful for scenarios that involve static operators, they may not apply in real-world situations where operators are performing tasks under varying levels of physical activity, such as those faced by first responders, firefighters, and police officers. Here, we describe WAUC, a multimodal database of mental Workload Assessment Under physical aCtivity. The study involved 48 participants who performed the NASA Revised Multi-Attribute Task Battery II under three different activity level conditions. Physical activity was manipulated by changing the speed of a stationary bike or a treadmill. During data collection, six neural and physiological modalities were recorded, namely: electroencephalography, electrocardiography, breathing rate, skin temperature, galvanic skin response, and blood volume pulse, in addition to 3-axis accelerometry. Moreover, participants were asked to answer the NASA Task Load Index questionnaire after each experimental section, as well as rate their physical fatigue level on the Borg fatigue scale. In order to bring our experimental setup closer to real-world situations, all signals were monitored using wearable, off-the-shelf devices. In this paper, we describe the adopted experimental protocol, as well as validate the subjective, neural, and physiological data collected. The WAUC database, including the raw data and features, subjective ratings, and scripts to reproduce the experiments reported herein will be made available at: http://musaelab.ca/resources/.",2020-12-01 +29860480,dbCRSR: a manually curated database for regulation of cancer radiosensitivity. ,"Radiotherapy is used to treat approximately 50% of all cancer patients, with varying prognoses. 
Intrinsic radiosensitivity is an important factor underlying the radiotherapeutic efficacy of this precise treatment. During the past decades, great efforts have been made to improve radiotherapy treatment through multiple strategies. However, invaluable data remains buried in the extensive radiotherapy literature, making it difficult to obtain an overall view of the detailed mechanisms leading to radiosensitivity, thus limiting advances in radiotherapy. To address this issue, we collected data from the relevant literature contained in the PubMed database and developed a literature-based database that we term the cancer radiosensitivity regulation factors database (dbCRSR). dbCRSR is a manually curated catalogue of radiosensitivity, containing multiple radiosensitivity regulation factors (395 coding genes, 119 non-coding RNAs and 306 chemical compounds) with appropriate annotation. To illustrate the value of the data we collected, data mining was performed including functional annotation and network analysis. In summary, dbCRSR is the first literature-based database to focus on radiosensitivity and provides a resource to better understand the detailed mechanisms of radiosensitivity. We anticipate dbCRSR will be a useful resource to enrich our knowledge and to promote further study of radiosensitivity.Database URL: http://bioinfo.ahu.edu.cn: 8080/dbCRSR/.",2018-01-01 +34814330,Purimeth: an integrated web-based tool for estimating and accounting for tumor purity in cancer DNA methylation studies.,"Proportion of cancerous cells in a tumor sample, known as ""tumor purity"", is a major source of confounding factor in cancer data analyses. Lots of computational methods are available for estimating tumor purity from different types of genomics data or based on different platforms, which makes it difficult to compare and integrate the estimated results. 
To rectify the deviation caused by tumor purity effect, a number of methods for downstream data analysis have been developed, including tumor sample clustering, association study and differential methylation between tumor samples. However, using these computational tools remains a daunting task for many researchers since they require non-trivial computational skills. To this end, we present Purimeth, an integrated web-based tool for estimating and accounting for tumor purity in cancer DNA methylation studies. Purimeth implements three state-of-the-art methods for tumor purity estimation from DNA methylation array data: InfiniumPurify, MEpurity and PAMES. It also provides graphical interface for various analyses including differential methylation (DM), sample clustering, and purification of tumor methylomes, all with the consideration of tumor purities. In addition, Purimeth catalogs estimated tumor purities for TCGA samples from nine methods for users to visualize and explore. In conclusion, Purimeth provides an easy-operated way for researchers to explore tumor purity and implement cancer methylation data analysis. It is developed using Shiny (Version 1.6.0) and freely available at http://purimeth.comp-epi.com/.",2021-10-01 +33196841,ProThermDB: thermodynamic database for proteins and mutants revisited after 15 years.,"ProThermDB is an updated version of the thermodynamic database for proteins and mutants (ProTherm), which has ∼31 500 data on protein stability, an increase of 84% from the previous version. It contains several thermodynamic parameters such as melting temperature, free energy obtained with thermal and denaturant denaturation, enthalpy change and heat capacity change along with experimental methods and conditions, sequence, structure and literature information. 
Besides, the current version of the database includes about 120 000 thermodynamic data obtained for different organisms and cell lines, which are determined by recent high throughput proteomics techniques using whole-cell approaches. In addition, we provided a graphical interface for visualization of mutations at sequence and structure levels. ProThermDB is cross-linked with other relevant databases, PDB, UniProt, PubMed etc. It is freely available at https://web.iitm.ac.in/bioinfo2/prothermdb/index.html without any login requirements. It is implemented in Python, HTML and JavaScript, and supports the latest versions of major browsers, such as Firefox, Chrome and Safari.",2021-01-01 +,Automated analysis of fatality rates for COVID 19 across different countries,"One of the significant parameters that helps in the reporting the highest risk areas, which have COVID 19 pandemic is case fatality rate (CFR). In this work, automated analysis was carried out to evaluate fatality rate (CFR) across different countries. Furthermore, a state of art algorithm is proposed to estimate CFR and it is possible to make it applicable in the mobile phone. This application will enable us to monitor the status level of the patients (suspected, exposed and infected) to save time, efforts and get a high quailty of the recordings. All data were obtained from (https://www.worldometers.info/coronavirus/) and pointed at the period between 27th March and 27th May 2020. Results present Spain and Egypt have a highest score of the fatality rate (approximately 24%) compared with previous research, which Italy was the highest score of the case fatality rate (CFR). On the other hand, Australia has had the lowest of the (CFR) in the current and previous researches. Furthermore, Spain has the highest percentage score of the total active cases and death rate: 0.41% and 0.00073% respectively. 
Documentation and comparison fatality rate of COVID 19 pandemic across different countries could assist in illustrating the strength of this pandemic, speed spreading and risk area which infected of this disease.",2020-09-26 +33125055,Clinically relevant updates of the HbVar database of human hemoglobin variants and thalassemia mutations.,"HbVar (http://globin.bx.psu.edu/hbvar) is a widely-used locus-specific database (LSDB) launched 20 years ago by a multi-center academic effort to provide timely information on the numerous genomic variants leading to hemoglobin variants and all types of thalassemia and hemoglobinopathies. Here, we report several advances for the database. We made clinically relevant updates of HbVar, implemented as additional querying options in the HbVar query page, allowing the user to explore the clinical phenotype of compound heterozygous patients. We also made significant improvements to the HbVar front page, making comparative data querying, analysis and output more user-friendly. We continued to expand and enrich the regular data content, involving 1820 variants, 230 of which are new entries. We also increased the querying potential and expanded the usefulness of HbVar database in the clinical setting. These several additions, expansions and updates should improve the utility of HbVar both for the globin research community and in a clinical setting.",2021-01-01 +32550548,VitiVar: A locus specific database of vitiligo associated genes and variations.,"Vitiligo is the most common skin pigmentation disorder which affects around 1% of the population worldwide. The disease has complex pathogenesis and is of multifactorial etiology, that finally culminates in patchy depigmentation of skin. Genetic contribution to the disease is well studied, however the information about multiple associated genes and contributing variations are scattered across the literature. 
To address this complex disorder affecting the skin, we systematically cataloged the genes and variations by creating a Locus Specific Database for vitiligo called, ""VitiVar"". This comprehensive resource houses manually curated 322 genes and 254 variations, from 202 articles indexed in PubMed. We applied an integrative approach to stratify genes and variations to facilitate dissection of vitiligo pathogenesis by layering it with expression status in specific constituent cell types of skin and in-house vitiligo expression data. Finally, we were able to demonstrate the utility of VitiVar by generating a vitiligo interactome using GeneMANIA and overlaying the vitiligo and cell type specific information. This interaction network yielded 20 new genes (apart from 322 VitiVar genes) of which we were able to prioritize IFI27 and IFI6 for further validation. This, thereby makes VitiVar a comprehensive integrative platform in unravelling disease biology by providing meaningful leads for functional interrogation. VitiVar is freely accessible to the research community for prioritizing and validating the candidate genes and variations (http://vitivar.igib.res.in/).",2019-05-11 +33599248,bc-GenExMiner 4.5: new mining module computes breast cancer differential gene expression analyses. ,"'Breast cancer gene-expression miner' (bc-GenExMiner) is a breast cancer-associated web portal (http://bcgenex.ico.unicancer.fr). Here, we describe the development of a new statistical mining module, which permits several differential gene expression analyses, i.e. 'Expression' module. Sixty-two breast cancer cohorts and one healthy breast cohort with their corresponding clinicopathological information are included in bc-GenExMiner v4.5 version. Analyses are based on microarray or RNAseq transcriptomic data. 
Thirty-nine differential gene expression analyses, grouped into 13 categories, according to clinicopathological and molecular characteristics ('Targeted' and 'Exhaustive') and gene expression ('Customized'), have been developed. Output results are visualized in four forms of plots. This new statistical mining module offers, among other things, the possibility to compare gene expression in healthy (cancer-free), tumour-adjacent and tumour tissues at once and in three triple-negative breast cancer subtypes (i.e. C1: molecular apocrine tumours; C2: basal-like tumours infiltrated by immune suppressive cells and C3: basal-like tumours triggering an ineffective immune response). Several validation tests showed that bioinformatics process did not alter the pathobiological information contained in the source data. In this work, we developed and demonstrated that bc-GenExMiner 'Expression' module can be used for exploratory and validation purposes. Database URL: http://bcgenex.ico.unicancer.fr.",2021-02-01 +32952115,CNGBdb: China National GeneBank DataBase.,"China National GeneBank DataBase (CNGBdb) is a data platform aiming to systematically archiving and sharing of multi-omics data in life science. As the service portal of Bio-informatics Data Center of the core structure, namely, ""Three Banks and Two Platforms"" of China National GeneBank (CNGB), CNGBdb has the advantages of rich sample resources, data resources, cooperation projects, powerful data computation and analysis capabilities. With the advent of high throughput sequencing technologies, research in life science has entered the big data era, which is in the need of closer international cooperation and data sharing. With the development of China's economy and the increase of investment in life science research, we need to establish a national public platform for data archiving and sharing in life science to promote the systematic management, application and industrial utilization. 
Currently, CNGBdb can provide genomic data archiving, information search engines, data management and data analysis services. The data schema of CNGBdb has covered projects, samples, experiments, runs, assemblies, variations and sequences. Until May 22, 2020, CNGBdb has archived 2176 research projects and more than 2221 TB sequencing data submitted by researchers globally. In the future, CNGBdb will continue to be dedicated to promoting data sharing in life science research and improving the service capability. CNGBdb website is: https://db.cngb.org/.",2020-08-01 +34530999,VitiVar: A locus specific database of vitiligo associated genes and variations.,"Vitiligo is the most common skin pigmentation disorder which affects around 1% of the population worldwide. The disease has complex pathogenesis and is of multifactorial etiology, that finally culminates in patchy depigmentation of skin. Genetic contribution to the disease is well studied, however the information about multiple associated genes and contributing variations are scattered across the literature. To address this complex disorder affecting the skin, we systematically cataloged the genes and variations by creating a Locus Specific Database for vitiligo called, ""VitiVar"". This comprehensive resource houses manually curated 322 genes and 254 variations, from 202 articles indexed in PubMed. We applied an integrative approach to stratify genes and variations to facilitate dissection of vitiligo pathogenesis by layering it with expression status in specific constituent cell types of skin and in-house vitiligo expression data. Finally, we were able to demonstrate the utility of VitiVar by generating a vitiligo interactome using GeneMANIA and overlaying the vitiligo and cell type specific information. This interaction network yielded 20 new genes (apart from 322 VitiVar genes) of which we were able to prioritize IFI27 and IFI6 for further validation. 
This, thereby makes VitiVar a comprehensive integrative platform in unravelling disease biology by providing meaningful leads for functional interrogation. VitiVar is freely accessible to the research community for prioritizing and validating the candidate genes and variations (http://vitivar.igib.res.in/).",2019-05-11 +30379998,dbCPM: a manually curated database for exploring the cancer passenger mutations. ,"While recently emergent driver mutation data sets are available for developing computational methods to predict cancer mutation effects, benchmark sets focusing on passenger mutations are largely missing. Here, we developed a comprehensive literature-based database of Cancer Passenger Mutations (dbCPM), which contains 941 experimentally supported and 978 putative passenger mutations derived from a manual curation of the literature. Using the missense mutation data, the largest group in the dbCPM, we explored patterns of missense passenger mutations by comparing them with the missense driver mutations and assessed the performance of four cancer-focused mutation effect predictors. We found that the missense passenger mutations showed significant differences with drivers at multiple levels, and several appeared in both the passenger and driver categories, showing pleiotropic functions depending on the tumor context. Although all the predictors displayed good true positive rates, their true negative rates were relatively low due to the lack of negative training samples with experimental evidence, which suggests that a suitable negative data set for developing a more robust methodology is needed. We hope that the dbCPM will be a benchmark data set for improving and evaluating prediction algorithms and serve as a valuable resource for the cancer research community. dbCPM is freely available online at http://bioinfo.ahu.edu.cn:8080/dbCPM.",2018-10-30 +34373890,Elucidation of dynamic microRNA regulations in cancer progression using integrative machine learning.,"

Motivation

Empowered by advanced genomics discovery tools, recent biomedical research has produced a massive amount of genomic data on (post-)transcriptional regulations related to transcription factors, microRNAs, long non-coding RNAs, epigenetic modifications and genetic variations. Computational modeling, as an essential research method, has generated promising testable quantitative models that represent complex interplay among different gene regulatory mechanisms based on these data in many biological systems. However, given the dynamic changes of interactome in chaotic systems such as cancers, and the dramatic growth of heterogeneous data on this topic, such promise has encountered unprecedented challenges in terms of model complexity and scalability. In this study, we introduce a new integrative machine learning approach that can infer multifaceted gene regulations in cancers with a particular focus on microRNA regulation. In addition to new strategies for data integration and graphical model fusion, a supervised deep learning model was integrated to identify conditional microRNA-mRNA interactions across different cancer stages.

Results

In a case study of human breast cancer, we have identified distinct gene regulatory networks associated with four progressive stages. The subsequent functional analysis focusing on microRNA-mediated dysregulation across stages has revealed significant changes in major cancer hallmarks, as well as novel pathological signaling and metabolic processes, which shed light on microRNAs' regulatory roles in breast cancer progression. We believe this integrative model can be a robust and effective discovery tool to understand key regulatory characteristics in complex biological systems.

Availability

http://sbbi-panda.unl.edu/pin/.",2021-11-01 +33719338,Construction of circRNA-Based ceRNA Network to Reveal the Role of circRNAs in the Progression and Prognosis of Hepatocellular Carcinoma.,"

Background

Circular RNAs (circRNAs) are now under hot discussion as novel promising biomarkers for patients with hepatocellular carcinoma (HCC). The purpose of our study is to identify several competing endogenous RNA (ceRNA) networks related to the prognosis and progression of HCC and to further investigate the mechanism of their influence on tumor progression.

Methods

First, we obtained gene expression data related to liver cancer from The Cancer Genome Atlas (TCGA) database (http://www.portal.gdc.cancer.gov/), including microRNA (miRNA) sequence, RNA sequence, and clinical information. A co-expression network was constructed through the Weighted Correlation Network Analysis (WGCNA) software package in R software. The differentially expressed messenger RNAs (DEmRNAs) in the key module were analyzed with the Database for Annotation Visualization and Integrated Discovery (DAVID) (https://david.ncifcrf.gov/summary.jsp) to perform functional enrichment analysis including Kyoto Encyclopedia of Genes and Genomes (KEGG) and Gene Ontology (GO). The data of miRNA expression and clinical information downloaded from TCGA were utilized for survival analysis to detect the prognostic value of the DEmiRNAs of the key module.

Results

The 201 differentially expressed miRNAs (DEmiRNAs) and 3,783 DEmRNAs were preliminarily identified through differential expression analysis. The co-expression networks of DEmiRNAs and DEmRNAs were constructed with WGCNA. Further analysis confirmed four miRNAs in the most significant module (blue module) were associated with the overall survival (OS) of patients with liver cancer, including hsa-miR-92b-3p, hsa-miR-122-3p, hsa-miR-139-5p, and hsa-miR-7850-5p. DAVID was used for functional enrichment analysis of 286 co-expressed mRNAs. The GO analysis results showed that the top enriched GO terms were oxidation-reduction process, extracellular exosome, and iron ion binding. In KEGG pathway analysis, the top three enriched terms included metabolic pathways, fatty acid degradation, and valine, leucine, and isoleucine degradation. In addition, we intersected the miRNA-mRNA interaction prediction results with the differentially expressed and prognostic mRNAs. We found that hsa-miR-92b-3p can be related to CPEB3 and ACADL. By overlapping the data of predicted circRNAs by circBank and differentially expressed circRNAs of GSE94508, we screened hsa_circ_0077210 as the upstream regulatory molecule of hsa-miR-92b-3p. Hsa_circ_0077210/hsa-miR-92b-3p/cytoplasmic polyadenylation element binding protein-3 (CPEB3) and acyl-Coenzyme A dehydrogenase, long chain (ACADL) were validated in HCC tissue.

Conclusion

Our research provides a mechanistic elucidation of the unknown ceRNA regulatory network in HCC. Hsa_circ_0077210 might serve a momentous therapeutic role to restrain the occurrence and development of HCC.",2021-02-26 +33381851,Dementia key gene identification with multi-layered SNP-gene-disease network.,"

Motivation

Recently, various approaches for diagnosing and treating dementia have received significant attention, especially in identifying key genes that are crucial for dementia. If the mutations of such key genes could be tracked, it would be possible to predict the time of onset of dementia and significantly aid in developing drugs to treat dementia. However, gene finding involves tremendous cost, time and effort. To alleviate these problems, research on utilizing computational biology to decrease the search space of candidate genes is actively conducted. In this study, we propose a framework in which diseases, genes and single-nucleotide polymorphisms are represented by a layered network, and key genes are predicted by a machine learning algorithm. The algorithm utilizes a network-based semi-supervised learning model that can be applied to layered data structures.

Results

The proposed method was applied to a dataset extracted from public databases related to diseases and genes with data collected from 186 patients. A portion of key genes obtained using the proposed method was verified in silico through PubMed literature, and the remaining genes were left as possible candidate genes.

Availability and implementation

The code for the framework will be available at http://www.alphaminers.net/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +34746683,Sequenced Breakpoints of Crossover Suppressor/Inversion qC1. ,"We used whole-genome sequencing (WGS) data from a number of balanced lethal strains in Caenorhabditis elegans to show that the crossover suppressor qC1 is an inversion. The rearrangement is complex, with a large primary inversion that contains several other smaller inverted regions. The graphical representation below depicts these various qC1 rearrangements for ease of conceptualization. It is the simplest chromosomal structure compatible with the data currently available, but even then it is worth noting that the complexity of the qC1 chromosome can make the graphical reconstruction difficult to understand, and it may seem a bit like relativity theory or artwork from M.C. Escher (https://moa.byu.edu/m-c-eschers-relativity/).",2021-11-03 +30380109,"iEKPD 2.0: an update with rich annotations for eukaryotic protein kinases, protein phosphatases and proteins containing phosphoprotein-binding domains.","Here, we described the updated database iEKPD 2.0 (http://iekpd.biocuckoo.org) for eukaryotic protein kinases (PKs), protein phosphatases (PPs) and proteins containing phosphoprotein-binding domains (PPBDs), which are key molecules responsible for phosphorylation-dependent signalling networks and participate in the regulation of almost all biological processes and pathways. In total, iEKPD 2.0 contained 197 348 phosphorylation regulators, including 109 912 PKs, 23 294 PPs and 68 748 PPBD-containing proteins in 164 eukaryotic species. 
In particular, we provided rich annotations for the regulators of eight model organisms, especially humans, by compiling and integrating the knowledge from 100 widely used public databases that cover 13 aspects, including cancer mutations, genetic variations, disease-associated information, mRNA expression, DNA & RNA elements, DNA methylation, molecular interactions, drug-target relations, protein 3D structures, post-translational modifications, protein expressions/proteomics, subcellular localizations and protein functional annotations. Compared with our previously developed EKPD 1.0 (∼0.5 GB), iEKPD 2.0 contains ∼99.8 GB of data with an ∼200-fold increase in data volume. We anticipate that iEKPD 2.0 represents a more useful resource for further study of phosphorylation regulators.",2019-01-01 +30357418,Editome Disease Knowledgebase (EDK): a curated knowledgebase of editome-disease associations in human.,"RNA editing, as an essential co-/post-transcriptional RNA modification type, plays critical roles in many biological processes and involves with a variety of human diseases. Although several databases have been developed to collect RNA editing data in both model and non-model animals, there still lacks a resource integrating associations between editome and human disease. In this study, we present Editome-Disease Knowledgebase (EDK; http://bigd.big.ac.cn/edk), an integrated knowledgebase of RNA editome-disease associations manually curated from published literatures. In the current version, EDK incorporates 61 diseases associated with 248 experimentally validated abnormal editing events located in 32 mRNAs, 16 miRNAs, 1 lncRNA and 11 viruses, and 44 aberrant activities involved with 6 editing enzymes, which together are curated from more than 200 publications. 
In addition, to facilitate standardization of editome-disease knowledge integration, we propose a data curation model in EDK, factoring an abundance of relevant information to fully capture the context of editome-disease associations. Taken together, EDK is a comprehensive collection of editome-disease associations and bears the great utility in aid of better understanding the RNA editing machinery and complex molecular mechanisms associated with human diseases.",2019-01-01 +33483747,Predicting Discharge Disposition Following Meningioma Resection Using a Multi-Institutional Natural Language Processing Model.,"

Background

Machine learning (ML)-based predictive models are increasingly common in neurosurgery, but typically require large databases of discrete variables for training. Natural language processing (NLP) can extract meaningful data from unstructured text.

Objective

To present an NLP model that predicts nonhome discharge and a point-of-care implementation.

Methods

We retrospectively collected age, preoperative notes, and radiology reports from 595 adults who underwent meningioma resection in an academic center from 1995 to 2015. A total of 32 algorithms were trained with the data; the 3 best performing algorithms were combined to form an ensemble. Predictive ability, assessed by area under the receiver operating characteristic curve (AUC) and calibration, was compared to a previously published model utilizing 52 neurosurgeon-selected variables. We then built a multi-institutional model by incorporating notes from 693 patients at another center into algorithm training. Permutation importance was used to analyze the relative importance of each input to model performance. Word clouds and non-negative matrix factorization were used to analyze predictive features of text.

Results

The single-institution NLP model predicted nonhome discharge with AUC of 0.80 (95% CI = 0.74-0.86) on internal and 0.76 on holdout validation compared to AUC of 0.77 (95% CI = 0.73-0.81) and 0.74 for the 52-variable ensemble. The multi-institutional model performed similarly well with AUC = 0.78 (95% CI = 0.74-0.81) on internal and 0.76 on holdout validation. Preoperative notes most influenced predictions. The model is available at http://nlp-home.insds.org.

Conclusion

ML and NLP are underutilized in neurosurgery. Here, we construct a multi-institutional NLP model that predicts nonhome discharge.",2021-03-01 +30717676,A new version of the ANDSystem tool for automatic extraction of knowledge from scientific publications with expanded functionality for reconstruction of associative gene networks by considering tissue-specific gene expression.,"

Background

Consideration of tissue-specific gene expression in reconstruction and analysis of molecular genetic networks is necessary for a proper description of the processes occurring in a specified tissue. Currently, there are a number of computer systems that allow the user to reconstruct molecular-genetic networks using the data automatically extracted from the texts of scientific publications. Examples of such systems are STRING, Pathway Commons, MetaCore and Ingenuity. The MetaCore and Ingenuity systems permit taking into account tissue-specific gene expression during the reconstruction of gene networks. Previously, we developed the ANDSystem tool, which also provides an automated extraction of knowledge from scientific texts and allows the reconstruction of gene networks. The main difference between our system and other tools is in the different types of interactions between objects, which makes the ANDSystem complementary to existing well-known systems. However, previous versions of the ANDSystem did not contain any information on tissue-specific expression.

Results

A new version of the ANDSystem has been developed. It offers the reconstruction of associative gene networks while taking into account the tissue-specific gene expression. The ANDSystem knowledge base features information on tissue-specific expression for 272 tissues. The system allows the reconstruction of combined gene networks, as well as performing the filtering of genes from such networks using the information on their tissue-specific expression. As an example of the application of such filtering, the gene network of the extrinsic apoptotic signaling pathway was analyzed. It was shown that considering different tissues can lead to changes in gene network structure, including changes in such indicators as betweenness centrality of vertices, clustering coefficient, network centralization, network density, etc. CONCLUSIONS: The consideration of tissue specificity can play an important role in the analysis of gene networks, in particular solving the problem of finding the most significant central genes. Thus, the new version of ANDSystem can be employed for a wide range of tasks related to biomedical studies of individual tissues. It is available at http://www-bionet.sscc.ru/and/cell/.",2019-02-05 +,479. Mobility Restrictions and COVID-19 Pandemic Outbreak Control,"Abstract

Background

In December 2019, a cluster of patients with pneumonia was reported in the city of Wuhan, capital of Hubei province in China, caused by a novel coronavirus: SARS-CoV-2. The epidemiological compartmental susceptible-exposed-infected-recovered (SEIR) model has been previously used during the initial wave of the H1N1 influenza pandemic in 2009. This study investigates whether the SEIR model, combined with mobility change parameters, can determine the likelihood of establishing control over an epidemic in a city, state or country.

Methods

The critical inputs in the prediction of COVID-19 by a SEIR model are the values of the basic reproduction number (R0) and the infectious period, in days. R0 and the infectious periods were calculated by mathematical constrained optimization, and used to determine the numerically minimum SEIR model errors in a country, based on COVID-19 data until April 11th. The Community Mobility Reports from Google Maps (https://www.google.com/covid19/mobility/) provided mobility changes on April 5th compared to the baseline (Jan 3rd to Feb 6th). The data was used to measure the non-pharmacological intervention adherence. The impact of each mobility component was assessed by logistic regression models. COVID-19 control was defined as an R0 of the SEIR model in a country of less than 1.0. Algorithm for the SEIR model applied to COVID-19 (initialization) Table 01: Algorithm for the SEIR model applied to COVID-19 (calculation of new COVID-19 cases day-by-day)

Results

Residential mobility restriction presented the higher logistic coefficient (17.7), meaning higher impact on outbreak control. Workplace mobility restriction was the second most effective measure, considering a restriction minimum of 56% for a 53% chance of outbreak control. Retail and recreation mobility presented 53%, and 86% respectively. Transit stations (96% and 54%) were also assessed. Park mobility restriction demonstrated the lowest effectiveness in outbreak control, considering that absolute (100%) restriction provided the lowest chance of outbreak control (46%). Table 2: The Community Mobility Reports from Google Maps: Mobility changes on April 5 compared to the baseline (5- week period; Jan 3–Feb 6, 2020): T_infectious and R0 obtained by using COVID-19 new cases day-by-day in each country, adjusted to the SEIR model by mathematical constrained optimization Logistic regression models to evaluate the chance of an epidemic control based on the non-pharmacological interventions adherence Simulation of the impact of the mobility component in the chance of outbreak control: analysis by using the logistic regression model summarized in Table 2

Conclusion

Residential mobility restriction is the most effective measure. The degree to which mobility restrictions increase or decrease the overall epidemic size depends on the level of risk in each community and the characteristics of the disease. More research is required in order to estimate the optimal balance between mobility restriction, outbreak control, economy and freedom of movement.

Disclosures

All Authors: No reported disclosures",2020-10-01 +29092072,Mouse Genome Database (MGD)-2018: knowledgebase for the laboratory mouse.,"The Mouse Genome Database (MGD; http://www.informatics.jax.org) is the key community mouse database which supports basic, translational and computational research by providing integrated data on the genetics, genomics, and biology of the laboratory mouse. MGD serves as the source for biological reference data sets related to mouse genes, gene functions, phenotypes and disease models with an increasing emphasis on the association of these data to human biology and disease. We report here on recent enhancements to this resource, including improved access to mouse disease model and human phenotype data and enhanced relationships of mouse models to human disease.",2018-01-01 +31619460,An Atlas of Transcription Factors Expressed in Male Pupal Terminalia of Drosophila melanogaster.,"During development, transcription factors and signaling molecules govern gene regulatory networks to direct the formation of unique morphologies. As changes in gene regulatory networks are often implicated in morphological evolution, mapping transcription factor landscapes is important, especially in tissues that undergo rapid evolutionary change. The terminalia (genital and anal structures) of Drosophila melanogaster and its close relatives exhibit dramatic changes in morphology between species. While previous studies have identified network components important for patterning the larval genital disc, the networks governing adult structures during pupal development have remained uncharted. Here, we performed RNA-seq in whole Drosophila melanogaster male terminalia followed by in situ hybridization for 100 highly expressed transcription factors during pupal development. We find that the male terminalia are highly patterned during pupal stages and that specific transcription factors mark separate structures and substructures. 
Our results are housed online in a searchable database (https://flyterminalia.pitt.edu/) as a resource for the community. This work lays a foundation for future investigations into the gene regulatory networks governing the development and evolution of Drosophila terminalia.",2019-12-03 +33161418,MobiDetails: online DNA variants interpretation.,"MobiDetails is an expert tool, online application which gathers useful data for the interpretation of DNA variants in the context of molecular diagnosis. It brings together in a single tool many sources of data, such as population genetics, various kinds of predictors, Human Genome Variation Society (HGVS) nomenclatures, curated databases, and access to various annotations. Accurate interpretation of DNA variants is crucial and can impact the patient care or have familial outcomes (prenatal diagnosis). Its importance will increase in the coming years with the expansion of the personalized medicine. MobiDetails is specifically designed to help with this task. Exonic or intronic substitutions and small insertions/deletions related to more than 18,000 human genes are easily submitted and annotated in real-time. It is a responsive website that can be accessed using mobiles or tablets during medical staff meetings. MobiDetails is based on publicly available resources, does not include any specific data on patients or phenotypes, and is freely available for academic use at https://mobidetails.iurc.montp.inserm.fr/MD/ .",2020-11-07 +33594411,"Viral Host Range database, an online tool for recording, analyzing and disseminating virus-host interactions. ","Viruses are ubiquitous in the living world, and their ability to infect more than one host defines their host range. However, information about which virus infects which host, and about which host is infected by which virus, is not readily available. 
We developed a web-based tool called the Viral Host Range database to record, analyze and disseminate experimental host range data for viruses infecting archaea, bacteria and eukaryotes. The ViralHostRangeDB application is available from https://viralhostrangedb.pasteur.cloud. Its source code is freely available from the Gitlab hub of Institut Pasteur (https://gitlab.pasteur.fr/hub/viralhostrangedb).",2021-02-17 +33529135,Disruption of the Atrophy-based Functional Network in Multiple Sclerosis Is Associated with Clinical Disability: Validation of a Meta-Analytic Model in Resting-State Functional MRI.,"Background In multiple sclerosis (MS), gray matter (GM) atrophy exhibits a specific pattern, which correlates strongly with clinical disability. However, the mechanism of regional specificity in GM atrophy remains largely unknown. Recently, the network degeneration hypothesis (NDH) was quantitatively defined (using coordinate-based meta-analysis) as the atrophy-based functional network (AFN) model, which posits that localized GM atrophy in MS is mediated by functional networks. Purpose To test the NDH in MS in a data-driven manner using the AFN model to direct analyses in an independent test sample. Materials and Methods Model fit testing was conducted with structural equation modeling, which is based on the computation of semipartial correlations. Model verification was performed in coordinate-based data of healthy control participants from the BrainMap database (https://www.brainmap.org). Model validation was conducted in prospectively acquired resting-state functional MRI in participants with relapsing-remitting MS who were recruited between September 2018 and January 2019. Correlation analyses of model fit indices and volumetric measures with Expanded Disability Status Scale (EDSS) scores and disease duration were performed. Results Model verification of healthy control participants included 80 194 coordinates from 9035 experiments. 
Model verification in healthy control data resulted in excellent model fit (root mean square error of approximation, 0.037; 90% CI: 0.036, 0.039). Twenty participants (mean age, 36 years ± 9 [standard deviation]; 12 women) with relapsing-remitting MS were evaluated. Model validation in resting-state functional MRI in participants with MS resulted in deviation from optimal model fit (root mean square error of approximation, 0.071; 90% CI: 0.070, 0.072), which correlated with EDSS scores (r = 0.68; P = .002). Conclusion The atrophy-based functional network model predicts functional network disruption in multiple sclerosis (MS), thereby supporting the network degeneration hypothesis. On resting-state functional MRI scans, reduced functional network integrity in participants with MS had a strong positive correlation with clinical disability. © RSNA, 2021 Online supplemental material is available for this article.",2021-02-02 +32702093,"AciDB 1.0: a database of acidophilic organisms, their genomic information and associated metadata.","

Motivation

There are about 600 available genome sequences of acidophilic organisms (grow at a pH < 5) from the three domains of the Tree of Life. Information about acidophiles is scattered over many heterogeneous sites making it extraordinarily difficult to link physiological traits with genomic data. We were motivated to generate a curated, searchable database to address this problem.

Results

AciDB 1.0 is a curated database of sequenced acidophiles that enables researchers to execute complex queries linking genomic features to growth data, environmental descriptions and taxonomic information.

Availability and implementation

AciDB 1.0 is freely available online at: http://AciDB.cl. The source code is released under an MIT license at: https://gitlab.com/Hawkline451/acidb/.",2020-12-01 +,Workshop Abstracts,"Rationale/Background: Feedback is a dynamic and co-constructive interaction in the context of a safe and mutually respectful relationship for the purpose of challenging a learner's (and educator's) ways of thinking, acting or being to support growth (Ajjawi&Regehr, 2019). The R2C2 model for feedback and coaching, with four phases in which supervisors and learners build relationship, explore reactions and reflections, determine content, and coach for change to co-create an action plan, was developed as a model to facilitate such conversations. It was based on theory and research related to self-assessment, cognitive domains, humanism, commitment to change and implementation science. It has been tested and found effective for work with physicians in practice, nurse practitioners, and residents across several countries and disciplines. Recently, the researchers have modified the R2C2 model for use with in-the-moment feedback and coaching that occur in the clinical environment. This workshop will provide participants with an opportunity to explore and practice the R2C2 in-the-moment model (https://medicine.dal.ca/departments/core-units/cpd/faculty-development/R2C2.html) and discuss its applicability within their own context. Instructional Methods: This interactive workshop will draw upon participants' experiences through: 1. Introductions and large group discussion of participant experiences (10 minutes) 2. Presentation and demonstration of the model (15 mins) 3. Small group discussion of the model (10 mins) 4. Coaching practice with case scenarios and debriefing (40 mins) 5. 
Large group discussion of R2C2 in-the moment coaching and feedback and take-home messages (15 mins) Target audience: Educators at all levels of the education continuum with an interest in feedback and coaching Learning Objective: Share experiences providing “in-the-moment” feedback within clinical settings. Practice applying the R2C2 ITM model to case scenarios. Identify the utility of and barriers to integration of the R2C2 in-the moment (ITM) feedback and coaching model and its application within their work. Rationale/Background: Storytelling is a powerful tool in transforming the way people remember and respond to information. Research in cognitive sciences indicate that humans have been telling stories for thousands of years and human brains are wired to process and respond to them. Both Programmatic Assessment and Program Evaluation in medical education involve collecting large volumes of data and making informed decisions using that data. The use of online exam platforms and survey tools enables us to collect the data, but effective presentation and communication of assessment and evaluation data is still evolving. In the past 3 years, our office explored the use of data visualization and storytelling to support data exploration and analysis, high-stakes decision making, exam bank quality auditing, and identifying areas of improvements as a part of our overall Program Evaluation Model. Instructional Methods: We will begin by presenting theories and best practices in data visualization and data storytelling. A brief presentation of real examples of data visualization from our office will follow. We will then divide participants of different levels of expertise into small working groups to engage in two visualization and storytelling design activities. There will be a Q&A session at the end followed by a brief wrap-up. 
Target audience: Administrative leaders, department heads, education scientists, data practitioners and staff who are involved in medical school assessment and evaluation areas. Learning Objective: Identify data visualization and storytelling opportunities in student assessment and program evaluation. Learn how to present quantitative information effectively Develop confidence in effectively engaging and persuading your audience with storytelling Rationale/Background: Study methods used by many students in undergraduate degree programs are typically ineffective in medicine. We can influence knowledge retention and recall using different teaching strategies. Research has shown that spaced learning, interleaving and testing are effective ways in which to enhance knowledge retention. Cognitive load is an important consideration when teaching complex key concepts. This workshop explores how we can incorporate our understanding of knowledge retention and recall to enhance learning in our medical curricula, and encourage effective study strategies in our learners. Instructional Methods: The format includes brief interactive presentations interspersed with individual and group activities. Participants will engage with known research around knowledge retention and recall and apply this to their own teaching experience. The techniques used in the workshop will demonstrate the different methods participants can incorporate into their own teaching, and in turn, encourage their learners to adopt. Specific activities: 1. Consider the methods they used to study during training and place in sequence the success rate of different study strategies currently used by students 2. Think, pair share around their own experiences of learning and the strategies used 3. Identify a teaching activity in their own institution and consider how they might change the teaching format to maximize comprehension, retention and recall. 
Target audience: Educators with an interest in designing curricula that maximize comprehension, retention and recall of knowledge. Learning Objective: By the end of this workshop, participants will be able to: + Explain why the more common study methods employed by students are ineffective Give examples of teaching strategies that maximize learning and recall Contrast current curricular teaching methods with proven strategies to maximize learning Apply the concept of scaffolding to a course or topic they are responsible for teaching",2020-04-01 +34118462,Consistent Alterations of Human Fecal Microbes After Transplantation into Germ-free Mice.,"Fecal microbiota transplantation (FMT) of human fecal samples into germ-free (GF) mice is useful for establishing causal relationships between the gut microbiota and human phenotypes. However, due to the intrinsic differences between human and mouse intestines and the different diets of the two organisms, it may not be possible to replicate human phenotypes in mice through FMT; similarly, treatments that are effective in mouse models may not be effective in humans. In this study, we aimed to identify human gut microbes that undergo significant and consistent changes (i.e., in relative abundances) after transplantation into GF mice in multiple experimental settings. We collected 16S rDNA-seq data from four published studies and analyzed the gut microbiota profiles from 1713 human-mouse pairs. Strikingly, on average, we found that only 47% of the human gut microbes could be re-established in mice at the species level, among which more than 1/3 underwent significant changes (referred to as ""variable taxa""). Most of the human gut microbes that underwent significant changes were consistent across multiple human-mouse pairs and experimental settings. Consequently, about 1/3 of human samples changed their enterotypes, i.e., significant changes in their leading species after FMT. 
Mice fed with a controlled diet showed a lower enterotype change rate (23.5%) than those fed with a noncontrolled diet (49.0%), suggesting a possible solution for rescue. Most of the variable taxa have been reported to be implicated in human diseases, with some recognized as the causative species. Our results highlight the challenges of using a mouse model to replicate human gut microbiota-associated phenotypes, provide useful information for researchers using mice in gut microbiota studies, and call for additional validations after FMT. An online database named FMT-DB is publicly available at http://fmt2mice.humangut.info/#/.",2021-06-09 +34841127,Effect of temperature and humidity on coronavirus infection in Pakistan.,"Ongoing Coronavirus epidemic (COVID-19) identified first in Wuhan, China posed huge impact on public health and economy around the globe. Both cough and sneeze based droplets or aerosols encapsulated COVID-19 particles are responsible for airborne transmission of this virus and caused an unexpected escalation and high mortality worldwide. Current study intends to investigate the correlation of COVID-19 epidemic with meteorological parameters, particularly temperature and humidity. A data set of Epidemiological data of COVID-19 for highly infected provinces of Pakistan was collected from the official website of (https://www.covid.gov.pk/) and weather data was collected from (https://www.timeanddate.com/) during the time period of 1st March to 30th September 2020. The GrapPad prism 5 Software was used to calculate the mean and standard error of mean (SEM). In the current study the incident of daily covid cases is recorded higher in the month of June while the less number of case were reported in the month of May as compared to the other months (April, May, June, July, September and August) in the four province of Pakistan. 
We also find out that the incident of Covid19 were high at higher temperature (like the average temperature in the month of June 37 °C) while less cases were reported in May the average temperature was 29.5 °C. Furthermore the incident of covid cases were less reported at low humidity while more intendant with high humidity. Pearson's (r) determine the strength of the relationship between the variables. Pearson's correlation coefficient test employed for data analysis revealed that temperature average (TA) and average humidity is not a significant correlated with COVID-19 pandemic. The results obtained from the current analysis for selected parameters indirect correlation of COVID-19 transmission with temperature variation, and humidity. In the present study association of parameters is not correlated with COVID-19 pandemic, suggested need of more strict actions and control measures for highly populated cities. These findings will be helpful for health regulatory authorities and policy makers to take specific measures to combat COVID-19 epidemic in Pakistan.",2021-11-20 +32730951,Usage Patterns of a Web-Based Palliative Care Content Platform (PalliCOVID) During the COVID-19 Pandemic.,"

Context

The COVID-19 pandemic has highlighted the essential role of palliative care to support the delivery of compassionate, goal-concordant patient care. We created the Web-based application, PalliCOVID (https://pallicovid.app/), in April 2020 to provide all clinicians with convenient access to palliative care resources and support. PalliCOVID features evidence-based clinical guidelines, educational content, and institutional protocols related to palliative care for COVID-19 patients. It is a publicly available resource accessible from any mobile device or desktop computer that provides clinicians with access to palliative care guidance across a variety of care settings, including the emergency department, hospital ward, intensive care unit, and primary care practice.

Objective

The primary objective of this study was to evaluate usage patterns of PalliCOVID to understand user behavior in relation to this palliative care content platform during the period of the local peak of COVID-19 infection in Massachusetts.

Methods

We retrospectively analyzed deidentified usage data collected by Google Analytics from the first day of PalliCOVID's launch on April 7, 2020, until May 1, 2020, the time period that encompassed the local peak of the COVID-19 surge in Massachusetts. User access data were collected and summarized by using Google Analytics software that had been integrated into the PalliCOVID Web application.

Results

A total of 2042 users accessed PalliCOVID and viewed 4637 pages from April 7 to May 1, 2020. Users spent an average of 2 minutes and 6 seconds per session. Eighty-one percent of users were first-time visitors, while the remaining 19% were return visitors. Most users accessed PalliCOVID from the United States (87%), with a large proportion of users coming from Boston and the surrounding cities (32% of overall users).

Conclusions

PalliCOVID is one example of a scalable digital health solution that can bring palliative care resources to frontline clinicians. Analysis of PalliCOVID usage patterns has the potential to inform the improvement of the platform to better meet the needs of its user base and guide future dissemination strategies. The quantitative data presented here, although informative about user behavior, should be supplemented with future qualitative research to further define the impact of this tool and extend our ability to deliver clinical care that is compassionate, rational, and well-aligned with patients' values and goals.",2020-07-27 +35058006,A new platform for untargeted UHPLC-HRMS data analysis to address the time-shift problem.,"Substantial deviations in retention times among samples pose a great challenge for the accurate screening and identifying of metabolites by ultrahigh-performance liquid chromatography high-resolution mass spectrometry (UHPLC-HRMS). In this study, a coarse-to-refined time-shift correction methodology was proposed to efficiently address this problem. Metabolites producing multiple fragment ions were automatically selected as landmarks to generate pseudo-mass spectra for a coarse time-shift correction. Refined peak alignment for extracted ion chromatograms was then performed by using a moving window-based multiple-peak alignment strategy. Based on this novel coarse-to-refined time-shift correction methodology, a new comprehensive UHPLC-HRMS data analysis platform was developed for UHPLC-HRMS-based metabolomics. Original datasets were employed as inputs to automatically extract and register features in the dataset and to distinguish fragment ions from metabolites for chemometric analysis. 
Its performance was further evaluated using complex datasets, and the results suggest that the new platform can satisfactorily resolve the time-shift problem and is comparable with commonly used UHPLC-HRMS data analysis tools such as XCMS Online, MS-DIAL, Mzmine2, and Progenesis QI. The new platform can be downloaded from: http://www.pmdb.org.cn/antdas2tsc.",2021-12-25 +34611535,Neodymium isotopes in modern human dental enamel: An exploratory dataset for human provenancing.,"This collection presents data on neodymium isotopes from modern dental elements (third molars) of 47 individuals born and raised in the Netherlands, Grenada, Curaçao, Bonaire, Columbia and Iceland. Neodymium isotope composition was successfully analyzed for 40 individuals (ranging between 0.511820 and 0.512773 143Nd/144Nd and -16.0 to 2.6 εNd), with neodymium concentration data available for 23 individuals (ranging between 0.1 and 21.0 ppb). For 37 individuals the dental elements have also been analyzed for strontium isotopes. All analyses were performed on a Thermo Scientific Triton Plus TIMS. Neodymium analyses were performed using 1013 Ω resistors, with samples reanalyzed using 1011 Ω resistors if enough sample was available. Strontium analyses were performed using 1011Ω resistors. A discussion about the applicability of the analysis technique and the results can be found in the article ""Evaluation of neodymium isotope analysis of human dental enamel as a provenance indicator using 1013 Ω amplifiers (TIMS)"". This dataset is available for verification of the provenance capability of neodymium isotope analysis in archaeological and forensic mobility studies. 
To ensure the interoperability and reusability of the data, the data is available on the IsoArcH (https://isoarch.eu/) data repository.",2021-09-16 +34869372,Therapeutic Response-Based Reclassification of Multiple Tumor Subtypes Reveals Intrinsic Molecular Concordance of Therapy Across Histologically Disparate Cancers.,"Cancers that are histologically defined as the same type of cancer often need a distinct therapy based on underlying heterogeneity; likewise, histologically disparate cancers can require similar treatment approaches due to intrinsic similarities. A comprehensive analysis integrated with drug response data and molecular alterations, particularly to reveal therapeutic concordance mechanisms across histologically disparate tumor subtypes, has not yet been fully exploited. In this study, we integrated pharmacological, genomic, and transcriptomic profiling data provided from the Cancer Genome Project (CGP) in a systematic in silico investigation of the pharmacological subtypes of cancers and the intrinsic concordance of molecular mechanisms leading to similar therapeutic responses across histologically disparate tumor subtypes. We further developed a novel approach to redefine cell-to-cell similarity and drug-to-drug similarity from the therapeutic concordance, providing a new point of view to study cancer heterogeneity. This study demonstrates how pharmacological and omics data can be used to systematically classify cancers in terms of response to various compounds and provides us with a purely therapy-oriented perspective to view tumor classifications independent of histology subtypes. The knowledge of pharmacological subtypes of 367 drugs are available via our website (http://www.hywanglab.cn/dtdb/), providing the resources for precision medicine in the perspective of therapeutic response-based re-classification of tumor.",2021-11-12 +31606894,DALI and the persistence of protein shape.,"DALI is a popular resource for comparing protein structures. 
The software is based on distance-matrix alignment. The associated web server provides tools to navigate, integrate and organize some data pushed out by genomics and structural genomics. The server has been running continuously for the past 25 years. Structural biologists routinely use DALI to compare a new structure against previously known protein structures. If significant similarities are discovered, it may indicate a distant homology, that is, that the structures are of shared origin. This may be significant in determining the molecular mechanisms, as these may remain very similar from a distant predecessor to the present day, for example, from the last common ancestor of humans and bacteria. Meta-analysis of independent reference-based evaluations of alignment accuracy and fold discrimination shows DALI at top rank in six out of 12 studies. The web server and standalone software are available from http://ekhidna2.biocenter.helsinki.fi/dali.",2019-11-05 +31665428,Unraveling allosteric landscapes of allosterome with ASD.,"Allosteric regulation is one of the most direct and efficient ways to fine-tune protein function; it is induced by the binding of a ligand at an allosteric site that is topographically distinct from an orthosteric site. The Allosteric Database (ASD, available online at http://mdl.shsmu.edu.cn/ASD) was developed ten years ago to provide comprehensive information related to allosteric regulation. In recent years, allosteric regulation has received great attention in biological research, bioengineering, and drug discovery, leading to the emergence of entire allosteric landscapes as allosteromes. 
To facilitate research from the perspective of the allosterome, in ASD 2019, novel features were curated as follows: (i) >10 000 potential allosteric sites of human proteins were deposited for allosteric drug discovery; (ii) 7 human allosterome maps, including protease and ion channel maps, were built to reveal allosteric evolution within families; (iii) 1312 somatic missense mutations at allosteric sites were collected from patient samples from 33 cancer types and (iv) 1493 pharmacophores extracted from allosteric sites were provided for modulator screening. Over the past ten years, the ASD has become a central resource for studying allosteric regulation and will play more important roles in both target identification and allosteric drug discovery in the future.",2020-01-01 +34296017,Individual and community-level determinants of Iron-Folic Acid Intake for the recommended period among pregnant women in Ethiopia: A multilevel analysis.,"

Background

Iron-folic acid (IFA) intake for the recommended period during pregnancy reduces the risk of anemia and congenital anomalies. However, IFA intake for the recommended period is still very low in low-income countries including Ethiopia. Thus, the aim of this study was to assess both individual-and community-level determinants of IFA intake for the recommended period among pregnant women in Ethiopia.

Methods

Data were retrieved from the Demographic and Health Survey program's official database website (http://dhsprogram.com). A two-stage stratified cluster sampling technique was employed to conduct the 2016 Ethiopian Demographic and Health Survey. A sample of 3088 pregnant women who had received at least one dose of IFA in Ethiopia were included in this study. A multivariable multilevel logistic regression analysis model was fitted to identify the determinants of IFA intake below the recommended period [< 90 days] during pregnancy. Akaike's Information Criterion (AIC) was used during the model selection procedure.

Results

This study revealed that 87.6% [95% CI; 86.3%, 88.6%] of the women took IFA below the recommended period during the index pregnancy. After adjusting for the covariates: living in rural areas [AOR = 1.74: 95% CI 1.37, 2.50], and women's illiterate proportion [AOR = 1.43: 95% CI 1.06, 1.70] were community level factors. Whereas, primary education level [AOR = 0.63: 95% CI 0.40, 0.78], poorer wealth index [AOR = 1.53: 95% CI 1.08, 3.09], 4 + antenatal care visits [AOR = 0.43: 95% CI 0.31, 0.69], and receive nutritional counseling during pregnancy [AOR = 0.63: 95% CI 0.37, 0.84] were the individual-level factors of IFA intake below the recommended period during pregnancy.

Conclusions

In this study, nearly nine out of ten pregnant women did not take IFA for the recommended period. Thus, promoting recommended ANC visits, enhancing the quality of nutritional counseling, strengthening the expansion of media, and educate rural women towards the importance of optimal intake of IFA during pregnancy. Besides, the policymakers should design essential strategies based on identified barriers to improve the IFA intake for the recommended period.",2021-07-09 +33181825,CitrusKB: a comprehensive knowledge base for transcriptome and interactome of Citrus spp. infected by Xanthomonas citri subsp. citri at different infection stages. ,"Citrus canker type A is a serious disease caused by Xanthomonas citri subsp. citri (X. citri), which is responsible for severe losses to growers and to the citrus industry worldwide. To date, no canker-resistant citrus genotypes are available, and there is limited information regarding the molecular and genetic mechanisms involved in the early stages of the citrus canker development. Here, we present the CitrusKB knowledge base. This is the first in vivo interactome database for different citrus cultivars, and it was produced to provide a valuable resource of information on citrus and their interaction with the citrus canker bacterium X. citri. CitrusKB provides tools for a user-friendly web interface to let users search and analyse a large amount of information regarding eight citrus cultivars with distinct levels of susceptibility to the disease, with controls and infected plants at different stages of infection by the citrus canker bacterium X. citri. Currently, CitrusKB comprises a reference citrus genome and its transcriptome, expressed transcripts, pseudogenes and predicted genomic variations (SNPs and SSRs). The updating process will continue over time by the incorporation of novel annotations and analysis tools. We expect that CitrusKB may substantially contribute to the field of citrus genomics. 
CitrusKB is accessible at http://bioinfo.deinfo.uepg.br/citrus. Users can download all the generated raw sequences and generated datasets by this study from the CitrusKB website.",2020-01-01 +31691822,DrLLPS: a data resource of liquid-liquid phase separation in eukaryotes.,"Here, we presented an integrative database named DrLLPS (http://llps.biocuckoo.cn/) for proteins involved in liquid-liquid phase separation (LLPS), which is a ubiquitous and crucial mechanism for spatiotemporal organization of various biochemical reactions, by creating membraneless organelles (MLOs) in eukaryotic cells. From the literature, we manually collected 150 scaffold proteins that are drivers of LLPS, 987 regulators that contribute in modulating LLPS, and 8148 potential client proteins that might be dispensable for the formation of MLOs, which were then categorized into 40 biomolecular condensates. We searched potential orthologs of these known proteins, and in total DrLLPS contained 437 887 known and potential LLPS-associated proteins in 164 eukaryotes. Furthermore, we carefully annotated LLPS-associated proteins in eight model organisms, by using the knowledge integrated from 110 widely used resources that covered 16 aspects, including protein disordered regions, domain annotations, post-translational modifications (PTMs), genetic variations, cancer mutations, molecular interactions, disease-associated information, drug-target relations, physicochemical property, protein functional annotations, protein expressions/proteomics, protein 3D structures, subcellular localizations, mRNA expressions, DNA & RNA elements, and DNA methylations. We anticipate DrLLPS can serve as a helpful resource for further analysis of LLPS.",2020-01-01 +34591846,Signal-based optical map alignment.,"In genomics, optical mapping technology provides long-range contiguity information to improve genome sequence assemblies and detect structural variation. 
Originally a laborious manual process, Bionano Genomics platforms now offer high-throughput, automated optical mapping based on chips packed with nanochannels through which unwound DNA is guided and the fluorescent DNA backbone and specific restriction sites are recorded. Although the raw image data obtained is of high quality, the processing and assembly software accompanying the platforms is closed source and does not seem to make full use of data, labeling approximately half of the measured signals as unusable. Here we introduce two new software tools, independent of Bionano Genomics software, to extract and process molecules from raw images (OptiScan) and to perform molecule-to-molecule and molecule-to-reference alignments using a novel signal-based approach (OptiMap). We demonstrate that the molecules detected by OptiScan can yield better assemblies, and that the approach taken by OptiMap results in higher use of molecules from the raw data. These tools lay the foundation for a suite of open-source methods to process and analyze high-throughput optical mapping data. The Python implementations of the OptiTools are publicly available through http://www.bif.wur.nl/.",2021-09-30 +30233603,Whole Genome Characterization of a Few EMS-Induced Mutants of Upland Rice Variety Nagina 22 Reveals a Staggeringly High Frequency of SNPs Which Show High Phenotypic Plasticity Towards the Wild-Type.,"The Indian initiative, in creating mutant resources for the functional genomics in rice, has been instrumental in the development of 87,000 ethylmethanesulfonate (EMS)-induced mutants, of which 7,000 are in advanced generations. The mutants have been created in the background of Nagina 22, a popular drought- and heat-tolerant upland cultivar. As it is a pregreen revolution cultivar, as many as 573 dwarf mutants identified from this resource could be useful as an alternate source of dwarfing. 
A total of 541 mutants, including the macromutants and the trait-specific ones, obtained after appropriate screening, are being maintained in the mutant garden. Here, we report on the detailed characterizations of the 541 mutants based on the distinctness, uniformity, and stability (DUS) descriptors at two different locations. About 90% of the mutants were found to be similar to the wild type (WT) with high similarity index (>0.6) at both the locations. All 541 mutants were characterized for chlorophyll and epicuticular wax contents, while a subset of 84 mutants were characterized for their ionomes, namely, phosphorous, silicon, and chloride contents. Genotyping of these mutants with 54 genomewide simple sequence repeat (SSR) markers revealed 93% of the mutants to be either completely identical to WT or nearly identical with just one polymorphic locus. Whole genome resequencing (WGS) of four mutants, which have minimal differences in the SSR fingerprint pattern and DUS characters from the WT, revealed a staggeringly high number of single nucleotide polymorphisms (SNPs) on an average (16,453 per mutant) in the genic sequences. Of these, nearly 50% of the SNPs led to non-synonymous codons, while 30% resulted in synonymous codons. The number of insertions and deletions (InDels) varied from 898 to 2,595, with more than 80% of them being 1-2 bp long. Such a high number of SNPs could pose a serious challenge in identifying gene(s) governing the mutant phenotype by next generation sequencing-based mapping approaches such as Mutmap. From the WGS data of the WT and the mutants, we developed a genic resource of the WT with a novel analysis pipeline. 
The entire information about this resource along with the panicle architecture of the 493 mutants is made available in a mutant database EMSgardeN22 (http://14.139.229.201/EMSgardeN22).",2018-09-04 +31686102,FoldamerDB: a database of peptidic foldamers.,"Foldamers are non-natural oligomers that mimic the structural behaviour of natural peptides, proteins and nucleotides by folding into a well-defined 3D conformation in solution. Since their first description about two decades ago, numerous studies have been undertaken dealing with the design, synthesis, characterization and application of foldamers. They have huge application potential as antimicrobial, anticancer and anti-HIV agents and in materials science. Despite their importance, there is no publicly available web resource providing comprehensive information on these compounds. Here we describe FoldamerDB, an open-source, fully annotated and manually curated database of peptidic foldamers. FoldamerDB holds the information about the sequence, structure and biological activities of the foldamer entries. It contains the information on over 1319 species and 1018 activities, collected from more than 160 research papers. The web-interface is designed to be clutter-free, user-friendly and it is compatible with devices of different screen sizes. The interface allows the user to search the database, browse and filter the foldamers using multiple criteria. It also offers a detailed help page to assist new users. FoldamerDB is hoped to bridge the gap in the freely available web-based resources on foldamers and will be of interest to diverse groups of scientists from chemists to biologists. The database can be accessed at http://foldamerdb.ttk.hu/.",2020-01-01 +,"Recognition of the Trachypetidae stat.n. 
as a new extant family of Ichneumonoidea (Hymenoptera), based on molecular and morphological evidence","The Trachypetinae (type genus Trachypetus Guérin de Méneville) comprise seven species of large‐bodied wasps in three genera (Cercobarcon Tobias, Megalohelcon Turner and Trachypetus) endemic to continental Australia. Historically they have been variously treated, as members of the Helconinae in the case of Megalohelcon, or as separate subfamilies (Cercobarconinae and Trachypetinae). Some 25 years ago they were united in a single subfamily, the Trachypetinae, based on a number of characters. Although there has been conflicting evidence from morphological and molecular phylogenetic studies as to how best to treat the group, there has been a growing consensus that they fall outside the rest of the Braconidae, although taxon sampling has been a limiting factor for molecular studies. We generated a molecular dataset comprising five gene fragments (nuclear 28S ribosomal rDNA, nuclear 18S, elongation factor 1‐alpha, mitochondrial 16S rDNA, and mitochondrial cytochrome oxidase subunit 1) for a taxonomically broad range of Braconidae, Ichneumonidae, trachypetines and outgroup hymenopterans including the first molecular data for the trachypetines Cercobarcon and Trachypetus obtained using specially designed internal primers. Molecular and combined molecular and morphological analyses confirm the monophyly of the Trachypetinae and robustly place them as sister to the Braconidae. Detailed morphological analysis including newly recognized characters shows that trachypetines lack several synapomorphies that define the Braconidae, and that they possess a number of symplesiomorphies absent from this family but found in some ichneumonids. We conclude that family‐level status is warranted for the group based on both molecular and morphological criteria, and hence we propose the new family, Trachypetidae Schulz stat.n. (type genus Trachypetus Guérin de Méneville), for it. 
As a result, the remaining extant Braconidae become clearly defined based on synapomorphies not present in Trachypetidae stat.n. This published work has been registered on ZooBank, http://zoobank.org/urn:lsid:urn:lsid:zoobank.org:pub:5418F709‐D724‐4F14‐89D8‐1E054D1D27D0.",2020-10-01 +31161194,MTTFsite: cross-cell type TF binding site prediction by using multi-task learning.,"

Motivation

The prediction of transcription factor binding sites (TFBSs) is crucial for gene expression analysis. Supervised learning approaches for TFBS predictions require large amounts of labeled data. However, many TFs of certain cell types either do not have sufficient labeled data or do not have any labeled data.

Results

In this paper, a multi-task learning framework (called MTTFsite) is proposed to address the lack of labeled data problem by leveraging on labeled data available in cross-cell types. The proposed MTTFsite contains a shared CNN to learn common features for all cell types and a private CNN for each cell type to learn private features. The common features are aimed to help predicting TFBSs for all cell types especially those cell types that lack labeled data. MTTFsite is evaluated on 241 cell type TF pairs and compared with a baseline method without using any multi-task learning model and a fully shared multi-task model that uses only a shared CNN and do not use private CNNs. For cell types with insufficient labeled data, results show that MTTFsite performs better than the baseline method and the fully shared model on more than 89% pairs. For cell types without any labeled data, MTTFsite outperforms the baseline method and the fully shared model by more than 80 and 93% pairs, respectively. A novel gene expression prediction method (called TFChrome) using both MTTFsite and histone modification features is also presented. Results show that TFBSs predicted by MTTFsite alone can achieve good performance. When MTTFsite is combined with histone modification features, a significant 5.7% performance improvement is obtained.

Availability and implementation

The resource and executable code are freely available at http://hlt.hitsz.edu.cn/MTTFsite/ and http://www.hitsz-hlt.com:8080/MTTFsite/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +31307628,[Oligometastatic prostate cancer management].,"

Objective

To review biology and management of oligometastatic prostate cancer.

Material and methods

Relevant publications were identified through Medline (www.ncbi.nlm.nih.gov), Embase (www.embase.com) and the US National Library of Medicine (www.clinicaltrials.org) databases using the following keywords, alone or in association, «prostate cancer; metastasis; oligo-metastasis». Articles were selected according to methods, language of publication and relevance. After careful selection 99 publications were eligible for our review.

Results

Oligometastatic prostate cancer is a new entity including prostate cancer with a limited number of metastasis. This particular state becomes more frequent with the imaging progresses especially with the common use of new PET imaging with Choline or PSMA. There is no consensus about a strict definition of oligometastatic prostate cancer, number and sites of metastasis vary widely in the literature. Moreover, oligometastatic state can be observed de novo at the time of prostate cancer diagnosis as well as in case of recurrence after a primary treatment. There is actually an important lack of evidence-based medicine and no guidelines regarding treatment can be found. In de novo oligo-metastatic prostate cancer, treatment of the primary tumor in association with androgen deprivation therapy seems to increase survival in selected patients but this needs to be confirmed by ongoing prospective clinical trials. In recurrent prostate cancer, metastasis directed therapy with or without androgen deprivation therapy is now routinely performed but its impact needs also to be analyzed.

Conclusion

In absence of consensus or guidelines, management of prostate cancer should be an individualized, patient-based management taking into account primary tumor stage and grade, number and types of metastasis and patient characteristics.",2019-06-01 +34460405,Granular Ball Sampling for Noisy Label Classification or Imbalanced Classification. ,"This article presents a general sampling method, called granular-ball sampling (GBS), for classification problems by introducing the idea of granular computing. The GBS method uses some adaptively generated hyperballs to cover the data space, and the points on the hyperballs constitute the sampled data. GBS is the first sampling method that not only reduces the data size but also improves the data quality in noisy label classification. In addition, because the GBS method can be used to exactly describe the boundary, it can obtain almost the same classification accuracy as the results on the original datasets, and it can obtain an obviously higher classification accuracy than random sampling. Therefore, for the data reduction classification task, GBS is a general method that is not especially restricted by any specific classifier or dataset. Moreover, the GBS can be effectively used as an undersampling method for imbalanced classification. It has a time complexity that is close to O(N), so it can accelerate most classifiers. These advantages make GBS powerful for improving the performance of classifiers. All codes have been released in the open source GBS library at http://www.cquptshuyinxia.com/GBS.html.",2021-08-30 +30329095,CancerSplicingQTL: a database for genome-wide identification of splicing QTLs in human cancer.,"Alternative splicing (AS) is a widespread process that increases structural transcript variation and proteome diversity. Aberrant splicing patterns are frequently observed in cancer initiation, progress, prognosis and therapy. 
Increasing evidence has demonstrated that AS events could undergo modulation by genetic variants. The identification of splicing quantitative trait loci (sQTLs), genetic variants that affect AS events, might represent an important step toward fully understanding the contribution of genetic variants in disease development. However, no database has yet been developed to systematically analyze sQTLs across multiple cancer types. Using genotype data from The Cancer Genome Atlas and corresponding AS values calculated by TCGASpliceSeq, we developed a computational pipeline to identify sQTLs from 9 026 tumor samples in 33 cancer types. We totally identified 4 599 598 sQTLs across all cancer types. We further performed survival analyses and identified 17 072 sQTLs associated with patient overall survival times. Furthermore, using genome-wide association study (GWAS) catalog data, we identified 1 180 132 sQTLs overlapping with known GWAS linkage disequilibrium regions. Finally, we constructed a user-friendly database, CancerSplicingQTL (http://www.cancersplicingqtl-hust.com/) for users to conveniently browse, search and download data of interest. This database provides an informative sQTL resource for further characterizing the potential functional roles of SNPs that control transcript isoforms in human cancer.",2019-01-01 +31688940,Tripal MapViewer: A tool for interactive visualization and comparison of genetic maps. ,"Tripal is an open-source, resource-efficient toolkit for construction of genomic, genetic and breeding databases. It facilitates development of biological websites by providing tools to integrate and display biological data using the generic database schema, Chado, together with Drupal, a popular website creation and content management system. Tripal MapViewer is a new interactive tool for visualizing genetic map data. 
Developed as a Tripal replacement for Comparative Map Viewer (CMap), it enables visualization of entire maps or linkage groups and features such as molecular markers, quantitative trait loci (QTLs) and heritable phenotypic markers. It also provides graphical comparison of maps sharing the same markers as well as dot plot and correspondence matrices. MapViewer integrates directly with the Tripal application programming interface framework, improving data searching capability and providing a more seamless experience for site visitors. The Tripal MapViewer interface can be integrated in any Tripal map page and linked from any Tripal page for markers, QTLs, heritable morphological markers or genes. Configuration of the display is available through a control panel and the administration interface. The administration interface also allows configuration of the custom database query for building materialized views, providing better performance and flexibility in the way data is stored in the Chado database schema. MapViewer is implemented with the D3.js technology and is currently being used at the Genome Database for Rosaceae (https://www.rosaceae.org), CottonGen (https://www.cottongen.org), Citrus Genome Database (https://citrusgenomedb.org), Vaccinium Genome Database (https://www.vaccinium.org) and Cool Season Food Legume Database (https://www.coolseasonfoodlegume.org). It is also currently in development on the Hardwood Genomics Web (https://hardwoodgenomics.org) and TreeGenes (https://treegenesdb.org). Database URL: https://gitlab.com/mainlabwsu/tripal_map.",2019-01-01 +33485793,cgMLST@Taiwan: A web service platform for Vibrio cholerae cgMLST profiling and global strain tracking.,"

Background

Cholera, a rapidly dehydrating diarrheal disease caused by toxigenic Vibrio cholerae, is a leading cause of morbidity and mortality in some regions of the world. Core genome multilocus sequence typing (cgMLST) is a promising approach in generating genetic fingerprints from whole-genome sequencing (WGS) data for strain comparison among laboratories.

Methods

We constructed a V. cholerae core gene allele database using an in-house developed computational pipeline, a database with cgMLST profiles converted from genomic sequences from the National Center for Biotechnology Information, and built a REST-based web accessible via the Internet.

Results

We built a web service platform-cgMLST@Taiwan and installed a V. cholerae allele database, a cgMLST profile database, and computational tools for generating V. cholerae cgMLST profiles (based on 3,017 core genes), performing rapid global strain tracking, and clustering analysis of cgMLST profiles. This web-based platform provides services to researchers, public health microbiologists, and physicians who use WGS data for the investigation of cholera outbreaks and tracking of V. cholerae strain transmission across countries and geographic regions. The cgMLST@Taiwan is accessible at http://rdvd.cdc.gov.tw/cgMLST.",2021-01-15 +32928113,CrustyBase: an interactive online database for crustacean transcriptomes.,"Transcriptome sequencing has opened the field of genomics to a wide variety of researchers, owing to its efficiency, applicability across species and ability to quantify gene expression. The resulting datasets are a rich source of information that can be mined for many years into the future, with each dataset providing a unique angle on a specific context in biology. Maintaining accessibility to this accumulation of data presents quite a challenge for researchers.The primary focus of conventional genomics databases is the storage, navigation and interpretation of sequence data, which is typically classified down to the level of a species or individual. The addition of expression data adds a new dimension to this paradigm - the sampling context. Does gene expression describe different tissues, a temporal distribution or an experimental treatment? These data not only describe an individual, but the biological context surrounding that individual. The structure and utility of a transcriptome database must therefore reflect these attributes. 
We present an online database which has been designed to maximise the accessibility of crustacean transcriptome data by providing intuitive navigation within and between datasets and instant visualization of gene expression and protein structure.The site is accessible at https://crustybase.org and currently holds 10 datasets from a range of crustacean species. It also allows for upload of novel transcriptome datasets through a simple web interface, allowing the research community to contribute their own data to a pool of shared knowledge.",2020-09-14 +32372858,"Phylotastic: Improving Access to Tree-of-Life Knowledge With Flexible, on-the-Fly Delivery of Trees.","A comprehensive phylogeny of species, i.e., a tree of life, has potential uses in a variety of contexts, including research, education, and public policy. Yet, accessing the tree of life typically requires special knowledge, complex software, or long periods of training. The Phylotastic project aims make it as easy to get a phylogeny of species as it is to get driving directions from mapping software. In prior work, we presented a design for an open system to validate and manage taxon names, find phylogeny resources, extract subtrees matching a user's taxon list, scale trees to time, and integrate related resources such as species images. Here, we report the implementation of a set of tools that together represent a robust, accessible system for on-the-fly delivery of phylogenetic knowledge. This set of tools includes a web portal to execute several customizable workflows to obtain species phylogenies (scaled by geologic time and decorated with thumbnail images); more than 30 underlying web services (accessible via a common registry); and code toolkits in R and Python (allowing others to develop custom applications using Phylotastic services). 
The Phylotastic system, accessible via http://www.phylotastic.org, provides a unique resource to access the current state of phylogenetic knowledge, useful for a variety of cases in which a tree extracted quickly from online resources (as distinct from a tree custom-made from character data) is sufficient, as it is for many casual uses of trees identified here.",2020-04-06 +33367853,UniBioDicts: Unified access to biological dictionaries. ,"We present a set of software packages that provide uniform access to diverse biological vocabulary resources that are instrumental for current biocuration efforts and tools. The Unified Biological Dictionaries (UniBioDicts or UBDs) provide a single query-interface for accessing the online API services of leading biological data providers. Given a search string, UBDs return a list of matching term, identifier and metadata units from databases (e.g. UniProt), controlled vocabularies (e.g. PSI-MI), and ontologies (e.g. GO, via BioPortal). This functionality can be connected to input fields (user-interface components) that offer autocomplete lookup for these dictionaries. UBDs create a unified gateway for accessing life science concepts, helping curators find annotation terms across resources (based on descriptive metadata and unambiguous identifiers), and helping data users search and retrieve the right query terms. The UBDs are available through npm and the code is available in the GitHub organisation UniBioDicts under the Affero GPL license. Further information on the related project VSM is available at https://vsm.github.io.",2020-12-26 +33603824,An Updated Review on the Secondary Metabolites and Biological Activities of Aspergillus ruber and Aspergillus flavus and Exploring the Cytotoxic Potential of Their Isolated Compounds Using Virtual Screening.,"The secondary metabolites and biological activities of Aspergillus ruber and Aspergillus flavus were comprehensively reported. 
About 70 compounds were isolated from both species that belong to different classes using conventional and advanced chromatographic techniques and unambiguously elucidated employing one- and two-dimensional nuclear magnetic resonance (1D and 2D NMR) and high resolution mass spectrometry (HRMS). Some of them displayed promising antiviral, anti-inflammatory, and antioxidant activities. In silico studies were conducted on human cyclin-dependent kinase 2 (CDK-2), human DNA topoisomerase II (TOP-2), and matrix metalloprotinase 13 (MMP-13) in an effort to explore the cytotoxic potential of the diverse compounds obtained from both Aspergillus species. 1,6,8-Trihydroxy-4-benzoyloxy-3-methylanthraquinone (23) revealed the most firm fitting with the active pockets of CDK-2 and MMP-13; meanwhile, variecolorin H alkaloid (14) showed the highest fitting within TOP-2 with ∆G equals to -36.51 kcal/mole. Thus, fungal metabolites could offer new drug entities for combating cancer. Relevant data about both Aspergillus species up to August 2020 were gathered from various databases comprising Scifinder (https://scifinder.cas.org/scifinder/login) for secondary metabolite-related studies; meanwhile, for biology-related articles, data were collected from both PubMed (http://www.ncbi.nlm.nih.gov/pubmed/) and Web of Knowledge (http://www.webofknowledge.com) as well.",2021-01-31 +33231642,Mouse Genome Database (MGD): Knowledgebase for mouse-human comparative biology.,"The Mouse Genome Database (MGD; http://www.informatics.jax.org) is the community model organism knowledgebase for the laboratory mouse, a widely used animal model for comparative studies of the genetic and genomic basis for human health and disease. MGD is the authoritative source for biological reference data related to mouse genes, gene functions, phenotypes and mouse models of human disease. 
MGD is the primary source for official gene, allele, and mouse strain nomenclature based on the guidelines set by the International Committee on Standardized Nomenclature for Mice. MGD's biocuration scientists curate information from the biomedical literature and from large and small datasets contributed directly by investigators. In this report we describe significant enhancements to the content and interfaces at MGD, including (i) improvements in the Multi Genome Viewer for exploring the genomes of multiple mouse strains, (ii) inclusion of many more mouse strains and new mouse strain pages with extended query options and (iii) integration of extensive data about mouse strain variants. We also describe improvements to the efficiency of literature curation processes and the implementation of an information portal focused on mouse models and genes for the study of COVID-19.",2021-01-01 +33063234,"Social Validity of the Strengthening Families Program in Northeastern Brazil: the Voices of Parents, Adolescents, and Facilitators.","In 2013, Brazil's Ministry of Health adopted the Strengthening Families Program (SFP 10-14), developed internationally for preventing drug abuse by enhancing family bonds. The social validity of the objectives, procedures, and perceived impacts of the program were investigated for participants and facilitators in northeastern Brazil. Focus groups with parents/guardians (N = 199), adolescents (N = 111), and facilitators (N = 100) were implemented. Content analysis revealed that the program's objectives were considered socially relevant and that there was a positive short-term perceived impact on family cohesion, authoritative parenting style, adolescent life skills, and the facilitators' professional capacity. 
The parents/guardians and adolescents presented a positive perception of the appropriateness of the program's methodology, while facilitators indicated the need to adapt it to vulnerable families and improve its implementation conditions. Future studies may benefit from these findings when developing similarly viable and scalable interventions in low-resource settings. Brazilian Trial Register RBR-7q9xh5. Registered 5 August 2017, http://www.ensaiosclinicos.gov.br/rg/RBR-7q9xh5/.",2020-10-16 +33367505,PICKLE 3.0: Enriching the human Meta-database with the mouse protein interactome extended via mouse-human orthology. ,"The PICKLE 3.0 upgrade refers to the enrichment of this human protein-protein interaction (PPI) meta-database with the mouse protein interactome. Experimental PPI data between mouse genetic entities are rather limited; however, they are substantially complemented by PPIs between mouse and human genetic entities. The relational scheme of PICKLE 3.0 has been amended to exploit the Mouse Genome Informatics (MGI) mouse-human ortholog gene pair collection, enabling (i) the extension through orthology of the mouse interactome with potentially valid PPIs between mouse entities based on the experimental PPIs between mouse and human entities, and (ii) the comparison between mouse and human PPI networks. Interestingly, 43.5% of the experimental mouse PPIs lacks a corresponding by orthology PPI in human, an inconsistency in need of further investigation. Overall, as primary mouse PPI datasets show a considerably limited overlap, PICKLE 3.0 provides a unique comprehensive representation of the mouse protein interactome. PICKLE can be queried and downloaded at http://www.pickle.gr. Supplementary data are available at Bioinformatics online.",2020-12-26 +34657890,Characterizing the Resilience Effect of Neurodegeneration for the Mechanistic Pathway of Alzheimer's Disease.,"

Background

With the rapid development of neurobiology and neuroimaging technologies, mounting evidence shows that Alzheimer's disease (AD) is caused by the build-up of two abnormal proteins, amyloid-β plaques (A) and neurofibrillary tangles (T). Over time, these AD-related neuropathological burdens begin to spread throughout the brain, which results in the characteristic progression of symptoms in AD.

Objective

Although tremendous efforts have been made to link biological indicators to the progression of AD, limited attention has been paid to investigate the multi-factorial role of socioeconomic status (SES) in the prevalence or incidence of AD. There is high demand to explore the synergetic effect of sex and SES factors in moderating the neurodegeneration process caused by the accumulation of A and T biomarkers.

Methods

We carry out a meta-data analysis on the longitudinal neuroimaging data, clinical outcomes, genotypes, and demographic data in Alzheimer's Disease Neuroimaging Initiative (ADNI) database (http://adni.loni.usc.edu).

Results

Our major findings include 1) education and occupation show resilience effects at the angular gyrus, superior parietal lobule, lateral occipital-temporal sulcus, and posterior transverse collateral sulcus where we found significant slowdown of neurodegeneration due to higher education level or more advanced occupation rank; 2) A and T biomarkers manifest different spatial patterns of brain resilience; 3) BDNF (brain-derived neurotrophic factor) single nucleotide polymorphism (SNP) rs10835211 shows strong association to the identified resilience effect; 4) the identified resilience effect is associated with the clinical manifestation in memory, learning, and organization performance.

Conclusion

Several brain regions manifest resilience from SES to A and T biomarkers. BDNF SNPs have a potential association with the resilience effect from SES. In addition, cognitive measures of learning and memory demonstrate the resilience effect.",2021-01-01 +33835460,MeT-DB V2.0: Elucidating Context-Specific Functions of N6-Methyl-Adenosine Methyltranscriptome.,"N6-methyladenosine (m6A) is the most prevalent posttranscriptional modification in eukaryotes and plays a pivotal role in various biological processes. A knowledge base with the systematic collection and curation of context specific transcriptome-wide methylations is critical for elucidating their biological functions as well as for developing bioinformatics tools. In this chapter, we present a comprehensive platform MeT-DB V2.0 for elucidating context-specific functions of N6-methyl-adenosine methyltranscriptome. Met-DB V2.0 database contains context specific m6A peaks and single-base sites predicted from 185 samples for 7 species from 26 independent studies. Moreover, it is also integrated with a new database for targets of m6A readers, erasers and writers and expanded with more collections of functional data. The Met-DB V2.0 web interface and genome browser provide more friendly, powerful, and informative ways to query and visualize the data. More importantly, MeT-DB V2.0 offers for the first time a series of tools specifically designed for understanding m6A functions. 
The MeT-DB V2.0 web server is freely available at: http://compgenomics.utsa.edu/MeTDB and www.xjtlu.edu.cn/metdb2 .",2021-01-01 +31950241,Evaluating genetic causes of azoospermia: What can we learn from a complex cellular structure and single-cell transcriptomics of the human testis?,"Azoospermia is a condition defined as the absence of spermatozoa in the ejaculate, but the testicular phenotype of men with azoospermia may be very variable, ranging from full spermatogenesis, through arrested maturation of germ cells at different stages, to completely degenerated tissue with ghost tubules. Hence, information regarding the cell-type-specific expression patterns is needed to prioritise potential pathogenic variants that contribute to the pathogenesis of azoospermia. Thanks to technological advances within next-generation sequencing, it is now possible to obtain detailed cell-type-specific expression patterns in the testis by single-cell RNA sequencing. However, to interpret single-cell RNA sequencing data properly, substantial knowledge of the highly sophisticated data processing and visualisation methods is needed. Here we review the complex cellular structure of the human testis in different types of azoospermia and outline how known genetic alterations affect the pathology of the testis. We combined the currently available single-cell RNA sequencing datasets originating from the human testis into one dataset covering 62,751 testicular cells, each with a median of 2637 transcripts quantified. We show what effects the most common data-processing steps have, and how different visualisation methods can be used. Furthermore, we calculated expression patterns in pseudotime, and show how splicing rates can be used to determine the velocity of differentiation during spermatogenesis. With the combined dataset we show expression patterns and network analysis of genes known to be involved in the pathogenesis of azoospermia. 
Finally, we provide the combined dataset as an interactive online resource where expression of genes and different visualisation methods can be explored ( https://testis.cells.ucsc.edu/ ).",2020-01-16 +34798231,WeBrain: A web-based brainformatics platform of computational ecosystem for EEG big data analysis.,"The current evolution of 'cloud neuroscience' leads to more efforts with the large-scale EEG applications, by using EEG pipelines to handle the rapidly accumulating EEG data. However, there are a few specific cloud platforms that seek to address the cloud computational challenges of EEG big data analysis to benefit the EEG community. In response to the challenges, a WeBrain cloud platform (https://webrain.uestc.edu.cn/) is designed as a web-based brainformatics platform and computational ecosystem to enable large-scale EEG data storage, exploration and analysis using cloud high-performance computing (HPC) facilities. WeBrain connects researchers from different fields to EEG and multimodal tools that have become the norm in the field and the cloud processing power required to handle those large EEG datasets. This platform provides an easy-to-use system for novice users (even no computer programming skills) and provides satisfactory maintainability, sustainability and flexibility for IT administrators and tool developers. A range of resources are also available on https://webrain.uestc.edu.cn/, including documents, manuals, example datasets related to WeBrain, and collected links to open EEG datasets and tools. It is not necessary for users or administrators to install any software or system, and all that is needed is a modern web browser, which reduces the technical expertise required to use or manage WeBrain. 
The WeBrain platform is sponsored and driven by the China-Canada-Cuba international brain cooperation project (CCC-Axis, http://ccc-axis.org/), and we hope that WeBrain will be a promising cloud brainformatics platform for exploring brain information in large-scale EEG applications in the EEG community.",2021-11-17 +34345765,Maternity care during COVID-19: a protocol for a qualitative evidence synthesis of women's and maternity care providers' views and experiences.,"Background: Considerable changes in maternity care provision internationally were implemented in response to COVID-19. Such changes, often occurring suddenly with little advance warning, have had the potential to affect women's and maternity care providers experience of maternity care, both positively and negatively. For this reason, to gain insight and understanding of personal and professional experiences, we will perform a synthesis of the available qualitative evidence on women and maternity care providers' views and experiences of maternity care during COVID-19. Methods and analysis: A qualitative evidence synthesis will be conducted. Studies will be eligible if they include pregnant or postpartum women (up to six months) and maternity care providers who received or provided care during COVID-19. To retrieve relevant literature the electronic databases of CINAHL, EMBASE, MEDLINE, PsycINFO, and the Cochrane COVID study register ( https://covid-19.cochrane.org/) will be searched from 01-Jan-2020 to date of search. A combination of search terms based on COVID-19, pregnancy, childbirth and maternity care, and study design, will be used to guide the search.  The methodological quality of the included studies will be assessed by at least two reviewers using the Evidence for Policy and Practice Information (EPPI)-Centre 12-criteria quality assessment tool. The Thomas and Harden approach to thematic synthesis will be used for data synthesis. 
This will involve line by line coding of extracted data, establishing descriptive themes, and determining analytical themes. Confidence in the findings of the review will be assessed by two reviewers independently using Grading of Recommendations Assessment, Development and Evaluation-Confidence in the Evidence from Reviews of Qualitative research (GRADE-CERQual).   Conclusion: The proposed synthesis of evidence will help identify maternity care needs during a global pandemic from the perspectives of those receiving and providing care. The evidence will inform and help enhance care provision into the future.",2021-02-18 +32621219,Estimating the Quality of 3D Protein Models Using the ModFOLD7 Server.,"Assessing the accuracy of 3D models has become a keystone in the protein structure prediction field. ModFOLD7 is our leading resource for Estimates of Model Accuracy (EMA), which has been upgraded by integrating a number of the pioneering pure-single- and quasi-single-model approaches. Such an integration has given our latest version the strengths to accurately score and rank predicted models, with higher consistency compared to older EMA methods. Additionally, the server provides three options for producing global score estimates, depending on the requirements of the user: (1) ModFOLD7_rank, which is optimized for ranking/selection, (2) ModFOLD7_cor, which is optimized for correlations of predicted and observed scores, and (3) ModFOLD7 global for balanced performance. ModFOLD7 has been ranked among the top few EMA methods according to independent blind testing by the CASP13 assessors. Another evaluation resource for ModFOLD7 is the CAMEO project, where the method is continuously automatically evaluated, showing a significant improvement compared to our previous versions. 
The ModFOLD7 server is freely available at http://www.reading.ac.uk/bioinf/ModFOLD/ .",2020-01-01 +31697319,Critical evaluation of web-based prediction tools for human protein subcellular localization.,"Human protein subcellular localization has an important research value in biological processes, also in elucidating protein functions and identifying drug targets. Over the past decade, a number of protein subcellular localization prediction tools have been designed and made freely available online. The purpose of this paper is to summarize the progress of research on the subcellular localization of human proteins in recent years, including commonly used data sets proposed by the predecessors and the performance of all selected prediction tools against the same benchmark data set. We carry out a systematic evaluation of several publicly available subcellular localization prediction methods on various benchmark data sets. Among them, we find that mLASSO-Hum and pLoc-mHum provide a statistically significant improvement in performance, as measured by the value of accuracy, relative to the other methods. Meanwhile, we build a new data set using the latest version of Uniprot database and construct a new GO-based prediction method HumLoc-LBCI in this paper. Then, we test all selected prediction tools on the new data set. Finally, we discuss the possible development directions of human protein subcellular localization. Availability: The codes and data are available from http://www.lbci.cn/syn/.",2020-09-01 +,First Report of Fusarium Wilt of Lettuce Caused by Fusarium oxysporum f. sp. lactucae Race 1 in Spain,"Lettuce plants (Lactuca sativa) of the romaine cultivar Amible showing wilt symptoms were observed in a 2-ha field located in the Comarca del Noroeste in the region of Murcia, Spain, in August 2017. The incidence of wilted plants was 60%. 
On affected plants the leaves showed chlorosis, and necrosis also was observed, particularly on the internal younger leaves, with vascular darkening and severe wilting. Small (3- to 4-mm) pieces of necrotic vascular and root tissues were surface sterilized for 1 min in 1.5% NaOCl, washed twice with sterilized distilled water, and plated onto potato dextrose agar (PDA) with streptomycin sulfate (0.5 g per liter). Plates were incubated at 25°C for 3 to 5 days. Fusarium colonies were transferred to PDA and Spezieller Nahrstoffarmer agar (SNA) media (Garibaldi et al. 2004) for morphological identification and were identified as Fusarium oxysporum based on morphology on SNA (Leslie and Summerell 2006). Macroconidia were straight to slightly curved, with one septum and spores measuring (15.0 to) 19.1 (to 25) × (3.1 to) 4.1 (to 5.0) µm (n = 30), or two septa and spores measuring (20 to) 22.8 (to 27.5) × (4.8 to) 4.9 (to 5.0) µm (n = 30). Microconidia were borne on short monophialides in false heads, were ovoid to reniform, and were (7.5 to) 11.5 (to 15.0) × (2.5 to) 3.3 (to 5.0) µm (n = 30). Chlamydospores were mostly single, terminal, and intercalary, measuring (7.5 to) 10.8 (to 12.5) µm (n = 30). The translation elongation factor-1α (EF-1α) gene of 10 representative isolates was sequenced using EF-1/EF-2 primer pairs (O’Donnell et al. 1998). All EF-1α sequences were identical, and one (corresponding to isolate Fm1) was deposited in GenBank (accession no. MN379455). BLASTn comparison showed a 100% homology with the EF-1α sequence of F. oxysporum f. sp. lactucae (KY009874). Comparison of this sequence in the Fusarium ID database (http://fusarium.mycobank.org/) exhibited identical homology. Specific primers Hani3′ and Hanilatt3rev (Pasquali et al. 2005) produced a 183-bp product specific for Fusarium oxysporum f. sp. lactucae race 1, and specific primers for race 4, FPUF and FPUR (Gilardi et al. 2017), showed no amplification. 
For pathogenicity testing four isolates (Fm1, Fm2, Fm3, and Fm4) were inoculated onto lettuce plants following the protocol by Pasquali et al. (2005). Two lettuce cultivars, Romano Odessa and Chiquina, were grown in a mix of peat and sterilized perlite (3:1 v:v) in 500-cm³ pots (Garibaldi et al. 2004). Fifteen-day-old plants of each cultivar were inoculated either by irrigation with spores or by immersing the roots in a spore suspension (1 × 10⁶ CFU/ml) for each isolate. For the former, plants were irrigated with 5 ml of the conidial suspension, and for the latter roots were immersed in the spore suspension for 30 min. The inoculated plants and the controls (10 replicate plants per isolate, cultivar, and treatment) were kept in growth chambers with a completely randomized design at 26 to 28°C with a 14-h photoperiod per day. The first wilt symptoms appeared 9 days after inoculation, independent of the cultivar, inoculation method, and isolate. Lettuce growth was stunted compared with the control plants. Control plants remained asymptomatic. The fungus was 100% reisolated and identification confirmed as described (morphologically and with EF-1α sequencing), fulfilling Koch’s postulates. To our knowledge, this is the first report of F. oxysporum f. sp. lactucae causing Fusarium wilt of lettuce in Spain. This severe disease poses a substantial threat to the continued production of lettuce in this region; in Europe, it was first described in Italy (Garibaldi et al. 2002).",2020-06-01 +34046172,Three datasets for nutrition environment measures of food outlets located in the Lower Mississippi Delta region of the United States.,"This data note provides details of a research database containing 266 food outlets located in five rural towns in the Lower Mississippi Delta region of Mississippi, whose nutrition environments were measured from 2016 to 2018.  The food outlet types include grocery stores, convenience stores, full-service restaurants, and fast food restaurants.  
The purpose of this publication is to describe the three datasets for external researchers who may be interested in making use of them.  The datasets are available from the USDA National Agricultural Library's Ag Data Commons under a CC0 1.0 Universal License: https://doi.org/10.15482/USDA.ADC/1503704.",2020-11-09 +31294713,Simulation and visualization of material flows in sanitation systems for streamlined sustainability assessment.,"New and alternative sanitation systems are increasingly discussed and find their way into implementation. However, discussions on sanitation concepts often are held in a rather emotional way. Furthermore, not all the available sanitation concepts might be known to the decision maker. The work presented here attempts to contribute to a good discussion and decision making process by compiling available technologies, by defining easy-to-implement criteria for a sustainability assessment method and by integrating these results into a simulation tool which allows to visualize the related resource fluxes (e.g. those on nutrients, such as N, P and K) and to analyse different sanitation options with regard to their capital and operational costs and with regard to environmental impact criteria such as greenhouse gas emissions. Whilst the calculations are to be considered as being approximate in their nature (due to uncertainties or lack of suitable input data), this tool allows the planners, with sometimes little modelling experience, to consider the characteristics of sanitation systems. Whilst starting from earlier work, such as Eawag's Sanitation Compendium and work on material flow analysis, work described in this contribution merges resource flux modelling, easy-to-use simulation and visualization and methods of life cycle assessment and life cycle costing. 
The simulation tool is freely available on https://www.ifak.eu/en/products/sampsons.",2019-05-01 +34704284,Expression of Concern.,"Expression of Concern: 'miR-377-3p drives malignancy characteristics via upregulating GSK-3β expression and activating NF-κB pathway in hCRC cells', by Wei-Ying Liu, Zhen Yang, Qi Sun, Xi Yang, Yang Hu, Hong Xie, Hui-Jie Gao, Li-Ming Guo, Jian-Ying Yi, Min Liu, and Hua Tang (2017); J Cell Biochem. 2018; 2124-2134: This Expression of Concern is for the above article, published online on 31 August 2017, in Wiley Online Library (https://doi.org/10.1002/jcb.26374), and has been published by agreement between the journal's Editor in Chief, Prof. Dr. Christian Behl, and Wiley Periodicals LLC. Concerns about data integrity were raised by an independent source regarding a possible duplication in Figure 4D. The authors admit an error at compilation of the figures and provided a correction. After a detailed investigation and analysis of the raw data, concerns remain regarding the integrity of the data and the conclusions of the article. The authors' institution, Tianjin Medical University, has been contacted, and an investigation of this paper from the group of Hua Tang is underway. While the conclusions of this investigation are awaited, the journal has decided to issue this Expression of Concern.",2021-10-26 +34552366,A Survey-Weighted Analytic Hierarchy Process to Quantify Authorship.,"

Background

Authorship is a pinnacle activity in academic medicine that often involves collaboration and a mentor-mentee relationship. The International Committee of Medical Journal Editors criteria for authorship (ICMJEc) are intended to prevent abuses of authorship and are used by more than 5500 medical journals. However, the binary ICMJEc have not yet been quantified.

Aim

To develop a numeric scoring rubric for the ICMJEc to corroborate the authenticity of authorship claims.

Methods

The four ICMJEc were separated into the nine authorship components of conception, design, data acquisition, data analysis, interpretation of data, draft, revision, final approval and accountability. In spring 2021, members of an international association of medical editors rated the importance of each authorship component using an 11-point Likert scale ranging from 0 (no importance) to 10 (most important). The median component scores were used to calibrate the pairwise comparisons in an analytic hierarchy process (AHP). The AHP priority weights were multiplied against a four-level perceived effort/capability grade to calculate an authorship score.

Results

Sixty-six decision-making medical editors completed the survey. The components had the median scores/AHP weights: conception 7.5/5.3%; design 8/8.9%; data acquisition 7/3.6%; data analysis 7/3.6%; interpretation of data 8/8.9%; draft 8/8.9%; revision 8/8.9%; final approval 9/20.1%; and accountability 10/31.8%, with Kruskal-Wallis Chi2 = 65.11, p < 0.001.

Conclusion

The editors rated accountability as the most important component of authorship, followed by the final approval of the manuscript; data acquisition had the lowest median importance score for authorship. The scoring rubric (https://tinyurl.com/eyu86y96) transforms the binary tetrad ICMJEc into 9 quantifiable components of authorship, providing a transparent method to objectively assess authorship contributions, determine authorship order and potentially decrease the abuse of authorship. If desired, individual journals can survey their editorial boards and use the AHP method to derive customized weightings for an ICMJEc-based authorship index.",2021-09-15 +30134653,PADFrag: A Database Built for the Exploration of Bioactive Fragment Space for Drug Discovery.,"Structural analyses of drugs and pesticides can enable the identification of new bioactive compounds with novel and diverse scaffolds as well as improve our understanding of the bioactive fragment space. The Pesticide And Drug Fragments (PADFrag) database is a unique bioinformatic-cheminformatic cross-referencing resource that combines detailed bioactive fragment data and potential targets with a strong focus on quantitative, analytic, and molecular-scale information for the exploration of bioactive fragment space for drug discovery ( http://chemyang.ccnu.edu.cn/ccb/database/PADFrag/ ). The main applications of PADFrag are the analysis of the privileged structures within known bioactive molecules, ab initio molecule library design, and core fragment discovery for fragment-based drug design. Other potential applications include prediction of fragment interactions and general pharmaceutical research.",2018-09-06 +30715274,APID database: redefining protein-protein interaction experimental evidences and binary interactomes. 
,"The collection and integration of all the known protein-protein physical interactions within a proteome framework are critical to allow proper exploration of the protein interaction networks that drive biological processes in cells at molecular level. APID Interactomes is a public resource of biological data (http://apid.dep.usal.es) that provides a comprehensive and curated collection of `protein interactomes' for more than 1100 organisms, including 30 species with more than 500 interactions, derived from the integration of experimentally detected protein-to-protein physical interactions (PPIs). We have performed an update of APID database including a redefinition of several key properties of the PPIs to provide a more precise data integration and to avoid false duplicated records. This includes the unification of all the PPIs from five primary databases of molecular interactions (BioGRID, DIP, HPRD, IntAct and MINT), plus the information from two original systematic sources of human data and from experimentally resolved 3D structures (i.e. PDBs, Protein Data Bank files, where more than two distinct proteins have been identified). Thus, APID provides PPIs reported in published research articles (with traceable PMIDs) and detected by valid experimental interaction methods that give evidences about such protein interactions (following the `ontology and controlled vocabulary': www.ebi.ac.uk/ols/ontologies/mi; developed by `HUPO PSI-MI'). Within this data mining framework, all interaction detection methods have been grouped into two main types: (i) `binary' physical direct detection methods and (ii) `indirect' methods. As a result of these redefinitions, APID provides unified protein interactomes including the specific `experimental evidences' that support each PPI, indicating whether the interactions can be considered `binary' (i.e. 
supported by at least one binary detection method) or not.",2019-01-01 +34529494,A Novel Imputation Approach for Sharing Protected Public Health Data.,"Objectives. To develop an imputation method to produce estimates for suppressed values within a shared government administrative data set to facilitate accurate data sharing and statistical and spatial analyses. Methods. We developed an imputation approach that incorporated known features of suppressed Massachusetts surveillance data from 2011 to 2017 to predict missing values more precisely. Our methods for 35 de-identified opioid prescription data sets combined modified previous or next substitution followed by mean imputation and a count adjustment to estimate suppressed values before sharing. We modeled 4 methods and compared the results to baseline mean imputation. Results. We assessed performance by comparing root mean squared error (RMSE), mean absolute error (MAE), and proportional variance between imputed and suppressed values. Our method outperformed mean imputation; we retained 46% of the suppressed value's proportional variance with better precision (22% lower RMSE and 26% lower MAE) than simple mean imputation. Conclusions. Our easy-to-implement imputation technique largely overcomes the adverse effects of low count value suppression with superior results to simple mean imputation. This novel method is generalizable to researchers sharing protected public health surveillance data. (Am J Public Health. 2021; 111(10):1830-1838. https://doi.org/10.2105/AJPH.2021.306432).",2021-09-16 +34496933,Minhee Analysis Package: an integrated software package for detection and management of spontaneous synaptic events.,"To understand the information encoded in a connection between the neurons, postsynaptic current (PSC) has been widely measured as a primary index of synaptic strength in the field of neurophysiology. 
Although several automatic detection methods for PSCs have been proposed to simplify a workflow in the analysis, repetitive steps such as quantification and management of PSC data should be still performed with much effort. Here, we present Minhee Analysis Package, an integrated standalone software package that is capable of detecting, sorting, and quantifying PSC data. First, we developed a stepwise exploratory algorithm to detect PSC and validated our detection algorithm using the simulated and experimental data. We also described all the features and examples of the package so that users can use and follow them properly. In conclusion, our software package is expected to improve the convenience and efficiency of neurophysiologists to analyze PSC data by simplifying the workflow from detection to quantification. Minhee Analysis Package is freely available to download from http://www.github.com/parkgilbong/Minhee_Analysis_Pack .",2021-09-08 +33909476,A Systematic Review of Research on Augmentative and Alternative Communication Interventions for Children Aged 6-10 in the Last Decade.,"Purpose The purpose of this systematic review was to identify, appraise, and critically synthesize the latest available evidence on the effects of augmentative and alternative communication (AAC)-based interventions on communication skills in children aged between 6 and 10 years with mixed diagnoses. Method MEDLINE (OVID), PsycINFO (EBSCO), ERIC (ProQuest), SCIELO (WOS), Teacher Reference Center (EBSCO), and Education Database (ProQuest) were searched. The studies were independently selected by two reviewers for the purposes of the review. The methodological quality of the included studies was assessed, and characteristics and results of the studies were extracted. Results This review included 14 studies from a total of 1,204 found through an electronic search. The AAC interventions studied were effective at improving various outcomes in children with mixed diagnoses. 
Interventions that focused on narrative skills were the most common type. When considering the quality of the studies, the independence of assessors, data analysis, replication, and generalization of interventions were the weaker areas. Conclusions Interventions analyzed in this review improve communication skills, including phonological awareness, vocabulary, requesting, and developing narrative skills in children aged between 6 and 10 years with mixed diagnoses. The results of one study also indicate that the acquisition of skills using an AAC method is superior when the child prefers the method. Supplemental Material https://doi.org/10.23641/asha.14462256.",2021-04-28 +34433316,"First Report of Anthracnose on Camellia japonica Caused by Colletotrichum siamense in Zhejiang Province, China. ","Camellia japonica is an attractive flowering woody plant with great ornamental and medicinal value in China. However, typical anthracnose lesions on the leaves are usually observed in summer in Zhejiang province. A number of 100 trees have been investigated with over 70% of leaf disease incidence. The symptom initially develops from the tip or edge of the leaf and dark green infected spots appear. The diseased spots expand and become yellow brown. The lesions are covered with abundant, small and black acervuli at the center with yellow edges. The diseased leaves become brittle, cracked, and finally fall off. Sixty leaves with typical anthracnose symptoms were sampled from gardens in Lin'an, Zhejiang province. The diseased tissues were cut into pieces and incubated in moist chambers at 25°C. The spore mass was collected using a sterile needle under dissection microscope and put on 2% malt extract agar (MEA). The cultures were incubated at 25°C in the dark for one week. Thirty single spore cultures were obtained and grown on 2% MEA at 25°C for morphological characterization. White aerial mycelia and black conidiomata with orange masses of conidia developed seven days later. 
Conidia are cylindrical in shape, 12-19 μm, mean lengths ranging from 15.5 ± 1.0 to 16.0 ± 1.2 μm. The morphological characteristics are consistent with those of Colletotrichum species. DNA was extracted from three selected isolates (HT-71, J-5, J-20) for sequencing. The partial regions of ribosomal internal transcribed spacers (ITS), calmodulin (CAL), glyceraldehyde-3-phosphate dehydrogenase (GAPDH), actin gene (ACT), beta-tubulin (TUB2), Apn2-Mat1-2 intergenic spacer and partial mating type gene (ApMat), and glutamine synthetase (GS) were amplified as described by Liu et al. (2015). Sequences of the above seven loci for the selected isolates were obtained, and deposited in the GenBank database (MZ014901 to MZ014905, MZ514915 to MZ514922, MZ514925 to MZ514930, MZ497332 and MZ497333). BLAST results indicate they represent Colletotrichum siamense. Multi-locus phylogenetic analysis including ex-type of C. siamense (ICMP18578=CBS130417) and related species was conducted using Maximum Likelihood method, and C. acutatum (CBS 112996) served as the outgroup. The three obtained isolates clustered with the ex-type isolate of C. siamense. Eight leaves on two Camellia plants were inoculated to confirm the pathogenicity in the field. The leaves were surface sprayed with 75% ethanol and dried with sterilized filter paper. The leaves were inoculated using the wound/drop inoculation method: an aliquot of 10 μL of spore suspension (1.0 × 10⁶ conidia per mL) was dropped on the left side of a leaf after wounding once by pin-pricking with a sterilized needle. The sterile water was dropped on the right side of the same leaf in parallel as control. The initial symptoms were observed seven days later, all inoculated leaves developed lesions similar to those observed in the field, and no symptoms observed in the control. The fungus was successfully re-isolated only from lesions inoculated with spore suspension exhibiting morphological characteristics resembling those in C. 
siamense, and further confirmed with sequence data. To our knowledge, this represents the first report of anthracnose on C. japonica caused by C. siamense worldwide. Confirmation of this pathogen in the region will be helpful for the disease management on C. japonica, considering previous report of C. camelliae-japonicae on the same host. References Fu, M., et al. 2019. Persoonia. 42: 1. https://doi.org/10.3767/persoonia.2019.42.01 Guarnaccia, V., et al. 2017. Persoonia. 39: 32. https://doi.org/10.3767/persoonia.2017.39.02 Hou, L. W., et al. 2016. Mycosphere. 7: 1111. Doi 10.5943/mycosphere/si/2c/4 Liu, F., et al. 2015. Persoonia. 35: 63. http://dx.doi.org/10.3767/003158515X687597 Vieira, A. D. S., et al. 2019. Mol. Phylogenet. Evol. https://doi.org/10.1016/j.ympev.2019.106694.",2021-08-25 +33882119,BC-TFdb: a database of transcription factor drivers in breast cancer. ,"Transcription factors (TFs) are DNA-binding proteins, which regulate many essential biological functions. In several cancer types, TF function is altered by various direct mechanisms, including gene amplification or deletion, point mutations, chromosomal translocations, expression alterations, as well as indirectly by non-coding DNA mutations influencing the binding of the TF. TFs are also actively involved in breast cancer (BC) initiation and progression. Herein, we have developed an open-access database, BC-TFdb (Breast Cancer Transcription Factors database), of curated, non-redundant TF involved in BC. The database provides BC driver TFs related information including genomic sequences, proteomic sequences, structural data, pathway information, mutations information, DNA binding residues, survival and therapeutic resources. The database will be a useful platform for researchers to obtain BC-related TF-specific information. High-quality datasets are downloadable for users to evaluate and develop computational methods for drug designing against BC. 
Database URL: https://www.dqweilab-sjtu.com/index.php.",2021-04-01 +29575358,ProtaBank: A repository for protein design and engineering data.,"We present ProtaBank, a repository for storing, querying, analyzing, and sharing protein design and engineering data in an actively maintained and updated database. ProtaBank provides a format to describe and compare all types of protein mutational data, spanning a wide range of properties and techniques. It features a user-friendly web interface and programming layer that streamlines data deposition and allows for batch input and queries. The database schema design incorporates a standard format for reporting protein sequences and experimental data that facilitates comparison of results across different data sets. A suite of analysis and visualization tools are provided to facilitate discovery, to guide future designs, and to benchmark and train new predictive tools and algorithms. ProtaBank will provide a valuable resource to the protein engineering community by storing and safeguarding newly generated data, allowing for fast searching and identification of relevant data from the existing literature, and exploring correlations between disparate data sets. ProtaBank invites researchers to contribute data to the database to make it accessible for search and analysis. ProtaBank is available at https://protabank.org.",2018-04-30 +32990755,"CSVS, a crowdsourcing database of the Spanish population genetic variability.","The knowledge of the genetic variability of the local population is of utmost importance in personalized medicine and has been revealed as a critical factor for the discovery of new disease variants. Here, we present the Collaborative Spanish Variability Server (CSVS), which currently contains more than 2000 genomes and exomes of unrelated Spanish individuals. 
This database has been generated in a collaborative crowdsourcing effort collecting sequencing data produced by local genomic projects and for other purposes. Sequences have been grouped by ICD10 upper categories. A web interface allows querying the database removing one or more ICD10 categories. In this way, aggregated counts of allele frequencies of the pseudo-control Spanish population can be obtained for diseases belonging to the category removed. Interestingly, in addition to pseudo-control studies, some population studies can be made, as, for example, prevalence of pharmacogenomic variants, etc. In addition, this genomic data has been used to define the first Spanish Genome Reference Panel (SGRP1.0) for imputation. This is the first local repository of variability entirely produced by a crowdsourcing effort and constitutes an example for future initiatives to characterize local variability worldwide. CSVS is also part of the GA4GH Beacon network. CSVS can be accessed at: http://csvs.babelomics.org/.",2021-01-01 +34623882,"Rebuilding a US Federal Data Strategy After the End of the ""Community Health Status Indicators"".","For nearly 2 decades, the Community Health Status Indicators tool reliably supplied communities with standardized, local health data and the capacity for peer-community comparisons. At the same time, it created a large community of users who shared learning in addressing local health needs. The tool survived a transition from the Health Resources and Services Administration to the Centers for Disease Control and Prevention before being shuttered in 2017. While new community data tools have come online, nothing has replaced Community Health Status Indicators, and many stakeholders continue to clamor for something new that will enable local health needs assessments, peer comparisons, and creation of a community of solutions. 
The National Committee on Vital and Health Statistics heard from many stakeholders that they still need a replacement data source. (Am J Public Health. 2021;111(10):1865-1873. https://doi.org/10.2105/AJPH.2021.306437).",2021-10-01 +33901273,Protlego: A Python package for the analysis and design of chimeric proteins. ,"Duplication and recombination of protein fragments have led to the highly diverse protein space that we observe today. By mimicking this natural process, the design of protein chimeras via fragment recombination has proven experimentally successful and has opened a new era for the design of customizable proteins. The in-silico building of structural models for these chimeric proteins, however, remains a manual task that requires a considerable degree of expertise and is not amenable for high-throughput studies. Energetic and structural analysis of the designed proteins often require the use of several tools, each with their unique technical difficulties and available in different programming languages or web servers. We implemented a Python package that enables automated, high-throughput design of chimeras and their structural analysis. First, it fetches evolutionarily conserved fragments from a built-in database (also available at fuzzle.uni-bayreuth.de). These relationships can then be represented via networks or further selected for chimera construction via recombination. Designed chimeras or natural proteins are then scored and minimised with the Charmm and Amber forcefields and their diverse structural features can be analysed at ease. Here, we showcase Protlego's pipeline by exploring the relationships between the P-loop and Rossmann superfolds, building and characterising their offspring chimeras. We believe that Protlego provides a powerful new tool for the protein design community. Protlego runs on the Linux platform and is freely available at (https://hoecker-lab.github.io/protlego/) with tutorials and documentation and runs on Linux OS. 
Supplementary data are available at Bioinformatics online.",2021-04-26 +34788788,bollito: a flexible pipeline for comprehensive single-cell RNA-seq analyses. ,"bollito is an automated, flexible and parallelizable computational pipeline for the comprehensive analysis of single-cell RNA-seq data. Starting from FASTQ files or pre-processed expression matrices, bollito performs both basic and advanced tasks in single-cell analysis integrating >30 state-of-the-art tools. This includes quality control, read alignment, dimensionality reduction, clustering, cell-marker detection, differential expression, functional analysis, trajectory inference and RNA velocity. bollito is built using the Snakemake workflow management system, which easily connects each execution step and facilitates the reproducibility of results. bollito's modular design makes it easy to incorporate other packages into the pipeline enabling its expansion with new functionalities. Source code is freely available at https://gitlab.com/bu_cnio/bollito under the MIT license. Supplementary data are available at Bioinformatics online.",2021-11-12 +33615481,Knowing your neighbours: How memory-mediated conspecific avoidance influences home ranges.,"In Focus: Ellison, N., Hatchwell, B. J., Biddiscombe, S. J., Napper, C. J., & Potts, J. R. (2020). Mechanistic home range analysis reveals drivers of space use patterns for a non-territorial passerine. Journal of Animal Ecology. https://doi.org/10.1111/1365-2656.13292. Most animals for which space use has been studied restrict their movements into a constrained spatial area: their home range. The ubiquity of this space-use pattern suggests that home ranges are adaptive in a wide range of ecological contexts, and that they likely arise from general biological mechanisms. In this issue, Ellison et al. use a mechanistic home range analysis (MHRA) to uncover the drivers underlying home range patterns in a passerine that is non-territorial. 
They show that a model integrating both resource preferences (specifically, an attraction to woodland centre), and memory-mediated conspecific avoidance can capture the space-use patterns observed in a wild population of long-tailed tits Aegithalos caudatus. In doing so, their analysis extends the applicability of MHRA to capturing and predicting home range patterns beyond the previously studied cases where spatially exclusive home ranges emerge from scent mark-mediated avoidance responses to neighbouring groups.",2020-12-01 +,Cover Image: Metronomic chemotherapy of cyclophosphamide plus methotrexate for advanced breast cancer: Real‐world data analyses and experience of one center,"The cover image is based on the Original Article Metronomic chemotherapy of cyclophosphamide plus methotrexate in advanced breast cancer: Real‐world data and experience of one center (CHJC‐D‐19‐00380) by Shusen Wang et al., https://doi.org/10.1002/cac2.12029.",2020-05-01 +29069510,Lnc2Meth: a manually curated database of regulatory relationships between long non-coding RNAs and DNA methylation associated with human disease.,"Lnc2Meth (http://www.bio-bigdata.com/Lnc2Meth/), an interactive resource to identify regulatory relationships between human long non-coding RNAs (lncRNAs) and DNA methylation, is not only a manually curated collection and annotation of experimentally supported lncRNAs-DNA methylation associations but also a platform that effectively integrates tools for calculating and identifying the differentially methylated lncRNAs and protein-coding genes (PCGs) in diverse human diseases. The resource provides: (i) advanced search possibilities, e.g. 
retrieval of the database by searching the lncRNA symbol of interest, DNA methylation patterns, regulatory mechanisms and disease types; (ii) abundant computationally calculated DNA methylation array profiles for the lncRNAs and PCGs; (iii) the prognostic values for each hit transcript calculated from the patients clinical data; (iv) a genome browser to display the DNA methylation landscape of the lncRNA transcripts for a specific type of disease; (v) tools to re-annotate probes to lncRNA loci and identify the differential methylation patterns for lncRNAs and PCGs with user-supplied external datasets; (vi) an R package (LncDM) to complete the differentially methylated lncRNAs identification and visualization with local computers. Lnc2Meth provides a timely and valuable resource that can be applied to significantly expand our understanding of the regulatory relationships between lncRNAs and DNA methylation in various human diseases.",2018-01-01 +32805023,Mechanistic insights into SARS-CoV-2 epidemic via revealing the features of SARS-CoV-2 coding proteins and host responses upon its infection.,"

Summary

There are seven known coronaviruses that infect humans: four mild coronaviruses, including HCoV-229E, HCoV-OC43, HCoV-NL63 and HCoV-HKU1, only cause mild respiratory diseases, and three severe coronaviruses, including SARS-CoV, MERS-CoV and SARS-CoV-2, can cause severe respiratory diseases even death of infected patients. Both infection and death caused by SARS-CoV-2 are still rapidly increasing worldwide. In this study, we demonstrate that viral coding proteins of SARS-CoV-2 have distinct features and are most, medium and least conserved with SARS-CoV, MERS-CoV and the rest four mild coronaviruses (HCoV-229E, HCoV-OC43, HCoV-NL63 and HCoV-HKU1), respectively. Moreover, expression of host responsive genes (HRG), HRG-enriched biological processes and HRG-enriched KEGG pathways upon infection of SARS-CoV-2 shows slightly overlapping with SARS-CoV and MERS-CoV but distinctive to the four mild coronaviruses. Interestingly, enrichment of overactivation of neutrophil by HRGs is only and commonly found in infections of severe SARS-CoV-2, SARS-CoV and MERS-CoV but not in the other four mild coronaviruses, and the related gene networks show different patterns. Clinical data support that overactivation of neutrophil for severe patients can be one major factor for the similar clinical symptoms observed in SARS-CoV-2 infection compared to infections of the other two severe coronavirus (SARS-CoV and MERS-CoV). Taken together, our study provides a mechanistic insight into SARS-CoV-2 epidemic via revealing the conserved and distinct features of SARS-CoV-2, raising the critical role of dysregulation of neutrophil for SARS-CoV-2 infection.

Availability and implementation

All data sources and analysis methods related to this manuscript are available in the methods, supplementary materials and GEO database (https://www.ncbi.nlm.nih.gov/geo/).

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-01-01 +31086734,OsteoporosAtlas: a human osteoporosis-related gene database.,"

Background

Osteoporosis is a common, complex disease of bone with a strong heritable component, characterized by low bone mineral density, microarchitectural deterioration of bone tissue and an increased risk of fracture. Due to limited drug selection for osteoporosis and increasing morbidity, mortality of osteoporotic fractures, osteoporosis has become a major health burden in aging societies. Current researches for identifying specific loci or genes involved in osteoporosis contribute to a greater understanding of the pathogenesis of osteoporosis and the development of better diagnosis, prevention and treatment strategies. However, little is known about how most causal genes work and interact to influence osteoporosis. Therefore, it is greatly significant to collect and analyze the studies involved in osteoporosis-related genes. Unfortunately, the information about all these osteoporosis-related genes is scattered in a large amount of extensive literature. Currently, there is no specialized database for easily accessing relevant information about osteoporosis-related genes and miRNAs.

Methods

We extracted data from literature abstracts in PubMed by text-mining and manual curation. Moreover, a local MySQL database containing all the data was developed with PHP on a Windows server.

Results

OsteoporosAtlas (http://biokb.ncpsb.org/osteoporosis/), the first specialized database for easily accessing relevant information such as osteoporosis-related genes and miRNAs, was constructed and served for researchers. OsteoporosAtlas enables users to retrieve, browse and download osteoporosis-related genes and miRNAs. Gene ontology and pathway analyses were integrated into OsteoporosAtlas. It currently includes 617 human encoding genes, 131 human non-coding miRNAs, and 128 functional roles. We think that OsteoporosAtlas will be an important bioinformatics resource to facilitate a better understanding of the pathogenesis of osteoporosis and developing better diagnosis, prevention and treatment strategies.",2019-04-26 +33894373,Comparative database search engine analysis on massive tandem mass spectra of pork-based food products for halal proteomics.,"Mass spectrometry-based proteomics relies on dedicated software for peptide and protein identification. These software include open-source or commercial-based search engines; wherein, they employ different algorithms to establish their scoring and identified proteins. Although previous comparative studies have differentiated the proteomics results from different software, there are still yet studies specifically been conducted to compare and evaluate the search engine in the field of halal analysis. This is important because the halal analysis is often using commercial meat samples that have been subjected to various processing, further complicating its analysis. Thus, this study aimed to assess three open-source search engines (Comet, X! Tandem, and ProteinProspector) and a commercial-based search engine (ProteinPilot™) against 135 raw tandem mass spectrometry data files from 15 types of pork-based food products for halal analysis. 
Each database search engine contained high false-discovery rate (FDR); however, a post-searching algorithm called PeptideProphet managed to reduce the FDR, except for ProteinProspector and ProteinPilot™. From this study, the combined database search engine (executed by iProphet) reveals a thorough protein list for pork-based food products; wherein the most abundant proteins are myofibrillar proteins. Thus, this proteomics study will aid the identification of potential peptide and protein biomarkers for future precision halal analysis. SIGNIFICANCE: A critical challenge of halal proteomics is the availability of a database to confirm the inferential peptides as well as proteins. Currently, the established database such as UniProtKB is related to animal proteome; however, the halal proteomics is related to the highly processed meat-based food products. This study highlights the use of different database search engines (Comet, X! Tandem, ProteinProspector, and ProteinPilot™) and their respective algorithms to analyse 135 raw tandem mass spectrometry data files from 15 types of pork-based food products. This is the first attempt that has compared different database search engines in the context of halal proteomics to ensure the effectiveness of controlling the FDR. Previous studies were just focused on the advantages of a certain algorithm over another. Moreover, other previous studies also have mainly reported the use of mass spectrometry-based shotgun proteomics for meat authentication (the most similar field to halal analysis), but none of the studies were reported on halal aspects that used samples originated from highly processed food products. Hence, a systematic comparative study is duly needed for a more comprehensive and thorough proteomics analysis for such samples. In this study, our combinatorial approach for halal proteomics results from the different search engines used (Comet, X! 
Tandem, and ProteinProspector) has successfully generated a comprehensive spectral library for the pork-based meat products. This combined spectral library is freely available at https://data.mendeley.com/datasets/6dmm8659rm/3. Thus far, this is the first and new attempt at establishing a spectral library for halal proteomics. We also believe this study is a pioneer for halal proteomics that aimed at non-conventional and non-model organism proteomics, protein analytics, protein bioinformatics, and potential biomarker discovery.",2021-04-21 +31665454,ChlamDB: a comparative genomics database of the phylum Chlamydiae and other members of the Planctomycetes-Verrucomicrobiae-Chlamydiae superphylum.,"ChlamDB is a comparative genomics database containing 277 genomes covering the entire Chlamydiae phylum as well as their closest relatives belonging to the Planctomycetes-Verrucomicrobiae-Chlamydiae (PVC) superphylum. Genomes can be compared, analyzed and retrieved using accessions numbers of the most widely used databases including COG, KEGG ortholog, KEGG pathway, KEGG module, Pfam and InterPro. Gene annotations from multiple databases including UniProt (curated and automated protein annotations), KEGG (annotation of pathways), COG (orthology), TCDB (transporters), STRING (protein-protein interactions) and InterPro (domains and signatures) can be accessed in a comprehensive overview page. Candidate effectors of the Type III secretion system (T3SS) were identified using four in silico methods. The identification of orthologs among all PVC genomes allows users to perform large-scale comparative analyses and to identify orthologs of any protein in all genomes integrated in the database. Phylogenetic relationships of PVC proteins and their closest homologs in RefSeq, comparison of transmembrane domains and Pfam domains, conservation of gene neighborhood and taxonomic profiles can be visualized using dynamically generated graphs, available for download. 
As a central resource for researchers working on chlamydia, chlamydia-related bacteria, verrucomicrobia and planctomyces, ChlamDB facilitates the access to comprehensive annotations, integrates multiple tools for comparative genomic analyses and is freely available at https://chlamdb.ch/. Database URL: https://chlamdb.ch/.",2020-01-01 +33761699,Increased bleeding events with the addition of apixaban to the dual anti-platelet regimen for the treatment of patients with acute coronary syndrome: A meta-analysis.,"

Background

Dual anti-platelet therapy (DAPT) with aspirin and clopidogrel has been the mainstay of treatment for patients with acute coronary syndrome (ACS). However, the recurrence of thrombotic events, potential aspirin and clopidogrel hypo-responsiveness, and other limitations of DAPT have led to the development of newer oral anti-thrombotic drugs. Apixaban, a new non-vitamin K antagonist, has been approved for use. In this meta-analysis, we aimed to compare the bleeding outcomes observed with the addition of apixaban to DAPT for the treatment of patients with ACS.

Methods

Online databases including EMBASE, Cochrane Central, http://www.ClinicalTrials.gov, MEDLINE and Web of Science were searched for English based publications comparing the use of apixaban added to DAPT for the treatment of patients with ACS. Different categories of bleeding events and cardiovascular outcomes were assessed. The analysis was carried out by the RevMan software version 5.4. Odds ratios (OR) with 95% confidence intervals (CI) were used to represent the data following analysis.

Results

This research analysis consisted of 4 trials with a total number of 9010 participants. Thrombolysis in myocardial infarction (TIMI) defined major bleeding (OR: 2.45, 95% CI: 1.45-4.12; P = .0008), TIMI defined minor bleeding (OR: 3.12, 95% CI: 1.71-5.70; P = .0002), International society of thrombosis and hemostasis (ISTH) major bleeding (OR: 2.49, 95% CI: 1.80-3.45; P = .00001) and Global Use of Strategies to Open Occluded Arteries (GUSTO) defined severe bleeding (OR: 3.00, 95% CI: 1.56-5.78; P = .01) were significantly increased with the addition of apixaban to DAPT versus DAPT alone in these patients with ACS. However fatal bleeding (OR: 10.96, 95% CI: 0.61-198.3; P = .11) was not significantly different.

Conclusions

Addition of the novel oral anticoagulant apixaban to the DAPT regimen significantly increased bleeding and therefore did not show any beneficial effect in these patients with ACS. However, due to the extremely limited data, we apparently have to rely on future larger studies to confirm this hypothesis.",2021-03-01 +34875998,SpinSPJ: a novel NMR scripting system to implement artificial intelligence and advanced applications.,"

Background

Software for nuclear magnetic resonance (NMR) spectrometers offer general functionality of instrument control and data processing; these applications are often developed with non-scripting languages. NMR users need to flexibly integrate rapidly developing NMR applications with emerging technologies. Scripting systems offer open environments for NMR users to write custom programs. However, existing scripting systems have limited capabilities for both extending the functionality of NMR software's non-script main program and using advanced native script libraries to support specialized application domains (e.g., biomacromolecules and metabolomics). Therefore, it is essential to design a novel scripting system to address both of these needs.

Result

Here, a novel NMR scripting system named SpinSPJ is proposed. It works as a plug-in in the Java based NMR spectrometer software SpinStudioJ. In the scripting system, both Java based NMR methods and original CPython based libraries are supported. A module has been developed as a bridge to integrate the runtime environments of Java and CPython. The module works as an extension in the CPython environment and interacts with Java via the Java Native Interface. Leveraging this bridge, Java based instrument control and data processing methods of SpinStudioJ can be called with the CPython style. Compared with traditional scripting systems, SpinSPJ better supports both extending the non-script main program and implementing advanced NMR applications with a rich variety of script libraries. NMR researchers can easily call functions of instrument control and data processing as well as developing complex functionality (such as multivariate statistical analysis, deep learning, etc.) with CPython native libraries.

Conclusion

SpinSPJ offers a user-friendly environment to implement custom functionality leveraging its powerful basic NMR and rich CPython libraries. NMR applications with emerging technologies can be easily integrated. The scripting system is free of charge and can be downloaded by visiting http://www.spinstudioj.net/spinspj .",2021-12-07 +,PSVIII-2 Dietary supplementation of Bacillus subtilis altered metabolites in the intestine of weaned pigs,"Abstract This experiment aimed to investigate the effects of dietary supplementation of Bacillus subtilis probiotics on metabolites in the intestines of weaned pigs experimentally infected with an enterotoxigenic Escherichia coli (E. coli). Forty-eight weaned pigs (6.17 ± 0.36 kg BW) were individually housed in disease containment rooms and randomly allotted to one of four dietary treatments: negative control (NC, control diet without E. coli challenge), positive control (PC, control diet with E. coli challenge), and supplementation of 50 mg/kg of carbadox or 500 mg/kg of Bacillus subtilis probiotics. The experiment lasted 28 days with 7 days before and 21 days after the first E. coli inoculation. The F18 E. coli were given to pigs at 1010 CFU/3 mL dose for three consecutive days. At the end of the experiment, all pigs were euthanized to collect ileal mucosa and colon digesta for the analysis of metabolomic profiles by gas chromatography time of flight-mass spectrometer (GCTOF-MS). All data were analyzed by an online MetaboAnalyst tool (https://www.metaboanalyst.ca/). Statistical significance was declared at P < 0.05 and the false discovery rate–adjusted P value (q value) < 0.20. A total of 282 (141 identified and 121 unidentified) and 196 (127 identified and 69 unidentified) metabolites were detected in ileal mucosa and colon digesta, respectively. Forty-nine identified metabolites in ileal mucosa significantly differed among experimental groups (P < 0.05; q < 0.20). 
The most impacted metabolic pathways were galactose metabolism, aspartate and glutamate metabolism, fructose and mannose degradation, pentose phosphate pathway, and urea cycle. However, in colon digesta, only 7 identified metabolites differed among experimental groups (P < 0.05; q < 0.20) and the majority of them were involved in purine metabolism. Results of metabolomics indicated that supplementation of Bacillus subtilis or antibiotics altered metabolites in the intestines of weaned pigs. In particular, more treatment impacts were observed in the metabolite profiles in ileal mucosa compared with colon digesta.",2020-11-01 +32103267,Predicted Drosophila Interactome Resource and web tool for functional interpretation of differentially expressed genes. ,"Drosophila melanogaster is a well-established model organism that is widely used in genetic studies. This species enjoys the availability of a wide range of research tools, well-annotated reference databases and highly similar gene circuitry to other insects. To facilitate molecular mechanism studies in Drosophila, we present the Predicted Drosophila Interactome Resource (PDIR), a database of high-quality predicted functional gene interactions. These interactions were inferred from evidence in 10 public databases providing information for functional gene interactions from diverse perspectives. The current version of PDIR includes 102 835 putative functional associations with balanced sensitivity and specificity, which are expected to cover 22.56% of all Drosophila protein interactions. This set of functional interactions is a good reference for hypothesis formulation in molecular mechanism studies. At the same time, these interactions also serve as a high-quality reference interactome for gene set linkage analysis (GSLA), which is a web tool for the interpretation of the potential functional impacts of a set of changed genes observed in transcriptomics analyses. 
In a case study, we show that the PDIR/GSLA system was able to produce a more comprehensive and concise interpretation of the collective functional impact of multiple simultaneously changed genes compared with the widely used gene set annotation tools, including PANTHER and David. PDIR and its associated GSLA service can be accessed at http://drosophila.biomedtzc.cn.",2020-01-01 +,Using the Galaxy Training Network tutorial library for bioinformatics training programs,"Does your facility do bioinformatics training? Would your facility like to do bioinformatics training? The Galaxy Training Network library (https://training.galaxyproject.org/) is an easy way to offer bioinformatics training at your facility with minimal preparation time and startup cost. These materials feature slides, hands-on-tutorials, and training data sets. The library features well over 100 slide sets and hands-on tutorials, created by over 130 Galaxy community members, and covering a wide range of bioinformatics topics. Galaxy (https://galaxyproject.org) is a widely adopted platform for bioinformatics analysis and training, allowing trainers, learners, and researchers to focus on concepts and tools, rather than Linux systems administration and learning command line interfaces. The library of materials is free to use and adapt as needed. The GTN community is supportive and responsive to community needs. All materials are kept in GitHub and are managed in a transparent, community driven manner.",2020-08-01 +34793257,Opportunities to Enhance Children's Communication Development at School in Underserved Communities.,"

Purpose

Alternative service delivery approaches are required to provide support for children with communication difficulties in underserved communities. Schools have a unique set of assets that can be utilized to provide this support. This study explored what the education sector and classrooms in the early years of schooling offer as support for children with communication difficulties in an underserved Majority World country, the Maldives. The objective was to identify opportunities to enhance support provided for these children.

Method

A qualitative multimethod approach was used involving (a) 520 min of classroom observational data from four remote schools, (b) interviews with four special education needs teachers, and (c) an interview with a Ministry of Education official. Classroom observational data were analyzed using the Communication Supporting Classroom Observation Tool. Deductive content analysis was used to analyze the interview data.

Results

The support system aimed to reflect the Inclusive Education Policy of the Maldives. The Ministry of Education official and teachers raised concerns regarding lack of allied health services such as speech-language therapy in schools. Teachers frequently used certain communication supporting interactions such as imitation in classrooms. Missed opportunities to enhance communication were observed, including limited use of some interaction features such as modeling language, limited planned opportunities for children to interact in class, and limited resources in the environment to develop communication in Grade 1 and 2 compared to preschool.

Conclusion

Findings suggest building capacity among teachers and training teachers on identified classroom communication support areas to enhance support for children with communication difficulties.

Supplemental material

https://doi.org/10.23641/asha.17003980.",2021-11-18 +34745946,SMRT: Randomized Data Transformation for Cancer Subtyping and Big Data Analysis.,"Cancer is an umbrella term that includes a range of disorders, from those that are fast-growing and lethal to indolent lesions with low or delayed potential for progression to death. The treatment options, as well as treatment success, are highly dependent on the correct subtyping of individual patients. With the advancement of high-throughput platforms, we have the opportunity to differentiate among cancer subtypes from a holistic perspective that takes into consideration phenomena at different molecular levels (mRNA, methylation, etc.). This demands powerful integrative methods to leverage large multi-omics datasets for a better subtyping. Here we introduce Subtyping Multi-omics using a Randomized Transformation (SMRT), a new method for multi-omics integration and cancer subtyping. SMRT offers the following advantages over existing approaches: (i) the scalable analysis pipeline allows researchers to integrate multi-omics data and analyze hundreds of thousands of samples in minutes, (ii) the ability to integrate data types with different numbers of patients, (iii) the ability to analyze un-matched data of different types, and (iv) the ability to offer users a convenient data analysis pipeline through a web application. We also improve the efficiency of our ensemble-based, perturbation clustering to support analysis on machines with memory constraints. In an extensive analysis, we compare SMRT with eight state-of-the-art subtyping methods using 37 TCGA and two METABRIC datasets comprising a total of almost 12,000 patient samples from 28 different types of cancer. We also performed a number of simulation studies. We demonstrate that SMRT outperforms other methods in identifying subtypes with significantly different survival profiles. 
In addition, SMRT is extremely fast, being able to analyze hundreds of thousands of samples in minutes. The web application is available at http://SMRT.tinnguyen-lab.com. The R package will be deposited to CRAN as part of our PINSPlus software suite.",2021-10-20 +33045721,ViruSurf: an integrated database to investigate viral sequences.,"ViruSurf, available at http://gmql.eu/virusurf/, is a large public database of viral sequences and integrated and curated metadata from heterogeneous sources (RefSeq, GenBank, COG-UK and NMDC); it also exposes computed nucleotide and amino acid variants, called from original sequences. A GISAID-specific ViruSurf database, available at http://gmql.eu/virusurf_gisaid/, offers a subset of these functionalities. Given the current pandemic outbreak, SARS-CoV-2 data are collected from the four sources; but ViruSurf contains other virus species harmful to humans, including SARS-CoV, MERS-CoV, Ebola and Dengue. The database is centered on sequences, described from their biological, technological and organizational dimensions. In addition, the analytical dimension characterizes the sequence in terms of its annotations and variants. The web interface enables expressing complex search queries in a simple way; arbitrary search queries can freely combine conditions on attributes from the four dimensions, extracting the resulting sequences. Several example queries on the database confirm and possibly improve results from recent research papers; results can be recomputed over time and upon selected populations. 
Effective search over large and curated sequence data may enable faster responses to future threats that could arise from new viruses.",2021-01-01 +33305318,PED in 2021: a major update of the protein ensemble database for intrinsically disordered proteins.,"The Protein Ensemble Database (PED) (https://proteinensemble.org), which holds structural ensembles of intrinsically disordered proteins (IDPs), has been significantly updated and upgraded since its last release in 2016. The new version, PED 4.0, has been completely redesigned and reimplemented with cutting-edge technology and now holds about six times more data (162 versus 24 entries and 242 versus 60 structural ensembles) and a broader representation of state of the art ensemble generation methods than the previous version. The database has a completely renewed graphical interface with an interactive feature viewer for region-based annotations, and provides a series of descriptors of the qualitative and quantitative properties of the ensembles. High quality of the data is guaranteed by a new submission process, which combines both automatic and manual evaluation steps. A team of biocurators integrate structured metadata describing the ensemble generation methodology, experimental constraints and conditions. A new search engine allows the user to build advanced queries and search all entry fields including cross-references to IDP-related resources such as DisProt, MobiDB, BMRB and SASBDB. 
We expect that the renewed PED will be useful for researchers interested in the atomic-level understanding of IDP function, and promote the rational, structure-based design of IDP-targeting drugs.",2021-01-01 +31429284,One Thousand and One Software for Proteomics: Tales of the Toolmakers of Science.,"Proteomics is a highly dynamic field driven by frequent introduction of new technological approaches, leading to high demand for new software tools and the concurrent development of many methods for data analysis, processing, and storage. The rapidly changing landscape of proteomics software makes finding a tool fit for a particular purpose a significant challenge. The comparison of software and the selection of tools capable to perform a certain operation on a given type of data rely on their detailed annotation using well-defined descriptors. However, finding accurate information including tool input/output capabilities can be challenging and often heavily depends on manual curation efforts. This is further hampered by a rather low half-life of most of the tools, thus demanding the maintenance of a resource with updated information about the tools. We present here our approach to curate a collection of 189 software tools with detailed information about their functional capabilities. We furthermore describe our efforts to reach out to the proteomics community for their engagement, which further increased the catalog to >750 tools being about 70% of the estimated number of 1097 tools existing for proteomics data analysis. Descriptions of all annotated tools are available at  https://proteomics.bio.tools.",2019-08-29 +33898816,FermFooDb: A database of bioactive peptides derived from fermented foods.,"Globally fermented foods are in demands due to their functional and nutritional benefits. These foods are sources of probiotic organisms and bioactive peptides, various amino acids, enzymes etc. that provides numerous health benefits. 
FermFooDb (https://webs.iiitd.edu.in/raghava/fermfoodb/) is a manually curated database of bioactive peptides derived from wide range of foods that maintain comprehensive information about peptides and process of fermentation. This database comprises of 2205 entries with following major fields, peptide sequence, Mass and IC50, food source, functional activity, fermentation conditions, starter culture, testing conditions of sequences in vitro or in vivo, type of model and method of analysis. The bioactive peptides in our database have wide range of therapeutic potentials that includes antihypertensive, ACE-inhibitory, antioxidant, antimicrobial, immunomodulatory and cholesterol lowering peptides. These bioactive peptides were derived from different types of fermented foods that include milk, cheese, yogurt, wheat and rice. Numerous, web-based tools have been integrated to retrieve data, peptide mapping of proteins, similarity search and multiple-sequence alignment. This database will be useful for the food industry and researchers to explore full therapeutic potential of fermented foods from specific cultures.",2021-04-08 +33836063,MendelVar: gene prioritization at GWAS loci using phenotypic enrichment of Mendelian disease genes.,"

Motivation

Gene prioritization at human GWAS loci is challenging due to linkage-disequilibrium and long-range gene regulatory mechanisms. However, identifying the causal gene is crucial to enable identification of potential drug targets and better understanding of molecular mechanisms. Mapping GWAS traits to known phenotypically relevant Mendelian disease genes near a locus is a promising approach to gene prioritization.

Results

We present MendelVar, a comprehensive tool that integrates knowledge from four databases on Mendelian disease genes with enrichment testing for a range of associated functional annotations such as Human Phenotype Ontology, Disease Ontology and variants from ClinVar. This open web-based platform enables users to strengthen the case for causal importance of phenotypically matched candidate genes at GWAS loci. We demonstrate the use of MendelVar in post-GWAS gene annotation for type 1 diabetes, type 2 diabetes, blood lipids and atopic dermatitis.

Availability and implementation

MendelVar is freely available at https://mendelvar.mrcieu.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-01 +28110602,SITEX 2.0: Projections of protein functional sites on eukaryotic genes. Extension with orthologous genes.,"Functional sites define the diversity of protein functions and are the central object of research of the structural and functional organization of proteins. The mechanisms underlying protein functional sites emergence and their variability during evolution are distinguished by duplication, shuffling, insertion and deletion of the exons in genes. The study of the correlation between a site structure and exon structure serves as the basis for the in-depth understanding of sites organization. In this regard, the development of programming resources that allow the realization of the mutual projection of exon structure of genes and primary and tertiary structures of encoded proteins is still the actual problem. Previously, we developed the SitEx system that provides information about protein and gene sequences with mapped exon borders and protein functional sites amino acid positions. The database included information on proteins with known 3D structure. However, data with respect to orthologs was not available. Therefore, we added the projection of sites positions to the exon structures of orthologs in SitEx 2.0. We implemented a search through database using site conservation variability and site discontinuity through exon structure. Inclusion of the information on orthologs allowed to expand the possibilities of SitEx usage for solving problems regarding the analysis of the structural and functional organization of proteins. Database URL: http://www-bionet.sscc.ru/sitex/ .",2017-01-23 +33979320,Decreased HLF Expression Predicts Poor Survival in Lung Adenocarcinoma.,"BACKGROUND Lung adenocarcinoma (LUAD) is a type of non-small cell carcinoma. Its pathogenesis is being explored and there is no cure for the disease. 
MATERIAL AND METHODS The Gene Expression Omnibus (GEO) was searched to obtain data on expression of messenger RNA. GEO2R, an interactive web tool, was used to calculate the differentially expressed genes (DEGs) in LUAD. All the DEGs from different datasets were imported into VENNY 2.1 (https://bioinfogp.cnb.csic.es/tools/venny/index.html) to identify the intersection of the DEGs. An online analysis tool, the Database for Annotation, Visualization, and Integrated Discovery (DAVID), was used to help understand the biological meaning of DEG enrichment in LUAD. Cytoscape 3.7.2 was used to perform centrality analysis and visualize hub genes and related networks. Furthermore, the prognostic value of the hub genes was evaluated with the Kaplan-Meier plotter survival analysis tool. RESULTS The GEO database was used to obtain RNA sequencing information for LUAD and normal tissue from the GSE118370, GSE136043, and GSE140797 datasets. A total of 376 DEGs were identified from GSE118370, 248 were identified from GSE136403, and 718 DEGs were identified from GSE140797. The 10 genes with the highest degrees of expression - the hub genes - were CAV1, TEK, SLIT2, RHOJ, DGSX, HLF, MEIS1, PTPRD, FOXF1, and ADRB2. In addition, Kaplan-Meier survival evaluation showed that CAV1, TEK, SLIT2, HLF, MEIS1, PTPRD, FOXF1, and ADRB2 were associated with favorable outcomes for LUAD. CONCLUSIONS CAV1, TEK, SLIT2, HLF, MEIS1, PTPRD, FOXF1, and ADRB2 are hub genes in the DEG interaction network for LUAD and are involved in the development of and prognosis for the disease. The mechanisms underlying these genes should be the subject of further studies.",2021-05-12 +32695214,A data workflow to support plant breeding decisions from a terrestrial field-based high-throughput plant phenotyping system.,"Field-based high-throughput plant phenotyping (FB-HTPP) has been a primary focus for crop improvement to meet the demands of a growing population in a changing environment. 
Over the years, breeders, geneticists, physiologists, and agronomists have been able to improve the understanding between complex dynamic traits and plant response to changing environmental conditions using FB-HTPP. However, the volume, velocity, and variety of data captured by FB-HTPP can be problematic, requiring large data stores, databases, and computationally intensive data processing pipelines. To be fully effective, FB-HTTP data workflows including applications for database implementation, data processing, and data interpretation must be developed and optimized. At the US Arid Land Agricultural Center in Maricopa Arizona, USA a data workflow was developed for a terrestrial FB-HTPP platform that utilized a custom Python application and a PostgreSQL database. The workflow developed for the HTPP platform enables users to capture and organize data and verify data quality before statistical analysis. The data from this platform and workflow were used to identify plant lodging and heat tolerance, enhancing genetic gain by improving selection accuracy in an upland cotton breeding program. An advantage of this platform and workflow was the increased amount of data collected throughout the season, while a main limitation was the start-up cost.",2020-07-16 +33465344,Elucidating the influence of environmentally relevant toxic metal mixture on molecular mechanisms involved in the development of neurodegenerative diseases: In silico toxicogenomic data-mining.,"This in silico toxicogenomic analysis aims to: (i) testify the hypothesis about the influence of the environmentally relevant toxic metals (lead, methylmercury (organic form of mercury), cadmium and arsenic) on molecular mechanisms involved in amyotrophic lateral sclerosis (ALS), Parkinson's Disease (PD) and Alzheimer's disease (AD) development; and (ii) demonstrate the capability of in silico toxicogenomic data-mining for distinguishing the probable mechanisms of mixture-induced toxic effects. 
The Comparative Toxicogenomics Database (CTD; http://ctd.mdibl.org) and Cytoscape software were used as the main data-mining tools in this analysis. The results have shown that there were 7, 13 and 14 common genes for all the metals present in the mixture for each of the selected neurodegenerative disease (ND), respectively: ALS, PD and AD. Physical interactions (68.18%) were the most prominent interactions between the genes extracted for ALS, co-expression (60.85%) for PD and interactions predicted by the server (44.30%) for AD. SOD2 gene was noted as the mutual gene for all the selected ND. Oxidative stress, folate metabolism, vitamin B12, AGE-RAGE, apoptosis were noted as the key disrupted molecular pathways that contribute to the neurodegenerative disease's development. Gene ontology analysis revealed biological processes affected by the investigated mixture (glutathione metabolic process was listed as the most important for ALS, cellular response to toxic substance for PD, and neuron death for AD). Our results emphasize the role of oxidative stress, particularly SOD2, in neurodegeneration triggered by environmental toxic metal mixture and give a new insight into common molecular mechanisms involved in ALS, PD and AD pathology.",2021-01-16 +33848166,MAPLE: A Microbiome Analysis Pipeline Enabling Optimal Peptide Search and Comparative Taxonomic and Functional Analysis.,"Metaproteomics by mass spectrometry (MS) is a powerful approach to profile a large number of proteins expressed by all organisms in a highly complex biological or ecological sample, which is able to provide a direct and quantitative assessment of the functional makeup of a microbiota. The human gastrointestinal microbiota has been found playing important roles in human physiology and health, and metaproteomics has been shown to shed light on multiple novel associations between microbiota and diseases. MS-powered proteomics generally relies on genome data to define search space. 
However, metaproteomics, which simultaneously analyzes all proteins from hundreds to thousands of species, faces significant challenges regarding database search and interpretation of results. To overcome these obstacles, we have developed a user-friendly microbiome analysis pipeline (MAPLE, freely downloadable at http://maple.rx.umaryland.edu/), which is able to define an optimal search space by inferring proteomes specific to samples following the principle of parsimony. MAPLE facilitates highly comparable or better peptide identification compared to a sample-specific metagenome-guided search. In addition, we implemented an automated peptide-centric enrichment analysis function in MAPLE to address issues of traditional protein-centric comparison, enabling straightforward and comprehensive comparison of taxonomic and functional makeup between microbiota.",2021-04-13 +33589856,Connecting to the oceans: supporting ocean literacy and public engagement.,"Improved public understanding of the ocean and the importance of sustainable ocean use, or ocean literacy, is essential for achieving global commitments to sustainable development by 2030 and beyond. However, growing human populations (particularly in mega-cities), urbanisation and socio-economic disparity threaten opportunities for people to engage and connect directly with ocean environments. Thus, a major challenge in engaging the whole of society in achieving ocean sustainability by 2030 is to develop strategies to improve societal connections to the ocean. The concept of ocean literacy reflects public understanding of the ocean, but is also an indication of connections to, and attitudes and behaviours towards, the ocean. Improving and progressing global ocean literacy has potential to catalyse the behaviour changes necessary for achieving a sustainable future. 
As part of the Future Seas project (https://futureseas2030.org/), this paper aims to synthesise knowledge and perspectives on ocean literacy from a range of disciplines, including but not exclusive to marine biology, socio-ecology, philosophy, technology, psychology, oceanography and human health. Using examples from the literature, we outline the potential for positive change towards a sustainable future based on knowledge that already exists. We focus on four drivers that can influence and improve ocean literacy and societal connections to the ocean: (1) education, (2) cultural connections, (3) technological developments, and (4) knowledge exchange and science-policy interconnections. We explore how each driver plays a role in improving perceptions of the ocean to engender more widespread societal support for effective ocean management and conservation. In doing so, we develop an ocean literacy toolkit, a practical resource for enhancing ocean connections across a broad range of contexts worldwide.",2021-02-10 +30365030,ETCM: an encyclopaedia of traditional Chinese medicine.,"Traditional Chinese medicine (TCM) is not only an effective solution for primary health care, but also a great resource for drug innovation and discovery. To meet the increasing needs for TCM-related data resources, we developed ETCM, an Encyclopedia of Traditional Chinese Medicine. ETCM includes comprehensive and standardized information for the commonly used herbs and formulas of TCM, as well as their ingredients. The herb basic property and quality control standard, formula composition, ingredient drug-likeness, as well as many other information provided by ETCM can serve as a convenient resource for users to obtain thorough information about a herb or a formula. To facilitate functional and mechanistic studies of TCM, ETCM provides predicted target genes of TCM ingredients, herbs, and formulas, according to the chemical fingerprint similarity between TCM ingredients and known drugs. 
A systematic analysis function is also developed in ETCM, which allows users to explore the relationships or build networks among TCM herbs, formulas, ingredients, gene targets, and related pathways or diseases. ETCM is freely accessible at http://www.nrc.ac.cn:9090/ETCM/. We expect ETCM to develop into a major data warehouse for TCM and to promote TCM related researches and drug development in the future.",2019-01-01 +33388027,Propedia: a database for protein-peptide identification based on a hybrid clustering algorithm.,"

Background

Protein-peptide interactions play a fundamental role in a wide variety of biological processes, such as cell signaling, regulatory networks, immune responses, and enzyme inhibition. Peptides are characterized by low toxicity and small interface areas; therefore, they are good targets for therapeutic strategies, rational drug planning and protein inhibition. Approximately 10% of the ethical pharmaceutical market is protein/peptide-based. Furthermore, it is estimated that 40% of protein interactions are mediated by peptides. Despite the fast increase in the volume of biological data, particularly on sequences and structures, there remains a lack of broad and comprehensive protein-peptide databases and tools that allow the retrieval, characterization and understanding of protein-peptide recognition and consequently support peptide design.

Results

We introduce Propedia, a comprehensive and up-to-date database with a web interface that permits clustering, searching and visualizing of protein-peptide complexes according to varied criteria. Propedia comprises over 19,000 high-resolution structures from the Protein Data Bank including structural and sequence information from protein-peptide complexes. The main advantage of Propedia over other peptide databases is that it allows a more comprehensive analysis of similarity and redundancy. It was constructed based on a hybrid clustering algorithm that compares and groups peptides by sequences, interface structures and binding sites. Propedia is available through a graphical, user-friendly and functional interface where users can retrieve, and analyze complexes and download each search data set. We performed case studies and verified the utility of Propedia scores to rank promising interacting peptides. In a study involving predicting peptides to inhibit SARS-CoV-2 main protease, we showed that Propedia scores related to similarity between different peptide complexes with SARS-CoV-2 main protease are in agreement with molecular dynamics free energy calculation.

Conclusions

Propedia is a database and tool to support structure-based rational design of peptides for special purposes. Protein-peptide interactions can be useful to predict, classifying and scoring complexes or for designing new molecules as well. Propedia is up-to-date as a ready-to-use webserver with a friendly and resourceful interface and is available at: https://bioinfo.dcc.ufmg.br/propedia.",2021-01-02 +34605428,eSPC: an online data-analysis platform for molecular biophysics.,"All biological processes rely on the formation of protein-ligand, protein-peptide and protein-protein complexes. Studying the affinity, kinetics and thermodynamics of binding between these pairs is critical for understanding basic cellular mechanisms. Many different technologies have been designed for probing interactions between biomolecules, each based on measuring different signals (fluorescence, heat, thermophoresis, scattering and interference, among others). Evaluation of the data from binding experiments and their fitting is an essential step towards the quantification of binding affinities. Here, user-friendly online tools to analyze biophysical data from steady-state fluorescence spectroscopy, microscale thermophoresis and differential scanning fluorimetry experiments are presented. The modules of the data-analysis platform (https://spc.embl-hamburg.de/) contain classical thermodynamic models and clear user guidelines for the determination of equilibrium dissociation constants (Kd) and thermal unfolding parameters such as melting temperatures (Tm).",2021-09-24 +33135727,Mortality Tracker: the COVID-19 case for real time web APIs as epidemiology commons.,"

Motivation

Mortality Tracker is an in-browser application for data wrangling, analysis, dissemination and visualization of public time series of mortality in the United States. It was developed in response to requests by epidemiologists for portable real time assessment of the effect of COVID-19 on other causes of death and all-cause mortality. This is performed by comparing 2020 real time values with observations from the same week in the previous 5 years, and by enabling the extraction of temporal snapshots of mortality series that facilitate modeling the interdependence between its causes.

Results

Our solution employs a scalable 'Data Commons at Web Scale' approach that abstracts all stages of the data cycle as in-browser components. Specifically, the data wrangling computation, not just the orchestration of data retrieval, takes place in the browser, without any requirement to download or install software. This approach, where operations that would normally be computed server-side are mapped to in-browser SDKs, is sometimes loosely described as Web APIs, a designation adopted here.

Availability and implementation

https://episphere.github.io/mortalitytracker; webcast demo: youtu.be/ZsvCe7cZzLo.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-08-01 +33847681,"Dual therapy with an oral non-vitamin K antagonist and a P2Y12 inhibitor vs triple therapy with aspirin, a P2Y12 inhibitor and a vitamin K antagonist for the treatment of diabetes mellitus patients with co-existing atrial fibrillation following percutaneous coronary intervention: A meta-analysis.","

Background

In this analysis, we aimed to compare the efficacy and safety of dual therapy (DT) with a non-vitamin K oral anticoagulant (NOAC) and an adenosine diphosphate receptor antagonist (P2Y12 inhibitor) vs triple therapy (TT) with aspirin, a P2Y12 inhibitor and a vitamin K antagonist for the treatment of diabetes mellitus (DM) patients with co-existing atrial fibrillation (AF) following percutaneous coronary intervention (PCI).

Methods

Medical Literature Analysis and Retrieval System Online (MEDLINE), http://www.ClinicalTrials.gov, Excerpta Medical data BASE (EMBASE), Web of Science, Cochrane Central and Google Scholar were the searched databases. Studies that were randomized trials or observational studies comparing DT vs TT for the treatment of DM patients with co-existing AF following PCI were included in this analysis. The adverse cardiovascular outcomes and bleeding events were the endpoints. This meta-analysis was carried out by the RevMan version 5.4 software. Risk ratios (RR) with 95% confidence intervals (CI) were used to represent data and interpret the analysis.

Results

A total number of 4970 participants were included whereby 2456 participants were assigned to the DT group and 2514 participants were assigned to the TT group. The enrollment period varied from year 2006 to year 2018. Our current results showed that major adverse cardiac events (RR: 1.00, 95% CI: 0.84-1.20; P = .98), mortality (RR: 1.08, 95% CI: 0.78-1.48; P = .66), myocardial infarction (RR: 1.02, 95% CI: 0.74-1.42; P = .90), stroke (RR: 0.94, 95% CI: 0.53-1.67; P = .84) and stent thrombosis (RR: 1.09, 95% CI: 0.56-2.10; P = .80) were similar with DT versus TT in these patients. However, the risks for total major bleeding (RR: 0.66, 95% CI: 0.54-0.82; P = .0001), total minor bleeding (RR: 0.74, 95% CI: 0.64-0.85; P = .0001), Thrombolysis in Myocardial Infarction (TIMI) defined major bleeding (RR: 0.58, 95% CI: 0.35-0.95; P = .03), TIMI defined minor bleeding (RR: 0.62, 95% CI: 0.42-0.92; P = .02), intra-cranial bleeding (RR: 0.34, 95% CI: 0.13-0.95; P = .04) and major bleeding defined by the International Society on Thrombosis and Hemostasis (RR: 0.68, 95% CI: 0.51-0.90; P = .008) were significantly higher with TT.

Conclusions

DT with a NOAC and a P2Y12 inhibitor was associated with significantly less bleeding events without increasing the adverse cardiovascular outcomes when compared to TT with aspirin, a P2Y12 inhibitor and a Vitamin K antagonist for the treatment of DM patients with co-existing AF following PCI. Hence, DT is comparable in efficacy, but safer compared to TT. This interesting hypothesis will have to be confirmed in future studies.",2021-04-01 +34910569,Who is Right? A Word-Identification-in-Noise Test for Young Children Using Minimal Pair Distracters.,"

Purpose

Many children have difficulties understanding speech. At present, there are few assessments that test for subtle impairments in speech perception with normative data from U.K. children. We present a new test that evaluates children's ability to identify target words in background noise by choosing between minimal pair alternatives that differ by a single articulatory phonetic feature. This task (a) is tailored to testing young children, but also readily applicable to adults; (b) has minimal memory demands; (c) adapts to the child's ability; and (d) does not require reading or verbal output.

Method

We tested 155 children and young adults aged from 5 to 25 years on this new test of single word perception.

Results

Speech-in-noise abilities in this particular task develop rapidly through childhood until they reach maturity at around 9 years of age.

Conclusions

We make this test freely available and provide associated normative data. We hope that it will be useful to researchers and clinicians in the assessment of speech perception abilities in children who are hard of hearing or have developmental language disorder, dyslexia, or auditory processing disorder.

Supplemental material

https://doi.org/10.23641/asha.17155934.",2021-12-15 +36530774,Testing for genetic mutation of seasonal influenza virus.,"Influenza virus strains undergo genetic mutations every year and these changes in genetic makeup pose difficulties for effective vaccine selection. To better understand the problem it is important to statistically quantify the amount of genetic change between circulating strains from different years. In this paper, we propose the nonparametric crossmatch test applied to phylogenetic trees to assess the level of discrepancy between circulating flu virus strains between two years; the viruses being represented by a phylogenetic tree. The crossmatch test has advantages compared to parametric tests in that it preserves more information in the data. The outcome of the test would indicate whether the circulating influenza virus has mutated sufficiently in the past year to be considered as a new population of virus, suggesting the need to consider a new vaccine. We validate the test on simulated phylogenetic tree samples with varying branch lengths, as well as with publicly available virus sequence data from the 'Global Initiative on Sharing All Influenza Data' (GISAID: https://www.gisaid.org/).",2021-09-29 +29705949,A network map of IL-33 signaling pathway.,"Interleukin-33 (IL-33) is a member of the IL-1 family of cytokines that play a central role in the regulation of immune responses. Its release from epithelial and endothelial cells is mediated by pro-inflammatory cytokines, cell damage and by recognition of pathogen-associated molecular patterns (PAMPs). The activity of IL-33 is mediated by binding to the IL-33 receptor complex (IL-33R) and activation of NF-κB signaling via the classical MyD88/IRAK/TRAF6 module. IL-33 also induces the phosphorylation and activation of ERK1/2, JNK, p38 and PI3K/AKT signaling modules resulting in the production and release of pro-inflammatory cytokines. 
Aberrant signaling by IL-33 has been implicated in the pathogenesis of several acute and chronic inflammatory diseases, including asthma, atopic dermatitis, rheumatoid arthritis and ulcerative colitis among others. Considering the biomedical importance of IL-33, we developed a pathway resource of signaling events mediated by IL-33/IL-33R in this study. Using data mined from the published literature, we describe an integrated pathway reaction map of IL-33/IL-33R consisting of 681 proteins and 765 reactions. These include information pertaining to 19 physical interaction events, 740 enzyme catalysis events, 6 protein translocation events, 4 activation/inhibition events, 9 transcriptional regulators and 2492 gene regulation events. The pathway map is publicly available through NetPath (http://www.netpath.org/), a resource of human signaling pathways developed previously by our group. This resource will provide a platform to the scientific community in facilitating identification of novel therapeutic targets for diseases associated with dysregulated IL-33 signaling. Database URL: http://www.netpath.org/pathways?path_id=NetPath_120 .",2018-04-28 +33068412,RASP: an atlas of transcriptome-wide RNA secondary structure probing data.,"RNA molecules fold into complex structures that are important across many biological processes. Recent technological developments have enabled transcriptome-wide probing of RNA secondary structure using nucleases and chemical modifiers. These approaches have been widely applied to capture RNA secondary structure in many studies, but gathering and presenting such data from very different technologies in a comprehensive and accessible way has been challenging. Existing RNA structure probing databases usually focus on low-throughput or very specific datasets. 
Here, we present a comprehensive RNA structure probing database called RASP (RNA Atlas of Structure Probing) by collecting 161 deduplicated transcriptome-wide RNA secondary structure probing datasets from 38 papers. RASP covers 18 species across animals, plants, bacteria, fungi, and also viruses, and categorizes 18 experimental methods including DMS-seq, SHAPE-Seq, SHAPE-MaP, and icSHAPE, etc. Specially, RASP curates the up-to-date datasets of several RNA secondary structure probing studies for the RNA genome of SARS-CoV-2, the RNA virus that caused the on-going COVID-19 pandemic. RASP also provides a user-friendly interface to query, browse, and visualize RNA structure profiles, offering a shortcut to accessing RNA secondary structures grounded in experimental data. The database is freely available at http://rasp.zhanglab.net.",2021-01-01 +33046018,ECCDIA: an interactive web tool for the comprehensive analysis of clinical and survival data of esophageal cancer patients.,"

Background

Esophageal cancer (EC) is considered as one of the deadliest malignancies with respect to incidence and mortality rate, and numerous risk factors may affect the prognosis of EC patients. For better understanding of the risk factors associated with the onset and prognosis of this malignancy, we develop an interactive web-based tool for the convenient analysis of clinical and survival characteristics of EC patients.

Methods

The clinical data were obtained from The Surveillance, Epidemiology, and End Results (SEER) database. Seven analysis and visualization modules were built with Shiny.

Results

The Esophageal Cancer Clinical Data Interactive Analysis (ECCDIA, http://webapps.3steps.cn/ECCDIA/ ) was developed to provide basic data analysis, visualization, survival analysis, and nomogram of the overall group and subgroups of 77,273 EC patients recorded in SEER. The basic data analysis modules contained distribution analysis of clinical factor ratios, Sankey plot analysis for relationships between clinical factors, and a map for visualizing the distribution of clinical factors. The survival analysis included Kaplan-Meier (K-M) analysis and Cox analysis for different subgroups of EC patients. The nomogram module enabled clinicians to precisely predict the survival probability of different subgroups of EC patients.

Conclusion

ECCDIA provides clinicians with an interactive prediction and visualization tool for visualizing invaluable clinical and prognostic information of individual EC patients, further providing useful information for better understanding of esophageal cancer.",2020-10-12 +33180112,Europe PMC in 2020.,"Europe PMC (https://europepmc.org) is a database of research articles, including peer reviewed full text articles and abstracts, and preprints - all freely available for use via website, APIs and bulk download. This article outlines new developments since 2017 where work has focussed on three key areas: (i) Europe PMC has added to its core content to include life science preprint abstracts and a special collection of full text of COVID-19-related preprints. Europe PMC is unique as an aggregator of biomedical preprints alongside peer-reviewed articles, with over 180 000 preprints available to search. (ii) Europe PMC has significantly expanded its links to content related to the publications, such as links to Unpaywall, providing wider access to full text, preprint peer-review platforms, all major curated data resources in the life sciences, and experimental protocols. The redesigned Europe PMC website features the PubMed abstract and corresponding PMC full text merged into one article page; there is more evident and user-friendly navigation within articles and to related content, plus a figure browse feature. (iii) The expanded annotations platform offers ∼1.3 billion text mined biological terms and concepts sourced from 10 providers and over 40 global data resources.",2021-01-01 +31075273,refTSS: A Reference Data Set for Human and Mouse Transcription Start Sites.,"Transcription starts at genomic positions called transcription start sites (TSSs), producing RNAs, and is mainly regulated by genomic elements and transcription factors binding around these TSSs. 
This indicates that TSSs may be a better unit to integrate various data sources related to transcriptional events, including regulation and production of RNAs. However, although several TSS datasets and promoter atlases are available, a comprehensive reference set that integrates all known TSSs is lacking. Thus, we constructed a reference dataset of TSSs (refTSS) for the human and mouse genomes by collecting publicly available TSS annotations and promoter resources, such as FANTOM5, DBTSS, EPDnew, and ENCODE. The data set consists of genomic coordinates of TSS peaks, their gene annotations, quality check results, and conservation between human and mouse. We also developed a web interface to browse the refTSS (http://reftss.clst.riken.jp/). Users can access the resource for collecting and integrating data and information about transcriptional regulation and transcription products.",2019-05-08 +33045741,LncSEA: a platform for long non-coding RNA related sets and enrichment analysis.,"Long non-coding RNAs (lncRNAs) have been proven to play important roles in transcriptional processes and various biological functions. Establishing a comprehensive collection of human lncRNA sets is urgent work at present. Using reference lncRNA sets, enrichment analyses will be useful for analyzing lncRNA lists of interest submitted by users. Therefore, we developed a human lncRNA sets database, called LncSEA, which aimed to document a large number of available resources for human lncRNA sets and provide annotation and enrichment analyses for lncRNAs. LncSEA supports >40 000 lncRNA reference sets across 18 categories and 66 sub-categories, and covers over 50 000 lncRNAs. We not only collected lncRNA sets based on downstream regulatory data sources, but also identified a large number of lncRNA sets regulated by upstream transcription factors (TFs) and DNA regulatory elements by integrating TF ChIP-seq, DNase-seq, ATAC-seq and H3K27ac ChIP-seq data. 
Importantly, LncSEA provides annotation and enrichment analyses of lncRNA sets associated with upstream regulators and downstream targets. In summary, LncSEA is a powerful platform that provides a variety of types of lncRNA sets for users, and supports lncRNA annotations and enrichment analyses. The LncSEA database is freely accessible at http://bio.liclab.net/LncSEA/index.php.",2021-01-01 +32986825,Animal-APAdb: a comprehensive animal alternative polyadenylation database.,"Alternative polyadenylation (APA) is an important post-transcriptional regulatory mechanism that recognizes different polyadenylation signals on transcripts, resulting in transcripts with different lengths of 3' untranslated regions and thereby influencing a series of biological processes. Recent studies have highlighted the important roles of APA in human. However, APA profiles in other animals have not been fully recognized, and there is no database that provides comprehensive APA information for other animals except human. Here, by using the RNA sequencing data collected from public databases, we systematically characterized the APA profiles in 9244 samples of 18 species. In total, we identified 342 952 APA events with a median of 17 020 per species using the DaPars2 algorithm, and 315 691 APA events with a median of 17 953 per species using the QAPA algorithm in these 18 species, respectively. In addition, we predicted the polyadenylation sites (PAS) and motifs near PAS of these species. We further developed Animal-APAdb, a user-friendly database (http://gong_lab.hzau.edu.cn/Animal-APAdb/) for data searching, browsing and downloading. 
With comprehensive information of APA events in different tissues of different species, Animal-APAdb may greatly facilitate the exploration of animal APA patterns and novel mechanisms, gene expression regulation and APA evolution across tissues and species.",2021-01-01 +31274965,Visual mass-spec share (vMS-Share): a new public web-based mass spectrometry visualization and data mining repository. ,"Herein we introduce the Visual Mass-Spec Share (vMS-Share), a new public mass spectrometric (MS) repository and data mining website/resource freely accessible at https://vmsshare.nist.gov. vMS-Share is a web-based application developed for instant visualization of raw MS data with integrated display of metadata optimized for the sharing of proteomics and metabolomics experimental results. Each MS-based identification is linked to a given experiment and the entire experimental data can then be viewed using the link associated with a given peptide and/or small molecule. Interactive and user-friendly visualizations are provided to the user via variety of easily accessible search filters.",2019-01-01 +28438161,REFOLDdb: a new and sustainable gateway to experimental protocols for protein refolding.,"

Background

More than 7000 papers related to ""protein refolding"" have been published to date, with approximately 300 reports each year during the last decade. Whilst some of these papers provide experimental protocols for protein refolding, a survey in the structural life science communities showed a necessity for a comprehensive database for refolding techniques. We therefore have developed a new resource - ""REFOLDdb"" that collects refolding techniques into a single, searchable repository to help researchers develop refolding protocols for proteins of interest.

Results

We based our resource on the existing REFOLD database, which has not been updated since 2009. We redesigned the data format to be more concise, allowing consistent representations among data entries compared with the original REFOLD database. The remodeled data architecture enhances the search efficiency and improves the sustainability of the database. After an exhaustive literature search we added experimental refolding protocols from reports published 2009 to early 2017. In addition to this new data, we fully converted and integrated existing REFOLD data into our new resource. REFOLDdb contains 1877 entries as of March 17th, 2017, and is freely available at http://p4d-info.nig.ac.jp/refolddb/ .

Conclusion

REFOLDdb is a unique database for the life sciences research community, providing annotated information for designing new refolding protocols and customizing existing methodologies. We envisage that this resource will find wide utility across broad disciplines that rely on the production of pure, active, recombinant proteins. Furthermore, the database also provides a useful overview of the recent trends and statistics in refolding technology development.",2017-04-24 +31412866,ETM-DB: integrated Ethiopian traditional herbal medicine and phytochemicals database.,"

Background

Recently, there has been an increasing tendency to go back to nature in search of new medicines. To facilitate this, a great deal of effort has been made to compile information on natural products worldwide, and as a result, many ethnic-based traditional medicine databases have been developed. In Ethiopia, there are more than 80 ethnic groups, each having their indigenous knowledge on the use of traditional medicine. About 80% of the population uses traditional medicine for primary health care. Despite this, there is no structured online database for Ethiopian traditional medicine, which limits natural products based drug discovery researches using natural products from this country.

Description

To develop ETM-DB, online research articles, theses, books, and public databases containing Ethiopian herbal medicine and phytochemicals information were searched. These resources were thoroughly inspected and the necessary data were extracted. Then, we developed a comprehensive online relational database which contains information on 1054 Ethiopian medicinal herbs with 1465 traditional therapeutic uses, 573 multi-herb prescriptions, 4285 compounds, 11,621 human target gene/proteins, covering 5779 herb-phenotype, 1879 prescription-herb, 16,426 herb-compound, 105,202 compound-phenotype, 162,632 compound-gene/protein, and 16,584 phenotype-gene/protein relationships. Using various cheminformatics tools, we obtained predicted physicochemical and absorption, distribution, metabolism, excretion, and toxicity (ADMET) properties of ETM-DB compounds. We also evaluated drug-likeness properties of these compounds using FAF-Drugs4 webserver. From the 4285 compounds, 4080 of them passed the FAF-Drugs4 input data curation stage, of which 876 were found to have acceptable drug-likeness properties.

Conclusion

ETM-DB is the largest, freely accessible, web-based integrated resource on Ethiopian traditional medicine. It provides traditional herbal medicine entities and their relationships in well-structured forms including reference to the sources. The ETM-DB website interface allows users to search the entities using various options provided by the search menu. We hope that our database will expedite drug discovery and development researches from Ethiopian natural products as it contains information on the chemical composition and related human target gene/proteins. The current version of ETM-DB is openly accessible at http://biosoft.kaist.ac.kr/etm .",2019-08-14 +34033493,Task-Specific Iconic Gesturing During Spoken Discourse in Aphasia.,"

Purpose

In persons living with aphasia, we will explore the relationship between iconic gesture production during spontaneous speech and discourse task, spoken language, and demographic information.

Method

Employing the AphasiaBank database, we coded iconic gestures in 75 speakers with aphasia during two spoken discourse tasks: a procedural narrative, which involved participants telling the experimenter how to make a sandwich (""Sandwich""), and a picture sequence narrative, which had participants describe the picture sequence to the experimenter (""Window""). Forty-three produced a gesture during both tasks, and we further evaluate data from this subgroup as a more direct comparison between tasks.

Results

More iconic gestures, at a higher rate, were produced during the procedural narrative. For both tasks, there was a relationship between iconic gesture rate, modeled as iconic gestures per word, and metrics of language dysfluency extracted from the discourse task as well as a metric of fluency extracted from a standardized battery. Iconic gesture production was correlated with aphasia duration, which was driven by performance during only a single task (Window), but not with other demographic metrics, such as aphasia severity or age. We also provide preliminary evidence for task differences shown through the lens of two types of iconic gestures.

Conclusions

While speech-language pathologists have utilized gesture in therapy for poststroke aphasia, due to its possible facilitatory role in spoken language, there has been considerably less work in understanding how gesture differs across naturalistic tasks and how we can best utilize this information to better assess gesture in aphasia and improve multimodal treatment for aphasia. Furthermore, our results contribute to gesture theory, particularly, about the role of gesture across naturalistic tasks and its relationship with spoken language. Supplemental Material https://doi.org/10.23641/asha.14614941.",2021-05-25 +33822911,MENSAdb: a thorough structural analysis of membrane protein dimers. ,"Membrane proteins (MPs) are key players in a variety of different cellular processes and constitute the target of around 60% of all Food and Drug Administration-approved drugs. Despite their importance, there is still a massive lack of relevant structural, biochemical and mechanistic information mainly due to their localization within the lipid bilayer. To help fulfil this gap, we developed the MEmbrane protein dimer Novel Structure Analyser database (MENSAdb). This interactive web application summarizes the evolutionary and physicochemical properties of dimeric MPs to expand the available knowledge on the fundamental principles underlying their formation. Currently, MENSAdb contains features of 167 unique MPs (63% homo- and 37% heterodimers) and brings insights into the conservation of residues, accessible solvent area descriptors, average B-factors, intermolecular contacts at 2.5 Å and 4.0 Å distance cut-offs, hydrophobic contacts, hydrogen bonds, salt bridges, π-π stacking, T-stacking and cation-π interactions. The regular update and organization of all these data into a unique platform will allow a broad community of researchers to collect and analyse a large number of features efficiently, thus facilitating their use in the development of prediction models associated with MPs. 
Database URL: http://www.moreiralab.com/resources/mensadb.",2021-04-01 +31695892,Transcriptome profiling of maternal stress-induced wing dimorphism in pea aphids.,"Wing dimorphism, that is, wingless and winged forms, can be induced by maternal stress signals and is an adaptive response of aphids to environmental changes. Here, we investigated the ecological and molecular effects of three kinds of stress, namely crowding, predation, and aphid alarm pheromone, on wing dimorphism. These three stressors induced high proportion of up to 60% of winged morphs in offspring. Transcriptome analysis of stress-treated female aphids revealed different changes in maternal gene expression induced by the three stressors. Crowding elicited widespread changes in the expression of genes involved in nutrient accumulation and energy mobilization. Distinct from crowding, predation caused dramatic expression changes in cuticle protein (CP) genes. Twenty-three CP genes that belong to CP RR2 subfamily and are highly expressed in legs and embryos were greatly repressed by the presence of ladybird. By contrast, application of alarm pheromone, E-β-farnesene, caused slight changes in gene expression. The three factors shared a responsive gene, cuticle protein 43. This study reveals the adaptive response of aphids to environmental stresses and provides a rich resource on genome-wide expression genes for exploring molecular mechanisms of ecological adaptation in aphids. OPEN RESEARCH BADGES:This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.55b2b15.",2019-10-02 +29761461,Bovine Genome Database: Tools for Mining the Bos taurus Genome.,"The Bovine Genome Database (BGD; http://bovinegenome.org ) is a web-accessible resource that supports bovine genomics research by providing genome annotation and data mining tools. 
BovineMine is a tool within BGD that integrates BGD data, including the genome, genes, precomputed gene expression levels and variant consequences, with external data sources that include quantitative trait loci (QTL), orthologues, Gene Ontology, gene interactions, and pathways. BovineMine enables researchers without programming skills to create custom integrated datasets for use in downstream analyses. This chapter describes how to enhance a bovine genomics project using the Bovine Genome Database, with data mining examples demonstrating BovineMine.",2018-01-01 +34678110,Methods and Statistical Analyses in Studies of Motivation for E-Cigarette Use Among University Students: An Integrative Review.,"Understanding the motivations for e-cigarette use among college and university students is essential for developing and implementing effective interventions. Evaluating existing literature is necessary to identify methodological gaps and limitations and improve the quality of future research.We aimed to evaluate the quality of the methods and statistical analyses and integrate evidence addressing motivations for e-cigarette use among college and university students.An integrative literature review was conducted by two researchers to identify and evaluate peer-reviewed, quantitative, and mixed methods research exploring motivations for e-cigarette use among college and university students. A systematic analytic method of data reduction was used to identify alignment and divergence of the data, gaps in the literature, and methodological limitations.Fifteen quantitative studies and three mixed methods studies published between 2015-2020 were included. Most studies were cross-sectional, used convenience sampling, and lacked psychometric and assumptions testing. 
Half performed regression analyses, however, very few adhered to research and statistical reporting standards.Current literature provides a foundation for developing and implementing interventions aimed to prevent e-cigarette use and encourage cessation. Future research should incorporate stronger sampling methods and research designs, as well as the use of rigorous statistical analyses in conjunction with thorough reporting.Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1990332.",2021-10-22 +33597026,PathoFact: a pipeline for the prediction of virulence factors and antimicrobial resistance genes in metagenomic data.,"

Background

Pathogenic microorganisms cause disease by invading, colonizing, and damaging their host. Virulence factors including bacterial toxins contribute to pathogenicity. Additionally, antimicrobial resistance genes allow pathogens to evade otherwise curative treatments. To understand causal relationships between microbiome compositions, functioning, and disease, it is essential to identify virulence factors and antimicrobial resistance genes in situ. At present, there is a clear lack of computational approaches to simultaneously identify these factors in metagenomic datasets.

Results

Here, we present PathoFact, a tool for the contextualized prediction of virulence factors, bacterial toxins, and antimicrobial resistance genes with high accuracy (0.921, 0.832 and 0.979, respectively) and specificity (0.957, 0.989 and 0.994). We evaluate the performance of PathoFact on simulated metagenomic datasets and perform a comparison to two other general workflows for the analysis of metagenomic data. PathoFact outperforms all existing workflows in predicting virulence factors and toxin genes. It performs comparably to one pipeline regarding the prediction of antimicrobial resistance while outperforming the others. We further demonstrate the performance of PathoFact on three publicly available case-control metagenomic datasets representing an actual infection as well as chronic diseases in which either pathogenic potential or bacterial toxins are hypothesized to play a role. In each case, we identify virulence factors and AMR genes which differentiated between the case and control groups, thereby revealing novel gene associations with the studied diseases.

Conclusion

PathoFact is an easy-to-use, modular, and reproducible pipeline for the identification of virulence factors, bacterial toxins, and antimicrobial resistance genes in metagenomic data. Additionally, our tool combines the prediction of these pathogenicity factors with the identification of mobile genetic elements. This provides further depth to the analysis by considering the genomic context of the pertinent genes. Furthermore, PathoFact's modules for virulence factors, toxins, and antimicrobial resistance genes can be applied independently, thereby making it a flexible and versatile tool. PathoFact, its models, and databases are freely available at https://pathofact.lcsb.uni.lu . Video abstract.",2021-02-17 +31710725,The Dundee Resource for Sequence Analysis and Structure Prediction.,"The Dundee Resource for Sequence Analysis and Structure Prediction (DRSASP; http://www.compbio.dundee.ac.uk/drsasp.html) is a collection of web services provided by the Barton Group at the University of Dundee. DRSASP's flagship services are the JPred4 webserver for secondary structure and solvent accessibility prediction and the JABAWS 2.2 webserver for multiple sequence alignment, disorder prediction, amino acid conservation calculations, and specificity-determining site prediction. DRSASP resources are available through conventional web interfaces and APIs but are also integrated into the Jalview sequence analysis workbench, which enables the composition of multitool interactive workflows. Other existing Barton Group tools are being brought under the banner of DRSASP, including NoD (Nucleolar localization sequence detector) and 14-3-3-Pred. New resources are being developed that enable the analysis of population genetic data in evolutionary and 3D structural contexts. Existing resources are actively developed to exploit new technologies and maintain parity with evolving web standards. 
DRSASP provides substantial computational resources for public use, and since 2016 DRSASP services have completed over 1.5 million jobs.",2019-11-28 +29788225,LnChrom: a resource of experimentally validated lncRNA-chromatin interactions in human and mouse. ,"Long non-coding RNAs (lncRNAs) constitute an important layer of chromatin regulation that contributes to various biological processes and diseases. By interacting with chromatin, many lncRNAs can regulate that state of chromatin by recruiting chromatin-modifying complexes and thus control large-scale gene expression programs. However, the available information on interactions between lncRNAs and chromatin is hidden in a large amount of dispersed literature and has not been extensively collected. We established the LnChrom database, a manually curated resource of experimentally validated lncRNA-chromatin interactions. The current release of LnChrom includes 382 743 interactions in human and mouse. We also manually collected detailed metadata for each interaction pair, including those of chromatin modifying factors, epigenetic marks and disease associations. LnChrom provides a user-friendly interface to facilitate browsing, searching and retrieving of lncRNA-chromatin interaction data. Additionally, a large amount of multi-omics data was integrated into LnChrom to aid in characterizing the effects of lncRNA-chromatin interactions on epigenetic modifications and transcriptional expression. We believe that LnChrom is a timely and valuable resource that can greatly motivate mechanistic research into lncRNAs.Database URL: http://biocc.hrbmu.edu.cn/LnChrom/.",2018-01-01 +30115014,PdumBase: a transcriptome database and research tool for Platynereis dumerilii and early development of other metazoans.,"

Background

The marine polychaete annelid Platynereis dumerilii has recently emerged as a prominent organism for the study of development, evolution, stem cells, regeneration, marine ecology, chronobiology and neurobiology within metazoans. Its phylogenetic position within the spiralian/ lophotrochozoan clade, the comparatively high conservation of ancestral features in the Platynereis genome, and experimental access to any stage within its life cycle, make Platynereis an important model for elucidating the complex regulatory and functional molecular mechanisms governing early development, later organogenesis, and various features of its larval and adult life. High resolution RNA-seq gene expression data obtained from specific developmental stages can be used to dissect early developmental mechanisms. However, the potential for discovery of these mechanisms relies on tools to search, retrieve, and compare genome-wide information within Platynereis, and across other metazoan taxa.

Results

To facilitate exploration and discovery by the broader scientific community, we have developed a web-based, searchable online research tool, PdumBase, featuring the first comprehensive transcriptome database for Platynereis dumerilii during early stages of development (2 h ~ 14 h). Our database also includes additional stages over the P. dumerilii life cycle and provides access to the expression data of 17,213 genes (31,806 transcripts) along with annotation information sourced from Swiss-Prot, Gene Ontology, KEGG pathways, Pfam domains, TmHMM, SingleP, and EggNOG orthology. Expression data for each gene includes the stage, the normalized FPKM, the raw read counts, and information that can be leveraged for statistical analyses of differential gene expression and the construction of genome-wide co-expression networks. In addition, PdumBase offers early stage transcriptome expression data from five further species as a valuable resource for investigators interested in comparing early development in different organisms. To understand conservation of Platynereis gene models and to validate gene annotation, most Platynereis gene models include a comprehensive phylogenetic analysis across 18 species representing diverse metazoan taxa.

Conclusions

PdumBase represents the first online resource for the early developmental transcriptome of Platynereis dumerilii. It serves as a research platform for discovery and exploration of gene expression during early stages, throughout the Platynereis life cycle, and enables comparison to other model organisms. PdumBase is freely available at http://pdumbase.gdcb.iastate.edu .",2018-08-16 +29036667,DISNOR: a disease network open resource.,"DISNOR is a new resource that aims at exploiting the explosion of data on the identification of disease-associated genes to assemble inferred disease pathways. This may help dissecting the signaling events whose disruption causes the pathological phenotypes and may contribute to build a platform for precision medicine. To this end we combine the gene-disease association (GDA) data annotated in the DisGeNET resource with a new curation effort aimed at populating the SIGNOR database with causal interactions related to disease genes with the highest possible coverage. DISNOR can be freely accessed at http://DISNOR.uniroma2.it/ where >3700 disease-networks, linking ∼2600 disease genes, can be explored. For each disease curated in DisGeNET, DISNOR links disease genes by manually annotated causal relationships and offers an intuitive visualization of the inferred 'patho-pathways' at different complexity levels. User-defined gene lists are also accepted in the query pipeline. In addition, for each list of query genes-either annotated in DisGeNET or user-defined-DISNOR performs a gene set enrichment analysis on KEGG-defined pathways or on the lists of proteins associated with the inferred disease pathways. This function offers additional information on disease-associated cellular pathways and disease similarity.",2018-01-01 +33592005,Cervical length varies considering different populations and gestational outcomes: Results from a systematic review and meta-analysis.,"

Background

The uterine cervical length is an important risk factor for preterm birth. The aim of this study was to assess cervical length distribution in women with singleton pregnancies, measured by transvaginal ultrasound between 16 and 24 weeks, and its association with population characteristics.

Materials and methods

We searched electronic databases and other sources for studies published from April 1, 1990 to July 21, 2020. Of the 2019 retrieved publications, full-text versions of 137 articles were considered. We included 77 original articles that reported cervical length measurements of 363,431 women. The main aim of this study was to identify the pattern of cervical length in different populations. We collected demographic and clinical data concerning the population, in addition to information regarding the ultrasound examination and cervical length measurement. Regarding study bias, 56 were at low risk of bias and 21 were at medium risk of bias.

Results

The meta-analysis included 57 articles with data from 158,346 women. The mean cervical length was 37.96 mm (95% CI [36.68, 39.24]). Cervical length was shorter in women from Africa and Asia, in those from low-income countries, with a lower body weight, and in those who delivered before 37 gestational weeks. We found that the cervical length from pooled studies is longer than that usually discussed in the literature. Regarding limitations, we had difficulty assessing our main variable because there was no consistent pattern in the way authors reported cervical length measurement. Another limitation was the great heterogeneity between studies.

Conclusions

The use of a single cutoff value to define a short cervix diagnosis, an important risk factor for preterm birth, may not be correct and cervical length must be considered according to maternal population characteristics. Future studies should identify different specific curves and cutoff values for cervical length in different populations. This meta-analysis was registered in the PROSPERO database under CRD42017070246 at https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=70246.",2021-02-16 +33995899,DRscDB: A single-cell RNA-seq resource for data mining and data comparison across species.,"With the advent of single-cell RNA sequencing (scRNA-seq) technologies, there has been a spike in studies involving scRNA-seq of several tissues across diverse species including Drosophila. Although a few databases exist for users to query genes of interest within the scRNA-seq studies, search tools that enable users to find orthologous genes and their cell type-specific expression patterns across species are limited. Here, we built a new search database, DRscDB (https://www.flyrnai.org/tools/single_cell/web/), to address this need. DRscDB serves as a comprehensive repository for published scRNA-seq datasets for Drosophila and relevant datasets from human and other model organisms. DRscDB is based on manual curation of Drosophila scRNA-seq studies of various tissue types and their corresponding analogous tissues in vertebrates including zebrafish, mouse, and human. Of note, our search database provides most of the literature-derived marker genes, thus preserving the original analysis of the published scRNA-seq datasets. Finally, DRscDB serves as a web-based user interface that allows users to mine gene expression data from scRNA-seq studies and perform cell cluster enrichment analyses pertaining to various scRNA-seq studies, both within and across species.",2021-04-11 +,Projecting Infant Weight Gain per Day to 112 Days from First 35 Days of Life,"Abstract

Objectives

The American Academy of Pediatrics Task Force on Clinical Testing of Infant Formulas [1] stated, “Determination of rate of gain in weight is the single most valuable component of the clinical evaluation of an infant formula” and “recommends, that weight gain be determined over an interval of 3 to 4 months.” A gastrointestinal tolerance study (denoted ST) measured infants' weight gain per day from 14 to 35 days of age (CW2). The objectives of this project were 1) to estimate weight gain per day from 14 to 112 days of age (CW5) using the CW2 in ST and historical data from seven 4-months growth and tolerance studies by simulation and bootstrap, and 2) to determine if ST would have supported normal physical growth in infants had the study been continued to 112 days of age by showing that the experimental formula is non-inferior to the control formula using weight gain accretion of 3 g/day as the non-inferiority margin. 1. American Academy of Pediatrics, Committee on Nutrition, (CON-AAP). “Clinical Testing of Infant Formulas With Respect to Nutritional Suitability for Term Infants.” June 1988 (Report prepared under FDA contract 223-86-2117) (available at https://wayback.archive-it.org/7993/20170722090324/https://www.fda.gov/Food/GuidanceRegulation/GuidanceDocumentsRegulatoryInformation/InfantFormula/ucm170649.htm) (accessed May 23, 2019).

Methods

The simulation made use of the observed linear relationship between CW2 and CW5 from growth data in the historical studies, which included 16 formula groups. The simulated data for CW2 and CW5 were generated using multivariate normal distribution with mean vector μ and covariance matrix ∑, estimated from ST and the historical studies. In addition, the bootstrapped distribution of the difference in mean CW5 between the experimental and control groups in ST was derived using the historical studies. Software SAS® 9.4 and SAS® Enterprise Guide 7.1 (SAS Institute Inc, Cary, NC) were used for this report.

Results

Based on the analyses, we conclude that growth observed from 14 to 35 days of age in ST when extended to 112 days of age will demonstrate normal physical growth with high probability (> 98%).

Conclusions

Using such a predictive model may be a complementary solution for bridging growth and tolerance studies of shorter duration to 4 months for minor changes to infant formulas.

Funding Sources

Abbott Laboratories.",2020-05-29 +33549003,The efficacy and safety of moderate aerobic exercise for patients with Parkinson's disease: a systematic review and meta-analysis of randomized controlled trials.,"

Background

Exercise therapy is an important component of non-drug treatment for Parkinson's disease (PD). However, the impact of moderate aerobic exercise on PD remains unclear. The purpose of this systematic review was to evaluate the efficacy and safety of moderate aerobic exercise for patients with PD.

Methods

Databases including the Cochrane Library, PubMed, Web of Science, EMBASE, Chinese Biomedical Literature (CBM) Database, Chinese National Knowledge Infrastructure (CNKI), WanFang, and the Weipu Database for Chinese Technical Periodicals (VIP) were searched electronically from the date of inception of the database to June 2020 to recruit relevant randomized controlled trials (RCTs) investigating the efficacy and safety of moderate aerobic exercise on balance and other symptoms in patients with PD. Literature screening, data extraction, and quality evaluation were carried out. Revman5.1 (http://ims.cochrane.org/revman) was used for data analysis.

Results

In total, nine RCTs with 444 patients were included in this study. Most of the included trials had a low risk of bias and high methodological quality. The meta-analysis suggested that moderate aerobic exercise is effective in improving balance [weighted mean difference (WMD) =-0.42, 95% confidence interval (CI): -0.59 to -0.25, P<0.001] and gait (WMD =49.97, 95% CI: 17.84-82.10, P<0.001) in patients with PD, but not in motor symptoms (WMD =-2.14, 95% CI: -4.86-0.58, P=0.12). Subgroup analysis showed that the improvement in the quality of life of PD patients was affected by different types of exercise, and the treadmill subgroup improved significantly (WMD =-3.90, 95% CI: -5.02 to -2.78, P<0.001).

Conclusions

On the whole, moderate aerobic exercise effectively improves balance and gait in patients with PD, but the effect on motor symptoms is not obvious. Different exercise styles have varying effects on the quality of life of PD patients. It is necessary to standardize the exercise program further and carry out indepth research in the future.",2021-02-05 +31831861,TRANSNAP: a web database providing comprehensive information on Japanese pear transcriptome.,"Japanese pear (Pyrus pyrifolia) is a major fruit tree in the family Rosaceae and is bred for fruit production. To promote the development of breeding strategies and molecular research for Japanese pear, we sequenced the transcripts of Japanese pear variety 'Hosui'. To exhaustively collect information of total gene expression, RNA samples from various organs and stages of Japanese pear were sequenced by three technologies, single-molecule real-time (SMRT) sequencing, 454 pyrosequencing, and Sanger sequencing. Using all those reads, we determined comprehensive reference sequences of Japanese pear. Then, their protein sequences were predicted, and biological functional annotations were assigned. Finally, we developed a web database, TRANSNAP (http://plantomics.mind.meiji.ac.jp/nashi), which is the first web resource of Japanese pear omics information. This database provides highly reliable information via a user-friendly web interface: the reference sequences, gene functional annotations, and gene expression profiles from microarray experiments. In addition, based on sequence comparisons among Japanese, Chinese and European pears, similar protein sequences among the pears and species-specific proteins in Japanese pear can be quickly and efficiently identified. TRANSNAP will aid molecular research and breeding in Japanese pear, and its information is available for comparative analysis among other pear species and families.",2019-12-12 +32976564,Fold recognition by scoring protein maps using the congruence coefficient.,"

Motivation

Protein fold recognition is a key step for template-based modeling approaches to protein structure prediction. Although closely related folds can be easily identified by sequence homology search in sequence databases, fold recognition is notoriously more difficult when it involves the identification of distantly related homologs. Recent progress in residue-residue contact and distance prediction opens up the possibility of improving fold recognition by using structural information contained in predicted distance and contact maps.

Results

Here we propose to use the congruence coefficient as a metric of similarity between maps. We prove that this metric has several interesting mathematical properties which allow one to compute in polynomial time its exact mean and variance over all possible (exponentially many) alignments between two symmetric matrices, and assess the statistical significance of similarity between aligned maps. We perform fold recognition tests by recovering predicted target contact/distance maps from the two most recent Critical Assessment of Structure Prediction editions and over 27 000 non-homologous structural templates from the ECOD database. On this large benchmark, we compare fold recognition performances of different alignment tools with their own similarity scores against those obtained using the congruence coefficient. We show that the congruence coefficient overall improves fold recognition over other methods, proving its effectiveness as a general similarity metric for protein map comparison.

Availability and implementation

The congruence coefficient software CCpro is available as part of the SCRATCH suite at: http://scratch.proteomics.ics.uci.edu/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +31738435,PMBD: a Comprehensive Plastics Microbial Biodegradation Database. ,"Since the invention over a hundred years ago, plastics have been used in many applications, and they are involved in every aspect of our lives. The extensive usage of plastics results in a tremendous amount of waste, which has become a severe burden on the environment. Several degradation approaches exist in nature to cope with ever-increasing plastic waste. Among these approaches, biodegradation by microorganisms has emerged as a natural way, which is favored by many environmentally conscious societies. To facilitate the study on biodegradation of plastics, we developed an online resource, Plastics Microbial Biodegradation Database (PMBD), to gather and present the information about microbial biodegradation of plastics. In this database, 949 microorganisms-plastics relationships and 79 genes involved in the biodegradation of plastics were manually collected and confirmed through literature searching. In addition, more than 8000 automatically annotated enzyme sequences, which were predicted to be involved in the plastics biodegradation, were extracted from the TrEMBL section of the UniProt database. The PMBD database is presented with a website at http://pmbd.genome-mining.cn/home. Data may be accessed through browsing or searching. Also included on the website are a sequence alignment tool and a function prediction tool.",2019-01-01 +32820322,LncAS2Cancer: a comprehensive database for alternative splicing of lncRNAs across human cancers. ,"Accumulating studies demonstrated that the roles of lncRNAs for tumorigenesis were isoform-dependent and their aberrant splicing patterns in cancers contributed to function specificity. However, there is no existing database focusing on cancer-related alternative splicing of lncRNAs. 
Here, we developed a comprehensive database called LncAS2Cancer, which collected 5335 bulk RNA sequencing and 1826 single-cell RNA sequencing samples, covering over 30 cancer types. By applying six state-of-the-art splicing algorithms, 50 859 alternative splicing events for 8 splicing types were identified and deposited in the database. In addition, the database contained the following information: (i) splicing patterns of lncRNAs under seven different conditions, such as gene interference, which facilitated to infer potential regulators; (ii) annotation information derived from eight sources and manual curation, to understand the functional impact of affected sequences; (iii) survival analysis to explore potential biomarkers; as well as (iv) a suite of tools to browse, search, visualize and download interesting information. LncAS2Cancer could not only confirm the known cancer-associated lncRNA isoforms but also indicate novel ones. Using the data deposited in LncAS2Cancer, we compared gene model and transcript overlap between lncRNAs and protein-coding genes and discusses how these factors, along with sequencing depth, affected the interpretation of splicing signals. Based on recurrent signals and potential confounders, we proposed a reliable score to prioritize splicing events for further elucidation. Together, with the broad collection of lncRNA splicing patterns and annotation, LncAS2Cancer will provide important new insights into the diverse functional roles of lncRNA isoforms in human cancers. LncAS2Cancer is freely available at https://lncrna2as.cd120.com/.",2021-05-01 +32058000,TbCAPs: A toolbox for co-activation pattern analysis.,"Functional magnetic resonance imaging provides rich spatio-temporal data of human brain activity during task and rest. Many recent efforts have focussed on characterising dynamics of brain activity. 
One notable instance is co-activation pattern (CAP) analysis, a frame-wise analytical approach that disentangles the different functional brain networks interacting with a user-defined seed region. While promising applications in various clinical settings have been demonstrated, there is not yet any centralised, publicly accessible resource to facilitate the deployment of the technique. Here, we release a working version of TbCAPs, a new toolbox for CAP analysis, which includes all steps of the analytical pipeline, introduces new methodological developments that build on already existing concepts, and enables a facilitated inspection of CAPs and resulting metrics of brain dynamics. The toolbox is available on a public academic repository at https://c4science.ch/source/CAP_Toolbox.git. In addition, to illustrate the feasibility and usefulness of our pipeline, we describe an application to the study of human cognition. CAPs are constructed from resting-state fMRI using as seed the right dorsolateral prefrontal cortex, and, in a separate sample, we successfully predict a behavioural measure of continuous attentional performance from the metrics of CAP dynamics (R ​= ​0.59).",2020-02-10 +33942874,Human IRES Atlas: an integrative platform for studying IRES-driven translational regulation in humans. ,"It is now known that cap-independent translation initiation facilitated by internal ribosome entry sites (IRESs) is vital in selective cellular protein synthesis under stress and different physiological conditions. However, three problems make it hard to understand transcriptome-wide cellular IRES-mediated translation initiation mechanisms: (i) complex interplay between IRESs and other translation initiation-related information, (ii) reliability issue of in silico cellular IRES investigation and (iii) labor-intensive in vivo IRES identification. In this research, we constructed the Human IRES Atlas database for a comprehensive understanding of cellular IRESs in humans. 
First, currently available and suitable IRES prediction tools (IRESfinder, PatSearch and IRESpy) were used to obtain transcriptome-wide human IRESs. Then, we collected eight genres of translation initiation-related features to help study the potential molecular mechanisms of each of the putative IRESs. Three functional tests (conservation, structural RNA-protein scores and conditional translation efficiency) were devised to evaluate the functionality of the identified putative IRESs. Moreover, an easy-to-use interface and an IRES-translation initiation interaction map for each gene transcript were implemented to help understand the interactions between IRESs and translation initiation-related features. Researchers can easily search/browse an IRES of interest using the web interface and deduce testable mechanism hypotheses of human IRES-driven translation initiation based on the integrated results. In summary, Human IRES Atlas integrates putative IRES elements and translation initiation-related experiments for better usage of these data and deduction of mechanism hypotheses. Database URL: http://cobishss0.im.nuk.edu.tw/Human_IRES_Atlas/.",2021-05-01 +32976578,TREND-DB-a transcriptome-wide atlas of the dynamic landscape of alternative polyadenylation.,"Alternative polyadenylation (APA) profoundly expands the transcriptome complexity. Perturbations of APA can disrupt biological processes, ultimately resulting in devastating disorders. A major challenge in identifying mechanisms and consequences of APA (and its perturbations) lies in the complexity of RNA 3' end processing, involving poorly conserved RNA motifs and multi-component complexes consisting of far more than 50 proteins. This is further complicated in that RNA 3' end maturation is closely linked to transcription, RNA processing and even epigenetic (histone/DNA/RNA) modifications. 
Here, we present TREND-DB (http://shiny.imbei.uni-mainz.de:3838/trend-db), a resource cataloging the dynamic landscape of APA after depletion of >170 proteins involved in various facets of transcriptional, co- and post-transcriptional gene regulation, epigenetic modifications and further processes. TREND-DB visualizes the dynamics of transcriptome 3' end diversification (TREND) in a highly interactive manner; it provides a global APA network map and allows interrogating genes affected by specific APA-regulators and vice versa. It also permits condition-specific functional enrichment analyses of APA-affected genes, which suggest wide biological and clinical relevance across all RNAi conditions. The implementation of the UCSC Genome Browser provides additional customizable layers of gene regulation accounting for individual transcript isoforms (e.g. epigenetics, miRNA-binding sites and RNA-binding proteins). TREND-DB thereby fosters disentangling the role of APA for various biological programs, including potential disease mechanisms, and helps identify their diagnostic and therapeutic potential.",2021-01-01 +30089500,The Cancer Omics Atlas: an integrative resource for cancer omics annotations.,"

Background

The Cancer Genome Atlas (TCGA) is an important data resource for cancer biologists and oncologists. However, a lack of bioinformatics expertise often hinders experimental cancer biologists and oncologists from exploring the TCGA resource. Although a number of tools have been developed for facilitating cancer researchers to utilize the TCGA data, these existing tools cannot fully satisfy the large community of experimental cancer biologists and oncologists without bioinformatics expertise.

Methods

We developed a new web-based tool The Cancer Omics Atlas (TCOA, http://tcoa.cpu.edu.cn ) for fast and straightforward querying of TCGA ""omics"" data.

Results

TCOA provides the querying of gene expression, somatic mutations, microRNA (miRNA) expression, protein expression data based on a single molecule or cancer type. TCOA also provides the querying of expression correlation between gene pairs, miRNA pairs, gene and miRNA, and gene and protein. Moreover, TCOA provides the querying of the associations between gene, miRNA, or protein expression and survival prognosis in cancers. In addition, TCOA displays transcriptional profiles across various human cancer types based on the pan-cancer analysis. Finally, TCOA provides the querying of molecular profiles for 2877 immune-related genes in human cancers. These immune-related genes include those that are established or promising targets for cancer immunotherapy such as CTLA4, PD1, PD-L1, PD-L2, IDO1, LAG3, and TIGIT.

Conclusions

TCOA is a useful tool that supplies a number of unique and new functions complementary to the existing tools to facilitate exploration of the TCGA resource.",2018-08-08 +32750793,Index Networks.,"We show that existing upsampling operators in convolutional networks can be unified using the notion of the index function. This notion is inspired by an observation in the decoding process of deep image matting where indices-guided unpooling can often recover boundary details considerably better than other upsampling operators such as bilinear interpolation. By viewing the indices as a function of the feature map, we introduce the concept of 'learning to index', and present a novel index-guided encoder-decoder framework where indices are learned adaptively from data and are used to guide downsampling and upsampling stages, without extra training supervision. At the core of this framework is a new learnable module, termed Index Network (IndexNet), which dynamically generates indices conditioned on the feature map. IndexNet can be used as a plug-in, applicable to almost all convolutional networks that have coupled downsampling and upsampling stages, enabling the networks to dynamically capture variations of local patterns. In particular, we instantiate and investigate five families of IndexNet. We highlight their superiority in delivering spatial information over other upsampling operators with experiments on synthetic data, and demonstrate their effectiveness on four dense prediction tasks, including image matting, image denoising, semantic segmentation, and monocular depth estimation. Code and models are available at https://git.io/IndexNet.",2021-12-07 +29057095,"SalmoNet, an integrated network of ten Salmonella enterica strains reveals common and distinct pathways to host adaptation.","Salmonella enterica is a prominent bacterial pathogen with implications on human and animal health. Salmonella serovars could be classified as gastro-intestinal or extra-intestinal. 
Genome-wide comparisons revealed that extra-intestinal strains are closer relatives of gastro-intestinal strains than to each other indicating a parallel evolution of this trait. Given the complexity of the differences, a systems-level comparison could reveal key mechanisms enabling extra-intestinal serovars to cause systemic infections. Accordingly, in this work, we introduce a unique resource, SalmoNet, which combines manual curation, high-throughput data and computational predictions to provide an integrated network for Salmonella at the metabolic, transcriptional regulatory and protein-protein interaction levels. SalmoNet provides the networks separately for five gastro-intestinal and five extra-intestinal strains. As a multi-layered, multi-strain database containing experimental data, SalmoNet is the first dedicated network resource for Salmonella. It comprehensively contains interactions between proteins encoded in Salmonella pathogenicity islands, as well as regulatory mechanisms of metabolic processes with the option to zoom-in and analyze the interactions at specific loci in more detail. Application of SalmoNet is not limited to strain comparisons as it also provides a Salmonella resource for biochemical network modeling, host-pathogen interaction studies, drug discovery, experimental validation of novel interactions, uncovering new pathological mechanisms from emergent properties and epidemiological studies. SalmoNet is available at http://salmonet.org.",2017-10-18 +34558825,Meta-analysis and Consolidation of Farnesoid X Receptor Chromatin Immunoprecipitation Sequencing Data Across Different Species and Conditions.,"Farnesoid X receptor (FXR) is a nuclear receptor that controls gene regulation of different metabolic pathways and represents an upcoming drug target for various liver diseases. Several data sets on genome-wide FXR binding in different species and conditions exist. 
We have previously reported that these data sets are heterogeneous and do not cover the full spectrum of potential FXR binding sites. Here, we report the first meta-analysis of all publicly available FXR chromatin immunoprecipitation sequencing (ChIP-seq) data sets from mouse, rat, and human across different conditions using a newly generated analysis pipeline. All publicly available single data sets were biocurated in a standardized manner and compared on every relevant level from raw reads to affected functional pathways. Individual murine data sets were then virtually merged into a single unique ""FXR binding atlas"" spanning all potential binding sites across various conditions. Comparison of the single biocurated data sets showed that the overlap of FXR binding sites between different species is modest and ranges from 48% (mouse-human) to 55% (mouse-rat). Moreover, in vivo data among different species are more similar than human in vivo data compared to human in vitro data. The consolidated murine global FXR binding atlas virtually increases sequencing depth and allows recovering more and novel potential binding sites and signaling pathways that were missed in the individual data sets. The FXR binding atlas is publicly searchable (https://fxratlas.tugraz.at). Conclusion: Published single FXR ChIP-seq data sets and large-scale integrated omics data sets do not cover the full spectrum of FXR binding. Combining different individual data sets and creating an ""FXR super-binding atlas"" enhances understanding of FXR signaling capacities across different conditions. This is important when considering the potential wide spectrum for drugs targeting FXR in liver diseases.",2021-07-01 +34688174,MIcro-surgical anastomose workflow recognition challenge report.,"

Background and objective

Automatic surgical workflow recognition is an essential step in developing context-aware computer-assisted surgical systems. Video recordings of surgeries are becoming widely accessible, as the operational field view is captured during laparoscopic surgeries. Head and ceiling mounted cameras are also increasingly being used to record videos in open surgeries. This makes videos a common choice in surgical workflow recognition. Additional modalities, such as kinematic data captured during robot-assisted surgeries, could also improve workflow recognition. This paper presents the design and results of the MIcro-Surgical Anastomose Workflow recognition on training sessions (MISAW) challenge whose objective was to develop workflow recognition models based on kinematic data and/or videos.

Methods

The MISAW challenge provided a data set of 27 sequences of micro-surgical anastomosis on artificial blood vessels. This data set was composed of videos, kinematics, and workflow annotations. The latter described the sequences at three different granularity levels: phase, step, and activity. Four tasks were proposed to the participants: three of them were related to the recognition of surgical workflow at three different granularity levels, while the last one addressed the recognition of all granularity levels in the same model. We used the average application-dependent balanced accuracy (AD-Accuracy) as the evaluation metric. This takes unbalanced classes into account and it is more clinically relevant than a frame-by-frame score.

Results

Six teams participated in at least one task. All models employed deep learning models, such as convolutional neural networks (CNN), recurrent neural networks (RNN), or a combination of both. The best models achieved accuracy above 95%, 80%, 60%, and 75% respectively for recognition of phases, steps, activities, and multi-granularity. The RNN-based models outperformed the CNN-based ones as well as the dedicated modality models compared to the multi-granularity except for activity recognition.

Conclusion

For high levels of granularity, the best models had a recognition rate that may be sufficient for applications such as prediction of remaining surgical time. However, for activities, the recognition rate was still low for applications that can be employed clinically. The MISAW data set is publicly available at http://www.synapse.org/MISAW to encourage further research in surgical workflow recognition.",2021-10-10 +33360695,A data-driven integrative platform for computational prediction of toxin biotransformation with a case study.,"Recently, biogenic toxins have received increasing attention owing to their high contamination levels in feed and food as well as in the environment. However, there is a lack of an integrative platform for seamless linking of data-driven computational methods with 'wet' experimental validations. To this end, we constructed a novel platform that integrates the technical aspects of toxin biotransformation methods. First, a biogenic toxin database termed ToxinDB (http://www.rxnfinder.org/toxindb/), containing multifaceted data on more than 4836 toxins, was built. Next, more than 8000 biotransformation reaction rules were extracted from over 300,000 biochemical reactions extracted from ~580,000 literature reports curated by more than 100 people over the past decade. Based on these reaction rules, a toxin biotransformation prediction model was constructed. Finally, the global chemical space of biogenic toxins was constructed, comprising ~550,000 toxins and putative toxin metabolites, of which 94.7% of the metabolites have not been previously reported. Additionally, we performed a case study to investigate citrinin metabolism in Trichoderma, and a novel metabolite was identified with the assistance of the biotransformation prediction tool of ToxinDB. 
This unique integrative platform will assist exploration of the 'dark matter' of a toxin's metabolome and promote the discovery of detoxification enzymes.",2020-12-11 +33514395,My personal mutanome: a computational genomic medicine platform for searching network perturbing alleles linking genotype to phenotype.,"Massive genome sequencing data have inspired new challenges in personalized treatments and facilitated oncological drug discovery. We present a comprehensive database, My Personal Mutanome (MPM), for accelerating the development of precision cancer medicine protocols. MPM contains 490,245 mutations from over 10,800 tumor exomes across 33 cancer types in The Cancer Genome Atlas mapped to 94,563 structure-resolved/predicted protein-protein interaction interfaces (""edgetic"") and 311,022 functional sites (""nodetic""), including ligand-protein binding sites and 8 types of protein posttranslational modifications. In total, 8884 survival results and 1,271,132 drug responses are obtained for these mapped interactions. MPM is available at https://mutanome.lerner.ccf.org .",2021-01-29 +,"First Report of Prunus domestica as the Host of a Phytoplasma Belonging to Group 16SrI, Subgroup B/L","In July 2016 and 2017, diseased plum trees (Prunus domestica L.) were observed in the orchard localized in the Wielkopolska region of Poland. The symptoms of witches’ brooms with internode shortening, axillary bud growth, and reduced leaf size indicated a potential phytoplasma infection. Samples of shortened shoots were collected from five symptomatic and five asymptomatic plants, respectively. DNA samples were extracted from 2 g of leaf tissues using a modified cetyltrimethylammonium bromide procedure (Maixner et al. 
1995) and subsequently used as templates in nested polymerase chain reaction (PCR) for the amplification of genes encoding 16S rRNA; ribosomal proteins (rp) S19, L22, and S3 and elongation factor Tu (tuf) with primers P1/P7 followed by R16F2n/R16R2; rpF1/rpR1 followed by rp(I)F1A/rp(I)R1A and fTuf1/rTuf1 followed by fTufAY/rTufAY, respectively (Duduk et al. 2013; Martini et al. 2007; Schneider et al. 1997). Amplicons of correct sizes (1.2, 1.2, and 0.9 kb) were obtained from all the diseased plants tested (samples Plum1 to Plum5), respectively. No PCR products were obtained from the asymptomatic samples. All PCR products were ligated into pGEM-T Easy Vector Systems (Promega), and plasmid DNA was sequenced by an external company (Genomed, Warsaw, Poland). The 16S rDNA, tuf, and rp genes coding sequences from Plum1 to Plum5 were identical, and therefore one representative sequence of each region was deposited in GenBank under accession numbers MH061193, MH061368, and MH061366, respectively. The R16F2n/R16R2 (MH061193) primed fragment was subjected to in silico restriction digestion using iPhyClassifier, the online tool for phytoplasma classification (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi). The collective restriction fragment length polymorphism patterns indicated that diseased plants were infected by ‘Candidatus Phytoplasma asteris’ of subgroup 16SrI-L. Next, the 385-bp barcode fragments of the EF-Tu gene from representatives of different phytoplasma groups were retrieved from the GenBank database. The phylogenetic analysis was performed based on those sequences and the tuf sequence obtained in this study using the neighbor joining algorithm of MEGA 7 software (https://www.megasoftware.net/). The 1.2-kbp DNA segment containing rp genes was also analyzed. In the phylogenetic trees, the Polish isolate of plum witches’ broom phytoplasma clustered together with ‘Ca. P. 
asteris’ representatives of ribosomal subgroup 16SrI-B and 16SrI-B/L. Based on all the performed analyses, the plum witches’ broom phytoplasma (PlumWB) was classified as a member of group 16SrI, subgroup B/L, which consists of two heterogeneous 16Sr RNA genes (Jomantiene et al. 2010). To our knowledge, this is the first report of plums infected with ‘Ca. P. asteris’ representing subgroup I-B/L in Poland, suggesting that fruit trees can be perennial reservoirs of the phytoplasma, which has great significance in the pathogen’s epidemiology.",2019-01-01 +34895022,"Awareness and Correlates of HIV Pre-Exposure Prophylaxis (PrEP) Among HIV-negative People Who Access Syringe Services in Seattle, Washington.","

Background

HIV pre-exposure prophylaxis (PrEP) is safe and effective for use in people who inject drugs (PWID), but PrEP is underutilized in this population. We assessed awareness of PrEP and correlates of interest in PrEP among PWID in Seattle, Washington.

Methods

This study analyzed data from a 2019 survey of PWID at 3 Seattle-area syringe service programs (SSPs). We used descriptive statistics to compare PrEP-aware and unaware PWID and multivariable Poisson regression with robust standard errors to estimate adjusted prevalence ratios (APR) for interest in PrEP.

Results

Among 348 HIV-negative PWID, ≤1% were currently taking PrEP, 51% were PrEP aware and 46% were interested in PrEP. Interest in PrEP was inversely associated with prior PrEP awareness (APR 0.58, 95% CI 0.45 - 0.74); however, interest in PrEP was high among PWID meeting pre-specified risk criteria for HIV (APR 1.41, 95% CI 1.06 - 1.88).

Conclusions

Our results suggest increasing awareness of PrEP may not be sufficient to promote PrEP uptake among PWID, and further efforts are needed to understand perceptions of risk for HIV, determinants of PrEP use, and to investigate successful strategies for PrEP implementation and delivery in this marginalized population.Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.2012688 .",2021-12-13 +33444134,Generating Synthetic Labeled Data From Existing Anatomical Models: An Example With Echocardiography Segmentation.,"Deep learning can bring time savings and increased reproducibility to medical image analysis. However, acquiring training data is challenging due to the time-intensive nature of labeling and high inter-observer variability in annotations. Rather than labeling images, in this work we propose an alternative pipeline where images are generated from existing high-quality annotations using generative adversarial networks (GANs). Annotations are derived automatically from previously built anatomical models and are transformed into realistic synthetic ultrasound images with paired labels using a CycleGAN. We demonstrate the pipeline by generating synthetic 2D echocardiography images to compare with existing deep learning ultrasound segmentation datasets. A convolutional neural network is trained to segment the left ventricle and left atrium using only synthetic images. Networks trained with synthetic images were extensively tested on four different unseen datasets of real images with median Dice scores of 91, 90, 88, and 87 for left ventricle segmentation. These results match or are better than inter-observer results measured on real ultrasound datasets and are comparable to a network trained on a separate set of real images. Results demonstrate the images produced can effectively be used in place of real data for training. 
The proposed pipeline opens the door for automatic generation of training data for many tasks in medical imaging as the same process can be applied to other segmentation or landmark detection tasks in any modality. The source code and anatomical models are available to other researchers.1 1https://adgilbert.github.io/data-generation/.",2021-09-30 +33655207,TMSNP: a web server to predict pathogenesis of missense mutations in the transmembrane region of membrane proteins.,"The massive amount of data generated from genome sequencing brings tons of newly identified mutations, whose pathogenic/non-pathogenic effects need to be evaluated. This has given rise to several mutation predictor tools that, in general, do not consider the specificities of the various protein groups. We aimed to develop a predictor tool dedicated to membrane proteins, under the premise that their specific structural features and environment would give different responses to mutations compared to globular proteins. For this purpose, we created TMSNP, a database that currently contains information from 2624 pathogenic and 196 705 non-pathogenic reported mutations located in the transmembrane region of membrane proteins. By computing various conservation parameters on these mutations in combination with annotations, we trained a machine-learning model able to classify mutations as pathogenic or not. TMSNP (freely available at http://lmc.uab.es/tmsnp/) improves considerably the prediction power of commonly used mutation predictors trained with globular proteins.",2021-02-23 +33645624,"An update of KAIKObase, the silkworm genome database. ","KAIKObase was established in 2009 as the genome database of the domesticated silkworm Bombyx mori. It provides several gene sets and genetic maps as well as genome annotation obtained from the sequencing project of the International Silkworm Genome Consortium in 2008. 
KAIKObase has been used widely for silkworm and insect studies even though there are some erroneous predicted genes due to misassembly and gaps in the genome. In 2019, we released a new silkworm genome assembly, showing improvements in gap closure and covering more and longer gene models. Therefore, there is a need to include new genome and new gene models to KAIKObase. In this article, we present the updated contents of KAIKObase and the methods to generate, integrate and analyze the data sets. Database URL: https://kaikobase.dna.affrc.go.jp.",2021-02-01 +33010178,BacWGSTdb 2.0: a one-stop repository for bacterial whole-genome sequence typing and source tracking.,"An increasing prevalence of hospital acquired infections and foodborne illnesses caused by pathogenic and multidrug-resistant bacteria has stimulated a pressing need for benchtop computational techniques to rapidly and accurately classify bacteria from genomic sequence data, and based on that, to trace the source of infection. BacWGSTdb (http://bacdb.org/BacWGSTdb) is a free publicly accessible database we have developed for bacterial whole-genome sequence typing and source tracking. This database incorporates extensive resources for bacterial genome sequencing data and the corresponding metadata, combined with specialized bioinformatics tools that enable the systematic characterization of the bacterial isolates recovered from infections. 
Here, we present BacWGSTdb 2.0, which encompasses several major updates, including (i) the integration of the core genome multi-locus sequence typing (cgMLST) approach, which is highly scalable and appropriate for typing isolates belonging to different lineages; (ii) the addition of a multiple genome analysis module that can process dozens of user uploaded sequences in a batch mode; (iii) a new source tracking module for comparing user uploaded plasmid sequences to those deposited in the public databases; (iv) the number of species encompassed in BacWGSTdb 2.0 has increased from 9 to 20, which represents bacterial pathogens of medical importance; (v) a newly designed, user-friendly interface and a set of visualization tools for providing a convenient platform for users are also included. Overall, the updated BacWGSTdb 2.0 bears great utility in continuing to provide users, including epidemiologists, clinicians and bench scientists, with a one-stop solution to bacterial genome sequence analysis.",2021-01-01 +30184150,TRCirc: a resource for transcriptional regulation information of circRNAs.,"In recent years, high-throughput genomic technologies like chromatin immunoprecipitation sequencing (ChIp-seq) and transcriptome sequencing (RNA-seq) have been becoming both more refined and less expensive, making them more accessible. Many circular RNAs (circRNAs) that originate from back-spliced exons have been identified in various cell lines across different species. However, the regulatory mechanism for transcription of circRNAs remains unclear. Therefore, there is an urgent need to construct a database detailing the transcriptional regulation of circRNAs. TRCirc (http://www.licpathway.net/TRCirc) provides a resource for efficient retrieval, browsing and visualization of transcriptional regulation information of circRNAs. 
The current version of TRCirc documents 92 375 circRNAs and 161 transcription factors (TFs) from more than 100 cell types and together represent more than 765 000 TF-circRNA regulatory relationships. Furthermore, TRCirc provides other regulatory information about transcription of circRNAs, including their expression, methylation levels, H3K27ac signals in regulation regions and super-enhancers associated with circRNAs. TRCirc provides a convenient, user-friendly interface to search, browse and visualize detailed information about these circRNAs.",2019-11-01 +33009809,Detection of mobile genetic elements associated with antibiotic resistance in Salmonella enterica using a newly developed web tool: MobileElementFinder.,"

Objectives

Antimicrobial resistance (AMR) in clinically relevant bacteria is a growing threat to public health globally. In these bacteria, antimicrobial resistance genes are often associated with mobile genetic elements (MGEs), which promote their mobility, enabling them to rapidly spread throughout a bacterial community.

Methods

The tool MobileElementFinder was developed to enable rapid detection of MGEs and their genetic context in assembled sequence data. MGEs are detected based on sequence similarity to a database of 4452 known elements augmented with annotation of resistance genes, virulence factors and detection of plasmids.

Results

MobileElementFinder was applied to analyse the mobilome of 1725 sequenced Salmonella enterica isolates of animal origin from Denmark, Germany and the USA. We found that the MGEs were seemingly conserved according to multilocus ST and not restricted to either the host or the country of origin. Moreover, we identified putative translocatable units for specific aminoglycoside, sulphonamide and tetracycline genes. Several putative composite transposons were predicted that could mobilize, among others, AMR, metal resistance and phosphodiesterase genes associated with macrophage survivability. This is, to our knowledge, the first time the phosphodiesterase-like pdeL has been found to be potentially mobilized into S. enterica.

Conclusions

MobileElementFinder is a powerful tool to study the epidemiology of MGEs in a large number of genome sequences and to determine the potential for genomic plasticity of bacteria. This web service provides a convenient method of detecting MGEs in assembled sequence data. MobileElementFinder can be accessed at https://cge.cbs.dtu.dk/services/MobileElementFinder/.",2021-01-01 +33264402,HERB: a high-throughput experiment- and reference-guided database of traditional Chinese medicine.,"Pharmacotranscriptomics has become a powerful approach for evaluating the therapeutic efficacy of drugs and discovering new drug targets. Recently, studies of traditional Chinese medicine (TCM) have increasingly turned to high-throughput transcriptomic screens for molecular effects of herbs/ingredients. And numerous studies have examined gene targets for herbs/ingredients, and link herbs/ingredients to various modern diseases. However, there is currently no systematic database organizing these data for TCM. Therefore, we built HERB, a high-throughput experiment- and reference-guided database of TCM, with its Chinese name as BenCaoZuJian. We re-analyzed 6164 gene expression profiles from 1037 high-throughput experiments evaluating TCM herbs/ingredients, and generated connections between TCM herbs/ingredients and 2837 modern drugs by mapping the comprehensive pharmacotranscriptomics dataset in HERB to CMap, the largest such dataset for modern drugs. Moreover, we manually curated 1241 gene targets and 494 modern diseases for 473 herbs/ingredients from 1966 references published recently, and cross-referenced this novel information to databases containing such data for drugs. Together with database mining and statistical inference, we linked 12 933 targets and 28 212 diseases to 7263 herbs and 49 258 ingredients and provided six pairwise relationships among them in HERB. In summary, HERB will intensively support the modernization of TCM and guide rational modern drug discovery efforts. 
And it is accessible through http://herb.ac.cn/.",2021-01-01 +34882502,"Development and Evaluation of a Holistic and Mechanistic Modeling Framework for Chemical Emissions, Fate, Exposure, and Risk.","

Background

Large numbers of chemicals require evaluation to determine if their production and use pose potential risks to ecological and human health. For most chemicals, the inadequacy and uncertainty of chemical-specific data severely limit the application of exposure- and risk-based methods for screening-level assessments, priority setting, and effective management.

Objective

We developed and evaluated a holistic, mechanistic modeling framework for ecological and human health assessments to support the safe and sustainable production, use, and disposal of organic chemicals.

Methods

We consolidated various models for simulating the PROduction-To-EXposure (PROTEX) continuum with empirical data sets and models for predicting chemical property and use function information to enable high-throughput (HT) exposure and risk estimation. The new PROTEX-HT framework calculates exposure and risk by integrating mechanistic computational modules describing chemical behavior and fate in the socioeconomic system (i.e., life cycle emissions), natural and indoor environments, various ecological receptors, and humans. PROTEX-HT requires only molecular structure and chemical tonnage (i.e., annual production or consumption volume) as input information. We evaluated the PROTEX-HT framework using 95 organic chemicals commercialized in the United States and demonstrated its application in various exposure and risk assessment contexts.

Results

Seventy-nine percent and 97% of the PROTEX-HT human exposure predictions were within one and two orders of magnitude, respectively, of independent human exposure estimates inferred from biomonitoring data. PROTEX-HT supported screening and ranking chemicals based on various exposure and risk metrics, setting chemical-specific maximum allowable tonnage based on user-defined toxicological thresholds, and identifying the most relevant emission sources, environmental media, and exposure routes of concern in the PROTEX continuum. The case study shows that high chemical tonnage did not necessarily result in high exposure or health risks.

Conclusion

Requiring only two chemical-specific pieces of information, PROTEX-HT enables efficient screening-level evaluations of existing and premanufacture chemicals in various exposure- and risk-based contexts. https://doi.org/10.1289/EHP9372.",2021-12-09 +33342295,Markedness and implicational relationships in phonological development: A cross-linguistic investigation.,"

Purpose

The complexity approach to speech disorders, based on the theoretical notion of phonological markedness, has been gaining interest over the last decade. In a nutshell, this approach suggests that the acquisition of phonologically marked units (e.g. complex onsets) implies the acquisition of less marked ones (e.g. singleton onsets). However, because the notion of markedness is, itself, subject to controversies, we need to constrain what types of implications can be generalised among language learners, within and across languages.

Method

We report on longitudinal data from one phonologically-disordered and five typically-developing children documented across four different languages (English, French, German, Portuguese), using data from the PhonBank database (https://phonbank.talkbank.org). Using the Phon software program (https://www.phon.ca), we systematically analysed each longitudinal study for consonants in singleton onsets and codas as well as in onset clusters.

Result

The implicational relationships supported by our study involve units of similar types (e.g. relations between different segmental categories), while relationships that involve different types of units or processes cannot be generalised across learners.

Conclusion

A better understanding of implicational relationships makes the complexity approach more predictive of developmental patterns of phonology and related phonological disorders.",2020-12-20 +30639529,Catalysing the way towards antimicrobial effectiveness: A systematic analysis and a new online resource for antimicrobial-enzyme combinations against Pseudomonas aeruginosa and Staphylococcus aureus.,"Growing antimicrobial resistance and the resilience of biofilm infections have led researchers to study the potential of antimicrobial combinations, including those incorporating enzymes with biofilm-disrupting abilities. This work aimed to evaluate the journey of antimicrobial-enzyme combination research and to gain insights into its current status and most promising leads. Expert curators annotated and analysed all published experimental data on enzyme-containing combinations for two major biofilm-forming pathogens, namely Pseudomonas aeruginosa and Staphylococcus aureus. This entailed the construction of the first publicly accessible online database on antimicrobial-enzyme combinations, the Antimicrobial Enzyme Combinations Database (https://www.ceb.uminho.pt/aecd). Gathered data were also reconstructed as knowledge networks to help analyse and visualise annotated entities (e.g. enzymes, methods, strains, combination outputs). The database currently holds 122 and 206 annotated combinations for P. aeruginosa and S. aureus, respectively, and their analysis allowed a systematic review of the available evidence on enzyme combinations, reliably illustrating the studies being performed. The most tested enzymes (e.g. lysozyme, DNase, lysostaphin) were scrutinised and the rationale behind each combination was explained. This research area is still growing although current research gaps/opportunities were identified, such as lack of biofilm testing and studies on polymicrobial scenarios. 
Hopefully, this work will shed light on the synergistic potential of enzyme combinations and alleviate some of the time- and resource-consuming tasks related to enzyme combination research by helping the selection and design of new enzyme-related therapeutic options for P. aeruginosa and S. aureus infections.",2019-01-09 +,Automated Personalized Feedback Improves Learning Gains in An Intelligent Tutoring System,"We investigate how automated, data-driven, personalized feedback in a large-scale intelligent tutoring system (ITS) improves student learning outcomes. We propose a machine learning approach to generate personalized feedback, which takes individual needs of students into account. We utilize state-of-the-art machine learning and natural language processing techniques to provide the students with personalized hints, Wikipedia-based explanations, and mathematical hints. Our model is used in Korbit (https://www.korbit.ai), a large-scale dialogue-based ITS with thousands of students launched in 2019, and we demonstrate that the personalized feedback leads to considerable improvement in student learning outcomes and in the subjective evaluation of the feedback.",2020-06-10 +33432018,Identifying intracellular signaling modules and exploring pathways associated with breast cancer recurrence.,"Exploring complex modularization of intracellular signal transduction pathways is critical to understanding aberrant cellular responses during disease development and drug treatment. IMPALA (Inferred Modularization of PAthway LAndscapes) integrates information from high throughput gene expression experiments and genome-scale knowledge databases to identify aberrant pathway modules, thereby providing a powerful sampling strategy to reconstruct and explore pathway landscapes. Here IMPALA identifies pathway modules associated with breast cancer recurrence and Tamoxifen resistance. 
Focusing on estrogen-receptor (ER) signaling, IMPALA identifies alternative pathways from gene expression data of Tamoxifen treated ER positive breast cancer patient samples. These pathways were often interconnected through cytoplasmic genes such as IRS1/2, JAK1, YWHAZ, CSNK2A1, MAPK1 and HSP90AA1 and significantly enriched with ErbB, MAPK, and JAK-STAT signaling components. Characterization of the pathway landscape revealed key modules associated with ER signaling and with cell cycle and apoptosis signaling. We validated IMPALA-identified pathway modules using data from four different breast cancer cell lines including sensitive and resistant models to Tamoxifen. Results showed that a majority of genes in cell cycle/apoptosis modules that were up-regulated in breast cancer patients with short survivals (< 5 years) were also over-expressed in drug resistant cell lines, whereas the transcription factors JUN, FOS, and STAT3 were down-regulated in both patient and drug resistant cell lines. Hence, IMPALA identified pathways were associated with Tamoxifen resistance and an increased risk of breast cancer recurrence. The IMPALA package is available at https://dlrl.ece.vt.edu/software/ .",2021-01-11 +34634444,Yeast cell segmentation in microstructured environments with deep learning.,"Cell segmentation is a major bottleneck in extracting quantitative single-cell information from microscopy data. The challenge is exasperated in the setting of microstructured environments. While deep learning approaches have proven useful for general cell segmentation tasks, previously available segmentation tools for the yeast-microstructure setting rely on traditional machine learning approaches. Here we present convolutional neural networks trained for multiclass segmenting of individual yeast cells and discerning these from cell-similar microstructures. An U-Net based semantic segmentation approach, as well as a direct instance segmentation approach with a Mask R-CNN are demonstrated. 
We give an overview of the datasets recorded for training, validating and testing the networks, as well as a typical use-case. We showcase the methods' contribution to segmenting yeast in microstructured environments with a typical systems or synthetic biology application. The models achieve robust segmentation results, outperforming the previous state-of-the-art in both accuracy and speed. The combination of fast and accurate segmentation is not only beneficial for a posteriori data processing, it also makes online monitoring of thousands of trapped cells or closed-loop optimal experimental design feasible from an image processing perspective. Code is and data samples are available at https://git.rwth-aachen.de/bcs/projects/tp/multiclass-yeast-seg.",2021-10-09 +33203359,FishDB: an integrated functional genomics database for fishes.,"

Background

Hundreds of genomes and transcriptomes of fish species have been sequenced in recent years. However, fish scholarship currently lacks a comprehensive, integrated, and up-to-date collection of fish genomic data.

Results

Here we present FishDB, the first database for fish multi-level omics data, available online at http://fishdb.ihb.ac.cn . The database contains 233 fish genomes, 201 fish transcriptomes, 5841 fish mitochondrial genomes, 88 fish gene sets, 16,239 miRNAs of 65 fishes, 1,330,692 piRNAs and 4852 lncRNAs of Danio rerio, 59,040 Mb untranslated regions (UTR) of 230 fishes, and 31,918 Mb coding sequences (CDS) of 230 fishes. Among these, we newly generated a total of 11 fish genomes and 53 fish transcriptomes.

Conclusions

This release contains over 410,721.67 Mb sequences and provides search functionality, a BLAST server, JBrowse, and PrimerServer modules.",2020-11-17 +31549018,SoyCSN: Soybean context-specific network analysis and prediction based on tissue-specific transcriptome data.,"The Soybean Gene Atlas project provides a comprehensive map for understanding gene expression patterns in major soybean tissues from flower, root, leaf, nodule, seed, and shoot and stem. The RNA-Seq data generated in the project serve as a valuable resource for discovering tissue-specific transcriptome behavior of soybean genes in different tissues. We developed a computational pipeline for Soybean context-specific network (SoyCSN) inference with a suite of prediction tools to analyze, annotate, retrieve, and visualize soybean context-specific networks at both transcriptome and interactome levels. BicMix and Cross-Conditions Cluster Detection algorithms were applied to detect modules based on co-expression relationships across all the tissues. Soybean context-specific interactomes were predicted by combining soybean tissue gene expression and protein-protein interaction data. Functional analyses of these predicted networks provide insights into soybean tissue specificities. For example, under symbiotic, nitrogen-fixing conditions, the constructed soybean leaf network highlights the connection between the photosynthesis function and rhizobium-legume symbiosis. SoyCSN data and all its results are publicly available via an interactive web service within the Soybean Knowledge Base (SoyKB) at http://soykb.org/SoyCSN. SoyCSN provides a useful web-based access for exploring context specificities systematically in gene regulatory mechanisms and gene relationships for soybean researchers and molecular breeders.",2019-09-17 +34967842,"Impact of Nationwide Lockdowns Resulting from The First Wave of the COVID-19 Pandemic on Food Intake, Eating Behaviours and Diet Quality: A Systematic Review. 
","The lockdowns resulting from the first wave of the COVID-19 pandemic impacted deeply on all life activities, including diet. We performed a systematic review to investigate changes in food intake, eating behaviours and diet quality during lockdown as compared to before. A literature search was performed using three electronic databases from inception until June 13, 2021. Observational studies evaluating changes in general populations during the COVID-19 pandemic lockdown were eligible. Out of 1,963 studies achieved from the search strategy, 95 met inclusion criteria (85 on adults, 10 on children/adolescents), and the majority were of high quality (72.6%). Most of the studies were web-based surveys using convenience sampling, mainly focused on variations in the consumption of foods and eating behaviours during lockdown, whereas only 15 studies analysed diet quality through dietary indices. On the basis of the definition of a healthful diet as reflected by a traditional Mediterranean diet, an increase in recommended foods such as fruit and vegetables, legumes, cereals and olive oil was observed, although a sharp decrease in fish intake and an increase in dairy products were documented. Accordingly, a reduction in foods that should be eaten less frequently was reported, namely, red and processed meat. However, a higher consumption of unhealthy foods (e.g., snacks and sweets) was also observed. Results indicated improved diet quality in Europe, especially among Mediterranean countries, with the exception of France, while a switching to poor nutrient patterns was observed in Colombia and Saudi Arabia. Analyses of eating behaviours suggest an increase in food intake, number of daily meals and snacking. In conclusion, changes in intake of major food groups, apart from fish intake, were in line with the definition of a traditional Mediterranean diet, indicating a consistent moderate improvement of dietary habits worldwide. 
This review protocol was registered at https://www.crd.york.ac.uk/prospero/ as CRD42020225292.",2021-12-30 +,"Expression of Pinellia ternata leaf agglutinin under rolC promoter confers resistance against a phytophagous sap sucking aphid, Myzus persicae","Piercing/sucking insect pests in the order Hemiptera causes substantial crop losses by removing photoassimilates and transmitting viruses to their host plants. Cloning and heterologous expression of plant-derived insect resistance genes is a promising approach to control aphids and other sap-sucking insect pests. While expression from the constitutive 35S promoter provides broad protection, the phloem-specific rolC promoter provides better defense against sap sucking insects. The selection of plant-derived insect resistance genes for expression in crop species will minimize bio-safety concerns.Pinellia ternata leaf agglutinin gene (pta), encodes an insecticidal lectin, was isolated and cloned under the 35S and rolC promoters in the pGA482 plant transformation vector for Agrobacterium-mediated tobacco transformation. Integration and expression of the transgene was validated by Southern blotting and qRT-PCR, respectively. Insect bioassays data of transgenic tobacco plants showed that expression of pta under rolC promoter caused 100% aphid mortality and reduced aphid fecundity up to 70% in transgenic tobacco line LRP-9. These results highlight the better effectivity of pta under rolC promoter to control phloem feeders, aphids.These findings suggested the potential of PTA against aphids and other sap sucking insect pests. Evaluation of gene in tobacco under two different promoters; 35S constitutive promoter and rolC phloem-specific promoter could be successfully use for other crop plants particularly in cotton. Development of transgenic cotton plants using plant-derived insecticidal, PTA, would be key step towards commercialization of environmentally safe insect-resistant crops.Umer N, Naqvi RZ, Rauf I, et al. 
Expression of Pinellia ternata leaf agglutinin under rolC promoter confers resistance against a phytophagous sap sucking aphid, Myzus persicae. Electron J Biotechnol 2020;47. https://doi.org/10.1016/j.ejbt.2020.07.004.",2020-09-01 +,Spectral Clustering by Subspace Randomization and Graph Fusion for High-Dimensional Data,"Subspace clustering has been gaining increasing attention in recent years due to its promising ability in dealing with high-dimensional data. However, most of the existing subspace clustering methods tend to only exploit the subspace information to construct a single affinity graph (typically for spectral clustering), which often lack the ability to go beyond a single graph to explore multiple graphs built in various subspaces in high-dimensional space. To address this, this paper presents a new spectral clustering approach based on subspace randomization and graph fusion (SC-SRGF) for high-dimensional data. In particular, a set of random subspaces are first generated by performing random sampling on the original feature space. Then, multiple K-nearest neighbor (K-NN) affinity graphs are constructed to capture the local structures in the generated subspaces. To fuse the multiple affinity graphs from multiple subspaces, an iterative similarity network fusion scheme is utilized to achieve a unified graph for the final spectral clustering. Experiments on twelve real-world high-dimensional datasets demonstrate the superiority of the proposed approach. The MATLAB source code is available at https://www.researchgate.net/publication/338864134.",2020-04-17 +34875956,Combined Network Pharmacology and Cytology Experiments to Identify Potential Anti-Breast Cancer Targets and Mechanisms of Delphinidin.,"Delphinidin is a type of anthocyanin monomer with antioxidant, anti-inflammatory, and anti-tumor effects. However, the biological mechanisms underlying its anti-breast cancer activity have not been thoroughly studied. 
We further studied the effect of delphinidin on breast cancer cells through comprehensive network pharmacology, cellular and molecular experiments. We acquired the know therapeutic targets of delphinidin and obtained differentially expressed genes (DEGs) of breast cancer using RTCGA. We used topological analysis to screen out the 106 core targets of delphinium anti-breast cancer and performed functional analysis. These genes were mainly enriched in the pathways in cancer, Progesterone-mediated oocyte maturation and cell cycle. Then, by taking the intersection of the three analyzed data sets, important core targets (EGFR, TOP2A and PTGS2) were obtained and molecular-docking was performed to validate the result. Additionally, In Vitro experiments, MCF-7 and BT-474 cell proliferation was inhibited in a dose-dependent manner by delphinidin and the expressions of EGFR, TOP2A and PTGS were reduced. Moreover, delphinidin influenced cell cycle, the expressions of cdk1 and cyclin B1 were reduced. Furthermore, delphinidin induced apoptosis by activating the MAPK-Signaling pathway. Collectively, our findings suggested that delphinidin may offer effective approaches in breast cancer prevention and therapy.Supplemental data for this article is available online at http://dx.doi.org/10.1080/01635581.2021.2012582.",2021-12-08 +33196844,dbCNS: A New Database for Conserved Noncoding Sequences.,"We developed dbCNS (http://yamasati.nig.ac.jp/dbcns), a new database for conserved noncoding sequences (CNSs). CNSs exist in many eukaryotes and are assumed to be involved in protein expression control. Version 1 of dbCNS, introduced here, includes a powerful and precise CNS identification pipeline for multiple vertebrate genomes. Mutations in CNSs may induce morphological changes and cause genetic diseases. For this reason, many vertebrate CNSs have been identified, with special reference to primate genomes. 
We integrated ∼6.9 million CNSs from many vertebrate genomes into dbCNS, which allows users to extract CNSs near genes of interest using keyword searches. In addition to CNSs, dbCNS contains published genome sequences of 161 species. With purposeful taxonomic sampling of genomes, users can employ CNSs as queries to reconstruct CNS alignments and phylogenetic trees, to evaluate CNS modifications, acquisitions, and losses, and to roughly identify species with CNSs having accelerated substitution rates. dbCNS also produces links to dbSNP for searching pathogenic single-nucleotide polymorphisms in human CNSs. Thus, dbCNS connects morphological changes with genetic diseases. A test analysis using 38 gnathostome genomes was accomplished within 30 s. dbCNS results can evaluate CNSs identified by other stand-alone programs using genome-scale data.",2021-04-01 +34878856,Adapting Survey Data Collection to Respond to the COVID-19 Pandemic: Experiences From a Local Health Department.,"The New York City (NYC) Department of Health and Mental Hygiene (""Health Department"") conducts routine surveys to describe the health of NYC residents. During the COVID-19 pandemic, the Health Department adjusted existing surveys and developed new ones to improve our understanding of the impact of the pandemic on physical health, mental health, and social determinants of health and to incorporate more explicit measures of racial inequities. The longstanding Community Health Survey was adapted in 2020 to ask questions about COVID-19 and recruit respondents for a population-based severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) serosurvey. A new survey panel, Healthy NYC, was launched in June 2020 and is being used to collect data on COVID-19, mental health, and social determinants of health. In addition, 7 Health Opinion Polls were conducted from March 2020 through March 2021 to learn about COVID-19-related knowledge, attitudes, and opinions, including vaccine intentions. 
We describe the contributions that survey data have made to the emergency response in NYC in ways that address COVID-19 and the profound inequities of the pandemic. (Am J Public Health. 2021;111(12):2176-2185. https://doi.org/10.2105/AJPH.2021.306515).",2021-12-01 +35865261,Comparison of Three Methodologies for Removal of Random-Noise-Induced Biases From Second-Order Statistical Parameters of Lidar and Radar Measurements.,"Random-noise-induced biases are inherent issues to the accurate derivation of second-order statistical parameters (e.g., variances, fluxes, energy densities, and power spectra) from lidar and radar measurements. We demonstrate here for the first time an altitude-interleaved method for eliminating such biases, following the original proposals by Gardner and Chu (2020, https://doi.org/10.1364/ao.400375) who demonstrated a time-interleaved method. Interleaving in altitude bins provides two statistically independent samples over the same time period and nearly the same altitude range, thus enabling the replacement of variances that include the noise-induced biases with covariances that are intrinsically free of such biases. Comparing the interleaved method with previous variance subtraction (VS) and spectral proportion (SP) methods using gravity wave potential energy density calculated from Antarctic lidar data and from a forward model, this study finds the accuracy and precision of each method differing in various conditions, each with its own strengths and weakness. VS performs well in high-SNR, yet its accuracy fails at lower-SNR as it often yields negative values. SP is accurate and precise under high-SNR, remaining accurate in worse conditions than VS would, yet develops a positive bias under low-SNR. The interleaved method is accurate in all SNRs but requires a large number of samples to drive random-noise terms in covariances toward zero and to compensate for the reduced precision due to the splitting of return signals. 
Therefore, selecting the proper bias removal/elimination method for actual signal and sample conditions is crucial in utilizing lidar/radar data, as neglecting this can conceal trends or overstate atmospheric variability.",2021-12-30 +33613153,Nutritional impact of adding a serving of mushrooms to USDA Food Patterns - a dietary modeling analysis. ,"Mushrooms are part of vegetables and are important source of nutrients and bioactive compounds. The objective was to assess the nutritional impact of adding a serving of mushrooms in USDA Food Patterns using a similar approach to that used by USDA for Dietary Guidelines. A composite of commonly consumed raw mushrooms (white, brown/crimini and portabella; at 1:1:1 ratio) and raw speciality mushrooms (oyster mushrooms) were used for modeling. The United States Department of Agriculture (USDA) Food Data central database (https://fdc.nal.usda.gov/) was used to obtain nutrient profiles of mushrooms. Nutritional profiles of USDAs Food Patterns were obtained from the Scientific Report of the 2015 Dietary Guidelines Advisory Committee, Appendix E-3 (https://health.gov/dietaryguidelines/2015-scientific-report/15-appendix-E3/) and dietary modeling was accomplished by adding nutrients from mushrooms. Addition of an 84 g serving of commonly consumed raw mushrooms to USDA Food Patterns resulted in about 1% increase in calories, less than 5% increase in macronutrients, 2-3% increase in fiber, 8-12% increase in potassium, 12-18% increase in riboflavin, 11-26% increase in niacin, 11-23% selenium and 16-26% increase in copper depending upon the pattern type and calorie level. Mushrooms exposed to UV light to increase vitamin D levels to 200 IU/serving also increased vitamin D by 67-90% in USDA Food Patterns. Addition of oyster mushroom also additionally increased 8-11% vitamin D and 10-16% choline in USDA Food Patterns. 
Addition of mushrooms had minimal effect on sodium (1% or less increase) and no effect on saturated fat or cholesterol in USDA Food Patterns. Based on published data, a serving of commonly consumed mushrooms would also be expected to add 2.2 mg ergothioneine and 3.5 mg glutathione to the USDA Food Patterns. Addition of mushrooms to USDA Food Patterns increased several micronutrients including shortfall nutrients (such as potassium, vitamin D and choline), and had a minimal or no impact on overall calories, sodium or saturated fat.",2021-02-05 +32406920,AnnoLnc2: the one-stop portal to systematically annotate novel lncRNAs for human and mouse.,"With the abundant mammalian lncRNAs identified recently, a comprehensive annotation resource for these novel lncRNAs is an urgent need. Since its first release in November 2016, AnnoLnc has been the only online server for comprehensively annotating novel human lncRNAs on-the-fly. Here, with significant updates to multiple annotation modules, backend datasets and the code base, AnnoLnc2 continues the effort to provide the scientific community with a one-stop online portal for systematically annotating novel human and mouse lncRNAs with a comprehensive functional spectrum covering sequences, structure, expression, regulation, genetic association and evolution. In response to numerous requests from multiple users, a standalone package is also provided for large-scale offline analysis. We believe that updated AnnoLnc2 (http://annolnc.gao-lab.org/) will help both computational and bench biologists identify lncRNA functions and investigate underlying mechanisms.",2020-07-01 +33342083,Crystal structures of a novel family IV esterase in free and substrate-bound form.,"Bacterial lipolytic enzymes of family IV are homologs of the mammalian hormone-sensitive lipases (HSL) and have been successfully used for various biotechnological applications. 
The broad substrate specificity and ability for enantio-, regio-, and stereoselective hydrolysis are remarkable features of enzymes from this class. Many crystal structures are available for esterases and lipases, but structures of enzyme-substrate or enzyme-inhibitor complexes are less frequent although important to understand the molecular basis of enzyme-substrate interaction and to rationalize biochemical enzyme characteristics. Here, we report on the structures of a novel family IV esterase isolated from a metagenomic screen, which shows a broad substrate specificity. We solved the crystal structures in the apo form and with a bound substrate analogue at 1.35 and 1.81 Å resolution, respectively. This enzyme named PtEst1 hydrolyzed more than 60 out 96 structurally different ester substrates thus being substrate promiscuous. Its broad substrate specificity is in accord with a large active site cavity, which is covered by an α-helical cap domain. The substrate analogue methyl 4-methylumbelliferyl hexylphosphonate was rapidly hydrolyzed by the enzyme leading to a complete inactivation caused by covalent binding of phosphinic acid to the catalytic serine. Interestingly, the alcohol leaving group 4-methylumbelliferone was found remaining in the active site cavity, and additionally, a complete inhibitor molecule was found at the cap domain next to the entrance of the substrate tunnel. This unique situation allowed gaining valuable insights into the role of the cap domain for enzyme-substrate interaction of esterases belonging to family IV. 
DATABASE: Structural data of PtEst1 are available in the worldwide protein data bank (https://www.rcsb.org) under the accession codes: 6Z68 (apo-PtEst1) and 6Z69 (PtEst1-inhibitor complex).",2021-01-06 +30395270,The European Nucleotide Archive in 2018.,"The European Nucleotide Archive (ENA; https://www.ebi.ac.uk/ena), provided from EMBL-EBI, has for more than three decades been responsible for archiving the world's public sequencing data and presenting this important resource to the scientific community to support and accelerate the global research effort. Here, we outline ENA services and content in 2018 and provide an overview of a selection of focus areas of development work: extending data coordination services around ENA, sequence submissions through template expansion, early pre-submission validation tools and our move towards a new browser and retrieval infrastructure.",2019-01-01 +35136446,"Job Characteristics, Well-Being and Physical Activity: A Field Study Using a Consumer Fitness Tracker.","The relation between job characteristics and health is one of the most important fields of research within work and organizational psychology. Another prominent variable influencing health is physical activity. The physical activity mediated Demand-Control (pamDC) model (Häusser & Mojzisch, 2017, https://doi.org/10.1080/02678373.2017.1303759) combines these health indicators in a new theoretical framework. Based on the pamDC model the current study aims to clarify the role of leisure time physical activity (LTPA) in the interplay of job demands, job control and well-being. We expect physical activity to partially mediate the impact of job characteristics on health. To avoid self-report bias considering physical activity we used a consumer fitness tracker to collect additional data. In total, 104 white-collar workers participated in the study. The results show that job control and job demands could predict well-being in cross-sectional analyses. 
In longitudinal analyses, this was only the case for job demands. Regarding the proposed mediating effect of LTPA between job characteristics and health, we could not detect a significant mediation in our sample. This was true for both self-reported and objective data on physical activity. This study provides a first step in validating the pamDC model and has implications for future research.",2021-11-30 +,DeltaGen: A Comprehensive Decision Support Tool for Plant Breeders,"In this paper, we introduce a unique new plant breeding decision support software tool DeltaGen, implemented in R and its package Shiny. DeltaGen provides plant breeders with a single integrated solution for experimental design generation, data quality control, statistical and quantitative genetic analyses, breeding strategy evaluation, simulation, and cost analysis, pattern analysis, index selection, and underlying basic theory on quantitative genetics. Key analysis procedures in DeltaGen were demonstrated using three datasets generated from forage breeding trials in Australia, New Zealand, and the United States. Analyses of the perennial ryegrass seasonal growth data in Case Study 1 was based on residual maximum likelihood analysis and pattern analysis. A graphical summary of the performance of entries across locations was generated, and entries with specific and broad adaptation were identified. The quantitative genetic analysis and breeding method simulation procedures applied to the perennial ryegrass half‐sib (HS) family data in Case Study 2 enabled estimation of quantitative genetic parameters, prediction of genetic gain, and calculation of costs per selection cycle. These results enabled comparison of three breeding methods, which also included genomic selection, and their simulation. 
Data from Case Study 3 were analyzed to investigate a multivariate approach to identify HS families of switchgrass with breeding values that would enable an increase in biomass dry matter yield (DMY) and cell wall ethanol (CWE) and a decrease in Klason lignin (KL). The Smith–Hazel index developed enabled identification of HS families with genetic worth for increasing DMY and CWE and reducing KL, in contrast with individual trait selection. Analysis of the datasets in all three case studies provides a snapshot of the key analyses available within DeltaGen. This software tool could also be used as a teaching resource in plant breeding courses. DeltaGen is available as freeware at http://agrubuntu.cloudapp.net/PlantBreedingTool/",2018-05-01 +33180965,Minocycline in neurodegenerative and psychiatric diseases: An update.,"

Background and purpose

Minocycline is a broad-spectrum antibiotic, effective as a chronic treatment for recurrent bacterial infections. Beyond its antibiotic action, minocycline also has important anti-inflammatory, antioxidant and antiapoptotic properties. Its efficacy has therefore been evaluated in many neurodegenerative and psychiatric diseases that have an inflammatory basis. Our aim was to review preclinical and clinical studies performed in neurological and psychiatric diseases whose treatment involved the use of minocycline and thereby to discern the possible beneficial effect of minocycline in these disorders.

Methods

Completed and ongoing preclinical studies and clinical trials of minocycline for both neurodegenerative diseases and psychiatric disorders, published from January 1995 to January 2020, were identified through searching relevant databases (https://www.ncbi.nlm.nih.gov/pubmed/, https://clinicaltrials.gov/). A total of 74 preclinical studies and 44 clinical trials and open-label studies were selected.

Results

The results of the nearly 20 years of research identified are diverse. While minocycline mostly proved to be effective in animal models, clinical results showed divergent outcomes, with positive results in some studies counterbalanced by a number of cases with no significant improvements. Specific data for each disease are further individually described in this review.

Conclusions

Despite minocycline demonstrating antioxidant and anti-inflammatory effects, discrepancies between preclinical and clinical data indicate that we should be cautious in analyzing the outcomes. Improving and standardizing protocols and refining animal models could help us to determine if minocycline really is a useful drug in the treatment of these pathologies.",2020-12-24 +29541361,Data Sets Representative of the Structures and Experimental Properties of FDA-Approved Drugs.,"Presented here are several data sets that gather information collected from the labels of the FDA approved drugs: their molecular structures and those of the described active metabolites, their associated pharmacokinetics and pharmacodynamics data, and the history of their marketing authorization by the FDA. To date, 1852 chemical structures have been identified with a molecular weight less than 2000 of which 492 are or have active metabolites. To promote the sharing of data, the original web server was upgraded for browsing the database and downloading the data sets (http://chemoinfo.ipmc.cnrs.fr/edrug3d). It is believed that the multidimensional chemistry-oriented collections are an essential resource for a thorough analysis of the current drug chemical space. The data sets are envisioned as being used in a wide range of endeavors that include drug repurposing, drug design, privileged structures analyses, structure-activity relationship studies, and improving of absorption, distribution, metabolism, and elimination predictive models.",2018-01-29 +33057581,ABCModeller: an automatic data mining tool based on a consistent voting method with a user-friendly graphical interface. ,"In order to extract useful information from a huge amount of biological data nowadays, simple and convenient tools are urgently needed for data analysis and modeling. 
In this paper, an automatic data mining tool, termed as ABCModeller (Automatic Binary Classification Modeller), with a user-friendly graphical interface was developed here, which includes automated functions as data preprocessing, significant feature extraction, classification modeling, model evaluation and prediction. In order to enhance the generalization ability of the final model, a consistent voting method was built here in this tool with the utilization of three popular machine-learning algorithms, as artificial neural network, support vector machine and random forest. Besides, Fibonacci search and orthogonal experimental design methods were also employed here to automatically select significant features in the data space and optimal hyperparameters of the three algorithms to achieve the best model. The reliability of this tool has been verified through multiple benchmark data sets. In addition, with the advantage of a user-friendly graphical interface of this tool, users without any programming skills can easily obtain reliable models directly from original data, which can reduce the complexity of modeling and data mining, and contribute to the development of related research including but not limited to biology. The excitable file of this tool can be downloaded from http://lishuyan.lzu.edu.cn/ABCModeller.rar.",2021-07-01 +34791040,Toward the assessment of predicted inter-residue distance. ,"Significant progress has been achieved in distance-based protein folding, due to improved prediction of inter-residue distance by deep learning. Many efforts are thus made to improve distance prediction in recent years. However, it remains unknown what is the best way of objectively assessing the accuracy of predicted distance. A total of 19 metrics were proposed to measure the accuracy of predicted distance. These metrics were discussed and compared quantitatively on three benchmark datasets, with distance and structure models predicted by the trRosetta pipeline. 
The experiments show that a few metrics, such as distance precision, have a high correlation with the model accuracy measure TM-score (Pearson's correlation coefficient >0.7). In addition, the metrics are applied to rank the distance prediction groups in CASP14. The ranking by our metrics coincides largely with the official version. These data suggest that the proposed metrics are effective for measuring distance prediction. We anticipate that this study paves the way for objectively monitoring the progress of inter-residue distance prediction. A web server and a standalone package are provided to implement the proposed metrics. http://yanglab.nankai.edu.cn/APD. Supplementary data are available at Bioinformatics online.",2021-11-15 +31345254,Benchmarking of alignment-free sequence comparison methods.,"

Background

Alignment-free (AF) sequence comparison is attracting persistent interest driven by data-intensive applications. Hence, many AF procedures have been proposed in recent years, but a lack of a clearly defined benchmarking consensus hampers their performance assessment.

Results

Here, we present a community resource (http://afproject.org) to establish standards for comparing alignment-free approaches across different areas of sequence-based research. We characterize 74 AF methods available in 24 software tools for five research applications, namely, protein sequence classification, gene tree inference, regulatory element detection, genome-based phylogenetic inference, and reconstruction of species trees under horizontal gene transfer and recombination events.

Conclusion

The interactive web service allows researchers to explore the performance of alignment-free tools relevant to their data types and analytical goals. It also allows method developers to assess their own algorithms and compare them with current state-of-the-art tools, accelerating the development of new, more accurate AF solutions.",2019-07-25 +34859662,"MetEx, a Metabolomics Explorer Application for Natural Product Discovery.","Advances in next-generation DNA sequencing technologies, bioinformatics, and mass spectrometry-based metabolite detection have ushered in a new era of natural product discovery. Microbial secondary metabolomes are complex, especially when otherwise silent biosynthetic genes are activated, and there is therefore a need for data analysis software to explore and map the resulting multidimensional datasets. To that end, we herein report the Metabolomics Explorer (MetEx), a publicly available web application for the analysis of parallel liquid chromatography-coupled mass spectrometry (LC-MS)-based metabolomics data. MetEx is a highly interactive application that facilitates visualization and analysis of complex metabolomics datasets, consisting of retention time, m/z, and MS intensity features, as a function of hundreds of conditions or elicitors. The software enables prioritization of leads from three-dimensional maps, extraction of two-dimensional slices from various higher order plots, organization of datasets by elicitor chemotypes, customizable library-based dereplication, and automatically scored lead selection. We describe the application of MetEx to the first UPLC-MS-guided high-throughput elicitor screen in which Burkholderia gladioli was challenged with 750 elicitors, and the resulting profiles were interrogated by UPLC-Qtof-MS and subsequently analyzed with the app. We demonstrate the utility of MetEx by reporting elicitors for several cryptic metabolite groups and by uncovering new natural products that remain to be characterized. 
MetEx is available at https://mo.princeton.edu/MetEx/.",2021-12-03 +27841751,A public database of macromolecular diffraction experiments.,"The low reproducibility of published experimental results in many scientific disciplines has recently garnered negative attention in scientific journals and the general media. Public transparency, including the availability of `raw' experimental data, will help to address growing concerns regarding scientific integrity. Macromolecular X-ray crystallography has led the way in requiring the public dissemination of atomic coordinates and a wealth of experimental data, making the field one of the most reproducible in the biological sciences. However, there remains no mandate for public disclosure of the original diffraction data. The Integrated Resource for Reproducibility in Macromolecular Crystallography (IRRMC) has been developed to archive raw data from diffraction experiments and, equally importantly, to provide related metadata. Currently, the database of our resource contains data from 2920 macromolecular diffraction experiments (5767 data sets), accounting for around 3% of all depositions in the Protein Data Bank (PDB), with their corresponding partially curated metadata. IRRMC utilizes distributed storage implemented using a federated architecture of many independent storage servers, which provides both scalability and sustainability. The resource, which is accessible via the web portal at http://www.proteindiffraction.org, can be searched using various criteria. All data are available for unrestricted access and download. The resource serves as a proof of concept and demonstrates the feasibility of archiving raw diffraction data and associated metadata from X-ray crystallographic studies of biological macromolecules. 
The goal is to expand this resource and include data sets that failed to yield X-ray structures in order to facilitate collaborative efforts that will improve protein structure-determination methods and to ensure the availability of `orphan' data left behind for various reasons by individual investigators and/or extinct structural genomics projects.",2016-10-28 +33216899,WCSdb: a database of wild Coffea species. ,"Coffee is a beverage enjoyed by millions of people worldwide and an important commodity for millions of people. Beside the two cultivated species (Coffea arabica and Coffea canephora), the 139 wild coffee species/taxa belonging to the Coffea genus are largely unknown to coffee scientists and breeders although these species may be crucial for future coffee crop development to face climate changes. Here we present the Wild Coffee Species database (WCSdb) hosted by Pl@ntNet platform (http://publish.plantnet-project.org/project/wildcofdb_en), providing information for 141 coffee species/taxa, for which 84 contain a photo gallery and 82 contain sequencing data (genotyping-by-sequencing, chloroplast or whole genome sequences). The objective of this database is to better understand and characterize the species (identification, morphology, biochemical compounds, genetic diversity and sequence data) in order to better protect and promote them. http://publish.plantnet-project.org/project/wildcofdb_en.",2020-11-01 +34025412,The Genus Eriosema (Fabaceae): From the Ethnopharmacology to an Evidence-Based Phytotherapeutic Perspective?,"The genus Eriosema (Fabaceae) includes approximately 150 species widely distributed across tropical and subtropical regions of the world (Africa, Neotropics, Asia and Australia). Throughout these regions, several species are used since centuries in different traditional medicinal systems, while others are used as food or food supplement. 
The present review attempts to critically summarize current information concerning the uses, phytochemistry and pharmacology of the Eriosema genus and to evaluate the therapeutic potential. The information published in English and French (up to September 2020) on ethnopharmacology or traditional uses, chemistry, pharmacology and toxicology of Eriosema genus was collected from electronic databases [SciFinder, PubMed, Google, Google Scholar, Scopus, Web of Science, Prelude Medicinal Plants-http://www.ethnopharmacologia.org/recherche-dans-prelude/?plant, The Plant List (http://www.theplantlist.org/), POWO (http://powo.science.kew.org/) and IUCN Red List Categories (https://www.iucnredlist.org/)], conference proceedings, books, M.Sc. and Ph.D. dissertations. The information retrieved on the ethnomedicinal indications of Eriosema genus allowed to list 25 species (∼16.6% of the genus). The majority of uses is recorded from Africa. Phytochemical analyses of 8 species led to the identification and/or isolation of 107 compounds, with flavonoids (69.2%), chromones (7.5%) and benzoic acid derivatives (3.7%) as the main chemical classes. Pharmacological investigations with crude extracts and isolated compounds showed a broad range of activities including aphrodisiac, estrogenic, anti-osteoporosis, hypolipidemic, anti-diabetic, anti-diarrheal, anti-microbial, anti-oxidant, anthelmintic, anti-cancer, and acetylcholinesterase inhibitory activities. Despite the low number of Eriosema species tested, there is convincing evidence in vitro and in vivo studies validating some traditional and ethnobotanical uses. However, the utility of several of the described uses has not yet been confirmed in pharmacological studies. 
Reviewed data could serve as a reference tool and preliminary information for advanced research on Eriosema species.",2021-05-07 +34704895,Trends in heroin use and injection drug use among high school students in five urban school districts in the US (2005-2017).,"Background. We describe the prevalence of and changes in heroin use and injection drug use (IDU) among high school students in five large, urban school districts in the US (2005-2017); nearly three-fourths of the students were Black and/or Hispanic/Latino.Methods. Data are from the Centers for Disease Control and Prevention's ""Youth Risk Behavior Survey"" program, which includes biennial surveys in urban school districts. We pooled data across districts and survey years, and then generated weighted prevalence estimates (and 95% CIs) for any lifetime heroin use and IDU. Joinpoint regression modeling was used to estimate changes in prevalence over the study period.Results. Biennial prevalence estimates (2005-2017) for heroin use and IDU were above 1.8% for all seven timepoints. In 2017, prevalence of heroin use and IDU were 2.9% and 2.5%, respectively. Both heroin use and IDU were higher among boys than girls. There were statistically significant increases in heroin use and IDU among girls from 2005-2009, whereas changes over time were stable among boys.Conclusions. High school students in large, urban school districts may have higher rates of heroin use and IDU than US high school students in general, and there is little evidence of increases since 2009. 
This study suggests that adolescence may be a critical period for initiation of heroin use among adolescents in large urban school districts, the majority of whom are Black and/or Latino.Supplemental data for this article is available online at https://doi.org/10.1080/15332640.2021.1992327 .",2021-10-27 +31341169,Deep single-cell RNA sequencing data of individual T cells from treatment-naïve colorectal cancer patients.,"T cells, as a crucial compartment of the tumour microenvironment, play vital roles in cancer immunotherapy. However, the basic properties of tumour-infiltrating T cells (TILs) such as the functional state, migratory capability and clonal expansion remain elusive. Here, using Smart-seq2 protocol, we have generated a RNA sequencing dataset of 11,138 T cells isolated from peripheral blood, adjacent normal and tumour tissues of 12 colorectal cancer (CRC) patients, including 4 with microsatellite instability (MSI). The dataset contained an expression profile of 10,805 T cells, as well as the full-length T cell receptor (TCR) sequences of 9,878 cells after quality control. To facilitate data mining of our T cell dataset, we developed a web-based application to deliver systematic interrogations and customizable functionalities ( http://crctcell.cancer-pku.cn/ ). Functioning with our dataset, the web tool enables the characterization of TILs based on both transcriptome and assembled TCR sequences at the single cell level, which will help unleash the potential value of our CRC T cell data resource.",2019-07-24 +34420978,A Multi-Omic Huntington's Disease Transgenic Sheep-Model Database for Investigating Disease Pathogenesis.,"

Background

The pathological mechanism of cellular dysfunction and death in Huntington's disease (HD) is not well defined. Our transgenic HD sheep model (OVT73) was generated to investigate these mechanisms and for therapeutic testing. One particular cohort of animals has undergone focused investigation resulting in a large interrelated multi-omic dataset, with statistically significant changes observed comparing OVT73 and control 'omic' profiles and reported in literature.

Objective

Here we make this dataset publicly available for the advancement of HD pathogenic mechanism discovery.

Methods

To enable investigation in a user-friendly format, we integrated seven multi-omic datasets from a cohort of 5-year-old OVT73 (n = 6) and control (n = 6) sheep into a single database utilising the programming language R. It includes high-throughput transcriptomic, metabolomic and proteomic data from blood, brain, and other tissues.

Results

We present the 'multi-omic' HD sheep database as a queryable web-based platform that can be used by the wider HD research community (https://hdsheep.cer.auckland.ac.nz/). The database is supported with a suite of simple automated statistical analysis functions for rapid exploratory analyses. We present examples of its use that validates the integrity relative to results previously reported. The data may also be downloaded for user determined analysis.

Conclusion

We propose the use of this online database as a hypothesis generator and method to confirm/refute findings made from patient samples and alternate model systems, to expand our understanding of HD pathogenesis. Importantly, additional tissue samples are available for further investigation of this cohort.",2021-01-01 +,"5th Anniversary Article: Data‐Driven Materials Science: Status, Challenges, and Perspectives (Adv. Sci. 21/2019)","Data‐driven science is heralded as the new paradigm in materials science. Data infrastructures store vast amounts of materials data. Machine learning algorithms systematically extract knowledge from materials data streams to discover new materials for future technologies and the well‐being of society. In article number https://doi.org/10.1002/advs.201900808, Patrick Rinke and co‐worker review the current state of data‐driven materials science.",2019-11-06 +29474519,ViCTree: an automated framework for taxonomic classification from protein sequences.,"

Motivation

The increasing rate of submission of genetic sequences into public databases is providing a growing resource for classifying the organisms that these sequences represent. To aid viral classification, we have developed ViCTree, which automatically integrates the relevant sets of sequences in NCBI GenBank and transforms them into an interactive maximum likelihood phylogenetic tree that can be updated automatically. ViCTree incorporates ViCTreeView, which is a JavaScript-based visualization tool that enables the tree to be explored interactively in the context of pairwise distance data.

Results

To demonstrate utility, ViCTree was applied to subfamily Densovirinae of family Parvoviridae. This led to the identification of six new species of insect virus.

Availability and implementation

ViCTree is open-source and can be run on any Linux- or Unix-based computer or cluster. A tutorial, the documentation and the source code are available under a GPL3 license, and can be accessed at http://bioinformatics.cvr.ac.uk/victree_web/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +34941283,Somali migration to the United States: Understanding adaptation through digital stories.,"

Objectives

The purpose of this study was to further understand the psychological process of migration through an interdisciplinary (psychology, history, and digital humanities) collaboration that examines the experiences of Somali refugees in the United States.

Method

The sample consisted of 26 Somali American emerging adult and older adult refugees who created digital stories as part of the Immigrant Stories Project (https://immigrantstories.umn.edu/). Stories were analyzed through an examination of narrative structure and content.

Results

The structure of the authors' stories was primarily progressive or stable, with very few regressive stories. Although the distribution of these story structures did not differ for emerging adults and older adults, there were important variations in content. Emerging adults' stories reflected a struggle to find self-continuity across time and place, whereas older adults' stories indicated attempts to find meaning and optimally adapt to their current situations. Moreover, none of the stories took on a redemptive structure, a type of story that has been identified as culturally prevalent in U.S. culture but seldom examined across diverse populations.

Conclusions

The findings highlight the varieties of paths toward successful immigration and the importance of taking a collaborative, participatory approach to understanding migration experiences. (PsycInfo Database Record (c) 2022 APA, all rights reserved).",2021-12-23 +30906871,ClusterEnG: an interactive educational web resource for clustering and visualizing high-dimensional data. ,"Clustering is one of the most common techniques used in data analysis to discover hidden structures by grouping together data points that are similar in some measure into clusters. Although there are many programs available for performing clustering, a single web resource that provides both state-of-the-art clustering methods and interactive visualizations is lacking. ClusterEnG (acronym for Clustering Engine for Genomics) provides an interface for clustering big data and interactive visualizations including 3D views, cluster selection and zoom features. ClusterEnG also aims at educating the user about the similarities and differences between various clustering algorithms and provides clustering tutorials that demonstrate potential pitfalls of each algorithm. The web resource will be particularly useful to scientists who are not conversant with computing but want to understand the structure of their data in an intuitive manner. ClusterEnG is part of a bigger project called KnowEnG (Knowledge Engine for Genomics) and is available at http://education.knoweng.org/clustereng. songi@illinois.edu.",2018-05-21 +30380119,SAGD: a comprehensive sex-associated gene database from transcriptomes.,"Many animal species present sex differences. Sex-associated genes (SAGs), which have female-biased or male-biased expression, have major influences on the remarkable sex differences in important traits such as growth, reproduction, disease resistance and behaviors. However, the SAGs resulting in the vast majority of phenotypic sex differences are still unknown. 
To provide a useful resource for the functional study of SAGs, we manually curated public RNA-seq datasets with paired female and male biological replicates from the same condition and systematically re-analyzed the datasets using standardized methods. We identified 27,793 female-biased SAGs and 64,043 male-biased SAGs from 2,828 samples of 21 species, including human, chimpanzee, macaque, mouse, rat, cow, horse, chicken, zebrafish, seven fly species and five worm species. All these data were cataloged into SAGD, a user-friendly database of SAGs (http://bioinfo.life.hust.edu.cn/SAGD) where users can browse SAGs by gene, species, drug and dataset. In SAGD, the expression, annotation, targeting drugs, homologs, ontology and related RNA-seq datasets of SAGs are provided to help researchers to explore their functions and potential applications in agriculture and human health.",2019-01-01 +33008298,Database: web application for visualization of the cumulated RNAseq data against the salicylic acid (SA) and methyl jasmonate (MeJA) treatment of Arabidopsis thaliana.,"

Background

Plants have adapted to survive under adverse conditions or exploit favorable conditions in response to their environment as sessile creatures. In a way of plant adaptation, plant hormones have been evolved to efficiently use limited resources. Plant hormones including auxin, jasmonic acid, salicylic acid, and ethylene have been studied to reveal their role in plant adaptation against their environment by phenotypic observation with experimental design such as mutation on hormone receptors and treatment / non-treatment of plant hormones along with other environmental conditions. With the development of Next Generation Sequencing (NGS) technology, it became possible to score the total gene expression of the sampled plants and estimate the degree of effect of plant hormones in gene expression. This allowed us to infer the signaling pathway through plant hormones, which greatly stimulated the study of functional genomics using mutants. Due to the continued development of NGS technology and analytical techniques, many plant hormone-related studies have produced and accumulated NGS-based data, especially RNAseq data have been stored in the sequence read archive represented by NCBI, EBI, and DDBJ.

Description

Here, hormone treatment RNAseq data of Arabidopsis (Col0), wild-type genotype, were collected with mock, SA, and MeJA treatments. The genes affected by hormones were identified through a machine learning approach. The degree of expression of the affected gene was quantified, visualized in boxplot using d3 (data-driven-document), and the database was built by Django.

Conclusion

Using this database, we created a web application ( http://pgl.gnu.ac.kr/hormoneDB/ ) that lists hormone-related or hormone-affected genes and visualizes the boxplot of the gene expression of selected genes. This web application eventually aids the functional genomics researchers who want to gather the cases of the gene responses by the hormones.",2020-10-02 +34731586,Speech Abilities in a Heterogeneous Group of Children With Autism.,"

Purpose

This study aimed to provide detailed descriptive information about the speech of a heterogeneous cohort of children with autism spectrum disorder (ASD) and to explore whether subgroups exist based on this detailed speech data. High rates of delayed and disordered speech in both low-verbal and high-functioning children with ASD have been reported. There is limited information regarding the speech abilities of young children across a range of functional levels.

Method

Participants were 23 children aged 2;0-6;11 (years;months) with a diagnosis of ASD. Comprehensive speech and language assessments were administered. Independent and relational speech analyses were conducted from single-word naming tasks and spontaneous speech samples. Hierarchical clustering based on language, nonverbal communication, and spontaneous speech descriptive data was completed.

Results

Independent and relational speech analyses are reported. These variables are used in the cluster analyses, which identified three distinct subgroups: (a) children with high language and high speech ability (n = 10), (b) children with low expressive language and low speech ability but higher receptive language and use of gestures (n = 3), and (c) children with low language and low speech development (n = 10).

Conclusions

This is the first study to provide detailed descriptive speech data of a heterogeneous cohort of children with ASD and use this information to statistically explore potential subgroups. Clustering suggests a small number of children present with low levels of speech and expressive language in the presence of better receptive language and gestures. This communication profile warrants further exploration. Replicating these findings with a larger cohort of children is needed. Supplemental Material https://doi.org/10.23641/asha.16906978.",2021-11-03 +34788369,"DeepKG: An End-to-End Deep Learning-Based Workflow for Biomedical Knowledge Graph Extraction, Optimization and Applications. ","DeepKG is an end-to-end deep learning-based workflow that helps researchers automatically mine valuable knowledge in biomedical literature. Users can utilize it to establish customized knowledge graphs in specified domains, thus facilitating in-depth understanding on disease mechanisms and applications on drug repurposing and clinical research, etc. To improve the performance of DeepKG, a cascaded hybrid information extraction framework (CHIEF) is developed for training model of 3-tuple extraction, and a novel AutoML-based knowledge representation algorithm (AutoTransX) is proposed for knowledge representation and inference. The system has been deployed in dozens of hospitals and extensive experiments strongly evidence the effectiveness. In the context of 144,900 COVID-19 scholarly full-text literature, DeepKG generates a high-quality knowledge graph with 7,980 entities and 43,760 3-tuples, a candidate drug list, and relevant animal experimental studies are being carried out. To accelerate more studies, we make DeepKG publicly available and provide an online tool including the data of 3-tuples, potential drug list, question answering system, visualization platform. Free to all users: http://covidkg.ai/. 
Supplementary data are available at Bioinformatics online.",2021-11-11 +30601939,UbiHub: a data hub for the explorers of ubiquitination pathways.,"

Motivation

Protein ubiquitination plays a central role in important cellular machineries such as protein degradation or chromatin-mediated signaling. With the recent discovery of the first potent ubiquitin-specific protease inhibitors, and the maturation of proteolysis targeting chimeras as promising chemical tools to exploit the ubiquitin-proteasome system, protein target classes associated with ubiquitination pathways are becoming the focus of intense drug-discovery efforts.

Results

We have developed UbiHub, an online resource that can be used to visualize a diverse array of biological, structural and chemical data on phylogenetic trees of human protein families involved in ubiquitination signaling, including E3 ligases and deubiquitinases. This interface can inform target prioritization and drug design, and serves as a navigation tool for medicinal chemists, structural and cell biologists exploring ubiquitination pathways.

Availability and implementation

https://ubihub.thesgc.org.",2019-08-01 +30357420,Genomes OnLine database (GOLD) v.7: updates and new features.,"The Genomes Online Database (GOLD) (https://gold.jgi.doe.gov) is an open online resource, which maintains an up-to-date catalog of genome and metagenome projects in the context of a comprehensive list of associated metadata. Information in GOLD is organized into four levels: Study, Biosample/Organism, Sequencing Project and Analysis Project. Currently GOLD hosts information on 33 415 Studies, 49 826 Biosamples, 313 324 Organisms, 215 881 Sequencing Projects and 174 454 Analysis Projects with a total of 541 metadata fields, of which 80 are based on controlled vocabulary (CV) terms. GOLD provides a user-friendly web interface to browse sequencing projects and launch advanced search tools across four classification levels. Users submit metadata on a wide range of Sequencing and Analysis Projects in GOLD before depositing sequence data to the Integrated Microbial Genomes (IMG) system for analysis. GOLD conforms with and supports the rules set by the Genomic Standards Consortium (GSC) Minimum Information standards. The current version of GOLD (v.7) has seen the number of projects and associated metadata increase exponentially over the years. This paper provides an update on the current status of GOLD and highlights the new features added over the last two years.",2019-01-01 +34931833,Predicting Protein-Peptide Complex Structures by Accounting for Peptide Flexibility and the Physicochemical Environment.,"Predicting protein-peptide complex structures is crucial to the understanding of a vast variety of peptide-mediated cellular processes and to peptide-based drug development. Peptide flexibility and binding mode ranking are the two major challenges for protein-peptide complex structure prediction. 
Peptides are highly flexible molecules, and therefore, brute-force modeling of peptide conformations of interest in protein-peptide docking is beyond current computing power. Inspired by the fact that the protein-peptide binding process is like protein folding, we developed a novel strategy, named MDockPeP2, which tries to address these challenges using physicochemical information embedded in abundant monomeric proteins with an exhaustive search strategy, in combination with an integrated global search and a local flexible minimization method. Only the peptide sequence and the protein crystal structure are required. The method was systemically assessed using a newly constructed structural database of 89 nonredundant protein-peptide complexes with the peptide sequence length ranging from 5 to 29 in which about half of the peptides are longer than 15 residues. MDockPeP2 yielded a total success rate of 58.4% (70.8, 79.8%) for the bound docking (i.e., with the bound receptor and fully flexible peptides) and 19.0% (44.8, 70.7%) for the challenging unbound docking when top 10 (100, 1000) models were considered for each prediction. MDockPeP2 achieved significantly higher success rates on two other datasets, peptiDB and LEADS-PEP, which contain only short- and medium-size peptides (≤ 15 residues). For peptiDB, our method obtained a success rate of 62.0% for the bound docking and 35.9% for the unbound docking when the top 10 models were considered. For LEADS-PEP, MDockPeP2 achieved a success rate of 69.8% when the top 10 models were considered. The program is available at https://zougrouptoolkit.missouri.edu/mdockpep2/download.html.",2021-12-21 +33441366,Protocol for a qualitative study to identify strategies to optimise hospital ePrescribing systems.,"

Introduction

Electronic prescribing (ePrescribing) is a key area of development and investment in the UK and across the developed world. ePrescribing is widely understood as a vehicle for tackling medication-related safety concerns, improving care quality and making more efficient use of health resources. Nevertheless, implementation of an electronic health record does not itself ensure benefits for prescribing are maximised. We examine the process of optimisation of ePrescribing systems using case studies to provide policy recommendations based on the experiences of digitally mature hospital sites.

Methods and analysis

Qualitative interviews within six digitally mature sites will be carried out. The aim is to capture successful optimisation of electronic prescribing (ePrescribing) in particular health systems and hospitals. We have identified hospital sites in the UK and in three other developed countries. We used a combination of literature reviews and advice from experts at Optimising ePrescribing in Hospitals (eP Opt) Project round-table events. Sites were purposively selected based on geographical area, innovative work in ePrescribing/electronic health (eHealth) and potential transferability of practices to the UK setting. Interviews will be recorded and transcribed and transcripts coded thematically using NVivo software. Relevant policy and governance documents will be analysed, where available. Planned site visits were suspended due to the COVID-19 pandemic.

Ethics and dissemination

The Usher Research Ethics Group granted approval for this study. Results will be disseminated via peer-reviewed journals in medical informatics and expert round-table events, lay member meetings and the ePrescribing Toolkit (http://www.eprescribingtoolkit.com/)-an online resource supporting National Health Service (NHS) hospitals through the ePrescribing process.",2021-01-13 +34762556,Perceived Intrinsic Motivation Mediates the Effect of Motive Incongruence on Job Burnout and Job Satisfaction.,"Job burnout is a profound concern in modern society producing enormous financial and emotional costs for companies, health insurances, and the individual employee. In this study, we aimed at contributing to the literature on determinants of job burnout by investigating the indirect effects of implicit and explicit motive discrepancies (IED) through intrinsic motivation, with the aim of replicating previous findings from the literature. In addition, we extended this research by adding job satisfaction as an outcome variable in the mediation model, as well as volition as a moderator in these relationships. We preregistered our study and collected data from 136 participants (82 females; Mage = 29.33 years, SDage = 6.30) using indirect measures (for implicit motives) and self-report measures (for explicit motives, job burnout, job satisfaction and volition). IED was shown to have an indirect effect on both job burnout and job satisfaction through intrinsic motivation. Additionally, these indirect effects were mitigated by high levels of volition. We discuss implications of our findings for research and practice. Supplemental data for this article is available online at https://doi.org/10.1080/00223980.2021.1980758.",2021-11-11 +33067342,OpenPepXL: An Open-Source Tool for Sensitive Identification of Cross-Linked Peptides in XL-MS.,"Cross-linking MS (XL-MS) has been recognized as an effective source of information about protein structures and interactions. 
In contrast to regular peptide identification, XL-MS has to deal with a quadratic search space, where peptides from every protein could potentially be cross-linked to any other protein. To cope with this search space, most tools apply different heuristics for search space reduction. We introduce a new open-source XL-MS database search algorithm, OpenPepXL, which offers increased sensitivity compared with other tools. OpenPepXL searches the full search space of an XL-MS experiment without using heuristics to reduce it. Because of efficient data structures and built-in parallelization OpenPepXL achieves excellent runtimes and can also be deployed on large compute clusters and cloud services while maintaining a slim memory footprint. We compared OpenPepXL to several other commonly used tools for identification of noncleavable labeled and label-free cross-linkers on a diverse set of XL-MS experiments. In our first comparison, we used a data set from a fraction of a cell lysate with a protein database of 128 targets and 128 decoys. At 5% FDR, OpenPepXL finds from 7% to over 50% more unique residue pairs (URPs) than other tools. On data sets with available high-resolution structures for cross-link validation OpenPepXL reports from 7% to over 40% more structurally validated URPs than other tools. Additionally, we used a synthetic peptide data set that allows objective validation of cross-links without relying on structural information and found that OpenPepXL reports at least 12% more validated URPs than other tools. It has been built as part of the OpenMS suite of tools and supports Windows, macOS, and Linux operating systems. OpenPepXL also supports the MzIdentML 1.2 format for XL-MS identification results. 
It is freely available under a three-clause BSD license at https://openms.org/openpepxl.",2020-10-16 +34672870,Multidimensional Sexual Well-being Scale for Older Adults: Validity Evidence from a Polish Sample.,"Here, we report the results of a Polish adaptation of the Multidimensional Sexual Well-being Scale (MSWBS) for older adults. The MSWBS is a short self-report scale for assessing the five dimensions of an individual's sexual well-being: frequency of caressing, sexual intimacy, sexual compliance, sexual satisfaction, and sexual distress. The aim of our adaptation was to examine the utility of the scale in a country with conservative views on the sexuality of older people, which will help health care providers and researchers to better understand the sexual needs of older people. The study included 507 participants, aged 60-92. We found the reliability of the scale to be satisfactory (Cronbach's alpha = .71-.87). Confirmatory factor analysis (CFA) showed a good fit of the data to a five-factor model. This study demonstrates that the MSWBS-PL is connected to general life satisfaction and diversity of sexual activity, confirming the scale's validity. Supplemental data for this article is available online at https://doi.org/10.1080/0092623X.2021.1991535.",2021-10-21 +28575155,Dynamic-BM: multispecies Dynamic BodyMap database from temporal RNA-seq data.,"Biological processes, especially developmental processes, are often dynamic. Previous BodyMap projects for human and mouse have provided researchers with portals to tissue-specific gene expression, but these efforts have not included dynamic gene expression patterns. Over the past few years, substantial progress in our understanding of the molecular mechanisms of protein-coding and long noncoding RNA (lncRNA) genes in development processes has been achieved through numerous time series RNA sequencing (RNA-seq) studies. 
However, none of the existing databases focuses on these time series data, thus rendering the exploration of dynamic gene expression patterns inconvenient. Here, we present Dynamic BodyMap (Dynamic-BM), a database for temporal gene expression profiles, obtained from 2203 time series of RNA-seq samples, covering >25 tissues from five species. Dynamic-BM has a user-friendly Web interface designed for browsing and searching the dynamic expression pattern of genes from different sources. It is an open resource for efficient data exploration, providing dynamic expression profiles of both protein-coding genes and lncRNAs to facilitate the generation of new hypotheses in developmental biology research. Additionally, Dynamic-BM includes a literature-based knowledgebase for lncRNAs associated with tissue development and a list of manually selected lncRNA candidates that may be involved in tissue development. Dynamic-BM is available at http://bioinfo.ibp.ac.cn/Dynamic-BM.",2018-11-01 +34524450,Distantly supervised biomedical relation extraction using piecewise attentive convolutional neural network and reinforcement learning.,"

Objective

There have been various methods to deal with the erroneous training data in distantly supervised relation extraction (RE), however, their performance is still far from satisfaction. We aimed to deal with the insufficient modeling problem on instance-label correlations for predicting biomedical relations using deep learning and reinforcement learning.

Materials and methods

In this study, a new computational model called piecewise attentive convolutional neural network and reinforcement learning (PACNN+RL) was proposed to perform RE on distantly supervised data generated from Unified Medical Language System with MEDLINE abstracts and benchmark datasets. In PACNN+RL, PACNN was introduced to encode semantic information of biomedical text, and the RL method with memory backtracking mechanism was leveraged to alleviate the erroneous data issue. Extensive experiments were conducted on 4 biomedical RE tasks.

Results

The proposed PACNN+RL model achieved competitive performance on 8 biomedical corpora, outperforming most baseline systems. Specifically, PACNN+RL outperformed all baseline methods with the F1-score of 0.5592 on the may-prevent dataset, 0.6666 on the may-treat dataset, and 0.3838 on the DDI corpus, 2011. For the protein-protein interaction RE task, we obtained new state-of-the-art performance on 4 out of 5 benchmark datasets.

Conclusions

The performance on many distantly supervised biomedical RE tasks was substantially improved, primarily owing to the denoising effect of the proposed model. It is anticipated that PACNN+RL will become a useful tool for large-scale RE and other downstream tasks to facilitate biomedical knowledge acquisition. We also made the demonstration program and source code publicly available at http://112.74.48.115:9000/.",2021-11-01 +35026509,Genus Lonicera: New drug discovery from traditional usage to modern chemical and pharmacological research.,"

Background

Lonicera Linn. belonging to the family Caprifoliaceae, the largest genus in the plant family, includes more than 200 species, which are mainly distributed in northern Africa, North America, Europe and Asia. Some species of this genus have commonly been used in traditional Chinese medicine as well as functional foods, cosmetics and other applications, such as L. japonica Thunb. Bioactive components and pharmacological activities of the genus Lonicera plants have received increasing interest from the scientific community. Thus, a comprehensive and systematic review on their traditional usage in China, chemical components, and their pharmacological properties of their whole plants, bioactive extracts, and bioactive isolates including partial structure-activity relationships from the genus is indispensable.

Methods

Information on genus Lonicera of this systematic electronic literature search was gathered via the published articles, patents, clinical trials website (https://clinicaltrials.gov/) and several online bibliographic databases (PubMed, Sci Finder, Research Gate, Science Direct, CNKI, Web of Science and Google Scholar). The following keywords were used for the online search: Lonicera, phytochemical composition, Lonicerae japonica, Lonicera review articles, bioactivities of Lonicera, anti-inflammatory, antiviral, antimicrobial, anticancer, hepatoprotective, antioxidant, neuroprotective, anti-diabetic, and clinical trials. This review paper consists of a total of 225 papers covering the Lonicera genus from 1800 to 2021, including research articles, reviews, patents, and book chapters.

Results

In this review (1800s-2021), about 420 components from the genus of Lonicera Linn. including 87 flavonoids, 222 terpenoids, 51 organic acids, and other compounds, together with their pharmacological activities including anti-inflammatory, antiviral, antimicrobial, anticancer, hepatoprotective, antioxidant, neuroprotective, antidiabetic, anti-allergic, immunomodulatory effects, and toxicity were summarized.

Conclusion

The relationship is discussed among their traditional usage, their pharmacological properties, and their chemical components, which indicate the genus Lonicera have a large prospect in terms of new drug exploitation, especially in COVID-19 treatment.",2021-12-20 +29761457,EuPathDB: The Eukaryotic Pathogen Genomics Database Resource.,"Fighting infections and developing novel drugs and vaccines requires advanced knowledge of pathogen's biology. Readily accessible genomic, functional genomic, and population data aids biological and translational discovery. The Eukaryotic Pathogen Database Resources ( http://eupathdb.org ) are data mining resources that support hypothesis driven research by facilitating the discovery of meaningful biological relationships from large volumes of data. The resource encompasses 13 sites that support over 170 species including pathogenic protists, oomycetes, and fungi as well as evolutionarily related nonpathogenic species. EuPathDB integrates preanalyzed data with advanced search capabilities, data visualization, analysis tools and a comprehensive record system in a graphical interface that does not require prior computational skills. This chapter describes guiding concepts common across EuPathDB sites and illustrates the powerful data mining capabilities of some of the available tools and features.",2018-01-01 +33827920,Learning the molecular grammar of protein condensates from sequence determinants and embeddings. ,"Intracellular phase separation of proteins into biomolecular condensates is increasingly recognized as a process with a key role in cellular compartmentalization and regulation. Different hypotheses about the parameters that determine the tendency of proteins to form condensates have been proposed, with some of them probed experimentally through the use of constructs generated by sequence alterations. 
To broaden the scope of these observations, we established an in silico strategy for understanding on a global level the associations between protein sequence and phase behavior and further constructed machine-learning models for predicting protein liquid-liquid phase separation (LLPS). Our analysis highlighted that LLPS-prone proteins are more disordered, less hydrophobic, and of lower Shannon entropy than sequences in the Protein Data Bank or the Swiss-Prot database and that they show a fine balance in their relative content of polar and hydrophobic residues. To further learn in a hypothesis-free manner the sequence features underpinning LLPS, we trained a neural network-based language model and found that a classifier constructed on such embeddings learned the underlying principles of phase behavior at a comparable accuracy to a classifier that used knowledge-based features. By combining knowledge-based features with unsupervised embeddings, we generated an integrated model that distinguished LLPS-prone sequences both from structured proteins and from unstructured proteins with a lower LLPS propensity and further identified such sequences from the human proteome at a high accuracy. These results provide a platform rooted in molecular principles for understanding protein phase behavior. The predictor, termed DeePhase, is accessible from https://deephase.ch.cam.ac.uk/.",2021-04-01 +34791504,Epione application: An integrated web‑toolkit of clinical genomics and personalized medicine in systemic lupus erythematosus. ,"Genome wide association studies (GWAS) have identified autoimmune disease‑associated loci, a number of which are involved in numerous disease‑associated pathways. However, much of the underlying genetic and pathophysiological mechanisms remain to be elucidated. Systemic lupus erythematosus (SLE) is a chronic, highly heterogeneous autoimmune disease, characterized by differences in autoantibody profile, serum cytokines and a multi‑system involvement. 
This study presents the Epione application, an integrated bioinformatics web‑toolkit, designed to assist medical experts and researchers in more accurately diagnosing SLE. The application aims to identify the most credible gene variants and single nucleotide polymorphisms (SNPs) associated with SLE susceptibility, by using patient's genomic data to aid the medical expert in SLE diagnosis. The application contains useful knowledge of >70,000 SLE‑related publications that have been analyzed, using data mining and semantic techniques, towards extracting the SLE‑related genes and the corresponding SNPs. Probable genes associated with the patient's genomic profile are visualized with several graphs, including chromosome ideograms, statistic bars and regulatory networks through data mining studies with relative publications, to obtain a representative number of the most credible candidate genes and biological pathways associated with the SLE. Furthermore, an evaluation study was performed on a patient diagnosed with SLE and is presented herein. Epione has also been expanded in family‑related candidate patients to evaluate its predictive power. All the recognized gene variants that were previously considered to be associated with SLE were accurately identified in the output profile of the patient, and by comparing the results, novel findings have emerged. The Epione application may assist and facilitate in early stage diagnosis by using the patients' genomic profile to compare against the list of the most predictable candidate gene variants related to SLE. Its diagnosis‑oriented output presents the user with a structured set of results on variant association, position in genome and links to specific bibliography and gene network associations. The overall aim of the present study was to provide a reliable tool for the most effective study of SLE. 
This novel and accessible webserver tool of SLE is available at http://geneticslab.aua.gr/epione/.",2021-11-18 +,MON-LB67 Bisphosphonate Related Ocular Inflammation,"Abstract Introduction: Osteoporosis is a major public health problem, increasing in incidence with the growth of the aging population. It affects over 200 million women worldwide and is associated with fragility fractures leading to increased morbidity, mortality and poor quality of life (1). Bisphosphonates are among the most widely used first line forms of treatment for management of osteoporosis. They have a structure like pyrophosphate and inhibit bone resorption by attaching to hydroxyapatite binding sites on the bone in areas with active resorption. While initiating treatment with bisphosphonates, endocrinologists generally discuss side effects including gastrointestinal symptoms related to gastroesophageal reflux disease and gastritis, acute phase reactions related to infusion of the bisphosphonates, musculoskeletal pain, hypocalcemia, osteonecrosis of the jaw, and atypical femur fractures. There are rare but severe side effects causing ocular inflammation related to bisphosphonate use - Bisphosphonate Related Ocular Inflammation (BROI). While these are rare based on few case reports, they are significant side effects, which if patients are not aware of or not addressed in timely manner can result in vision loss. We report a clinical scenario of a patient who experienced bisphosphonate-related ocular inflammation. Clinical History: 62-year-old female with cerebral palsy presented to the emergency room with 1 day of marked left eye redness and swelling. When the symptoms began, she felt that her eye was itchy. She had mild left eye discomfort. She did not perceive any decreased visual acuity. Patient had her first infusion of bisphosphonate, 4 days prior to the emergency room visit. 
On Exam: She was alert, no apparent distress, left eyelids were swollen, almost completely shut with minimal discoloration, there was underlying severe chemosis and conjunctival redness, pupils equal and round, visual acuity 20/100 OD and 20/100 OS. She was diagnosed with allergic conjunctivitis, and advised to apply Tobradex drops and Diphenhydramine, and to follow-up in eye clinic. Scleritis related to bisphosphonates was also considered as a possible cause of her symptoms. The patient called the endocrinologist about the eye symptoms the following day. The endocrinologist also raised the possibility bisphosphonate related ocular inflammation and advised to be seen in ophthalmology clinic urgently. The endocrinologist also communicated with the ophthalmologist indicating that her eye symptoms could be due to inflammatory response related to bisphosphonates and to consider starting systemic steroids. The patient was seen in ophthalmology clinic the following day: The ophthalmology exam revealed severe orbital inflammation with conjunctival chemosis OS: Table 1.She was started on Prednisone 80mg a day with tapering by 10mg daily over the next 2 weeks. She was also advised to apply Durezol eye drops twice a day. She was referred to Oculoplastics for further evaluation. Table 1: Oculoplastics evaluation next day Table 2: Revealed orbital inflammation with good initial response to steroids and advised to continue prednisone taper. Table 2: On the 2 weeks follow up: Table 3: Resolved orbital inflammation and was advised to stop prednisone Table 3: Clinical Discussion: Bisphosphonates are widely prescribed and effective forms of treatment for osteoporosis in preventing fractures. Ocular side effects are rare but reported over the past 2 decades (2). 
First time users of bisphosphonates are at a higher risk compared with nonusers (3).There is also an association of bisphosphonate-related ocular inflammation (BROI) with coexisting inflammatory conditions that associated with ocular inflammation, such as rheumatoid arthritis, ankylosing spondylitis, psoriasis, inflammatory bowel disease, systemic lupus erythematosus or sarcoidosis (4,8).Symptoms related to BROI typically occur within 24 to 72 hours of the bisphosphonate exposure but can range between few hours to 3 years. Those who receive systemic bisphosphonates present earlier compared to orally administered bisphosphonates (5,6,7). In prior case reports, patients presented with flu like symptoms, lasting for lasting for 24 to 72h prior to onset of orbital disease (5). The range of ocular inflammation is variable, and can include conjunctivitis, uveitis, scleritis, episcleritis and keratitis. The symptoms can be unilateral or bilateral. Discontinuation of bisphosphonates is necessary for resolution of ocular inflammation (7).The postulated cellular mechanism causing BROI is as follows: Bisphosphonates are secreted into the lacrimal system, and induce an inflammatory response resulting in release of cytokines that results in ocular inflammation: Fig 1: (9). Fig 1: It is unclear as to why BROI is a rare side effect, though related to release of inflammatory reactants, which are also responsible for the more common flu like side effect. The risk of BROI is increased in those with associated inflammatory condition (arthritis or inflammatory bowel disease). It is possible that in individuals susceptible to inflammatory disorders, there is pre-existing infiltration of the lacrimal gland with mononuclear cells which causes a robust local inflammatory response to bisphosphonate treatment in the eye. Our patient with cerebral palsy and learning disability, was taken to the emergency room within few hours of onset symptoms and signs of ocular inflammation. 
Though she was diagnosed with severe orbital inflammation, she did not express eye pain, which is commonly seen in scleritis. Through multidisciplinary teamwork between the emergency physician, endocrinologist, ophthalmologist and oculoplastic specialists, she was promptly started on systemic steroids and her symptoms resolved completely with no vision loss. While considering bisphosphonates as an option for osteoporosis treatment, the endocrinologist should discuss the rare but serious complications of BROI. With use of bisphosphonates in the aging population to treat osteoporosis, specific attention should be given to underlying eye disease, inflammatory conditions, and cognition. References: 1. IOF statistics: (https://www.iofbonehealth.org/epidemiology). 2. Clark EM, Durup D. Inflammatory eye reactions with bisphosphonates and other osteoporosis medications: what are the risks?. Ther Adv Musculoskelet Dis. 2015;7(1):11–16. doi:10.1177/1759720X145664243. Etminan M, Forooghian F, Maberley D. Inflammatory ocular adverse events with the use of oral bisphosphonates: a retrospective cohort study. CMAJ. 2012;184(8):E431–E434. doi:10.1503/cmaj.1117524.French DD1, Margo CE. Postmarketing surveillance rates of uveitis and scleritis with bisphosphonates among a national veteran cohort. Retina. 2008 Jun;28(6):889-93. doi: 10.1097/IAE.0b013e31816576ef. 5.Herrera I, Kam Y, Whittaker TJ, Champion M, Ajlan RS. Bisphosphonate-induced orbital inflammation in a patient on chronic immunosuppressive therapy. BMC Ophthalmol. 2019;19(1):51. Published 2019 Feb 14. doi:10.1186/s12886-019-1063-8 6. Ehsan Rahimy, Simon K. Law. Orbital inflammation after zoledronate infusion: an emerging complication.https://doi.org/10.1016/j.jcjo.2012.09.011 7. Frederick W. Fraunfelder, M.D Bisphosphonates and Ocular Inflammation. NEngJ Med 2003; 348:1187-1188. DOI: 10.1056/NEJM200303203481225 8.Pazianas M1, Clark EM, Eiken PA, Brixen K, Abrahamsen B. 
Inflammatory eye reactions in patients treated with bisphosphonates and other osteoporosis medications: cohort analysis using a national prescription database. J Bone Miner Res. 2013 Mar;28(3):455-63. doi: 10.1002/jbmr.1783.9.Keith Thompson and Michael J. Rogers. New Insights into Old Drugs. BoneKEy-Osteovision. 2006 August;3(8):5-13",2020-05-08 +,SUN-497 Life-Saving Management of Thyrotoxicosis with a Single Session of Plasmapheresis,"Abstract Thyroid hormones are essential in controlling numerous metabolic and regulatory processes in the human body. Thyroid storm is a medical emergency and is the most extreme manifestation of the overproduction of thyroid hormones. It is critical to recognize thyroid storm at its earliest state and conventional practice recommends initiating treatment immediately with steroids, thionamides, and beta-blockers.Here we present a case of a patient with Graves’ disease in which thionamides could not be continued due to acute thionamide-induced hepatotoxicity. With the patient’s worsening thyrotoxic state, declining mental status and clinical deterioration; the decision was made to initiate plasmapheresis as a life-saving therapy to remove the thyroid stimulating antibodies responsible for the patient’s deteriorating status.After one session of plasmapheresis, the patients’ clinical condition began to significantly improve to baseline, along with normalization of his thyroid function and the patient was discharged home with plans for emergent thyroidectomy.This case demonstrates the importance of considering plasmapheresis as a life saving measure in the treatment algorithm of patients with extreme thyrotoxicosis and thyroid storm in which traditional therapies such as thionamides, steroids, and beta-blockers cannot be used or are ineffective. Reference:Ono, Y., Ono, S., Yasunaga, H., Matsui, H., Fushimi, K., & Tanaka, Y. (2016, February 18). 
Factors Associated With Mortality of Thyroid Storm: Analysis Using a National Inpatient Database in Japan. Retrieved April 6, 2019, from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4998648/",2020-05-08 +,MON-LB129 A Pilot Genome Wide Association Study (GWAS) on Primary Aldosteronism Patients in a Multi-Ethnic Malaysian Cohort,"Abstract Studies on excised aldosterone-producing lesions have found somatic mutations in five genes (KCNJ5, CACNA1D, ATP1A1, ATP2B3, and CTNNB1) commonly causes the excess aldosterone production. Interestingly, Oriental cohorts had the highest frequency of KCNJ5 mutations whereas CACNA1D mutations were most common in Black African Caribbean patients, suggesting that genetic background affects the prevalence and distribution of aldosterone-driving somatic mutation. We therefore aimed to identify the common germline variants that associates with excess aldosterone production through performing a pilot genome wide association study (GWAS) on primary aldosteronism (PA) patients. GWAS was performed using the Human Infinium OmniExpressExome-8 v1.4 BeadChip containing 960,919 markers to compare gDNA of 154 PA patients with 78 healthy controls. Samples were checked for sex discordance, heterozygosity rate, missing rate and the degree of recent shared ancestry for each pair of individuals using the PLINK program and Genome Studio (Illumina). In total, 150 patients and 75 controls (112 males and 113 females) were included in the downstream analysis. 630,749 markers that passed quality control steps (missing call rate <95% and minor allele frequency in controls >1%) were used to perform association analysis using the Chi-square Test which was then subjected to multiple testing corrections (Bonferroni correction). As expected with a pilot sample size, no variants passed the suggestive significant threshold of Bonferroni corrected P-value < 5 x 10-6 (-log10 P = 5.3). 
However, 27 SNPs had the uncorrected P-value<0.0002, odds ratio >2, and differences of frequencies in cases compared to control >0.1 or <-0.2, of which 3 genes (SRGAP3, AUTS2, and RORA) associated with these SNPs were also highlighted in the UK Biobank database of 72 patients with primary aldosteronism (https://biobankengine.stanford. edu/coding/HC189). Of these, RORA has recently been found to be down-regulated in adrenals from PA patients and spontaneously hypertensive rat adrenals compared to control adrenalsa,b. RORA encodes for the protein retinoic acid receptor (RAR)-related orphan receptor alpha, a member of the NR1 subfamily of nuclear hormone receptors (NR1F1). Interestingly, adrenal is the second organ to skin with the highest expression of RORA and treatment of angiotensin II in the adrenocortical cell line H295R increases RORA expressionc,d. Taken together, this pilot GWAS highlights RORA as a potential nuclear hormone receptor that regulates aldosterone production. References aChu et al., Int J Clin Exp Pathol 2017;10(9):10009-10018. bTanaka et al., Hypertens Res 2019;42(2):165-173. cNogueira et al., Mol Cell Endocrinol 2009; 302(2): 230–236. dGTEx Analysis Release V7 (dbGaP Accession phs000424.v7.p2) Acknowledgements This research was supported by the Malaysian Ministry of Higher Education Grant (FRGS/1/2015/SKK08/UKM/02/3), The National University of Malaysia (UKM) University Grant (GUP-2016-083), and The UKM Medical Center Fundamental Grant (FF-2016-302).",2020-05-08 +30534599,New Insights into Human Nostril Microbiome from the Expanded Human Oral Microbiome Database (eHOMD): a Resource for the Microbiome of the Human Aerodigestive Tract. ,"The expanded Human Oral Microbiome Database (eHOMD) is a comprehensive microbiome database for sites along the human aerodigestive tract that revealed new insights into the nostril microbiome. 
The eHOMD provides well-curated 16S rRNA gene reference sequences linked to available genomes and enables assignment of species-level taxonomy to most next-generation sequences derived from diverse aerodigestive tract sites, including the nasal passages, sinuses, throat, esophagus, and mouth. Using minimum entropy decomposition coupled with the RDP Classifier and our eHOMD V1-V3 training set, we reanalyzed 16S rRNA V1-V3 sequences from the nostrils of 210 Human Microbiome Project participants at the species level, revealing four key insights. First, we discovered that Lawsonella clevelandensis, a recently named bacterium, and Neisseriaceae [G-1] HMT-174, a previously unrecognized bacterium, are common in adult nostrils. Second, just 19 species accounted for 90% of the total sequences from all participants. Third, 1 of these 19 species belonged to a currently uncultivated genus. Fourth, for 94% of the participants, 2 to 10 species constituted 90% of their sequences, indicating that the nostril microbiome may be represented by limited consortia. These insights highlight the strengths of the nostril microbiome as a model system for studying interspecies interactions and microbiome function. Also, in this cohort, three common nasal species (Dolosigranulum pigrum and two Corynebacterium species) showed positive differential abundance when the pathobiont Staphylococcus aureus was absent, generating hypotheses regarding colonization resistance. By facilitating species-level taxonomic assignment to microbes from the human aerodigestive tract, the eHOMD is a vital resource enhancing clinical relevance of microbiome studies. IMPORTANCE The eHOMD (http://www.ehomd.org) is a valuable resource for researchers, from basic to clinical, who study the microbiomes and the individual microbes in body sites in the human aerodigestive tract, which includes the nasal passages, sinuses, throat, esophagus, and mouth, and the lower respiratory tract, in health and disease. 
The eHOMD is an actively curated, web-based, open-access resource. eHOMD provides the following: (i) species-level taxonomy based on grouping 16S rRNA gene sequences at 98.5% identity, (ii) a systematic naming scheme for unnamed and/or uncultivated microbial taxa, (iii) reference genomes to facilitate metagenomic, metatranscriptomic, and proteomic studies and (iv) convenient cross-links to other databases (e.g., PubMed and Entrez). By facilitating the assignment of species names to sequences, the eHOMD is a vital resource for enhancing the clinical relevance of 16S rRNA gene-based microbiome studies, as well as metagenomic studies.",2018-11-01 +,Fingerprinting of URL Logs: Continuous User Authentication from Behavioural Patterns,"Security of computer systems is now a critical and evolving issue. Current trends try to use behavioural biometrics for continuous authorization. Our work is intended to strengthen network user authentication by a software interaction analysis. In our research, we use HTTP request (URLs) logs that network administrators collect. We use a set of full-convolutional autoencoders and one authentication (one-class) convolutional neural network. The proposed method copes with extensive data from many users and allows to add new users in the future. Moreover, the system works in a real-time manner, and the proposed deep learning framework can use other user behaviour- and software interaction-related features.",2020-05-23 +33597522,Inference and analysis of cell-cell communication using CellChat.,"Understanding global communications among cells requires accurate representation of cell-cell signaling links and effective systems-level analyses of those links. We construct a database of interactions among ligands, receptors and their cofactors that accurately represent known heteromeric molecular complexes. 
We then develop CellChat, a tool that is able to quantitatively infer and analyze intercellular communication networks from single-cell RNA-sequencing (scRNA-seq) data. CellChat predicts major signaling inputs and outputs for cells and how those cells and signals coordinate for functions using network analysis and pattern recognition approaches. Through manifold learning and quantitative contrasts, CellChat classifies signaling pathways and delineates conserved and context-specific pathways across different datasets. Applying CellChat to mouse and human skin datasets shows its ability to extract complex signaling patterns. Our versatile and easy-to-use toolkit CellChat and a web-based Explorer ( http://www.cellchat.org/ ) will help discover novel intercellular communications and build cell-cell communication atlases in diverse tissues.",2021-02-17 +34976872,"AMPing Up the Search: A Structural and Functional Repository of Antimicrobial Peptides for Biofilm Studies, and a Case Study of Its Application to Corynebacterium striatum, an Emerging Pathogen.","Antimicrobial peptides (AMPs) have been recognized for their ability to target processes important for biofilm formation. Given the vast array of AMPs, identifying potential anti-biofilm candidates remains a significant challenge, and prompts the need for preliminary in silico investigations prior to extensive in vitro and in vivo studies. We have developed Biofilm-AMP (B-AMP), a curated 3D structural and functional repository of AMPs relevant to biofilm studies. In its current version, B-AMP contains predicted 3D structural models of 5544 AMPs (from the DRAMP database) developed using a suite of molecular modeling tools. The repository supports a user-friendly search, using source, name, DRAMP ID, and PepID (unique to B-AMP). Further, AMPs are annotated to existing biofilm literature, consisting of a vast library of over 10,000 articles, enhancing the functional capabilities of B-AMP. 
To provide an example of the usability of B-AMP, we use the sortase C biofilm target of the emerging pathogen Corynebacterium striatum as a case study. For this, 100 structural AMP models from B-AMP were subject to in silico protein-peptide molecular docking against the catalytic site residues of the C. striatum sortase C protein. Based on docking scores and interacting residues, we suggest a preference scale using which candidate AMPs could be taken up for further in silico, in vitro and in vivo testing. The 3D protein-peptide interaction models and preference scale are available in B-AMP. B-AMP is a comprehensive structural and functional repository of AMPs, and will serve as a starting point for future studies exploring AMPs for biofilm studies. B-AMP is freely available to the community at https://b-amp.karishmakaushiklab.com and will be regularly updated with AMP structures, interaction models with potential biofilm targets, and annotations to biofilm literature.",2021-12-16 +34918996,Physician risk perceptions and surveillance practices for tyrosine kinase inhibitor long-term effects in pediatric CML.,"Chronic myeloid leukemia (CML) is effectively treated with long-term tyrosine kinase inhibitor (TKI) therapy, yet little is known about risks of prolonged TKI exposure in young patients, and long-term effect monitoring is not standardized. We surveyed North American pediatric oncologists (n = 119) to evaluate perceived risk of and surveillance practices for potential toxicities associated with prolonged TKI exposure in children and adolescents/young adults (AYAs) with CML. Survey domains included general and specific risk perceptions and surveillance practices for asymptomatic patients on chronic TKI therapy. We analyzed data descriptively and explored relationships between risk perceptions and surveillance. 
Risk perceptions varied among oncologists but were similar across six categories (thyroid, cardiac, vascular, metabolic, fertility, psychologic), with less than one-third rating each risk as moderate or high in pediatric and AYA patients. More oncologists perceived moderate or high risk of growth abnormalities in children (62% pediatric, 14% AYA) and financial toxicity in all patients (60% pediatric, 64% AYA). A greater proportion of oncologists with moderate or high perceived risk of thyroid abnormalities reported testing thyroid function compared to those with lower perceived risk; patterns for metabolic risk/lipid tests and cardiac risk/tests were similar. In summary, we found that pediatric oncologists had variable risk perceptions and surveillance practices for potential toxicities associated with prolonged TKI exposure. Standardizing surveillance would help quantify risks and refine recommendations.Supplemental data for this article is available online at https://doi.org/10.1080/08880018.2021.2017085 .",2021-12-17 +,Annual maps of global artificial impervious area (GAIA) between 1985 and 2018,"Artificial impervious areas are predominant indicators of human settlements. Timely, accurate, and frequent information on artificial impervious areas is critical to understanding the process of urbanization and land use/cover change, as well as of their impacts on the environment and biodiversity. Despite their importance, there still lack annual maps of high-resolution Global Artificial Impervious Areas (GAIA) with longer than 30-year records, due to the high demand of high performance computation and the lack of effective mapping algorithms. In this paper, we mapped annual GAIA from 1985 to 2018 using the full archive of 30-m resolution Landsat images on the Google Earth Engine platform. 
With ancillary datasets, including the nighttime light data and the Sentinel-1 Synthetic Aperture Radar data, we improved the performance of our previously developed algorithm in arid areas. We evaluated the GAIA data for 1985, 1990, 1995, 2000, 2005, 2010, and 2015, and the mean overall accuracy is higher than 90%. A cross-product comparison indicates the GAIA data are the only dataset spanning over 30 years. The temporal trend in GAIA agrees well with other datasets at the local, regional, and global scales. Our results indicate that the GAIA reached 797,076 km² in 2018, which is 1.5 times more than that in 1990. China and the United States (US) rank among the top two in artificial impervious area, accounting for approximately 50% of the world's total in 2018. The artificial impervious area of China surpassed that of the US in 2015. By 2018, the remaining eight among the top ten countries are India, Russia, Brazil, France, Italy, Germany, Japan, and Canada. The GAIA dataset can be freely downloaded from http://data.ess.tsinghua.edu.cn.",2020-01-01 +34706201,Collaboration Between Child Play Therapy and Speech-Language Pathology: Case Reports of a Novel Language and Behavior Intervention.,"Purpose It has been well documented that a significant number of children with developmental language disorders (DLDs) also exhibit challenging behaviors. In this study, a new intervention (Play and Language [PAL]) was developed through a research collaboration between a speech-language pathologist and a play therapist. The purpose of this clinical focus article is to describe child play therapy techniques and how these, along with early language intervention techniques, may positively impact preschool children's general communication and behavior. Method Students in a communication sciences and disorders program were trained to use a combination of child therapy techniques and language facilitation procedures in the PAL approach. 
Five preschool children, who displayed DLD and challenging behaviors, participated in a 2-week daily intensive intervention. Pre- and postintervention data for general communication and behavior skills were collected through parent report and language sample data. Student clinician and parent surveys were collected to assess the feasibility of conducting the new intervention and the parent-observed outcomes and satisfaction. Results A majority of the children who participated in the study increased their intelligibility and number of different words. Fewer than half increased their sentence length. These same children decreased their challenging behaviors, with 11 of 14 behaviors being reduced to normal levels. All parents reported satisfaction with their child's results. In addition, students trained to provide the intervention reported high levels of satisfaction with the training to implement PAL and that they were confident in providing the intervention techniques. Conclusion Together, our exploratory data provide preliminary and limited evidence that combining play therapy and language facilitation techniques may improve general communication skills and decrease challenging behaviors within the same intervention. Supplemental Material https://doi.org/10.23641/asha.16840459.",2021-10-27 +34506584,PIIKA 2.5: Enhanced quality control of peptide microarrays for kinome analysis.,"Peptide microarrays consisting of defined phosphorylation target sites are an effective approach for high throughput analysis of cellular kinase (kinome) activity. Kinome peptide arrays are highly customizable and do not require species-specific reagents to measure kinase activity, making them amenable for kinome analysis in any species. Our group developed software, Platform for Integrated, Intelligent Kinome Analysis (PIIKA), to enable more effective extraction of meaningful biological information from kinome peptide array data. 
A subsequent version, PIIKA2, unveiled new statistical tools and data visualization options. Here we introduce PIIKA 2.5 to provide two essential quality control metrics and a new background correction technique to increase the accuracy and consistency of kinome results. The first metric alerts users to improper spot size and informs them of the need to perform manual resizing to enhance the quality of the raw intensity data. The second metric uses inter-array comparisons to identify outlier arrays that sometimes emerge as a consequence of technical issues. In addition, a new background correction method, background scaling, can sharply reduce spatial biases within a single array in comparison to background subtraction alone. Collectively, the modifications of PIIKA 2.5 enable identification and correction of technical issues inherent to the technology and better facilitate the extraction of meaningful biological information. We show that these metrics demonstrably enhance kinome analysis by identifying low quality data and reducing batch effects, and ultimately improve clustering of treatment groups and enhance reproducibility. The web-based and stand-alone versions of PIIKA 2.5 are freely accessible at via http://saphire.usask.ca.",2021-09-10 +31831521,Genome-Wide Analysis of Differential Gene Expression and Splicing in Excitatory Neurons and Interneuron Subtypes.,"Cortical circuit activity is shaped by the parvalbumin (PV) and somatostatin (SST) interneurons that inhibit principal excitatory (EXC) neurons and the vasoactive intestinal peptide (VIP) interneurons that suppress activation of other interneurons. To understand the molecular-genetic basis of functional specialization and identify potential drug targets specific to each neuron subtype, we performed a genome wide assessment of both gene expression and splicing across EXC, PV, SST and VIP neurons from male and female mouse brains. 
These results reveal numerous examples where neuron subtype-specific gene expression, as well as splice-isoform usage, can explain functional differences between neuron subtypes, including in presynaptic plasticity, postsynaptic receptor function, and synaptic connectivity specification. We provide a searchable web resource for exploring differential mRNA expression and splice form usage between excitatory, PV, SST, and VIP neurons (http://research-pub.gene.com/NeuronSubtypeTranscriptomes). This resource, combining a unique new dataset and novel application of analysis methods to multiple relevant datasets, identifies numerous potential drug targets for manipulating circuit function, reveals neuron subtype-specific roles for disease-linked genes, and is useful for understanding gene expression changes observed in human patient brains.SIGNIFICANCE STATEMENT Understanding the basis of functional specialization of neuron subtypes and identifying drug targets for manipulating circuit function requires comprehensive information on cell-type-specific transcriptional profiles. We sorted excitatory neurons and key inhibitory neuron subtypes from mouse brains and assessed differential mRNA expression. We used a genome-wide analysis which not only examined differential gene expression levels but could also detect differences in splice isoform usage. This analysis reveals numerous examples of neuron subtype-specific isoform usage with functional importance, identifies potential drug targets, and provides insight into the neuron subtypes involved in psychiatric disease. We also apply our analysis to two other relevant datasets for comparison, and provide a searchable website for convenient access to the resource.",2019-12-12 +34929547,Machine learning applications to differentiate comorbid functional seizures and epilepsy from pure functional seizures.,"

Purpose

We have utilized different methods in machine learning (ML) to develop the best algorithm to differentiate comorbid functional seizures (FS) and epilepsy from those who have pure FS.

Methods

This was a retrospective study of an electronic database of patients with seizures. All patients with a diagnosis of FS (with or without comorbid epilepsy) were studied at the outpatient epilepsy clinic at Shiraz University of Medical Sciences, Shiraz, Iran, from 2008 until 2021. We arbitrarily selected 14 features that are important in making the diagnosis of patients with seizures and also are easily obtainable during history taking. Pytorch and Scikit-learn packages were used to construct various models including random forest classifier, decision tree classifier, support vector classifier, k-nearest neighbor, and TabNet classifier.

Results

Three hundred and two patients had FS (82.5%), while 64 patients had FS and comorbid epilepsy (17.5%). The ""TabNet classifier"" could provide the best sensitivity (90%) and specificity (74%) measures (accuracy of 76%) to help differentiate patients with FS from those with FS and comorbid epilepsy.

Conclusion

These satisfactory differentiating measures suggest that the current algorithm could be used in clinical practice to help with the difficult task of distinguishing patients with FS from those with FS and comorbid epilepsy. Based on the results of the current study, we have developed an Application (SeiDx). This App is freely accessible at the following address: https://drive.google.com/file/d/1rAgBXKNPW9bmUCDioaGHHzLBQgzZ-HZ2/view. This App should be validated in a prospective assessment.",2021-12-15 +30407521,Ensembl 2019.,"The Ensembl project (https://www.ensembl.org) makes key genomic data sets available to the entire scientific community without restrictions. Ensembl seeks to be a fundamental resource driving scientific progress by creating, maintaining and updating reference genome annotation and comparative genomics resources. This year we describe our new and expanded gene, variant and comparative annotation capabilities, which led to a 50% increase in the number of vertebrate genomes we support. We have also doubled the number of available human variants and added regulatory regions for many mouse cell types and developmental stages. Our data sets and tools are available via the Ensembl website as well as a through a RESTful webservice, Perl application programming interface and as data files for download.",2019-01-01 +29976644,FusoPortal: an Interactive Repository of Hybrid MinION-Sequenced Fusobacterium Genomes Improves Gene Identification and Characterization. ,"Here we present FusoPortal, an interactive repository of Fusobacterium genomes that were sequenced using a hybrid MinION long-read sequencing pipeline, followed by assembly and annotation using a diverse portfolio of predominantly open-source software. 
Significant efforts were made to provide genomic and bioinformatic data as downloadable files, including raw sequencing reads, genome maps, gene annotations, protein functional analysis and classifications, and a custom BLAST server for FusoPortal genomes. FusoPortal has been initiated with eight complete genomes, of which seven were previously only drafts that ranged from 24 to 67 contigs. We have showcased that the genomes in FusoPortal provide accurate open reading frame annotations and have corrected a number of large (>3-kb) genes that were previously misannotated due to contig boundaries. In summary, FusoPortal (http://fusoportal.org) is the first database of MinION-sequenced and completely assembled Fusobacterium genomes, and this central Fusobacterium genomic and bioinformatic resource will aid the scientific community in developing a deeper understanding of how this human pathogen contributes to an array of diseases, including periodontitis and colorectal cancer.IMPORTANCE In this report, we describe a hybrid MinION whole-genome sequencing pipeline and the genomic characteristics of the first eight Fusobacterium strains deposited in the FusoPortal database. This collection of highly accurate and complete genomes drastically improves upon previous multicontig assemblies by correcting and newly identifying a significant number of open reading frames. 
We believe that the availability of this resource will result in the discovery of proteins and molecular mechanisms used by an oral pathogen, with the potential to further our understanding of how Fusobacterium nucleatum contributes to a repertoire of diseases, including periodontitis, preterm birth, and colorectal cancer.",2018-07-05 +34976319,SortPred: The first machine learning based predictor to identify bacterial sortases and their classes using sequence-derived information.,"Sortase enzymes are cysteine transpeptidases that embellish the surface of Gram-positive bacteria with various proteins thereby allowing these microorganisms to interact with their neighboring environment. It is known that several of their substrates can cause pathological implications, so researchers have focused on the development of sortase inhibitors. Currently, six different classes of sortases (A-F) are recognized. However, with the extensive application of bacterial genome sequencing projects, the number of potential sortases in the public databases has exploded, presenting considerable challenges in annotating these sequences. It is very laborious and time-consuming to characterize these sortase classes experimentally. Therefore, this study developed the first machine-learning-based two-layer predictor called SortPred, where the first layer predicts the sortase from the given sequence and the second layer predicts their class from the predicted sortase. To develop SortPred, we constructed an original benchmarking dataset and investigated 31 feature descriptors, primarily on five feature encoding algorithms. Afterward, each of these descriptors were trained using a random forest classifier and their robustness was evaluated with an independent dataset. Finally, we selected the final model independently for both layers depending on the performance consistency between cross-validation and independent evaluation. 
SortPred is expected to be an effective tool for identifying bacterial sortases, which in turn may aid in designing sortase inhibitors and exploring their functions. The SortPred webserver and a standalone version are freely accessible at: https://procarb.org/sortpred.",2021-12-14 +34482425,Annotated expression and activity data for murine recombinase alleles and transgenes: the CrePortal resource.,"Recombinase alleles and transgenes can be used to facilitate spatio-temporal specificity of gene disruption or transgene expression. However, the versatility of this in vivo recombination system relies on having detailed and accurate characterization of recombinase expression and activity to enable selection of the appropriate allele or transgene. The CrePortal ( http://www.informatics.jax.org/home/recombinase ) leverages the informatics infrastructure of Mouse Genome Informatics to integrate data from the scientific literature, direct data submissions from the scientific community at-large, and from major projects developing new recombinase lines and characterizing recombinase expression and specificity patterns. Searching the CrePortal by recombinase activity or specific recombinase gene driver provides users with a recombinase alleles and transgenes activity tissue summary and matrix comparison of gene expression and recombinase activity with links to generation details, a recombinase activity grid, and associated phenotype annotations. Future improvements will add cell type-based activity annotations. The CrePortal provides a comprehensive presentation of recombinase allele and transgene data to assist researchers in selection of the recombinase allele or transgene based on where and when recombination is desired.",2021-09-04 +33363710,FangNet: Mining herb hidden knowledge from TCM clinical effective formulas using structure network algorithm.,"The use of herbs to treat various human diseases has been recorded for thousands of years. 
In Asia's current medical system, numerous herbal formulas have been repeatedly verified to confirm their effectiveness in different periods, which is a great resource for drug innovation and discovery. Through the mining of these clinical effective formulas by network pharmacology and bioinformatics analysis, important biologically active ingredients derived from these natural products might be discovered. As modern medicine requires a combination of multiple drugs for the treatment of complex diseases, previously clinical formulas are also combinations of various herbs according to the main causes and accompanying symptoms. However, the herbs that play a major role in the treatment of diseases are always unclear. Therefore, how to rank each herb's relative importance and determine the core herbs, is the first step to assisting herb selection for active ingredients discovery. To solve this problem, we built the platform FangNet, which ranks all herbs on their relative topological importance using the PageRank algorithm, based on the constructed symptom-herb network from a collection of clinical empirical prescriptions. Three types of herb hidden knowledge, including herb importance rank, herb-herb co-occurrence, and associations to symptoms, were provided in an interactive visualization. Moreover, FangNet has designed role-based permission for teams to store, analyze, and jointly interpret their clinical formulas, in an easy and secure collaboration environment, aiming at creating a central hub for massive symptom-herb connections. FangNet can be accessed at http://fangnet.org or http://fangnet.herb.ac.cn.",2020-12-04 +32502232,LabxDB: versatile databases for genomic sequencing and lab management.,"

Summary

Experimental laboratory management and data-driven science require centralized software for sharing information, such as lab collections or genomic sequencing datasets. Although database servers such as PostgreSQL can store such information with multiple-user access, they lack user-friendly graphical and programmatic interfaces for easy data access and inputting. We developed LabxDB, a versatile open-source solution for organizing and sharing structured data. We provide several out-of-the-box databases for deployment in the cloud including simple mutant or plasmid collections and purchase-tracking databases. We also developed a high-throughput sequencing (HTS) database, LabxDB seq, dedicated to storage of hierarchical sample annotations. Scientists can import their own or publicly available HTS data into LabxDB seq to manage them from production to publication. Using LabxDB's programmatic access (REST API), annotations can be easily integrated into bioinformatics pipelines. LabxDB is modular, offering a flexible framework that scientists can leverage to build new database interfaces adapted to their needs.

Availability and implementation

LabxDB is available at https://gitlab.com/vejnar/labxdb and https://labxdb.vejnar.org for documentation. LabxDB is licensed under the terms of the Mozilla Public License 2.0.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +34925951,Prediction of Postoperative Delirium in Geriatric Hip Fracture Patients: A Clinical Prediction Model Using Machine Learning Algorithms.,"

Introduction

Postoperative delirium in geriatric hip fracture patients adversely affects clinical and functional outcomes and increases costs. A preoperative prediction tool to identify high-risk patients may facilitate optimal use of preventive interventions. The purpose of this study was to develop a clinical prediction model using machine learning algorithms for preoperative prediction of postoperative delirium in geriatric hip fracture patients.

Materials & methods

Geriatric patients undergoing operative hip fracture fixation were queried in the American College of Surgeons National Surgical Quality Improvement Program database (ACS NSQIP) from 2016 through 2019. A total of 28 207 patients were included, of which 8030 (28.5%) developed a postoperative delirium. First, the dataset was randomly split 80:20 into a training and testing subset. Then, a random forest (RF) algorithm was used to identify the variables predictive for a postoperative delirium. The machine learning-model was developed on the training set and the performance was assessed in the testing set. Performance was assessed by discrimination (c-statistic), calibration (slope and intercept), overall performance (Brier-score), and decision curve analysis.

Results

The included variables identified using RF algorithms were (1) age, (2) ASA class, (3) functional status, (4) preoperative dementia, (5) preoperative delirium, and (6) preoperative need for mobility-aid. The clinical prediction model reached good discrimination (c-statistic = .79), almost perfect calibration (intercept = -.01, slope = 1.02), and excellent overall model performance (Brier score = .15). The clinical prediction model was deployed as an open-access web-application: https://sorg-apps.shinyapps.io/hipfxdelirium/.

Discussion & conclusions

We developed a clinical prediction model that shows promise in estimating the risk of postoperative delirium in geriatric hip fracture patients. The clinical prediction model can play a beneficial role in decision-making for preventative measures for patients at risk of developing a delirium. If found to be externally valid, clinicians might use the available web-based application to help incorporate the model into clinical practice to aid decision-making and optimize preoperative prevention efforts.",2021-12-13 +30335683,Comprehensive analysis of long noncoding RNA expression in dorsal root ganglion reveals cell-type specificity and dysregulation after nerve injury.,"Dorsal root ganglion (DRG) neurons provide connectivity between peripheral tissues and the spinal cord. Transcriptional plasticity within DRG sensory neurons after peripheral nerve injury contributes to nerve repair but also leads to maladaptive plasticity, including the development of neuropathic pain. This study presents tissue and neuron-specific expression profiling of both known and novel long noncoding RNAs (LncRNAs) in the rodent DRG after nerve injury. We have identified a large number of novel LncRNAs expressed within the rodent DRG, a minority of which were syntenically conserved between the mouse, rat, and human, and including, both intergenic and antisense LncRNAs. We have also identified neuron type-specific LncRNAs in the mouse DRG and LncRNAs that are expressed in human IPS cell-derived sensory neurons. We show significant plasticity in LncRNA expression after nerve injury, which in mice is strain and gender dependent. This resource is publicly available and will aid future studies of DRG neuron identity and the transcriptional landscape in both the naive and injured DRG. We present our work regarding novel antisense and intergenic LncRNAs as an online searchable database, accessible from PainNetworks (http://www.painnetworks.org/). 
We have also integrated all annotated gene expression data in PainNetworks, so they can be examined in the context of their protein interactions.",2019-02-01 +,Occurrence of Xanthomonas arboricola pv. pruni Causing Bacterial Leaf Spot and Shot-Hole on Peach in Montenegro,"During May and June 2019, bacterial leaf spot was observed on peach (Prunus persica L. Batsch) in a young plantation (2017, 2018) on four different cultivars (Sugar Time, Britney Lane, Royal Bell, and Royal Time) in the Podgorica region of Montenegro. Diseased trees appeared sporadically, and incidence of symptomatic leaves per infected tree was 10 to 15%. Angular water-soaked spots surrounded by chlorotic tissue were observed along leaf midribs or margins, turning light brown in later stages. Necrotic lesions sometimes dropped out, giving the leaves a “shot-hole” appearance. When spots coalesced, large areas of necrotic foliar tissue formed. Eventually leaves turned yellow, resulting in premature defoliation. Margins taken from healthy tissue and necrotic lesions were homogenized in sterile distilled water (SDW) and spread onto YDC agar (Schaad et al. 2001), on which mostly pale yellow, translucent, circular, raised, and mucoid Xanthomonas-like colonies were observed after 3 days incubation at 26°C. Twelve representative isolates coded as Xp1 to 12 (Sugar Time, Xp1 to 3; Britney Lane, Xp4 to 6; Royal Bell, Xp7 to 9; and Royal Time, Xp10 to 12) were aerobic, gram negative, catalase positive, and oxidase and arginine dihydrolase negative; hydrolyzed esculin and gelatin but not starch; produced H₂S but not indole; and did not reduce nitrate (Schaad et al. 2001). All reactions corresponded with the reactions of the reference strain NCPPB 3156 of Xanthomonas arboricola pv. pruni (Xap), causal agent of leaf spot, shot-hole, and canker of stone fruit. 
The identity of the isolates was confirmed by PCR using the Xap-specific primer pair XapY17-F/XapY17-R for the ABC transporter ATP-binding protein (Pothier et al. 2011). A single unique target band of 943 bp was amplified for all isolates tested. Furthermore, sequencing of the housekeeping genes fyuA, rpoD, and gyrB (Young et al. 2008) showed 100% identity of tested isolates with genomic sequences of Xap strains deposited in the NCBI database (CFBP 411, 3921, 5580, 6653, 7099, and 7100) based on BLAST analysis and a neighbor-joining phylogenetic tree. The sequences of four isolates representing each cultivar were deposited in GenBank under accession numbers MN520623 to 26 for fyuA, MN520627 to 30 for rpoD, and MN520631 to 34 for gyrB gene for Xp1, Xp4, Xp7, and Xp10, respectively. Pathogenicity was confirmed on (i) young, fully expanded, healthy, detached peach leaves using a hypodermic syringe without a needle, pressed against the abaxial side of the leaf (bacterial suspension at 10⁷ CFU/ml from a 72-h YDC culture) and (ii) immature fruits stabbed and pressing the bacterial suspension (10⁷ CFU/ml) until overflow. Xap reference strain NCPPB 3156 served as a positive and SDW as a negative control. Inoculated leaves were kept under controlled conditions at 25°C, 16/8-h day/night photoperiod, and 60 to 80% relative humidity. All tested bacterial isolates and the reference strain developed water-soaked spots on leaves 3 days and on fruits 4 days after inoculation. Ten days after inoculation the spots became dark brown and necrotic. SDW-treated controls were negative. Xap was successfully reisolated and confirmed to be Xap using PCR (XapY17-F/XapY17-R). This study demonstrates Xap on peach as a new host in Montenegro. Xap was previously reported from other (15 of 44) European countries (https://gd.eppo.int/taxon/XANTPR/distribution), mainly on peach and plum, and from Montenegro on almond (Panić et al. 1998). 
As the causal agent of a major disease of stone fruits, Xap is a quarantine organism in Europe (EPPO A2 List of Pests recommended for regulation for EU). Therefore, it is necessary to implement containment and/or eradication measures to prevent further spread to new areas and/or new hosts. Furthermore, a nation-wide survey on the (re)occurrence of Xap in other hosts such as almond (reported by Panić et al. 1998), apricot, and plum is advised.",2020-04-01 +,First Report of Neocosmospora falciformis Causing Wilt and Root Rot of Muskmelon in Spain,"‘Cantaloupe’ and ‘Piel de Sapo’ are melon (Cucumis melo L.) varieties cultivated in Spain. In 2018, during a pathogens survey in experimental fields of Valencia and Alicante provinces (southeast Spain), wilt and root rot of melon plants were detected in grafted and ungrafted plants. Disease incidence ranged from 10% (Alicante) to 45% (Valencia). Symptoms included yellowing and wilting of leaves, rotting at the stem base and upper root, and collapse of the entire plant. Samplings were conducted from severely decayed and dead plants. Fragments (0.5 to 1 cm) from rotted lower stems and roots were surface disinfected for 1 min in 1.5% NaOCl, washed twice with sterilized distilled water, and plated onto potato dextrose agar (PDA) with streptomycin sulfate (0.5 g/liter). Plates were incubated at 25°C in the dark for 3 to 5 days. Mycelia resembling Fusarium were isolated and characterized by morphological and molecular methods. Based on their adpressed beige mycelia, growth in concentric rings, and absence of sporodochia, colonies growing on PDA and Spezieller Nährstoffarmer agar were preliminary identified as belonging to the Fusarium solani species complex. On PDA, colonies were white-greyish to pale-cream growing in concentric rings with beige reverse after 6 days. No sporodochia were observed. 
Macroconidia were slender, falcate, hyaline, three to five septate 43 (38 to 47) × 4.5 (3.8 to 5.2) µm; aerial microconidia were abundant, borne on short, undifferentiated monophialides, ellipsoidal to reniform, sometimes with a truncate base, and zero to one septate 10 (9.2 to 11.4) × 3.5 (2.5 to 6) µm. Chlamydospores were globose, single or in chains, intercalary and thin- to thick-walled. Sequencing of the internal transcribed spacer (ITS) region, a fragment of translation elongation factor-1α (TEF-1α), and RNA polymerase II (RPB2) partial genes was done using ITS1/ITS4 (White et al. 1990), EF1/EF2 (O’Donnell et al. 1998), and fRPB2-7cF/fRPB2-11aR (Reeb et al. 2004) primers, respectively. After comparisons using BLASTn and the Fusarium ID database (http://www.wi.knaw.nl/fusarium/), eight isolates were identified as Neocosmospora falciformis. The ITS, EF-1α, and RPB2 sequences of isolate CRR 2-6 showed 99% homology with N. falciformis EU329691 (ITS), AB817158 (EF-1α), and EU329650 (RPB2). Sequences were deposited in GenBank with accession numbers MN086327 (ITS), MN509809 (TEF-1α), and MN509810 (RPB2). For pathogenicity tests, isolate CRR 2-6 was grown in 250-ml flasks containing potato sucrose medium for 3 days at 25°C in the dark with constant agitation. Roots of ten 15-day-old Piel de Sapo seedlings grown 6 days in trays with sterilized substrate were submerged into a suspension of 5 × 10⁶ conidia/ml for 2 min and transferred to the plastic containers. Three plants submerged in sterile water served as controls. Plants were incubated in a growth chamber (25°C; 16-h/8-h photoperiod). Scarce development, wilting, and yellowing followed by plant death were observed 15 days postinoculation. Noninoculated controls remained asymptomatic. The fungus was reisolated from all the inoculated plants and identified using ITS, TEF-1α, and RPB2. N. falciformis belongs to the Neocosmospora (Fusarium) solani species complex (O’Donnell et al. 2008). 
To our knowledge, this is the first report of N. falciformis causing wilt and root rot of melon in Spain. The adoption of molecular-based identification methods should lead to a more precise determination on incidence of the pathogen in this Mediterranean area.",2020-04-01 +34878861,"Using Point-in-Time Homeless Counts to Monitor Mortality Trends Among People Experiencing Homelessness in Los Angeles County, California, 2015‒2019.","Objectives. To report trends in mortality rates, mortality rate ratios (MRRs), and causes of death among people experiencing homelessness (PEH) in Los Angeles County, California, by using annual point-in-time homeless counts and to compare findings to published longitudinal cohort studies of homeless mortality. Methods. We enumerated homeless deaths and determined causes by using 2015-2019 medical examiner‒coroner data matched to death certificate data. We estimated midyear homeless population denominators by averaging consecutive January point-in-time homeless counts. We used annual demographic surveys of PEH to estimate age- and gender-adjusted MRRs. We identified comparison studies through a literature review. Results. Mortality rates increased from 2015 to 2019. Drug overdose was the leading cause of death. Mortality was higher among White than among Black and Latino PEH. Compared with the general population, MRRs ranged from 2.8 (95% confidence interval [CI] = 2.7, 3.0) for all causes to 35.1 (95% CI = 31.9, 38.4) for drug overdose. Crude mortality rates and all-cause MRRs from comparison cohort studies were similar to those in the current study. Conclusions. These methods can be adapted by other urban jurisdictions seeking to better understand and reduce mortality in their homeless populations. (Am J Public Health. 2021;111(12):2212-2222. https://doi.org/10.2105/AJPH.2021.306502).",2021-12-01 +34433408,Web-based LinRegPCR: application for the visualization and analysis of (RT)-qPCR amplification and melting data.,"

Background

The analyses of amplification and melting curves have been shown to provide valuable information on the quality of the individual reactions in quantitative PCR (qPCR) experiments and to result in more reliable and reproducible quantitative results.

Implementation

The main steps in the amplification curve analysis are (1) a unique baseline subtraction, not using the ground phase cycles, (2) PCR efficiency determination from the exponential phase of the individual reactions, (3) setting a common quantification threshold and (4) calculation of the efficiency-corrected target quantity with the common threshold, efficiency per assay and Cq per reaction. The melting curve analysis encompasses smoothing of the observed fluorescence data, normalization to remove product-independent fluorescence loss, peak calling and assessment of the correct peak by comparing its melting temperature with the known melting temperature of the intended amplification product.

Results

The LinRegPCR web application provides visualization and analysis of a single qPCR run. The user interface displays the analysis results on the amplification curve analysis and melting curve analysis in tables and graphs in which deviant reactions are highlighted. The annotated results in the tables can be exported for calculation of gene-expression ratios, fold-change between experimental conditions and further statistical analysis. Web-based LinRegPCR addresses two types of users, wet-lab scientists analyzing the amplification and melting curves of their own qPCR experiments and bioinformaticians creating pipelines for analysis of series of qPCR experiments by splitting its functionality into a stand-alone back-end RDML (Real-time PCR Data Markup Language) Python library and several companion applications for data visualization, analysis and interactive access. The use of the RDML data standard enables machine independent storage and exchange of qPCR data and the RDML-Tools assist with the import of qPCR data from the files exported by the qPCR instrument.

Conclusions

The combined implementation of these analyses in the newly developed web-based LinRegPCR ( https://www.gear-genomics.com/rdml-tools/ ) is platform independent and much faster than the original Windows-based versions of the LinRegPCR program. Moreover, web-based LinRegPCR includes a novel statistical outlier detection and the combination of amplification and melting curve analyses allows direct validation of the amplification product and reporting of reactions that amplify artefacts.",2021-08-24 +31081014,iMKT: the integrative McDonald and Kreitman test.,"The McDonald and Kreitman test (MKT) is one of the most powerful and widely used methods to detect and quantify recurrent natural selection using DNA sequence data. Here we present iMKT (acronym for integrative McDonald and Kreitman test), a novel web-based service performing four distinct MKT types. It allows the detection and estimation of four different selection regimes -adaptive, neutral, strongly deleterious and weakly deleterious- acting on any genomic sequence. iMKT can analyze both user's own population genomic data and pre-loaded Drosophila melanogaster and human sequences of protein-coding genes obtained from the largest population genomic datasets to date. Advanced options in the website allow testing complex hypotheses such as the application example showed here: do genes located in high recombination regions undergo higher rates of adaptation? We aim that iMKT will become a reference site tool for the study of evolutionary adaptation in massive population genomics datasets, especially in Drosophila and humans. iMKT is a free resource online at https://imkt.uab.cat.",2019-07-01 +29655704,A Library of Phosphoproteomic and Chromatin Signatures for Characterizing Cellular Responses to Drug Perturbations.,"Although the value of proteomics has been demonstrated, cost and scale are typically prohibitive, and gene expression profiling remains dominant for characterizing cellular responses to perturbations. 
However, high-throughput sentinel assays provide an opportunity for proteomics to contribute at a meaningful scale. We present a systematic library resource (90 drugs × 6 cell lines) of proteomic signatures that measure changes in the reduced-representation phosphoproteome (P100) and changes in epigenetic marks on histones (GCP). A majority of these drugs elicited reproducible signatures, but notable cell line- and assay-specific differences were observed. Using the ""connectivity"" framework, we compared signatures across cell types and integrated data across assays, including a transcriptional assay (L1000). Consistent connectivity among cell types revealed cellular responses that transcended lineage, and consistent connectivity among assays revealed unexpected associations between drugs. We further leveraged the resource against public data to formulate hypotheses for treatment of multiple myeloma and acute lymphocytic leukemia. This resource is publicly available at https://clue.io/proteomics.",2018-04-11 +34895309,Drug targeting of aminoacyl-tRNA synthetases in Anopheles species and Aedes aegypti that cause malaria and dengue.,"

Background

Mosquito-borne diseases have a devastating impact on human civilization. A few species of Anopheles mosquitoes are responsible for malaria transmission, and while there has been a reduction in malaria-related deaths worldwide, growing insecticide resistance is a cause for concern. Aedes mosquitoes are known vectors of viral infections, including dengue, yellow fever, chikungunya, and Zika. Aminoacyl-tRNA synthetases (aaRSs) are key players in protein synthesis and are potent anti-infective drug targets. The structure-function activity relationship of aaRSs in mosquitoes (in particular, Anopheles and Aedes spp.) remains unexplored.

Methods

We employed computational techniques to identify aaRSs from five different mosquito species (Anopheles culicifacies, Anopheles stephensi, Anopheles gambiae, Anopheles minimus, and Aedes aegypti). The VectorBase database ( https://vectorbase.org/vectorbase/app ) and web-based tools were utilized to predict the subcellular localizations (TargetP-2.0, UniProt, DeepLoc-1.0), physicochemical characteristics (ProtParam), and domain arrangements (PfAM, InterPro) of the aaRSs. Structural models for prolyl (PRS)- and phenylalanyl (FRS)-tRNA synthetases were generated using the I-TASSER and Phyre protein modeling servers.

Results

Among the vector species, a total of 37 (An. gambiae), 37 (An. culicifacies), 37 (An. stephensi), 37 (An. minimus), and 35 (Ae. aegypti) different aaRSs were characterized within their respective mosquito genomes. Sequence identity amongst the aaRSs from the four Anopheles spp. was > 80% and in Ae. aegypti was > 50%.

Conclusions

Structural analysis of two important aminoacyl-tRNA synthetases [prolyl (PRS) and phenylalanyl (FRS)] of Anopheles spp. suggests structural and sequence similarity with potential antimalarial inhibitor [halofuginone (HF) and bicyclic azetidine (BRD1369)] binding sites. This suggests the potential for repurposing of these inhibitors against the studied Anopheles spp. and Ae. aegypti.",2021-12-11 +34925449,SVInterpreter: A Comprehensive Topologically Associated Domain-Based Clinical Outcome Prediction Tool for Balanced and Unbalanced Structural Variants.,"With the advent of genomic sequencing, a number of balanced and unbalanced structural variants (SVs) can be detected per individual. Mainly due to incompleteness and the scattered nature of the available annotation data of the human genome, manual interpretation of the SV's clinical significance is laborious and cumbersome. Since bioinformatic tools developed for this task are limited, a comprehensive tool to assist clinical outcome prediction of SVs is warranted. Herein, we present SVInterpreter, a free Web application, which analyzes both balanced and unbalanced SVs using topologically associated domains (TADs) as genome units. Among others, gene-associated data (as function and dosage sensitivity), phenotype similarity scores, and copy number variants (CNVs) scoring metrics are retrieved for an informed SV interpretation. For evaluation, we retrospectively applied SVInterpreter to 97 balanced (translocations and inversions) and 125 unbalanced (deletions, duplications, and insertions) previously published SVs, and 145 SVs identified from 20 clinical samples. Our results showed the ability of SVInterpreter to support the evaluation of SVs by (1) confirming more than half of the predictions of the original studies, (2) decreasing 40% of the variants of uncertain significance, and (3) indicating several potential position effect events. 
To our knowledge, SVInterpreter is the most comprehensive TAD-based tool to identify the possible disease-causing candidate genes and to assist prediction of the clinical outcome of SVs. SVInterpreter is available at http://dgrctools-insa.min-saude.pt/cgi-bin/SVInterpreter.py.",2021-12-01 +35155806,Pneumococcal conjugate vaccines reduce myringotomy with tympanostomy tube insertion in young children in Japan.,"

Objective

Pneumococcal conjugate vaccines (PCVs) have been reported to reduce the incidence of myringotomy with tympanostomy tube insertion (MTTI) in children. However, little information is available focusing on specific ages. We examined the prophylactic efficacy of PCVs on the onset of complex otitis media (ComOM) that requires MTTI.

Method

From 2011, the public support for PCV7 started with the usual four-dose schedule and an emergency schedule for 2- to 4-year-old children in Japan. PCV7 was replaced with PCV13 in 2013. We reviewed the nationwide database obtained from the JMDC Claims Database (https://www.jmdc.co.jp/en/) to examine the MTTI incidence during the era before and after PCV introduction (from 2008 to 2010 and from 2011 to 2017, respectively). Subjects were analyzed by stratified age groups (from 0 to 8 years old) and in subdivided groups of 6 months (from 0 to 35 months old). We compared the MTTI incidence between the groups for each age as well as between those for each calendar year.

Results

A significant reduction in the MTTI incidence was detected in the 1-year-old children of the PCV era compared to those of the pre-PCV era. The reduction rates were more prominent in the 12-17 months group as compared to the 18-23 months group (PCV7 p = .005 and PCV13 p = .011, PCV7 p = .014 and PCV13 p = .153, respectively). The significant difference in the 1-year-old children continued in six of seven calendar years from 2011 to 2017, whereas no significant reduction was detected in children >3 years old.

Conclusions

The introduction of both PCV7 and PCV13 reduced MTTI incidences in children around 1 year old, and the effects were more prominent during the early half-periods. Our results support etiological evidence that pneumococcal infection in children aged 1 year and younger might play roles in the pathogenesis of ComOM that requires MTTI.",2021-12-11 +31701150,BioModels-15 years of sharing computational models in life science.,"Computational modelling has become increasingly common in life science research. To provide a platform to support universal sharing, easy accessibility and model reproducibility, BioModels (https://www.ebi.ac.uk/biomodels/), a repository for mathematical models, was established in 2005. The current BioModels platform allows submission of models encoded in diverse modelling formats, including SBML, CellML, PharmML, COMBINE archive, MATLAB, Mathematica, R, Python or C++. The models submitted to BioModels are curated to verify the computational representation of the biological process and the reproducibility of the simulation results in the reference publication. The curation also involves encoding models in standard formats and annotation with controlled vocabularies following MIRIAM (minimal information required in the annotation of biochemical models) guidelines. BioModels now accepts large-scale submission of auto-generated computational models. With gradual growth in content over 15 years, BioModels currently hosts about 2000 models from the published literature. With about 800 curated models, BioModels has become the world's largest repository of curated models and emerged as the third most used data resource after PubMed and Google Scholar among the scientists who use modelling in their research. 
Thus, BioModels benefits modellers by providing access to reliable and semantically enriched curated models in standard formats that are easy to share, reproduce and reuse.",2020-01-01 +33297001,Derivation of algal acute to chronic ratios for use in chemical toxicity extrapolations.,"Algal toxicity studies are required by regulatory agencies for a variety of purposes including classification and labeling and environmental risk assessment of chemicals. Algae are also frequently the most sensitive taxonomic group tested. Acute to chronic ratios (ACRs) have been challenging to derive for algal species because of the complexities of the underlying experimental data including: a lack of universally agreed upon algal inhibition endpoints; evolution of experimental designs over time and by different standardization authorities; and differing statistical approaches (e.g., regression versus hypothesis-based effect concentrations). Experimental data for developing globally accepted algal ACRs have been limited because of data availability, and in most regulatory frameworks an ACR of 10 is used regardless of species, chemical type or mode of action. Acute and chronic toxicity (inhibition) data on 17 algal species and 442 chemicals were compiled from the EnviroTox database (https://envirotoxdatabase.org/) and a proprietary database of algal toxicity records. Information was probed for growth rate, yield, and final cell density endpoints focusing primarily on studies of 72 and 96 h duration. Comparisons of acute and chronic data based on either single (e.g., growth rate) and multiple (e.g., growth rate, final cell density) endpoints were used to assess acute and chronic relationships. Linear regressions of various model permutations were used to compute ACRs for multiple combinations of taxa, chemicals, and endpoints, and showed that ACRs for algae were consistently around 4 (ranging from 2.43 to 5.62). 
An ACR of 4 for algal toxicity is proposed as an alternative to a default value of 10, and recommendations for consideration and additional research and development are provided.",2020-08-13 +33245098,Morphing projections: a new visual technique for fast and interactive large-scale analysis of biomedical datasets.,"

Motivation

Biomedical research entails analyzing high dimensional records of biomedical features with hundreds or thousands of samples each. This often involves using also complementary clinical metadata, as well as a broad user domain knowledge. Common data analytics software makes use of machine learning algorithms or data visualization tools. However, they are frequently one-way analyses, providing little room for the user to reconfigure the steps in light of the observed results. In other cases, reconfigurations involve large latencies, requiring a retraining of algorithms or a large pipeline of actions. The complex and multiway nature of the problem, nonetheless, suggests that user interaction feedback is a key element to boost the cognitive process of analysis, and must be both broad and fluid.

Results

In this article, we present a technique for biomedical data analytics, based on blending meaningful views in an efficient manner, allowing to provide a natural smooth way to transition among different but complementary representations of data and knowledge. Our hypothesis is that the confluence of diverse complementary information from different domains on a highly interactive interface allows the user to discover relevant relationships or generate new hypotheses to be investigated by other means. We illustrate the potential of this approach with three case studies involving gene expression data and clinical metadata, as representative examples of high dimensional, multidomain, biomedical data.

Availability and implementation

Code and demo app to reproduce the results available at https://gitlab.com/idiazblanco/morphing-projections-demo-and-dataset-preparation.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +35003156,Novel Design of Imputation-Enabled SNP Arrays for Breeding and Research Applications Supporting Multi-Species Hybridization.,"Array-based single nucleotide polymorphism (SNP) genotyping platforms have low genotype error and missing data rates compared to genotyping-by-sequencing technologies. However, design decisions used to create array-based SNP genotyping assays for both research and breeding applications are critical to their success. We describe a novel approach applicable to any animal or plant species for the design of cost-effective imputation-enabled SNP genotyping arrays with broad utility and demonstrate its application through the development of the Illumina Infinium Wheat Barley 40K SNP array Version 1.0. We show that the approach delivers high quality and high resolution data for wheat and barley, including when samples are jointly hybridised. The new array aims to maximally capture haplotypic diversity in globally diverse wheat and barley germplasm while minimizing ascertainment bias. Comprising mostly biallelic markers that were designed to be species-specific and single-copy, the array permits highly accurate imputation in diverse germplasm to improve the statistical power of genome-wide association studies (GWAS) and genomic selection. The SNP content captures tetraploid wheat (A- and B-genome) and Aegilops tauschii Coss. (D-genome) diversity and delineates synthetic and tetraploid wheat from other wheat, as well as tetraploid species and subgroups. The content includes SNP tagging key trait loci in wheat and barley, as well as direct connections to other genotyping platforms and legacy datasets. 
The utility of the array is enhanced through the web-based tool, Pretzel (https://plantinformatics.io/) which enables the content of the array to be visualized and interrogated interactively in the context of numerous genetic and genomic resources to be connected more seamlessly to research and breeding. The array is available for use by the international wheat and barley community.",2021-12-22 +33539887,ADDRESS: A Database of Disease-associated Human Variants Incorporating Protein Structure and Folding Stabilities.,"Numerous human diseases are caused by mutations in genomic sequences. Since amino acid changes affect protein function through mechanisms often predictable from protein structure, the integration of structural and sequence data enables us to estimate with greater accuracy whether and how a given mutation will lead to disease. Publicly available annotated databases enable hypothesis assessment and benchmarking of prediction tools. However, the results are often presented as summary statistics or black box predictors, without providing full descriptive information. We developed a new semi-manually curated human variant database presenting information on the protein contact-map, sequence-to-structure mapping, amino acid identity change, and stability prediction for the popular UniProt database. We found that the profiles of pathogenic and benign missense polymorphisms can be effectively deduced using decision trees and comparative analyses based on the presented dataset. The database is made publicly available through https://zhanglab.ccmb.med.umich.edu/ADDRESS.",2021-02-02 +33548918,Prediction Models in Aneurysmal Subarachnoid Hemorrhage: Forecasting Clinical Outcome With Artificial Intelligence.,"

Background

Predicting outcome after aneurysmal subarachnoid hemorrhage (aSAH) is known to be challenging and complex. Machine learning approaches, of which feedforward artificial neural networks (ffANNs) are the most widely used, could contribute to the patient-specific outcome prediction.

Objective

To investigate the prediction capacity of an ffANN for the patient-specific clinical outcome and the occurrence of delayed cerebral ischemia (DCI) and compare those results with the predictions of 2 internationally used scoring systems.

Methods

A prospective database was used to predict (1) death during hospitalization (ie, mortality) (n = 451), (2) unfavorable modified Rankin Scale (mRS) at 6 mo (n = 413), and (3) the occurrence of DCI (n = 362). Additionally, the predictive capacities of the ffANN were compared to those of Subarachnoid Haemorrhage International Trialists (SAHIT) and VASOGRADE to predict clinical outcome and occurrence of DCI.

Results

The area under the curve (AUC) of the ffANN was shown to be 88%, 85%, and 72% for predicting mortality, an unfavorable mRS, and the occurrence of DCI, respectively. Sensitivity/specificity rates of the ffANN for mortality, unfavorable mRS, and the occurrence of DCI were 82%/80%, 94%/80%, and 74%/68%. The ffANN and SAHIT calculator showed similar AUCs for predicting personalized outcome. The presented ffANN and VASOGRADE were found to perform equally with regard to personalized prediction of occurrence of DCI.

Conclusion

The presented ffANN showed equal performance when compared with VASOGRADE and SAHIT scoring systems while using fewer individual cases. The web interface launched simultaneously with the publication of this manuscript allows for usage of the ffANN-based prediction tool for individual data (https://nutshell-tool.com/).",2021-04-01 +31675986,Magnitude and causes of first-line antiretroviral therapy regimen changes among HIV patients in Ethiopia: a systematic review and meta-analysis.,"

Background

Antiretroviral therapy (ART) has markedly decreased the morbidity and mortality due to HIV/AIDS. ART regimen change is a major challenge for the sustainability of human immunodeficiency virus (HIV) treatment program. This is found to be a major concern among HIV/AIDS patients in a resource-limited setting, where treatment options are limited.

Objectives

The aim of this review is to generate the best available evidence regarding the magnitude of first-line antiretroviral therapy regimen change and the causes for regimen change among HIV patients on ART in Ethiopia.

Methods

The reviewed studies were accessed through electronic web-based search strategy from PubMed Medline, EMBASE, Hinari, Springer link and Google Scholar. Data were extracted using Microsoft Excel and exported to Stata software version 13 for analyses. The overall pooled estimation of outcomes was calculated using a random-effect model of DerSimonian-Laird method at 95% confidence level. Heterogeneity of studies was determined using I2 statistics. For the magnitude of regimen change, the presence of publication bias was evaluated using the Begg's and Egger's tests. The protocol of this systematic review and meta-analysis was registered in the Prospero database with reference number ID: CRD42018099742. The published methodology is available from: https://www.crd.york.ac.uk/PROSPERO/display_record.php?RecordID=99742 .

Results

A total of 22 studies published between the years 2012 and 2018 were included. Out of 22 articles, 14 articles reported the magnitude of regimen change and consisted of 13,668 HIV patients. The estimated national pooled magnitude of regimen change was 37% (95% CI: 34, 44%; Range: 15.1-63.8%) with degree of heterogeneity (I2), 98.7%; p-value < 0.001. Seventeen articles were used to identify the causes for first-line antiretroviral therapy regimen change. The major causes identified were toxicity, 58% (95% CI: 46, 69%; Range: 14.4-88.5%); TB co-morbidity, 12% (95% CI: 8, 16%; Range: 0.8-31.7%); treatment failure, 7% (95% CI: 5, 9%; Range: 0.4-24.4%); and pregnancy, 5% (95% CI: 4, 7%; Range: 0.6-11.9%).

Conclusions

The original first-line regimen was changed in one-third of HIV patients on ART in Ethiopia. Toxicity of the drugs, TB co-morbidity, treatment failure, and pregnancy were the main causes for the change of the first-line regimen among HIV patients on antiretroviral therapy.",2019-11-01 +31439493,Common Data Elements for National Institute of Mental Health-Funded Translational Early Psychosis Research.,"The National Institutes of Health has established the PhenX Toolkit as a web-based resource containing consensus measures freely available to the research community. The National Institute of Mental Health (NIMH) has introduced the Mental Health Research Core Collection as part of the PhenX Toolkit and recently convened the PhenX Early Psychosis Working Group to generate the PhenX Early Psychosis Specialty Collection. The Working Group consisted of two complementary panels for clinical and translational research. We review the process, deliberations, and products of the translational research panel. The Early Psychosis Specialty Collection rationale for measure selection as well as additional information and protocols for obtaining each measure are available on the PhenX website (https://www.phenxtoolkit.org). The NIMH strongly encourages investigators to use instruments from the PhenX Mental Health Research Collections in NIMH-funded studies and discourages use of alternative measures to collect similar data without justification. We also discuss some of the potential advances that can be achieved by collecting common data elements across large-scale longitudinal studies of early psychosis.",2019-06-29 +31114900,IEDB-AR: immune epitope database-analysis resource in 2019.,"The Immune Epitope Database Analysis Resource (IEDB-AR, http://tools.iedb.org/) is a companion website to the IEDB that provides computational tools focused on the prediction and analysis of B and T cell epitopes. 
All of the tools are freely available through the public website and many are also available through a REST API and/or a downloadable command-line tool. A virtual machine image of the entire site is also freely available for non-commercial use and contains most of the tools on the public site. Here, we describe the tools and functionalities that are available in the IEDB-AR, focusing on the 10 new tools that have been added since the last report in the 2012 NAR webserver edition. In addition, many of the tools that were already hosted on the site in 2012 have received updates to newest versions, including NetMHC, NetMHCpan, BepiPred and DiscoTope. Overall, this IEDB-AR update provides a substantial set of updated and novel features for epitope prediction and analysis.",2019-07-01 +33375842,Expecting Questions Modulates Cognitive Effort in a Syntactic Processing Task: Evidence From Pupillometry.,"Purpose Pupillary responses captured via pupillometry (measurement of pupillary dilation and constriction during the performance of a cognitive task) are psychophysiological indicators of cognitive effort, attention, arousal, and resource engagement. Pupillometry may be a promising tool for enhancing our understanding of the relationship between cognition and language in people with and without aphasia. Interpretation of pupillary responses is complex. This study was designed as a stepping-stone for future pupillometric studies involving people with aphasia. Asking comprehension questions is common in language processing research involving people with and without aphasia. However, the influence of comprehension questions on pupillometric indices of task engagement (tonic responses) and cognitive effort (task-evoked responses of the pupil [TERPs]) is unknown. We tested whether asking comprehension questions influenced pupillometric results of adults without aphasia during a syntactic processing task. 
Method Forty adults without aphasia listened to easy (canonical) and difficult (noncanonical) sentences in two conditions: one that contained an explicit comprehension task (question condition) and one that did not (no-question condition). The influence of condition and canonicity on pupillary responses was examined. Results The influence of canonicity was only significant in the question condition: TERPs for difficult sentences were larger than TERPs for easy sentences. Tonic responses did not differ between conditions. Conclusions Although participants had similar levels of attentiveness in both conditions, increases in indices of cognitive effort during syntactic processing were significant only when participants expected comprehension questions. Results contribute to a body of evidence indicating the importance of task design and careful linguistic stimulus control when using pupillometry to study language processing. Supplemental Material https://doi.org/10.23641/asha.13480368.",2020-12-29 +32746818,Pain talk in hospice care: a conversation analysis.,"

Background

A large number of hospice patients have been reported to experience symptoms of pain. Thus, managing the patient's pain is one aspect of hospice care provision. The delivery of pain care services could be facilitated through effective communication. However, little has been done to explore the interactional details of the delivery of pain care services in palliative care.

Methods

Conversation analysis is a useful method to explore the interactional details of interaction between hospice care providers and terminally ill patients. Using the method of Conversation Analysis (CA), this study aims to demonstrate how the hospice care provider employs different types of interactional practices to address the patient's pain concerns. The data shown in this study were collected from the Alexander St website http://ctiv.alexanderstreet.com , an educational resource presenting a large collection of psycho-therapeutic videos.

Results

In this study, an illustrative analysis is presented to show the potential of conversation analysis for research on pain talk in palliative care. It has been shown that conversation analysis could contribute to unfolding the interactional details regarding ""pain talk"" in hospice care settings. Specifically, conversation analysis could provide a detailed description and interpretation of the conversational practices, which are used to construct hospice care provider participation in delivering pain talk. In addition, conversation analysis could also demonstrate the interactional resources by which patients disclose their experiences of physical or spiritual pain to the hospice care provider and how the hospice care provider responds to the patient's troubles talk or feelings talk.

Conclusions

This study identifies five types of interactional resources which are used to deal with the patient's pain concerns in hospice care setting. A conversation analytical study of pain talk in hospice care could provide a turn-by-turn description of how the hospice care provider communicates with the terminally ill patient in terms of the patient's pain concerns. The findings in this study could inform how the hospice care provider initiates, delivers and develops a pain talk with the terminally ill patient effectively.",2020-08-03 +30462302,MBGD update 2018: microbial genome database based on hierarchical orthology relations covering closely related and distantly related comparisons.,"The Microbial Genome Database for Comparative Analysis (MBGD) is a database for comparative genomics based on comprehensive orthology analysis of bacteria, archaea and unicellular eukaryotes. MBGD now contains 6318 genomes. To utilize the database for both closely related and distantly related genomes, MBGD previously provided two types of ortholog tables: the standard ortholog table containing one representative genome from each genus covering the entire taxonomic range and the taxon specific ortholog tables for each taxon. However, this approach has a drawback in that the standard ortholog table contains only genes that are conserved in the representative genomes. To address this problem, we developed a stepwise procedure to construct ortholog tables hierarchically in a bottom-up manner. By using this approach, the new standard ortholog table now covers the entire gene repertoire stored in MBGD. In addition, we have enhanced several functionalities, including rapid and flexible keyword searching, profile-based sequence searching for orthology assignment to a user query sequence, and displaying a phylogenetic tree of each taxon based on the concatenated core gene sequences. 
For integrative database searching, the core data in MBGD are represented in Resource Description Framework (RDF) and a SPARQL interface is provided to search them. MBGD is available at http://mbgd.genome.ad.jp/.",2019-01-01 +,Deciphering genes associated with root wilt disease of coconut and development of its transcriptomic database (CnTDB),"Coconut (Cocos nucifera L.) has global significance in agriculture and industries due to its nutritional and medicinal properties. Coconut Root Wilt Disease (RWD) causes huge economic loss, thus molecular approach for improved varieties is needed. Since whole genome sequence is unavailable, transcriptomic approach is imperative for deciphering pathways as well as genic region marker discovery from contrasting genotypes. This is the first report of RWD transcriptome database having candidate genes and pathway discovery along with genic simple sequence repeats, SNPs, indels to be used as functional domain markers. A relational database, CnTDB (http://webtom.cabgrid.res.in/cntdb/), based on three-tier architecture has been developed having 285235 transcripts with all blast information, annotations, pathways, 22021 DEGs, transcriptional factors, 10126 and 97117 SSR markers mined from DEGs and de novo transcriptome assembly, respectively. Putative markers with primers can be valuable genomic resource in endeavor of RWD resistant variety development for higher coconut productivity.",2017-12-01 +34805992,mmCSM-NA: accurately predicting effects of single and multiple mutations on protein-nucleic acid binding affinity.,"While protein-nucleic acid interactions are pivotal for many crucial biological processes, limited experimental data has made the development of computational approaches to characterise these interactions a challenge. 
Consequently, most approaches to understand the effects of missense mutations on protein-nucleic acid affinity have focused on single-point mutations and have presented a limited performance on independent data sets. To overcome this, we have curated the largest dataset of experimentally measured effects of mutations on nucleic acid binding affinity to date, encompassing 856 single-point mutations and 141 multiple-point mutations across 155 experimentally solved complexes. This was used in combination with an optimized version of our graph-based signatures to develop mmCSM-NA (http://biosig.unimelb.edu.au/mmcsm_na), the first scalable method capable of quantitatively and accurately predicting the effects of multiple-point mutations on nucleic acid binding affinities. mmCSM-NA obtained a Pearson's correlation of up to 0.67 (RMSE of 1.06 Kcal/mol) on single-point mutations under cross-validation, and up to 0.65 on independent non-redundant datasets of multiple-point mutations (RMSE of 1.12 kcal/mol), outperforming similar tools. mmCSM-NA is freely available as an easy-to-use web-server and API. We believe it will be an invaluable tool to shed light on the role of mutations affecting protein-nucleic acid interactions in diseases.",2021-11-17 +,Mapping pan-European land cover using Landsat spectral-temporal metrics and the European LUCAS survey,"This study analyzed, for the first time, the potential of combining the large European-wide land survey LUCAS (Land Use/Cover Area frame Survey) and Landsat-8 data for mapping pan-European land cover and land use. We used annual and seasonal spectral-temporal metrics and environmental features to map 12 land cover and land use classes across Europe. The spectral-temporal metrics provided an efficient means to capture seasonal variations of land surface spectra and to reduce the impact of clouds and cloud-shadows by relaxing the otherwise strong cloud cover limitations imposed by image-based classification methods. 
The best classification model was based on Landsat-8 data from three years (2014–2016) and achieved an accuracy of 75.1%, nearly 2 percentage points higher than the classification model based on a single year of Landsat data (2015). Our results indicate that annual pan-European land cover maps are feasible, but that temporally dynamic classes like artificial land, cropland, and grassland still benefit from more frequent satellite observations. The produced pan-European land cover map compared favorably to the existing CORINE (Coordination of Information on the Environment) 2012 land cover dataset. The mapped country-wide area proportions strongly correlated with LUCAS-estimated area proportions (r = 0.98). Differences between mapped and LUCAS sample-based area estimates were highest for broadleaved forest (map area was 9% higher). Grassland and seasonal cropland areas were 7% higher than the LUCAS estimate, respectively. In comparison, the correlation between LUCAS and CORINE area proportions was weaker (r = 0.84) and varied strongly by country. CORINE substantially overestimated seasonal croplands by 63% and underestimated grassland proportions by 37%. Our study shows that combining current state-of-the-art remote sensing methods with the large LUCAS database improves pan-European land cover mapping. Although this study focuses on European land cover, the unique combination of large survey data and machine learning of spectral-temporal metrics, may also serve as a reference case for other regions. The pan-European land cover map for 2015 developed in this study is available under https://doi.pangaea.de/10.1594/PANGAEA.896282.",2019-02-01 +30289549,CellMarker: a manually curated resource of cell markers in human and mouse.,"One of the most fundamental questions in biology is what types of cells form different tissues and organs in a functionally coordinated fashion. 
Larger-scale single-cell sequencing and biology experiment studies are now rapidly opening up new ways to track this question by revealing substantial cell markers for distinguishing different cell types in tissues. Here, we developed the CellMarker database (http://biocc.hrbmu.edu.cn/CellMarker/ or http://bio-bigdata.hrbmu.edu.cn/CellMarker/), aiming to provide a comprehensive and accurate resource of cell markers for various cell types in tissues of human and mouse. By manually curating over 100 000 published papers, 4124 entries including the cell marker information, tissue type, cell type, cancer information and source, were recorded. At last, 13 605 cell markers of 467 cell types in 158 human tissues/sub-tissues and 9148 cell makers of 389 cell types in 81 mouse tissues/sub-tissues were collected and deposited in CellMarker. CellMarker provides a user-friendly interface for browsing, searching and downloading markers of diverse cell types of different tissues. Furthermore, a summarized marker prevalence in each cell type is graphically and intuitively presented through a vivid statistical graph. We believe that CellMarker is a comprehensive and valuable resource for cell researches in precisely identifying and characterizing cells, especially at the single-cell level.",2019-01-01 +32065211,A computational platform to identify origins of replication sites in eukaryotes.,"The locations of the initiation of genomic DNA replication are defined as origins of replication sites (ORIs), which regulate the onset of DNA replication and play significant roles in the DNA replication process. The study of ORIs is essential for understanding the cell-division cycle and gene expression regulation. Accurate identification of ORIs will provide important clues for DNA replication research and drug development by developing computational methods. 
In this paper, the first integrated predictor named iORI-Euk was built to identify ORIs in multiple eukaryotes and multiple cell types. In the predictor, seven eukaryotic (Homo sapiens, Mus musculus, Drosophila melanogaster, Arabidopsis thaliana, Pichia pastoris, Schizosaccharomyces pombe and Kluyveromyces lactis) ORI data was collected from public database to construct benchmark datasets. Subsequently, three feature extraction strategies which are k-mer, binary encoding and combination of k-mer and binary were used to formulate DNA sequence samples. We also compared the different classification algorithms' performance. As a result, the best results were obtained by using support vector machine in 5-fold cross-validation test and independent dataset test. Based on the optimal model, an online web server called iORI-Euk (http://lin-group.cn/server/iORI-Euk/) was established for the novel ORI identification.",2021-03-01 +31598699,QTLbase: an integrative resource for quantitative trait loci across multiple human molecular phenotypes.,"Recent advances in genome sequencing and functional genomic profiling have promoted many large-scale quantitative trait locus (QTL) studies, which connect genotypes with tissue/cell type-specific cellular functions from transcriptional to post-translational level. However, no comprehensive resource can perform QTL lookup across multiple molecular phenotypes and investigate the potential cascade effect of functional variants. We developed a versatile resource, named QTLbase, for interpreting the possible molecular functions of genetic variants, as well as their tissue/cell-type specificity. 
Overall, QTLbase has five key functions: (i) curating and compiling genome-wide QTL summary statistics for 13 human molecular traits from 233 independent studies; (ii) mapping QTL-relevant tissue/cell types to 78 unified terms according to a standard anatomogram; (iii) normalizing variant and trait information uniformly, yielding >170 million significant QTLs; (iv) providing a rich web client that enables phenome- and tissue-wise visualization; and (v) integrating the most comprehensive genomic features and functional predictions to annotate the potential QTL mechanisms. QTLbase provides a one-stop shop for QTL retrieval and comparison across multiple tissues and multiple layers of molecular complexity, and will greatly help researchers interrogate the biological mechanism of causal variants and guide the direction of functional validation. QTLbase is freely available at http://mulinlab.org/qtlbase.",2020-01-01 +32188688,Expanding Access to Biospecimens for Lyme Disease Test Development. ,"The laboratory diagnosis of Lyme disease relies upon serologic testing. A standard or modified two-tiered testing algorithm is used to enhance the accuracy of antibody detection. However, this approach suffers from a lack of sensitivity in early Lyme disease. Ongoing efforts to develop more sensitive antibody detection technologies and other diagnostic approaches are dependent upon the availability of quality-assured biospecimens linked to reliable clinical data. In this issue of the Journal of Clinical Microbiology, Horn et al. (E. J. Horn, G. Dempsey, A. M. Schotthoefer, U. L. Prisco, et al., J Clin Microbiol 58:e00032-20, 2020, https://doi.org/10.1128/JCM.00032-20) described the development of the Lyme Disease Biobank. Clinically categorized case patients with early Lyme disease and healthy controls were identified (without laboratory diagnostic testing) from three sites where Lyme disease is endemic. 
Subjects provided whole blood and urine, which were processed and stored at a central biorepository. Whole blood, serum, and urine aliquots were prepared and are available to investigators developing laboratory diagnostics for Lyme disease. After obtaining samples, extensive laboratory testing was performed, including serologic and nucleic acid amplification testing for B. burgdorferi and other tick-borne pathogens. Direct detection methods yielded few positive results. Relative to the findings for another commonly used biorepository cohort, the results of this testing demonstrated a low seropositive rate, as determined by standard two-tiered testing. Additionally, relatively few subjects demonstrated seroconversion with testing of convalescent-phase samples. This clinical and serologically defined cohort of samples from Lyme disease and control cases from areas of Lyme disease endemicity offers an additional valuable resource for novel test development that includes alternate specimen types.",2020-05-26 +31829207,Genepanel.iobio - an easy to use web tool for generating disease- and phenotype-associated gene lists.,"When ordering genetic testing or triaging candidate variants in exome and genome sequencing studies, it is critical to generate and test a comprehensive list of candidate genes that succinctly describe the complete and objective phenotypic features of disease. Significant efforts have been made to curate gene:disease associations both in academic research and commercial genetic testing laboratory settings. However, many of these valuable resources exist as islands and must be used independently, generating static, single-resource gene:disease association lists. 
Here we describe genepanel.iobio (https://genepanel.iobio.io) an easy to use, free and open-source web tool for generating disease- and phenotype-associated gene lists from multiple gene:disease association resources, including the NCBI Genetic Testing Registry (GTR), Phenolyzer, and the Human Phenotype Ontology (HPO). We demonstrate the utility of genepanel.iobio by applying it to complex, rare and undiagnosed disease cases that had reached a diagnostic conclusion. We find that genepanel.iobio is able to correctly prioritize the gene containing the diagnostic variant in roughly half of these challenging cases. Importantly, each component resource contributed diagnostic value, showing the benefits of this aggregate approach. We expect genepanel.iobio will improve the ease and diagnostic value of generating gene:disease association lists for genetic test ordering and whole genome or exome sequencing variant prioritization.",2019-12-11 +33093586,Automatic construction of molecular similarity networks for visual graph mining in chemical space of bioactive peptides: an unsupervised learning approach.,"The increasing interest in bioactive peptides with therapeutic potentials has been reflected in a large variety of biological databases published over the last years. However, the knowledge discovery process from these heterogeneous data sources is a nontrivial task, becoming the essence of our research endeavor. Therefore, we devise a unified data model based on molecular similarity networks for representing a chemical reference space of bioactive peptides, having an implicit knowledge that is currently not explicitly accessed in existing biological databases. Indeed, our main contribution is a novel workflow for the automatic construction of such similarity networks, enabling visual graph mining techniques to uncover new insights from the ""ocean"" of known bioactive peptides. 
The workflow presented here relies on the following sequential steps: (i) calculation of molecular descriptors by applying statistical and aggregation operators on amino acid property vectors; (ii) a two-stage unsupervised feature selection method to identify an optimized subset of descriptors using the concepts of entropy and mutual information; (iii) generation of sparse networks where nodes represent bioactive peptides, and edges between two nodes denote their pairwise similarity/distance relationships in the defined descriptor space; and (iv) exploratory analysis using visual inspection in combination with clustering and network science techniques. For practical purposes, the proposed workflow has been implemented in our visual analytics software tool ( http://mobiosd-hub.com/starpep/ ), to assist researchers in extracting useful information from an integrated collection of 45120 bioactive peptides, which is one of the largest and most diverse data in its field. Finally, we illustrate the applicability of the proposed workflow for discovering central nodes in molecular similarity networks that may represent a biologically relevant chemical space known to date.",2020-10-22 +30275736,Togo National Herbarium database.,"This article describes the herbarium database of the University of Lomé. The database provides a good representation of the current knowledge of the flora of Togo. The herbarium of University of Lomé, known also as Herbarium togoense is the national herbarium and is registered in Index Herbariorum with the abbreviation TOGO. It contains 15,000 specimens of vascular plants coming mostly from all Togo's ecofloristic regions. Less than one percent of the specimens are from neighbouring countries such as Ghana, Benin and Burkina Faso. Collecting site details are specified in more than 97% of the sheet labels, but only about 50% contain geographic coordinates. Besides being a research resource, the herbarium constitutes an educational collection. 
The dataset described in this paper is registered with GBIF and accessible at https://www.gbif.org/dataset/b05dd467-aaf8-4c67-843c-27f049057b78. It was developed with the RIHA software (Réseau Informatique des Herbiers d'Afrique). The RIHA system (Chevillotte and Florence 2006, Radji et al. 2009) allows the capture of label data and associated information such as synonyms, vernacular names, taxonomic hierarchy and references.",2018-09-13 +34674583,Treating rare tumors with the assistance of the expert virtual consultation system: two cases of juvenile granulosa cell tumors.,"

Background

The lack of internationally recognized guidelines for very rare tumors, such as juvenile granulosa cell tumors (JGCTs), which are nonepithelial, unusual ovarian tumors, is a challenge for pediatric oncologists, especially in developing countries with limited resources and experience in treating rare tumors.

Methods

We report clinical data of 2 girls with JGCTs treated at the Pediatric Cancer and Blood Disorders Center of Armenia with the assistance of the EXPeRT (European Cooperative Study Group for Pediatric Rare Tumors) international cooperation panel.

Case presentation

Two girls (16 and 15 years old) with JGCTs of the ovaries, stage Ic, underwent surgery and, with consultation through an online advisory board (http://vrt.cineca.it/), received 4 cycles of chemotherapy according to the PEI regimen (cisplatin, etoposide, ifosfamide).

Conclusion

Very rare tumors, especially in advanced stages, have limited data and a low survival rate. International collaboration with the EXPeRT group is beneficial for physicians with limited experience and facilitates research in pediatric oncology.",2021-10-21 +34709855,"""American Indian"" as a Racial Category in Public Health: Implications for Communities and Practice.","When public health considers the health and disease status of Indigenous people, it often does so using a racial lens. In recent decades, public health researchers have begun to acknowledge that commonly employed racial categories represent history, power dynamics, embodiment, and legacies of discrimination and racism, rather than innate biology. Even so, public health has not yet fully embraced an understanding of other components of identity formation for Indigenous people, including political status within Native nations. In this article, we discuss why the continued racial conceptualization of Indigeneity in US public health is inadequate. We begin by providing a brief account of racialization as a tool of colonization, of failure to recognize and acknowledge Indigenous sovereignty, and of common public health practices of Indigenous data collection and interpretation. We then articulate the stakes of racialized health data for Native communities. We end by offering alternative approaches, many drawn from scholarship from Indigenous researchers. (Am J Public Health. 2021;111(11):1969-1975. https://doi.org/10.2105/AJPH.2021.306465).",2021-10-28 +34879750,Does Transient Opioid Use Increase Risk of Short-Term Respiratory Exacerbation among Older Adults with Chronic Obstructive Pulmonary Disease?,"The objective of this study was to examine the association between transient opioid use and acute respiratory exacerbations among older Medicare beneficiaries with COPD. 
This study was conducted using national Medicare 5% sample administrative claims data between 2012 and 2016 and employed a case-crossover design. The date of eligible COPD exacerbation events was defined as the index date and the presence of opioid prescriptions during a 7-day exposure window prior to index date was compared to a set of 10 control periods, each 7-days long. The association between opioid exposure and COPD exacerbation was estimated using a conditional logistic regression with robust sandwich estimators, after accounting for known time-varying confounders. Among 16,290 eligible COPD exacerbations included in the study sample, the average patient age was 77.08 years, and 64.2% of events occurred in women. Transient exposure to opioids was associated with a 76% increase in the odds of an acute COPD exacerbation (OR: 1.76, 95%CI: 1.67-1.84), and each 25 mg increase in morphine milligram equivalent dose was associated with an 18% increase in the odds of exacerbation (OR: 1.18, 95% CI: 1.15-1.21). Effect estimates were consistent across subgroup analyses conducted among events identified in the emergency department versus hospital, and among individuals with a single exacerbation event versus those with multiple exacerbations. Transient exposure to opioids was associated with an increased short-term risk of respiratory exacerbation among older adults with COPD. Treatment decisions for breathlessness among individuals with COPD need to account for the benefit-risk profile of opioids. Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.2013460 .",2021-12-08 +34586887,Shared Interactive Book Reading Interventions for Young Children With Disabilities: A Systematic Review.,"Purpose Shared interactive book reading (SIBR) is an evidence-based practice for young children who are typically developing and those with developmental disabilities or considered at risk for developmental delays. 
The purpose of this review was to provide a comprehensive examination of the evidence of using SIBR to facilitate growth in language skills for young children with developmental disabilities and/or delays. Specifically, authors examined the descriptive characteristics, study rigor, and effect sizes for language and literacy outcomes. Method We extracted data from studies meeting specified criteria (n = 23) published in peer-reviewed journals on a wide range of variables, including participant characteristics, setting, training/coaching, defined independent and dependent variables, study rigor, and overall outcomes. Descriptive and study rigor data were aggregated using descriptive statistics. Effect-size estimates were calculated for all child outcomes related to language. Results Descriptive data were variable across studies. Three single-case experimental design and three group design studies met design standards without reservations. Single-case experimental design studies overall showed positive effects on child language and communication. Within group design studies, expressive language outcomes showed the largest effect sizes. Conclusion A review of SIBR studies indicates this as a viable intervention to positively impact the language skills of young children with developmental disabilities and/or delays. Supplemental Material https://doi.org/10.23641/asha.16674355.",2021-09-29 +33002111,PyDISH: database and analysis tools for heme porphyrin distortion in heme proteins. ,"Heme participates in a wide range of biological functions such as oxygen transport, electron transport, oxygen reduction, transcriptional regulation and so on. While the mechanism of each function has been investigated for many heme proteins, the origin of the diversity of the heme functions is still unclear and a crucial scientific issue. 
We have constructed a database of heme proteins, named Python-based database and analyzer for DIStortion of Heme porphyrin (PyDISH), which also contains some analysis tools. The aim of PyDISH is to integrate the information on the structures of hemes and heme proteins and the functions of heme proteins. This database will provide the structure-function relationships focusing on heme porphyrin distortion and lead to the elucidation of the origin of the functional diversity of heme proteins. In addition, the insights obtained from the database can be used for the design of protein function. PyDISH contains the structural data of more than 13 000 hemes extracted from the Protein Data Bank, including heme porphyrin distortion, axial ligands coordinating to the heme and the orientation of the propionate sidechains of heme. PyDISH also has information about the protein domains, including Uniprot ID, protein fold by CATH ID, organism, coordination distance and so on. The analytical tools implemented in PyDISH allow users to not only browse and download the data but also analyze the structures of heme porphyrin by using the analytical tools implemented in PyDISH. PyDISH users will be able to utilize the obtained results for the design of protein function. Database URL: http://pydish.bio.info.hiroshima-cu.ac.jp/.",2020-10-01 +29858801,Allele Frequency Net Database.,"The allele frequency net database (AFND, http://www.allelefrequencies.net ) is an online web-based repository that contains information on the frequencies of immune-related genes and their corresponding alleles in worldwide human populations. At present, the system contains data from 1505 populations in more than ten million individuals on the frequency of genes from different polymorphic regions including data for the human leukocyte antigens (HLA) system. 
This resource has been widely used in a variety of contexts such as histocompatibility, immunology, epidemiology, pharmacogenetics, and population genetics, among many others. In this chapter, we present some of the more commonly used searching mechanisms and some of the most recent developments included in AFND.",2018-01-01 +34976008,Geographical Landscape and Transmission Dynamics of SARS-CoV-2 Variants Across India: A Longitudinal Perspective.,"Globally, SARS-CoV-2 has moved from one tide to another with ebbs in between. Genomic surveillance has greatly aided the detection and tracking of the virus and the identification of the variants of concern (VOC). The knowledge and understanding from genomic surveillance is important for a populous country like India for public health and healthcare officials for advance planning. An integrative analysis of the publicly available datasets in GISAID from India reveals the differential distribution of clades, lineages, gender, and age over a year (Apr 2020-Mar 2021). The significant insights include the early evidence towards B.1.617 and B.1.1.7 lineages in the specific states of India. Pan-India longitudinal data highlighted that B.1.36* was the predominant clade in India until January-February 2021 after which it has gradually been replaced by the B.1.617.1 lineage, from December 2020 onward. Regional analysis of the spread of SARS-CoV-2 indicated that B.1.617.3 was first seen in India in the month of October in the state of Maharashtra, while the now most prevalent strain B.1.617.2 was first seen in Bihar and subsequently spread to the states of Maharashtra, Gujarat, and West Bengal. To enable a real time understanding of the transmission and evolution of the SARS-CoV-2 genomes, we built a transmission map available on https://covid19-indiana.soic.iupui.edu/India/EmergingLineages/April2020/to/March2021. 
Based on our analysis, the rate estimate for divergence in our dataset was 9.48 e-4 substitutions per site/year for SARS-CoV-2. This would enable pandemic preparedness with the addition of future sequencing data from India available in the public repositories for tracking and monitoring the VOCs and variants of interest (VOI). This would help aid decision making from the public health perspective.",2021-12-17 +33657667,Effects of patient decision aids in patients with type 2 diabetes mellitus: A systematic review and meta-analysis.,"

Aims

This study aimed to systematically evaluate the effectiveness of patient decision aids on knowledge, decisional conflict and decisional self-efficacy outcomes in patients with diabetes.

Methods

A comprehensive database search was performed using the Web of Science, Cochrane Library, PubMed, Embase, PsycINFO (Ovid), CINAHL (EBSCO), CNKI, VIP, Wan Fang Database and the Ottawa Decision Aid Library Inventory (http://decisionaid.ohri.ca/index.html) from inception to 13 October 2019. Two reviewers independently searched databases, screened articles, extracted data and evaluated the risk of bias of included studies. Then Rev Man 5.3 software was adopted for statistical analysis.

Results

Ten articles containing 1,452 people with diabetes were selected. The results of meta-analysis showed that patient decision aids had a positive effect on reducing decisional conflict and improving decisional self-efficacy among patients with type 2 diabetes. Meanwhile, this article also revealed that patient decision aids have beneficial short-term effects on improving knowledge, but there was no significant long-term effect.

Conclusion

Patient decision aids are capable of becoming support tools to improve shared decision making. Further implementation studies are required to transform patient decision aids tools into clinical practice.",2021-03-03 +34153189,DIAproteomics: A Multifunctional Data Analysis Pipeline for Data-Independent Acquisition Proteomics and Peptidomics.,"Data-independent acquisition (DIA) is becoming a leading analysis method in biomedical mass spectrometry. The main advantages include greater reproducibility and sensitivity and a greater dynamic range compared with data-dependent acquisition (DDA). However, the data analysis is complex and often requires expert knowledge when dealing with large-scale data sets. Here we present DIAproteomics, a multifunctional, automated, high-throughput pipeline implemented in the Nextflow workflow management system that allows one to easily process proteomics and peptidomics DIA data sets on diverse compute infrastructures. The central components are well-established tools such as the OpenSwathWorkflow for the DIA spectral library search and PyProphet for the false discovery rate assessment. In addition, it provides options to generate spectral libraries from existing DDA data and to carry out the retention time and chromatogram alignment. The output includes annotated tables and diagnostic visualizations from the statistical postprocessing and computation of fold-changes across pairwise conditions, predefined in an experimental design. DIAproteomics is well documented open-source software and is available under a permissive license to the scientific community at https://www.openms.de/diaproteomics/.",2021-06-21 +33289897,"Seaview Version 5: A Multiplatform Software for Multiple Sequence Alignment, Molecular Phylogenetic Analyses, and Tree Reconciliation.","We present Seaview version 5, a multiplatform program to perform multiple alignment and phylogenetic tree building from molecular sequence data. 
Seaview provides network access to sequence databases, alignment with arbitrary algorithm, parsimony, distance and maximum likelihood tree building with PhyML, and display, printing, and copy-to-clipboard or to SVG files of rooted or unrooted, binary or multifurcating phylogenetic trees. While Seaview is primarily a program providing a graphical user interface to guide the user into performing desired analyses, Seaview possesses also a command-line mode adequate for user-provided scripts. Seaview version 5 introduces the ability to reconcile a gene tree with a reference species tree and use this reconciliation to root and rearrange the gene tree. Seaview is freely available at http://doua.prabi.fr/software/seaview .",2021-01-01 +34865567,Sociodemographic dynamics and age trajectories of depressive symptoms among adults in mid- and later life: a cohort perspective.,"

Objectives

This study explored the age trajectories of depressive symptoms across multiple cohort groups who were in middle and late adulthood; examined sociodemographic differences in these trajectories; and investigated how relevant factors contributed to depressive symptoms trends of different cohorts.

Methods

Drawing on data from the 1994-2016 Health and Retirement Study (HRS), we used growth curve models to examine the age patterns of depressive symptoms, changes in sociodemographic gaps in depressive symptoms trajectories, and predictors of changes in depressive symptoms.

Results

In general, adults' depressive symptoms started high in middle-adulthood, declined in young-old life, increased moderately in mid-old life, and peaked in old-old life. In detail, more nuanced cohort-specific age trajectories of depressive symptoms were observed, challenging the prevailing assumption of a common age trajectory of depressive symptoms. Later-born cohorts displayed higher levels of depressive symptoms than earlier-born cohorts at observed ages. Second, we found intra-cohort sociodemographic differences in levels of depressive symptoms, but these differences' growth rates varied by specific factors. Regardless of the cohort group, as people age, the gender gap in depressive symptoms persisted but the partnership gap reduced. A widening educational gap across cohorts was observed, but it declined with age in some cohorts.

Conclusion

Results suggest more evidence for the persistent inequality and age-as-leveler hypotheses rather than the cumulative (dis-)advantage hypothesis.Supplemental data for this article can be accessed online at https://doi.org/10.1080/13607863.2021.2010182 .",2021-12-06 +,"First Report of Phytoplasma ‘Candidatus Phytoplasma aurantifolia’ Associated with Purple Top Diseased Potatoes (Solanum tuberosum) in Guangdong Province, China","China is the largest producer of potatoes in the world. In 2014, a total of 96 million metric tons of potatoes were produced on more than 5.6 million hectares. Recognizing its importance, the Chinese central government elevated potatoes in 2010 to the status of the fourth major staple crop, following rice, wheat, and corn. In recent years, phytoplasma-associated diseases are becoming increasingly important in potato production, with an incidence rate of 80 to 100% in some commercial fields, according to disease surveys conducted by our team from 2005 to 2017 in seed and commercial fields of Yunnan Province and Inner Mongolia Autonomous Region. In China, six phytoplasma 16Sr groups, associated with vegetables, cereals, flowers, shrubs, fruit, and ornamental trees, in 29 genera, have been reported (https://plantpathology.ba.ars.usda.gov/cgi-bin/​resource/phytoclass.cgi?strainsearchbox=&countrysearchbox=China&submit_country_​search=search&diseasesearchbox=&hostsearchbox=&group_popup=I&subgroup_popup=​all). We found that phytoplasma group 16SrI, 16SrVI, and 16SrXII were associated with potatoes. Information on phytoplasma associated with potatoes in many potato growing areas remains little known (Cheng et al. 2015). Guangdong Province, especially Huidong County, is one of the major winter potato production areas in China. 
During a potato disease survey conducted in Huidong County, Guangdong Province (22.9850°N, 114.7200°E) in March 2014, prior to harvest, eight samples collected displayed symptoms of proliferation, upright growth, purpling of apical leaves, shortened and thick-ended stolons, stolons with multiple tubers, and formation of aerial tubers—all characteristics of phytoplasma disease. The incidence of phytoplasma in those fields ranged from 20 to 35%. Four samples were collected from asymptomatic plants at the same time. Total DNA was extracted from tissues (leaves, stems, and roots) of symptomatic and asymptomatic plants using a DNeasy Plant Mini Kit (Qiagen, Valencia, CA) according to the manufacturer’s instructions. A nested polymerase chain reaction (PCR) was performed by using primer pair P1/P7, followed by P1A/16S-SR (Lee et al. 2004). A 1.5-kb PCR fragment was amplified from the DNAs of all symptomatic plants and none of the symptomless plants. The PCR products were cloned into a pCR8/GW/TOPO vector (Invitrogen, Carlsbad, CA) and sequenced by GENEWIZ (South Plainfield, NJ). The potato phytoplasma 16S rDNA sequence (GenBank accession no. KM212951) was analyzed by iPhyClassifier software (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi) (Zhao et al. 2013), and the results showed that the 16S rRNA gene sequence had a 99.7% sequence identity to the reference strain (GenBank accession no. Y10097) for ‘Candidatus Phytoplasma aurantifolia’ (White et al. 1998). The restriction fragment length polymorphism similarity is identical (coefficient 1.00) to the reference pattern of the 16Sr group II, subgroup A (GenBank accession no. L33765). The phylogenetic tree was constructed using representative phytoplasma strains reported from China by the neighbor-joining method with 1,000 bootstrap replicates in the program MEGA 4 (Tamura et al. 2007). 
The results indicated that the potato phytoplasma 16S rDNA sequence isolated in this study clustered with two 16SrII group strains reported from China (tomato yellows strain SGW and kidney bean little leaf [Dong et al. 2013]) and from Korea (pigeon pea witches’ broom). ‘Ca. P. australiense’ is on the A1 regulated organism list for Canada and Bahrain and quarantined by the United States. To our knowledge, this is the first report of ‘Ca. P. australiense’ associated with potatoes in China. Phytoplasmas from Guangdong Province may be easily spread to spring and summer potato production areas through tubers. This might be one of the factors accounting for the increasing spread and importance of phytoplasma-associated diseases on potatoes in China. Therefore, further study on identifying alternate host plants, potential vectors, and geographic distribution will be helpful to control this phytoplasmal disease.",2019-05-01 +33165572,ProteoCombiner: integrating bottom-up with top-down proteomics data for improved proteoform assessment.,"

Motivation

We present a high-performance software integrating shotgun with top-down proteomic data. The tool can deal with multiple experiments and search engines. It enables rapid and easy visualization, manual validation and comparison of the identified proteoform sequences including the post-translational modification characterization.

Results

We demonstrate the effectiveness of our approach on a large-scale Escherichia coli dataset; ProteoCombiner unambiguously shortlisted proteoforms among those identified by the multiple search engines.

Availability and implementation

ProteoCombiner, a demonstration video and user tutorial are freely available at https://proteocombiner.pasteur.fr, for academic use; all data are thus available from the ProteomeXchange consortium (identifier PXD017618).

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-08-01 +34081116,Assessing Consistency Across Functional Screening Datasets in Cancer Cells.,"

Motivation

Many high-throughput screening studies have been carried out in cancer cell lines to identify therapeutic agents and targets. Existing consistency assessment studies only examined two datasets at a time, with conclusions based on a subset of carefully selected features rather than considering global consistency of all the data. However, poor concordance can still be observed for a large part of the data even when selected features are highly consistent.

Results

In this study we assembled nine compound screening datasets and three functional genomics datasets. We derived direct measures of consistency as well as indirect measures of consistency based on association between functional data and copy number-adjusted gene expression data. These results have been integrated into a web application - the Functional Data Consistency Explorer (FDCE), to allow users to make queries and generate interactive visualizations so that functional data consistency can be assessed for individual features of interest.

Availability

The FDCE web tool we have developed and the functional data consistency measures we have generated are available at https://lccl.shinyapps.io/FDCE/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-06-03 +33247934,OGDA: a comprehensive organelle genome database for algae. ,"Algae are the oldest taxa on Earth, with an evolutionary relationship that spans prokaryotes (Cyanobacteria) and eukaryotes. A long evolutionary history has led to high algal diversity. Their organelle DNAs are characterized by uniparental inheritance and a compact genome structure compared with nuclear genomes; thus, they are efficient molecular tools for the analysis of gene structure, genome structure, organelle function and evolution. However, an integrated organelle genome database for algae, which could enable users to both examine and use relevant data, has not previously been developed. Therefore, to provide an organelle genome platform for algae, we have developed a user-friendly database named Organelle Genome Database for Algae (OGDA, http://ogda.ytu.edu.cn/). OGDA contains organelle genome data either retrieved from several public databases or sequenced in our laboratory (Laboratory of Genetics and Breeding of Marine Organism [MOGBL]), which are continuously updated. The first release of OGDA contains 1055 plastid genomes and 755 mitochondrial genomes. Additionally, a variety of applications have been integrated into this platform to analyze the structural characteristics, collinearity and phylogeny of organellar genomes for algae. This database represents a useful tool for users, enabling the rapid retrieval and analysis of information related to organellar genomes for biological discovery.",2020-11-01 +29140510,Saccharomyces genome database informs human biology.,"The Saccharomyces Genome Database (SGD; http://www.yeastgenome.org) is an expertly curated database of literature-derived functional information for the model organism budding yeast, Saccharomyces cerevisiae. 
SGD constantly strives to synergize new types of experimental data and bioinformatics predictions with existing data, and to organize them into a comprehensive and up-to-date information resource. The primary mission of SGD is to facilitate research into the biology of yeast and to provide this wealth of information to advance, in many ways, research on other organisms, even those as evolutionarily distant as humans. To build such a bridge between biological kingdoms, SGD is curating data regarding yeast-human complementation, in which a human gene can successfully replace the function of a yeast gene, and/or vice versa. These data are manually curated from published literature, made available for download, and incorporated into a variety of analysis tools provided by SGD.",2018-01-01 +33693667,"UbiNet 2.0: a verified, classified, annotated and updated database of E3 ubiquitin ligase-substrate interactions. ","Ubiquitination is an important post-translational modification, which controls protein turnover by labeling malfunctional and redundant proteins for proteasomal degradation, and also serves intriguing non-proteolytic regulatory functions. E3 ubiquitin ligases, whose substrate specificity determines the recognition of target proteins of ubiquitination, play crucial roles in ubiquitin-proteasome system. UbiNet 2.0 is an updated version of the database UbiNet. It contains 3332 experimentally verified E3-substrate interactions (ESIs) in 54 organisms and rich annotations useful for investigating the regulation of ubiquitination and the substrate specificity of E3 ligases. Based on the accumulated ESIs data, the recognition motifs in substrates for each E3 were also identified and a functional enrichment analysis was conducted on the collected substrates. 
To facilitate the research on ESIs with different categories of E3 ligases, UbiNet 2.0 performed strictly evidence-based classification of the E3 ligases in the database based on their mechanisms of ubiquitin transfer and substrate specificity. The platform also provides users with an interactive tool that can visualize the ubiquitination network of a group of self-defined proteins, displaying ESIs and protein-protein interactions in a graphical manner. The tool can facilitate the exploration of inner regulatory relationships mediated by ubiquitination among proteins of interest. In summary, UbiNet 2.0 is a user-friendly web-based platform that provides comprehensive as well as updated information about experimentally validated ESIs and a visualized tool for the construction of ubiquitination regulatory networks available at http://awi.cuhk.edu.cn/~ubinet/index.php.",2021-03-01 +32047897,TRlnc: a comprehensive database for human transcriptional regulatory information of lncRNAs.,"Long noncoding RNAs (lncRNAs) have been proven to play important roles in transcriptional processes and biological functions. With the increasing study of human diseases and biological processes, information in human H3K27ac ChIP-seq, ATAC-seq and DNase-seq datasets is accumulating rapidly, resulting in an urgent need to collect and process data to identify transcriptional regulatory regions of lncRNAs. We therefore developed a comprehensive database for human regulatory information of lncRNAs (TRlnc, http://bio.licpathway.net/TRlnc), which aimed to collect available resources of transcriptional regulatory regions of lncRNAs and to annotate and illustrate their potential roles in the regulation of lncRNAs in a cell type-specific manner. The current version of TRlnc contains 8 683 028 typical enhancers/super-enhancers and 32 348 244 chromatin accessibility regions associated with 91 906 human lncRNAs. 
These regions are identified from over 900 human H3K27ac ChIP-seq, ATAC-seq and DNase-seq samples. Furthermore, TRlnc provides the detailed genetic and epigenetic annotation information within transcriptional regulatory regions (promoter, enhancer/super-enhancer and chromatin accessibility regions) of lncRNAs, including common SNPs, risk SNPs, eQTLs, linkage disequilibrium SNPs, transcription factors, methylation sites, histone modifications and 3D chromatin interactions. It is anticipated that the use of TRlnc will help users to gain in-depth and useful insights into the transcriptional regulatory mechanisms of lncRNAs.",2021-03-01 +32349124,VirusCircBase: a database of virus circular RNAs.,"Circular RNAs (circRNAs) are covalently closed long noncoding RNAs critical in diverse cellular activities and multiple human diseases. Several cancer-related viral circRNAs have been identified in double-stranded DNA viruses (dsDNA), yet no systematic study about the viral circRNAs has been reported. Herein, we have performed a systematic survey of 11 924 circRNAs from 23 viral species by computational prediction of viral circRNAs from viral-infection-related RNA sequencing data. Besides the dsDNA viruses, our study has also revealed lots of circRNAs in single-stranded RNA viruses and retro-transcribing viruses, such as the Zika virus, the Influenza A virus, the Zaire ebolavirus, and the Human immunodeficiency virus 1. Most viral circRNAs had reverse complementary sequences or repeated sequences at the flanking sequences of the back-splice sites. Most viral circRNAs only expressed in a specific cell line or tissue in a specific species. Functional enrichment analysis indicated that the viral circRNAs from dsDNA viruses were involved in KEGG pathways associated with cancer. 
All viral circRNAs presented in the current study were stored and organized in VirusCircBase, which is freely available at http://www.computationalbiology.cn/ViruscircBase/home.html and is the first virus circRNA database. VirusCircBase forms the fundamental atlas for the further exploration and investigation of viral circRNAs in the context of public health.",2021-03-01 +31526404,TwinsUK: The UK Adult Twin Registry Update.,"TwinsUK is the largest cohort of community-dwelling adult twins in the UK. The registry comprises over 14,000 volunteer twins (14,838 including mixed, single and triplets); it is predominantly female (82%) and middle-aged (mean age 59). In addition, over 1800 parents and siblings of twins are registered volunteers. During the last 27 years, TwinsUK has collected numerous questionnaire responses, physical/cognitive measures and biological measures on over 8500 subjects. Data were collected alongside four comprehensive phenotyping clinical visits to the Department of Twin Research and Genetic Epidemiology, King's College London. Such collection methods have resulted in very detailed longitudinal clinical, biochemical, behavioral, dietary and socioeconomic cohort characterization; it provides a multidisciplinary platform for the study of complex disease during the adult life course, including the process of healthy aging. The major strength of TwinsUK is the availability of several 'omic' technologies for a range of sample types from participants, which includes genomewide scans of single-nucleotide variants, next-generation sequencing, metabolomic profiles, microbiomics, exome sequencing, epigenetic markers, gene expression arrays, RNA sequencing and telomere length measures. 
TwinsUK facilitates and actively encourages sharing the 'TwinsUK' resource with the scientific community - interested researchers may request data via the TwinsUK website (http://twinsuk.ac.uk/resources-for-researchers/access-our-data/) for their own use or future collaboration with the study team. In addition, further cohort data collection is planned via the Wellcome Open Research gateway (https://wellcomeopenresearch.org/gateways). The current article presents an up-to-date report on the application of technological advances, new study procedures in the cohort and future direction of TwinsUK.",2019-09-17 +,Wearable Heterosynapses: Ultralow Power Wearable Heterosynapse with Photoelectric Synergistic Modulation (Adv. Sci. 8/2020),"In article number https://doi.org/10.1002/advs.201903480, Lin Chen, Qing‐Qing Sun, and co‐workers present a wearable 2D artificial heterosynapse for simulating multi‐terminal correlations in biology with two modulation modes–an electronic mode and a photoexcited mode. The artificial heterosynapse exhibits ultrafast speed and ultralow power consumption, providing a path for a neuromorphic computing system owning more excellent processing ability than the human brain to deal with the increasing data samples.",2020-04-01 +34558834,Transcriptomic Cross-Species Analysis of Chronic Liver Disease Reveals Consistent Regulation Between Humans and Mice.,"Mouse models are frequently used to study chronic liver diseases (CLDs). To assess their translational relevance, we quantified the similarity of commonly used mouse models to human CLDs based on transcriptome data. Gene-expression data from 372 patients were compared with data from acute and chronic mouse models consisting of 227 mice, and additionally to nine published gene sets of chronic mouse models. Genes consistently altered in humans and mice were mapped to liver cell types based on single-cell RNA-sequencing data and validated by immunostaining. 
Considering the top differentially expressed genes, the similarity between humans and mice varied among the mouse models and depended on the period of damage induction. The highest recall (0.4) and precision (0.33) were observed for the model with 12-months damage induction by CCl4 and by a Western diet, respectively. Genes consistently up-regulated between the chronic CCl4 model and human CLDs were enriched in inflammatory and developmental processes, and mostly mapped to cholangiocytes, macrophages, and endothelial and mesenchymal cells. Down-regulated genes were enriched in metabolic processes and mapped to hepatocytes. Immunostaining confirmed the regulation of selected genes and their cell type specificity. Genes that were up-regulated in both acute and chronic models showed higher recall and precision with respect to human CLDs than exclusively acute or chronic genes. Conclusion: Similarly regulated genes in human and mouse CLDs were identified. Despite major interspecies differences, mouse models detected 40% of the genes significantly altered in human CLD. The translational relevance of individual genes can be assessed at https://saezlab.shinyapps.io/liverdiseaseatlas/.",2021-08-28 +34964684,Microsatellite instability-high is rare events in refractory pediatric solid tumors.,"Microsatellite instability (MSI)-high status is associated with good responsiveness to immune checkpoint inhibitors. Although MSI-high status has been actively investigated in pediatric brain tumors, studies of other pediatric solid tumors are lacking. Among 334 consecutive pediatric patients with solid tumors, we retrospectively analyzed formalin-fixed paraffin-embedded tumor tissues of 36 of 74 patients (49%) who died of disease. We assessed the MSI status in these tissues using five multiplexed markers. The results revealed that none of the patients had an MSI-high status. 
These results indicate that MSI-high status is a rare event in pediatric patients with refractory/relapsed solid tumors.Supplemental data for this article is available online at https://doi.org/10.1080/08880018.2021.1998266.",2021-12-29 +34859618,"Development, validation, and visualization of a web-based nomogram for predicting the incidence of leiomyosarcoma patients with distant metastasis.","

Background

Leiomyosarcoma (LMS) is one of the most common soft tissue sarcomas. LMS is prone to distant metastasis (DM), and patients with DM have a poor prognosis.

Aim

In this study, we investigated the risk factors of DM in LMS patients and the prognostic factors of LMS patients with DM.

Methods and results

LMS patients diagnosed between 2010 and 2016 were extracted from the Surveillance, Epidemiology, and End Result (SEER) database. Patients were randomly divided into the training set and validation set. Univariate and multivariate logistic regression analyses were performed, and a nomogram was established. The area under the curve (AUC), calibration curve, and decision curve analysis (DCA) were used to evaluate the nomogram. Based on the nomogram, a web-based nomogram is established. The univariate and multivariate Cox regression analyses were used to assess the prognostic risk factors of LMS patients with DM. Eventually, 2184 patients diagnosed with LMS were enrolled, randomly divided into the training set (n = 1532, 70.14%) and validation set (n = 652, 29.86%). Race, primary site, grade, T stage, and tumor size were correlated with DM incidence in LMS patients. The AUC of the nomogram is 0.715 in training and 0.713 in the validation set. The calibration curve and DCA results showed that the nomogram performed well in predicting the DM risk. A web-based nomogram was established to predict DM's risk in LMS patients (https://wenn23.shinyapps.io/riskoflmsdm/). Epithelioid LMS, in uterus, older age, giant tumor, multiple organ metastasis, without surgery, and chemotherapy had a poor prognosis.

Conclusions

The established web-based nomogram (https://wenn23.shinyapps.io/riskoflmsdm/) is an accurate and personalized tool to predict the risks of LMS developing DM. Advanced age, larger tumor, multiple organ metastasis, epithelioid type, uterine LMS, no surgery, and no chemotherapy were associated with poor prognosis in LMS patients with DM.",2021-12-03 +32533862,Resource: A multi-species multi-timepoint transcriptome database and webpage for the pineal gland and retina.,"The website and database https://snengs.nichd.nih.gov provides RNA sequencing data from multi-species analysis of the pineal glands from zebrafish (Danio rerio), chicken (White Leghorn), rat (Rattus novegicus), mouse (Mus musculus), rhesus macaque (Macaca mulatta), and human (Homo sapiens); in most cases, retinal data are also included along with results of the analysis of a mixture of RNA from tissues. Studies cover day and night conditions; in addition, a time series over multiple hours, a developmental time series and pharmacological experiments on rats are included. The data have been uniformly re-processed using the latest methods and assemblies to allow for comparisons between experiments and to reduce processing differences. The website presents search functionality, graphical representations, Excel tables, and track hubs of all data for detailed visualization in the UCSC Genome Browser. As more data are collected from investigators and improved genomes become available in the future, the website will be updated. This database is in the public domain and elements can be reproduced by citing the URL and this report. 
This effort makes the results of 21st century transcriptome profiling widely available in a user-friendly format that is expected to broadly influence pineal research.",2020-07-08 +,Atypical Peripheral Blood Cell Morphology in COVID-19 (Sars-CoV-2) Patients from Mount Sinai Health System in New York City,"INTRODUCTION Coronavirus disease 2019 (COVID-19) is a respiratory disease caused by a novel coronavirus named severe acute respiratory syndrome coronavirus 2 (SARS‐CoV‐2). Recent studies have suggested that COVID-19 positive patients present with leukopenia, lymphopenia, neutrophilia, thrombocytopenia, and higher neutrophil: lymphocyte ratio (NLR) and monocyte: lymphocyte ratio (MLR). More recently, we reported hypersegmented granulocytes and COVID-19 infection in Blood. 2020 Jun 11;135(24):2196. Neutrophil hypersegmentation has been closely associated with vitamin B12, folate and iron deficiencies, as well as methotrexate use, chemotherapy toxicity, uremia, heat stroke, myelodysplasia and Boucher-Neuhäuser Syndrome. Initially, these cytomorphologic changes may easily be overlooked or dismissed as non-specific reactive changes. In this study, we expand our initial observation on our index case to a larger case series. To the best of our knowledge, this is the largest case series to describe the concurrent lymphocyte and unique granulocyte atypia associated with SARS-CoV-2 infection. METHODS Study Design 2,199 patients were hospitalized in the Mount Sinai Health System from Feb 27 to April 2, 2020 with confirmed COVID-19 positivity. Data obtained for this study was covered under an Institutional Review Board (IRB) waiver, HS#:12-00133 GCO#1:12-036(0001-08) Inclusion criteria 50 peripheral blood smears flagged for Pathologist review from March 13 - April 20, 2020 at Mount Sinai Hospital Clinical Hematology Laboratory were included in this study. 
All suspected COVID-19 cases were confirmed using real-time polymerase chain reaction (RT-PCR) assay to test nasal and pharyngeal swab specimens, per WHO guidelines. Of the 50 COVID-19 positive peripheral blood smears, 39 slides were scanned and imaged with Scopio Labs X100 Full Field Digital Microscope. The X100 provided high resolution oil-immersion level images of large scanned areas. https://scopiolabs.com/hematology/ 19 peripheral blood smears were blindly and independently reviewed by 4 Hematopathologists (CS, PK, JC, JTF), with particular emphasis on granulocyte cytomorphology and percent of hypersegmented neutrophils present (defined as neutrophils with 5 or more nuclear lobes in at least 3% of cells or presence of 6 or more lobes). Atypical lymphocyte morphology was also evaluated and categorized as Downey type I, II, III or plasmacytoid, while monocyte morphology was assessed for unusual nuclear folds and features. Evaluation of platelets and other abnormalities were noted. The presence and degree of significant cytologic atypia was recorded and compared to 20 COVID-19 negative blood smears. RESULTS 16 of the 19 (84%) COVID-19 positive cases showed hypersegmented neutrophils, and all 19 harbored atypical lymphocytes and monocyte morphology, with giant platelets. In contrast, 5 of the 20 (25%) COVID-19 negative cases showed hypersegmented neutrophils, with 2 patients displaying atypical monocytes; none showed atypical lymphocytes or giant platelets (p = 0.022). Concurrent laboratory values showed no evidence of vitamin B12 or folate deficiency. Representative images are summarized in Figure 1 (A-C, 5-6 lobed neutrophils; D-E atypical plasmacytoid lymphocytes, G-I atypical monocytes, J-L giant platelets). 
CONCLUSION We report atypical hypersegmented neutrophils with toxic cytoplasmic change, atypical monocytes, plasmacytoid lymphocytes, and giant platelets in peripheral blood smears of COVID-19 patients which are significantly higher than in control COVID-19 negative patients. Figure 1

Disclosures

Teruya-Feldstein:Edge Anthem: Consultancy.",2020-11-05 +34715772,isoCNV: in silico optimization of copy number variant detection from targeted or exome sequencing data.,"

Background

Accurate copy number variant (CNV) detection is especially challenging for both targeted sequencing (TS) and whole-exome sequencing (WES) data. To maximize the performance, the parameters of the CNV calling algorithms should be optimized for each specific dataset. This requires obtaining validated CNV information using either multiplex ligation-dependent probe amplification (MLPA) or array comparative genomic hybridization (aCGH). They are gold standard but time-consuming and costly approaches.

Results

We present isoCNV which optimizes the parameters of DECoN algorithm using only NGS data. The parameter optimization process is performed using an in silico CNV validated dataset obtained from the overlapping calls of three algorithms: CNVkit, panelcn.MOPS and DECoN. We evaluated the performance of our tool and showed that increases the sensitivity in both TS and WES real datasets.

Conclusions

isoCNV provides an easy-to-use pipeline to optimize DECoN that allows the detection of analysis-ready CNV from a set of DNA alignments obtained under the same conditions. It increases the sensitivity of DECoN without the need for orthogonal methods. isoCNV is available at https://gitlab.com/sequentiateampublic/isocnv .",2021-10-29 +34925273,MDRSA: A Web Based-Tool for Rapid Identification of Multidrug Resistant Staphylococcus aureus Based on Matrix-Assisted Laser Desorption Ionization-Time of Flight Mass Spectrometry.,"As antibiotics resistance on superbugs has risen, more and more studies have focused on developing rapid antibiotics susceptibility tests (AST). Meanwhile, identification of multiple antibiotics resistance on Staphylococcus aureus provides instant information which can assist clinicians in administrating the appropriate prescriptions. In recent years, matrix-assisted laser desorption ionization-time of flight mass spectrometry (MALDI-TOF MS) has emerged as a powerful tool in clinical microbiology laboratories for the rapid identification of bacterial species. Yet, lack of study devoted on providing efficient methods to deal with the MS shifting problem, not to mention to providing tools incorporating the MALDI-TOF MS for the clinical use which deliver the instant administration of antibiotics to the clinicians. In this study, we developed a web tool, MDRSA, for the rapid identification of oxacillin-, clindamycin-, and erythromycin-resistant Staphylococcus aureus. Specifically, the kernel density estimation (KDE) was adopted to deal with the peak shifting problem, which is critical to analyze mass spectra data, and machine learning methods, including decision trees, random forests, and support vector machines, which were used to construct the classifiers to identify the antibiotic resistance. 
The areas under the receiver operating the characteristic curve attained 0.8 on the internal (10-fold cross validation) and external (independent testing) validation. The promising results can provide more confidence to apply these prediction models in the real world. Briefly, this study provides a web-based tool to provide rapid predictions for the resistance of antibiotics on Staphylococcus aureus based on the MALDI-TOF MS data. The web tool is available at: http://fdblab.csie.ncu.edu.tw/mdrsa/.",2021-12-03 +33170358,The rationale and development of a CyberKnife© registry for pediatric patients with CNS lesions.,"

Background

CyberKnife© Radiosurgery (CKRS) is a recognized treatment concept for CNS lesions in adults due to its high precision and efficacy beside a high patient comfort. However, scientific evidence for this treatment modality in pediatric patients is scarce. A dedicated registry was designed to document CyberKnife© procedures in children, aiming to test the hypothesis that it is safe and efficient for the treatment of CNS lesions.

Methods

The CyberKnife© registry is designed as a retrospective and prospective multicenter observational study (German Clinical Trials Register ( https://www.drks.de ), DRKS-ID 00016973). Patient recruitment will be ongoing throughout a 5-year period and includes collection of demographic, treatment, clinical, and imaging data. Follow-up results will be monitored for 10 years. All data will be registered in a centralized electronic database at the Charité-Universitätsmedizin. The primary endpoint is stable disease for benign and vascular lesions at 5 years of follow-up and local tumor control for malign lesions at 1- and 2-year follow-up. Secondary endpoints are radiation toxicity, side effects, and neurocognitive development.

Conclusion

The CyberKnife© registry intends to generate scientific evidence for all treatment- and outcome-related aspects in pediatric patients with treated CNS lesions. The registry may define safety and efficacy of CKRS in children and serve as a basis for future clinical trials, inter-methodological comparisons and changes of treatment algorithms.",2020-11-10 +34855761,The bioinformatics analysis of RIOX2 gene in lung adenocarcinoma and squamous cell carcinoma.,"Lung cancer is characterized by high morbidity and mortality rates, and it has become an important public health issue worldwide. The occurrence and development of tumors is a multi-gene and multi-stage complex process. As an oncogene, ribosomal oxygenase 2 (RIOX2) has been associated with a variety of cancers. In this article, we analyzed the correlation between RIOX2 expression and methylation in lung cancer based on the databases including the cancer genome atlas (TCGA) (https://portal.gdc.cancer.gov/) and the gene expression omnibus (GEO) (https://www.ncbi.nlm.nih.gov/geo/). It was found that RIOX2 is highly expressed in lung adenocarcinoma (LUAD) and lung squamous cell carcinoma (LUSC) tissues, whose expression is negatively correlated with its methylation level. In this regard, methylation at cg09716038, cg14773523, cg14941179, and cg22299097 had a significant negative correlation with RIOX2 expression in LUAD, whereas in LUSC, methylation at cg09716038, cg14773523, cg14941179, cg22299097, cg05451573, cg10779801, and cg23629183 is negatively correlated with RIOX2 expression. According to the analysis based on the databases, RIOX2 gene could not be considered as the independent prognostic biomarker in lung adenocarcinoma or squamous cell lung cancer. 
However, the molecular mechanism of RIOX2 gene in the development of lung cancer may be helpful in improving lung cancer therapy.",2021-12-02 +31378711,A Genome-wide Functional Signature Ontology Map and Applications to Natural Product Mechanism of Action Discovery.,"Gene expression signature-based inference of functional connectivity within and between genetic perturbations, chemical perturbations, and disease status can lead to the development of actionable hypotheses for gene function, chemical modes of action, and disease treatment strategies. Here, we report a FuSiOn-based genome-wide integration of hypomorphic cellular phenotypes that enables functional annotation of gene network topology, assignment of mechanistic hypotheses to genes of unknown function, and detection of cooperativity among cell regulatory systems. Dovetailing genetic perturbation data with chemical perturbation phenotypes allowed simultaneous generation of mechanism of action hypotheses for thousands of uncharacterized natural products fractions (NPFs). The predicted mechanism of actions span a broad spectrum of cellular mechanisms, many of which are not currently recognized as ""druggable."" To enable use of FuSiOn as a hypothesis generation resource, all associations and analyses are available within an open source web-based GUI (http://fusion.yuhs.ac).",2019-08-01 +34252933,On the feasibility of deep learning applications using raw mass spectrometry data.,"

Summary

In recent years, SWATH-MS has become the proteomic method of choice for data-independent-acquisition, as it enables high proteome coverage, accuracy and reproducibility. However, data analysis is convoluted and requires prior information and expert curation. Furthermore, as quantification is limited to a small set of peptides, potentially important biological information may be discarded. Here we demonstrate that deep learning can be used to learn discriminative features directly from raw MS data, eliminating hence the need of elaborate data processing pipelines. Using transfer learning to overcome sample sparsity, we exploit a collection of publicly available deep learning models already trained for the task of natural image classification. These models are used to produce feature vectors from each mass spectrometry (MS) raw image, which are later used as input for a classifier trained to distinguish tumor from normal prostate biopsies. Although the deep learning models were originally trained for a completely different classification task and no additional fine-tuning is performed on them, we achieve a highly remarkable classification performance of 0.876 AUC. We investigate different types of image preprocessing and encoding. We also investigate whether the inclusion of the secondary MS2 spectra improves the classification performance. Throughout all tested models, we use standard protein expression vectors as gold standards. Even with our naïve implementation, our results suggest that the application of deep learning and transfer learning techniques might pave the way to the broader usage of raw mass spectrometry data in real-time diagnosis.

Availability and implementation

The open source code used to generate the results from MS images is available on GitHub: https://ibm.biz/mstransc. The raw MS data underlying this article cannot be shared publicly for the privacy of individuals that participated in the study. Processed data including the MS images, their encodings, classification labels and results can be accessed at the following link: https://ibm.box.com/v/mstc-supplementary.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +32846052,Exploring Non-Coding RNAs in RNAcentral.,"Non-coding RNAs are essential for all life and carry out a wide range of functions. Information about these molecules is distributed across dozens of specialized resources. RNAcentral is a database of non-coding RNA sequences that provides a unified access point to non-coding RNA annotations from >40 member databases and helps provide insight into the function of these RNAs. This article describes different ways of accessing the data, including searching the website and retrieving the data programmatically over web APIs and a public database. We also demonstrate an example Galaxy workflow for using RNAcentral for RNA-seq differential expression analysis. RNAcentral is available at https://rnacentral.org. © 2020 The Authors. Basic Protocol 1: Viewing RNAcentral sequence reports Basic Protocol 2: Using RNAcentral text search to explore ncRNA sequences Basic Protocol 3: Using RNAcentral sequence search Basic Protocol 4: Using RNAcentral FTP archive Support Protocol 1: Using web APIs for programmatic data access Support Protocol 2: Using public Postgres database to export large datasets Support Protocol 3: Analyze non-coding RNA in RNA-seq datasets using RNAcentral and Galaxy.",2020-09-01 +32895427,"PCOSKBR2: a database of genes, diseases, pathways, and networks associated with polycystic ovary syndrome.","PolyCystic Ovary Syndrome KnowledgeBase (PCOSKBR2) is a manually curated database with information on 533 genes, 145 SNPs, 29 miRNAs, 1,150 pathways, and 1,237 diseases associated with PCOS. This data has been retrieved based on evidence gleaned by critically reviewing literature and related records available for PCOS in databases such as KEGG, DisGeNET, OMIM, GO, Reactome, STRING, and dbSNP. 
Since PCOS is associated with multiple genes and comorbidities, data mining algorithms for comorbidity prediction and identification of enriched pathways and hub genes are integrated in PCOSKBR2, making it an ideal research platform for PCOS. PCOSKBR2 is freely accessible at http://www.pcoskb.bicnirrh.res.in/ .",2020-09-07 +35070901,Diagnosis and management of malignant sublingual gland tumors: a narrative review.,"

Objective

In this article we review the literature on the malignant sublingual gland tumors from a surgical perspective.

Background

Sublingual gland tumors occur with a very low incidence and most are malignant tumors. The extent of treatment, clinical outcomes and prognosis of malignant sublingual gland tumors have not been well defined, due to the rarity of this disease.

Methods

A database search using Web of Science (https://webofknowledge.com/) and PubMed (https://pubmed.ncbi.nlm.nih.gov/) was conducted. The following keywords were used in the search: ""sublingual gland tumor"", AND ""malignancy"".

Conclusions

Although the sublingual glands are among the major salivary glands, they share common anatomical characteristics with minor salivary gland tumors. Therefore, the tumors from the sublingual gland may have different clinical behaviors from the other major salivary gland tumors. The sublingual glands are small without a true surrounding capsule of the glands; the extra-parenchymal extension is very common in sublingual gland tumors. Furthermore, the sublingual glands are located in close proximity to the lingual nerve, the submandibular gland-duct system and the mandible. Thus, the surgical approach to the malignant sublingual gland tumor should include the adequate management of neighboring structures. In addition, adjuvant radiation therapy provides a survival benefit for patients with malignant sublingual gland tumors, which have adverse features. This article summarizes the clinical characteristics and unique features of malignant sublingual gland tumors based on previous reports, and provides clinical information regarding the sublingual gland tumors to increase awareness of primary physicians as well as patients.",2021-12-01 +34910407,[The Swiss Brain Health Registry : a national infrastructure for Alzheimer's research].,"The Memory Centres of several Swiss hospitals have set up a national online registry for Alzheimer's research, called www.BHR-suisse.org. This type of registry already exists in the United States (www.brainhealthregistry.org/) and the Netherlands (https://hersenonderzoek.nl/). It contributes, as do these initiating sites, to the creation of a global database of research partnersb who wish to contribute by participating in studies on neurodegenerative diseases and more particularly on Alzheimer's disease. By registering, they provide a certain amount of information and become potential research partners. 
Researchers can then select a panel of volunteers according to the selection and exclusion criteria of their studies, contact them and include them in their studies.",2021-12-01 +34848704,"Integrative genome, transcriptome, microRNA, and degradome analysis of water dropwort (Oenanthe javanica) in response to water stress.","Water dropwort (Liyang Baiqin, Oenanthe javanica (BI.) DC.) is an aquatic perennial plant from the Apiaceae family with abundant protein, dietary fiber, vitamins, and minerals. It usually grows in wet soils and can even grow in water. Here, whole-genome sequencing of O. javanica via HiSeq 2000 sequencing technology was reported for the first time. The genome size was 1.28 Gb, including 42,270 genes, of which 93.92% could be functionally annotated. An online database of the whole-genome sequences of water dropwort, Water dropwortDB, was established to share the results and facilitate further research on O. javanica (database homepage: http://apiaceae.njau.edu.cn/waterdropwortdb ). Water dropwortDB offers whole-genome and transcriptome sequences and a Basic Local Alignment Search Tool. Comparative analysis with other species showed that the evolutionary relationship between O. javanica and Daucus carota was the closest. Twenty-five gene families of O. javanica were found to be expanded, and some genetic factors (such as genes and miRNAs) related to phenotypic and anatomic differentiation in O. javanica under different water conditions were further investigated. Two miRNA and target gene pairs (miR408 and Oja15472, miR171 and Oja47040) were remarkably regulated by water stress. The obtained reference genome of O. javanica provides important information for future work, thus making in-depth genetic breeding and gene editing possible. The present study also provides a foundation for the understanding of the O. 
javanica response to water stress, including morphological, anatomical, and genetic differentiation.",2021-12-01 +34396859,Tea intake and cardiovascular disease: an umbrella review.,"Brewed tea (Camellia sinensis) is a major dietary source of flavonoids, in particular flavan-3-ols. Tea consumption has been suggested to be inversely associated with a decreased risk of cardiovascular disease (CVD). Several biological mechanisms support the inverse relationship between tea flavonoid intake and CVD risk. Given the recent accumulating evidence from various systematic reviews regarding the role of tea as a beverage in reducing CVD risk and severity, we conducted an umbrella review to describe and critically evaluate the totality of evidence to date. We searched the PubMed, Web of Science, Cochrane Database of Systematic Reviews, and BIOSIS databases for systematic reviews published between January 1, 2010 and February 22, 2020 reporting relationships between tea (C. sinensis) consumption and CVD mortality, CVD diagnosis or incidence, CVD events, stroke events, blood pressure, endothelial function, blood lipids and triglycerides, and inflammatory markers. Herein, we describe results from 23 included systematic reviews. Consistently consuming 2 cups of unsweet tea per day offers the right levels of flavonoids to potentially decrease CVD risk and its progression. This is supported by the consistency between a recent high-quality systematic review and dose-response meta-analyses of population-based studies demonstrating beneficial effects of consumption on CVD mortality, CVD events and stroke events and medium- to high-quality systematic reviews of intervention studies that further elucidate potential benefits on both validated (i.e., SBP, DBP, total cholesterol, and LDL-cholesterol) and emerging risk biomarkers of CVD (TNF-ɑ and IL-6). 
On the basis of this umbrella review, the consumption of tea as a beverage did not seem to be harmful to health; therefore, the benefits of moderate consumption likely outweigh risk. Future large, clinical intervention studies will provide better mechanistic insight with the ability to confirm the outcome effects shown across observational studies. The review protocol was registered on PROSPERO (https://www.crd.york.ac.uk/PROSPERO/) as CRD42020218159.KEY MESSAGESIt is reasonable to judge that 2 cups of unsweet tea per day has the potential to decrease CVD risk and progression due to its flavonoid content.The primary side effects of tea documented in human studies are hepatotoxicity and gastrointestinal disturbances (i.e., vomiting and diarrhea) after high-dose supplemental intake.Additional clinical research is needed to fully elucidate the effects of tea flavonoids on markers of CVD, as many studies were under-powered to detect changes.[Figure: see text].",2021-12-01 +29989091,RicyerDB: A Database For Collecting Rice Yield-related Genes with Biological Analysis.,"The Rice Yield-related Database (RicyerDB) was created to complement with related research of influence rice (Oryza sativa L.) yield in multiple traits by manually curating the related databases and literature, and genomics and proteomics information that could be useful for comprehensive understanding of the rice biology. RicyerDB provides a more valuable resource in which to efficiently investigate, browse and analyze yield-related genes. The whole data set can be easily queried and downloaded through the webpage. In addition, RicyerDB also constructed a protein-protein interaction network with biological analysis. The combined rice database opens a new path to facilitate researchers achieving information on rice gene in terms of their effects on traits important for rice breeding. 
The web server is freely available at: http://server.malab.cn/Ricyer/index.html.",2018-05-22 +34514504,Contraceptive and Infertility Target DataBase: a contraceptive drug development tool for targeting and analysis of human reproductive specific tissues†.,"The long and challenging drug development process begins with discovery biology for the selection of an appropriate target for a specific indication. Target is a broad term that can be applied to a range of biological entities such as proteins, genes, and ribonucleic acids (RNAs). Although there are numerous databases available for mining biological entities, publicly available searchable, downloadable databases to aid in target selection for a specific disease or indication (e.g., developing contraceptives and infertility treatments) are limited. We report the development of the Contraceptive and Infertility Target DataBase (https://www.citdbase.org), which provides investigators an interface to mine existing transcriptomic and proteomic resources to identify high-quality contraceptive/infertility targets. The development of similar databases is applicable to the identification of targets for other diseases and conditions.",2021-12-01 +34880664,Role of CD5L and SRD5A2 as Prognostic Biomarkers for Hepatocellular Carcinoma.,"

Purpose

Due to the limitations of currently available biomarkers, new biomarkers are needed to accurately predict the prognosis of patients with hepatocellular carcinoma (HCC) patients.

Methods

In this study, we screened for differentially expressed genes (DEGs) in the tumor and the adjacent tissues using the four gene expression array (GSE14520, GSE45267, GSE121248, GSE62232) of the Gene Express Omnibus (GEO) database.

Results

Subsequently, 47 overlapping DEGs were identified in four GEO datasets, which were mostly located on chromosomes 5q and 6q, distributed in the liver and CD105-positive endothelial cells, and closely related to HCC. Function enrichment revealed 47 DEGs were related to HCC, and involved in steroid /lipid /retinol metabolism, bile secretion and p53 signalling pathway. The Kaplan-Meier plotter analysis (http://www.kmplot.com/) identified 26 and 40 genes associated with the 5-year overall survival (OS) and relapse-free survival (RFS). We found that CD5L and SRD5A2 were independent prognostic factors for 5-year OS (P=0.036) and RFS (P=0.044) in HCC patients from GSE14520, respectively. Clinicopathological features including BCLC stage, cirrhosis, and risk signature for predicted metastasis were used to construct and validate a nomogram for 5-year OS with C-index of 0.732 and 0.717 in the training and validation cohort, respectively. SRD5A2, BCLC stage and gender was independent prognostic factors for RFS which were used to build a nomogram with the C-index of 0.666 and 0.682 in the training and validation cohort, respectively.

Conclusion

CD5L can facilitate individualized, targeted therapy for HCC patients.",2021-12-01 +34861128,Role of IL-1β rs1143634 (+3954C>T) polymorphism in cancer risk: an updated meta-analysis and trial sequential analysis.,"

Objective

Oxidative stress caused by the pro-inflammatory cytokine interleukin (IL)-1β has been widely investigated for cancer risk. In this study, we focused on the role of IL-1β rs1143634 polymorphism to reveal its impact on cancer development.

Methods

Related studies with fixed inclusion criteria were selected from electronic databases to May 2021. This meta-analysis was performed with odds ratios and 95% confidence intervals. Heterogeneity, publication bias and sensitivity analyses were also conducted. Trial sequential analysis (TSA) and in-silico gene expression analysis were performed.

Results

Forty-four case-control studies involving 18,645 patients with cancer and 22,882 controls were included. We observed a significant association of this single nucleotide polymorphism with overall cancer risk in the codominant model 3 (1.13-fold), recessive model (1.14-fold) and allelic model (1.08-fold). Subgroup analysis revealed that rs1143634 elevated the risk of gastric cancer, breast cancer and multiple myeloma. In addition, Asian and mixed populations and hospital-based controls had a significantly higher risk of cancer development. TSA confirmed our findings.

Conclusion

Our meta-analysis revealed that the presence of IL-1β rs1143634 polymorphism increases the risk of cancer development. Among polymorphism carriers, the Asian population has a higher risk than other ethnic populations.This meta-analysis was registered retrospectively at INPLASY (https://inplasy.com/, INPLASY2021100044).",2021-12-01 +33909069,Monosaccharide biosynthesis pathways database.,"A distinctive feature of glycans vis-à-vis proteins and nucleic acids is its structural complexity, which arises from the huge repertoire of monosaccharides, isomeric linkages and branching. A very large number of monosaccharides have so far been discovered in natural glycans. Experimentally, pathways for the biosynthesis have been characterized completely for 55 monosaccharides and partially for a few more. However, there is no single platform, which provides information about monosaccharide biosynthesis pathways and associated enzymes We have gathered 572 experimentally characterized enzymes of 66 biosynthesis pathways from literature and set up a first of its kind database called the Monosaccharide Biosynthesis Pathways Database http://www.bio.iitb.ac.in/mbpd/). Annotations such as the reaction catalyzed, substrate specificity, biosynthesis pathway and PubMed IDs are provided for all the enzymes in the database. Sequence homologs of the experimentally characterized enzymes found in nearly 13,000 completely sequenced genomes from Bacteria and Archaea have also been included in the database. This platform will help in the deduction of evolutionary relationships among enzymes such as aminotransferases, nucleotidyltransferases, acetyltransferases and SDR family enzymes. 
It can also facilitate experimental studies such as direct enzyme assays to validate putative annotations, establish structure-function relationship, expression profiling to determine the function, determine the phenotypic consequences of gene knock-out/knock-in and complementation studies.",2021-12-01 +33547344,Genetic ancestry plays a central role in population pharmacogenomics.,"Recent studies have pointed out the essential role of genetic ancestry in population pharmacogenetics. In this study, we analyzed the whole-genome sequencing data from The 1000 Genomes Project (Phase 3) and the pharmacogenetic information from Drug Bank, PharmGKB, PharmaADME, and Biotransformation. Here we show that ancestry-informative markers are enriched in pharmacogenetic loci, suggesting that trans-ancestry differentiation must be carefully considered in population pharmacogenetics studies. Ancestry-informative pharmacogenetic loci are located in both protein-coding and non-protein-coding regions, illustrating that a whole-genome analysis is necessary for an unbiased examination over pharmacogenetic loci. Finally, those ancestry-informative pharmacogenetic loci that target multiple drugs are often a functional variant, which reflects their importance in biological functions and pathways. In summary, we develop an efficient algorithm for an ultrahigh-dimensional principal component analysis. We create genetic catalogs of ancestry-informative markers and genes. We explore pharmacogenetic patterns and establish a high-accuracy prediction panel of genetic ancestry. Moreover, we construct a genetic ancestry pharmacogenomic database Genetic Ancestry PhD ( http://hcyang.stat.sinica.edu.tw/databases/genetic_ancestry_phd/ ).",2021-02-05 +34871008,Children's self-blame appraisals about their mothers' depressive symptoms and risk for internalizing symptoms. Correction to Kouros et al. 
(2020).,"Reports an error in ""Children's self-blame appraisals about their mothers' depressive symptoms and risk for internalizing symptoms"" by Chrystyna D. Kouros, Sharyl E. Wee, Chelsea N. Carson and Naomi V. Ekas (Journal of Family Psychology, 2020[Aug], Vol 34[5], 534-543). In the article (https://doi.org/10.1037/fa m0000639), ""p = .19"" should have read ""p = .019"" in Panel B of Figure 1. The online version of this article has been corrected. (The following abstract of the original article appeared in record 2020-07537-001.) Maternal depressive symptoms are a robust predictor of children's risk for internalizing symptoms, yet not all children are negatively affected by exposure to their mothers' symptoms. The present study tested children's self-blame appraisals as a moderator of the association between maternal depressive symptoms and children's internalizing symptoms, controlling for children's negative attributional style. We hypothesized that the relation between maternal depressive symptoms and children's internalizing symptoms would be stronger for children who blamed themselves more for their mothers' symptoms. Participants were 129 mother-child dyads (M child age = 13.63, SD = 2.2; 52.7% female; 38.8% White, 31% African American, 22.5% Latinx/Hispanic) recruited from the community. Results indicated that maternal depressive symptoms were associated with higher levels of children's internalizing symptoms for children who reported higher, but not lower, levels of self-blame appraisals. Results were consistent using mothers' or children's reports of their own and each other's symptoms. The findings highlight the importance of assessing children's appraisals about their mothers' depressive symptoms, and suggest that preventive interventions should target children who endorse higher levels of self-blame appraisals. 
Furthermore, children's self-blame appraisals about mothers' depressive symptoms should be considered as a target of treatment for child internalizing disorders. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-12-01 +,"Comments on the published article: “Capability of Sentinel-2 data for estimating maximum evapotranspiration and irrigation requirements for tomato crop in central Italy” by S. Vanino et al., Remote Sensing of Environment, 215(2018), 452-470","This short communication concerns errors in the albedo computation from Sentinel 2 in the article by S. Vanino et al., “Capability of Sentinel-2 data for estimating maximum evapotranspiration and irrigation requirements for tomato crop in Central Italy”, Remote Sensing of Environment, vol. 215, 2018, Pag. 452–470, https://doi.org/10.1016/j.rse.2018.06.035.",2020-02-01 +32699131,Febrile Illness Evaluation in a Broad Range of Endemicities (FIEBRE): protocol for a multisite prospective observational study of the causes of fever in Africa and Asia.,"

Introduction

Fever commonly leads to healthcare seeking and hospital admission in sub-Saharan Africa and Asia. There is only limited guidance for clinicians managing non-malarial fevers, which often results in inappropriate treatment for patients. Furthermore, there is little evidence for estimates of disease burden, or to guide empirical therapy, control measures, resource allocation, prioritisation of clinical diagnostics or antimicrobial stewardship. The Febrile Illness Evaluation in a Broad Range of Endemicities (FIEBRE) study seeks to address these information gaps.

Methods and analysis

FIEBRE investigates febrile illness in paediatric and adult outpatients and inpatients using standardised clinical, laboratory and social science protocols over a minimum 12-month period at five sites in sub-Saharan Africa and Southeastern and Southern Asia. Patients presenting with fever are enrolled and provide clinical data, pharyngeal swabs and a venous blood sample; selected participants also provide a urine sample. Laboratory assessments target infections that are treatable and/or preventable. Selected point-of-care tests, as well as blood and urine cultures and antimicrobial susceptibility testing, are performed on site. On day 28, patients provide a second venous blood sample for serology and information on clinical outcome. Further diagnostic assays are performed at international reference laboratories. Blood and pharyngeal samples from matched community controls enable calculation of AFs, and surveys of treatment seeking allow estimation of the incidence of common infections. Additional assays detect markers that may differentiate bacterial from non-bacterial causes of illness and/or prognosticate illness severity. Social science research on antimicrobial use will inform future recommendations for fever case management. Residual samples from participants are stored for future use.

Ethics and dissemination

Ethics approval was obtained from all relevant institutional and national committees; written informed consent is obtained from all participants or parents/guardians. Final results will be shared with participating communities, and in open-access journals and other scientific fora. Study documents are available online (https://doi.org/10.17037/PUBS.04652739).",2020-07-21 +32548865,The UK Veterinary Immunological Toolbox Website: promoting vaccine research by facilitating communication and removing reagent barriers.,"Using the best animal models to study immune responses against specific pathogens or vaccines can dramatically accelerate our understanding. Veterinary species are well studied, particularly livestock, to reduce their disease burden. They have also proven to be powerful models, especially for zoonotic pathogens and novel vaccination strategies. A prerequisite for any model selection is having the right quality and range of species-specific immunological reagents. To help promote the widest possible use of veterinary species, an open access website (https://www.immunologicaltoolbox.co.uk) has been created as a central community annotated hub for veterinary immunological reagents. The website is also the portal into services offered by the UK Immunological Toolbox project that includes antibody generation, sequencing and recombinant expression. The funding for this effort is linked into sustainable sources, but ultimate success relies on community engagement to continually increase the quality and quantity of information. 
It is hoped that as more users and reagent owners engage, it will become an essential resource for researchers, veterinarians and clinicians alike by removing barriers that prevent the use of the most informative animal models.",2020-07-29 +33871478,The Pharmit Backend: A Computer Systems Approach to Enabling Interactive Online Drug Discovery.,"Pharmit (http://pharmit.csb.pitt.edu) is an open-source online resource that allows users to interactively search libraries of millions compounds as part of a structure-based drug discovery workflow. Here we describe the systems-level implementation decisions made in designing Pharmit that, when combined with novel sub-linear time search algorithms, allow it to screen millions of molecules in seconds. The key concepts are to maximize parallelism while minimizing intra-thread communication, optimize data layout for sequential processing, and efficiently manage memory allocation. We describe how these concepts are applied to the cheminformatic data inherent to Pharmit and discuss limitations and possible future directions.",2018-11-28 +34212235,MR Imaging of Human Brain Mechanics In Vivo: New Measurements to Facilitate the Development of Computational Models of Brain Injury.,"Computational models of the brain and its biomechanical response to skull accelerations are important tools for understanding and predicting traumatic brain injuries (TBIs). However, most models have been developed using experimental data collected on animal models and cadaveric specimens, both of which differ from the living human brain. Here we describe efforts to noninvasively measure the biomechanical response of the human brain with MRI-at non-injurious strain levels-and generate data that can be used to develop, calibrate, and evaluate computational brain biomechanics models. 
Specifically, this paper reports on a project supported by the National Institute of Neurological Disorders and Stroke to comprehensively image brain anatomy and geometry, mechanical properties, and brain deformations that arise from impulsive and harmonic skull loadings. The outcome of this work will be a publicly available dataset ( http://www.nitrc.org/projects/bbir ) that includes measurements on both males and females across an age range from adolescence to older adulthood. This article describes the rationale and approach for this study, the data available, and how these data may be used to develop new computational models and augment existing approaches; it will serve as a reference to researchers interested in using these data.",2021-07-01 +34911571,Beyondcell: targeting cancer therapeutic heterogeneity in single-cell RNA-seq data.,"We present Beyondcell, a computational methodology for identifying tumour cell subpopulations with distinct drug responses in single-cell RNA-seq data and proposing cancer-specific treatments. Our method calculates an enrichment score in a collection of drug signatures, delineating therapeutic clusters (TCs) within cellular populations. Additionally, Beyondcell determines the therapeutic differences among cell populations and generates a prioritised sensitivity-based ranking in order to guide drug selection. We performed Beyondcell analysis in five single-cell datasets and demonstrated that TCs can be exploited to target malignant cells both in cancer cell lines and tumour patients. Beyondcell is available at: https://gitlab.com/bu_cnio/beyondcell .",2021-12-16 +34020536,Federated sharing and processing of genomic datasets for tertiary data analysis. 
,"With the spreading of biological and clinical uses of next-generation sequencing (NGS) data, many laboratories and health organizations are facing the need of sharing NGS data resources and easily accessing and processing comprehensively shared genomic data; in most cases, primary and secondary data management of NGS data is done at sequencing stations, and sharing applies to processed data. Based on the previous single-instance GMQL system architecture, here we review the model, language and architectural extensions that make the GMQL centralized system innovatively open to federated computing. A well-designed extension of a centralized system architecture to support federated data sharing and query processing. Data is federated thanks to simple data sharing instructions. Queries are assigned to execution nodes; they are translated into an intermediate representation, whose computation drives data and processing distributions. The approach allows writing federated applications according to classical styles: centralized, distributed or externalized. The federated genomic data management system is freely available for non-commercial use as an open source project at http://www.bioinformatics.deib.polimi.it/FederatedGMQLsystem/. {arif.canakoglu, pietro.pinoli}@polimi.it.",2021-05-01 +34697993,Tasks for a theoretical psychology of emotion.,"In the first part of the article, the central role of theory in emotion psychology is underscored and reasons are given why more theoretical psychology of emotion is needed. In the second part, nine tasks for the theoretical psychology of emotion are defined, by refining and extending three of the general tasks of theoretical psychology proposed 70 years ago by Sigmund Koch [Theoretical psychology, 1950: An overview. Psychological Review, 58(4), 295. https://doi.org/10.1037/h0055768]. The nine tasks are: (1) Analysis, rational reconstruction and critique of existing emotion theories. (2) Comparison of different theories. 
(3) Systematization and integration of theories. (4) Reconstruction of the development of theories over time. (5) Analysis, reconstruction and critique of theory-data and data-theory inferences. (6) Analysis, reconstruction and critique of the complete set of arguments for and against specific emotion-theoretic assumption and whole theories. (7) Analysis, reconstruction and critique of measurement theories for emotions. (8) Development of new emotion theories and theories of emotion measurement. (9) Information about theoretical and methodological developments of interest to emotion psychology in other subdisciplines of psychology and in neighbouring sciences, and export of theories and methods to other disciplines.",2021-10-26 +33779263,First Report of 16SrII-V Peanut Witches' Broom Phytoplasma in Snake Gourd (Trichosanthes cucumerina L.) in Taiwan. ,"Snake gourd (Trichosanthes cucumerina L.), an annual climbing plant belonging to the family of Cucurbitaceae, is native to Southeast Asia countries, e.g., India, Pakistan, Malaysia, China, and Indonesia. It is commonly consumed as a vegetable and also used as a traditional herbal medicine due to the antidiabetic, anti-inflammatory, antibacterial, hepatoprotective, and cytotoxic activities (Devi 2017). In September 2020, phytoplasma-induced disease symptoms such as little leaf, yellowing, phyllody, virescence, and witches' broom were observed on snake gourd in Yunlin County, Taiwan. The cross-sectional examination of the symptomatic plant by transmission electron microscopy showed typical phytoplasma-like pleomorphic bodies with spherical, oval and tubular shapes in sieve elements. Further examination by nested PCR revealed that a 1.2 kb DNA fragment for 16S rRNA gene was only amplified from symptomatic leaf of snake gourd using the phytoplasma universal primer pairs P1/P7 followed by R16F2n/R16R2. 
BLAST and iPhyClassifier (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi) analyses on the amplified DNA fragment (accession no. MW309142) revealed that it shares 100% identity with that of GenBank accession NZ_AMWZ01000008 (complement [31109 to 32640]) of peanut witches' broom (PnWB) phytoplasma, a 'Candidatus phytoplasma aurantifolia'-related strain (Firrao et al. 2004), and could be classified into the 16SrII-V subgroup. Samples examined by nested PCR were further characterized by western blotting using the polyclonal antibody raised against the Imp of PnWB phytoplasma (Chien et al. 2020a, b). An expected signal of 19 kDa specific for Imp was only detected in the symptomatic snake gourd, but not in healthy snake gourd. Since the disease symptoms caused by phytoplasma infection are highly dependent on the secreted effectors (Namba 2019), phyllogen gene that is responsible for phyllody and virescence symptoms was amplified from symptomatic snake gourd by PCR. BLAST analysis revealed that phyllogen identified in snake gourd is identical with that of PnWB phytoplasma. In Taiwan, species of family Cucurbitaceae such as loofah, bitter gourd, and pumpkin are commonly infected by 16SrVIII phytoplasma (Davis 2017). In this study, we report for the first time that snake gourd, a species of family Cucurbitaceae, was infected by 16SrII-V PnWB phytoplasma in Taiwan.",2021-03-29 +29087479,STCRDab: the structural T-cell receptor database.,"The Structural T-cell Receptor Database (STCRDab; http://opig.stats.ox.ac.uk/webapps/stcrdab) is an online resource that automatically collects and curates TCR structural data from the Protein Data Bank. For each entry, the database provides annotations, such as the α/β or γ/δ chain pairings, major histocompatibility complex details, and where available, antigen binding affinities. 
In addition, the orientation between the variable domains and the canonical forms of the complementarity-determining region loops are also provided. Users can select, view, and download individual or bulk sets of structures based on these criteria. Where available, STCRDab also finds antibody structures that are similar to TCRs, helping users explore the relationship between TCRs and antibodies.",2018-01-01 +31551601,Single-cell transcriptomic profiling of the aging mouse brain.,"The mammalian brain is complex, with multiple cell types performing a variety of diverse functions, but exactly how each cell type is affected in aging remains largely unknown. Here we performed a single-cell transcriptomic analysis of young and old mouse brains. We provide comprehensive datasets of aging-related genes, pathways and ligand-receptor interactions in nearly all brain cell types. Our analysis identified gene signatures that vary in a coordinated manner across cell types and gene sets that are regulated in a cell-type specific manner, even at times in opposite directions. These data reveal that aging, rather than inducing a universal program, drives a distinct transcriptional course in each cell population, and they highlight key molecular processes, including ribosome biogenesis, underlying brain aging. Overall, these large-scale datasets (accessible online at https://portals.broadinstitute.org/single_cell/study/aging-mouse-brain ) provide a resource for the neuroscience community that will facilitate additional discoveries directed towards understanding and modifying the aging process.",2019-09-24 +34035422,Multi Locus View: an extensible web-based tool for the analysis of genomic data.,"Tracking and understanding data quality, analysis and reproducibility are critical concerns in the biological sciences. 
This is especially true in genomics where next generation sequencing (NGS) based technologies such as ChIP-seq, RNA-seq and ATAC-seq are generating a flood of genome-scale data. However, such data are usually processed with automated tools and pipelines, generating tabular outputs and static visualisations. Interpretation is normally made at a high level without the ability to visualise the underlying data in detail. Conventional genome browsers are limited to browsing single locations and do not allow for interactions with the dataset as a whole. Multi Locus View (MLV), a web-based tool, has been developed to allow users to fluidly interact with genomics datasets at multiple scales. The user is able to browse the raw data, cluster, and combine the data with other analysis and annotate the data. User datasets can then be shared with other users or made public for quick assessment from the academic community. MLV is publically available at https://mlv.molbiol.ox.ac.uk .",2021-05-25 +35057716,Myocardial revascularization in Russian Federation for acute coronary syndrome in 2016-2020.,"Aim    To analyze the number of cases of acute coronary syndrome (ACS) [ST segment elevation myocardial infarction (STEMI), non-ST elevation acute coronary syndrome (nSTEACS)] and results of myocardial revascularization for ACS as a part of the monitoring performed by the Ministry of Health Care of Russia*. This analysis allows, on one hand, providing control of morbidity and mortality of patients with socially significant pathologies and, on the other hand, monitoring the effectivity of treatments to identify and correct their shortcomings. 
Time-related changes in results of myocardial revascularization performed for ACS patients in the Russian Federation in 2020 were analyzed and compared with the values of 2016-2019 based on data of the Russian Ministry of Health Care monitoring.Material and methods    Yearly absolute, relative, and calculated indices of revascularization for ACS were analyzed and compared based on data of the Russian Ministry of Health Care monitoring in 2016-2020.Results    In the Russian Federation in 2020, the lowest number of hospitalizations for ACS (403, 931) was recorded with an unprecedented ratio of 1 / 1.8 for STEMI/nSTEACS, respectively. In Russia in 2020, the proportion of primary percutaneous coronary interventions (pPCI) for STEMI continued growing; it reached 44% and peaked to the maximum for 2016-2020. At the same time, the thrombolytic therapy (TLT) remained essential in the structure of reperfusion strategies during those years (24.0-27.3 % of all STEMI cases). Total death rate of admitted patients with STEMI in Russia was stable at the level of 13.1-14.6 %. In 2020, there were no significant differences in quality indexes of the treatment for STEMI from the previous period (2016-2019). A yearly relative increase in the number of PCIs for STEACS (from 16 % in 2016 to 30 % in 2020 and from 30% to 46% for high-risk nSTEACS) was observed. 
In 2020, a significant increase in death rate was observed for nSTEACS as a whole (to 4.1 %) and for individual subgroups (high-risk nSTEACS, to 4.5 %; after PCI for nSTEACS, to 1.8 %; and after PCI for high-risk nSTEACS, to 2.8 %) whereas mean death rate values in these subgroups in 2016-2019 were 2.75 %, 3.45 %, 1.5 %, and 2.3 %, respectively.Conclusion    The analysis of revascularization indexes in ACS patients based on the Ministry of Health Care of Russia monitoring performed in 2016-2020 showed a number of positive trends, including an increase in the total number of revascularization procedures; a decrease in the time from the disease onset to the endovascular treatment; an increase in the availability of stenting for severe ACS; and general stabilization of the mortality. On the other hand, the Russian Federation is considerably behind European countries in several qualitative and quantitative parameters of health care in ACS, such as pPCI availability, symptom-to-balloon time, total mortality of all hospitalized STEMI patients, and revascularization for nSTEACS. Despite the gradual improvement of relative quantitative indexes of myocardial revascularization for ACS, negative changes in the absolute number of myocardial revascularizations for various forms of ACS and a notable increase in the death rate in nSTEACS were observed in 2020, including patients after PCI. 
There is no doubt that the negative results of myocardial revascularization in Russia in 2020 were due to the effect of the COVID-19 pandemic.* monitoring of measures to reduce the mortality from ischemic heart disease (letters of the Ministry of Health Care of the Russian Federation of 13.03.2015 # 17-6 /10 / 1-177 and of 24.07.2015 # 17-9 / 10 / 2-4128), which includes monthly collection of data on the Federal Research Institute for Health Organization and Informatics portal, the Automated System for Monitoring of Medical Statistics, at http://asmms.mednet.ru.",2021-12-31 +34843539,Analysis of chemical compositions and larvicidal activity of nut extracts from Areca catechu Linn against Aedes (Diptera: Culicidae).,"

Background

There is a growing need to use green alternative larvicidal control for Aedes larvae compared to chemical insecticides. Substantial reliance on chemical insecticides caused insecticide resistance in mosquito populations. Thus, research for alternate chemical compounds from natural products is necessary to control Aedes larvae. This study explores the analysis of chemical compositions from Areca catechu nut as a potential larvicide for Aedes (Diptera: Culicidae).

Methods

The Areca catechu nut collected from Ipoh, Perak, Malaysia was ground into powder and used for Soxhlet extraction. The chemical analysis of the extracts and their structures were identified using the GCMS-QP2010 Ultra (Shimadzu) system. National Institute of Standards and Technology (NIST) Chemistry WebBook, Standard Reference Database 69 (https://webbook.nist.gov/chemistry/) and PubChem (https://pubchem.ncbi.nlm.nih.gov/) were the two databases used to retrieve the synonyms, molecular formula, molecular weight, and 2-dimensional (2D) structure of chemical compounds. Next, following WHO procedures for larval bioassays, the extracts were used to assess larvicidal activity against early 4th instar larvae of Aedes aegypti and Aedes albopictus.

Results

The larvicidal activities were observed against early 4th stage larvae with different concentrations in the range from 200 mg/L to 1600 mg/L. The LC50 and LC95 of Aedes aegypti were 621 mg/L and 2264 mg/L respectively; whereas the LC50 and LC95 of Aedes albopictus were 636 mg/L and 2268 mg/L respectively. Mortality was not observed in the non-target organism test. The analysis using gas chromatography and mass spectrometer recovered several chemical compounds such as Arecaidine, Dodecanoic acid, Methyl tetradecanoate, Tetradecanoic acid, and n-Hexadecanoic acid bioactive components. These chemical constituents were used as additive formulations in pesticides, pest control, insect repellent, and insecticidal agents.

Conclusions

Our study showed significant outcomes from the extract of Areca catechu nut and it deserves further investigation in relation to chemical components and larvicidal actions between different species of Aedes mosquitoes. Even though all these findings are fundamental, it may have some interesting potentials to be developed as natural bio-larvicidal products.",2021-11-29 +33539888,PPD: A Manually Curated Database for Experimentally Verified Prokaryotic Promoters.,"As a key region, promoter plays a key role in transcription regulation. A eukaryotic promoter database called EPD has been constructed to store eukaryotic POL II promoters. Although there are some promoter databases for specific prokaryotic species or specific promoter type, such as RegulonDB for Escherichia coli K-12, DBTBS for Bacillus subtilis and Pro54DB for sigma 54 promoter, because of the diversity of prokaryotes and the development of sequencing technology, huge amounts of prokaryotic promoters are scattered in numerous published articles, which is inconvenient for researchers to explore the process of gene regulation in prokaryotes. In this study, we constructed a Prokaryotic Promoter Database (PPD), which records the experimentally validated promoters in prokaryotes, from published articles. Up to now, PPD has stored 129,148 promoters across 63 prokaryotic species manually extracted from published papers. We provided a friendly interface for users to browse, search, blast, visualize, submit and download data. The PPD will provide relatively comprehensive resources of prokaryotic promoter for the study of prokaryotic gene transcription. 
The PPD is freely available and easy accessed at http://lin-group.cn/database/ppd/.",2021-02-02 +34626214,The human hepatocyte TXG-MAPr: gene co-expression network modules to support mechanism-based risk assessment.,"Mechanism-based risk assessment is urged to advance and fully permeate into current safety assessment practices, possibly at early phases of drug safety testing. Toxicogenomics is a promising source of mechanisms-revealing data, but interpretative analysis tools specific for the testing systems (e.g. hepatocytes) are lacking. In this study, we present the TXG-MAPr webtool (available at https://txg-mapr.eu/WGCNA_PHH/TGGATEs_PHH/ ), an R-Shiny-based implementation of weighted gene co-expression network analysis (WGCNA) obtained from the Primary Human Hepatocytes (PHH) TG-GATEs dataset. The 398 gene co-expression networks (modules) were annotated with functional information (pathway enrichment, transcription factor) to reveal their mechanistic interpretation. Several well-known stress response pathways were captured in the modules, were perturbed by specific stressors and showed preservation in rat systems (rat primary hepatocytes and rat in vivo liver), with the exception of DNA damage and oxidative stress responses. A subset of 87 well-annotated and preserved modules was used to evaluate mechanisms of toxicity of endoplasmic reticulum (ER) stress and oxidative stress inducers, including cyclosporine A, tunicamycin and acetaminophen. In addition, module responses can be calculated from external datasets obtained with different hepatocyte cells and platforms, including targeted RNA-seq data, therefore, imputing biological responses from a limited gene set. As another application, donors' sensitivity towards tunicamycin was investigated with the TXG-MAPr, identifying higher basal level of intrinsic immune response in donors with pre-existing liver pathology. 
In conclusion, we demonstrated that gene co-expression analysis coupled to an interactive visualization environment, the TXG-MAPr, is a promising approach to achieve mechanistic relevant, cross-species and cross-platform evaluation of toxicogenomic data.",2021-10-09 +34791993,Examining Changes in Sleep Duration Associated with the Onset of the COVID-19 Pandemic: Who is Sleeping and Who is Not?,"The COVID-19 pandemic has resulted in social isolation and reports of insomnia. However, reports of changes in sleep duration and associated factors are few. To determine the impact of COVID-19 on changes in sleep behavior, data were analyzed from an online survey of adults recruited via social media that included questions asking whether the respondent slept less or more after the onset of the pandemic as well as self-reported sociodemographic and occupational information; beliefs about COVID-19; and responses pertaining to loneliness, anxiety, and depression. There were 5,175 respondents; 53.9% had a change in sleep duration.17.1% slept less and 36.7% slept more. Sleeping more was related to greater education, being single/divorced/separated, unemployed or a student. Being retired, divorced/separated or a homemaker, and living in the Mountain or Central time zones were associated with less sleep. Beliefs that COVID-19 would result in personal adverse consequences was associated with both more and less sleep. However, the strongest associations for both more and less sleep were seen with depression, anxiety, and loneliness. In summary, changes in sleep duration since the start of the COVID-19 pandemic were highly prevalent among social media users and were associated with several sociodemographic factors and beliefs that COVID-19 would have adverse personal impacts. 
However, the strongest associations occurred with worse mental health suggesting that improvements may occur with better sleep.Supplemental data for this article is available online at https://doi.org/10.1080/08964289.2021.2002800 .",2021-11-18 +23868908,CREDO: a structural interactomics database for drug discovery.,"CREDO is a unique relational database storing all pairwise atomic interactions of inter- as well as intra-molecular contacts between small molecules and macromolecules found in experimentally determined structures from the Protein Data Bank. These interactions are integrated with further chemical and biological data. The database implements useful data structures and algorithms such as cheminformatics routines to create a comprehensive analysis platform for drug discovery. The database can be accessed through a web-based interface, downloads of data sets and web services at http://www-cryst.bioc.cam.ac.uk/credo. Database URL: http://www-cryst.bioc.cam.ac.uk/credo.",2013-07-18 +33062422,No one-size-fits-all solution to clean GBIF.,"Species occurrence records provide the basis for many biodiversity studies. They derive from georeferenced specimens deposited in natural history collections and visual observations, such as those obtained through various mobile applications. Given the rapid increase in availability of such data, the control of quality and accuracy constitutes a particular concern. Automatic filtering is a scalable and reproducible means to identify potentially problematic records and tailor datasets from public databases such as the Global Biodiversity Information Facility (GBIF; http://www.gbif.org), for biodiversity analyses. However, it is unclear how much data may be lost by filtering, whether the same filters should be applied across all taxonomic groups, and what the effect of filtering is on common downstream analyses. 
Here, we evaluate the effect of 13 recently proposed filters on the inference of species richness patterns and automated conservation assessments for 18 Neotropical taxa, including terrestrial and marine animals, fungi, and plants downloaded from GBIF. We find that a total of 44.3% of the records are potentially problematic, with large variation across taxonomic groups (25-90%). A small fraction of records was identified as erroneous in the strict sense (4.2%), and a much larger proportion as unfit for most downstream analyses (41.7%). Filters of duplicated information, collection year, and basis of record, as well as coordinates in urban areas, or for terrestrial taxa in the sea or marine taxa on land, have the greatest effect. Automated filtering can help in identifying problematic records, but requires customization of which tests and thresholds should be applied to the taxonomic group and geographic area under focus. Our results stress the importance of thorough recording and exploration of the meta-data associated with species records for biodiversity research.",2020-09-28 +34189203,"Collagen stable isotope data from East and Northeast Asia, c. 7000 BC-1000 AD.","Stable isotope analysis is routinely used in archaeology to answer questions related to past diets. As the technique matures, data from archaeological sites have been generated at an exponential rate over the past several decades, thus provided an invaluable opportunity to examine past dietary practices and subsistence economies in much larger geographical and temporal settings. In Asia, a significant proportion of isotopic data is published in non-English journals or in grey literature, therefore remains largely inaccessible to general researchers. 
In order to provide easier access to these data, and to encourage future large-scale meta-data analyses in Asia, this collection presents the most comprehensive set of collagen stable isotope data of carbon, nitrogen, and sulfur from East and Northeast Asia (29-51˚N, 96-136˚ E) to date, including sites located within the modern territories of the People's Republic of China, Mongolia, the Russian Federation, and the Republic of Korea. Using academic search engines such as Google Scholar, the Chinese National Knowledge Infrastructure (CNKI), and ScienceON, a total of 3,304 previously published archaeological human and faunal stable isotope data from 136 archaeological sites in East and Northeast Asia, spanning over a period of 8,000 years (c. 7000 BC to AD 1000) are collected. The collated data are deposited on the open-access platform IsoArcH (https://isoarch.eu/) for any interested parties to use.",2021-06-10 +,The forgotten land use class: Mapping of fallow fields across the Sahel using Sentinel-2,"Remote sensing-derived cropland products have depicted the location and extent of agricultural lands with an ever increasing accuracy. However, limited attention has been devoted to distinguishing between actively cropped fields and fallowed fields within agricultural lands, and in particular so in grass fallow systems of semi-arid areas. In the Sahel, one of the largest dryland regions worldwide, crop-fallow rotation practices are widely used for soil fertility regeneration. Yet, little is known about the extent of fallow fields since fallow is not explicitly differentiated within the cropland class in any existing remote sensing-based land use/cover maps, regardless of the spatial scale. With a 10 m spatial resolution and a 5-day revisit frequency, Sentinel-2 satellite imagery made it possible to disentangle agricultural land into cropped and fallow fields, facilitated by Google Earth Engine (GEE) for big data handling. 
Here we produce the first Sahelian fallow field map at a 10 m resolution for the baseline year 2017, accomplished by designing a remote sensing driven protocol for generating reference data for mapping over large areas. Based on the 2015 Copernicus Dynamic Land Cover map at 100 m resolution, the extent of fallow fields in the cropland class is estimated to be 63% (403,617 km2) for the Sahel in 2017. Similar results are obtained for five contemporary cropland products, with fallow fields occupying 57–62% of the cropland area. Yet, it is noted that the total estimated area coverage depends on the quality of the different cropland products. The share of cropped fields within the Copernicus cropland area is found to be higher in the arid regions (200–300 mm rainfall) as compared to the semi-arid regions (300–600 mm rainfall). The woody cover fraction within cropped and fallow fields is found to have a reversed pattern between arid (higher woody cover in cropped fields) and semi-arid (higher woody cover in fallow fields) regions. The method developed, using cloud-based Earth Observation (EO) data and computation on the GEE platform, is expected to be reproducible for mapping the extent of fallow fields across global croplands. Future applications based on multi-year time series is expected to improve our understanding of crop-fallow rotation dynamics in grass fallow systems being key in teasing apart how cropland intensification and expansion affect environmental variables, such as soil fertility, crop yields and local livelihoods in low-income regions such as the Sahel. 
The mapping result can be visualized via a web viewer (https://buwuyou.users.earthengine.app/view/fallowinsahel).",2020-03-01 +35864949,Reconstruction of the Electron Diffusion Region With Inertia and Compressibility Effects.,"A method based on electron magnetohydrodynamics (EMHD) for the reconstruction of steady, two-dimensional plasma and magnetic field structures from data taken by a single spacecraft, first developed by Sonnerup et al. (2016), https://doi.org/10.1002/2016ja022430, is extended to accommodate inhomogeneity of the electron density and temperature, electron inertia effects, and guide magnetic field in and around the electron diffusion region (EDR), the central part of the magnetic reconnection region. The new method assumes that the electron density and temperature are constant along, but may vary across, the magnetic field lines. We present two models for the reconstruction of electron streamlines, one of which is not constrained by any specific formula for the electron pressure tensor term in the generalized Ohm's law that is responsible for electron unmagnetization in the EDR, and the other is a modification of the original model to include the inertia and compressibility effects. Benchmark tests using data from fully kinetic simulations show that our new method is applicable to both antiparallel and guide-field (component) reconnection, and the electron velocity field can be better reconstructed by including the inertia effects. The new EMHD reconstruction technique has been applied to an EDR of magnetotail reconnection encountered by the Magnetospheric Multiscale spacecraft on 11 July 2017, reported by Torbert et al. (2018), https://doi.org/10.1126/science.aat2998 and reconstructed with the original inertia-less version by Hasegawa et al. 
(2019), https://doi.org/10.1029/2018ja026051, which demonstrates that the new method better performs in recovering the electric field and electron streamlines than the original version.",2021-11-17 +34962442,"A multilevel perspective on goals, barriers, and facilitators of school-based asthma management.","

Background

School-based asthma care is being increasingly used to combat uncontrolled pediatric asthma.

Objective

The purpose of these secondary analyses was to explore multi-level perspectives regarding school-based asthma medical management for inner city, school-aged children with poor asthma control.

Methods

Sixty-six participants from two large U.S. urban school districts and key stakeholders participated in 1:1 interviews and focus groups. Participants were selected from across the asthma care community (children/caregivers, school personnel, nurses, pharmacists, healthcare providers, and administrators/insurers). Qualitative and descriptive techniques were used to analyze data.

Results

Goals: Children/caregivers prioritized living a normal active life with few asthma worries. Other stakeholders prioritized reducing students' asthma-related emergency room visits and lost learning time. Facilitators: Continuity of care, strong relationships between care community members, and incentivizers were commonly suggested facilitators. School-based asthma management was viewed as a strong facilitator, particularly in the presence of a full-time school nurse. Barriers: Four themes were identified. (1) Greater systems and policy support for asthma management is needed in general, and at school in particular. (2) Overburdened families and systems often operate in crisis-mode, and asthma management is often not a priority until crisis is reached. (3) Discordance and distrust between members of the asthma care community can hinder shared asthma management. (4) Better communication is needed at all levels to improve care.

Conclusion

Moving away from a crisis-based approach to asthma management for high-risk children will require increased systemic support for proactive asthma care and optimized communication within the asthma care community.Supplemental data for this article is available online at https://dx.doi.org/10.1080/02770903.2021.2018704.",2021-12-28 +34791064,COBREXA.jl: constraint-based reconstruction and exascale analysis. ,"COBREXA.jl is a Julia package for scalable, high-performance constraint-based reconstruction and analysis of very large-scale biological models. Its primary purpose is to facilitate the integration of modern high performance computing environments with the processing and analysis of large-scale metabolic models of challenging complexity. We report the architecture of the package, and demonstrate how the design promotes analysis scalability on several use-cases with multi-organism community models. https://doi.org/10.17881/ZKCR-BT30. Supplementary data are available at Bioinformatics online.",2021-11-16 +33891549,Winning Solutions and Post-Challenge Analyses of the ChaLearn AutoDL Challenge 2019.,"This paper reports the results and post-challenge analyses of ChaLearn's AutoDL challenge series, which helped sorting out a profusion of AutoML solutions for Deep Learning (DL) that had been introduced in a variety of settings, but lacked fair comparisons. All input data modalities (time series, images, videos, text, tabular) were formatted as tensors and all tasks were multi-label classification problems. Code submissions were executed on hidden tasks, with limited time and computational resources, pushing solutions that get results quickly. In this setting, DL methods dominated, though popular Neural Architecture Search (NAS) was impractical. Solutions relied on fine-tuned pre-trained networks, with architectures matching data modality. Post-challenge tests did not reveal improvements beyond the imposed time limit. 
While no component is particularly original or novel, a high level modular organization emerged featuring a ""meta-learner"", ""data ingestor"", ""model selector"", ""model/learner"", and ""evaluator"". This modularity enabled ablation studies, which revealed the importance of (off-platform) meta-learning, ensembling, and efficient data management. Experiments on heterogeneous module combinations further confirm the (local) optimality of the winning solutions. Our challenge legacy includes an ever-lasting benchmark (http://autodl.chalearn.org), the open-sourced code of the winners, and a free ""AutoDL self-service.""",2021-08-04 +34871982,Developing and validating natural language processing algorithms for radiology reports compared to ICD-10 codes for identifying venous thromboembolism in hospitalized medical patients.,"

Background

Identifying venous thromboembolism (VTE) from large clinical and administrative databases is important for research and quality improvement.

Objective

To develop and validate natural language processing (NLP) algorithms to identify VTE from radiology reports among general internal medicine (GIM) inpatients.

Methods

This cross-sectional study included GIM hospitalizations between April 1, 2010 and March 31, 2017 at 5 hospitals in Toronto, Ontario, Canada. We developed NLP algorithms to identify pulmonary embolism (PE) and deep venous thrombosis (DVT) from radiologist reports of thoracic computed tomography (CT), extremity compression ultrasound (US), and nuclear ventilation-perfusion (VQ) scans in a training dataset of 1551 hospitalizations. We compared the accuracy of our NLP algorithms, the previously-published ""simpleNLP"" tool, and administrative discharge diagnosis codes (ICD-10-CA) for PE and DVT to the ""gold standard"" manual review in a separate random sample of 4000 GIM hospitalizations.

Results

Our NLP algorithms were highly accurate for identifying DVT from US, with sensitivity 0.94, positive predictive value (PPV) 0.90, and Area Under the Receiver-Operating-Characteristic Curve (AUC) 0.96; and in identifying PE from CT, with sensitivity 0.91, PPV 0.89, and AUC 0.96. Administrative diagnosis codes and the simple NLP tool were less accurate for DVT (ICD-10-CA sensitivity 0.63, PPV 0.43, AUC 0.81; simpleNLP sensitivity 0.41, PPV 0.36, AUC 0.66) and PE (ICD-10-CA sensitivity 0.83, PPV 0.70, AUC 0.91; simpleNLP sensitivity 0.89, PPV 0.62, AUC 0.92).

Conclusions

Administrative diagnosis codes are unreliable in identifying VTE in hospitalized patients. We developed highly accurate NLP algorithms to identify VTE from radiology reports in a multicentre sample and have made the algorithms freely available to the academic community with a user-friendly tool (https://lks-chart.github.io/CHARTextract-docs/08-downloads/rulesets.html#venous-thromboembolism-vte-rulesets).",2021-11-27 +30407583,FusionGDB: fusion gene annotation DataBase.,"Gene fusion is one of the hallmarks of cancer genome via chromosomal rearrangement initiated by DNA double-strand breakage. To date, many fusion genes (FGs) have been established as important biomarkers and therapeutic targets in multiple cancer types. To better understand the function of FGs in cancer types and to promote the discovery of clinically relevant FGs, we built FusionGDB (Fusion Gene annotation DataBase) available at https://ccsm.uth.edu/FusionGDB. We collected 48 117 FGs across pan-cancer from three representative fusion gene resources: the improved database of chimeric transcripts and RNA-seq data (ChiTaRS 3.1), an integrative resource for cancer-associated transcript fusions (TumorFusions), and The Cancer Genome Atlas (TCGA) fusions by Gao et al. For these ∼48K FGs, we performed functional annotations including gene assessment across pan-cancer fusion genes, open reading frame (ORF) assignment, and retention search of 39 protein features based on gene structures of multiple isoforms with different breakpoints. We also provided the fusion transcript and amino acid sequences according to multiple breakpoints and transcript isoforms. Our analyses identified 331, 303 and 667 in-frame FGs with retaining kinase, DNA-binding, and epigenetic factor domains, respectively, as well as 976 FGs lost protein-protein interaction. 
FusionGDB provides six categories of annotations: FusionGeneSummary, FusionProtFeature, FusionGeneSequence, FusionGenePPI, RelatedDrug and RelatedDisease.",2019-01-01 +31681953,Mabellini: a genome-wide database for understanding the structural proteome and evaluating prospective antimicrobial targets of the emerging pathogen Mycobacterium abscessus. ,"Mycobacterium abscessus, a rapid growing, multidrug resistant, nontuberculous mycobacteria, can cause a wide range of opportunistic infections, particularly in immunocompromised individuals. M. abscessus has emerged as a growing threat to patients with cystic fibrosis, where it causes accelerated inflammatory lung damage, is difficult and sometimes impossible to treat and can prevent safe transplantation. There is therefore an urgent unmet need to develop new therapeutic strategies. The elucidation of the M. abscessus genome in 2009 opened a wide range of research possibilities in the field of drug discovery that can be more effectively exploited upon the characterization of the structural proteome. Where there are no experimental structures, we have used the available amino acid sequences to create 3D models of the majority of the remaining proteins that constitute the M. abscessus proteome (3394 proteins and over 13 000 models) using a range of up-to-date computational tools, many developed by our own group. The models are freely available for download in an on-line database, together with quality data and functional annotation. Furthermore, we have developed an intuitive and user-friendly web interface (http://www.mabellinidb.science) that enables easy browsing, querying and retrieval of the proteins of interest. We believe that this resource will be of use in evaluating the prospective targets for design of antimicrobial agents and will serve as a cornerstone to support the development of new molecules to treat M. 
abscessus infections.",2019-01-01 +33113081,Differentially modulated proteins associated with Leishmaniasis-a systematic review of in-vivo and in-vitro studies.,"High-throughput proteomic technologies are widely used for understanding the disease mechanism, drug-resistant mechanism, and to identify drug targets and markers for diagnostics. Studies with proteomics applications, relating to Leishmaniasis, are being constantly reported in the literature. However, from such studies, a readily accessible knowledge of differentially modulated proteins associated with Leishmaniasis is lacking. Hence, we performed a systematic review concerning differentially modulated proteins (DMP) in Leishmania as well as host infected with Leishmania from the published articles between the years 2000 and 2019. This review is classified into five different sections, namely, DMP in the host after Leishmania infection, DMP between different strains of Leishmania, DMP in drug-resistant Leishmania, DMP in Leishmania under stress, and DMP in different life stages of Leishmania. A lot of consensuses could be observed among the DMP in drug-resistant and stressed Leishmania. In addition to the review, a database was constructed with the data collected in this study (protein accession ID, protein name, gene name, host organism, experimental conditions, fold change, and regulatory data). A total of 2635 records are available in the database. We believe this review and the database will help the researcher in understanding the disease better and provide information for the targeted proteomics study related to Leishmaniasis. Database availability: http://ldepdb.biomedinformri.com/ .",2020-10-28 +30247620,The Comparative Toxicogenomics Database: update 2019.,"The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is a premier public resource for literature-based, manually curated associations between chemicals, gene products, phenotypes, diseases, and environmental exposures. 
In this biennial update, we present our new chemical-phenotype module that codes chemical-induced effects on phenotypes, curated using controlled vocabularies for chemicals, phenotypes, taxa, and anatomical descriptors; this module provides unique opportunities to explore cellular and system-level phenotypes of the pre-disease state and allows users to construct predictive adverse outcome pathways (linking chemical-gene molecular initiating events with phenotypic key events, diseases, and population-level health outcomes). We also report a 46% increase in CTD manually curated content, which when integrated with other datasets yields more than 38 million toxicogenomic relationships. We describe new querying and display features for our enhanced chemical-exposure science module, providing greater scope of content and utility. As well, we discuss an updated MEDIC disease vocabulary with over 1700 new terms and accession identifiers. To accommodate these increases in data content and functionality, CTD has upgraded its computational infrastructure. These updates continue to improve CTD and help inform new testable hypotheses about the etiology and mechanisms underlying environmentally influenced diseases.",2019-01-01 +33950258,LipidSuite: interactive web server for lipidomics differential and enrichment analysis.,"Advances in mass spectrometry enabled high throughput profiling of lipids but differential analysis and biological interpretation of lipidomics datasets remains challenging. To overcome this barrier, we present LipidSuite, an end-to-end differential lipidomics data analysis server. LipidSuite offers a step-by-step workflow for preprocessing, exploration, differential analysis and enrichment analysis of untargeted and targeted lipidomics. Three lipidomics data formats are accepted for upload: mwTab file from Metabolomics Workbench, Skyline CSV Export, and a numerical matrix. Experimental variables to be used in analysis are uploaded in a separate file. 
Conventional lipid names are automatically parsed to enable lipid class and chain length analyses. Users can interactively explore data, choose subsets based on sample types or lipid classes or characteristics, and conduct univariate, multivariate and unsupervised analyses. For complex experimental designs and clinical cohorts, LipidSuite offers confounding variables adjustment. Finally, data tables and plots can be both interactively viewed or downloaded for publication or reports. Overall, we anticipate this free, user-friendly webserver to facilitate differential lipidomics data analysis and re-analysis, and fully harness biological interpretation from lipidomics datasets. LipidSuite is freely available at http://suite.lipidr.org.",2021-07-01 +,Nutritional composition of processed baby foods targeted at infants from 0–12 months,"In the last decade, there has been an increasing demand for ready-to-eat infant meals in Spain. However, the used food composition databases do not include processed products intended for infants under 12 months of age. Thus, we aimed at the creation of a food composition table including these products. Nutritional composition data were collected from the label of 568 products including infant formula, cereal products and complementary foods (mixed puree or snacks and desserts); additionally, breast-milk (11 preterm and term samples) nutritional composition were estimated from scientific publications available to date. Information was compiled into a table available in “open access” at the website http://ucc.uniovi.es/formacioncientifica/recursosinv including information about 53 dietary components of infant formula, 28 for breast milk, 30 for cereal products and 14 for complementary foods. The infant formula ́s composition from 0 to 6 months were found similar to the values reported for mature breast milk (10–12 weeks) but different from breast milk from 1 to 4 weeks. 
One serving of cereals has been found to satisfy more than the 50% of the DRI for protein and carbohydrates. This information constitutes an essential step in order to understand the diet-health relationships in the early stages of life in the Spanish population.",2019-06-01 +29069413,WormBase 2017: molting into a new stage.,"WormBase (http://www.wormbase.org) is an important knowledge resource for biomedical researchers worldwide. To accommodate the ever increasing amount and complexity of research data, WormBase continues to advance its practices on data acquisition, curation and retrieval to most effectively deliver comprehensive knowledge about Caenorhabditis elegans, and genomic information about other nematodes and parasitic flatworms. Recent notable enhancements include user-directed submission of data, such as micropublication; genomic data curation and presentation, including additional genomes and JBrowse, respectively; new query tools, such as SimpleMine, Gene Enrichment Analysis; new data displays, such as the Person Lineage browser and the Summary of Ontology-based Annotations. Anticipating more rapid data growth ahead, WormBase continues the process of migrating to a cutting-edge database technology to achieve better stability, scalability, reproducibility and a faster response time. To better serve the broader research community, WormBase, with five other Model Organism Databases and The Gene Ontology project, have begun to collaborate formally as the Alliance of Genome Resources.",2018-01-01 +30329070,RaftProt V2: understanding membrane microdomain function through lipid raft proteomes.,"Cellular membranes feature dynamic submicrometer-scale lateral domains termed lipid rafts, membrane rafts or glycosphingolipid-enriched microdomains (GEM). Numerous proteomics studies have been conducted on the lipid raft proteome, however, interpretation of individual studies is limited by potential undefined contaminant proteins. 
To enable integrated analyses, we previously developed RaftProt (http://lipid-raft-database.di.uq.edu.au/), a searchable database of mammalian lipid raft-associated proteins. Despite being a highly used resource, further developments in annotation and utilities were required. Here, we present RaftProt V2 (http://raftprot.org), an improved update of RaftProt. Besides the addition of new datasets and re-mapping of all entries to both UniProt and UniRef IDs, we have implemented a stringent annotation based on experimental evidence level to assist in identification of possible contaminant proteins. RaftProt V2 allows for simultaneous search of multiple proteins/experiments at the cell/tissue type and UniRef/Gene level, where correlations, interactions or overlaps can be investigated. The web-interface has been completely re-designed to enable interactive data and subset selection, correlation analysis and network visualization. Overall, RaftProt aims to advance our understanding of lipid raft function through integrative analysis of datasets collected from diverse tissue and conditions. Database URL: http://raftprot.org.",2019-01-01 +29186576,AutDB: a platform to decode the genetic architecture of autism.,"AutDB is a deeply annotated resource for exploring the impact of genetic variations associated with autism spectrum disorders (ASD). First released in 2007, AutDB has evolved into a multi-modular resource of diverse types of genetic and functional evidence related to ASD. Current modules include: Human Gene, which annotates all ASD-linked genes and their variants; Animal Model, which catalogs behavioral, anatomical and physiological data from rodent models of ASD; Protein Interaction (PIN), which builds interactomes from direct relationships of protein products of ASD genes; and Copy Number Variant (CNV), which catalogs deletions and duplications of chromosomal loci identified in ASD. 
A multilevel data-integration strategy is utilized to connect the ASD genes to the components of the other modules. All information in this resource is manually curated by expert scientists from primary scientific publications and is referenced to source articles. AutDB is actively maintained with a rigorous quarterly data release schedule. As of June 2017, AutDB contains detailed annotations for 910 genes, 2197 CNV loci, 1060 rodent models and 38 296 PINs. With its widespread use by the research community, AutDB serves as a reference resource for analysis of large datasets, accelerating ASD research and potentially leading to targeted drug treatments. AutDB is available at http://autism.mindspec.org/autdb/Welcome.do.",2018-01-01 +28968784,MicroScope-an integrated resource for community expertise of gene functions and comparative analysis of microbial genomic and metabolic data.,"The overwhelming list of new bacterial genomes becoming available on a daily basis makes accurate genome annotation an essential step that ultimately determines the relevance of thousands of genomes stored in public databanks. The MicroScope platform (http://www.genoscope.cns.fr/agc/microscope) is an integrative resource that supports systematic and efficient revision of microbial genome annotation, data management and comparative analysis. Starting from the results of our syntactic, functional and relational annotation pipelines, MicroScope provides an integrated environment for the expert annotation and comparative analysis of prokaryotic genomes. It combines tools and graphical interfaces to analyze genomes and to perform the manual curation of gene function in a comparative genomics and metabolic context. In this article, we describe the free-of-charge MicroScope services for the annotation and analysis of microbial (meta)genomes, transcriptomic and re-sequencing data. 
Then, the functionalities of the platform are presented in a way providing practical guidance and help to the nonspecialists in bioinformatics. Newly integrated analysis tools (i.e. prediction of virulence and resistance genes in bacterial genomes) and original method recently developed (the pan-genome graph representation) are also described. Integrated environments such as MicroScope clearly contribute, through the user community, to help maintaining accurate resources.",2019-07-01 +31942161,"Harvestmen occurrence database (Arachnida, Opiliones) of the Museu Paraense Emílio Goeldi, Brazil.","

Background

We present a dataset with information from the Opiliones collection of the Museu Paraense Emílio Goeldi, Northern Brazil. This collection currently has 6,400 specimens distributed in 13 families, 30 genera and 32 species and holotypes of four species: Imeri ajuba Coronato-Ribeiro, Pinto-da-Rocha & Rheims, 2013, Phareicranaus patauateua Pinto-da-Rocha & Bonaldo, 2011, Protimesius trocaraincola Pinto-da-Rocha, 1997 and Sickesia tremembe Pinto-da-Rocha & Carvalho, 2009. The material of the collection is exclusive from Brazil, mostly from the Amazon Region. The dataset is now available for public consultation on the Sistema de Informação sobre a Biodiversidade Brasileira (SiBBr) (https://ipt.sibbr.gov.br/goeldi/resource?r=museuparaenseemiliogoeldi-collection-aracnologiaopiliones). SiBBr is the Brazilian Biodiversity Information System, an initiative of the government and the Brazilian node of the Global Biodiversity Information Facility (GBIF), which aims to consolidate and make primary biodiversity data available on a platform (Dias et al. 2017).

New information

Harvestmen or Opiliones constitute the third largest arachnid order, with approximately 6,500 described species. Brazil is the holder of the greatest diversity in the world, with more than 1,000 described species, 95% (960 species) of which are endemic to the country. Of these, 32 species were identified and deposited in the collection of the Museu Paraense Emílio Goeldi.",2019-12-31 +33170789,COVIDGR Dataset and COVID-SDNet Methodology for Predicting COVID-19 Based on Chest X-Ray Images.,"Currently, Coronavirus disease (COVID-19), one of the most infectious diseases in the 21st century, is diagnosed using RT-PCR testing, CT scans and/or Chest X-Ray (CXR) images. CT (Computed Tomography) scanners and RT-PCR testing are not available in most medical centers and hence in many cases CXR images become the most time/cost effective tool for assisting clinicians in making decisions. Deep learning neural networks have a great potential for building COVID-19 triage systems and detecting COVID-19 patients, especially patients with low severity. Unfortunately, current databases do not allow building such systems as they are highly heterogeneous and biased towards severe cases. This article is three-fold: (i) we demystify the high sensitivities achieved by most recent COVID-19 classification models, (ii) under a close collaboration with Hospital Universitario Clínico San Cecilio, Granada, Spain, we built COVIDGR-1.0, a homogeneous and balanced database that includes all levels of severity, from normal with Positive RT-PCR, Mild, Moderate to Severe. COVIDGR-1.0 contains 426 positive and 426 negative PA (PosteroAnterior) CXR views and (iii) we propose COVID Smart Data based Network (COVID-SDNet) methodology for improving the generalization capacity of COVID-classification models. Our approach reaches good and stable results with an accuracy of [Formula: see text], [Formula: see text], [Formula: see text] in severe, moderate and mild COVID-19 severity levels. 
Our approach could help in the early detection of COVID-19. COVIDGR-1.0 along with the severity level labels are available to the scientific community through this link https://dasci.es/es/transferencia/open-data/covidgr/.",2020-12-04 +34004070,A Multiplex One-Step RT-qPCR Protocol to Detect SARS-CoV-2 in NP/OP Swabs and Saliva.,"Since December 2019, SARS-CoV-2 has spread extensively throughout the world, with more than 117 million reported cases and 2.6 million deaths (Johns Hopkins coronavirus resource center, https://coronavirus.jhu.edu/map.html). Detecting the virus is the first step in diagnosing the infection, followed by quarantine to prevent transmission. Nasopharyngeal/oropharyngeal swabs (NP/OP) and saliva are two specimen types that are most often analyzed to detect SARS-CoV-2 by molecular tests that detect viral RNA or by antigen/antibody tests that detect viral proteins and/or the host immune response against the virus. Compared to antigen/antibody tests, molecular tests are highly sensitive and specific for detecting the virus. A significant drawback is that specimen collection requirements are specific to each test and cannot be interchanged with another test. Some tests are qualified to be used on NP swabs or saliva, but not both specimen types. Even with NP swabs, a test may be qualified to detect the virus only with swabs collected in viral transport medium (VTM) but not in other media. These restrictive pre-analytic steps are disadvantageous in that a lab would have to develop and validate different tests for SARS-CoV-2 depending on the specimen type and collection media, with added setup cost, infrastructure, and training requirements. To overcome these problems, we developed and validated a cost-effective multiplex reverse-transcription real-time PCR assay that can be used to detect SARS-CoV-2 in different specimen types. 
The assay is highly sensitive and specific, can be used to detect the virus in saliva as well as NP swabs collected in different media such as VTM, saline, and commercial preservative fluid, and serves as one test for all applications. The protocol also describes an optimal laboratory setup and unidirectional workflow for detecting SARS-CoV-2 by RT-qPCR. © 2021 The Authors. Current Protocols published by Wiley Periodicals LLC. Basic Protocol 1: Manual viral nucleic acid extraction from NP/OP swabs collected in different media, and from saliva Alternate Protocol 1: Low-throughput automated extraction on the Qiagen EZ1 Advanced XL machine (1-14 samples) Alternate Protocol 2: High-throughput automated extraction on the Kingfisher Flex machine (1-96 samples) Basic Protocol 2: Multiplex RT-qPCR protocol to detect SARS-CoV-2 Alternate Protocol 3: Multiplex one-step RT-qPCR protocol to detect SARS-CoV-2 with S and E gene probes labeled with the same fluorochrome.",2021-05-01 +31899510,A genome alignment of 120 mammals highlights ultraconserved element variability and placenta-associated enhancers. ,"Multiple alignments of mammalian genomes have been the basis of many comparative genomic studies aiming at annotating genes, detecting regions under evolutionary constraint, and studying genome evolution. A key factor that affects the power of comparative analyses is the number of species included in a genome alignment. To utilize the increased number of sequenced genomes and to provide an accessible resource for genomic studies, we generated a mammalian genome alignment comprising 120 species. We used this alignment and the CESAR method to provide protein-coding gene annotations for 119 non-human mammals. Furthermore, we illustrate the utility of this alignment by 2 exemplary analyses. First, we quantified how variable ultraconserved elements (UCEs) are among placental mammals. 
Leveraging the high taxonomic coverage in our alignment, we estimate that UCEs contain on average 4.7%-15.6% variable alignment columns. Furthermore, we show that the center regions of UCEs are generally most constrained. Second, we identified enhancer sequences that are only conserved in placental mammals. We found that these enhancers are significantly associated with placenta-related genes, suggesting that some of these enhancers may be involved in the evolution of placental mammal-specific aspects of the placenta. The 120-mammal alignment and all other data are available for analysis and visualization in a genome browser at https://genome-public.pks.mpg.de/ and for download at https://bds.mpi-cbg.de/hillerlab/120MammalAlignment/.",2020-01-01 +33850871,Development and validation of a prediction model for lung adenocarcinoma based on RNA-binding protein.,"

Background

RNA-binding proteins (RBPs) have been found to participate in the development and progression of cancer. This present study aimed to construct a RBP-based prognostic prediction model for lung adenocarcinoma (LUAD).

Methods

RNA sequencing data and corresponding clinical information were acquired from The Cancer Genome Atlas (TCGA) and served as a training set. The prediction model was validated using the dataset in Gene Expression Omnibus (GEO) databases. Univariate and multivariate Cox regression analyses were conducted to identify the RBPs associated with survival. R software (http://www.r-project.org) was used for analysis in this study.

Results

Nine hub prognostic RBPs (CIRBP, DARS2, DDX24, GAPDH, LARP6, SNRPE, WDR3, ZC3H12C, ZC3H12D) were identified by univariate Cox regression analysis and multivariate Cox regression analysis. Using a risk score based on the nine-hub RBP model, we separated the LUAD patients into a low-risk group and a high-risk group. The outcomes revealed that patients in the high-risk group had poorer survival than those in the low-risk group. This signature was validated in the GEO database. Further study revealed that the risk score can be an independent prognostic biomarker for LUAD. A nomogram based on the nine hub RBPs was built to quantitatively predict the prognosis of LUAD patients.

Conclusions

Our nine-gene signature model could be used as a marker to predict the prognosis of LUAD and has potential for use in treatment individualization.",2021-03-01 +34927668,"TMBleR, a bioinformatic tool to optimize TMB estimation and predictive power. ","Tumor mutational burden (TMB) has been proposed as a predictive biomarker for immunotherapy response in cancer patients, as it is thought to enrich for tumors with high neoantigen load. TMB assessed by Whole Exome Sequencing (WES) is considered the gold standard but remains confined to research settings. In the clinical setting, targeted gene panels sampling various genomic sizes along with diverse strategies to estimate TMB were proposed and no real standard has emerged yet. We provide the community with TMBleR, a tool to measure the clinical impact of various strategies of panel-based TMB measurement. R package and docker container (GPL-3 Open Source license): https://acc-bioinfo.github.io/TMBleR/. Graphical-user interface website: https://bioserver.ieo.it/shiny/app/tmbler. Supplementary data are available at Bioinformatics online.",2021-12-20 +31553576,"The EFI Web Resource for Genomic Enzymology Tools: Leveraging Protein, Genome, and Metagenome Databases to Discover Novel Enzymes and Metabolic Pathways.","The assignment of functions to uncharacterized proteins discovered in genome projects requires easily accessible tools and computational resources for large-scale, user-friendly leveraging of the protein, genome, and metagenome databases by experimentalists. 
This article describes the web resource developed by the Enzyme Function Initiative (EFI; accessed at https://efi.igb.illinois.edu/ ) that provides ""genomic enzymology"" tools (""web tools"") for (1) generating sequence similarity networks (SSNs) for protein families (EFI-EST); (2) analyzing and visualizing genome context of the proteins in clusters in SSNs (in genome neighborhood networks, GNNs, and genome neighborhood diagrams, GNDs) (EFI-GNT); and (3) prioritizing uncharacterized SSN clusters for functional assignment based on metagenome abundance (chemically guided functional profiling, CGFP) (EFI-CGFP). The SSNs generated by EFI-EST are used as the input for EFI-GNT and EFI-CGFP, enabling easy transfer of information among the tools. The networks are visualized and analyzed using Cytoscape, a widely used desktop application; GNDs and CGFP heatmaps summarizing metagenome abundance are viewed within the tools. We provide a detailed example of the integrated use of the tools with an analysis of glycyl radical enzyme superfamily (IPR004184) found in the human gut microbiome. This analysis demonstrates that (1) SwissProt annotations are not always correct, (2) large-scale genome context analyses allow the prediction of novel metabolic pathways, and (3) metagenome abundance can be used to identify/prioritize uncharacterized proteins for functional investigation.",2019-10-04 +30587128,Plant stress RNA-seq Nexus: a stress-specific transcriptome database in plant cells.,"

Background

Abiotic and biotic stresses severely affect the growth and reproduction of plants and crops. Determining the critical molecular mechanisms and cellular processes in response to stresses will provide biological insight for addressing both climate change and food crises. RNA sequencing (RNA-Seq) is a revolutionary tool that has been used extensively in plant stress research. However, no existing large-scale RNA-Seq database has been designed to provide information on the stress-specific differentially expressed transcripts that occur across diverse plant species and various stresses.

Results

We have constructed a comprehensive database, the plant stress RNA-Seq nexus (PSRN), which includes 12 plant species, 26 plant-stress RNA-Seq datasets, and 937 samples. All samples are assigned to 133 stress-specific subsets, which are constructed into 254 subset pairs, a comparison between selected two subsets, for stress-specific differentially expressed transcript identification.

Conclusions

PSRN is an open resource for intuitive data exploration, providing expression profiles of coding-transcript/lncRNA and identifying which transcripts are differentially expressed between different stress-specific subsets, in order to support researchers generating new biological insights and hypotheses in molecular breeding or evolution. PSRN is freely available at http://syslab5.nchu.edu.tw/PSRN .",2018-12-27 +34817117,Detection of novel paramyxoviruses in Chaerephon bat species in Nigeria and phylogenetics of paramyxoviruses co-evolution with bats in Africa.,"Bat paramyxoviruses (PmV) are a diverse group of viruses and include zoonotic viruses such as henipaviruses. Members of this group in other continents have been associated with severe respiratory and neurological infections in animals and humans. Furthermore, despite the richness of diverse bat species that can transmit this virus in African countries like Nigeria, there is very scanty information as to the presence and co-evolution of paramyxoviruses in bats. There is a need for continuous surveillance of zoonotic viruses and their biological reservoirs as this will help in the prevention and management of pathogens' spillovers. This study detected novel paramyxoviruses in Chaerephon nigeriae bat species found in Badagry, Lagos. Phylogenetic analyses of paramyxovirus sequences' co-evolution with frugivorous and insectivorous bats circulating in African countries were also performed using sequences of African origin available in the Database of Bat-Associated Viruses (DBatVir: http://www.mgc.ac.cn/DBatVir/). Oral swabs (n = 18) and blood samples (n = 32) were collected from C. nigeriae bats in Badagry, Lagos. The L gene of bat paramyxovirus was detected in all oral swabs using PCR techniques. Six of the amplicons were successfully sequenced. Estimated phylogenies placed the sequences in close relationship with those isolated from insectivorous bats. 
Phylogenetic analyses of previously sequenced isolates in the African region showed the likelihood of different co-evolution mechanisms of paramyxoviruses with frugivorous bats compared with insectivorous bats. This may be due to codon usage bias of the L gene. Spatial distribution of paramyxoviruses in African countries showed limited ongoing surveillance of this virus in the continent, especially in southern and northern countries. Extensive surveillance of paramyxoviruses with possible zoonotic potentials among bat species in the continent is recommended. This will provide further insights into co-evolution as well as prevent possible spillover into the human population.",2021-11-24 +32184364,FORENSIC: an Online Platform for Fecal Source Identification. ,"Sewage overflows, agricultural runoff, and stormwater discharges introduce fecal pollution into surface waters. Distinguishing these sources is critical for evaluating water quality and formulating remediation strategies. With the falling costs of sequencing, microbial community-based water quality assessment tools are under development. However, their application is limited by the need to build reference libraries, which requires extensive sampling of sources and bioinformatic expertise. Here, we introduce FORest Enteric Source IdentifiCation (FORENSIC; https://forensic.sfs.uwm.edu/), an online, library-independent source tracking platform based on random forest classification and 16S rRNA gene amplicon sequences to identify in environmental samples common fecal contamination sources, including humans, domestic pets, and agricultural animals. FORENSIC relies on a broad reference signature database of Bacteroidales and Clostridiales, two predominant bacterial groups that have coevolved with their hosts. As a result, these groups demonstrate cohesive and reliable assemblage patterns within mammalian species or among species sharing the same diet/physiology. 
We created a scalable and extensible platform that we tested for global applicability using samples collected in distant geographic locations. This Web application offers a fast and intuitive approach for fecal source identification, particularly in sewage-contaminated waters. IMPORTANCE: FORENSIC is an online platform to identify sources of fecal pollution without the need to create reference libraries. FORENSIC is based on the ability of random forest classification to extract cohesive source microbial signatures to create classifiers despite individual variability and to detect the signatures in environmental samples. We primarily focused on defining sewage signals, which are associated with a high human health risk in polluted waters. To test for fecal contamination sources, the platform only requires paired-end reads targeting the V4 or V6 regions of the 16S rRNA gene. We demonstrated that we could use V4V5 reads trimmed to the V4 positions to generate the reference signature. The systematic workflow we describe to create and validate the signatures could be applied to many disciplines. With the increasing gap between advancing technology and practical applications, this platform makes sequence-based water quality assessments accessible to the public health and water resource communities.",2020-03-17 +32669379,In Silico Genotyping of Escherichia coli Isolates for Extraintestinal Virulence Genes by Use of Whole-Genome Sequencing Data. ,"Extraintestinal pathogenic Escherichia coli (ExPEC) is the leading cause in humans of urinary tract infection and bacteremia. The previously published web tool VirulenceFinder (http://cge.cbs.dtu.dk/services/VirulenceFinder/) uses whole-genome sequencing (WGS) data for in silico characterization of E. coli isolates and enables researchers and clinical health personnel to quickly extract and interpret virulence-relevant information from WGS data. In this study, 38 ExPEC-associated virulence genes were added to the existing E. 
coli VirulenceFinder database. In total, 14,441 alleles were downloaded. A total of 1,890 distinct alleles were added to the database after removal of redundant sequences and analysis of the remaining alleles for open reading frames (ORFs). The database now contains 139 genes—of which 44 are related to ExPEC—and 2,826 corresponding alleles. Construction of the database included validation against 27 primer pairs from previous studies, a search for serotype-specific P fimbriae papA alleles, and a BLASTn confirmation of seven genes (etsC, iucC, kpsE, neuC, sitA, tcpC, and terC) not covered by the primers. The augmented database was evaluated using (i) a panel of nine control strains and (ii) 288 human-source E. coli strains classified by PCR as ExPEC and non-ExPEC. We observed very high concordance (average, 93.4%) between PCR and WGS findings, but WGS identified more alleles. In conclusion, the addition of 38 ExPEC-associated genes and the associated alleles to the E. coli VirulenceFinder database allows for a more complete characterization of E. coli isolates based on WGS data, which has become increasingly important considering the plasticity of the E. coli genome.",2020-09-22 +34313401,A mathematical dashboard for the analysis of Italian COVID-19 epidemic data.,"An analysis of the COVID-19 epidemic is proposed on the basis of the epiMOX dashboard (publicly accessible at https://www.epimox.polimi.it) that deals with data of the epidemic trends and outbreaks in Italy from late February 2020. Our analysis provides an immediate appreciation of the past epidemic development, together with its current trends by fostering a deeper interpretation of available data through several critical epidemic indicators. 
In addition, we complement the epiMOX dashboard with a predictive tool based on an epidemiological compartmental model, named SUIHTER, for the forecast on the near future epidemic evolution.",2021-08-08 +34434652,IdentPMP: identification of moonlighting proteins in plants using sequence-based learning models.,"

Background

A moonlighting protein refers to a protein that can perform two or more functions. Since the current moonlighting protein prediction tools mainly focus on the proteins in animals and microorganisms, and there are differences in the cells and proteins between animals and plants, these may cause the existing tools to predict plant moonlighting proteins inaccurately. Hence, the availability of a benchmark data set and a prediction tool specific for plant moonlighting protein are necessary.

Methods

This study used some protein feature classes from the data set constructed in house to develop a web-based prediction tool. In the beginning, we built a data set about plant protein and reduced redundant sequences. We then performed feature selection, feature normalization and feature dimensionality reduction on the training data. Next, machine learning methods for preliminary modeling were used to select feature classes that performed best in plant moonlighting protein prediction. This selected feature was incorporated into the final plant protein prediction tool. After that, we compared five machine learning methods and used grid searching to optimize parameters, and the most suitable method was chosen as the final model.

Results

The prediction results indicated that the eXtreme Gradient Boosting (XGBoost) performed best, which was used as the algorithm to construct the prediction tool, called IdentPMP (Identification of Plant Moonlighting Proteins). The results of the independent test set shows that the area under the precision-recall curve (AUPRC) and the area under the receiver operating characteristic curve (AUC) of IdentPMP is 0.43 and 0.68, which are 19.44% (0.43 vs. 0.36) and 13.33% (0.68 vs. 0.60) higher than state-of-the-art non-plant specific methods, respectively. This further demonstrated that a benchmark data set and a plant-specific prediction tool was required for plant moonlighting protein studies. Finally, we implemented the tool into a web version, and users can use it freely through the URL: http://identpmp.aielab.net/.",2021-08-06 +34160596,LSTM-PHV: prediction of human-virus protein-protein interactions by LSTM with word2vec. ,"Viral infection involves a large number of protein-protein interactions (PPIs) between human and virus. The PPIs range from the initial binding of viral coat proteins to host membrane receptors to the hijacking of host transcription machinery. However, few interspecies PPIs have been identified, because experimental methods including mass spectrometry are time-consuming and expensive, and molecular dynamic simulation is limited only to the proteins whose 3D structures are solved. Sequence-based machine learning methods are expected to overcome these problems. We have first developed the LSTM model with word2vec to predict PPIs between human and virus, named LSTM-PHV, by using amino acid sequences alone. The LSTM-PHV effectively learnt the training data with a highly imbalanced ratio of positive to negative samples and achieved AUCs of 0.976 and 0.973 and accuracies of 0.984 and 0.985 on the training and independent datasets, respectively. 
In predicting PPIs between human and unknown or new virus, the LSTM-PHV learned greatly outperformed the existing state-of-the-art PPI predictors. Interestingly, learning of only sequence contexts as words is sufficient for PPI prediction. Use of uniform manifold approximation and projection demonstrated that the LSTM-PHV clearly distinguished the positive PPI samples from the negative ones. We presented the LSTM-PHV online web server and support data that are freely available at http://kurata35.bio.kyutech.ac.jp/LSTM-PHV.",2021-11-01 +,First Report of Grapevine (Vitis sp.) Cluster Blight Caused by Fusarium proliferatum in Russia,"Viticulture is an important economic sector in Russia, with 91,500 ha producing 580,000 tons in 2017 (according to Statistics Service of the Russian Federation, https://gks.ru/enterprise_economy). The monitoring of table “Euro-American” hybrid varieties Citron, Moldova, Augustin, and Ubiley Novocherkasska was conducted in a 50-ha vineyard in Krasnodar region in 2017. Blight of the peduncles and rachis was first observed during the flowering and later on berries of nearly 25 to 30% of each hybrid variety. The infected parts of the rachis were washed for 2 min under running water followed by 2% NaClO for 30 s and rinsed with sterilized water. Small pieces were separated and placed on potato dextrose agar (PDA). Colonies with abundant aerial mycelium were observed in 5 days. A hyphal tip was taken, put on PDA, and incubated for 7 to 10 days. The colonies of mycelium were originally white and then pink to pinkish-purple. Violet pigments diffused into the agar after 7 days of incubation at 25°C. Five single cultures were consistently isolated. Based on cultural and conidial morphology, isolates were identified as Fusarium proliferatum (Matsushima) Nirenberg (Leslie and Summerell 2006). 
On average through five isolates, macroconidia were slender, thin-walled, three to five septate, 20.7 to 45.5 × 2.8 to 6.1 μm (n = 50), with curved apical cells and poorly developed basal cells. Microconidia were thin-walled, hyaline, club-shaped, 4.5 to 10.8 × 1.5 to 3.2 μm (n = 50), and formed in chains and in false heads from monophialides and polyphialides. Chlamydospores were absent. For molecular identification, the internal transcribed spacer region (ITS), beta-tubulin (β-tub), translation elongation factor 1α (EF1α), and RNA polymerase II genes (RPB2) of one isolate were amplified, sequenced using primers ITS1/ITS4 (White et al. 1990), T1/T22 (O’Donnell and Cigelnik 1997), EF1f/EF2r, and 5f2/7cr and 7cf/11ar (O’Donnell et al. 2010) and deposited at GenBank (MK598060, MK598059, MK598058, and MK598061, respectively). BLAST analysis showed that the ITS, β-tub, EF1α, and RPB2 sequences had 100% identity to F. proliferatum (MK158221.1, LT841257.1, MH178093.1, and LT841252.1, respectively). In addition, the sequences of TUB and RPB2 showed 99.8 and 99.44% identities to the sequences of the Fusarium fujikuroi Nirenberg species complex (FD_01776 and FD_03678) in the Fusarium-ID database (Geiser et al. 2004). EF1α and ITS showed 99.84 and 97% identities to the sequences of F. proliferatum and F. concolor, respectively (FD_01389 and FD_01847). Multilocus sequence typing (MLST) using four genes in Fusarium MLST database showed 99.74% identity with F. fujikuroi (NRRL 13308). The pathogenicity tests of five isolates were conducted on clusters and young green shoots of the most common in Russia table hybrid varieties Moldova (late ripening red grape) and Augustin (early ripening white grape) in a single experiment. Fifteen shoots of each hybrid variety were inoculated with 50 μl of conidial suspension (1 × 106 conidia/ml) of each isolate and were maintained at 25°C and 70% relative humidity. 
Tissue necrosis of inoculated shoots and blight of clusters were observed in 4 days after inoculation. In 2 weeks, complete tissue blight was observed, similar to that observed in the field, whereas the control shoots treated with water remained asymptomatic. The pathogen was reisolated from the inoculated shoots, thus completing Koch’s postulates. Previously, F. proliferatum was identified as the pathogen causing fruit rot on grapevine in Pakistan and China (Ghuffar et al. 2018; Wang et al. 2015). Here we describe different symptoms caused by F. proliferatum on grapevine. The results of this study have great importance for improving phytosanitary monitoring and integrated disease control of vineyards, because F. proliferatum can seriously limit the production of table grapes in the Krasnodar region. To our knowledge, this is the first report of grapevine cluster blight caused by F. proliferatum in Russia.",2020-03-01 +30268942,MiPanda: A Resource for Analyzing and Visualizing Next-Generation Sequencing Transcriptomics Data.,"The Michigan Portal for the Analysis of NGS data portal (http://mipanda.org) is an open-access online resource that provides the scientific community with access to the results of a large-scale computational analysis of thousands of high-throughput RNA sequencing (RNA-seq) samples. The portal provides access to gene expression profiles, enabling users to interrogate expression of genes across myriad normal and cancer tissues and cell lines. From these data, tissue- and cancer-specific expression patterns can be identified. Gene-gene coexpression profiles can also be interrogated. The current portal contains data for over 20,000 RNA-seq samples and will be continually updated.",2018-09-27 +34809621,Stabilization of UCA1 by N6-methyladenosine RNA methylation modification promotes colorectal cancer progression.,"

Background

UCA1 is frequently upregulated in a variety of cancers, including CRC, and it can play an oncogenic role by various mechanisms. However, how UCA1 is regulated in cancer is largely unknown. In this study, we aimed to determine whether RNA methylation at N6-methyladenosine (m6A) can impact UCA1 expression in colorectal cancer (CRC).

Methods

qRT-PCR was performed to detect the level of UCA1 and IGF2BP2 in CRC samples. CRISPR/Cas9 was employed to knockout (KO) UCA1, METTL3 and WTAP in DLD-1 and HCT-116 cells, while rescue experiments were carried out to re-express METTL3 and WTAP in KO cells. Immunoprecipitation using m6A antibody was performed to determine the m6A modification of UCA1. In vivo pulldown assays using S1m tagging combined with site-direct mutagenesis was carried out to confirm the recognition of m6A-modified UCA1 by IGF2BP2. Cell viability was measured by MTT and colony formation assays. The expression of UCA1 and IGF2BP2 in TCGA CRC database was obtained from GEPIA ( http://gepia.cancer-pku.cn ).

Results

Our results revealed that IGF2BP2 serves as a reader for m6A modified UCA1 and that adenosine at 1038 of UCA1 is critical to the recognition by IGF2BP2. Importantly, we showed that m6A writers, METTL3 and WTAP positively regulate UCA1 expression. Mechanically, IGF2BP2 increases the stability of m6A-modified UCA1. Clinically, IGF2BP2 is upregulated in CRC tissues compared with normal tissues.

Conclusion

These results suggest that m6A modification is an important factor contributing to upregulation of UCA1 in CRC tissues.",2021-11-22 +28585374,SVMDLF: A novel R-based Web application for prediction of dipeptidyl peptidase 4 inhibitors.,"Dipeptidyl peptidase 4 (DPP4) is a well-known target for the antidiabetic drugs. However, currently available DPP4 inhibitor screening assays are costly and labor-intensive. It is important to create a robust in silico method to predict the activity of DPP4 inhibitor for the new lead finding. Here, we introduce an R-based Web application SVMDLF (SVM-based DPP4 Lead Finder) to predict the inhibitor of DPP4, based on support vector machine (SVM) model, predictions of which are confirmed by in vitro biological evaluation. The best model generated by MACCS structure fingerprint gave the Matthews correlation coefficient of 0.87 for the test set and 0.883 for the external test set. We screened Maybridge database consisting approximately 53,000 compounds. For further bioactivity assay, six compounds were shortlisted, and of six hits, three compounds showed significant DPP4 inhibitory activities with IC50 values ranging from 8.01 to 10.73 μm. This application is an OpenCPU server app which is a novel single-page R-based Web application for the DPP4 inhibitor prediction. The SVMDLF is freely available and open to all users at http://svmdlf.net/ocpu/library/dlfsvm/www/ and http://www.cdri.res.in/svmdlf/.",2017-07-11 +29036719,ChannelsDB: database of biomacromolecular tunnels and pores.,"ChannelsDB (http://ncbr.muni.cz/ChannelsDB) is a database providing information about the positions, geometry and physicochemical properties of channels (pores and tunnels) found within biomacromolecular structures deposited in the Protein Data Bank. 
Channels were deposited from two sources; from literature using manual deposition and from a software tool automatically detecting tunnels leading to the enzymatic active sites and selected cofactors, and transmembrane pores. The database stores information about geometrical features (e.g. length and radius profile along a channel) and physicochemical properties involving polarity, hydrophobicity, hydropathy, charge and mutability. The stored data are interlinked with available UniProt annotation data mapping known mutation effects to channel-lining residues. All structures with channels are displayed in a clear interactive manner, further facilitating data manipulation and interpretation. As such, ChannelsDB provides an invaluable resource for research related to deciphering the biological function of biomacromolecular channels.",2018-01-01 +33416864,SoluProt: Prediction of Soluble Protein Expression in Escherichia coli. ,"Poor protein solubility hinders the production of many therapeutic and industrially useful proteins. Experimental efforts to increase solubility are plagued by low success rates and often reduce biological activity. Computational prediction of protein expressibility and solubility in Escherichia coli using only sequence information could reduce the cost of experimental studies by enabling prioritisation of highly soluble proteins. A new tool for sequence-based prediction of soluble protein expression in Escherichia coli, SoluProt, was created using the gradient boosting machine technique with the TargetTrack database as a training set. When evaluated against a balanced independent test set derived from the NESG database, SoluProt's accuracy of 58.5% and AUC of 0.62 exceeded those of a suite of alternative solubility prediction tools. There is also evidence that it could significantly increase the success rate of experimental protein studies. 
SoluProt is freely available as a standalone program and a user-friendly webserver at https://loschmidt.chemi.muni.cz/soluprot/. https://loschmidt.chemi.muni.cz/soluprot/. Supplementary data are available at Bioinformatics online.",2021-01-08 +33686532,Discovery of potential biomarkers in acute kidney injury by ultra-high-performance liquid chromatography-tandem quadrupole time-of-flight mass spectrometry (UPLC-Q/TOF-MS).,"

Objective

The LC-MS/MS-based non-targeted metabolomics method was used to differentially screen serum and urine metabolites of acute kidney injury (AKI) patients and healthy people, to explore potential biomarkers of AKI and analyze related pathways, and explain the potential mechanism and biological significance of AKI.

Methods

The serum and urine samples from 30 AKI patients and 20 healthy people were selected to conduct a non-targeted metabolomics study by ultra-high-performance liquid chromatography-tandem quadrupole time-of-flight mass spectrometry (UPLC-Q/TOF-MS). The differential metabolites between the two groups were searched by the human metabolome (HMDB) database ( https://hmdb.ca/ ) and the related pathways of these potential biomarkers were identified by searching the Kyoto encyclopedia of genes and genomes (KEGG) database ( https://www.kegg.jp/ ). The total metabolic pathways were analyzed by the MS Peaks to Pathways module of MetaboAnalyst ( https://www.metaboanalyst.ca/ ).

Results

Multivariate data analysis found that serum and urine metabolism in AKI patients was significantly different from healthy people. We found three metabolites in urine (2-S-glutathionyl glutathione acetate, 5-L-Glutamyl-taurine, and L-Phosphoarginine) contributing to the separation of AKI patients from healthy people, and major metabolic pathways associated with these potential biomarkers including cytochrome P450 metabolism, arginine, and proline metabolism.

Conclusion

2-S-glutathionyl glutathione acetate, 5-L-Glutamyl-taurine, and L-Phosphoarginine were associated with AKI patients, which could be selected as potential biomarkers to predicate AKI disease.",2021-03-08 +34705568,Accurate Quantification of Overlapping Herpesvirus Transcripts from RNA Sequencing Data.,"Herpesviruses employ extensive bidirectional transcription of overlapping genes to overcome length constraints on their gene product repertoire. As a consequence, many lytic transcripts cannot be measured individually by reverse transcription-quantitative PCR (RT-qPCR) or conventional RNA sequencing (RNA-seq) analysis. A. G. Bruce, S. Barcy, T. DiMaio, E. Gan, et al. (Pathogens 6:11, 2017, https://doi.org/10.3390/pathogens6010011) have proposed an approximation method using unique coding sequences (UCDS) to estimate lytic gene abundance from Kaposi's sarcoma-associated herpesvirus (KSHV) RNA-seq data. Although UCDS has been widely employed, its accuracy, to our knowledge, has never been rigorously validated for any herpesvirus. In this study, we use cap analysis of gene expression sequencing (CAGE-seq) as a gold-standard to determine the accuracy of UCDS for estimating Epstein-Barr virus (EBV) lytic gene expression levels from RNA-seq data. We also introduce the Unique TranScript (UTS) method, which, like UCDS, estimates transcript abundance from changes in mean RNA-seq read depth. UTS is distinguished by its use of empirically determined 5' and 3' transcript ends rather than coding sequence annotations. Compared to conventional read assignment, both UCDS and UTS improved the accuracy of quantitation of overlapping genes, with UTS giving the most-accurate results. The UTS method discards fewer reads and may be advantageous for experiments with less sequencing depth. UTS is compatible with any aligner and, unlike isoform-aware alignment methods, can be implemented on a laptop computer. 
Our findings demonstrate that the accuracy achieved by complex and expensive techniques such as CAGE-seq can be approximated using conventional short-read RNA-seq data when read assignment methods address transcript overlap. Although our study focuses on EBV transcription, the UTS method should be applicable across all herpesviruses as well as to other genomes with extensively overlapping transcriptomes. IMPORTANCE Many viruses employ extensively overlapping transcript structures. This complexity makes it difficult to quantify gene expression by using conventional methods, including RNA-seq. Although high-throughput techniques that overcome these limitations exist, they are complex, expensive, and scarce in the herpesvirus literature relative to short-read RNA-seq. Here, using Epstein-Barr virus (EBV) as a model, we demonstrate that conventional RNA-seq analysis methods fail to accurately quantify the abundances of many overlapping transcripts. We further show that the previously described Unique CoDing Sequence (UCDS) method and our Unique TranScript (UTS) method greatly improve the accuracy of EBV lytic gene measurements obtained from RNA-seq data. The UTS method has the advantages of discarding fewer reads and being implementable on a laptop computer. Although this study focuses on EBV, the UCDS and UTS methods should be applicable across herpesviruses and for other viruses that make extensive use of overlapping transcription.",2021-10-27 +36569172,Automatic cough detection from realistic audio recordings using C-BiLSTM with boundary regression.,"Automatic cough detection in the patients' realistic audio recordings is of great significance to diagnose and monitor respiratory diseases, such as COVID-19. Many detection methods have been developed so far, but they are still unable to meet the practical requirements. 
In this paper, we present a deep convolutional bidirectional long short-term memory (C-BiLSTM) model with boundary regression for cough detection, where cough and non-cough parts need to be classified and located. We added convolutional layers before the LSTM to enhance the cough features and preserve the temporal information of the audio data. Considering the importance of the cough event integrity for subsequent analysis, the novel model includes an embedded boundary regression on the last feature map for both higher detection accuracy and more accurate boundaries. We delicately designed, collected and labelled a realistic audio dataset containing recordings of patients with respiratory diseases, named the Corp Dataset. 168 h of recordings with 9969 coughs from 42 different patients are included. The dataset is published online on the MARI Lab website (https://mari.tongji.edu.cn/info/1012/1030.htm). The results show that the system achieves a sensitivity of 84.13%, a specificity of 99.82% and an intersection-over-union (IoU) of 0.89, which is significantly superior to other related models. With the proposed method, all the criteria on cough detection significantly increased. The open source Corp Dataset provides useful material and a benchmark for researchers investigating cough detection. We propose the state-of-the-art system with boundary regression, laying the foundation for identifying cough sounds in real-world audio data.",2021-11-11 +34809957,Extensive growth and growth boundary model for non-proteolytic Clostridium botulinum - Evaluation and validation with MAP and smoked foods.,"The growth inhibiting effect of lactic acid bacteria (LAB) on non-proteolytic Clostridium botulinum was studied. LAB had no significant effect on growth of C. botulinum and their effect was not included in the model to be evaluated. An available cardinal parameter growth and growth boundary model for non-proteolytic C. 
botulinum (Koukou et al., 2021; https://doi.org/10.1016/j.ijfoodmicro.2021.109162) was evaluated using a total of 822 time-to-toxin (TTT) formation data extracted from the scientific literature for seafood, poultry, vegetables and meat products. These data included smoked products and food stored in air, vacuum or modified atmosphere packaging (MAP) with added CO2. The available extensive model predicted TTT formation without bias (Bf-TTT value = 0.99) and with a reasonable accuracy (Af-TTT value = 1.76). The model was successfully validated for seafood and poultry products. This study substantially increased the range of applicability of the available growth and growth boundary model for non-proteolytic C. botulinum. The performed evaluation showed this model can be used to predict environmental conditions to prevent growth in seafood and poultry products including smoked fish and MAP foods. It is expected that this validated model will contribute to product development and innovation including new sodium reduced foods.",2021-10-30 +34214174,Rank-in: enabling integrative analysis across microarray and RNA-seq for cancer.,"Though transcriptomics technologies evolve rapidly in the past decades, integrative analysis of mixed data between microarray and RNA-seq remains challenging due to the inherent variability difference between them. Here, Rank-In was proposed to correct the nonbiological effects across the two technologies, enabling freely blended data for consolidated analysis. Rank-In was rigorously validated via the public cell and tissue samples tested by both technologies. On the two reference samples of the SEQC project, Rank-In not only perfectly classified the 44 profiles but also achieved the best accuracy of 0.9 on predicting TaqMan-validated DEGs. 
More importantly, on 327 Glioblastoma (GBM) profiles and 248, 523 heterogeneous colon cancer profiles respectively, only Rank-In can successfully discriminate every single cancer profile from normal controls, while the others cannot. Further on different sizes of mixed seq-array GBM profiles, Rank-In can robustly reproduce a median range of DEG overlapping from 0.74 to 0.83 among top genes, whereas the others never exceed 0.72. Being the first effective method enabling mixed data of cross-technology analysis, Rank-In welcomes hybrid of array and seq profiles for integrative study on large/small, paired/unpaired and balanced/imbalanced samples, opening possibility to reduce sampling space of clinical cancer patients. Rank-In can be accessed at http://www.badd-cao.net/rank-in/index.html.",2021-09-01 +34395533,SHAPER: A Web Server for Fast and Accurate SHAPE Reactivity Prediction.,"Selective 2'-hydroxyl acylation analyzed by primer extension (SHAPE) chemical probing serves as a convenient and efficient experiment technique for providing information about RNA local flexibility. The local structural information contained in SHAPE reactivity data can be used as constraints in 2D/3D structure predictions. Here, we present SHAPE predictoR (SHAPER), a web server for fast and accurate SHAPE reactivity prediction. The main purpose of the SHAPER web server is to provide a portal that uses experimental SHAPE data to refine 2D/3D RNA structure selection. Input structures for the SHAPER server can be obtained through experimental or computational modeling. The SHAPER server can accept RNA structures with single or multiple conformations, and the predicted SHAPE profile and correlation with experimental SHAPE data (if provided) for each conformation can be freely downloaded through the web portal. 
The SHAPER web server is available at http://rna.physics.missouri.edu/shaper/.",2021-07-28 +32672454,Bioactivity Profile Similarities to Expand the Repertoire of COVID-19 Drugs.,"Until a vaccine becomes available, the current repertoire of drugs is our only therapeutic asset to fight the SARS-CoV-2 outbreak. Indeed, emergency clinical trials have been launched to assess the effectiveness of many marketed drugs, tackling the decrease of viral load through several mechanisms. Here, we present an online resource, based on small-molecule bioactivity signatures and natural language processing, to expand the portfolio of compounds with potential to treat COVID-19. By comparing the set of drugs reported to be potentially active against SARS-CoV-2 to a universe of 1 million bioactive molecules, we identify compounds that display analogous chemical and functional features to the current COVID-19 candidates. Searches can be filtered by level of evidence and mechanism of action, and results can be restricted to drug molecules or include the much broader space of bioactive compounds. Moreover, we allow users to contribute COVID-19 drug candidates, which are automatically incorporated to the pipeline once per day. The computational platform, as well as the source code, is available at https://sbnb.irbbarcelona.org/covid19.",2020-07-16 +34853151,Spatially Resolved Transcriptomic Analysis of Acute Kidney Injury in a Female Murine Model.,"

Background

Single-cell sequencing technologies have advanced our understanding of kidney biology and disease, but the loss of spatial information in these datasets hinders our interpretation of intercellular communication networks and regional gene expression patterns. New spatial transcriptomic sequencing platforms make it possible to measure the topography of gene expression at genome depth.

Methods

We optimized and validated a female bilateral ischemia-reperfusion injury model. Using the 10× Genomics Visium Spatial Gene Expression solution, we generated spatial maps of gene expression across the injury and repair time course, and applied two open-source computational tools, Giotto and SPOTlight, to increase resolution and measure cell-cell interaction dynamics.

Results

An ischemia time of 34 minutes in a female murine model resulted in comparable injury to 22 minutes for males. We report a total of 16,856 unique genes mapped across our injury and repair time course. Giotto, a computational toolbox for spatial data analysis, enabled increased resolution mapping of genes and cell types. Using a seeded nonnegative matrix regression (SPOTlight) to deconvolute the dynamic landscape of cell-cell interactions, we found that injured proximal tubule cells were characterized by increasing macrophage and lymphocyte interactions even 6 weeks after injury, potentially reflecting the AKI to CKD transition.

Conclusions

In this transcriptomic atlas, we defined region-specific and injury-induced loss of differentiation markers and their re-expression during repair, as well as region-specific injury and repair transcriptional responses. Lastly, we created an interactive data visualization application for the scientific community to explore these results (http://humphreyslab.com/SingleCell/).",2021-12-01 +33641184,DbStRiPs: Database of structural repeats in proteins.,"Recent interest in repeat proteins has arisen due to stable structural folds, high evolutionary conservation and repertoire of functions provided by these proteins. However, repeat proteins are poorly characterized because of high sequence variation between repeating units and structure-based identification and classification of repeats is desirable. Using a robust network-based pipeline, manual curation and Kajava's structure-based classification schema, we have developed a database of tandem structural repeats, Database of Structural Repeats in Proteins (DbStRiPs). A unique feature of this database is that available knowledge on sequence repeat families is incorporated by mapping Pfam classification scheme onto structural classification. Integration of sequence and structure-based classifications help in identifying different functional groups within the same structural subclass, leading to refinement in the annotation of repeat proteins. Analysis of complete Protein Data Bank revealed 16,472 repeat annotations in 15,141 protein chains, one previously uncharacterized novel protein repeat family (PRF), named left-handed beta helix, and 33 protein repeat clusters (PRCs). Based on their unique structural motif, ~79% of these repeat proteins are classified in one of the 14 PRFs or 33 PRCs, and the remaining are grouped as unclassified repeat proteins. 
Each repeat protein is provided with a detailed annotation in DbStRiPs that includes start and end boundaries of repeating units, copy number, secondary and tertiary structure view, repeat class/subclass, disease association, MSA of repeating units and cross-references to various protein pattern databases, human protein atlas and interaction resources. DbStRiPs provides easy search and download options to high-quality annotations of structural repeat proteins (URL: http://bioinf.iiit.ac.in/dbstrips/).",2021-03-06 +32665542,Exploring the SARS-CoV-2 virus-host-drug interactome for drug repurposing.,"Coronavirus Disease-2019 (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Various studies exist about the molecular mechanisms of viral infection. However, such information is spread across many publications and it is very time-consuming to integrate, and exploit. We develop CoVex, an interactive online platform for SARS-CoV-2 host interactome exploration and drug (target) identification. CoVex integrates virus-human protein interactions, human protein-protein interactions, and drug-target interactions. It allows visual exploration of the virus-host interactome and implements systems medicine algorithms for network-based prediction of drug candidates. Thus, CoVex is a resource to understand molecular mechanisms of pathogenicity and to prioritize candidate therapeutics. We investigate recent hypotheses on a systems biology level to explore mechanistic virus life cycle drivers, and to extract drug repurposing candidates. CoVex renders COVID-19 drug research systems-medicine-ready by giving the scientific community direct access to network medicine algorithms. 
It is available at https://exbio.wzw.tum.de/covex/.",2020-07-14 +32024464,ECFS-DEA: an ensemble classifier-based feature selection for differential expression analysis on expression profiles.,"BACKGROUND:Various methods for differential expression analysis have been widely used to identify features which best distinguish between different categories of samples. Multiple hypothesis testing may leave out explanatory features, each of which may be composed of individually insignificant variables. Multivariate hypothesis testing holds a non-mainstream position, considering the large computation overhead of large-scale matrix operation. Random forest provides a classification strategy for calculation of variable importance. However, it may be unsuitable for different distributions of samples. RESULTS:Based on the thought of using an ensemble classifier, we develop a feature selection tool for differential expression analysis on expression profiles (i.e., ECFS-DEA for short). Considering the differences in sample distribution, a graphical user interface is designed to allow the selection of different base classifiers. Inspired by random forest, a common measure which is applicable to any base classifier is proposed for calculation of variable importance. After an interactive selection of a feature on sorted individual variables, a projection heatmap is presented using k-means clustering. ROC curve is also provided, both of which can intuitively demonstrate the effectiveness of the selected feature. CONCLUSIONS:Feature selection through ensemble classifiers helps to select important variables and thus is applicable for different sample distributions. Experiments on simulation and realistic data demonstrate the effectiveness of ECFS-DEA for differential expression analysis on expression profiles. The software is available at http://bio-nefu.com/resource/ecfs-dea.",2020-02-05 +32436344,Update of the CLRP TG-43 parameter database for low-energy brachytherapy sources.,"

Purpose

To update the Carleton Laboratory for Radiotherapy Physics (CLRP) TG-43 dosimetry database for low-energy (≤50 keV) photon-emitting low-dose rate (LDR) brachytherapy sources utilizing the open-source EGSnrc application egs_brachy rather than the BrachyDose application used previously for 27 LDR sources in the 2008 CLRP version (CLRPv1). CLRPv2 covers 40 sources ( 103 Pd, 125 I, and 131 Cs). A comprehensive set of TG-43 parameters is calculated, including dose-rate constants, radial dose functions with functional fitting parameters, 1D and 2D anisotropy functions, along-away dose-rate tables, Primary-Scatter separation dose tables (for some sources), and mean photon energies at the surface of the sources. The database also documents the source models which will become part of the egs_brachy distribution.

Acquisition and validation methods

Datasets are calculated after a systematic recoding of the source geometries using the egs++ geometry package and its egs_brachy extensions. Air-kerma strength per history is calculated for models of NIST's Wide-Angle Free-Air chamber (WAFAC) and for a point detector located at 10 cm on the source's transverse axis. Full scatter water phantoms with varying voxel resolutions in cylindrical coordinates are used for dose calculations. New statistical uncertainties of source volume corrections for phantom voxels which overlap with brachytherapy sources are implemented in egs_brachy, and all CLRPv2 data include these uncertainties. For validation, data are compared to CLRPv1 and other data in the literature.

Data format and access

Data are available at https://physics.carleton.ca/clrp/egs_brachy/seed_database_v2, http://doi.org/10.22215/clrp/tg43v2. As well as being presented graphically in comparisons to previous calculations, data are available in Excel (.xlsx) spreadsheets for each source.

Potential applications

The database has applications in research, dosimetry, and brachytherapy treatment planning. This comprehensive update provides the medical physics community with more accurate TG-43 dose evaluation parameters, as well as fully benchmarked and described source models which are distributed with egs_brachy.",2020-07-13 +32920969,How to use the MEROPS database and website to help understand peptidase specificity.,"The MEROPS website (https://www.ebi.ac.uk/merops) and database was established in 1996 to present the classification and nomenclature of proteolytic enzymes. This was expanded to include a classification of protein inhibitors of proteolytic enzymes in 2004. Each peptidase or inhibitor is assigned to a distinct identifier, based on its biochemical and biological properties, and homologous sequences are assembled into a family. Families in which the proteins share similar tertiary structures are assembled into a clan. The MEROPS classification is thus a hierarchy with at least three levels (protein-species, family, and clan) showing the evolutionary relationship. Several other data collections have been assembled, which are accessed from all levels in the hierarchy. These include, sequence homologs, selective bibliographies, substrate cleavage sites, peptidase-inhibitor interactions, alignments, and phylogenetic trees. The substrate cleavage collection has been assembled from the literature and includes physiological, pathological, and nonphysiological cleavages in proteins, peptides, and synthetic substrates. In this article, we make recommendations about how best to analyze these data and show analyses to indicate peptidase binding site preferences and exclusions. 
We also identify peptidases where co-operative binding occurs between adjacent binding sites.",2020-10-03 +34424732,IMFLer: A Web Application for Interactive Metabolic Flux Analysis and Visualization.,"Increasing genome-wide data in biological sciences and medicine has contributed to the development of a variety of visualization tools. Several automatic, semiautomatic, and manual visualization tools have already been developed. Some even have integrated flux balance analysis (FBA), but in most cases, it depends on separately installed third party software that is proprietary and does not allow customization of its functionality and has many restrictions for easy data distribution and analysis. In this study, we present an interactive metabolic flux analyzer and visualizer (IMFLer)-a static single-page web application that enables the reading and management of metabolic model layout maps, as well as immediate visualization of results from both FBA and flux variability analysis (FVA). IMFLer uses the Escher Builder tool to load, show, edit, and save metabolic pathway maps. This makes IMFLer an attractive and easily applicable tool with a user-friendly interface. Moreover, it allows to faster interpret results from FBA and FVA and improves data interoperability by using a standardized file format for the genome-scale metabolic model. IMFLer is a fully open-source tool that enables the rapid visualization and interpretation of the results of FBA and FVA with no time setup and no programming skills required, available at https://lv-csbg.github.io/IMFLer/.",2021-08-23 +,QISS: An Open Source Image Similarity Search Engine,"Qwant Image Similarity Search (QISS) is a multi-lingual image similarity search engine based on a dual path neural networks that embed texts and images into a common feature space where they are easily comparable. 
Our demonstrator, available at http://research.qwant.com/images, allows real-time searches in a database of approximately 100 million images.",2020-03-24 +34042972,cfDNApipe: A comprehensive quality control and analysis pipeline for cell-free DNA high-throughput sequencing data.,"

Motivation

Cell-free DNA (cfDNA) is gaining substantial attention from both biological and clinical fields as a promising marker for liquid biopsy. Many aspects of disease-related features have been discovered from cfDNA high-throughput sequencing (HTS) data. However, there is still a lack of integrative and systematic tools for cfDNA HTS data analysis and quality control (QC).

Results

Here, we propose cfDNApipe, an easy-to-use and systematic python package for cfDNA whole-genome sequencing (WGS) and whole-genome bisulfite sequencing (WGBS) data analysis. It covers the entire analysis pipeline for the cfDNA data, including raw sequencing data processing, QC and sophisticated statistical analysis such as detecting copy number variations (CNVs), differentially methylated regions (DMRs) and DNA fragment size alterations. cfDNApipe provides one-command-line-execution pipelines and flexible application programming interfaces for customized analysis.

Availability

https://xwanglabthu.github.io/cfDNApipe/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-27 +34879210,"Insecure Attachment and Sexual Satisfaction: A Path Analysis Model Integrating Sexual Mindfulness, Sexual Anxiety, and Sexual Self-Esteem.","This study examines the intermediary role of three variables (sexual mindfulness, sexual anxiety, and sexual self-esteem) in a path analysis model to explain the association between insecure attachment and sexual satisfaction. A community sample of 543 adults completed an online survey. Results supported our hypothesized integrative model, which explained 44.1% of the variance in sexual satisfaction and presented satisfactory fit indices. This study suggests that the association between attachment insecurities and lower sexual satisfaction could be partially explained by a proximal association with lower sexual mindfulness, which emphasizes the relevance of examining sexual mindfulness during the screening and treatment of people presenting low sexual satisfaction.Supplemental data for this article is available online at https://doi.org/10.1080/0092623X.2021.2011808 .",2021-12-08 +,Poster Session A,"A.1 Dynamic proteomic profiling of the Salmonella-host interplay reveals new modes of action for known and novel virulence factors Jennifer Geddes-McAlister1, Stefanie Vogt2, Jennifer Rowland2, Sarah Woodward2, Arjun Sukumaran1, Lilianne Gee1, Baerbel Raupach3, Brett Finlay2, Felix Meissner4 1University of Guelph, Guelph, Canada, 2University of British Columbia, Vancouver, Canada, 3Max Planck Institute for Infectious Biology, Berlin, Germany, 4Max Planck Institute of Biochemistry, Martinsried, Germany Intracellular bacterial pathogens cause a diverse array of diseases in humans and represent a significant threat to global health. These pathogens have evolved sophisticated strategies including the secretion of virulence factors to interfere with host cell functions and to perturb immune responses. 
However, interplay between the host and pathogen at the protein level in the context of infection has not been systematically investigated. Our 'infectome' analysis aims to identify previously undescribed proteins involved in bacterial virulence and host immune defense, representing an opportunity to elucidate molecular mechanisms of host-pathogen interplay during disease. Here, we investigate the host-pathogen interplay between the pathogenic bacteria, Salmonella enterica serovar Typhimurium, and primary macrophages. We performed quantitative proteomics of the host cells infected with wild-type (SL1344) or the type 3 secretion system (T3SS) mutant strains (Dspi-1 and Dspi-2) in single runs using high resolution mass spectrometry on a Quadrupole Orbitrap instrument. Our results provide a comprehensive and dynamic view of both pathogen and host proteins during infection. In the host cells, we observed the upregulation of proinflammatory and lysosomal proteins, representing host defense mechanisms to initiate immune responses and combat bacterial invasion. For S. Typhimurium, integration of proteome and infectome data identified eight proteins not encoded on SPI-1 or SPI-2 as being co-regulated with known virulence factors, suggesting a co-functional role in virulence and infection. Additionally, murine model competitive index assays revealed virulence-associated phenotypes of five proteins and defined their roles in bacterial cell regulation, as well as their impact on the host proteome. Overall, we provide an innovative strategy for profiling infection from dual perspectives in a single assay and characterizing novel virulence factors. A.2 Identification of urine-derived diagnostic biomarkers for Tuberculosis Bridget Calder1 1University of Cape Town, South Africa Tuberculosis remains a leading cause of death worldwide, driven in part by the lack of sufficiently decisive diagnostic tools in the clinical setting. 
In South Africa, the incidence of TB/HIV co-infection is high, and co-infected individuals have particularly bad clinical outcomes. Some of the available diagnostics have a sensitivity as low as 50% in HIV positive individuals, and sputum-based testing is not possible in a high proportion of TB positive patients. An alternative TB diagnostic test should have high sensitivity and specificity in TB/HIV co-infected individuals, and be applicable in a biofluid that is obtained non-invasively. Urine has been proposed as an ideal biofluid for these purposes, and previous studies have found biomarkers for renal or GIT diseases in human urine. Since disseminated or extrapulmonary TB is often found in HIV positive individuals post mortem, we theorised that it should be possible to find a signature for TB in human urine that is either of TB or human origin. To that end, we employed discovery mass spectrometry-based proteomics to survey the urine of individuals who had been classified into four clinical groups: TB+/HIV-, TB+/HIV+, TB-/HIV+, and TB-/HIV-. This is the largest human urinary proteome-based study to date, comprising 120 individuals. Using Random Forest machine learning, TB status could be predicted using only four human proteins with a sensitivity and specificity of 95% and 85%, respectively in a one third hold-out set of the total data. We propose these human-derived biomarkers as a potential diagnostic panel for TB, which warrants further validation in a larger cohort. 
A.3 Microscaled Proteogenomic Methods for Precision Oncology Shankha Satpathy1, Eric Jaehnig2, Karsten Krug1, Michael Gillette1, Alexander Saltzman2, Kimberly Holloway2, Meenakshi Anurag2, Chen Huang2, Purba Singh2, Beom-Jun Kim2, Goerge Miles2, Noel Namai2, Anna Malovannaya2, DR Mani1, Chuck Perou3, Bing Zhang2, Steven Carr1, Matthew Ellis2 1The Broad Institute of MIT and Harvard, 2Baylor College of Medicine, 3University of North Carolina Cancer proteogenomics combines genomics, transcriptomics and mass spectrometry-based proteomics to gain insights into cancer biology and treatment responsiveness. While proteogenomics analyses have shown great potential to deepen our understanding of cancer tissue complexity and signaling, how a patient's tumor changes upon treatment has largely been the province of genomics. This is due to technical difficulties associated with doing proteogenomic analysis on clinic-derived core-needle biopsies. To address this critical need, we have developed a “microscaled” proteogenomics approach for tumor-rich OCT-embedded core needle biopsies. Tissue-sparing specimen processing (“Biopsy Trifecta EXTraction”, BioTExt) and microscaled proteomics (MiProt) methodologies allowed generation of deep-scale proteogenomics datasets, with copy number and transcript information for >20,000 genes and mass spectrometry-based identification and quantification of nearly all expressed proteins in a tumor (>10,000 proteins) and more than >20,000 phosphosites starting with just 25 micrograms of peptides per sample. In order to understand the capabilities and limitations of our approach relative to conventional deepscale proteomics requiring >10X more starting material, we compared preclinical patient derived xenograft (PDX) models at conventional scale with data obtained by core-needle biopsy of the same tissues. Comparable depth and biological insights were obtained from the cores relative to surgically resected tumors. 
As a proof-of-concept for implementation in clinical trials, we applied microscaled proteogenomic methods to a small-scale clinical study where biopsies were accrued from patients with ERBB2+ advanced breast cancer before and 48 to 72 hours after the first dose of neoadjuvant Trastuzumab-based chemotherapy. Multi-omics comparisons were conducted between samples associated with residual disease versus samples associated with complete pathological response. Integrative proteogenomic analyses efficiently diagnosed the molecular bases of diverse candidate treatment resistance mechanisms including: 1) absence of ERBB2 amplification (false-ERBB2+); 2) insufficient ERBB2 activity for therapeutic sensitivity despite ERBB2 amplification (pseudo-ERBB2+); 3) resistance features in true-ERBB2+ cases including androgen receptor signaling, mucin expression and an inactive immune microenvironment; 4) lack of acute phospho-ERBB2 down-regulation in non-pCR cases. In summary, we have developed a proteogenomics pipeline well suited for large-scale cancer clinical studies to identify potential resistance mechanism in patients. We conclude that microscaled cancer proteogenomics could improve diagnostic precision in the clinical setting. A.4 Reduced proteasome activity in the aging brain results in ribosome stoichiometry loss and aggregation Joanna M. Kirkpatrick1, Erika K. 
Sacramento1, Mariateresa Mazzetto1, Simone Di Sanzo1, Cinzia Caterino1, Aleksandar Bartolome1, Michele Sanguanini3, Nikoletta Papaevgeniou4, Maria Lefaki4, Dorothee Childs5, Eva Terzibasi-Tozzini2, Natalie Romanov5, Mario Baumgart1, Wolfgang Huber5, Niki Chondrogianni4, Michele Vendruscolo3, Alessandro Cellerino1,2, Alessandro Ori1 1Leibniz Institute on Aging - Fritz Lipmann Institute (FLI), Jena, Germany, 2Scuola Normale Superiore, Pisa, Italy, 3Centre for Misfolding Diseases, Department of Chemistry, University of Cambridge, Cambridge, UK, 4Institute of Biology, Medicinal Chemistry and Biotechnology, Athens, Greece, 5European Molecular Biology Laboratory, Heidelberg, Germany A progressive loss of protein homeostasis is characteristic of aging and a driver of neurodegeneration. To investigate this process quantitatively, we characterized proteome dynamics during brain aging by using the short-lived vertebrate Nothobranchius furzeri and combining transcriptomics, proteomics and thermal proteome profiling. We found that the correlation between protein and mRNA levels is progressively reduced during aging, and that post-transcriptional mechanisms are responsible for over 40% of these alterations. These changes induce a progressive stoichiometry loss in protein complexes, including ribosomes, which have low thermal stability in brain lysates and whose component proteins are enriched in aggregates found in old brains. Mechanistically, we show that reduced proteasome activity occurs early during brain aging, and is sufficient to induce loss of stoichiometry. Our work thus defines early events in the aging process that can be targeted to prevent loss of protein homeostasis and age-related neurodegeneration. 
A.5 Affinity Proteomics Reveals Assembly of PPP-type Phosphatase Holoenzyme by PPM1G-B56δ Parveen Kumar1,2, Prajakta Tathe1,2, Subbareddy Maddika1 1Laboratory of Cell Death & Cell Survival, Centre for DNA Fingerprinting and Diagnostics, INDIA, 2Graduate studies, Manipal Academy of Higher Education, Manipal 576104, INDIA Serine/threonine phosphatases form distinct holoenzymes to achieve substrate specificity. PPP serine/threonine phosphatase family members such as PP1 and PP2A are well known to assemble and function as holoenzymes, but none of the PPM family members was so far shown to assemble holoenzymes. Here, we performed a systematic proteomic analysis of human Ser/Thr protein phosphatases associated protein complexes using tandem affinity purification coupled with mass spectrometry (TAP-MS). Our interaction screen revealed an assembly of a holoenzyme by PPM1G, a member of PPM family of serine/threonine phosphatases. We identified that PPM1G interacts with a regulatory subunit B56δ to form a distinct holoenzyme complex. B56δ alters the localization of PPM1G to the cytoplasm where PPM1G can act on a discrete set of substrates. Further, we identified α-Catenin, a component of adherens junction, as a novel substrate for PPM1G-B56 phosphatase in the cytoplasm. B56δ-PPM1G dephosphorylates α-Catenin at Serine 641 and prevents aberrant cell migration. Collectively, we identified a new phosphatase holoenzyme with PPM1G-B56δ as integral components, in which the regulatory subunit regulates its cellular localization to target distinct substrates. A.6 Targeted Quantification of Incomplete Prohormone Processing Products in Type 1 Diabetes Yinyin Ye1, Adam C. Swensen1, Lian Yi1, Yuqian Gao1, Emily K. Sims2,3, Tujin Shi1, Carla J. 
Greenbaum4, Carmella Evans-Molina2,5,6,7,8 1Biological Sciences Division, Pacific Northwest National Laboratory, Richland, WA 99354, 2Department of Pediatrics, Indiana University School of Medicine, Indianapolis, IN 46202, 3Center for Diabetes and Metabolic Diseases, Indiana University School of Medicine, Indianapolis, IN, 4Diabetes Clinical Research Program, Benaroya Research Institute, Seattle, WA 98101, 5Department of Cellular and Integrative Physiology, Indiana University School of Medicine, Indianapol, 6Department of Medicine, Indiana University School of Medicine, Indianapolis, IN 46202, 7Department of Biochemistry and Molecular Biology, Indiana University School of Medicine, Indianapoli, 8Richard L. Roudebush VA Medical Center, Indianapolis, IN 46202 Type 1 diabetes (T1D) is an autoimmune disease marked by the loss of insulin production in pancreatic islet β-cells. In healthy β-cells, proinsulin processing results in the cleavage of the C-peptide fragment thereby forming mature insulin. At diagnosis, C-peptide is usually still detectable at low levels but over time it is lost completely. However, low-level detectable proinsulin secretion was recently shown to be retained years after the initial diagnosis of T1D. This suggests that incomplete proinsulin processing may be associated with T1D etiopathology. Accurate quantitative measurements of proinsulin and C-peptide help us better understand the pathophysiology of T1D. The most common current measurement methods rely on antibody-based affinity assays. These assays may not be sufficiently specific and may fail to capture the subtle variations within proinsulin and C-peptide. To overcome these issues, mass spectrometry (MS) can be utilized to achieve confident detection specificity. In this presentation, we aim to develop a targeted MS method (i.e., liquid chromatography-selected reaction monitoring) in order to characterize incomplete hormone processing in islets and serum from T1D subjects. 
In addition to our initial classical trypsin-digestion-based proteomics technique, we have included two additional proteases, GluC and AspN, which were optimized using mouse macrophages RAW264.7. These two proteases were selected primarily because their cleavage sites are located outside of the important enzymatic processing regions used by proinsulin processing enzymes (i.e., PC1/3, CPE, etc.). Whether proinsulin is partially processed or not can be identified. Our preliminary results demonstrate that the optimized GluC and AspN digestions achieved digestion specificities of nearly 80% and 50%, respectively. We are further optimizing the method with clinical samples, and will apply the assays to longitudinal T1D samples. The targeted MS method can be easily expanded in future work to characterize other prohormone forms in human islets, such as proglucagon, pro-islet amyloid polypeptide (pro-IAPP), and pro-somatostatin. A.7 Novel methods and reagents for characterization of protein biotinylation sites by peptide-based immunoaffinity enrichment Yiying Zhu1, Matthew D. Fry1, Alissa J. Nelson1, Jian Min Ren1, Vicky Yang1, Michael C. Palazzola1, Charles L. Farnsworth1, Matthew P. Stokes1, Kimberly A. Lee1 1Cell Signaling Technology INC Biotin labeling in combination with LC-MS/MS has been widely applied in large-scale analysis of protein-protein interactions, subcellular localization, and post-translational modifications. Direct characterization of protein biotinylation sites is challenging due to the low recovery of biotinylated peptides using conventional streptavidin-based purification methods. Previous published studies demonstrated that anti-biotin antibodies are superior capture reagents for biotinylated peptides compared to streptavidin. In this study, we establish an immunoaffinity enrichment method using a monoclonal anti-biotin antibody compared to previously published approaches using commercially available polyclonal antibodies. 
Trypsin digested mouse liver peptides labeled with NHS-biotin and mixed with unlabeled liver peptides were used as the test sample for method optimization and comparison. Enrichment with the rabbit monoclonal anti-biotin antibody yielded more biotinylated peptide identifications than any polyclonal antibody tested. Over 3,400 unique biotinylated peptides were identified using the monoclonal antibody, with other available antibodies ranging from 200 to 3,200 unique peptides. We then used this optimized enrichment method to characterize protein biotinylation sites from APEX proximity labeling in living cells. HEK 293T cells stably expressing fused β2AR and APEX were cultured, incubated with biotin phenol, and treated with the agonist BI167107 for 10 min. Protein biotinylation was activated by adding H2O2. In total 1,354 unique biotinylated peptides from 858 proteins were identified and quantified. Among them, the levels of 148 biotinylated peptides from 125 proteins were responsive to agonist treatment. A.8 Exploring the glycosylation levels of snake venom proteins by mass spectrometry: microheterogeneity determination of sweet spots in toxins of Bothrops snake genus Débora Andrade Silva1, Lívia Rosa-Fernandes2, Giuseppe Palmisano3, Silvia R. Travaglia-Cardoso4, Solange M. Toledo Serrano1, Martin R. Larsen2 1Laboratório Especial de Toxinologia Aplicada, Instituto Butantan, São Paulo, Brazil, 2Centre of Clinical Proteomics, Department of Biochem. and Mol. Biology, SDU, Odense, Denmark, 3Laboratório de GlicoProteômica, Instituto de Ciências Biomédicas, USP, São Paulo, Brazil, 4Museu Biológico, Instituto Butantan, São Paulo, Brazil Differently from proteomic studies based on non-modified peptide identification, the analysis of post-translational modifications (PTMs), such as glycosylation, faces challenges in all analytical steps. 
The first key step to identify glycopeptides using mass spectrometry involves their enrichment from a complex mixture to overcome the ion suppression and abundance of non-modified peptides. In this study, we used two strategies to enrich and fractionate glycopeptides of Bothrops venom proteins: TiO2 beads and HILIC. It has recently been shown that the N-glycome of Bothrops venoms contains structures belonging to high mannose and hybrid/complex types (with and without sialic acid); therefore, the enrichment step using TiO2 beads allowed the isolation of sialylated glycopeptides, while the non-binding fraction was submitted to HILIC to further capture the remaining glycopeptides. To help in peptide backbone identification, part of each glycopeptide fraction was submitted to enzymatic deglycosylation with PNGase F. Then, the fractions were submitted to LC-MS/MS fragmentation using stepped HCD fragmentation. These enrichment steps proved to be efficient to separate the sialic acid-containing from other glycopeptides, as observed from the different profiles of oxonium ion reporters in their MS2 spectra. This strategy allowed the identification of the primary structures of deglycosylated peptides, which, otherwise, is not straightforwardly obtained from the fragmentation of the intact glycopeptides. To identify the glycosylation profile of Bothrops venoms, we combined information from a database of N- glycans from eight Bothrops venoms together with the peptide sequence identified in the deglycosylated peptide fraction, using the GlycReSoft software. The number of identified deglycosylated peptides (1500–3500 per venom) was significantly higher than that of identified intact glycopeptides (90–230 per venom) confirming the difficulty in assigning intact glycopeptides. 
Nevertheless, an important aspect of these findings is the view of toxin glycosylation microheterogeneity and profile, especially in snake venom metalloproteinases, which appear to display specific patterns of N-glycan structures on their different structural domains. As a general view, the data illustrate that N-linked glycosylation is not only a common PTM but also an important component of venom phenotype variability. Financial Support: FAPESP (2013/07467-1; 2018/08794-0; 2016/16935-7); the Villum Center for Bioanalytical Sciences at SDU. A.9 Expression of PNPLA3 I148M Variant Alters Lipid Droplet Proteome Mara Monetti1, Jeffrey Culver1, Sharath Sasi1, Liang Xue1, Gregory Tesz1, Collin Crowley1, Trenton Ross1, Thomas Magee1, Melissa Miller1, Bei Zhang1, Kendra Bence1 1Pfizer, Internal Medicine Research Unit, Cambridge, MA, 20139, USA Liver cirrhosis as a consequence of fatty liver disease is a leading cause of liver transplant in the United States. A variant (I148M) of the lipid droplet protein patatin-like phospholipase domain-containing protein 3 (PNPLA3) is associated with hepatic steatosis. However, the mechanisms of liver lipid accumulation due to PNPLA3 mutation are unclear. To investigate the biological function of wildtype and variant PNPLA3, we used an unbiased and systematic proteomic approach, analyzing the protein composition of lipid droplets (LD) isolated from livers of mice expressing either variant of human PNPLA3. We isolated lipid droplets by sucrose gradient (20 to 55% sucrose) centrifugation and analyzed their proteome using a label free quantification approach. Using high resolution mass spectrometry-based proteomics, we identified ∼800 proteins in the lipid droplet fraction. Among the most abundant of these proteins were well-known lipid droplet proteins, including perilipins, hormone sensitive lipase and CGI58. 
Bioinformatics analyses of these datasets show that PNPLA3 accumulates to a higher level on lipid droplets isolated from mice expressing PNPLA3 I148M as compared to those from mice expressing the wild-type protein. Interestingly, some lipid droplet proteins, such as CGI58, increased in the lipid droplet fraction upon expression of I148M, while other lipid droplet proteins, such as Lipe, did not change between the two genotypes, consistent with published observations (Smagris et al, 2015). When challenging murine liver with a Western diet, PNPLA3 I148M induced extensive changes in the lipid droplet protein composition, consistent with data published by the laboratory of Drs. Cohen and Hobbs. Among the striking changes, we detected fewer proteins of the ubiquitin/proteasome system in murine liver lipid droplets from PNPLA3 I148M expressing animals, supporting the hypothesis of differential turnover regulation due to PNPLA3 expression. A.10 Vascular Cell Surface Proteomics In Vivo Dirk M. Walther1, Elizabeth Gordon1, Benjamin Smith1, Thomas O. Cameron1, Ru Wei1, Peter Juhasz1 1Biogen, Cambridge, MA 02142 Therapeutic antibodies have great potential for the treatment of neurological diseases. However, only a small fraction of drug molecules cross from the bloodstream into the central nervous system (CNS) because they are excluded by the blood brain barrier (BBB). Receptor mediated transport (RMT) is a mechanism which relies on cell surface proteins shuttling between the luminal (blood) and the abluminal (brain) side of endothelial cells to facilitate the transport of cargos across the BBB. Most known RMT receptors, such as Transferrin receptor (TfR1), are expressed in multiple organs, resulting in a rapid target-mediated drug disposition and thus unfavorable pharmacokinetics. This limitation could be overcome if suitable brain-specific RMT receptors were known. 
To better understand tissue-specific differences in the microvasculature, we developed an in vivo approach to catalogue the luminally accessible proteome of different organs in rats and non-human primates (cynomolgus monkeys). We optimized a chemical cell surface labeling protocol by cardiac perfusion of anesthetized animals (n≥6) using a lysine-reactive reagent with a biotin affinity handle. A vehicle perfused group of animals was included as a control. After tissue collection and lysis, labeled proteins were enriched with streptavidin beads and eluted by reductive linker cleavage under mild conditions. After digestion proteins were quantified by label-free proteomics using a QE HF mass spectrometer. Additionally, proteomes of total lysates were acquired to determine enrichment factors of labeled proteins with regards to total expression levels, resulting in the identification of more than 14,000 proteins in each species. With this workflow, we were able to label the vascular bed in the CNS and all seven peripheral tissues analyzed. The corresponding cell surface proteomes show reproducible tissue-specific expression patterns. In non-human primates, most of the cell surface proteins enriched in the CNS over the periphery were detected consistently and at similar levels in the five different brain regions covered in our study. Among those proteins were most known RMT receptors, including TfR1, insulin receptor, IGF-I receptor, and GLUT-1. In summary, studying vascular cell surface proteomes in vivo provides a valuable starting point for the development of bispecific therapeutic antibodies with improved CNS exposure. 
A.11 Application of 4C Proteomics and Interactomics in Study of PTM Proteins Involved in Regulation of Arabidopsis Flowering Ning Li1,2 1Division of Life Science, Hong Kong University of Science and Technology, Hong Kong SAR, 2The HKUST Shenzhen Research Institute, Shenzhen, 3Energy Institute, Hong Kong University of Science and Technology, Hong Kong SAR, 4Institute for the Environment, Hong Kong University of Science and Technology, Hong Kong SAR Ethylene and force signals are well-known for their roles in regulation of bolting. The underlying molecular mechanisms are important issues in plant biology. To understand these molecular mechanisms, we have applied proteomics to study the post-translational modification (PTM) of proteins that play an important role in regulation of numerous cellular events, protein-protein interactions and enzymatic activities in Arabidopsis, a model plant organism. A stable isotope labeling-based 4C quantitative PTM proteomics and interactomics were therefore established to investigate the roles of key proteins in both ethylene and force cell signaling. The SILIA- and iTRAQ-based quantitative PTM proteomics have revealed that TREPH1 and ERF110 proteins are phosphorylated in response to mechanical and hormonal signals, respectively. Molecular genetics and cell biology studies confirmed that phosphorylation of the transcriptional factor, ERF110, and the cytoskeleton protein, TREPH1, are required for regulating Arabidopsis flowering.(Funding Supports: 16101819, 16100318, 16103817, 1613615, AOE/M-403/16, 31570187, 31370315, C020406). Author's website for publication: https://life-sci.ust.hk/team/ning-li/. 
A.12 Characterization of Symptomatic Aortic Valve Stenosis Subtypes by DIA-MS Proteome Profiling Christof Lenz1,2, Lisa Neuenroth2, Soeren Brandenburg3, Stephan Lehnart3,4,5, Henning Urlaub1,2 1Bioanalytical Mass Spectrometry, Max Planck Institute for Biophysical Chemistry, Göttingen, Germany, 2Core Facility Proteomics, University Medical Center Göttingen, Germany, 3Heart Research Center, University Medical Center Göttingen, Germany, 4German Centre for Cardiovascular Research (DZHK), Göttingen, Germany, 5DFG Collaborative Research Center 1002-A09 Introduction: The success of proteomics in precision medicine rests on the availability of experimental protocols that provide sufficient analytical depth, reproducibility, throughput and turnaround for relevant tissue and body fluid samples. Left ventricular (LV) biopsies from patients with aortic valve stenosis (AVS) promise to be a highly valuable source of information to deepen the molecular understanding and, potentially, differential diagnosis of presumed classes and/or pathophysiological stages of heart disease. We have successfully developed and demonstrate a rapid analytical workflow consisting of pressure-cycling tissue lysis and label-free data-independent acquisition mass spectrometry (DIA-MS) that enables the medium throughput analysis of small LV biopsy samples. Materials & Methods: Small volumes (1–3 mm3) typical for bioptic samples of human heart tissue obtained during a clinical study from patients diagnosed with severe AVS were lysed and digested using Pressure Cycling Technology (PCT: Barocycler 2320; Pressure Biosciences), and analyzed by DIA-MS on a hybrid quadrupole/time-of-flight mass spectrometer (TripleTOF 5600+, Sciex). Neutral pH Reversed Phase (nPH-RP) pre-fractionation was used to build a tissue-specific spectral library. Results: nPH-RP separation of digested peptide samples provided a spectral library consisting of 2,951 proteins @ 1% FDR. 
Data-independent analysis (DIA) by SWATH-MS on a hybrid quadrupole/time-of-flight instrument showed that small LV biopsies could be profiled to a depth of 2,273 proteins across 25 samples, each representing one individual patient. Our workflow enabled high reproducibility, a sample throughput of up to 12 samples/day and a turnaround time of 1.5 days. Hence, the use of parallelized pressure cycling technology allows for straightforward upscaling of sample handling. Exploratory statistical analysis shows that different classes of AVS (type I-IV) correlate with their corresponding proteome profiles, opening avenues to explore potential diagnostic biomarkers. A.13 Cell-surface proteomic landscape of developing and mature olfactory projection neurons Jiefu Li1, Shuo Han2, Hongjie Li1, Namrata D. Udeshi3, Tanya Svinkina3, D. R. Mani3, Chuanyun Xu1, Ricardo Guajardo1, Qijing Xie1, Tongchao Li1, Bing Wu1, Anthony Xie1, David J. Luginbuhl1, Pornchai Kaewsapsak2, Stephen R. Quake4, Steven A. Carr3, Alice Y. Ting2, Liqun Luo1 1Department of Biology, Howard Hughes Medical Institute, Stanford University, 2Departments of Genetics, Biology, and Chemistry, Chan Zuckerberg Biohub, Stanford University, 3The Broad Institute of MIT and Harvard, 4Departments of Bioengineering and Applied Physics, Chan Zuckerberg Biohub, Stanford University Intercellular signaling governs the development and physiology of multicellular organisms. Delineating the principles of cell-surface signaling is thus a crucial step to understand the organization and function of any multicellular system, including the intricately wired nervous system. We developed a cell-type-specific, spatiotemporally-resolved, proximity-labeling proteomic approach to profile the cell-surface proteome in intact tissues. Applying it to the Drosophila olfactory circuit, we observed proteome-wide temporal evolution of cell-surface molecules in coordination with the developmental timeline. 
Multi-omic analysis revealed a broad impact of post-transcriptional regulation on the dynamics of cell-surface proteins, especially the ones playing central roles in neural development and synaptic transmission. Unbiased genetic screen of developmentally enriched cell-surface proteins identified many new molecules required for wiring specificity, the majority of which belong to unexpected molecular families. A.14 Breast cancer quantitative proteome and proteogenomic landscape Henrik J. Johansson1, Fabio Socciarelli1, Nathaniel M. Vacanti1,2, Mads H. Haugen3, Yafeng Zhu1, Ioannis Siavelis1, Alejandro Fernandez-Woodbridge1, Miriam R. Aure4, Bengt Sennblad5, Mattias Vesterlund1, Rui M. Branca1, Lukas M. Orre1, Mikael Huss6, Erik Fredlund1, Elsa Beraki7, Øystein Garred7, Jorrit Boeke1, Torill Sauer8,9, Wei Zhao10, Silje Nord4, Elen K. Höglander4, Daniel C. Jans11, Hjalmar Brismar11,12, Tonje H. Haukaas13, Tone F. Bathen13, Ellen Schlichting14, Bjørn Naume9,15, OSBREAC 16, Torben Luders9,17, Elin Borgen7, Vessela N. Kristensen4,9,17, Hege G. Russnes4, Ole Christian Lingjærde4,18, Gordon B. Mills10, Kristine K. Sahlberg4,19, Anne-Lise Børresen-Dale4,9, Janne Lehtiö1 1Science for Life Laboratory, Department of Oncology-Pathology, Karolinska Institutet, 171 21, Solna, 2Cornell University, Division of Nutritional Sciences, Ithaca, NY 14853, USA, 3Dept of Tumor Biology & Dept of Cancer Genetics, Inst for Cancer Research, Oslo University Hospital, 4Dept of Cancer Genetics, Inst for Cancer Research, Oslo University Hospital, 0424, Oslo, 5Dept. 
of Cell and Molecular Biology, NBIS Sweden, Science for Life Laboratory, Uppsala University, 6Dept of Biochemistry and Biophysics, NBIS Sweden, Science for Life Laboratory, Stockholm University, 7Department of Pathology, Oslo University Hospital, 0424, Oslo, Norway, 8Department of Pathology, Akershus University Hospital, 1478, Lørenskog, Norway, 9Institute for Clinical Medicine, University of Oslo, 0318, Oslo, Norway, 10Department of Systems Biology, The University of Texas MD Anderson Cancer Center, Houston, TX, USA, 11Department of Applied Physics, KTH Royal Institute of Technology, 171 21, Solna, Sweden, 12Department of Women's and Children's Health, Karolinska Institutet, 171 21, Solna, Sweden, 13Dept. of Circulation and Medical Imaging, The Norwegian University of Science and Technology – NTNU, 14Section for Breast- and Endocrine Surgery, Dept of Cancer, Oslo University Hospital, 15Dept of Oncology, Division of Surgery & Cancer & Transplantation Medicine, Oslo University Hospital, 16www.osbreac.no, Oslo, Norway, 17Dept of Clinical Molecular Biology & Laboratory Science (EpiGen), Akershus University Hospital, 18Centre for Cancer Biomedicine, University of Oslo, 0424, Oslo, Norway, 19Department of Research, Vestre Viken Hospital Trust, 3004, Drammen, Norway We present a proteome-centric multi-omics examination of the breast cancer (BC) molecular landscape. Unbiased analyses of deep tumor proteomes recapitulate PAM50 BC subtypes while further distinguishing poor-prognosis basal-like and luminal B tumors by immune component infiltration, suggesting the current classification is incomplete. Proteome-based networks distinguish BC subtype-specific functional protein modules with co-expression of known drug targets marking ductal carcinoma in situ regions of normal-like tumors, lending to a more accurate classification of this poorly defined subtype. 
We find effects of copy number alterations to be dampened dependent on protein-level gene regulation, and transcripts within prognostic mRNA panels to be reliable protein surrogates, underscoring the value of proteome quantification for prognostication and phenotypic classification. Furthermore, protein products mapping to “non-coding” genomic regions were identified; highlighting a potential new class of tumor-specific immunotherapeutic targets. A.15 Native Mass Spectrometry Study on the Stoichiometry of Proteasome AAA+ ATPase Nucleotide Binding Yadong Yu1,4, Haichuan Liu2,5, Zanlin Yu1, H. Ewa Witkowska2, Yifan Cheng1,3 1UCSF, Dept. Biochemistry and Biophysics, San Francisco, CA 94143, 2UCSF, Dept. OBGYN & Reproductive Sci, Sandler-Moore MS Core Facility, San Francisco CA 94143, 3Howard Hughes Medical Institute, University of California San Francisco, CA 94143, 4LakePharma, 201 Industrial Rd, San Carlos CA 94070, 5Thermo Fisher Scientific, 355 River Oaks Pkwy, San Jose, CA 95134 AAA+ ATPases constitute a large family of proteins that are involved in a plethora of cellular processes including DNA disassembly, protein degradation and protein complex disassembly. They typically form a hexameric ring-shaped structure with six subunits in a (pseudo) six-fold symmetry. In a subset of AAA+ ATPases that facilitate protein unfolding and degradation, six subunits cooperate – in a yet unknown fashion – to translocate protein substrates through a central pore in the ring. The number and type of nucleotides in an AAA+ ATPase hexamer is inherently linked to the mechanism that underlies cooperation among subunits and couples ATP hydrolysis with substrate translocation. We conducted a native mass spectrometry study of a monodispersed form of PAN, an archaeal proteasome AAA+ ATPase, to determine the number of nucleotides bound to each hexamer of the wild-type protein. 
We utilized ADP and its analogues (TNP-ADP and mant-ADP), and a non-hydrolyzable ATP analogue (AMP-PNP) to study nucleotide site occupancy within the PAN hexamer in ADP- and ATP-binding states, respectively. Throughout all experiments we used a Walker A mutant that is impaired in nucleotide binding as an internal standard to mitigate the effects of residual solvation on mass measurement accuracy and to serve as a “reference protein” to control for non-specific nucleotide binding. This approach led to the unambiguous finding that a wild-type PAN hexamer carried – from expression host – six tightly bound ADP molecules that could be exchanged for ADP and ATP analogues. While the Walker A mutant did not bind ADP analogues, it did bind AMP-PNP, albeit at multiple stoichiometries. We observed variable levels of hexamer dissociation and an appearance of multimeric species with the overcharged-like molecular ion distributions across repeated experiments. We posit that these phenomena originated during ESI process at the final stages of ESI droplet evolution. A.16 Towards Elucidation of Muscle-Specific Receptor Tyrosine Kinase (MuSK) Signaling Pathway by Differential Agonists Hanna G. Budayeva1, Arundhati Sengupta Ghosh1, Lilian Phu1, Donald S. Kirkpatrick1 1Genentech Inc., South San Francisco, CA 94080 Muscle-Specific receptor tyrosine Kinase (MuSK) is essential for neuromuscular junction (NMJ) formation. MuSK is activated upon binding to Lrp4 co-receptor in complex with agrin, a motor neuron signaling molecule. In response to MuSK activation, acetylcholine receptors begin clustering on the muscle surface to guide neuronal attachment. Molecular players involved in propagation of MuSK signaling are not yet fully defined. We employed mass spectrometry-based quantification by TMT to elucidate ubiquitination- and phosphorylation-mediated signaling pathways activated in response to treatment with agrin or MuSK agonist antibody. 
We observed that the majority of MuSK signaling was mediated by phosphorylation. Significant increases in phosphotyrosine levels were detected on MuSK and its adaptor protein Dok7 upon treatment with each agonist. Several AchR subunits were detected with increased phosphorylation, in line with previously reported observations that phosphorylation plays an important role in AchR clustering. Profiling of phosphorylation and ubiquitination events revealed that proteins with functions in clathrin-mediated endocytosis are regulated by ubiquitination events downstream of MuSK. Overall results point at a significant overlap in signaling processes initiated by MuSK natural agonist agrin and MuSK agonist antibody. A.17 Multiplexed and Quantitative Assessment of the Cellular Reactive Cysteinome in T cell activation Liang Xue1, Uthpala Seneviratne2 1Simulation and Modeling Science, Pfizer, 2I&I Medicinal Chemistry, Pfizer Over the past few years, a broad variety of chemoproteomic methods for targeting cysteine residues have been reported. One such technology, isotopic tandem orthogonal proteolysis-activity based protein profiling (isoTOP-ABPP) uses iodoacetamide alkyne probe for cysteine targeting and isotope coded cleavable azide for click chemistry mediated-enrichment for greater peptide and protein identifications in a complex proteome. Here we describe a much simpler platform that uses a broad spectrum cysteine-reactive desthiobiotin iodoacetamide (DBI) probe in combination with tandem mass tags (TMT-10plex) to profile the reactive cysteinome in depth. By employing the multiplexed-cysteine profiling platform we identified and quantified more than 13000 probe labeled-peptides that correspond to more than 4500 proteins in the human T cell proteome. The method facilitates a high throughput chemoproteomics by comparing multiple samples at once and enables the interrogation of low abundant cysteine activated proteins with greater depth. 
A.18 Enzyme toolkit for selective enrichment and analysis of mucin-domain glycoproteins Stacy A. Malaker1, Judy Shon1, Kayvon Pedram1, Nicholas M. Riley1, Carolyn R. Bertozzi1,2 1Stanford University, Stanford, CA 94305, 2Howard Hughes Medical Institute, Stanford, CA 94305 Mucin domains are densely O-glycosylated modular protein domains that are found in a wide variety of cell surface and secreted proteins. Mucin-domain glycoproteins are known to be key players in a host of human diseases, especially cancer, wherein mucin expression and glycosylation patterns are altered. Mucin biology has been difficult to study at the molecular level in part because methods to manipulate and structurally characterize mucin domains are lacking. One major issue is that these domains are resistant to degradation by trypsin, meaning the majority of their sequence space is often left unanalyzed. Selective mucin degradation or enrichment, especially in a sequence- and glycan-specific manner, can facilitate study of these proteins by mass spectrometry. Previously, we expressed and characterized a bacterial mucinase, StcE, and used its unique properties to improve sequence coverage, glycosite mapping, and glycoform analysis of recombinant human mucins by mass spectrometry. To expand on this work, we expressed and characterized several other bacterial mucinases to generate a mucin-selective enzymatic toolkit. Their activities were confirmed using a panel of O-glycoproteins by mass spectrometry. We manually validated peptide sequences from MS/MS spectra to elucidate all cleaved peptides present in the mucinase-digested samples but not in the control samples, revealing that each enzyme has a slightly different cleavage motif. Interestingly, all of the enzymes rely on a combination of peptide sequence and glycosylation status. Together with StcE, we have characterized a total of five bacterial mucinases capable of digesting mucins into peptides amenable for mass spectrometric analysis. 
Further, given the enzymes' selectivity for mucin-domain glycoproteins, we reasoned that they could be employed to purify mucins from protein mixtures. Thus, inactivated mucinases were conjugated to aldehyde beads using reductive amidation. Using the enzyme-conjugated beads, we demonstrate that we can selectively enrich for mucin-domain glycoproteins from lysate and crude cancer patient ascites fluid. We are thus defining the “mucinome”, as a comprehensive list of mucin-domain glycoproteins does not exist. Future experiments will be devoted to isolation, digestion, and characterization of mucins from human cancer patient ascites fluid, with the ultimate goal of identifying diagnostic and/or prognostic markers of disease states. A.19 Functional Metabolomics uncovering the role of Trp–KYN–KA Axis in Intestinal Injury and Repair Di Wang1, Huimin Guo1, Zunjian Zhang1, Fengguo Xu1 1China Pharmaceutical University, Nanjing 210009, P. R. China Drug-induced disease has become one of the major causes of death in clinical. Gastrointestinal dysfunction which accounts for about 20% of all kinds of adverse drug reactions (ADRs) cause gastrointestinal mucosa damage, reduce mucosal barrier function and led to inflammatory bowel disease (IBD). Targeted metabolomics studies of Trp metabolic profile in vincristine-induced rat ileus, irinotecan-induced rat diarrhea and DSS-induced rat IBD models we found that Trp–KYN–KA axis metabolism was significantly increased in rat damaged intestinal. Besides, damaged intestinal was significantly recovered after drug was terminated, hypothesizing that Trp–KYN–KA axis might be plays an important role in intestine injury and repair. Following studies we found that colon formed IL-6–IDO1–AHR positive feedback loop at pathologic condition, not at normal condition, accelerate KYN and KA accumulation. 
Besides, we also found that IL-6 which came from macrophages plays a leading role in the accumulation of KYN and KA, whereas positive feedback loop only works as a role of auxiliary acceleration. Unexpectedly, KYN and KA inhibited LPS-induced IL-6 production by activating AHR in macrophages, reminding us that the following studies need to consider the whole role of AHR in different organizations. Crucially, G protein-coupled receptor 35 (GPR35), which significantly high expressed in intestine, negative feedback regulated intestinal injury and inflammation to promote colon repair and maintain intestinal homeostasis through sensing KA level selectively. Taken together, This study provides a promise insight about the body feedback regulation of intestinal damage and inflammation to maintain intestinal homeostasis through sensing KA level, suggesting regulate Trp–KYN–KA axis and combine with AHR and GPR35 agonist may be play a synergistic effect in reducing ADRs and treating IBD. A.20 Metabolic Control of OGT Interactome in Hepatocytes Krista Kaasik1, Chin Fen Teo1, Robert Chalkey1, Alma L. Burlingame1 1University of California San Francisco O-linked-β-N-acetylglucosamine transferase (OGT) post-translationally catalyzes the addition of a single N-acetylglucosamine in O-glycosidic linkage to serine and threonine residues and is required for stem cell viability. Remarkably, hepatocytes are resistant to the loss of OGT in hepatocyte specific OGT knockout mouse. O-GlcNAcylation is drastically reduced in mutant liver tissue compared to wild type littermates during postnatal development analyzed by wheat germ agglutinin (WGA) lectin affinity chromatography. We have applied proximity labeling based proteomics coupled to label-free mass spectrometry to identify dynamic OGT interactions in vivo. The short labeling time based on an engineered ascorbate peroxidase (APEX2) labeling enables capture of more transient interactions while reducing non-specific interactions. 
We have made stably expressed OGT-APEX2 and its mutants in hepatocyte cell lines. We have identified 724 proteins from OGT full-length sample, 983 proteins from OGT catalytically inactive cell lines, after removal of background proteins. Molecular function analyses show that ∼45% of proteins identified by APEX2 labeling have enzymatic activity and ∼38% with scaffolding activity. Many of them were identified as O-GlcNAc modified, representing macromolecular complexes in multiple cell signaling pathways. Catalytically inactive OGT is unaffected in substrate binding supporting OGT scaffolding role in addition to its enzymatic function. Proteins from adherens junctions, proteasome, DNA replication complexes, RNA degradation, and splicing are among those identified. For example, the focal adhesion proteins talin, vinculin, paxillin and zyxin were all identified as modified by OGT in adherens junctions that control cell adhesion and mechanotransduction. Currently we are validating novel targets from proximity labeling to characterize the loss of OGT hepatocyte specific liver phenotype. A.21 Integrating Phosphoproteomics and Transcriptional Classifiers Reveals “Hidden Signaling” in Multiple Myeloma Including Differential KRAS and NRAS Mutant Effects Yu-Hsiu T. Lin1, Gregory P. Way2, Benjamin G. Barwick3, Margarette C. Mariano1, Makeba Marcoulis1, Ian D. Ferguson1, Christoph Driessen4, Lawrence H. Boise3, Casey S. Greene2, Arun P. Wiita1 1University of California, San Francisco, 2University of Pennsylvania, 3Emory University, 4Kantonsspital St. Gallen Introduction: Multiple myeloma (MM) is a complex disease that requires a sophisticated treatment strategy. Currently, no kinase inhibitors have been approved for MM despite their potential for supplementing current combination therapies. Previous functional studies have explored kinase dependency in MM by either a small molecule inhibitor library or RNA interference. 
However, owing to their off-target effects, these approaches are imprecise at dissecting signaling networks driving MM growth and survival. Here, we aim to improve prognostic measures and recommend small molecule-based treatments for MM patients by identifying vulnerable signaling patterns in disease using integrated transcriptome- and phosphoproteome-based predictive models. Results: We inferred the activities of 297 kinases across eight MM cell lines from mass spectrometry-based quantitative phospho-proteomic data by performing a kinase-substrate enrichment analysis (KSEA). Initially, we were surprised to find greater predicted activity in KRASG12-mutant cell lines compared to NRAS-mutant cell lines. We further explored this disparity with our machine learning-based Ras classifier built on transcriptional data from CoMMpass, a longitudinal study of >1000 MM patients. We identified 311, 405, and 390 genes whose expressions are characteristic of the WT RAS, KRAS mutant, and NRAS mutant genotype, respectively, with surprisingly limited overlap between KRAS and NRAS transcriptional signatures. Building on our KSEA analysis, we next performed a kinase inhibitor screen to evaluate the predictive value of the inferred kinase activities for drug sensitivity. Of 12 screened compounds, mTOR inhibitor INK128 displayed the strongest correlation between drug response and predicted kinase activity. Furthermore, we probed the potential of using pathway activity signatures as prognostic markers. To this end, we applied a gene expression-based signaling pathway prediction model to RNAseq data derived from CoMMpass patients and found that the MAPK signature stratifies patient survival with statistical significance, while the presence and absence of RAS mutations carry no prognostic value. Conclusion: Both phosphoproteomics and a machine learning-based transcriptional classifier highlight a striking difference in the pattern of signaling between NRAS and KRAS mutants. 
Taken together, uncovering the cellular signaling networks dysregulated in MM may lead to improved precision medicine, particularly in stratifying patients who may benefit most from kinase inhibitor therapy. A.22 Characterization of KRAS 4B C-Terminal Hypervariable Region using LC/MS James Wilkins1, A.L. Burlingame1 1UCSF Aberrant KRAS 4B activity has been shown to be linked to 30 percent of all cancers. The C-terminus of the molecule contains a region (termed the Hypervariable Region) with extremely high positive charge and the mature form of the molecule is prenylated. It shares this property with a number of other membrane-associated RAS-related proteins that are involved in cellular vesicle targeting, cytoskeletal and other functions. In order to understand better the behavior of the KRAS 4B polybasic region, modified and unmodified C-terminal synthetic peptides were studied using online LCMS approaches. A peptide consisting of the C-terminal 22 amino acids of KRAS 4B was synthesized and used for these studies. The native peptide (containing a cysteine-linked farnesyl group) was prepared by chemical methods and its behavior along with the peptide containing a free cysteine was studied using online LCMS. The nonfarnesylated peptide has an isoelectric point of 11.27 and exhibits charge states ranging from +3 to +7 when examined in an Orbitrap instrument at pH 2. Its solubility properties were surprising and we found that the peptide was more soluble at basic pH values as demonstrated by its ability to ionize in nanospray MS experiments. Chromatographic behavior of the unmodified peptide using conventional reversed phase approaches on silica stationary phases yielded poor results. We also looked at separation on a polymeric stationary phase (PLRP-S) and found behavior similar to that obtained with C18. The peptide was cleaved using ASP-N protease resulting in 2 fragments that were also both poorly behaved in chromatography. 
We found dramatic improvement in the peptide's chromatographic behavior on the same C18 stationary phase after blocking lysine ε-amino groups with propionyl moieties. For this reaction, we employed succinimidyl propionate and found that all 12 lysine side chains were blocked along with the N-terminal amino group. These studies have been done to enable the use of high sensitivity characterization of the KRAS molecule and its modifications in normal and diseased tissues using LC/MS. A.23 Rapid, Sensitive and Multiplexed Ubiquitylation Profiling in Cells and Tissues Deepak C. Mani1, Namrata D. Udeshi1, Philipp Mertins1,2, Shaunt Fereshetian1, Jessica A. Gasser1,3, Shankha Satpathy1, Tanya Svinkina1, Hasmik Keshishian1, Benjamin L. Ebert1,3,4, Steven A. Carr1 1Broad Institute of MIT and Harvard, Cambridge, MA, USA, 2Max Delbrück Center for Molecular Medicine, Germany, 3Division of Hematology, Brigham and Women's Hospital, Boston, MA, USA, 4Dana-Farber Cancer Institute, Department of Medical Oncology, Boston, MA, USA The study of ubiquitin systems is of great interest as they play an important role in numerous cancers and diseases. Global ubiquitylation profiling by mass spectrometry has been a key method for identifying and understanding how protein ubiquitylation sites are regulated in cellular systems. Almost all analyses of ubiquitylation to date have been carried out in cells grown in culture with quantification using SILAC. This approach has been enormously successful, but is limited by the requirement that samples be amenable to metabolic labeling, the need for relatively large amounts of sample (multi-milligram/sample), and that SILAC can only routinely be multiplexed to three. Here, we present a new method that allows for deepscale, quantitative and highly multiplexed ubiquitylome analyses in any biological system. 
The method is much faster and requires far less sample than prior approaches and permits the comparison of up to 11 conditions without a significant loss in total numbers of identified ubiquitylation sites. Using this method we identify >10,000 ubiquitin sites in tissue samples across 10 states in a TMT 10-plex using only 500 ug tissue per state. A.24 Characterization of a hybrid insulin peptide as an autoantigen in human type 1 diabetes Timothy A. Wiles1, Rocky L. Baker2, Maki Nakayama3, Thomas Delong1 1University of Colorado Skaggs School of Pharmacy and Pharmaceutical Sciences, 2University of Colorado School of Medicine, 3Barbara Davis Center for Childhood Diabetes Relatively little is known about the primary peptide epitopes targeted by the autoimmune response during the development of type 1 diabetes (T1D) in humans. We have shown in the non-obese diabetic (NOD) mouse model of type 1 diabetes that insulin peptides within the pancreatic beta cell become covalently linked via a peptide bond to other beta cell peptides, leading to the generation of hybrid insulin peptides (HIPs). Using mass spectrometry, we recently confirmed that HIPs are present in both mouse and human islets. We established that HIP-reactive CD4 T cells can trigger disease in NOD mice, indicating that HIPs are major autoantigens in this animal model. Furthermore, we determined that HIP-reactive CD4 T cells are present in the peripheral blood of recent onset T1D patients and in the residual islets of organ donors with T1D. Here, we demonstrate the presence of a specific insulin C-peptide HIP in the islets of human donors by mass spectrometry. CD4 T cells specific for this peptide can be detected in the peripheral blood of T1D patients, and from one of these patients we have isolated T cell clones that respond to the HIP at low nanomolar concentrations. Our evidence suggests that this HIP may be an important antigen in the autoimmune pathogenesis of human T1D. 
A.25 Improved reproducibility of enrichment and site-assignment of biotinylated peptides using new anti-biotin antibody and its use to investigate redox signaling Meagan Olive1, Namrata D. Udeshi1, Samuel A. Myers1, Steven A. Carr1 1Broad Institute of MIT and Harvard, Cambridge, MA 02142 Affinity purification of biotinylated proteins with a standard streptavidin-based enrichment is a powerful tool, but it is limited in its ability to provide site-specific information due to difficult recovery of biotin-modified peptides. Previous studies have shown that enrichment of biotin-modified peptides with an anti-biotin antibody allows for large-scale identification of biotinylated sites by tandem mass spectrometry, making it a potentially useful tool for the study of various post-translational modifications. Application of antibody-based methods to broadly purposed enrichment strategies necessitates interbatch reproducibility of antibody, leading us to test a new monoclonal anti-biotin antibody from Cell Signal Technologies. Here, we evaluate the depth and reproducibility of enrichment of biotinylated peptides using this antibody and compare the results to those obtained using the prior ImmuneChem antibody. We then utilize the new antibody to investigate the prevalence and potential biological roles of redox signaling in immune cells. The ability of this new antibody to purify biotin-labeled peptides will contribute to the development of robust strategies to study post-translational modifications and their biological implications. A.26 Proteome-wide analysis of protein stability in E. 
coli using pulse proteolysis Liang Zhao1, Giulia Vecchi2, Michele Vendruscolo2, Roman Körner1, Manajit Hayer-Hartl1, Ulrich Hartl1 1Max-Planck Institute of Biochemistry, Martinsried, Germany, 2University of Cambridge, Centre of Misfolding Diseases, UK Molecular chaperones play an essential role for maintaining proteins in native states, but how they affect proteome-wide protein stability under native conditions is not well understood. Here, we used pulse proteolysis and quantitative proteomics to screen protein folding states globally under different growth conditions in Escherichia coli and characterized the effects of the DnaK (Hsp70) chaperone system on proteome stability. During a 1 min. short incubation of cells upon lysis, accessible protein regions got cleaved by thermolysin. We then determined the percentage of cleavage for each protein and identified the degraded regions by SILAC-labeling, gel-separation and direct identification of cleaved peptides by mass spectrometry. Comparison of cleavage patterns between stressed and unstressed cells at normal, increased (overexpression mutant) and decreased (deletion mutant) levels of the DnaK (Hsp70) system gave insight into the effects of heat stress and the protective role of the DnaK (Hsp70) chaperone system. We found ∼500 proteins (∼25% of total by mass) to be protease-sensitive under normal growth conditions, indicating that conformationally dynamic proteins make up a large fraction of the cytosolic proteome. These metastable proteins tend to be larger than average in size, with a high degree of connectivity in protein interaction networks. Upon acute heat stress, not resulting in upregulation of the major chaperone systems, an additional ∼200 proteins unfolded, exposing hydrophobic amino acid residues to the solvent that are buried in the native state. 
These thermosensitive proteins are enriched in large, abundant, and hetero-oligomeric proteins as well as proteins with the CATH fold domain c.37, which is among the most ancient classified folds. Heat shock also resulted in a further destabilization of proteins which were protease sensitive already under normal growth conditions, increasing the fraction of cleaved proteins to ∼33% by mass. Overexpression of the DnaK (Hsp70) chaperone system revealed its potential to markedly stabilize numerous thermo-sensitive proteins, including ribosomal proteins as well as large multi-domain, hetero-oligomeric proteins. These results reveal a strong capacity of DnaK (Hsp70) to stabilize proteins in their folded states under denaturing stress conditions. A.27 The case for mass spectrometry-based proteomics and phospho-proteomics in personalized cancer medicine Sophia Doll1, Fabian Coscia2, Alberto Santos2, Philipp Nuhn3, Philipp Geyer1, Matthias Mann1,2 1Department of Proteomics and Signal Transduction, Max Planck Institute of Biochemistry, Martinsried, 2Novo Nordisk Foundation Center for Protein Research, Faculty of Health Sciences, University of Cope, 3Department of Urology, University Medical Centre Mannheim, University of Heidelberg, Mannheim, Germ Recent advances in mass spectrometry (MS) based proteomics together with progresses in computational biology are transforming translational MS-based cancer proteomics from an idea to a practice. Mindful of the time constraints in the clinic, we developed a rapid and robust proteomic workflow for the analysis of cancer tissues, including FFPE tissues. It allows the quantification of thousands of tumor proteins in several hours of measuring time and a total turnaround of currently only a few days from obtaining the sample to interpreted result. Here, we applied our pipeline to several 'case studies' of single patients, a well-established paradigm in medicine. 
In a first metastatic case study - of the extremely rare urachal carcinoma - we uncovered the epigenetic regulator lysine specific histone demethylase 1 as a potential therapeutic target. This protein is an epigenetic regulator and a therapeutic target of new drugs in clinical trials. Thus clinical cancer proteomics can rapidly and efficiently identify actionable therapeutic options. Complementing the proteomic data with NGS and a newly developed 'clinical knowledge graph' that integrates vast amounts of proteomic, genomic and clinical information, helped to guide the therapy decision. In another end-stage cancer patient, we uncovered an up-regulation of a mutated form of the androgen receptor. Finally, we integrated the analysis of phosphorylation sites along with somatic mutations, combining genomics with proteomics to uncover additional and personalized treatment options for cancer patients. We envision that our MS-based proteomic workflow can be broadly applied to cancer patients. A.28 Characterization of the Sin3 HDAC complex interaction network Mark K. Adams1, Charles A. Banks1, Janet L. Thornton1, Cassandra G. Kempf1, Sayem Miah1, Laurence Florens1, Michael P. Washburn1,2 1Stowers Institute for Medical Research, Kansas City, MO, 2Department of Pathology & Laboratory Medicine, University of Kansas Medical Center, Kansas City, KS The efficacy of HDAC inhibitors (HDACis) as chemotherapeutic agents is a focus of many ongoing clinical studies. Despite the current existence of 4 FDA-approved HDACis, the molecular mechanisms that mediate their beneficial and off-target effects are poorly defined. Among HDAC complexes that are targeted by HDACis, Sin3 complexes have important roles in the regulation of transcriptional activity and may mediate many of the effects associated with the application of these compounds. Sin3 complexes are named for the scaffolding proteins of the complexes and have forms conserved from yeast to humans.
However, the acquisition of complex components by humans that are not present within the well characterized yeast forms of the complex contributes to our poor understanding of the functional attributes of the Sin3 complexes in humans. Using MudPIT mass spectrometry, we characterize the human Sin3 interaction network. We show that the interaction networks of the two human Sin3 protein paralogs, SIN3A and SIN3B, only partially overlap and that the identity of the Sin3 protein paralog within a complex influences complex composition. Through the comparison of SIN3A and SIN3B protein features, we identify shared and divergent attributes that influence the functional properties of these proteins. Our results reveal the presence of mutually exclusive components of the Sin3 interaction network and provide definition to the heterogeneous population of Sin3 complexes. These findings highlight the need for future studies to assess the biological consequences of diversity within populations of HDAC complexes. A.29 Proteogenomics of melanoma cell lines and xenografts identifies amino acid variants with a potential to rewire signal transduction networks Marisa Schmitt1, Nicolas Nalpas1, Tobias Sinnberg2, Heike Niessner2, Claus Garbe2, Boris Macek1 1Quantitative Proteomics, University of Tuebingen, Tuebingen, Germany, 2Division of Dermatooncology, University of Tuebingen, Tuebingen, Germany Malignant melanoma is characterized by somatic mutations in BRAF and NRAS in the MAPK pathway, which strongly correlate with poor prognosis of the disease. Targeted inhibition with kinase inhibitors shows a promise in melanoma treatment; however, treated tumours inevitably develop resistance. Although several mechanisms of resistance have been proposed, key phosphoproteins and associated mutations responsible for therapy responses are largely elusive. 
Here, we reconstruct the disturbed cellular signalling networks upon establishment of melanoma resistance using individualised genomic, proteomic and PTM data. To study the impact of mutations on signal transduction networks, we have established a bioinformatics workflow to predict non-synonymous single nucleotide variants and applied it to exome sequencing data of different drug-resistant and drug-sensitive cell lines as well as primary tissues of patients. This led to incorporation of about 13,000 amino acid variants into the human proteome database, resulting in around 20,000 novel protein sequence entries. We classified the mutations based on their potential to attack signaling networks, for example by assignment to a reported cancer-relevant protein. This stratification allowed for further mutation ranking and selection of highest effect scoring mutation. The resulting proteogenomic databases were applied to phosphoproteomics data from several melanoma cell lines and xenografts. Across cell lines and xenografts, we covered about 14,000 protein groups and 16,000 phosphosites, of which 1,300 were localized on peptides containing single amino acid variants. Notably, we identified a number of phosphopeptides resulting from knock-in of a phosphorylated residue and we detected multiple instances of phosphosite loss due to mutations. Most of them were unique to a specific phenotype, cell line or xenograft, which calls for personalized approaches to cancer understanding and treatment. Several proliferation and signaling pathways (PI3K-Akt pathway) were over-represented in mutated proteins. We are currently validating a number of interesting candidates such as the transcription factor RUNX1 via CRISPR/Cas9 strategy followed by MS-based proteomics. Future work will include interactome studies of wild-type and mutated proteins under different conditions.
A.30 Integration of the deep learning prediction tool Prosit into Skyline for high-accuracy, on-demand fragment intensity and iRT prediction Tobias Rohde1, Tobias Schmidt2, Bernhard Kuster2,3, Michael J. MacCoss1, Mathias Wilhelm2, Brendan MacLean1 1Department of Genome Sciences, University of Washington, Seattle, WA 98195, 2Chair of Proteomics and Bioanalytics, Technical University of Munich, Freising, Germany, 3Bavarian Center for Biomolecular Mass Spectrometry, Freising, Germany Mass spectrometry-based proteomics employs a variety of acquisition schemes. When aiming for high reproducibility and quantitative accuracy, targeted (SRM/MRM and PRM) and data-independent acquisition (DIA and SWATH) are commonly used because of the lower missing values they produce in comparison to data-dependent acquisition methods. For both acquisition schemes, the measured fragment intensities are subsequently matched against MS/MS spectra stored in a library to determine the presence and quantity of peptides, while spectral libraries are often used to choose transitions for SRM. Knowledge of peptide elution time is used for acquisition scheduling, efficient chromatogram extraction, and peak picking. Skyline is a popular open-source tool for building and analyzing such methods, but like most other tools, requires empirically measured spectral libraries. These libraries are usually acquired by DDA experiments which might require extensive offline fractionation or synthetic peptides. While publicly available spectral libraries can be used as well, they are often incomplete and may have been acquired using different LC/MS settings. Recently, a deep neural network named Prosit has been developed to predict MS/MS fragment ion intensities and retention time indices (iRT) with high accuracy. 
Although Prosit was trained on ∼460.000 human tryptic peptides synthesized in the ProteomeTools project, it generalizes to other organisms and even proteases, allowing the prediction of MS/MS spectra and retention times for any precursor of interest. This motivated the integration of Prosit into Skyline. Because Prosit requires GPUs for prediction, we decided to use the Google Remote Procedure Call system (gRPC) to request spectra directly from GPUs hosted by ProteomicsDB. This allows on-demand generation of high quality spectral libraries in Skyline within seconds. Skyline is the first tool to use this Prosit interface for spectrum and RT prediction. The implementation provides a reference for other developers to integrate Prosit predictions into their tools. Comparison of RT prediction in Skyline between SSRCalc and Prosit show a 5-fold decrease in regression residuals with Prosit. We found that using the Prosit spectral libraries in Skyline produced detection of almost the same number of peptides as through experimental libraries in a benchmark DIA experiment (−3%). A.31 Exploring mechanisms of immune suppression promoted by cancer-associated fibroblasts in lung squamous cell carcinoma Carlo P. Ramil1, Handan Xiang2, Josephine Hai3, Chunsheng Zhang4, Huijun Wang5, Amanda A. Watkins2, Roshi Afshar2, Peter Georgiev2, Xuelei S. Song3, Dongyu Sun3, Andrey Loboda4, Yanlin Jia3, Lily Y. Moy3, Philip E. Brandish2, An Chi1 1Chemical Biology, Merck & Co., Inc., 33 Avenue Louis Pasteur, Boston, MA, 02115, 2Discovery Oncology, Merck & Co., Inc., 33 Avenue Louis Pasteur, Boston, MA, 02115, 3Pharmacology, Merck & Co., Inc., 33 Avenue Louis Pasteur, Boston, MA, 02115, 4Informatics, Merck & Co., Inc., 33 Avenue Louis Pasteur, Boston, 5Modeling & Informatics, Merck & Co., Inc., 2000 Galloping Hill Rd, Kenilworth, NJ 07033 Cancer-associated fibroblasts (CAFs) are activated fibroblasts that constitute the stromal component in the tumor microenvironment (TME). 
Although CAFs have been shown to promote tumor growth and mediate resistance to chemotherapy, their role and potential mechanisms by which they may contribute to immune suppression in lung squamous cell carcinoma (LSCC) remain largely unexplored. Here, we used discovery proteomics to identify potential mechanisms of CAF-promoted immune suppression. We established a patient-derived co-culture model system and showed that CAF polarizes monocytes to adopt a myeloid-derived suppressor cell (MDSC) phenotype characterized by robust suppression of autologous CD8+ T cell proliferation and IFNγ production. We measured the total proteome of CAF-induced MDSCs and compared to non-suppressive immature dendritic cells derived from the same monocyte population. One of the highly enriched pathways in MDSCs is the activation of NADPH oxidase. Pharmacological inhibition of NOX2 activity in CAF-induced MDSCs restored CD8+ T cell proliferation. This study highlights a pivotal role of CAFs in regulating monocyte differentiation and demonstrates that NOX2 inhibition abrogates the CAF-MDSC axis, illuminating a potential therapeutic path to reversing the CAF-mediated immunosuppressive microenvironment. A.32 Changes in prooncogenic and immune response proteins during development of cervical cancer through quantitative proteomics Gonzalo A. Soto-Fuenzalida1, Rosa C. Lopez-Sanchez1, Sergio Encarnacion-Guevara2, Juan E. Martinez-Ledesma1, Rocio Ortiz-Lopez1, Luis M. Villela-Martinez3,4, Victor M. Treviño-Alvarado1, Jose A. Hernandez-Hernandez1 1Tecnologico de Monterrey, Escuela de Medicina y Ciencias de la Salud, Campus Monterrey, 64710, 2Programa de Genomica Funcional de Procariotes, Centro de Ciencias Genomicas-UNAM, 62210, 3Universidad del Valle de México, Escuela de Medicina, Hermosillo, Sonora, 83165, 4Centro médico Dr. Ignacio Chávez, ISSSTESON, Hermosillo, Sonora, Mexico, 83000 Currently, cervicovaginal cancer (CaCu) is one of the most common cause of gynecological cancer worldwide. 
Nearly 99% of CaCu cases are associated with Human papilloma virus (HPV) infection. The progression of this disease is slow, and it has different and sequential precancerous stages until reaching cancer. A differential proteomic study in Cervical mucus samples was set up. We studied CaCu mucus using an iTRAQ approach combined with high-resolution mass spectrometry to describe protein patterns found in mucus during cancer development. Cervix fluid samples were obtained from healthy and precancerous patients (CIN1–3) and analyzed. HPV genotype presence was evaluated. We identified and reported 1731 different proteins quantified, with high confidence, that are common for all disease stages. The proteomic analysis showed that during disease progression, different protein change patterns are shown in cervical mucus. In our study, the most important changes found were down-regulation of proteins such as H2AFX, LYPD3, S100A9, SPRR3 and FLG. In contrast, up-regulation was shown in proteins related to the cancer process, such as AGT, OSTF1, GADPH and SERPINA1. Several proteins found in our study are dysregulated and present in mucus during progression of HPV infection to CaCu. They have potential to be used as biomarkers and/or therapeutic targets but validation studies about those proteins are needed. A.33 Elucidating Changes in O-GlcNAcylation in Pancreatic Cancer Talieh Zomorrodinia1, Jason Maynard1, Krista Kaasik1, Alma Burlingame1 1University of California, San Francisco Cancer cell growth, survival, and proliferation are linked to a metabolic shift from oxidative phosphorylation to glycolysis and as such requires increased glucose uptake. Glucose is used by the hexosamine biosynthetic pathway to create UDP-GlcNAc, the donor substrate required for intracellular protein O-GlcNAcylation. O-GlcNAcylation is a post-translational modification found mainly on serine and threonine residues of nuclear and cytoplasmic proteins.
This dynamic modification is controlled by two enzymes, O-GlcNAc transferase (OGT) and O-GlcNAcase (OGA); the former adding a GlcNAc moiety and the latter removing it. Studies have shown that cancer cells have an increase in the occurrence of O-GlcNAcylation (Ma, Vocadlo, & Vosseller, 2013). Pancreatic cancer is one of the most common forms of cancer in the United States. Previous studies have linked the observation of hyper-O-GlcNAcylation with NF-κB activity in the pancreatic cancer cell line, MiaPaCa-2. This cell line possesses the KRASG12C mutation. It is known that the ARS-1620 inhibitor selectively targets this KRASG12C mutation resulting in inhibition of KRASG12C activity. In this study, we aim to elucidate the broader proteomic and posttranslational changes revealed upon treatment of MiaPaCa-2 cells with ARS-1620. SILAC and lectin weak affinity chromatography (LWAC) was employed to compare and enrich GlcNAc containing glycopeptides. EThcD based mass spectrometry was used to identify specific sites of protein O-GlcNAcylation. Changes to the proteome and phosphoproteome were also analyzed and the results will be presented. Financial acknowledgement: Dr. Miriam And Sheldon G. Adelson Medical Research Foundation and UCSF Program for Breakthrough Biomedical Research (PBBR). A.34 In vivo investigation of kigelia africana leaf as possible therapeutic option for gastric ulcer disease Oladayo E. Apalowo1, Babatunde M. Adekola3, Funke T. Asaolu1, Vincent O. Oriyomi4, Gbenga S. Ogunleye1, Oladayo J. Areola2, Olusegun O. 
Babalola1 1Department of Biochemistry and Molecular Biology, Obafemi Awolowo University, Ile-Ife, 2Department of Medical Biochemistry, Faculty of Basic Medical Science, Obafemi Awolowo University, 3Department of Environmental Management and Toxicology, Federal University of Agriculture, Abeokuta, 4Institute of Ecology and Environmental Studies, Obafemi Awolowo University The study investigated the antiulcer potentials of kigelia africana leaf using aspirin-induced model of gastric ulcer in wistar albino rats. Fresh leaves of Kigelia africana were extracted with 70% (v/v) ethanol and subjected to liquid-liquid partitioning using solvents of different polarities. In vitro tests comprising DPPH radical scavenging activity, FRAP assay, 5-Lipoxygenase and Xanthine oxidase inhibitory activity of different fractions obtained revealed ethyl acetate fraction as possible lead fraction and was used for the antiulcer study. GC-MS fingerprinting of the lead fraction was carried out to identify active chemical constituents. Pre-treatment of experimental animals with varying doses of the lead fraction lasted for 30 days. Thereafter, gastric ulcer was induced in wistar rats with a single dose of aspirin. A control was set up which consisted of wistar rats that received only standard animal pellet and water. Several biochemical markers for gastric ulceration were determined from the plasma, stomach tissue and gastric content. Results from the in vivo study revealed a significant increase (p ≤ 0.05) in defensive factors like mucin content, total cholesterol and triglyceride concentrations when compared to the control while pepsin activity, myeloperoxidase activity, nitric oxide and malondialdehyde levels were significantly reduced. The study revealed that neutrophil infiltration, an index of myeloperoxidase activity, and inflammation are key factors in gastric ulcer pathogenesis.
However, increased total cholesterol and fatty acid concentration may serve as defensive mechanism during an offensive onslaught leading to gastric ulcer. GCMS analysis of lead fraction revealed the presence of several constituents with anti-inflammatory properties, some of which have not been previously investigated. A.35 Formation of N-GlcNAc proteins is upregulated upon inhibition of proteasome activity in Ngly1-KO cells Jason C. Maynard1, Haruhiko Fujihira2, Gabby E. Dolgonos1, Tadashi Suzuki3,4, Alma L. Burlingame1 1Department of Pharmaceutical Chemistry, University of California San Francisco, San Francisco, CA, 2Division of Glycobiologics, Juntendo University, Tokyo, Japan, 3Glycometabolic Biochemistry Laboratory, RIKEN Cluster for Pioneering Research, Saitama, Japan, 4Suzuki Project, T-CiRA discovery, Kanagawa, Japan NGLY1 is a widely conserved eukaryotic cytosolic deglycosylase. Recently, a human genetic disorder called NGLY1 deficiency was reported, indicating the functional importance of NGLY1 in humans. NGLY1 is involved in the endoplasmic reticulum-associated degradation (ERAD) process, which eliminates misfolded proteins through retrograde translocation and proteasomal degradation. NGLY1 is also reported to be involved in the activation of a transcription factor, making the disease mechanism of NGLY1 deficiency complicated. Recent evidence also suggests that Ngly1-KO in the C57BL/6 mouse strain is embryonic lethal, while additional deletion of the Engase gene, encoding another cytosolic deglycosylating enzyme (endo-β-N-acetylglucosaminidase; ENGase), partially rescued lethality in mice. It was suggested that upon compromised NGLY1 activity, ENGase-mediated deglycosylation of misfolded glycoproteins may cause excess formation of N-GlcNAc proteins in the cytosol, which can somehow be detrimental to mice. Whether endogenous N-GlcNAc proteins are really formed in Ngly1-KO cells/animals or not remains unclarified. 
In this study, comprehensive identification of O- and N-GlcNAc proteins was carried out using the cytosol fraction of mice embryonic fibroblasts from wild type, Ngly1-KO, Engase-KO and Ngly1/Engase double KO mice in the presence or absence of proteasomal inhibition. It was revealed that, while there is no dramatic change in the level of O-GlcNAc proteins among conditions examined, there was a vast increase of N-GlcNAc proteins in Ngly1-KO fibroblasts upon proteasome inhibition. Importantly, few N-GlcNAc proteins were observed in Engase-KO or Ngly1/Engase double-KO cells, clearly indicating that ENGase is responsible for the formation of cytosolic N-GlcNAc proteins. The excess formation of N-GlcNAc proteins may at least in part account for the pathogenesis of NGLY1 deficiency. This work was supported by the Dr. Miriam And Sheldon G. Adelson Medical Research Foundation (AMRF), the UCSF Program for Breakthrough Biomedical Research (PBBR), the Grace Science Foundation, RIKEN Pioneering Project (Glycolipidologue Initiative), and Grants-in-Aid for Scientific Research (grant no. 16K18520). A.36 Unbiased Proteomics and Network Propagation Reveals Cancer Drug Targets Mehdi Bouhaddou1, Neil Bhola1, Rachel O'Keefe1, Margaret Soucheray1, Hua Li1, Tian Zhu1, Kelechi Nwachuku1, Toni Brand1, Gordon Mills1, Dan Johnson1, Danielle L. Swaney1, Jennifer Grandis1, Nevan J. Krogan1 1University of California San Francisco, San Francisco, CA 94158 Head and neck cancer is the seventh most common malignancy worldwide with few treatment options. The only FDA-approved targeted kinase inhibitor to treat the disease is cetuximab, a monoclonal antibody against EGFR to which patients often develop lethal resistance. There is a critical need to understand the mechanisms of drug resistance and discover novel targets whose inhibition could provide synergy with current therapy. Here, we integrate proteomic data from cell lines and patients to reveal novel factors underlying cetuximab resistance.
To collect proteomics data in cell lines, we first cultured head and neck cancer cell lines with cetuximab for several months until resistance developed. We then performed global phosphoproteomics and abundance proteomics on both the drug-naïve and resistant models. In addition, for each model, we performed affinity purification mass spectrometry (AP-MS) for 28 of the most commonly mutated proteins in head and neck cancer (including several protein mutants). High confidence interacting proteins were identified and quantified using a multi-step bioinformatics pipeline. The result is a comprehensive map of changes in protein-protein interactions between the drug-naïve and resistant cell contexts. Phosphoproteomics revealed increased activation of several signaling pathways. Abundance proteomics revealed enhanced recruitment of metabolic pathways. AP-MS data reveals several novel oncogene interactions—many of which drastically change upon drug-induced rewiring. Lastly, an integrative network propagation technique, which incorporates all layers of proteomics data including reverse phase protein array (RPPA) data from head and neck cancer PDX models, reveals a subnetwork of ∼100 genes underlying drug resistance and sensitivity. Ongoing studies aim to perform CRISPRa/i screens of identified targets in combination with cetuximab to assess synergistic potential. We create a resource map of altered protein-protein interactions and reveal a protein subnetwork signature of drug resistance using a network propagation procedure to overlay distinct datatypes and extract overlapping features. Potential applications from this study span to other cancer types and drug targets. 
A.37 Tuning residence time with lysine-targeted, reversible covalent probes Tangpo Yang1, Adolfo Cuesta1, Xiaobo Wan1,2, Jack Taunton1 1Department of Cellular and Molecular Pharmacology, University of California, San Francisco, CA 94158, 2Department of Pharmaceutical Chemistry, University of California, San Francisco, CA 94158 Irreversible or reversible covalent modification, often applied to cysteine residues, can enhance the on-target residence time of small-molecule drugs and probes. However, many protein targets lack an accessible cysteine, and alternative strategies for covalent modification are therefore needed. Here, we report a series of benzaldehyde-based chemoproteomic probes that rapidly and reversibly engage the catalytic lysine of up to 167 protein kinases in cells and in mice. We demonstrate that probe-kinase residence time can be tuned by appending a hydroxyl group ortho to the aldehyde moiety. Chemoproteomic studies revealed that the intracellular selectivity of otherwise promiscuous salicylaldehyde-based probes increased dramatically upon washout due to distinct kinase-specific residence times. Finally, co-crystal structures of salicylaldehyde-bound AurA and Src kinases provided insight into the basis of prolonged residence time. We anticipate this approach can be applied more generally to the design of reversible covalent probes with sustained target engagement. A.38 Characterizing and Targeting the Hypoxic T Cell Surfaceome to Promote Immune Function in Cancer James R. Byrnes1, Lisa Kirkemo1, Amy M. Weeks1, James A. Wells1,2 1Department of Pharmaceutical Chemistry, University of California, San Francisco, 2Department of Cellular and Molecular Pharmacology, University of California, San Francisco Recent cancer treatment research efforts have focused on developing T cell-based immunotherapies. However, these therapies have minimal efficacy in solid tumors. 
One characteristic of the solid tumor microenvironment is low oxygen availability, or hypoxia. Previous studies investigating the effects of tumor hypoxia on T cell function suggest that hypoxia suppresses the anti-tumor immune response. We therefore hypothesize that hypoxia alters the T cell surface protein profile (the “surfaceome”) and T cell function in a manner consistent with a net immunosuppressive effect. Using proteomics-based approaches, we aim to identify targets for antibody tools designed to increase the anti-tumor function of hypoxic T cells. To characterize hypoxia-induced surfaceomic changes, we first examined how hypoxia affects T cell surfaceomes in vitro. Primary CD8+ or CD4+ effector T cells (Teffs), as well as immunosuppressive regulatory T cells (Tregs), were isolated from peripheral blood and expanded for two weeks in heavy or light lysine/arginine containing media to ensure complete isotope labeling. Cells were then stimulated with anti-CD3/CD28 and cultured for 3 days in either normoxia (20% O2) or hypoxia (1% O2). Surfaceomes were profiled using an established biocytin hydrazide surface glycoproteomics pipeline. LC-MS/MS of surface-enriched proteins from either CD4+ or CD8+ Teffs cultured in normoxia or hypoxia identified over 900 surface proteins, many of which were significantly repressed or induced by hypoxia. Overall, CD4+ and CD8+ Teffs responded similarly to hypoxia (R=0.7, P<0.0001, N=3 donors), but the magnitude of hypoxia-induced surfaceomic changes was greater in CD4+ versus CD8+ cells. Consistent with previous reports suggesting hypoxia is immunosuppressive, hypoxia significantly downregulated numerous Teff stimulatory proteins (cytokine receptors, co-stimulatory proteins). In addition to observing previously reported hypoxia-induced proteins, many proteins involved in protein glycosylation and carbohydrate metabolism were upregulated on hypoxic Teffs. 
Preliminary Treg experiments showed this T cell subtype was more resistant to hypoxia-induced surfaceomic changes than Teffs, suggesting these cells may function better in the hypoxic tumor microenvironment. Collectively, these data suggest hypoxia induces surfaceomic changes consistent with reduced Teff function. Future proteomic and functional studies will aim to validate these findings and identify new antibody-based strategies for enhancing the anti-tumor function of hypoxic Teffs. A.39 Twins Labeling Derivatization-based LC-MS/MS Strategy for absolute quantification of modified metabolites Wei Li1,2, Zunjian Zhang1,2, Fengguo Xu1,2 1Key Laboratory of Drug Quality Control and Pharmacovigilance, China Pharmaceutical University, China, 2State Key Laboratory of Natural Medicine, China Pharmaceutical University, Nanjing 210009, China Epigenetic modifications and regulation of DNA, RNA and proteins have been thoroughly investigated these days with various detection techniques. Accordingly, there are also diverse forms of modifications in metabolites such as bases, nucleosides and amino acids. However, the biological function of these modified metabolites has not been well illustrated owing to their obstacles in identification and quantification. In the current study, a sensitive liquid chromatography tandem mass spectrometry (LC-MS/MS) method was developed for the simultaneous quantification of eighteen metabolites including cytosine bases, nucleosides, amino acids and their different forms of modification. For the purpose of improving quantification sensitivity and accuracy, two structure analogs named N-dimethyl-amino naphthalene-1-sulfonyl chloride (Dns-Cl) and N-diethyl-amino naphthalene-1-sulfonyl chloride (Dens-Cl) were used for twins labeling derivatization. With the introduction of naphthalene and easily ionizable moiety of tertiary ammonium, this method notably improved the chromatography retention and detection sensitivity of these polar metabolites. 
In allusion to the problem of wide concentration range between these unmodified metabolites and their modified forms in biological samples, a wide range of concentration was tested and met the requirements with good accuracy and precision. The lower limit of quantification (LLOQ) was in the range of 1–100ng/mL. The validated method was successfully applied to quantify monomethyl, dimethyl, trimethyl, and acetyl modifications of metabolites and their ratios in human lung adenocarcinoma cell line A549 and its cisplatin resistant derivative A549/DDP. The results demonstrated significant reduction of 1-methyladenosine, 1-methyladenine, symmetric dimethylarginine in A549/DDP compared with A549. These modified metabolites could potentially act as biomarkers or have underlying effects on the epigenetic regulation in the process of cisplatin resistance. A.40 A Chemoproteomics Workflow for the Global Analysis of Acyl-CoA Signaling Networks Michaella J. Levy1, David C. Montgomery2, Mihaela E. Sardiu1, Abigail Thorpe2, Steve Fox3, Qishan Lin4, Thorkell Andresson3, Laurence Florens1, Michael P. Washburn1,4, Jordan L. Meier2 1Stowers Institute for Medical Research, Kansas City, MO 64110, 2Chemical Biology Laboratory, Center for Cancer Research, NCI, NIH, Frederick, MD 21702, 3Laboratory of Proteomics and Analytical Technologies, Leidos, Inc, Frederick, MD 21702, 4Department of Pathology and Laboratory Medicine, KUMC, Kansas City, KS 66160 Acyl-CoAs are essential for life. These metabolites serve as fundamental cellular building blocks in the biosynthesis of lipids, intermediates in energy production via the TCA cycle, and essential precursors for reversible protein acetylation. Each of these functions are physically dependent on acyl-CoA/protein interactions, which can regulate protein activity as enzyme cofactors, competitive or allosteric inhibitors, or through covalent modification of proteins. 
These examples illustrate the ability of acyl-CoA signaling to influence biology and disease. However, the global scope and selectivity of these metabolite-governed regulatory networks remains unknown. To this end, we used a previously reported resin-immobilized CoA analogue (Lys-CoA) to capture CoA-utilizing enzymes from unfractionated biological samples. To understand the global profiling or discovery of novel lysine acetyltransferase (KAT) enzymes in an unbiased, high throughput manner, we implemented Multidimensional Protein Identification Technology (MudPIT) mass spectrometry, integrating online multidimensional liquid chromatographic separation and quantitative tandem mass spectrometry analysis of complex peptide mixtures. Proteomes competed with acetyl-CoA at three concentrations were analyzed in triplicate. The 1700 proteins identified by at least 4 spectral counts in the control were separated into 8 distinct clusters by k-means clustering. Next, the binding profiles of the proteins in each of the 8 clusters were determined and showed which clusters contained proteins that were susceptible to competition with acetyl-CoA. Three clusters contained proteins with profiles showing moderate, competitive, or hyper-competitive binding profiles with acetyl-CoA. Further, gene ontology analysis of the clusters revealed the highest percentage of CoA annotated proteins were in the three competed clusters. Analyzing the data using this pipeline highlighted the ability to identify CoA binding proteins in an unbiased manner from a whole proteome. We further demonstrated the strengths of this approach when proteomes were competed with various concentrations of CoA or CoA metabolite analogues at a single concentration. We term this approach CATNIP (CoA/AcetylTraNsferase Interaction Profiling) and demonstrate its ability to globally enrich and analyze acyl-CoA/protein interactions in endogenous human proteomes.
Overall, our studies illustrate the power of integrating chemoproteomics and systems biology analysis methods and provide a novel resource for understanding the diverse signaling roles of acyl-CoAs in biology and disease. A.41 Proteomics insights into the role of PknG in mycobacterial physiology and pathogenesis Analía Lima1,2, Magdalena Gil1,3, Bernardina Rivera1,2, Jessica Rossello1,2, Annemarie Wehenkel3, María N. Lisa4, Pedro M. Alzari3, Rosario Durán1,2 1Institut Pasteur de Montevideo, 2Instituto de Investigaciones Biológicas Clemente Estable, Montevideo, 3Institut Pasteur, Paris, 4Instituto de Biología Molecular y Celular de Rosario, Argentina Mycobacterium tuberculosis, the causative agent of tuberculosis, is a major public health problem, being the first cause of death due to a single infectious agent. The success of M. tuberculosis as a human pathogen lies mainly in its ability to switch lifestyles to survive in the different conditions found in the host. A key player in facilitating bacterial survival within host macrophages is the Ser/Thr kinase PknG, an autophosphorylated multi-domain protein. Although the central role of PknG in mycobacterial physiology and virulence is well documented, the molecular mechanisms underlying these effects, as well as the protein partners involved, are still poorly characterized. To contribute to a better understanding of the signaling pathways of PknG we have carried out proteomics and interactomics studies. We developed a tailored interactomic approach that combines the use of different constructions of PknG with specific sequential elution steps to identify kinase mediated protein complexes in vitro, and to discriminate those interactions relying on PknG's autophosphorylated docking sites1. We report a list of kinase substrates and interactors that suggest its involvement in the regulation of a wide range of cellular processes including protein translation, nitrogen assimilation and cell wall biosynthesis. 
The interaction with the kinase, and/or the phosphorylation by PknG, was further confirmed for selected candidates. Moreover, we carried out quantitative proteomic approaches to compare M. tuberculosis wild type and a mutant derivative lacking PknG. The results showed that the expression of as much as 6.8% of the predicted M. tuberculosis proteome is altered in the bacteria lacking PknG, and indicated that a battery of proteins that are relevant for the adaptation to host's environment and induction of a mycobacterial persistent state are underrepresented in this strain. Altogether, our results suggest that the effect of PknG on mycobacterial survival inside macrophages could be mediated by a general metabolic fitness to the conditions encountered in the host. 1Gil et al, J Proteomics. 2019;192:321–333 A.42 Pushing the limits: Boosting sensitivity of PRM assays for the detection of very low abundant proteins in complex samples Emmanuelle Lezan1,2, Erik Ahrné1,3, Thomas Bock1, Alexander Schmidt1 1Biozentrum, University of Basel, Switzerland, 2F. Hoffmann-La Roche Ltd, Basel, Switzerland, 3Novartis, Basel, Switzerland Even though several DIA approaches have been developed to date, parallel reaction monitoring (PRM) provides the most promising approach to detect specific proteins of low abundance within a complex sample. While providing superior sensitivity, very low abundant proteins being expressed with less than a few hundred copies per cell remain challenging to confidently detect and quantify in higher eukaryotes when applying a standard PRM setup. Here, we evaluated the impact of several critical MS parameters on PRM sensitivity and compared the data to other DIA approaches. We performed a dilution series experiment comprising twenty unmodified synthetic heavy labeled peptides spiked into a complex human cell digest to determine limits of detection (LOD), quantification (LOQ) and identification (LOI) of the different methodologies. 
We further employed different parent ion mass windows, resolutions and ion fill times and determined their impact on LOD, LOQ and LOI. For shotgun and SWATH/HRM LC-MS analysis we identified detection limits of about 1 fmol on column. As expected, PRM analysis provided the highest sensitivity with LODs being in the high attomole range using standard settings (resolution 30,000, fill time 50 ms). We next evaluated the impact of different parent isolation mass windows on LODs. In the initial PRM studies, a mass window of 2 Th was applied, however, for our Q Exactive HF LC-MS platform, we found smaller mass windows (0.4–0.7 Th) to considerably improve the sensitivity of our PRM assays. Interestingly, we found large mass windows, like utilized in SWATH/HRM-MS, to have a considerable negative impact on detection limits. We further evaluated the impact of increased resolution and fill time of the Orbitrap analyzer and found both parameters to boost overall sensitivity of the PRM assays up to 20-fold. This allowed peptide monitoring in the low attomole range within this complex human sample, albeit with a reduced number of peptide targets of around 10. We finally demonstrate the power of these highly sensitive PRM assays by robust quantification of centrosomal proteins that are expressed at extremely low levels and that were not identified in previous large-scale LC-MS studies. A.43 Discovery of a common target of natural products through combination of chemical genomics and proteomics Haijun Guo1,2, Yang Yang1, Jieren Deng1, Mankin Wong1,2, Qian Zhao1,2 1Department of Applied Biology and Chemical Technology, The Hong Kong Polytechnic University, HK SAR, 2State Key Laboratory of Chemical Biology and Drug Discovery, Hong Kong SAR, China Chemproteomics that seeks to design small molecule probes to study protein function, is a powerful tool to understand the binding targets and related mechanisms of functional small molecules. 
As challenges lie in probe synthesis as well as throughput of target identification and validation, an approach that offers high efficiency without extensive synthesis is desired. We have developed a new method by introducing chemical genomics into chemical proteomics-based target identification. First, natural products Cel, WA, Au were classified to have similar effects on gene expression patterns by using the chemical genomics platform L1000. Next, we synthesized a chemical probe C1a based on Cel structure as it is the most well studied and easiest for chemical derivatization among all three natural products. As we expected, probe C1a retained the same biological activities of its parent compound Cel, including inhibition on cancer cell proliferation and anti-inflammation. By using C1a, we were able to monitor the cellular localization of Cel as well as its binding proteins. In a competition experiment where WA and Au served as competitors, a common protein target of all the three natural products was identified, which may also explain their biological activities in common. In summary, our study revealed a novel protein target of several natural products, indicating that the similarities in genomic profiles could result from common binding targets. Meanwhile, the novel method that we have developed through combining chemical genomics with proteomics will be useful for multiplexing target identification and mechanism studies of structurally distinct small molecules. A.44 Developing a reversible covalent protein/peptide capture technology for low abundance proteomics Brendan M. Floyd1, Cecil J. Howard II1, Jagannath Swaminathan1, James L. Reuther1, Edward M. Marcotte1, Eric V. Anslyn1 1University of Texas at Austin A major challenge in proteomics is the handling of extremely low abundance samples spurring the development of new sample handling technologies. Here we present the development of a bead-based covalent capture method for low abundance sample handling. 
This method uses a physiological pH covalent reaction to specifically capture proteins and peptides by the amino terminus. Attachment of the reactive agent to a solid-support has allowed for capture and manipulation of linked peptides and proteins including proteolysis and covalent modification. Peptides can subsequently be released with high efficiency and analyzed using mass spectrometry or single-molecule protein sequencing. This new method offers a novel way to tackle the contemporary proteomics problem of low abundance sample handling. JS, EMM, and EVA are cofounders and hold equity of Erisyon, Inc.",2019-08-13 +,In Case You Haven't Heard…,"Among the many breathtaking changes the federal government is making because of COVID‐19 (more methadone take‐homes, video‐only buprenorphine induction) is the recent announcement by the Office of Inspector General (OIG) of the Department of Health and Human Services that Medicare beneficiaries be provided more information about methadone, buprenorphine and naloxone, in a harm‐reduction‐style approach to reducing overdoses among the older population. In a data brief released last month, the OIG notes that people with opioid use disorder (OUD) could be particularly hard hit by COVID‐19, because of increased risk of respiratory disease, and that most Medicare beneficiaries at serious risk of opioid misuse or overdose in 2017 received high amounts of opioids the following year. However, only a quarter received a prescription for naloxone, which reverses opioid overdoses. 
In addition, only 7% of beneficiaries who were at serious risk in 2017 who were diagnosed with an OUD received a medication‐assisted treatment drug, “possibly because of challenges that beneficiaries have in accessing prescribers.” For the policy brief, go to https://oig.hhs.gov/oei/reports/oei‐02‐19‐00130.asp.",2020-05-29 +34900127,MIKB: A manually curated and comprehensive knowledge base for myocardial infarction.,"Myocardial infarction knowledge base (MIKB; http://www.sysbio.org.cn/mikb/; latest update: December 31, 2020) is an open-access and manually curated database dedicated to integrating knowledge about MI to improve the efficiency of translational MI research. MIKB is an updated and expanded version of our previous MI Risk Knowledge Base (MIRKB), which integrated MI-related risk factors and risk models for providing help in risk assessment or diagnostic prediction of MI. The updated MIRKB includes 9701 records with 2054 single factors, 209 combined factors, 243 risk models, 37 MI subtypes and 3406 interactions between single factors and MIs collected from 4817 research articles. The expanded functional module, i.e. MIGD, is a database including not only MI associated genetic variants, but also the other multi-omics factors and the annotations for their functional alterations. The goal of MIGD is to provide a multi-omics level understanding of the molecular pathogenesis of MI. MIGD includes 1782 omics factors, 28 MI subtypes and 2347 omics factor-MI interactions as well as 1253 genes and 6 chromosomal alterations collected from 2647 research articles. The functions of MI associated genes and their interaction with drugs were analyzed. 
MIKB will be continuously updated and optimized to provide precision and comprehensive knowledge for the study of heterogeneous and personalized MI.",2021-11-16 +33084893,crisprSQL: a novel database platform for CRISPR/Cas off-target cleavage assays.,"With ongoing development of the CRISPR/Cas programmable nuclease system, applications in the area of in vivo therapeutic gene editing are increasingly within reach. However, non-negligible off-target effects remain a major concern for clinical applications. Even though a multitude of off-target cleavage datasets have been published, a comprehensive, transparent overview tool has not yet been established. Here, we present crisprSQL (http://www.crisprsql.com), an interactive and bioinformatically enhanced collection of CRISPR/Cas9 off-target cleavage studies aimed at enriching the fields of cleavage profiling, gene editing safety analysis and transcriptomics. The current version of crisprSQL contains cleavage data from 144 guide RNAs on 25,632 guide-target pairs from human and rodent cell lines, with interaction-specific references to epigenetic markers and gene names. The first curated database of this standard, it promises to enhance safety quantification research, inform experiment design and fuel development of computational off-target prediction algorithms.",2021-01-01 +33010170,BiG-FAM: the biosynthetic gene cluster families database.,"Computational analysis of biosynthetic gene clusters (BGCs) has revolutionized natural product discovery by enabling the rapid investigation of secondary metabolic potential within microbial genome sequences. Grouping homologous BGCs into Gene Cluster Families (GCFs) facilitates mapping their architectural and taxonomic diversity and provides insights into the novelty of putative BGCs, through dereplication with BGCs of known function. While multiple databases exist for exploring BGCs from publicly available data, no public resources exist that focus on GCF relationships. 
Here, we present BiG-FAM, a database of 29,955 GCFs capturing the global diversity of 1,225,071 BGCs predicted from 209,206 publicly available microbial genomes and metagenome-assembled genomes (MAGs). The database offers rich functionalities, such as multi-criterion GCF searches, direct links to BGC databases such as antiSMASH-DB, and rapid GCF annotation of user-supplied BGCs from antiSMASH results. BiG-FAM can be accessed online at https://bigfam.bioinformatics.nl.",2021-01-01 +34882796,Structural amyloid plaque polymorphism is associated with distinct lipid accumulations revealed by trapped ion mobility mass spectrometry imaging.,"Understanding of Alzheimer's disease (AD) pathophysiology requires molecular assessment of how key pathological factors, specifically amyloid β (Aβ) plaques, influence the surrounding microenvironment. Here, neuronal lipids have been implicated in Aβ plaque pathology, though the lipid microenvironment in direct proximity to Aβ plaques is still not fully resolved. A further challenge is the microenvironmental molecular heterogeneity, across structurally polymorphic Aβ features, such as diffuse, immature, and mature, fibrillary aggregates, whose resolution requires the integration of advanced, multimodal chemical imaging tools. Herein, we used matrix-assisted laser desorption/ionization trapped ion mobility spectrometry time-of-flight based mass spectrometry imaging (MALDI TIMS TOF MSI) in combination with hyperspectral confocal microscopy to probe the lipidomic microenvironment associated with structural polymorphism of Aβ plaques in transgenic Alzheimer's disease mice (tgAPPSWE ). Using on tissue and ex situ validation, TIMS MS/MS facilitated unambiguous identification of isobaric lipid species that showed plaque pathology-associated localizations. 
Integrated multivariate imaging data analysis revealed multiple, Aβ plaque-enriched lipid patterns for gangliosides (GM), phosphoinositols (PI), phosphoethanolamines (PE), and phosphatidic acids (PA). Conversely, sulfatides (ST), cardiolipins (CL), and polyunsaturated fatty acid (PUFA)-conjugated phosphoserines (PS), and PE were depleted at plaques. Hyperspectral amyloid imaging further delineated the unique distribution of PA and PE species to mature plaque core regions, while PI, LPI, GM2 and GM3 lipids localized to immature Aβ aggregates present within the periphery of Aβ plaques. Finally, we followed AD pathology-associated lipid changes over time, identifying plaque- growth and maturation to be characterized by peripheral accumulation of PI (18:0/22:6). Together, these data demonstrate the potential of multimodal imaging approaches to overcome limitations associated with conventional advanced MS imaging applications. This allowed for the differentiation of both distinct lipid components in a complex micro-environment as well as their correlation to disease-relevant amyloid plaque polymorphs. Cover Image for this issue: https://doi.org/10.1111/jnc.15390.",2021-12-26 +31133849,"The NCATS BioPlanet - An Integrated Platform for Exploring the Universe of Cellular Signaling Pathways for Toxicology, Systems Biology, and Chemical Genomics.","Chemical genomics aims to comprehensively define, and ultimately predict, the effects of small molecule compounds on biological systems. Chemical activity profiling approaches must consider chemical effects on all pathways operative in mammalian cells. To enable a strategic and maximally efficient chemical profiling of pathway space, we have created the NCATS BioPlanet, a comprehensive integrated pathway resource that incorporates the universe of 1,658 human pathways sourced from publicly available, manually curated sources, which have been subjected to thorough redundancy and consistency cross-evaluation. 
BioPlanet supports interactive browsing, retrieval, and analysis of pathways, exploration of pathway connections, and pathway search by gene targets, category, and availability of corresponding bioactivity assay, as well as visualization of pathways on a 3-dimensional globe, in which the distance between any two pathways is proportional to their degree of gene component overlap. Using this resource, we propose a strategy to identify a minimal set of 362 biological assays that can interrogate the universe of human pathways. The NCATS BioPlanet is a public resource, which will be continually expanded and updated, for systems biology, toxicology, and chemical genomics, available at http://tripod.nih.gov/bioplanet/.",2019-04-26 +34175927,TimeCycle: Topology Inspired MEthod for the Detection of Cycling Transcripts in Circadian Time-Series Data.,"

Motivation

The circadian rhythm drives the oscillatory expression of thousands of genes across all tissues. The recent revolution in high-throughput transcriptomics, coupled with the significant implications of the circadian clock for human health, has sparked an interest in circadian profiling studies to discover genes under circadian control.

Result

We present TimeCycle: a topology-based rhythm detection method designed to identify cycling transcripts. For a given time-series, the method reconstructs the state space using time-delay embedding, a data transformation technique from dynamical systems theory. In the embedded space, Takens' theorem proves that the dynamics of a rhythmic signal will exhibit circular patterns. The degree of circularity of the embedding is calculated as a persistence score using persistent homology, an algebraic method for discerning the topological features of data. By comparing the persistence scores to a bootstrapped null distribution, cycling genes are identified. Results in both synthetic and biological data highlight TimeCycle's ability to identify cycling genes across a range of sampling schemes, number of replicates, and missing data. Comparison to competing methods highlights their relative strengths, providing guidance as to the optimal choice of cycling detection method.

Availability

A fully documented open-source R package implementing TimeCycle is available at: https://nesscoder.github.io/TimeCycle/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-06-27 +33835435,Modeling and Predicting RNA Three-Dimensional Structures.,"Modeling the three-dimensional structure of RNAs is a milestone toward better understanding and prediction of nucleic acids molecular functions. Physics-based approaches and molecular dynamics simulations are not tractable on large molecules with all-atom models. To address this issue, coarse-grained models of RNA three-dimensional structures have been developed. In this chapter, we describe a graphical modeling based on the Leontis-Westhof extended base pair classification. This representation of RNA structures enables us to identify highly conserved structural motifs with complex nucleotide interactions in structure databases. We show how to take advantage of this knowledge to quickly predict three-dimensional structures of large RNA molecules and present the RNA-MoIP web server (http://rnamoip.cs.mcgill.ca) that streamlines the computational and visualization processes. Finally, we show recent advances in the prediction of local 3D motifs from sequence data with the BayesPairing software and discuss its impact toward complete 3D structure prediction.",2021-01-01 +33084889,KLIFS: an overhaul after the first 5 years of supporting kinase research.,"Kinases are a prime target of drug development efforts with >60 drug approvals in the past two decades. Due to the research into this protein family, a wealth of data has been accumulated that keeps on growing. KLIFS-Kinase-Ligand Interaction Fingerprints and Structures-is a structural database focusing on how kinase inhibitors interact with their targets. The aim of KLIFS is to support (structure-based) kinase research through the systematic collection, annotation, and processing of kinase structures. Now, 5 years after releasing the initial KLIFS website, the database has undergone a complete overhaul with a new website, new logo, and new functionalities. 
In this article, we start by looking back at how KLIFS has been used by the research community, followed by a description of the renewed KLIFS, and conclude with showcasing the functionalities of KLIFS. Major changes include the integration of approved drugs and inhibitors in clinical trials, extension of the coverage to atypical kinases, and a RESTful API for programmatic access. KLIFS is available at the new domain https://klifs.net.",2021-01-01 +33645879,Biochemical and structural characterization of a novel 4-O-α-l-rhamnosyl-β-d-glucuronidase from Fusarium oxysporum.,"In this study, we have isolated the novel enzyme 4-O-α-l-rhamnosyl-β-d-glucuronidase (FoBGlcA), which releases α-l-rhamnosyl (1→4) glucuronic acid from gum arabic (GA), from Fusarium oxysporum 12S culture supernatant, and for the first time report an enzyme with such catalytic activity. The gene encoding FoBGlcA was cloned and expressed in Pichia pastoris. When GA was subjected to the recombinant enzyme, > 95% of the l-rhamnose (Rha) and d-glucuronic acid in the substrate were released, which indicates that almost all Rha binds to the glucuronic acid at the end of the GA side chains. The crystal structure of FoBGlcA was determined using a single-wavelength anomalous dispersion at 1.51 Å resolution. FoBGlcA consisted of an N-terminal (β/α)8 -barrel domain and a C-terminal antiparallel β-sheet domain. This configuration is characteristic of glycoside hydrolase (GH) family 79 proteins. A structural similarity search showed that FoBGlcA mostly resembled GH79 β-d-glucuronidase (AcGlcA79A) of Acidobacterium capsulatum; however, the root-mean-square deviation value was 3.2 Å, indicating that FoBGlcA has a high structural divergence. FoBGlcA had a low sequence identity with AcGlcA79A (19%) and differed from other GH79 β-glucuronidases. 
The structures of FoBGlcA and AcGlcA79A also differed in terms of the loop structure location near subsite -2 of their catalytic sites, which may account for the unique substrate specificity of FoBGlcA. The amino acid residues involved in the catalytic activity of this enzyme were determined by evaluating the activity levels of various mutant enzymes based on the crystal structure analysis of the FoBGlcA reaction product complex. DATABASE: Atomic coordinates and structure factors (codes 7DFQ and 7DFS) have been deposited in the Protein Data Bank (http://wwpdb.org/).",2021-03-11 +31415755,The Landscape of Genetic Content in the Gut and Oral Human Microbiome.,"Despite substantial interest in the species diversity of the human microbiome and its role in disease, the scale of its genetic diversity, which is fundamental to deciphering human-microbe interactions, has not been quantified. Here, we conducted a cross-study meta-analysis of metagenomes from two human body niches, the mouth and gut, covering 3,655 samples from 13 studies. We found staggering genetic heterogeneity in the dataset, identifying a total of 45,666,334 non-redundant genes (23,961,508 oral and 22,254,436 gut) at the 95% identity level. Fifty percent of all genes were ""singletons,"" or unique to a single metagenomic sample. Singletons were enriched for different functions (compared with non-singletons) and arose from sub-population-specific microbial strains. Overall, these results provide potential bases for the unexplained heterogeneity observed in microbiome-derived human phenotypes. On the basis of these data, we built a resource, which can be accessed at https://microbial-genes.bio.",2019-08-01 +27899570,Mouse Genome Database (MGD)-2017: community knowledge resource for the laboratory mouse.,"The Mouse Genome Database (MGD: http://www.informatics.jax.org) is the primary community data resource for the laboratory mouse. 
It provides a highly integrated and highly curated system offering a comprehensive view of current knowledge about mouse genes, genetic markers and genomic features as well as the associations of those features with sequence, phenotypes, functional and comparative information, and their relationships to human diseases. MGD continues to enhance access to these data, to extend the scope of data content and visualizations, and to provide infrastructure and user support that ensures effective and efficient use of MGD in the advancement of scientific knowledge. Here, we report on recent enhancements made to the resource and new features.",2016-11-28 +34569894,Experiences and Challenges in the Role as Peer Support Workers in a Swedish Mental Health Context - An Interview Study.,"The focus on recovery within psychiatric care is increasing, where peer support may play a pivotal role. Previous research shows both mixed and promising results in terms of beneficial outcomes for patients and peer support workers (PSW). The study's aim was to investigate PSW' experiences of their professional role and associated relationships with healthcare staff and patients. Semi-structured in-depth interviews were conducted with 10 PSW. Data was analyzed with content analysis. Three themes were constructed; ""Experience of stigma"", ""Authenticity and balance in the patient relationship"" and ""Opportunities and setbacks in the team"". Challenges included stigmatization, loyalty conflicts, lack of a clear job description and feelings of insecurity and disinterest among other staff. However, the peer support role was perceived as deeply meaningful. The peer support role comes with challenges and opportunities for the PSW, and potentially for the patients and the surrounding work team. 
Further research is needed to illuminate the value of peer support for patients, PSW and healthcare staff, and potential barriers and facilitators to the integration of peer support within psychiatric care. Supplemental data for this article is available online at https://doi.org/10.1080/01612840.2021.1978596 .",2021-09-27 +34870567,PTSD Symptoms and Hazardous Drinking Indicators among Trauma-Exposed Sexual Minority Women during Heightened Societal Stress.,"Trauma-exposed sexual minority women (SMW) are at elevated risk of posttraumatic stress disorder (PTSD) and hazardous drinking compared to trauma-exposed heterosexual women. To understand whether these problems might be exacerbated during times of elevated societal stress, we collected data from a New York-based sample of trauma-exposed SMW between April 2020 and August 2020, a period of notable, compounding societal stressors, including: (a) living in or near one of the first epicenters of the coronavirus disease 2019 (COVID-19) epidemic in the United States and (b) living through multiple high-profile occurrences of racism-related police violence and subsequent racial unrest. SMW (n = 68) completed online self-report questionnaires related to trauma, PTSD symptoms, and alcohol use, and a subset (n = 29) completed semi-structured qualitative interviews. PsycINFO was searched with terms related to SMW, PTSD, and alcohol use to identify studies with samples of SMW from articles published within the last 10 years to which we could compare our sample; this produced nine studies. Welch's t-tests and Chi-square analyses revealed that SMW within our sample reported significantly higher PTSD symptom severity, probable PTSD, and hazardous drinking indicators (i.e., alcohol use disorder and heavy episodic drinking) between April 2020 and August 2020 compared to similar samples (i.e., trauma-exposed SMW and general samples of SMW) assessed previously. 
Qualitative reports also indicated that the societal stressors of 2020 contributed to mental and behavioral health concerns. These results underscore the need for integrated PTSD and alcohol use prevention and intervention efforts for trauma-exposed SMW during times of heightened societal stress. Supplemental data for this article is available online at https://doi.org/10.1080/08964289.2021.2006132 .",2021-12-06 +34142845,"First report of Xanthomonas campestris pv. campestris as the causal agent of necrotic leaf spot in Phaseolus vulgaris at Puebla, Mexico. ","Beans are the most cultivated legume in the world. In Mexico, it is the second most important crop after corn (FAO 2020; SIAP 2020). Bean plants ""Flor de Mayo M38"" variety were affected by a foliar disease during the agricultural cycle 2019 in Puebla-Mexico (19°02'46.6"" LN and 98°05'15.6"" LO). Necrotic V-shaped lesions were observed on the margins of the leaves surrounded by yellow halos followed by foliar necrosis, affecting 40% of the crop. In Mexico this variety of cultivars is in great demand for local consumption and generates income in foreign currency (Castellanos et al. 1997). Sampling was carried out on 50 plants ""Flor de Mayo M38"" variety, with necrotic leaf symptoms from ten plots of one hectare. Samples were cut into pieces (5 mm), disinfested with 1% hypochlorite 3 min, and washed with sterile distilled water. Subsequently, samples were dried on sterile paper and placed on Petri plates containing yeast extract calcium carbonate dextrose agar (YDC) medium and kept at 36°C for 3 days. Colonies of ten typical bacteria isolated from all symptomatic plants were Gram (-), small and uniform in size with rounded edges, yellow, convex with entire borders and mucoid appearance on YDC. Bacteria did not grow on 0.1% triphenyl tetrazolium chloride amended casamino acid, peptone, and glucose medium (CPG). 
Biochemical tests showed that isolates did not reduce nitrate to nitrites, had positive catalase and starch hydrolysis, while the Kovac oxidase test was negative (Schaad and White 1974). Genus identity of the representative isolate Xcf1-APJR, was confirmed by 16S rRNA encoding gene partial sequencing, using universal primers 518F (5'-CCAGCAGCCGCGGTAATACG-3') and 800R (5'-TACCAGGGTATCTAATCC-3') (Halim et al. 2020). BLASTn alignments against the nucleotide collection were 100% identical to Xanthomonas sequences including Xanthomonas campestris pv. campestris strains NZ_AP019684.1, CP025750.1, and MN108237.1. The 1,418 bp sequence was deposited in the GenBank database under accession number MT645246. The identification of species/pathovar was accomplished by serological methods using a polyclonal antiserum specific for X. campestris pv. campestris (Popović et al. 2013) with the DAS-ELISA commercial kit (catalog number 07122C/096, LOEWE Biochemica GmbH, Germany). The pathogenicity test was carried out on 50 healthy bean plants from the ""Flor de Mayo M38"" variety. Bacterial culture incubated at 28°C for 48 h in YDC medium was used to prepare the bacterial suspension (10^8 CFU mL^-1). The first two lower leaves of 30-day-old plants were inoculated by sprinkling. Ten plants sprayed with sterile distilled water were used as negative control. All plants were kept for 20 days in greenhouse at 18-26°C and relative humidity of 60%. After seven days, chlorotic lesions developed on all inoculated plants that became necrotic from 14 days after inoculation (dai). Necrotic leaf spots merged at 14 dai to form necrotic areas of more than 20 mm in diameter, reaching total necrosis of the leaf tissue at 20 dai and were similar to the symptoms observed in the field. Koch's postulates were confirmed by the reisolation of Xcf1-APJR strain, which presented the same colony morphology, partial sequence, and polyclonal specific detection. 
This is the first report of this pathogen causing necrotic leaf spot in beans from the ""Flor de Mayo M38"" variety in Puebla-Mexico. The author(s) declare no conflict of interest. References: FAO. 2020. FAOSTAT. Food and Agriculture Data. http://www.fao.org/faostat/en/#home/. SIAP. 2020. Atlas Agroalimentario. https://www.gob.mx/siap/. Castellanos, J. Z., et al. 1997. Arch. Latinoam. Nutr. 47:163. Schaad, N. W., and White, W. C. 1974. Phytopathology. 64:876. https://doi.org/10.1094/Phyto-64-876 Halim, R. A., et al. 2020. HAYATI J. Biosciences. 27:215. https://doi.org/10.4308/hjb.27.3.215 Popovic ́, T., et al. 2013. Plant Dis. 97:418. https://doi.org/10.1094/PDIS-05-12-0506-PDN.",2021-06-18 +33304465,HARP: a database of structural impacts of systematic missense mutations in drug targets of Mycobacterium leprae.,"Computational Saturation Mutagenesis is an in-silico approach that employs systematic mutagenesis of each amino acid residue in the protein to all other amino acid types, and predicts changes in thermodynamic stability and affinity to the other subunits/protein counterparts, ligands and nucleic acid molecules. The data thus generated are useful in understanding the functional consequences of mutations in antimicrobial resistance phenotypes. In this study, we applied computational saturation mutagenesis to three important drug-targets in Mycobacterium leprae (M. leprae) for the drugs dapsone, rifampin and ofloxacin namely Dihydropteroate Synthase (DHPS), RNA Polymerase (RNAP) and DNA Gyrase (GYR), respectively. M. leprae causes leprosy and is an obligate intracellular bacillus with limited protein structural information associating mutations with phenotypic resistance outcomes in leprosy. Experimentally solved structures of DHPS, RNAP and GYR of M. 
leprae are not available in the Protein Data Bank, therefore, we modelled the structures of these proteins using template-based comparative modelling and introduced systematic mutations in each model generating 80,902 mutations and mutant structures for all the three proteins. Impacts of mutations on stability and protein-subunit, protein-ligand and protein-nucleic acid affinities were computed using various in-house developed and other published protein stability and affinity prediction software. A consensus impact was estimated for each mutation using qualitative scoring metrics for physicochemical properties and by a categorical grouping of stability and affinity predictions. We developed a web database named HARP (a database of Hansen's Disease Antimicrobial Resistance Profiles), which is accessible at the URL - https://harp-leprosy.org and provides the details to each of these predictions.",2020-11-19 +33742350,"Sca1+ Progenitor Cells (Ex vivo) Exhibits Differential Proteomic Signatures From the Culture Adapted Sca1+ Cells (In vitro), Both Isolated From Murine Skeletal Muscle Tissue.","Stem cell antigen-1 (Sca-1) is a glycosyl-phosphatidylinositol-anchored membrane protein that is expressed in a sub-population of muscle stem and progenitor cell types. Reportedly, Sca-1 regulates the myogenic property of myoblasts and Sca-1-/- mice exhibited defective muscle regeneration. Although the role of Sca-1 in muscle development and maintenance is well-acknowledged, molecular composition of muscle derived Sca-1+ cells is not characterized. Here, we applied a high-resolution mass spectrometry-based workflow to characterize the proteomic landscape of mouse hindlimb skeletal muscle derived Sca-1+ cells. Furthermore, we characterized the impact of the cellular microenvironments on the proteomes of Sca-1+ cells. The proteome component of freshly isolated Sca-1+ cells (ex vivo) was compared with that of Sca-1+ cells expanded in cell culture (in vitro). 
The analysis revealed significant differences in the protein abundances in the two conditions reflective of their functional variations. The identified proteins were enriched in various biological pathways. Notably, we identified proteins related to myotube differentiation, myotube cell development and myoblast fusion. We also identified a panel of cell surface marker proteins that can be leveraged in future to enrich Sca-1+ cells using combinatorial strategies. Comparative analysis implicated the activation of various pathways leading to increased protein synthesis under in vitro condition. We report here the most comprehensive proteome map of Sca-1+ cells that provides insights into the molecular networks operative in Sca-1+ cells. Importantly, through our work we generated the proteomic blueprint of protein abundances significantly altered in Sca-1+ cells under ex vivo and in vitro conditions. The curated data can also be visualized at https://yenepoya.res.in/database/Sca-1-Proteomics .",2021-03-19 +29088455,HCMDB: the human cancer metastasis database.,"Metastasis is the main event leading to death in cancer patients. Over the past decade, high-throughput technologies have provided genome-wide view of transcriptomic changes associated with cancer metastases. Many microarray and RNA sequencing studies have addressed metastases-related expression patterns in various types of cancer, and the number of relevant works continues to increase rapidly. These works have characterized genes that orchestrate the metastatic phenotype of cancer cells. However, these expression data have been deposited in various repositories, and efficiently analyzing these data is still difficult because of the lack of an integrated data mining platform. To facilitate the in-depth analyses of transcriptome data on metastasis, it is quite important to make a comprehensive integration of these metastases-related expression data. 
Here, we presented a database, HCMDB (the human cancer metastasis database, http://hcmdb.i-sanger.com/index), which is freely accessible to the research community query cross-platform transcriptome data on metastases. HCMDB is developed and maintained as a useful resource for building the systems-biology understanding of metastasis.",2018-01-01 +32849839,RIGD: A Database for Intronless Genes in the Rosaceae.,"Most eukaryotic genes are interrupted by one or more introns, and only prokaryotic genomes are composed of mainly single-exon genes without introns. Due to the absence of introns, intronless genes in eukaryotes have become important materials for comparative genomics and evolutionary biology. There is currently no cohesive database that collects intronless genes in plants into a single database, although many databases on exons and introns exist. In this study, we constructed the Rosaceae Intronless Genes Database (RIGD), a user-friendly web interface to explore and collect information on intronless genes from different plants. Six Rosaceae species, Pyrus bretschneideri, Pyrus communis, Malus domestica, Prunus persica, Prunus mume, and Fragaria vesca, are included in the current release of the RIGD. Sequence data and gene annotation were collected from different databases and integrated. The main purpose of this study is to provide gene sequence data. In addition, attribute analysis, functional annotations, subcellular localization prediction, and GO analysis are reported. The RIGD allows users to browse, search, and download data with ease. 
Blast and comparative analyses are also provided through this online database, which is available at http://www.rigdb.cn/.",2020-08-07 +31803189,Conservation Analysis of B-Cell Allergen Epitopes to Predict Clinical Cross-Reactivity Between Shellfish and Inhalant Invertebrate Allergens.,"Understanding and predicting an individual's clinical cross-reactivity to related allergens is a key to better management, treatment and progression of novel therapeutics for food allergy. In food allergy, clinical cross-reactivity is observed in patients reacting to unexpected allergen sources containing the same allergenic protein or antibody binding patches (epitopes), often resulting in severe allergic reactions. Shellfish allergy affects up to 2% of the world population and persists for life in most patients. The diagnosis of shellfish allergy is however often challenging due to reported clinical cross-reactivity to other invertebrates including mites and cockroaches. Prediction of cross-reactivity can be achieved utilizing an in-depth analysis of a few selected IgE-antibody binding epitopes. We combined available experimentally proven IgE-binding epitopes with informatics-based cross-reactivity prediction modeling to assist in the identification of clinical cross-reactive biomarkers on shellfish allergens. This knowledge can be translated into prevention and treatment of allergic diseases. To overcome the problem of predicting IgE cross-reactivity of shellfish allergens we developed an epitope conservation model using IgE binding epitopes available in the Immune Epitope Database and Analysis Resource (http://www.iedb.org/). We applied this method to a set of four different shrimp allergens, and successfully identified several non-cross-reactive as well as cross-reactive epitopes, which have been experimentally established to cross-react. 
Based on these findings we suggest that this method can be used for advanced component-resolved-diagnosis to identify patients sensitized to a specific shellfish group and distinguish from patients with extensive cross-reactivity to ingested and inhaled allergens from invertebrate sources.",2019-11-19 +34866521,"Translation, Psychometric and Concept Analysis of the Occupational Balance-Questionnaire Based on a Turkish Population.","Occupational balance is a crucial concept in occupational therapy, accepted as a key component of health and well-being. The Occupational Balance-Questionnaire (OB-Quest) is designed as a standardized instrument to assess occupational balance. This study investigated the validity and reliability of the OB-Quest Turkish, which consisted of translation, cross-cultural adaptation, and analysis psychometric properties phases. The factor structure of the OB-Quest indicated a good model fit. The criterion-related validity showed a positive correlation with Beck Depression Inventory and a negative correlation with the 12-item Short Form Survey. The OB-Quest-Turkish showed questionable internal consistency and an excellent correlation between test-retest.Supplemental data for this article is available online at https://doi.org/10.1080/07380577.2021.2010160 .",2021-12-04 +31863285,A comprehensive pathway map of IL-18-mediated signalling.,"Interleukin-18 (IL-18) is a member of the IL-1 family of cytokines and was initially described as an IFN-γ-inducing factor derived from anti-CD3-stimulated T-helper (Th)1 cells. IL-18 plays a significant role in the activation of hematopoietic cell types mediating both Th1 and Th2 responses and is the primary inducer of interferon-γ in these cells. The biological activity of IL-18 is mediated through its binding to the IL-18 receptor complex and activation of nuclear factor-κB (NF-κB), culminating in the production and release of several cytokines, chemokines, and cellular adhesion molecules. 
In certain cell types, IL-18 also activates mitogen-activated protein kinases (MAPKs) and phosphoinositide 3-kinase/ AKT serine/threonine kinase (PI3K/AKT) signaling modules leading to the production and release of proinflammatory cytokines. IL-18-mediated signaling acts as one of the vital components of the immunomodulatory cytokine networks involved in host defense, inflammation, and tissue regeneration. Albeit its biomedical importance, a comprehensive resource of IL-18 mediated signaling pathway is currently lacking. In this study, we report on the development of an integrated pathway map of IL-18/IL-18R signaling. The pathway map was developed through literature mining from published literature based on manual curation guidelines adapted from NetPath and includes information on 16 protein-protein interaction events, 38 enzyme-catalysis events, 12 protein translocation events, 26 activations/inhibition events, transcriptional regulators, 230 gene regulation events and 84 induced protein expression events. The IL-18 signaling pathway can be freely accessed through the WikiPathways database (https://www.wikipathways.org/index.php/Pathway:WP4754).",2019-12-20 +33746036,Alkaptonuria in Turkey: Clinical and molecular characteristics of 66 patients.,"Alkaptonuria (AKU) is an inborn error of metabolism caused by the deficiency of homogentisate 1,2-dioxygenase (HGD) as a result of a defect in the HGD gene. HGD enzyme deficiency results in accumulation of homogentisic acid (HGA) in the body, which in turn leads to multisystemic clinical symptoms. The present study aimed to investigate the presenting symptoms, age at diagnosis, and clinical and genetic characteristics of AKU patients followed-up in different centers in Turkey. In this cross-sectional, multicenter, descriptive study, medical records of 66 AKU patients were retrospectively evaluated. Patients' data regarding demographic, clinical and genetic characteristics were recorded. 
HGD database (http://hgddatabase.cvtisr.sk/) was used to identify HGD gene variants. Of the patients, 37 (56.1%) presented with isolated dark urine and 29 (43.9%) were diagnosed based on the clinical symptoms or family screening. One of these patients was on follow-up for 2 years due to Parkinsonism and was diagnosed with AKU on further analyses. Signs of ochronosis such as joint pain, low back pain and renal stones developed in childhood in 7 patients. Eight patients were diagnosed with depression via psychiatric evaluation. There were 14 (21.2%) patients operated on for ochronosis. The most frequent mutation observed in the patients was c.175delA, which was followed by c.674G > A and c.1007-2A > T mutations. Four novel mutations (c.189G > A, c.549+1G > T, c.1188+1G > A, and c.334 T > G) were identified in the patients included in the study. In addition to the known signs such as dark urine and skin pigmentation, symptoms involving different systems such as neurological findings and depression can also be encountered in AKU patients. The presence of a change in urine color needs to be questioned in patients presenting with different symptoms such as arthralgia/arthritis, renal stones or low-back pain, particularly in childhood, when skin ochronosis is not pronounced, and further examination should be performed.",2021-03-18 +34779073,PDBsum extras: SARS-CoV-2 and AlphaFold models.,"The PDBsum web server provides structural analyses of the entries in the Protein Data Bank (PDB). Two recent additions are described here. The first is the detailed analysis of the SARS-CoV-2 virus protein structures in the PDB. These include the variants of concern, which are shown both on the sequences and 3D structures of the proteins. The second addition is the inclusion of the available AlphaFold models for human proteins. 
The pages allow a search of the protein against existing structures in the PDB via the Sequence Annotated by Structure (SAS) server, so one can easily compare the predicted model against experimentally determined structures. The server is freely accessible to all at http://www.ebi.ac.uk/pdbsum.",2021-11-24 +32396365,"A Sectioning and Database Enrichment Approach for Improved Peptide Spectrum Matching in Large, Genome-Guided Protein Sequence Databases.","Multiomics approaches focused on mass spectrometry (MS)-based data, such as metaproteomics, utilize genomic and/or transcriptomic sequencing data to generate a comprehensive protein sequence database. These databases can be very large, containing millions of sequences, which reduces the sensitivity of matching tandem mass spectrometry (MS/MS) data to sequences to generate peptide spectrum matches (PSMs). Here, we describe and evaluate a sectioning method for generating an enriched database for those protein sequences that are most likely present in the sample. Our evaluation demonstrates how this method helps to increase the sensitivity of PSMs while maintaining acceptable false discovery rate statistics-offering a flexible alternative to traditional large database searching, as well as previously described two-step database searching methods for large sequence database applications. Furthermore, implementation in the Galaxy platform provides access to an automated and customizable workflow for carrying out the method. Additionally, the results of this study provide valuable insights into the advantages and limitations offered by available methods aimed at addressing challenges of genome-guided, large database applications in proteomics. 
Relevant raw data has been made available at https://zenodo.org/ using data set identifier ""3754789"" and https://arcticdata.io/catalog using data set identifier ""A2VX06340"".",2020-05-26 +34713102,A Real-Time Wearable System for Monitoring Vital Signs of COVID-19 Patients in a Hospital Setting.,"The challenges presented by the Coronavirus disease 2019 (COVID-19) pandemic to the National Health Service (NHS) in the United Kingdom (UK) led to a rapid adaptation of infection disease protocols in-hospital. In this paper we report on the optimisation of our wearable ambulatory monitoring system (AMS) to monitor COVID-19 patients on isolation wards. A wearable chest patch (VitalPatch®, VitalConnect, United States of America, USA) and finger-worn pulse oximeter (WristOx2® 3150, Nonin, USA) were used to estimate and transmit continuous Heart Rate (HR), Respiratory Rate (RR), and peripheral blood Oxygen Saturation (SpO2) data from ambulatory patients on these isolation wards to nurse bays remote from these patients, with a view to minimising the risk of infection for nursing staff. Our virtual High-Dependency Unit (vHDU) system used a secure web-based architecture and protocols (HTTPS and encrypted WebSockets) to transmit the vital-sign data in real time from wireless Android tablet devices, operating as patient data collection devices by the bedside in the isolation rooms, into the clinician dashboard interface available remotely via any modern web-browser. Fault-tolerant software strategies were used to reconnect the wearables automatically, avoiding the need for nurses to enter the isolation ward to re-set the patient monitoring equipment. The remote dashboard also displayed the vital-sign observations recorded by the nurses, using a separate electronic observation system, allowing them to review both sources of vital-sign data in one integrated chart. 
System usage was found to follow the trend of the number of local COVID-19 infections during the first wave of the pandemic in the UK (March to June 2020), with almost half of the patients on the isolation ward monitored with wearables during the peak of hospital admissions in the local area. Patients were monitored for a median of 31.5 [8.8, 75.4] hours, representing 88.1 [62.5, 94.5]% of the median time they were registered in the system. This indicates the system was being used in the isolation ward during this period. An updated version of the system has now also been used throughout the second and third waves of the pandemic in the UK.",2021-09-07 +32299846,Pilot Study of Return of Genetic Results to Patients in Adult Nephrology.,"

Background and objectives

Actionable genetic findings have implications for care of patients with kidney disease, and genetic testing is an emerging tool in nephrology practice. However, there are scarce data regarding best practices for return of results and clinical application of actionable genetic findings for kidney patients.

Design, setting, participants, & measurements

We developed a return of results workflow in collaborations with clinicians for the retrospective recontact of adult nephrology patients who had been recruited into a biobank research study for exome sequencing and were identified to have medically actionable genetic findings.

Results

Using this workflow, we attempted to recontact a diverse pilot cohort of 104 nephrology research participants with actionable genetic findings, encompassing 34 different monogenic etiologies of nephropathy and five single-gene disorders recommended by the American College of Medical Genetics and Genomics for return as medically actionable secondary findings. We successfully recontacted 64 (62%) participants and returned results to 41 (39%) individuals. In each case, the genetic diagnosis had meaningful implications for the patients' nephrology care. Through implementation efforts and qualitative interviews with providers, we identified over 20 key challenges associated with returning results to study participants, and found that physician knowledge gaps in genomics was a recurrent theme. We iteratively addressed these challenges to yield an optimized workflow, which included standardized consultation notes with tailored management recommendations, monthly educational conferences on core topics in genomics, and a curated list of expert clinicians for patients requiring extranephrologic referrals.

Conclusions

Developing the infrastructure to support return of genetic results in nephrology was resource-intensive, but presented potential opportunities for improving patient care.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2020_04_16_12481019.mp3.",2020-04-16 +34795910,Effect of low protein intake on acute exacerbations in mild to moderate chronic obstructive pulmonary disease: data from the 2007-2012 KNHANES.,"

Background

Several researchers have reported that the amount of protein intake is associated with lung function and airflow obstruction. However, few studies have investigated the effect of low protein intake on acute exacerbations of chronic obstructive pulmonary disease. This study aimed to investigate the effect of low protein intake on exacerbations in mild to moderate chronic obstructive pulmonary disease.

Methods

We used data obtained from the Korean National Health and Nutrition Examination Survey (KNHANES) between 2007 and 2012, linked to the National Health Insurance claims data. The clinical outcomes and the rate of exacerbation were retrospectively compared between the low protein intake group and the non-low protein intake group which was stratified by quartile categories of protein intake in 2,069 patients with mild to moderate chronic obstructive pulmonary disease.

Results

The low protein intake group was significantly associated with older age, women, never smoker, low household income, and low education level, compared with the non-low protein intake group. The low protein intake group was significantly associated with increased hospitalization (18.0% vs. 10.5%, P<0.001) and emergency department utilization (1.6±1.0 vs. 1.1±0.4, P=0.033) compared with the non-low protein intake group. In multivariate analysis, the low protein intake group was associated with hospitalization (odds ratio 1.46; 95% CI, 1.09-1.96; P=0.012). The multiple linear regression analysis revealed that the amount of protein intake was associated with FVC % predicted (β=0.048, P<0.001) and FEV1% predicted (β=0.022, P=0.015).

Conclusions

Low protein intake was associated with an increased risk of exacerbations in mild to moderate chronic obstructive pulmonary disease. The data are available at the KNHANES website (https://knhanes.cdc.go.kr).",2021-10-01 +32077475,NERDD: a web portal providing access to in silico tools for drug discovery.,"SUMMARY:The New E-Resource for Drug Discovery (NERDD) is a quickly expanding web portal focused on the provision of peer-reviewed in silico tools for drug discovery. NERDD currently hosts tools for predicting the sites of metabolism (FAME) and metabolites (GLORY) of small organic molecules, for flagging compounds that are likely to interfere with biological assays (Hit Dexter), and for identifying natural products and natural product derivatives in large compound collections (NP-Scout). Several additional models and components are currently in development. AVAILABILITY AND IMPLEMENTATION:The NERDD web server is available at https://nerdd.zbh.uni-hamburg.de. Most tools are also available as software packages for local installation.",2020-02-01 +34954794,GC-Profile 2.0: an extended web server for the prediction and visualization of CpG islands. ,"Due to the spontaneous deamination of 5'-methylcytosine into thymine, the number of CpG dinucleotides is less than expected in vertebrate genomes. Exceptionally, there are a large number of CpG dinucleotides clustered at certain genomic loci, known as CpG islands (CGIs), where CpG dinucleotides are free from methylation. Identification of CGIs is of great significance in the field of genomics and epigenetics because they can serve as important gene markers or regulatory elements. Here, GC-Profile 2.0 has been presented as a newly extended application for CGIs detection and visualization. Based on a benchmark test of assembled sequences, GC-Profile 2.0 has shown better overall performance compared with other four popular methods. 
In addition, cumulative CpG profile, a visualization tool of CpG content variation, is also proposed to intuitively display the change trend of CpG content. GC-Profile 2.0 is freely available at http://tubic.org/GC-Profile2. Supplementary data are available at Bioinformatics online.",2021-12-25 +32259197,Locally Informed Simulation to Predict Hospital Capacity Needs During the COVID-19 Pandemic.,"

Background

The coronavirus disease 2019 (COVID-19) pandemic challenges hospital leaders to make time-sensitive, critical decisions about clinical operations and resource allocations.

Objective

To estimate the timing of surges in clinical demand and the best- and worst-case scenarios of local COVID-19-induced strain on hospital capacity, and thus inform clinical operations and staffing demands and identify when hospital capacity would be saturated.

Design

Monte Carlo simulation instantiation of a susceptible, infected, removed (SIR) model with a 1-day cycle.

Setting

3 hospitals in an academic health system.

Patients

All people living in the greater Philadelphia region.

Measurements

The COVID-19 Hospital Impact Model (CHIME) (http://penn-chime.phl.io) SIR model was used to estimate the time from 23 March 2020 until hospital capacity would probably be exceeded, and the intensity of the surge, including for intensive care unit (ICU) beds and ventilators.

Results

Using patients with COVID-19 alone, CHIME estimated that it would be 31 to 53 days before demand exceeds existing hospital capacity. In best- and worst-case scenarios of surges in the number of patients with COVID-19, the needed total capacity for hospital beds would reach 3131 to 12 650 across the 3 hospitals, including 338 to 1608 ICU beds and 118 to 599 ventilators.

Limitations

Model parameters were taken directly or derived from published data across heterogeneous populations and practice environments and from the health system's historical data. CHIME does not incorporate more transition states to model infection severity, social networks to model transmission dynamics, or geographic information to account for spatial patterns of human interaction.

Conclusion

Publicly available and designed for hospital operations leaders, this modeling tool can inform preparations for capacity strain during the early days of a pandemic.

Primary funding source

University of Pennsylvania Health System and the Palliative and Advanced Illness Research Center.",2020-04-07 +34763529,The Role of Talker Variability in Nonnative Phonetic Learning: A Systematic Review and Meta-Analysis.,"

Purpose

High-variability phonetic training (HVPT) has been found to be effective on adult second language (L2) learning, but results are mixed in regards to the benefit of multiple talkers over single talker. This study provides a systematic review with meta-analysis to investigate the talker variability effect in nonnative phonetic learning and the factors moderating the effect.

Method

We collected studies with keyword search in major academic databases including EBSCO, ERIC, MEDLINE, ProQuest Dissertations & Theses, Elsevier, Scopus, Wiley Online Library, and Web of Science. We identified potential participant-, training-, and study-related moderators and conducted a random-effects model meta-analysis for each individual variable.

Results

On the basis of 18 studies with a total of 549 participants, we obtained a small-level summary effect size (Hedges' g = 0.46, 95% confidence interval [CI; 0.08, 0.84]) for the immediate training outcomes, which was greatly reduced (g = -0.04, 95% CI [-0.46, 0.37]) after removal of outliers and correction for publication bias, whereas the effect size for immediate perceptual gains was nearly medium (g = 0.56, 95% CI [0.13, 1.00]) compared with the nonsignificant production gains. Critically, the summary effect sizes for generalizations to new talkers (g = 0.72, 95% CI [0.15, 1.29]) and for long-term retention (g = 1.09, 95% CI [0.39, 1.78]) were large. Moreover, the training program length and the talker presentation format were found to potentially moderate the immediate perceptual gains and generalization outcomes.

Conclusions

Our study presents the first meta-analysis on the role of talker variability in nonnative phonetic training, which demonstrates the heterogeneity and limitations of research on this topic. The results highlight the need for further investigation of the influential factors and underlying mechanisms for the presence or absence of talker variability effects. Supplemental Material https://doi.org/10.23641/asha.16959388.",2021-11-11 +33151818,Clausal Density Between Ages 4 and 9 Years for the Edmonton Narrative Norms Instrument: Reference Data and Psychometric Properties.,"Purpose This study provided reference data and examined psychometric properties for clausal density (CD; i.e., number of clauses per utterance) in children between ages 4 and 9 years from the database of the Edmonton Narrative Norms Instrument (ENNI). Method Participants in the ENNI database included 300 children with typical language (TL) and 77 children with language impairment (LI) between the ages of 4;0 (years;months) and 9;11. Narrative samples were collected using a story generation task, in which children were asked to tell stories based on six picture sequences. CD was computed from the narrative samples. The split-half reliability, concurrent criterion validity, and diagnostic accuracy were evaluated for CD by age. Results CD scores increased significantly between ages 4 and 9 years in children with TL and those with LI. Children with TL produced higher CD scores than those with LI at each age level. In addition, the correlation coefficients for the split-half reliability and concurrent criterion validity of CD scores were all significant at each age level, with the magnitude ranging from small to large. The diagnostic accuracy of CD scores, as revealed by sensitivity, specificity, and likelihood ratios, was poor. Conclusions The finding on diagnostic accuracy did not support the use of CD for identifying children with LI between ages 4 and 9 years. 
However, given the attested reliability and validity for CD, reference data of CD from the ENNI database can be used for evaluating children's difficulties with complex syntax and monitoring their change over time. Supplemental Material https://doi.org/10.23641/asha.13172129.",2020-11-05 +30055873,PhoPepMass: A database and search tool assisting human phosphorylation peptide identification from mass spectrometry data.,"Protein phosphorylation, one of the most important protein post-translational modifications, is involved in various biological processes, and the identification of phosphorylation peptides (phosphopeptides) and their corresponding phosphorylation sites (phosphosites) will facilitate the understanding of the molecular mechanism and function of phosphorylation. Mass spectrometry (MS) provides a high-throughput technology that enables the identification of large numbers of phosphosites. PhoPepMass is designed to assist human phosphopeptide identification from MS data based on a specific database of phophopeptide masses and a multivariate hypergeometric matching algorithm. It contains 244,915 phosphosites from several public sources. Moreover, the accurate masses of peptides and fragments with phosphosites were calculated. It is the first database that provides a systematic resource for the query of phosphosites on peptides and their corresponding masses. This allows researchers to search certain proteins of which phosphosites have been reported, to browse detailed phosphopeptide and fragment information, to match masses from MS analyses with defined threshold to the corresponding phosphopeptide, and to compare proprietary phosphopeptide discovery results with results from previous studies. Additionally, a database search software is created and a ""two-stage search strategy"" is suggested to identify phosphopeptides from tandem mass spectra of proteomics data. We expect PhoPepMass to be a useful tool and a source of reference for proteomics researchers. 
PhoPepMass is available at https://www.scbit.org/phopepmass/index.html.",2018-07-19 +34289221,"Exploring the diversity of promoter and 5'UTR sequences in ancestral, historic and modern wheat.","A data set of promoter and 5'UTR sequences of homoeo-alleles of 459 wheat genes that contribute to agriculturally important traits in 95 ancestral and commercial wheat cultivars is presented here. The high-stringency myBaits technology used made individual capture of homoeo-allele promoters possible, which is reported here for the first time. Promoters of most genes are remarkably conserved across the 83 hexaploid cultivars used with <7 haplotypes per promoter and 21% being identical to the reference Chinese Spring. InDels and many high-confidence SNPs are located within predicted plant transcription factor binding sites, potentially changing gene expression. Most haplotypes found in the Watkins landraces and a few haplotypes found in Triticum monococcum, germplasms hitherto not thought to have been used in modern wheat breeding, are already found in many commercial hexaploid wheats. The full data set which is useful for genomic and gene function studies and wheat breeding is available at https://rrescloud.rothamsted.ac.uk/index.php/s/DMCFDu5iAGTl50u/authenticate.",2021-09-16 +34211067,Introducing the novel Cytoscape app TimeNexus to analyze time-series data using temporal MultiLayer Networks (tMLNs).,"Integrating -omics data with biological networks such as protein-protein interaction networks is a popular and useful approach to interpret expression changes of genes in changing conditions, and to identify relevant cellular pathways, active subnetworks or network communities. Yet, most -omics data integration tools are restricted to static networks and therefore cannot easily be used for analyzing time-series data. Determining regulations or exploring the network structure over time requires time-dependent networks which incorporate time as one component in their structure. 
Here, we present a method to project time-series data on sequential layers of a multilayer network, thus creating a temporal multilayer network (tMLN). We implemented this method as a Cytoscape app we named TimeNexus. TimeNexus allows to easily create, manage and visualize temporal multilayer networks starting from a combination of node and edge tables carrying the information on the temporal network structure. To allow further analysis of the tMLN, TimeNexus creates and passes on regular Cytoscape networks in form of static versions of the tMLN in three different ways: (i) over the entire set of layers, (ii) over two consecutive layers at a time, (iii) or on one single layer at a time. We combined TimeNexus with the Cytoscape apps PathLinker and AnatApp/ANAT to extract active subnetworks from tMLNs. To test the usability of our app, we applied TimeNexus together with PathLinker or ANAT on temporal expression data of the yeast cell cycle and were able to identify active subnetworks relevant for different cell cycle phases. We furthermore used TimeNexus on our own temporal expression data from a mouse pain assay inducing hindpaw inflammation and detected active subnetworks relevant for an inflammatory response to injury, including immune response, cell stress response and regulation of apoptosis. TimeNexus is freely available from the Cytoscape app store at https://apps.cytoscape.org/apps/TimeNexus .",2021-07-01 +34287026,Application of an in Vitro Assay to Identify Chemicals That Increase Estradiol and Progesterone Synthesis and Are Potential Breast Cancer Risk Factors.,"

Background

Established breast cancer risk factors, such as hormone replacement therapy and reproductive history, are thought to act by increasing estrogen and progesterone (P4) activity.

Objective

We aimed to use in vitro screening data to identify chemicals that increase the synthesis of estradiol (E2) or P4 and evaluate potential risks.

Method

Using data from a high-throughput (HT) in vitro steroidogenesis assay developed for the U.S. Environmental Protection Agency (EPA) ToxCast program, we identified chemicals that increased estradiol (E2-up) or progesterone (P4-up) in human H295R adrenocortical carcinoma cells. We prioritized chemicals by their activity. We compiled in vivo studies and assessments about carcinogenicity and reproductive/developmental (repro/dev) toxicity. We identified exposure sources and predicted intakes from the U.S. EPA's ExpoCast.

Results

We found 296 chemicals increased E2 (182) or P4 (185), with 71 chemicals increasing both. In vivo data often showed effects consistent with this mechanism. Of the E2- and P4-up chemicals, about 30% were likely repro/dev toxicants or carcinogens, whereas only 5-13% were classified as unlikely. However, most of the chemicals had insufficient in vivo data to evaluate their effects. Of 45 chemicals associated with mammary gland effects, and also tested in the H295R assay, 29 increased E2 or P4, including the well-known mammary carcinogen 7,12-dimethylbenz(a)anthracene. E2- and P4-up chemicals include pesticides, consumer product ingredients, food additives, and drinking water contaminants.

Discussion

The U.S. EPA's in vitro screening data identified several hundred chemicals that should be considered as potential risk factors for breast cancer because they increased E2 or P4 synthesis. In vitro data is a helpful addition to current toxicity assessments, which are not sensitive to mammary gland effects. Relevant effects on the mammary gland are often not noticed or are dismissed, including for 2,4-dichlorophenol and cyfluthrin. Fifty-three active E2-up and 59 active P4-up chemicals that are in consumer products, food, pesticides, or drugs have not been evaluated for carcinogenic potential and are priorities for study and exposure reduction. https://doi.org/10.1289/EHP8608.",2021-07-21 +34252935,OncoThreads: visualization of large-scale longitudinal cancer molecular data.,"

Motivation

Molecular profiling of patient tumors and liquid biopsies over time with next-generation sequencing technologies and new immuno-profile assays are becoming part of standard research and clinical practice. With the wealth of new longitudinal data, there is a critical need for visualizations for cancer researchers to explore and interpret temporal patterns not just in a single patient but across cohorts.

Results

To address this need we developed OncoThreads, a tool for the visualization of longitudinal clinical and cancer genomics and other molecular data in patient cohorts. The tool visualizes patient cohorts as temporal heatmaps and Sankey diagrams that support the interactive exploration and ranking of a wide range of clinical and molecular features. This allows analysts to discover temporal patterns in longitudinal data, such as the impact of mutations on response to a treatment, for example, emergence of resistant clones. We demonstrate the functionality of OncoThreads using a cohort of 23 glioma patients sampled at 2-4 timepoints.

Availability and implementation

Freely available at http://oncothreads.gehlenborglab.org. Implemented in JavaScript using the cBioPortal web API as a backend.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +33245761,MoonProt 3.0: an update of the moonlighting proteins database.,"MoonProt 3.0 (http://moonlightingproteins.org) is an updated open-access database storing expert-curated annotations for moonlighting proteins. Moonlighting proteins have two or more physiologically relevant distinct biochemical or biophysical functions performed by a single polypeptide chain. Here, we describe an expansion in the database since our previous report in the Database Issue of Nucleic Acids Research in 2018. For this release, the number of proteins annotated has been expanded to over 500 proteins and dozens of protein annotations have been updated with additional information, including more structures in the Protein Data Bank, compared with version 2.0. The new entries include more examples from humans, plants and archaea, more proteins involved in disease and proteins with different combinations of functions. More kinds of information about the proteins and the species in which they have multiple functions has been added, including CATH and SCOP classification of structure, known and predicted disorder, predicted transmembrane helices, type of organism, relationship of the protein to disease, and relationship of organism to cause of disease.",2021-01-01 +,SUN-333 Burosumab Improves Bone Density in Patients with X-Linked Hypophosphatemia,"Abstract Background: X-linked hypophosphatemia (XLH) causes rickets in children and osteomalacia in adults due to lifelong renal phosphate wasting that is mediated by high circulating levels of FGF-23. Burosumab, is a recently approved fully human monoclonal antibody that blocks FGF23, thereby correcting the renal phosphate leak, improving mineral metabolism and reducing osteomalacia by 50-75% in adults [1]. Whether this results in measurable changes in skeletal mass and microarchitecture is unclear. 
Objective: We examined the impact of burosumab on regional bone mineral density (BMD) and trabecular bone scores (TBS) in study subjects involved in two phase III clinical trials of burosumab. Methods: In these trials subjects received burosumab 1 mg/kg every 4 weeks. Some patients received placebo for the first 6 months of one trial so we considered their month 6 data as their baseline. Most of the patients had been treated at some point in the past with calcitriol and phosphorus. DXA and TBS were obtained at baseline and then after 6, 12 and 18-24 months of drug treatment. Paired t-tests and ANOVA were performed to assess changes in L-spine BMD, Total Hip BMD and TBS. Results: 25 subjects with XLH (mean age 38.9 years, 56% female) were enrolled in these studies. Paired data were available in 23 subjects at 6 months, 15 subjects at 12 months and 18 subjects at 18-24 months. Compared to baseline, there were significant increases in L-spine BMD at all time points by paired analysis: 6 months (+6.0%, p=<0.0001), 12 months (+6.95%, p=<0.0001), 18-24 months (+6.13%, p=0.0005). Although there was no significant difference in total hip BMD at 6 months when compared to baseline, there were significant increases at 12 months (+6.72%, p=0.0005) and a further increase at 18-24 months (+10.02%, p=0.0029). When all available subjects were analyzed by one-way ANOVA, there was a significant effect of time of treatment on these regional BMD measurements. There was no change in trabecular bone score over the course of treatment. Conclusion: Treatment with burosumab is associated with a marked improvement in BMD, particularly in the hip. Since the hip is a frequent site of fracture in XLH, the effect of burosumab at this site is of considerable clinical relevance. The lack of an effect on TBS may relate to the fact that this measurement is much less sensitive to therapeutic interventions than BMD assessed by DXA. References: [1] JBMR. 2019. 
https://doi.org/10.1002/jbmr.3843.",2020-05-08 +31972649,Early Identification of Trauma-induced Coagulopathy: Development and Validation of a Multivariable Risk Prediction Model.,"

Objective

The aim of this study was to develop and validate a risk prediction tool for trauma-induced coagulopathy (TIC), to support early therapeutic decision-making.

Background

TIC exacerbates hemorrhage and is associated with higher morbidity and mortality. Early and aggressive treatment of TIC improves outcome. However, injured patients that develop TIC can be difficult to identify, which may compromise effective treatment.

Methods

A Bayesian Network (BN) prediction model was developed using domain knowledge of the causal mechanisms of TIC, and trained using data from 600 patients recruited into the Activation of Coagulation and Inflammation in Trauma (ACIT) study. Performance (discrimination, calibration, and accuracy) was tested using 10-fold cross-validation and externally validated on data from new patients recruited at 3 trauma centers.

Results

Rates of TIC in the derivation and validation cohorts were 11.8% and 11.0%, respectively. Patients who developed TIC were significantly more likely to die (54.0% vs 5.5%, P < 0.0001), require a massive blood transfusion (43.5% vs 1.1%, P < 0.0001), or require damage control surgery (55.8% vs 3.4%, P < 0.0001), than those with normal coagulation. In the development dataset, the 14-predictor BN accurately predicted this high-risk patient group: area under the receiver operating characteristic curve (AUROC) 0.93, calibration slope (CS) 0.96, brier score (BS) 0.06, and brier skill score (BSS) 0.40. The model maintained excellent performance in the validation population: AUROC 0.95, CS 1.22, BS 0.05, and BSS 0.46.

Conclusions

A BN (http://www.traumamodels.com) can accurately predict the risk of TIC in an individual patient from standard admission clinical variables. This information may support early, accurate, and efficient activation of hemostatic resuscitation protocols.",2021-12-01 +29126312,MeT-DB V2.0: elucidating context-specific functions of N6-methyl-adenosine methyltranscriptome.,"Methyltranscriptome is an exciting new area that studies the mechanisms and functions of methylation in transcripts. A knowledge base with the systematic collection and curation of context specific transcriptome-wide methylations is critical for elucidating their biological functions as well as for developing bioinformatics tools. Since its inception in 2014, the Met-DB (Liu, H., Flores, M.A., Meng, J., Zhang, L., Zhao, X., Rao, M.K., Chen, Y. and Huang, Y. (2015) MeT-DB: a database of transcriptome methylation in mammalian cells. Nucleic Acids Res., 43, D197-D203), has become an important resource for methyltranscriptome, especially in the N6-methyl-adenosine (m6A) research community. Here, we report Met-DB v2.0, the significantly improved second version of Met-DB, which is entirely redesigned to focus more on elucidating context-specific m6A functions. Met-DB v2.0 has a major increase in context-specific m6A peaks and single-base sites predicted from 185 samples for 7 species from 26 independent studies. Moreover, it is also integrated with a new database for targets of m6A readers, erasers and writers and expanded with more collections of functional data. The redesigned Met-DB v2.0 web interface and genome browser provide more friendly, powerful, and informative ways to query and visualize the data. More importantly, MeT-DB v2.0 offers for the first time a series of tools specifically designed for understanding m6A functions. Met-DB V2.0 will be a valuable resource for m6A methyltranscriptome research. 
The Met-DB V2.0 database is available at http://compgenomics.utsa.edu/MeTDB/ and http://www.xjtlu.edu.cn/metdb2.",2018-01-01 +34160753,"Scipion PKPD: an Open-Source Platform for Biopharmaceutics, Pharmacokinetics and Pharmacodynamics Data Analysis.","

Purpose

Biopharmaceutics examines the interrelationship of the drug's physical/chemical properties, the dosage form (drug product) in which the drug is given, and the administration route on the rate and extent of systemic drug absorption. Pharmacokinetics is the study of the movement of drugs in the body. It uses mathematical models to evaluate the movement of absorption, distribution, metabolism, and excretion (ADME) within an organism. Finally, Pharmacodynamics is the analysis of how these drugs affect that organism. Pharmacokinetics data normally comes in samples over time of the drug concentration either in plasma or in some specific tissue. Similarly, pharmacodynamics data comes normally in samples over time of some quantity of interest (biophysical quantity like temperature, blood pressure, etc.). The data is submitted to a non-parametric analysis, in which a description of the observed data is reported (e.g., the Area Under the Curve), or to a parametric analysis by fitting a model (normally based on differential equations) so that prediction about future events can be made. This paper aims to introduce Scipion PKPD, an open-source platform for data analysis of this kind in the three domains (Biopharmaceutics, Pharmacokinetics, and Pharmacodynamics). The platform implements the most popular models and is open to new ones. The platform provides almost 100 different high-level operations that we call protocols.

Methods

We have developed a Python module integrated into the workflow engine Scipion. The plugin implements the numerical analysis and metadata handling tools to address multiple problems (see Suppl. Material for a detailed list of the tasks solved).

Results

We illustrate the use of this package with an integrative example that involves all these areas.

Conclusions

We show that the package successfully addresses these kinds of analyses. Scipion PKPD is freely available at https://github.com/cossorzano/scipion-pkpd .",2021-06-23 +31612325,PlantAFP: a curated database of plant-origin antifungal peptides.,"Emerging infectious diseases (EIDs) are a severe problem caused by fungi in human and plant species across the world. They pose a worldwide threat to food security as well as human health. Fungal infections are increasing now day by day worldwide, and the current antimycotic drugs are not effective due to the emergence of resistant strains. Therefore, it is an urgent need for the finding of new plant-origin antifungal peptides (PhytoAFPs). Huge numbers of peptides were extracted from different plant species which play a protective role against fungal infection. Hundreds of plant-origin peptides with antifungal activity have already been reported. So there is a requirement of a dedicated platform which systematically catalogs plant-origin peptides along with their antifungal properties. PlantAFP database is a resource of experimentally verified plant-origin antifungal peptides, collected from research articles, patents, and public databases. The current release of PlantAFP database contains 2585 peptide entries among which 510 are unique peptides. Each entry provides comprehensive information of a peptide that includes its peptide sequence, peptide name, peptide class, length of the peptide, molecular mass, antifungal activity, and origin of peptides. Besides this primary information, PlantAFP stores peptide sequences in SMILES format. In order to facilitate the user, many tools have been integrated into this database that includes BLAST search, peptide search, SMILES search, and peptide-mapping is also included in the database. PlantAFP database is accessible at http://bioinformatics.cimap.res.in/sharma/PlantAFP/.",2019-10-14 +,The exopolysaccharide properties and structures database: EPS-DB. 
Application to bacterial exopolysaccharides,"The EPS Database (EPS-DB) is a web-based, platform-independent database of bacterial exopolysaccharides (EPSs) providing access to detailed structural, taxonomic, growth conditions, functional properties, genetic, and bibliographic information for EPSs. It is freely available on the Internet as a website at http://www.epsdatabase.com. Several structural data representation schemes are used following the most commonly accepted formats. This guarantees full interoperability with other structural, experimental, and functional databases in the area of glycoscience. The scientific usage of EPS-DB throughout a user-friendly interface is presented with a subsection of the database exemplified by EPSs from lactic acid bacteria.",2019-02-01 +34889651,Setting the Stage for Speech Production: Infants Prefer Listening to Speech Sounds With Infant Vocal Resonances.,"

Purpose

Current models of speech development argue for an early link between speech production and perception in infants. Recent data show that young infants (at 4-6 months) preferentially attend to speech sounds (vowels) with infant vocal properties compared to those with adult vocal properties, suggesting the presence of special ""memory banks"" for one's own nascent speech-like productions. This study investigated whether the vocal resonances (formants) of the infant vocal tract are sufficient to elicit this preference and whether this perceptual bias changes with age and emerging vocal production skills.

Method

We selectively manipulated the fundamental frequency (f0 ) of vowels synthesized with formants specifying either an infant or adult vocal tract, and then tested the effects of those manipulations on the listening preferences of infants who were slightly older than those previously tested (at 6-8 months).

Results

Unlike findings with younger infants (at 4-6 months), slightly older infants in Experiment 1 displayed a robust preference for vowels with infant formants over adult formants when f0 was matched. The strength of this preference was also positively correlated with age among infants between 4 and 8 months. In Experiment 2, this preference favoring infant over adult formants was maintained when f0 values were modulated.

Conclusions

Infants between 6 and 8 months of age displayed a robust and distinct preference for speech with resonances specifying a vocal tract that is similar in size and length to their own. This finding, together with data indicating that this preference is not present in younger infants and appears to increase with age, suggests that nascent knowledge of the motor schema of the vocal tract may play a role in shaping this perceptual bias, lending support to current models of speech development.

Supplemental material

https://doi.org/10.23641/asha.17131805.",2021-12-10 +,Supporting Single Cell RNA-seq Analysis at Harvard - A Community Approach,"Recent advances in single cell transcriptomics make it possible to examine the gene expression profiles of thousands of individual cells, providing unprecedented insights into tissue heterogeneity, development and pathogenesis. In 2015, the Harvard Chan Bioinformatics Core (http://bioinformatics.sph.harvard.edu) teamed up with the Harvard Medical School (HMS) Single Cell Core (https://iccb.med.harvard.edu/single-cell-core) to standardize data analysis for the InDrop droplet barcoding system and prepare for projected demand within the Harvard community. Here we describe our approach to building single cell analytical expertise and infrastructure through our partnership with the Single Cell Core and multiple research labs. We outline the challenges we faced and our current best practices for data analysis (quality assessment, quantitation, clustering, visualization, and differential expression). Our pipeline, implemented within the bcbio-nextgen framework (https://bcbio-nextgen.readthedocs.io/), handles multiple UMI schemes to accommodate different single cell technologies (e.g. Drop-seq, Seq-well, Bio-Rad ddSeq, etc.). We also describe our approach to managing single cell projects, with their longer analysis times, increased complexity and need for rigorous experimental design, data management, computing infrastructure and methods evaluation. All of these require close collaboration and frequent communication with the bench biologists generating the data. Due to these factors, we have expanded our bioinformatics training program to include modules on single cell RNA-seq. 
With this program, we hope to develop analysis expertise within the community and an understanding of the methods and intricacies inherent in the technology - ultimately leading to better designed and more successful single cell RNA-seq experiments.",2019-12-01 +34936882,A census of the lung: CellCards from LungMAP.,"The human lung plays vital roles in respiration, host defense, and basic physiology. Recent technological advancements such as single-cell RNA sequencing and genetic lineage tracing have revealed novel cell types and enriched functional properties of existing cell types in lung. The time has come to take a new census. Initiated by members of the NHLBI-funded LungMAP Consortium and aided by experts in the lung biology community, we synthesized current data into a comprehensive and practical cellular census of the lung. Identities of cell types in the normal lung are captured in individual cell cards with delineation of function, markers, developmental lineages, heterogeneity, regenerative potential, disease links, and key experimental tools. This publication will serve as the starting point of a live, up-to-date guide for lung research at https://www.lungmap.net/cell-cards/. We hope that Lung CellCards will promote the community-wide effort to establish, maintain, and restore respiratory health.",2021-12-21 +31012755,"Integration of wheelchair service provision education: current situation, facilitators and barriers for academic rehabilitation programs worldwide.","Purpose: An estimated 75 million people with disabilities need wheelchairs globally, of whom 5-15% have one. Access to an appropriate wheelchair requires rehabilitation professionals trained to provide wheelchair service. One aim of the International Society of Wheelchair Professionals (ISWP) is to promote and facilitate the integration of wheelchair service provision education into academic rehabilitation programs worldwide. 
To inform the development of integration strategies, the purpose of this study was to develop an in-depth global portrait of the wheelchair service provision education offered in academic rehabilitation programs, the process of its integration and the associated facilitators and barriers.Method: Semi-structured qualitative interviews were conducted with a purposive sample of 14 representatives from academic rehabilitation programs (i.e., occupational therapy, physical therapy, and prosthetics and orthotics) in 11 countries, including low, middle and upper resourced settings.Findings: Thematic data analyses identified three overarching themes. The first theme, ""impact of context"", portrays factors related to local population needs, governance and supply chain of equipment and service delivery. The second theme, ""current and planned wheelchair education"", describes the content, pedagogic approach, student evaluation and feedback process. The third theme, ""integration process"", details five states of this process.Conclusions: This study describes in-depth the wheelchair service provision education across academic rehabilitation programs and resource settings, illustrating the context-dependent nature of its integration. This understanding may assist the global community of educators in preparing future rehabilitation professionals to better serve wheelchair users. 
This work has informed the development of ISWP's Seating and Mobility Academic Resource Toolkit (http://smart.wheelchairnetwork.org/).Implications for RehabilitationThe Dynamics of Context-Dependent Integration of Wheelchair Service Provision Education in Curricula model, depicting the findings of this study, may help to inform key stakeholders (i.e., academic institutions, health care providers and policy makers) about potential barriers and facilitators to the implementation of adequate wheelchair service provision education in the curricula of academic rehabilitation program.Study findings may lead to creative strategies, such as the expansion of ISWP's Seating and Mobility Academic Resource Toolkit (SMART; http://smart.wheelchairnetwork.org/), that may enable academic rehabilitation programs to be a part of the solution to strengthening rehabilitation systems worldwide, through appropriately trained rehabilitation professionals in wheelchair service provision.",2019-04-23 +34257418,Neptune: an environment for the delivery of genomic medicine.,"

Purpose

Genomic medicine holds great promise for improving health care, but integrating searchable and actionable genetic data into electronic health records (EHRs) remains a challenge. Here we describe Neptune, a system for managing the interaction between a clinical laboratory and an EHR system during the clinical reporting process.

Methods

We developed Neptune and applied it to two clinical sequencing projects that required report customization, variant reanalysis, and EHR integration.

Results

Neptune has been applied for the generation and delivery of over 15,000 clinical genomic reports. This work spans two clinical tests based on targeted gene panels that contain 68 and 153 genes respectively. These projects demanded customizable clinical reports that contained a variety of genetic data types including single-nucleotide variants (SNVs), copy-number variants (CNVs), pharmacogenomics, and polygenic risk scores. Two variant reanalysis activities were also supported, highlighting this important workflow.

Conclusion

Methods are needed for delivering structured genetic data to EHRs. This need extends beyond developing data formats to providing infrastructure that manages the reporting process itself. Neptune was successfully applied on two high-throughput clinical sequencing projects to build and deliver clinical reports to EHR systems. The software is open source and available at https://gitlab.com/bcm-hgsc/neptune .",2021-07-13 +32238171,Automated gene data integration with Databio.,"

Objective

Although sequencing and other high-throughput data production technologies are increasingly affordable, data analysis and interpretation remains a significant factor in the cost of -omics studies. Despite the broad acceptance of findable, accessible, interoperable, and reusable (FAIR) data principles which focus on data discoverability and annotation, data integration remains a significant bottleneck in linking prior work in order to better understand novel research. Relevant and timely information discovery is difficult for increasingly multi-disciplinary projects when scientists cannot easily keep up with work across multiple fields. Computational tools are necessary to accurately describe data contents, and empower linkage to existing resources without prior knowledge of the various database resources.

Results

We developed the Databio tool, accessible at https://datab.io/, to automate data parsing, identifier detection, and streamline common tasks to provide a point-and-click approach to data manipulation and integration in life sciences research and translational medicine. Databio uses fast real-time data structures and a data warehouse of 137 million identifiers, with automated heuristics to describe data provenance without highly specialized knowledge or bioinformatics training.",2020-04-01 +29040563,Ten years of CAZypedia: a living encyclopedia of carbohydrate-active enzymes.,"CAZypedia was initiated in 2007 to create a comprehensive, living encyclopedia of the carbohydrate-active enzymes (CAZymes) and associated carbohydrate-binding modules involved in the synthesis, modification and degradation of complex carbohydrates. CAZypedia is closely connected with the actively curated CAZy database, which provides a sequence-based foundation for the biochemical, mechanistic and structural characterization of these diverse proteins. Now celebrating its 10th anniversary online, CAZypedia is a successful example of dynamic, community-driven and expert-based biocuration. CAZypedia is an open-access resource available at URL http://www.cazypedia.org.",2018-12-01 +33787872,Drugmonizome and Drugmonizome-ML: integration and abstraction of small molecule attributes for drug enrichment analysis and machine learning.,"Understanding the underlying molecular and structural similarities between seemingly heterogeneous sets of drugs can aid in identifying drug repurposing opportunities and assist in the discovery of novel properties of preclinical small molecules. A wealth of information about drug and small molecule structure, targets, indications and side effects; induced gene expression signatures; and other attributes are publicly available through web-based tools, databases and repositories. 
By processing, abstracting and aggregating information from these resources into drug set libraries, knowledge about novel properties of drugs and small molecules can be systematically imputed with machine learning. In addition, drug set libraries can be used as the underlying database for drug set enrichment analysis. Here, we present Drugmonizome, a database with a search engine for querying annotated sets of drugs and small molecules for performing drug set enrichment analysis. Utilizing the data within Drugmonizome, we also developed Drugmonizome-ML. Drugmonizome-ML enables users to construct customized machine learning pipelines using the drug set libraries from Drugmonizome. To demonstrate the utility of Drugmonizome, drug sets from 12 independent SARS-CoV-2 in vitro screens were subjected to consensus enrichment analysis. Despite the low overlap among these 12 independent in vitro screens, we identified common biological processes critical for blocking viral replication. To demonstrate Drugmonizome-ML, we constructed a machine learning pipeline to predict whether approved and preclinical drugs may induce peripheral neuropathy as a potential side effect. Overall, the Drugmonizome and Drugmonizome-ML resources provide rich and diverse knowledge about drugs and small molecules for direct systems pharmacology applications. Database URL: https://maayanlab.cloud/drugmonizome/.",2021-03-01 +30371825,PubChem 2019 update: improved access to chemical data.,"PubChem (https://pubchem.ncbi.nlm.nih.gov) is a key chemical information resource for the biomedical research community. Substantial improvements were made in the past few years. New data content was added, including spectral information, scientific articles mentioning chemicals, and information for food and agricultural chemicals. PubChem released new web interfaces, such as PubChem Target View page, Sources page, Bioactivity dyad pages and Patent View page. 
PubChem also released a major update to PubChem Widgets and introduced a new programmatic access interface, called PUG-View. This paper describes these new developments in PubChem.",2019-01-01 +33169878,Open-access platform to synthesize knowledge of ape conservation across sites.,"Despite the large body of literature on ape conservation, much of the data needed for evidence-based conservation decision-making is still not readily accessible and standardized, rendering cross-site comparison difficult. To support knowledge synthesis and to complement the IUCN SSC Ape Populations, Environments and Surveys database, we created the A.P.E.S. Wiki (https://apeswiki.eva.mpg.de), an open-access platform providing site-level information on ape conservation status and context. The aim of this Wiki is to provide information and data about geographical ape locations, to curate information on individuals and organizations active in ape research and conservation, and to act as a tool to support collaboration between conservation practitioners, scientists, and other stakeholders. To illustrate the process and benefits of knowledge synthesis, we used the momentum of the update of the conservation action plan for western chimpanzees (Pan troglodytes verus) and began with this critically endangered taxon. First, we gathered information on 59 sites in West Africa from scientific publications, reports, and online sources. Information was compiled in a standardized format and can thus be summarized using a web scraping approach. We then asked experts working at those sites to review and complement the information (20 sites have been reviewed to date). We demonstrate the utility of the information available through the Wiki, for example, for studying species distribution. Importantly, as an open-access platform and based on the well-known wiki layout, the A.P.E.S. 
Wiki can contribute to direct and interactive information sharing and promote the efforts invested by the ape research and conservation community. The Section on Great Apes and the Section on Small Apes of the IUCN SSC Primate Specialist Group will guide and support the expansion of the platform to all small and great ape taxa. Similar collaborative efforts can contribute to extending knowledge synthesis to all nonhuman primate species.",2020-11-10 +34014733,Using standard celeration makes COVID-19 data more meaningful.,"

Introduction

The fourth sudden acute respiratory syndrome (SARS) virus, COVID-19, emerged in late 2019, leading to the most devastating pandemic since the Spanish influenza (H1N1) of 1918, which seized 50 million lives worldwide (https://www.cdc.gov/flu/pandemic-resources/1918-pandemic-h1n1.html). Elected officials must make critical system-level decisions for stymieing the spread of the virus. Businesspersons must make personnel, financial, and operational decisions to minimize transmission while preserving their business's vitality. Members of the public must make personal decisions about personal protective equipment and changing social, recreational, occupational, and spiritual behavior to protect themselves and others. The scientific community can shift how they illustrate the virus's behavior to the public in an appropriate and understandable way so that the public can make informed decisions. This article suggests the use of a single-case design and logarithmic analyses to improve the current methodologies for COVID-19 analysis and illustration.

Method

The Standard Celeration Chart was used with Theil's incomplete regression and a 7-point change analysis; the authors demonstrate a suitable virus-tracking and mitigation methodology.

Results

Analysis and data visualization are standardized, providing an accurate depiction of the virus's growth for public dissemination and decision-making. An analytic strategy is demonstrated for retrospectively detecting meaningful changes in viral growth or prospectively measuring such changes that coincide with known mitigation strategies.

Discussion

The authors suggest improvements in bridging science to application by making COVID-19 informatics more meaningful and actionable by lawmakers, businesspersons, and the public. Limitations and future directions for COVID-19 informatics are discussed. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-03-01 +34058399,BA-plotteR - A web tool for generating Bland-Altman plots and constructing limits of agreement.,"Investigators use Bland-Altman plot (Limits of Agreement plot) to compare two methods measuring the same continuous variable to determine interchangeability or agreement of the methods. The method has evolved to deal with heteroscedastic data and fixed or proportional biases (or both). Although an ordinary Bland-Altman plot can be readily made with various software applications, there is no free, open-source application that is dedicated to producing Bland-Altman plots and constructing limits of agreement for data that do not meet the assumptions of a simple comparison. To fill this gap, we created BA-plotteR, a web-based, open-source, freeware tool created in Shiny/R that is dedicated to creating Bland-Altman plots. We validated the tool using 20 datasets with various data distributions by comparing the output from the tool against manually derived results. The webtool handles data that requires a more complex analysis than is commonly available through commercial statistical programs. Moreover, the automated analysis of the data distribution will guide users and help them to correctly plot and analyse their data. The tool agreed perfectly with manually constructed plots. The Bland-Altman graphing tool provides clinical researchers with a tool that correctly analyzes and graphs studies involved in method comparisons. 
The tool can be accessed here: https://huygens.science.uva.nl/BA-plotteR.",2021-05-25 +34806541,Psychometric properties of two instruments measuring self-efficacy and outcome expectations of providing inhaler technique education to patients.,"

Introduction

Both the National Heart, Lung, and Blood Institute (NHLBI) and Global Initiative for Asthma (GINA) asthma practice guidelines recommend that providers routinely check inhaler technique and correct any mistakes that patients may make when using these devices. Providers, however, rarely check inhaler technique during asthma visits. The objectives of this study were to: (1) describe the development of an instrument to measure self-efficacy and outcome expectations regarding inhaler technique patient education, (2) evaluate the internal consistency reliability of the new scales, and (3) provide preliminary evidence of construct validity. Methods: First- and second-year physician assistant (PA) students at two institutions completed an anonymous and voluntary survey evaluating two new instruments, the Teaching Inhalers to Patients: Self-efficacy (TIP-SE) and the Teaching Inhalers to Patients: Outcome Expectations (TIP-OE) scales and sociodemographic characteristics. The data were analyzed using Principal Components Analysis (PCA), Cronbach's α, and multivariable logistic regression. Results: We had usable responses from 146 PA students (71.9% participation rate). The PCA identified one factor for the TIP-SE and TIP-OE, respectively. The internal consistency of the TIP-SE and TIP-OE was α = 0.96 and α = 0.92, respectively. The logistic regression found that second-year PA students who had higher mean TIP-SE scores were significantly more likely to report teaching patients to use inhalers during rotations (OR = 1.8, 95% CI = 1.1, 2.9). There was not a statistically significant relationship between reporting teaching patients to use inhalers during rotations and mean TIP-OE scores. 
Conclusion: The TIP-SE and TIP-OE show preliminary evidence of reliability and validity.Supplemental data for this article is available online at https://doi.org/10.1080/02770903.2021.2008428 .",2021-11-28 +34977294,Hydro-thermodynamic dataset of the Amazon River Plume and North Brazil Current retroflection.,"This dataset was generated by the ROMS model, the output files constitute a monthly and weekly mean hydro-thermodynamics climatology of the region of Amazon and Para river mouths and the North Brazil Current retroflection (60.5°-24°W and 5°S-16°N, with 0.25° of horizontal resolution). This dataset includes the tri-dimensional grids of temperature, salinity and ocean currents at 32 depth levels, as well as the sea surface height. Sea surface temperature and sea surface salinity were validated using the SODA dataset, surface currents were validated with SCUD dataset and the vertical structure of temperature and salinity were compared with values recorded at 38°W,8°N and 38°W,12°N PIRATA buoys. The dataset is hosted on the website https://www.seanoe.org/data/00718/82958/. This dataset will help oceanographers and other researchers have information about the hydro-thermodynamics of this region.",2021-12-11 +30321373,KinaMetrix: a web resource to investigate kinase conformations and inhibitor space.,"Protein kinases are among the most explored protein drug targets. Visualization of kinase conformations is critical for understanding structure-function relationship in this family and for developing chemically unique, conformation-specific small molecule drugs. We have developed Kinformation, a random forest classifier that annotates the conformation of over 3500 protein kinase structures in the Protein Data Bank. Kinformation was trained on structural descriptors derived from functionally important motifs to automatically categorize kinases into five major conformations with pharmacological relevance. 
Here we present KinaMetrix (http://KinaMetrix.com), a web resource enabling researchers to investigate the protein kinase conformational space as well as a subset of kinase inhibitors that exhibit conformational specificity. KinaMetrix allows users to classify uploaded kinase structures, as well as to derive structural descriptors of protein kinases. Uploaded structures can then be compared to atomic structures of other kinases, enabling users to identify kinases that occupy a similar conformational space to their uploaded structure. Finally, KinaMetrix also serves as a repository for both small molecule substructures that are significantly associated with each conformation type, and for homology models of kinases in inactive conformations. We expect KinaMetrix to serve as a resource for researchers studying kinase structural biology or developing conformation-specific kinase inhibitors.",2019-01-01 +32864809,The Parkinson's Disease Genome-Wide Association Study Locus Browser.,"

Background

Parkinson's disease (PD) is a neurodegenerative disease with an often complex component identifiable by genome-wide association studies. The most recent large-scale PD genome-wide association studies have identified more than 90 independent risk variants for PD risk and progression across more than 80 genomic regions. One major challenge in current genomics is the identification of the causal gene(s) and variant(s) at each genome-wide association study locus. The objective of the current study was to create a tool that would display data for relevant PD risk loci and provide guidance with the prioritization of causal genes and potential mechanisms at each locus.

Methods

We included all significant genome-wide signals from multiple recent PD genome-wide association studies including the most recent PD risk genome-wide association study, age-at-onset genome-wide association study, progression genome-wide association study, and Asian population PD risk genome-wide association study. We gathered data for all genes 1 Mb up and downstream of each variant to allow users to assess which gene(s) are most associated with the variant of interest based on a set of self-ranked criteria. Multiple databases were queried for each gene to collect additional causal data.

Results

We created a PD genome-wide association study browser tool (https://pdgenetics.shinyapps.io/GWASBrowser/) to assist the PD research community with the prioritization of genes for follow-up functional studies to identify potential therapeutic targets.

Conclusions

Our PD genome-wide association study browser tool provides users with a useful method of identifying potential causal genes at all known PD risk loci from large-scale PD genome-wide association studies. We plan to update this tool with new relevant data as sample sizes increase and new PD risk loci are discovered. © 2020 The Authors. Movement Disorders published by Wiley Periodicals LLC on behalf of International Parkinson and Movement Disorder Society. This article has been contributed to by US Government employees and their work is in the public domain in the USA.",2020-08-31 +34806926,Bright fluorescent purine analogues as promising probes.,"Modified bright fluorescent nucleosides that respond to the microenvironment have great potential as probes. A series of novel 8-(phenylethynyl)phenylated 2-amino-2'-deoxyadenosine and 2'-deoxyisoguanosine derivatives have been synthesized by Sonogashira-type coupling reaction and Suzuki reaction. The maximum emission of the new compounds is in the visible region, with strong solvatochromicity and pH-dependent fluorescent properties. Furthermore, some of them exhibit bright fluorescence emissions in various solvents (ε × Φ = 4000-39,000 cm-1 M-1). These consequences indicate that purine analogues could respond to the microenvironment and serve as promising fluorescent probes.Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.2004418 .",2021-11-22 +34339611,"Breastfeeding within the circle of motherhood, restriction, and patriarchy: A qualitative study.","Our purpose in the present study is to analyze the opinions of women regarding the factors that support and hinder their breastfeeding. This is a phenomenological and qualitative study. The present study included 32 breastfeeding women who live in different provinces in the Central Anatolia Region of Turkey. The data were collected using a semi-structured interview form and were evaluated using the content analysis method. 
Three themes and ten sub-themes about women's experience on breastfeeding their babies and factors affecting their breastfeeding were found. The themes identified were: motherhood, restriction, patriarchy. Supplemental data for this article is available online at https://doi.org/10.1080/07399332.2021.1935958 .",2021-08-02 +34542646,Higher maternal adiposity reduces offspring birthweight if associated with a metabolically favourable profile.,"

Aims/hypothesis

Higher maternal BMI during pregnancy is associated with higher offspring birthweight, but it is not known whether this is solely the result of adverse metabolic consequences of higher maternal adiposity, such as maternal insulin resistance and fetal exposure to higher glucose levels, or whether there is any effect of raised adiposity through non-metabolic (e.g. mechanical) factors. We aimed to use genetic variants known to predispose to higher adiposity, coupled with a favourable metabolic profile, in a Mendelian randomisation (MR) study comparing the effect of maternal 'metabolically favourable adiposity' on offspring birthweight with the effect of maternal general adiposity (as indexed by BMI).

Methods

To test the causal effects of maternal metabolically favourable adiposity or general adiposity on offspring birthweight, we performed two-sample MR. We used variants identified in large, published genetic-association studies as being associated with either higher adiposity and a favourable metabolic profile, or higher BMI (n = 442,278 and n = 322,154 for metabolically favourable adiposity and BMI, respectively). We then extracted data on the metabolically favourable adiposity and BMI variants from a large, published genetic-association study of maternal genotype and offspring birthweight controlling for fetal genetic effects (n = 406,063 with maternal and/or fetal genotype effect estimates). We used several sensitivity analyses to test the reliability of the results. As secondary analyses, we used data from four cohorts (total n = 9323 mother-child pairs) to test the effects of maternal metabolically favourable adiposity or BMI on maternal gestational glucose, anthropometric components of birthweight and cord-blood biomarkers.

Results

Higher maternal adiposity with a favourable metabolic profile was associated with lower offspring birthweight (-94 [95% CI -150, -38] g per 1 SD [6.5%] higher maternal metabolically favourable adiposity, p = 0.001). By contrast, higher maternal BMI was associated with higher offspring birthweight (35 [95% CI 16, 53] g per 1 SD [4 kg/m2] higher maternal BMI, p = 0.0002). Sensitivity analyses were broadly consistent with the main results. There was evidence of outlier SNPs for both exposures; their removal slightly strengthened the metabolically favourable adiposity estimate and made no difference to the BMI estimate. Our secondary analyses found evidence to suggest that a higher maternal metabolically favourable adiposity decreases pregnancy fasting glucose levels while a higher maternal BMI increases them. The effects on neonatal anthropometric traits were consistent with the overall effect on birthweight but the smaller sample sizes for these analyses meant that the effects were imprecisely estimated. We also found evidence to suggest that higher maternal metabolically favourable adiposity decreases cord-blood leptin while higher maternal BMI increases it.

Conclusions/interpretation

Our results show that higher adiposity in mothers does not necessarily lead to higher offspring birthweight. Higher maternal adiposity can lead to lower offspring birthweight if accompanied by a favourable metabolic profile.

Data availability

The data for the genome-wide association studies (GWAS) of BMI are available at https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files . The data for the GWAS of body fat percentage are available at https://walker05.u.hpc.mssm.edu .",2021-09-20 +,A large set of 26 new reference transcriptomes dedicated to comparative population genomics in crops and wild relatives,"We produced a unique large data set of reference transcriptomes to obtain new knowledge about the evolution of plant genomes and crop domestication. For this purpose, we validated a RNA‐Seq data assembly protocol to perform comparative population genomics. For the validation, we assessed and compared the quality of de novo Illumina short‐read assemblies using data from two crops for which an annotated reference genome was available, namely grapevine and sorghum. We used the same protocol for the release of 26 new transcriptomes of crop plants and wild relatives, including still understudied crops such as yam, pearl millet and fonio. The species list has a wide taxonomic representation with the inclusion of 15 monocots and 11 eudicots. All contigs were annotated using BLAST, prot4EST and Blast2GO. A strong originality of the data set is that each crop is associated with close relative species, which will permit whole‐genome comparative evolutionary studies between crops and their wild‐related species. This large resource will thus serve research communities working on both crops and model organisms. All the data are available at http://arcad-bioinformatics.southgreen.fr/.",2017-05-01 +30371881,iDog: an integrated resource for domestic dogs and wild canids.,"The domestic dog (Canis lupus familiaris) is indisputably one of man's best friends. It is also a fundamental model for many heritable human diseases. Here, we present iDog (http://bigd.big.ac.cn/idog), the first integrated resource dedicated to domestic dogs and wild canids. 
It incorporates a variety of omics data, including genome sequences assemblies for dhole and wolf, genomic variations extracted from hundreds of dog/wolf whole genomes, phenotype/disease traits curated from dog research communities and public resources, gene expression profiles derived from published RNA-Seq data, gene ontology for functional annotation, homolog gene information for multiple organisms and disease-related literature. Additionally, iDog integrates sequence alignment tools for data analyses and a genome browser for data visualization. iDog will not only benefit the global dog research community, but also provide access to a user-friendly consolidation of dog information to a large number of dog enthusiasts.",2019-01-01 +33869699,iTRAQ-Based proteomic dataset for bovine pre-ovulatory plasma and follicular fluid containing high and low Estradiol.,"This is isobaric tags for a relative and absolute quantification (iTRAQ)-Based Proteomic Data on bovine plasma (PL) and follicular fluid (FF) containing high and low pre-ovulatory circulating concentration of estradiol (E2). The PL and FF were collected from nine beef cows that were identified to initiate a new follicular wave on day -4 during synchronization. Follicular dynamics and ovulatory response were monitored using transrectal ultrasonography. Blood samples were collected at slaughter and FF was aspirated from dominant follicles (DF; >10 mm). Estradiol concentrations in PL and FF were measured by radioimmunoassays. Plasma and FF were labeled as containing high E2 (PL HE2 and FF HE2) or low E2 (PL LE2 and FF LE2). Abundant proteins (albumin, IgG, IgA, and alpha-1-antitrypsin) were depleted from the four PL and FF samples. Peptides were labeled with iTRAQ reagents and analyzed using 2-dimentional liquid chromatography ESI-based mass spectrometry. Proteins were identified and quantified using SEQUESTTM search engine embedded in Proteome Discoverer. 
The proteins matched with at least one unique peptide at minimum 95% confidence were considered positive identifications. Protein expression levels were determined by assigned fold change of >2.0 or <0.5 between any pair from the four sample types. The paired comparisons made were PL HE2 and PL LE2, FF HE2 and FF LE2, PL HE2 and FF HE2, and PL LE2 and FF LE2. Protein Analysis Through Evolutionary Relationships (PANTHER) and Database for Annotation, Visualization and Integrated Discovery (DAVID) were used to classify protein functions. This dataset includes the overview of workflow for identification and quantification of proteins and details on 231 proteins identified which includes 103 up- and down-regulate proteins. This dataset can be useful for further probing of the identified regulated proteins to better understand folliculogenesis and ovulation, particularly in bovine. This dataset is related to the article 'iTRAQ-Based Proteomic Analysis of Bovine Pre-ovulatory Plasma and Follicular Fluid' by P. A. Afedi, E. L. Larimore, R. A. Cushman, D. Raynie, G. A. Perry. Domestic Animal Endocrinology. https://doi.org/10.1016/j.domaniend.2021.106606.",2021-03-26 +,O2.1. LOCAL AND LONG-RANGE CONNECTIVITY PATTERNS OF AUDITORY PERCEPTUAL DISTURBANCE IN SCHIZOPHRENIA,"Abstract

Background

Auditory hallucinations are a prevalent, debilitating symptom of schizophrenia (Sz). Lack of detailed phenomenological assessments of perceptual disturbances in large psychiatric imaging datasets limits our ability to disentangle the underlying neural mechanisms of hallucinations. Our study investigates how changes in local functional communication dynamics may be associated with wide-ranging auditory disturbances in Sz.

Methods

Local functional connectivity was estimated using regional homogeneity (ReHo) analysis of resting fMRI data, which quantifies synchronization of fMRI activity of a voxel to its neighboring voxels. Resting fMRI data of 99 Sz patients was analyzed (mean age=36.2±13.3 y, sex=71/28 m/f); Auditory perceptual disturbance in the past week was estimated using the auditory perception state (APS) subscale score of the recently validated Auditory Perceptual Trait and State Scale (http://www.mdbrain.org/APTS.pdf). Voxelwise regression analysis of ReHo was performed including APS score as a regressor of interest. Significant results were thresholded using AFNI’s 3dClustSim with autocorrelation function option to yield corrected p<0.05, corresponding to cluster-size threshold of 49 voxels at voxelwise p=0.001.

Results

Higher APS scores were associated with reduced ReHo in clusters in left putamen, right putamen, left temporoparietal junction, and right hippocampus. In a follow-up analysis using these clusters as seeds in whole-brain resting-state functional connectivity analysis (rsFC) analysis, higher APS scores were significantly associated with reduced rsFC between the right putamen seed and clusters in the contralateral putamen and auditory cortex.

Discussion

Our findings are consistent with those of a prior study that reported abnormal ReHo in left and right putamen of a unmedicated first-episode Sz patients (Cui et al. 2016). However, in that small sample (n=32), striatal ReHo was elevated relative to controls, and AH severity was not significantly correlated with striatal ReHo measures. Our study investigated ReHo in a large sample of chronic, medicated patients (all except 4 were taking antipsychotic medication at time of study). While it is widely accepted that striatal signaling is disrupted in Sz, future work is needed to better understand how striatal signaling deficits may change over the course of illness and how this relates to particular symptoms such as hallucinations. Implications for development of novel therapies that account for these nuanced findings will be discussed.",2020-05-01 +,SUN-190 Consumption of Added Sugar among US Adults Who Are Aware and Unaware of Their Prediabetes Status,"Abstract BACKGROUND: Prediabetes affects 84.1 million Americans and is a primary determinant of type 2 diabetes (T2D). Moreover, 90% of U.S. adults with prediabetes are unaware that they have prediabetes. Greater consumption of added sugar is positively associated with prediabetes and T2D and Americans consume ~17 tsp. (71.4 g) of added sugar daily. Given that prediabetes awareness may influence dietary behavior, including consumption of added sugar, we test the hypothesis that prediabetes awareness is associated with reduced consumption of added sugar. METHODS: We performed a secondary data analysis using the 2013-2014 National Health and Nutrition Examination Survey and the USDA Food Patterns Equivalent Database. Individuals with prediabetes (HgA1c ≥5.7% and ≤ 6.4%) were dichotomized by awareness of their prediabetes diagnosis as confirmed by a healthcare provider (yes/no). 
Survey-weighted OLS regression was used to test whether being aware of having prediabetes was associated with reduced consumption of total added sugar intake (g/day) among individuals over the age of 20, controlling for sociodemographic covariates. RESULTS: 1,169 individuals were identified as having prediabetes, with 92% of the sample unaware of their condition and reported consuming, on average, approximately 16.7 tsp. equivalents (70 g/day) of added sugar (70 g/day). Awareness of having prediabetes was not significantly associated with consumption of added sugars (b=-2.21, p=0.3197). CONCLUSION: While our results indicate that, on average, prediabetes awareness is not associated with decreased consumption of added sugar, the findings have important clinical implications; specifically, early implementation of lifestyle modifications, including reducing added sugar, that may slow or prevent the onset of T2D. Dietary education, particularly as it relates to sources of added sugar, should be considered primary care for persons with prediabetes. References: (1) Centers for Disease Control and Prevention. National diabetes statistics report, 2017. 2017; https://www.cdc.gov/diabetes/data/statistics/statistics-report.html. Accessed April 20, 2017. (2) Stanhope KL. Sugar consumption, metabolic disease and obesity: The state of the controversy. Crit Rev Clin Lab Sci. 2016;53(1):52-67. (3) Bardenheier BH, Cogswell ME, Gregg EW, Williams DE, Zhang Z, Geiss LS. Does knowing one's elevated glycemic status make a difference in macronutrient intake? Diabetes Care. 2014;37(12):3143-3149.",2019-04-15 +35047694,Effects of dietary fat manipulation on cognition in mice and rats: protocol for a systematic review and meta-analysis.,"

Introduction and objective

The Western diet that comprises high levels of long-chain saturated fats and sugar is associated not only with metabolic disorders such as obesity and type 2 diabetes but also has been recently linked to brain changes and cognitive dysfunction. However, in animal studies, reported effects are variable, and the mechanisms underlying these effects are unclear. In the proposed review, we aim to summarise the diverse evidence of the effects of so-called 'high-fat' and ketogenic diets on behavioural measures of cognition in postweaning mice and rats, relative to animals on standard diets and to determine potential underlying mechanisms of high-fat diet-induced effects.

Search strategy

A comprehensive search strategy was designed to retrieve studies reporting use of a high-fat or ketogenic diet in postweaning mice and rats that included cognitive assessments. Three databases (Medline, SCOPUS and Web of Science) were searched and 4487 unique references were retrieved.

Screening and annotation

Studies were screened for inclusion by two independent reviewers, with 330 studies retained for analysis. Characteristics of disease model choice, experimental design, intervention use and outcome assessment are to be extracted using the Systematic Review Facility (http://syrf.org.uk/) tool. Studies will be assessed for study quality and risk of bias and confidence of mechanistic involvement.

Data management and reporting

For cognitive outcomes, effect sizes will be calculated using normalised mean difference and summarised using a random effects model. The contribution of potential sources of heterogeneity to the observed effects of diet on cognition will be assessed using multivariable meta-regression, with partitioning of heterogeneity as a sensitivity analysis. A preliminary version of this protocol was published on 9 April 2019 on the Collaborative Approach to Meta-Analysis and Review of Animal Data from Experimental Studies website (http://www.dcn.ed.ac.uk/camarades/research.html%23protocols).

Ethics and dissemination

No ethical approval is required as there are no subjects in the proposed study.",2020-11-18 +34314366,dbMCS: A Database for Exploring the Mutation Markers of Anti-Cancer Drug Sensitivity.,"The identification of mutation markers and the selection of appropriate treatment for patients with specific genome mutations are important steps in the development of targeted therapies and the realization of precision medicine for human cancers. To investigate the baseline characteristics of drug sensitivity markers and develop computational methods of mutation effect prediction, we presented a manually curated online-based database of mutation Markers for anti-Cancer drug Sensitivity (dbMCS). Currently, dbMCS contains 1271 mutations and 4427 mutation-disease-drug associations (3151 and 1276 for sensitivity and resistance, respectively) with their PubMed indexed articles. By comparing the mutations in dbMCS with the putative neutral polymorphisms, we investigated the characteristics of drug sensitivity markers. We found that the mutation markers tend to significantly impact on high-conservative regions both in DNA sequences and protein domains. And some of them presented pleiotropic effects depending on the tumor context, appearing concurrently in the sensitivity and resistance categories. In addition, we preliminarily explored the machine learning-based methods for identifying mutation markers of anti-cancer drug sensitivity and produced optimistic results, which suggests that a reliable dataset may provide new insights and essential clues for future cancer pharmacogenomics studies. dbMCS is available at http://bioinfo.aielab.cc/dbMCS/.",2021-11-05 +31157825,MENDA: a comprehensive curated resource of metabolic characterization in depression.,"Depression is a seriously disabling psychiatric disorder with a significant burden of disease. Metabolic abnormalities have been widely reported in depressed patients and animal models. 
However, there are few systematic efforts that integrate meaningful biological insights from these studies. Herein, available metabolic knowledge in the context of depression was integrated to provide a systematic and panoramic view of metabolic characterization. After screening more than 10 000 citations from five electronic literature databases and five metabolomics databases, we manually curated 5675 metabolite entries from 464 studies, including human, rat, mouse and non-human primate, to develop a new metabolite-disease association database, called MENDA (http://menda.cqmu.edu.cn:8080/index.php). The standardized data extraction process was used for data collection, a multi-faceted annotation scheme was developed, and a user-friendly search engine and web interface were integrated for database access. To facilitate data analysis and interpretation based on MENDA, we also proposed a systematic analytical framework, including data integration and biological function analysis. Case studies were provided that identified the consistently altered metabolites using the vote-counting method, and that captured the underlying molecular mechanism using pathway and network analyses. Collectively, we provided a comprehensive curation of metabolic characterization in depression. Our model of a specific psychiatry disorder may be replicated to study other complex diseases.",2020-07-01 +34252944,Build a better bootstrap and the RAWR shall beat a random path to your door: phylogenetic support estimation revisited.,"

Motivation

The standard bootstrap method is used throughout science and engineering to perform general-purpose non-parametric resampling and re-estimation. Among the most widely cited and widely used such applications is the phylogenetic bootstrap method, which Felsenstein proposed in 1985 as a means to place statistical confidence intervals on an estimated phylogeny (or estimate 'phylogenetic support'). A key simplifying assumption of the bootstrap method is that input data are independent and identically distributed (i.i.d.). However, the i.i.d. assumption is an over-simplification for biomolecular sequence analysis, as Felsenstein noted.

Results

In this study, we introduce a new sequence-aware non-parametric resampling technique, which we refer to as RAWR ('RAndom Walk Resampling'). RAWR consists of random walks that synthesize and extend the standard bootstrap method and the 'mirrored inputs' idea of Landan and Graur. We apply RAWR to the task of phylogenetic support estimation. RAWR's performance is compared to the state-of-the-art using synthetic and empirical data that span a range of dataset sizes and evolutionary divergence. We show that RAWR support estimates offer comparable or typically superior type I and type II error compared to phylogenetic bootstrap support. We also conduct a re-analysis of large-scale genomic sequence data from a recent study of Darwin's finches. Our findings clarify phylogenetic uncertainty in a charismatic clade that serves as an important model for complex adaptive evolution.

Availability and implementation

Data and software are publicly available under open-source software and open data licenses at: https://gitlab.msu.edu/liulab/RAWR-study-datasets-and-scripts.",2021-07-01 +34805165,Identification and Validation of the Pyroptosis-Related Molecular Subtypes of Lung Adenocarcinoma by Bioinformatics and Machine Learning.,"Lung cancer remains the leading cause of cancer death globally, with lung adenocarcinoma (LUAD) being its most prevalent subtype. Due to the heterogeneity of LUAD, patients given the same treatment regimen may have different responses and clinical outcomes. Therefore, identifying new subtypes of LUAD is important for predicting prognosis and providing personalized treatment for patients. Pyroptosis-related genes play an essential role in anticancer, but there is limited research investigating pyroptosis in LUAD. In this study, 33 pyroptosis gene expression profiles and clinical information were collected from The Cancer Genome Atlas (TCGA) and Gene Expression Omnibus (GEO) databases. By bioinformatics and machine learning analyses, we identified novel subtypes of LUAD based on 10 pyroptosis-related genes and further validated them in the GEO dataset, with machine learning models performing up to an AUC of 1 for classifying in GEO. A web-based tool was established for clinicians to use our clustering model (http://www.aimedicallab.com/tool/aiml-subphe-luad.html). LUAD patients were clustered into 3 subtypes (A, B, and C), and survival analysis showed that B had the best survival outcome and C had the worst survival outcome. The relationships between pyroptosis gene expression and clinical characteristics were further analyzed in the three molecular subtypes. Immune profiling revealed significant differences in immune cell infiltration among the three molecular subtypes. 
GO enrichment and KEGG pathway analyses were performed based on the differential genes of the three subtypes, indicating that differentially expressed genes (DEGs) were involved in multiple cellular and biological functions, including RNA catabolic process, mRNA catabolic process, and pathways of neurodegeneration-multiple diseases. Finally, we developed an 8-gene prognostic model that accurately predicted 1-, 3-, and 5-year overall survival. In conclusion, pyroptosis-related genes may play a critical role in LUAD, and provide new insights into the underlying mechanisms of LUAD.",2021-11-04 +33675341,Maternal BMI is positively associated with human milk fat: a systematic review and meta-regression analysis.,"

Background

Lack of robust estimates of human-milk nutrient composition and influential maternal factors, such as body composition, are barriers to informing nutrition policies and programs.

Objective

The objective was to understand the relation between maternal BMI and human-milk energy, fat, and/or total protein.

Methods

Four electronic databases (MEDLINE, Embase, CINAHL, and Web of Science) were searched. Outcomes assessed were human-milk energy (kcal/L), fat (g/L), and total protein (g/L) from mothers 1 to 6 mo postpartum. Studies with data on maternal BMI or weight and height that quantified human-milk energy, fat, or protein between 1 and 6 mo postpartum were eligible. Random-effects meta-regression weighted by the inverse of the study-level SE was completed for each of the 3 outcomes. The certainty of evidence for each outcome was assessed using the GRADE (Grading of Recommendations Assessment, Development, and Evaluation) approach.

Results

A total of 11,373 titles and abstracts were identified, and after full-text screening, 69 articles of 66 studies were included. Meta-regression results showed a positive association between maternal BMI and human-milk fat (β: 0.56 g/L; 95% CI: 0.034, 1.1; P = 0.04; I2 = 93.7%, n = 63 datapoints). There was no significant association between maternal BMI and human-milk energy (β: 3.9 kcal/L; 95% CI: -1.6, 9.5; P = 0.16, I2 = 93.3%, n = 40 datapoints) or total protein (β: 0.13 g/L; 95% CI: -0.16, 0.41; P = 0.37, I2 = 99.1%, n = 40 datapoints). The certainty of evidence for human-milk energy was low and the certainty of evidence for fat and total protein was very low.

Conclusions

Meta-regression analysis of available literature suggested an association between maternal BMI and human-milk fat between 1 and 6 mo postpartum. Future studies are needed to confirm the relation between maternal BMI; variation in human-milk energy, fat, and protein content; and the implications for child growth and development. This review is registered with International Prospective Register of Systematic Reviews (PROSPERO 2018 CRD42018098808) at https://www.crd.york.ac.uk/prospero/.",2021-04-01 +34050182,Enhancing CRISPR-Cas9 gRNA efficiency prediction by data integration and deep learning.,"The design of CRISPR gRNAs requires accurate on-target efficiency predictions, which demand high-quality gRNA activity data and efficient modeling. To advance, we here report on the generation of on-target gRNA activity data for 10,592 SpCas9 gRNAs. Integrating these with complementary published data, we train a deep learning model, CRISPRon, on 23,902 gRNAs. Compared to existing tools, CRISPRon exhibits significantly higher prediction performances on four test datasets not overlapping with training data used for the development of these tools. Furthermore, we present an interactive gRNA design webserver based on the CRISPRon standalone software, both available via https://rth.dk/resources/crispr/ . CRISPRon advances CRISPR applications by providing more accurate gRNA efficiency predictions than the existing tools.",2021-05-28 +33888098,Richer fusion network for breast cancer classification based on multimodal data.,"

Background

Deep learning algorithms significantly improve the accuracy of pathological image classification, but the accuracy of breast cancer classification using only single-mode pathological images still cannot meet the needs of clinical practice. Inspired by the real scenario of pathologists reading pathological images for diagnosis, we integrate pathological images and structured data extracted from clinical electronic medical record (EMR) to further improve the accuracy of breast cancer classification.

Methods

In this paper, we propose a new richer fusion network for the classification of benign and malignant breast cancer based on multimodal data. To integrate pathological images more fully with structured EMR data, we propose a method to extract a richer multilevel feature representation of the pathological image from multiple convolutional layers. Meanwhile, to minimize the information loss for each modality before data fusion, we use the denoising autoencoder as a way to increase the low-dimensional structured EMR data to high-dimensional, instead of reducing the high-dimensional image data to low-dimensional before data fusion. In addition, the denoising autoencoder naturally generalizes our method to make accurate predictions with partially missing structured EMR data.

Results

The experimental results show that the proposed method is superior to the most advanced method in terms of the average classification accuracy (92.9%). In addition, we have released a dataset containing structured data from 185 patients that were extracted from EMR and 3764 paired pathological images of breast cancer, which can be publicly downloaded from http://ear.ict.ac.cn/?page_id=1663 .

Conclusions

We utilized a new richer fusion network to integrate highly heterogeneous data to leverage the structured EMR data to improve the accuracy of pathological image classification. Therefore, the application of automatic breast cancer classification algorithms in clinical practice becomes possible. Due to the generality of the proposed fusion method, it can be straightforwardly extended to the fusion of other structured data and unstructured data.",2021-04-22 +31127124,Exploratory Gene Ontology Analysis with Interactive Visualization.,"The Gene Ontology (GO) is a central resource for functional-genomics research. Scientists rely on the functional annotations in the GO for hypothesis generation and couple it with high-throughput biological data to enhance interpretation of results. At the same time, the sheer number of concepts (>30,000) and relationships (>70,000) presents a challenge: it can be difficult to draw a comprehensive picture of how certain concepts of interest might relate with the rest of the ontology structure. Here we present new visualization strategies to facilitate the exploration and use of the information in the GO. We rely on novel graphical display and software architecture that allow significant interaction. To illustrate the potential of our strategies, we provide examples from high-throughput genomic analyses, including chromatin immunoprecipitation experiments and genome-wide association studies. The scientist can also use our visualizations to identify gene sets that likely experience coordinated changes in their expression and use them to simulate biologically-grounded single cell RNA sequencing data, or conduct power studies for differential gene expression studies using our built-in pipeline. 
Our software and documentation are available at http://aegis.stanford.edu .",2019-05-24 +33211869,"Rfam 14: expanded coverage of metagenomic, viral and microRNA families.","Rfam is a database of RNA families where each of the 3444 families is represented by a multiple sequence alignment of known RNA sequences and a covariance model that can be used to search for additional members of the family. Recent developments have involved expert collaborations to improve the quality and coverage of Rfam data, focusing on microRNAs, viral and bacterial RNAs. We have completed the first phase of synchronising microRNA families in Rfam and miRBase, creating 356 new Rfam families and updating 40. We established a procedure for comprehensive annotation of viral RNA families starting with Flavivirus and Coronaviridae RNAs. We have also increased the coverage of bacterial and metagenome-based RNA families from the ZWD database. These developments have enabled a significant growth of the database, with the addition of 759 new families in Rfam 14. To facilitate further community contribution to Rfam, expert users are now able to build and submit new families using the newly developed Rfam Cloud family curation system. New Rfam website features include a new sequence similarity search powered by RNAcentral, as well as search and visualisation of families with pseudoknots. Rfam is freely available at https://rfam.org.",2021-01-01 +29761469,Hymenoptera Genome Database: Using HymenopteraMine to Enhance Genomic Studies of Hymenopteran Insects.,"The Hymenoptera Genome Database (HGD; http://hymenopteragenome.org ) is a genome informatics resource for insects of the order Hymenoptera, which includes bees, ants and wasps. HGD provides genome browsers with manual annotation tools (JBrowse/Apollo), BLAST, bulk data download, and a data mining warehouse (HymenopteraMine). 
This chapter focuses on the use of HymenopteraMine to create annotation data sets that can be exported for use in downstream analyses. HymenopteraMine leverages the InterMine platform to combine genome assemblies and official gene sets with data from OrthoDB, RefSeq, FlyBase, Gene Ontology, UniProt, InterPro, KEGG, Reactome, dbSNP, PubMed, and BioGrid, as well as precomputed gene expression information based on publicly available RNAseq. Built-in template queries provide starting points for data exploration, while the QueryBuilder tool supports construction of complex custom queries. The List Analysis and Genomic Regions search tools execute queries based on uploaded lists of identifiers and genome coordinates, respectively. HymenopteraMine facilitates cross-species data mining based on orthology and supports meta-analyses by tracking identifiers across gene sets and genome assemblies.",2018-01-01 +30788500,EnDisease: a manually curated database for enhancer-disease associations. ,"Genome-wide association studies have successfully identified thousands of genomic loci potentially associated with hundreds of complex traits in the past decade. Nevertheless, the fact that more than 90% of such disease-associated variants lie in non-coding DNA with unknown functional implications has been appealing for advanced analysis of plenty of genetic variants. Toward this goal, recent studies focusing on individual non-coding variants have revealed that complex diseases are often the consequences of erroneous interactions between enhancers and their target genes. However, such enhancer-disease associations are dispersed in a variety of independent studies, and thus far it is still difficult to carry out comprehensive downstream analysis with these experimentally supported enhancer-disease associations. 
To fill in this gap, we collected experimentally supported associations between complex diseases and enhancers and then developed a manually curated database called EnDisease (http://bioinfo.au.tsinghua.edu.cn/endisease/). Concretely, EnDisease documents 535 associations between 133 diseases and 454 enhancers, extracted from 199 articles. Moreover, after annotating these enhancers using 649 human and 115 mouse DNase-seq experiments, we find that cancer-related enhancers tend to be open across a large number of cell types. This database provides a user-friendly interface for browsing and searching, and it also allows users to download data freely. EnDisease has the potential to become a helpful and important resource for researchers who aim to understand the molecular mechanisms of enhancers involved in complex diseases.",2019-01-01 +30202870,SequencEnG: an interactive knowledge base of sequencing techniques.,

Summary

Next-generation sequencing (NGS) techniques are revolutionizing biomedical research by providing powerful methods for generating genomic and epigenomic profiles. The rapid progress is posing an acute challenge to students and researchers to stay acquainted with the numerous available methods. We have developed an interactive online educational resource called Sequencing Techniques Engine for Genomics (SequencEnG) to provide a tree-structured knowledge base of 66 different sequencing techniques and step-by-step NGS data analysis pipelines comparing popular tools. SequencEnG is designed to facilitate barrier-free learning of current NGS techniques and provides a user-friendly interface for searching through experimental and analysis methods.

Availability and implementation

SequencEnG is part of the project Knowledge Engine for Genomics (KnowEnG) and is freely available at http://education.knoweng.org/sequenceng/.,2019-04-01 +33868814,Multi-schema computational prediction of the comprehensive SARS-CoV-2 vs. human interactome.,"

Background

Understanding the disease pathogenesis of the novel coronavirus, denoted SARS-CoV-2, is critical to the development of anti-SARS-CoV-2 therapeutics. The global propagation of the viral disease, denoted COVID-19 (""coronavirus disease 2019""), has unified the scientific community in searching for possible inhibitory small molecules or polypeptides. A holistic understanding of the SARS-CoV-2 vs. human inter-species interactome promises to identify putative protein-protein interactions (PPI) that may be considered targets for the development of inhibitory therapeutics.

Methods

We leverage two state-of-the-art, sequence-based PPI predictors (PIPE4 & SPRINT) capable of generating the comprehensive SARS-CoV-2 vs. human interactome, comprising approximately 285,000 pairwise predictions. Three prediction schemas (all, proximal, RP-PPI) are leveraged to obtain our highest-confidence subset of PPIs and human proteins predicted to interact with each of the 14 SARS-CoV-2 proteins considered in this study. Notably, the use of the Reciprocal Perspective (RP) framework demonstrates improved predictive performance in multiple cross-validation experiments.

Results

The all schema identified 279 high-confidence putative interactions involving 225 human proteins, the proximal schema identified 129 high-confidence putative interactions involving 126 human proteins, and the RP-PPI schema identified 539 high-confidence putative interactions involving 494 human proteins. The intersection of the three sets of predictions comprises the seven highest-confidence PPIs. Notably, the Spike-ACE2 interaction was the highest ranked for both the PIPE4 and SPRINT predictors with the all and proximal schemas, corroborating existing evidence for this PPI. Several other predicted PPIs are biologically relevant within the context of the original SARS-CoV virus. Furthermore, the PIPE-Sites algorithm was used to identify the putative subsequence that might mediate each interaction and thereby inform the design of inhibitory polypeptides intended to disrupt the corresponding host-pathogen interactions.

Conclusion

We publicly released the comprehensive sets of PPI predictions and their corresponding PIPE-Sites landscapes in the following DataVerse repository: https://www.doi.org/10.5683/SP2/JZ77XA. The information provided represents theoretical modeling only and caution should be exercised in its use. It is intended as a resource for the scientific community at large in furthering our understanding of SARS-CoV-2.",2021-04-05 +34383025,HoPhage: an ab initio tool for identifying hosts of phage fragments from metaviromes. ,"We present HoPhage (Host of Phage) to identify the host of a given phage fragment from metavirome data at the genus level. HoPhage integrates two modules using a deep learning algorithm and a Markov chain model, respectively. HoPhage achieves 47.90% and 82.47% mean accuracy at the genus and phylum levels for ∼1 kb-long artificial phage fragments when predicting host among 50 genera, representing 7.54%-20.22% and 13.55%-24.31% improvement, respectively. By testing on three real virome samples, HoPhage yields 81.11% mean accuracy at the genus level within a much broader candidate host range. HoPhage is available at http://cqb.pku.edu.cn/ZhuLab/HoPhage/. Supplementary data are available at Bioinformatics online.",2021-08-12 +32960944,MMAP: a cloud computing platform for mining the maximum accuracy of predicting phenotypes from genotypes.,"Accurately predicting phenotypes from genotypes holds great promise to improve health management in humans and animals, and breeding efficiency in animals and plants. Although many prediction methods have been developed, the optimal method differs across datasets due to multiple factors, including species, environments, populations and traits of interest. Studies have demonstrated that the number of genes underlying a trait and its heritability are the two key factors that determine which method fits the trait the best. In many cases, however, these two factors are unknown for the traits of interest. 
We developed a cloud computing platform for Mining the Maximum Accuracy of Predicting phenotypes from genotypes (MMAP) using unsupervised learning on publicly available real data and simulated data. MMAP provides a user interface to upload input data, manage projects and analyses and download the output results. The platform is free for the public to conduct computations for predicting phenotypes and genetic merit using the best prediction method optimized from many available ones, including Ridge Regression, gBLUP, compressed BLUP, Bayesian LASSO, Bayes A, B, Cpi and many more. Users can also use the platform to conduct data analyses with any methods of their choice. It is expected that extensive usage of MMAP would enrich the training data, which in turn results in continual improvement of the identification of the best method for use with particular traits.

Availability and implementation

The MMAP user manual, tutorials and example datasets are available at http://zzlab.net/MMAP.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-06-01 +34003431,The C-BIG Repository: an Institution-Level Open Science Platform.,"In January 2016, the Montreal Neurological Institute-Hospital (The Neuro) declared itself an Open Science organization. This vision extends beyond efforts by individual scientists seeking to release individual datasets, software tools, or building platforms that provide for the free dissemination of such information. It involves multiple stakeholders and an infrastructure that considers governance, ethics, computational resourcing, physical design, workflows, training, education, and intra-institutional reporting structures. The C-BIG repository was built in response as The Neuro's institutional biospecimen and clinical data repository, and collects biospecimens as well as clinical, imaging, and genetic data from patients with neurological disease and healthy controls. It is aimed at helping scientific investigators, in both academia and industry, advance our understanding of neurological diseases and accelerate the development of treatments. As many neurological diseases are quite rare, they present several challenges to researchers due to their small patient populations. Overcoming these challenges required the aggregation of datasets from various projects and locations. The C-BIG repository achieves this goal and stands as a scalable working model for institutions to collect, track, curate, archive, and disseminate multimodal data from patients. In November 2020, a Registered Access layer was made available to the wider research community at https://cbigr-open.loris.ca , and in May 2021 fully open data will be released to complement the Registered Access data. 
This article outlines many of the aspects of The Neuro's transition to Open Science by describing the data to be released, C-BIG's full capabilities, and the design aspects that were implemented for effective data sharing.",2021-05-18 +34583740,FP-ADMET: a compendium of fingerprint-based ADMET prediction models.,"

Motivation

The absorption, distribution, metabolism, excretion, and toxicity (ADMET) of drugs plays a key role in determining which among the potential candidates are to be prioritized. In silico approaches based on machine learning methods are becoming increasingly popular, but are nonetheless limited by the availability of data. With a view to making both data and models available to the scientific community, we have developed FPADMET which is a repository of molecular fingerprint-based predictive models for ADMET properties. In this article, we have examined the efficacy of fingerprint-based machine learning models for a large number of ADMET-related properties. The predictive ability of a set of 20 different binary fingerprints (based on substructure keys, atom pairs, local path environments, as well as custom fingerprints such as all-shortest paths) for over 50 ADMET and ADMET-related endpoints has been evaluated as part of the study. We find that for a majority of the properties, fingerprint-based random forest models yield comparable or better performance compared with traditional 2D/3D molecular descriptors.

Availability

The models are made available as part of open access software that can be downloaded from https://gitlab.com/vishsoft/fpadmet .",2021-09-28 +34876567,Spatial-proteomics reveals phospho-signaling dynamics at subcellular resolution.,"Dynamic change in subcellular localization of signaling proteins is a general concept that eukaryotic cells evolved for eliciting a coordinated response to stimuli. Mass spectrometry-based proteomics in combination with subcellular fractionation can provide comprehensive maps of spatio-temporal regulation of protein networks in cells, but involves laborious workflows that does not cover the phospho-proteome level. Here we present a high-throughput workflow based on sequential cell fractionation to profile the global proteome and phospho-proteome dynamics across six distinct subcellular fractions. We benchmark the workflow by studying spatio-temporal EGFR phospho-signaling dynamics in vitro in HeLa cells and in vivo in mouse tissues. Finally, we investigate the spatio-temporal stress signaling, revealing cellular relocation of ribosomal proteins in response to hypertonicity and muscle contraction. Proteomics data generated in this study can be explored through https://SpatialProteoDynamics.github.io .",2021-12-07 +34587072,GenNI: Human-AI Collaboration for Data-Backed Text Generation.,"Table2Text systems generate textual output based on structured data utilizing machine learning. These systems are essential for fluent natural language interfaces in tools such as virtual assistants; however, left to generate freely these ML systems often produce misleading or unexpected outputs. GenNI (Generation Negotiation Interface) is an interactive visual system for high-level human-AI collaboration in producing descriptive text. The tool utilizes a deep learning model designed with explicit control states. These controls allow users to globally constrain model generations, without sacrificing the representation power of the deep learning models. 
The visual interface makes it possible for users to interact with AI systems following a Refine-Forecast paradigm to ensure that the generation system acts in a manner human users find suitable. We report multiple use cases on two experiments that improve over uncontrolled generation approaches, while at the same time providing fine-grained control. A demo and source code are available at https://genni.vizhub.ai.",2021-12-24 +35342875,Implementing Automated Nonparametric Statistical Analysis on Functional Analysis Data: A Guide for Practitioners and Researchers.,"Functional analysis (FA) is an integral component of behavioral assessment and treatment given that clinicians design behavioral treatments based on FA results. Unfortunately, the interrater reliability of FA data interpretation by visual analysis can be inconsistent, potentially leading to ineffective treatment implementation. Hall et al. (2020) recently developed automated nonparametric statistical analysis (ANSA) to facilitate the interpretation of FA data and Kranak et al. (2021) subsequently extended and validated ANSA by applying it to unpublished clinical data. The results of both Hall et al. and Kranak et al. support ANSA as an emerging statistical supplement for interpreting FA data. In the present article, we show how ANSA can be applied to interpret FA data collected in clinical settings in multielement and pairwise designs. We provide a detailed overview of the calculations involved, how to use ANSA in practice, and recommendations for its implementation. A free web-based application is available at https://ansa.shinyapps.io/ansa/.

Supplementary information

The online version contains supplementary material available at 10.1007/s40614-021-00290-2.",2021-05-24 +34382401,A Preprocessing Tool for Enhanced Ion Mobility-Mass Spectrometry-Based Omics Workflows.,"The ability to improve the data quality of ion mobility-mass spectrometry (IM-MS) measurements is of great importance for enabling modular and efficient computational workflows and gaining better qualitative and quantitative insights from complex biological and environmental samples. We developed the PNNL PreProcessor, a standalone and user-friendly software housing various algorithmic implementations to generate new MS-files with enhanced signal quality and in the same instrument format. Different experimental approaches are supported for IM-MS based on Drift-Tube (DT) and Structures for Lossless Ion Manipulations (SLIM), including liquid chromatography (LC) and infusion analyses. The algorithms extend the dynamic range of the detection system, while reducing file sizes for faster and memory-efficient downstream processing. Specifically, multidimensional smoothing improves peak shapes of poorly defined low-abundance signals, and saturation repair reconstructs the intensity profile of high-abundance peaks from various analyte types. Other functionalities are data compression and interpolation, IM demultiplexing, noise filtering by low intensity threshold and spike removal, and exporting of acquisition metadata. Several advantages of the tool are illustrated, including an increase of 19.4% in lipid annotations and a two-times faster processing of LC-DT IM-MS data-independent acquisition spectra from a complex lipid extract of a standard human plasma sample. The software is freely available at https://omics.pnl.gov/software/pnnl-preprocessor.",2021-08-12 +33872372,miRMaster 2.0: multi-species non-coding RNA sequencing analyses at scale.,"Analyzing all features of small non-coding RNA sequencing data can be demanding and challenging. 
To facilitate this process, we developed miRMaster. After the analysis of over 125 000 human samples and 1.5 trillion human small RNA reads over 4 years, we present miRMaster 2 with a wide range of updates and new features. We extended our reference data sets so that miRMaster 2 now supports the analysis of eight species (e.g. human, mouse, chicken, dog, cow) and 10 non-coding RNA classes (e.g. microRNAs, piRNAs, tRNAs, rRNAs, circRNAs). We also incorporated new downstream analysis modules such as batch effect analysis or sample embeddings using UMAP, and updated annotation data bases included by default (miRBase, Ensembl, GtRNAdb). To accommodate the increasing popularity of single cell small-RNA sequencing data, we incorporated a module for unique molecular identifier (UMI) processing. Further, the output tables and graphics have been improved based on user feedback and new output formats that emerged in the community are now supported (e.g. miRGFF3). Finally, we integrated differential expression analysis with the miRNA enrichment analysis tool miEAA. miRMaster is freely available at https://www.ccb.uni-saarland.de/mirmaster2.",2021-07-01 +32757124,Predicting Lymph Node Metastasis in Intrahepatic Cholangiocarcinoma.,"

Background

The objective of the current study was to develop a model to predict the likelihood of occult lymph node metastasis (LNM) prior to resection of intrahepatic cholangiocarcinoma (ICC).

Methods

Patients who underwent hepatectomy for ICC between 2000 and 2017 were identified using a multi-institutional database. A novel model incorporating clinical and preoperative imaging data was developed to predict LNM.

Results

Among 980 patients who underwent resection of ICC, 190 (19.4%) individuals had at least one LNM identified on final pathology. An enhanced imaging model incorporating clinical and imaging data was developed to predict LNM ( https://k-sahara.shinyapps.io/ICC_imaging/ ). The performance of the enhanced imaging model was very good in the training data set (c-index 0.702), as well as the validation data set with bootstrapping resamples (c-index 0.701) and outperformed the preoperative imaging alone (c-index 0.660). The novel model predicted both 5-year overall survival (OS) (low risk 48.4% vs. high risk 18.4%) and 5-year disease-specific survival (DSS) (low risk 51.9% vs. high risk 25.2%, both p < 0.001). When applied among Nx patients, 5-year OS and DSS of low-risk Nx patients was comparable with that of N0 patients, while high-risk Nx patients had similar outcomes to N1 patients (p > 0.05).

Conclusion

This tool may represent an opportunity to stratify prognosis of Nx patients and can help inform clinical decision-making prior to resection of ICC.",2020-07-14 +34091426,Adaptive convolutional neural networks for accelerating magnetic resonance imaging via k-space data interpolation.,"Deep learning in k-space has demonstrated great potential for image reconstruction from undersampled k-space data in fast magnetic resonance imaging (MRI). However, existing deep learning-based image reconstruction methods typically apply weight-sharing convolutional neural networks (CNNs) to k-space data without taking into consideration the k-space data's spatial frequency properties, leading to ineffective learning of the image reconstruction models. Moreover, complementary information of spatially adjacent slices is often ignored in existing deep learning methods. To overcome such limitations, we have developed a deep learning algorithm, referred to as adaptive convolutional neural networks for k-space data interpolation (ACNN-k-Space), which adopts a residual Encoder-Decoder network architecture to interpolate the undersampled k-space data by integrating spatially contiguous slices as multi-channel input, along with k-space data from multiple coils if available. The network is enhanced by self-attention layers to adaptively focus on k-space data at different spatial frequencies and channels. We have evaluated our method on two public datasets and compared it with state-of-the-art existing methods. Ablation studies and experimental results demonstrate that our method effectively reconstructs images from undersampled k-space data and achieves significantly better image reconstruction performance than current state-of-the-art techniques. Source code of the method is available at https://gitlab.com/qgpmztmf/acnn-k-space.",2021-05-16 +33166440,Evaluating the reliability and readability of online information on osteoporosis.,"

Objective

Internet usage for obtaining health-related information is widely popular among patients. However, there are still concerns about the reliability and comprehensibility of online information. The purpose of this study is to investigate the reliability and readability of osteoporosis-related websites.

Methods

On April 2, 2020, we searched the term ""osteoporosis"" on Google (https://www.google.com). We evaluated the first 200 uniform resource locators (URLs) in the query results regarding typology, the Journal of the American Medical Association (JAMA) scores, Health on the Net Foundation Code of conduct (HONcode) certification, Flesch-Kincaid Grade (FKG), and Simple Measure of Gobbledygook (SMOG) scores. The JAMA scoring system and HONcode stamp were used for assessing the reliability, whereas FKG and SMOG scores were used to assess the readability of online information.

Results

Of the 151 analyzed websites, 57 (37.7%) were classified as highly reliable, and 19 (12.6%) were assigned with HONcode certification. The average FKG scores (8.81 ± 2.21) and SMOG scores (7.63 ± 1.81) were below the recommended grade, which is considered as easily readable. High reliable information was found to have higher readability scores, thereby representing the difficulty of readability. We observed a weak correlation between the increased reliability of information and decreased readability.

Conclusion

Osteoporosis-related content on the internet generally has low reliability. High-reliable information is available online in scientific published materials, health portals, and news. Although the readability of the overall material is acceptable, the high-reliable websites still require high literacy and comprehension skills.",2020-11-09 +34750159,Disease Activity and Adverse Events in Patients with ANCA-Associated Vasculitides Undergoing Long-Term Dialysis.,"

Background and objectives

Kidney impairment of ANCA-associated vasculitides can lead to kidney failure. Patients with kidney failure may suffer from vasculitis relapses but are also at high risk of infections and cardiovascular events, which questions the maintenance of immunosuppressive therapy.

Design, setting, participants, & measurements

Patients with ANCA-associated vasculitides initiating long-term dialysis between 2008 and 2012 in France registered in the national Renal Epidemiology and Information Network registry and paired with the National Health System database were included. We analyzed the proportion of patients in remission off immunosuppression over time and overall and event-free survival on dialysis (considering transplantation as a competing risk). We compared the incidence of vasculitis relapses, serious infections, cardiovascular events, and cancers before and after dialysis initiation.

Results

In total, 229 patients were included: 142 with granulomatous polyangiitis and 87 with microscopic polyangiitis. Mean follow-up after dialysis initiation was 4.6±2.7 years; 82 patients received a kidney transplant. The proportion of patients in remission off immunosuppression increased from 23% at dialysis initiation to 62% after 5 years. Overall survival rates on dialysis were 86%, 69%, and 62% at 1, 3, and 5 years, respectively. Main causes of death were infections (35%) and cardiovascular events (26%) but not vasculitis flares (6%). The incidence of vasculitis relapses decreased from 57 to seven episodes per 100 person-years before and after dialysis initiation (P=0.05). Overall, during follow-up, 45% of patients experienced a serious infection and 45% had a cardiovascular event, whereas 13% experienced a vasculitis relapse.

Conclusions

The proportion of patients with ANCA-associated vasculitis in remission off immunosuppression increases with time spent on dialysis. In this cohort, patients were far less likely to relapse from their vasculitis than to display serious infectious or cardiovascular events.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2021_11_08_CJN03190321.mp3.",2021-11-01 +34791106,DCMP: database of cancer mutant protein domains. ,"Protein domains are functional and structural units of proteins. They are responsible for a particular function that contributes to protein's overall role. Because of this essential role, the majority of the genetic variants occur in the domains. In this study, the somatic mutations across 21 cancer types were mapped to the individual protein domains. To map the mutations to the domains, we employed the whole human proteome to predict the domains in each protein sequence and recognized about 149 668 domains. A novel Perl-API program was developed to convert the protein domain positions into genomic positions, and users can freely access them through GitHub. We determined the distribution of protein domains across 23 chromosomes with the help of these genomic positions. Interestingly, chromosome 19 has more number of protein domains in comparison with other chromosomes. Then, we mapped the cancer mutations to all the protein domains. Around 46-65% of mutations were mapped to their corresponding protein domains, and significantly mutated domains for all the cancer types were determined using the local false discovery ratio (locfdr). The chromosome positions for all the protein domains can be verified using the cross-reference ensemble database. Database URL: https://dcmp.vit.ac.in/.",2021-11-01 +34701624,Is There a Role for Perioperative Radiotherapy in Surgically Resected Stage IV Rectal Cancer? A Propensity Score Matched Analysis.,"

Purpose/objective(s)

This study aimed to determine whether perioperative radiotherapy (RT) improves outcomes in stage IV rectal cancer patients treated with primary surgical resection and systemic chemotherapy and to identify predictive factors for selection of patients for these approaches.

Materials/methods

We searched the Surveillance, Epidemiology, and End Results (SEER) database for patients diagnosed between 2010 and 2015 with stage IV rectal cancer, but without brain or bone metastases. After applying the exclusion criteria, a total of 26,132 patients were included in the analysis; propensity score matching was used to balance their individual characteristics.

Results

Overall, 3283 (12.6%) patients received perioperative RT; the 3-year overall survival (OS) rates were 43.6% in the surgery group and 50.5% in the surgery with RT group (P < 0.001). The survival benefit of RT was maintained after propensity score matching and multivariate adjustment (HR 0.70, 95% CI, 0.66-0.81, P < 0.001). Interaction testing of the prognostic variables revealed a significant interaction between RT and the presence of lung metastasis (P < 0.001): the benefit of RT was observed only in patients without lung metastases (3-year OS 52.1% vs. 44.1%, P < 0.001), but it was observed regardless of liver metastases. Additionally, we developed a web-based calculator (http://bit.do/mRC_surv) to provide individualized estimates of OS benefit based on the receipt of perioperative RT.

Conclusion

Perioperative RT significantly improved OS rates, especially in patients without lung metastases. We successfully developed a nomogram and web-based calculator that could predict survival benefit with the addition of RT for these patients.",2021-11-01 +34725391,Redesigning an antibody H3 loop by virtual screening of a small library of human germline-derived sequences.,"The design of superior biologic therapeutics, including antibodies and engineered proteins, involves optimizing their specific ability to bind to disease-related molecular targets. Previously, we developed and applied the Assisted Design of Antibody and Protein Therapeutics (ADAPT) platform for virtual affinity maturation of antibodies (Vivcharuk et al. in PLoS One 12(7):e0181490, https://doi.org/10.1371/journal.pone.0181490 , 2017). However, ADAPT is limited to point mutations of hot-spot residues in existing CDR loops. In this study, we explore the possibility of wholesale replacement of the entire H3 loop with no restriction to maintain the parental loop length. This complements other currently published studies that sample replacements for the CDR loops L1, L2, L3, H1 and H2. Given the immense sequence space theoretically available to H3, we focused on the virtual grafting of over 5000 human germline-derived H3 sequences from the IMGT/LIGM database increasing the diversity of the sequence space when compared to using crystalized H3 loop sequences. H3 loop conformations are generated and scored to identify optimized H3 sequences. Experimental testing of high-ranking H3 sequences grafted into the framework of the bH1 antibody against human VEGF-A led to the discovery of multiple hits, some of which had similar or better affinities relative to the parental antibody. In over 75% of the tested designs, the re-designed H3 loop contributed favorably to overall binding affinity. The hits also demonstrated good developability attributes such as high thermal stability and no aggregation. 
Crystal structures of select re-designed H3 variants were solved and indicated that although some deviations from predicted structures were seen in the more solvent accessible regions of the H3 loop, they did not significantly affect predicted affinity scores.",2021-11-01 +33993206,"ConsRM: collection and large-scale prediction of the evolutionarily conserved RNA methylation sites, with implications for the functional epitranscriptome. ","Motivation N6-methyladenosine (m6A) is the most prevalent RNA modification on mRNAs and lncRNAs. Evidence increasingly demonstrates its crucial importance in essential molecular mechanisms and various diseases. With recent advances in sequencing techniques, tens of thousands of m6A sites are identified in a typical high-throughput experiment, posing a key challenge to distinguish the functional m6A sites from the remaining 'passenger' (or 'silent') sites. Results: We performed a comparative conservation analysis of the human and mouse m6A epitranscriptomes at single site resolution. A novel scoring framework, ConsRM, was devised to quantitatively measure the degree of conservation of individual m6A sites. ConsRM integrates multiple information sources and a positive-unlabeled learning framework, which integrated genomic and sequence features to trace subtle hints of epitranscriptome layer conservation. With a series validation experiments in mouse, fly and zebrafish, we showed that ConsRM outperformed well-adopted conservation scores (phastCons and phyloP) in distinguishing the conserved and unconserved m6A sites. Additionally, the m6A sites with a higher ConsRM score are more likely to be functionally important. An online database was developed containing the conservation metrics of 177 998 distinct human m6A sites to support conservation analysis and functional prioritization of individual m6A sites. 
And it is freely accessible at: https://www.xjtlu.edu.cn/biologicalsciences/con.",2021-11-01 +32579606,An interactive database for the investigation of high-density peptide microarray guided interaction patterns and antivenom cross-reactivity.,"Snakebite envenoming is a major neglected tropical disease that affects millions of people every year. The only effective treatment against snakebite envenoming consists of unspecified cocktails of polyclonal antibodies purified from the plasma of immunized production animals. Currently, little data exists on the molecular interactions between venom-toxin epitopes and antivenom-antibody paratopes. To address this issue, high-density peptide microarray (hdpm) technology has recently been adapted to the field of toxinology. However, analysis of such valuable datasets requires expert understanding and, thus, complicates its broad application within the field. In the present study, we developed a user-friendly, and high-throughput web application named ""Snake Toxin and Antivenom Binding Profiles"" (STAB Profiles), to allow straight-forward analysis of hdpm datasets. To test our tool and evaluate its performance with a large dataset, we conducted hdpm assays using all African snake toxin protein sequences available in the UniProt database at the time of study design, together with eight commercial antivenoms in clinical use in Africa, thus representing the largest venom-antivenom dataset to date. Furthermore, we introduced a novel method for evaluating raw signals from a peptide microarray experiment and a data normalization protocol enabling intra-microarray and even inter-microarray chip comparisons. Finally, these data, alongside all the data from previous similar studies by Engmark et al., were preprocessed according to our newly developed protocol and made publicly available for download through the STAB Profiles web application (http://tropicalpharmacology.com/tools/stab-profiles/). 
With these data and our tool, we were able to gain key insights into toxin-antivenom interactions and were able to differentiate the ability of different antivenoms to interact with certain toxins of interest. The data, as well as the web application, we present in this article should be of significant value to the venom-antivenom research community. Knowledge gained from our current and future analyses of this dataset carry the potential to guide the improvement and optimization of current antivenoms for maximum patient benefit, as well as aid the development of next-generation antivenoms.",2020-06-24 +34678053,Philadelphia's Excise Tax on Sugar-Sweetened and Artificially Sweetened Beverages and Supplemental Nutrition Assistance Program Benefit Redemption.,"Objectives. To assess the effect of a 2017 excise tax on sugar and artificially sweetened beverages in Philadelphia, Pennsylvania, on the shopping patterns of low-income populations using Supplemental Nutrition Assistance Program (SNAP) data. Methods. I used a synthetic controls approach to estimate the effect of the tax on Philadelphia and neighboring Pennsylvania counties (Bucks, Delaware, and Montgomery) as measured by total SNAP sales (""SNAP redemption"") and SNAP redemption per SNAP participant. I assembled biannual data (2005-2019) from all US counties for SNAP redemption and relevant predictors. I performed placebo tests to estimate statistically significant effects and conducted robustness checks. Results. Detectable increases in SNAP spending occurred in all 3 Philadelphia neighboring counties. Per-participant SNAP spending increased in 2 of the neighboring counties and decreased in Philadelphia. These effects were robust across multiple specifications and placebo tests. Conclusions. The tax contributed to increased SNAP shopping in Philadelphia's neighboring counties across both outcome measures, and decreased spending in Philadelphia (at least by 1 measure). 
This raises questions about retailer behavior, the effectiveness of the tax's public health aim of reducing sugar-sweetened beverage consumption, and policy aims of investing in low-income communities. (Am J Public Health. 2021;111(11):1986-1996. https://doi.org/10.2105/AJPH.2021.306464).",2021-10-22 +,ORGANIZATIONAL CULTURE AND WORKFORCE CONTRIBUTIONS TO QUALITY IN LONG-TERM CARE : ORGANISATIONAL CULTURE CHANGE IN RESIDENTIAL AGED CARE,"Abstract There is increasing demand for quality residential care services in the face of constrained resources. In previous work, we developed an intervention comprising external facilitation of change cycles (‘TOrCCh’: Towards Organisational Culture Change), implemented by staff work teams. This study was undertaken to develop, and evaluate a toolkit and training resource to support sustainable culture change in residential aged care facilities (RACF), eventually with minimal external facilitation. Eight RACFs across two Australian States participated. A toolkit was drafted iteratively engaging participating sites and a reference group. Participating facilities undertook one change cycle with some external facilitation by research staff. The toolkit was then refined, and a second change cycle was undertaken using the toolkit, with minimal external facilitation. Qualitative data were collected from project sponsors and/or managers, work teams, and other care staff. Participants perceived benefits including staff development, increased communication, teamwork and leadership. The intervention was perceived to provide a generic approach which could be applied to solve agreed challenges in the work place (“Let’s TOrCCh it!”), generating useful outcomes. The role of a project sponsor, and organisational support, were perceived as important for sustainability. Challenges were the complexity and application of the toolkit resource and management of work place constraints. 
Final products for the TOrCCh Project comprised Workteam Members and Leaders Guides as well as additional tools and resources accessible from https://www.perkins.org.au/wacha/torcch/. Our findings demonstrate that staff teams can work together to achieve change when provided with a toolkit and process.",2017-06-30 +34313872,Putting the cart before the horse: claims for mirror self-recognition in horses are unfounded.,"The recent article by Baragli, Scopa, Maglieri, and Palagi (Anim Cogn https://doi.org/10.1007/s10071-021-01502-7 , 2021) that claims to demonstrate mirror self-recognition (MSR) in horses is not based on compelling evidence. We identify problems with their experimental procedures, data, and assertion about ""demonstrating MSR at group level."" Examples of these problems include incomplete experimental design, absence of important control conditions, inappropriate terminology, suboptimal mark application procedures and coding of videos, ambiguity of videos presented as supporting evidence, and inconsistencies in data presentation and interpretation. It is not the case that their study ""marks a turning point in the analytical technique of MSR exploration.""",2021-07-27 +29564831,"ChromothripsisDB: A Curated Database for the Documentation, Visualization, and Mining of Chromothripsis Data.","ChromothripsisDB ( http://cgma.scu.edu.cn/ChromothripsisDB ) is a manually curated database containing a unified description of published chromothripsis cases and relevant genomic aberrations. Available data includes copy number alterations, chromosome structural variations, and gene annotations. The criteria used for detecting chromothripsis in each study are also provided. At present, the molecular mechanisms involved in chromothripsis phenomenon are not fully understood. Thus, further studies with large number of identified chromothripsis samples are needed. 
The current release of ChromothripsisDB contains more than 400 patient samples, representing over 100 research articles. It represents an extraordinary resource for mining the existing knowledge of chromothripsis.",2018-01-01 +33772125,What is the best caries removal strategy for primary molars?,"Aim This systematic review and meta-analysis of randomised controlled trials (RCTs) aimed to compare the efficacy of different caries removal techniques: complete caries removal (CCR), selective caries removal (SCR) and stepwise caries removal (SWR) for deep carious lesions in vital primary teeth.Data sources The design of this review followed the PRISMA guidance ( http://www.prisma-statement.org/ ). Relevant studies were identified using electronic databases (PubMed [Medline], Cochrane Library, EMBASE) and finally reference lists were screened. The US National Institutes of Health Trials Register (NIHTR; http://clinicaltrials.gov ) and World Health Organisation International Clinical Trials Registry Platform (WHO ICTRP; http://apps.who.int/trialsearch ) were used to help assess publication bias, as it was not possible to test funnel plot asymmetry. Duplicates were located and eliminated using EndNote X7 programme.Study selection To be included, studies had to be published RCTs comparing SCR or SWR with CCR as caries removal strategies for deep carious lesions in vital primary teeth. The outcomes were pulp exposure, pulpo-periodontal complications (clinical and radiological failures) and/or restorative failures. RCTs applying these caries removal techniques were excluded if one of the other trial arms did not involve caries removal (that is, Hall Technique, therapeutic sealing of cavity lesions). 
Initially, 1,374 potentially eligible articles were identified, out of which 15 (English or French language only) were selected for full-text screening, which included ten relevant references corresponding to eight studies. Data extraction and synthesis Two authors independently extracted data using a piloted data extraction sheet, with a third reviewer resolving any disagreements. The authors performed conventional intention-to-treat and per-protocol meta-analyses, and calculated odds ratios (ORs) as effect estimates in the random-effects model, using Revman5. Results The eight included RCTs were conducted between 1977 and 2018. They include 669 patients and 824 teeth, with follow-up ranging from four weeks to 24 months. Collective results showed reduced risk of pulp exposure after SCR (OR: 0.10, 95% CI [0.04, 0.25]) or SWR (OR: 0.20, 95% CI [0.09, 0.44]), compared with CCR. There was a higher risk of composite restorative failure (OR: 2.61, 95% CI [1.05, 6.49]) using United States Public Health Service (USPHS) criteria, after SCR was compared with CCR only in intention-to-treat analysis. However, when comparing the risk of clinical or radiographic failure of pulpo-periodontal complications, no difference was found between SCR, CCR or SWR. Conclusion The conclusions of the paper are that there is a significant decrease in pulp exposure risk with SCR and SWR in comparison with CCR. However, there is a need for further studies with less risk of bias powered to report on the long-term outcomes of pulpo-periodontal health and restoration longevity.",2021-01-01 +31570194,The Landscape of Circular RNA Expression in the Human Brain.,"

Background

Circular RNAs (circRNAs) are enriched in the mammalian brain and upregulated in response to neuronal differentiation and depolarization. These RNA molecules, formed by noncanonical back-splicing, have both regulatory and translational potential.

Methods

Here, we carried out an extensive characterization of circRNA expression in the human brain, in nearly 200 human brain samples, from both healthy controls and autism cases.

Results

We identified hundreds of novel circRNAs and demonstrated that circRNAs are not expressed stochastically, but rather as major isoforms. We characterized interindividual variability of circRNA expression in the human brain and showed that interindividual variability is less pronounced than variability between the cerebral cortex and cerebellum. Finally, we identified a circRNA coexpression module upregulated in autism samples, thereby adding another layer of complexity to the transcriptome changes observed in the autism brain.

Conclusions

These data provide a comprehensive catalog of circRNAs, as well as a deeper insight into their expression in the human brain, and are available as a free resource in browsable format (http://www.voineagulab.unsw.edu.au/circ_rna).",2019-08-07 +32960888,Home automation using general purpose household electric appliances with Raspberry Pi and commercial smartphone.,"This study presents the design and implementation of a home automation system that focuses on the use of ordinary electrical appliances for remote control using Raspberry Pi and relay circuits and does not use expensive IP-based devices. Common Lights, Heating, Ventilation, and Air Conditioning (HVAC), fans, and other electronic devices are among the appliances that can be used in this system. A smartphone app is designed that helps the user to design the smart home to his actual home via easy and interactive drag & drop option. The system provides control over the appliances via both the local network and remote access. Data logging over the Microsoft Azure cloud database ensures system recovery in case of gateway failure and data record for later use. Periodical notifications also help the user to optimize the usage of home appliances. Moreover, the user can set his preferences and the appliances are auto turned off and on to meet user-specific requirements. Raspberry Pi acting as the server maintains the database of each appliance. HTTP web interface and Apache server are used for communication between the Android app and Raspberry Pi. With a 5v relay circuit and micro-processor Raspberry Pi, the proposed system is low-cost, energy-efficient, easy to operate, and affordable for low-income houses.",2020-09-22 +29457794,Recon3D enables a three-dimensional view of gene variation in human metabolism.,"Genome-scale network reconstructions have helped uncover the molecular basis of metabolism. 
Here we present Recon3D, a computational resource that includes three-dimensional (3D) metabolite and protein structure data and enables integrated analyses of metabolic functions in humans. We use Recon3D to functionally characterize mutations associated with disease, and identify metabolic response signatures that are caused by exposure to certain drugs. Recon3D represents the most comprehensive human metabolic network model to date, accounting for 3,288 open reading frames (representing 17% of functionally annotated human genes), 13,543 metabolic reactions involving 4,140 unique metabolites, and 12,890 protein structures. These data provide a unique resource for investigating molecular mechanisms of human metabolism. Recon3D is available at http://vmh.life.",2018-02-19 +29106664,ProteomicsDB.,"ProteomicsDB (https://www.ProteomicsDB.org) is a protein-centric in-memory database for the exploration of large collections of quantitative mass spectrometry-based proteomics data. ProteomicsDB was first released in 2014 to enable the interactive exploration of the first draft of the human proteome. To date, it contains quantitative data from 78 projects totalling over 19k LC-MS/MS experiments. A standardized analysis pipeline enables comparisons between multiple datasets to facilitate the exploration of protein expression across hundreds of tissues, body fluids and cell lines. We recently extended the data model to enable the storage and integrated visualization of other quantitative omics data. This includes transcriptomics data from e.g. NCBI GEO, protein-protein interaction information from STRING, functional annotations from KEGG, drug-sensitivity/selectivity data from several public sources and reference mass spectra from the ProteomeTools project. The extended functionality transforms ProteomicsDB into a multi-purpose resource connecting quantification and meta-data for each protein. 
The rich user interface helps researchers to navigate all data sources in either a protein-centric or multi-protein-centric manner. Several options are available to download data manually, while our application programming interface enables accessing quantitative data systematically.",2018-01-01 +34618085,iRegNet: an integrative Regulatory Network analysis tool for Arabidopsis thaliana.,"Gene expression is delicately controlled via multilayered genetic and/or epigenetic regulatory mechanisms. Rapid development of the high-throughput sequencing (HTS) technology and its derivative methods including chromatin immunoprecipitation sequencing (ChIP-seq) and DNA affinity purification sequencing (DAP-seq) have generated a large volume of data on DNA-protein interactions (DPIs) and histone modifications on a genome-wide scale. However, the ability to comprehensively retrieve empirically validated upstream regulatory networks of genes of interest (GOIs) and genomic regions of interest (ROIs) remains limited. Here, we present integrative Regulatory Network (iRegNet), a web application that analyzes the upstream regulatory network for user-queried GOIs or ROIs in the Arabidopsis (Arabidopsis thaliana) genome. iRegNet covers the largest empirically proven DNA-binding profiles of Arabidopsis transcription factors (TFs) and non-TF proteins, and histone modifications obtained from all currently available Arabidopsis ChIP-seq and DAP-seq data. iRegNet not only catalogs upstream regulomes and epigenetic chromatin states for single-query gene/genomic region but also suggests significantly overrepresented upstream genetic regulators and epigenetic chromatin states of user-submitted multiple query genes/genomic regions. Furthermore, gene-to-gene coexpression index and protein-protein interaction information were also integrated into iRegNet for a more reliable identification of upstream regulators and realistic regulatory networks. 
Thus, iRegNet will help discover upstream regulators as well as molecular regulatory networks of GOI(s) and/or ROI(s), and is freely available at http://chromatindynamics.snu.ac.kr:8082/iRegNet_main.",2021-11-01 +29724163,EuGI: a novel resource for studying genomic islands to facilitate horizontal gene transfer detection in eukaryotes.,"BACKGROUND:Genomic islands (GIs) are inserts of foreign DNA that have potentially arisen through horizontal gene transfer (HGT). There are evidences that GIs can contribute significantly to the evolution of prokaryotes. The acquisition of GIs through HGT in eukaryotes has, however, been largely unexplored. In this study, the previously developed GI prediction tool, SeqWord Gene Island Sniffer (SWGIS), is modified to predict GIs in eukaryotic chromosomes. Artificial simulations are used to estimate ratios of predicting false positive and false negative GIs by inserting GIs into different test chromosomes and performing the SWGIS v2.0 algorithm. Using SWGIS v2.0, GIs are then identified in 36 fungal, 22 protozoan and 8 invertebrate genomes. RESULTS:SWGIS v2.0 predicts GIs in large eukaryotic chromosomes based on the atypical nucleotide composition of these regions. Averages for predicting false negative and false positive GIs were 20.1% and 11.01% respectively. A total of 10,550 GIs were identified in 66 eukaryotic species with 5299 of these GIs coding for at least one functional protein. The EuGI web-resource, freely accessible at http://eugi.bi.up.ac.za , was developed that allows browsing the database created from identified GIs and genes within GIs through an interactive and visual interface. CONCLUSIONS:SWGIS v2.0 along with the EuGI database, which houses GIs identified in 66 different eukaryotic species, and the EuGI web-resource, provide the first comprehensive resource for studying HGT in eukaryotes.",2018-05-03 +34823845,Fetal magnetic resonance imaging (MRI) enhances the diagnosis of congenital body anomalies.,"

Aims

We sought to assess variability and concordance between fetal MRI and ultrasound (USS) in the evaluation of fetal body abnormalities.

Methods

All fetal body anomalies reported on F-MRI within the iFIND database (http://www.ifindproject.com) were included. Differences in findings regarding anomalies on contemporaneous USS were explored. Three clinical specialists evaluated each case independently, and the anomaly severity was graded from ""insignificant"" to ""lethal"". The value of MRI in alteration of either antenatal or postnatal care was established.

Results

Fifty-four cases were identified consisting of 5 healthy controls, 37 with USS-identified body anomalies, and 12 with known CNS or cardiac anomalies. In fetuses with a known body anomaly, information on the MRI was relevant to change the clinical course in 59% of cases. There was also an incidental detection rate of 7% in fetuses with known cardiac or CNS anomalies, or 1.5% of normal control, although these were rarely clinically relevant. Importantly, fetuses undergoing MRI for cardiac concerns did have major anomalies that were missed (one case of oesophageal atresia and two cases of ARM).

Conclusions

In cases where fetal anomalies are suspected, F-MRI is a valuable means of further characterizing anomalies and may detect additional anomalies in fetuses with recognized cardiac or CNS anomalies. In fetuses with a recognized body anomaly, more than half of those scanned by MRI had information available which changed clinical management. Importantly there were also incidental findings in healthy control fetuses, so the management of these needs to be recognized in fetal MRI research.

Level of evidence

II, Prospective cohort study.",2021-10-30 +33411920,"Neisseria gonorrhoeae Sequence Typing for Antimicrobial Resistance (NG-STAR) clonal complexes are consistent with genomic phylogeny and provide simple nomenclature, rapid visualization and antimicrobial resistance (AMR) lineage predictions.","

Objectives

Surveillance of antimicrobial resistance (AMR) in Neisseria gonorrhoeae, supported by molecular typing, ideally through genome sequencing, is imperative. We defined N. gonorrhoeae Sequence Typing for Antimicrobial Resistance (NG-STAR) clonal complexes (CCs) and validated their usefulness in gonococcal AMR surveillance.

Methods

All NG-STAR alleles and STs available in the public database (https://ngstar.canada.ca/) were analysed using PHYLOViZ 2.0 to define CCs according to the closest founder ST with ≥5 identical alleles and founding ST with the highest number of links. The published 2013 European gonococcal dataset (n = 1054), the 2016 WHO reference strain panel (n = 14) and N. gonorrhoeae isolates with ceftriaxone resistance determinant penA-60.001 (n = 7) from several countries were used for validation.

Results

The majority of the isolates (n = 1063) were designated to 71 CCs. The most common CC was CC90 (n = 194), followed by CC63 (n = 166), CC139 (n = 73), CC158 (n = 73) and CC127 (n = 62). CC90 included isolates belonging to the internationally spread MDR clone N. gonorrhoeae Multi-Antigen Sequence Typing (NG-MAST) G1407 (predominantly MLST ST1901). The ceftriaxone-resistant isolates with penA-60.001 (n = 7) belonged to CC73 or STs linking between CC90 and CC73 (ST233 and ST1133). Phylogenomic analysis revealed that NG-STAR CCs more appropriately correlated to phylogenomic AMR clusters compared with MLST STs, NG-MAST STs, NG-MAST genogroups and NG-STAR STs.

Conclusions

NG-STAR CCs: are consistent with the gonococcal genome phylogeny; allow rapid visualizations with limited computational requirements; provide a simple, reproducible and portable nomenclature (for WGS and conventional Sanger sequencing data); and predict AMR lineages. Phenotypic AMR surveillance, supplemented with WGS, is imperative and NG-STAR CCs can effectively support this.",2021-03-01 +29106667,"MicrobiomeDB: a systems biology platform for integrating, mining and analyzing microbiome experiments.","MicrobiomeDB (http://microbiomeDB.org) is a data discovery and analysis platform that empowers researchers to fully leverage experimental variables to interrogate microbiome datasets. MicrobiomeDB was developed in collaboration with the Eukaryotic Pathogens Bioinformatics Resource Center (http://EuPathDB.org) and leverages the infrastructure and user interface of EuPathDB, which allows users to construct in silico experiments using an intuitive graphical 'strategy' approach. The current release of the database integrates microbial census data with sample details for nearly 14 000 samples originating from human, animal and environmental sources, including over 9000 samples from healthy human subjects in the Human Microbiome Project (http://portal.ihmpdcc.org/). 
Query results can be statistically analyzed and graphically visualized via interactive web applications launched directly in the browser, providing insight into microbial community diversity and allowing users to identify taxa associated with any experimental covariate.",2018-01-01 +,"Fruit and Nut Germplasm Collections: Treasuries of Genetic Diversity: The USDA-ARS National Clonal Germplasm Repository for Tree Fruit, Nut Crops, and Grapes, Davis, CA","The National Clonal Germplasm Repository Davis, CA (NCGR) curates the national collections of the following 14 Mediterranean fruit and nut crops: almond, apricot, cherry, fig, grape, kiwifruit, mulberry, olive, peach, persimmon, pistachio, plum, pomegranate, and walnut. The overarching goal is to preserve these genetics for current and future generations. The challenge is that these crops do not breed true and must therefore be maintained as plants in the field, making preservation of clonal crops considerably more expensive than annual crops that can be stored as seeds. The mission of the Repository is to acquire additional genetics to fill gaps in the collections, maintain the plants in the collections, freely distribute the genetics (typically as dormant scionwood) to scientists worldwide, and to evaluate the collections and make those data available online on the Germplasm Resources Information Network (GRIN-Global, https://npgsweb.ars-grin.gov/gringlobal/search.aspx). Because of free distribution of the germplasm, there is nothing modern and under patent or proprietary protection in the collections. Rather, they consist of older cultivars; breeder lines; and the genetically richest portion of the collections, the crop wild relatives. All are available for scientific study. 
Challenges with managing an expanding collection are discussed.",2020-04-01 +34716373,PlantPathMarks (PPMdb): an interactive hub for pathways-based markers in plant genomes.,"ABSTRACT: Over the past decade, the problem of finding an efficient gene-targeting marker set or signature for plant trait characterization has remained challenging. Many databases focusing on pathway mining have been released with one major deficiency, as they lack to develop marker sets that target only genes controlling a specific pathway or certain biological process. Herein, we present the PlantPathMarks database (PPMdb) as a comprehensive, web-based, user-friendly, and interactive hub for pathway-based markers in plant genomes. Based on our newly developed pathway gene set mining approach, two novel pathway-based marker systems called pathway gene-targeted markers (PGTMs) and pathway microsatellite-targeted markers (PMTMs) were developed as a novel class of annotation-based markers. In the PPMdb database, 2,690,742 pathway-based markers reflecting 9,894 marker panels were developed across 82 plant genomes. The markers include 691,555 PGTMs and 1,999,187 PMTMs. Across these genomes, 165,378 enzyme-coding genes were mapped against 126 KEGG reference pathway maps. PPMdb is furnished with three interactive visualization tools (Map Browse, JBrowse and Species Comparison) to visualize, map, and compare the developed markers over their KEGG reference pathway maps. All the stored marker panels can be freely downloaded. PPMdb promises to create a radical shift in the paradigm of the area of molecular marker research. The use of PPMdb as a mega-tool represents an impediment for non-bioinformatician plant scientists and breeders. 
PPMdb is freely available at http://ppmdb.easyomics.org .",2021-10-29 +33846313,Go Get Data (GGD) is a framework that facilitates reproducible access to genomic data.,"The rapid increase in the amount of genomic data provides researchers with an opportunity to integrate diverse datasets and annotations when addressing a wide range of biological questions. However, genomic datasets are deposited on different platforms and are stored in numerous formats from multiple genome builds, which complicates the task of collecting, annotating, transforming, and integrating data as needed. Here, we developed Go Get Data (GGD) as a fast, reproducible approach to installing standardized data recipes. GGD is available on Github ( https://gogetdata.github.io/ ), is extendable to other data types, and can streamline the complexities typically associated with data integration, saving researchers time and improving research reproducibility.",2021-04-12 +29036329,m6AVar: a database of functional variants involved in m6A modification.,"Identifying disease-causing variants among a large number of single nucleotide variants (SNVs) is still a major challenge. Recently, N6-methyladenosine (m6A) has become a research hotspot because of its critical roles in many fundamental biological processes and a variety of diseases. Therefore, it is important to evaluate the effect of variants on m6A modification, in order to gain a better understanding of them. Here, we report m6AVar (http://m6avar.renlab.org), a comprehensive database of m6A-associated variants that potentially influence m6A modification, which will help to interpret variants by m6A function. The m6A-associated variants were derived from three different m6A sources including miCLIP/PA-m6A-seq experiments (high confidence), MeRIP-Seq experiments (medium confidence) and transcriptome-wide predictions (low confidence). Currently, m6AVar contains 16 132 high, 71 321 medium and 326 915 low confidence level m6A-associated variants. 
We also integrated the RBP-binding regions, miRNA-targets and splicing sites associated with variants to help users investigate the effect of m6A-associated variants on post-transcriptional regulation. Because it integrates the data from genome-wide association studies (GWAS) and ClinVar, m6AVar is also a useful resource for investigating the relationship between the m6A-associated variants and disease. Overall, m6AVar will serve as a useful resource for annotating variants and identifying disease-causing variants.",2018-01-01 +33738787,Co-designing an Early Menopause Digital Resource: Model for Interdisciplinary Knowledge Translation.,"Early menopause/premature ovarian insufficiency is associated with negative health impacts, unmet information needs, delayed diagnosis, and variation in management. Co-designed digital resources for women with early menopause/premature ovarian insufficiency and health practitioners were developed to address information needs and support management. A five-phase mixed methods multidisciplinary research, co-design and translation process comprised: (1) survey/interviews with women and health practitioners to explore early menopause/premature ovarian insufficiency needs, experiences, and management; (2) appraisal of clinical guidelines to develop management algorithms; (3) digital resource development (https://healthtalkaustralia.org/early-menopause-experiences-and-perspectives-of-women-and-health-professionals/; (4) evaluation; and (5) dissemination/implementation. The digital resources included audio/video clips of women with early menopause/premature ovarian insufficiency and health practitioners providing early menopause/premature ovarian insufficiency care, a question prompt list, health practitioner algorithms, information links, and a list of services for women, achieving high satisfaction ratings from women and health practitioners. 
Engaging our stakeholder partners, multimodal dissemination has included community and conference presentations, social media, lay and professional publications, and webinars. This project provides a model for successful interdisciplinary co-design research translation to improve women's health.",2020-09-01 +34160298,Mining of Consumer Product Ingredient and Purchasing Data to Identify Potential Chemical Coexposures.,"

Background

Chemicals in consumer products are a major contributor to human chemical coexposures. Consumers purchase and use a wide variety of products containing potentially thousands of chemicals. There is a need to identify potential real-world chemical coexposures to prioritize in vitro toxicity screening. However, due to the vast number of potential chemical combinations, this identification has been a major challenge.

Objectives

We aimed to develop and implement a data-driven procedure for identifying prevalent chemical combinations to which humans are exposed through purchase and use of consumer products.

Methods

We applied frequent itemset mining to an integrated data set linking consumer product chemical ingredient data with product purchasing data from 60,000 households to identify chemical combinations resulting from co-use of consumer products.

Results

We identified co-occurrence patterns of chemicals over all households as well as those specific to demographic groups based on race/ethnicity, income, education, and family composition. We also identified chemicals with the highest potential for aggregate exposure by identifying chemicals occurring in multiple products used by the same household. Last, a case study of chemicals active in estrogen and androgen receptor in silico models revealed priority chemical combinations co-targeting receptors involved in important biological signaling pathways.

Discussion

Integration and comprehensive analysis of household purchasing data and product-chemical information provided a means to assess human near-field exposure and inform selection of chemical combinations for high-throughput screening in in vitro assays. https://doi.org/10.1289/EHP8610.",2021-06-23 +33398323,Expansion and re-classification of the extracytoplasmic function (ECF) σ factor family.,"Extracytoplasmic function σ factors (ECFs) represent one of the major bacterial signal transduction mechanisms in terms of abundance, diversity and importance, particularly in mediating stress responses. Here, we performed a comprehensive phylogenetic analysis of this protein family by scrutinizing all proteins in the NCBI database. As a result, we identified an average of ∼10 ECFs per bacterial genome and 157 phylogenetic ECF groups that feature a conserved genetic neighborhood and a similar regulation mechanism. Our analysis expands previous classification efforts ∼50-fold, enriches many original ECF groups with previously unclassified proteins and identifies 22 entirely new ECF groups. The ECF groups are hierarchically related to each other and are further composed of subgroups with closely related sequences. This two-tiered classification allows for the accurate prediction of common promoter motifs and the inference of putative regulatory mechanisms across subgroups composing an ECF group. This comprehensive, high-resolution description of the phylogenetic distribution of the ECF family, together with the massive expansion of classified ECF sequences and an openly accessible data repository called 'ECF Hub' (https://www.computational.bio.uni-giessen.de/ecfhub), will serve as a powerful hypothesis-generator to guide future research in the field.",2021-01-01 +33051671,StreptomeDB 3.0: an updated compendium of streptomycetes natural products.,"Antimicrobial resistance is an emerging global health threat necessitating the rapid development of novel antimicrobials. 
Remarkably, the vast majority of currently available antibiotics are natural products (NPs) isolated from streptomycetes, soil-dwelling bacteria of the genus Streptomyces. However, there is still a huge reservoir of streptomycetes NPs which remains pharmaceutically untapped and a compendium thereof could serve as a source of inspiration for the rational design of novel antibiotics. Initially released in 2012, StreptomeDB (http://www.pharmbioinf.uni-freiburg.de/streptomedb) is the first and only public online database that enables the interactive phylogenetic exploration of streptomycetes and their isolated or mutasynthesized NPs. In this third release, there are substantial improvements over its forerunners, especially in terms of data content. For instance, about 2500 unique NPs were newly annotated through manual curation of about 1300 PubMed-indexed articles, published in the last five years since the second release. To increase interoperability, StreptomeDB entries were hyperlinked to several spectral, (bio)chemical and chemical vendor databases, and also to a genome-based NP prediction server. Moreover, predicted pharmacokinetic and toxicity profiles were added. Lastly, some recent real-world use cases of StreptomeDB are highlighted, to illustrate its applicability in life sciences.",2021-01-01 +33137193,"AcrHub: an integrative hub for investigating, predicting and mapping anti-CRISPR proteins.","Anti-CRISPR (Acr) proteins naturally inhibit CRISPR-Cas adaptive immune systems across bacterial and archaeal domains of life. This emerging field has caused a paradigm shift in the way we think about the CRISPR-Cas system, and promises a number of useful applications from gene editing to phage therapy. As the number of verified and predicted Acrs rapidly expands, few online resources have been developed to deal with this wealth of information. 
To overcome this shortcoming, we developed AcrHub, an integrative database to provide an all-in-one solution for investigating, predicting and mapping Acr proteins. AcrHub catalogs 339 non-redundant experimentally validated Acrs and over 70 000 predicted Acrs extracted from genome sequence data from a diverse range of prokaryotic organisms and their viruses. It integrates state-of-the-art predictors to predict potential Acrs, and incorporates three analytical modules: similarity analysis, phylogenetic analysis and homology network analysis, to analyze their relationships with known Acrs. By interconnecting all modules as a platform, AcrHub presents enriched and in-depth analysis of known and potential Acrs and therefore provides new and exciting insights into the future of Acr discovery and validation. AcrHub is freely available at http://pacrispr.erc.monash.edu/AcrHub/.",2021-01-01 +32921303,ANDDigest: a new web-based module of ANDSystem for the search of knowledge in the scientific literature.,"

Background

The rapid growth of scientific literature has rendered the task of finding relevant information one of the critical problems in almost any research. Search engines, like Google Scholar, Web of Knowledge, PubMed, Scopus, and others, are highly effective in document search; however, they do not allow knowledge extraction. In contrast to the search engines, text-mining systems provide extraction of knowledge with representations in the form of semantic networks. Of particular interest are tools performing a full cycle of knowledge management and engineering, including automated retrieval, integration, and representation of knowledge in the form of semantic networks, their visualization, and analysis. STRING, Pathway Studio, MetaCore, and others are well-known examples of such products. Previously, we developed the Associative Network Discovery System (ANDSystem), which also implements such a cycle. However, the drawback of these systems is dependence on the employed ontologies describing the subject area, which limits their functionality in searching information based on user-specified queries.

Results

The ANDDigest system is a new web-based module of the ANDSystem tool, permitting searching within PubMed by using dictionaries from the ANDSystem tool and sets of user-defined keywords. ANDDigest allows performing the search based on complex queries simultaneously, taking into account many types of objects from the ANDSystem's ontology. The system has a user-friendly interface, providing sorting, visualization, and filtering of the found information, including mapping of mentioned objects in text, linking to external databases, sorting of data by publication date, citations number, journal H-indices, etc. The system provides data on trends for identified entities based on dynamics of interest according to the frequency of their mentions in PubMed by years.

Conclusions

The main feature of ANDDigest is its functionality, serving as a specialized search for information about multiple associative relationships of objects from the ANDSystem's ontology vocabularies, taking into account user-specified keywords. The tool can be applied to the interpretation of experimental genetics data, the search for associations between molecular genetics objects, and the preparation of scientific and analytical reviews. It is presently available at https://anddigest.sysbio.ru/ .",2020-09-14 +34037798,CPA: a web-based platform for consensus pathway analysis and interactive visualization.,"In molecular biology and genetics, there is a large gap between the ease of data collection and our ability to extract knowledge from these data. Contributing to this gap is the fact that living organisms are complex systems whose emerging phenotypes are the results of multiple complex interactions taking place on various pathways. This demands powerful yet user-friendly pathway analysis tools to translate the now abundant high-throughput data into a better understanding of the underlying biological phenomena. Here we introduce Consensus Pathway Analysis (CPA), a web-based platform that allows researchers to (i) perform pathway analysis using eight established methods (GSEA, GSA, FGSEA, PADOG, Impact Analysis, ORA/Webgestalt, KS-test, Wilcox-test), (ii) perform meta-analysis of multiple datasets, (iii) combine methods and datasets to accurately identify the impacted pathways underlying the studied condition and (iv) interactively explore impacted pathways, and browse relationships between pathways and genes. The platform supports three types of input: (i) a list of differentially expressed genes, (ii) genes and fold changes and (iii) an expression matrix. It also allows users to import data from NCBI GEO. 
The CPA platform currently supports the analysis of multiple organisms using KEGG and Gene Ontology, and it is freely available at http://cpa.tinnguyen-lab.com.",2021-07-01 +,A Semi-automatic Diagnosis of Hip Dysplasia on X-Ray Films,"Background: Diagnosis of hip joint plays an important role in early screening of hip diseases such as coxarthritis, heterotopic ossification, osteonecrosis of the femoral head, etc. Early detection of hip dysplasia on X-ray films may probably conduce to early treatment of patients, which can help to cure patients or relieve their pain as much as possible. There has been no method or tool for automatic diagnosis of hip dysplasia till now. Results: A semi-automatic method for diagnosis of hip dysplasia is proposed. Considering the complexity of medical imaging, the contour of acetabulum, femoral head, and the upper side of thigh-bone are manually marked. Feature points are extracted according to marked contours. Traditional knowledge-driven diagnostic criteria is abandoned. Instead, a data-driven diagnostic model for hip dysplasia is presented. Angles including CE, sharp, and Tonnis angle which are commonly measured in clinical diagnosis, are automatically obtained. Samples, each of which consists of these three angle values, are used for clustering according to their densities in a descending order. A three-dimensional normal distribution derived from the cluster is built and regarded as the parametric model for diagnosis of hip dysplasia. Experiments on 143 X-ray films including 286 samples (i.e., 143 left and 143 right hip joints) demonstrate the effectiveness of our method. According to the method, a computer-aided diagnosis tool is developed for the convenience of clinicians, which can be downloaded at http://www.bio-nefu.com/HIPindex/. The data used to support the findings of this study are available from the corresponding authors upon request. 
Conclusions: This data-driven method provides a more objective measurement of the angles. Besides, it provides a new criterion for diagnosis of hip dysplasia other than doctors' experience deriving from knowledge-driven clinical manual, which actually corresponds to very different way for clinical diagnosis of hip dysplasia.",2020-01-01 +27794045,Update of the FANTOM web resource: high resolution transcriptome of diverse cell types in mammals.,"Upon the first publication of the fifth iteration of the Functional Annotation of Mammalian Genomes collaborative project, FANTOM5, we gathered a series of primary data and database systems into the FANTOM web resource (http://fantom.gsc.riken.jp) to facilitate researchers to explore transcriptional regulation and cellular states. In the course of the collaboration, primary data and analysis results have been expanded, and functionalities of the database systems enhanced. We believe that our data and web systems are invaluable resources, and we think the scientific community will benefit for this recent update to deepen their understanding of mammalian cellular organization. We introduce the contents of FANTOM5 here, report recent updates in the web resource and provide future perspectives.",2016-10-27 +,First Report of ‘Candidatus Phytoplasma aurantifolia’-Related Strains Infecting Potato (Solanum tuberosum) in Jordan,"Potato (Solanum tuberosum L. ‘Spunta’, Solanaceae) is an important economic crop in Jordan. In November 2013, potato plants showing symptoms of leaf reddening, aerial and abnormally small and deformed tubers, suggestive of possible phytoplasma infection, were observed in three potato fields with disease incidence of 3 to 5% in the Jordan Valley region. Leaf samples were collected from 14 symptomatic and five asymptomatic potato plants. Total genomic DNA was extracted by a cetyltrimethylammonium bromide protocol (Doyle and Doyle 1987). 
The 16S rRNA gene was partially amplified using the phytoplasma universal primer pairs P1/P7 followed by nested polymerase chain reaction (PCR) with primer pair R16F2n/R16R2 (Deng and Hiruki 1991; Gundersen and Lee 1996). DNA extracts of three symptomatic potato samples showed positive results for phytoplasma infection, yielding a specific PCR amplicon around 1.25 kbp. No phytoplasma was detected in asymptomatic potato plants that were sampled from the same field. PCR products from the three positive samples were cloned into pGEMT-Easy vector, sequenced (Macrogen, Amsterdam, The Netherlands) and analyzed through BLAST search. The sequences (GenBank accession nos. MH085230, MH085231, and MH085232) from Jordan shared 99% identity with sequences of ‘Candidatus Phytoplasma aurantifolia’ from Thailand (JN006076 and JN006079) and China (JQ923433). Furthermore, the identity among all Jordanian phytoplasma strains of this study was 99%. Additionally, a virtual restriction fragment length polymorphism was done for (sub)group classification (iPhyClassifier, http://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi, Beltsville, MD, Zhao et al. 2009) and confirmed the identity of the phytoplasma as a member of subgroup 16SrII. Phylogenetic analysis (BioNumerics, Applied Math, Belgium) based on partial 16S rRNA gene sequences of representative phytoplasma strains placed the Jordanian potato phytoplasma strains in a single distinct cluster together with ‘Ca. P. aurantifolia’ subgroup 16SrII. Taken together, these results confirmed the unique occurrence of ‘Ca. P. aurantifolia’-related strains in potato in Jordan Valley. The presence of potatoes infected by a member of the subgroup 16SrII phytoplasma in Jordan may have serious epidemiological implications on this crop. Our results will open an avenue to future studies on the spread and impact of this phytoplasma and its potential insect vectors. To the best of our knowledge, this is the first report of ‘Ca. P. 
aurantifolia’-related strains infecting potato in Jordan.",2019-06-01 +34706638,ANAT 3.0: a framework for elucidating functional protein subnetworks using graph-theoretic and machine learning approaches.,"

Background

ANAT is a Cytoscape plugin for the inference of functional protein-protein interaction networks in yeast and human. It is a flexible graphical tool for scientists to explore and elucidate the protein-protein interaction pathways of a process under study.

Results

Here we present ANAT3.0, which comes with updated PPI network databases of 544,455 (human) and 155,504 (yeast) interactions, and a new machine-learning layer for refined network elucidation. Together they improve network reconstruction, yielding a more than twofold increase in the quality of reconstructing known signaling pathways from KEGG.

Conclusions

ANAT3.0 includes improved network reconstruction algorithms and more comprehensive protein-protein interaction networks than previous versions. ANAT is available for download on the Cytoscape Appstore and at https://www.cs.tau.ac.il/~bnet/ANAT/ .",2021-10-27 +34794089,Optimal control gradient precision trade-offs: Application to fast generation of DeepControl libraries for MRI.,"We have recently demonstrated supervised deep learning methods for rapid generation of radiofrequency pulses in magnetic resonance imaging (https://doi.org/10.1002/mrm.27740, https://doi.org/10.1002/mrm.28667). Unlike the previous iterative optimization approaches, deep learning methods generate a pulse using a fixed number of floating-point operations - this is important in MRI, where patient-specific pulses preferably must be produced in real time. However, deep learning requires vast training libraries, which must be generated using the traditional methods, e.g., iterative quantum optimal control methods. Those methods are usually variations of gradient descent, and the calculation of the gradient of the performance metric with respect to the pulse waveform can be the most numerically intensive step. In this communication, we explore various ways in which the calculation of gradients in quantum optimal control theory may be accelerated. Four optimization avenues are explored: truncated commutator series expansions at zeroth and first order, a novel midpoint truncation scheme at first order, and the exact complex-step method. For the spin systems relevant to MRI, the first-order midpoint truncation is found to be sufficiently accurate, but also significantly faster than the machine precision gradient. 
This makes the generation of training databases for the machine learning methods considerably more realistic.",2021-10-27 +34807777,Effectiveness of an Aggression Management Training Program in Japan: A Quasi-Experimental Study.,This study evaluated the effects of the Comprehensive Violence Prevention and Protection Program (CVPPP) training for managing patient aggression in 95 participants who attended the FY 2019 program using a single-group pretest-posttest design. The comparison of findings before and 1 month after the training showed a significant improvement in staff anger (p < 0.01) and their negative (p < 0.01) and positive attitudes (p < 0.01) toward psychiatric inpatient aggression and confidence (p < 0.01). Staff with controlling and self-affirming traits provided more effective care and demonstrated a positive attitude toward inpatient aggression (p < 0.05).Supplemental data for this article is available online at https://doi.org/10.1080/01612840.2021.1999542 .,2021-11-22 +33270826,vSampler: fast and annotation-based matched variant sampling tool.,"

Summary

Sampling of control variants having matched properties with input variants is widely used in enrichment analysis of genome-wide association studies/quantitative trait loci and negative data construction for pathogenic/regulatory variant prediction methods. Spurious enrichment results because of confounding factors, such as minor allele frequency and linkage disequilibrium pattern, can be avoided by calibration of statistical significance based on matched controls. Here, we presented vSampler which can generate sets of randomly drawn variants with comprehensive choices of matching properties, such as tissue/cell type-specific epigenomic features. Importantly, the development of a novel data structure and sampling algorithms for vSampler makes it significantly faster than existing tools.

Availability and implementation

vSampler web server and local program are available at http://mulinlab.org/vsampler.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +32501478,SnpHub: an easy-to-set-up web server framework for exploring large-scale genomic variation data in the post-genomic era with applications in wheat. ,"The cost of high-throughput sequencing is rapidly decreasing, allowing researchers to investigate genomic variations across hundreds or even thousands of samples in the post-genomic era. The management and exploration of these large-scale genomic variation data require programming skills. The public genotype querying databases of many species are usually centralized and implemented independently, making them difficult to update with new data over time. Currently, there is a lack of a widely used framework for setting up user-friendly web servers to explore new genomic variation data in diverse species. Here, we present SnpHub, a Shiny/R-based server framework for retrieving, analysing, and visualizing large-scale genomic variation data that can be easily set up on any Linux server. After a pre-building process based on the provided VCF files and genome annotation files, the local server allows users to interactively access single-nucleotide polymorphisms and small insertions/deletions with annotation information by locus or gene and to define sample sets through a web page. Users can freely analyse and visualize genomic variations in heatmaps, phylogenetic trees, haplotype networks, or geographical maps. Sample-specific sequences can be accessed as replaced by detected sequence variations. SnpHub can be applied to any species, and we build up a SnpHub portal website for wheat and its progenitors based on published data in recent studies. SnpHub and its tutorial are available at http://guoweilong.github.io/SnpHub/. 
The wheat-SnpHub-portal website can be accessed at http://wheat.cau.edu.cn/Wheat_SnpHub_Portal/.",2020-06-01 +34581805,"BioSeq-BLM: a platform for analyzing DNA, RNA and protein sequences based on biological language models.","In order to uncover the meanings of 'book of life', 155 different biological language models (BLMs) for DNA, RNA and protein sequence analysis are discussed in this study, which are able to extract the linguistic properties of 'book of life'. We also extend the BLMs into a system called BioSeq-BLM for automatically representing and analyzing the sequence data. Experimental results show that the predictors generated by BioSeq-BLM achieve comparable or even obviously better performance than the exiting state-of-the-art predictors published in literatures, indicating that BioSeq-BLM will provide new approaches for biological sequence analysis based on natural language processing technologies, and contribute to the development of this very important field. In order to help the readers to use BioSeq-BLM for their own experiments, the corresponding web server and stand-alone package are established and released, which can be freely accessed at http://bliulab.net/BioSeq-BLM/.",2021-12-01 +34723208,A Transfer Learning-Based Approach with Deep CNN for COVID-19- and Pneumonia-Affected Chest X-ray Image Classification.,"The COVID-19 pandemic creates a significant impact on everyone's life. One of the fundamental movements to cope with this challenge is identifying the COVID-19-affected patients as early as possible. In this paper, we classified COVID-19, Pneumonia, and Healthy cases from the chest X-ray images by applying the transfer learning approach on the pre-trained VGG-19 architecture. We use MongoDB as a database to store the original image and corresponding category. 
The analysis is performed on a public dataset of 3797 X-ray images, among them COVID-19 affected (1184 images), Pneumonia affected (1294 images), and Healthy (1319 images) (https://www.kaggle.com/tawsifurrahman/covid19-radiography-database/version/3). This research gained an accuracy of 97.11%, average precision of 97%, and average Recall of 97% on the test dataset.",2021-10-26 +34699529,"The GH19 Engineering Database: Sequence diversity, substrate scope, and evolution in glycoside hydrolase family 19.","The glycoside hydrolase 19 (GH19) is a bifunctional family of chitinases and endolysins, which have been studied for the control of plant fungal pests, the recycle of chitin biomass, and the treatment of multi-drug resistant bacteria. The GH19 domain-containing sequences (22,461) were divided into a chitinase and an endolysin subfamily by analyzing sequence networks, guided by taxonomy and the substrate specificity of characterized enzymes. The chitinase subfamily was split into seventeen groups, thus extending the previous classification. The endolysin subfamily is more diverse and consists of thirty-four groups. Despite their sequence diversity, twenty-six residues are conserved in chitinases and endolysins, which can be distinguished by two specific sequence patterns at six and four positions, respectively. Their location outside the catalytic cleft suggests a possible mechanism for substrate specificity that goes beyond the direct interaction with the substrate. The evolution of the GH19 catalytic domain was investigated by large-scale phylogeny. The inferred evolutionary history and putative horizontal gene transfer events differ from previous works. While no clear patterns were detected in endolysins, chitinases varied in sequence length by up to four loop insertions, causing at least eight distinct presence/absence loop combinations. 
The annotated GH19 sequences and structures are accessible via the GH19 Engineering Database (GH19ED, https://gh19ed.biocatnet.de). The GH19ED has been developed to support the prediction of substrate specificity and the search for novel GH19 enzymes from neglected taxonomic groups or in regions of the sequence space where few sequences have been described yet.",2021-10-26 +34126844,Incidence and prevalence of psychogenic nonepileptic seizures (functional seizures): a systematic review and an analytical study.,"

Aim

Psychogenic nonepileptic seizures (PNES) or functional seizures are universal phenomena. However, data on their epidemiology is limited. The aim of the current study was to review the literature on the epidemiology of PNES and to provide analytical estimates of its incidence and prevalence based on the direct data that are available from previous studies on PNES.

Methods

The methods of this work had two parts: (1) MEDLINE, PsycINFO, and Scopus from inception to 19 October 2019 were systematically searched. (2) The analytical study of the incidence and prevalence of PNES was performed, based on the following data from previous studies: incidence of PNES, duration of PNES before making a diagnosis, outcome and mortality of PNES.

Results

The search strategy yielded five articles; three were on the incidence and two on the prevalence. In the analytical part of the study, the incidence of PNES was calculated to be 3.1 (95% Confidence Interval: 1.1-5.1) per 100,000 population per year. The calculated prevalence rate of PNES in 2019 was 108.5 (95% Confidence Interval: 39.2-177.8) per 100,000 population, in the USA.

Conclusion

While the generalizability of these calculated incidence and prevalence rates to other places in the world is limited, they give us a reasonable hint that PNES is a common condition and that its prevalence is much higher than was previously thought. Supplemental data for this article is available online at https://doi.org/10.1080/00207454.2021.1942870.",2021-06-28 +32830796,Prospective Surveillance and Risk Reduction of Cancer Treatment-Related Lymphedema: Systematic Review and Meta-Analysis.,"

Problem identification

Secondary lymphedema is a chronic condition that may result from cancer-related treatments. Evidence is emerging on prospective surveillance and risk reduction.

Literature search

Databases were systematically searched through April 1, 2019, for comparative studies evaluating interventions aiming to prevent lymphedema in patients with cancer.

Data evaluation

A random-effects model was used to perform meta-analysis, when appropriate.

Synthesis

A total of 26 studies (4,095 patients) were included, with 23 providing data sufficient for meta-analysis. Surveillance programs increased the likelihood of detecting lymphedema. Physiotherapy, exercise programs, and delayed exercise reduced the incidence of lymphedema.

Implications for research

Future research should standardize (a) evidence-based interventions to reduce the development of lymphedema and increase the likelihood of early detection and (b) outcome measures to build a body of evidence that leads to practice change.

Supplemental material can be found at https://onf.ons.org/supplementary-material-systematic-review-cancer-treatment-related-lymphedema.",2020-09-01 +34580383,An enhanced variant effect predictor based on a deep generative model and the Born-Again Networks.,"The development of an accurate and reliable variant effect prediction tool is important for research in human genetic diseases. A large number of predictors have been developed towards this goal, yet many of these predictors suffer from the problem of data circularity. Here we present MTBAN (Mutation effect predictor using the Temporal convolutional network and the Born-Again Networks), a method for predicting the deleteriousness of variants. We apply a form of knowledge distillation technique known as the Born-Again Networks (BAN) to a previously developed deep autoregressive generative model, mutationTCN, to achieve an improved performance in variant effect prediction. As the model is fully unsupervised and trained only on the evolutionarily related sequences of a protein, it does not suffer from the problem of data circularity which is common across supervised predictors. When evaluated on a test dataset consisting of deleterious and benign human protein variants, MTBAN shows an outstanding predictive ability compared to other well-known variant effect predictors. We also offer a user-friendly web server to predict variant effects using MTBAN, freely accessible at http://mtban.kaist.ac.kr . To our knowledge, MTBAN is the first variant effect prediction tool based on a deep generative model that provides a user-friendly web server for the prediction of deleteriousness of variants.",2021-09-27 +33136473,"A Web-Based Platform on Coronavirus Disease-19 to Maintain Predicted Diagnostic, Drug, and Vaccine Candidates.","A web-based resource CoronaVIR (https://webs.iiitd.edu.in/raghava/coronavir/) has been developed to maintain the predicted and existing information on coronavirus severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). 
We have integrated multiple modules, including ""Genomics,"" ""Diagnosis,"" ""Immunotherapy,"" and ""Drug Designing"" to understand the holistic view of this pandemic medical disaster. The genomics module provides genomic information of different strains of this virus to understand genomic level alterations. The diagnosis module includes detailed information on currently-in-use diagnostics tests as well as five novel universal primer sets predicted using in silico tools. The Immunotherapy module provides information on epitope-based potential vaccine candidates (e.g., LQLPQGTTLPKGFYA, VILLNKHIDAYKTFPPTEPKKDKKKK, EITVATSRTLS, GKGQQQQGQTV, SELVIGAVILR) predicted using state-of-the-art software and resources in the field of immune informatics. These epitopes have the potential to activate both adaptive (e.g., B cell and T cell) and innate (e.g., vaccine adjuvants) immune systems as well as suitable for all strains of SARS-CoV-2. Besides, we have also predicted potential candidates for siRNA-based therapy and RNA-based vaccine adjuvants. The drug designing module maintains information about potential drug targets, tertiary structures, and potential drug molecules. These potential drug molecules were identified from FDA-approved drugs using the docking-based approach. We also compiled information from the literature and Internet on potential drugs, repurposing drugs, and monoclonal antibodies. To understand host-virus interaction, we identified cell-penetrating peptides in the virus. In this study, state-of-the-art techniques have been used for predicting the potential candidates for diagnostics and therapeutics.",2020-10-30 +33836695,"One ""misunderstood"" health issue: demonstrating and communicating the safety of influenza a vaccination in pregnancy: a systematic review and meta-analysis.","

Background

The American College of Obstetricians and Gynecologists (ACOG) makes certain recommendations including the annual influenza vaccination of pregnant and pre-pregnant women during influenza (flu) season with an inactivated influenza vaccine as soon as it becomes available. The Centers for Disease Control and Prevention's (CDC) Advisory Committee on Immunization Practices in association with ACOG state that the vaccine is safe to be given any trimester during pregnancy. However, due to a lack of communication, the public is unaware of the effects of influenza A vaccination in pregnancy. Since this is a vital public health concern, we aimed to communicate with evidence, the safety of influenza A vaccination in pregnancy in order to improve the rate of influenza A vaccines in pregnant women.

Methods

This health communication issue was based on the impact of influenza vaccine on fetal outcomes. Therefore, a search was carried out through medical-based online databases including: Cochrane Central, EMBASE, Web of Science, MEDLINE, http://www.ClinicalTrials.gov , and Google scholar for relevant English-based publications. Adverse fetal outcomes were considered as the endpoints of this analysis. The most specific RevMan 5.3 (latest version) software was used to carry out this analysis. Risk ratios (RR) with 95% confidence intervals (CI) were involved in data and results representation and interpretation.

Results

A total number of 679,992 pregnant women participated in this analysis. Based on this current analysis, premature/preterm birth (< 37 weeks) was significantly reduced in pregnant women who were vaccinated for influenza A (RR: 0.80, 95% CI: 0.69-0.92; P = 0.002) as compared to those women who were not vaccinated. Similarly, influenza A vaccination decreased the risk for very preterm birth (< 32 weeks) (RR: 0.70, 95% CI: 0.58-0.84; P = 0.0001). The risks for infants with low birth weight (RR: 0.71, 95% CI: 0.49-1.04; P = 0.08), very low birth weight (RR: 0.69, 95% CI: 0.23-2.11; P = 0.52) and infants small for gestational age (RR: 0.93, 95% CI: 0.83-1.05; P = 0.26) were not increased with the vaccine. Influenza A vaccination was not associated with increased risks of stillbirth (RR: 0.63, 95% CI: 0.38-1.03; P = 0.07), birth defects (RR: 0.67, 95% CI: 0.26-1.72; P = 0.41), admission to neonatal intensive care unit or Apgar score < 7 in 5 min.

Conclusion

Influenza vaccine is completely safe in pregnancy. It significantly lowers premature birth and is not associated with any serious adverse neonatal outcome. Hence, this important piece of information should be communicated and conveyed to all pregnant women, for a safer and healthier pregnancy. At last, this public health issue should further be addressed to the population through media and other communication means in order to improve the rate of influenza A vaccines in pregnant women for a healthier and more productive population.",2021-04-09 +30967549,iFISH is a publically available resource enabling versatile DNA FISH to study genome architecture.,"DNA fluorescence in situ hybridization (DNA FISH) is a powerful method to study chromosomal organization in single cells. At present, there is a lack of free resources of DNA FISH probes and probe design tools which can be readily applied. Here, we describe iFISH, an open-source repository currently comprising 380 DNA FISH probes targeting multiple loci on the human autosomes and chromosome X, as well as a genome-wide database of optimally designed oligonucleotides and a freely accessible web interface ( http://ifish4u.org ) that can be used to design DNA FISH probes. We individually validate 153 probes and take advantage of our probe repository to quantify the extent of intermingling between multiple heterologous chromosome pairs, showing a much higher extent of intermingling in human embryonic stem cells compared to fibroblasts. In conclusion, iFISH is a versatile and expandable resource, which can greatly facilitate the use of DNA FISH in research and diagnostics.",2019-04-09 +32898258,EXPath 2.0: An Updated Database for Integrating High-Throughput Gene Expression Data with Biological Pathways.,"Co-expressed genes tend to have regulatory relationships and participate in similar biological processes. 
Construction of gene correlation networks from microarray or RNA-seq expression data has been widely applied to study transcriptional regulatory mechanisms and metabolic pathways under specific conditions. Furthermore, since transcription factors (TFs) are critical regulators of gene expression, it is worth investigating TFs on the promoters of co-expressed genes. Although co-expressed genes and their related metabolic pathways can be easily identified from previous resources, such as EXPath and EXPath Tool, this information is not simultaneously available to identify their regulatory TFs. EXPath 2.0 is an updated database for the investigation of regulatory mechanisms in various plant metabolic pathways with 1,881 microarray and 978 RNA-seq samples. There are six significant improvements in EXPath 2.0: (i) the number of species has been extended from three to six to include Arabidopsis, rice, maize, Medicago, soybean and tomato; (ii) gene expression at various developmental stages have been added; (iii) construction of correlation networks according to a group of genes is available; (iv) hierarchical figures of the enriched Gene Ontology (GO) terms are accessible; (v) promoter analysis of genes in a metabolic pathway or correlation network is provided; and (vi) user's gene expression data can be uploaded and analyzed. Thus, EXPath 2.0 is an updated platform for investigating gene expression profiles and metabolic pathways under specific conditions. It facilitates users to access the regulatory mechanisms of plant biological processes. The new version is available at http://EXPath.itps.ncku.edu.tw.",2020-10-01 +34709876,"Cross-Sector Monitoring and Evaluation Framework: Social, Economic, and Health Conditions Impacted During the COVID-19 Pandemic.","Public Health 3.0 approaches are critical for monitoring disparities in economic, social, and overall health impacts following the COVID-19 pandemic and its associated policy changes to slow community spread. 
Timely, cross-sector data as identified using this approach help decisionmakers identify changes, track racial disparities, and address unintended consequences during a pandemic. We applied a monitoring and evaluation framework that combined policy changes with timely, relevant cross-sector data and community review. Indicators covered unemployment, basic needs, family violence, education, childcare, access to health care, and mental, physical, and behavioral health. In response to increasing COVID-19 cases, nonpharmaceutical intervention strategies were implemented in March 2020 in King County, Washington. By December 2020, 554 000 unemployment claims were filed. Social service calls increased 100%, behavioral health crisis calls increased 25%, and domestic violence calls increased 25%, with disproportionate impact on communities of color. This framework can be replicated by local jurisdictions to inform and address racial inequities in ongoing COVID-19 mitigation and recovery. Cross-sector collaboration between public health and sectors addressing the social determinants of health are an essential first step to have an impact on long-standing racial inequities. (Am J Public Health. 2021;111(S3):S215-S223. https://doi.org/10.2105/AJPH.2021.306422).",2021-10-01 +34327051,Detection of genomic regions associated malformations in newborn piglets: a machine-learning approach.,"

Background

A significant proportion of perinatal losses in pigs occurs due to congenital malformations. The purpose of this study is the identification of genomic loci associated with fetal malformations in piglets.

Methods

The malformations were divided into two groups: associated with limb defects (piglet splay leg) and associated with other congenital anomalies found in newborn piglets. 148 Landrace and 170 Large White piglets were selected for the study. A genome-wide association study based on the gradient boosting machine algorithm was performed to identify markers associated with congenital anomalies and piglet splay leg.

Results

Forty-nine SNPs (23 SNPs in Landrace pigs and 26 SNPs in Large White) were associated with congenital anomalies, 22 of which were localized in genes. A total of 156 SNPs (28 SNPs in Landrace; 128 in Large White) were identified for piglet splay leg, of which 79 SNPs were localized in genes. We have demonstrated that the gradient boosting machine algorithm can identify SNPs and their combinations associated with significant selection indicators of studied malformations and productive characteristics.

Data availability

Genotyping and phenotyping data are available at http://www.compubioverne.group/data-and-software/.",2021-07-22 +32119071,ProCaff: protein-carbohydrate complex binding affinity database.,"MOTIVATION:Protein-carbohydrate interactions perform several cellular and biological functions and their structure and function are mainly dictated by their binding affinity. Although plenty of experimental data on binding affinity are available, there is no reliable and comprehensive database in the literature. RESULTS:We have developed a database on binding affinity of protein-carbohydrate complexes, ProCaff, which contains 3122 entries on dissociation constant (Kd), Gibbs free energy change (ΔG), experimental conditions, sequence, structure and literature information. Additional features include the options to search, display, visualization, download and upload the data. AVAILABILITY AND IMPLEMENTATION:The database is freely available at http://web.iitm.ac.in/bioinfo2/procaff/. The website is implemented using HTML and PHP and supports recent versions of major browsers such as Chrome, Firefox, IE10 and Opera. CONTACT:gromiha@iitm.ac.in. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-06-01 +34560325,Discovering pesticides and their TPs in Luxembourg waters using open cheminformatics approaches.,"The diversity of hundreds of thousands of potential organic pollutants and the lack of (publicly available) information about many of them is a huge challenge for environmental sciences, engineering, and regulation. Suspect screening based on high-resolution liquid chromatography-mass spectrometry (LC-HRMS) has enormous potential to help characterize the presence of these chemicals in our environment, enabling the detection of known and newly emerging pollutants, as well as their potential transformation products (TPs). 
Here, suspect list creation (focusing on pesticides relevant for Luxembourg, incorporating data sources in 4 languages) was coupled to an automated retrieval of related TPs from PubChem based on high confidence suspect hits, to screen for pesticides and their TPs in Luxembourgish river samples. A computational workflow was established to combine LC-HRMS analysis and pre-screening of the suspects (including automated quality control steps), with spectral annotation to determine which pesticides and, in a second step, their related TPs may be present in the samples. The data analysis with Shinyscreen (https://gitlab.lcsb.uni.lu/eci/shinyscreen/), an open source software developed in house, coupled with custom-made scripts, revealed the presence of 162 potential pesticide masses and 96 potential TP masses in the samples. Further identification of these mass matches was performed using the open source approach MetFrag (https://msbi.ipb-halle.de/MetFrag/). Eventual target analysis of 36 suspects resulted in 31 pesticides and TPs confirmed at Level-1 (highest confidence), and five pesticides and TPs not confirmed due to different retention times. Spatio-temporal analysis of the results showed that TPs and pesticides followed similar trends, with a maximum number of potential detections in July. The highest detections were in the rivers Alzette and Mess and the lowest in the Sûre and Eisch. 
This study (a) added pesticides, classification information and related TPs into the open domain, (b) developed automated open source retrieval methods - both enhancing FAIRness (Findability, Accessibility, Interoperability and Reusability) of the data and methods; and (c) will directly support ""L'Administration de la Gestion de l'Eau"" on further monitoring steps in Luxembourg.",2021-09-21 +29161421,Europe PMC in 2017.,"Europe PMC (https://europepmc.org) is a comprehensive resource of biomedical research publications that offers advanced tools for search, retrieval, and interaction with the scientific literature. This article outlines new developments since 2014. In addition to delivering the core database and services, Europe PMC focuses on three areas of development: individual user services, data integration, and infrastructure to support text and data mining. Europe PMC now provides user accounts to save search queries and claim publications to ORCIDs, as well as open access profiles for authors based on public ORCID records. We continue to foster connections between scientific data and literature in a number of ways. All the data behind the paper - whether in structured archives, generic archives or as supplemental files - are now available via links to the BioStudies database. Text-mined biological concepts, including database accession numbers and data DOIs, are highlighted in the text and linked to the appropriate data resources. The SciLite community annotation platform accepts text-mining results from various contributors and overlays them on research articles as licence allows. 
In addition, text miners and developers can access all open content via APIs or via the FTP site.",2018-01-01 +33313828,PSORTdb 4.0: expanded and redesigned bacterial and archaeal protein subcellular localization database incorporating new secondary localizations.,"Protein subcellular localization (SCL) is important for understanding protein function, genome annotation, and aids identification of potential cell surface diagnostic markers, drug targets, or vaccine components. PSORTdb comprises ePSORTdb, a manually curated database of experimentally verified protein SCLs, and cPSORTdb, a pre-computed database of PSORTb-predicted SCLs for NCBI's RefSeq deduced bacterial and archaeal proteomes. We now report PSORTdb 4.0 (http://db.psort.org/). It features a website refresh, in particular a more user-friendly database search. It also addresses the need to uniquely identify proteins from NCBI genomes now that GI numbers have been retired. It further expands both ePSORTdb and cPSORTdb, including additional data about novel secondary localizations, such as proteins found in bacterial outer membrane vesicles. Protein predictions in cPSORTdb have increased along with the number of available microbial genomes, from approximately 13 million when PSORTdb 3.0 was released, to over 66 million currently. Now, analyses of both complete and draft genomes are included. This expanded database will be of wide use to researchers developing SCL predictors or studying diverse microbes, including medically, agriculturally and industrially important species that have both classic or atypical cell envelope structures or vesicles.",2021-01-01 +33237329,MobiDB: intrinsically disordered proteins in 2021.,"The MobiDB database (URL: https://mobidb.org/) provides predictions and annotations for intrinsically disordered proteins. Here, we report recent developments implemented in MobiDB version 4, regarding the database format, with novel types of annotations and an improved update process. 
The new website includes a re-designed user interface, a more effective search engine and advanced API for programmatic access. The new database schema gives more flexibility for the users, as well as simplifying the maintenance and updates. In addition, the new entry page provides more visualisation tools including customizable feature viewer and graphs of the residue contact maps. MobiDB v4 annotates the binding modes of disordered proteins, whether they undergo disorder-to-order transitions or remain disordered in the bound state. In addition, disordered regions undergoing liquid-liquid phase separation or post-translational modifications are defined. The integrated information is presented in a simplified interface, which enables faster searches and allows large customized datasets to be downloaded in TSV, Fasta or JSON formats. An alternative advanced interface allows users to drill deeper into features of interest. A new statistics page provides information at database and proteome levels. The new MobiDB version presents state-of-the-art knowledge on disordered proteins and improves data accessibility for both computational and experimental users.",2021-01-01 +34905997,Broad Anti-Cancer Activity Produced by Targeted Nutrients Deprivation (TND) of Multiple Non-Essential Amino Acids.,"It has been known for close to 100 years that the metabolism of cancer cells is altered and different than that of healthy cells in the body. On that basis, we have developed an entirely novel approach to managing cancer, termed Targeted Nutrients Deprivation (TND). TND employs a formulated diet depleted of multiple non-essential amino acids (NEAAs) that are required by tumor cells but not by normal cells. Cancer cells specifically require those NEAAs due to their heightened and rewired metabolism. We demonstrated that our first proprietary formulated TND diet-FTN203-significantly reduced the growth of multiple human tumor xenografts in mouse. 
In combination with chemotherapy and immunotherapy, FTN203 further enhanced therapeutic efficacy. Reliance on FTN203 as the sole nutrition source was shown to be safe without causing detrimental body-weight loss or internal organ damage. Our findings indicate that TND is a novel and safe approach to managing cancer.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.2013904 .",2021-12-15 +34608449,Hybrid In Silico Approach Reveals Novel Inhibitors of Multiple SARS-CoV-2 Variants.,"The National Center for Advancing Translational Sciences (NCATS) has been actively generating SARS-CoV-2 high-throughput screening data and disseminates it through the OpenData Portal (https://opendata.ncats.nih.gov/covid19/). Here, we provide a hybrid approach that utilizes NCATS screening data from the SARS-CoV-2 cytopathic effect reduction assay to build predictive models, using both machine learning and pharmacophore-based modeling. Optimized models were used to perform two iterative rounds of virtual screening to predict small molecules active against SARS-CoV-2. Experimental testing with live virus provided 100 (∼16% of predicted hits) active compounds (efficacy > 30%, IC50 ≤ 15 μM). Systematic clustering analysis of active compounds revealed three promising chemotypes which have not been previously identified as inhibitors of SARS-CoV-2 infection. Further investigation resulted in the identification of allosteric binders to host receptor angiotensin-converting enzyme 2; these compounds were then shown to inhibit the entry of pseudoparticles bearing spike protein of wild-type SARS-CoV-2, as well as South African B.1.351 and UK B.1.1.7 variants.",2021-09-17 +34309741,The Negative Religiousness-IQ Nexus is a Jensen Effect on Individual-Level Data: A Refutation of Dutton et al.'s 'The Myth of the Stupid Believer'.,"A recent study by Dutton et al. (J Relig Health 59:1567-1579. 
https://doi.org/10.1007/s10943-019-00926-3 , 2020) found that the religiousness-IQ nexus is not on g when comparing different groups with various degrees of religiosity and the non-religious. It suggested, accordingly, that the nexus related to the relationship between specialized analytic abilities on the IQ test and autism traits, with the latter predicting atheism. The study was limited by the fact that it was on group-level data, it used only one measure of religiosity that measure may have been confounded by the social element to church membership and it involved relatively few items via which a Jensen effect could be calculated. Here, we test whether the religiousness-IQ nexus is on g with individual-level data using archival data from the Vietnam Experience Study, in which 4462 US veterans were subjected to detailed psychological tests. We used multiple measures of religiosity-which we factor-analysed to a religion-factor-and a large number of items. We found, contrary to the findings of Dutton et al. (2020), that the IQ differences with regard to whether or not subjects believed in God are indeed a Jensen effect. We also uncovered a number of anomalies, which we explore.",2021-07-26 +34169131,CTD data over a repeated section in the Vema Channel.,"We present a CTD dataset of repeated sections across the Vema Channel in the South Atlantic approximately along 31°12' S between longitudes 39°18.0 W and 39°30.0' W. The Vema Channel is a narrow conduit for Antarctic Bottom Water across the Rio Grande Rise. The measurements at CTD stations (Conductivity-Temperature-Depth) across the Vema Channel were started by German scientists in 1991. In 2002, Russian scientists took part in this work and have been establishing stations across this standard section until recently. The data were collected using the Sea-Bird Electronics SBE-19 profiler. The data are presented in tabular format. 
The data are available at http://dx.doi.org/10.17632/hh4hhn6ny8.1.",2021-06-09 +32725804,Empirical Bayes small area prediction under a zero-inflated lognormal model with correlated random area effects. ,"Many variables of interest in agricultural or economical surveys have skewed distributions and can equal zero. Our data are measures of sheet and rill erosion called Revised Universal Soil Loss Equation - 2 (RUSLE2). Small area estimates of mean RUSLE2 erosion are of interest. We use a zero-inflated lognormal mixed effects model for small area estimation. The model combines a unit-level lognormal model for the positive RUSLE2 responses with a unit-level logistic mixed effects model for the binary indicator that the response is nonzero. In the Conservation Effects Assessment Project (CEAP) data, counties with a higher probability of nonzero responses also tend to have a higher mean among the positive RUSLE2 values. We capture this property of the data through an assumption that the pair of random effects for a county are correlated. We develop empirical Bayes (EB) small area predictors and a bootstrap estimator of the mean squared error (MSE). In simulations, the proposed predictor is superior to simpler alternatives. We then apply the method to construct EB predictors of mean RUSLE2 erosion for South Dakota counties. To obtain auxiliary variables for the population of cropland in South Dakota, we integrate a satellite-derived land cover map with a geographic database of soil properties. We provide an R Shiny application called viscover (available at https://lyux.shinyapps.io/viscover/) to visualize the overlay operations required to construct the covariates. On the basis of bootstrap estimates of the mean square error, we conclude that the EB predictors of mean RUSLE2 erosion are superior to direct estimators.",2020-07-28 +31992626,Microbial Life Deep Underfoot. ,"Soil is one of the most diverse microbial habitats on Earth. 
While the distribution and abundance of microbial taxa in surface soils have been well described, the phylogenetic and functional diversity of bacteria and archaea in deep-soil strata remains unexplored. Brewer et al. (mBio 10:e01318-19, 2019, https://doi.org/10.1128/mBio.01318-19) documented consistent shifts in the composition and genomic attributes of microbial communities as a function of depth in 20 soil pits that spanned a range of ecosystems across North America. The unique microorganisms found in deep soils appear to be adapted to conditions of low energy based on the recovery of genes that code for traits such as internal resource storage, mixotrophy, and dormancy.",2020-01-28 +31114925,ImmuneRegulation: a web-based tool for identifying human immune regulatory elements.,"Humans vary considerably both in their baseline and activated immune phenotypes. We developed a user-friendly open-access web portal, ImmuneRegulation, that enables users to interactively explore immune regulatory elements that drive cell-type or cohort-specific gene expression levels. ImmuneRegulation currently provides the largest centrally integrated resource on human transcriptome regulation across whole blood and blood cell types, including (i) ∼43,000 genotyped individuals with associated gene expression data from ∼51,000 experiments, yielding genetic variant-gene expression associations on ∼220 million eQTLs; (ii) 14 million transcription factor (TF)-binding region hits extracted from 1945 ChIP-seq studies; and (iii) the latest GWAS catalog with 67,230 published variant-trait associations. Users can interactively explore associations between queried gene(s) and their regulators (cis-eQTLs, trans-eQTLs or TFs) across multiple cohorts and studies. These regulators may explain genotype-dependent gene expression variations and be critical in selecting the ideal cohorts or cell types for follow-up studies or in developing predictive models. 
Overall, ImmuneRegulation significantly lowers the barriers between complex immune regulation data and researchers who want rapid, intuitive and high-quality access to the effects of regulatory elements on gene expression in multiple studies to empower investigators in translating these rich data into biological insights and clinical applications, and is freely available at https://immuneregulation.mssm.edu.",2019-07-01 +31432427,Essential Features and Use Cases of the Cerebrospinal Fluid Proteome Resource (CSF-PR).,"Every year, a large number of published studies present biomarkers for various neurological disorders. Many of these studies are based on mass spectrometry proteomics data and describe comparison of the abundance of proteins in cerebrospinal fluid between two or more disease groups. As the number of such studies is growing, it is no longer straightforward to obtain an overview of which specific proteins are increased or decreased between the numerous relevant diseases and their many subcategories, or to see the larger picture or trends between related diseases. To alleviate this situation, we therefore mined the literature for mass spectrometry-based proteomics studies including quantitative protein data from cerebrospinal fluid of patients with multiple sclerosis, Alzheimer's disease, and Parkinson's disease and organized the extracted data in the Cerebrospinal Fluid Proteome Resource (CSF-PR). CSF-PR is freely available online at http://probe.uib.no/csf-pr , is highly interactive, and allows for easy navigation, visualization, and export of the published scientific data. 
This chapter will guide the user through some of the most important features of the tool and show examples of the suggested use cases.",2019-01-01 +34676796,"Relationship between Depression and Anxiety, Health Status and Lung Function in Patients with Alpha-1 Antitrypsin Deficiency.","Alpha-1 Antitrypsin deficiency (AATD) is a genetic condition that can lead to Chronic Obstructive Pulmonary Disease. The burden of psychological disease, its impact and contributing factors in patients with AATD are largely unknown. This study determined the prevalence of depression and anxiety in AATD and its clinical impact. All subjects with PiZZ/PiZnull (n = 635) and PiSZ (n = 111) genotypes within the AATD registry who had sufficient data to calculate pulmonary physiological and health status (HS) decline were grouped as those with or without a diagnosis of depression and/or anxiety. Univariate and multivariate analyses were performed on physiological, demographic and HS parameters. Depression and/or anxiety was present in 16.4% overall in both PiSZ and PiZZ/PiZnull cohorts and was associated with lower baseline pulmonary function and worse HS. In the multivariable analysis of the PiZZ/PiZnull cohort, a greater average decline in FEV1% predicted was observed in those with depression and/or anxiety than those without (-1.53 SD ± 2.26 per year, -0.99 ± 1.79, respectively; p = 0.03) but there was no difference in HS decline (p = 0.33). No differences were seen in the PiSZ cohort. Dyspnoea (mMRC score) was generally worse in those with depression and/or anxiety than those without. Comorbidity burden did not differ between those with or without depression and/or anxiety. Disease severity and progression may be contributing to the prevalence of psychological factors in PiZZ/PiZnull patients. 
Patients who are declining rapidly should be actively monitored for psychological co-morbidity and treated by cognitive or pharmacological means.Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.1991904 .",2021-10-22 +34721973,Development of a novel embryonic germline gene-related prognostic model of lung adenocarcinoma.,"

Background

Emerging evidence implicates the correlation of embryonic germline genes with the tumor progress and patient's outcome. However, the prognostic value of these genes in lung adenocarcinoma (LUAD) has not been fully studied. Here we systematically evaluated this issue, and constructed a novel signature and a nomogram associated with embryonic germline genes for predicting the outcomes of lung adenocarcinoma.

Methods

The LUAD cohorts retrieved from The Cancer Genome Atlas (TCGA) and Gene Expression Omnibus (GEO) database were used as training set and testing set, respectively. The embryonic germline genes were downloaded from the website https://venn.lodder.dev. Then, the differentially expressed embryonic germline genes (DEGGs) between the tumor and normal samples were identified by limma package. The functional enrichment and pathway analyses were also performed by clusterProfiler package. The prognostic model was constructed by the least absolute shrinkage and selection operator (LASSO)-Cox regression method. Survival and Receiver Operating Characteristic (ROC) analyses were performed to validate the model using training set and four testing GEO datasets. Finally, a prognostic nomogram based on the signature genes was constructed using multivariate regression method.

Results

Among the identified 269 DEGGs, 249 were up-regulated and 20 were down-regulated. GO and KEGG analyses revealed that these DEGGs were mainly enriched in the process of cell proliferation and DNA damage repair. Then, 103 DEGGs with prognostic value were identified by univariate Cox regression and further filtered by LASSO method. The resulting sixteen DEGGs were included in step multivariate Cox regression and an eleven embryonic germline gene related signature (EGRS) was constructed. The model could robustly stratify the LUAD patients into high-risk and low-risk groups in both training and testing sets, and low-risk patients had much better outcomes. The multi-ROC analysis also showed that the EGRS model had the best predictive efficacy compared with other common clinicopathological factors. The EGRS model also showed robust predictive ability in four independent external datasets, and the area under curve (AUC) was 0.726 (GSE30219), 0.764 (GSE50081), 0.657 (GSE37745) and 0.668 (GSE72094). More importantly, the expression level of some genes in EGRS has a significant correlation with the progression of LUAD clinicopathology, suggesting these genes might play an important role in the progression of LUAD. Finally, based on EGRS genes, we built and calibrated a nomogram for conveniently evaluating patients' outcomes.",2021-10-21 +34009252,MassExplorer: a computational tool for analyzing desorption electrospray ionization mass spectrometry data ,"In the last few years, desorption electrospray ionization mass spectrometry imaging (DESI-MSI) has been increasingly used for simultaneous detection of thousands of metabolites and lipids from human tissues and biofluids. 
To successfully find the most significant differences between two sets of DESI-MSI data (e.g., healthy vs disease) requires the application of accurate computational and statistical methods that can pre-process the data under various normalization settings and help identify these changes among thousands of detected metabolites. Here, we report MassExplorer, a novel computational tool, to help pre-process DESI-MSI data, visualize raw data, build predictive models using the statistical lasso approach to select for a sparse set of significant molecular changes, and interpret selected metabolites. This tool, which is available for both online and offline use, is flexible for both chemists and biologists and statisticians as it helps in visualizing structure of DESI-MSI data and in analyzing the statistically significant metabolites that are differentially expressed across both sample types. Based on the modules in MassExplorer, we expect it to be immediately useful for various biological and chemical applications in mass spectrometry. MassExplorer is available as an online R-Shiny application or Mac OS X compatible standalone application. The application, sample performance, source code and corresponding guide can be found at: https://zarelab.com/research/massexplorer-a-tool-to-help-guide-analysis-of-mass-spectrometry-samples/. Supplementary data are available at Bioinformatics online.",2021-05-19 +34721349,Strategy and Performance Evaluation of Low-Frequency Variant Calling for SARS-CoV-2 Using Targeted Deep Illumina Sequencing.,"The ongoing COVID-19 pandemic, caused by SARS-CoV-2, constitutes a tremendous global health issue. Continuous monitoring of the virus has become a cornerstone to make rational decisions on implementing societal and sanitary measures to curtail the virus spread. 
Additionally, emerging SARS-CoV-2 variants have increased the need for genomic surveillance to detect particular strains because of their potentially increased transmissibility, pathogenicity and immune escape. Targeted SARS-CoV-2 sequencing of diagnostic and wastewater samples has been explored as an epidemiological surveillance method for the competent authorities. Currently, only the consensus genome sequence of the most abundant strain is taken into consideration for analysis, but multiple variant strains are now circulating in the population. Consequently, in diagnostic samples, potential co-infection(s) by several different variants can occur or quasispecies can develop during an infection in an individual. In wastewater samples, multiple variant strains will often be simultaneously present. Currently, quality criteria are mainly available for constructing the consensus genome sequence, and some guidelines exist for the detection of co-infections and quasispecies in diagnostic samples. The performance of detection and quantification of low-frequency variants using whole genome sequencing (WGS) of SARS-CoV-2 remains largely unknown. Here, we evaluated the detection and quantification of mutations present at low abundances using the mutations defining the SARS-CoV-2 lineage B.1.1.7 (alpha variant) as a case study. Real sequencing data were in silico modified by introducing mutations of interest into raw wild-type sequencing data, or by mixing wild-type and mutant raw sequencing data, to construct mixed samples subjected to WGS using a tiling amplicon-based targeted metagenomics approach and Illumina sequencing. As anticipated, higher variation and lower sensitivity were observed at lower coverages and allelic frequencies. We found that detection of all low-frequency variants at an abundance of 10, 5, 3, and 1%, requires at least a sequencing coverage of 250, 500, 1500, and 10,000×, respectively. 
Although increasing variability of estimated allelic frequencies at decreasing coverages and lower allelic frequencies was observed, its impact on reliable quantification was limited. This study provides a highly sensitive low-frequency variant detection approach, which is publicly available at https://galaxy.sciensano.be, and specific recommendations for minimum sequencing coverages to detect clade-defining mutations at certain allelic frequencies. This approach will be useful to detect and quantify low-frequency variants in both diagnostic (e.g., co-infections and quasispecies) and wastewater [e.g., multiple variants of concern (VOCs)] samples.",2021-10-13 +34468307,Remote homology clustering identifies lowly conserved families of effector proteins in plant-pathogenic fungi. ,"Plant diseases caused by fungal pathogens are typically initiated by molecular interactions between 'effector' molecules released by a pathogen and receptor molecules on or within the plant host cell. In many cases these effector-receptor interactions directly determine host resistance or susceptibility. The search for fungal effector proteins is a developing area in fungal-plant pathology, with more than 165 distinct confirmed fungal effector proteins in the public domain. For a small number of these, novel effectors can be rapidly discovered across multiple fungal species through the identification of known effector homologues. However, many have no detectable homology by standard sequence-based search methods. This study employs a novel comparison method (RemEff) that is capable of identifying protein families with greater sensitivity than traditional homology-inference methods, leveraging a growing pool of confirmed fungal effector data to enable the prediction of novel fungal effector candidates by protein family association. 
Resources relating to the RemEff method and data used in this study are available from https://figshare.com/projects/Effector_protein_remote_homology/87965.",2021-09-01 +35083039,KinderMiner Web: a simple web tool for ranking pairwise associations in biomedical applications. ,"Many important scientific discoveries require lengthy experimental processes of trial and error and could benefit from intelligent prioritization based on deep domain understanding. While exponential growth in the scientific literature makes it difficult to keep current in even a single domain, that same rapid growth in literature also presents an opportunity for automated extraction of knowledge via text mining. We have developed a web application implementation of the KinderMiner algorithm for proposing ranked associations between a list of target terms and a key phrase. Any key phrase and target term list can be used for biomedical inquiry. We built the web application around a text index derived from PubMed. It is the first publicly available implementation of the algorithm, is fast and easy to use, and includes an interactive analysis tool. The KinderMiner web application is a public resource offering scientists a cohesive summary of what is currently known about a particular topic within the literature, and helping them to prioritize experiments around that topic. It performs comparably or better to similar state-of-the-art text mining tools, is more flexible, and can be applied to any biomedical topic of interest. It is also continually improving with quarterly updates to the underlying text index and through response to suggestions from the community. The web application is available at https://www.kinderminer.org.",2020-07-30 +34152132,SBOLCanvas: A Visual Editor for Genetic Designs.,"SBOLCanvas is a web-based application that can create and edit genetic constructs using the SBOL data and visual standards. 
SBOLCanvas allows a user to create a genetic design visually and structurally from start to finish. It also allows users to incorporate existing SBOL data from a SynBioHub repository. By the nature of being a web-based application, SBOLCanvas is readily accessible and easy to use. A live version of the latest release can be found at https://sbolcanvas.org.",2021-06-21 +34544142,Eagle for better genome-wide association mapping. ,"Eagle is an R package for multi-locus association mapping on a genome-wide scale. It is unlike other multi-locus packages in that it is easy to use for R users and non-users alike. It has two modes of use, command line and graphical user interface. Eagle is fully documented and has its own supporting website, http://eagle.r-forge.r-project.org/index.html. Eagle is a significant improvement over the method-of-choice, single-locus association mapping. It has greater power to detect SNP-trait associations. It is based on model selection, linear mixed models, and a clever idea on how random effects can be used to identify SNP-trait associations. Through an example with real mouse data, we demonstrate Eagle's ability to bring clarity and increased insight to single-locus findings. Initially, we see Eagle complementing single-locus analyses. However, over time, we hope the community will make, increasingly, multi-locus association mapping their method-of-choice for the analysis of genome-wide association study data.",2021-09-01 +33238003,Siberian sturgeon multi-tissue reference transcriptome database. ,"Siberian sturgeon is a long lived and late maturing fish farmed for caviar production in 50 countries. Functional genomics enable to find genes of interest for fish farming. In the absence of a reference genome, a reference transcriptome is very useful for sequencing based functional studies. 
We present here a high-quality transcriptome assembly database built using RNA-seq reads coming from brain, pituitary, gonadal, liver, stomach, kidney, anterior kidney, heart, embryonic and pre-larval tissues. It will facilitate crucial research on topics such as puberty, reproduction, growth, food intake and immunology. This database represents a major contribution to the publicly available sturgeon transcriptome reference datasets. The database is publicly available at http://siberiansturgeontissuedb.sigenae.org Supplementary information:  Supplementary data are available at Database online.",2020-11-01 +32848764,GEO Data Sets Analysis Identifies COX-2 and Its Related Micro RNAs as Biomarkers for Non-Ischemic Heart Failure.,"Heart failure (HF) is a heterogeneous clinical syndrome with a variety of causes, risk factors, and pathology. Clinically, only brain natriuretic peptide (BNP) or its precursor N-terminus proBNP (NTproBNP) has been validated for HF diagnosis, but they are also affected by other conditions, such as female gender, renal disease, and acute coronary syndromes, and false low levels in the setting of obesity or flash pulmonary edema. In addition, there is no one biomarker which could encompass all heart failure phenotypes. Advances in bioinformatics have provided us with large databases that characterize the complex genetic and epigenetic changes associated with human diseases. The use of data mining strategies on public access databases to identify previously unknown disease markers is an innovative approach to identify potential biomarkers or even new therapeutic targets in complex diseases such as heart failure (HF). 
In this study, we analyzed the genomic and transcription data of HF peripheral blood mononuclear cell (PBMC) samples obtained from the Gene Expression Omnibus data sets using Omicsbean online database (http://www.omicsbean.cn/) and found that the prostaglandin-endoperoxide synthase 2 (PTGS2), also named as cyclooxygenase-2 (COX-2), as well as its related micro RNAs including miR-1297 and miR-4649-3p might be used as potential biomarkers for non-ischemic heart failure. Our result showed that plasma COX-2 and miR-4649-3p were significantly up-regulated, whereas the plasma miR-1297 was significantly decreased, and miR-4649-3p displayed high predictive power for non-ischemic heart failure.",2020-08-05 +30266410,HCCDB: A Database of Hepatocellular Carcinoma Expression Atlas.,"Hepatocellular carcinoma (HCC) is highly heterogeneous in nature and has been one of the most common cancer types worldwide. To ensure repeatability of identified gene expression patterns and comprehensively annotate the transcriptomes of HCC, we carefully curated 15 public HCC expression datasets that cover around 4000 clinical samples and developed the database HCCDB to serve as a one-stop online resource for exploring HCC gene expression with user-friendly interfaces. The global differential gene expression landscape of HCC was established by analyzing the consistently differentially expressed genes across multiple datasets. Moreover, a 4D metric was proposed to fully characterize the expression pattern of each gene by integrating data from The Cancer Genome Atlas (TCGA) and Genotype-Tissue Expression (GTEx). To facilitate a comprehensive understanding of gene expression patterns in HCC, HCCDB also provides links to third-party databases on drug, proteomics, and literatures, and graphically displays the results from computational analyses, including differential expression analysis, tissue-specific and tumor-specific expression analysis, survival analysis, and co-expression analysis. 
HCCDB is freely accessible at http://lifeome.net/database/hccdb.",2018-08-01 +32977347,"[Registry Research Funding of the German Society of Plastic, Reconstructive and Aesthetic Surgeons (DGPRÄC) and Research Funding Report 2019/2020].","

Background

 Since 2015/16 the DGPRÄC collects, evaluates and publishes the research activities of academic sections, departments and clinics for plastic surgery at university hospitals in Germany, in order to raise the awareness of plastic surgical research performance.

Materials and methods

 The directors of plastic surgical academic institutions were contacted via the DGPRÄC and asked to report any requested/approved and rejected research applications to public, non-public and industrial funding organizations. Data was collected in our previously established online database: https://docs.google.com/forms/d/e/1FAIpQLSe6F5xmTyw-k7VKJx_2jkPA4LBXsA0sgBGMrC3rx_4bHj6uzQ/viewform?usp=sf_link. In addition, applications were identified via the DFG's public database GEPRIS.

Results

 A total of 41 funding applications to the public funding institutes DFG, BMBF, BMWi, BMG and EU were identified. 75.6 % (31/41) of the applications had already been approved at the time of data collection, of which 77.4 % (24/31) were DFG, 9.7 % (3/31) were BMWi, 6.5 % (2/31) were EU and 3.2 % (1/31) were BMBF or BMG applications. The average funding amounted to 358 301 Euro. In 50.0 % (12/24) of the cases, the approved DFG proposals were assigned to the subject review board 205-27 Orthopedics, Trauma Surgery, Reconstructive Surgery.

Conclusion

 The continuous publication of plastic surgical research funding reports submitted by the convention of university plastic surgeons of the DGPRÄC portraits the excellent, collaborative research activity in the field of plastic surgery.",2020-09-25 +34326396,"AnnoMiner is a new web-tool to integrate epigenetics, transcription factor occupancy and transcriptomics data to predict transcriptional regulators.","Gene expression regulation requires precise transcriptional programs, led by transcription factors in combination with epigenetic events. Recent advances in epigenomic and transcriptomic techniques provided insight into different gene regulation mechanisms. However, to date it remains challenging to understand how combinations of transcription factors together with epigenetic events control cell-type specific gene expression. We have developed the AnnoMiner web-server, an innovative and flexible tool to annotate and integrate epigenetic, and transcription factor occupancy data. First, AnnoMiner annotates user-provided peaks with gene features. Second, AnnoMiner can integrate genome binding data from two different transcriptional regulators together with gene features. Third, AnnoMiner offers to explore the transcriptional deregulation of genes nearby, or within a specified genomic region surrounding a user-provided peak. AnnoMiner's fourth function performs transcription factor or histone modification enrichment analysis for user-provided gene lists by utilizing hundreds of public, high-quality datasets from ENCODE for the model organisms human, mouse, Drosophila and C. elegans. Thus, AnnoMiner can predict transcriptional regulators for a studied process without the strict need for chromatin data from the same process. We compared AnnoMiner to existing tools and experimentally validated several transcriptional regulators predicted by AnnoMiner to indeed contribute to muscle morphogenesis in Drosophila. 
AnnoMiner is freely available at http://chimborazo.ibdm.univ-mrs.fr/AnnoMiner/ .",2021-07-29 +30357393,GENCODE reference annotation for the human and mouse genomes.,"The accurate identification and description of the genes in the human and mouse genomes is a fundamental requirement for high quality analysis of data informing both genome biology and clinical genomics. Over the last 15 years, the GENCODE consortium has been producing reference quality gene annotations to provide this foundational resource. The GENCODE consortium includes both experimental and computational biology groups who work together to improve and extend the GENCODE gene annotation. Specifically, we generate primary data, create bioinformatics tools and provide analysis to support the work of expert manual gene annotators and automated gene annotation pipelines. In addition, manual and computational annotation workflows use any and all publicly available data and analysis, along with the research literature to identify and characterise gene loci to the highest standard. GENCODE gene annotations are accessible via the Ensembl and UCSC Genome Browsers, the Ensembl FTP site, Ensembl Biomart, Ensembl Perl and REST APIs as well as https://www.gencodegenes.org.",2019-01-01 +33939828,PE-Designer and PE-Analyzer: web-based design and analysis tools for CRISPR prime editing.,"Prime editing technology is capable of generating targeted insertions, deletions, and base conversions. However, the process of designing prime editing guide RNAs (pegRNAs), which contain a primer binding site and a reverse-transcription template at the 3' end, is more complex than that for the single guide RNAs used with CRISPR nucleases or base editors. Furthermore, the assessment of high-throughput sequencing data after prime editors (PEs) have been employed should consider the unique feature of PEs; thus, pre-existing assessment tools cannot directly be adopted for PEs. 
Here, we present two user-friendly web-based tools for PEs, named PE-Designer and PE-Analyzer. PE-Designer, a dedicated tool for pegRNA selection, provides all possible target sequences, pegRNA extension sequences, and nicking guide RNA sequences together with useful information, and displays the results in an interactive image. PE-Analyzer, a dedicated tool for PE outcome analysis, accepts high-throughput sequencing data, summarizes mutation-related information in a table, and provides interactive graphs. PE-Analyzer was mainly written using JavaScript so that it can analyze several data sets without requiring that huge sequencing data (>100MB) be uploaded to the server, reducing analysis time and increasing personal security. PE-Designer and PE-Analyzer are freely available at http://www.rgenome.net/pe-designer/ and http://www.rgenome.net/pe-analyzer/ without a login process.",2021-07-01 +33784300,Orchestrating privacy-protected big data analyses of data from different resources with R and DataSHIELD.,"Combined analysis of multiple, large datasets is a common objective in the health- and biosciences. Existing methods tend to require researchers to physically bring data together in one place or follow an analysis plan and share results. Developed over the last 10 years, the DataSHIELD platform is a collection of R packages that reduce the challenges of these methods. These include ethico-legal constraints which limit researchers' ability to physically bring data together and the analytical inflexibility associated with conventional approaches to sharing results. The key feature of DataSHIELD is that data from research studies stay on a server at each of the institutions that are responsible for the data. Each institution has control over who can access their data. The platform allows an analyst to pass commands to each server and the analyst receives results that do not disclose the individual-level data of any study participants. 
DataSHIELD uses Opal which is a data integration system used by epidemiological studies and developed by the OBiBa open source project in the domain of bioinformatics. However, until now the analysis of big data with DataSHIELD has been limited by the storage formats available in Opal and the analysis capabilities available in the DataSHIELD R packages. We present a new architecture (""resources"") for DataSHIELD and Opal to allow large, complex datasets to be used at their original location, in their original format and with external computing facilities. We provide some real big data analysis examples in genomics and geospatial projects. For genomic data analyses, we also illustrate how to extend the resources concept to address specific big data infrastructures such as GA4GH or EGA, and make use of shell commands. Our new infrastructure will help researchers to perform data analyses in a privacy-protected way from existing data sharing initiatives or projects. To help researchers use this framework, we describe selected packages and present an online book (https://isglobal-brge.github.io/resource_bookdown).",2021-03-30 +34019663,MetaboAnalyst 5.0: narrowing the gap between raw spectra and functional insights.,"Since its first release over a decade ago, the MetaboAnalyst web-based platform has become widely used for comprehensive metabolomics data analysis and interpretation. Here we introduce MetaboAnalyst version 5.0, aiming to narrow the gap from raw data to functional insights for global metabolomics based on high-resolution mass spectrometry (HRMS). 
Three modules have been developed to help achieve this goal, including: (i) a LC-MS Spectra Processing module which offers an easy-to-use pipeline that can perform automated parameter optimization and resumable analysis to significantly lower the barriers to LC-MS1 spectra processing; (ii) a Functional Analysis module which expands the previous MS Peaks to Pathways module to allow users to intuitively select any peak groups of interest and evaluate their enrichment of potential functions as defined by metabolic pathways and metabolite sets; (iii) a Functional Meta-Analysis module to combine multiple global metabolomics datasets obtained under complementary conditions or from similar studies to arrive at comprehensive functional insights. There are many other new functions including weighted joint-pathway analysis, data-driven network analysis, batch effect correction, merging technical replicates, improved compound name matching, etc. The web interface, graphics and underlying codebase have also been refactored to improve performance and user experience. At the end of an analysis session, users can now easily switch to other compatible modules for a more streamlined data analysis. MetaboAnalyst 5.0 is freely available at https://www.metaboanalyst.ca.",2021-07-01 +34621514,rmcorrShiny: A web and standalone application for repeated measures correlation.,"We describe a web and standalone Shiny app for calculating the common, linear within-individual association for repeated assessments of paired measures with multiple individuals: repeated measures correlation (rmcorr). This tool makes rmcorr more widely accessible, providing a graphical interface for performing and visualizing the output of analysis with rmcorr. In contrast to rmcorr, most widely used correlation techniques assume paired data are independent. Incorrectly analyzing repeated measures data as independent will likely produce misleading results. 
Using aggregation or separate models to address the issue of independence may obscure meaningful patterns and will also tend to reduce statistical power. rmcorrShiny (repeated measures correlation Shiny) provides a simple and accessible solution for computing the repeated measures correlation. It is available at: https://lmarusich.shinyapps.io/shiny_rmcorr/.",2021-07-30 +34663069,TopProperty: Robust Metaprediction of Transmembrane and Globular Protein Features Using Deep Neural Networks.,"Transmembrane proteins (TMPs) are critical components of cellular life. However, due to experimental challenges, the number of experimentally resolved TMP structures is severely underrepresented in databases compared to their cellular abundance. Prediction of (per-residue) features such as transmembrane topology, membrane exposure, secondary structure, and solvent accessibility can be a useful starting point for experimental design or protein structure prediction but often requires different computational tools for different features or types of proteins. We present TopProperty, a metapredictor that predicts all of these features for TMPs or globular proteins. TopProperty is trained on datasets without bias toward a high number of sequence homologs, and the predictions are significantly better than the evaluated state-of-the-art primary predictors on all quality metrics. TopProperty eliminates the need for protein type- or feature-tailored tools, specifically for TMPs. TopProperty is freely available as a web server and standalone at https://cpclab.uni-duesseldorf.de/topsuite/.",2021-10-18 +34733246,fIDBAC: A Platform for Fast Bacterial Genome Identification and Typing.,"To study the contamination of microorganisms in the food industry, pharmaceutical industry, clinical diagnosis, or bacterial taxonomy, accurate identification of species is a key starting point of further investigation. 
The conventional method of identification by the 16S rDNA gene or other marker gene comparison is not accurate, because it uses a tiny part of the genomic information. The average nucleotide identity calculated between two whole bacterial genomes was proven to be consistent with DNA-DNA hybridization and adopted as the gold standard of bacterial species delineation. Furthermore, there are more bacterial genomes available in public databases recently. All of those contribute to a genome era of bacterial species identification. However, wrongly labeled and low-quality bacterial genome assemblies, especially from type strains, greatly affect accurate identification. In this study, we employed a multi-step strategy to create a type-strain genome database, by removing the wrongly labeled and low-quality genome assemblies. Based on the curated database, a fast bacterial genome identification platform (fIDBAC) was developed (http://fbac.dmicrobe.cn/). The fIDBAC is aimed to provide a single, coherent, and automated workflow for species identification, strain typing, and downstream analysis, such as CDS prediction, drug resistance genes, virulence gene annotation, and phylogenetic analysis.",2021-10-18 +34663651,Trends in gender of authors of original research in oncology among major medical journals: a retrospective bibliometric study.,"

Objective

We evaluated the temporal trend in gender ratios of first and last authors in the field of oncological research published in major general medical and oncology journals and examined the gender pattern in coauthorship.

Design

We conducted a retrospective study in PubMed using the R package RISmed. We retrieved original research articles published in four general medical journals and six oncology specialty journals. These journals were selected based on their impact factors and popularity among oncologists. We identified the names of first and last authors from 1 January 2002 to 31 December 2019. The gender of the authors was identified and validated using the Gender API database (https://gender-api.com/).

Primary and secondary outcome measures

The percentages of first and last authors by gender and the gender ratios (male to female) and temporal trends in gender ratios of first and last authors were determined.

Results

We identified 34 624 research articles, in which 32 452 had the gender of both first and last authors identified. Among these 11 650 (33.6%) had women as the first author and 7908 (22.8%) as the last author, respectively. The proportion of female first and last authors increased from 26.6% and 16.2% in 2002, to 32.9% and 27.5% in 2019, respectively. However, the gender ratio (male to female) of first and last authors decreased by 1.5% and 2.6% per year, respectively, which were statistically significant (first author: incidence rate ratio (IRR) 0.98, 95% CI 0.97 to 1.00; last author: IRR 0.97, 95% CI 0.96 to 0.99). Male first and last authorship was the most common combination. Male-female and female-female pairs increased by 2.0% and 5.0%, respectively (IRR 1.02, 95% CI 1.01 to 1.03 and IRR 1.05, 95% CI 1.04 to 1.06, respectively).

Conclusions

The continued under-representation of women means that more efforts to address parity for advancement of women in academic oncology are needed.",2021-10-18 +34663470,"Classifying natural products from plants, fungi or bacteria using the COCONUT database and machine learning.","Natural products (NPs) represent one of the most important resources for discovering new drugs. Here we asked whether NP origin can be assigned from their molecular structure in a subset of 60,171 NPs in the recently reported Collection of Open Natural Products (COCONUT) database assigned to plants, fungi, or bacteria. Visualizing this subset in an interactive tree-map (TMAP) calculated using MAP4 (MinHashed atom pair fingerprint) clustered NPs according to their assigned origin ( https://tm.gdb.tools/map4/coconut_tmap/ ), and a support vector machine (SVM) trained with MAP4 correctly assigned the origin for 94% of plant, 89% of fungal, and 89% of bacterial NPs in this subset. An online tool based on an SVM trained with the entire subset correctly assigned the origin of further NPs with similar performance ( https://np-svm-map4.gdb.tools/ ). Origin information might be useful when searching for biosynthetic genes of NPs isolated from plants but produced by endophytic microorganisms.",2021-10-18 +33137204,KinaseMD: kinase mutations and drug response database.,"Mutations in kinases are abundant and critical to study signaling pathways and regulatory roles in human disease, especially in cancer. Somatic mutations in kinase genes can affect drug treatment, both sensitivity and resistance, to clinically used kinase inhibitors. Here, we present a newly constructed database, KinaseMD (kinase mutations and drug response), to structurally and functionally annotate kinase mutations. KinaseMD integrates 679 374 somatic mutations, 251 522 network-rewiring events, and 390 460 drug response records curated from various sources for 547 kinases. 
We uniquely annotate the mutations and kinase inhibitor response in four types of protein substructures (gatekeeper, A-loop, G-loop and αC-helix) that are linked to kinase inhibitor resistance in literature. In addition, we annotate functional mutations that may rewire kinase regulatory network and report four phosphorylation signals (gain, loss, up-regulation and down-regulation). Overall, KinaseMD provides the most updated information on mutations, unique annotations of drug response especially drug resistance and functional sites of kinases. KinaseMD is accessible at https://bioinfo.uth.edu/kmd/, having functions for searching, browsing and downloading data. To our knowledge, there has been no systematic annotation of these structural mutations linking to kinase inhibitor response. In summary, KinaseMD is a centralized database for kinase mutations and drug response.",2021-01-01 +32941621,dbCAN-PUL: a database of experimentally characterized CAZyme gene clusters and their substrates.,"PULs (polysaccharide utilization loci) are discrete gene clusters of CAZymes (Carbohydrate Active EnZymes) and other genes that work together to digest and utilize carbohydrate substrates. While PULs have been extensively characterized in Bacteroidetes, there exist PULs from other bacterial phyla, as well as archaea and metagenomes, that remain to be catalogued in a database for efficient retrieval. We have developed an online database dbCAN-PUL (http://bcb.unl.edu/dbCAN_PUL/) to display experimentally verified CAZyme-containing PULs from literature with pertinent metadata, sequences, and annotation. 
Compared to other online CAZyme and PUL resources, dbCAN-PUL has the following new features: (i) Batch download of PUL data by target substrate, species/genome, genus, or experimental characterization method; (ii) Annotation for each PUL that displays associated metadata such as substrate(s), experimental characterization method(s) and protein sequence information, (iii) Links to external annotation pages for CAZymes (CAZy), transporters (UniProt) and other genes, (iv) Display of homologous gene clusters in GenBank sequences via integrated MultiGeneBlast tool and (v) An integrated BLASTX service available for users to query their sequences against PUL proteins in dbCAN-PUL. With these features, dbCAN-PUL will be an important repository for CAZyme and PUL research, complementing our other web servers and databases (dbCAN2, dbCAN-seq).",2021-01-01 +33179754,TISCH: a comprehensive web resource enabling interactive single-cell transcriptome visualization of tumor microenvironment.,"Cancer immunotherapy targeting co-inhibitory pathways by checkpoint blockade shows remarkable efficacy in a variety of cancer types. However, only a minority of patients respond to treatment due to the stochastic heterogeneity of tumor microenvironment (TME). Recent advances in single-cell RNA-seq technologies enabled comprehensive characterization of the immune system heterogeneity in tumors but posed computational challenges on integrating and utilizing the massive published datasets to inform immunotherapy. Here, we present Tumor Immune Single Cell Hub (TISCH, http://tisch.comp-genomics.org), a large-scale curated database that integrates single-cell transcriptomic profiles of nearly 2 million cells from 76 high-quality tumor datasets across 27 cancer types. 
All the data were uniformly processed with a standardized workflow, including quality control, batch effect removal, clustering, cell-type annotation, malignant cell classification, differential expression analysis and functional enrichment analysis. TISCH provides interactive gene expression visualization across multiple datasets at the single-cell level or cluster level, allowing systematic comparison between different cell-types, patients, tissue origins, treatment and response groups, and even different cancer-types. In summary, TISCH provides a user-friendly interface for systematically visualizing, searching and downloading gene expression atlas in the TME from multiple cancer types, enabling fast, flexible and comprehensive exploration of the TME.",2021-01-01 +33137185,iCSDB: an integrated database of CRISPR screens.,"High-throughput screening based on CRISPR-Cas9 libraries has become an attractive and powerful technique to identify target genes for functional studies. However, accessibility of public data is limited due to the lack of user-friendly utilities and up-to-date resources covering experiments from third parties. Here, we describe iCSDB, an integrated database of CRISPR screening experiments using human cell lines. We compiled two major sources of CRISPR-Cas9 screening: the DepMap portal and BioGRID ORCS. DepMap portal itself is an integrated database that includes three large-scale projects of CRISPR screening. We additionally aggregated CRISPR screens from BioGRID ORCS that is a collection of screening results from PubMed articles. Currently, iCSDB contains 1375 genome-wide screens across 976 human cell lines, covering 28 tissues and 70 cancer types. Importantly, the batch effects from different CRISPR libraries were removed and the screening scores were converted into a single metric to estimate the knockout efficiency. Clinical and molecular information were also integrated to help users to select cell lines of interest readily. 
Furthermore, we have implemented various interactive tools and viewers to facilitate users to choose, examine and compare the screen results both at the gene and guide RNA levels. iCSDB is available at https://www.kobic.re.kr/icsdb/.",2021-01-01 +33035337,DockCoV2: a drug database against SARS-CoV-2.,"The current state of the COVID-19 pandemic is a global health crisis. To fight the novel coronavirus, one of the best-known ways is to block enzymes essential for virus replication. Currently, we know that the SARS-CoV-2 virus encodes about 29 proteins such as spike protein, 3C-like protease (3CLpro), RNA-dependent RNA polymerase (RdRp), Papain-like protease (PLpro), and nucleocapsid (N) protein. SARS-CoV-2 uses human angiotensin-converting enzyme 2 (ACE2) for viral entry and transmembrane serine protease family member II (TMPRSS2) for spike protein priming. Thus in order to speed up the discovery of potential drugs, we develop DockCoV2, a drug database for SARS-CoV-2. DockCoV2 focuses on predicting the binding affinity of FDA-approved and Taiwan National Health Insurance (NHI) drugs with the seven proteins mentioned above. This database contains a total of 3,109 drugs. DockCoV2 is easy to use and search against, is well cross-linked to external databases, and provides the state-of-the-art prediction results in one site. Users can download their drug-protein docking data of interest and examine additional drug-related information on DockCoV2. Furthermore, DockCoV2 provides experimental information to help users understand which drugs have already been reported to be effective against MERS or SARS-CoV. DockCoV2 is available at https://covirus.cc/drugs/.",2021-01-01 +33010159,PROTAC-DB: an online database of PROTACs.,"Proteolysis-targeting chimeras (PROTACs), which selectively degrade targeted proteins by the ubiquitin-proteasome system, have emerged as a novel therapeutic technology with potential advantages over traditional inhibition strategies. 
In the past few years, this technology has achieved substantial progress and two PROTACs have been advanced into phase I clinical trials. However, this technology is still maturing and the design of PROTACs remains a great challenge. In order to promote the rational design of PROTACs, we present PROTAC-DB, a web-based open-access database that integrates structural information and experimental data of PROTACs. Currently, PROTAC-DB consists of 1662 PROTACs, 202 warheads (small molecules that target the proteins of interest), 65 E3 ligands (small molecules capable of recruiting E3 ligases) and 806 linkers, as well as their chemical structures, biological activities, and physicochemical properties. Except the biological activities of warheads and E3 ligands, PROTAC-DB also provides the degradation capacities, binding affinities and cellular activities for PROTACs. PROTAC-DB can be queried with two general searching approaches: text-based (target name, compound name or ID) and structure-based. In addition, for the convenience of users, a filtering tool for the searching results based on the physicochemical properties of compounds is also offered. PROTAC-DB is freely accessible at http://cadd.zju.edu.cn/protacdb/.",2021-01-01 +34873696,Research Highlight: Social dispersal in giraffes.,"Research Highlight: Bond, M. L., Lee, D. E., Ozgul, A., Farine, D. R., & König, B. (2021). Leaving by staying: Social dispersal in giraffes. Journal of Animal Ecology, https://doi.org/10.1111/1365-2656.13582. Dispersal is a key ecological and evolutionary process, which shows marked variability between and within species. The social and kinship structure of species fundamentally affects the patterns and types of dispersal, but information on how animals with fission-fusion group dynamics disperse is missing. Bond et al. 
provide novel data on natal dispersal of giraffe calves in relation to their dynamic multilayered social system, showing that individuals from both sexes can disperse socially, by switching association with different social groups, without leaving their natal area. The results highlight that traditional spatial-only measures of dispersal, such as dispersal distance, may be inadequate for social species with overlapping social units.",2021-12-01 +33221926,HumanMetagenomeDB: a public repository of curated and standardized metadata for human metagenomes.,"Metagenomics became a standard strategy to comprehend the functional potential of microbial communities, including the human microbiome. Currently, the number of metagenomes in public repositories is increasing exponentially. The Sequence Read Archive (SRA) and the MG-RAST are the two main repositories for metagenomic data. These databases allow scientists to reanalyze samples and explore new hypotheses. However, mining samples from them can be a limiting factor, since the metadata available in these repositories is often misannotated, misleading, and decentralized, creating an overly complex environment for sample reanalysis. The main goal of the HumanMetagenomeDB is to simplify the identification and use of public human metagenomes of interest. HumanMetagenomeDB version 1.0 contains metadata of 69 822 metagenomes. We standardized 203 attributes, based on standardized ontologies, describing host characteristics (e.g. sex, age and body mass index), diagnosis information (e.g. cancer, Crohn's disease and Parkinson), location (e.g. country, longitude and latitude), sampling site (e.g. gut, lung and skin) and sequencing attributes (e.g. sequencing platform, average length and sequence quality). Further, HumanMetagenomeDB version 1.0 metagenomes encompass 58 countries, 9 main sample sites (i.e. body parts), 58 diagnoses and multiple ages, ranging from just born to 91 years old. 
The HumanMetagenomeDB is publicly available at https://webapp.ufz.de/hmgdb/.",2021-01-01 +33137183,IMG/VR v3: an integrated ecological and evolutionary framework for interrogating genomes of uncultivated viruses.,"Viruses are integral components of all ecosystems and microbiomes on Earth. Through pervasive infections of their cellular hosts, viruses can reshape microbial community structure and drive global nutrient cycling. Over the past decade, viral sequences identified from genomes and metagenomes have provided an unprecedented view of viral genome diversity in nature. Since 2016, the IMG/VR database has provided access to the largest collection of viral sequences obtained from (meta)genomes. Here, we present the third version of IMG/VR, composed of 18 373 cultivated and 2 314 329 uncultivated viral genomes (UViGs), nearly tripling the total number of sequences compared to the previous version. These clustered into 935 362 viral Operational Taxonomic Units (vOTUs), including 188 930 with two or more members. UViGs in IMG/VR are now reported as single viral contigs, integrated proviruses or genome bins, and are annotated with a new standardized pipeline including genome quality estimation using CheckV, taxonomic classification reflecting the latest ICTV update, and expanded host taxonomy prediction. The new IMG/VR interface enables users to efficiently browse, search, and select UViGs based on genome features and/or sequence similarity. IMG/VR v3 is available at https://img.jgi.doe.gov/vr, and the underlying data are available to download at https://genome.jgi.doe.gov/portal/IMG_VR.",2021-01-01 +34820480,Clustering Analysis Methods for GNSS Observations: A Data-Driven Approach to Identifying California's Major Faults.,"We present a data-driven approach to clustering or grouping Global Navigation Satellite System (GNSS) stations according to observed velocities, displacements or other selected characteristics. 
Clustering GNSS stations provides useful scientific information, and is a necessary initial step in other analysis, such as detecting aseismic transient signals (Granat et al., 2013, https://doi.org/10.1785/0220130039). Desired features of the data can be selected for clustering, including some subset of displacement or velocity components, uncertainty estimates, station location, and other relevant information. Based on those selections, the clustering procedure autonomously groups the GNSS stations according to a selected clustering method. We have implemented this approach as a Python application, allowing us to draw upon the full range of open source clustering methods available in Python's scikit-learn package (Pedregosa et al., 2011, https://doi.org/10.5555/1953048.2078195). The application returns the stations labeled by group as a table and color coded KML file and is designed to work with the GNSS information available from GeoGateway (Donnellan et al., 2021, https://doi.org/10.1007/s12145-020-00561-7; Heflin et al., 2020, https://doi.org/10.1029/2019ea000644) but is easily extensible. We demonstrate the methodology on California and western Nevada. The results show partitions that follow faults or geologic boundaries, including for recent large earthquakes and post-seismic motion. The San Andreas fault system is most prominent, reflecting Pacific-North American plate boundary motion. Deformation reflected as class boundaries is distributed north and south of the central California creeping section. For most models a cluster boundary connects the southernmost San Andreas fault with the Eastern California Shear Zone (ECSZ) rather than continuing through the San Gorgonio Pass.",2021-10-29 +34095051,Systematic Organization of COVID-19 Data Supported by the Adverse Outcome Pathway Framework.,"Adverse Outcome Pathways (AOP) provide structured frameworks for the systematic organization of research data and knowledge. 
The AOP framework follows a set of key principles that allow for broad application across diverse disciplines related to human health, including toxicology, pharmacology, virology and medical research. The COVID-19 pandemic engages a great number of scientists world-wide and data is increasing with exponential speed. Diligent data management strategies are employed but approaches for systematically organizing the data-derived information and knowledge are lacking. We believe AOPs can play an important role in improving interpretation and efficient application of scientific understanding of COVID-19. Here, we outline a newly initiated effort, the CIAO project (https://www.ciao-covid.net/), to streamline collaboration between scientists across the world toward development of AOPs for COVID-19, and describe the overarching aims of the effort, as well as the expected outcomes and research support that they will provide.",2021-05-19 +,Implant Material As A Modifiable Risk Factor For Infection In THA: A Literature And Registry Review,"

Aim:

Periprosthetic Joint Infection (PJI) is a rare but serious post-operative complication of hip replacement that often ends in complex implant revision and dramatically impacts the quality of life of the patient. Finally, costs associated with PJI significantly impact healthcare systems. Current research focuses on understanding the mechanisms of infection and identifying the risk factors related thereto. The objective of this study was to examine the potential impact of bearing materials on the incidence of infection in THA.

Methods:

Registries are a valuable tool to analyze large cohorts of THA patients and the influence of selected parameters on the clinical outcome of the surgeries. Data from THA patients recorded in the NJR, AOA and NZ registries were analyzed with respect to the incidence of infection. Material data and scientific publications were also reviewed to investigate if the incidence of PJI might be correlated with the specific bearing material used.

Results:

The use of metal bearings was consistently identified in all large patient cohorts as an independent risk factor for PJI. In contrast, using ceramic bearings was associated with a lower risk of revision for PJI (1). In vitro and ex vivo studies comparing the biological response to ceramic, metal and polyethylene materials are helpful to explain these findings. Metal exposure might activate the immune system, and the released metal particles and ions might trigger adverse reactions with high inflammatory potential. In contrast, extremely low-wear ceramic bearings are well tolerated, show excellent biological behavior (2-6) and might even support the wound healing process by initiating a healthy fibrotic pseudo-capsulation (4). Furthermore, low wear is less likely to serve as a nidus for infection.

Discussion and conclusion:

Considering modifiable risk factors prior to THA is a key aspect for surgery success, implant longevity and patient satisfaction. Selecting a bearing material with enhanced biocompatibility like ceramics seems to have a measurable impact on the clinical outcomes. Favourable host-implant interactions might explain this observation.

Literature:

1) Lenguerrand et al. Risk factors associated with revision for prosthetic joint infection after hip replacement: a prospective observational cohort study. The Lancet, 2018. DOI: https://doi.org/10.1016/S1473-3099(18)30345-1 2) Faye PA et al. Biomed Mater. 2017;12(1):015023 3) Cunningham BW et al. Journal of Neurosurgery: Spine. 2013;19(3):336-350 4) Savarino L et al. Acta Orthopaedica. 2009;80(2):162-167 5) Asif IM et al. Front. Bioeng. Biotechnol. Conference Abstract: 10th World Biomaterials Congress. doi: 10.3389/conf.FBIOE.2016.01.00793 6) Asif I M et al. Characterisation and Biological Impact of Wear Particles from Composite Ceramic Hip Replacements. PhD thesis, University of Leeds (2018). http://etheses.whiterose.ac.uk/20563/ 7) Pitto et al. Are ceramic-on-ceramic bearings in total hip arthroplasty associated with reduced revision risk for late dislocation? Clin Orthop Relat Res. 2015;473(12):3790–3795. doi:10.1007/s11999-015-4395-6",2020-05-01 +32241255,EpiMOLAS: an intuitive web-based framework for genome-wide DNA methylation analysis.,"

Background

DNA methylation is a crucial epigenomic mechanism in various biological processes. Using whole-genome bisulfite sequencing (WGBS) technology, methylated cytosine sites can be revealed at the single nucleotide level. However, the WGBS data analysis process is usually complicated and challenging.

Results

To alleviate the associated difficulties, we integrated the WGBS data processing steps and downstream analysis into a two-phase approach. First, we set up the required tools in Galaxy and developed workflows to calculate the methylation level from raw WGBS data and generate a methylation status summary, the mtable. This computation environment is wrapped into the Docker container image DocMethyl, which allows users to rapidly deploy an executable environment without tedious software installation and library dependency problems. Next, the mtable files were uploaded to the web server EpiMOLAS_web to link with the gene annotation databases that enable rapid data retrieval and analyses.

Conclusion

To our knowledge, the EpiMOLAS framework, consisting of DocMethyl and EpiMOLAS_web, is the first approach to include containerization technology and a web-based system for WGBS data analysis from raw data processing to downstream analysis. EpiMOLAS will help users cope with their WGBS data and also conduct reproducible analyses of publicly available data, thereby gaining insights into the mechanisms underlying complex biological phenomenon. The Galaxy Docker image DocMethyl is available at https://hub.docker.com/r/lsbnb/docmethyl/. EpiMOLAS_web is publicly accessible at http://symbiosis.iis.sinica.edu.tw/epimolas/.",2020-04-02 +29970001,Plant organelle RNA editing and its specificity factors: enhancements of analyses and new database features in PREPACT 3.0.,"

Background

Gene expression in plant chloroplasts and mitochondria is affected by RNA editing. Numerous C-to-U conversions, accompanied by reverse U-to-C exchanges in some plant clades, alter the genetic information encoded in the organelle genomes. Predicting and analyzing RNA editing, which ranges from only few sites in some species to thousands in other taxa, is bioinformatically demanding.

Results

Here, we present major enhancements and extensions of PREPACT, a WWW-based service for analysing, predicting and cataloguing plant-type RNA editing. New features in PREPACT's core include direct GenBank accession query input and options to restrict searches to candidate U-to-C editing or to sites where editing has been documented previously in the references. The reference database has been extended by 20 new organelle editomes. PREPACT 3.0 features new modules ""EdiFacts"" and ""TargetScan"". EdiFacts integrates information on pentatricopeptide repeat (PPR) proteins characterized as site-specific RNA editing factors. PREPACT's editome references connect into EdiFacts, linking editing events to specific co-factors where known. TargetScan allows position-weighted querying for sequence motifs in the organelle references, optionally restricted to coding regions or sequences around editing sites, or in queries uploaded by the user. TargetScan is mainly intended to evaluate and further refine the proposed PPR-RNA recognition code but may be handy for other tasks as well. We present an analysis for the immediate sequence environment of more than 15,000 documented editing sites finding strong and different bias in the editome data sets.

Conclusions

We exemplarily present the novel features of PREPACT 3.0 aimed to enhance the analyses of plant-type RNA editing, including its new modules EdiFacts integrating information on characterized editing factors and TargetScan aimed to analyse RNA editing site recognition specificities.",2018-07-03 +34081438,Alignment-Free Antimicrobial Peptide Predictors: Improving Performance by a Thorough Analysis of the Largest Available Data Set.,"In the last two decades, a large number of machine-learning-based predictors for the activities of antimicrobial peptides (AMPs) have been proposed. These predictors differ from one another in the learning method and in the training and testing data sets used. Unfortunately, the training data sets present several drawbacks, such as a low representativeness regarding the experimentally validated AMP space, and duplicated peptide sequences between negative and positive data sets. These limitations give a low confidence to most of the approaches to be used in prospective studies. To address these weaknesses, we propose novel modeling and assessing data sets from the largest experimentally validated nonredundant peptide data set reported to date. From these novel data sets, alignment-free quantitative sequence-activity models (AF-QSAMs) based on Random Forest are created to identify general AMPs and their antibacterial, antifungal, antiparasitic, and antiviral functional types. An applicability domain analysis is carried out to determine the reliability of the predictions obtained, which, to the best of our knowledge, is performed for the first time for AMP recognition. A benchmarking is undertaken between the models proposed and several models from the literature that are freely available in 13 programs (ClassAMP, iAMP-2L, ADAM, MLAMP, AMPScanner v2.0, AntiFP, AMPfun, PEPred-suite, AxPEP, CAMPR3, iAMPpred, APIN, and Meta-iAVP). 
The models proposed are those with the best performance in all of the endpoints modeled, while most of the methods from the literature have weak-to-random predictive agreements. The models proposed are also assessed through Y-scrambling and repeated k-fold cross-validation tests, demonstrating that the outcomes obtained by them are not given by chance. Three chemometric analyses also confirmed the relevance of the peptides descriptors used in the modeling. Therefore, it can be concluded that the models built by fixing the drawbacks existing in the literature contribute to identifying antibacterial, antifungal, antiparasitic, and antiviral peptides with high effectivity and reliability. Models are freely available via the AMPDiscover tool at https://biocom-ampdiscover.cicese.mx/.",2021-06-03 +33643383,Abiotic Stress-Responsive miRNA and Transcription Factor-Mediated Gene Regulatory Network in Oryza sativa: Construction and Structural Measure Study.,"Climate changes and environmental stresses have a consequential association with crop plant growth and yield, meaning it is necessary to cultivate crops that have tolerance toward the changing climate and environmental disturbances such as water stress, temperature fluctuation, and salt toxicity. Recent studies have shown that trans-acting regulatory elements, including microRNAs (miRNAs) and transcription factors (TFs), are emerging as promising tools for engineering naive improved crop varieties with tolerance for multiple environmental stresses and enhanced quality as well as yield. However, the interwoven complex regulatory function of TFs and miRNAs at transcriptional and post-transcriptional levels is unexplored in Oryza sativa. To this end, we have constructed a multiple abiotic stress responsive TF-miRNA-gene regulatory network for O. sativa using a transcriptome and degradome sequencing data meta-analysis approach. 
The theoretical network approach has shown the networks to be dense, scale-free, and small-world, which makes the network stable. They are also invariant to scale change where an efficient, quick transmission of biological signals occurs within the network on extrinsic hindrance. The analysis also deciphered the existence of communities (cluster of TF, miRNA, and genes) working together to help plants in acclimatizing to multiple stresses. It highlighted that genes, TFs, and miRNAs shared by multiple stress conditions that work as hubs or bottlenecks for signal propagation, for example, during the interaction between stress-responsive genes (TFs/miRNAs/other genes) and genes involved in floral development pathways under multiple environmental stresses. This study further highlights how the fine-tuning feedback mechanism works for balancing stress tolerance and how timely flowering enable crops to survive in adverse conditions. This study developed the abiotic stress-responsive regulatory network, APRegNet database (http://lms.snu.edu.in/APRegNet), which may help researchers studying the roles of miRNAs and TFs. Furthermore, it advances current understanding of multiple abiotic stress tolerance mechanisms.",2021-02-12 +29847084,Conditional Toxicity Value (CTV) Predictor: An In Silico Approach for Generating Quantitative Risk Estimates for Chemicals.,"

Background

Human health assessments synthesize human, animal, and mechanistic data to produce toxicity values that are key inputs to risk-based decision making. Traditional assessments are data-, time-, and resource-intensive, and they cannot be developed for most environmental chemicals owing to a lack of appropriate data.

Objectives

As recommended by the National Research Council, we propose a solution for predicting toxicity values for data-poor chemicals through development of quantitative structure-activity relationship (QSAR) models.

Methods

We used a comprehensive database of chemicals with existing regulatory toxicity values from U.S. federal and state agencies to develop quantitative QSAR models. We compared QSAR-based model predictions to those based on high-throughput screening (HTS) assays.

Results

QSAR models for noncancer threshold-based values and cancer slope factors had cross-validation-based Q2 of 0.25-0.45, mean model errors of 0.70-1.11 log10 units, and applicability domains covering >80% of environmental chemicals. Toxicity values predicted from QSAR models developed in this study were more accurate and precise than those based on HTS assays or mean-based predictions. A publicly accessible web interface to make predictions for any chemical of interest is available at http://toxvalue.org.

Conclusions

An in silico tool that can predict toxicity values with an uncertainty of an order of magnitude or less can be used to quickly and quantitatively assess risks of environmental chemicals when traditional toxicity data or human health assessments are unavailable. This tool can fill a critical gap in the risk assessment and management of data-poor chemicals. https://doi.org/10.1289/EHP2998.",2018-05-29 +,Transcriptome profiling of faba bean (Vicia faba L.) drought-tolerant variety hassawi-2 under drought stress using RNA sequencing,"Drought is the major factor that limits faba bean (Vicia faba L.) production worldwide. To decipher the molecular basis of drought stress response, we carried out transcriptome profiling of the drought-tolerant genotype of faba bean (Vicia faba L.), Hassawi-2, under drought stress conditions using RNA sequencing.De novo assembly of a total of 606.35 M high-quality pair-end clean reads yielded 164,679 unigenes of leaf tissues. A total of 35,143 (12,805 upregulated and 22,338 downregulated unigenes) and 28,892 (16,247 upregulated and 12,645 down-regulated) genes were differentially expressed under drought stress conditions in the vegetative and flowering stages, respectively. According to the GO and KEGG databases, both the vegetative and flowering stages regulated energy metabolism, transmembrane transporter activity, and secondary metabolites. A total of 538 (272 upregulated and 266 downregulated) and 642 (300 upregulated and 342 downregulated) putative transcription factors in the vegetative and flowering stages, respectively, were identified and classified under different transcription factor families. In addition, a substantial proportion of DEGs identified here were novel, as they could not be mapped to any functional database, therefore suggesting a specific response to drought stress in faba bean. 
The RNA-seq results were also validated by quantitative reverse-transcription PCR analysis.The transcriptome data generated here using RNA sequencing is the first report of faba bean under drought stress indicating the genes involved in drought tolerance. This information can be used to improve drought tolerance in elite faba bean cultivars and to develop tolerant germplasm for other legume crops.How to cite: Khan MA, Alghamdi SS, Ammar MH, et al. Transcriptome profiling of faba bean (Vicia faba L.) drought-tolerant variety hassawi-2 under drought stress using RNA sequencing. Electron J Biotechnol 2019;39. https://doi.org/10.1016/j.ejbt.2019.02.004.",2019-05-01 +33950214,PLIP 2021: expanding the scope of the protein-ligand interaction profiler to DNA and RNA.,"With the growth of protein structure data, the analysis of molecular interactions between ligands and their target molecules is gaining importance. PLIP, the protein-ligand interaction profiler, detects and visualises these interactions and provides data in formats suitable for further processing. PLIP has proven very successful in applications ranging from the characterisation of docking experiments to the assessment of novel ligand-protein complexes. Besides ligand-protein interactions, interactions with DNA and RNA play a vital role in many applications, such as drugs targeting DNA or RNA-binding proteins. To date, over 7% of all 3D structures in the Protein Data Bank include DNA or RNA. Therefore, we extended PLIP to encompass these important molecules. We demonstrate the power of this extension with examples of a cancer drug binding to a DNA target, and an RNA-protein complex central to a neurological disease. PLIP is available online at https://plip-tool.biotec.tu-dresden.de and as open source code. 
So far, the engine has served over a million queries and the source code has been downloaded several thousand times.",2021-07-01 +34414641,Utilization of the Boston Children's Hospital SRTR Cohort Visualization Tool to increase team understanding of reporting cohorts.,"

Background

While reviewing outcomes metrics and data from the SRTR, it became apparent that prospective assessment of the SRTR reporting cohorts would be an important proactive strategy for internal quality control. It was particularly important to identify the number of patient deaths and graft failures within 1 year of transplant that would result in being flagged by the UNOS and the MPSC.

Methods

A simple Microsoft Excel line graph was created to visually display retrospective, current, and future SRTR cohorts. Data provided by the SRTR CUSUM (https://securesrtr.transplant.hrsa.gov/srtr-reports/cusum-charts/) Reports and the SRTR 1 Year Expected Survival Excel Worksheet (https://securesrtr.transplant.hrsa.gov/srtr-reports/current-release/) were leveraged to identify whether programs were in jeopardy of being flagged by UNOS/MPSC for outcomes.

Results & conclusions

The creation of this visual tool has greatly improved team understanding of SRTR report cohorts, as well as the risk of being flagged by regulatory agencies, for adverse outcomes.",2021-08-19 +34803258,DaiCee: A database for anti-cancer compounds with targets and side effect profiles.,"Identification of the toxicity of compounds is more crucial before entering clinical trials. Awareness of physiochemical properties, possible targets and side effects has become a major public health issue to reduce risks. Experimental determination of analyzing the physiochemical properties of a drug, their interaction with specific receptors and identifying their side-effects remain challenging is time consuming and costly. We describe a manually compiled database named DaiCee database, which contains 2100 anticancer drugs with information on their physiochemical properties, targets of action and side effects. It includes both synthetic and herbal anti-cancer compounds. It allows the search for SMILES notation, Lipinski's and ADME/T properties, targets and side effect profiles of the drugs. This helps to identify drugs with effective anticancer properties, their toxic nature, drug-likeness for in-vitro and in-vivo experiments. It also used for comparative analysis and screening of effective anticancer drugs using available data for compounds in the database. The database will be updated regularly to provide the users with latest information. The database is available at the URL http://www.hccbif.org/usersearch.php.",2020-11-30 +,MON-LB003 Risk Factors For 30-day Readmission After Diabetic Ketoacidosis Using Nationwide Readmissions Database,"Abstract Introduction: Nearly 1 in 10 individuals in the United States have Diabetes Mellitus [1]. One potential preventable complication is Diabetic Ketoacidosis (DKA). 
Better understanding of the risk factors for readmissions of DKA will allow the development and implementation of specific patient-centered interventions to decrease future readmissions. We sought out to determine the 30-day all-cause readmission rate for adults (age > 18) admitted with DKA and the associated predictors of readmissions. Methods: We utilized Agency of Healthcare Research and Quality’s (AHRQ) Health Care Utilization Project’s (HCUP) 2014 Nationwide Readmission Database which includes 14.9 Million discharges across 22 states accounting for 51.2% of the total U.S. population and 49.3% of all U.S. hospitalizations to identify admissions with a DKA related ICD-9 diagnosis (250.10, 250.11, 250.12, and 250.13) associated with both Type 1 and Type 2 Diabetes Mellitus. Applicable admissions were all adults (age > 18)with an index hospitalization between January 1 and November 30, 2014. Patients who died during index admission and those with missing covariates were excluded. All-cause readmission including DKA within 30-days of DKA were analyzed. Statistical analysis was completed with Stata 15 (StataCorp, College Station, TX) with p-values < 0.05 considered statistically significant. A univariate and multivariate analysis of data collected was completed using both odds ratio and chi square test for significance. Predictors for readmission were determined using a multivariate logistic regression model following sequential step-wise elimination of covariates including demographics, comorbidities, hospital characteristics, length of stay (LOS) for index admission, and the modified Elixhauser Comorbidity Index. Results: A total of 66,896 patients met criteria for DKA related index admission. Of which, there was 12,954 (19.36%) all-cause readmissions within 30-days including 7,167 were again for DKA accounting for 55.32% of all readmissions. 
Multivariate analysis showed that the predictors of 30-day readmission were younger age, (with adults age <35 the highest risk), female, disposition at discharge to short term hospital or home health or against medical advice), from a zip code with the lowest income quartile, Medicare as payer, lengthier LOS, presence of comorbidities, absence of obesity, and presence of renal failure. Conclusion: Almost 1 in 5 (19.36%) patients discharged after a DKA admission were readmitted within 30 days. Physician awareness and development of targeted interventions for individuals with risk factors and high-risk for readmissions may help decrease future morbidity and mortality. References: [1] CDC: National Diabetes Statistics Report (2017). https://www.cdc.gov/diabetes/pdfs/data/statistics/national-diabetes-statistics-report.pdf Unless otherwise noted, all abstracts presented at ENDO are embargoed until the date and time of presentation. For oral presentations, the abstracts are embargoed until the session begins. Abstracts presented at a news conference are embargoed until the date and time of the news conference. The Endocrine Society reserves the right to lift the embargo on specific abstracts that are selected for promotion prior to or during ENDO.",2019-04-15 +,Prognostic value of long non-coding RNA TP73-AS1 expression in different types of cancer: A systematic review and meta-analysis,"TP73 antisense RNA 1 (TP73-AS1), a newly discovered long non-coding RNA (lncRNA), has been reported to be upregulated in various kinds of tumors, and shows a variable influence on living quality and prognosis of patients. Thus, we conducted a meta-analysis to evaluate the overall prognostic value of the lncRNA TP73-AS1 in cancer patients.A systematic literature retrieval was carried out using the PubMed, Cochrane Library, EMBASE, and Web of Science databases. 
We calculated the pooled hazard ratio (HR) and odds ratio (OR) with 95% confidence intervals (CIs) to evaluate the association of TP73-AS1 expression with prognostic and clinicopathological parameters. A total of 15 studies including 1057 cancer patients were finally selected for the meta-analysis. The results demonstrated that high TP73-AS1 expression was significantly associated with shorter overall survival (OS) (HR = 1.97, 95% CI: 1.68–2.31, P < 0.001). According to a fixed-effects or random-effects model, elevated TP73-AS1 expression markedly predicted advanced clinical stage (OR = 3.30, 95% CI: 2.35–4.64, P < 0.001), larger tumor size (OR = 2.37, 95% CI: 1.75–3.22, P < 0.001), earlier lymph node metastasis (OR = 3.28, 95% CI: 1.59–6.76, P = 0.001), and distant metastasis (OR = 4.94, 95% CI: 2.61–9.37, P < 0.001).High lncRNA TP73-AS1 expression appears to be predictive of a worse OS and clinicopathologic features for patients with various types of malignant tumors. These results provide a basis for utilizing TP73-AS1 expression as an unfavorable indicator to predict survival outcomes.How to cite: Wang X, Shu K, Wang Z, et al. Prognostic value of long non-coding RNA TP73-AS1 expression in different types of cancer: A systematic review and meta-analysis. Electron J Biotechnol 2020;43. https://doi.org/10.1016/j.ejbt.2019.12.005.",2020-01-01 +33631799,ConnecTF: A platform to integrate transcription factor-gene interactions and validate regulatory networks.,"Deciphering gene regulatory networks (GRNs) is both a promise and challenge of systems biology. The promise lies in identifying key transcription factors (TFs) that enable an organism to react to changes in its environment. The challenge lies in validating GRNs that involve hundreds of TFs with hundreds of thousands of interactions with their genome-wide targets experimentally determined by high-throughput sequencing. 
To address this challenge, we developed ConnecTF, a species-independent, web-based platform that integrates genome-wide studies of TF-target binding, TF-target regulation, and other TF-centric omic datasets and uses these to build and refine validated or inferred GRNs. We demonstrate the functionality of ConnecTF by showing how integration within and across TF-target datasets uncovers biological insights. Case study 1 uses integration of TF-target gene regulation and binding datasets to uncover TF mode-of-action and identify potential TF partners for 14 TFs in abscisic acid signaling. Case study 2 demonstrates how genome-wide TF-target data and automated functions in ConnecTF are used in precision/recall analysis and pruning of an inferred GRN for nitrogen signaling. Case study 3 uses ConnecTF to chart a network path from NLP7, a master TF in nitrogen signaling, to direct secondary TF2s and to its indirect targets in a Network Walking approach. The public version of ConnecTF (https://ConnecTF.org) contains 3,738,278 TF-target interactions for 423 TFs in Arabidopsis, 839,210 TF-target interactions for 139 TFs in maize (Zea mays), and 293,094 TF-target interactions for 26 TFs in rice (Oryza sativa). The database and tools in ConnecTF will advance the exploration of GRNs in plant systems biology applications for model and crop species.",2021-02-01 +33475415,"""Discrimination, dispositions, and cardiovascular responses to stress."" Correction to Richman et al. (2007).","Reports an error in ""Discrimination, dispositions, and cardiovascular responses to stress"" by Laura Smart Richman, Gary G. Bennett, Jolynn Pek, Edward C. Suarez, Ilene Siegler and Redford B. Williams Jr. (Health Psychology, 2007[Nov], Vol 26[6], 675-683). In the article (http://dx.doi.org/10.1037/0278-6133.26.6.675), Edward C. 
Suarez was not originally included in the byline but has been added on the basis of his significant contributions to the concepts, design, data, and/or specimens analyzed in the article. The online version of this article has been corrected. (The following abstract of the original article appeared in record 2007-16656-004.) Objective: Recent research suggests that past exposure to discrimination may influence perceptions of, and physiological responses to, new challenges. The authors examined how race and trait levels of hostility and optimism interact with past exposure to discrimination to predict physiological reactivity and recovery during an anger recall task. Design: A community sample of 165 normotensive Black and White adults participated in an anger recall task while having their cardiovascular function monitored. Main Outcome Measures: Blood pressure and heart rate indicators of physiological reactivity and recovery. Results and Conclusion: Participants had higher reactivity and slower recovery to the anger recall task when they had high past discrimination, low cynicism, or high optimism. The pattern of effects was similar for both racial groups, but Blacks had more acute reactivity and slower recovery than Whites. These results are consistent with the perspective of discrimination as a chronic stressor that is related to acute stress responses, particularly for Blacks. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-02-01 +33210724,Expression of trefoil factor 3 is decreased in colorectal cancer.,"In colorectal cancer (CRC), high expression of trefoil factor 3 (TFF3) is associated with tumor progression and reduced patient survival; however, bioinformatics analyses of public 'omics' databases show low TFF3 expression in CRCs as compared to normal tissues. Thus, we examined TFF3 expression in CRCs and matching normal tissues to evaluate its role in CRC progression. 
TFF3 gene expression was characterized using the bioinformatics portal UALCAN (http://ualcan.path.uab.edu). Tissue microarrays (TMAs) of archival CRC specimens (n=96) were immunostained with anti‑human TFF3 antibodies. Immunohistochemical (IHC) staining intensity was semi‑quantitatively scored. For this cohort, the median follow‑up was 5.4 years. Associations between clinical and pathological variables were determined using Chi‑square or Fisher's exact tests. Univariate disease‑free survival was estimated by the Kaplan‑Meier method. Omics data analyses by UALCAN showed downregulation of TFF3 expression in CRC relative to normal tissue at protein (χ2, P<0.0001) levels. There was a similar decreasing trend of TFF3 expression in the pathologic stages of the CRCs (RNA, χ2, P=0.88 and protein, χ2 P<0.0001). UALCAN data analysis showed that TFF3 exhibited 27% lower mRNA expression in tumors with mutant TP53 (P=0.007). Confirming the findings of omics analyses, IHC analysis of TMAs exhibited lower TFF3 expression in 95.6% (65 of 68) of the available normal‑tumor matching pairs (χ2, P<0.0001). There was no statistically significant association of tumor TFF3 expression with patient sex, race/ethnicity, tumor location within the colorectum, Tumor, Node, Metastasis (TNM) stage, lymph node metastasis, or surgical margins. However, low TFF3 IHC staining in tumor tissue was associated with histological grade (P=0.026). Kaplan‑Meier survival analysis showed no prognostic value of low TFF3 expression relative to those with high expression (log‑rank, P=0.605). Our findings demonstrate low expression of TFF3 in CRCs. 
Association between low TFF3 and histopathological features suggests involvement of this molecule in progression of CRC.",2020-10-30 +34388151,Global survey-based assessment of lifestyle changes during the COVID-19 pandemic.,"Along with the major impact on public health, the COVID-19 outbreak has caused unprecedented concerns ranging from sudden loss of employment to mental stress and anxiety. We implemented a survey-based data collection platform to characterize how the COVID-19 pandemic has affected the socio-economic, physical and mental health conditions of individuals. We focused on three broad areas, namely, changes in social interaction during home confinement, economic impact and their health status. We identified a substantial increase in virtual interaction among individuals, which might be a way to alleviate the sudden unprecedented mental health burden, exacerbated by general awareness about viral infections or other manifestations associated with them. The majority of participants (85%) lived with one or more companions and unemployment issues did not affect 91% of the total survey takers, which was one of the crucial consequences of the pandemic. Nevertheless, measures such as an increased frequency of technology-aided distant social interaction, focus on physical fitness and leisure activities were adopted as coping mechanisms during this period of home isolation. Collectively, these metrics provide a succinct and informative summary of the socio-economic and health impact of the COVID-19 pandemic on the individuals. Findings from our study reflect that continuous surveillance of the psychological consequences for outbreaks should become routine as part of preparedness efforts worldwide. 
Given the limitations of analyzing the large number of variables, we have made the raw data publicly available on the OMF ME/CFS Data Center server to facilitate further analyses (https://igenomed.stanford.edu/dataset/survey-study-on-lifestyle-changes-during-covid-19-pandemic).",2021-08-13 +31347432,ANDB: Development of a Database Based on a Global Survey of Literature on Areca Nut and Associated Health Effects.,"Areca nut (AN), commonly known as ""Supari"" in India is an addictive substance and widely consumed with or without tobacco as a part of customs in many South East Asian countries. Owing to the adverse health effects of AN, public awareness and stringent government policies to prohibit AN production and regulation of products containing AN should be addressed without further delay. Lack of a research database, motivated us to develop a comprehensive online portal on global survey of published articles with reference to AN. The Areca nut database (ANDB) is a manually curated database which provides the information on global literature according to the publication year, author, population, harmful effects, and associated disease. The present study is an attempt to deliver the relevant information which would be helpful to researchers in prioritizing the research areas with respect to AN and associated health effects. The portal has been developed in MySQL and the interface has been designed using core PhP and CSS, HTML. ANDB is an online resource available to provide global literature of AN in a user-friendly manner. It can be accessed freely on http://arecanut.icmr.org.in/. To the best of our knowledge, ANDB is the first portal delivering inclusive scientific literature related to AN and its health effects. 
This evidence-based scientific information would be useful for policy makers to make guidelines for increasing awareness and implementing the laws for regulated use of this potentially carcinogenic substance, thereby controlling the burden of many dreaded diseases primarily oral submucous fibrosis, cardiovascular disease, and cancers.",2019-07-26 +31905146,iLncRNAdis-FB: Identify lncRNA-Disease Associations by Fusing Biological Feature Blocks Through Deep Neural Network.,"Identification of lncRNA-disease associations is not only important for exploring the disease mechanism, but will also facilitate the molecular targeting drug discovery. Fusing multiple biological information is able to generate a more comprehensive view of lncRNA-disease association feature. However, the existing fusion strategies in this field fail to remove the noisy and irrelevant information from each data source. As a result, their predictive performance is still too low to be applied to real world applications. In this regard, a novel computational predictor called iLncRNAdis-FB is proposed based on the Convolution Neural Network (CNN) to integrate different data sources by using the feature blocks in a supervised manner. The lncRNA similarity matrix and disease similarity matrix are constructed, based on which the three-dimensional feature blocks are generated. These feature blocks are then fed into CNN to train the model so as to predict unknown lncRNA-disease associations. Experimental results show that iLncRNAdis-FB achieves better performance compared with other state-of-the-art predictors. 
Furthermore, a web server of iLncRNAdis-FB has been established at http://bliulab.net/iLncRNAdis-FB/, by which users can submit lncRNA sequences to detect their potential associated diseases.",2021-09-01 +33641081,Ten-Year Outcomes of Percutaneous Coronary Intervention Versus Coronary Artery Bypass Grafting for Patients with Type 2 Diabetes Mellitus Suffering from Left Main Coronary Disease: A Meta-Analysis.,"

Introduction

In this meta-analysis, we aimed to systematically compare the 10-year outcomes of percutaneous coronary intervention (PCI) versus coronary artery bypass grafting (CABG) in patients with type 2 diabetes mellitus (T2DM) suffering from left main coronary artery disease (LMCD).

Methods

Medical Literature Analysis and Retrieval System Online (MEDLINE), http://www.ClinicalTrials.gov , Excerpta Medica dataBASE (EMBASE), Cochrane Central, Web of Science, and Google scholar were searched for publications comparing 10-year outcomes of PCI versus CABG in patients with T2DM suffering from LMCD. Cardiovascular outcomes were considered as the clinical endpoints. Statistical analysis was carried out using RevMan software (version 5.4). Risk ratios (RR) with 95% confidence intervals (CI) were used to represent the data after analysis.

Results

Eight studies (three randomized trials and five observational studies) with a total number of 3835 participants with T2DM were included in this analysis; 2340 participants were assigned to the PCI group and 1495 participants were assigned to the CABG group. Results of this analysis showed that mortality (RR 0.85, 95% CI 0.73-1.00; P = 0.05), myocardial infarction (RR 0.53, 95% CI 0.35-0.80; P = 0.002), repeated revascularization (RR 0.34, 95% CI 0.26-0.46; P = 0.00001), and target vessel revascularization (RR 0.26, 95% CI 0.18-0.38; P = 0.00001) were significantly higher with PCI when compared to CABG in these patients with diabetes and LMCD. Major adverse cardiac and cerebrovascular events were also significantly higher with PCI at 10 years (RR 0.67, 95% CI 0.49-0.92; P = 0.01). However, CABG was associated with a significantly higher risk of stroke (RR 2.16, 95% CI 1.39-3.37; P = 0.0007).

Conclusions

During a long-term follow-up time period of 10 years, PCI was associated with worse clinical outcomes compared to CABG in these patients with T2DM suffering from LMCD. However, a significantly higher risk of stroke was observed with CABG. This piece of information might be vital in order to carefully choose and prevent complications following revascularization in such patients.",2021-02-27 +33935558,"Unlocking the Entomological Collection of the Natural History Museum of Maputo, Mozambique.","

Background

The collections of the Natural History Museum of Maputo have a crucial role in the safeguarding of Mozambique's biodiversity, representing an important repository of data and materials regarding the natural heritage of the country. In this paper, a dataset is described, based on the Museum's Entomological Collection recording 409 species belonging to seven orders and 48 families. Each specimen's available data, such as geographical coordinates and taxonomic information, have been digitised to build the dataset. The specimens included in the dataset were obtained between 1914-2018 by collectors and researchers from the Natural History Museum of Maputo (once known as ""Museu Alváro de Castro"") in all the country's provinces, with the exception of Cabo Delgado Province.

New information

This paper adds data to the Biodiversity Network of Mozambique and the Global Biodiversity Information Facility, within the objectives of the SECOSUD II Project and the Biodiversity Information for Development Programme. The aforementioned insect dataset is available on the GBIF Engine data portal (https://doi.org/10.15468/j8ikhb). Data were also shared on the Mozambican national portal of biodiversity data BioNoMo (https://bionomo.openscidata.org), developed by SECOSUD II Project.",2021-04-21 +34932484,iDRBP-EL: identifying DNA- and RNA- binding proteins based on hierarchical ensemble learning.,"Identification of DNA-binding proteins (DBPs) and RNA-binding proteins (RBPs) from the primary sequences is essential for further exploring protein-nucleic acid interactions. Previous studies have shown that machine-learning-based methods can efficiently identify DBPs or RBPs. However, the information used in these methods is slightly unitary, and most of them only can predict DBPs or RBPs. In this study, we proposed a computational predictor iDRBP-EL to identify DNA- and RNA- binding proteins, and introduced hierarchical ensemble learning to integrate three level information. The method can integrate the information of different features, machine learning algorithms and data into one multi-label model. The ablation experiment showed that the fusion of different information can improve the prediction performance and overcome the cross-prediction problem. Experimental results on the independent datasets showed that iDRBP-EL outperformed all the other competing methods. 
Moreover, we established a user-friendly webserver iDRBP-EL (http://bliulab.net/iDRBP-EL), which can predict both DBPs and RBPs only based on protein sequences.",2021-12-21 +32469063,mCSM-membrane: predicting the effects of mutations on transmembrane proteins.,"Significant efforts have been invested into understanding and predicting the molecular consequences of mutations in protein coding regions, however nearly all approaches have been developed using globular, soluble proteins. These methods have been shown to poorly translate to studying the effects of mutations in membrane proteins. To fill this gap, here we report, mCSM-membrane, a user-friendly web server that can be used to analyse the impacts of mutations on membrane protein stability and the likelihood of them being disease associated. mCSM-membrane derives from our well-established mutation modelling approach that uses graph-based signatures to model protein geometry and physicochemical properties for supervised learning. Our stability predictor achieved correlations of up to 0.72 and 0.67 (on cross validation and blind tests, respectively), while our pathogenicity predictor achieved a Matthew's Correlation Coefficient (MCC) of up to 0.77 and 0.73, outperforming previously described methods in both predicting changes in stability and in identifying pathogenic variants. mCSM-membrane will be an invaluable and dedicated resource for investigating the effects of single-point mutations on membrane proteins through a freely available, user friendly web server at http://biosig.unimelb.edu.au/mcsm_membrane.",2020-07-01 +30450127,Knomics-Biota - a system for exploratory analysis of human gut microbiota data.,

Background

Metagenomic surveys of human microbiota are becoming increasingly widespread in academic research as well as in food and pharmaceutical industries and clinical context. Intuitive tools for investigating experimental data are of high interest to researchers.

Results

Knomics-Biota is a web-based resource for exploratory analysis of human gut metagenomes. Users can generate and share analytical reports corresponding to common experimental schemes (like case-control study or paired comparison). Interactive visualizations and statistical analysis are provided in association with the external factors and in the context of thousands of publicly available datasets arranged into thematic collections. The web-service is available at https://biota.knomics.ru.

Conclusions

Knomics-Biota web service is a comprehensive tool for interactive metagenomic data analysis.,2018-11-06 +32345346,REPIC: a database for exploring the N6-methyladenosine methylome.,"The REPIC (RNA EPItranscriptome Collection) database records about 10 million peaks called from publicly available m6A-seq and MeRIP-seq data using our unified pipeline. These data were collected from 672 samples of 49 studies, covering 61 cell lines or tissues in 11 organisms. REPIC allows users to query N6-methyladenosine (m6A) modification sites by specific cell lines or tissue types. In addition, it integrates m6A/MeRIP-seq data with 1418 histone ChIP-seq and 118 DNase-seq data tracks from the ENCODE project in a modern genome browser to present a comprehensive atlas of m6A methylation sites, histone modification sites, and chromatin accessibility regions. REPIC is accessible at https://repicmod.uchicago.edu/repic.",2020-04-28 +34213534,Linear functional organization of the omic embedding space. ,"We are increasingly accumulating complex omics data that capture different aspects of cellular functioning. A key challenge is to untangle their complexity and effectively mine them for new biomedical information. To decipher this new information, we introduce algorithms based on network embeddings. Such algorithms represent biological macromolecules as vectors in d-dimensional space, in which topologically similar molecules are embedded close in space and knowledge is extracted directly by vector operations. Recently, it has been shown that neural networks used to obtain vectorial representations (embeddings) are implicitly factorizing a mutual information matrix, called Positive Pointwise Mutual Information (PPMI) matrix. 
Thus, we propose the use of the PPMI matrix to represent the human protein-protein interaction (PPI) network and also introduce the Graphlet Degree Vector PPMI matrix of the PPI network to capture different topological (structural) similarities of the nodes in the molecular network. We generate the embeddings by decomposing these matrices with Non-Negative Matrix Tri-Factorization. We demonstrate that genes that are embedded close in these spaces have similar biological functions, so we can extract new biomedical knowledge directly by doing linear operations on their embedding vector representations. We exploit this property to predict new genes participating in protein complexes and to identify new cancer-related genes based on the cosine similarities between the vector representations of the genes. We validate 80% of our novel cancer-related gene predictions in the literature and also by patient survival curves demonstrating that 93.3% of them have a potential clinical relevance as biomarkers of cancer. Code and data are available online at https://gitlab.bsc.es/axenos/embedded-omics-data-geometry/. Supplementary data are available at Bioinformatics online.",2021-07-02 +33413085,"H2V: a database of human genes and proteins that respond to SARS-CoV-2, SARS-CoV, and MERS-CoV infection.","

Background

The ongoing global COVID-19 pandemic is caused by SARS-CoV-2, a novel coronavirus first discovered at the end of 2019. It has led to more than 50 million confirmed cases and more than 1 million deaths across 219 countries as of 11 November 2020, according to WHO statistics. SARS-CoV-2, SARS-CoV, and MERS-CoV are similar. They are highly pathogenic and threaten public health, impair the economy, and inflict long-term impacts on society. No drug or vaccine has been approved as a treatment for these viruses. Efforts to develop antiviral measures have been hampered by the insufficient understanding of how the human body responds to viral infections at the cellular and molecular levels.

Results

In this study, journal articles and transcriptomic and proteomic data surveying coronavirus infections were collected. Response genes and proteins were then identified by differential analyses comparing gene/protein levels between infected and control samples. Finally, the H2V database was created to contain the human genes and proteins that respond to SARS-CoV-2, SARS-CoV, and MERS-CoV infection.

Conclusions

H2V provides molecular information about the human response to infection. It can be a powerful tool to discover cellular pathways and processes relevant for viral pathogenesis to identify potential drug targets. It is expected to accelerate the process of antiviral agent development and to inform preparations for potential future coronavirus-related emergencies. The database is available at: http://www.zhounan.org/h2v .",2021-01-07 +32934280,Analysis of rice nuclear-localized seed-expressed proteins and their database (RSNP-DB).,"Nuclear proteins are primarily regulatory factors governing gene expression. Multiple factors determine the localization of a protein in the nucleus. An upright identification of nuclear proteins is way far from accuracy. We have attempted to combine information from subcellular prediction tools, experimental evidence, and nuclear proteome data to identify a reliable list of seed-expressed nuclear proteins in rice. Depending upon the number of prediction tools calling a protein nuclear, we could sort 19,441 seed expressed proteins into five categories. Of which, half of the seed-expressed proteins were called nuclear by at least one out of four prediction tools. Further, gene ontology (GO) enrichment and transcription factor composition analysis showed that 6116 seed-expressed proteins could be called nuclear with a greater assertion. Localization evidence from experimental data was available for 1360 proteins. Their analysis showed that a 92.04% accuracy of a nuclear call is valid for proteins predicted nuclear by at least three tools. Distribution of nuclear localization signals and nuclear export signals showed that the majority of category four members were nuclear resident proteins, whereas other categories have a low fraction of nuclear resident proteins and significantly higher constitution of shuttling proteins. 
We compiled all the above information for the seed-expressed genes in the form of a searchable database named Rice Seed Nuclear Protein DataBase (RSNP-DB) https://pmb.du.ac.in/rsnpdb . This information will be useful for comprehending the role of seed nuclear proteome in rice.",2020-09-15 +33185687,CGPE: an integrated online server for Cancer Gene and Pathway Exploration.,"

Summary

Cancer Gene and Pathway Explorer (CGPE) is developed to guide biological and clinical researchers, especially those with limited informatics and programming skills, performing preliminary cancer-related biomedical research using transcriptional data and publications. CGPE enables three user-friendly online analytical and visualization modules without requiring any local deployment. The GenePub HotIndex applies natural language processing, statistics and association discovery to provide analytical results on gene-specific PubMed publications, including gene-specific research trends, cancer types correlations, top-related genes and the WordCloud of publication profiles. The OnlineGSEA enables Gene Set Enrichment Analysis (GSEA) and results visualizations through an easy-to-follow interface for public or in-house transcriptional datasets, integrating the GSEA algorithm and preprocessed public TCGA and GEO datasets. The preprocessed datasets ensure gene sets analysis with appropriate pathway alternation and gene signatures. The CellLine Search presents evidence-based guidance for cell line selections with combined information on cell line dependency, gene expressions and pathway activity maps, which are valuable knowledge to have before conducting gene-related experiments. In a nutshell, the CGPE webserver provides a user-friendly, visual, intuitive and informative bioinformatics tool that allows biomedical researchers to perform efficient analyses and preliminary studies on in-house and publicly available bioinformatics data.

Availability and implementation

The webserver is freely available online at https://cgpe.soic.iupui.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-08-01 +34820494,Data on the genome of Bacillus subtilis A1- Midalam from beach soil.,"The draft genome sequence of Bacillus subtilis A1, isolated from beach soil, has been shown to produce biofilm. The genome size is 4,215,114 bp with an average G+C content of 43.5%. The genome of Bacillus subtilis A1 has 4413 total genes which include 4166 protein-coding sequences, 126 pseudo genes, 10 rRNA genes with 3 operons (5S, 16S and 23S), 86 tRNA genes and 5 noncoding RNA (ncRNA) genes. The genome contains genes coding for surfactin, fengycin, bacillaene, sublancin 168, bacillibactin, subtilosin A, bacilysin. The whole genome project has been deposited in GenBank under the accession number CP075344.1. The raw data is available at https://www.ncbi.nlm.nih.gov/nuccore/CP075344.1.",2021-11-07 +34637380,"To improve the predictions of binding residues with DNA, RNA, carbohydrate, and peptide via multi-task deep neural networks. ","The interactions of proteins with DNA, RNA, peptide, and carbohydrate play key roles in various biological processes. The studies of uncharacterized proteinmolecules interactions could be aided by accurate predictions of residues that bind with partner molecules. However, the existing methods for predicting binding residues on proteins remain of relatively low accuracies due to the limited number of complex structures in databases. As different types of molecules partially share chemical mechanisms, the predictions for each molecular type should benefit from the binding information with other molecules types. In this study, we employed a multiple task deep learning strategy to develop a new sequence-based method for simultaneously predicting binding residues/sites with multiple important molecule types named MTDsite. 
By combining four training sets for DNA, RNA, peptide, and carbohydrate-binding proteins, our method yielded accurate and robust predictions with AUC values of 0.852, 0.836, 0.758, and 0.776 on their respective independent test sets, which are 0.52 to 6.6% better than other state-of-the-art methods. To my best knowledge, this is the first method using multi-task framework to predict multiple molecular binding sites simultaneously. http://biomed.nscc-gz.cn/server/MTDsite/ Contact: yangyd25@mail.sysu.edu.cn.",2021-10-12 +34487138,DisoLipPred: Accurate prediction of disordered lipid binding residues in protein sequences with deep recurrent networks and transfer learning. ,"Intrinsically disordered protein regions interact with proteins, nucleic acids and lipids. Regions that bind lipids are implicated in a wide spectrum of cellular functions and several human diseases. Motivated by the growing amount of experimental data for these interactions and lack of tools that can predict them from the protein sequence, we develop DisoLipPred, the first predictor of the disordered lipid-binding residues (DLBRs). DisoLipPred relies on a deep bidirectional recurrent network that implements three innovative features: transfer learning, bypass module that sidesteps predictions for putative structured residues, and expanded inputs that cover physiochemical properties associated with the protein-lipid interactions. Ablation analysis shows that these features drive predictive quality of DisoLipPred. Tests on an independent test dataset and the yeast proteome reveal that DisoLipPred generates accurate results and that none of the related existing tools can be used to indirectly identify DLBR. We also show that DisoLipPred's predictions complement the results generated by predictors of the transmembrane regions. Altogether, we conclude that DisoLipPred provides high-quality predictions of DLBRs that complement the currently available methods. 
DisoLipPred's webserver is available at http://biomine.cs.vcu.edu/servers/DisoLipPred/. Supplementary data are available at Bioinformatics online.",2021-09-06 +33502254,Redlines and Greenspace: The Relationship between Historical Redlining and 2010 Greenspace across the United States.,"

Introduction

Redlining, a racist mortgage appraisal practice of the 1930s, established and exacerbated racial residential segregation boundaries in the United States. Investment risk grades assigned >80y ago through security maps from the Home Owners' Loan Corporation (HOLC) are associated with current sociodemographics and adverse health outcomes. We assessed whether historical HOLC investment grades are associated with 2010 greenspace, a health-promoting neighborhood resource.

Objectives

We compared 2010 normalized difference vegetation index (NDVI) across previous HOLC neighborhood grades using propensity score restriction and matching.

Methods

Security map shapefiles were downloaded from the Mapping Inequality Project. Neighborhood investment risk grades included A (best, green), B (blue), C (yellow), and D (hazardous, red, i.e., redlined). We used 2010 satellite imagery to calculate the average NDVI for each HOLC neighborhood. Our main outcomes were 2010 annual average NDVI and summer NDVI. We assigned areal-apportioned 1940 census measures to each HOLC neighborhood. We used propensity score restriction, matching, and targeted maximum likelihood estimation to limit model extrapolation, reduce confounding, and estimate the association between HOLC grade and NDVI for the following comparisons: Grades B vs. A, C vs. B, and D vs. C.

Results

Across 102 urban areas (4,141 HOLC polygons), annual average ±standard deviation (SD) 2010 NDVI was 0.47 (±0.09), 0.43 (±0.09), 0.39 (±0.09), and 0.36 (±0.10) in Grades A-D, respectively. In analyses adjusted for current ecoregion and census region, 1940s census measures, and 1940s population density, annual average NDVI values in 2010 were estimated at -0.039 (95% CI: -0.045, -0.034), -0.024 (95% CI: -0.030, -0.018), and -0.026 (95% CI: -0.037, -0.015) for Grades B vs. A, C vs. B, and D vs. C, respectively, in the 1930s.

Discussion

Estimates adjusted for historical characteristics indicate that neighborhoods assigned worse HOLC grades in the 1930s are associated with reduced present-day greenspace. https://doi.org/10.1289/EHP7495.",2021-01-27 +32601280,Incorporating hybrid models into lysine malonylation sites prediction on mammalian and plant proteins.,"Protein malonylation, a reversible post-translational modification of lysine residues, is associated with various biological functions, such as cellular regulation and pathogenesis. In proteomics, to improve our understanding of the mechanisms of malonylation at the molecular level, the identification of malonylation sites via an efficient methodology is essential. However, experimental identification of malonylated substrates via mass spectrometry is time-consuming, labor-intensive, and expensive. Although numerous methods have been developed to predict malonylation sites in mammalian proteins, the computational resource for identifying plant malonylation sites is very limited. In this study, a hybrid model incorporating multiple convolutional neural networks (CNNs) with physicochemical properties, evolutionary information, and sequenced-based features was developed for identifying protein malonylation sites in mammals. For plant malonylation, multiple CNNs and random forests were integrated into a secondary modeling phase using a support vector machine. The independent testing has demonstrated that the mammalian and plant malonylation models can yield the area under the receiver operating characteristic curves (AUC) at 0.943 and 0.772, respectively. The proposed scheme has been implemented as a web-based tool, Kmalo (https://fdblab.csie.ncu.edu.tw/kmalo/home.html), which can help facilitate the functional investigation of protein malonylation on mammals and plants.",2020-06-29 +,First Report of Wheat dwarf virus in Winter Wheat in Estonia,"Cereals are the most important crops in the Baltic Sea Region. 
In 2017, cereal cultivation occupied 46.3% of agricultural growth area in Estonia (Statistics Estonia, https://www.stat.ee/). In July 2017, two fields of winter wheat (Triticum aestivum) cultivar Olivin showing general yellowing and chlorosis, dwarfing, and reduced heading were reported by the grower at Meremäe, Võru county, Southeastern Estonia (57°45′03.6′′N, 27°29′03.1′′E). Twenty symptomatic plants were collected for the analysis. RNA was extracted from leaves using the method described by Oñate-Sánchez and Vicente-Carbajosa (2008) without DNase treatment. RT-PCR was used for detection of yellow dwarf luteoviruses and poleroviruses by the generic primer pair of YanR (5′-TGTTGAGGAGTCTACCTATTTG-3′) and ShuF (5′-TACGGTAAGTGCCCAACTCC-3′) and of oat sterile dwarf fijivirus by the segment 10 specific primer pair of Fseg10_700R (5′-GCTAAACGGTTGAACTCTTTGT-3′) and Fseg10_18F (5′-ACTGTCCATTCCTGCAACACT-3′). None of the samples were positive for these viruses. Therefore, the material was decided to be analyzed by high-throughput sequencing. A sequencing library respective of the small RNA pool of all samples was prepared and indexed using TruSeq siRNA kit (Illumina). The sequencing of 1 × 50 nt reads was performed on the Illumina HiSeq2500 platform (University of Tartu, Estonia). Sequencing data were analyzed using Geneious 11.1.5 (Biomatters). After removal of adaptor sequences and quality control, the reads were mapped to a custom-made plant virus genome database consisting of Poaceae-infecting species. Presence of wheat dwarf virus (WDV; genus Mastrevirus, family Geminiviridae) was identified. In total, 203,631 of 71.7 million reads were mapped on the assembled genome with a mean coverage of 1,815.4 nt per position spanning 100% of the genome. The most abundant size class of the reads was 24 nt. A complete genome of 2,750 nt was assembled and deposited in GenBank (accession no. MK193742). 
Presence of WDV was also confirmed by amplification of a 1,201-bp product in PCR using the primer pair WDV-F (5′-CTTACGGAGTAGAGATGTTC-3′) and WDV-R (5′-AACAGAGTGTAAGCAAGCCA-3′; Kvarnheden et al. 2002), annealing to the positions 1,877 to 1,896 and 328 to 309 in the circular ssDNA genome of WDV, respectively. In MUSCLE multiple sequence alignment with 300 other genomic sequences of WDV isolates, Ukrainian isolate Uk-Miron (GenBank accession no. FN806784) showed the highest percentage of identity (99.2%) with our isolate. In a maximum likelihood phylogenetic tree calculated using the GTR substitution model with 500 bootstrap replicates, the Estonian isolate grouped together with other isolates of wheat strain WDV/E, being again most closely related to Uk-Miron. WDV has been previously identified as an economically important pathogen in several countries in Europe, Asia, and North Africa. Apart from Estonia, WDV has been reported in Sweden (Kvarnheden et al. 2002) and Finland (Lemmetty and Huusela-Veistola 2005). According to our knowledge, WDV has not been previously found in the Baltic States or Russia. In this report, we identified the occurrence of WDV for the first time in Estonia, in a location close to Russian and Latvian border, suggesting a spread of WDV possibly also in these neighboring countries. Earlier, in 2013 to 2015, we used high-throughput sequencing to analyze the cereal samples collected from 47 fields mainly located in Southeastern Estonia but did not detect WDV. Therefore, it is either not distributed widely or there were unfavorable spread conditions for WDV during these years.",2019-07-01 +34282449,DMIL-IsoFun: predicting isoform function using deep multi-instance learning. ,"Alternative splicing creates the considerable proteomic diversity and complexity on relatively limited genome. 
Proteoforms translated from alternatively spliced isoforms of a gene actually execute the biological functions of this gene, which reflect the functional knowledge of genes at a finer granular level. Recently, some computational approaches have been proposed to differentiate isoform functions using sequence and expression data. However, their performance is far from being desirable, mainly due to the imbalance and lack of annotations at isoform-level, and the difficulty of modeling gene-isoform relations. We propose a deep multi-instance learning based framework (DMIL-IsoFun) to differentiate the functions of isoforms. DMIL-IsoFun firstly introduces a multi-instance learning convolution neural network trained with isoform sequences and gene-level annotations to extract the feature vectors and initialize the annotations of isoforms, and then uses a class-imbalance Graph Convolution Network to refine the annotations of individual isoforms based on the isoform co-expression network and extracted features. Extensive experimental results show that DMIL-IsoFun improves the Smin and Fmax of state-of-the-art solutions by at least 29.6% and 40.8%. The effectiveness of DMIL-IsoFun is further confirmed on a testbed of human multiple-isoform genes, and Maize isoforms related with photosynthesis. The code and data are available at http://www.sdu-idea.cn/codes.php?name=DMIL-Isofun. Supplementary data are available at Bioinformatics online.",2021-07-20 +34636837,ImmuCellAI-mouse: a tool for comprehensive prediction of mouse immune cell abundance and immune microenvironment depiction. ,"Immune cells are important components of the immune system and are crucial for disease initiation, progression, prognosis, and survival. Although several computational methods have been designed for predicting the abundance of immune cells, very few tools are applicable to mouse. 
Given that mouse is the most widely used animal model in biomedical research, there is an urgent need to develop a precise algorithm for predicting mouse immune cells. We developed a tool named ImmuCellAI-mouse (Immune Cell Abundance Identifier for mouse), for estimating the abundance of 36 immune cell (sub)types from gene expression data in a hierarchical strategy of three layers. Reference expression profile and robust marker gene sets of immune cell types were curated. The abundance of cells in three layers was predicted separately by calculating the ssGSEA enrichment score of the expression deviation profile per cell type. Benchmark results showed high accuracy of ImmuCellAI-mouse in predicting most immune cell types, with correlation coefficients between predicted value and real cell proportion of most cell types being larger than 0.8. We applied ImmuCellAI-mouse to a mouse breast tumor dataset and revealed the dynamic change of immune cell infiltration during treatment, which is consistent with the findings of the original study but with more details. We also constructed an online server for ImmuCellAI-mouse, on which users can upload expression matrices for analysis. ImmuCellAI-mouse will be a useful tool for studying the immune microenvironment, cancer immunology, and immunotherapy in mouse models, providing an indispensable supplement for human disease studies. Software is available at http://bioinfo.life.hust.edu.cn/ImmuCellAI-mouse/. Supplementary data are available at Bioinformatics online.",2021-10-12 +28708831,biochem4j: Integrated and extensible biochemical knowledge through graph databases.,"Biologists and biochemists have at their disposal a number of excellent, publicly available data resources such as UniProt, KEGG, and NCBI Taxonomy, which catalogue biological entities. Despite the usefulness of these resources, they remain fundamentally unconnected. 
While links may appear between entries across these databases, users are typically only able to follow such links by manual browsing or through specialised workflows. Although many of the resources provide web-service interfaces for computational access, performing federated queries across databases remains a non-trivial but essential activity in interdisciplinary systems and synthetic biology programmes. What is needed are integrated repositories to catalogue both biological entities and-crucially-the relationships between them. Such a resource should be extensible, such that newly discovered relationships-for example, those between novel, synthetic enzymes and non-natural products-can be added over time. With the introduction of graph databases, the barrier to the rapid generation, extension and querying of such a resource has been lowered considerably. With a particular focus on metabolic engineering as an illustrative application domain, biochem4j, freely available at http://biochem4j.org, is introduced to provide an integrated, queryable database that warehouses chemical, reaction, enzyme and taxonomic data from a range of reliable resources. The biochem4j framework establishes a starting point for the flexible integration and exploitation of an ever-wider range of biological data sources, from public databases to laboratory-specific experimental datasets, for the benefit of systems biologists, biosystems engineers and the wider community of molecular biologists and biological chemists.",2017-07-14 +34026967,Milk microfiltration process dataset annotated from a collection of scientific papers.,"Milk microfiltration process plays a key role in the dairy industry. Crossflow microfiltration of skimmed milk using a membrane with 0.1 µm mean pore size is widely used to fractionate the two main groups of dairy proteins: casein micelles (~150 nm) and serum proteins (~2-15 nm). 
Retentate, containing mainly casein micelles, is generally used to enrich vat milk for cheese making. Permeate, containing serum proteins, lactose and minerals, is usually ultrafiltered in order to produce protein-rich concentrate with a high nutritional value dedicated to specific populations such as infants and seniors. The great interest in these protein fractions explains the increasing number of microfiltration equipments in the dairy industry. This data article contains data associated with milk microfiltration process experiments and properties of the resulting dairy fractions annotated from a collection of scientific documents. These data are stored in INRAE public repository (see Data accessibility in the Specification Table for direct links to data). They have been structured using MILK MICROFILTRATION ontology and are replicated in @Web data warehouse providing additional querying tools (https://www6.inrae.fr/cati-icat-atweb/).",2021-04-17 +30447998,BioJupies: Automated Generation of Interactive Notebooks for RNA-Seq Data Analysis in the Cloud.,"BioJupies is a web application that enables the automated creation, storage, and deployment of Jupyter Notebooks containing RNA-seq data analyses. Through an intuitive interface, novice users can rapidly generate tailored reports to analyze and visualize their own raw sequencing files, gene expression tables, or fetch data from >9,000 published studies containing >300,000 preprocessed RNA-seq samples. Generated notebooks have the executable code of the entire pipeline, rich narrative text, interactive data visualizations, differential expression, and enrichment analyses. The notebooks are permanently stored in the cloud and made available online through a persistent URL. The notebooks are downloadable, customizable, and can run within a Docker container. 
By providing an intuitive user interface for notebook generation for RNA-seq data analysis, starting from the raw reads all the way to a complete interactive and reproducible report, BioJupies is a useful resource for experimental and computational biologists. BioJupies is freely available as a web-based application from http://biojupies.cloud.",2018-11-14 +35049081,Parsimony analysis of phylogenomic datasets (I): scripts and guidelines for using TNT (Tree Analysis using New Technology).,"We discuss here the use of TNT (Tree Analysis using New Technology) for phylogenomic analysis. For such data, parsimony is a useful alternative to model-based analyses, which frequently utilize models that make unrealistic assumptions (e.g. low heterotachy), struggle with high levels of missing data, etc. Parsimony and model-based methods often yield trees with few topological differences, which can then be analyzed further in order to investigate whether these few topological differences are due to undesirable analysis artefacts. This is facilitated by the greater speed and computational efficiency of parsimony, which allow for a more in-depth analysis of datasets. We here briefly describe the computationally most efficient and versatile parsimony software, TNT, which can be used for phylogenetic and phylogenomic analyses. In particular, we describe and provide a series of scripts that are specifically designed for the analysis of phylogenomic datasets. This includes scripts for concatenation of gene data files in different formats, generation of plots and datasets with different levels of gene/taxon occupancy, calculation of different support measures and phylogenetic reconstruction based on concatenated matrices and single genes. The execution of the scripts is also demonstrated with video clips (https://www.youtube.com/channel/UCpIgK8sVH-yK0Bo3fK62IxA). 
Lastly, we describe the main commands and functions that enable efficient phylogenomic analyses in TNT.",2021-07-14 +33978717,ProteoMill: Efficient network-based functional analysis portal for proteomics data. ,"Functional analysis has become a common approach to incorporate biological knowledge into the analysis of omics data, and to explore molecular events that govern a disease state. It is though only one step in a wider analytical pipeline that typically requires use of multiple individual analysis software. There is currently a need for a well-integrated omics analysis tool that performs all the steps. The ProteoMill portal is developed as an R Shiny application and integrates all necessary steps from data-upload, converting identifiers, to quality control, differential expression and network-based functional analysis into a single fast, interactive easy to use workflow. Further, it maintains annotation data sources up to date, overcoming a common problem with use of outdated information, and seamlessly integrates multiple R-packages for an improved user-experience. The functionality provided in this software can benefit researchers by facilitating the exploratory analysis of proteomics data. ProteoMill is available at https://proteomill.com.",2021-05-12 +33872484,"Korea National Health and Nutrition Examination Survey, 20th anniversary: accomplishments and future directions.","The Korea National Health and Nutrition Examination Survey (KNHANES) was initiated in 1998 to provide evidence for the development and evaluation of health policies and programs. The Korea Disease Control and Prevention Agency is responsible for the KNHANES and has conducted it as a series of surveys. 
Over the past 20 years, efforts to produce accurate, timely, and nationwide health statistics have been refined by establishing a continuous annual survey system with full-time field staff, incrementally expanding survey components, collaborating with relevant academic societies for quality control, and revising the survey methods. Additionally, the utility of the collected data was increased by linking the KNHANES data with related data from other government agencies or institutions and making the overall data publicly available on the official website of KNHANES (https://knhanes.kdca.go.kr). Additional long-term plans are being developed, including plans to continue producing nationwide health indicators and invigorating the utilization of the KNHANES data.",2021-04-19 +29351546,Accessing an Expanded Exposure Science Module at the Comparative Toxicogenomics Database.,"SUMMARY:The Comparative Toxicogenomics Database (CTD; http://ctdbase.org) is a free resource that provides manually curated information on chemical, gene, phenotype, and disease relationships to advance understanding of the effect of environmental exposures on human health. Four core content areas are independently curated: chemical-gene interactions, chemical-disease and gene-disease associations, chemical-phenotype interactions, and environmental exposure data (e.g., effects of chemical stressors on humans). Since releasing exposure data in 2015, we have vastly increased our coverage of chemicals and disease/phenotype outcomes; greatly expanded access to exposure content; added search capability by stressors, cohorts, population demographics, and measured outcomes; and created user-specified displays of content. These enhancements aim to facilitate human studies by allowing comparisons among experimental parameters and across studies involving specified chemicals, populations, or outcomes. 
Integration of data among CTD's four content areas and external data sets, such as Gene Ontology annotations and pathway information, links exposure data with over 1.8 million chemical-gene, chemical-disease and gene-disease interactions. Our analysis tools reveal direct and inferred relationships among the data and provide opportunities to generate predictive connections between environmental exposures and population-level health outcomes. https://doi.org/10.1289/EHP2873.",2018-01-18 +33306800,CEG 2.0: an updated database of clusters of essential genes including eukaryotic organisms. ,"Essential genes are key elements for organisms to maintain their living. Building databases that store essential genes in the form of homologous clusters, rather than storing them as a singleton, can provide more enlightening information such as the general essentiality of homologous genes in multiple organisms. In 2013, the first database to store prokaryotic essential genes in clusters, CEG (Clusters of Essential Genes), was constructed. Afterward, the amount of available data for essential genes increased by a factor >3 since the last revision. Herein, we updated CEG to version 2, including more prokaryotic essential genes (from 16 gene datasets to 29 gene datasets) and newly added eukaryotic essential genes (nine species), specifically the human essential genes of 12 cancer cell lines. For prokaryotes, information associated with drug targets, such as protein structure, ligand-protein interaction, virulence factor and matched drugs, is also provided. Finally, we provided the service of essential gene prediction for both prokaryotes and eukaryotes. We hope our updated database will benefit more researchers in drug targets and evolutionary genomics. Database URL: http://cefg.uestc.cn/ceg.",2020-12-01 +31788184,Novel insights into how the mean and heterogeneity of abiotic conditions together shape forb species richness patterns in the Allegheny plateau ecoregion.,"

Abstract

While plant community theory tends to emphasize the importance of abiotic heterogeneity along niche axes, much empirical work seeks to characterize the influence of the absolute magnitude of key abiotic variables on diversity. Both magnitude (as reflected, e.g., by a mean) and heterogeneity (variance) in abiotic conditions likely contribute to biodiversity patterns in plant communities, but given the large number of putative abiotic drivers and the fact that each may vary at different spatiotemporal scales, the challenge of linking observed biotic patterns with the underlying environment remains acute. Using monitoring data from a natural resource agency, we compared how well statistical models of the mean, heterogeneity, and both the mean and heterogeneity combined of 17 abiotic factor variables explained patterns of forb species richness in Northeast Ohio, USA. We performed our analyses at two spatial scales, repeated in spring and summer across four forest types. Although all models explained a great deal of the variance in species richness, models including both the mean and heterogeneity of different abiotic factors together outperformed models including either the mean or the heterogeneity of abiotic factors alone. Variability in forb species richness was mostly due to changes in mean calcium levels regardless of forest type. After accounting for forest type, we were able to attribute variation in forb species richness to changes in the heterogeneity of different abiotic factors as well. Our results suggest that multiple mechanisms act simultaneously according to different aspects of the abiotic environment to structure forb communities, and this underscores the importance of considering both the magnitude of and heterogeneity in multiple abiotic factors when looking for links between the abiotic environment and plant community patterns. 
Finally, we identify novel patterns across spatial scales, forest types, and seasons that can guide future research in this vein.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.kp3cb17.",2019-09-30 +32627830,Sequence Compression Benchmark (SCB) database-A comprehensive evaluation of reference-free compressors for FASTA-formatted sequences. ,"Nearly all molecular sequence databases currently use gzip for data compression. Ongoing rapid accumulation of stored data calls for a more efficient compression tool. Although numerous compressors exist, both specialized and general-purpose, choosing one of them was difficult because no comprehensive analysis of their comparative advantages for sequence compression was available. We systematically benchmarked 430 settings of 48 compressors (including 29 specialized sequence compressors and 19 general-purpose compressors) on representative FASTA-formatted datasets of DNA, RNA, and protein sequences. Each compressor was evaluated on 17 performance measures, including compression strength, as well as time and memory required for compression and decompression. We used 27 test datasets including individual genomes of various sizes, DNA and RNA datasets, and standard protein datasets. We summarized the results as the Sequence Compression Benchmark database (SCB database, http://kirr.dyndns.org/sequence-compression-benchmark/), which allows custom visualizations to be built for selected subsets of benchmark results. We found that modern compressors offer a large improvement in compactness and speed compared to gzip. 
Our benchmark allows compressors and their settings to be compared using a variety of performance measures, offering the opportunity to select the optimal compressor on the basis of the data type and usage scenario specific to a particular application.",2020-07-01 +34626303,Development of a proteochemometric-based support vector machine model for predicting bioactive molecules of tubulin receptors.,"Microtubules are receiving enormous interest in drug discovery due to the important roles they play in cellular functions. Targeting tubulin polymerization presents an excellent opportunity for the development of anti-tubulin drugs. Drug resistance and high toxicity of currently used tubulin-binding agents have necessitated the pursuit of novel drug candidates with increased therapeutic potency. The design of novel drug candidates can be achieved using efficient computational techniques to support existing efforts. Proteochemometric (PCM) modeling is a computational technique that can be employed to elucidate the bioactivity relations between related targets and multiple ligands. We have developed a PCM-based Support Vector Machine (SVM) approach for predicting the bioactivity between tubulin receptors and small, drug-like molecules. The bioactivity datasets used for training the SVM algorithm were obtained from the Binding DB database. The SVM-based PCM model yielded a good overall predictive performance with an area under the curve (AUC) of 87%, Matthews correlation coefficient (MCC) of 72%, overall accuracy of 93%, and a classification error of 7%. The algorithm allows the prediction of the likelihood of new interactions based on confidence scores between the query datasets, comprising ligands in SMILES format and protein sequences of tubulin targets. 
The algorithm has been implemented as a web server known as TubPred, accessible via http://35.167.90.225:5000/ .",2021-10-09 +34115950,SC1: A Tool for Interactive Web-Based Single-Cell RNA-Seq Data Analysis.,"Single-cell RNA-Seq (scRNA-Seq) is critical for studying cellular function and phenotypic heterogeneity as well as the development of tissues and tumors. In this study, we present SC1 a web-based highly interactive scRNA-Seq data analysis tool publicly accessible at https://sc1.engr.uconn.edu. The tool presents an integrated workflow for scRNA-Seq analysis, implements a novel method of selecting informative genes based on term-frequency inverse-document-frequency scores, and provides a broad range of methods for clustering, differential expression analysis, gene enrichment, interactive visualization, and cell cycle analysis. The tool integrates other single-cell omics data modalities such as T-cell receptor (TCR)-Seq and supports several single-cell sequencing technologies. In just a few steps, researchers can generate a comprehensive analysis and gain powerful insights from their scRNA-Seq data.",2021-06-11 +33045361,tRFTar: Prediction of tRF-target gene interactions via systemic re-analysis of Argonaute CLIP-seq datasets.,"tRNA-derived fragments (tRFs), which by definition are cleaved from tRNAs, comprise a novel class of regulatory small non-coding RNAs. Recent evidence has revealed that tRFs can be loaded onto Argonaute (AGO) family proteins to perform post-transcriptional regulations via substantial tRF-target gene interactions (TGIs). However, there is no resource that systematically profiles potential AGO-mediated TGIs. To this end, we performed a systemic computational screening of potential AGO-mediated TGIs by a re-analysis of 146 crosslinking-immunoprecipitation and high-throughput sequencing (CLIP-seq) datasets in which 920,690 TGIs between 12,102 tRFs and 5,688 target genes were identified. 
The predicted TGIs have superior signal-to-noise ratio and good consistency with TGIs identified from an orthogonal technique. AGO-bound tRFs are not evenly distributed, where the 5'-tRF and 3'-tRF are enriched and some commonly expressed tRFs are also overrepresented. The tRFs tend to target conserved regions of transcripts and co-express with their target genes. Filtering TGIs with consistent co-expression with target genes results in a set of regulatory TGIs that contains 25,281 tRF-target pairs. Together, our results unveiled the extensive regulatory interactions between tRFs and target genes. Finally, the CLIP-derived TGIs were incorporated in a user-friendly online platform termed as tRFTar, where various functions like custom searching, co-expressed TGI filtering, genome browser and TGI-based tRF functional enrichment analysis are enabled to help users to investigate the functions of tRFs. The tRFTar is freely available at http://www.rnanut.net/tRFTar/.",2020-10-09 +33776956,AMRmap: An Interactive Web Platform for Analysis of Antimicrobial Resistance Surveillance Data in Russia.,"Surveillance of antimicrobial resistance (AMR) is crucial for identifying trends in resistance and developing strategies for prevention and treatment of infections. Globally, AMR surveillance systems differ in terms of organizational principles, comprehensiveness, accessibility, and usability of data presentation. Until recently, the data on AMR in Russia were scarcely available, especially to international community, despite the fact that the large prospective multicenter surveillance in Russia was conducted and data were accumulated for over 20 years. We describe the source of data, structure, and functionality of a new-generation web platform, called AMRmap (https://amrmap.net/), for analysis of AMR surveillance data in Russia. 
The developed platform currently comprises susceptibility data of >40,000 clinical isolates, and the data on abundance of key resistance determinants, including acquired carbapenemases in gram-negatives, are updated annually with information on >5,000 new isolates. The AMRmap allows smart data filtration by multiple parameters and provides interactive data analysis and visualization tools: MIC and S/I/R distribution plots, time-trends and regression plots, associated resistance plots, prevalence maps, statistical significance graphs, and tables.",2021-03-12 +34958962,Polar Gini Curve: A Technique to Discover Gene Expression Spatial Patterns from Single-cell RNA-seq Data.,"In this work, we describe the development of Polar Gini Curve, a method for characterizing cluster markers by analyzing single-cell RNA sequencing (scRNA-seq) data. Polar Gini Curve combines the gene expression and the 2D coordinates (""spatial"") information to detect patterns of uniformity in any clustered cells from scRNA-seq data. We demonstrate that Polar Gini Curve can help users characterize the shape and density distribution of cells in a particular cluster, which can be generated during routine scRNA-seq data analysis. To quantify the extent to which a gene is uniformly distributed in a cell cluster space, we combine two polar Gini curves (PGCs)-one drawn upon the cell-points expressing the gene (the ""foreground curve"") and the other drawn upon all cell-points in the cluster (the ""background curve""). We show that genes with highly dissimilar foreground and background curves tend not to uniformly distributed in the cell cluster-thus having spatially divergent gene expression patterns within the cluster. Genes with similar foreground and background curves tend to uniformly distributed in the cell cluster-thus having uniform gene expression patterns within the cluster. 
Such quantitative attributes of PGCs can be applied to sensitively discover biomarkers across clusters from scRNA-seq data. We demonstrate the performance of the Polar Gini Curve framework in several simulation case studies. Using this framework to analyze a real-world neonatal mouse heart cell dataset, the detected biomarkers may characterize novel subtypes of cardiac muscle cells. The source code and data for Polar Gini Curve could be found at http://discovery.informatics.uab.edu/PGC/ or https://figshare.com/projects/Polar_Gini_Curve/76749.",2021-06-01 +29555687,Proteomic Analysis of the Cell Cycle of Procylic Form Trypanosoma brucei.,"We describe a single-step centrifugal elutriation method to produce synchronous Gap1 (G1)-phase procyclic trypanosomes at a scale amenable for proteomic analysis of the cell cycle. Using ten-plex tandem mass tag (TMT) labeling and mass spectrometry (MS)-based proteomics technology, the expression levels of 5325 proteins were quantified across the cell cycle in this parasite. Of these, 384 proteins were classified as cell-cycle regulated and subdivided into nine clusters with distinct temporal regulation. These groups included many known cell cycle regulators in trypanosomes, which validates the approach. In addition, we identify 40 novel cell cycle regulated proteins that are essential for trypanosome survival and thus represent potential future drug targets for the prevention of trypanosomiasis. Through cross-comparison to the TrypTag endogenous tagging microscopy database, we were able to validate the cell-cycle regulated patterns of expression for many of the proteins of unknown function detected in our proteomic analysis. A convenient interface to access and interrogate these data is also presented, providing a useful resource for the scientific community. 
Data are available via ProteomeXchange with identifier PXD008741 (https://www.ebi.ac.uk/pride/archive/).",2018-03-19 +34753535,Evaluation of the ocular safety associated with the exhalation delivery system with fluticasone.,"Background: Intranasal corticosteroids (INCS) are the cornerstone of treatment for chronic rhinosinusitis. Although INCS are generally considered safe and effective, there is a concern that chronic use may lead to ocular adverse effects. Objective: To assess ocular safety of the exhalation delivery system with fluticasone propionate (EDS-FLU) in patients with chronic rhinosinusitis with nasal polyps. Methods: Ocular safety data were collected during two randomized, double-blind, placebo controlled studies with open-label extensions. Ophthalmologists performed tonometry, slit-lamp, and visual acuity examinations to assess intraocular pressure (IOP) and the presence of cataracts. Ocular examinations were conducted before double-blind treatment, at the end of the 16-week double-blind phase, and at the end of the 8-week open-label phase. The results of pooled data from patients who received EDS-FLU 186 µg (n = 160), EDS-FLU 372 µg (n = 161), and EDS-placebo (n = 161) twice daily are reported here. Results: At the end of the double-blind phase, six patients developed elevated average IOP > 21 mm Hg: two patients (1.2%) in the EDS-placebo group, three patients (1.9%) in the EDS-FLU 186 µg group, and one patient (0.6%) in the EDS-FLU 372 µg group. In addition, 6 of 482 patients developed cataracts: 3 patients in the EDS-placebo group, 2 patients in the EDS-FLU 186 µg group, and 1 patient in the EDS-FLU 372 µg group. At the end of the open-label phase, two additional patients showed IOP > 21 mm Hg and two additional patients developed cataracts. 
Conclusion: No increased risk of elevated IOP was detected with EDS-FLU; the rate of cataract development was similar to EDS-placebo and to that reported with other INCS. Clinical trials NCT01622569 and NCT01624662, www.clinicaltrials.gov.",2021-11-09 +,331 Efficient quality control methods for genomic and pedigree data used in routine genomic evaluation,"Abstract Quality control and consistency tests on genotypes and historical pedigree data are applied in a routine genomic evaluation and academic research. The quality control takes more time to finish as more genotypes become available, and this step is a bottleneck in a pipeline of routine evaluation. For the efficient quality control, we have developed several algorithms and a computer program to support large-scale, biallelic, single nucleotide polymorphisms (SNPs). The program is designed to detect unsatisfactory genomic markers and individuals in terms of call rate, marker allele frequencies, duplicate samples, and Mendelian inconsistency in the large genomic data with the pedigree including millions of individuals. Duplicated genotypes can be detected using a set of markers. An SNP genotype is packed into a 2-bit representation in memory that enables bitwise operations with parallel computing to efficiently perform the quality control. The software optionally checks the inconsistency of pedigree information. We compared QCF90 with preGSf90, a preceding program, in terms of memory usage and computing time using a data set including 200,000 genotyped individuals, 50,000 SNP markers per individual, and 216,500 pedigree individuals. In total running time, QCF90 was approximately 6 times faster than PREGSF90 (307 s vs 2075 s) while the memory usage was 30 times less (2 GB vs 75 GB) using only 1 thread. The QCF90 program performed better in speed as more threads were used.
A check for genomic duplications took 159 s with 16 threads when 5,000 genotypes were compared with 200,000 genotypes using 2500 SNP markers. The new tool is useful in the routine genomic evaluation and the academic research in which both the genotypes and the pedigree information are used. The QCF90 executable is available at http://nce.ads.uga.edu with a user manual.",2019-12-01 +31181401,A Multidimensional Characterization of E3 Ubiquitin Ligase and Substrate Interaction Network.,"E3 ubiquitin ligases (E3s) play a critical role in molecular and cellular mechanisms. However, a large number of E3-substrate interactions (ESIs) remain unrevealed. Here, we integrated the increasing omics data with biological knowledge to characterize and identify ESIs. Multidimensional features were computed to obtain the association patterns of ESIs, and an ensemble prediction model was constructed to identify ESIs. Comparison with non-ESI cases revealed the specific association patterns of ESIs, which provided meaningful insights into ESI interpretation. Reliability of the prediction model was confirmed from various perspectives. Notably, our evaluations on leucine-rich repeat family of F box (FBXL) family were consistent with a proteomic study, and several substrates for SKP2 and an orphan E3 FBXL6 were experimentally verified. Moreover, a cancer hallmark ESI landscape was studied. Taken together, our study catches a glimpse at the omics-driven ESI association patterns and provides a valuable resource (http://www.esinet.dicp.ac.cn/home.php) to assist ubiquitination research.",2019-05-27 +34622437,Transcatheter aortic valve implantation results are not superimposable to surgery in patients with aortic stenosis at low surgical risk. ,"The aim of this meta-analysis was to compare the impact of transcatheter aortic valve implantation (TAVI) vs. surgical aortic valve replacement (SAVR) in patients with severe aortic valve stenosis at low surgical risk. 
All randomized controlled trials (RCTs) and observational studies (Obs) published from January 2014 until March 31st, 2020 were retrieved through the PubMed computerized database and at the site https://www.clinicaltrials.com. The relative risk (RR) with the 95% confidence interval (CI) was used to evaluate the effect of the intervention under comparison. The primary endpoints were all-cause 30-day mortality and 1-year mortality. The 30-day safety endpoints were: stroke, acute kidney injury stage 2 or 3, major bleeding, moderate/severe paravalvular leak, need for new permanent pacemaker (PM) implantation. After detailed review, 9 studies, related to 4 RCTs and 5 Obs, were selected. The overall analysis of RCTs plus Obs showed a significantly lower 30-day mortality for TAVI (RR = 0.55; 95% CI 0.45-0.68, p < 0.00001; I² = 0%). However, an increased risk of new PM implantation (RR = 2.87; 95% CI 2.01-3.67, p < 0.00001, I² = 0%) and of paravalvular leak (RR = 7.28; 95% CI 3.83-13.81, p < 0.00001, I² = 0%) was observed in TAVI compared to SAVR. On the contrary, a lower incidence of major bleeding (RR = 0.38; 95% CI 0.27-0.54, p < 0.00001, I² = 0%) and of acute kidney injury was observed (RR = 0.33; 95% CI 0.19-0.56, p < 0.0001, I² = 0%) in TAVI. TAVI and SAVR in the treatment of AS in the patients at low surgical risk are not superimposable. In particular, while 30-day and 1-year mortality, major bleeding and acute kidney injury were significantly lower for TAVI, the need for new PM implantation and paravalvular leak were significantly lower in SAVR. Consequently, we suggest the need for more trials to evaluate the effectiveness of TAVI as routine therapeutic procedure in the treatment of patients with low surgical risk AS.",2021-10-08 +32415965,Palantir: a springboard for the analysis of secondary metabolite gene clusters in large-scale genome mining projects.,"

Summary

To support small and large-scale genome mining projects, we present Post-processing Analysis tooLbox for ANTIsmash Reports (Palantir), a dedicated software suite for handling and refining secondary metabolite biosynthetic gene cluster (BGC) data annotated with the popular antiSMASH pipeline. Palantir provides new functionalities building on NRPS/PKS predictions from antiSMASH, such as improved BGC annotation, module delineation and easy access to sub-sequences at different levels (cluster, gene, module and domain). Moreover, it can parse user-provided antiSMASH reports and reformat them for direct use or storage in a relational database.

Availability and implementation

Palantir is released both as a Perl API available on CPAN (https://metacpan.org/release/Bio-Palantir) and as a web application (http://palantir.uliege.be). As a practical use case, the web interface also features a database built from the mining of 1616 cyanobacterial genomes, of which 1488 were predicted to encode at least one BGC.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +29069501,jMorp: Japanese Multi Omics Reference Panel.,"We developed jMorp, a new database containing metabolome and proteome data for plasma obtained from >5000 healthy Japanese volunteers from the Tohoku Medical Megabank Cohort Study, which is available at https://jmorp.megabank.tohoku.ac.jp. Metabolome data were measured by proton nuclear magnetic resonance (NMR) and liquid chromatography-mass spectrometry (LC-MS), while proteome data were obtained by nanoLC-MS. We released the concentration distributions of 37 metabolites identified by NMR, distributions of peak intensities of 257 characterized metabolites by LC-MS, and observed frequencies of 256 abundant proteins. Additionally, correlation networks for the metabolites can be observed using an interactive network viewer. Compared with some existing databases, jMorp has some unique features: (i) Metabolome data were obtained using a single protocol in a single institute, ensuring that measurement biases were significantly minimized; (ii) The database contains large-scale data for healthy volunteers with various health records and genome data and (iii) Correlations between metabolites can be easily observed using the graphical viewer. Metabolites data are becoming important intermediate markers for evaluating the health states of humans, and thus jMorp is an outstanding resource for a wide range of researchers, particularly those in the fields of medical science, applied molecular biology, and biochemistry.",2018-01-01 +35111740,A Novel Model Based on Necroptosis-Related Genes for Predicting Prognosis of Patients With Prostate Adenocarcinoma.,"Background: Necroptosis is a newly recognized form of cell death. Here, we applied bioinformatics tools to identify necroptosis-related genes using a dataset from The Cancer Genome Atlas (TCGA) database, then constructed a model for prognosis of patients with prostate cancer. 
Methods: RNA sequence (RNA-seq) data and clinical information for Prostate adenocarcinoma (PRAD) patients were obtained from the TCGA portal (http://tcga-data.nci.nih.gov/tcga/). We performed comprehensive bioinformatics analyses to identify hub genes as potential prognostic biomarkers in PRAD, followed by establishment and validation of a prognostic model. Next, we assessed the overall prediction performance of the model using receiver operating characteristic (ROC) curves and the area under curve (AUC) of the ROC. Results: A total of 5 necroptosis-related genes, namely ALOX15, BCL2, IFNA1, PYGL and TLR3, were used to construct a survival prognostic model. The model exhibited excellent performance in the TCGA cohort and validation group and had good prediction accuracy in screening out high-risk prostate cancer patients. Conclusion: We successfully identified necroptosis-related genes and constructed a prognostic model that can accurately predict 1-, 3- and 5-year overall survival (OS) rates of PRAD patients. Our riskscore model has provided a novel strategy for the prediction of PRAD patients' prognosis.",2021-01-01 +33910445,Development and Validation of a RNA Binding Protein-Associated Prognostic Model for Hepatocellular Carcinoma.,"

Background

Dysregulation of RNA binding proteins (RBPs) has been identified in multiple malignant tumors correlated with tumor progression and occurrence. However, the function of RBPs is not well understood in hepatocellular carcinoma (HCC).

Methods

The RNA sequence data of HCC was extracted from The Cancer Genome Atlas (TCGA) database and different RBPs were calculated between regular and cancerous tissue. The study explored the expression and predictive value of the RBPs systematically with a series of bioinformatic analyses.

Results

A total of 330 RBPs, including 208 up-regulated and 122 down-regulated RBPs, were classified differently. Four RBPs (MRPL54, EZH2, PPARGC1A, EIF2AK4) were defined as the forecast related hub gene and used to construct a model for prediction. Further study showed that the high-risk subgroup is poor survived (OS) compared to the model-based low-risk subgroup. The area of the prognostic model under the time-dependent receiver operator characteristic (ROC) curve is 0.814 in TCGA training group and 0.729 in validation group, indicating a strong prognostic model. We also created a predictive nomogram and a web-based calculator (https://dxyjiang.shinyapps.io/RBPpredict/) based on the 4 RBPs and internal validation in the TCGA cohort, which displayed a beneficial predictive ability for HCC.

Conclusions

Our results provide new insights into HCC pathogenesis. The 4-RBP gene signature showed a reliable HCC prediction ability with possible applications in therapeutic decision making and personalized therapy.",2021-01-01 +33211888,CSEA-DB: an omnibus for human complex trait and cell type associations.,"During the past decade, genome-wide association studies (GWAS) have identified many genetic variants with susceptibility to several thousands of complex diseases or traits. The genetic regulation of gene expression is highly tissue-specific and cell type-specific. Recently, single-cell technology has paved the way to dissect cellular heterogeneity in human tissues. Here, we present a reference database for GWAS trait-associated cell type-specificity, named Cell type-Specific Enrichment Analysis DataBase (CSEA-DB, available at https://bioinfo.uth.edu/CSEADB/). Specifically, we curated total of 5120 GWAS summary statistics data for a wide range of human traits and diseases followed by rigorous quality control. We further collected >900 000 cells from the leading consortia such as Human Cell Landscape, Human Cell Atlas, and extensive literature mining, including 752 tissue cell types from 71 adult and fetal tissues across 11 human organ systems. The tissues and cell types were annotated with Uberon and Cell Ontology. By applying our deTS algorithm, we conducted 10 250 480 times of trait-cell type associations, reporting a total of 598 (11.68%) GWAS traits with at least one significantly associated cell type. In summary, CSEA-DB could serve as a repository of association map for human complex traits and their underlying cell types, manually curated GWAS, and single-cell transcriptome resources.",2021-01-01 +33272170,"Nitrite and Nitrate Levels in Groundwater, Water Distribution Network, Bottled Water and Juices in Iran: A Systematic Review.","

Background

Nitrate and nitrite can get into the body through the consumption of contaminated water either directly or indirectly. The accumulation of these compounds in the body, in the long run, leads to health problems, for example, digestive disorders, cancers, and even death threats in children. The aim of this review was to investigate nitrate and nitrite pollution levels in drinking water and fruit juices in Iran.

Methods

In this review, data were collected through searching the Scientific Information Database, Science-Direct, Scopus, PubMed, Google Scholar, and Magiran databases using the keywords Nitrate, Nitrite, Drinking water, Drinking Water Resources, Juice and Iran. Finally, the location of the studies was geocoded through the Google My Maps (https://www.google.com/mymaps) software.

Results

Studies clearly indicated that the juices are safe in terms of nitrate. Nitrate and nitrite values were less than the national and international standards in all samples of bottled drinking water except for a few of the studies. The results of the reviewed studies also indicated that the nitrate content was higher than that written on the label in 96% of the samples, and nitrite was not labeled in 80% of them. The nitrate quantity was higher than the permissible limit in the water distribution network of Bushehr, Gilan and Mazandaran Provinces. Talesh, Ardabil, Hashtgerd, Divandareh, and Kerman cities had high nitrate levels in more than 50% of wells.

Conclusion

Using nitrogen fertilizers and the lack of a wastewater treatment system were the main reasons for the presence of nitrate and nitrite.",2021-01-01 +33151284,DBAASP v3: database of antimicrobial/cytotoxic activity and structure of peptides as a resource for development of new therapeutics.,"The Database of Antimicrobial Activity and Structure of Peptides (DBAASP) is an open-access, comprehensive database containing information on amino acid sequences, chemical modifications, 3D structures, bioactivities and toxicities of peptides that possess antimicrobial properties. DBAASP is updated continuously, and at present, version 3.0 (DBAASP v3) contains >15 700 entries (8000 more than the previous version), including >14 500 monomers and nearly 400 homo- and hetero-multimers. Of the monomeric antimicrobial peptides (AMPs), >12 000 are synthetic, about 2700 are ribosomally synthesized, and about 170 are non-ribosomally synthesized. Approximately 3/4 of the entries were added after the initial release of the database in 2014 reflecting the recent sharp increase in interest in AMPs. Despite the increased interest, adoption of peptide antimicrobials in clinical practice is still limited as a consequence of several factors including side effects, problems with bioavailability and high production costs. To assist in developing and optimizing de novo peptides with desired biological activities, DBAASP offers several tools including a sophisticated multifactor analysis of relevant physicochemical properties. Furthermore, DBAASP has implemented a structure modelling pipeline that automates the setup, execution and upload of molecular dynamics (MD) simulations of database peptides. At present, >3200 peptides have been populated with MD trajectories and related analyses that are both viewable within the web browser and available for download. More than 400 DBAASP entries also have links to experimentally determined structures in the Protein Data Bank. 
DBAASP v3 is freely accessible at http://dbaasp.org.",2021-01-01 +35036172,ReGSP: a visualized application for homology-based gene searching and plotting using multiple reference sequences.,"The massively parallel nature of next-generation sequencing technologies has contributed to the generation of massive sequence data in the last two decades. Deciphering the meaning of each generated sequence requires multiple analysis tools, at all stages of analysis, from the reads stage all the way up to the whole-genome level. Homology-based approaches based on related reference sequences are usually the preferred option for gene and transcript prediction in newly sequenced genomes, resulting in the popularity of a variety of BLAST and BLAST-based tools. For organelle genomes, a single-reference-based gene finding tool that uses grouping parameters for BLAST results has been implemented in the Genome Search Plotter (GSP). However, this tool does not accept multiple and user-customized reference sequences required for a broad homology search. Here, we present multiple Reference-based Gene Search and Plot (ReGSP), a simple and convenient web tool that accepts multiple reference sequences for homology-based gene search. The tool incorporates cPlot, a novel dot plot tool, for illustrating nucleotide sequence similarity between the query and the reference sequences. ReGSP has an easy-to-use web interface and is freely accessible at https://ds.mju.ac.kr/regsp.",2021-12-23 +34278324,University of Hawai'i Cancer Center Connection: Pacific Tracker (PacTrac) Version 3.1 Diet and Physical Activity Assessment Tool for the Pacific Region.,"The Pacific Tracker (PacTrac) is a web-based diet and physical activity assessment program created to analyze dietary recall or dietary record data from the Pacific region. 
Version 3.1 modifications make the tool available for public use (under check it out) to enter, analyze, view and print out data; and for research use, for saving and downloading of multiple entries in a research mode. PacTrac 3.1 (https://nappactrac31.ctahr.hawaii.edu/default.htm) is managed through the Children's Healthy Living Center of Excellence (CHL Center) at the College of Tropical Agriculture and Human Resources at the University of Hawai'i, in collaboration with the University of Hawai'i Cancer Center.",2021-07-01 +,A comprehensive phylogeny of flat bark beetles (Coleoptera: Cucujidae) with a revised classification and a new South American genus,"The phylogenetic relationships within the beetle family Cucujidae were investigated for the first time. Fifty‐seven morphological characters were used for maximum parsimony and Bayesian analyses. Mitochondrial genomes were generated from museum specimens through genome skimming and used for phylogenetic analyses in a maximum likelihood framework due to the difficulty in collecting many of the Cucujidae species and the lack of availability of ethanol‐preserved specimens. Both morphological and molecular analyses supported the monophyly of the four established genera: Cucujus Fabricius, Pediacus Shuckard, Palaestes Perty, and Platisus Erichson. They also led to the description of a new genus, Thesaurus gen.n., from South America, and the recognition of two new subfamilies, Pediacinae subfam.n. and Platisinae subfam.n. Diagnoses of Cucujidae subfamilies and genera are provided along with illustrations and keys for their identification. Phylogenetic relationships between the genera were analysed and discussed based on morphological and molecular data. Three new species were described in Thesaurus gen.n.: T. albertalleni gen. et sp.n. from Venezuela, T. zaitsevi sp.n. from Peru, and T. macclarini sp.n. from Ecuador. Illustrations and a key to species are provided. 
The previously unknown larvae of Palaestes and Thesaurus are illustrated and described for the first time. The Eocene fossil species, Platisus punctatus (Ramírez, Corsolini & Di Iorio), from Patagonia, Argentina, is here transferred to the genus Thesaurus. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:BF6C58C0‐C390‐4C32‐9243‐2483B6FBE369.",2020-04-01 +34627983,"Traditional uses, phytochemistry, pharmacology, toxicity and quality control of medicinal genus Aralia: A review.","

Ethnopharmacological relevance

Aralia, which belongs to Araliaceae family, is mainly distributed in Asia, such as China, Japan and South Korea. It has a long medicinal history and is widely used in the treatment of various diseases, such as hepatitis, rheumatoid arthritis, bruises, lumps and carbuncles.

Aim of the study

The purpose of this review is to systematically evaluate the traditional uses, phytochemistry, pharmacology, toxicity and quality control of main medicinal plants of Aralia, discuss the application of ethnic medicine, modern scientific research and the relationship between them, and put forward some suggestions to promote the further development and utilization of Aralia.

Materials and methods

The relevant information on Aralia was collected through electronic databases (PubMed, Web of Science, Science Direct, Springer, CNKI and Wanfang), Chinese herbal classics, Ph.D. and M.Sc. dissertations, Chinese Pharmacopoeia. Plant names were verified by ""The Plant List"" (http://www.theplantlist.org). The literature cited in this review can be traced back to 1878 to 2021.

Results

More than 290 chemical constituents have been isolated from the genus Aralia, including triterpenoid saponins, terpenoids, organic acids, flavonoids, polyacetylenes, phenylpropanoids and other constituents. Pharmacological studies have shown that the extracts and compounds of Aralia have a wide range of pharmacological activities, including anti-inflammation, analgesic, anti-tumor, liver protection, protection of cardiovascular and nervous system, regulating substance metabolism, antibacterial, antiviral and antioxidation.

Conclusions

The genus Aralia is not only an excellent traditional herbal medicine, but also a source of bioactive molecules with good application prospects. However, the structure-activity relationship, in vivo activity and action mechanism of its bioactive components need to be further studied. In addition, more toxicological and quality control studies are essential to evaluate the efficacy and safety of Aralia as medicine.",2021-10-07 +34353187,Living well with dementia: The role volunteer-based social recreational programs in promoting social connectedness of people with dementia and their caregivers.,"Objectives: This evaluation study was designed to examine the factors that contribute to the promotion of social connectedness among people with dementia and their caregivers through social recreational programs; develop an understanding of volunteers' impact on program success; and identify the barriers and facilitators to improve the volunteer-based programs to promote social connectedness. Method: A qualitative descriptive research design was used to explore the study participants' lived experiences of social recreational programs from Alzheimer's Society of Durham Region (ASDR) in Ontario, Canada. A final sample of 31 participants was recruited including people with dementia, informal caregivers, and community volunteers. Qualitative data was collected through face-to-face semi-structured interviews. Emerging themes were derived from the qualitative descriptive data using thematic analysis. Results: The qualitative interviews highlighted the impact of social recreational programs on people with dementia, caregivers and volunteers in the promotion of social connectedness, as well as the examination of barriers and facilitators to identify opportunities for the future improvement of ASDR programs that would benefit the dementia populations. 
The study findings revealed that the project 'Living Well with Dementia' has been able to successfully foster social connectedness through its volunteer-led social recreational programs by promoting the physical and mental well-being of people with dementia and their caregivers.Conclusion: Our study findings underscored the critical roles of volunteers who contributed to the success of community-based programs. Future research is needed to identify the opportunities to address current gaps in services and to strengthen the social recreational programs using evidence-based practices and client-centered approaches.Supplemental data for this article can be accessed online at http://doi.org/10.1080/13607863.2021.1950614.",2021-08-06 +33579334,Gene function finding through cross-organism ensemble learning.,"

Background

Structured biological information about genes and proteins is a valuable resource to improve discovery and understanding of complex biological processes via machine learning algorithms. Gene Ontology (GO) controlled annotations describe, in a structured form, features and functions of genes and proteins of many organisms. However, such valuable annotations are not always reliable and sometimes are incomplete, especially for rarely studied organisms. Here, we present GeFF (Gene Function Finder), a novel cross-organism ensemble learning method able to reliably predict new GO annotations of a target organism from GO annotations of another source organism evolutionarily related and better studied.

Results

Using a supervised method, GeFF predicts unknown annotations from random perturbations of existing annotations. The perturbation consists in randomly deleting a fraction of known annotations in order to produce a reduced annotation set. The key idea is to train a supervised machine learning algorithm with the reduced annotation set to predict, namely to rebuild, the original annotations. The resulting prediction model, in addition to accurately rebuilding the original known annotations for an organism from their perturbed version, also effectively predicts new unknown annotations for the organism. Moreover, the prediction model is also able to discover new unknown annotations in different target organisms without retraining. We combined our novel method with different ensemble learning approaches and compared them to each other and to an equivalent single model technique. We tested the method with five different organisms using their GO annotations: Homo sapiens, Mus musculus, Bos taurus, Gallus gallus and Dictyostelium discoideum. The outcomes demonstrate the effectiveness of the cross-organism ensemble approach, which can be customized with a trade-off between the desired number of predicted new annotations and their precision. A Web application to browse both input annotations used and predicted ones, choosing the ensemble prediction method to use, is publicly available at http://tiny.cc/geff/ .

Conclusions

Our novel cross-organism ensemble learning method provides reliable predicted novel gene annotations, i.e., functions, ranked according to an associated likelihood value. They are very valuable both to speed the annotation curation, focusing it on the prioritized new annotations predicted, and to complement known annotations available.",2021-02-12 +34726960,Lipopolysaccharides derived from gram-negative bacterial pool of human gut microbiota promote inflammation and obesity development.,"Lipopolysaccharide (LPS) is the major component of the outer membrane of Gram-negative bacteria. It is found from intestinal microbes in the circulatory system and considered a trigger factor for low-grade inflammation in obesity. High-fat diet intake and its related obesity can cause gut microbiota disorder, leading to increased gut permeability, paracellular absorption and transcellular transport of endogenous endotoxin in the cardiovascular system. High-fat diet intake can also increase plasma LPS levels, and causing chronic or ""low-grade"" inflammation. In this review article, we summarize the recent research advancements on the mechanism of low-grade inflammation and its related obesity. We also propose several approaches that can be used to reduce endogenous endotoxin absorption.Supplemental data for this article is available online at https://doi.org/10.1080/08830185.2021.1996573 .",2021-11-02 +34000199,A Deep Learning Enhanced Novel Software Tool for Laryngeal Dynamics Analysis.,"Purpose High-speed videoendoscopy (HSV) is an emerging, but barely used, endoscopy technique in the clinic to assess and diagnose voice disorders because of the lack of dedicated software to analyze the data. HSV allows to quantify the vocal fold oscillations by segmenting the glottal area. This challenging task has been tackled by various studies; however, the proposed approaches are mostly limited and not suitable for daily clinical routine. 
Method We developed a user-friendly software in C# that allows the editing, motion correction, segmentation, and quantitative analysis of HSV data. We further provide pretrained deep neural networks for fully automatic glottis segmentation. Results We freely provide our software Glottis Analysis Tools (GAT). Using GAT, we provide a general threshold-based region growing platform that enables the user to analyze data from various sources, such as in vivo recordings, ex vivo recordings, and high-speed footage of artificial vocal folds. Additionally, especially for in vivo recordings, we provide three robust neural networks at various speed and quality settings to allow a fully automatic glottis segmentation needed for application by untrained personnel. GAT further evaluates video and audio data in parallel and is able to extract various features from the video data, among others the glottal area waveform, that is, the changing glottal area over time. In total, GAT provides 79 unique quantitative analysis parameters for video- and audio-based signals. Many of these parameters have already been shown to reflect voice disorders, highlighting the clinical importance and usefulness of the GAT software. Conclusion GAT is a unique tool to process HSV and audio data to determine quantitative, clinically relevant parameters for research, diagnosis, and treatment of laryngeal disorders. Supplemental Material https://doi.org/10.23641/asha.14575533.",2021-05-17 +34786950,Use of Mixture Dosing and Nonlinear Mixed Effect Modeling of Eight Environmental Contaminants in Rabbits to Improve Extrapolation Value of Toxicokinetic Data.,"

Background

Although in vivo studies of internal exposure to hazardous substances have been carried out for many years, there is room for progress to improve their informative value while adhering to the four R's: replacement, reduction, refinement, and responsibility rule.

Objectives

The objective of the study was to illustrate how toxicokinetic (TK) study design and data analysis can be implemented under the 4R rule to plan a chronic dosage regimen for investigating TK/toxicodynamic (TD) relationships.

Methods

The intravenous (IV) and oral serum concentrations of eight hazardous environmental contaminants including 1,1-Dichloro-2,2-bis(p-chlorophenyl)ethylene (pp'DDE), β-Hexachlorocyclohexane (β-HCH), hexachlorobenzene (HCB), 2,2',4,4'-tetrabromodiphenyl ether (BDE-47), perfluorooctane sulfonate (PFOS), perfluorooctanoic acid (PFOA), di(2-ethylhexyl)phthalate (DEHP), and bisphenol S (BPS) were obtained after mixture dosing in rabbits using a sparse sampling design. Data were comprehensively analyzed using nonlinear mixed effect (NLME) modeling.

Results

The short persistence of BPS and of the DEHP metabolite (mono-2-ethylhexyl phthalate), reflected by their mean residence times (MRT) of a few hours, was due to their efficient clearance (CL, 3.2 and 0.47L/kg/h). The longer MRT of the other compounds (1-48 d) resulted either from their extremely low clearance (lower than 0.01L/kg/h for PFOA and PFOS) or from their very large volume of distribution (VSS) ranging from 33 to 45L/kg. Estimates of CL, VSS, and bioavailability were used to compute the oral loading and daily maintenance doses required to attain a nominal steady-state serum concentration of 1 ng/mL. Simulations with the NLME model were applied to predict the serum concentration profile and to contrast the differential rates of accumulation in the central vs. peripheral compartments.

Conclusion

NLME modeling of the IV and oral TK of hazardous environmental contaminants, in rabbits while fulfilling the 4R rule, was able to provide the physiological basis for interspecies extrapolation of exposure rates in a TK/TD approach to risk assessment. https://doi.org/10.1289/EHP8957.",2021-11-17 +34264488,FAIRSCAPE: a Framework for FAIR and Reproducible Biomedical Analytics.,"Results of computational analyses require transparent disclosure of their supporting resources, while the analyses themselves often can be very large scale and involve multiple processing steps separated in time. Evidence for the correctness of any analysis should include not only a textual description, but also a formal record of the computations which produced the result, including accessible data and software with runtime parameters, environment, and personnel involved. This article describes FAIRSCAPE, a reusable computational framework, enabling simplified access to modern scalable cloud-based components. FAIRSCAPE fully implements the FAIR data principles and extends them to provide fully FAIR Evidence, including machine-interpretable provenance of datasets, software and computations, as metadata for all computed results. The FAIRSCAPE microservices framework creates a complete Evidence Graph for every computational result, including persistent identifiers with metadata, resolvable to the software, computations, and datasets used in the computation; and stores a URI to the root of the graph in the result's metadata. An ontology for Evidence Graphs, EVI ( https://w3id.org/EVI ), supports inferential reasoning over the evidence. FAIRSCAPE can run nested or disjoint workflows and preserves provenance across them. It can run Apache Spark jobs, scripts, workflows, or user-supplied containers. All objects are assigned persistent IDs, including software. 
All results are annotated with FAIR metadata using the evidence graph model for access, validation, reproducibility, and re-use of archived data and software.",2021-07-15 +34316271,Virtual Herbarium ALTB: collection of vascular plants of the Altai Mountain Country.,"

Background

The herbarium of the South-Siberian Botanical Garden of Altai State University (ALTB) houses the largest collection of plants from the Altai Mountain Country (AMC), an area that extends across Russia, Kazakhstan, Mongolia and China. The collection of ALTB includes more than 450,00 specimens, making it the seventh largest in Russia and the fourth largest amongst Russian university herbaria. Altai State University (ASU), the home of ALTB, is one of the most important centres of academic education and research in Siberia and the Russian Far East. It is a sociocultural centre that provides a distinguished learning environment for undergraduate and graduate students in many scholarly and professional fields, meeting the needs of today's knowledge-based post-industrial society and contributing to regional development. It actively promotes international cooperation and strategic collaboration amongst countries of the AMC in the fields of science, education and culture. In particular, the activities of the South-Siberian Botanical Garden include: development of measures to protect rare and endangered plant species, research on the flora and vegetation of the AMC, preparation and publication of a multi-volume work ""Flora Altaica"", monographic study of individual plant groups, conducting laboratory classes, summer practicals and special courses. The main purpose of this article is to attract the attention of the scientific community to the botanical research of transboundary territory of the Altai Mountain Country (Russia, Kazakhstan, China and Mongolia) and to the future development of digital plant collections in partnership with Global Biodiversity Information Facility (GBIF).

New information

The Virtual Herbarium ALTB (Russian interface - altb.asu.ru) is the largest digital collection of plants from the transboundary territory of the Altai Mountain Country and the main source of primary material for the ""Flora Altaica"" project (http://altaiflora.asu.ru/en/). Since 2017, when Altai State University became a GBIF data publisher, data from the Virtual Herbarium ALTB has been exported to the dataset ""Virtual Herbarium ALTB (South-Siberian Botanical Garden)"" in GBIF. Currently, it includes images and data from 22,466 vascular plants, of which 67% have geographic coordinates (accessed on 30.03.2021). Most of the specimens have been collected since 1977, with the most intensive collecting years being 1995-2008. In 2019, the label-data table of the Virtual Herbarium ALTB was modified to bring it into conformity with the Darwin Core specification (http://altb.asu.ru/). This effectively solved the major impediment to sharing plant diversity data from the AMC and adjacent regions in a multilingual environment.",2021-07-13 +34613368,COVID-19 knowledge graph from semantic integration of biomedical literature and databases. ,"The global response to the COVID-19 pandemic has led to a rapid increase of scientific literature on this deadly disease. Extracting knowledge from biomedical literature and integrating it with relevant information from curated biological databases is essential to gain insight into COVID-19 etiology, diagnosis, and treatment. We used Semantic Web technology RDF to integrate COVID-19 knowledge mined from literature by iTextMine, PubTator, and SemRep with relevant biological databases and formalized the knowledge in a standardized and computable COVID-19 Knowledge Graph (KG). We published the COVID-19 KG via a SPARQL endpoint to support federated queries on the Semantic Web and developed a knowledge portal with browsing and searching interfaces. 
We also developed a RESTful API to support programmatic access and provided RDF dumps for download. The COVID-19 Knowledge Graph is publicly available under CC-BY 4.0 license at https://research.bioinformatics.udel.edu/covid19kg/.",2021-10-06 +,Best Paper Selection,"Banda JM, Evans L, Vanguri RS, Tatonetti NP, Ryan PB, Shah NH. A curated and standardized adverse drug event resource to accelerate drug safety research. Sci Data 2016;3:160026 +https://www.nature.com/articles/sdata201626 Bauer CR, Ganslandt T, Baum B, Christoph J, Engel I, Lobe M, Mate S, Staubert S, Drepper J, Prokosch HU, Winter A, Sax U. Integrated Data Repository Toolkit (IDRT). A Suite of Programs to Facilitate Health Analytics on Heterogeneous Medical Data. Methods Inf Med 2016;55(2):125-35 +https://methods.schattauer.de/en/contents/archivestandard/issue/2324/manuscript/25160.html Greene D, NIHR BioResource, Richardson S, Turro E. Phenotype Similarity Regression for Identifying the Genetic Determinants of Rare Diseases. Am J Hum Genet 2016;98(3):490-9 +https://linkinghub.elsevier.com/retrieve/pii/S0002-9297(16)00014-8 Sarntivijai S, Vasant D, Jupp S, Saunders G, Bento AP, Gonzalez D, Betts J, Hasan S, Koscielny G, Dunham I, Parkinson H, Malone J. Linking rare and common disease: mapping clinical diseasephenotypes to ontologies in therapeutic target validation. J Biomed Semantics 2016;7-8 +https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-016-0051-7",2017-08-01 +28473704,CancerPDF: A repository of cancer-associated peptidome found in human biofluids.,"CancerPDF (Cancer Peptidome Database of bioFluids) is a comprehensive database of endogenous peptides detected in the human biofluids. The peptidome patterns reflect the synthesis, processing and degradation of proteins in the tissue environment and therefore can act as a gold mine to probe the peptide-based cancer biomarkers. 
Although an extensive data on cancer peptidome has been generated in the recent years, lack of a comprehensive resource restrains the facility to query the growing community knowledge. We have developed the cancer peptidome resource named CancerPDF, to collect and compile all the endogenous peptides isolated from human biofluids in various cancer profiling studies. CancerPDF has 14,367 entries with 9,692 unique peptide sequences corresponding to 2,230 unique precursor proteins from 56 high-throughput studies for ~27 cancer conditions. We have provided an interactive interface to query the endogenous peptides along with the primary information such as m/z, precursor protein, the type of cancer and its regulation status in cancer. To add-on, many web-based tools have been incorporated, which comprise of search, browse and similarity identification modules. We consider that the CancerPDF will be an invaluable resource to unwind the potential of peptidome-based cancer biomarkers. The CancerPDF is available at the web address http://crdd.osdd.net/raghava/cancerpdf/ .",2017-05-04 +33619257,Uniform genomic data analysis in the NCI Genomic Data Commons.,"The goal of the National Cancer Institute's (NCI's) Genomic Data Commons (GDC) is to provide the cancer research community with a data repository of uniformly processed genomic and associated clinical data that enables data sharing and collaborative analysis in the support of precision medicine. The initial GDC dataset include genomic, epigenomic, proteomic, clinical and other data from the NCI TCGA and TARGET programs. Data production for the GDC started in June, 2015 using an OpenStack-based private cloud. By June of 2016, the GDC had analyzed more than 50,000 raw sequencing data inputs, as well as multiple other data types. 
Using the latest human genome reference build GRCh38, the GDC generated a variety of data types from aligned reads to somatic mutations, gene expression, miRNA expression, DNA methylation status, and copy number variation. In this paper, we describe the pipelines and workflows used to process and harmonize the data in the GDC. The generated data, as well as the original input files from TCGA and TARGET, are available for download and exploratory analysis at the GDC Data Portal and Legacy Archive ( https://gdc.cancer.gov/ ).",2021-02-22 +31174603,NanoARG: a web service for detecting and contextualizing antimicrobial resistance genes from nanopore-derived metagenomes.,"

Background

Direct and indirect selection pressures imposed by antibiotics and co-selective agents and horizontal gene transfer are fundamental drivers of the evolution and spread of antibiotic resistance. Therefore, effective environmental monitoring tools should ideally capture not only antibiotic resistance genes (ARGs), but also mobile genetic elements (MGEs) and indicators of co-selective forces, such as metal resistance genes (MRGs). A major challenge towards characterizing the potential human health risk of antibiotic resistance is the ability to identify ARG-carrying microorganisms, of which human pathogens are arguably of greatest risk. Historically, short reads produced by next-generation sequencing technologies have hampered confidence in assemblies for achieving these purposes.

Results

Here, we introduce NanoARG, an online computational resource that takes advantage of the long reads produced by nanopore sequencing technology. Specifically, long nanopore reads enable identification of ARGs in the context of relevant neighboring genes, thus providing valuable insight into mobility, co-selection, and pathogenicity. NanoARG was applied to study a variety of nanopore sequencing data to demonstrate its functionality. NanoARG was further validated through characterizing its ability to correctly identify ARGs in sequences of varying lengths and a range of sequencing error rates.

Conclusions

NanoARG allows users to upload sequence data online and provides various means to analyze and visualize the data, including quantitative and simultaneous profiling of ARGs, MRGs, MGEs, and putative pathogens. A user-friendly interface allows users the analysis of long DNA sequences (including assembled contigs), facilitating data processing, analysis, and visualization. NanoARG is publicly available and freely accessible at https://bench.cs.vt.edu/nanoarg .",2019-06-07 +,LncRNA-599554 sponges miR-15a-5p to contribute inductive ability of dermal papilla cells through positive regulation of the expression of Wnt3a in cashmere goat,"Long non-coding RNAs (lncRNAs), as post-transcriptional regulators, were thought to function in the inductive property of dermal papilla cells (DPCs) in cashmere goat. Previously, lncRNA-599554 was identified in secondary hair follicle (SHF) of cashmere goat, but its functional significance is unknown.In the present investigation, we verified that lncRNA-599554 had significantly higher expression at the anagen dermal papilla of cashmere goat SHF than that at telogen. Based on overexpression and knockdown techniques, we found that lncRNA-599554 contributes the inductive property of DPCs of cashmere goat, which was assessed by detecting the changes in the expression of several typical indictor genes in DPCs including ET-1, SCF, Versican, ALP, Lef1 and Ptc-1. Based on RNA pull-down assay, we verified that lncRNA-599554 directly interacted with chi-miR-15a-5p. Also, we showed that lncRNA-599554 positively regulated the Wnt3a expression in DPCs but which did not appear to involve its modulating of promoter methylation. 
Based on the use of Dual-luciferase reporter assays, our data indicated that lncRNA-599554 regulated the Wnt3a expression through chi-miR-15a-5p-mediated post-transcriptional level.We showed that lncRNA-599554 contributes the inductive property of DPCs in cashmere goat which might be achieved through sponging chi-miR-15b-5p to promote the Wnt3a expression. The results from the present investigation provided a novel insight into the functional mechanism of lncRNA-599554 in the SHF regeneration of cashmere goat along with the formation and growth of cashmere fiber.How to cite: Yin RH, Wang YR, Zhao SJ, et al. LncRNA-599554 sponges miR-15a-5p to contribute inductive ability of dermal papilla cells through positive regulation of the expression of Wnt3a in cashmere goat. Electron J Biotechnol 2020;45. https://doi.org/10.1016/j.ejbt.2020.03.002",2020-05-01 +32758136,GSDB: a database of 3D chromosome and genome structures reconstructed from Hi-C data.,"Advances in the study of chromosome conformation capture technologies, such as Hi-C technique - capable of capturing chromosomal interactions in a genome-wide scale - have led to the development of three-dimensional chromosome and genome structure reconstruction methods from Hi-C data. The three dimensional genome structure is important because it plays a role in a variety of important biological activities such as DNA replication, gene regulation, genome interaction, and gene expression. In recent years, numerous Hi-C datasets have been generated, and likewise, a number of genome structure construction algorithms have been developed.In this work, we outline the construction of a novel Genome Structure Database (GSDB) to create a comprehensive repository that contains 3D structures for Hi-C datasets constructed by a variety of 3D structure reconstruction tools. 
The GSDB contains over 50,000 structures from 12 state-of-the-art Hi-C data structure prediction algorithms for 32 Hi-C datasets.GSDB functions as a centralized collection of genome structures which will enable the exploration of the dynamic architectures of chromosomes and genomes for biomedical research. GSDB is accessible at http://sysbio.rnet.missouri.edu/3dgenome/GSDB.",2020-08-05 +34609711,ABCpred: a webserver for the discovery of acetyl- and butyryl-cholinesterase inhibitors.,"Alzheimer's disease (AD) is one of the most common forms of dementia and is associated with a decline in cognitive function and language ability. The deficiency of the cholinergic neurotransmitter known as acetylcholine (ACh) is associated with AD. Acetylcholinesterase (AChE) hydrolyses ACh and inhibits the cholinergic transmission. Furthermore, both AChE and butyrylcholinesterase (BChE) plays important roles in early and late stages of AD. Therefore, the inhibition of either or both cholinesterase enzymes represent a promising therapeutic route for treating AD. In this study, a large-scale classification structure-activity relationship model was developed to predict cholinesterase inhibitory activities as well as revealing important substructures governing their activities. Herein, a non-redundant dataset constituting 985 and 1056 compounds for AChE and BChE, respectively, was obtained from the ChEMBL database. These inhibitors were described by 12 sets of molecular fingerprints and predictive models were developed using the random forest algorithm. Evaluation of the model performance by means of Matthews correlation coefficient and consideration of the model's interpretability indicated that the SubstructureCount fingerprint was the most robust with five-fold cross-validated MCC of [0.76, 0.82] for AChE and BChE, respectively, and test MCC of [0.73, 0.97]. 
Feature interpretation revealed that the aromatic ring system, heterocyclic nitrogen containing compounds and amines are important for cholinesterase inhibition. Finally, the model was deployed as a publicly available webserver called the ABCpred at http://codes.bio/abcpred/ .",2021-10-05 +30588614,The IUPHAR Pharmacology Education Project.,"Online learning, an essential component of most traditional contact-based educational programs, must be of high quality to contribute effectively to learning. The availability of first-class web-based materials is particularly valued by both learners and educators in resource-poor nations. In this Practice article, we introduce the International Union of Basic and Clinical Pharmacology (IUPHAR) Pharmacology Education Project (PEP) (https://www.pharmacologyeducation.org/), a freely accessible online learning resource intended to support education and training in pharmacological sciences worldwide.",2018-12-26 +34614393,MANORAA: A machine learning platform to guide protein-ligand design by anchors and influential distances.,"The MANORAA platform uses structure-based approaches to provide information on drug design originally derived from mapping tens of thousands of amino acids on a grid. In-depth analyses of the pockets, frequently occurring atoms, influential distances, and active-site boundaries are used for the analysis of active sites. The algorithms derived provide model equations that can predict whether changes in distances, such as contraction or expansion, will result in improved binding affinity. The algorithm is confirmed using kinetic studies of dihydrofolate reductase (DHFR), together with two DHFR-TS crystal structures. Empirical analyses of 881 crystal structures involving 180 ligands are used to interpret protein-ligand binding affinities. MANORAA links to major biological databases for web-based analysis of drug design. 
The frequency of atoms inside the main protease structures, including those from SARS-CoV-2, shows how the rigid part of the ligand can be used as a probe for molecular design (http://manoraa.org).",2021-10-05 +33007622,DINAX- a comprehensive database of inherited ataxias.,"

Background

Neurodegenerative disorders such as hereditary ataxia often manifest overlapping symptoms and are likely to be misdiagnosed based on clinical phenotypes. To identify the genes associated with such disorders for diagnostic purposes, geneticists often use high throughput technologies which generate an enormous amount of data on variants whose relevance can be unclear. Besides, analysis and interpretation of high throughput data require gleaning of several web-based resources which can be laborious and time-consuming. To overcome these, we have created a Database for Inherited Ataxia (DINAX), a repository of gene variants from publicly available information.

Methods

DINAX is implemented as a MySQL relational database using the PHP scripting language. Web interfaces were developed using HTML, CSS, and JavaScript. Variant and phenotype information was collected and manually curated from published literature and primary databases such as OMIM and ClinVar. These were further analyzed to decipher expression and pathway analysis.

Results

DINAX is an inventory of 7166 genomic variants (single nucleotide polymorphisms, deletions, insertions, and translocations) reported till date among the 185 genes associated with different subtypes of inherited ataxia. DINAX implements a dual search methodology for genes and phenotypes linking to ataxia associated genes, variants, and their source. Pathway analysis confirmed their association with ataxia.

Conclusion

The database is created to provide a single web source for obtaining information about ataxia related genes. Besides, the database facilitates easy identification of known and reported variants as well as the novel or unreported variants. DINAX is freely available at http://slsdb.manipal.edu/dinax.",2020-09-17 +36303742,Visualizing Phytochemical-Protein Interaction Networks: Momordica charantia and Cancer.,"The in silico study of medicinal plants is a rapidly growing field. Techniques such as reverse screening and network pharmacology are used to study the complex cellular action of medicinal plants against disease. However, it is difficult to produce a meaningful visualization of phytochemical-protein interactions (PCPIs) in the cell. This study introduces a novel workflow combining various tools to visualize a PCPI network for a medicinal plant against a disease. The five steps are 1) phytochemical compilation, 2) reverse screening, 3) network building, 4) network visualization, and 5) evaluation. The output is a PCPI network that encodes multiple dimensions of information, including subcellular location, phytochemical class, pharmacokinetic data, and prediction probability. As a proof of concept, we built a PCPI network for bitter gourd (Momordica charantia L.) against colorectal cancer. The network and workflow are available at https://yumibriones.github.io/network/. The PCPI network highlights high-confidence interactions for further in vitro or in vivo study. The overall workflow is broadly transferable and can be used to visualize the action of other medicinal plants or small molecules against other diseases.",2021-12-13 +33656920,Human Colonization with Extended-Spectrum Beta-Lactamase-Producing E. coli in Relation to Animal and Environmental Exposures in Bangladesh: An Observational One Health Study.,"

Background

Human exposure to intensively farmed livestock is a potential risk for transmission of antibiotic-resistant bacteria (ARB) but few studies have assessed the relative role of animal vs. environmental sources of ARB in low-resource community settings.

Objectives

We conducted an observational study to compare ARB colonization and antibiotic-resistant gene prevalence and abundance in humans with high or low exposure to poultry in rural households, commercial poultry farms, and urban markets in Bangladesh.

Methods

Extended-spectrum β-lactamase (ESBL)-producing and carbapenem-resistant E. coli were quantified in feces from adults with high or low poultry exposure (n=100, respectively), poultry (n=200), drinking water (n=120), and wastewater (n=120) from 40 rural households, 40 poultry farms, and 40 urban markets.

Results

ESBL-producing E. coli (ESBL-EC) prevalence was 67.5% (95% CI: 61.0, 74.0) in samples from adults, 68.0% (95% CI: 61.5, 74.5) in samples from poultry, and 92.5% (95% CI: 87.7, 97.3) in wastewater samples. Carbapenem-resistant E. coli prevalence was high in market wastewaters [30% (95% CI: 15.0, 45.0)] but low in humans (1%) and poultry (1%). Human, poultry, and wastewater isolates shared common resistance genes: blaCTX-M-1, qnr, and blaTEM. Human colonization was not significantly associated with exposure to poultry or setting (rural, farm, or market). Ninety-five percent of commercial poultry farms routinely administered antibiotics. Susceptibility tests were significantly different in household vs. farm and market poultry isolates for four of seven antibiotic classes. In human isolates, there were no differences except aminoglycoside resistance (16.4% high vs. 4.4% low exposure, p=0.02). Urban market wastewaters and poultry samples had significantly higher concentrations of ESBL-EC (p<0.001) and blaCTX-M-1 (p<0.001) compared with samples from farms and rural households.

Discussion

ESBL-EC colonization was high in humans but not significantly associated with exposure to poultry. Bidirectional transmission of antibiotic resistance is likely between humans, poultry, and the environment in these community settings, underlining the importance of One Health mitigation strategies. https://doi.org/10.1289/EHP7670.",2021-03-03 +33591210,Participatory Research for Environmental Justice: A Critical Interpretive Synthesis.,"

Background

Environmental health risks are disproportionately colocated with communities in poverty and communities of color. In some cases, participatory research projects have effectively addressed structural causes of health risk in environmental justice (EJ) communities. However, many such projects fail to catalyze change at a structural level.

Objectives

This review employs Critical Interpretive Synthesis (CIS) to theorize specific elements of participatory research for environmental health that effectively prompt structural change in EJ communities.

Methods

Academic database search was used to identify peer-reviewed literature describing participatory research with EJ communities to address environmental health. Synthetic constructs were developed iteratively related to study characteristics, design elements, and outcomes; and data were extracted for included records. Statistical analyses were performed to assess correlations between study design elements and structural change outcomes. Through critical, comparative, and contextual analyses of the ""structural change"" case study group and ""non- structural change"" group, informed by relevant theoretical literature, a synthesizing argument was generated.

Results

From 505 total records identified, eligibility screening produced 232 case study articles, representing 154 case studies, and 55 theoretical articles for synthesis. Twenty-six case studies resulted in a structural change outcome. The synthesizing argument states that participatory research with EJ communities may be more likely to result in structural change when a) community members hold formal leadership roles; b) project design includes decision-makers and policy goals; and c) long term partnerships are sustained through multiple funding mechanisms. The assumption of EJ community benefit through research participation is critically examined.

Discussion

Recommended future directions include establishing structural change as a goal of participatory research, employing participatory assessment of community benefit, and increased hiring of faculty of color at research institutions. The power, privilege, and political influence that academic institutions are able to leverage in partnership with EJ communities may be as valuable as the research itself. https://doi.org/10.1289/EHP6274.",2021-02-16 +30053267,dbCAN-seq: a database of carbohydrate-active enzyme (CAZyme) sequence and annotation.,"Carbohydrate-active enzyme (CAZymes) are not only the most important enzymes for bioenergy and agricultural industries, but also very important for human health, in that human gut microbiota encode hundreds of CAZyme genes in their genomes for degrading various dietary and host carbohydrates. We have built an online database dbCAN-seq (http://cys.bios.niu.edu/dbCAN_seq) to provide pre-computed CAZyme sequence and annotation data for 5,349 bacterial genomes. Compared to the other CAZyme resources, dbCAN-seq has the following new features: (i) a convenient download page to allow batch download of all the sequence and annotation data; (ii) an annotation page for every CAZyme to provide the most comprehensive annotation data; (iii) a metadata page to organize the bacterial genomes according to species metadata such as disease, habitat, oxygen requirement, temperature, metabolism; (iv) a very fast tool to identify physically linked CAZyme gene clusters (CGCs) and (v) a powerful search function to allow fast and efficient data query. 
With these unique utilities, dbCAN-seq will become a valuable web resource for CAZyme research, with a focus complementary to dbCAN (automated CAZyme annotation server) and CAZy (CAZyme family classification and reference database).",2018-01-01 +34671257,Pharmacovigilance Bibliometrics: Visualizing Thematic Development in the Category of Pharmacology and Pharmacy in Web of Science.,"Introduction: Pharmacovigilance studies include monitoring and preventing the occurrence of new, rare, or serious adverse drug reactions, making it possible to discover new safety issues without delay. Bibliometrics could assist scholars to analyze the development of pharmacovigilance. Methods: The MeSH terms of both pharmacovigilance and ""adverse drug reaction reporting system"" were retrieved in the Science Citation Index Expanded. The articles from 1974 to July 2021 in the pharmacology and pharmacy category were recruited. The citation reports including the publication numbers, h-index, and sum and average cited times in terms of annuals, countries, organizations, authors and journals were tabulated. The coauthorship relations in the analysis units of countries, organizations, and authors; the top 10 burst references; the document citation network; and the author's keywords co-occurrence overlay map were visualized by bibliometric software including the website (https://bibliometric.com/), VOSviewer, CiteSpace, and CitNetExplorer. Results: From 1974 to the present, the most high-yield publication year, country, institute, author, and journal were 2020 (n = 222), France (n = 522), Netherlands Pharmacovigilance Centre Lareb (n = 82), Jean-Louis Montastruc (n = 125), Drug Safety (n = 384), respectively, in all 2,128 articles. Similarly, the United States, Institut National de la Sante et de la Recherche Medicale, and Jean-Louis Montastruc had the most coauthorship strength at the macrolevel (global), mesolevel (local), and microlevel (individual). 
The topics of burst references covered are the development of methodology, issues of patients reporting and under-reporting, evaluation of methods and databases, assessment of causality, and perspectives in pharmacovigilance. Eight clusters were grouped in the document citation network. ""Pharmacovigilance,"" ""adverse drug reactions,"" ""pharmacoepidemiology,"" ""drug safety,"" and ""signal detection"" were the research priorities, while ""drug-related side effects and adverse reactions,"" ""VigiBase,"" ""disproportionality analysis,"" ""social media,"" ""FAERS,"" ""chemotherapy,"" ""patient safety,"" ""reporting odds ratio,"" and ""preventability"" might be the future research hotspots. Conclusion: Positive synergies can be observed in this study by employing the multiple software tools which established the relationship between the units of analysis. The bibliometric analysis can organize the thematic development and guide the hotspots of pharmacovigilance in pharmacology and pharmacy.",2021-10-04 +28981577,'Multi-omic' data analysis using O-miner.,"Innovations in -omics technologies have driven advances in biomedical research. However, integrating and analysing the large volumes of data generated from different high-throughput -omics technologies remain a significant challenge to basic and clinical scientists without bioinformatics skills or access to bioinformatics support. To address this demand, we have significantly updated our previous O-miner analytical suite, to incorporate several new features and data types to provide an efficient and easy-to-use Web tool for the automated analysis of data from '-omics' technologies. Created from a biologist's perspective, this tool allows for the automated analysis of large and complex transcriptomic, genomic and methylomic data sets, together with biological/clinical information, to identify significantly altered pathways and prioritize novel biomarkers/targets for biological validation. 
Our resource can be used to analyse both in-house data and the huge amount of publicly available information from array and sequencing platforms. Multiple data sets can be easily combined, allowing for meta-analyses. Here, we describe the analytical pipelines currently available in O-miner and present examples of use to demonstrate its utility and relevance in maximizing research output. O-miner Web server is free to use and is available at http://www.o-miner.org.",2019-01-01 +34554817,"Adolescent Police Stops, Self-Harm, and Attempted Suicide: Findings From the UK Millennium Cohort Study, 2012‒2019.","Objectives. To explore associations between police stops, self-harm, and attempted suicide among a large, representative sample of adolescents in the United Kingdom. Methods. Data were drawn from the 3 most recent sweeps of the UK Millennium Cohort Study (MCS), from 2012 to 2019. The MCS is an ongoing nationally representative contemporary birth cohort of children born in the United Kingdom between September 2000 and January 2002 (n = 10 345). Weights were used to account for sample design and multiple imputation for missing data. Results. Youths experiencing police stops by the age of 14 years (14.77%) reported significantly higher rates of self-harm (incidence rate ratio = 1.52; 95% confidence interval [CI] = 1.35, 1.69) at age 17 years and significantly higher odds of attempted suicide (odds ratio = 2.25; 95% CI = 1.84, 2.76) by age 17 years. These patterns were largely consistent across examined features of police stops and generally did not vary by sociodemographic factors. In addition, 17.73% to 40.18% of associations between police stops and outcomes were explained by mental distress. Conclusions. Police-initiated encounters are associated with youth self-harm and attempted suicide. Youths may benefit when school counselors or social workers provide mental health screenings and offer counseling care following these events. (Am J Public Health. 
2021;111(10):1885-1893. https://doi.org/10.2105/AJPH.2021.306434).",2021-09-23 +34778058,A Novel Nomogram for Predicting the Risk of Short-Term Recurrence After Surgery in Glioma Patients.,"

Objective

The aim of this study was to establish a nomogram model for predicting the risk of short-term recurrence in glioma patients.

Methods

The clinical data of recurrent glioma patients were summarized and analyzed in this study. Univariate and multivariate logistic regression analyses were performed to analyze the correlation between clinical data and the risk of short-term recurrence after operation. A nomogram was established based on the multivariate logistic regression model results.

Results

A total of 175 patients with recurrent glioma were enrolled, with 53 patients in the short-term recurrence (STR) group (recurrent time ≤6 months) and 122 patients in the long-term recurrence (LTR) group (recurrent time ≥36 months). Univariate analysis revealed that age at diagnosis, Karnofsky performance scores (KPSs), tumor location, glioma grade, glioma type, extent of resection (EOR), adjuvant chemotherapy (ad-CT), concurrent chemotherapy (co-CT), and isocitrate dehydrogenase (IDH) status were significantly associated with the short-term glioma recurrence. Multivariate analyses revealed that age at diagnosis, KPS, glioma grade, EOR, and IDH were independent risk factors for short-term glioma recurrence. A risk nomogram for the short-term recurrence of glioma was established, with the concordance index (C-index) of 0.971. The findings of calibration and receiver operating characteristic (ROC) curves showed that our nomogram model had good performance and discrimination to estimate short-term recurrence probability.

Conclusion

This nomogram model provides reliable information about the risk of short-term glioma recurrence for oncologists and neurosurgeons. This model can predict the short-term recurrence probability and give assistance to decide the interval of follow-up or formulate individualized treatment strategies based on the predicted results. A free online prediction risk tool for this nomogram is provided: https://rj2021.shinyapps.io/Nomogram_ recurrence-risk/.",2021-10-26 +33683899,mineXpert2: Full-Depth Visualization and Exploration of MSn Mass Spectrometry Data.,"mineXpert is a mass spectrometric data visualization and exploration software supporting only MS1 data that is aimed at proteomics scientists who do rarely require manual MS/MS data visualization and exploration (Rusconi, F. J. Proteome Res. 2019, 18, 2254-2259). In order to adapt it to new use cases in our facility and to widen its user base, mineXpert was entirely rewritten with the main aim of implementing MSn data support. Other feature additions were new data visualization and exploration methods, with an overhaul of the data plotting code to allow more flexible uses of mass data integration results. Further, the whole mass spectral data set can now be explored in a table view where the user may filter the data using a number of criteria that can be logically combined to pinpoint the smallest feature of interest. Ion mobility mass spectrometry is supported with specific data exploration and plotting. With mineXpert2, we provide a software program that will be of use to all mass spectrometrists, without restrictions on the field of endeavor, from pure chemistry to proteomics and metabolomics. As staff members of a mass spectrometry facility, we want to provide all users with a mass spectrometry data visualization and exploration software solution that frees them from the need to use closed-source vendor software. After conversion of the mass data to mzML, mineXpert2 requires no proprietary software whatsoever. 
The reference implementation is version 7.0.0 or greater. The software, a detailed user manual, and video tutorials are available at http://www.msxpertsuite.org.",2021-03-08 +34165490,SOMDE: A scalable method for identifying spatially variable genes with self-organizing map. ,"Recent developments of spatial transcriptomic sequencing technologies provide powerful tools for understanding cells in the physical context of tissue microenvironments. A fundamental task in spatial gene expression analysis is to identify genes with spatially variable expression patterns, or spatially variable genes (SVgenes). Several computational methods have been developed for this task. Their high computational complexity limited their scalability to the latest and future large-scale spatial expression data. We present SOMDE, an efficient method for identifying SVgenes in large-scale spatial expression data. SOMDE uses self-organizing map (SOM) to cluster neighboring cells into nodes, and then uses a Gaussian process to fit the node-level spatial gene expression to identify SVgenes. Experiments show that SOMDE is about 5-50 times faster than existing methods with comparable results. The adjustable resolution of SOMDE makes it the only method that can give results in ∼5 minutes in large datasets of more than 20,000 sequencing sites. SOMDE is available as a python package on PyPI at https://pypi.org/project/somde free for academic use. Supplementary data are available at Bioinformatics online.",2021-06-24 +34120557,Hearing aid acquisition and ownership: what can we learn from online consumer reviews?,"

Objective

To explore the publicised opinions of consumers actively participating in online hearing aid reviews.

Design

A retrospective design examining data generated from an online consumer review website (www.HearingTracker.com). Qualitative data (open text responses) were analysed using the open source automated topic modelling software IRaMuTeQ (http://www.iramuteq.org/) to identify themes. Outputs were compared with quantitative data from the consumer reviews (short response questions exploring hearing aid performance and benefit, and some meta-data such as hearing aid brand and years of hearing aid ownership).

Study sample

1378 online consumer hearing aid reviews.

Results

Six clusters within two domains were identified. The domain Device Acquisition included three clusters: Finding the right provider, device and price-point; Selecting a hearing aid to suit the hearing loss; Attaining physical fit and device management skills. The domain Device Use included three clusters: Smartphone streaming to hearing aids; Hearing aid adjustment using smartphone; and Hearing in noise.

Conclusions

Although online hearing aid consumers indicate positive performance on multiple-choice questions relating to hearing aid performance and benefit, their online reviews describe a number of barriers limiting their success. Hearing healthcare clinicians must employ a personalised approach to audiological rehabilitation to ensure individual clients' needs are met.",2021-06-13 +32369809,Introducing the Bird Chromosome Database: An Overview of Cytogenetic Studies in Birds.,"Bird chromosomes, which have been investigated scientifically for more than a century, present a number of unique features. In general, bird karyotypes have a high diploid number (2n) of typically around 80 chromosomes that are divided into macro- and microchromosomes. In recent decades, FISH studies using whole chromosome painting probes have shown that the macrochromosomes evolved through both inter- and intrachromosomal rearrangements. However, chromosome painting data are available for only a few bird species, which hinders a more systematic approach to the understanding of the evolutionary history of the enigmatic bird karyotype. Thus, we decided to create an innovative database through compilation of the cytogenetic data available for birds, including chromosome numbers and the results of chromosome painting with chicken (Gallus gallus) probes. The data were obtained through an extensive literature review, which focused on cytogenetic studies published up to 2019. In the first version of the ""Bird Chromosome Database (BCD)"" (https://sites.unipampa.edu.br/birdchromosomedatabase) we have compiled data on the chromosome numbers of 1,067 bird species and chromosome painting data on 96 species. We found considerable variation in the diploid numbers, which ranged from 40 to 142, although most (around 50%) of the species studied up to now have between 78 and 82 chromosomes. Despite its importance for cytogenetic research, chromosome painting has been applied to less than 1% of all bird species. 
The BCD will enable researchers to identify the main knowledge gaps in bird cytogenetics, including the most under-sampled groups, and make inferences on chromosomal homologies in phylogenetic studies.",2020-05-06 +34693847,Higher-order trajectories of pain and depressive symptoms link midlife financial stress to women's well-being in later life.,"Objectives: Consistent with biopsychosocial models, shared pathophysiological conditions underlying both physical pain and depressive symptoms can result in the clustering of pain and depressive symptoms. However, previous studies have not investigated a higher-order construct capturing both pain and depressive symptoms over time. Furthermore, research has not identified trajectory antecedents (e.g. perceived family financial stress) and their consequences for later-life health and well-being. The present study sought to address these gaps in the research.Method: Using prospective data over 23 years from 244 long-term married women, the present study estimated latent growth curves in a structural equation model (more specifically a parallel trajectory model was estimated).Results: Family financial strain in midlife was, on average, associated with a higher initial level (β = .37, p < .001) and rate of change (β = .20, p = .045) of pain-depressive symptoms trajectories, which, in turn, contributed to health and well-being challenges, including the level and rate of change in physical limitations (β = .50, p < .001 and 0.43, p < .001, respectively), memory impairment (β = .47 and .47, p < .001, respectively), and loneliness (β = .63, p = < .001 and .28, p = .022, respectively) in later years. The adverse influence of family financial strain on pain-depressive symptoms trajectories weakened under high levels of marital closeness (β = -.10, p = .032). 
Conclusion: These findings emphasize the necessity of policies and interventions that focus on reducing adults' stressful life circumstances and further developing protective factors that can aid in the redirection of adverse pain-depressive symptoms trajectories.Supplemental data for this article are available online at https://doi.org/10.1080/13607863.2021.1993129.",2021-10-25 +34492338,GAPIT Version 3: Boosting Power and Accuracy for Genomic Association and Prediction.,"Genome-wide association study (GWAS) and genomic prediction/selection (GP/GS) are the two essential enterprises in genomic research. Due to the great magnitude and complexity of genomic and phenotypic data, analytical methods and their associated software packages are frequently advanced. GAPIT is a widely-used genomic association and prediction integrated tool as an R package. The first version was released to the public in 2012 with the implementation of the general linear model (GLM), mixed linear model (MLM), compressed MLM (CMLM), and genomic best linear unbiased prediction (gBLUP). The second version was released in 2016 with several new implementations, including enriched CMLM (ECMLM) and settlement of MLMs under progressively exclusive relationship (SUPER). All the GWAS methods are based on the single-locus test. For the first time, in the current release of GAPIT, version 3 implemented three multi-locus test methods, including multiple loci mixed model (MLMM), fixed and random model circulating probability unification (FarmCPU), and Bayesian-information and linkage-disequilibrium iteratively nested keyway (BLINK). Additionally, two GP/GS methods were implemented based on CMLM (named compressed BLUP; cBLUP) and SUPER (named SUPER BLUP; sBLUP). These new implementations not only boost statistical power for GWAS and prediction accuracy for GP/GS, but also improve computing speed and increase the capacity to analyze big genomic data. 
Here, we document the current upgrade of GAPIT by describing the selection of the recently developed methods, their implementations, and potential impact. All documents, including source code, user manual, demo data, and tutorials, are freely available at the GAPIT website (http://zzlab.net/GAPIT).",2021-08-01 +34499112,Integrating genome-scale metabolic modelling and transfer learning for human gene regulatory network reconstruction. ,"Gene regulation is responsible for controlling numerous physiological functions and dynamically responding to environmental fluctuations. Reconstructing the human network of gene regulatory interactions is thus paramount to understanding the cell functional organisation across cell types, as well as to elucidating pathogenic processes and identifying molecular drug targets. Although significant effort has been devoted towards this direction, existing computational methods mainly rely on gene expression levels, possibly ignoring the information conveyed by mechanistic biochemical knowledge. Moreover, except for a few recent attempts, most of the existing approaches only consider the information of the organism under analysis, without exploiting the information of related model organisms. We propose a novel method for the reconstruction of the human gene regulatory network, based on a transfer learning strategy that synergically exploits information from human and mouse, conveyed by gene-related metabolic features generated in-silico from gene expression data. Specifically, we learn a predictive model from metabolic activity inferred via tissue-specific metabolic modelling of artificial gene knockouts. Our experiments show that the combination of our transfer learning approach with the constructed metabolic features provides a significant advantage in terms of reconstruction accuracy, as well as additional clues on the contribution of each constructed metabolic feature. 
The system, the datasets and all the results obtained in this study are available at: https://doi.org/10.6084/m9.figshare.c.5237687. Supplementary data are available at Bioinformatics online.",2021-09-09 +29688370,YummyData: providing high-quality open life science data. ,"Many life science datasets are now available via Linked Data technologies, meaning that they are represented in a common format (the Resource Description Framework), and are accessible via standard APIs (SPARQL endpoints). While this is an important step toward developing an interoperable bioinformatics data landscape, it also creates a new set of obstacles, as it is often difficult for researchers to find the datasets they need. Different providers frequently offer the same datasets, with different levels of support: as well as having more or less up-to-date data, some providers add metadata to describe the content, structures, and ontologies of the stored datasets while others do not. We currently lack a place where researchers can go to easily assess datasets from different providers in terms of metrics such as service stability or metadata richness. We also lack a space for collecting feedback and improving data providers’ awareness of user needs. To address this issue, we have developed YummyData, which consists of two components. One periodically polls a curated list of SPARQL endpoints, monitoring the states of their Linked Data implementations and content. The other presents the information measured for the endpoints and provides a forum for discussion and feedback. YummyData is designed to improve the findability and reusability of life science datasets provided as Linked Data and to foster its adoption. 
It is freely accessible at http://yummydata.org/.Database URL: http://yummydata.org/",2018-01-01 +32215309,Datasets associated with investigating the potential for beneficial reuse of produced water from oil and gas extraction outside of the energy sector.,"The data in this report are associated with https://doi.org/10.1016/j.scitotenv.2020.137085[4] and include data on water volumes and water quality related to the major unconventional oil and gas plays in the U.S. The data include volumes of water co-produced with oil and gas production, county-level estimates of annual water use volumes by various sectors, including hydraulic fracturing water use, and the quality of produced water. The data on volumes of produced water and hydraulic fracturing water volumes were obtained from the IHS Enerdeq and FracFocus databases. Water use in other sectors was obtained from the U.S. Geological Survey water use database. Data on produced water quality were obtained from the USGS produced waters database.",2020-03-09 +31669269,RNAdt: An online tutorial and data portal for the RNA structurome era.,"RNA is not only a passive transporter of genetic information, but also a pivotal player in all domains of life. RNA can regulate gene expression because of its involvement in transcription, mRNA modification and processing, and translation. RNA also possesses other intricate functions such as catalysis, ligand sensing, interaction with biomolecules, response to environment stresses, and information storage. The primary structure of RNA is single stranded, but it always folds into complex secondary and tertiary structures owing to base pairing and effects from the cellular environment. The importance of structure has been increasingly recognized in understanding the myriad functions of RNA. After decades of development, there is a wide range of RNA structure probing techniques. 
The marriage between structure probing and high-throughput sequencing (HTS) especially enables the measurement of RNA structure on a transcriptomic scale, advancing the advent of the RNA structurome era. Dozens of HTS-associated RNA structure probing methods have been published, so it is urgent to provide a user-friendly and easy-to-use resource for users who are perplexed by selecting the most suitable method for their experiments. Motivated by this demand, we collected currently available HTS-associated RNA structure probing methods and then developed RNAdt (freely accessible at http://www.zhounan.org/rnadt). RNAdt can be used as a web-based tutorial to learn fundamental knowledge of HTS-associated RNA structure probing methods. RNAdt can also be used as a data portal to access HTS data sets from previous RNA structurome studies. At the end of this work, we also provided perspectives on future development of RNA structure probing methods. Our study is expected to facilitate RNA structure probing and ultimately elucidate the connection between RNA structure and biological functions.",2019-10-24 +30244175,PTMD: A Database of Human Disease-associated Post-translational Modifications.,"Various posttranslational modifications (PTMs) participate in nearly all aspects of biological processes by regulating protein functions, and aberrant states of PTMs are frequently implicated in human diseases. Therefore, an integral resource of PTM-disease associations (PDAs) would be a great help for both academic research and clinical use. In this work, we reported PTMD, a well-curated database containing PTMs that are associated with human diseases. We manually collected 1950 known PDAs in 749 proteins for 23 types of PTMs and 275 types of diseases from the literature. Database analyses show that phosphorylation has the largest number of disease associations, whereas neurologic diseases have the largest number of PTM associations. 
We classified all known PDAs into six classes according to the PTM status in diseases and demonstrated that the upregulation and presence of PTM events account for a predominant proportion of disease-associated PTM events. By reconstructing a disease-gene network, we observed that breast cancers have the largest number of associated PTMs and AKT1 has the largest number of PTMs connected to diseases. Finally, the PTMD database was developed with detailed annotations and can be a useful resource for further analyzing the relations between PTMs and human diseases. PTMD is freely accessible at http://ptmd.biocuckoo.org.",2018-08-01 +34975080,"Standardization of an in-house multiplex real-time polymerase chain reaction for the simultaneous detection of Toxoplasma gondii, Rubella virus, cytomegalovirus, herpes simplex Virus 1 and 2, and Treponema pallidum infection among pregnant women.","

Background

An in-house multiplex real-time polymerase chain reaction (PCR) was developed in two cocktails for the identification of six Toxoplasma gondii, Rubella virus, cytomegalovirus, herpes simplex virus (1 and 2), and Treponema pallidum (syphilis) (TORCH-S) agents, which causes congenital infection among pregnant women.

Objective

Standardization and validation of an in-house multiplex real-time PCR assay for the detection of TORCH-S infection.

Methods

This study was conducted from February 2017 to February 2019. Primers specific for T. gondii, Rubella virus, cytomegalovirus, herpes simplex virus (1 and 2), and T. pallidum were designed using Primer3 software (https://bioinfo.ut.ee/primer3-0.4.0/). The primer sequences obtained were subjected to BLAST analysis using BLAST database. Synthetic DNA was obtained to use as positive control templates for all the six TORCH-S agents. The lower limit of the detection was performed using plasmid construct for each virus serially diluted from 10⁻¹ to 10⁻⁹.

Results

An in-house multiplex real-time PCR was standardized and validated in two cocktails for TORCH-S agents, cocktail-1 (HSV1, rubella, and T. gondii), and cocktail-2 (HSV2, CMV, and T. pallidum). The lower limit of the detection for HSV1, rubella, and Toxoplasma were 60.7 copies/10 μl input, 76.4 copies/10 μl input, and 34.4 copies/10 μl input and for HSV2, CMV, and T. pallidum were 80.8 copies/10 μl input, 166 copies/10 μl input, and 43.7 copies/10 μl input, respectively.

Conclusion

TORCH-S infection is one of the significant reasons for irregular pregnant outcomes. It is absolutely important to screen TORCH-S infection for women who had the histories of abnormal pregnancies to prevent birth defects and perinatal complications. This multiplex real-time PCR assay provides a rapid, sensitive, and specific technique to detect these six TORCH-S agents.",2021-10-01 +34490883,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines for Perioperative Spine: Preoperative Osteoporosis Assessment.,"

Background

Osteoporosis is a metabolic bone disease that commonly affects the elderly. Degenerative spinal disease that may require surgical intervention is also prevalent in this susceptible population. If undiagnosed or untreated before spine surgery, osteoporosis may result in an increased risk of postoperative adverse events. Nontreatment of osteoporosis preoperatively may be related to a poor understanding of bone physiology, a lack of standardized treatment algorithms, limited cost-effective interventions, and reluctance by spine surgeons to be the primary provider of osteoporosis management.

Objective

The objective of this evidence-based review is to develop guidelines for the preoperative assessment and treatment of osteoporosis in patients undergoing spine surgery.

Methods

A systematic review of the literature was performed using the National Library of Medicine/PubMed database and Embase for studies relevant to preoperative diagnostic studies that predict increased risk of osteoporosis-related postoperative adverse events and whether the preoperative treatment of low bone mineral density (BMD) in patients with osteoporosis improves outcome.

Results

Out of 281 studies, 17 met the inclusion criteria and were included for systematic review. The task force affirmed a Grade B recommendation that preoperative osteoporosis testing with a dual-energy X-ray absorptiometry scan (T-score < -2.5), a computed tomography scan (Hounsfield units <97.9), and serum vitamin D3 level (<20 ng/mL) predict an increased risk of osteoporosis-related adverse events after spine surgery. The task force determined a Grade B recommendation that preoperative osteoporosis treatment with teriparatide increases BMD, induces earlier and more robust fusion, and may improve select patient outcomes. There is insufficient evidence regarding preoperative treatment with bisphosphonates alone and postoperative outcome.

Conclusion

This evidence-based clinical guideline provides a recommendation that patients with suspected osteoporosis undergo preoperative assessment and be appropriately counseled about the risk of postoperative adverse events if osteoporosis is confirmed. In addition, preoperative optimization of BMD with select treatments improves certain patient outcomes.The full guidelines can be accessed at https://www.cns.org/guidelines/browse-guidelines-detail/3-preoperative-osteoporosis-assessment.",2021-10-01 +34670838,"The translatome of neuronal cell bodies, dendrites, and axons. ","To form synaptic connections and store information, neurons continuously remodel their proteomes. The impressive length of dendrites and axons imposes logistical challenges to maintain synaptic proteins at locations remote from the transcription source (the nucleus). The discovery of thousands of messenger RNAs (mRNAs) near synapses suggested that neurons overcome distance and gain autonomy by producing proteins locally. It is not generally known, however, if, how, and when localized mRNAs are translated into protein. To investigate the translational landscape in neuronal subregions, we performed simultaneous RNA sequencing (RNA-seq) and ribosome sequencing (Ribo-seq) from microdissected rodent brain slices to identify and quantify the transcriptome and translatome in cell bodies (somata) as well as dendrites and axons (neuropil). Thousands of transcripts were differentially translated between somatic and synaptic regions, with many scaffold and signaling molecules displaying increased translation levels in the neuropil. Most translational changes between compartments could be accounted for by differences in RNA abundance. Pervasive translational regulation was observed in both somata and neuropil influenced by specific mRNA features (e.g., untranslated region [UTR] length, RNA-binding protein [RBP] motifs, and upstream open reading frames [uORFs]). 
For over 800 mRNAs, the dominant source of translation was the neuropil. We constructed a searchable and interactive database for exploring mRNA transcripts and their translation levels in the somata and neuropil [MPI Brain Research, The mRNA translation landscape in the synaptic neuropil. https://public.brain.mpg.de/dashapps/localseq/ Accessed 5 October 2021]. Overall, our findings emphasize the substantial contribution of local translation to maintaining synaptic protein levels and indicate that on-site translational control is an important mechanism to control synaptic strength.",2021-10-01 +34490879,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines for Perioperative Spine: Preoperative Pulmonary Evaluation and Optimization.,"

Background

There are no current recommendations for preoperative pulmonary evaluation and management of patients undergoing elective spine surgery.

Objective

The aim of this guideline is to determine preoperative risk factors for perioperative and postoperative pulmonary adverse events and to determine the optimal preoperative evaluation and management of at-risk patients.

Methods

A systematic review of the literature was performed using the National Library of Medicine PubMed database and the Cochrane Library for studies relevant to postoperative pulmonary adverse events in patients undergoing spine surgery. Clinical studies evaluating preoperative patient risk factors and preoperative diagnostic and treatment interventions were selected for review.

Results

The literature search yielded 152 abstracts relevant to the PICO (patient/population, intervention, comparison, and outcomes) questions included in this chapter. The task force selected 65 articles for full-text review, and 24 were selected for inclusion in this systematic review. Twenty-three articles addressed preoperative patient risk factors. One article addressed preoperative diagnostic studies of pulmonary function. There were no studies meeting the inclusion criteria for preoperative pulmonary treatment.

Conclusion

There is substantial evidence for multiple preoperative patient factors that predict an increased risk of a postoperative pulmonary adverse event. Individuals with these risk factors (functional dependence, advanced age [≥65 yr], chronic obstructive pulmonary disease, congestive heart failure, weight loss, and obstructive sleep apnea) who are undergoing spine surgery should be counseled regarding the potential increased risk of a perioperative and postoperative pulmonary adverse events. There is insufficient evidence to support any specific preoperative diagnostic test for predicting the risk of postoperative pulmonary adverse events or any treatment intervention that reduces risk. It is suggested, however, to consider appropriate preoperative pulmonary diagnostic testing and treatment to address active pulmonary symptoms of existing or suspected disease.The full guidelines can be accessed at https://www.cns.org/guidelines/browse-guidelines-detail/5-preoperative-pulmonary-evaluation-optimization.",2021-10-01 +34626475,Food Enzyme Database (FEDA): a web application gathering information about food enzyme preparations available on the European market. ,"Following the European Commission No. 1332/2008 regulation and the consequent necessity of a scientific evaluation of food enzymes (FEs) for their approval for sale on the European Union market, many FE dossiers have been submitted to the European Commission and various documents currently co-exist. In order to centralize all relevant information in one structured location that is easily accessible to support enforcement laboratories and the competent authorities, we developed a web application, called Food Enzyme Database (FEDA). FEDA allows searching and collection of information originating from many different sources in one centralized portal. 
Queries can be performed using key information types, which include information on the producing company, production source (strain type, genetically modified microorganism status), type of enzyme protein and evaluation status with employed evaluation criteria. The database contains all current publicly available information. Centralizing all information coupled with intuitive searching functionality also allows the generation of general statistics regarding the current market situation. FEDA is open access and is freely available at the following location: https://feda.sciensano.be. Database URL : https://feda.sciensano.be.",2021-10-01 +34490886,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines for Perioperative Spine: Preoperative Surgical Risk Assessment.,"

Background

Patient factors (increased body mass index [BMI], smoking, and diabetes) may impact outcomes after spine surgery. There is a lack of consensus regarding which factors should be screened for and potentially modified preoperatively to optimize outcome.

Objective

The purpose of this evidence-based clinical practice guideline is to determine if preoperative patient factors of diabetes, smoking, and increased BMI impact surgical outcomes.

Methods

A systematic review of the literature for studies relevant to spine surgery was performed using the National Library of Medicine PubMed database and the Cochrane Library. Clinical studies evaluating the impact of diabetes or increased BMI with reoperation and/or surgical site infection (SSI) were selected for review. In addition, the impact of preoperative smoking on patients undergoing spinal fusion was reviewed.

Results

A total of 699 articles met inclusion criteria and 64 were included in the systematic review. In patients with diabetes, a preoperative hemoglobin A1c (HbA1c) >7.5 mg/dL is associated with an increased risk of reoperation or infection after spine surgery. The review noted conflicting studies regarding the relationship between increased BMI and SSI or reoperation. Preoperative smoking is associated with increased risk of reoperation (Grade B). There is insufficient evidence that cessation of smoking before spine surgery decreases the risk of reoperation.

Conclusion

This evidence-based guideline provides a Grade B recommendation that diabetic individuals undergoing spine surgery should have a preoperative HbA1c test before surgery and should be counseled regarding the increased risk of reoperation or infection if the level is >7.5 mg/dL. There is conflicting evidence that BMI correlates with greater SSI rate or reoperation rate (Grade I). Smoking is associated with increased risk of reoperation (Grade B) in patients undergoing spinal fusion. The full guidelines can be accessed at https://www.cns.org/guidelines/browse-guidelines-detail/2-preoperative-surgical-risk-assessement.",2021-10-01 +34490881,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines for Perioperative Spine: Preoperative Opioid Evaluation.,"

Background

Opioid use disorders in the United States have rapidly increased, yet little is known about the relationship between preoperative opioid duration and dose and patient outcomes after spine surgery. Likewise, the utility of preoperative opioid weaning is poorly understood.

Objective

The purpose of this evidence-based clinical practice guideline is to determine if duration and dose of preoperative opioids or preoperative opioid weaning is associated with patient-reported outcomes or adverse events after elective spine surgery for degenerative conditions.

Methods

A systematic review of the literature was performed using the National Library of Medicine/PubMed database and Embase for studies relevant to opioid use among adult patients undergoing spine surgery. Clinical studies evaluating preoperative duration, dose, and opioid weaning and outcomes were selected for review.

Results

A total of 41 of 845 studies met the inclusion criteria and none were Level I evidence. The use of any opioids before surgery was associated with longer postoperative opioid use, and longer duration of opioid use was associated with worse outcomes, such as higher complications, longer length of stay, higher costs, and increased utilization of resources. There is insufficient evidence to support the efficacy of opioid weaning on postoperative opioid use, improving outcome, or reducing adverse events after spine surgery.

Conclusion

This evidence-based clinical guideline provides Grade B recommendations that preoperative opioid use and longer duration of preoperative opioid use are associated with chronic postoperative opioid use and worse outcome after spine surgery. Insufficient evidence supports the efficacy of an opioid wean before spine surgery (Grade I). The full guidelines can be accessed at https://www.cns.org/guidelines/browse-guidelines-detail/1-preoperative-opioid-evaluation.",2021-10-01 +34191783,A Web-Based Deep Learning Model for Automated Diagnosis of Otoscopic Images.,"

Objectives

To develop a multiclass-classifier deep learning model and website for distinguishing tympanic membrane (TM) pathologies based on otoscopic images.

Methods

An otoscopic image database developed by utilizing publicly available online images and open databases was assessed by convolutional neural network (CNN) models including ResNet-50, Inception-V3, Inception-Resnet-V2, and MobileNetV2. Training and testing were conducted with a 75:25 breakdown. Area under the curve of receiver operating characteristics (AUC-ROC), accuracy, sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) were used to compare different CNN models' performances in classifying TM images.

Results

Our database included 400 images, organized into normal (n = 196) and abnormal classes (n = 204), including acute otitis media (n = 116), otitis externa (n = 44), chronic suppurative otitis media (n = 23), and cerumen impaction (n = 21). For binary classification between normal versus abnormal TM, the best performing model had average AUC-ROC of 0.902 (MobileNetV2), followed by 0.745 (Inception-Resnet-V2), 0.731 (ResNet-50), and 0.636 (Inception-V3). Accuracy ranged between 0.73-0.77, sensitivity 0.72-0.88, specificity 0.58-0.84, PPV 0.68-0.81, and NPV 0.73-0.83. Macro-AUC-ROC for MobileNetV2 based multiclass-classifier was 0.91, with accuracy of 66%. Binary and multiclass-classifier models based on MobileNetV2 were loaded onto a publicly accessible and user-friendly website (https://headneckml.com/tympanic). This allows the readership to upload TM images for real-time predictions using the developed algorithms.

Conclusions

Novel CNN algorithms were developed with high AUC-ROCs for differentiating between various TM pathologies. This was further deployed as a proof-of-concept publicly accessible website for real-time predictions.",2021-10-01 +34880704,"Biomass and mortmass of woody vegetation in metal-contaminated areas (Southern Urals, Russia).","

Background

Since the mid-2000s, long-term monitoring of various components of natural ecosystems under conditions of industrial pollution has been carried out in the Southern Urals. As a part of these monitoring programmes, the data on various components of biota in different biotopes, collected with different methods and in different time intervals, continue to be gathered. In addition, data collected through these monitoring programmes can also be used to study the local biodiversity of non-polluted areas. In 2012, in the vicinity of the Karabash Copper Smelter, a study of communities of small mammals was carried out, considering the heterogeneity of their habitats. Within the framework of this project, we presented a detailed description of the state of woody vegetation in the study area.

New information

The dataset (available from the GBIF network at https://www.gbif.org/dataset/61384edd-2d0a-437b-8cf0-ff4d2dfcc0da) includes the results of an assessment of the woody vegetation biomass at seven habitats (pine, birch and floodplain forests, reed swamp, sparse birch stand, marshy meadow and dump of household waste) of areas with different levels of industrial pollution in the vicinities of the Karabash, the Southern Urals. Karabash Copper Smelter (KCS) is one of Russia's most significant point polluters; the main components of its emissions are heavy metals, dust and sulphur dioxide. Parameters of woody vegetation (diameter at breast height, diameter at root collar level and biomass) were estimated for seven forest elements (forest stand, subcanopy (undergrowth and underwood), half-dead tree of a forest stand and four types of coarse woody debris (downed bole, fragment of downed bole, standing dead tree and stump)) at 41 sampling plots (20 at unpolluted and 21 at polluted areas) and 165 subplots (81 and 84, respectively). The dataset includes 411 sampling events (estimation events of the forest elements at sampling plots and subplots), corresponding to 5786 occurrences (estimations of the woody vegetation components) observed during July 2012. For most woody vegetation components (72%), an estimate of the above-ground phytomass is given. For each sampling event, information on the presence or absence of woody vegetation species at the considered habitats is provided (a total of 1479 occurrences with status ""absent""). The dataset can be used for environmental monitoring, sustainable forest management, modelling forest productivity considering global changes, studying the structure and biodiversity of forest cover and assessing forests' carbon-sequestration capacity. 
In addition, the dataset provides information about different forest ecosystems under the influence of strong industrial pollution.",2021-11-29 +,Folic Acid in Prenatal Supplements: Labeled Amounts Compared to Recommendations (P11-024-19),"Abstract

Objectives

Most prenatal supplements available in the US contain synthetic folic acid. We compared the labeled amounts of folic acid in prenatal supplements with: 1) the Recommended Dietary Allowance (RDA) of 360 mcg and Tolerable Upper Intake Level (UL) of 1000 mcg for pregnant women established by National Academies of Science, Engineering and Medicine's Food and Nutrition Board (FNB) and expressed as synthetic folic acid from supplements and fortified foods; 2) current population-based Daily Values (DV) used for labeling dietary supplements and established by the US Food and Drug Administration (FDA); 3) FDA criteria for making a neural tube defects health claim on prenatal supplement labels; and 4) 2009/2016 recommendations for the prevention of neural tube defects by the US Preventive Services Task Force (USPSTF). In 2016, the FDA revised its DV to 600 mcg DFE folate (360 mcg folic acid) to reflect amounts consistent with the RDAs. This new DV is lower than the pre-2016 DV of 800 mcg from food and supplement sources and the 800 mcg level to make a health claim.

Methods

We reviewed the synthetic folic acid content as declared on prenatal supplement labels sold with and without a prescription, using data in the Dietary Supplements Label Database (DSLD) (website: https://dsld.nlm.nih.gov/dsld/) and DailyMed (website: https://dailymed.nlm.nih.gov/dailymed/index.cfm).

Results

The many recommendations for folate versus folic acid are often unclear (e.g., Dietary Folate Equivalents vs. mcg folic acid). Amounts ≥ 800 mcg folic acid per serving, the prior DV, were present on 99% of 79 prescription and 91% of 121 nonprescription labels reviewed. 94% of the prescription and 16% of nonprescription prenatal supplements were labeled at 1000 mcg per serving, and none (0%) of the prescription and 74% of the nonprescription were labeled at 800 mcg. These labeled amounts (from supplements alone) were higher than the USPSTF recommended daily intake of 400 to 800 mcg and the current DV and RDA values.

Conclusions

The DV, UL, the criterion for making a health claim on prenatal supplement labels, the USPSTF recommendations, and the units used for expressing folate and folic acid recommendations need to be harmonized and clarified.

Funding Sources

Office of Dietary Supplements, NIH.",2019-06-01 +30576482,NBDC RDF portal: a comprehensive repository for semantic data in life sciences. ,"In the life sciences, researchers increasingly want to access multiple databases in an integrated way. However, different databases currently use different formats and vocabularies, hindering the proper integration of heterogeneous life science data. Adopting the Resource Description Framework (RDF) has the potential to address such issues by improving database interoperability, leading to advances in automatic data processing. Based on this idea, we have advised many Japanese database development groups to expose their databases in RDF. To further promote such activities, we have developed an RDF-based life science dataset repository called the National Bioscience Database Center (NBDC) RDF portal. All the datasets in this repository have been reviewed by the NBDC to ensure interoperability and queryability. As of July 2018, the service includes 21 RDF datasets, comprising over 45.5 billion triples. It provides SPARQL endpoints for all datasets, useful metadata and the ability to download RDF files. The NBDC RDF portal can be accessed at https://integbio.jp/rdf/.",2018-01-01 +34601118,G-quadruplexes in genomes of viruses infecting eukaryotes or prokaryotes are under different selection pressures from hosts.,"G-quadruplexes in viral genomes can be applied as the targets of antiviral therapies, which has attracted wide interest. However, it is still not clear whether the pervasive number of such elements in the viral world is the result of natural selection for functionality. In this study, we identified putative quadruplex-forming sequences (PQSs) across the known viral genomes and analyzed the abundance, structural stability, and conservation of viral PQSs. 
A Viral Putative G-quadruplex Database (http://jsjds.hzau.edu.cn/MBPC/ViPGD/index.php/home/index) was constructed to collect the details of each viral PQS, which provides guidance for selecting the desirable PQS. The PQS with two putative G-tetrads (G2-PQS) was significantly enriched in both eukaryotic viruses and prokaryotic viruses, whereas the PQSs with three putative G-tetrads (G3-PQS) were only enriched in eukaryotic viruses and depleted in prokaryotic viruses. The structural stability of PQSs in prokaryotic viruses was significantly lower than that in eukaryotic viruses. Conservation analysis showed that the G2-PQS, instead of G3-PQS, was highly conserved within the genus. This suggested that the G2-quadruplex might play an important role in viral biology, and the difference in the occurrence of G-quadruplex between eukaryotic viruses and prokaryotic viruses may result from the different selection pressures from hosts.",2021-09-30 +34591511,Individual variation in white matter microstructure is related to better recovery from negative stimuli.,"The uncinate fasciculus is a white matter tract that may facilitate emotion regulation by carrying connections from the prefrontal cortex to regions of the temporal lobe, including the amygdala. Depression and anxiety are associated with reduced uncinate fasciculus fractional anisotropy (FA)-a diffusion tensor imaging measure related to white matter integrity. In the current study, we tested whether FA in the uncinate fasciculus is associated with individual differences in emotional recovery measured with corrugator supercilii electromyography in response to negative, neutral, and positive images in 108 participants from the Midlife in the US (MIDUS; http://midus.wisc.edu) Refresher study. Corrugator activity is linearly associated with changes in affect, and differentiated negative, neutral, and positive emotional responses. 
Higher uncinate fasciculus FA was associated with lower corrugator activity 4-8 seconds after negative image offset, indicative of better recovery from negative provocation. In an exploratory analysis, we found a similar association for the inferior fronto-occipital, inferior longitudinal and superior longitudinal fasciculi. These results suggest that the microstructural features of the uncinate fasciculus, and these other association white matter fibers, may support emotion regulatory processes with greater white matter integrity facilitating healthier affective functioning. (PsycInfo Database Record (c) 2022 APA, all rights reserved).",2021-09-30 +34660608,Using Immune-Related Long Non-coding Ribonucleic Acids to Develop a Novel Prognosis Signature and Predict the Immune Landscape of Colon Cancer.,"Purpose: This study aimed to construct a novel signature to predict the survival of patients with colon cancer and the associated immune landscape, based on immune-related long noncoding ribonucleic acids (irlncRNAs). Methods: Expression profiles of irlncRNAs in 457 patients with colon cancer were retrieved from the TCGA database (https://portal.gdc.cancer.gov). Differentially expressed (DE) irlncRNAs were identified and irlncRNA pairs were recognized using Lasso regression and Cox regression analyses. Akaike information criterion (AIC) values of receiver operating characteristic (ROC) curve were calculated to identify the ideal cut-off point for dividing patients into two groups and constructing the prognosis signature. Quantitative real-time polymerase chain reaction (qRT-PCR) was performed to validate the expression of LINC02195 and SCARNA9 in colon cancer. Results: We identified 22 irlncRNA pairs and patients were divided into high-risk and low-risk groups based on the calculated risk score using these 22 irlncRNA pairs. The irlncRNA pairs were significantly related to patient survival. 
Low-risk patients had a significantly longer survival time than high-risk patients (p < 0.001). The area under the curve of the signature to predict 5-year survival was 0.951. The risk score correlated with tumor stage, infiltration depth, lymph node metastasis, and distant metastasis. The risk score remained significant after univariate and multivariate Cox regression analyses. A nomogram model to predict patient survival was developed based on the results of Cox regression analysis. Immune cell infiltration status, expression of some immune checkpoint genes, and sensitivity to chemotherapeutics were also related to the risk score. The results of qRT-PCR revealed that LINC02195 and SCARNA9 were significantly upregulated in colon cancer tissues. Conclusion: The constructed prognosis signature showed remarkable efficiency in predicting patient survival, immune cell infiltration status, expression of immune checkpoint genes, and sensitivity to chemotherapeutics.",2021-09-30 +34850851,Commentary on: Screening of immunosuppressive cells from colorectal adenocarcinoma and identification of prognostic markers. ,"Colorectal adenocarcinoma (COAD) is one subtype of colorectal carcinoma (CRC), whose development is associated with genetics, inappropriate immune response, and environmental factors. Although significant advances have been made in the treatment of COAD, the mortality rate remains high. It is a pressing need to explore novel therapeutic targets of COAD. Available evidence indicated that immune cell infiltration was correlated with cancer prognosis. To reveal the roles of immune cells in the COAD prognosis, a study published in Bioscience Reports by Li et al. (Bioscience Reports (2021) 41, https://doi.org/10.1042/BSR20203496) analyzed data from The Cancer Genome Atlas (TCGA) and Gene Expression Omnibus (GEO) dataset. It demonstrated a beneficial effect of Th17 cells in COAD prognosis. 
In addition, six hub genes (KRT23, ULBP2, ASRGL1, SERPINA1, SCIN, and SLC28A2) were identified to correlate with Th17 cells and COAD prognosis, suggesting one new therapy strategy and some predictive biomarkers of COAD. These findings reported by Li et al. may pave one way to explore the molecular mechanism of COAD further.",2021-12-01 +34812122,Internet access and partnership formation in the United States.,"The Internet has fundamentally altered how we communicate and access information and who we can interact with. However, the implications of Internet access for partnership formation are theoretically ambiguous. We examine their association using data from the National Longitudinal Survey of Youth (NLSY97) and Current Population Survey (CPS) in the United States. We find that the relationship between Internet access and partnership states (in the NLSY97) or partnership status (in the CPS) is age-dependent. While negative at the youngest adult ages, the association becomes positive as individuals reach their mid- to late 20s, for both same-sex and different-sex partnerships. The results suggest that Internet access is positively associated with union formation when individuals enter the stage in the young adult life course when they feel ready to commit to a long-term partnership. Our study contributes to a growing literature that highlights the implications of digital technologies for demographic processes. Supplementary material for this article is available at: https://doi.org/10.1080/00324728.2021.1999485.",2021-11-23 +34814339,Predicting S-nitrosylation proteins and sites by fusing multiple features.,"Protein S-nitrosylation is one of the most important post-translational modifications, a well-grounded understanding of S-nitrosylation is very significant since it plays a key role in a variety of biological processes. 
For an uncharacterized protein sequence, it is a very meaningful problem for both basic research and drug development when we can firstly identify whether it is a S-nitrosylation protein or not, and then predict the specific S-nitrosylation site(s). This work has proposed two models for identifying S-nitrosylation protein and its PTM sites. Firstly, three kinds of features are extracted from protein sequence: KNN scoring of functional domain annotation, PseAAC and bag-of-words based on the physical and chemical properties of amino acids. Secondly, the synthetic minority oversampling technique is used to balance the data sets, and some state-of-the-art classifiers and feature fusion strategies are performed on the balanced data sets. In the five-fold cross-validation for predicting S-nitrosylation proteins, the results of Accuracy (ACC), Matthew's correlation coefficient (MCC) and area under ROC curve (AUC) are 81.84%, 0.5178, 0.8635, respectively. Finally, a model for predicting S-nitrosylation sites has been constructed on the basis of tripeptide composition (TPC) and the composition of k-spaced amino acid pairs (CKSAAP). To eliminate redundant information and improve work efficiency, elastic nets are employed for feature selection. The five-fold cross-validation tests have indicated the promising success rates of the proposed model. For the convenience of related researchers, the web-server named ""RF-SNOPS"" has been established at http://www.jci-bioinfo.cn/RF-SNOPS.",2021-10-01 +34516309,A nine-hub-gene signature of metabolic syndrome identified using machine learning algorithms and integrated bioinformatics.,"Early risk assessments and interventions for metabolic syndrome (MetS) are limited because of a lack of effective biomarkers. In the present study, several candidate genes were selected as a blood-based transcriptomic signature for MetS. 
We collected so far the largest MetS-associated peripheral blood high-throughput transcriptomics data and put forward a novel feature selection strategy by combining weighted gene co-expression network analysis, protein-protein interaction network analysis, LASSO regression and random forest approaches. Two gene modules and 51 hub genes as well as a 9-hub-gene signature associated with metabolic syndrome were identified. Then, based on this 9-hub-gene signature, we performed logistic analysis and subsequently established a web nomogram calculator for metabolic syndrome risk (https://xjtulgz.shinyapps.io/DynNomapp/). This 9-hub-gene signature showed excellent classification and calibration performance (AUC = 0.968 in training set, AUC = 0.883 in internal validation set, AUC = 0.861 in external validation set) as well as ideal potential clinical benefit.",2021-12-01 +34738868,Adenylated proteins in mouse B16-F10 melanoma cells cluster in functional categories: a new paradigm for cellular regulation?,"In mammals, AMPylation of cellular proteins is carried out by Huntingtin yeast-interacting protein E, and pseudokinase SelO. Lysates from mouse B16-F10 melanoma cells have been fractionated by immuno-precipitation using magnetic Dynabeads coated with antibodies against both adenosine 5'-monophosphate in phosphate ester linkage to tyrosine, and adenosine-phosphate. Proteins pulled down with both these antibodies were subject to post-translational modification, most likely AMPylation. Using tandem mass spectrometry, analysis of these protein fractions identified 333 proteins that could be pulled down by both antibodies. 
Many of these proteins clustered in 13 functional Ingenuity Pathway Analysis categories of 4 or more adenylated proteins including some from the cytoskeleton, and some involved with initiating the unfolded protein response. Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1995608 .",2021-11-05 +30668832,DASHR 2.0: integrated database of human small non-coding RNA genes and mature products.,"

Motivation

Small non-coding RNAs (sncRNAs, <100 nts) are highly abundant RNAs that regulate diverse and often tissue-specific cellular processes by associating with transcription factor complexes or binding to mRNAs. While thousands of sncRNA genes exist in the human genome, no single resource provides searchable, unified annotation, expression and processing information for full sncRNA transcripts and mature RNA products derived from these larger RNAs.

Results

Our goal is to establish a complete catalog of annotation, expression, processing, conservation, tissue-specificity and other biological features for all human sncRNA genes and mature products derived from all major RNA classes. DASHR (Database of small human non-coding RNAs) v2.0 database is the first that integrates human sncRNA gene and mature products profiles obtained from multiple RNA-seq protocols. Altogether, 185 tissues/cell types and sncRNA annotations and >800 curated experiments from ENCODE and GEO/SRA across multiple RNA-seq protocols for both GRCh38/hg38 and GRCh37/hg19 assemblies are integrated in DASHR. Moreover, DASHR is the first to contain both known and novel, previously un-annotated sncRNA loci identified by unsupervised segmentation (13 times more loci with 1 678 800 total). Additionally, DASHR v2.0 adds >3 200 000 annotations for non-small RNA genes and other genomic features (long-noncoding RNAs, mRNAs, promoters, repeats). Furthermore, DASHR v2.0 introduces an enhanced user interface, interactive experiment-by-locus table view, sncRNA locus sorting and filtering by biological features. All annotation and expression information directly downloadable and accessible as UCSC genome browser tracks.

Availability and implementation

DASHR v2.0 is freely available at https://lisanwanglab.org/DASHRv2.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +32661237,"GlobalFungi, a global database of fungal occurrences from high-throughput-sequencing metabarcoding studies.","Fungi are key players in vital ecosystem services, spanning carbon cycling, decomposition, symbiotic associations with cultivated and wild plants and pathogenicity. The high importance of fungi in ecosystem processes contrasts with the incompleteness of our understanding of the patterns of fungal biogeography and the environmental factors that drive those patterns. To reduce this gap of knowledge, we collected and validated data published on the composition of soil fungal communities in terrestrial environments including soil and plant-associated habitats and made them publicly accessible through a user interface at https://globalfungi.com . The GlobalFungi database contains over 600 million observations of fungal sequences across > 17 000 samples with geographical locations and additional metadata contained in 178 original studies with millions of unique nucleotide sequences (sequence variants) of the fungal internal transcribed spacers (ITS) 1 and 2 representing fungal species and genera. The study represents the most comprehensive atlas of global fungal distribution, and it is framed in such a way that third-party data addition is possible.",2020-07-13 +34019646,OmicsAnalyst: a comprehensive web-based platform for visual analytics of multi-omics data.,"Data analysis and interpretation remain a critical bottleneck in current multi-omics studies. Here, we introduce OmicsAnalyst, a user-friendly, web-based platform that allows users to perform a wide range of well-established data-driven approaches for multi-omics integration, and visually explore their results in a clear and meaningful manner. 
To help navigate complex landscapes of multi-omics analysis, these approaches are organized into three visual analytics tracks: (i) the correlation network analysis track, where users choose among univariate and multivariate methods to identify important features and explore their relationships in 2D or 3D networks; (ii) the cluster heatmap analysis track, where users apply several cutting-edge multi-view clustering algorithms and explore their results via interactive heatmaps; and (iii) the dimension reduction analysis track, where users choose among several recent multivariate techniques to reveal global data structures, and explore corresponding scores, loadings and biplots in interactive 3D scatter plots. The three visual analytics tracks are equipped with comprehensive options for parameter customization, view customization and targeted analysis. OmicsAnalyst lowers the access barriers to many well-established methods for multi-omics integration via novel visual analytics. It is freely available at https://www.omicsanalyst.ca.",2021-07-01 +33956157,Mol* Viewer: modern web app for 3D visualization and analysis of large biomolecular structures.,"Large biomolecular structures are being determined experimentally on a daily basis using established techniques such as crystallography and electron microscopy. In addition, emerging integrative or hybrid methods (I/HM) are producing structural models of huge macromolecular machines and assemblies, sometimes containing 100s of millions of non-hydrogen atoms. The performance requirements for visualization and analysis tools delivering these data are increasing rapidly. Significant progress in developing online, web-native three-dimensional (3D) visualization tools was previously accomplished with the introduction of the LiteMol suite and NGL Viewers. Thereafter, Mol* development was jointly initiated by PDBe and RCSB PDB to combine and build on the strengths of LiteMol (developed by PDBe) and NGL (developed by RCSB PDB). 
The web-native Mol* Viewer enables 3D visualization and streaming of macromolecular coordinate and experimental data, together with capabilities for displaying structure quality, functional, or biological context annotations. High-performance graphics and data management allows users to simultaneously visualise up to hundreds of (superimposed) protein structures, stream molecular dynamics simulation trajectories, render cell-level models, or display huge I/HM structures. It is the primary 3D structure viewer used by PDBe and RCSB PDB. It can be easily integrated into third-party services. Mol* Viewer is open source and freely available at https://molstar.org/.",2021-07-01 +34673997,[Outpatient urology in Europe].,"In the care for patients with urological diseases, outpatient urology secures a near-to-home treatment by specialists in urology and is located between general practitioner and urological clinic. Comparably little is known about the structure and fields of work in this area of urology. A survey of the EAU Section ESUO of outpatient and office urology ( https://uroweb.org/section/esuo/ ) shows the diversity in terms of content and organisation of this sector in Europe, in which more than 16,500 outpatient urologists and thus about half of all professional urologists work full-time. This diversity is related to the diagnostic and therapeutic methods in outpatient urology and to the working conditions of outpatient urologists. For comparison, this information about European countries is contrasted with data from the German office urology as one type of outpatient urology.",2021-10-21 +30357403,EncoMPASS: an online database for analyzing structure and symmetry in membrane proteins.,"The EncoMPASS online database (http://encompass.ninds.nih.gov) collects, organizes, and presents information about membrane proteins of known structure, emphasizing their structural similarities as well as their quaternary and internal symmetries. Unlike, e.g. 
SCOP, the EncoMPASS database does not aim for a strict classification of membrane proteins, but instead is organized as a protein chain-centric network of sequence and structural homologues. The online server for the EncoMPASS database provides tools for comparing the structural features of its entries, making it a useful resource for homology modeling and active site identification studies. The database can also be used for inferring functionality, which for membrane proteins often involves symmetry-related mechanisms. To this end, the online database also provides a comprehensive description of both the quaternary and internal symmetries in known membrane protein structures, with a particular focus on their orientation relative to the membrane.",2019-01-01 +,Payment and Delivery System Innovations : Patient‐reported Outcomes from the Comprehensive Care for Joint Replacement Evaluation,"

Research Objective

The Comprehensive Care for Joint Replacement (CJR) model is intended to encourage participant hospitals to reduce Medicare payments by coordinating care with the physicians, postacute care (PAC) providers, and other providers involved in an episode of care for a lower extremity joint replacement (LEJR), which comprises the surgery plus the services provided in the 90 days after hospital discharge. Although participant hospitals are incentivized to improve or maintain quality of care, reductions in institutional PAC attributable to the model could have adverse effects on patient recovery and care experiences.1 We surveyed Medicare fee‐for‐service (FFS) beneficiaries a few months after LEJR surgery to document their self‐reported functional status, pain, satisfaction with care management and overall recovery, care transitions, and dependence on caregivers for help with activities of daily living (ADLs). These patient‐reported outcomes are important indicators of quality that cannot be measured using secondary data.

Study Design

From 171 metropolitan statistical areas (MSAs) that met CJR eligibility criteria, the Centers for Medicare & Medicaid Services (CMS) randomly selected 67 for CJR and 104 for the control group. Participation in CJR was mandatory for all hospitals in the 67 selected MSAs during the time covered by our analysis. We surveyed a stratified random sample of Medicare FFS beneficiaries who had LEJR surgery in CJR hospitals and a matched group of beneficiaries who had surgery in control hospitals. Patients received the survey approximately 90 to 120 days after hospital discharge. We estimated risk‐adjusted differences between CJR and control respondents on all outcomes.

Population Studied

Medicare FFS beneficiaries who had LEJR surgery in March, April, August, or September 2017 were sampled from the CJR intervention group (7,604 beneficiaries) and the control group (7,188 beneficiaries). We oversampled patients with hip fractures to assess results for beneficiaries who may be most sensitive to care changes made by hospitals in response to CJR model incentives. Response rates for the survey were similar for the CJR and control groups (70.7% and 71.4%, respectively).

Principal Findings

The CJR model did not have a significant impact on patient‐reported functional status, pain, satisfaction with care management and overall recovery, and care transitions. The only significant difference was that CJR respondents reported needing more caregiver help putting on or taking off clothes after returning home than did control respondents. On a 100‐point scale, the difference was ‐2.3 points (P < .01). All measures were similar for CJR and control respondents with hip fractures.

Conclusions

The CJR model resulted in a small increase in reported caregiver help needed after patients returned home. Functional status and satisfaction with care and recovery roughly 90 to 120 days after hospital discharge, however, were not affected by the model, indicating that concerns about dependence on caregivers did not translate to worse functional recovery or satisfaction with care. Implications for Policy or Practice: Other evaluation results show that CJR reduced Medicare payments by reducing institutional PAC.1 Despite lower use in institutional PAC, the model did not affect patient‐reported outcomes and satisfaction. [1] https://innovation.cms.gov/Files/reports/cjr-secondannrpt.pdf

Primary Funding Source

Centers for Medicare and Medicaid Services.",2020-08-01 +31292921,Core transcriptional signatures of phase change in the migratory locust.,"Phenotypic plasticity plays fundamental roles in successful adaptation of animals in response to environmental variations. Here, to reveal the transcriptome reprogramming in locust phase change, a typical phenotypic plasticity, we conducted a comprehensive analysis of multiple phase-related transcriptomic datasets of the migratory locust. We defined PhaseCore genes according to their contribution to phase differentiation by the adjustment for confounding principal components analysis algorithm (AC-PCA). Compared with other genes, PhaseCore genes predicted phase status with over 87.5% accuracy and displayed more unique gene attributes including the faster evolution rate, higher CpG content and higher specific expression level. Then, we identified 20 transcription factors (TFs) named PhaseCoreTF genes that are associated with the regulation of PhaseCore genes. Finally, we experimentally verified the regulatory roles of three representative TFs (Hr4, Hr46, and grh) in phase change by RNAi. Our findings revealed that core transcriptional signatures are involved in the global regulation of locust phase changes, suggesting a potential common mechanism underlying phenotypic plasticity in insects. The expression and network data are accessible in an online resource called LocustMine (http://www.locustmine.org:8080/locustmine).",2019-07-10 +34726854,[Study of the functional significance of polymorphic loci of the LOXL1 gene associated with glaucoma according to genome-wide studies (in silico analysis)].,"Glaucoma is one of the most common eye diseases leading to blindness, and whole-genome studies have shown that genetic factors are important in its formation. Purpose - to perform an in silico analysis of the functional significance of polymorphic loci of the LOXL1 gene associated with glaucoma, using data from whole-genome studies.

Material and methods

Using the catalog of genome-wide studies (GWAS) of the National Human Genome Research Institute (https://www.genome.gov/gwastudies/), three polymorphic loci of the LOXL1 gene (rs2165241, rs4886776, rs893818) associated with glaucoma (pseudoexfoliation glaucoma/syndrome) were chosen for the study. Using modern databases on functional genomics (SIFT, PolyPhen-2, HaploReg, GTExportal), the functional significance of these polymorphic loci was assessed (nonsynonymous substitutions, epigenetic effects, association with gene expression, associations with alternative splicing of gene transcripts).

Results

The work establishes the important functional significance of the rs2165241, rs4886776 and rs893818 polymorphic loci of the LOXL1 gene. They demonstrate significant epigenetic effects (affect the affinity to five transcription factors, are located in the region of promoters and enhancers, in the region of hypersensitivity to DNase-1), are associated with the expression and alternative splicing of three genes (LOXL1, LOXL1-AS1, RP11-24D15.1) in cell cultures, organs and tissues pathogenetically significant for development of glaucoma, are strongly linked to the rs1048661 polymorphism, which causes the replacement of the Arg141Leu amino acid in the LOXL1 polypeptide.

Conclusion

Polymorphic loci of the LOXL1 gene (rs2165241, rs4886776, and rs893818) are of great functional importance (epigenetic, eQTL, and sQTL), which may be the biomedical basis of their associations with glaucoma.",2021-01-01 +35098124,Evaluation of COVID-19 Restrictions on Distance Runners' Training Habits Using Wearable Trackers.,"The COVID-19 pandemic caused widespread disruption to many individuals' lifestyles. Social distancing restrictions implemented during this global pandemic may bring potential impact on physical activity habits of the general population. However, running is one of the most popular forms of physical activity worldwide and one in which it could be maintained even during most COVID-19 restrictions. We aimed to determine the impact of COVID-19 restrictions on runners' training habits through analyzing the training records obtained from their GPS enabled wearable trackers. Retrospective and prospective data were collected from an online database (https://wetrac.ucalgary.ca). Runners' training habits, including frequency, intensity and duration of training, weekly mileage and running locations were analyzed and compared 9 months before and after the start of COVID-19 restrictions in March 2020. We found that runners ran 3 km per week more (p = 0.05, Cohen's d = 0.12) after the start of COVID-19 restrictions, and added 0.3 training sessions per week (p = 0.03, Cohen's d = 0.14). Moreover, runners ran an additional 0.4 sessions outdoors (p < 0.01, Cohen's d = 0.21) but there was no significant change in the intensity or duration of training sessions. Our findings suggested that runners adopted slightly different training regimen as a result of COVID-19 restrictions. 
Our results described the collective changes, irrespective of differences in response measures adopted by various countries or cities during the COVID-19 pandemic.",2021-01-01 +34908131,InDeep: 3D fully convolutional neural networks to assist in silico drug design on protein-protein interactions. ,"Protein-protein interactions (PPIs) are key elements in numerous biological pathways and the subject of a growing number of drug discovery projects including against infectious diseases. Designing drugs on PPI targets remains a difficult task and requires extensive efforts to qualify a given interaction as an eligible target. To this end, besides the evident need to determine the role of PPIs in disease-associated pathways and their experimental characterization as therapeutics targets, prediction of their capacity to be bound by other protein partners or modulated by future drugs is of primary importance. We present InDeep, a tool for predicting functional binding sites within proteins that could either host protein epitopes or future drugs. Leveraging deep learning on a curated data set of PPIs, this tool can proceed to enhanced functional binding site predictions either on experimental structures or along molecular dynamics trajectories. The benchmark of InDeep demonstrates that our tool outperforms state of the art ligandable binding sites predictors when assessing PPI targets but also conventional targets. This offers new opportunities to assist drug design projects on PPIs by identifying pertinent binding pockets at or in the vicinity of PPI interfaces. 
The tool is available on GitLab at https://gitlab.pasteur.fr/InDeep/InDeep.",2021-12-15 +34712007,International Harmonization of Nomenclature and Diagnostic Criteria (INHAND): Nonproliferative and Proliferative Lesions of the Rabbit.,"The INHAND (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions Project (www.toxpath.org/inhand.asp) is a joint initiative of the Societies of Toxicologic Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP) and North America (STP) to develop an internationally accepted nomenclature for proliferative and non-proliferative lesions in laboratory animals. The purpose of this publication is to provide a standardized nomenclature for classifying microscopic lesions observed in most tissues and organs from the laboratory rabbit used in nonclinical safety studies. Some of the lesions are illustrated by color photomicrographs. The standardized nomenclature presented in this document is also available electronically on the internet (http://www.goreni.org/). Sources of material included histopathology databases from government, academia, and industrial laboratories throughout the world. Content includes spontaneous lesions as well as lesions induced by exposure to test materials. Relevant infectious and parasitic lesions are included as well. A widely accepted and utilized international harmonization of nomenclature for lesions in laboratory animals will provide a common language among regulatory and scientific research organizations in different countries and increase and enrich international exchanges of information among toxicologists and pathologists.",2021-09-28 +34712008,International Harmonization of Nomenclature and Diagnostic Criteria (INHAND): Non-proliferative and Proliferative Lesions of the Non-human Primate (M. 
fascicularis).,"The INHAND (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions Project (www.toxpath.org/inhand.asp) is a joint initiative of the Societies of Toxicologic Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP) and North America (STP) to develop an internationally accepted nomenclature for proliferative and nonproliferative lesions in laboratory animals. The purpose of this publication is to provide a standardized nomenclature for classifying microscopic lesions observed in most tissues and organs from the nonhuman primate used in nonclinical safety studies. Some of the lesions are illustrated by color photomicrographs. The standardized nomenclature presented in this document is also available electronically on the internet (http://www.goreni.org/). Sources of material included histopathology databases from government, academia, and industrial laboratories throughout the world. Content includes spontaneous lesions as well as lesions induced by exposure to test materials. Relevant infectious and parasitic lesions are included as well. A widely accepted and utilized international harmonization of nomenclature for lesions in laboratory animals will provide a common language among regulatory and scientific research organizations in different countries and increase and enrich international exchanges of information among toxicologists and pathologists.",2021-09-28 +33722161,Data mining patented antibody sequences.,"The patent literature should reflect the past 30 years of engineering efforts directed toward developing monoclonal antibody therapeutics. Such information is potentially valuable for rational antibody design. Patents, however, are designed not to convey scientific knowledge, but to provide legal protection. It is not obvious whether antibody information from patent documents, such as antibody sequences, is useful in conveying engineering know-how, rather than as a legal reference only. 
To assess the utility of patent data for therapeutic antibody engineering, we quantified the amount of antibody sequences in patents destined for medicinal purposes and how well they reflect the primary sequences of therapeutic antibodies in clinical use. We identified 16,526 patent families covering major jurisdictions (e.g., US Patent and Trademark Office (USPTO) and World Intellectual Property Organization) that contained antibody sequences. These families held 245,109 unique antibody chains (135,397 heavy chains and 109,712 light chains) that we compiled in our Patented Antibody Database (PAD, http://naturalantibody.com/pad). We find that antibodies make up a non-trivial proportion of all patent amino acid sequence depositions (e.g., 11% of USPTO Full Text database). Our analysis of the 16,526 families demonstrates that the volume of patent documents with antibody sequences is growing, with the majority of documents classified as containing antibodies for medicinal purposes. We further studied the 245,109 antibody chains from patent literature to reveal that they very well reflect the primary sequences of antibody therapeutics in clinical use. This suggests that the patent literature could serve as a reference for previous engineering efforts to improve rational antibody design.",2021-01-01 +32399202,"Visualize omics data on networks with Omics Visualizer, a Cytoscape App.","Cytoscape is an open-source software used to analyze and visualize biological networks. In addition to being able to import networks from a variety of sources, Cytoscape allows users to import tabular node data and visualize it onto networks. Unfortunately, such data tables can only contain one row of data per node, whereas omics data often have multiple rows for the same gene or protein, representing different post-translational modification sites, peptides, splice isoforms, or conditions. 
Here, we present a new app, Omics Visualizer, that allows users to import data tables with several rows referring to the same node, connect them to one or more networks, and visualize the connected data onto networks. Omics Visualizer uses the Cytoscape enhancedGraphics app to show the data either in the nodes (pie visualization) or around the nodes (donut visualization), where the colors of the slices represent the imported values. If the user does not provide a network, the app can retrieve one from the STRING database using the Cytoscape stringApp. The Omics Visualizer app is freely available at https://apps.cytoscape.org/apps/omicsvisualizer.",2020-02-28 +34235432,"PIP-SNP: a pipeline for processing SNP data featured as linkage disequilibrium bin mapping, genotype imputing and marker synthesizing.","Genome-wide association study data analyses often face two significant challenges: (i) high dimensionality of single-nucleotide polymorphism (SNP) genotypes and (ii) imputation of missing values. SNPs are not independent due to physical linkage and natural selection. The correlation of nearby SNPs is known as linkage disequilibrium (LD), which can be used for LD conceptual SNP bin mapping, missing genotype inferencing and SNP dimension reduction. We used a stochastic process to describe the SNP signals and proposed two types of autocorrelations to measure nearby SNPs' information redundancy. Based on the calculated autocorrelation coefficients, we constructed LD bins. We adopted a k-nearest neighbors algorithm (kNN) to impute the missing genotypes. We proposed several novel methods to find the optimal synthetic marker to represent the SNP bin. We also proposed methods to evaluate the information loss or information conservation between using the original genome-wide markers and using dimension-reduced synthetic markers. 
Our performance assessments on the real-life SNP data from a rice recombinant inbred line (RIL) population and a rice HapMap project show that the new methods produce satisfactory results. We implemented these functional modules in C/C++ and streamlined them into a web-based pipeline named PIP-SNP (https://bioinfo.noble.org/PIP_SNP/) for processing SNP data.",2021-07-05 +32620933,Probabilistic identification of saccharide moieties in biomolecules and their protein complexes.,"The chemical composition of saccharide complexes underlies their biomedical activities as biomarkers for cardiometabolic disease, various types of cancer, and other conditions. However, because these molecules may undergo major structural modifications, distinguishing between compounds of saccharide and non-saccharide origin becomes a challenging computational problem that hinders the aggregation of information about their bioactive moieties. We have developed an algorithm and software package called ""Cheminformatics Tool for Probabilistic Identification of Carbohydrates"" (CTPIC) that analyzes the covalent structure of a compound to yield a probabilistic measure for distinguishing saccharides and saccharide-derivatives from non-saccharides. CTPIC analysis of the RCSB Ligand Expo (database of small molecules found to bind proteins in the Protein Data Bank) led to a substantial increase in the number of ligands characterized as saccharides. CTPIC analysis of Protein Data Bank identified 7.7% of the proteins as saccharide-binding. CTPIC is freely available as a webservice at (http://ctpic.nmrfam.wisc.edu).",2020-07-03 +33850977,Standardized method for material flow data collection at city level.,"The collection of material flow data is the first step in the evaluation of the circular economy performance and material metabolism at the city level. However, Chinese statistical data are published by Chinese National Bureau of Statistics, and provincial and municipal Bureau of Statistics. 
This resulted in data being scattered in dispersed sources and varying between cities, even brings about mistakes. Therefore, we established a standardized data collection and accounting method with regular data sources for Chinese cities. In this data collection method, material flow accounting mainly consists of three parts: direct material input, material recycling, and waste disposal. It covers four types of materials, including fossil fuels, biomass, metal minerals, and non-metallic minerals with 155 items. We combined the data sources for the material flow accounting within a standardized Excel spreadsheet with detailed information on statistical data sources and equations to convert the information into material flow data. The statistical data were derived from the China City Statistical Yearbook, the provincial Statistic Yearbook, and the city's statistical yearbook. The estimated data in the material accounting were obtained by converting statistical data using relative coefficients. According to the main sources and features of materials use, the intersectoral material flows can also be estimated following this standardized method for urban metabolism analysis, circular economy performance evaluation, and ecological network analysis. The standardized method for material flow data collection was adopted in the article ""H. Gao, X. Tian, Y. Zhang, L. Shi, F. Shi, 2021. Evaluating circular economy performance based on ecological network analysis: A framework and application at city level. 105257. Resources, Conservation & Recycling. https://doi.org/10.1016/j.resconrec.2020.105257"".",2021-02-24 +31035717,Conserved Secondary Structures in Viral mRNAs. ,"RNA secondary structure in untranslated and protein coding regions has been shown to play an important role in regulatory processes and the viral replication cycle. 
While structures in non-coding regions have been investigated extensively, a thorough overview of the structural repertoire of protein coding mRNAs, especially for viruses, is lacking. Secondary structure prediction of large molecules, such as long mRNAs remains a challenging task, as the contingent of structures a sequence can theoretically fold into grows exponentially with sequence length. We applied a structure prediction pipeline to Viral Orthologous Groups that first identifies the local boundaries of potentially structured regions and subsequently predicts their functional importance. Using this procedure, the orthologous groups were split into structurally homogenous subgroups, which we call subVOGs. This is the first compilation of potentially functional conserved RNA structures in viral coding regions, covering the complete RefSeq viral database. We were able to recover structural elements from previous studies and discovered a variety of novel structured regions. The subVOGs are available through our web resource RNASIV (RNA structure in viruses; http://rnasiv.bio.wzw.tum.de).",2019-04-29 +30456247,Data describing the eco-physiological responses of twenty-four sunflower genotypes to water deficit.,"This article presents experimental data describing the physiology and morphology of sunflower plants subjected to water deficit. Twenty-four sunflower genotypes were selected to represent genetic diversity within cultivated sunflower and included both inbred lines and their hybrids. Drought stress was applied to plants in pots at the vegetative stage using the high-throughput phenotyping platform Heliaphen at INRA Toulouse (France). Here, we provide data including specific leaf area, osmotic potential and adjustment, carbon isotope discrimination, leaf transpiration, plant architecture: plant height, leaf number, stem diameter. 
We also provide leaf areas of individual organs through time and growth rate during the stress period, environmental data such as temperatures, wind and radiation during the experiment. These data differentiate both treatment and the different genotypes and constitute a valuable resource to the community to study adaptation of crops to drought and the physiological basis of heterosis. It is available on the following repository: https://doi.org/10.25794/phenotype/er6lPW7V.",2018-10-18 +32699555,Deep learning methods improve linear B-cell epitope prediction.,"

Background

B-cell epitopes play important roles in vaccine design, clinical diagnosis, and antibody production. Although some models have been developed to predict linear or conformational B-cell epitopes, their performance is still unsatisfactory. Hundreds of thousands of linear B-cell epitope data have accumulated in the Immune Epitope Database (IEDB). These data can be explored using the deep learning methods, in order to create better predictive models for linear B-cell epitopes.

Results

After data cleaning, we obtained 240,563 peptide samples with experimental evidence from the IEDB database, including 25,884 linear B-cell epitopes and 214,679 non-epitopes. Based on the peptide center, we adapted each peptide to the same length by trimming or extending. A random portion of the data, with the same amount of epitopes and non-epitopes, were set aside as test dataset. Then a same number of epitopes and non-epitopes were randomly selected from the remaining data to build a classifier with the feedforward deep neural network. We built eleven classifiers to form an ensemble prediction model. The model will report a peptide as an epitope if it was classified as epitope by all eleven classifiers. Then we used the test data set to evaluate the performance of the model using the area value under the receiver operating characteristic (ROC) curve (AUC) as an indicator. We established 40 models to predict linear B-cell epitopes of length from 11 to 50 separately, and found that the AUC value increased with the length and tended to be stable when the length was 38. Repeated results showed that the models constructed by this method were robust. Tested on our and two public test datasets, our models outperformed current major models available.

Conclusions

We applied the feedforward deep neural network to the large amount of linear B-cell epitope data with experimental evidence in the IEDB database, and constructed ensemble prediction models with better performance than the current major models available. We named the models as DLBEpitope and provided web services using the models at http://ccb1.bmi.ac.cn:81/dlbepitope/.",2020-04-17 +34579792,How can SHAP values help to shape metabolic stability of chemical compounds?,"

Background

Computational methods support nowadays each stage of drug design campaigns. They assist not only in the process of identification of new active compounds towards particular biological target, but also help in the evaluation and optimization of their physicochemical and pharmacokinetic properties. Such features are not less important in terms of the possible turn of a compound into a future drug than its desired affinity profile towards considered proteins. In the study, we focus on metabolic stability, which determines the time that the compound can act in the organism and play its role as a drug. Due to great complexity of xenobiotic transformation pathways in the living organisms, evaluation and optimization of metabolic stability remains a big challenge.

Results

Here, we present a novel methodology for the evaluation and analysis of structural features influencing metabolic stability. To this end, we use a well-established explainability method called SHAP. We built several predictive models and analyse their predictions with the SHAP values to reveal how particular compound substructures influence the model's prediction. The method can be widely applied by users thanks to the web service, which accompanies the article. It allows a detailed analysis of SHAP values obtained for compounds from the ChEMBL database, as well as their determination and analysis for any compound submitted by a user. Moreover, the service enables manual analysis of the possible structural modifications via the provision of analogous analysis for the most similar compound from the ChEMBL dataset.

Conclusions

To our knowledge, this is the first attempt to employ SHAP to reveal which substructural features are utilized by machine learning models when evaluating compound metabolic stability. The accompanying web service for metabolic stability evaluation can be of great help for medicinal chemists. Its significant usefulness is related not only to the possibility of assessing compound stability, but also to the provision of information about substructures influencing this parameter. It can assist in the design of new ligands with improved metabolic stability, helping in the detection of privileged and unfavourable chemical moieties during stability optimization. The tool is available at https://metstab-shap.matinf.uj.edu.pl/ .",2021-09-27 +34798322,Accurate Identification of the Trabecular Meshwork under Gonioscopic View in Real Time Using Deep Learning.,"

Purpose

Accurate identification of iridocorneal structures on gonioscopy is difficult to master, and errors can lead to grave surgical complications. This study aimed to develop and train convolutional neural networks (CNNs) to accurately identify the trabecular meshwork (TM) in gonioscopic videos in real time for eventual clinical integrations.

Design

Cross-sectional study.

Participants

Adult patients with open angle were identified in academic glaucoma clinics in both Taipei, Taiwan, and Irvine, California.

Methods

Neural Encoder-Decoder CNNs (U-nets) were trained to predict a curve marking the TM using an expert-annotated data set of 378 gonioscopy images. The model was trained and evaluated with stratified cross-validation grouped by patients to ensure uncorrelated training and testing sets, as well as on a separate test set and 3 intraoperative gonioscopic videos of ab interno trabeculotomy with Trabectome (totaling 90 seconds long, 30 frames per second). We also evaluated our model's performance by comparing its accuracy against ophthalmologists.

Main outcome measures

Successful development of real-time-capable CNNs that are accurate in predicting and marking the TM's position in video frames of gonioscopic views. Models were evaluated in comparison with human expert annotations of static images and video data.

Results

The best CNN model produced test set predictions with a median deviation of 0.8% of the video frame's height (15.25 μm) from the human experts' annotations. This error is less than the average vertical height of the TM. The worst test frame prediction of this model had an average deviation of 4% of the frame height (76.28 μm), which is still considered a successful prediction. When challenged with unseen images, the CNN model scored greater than 2 standard deviations above the mean performance of the surveyed general ophthalmologists.

Conclusions

Our CNN model can identify the TM in gonioscopy videos in real time with remarkable accuracy, allowing it to be used in connection with a video camera intraoperatively. This model can have applications in surgical training, automated screenings, and intraoperative guidance. The dataset developed in this study is one of the first publicly available gonioscopy image banks (https://lin.hs.uci.edu/research), which may encourage future investigations in this topic.",2021-11-16 +34604676,Phosphoproteomics Provides Novel Insights into the Response of Primary Acute Lymphoblastic Leukemia Cells to Microtubule Depolymerization in G1 Phase of the Cell Cycle.,"Microtubule targeting agents (MTAs) have been used for the treatment of cancer for many decades and are among the most successful chemotherapeutic agents. However, their application and effectiveness are limited because of toxicity and resistance as well as a lack of knowledge of molecular mechanisms downstream of microtubule inhibition. Insights into key pathways that link microtubule disruption to cell death is critical for optimal use of these drugs, for defining biomarkers useful in patient stratification, and for informed design of drug combinations. Although MTAs characteristically induce death in mitosis, microtubule destabilizing agents such as vincristine also induce death directly in G1 phase in primary acute lymphoblastic leukemia (ALL) cells. Because many signaling pathways regulating cell survival and death involve changes in protein expression and phosphorylation, we undertook a comprehensive quantitative proteomic study of G1 phase ALL cells treated with vincristine. The results revealed distinct alterations associated with c-Jun N-terminal kinase signaling, anti-proliferative signaling, the DNA damage response, and cytoskeletal remodeling. 
Signals specifically associated with cell death were identified by pre-treatment with the CDK4/6 inhibitor palbociclib, which caused G1 arrest and precluded death induction. These results provide insights into signaling mechanisms regulating cellular responses to microtubule inhibition and provide a foundation for a better understanding of the clinical mechanisms of MTAs and for the design of novel drug combinations. The mass spectrometry proteomics data have been deposited to the PRIDE Archive (http://www.ebi.ac.uk/pride/archive/) via the PRIDE partner repository with the data set identifier PXD027190 and 10.6019/PXD027190.",2021-09-16 +34310203,The Subjective Experience of Word-Finding Difficulties in People With Aphasia: A Thematic Analysis of Interview Data.,"

Purpose

Anomia, or difficulty with naming and word finding, is a pervasive deficit among individuals with aphasia. There is an extensive literature on the mechanisms underlying anomia and on approaches to treatment, but very little is known about the subjective experience of anomia during day-to-day life.

Method

As part of a larger testing battery, 53 adults with poststroke aphasia took part in a novel, structured interview that included an open-ended question about the general experience of anomia: ""Do you ever know what you want to say, but you can't say it out loud? Please describe that feeling."" Video-recorded interview responses were transcribed and analyzed using thematic analysis, an iterative, data-driven process that categorizes interview data into common themes.

Results

Five main themes emerged among the data from 37 participants who produced adequate responses for use in thematic analysis: strategies to cope with or compensate for anomia, comments on awareness of the level of breakdown (e.g., ""I have an idea, but can't get the right words""), negative emotions, impact on relationships, and changes in frequency over time.

Conclusions

Participants showed strong awareness of anomia and its implications, demonstrating an ability to describe their language breakdown, identify relevant strategies to compensate and/or cope, and acknowledge the impact of anomia on their emotions and social interactions. This patient perspective may serve as a valuable supplement to information typically gained via objective language assessments. Clinicians and researchers may wish to consider incorporating similar subjective measures during assessment and treatment planning. Supplemental Material https://doi.org/10.23641/asha.15032643.",2021-07-26 +34582257,Blast Exposure and Self-Reported Hearing Difficulty in Service Members and Veterans Who Have Normal Pure-Tone Hearing Sensitivity: The Mediating Role of Posttraumatic Stress Disorder.,"Purpose Evidence suggests that military blast exposure may lead to self-reported hearing difficulties despite audiometrically normal hearing. Research identifying potential mechanisms of this association remains limited. The purpose of this article is to evaluate the associations between blast, posttraumatic stress disorder (PTSD), and self-reported hearing difficulty, and to examine PTSD as a possible mediator of the association between blast exposure and hearing difficulty. Method We used baseline data from the Noise Outcomes in Service members Epidemiology (NOISE) study (n = 477). Participants in this study undergo a comprehensive hearing, and tinnitus if applicable, evaluation and complete a large number of surveys. Pertinent data extracted from these surveys included information on participant's demographics, military service history, including exposure to blast, and health conditions such as symptoms of PTSD. Using regression models and following a formal causal mediation framework, we estimated total associations, natural direct and indirect associations, and percent mediated. 
Results We found that individuals with blast exposure had higher prevalence of both probable PTSD and self-reported hearing difficulty than individuals who were not blast exposed. Compared with participants without blast exposure, those with blast exposure had twice the prevalence of self-reported hearing difficulty, with 41% of the association mediated through probable PTSD. Conclusion As PTSD is a possible mediator of the association between blast exposure and hearing difficulty, Service members and Veterans with normal pure-tone hearing sensitivity who report hearing difficulties and a history of blast exposure may benefit from evaluation for PTSD symptoms. Supplemental Material https://doi.org/10.23641/asha.16674247.",2021-09-28 +29433427,MethCNA: a database for integrating genomic and epigenomic data in human cancer.,"

Background

The integration of DNA methylation and copy number alteration data promises to provide valuable insight into the underlying molecular mechanisms responsible for cancer initiation and progression. However, the generation and processing of these datasets are costly and time-consuming if carried out separately. The Illumina Infinium HumanMethylation450 BeadChip, initially designed for the evaluation of DNA methylation levels, allows copy number variant calling using bioinformatics tools.

Results

A substantial amount of Infinium HumanMethylation450 data across various cancer types has been accumulated in recent years and is a valuable resource for large-scale data analysis. Here we present MethCNA, a comprehensive database for genomic and epigenomic data integration in human cancer. In the current release, MethCNA contains about 10,000 tumor samples representing 37 cancer types. All raw array data were collected from The Cancer Genome Atlas and NCBI Gene Expression Omnibus database and analyzed using a pipeline that integrated multiple computational resources and tools. The normalized copy number aberration data and DNA methylation alterations were obtained. We provide a user-friendly web-interface for data mining and visualization.

Conclusions

The Illumina Infinium HumanMethylation450 BeadChip enables the interrogation and integration of both genomic and epigenomic data from exactly the same DNA specimen, and thus can aid in distinguishing driver from passenger mutations in cancer. We expect MethCNA will enable researchers to explore DNA methylation and copy number alteration patterns, identify key oncogenic drivers in cancer, and assist in the development of targeted therapies. MethCNA is publicly available online at http://cgma.scu.edu.cn/MethCNA .",2018-02-13 +34368571,CpACpP: In Silico Cell-Penetrating Anticancer Peptide Prediction Using a Novel Bioinformatics Framework.,"Cell-penetrating anticancer peptides (Cp-ACPs) are considered promising candidates in solid tumor and hematologic cancer therapies. Current approaches for the design and discovery of Cp-ACPs trust the expensive high-throughput screenings that often give rise to multiple obstacles, including instrumentation adaptation and experimental handling. The application of machine learning (ML) tools developed for peptide activity prediction is importantly of growing interest. In this study, we applied the random forest (RF)-, support vector machine (SVM)-, and eXtreme gradient boosting (XGBoost)-based algorithms to predict the active Cp-ACPs using an experimentally validated data set. The model, CpACpP, was developed on the basis of two independent cell-penetrating peptide (CPP) and anticancer peptide (ACP) subpredictors. Various compositional and physiochemical-based features were combined or selected using the multilayered recursive feature elimination (RFE) method for both data sets. Our results showed that the ACP subclassifiers obtain a mean performance accuracy (ACC) of 0.98 with an area under curve (AUC) ≈ 0.98 vis-à-vis the CPP predictors displaying relevant values of ∼0.94 and ∼0.95 via the hybrid-based features and independent data sets, respectively. 
Also, the predicting evaluation of Cp-ACPs gave accuracies of ∼0.79 and 0.89 on a series of independent sequences by applying our CPP and ACP classifiers, respectively, which leaves the performance of our predictors better than the earlier reported ACPred, mACPpred, MLCPP, and CPPred-RF. The described consensus-based fusion method additionally reached an AUC of 0.94 for the prediction of Cp-ACP (http://cbb1.ut.ac.ir/CpACpP/Index).",2021-07-25 +33431043,Visualization of very large high-dimensional data sets as minimum spanning trees.,"The chemical sciences are producing an unprecedented amount of large, high-dimensional data sets containing chemical structures and associated properties. However, there are currently no algorithms to visualize such data while preserving both global and local features with a sufficient level of detail to allow for human inspection and interpretation. Here, we propose a solution to this problem with a new data visualization method, TMAP, capable of representing data sets of up to millions of data points and arbitrary high dimensionality as a two-dimensional tree (http://tmap.gdb.tools). Visualizations based on TMAP are better suited than t-SNE or UMAP for the exploration and interpretation of large data sets due to their tree-like nature, increased local and global neighborhood and structure preservation, and the transparency of the methods the algorithm is based on. We apply TMAP to the most used chemistry data sets including databases of molecules such as ChEMBL, FDB17, the Natural Products Atlas, DSSTox, as well as to the MoleculeNet benchmark collection of data sets. 
We also show its broad applicability with further examples from biology, particle physics, and literature.",2020-02-12 +31794409,Cooperation Learning From Multiple Social Networks: Consistent and Complementary Perspectives.,"GWI survey1 has highlighted the flourishing use of multiple social networks: the average number of social media accounts per Internet user is 5.54, and among them, 2.82 are being used actively. Indeed, users tend to express their views in more than one social media site. Hence, merging social signals of the same user across different social networks together, if available, can facilitate the downstream analyses. Previous work has paid little attention on modeling the cooperation among the following factors when fusing data from multiple social networks: 1) as data from different sources characterizes the characteristics of the same social user, the source consistency merits our attention; 2) due to their different functional emphases, some aspects of the same user captured by different social networks can be just complementary and results in the source complementarity; and 3) different sources can contribute differently to the user characterization and hence lead to the different source confidence. Toward this end, we propose a novel unified model, which co-regularizes source consistency, complementarity, and confidence to boost the learning performance with multiple social networks. In addition, we derived its theoretical solution and verified the model with the real-world application of user interest inference. Extensive experiments over several state-of-the-art competitors have justified the superiority of our model.1http://tinyurl.com/zk6kgc9.",2021-09-15 +30624621,KASPspoon: an in vitro and in silico PCR analysis tool for high-throughput SNP genotyping.,"

Motivation

Fine mapping becomes a routine trial following quantitative trait loci (QTL) mapping studies to shrink the size of genomic segments underlying causal variants. The availability of whole genome sequences can facilitate the development of high marker density and predict gene content in genomic segments of interest. Correlations between genetic and physical positions of these loci require handling of different experimental genetic data types, and ultimately converting them into positioning markers using a routine and efficient tool.

Results

To convert classical QTL markers into KASP assay primers, KASPspoon simulates a PCR by running an approximate-match searching analysis on user-entered primer pairs against the provided sequences, and then comparing in vitro and in silico PCR results. KASPspoon reports amplimers close to or adjoining genes/SNPs/simple sequence repeats and those that are shared between in vitro and in silico PCR results to select the most appropriate amplimers for gene discovery. KASPspoon compares physical and genetic maps, and reports the primer set genome coverage for PCR-walking. KASPspoon could be used to design KASP assay primers to convert QTL acquired by classical molecular markers into high-throughput genotyping assays and to provide major SNP resource for the dissection of genotypic and phenotypic variation. In addition to human-readable output files, KASPspoon creates Circos configurations that illustrate different in silico and in vitro results.

Availability and implementation

Code available under GNU GPL at (http://www.ageri.sci.eg/index.php/facilities-services/ageri-softwares/kaspspoon).

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +30230259,Sequence-based searching of custom proteome and transcriptome databases.,"A long-term goal in renal physiology is to understand the mechanisms involved in collecting duct function and regulation at a cellular and molecular level. The first step in modeling of these mechanisms, which can provide a guide to experimentation, is the generation of a list of model components. We have curated a list of proteins expressed in the rat renal inner medullary collecting duct (IMCD) from proteomic data from 18 different publications. The database has been posted as a public resource at https://hpcwebapps.cit.nih.gov/ESBL/Database/IMCD_Proteome_Database/. It includes 8956 different proteins. To search the IMCD Proteomic Database efficiently, we have created a Java-based program called curated database Basic Local Alignment Search Tool (cdbBLAST), which uses the NCBI BLAST kernel to search for specific amino acid sequences corresponding to proteins in the database. cdbBLAST reports information on the matched protein and identifies proteins in the database that have similar sequences. We have also adapted cdbBLAST to interrogate our previously published IMCD Transcriptome Database. We have made the cdbBLAST program available for use either as a web application or a downloadable .jar file at https://hpcwebapps.cit.nih.gov/ESBL/Database/cdbBLAST/. Database searching based on protein sequence removes ambiguities arising from the standard search method based on official gene symbols and allows the user efficient identification of related proteins that may fulfill the same functional roles.",2018-09-01 +,Challenges of Caregivers of Patients With Alzheimer’s Disease,"Abstract The purpose of this presentation is to identify burden and problems experienced by caregivers of patients with Alzheimer’s Disease (AD). 
AD results in gradual deterioration of cognition, language, and memory that can impact an individual’s ability to independently perform daily functional activities (CDC, 2019). The role of caregivers is significant in providing assistance to the patients with chronic AD which can be a source of strain for caregiver population. About 16.3 million informal AD caregivers have spent 18.5 billion hours, which is equal to value of $234 billion, to assist patients with other dementia types and AD in 2018 (Alzheimer’s Association, 2019). In-depth literature synthesis was carried out using multiple databases. Recent and relevant articles were selected to be added in the review. Due to responsibility of constant vigilance of AD patients, the caregivers may overlook their self-care needs and detach themselves from social life. Literature analysis revealed common challenges and needs of AD care partners including limited social engagement, concerns of sexuality, and sleep problems. Understanding caregiver problems will help nurses and other health care professionals to support families by planning preventive measures. Resources can be invested to improve physical and mental well-being of caregivers. Researches can be planned to bridge the knowledge gap identified through literature review on this topic. References Alzheimer’s Association. (2019). Alzheimer’s disease caregivers. http://act.alz.org/site/DocServer/caregivers_fact_sheet.pdf?docID=3022 Centers for Disease Control and Prevention. (2019). Alzheimer’s disease and healthy aging. https://www.cdc.gov/aging/aginginfo/alzheimers.htm",2020-01-01 +29351734,EOGD: the Euplotes octocarinatus genome database.,"

Background

Euplotes, a ciliated protozoan, is a useful unicellular model organism. Studies on Euplotes have provided excellent insights into various basic biological principles. We have recently sequenced the macronuclear genome of the common freshwater species Euplotes octocarinatus to provide novel insights into Euplotes genetics and molecular biology.

Results

In this study, we present the E. octocarinatus Genome Database (EOGD), a functional annotation and analysis platform for the global study of the Euplotes genome. EOGD includes macronuclear genomic and transcriptomic data, predicted gene models, coding sequences, protein sequences, and functional annotations. The GBrowser and BLAST tools are embedded in EOGD to enable the search, visualization and analysis of E. octocarinatus genomic and transcriptomic data.

Conclusions

EOGD is a useful resource for the research community, particularly for researchers who conduct genome-scale analysis and molecular biology studies of Euplotes or other ciliates. EOGD will be continuously updated to integrate more datasets and analytical tools. EOGD is freely available at http://ciliates.ihb.ac.cn/database/home/#eo .",2018-01-19 +29036542,Database Resources of the BIG Data Center in 2018.,"The BIG Data Center at Beijing Institute of Genomics (BIG) of the Chinese Academy of Sciences provides freely open access to a suite of database resources in support of worldwide research activities in both academia and industry. With the vast amounts of omics data generated at ever-greater scales and rates, the BIG Data Center is continually expanding, updating and enriching its core database resources through big-data integration and value-added curation, including BioCode (a repository archiving bioinformatics tool codes), BioProject (a biological project library), BioSample (a biological sample library), Genome Sequence Archive (GSA, a data repository for archiving raw sequence reads), Genome Warehouse (GWH, a centralized resource housing genome-scale data), Genome Variation Map (GVM, a public repository of genome variations), Gene Expression Nebulas (GEN, a database of gene expression profiles based on RNA-Seq data), Methylation Bank (MethBank, an integrated databank of DNA methylomes), and Science Wikis (a series of biological knowledge wikis for community annotations). In addition, three featured web services are provided, viz., BIG Search (search as a service; a scalable inter-domain text search engine), BIG SSO (single sign-on as a service; a user access control system to gain access to multiple independent systems with a single ID and password) and Gsub (submission as a service; a unified submission service for all relevant resources). 
All of these resources are publicly accessible through the home page of the BIG Data Center at http://bigd.big.ac.cn.",2018-01-01 +,The ALFAM2 database on ammonia emission from field-applied manure: Description and illustrative analysis,"Ammonia (NH3) emission from animal manure contributes to air pollution and ecosystem degradation, and the loss of reactive nitrogen (N) from agricultural systems. Estimates of NH3 emission are necessary for national inventories and nutrient management, and NH3 emission from field-applied manure has been measured in many studies over the past few decades. In this work, we facilitate the use of these data by collecting and organizing them in the ALFAM2 database. In this paper we describe the development of the database and summarise its contents, quantify effects of application methods and other variables on emission using a data subset, and discuss challenges for data analysis and model development. The database contains measurements of emission, manure and soil properties, weather, application technique, and other variables for 1895 plots from 22 research institutes in 12 countries. Data on five manure types (cattle, pig, mink, poultry, mixed, as well as sludge and “other”) applied to three types of crops (grass, small grains, maize, as well as stubble and bare soil) are included. Application methods represented in the database include broadcast, trailing hose, trailing shoe (narrow band application), and open slot injection. Cattle manure application to grassland was the most common combination, and analysis of this subset (with dry matter (DM) limited to <15%) was carried out using mixed- and fixed-effects models in order to quantify effects of management and environment on ammonia emission, and to highlight challenges for use of the database. Measured emission in this subset ranged from <1% to 130% of applied ammonia after 48 h. 
Results showed clear, albeit variable, reductions in NH3 emission due to trailing hose, trailing shoe, and open slot injection of slurry compared to broadcast application. There was evidence of positive effects of air temperature and wind speed on NH3 emission, and limited evidence of effects of slurry DM. However, random-effects coefficients for differences among research institutes were among the largest model coefficients, and showed a deviation from the mean response by more than 100% in some cases. The source of these institute differences could not be determined with certainty, but there is some evidence that they are related to differences in soils, or differences in application or measurement methods. The ALFAM2 database should be useful for development and evaluation of both emission factors and emission models, but users need to recognize the limitations caused by confounding variables, imbalance in the dataset, and dependence among observations from the same institute. Variation among measurements and in reported variables highlights the importance of international agreement on how NH3 emission should be measured, along with necessary types of supporting data and standard protocols for their measurement. Both are needed in order to produce more accurate and useful ammonia emission measurements. Expansion of the ALFAM2 database will continue, and readers are invited to contact the corresponding author for information on data submission. The latest version of the database is available at http://www.alfam.dk.",2018-08-01 +34529321,The evolution of the antimicrobial peptide database over 18 years: Milestones and new features.,"The antimicrobial peptide database (APD) has served the antimicrobial peptide field for 18 years. Because it is widely used in research and education, this article documents database milestones and key events that have transformed it into the current form. 
A comparison is made for the APD peptide statistics between 2010 and 2020, validating the major database findings to date. We also describe new additions ranging from peptide entries to search functions. Of note, the APD also contains antimicrobial peptides from host microbiota, which are important in shaping immune systems and could be linked to a variety of human diseases. Finally, the database has been re-programmed to the web branding and latest security compliance of the University of Nebraska Medical Center. The reprogrammed APD can be accessed at https://aps.unmc.edu.",2021-09-24 +34559210,QSDB-a graphical Quorum Sensing Database. ,"The human microbiome is largely shaped by the chemical interactions of its microbial members, which includes cross-talk via shared signals or quenching of the signalling of other species. Quorum sensing is a process that allows microbes to coordinate their behaviour in dependence of their population density and to adjust gene expression accordingly. We present the Quorum Sensing Database (QSDB), a comprehensive database of all published sensing and quenching relations between organisms and signalling molecules of the human microbiome, as well as an interactive web interface that allows browsing the database, provides graphical depictions of sensing mechanisms as Systems Biology Graphical Notation diagrams and links to other databases. Database URL: QSDB (Quorum Sensing DataBase) is freely available via an interactive web interface and as a downloadable csv file at http://qsdb.org.",2021-09-24 +34726633,Layer groups: Brillouin-zone and crystallographic databases on the Bilbao Crystallographic Server.,"The section of the Bilbao Crystallographic Server (https://www.cryst.ehu.es/) dedicated to subperiodic groups contains crystallographic and Brillouin-zone databases for the layer groups. The crystallographic databases include the generators/general positions (GENPOS), Wyckoff positions (WYCKPOS) and maximal subgroups (MAXSUB). 
The Brillouin-zone database (LKVEC) offers k-vector tables and Brillouin-zone figures of all 80 layer groups which form the background of the classification of their irreducible representations. The symmetry properties of the wavevectors are described applying the so-called reciprocal-space-group approach and this classification scheme is compared with that of Litvin & Wike [(1991), Character Tables and Compatibility Relations of the Eighty Layer Groups and Seventeen Plane Groups. New York: Plenum Press]. The specification of independent parameter ranges of k vectors in the representation domains of the Brillouin zones provides a solution to the problems of uniqueness and completeness of layer-group representations. The Brillouin-zone figures and k-vector tables are described in detail and illustrated by several examples.",2021-09-24 +34132627,Developing a Scale Measuring Patient Expectations and Service Quality of Hospitals in India during COVID-19.,"This paper develops a scale that measures the perceived service quality of hospitals during a pandemic. To develop the scale, data from 206 respondents from India, was subjected to exploratory and confirmatory factor analysis. The newly developed scale was named PAND-SERVQUAL, which includes factors namely, assistance, facility & layout, trust, empathy, promptness, and knowledge. The resulting scale is likely to be useful for researchers exploring service quality research and health care quality as well. 
Findings will facilitate understanding patient's expectations regarding the service quality of hospitals during a pandemic.Supplemental data for this article is available online at https://doi.org/10.1080/00185868.2021.1939827 .",2021-06-16 +29126216,"PAGER 2.0: an update to the pathway, annotated-list and gene-signature electronic repository for Human Network Biology.","Integrative Gene-set, Network and Pathway Analysis (GNPA) is a powerful data analysis approach developed to help interpret high-throughput omics data. In PAGER 1.0, we demonstrated that researchers can gain unbiased and reproducible biological insights with the introduction of PAGs (Pathways, Annotated-lists and Gene-signatures) as the basic data representation elements. In PAGER 2.0, we improve the utility of integrative GNPA by significantly expanding the coverage of PAGs and PAG-to-PAG relationships in the database, defining a new metric to quantify PAG data qualities, and developing new software features to simplify online integrative GNPA. Specifically, we included 84 282 PAGs spanning 24 different data sources that cover human diseases, published gene-expression signatures, drug-gene, miRNA-gene interactions, pathways and tissue-specific gene expressions. We introduced a new normalized Cohesion Coefficient (nCoCo) score to assess the biological relevance of genes inside a PAG, and RP-score to rank genes and assign gene-specific weights inside a PAG. The companion web interface contains numerous features to help users query and navigate the database content. The database content can be freely downloaded and is compatible with third-party Gene Set Enrichment Analysis tools. We expect PAGER 2.0 to become a major resource in integrative GNPA. 
PAGER 2.0 is available at http://discovery.informatics.uab.edu/PAGER/.",2018-01-01 +30916462,MolTarPred: A web tool for comprehensive target prediction with reliability estimation.,"Molecular target prediction can provide a starting point to understand the efficacy and side effects of phenotypic screening hits. Unfortunately, the vast majority of in silico target prediction methods are not available as web tools. Furthermore, these are limited in the number of targets that can be predicted, do not estimate which target predictions are more reliable and/or lack comprehensive retrospective validations. We present MolTarPred ( http://moltarpred.marseille.inserm.fr/), a user-friendly web tool for predicting protein targets of small organic compounds. It is powered by a large knowledge base comprising 607,659 compounds and 4,553 macromolecular targets collected from the ChEMBL database. In about 1 min, the predicted targets for the supplied molecule will be listed in a table. The chemical structures of the query molecule and the most similar compounds annotated with the predicted target will also be shown to permit visual inspection and comparison. Practical examples of the use of MolTarPred are showcased. MolTarPred is a new resource for scientists that require a more complete knowledge of the polypharmacology of a molecule. The introduction of a reliability score constitutes an attractive functionality of MolTarPred, as it permits focusing experimental confirmatory tests on the most reliable predictions, which leads to higher prospective hit rates.",2019-04-22 +34755188,The impact of maternal age on gene expression during the GV to MII transition in euploid human oocytes.,"

Study question

Are there age-related differences in gene expression during the germinal vesicle (GV) to metaphase II (MII) stage transition in euploid human oocytes?

Summary answer

A decrease in mitochondrial-related transcripts from GV to MII oocytes was observed, with a much greater reduction in MII oocytes with advanced age.

What is known already

Early embryonic development is dependent on maternal transcripts accumulated and stored within the oocyte during oogenesis. Transcriptional activity of the oocyte, which dictates its ultimate developmental potential, may be influenced by age and explain the reduced competence of advanced maternal age (AMA) oocytes compared with the young maternal age (YMA). Gene expression has been studied in human and animal oocytes; however, RNA sequencing could provide further insights into the transcriptome profiling of GV and in vivo matured MII euploid oocytes of YMA and AMA patients.

Study design, size, duration

Fifteen women treated for infertility in a single IVF unit agreed to participate in this study. Five GV and 5 MII oocytes from 6, 21-26 years old women (YMA cohort) and 5 GV and 6 MII oocytes from 6, 41-44 years old women (AMA cohort) undergoing IVF treatment were donated. The samples were collected within a time frame of 4 months. RNA was isolated and deep sequenced at the single-cell level. All donors provided either GV or MII oocytes.

Participants/materials, setting, methods

Cumulus dissection from donated oocytes was performed 38 h after hCG injection, denuded oocytes were inserted into lysis buffer supplemented with RNase inhibitor. The samples were stored at -80°C until further use. Isolated RNA from GV and MII oocytes underwent library preparation using an oligo deoxy-thymidine (dT) priming approach (SMART-Seq v4 Ultra Low Input RNA assay; Takara Bio, Japan) and Nextera XT DNA library preparation assay (Illumina, USA) followed by deep sequencing. Data processing, quality assessment and bioinformatics analysis were performed using source-software, mainly including FastQC, HISAT2, StringTie and edgeR, along with functional annotation analysis, while scploid R package was employed to determine the ploidy status.

Main results and the role of chance

Following deep sequencing of single GV and MII oocytes in both YMA and AMA cohorts, several hundred transcripts were found to be expressed at significantly different levels. When YMA and AMA MII oocyte transcriptomes were compared, the most significant of these were related to mitochondrial structure and function, including biological processes, mitochondrial respiratory chain complex I assembly and mitochondrial translational termination (false discovery rate (FDR) 6.0E-10 to 1.2E-7). These results indicate a higher energy potential of the YMA MII cohort that is reduced with ageing. Other biological processes that were significantly higher in the YMA MII cohort included transcripts involved in the translation process (FDR 1.9E-2). Lack of these transcripts could lead to inappropriate protein synthesis prior to or upon fertilisation of the AMA MII oocytes.

Large scale data

The RNA sequencing data were deposited in the Gene Expression Omnibus (https://www.ncbi.nlm.nih.gov/geo), under the accession number: GSE164371.

Limitations, reasons for caution

The relatively small sample size could be a reason for caution. However, the RNA sequencing results showed homogeneous clustering with low intra-group variation and five to six biological replicates derived from at least three different women per group minimised the potential impact of the sample size.

Wider implications of the findings

Understanding the effects of ageing on the oocyte transcriptome could highlight the mechanisms involved in GV to MII transition and identify biomarkers that characterise good MII oocyte quality. This knowledge has the potential to guide IVF regimes for AMA patients.

Study funding/competing interest(s)

This work was supported by the Medical Research Council (MRC Grant number MR/K020501/1).",2021-12-01 +31147699,ORVAL: a novel platform for the prediction and exploration of disease-causing oligogenic variant combinations.,"A tremendous amount of DNA sequencing data is being produced around the world with the ambition to capture in more detail the mechanisms underlying human diseases. While numerous bioinformatics tools exist that allow the discovery of causal variants in Mendelian diseases, little to no support is provided to do the same for variant combinations, an essential task for the discovery of the causes of oligogenic diseases. ORVAL (the Oligogenic Resource for Variant AnaLysis), which is presented here, provides an answer to this problem by focusing on generating networks of candidate pathogenic variant combinations in gene pairs, as opposed to isolated variants in unique genes. This online platform integrates innovative machine learning methods for combinatorial variant pathogenicity prediction with visualization techniques, offering several interactive and exploratory tools, such as pathogenic gene and protein interaction networks, a ranking of pathogenic gene pairs, as well as visual mappings of the cellular location and pathway information. ORVAL is the first web-based exploration platform dedicated to identifying networks of candidate pathogenic variant combinations with the sole ambition to help in uncovering oligogenic causes for patients that cannot rely on the classical disease analysis tools. ORVAL is available at https://orval.ibsquare.be.",2019-07-01 +33136286,An assembly of galanin-galanin receptor signaling network.,"The galanin receptor family of proteins is present throughout the central nervous system and endocrine system. It comprises of three subtypes-GalR1, GalR2, and GalR3; all of which are G-protein-coupled receptors. 
Galanin predominantly acts as an inhibitory, hyper-polarizing neuromodulator, which has several physiological as well as pathological functions. Galanin has a role in mediating food intake, memory, sexual behavior, nociception and is also associated with diseases such as Alzheimer's disease, epilepsy, diabetes mellitus, and chronic pain. However, the understanding of signaling mechanisms of the galanin family of neuropeptides is limited and an organized pathway map is not yet available. Therefore, a detailed literature mining of the publicly available articles pertaining to the galanin receptor was followed by manual curation of the reactions and their integration into a map. This resulted in the cataloging of molecular reactions involving 64 molecules into five categories such as molecular association, activation/inhibition, catalysis, transport, and gene regulation. For enabling easy access of biomedical researchers, the galanin-galanin receptor signaling pathway data was uploaded to WikiPathways ( https://www.wikipathways.org/index.php/Pathway:WP4970 ), a freely available database of biological pathways.",2020-11-02 +33460477,Proteomic profile of pre-implantational ovine embryos produced in vivo.,"The present study was conducted to decipher the proteome of in vivo-produced pre-implantation ovine embryos. Ten locally adapted Morana Nova ewes received hormonal treatment and were inseminated 12 hr after ovulation. Six days later, 54 embryos (morula and blastocyst developmental state) were recovered from eight ewes and pooled to obtain sufficient protein for proteomic analysis. Extracted embryo proteins were analysed by LC-MS/MS, followed by identification based on four database searches (PEAKS, Proteome Discoverer software, SearchGUI software, PepExplorer). Identified proteins were analysed for gene ontology terms, protein clusters and interactions. 
Genes associated with the ovine embryo proteome were screened for miRNA targets using data sets of TargetScan (http://www.targetscan.org) and mIRBase (http://www.mirbase.org) servers. There were 667 proteins identified in the ovine embryos. Biological processes of such proteins were mainly related to cellular process and regulation, and molecular functions, to binding and catalytic activity. Analysis of the embryo proteins revealed 49 enriched functional clusters, linked to energy metabolism (TCA cycle, pyruvate and glycolysis metabolism), zona pellucida (ZP), MAPK signalling pathway, tight junction, binding of sperm to ZP, translation, proteasome, cell cycle and calcium/phospholipid binding. Sixteen miRNAs were related to 25 pre-implantation ovine embryo genes, all conserved in human, bovine and ovine species. The interaction network generated by miRNet showed four key miRNAs (hsa-mir-106b-5p; hsa-mir-30-5p; hsa-mir-103a-5p and hsa-mir-106a-5p) with potential interactions with embryo-expressed genes. Functional analysis of the network indicated that miRNAs modulate genes related to cell cycle, regulation of stem cell and embryonic cell differentiation, among others. Retrieved miRNAs also modulate the expression of genes involved in cell signalling pathways, such as MAPK, Wnt, TGF-beta, p53 and Toll-like receptor. The current study describes the first major proteomic profile of 6-day-old ovine embryos produced in vivo, setting a comprehensive foundation for our understanding of embryo physiology in the ovine species.",2021-02-02 +34346299,"The effect of antenatal education on expectant mother's childbirth attitudes, maternal role attainment, and self-confidence levels.","We aim to determine the effect of antenatal education on the attitudes of expectant mothers toward birth, maternal role attainment and self-confidence levels. 
We carried out this quasi-experimental, non-randomized, prospective study in a hospital located in Istanbul, in the pre- and post-education model. Women in the education group (EG = 60) attended 6 weeks of education. Women in the control group (CG = 60) participated in a periodic follow-up visit. We collected the data using Childbirth Attitudes Questionnaire (CAQ), Pharis Self-Confidence Scale (PSCS), and Semantic Differential Scale-Myself as Mother (MMS). We made three measures in total: in the first visit, after six weeks and in the sixth week postpartum. We found the mean scores of second measurement of CAQ, PSCS, third measurement of MMS statistically significant in favor of EG (p < 0.05). Antenatal educations positively affect childbirth attitude, maternal role attainment and self-confidence levels.Supplemental data for this article is available online at https://doi.org/10.1080/07399332.2021.1935959 .",2021-08-04 +34630517,NetGenes: A Database of Essential Genes Predicted Using Features From Interaction Networks.,"Essential gene prediction models built so far are heavily reliant on sequence-based features, and the scope of network-based features has been narrow. Previous work from our group demonstrated the importance of using network-based features for predicting essential genes with high accuracy. Here, we apply our approach for the prediction of essential genes to organisms from the STRING database and host the results in a standalone website. Our database, NetGenes, contains essential gene predictions for 2,700+ bacteria predicted using features derived from STRING protein-protein functional association networks. Housing a total of over 2.1 million genes, NetGenes offers various features like essentiality scores, annotations, and feature vectors for each gene. 
NetGenes database is available from https://rbc-dsai-iitm.github.io/NetGenes/.",2021-09-23 +34554397,"Analysis of the HEXA, HEXB, ARSA, and SMPD1 Genes in 68 Iranian Patients.","Lysosomal storage diseases (LSDs) are known as genetic disorders with an overall prevalence of 1 per 7700 live births. Sphingolipidosis, which is a subgroup of LSDs, is resulted from mutations in the coding genes of specific enzymes of sphingolipid hydrolases. The current study aimed to provide additional knowledge on the genotype of sphingolipidoses disease among Iranian patients affected by the disease. In this research, we studied 68 unrelated Iranian patients diagnosed with one kind of sphingolipidoses from 2014 to 2019. Thereafter, genomic DNA was isolated from their peripheral blood leukocytes samples in EDTA in terms of the manufacturer's protocol. All the coding exons and exon-intron boundaries of the related genes were sequenced and then analyzed using the NCBI database. Finally, they were reviewed using some databases such as the Human Gene Mutation Database (HGMD) and ClinVar ( https://www.ncbi.nlm.nih.gov/clinva ). By studying 22 MLD patients, 18 different variations of the ARSA gene were found, one of which was new including, named as c.472 T > G p. (Cys158Gly). Out of 15 Sandhoff disease (SD) patients, 11 different variations of the HEXB gene were found. Correspondingly, the c.1083-2delA was not reported earlier. By investigating 21 Iranian patients with Tay-Sachs disease (TSD), one new variant was found as c.622delG. The study of 10 Niemann-Pick disease A/B (NPDA/B (patients has led to the identification of 9 different SMPD1 gene variations, among which 3 variations were novel mutations. 
The results of the present study can be expanded to the genotypic spectrum of Iranian patients with MLD, SD, TSD, and NPD diseases and also used to innovate more effective methods for the detection of genetic carriers as well as diagnosing and counseling of Iranian patients affected with these disorders.",2021-09-23 +34631816,Reference Values of Right Ventricular Volumes and Ejection Fraction by Three-Dimensional Echocardiography in Adults: A Systematic Review and Meta-Analysis.,"Objective: This study was conducted in order to determine the reference values for right ventricular (RV) volumes and ejection fraction (EF) using three-dimensional echocardiography (3DE) and to identify sources of variance through a systematic review and meta-analysis. Methods: This systematic review was preregistered with the International Prospective Register of Systematic Reviews (https://www.crd.york.ac.uk/PROSPERO/) (CRD42020211002). Relevant studies were identified by searches of the PubMed, Embase, and Cochrane Library databases through October 12, 2020. Pooled reference values were calculated using the random-effects model weighted by inverse variance. Meta-regression analysis and Egger's test were used to determine the source of heterogeneity. A subgroup analysis was performed to evaluate the reference values across different conditions. Results: The search identified 25 studies of 2,165 subjects. The mean reference values were as follows: RV end-diastolic volume, 100.71 ml [95% confidence interval (CI), 90.92-110.51 ml); RV end-systolic volume, 44.19 ml (95% CI, 39.05-49.33 ml); RV end-diastolic volume indexed, 57.01 ml/m2 (95% CI, 51.93-62.08 ml/m2); RV end-systolic volume indexed, 25.41 ml/m2 (95% CI, 22.58-28.24 ml/m2); and RVEF, 56.20% (95% CI, 54.59-57.82%). The sex- and age-specific reference values were assessed according to the studies reporting the values of different sexes and age distributions, respectively. 
In addition, the vendor- and software-specific reference values were analyzed. The meta-regression analysis revealed that sex, frame rate, pulmonary artery systolic pressure, and software packages were associated with variations in RV volumes (P < 0.05). Inter-vendor and inter-software discrepancies may explain the variability of RVEF. Conclusions: The reference values for RV volumes and RVEF using 3DE were assessed. The confounders that impacted the variability in RV volumes or RVEF contained the sex, frame rate, pulmonary artery systolic pressure, inter-vendor discrepancies, and inter-software discrepancies.",2021-09-23 +34554191,The Systems Biology Simulation Core Library. ,"Studying biological systems generally relies on computational modelling and simulation, e.g., model-driven discovery and hypothesis testing. Progress in standardisation efforts led to the development of interrelated file formats to exchange and reuse models in systems biology, such as SBML, the Simulation Experiment Description Markup Language (SED-ML), or the Open Modeling EXchange format (OMEX). Conducting simulation experiments based on these formats requires efficient and reusable implementations to make them accessible to the broader scientific community and to ensure the reproducibility of the results. The Systems Biology Simulation Core Library (SBSCL) provides interpreters and solvers for these standards as a versatile open-source API in JavaTM. The library simulates even complex bio-models and supports deterministic Ordinary Differential Equations (ODEs); Stochastic Differential Equations (SDEs); constraint-based analyses; recent SBML and SED-ML versions; exchange of results, and visualisation of in silico experiments; open modelling exchange formats (COMBINE archives); hierarchically structured models; and compatibility with standard testing systems, including the Systems Biology Test Suite and published models from the BioModels and BiGG databases. 
SBSCL is freely available at https://draeger-lab.github.io/SBSCL/ and via Maven Central. The material available at Bioinformatics online provides details on resources and availability, implementation, support of the SBML Test Suite, BioModels, and BiGG simulations with benchmark comparisons, and comparison to other simulators with SBML support.",2021-09-23 +32467670,OSskcm: an online survival analysis webserver for skin cutaneous melanoma based on 1085 transcriptomic profiles.,"

Background

Cutaneous melanoma is one of the most aggressive and lethal skin cancers. It is greatly important to identify prognostic biomarkers to guide the clinical management. However, it is technically challenging for untrained researchers to process high dimensional profiling data and identify potential prognostic genes in profiling datasets.

Methods

In this study, we developed a webserver to analyze the prognostic values of genes in cutaneous melanoma using data from TCGA and GEO databases. The webserver is named Online consensus Survival webserver for Skin Cutaneous Melanoma (OSskcm) which includes 1085 clinical melanoma samples. The OSskcm is hosted on a Windows Tomcat server. Server-side scripts were developed in Java script. The database system is managed by a SQL Server, which integrates gene expression data and clinical data. The Kaplan-Meier (KM) survival curves, Hazard ratio (HR) and 95% confidence interval (95%CI) were calculated in a univariate Cox regression analysis.

Results

In OSskcm, by inputting an official gene symbol and selecting proper options, users could obtain KM survival plot with log-rank P value and HR on the output web page. In addition, clinical characteristics including race, stage, gender, age and type of therapy could also be included in the prognosis analysis as confounding factors to constrain the analysis in a subgroup of melanoma patients.

Conclusion

The OSskcm is highly valuable for biologists and clinicians to perform the assessment and validation of new or interested prognostic biomarkers for melanoma. OSskcm can be accessed online at: http://bioinfo.henu.edu.cn/Melanoma/MelanomaList.jsp.",2020-05-19 +34586975,Reasons for delay in seeking healthcare among women with acute coronary syndrome from rural and urban areas in Jordan.,We aimed to explore reasons for delay in seeking healthcare among women with acute coronary syndrome (ACS) for the first time from urban and rural areas in Jordan. A qualitative descriptive design was used through face-to-face interviews with 33 women. Themes that explained why women delayed seeking healthcare when experiencing ACS were: Knowledge deficit about coronary artery disease; the effect of disparity in healthcare services on women decision; and life priorities of women during the ACS attack. Educational needs should be addressed based on variations in both areas.Supplemental data for this article is available online at https://doi.org/10.1080/07399332.2021.1955889 .,2021-09-29 +28891124,Dockground: A comprehensive data resource for modeling of protein complexes.,"Characterization of life processes at the molecular level requires structural details of protein interactions. The number of experimentally determined structures of protein-protein complexes accounts only for a fraction of known protein interactions. This gap in structural description of the interactome has to be bridged by modeling. An essential part of the development of structural modeling/docking techniques for protein interactions is databases of protein-protein complexes. They are necessary for studying protein interfaces, providing a knowledge base for docking algorithms, and developing intermolecular potentials, search procedures, and scoring functions. 
Development of protein-protein docking techniques requires thorough benchmarking of different parts of the docking protocols on carefully curated sets of protein-protein complexes. We present a comprehensive description of the Dockground resource (http://dockground.compbio.ku.edu) for structural modeling of protein interactions, including previously unpublished unbound docking benchmark set 4, and the X-ray docking decoy set 2. The resource offers a variety of interconnected datasets of protein-protein complexes and other data for the development and testing of different aspects of protein docking methodologies. Based on protein-protein complexes extracted from the PDB biounit files, Dockground offers sets of X-ray unbound, simulated unbound, model, and docking decoy structures. All datasets are freely available for download, as a whole or selecting specific structures, through a user-friendly interface on one integrated website.",2017-10-10 +34772750,Development and validation of multivariable prediction models for adverse COVID-19 outcomes in patients with IBD.,"

Objectives

Develop an individualised prognostic risk prediction tool for predicting the probability of adverse COVID-19 outcomes in patients with inflammatory bowel disease (IBD).

Design and setting

This study developed and validated prognostic penalised logistic regression models using reports to the international Surveillance Epidemiology of Coronavirus Under Research Exclusion for Inflammatory Bowel Disease voluntary registry from March to October 2020. Model development was done using a training data set (85% of cases reported 13 March-15 September 2020), and model validation was conducted using a test data set (the remaining 15% of cases plus all cases reported 16 September-20 October 2020).

Participants

We included 2709 cases from 59 countries (mean age 41.2 years (SD 18), 50.2% male). All submitted cases after removing duplicates were included.

Primary and secondary outcome measures

COVID-19 related: (1) Hospitalisation+: composite outcome of hospitalisation, ICU admission, mechanical ventilation or death; (2) Intensive Care Unit+ (ICU+): composite outcome of ICU admission, mechanical ventilation or death; (3) Death. We assessed the resulting models' discrimination using the area under the curve of the receiver operator characteristic curves and reported the corresponding 95% CIs.

Results

Of the submitted cases, a total of 633 (24%) were hospitalised, 137 (5%) were admitted to the ICU or intubated and 69 (3%) died. 2009 patients comprised the training set and 700 the test set. The models demonstrated excellent discrimination, with a test set area under the curve (95% CI) of 0.79 (0.75 to 0.83) for Hospitalisation+, 0.88 (0.82 to 0.95) for ICU+ and 0.94 (0.89 to 0.99) for Death. Age, comorbidities, corticosteroid use and male gender were associated with a higher risk of death, while the use of biological therapies was associated with a lower risk.

Conclusions

Prognostic models can effectively predict who is at higher risk for COVID-19-related adverse outcomes in a population of patients with IBD. A free online risk calculator (https://covidibd.org/covid-19-risk-calculator/) is available for healthcare providers to facilitate discussion of risks due to COVID-19 with patients with IBD.",2021-11-12 +26888663,A web-oriented software for the optimization of pooled experiments in NGS for detection of rare mutations.,"

Background

The cost per patient of next generation sequencing for detection of rare mutations may be significantly reduced using pooled experiments. Recently, some techniques have been proposed for the planning of pooled experiments and for the optimal allocation of patients into pools. However, the lack of a user friendly resource for planning the design of pooled experiments forces the scientists to do frequent, complex and long computations.

Results

OPENDoRM is a powerful collection of novel mathematical algorithms usable via an intuitive graphical user interface. It enables researchers to speed up the planning of their routine experiments, and supports scientists without specific bioinformatics expertise. Users can automatically carry out analysis in terms of costs associated with the optimal allocation of patients in pools. They are also able to choose between three distinct pooling mathematical methods, each of which also suggests the optimal configuration for the submitted experiment. Importantly, in order to keep track of the performed experiments, users can save and export the results of their experiments in standard tabular and chart formats.

Conclusion

OPENDoRM is a freely available web-oriented application for the planning of pooled NGS experiments, available at: http://www-labgtp.na.icar.cnr.it/OPENDoRM. Its easy and intuitive graphical user interface enables researchers to plan theirs experiments using novel algorithms, and to interactively visualize the results.",2016-02-17 +32813752,SVAD: A genetic database curates non-ischemic sudden cardiac death-associated variants.,"Sudden cardiac death (SCD) is an important cause of mortality worldwide. It accounts for approximately half of all deaths from cardiovascular disease. While coronary artery disease and acute myocardial infarction account for the majority of SCD in the elderly population, inherited cardiac diseases (inherited CDs) comprise a substantial proportion of younger SCD victims with a significant genetic component. Currently, the use of next-generation sequencing enables the rapid analysis to investigate relationships between genetic variants and inherited CDs causing SCD. Genetic contribution to risk has been considered an alternate predictor of SCD. In the past years, large numbers of SCD susceptibility variants were reported, but these results are scattered in numerous publications. Here, we present the SCD-associated Variants Annotation Database (SVAD) to facilitate the interpretation of variants and to meet the needs of data integration. SVAD contains data from a broad screening of scientific literature. It was constructed to provide a comprehensive collection of genetic variants along with integrated information regarding their effects. At present, SVAD has accumulated 2,292 entries within 1,239 variants by manually surveying pertinent literature, and approximately one-third of the collected variants are pathogenic/likely-pathogenic following the ACMG guidelines. To the best of our knowledge, SVAD is the most comprehensive database that can provide integrated information on the associated variants in various types of inherited CDs. 
SVAD represents a valuable source of variant information based on scientific literature and benefits clinicians and researchers, and it is now available on http://svad.mbc.nctu.edu.tw/.",2020-08-19 +34605674,Long-Term Exposure to Transportation Noise and Risk of Incident Stroke: A Pooled Study of Nine Scandinavian Cohorts.,"

Background

Transportation noise is increasingly acknowledged as a cardiovascular risk factor, but the evidence base for an association with stroke is sparse.

Objective

We aimed to investigate the association between transportation noise and stroke incidence in a large Scandinavian population.

Methods

We harmonized and pooled data from nine Scandinavian cohorts (seven Swedish, two Danish), totaling 135,951 participants. We identified residential address history and estimated road, railway, and aircraft noise for all addresses. Information on stroke incidence was acquired through linkage to national patient and mortality registries. We analyzed data using Cox proportional hazards models, including socioeconomic and lifestyle confounders, and air pollution.

Results

During follow-up (median=19.5y), 11,056 stroke cases were identified. Road traffic noise (Lden) was associated with risk of stroke, with a hazard ratio (HR) of 1.06 [95% confidence interval (CI): 1.03, 1.08] per 10-dB higher 5-y mean time-weighted exposure in analyses adjusted for individual- and area-level socioeconomic covariates. The association was approximately linear and persisted after adjustment for air pollution [particulate matter (PM) with an aerodynamic diameter of ≤2.5μm (PM2.5) and NO2]. Stroke was associated with moderate levels of 5-y aircraft noise exposure (40-50 vs. ≤40 dB) (HR=1.12; 95% CI: 0.99, 1.27), but not with higher exposure (≥50 dB, HR=0.94; 95% CI: 0.79, 1.11). Railway noise was not associated with stroke.

Discussion

In this pooled study, road traffic noise was associated with a higher risk of stroke. This finding supports road traffic noise as an important cardiovascular risk factor that should be included when estimating the burden of disease due to traffic noise. https://doi.org/10.1289/EHP8949.",2021-10-04 +34888617,Structural feature-driven pattern analysis for multitarget modulator landscapes. ,"Multitargeting features of small-molecules have been of increasing interest in recent years. Polypharmacological drugs that address several therapeutic targets may provide greater therapeutic benefits for patients. Furthermore, multitarget compounds can be used to address proteins of the same (or similar) protein families for their exploration as potential pharmacological targets. In addition, the knowledge of multitargeting features is of major importance in the drug selection process; particularly in ultra-large virtual screening procedures to gain high-quality compound collections. However, large-scale multitarget modulator landscapes are almost non-existent. We implemented a specific feature-driven computer-aided pattern analysis (C@PA) to extract molecular-structural features of inhibitors of the model protein family of ATP-binding cassette (ABC) transporters. New molecular-structural features have been identified that successfully expanded the known multitarget modulator landscape of pan-ABC transporter inhibitors. The prediction capability was biologically confirmed by the successful discovery of pan-ABC transporter inhibitors with a distinct inhibitory activity profile. The multitarget dataset is available under the http://www.panabc.info URL and its use is free of charge. Supplementary data is available at Bioinformatics online.",2021-12-09 +32909908,Deep Learning and Multivariable Models Select EVAR Patients for Short-Stay Discharge.,"

Objectives

We sought to develop a prediction score with data from the Vascular Quality Initiative (VQI) EVAR in efforts to assist endovascular specialists in deciding whether or not a patient is appropriate for short-stay discharge.

Background

Small series describe short-stay discharge following elective EVAR. Our study aims to quantify characteristics associated with this decision.

Methods

The VQI EVAR and NSQIP datasets were queried. Patients who underwent elective EVAR recorded in VQI, between 1/2010-5/2017 were split 2:1 into test and analytic cohorts via random number assignment. Cross-reference with the Medicare claims database confirmed all-cause mortality data. Bootstrap sampling was employed in model. Deep learning algorithms independently evaluated each dataset as a sensitivity test.

Results

Univariate outcomes, including 30-day survival, were statistically worse in the DD group when compared to the SD group (all P < 0.05). A prediction score, SD-EVAR, derived from the VQI EVAR dataset including pre- and intra-op variables that discriminate between SD and DD was externally validated in NSQIP (Pearson correlation coefficient = 0.79, P < 0.001); deep learning analysis concurred. This score suggests 66% of EVAR patients may be appropriate for short-stay discharge. A free smart phone app calculating short-stay discharge potential is available through QxMD Calculate https://qxcalc.app.link/vqidis.

Conclusions

Selecting patients for short-stay discharge after EVAR is possible without increasing harm. The majority of infrarenal AAA patients treated with EVAR in the United States fit a risk profile consistent with short-stay discharge, representing a significant cost-savings potential to the healthcare system.",2020-09-10 +,Systematic Review of Equations for Estimating Energy Requirement in the Elderly: Results and Future Perspectives,"Abstract

Objectives

Estimating the right energy requirement for the elderly is a clinically relevant topic since malnutrition is common in this population. Predictive equations are widely used to estimate the resting energy expenditure (REE). However, only a few equations have been specifically developed for the elderly, and they often provide different outputs. The present work aimed at presenting a web application able to assist the clinicians in identifying the most appropriate equation to estimate the REE in the elderly.

Methods

The development of the application is based on a systematic review of studies that had tested the performance of a predictive equation to estimate REE vs. a gold standard in subjects older than 65 years of age. The systematic review was carried out using PubMed, Scopus, and Embase following the PRISMA guidelines. Furthermore, the equations retrieved were applied to a sample of 88 subjects enrolled in an Italian nursing home to evaluate the agreement among the estimated REE. The agreement was assessed using the Intraclass Correlation Coefficient (ICC) for the sample overall and for specific subsets of patients (males, females, normal-weight and overweight/obese subjects).

Results

The initial search identified 6353 studies. After the screening, 69 studies, corresponding to 210 single equations, were included in the analysis. The type and number of parameters used in each equation were highly variable and the most frequently used were demographics, anthropometric and laboratory data, and physical activity frequency. The application of the equations to the sample of 88 subjects enrolled in the nursing home showed that the ones that included a small number of parameters were found to have a good agreement (especially those including the body weight alone: ICC = 0.75, 95% CI 0.69–0.81) while the addition of other parameters resulted in a worsening of the agreement. The same results were obtained for the sample overall and for the specific subsets of patients considered. The results of the systematic review served as a basis for the development of the web application (http://r-ubesp.dctv.unipd.it:3838/equationer).

Conclusions

The proposed web application is expected to guide the clinicians in identifying the most appropriate equation to estimate REE according to the subject's characteristics.

Funding Sources

University of Padova.",2020-05-29 +,Annual continuous fields of woody vegetation structure in the Lower Mekong region from 2000‐2017 Landsat time-series,"Spatially and temporally consistent vegetation structure time-series have great potential to improve the capacity for national land cover monitoring, to reduce latency and cost of international reporting, and to harmonize regional land cover characterizations. Here we present a semi-automatic, operational algorithm for mapping and monitoring of woody vegetation canopy cover and height at a regional scale using freely available Landsat time-series data. The presented algorithm employs automatic data processing and mapping using a set of lidar-based vegetation structure prediction models. Changes in vegetation cover are detected separately and integrated into the structure time-series. Sample-based validation and inter-comparison with existing datasets demonstrates the spatial and temporal consistency of our regional data time-series. The dataset reliably reflects changes in tree cover (tree cover loss user's accuracy of 0.84 and producer's accuracy of 0.75) and can serve as a tool to map annual forest extent (user's accuracy of 0.98 and producer's accuracy of 0.81 for 10% canopy cover threshold to define the forest class). The tree height estimates are consistent with a GLAS-based global map (mean average error of 3.7 m, the correlation coefficient of 0.92 and the R2 of 0.85). The algorithm was prototyped within the Lower Mekong region where it revealed an intensive woody vegetation dynamic. Of the year 2000 forest area (defined using canopy cover threshold of 10% and tree height threshold of 5 m), 9.4% was deforested by the year 2017, and 16.6% was affected by stand-replacement disturbance followed by reforestation. The average annual area of stand-level forest disturbance within the region was 2.34 Mha, and increased by 34% from 2001 (1.85 Mha) to 2017 (2.48 Mha). 
Total forest area decreased by 6.2% within the region, and 11.1% of year 2000 primary forest area was lost by 2017. At the national level, Cambodia demonstrated the highest rate of deforestation, with a net forest area loss of 22.5%. We estimated that 21.3% of 2017 forest cover had an age of 17 years or less, illustrating the intensive forest land uses within the region. The time-series product is suitable for mapping annual land cover and inter-annual land cover change using customized class definitions. The regionally-consistent data are publicly available for download (https://glad.umd.edu/), and online analysis (https://rlcms-servir.adpc.net/en/forest-monitor/), and serve as an input to the SERVIR-Mekong Regional Land Cover Monitoring System.",2019-10-01 +27899581,"HmtDB 2016: data update, a better performing query system and human mitochondrial DNA haplogroup predictor.","The HmtDB resource hosts a database of human mitochondrial genome sequences from individuals with healthy and disease phenotypes. The database is intended to support both population geneticists as well as clinicians undertaking the task to assess the pathogenicity of specific mtDNA mutations. The wide application of next-generation sequencing (NGS) has provided an enormous volume of high-resolution data at a low price, increasing the availability of human mitochondrial sequencing data, which called for a cogent and significant expansion of HmtDB data content that has more than tripled in the current release. We here describe additional novel features, including: (i) a complete, user-friendly restyling of the web interface, (ii) links to the command-line stand-alone and web versions of the MToolBox package, an up-to-date tool to reconstruct and analyze human mitochondrial DNA from NGS data and (iii) the implementation of the Reconstructed Sapiens Reference Sequence (RSRS) as mitochondrial reference sequence. 
The overall update renders HmtDB an even more handy and useful resource as it enables a more rapid data access, processing and analysis. HmtDB is accessible at http://www.hmtdb.uniba.it/.",2016-11-28 +34515387,SynWiki: Functional annotation of the first artificial organism Mycoplasma mycoides JCVI-syn3A.,"The new field of synthetic biology aims at the creation of artificially designed organisms. A major breakthrough in the field was the generation of the artificial synthetic organism Mycoplasma mycoides JCVI-syn3A. This bacterium possesses only 452 protein-coding genes, the smallest number for any organism that is viable independent of a host cell. However, about one third of the proteins have no known function indicating major gaps in our understanding of simple living cells. To facilitate the investigation of the components of this minimal bacterium, we have generated the database SynWiki (http://synwiki.uni-goettingen.de/). SynWiki is based on a relational database and gives access to published information about the genes and proteins of M. mycoides JCVI-syn3A. To gain a better understanding of the functions of the genes and proteins of the artificial bacteria, protein-protein interactions that may provide clues for the protein functions are included in an interactive manner. SynWiki is an important tool for the synthetic biology community that will support the comprehensive understanding of a minimal cell as well as the functional annotation of so far uncharacterized proteins.",2021-09-20 +34308911,Optimized Molecular Interaction Networks for the Study of Skeletal Muscle.,"

Background

Molecular interaction networks (MINs) aim to capture the complex relationships between interacting molecules within a biological system. MINs can be constructed from existing knowledge of molecular functional associations, such as protein-protein binding interactions (PPI) or gene co-expression, and these different sources may be combined into a single MIN. A given MIN may be more or less optimal in its representation of the important functional relationships of molecules in a tissue.

Objective

The aim of this study was to establish whether a combined MIN derived from different types of functional association could better capture muscle-relevant biology compared to its constituent single-source MINs.

Methods

MINs were constructed from functional association databases for both protein-binding and gene co-expression. The networks were then compared based on the capture of muscle-relevant genes and gene ontology (GO) terms, tested in two different ways using established biological network clustering algorithms. The top performing MINs were combined to test whether an optimal MIN for skeletal muscle could be constructed.

Results

The STRING PPI network was the best performing single-source MIN among those tested. Combining STRING with interactions from either the MyoMiner or CoXPRESSdb gene co-expression sources resulted in a combined network with improved performance relative to its constituent networks.

Conclusion

MINs constructed from multiple types of functional association can better represent the functional relationships of molecules in a given tissue. Such networks may be used to improve the analysis and interpretation of functional genomics data in the study of skeletal muscle and neuromuscular diseases. Networks and clusters described by this study, including the combinations of STRING with MyoMiner or with CoXPRESSdb, are available for download from https://www.sys-myo.com/myominer/download.php.",2021-01-01 +34567536,TAPAS: Towards Automated Processing and Analysis of multi-dimensional bioimage data.,"Modern microscopy is based on reproducible quantitative analysis, image data should be batch-processed by a standardized system that can be shared and easily reused by others. Furthermore, such system should require none or minimal programming from the users. We developed TAPAS (Towards an Automated Processing and Analysis System). The goal is to design an easy system for describing and exchanging processing workflows. The protocols are simple text files comprising a linear list of commands used to process and analyse the images. An extensive set of 60 modules is already available, mostly based on the tools proposed in the 3D ImageJ Suite. We propose a wizard, called TAPAS menu, to help the user design the protocol by listing the available modules and the parameters associated. Most modules will have default parameters values for most common tasks. Once the user has designed the protocol, he/she can apply the protocol to a set of images, that can be either stored locally or on a OMERO database. 
An extensive documentation including the list of modules, various tutorials and link to the source code is available at https://imagej.net/TAPAS.",2020-10-28 +29222768,"Thai Norms for Name, Image, and Category Agreement, Object Familiarity, Visual Complexity, Manipulability, and Age of Acquisition for 480 Color Photographic Objects.","Normative databases containing psycholinguistic variables are commonly used to aid stimulus selection for investigations into language and other cognitive processes. Norms exist for many languages, but not for Thai. The aim of the present research, therefore, was to obtain Thai normative data for the BOSS, a set of 480 high resolution color photographic images of real objects (Brodeur et al. in PLoS ONE 5(5), 2010.  https://doi.org/10.1371/journal.pone.0010773 ). Norms were provided by 584 Thai university students on eight dimensions: name agreement, object familiarity, visual complexity, category agreement, image agreement, two types of manipulability (graspability and mimeability), and age of acquisition. The results revealed comparatively similar levels of name agreement to Brodeur et al. especially when unfamiliar items were factored out. The pattern of intercorrelations among the Thai psycholinguistic norms was comparable to previous studies and our cross-linguistic correlations were robust for the same set of pictures in English and French. Conjointly, the findings extend the relevancy of the BOSS to Thailand, supporting this photographic resource for investigations of language and other cognitive processes in monolingual, multilingual, and brain-impaired populations.",2018-06-01 +,279 Nutritional and environmental impacts of removing beef cattle from US agriculture.,"Abstract Popular press materials, government documents, and peer-reviewed papers often focus on reducing or eliminating beef production as a way to enhance healthfulness and minimize environmental impacts of human diets. 
The objective of this work is to quantify the contributions of beef products to U.S. agriculture in terms of human edible nutrient supply and greenhouse gas (GHG) emissions. Data on U.S. beef production were obtained from the analysis conducted by White and Hall (2017; https://doi.org/10.1073/pnas.1707322114), which utilized data from the U.S. Department of Agriculture Agricultural and Economic Research Services and Food Composition databases; the U.S. Food and Drug Administration; the U.S. Environmental Protection Agency; the United Nations Food and Agriculture Organization; and other peer-reviewed, published sources to estimate nutritional and GHG contributions of livestock to U.S. agriculture. Beef emissions were disaggregated from the reported animal metrics to assess contributions of beef to nutrient supplies and GHG emissions in the U.S. From this assessment, the U.S. beef industry provides sufficient product to meet the protein, vitamin B12, long-chain omega-3 and -6 fatty acid requirements of 43, 137, 47, and 487 million people, respectively. In the U.S., beef production was estimated to account for 53% of GHG emissions from U.S. animal agriculture and 25% of GHG emissions from all of U.S. agriculture. An important consideration not accounted for in the analysis by White and Hall was the efficiency of converting human-inedible feeds into human edible food. Future work should focus on qualifying the environmental impact of beef cattle production in the context of optimizing the quantity of human-edible material that can be produced from a set land area.",2018-12-01 +31182652,Practice Resource for Forensic Training in General Psychiatry Residency Programs.,"Full Document: Alonso-Katzowitz JS, Cardasis W, Cerny-Suelzer CA, et al: Practice Resource for Forensic Training in General Psychiatry Residency Programs. Journal of the American Academy of Psychiatry and the Law Online Supplement 2019, 47 (1). 
Available at: http://www.jaapl.org/content/47/1_Supplement.",2019-06-01 +34930068,Cancer-Related Malnutrition: Epidemiological Results from the Latin American Study of Malnutrition in the Oncology Practice.,"Malnutrition can affect the patient diagnosed with, and treated for, cancer. However, until a dedicated study is completed, estimates of malnutrition rates will be disparate and unrepresentative of cancer patients' nutritional reality. Objective: To estimate the prevalence of malnutrition among patients being cared for cancer in Latin American (LATAM) hospitals by means of a multicenter, multinational study. Methods: The Latin American Study of Malnutrition in Oncology (LASOMO) was completed with 1,842 patients (Women: 56.2%; Age ≥ 60 years: 43.2%; Chemotherapy: 55.1%; Radiotherapy: 17.8%; Surgery: 27.1%) assisted at 52 health centers from 10 LATAM countries. Malnutrition prevalence was estimated from the (B + C) scores assigned to the patient with the Subjective Global Assessment by Detsky et al. (1987). Malnutrition prevalence was distributed regarding the demographic features of the patient, the primary tumor location, and the current cytoreducing treatment. Results: Malnutrition affected 59.1% of the surveyed patients. Malnutrition prevalence was higher among male patients and those with tumors of the digestive tract and the hemolymphopoietic system. Malnutrition was also associated with the current cytoreducing modality, with chemotherapy returning the highest prevalence. Conclusions: Malnutrition can be present in more than half of the patients being cared for cancer in LATAM health centers.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.2014902.",2021-12-20 +29649979,Ginseng Genome Database: an open-access platform for genomics of Panax ginseng.,"BACKGROUND:The ginseng (Panax ginseng C.A. Meyer) is a perennial herbaceous plant that has been used in traditional oriental medicine for thousands of years. 
Ginsenosides, which have significant pharmacological effects on human health, are the foremost bioactive constituents in this plant. Having realized the importance of this plant to humans, an integrated omics resource becomes indispensable to facilitate genomic research, molecular breeding and pharmacological study of this herb. DESCRIPTION:The first draft genome sequences of P. ginseng cultivar ""Chunpoong"" were reported recently. Here, using the draft genome, transcriptome, and functional annotation datasets of P. ginseng, we have constructed the Ginseng Genome Database http://ginsengdb.snu.ac.kr /, the first open-access platform to provide comprehensive genomic resources of P. ginseng. The current version of this database provides the most up-to-date draft genome sequence (of approximately 3000 Mbp of scaffold sequences) along with the structural and functional annotations for 59,352 genes and digital expression of genes based on transcriptome data from different tissues, growth stages and treatments. In addition, tools for visualization and the genomic data from various analyses are provided. All data in the database were manually curated and integrated within a user-friendly query page. CONCLUSION:This database provides valuable resources for a range of research fields related to P. ginseng and other species belonging to the Apiales order as well as for plant research communities in general. Ginseng genome database can be accessed at http://ginsengdb.snu.ac.kr /.",2018-04-12 +34233539,"Confronting Two Crises: The COVID-19 Pandemic, the Opioid Epidemic, and the Industrial Hygienist.","This article was originally written for and published in the January 2021 issue of The Synergist, a monthly publication of the American Industrial Hygiene Association. 
The article addresses the convergence of the COVID-19 and opioid crises, the impact of the opioid crisis on the workplace and workers, and the role that industrial hygienists can play in developing workplace programs to prevent and respond to opioid misuse. While the article is specifically written for industrial hygienists, the review and recommendations will be useful to others who are developing workplace opioid prevention programs. Note that the data presented in this article were current as of January 2021. Centers for Disease Control and Prevention's latest available data are for the twelve-month period ending October 2020 and include 88,990 total overdose deaths and 91,862 predicted, when reporting is completed. Source: https://www.cdc.gov/nchs/nvss/vsrr/drug-overdose-data.htm (accessed on 15 June 2021).",2021-07-07 +31978081,All of gene expression (AOE): An integrated index for public gene expression databases.,"Gene expression data have been archived as microarray and RNA-seq datasets in two public databases, Gene Expression Omnibus (GEO) and ArrayExpress (AE). In 2018, the DNA DataBank of Japan started a similar repository called the Genomic Expression Archive (GEA). These databases are useful resources for the functional interpretation of genes, but have been separately maintained and may lack RNA-seq data, while the original sequence data are available in the Sequence Read Archive (SRA). We constructed an index for those gene expression data repositories, called All Of gene Expression (AOE), to integrate publicly available gene expression data. The web interface of AOE can graphically query data in addition to the application programming interface. By collecting gene expression data from RNA-seq in the SRA, AOE also includes data not included in GEO and AE. 
AOE is accessible as a search tool from the GEA website and is freely available at https://aoe.dbcls.jp/.",2020-01-24 +34296322,"On the causal relationships between hyperinsulinaemia, insulin resistance, obesity and dysglycaemia in type 2 diabetes.","Hundreds of millions of people are affected by hyperinsulinaemia, insulin resistance, obesity and the dysglycaemia that mark a common progression from metabolic health to type 2 diabetes. Although the relative contribution of these features and the order in which they appear may differ between individuals, the common clustering and seemingly progressive nature of type 2 diabetes aetiology has guided research and clinical practice in this area for decades. At the same time, lively debate around the causal relationships between these features has continued, as new data from human trials and highly controlled animal studies are presented. This 'For debate' article was prompted by the review in Diabetologia by Esser, Utzschneider and Kahn ( https://doi.org/10.1007/s00125-020-05245-x ), with the purpose of reviewing established and emerging data that provide insight into the relative contributions of hyperinsulinaemia and impaired glucose-stimulated insulin secretion in progressive stages between health, obesity and diabetes. It is concluded that these beta cell defects are not mutually exclusive and that they are both important, but at different stages.",2021-07-22 +33363449,PASS: A Multimodal Database of Physical Activity and Stress for Mobile Passive Body/ Brain-Computer Interface Research.,"With the burgeoning of wearable devices and passive body/brain-computer interfaces (B/BCIs), automated stress monitoring in everyday settings has gained significant attention recently, with applications ranging from serious games to clinical monitoring. 
With mobile users, however, challenges arise due to other overlapping (and potentially confounding) physiological responses (e.g., due to physical activity) that may mask the effects of stress, as well as movement artifacts that can be introduced in the measured signals. For example, the classical increase in heart rate can no longer be attributed solely to stress and could be caused by the activity itself. This makes the development of mobile passive B/BCIs challenging. In this paper, we introduce PASS, a multimodal database of Physical Activity and StresS collected from 48 participants. Participants performed tasks of varying stress levels at three different activity levels and provided quantitative ratings of their perceived stress and fatigue levels. To manipulate stress, two video games (i.e., a calm exploration game and a survival game) were used. Peripheral physical activity (electrocardiography, electrodermal activity, breathing, skin temperature) as well as cerebral activity (electroencephalography) were measured throughout the experiment. A complete description of the experimental protocol is provided and preliminary analyses are performed to investigate the physiological reactions to stress in the presence of physical activity. The PASS database, including raw data and subjective ratings has been made available to the research community at http://musaelab.ca/pass-database/. It is hoped that this database will help advance mobile passive B/BCIs for use in everyday settings.",2020-12-08 +32338876,CycloBranch 2: Molecular Formula Annotations Applied to imzML Data Sets in Bimodal Fusion and LC-MS Data Files.,"Natural product chemistry, microbiology, and food, human, and plant metabolomics represent a few sources of complex metabolomics data generated by mass spectrometry. Among the medley of software tools used to handle these data sets, no universal tool can qualitatively, quantitatively, or statistically address major biological questions or tasks. 
CycloBranch 2, an open and platform-free software, at least now provides the de novo generation of molecular formulas of unknown compounds in both liquid chromatography/mass spectrometry and mass spectrometry imaging datafiles. For imaging files, this database-free approach was documented in the bimodal image fusion and characterization of three small molecules, including metallophores. The fine isotope ratio data filtering step distinguished 34S/13C2 and 41K/13C2 features. The standalone software package is implemented in C++ and can be downloaded from https://ms.biomed.cas.cz/cyclobranch/ and used under GNU General Public License.",2020-05-08 +33795896,COVID-19: The Ivermectin African Enigma.,"

Introduction

The low frequency of cases and deaths from the SARS-CoV-2 COVID-19 virus in some countries of Africa has called our attention about the unusual behavior of this disease. The ivermectin is considered a drug of choice for various parasitic and viral diseases and shown to have in vitro effects against SARS-CoV-2.

Aims

Our study aimed to describe SARS-CoV2 infection and death rates in African countries that participated in an intensive Ivermectin mass campaign carried out to control onchocerciasis and compare them with those of countries that did not participate.

Methods

Data from 19 countries that participated in the World Health Organization (WHO) sponsored African Programme for Onchocerciasis Control (APOC), from 1995 until 2015, were compared with thirty-five (Non-APOC), countries that were not included. Information was obtained from https://www.worldometers.info/coronavirus/ database. Generalized Poisson regression models were used to obtain estimates of the effect of APOC status on cumulative SARS-CoV-2 infection and mortality rates.

Results

After controlling for different factors, including the Human Development Index (HDI), APOC countries (vs. non-APOC), show 28% lower mortality (0.72; 95% CI: 0.67-0.78) and 8% lower rate of infection (0.92; 95% CI: 0.91-0.93) due to COVID-19.

Conclusions

The incidence in mortality rates and number of cases is significantly lower among the APOC countries compared to non-APOC countries. That a mass public health preventive campaign against COVID-19 may have taken place, inadvertently, in some African countries with massive community ivermectin use is an attractive hypothesis. Additional studies are needed to confirm it.",2020-12-30 +34179843,Causal interactions from proteomic profiles: Molecular data meet pathway knowledge.,"We present a computational method to infer causal mechanisms in cell biology by analyzing changes in high-throughput proteomic profiles on the background of prior knowledge captured in biochemical reaction knowledge bases. The method mimics a biologist's traditional approach of explaining changes in data using prior knowledge but does this at the scale of hundreds of thousands of reactions. This is a specific example of how to automate scientific reasoning processes and illustrates the power of mapping from experimental data to prior knowledge via logic programming. The identified mechanisms can explain how experimental and physiological perturbations, propagating in a network of reactions, affect cellular responses and their phenotypic consequences. Causal pathway analysis is a powerful and flexible discovery tool for a wide range of cellular profiling data types and biological questions. The automated causation inference tool, as well as the source code, are freely available at http://causalpath.org.",2021-05-12 +32921804,Heterogeneity in lobar and near-acini deposition of inhaled aerosol in the mouse lung. ,"Laboratory animals are often used to derive health risk from environmental exposure or to assess the therapeutic effect of a drug delivered by inhaled therapy. Knowledge of the in-situ distribution of deposited particles on airway and alveolar surfaces is essential in any assessment of these effects. 
A unique database including both high-resolution lung anatomy and deposition data in four strains of laboratory mice have been recently made publicly available to the research community (https://doi.org/10.25820/9arg-9w56). Using these data, we investigated the effect of particle size on the distribution of deposited particles at the lobar and near-acini level. Analysis was performed on a total of 33 mice where 3, 16 and 14 animals were exposed to 0.5μm, 1μm and 2μm particles, respectively. Ratio of normalized deposition to normalized volume was calculated for each lobe (DVlobe ). At the near-acini level, the skew and standard deviation of the frequency distribution of particle deposition were calculated. Significant deviation above 1 was found for DV ratio in the cranial lobe (DVCranial ). DVMiddle , DVCaudal and DVAccessory were all significantly <1 and lower than DVleft (p<0.01). At the near-acini level, skew and standard deviation were positively correlated with particle size and the presence of hot spots (high deposition) were mainly found in the apical region of the lung. These results highlight the uneven distribution of deposited particles in the mouse lung. Thus, depending on the lung sample location, individual analysis to determine overall deposition may either underestimate or overestimate total lung burden, at least for micron-sized particles.",2020-08-13 +36303760,Peptimetric: Quantifying and Visualizing Differences in Peptidomic Data.,"Finding new sustainable means of diagnosing and treating diseases is one of the most pressing issues of our time. In recent years, several endogenous peptides have been found to be both excellent biomarkers for many diseases and to possess important physiological roles which may be utilized in treatments. 
The detection of peptides has been facilitated by the rapid development of biological mass spectrometry and now the combination of fast and sensitive high resolution MS instruments and stable nano HP-LC equipment sequences thousands of peptides in one single experiment. In most research conducted with these advanced systems, proteolytically cleaved proteins are analyzed and the specific peptides are identified by software dedicated for protein quantification using different proteomics workflows. Analysis of endogenous peptides with peptidomics workflows also benefit from the novel sensitive and advanced instrumentation, however, the generated peptidomic data is vast and subsequently laborious to visualize and examine, creating a bottleneck in the analysis. Therefore, we have created Peptimetric, an application designed to allow researchers to investigate and discover differences between peptidomic samples. Peptimetric allows the user to dynamically and interactively investigate the proteins, peptides, and some general characteristics of multiple samples, and is available as a web application at https://peptimetric.herokuapp.com. To illustrate the utility of Peptimetric, we've applied it to a peptidomic dataset of 15 urine samples from diabetic patients and corresponding data from healthy subjects.",2021-08-25 +32935001,An Introduction to DLforum - An online discussion forum for data linkage researchers and practitioners https://dmm.anu.edu.au/DLforum/.,"Data linkage, the process of identifying records that refer to the same entities across databases, is a crucial component of Population Data Science. Data linkage has a history going back over fifty years with many different methods and techniques being developed in various disciplines including computer science, statistics, and health informatics. 
Data linkage researchers and practitioners are commonly only familiar with methods and techniques that have been developed or are used in their own discipline, and they often only follow research that is being published at venues in their own discipline. There is currently no single online resource that allows data linkage researchers and practitioners across different disciplines to exchange ideas, post questions, or advertise new publications, software, open positions, or upcoming conferences and workshops. This leads to a communication gap in the multi-disciplinary field of data linkage. We aim to address this gap with the DLforum, a public online discussion forum for data linkage. DLforum contains several discussion areas, including publication announcements, resources (software and datasets), information about upcoming conferences and workshops, job opportunities, and general questions related to data linkage. The forum includes a moderation process where all registered users can post content and reply to posts by other users. We anticipate that the number of users registered and the amount of content posted in the forum will show that such an online forum is of value to data linkage researchers and practitioners from different disciplines to effectively communicate and exchange their knowledge, and thus form an online community of practice. In this paper we describe the methods of developing the DLforum, its structure and content, and our plan on how to evaluate the forum. The DLforum is freely available at: https://dmm.anu.edu.au/DLforum/.",2018-02-20 +29997612,CDG: An Online Server for Detecting Biologically Closest Disease-Causing Genes and its Application to Primary Immunodeficiency.,"High-throughput genomic technologies yield about 20,000 variants in the protein-coding exome of each individual. A commonly used approach to select candidate disease-causing variants is to test whether the associated gene has been previously reported to be disease-causing. 
In the absence of known disease-causing genes, it can be challenging to associate candidate genes with specific genetic diseases. To facilitate the discovery of novel gene-disease associations, we determined the putative biologically closest known genes and their associated diseases for 13,005 human genes not currently reported to be disease-associated. We used these data to construct the closest disease-causing genes (CDG) server, which can be used to infer the closest genes with an associated disease for a user-defined list of genes or diseases. We demonstrate the utility of the CDG server in five immunodeficiency patient exomes across different diseases and modes of inheritance, where CDG dramatically reduced the number of candidate genes to be evaluated. This resource will be a considerable asset for ascertaining the potential relevance of genetic variants found in patient exomes to specific diseases of interest. The CDG database and online server are freely available to non-commercial users at: http://lab.rockefeller.edu/casanova/CDG.",2018-06-27 +34363966,"Editorial: Prenatal Depressive Symptoms, Cortical Morphology, and Reward Sensitivity in Preschoolers.","Studies drawing on data from the Growing Up in Singapore Towards Healthy Outcomes (GUSTO, https://www.gusto.sg) have provided unprecedented evidence for associations between prenatal maternal mental health symptoms and variations in offspring early brain structural and functional development.1 Wei et al.2 expand upon these studies by using data from GUSTO to test for both sex-specific effects of prenatal maternal depressive symptoms (pre-MDS) and to examine whether cortical development mediated the relationship between pre-MDS and child sensitivity to reward and punishment in preschoolers. The study found a fascinating sex-specific pattern. 
It showed that higher pre-MDS was associated with greater cortical surface area in boys and lower surface area in girls, specifically in areas of the prefrontal cortex, superior temporal gyrus, and superior parietal lobule. Regarding their hypothesized mediation model, their analysis found that superior parietal lobule surface area mediated the association between pre-MDS and sensitivity to reward in girls but not boys. In this editorial, I will discuss some of the implications, limitations, and future directions for this line of research.",2021-08-04 +33737208,Search and visualization of gene-drug-disease interactions for pharmacogenomics and precision medicine research using GeneDive.,"

Background

Understanding the relationships between genes, drugs, and disease states is at the core of pharmacogenomics. Two leading approaches for identifying these relationships in medical literature are: human expert led manual curation efforts, and modern data mining based automated approaches. The former generates small amounts of high-quality data, and the latter offers large volumes of mixed quality data. The algorithmically extracted relationships are often accompanied by supporting evidence, such as, confidence scores, source articles, and surrounding contexts (excerpts) from the articles, that can be used as data quality indicators. Tools that can leverage these quality indicators to help the user gain access to larger and high-quality data are needed.

Approach

We introduce GeneDive, a web application for pharmacogenomics researchers and precision medicine practitioners that makes gene, disease, and drug interactions data easily accessible and usable. GeneDive is designed to meet three key objectives: (1) provide functionality to manage information-overload problem and facilitate easy assimilation of supporting evidence, (2) support longitudinal and exploratory research investigations, and (3) offer integration of user-provided interactions data without requiring data sharing.

Results

GeneDive offers multiple search modalities, visualizations, and other features that guide the user efficiently to the information of their interest. To facilitate exploratory research, GeneDive makes the supporting evidence and context for each interaction readily available and allows the data quality threshold to be controlled by the user as per their risk tolerance level. The interactive search-visualization loop enables relationship discoveries between diseases, genes, and drugs that might not be explicitly described in literature but are emergent from the source medical corpus and deductive reasoning. The ability to utilize user's data either in combination with the GeneDive native datasets or in isolation promotes richer data-driven exploration and discovery. These functionalities along with GeneDive's applicability for precision medicine, bringing the knowledge contained in biomedical literature to bear on particular clinical situations and improving patient care, are illustrated through detailed use cases.

Conclusion

GeneDive is a comprehensive, broad-use biological interactions browser. The GeneDive application and information about its underlying system architecture are available at http://www.genedive.net. GeneDive Docker image is also available for download at this URL, allowing users to (1) import their own interaction data securely and privately; and (2) generate and test hypotheses across their own and other datasets.",2021-03-16 +34351410,Single-cell transcriptome profiling of the human developing spinal cord reveals a conserved genetic programme with human-specific features.,"The spinal cord receives input from peripheral sensory neurons and controls motor output by regulating muscle innervating motor neurons. These functions are carried out by neural circuits comprising molecularly distinct neuronal subtypes generated in a characteristic spatiotemporal arrangement from progenitors in the embryonic neural tube. To gain insight into the diversity and complexity of cells in the developing human neural tube, we used single-cell mRNA sequencing to profile cervical and thoracic regions in four human embryos of Carnegie stages (CS) CS12, CS14, CS17 and CS19 from gestational weeks 4-7. Analysis of progenitor and neuronal populations from the neural tube and dorsal root ganglia identified dozens of distinct cell types and facilitated the reconstruction of the differentiation pathways of specific neuronal subtypes. Comparison with mouse revealed overall similarity of mammalian neural tube development while highlighting some human-specific features. These data provide a catalogue of gene expression and cell type identity in the human neural tube that will support future studies of sensory and motor control systems. 
The data can be explored at https://shiny.crick.ac.uk/scviewer/neuraltube/.",2021-08-05 +34630938,"Getting to know each other: PPIMem, a novel approach for predicting transmembrane protein-protein complexes.","Because of their considerable number and diversity, membrane proteins and their macromolecular complexes represent the functional units of cells. Their quaternary structure may be stabilized by interactions between the α-helices of different proteins in the hydrophobic region of the cell membrane. Membrane proteins equally represent potential pharmacological targets par excellence for various diseases. Unfortunately, their experimental 3D structure and that of their complexes with other intramembrane protein partners are scarce due to technical difficulties. To overcome this key problem, we devised PPIMem, a computational approach for the specific prediction of higher-order structures of α-helical transmembrane proteins. The novel approach involves proper identification of the amino acid residues at the interface of molecular complexes with a 3D structure. The identified residues compose then nonlinear interaction motifs that are conveniently expressed as mathematical regular expressions. These are efficiently implemented for motif search in amino acid sequence databases, and for the accurate prediction of intramembrane protein-protein complexes. Our template interface-based approach predicted 21,544 binary complexes between 1,504 eukaryotic plasma membrane proteins across 39 species. We compare our predictions to experimental datasets of protein-protein interactions as a first validation method. 
The online database that results from the PPIMem algorithm with the annotated predicted interactions are implemented as a web server and can be accessed directly at https://transint.univ-evry.fr.",2021-09-17 +32610336,The Prospective Studies of Atherosclerosis (Proof-ATHERO) Consortium: Design and Rationale.,"Atherosclerosis - the pathophysiological mechanism shared by most cardiovascular diseases - can be directly or indirectly assessed by a variety of clinical tests including measurement of carotid intima-media thickness, carotid plaque, -ankle-brachial index, pulse wave velocity, and coronary -artery calcium. The Prospective Studies of Atherosclerosis -(Proof-ATHERO) consortium (https://clinicalepi.i-med.ac.at/research/proof-athero/) collates de-identified individual-participant data of studies with information on atherosclerosis measures, risk factors for cardiovascular disease, and incidence of cardiovascular diseases. It currently comprises 74 studies that involve 106,846 participants from 25 countries and over 40 cities. In summary, 21 studies recruited participants from the general population (n = 67,784), 16 from high-risk populations (n = 22,677), and 37 as part of clinical trials (n = 16,385). Baseline years of contributing studies range from April 1980 to July 2014; the latest follow-up was until June 2019. Mean age at baseline was 59 years (standard deviation: 10) and 50% were female. Over a total of 830,619 person-years of follow-up, 17,270 incident cardiovascular events (including coronary heart disease and stroke) and 13,270 deaths were recorded, corresponding to cumulative incidences of 2.1% and 1.6% per annum, respectively. The consortium is coordinated by the Clinical Epidemiology Team at the Medical University of Innsbruck, Austria. Contributing studies undergo a detailed data cleaning and harmonisation procedure before being incorporated in the Proof-ATHERO central database. 
Statistical analyses are being conducted according to pre-defined analysis plans and use established methods for individual-participant data meta-analysis. Capitalising on its large sample size, the multi-institutional collaborative Proof-ATHERO consortium aims to better characterise, understand, and predict the development of atherosclerosis and its clinical consequences.",2020-07-01 +29681281,Simultaneous classification of multiple classes in NMR metabolomics and vibrational spectroscopy using interval-based classification methods: iECVA vs iPLS-DA.,"Interval based chemometric algorithms have proven to be very powerful for spectral alignments, spectral regressions and spectral classifications. The interval-based methods may not only improve the performance, but also reduce model complexity and enhance the spectral interpretation. Extended Canonical Variate Analysis (ECVA) is a powerful method for multiple group classifications of multivariate data and can easily be extended to an interval approach, iECVA. This study outlines the iECVA method and compares its performance to interval Partial Least Squares Discriminant Analysis (iPLS-DA) on three spectroscopic datasets from Nuclear Magnetic Resonance (NMR), Near Infrared (NIR) and Infrared (IR) spectroscopy, respectively. The results invariantly show that the interval-based classification methods greatly enhance the interpretability of the models by identifying important spectral regions, which facilitate interpretation and biomarker discovery. Although the results for the two methods are similar regarding the number of misclassifications and identified important regions, the model complexity of the PLS-DA proved to consistently lower than the ECVA. The Matlab source codes for both iECVA and iPLS-DA are made freely available at www.models.life.ku.dk.",2018-03-29 +32315389,HotSpot3D web server: an integrated resource for mutation analysis in protein 3D structures.,"

Motivation

HotSpot3D is a widely used software for identifying mutation hotspots on the 3D structures of proteins. To further assist users, we developed a new HotSpot3D web server to make this software more versatile, convenient and interactive.

Results

The HotSpot3D web server performs data pre-processing, clustering, visualization and log-viewing on one stop. Users can interactively explore each cluster and easily re-visualize the mutational clusters within browsers. We also provide a database that allows users to search and visualize proximal mutations from 33 cancers in the Cancer Genome Atlas.

Availability and implementation

http://niulab.scgrid.cn/HotSpot3D/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +34297817,ENNAVIA is a novel method which employs neural networks for antiviral and anti-coronavirus activity prediction for therapeutic peptides. ,"Viruses represent one of the greatest threats to human health, necessitating the development of new antiviral drug candidates. Antiviral peptides often possess excellent biological activity and a favourable toxicity profile, and therefore represent a promising field of novel antiviral drugs. As the quantity of sequencing data grows annually, the development of an accurate in silico method for the prediction of peptide antiviral activities is important. This study leverages advances in deep learning and cheminformatics to produce a novel sequence-based deep neural network classifier for the prediction of antiviral peptide activity. The method outperforms the existent best-in-class, with an external test accuracy of 93.9%, Matthews correlation coefficient of 0.87 and an Area Under the Curve of 0.93 on the dataset of experimentally validated peptide activities. This cutting-edge classifier is available as an online web server at https://research.timmons.eu/ennavia, facilitating in silico screening and design of peptide antiviral drugs by the wider research community.",2021-11-01 +34541527,Accurate annotation of protein coding sequences with IDTAXA.,"The observed diversity of protein coding sequences continues to increase far more rapidly than knowledge of their functions, making classification algorithms essential for assigning a function to proteins using only their sequence. Most pipelines for annotating proteins rely on searches for homologous sequences in databases of previously annotated proteins using BLAST or HMMER. Here, we develop a new approach for classifying proteins into a taxonomy of functions and demonstrate its utility for genome annotation. 
Our algorithm, IDTAXA, was more accurate than BLAST or HMMER at assigning sequences to KEGG ortholog groups. Moreover, IDTAXA correctly avoided classifying sequences with novel functions to existing groups, which is a common error mode for classification approaches that rely on E-values as a proxy for confidence. We demonstrate IDTAXA's utility for annotating eukaryotic and prokaryotic genomes by assigning functions to proteins within a multi-level ontology and applied IDTAXA to detect genome contamination in eukaryotic genomes. Finally, we re-annotated 8604 microbial genomes with known antibiotic resistance phenotypes to discover two novel associations between proteins and antibiotic resistance. IDTAXA is available as a web tool (http://DECIPHER.codes/Classification.html) or as part of the open source DECIPHER R package from Bioconductor.",2021-09-16 +34531327,T1TAdb: the database of type I toxin-antitoxin systems.,"Type I toxin-antitoxin (T1TA) systems constitute a large class of genetic modules with antisense RNA (asRNA)-mediated regulation of gene expression. They are widespread in bacteria and consist of an mRNA coding for a toxic protein and a noncoding asRNA that acts as an antitoxin preventing the synthesis of the toxin by directly base-pairing to its cognate mRNA. The co- and post-transcriptional regulation of T1TA systems is intimately linked to RNA sequence and structure, therefore it is essential to have an accurate annotation of the mRNA and asRNA molecules to understand this regulation. However, most T1TA systems have been identified by means of bioinformatic analyses solely based on the toxin protein sequences, and there is no central repository of information on their specific RNA features. Here we present the first database dedicated to type I TA systems, named T1TAdb. 
It is an open-access web database (https://d-lab.arna.cnrs.fr/t1tadb) with a collection of ∼1900 loci in ∼500 bacterial strains in which a toxin-coding sequence has been previously identified. RNA molecules were annotated with a bioinformatic procedure based on key determinants of the mRNA structure and the genetic organization of the T1TA loci. Besides RNA and protein secondary structure predictions, T1TAdb also identifies promoter, ribosome-binding, and mRNA-asRNA interaction sites. It also includes tools for comparative analysis, such as sequence similarity search and computation of structural multiple alignments, which are annotated with covariation information. To our knowledge, T1TAdb represents the largest collection of features, sequences, and structural annotations on this class of genetic modules.",2021-09-16 +34282143,Development of a fixed module repertoire for the analysis and interpretation of blood transcriptome data.,"As the capacity for generating large-scale molecular profiling data continues to grow, the ability to extract meaningful biological knowledge from it remains a limitation. Here, we describe the development of a new fixed repertoire of transcriptional modules, BloodGen3, that is designed to serve as a stable reusable framework for the analysis and interpretation of blood transcriptome data. The construction of this repertoire is based on co-clustering patterns observed across sixteen immunological and physiological states encompassing 985 blood transcriptome profiles. Interpretation is supported by customized resources, including module-level analysis workflows, fingerprint grid plot visualizations, interactive web applications and an extensive annotation framework comprising functional profiling reports and reference transcriptional profiles. 
Taken together, this well-characterized and well-supported transcriptional module repertoire can be employed for the interpretation and benchmarking of blood transcriptome profiles within and across patient cohorts. Blood transcriptome fingerprints for the 16 reference cohorts can be accessed interactively via:  https://drinchai.shinyapps.io/BloodGen3Module/ .",2021-07-19 +29504895,FunGeneNet: a web tool to estimate enrichment of functional interactions in experimental gene sets.,"BACKGROUND:Estimation of functional connectivity in gene sets derived from genome-wide or other biological experiments is one of the essential tasks of bioinformatics. A promising approach for solving this problem is to compare gene networks built using experimental gene sets with random networks. One of the resources that make such an analysis possible is CrossTalkZ, which uses the FunCoup database. However, existing methods, including CrossTalkZ, do not take into account individual types of interactions, such as protein/protein interactions, expression regulation, transport regulation, catalytic reactions, etc., but rather work with generalized types characterizing the existence of any connection between network members. RESULTS:We developed the online tool FunGeneNet, which utilizes the ANDSystem and STRING to reconstruct gene networks using experimental gene sets and to estimate their difference from random networks. To compare the reconstructed networks with random ones, the node permutation algorithm implemented in CrossTalkZ was taken as a basis. To study the FunGeneNet applicability, the functional connectivity analysis of networks constructed for gene sets involved in the Gene Ontology biological processes was conducted. We showed that the method sensitivity exceeds 0.8 at a specificity of 0.95. 
We found that the significance level of the difference between gene networks of biological processes and random networks is determined by the type of connections considered between objects. At the same time, the highest reliability is achieved for the generalized form of connections that takes into account all the individual types of connections. By taking examples of the thyroid cancer networks and the apoptosis network, it is demonstrated that key participants in these processes are involved in the interactions of those types by which these networks differ from random ones. CONCLUSIONS:FunGeneNet is a web tool aimed at proving the functionality of networks in a wide range of sizes of experimental gene sets, both for different global networks and for different types of interactions. Using examples of thyroid cancer and apoptosis networks, we have shown that the links over-represented in the analyzed network in comparison with the random ones make possible a biological interpretation of the original gene/protein sets. The FunGeneNet web tool for assessment of the functional enrichment of networks is available at http://www-bionet.sscc.ru/fungenenet/ .",2018-02-09 +31412589,A Comprehensive Superposition of Viral Polymerase Structures. ,"Nucleic acid polymerases are essential enzymes that replicate the genomes of both RNA and DNA viruses. These enzymes are generally encoded by viruses themselves so as to provide biochemical functions and control elements that differ from those of the host cell polymerases. The core active site structure used by all replicative polymerases is highly conserved and composed of two key aspartate residues from the conserved motifs A and C, but beyond this there is significant divergence among structures. 
These differences can make it difficult to select which portions of structures to align for comparisons, yet there are extended structural similarities within different groups of viral polymerases that should clearly be considered to generate optimal alignments. This manuscript describes a comprehensive structure-based superposition of every viral polymerase structure solved thus far based on an alignment-tree approach wherein aligned regions grow in complexity as similarity among polymerases increases. The result is a set of 646 structures that have been aligned into a single common orientation. This provides a convenient resource for directly comparing viral polymerases and illustrating structural conservation among them. It also sets the stage for detailed bioinformatics analysis to further assess common structural features. The full set of protein data bank (PDB) formatted files is publicly available at http://www.zenodo.org/communities/pols/.",2019-08-13 +34026726,iMAP: A Web Server for Metabolomics Data Integrative Analysis.,"Metabolomics data analysis depends on the utilization of bioinformatics tools. To meet the evolving needs of metabolomics research, several integrated platforms have been developed. Our group has developed a desktop platform IP4M (integrated Platform for Metabolomics Data Analysis) which allows users to perform a nearly complete metabolomics data analysis in one-stop. With the extensive usage of IP4M, more and more demands were raised from users worldwide for a web version and a more customized workflow. Thus, iMAP (integrated Metabolomics Analysis Platform) was developed with extended functions, improved performances, and redesigned structures. Compared with existing platforms, iMAP has more methods and usage modes. A new module was developed with an automatic pipeline for train-test set separation, feature selection, and predictive model construction and validation. 
A new module was incorporated with sufficient editable parameters for network construction, visualization, and analysis. Moreover, plenty of plotting tools have been upgraded for highly customized publication-ready figures. Overall, iMAP is a good alternative tool with complementary functions to existing metabolomics data analysis platforms. iMAP is freely available for academic usage at https://imap.metaboprofile.cloud/ (License MPL 2.0).",2021-05-05 +32705130,CNSA: a data repository for archiving omics data. ,"With the application and development of high-throughput sequencing technology in life and health sciences, massive multi-omics data brings the problem of efficient management and utilization. Database development and biocuration are the prerequisites for the reuse of these big data. Here, relying on China National GeneBank (CNGB), we present CNGB Sequence Archive (CNSA) for archiving omics data, including raw sequencing data and its further analyzed results which are organized into six objects, namely Project, Sample, Experiment, Run, Assembly and Variation at present. Moreover, CNSA has created a correlation model of living samples, sample information and analytical data on some projects. Both living samples and analytical data are directly correlated with the sample information. From either one, information or data of the other two can be obtained, so that all data can be traced throughout the life cycle from the living sample to the sample information to the analytical data. Complying with the data standards commonly used in the life sciences, CNSA is committed to building a comprehensive and curated data repository for storing, managing and sharing of omics data. We will continue to improve the data standards and provide free access to open-data resources for worldwide scientific communities to support academic research and the bio-industry. 
Database URL: https://db.cngb.org/cnsa/.",2020-01-01 +32472933,Online searching platform for the antibiotic resistome in bacterial tree of life and global habitats. ,"Metagenomic analysis reveals that antibiotic-resistance genes (ARGs) are widely distributed in both human-associated and non-human-associated habitats. However, it is difficult to equally compare ARGs between samples without a standard method. Here, we constructed a comprehensive profile of the distribution of potential ARGs in bacterial tree of life and global habitats by investigating ARGs in 55 000 bacterial genomes, 16 000 bacterial plasmid sequences, 3000 bacterial integron sequences and 850 metagenomes using a standard pipeline. We found that >80% of all known ARGs are not carried by any plasmid or integron sequences. Among potential mobile ARGs, tetracycline and beta-lactam resistance genes (such as tetA, tetM and class A beta-lactamase gene) distribute in multiple pathogens across bacterial phyla, indicating their clinical relevance and importance. We showed that class 1 integrases (intI1) display a poor linear relationship with total ARGs in both non-human-associated and human-associated environments. Furthermore, both total ARGs and intI1 genes show little correlation with the degree of anthropogenicity. These observations highlight the need to differentiate ARGs of high clinical relevance. This profile is published on an online platform (ARGs-OSP, http://args-osp.herokuapp.com/) as a valuable resource for the most challenging topics in this field, i.e. the risk, evolution and emergence of ARGs.",2020-07-01 +34903906,Work from home during the COVID-19 pandemic: An observational study based on a large geo-tagged COVID-19 Twitter dataset (UsaGeoCov19).,"As COVID-19 swept over the world, people discussed facts, expressed opinions, and shared sentiments about the pandemic on social media. 
Since policies such as travel restriction and lockdown in reaction to COVID-19 were made at different levels of the society (e.g., schools and employers) and the government, we build a large geo-tagged Twitter dataset titled UsaGeoCov19 and perform an exploratory analysis by geographic location. Specifically, we collect 650,563 unique geo-tagged tweets across the United States covering the date range from January 25 to May 10, 2020. Tweet locations enable us to conduct region-specific studies such as tweeting volumes and sentiment, sometimes in response to local regulations and reported COVID-19 cases. During this period, many people started working from home. The gap between workdays and weekends in hourly tweet volumes inspires us to propose algorithms to estimate work engagement during the COVID-19 crisis. This paper also summarizes themes and topics of tweets in our dataset using both social media exclusive tools (i.e., #hashtags, @mentions) and the latent Dirichlet allocation model. We welcome requests for data sharing and conversations for more insights. UsaGeoCov19 link:http://yunhefeng.me/geo-tagged_twitter_datasets/.",2021-12-09 +32683045,IC4R-2.0: Rice Genome Reannotation Using Massive RNA-seq Data.,"Genome reannotation aims for complete and accurate characterization of gene models and thus is of critical significance for in-depth exploration of gene function. Although the availability of massive RNA-seq data provides great opportunities for gene model refinement, few efforts have been made to adopt these precious data in rice genome reannotation. Here we reannotate the rice (Oryza sativa L. ssp. japonica) genome based on integration of large-scale RNA-seq data and release a new annotation system IC4R-2.0. In general, IC4R-2.0 significantly improves the completeness of gene structure, identifies a number of novel genes, and integrates a variety of functional annotations. 
Furthermore, long non-coding RNAs (lncRNAs) and circular RNAs (circRNAs) are systematically characterized in the rice genome. Performance evaluation shows that compared to previous annotation systems, IC4R-2.0 achieves higher integrity and quality, primarily attributable to massive RNA-seq data applied in genome annotation. Consequently, we incorporate the improved annotations into the Information Commons for Rice (IC4R), a database integrating multiple omics data of rice, and accordingly update IC4R by providing more user-friendly web interfaces and implementing a series of practical online tools. Together, the updated IC4R, which is equipped with the improved annotations, bears great promise for comparative and functional genomic studies in rice and other monocotyledonous species. The IC4R-2.0 annotation system and related resources are freely accessible at http://ic4r.org/.",2020-04-01 +36303753,BRANEart: Identify Stability Strength and Weakness Regions in Membrane Proteins.,"Understanding the role of stability strengths and weaknesses in proteins is a key objective for rationalizing their dynamical and functional properties such as conformational changes, catalytic activity, and protein-protein and protein-ligand interactions. We present BRANEart, a new, fast and accurate method to evaluate the per-residue contributions to the overall stability of membrane proteins. It is based on an extended set of recently introduced statistical potentials derived from membrane protein structures, which better describe the stability properties of this class of proteins than standard potentials derived from globular proteins. We defined a per-residue membrane propensity index from combinations of these potentials, which can be used to identify residues which strongly contribute to the stability of the transmembrane region or which would, on the contrary, be more stable in extramembrane regions, or vice versa. 
Large-scale application to membrane and globular protein sets and application to test cases show excellent agreement with experimental data. BRANEart thus appears as a useful instrument to analyze in detail the overall stability properties of a target membrane protein, to position it relative to the lipid bilayer, and to rationally modify its biophysical characteristics and function. BRANEart can be freely accessed from http://babylone.3bio.ulb.ac.be/BRANEart.",2021-12-02 +34521345,PathFams: statistical detection of pathogen-associated protein domains.,"

Background

A substantial fraction of genes identified within bacterial genomes encode proteins of unknown function. Identifying which of these proteins represent potential virulence factors, and mapping their key virulence determinants, is a challenging but important goal.

Results

To facilitate virulence factor discovery, we performed a comprehensive analysis of 17,929 protein domain families within the Pfam database, and scored them based on their overrepresentation in pathogenic versus non-pathogenic species, taxonomic distribution, relative abundance in metagenomic datasets, and other factors.

Conclusions

We identify pathogen-associated domain families, candidate virulence factors in the human gut, and eukaryotic-like mimicry domains with likely roles in virulence. Furthermore, we provide an interactive database called PathFams to allow users to explore pathogen-associated domains as well as identify pathogen-associated domains and domain architectures in user-uploaded sequences of interest. PathFams is freely available at https://pathfams.uwaterloo.ca .",2021-09-14 +,Intra-country introductions unraveling global hotspots of alien fish species,"Alien or non-native species are defined as species living outside their natural distributional ranges. The spread of alien species is increasing globally as a result of rapid technological advances and globalization. Recent investigations have estimated global hotspots of alien established species on the basis of geopolitical boundaries, including Dawson et al. (in: Nat Ecol Evol 1:186. https://doi.org/10.1038/s41559-017-0186 , 2017). In particular, these investigations do not consider Intra-Country Established Alien Species, i.e., successful introductions that occur among regions within the same country. In continental countries such as Brazil, the USA and China, studies excluding Intra-Country Established Alien Species (IEAS) waste essential information. Here, we argue that researchers should also consider intra-country introductions when estimating and addressing the risks of alien introductions. By using detailed data for freshwater fish including IEAS in large countries, we demonstrate that novel hotspots for IEAS have arisen worldwide. We illustrate emblematic examples of IEAS, as well as their vectors and negative impacts, to demonstrate the range of impacts that might be missed when excluding IEAS data from analysis. We recognize the need for generalizations, but generalizations based on incomplete data can misinform conservation efforts, particularly in megadiverse regions. 
Ignoring IEAS influences how we count non-native species, invasions and perceive invisibility and impacts. Consequently, upcoming records and analysis of invasion patterns and management of aliens and EAS global hotspots must account for such biases in quantifying the IEAS portion.",2019-09-01 +30407594,"PANTHER version 14: more genomes, a new PANTHER GO-slim and improvements in enrichment analysis tools.","PANTHER (Protein Analysis Through Evolutionary Relationships, http://pantherdb.org) is a resource for the evolutionary and functional classification of genes from organisms across the tree of life. We report the improvements we have made to the resource during the past two years. For evolutionary classifications, we have added more prokaryotic and plant genomes to the phylogenetic gene trees, expanding the representation of gene evolution in these lineages. We have refined many protein family boundaries, and have aligned PANTHER with the MEROPS resource for protease and protease inhibitor families. For functional classifications, we have developed an entirely new PANTHER GO-slim, containing over four times as many Gene Ontology terms as our previous GO-slim, as well as curated associations of genes to these terms. Lastly, we have made substantial improvements to the enrichment analysis tools available on the PANTHER website: users can now analyze over 900 different genomes, using updated statistical tests with false discovery rate corrections for multiple testing. The overrepresentation test is also available as a web service, for easy addition to third-party sites.",2019-01-01 +32365218,The ecology of medical care in Switzerland: prevalence of illness in the community and healthcare utilisation in Switzerland.,"

Introduction

The allocation and equal distribution of healthcare resources is one of the major challenges of today. Therefore, a framework to analyse the prevalence of illness in the community and the use of various sources of healthcare is crucial. The aim of the study was to evaluate the health-seeking behaviour of 1025 individuals in Switzerland in a 2-month period in 2018.

Methods

Population-based, cross-sectional health survey with a multistage, stratified cluster design. The LINK Institute (Luzern, Switzerland, https://www.link.ch/) interviewed a representative sample of the adult Swiss population (age ≥18 years, stratified by language region: German-, French- and Italian-speaking, 70, 25 and 5%, respectively) by telephone. There were two interview rounds to account for potential seasonal variations, in May (n = 506) and November 2018 (n = 516). The health-seeking behaviour of these individuals during the previous 2 months was analysed.

Results

In total, data of 1025 individuals were analysed: 51% females, median age 52 years (range 18–85). During the preceding 2 months, per 1000 adults, 546 had at least one symptom, 184 reported several symptoms, 243 sought medical advice, 164 first contacted their general practitioner, 81 directly contacted a specialist in a private practice, 16 were self-admitted to an accident and emergency department, 17 firstly contacted a pharmacy and 6 contacted an alternative medicine healthcare provider. In total, 21 persons were admitted to a hospital, of whom 8 underwent surgical procedures, 18 were at first transferred to a regular ward and 3 required intensive care unit services. Because of their current health problem, 387 individuals took medication and 259 bought their medication themselves. The vast majority (95%) of subjects was registered with a general practitioner.

Conclusions

This study represented an attempt to map the healthcare utilisation of the Swiss population. These results may be useful for further delineation of healthcare policies and medical education to meet the demand and needs of people in Switzerland. They indicate that general practitioners are the most important healthcare resource in Switzerland. Compared with specialists, they provide twice as much health advice at less costs. To optimise the health care system in Switzerland, we suggest to allocate resources where they are most needed.",2020-05-04 +32631222,iPNHOT: a knowledge-based approach for identifying protein-nucleic acid interaction hot spots.,"BACKGROUND:The interaction between proteins and nucleic acids plays pivotal roles in various biological processes such as transcription, translation, and gene regulation. Hot spots are a small set of residues that contribute most to the binding affinity of a protein-nucleic acid interaction. Compared to the extensive studies of the hot spots on protein-protein interfaces, the hot spot residues within protein-nucleic acids interfaces remain less well-studied, in part because mutagenesis data for protein-nucleic acids interaction are not as abundant as that for protein-protein interactions. RESULTS:In this study, we built a new computational model, iPNHOT, to effectively predict hot spot residues on protein-nucleic acids interfaces. One training data set and an independent test set were collected from dbAMEPNI and some recent literature, respectively. To build our model, we generated 97 different sequential and structural features and used a two-step strategy to select the relevant features. The final model was built based only on 7 features using a support vector machine (SVM). The features include two unique features such as ∆SASsa1/2 and esp3, which are newly proposed in this study. 
Based on the cross validation results, our model gave F1 score and AUROC as 0.725 and 0.807 on the subset collected from ProNIT, respectively, compared to 0.407 and 0.670 of mCSM-NA, a state-of-the art model to predict the thermodynamic effects of protein-nucleic acid interaction. The iPNHOT model was further tested on the independent test set, which showed that our model outperformed other methods. CONCLUSION:In this study, by collecting data from a recently published database dbAMEPNI, we proposed a new model, iPNHOT, to predict hotspots on both protein-DNA and protein-RNA interfaces. The results show that our model outperforms the existing state-of-art models. Our model is available for users through a webserver: http://zhulab.ahu.edu.cn/iPNHOT/ .",2020-07-06 +31553718,Accurate genome-wide predictions of spatio-temporal gene expression during embryonic development.,"Comprehensive information on the timing and location of gene expression is fundamental to our understanding of embryonic development and tissue formation. While high-throughput in situ hybridization projects provide invaluable information about developmental gene expression patterns for model organisms like Drosophila, the output of these experiments is primarily qualitative, and a high proportion of protein coding genes and most non-coding genes lack any annotation. Accurate data-centric predictions of spatio-temporal gene expression will therefore complement current in situ hybridization efforts. Here, we applied a machine learning approach by training models on all public gene expression and chromatin data, even from whole-organism experiments, to provide genome-wide, quantitative spatio-temporal predictions for all genes. We developed structured in silico nano-dissection, a computational approach that predicts gene expression in >200 tissue-developmental stages. 
The algorithm integrates expression signals from a compendium of 6,378 genome-wide expression and chromatin profiling experiments in a cell lineage-aware fashion. We systematically evaluated our performance via cross-validation and experimentally confirmed 22 new predictions for four different embryonic tissues. The model also predicts complex, multi-tissue expression and developmental regulation with high accuracy. We further show the potential of applying these genome-wide predictions to extract tissue specificity signals from non-tissue-dissected experiments, and to prioritize tissues and stages for disease modeling. This resource, together with the exploratory tools are freely available at our webserver http://find.princeton.edu, which provides a valuable tool for a range of applications, from predicting spatio-temporal expression patterns to recognizing tissue signatures from differential gene expression profiles.",2019-09-25 +32128395,Stitching the synapse: Cross-linking mass spectrometry into resolving synaptic protein interactions.,"Synaptic transmission is the predominant form of communication in the brain. It requires functionally specialized molecular machineries constituted by thousands of interacting synaptic proteins. Here, we made use of recent advances in cross-linking mass spectrometry (XL-MS) in combination with biochemical and computational approaches to reveal the architecture and assembly of synaptic protein complexes from mouse brain hippocampus and cerebellum. We obtained 11,999 unique lysine-lysine cross-links, comprising connections within and between 2362 proteins. This extensive collection was the basis to identify novel protein partners, to model protein conformational dynamics, and to delineate within and between protein interactions of main synaptic constituents, such as Camk2, the AMPA-type glutamate receptor, and associated proteins. 
Using XL-MS, we generated a protein interaction resource that we made easily accessible via a web-based platform (http://xlink.cncr.nl) to provide new entries into exploration of all protein interactions identified.",2020-02-19 +27789702,Cistrome Data Browser: a data portal for ChIP-Seq and chromatin accessibility data in human and mouse.,"Chromatin immunoprecipitation, DNase I hypersensitivity and transposase-accessibility assays combined with high-throughput sequencing enable the genome-wide study of chromatin dynamics, transcription factor binding and gene regulation. Although rapidly accumulating publicly available ChIP-seq, DNase-seq and ATAC-seq data are a valuable resource for the systematic investigation of gene regulation processes, a lack of standardized curation, quality control and analysis procedures have hindered extensive reuse of these data. To overcome this challenge, we built the Cistrome database, a collection of ChIP-seq and chromatin accessibility data (DNase-seq and ATAC-seq) published before January 1, 2016, including 13 366 human and 9953 mouse samples. All the data have been carefully curated and processed with a streamlined analysis pipeline and evaluated with comprehensive quality control metrics. We have also created a user-friendly web server for data query, exploration and visualization. The resulting Cistrome DB (Cistrome Data Browser), available online at http://cistrome.org/db, is expected to become a valuable resource for transcriptional and epigenetic regulation studies.",2016-10-26 +,Developing educational resources for population genetics in R: an open and collaborative approach,"The r computing and statistical language community has developed a myriad of resources for conducting population genetic analyses. However, resources for learning how to carry out population genetic analyses in r are scattered and often incomplete, which can make acquiring this skill unnecessarily difficult and time consuming. 
To address this gap, we developed an online community resource with guidance and working demonstrations for conducting population genetic analyses in r. The resource is freely available at http://popgen.nescent.org and includes material for both novices and advanced users of r for population genetics. To facilitate continued maintenance and growth of this resource, we developed a toolchain, process and conventions designed to (i) minimize financial and labour costs of upkeep; (ii) to provide a low barrier to contribution; and (iii) to ensure strong quality assurance. The toolchain includes automatic integration testing of every change and rebuilding of the website when new vignettes or edits are accepted. The process and conventions largely follow a common, distributed version control‐based contribution workflow, which is used to provide and manage open peer review by designated website editors. The online resources include detailed documentation of this process, including video tutorials. We invite the community of population geneticists working in r to contribute to this resource, whether for a new use case of their own, or as one of the vignettes from the ‘wish list’ we maintain, or by improving existing vignettes.",2017-01-01 +34571026,Global Incidence of Acute Pancreatitis Is Increasing Over Time: A Systematic Review and Meta-Analysis.,"

Background & aims

Acute pancreatitis is a common disease with significant associated morbidity and mortality. We performed a systematic review and meta-analysis of population-based studies to explore the changing temporal trends of acute pancreatitis incidence globally.

Methods

We performed a systematic literature search to identify population-based studies reporting the annual incidence of acute pancreatitis. Abstracts were assessed independently to identify applicable articles for full-text review and data extraction. Joinpoint temporal trend analyses were performed to calculate the average annual percent change (AAPC) with 95% confidence intervals (CIs). The AAPCs were pooled in a meta-analysis to capture the overall and regional trends in acute pancreatitis incidence over time. Temporal data were summarized in a static map and an interactive, web-based map.

Results

Forty-four studies reported the temporal incidence of acute pancreatitis (online interactive map: https://kaplan-acute-pancreatitis-ucalgary.hub.arcgis.com/). The incidence of acute pancreatitis has increased from 1961 to 2016 (AAPC, 3.07%; 95% CI, 2.30% to 3.84%; n = 34). Increasing incidence was observed in North America (AAPC, 3.67%; 95% CI, 2.76% to 4.57%; n = 4) and Europe (AAPC, 2.77%; 95% CI, 1.91% to 3.63%; n = 23). The incidence of acute pancreatitis was stable in Asia (AAPC, -0.28%; 95% CI, -5.03% to 4.47%; n = 4).

Conclusions

This meta-analysis provides a comprehensive overview of the global incidence of acute pancreatitis over the last 56 years and demonstrates a steadily rising incidence over time in most countries of the Western world. More studies are needed to better define the changing incidence of acute pancreatitis in Asia, Africa, and Latin America.",2021-09-25 +28502574,DINeR: Database for Insect Neuropeptide Research.,"Neuropeptides are responsible for regulating a variety of functions, including development, metabolism, water and ion homeostasis, and as neuromodulators in circuits of the central nervous system. Numerous neuropeptides have been identified and characterized. However, both discovery and functional characterization of neuropeptides across the massive Class Insecta has been sporadic. To leverage advances in post-genomic technologies for this rapidly growing field, insect neuroendocrinology requires a consolidated, comprehensive and standardised resource for managing neuropeptide information. The Database for Insect Neuropeptide Research (DINeR) is a web-based database-application used for search and retrieval of neuropeptide information of various insect species detailing their isoform sequences, physiological functionality and images of their receptor-binding sites, in an intuitive, accessible and user-friendly format. The curated data includes representatives of 50 well described neuropeptide families from over 400 different insect species. Approximately 4700 FASTA formatted, neuropeptide isoform amino acid sequences and over 200 records of physiological functionality have been recorded based on published literature. Also available are images of neuropeptide receptor locations. In addition, the data include comprehensive summaries for each neuropeptide family, including their function, location, known functionality, as well as cladograms, sequence alignments and logos covering most insect orders. 
Moreover, we have adopted a standardised nomenclature to address inconsistent classification of neuropeptides. As part of the H2020 nEUROSTRESSPEP project, the data will be actively maintained and curated, ensuring a comprehensive and standardised resource for the scientific community. DINeR is publicly available at the project website: http://www.neurostresspep.eu/diner/.",2017-05-11 +34809521,Selective precipitation of RNA with linear polyacrylamide.,"Selective precipitation of RNA is often used in molecular biology as one of the methods for separation of nucleic acids to obtain samples enriched with DNA or RNA molecules alone or for purification of RNA samples. In the present study a simple and fast approach for selective precipitation of RNA with linear polyacrylamide is proposed for the first time. The method is based on the different predispositions of the DNA and RNA molecules to bind with the polyacrylamide. In this process, the linear polyacrylamide is used as the flocculant, collecting RNA particles to form aggregate, which then precipitated at low alcohol concentration. During and after precipitation the temperature is adjusted to maintain high solubility of DNA and other contaminates at given pH, salt and alcohol concentrations on the one hand, and globular state of polyacrylamide, preventing solubility of the RNA-LPA aggregate, on the other hand. The precipitated RNA can be used directly for RT-qPCR assay. The principal advantage of the present approach is the fast and quantitative precipitation of most RNA species from very dilute solutions. This makes it possible to obtain both almost DNA-free RNA and RNA-free DNA samples in one process. Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.2007397 .",2021-11-22 +32527280,DDIEM: drug database for inborn errors of metabolism.,"

Background

Inborn errors of metabolism (IEM) represent a subclass of rare inherited diseases caused by a wide range of defects in metabolic enzymes or their regulation. Of over a thousand characterized IEMs, only about half are understood at the molecular level, and overall the development of treatment and management strategies has proved challenging. An overview of the changing landscape of therapeutic approaches is helpful in assessing strategic patterns in the approach to therapy, but the information is scattered throughout the literature and public data resources.

Results

We gathered data on therapeutic strategies for 300 diseases into the Drug Database for Inborn Errors of Metabolism (DDIEM). Therapeutic approaches, including both successful and ineffective treatments, were manually classified by their mechanisms of action using a new ontology.

Conclusions

We present a manually curated, ontologically formalized knowledgebase of drugs, therapeutic procedures, and mitigated phenotypes. DDIEM is freely available through a web interface and for download at http://ddiem.phenomebrowser.net.",2020-06-11 +35047875,Structure-Based Modeling of SARS-CoV-2 Peptide/HLA-A02 Antigens.,"SARS-CoV-2-specific CD4 and CD8 T cells have been shown to be present in individuals with acute, mild, and asymptomatic Coronavirus disease (COVID-19). Toward the development of diagnostic and therapeutic tools to fight COVID-19, it is important to predict and characterize T cell epitopes expressed by SARS-CoV-2. Here, we use RosettaMHC, a comparative modeling approach which leverages existing structures of peptide/MHC complexes available in the Protein Data Bank, to derive accurate 3D models for putative SARS-CoV-2 CD8 epitopes. We outline an application of our method to model 8-10 residue epitopic peptides predicted to bind to the common allele HLA-A*02:01, and we make our models publicly available through an online database (https://rosettamhc.chemistry.ucsc.edu). We further compare electrostatic surfaces with models of homologous peptide/HLA-A*02:01 complexes from human common cold coronavirus strains to identify epitopes which may be recognized by a shared pool of cross-reactive TCRs. As more detailed studies on antigen-specific T cell recognition become available, RosettaMHC models can be used to understand the link between peptide/HLA complex structure and surface chemistry with immunogenicity, in the context of SARS-CoV-2 infection.",2020-11-17 +,Cloning and expression analysis of three critical triterpenoid pathway genes in Osmanthus fragrans,"Osmanthus fragrans is an important ornamental tree and has been widely planted in China because of its pleasant aroma, which is mainly due to terpenes. The monoterpenoid and sesquiterpenoid metabolic pathways of sweet osmanthus have been well studied. 
However, these studies were mainly focused on volatile small molecule compounds. The molecular regulation mechanism of synthesis of large molecule compounds (triterpenoids) remains unclear. Squalene synthase (SQS), squalene epoxidase (SQE), and beta-amyrin synthase (BETA-AS) are three critical enzymes of the triterpenoid biosynthesis pathway. In this study, the full-length cDNA and gDNA sequences of OfSQS, OfSQE, and OfBETA-AS were isolated from sweet osmanthus. Phylogenetic analysis suggested that OfSQS and OfSQE had the closest relationship with Sesamum indicum, and OfBETA-AS sequence shared the highest similarity of 99% with that of Olea europaea. The qRT-PCR analysis revealed that the three genes were highly expressed in flowers, especially OfSQE and OfBETA-AS, which were predominantly expressed in the flowers of both “Boye” and “Rixiang” cultivars, suggesting that they might play important roles in the accumulation of triterpenoids in flowers of O. fragrans. Furthermore, the expression of OfBETA-AS in the two cultivars was significantly different during all the five flowering stages; this suggested that OfBETA-AS may be the critical gene for the differences in the accumulation of triterpenoids. The evidence indicates that OfBETA-AS could be the key gene in the triterpenoid synthesis pathway, and it could also be used as a critical gene resource in the synthesis of essential oils by using bioengineered bacteria. How to cite: Yang X, Ding W, Yue Y, et al. Cloning and expression analysis of three critical triterpenoids pathway genes in Osmanthus fragrans. Electron J Biotechnol 2018;36. https://doi.org/10.1016/j.ejbt.2018.08.007.",2018-11-01 +33002112,EnzyMine: a comprehensive database for enzyme function annotation with enzymatic reaction chemical feature. ,"Addition of chemical structural information in enzymatic reactions has proven to be significant for accurate enzyme function prediction. 
However, such chemical data lack systematic feature mining and hardly exist in enzyme-related databases. Therefore, global mining of enzymatic reactions will offer a unique landscape for researchers to understand the basic functional mechanisms of natural bioprocesses and facilitate enzyme function annotation. Here, we established a new knowledge base called EnzyMine, through which we propose to elucidate enzymatic reaction features and then link them with sequence and structural annotations. EnzyMine represents an advanced database that extends enzyme knowledge by incorporating reaction chemical feature strategies, strengthening the connectivity between enzyme and metabolic reactions. Therefore, it has the potential to reveal many new metabolic pathways involved with given enzymes, as well as expand enzyme function annotation. Database URL: http://www.rxnfinder.org/enzymine/.",2020-10-01 +33970215,Differential gene expression analysis for multi-subject single cell RNA sequencing studies with aggregateBioVar. ,"Single cell RNA sequencing (scRNA-seq) studies provide more granular biological information than bulk RNA sequencing, but bulk RNA sequencing has remained popular due to relatively lower costs per sample, which has allowed investigators to process more biological replicates and design more powerful studies. As scRNA-seq costs have decreased, collecting data from more than one biological replicate has become more feasible, but careful modeling of different layers of biological variation remains challenging for many users. Here, we propose a statistical model for scRNA-seq gene counts, describe a simple method for estimating model parameters, and show that failing to account for additional biological variation in scRNA-seq studies can inflate false discovery rates of statistical tests. 
In a simulation study, we show that when the gene expression distribution of a population of cells varies between subjects, a naïve approach to differential expression analysis will inflate the false discovery rate. We also compare multiple differential expression testing methods on scRNA-seq data sets from human samples and from animal models. These analyses suggest that a naïve approach to differential expression testing could lead to many false discoveries; in contrast, an approach based on pseudobulk counts has better false discovery rate control. A software package, aggregateBioVar, is freely available on Bioconductor (https://www.bioconductor.org/packages/release/bioc/html/aggregateBioVar.html) to accommodate compatibility with upstream and downstream methods in scRNA-seq data analysis pipelines. Raw gene-by-cell count matrices for pig scRNA-seq data are available as GEO accession GSE150211. Supplementary data are available at Bioinformatics online.",2021-05-10 +34724641,Machine learning-augmented objective functional testing in the degenerative spine: quantifying impairment using patient-specific five-repetition sit-to-stand assessment.,"

Objective

What is considered ""abnormal"" in clinical testing is typically defined by simple thresholds derived from normative data. For instance, when testing using the five-repetition sit-to-stand (5R-STS) test, the upper limit of normal (ULN) from a population of spine-healthy volunteers (10.5 seconds) is used to identify objective functional impairment (OFI), but this fails to consider different properties of individuals (e.g., taller and shorter, older and younger). Therefore, the authors developed a personalized testing strategy to quantify patient-specific OFI using machine learning.

Methods

Patients with disc herniation, spinal stenosis, spondylolisthesis, or discogenic chronic low-back pain and a population of spine-healthy volunteers, from two prospective studies, were included. A machine learning model was trained on normative data to predict personalized ""expected"" test times and their confidence intervals and ULNs (99th percentiles) based on simple demographics. OFI was defined as a test time greater than the personalized ULN. OFI was categorized into types 1 to 3 based on a clustering algorithm. A web app was developed to deploy the model clinically.

Results

Overall, 288 patients and 129 spine-healthy individuals were included. The model predicted ""expected"" test times with a mean absolute error of 1.18 (95% CI 1.13-1.21) seconds and R2 of 0.37 (95% CI 0.34-0.41). Based on the implemented personalized testing strategy, 191 patients (66.3%) exhibited OFI. Type 1, 2, and 3 impairments were seen in 64 (33.5%), 91 (47.6%), and 36 (18.8%) patients, respectively. Increasing detected levels of OFI were associated with statistically significant increases in subjective functional impairment, extreme anxiety and depression symptoms, being bedridden, extreme pain or discomfort, inability to carry out activities of daily living, and a limited ability to work.

Conclusions

In the era of ""precision medicine,"" simple population-based thresholds may eventually not be adequate to monitor quality and safety in neurosurgery. Individualized assessment integrating machine learning techniques provides more detailed and objective clinical assessment. The personalized testing strategy demonstrated concurrent validity with quality-of-life measures, and the freely accessible web app (https://neurosurgery.shinyapps.io/5RSTS/) enabled clinical application.",2021-11-01 +33067612,A novel estimator of the interaction matrix in Graphical Gaussian Model of omics data using the entropy of non-equilibrium systems.,"

Motivation

Inferring the direct relationships between biomolecules from omics datasets is essential for the understanding of biological and disease mechanisms. Gaussian Graphical Model (GGM) provides a fairly simple and accurate representation of these interactions. However, estimation of the associated interaction matrix using data is challenging due to a high number of measured molecules and a low number of samples.

Results

In this article, we use the thermodynamic entropy of the non-equilibrium system of molecules and the data-driven constraints among their expressions to derive an analytic formula for the interaction matrix of Gaussian models. Through a data simulation, we show that our method returns an improved estimation of the interaction matrix. Also, using the developed method, we estimate the interaction matrix associated with plasma proteome and construct the corresponding GGM and show that known NAFLD-related proteins like ADIPOQ, APOC, APOE, DPP4, CAT, GC, HP, CETP, SERPINA1, COLA1, PIGR, IGHD, SAA1 and FCGBP are among the top 15% most interacting proteins of the dataset.

Availability and implementation

The supplementary materials can be found in the following URL: http://dynamic-proteome.utmb.edu/PrecisionMatrixEstimater/PrecisionMatrixEstimater.aspx.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +34292731,Nanoscale Solid-Phase Isobaric Labeling for Multiplexed Quantitative Phosphoproteomics.,"We established a workflow for highly sensitive multiplexed quantitative phosphoproteomics using a nanoscale solid-phase tandem mass tag (TMT) labeling reactor. Phosphopeptides were first enriched by titanium oxide chromatography and then labeled with isobaric TMT reagents in a StageTip packed with hydrophobic polymer-based sorbents. We found that TMT-labeled singly phosphorylated peptides tend to flow through the titanium oxide column. Therefore, TMT labeling should be performed after the enrichment step from tryptic peptides, resulting in the need for microscale reactions with small amounts of phosphopeptides. Using an optimized protocol for tens to hundreds of nanograms of phosphopeptides, we obtained a nearly 10-fold increase in sensitivity compared to the conventional solution-based TMT protocol. We demonstrate that this nanoscale phosphoproteomics protocol works for 50 μg of HeLa proteins treated with selumetinib, and we successfully quantified the selumetinib-regulated phosphorylated sites on a proteome scale. The MS raw data files have been deposited with the ProteomeXchange Consortium via the jPOST partner repository (https://jpostdb.org) with the data set identifier PXD025536.",2021-07-22 +33642266,Sleep and Covid-19.,"

Background

COVID-19 pandemic has affected the world globally causing widespread repercussions on individuals' physical, mental and emotional well-being. In such times, sleep is likely to be affected.

Objective

The aim of this study was to present the available literature on sleep and also the foresight as to the future national strategy to mitigate the effects of this pandemic.

Materials and methods

An extensive literature search on PubMed, Google Scholar, Epistemonikos database (https://www.epistemonikos.org), PsycINFO for available literature on the prevalence of sleep problem on COVID-19 was done. Cross-citation search was also conducted to increase relevance of the review. The key words used were- (((((((((((insomnia)) OR (sleep)) OR (sleepiness)) OR (""sleep quality"")) OR (OSA)) OR (""obstructive sleep apnoea"")) OR (""obstructive sleep apnea"")) OR ((""sleep problem"")) AND ""covid-19"" OR covid19* OR ""COVID-19"" OR ""2019-nCoV"" OR cv19* OR ""cv-19"" OR ""cv 19"" OR ""n-cov"" OR ncov* OR ""sars-cov-2"" OR ""sars-cov2"" OR ""2019-ncov"" OR ""SARS-Coronavirus-2"" OR ""SARS-Coronavirus2"" OR (wuhan* AND (virus OR viruses OR viral)) OR (covid* AND (virus OR viruses OR viral)) OR ""covid-19-related"" OR ""SARS-CoV-2-related"" OR ""SARS-CoV2-related"" OR ""2019-nCoV-related"" OR ""cv-19-related"" OR ""n-cov-related""). Inclusion criteria consisted of articles in English, published from Jan 2020 till 19 Apr 2020. Two reviewers independently screened each research study for inclusion and eligibility.

Results and conclusion

Sleep is affected during COVID-19 pandemic in patients, their families, health-care workers and their families, population in isolation, and quarantine and as such in public. Limited literature exists with subjective data and no objective criteria were found to study sleep in COVID-19 pandemic. OSA was found to be a frequent baseline characteristic of COVID-19 patients. A need to follow guidelines is of paramount importance and strategies to better sleep in the population need to be addressed.",2021-01-01 +33523614,Direct Observational Therapy for the Treatment of Tuberculosis: A Review of Clinical Evidence and Guidelines,"Tuberculosis (TB) is an infectious disease caused by the bacteria Mycobacterium tuberculosis. It is transmitted between humans primarily through aerosols that are generated through the forceful expiration of air (e.g., coughing, sneezing). Infection with M. tuberculosis does not always result in active TB disease, producing instead an asymptomatic latent TB infection. People with latent TB cannot spread the disease to others, but they can develop active TB disease. Symptoms of active TB disease include a bad cough, fever, and weight loss. The current treatments for active and latent TB involve long courses of antibiotic treatments, which often include more than one drug. Incomplete treatment adherence is a major challenge of TB treatment, and failing to complete the treatment can result in persistent disease or the development of drug-resistant TB. One of the strategies for improving adherence is directly observed or direct observational therapy (DOT). Standard DOT is conducted in person and involves directly watching the patient swallow each dose of medication. Compared to self-administered therapy, DOT has been shown to be effective, however, it is very resource intensive for both the patient and the health care service. 
It is unclear who should provide DOT, and whether this person needs to be a health care professional (e.g., public health nurse) or whether lay people can also provide DOT (e.g., family, community members). It is also unclear whether the location where DOT is administered is important. DOT could involve the patients returning to a health care facility every day (e.g., TB clinic, hospital), but it is also possible that DOT can occur at other locations (e.g., workplace, home). Alternatively, thanks to advances in technology, video observational therapy (VOT) is possible, where patients are observed taking their medication over video (often facilitated through a smart phone). VOT can occur in real time (i.e., synchronous VOT), or patients can record and submit videos (i.e., asynchronous VOT). VOT could help minimize resources for providing DOT, but there are some privacy concerns with VOT due to the technology. The purpose of the current report is to summarize and critically appraise the relevant evidence regarding the provision of DOT for the treatment of TB. Additionally, evidence-based guidelines with recommendations regarding the use of DOT for the treatment of TB will be reviewed. This report is a component of a larger CADTH Condition Level Review on TB. A condition level review is an assessment that incorporates all aspects of a condition, from prevention, detection, treatment, and management. For more information on CADTH’s Condition Level Review of TB, please visit the project page (https://www.cadth.ca/tuberculosis).",2021-02-02 +31701143,The European Bioinformatics Institute in 2020: building a global infrastructure of interconnected data resources for the life sciences.,"Data resources at the European Bioinformatics Institute (EMBL-EBI, https://www.ebi.ac.uk/) archive, organize and provide added-value analysis of research data produced around the world. 
This year's update for EMBL-EBI focuses on data exchanges among resources, both within the institute and with a wider global infrastructure. Within EMBL-EBI, data resources exchange data through a rich network of data flows mediated by automated systems. This network ensures that users are served with as much information as possible from any search and any starting point within EMBL-EBI's websites. EMBL-EBI data resources also exchange data with hundreds of other data resources worldwide and collectively are a key component of a global infrastructure of interconnected life sciences data resources. We also describe the BioImage Archive, a deposition database for raw images derived from primary research that will supply data for future knowledgebases that will add value through curation of primary image data. We also report a new release of the PRIDE database with an improved technical infrastructure, a new API, a new webpage, and improved data exchange with UniProt and Expression Atlas. Training is a core mission of EMBL-EBI and in 2018 our training team served more users, both in-person and through web-based programmes, than ever before.",2020-01-01 +34938773,PreBINDS: An Interactive Web Tool to Create Appropriate Datasets for Predicting Compound-Protein Interactions.,"Given the abundant computational resources and the huge amount of data of compound-protein interactions (CPIs), constructing appropriate datasets for learning and evaluating prediction models for CPIs is not always easy. For this study, we have developed a web server to facilitate the development and evaluation of prediction models by providing an appropriate dataset according to the task. Our web server provides an environment and dataset that aid model developers and evaluators in obtaining a suitable dataset for both proteins and compounds, in addition to attributes necessary for deep learning. 
With the web server interface, users can customize the CPI dataset derived from ChEMBL by setting positive and negative thresholds to be adjusted according to the user's definitions. We have also implemented a function for graphic display of the distribution of activity values in the dataset as a histogram to set appropriate thresholds for positive and negative examples. These functions enable effective development and evaluation of models. Furthermore, users can prepare their task-specific datasets by selecting a set of target proteins based on various criteria such as Pfam families, ChEMBL's classification, and sequence similarities. The accuracy and efficiency of in silico screening and drug design using machine learning including deep learning can therefore be improved by facilitating access to an appropriate dataset prepared using our web server (https://binds.lifematics.work/).",2021-12-06 +27451428,SZDB: A Database for Schizophrenia Genetic Research.,"Schizophrenia (SZ) is a debilitating brain disorder with a complex genetic architecture. Genetic studies, especially recent genome-wide association studies (GWAS), have identified multiple variants (loci) conferring risk to SZ. However, how to efficiently extract meaningful biological information from bulk genetic findings of SZ remains a major challenge. There is a pressing need to integrate multiple layers of data from various sources, eg, genetic findings from GWAS, copy number variations (CNVs), association and linkage studies, gene expression, protein-protein interaction (PPI), co-expression, expression quantitative trait loci (eQTL), and Encyclopedia of DNA Elements (ENCODE) data, to provide a comprehensive resource to facilitate the translation of genetic findings into SZ molecular diagnosis and mechanism study. Here we developed the SZDB database (http://www.szdb.org/), a comprehensive resource for SZ research. 
SZ genetic data, gene expression data, network-based data, brain eQTL data, and SNP function annotation information were systematically extracted, curated and deposited in SZDB. In-depth analyses and systematic integration were performed to identify top prioritized SZ genes and enriched pathways. Multiple types of data from various layers of SZ research were systematically integrated and deposited in SZDB. In-depth data analyses and integration identified top prioritized SZ genes and enriched pathways. We further showed that genes implicated in SZ are highly co-expressed in human brain and proteins encoded by the prioritized SZ risk genes are significantly interacted. The user-friendly SZDB provides high-confidence candidate variants and genes for further functional characterization. More important, SZDB provides convenient online tools for data search and browse, data integration, and customized data analyses.",2017-03-01 +33238004,MetaTropismDB: a database of organ-specific metastasis induced by human cancer cell lines in mouse models. ,"The organotropism is the propensity of metastatic cancer cells to colonize preferably certain distant organs, resulting in a non-random distribution of metastases. In order to shed light on this behaviour, several studies were performed by the injection of human cancer cell lines into immunocompromised mouse models. However, the information about these experiments is spread in the literature. For each xenograft experiment reported in the literature, we annotated both the experimental conditions and outcomes, including details on inoculated human cell lines, mouse models, injection methods, sites of metastasis, organs not colonized, rate of metastasis, latency time, overall survival and the involved genes. We created MetaTropismDB, a freely available database collecting hand-curated data useful to highlight the mechanisms of organ-specific metastasis. 
Currently, it stores the results of 513 experiments in which injections of 219 human cell lines have been carried out in mouse models. Notably, 296 genes involved in organotropic metastases have been collected. This specialized database allows the researchers to compare the current results about organotropism and plan future experiments in order to identify which tumour molecular signatures establish if and where the metastasis will develop. Database URL:  http://www.introni.it/Metastasis/metastasis.html.",2020-11-01 +34511303,"European Association of Urology Guidelines on Non-muscle-invasive Bladder Cancer (Ta, T1, and Carcinoma in Situ).","

Context

The European Association of Urology (EAU) has released an updated version of the guidelines on non-muscle-invasive bladder cancer (NMIBC).

Objective

To present the 2021 EAU guidelines on NMIBC.

Evidence acquisition

A broad and comprehensive scoping exercise covering all areas of the NMIBC guidelines since the 2020 version was performed. Databases covered by the search included Medline, EMBASE, and the Cochrane Libraries. Previous guidelines were updated, and the level of evidence and grade of recommendation were assigned.

Evidence synthesis

Tumours staged as Ta, T1 and carcinoma in situ (CIS) are grouped under the heading of NMIBC. Diagnosis depends on cystoscopy and histological evaluation of tissue obtained via transurethral resection of the bladder (TURB) for papillary tumours or via multiple bladder biopsies for CIS. For papillary lesions, a complete TURB is essential for the patient's prognosis and correct diagnosis. In cases for which the initial resection is incomplete, there is no muscle in the specimen, or a T1 tumour is detected, a second TURB should be performed within 2-6 wk. The risk of progression may be estimated for individual patients using the 2021 EAU scoring model. On the basis of their individual risk of progression, patients are stratified as having low, intermediate, high, or very high risk, which is pivotal to recommending adjuvant treatment. For patients with tumours presumed to be at low risk and for small papillary recurrences detected more than 1 yr after a previous TURB, one immediate chemotherapy instillation is recommended. Patients with an intermediate-risk tumour should receive 1 yr of full-dose intravesical bacillus Calmette-Guérin (BCG) immunotherapy or instillations of chemotherapy for a maximum of 1 yr. For patients with high-risk tumours, full-dose intravesical BCG for 1-3 yr is indicated. For patients at very high risk of tumour progression, immediate radical cystectomy should be considered. Cystectomy is also recommended for BCG-unresponsive tumours. The extended version of the guidelines is available on the EAU website at https://uroweb.org/guideline/non-muscle-invasive-bladder-cancer/.

Conclusions

These abridged EAU guidelines present updated information on the diagnosis and treatment of NMIBC for incorporation into clinical practice.

Patient summary

The European Association of Urology has released updated guidelines on the classification, risk factors, diagnosis, prognostic factors, and treatment of non-muscle-invasive bladder cancer. The recommendations are based on the literature up to 2020, with emphasis on the highest level of evidence. Classification of patients as having low, intermediate, or high risk is essential in deciding on suitable treatment. Surgical removal of the bladder should be considered for tumours that do not respond to bacillus Calmette-Guérin (BCG) treatment and tumours with the highest risk of progression.",2021-09-10 +31873725,Causal network perturbations for instance-specific analysis of single cell and disease samples.,"

Motivation

Complex diseases involve perturbation in multiple pathways and a major challenge in clinical genomics is characterizing pathway perturbations in individual samples. This can lead to patient-specific identification of the underlying mechanism of disease thereby improving diagnosis and personalizing treatment. Existing methods rely on external databases to quantify pathway activity scores. This ignores the data dependencies and that pathways are incomplete or condition-specific.

Results

ssNPA is a new approach for subtyping samples based on deregulation of their gene networks. ssNPA learns a causal graph directly from control data. Sample-specific network neighborhood deregulation is quantified via the error incurred in predicting the expression of each gene from its Markov blanket. We evaluate the performance of ssNPA on liver development single-cell RNA-seq data, where the correct cell timing is recovered; and two TCGA datasets, where ssNPA patient clusters have significant survival differences. In all analyses ssNPA consistently outperforms alternative methods, highlighting the advantage of network-based approaches.

Availability and implementation

http://www.benoslab.pitt.edu/Software/ssnpa/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +33798715,COnVIDa: COVID-19 multidisciplinary data collection and dashboard.,"Since the first reported case in Wuhan in late 2019, COVID-19 has rapidly spread worldwide, dramatically impacting the lives of millions of citizens. To deal with the severe crisis resulting from the pandemic, worldwide institutions have been forced to make decisions that profoundly affect the socio-economic realm. In this sense, researchers from diverse knowledge areas are investigating the behavior of the disease in a rush against time. In both cases, the lack of reliable data has been an obstacle to carry out such tasks with accuracy. To tackle this challenge, COnVIDa (https://convida.inf.um.es) has been designed and developed as a user-friendly tool that easily gathers rigorous multidisciplinary data related to the COVID-19 pandemic from different data sources. In particular, the pandemic expansion is analyzed with variables of health nature, but also social ones, mobility, etc. Besides, COnVIDa permits to smoothly join such data, compare and download them for further analysis. Due to the open-science nature of the project, COnVIDa is easily extensible to any other region of the planet. In this way, COnVIDa becomes a data facilitator for decision-making processes, as well as a catalyst for new scientific researches related to this pandemic.",2021-03-30 +32325033,BraInMap Elucidates the Macromolecular Connectivity Landscape of Mammalian Brain.,"Connectivity webs mediate the unique biology of the mammalian brain. Yet, while cell circuit maps are increasingly available, knowledge of their underlying molecular networks remains limited. Here, we applied multi-dimensional biochemical fractionation with mass spectrometry and machine learning to survey endogenous macromolecules across the adult mouse brain. We defined a global ""interactome"" comprising over one thousand multi-protein complexes. 
These include hundreds of brain-selective assemblies that have distinct physical and functional attributes, show regional and cell-type specificity, and have links to core neurological processes and disorders. Using reciprocal pull-downs and a transgenic model, we validated a putative 28-member RNA-binding protein complex associated with amyotrophic lateral sclerosis, suggesting a coordinated function in alternative splicing in disease progression. This brain interaction map (BraInMap) resource facilitates mechanistic exploration of the unique molecular machinery driving core cellular processes of the central nervous system. It is publicly available and can be explored here https://www.bu.edu/dbin/cnsb/mousebrain/.",2020-04-01 +30032758,"A review on human fecal metabolomics: Methods, applications and the human fecal metabolome database.","Metabolomic analysis of human biospecimens had progressed quickly over the past decade. Technological and methodological advances have led to the comprehensive characterization of human serum, urine, cerebrospinal fluid and saliva metabolomes, and the creation of freely available metabolome reference databases. Unfortunately, the characterization of the human fecal metabolome still lags behind these other metabolomes in terms of the availability of standardized methods and freely available resources. The purpose of this review is to bring the knowledge of the human fecal metabolome, and the methods to characterize it, to the same level as most other human biofluid metabolomes. More specifically, this review is intended to critically assess the field of fecal metabolomics and to provide a comprehensive review of the current state of knowledge with regard to the protocols, technologies and remaining challenges in fecal metabolite analysis. 
In addition to providing an overview of fecal metabolomics and some consensus recommendations, we also present the human fecal metabolome database (HFMDB - http://www.fecalmetabolome.ca), a freely available, manually curated resource that currently contains over 6000 identified human fecal metabolites. Each entry in the HFMDB includes extensive chemical information, metabolite descriptions and reference data in the same format as the Human Metabolome Database (HMDB).",2018-05-12 +34086846,TIGA: Target illumination GWAS analytics. ,"Genome wide association studies (GWAS) can reveal important genotype-phenotype associations, however, data quality and interpretability issues must be addressed. For drug discovery scientists seeking to prioritize targets based on the available evidence, these issues go beyond the single study. Here, we describe rational ranking, filtering and interpretation of inferred gene-trait associations and data aggregation across studies by leveraging existing curation and harmonization efforts. Each gene-trait association is evaluated for confidence, with scores derived solely from aggregated statistics, linking a protein-coding gene and phenotype. We propose a method for assessing confidence in gene-trait associations from evidence aggregated across studies, including a bibliometric assessment of scientific consensus based on the iCite Relative Citation Ratio, and meanRank scores, to aggregate multivariate evidence. This method, intended for drug target hypothesis generation, scoring and ranking, has been implemented as an analytical pipeline, available as open source, with public datasets of results, and a web application designed for usability by drug discovery scientists. Web application, datasets, and source code via: https://unmtid-shinyapps.net/tiga/. 
Supplementary data are available at Bioinformatics online.",2021-06-04 +32636234,Examining and Fine-tuning the Selection of Glycan Compositions with GlyConnect Compozitor.,"A key point in achieving accurate intact glycopeptide identification is the definition of the glycan composition file that is used to match experimental with theoretical masses by a glycoproteomics search engine. At present, these files are mainly built from searching the literature and/or querying data sources focused on posttranslational modifications. Most glycoproteomics search engines include a default composition file that is readily used when processing MS data. We introduce here a glycan composition visualizing and comparative tool associated with the GlyConnect database and called GlyConnect Compozitor. It offers a web interface through which the database can be queried to bring out contextual information relative to a set of glycan compositions. The tool takes advantage of compositions being related to one another through shared monosaccharide counts and outputs interactive graphs summarizing information searched in the database. These results provide a guide for selecting or deselecting compositions in a file in order to reflect the context of a study as closely as possible. They also confirm the consistency of a set of compositions based on the content of the GlyConnect database. As part of the tool collection of the Glycomics@ExPASy initiative, Compozitor is hosted at https://glyconnect.expasy.org/compozitor/ where it can be run as a web application. It is also directly accessible from the GlyConnect database.",2020-07-07 +34412448,Data resource profile: the Korea National Hospital Discharge In-depth Injury Survey.,"The Korea National Hospital Discharge In-depth Injury Survey (KNHDIS), which was started in 2005, is a national probability survey of general hospitals in Korea with 100 or more beds conducted by the Korea Disease Control and Prevention Agency (KDCA). 
The KNHDIS captures approximately 9% of discharged cases from sampled hospitals using a 2-stage stratified cluster sampling scheme, among which 13% are injury related cases, defined as S00-T98 (injury, poisoning, and certain other consequences of external causes) using International Classification of Diseases, 10th revision codes. The KNHDIS collects information on characteristics of injury-related discharges in order to understand the scale of injuries, identify risk factors, and provide data supporting prevention policies and intervention strategies. The types of data captured include the hospitals' information, detailed clinical information, and injury-related codes such as the mechanism, activities undertaken when injured (sports, leisure activities, work, treatment, and education), external causes of the injury, and location of the occurrence of the injury based on the International Classification of External Causes of Injuries. Furthermore, the means of transportation, risk factors for suicide, and toxic substances are recorded. Annual reports of the KNHDIS are publicly accessible to browse via the KDCA website (http://www.kdca.go.kr) and microdata are available free of charge upon request via email (kcdcinjury@korea.kr).",2021-08-17 +32549744,"Biodiversity data and new species descriptions of polychaetes from offshore waters of the Falkland Islands, an area undergoing hydrocarbon exploration.","Benthic environmental impact assessments and monitoring programs accompanying offshore hydrocarbon industry activities result in large collections of benthic organisms. Such collections offer great potential for systematics, biodiversity and biogeography research, but these opportunities are only rarely realised. In recent decades, the hydrocarbon industry has started exploration activities in offshore waters off the Falkland Islands. A large collection of ca. 
25,000 polychaete (Annelida) specimens, representing some 233 morphological species was processed at the Natural History Museum, London. Taxonomic assessment led to recognition of many polychaete species that are new to science. The existing taxonomic literature for the region is outdated and many species in existing literature are likely misidentifications. Initially, an online taxonomic guide (http://falklands.myspecies.info) was created, to provide a single taxonomic source for 191 polychaete species to standardise identification across different environmental contractors working in Falkland Islands. Here, this effort is continued to make data available for 18,015 specimens through publication of raw biodiversity data, checklist with links to online taxonomic information and formal descriptions of five new species. New species were chosen across different families to highlight the taxonomic novelty of this area: Apistobranchus jasoni Neal & Paterson, sp. nov. (Apistobranchidae), Leitoscoloplos olei Neal & Paterson, sp. nov. (Orbiniidae), Prosphaerosyllis modinouae Neal & Paterson, sp. nov. (Syllidae) and Aphelochaeta falklandica Paterson & Neal, sp. nov., and Dodecaceria saeria Paterson & Neal, sp. nov. (both Cirratulidae). The potential of the Falkland Islands material to provide up to date informationfor known species described in the literature is also highlighted by publishing images and redescription of Harmothoe anderssoni Bergström, 1916 and Aphelochaeta longisetosa (Hartmann-Schröder, 1965). Biodiversity and abundance data are made available through a DarwinCore database, including material collected from 83 stations at Sea Lion developmental oil field in North Falklands Basin and voucher specimens' data collected from exploratory oil wells in East Falklands Basin.",2020-06-03 +34674629,A consensus-based ensemble approach to improve transcriptome assembly.,"

Background

Systems-level analyses, such as differential gene expression analysis, co-expression analysis, and metabolic pathway reconstruction, depend on the accuracy of the transcriptome. Multiple tools exist to perform transcriptome assembly from RNAseq data. However, assembling high quality transcriptomes is still not a trivial problem. This is especially the case for non-model organisms where adequate reference genomes are often not available. Different methods produce different transcriptome models and there is no easy way to determine which are more accurate. Furthermore, having alternative-splicing events exacerbates such difficult assembly problems. While benchmarking transcriptome assemblies is critical, this is also not trivial due to the general lack of true reference transcriptomes.

Results

In this study, we first provide a pipeline to generate a set of the simulated benchmark transcriptome and corresponding RNAseq data. Using the simulated benchmarking datasets, we compared the performance of various transcriptome assembly approaches including both de novo and genome-guided methods. The results showed that the assembly performance deteriorates significantly when alternative transcripts (isoforms) exist or for genome-guided methods when the reference is not available from the same genome. To improve the transcriptome assembly performance, leveraging the overlapping predictions between different assemblies, we present a new consensus-based ensemble transcriptome assembly approach, ConSemble.

Conclusions

Without using a reference genome, ConSemble using four de novo assemblers achieved an accuracy up to twice as high as any de novo assemblers we compared. When a reference genome is available, ConSemble using four genome-guided assemblies removed many incorrectly assembled contigs with minimal impact on correctly assembled contigs, achieving higher precision and accuracy than individual genome-guided methods. Furthermore, ConSemble using de novo assemblers matched or exceeded the best performing genome-guided assemblers even when the transcriptomes included isoforms. We thus demonstrated that the ConSemble consensus strategy both for de novo and genome-guided assemblers can improve transcriptome assembly. The RNAseq simulation pipeline, the benchmark transcriptome datasets, and the script to perform the ConSemble assembly are all freely available from: http://bioinfolab.unl.edu/emlab/consemble/ .",2021-10-21 +34715773,Analytics and visualization tools to characterize single-cell stochasticity using bacterial single-cell movie cytometry data.,"

Background

Time-lapse microscopy live-cell imaging is essential for studying the evolution of bacterial communities at single-cell resolution. It allows capturing detailed information about the morphology, gene expression, and spatial characteristics of individual cells at every time instance of the imaging experiment. The image analysis of bacterial ""single-cell movies"" (videos) generates big data in the form of multidimensional time series of measured bacterial attributes. If properly analyzed, these datasets can help us decipher the bacterial communities' growth dynamics and identify the sources and potential functional role of intra- and inter-subpopulation heterogeneity. Recent research has highlighted the importance of investigating the role of biological ""noise"" in gene regulation, cell growth, cell division, etc. Single-cell analytics of complex single-cell movie datasets, capturing the interaction of multiple micro-colonies with thousands of cells, can shed light on essential phenomena for human health, such as the competition of pathogens and benign microbiome cells, the emergence of dormant cells (""persisters""), the formation of biofilms under different stress conditions, etc. However, highly accurate and automated bacterial bioimage analysis and single-cell analytics methods remain elusive, even though they are required before we can routinely exploit the plethora of data that single-cell movies generate.

Results

We present visualization and single-cell analytics using R (ViSCAR), a set of methods and corresponding functions, to visually explore and correlate single-cell attributes generated from the image processing of complex bacterial single-cell movies. They can be used to model and visualize the spatiotemporal evolution of attributes at different levels of the microbial community organization (i.e., cell population, colony, generation, etc.), to discover possible epigenetic information transfer across cell generations, infer mathematical and statistical models describing various stochastic phenomena (e.g., cell growth, cell division), and even identify and auto-correct errors introduced unavoidably during the bioimage analysis of a dense movie with thousands of overcrowded cells in the microscope's field of view.

Conclusions

ViSCAR empowers researchers to capture and characterize the stochasticity, uncover the mechanisms leading to cellular phenotypes of interest, and decipher a large heterogeneous microbial communities' dynamic behavior. ViSCAR source code is available from GitLab at https://gitlab.com/ManolakosLab/viscar .",2021-10-29 +30010730,RabGTD: a comprehensive database of rabbit genome and transcriptome. ,"The rabbit is a very important species for both biomedical research and agriculture animal breeding. They are not only the most-used experimental animals for the production of antibodies, but also widely used for studying a variety of human diseases. Here we developed RabGTD, the first comprehensive rabbit database containing both genome and transcriptome data generated by next-generation sequencing. Genomic variations coming from 79 samples were identified and annotated, including 33 samples of wild rabbits and 46 samples of domestic rabbits with diverse populations. Gene expression profiles of 86 tissue samples were compiled, including those from the most commonly used models for hyperlipidemia and atherosclerosis. RabGTD is a web-based and open-access resource, which also provides convenient functions and friendly interfaces of searching, browsing and downloading for users to explore the big data. Database URL: http://www.picb.ac.cn/RabGTD/.",2018-01-01 +34126115,Generation of High-Quality Pharmacokinetic Data From Parallel Tail Vein Dosing And Bleeding in Non-cannulated Rats.,"It is common practice to use cannulated rats for pharmacokinetic (PK) in-life studies as it yields high quality PK parameter estimation. While offering many benefits, cannulation requires surgery, post-surgical care, and cannula maintenance. As an alternative approach, the strategy of dosing and bleeding rats via the tail vein in a single experiment is technically feasible and theoretically offers many benefits. Unfortunately, however, as reported by F Tse et al. 
in 1984 (J Pharm Sci 73: https://doi.org/10.1002/jps.2600731128), parallel tail dosing and bleeding is scientifically flawed and yields inaccurate estimation of PK parameters following intravenous administration. The underlying causality of poor data quality has not been addressed in over 35 years. To overcome the technical flaws associated with parallel tail dosing and bleeding, we have developed a Tail-Dose-Bleed (TDB) method as a substitute for use of cannulated rats. Specifically, the method introduces a flush procedure after dosing, uses separate tail veins for dosing and bleeding, and adjusts dosing and sampling to the proximal and distal portions of the tail, respectively. To demonstrate the proof of principle for this TDB technique, several cassette dosing studies were conducted. The performance of the TDB technique is compared in both stand alone and animal crossover studies employing conventional jugular/femoral bleeding and dosing. The poor data via tail dosing and bleeding previously described by Tse et al. are also recapitulated using their described approach. To ensure broad applicability of the TDB technique, data were generated utilizing compounds of diverse physical chemical properties manifesting a range of clearance and/or volume of distribution characteristics. These data demonstrate that the TDB approach yields comparable PK profiles and parameters as compared to conventional femoral dosing / jugular bleeding. Using this newly described TDB procedure, we demonstrate the ability to overcome documented data quality issues when dosing and bleeding via the tail. The TDB technique has numerous operational advantages of reduced study turnaround time and improved cost effectiveness, but most importantly, addresses key animal welfare concerns relevant to institutional animal care and use committees (IACUC). The notable advantage here is reduced animal stress and discomfort by eliminating the need for surgery and recovery. 
And by consequence, allows for animals to be group housed and re-used without concern for loss of cannula patency. The tail dose and bleed method is simple and appears readily transferable to other laboratories.",2021-06-12 +34496744,RPocket: an intuitive database of RNA pocket topology information with RNA-ligand data resources.,"

Background

RNA regulates a variety of biological functions by interacting with other molecules. The ligand often binds in the RNA pocket to trigger structural changes or functions. Thus, it is essential to explore and visualize the RNA pocket to elucidate the structural and recognition mechanism for the RNA-ligand complex formation.

Results

In this work, we developed one user-friendly bioinformatics tool, RPocket. This database provides geometrical size, centroid, shape, secondary structure element for RNA pocket, RNA-ligand interaction information, and functional sites. We extracted 240 RNA pockets from 94 non-redundant RNA-ligand complex structures. We developed RPDescriptor to calculate the pocket geometrical property quantitatively. The geometrical information was then subjected to RNA-ligand binding analysis by incorporating the sequence, secondary structure, and geometrical combinations. This new approach takes advantage of both the atom-level precision of the structure and the nucleotide-level tertiary interactions. The results show that the higher-level topological pattern indeed improves the tertiary structure prediction. We also proposed a potential mechanism for RNA-ligand complex formation. The electrostatic interactions are responsible for long-range recognition, while the Van der Waals and hydrophobic contacts for short-range binding and optimization. These interaction pairs can be considered as distance constraints to guide complex structural modeling and drug design.

Conclusion

RPocket database would facilitate RNA-ligand engineering to regulate the complex formation for biological or medical applications. RPocket is available at http://zhaoserver.com.cn/RPocket/RPocket.html .",2021-09-08 +32871004,"CaNDis: a web server for investigation of causal relationships between diseases, drugs and drug targets.","

Motivation

Causal biological interaction networks represent cellular regulatory pathways. Their fusion with other biological data enables insights into disease mechanisms and novel opportunities for drug discovery.

Results

We developed Causal Network of Diseases (CaNDis), a web server for the exploration of a human causal interaction network, which we expanded with data on diseases and FDA-approved drugs, on the basis of which we constructed a disease-disease network in which the links represent the similarity between diseases. We show how CaNDis can be used to identify candidate genes with known and novel roles in disease co-occurrence and drug-drug interactions.

Availabilityand implementation

CaNDis is freely available to academic users at http://candis.ijs.si and http://candis.insilab.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +33797898,pDeep3: Toward More Accurate Spectrum Prediction with Fast Few-Shot Learning.,"Spectrum prediction using deep learning has attracted a lot of attention in recent years. Although existing deep learning methods have dramatically increased the prediction accuracy, there is still considerable space for improvement, which is presently limited by the difference of fragmentation types or instrument settings. In this work, we use the few-shot learning method to fit the data online to make up for the shortcoming. The method is evaluated using ten data sets, where the instruments includes Velos, QE, Lumos, and Sciex, with collision energies being differently set. Experimental results show that few-shot learning can achieve higher prediction accuracy with almost negligible computing resources. For example, on the data set from a untrained instrument Sciex-6600, within about 10 s, the prediction accuracy is increased from 69.7% to 86.4%; on the CID (collision-induced dissociation) data set, the prediction accuracy of the model trained by HCD (higher energy collision dissociation) spectra is increased from 48.0% to 83.9%. It is also shown that, the method is not critical to data quality and is sufficiently efficient to fill the accuracy gap. The source code of pDeep3 is available at http://pfind.ict.ac.cn/software/pdeep3.",2021-04-02 +34020534,scCancer: a package for automated processing of single-cell RNA-seq data in cancer. ,"Molecular heterogeneities and complex microenvironments bring great challenges for cancer diagnosis and treatment. Recent advances in single-cell RNA-sequencing (scRNA-seq) technology make it possible to study cancer cell heterogeneities and microenvironments at single-cell transcriptomic level. Here, we develop an R package named scCancer, which focuses on processing and analyzing scRNA-seq data for cancer research. 
Except basic data processing steps, this package takes several special considerations for cancer-specific features. Firstly, the package introduced comprehensive quality control metrics. Secondly, it used a data-driven machine learning algorithm to accurately identify major cancer microenvironment cell populations. Thirdly, it estimated a malignancy score to classify malignant (cancerous) and non-malignant cells. Then, it analyzed intra-tumor heterogeneities by key cellular phenotypes (such as cell cycle and stemness), gene signatures and cell-cell interactions. Besides, it provided multi-sample data integration analysis with different batch-effect correction strategies. Finally, user-friendly graphic reports were generated for all the analyses. By testing on 56 samples with 433 405 cells in total, we demonstrated its good performance. The package is available at: http://lifeome.net/software/sccancer/.",2021-05-01 +34758707,Factors associated with physical violence against children in Haiti: a national population-based cross-sectional survey.,"Considering the recent sociopolitical and environmental stress in Haiti, from the COVID pandemic to repeated natural disasters, we aimed to identify risk and protective factors associated with childhood physical violence (CPV) after the 2010 earthquake. A population-based national survey was administered to 13-24-year-old Haitians in 2012. A three-stage clustered sample design was utilized. Adjusted prevalence ratios (aPR) and risk ratios (aRR) were estimated. 64% of survey respondents experienced CPV. Respondents who reported emotional and/or sexual abuse prior to age 12 were twice as likely to be victims of physical violence later during childhood (emotional aRR 1.9, 95% CI 1.3-2.7; sexual aRR 2.1, 95% CI 1.4-3.1). Feeling close or very close to one's mother was protective (aPR 0.66, 95% CI 0.47-0.92). 
This study is the first to describe risk and protective factors and also delineate temporality of exposures associated with CPV.Supplemental data for this article is available online at https://doi.org/10.1080/17457300.2021.1996398.",2021-11-10 +34849200,Is there an advantageous arrangement of aromatic residues in proteins? Statistical analysis of aromatic interactions in globular proteins.,"The aim of this study was to evaluate the favorability of different conformations of aromatic residues in proteins by analysing the occurrence of particular conformations. The clustering of protein structures from the Protein Data Bank (PDB) was performed. Conformations of interacting aromatic residues were analyzed for 511 282 pairs in 35 493 protein structures sharing less than 50% identity. Pairs with a parallel arrangement of aromatic residues made up 6.2% of all possible ones, which was twice as much as expected. Pairs with a perpendicular arrangement of aromatic residues made up 25%. We demonstrate that the most favorable arrangement was at an angle of 60° between the interacting aromatic residues. Among all possible aromatic pairs, the His-His pair was twice as frequent as expected, and the His-Phe pair was less frequent than expected. A server (CARP - Contacts of Aromatic Residues in Proteins) has been created for calculating essential structural features of interacting aromatic residues in proteins: http://bioproteom.protres.ru/arom_q_prog/.",2021-11-01 +34862222,'What is the risk to me from COVID-19?': Public involvement in providing mortality risk information for people with 'high-risk' conditions for COVID-19 (OurRisk.CoV).,"Patients and public have sought mortality risk information throughout the pandemic, but their needs may not be served by current risk prediction tools. 
Our mixed methods study involved: (1) systematic review of published risk tools for prognosis, (2) provision and patient testing of new mortality risk estimates for people with high-risk conditions and (3) iterative patient and public involvement and engagement with qualitative analysis. Only one of 53 (2%) previously published risk tools involved patients or the public, while 11/53 (21%) had publicly accessible portals, but all for use by clinicians and researchers.Among people with a wide range of underlying conditions, there has been sustained interest and engagement in accessible and tailored, pre- and postpandemic mortality information. Informed by patient feedback, we provide such information in 'five clicks' (https://covid19-phenomics.org/OurRiskCoV.html), as context for decision making and discussions with health professionals and family members. Further development requires curation and regular updating of NHS data and wider patient and public engagement.",2021-11-01 +34128979,Cheetah-MS: a web server to model protein complexes using tandem cross-linking mass spectrometry data. ,"Protein-protein interactions (PPI) are central in many biological processes but difficult to characterize, especially in complex, unfractionated samples. Chemical cross-linking combined with mass spectrometry (MS) and computational modeling is gaining recognition as a viable tool in protein interaction studies. Here, we introduce Cheetah-MS, a web server for predicting the PPIs in a complex mixture of samples. It combines the capability and sensitivity of MS to analyze complex samples with the power and resolution of protein-protein docking. It produces the quaternary structure of the PPI of interest by analyzing tandem MS/MS data (also called MS2). Combining MS analysis and modeling increases the sensitivity and, importantly, facilitates the interpretation of the results. Cheetah-MS is freely available as a web server at https://www.txms.org. 
Supplementary data are available at Bioinformatics online.",2021-06-15 +34509701,Prediction of Mannheimia haemolytica serotypes based on whole genomic sequences.,The aim of the investigation was to predict the serotypes of M. haemolytica based on whole genomic sequences with the capsular gene region as target. A total of 22 strains selected to have been serotyped and to represent all serotypes were investigated by whole genomic sequencing. The BIGSdb (Bacterial Isolate Genome Sequence Database) was downloaded and installed on a Linux server. Here the sequence database was setup with unique loci at serotype level. The server allows serotypes of M. haemolytica to be predicted from whole genomic sequences and the service is available to the public for free from https://ivsmlst.sund.root.ku.dk.,2021-09-06 +33999189,ProteoVision: web server for advanced visualization of ribosomal proteins.,"ProteoVision is a web server designed to explore protein structure and evolution through simultaneous visualization of multiple sequence alignments, topology diagrams and 3D structures. Starting with a multiple sequence alignment, ProteoVision computes conservation scores and a variety of physicochemical properties and simultaneously maps and visualizes alignments and other data on multiple levels of representation. The web server calculates and displays frequencies of amino acids. ProteoVision is optimized for ribosomal proteins but is applicable to analysis of any protein. ProteoVision handles internally generated and user uploaded alignments and connects them with a selected structure, found in the PDB or uploaded by the user. It can generate de novo topology diagrams from three-dimensional structures. All displayed data is interactive and can be saved in various formats as publication quality images or external datasets or PyMol Scripts. ProteoVision enables detailed study of protein fragments defined by Evolutionary Classification of protein Domains (ECOD) classification. 
ProteoVision is available at http://proteovision.chemistry.gatech.edu/.",2021-07-01 +33948944,Using gastrointestinal distress reports to predict youth anxiety risk: Implications for mental health literacy and community care.,"This study investigates the generalizability and predictive validity of associations between gastrointestinal (GI) symptoms and youth anxiety to establish their utility in community mental health decision-making. We analyzed data from youth ages 3 to 21 years in volunteer cohorts collected in Los Angeles (N = 327) and New York City (N = 102), as well as the Healthy Brain Network cohort (N = 1957). Youth GI distress was measured through items taken from the parent-reported Child Behavior Checklist (CBCL). We examined generalizability of GI-anxiety associations across cohorts and anxiety reporters, then evaluated the performance of these models in predicting youth anxiety in holdout data. Consistent with previous work, higher levels of gastrointestinal distress were associated with more parent-reported youth anxiety behaviors in all three cohorts. Models trained on data from the Healthy Brain Network cohort predicted parent-reported and child-reported anxiety behaviors, as well as clinician-evaluated anxiety diagnoses, at above chance levels in holdout data. Models which included GI symptoms often, but not always, outperformed models based on age and sex alone in predicting youth anxiety. Based on the generalizability and predictive validity of GI-anxiety associations investigated here, GI symptoms may be an effective tool for child-facing professionals for identifying children at risk for anxiety (Preprint: https://psyarxiv.com/zgavu/).",2021-05-04 +34601555,MungeSumstats: A Bioconductor package for the standardisation and quality control of many GWAS summary statistics. ,"Genome-wide association studies (GWAS) summary statistics have popularised and accelerated genetic research. 
However, a lack of standardisation of the file formats used has proven problematic when running secondary analysis tools or performing meta-analysis studies. To address this issue, we have developed MungeSumstats, a Bioconductor R package for the standardisation and quality control of GWAS summary statistics. MungeSumstats can handle the most common summary statistic formats, including variant call format (VCF) producing a reformatted, standardised, tabular summary statistic file, VCF or R native data object. MungeSumstats is available on Bioconductor (v 3.13) and can also be found on Github at: https://neurogenomics.github.io/MungeSumstats. The analysis deriving the most common summary statistic formats is available at: https://al-murphy.github.io/SumstatFormats.",2021-10-02 +34625409,Novel Models of Genetic Education and Testing for Pancreatic Cancer Interception: Preliminary Results from the GENERATE Study.,"Up to 10% of patients with pancreatic ductal adenocarcinoma (PDAC) carry underlying germline pathogenic variants in cancer susceptibility genes. The GENetic Education Risk Assessment and TEsting (GENERATE) study aimed to evaluate novel methods of genetic education and testing in relatives of patients with PDAC. Eligible individuals had a family history of PDAC and a relative with a germline pathogenic variant in APC, ATM, BRCA1, BRCA2, CDKN2A, EPCAM, MLH1, MSH2, MSH6, PALB2, PMS2, STK11, or TP53 genes. Participants were recruited at six academic cancer centers and through social media campaigns and patient advocacy efforts. Enrollment occurred via the study website (https://GENERATEstudy.org) and all participation, including collecting a saliva sample for genetic testing, could be done from home. Participants were randomized to one of two remote methods that delivered genetic education about the risks of inherited PDAC and strategies for surveillance. The primary outcome of the study was uptake of genetic testing. 
From 5/8/2019 to 5/6/2020, 49 participants were randomized to each of the intervention arms. Overall, 90 of 98 (92%) of randomized participants completed genetic testing. The most frequently detected pathogenic variants included those in BRCA2 (N = 15, 17%), ATM (N = 11, 12%), and CDKN2A (N = 4, 4%). Participation in the study remained steady throughout the onset of the Coronavirus disease (COVID-19) pandemic. Preliminary data from the GENERATE study indicate success of remote alternatives to traditional cascade testing, with genetic testing rates over 90% and a high rate of identification of germline pathogenic variant carriers who would be ideal candidates for PDAC interception approaches. PREVENTION RELEVANCE: Preliminary data from the GENERATE study indicate success of remote alternatives for pancreatic cancer genetic testing and education, with genetic testing uptake rates over 90% and a high rate of identification of germline pathogenic variant carriers who would be ideal candidates for pancreatic cancer interception.",2021-10-08 +30561649,Transcriptome alterations in myotonic dystrophy skeletal muscle and heart.,"Myotonic dystrophy (dystrophia myotonica, DM) is a multi-systemic disease caused by expanded CTG or CCTG microsatellite repeats. Characterized by symptoms in muscle, heart and central nervous system, among others, it is one of the most variable diseases known. A major pathogenic event in DM is the sequestration of muscleblind-like proteins by CUG or CCUG repeat-containing RNAs transcribed from expanded repeats, and differences in the extent of MBNL sequestration dependent on repeat length and expression level may account for some portion of the variability. However, many other cellular pathways are reported to be perturbed in DM, and the severity of specific disease symptoms varies among individuals. 
To help understand this variability and facilitate research into DM, we generated 120 RNASeq transcriptomes from skeletal and heart muscle derived from healthy and DM1 biopsies and autopsies. A limited number of DM2 and Duchenne muscular dystrophy samples were also sequenced. We analyzed splicing and gene expression, identified tissue-specific changes in RNA processing and uncovered transcriptome changes strongly correlating with muscle strength. We created a web resource at http://DMseq.org that hosts raw and processed transcriptome data and provides a lightweight, responsive interface that enables browsing of processed data across the genome.",2019-04-01 +31045208,IntFOLD: an integrated web resource for high performance protein structure and function prediction.,"The IntFOLD server provides a unified resource for the automated prediction of: protein tertiary structures with built-in estimates of model accuracy (EMA), protein structural domain boundaries, natively unstructured or disordered regions in proteins, and protein-ligand interactions. The component methods have been independently evaluated via the successive blind CASP experiments and the continual CAMEO benchmarking project. The IntFOLD server has established its ranking as one of the best performing publicly available servers, based on independent official evaluation metrics. Here, we describe significant updates to the server back end, where we have focused on performance improvements in tertiary structure predictions, in terms of global 3D model quality and accuracy self-estimates (ASE), which we achieve using our newly improved ModFOLD7_rank algorithm. We also report on various upgrades to the front end including: a streamlined submission process, enhanced visualization of models, new confidence scores for ranking, and links for accessing all annotated model data. Furthermore, we now include an option for users to submit selected models for further refinement via convenient push buttons. 
The IntFOLD server is freely available at: http://www.reading.ac.uk/bioinf/IntFOLD/.",2019-07-01 +32719467,STAGdb: a 30K SNP genotyping array and Science Gateway for Acropora corals and their dinoflagellate symbionts.,"Standardized identification of genotypes is necessary in animals that reproduce asexually and form large clonal populations such as coral. We developed a high-resolution hybridization-based genotype array coupled with an analysis workflow and database for the most speciose genus of coral, Acropora, and their symbionts. We designed the array to co-analyze host and symbionts based on bi-allelic single nucleotide polymorphisms (SNP) markers identified from genomic data of the two Caribbean Acropora species as well as their dominant dinoflagellate symbiont, Symbiodinium 'fitti'. SNPs were selected to resolve multi-locus genotypes of host (called genets) and symbionts (called strains), distinguish host populations and determine ancestry of coral hybrids between Caribbean acroporids. Pacific acroporids can also be genotyped using a subset of the SNP loci and additional markers enable the detection of symbionts belonging to the genera Breviolum, Cladocopium, and Durusdinium. Analytic tools to produce multi-locus genotypes of hosts based on these SNP markers were combined in a workflow called the Standard Tools for Acroporid Genotyping (STAG). The STAG workflow and database are contained within a customized Galaxy environment (https://coralsnp.science.psu.edu/galaxy/), which allows for consistent identification of host genet and symbiont strains and serves as a template for the development of arrays for additional coral genera. STAG data can be used to track temporal and spatial changes of sampled genets necessary for restoration planning and can be applied to downstream genomic analyses. 
Using STAG, we uncover bi-directional hybridization between and population structure within Caribbean acroporids and detect a cryptic Acroporid species in the Pacific.",2020-07-27 +32902328,Methodological Considerations for Epidemiological Studies of Air Pollution and the SARS and COVID-19 Coronavirus Outbreaks.,"

Background

Studies have reported that ambient air pollution is associated with an increased risk of developing or dying from coronavirus-2 (COVID-19). Methodological approaches to investigate the health impacts of air pollution on epidemics should differ from those used for chronic diseases, but the methods used in these studies have not been appraised critically.

Objectives

Our study aimed to identify and critique the methodological approaches of studies of air pollution on infections and mortality due to COVID-19 and to identify and critique the methodological approaches of similar studies concerning severe acute respiratory syndrome (SARS).

Methods

Published and unpublished papers of associations between air pollution and developing or dying from COVID-19 or SARS that were reported as of 10 May 2020 were identified through electronic databases, internet searches, and other sources.

Results

All six COVID-19 studies and two of three SARS studies reported positive associations. Two were time series studies that estimated associations between daily changes in air pollution, one was a cohort that assessed associations between air pollution and the secondary spread of SARS, and six were ecological studies that used area-wide exposures and outcomes. Common shortcomings included possible cross-level bias in ecological studies, underreporting of health outcomes, using grouped data, the lack of highly spatially resolved air pollution measures, inadequate control for confounding and evaluation of effect modification, not accounting for regional variations in the timing of outbreaks' temporal changes in at-risk populations, and not accounting for nonindependence of outcomes.

Discussion

Studies of air pollution and novel coronaviruses have relied mainly on ecological measures of exposures and outcomes and are susceptible to important sources of bias. Although longitudinal studies with individual-level data may be imperfect, they are needed to adequately address this topic. The complexities involved in these types of studies underscore the need for careful design and for peer review. https://doi.org/10.1289/EHP7411.",2020-09-09 +32655358,BETA: A Large Benchmark Database Toward SSVEP-BCI Application.,"The brain-computer interface (BCI) provides an alternative means to communicate and it has sparked growing interest in the past two decades. Specifically, for Steady-State Visual Evoked Potential (SSVEP) based BCI, marked improvement has been made in the frequency recognition method and data sharing. However, the number of pubic databases is still limited in this field. Therefore, we present a BEnchmark database Towards BCI Application (BETA) in the study. The BETA database is composed of 64-channel Electroencephalogram (EEG) data of 70 subjects performing a 40-target cued-spelling task. The design and the acquisition of the BETA are in pursuit of meeting the demand from real-world applications and it can be used as a test-bed for these scenarios. We validate the database by a series of analyses and conduct the classification analysis of eleven frequency recognition methods on BETA. We recommend using the metric of wide-band signal-to-noise ratio (SNR) and BCI quotient to characterize the SSVEP at the single-trial and population levels, respectively. 
The BETA database can be downloaded from the following link http://bci.med.tsinghua.edu.cn/download.html.",2020-06-23 +34095372,Human tear proteome dataset in response to daily wear of water gradient contact lens using SWATH-MS approach.,"Water Gradient Contact Lens (WGCL) is a new generation material that combines the benefits of Silicone hydrogel (SiHy) and traditional hydrogel contact lenses by modifying the materials between the core and the surface. However, its impact on tear proteome has not been explored. Tears were collected on healthy young adults using Schirmer's strip at baseline, 1-week, and 1-month of WGCL lens wear (n=15) and age-matched untouched controls (n=10). Equal amounts of tears samples from individuals of WGCL and control groups were randomly pooled to form representative equal parts at each condition (n=3 for WGCL wear and age-matched untouched control group) at each condition (baseline, 1-week, and 1-month). Tears were prepared using the S-Trap sample preparation followed by the analysis of a TripleTOF 6600 mass spectrometer. Using Information-dependent acquisition (IDA), a total of 725 tear proteins (6760 distinct peptides) were identified in the constructed spectral library at 1% FDR. Using data-independent acquisition (SWATH-MS), data were analyzed and processed using PeakView (v2.2, SCIEX), with the top differentially expressed proteins at each time point (baseline, 1-week, and 1-month) presented. 
All acquired raw data (IDA and SWATH-MS) were submitted and published on the Peptide Atlas public repository (http://www.peptideatlas.org/) for general release (Data ID PASS01589).",2021-05-12 +33348264,Combining in vivo pathohistological and redox status analysis with in silico toxicogenomic study to explore the phthalates and bisphenol A mixture-induced testicular toxicity.,"The aim of this study was to: (i) determine and compare the capacity of bis (2 -ethylhexyl) phthalate (DEHP), dibutyl phthalate (DBP), bisphenol A (BPA), and their mixture to produce testicular toxicity after the subacute exposure; (ii) explore the mechanisms behind the observed changes using in silico toxicogenomic approach. Male rats were randomly split into groups (n = 6): (1) Control (corn oil); (2) DEHP (50 mg/kg b.w./day); (3) DBP (50 mg/kg b.w./day); (4) BPA (25 mg/kg b.w./day); and (5) MIX (50 mg/kg b.w./day DEHP + 50 mg/kg b.w/day DBP + 25 mg/kg b.w./day BPA). Animals were sacrificed after 28 days of oral exposure, testes were extracted and prepared for histological assessments under the light microscope (haematoxylin and eosin staining) and redox status analysis. The Comparative Toxicogenomics Database (CTD; http://CTD.mdibl.org), Cytoscape software (https://cytoscape.org) and ToppGene Suite (https://toppgene.cchmc.org) were used for data-mining. Present pathohistological study has demonstrated more pronounced testicular toxicity of the MIX group (desquamated germinal epithelium cells, enlarged cells with hyperchromatic nuclei, multinucleated cell forms and intracytoplasmic vacuoles) in comparison with the single substances, while effects on redox status parameters were either more prominent, or present only in the MIX group. In silico investigation revealed 20 genes linked to male reproductive disorders, affected by all three investigated substances. 
Effects on metabolism, AhR pathway, apoptosis and oxidative stress could be singled out as the most probable mechanisms involved in the subacute DEHP, DBP and BPA mixture testicular toxicity, while the effect on oxidative stress parameters was confirmed by in vivo experiment.",2020-12-13 +32528639,Plant-mSubP: a computational framework for the prediction of single- and multi-target protein subcellular localization using integrated machine-learning approaches.,"The subcellular localization of proteins is very important for characterizing its function in a cell. Accurate prediction of the subcellular locations in computational paradigm has been an active area of interest. Most of the work has been focused on single localization prediction. Only few studies have discussed the multi-target localization, but have not achieved good accuracy so far; in plant sciences, very limited work has been done. Here we report the development of a novel tool Plant-mSubP, which is based on integrated machine learning approaches to efficiently predict the subcellular localizations in plant proteomes. The proposed approach predicts with high accuracy 11 single localizations and three dual locations of plant cell. Several hybrid features based on composition and physicochemical properties of a protein such as amino acid composition, pseudo amino acid composition, auto-correlation descriptors, quasi-sequence-order descriptors and hybrid features are used to represent the protein. The performance of the proposed method has been assessed through a training set as well as an independent test set. Using the hybrid feature of the pseudo amino acid composition, N-Center-C terminal amino acid composition and the dipeptide composition (PseAAC-NCC-DIPEP), an overall accuracy of 81.97 %, 84.75 % and 87.88 % is achieved on the training data set of proteins containing the single-label, single- and dual-label combined, and dual-label proteins, respectively. 
When tested on the independent data, an accuracy of 64.36 %, 64.84 % and 81.08 % is achieved on the single-label, single- and dual-label, and dual-label proteins, respectively. The prediction models have been implemented on a web server available at http://bioinfo.usu.edu/Plant-mSubP/. The results indicate that the proposed approach is comparable to the existing methods in single localization prediction and outperforms all other existing tools when compared for dual-label proteins. The prediction tool will be a useful resource for better annotation of various plant proteomes.",2019-10-17 +33304468,PDmethDB: A curated Parkinson's disease associated methylation information database.,"Parkinson's disease (PD) is the second most common neurodegenerative disease, of which the histopathological hallmark is the formation of Lewy bodies consisting of α-synuclein as the major component. α-Synuclein can sequester DNA Methyltransferase 1 (DNMT1), the maintenance DNA methylation enzyme, from the nucleus and into the cytoplasm, leading to global DNA hypomethylation in human brain. As DNA methylation is a major epigenetic modification that regulates gene expression and there is no specific database storing PD associated methylation information, PDmethDB (Parkinson's Disease Methylation Database) aims to curate PD associated methylation information from literature to facilitate the study of the relationship between PD and methylation. Currently, PDmethDB contains 97,077 PD methylation associated entries among 12,308 molecules, 37,944 CpG sites, 31 tissues and 3 species through a review of about 1600 published papers. This includes information concerning the gene/molecule name, CpG site, methylation alteration, expression alteration, tissue, PMID, experimental method, and a brief description about the entry. PDmethDB provides a user-friendly interface to search, browse, download and submit data. 
PDmethDB supports browsing by molecule, species, tissue, gene region, methylation alteration and experimental methods. PDmethDB also shows the entry gene interaction network including protein-protein interactions and miRNA-targets interactions with a highlight of PD associated genes from DisGeNET database. PDmethDB aims to facilitate the understanding of the relationship between PD and methylation. Database URL: https://ageing.shinyapps.io/pdmethdb/.",2020-11-20 +34019776,Conducting a Virtual Study With Special Considerations for Working With Persons With Aphasia.,"Purpose The use of technology (e.g., telehealth) in clinical settings has rapidly increased, and its use in research settings continues to grow. The aim of this report is to present one potential solution to a clinical issue that of virtual and remote assessment for the purposes of spoken language research in persons with aphasia (PWA). To do so, we report detailed methods for conducting a multitimepoint (test-retest) virtual paradigm, assessing lifestyle, physiological, cognitive, and linguistic factors in persons with and without aphasia. Method Procedures for virtual assessment are detailed in a sample of adults with no brain damage (N = 24) and PWA (N = 25) on a test-retest paradigm (data collection approximately 10 ± 3 days apart). This report provides practical information about pre-assessment (e.g., recruitment, scheduling), assessment (e.g., aphasia-friendly consent presentation, investigator fidelity), and postassessment (e.g., data storage, quality check) procedures for human behavior research using a virtual platform. Results Preliminary study data are provided, indicating high retention rates, high rates of data acquisition, and feasibility. Common technological troubles and solutions are discussed, and solutions are offered. The results suggest that our pre-assessment, assessment, and postassessment procedures contributed to the success of our study. 
Conclusions We provide a practical methodology for conducting a multitimepoint study, with considerations for PWA, adding to the body of research on telehealth in clinical populations. Future studies should continue to evaluate telemethodology, which may be core for diversifying studies, improving study retention, and enrolling larger sample sizes. Supplemental Material https://doi.org/10.23641/asha.14608101.",2021-05-21 +34319215,What Is Adulthood? A Comparison of the Adulthood Criteria of Greek Emerging Adults and Their Parents.,"Extensive research has investigated the criteria that emerging adults consider important for adulthood. Limited research has investigated this topic between adults from different age groups. This study aims to compare the criteria that emerging adults and their parents use to define adulthood. Participants included 73 Greek emerging adult and parent dyads. The emerging adult participants aged 18.5-23.6 (54.8% females), and the parent participants aged 41.3-59.5 years (82.2% females). All participants provided socio-demographic data and completed the Greek version of the Scale of Conceptions of the Transition to Adulthood. Results revealed that a) the most widely endorsed criteria for both emerging adults and their parents included accepting responsibility for one's actions, avoiding committing petty crimes, and avoiding drunk driving, b) emerging adults assigned on average less importance than did their parents on the adulthood criteria, and c) emerging adults and their parents agree strongly on the ranking of the adulthood criteria. These results extend previous findings from China and the United States, which evidenced both congruence and disagreement between emerging adults and their parents in the criteria for adulthood.Supplemental data for this article is available online at https://doi.org/10.1080/00221325.2021.1957761 .",2021-07-28 +28984188,ScaPD: a database for human scaffold proteins.,"

Background

Scaffold proteins play a critical role in an increasing number of biological signaling processes, including simple tethering mechanism, regulating selectivity in pathways, shaping cellular behaviors. While many databases document the signaling pathways, few databases are devoted to the scaffold proteins that mediate signal transduction.

Results

Here, we have developed a user-friendly database, ScaPD, to describe computationally predicted, experimentally validated scaffold proteins and associated signaling pathways. It currently contains 273 scaffold proteins and 1118 associated signaling pathways. The database allows users to search, navigate and download the scaffold protein-mediated signaling networks.

Conclusions

Manually curated and predicted scaffold protein data will be a foundation for further investigation of the scaffold protein in the signal transduction. With maintained up-to-date data, ScaPD ( http://bioinfo.wilmer.jhu.edu/ScaPD ) will be a valuable resource for understanding how individual signaling pathways are regulated.",2017-10-03 +34185052,PolarProtPred: Predicting apical and basolateral localization of transmembrane proteins using putative short linear motifs and deep learning. ,"Cell polarity refers to the asymmetric organization of cellular components in various cells. Epithelial cells are the best-known examples of polarized cells, featuring apical and basolateral membrane domains. Mounting evidence suggests that short linear motifs play a major role in protein trafficking to these domains, although the exact rules governing them are still elusive. In this study we prepared neural networks that capture recurrent patterns to classify transmembrane proteins localizing into apical and basolateral membranes. Asymmetric expression of drug transporters results in vectorial drug transport, governing the pharmacokinetics of numerous substances, yet the data on how proteins are sorted in epithelial cells is very scattered. The provided method may offer help to experimentalists to identify or better characterize molecular networks regulating the distribution of transporters or surface receptors (including viral entry receptors like that of COVID-19). The prediction server PolarProtPred is available at http://polarprotpred.ttk.hu. Supplementary data are available at Bioinformatics online.",2021-06-29 +32964354,A novel online calculator predicting short-term postoperative outcomes in patients with metastatic brain tumors.,"

Purpose

Establishing predictors of hospital length of stay (LOS), discharge disposition, and total hospital charges is essential to providing high-quality, value-based care. Though previous research has investigated these outcomes for patients with metastatic brain tumors, there are currently no tools that synthesize such research findings and allow for prediction of these outcomes on a patient-by-patient basis. The present study sought to develop a prediction calculator that uses patient demographic and clinical information to predict extended hospital length of stay, non-routine discharge disposition, and high total hospital charges for patients with metastatic brain tumors.

Methods

Patients undergoing surgery for metastatic brain tumors at a single academic institution were analyzed (2017-2019). Multivariate logistic regression was used to identify independent predictors of extended LOS (> 7 days), non-routine discharge, and high total hospital charges (> $ 46,082.63). p < 0.05 was considered statistically significant. C-statistics and the Hosmer-Lemeshow test were used to assess model discrimination and calibration, respectively.

Results

A total of 235 patients were included in our analysis, with a mean age of 62.74 years. The majority of patients were female (52.3%) and Caucasian (76.6%). Our models predicting extended LOS, non-routine discharge, and high hospital charges had optimism-corrected c-statistics > 0.7, and all three models demonstrated adequate calibration (p > 0.05). The final models are available as an online calculator ( https://neurooncsurgery.shinyapps.io/brain_mets_calculator/ ).

Conclusions

Our models predicting postoperative outcomes allow for individualized risk-estimation for patients following surgery for metastatic brain tumors. Our results may be useful in helping clinicians to provide resource-conscious, high-value care.",2020-09-22 +32884207,From sequence analysis of DPP-4 to molecular docking based searching of its inhibitors.,"Literature data suggests that Dipeptidyl peptidase-4 (DPP-4) is a potential target for type 2 Diabetes Mellitus. Therefore, it is of interest to identify new DPP-4 inhibitors using molecular docking analysis. We document compounds such as STOCK1N-98884, STOCK1N-98881, and STOCK1N-98866 with optimal binding features with DPP-4 from the ligand database at https://www.ibscreen.com/ for further consideration.",2020-06-30 +30364992,Genome properties in 2019: a new companion database to InterPro for the inference of complete functional attributes.,"Automatic annotation of protein function is routinely applied to newly sequenced genomes. While this provides a fine-grained view of an organism's functional protein repertoire, proteins, more commonly function in a coordinated manner, such as in pathways or multimeric complexes. Genome Properties (GPs) define such functional entities as a series of steps, originally described by either TIGRFAMs or Pfam entries. To increase the scope of coverage, we have migrated GPs to function as a companion resource utilizing InterPro entries. Having introduced GPs-specific versioned releases, we provide software and data via a GitHub repository, and have developed a new web interface to GPs (available at https://www.ebi.ac.uk/interpro/genomeproperties). In addition to exploring each of the 1286 GPs, the website contains GPs pre-calculated for a representative set of proteomes; these results can be used to profile GPs phylogenetically via an interactive viewer. Users can upload novel data to the viewer for comparison with the pre-calculated results. 
Over the last year, we have added ∼700 new GPs, increasing the coverage of eukaryotic systems, as well as increasing general coverage through automatic generation of GPs from related resources. All data are freely available via the website and the GitHub repository.",2019-01-01 +34016012,IoT-enabled cloud-based real-time remote ECG monitoring system.,"Statistical reports all around the world have deemed cardiovascular diseases (CVDs) as the largest contributor to the death count. The electrocardiogram (ECG) is a widely accepted technology employed for investigation of CVDs of the person. The proposed solution deals with an efficient internet of things (IoT) enabled real-time ECG monitoring system using cloud computing technologies. The article presents a cloud-centric solution to provide remote monitoring of CVD. Sensed ECG data are transmitted to S3 bucket provided by Amazon web service (AWS) through a mobile gateway. AWS cloud uses HTTP and MQTT servers to provide data visualisation, quick response and long-live connection to device and user. Bluetooth low energy (BLE 4.0) is used as a communication protocol for low-power data transmission between device and mobile gateway. The proposed system is implemented with filtering algorithms to ignore distractions, environmental noise and motion artefacts. It offers an analysis of ECG signals to detect various parameters such as heartbeat, PQRST wave and QRS complex intervals along with respiration rate. The proposed system prototype has been tested and validated for reliable ECG monitoring remotely in real-time.",2021-05-21 +32619768,SR4R: An Integrative SNP Resource for Genomic Breeding and Population Research in Rice.,"The information commons for rice (IC4R) database is a collection of 18 million single nucleotide polymorphisms (SNPs) identified by resequencing of 5152 rice accessions. Although IC4R offers ultra-high density rice variation map, these raw SNPs are not readily usable for the public. 
To satisfy different research utilizations of SNPs for population genetics, evolutionary analysis, association studies, and genomic breeding in rice, raw genotypic data of these 18 million SNPs were processed by unified bioinformatics pipelines. The outcomes were used to develop a daughter database of IC4R - SnpReady for Rice (SR4R). SR4R presents four reference SNP panels, including 2,097,405 hapmapSNPs after data filtration and genotype imputation, 156,502 tagSNPs selected from linkage disequilibrium-based redundancy removal, 1180 fixedSNPs selected from genes exhibiting selective sweep signatures, and 38 barcodeSNPs selected from DNA fingerprinting simulation. SR4R thus offers a highly efficient rice variation map that combines reduced SNP redundancy with extensive data describing the genetic diversity of rice populations. In addition, SR4R provides rice researchers with a web interface that enables them to browse all four SNP panels, use online toolkits, as well as retrieve the original data and scripts for a variety of population genetics analyses on local computers. SR4R is freely available to academic users at http://sr4r.ic4r.org/.",2020-04-01 +34428318,VIP-HL: Semi-automated ACMG/AMP variant interpretation platform for genetic hearing loss.,"The American College of Medical Genetics and Genomics, and the Association for Molecular Pathology (ACMG/AMP) have proposed a set of evidence-based guidelines to support sequence variant interpretation. The ClinGen hearing loss expert panel (HL-EP) introduced further specifications into the ACMG/AMP framework for genetic hearing loss. This study developed a tool named Variant Interpretation Platform for genetic Hearing Loss (VIP-HL), aiming to semi-automate the HL ACMG/AMP rules. VIP-HL aggregates information from external databases to automate 13 out of 24 ACMG/AMP rules specified by HL-EP, namely PVS1, PS1, PM1, PM2, PM4, PM5, PP3, BA1, BS1, BS2, BP3, BP4, and BP7. 
We benchmarked VIP-HL using 50 variants in which 82 rules were activated by the ClinGen HL-EP. VIP-HL concordantly activated 93% (76/82) rules, significantly higher than that of by InterVar (48%; 39/82). VIP-HL is an integrated online tool for reliable automated variant classification in hearing loss genes. It assists curators in variant interpretation and provides a platform for users to share classifications with each other. VIP-HL is available with a user-friendly web interface at http://hearing.genetics.bgi.com/.",2021-09-02 +34481878,Application and advantages of zebrafish model in the study of neurovascular unit.,"The concept of ""Neurovascular Unit"" (NVU) was put forward, so that the research goal of Central Nervous System (CNS) diseases gradually transitioned from a single neuron to the structural and functional integrity of the NVU. Zebrafish has the advantages of high homology with human genes, strong reproductive capacity and visualization of neural circuits, so it has become an emerging model organism for NVU research and has been applied to a variety of CNS diseases. Based on CNKI (https://www.cnki.net/) and PubMed (https://pubmed.ncbi.nlm.nih.gov/about/) databases, the author of this article sorted out the relevant literature, analyzed the construction of a zebrafish model of various CNS diseases,and the use of diagrams showed the application of zebrafish in the NVU, revealed its relationship, which would provide new methods and references for the treatment and research of CNS diseases.",2021-09-02 +34475414,"Cydrasil 3, a curated 16S rRNA gene reference package and web app for cyanobacterial phylogenetic placement.","Cyanobacteria are a widespread and important bacterial phylum, responsible for a significant portion of global carbon and nitrogen fixation. 
Unfortunately, reliable and accurate automated classification of cyanobacterial 16S rRNA gene sequences is muddled by conflicting systematic frameworks, inconsistent taxonomic definitions (including the phylum itself), and database errors. To address this, we introduce Cydrasil 3 ( https://www.cydrasil.org ), a curated 16S rRNA gene reference package, database, and web application designed to provide a full phylogenetic perspective for cyanobacterial systematics and routine identification. Cydrasil 3 contains over 1300 manually curated sequences longer than 1100 base pairs and can be used for phylogenetic placement or as a reference sequence set for de novo phylogenetic reconstructions. The web application (utilizing PaPaRA and EPA-ng) can place thousands of sequences into the reference tree and has detailed instructions on how to analyze results. While the Cydrasil web application offers no taxonomic assignments, it instead provides phylogenetic placement, as well as a searchable database with curation notes and metadata, and a mechanism for community feedback.",2021-09-02 +29145635,tRex: A Web Portal for Exploration of tRNA-Derived Fragments in Arabidopsis thaliana.,"tRNA-derived fragments (tRFs) constitute a new class of short regulatory RNAs that are a product of nascent or mature tRNA processing. tRF sequences have been identified in all domains of life; however, most published research pertains to human, yeast and some bacterial organisms. Despite growing interest in plant tRFs and accumulating evidence of their function in plant development and stress responses, no public, web-based repository dedicated to these molecules is currently available. Here, we introduce tRex (http://combio.pl/trex)-the first comprehensive data-driven online resource specifically dedicated to tRFs in the model plant Arabidopsis thaliana. 
The portal is based on verified Arabidopsis tRNA annotation and includes in-house-generated and publicly available small RNA sequencing experiments from various tissues, ecotypes, genotypes and stress conditions. The provided web-based tools are designed in a user-friendly manner and allow for seamless exploration of the data that are presented in the form of dynamic tables and cumulative coverage profiles. The tRex database is connected to external genomic and citation resources, which makes it a one-stop solution for Arabidopsis tRF-related research.",2018-01-01 +34480995,"Traditional uses, bioactive composition, pharmacology, and toxicology of Phyllanthus emblica fruits: A comprehensive review.","

Ethnopharmacological relevance

The fruits of Phyllanthus emblica Linn or Emblica officinalis Gaertn (Phyllanthaceae), (FPE) commonly known as Indian gooseberry or Amla, gained immense importance in indigenous traditional medicinal systems, including Ayurveda, for its medicinal and nutritional benefits. It is used to cure several diseases such as common cold, fever, cough, asthma, bronchitis, diabetes, cephalalgia, ophthalmopathy, dyspepsia, colic, flatulence, hyperacidity, peptic ulcer, erysipelas, skin diseases, leprosy, hematogenesis, inflammation, anemia, emaciation, hepatopathy, jaundice, diarrhea, dysentery, hemorrhages, leucorrhea, menorrhagia, cardiac disorders, and premature greying of hair.

Aim of the study

In the present review, we presented a comprehensive analysis of the ethnopharmacology, bioactive composition, and toxicity of P. emblica to identify the gap between research and the current applications and to help explore the trends and perspectives for future studies.

Materials and methods

We collected the literature published before April 2021 on the phytochemistry, pharmacology, and toxicity of FPE. Literature in English was retrieved from scientific databases such as PubMed, ScienceDirect, Wiley, Springer, and Google Scholar, as well as from books. These reports were analyzed and summarized to prepare this review. The plant taxonomy was verified by ""The Plant List"" database (http://www.theplantlist.org).

Results and conclusions

FPE have been used as a rich source of vitamin C, minerals, and amino acids. Several bioactive molecules were isolated and identified from FPE such as tannins, flavonoids, saponins, terpenoids, alkaloids, ascorbic acid etc. The in vitro and in vivo pharmacological studies on FPE revealed its antimicrobial, antioxidant, anti-inflammatory, anti-diabetic, anticancer, radioprotective, hepatoprotective, immunomodulatory, hypolipidemic, anti-venom, wound healing, HIV-reverse transcriptase effect. Toxicological studies on fruits indicated the absence of any adverse effect even at a high dose after oral administration.

Conclusions

Although FPE showed remarkable therapeutic activities against several diseases such as diabetes, cancer, inflammation, hepatitis B virus, and malaria, there were several drawbacks in some previous reports including the lack of information on the drug dose, standards, controls, and mechanism of action of the extract. Further in-depth studies are required to explain the mechanism of action of the extracts to reveal the role of the bioactive compounds in the reported activities.",2021-09-02 +34922949,Parallel Single-Cell Multiomics Analysis of Neonatal Skin Reveals the Transitional Fibroblast States that Restrict Differentiation into Distinct Fates.,"One of the keys to achieving skin regeneration lies within understanding the heterogeneity of neonatal fibroblasts, which support skin regeneration. However, the molecular underpinnings regulating the cellular states and fates of these cells are not fully understood. To investigate this, we performed a parallel multiomics analysis by processing neonatal murine skin for single-cell Assay for Transposase-Accessible Chromatin sequencing and single-cell RNA sequencing separately. Our approach revealed that fibroblast clusters could be sorted into papillary and reticular lineages on the basis of transcriptome profiling, as previously reported. However, single-cell Assay for Transposase-Accessible Chromatin sequencing analysis of neonatal fibroblast lineage markers, such as Dpp4/Cd26, Corin, and Dlk1 along with markers of myofibroblasts, revealed accessible chromatin in all fibroblast populations despite their lineage-specific transcriptome profiles. These results suggest that accessible chromatin does not always translate to gene expression and that many fibroblast lineage markers reflect a fibroblast state, which includes neonatal papillary fibroblasts, reticular fibroblasts, and myofibroblasts. 
This analysis also provides a possible explanation as to why these marker genes can be promiscuously expressed in different fibroblast populations under different conditions. Our single-cell Assay for Transposase-Accessible Chromatin sequencing analysis also revealed that the functional lineage restriction between dermal papilla and adipocyte fates is regulated by distinct chromatin landscapes. Finally, we have developed a webtool for our multiomics analysis: https://skinregeneration.org/scatacseq-and-scrnaseq-data-from-thompson-et-al-2021-2/.",2021-12-17 +31583635,Informed Use of Protein-Protein Interaction Data: A Focus on the Integrated Interactions Database (IID).,"Protein-protein interaction data is fundamental in molecular biology, and numerous online databases provide access to this data. However, the huge quantity, complexity, and variety of PPI data can be overwhelming, and rather than helping to address research problems, the data may add to their complexity and reduce interpretability. This protocol focuses on solutions for some of the main challenges of using PPI data, including accessing data, ensuring relevance by integrating useful annotations, and improving interpretability. While the issues are generic, we highlight how to perform such operations using Integrated Interactions Database (IID; http://ophid.utoronto.ca/iid ).",2020-01-01 +31972020,A highly contiguous genome assembly of the bat hawkmoth Hyles vespertilio (Lepidoptera: Sphingidae). ,"Adapted to different ecological niches, moth species belonging to the Hyles genus exhibit a spectacular diversity of larval color patterns. These species diverged ∼7.5 million years ago, making this rather young genus an interesting system to study a wide range of questions including the process of speciation, ecological adaptation, and adaptive radiation. Here we present a high-quality genome assembly of the bat hawkmoth Hyles vespertilio, the first reference genome of a member of the Hyles genus. 
We generated 51× Pacific Biosciences long reads with an average read length of 8.9 kb. Pacific Biosciences reads longer than 4 kb were assembled into contigs, resulting in a 651.4-Mb assembly consisting of 530 contigs with an N50 value of 7.5 Mb. The circular mitochondrial contig has a length of 15,303 bp. The H. vespertilio genome is very repeat-rich and exhibits a higher repeat content (50.3%) than other Bombycoidea species such as Bombyx mori (45.7%) and Manduca sexta (27.5%). We developed a comprehensive gene annotation workflow to obtain consensus gene models from different evidence including gene projections, protein homology, transcriptome data, and ab initio predictions. The resulting gene annotation is highly complete with 94.5% of BUSCO genes being completely present, which is higher than the BUSCO completeness of the B. mori (92.2%) and M. sexta (90%) annotations. Our gene annotation strategy has general applicability to other genomes, and the H. vespertilio genome provides a valuable molecular resource to study a range of questions in this genus, including phylogeny, incomplete lineage sorting, speciation, and hybridization. A genome browser displaying the genome, alignments, and annotations is available at https://genome-public.pks.mpg.de/cgi-bin/hgTracks?db=HLhylVes1.",2020-01-01 +29069466,MGA repository: a curated data resource for ChIP-seq and other genome annotated data.,"The Mass Genome Annotation (MGA) repository is a resource designed to store published next generation sequencing data and other genome annotation data (such as gene start sites, SNPs, etc.) in a completely standardised format. Each sample has undergone local processing in order the meet the strict MGA format requirements. The original data source, the reformatting procedure and the biological characteristics of the samples are described in an accompanying documentation file manually edited by data curators. 
10 model organisms are currently represented: Homo sapiens, Mus musculus, Danio rerio, Drosophila melanogaster, Apis mellifera, Caenorhabditis elegans, Arabidopsis thaliana, Zea mays, Saccharomyces cerevisiae and Schizosaccharomyces pombe. As of today, the resource contains over 24 000 samples. In conjunction with other tools developed by our group (the ChIP-Seq and SSA servers), it allows users to carry out a great variety of analysis task with MGA samples, such as making aggregation plots and heat maps for selected genomic regions, finding peak regions, generating custom tracks for visualizing genomic features in a UCSC genome browser window, or downloading chromatin data in a table format suitable for local processing with more advanced statistical analysis software such as R. Home page: http://ccg.vital-it.ch/mga/.",2018-01-01 +33543751,Crypt4GH: a file format standard enabling native access to encrypted data. ,"The majority of genome analysis tools and pipelines require data to be decrypted for access. This potentially leaves sensitive genetic data exposed, either because the unencrypted data is not removed after analysis, or because the data leaves traces on the permanent storage medium. We defined a file container specification enabling direct byte-level compatible random access to encrypted genetic data stored in community standards such as SAM/BAM/CRAM/VCF/BCF. By standardizing this format, we show how it can be added as a native file format to genomic libraries, enabling direct analysis of encrypted data without the need to create a decrypted copy. The Crypt4GH specification can be found at: http://samtools.github.io/hts-specs/crypt4gh.pdf.",2021-02-05 +34734992,CSM-AB: graph-based antibody-antigen binding affinity prediction and docking scoring function. ,"Understanding antibody-antigen interactions is key to improving their binding affinities and specificities. 
While experimental approaches are fundamental for developing new therapeutics, computational methods can provide quick assessment of binding landscapes, guiding experimental design. Despite this, little effort has been devoted to accurately predicting the binding affinity between antibodies and antigens and to develop tailored docking scoring functions for this type of interaction. Here, we developed CSM-AB, a machine learning method capable of predicting antibody-antigen binding affinity by modelling interaction interfaces as graph-based signatures. CSM-AB outperformed alternative methods achieving a Pearson's correlation of up to 0.64 on blind tests. We also show CSM-AB can accurately rank near-native poses, working effectively as a docking scoring function. We believe CSM-AB will be an invaluable tool to assist in the development of new immunotherapies. CSM-AB is freely available as a user-friendly web interface and API at http://biosig.unimelb.edu.au/csm_ab. Supplementary data are available at Bioinformatics online.",2021-11-04 +34014278,3DBionotes COVID-19 Edition.,"

Summary

The web platform 3DBionotes-WS integrates multiple Web Services and an interactive Web Viewer to provide a unified environment in which biological annotations can be analyzed in their structural context. Since the COVID-19 outbreak, new structural data from many viral proteins have been provided at a very fast pace. This effort includes many cryogenic Electron Microscopy (cryo-EM) studies, together with more traditional ones (X-rays, NMR), using several modeling approaches and complemented with structural predictions. At the same time, a plethora of new genomics and interactomics information (including fragment screening and structure-based virtual screening efforts) have been made available from different servers. In this context we have developed 3DBionotes-COVID-19 as an answer to: (1) The need to explore multi-omics data in a unified context with a special focus on structural information and (2) the drive to incorporate quality measurements, especially in the form of advanced validation metrics for cryogenic Electron Microscopy.

Availability

https://3dbionotes.cnb.csic.es/ws/covid19.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-20 +33964130,GSpace: an exact coalescence simulator of recombining genomes under isolation by distance. ,"Simulation-based inference can bypass the limitations of statistical methods based on analytical approximations, but software allowing simulation of structured population genetic data without the classical n-coalescent approximations (such as those following from assuming large population size) are scarce or slow. We present GSpace, a simulator for genomic data, based on a generation-by-generation coalescence algorithm taking into account small population size, recombination, and isolation by distance. Freely available at site web INRAe (http://www1.montpellier.inra.fr/CBGP/software/gspace/download.html).",2021-05-08 +34559105,Computational study on novel natural inhibitors targeting c-MET.,"

Abstract

This study was designed to select ideal lead compounds and preclinical drug candidates http://dict.youdao.com/w/eng/preclinical_drug_candidate/javascript:void (0); with inhibitory effect on c-MET from the drug library (ZINC database). A battery of computer-aided virtual techniques was used to identify possible inhibitors of c-MET. A total of 17,931 ligands were screened from the ZINC15 database. LibDock is applied for structure-based screening followed by absorption, distribution, metabolic, and excretion, and toxicity prediction. Molecular docking was conducted to confirm the binding affinity mechanism between the ligand and c-MET. Molecular dynamics simulations were used to assess the stability of ligand-c-MET complexes. Two new natural compounds ZINC000005879645 and ZINC000002528509 were found to bind to c-MET in the ZINC database, showing higher binding affinity. In addition, they were predicted to have lower rodent carcinogenicity, Ames mutagenicity, developmental toxicity potential, and high tolerance to cytochrome P4502D6. Molecular dynamics simulation shows that ZINC000005879645 and ZINC000002528509 have more favorable potential energies with c-MET, which could exist stably in the natural environment. This study suggests that ZINC000005879645 and ZINC000002528509 are ideal latent inhibitors of c-MET targeting. As drug candidates, these 2 compounds have low cytotoxicity and hepatotoxicity as well as important implications for the design and improvement of c-MET target drugs.",2021-09-01 +34467754,Ligand Strain Energy in Large Library Docking.,"While small molecule internal strain is crucial to molecular docking, using it in evaluating ligand scores has remained elusive. Here, we investigate a technique that calculates strain using relative torsional populations in the Cambridge Structural Database, enabling fast precalculation of these energies. 
In retrospective studies of large docking screens of the dopamine D4 receptor and of AmpC β-lactamase, where close to 600 docking hits were tested experimentally, including such strain energies improved hit rates by preferentially reducing the ranks of strained high-scoring decoy molecules. In a 40-target subset of the DUD-E benchmark, we found two thresholds that usefully distinguished between ligands and decoys: one based on the total strain energy of the small molecules and another based on the maximum strain allowed for any given torsion within them. Using these criteria, about 75% of the benchmark targets had improved enrichment after strain filtering. Relying on precalculated population distributions, this approach is rapid, taking less than 0.04 s to evaluate a conformation on a standard core, making it pragmatic for precalculating strain in even ultralarge libraries. Since it is scoring function agnostic, it may be useful to multiple docking approaches; it is openly available at http://tldr.docking.org.",2021-09-01 +33876217,CanDriS: posterior profiling of cancer-driving sites based on two-component evolutionary model. ,"Current cancer genomics databases have accumulated millions of somatic mutations that remain to be further explored. Due to the over-excess mutations unrelated to cancer, the great challenge is to identify somatic mutations that are cancer-driven. Under the notion that carcinogenesis is a form of somatic-cell evolution, we developed a two-component mixture model: while the ground component corresponds to passenger mutations, the rapidly evolving component corresponds to driver mutations. Then, we implemented an empirical Bayesian procedure to calculate the posterior probability of a site being cancer-driven. 
Based on these, we developed a software CanDriS (Cancer Driver Sites) to profile the potential cancer-driving sites for thousands of tumor samples from the Cancer Genome Atlas and International Cancer Genome Consortium across tumor types and pan-cancer level. As a result, we identified that approximately 1% of the sites have posterior probabilities larger than 0.90 and listed potential cancer-wide and cancer-specific driver mutations. By comprehensively profiling all potential cancer-driving sites, CanDriS greatly enhances our ability to refine our knowledge of the genetic basis of cancer and might guide clinical medication in the upcoming era of precision medicine. The results were displayed in a database CandrisDB (http://biopharm.zju.edu.cn/candrisdb/).",2021-09-01 +33834201,A community-supported metaproteomic pipeline for improving peptide identifications in hydrothermal vent microbiota. ,"Microorganisms in deep-sea hydrothermal vents provide valuable insights into life under extreme conditions. Mass spectrometry-based proteomics has been widely used to identify protein expression and function. However, the metaproteomic studies in deep-sea microbiota have been constrained largely by the low identification rates of protein or peptide. To improve the efficiency of metaproteomics for hydrothermal vent microbiota, we firstly constructed a microbial gene database (HVentDB) based on 117 public metagenomic samples from hydrothermal vents and proposed a metaproteomic analysis strategy, which takes the advantages of not only the sample-matched metagenome, but also the metagenomic information released publicly in the community of hydrothermal vents. A two-stage false discovery rate method was followed up to control the risk of false positive. By applying our community-supported strategy to a hydrothermal vent sediment sample, about twice as many peptides were identified when compared with the ways against the sample-matched metagenome or the public reference database. 
In addition, more enriched and explainable taxonomic and functional profiles were detected by the HVentDB-based approach exclusively, as well as many important proteins involved in methane, amino acid, sugar, glycan metabolism and DNA repair, etc. The new metaproteomic analysis strategy will enhance our understanding of microbiota, including their lifestyles and metabolic capabilities in extreme environments. The database HVentDB is freely accessible from http://lilab.life.sjtu.edu.cn:8080/HventDB/main.html.",2021-09-01 +34604978,"Variants of SARS-CoV-2, their effects on infection, transmission and neutralization by vaccine-induced antibodies.","OBJECTIVE: The current study reviewed Severe Acute Respiratory Syndrome Coronavirus-2 (SARS-CoV-2) variants for their effects on infection, transmission and neutralization by vaccine-induced antibodies. MATERIALS AND METHODS: The research articles for the current study were searched over PubMed, Google Scholar, EMBASE and Web of Science online databases. The keywords used were: ((""SARS-CoV-2"" OR ""COVID-19"") AND (""mutation"" OR ""variant"") AND (""death"" OR ""hospitalization"" OR ""infection"" OR ""transmission"") AND (""antibody"" OR ""neutralize"" OR ""vaccine"")). A total of 333 research articles were retrieved through online-database search. These articles were further scrutinized for their relevancy. Additionally, searches were performed to find the latest relevant information over Google search engine and relevant news browsers. Finally, around 35 germane articles were considered for scripting the current report. RESULTS: The mutations have changed amino acids at key positions in spike protein viz. S477N, E484K, Q677H, E484Q, L452R, K417T, K417N and N501Y. These mutations are relevant for different characteristics and are present in newly evolved strains of SARS-CoV-2 like E484K in B.1.526, B.1.525, P.2, B.1.1.7, P.1 and B.1.351. 
Mutations have increased the immune escape potential leading to 3.5-6.5-folds decrease in neutralization of antibodies (Pfizer and Moderna vaccines). The variant, B.1.617 circulating in India and many other countries (double variant) having E484Q and L452R mutations, has raised the infection rate and decreased the neutralization capacity of the vaccine-induced antibodies. Deadly K417N+E484K+N501Y triplet mutations found in B.1.351 and P.1 have increased the transmission ability of these strains by 50% leading to greater COVID-19 hospitalization, ICU admissions and deaths. CONCLUSIONS: The new SARS-CoV-2 variants have compromised the neutralization potential of the currently used vaccines, but still, they have considerable efficacy to reduce infection and mortality.

Graphical abstract

https://www.europeanreview.org/wp/wp-content/uploads/Graphical_Abstract.jpg.",2021-09-01 +34591515,"""I am what I am: A meta-analysis of the association between substance user identities and substance use-related outcomes."" Correction to Montes and Pearson (2021).","Reports an error in ""I am what I am: A meta-analysis of the association between substance user identities and substance use-related outcomes"" by Kevin S. Montes and Matthew R. Pearson (Psychology of Addictive Behaviors, 2021[May], Vol 35[3], 231-246). In the article (https://doi.org/10.1037/adb0000721), the affiliation names for the authors were incorrect. The correct affiliation names are Department of Psychology, California State University, Dominguez Hills, for Kevin S. Montes; and Center on Alcohol, Substance Use, & Addictions, University of New Mexico, for Matthew R. Pearson. (The following abstract of the original article appeared in record 2021-33942-001.) Objective: Research indicates that a substance user identity (i.e., drinking, smoking, and marijuana identity) is positively correlated with substance use-related outcomes (e.g., frequency, quantity, consequences, and disorder symptoms). The current study aimed to meta-analytically derive single, weighted effect size estimates of the identity-outcome association as well as to examine moderators (e.g., substance use type, explicit/implicit assessment, demographic characteristics, and research design) of this association.

Method

Random effects meta-analysis was conducted on 70 unique samples that assessed substance user identity and at least one substance use-related outcome (frequency, quantity, consequences, and/or disorder symptoms), and provided the necessary information for effect size calculations.

Results

Substance user identity was found to be a statistically significant moderate-to-large correlate of all substance use-related outcomes examined in the current study ($r_w = .365$, $p < .001$, $r_w^2 = .133$). The strongest associations were observed between identity and disorder symptoms (alcohol) and frequency of substance use (tobacco or marijuana). In terms of moderators of the identity-outcome association, the link between explicit drinking identity and alcohol use-related outcomes appeared to be stronger in magnitude than the relationship between implicit drinking identity and alcohol use-related outcomes; however, this difference appears to be largely due to the finding that implicit measures have lower reliability. The strongest identity-outcome association was observed among younger individuals.

Conclusions

Substance user identity is clearly an important correlate of substance use-related outcomes and this association is stronger among younger individuals. Additional theoretical, empirical, and intervention research is needed to utilize knowledge gleaned from the current study on the identity-outcome association. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-09-01 +34607527,Transglutaminase 2 as a therapeutic target for neurological conditions.,"

Introduction

Transglutaminase 2 (TG2) has been implicated in numerous neurological conditions, including neurodegenerative diseases, multiple sclerosis, and CNS injury. Early studies on the role of TG2 in neurodegenerative conditions focused on its ability to 'crosslink' proteins into insoluble aggregates. However, more recent studies have suggested that this is unlikely to be the primary mechanism by which TG2 contributes to the pathogenic processes. Although the specific mechanisms by which TG2 is involved in neurological conditions have not been clearly defined, TG2 regulates numerous cellular processes through which it could contribute to a specific disease. Given the fact that TG2 is a stress-induced gene and elevated in disease or injury conditions, TG2 inhibitors may be useful neurotherapeutics.

Areas covered

Overview of TG2 and different TG2 inhibitors. A brief review of TG2 in neurodegenerative diseases, multiple sclerosis and CNS injury and inhibitors that have been tested in different models. Database search: https://pubmed.ncbi.nlm.nih.gov prior to 1 July 2021.

Expert opinion

Currently, it appears unlikely that inhibiting TG2 in the context of neurodegenerative diseases would be therapeutically advantageous. However, for multiple sclerosis and CNS injuries, TG2 inhibitors may have the potential to be therapeutically useful and thus there is rationale for their further development.",2021-09-01 +34539007,Association of HScore Parameters with Severe COVID-19: A Systematic Review and Meta-Analysis.,"

Background

Several reports have associated the severe Coronavirus disease-2019 (sCOVID-19) with secondary-hemophagocytic lymphohistiocytosis (sHLH) and proposed utilizing the hemophagocytic syndrome diagnostic score (HScore) for sCOVID-19 patients. We conducted a systematic review and meta-analysis to find the possible association of HScore parameters with severity in COVID-19 patients.

Methods

A systematic search was performed in Medline via PubMed, EMBASE, and Cochrane databases using all HScore and COVID-19 keywords. The studies were all from 2020, and the study language was limited to English. The records were screened based on inclusion/exclusion criteria. Random/fixed-effect models were employed for meta-analysis, based on the I2 index of parameters. The pooled mean differences were estimated for continuous parameters. The pooled odds-ratio was estimated for fever. The level of significance was set at 0.05.

Results

Eighteen studies (comprising 2459 patients) out of 26151 screened studies were included in this meta-analysis. The results showed that the level of leukocyte, neutrophil, aspartate transaminase (AST), ferritin, and fibrinogen were significantly higher in sCOVID-19 patients than in non-severe ones. Significant lower levels of lymphocyte, platelet, and hemoglobin were also found in sCOVID-19 patients than non-severe patients. Fever was nearly associated with two times increased odds of sCOVID-19 (P=0.051).

Conclusion

Lymphopenia, thrombocytopenia, hypohemoglobinemia, hyperferritinemia, high levels of AST, and fever are common features of both sCOVID-19 and HLH. However, the leukocytosis, neutrophilia, and hyperfibrinogenemia found in sCOVID-19 are in contrast with HScore. Conclusively, HScore parameters could be risk factors for sCOVID-19. However, some parameters' roles are contradictory, suggesting the need for further investigation and a new way of HScore interpretation in sCOVID-19 patients.A preprint of this study was published at https://www.researchsquare.com/article/rs-54490/v2.",2021-09-01 +32071071,BlobToolKit - Interactive Quality Assessment of Genome Assemblies.,"Reconstruction of target genomes from sequence data produced by instruments that are agnostic as to the species-of-origin may be confounded by contaminant DNA. Whether introduced during sample processing or through co-extraction alongside the target DNA, if insufficient care is taken during the assembly process, the final assembled genome may be a mixture of data from several species. Such assemblies can confound sequence-based biological inference and, when deposited in public databases, may be included in downstream analyses by users unaware of underlying problems. We present BlobToolKit, a software suite to aid researchers in identifying and isolating non-target data in draft and publicly available genome assemblies. BlobToolKit can be used to process assembly, read and analysis files for fully reproducible interactive exploration in the browser-based Viewer. BlobToolKit can be used during assembly to filter non-target DNA, helping researchers produce assemblies with high biological credibility. 
We have been running an automated BlobToolKit pipeline on eukaryotic assemblies publicly available in the International Nucleotide Sequence Data Collaboration and are making the results available through a public instance of the Viewer at https://blobtoolkit.genomehubs.org/view We aim to complete analysis of all publicly available genomes and then maintain currency with the flow of new genomes. We have worked to embed these views into the presentation of genome assemblies at the European Nucleotide Archive, providing an indication of assembly quality alongside the public record with links out to allow full exploration in the Viewer.",2020-04-09 +31290936,MemBlob database and server for identifying transmembrane regions using cryo-EM maps.,"

Summary

The identification of transmembrane helices in transmembrane proteins is crucial, not only to understand their mechanism of action but also to develop new therapies. While experimental data on the boundaries of membrane-embedded regions are sparse, this information is present in cryo-electron microscopy (cryo-EM) density maps and it has not been utilized yet for determining membrane regions. We developed a computational pipeline, where the inputs of a cryo-EM map, the corresponding atomistic structure, and the potential bilayer orientation determined by TMDET algorithm of a given protein result in an output defining the residues assigned to the bulk water phase, lipid interface and the lipid hydrophobic core. Based on this method, we built a database involving published cryo-EM protein structures and a server to be able to compute this data for newly obtained structures.

Availability and implementation

http://memblob.hegelab.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +33985424,BoardION: real-time monitoring of Oxford Nanopore sequencing instruments.,"

Background

One of the main advantages of the Oxford Nanopore Technology (ONT) is the possibility of real-time sequencing. This gives access to information during the experiment and allows either to control the sequencing or to stop the sequencing once the results have been obtained. However, the ONT sequencing interface is not sufficient to explore the quality of sequencing data in depth and existing quality control tools do not take full advantage of real-time data streaming.

Results

Herein, we present BoardION, an interactive web application to analyze the efficiency of ONT sequencing runs. The interactive interface of BoardION allows users to easily explore sequencing metrics and optimize the quantity and the quality of the data generated during the experiment. It also enables the comparison of multiple flowcells to assess library preparation protocols or the quality of input samples.

Conclusion

BoardION is dedicated to people who manage ONT sequencing instruments and allows them to remotely and in real time monitor their experiments and compare multiple sequencing runs. Source code, a Docker image and a demo version are available at http://www.genoscope.cns.fr/boardion/ .",2021-05-13 +30689843,ccPDB 2.0: an updated version of datasets created and compiled from Protein Data Bank. ,"ccPDB 2.0 (http://webs.iiitd.edu.in/raghava/ccpdb) is an updated version of the manually curated database ccPDB that maintains datasets required for developing methods to predict the structure and function of proteins. The number of datasets compiled from literature increased from 45 to 141 in ccPDB 2.0. Similarly, the number of protein structures used for creating datasets also increased from ~74 000 to ~137 000 (PDB March 2018 release). ccPDB 2.0 provides the same web services and flexible tools which were present in the previous version of the database. In the updated version, links of the number of methods developed in the past few years have also been incorporated. This updated resource is built on responsive templates which is compatible with smartphones (mobile, iPhone, iPad, tablets etc.) and large screen gadgets. In summary, ccPDB 2.0 is a user-friendly web-based platform that provides comprehensive as well as updated information about datasets.",2019-01-01 +34085586,A Decade of Drinking: Temporal Trends in Apparent Household Beer Intake and Standard Drink Consumption in the United States.,"Beer remains the greatest source of per capita alcohol consumption in the United States, and increasing market availability and consumer demand for higher alcohol has meaningful public health consequences. 
Objectives: To determine whether apparent alcohol intake from beer changed among households over time, we used nationally-representative US Nielsen Consumer Panel purchasing data from 2004 to 2014, and incorporated information on percent alcohol by volume (ABV) to compute the number of standard drinks of alcohol consumed from beer as a result. Methods: We queried external data sources (e.g. official manufacture, consumer beer-related websites) to obtain beer-specific ABVs, merged this information with Nielsen consumer-level data, and calculated the average rate of beer and standard drink consumption per household per year. We used joinpoint regression to estimate annual percentage changes and annual absolute changes in intake over time, with separate piecewise linear segments fit between years if a significant deviation in trend was detected. Results: Higher alcohol content beer consumption increased steadily across the decade, accounting for 9.6% of total intake in 2004 compared to 21.6% of total intake by 2014. Standard drink intake from beer declined sharply post-2011 by 3.04% annually (95% CI: -5.93, -0.06) or by 4.52 standard drinks (95% CI: -8.69, -0.35) yearly - coinciding with several beer industry transitions, market share fluctuations, and consumer preference changes for beer occurring around that time. Conclusions: Despite consistent increases in higher alcohol content beer intake across the decade, households do not appear to be consuming more standard drinks of alcohol from beer as a result.Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1928208 .",2021-06-04 +32657416,MutCombinator: identification of mutated peptides allowing combinatorial mutations using nucleotide-based graph search.,"

Motivation

Proteogenomics has proven its utility by integrating genomics and proteomics. Typical approaches use data from next-generation sequencing to infer proteins expressed. A sample-specific protein sequence database is often adopted to identify novel peptides from matched mass spectrometry-based proteomics; nevertheless, there is no software that can practically identify all possible forms of mutated peptides suggested by various genomic information sources.

Results

We propose MutCombinator, which enables us to practically identify mutated peptides from tandem mass spectra allowing combinatorial mutations during the database search. It uses an upgraded version of a variant graph, keeping track of frame information. The variant graph is indexed by nine nucleotides for fast access. Using MutCombinator, we could identify more mutated peptides than previous methods, because combinations of point mutations are considered and also because it can be practically applied together with a large mutation database such as COSMIC. Furthermore, MutCombinator supports in-frame search for coding regions and three-frame search for non-coding regions.

Availability and implementation

https://prix.hanyang.ac.kr/download/mutcombinator.jsp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-07-01 +31588509,MutEx: a multifaceted gateway for exploring integrative pan-cancer genomic data.,"Somatic mutation and gene expression dysregulation are considered two major tumorigenesis factors. While independent investigations of either factor pervade, studies of associations between somatic mutations and gene expression changes have been sporadic and nonsystematic. Utilizing genomic data collected from 11 315 subjects of 33 distinct cancer types, we constructed MutEx, a pan-cancer integrative genomic database. This database records the relationships among gene expression, somatic mutation and survival data for cancer patients. MutEx can be used to swiftly explore the relationship between these genomic/clinic features within and across cancer types and, more importantly, search for corroborating evidence for hypothesis inception. Our database also incorporated Gene Ontology and several pathway databases to enhance functional annotation, and elastic net and a gene expression composite score to aid in survival analysis. To demonstrate the usability of MutEx, we provide several application examples, including top somatic mutations associated with the most extensive expression dysregulation in breast cancer, differential mutational burden downstream of DNA mismatch repair gene mutations and composite gene expression score-based survival difference in breast cancer. MutEx can be accessed at http://www.innovebioinfo.com/Databases/Mutationdb_About.php.",2020-07-01 +30805385,Cloning and Expression Analysis of ZmERD3 Gene From Zea mays.,"

Background

Stresses (such as drought, salt, viruses, and others) seriously affect plant productivity. To cope with these threats, plants express a large number of genes, including several members of ERD (early responsive to dehydration) genes to synthesize and assemble adaptive molecules. But, the function of ERD3 gene hasn't been known so far.

Objectives

The purpose of the present study was to clone the stress-resistance gene: ZmERD3, and to analyze its expression pattern in the maize plant organs at different stages and under various stress treatments.

Materials and methods

MaizeGDB database search together with the bioinformatics analysis led to the identification of ZmERD3 gene in Zea mays. The cDNA sequence and promoter of ZmERD3 gene were obtained through PCR. Bioinformatics analysis was performed through online tools. The tissue-specific expression profile of the ZmERD3 gene in maize plant was carried out using the quantitative real time PCR (qRT-PCR) technique and its expression pattern in response to stress treatments (such as PEG, NaCl, ABA, and low temperature) was also analyzed through qRT-PCR method.

Results

Based on the homology alignment with AtERD3 (XP_002867953) in MaizeGDB (http://www. maizegdb.org/), the cDNA sequence and promoter region of the ZmERD3 gene were obtained. The bioinformatic analysis showed that ZmERD3 protein has one specific hit of methyltransferase and a high probability of location in the cytoplasm, and there are many cis-regulatory elements responsive to light, heat, cold, dehydration, as well as other stresses in its promoter sequence. Expression analysis revealed that the amount of ZmERD3 mRNA is different in all indicated organs of the maize plant. In addition, the ZmERD3 expression could be induced by abiotic stress treatments. Compared to the control, treatment with NaCl or PEG-6000 could significantly enhance the expression ability of ZmERD3 gene. As well, its expression level was increased about 20 times above the control after exposure to NaCl and PEG-6000 treatments for 3-6 h.

Conclusions

One putative methyltransferase gene, ZmERD3 was cloned. ZmERD3 expression exhibited an obvious tissue-specificity, and its expression could make a significant response to NaCl and PEG-6000 treatments.",2018-05-15 +34329304,pyProGA-A PyMOL plugin for protein residue network analysis.,"The field of protein residue network (PRN) research has brought several useful methods and techniques for structural analysis of proteins and protein complexes. Many of these are ripe and ready to be used by the proteomics community outside of the PRN specialists. In this paper we present software which collects an ensemble of (network) methods tailored towards the analysis of protein-protein interactions (PPI) and/or interactions of proteins with ligands of other type, e.g. nucleic acids, oligosaccharides etc. In parallel, we propose the use of the network differential analysis as a method to identify residues mediating key interactions between proteins. We use a model system, to show that in combination with other, already published methods, also included in pyProGA, it can be used to make such predictions. Such extended repertoire of methods allows to cross-check predictions with other methods as well, as we show here. In addition, the possibility to construct PRN models from various kinds of input is so far a unique asset of our code. One can use structural data as defined in PDB files and/or from data on residue pair interaction energies, either from force-field parameters or fragment molecular orbital (FMO) calculations. pyProGA is a free open-source software available from https://gitlab.com/Vlado_S/pyproga.",2021-07-30 +30380072,LncRNA2Target v2.0: a comprehensive database for target genes of lncRNAs in human and mouse.,"Long non-coding RNAs (lncRNAs) play crucial roles in regulating gene expression, and a growing number of researchers have focused on the identification of target genes of lncRNAs. 
However, no online repository is available to collect the information on target genes regulated by lncRNAs. To make it convenient for researchers to know what genes are regulated by a lncRNA of interest, we developed a database named lncRNA2Target to provide a comprehensive resource of lncRNA target genes in 2015. To update the database this year, we retrieved all new lncRNA-target relationships from papers published from 1 August 2014 to 30 April 2018 and RNA-seq datasets before and after knockdown or overexpression of a specific lncRNA. LncRNA2Target database v2.0 provides a web interface through which its users can search for the targets of a particular lncRNA or for the lncRNAs that target a particular gene, and is freely accessible at http://123.59.132.21/lncrna2target.",2019-01-01 +30357587,The Japan Monkey Centre Primates Brain Imaging Repository for comparative neuroscience: an archive of digital records including records for endangered species.,"Advances in magnetic resonance imaging (MRI) and computational analysis technology have enabled comparisons among various primate brains in a three-dimensional electronic format. Results from comparative studies provide information about common features across primates and species-specific features of neuroanatomy. Investigation of various species of non-human primates is important for understanding such features, but the majority of comparative MRI studies have been based on experimental primates, such as common marmoset, macaques, and chimpanzee. A major obstacle has been the lack of a database that includes non-experimental primates' brain MRIs. To facilitate scientific discoveries in the field of comparative neuroanatomy and brain evolution, we launched a collaborative project to develop an open-resource repository of non-human primate brain images obtained using ex vivo MRI. 
As an initial open resource, here we release a collection of structural MRI and diffusion tensor images obtained from 12 species: pygmy marmoset, owl monkey, white-fronted capuchin, crab-eating macaque, Japanese macaque, bonnet macaque, toque macaque, Sykes' monkey, red-tailed monkey, Schmidt's guenon, de Brazza's guenon, and lar gibbon. Sixteen postmortem brain samples from the 12 species, stored in the Japan Monkey Centre (JMC), were scanned using a 9.4-T MRI scanner and made available through the JMC collaborative research program ( http://www.j-monkey.jp/BIR/index_e.html ). The expected significant contributions of the JMC Primates Brain Imaging Repository include (1) resources for comparative neuroscience research, (2) preservation of various primate brains, including those of endangered species, in a permanent digital form, (3) resources with higher resolution for identifying neuroanatomical features, compared to previous MRI atlases, (4) resources for optimizing methods of scanning large fixed brains, and (5) references for veterinary neuroradiology. User-initiated research projects beyond these contributions are also anticipated.",2018-10-24 +31504214,CroP-Coordinated Panel visualization for biological networks analysis.,"SUMMARY:CroP is a data visualization application that focuses on the analysis of relational data that changes over time. While it was specifically designed for addressing the preeminent need to interpret large scale time series from gene expression studies, CroP is prepared to analyze datasets from multiple contexts. Multiple datasets can be uploaded simultaneously and viewed through dynamic visualization models, which are contained within flexible panels that allow users to adapt the workspace to their data. Through clustering and the time curve visualization it is possible to quickly identify groups of data points with similar proprieties or behaviors, as well as temporal patterns across all points, such as periodic waves of expression. 
Additionally, it integrates a public biomedical database for gene annotation. CroP will be of major interest to biologists who seek to extract relations from complex sets of data. AVAILABILITY AND IMPLEMENTATION:CroP is freely available for download as an executable jar at https://cdv.dei.uc.pt/crop/.",2020-02-01 +34878869,"Effects of Government-Implemented Cash Plus Model on Violence Experiences and Perpetration Among Adolescents in Tanzania, 2018‒2019.","Objectives. To examine the impacts of a government-implemented cash plus program on violence experiences and perpetration among Tanzanian adolescents. Methods. We used data from a cluster randomized controlled trial (n = 130 communities) conducted in the Mbeya and Iringa regions of Tanzania to isolate impacts of the ""plus"" components of the cash plus intervention. The panel sample comprised 904 adolescents aged 14 to 19 years living in households receiving a government cash transfer. We estimated intent-to-treat impacts on violence experiences, violence perpetration, and pathways of impact. Results. The plus intervention reduced female participants' experiences of sexual violence by 5 percentage points and male participants' perpetration of physical violence by 6 percentage points. There were no intervention impacts on emotional violence, physical violence, or help seeking. Examining pathways, we found positive impacts on self-esteem and participation in livestock tending and, among female participants, a positive impact on sexual debut delays and a negative effect on school attendance. Conclusions. By addressing poverty and multidimensional vulnerability, integrated social protection can reduce violence. Public Health Implications. There is high potential for scale-up and sustainability, and this program reaches some of the most vulnerable and marginalized adolescents. (Am J Public Health. 2021;111(12):2227-2238. 
https://doi.org/10.2105/AJPH.2021.306509).",2021-12-01 +33056713,"Misidentification of Bellator gymnostethus (Gilbert, 1892) as Prionotus ruscarius Gilbert amp; Starks, 1904 (Scorpaeniformes: Triglidae).","The checklist by Robertson et al. (2017) of fishes from the tropical eastern Pacific included information on three members of the family Triglidae: Bellator loxias (Jordan, 1897), Prionotus ruscarius and P. stephanophrys Lockington, 1881. Unfortunately, the identification of four specimens as P. ruscarius is incorrect, as they are Bellator gymnostethus. We thank Benjamin Victor for bringing these misidentifications to our notice through his work with mtDNA sequence data from the Barcode of Life Database (BOLD: http://www.boldsystems.org). The photographs of the four specimens on the BOLD website clearly depict a Bellator species rather than a Prionotus. However, the photograph in the 2017 paper (Figure 75, page 78), is correctly identified and labelled as P. ruscarius, and this species was collected on the cruise of the Miguel Oliver discussed in Robertson et al. (2017), see Benavides Moreno et al. (2019). This correction brings the number of triglids collected on that cruise to four species.",2020-09-14 +29220077,Using the Arabidopsis Information Resource (TAIR) to Find Information About Arabidopsis Genes.,"The Arabidopsis Information Resource (TAIR; http://arabidopsis.org) is a comprehensive Web resource of Arabidopsis biology for plant scientists. TAIR curates and integrates information about genes, proteins, gene function, orthologs, gene expression, mutant phenotypes, biological materials such as clones and seed stocks, genetic markers, genetic and physical maps, genome organization, images of mutant plants, protein sub-cellular localizations, publications, and the research community. The various data types are extensively interconnected and can be accessed through a variety of Web-based search and display tools. 
This unit primarily focuses on some basic methods for searching, browsing, visualizing, and analyzing information about Arabidopsis genes and genome. Additionally, we describe how members of the community can share data using TAIR's Online Annotation Submission Tool (TOAST), in order to make their published research more accessible and visible. © 2017 by John Wiley & Sons, Inc.",2017-12-08 +34624332,3D-cardiomics: A spatial transcriptional atlas of the mammalian heart.,"Understanding the spatial gene expression and regulation in the heart is key to uncovering its developmental and physiological processes, during homeostasis and disease. Numerous techniques exist to gain gene expression and regulation information in organs such as the heart, but few utilize intuitive true-to-life three-dimensional representations to analyze and visualise results. Here we combined transcriptomics with 3D-modelling to interrogate spatial gene expression in the mammalian heart. For this, we microdissected and sequenced transcriptome-wide 18 anatomical sections of the adult mouse heart. Our study has unveiled known and novel genes that display complex spatial expression in the heart sub-compartments. We have also created 3D-cardiomics, an interface for spatial transcriptome analysis and visualization that allows the easy exploration of these data in a 3D model of the heart. 3D-cardiomics is accessible from http://3d-cardiomics.erc.monash.edu/.",2021-10-05 +33216126,eMPRess: a systematic cophylogeny reconciliation tool.,"

Summary

We describe eMPRess, a software program for phylogenetic tree reconciliation under the duplication-transfer-loss model that systematically addresses the problems of choosing event costs and selecting representative solutions, enabling users to make more robust inferences.

Availability and implementation

eMPRess is freely available at http://www.cs.hmc.edu/empress.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-08-01 +30010738,HRPDviewer: human ribosome profiling data viewer. ,"Translational regulation plays an important role in protein synthesis. Dysregulation of translation causes abnormal cell physiology and leads to diseases such as inflammatory disorders and cancers. An emerging technique, called ribosome profiling (ribo-seq), was developed to capture a snapshot of translation. It is based on deep sequencing of ribosome-protected mRNA fragments. A lot of ribo-seq data have been generated in various studies, so databases are needed for depositing and visualizing the published ribo-seq data. Nowadays, GWIPS-viz, RPFdb and TranslatomeDB are the three largest databases developed for this purpose. However, two challenges remain to be addressed. First, GWIPS-viz and RPFdb databases align the published ribo-seq data to the genome. Since ribo-seq data aim to reveal the actively translated mRNA transcripts, there are advantages of aligning ribo-req data to the transcriptome over the genome. Second, TranslatomeDB does not provide any visualization and the other two databases only provide visualization of the ribo-seq data around a specific genomic location, while simultaneous visualization of the ribo-seq data on multiple mRNA transcripts produced from the same gene or different genes is desired. To address these two challenges, we developed the Human Ribosome Profiling Data viewer (HRPDviewer). HRPDviewer (i) contains 610 published human ribo-seq datasets from Gene Expression Omnibus, (ii) aligns the ribo-seq data to the transcriptome and (iii) provides visualization of the ribo-seq data on the selected mRNA transcripts. Using HRPDviewer, researchers can compare the ribosome binding patterns of multiple mRNA transcripts from the same gene or different genes to gain an accurate understanding of protein synthesis in human cells. 
We believe that HRPDviewer is a useful resource for researchers to study translational regulation in human.Database URL: http://cosbi4.ee.ncku.edu.tw/HRPDviewer/ or http://cosbi5.ee.ncku.edu.tw/HRPDviewer/.",2018-01-01 +34818340,The prevalence of patellofemoral pain in the Rugby League World Cup (RLWC) 2021 spectators: A protocol of a cross-sectional study.,"Patellofemoral pain (PFP) can cause significant pain leading to limitations in societal participation and physical activity. PFP is usually associated with athletes undergoing intensive physical training, or military recruits; but recent evidence shows that PFP is common in the general population. The relationship of PFP with physical activity is not entirely clear. Our aim is to provide a better estimate of the general population prevalence of PFP and to relate this to the level of physical activity, and demographic characteristics. The Survey instrument for Natural history, Aetiology and Prevalence of Patellofemoral pain Studies (SNAPPS) was developed as a PFP screening tool to be used in the community. The electronic version of the SNAPPS (eSNAPPS) has recently been validated and was used to survey attendees at mass-participation running events. We will use an electronic survey to collect data from a sample of 1100 Rugby League World Cup spectators. The survey will have four sections: i) general and demographic; ii) knee pain (eSNAPPS); iii) level of physical activity; and iv) quality of life in relation to knee pain. The primary analytic approach will be descriptive of PFP prevalence. Secondary analyses will explore the relationships of the presence of PFP and the other variables. 
We will disseminate this work by publication of peer-reviewed papers in scientific journals, presentations at scientific conferences, and on the dedicated SNAPPS website https://www.snappspfp.com/.",2021-11-24 +34915178,ACUTE TRANSCRANIAL DIRECT CURRENT STIMULATION (tDCS) IMPROVES VENTILATORY VARIABILITY AND AUTONOMIC MODULATION IN RESISTANT HYPERTENSIVE PATIENTS.,"Here, we assessed the impact of one session of transcranial direct current stimulation (tDCS) or SHAM (20 min, each) on ventilatory responses to cardiopulmonary exercise test, central and peripheral blood pressure (BP), and autonomic modulation in resistant hypertensive (RHT) patients. RHT subjects (n = 13) were randomly submitted to SHAM and tDCS crossing sessions (1 week of ""washout""). Patients and a technician who set the tDCS/Sham room up were both blind. After brain stimulation, patients were submitted to a cardiopulmonary exercise test to evaluate ventilatory and cardiovascular response to exercise. Hemodynamic (Finometer®, Beatscope), and autonomic variables were measured at baseline (before tDCS/Sham) and after incremental exercise. RESULTS: Our study shows that tDCS condition improved heart rate recovery, VO2 peak, and vagal modulation (after cardiopulmonary exercise test); attenuated the ventilatory variability response, central and peripheral blood pressure well as sympathetic modulation (after cardiopulmonary exercise test) in comparison with SHAM. These data suggest that acute tDCS sessions prevented oscillatory ventilation behavior during the cardiopulmonary exercise test and mitigated the increase of systolic blood pressure in RHT patients. After the exercise test, tDCS promotes better vagal reentry and improved autonomic modulation, possibly reducing central blood pressure and aortic augmentation index compared to SHAM. 
Brazilian Registry of Clinical Trials (ReBEC): https://ensaiosclinicos.gov.br/rg/RBR-8n7c9p.",2021-12-13 +32096817,DIMERBOW: exploring possible GPCR dimer interfaces.,"MOTIVATION:G protein-coupled receptors (GPCRs) can form homo-, heterodimers and larger order oligomers that exert different functions than monomers. The pharmacological potential of such complexes is hampered by the limited information available on the type of complex formed and its quaternary structure. Several GPCR structures in the Protein Data Bank display crystallographic interfaces potentially compatible with physiological interactions. RESULTS:Here, we present DIMERBOW, a database and web application aimed to visually browse the complete repertoire of potential GPCR dimers present in solved structures. The tool is suited to help finding the best possible structural template to model GPCR homomers. AVAILABILITY AND IMPLEMENTATION:DIMERBOW is available at http://lmc.uab.es/dimerbow/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +34692951,"Additional data and experimental setups, for a comparative study of alloys in contact to eutectic melts for thermal storage.","Three different eutectic salt mixtures have been brought into contact with three different high temperature alloys to assess corrosion damages for next-generation CSPs. This article contains additional material to support findings and assessments reported on our main article in the Solar Energy Journal [https://doi.org/10.1016/j.solener.2021.06.069]. Five sections, A-E, provide data to ensure reproducibility and confidence in our claims in the main article. A newly designed experimental setup for high temperature exposures is described as well as impurities within used chemicals. Material thickness measurements document alloy consumption by eutectic salts. Reaction enthalpies are listed illustrating individual metal species in contact with salt species at relevant temperatures. 
Thermodynamic single point equilibrium calculations have extended environmentally induced Laves phase precipitation found for alloy Kanthal APMT in contact with molten chlorides.",2021-10-04 +,Genetic variability of Prunus padus (Rosaceae) elaborates “a new Eurasian phylogeographical paradigm”,"The extent of glacial survival of woody plants in temperate Asia is still poorly known. A reliable way to clarify this issue in the absence of sufficient paleontological data is a phylogeographic analysis of contemporary populations. A recent study of Juniperus communis genetic diversity in Eurasia suggested that this species with wide ecological tolerance survived the glaciation in many periglacial microrefugia at high latitudes and subsequently spread to new areas during interglacials (Hantemirova et al. in J Biogeogr 44:271–282, 2017. https://doi.org/10.1111/jbi.12867). This pattern was termed a “new Eurasian phylogeographical paradigm” as opposed to survival in few major refugia. We have tested the proposed “paradigm” with another hardy species with wide Eurasian area, Prunus padus, to find out if any general phylogeographic patterns may exist for cold-tolerant Eurasian arboreal plant species. We interpret the observed genetic structure [nuclear (ITS) and plastid DNA] of the Eurasian populations of P. padus as plausibly resulted from at least two cycles of glacial survivals in refugia followed by post-glacial colonization events. The species likely originated in East Asia and subsequently spread across all Eurasia. Its continuous range had been fragmented by early-Pleistocene glaciations, when the species survived in the Caucasian and Far Eastern refugia as well as in northern periglacial microrefugia with an active gene flow between them. 
The known major glacial refugia, such as Iberian Peninsula, the Colchis, the Southern Urals, and the Beringia, played little role as a source of the species post-glacial expansion.",2020-02-01 +,Fruit detection and 3D location using instance segmentation neural networks and structure-from-motion photogrammetry,"The development of remote fruit detection systems able to identify and 3D locate fruits provides opportunities to improve the efficiency of agriculture management. Most of the current fruit detection systems are based on 2D image analysis. Although the use of 3D sensors is emerging, precise 3D fruit location is still a pending issue. This work presents a new methodology for fruit detection and 3D location consisting of: (1) 2D fruit detection and segmentation using Mask R-CNN instance segmentation neural network; (2) 3D point cloud generation of detected apples using structure-from-motion (SfM) photogrammetry; (3) projection of 2D image detections onto 3D space; (4) false positives removal using a trained support vector machine. This methodology was tested on 11 Fuji apple trees containing a total of 1455 apples. Results showed that, by combining instance segmentation with SfM the system performance increased from an F1-score of 0.816 (2D fruit detection) to 0.881 (3D fruit detection and location) with respect to the total amount of fruits. The main advantages of this methodology are the reduced number of false positives and the higher detection rate, while the main disadvantage is the high processing time required for SfM, which makes it presently unsuitable for real-time work. From these results, it can be concluded that the combination of instance segmentation and SfM provides high performance fruit detection with high 3D data precision. 
The dataset has been made publicly available and an interactive visualization of fruit detection results is accessible at http://www.grap.udl.cat/documents/photogrammetry_fruit_detection.html.",2020-02-01 +34398224,DeepSec: a deep learning framework for secreted protein discovery in human body fluids. ,"Human proteins that are secreted into different body fluids from various cells can be promising disease indicators. Modern proteomics research empowered by both qualitative and quantitative profiling techniques has made great progress in protein discovery in various human fluids. However, due to the large numbers of proteins and diverse modifications present in the fluids, as well as the existing technical limits of major proteomics platforms (e.g., mass spectrometry), large discrepancies are often generated from different experimental studies. As a result, a comprehensive proteomics landscape across major human fluids are not well determined. To facilitate this process, we have developed a deep learning framework, named DeepSec, to identify secreted proteins in twelve types of human body fluids. DeepSec adopts an end-to-end sequence-based approach, where a Convolutional Neural Network (CNN) is built to learn the abstract sequence features followed by a Bidirectional Gated Recurrent Unit (BGRU) with fully connected layer for protein classification. DeepSec has demonstrated promising performances with average AUCs of 0.85-0.94 on testing datasets in each type of fluids, which outperforms existing state-of-the-art methods available mostly on blood proteins. As an illustration of how to apply DeepSec in biomarker discovery research, we conducted a case study on kidney cancer by using genomics data from the cancer genome atlas (TCGA) and have identified 104 possible marker proteins. DeepSec is available at https://bmbl.bmi.osumc.edu/deepsec/. 
Supplement ary data are available at Bioinformatics online.",2021-08-16 +34452955,International initiative for a curated SDHB variant database improving the diagnosis of hereditary paraganglioma and pheochromocytoma.,"

Background

SDHB is one of the major genes predisposing to paraganglioma/pheochromocytoma (PPGL). Identifying pathogenic SDHB variants in patients with PPGL is essential to the management of patients and relatives due to the increased risk of recurrences, metastases and the emergence of non-PPGL tumours. In this context, the 'NGS and PPGL (NGSnPPGL) Study Group' initiated an international effort to collect, annotate and classify SDHB variants and to provide an accurate, expert-curated and freely available SDHB variant database.

Methods

A total of 223 distinct SDHB variants from 737 patients were collected worldwide. Using multiple criteria, each variant was first classified according to a 5-tier grouping based on American College of Medical Genetics and NGSnPPGL standardised recommendations and was then manually reviewed by a panel of experts in the field.

Results

This multistep process resulted in 23 benign/likely benign, 149 pathogenic/likely pathogenic variants and 51 variants of unknown significance (VUS). Expert curation reduced by half the number of variants initially classified as VUS. Variant classifications are publicly accessible via the Leiden Open Variation Database system (https://databases.lovd.nl/shared/genes/SDHB).

Conclusion

This international initiative by a panel of experts allowed us to establish a consensus classification for 223 SDHB variants that should be used as a routine tool by geneticists in charge of PPGL laboratory diagnosis. This accurate classification of SDHB genetic variants will help to clarify the diagnosis of hereditary PPGL and to improve the clinical care of patients and relatives with PPGL.",2021-08-27 +30395331,The Gene Ontology Resource: 20 years and still GOing strong.,"The Gene Ontology resource (GO; http://geneontology.org) provides structured, computable knowledge regarding the functions of genes and gene products. Founded in 1998, GO has become widely adopted in the life sciences, and its contents are under continual improvement, both in quantity and in quality. Here, we report the major developments of the GO resource during the past two years. Each monthly release of the GO resource is now packaged and given a unique identifier (DOI), enabling GO-based analyses on a specific release to be reproduced in the future. The molecular function ontology has been refactored to better represent the overall activities of gene products, with a focus on transcription regulator activities. Quality assurance efforts have been ramped up to address potentially out-of-date or inaccurate annotations. New evidence codes for high-throughput experiments now enable users to filter out annotations obtained from these sources. GO-CAM, a new framework for representing gene function that is more expressive than standard GO annotations, has been released, and users can now explore the growing repository of these models. 
We also provide the 'GO ribbon' widget for visualizing GO annotations to a gene; the widget can be easily embedded in any web page.",2019-01-01 +31095607,CiliaCarta: An integrated and validated compendium of ciliary genes.,"The cilium is an essential organelle at the surface of mammalian cells whose dysfunction causes a wide range of genetic diseases collectively called ciliopathies. The current rate at which new ciliopathy genes are identified suggests that many ciliary components remain undiscovered. We generated and rigorously analyzed genomic, proteomic, transcriptomic and evolutionary data and systematically integrated these using Bayesian statistics into a predictive score for ciliary function. This resulted in 285 candidate ciliary genes. We generated independent experimental evidence of ciliary associations for 24 out of 36 analyzed candidate proteins using multiple cell and animal model systems (mouse, zebrafish and nematode) and techniques. For example, we show that OSCP1, which has previously been implicated in two distinct non-ciliary processes, causes ciliogenic and ciliopathy-associated tissue phenotypes when depleted in zebrafish. The candidate list forms the basis of CiliaCarta, a comprehensive ciliary compendium covering 956 genes. The resource can be used to objectively prioritize candidate genes in whole exome or genome sequencing of ciliopathy patients and can be accessed at http://bioinformatics.bio.uu.nl/john/syscilia/ciliacarta/.",2019-05-16 +33933133,Wellmap: a file format for microplate layouts.,"

Objective

Microplates are ubiquitous in biological research because they make it easy to collect data for hundreds of different conditions in a single experiment. Despite this, there is no standard method to annotate the wealth of data contained in each plate.

Results

We introduce a new file format, called wellmap, for describing the layout of wells on microplates. The format is text-based and emphasizes being easy to read, write, and share. It is capable of describing any layout for any experiment. It is also accompanied by a tool for generating clear visualizations of layout files, and a simple API for parsing layout files in analysis scripts written in python or R. We have used wellmap in our own research to annotate data from a wide variety of experiments, including qPCR and flow cytometry. Given the large number of experiments that make use of microplates, it is our hope that other researchers will find this file format as useful as we have. For complete instructions on how to install and use wellmap, visit: https://wellmap.rtfd.io .",2021-05-01 +32257241,"Coriander Genomics Database: a genomic, transcriptomic, and metabolic database for coriander.","Coriander (Coriandrum sativum L.), also known as cilantro, is a globally important vegetable and spice crop. Its genome and that of carrot are models for studying the evolution of the Apiaceae family. Here, we developed the Coriander Genomics Database (CGDB, http://cgdb.bio2db.com/) to collect, store, and integrate the genomic, transcriptomic, metabolic, functional annotation, and repeat sequence data of coriander and carrot to serve as a central online platform for Apiaceae and other related plants. Using these data sets in the CGDB, we intriguingly found that seven transcription factor (TF) families showed significantly greater numbers of members in the coriander genome than in the carrot genome. The highest ratio of the numbers of MADS TFs between coriander and carrot reached 3.15, followed by those for tubby protein (TUB) and heat shock factors. As a demonstration of CGDB applications, we identified 17 TUB family genes and conducted systematic comparative and evolutionary analyses. 
RNA-seq data deposited in the CGDB also suggest dose compensation effects of gene expression in coriander. CGDB allows bulk downloading, significance searches, genome browser analyses, and BLAST searches for comparisons between coriander and other plants regarding genomics, gene families, gene collinearity, gene expression, and the metabolome. A detailed user manual and contact information are also available to provide support to the scientific research community and address scientific questions. CGDB will be continuously updated, and new data will be integrated for comparative and functional genomic analysis in Apiaceae and other related plants.",2020-04-01 +34874866,Toward Real-Time Muscle Force Inference and Device Control via Optical-Flow-Tracked Muscle Deformation.,"Despite the utility of musculoskeletal dynamics modeling, there exists no safe, noninvasive method of measuring in vivo muscle output force in real time - limiting both biomechanical insight into dexterous motion and intuitive control of assistive devices. In this paper, we demonstrate that muscle deformation constitutes a promising, yet unexplored signal from which to 1) infer such forces and 2) build novel device control schemes. Through a case study of the elbow joint on a preliminary cohort of 10 subjects, we show that muscle deformation (specifically, thickness change of the brachioradialis, as measured via ultrasound and tracked via optical flow) correlates well with elbow output force to an extent comparable with standard surface electromyography (sEMG) activation during varied isometric elbow contraction. We then show that, given real-time visual feedback, subjects can readily perform a trajectory tracking task using this deformation signal, and that they largely prefer this method to a comparable sEMG-based control scheme and perform the tracking task with similar accuracy. 
Together, these contributions illustrate muscle deformation's potential utility for both biomechanical study of individual muscle dynamics and device control, in a manner that - thanks to, unlike sEMG, the localized nature of the signal and its tight mechanistic coupling to output force - is readily extensible to multiple muscles and device degrees of freedom. To enable such future extensions, all modeling, tracking, and visualization software described in this paper, as well as all raw and processed data, have been made available on SimTK as part of the Open-Arm project (https://simtk.org/projects/openarm) for general research use.",2021-12-23 +32765174,Checklist of rodents and insectivores of the Crimean Peninsula.,"A dataset comprising 6806 records is presented of 17 (of total 24) rodent and insectivore species from the Crimean Peninsula collected during a 35-year period. All records are stored in the Public Mammal Database (Mammals of Russia; http://rusmam.ru/). The density of occurrence points allows visual evaluation of species distribution, even on large-scale maps. Each record contains the species name, locality description, and geographic coordinates, coordinate accuracy, date and author of the record, data source, and the method of species identification.",2020-07-13 +29036410,SPRINT: an SNP-free toolkit for identifying RNA editing sites.,"

Motivation

RNA editing generates post-transcriptional sequence alterations. Detection of RNA editing sites (RESs) typically requires the filtering of SNVs called from RNA-seq data using an SNP database, an obstacle that is difficult to overcome for most organisms.

Results

Here, we present a novel method named SPRINT that identifies RESs without the need to filter out SNPs. SPRINT also integrates the detection of hyper RESs from remapped reads, and has been fully automated to any RNA-seq data with reference genome sequence available. We have rigorously validated SPRINT's effectiveness in detecting RESs using RNA-seq data of samples in which genes encoding RNA editing enzymes are knock down or over-expressed, and have also demonstrated its superiority over current methods. We have applied SPRINT to investigate RNA editing across tissues and species, and also in the development of mouse embryonic central nervous system. A web resource (http://sprint.tianlab.cn) of RESs identified by SPRINT has been constructed.

Availability and implementation

The software and related data are available at http://sprint.tianlab.cn.

Contact

weidong.tian@fudan.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +33430968,The diatomic molecular spectroscopy database.,"

Motivation

The spectroscopy of diatomic molecules is an important research area in chemical physics due to its relevance in astrochemistry, combustion chemistry, and ultracold physics. However, there is currently no database where the user can easily retrieve, in a useful format, the spectroscopic constants of a given molecule. A similar situation appears concerning the vibrational Franck-Condon factors for diatomic molecules, a crucial parameter to infer laser cooling prospects for molecules. To address this problem, and inspired by the idea that data should be open and freely accessible, we have developed a user-friendly website (https://rios.mp.fhi.mpg.de) where the user can retrieve spectroscopic constants and Franck-Condon factors in useful formats.

Implementation

In this database, the spectroscopic constants of the ground states and first excited states of the diatomic molecules are accessible from the website and can be retrieved in readable formats. The website is implemented within the LAMP web service stacks. In particular, using Linux as the operative system, Apache as the HTTP Server, MySQL as the database management system, and PHP as the programming language for the web. Furthermore, the user can register and upload new data. This project is licensed under the Free-Libre/Open Source Software (FLOSS) license Apache License 2.0 which allows free and open access to the codes as well as efficient collaboration in the maintenance of the software.

Conclusions and impact

The present data-driven website presents essential information in a user-friendly manner and may help the chemical physics community to identify molecules that should be explored through spectroscopic techniques.",2020-05-11 +32390414,Retip: Retention Time Prediction for Compound Annotation in Untargeted Metabolomics.,"Unidentified peaks remain a major problem in untargeted metabolomics by LC-MS/MS. Confidence in peak annotations increases by combining MS/MS matching and retention time. We here show how retention times can be predicted from molecular structures. Two large, publicly available data sets were used for model training in machine learning: the Fiehn hydrophilic interaction liquid chromatography data set (HILIC) of 981 primary metabolites and biogenic amines,and the RIKEN plant specialized metabolome annotation (PlaSMA) database of 852 secondary metabolites that uses reversed-phase liquid chromatography (RPLC). Five different machine learning algorithms have been integrated into the Retip R package: the random forest, Bayesian-regularized neural network, XGBoost, light gradient-boosting machine (LightGBM), and Keras algorithms for building the retention time prediction models. A complete workflow for retention time prediction was developed in R. It can be freely downloaded from the GitHub repository (https://www.retip.app). Keras outperformed other machine learning algorithms in the test set with minimum overfitting, verified by small error differences between training, test, and validation sets. Keras yielded a mean absolute error of 0.78 min for HILIC and 0.57 min for RPLC. Retip is integrated into the mass spectrometry software tools MS-DIAL and MS-FINDER, allowing a complete compound annotation workflow. In a test application on mouse blood plasma samples, we found a 68% reduction in the number of candidate structures when searching all isomers in MS-FINDER compound identification software. 
Retention time prediction increases the identification rate in liquid chromatography and subsequently leads to an improved biological interpretation of metabolomics data.",2020-05-21 +33941415,Carotid Ultrasound Boundary Study (CUBS): An Open Multicenter Analysis of Computerized Intima-Media Thickness Measurement Systems and Their Clinical Impact.,"Common carotid intima-media thickness (CIMT) is a commonly used marker for atherosclerosis and is often computed in carotid ultrasound images. An analysis of different computerized techniques for CIMT measurement and their clinical impacts on the same patient data set is lacking. Here we compared and assessed five computerized CIMT algorithms against three expert analysts' manual measurements on a data set of 1088 patients from two centers. Inter- and intra-observer variability was assessed, and the computerized CIMT values were compared with those manually obtained. The CIMT measurements were used to assess the correlation with clinical parameters, cardiovascular event prediction through a generalized linear model and the Kaplan-Meier hazard ratio. CIMT measurements obtained with a skilled analyst's segmentation and the computerized segmentation were comparable in statistical analyses, suggesting they can be used interchangeably for CIMT quantification and clinical outcome investigation. To facilitate future studies, the entire data set used is made publicly available for the community at http://dx.doi.org/10.17632/fpv535fss7.1.",2021-04-30 +30951672,exRNA Atlas Analysis Reveals Distinct Extracellular RNA Cargo Types and Their Carriers Present across Human Biofluids.,"To develop a map of cell-cell communication mediated by extracellular RNA (exRNA), the NIH Extracellular RNA Communication Consortium created the exRNA Atlas resource (https://exrna-atlas.org). The Atlas version 4P1 hosts 5,309 exRNA-seq and exRNA qPCR profiles from 19 studies and a suite of analysis and visualization tools. 
To analyze variation between profiles, we apply computational deconvolution. The analysis leads to a model with six exRNA cargo types (CT1, CT2, CT3A, CT3B, CT3C, CT4), each detectable in multiple biofluids (serum, plasma, CSF, saliva, urine). Five of the cargo types associate with known vesicular and non-vesicular (lipoprotein and ribonucleoprotein) exRNA carriers. To validate utility of this model, we re-analyze an exercise response study by deconvolution to identify physiologically relevant response pathways that were not detected previously. To enable wide application of this model, as part of the exRNA Atlas resource, we provide tools for deconvolution and analysis of user-provided case-control studies.",2019-04-01 +32602538,Biomedical named entity recognition and linking datasets: survey and our recent development.,"Natural language processing (NLP) is widely applied in biological domains to retrieve information from publications. Systems to address numerous applications exist, such as biomedical named entity recognition (BNER), named entity normalization (NEN) and protein-protein interaction extraction (PPIE). High-quality datasets can assist the development of robust and reliable systems; however, due to the endless applications and evolving techniques, the annotations of benchmark datasets may become outdated and inappropriate. In this study, we first review commonlyused BNER datasets and their potential annotation problems such as inconsistency and low portability. Then, we introduce a revised version of the JNLPBA dataset that solves potential problems in the original and use state-of-the-art named entity recognition systems to evaluate its portability to different kinds of biomedical literature, including protein-protein interaction and biology events. Lastly, we introduce an ensembled biomedical entity dataset (EBED) by extending the revised JNLPBA dataset with PubMed Central full-text paragraphs, figure captions and patent abstracts. 
This EBED is a multi-task dataset that covers annotations including gene, disease and chemical entities. In total, it contains 85000 entity mentions, 25000 entity mentions with database identifiers and 5000 attribute tags. To demonstrate the usage of the EBED, we review the BNER track from the AI CUP Biomedical Paper Analysis challenge. Availability: The revised JNLPBA dataset is available at https://iasl-btm.iis.sinica.edu.tw/BNER/Content/Re vised_JNLPBA.zip. The EBED dataset is available at https://iasl-btm.iis.sinica.edu.tw/BNER/Content/AICUP _EBED_dataset.rar. Contact: Email: thtsai@g.ncu.edu.tw, Tel. 886-3-4227151 ext. 35203, Fax: 886-3-422-2681 Email: hsu@iis.sinica.edu.tw, Tel. 886-2-2788-3799 ext. 2211, Fax: 886-2-2782-4814 Supplementary information: Supplementary data are available at Briefings in Bioinformatics online.",2020-12-01 +,MON-488 Technologies of Diffuse Optics in the Diagnosis of Thyroid Cancer,"Abstract BACKGROUND: The most common tool to test malignancy in the study of thyroid nodules (NT) is ultrasound and fine needle aspiration biopsy (FNAB). However, the sensitivity and specificity of the method and the effectiveness in thyroid cancer are limited; therefore new methods to study thyroid nodules are required. In this way our goal is to introduce hybrid diffuse optical instruments that are capable to measure and discriminate altered microvascular blood flow, blood volume and tissue scattering coefficients of TN. Near-infrared diffuse optical technologies aim to overcome the shortcomings of present techniques while screening for malignant thyroid nodules for early and fast diagnosis of cancer. This idea was based on the previous experience in breast cancers with diffuse optical techniques. 
METHODS: We have developed a device based on near-infrared diffuse correlation spectroscopy (DCS), which is a technology aimed at retrieving the microvascular flow of a certain region of tissue by mean of low power near-infrared laser light, and used in combination with a commercial ultrasound system (US). In order to combine these devices, we have developed a probe enabling multimodal data acquisition and subsequently we have analyzed the optical properties and the blood flow index in the thyroid lobes of eleven subjects who presented a thyroid nodule. RESULTS: Four subjects have required FNAB: P4 and P7 were reported as being malignant (Bethesda VI and IV respectively) while P6 and P8 were evaluated as being benign (Bethesda II). Surgical removal confirmed papillary thyroid carcinoma in P4, while denied the result of FNAB for P7 (Multinodular thyroid hyperplasia). We have considered the contralateral lobe as intra-subject reference to validate the feasibility of the DCS system in a very absorbing tissue as thyroid is. The difference between the blood flow index of the nodule and the contralateral lobe is maximum for subject P4, while the difference in benign subjects is lower. T-test showed no significant difference between benign nodules and contralateral lobes. Subject P7 showed a small difference as for other benign subjects despite the FNAB results indicating presence of malignancy. CONCLUSION: Apparently diffuse optics technologies would be able to differentiate malignant thyroid nodules from benign thyroid nodules, but more measurements require confirming our preliminary results as that diffuse optical technology can complement the current techniques such as US and FNAB. 
A new measurement campaign is being scheduled with a completed, fully integrated device that was developed within the LUCA project (http://www.luca-project.eu).",2020-05-08 +32122231,ncRPheno: a comprehensive database platform for identification and validation of disease related noncoding RNAs.,"Noncoding RNAs (ncRNAs) play critical roles in many critical biological processes and have become a novel class of potential targets and bio-markers for disease diagnosis, therapy, and prognosis. Annotating and analysing ncRNA-disease association data are essential but challenging. Current computational resources lack comprehensive database platforms to consistently interpret and prioritize ncRNA-disease association data for biomedical investigation and application. Here, we present the ncRPheno database platform (http://lilab2.sysu.edu.cn/ncrpheno), which comprehensively integrates and annotates ncRNA-disease association data and provides novel searches, visualizations, and utilities for association identification and validation. ncRPheno contains 482,751 non-redundant associations between 14,494 ncRNAs and 3,210 disease phenotypes across 11 species with supporting evidence in the literature. A scoring model was refined to prioritize the associations based on evidential metrics. Moreover, ncRPheno provides user-friendly web interfaces, novel visualizations, and programmatic access to enable easy exploration, analysis, and utilization of the association data. A case study through ncRPheno demonstrated a comprehensive landscape of ncRNAs dysregulation associated with 22 cancers and uncovered 821 cancer-associated common ncRNAs. As a unique database platform, ncRPheno outperforms the existing similar databases in terms of data coverage and utilities, and it will assist studies in encoding ncRNAs associated with phenotypes ranging from genetic disorders to complex diseases.

Abbreviations

APIs: application programming interfaces; circRNA: circular RNA; ECO: Evidence & Conclusion Ontology; EFO: Experimental Factor Ontology; FDR: false discovery rate; GO: Gene Ontology; GWAS: genome wide association studies; HPO: Human Phenotype Ontology; ICGC: International Cancer Genome Consortium; lncRNA: long noncoding RNA; miRNA: micro RNA; ncRNA: noncoding RNA; NGS: next generation sequencing; OMIM: Online Mendelian Inheritance in Man; piRNA: piwi-interacting RNA; snoRNA: small nucleolar RNA; TCGA: The Cancer Genome Atlas.",2020-03-26 +26737757,A survey of remote optical photoplethysmographic imaging methods.,"In recent years researchers have presented a number of new methods for recovering physiological parameters using just low-cost digital cameras and image processing. The ubiquity of digital cameras presents the possibility for many new, low-cost applications of vital sign monitoring. In this paper we present a review of the work on remote photoplethysmographic (PPG) imaging using digital cameras. This review specifically focuses on the state-of-the-art in PPG imaging where: 1) measures beyond pulse rate are evaluated, 2) non-ideal conditions (e.g., the presence of motion artifacts) are explored, and 3) use cases in relevant environments are demonstrated. We discuss gaps within the literature and future challenges for the research community. To aid in the continuing advancement of PPG imaging research, we are making available a website with the references collected for this review as well as information on available code and datasets of interest. It is our hope that this website will become a valuable resource for the PPG imaging community. The site can be found at: http://web.mit.edu/~djmcduff/www/ remote-physiology.html.",2015-01-01 +34882429,Neighborhood Racial and Economic Segregation and Disparities in Violence During the COVID-19 Pandemic.,"Objectives. 
To describe associations between neighborhood racial and economic segregation and violence during the COVID-19 pandemic. Methods. For 13 US cities, we obtained zip code-level data on 5 violence outcomes from March through July 2018 through 2020. Using negative binomial regressions and marginal contrasts, we estimated differences between quintiles of racial, economic, and racialized economic segregation using the Index of Concentration at the Extremes as a measure of neighborhood privilege (1) in 2020 and (2) relative to 2018 through 2019 (difference-in-differences). Results. In 2020, violence was higher in less-privileged neighborhoods than in the most privileged. For example, if all zip codes were in the least privileged versus most privileged quintile of racialized economic segregation, we estimated 146.2 additional aggravated assaults (95% confidence interval = 112.4, 205.8) per zip code on average across cities. Differences over time in less-privileged zip codes were greater than differences over time in the most privileged for firearm violence, aggravated assault, and homicide. Conclusions. Marginalized communities endure endemically high levels of violence. The events of 2020 exacerbated disparities in several forms of violence. Public Health Implications. To reduce violence and related disparities, immediate and long-term investments in low-income neighborhoods of color are warranted. (Am J Public Health. 2022;112(1):144-153. https://doi.org/10.2105/AJPH.2021.306540).",2021-12-09 +32265943,Test of Arabidopsis Space Transcriptome: A Discovery Environment to Explore Multiple Plant Biology Spaceflight Experiments.,"Recent advances in the routine access to space along with increasing opportunities to perform plant growth experiments on board the International Space Station have led to an ever-increasing body of transcriptomic, proteomic, and epigenomic data from plants experiencing spaceflight. 
These datasets hold great promise to help understand how plant biology reacts to this unique environment. However, analyses that mine across such expanses of data are often complex to implement, being impeded by the sheer number of potential comparisons that are possible. Complexities in how the output of these multiple parallel analyses can be presented to the researcher in an accessible and intuitive form provides further barriers to such research. Recent developments in computational systems biology have led to rapid advances in interactive data visualization environments designed to perform just such tasks. However, to date none of these tools have been tailored to the analysis of the broad-ranging plant biology spaceflight data. We have therefore developed the Test Of Arabidopsis Space Transcriptome (TOAST) database (https://astrobiology.botany.wisc.edu/astrobotany-toast) to address this gap in our capabilities. TOAST is a relational database that uses the Qlik database management software to link plant biology, spaceflight-related omics datasets, and their associated metadata. This environment helps visualize relationships across multiple levels of experiments in an easy to use gene-centric platform. TOAST draws on data from The US National Aeronautics and Space Administration's (NASA's) GeneLab and other data repositories and also connects results to a suite of web-based analytical tools to facilitate further investigation of responses to spaceflight and related stresses. The TOAST graphical user interface allows for quick comparisons between plant spaceflight experiments using real-time, gene-specific queries, or by using functional gene ontology, Kyoto Encyclopedia of Genes and Genomes pathway, or other filtering systems to explore genetic networks of interest. 
Testing of the database shows that TOAST confirms patterns of gene expression already highlighted in the literature, such as revealing the modulation of oxidative stress-related responses across multiple plant spaceflight experiments. However, this data exploration environment can also drive new insights into patterns of spaceflight responsive gene expression. For example, TOAST analyses highlight changes to mitochondrial function as likely shared responses in many plant spaceflight experiments.",2020-03-04 +30395287,UniProt: a worldwide hub of protein knowledge.,"The UniProt Knowledgebase is a collection of sequences and annotations for over 120 million proteins across all branches of life. Detailed annotations extracted from the literature by expert curators have been collected for over half a million of these proteins. These annotations are supplemented by annotations provided by rule based automated systems, and those imported from other resources. In this article we describe significant updates that we have made over the last 2 years to the resource. We have greatly expanded the number of Reference Proteomes that we provide and in particular we have focussed on improving the number of viral Reference Proteomes. The UniProt website has been augmented with new data visualizations for the subcellular localization of proteins as well as their structure and interactions. UniProt resources are available under a CC-BY (4.0) license via the web at https://www.uniprot.org/.",2019-01-01 +35047951,ViralFP: A Web Application of Viral Fusion Proteins.,"Viral fusion proteins are attached to the membrane of enveloped viruses (a group that includes Coronaviruses, Dengue, HIV and Influenza) and catalyze fusion between the viral and host membranes, enabling the virus to insert its genetic material into the host cell. 
Given the importance of these biomolecules, this work presents a centralized database containing the most relevant information on viral fusion proteins, available through a free-to-use web server accessible through the URL https://viralfp.bio.di.uminho.pt/. This web application contains several bioinformatic tools, such as Clustal sequence alignment and Weblogo, including as well a machine learning-based tool capable of predicting the location of fusion peptides (the component of fusion proteins that inserts into the host's cell membrane) within the fusion protein sequence. Given the crucial role of these proteins in viral infection, their importance as natural targets of our immune system and their potential as therapeutic targets, this web application aims to foster our ability to fight pathogenic viruses.",2021-08-23 +29156005,aBiofilm: a resource of anti-biofilm agents and their potential implications in targeting antibiotic drug resistance.,"Biofilms play an important role in the antibiotic drug resistance, which is threatening public health globally. Almost, all microbes mimic multicellular lifestyle to form biofilm by undergoing phenotypic changes to adapt adverse environmental conditions. Many anti-biofilm agents have been experimentally validated to disrupt the biofilms during last three decades. To organize this data, we developed the 'aBiofilm' resource (http://bioinfo.imtech.res.in/manojk/abiofilm/) that harbors a database, a predictor, and the data visualization modules. The database contains biological, chemical, and structural details of 5027 anti-biofilm agents (1720 unique) reported from 1988-2017. These agents target over 140 organisms including Gram-negative, Gram-positive bacteria, and fungus. They are mainly chemicals, peptides, phages, secondary metabolites, antibodies, nanoparticles and extracts. 
They show the diverse mode of actions by attacking mainly signaling molecules, biofilm matrix, genes, extracellular polymeric substances, and many more. The QSAR based predictor identifies the anti-biofilm potential of an unknown chemical with an accuracy of ∼80.00%. The data visualization section summarized the biofilm stages targeted (Circos plot); interaction maps (Cytoscape) and chemicals diversification (CheS-Mapper) of the agents. This comprehensive platform would help the researchers to understand the multilevel communication in the microbial consortium. It may aid in developing anti-biofilm therapeutics to deal with antibiotic drug resistance menace.",2018-01-01 +28977551,AAgMarker 1.0: a resource of serological autoantigen biomarkers for clinical diagnosis and prognosis of various human diseases.,"Autoantibodies are produced to target an individual's own antigens (e.g. proteins). They can trigger autoimmune responses and inflammation, and thus, cause many types of diseases. Many high-throughput autoantibody profiling projects have been reported for unbiased identification of serological autoantigen-based biomarkers. However, a lack of centralized data portal for these published assays has been a major obstacle to further data mining and cross-evaluate the quality of these datasets generated from different diseases. Here, we introduce a user-friendly database, AAgMarker 1.0, which collects many published raw datasets obtained from serum profiling assays on the proteome microarrays, and provides a toolbox for mining these data. The current version of AAgMarker 1.0 contains 854 serum samples, involving 136 092 proteins. A total of 7803 (4470 non-redundant) candidate autoantigen biomarkers were identified and collected for 12 diseases, such as Alzheimer's disease, Bechet's disease and Parkinson's disease. Seven statistical parameters are introduced to quantitatively assess these biomarkers. 
Users can retrieve, analyse and compare the datasets through basic search, advanced search and browse. These biomarkers are also downloadable by disease terms. The AAgMarker 1.0 is now freely accessible at http://bioinfo.wilmer.jhu.edu/AAgMarker/. We believe this database will be a valuable resource for the community of both biomedical and clinical research.",2018-01-01 +33609102,CuBlock: a cross-platform normalization method for gene-expression microarrays. ,"Cross-(multi)platform normalization of gene-expression microarray data remains an unresolved issue. Despite the existence of several algorithms, they are either constrained by the need to normalize all samples of all platforms together, compromising scalability and reuse, by adherence to the platforms of a specific provider, or simply by poor performance. In addition, many of the methods presented in the literature have not been specifically tested against multi-platform data and/or other methods applicable in this context. Thus, we set out to develop a normalization algorithm appropriate for gene-expression studies based on multiple, potentially large microarray sets collected along multiple platforms and at different times, applicable in systematic studies aimed at extracting knowledge from the wealth of microarray data available in public repositories; for example, for the extraction of Real-World Data to complement data from Randomized Controlled Trials. Our main focus or criterion for performance was on the capacity of the algorithm to properly separate samples from different biological groups. We present CuBlock, an algorithm addressing this objective, together with a strategy to validate cross-platform normalization methods. To validate the algorithm and benchmark it against existing methods, we used two distinct data sets, one specifically generated for testing and standardization purposes and one from an actual experimental study. 
Using these data sets, we benchmarked CuBlock against ComBat (Johnson et al., 2007), UPC (Piccolo et al., 2013), YuGene (Lê Cao et al., 2014), DBNorm (Meng et al., 2017), Shambhala (Borisov et al., 2019) and a simple log2 transform as reference. We note that many other popular normalization methods are not applicable in this context. CuBlock was the only algorithm in this group that could always and clearly differentiate the underlying biological groups after mixing the data, from up to six different platforms in this study. CuBlock can be downloaded from https://www.mathworks.com/matlabcentral/fileexchange/77882-cublock. Supplementary data are available at Bioinformatics online.",2021-02-20 +34039282,Inferring and analyzing gene regulatory networks from multi-factorial expression data: a complete and interactive suite.,"

Background

High-throughput transcriptomic datasets are often examined to discover new actors and regulators of a biological response. To this end, graphical interfaces have been developed and allow a broad range of users to conduct standard analyses from RNA-seq data, even with little programming experience. Although existing solutions usually provide adequate procedures for normalization, exploration or differential expression, more advanced features, such as gene clustering or regulatory network inference, often miss or do not reflect current state of the art methodologies.

Results

We developed here a user interface called DIANE (Dashboard for the Inference and Analysis of Networks from Expression data) designed to harness the potential of multi-factorial expression datasets from any organisms through a precise set of methods. DIANE interactive workflow provides normalization, dimensionality reduction, differential expression and ontology enrichment. Gene clustering can be performed and explored via configurable Mixture Models, and Random Forests are used to infer gene regulatory networks. DIANE also includes a novel procedure to assess the statistical significance of regulator-target influence measures based on permutations for Random Forest importance metrics. All along the pipeline, session reports and results can be downloaded to ensure clear and reproducible analyses.

Conclusions

We demonstrate the value and the benefits of DIANE using a recently published data set describing the transcriptional response of Arabidopsis thaliana under the combination of temperature, drought and salinity perturbations. We show that DIANE can intuitively carry out informative exploration and statistical procedures with RNA-Seq data, perform model based gene expression profiles clustering and go further into gene network reconstruction, providing relevant candidate genes or signalling pathways to explore. DIANE is available as a web service ( https://diane.bpmp.inrae.fr ), or can be installed and locally launched as a complete R package.",2021-05-26 +33476183,SuperPlotsOfData-a web app for the transparent display and quantitative comparison of continuous data from different conditions.,"Plots and charts are graphical tools that make data intelligible and digestible by humans. But the oversimplification of data by only plotting the statistical summaries conflicts with the transparent communication of results. Therefore, plotting of all data are generally encouraged and this can be achieved by using a dotplot for discrete conditions. Dotplots, however, often fail to communicate whether the data are from different technical or biological replicates. The superplot has been proposed by Lord and colleagues (Lord et al., 2020) to improve the communication of experimental design and results. To simplify the plotting of data from discrete conditions as a superplot, the SuperPlotsOfData web app was generated. The tool offers easy and open access to state-of-the-art data visualization. In addition, it incorporates recent innovations in data visualization and analysis, including raincloud plots and estimation statistics. The free, open source webtool can be accessed at: https://huygens.science.uva.nl/SuperPlotsOfData/.",2021-01-21 +34875958,A novel self-report scale of interoception: the three-domain interoceptive sensations questionnaire (THISQ).,"

Objectives

The self-reported perception of bodily sensations is assumed predictive for health and disease. Existing questionnaires mostly focus on aversive sensations, and associated emotions and cognitions, which potentially confounds associations between interoception and illness. Therefore, we developed the Three-domain Interoceptive Sensations Questionnaire (THISQ), assessing self-reported perception of neutral respiratory, cardiac, and gastroesophageal sensations.

Design

Using cross-sectional surveys, we developed and validated the THISQ.

Main outcome measures

In Sample 1 (n = 357), a pool of 28 Dutch items was subjected to exploratory factor analysis. Eighteen items with a primary factor loading >.40 were retained for confirmatory factor analysis in Sample 2 (n = 374) and Sample 3 (n = 484) for the validation of the Dutch and English questionnaire, respectively.

Results

Analyses supported the 3-factor solution: cardiorespiratory activation, cardiorespiratory deactivation, and gastroesophageal sensations. Scales showed acceptable to good internal consistency. Convergent validity was confirmed by significant medium associations between THISQ scores and other self-report measures of interoception. Divergent validity was supported by non-significant or small associations with measures of negative affectivity and symptom-related anxiety.

Conclusion

Our findings suggest that the Dutch and English THISQs are valid and reliable self-report measures of interoception, which could advance our understanding of interoceptive processes in health and disease.Supplemental data for this article is available online at https://doi.org/10.1080/08870446.2021.2009479 .",2021-12-07 +34927525,Changes in Exercise Capacity and Health-Related Quality of Life at Four and Eight Weeks of a Pulmonary Rehabilitation Program in People with COPD.,"Pulmonary Rehabilitation (PR) is a key intervention in the management of people with chronic obstructive pulmonary disease (COPD), though few studies have assessed where changes in outcomes occur during a PR program. The aim of this study was to determine the changes in exercise capacity and health-related quality of life at four and eight weeks during a twice-weekly supervised PR program in people with COPD. Fifty participants with COPD were recruited and attended PR twice-weekly for eight weeks. The outcome measures were the endurance shuttle walk test (ESWT), six-minute walk distance (6MWD), St George's Respiratory Questionnaire (SGRQ), COPD Assessment Test (CAT) and the Hospital Anxiety and Depression Scale (HADS) which were measured at baseline, four and eight weeks. Compared to baseline, at week four there were significant improvements in ESWT (mean difference [95%CI] 197 [89 to 305] seconds), 6MWD (22 [8 to 36] metres), SGRQ symptom score (-6 [-12 to -1] points) and SGRQ total score (-4 [-7 to -1] points). Between week four and eight there were further significant improvements in ESWT (94 [8 to 181] seconds) only. By week eight, ESWT, 6MWD, SGRQ symptoms and total score, and CAT had all improved significantly compared to baseline measures. 
This study demonstrated that participants with moderate to very severe COPD who participated in a twice weekly, eight-week PR program (16 sessions) had significant improvement in ESWT, 6MWD, SGRQ, and CAT score with the greatest improvements occurring in the first four weeks of the program.Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.2013793 .",2021-12-19 +30165663,High-resolution analysis of the pneumococcal transcriptome under a wide range of infection-relevant conditions.,"Streptococcus pneumoniae is an opportunistic human pathogen that typically colonizes the nasopharyngeal passage and causes lethal disease in other host niches, such as the lung or the meninges. The expression and regulation of pneumococcal genes at different life-cycle stages, such as commensal or pathogenic, are not entirely understood. To chart the transcriptional responses of S. pneumoniae, we used RNA-seq to quantify the relative abundance of the transcriptome under 22 different infection-relevant conditions. The data demonstrated a high level of dynamic expression and, strikingly, all annotated pneumococcal genomic features were expressed in at least one of the studied conditions. By computing the correlation values of every pair of genes across all studied conditions, we created a co-expression matrix that provides valuable information on both operon structure and regulatory processes. The co-expression data are highly consistent with well-characterized operons and regulons, such as the PyrR, ComE and ComX regulons, and have allowed us to identify a new member of the competence regulon. 
Lastly, we created an interactive data center named PneumoExpress (https://veeninglab.com/pneumoexpress) that enables users to access the expression data as well as the co-expression matrix in an intuitive and efficient manner, providing a valuable resource to the pneumococcal research community.",2018-11-01 +31959765,High-resolution and bias-corrected CMIP5 projections for climate change impact assessments.,"Projections of climate change are available at coarse scales (70-400 km). But agricultural and species models typically require finer scale climate data to model climate change impacts. Here, we present a global database of future climates developed by applying the delta method -a method for climate model bias correction. We performed a technical evaluation of the bias-correction method using a 'perfect sibling' framework and show that it reduces climate model bias by 50-70%. The data include monthly maximum and minimum temperatures and monthly total precipitation, and a set of bioclimatic indices, and can be used for assessing impacts of climate change on agriculture and biodiversity. The data are publicly available in the World Data Center for Climate (WDCC; cera-www.dkrz.de), as well as in the CCAFS-Climate data portal (http://ccafs-climate.org). The database has been used up to date in more than 350 studies of ecosystem and agricultural impact assessment.",2020-01-20 +31048973,Taxonomic studies of pteridophytes of Ambon and Seram (Moluccas) collected on Indonesian-Japanese botanical expeditions 1983-1986. XIII. Hymenophyllaceae.,"Identifications are given for 713 specimens of Hymenophyllaceae collected on Ambon and Seram islands, the Moluccas, Indonesia, during 1983-86. The collection is composed of forty-seven species and one variety belonging to seven genera. The dataset is deposited in GBIF and available at https://www.gbif.jp/ipt/resource?r=seram_hymen.",2019-04-01 +29743053,PDXliver: a database of liver cancer patient derived xenograft mouse models.,"

Background

Liver cancer is the second leading cause of cancer-related deaths and characterized by heterogeneity and drug resistance. Patient-derived xenograft (PDX) models have been widely used in cancer research because they reproduce the characteristics of original tumors. However, the current studies of liver cancer PDX mice are scattered and the number of available PDX models are too small to represent the heterogeneity of liver cancer patients. To improve this situation and to complement available PDX models related resources, here we constructed a comprehensive database, PDXliver, to integrate and analyze liver cancer PDX models.

Description

Currently, PDXliver contains 116 PDX models from Chinese liver cancer patients, 51 of them were established by the in-house PDX platform and others were curated from the public literatures. These models are annotated with complete information, including clinical characteristics of patients, genome-wide expression profiles, germline variations, somatic mutations and copy number alterations. Analysis of expression subtypes and mutated genes show that PDXliver represents the diversity of human patients. Another feature of PDXliver is storing drug response data of PDX mice, which makes it possible to explore the association between molecular profiles and drug sensitivity. All data can be accessed via the Browse and Search pages. Additionally, two tools are provided to interactively visualize the omics data of selected PDXs or to compare two groups of PDXs.

Conclusion

As far as we known, PDXliver is the first public database of liver cancer PDX models. We hope that this comprehensive resource will accelerate the utility of PDX models and facilitate liver cancer research. The PDXliver database is freely available online at: http://www.picb.ac.cn/PDXliver/.",2018-05-09 +34461704,BCG vaccination impact on mortality and recovery rates in COVID-19: A meta-analysis. ,"COVID-19 is a pandemic caused by SARS-CoV-2 virus which is a very worrisome public health emergency. In this study, we compared the mortality rate and recovery rate in countries with and without BCG vaccination policy. The data of mortality of COVID-19 was extracted from worldometer (https://www.worldometers.info/coronavirus/) on 26th July 2020. The data of countries where BCG vaccination is being done for all individuals is taken from BCG world atlas (http://www.bcgatlas.org/index.php), updated in 2017. BCG vaccination policy recommended countries are intervention group versus countries without BCG vaccination policies which are regarded as control group. Pooled analysis of countries with and without BCG vaccination policy revealed mortality rate of 1.31% (95%CI - 1.31% to 1.32%; I2 = 100%, p<0.01) and 3.25% (95%CI - 3.23% to 3.26%; I2 = 100%, p<0.01), respectively. The recovery rates in two country groups were found to be 72.60% (95%CI - 72.57% to 72.63%) and 55.94% (95%CI - 55.90% to 55.98%), respectively. 52 individuals need to be BCG vaccinated to prevent one death (NNT = 52). In BCG vaccination program countries, there is statistically and clinically significant less mortality (p<0.001) as compared to countries without BCG policy. Our findings corroborate the hypothesis that BCG vaccination may provide protection from COVID-19. 
High quality evidence from randomised controlled trials are required to establish causality between BCG vaccination and protection from severe COVID-19.",2021-08-09 +31713623,The Year of the Rat: The Rat Genome Database at 20: a multi-species knowledgebase and analysis platform.,"Formed in late 1999, the Rat Genome Database (RGD, https://rgd.mcw.edu) will be 20 in 2020, the Year of the Rat. Because the laboratory rat, Rattus norvegicus, has been used as a model for complex human diseases such as cardiovascular disease, diabetes, cancer, neurological disorders and arthritis, among others, for >150 years, RGD has always been disease-focused and committed to providing data and tools for researchers doing comparative genomics and translational studies. At its inception, before the sequencing of the rat genome, RGD started with only a few data types localized on genetic and radiation hybrid (RH) maps and offered only a few tools for querying and consolidating that data. Since that time, RGD has expanded to include a wealth of structured and standardized genetic, genomic, phenotypic, and disease-related data for eight species, and a suite of innovative tools for querying, analyzing and visualizing this data. This article provides an overview of recent substantial additions and improvements to RGD's data and tools that can assist researchers in finding and utilizing the data they need, whether their goal is to develop new precision models of disease or to more fully explore emerging details within a system or across multiple systems.",2020-01-01 +34174885,Large-scale literature mining to assess the relation between anti-cancer drugs and cancer types.,"

Background

There is a huge body of scientific literature describing the relation between tumor types and anti-cancer drugs. The vast amount of scientific literature makes it impossible for researchers and physicians to extract all relevant information manually.

Methods

In order to cope with the large amount of literature we applied an automated text mining approach to assess the relations between 30 most frequent cancer types and 270 anti-cancer drugs. We applied two different approaches, a classical text mining based on named entity recognition and an AI-based approach employing word embeddings. The consistency of literature mining results was validated with 3 independent methods: first, using data from FDA approvals, second, using experimentally measured IC-50 cell line data and third, using clinical patient survival data.

Results

We demonstrated that the automated text mining was able to successfully assess the relation between cancer types and anti-cancer drugs. All validation methods showed a good correspondence between the results from literature mining and independent confirmatory approaches. The relation between most frequent cancer types and drugs employed for their treatment were visualized in a large heatmap. All results are accessible in an interactive web-based knowledge base using the following link: https://knowledgebase.microdiscovery.de/heatmap .

Conclusions

Our approach is able to assess the relations between compounds and cancer types in an automated manner. Both, cancer types and compounds could be grouped into different clusters. Researchers can use the interactive knowledge base to inspect the presented results and follow their own research questions, for example the identification of novel indication areas for known drugs.",2021-06-26 +33216897,Predicted rat interactome database and gene set linkage analysis. ,"Rattus norvegicus, or the rat, has been widely used as animal models for a diversity of human diseases in the last 150 years. The rat, as a disease model, has the advantage of relatively large body size and highly similar physiology to humans. In drug discovery, rat models are routinely used in drug efficacy and toxicity assessments. To facilitate molecular pharmacology studies in rats, we present the predicted rat interactome database (PRID), which is a database of high-quality predicted functional gene interactions with balanced sensitivity and specificity. PRID integrates functional gene association data from 10 public databases and infers 305 939 putative functional associations, which are expected to include 13.02% of all rat protein interactions, and 52.59% of these function associations may represent protein interactions. This set of functional interactions may not only facilitate hypothesis formulation in molecular mechanism studies, but also serve as a reference interactome for users to perform gene set linkage analysis (GSLA), which is a web-based tool to infer the potential functional impacts of a set of changed genes observed in transcriptomics analyses. In a case study, we show that GSLA based on PRID may provide more precise and informative annotations for investigators to understand the physiological mechanisms underlying a phenotype and lead investigators to testable hypotheses for further studies. 
Widely used functional annotation tools such as Gene Ontology (GO) analysis, and Database for Annotation, Visualization and Integrated Discovery (DAVID) did not provide similar insights. Database URL: http://rat.biomedtzc.cn.",2020-11-01 +34853669,An Overview of Supervised Machine Learning Methods and Data Analysis for COVID-19 Detection.,"

Methods

Our analysis and machine learning algorithm is based on most cited two clinical datasets from the literature: one from San Raffaele Hospital Milan Italia and the other from Hospital Israelita Albert Einstein São Paulo Brasilia. The datasets were processed to select the best features that most influence the target, and it turned out that almost all of them are blood parameters. EDA (Exploratory Data Analysis) methods were applied to the datasets, and a comparative study of supervised machine learning models was done, after which the support vector machine (SVM) was selected as the one with the best performance.

Results

SVM being the best performant is used as our proposed supervised machine learning algorithm. An accuracy of 99.29%, sensitivity of 92.79%, and specificity of 100% were obtained with the dataset from Kaggle (https://www.kaggle.com/einsteindata4u/covid19) after applying optimization to SVM. The same procedure and work were performed with the dataset taken from San Raffaele Hospital (https://zenodo.org/record/3886927#.YIluB5AzbMV). Once more, the SVM presented the best performance among other machine learning algorithms, and 92.86%, 93.55%, and 90.91% for accuracy, sensitivity, and specificity, respectively, were obtained.

Conclusion

The obtained results, when compared with others from the literature based on these same datasets, are superior, leading us to conclude that our proposed solution is reliable for the COVID-19 diagnosis.",2021-11-22 +34802278,COVID-19 Surveiller: toward a robust and effective pandemic surveillance system based on social media mining.,"The outbreak of the novel coronavirus, COVID-19, has become one of the most severe pandemics in human history. In this paper, we propose to leverage social media users as social sensors to simultaneously predict the pandemic trends and suggest potential risk factors for public health experts to understand spread situations and recommend proper interventions. More precisely, we develop novel deep learning models to recognize important entities and their relations over time, thereby establishing dynamic heterogeneous graphs to describe the observations of social media users. A dynamic graph neural network model can then forecast the trends (e.g. newly diagnosed cases and death rates) and identify high-risk events from social media. Based on the proposed computational method, we also develop a web-based system for domain experts without any computer science background to easily interact with. We conduct extensive experiments on large-scale datasets of COVID-19 related tweets provided by Twitter, which show that our method can precisely predict the new cases and death rates. We also demonstrate the robustness of our web-based pandemic surveillance system and its ability to retrieve essential knowledge and derive accurate predictions across a variety of circumstances. Our system is also available at http://scaiweb.cs.ucla.edu/covidsurveiller/. 
This article is part of the theme issue 'Data science approaches to infectious disease surveillance'.",2021-11-22 +29036683,CR2Cancer: a database for chromatin regulators in human cancer.,"Chromatin regulators (CRs) can dynamically modulate chromatin architecture to epigenetically regulate gene expression in response to intrinsic and extrinsic signalling cues. Somatic alterations or misexpression of CRs might reprogram the epigenomic landscape of chromatin, which in turn lead to a wide range of common diseases, notably cancer. Here, we present CR2Cancer, a comprehensive annotation and visualization database for CRs in human cancer constructed by high throughput data analysis and literature mining. We collected and integrated genomic, transcriptomic, proteomic, clinical and functional information for over 400 CRs across multiple cancer types. We also built diverse types of CR-associated relations, including cancer type dependent (CR-target and miRNA-CR) and independent (protein-protein interaction and drug-target) ones. Furthermore, we manually curated around 6000 items of aberrant molecular alterations and interactions of CRs in cancer development from 5007 publications. CR2Cancer provides a user-friendly web interface to conveniently browse, search and download data of interest. We believe that this database would become a valuable resource for cancer epigenetics investigation and potential clinical application. CR2Cancer is freely available at http://cis.hku.hk/CR2Cancer.",2018-01-01 +33865928,Predictors of Treatment Engagement and Outcome Among Adolescents With Attention-Deficit/Hyperactivity Disorder: An Integrative Data Analysis.,"

Objective

To identify patient- and treatment-level factors that predict intervention engagement and outcome for adolescents with attention-deficit/hyperactivity disorder (ADHD), guiding efforts to enhance care.

Method

Integrative data analysis was used to pool data from 4 randomized controlled trials of adolescent ADHD treatment with participants (N = 854) receiving various evidence-based behavioral therapy packages in 5 treatment arms (standard [STANDARD], comprehensive [COMP], engagement-focused [ENGAGE]), community-based usual care (UC), or no treatment (NOTX). Participants also displayed varying medication use patterns (negligible, inconsistent, consistent) during the trial. Regression and latent growth curve analyses examined treatment- and patient-level predictors of engagement and outcome.

Results

Compared with COMP, ENGAGE was associated with higher parent engagement in behavioral therapy (d = 1.35-1.73) when delivered in university, but not community, clinics. Under some conditions, ENGAGE also predicted youth engagement in behavioral therapy (d = 1.21) and lower likelihood of negligible medication use (odds ratio = 0.49 compared with NOTX). UC was associated with poorer parent engagement compared with COMP (d = -0.59) and negligible medication use (odds ratio = 2.29) compared with NOTX. Compared with COMP, ENGAGE (in university settings) was consistently associated with larger ADHD symptom improvements (d = 0.41-0.83) at 6-month follow-up and sometimes associated with larger grade point average (d = 0.68) and parent-teen conflict (d = 0.41) improvements. Consistent medication use during behavioral therapy was associated with larger improvements in ADHD symptoms (d = 0.28) and parent-teen conflict (d = 0.25-0.36). An ADHD+internalizing clinical profile predicted larger improvements in grade point average (d = 0.45). Family adversity predicted poorer parent and youth engagement (rate ratio = 0.90-0.95), negligible medication use (odds ratio = 1.22), and smaller improvements in grade point average (d = -0.23). African American race predicted smaller improvements in parent-teen conflict (d = -0.49).

Conclusion

Engagement-focused behavioral therapy and consistent medication use most frequently predicted stronger clinical engagement and outcomes for adolescents with ADHD. Youths who are African American or who experience family adversity may demonstrate treatment-related disparities for certain outcomes; youths with ADHD+internalizing symptoms may demonstrate excellent academic outcomes following behavioral therapy.

Data sharing

The full ADHD TIDAL dataset is publicly available through the National Data Archive (https://nda.nih.gov), including a data dictionary. The study protocol is also publicly available: https://doi.org/10.1186/s12888-020-02734-6.",2021-06-05 +34278900,Body Mass Index Alters the Predictive Value of the Neutrophil-to-Lymphocyte Ratio and Systemic Inflammation Response Index in Laryngeal Squamous Cell Carcinoma Patients.,"Laryngeal squamous cell carcinoma (LSCC) is a frequent cancer subtype among head and neck cancers. Exacerbated inflammation and nutritional deficit are common features in this type of cancer and can be used as a prognostic marker. This study aimed to investigate the relationship between body mass index (BMI), neutrophil-to-lymphocyte ratio (NLR), and systemic inflammation response index (SIRI) on overall survival (OS) of LSCC patients. In this retrospective cohort study, 168 patients were followed for 5 years. Data on clinical factors, patients' life habits, height, weight, and hematological parameters were collected. BMI, NLR, and SIRI were calculated. Pretreatment NLR≥ 2.02 and SIRI≥ 1160.85 were independent prognostic factors for poor OS. Low BMI did not significantly affect the OS. However, the inflammatory parameters had their predictive capacity altered when stratified by the BMI classification. NLR≥ 2.02 + Low BMI or SIRI≥ 1160.85 + Low BMI increased in 8.6 and 3.8 times the risk of death, respectively. In contrast, stratification by normal/high BMI classification eliminated the predictive capacity of NLR and SIRI. 
Here, we demonstrated the possible ability of BMI to change the prognostic capacity of inflammatory markers NLR and SIRI in patients with LSCC.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1952447.",2021-07-19 +34713086,Markup: A Web-Based Annotation Tool Powered by Active Learning.,"Across various domains, such as health and social care, law, news, and social media, there are increasing quantities of unstructured texts being produced. These potential data sources often contain rich information that could be used for domain-specific and research purposes. However, the unstructured nature of free-text data poses a significant challenge for its utilisation due to the necessity of substantial manual intervention from domain-experts to label embedded information. Annotation tools can assist with this process by providing functionality that enables the accurate capture and transformation of unstructured texts into structured annotations, which can be used individually, or as part of larger Natural Language Processing (NLP) pipelines. We present Markup (https://www.getmarkup.com/) an open-source, web-based annotation tool that is undergoing continued development for use across all domains. Markup incorporates NLP and Active Learning (AL) technologies to enable rapid and accurate annotation using custom user configurations, predictive annotation suggestions, and automated mapping suggestions to both domain-specific ontologies, such as the Unified Medical Language System (UMLS), and custom, user-defined ontologies. 
We demonstrate a real-world use case of how Markup has been used in a healthcare setting to annotate structured information from unstructured clinic letters, where captured annotations were used to build and test NLP applications.",2021-07-26 +34282681,Citrus Consumption and Risk of Non-Melanoma Skin Cancer in the UK Biobank.,"Background: Non-melanoma skin cancer (NMSC) incidence has been dramatically increasing worldwide. Psoralen, a known photocarcinogen, is naturally abundant in citrus products, leading to the hypothesis that high citrus consumption may increase NMSC risk.Methods: We fitted age- and multivariable-adjusted logistic regression models to evaluate the association between citrus consumption and NMSC risk among 197,372 UKBB participants. A total of 9,613 NMSC cases were identified using International Classification of Disease 10 codes. Citrus consumption data were collected via five rounds of 24-hour recall questionnaires.Results: We found no association between high total citrus consumption and NMSC risk, although a slightly elevated NMSC risk was observed among participants who consumed >0 to half a serving of total citrus per day (OR [95% CI] = 1.08 [1.01-1.16]). There was no association between individual citrus products and NMSC risk.Conclusion: High citrus consumption was not associated with an increased risk of NMSC in our UKBB sample. 
Further studies are needed to clarify these associations.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1952439 .",2021-07-20 +,"3402 A High-Impact, Structured, Collaborative Approach to Implementing and Utilizing the Research Performance Progress Report (RPPR) for a Clinical and Translational Science Award","OBJECTIVES/SPECIFIC AIMS: This presentation will highlight a structured, collaborative approach to implementing and utilizing the RPPR process created at the University of Minnesota CTSI in response to the need to enhance the quality, efficiency, consistency, and utilization of annual program reporting. The approach is in line with the NCATS’s strategic objective that encourages all CTS organizations to “disseminate research results and best practices broadly, and promote a culture of openness, sharing and transparency” (NCATS, 2016, p. 19). Program activities that support translational processes and contribute to clinical outcomes are complex, nonlinear, and multidisciplinary (Smith etal., 2017). In this complex context, the meaningful engagement and reflection of program staff and collaborators is essential for all aspects of program planning, implementation, reporting, and dissemination. The University of Minnesota CTSI’s key objectives, goals, and uses of RPPR are as follows: - Develop, align, and leverage the RPPR to fulfill the accountability requirements, needs, and expectations of multiple stakeholders: NIH/NCATS, Internal Advisory Board and External Advisory Board, campus/hub, program staff and collaborators. - Engage the CTSA staff and collaborators as a team in multiple aspects of program reporting. - Inform strategic management, continuous improvement, monitoring and evaluation, organizational learning and dissemination to program stakeholders. 
- Translate the reported information into practical, evidence-based issues and strategic questions for the leadership discussions and advisory board consultations, actionable work plans, communication to stakeholders, organizational learning, and translational science knowledge base. METHODS/STUDY POPULATION: A case study of the programmatic/evaluative and methodological approach/technique development that resulted in a formal, structured, collaborative, transparent process with detailed guidelines, templates, and timelines. The process and content for reporting has been developed via a variety of methods and sources: specific funder (NIH) requirements, Huddle meetings, document/content/database analysis, reflection meetings with component staff, informal conversations, and observations. Preparation for the report began almost one year in advance, including careful analysis of the report requirements, developing user-friendly, detailed guidelines, templates, and examples. The guide templates and worksheets were created as a result of time spent navigating current instructions provided by NIH and NCATS. Timeline/project plan was developed with start and end dates for all of the moving parts along with identified responsible personnel for each of the tasks. A grid of the grant components and responsible personnel was designed to highlight the matrixed organization of the grant and the need to work across components to create single reports. The RPPR key categories have also been considered for incorporating and tracking in a program activity/customer tracking system for ongoing data management and use. As a complex translational science program, UMN CTSI has multiple initiatives, variables, and metrics to report. The program staff has been deeply engaged in the evaluative reflection to identify, prioritize, and incorporate into the RPPR the metrics that most useful to manage and describe CTSI processes, participation, products, and outcomes. 
Program components responded differently to the collaborative approach implemented. The M&E technical assistance was implemented in 3 different ways: components either did the M&E RPPR template themselves, with minimal M&E team assistance; responded to comments and information provided by the M&E team as a first step; or requested a significant level of assistance from M&E. Participants/partners in developing and using RPPR include CTSI program leadership and staff, administration, communication staff, M&E team, and our collaborators. RESULTS/ANTICIPATED RESULTS: The proposed comprehensive approach to the annual program performance reporting shows sound promise to enhance program staff engagement, report utilization, learning, strategic management, self-evaluation capacity, and continuous improvement within a clinical and translational science organization. DISCUSSION/SIGNIFICANCE OF IMPACT: This structured approach’s impact is significant in that it fills the current gap in the practice, literature, and methodology and offers a practical example of a “practice that works” for CTR (and other) organizations and programs striving to improve their reporting practices, staff engagement, learning, and program impact. Leveraging and synergizing the RPPR requirements and other complex, data-demanding obligations and needs can help the CTS programs move beyond the once-a-year compilation of project accomplishments and challenges to developing and sharing a thoughtful translational science program success story. References: National Center for Advancing Translational Sciences. (2016). NCATS Strategic Plan. NIH. Available at: https://ncats.nih.gov/strategicplan Smith, C., Baveja, R., Grieb, T., & Mashour, G. (2017). Toward a science of translational science. Journal of Clinical and Translational Science, 1(4), 253-255. 
doi: 10.1017/cts.2017.14",2019-03-01 +32924890,Macrostructural Analyses of Cinderella Narratives in a Large Nonclinical Sample.,"Purpose Macrostructural narrative analyses are important clinical measures, revealing age-related declines and disorder-related impairments in the accuracy, completeness, logical sequencing, and organization of content. The current study aims to provide preliminary data on typical aging and psychometric evidence supporting multilevel Main Concept, Sequencing, and Story Grammar (MSSG) analyses that capture these aspects of narratives. Method Transcripts of Cinderella narratives for 92 healthy control participants stratified across four age brackets from the online database AphasiaBank were coded by Richardson and Dalton (2016) for main concept (MC) analysis. In the current study, MSSG analyses were completed for (a) logical sequencing, independently and in combination with MC accuracy and completeness (MC + sequencing), and (b) story grammar organization (i.e., inclusion of episodic components and complexity of episodes). Interrater agreement (99%-100%) revealed highly reliable scoring. Results Descriptive statistics for the typically aging sample are presented for sequencing, MC + sequencing, total episodic components, and episodic complexity. Scores for participants over 60 years of age were lower (poorer) than scores for those 20-59 years of age, supporting the construct validity of score use for identifying age-related declines in performance. Conclusions This study's novel MSSG analyses of narrative production efficiently assess the logical sequencing and story grammar organization of content in healthy controls. Preliminary reliability and validity evidence support the use of all scores to measure age-related changes in narrative macrostructure. Data from this typically aging sample provide a foundation for future research and clinical assessment aimed at quantifying narrative deficits in adults with communication disorders. 
Supplemental Material https://doi.org/10.23641/asha.12683495.",2020-07-28 +30632786,A Chemical Category-Based Prioritization Approach for Selecting 75 Per- and Polyfluoroalkyl Substances (PFAS) for Tiered Toxicity and Toxicokinetic Testing.,"Per- and polyfluoroalkyl substances (PFASs) are a group of fluorinated substances of interest to researchers, regulators, and the public due to their widespread presence in the environment. A few PFASs have comparatively extensive amounts of human epidemiological, exposure, and experimental animal toxicity data (e.g., perfluorooctanoic acid), whereas little toxicity and exposure information exists for much of the broader set of PFASs. Given that traditional approaches to generate toxicity information are resource intensive, new approach methods, including in vitro high-throughput toxicity (HTT) testing, are being employed to inform PFAS hazard characterization and further (in vivo) testing. The U.S. Environmental Protection Agency (EPA) and the National Toxicology Program (NTP) are collaborating to develop a risk-based approach for conducting PFAS toxicity testing to facilitate PFAS human health assessments. This article describes the construction of a PFAS screening library and the process by which a targeted subset of 75 PFASs were selected. Multiple factors were considered, including interest to the U.S. EPA, compounds within targeted categories, structural diversity, exposure considerations, procurability and testability, and availability of existing toxicity data. Generating targeted HTT data for PFASs represents a new frontier for informing priority setting. https://doi.org/10.1289/EHP4555.",2019-01-01 +31525460,Transfer of regulatory knowledge from human to mouse for functional genomics analysis.,"Transcriptome profiling followed by differential gene expression analysis often leads to lists of genes that are hard to analyze and interpret. 
Functional genomics tools are powerful approaches for downstream analysis, as they summarize the large and noisy gene expression space into a smaller number of biological meaningful features. In particular, methods that estimate the activity of processes by mapping transcripts level to process members are popular. However, footprints of either a pathway or transcription factor (TF) on gene expression show superior performance over mapping-based gene sets. These footprints are largely developed for humans and their usability in the broadly-used model organism Mus musculus is uncertain. Evolutionary conservation of the gene regulatory system suggests that footprints of human pathways and TFs can functionally characterize mice data. In this paper we analyze this hypothesis. We perform a comprehensive benchmark study exploiting two state-of-the-art footprint methods, DoRothEA and an extended version of PROGENy. These methods infer TF and pathway activity, respectively. Our results show that both can recover mouse perturbations, confirming our hypothesis that footprints are conserved between mice and humans. Subsequently, we illustrate the usability of PROGENy and DoRothEA by recovering pathway/TF-disease associations from newly generated disease sets. Additionally, we provide pathway and TF activity scores for a large collection of human and mouse perturbation and disease experiments (2374). We believe that this resource, available for interactive exploration and download (https://saezlab.shinyapps.io/footprint_scores/), can have broad applications including the study of diseases and therapeutics.",2019-09-13 +32282885,CScape-somatic: distinguishing driver and passenger point mutations in the cancer genome.,"

Motivation

Next-generation sequencing technologies have accelerated the discovery of single nucleotide variants in the human genome, stimulating the development of predictors for classifying which of these variants are likely functional in disease, and which neutral. Recently, we proposed CScape, a method for discriminating between cancer driver mutations and presumed benign variants. For the neutral class, this method relied on benign germline variants found in the 1000 Genomes Project database. Discrimination could, therefore, be influenced by the distinction of germline versus somatic, rather than neutral versus disease driver. This motivates this article in which we consider predictive discrimination between recurrent and rare somatic single point mutations based solely on using cancer data, and the distinction between these two somatic classes and germline single point mutations.

Results

For somatic point mutations in coding and non-coding regions of the genome, we propose CScape-somatic, an integrative classifier for predictively discriminating between recurrent and rare variants in the human cancer genome. In this study, we use purely cancer genome data and investigate the distinction between minimal occurrence and significantly recurrent somatic single point mutations in the human cancer genome. We show that this type of predictive distinction can give novel insight, and may deliver more meaningful prediction in both coding and non-coding regions of the cancer genome. Tested on somatic mutations, CScape-somatic outperforms alternative methods, reaching 74% balanced accuracy in coding regions and 69% in non-coding regions, whereas even higher accuracy may be achieved using thresholds to isolate high-confidence predictions.

Availability and implementation

Predictions and software are available at http://CScape-somatic.biocompute.org.uk/.

Contact

mark.f.rogers.phd@gmail.com or C.Campbell@bristol.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +32862462,Expansin Engineering Database: A navigation and classification tool for expansins and homologues.,"Expansins have the remarkable ability to loosen plant cell walls and cellulose material without showing catalytic activity and therefore have potential applications in biomass degradation. To support the study of sequence-structure-function relationships and the search for novel expansins, the Expansin Engineering Database (ExED, https://exed.biocatnet.de) collected sequence and structure data on expansins from Bacteria, Fungi, and Viridiplantae, and expansin-like homologues such as carbohydrate binding modules, glycoside hydrolases, loosenins, swollenins, cerato-platanins, and EXPNs. Based on global sequence alignment and protein sequence network analysis, the sequences are highly diverse. However, many similarities were found between the expansin domains. Newly created profile hidden Markov models of the two expansin domains enable standard numbering schemes, comprehensive conservation analyses, and genome annotation. Conserved key amino acids in the expansin domains were identified, a refined classification of expansins and carbohydrate binding modules was proposed, and new sequence motifs facilitate the search of novel candidate genes and the engineering of expansins.",2020-09-09 +29069476,"EBI Metagenomics in 2017: enriching the analysis of microbial communities, from sequence reads to assemblies.","EBI metagenomics (http://www.ebi.ac.uk/metagenomics) provides a free to use platform for the analysis and archiving of sequence data derived from the microbial populations found in a particular environment. Over the past two years, EBI metagenomics has increased the number of datasets analysed 10-fold. In addition to increased throughput, the underlying analysis pipeline has been overhauled to include both new or updated tools and reference databases. 
Of particular note is a new workflow for taxonomic assignments that has been extended to include assignments based on both the large and small subunit RNA marker genes and to encompass all cellular micro-organisms. We also describe the addition of metagenomic assembly as a new analysis service. Our pilot studies have produced over 2400 assemblies from datasets in the public domain. From these assemblies, we have produced a searchable, non-redundant protein database of over 50 million sequences. To provide improved access to the data stored within the resource, we have developed a programmatic interface that provides access to the analysis results and associated sample metadata. Finally, we have integrated the results of a series of statistical analyses that provide estimations of diversity and sample comparisons.",2018-01-01 +31939737,"A genetic, genomic, and computational resource for exploring neural circuit function. ","The anatomy of many neural circuits is being characterized with increasing resolution, but their molecular properties remain mostly unknown. Here, we characterize gene expression patterns in distinct neural cell types of the Drosophila visual system using genetic lines to access individual cell types, the TAPIN-seq method to measure their transcriptomes, and a probabilistic method to interpret these measurements. We used these tools to build a resource of high-resolution transcriptomes for 100 driver lines covering 67 cell types, available at http://www.opticlobe.com. Combining these transcriptomes with recently reported connectomes helps characterize how information is transmitted and processed across a range of scales, from individual synapses to circuit pathways. 
We describe examples that include identifying neurotransmitters, including cases of apparent co-release, generating functional hypotheses based on receptor expression, as well as identifying strong commonalities between different cell types.",2020-01-15 +34915781,"With age comes well-being: older age associated with lower stress, negative affect, and depression throughout the COVID-19 pandemic.","Objectives: Despite initial concerns about older adult's emotional well-being during the COVID-19 pandemic, reports from the first months of the pandemic suggested that older adults were faring better than younger adults, reporting lower stress, negative affect, depression, and anxiety. In this study, we examined whether this pattern would persist as the pandemic progressed.Method: A convenience sample of 1,171 community-dwelling adults in the United States, ages 18-90, filled out surveys on various metrics of emotional well-being starting in March 2020 and at various time points through April 2021. We created time bins to account for the occurrence of significant national events, allowing us to determine how age would relate to affective outcomes when additional national-level emotional events were overlaid upon the stress of the pandemic.Results: Older age was associated with lower stress, negative affect, and depressive symptomatology, and with higher positive affect, and this effect was consistent across time points measured from March, 2020 through April, 2021. 
Age was less associated with measures of worry and social isolation, but older adults were more worried about their personal health throughout the pandemic.Conclusion: These results are consistent with literature suggesting that older age is associated with increased resilience in the face of stressful life experiences and show that this pattern may extend to resilience in the face of a prolonged real-world stressor.Supplemental data for this article can be accessed online at https://doi.org/10.1080/13607863.2021.2010183 .",2021-12-16 +31844327,High-throughput phenotyping reveals expansive genetic and structural underpinnings of immune variation.,"By developing a high-density murine immunophenotyping platform compatible with high-throughput genetic screening, we have established profound contributions of genetics and structure to immune variation (http://www.immunophenotype.org). Specifically, high-throughput phenotyping of 530 unique mouse gene knockouts identified 140 monogenic 'hits', of which most had no previous immunologic association. Furthermore, hits were collectively enriched in genes for which humans show poor tolerance to loss of function. The immunophenotyping platform also exposed dense correlation networks linking immune parameters with each other and with specific physiologic traits. Such linkages limit freedom of movement for individual immune parameters, thereby imposing genetically regulated 'immunologic structures', the integrity of which was associated with immunocompetence. Hence, we provide an expanded genetic resource and structural perspective for understanding and monitoring immune variation in health and disease.",2019-12-16 +30950816,[Xenobiotic toxicity prediction combined with xenobiotic metabolism prediction in the human body].,"The majority of xenobiotics undergo a number of chemical reactions known as biotransformation in human body. 
The biological activity, toxicity, and other properties of the metabolites may significantly differ from those of the parent compound. Not only xenobiotic itself and its final metabolites produced in large quantities, but the intermediate and final metabolites that are formed in trace quantities, can cause undesirable effects. We have developed a freely available web resource MetaTox (http://www.way2drug.com/mg/) for integral assessment of xenobiotics toxicity taking into account their metabolism in the humans. The generation of the metabolite structures is based on the reaction fragments. The estimates of the probability of the reaction of a certain class and the probability of site of biotransformation are used at the generation of the xenobiotic metabolism pathways. The web resource MetaTox allows researchers to assess the metabolism of compounds in the humans and to obtain assessment of their acute, chronic toxicity, and adverse effects.",2019-02-01 +33329218,"Taking Construction Grammar One Step Further: Families, Clusters, and Networks of Evaluative Constructions in Russian.","We present a case study of grammatical constructions and how their function in a single language (Russian) can be captured through semantic and syntactic classification. Since 2016 an on-going joint project of UiT The Arctic University of Norway and the National Research University Higher School of Economics in Moscow has been collecting and analyzing multiword grammatical constructions of Russian. The main product is the Russian Constructicon (https://site.uit.no/russian-constructicon/), which, with over two thousand two hundred constructions (and more being continuously added), is arguably the largest openly available constructicon resource for any language. The combination of this large size with depth of analysis, containing both syntactic and semantic tags, makes it possible to view the interrelation of constructions as families and to discover trends in their behavior. 
Our annotation includes 53 semantic tags of varying frequency, with three tags that are by far more frequent than all the rest, accounting for 30% of the entire inventory of the Russian Constructicon. These three semantic types are Assessment, Attitude, and Intensity, all of which convey a speaker's evaluation of a topic, in contrast to most of the other tags (such as Time, Manner, and Comparison). Assessment and Attitude constructions are investigated in greater detail in this article. Secondary semantic tags reveal that negative evaluation among these two semantic types is more than twice as frequent as positive evaluation. Examples of negative evaluations are: for Assessment VP tak sebe, as in Na pianino ja igraju tak sebe ""I play the piano so-so [lit. thus self]""; for Attitude s PronPers-Gen xvatit/xvatilo (NP-Gen), as in S menja xvatit ""I'm fed up [lit. from me enough]."" In terms of syntax, the most frequent syntactic types of constructions in the Russian Constructicon are clausal constructions [constituting an independent clause like s PronPers-Gen xvatit/xvatilo (NP-Gen)] and constructions with the anchor in the role of adverbial modifier (like VP tak sebe). Our semantic and syntactic classification of this large body of Russian constructions makes it possible to postulate patterns of grammatical constructions constituting a radial category with central and peripheral types. Classification of large numbers of constructions reveals systematic relations that structure the grammar of a language.",2020-11-20 +34620367,Changes in energy drink consumption during the COVID-19 quarantine.,"The present letter to editor comments the manuscript ""Caliskan SG, Kilic MA, Bilgin MD. Acute effects of energy drink on hemodynamic and electrophysiologic parameters in habitual and non-habitual caffeine consumers. Clin Nutr ESPEN. 2021 Apr; 42:333-338. https://doi.org/10.1016/j.clnesp.2021.01.011. Epub 2021 Feb 4. 
PMID: 33745602."" presenting some data on consumption of energy drinks among medical students during COVID-19 pandemic.",2021-07-26 +34405389,Database of word-level statistics for Mandarin Chinese (DoWLS-MAN).,"In this article we present the Database of Word-Level Statistics for Mandarin Chinese (DoWLS-MAN). The database addresses the lack of agreement in phonological syllable segmentation specific to Mandarin by offering phonological features for each lexical item according to 16 schematic representations of the syllable (8 with tone and 8 without tone). Those lexical statistics that differ per phonological word and nonword due to changes in syllable segmentation are of the variant category and include subtitle lexical frequency, phonological neighborhood density measures, homophone density, and network science measures. The invariant characteristics consist of each items' lexical tone, phonological transcription, and syllable structure among others. The goal of DoWLS-MAN is to provide researchers both the ability to choose stimuli that are derived from a segmentation schema that supports an existing model of Mandarin speech processing, and the ability to choose stimuli that allow for the testing of hypotheses on phonological segmentation according to multiple schemas. In an exploratory analysis we illustrate how multiple schematic representations of the phonological mental lexicon can aid in hypothesis generation, specifically in terms of phonological processing when reading Chinese orthography. Users of the database can search among over 92,000 words, over 1600 out-of-vocabulary Chinese characters, and 4300 phonological nonwords according to either Chinese orthography, pinyin, or ASCII phonetic script. Users can also generate a list of phonological words and nonwords according to user-defined ranges and categories of lexical characteristics. 
DoWLS-MAN is available to the public for search or download at https://dowls.site .",2021-08-17 +34386295,Federated Galaxy: Biomedical Computing at the Frontier. ,"Biomedical data exploration requires integrative analyses of large datasets using a diverse ecosystem of tools. For more than a decade, the Galaxy project (https://galaxyproject.org) has provided researchers with a web-based, user-friendly, scalable data analysis framework complemented by a rich ecosystem of tools (https://usegalaxy.org/toolshed) used to perform genomic, proteomic, metabolomic, and imaging experiments. Galaxy can be deployed on the cloud (https://launch.usegalaxy.org), institutional computing clusters, and personal computers, or readily used on a number of public servers (e.g., https://usegalaxy.org). In this paper, we present our plan and progress towards creating Galaxy-as-a-Service-a federation of distributed data and computing resources into a panoptic analysis platform. Users can leverage a pool of public and institutional resources, in addition to plugging-in their private resources, helping answer the challenge of resource divergence across various Galaxy instances and enabling seamless analysis of biomedical data.",2018-07-01 +34712617,Cancer CRC: A Comprehensive Cancer Core Transcriptional Regulatory Circuit Resource and Analysis Platform.,"A core transcriptional regulatory circuit (CRC) is a group of interconnected auto-regulating transcription factors (TFs) that form loops and can be identified by super-enhancers (SEs). Studies have indicated that CRCs play an important role in defining cellular identity and determining cellular fate. Additionally, core TFs in CRCs are regulators of cell-type-specific transcriptional regulation. However, a global view of CRC properties across various cancer types has not been generated. 
Thus, we integrated paired cancer ATAC-seq and H3K27ac ChIP-seq data for specific cell lines to develop the Cancer CRC (http://bio.liclab.net/Cancer_crc/index.html). This platform documented 94,108 cancer CRCs, including 325 core TFs. The cancer CRC also provided the ""SE active core TFs analysis"" and ""TF enrichment analysis"" tools to identify potentially key TFs in cancer. In addition, we performed a comprehensive analysis of core TFs in various cancer types to reveal conserved and cancer-specific TFs.",2021-10-12 +32995658,Transcriptomic analysis of the signature of neurogenesis in human hippocampus suggests restricted progenitor cell progression post-childhood.,"

Purpose

Immunohistological investigations have given rise to divergent perspectives about adult hippocampal neurogenesis in humans. Therefore, this study aimed to examine whether a comprehensive transcriptomic analysis of signature markers of neurogenesis, supplemented with markers of gliogenesis, vasculogenesis, cell proliferation, and apoptosis, may help discern essential aspects of adult hippocampal neurogenesis in humans.

Materials and methods

RNA expression data for salient marker genes of neurogenesis, gliogenesis, vasculogenesis, and apoptosis in post-mortem human hippocampal tissue [from prenatal (n = 15), child (n = 5), adolescent (n = 4), and adult (n = 6) brains] were downloaded from the Allen Human Brain Atlas database (http://www.brainspan.org/rnaseq/search/index.html). Gene expression data was categorized, median values were computed, and age group-specific differential expression was subjected to statistical analysis (significance level, α = 0.01).

Results

With the exception of the genes encoding GFAP, BLBP, SOX2, and PSA-NCAM (unchanged), and the post-mitotic late maturation markers CALB1, CALB2, MAP2, and NEUN as well as the pan-neuronal marker PROX1 which were persistently expressed throughout, expression of all other genes associated with neurogenesis was steeply and progressively downregulated between perinatal life and adulthood. Interestingly, expression of the classical proliferation marker KI67 and a progenitor cell marker TBR2 were found to have reached baseline expression levels (zero expression score) at adolescence while the expression of immature neuronal, post-mitotic early and late maturation markers remained at a constant level after childhood. In contrast, markers of gliogenesis (other than PDGFRA and Vimentin) were significantly upregulated between prenatal life and childhood. Expression of the vasculogenesis markers VEGFA and FGF2 did not differ across any of the age groups studied, whereas the expression of apoptotic markers was progressively decreased after prenatal life.

Conclusions

Our findings indicate that the progression of neurogenesis from progenitor cells is highly restricted in the human brain from childhood onwards. An alternative possibility that limited neurogenesis may be continued in adolescents and adults from a developmentally arrested pool of immature neurons needs to be examined further through experimental studies.",2020-09-11 +32330240,Targeted domain assembly for fast functional profiling of metagenomic datasets with S3A.,"

Motivation

The understanding of the ever-increasing number of metagenomic sequences accumulating in our databases demands approaches that rapidly 'explore' the content of multiple and/or large metagenomic datasets with respect to specific domain targets, avoiding full domain annotation and full assembly.

Results

S3A is a fast and accurate domain-targeted assembler designed for rapid functional profiling. It is based on a novel construction and a fast traversal of the Overlap-Layout-Consensus graph, designed to reconstruct coding regions from domain annotated metagenomic sequence reads. S3A relies on high-quality domain annotation to efficiently assemble metagenomic sequences and on the design of a new confidence measure for a fast evaluation of overlapping reads. Its implementation is highly generic and can be applied to any arbitrary type of annotation. On simulated data, S3A achieves a level of accuracy similar to that of classical metagenomics assembly tools while permitting faster and sensitive profiling of domains of interest. When studying a few dozen functional domains (a typical scenario), S3A is up to an order of magnitude faster than general purpose metagenomic assemblers, thus enabling the analysis of a larger number of datasets in the same amount of time. S3A opens new avenues to the fast exploration of the rapidly increasing number of metagenomic datasets displaying an ever-increasing size.

Availability and implementation

S3A is available at http://www.lcqb.upmc.fr/S3A_ASSEMBLER/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-07-01 +35492053,VentMon: An open source inline ventilator tester and monitor.,"Humanitarian engineers responded to the pandemic ventilator shortage of March, 2020 by beginning over 100 open source ventilator projects [Robert L. Read et al. COVID-19 Vent List. Oct. 2020. url: https://docs.google.com/spreadsheets/d/1inYw5H4RiL0AC_J9vPWzJxXCdlkMLPBRdPgEVKF8DZw/edit#gid=0, Joshua M. Pearce. A review of open source ventilators for COVID-19 and future pandemics. In: F1000Research 9 (2020).]. By ventilator, we mean both an invasive ventilator (requiring intubation of the patient) and non-invasive ventilator (generally supporting spontaneously breathing). Inexpensive ventilator test equipment can facilitate projects forced to be geographically distributed by lockdowns. The VentMon is a modular, open source, IoT-enabled tester that plugs into a standard 22 mm airway between a ventilator and a physical test lung to test any ventilator. The VentMon measures flow, pressure, fractional oxygen, humidity, and temperature. Data is stored and graphed at a data lake accessible to all devlopment team members, and, eventually, clinicians. The open source design of the VentMon, its firmware, and cloud-based software may allow it to be used as a component of modular ventilators to provide a clinical readout. The software system surrounding VentMon has been designed to be as modular and composable as possible. By combining new, openly published standards for data with composable and modifiable hardware, the VentMon forms the beginning of an open system or eco-system of ventilation devices and data. Thanks to grants, 20 VentMons have been given away free of charge to pandemic response teams building open source ventilators.",2021-04-22 +34906035,Effect of Chrysin on Endoplasmic Reticulum Stress in a Rat Model of Testicular Torsion.,"

Background

The purpose of this study was to evaluate the possible therapeutic effect of chrysin (CHS) on testicular torsion/detorsion (T/D) injury in vivo through the mechanisms of oxidative stress and endoplasmic reticulum stress (ERS).

Methods

Eighteen male rats were divided into three groups of six subjects in each group: control, T/D and T/D + CHS (100 mg/kg). To evaluate the degree of oxidative stress, tissue malondialdehyde (MDA), total oxidant status (TOS) and total antioxidant status (TAS) levels were determined using colorimetric methods, while tissue superoxide dismutase (SOD) levels were determined using an ELISA kit. To evaluate the degree of ERS, tissue glucose regulatory protein 78 (GRP78), activating transcription factor 6 (ATF6) and C/EBP homologous protein (CHOP) levels were determined using ELISA kits. Johnsen's testicle scoring system was used for histological evaluation.

Results

In the T/D group, statistically significant decreases in TAS and SOD levels and Johnsen score, and increases in TOS, MDA, GRP78, ATF6 and CHOP levels, were found compared to the control group (p < 0.05). CHS administration statistically significantly restored this T/D-induced damage (p < 0.05).

Conclusion

This is the first study to show that CHS prevent T/D-induced testicular damage through its ERS inhibitor activity. More comprehensive studies are needed to understand the underlying mechanisms.Supplemental data for this article is available online at https://doi.org/10.1080/08941939.2021.2015489 .",2021-12-14 +34009375,miRTargetLink 2.0-interactive miRNA target gene and target pathway networks.,"Which genes, gene sets or pathways are regulated by certain miRNAs? Which miRNAs regulate a particular target gene or target pathway in a certain physiological context? Answering such common research questions can be time consuming and labor intensive. Especially for researchers without computational experience, the integration of different data sources, selection of the right parameters and concise visualization can be demanding. A comprehensive analysis should be central to present adequate answers to complex biological questions. With miRTargetLink 2.0, we develop an all-in-one solution for human, mouse and rat miRNA networks. Users input in the unidirectional search mode either a single gene, gene set or gene pathway, alternatively a single miRNA, a set of miRNAs or an miRNA pathway. Moreover, genes and miRNAs can jointly be provided to the tool in the bidirectional search mode. For the selected entities, interaction graphs are generated from different data sources and dynamically presented. Connected application programming interfaces (APIs) to the tailored enrichment tools miEAA and GeneTrail facilitate downstream analysis of pathways and context-annotated categories of network nodes. MiRTargetLink 2.0 is freely accessible at https://www.ccb.uni-saarland.de/mirtargetlink2.",2021-07-01 +33832071,Genomic analysis of pancreatic cancer reveals 3 molecular subtypes with different clinical outcomes.,"

Abstract

Pancreatic cancer has a very high mortality with a 5-year survival of <5%. The purpose of this study was to classify specific molecular subtypes associated with prognosis of pancreatic cancer using The Cancer Genome Atlas (TCGA) multiplatform genomic data.Multiplatform genomic data (N = 178), including gene expression, copy number alteration, and somatic mutation data, were obtained from cancer browser (https://genome-cancer.ucsc.edu, cohort: TCGA Pancreatic Cancer). Clinical data including survival results were analyzed. We also used validation cohort (GSE50827) to confirm the robustness of these molecular subtypes in pancreatic cancer.When we performed unsupervised clustering using TCGA gene expression data, we found three distinct molecular subtypes associated with different survival results. Copy number alteration and somatic mutation data showed different genomic patterns for these three subtypes. Ingenuity pathway analysis revealed that each subtype showed differentially altered pathways. Using each subtype-specific genes (200 were selected), we could predict molecular subtype in another cohort, confirming the robustness of these molecular subtypes of pancreatic cancer. Cox regression analysis revealed that molecular subtype is the only significant prognostic factor for pancreatic cancer (P = .042, 95% confidence interval 0.523-0.98).Genomic analysis of pancreatic cancer revealed 3 distinct molecular subtypes associated with different survival results. Using these subtype-specific genes and prediction model, we could predict molecular subtype associated with prognosis of pancreatic cancer.",2021-04-01 +34776377,Deep neural network model for highly accurate prediction of BODIPYs absorption.,"A possibility to accurately predict the absorption maximum wavelength of BODIPYs was investigated. We found that previously reported models had a low accuracy (40-57 nm) to predict BODIPYs due to the limited dataset sizes and/or number of BODIPYs (few hundreds). 
New models developed in this study were based on data of 6000-plus fluorescent dyes (including 4000-plus BODIPYs) and the deep neural network architecture. The high prediction accuracy (five-fold cross-validation root mean squared error (RMSE) of 18.4 nm) was obtained using a consensus model, which was more accurate than individual models. This model provided the excellent accuracy (RMSE of 8 nm) for molecules previously synthesized in our laboratory as well as for prospective validation of three new BODIPYs. We found that solvent properties did not significantly influence the model accuracy since only few BODIPYs exhibited solvatochromism. The analysis of large prediction errors suggested that compounds able to have intermolecular interactions with solvent or salts were likely to be incorrectly predicted. The consensus model is freely available at https://ochem.eu/article/134921 and can help the other researchers to accelerate design of new dyes with desired properties.",2021-11-03 +35082975,COVID-19: A Vaccine Priority Index Mapping Tool for Rapidly Assessing Priority Populations in North Carolina.,"

Background

The initial limited supply of COVID-19 vaccine in the U.S. presented significant allocation, distribution, and delivery challenges. Information that can assist health officials, hospital administrators and other decision makers with readily identifying who and where to target vaccine resources and efforts can improve public health response.

Objective

The objective of this project was to develop a publicly available geographical information system (GIS) web mapping tool that would assist North Carolina health officials in readily identifying high-risk, high priority population groups and facilities in the immunization decision making process.

Methods

Publicly available data were used to identify 14 key health and socio-demographic variables and 5 differing themes (social and economic status; minority status and language; housing situation; at risk population; and health status). Vaccine priority population index (VPI) scores were created by calculating a percentile rank for each variable over each N.C. Census tract. Values for all Census tracts with a non-zero population (N = 2,195) were ranked from lowest to highest (0.0 to 1.0) and mapped using ArcGIS.

Results

The VPI tool was made publicly available (https://enchealth.org/) during the pandemic to readily assist with identifying high risk population priority areas in N.C. for the planning, distribution, and delivery of COVID-19 vaccine.

Discussion

While health officials may have benefitted by using the VPI tool during the pandemic, a more formal evaluation process is needed to fully assess its usefulness, functionality, and limitations.

Conclusion

When considering COVID-19 immunization efforts, the VPI tool can serve as an added component in the decision-making process.",2021-12-24 +32903775,"Analyzing and Decoding Natural Reach-and-Grasp Actions Using Gel, Water and Dry EEG Systems.","Reaching and grasping is an essential part of everybody's life, it allows meaningful interaction with the environment and is key to independent lifestyle. Recent electroencephalogram (EEG)-based studies have already shown that neural correlates of natural reach-and-grasp actions can be identified in the EEG. However, it is still in question whether these results obtained in a laboratory environment can make the transition to mobile applicable EEG systems for home use. In the current study, we investigated whether EEG-based correlates of natural reach-and-grasp actions can be successfully identified and decoded using mobile EEG systems, namely the water-based EEG-Versatile TM system and the dry-electrodes EEG-Hero TM headset. In addition, we also analyzed gel-based recordings obtained in a laboratory environment (g.USBamp/g.Ladybird, gold standard), which followed the same experimental parameters. For each recording system, 15 study participants performed 80 self-initiated reach-and-grasp actions toward a glass (palmar grasp) and a spoon (lateral grasp). Our results confirmed that EEG-based correlates of reach-and-grasp actions can be successfully identified using these mobile systems. In a single-trial multiclass-based decoding approach, which incorporated both movement conditions and rest, we could show that the low frequency time domain (LFTD) correlates were also decodable. Grand average peak accuracy calculated on unseen test data yielded for the water-based electrode system 62.3% (9.2% STD), whereas for the dry-electrodes headset reached 56.4% (8% STD). For the gel-based electrode system 61.3% (8.6% STD) could be achieved. 
To foster and promote further investigations in the field of EEG-based movement decoding, as well as to allow the interested community to make their own conclusions, we provide all datasets publicly available in the BNCI Horizon 2020 database (http://bnci-horizon-2020.eu/database/data-sets).",2020-08-12 +33973813,"First report of a 'Candidatus Phytoplasma aurantifolia'-related strain (16SrII-V) associated with phyllody, virescence, and shoot proliferation of sweet William (Dianthus barbatus) in Taiwan. ","The sweet William (Dianthus barbatus) is an ornamental belonging to the Caryophyllaceae family; the species produces clusters of flowers that comes in various colors and is grown commonly as garden plants (Lim 2014). In February 2021, sweet Williams showing symptoms typical of phytoplasma diseases were found in a garden located in Wufeng District, Taichung, Taiwan (24°04'37.6""N 120°43'20.4""E). Infected plants exhibited virescence and phyllody symptoms and produced an abnormal number of new shoots from the base of the flowers/flower-like structures (Figure S1) as well as the base of the plants. Among the fifteen plants grown in the area, two exhibited such symptoms. The two symptomatic plants, along with five symptomless plants were sampled. Two flower-like structures were collected from each of the symptomatic plants, and two flower samples were collected for each symptomless plant (Figure S2). Total DNA were extracted from each sample using the Synergy 2.0 Plant DNA Extraction Kit (OPS Diagnostics) and subjected to diagnostic PCR using primers P1/P7 (Schneider et al. 1995). All four symptomatic samples produced a 1.8-kb amplicon and the ten symptomless samples did not. The amplification products were diluted fifty-fold and used in a second round of PCR using primers R16F2n/R16R2 (Gundersen and Lee 1996). Again, only the symptomatic samples produced an expected 1.25-kb amplicon. 
A sample was selected for each plant and the PCR products from the first round of PCR were cloned using the pGEM-T Easy Vector System (Promega Inc.) and sequenced (three clones per sample). Fragments of the 16S rRNA gene (1,248 bp; GenBank accession: MW788688) were analyzed using iPhyClassifier (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi). Sequences obtained from the two infected plants were identical, and were classified to the 16SrII-V subgroup with similarity coefficients of 1.0; they also shared 98.6% similarity with the sequence of a 'Candidatus Phytoplasma aurantifolia' reference strain (accession: U15442). BLASTn results indicated that the 16S rRNA gene sequences detected were identical to those of 16SrII-V phytoplasmas affecting mungbean (accession: MW319764), lilac tasselflower (accession: MT420682), peanut (accession: JX403944) and green manure soybean (accession: MW393690) found in Taiwan. To corroborate the above results, 16SrII group-specific primers were used to conduct nested and semi-nested PCR targeting the pathogen's 16S rRNA gene (outer primers: rpF1C/rp(I)R1A; inner primers: rp(II)F1/rp(II)R1; Martini et al. 2007) and immunodominant membrane protein gene (imp; outer primers: IMP-II-F1/IMP-II-R1; inner primers: IMP-II-F2/IMP-II-R1; Al-Subhi et al. 2017). In both assays, the symptomatic samples produced the expected amplicons and the symptomless samples did not. The coding sequence of the imp gene (519 bp; accession: MW755353) was the same among all symptomatic samples, and shared 100% identity with that of the peanut witches'-broom phytoplasma (16SrII; accession: GU214176). To our knowledge, this is the first report of a 16SrII-V phytoplasma infecting sweet Williams in Taiwan. Since 16SrII-V phytoplasmas have also been found infecting mungbeans and peanuts in Taiwan (Liu et al. 
2015), the findings here suggest that by serving as a natural host in the field, the sweet William may potentially contribute to the spread of 16SrII-V phytoplasmas to food crops.",2021-05-11 +33522661,The Open Brain Consent: Informing research participants and obtaining consent to share brain imaging data.,"Having the means to share research data openly is essential to modern science. For human research, a key aspect in this endeavor is obtaining consent from participants, not just to take part in a study, which is a basic ethical principle, but also to share their data with the scientific community. To ensure that the participants' privacy is respected, national and/or supranational regulations and laws are in place. It is, however, not always clear to researchers what the implications of those are, nor how to comply with them. The Open Brain Consent (https://open-brain-consent.readthedocs.io) is an international initiative that aims to provide researchers in the brain imaging community with information about data sharing options and tools. We present here a short history of this project and its latest developments, and share pointers to consent forms, including a template consent form that is compliant with the EU general data protection regulation. We also share pointers to an associated data user agreement that is not only useful in the EU context, but also for any researchers dealing with personal (clinical) data elsewhere.",2021-02-01 +31015229,The Cancer Editome Atlas: A Resource for Exploratory Analysis of the Adenosine-to-Inosine RNA Editome in Cancer.,"Increasing evidence has suggested a role for adenosine-to-inosine RNA editing in carcinogenesis. However, the clinical utility of RNA editing remains limited because functions of the vast majority of editing events remain largely unexplored. 
To help the cancer research community investigate functional consequences of individual editing events, we have developed a user-friendly bioinformatic resource, The Cancer Editome Atlas (TCEA; http://tcea.tmu.edu.tw). TCEA characterizes >192 million editing events at >4.6 million editing sites from approximately 11,000 samples across 33 cancer types in The Cancer Genome Atlas. Clinical information, miRNA expression, and alteration in miRNA targeting modulated through RNA editing are also integrated into TCEA. TCEA supports several modules to search, analyze, and visualize the cancer editome, providing a solid basis for investigating the oncogenic mechanisms of RNA editing and expediting the identification of therapeutic targets in cancer. SIGNIFICANCE: This user-friendly bioinformatic resource reduces the barrier to analyzing the huge and complex cancer RNA editome that cancer researchers face and facilitates the identification of novel therapeutic targets in cancer.",2019-04-23 +34143733,Unsupervised Domain Adaptation With Variational Approximation for Cardiac Segmentation.,"Unsupervised domain adaptation is useful in medical image segmentation. Particularly, when ground truths of the target images are not available, domain adaptation can train a target-specific model by utilizing the existing labeled images from other modalities. Most of the reported works mapped images of both the source and target domains into a common latent feature space, and then reduced their discrepancy either implicitly with adversarial training or explicitly by directly minimizing a discrepancy metric. In this work, we propose a new framework, where the latent features of both domains are driven towards a common and parameterized variational form, whose conditional distribution given the image is Gaussian. This is achieved by two networks based on variational auto-encoders (VAEs) and a regularization for this variational approximation. 
Both of the VAEs, each for one domain, contain a segmentation module, where the source segmentation is trained in a supervised manner, while the target one is trained unsupervisedly. We validated the proposed domain adaptation method using two cardiac segmentation tasks, i.e., the cross-modality (CT and MR) whole heart segmentation and the cross-sequence cardiac MR segmentation. Results show that the proposed method achieved better accuracies compared to two state-of-the-art approaches and demonstrated good potential for cardiac segmentation. Furthermore, the proposed explicit regularization was shown to be effective and efficient in narrowing down the distribution gap between domains, which is useful for unsupervised domain adaptation. The code and data have been released via https://zmiclab.github.io/projects.html.",2021-11-30 +29776332,TelNet - a database for human and yeast genes involved in telomere maintenance.,"

Background

The ends of linear chromosomes, the telomeres, comprise repetitive DNA sequences in complex with proteins that protect them from being processed by the DNA repair machinery. Cancer cells need to counteract the shortening of telomere repeats during replication for their unlimited proliferation by reactivating the reverse transcriptase telomerase or by using the alternative lengthening of telomeres (ALT) pathway. The different telomere maintenance (TM) mechanisms appear to involve hundreds of proteins but their telomere repeat length related activities are only partly understood. Currently, a database that integrates information on TM relevant genes is missing.

Description

To provide a resource for studies that dissect TM features, we here introduce the TelNet database at http://www.cancertelsys.org/telnet/ . It offers a comprehensive compilation of more than 2000 human and 1100 yeast genes linked to telomere maintenance. These genes were annotated in terms of TM mechanism, associated specific functions and orthologous genes, a TM significance score and information from peer-reviewed literature. This TM information can be retrieved via different search and view modes and evaluated for a set of genes as demonstrated for an exemplary application.

Conclusion

TelNet supports the annotation of genes identified from bioinformatics analysis pipelines to reveal possible connections with TM networks. We anticipate that TelNet will be a helpful resource for researchers that study telomeres.",2018-05-18 +28529077,PLMD: An updated data resource of protein lysine modifications.,"Post-translational modifications (PTMs) occurring at protein lysine residues, or protein lysine modifications (PLMs), play critical roles in regulating biological processes. Due to the explosive expansion of the amount of PLM substrates and the discovery of novel PLM types, here we greatly updated our previous studies, and presented a much more integrative resource of protein lysine modification database (PLMD). In PLMD, we totally collected and integrated 284,780 modification events in 53,501 proteins across 176 eukaryotes and prokaryotes for up to 20 types of PLMs, including ubiquitination, acetylation, sumoylation, methylation, succinylation, malonylation, glutarylation, glycation, formylation, hydroxylation, butyrylation, propionylation, crotonylation, pupylation, neddylation, 2-hydroxyisobutyrylation, phosphoglycerylation, carboxylation, lipoylation and biotinylation. Using the data set, a motif-based analysis was performed for each PLM type, and the results demonstrated that different PLM types preferentially recognize distinct sequence motifs for the modifications. Moreover, various PLMs synergistically orchestrate specific cellular biological processes by mutual crosstalks with each other, and we totally found 65,297 PLM events involved in 90 types of PLM co-occurrences on the same lysine residues. Finally, various options were provided for accessing the data, while original references and other annotations were also present for each PLM substrate. Taken together, we anticipated the PLMD database can serve as a useful resource for further researches of PLMs. 
PLMD 3.0 was implemented in PHP + MySQL and freely available at http://plmd.biocuckoo.org.",2017-05-03 +30682850,NP-Scout: Machine Learning Approach for the Quantification and Visualization of the Natural Product-Likeness of Small Molecules. ,"Natural products (NPs) remain the most prolific resource for the development of smallmolecule drugs. Here we report a new machine learning approach that allows the identification of natural products with high accuracy. The method also generates similarity maps, which highlight atoms that contribute significantly to the classification of small molecules as a natural product or synthetic molecule. The method can hence be utilized to (i) identify natural products in large molecular libraries, (ii) quantify the natural product-likeness of small molecules, and (iii) visualize atoms in small molecules that are characteristic of natural products or synthetic molecules. The models are based on random forest classifiers trained on data sets consisting of more than 265,000 to 322,000 natural products and synthetic molecules. Two-dimensional molecular descriptors, MACCS keys and Morgan2 fingerprints were explored. On an independent test set the models reached areas under the receiver operating characteristic curve (AUC) of 0.997 and Matthews correlation coefficients (MCCs) of 0.954 and higher. The method was further tested on data from the Dictionary of Natural Products, ChEMBL and other resources. The best-performing models are accessible as a free web service at http://npscout.zbh.uni-hamburg.de/npscout.",2019-01-24 +34499117,INTERCAAT: identifying interface residues between macromolecules.,"

Summary

The Interface Contact definition with Adaptable Atom Types (INTERCAAT) was developed to determine the atomic interactions between molecules that form a known three-dimensional structure. First, INTERCAAT creates a Voronoi tessellation where each atom acts as a seed. Interactions are defined by atoms that share a hyperplane and whose distance is less than the sum of the atoms' Van der Waals radii plus the diameter of a solvent molecule. Interacting atoms are then classified and interactions are filtered based on compatibility. INTERCAAT implements an adaptive atom classification method; therefore, it can explore interfaces between a variety of macromolecules.

Availability

Source code is freely available at (https://gitlab.com/fiserlab.org/intercaat).

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-09-09 +29206899,MOSAIC: a chemical-genetic interaction data repository and web resource for exploring chemical modes of action.,"Summary:Chemical-genomic approaches that map interactions between small molecules and genetic perturbations offer a promising strategy for functional annotation of uncharacterized bioactive compounds. We recently developed a new high-throughput platform for mapping chemical-genetic (CG) interactions in yeast that can be scaled to screen large compound collections, and we applied this system to generate CG interaction profiles for more than 13 000 compounds. When integrated with the existing global yeast genetic interaction network, CG interaction profiles can enable mode-of-action prediction for previously uncharacterized compounds as well as discover unexpected secondary effects for known drugs. To facilitate future analysis of these valuable data, we developed a public database and web interface named MOSAIC. The website provides a convenient interface for querying compounds, bioprocesses (Gene Ontology terms) and genes for CG information including direct CG interactions, bioprocesses and gene-level target predictions. MOSAIC also provides access to chemical structure information of screened molecules, chemical-genomic profiles and the ability to search for compounds sharing structural and functional similarity. This resource will be of interest to chemical biologists for discovering new small molecule probes with specific modes-of-action as well as computational biologists interested in analysing CG interaction networks. Availability and implementation:MOSAIC is available at http://mosaic.cs.umn.edu. Contact:hisyo@riken.jp, yoshidam@riken.jp, charlie.boone@utoronto.ca or chadm@umn.edu. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +34412673,Easy-Prime: a machine learning-based prime editor design tool.,"Prime editing is a revolutionary genome-editing technology that can make a wide range of precise edits in DNA. However, designing highly efficient prime editors (PEs) remains challenging. We develop Easy-Prime, a machine learning-based program trained with multiple published data sources. Easy-Prime captures both known and novel features, such as RNA folding structure, and optimizes feature combinations to improve editing efficiency. We provide optimized PE design for installation of 89.5% of 152,351 GWAS variants. Easy-Prime is available both as a command line tool and an interactive PE design server at: http://easy-prime.cc/ .",2021-08-19 +34389682,A polyyne toxin produced by an antagonistic bacterium blinds and lyses a Chlamydomonad alga. ,"Algae are key contributors to global carbon fixation and form the basis of many food webs. In nature, their growth is often supported or suppressed by microorganisms. The bacterium Pseudomonas protegens Pf-5 arrests the growth of the green unicellular alga Chlamydomonas reinhardtii, deflagellates the alga by the cyclic lipopeptide orfamide A, and alters its morphology [P. Aiyar et al., Nat. Commun. 8, 1756 (2017)]. Using a combination of Raman microspectroscopy, genome mining, and mutational analysis, we discovered a polyyne toxin, protegencin, which is secreted by P. protegens, penetrates the algal cells, and causes destruction of the carotenoids of their primitive visual system, the eyespot. Together with secreted orfamide A, protegencin thus prevents the phototactic behavior of C. reinhardtii A mutant of P. protegens deficient in protegencin production does not affect growth or eyespot carotenoids of C. reinhardtii Protegencin acts in a direct and destructive way by lysing and killing the algal cells. 
The toxic effect of protegencin is also observed in an eyeless mutant and with the colony-forming Chlorophyte alga Gonium pectorale These data reveal a two-pronged molecular strategy involving a cyclic lipopeptide and a conjugated tetrayne used by bacteria to attack select Chlamydomonad algae. In conjunction with the bloom-forming activity of several chlorophytes and the presence of the protegencin gene cluster in over 50 different Pseudomonas genomes [A. J. Mullins et al., bioRxiv [Preprint] (2021). https://www.biorxiv.org/content/10.1101/2021.03.05.433886v1 (Accessed 17 April 2021)], these data are highly relevant to ecological interactions between Chlorophyte algae and Pseudomonadales bacteria.",2021-08-01 +31565697,Global spatio-temporally harmonised datasets for producing high-resolution gridded population distribution datasets.,"Multi-temporal, globally consistent, high-resolution human population datasets provide consistent and comparable population distributions in support of mapping sub-national heterogeneities in health, wealth, and resource access, and monitoring change in these over time. The production of more reliable and spatially detailed population datasets is increasingly necessary due to the importance of improving metrics at sub-national and multi-temporal scales. This is in support of measurement and monitoring of UN Sustainable Development Goals and related agendas. In response to these agendas, a method has been developed to assemble and harmonise a unique, open access, archive of geospatial datasets. Datasets are provided as global, annual time series, where pertinent at the timescale of population analyses and where data is available, for use in the construction of population distribution layers. 
The archive includes sub-national census-based population estimates, matched to a geospatial layer denoting administrative unit boundaries, and a number of co-registered gridded geospatial factors that correlate strongly with population presence and density. Here, we describe these harmonised datasets and their limitations, along with the production workflow. Further, we demonstrate applications of the archive by producing multi-temporal gridded population outputs for Africa and using these to derive health and development metrics. The geospatial archive is available at https://doi.org/10.5258/SOTON/WP00650.",2019-06-18 +34390706,Racial Differences in Adherence to Lung Cancer Screening Follow-up: A Systematic Review and Meta-analysis.,"

Background

In 2013, the United States Preventive Services Taskforce instituted recommendations for annual lung cancer screening (LCS) with low-dose chest CT imaging for high-risk individuals. LCS reduces lung cancer mortality, with greater reduction observed in Black participants in clinical trials. Although racial disparities in lung cancer mortality have been well documented, less is known about disparities in LCS participation and adherence to follow-up in clinical practice.

Research question

What is the association between race and adherence to LCS follow-up?

Study design and methods

A systematic review was conducted through a search of published studies in MEDLINE, PubMed, EMBASE, Web of Science, and Cumulative Index to Nursing and Allied Health Literature Database from database inception through October 2020. We included studies that examined rates of adherence to LCS follow-up and compared rates by race. Studies were pooled using random-effects meta-analysis.

Results

We screened 18,300 titles and abstracts, and 229 studies were selected for full-text review. Nine studies met inclusion criteria; seven were included in the meta-analysis. Median adherent follow-up rate was 37% (range, 16%-82%). Notable differences among the studies included the proportion of the Black population (range, 4%-47%) and the structure of the LCS programs. The meta-analyses showed lower adherence to LCS follow-up in the Black population (OR, 0.67; 95% CI, 0.55-0.80). This disparity persisted across all malignancy risk levels determined by initial screening results.

Interpretation

Lower adherence to LCS follow-up in Black compared with White patients occurs despite the higher potential lung cancer mortality benefit. Literature specifically addressing race-related barriers to LCS adherence remains limited. To ensure equity in LCS benefits, greater outreach to eligible Black patients should be implemented through increased physician education and use of screening program coordinators to focus on this patient population.

Trial registry

PROSPERO; No.: CRD42020214213; URL: http://www.crd.york.ac.uk/PROSPERO.",2021-08-12 +29297316,A database of human genes and a gene network involved in response to tick-borne encephalitis virus infection.,"BACKGROUND:Tick-borne encephalitis is caused by the neurotropic, positive-sense RNA virus, tick-borne encephalitis virus (TBEV). TBEV infection can lead to a variety of clinical manifestations ranging from slight fever to severe neurological illness. Very little is known about genetic factors predisposing to severe forms of disease caused by TBEV. The aims of the study were to compile a catalog of human genes involved in response to TBEV infection and to rank genes from the catalog based on the number of neighbors in the network of pairwise interactions involving these genes and TBEV RNA or proteins. RESULTS:Based on manual review and curation of scientific publications a catalog comprising 140 human genes involved in response to TBEV infection was developed. To provide access to data on all genes, the TBEVhostDB web resource ( http://icg.nsc.ru/TBEVHostDB/ ) was created. We reconstructed a network formed by pairwise interactions between TBEV virion itself, viral RNA and viral proteins and 140 genes/proteins from TBEVHostDB. Genes were ranked according to the number of interactions in the network. Two genes/proteins (CCR5 and IFNAR1) that had maximal number of interactions were revealed. It was found that the subnetworks formed by CCR5 and IFNAR1 and their neighbors were a fragments of two key pathways functioning during the course of tick-borne encephalitis: (1) the attenuation of interferon-I signaling pathway by the TBEV NS5 protein that targeted peptidase D; (2) proinflammation and tissue damage pathway triggered by chemokine receptor CCR5 interacting with CD4, CCL3, CCL4, CCL2. 
Among nine genes associated with severe forms of TBEV infection, three genes/proteins (CCR5, IL10, ARID1B) were found to have protein-protein interactions within the network, and two genes/proteins (IFNL3 and the IL10, that was just mentioned) were up- or down-regulated in response to TBEV infection. Based on this finding, potential mechanisms for participation of CCR5, IL10, ARID1B, and IFNL3 in the host response to TBEV infection were suggested. CONCLUSIONS:A database comprising 140 human genes involved in response to TBEV infection was compiled and the TBEVHostDB web resource, providing access to all genes was created. This is the first effort of integrating and unifying data on genetic factors that may predispose to severe forms of diseases caused by TBEV. The TBEVHostDB could potentially be used for assessment of risk factors for severe forms of tick-borne encephalitis and for the design of personalized pharmacological strategies for the treatment of TBEV infection.",2017-12-28 +32733904,Overexpression of DGKI in Gastric Cancer Predicts Poor Prognosis.,"Background: Diacylglycerol kinase iota (DGKI) is overexpressed in a variety of cancers and is associated with poor prognosis in colon cancer. This study evaluated the prognostic value of DGKI in gastric cancer (GC) using data from The Cancer Genome Atlas (TCGA). Methods: RNA sequencing results and clinical data of gastric adenoma and adenocarcinoma samples were obtained from the TCGA database (https://portal.gdc.cancer.gov). The Wilcoxon or Kruskal-Wallis test and logistic regression were used to analyze the relationship between DGKI and the clinicopathological characteristics of GC patients. Univariate Cox regression and Kaplan-Meier analysis were used to analyze the clinicopathological characteristics of GC patients and the relationship between DGKI and overall survival time, and multivariate Cox regression analysis was used to identify independent risk factors affecting the prognosis of GC patients. 
Gene set enrichment analysis (GSEA) was performed using the TCGA dataset. Results: DGKI was overexpressed in gastric tumors and was related to poor prognosis (p = 0.003). Overexpression of DGKI in GC was significantly correlated with high grade (OR = 1.71 for G3 vs. G2), stage (OR = 2.08 for II vs. I) and T classification (OR = 4.64 for T4 vs. T1; OR = 3.99 for T3 vs. T1; OR = 3.37 for T2 vs. T1) (all p <0.05). DGKI (OR = 7.34; p = 0.000) was an independent risk factor affecting the survival of GC patients. The MAPK signaling pathway was differentially enriched with DGKI overexpression. Conclusion: DGKI overexpression may be a potential molecular marker for poor prognosis in GC. The MAPK signaling pathway may be one of the key pathways related to DGKI regulation in GC.",2020-07-07 +34654818,A metabolome atlas of the aging mouse brain.,"The mammalian brain relies on neurochemistry to fulfill its functions. Yet, the complexity of the brain metabolome and its changes during diseases or aging remain poorly understood. Here, we generate a metabolome atlas of the aging wildtype mouse brain from 10 anatomical regions spanning from adolescence to old age. We combine data from three assays and structurally annotate 1,547 metabolites. Almost all metabolites significantly differ between brain regions or age groups, but not by sex. A shift in sphingolipid patterns during aging related to myelin remodeling is accompanied by large changes in other metabolic pathways. Functionally related brain regions (brain stem, cerebrum and cerebellum) are also metabolically similar. In cerebrum, metabolic correlations markedly weaken between adolescence and adulthood, whereas at old age, cross-region correlation patterns reflect decreased brain segregation. We show that metabolic changes can be mapped to existing gene and protein brain atlases. 
The brain metabolome atlas is publicly available ( https://mouse.atlas.metabolomics.us/ ) and serves as a foundation dataset for future metabolomic studies.",2021-10-15 +32128557,Incorporation of a unified protein abundance dataset into the Saccharomyces genome database. ,"The identification and accurate quantitation of protein abundance has been a major objective of proteomics research. Abundance studies have the potential to provide users with data that can be used to gain a deeper understanding of protein function and regulation and can also help identify cellular pathways and modules that operate under various environmental stress conditions. One of the central missions of the Saccharomyces Genome Database (SGD; https://www.yeastgenome.org) is to work with researchers to identify and incorporate datasets of interest to the wider scientific community, thereby enabling hypothesis-driven research. A large number of studies have detailed efforts to generate proteome-wide abundance data, but deeper analyses of these data have been hampered by the inability to compare results between studies. Recently, a unified protein abundance dataset was generated through the evaluation of more than 20 abundance datasets, which were normalized and converted to common measurement units, in this case molecules per cell. We have incorporated these normalized protein abundance data and associated metadata into the SGD database, as well as the SGD YeastMine data warehouse, resulting in the addition of 56 487 values for untreated cells grown in either rich or defined media and 28 335 values for cells treated with environmental stressors. Abundance data for protein-coding genes are displayed in a sortable, filterable table on Protein pages, available through Locus Summary pages. A median abundance value was incorporated, and a median absolute deviation was calculated for each protein-coding gene and incorporated into SGD. 
These values are displayed in the Protein section of the Locus Summary page. The inclusion of these data has enhanced the quality and quantity of protein experimental information presented at SGD and provides opportunities for researchers to access and utilize the data to further their research.",2020-01-01 +34719864,Improved Protein Structure Prediction Using a New Multi-Scale Network and Homologous Templates.,"The accuracy of de novo protein structure prediction has been improved considerably in recent years, mostly due to the introduction of deep learning techniques. In this work, trRosettaX, an improved version of trRosetta for protein structure prediction is presented. The major improvement over trRosetta consists of two folds. The first is the application of a new multi-scale network, i.e., Res2Net, for improved prediction of inter-residue geometries, including distance and orientations. The second is an attention-based module to exploit multiple homologous templates to increase the accuracy further. Compared with trRosetta, trRosettaX improves the contact precision by 6% and 8% on the free modeling targets of CASP13 and CASP14, respectively. A preliminary version of trRosettaX is ranked as one of the top server groups in CASP14's blind test. Additional benchmark test on 161 targets from CAMEO (between Jun and Sep 2020) shows that trRosettaX achieves an average TM-score ≈0.8, outperforming the top groups in CAMEO. These data suggest the effectiveness of using the multi-scale network and the benefit of incorporating homologous templates into the network. The trRosettaX algorithm is incorporated into the trRosetta server since Nov 2020. 
The web server, the training and inference codes are available at: https://yanglab.nankai.edu.cn/trRosetta/.",2021-10-31 +33748796,Appyters: Turning Jupyter Notebooks into data-driven web apps.,"Jupyter Notebooks have transformed the communication of data analysis pipelines by facilitating a modular structure that brings together code, markdown text, and interactive visualizations. Here, we extended Jupyter Notebooks to broaden their accessibility with Appyters. Appyters turn Jupyter Notebooks into fully functional standalone web-based bioinformatics applications. Appyters present to users an entry form enabling them to upload their data and set various parameters for a multitude of data analysis workflows. Once the form is filled, the Appyter executes the corresponding notebook in the cloud, producing the output without requiring the user to interact directly with the code. Appyters were used to create many bioinformatics web-based reusable workflows, including applications to build customized machine learning pipelines, analyze omics data, and produce publishable figures. These Appyters are served in the Appyters Catalog at https://appyters.maayanlab.cloud. In summary, Appyters enable the rapid development of interactive web-based bioinformatics applications.",2021-03-04 +29168245,Homology-based hydrogen bond information improves crystallographic structures in the PDB.,"The Protein Data Bank (PDB) is the global archive for structural information on macromolecules, and a popular resource for researchers, teachers, and students, amassing more than one million unique users each year. Crystallographic structure models in the PDB (more than 100,000 entries) are optimized against the crystal diffraction data and geometrical restraints. This process of crystallographic refinement typically ignored hydrogen bond (H-bond) distances as a source of information. However, H-bond restraints can improve structures at low resolution where diffraction data are limited. 
To improve low-resolution structure refinement, we present methods for deriving H-bond information either globally from well-refined high-resolution structures from the PDB-REDO databank, or specifically from on-the-fly constructed sets of homologous high-resolution structures. Refinement incorporating HOmology DErived Restraints (HODER), improves geometrical quality and the fit to the diffraction data for many low-resolution structures. To make these improvements readily available to the general public, we applied our new algorithms to all crystallographic structures in the PDB: using massively parallel computing, we constructed a new instance of the PDB-REDO databank (https://pdb-redo.eu). This resource is useful for researchers to gain insight on individual structures, on specific protein families (as we demonstrate with examples), and on general features of protein structure using data mining approaches on a uniformly treated dataset.",2017-12-08 +34389508,Fast and Accurate Ophthalmic Medication Bottle Identification Using Deep Learning on a Smartphone Device.,"

Purpose

To assess the accuracy and efficacy of deep learning models, specifically convolutional neural networks (CNNs), to identify glaucoma medication bottles.

Design

Algorithm development for predicting ophthalmic medication bottles using a large mobile image-based dataset.

Participants

A total of 3750 mobile images of 5 ophthalmic medication bottles were included: brimonidine tartrate, dorzolamide-timolol, latanoprost, prednisolone acetate, and moxifloxacin.

Methods

Seven CNN models were initially pretrained on a large-scale image database and subsequently retrained to classify 5 commonly prescribed topical ophthalmic medications using a training dataset of 2250 mobile-phone captured images. The retrained CNN models' accuracies were compared using k-fold cross-validation (k = 10). The top 2 performing CNN models were then embedded into separate iOS apps and evaluated using 1500 mobile images not included in the training dataset.

Main outcome measures

Prediction accuracy, image processing time.

Results

Of the 7 CNN architectures, MobileNet V2 yielded the highest k-fold cross-validation accuracy of 0.974 (95% confidence interval [CI], 0.966-0.980) and the shortest average image processing time at 3.45 (95% CI, 3.13-3.77) sec/image. ResNet V2 had the second highest accuracy of 0.961 (95% CI, 0.952-0.969). When the 2 app-embedded CNNs were compared in terms of accuracy, MobileNet V2, with an image prediction accuracy of 0.86 (95% CI, 0.84-0.88), was significantly more accurate than ResNet V2, 0.68 (95% CI, 0.66-0.71) (Table 1). Sensitivities and specificities varied between medications (Table 1). There was no significant difference in average image processing time, 0.32 (95% CI, 0.28-0.36) sec/image and 0.31 (95% CI, 0.29-0.33) sec/image for MobileNet V2 and ResNet V2, respectively. Information on beta-testing of the iOS app can be found here: https://lin.hs.uci.edu/research/.

Conclusions

We have retrained MobileNet V2 to accurately identify ophthalmic medication bottles and demonstrated that this neural network can operate in a smartphone environment. This work serves as a proof-of-concept for the production of a CNN-based smartphone application to empower patients by decreasing risk for error.",2021-08-11 +34378957,Revisiting Species Identification within the Enterobacter cloacae Complex by Matrix-Assisted Laser Desorption Ionization-Time of Flight Mass Spectrometry.,"Matrix-assisted laser desorption ionization-time of flight mass spectrometry (MALDI-TOF MS) is commonly used by clinical microbiology laboratories to identify pathogens, despite some limitations of the technique. The Enterobacter cloacae complex (ECC) taxonomy has recently been expanded, leading to uncertain identification of some species within the ECC when commercial MALDI-TOF MS is used. This technique is especially unsuited in the case of E. hormaechei, the main species responsible for infections and one of the most prone, within the ECC, to acquire antibiotic resistance. Hence, rapid and reliable identification at the species level could improve patient management. Here, we evaluated the performance of the Bruker Microflex MALDI-TOF MS instrument to identify ECC isolates using two databases and algorithms in comparison to the hsp60 gene sequencing reference method: the Bruker database included in the MALDI Biotyper software and an extensive online database coupled to an original Mass Spectrometric Identification (MSI) algorithm. Among a panel of 94 ECC isolates tested in triplicate, the online database coupled to MSI software allowed the highest rate of identification at the species level (92%) compared to the MALDI Biotyper database (25%), especially for the species E. hormaechei (97% versus 20%). 
We show that by creating a database of MALDI-TOF reference spectral profiles with a high number of representatives associated with the performant MSI software, we were able to substantially improve the identification of the E. cloacae complex members, with only 8% of isolates misidentified at the species level. This online database is available through a free online MSI application (https://msi.happy-dev.fr/). IMPORTANCE Creation of a database of MALDI-TOF reference spectral profiles with a high number of representatives associated with the performant MSI software enables substantial improvement in identification of E. cloacae complex members. Moreover, this online database is available through a free online MSI application (https://msi.happy-dev.fr/).",2021-08-11 +34458333,Diabetes Promotes Retinal Vascular Endothelial Cell Injury by Inducing CCN1 Expression.,"Purpose: Diabetic retinopathy (DR) is one of the most common diabetic microvascular complications. However, the pathogenesis of DR has not yet been fully elucidated. This study aimed to discover novel and key molecules involved in the pathogenesis of DR, which could potentially be targets for therapeutic DR intervention. Methods: To identify potential genes involved in the pathogenesis of DR, we analyzed the public database of neovascular membranes (NVMs) from patients with proliferative diabetic retinopathy (PDR) and healthy controls (HCs) (GSE102485, https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE102485). Further, we compared these findings by performing RNA-sequencing analysis of peripheral blood mononuclear cells (PBMC) from patients with DR, control patients with non-complicated diabetes mellitus (DMC), and HCs. To determine the critical role of candidate genes in DR, knockdown or knockout was performed in human retinal vascular endothelial cells (HRVECs). The oxidative stress pathway, as well as tight junction integrity, was analyzed. 
Results: Transcriptional profiles showed distinct patterns between the NVMs of patients with DR and those of the HCs. Those genes enriched in either extracellular matrix (ECM)-receptor interaction or focal adhesion pathways were considerably upregulated. Both pathways were important for maintaining the integrity of retinal vascular structure and function. Importantly, the gene encoding the matricellular protein CCN1, a key gene in cell physiology, was differentially expressed in both pathways. Knockdown of CCN1 by small interfering RNA (siRNA) or knockout of CCN1 by the CRISPR-Cas9 technique in HRVECs significantly increased the levels of VE-cadherin, reduced the level of NADPH oxidase 4 (NOX4), and inhibited the generation of reactive oxygen species (ROS). Conclusion: The present study identifies CCN1 as an important regulator in the pathogenesis of DR. Increased expression of CCN1 stimulates oxidative stress and disrupts tight junction integrity in endothelial cells by inducing NOX4. Thus, targeting the CCN1/NOX4 axis provides a therapeutic strategy for treating DR by alleviating endothelial cell injury.",2021-08-11 +34340658,"Time to first birth and its determinants among married female youths in Ethiopia, 2020: survival analysis based on EDHS 2016.","

Introduction

The first birth is one of the most significant events in a woman's life, indicating the beginning of undertaking the intensive responsibilities of motherhood and childcare. Age at first birth has health, economic and social consequences and implications. However, little is known about the time to first birth and its determinants in Ethiopia. Therefore, this research was planned to address this issue.

Objectives of the study

To assess the time to first birth and its determinants among married female youths in Ethiopia, 2020.

Methods

The data was accessed freely through ( https://www.dhsprogram.com ). Survival analysis of time to first birth was done based on EDHS 2016 data among 2597 weighted study subjects. The data was extracted using STATA version 14.0. Kaplan Meier's survival and Log rank test were used to compare survival experiences of respondents using categorical variables. Proportional hazard assumption was checked and was not violated. Cox proportional hazard model was applied, hazard ratio with 95% CI was computed and variables with p value < 0.05 in the multivariable analysis were taken as significant determinants.

Results

Overall median survival time was 18 years (IQR = 17-20). The significant determinants of time to first birth are place of residence (being rural (AHR = 1.49, 95% CI 1.13, 1.97)), religion (being Muslim (AHR = 1.57, 95% CI 1.22, 2.02), being Protestant (AHR = 1.73, 95% CI 1.34, 2.24)), age at first sex (first sex < 15 years (AHR = 1.68, 95% CI 1.23, 2.29) and first sex between 15 and 17 years (AHR = 1.54, 95% CI 1.29, 1.85)), age at first marriage (marriage < 15 years (AHR = 6.52, 95% CI 4.91, 8.64), marriage between 15 and 17 (AHR = 2.63, 95% CI 2.20, 3.14)), and unmet need for family planning (AHR = 1.23, 95% CI 1.00, 1.52). CONCLUSION: In this study, the median age at first birth was 18 years. This shows that about 50% of study participants gave birth for the first time before their 18th birthday. This age is the ideal age for schooling and to do other personal development activities. Therefore, giving birth before 18 years will limit female youths from attending school and performing personal development activities in addition to health and demographic consequences of early child bearing.",2021-08-02 +29650251,Exploring the molecular mechanisms of Traditional Chinese Medicine components using gene expression signatures and connectivity map.,"BACKGROUND AND OBJECTIVE:Traditional Chinese Medicine (TCM) has been practiced over thousands of years in China and other Asian countries for treating various symptoms and diseases. However, the underlying molecular mechanisms of TCM are poorly understood, partly due to the ""multi-component, multi-target"" nature of TCM. To uncover the molecular mechanisms of TCM, we perform comprehensive gene expression analysis using connectivity map. METHODS:We interrogated gene expression signatures obtained from 102 TCM components using the next generation Connectivity Map (CMap) resource. We performed systematic data mining and analysis on the mechanism of action (MoA) of these TCM components based on the CMap results. 
RESULTS:We clustered the 102 TCM components into four groups based on their MoAs using next generation CMap resource. We performed gene set enrichment analysis on these components to provide additional supports for explaining these molecular mechanisms. We also provided literature evidence to validate the MoAs identified through this bioinformatics analysis. Finally, we developed the Traditional Chinese Medicine Drug Repurposing Hub (TCM Hub) - a connectivity map resource to facilitate the elucidation of TCM MoA for drug repurposing research. TCMHub is freely available in http://tanlab.ucdenver.edu/TCMHub. CONCLUSIONS:Molecular mechanisms of TCM could be uncovered by using gene expression signatures and connectivity map. Through this analysis, we identified many of the TCM components possess diverse MoAs, this may explain the applications of TCM in treating various symptoms and diseases.",2018-04-04 +34497538,Development and Validation of a Prediction Model for Elevated Arterial Stiffness in Chinese Patients With Diabetes Using Machine Learning.,"

Background

Arterial stiffness assessed by pulse wave velocity is a major risk factor for cardiovascular diseases. The incidence of cardiovascular events remains high in diabetics. However, a clinical prediction model for elevated arterial stiffness using machine learning to identify subjects consequently at higher risk remains to be developed.

Methods

Least absolute shrinkage and selection operator and support vector machine-recursive feature elimination were used for feature selection. Four machine learning algorithms were used to construct a prediction model, and their performance was compared based on the area under the receiver operating characteristic curve metric in a discovery dataset (n = 760). The model with the best performance was selected and validated in an independent dataset (n = 912) from the Dryad Digital Repository (https://doi.org/10.5061/dryad.m484p). To apply our model to clinical practice, we built a free and user-friendly web online tool.

Results

The predictive model includes the predictors: age, systolic blood pressure, diastolic blood pressure, and body mass index. In the discovery cohort, the gradient boosting-based model outperformed other methods in the elevated arterial stiffness prediction. In the validation cohort, the gradient boosting model showed a good discrimination capacity. A cutoff value of 0.46 for the elevated arterial stiffness risk score in the gradient boosting model resulted in a good specificity (0.813 in the discovery data and 0.761 in the validation data) and sensitivity (0.875 and 0.738, respectively) trade-off points.

Conclusion

The gradient boosting-based prediction system presents a good classification in elevated arterial stiffness prediction. The web online tool makes our gradient boosting-based model easily accessible for further clinical studies and utilization.",2021-08-23 +33228746,Extracorporeal shock wave therapy versus local corticosteroid injection for the treatment of carpal tunnel syndrome: a meta-analysis.,"

Background

Many studies have demonstrated the effectiveness of extracorporeal shock wave therapy (ESWT) and local corticosteroid injection (LCI) for the treatment of carpal tunnel syndrome (CTS), and some studies showed that the effect of ESWT was superior to LCI. We performed this meta-analysis to compare the clinical effects across the two therapies.

Methods

Relevant randomized controlled trials (RCTs) comparing ESWT and LCI for the treatment of CTS were searched in electronic database. The Cochrane risk bias tool was used for quality assessment. After data extraction and quality assessment of the included studies, a meta-analysis was performed using RevMan 5.3 software. Mean differences (MDs), odds ratios (ORs), and 95% confidence intervals (CIs) were analyzed. The protocol for this systematic review was registered on INPLASY (202080025) and is available in full on the inplasy.com ( https://doi.org/10.37766/inplasy2020.8.0025 ) RESULTS: A total of 5 RCT studies with 204 patients were included from the electronic database. The meta-analysis results showed that two therapies were not significantly different in terms of visual analog scale (VAS) score (P = 0.65), Boston Carpal Tunnel Questionnaire (BQ) score (P = 0.14), sensory distal latency (P = 0.66), and nerve conduction velocity (NCV) of the sensory nerve (P = 0.06). There were significant differences between the results of motor distal latency (P < 0.0001), compound muscle action potential (CMAP) amplitude (P < 0.00001), and sensory nerve action potential (SNAP) amplitude (P = 0.004).

Conclusions

In terms of pain relief and function improvement, the effects of ESWT and LCI are not significantly different. In terms of electrophysiological parameters, LCI has a stronger effect on shortening motor distal latency; ESWT is superior to LCI in improving action potential amplitude. ESWT is a noninvasive treatment with fewer complications and greater patient safety. In light of the heterogeneity and limitations, these conclusions require further research for definitive conclusions to be drawn.",2020-11-23 +32317856,"Quantitative data from six years (2013-2018) of light trap sampling of macromoths (Lepidoptera) in Mt. Hallasan National Park, South Korea.","

Background

This paper presents the results of long-term monitoring of macromoth communities in Mt. Hallasan National Park, South Korea. This mountain shows an altitudinal gradient of vegetation from evergreen deciduous to boreal trees, harbouring more than 550 species of vascular plants. The goal of this project was to investigate the changes in moth assemblages along the altitudinal gradient in this mountain ecosystem. We monitored macromoth communities at 11 sites in Mt. Hallasan National Park from 2013 to 2018, during which time moths were collected once a month from May to October, using an ultraviolet bucket trap. The generated dataset, which represented 587 species and 13,249 individuals from 14 families, can be adopted to establish a baseline for development of a network-orientated database to assess temporal and spatial changes of moths in temperate and tropical forests.

New information

This is the first long-term sampling-event dataset on macromoth assemblages in changing vegetation from evergreen deciduous to boreal tree zones, conducted in Mt. Hallasan National Park, the national park at the highest elevation and located on the largest volcanic island in South Korea. The aim of this study was to provide a description and a link to published data in the format of a peer-reviewed journal and to provide recognition of the effort in a scholarly article (based on data paper definition published at https://www.gbif.org/en/data-papers).",2020-04-07 +34376650,"The molecular basis, genetic control and pleiotropic effects of local gene co-expression.","Nearby genes are often expressed as a group. Yet, the prevalence, molecular mechanisms and genetic control of local gene co-expression are far from being understood. Here, by leveraging gene expression measurements across 49 human tissues and hundreds of individuals, we find that local gene co-expression occurs in 13% to 53% of genes per tissue. By integrating various molecular assays (e.g. ChIP-seq and Hi-C), we estimate the ability of several mechanisms, such as enhancer-gene interactions, in distinguishing gene pairs that are co-expressed from those that are not. Notably, we identify 32,636 expression quantitative trait loci (eQTLs) which associate with co-expressed gene pairs and often overlap enhancer regions. Due to affecting several genes, these eQTLs are more often associated with multiple human traits than other eQTLs. Our study paves the way to comprehend trait pleiotropy and functional interpretation of QTL and GWAS findings. All local gene co-expression identified here is available through a public database ( https://glcoex.unil.ch/ ).",2021-08-10 +30217829,Observed Antibody Space: A Resource for Data Mining Next-Generation Sequencing of Antibody Repertoires.,"Abs are immune system proteins that recognize noxious molecules for elimination. 
Their sequence diversity and binding versatility have made Abs the primary class of biopharmaceuticals. Recently, it has become possible to query their immense natural diversity using next-generation sequencing of Ig gene repertoires (Ig-seq). However, Ig-seq outputs are currently fragmented across repositories and tend to be presented as raw nucleotide reads, which means nontrivial effort is required to reuse the data for analysis. To address this issue, we have collected Ig-seq outputs from 55 studies, covering more than half a billion Ab sequences across diverse immune states, organisms (primarily human and mouse), and individuals. We have sorted, cleaned, annotated, translated, and numbered these sequences and make the data available via our Observed Antibody Space (OAS) resource at http://antibodymap.org The data within OAS will be regularly updated with newly released Ig-seq datasets. We believe OAS will facilitate data mining of immune repertoires for improved understanding of the immune system and development of better biotherapeutics.",2018-09-14 +34562305,PANNZER-A practical tool for protein function prediction.,"The facility of next-generation sequencing has led to an explosion of gene catalogs for novel genomes, transcriptomes and metagenomes, which are functionally uncharacterized. Computational inference has emerged as a necessary substitute for first-hand experimental evidence. PANNZER (Protein ANNotation with Z-scoRE) is a high-throughput functional annotation web server that stands out among similar publically accessible web servers in supporting submission of up to 100,000 protein sequences at once and providing both Gene Ontology (GO) annotations and free text description predictions. Here, we demonstrate the use of PANNZER and discuss future plans and challenges. We present two case studies to illustrate problems related to data quality and method evaluation. 
Some commonly used evaluation metrics and evaluation datasets promote methods that favor unspecific and broad functional classes over more informative and specific classes. We argue that this can bias the development of automated function prediction methods. The PANNZER web server and source code are available at http://ekhidna2.biocenter.helsinki.fi/sanspanz/.",2021-10-14 +29186335,The OncoPPi Portal: an integrative resource to explore and prioritize protein-protein interactions for cancer target discovery.,"Motivation:As cancer genomics initiatives move toward comprehensive identification of genetic alterations in cancer, attention is now turning to understanding how interactions among these genes lead to the acquisition of tumor hallmarks. Emerging pharmacological and clinical data suggest a highly promising role of cancer-specific protein-protein interactions (PPIs) as druggable cancer targets. However, large-scale experimental identification of cancer-related PPIs remains challenging, and currently available resources to explore oncogenic PPI networks are limited. Results:Recently, we have developed a PPI high-throughput screening platform to detect PPIs between cancer-associated proteins in the context of cancer cells. Here, we present the OncoPPi Portal, an interactive web resource that allows investigators to access, manipulate and interpret a high-quality cancer-focused network of PPIs experimentally detected in cancer cell lines. To facilitate prioritization of PPIs for further biological studies, this resource combines network connectivity analysis, mutual exclusivity analysis of genomic alterations, cellular co-localization of interacting proteins and domain-domain interactions. Estimates of PPI essentiality allow users to evaluate the functional impact of PPI disruption on cancer cell proliferation. 
Furthermore, connecting the OncoPPi network with the approved drugs and compounds in clinical trials enables discovery of new tumor dependencies to inform strategies to interrogate undruggable targets like tumor suppressors. The OncoPPi Portal serves as a resource for the cancer research community to facilitate discovery of cancer targets and therapeutic development. Availability and implementation:The OncoPPi Portal is available at http://oncoppi.emory.edu. Contact:andrey.ivanov@emory.edu or hfu@emory.edu.",2018-04-01 +34800808,Performance optimization of salp swarm algorithm for multi-threshold image segmentation: Comprehensive study of breast cancer microscopy.,"Multi-threshold image segmentation (MIS) is now a well known image segmentation technique, and many researchers have applied intelligent algorithms to it, but these methods suffer from local optimal drawbacks. This paper presented a novel approach to improve the Salp Swarm Algorithm (SSA), namely EHSSA, and applied it to MIS. Knowing the inaccuracies and discussions on implementation of this method, a new efficient mechanism is proposed to improve global search capability of the algorithm and avoid falling into a local optimum. Moreover, the excellence of the proposed algorithm was proved by comparative experiments at IEEE CEC2014. Afterward, the performance of EHSSA was demonstrated by testing a set of images selected from the Berkeley segmentation data set 500 (BSDS500), and the experimental results were analyzed by evaluating the parameters, which proved the efficiency of the proposed algorithm in MIS. Furthermore, EHSSA was applied to the microscopic image segmentation of breast cancer. 
Medical image segmentation is the study of how to quickly extract objects of interest (human organs) from various images to perform qualitative and quantitative analysis of diseased tissues and improve the accuracy of their diagnosis, which assists the physician in making more informed decisions and patient rehabilitation. The results of this set of experiments also proved its superior performance. For any info about this paper, readers can refer to https://aliasgharheidari.com.",2021-11-06 +34643666,Multi-instance learning of graph neural networks for aqueous pKa prediction. ,"The acid dissociation constant (pKa) is a critical parameter to reflect the ionization ability of chemical compounds and is widely applied in a variety of industries. However, the experimental determination of pKa is intricate and time-consuming, especially for the exact determination of micro pKa information at the atomic level. Hence, a fast and accurate prediction of pKa values of chemical compounds is of broad interest. Here, we compiled a large scale pKa dataset containing 16595 compounds with 17489 pKa values. Based on this dataset, a novel pK a prediction model, named Graph-pKa, was established using graph neural networks. Graph-pKa performed well on the prediction of macro pK a values, with a mean absolute error around 0.55 and a coefficient of determination around 0.92 on the test dataset. Furthermore, combining multi-instance learning, Graph-pKa was also able to automatically deconvolute the predicted macro pKa into discrete micro pK a values. The Graph-pK a model is now freely accessible via a web-based interface (https://pka.simm.ac.cn/). Supplementary data are available at Bioinformatics online.",2021-10-13 +33022192,"Young Adults With Developmental Language Disorder: A Systematic Review of Education, Employment, and Independent Living Outcomes.","Purpose Research on developmental language disorder (DLD) in adulthood has increased rapidly in recent years. 
However, to date, there has been no systematic literature review on this topic, thereby limiting the possibility to have a comprehensive overview of publications in this field. Method Following Preferred Reporting Items for Systematic Review and Meta-Analyses (PRISMA) guidelines, we conducted a systematic literature review. A literature search was undertaken in four databases, from 2005 to 2018. We selected articles with original data related to life outcomes of young adults with and without DLD, all aged between 18 and 34 years, in three life areas: education, employment, and independent living. Methodological characteristics of the studies were analyzed. Results Fifteen articles were selected with longitudinal designs. In every life area, young adults with DLD were compared to their typically developing peers to identify their strengths and weaknesses. The predictive role of language abilities was also examined. Conclusions Outcomes within each life area are heterogeneous. Nevertheless, similarly to young children and adolescents, young adults with DLD face numerous challenges. Although language abilities partly predict some of these outcomes, much of the variance remains unaccounted for and some outcomes are unrelated to this predictor. This systematic literature review has implications for researchers and practitioners to identify promising avenues for research, interventions, and policy development. Supplemental Material https://doi.org/10.23641/asha.13022552.",2020-10-06 +32179154,Prediction calculator for nonroutine discharge and length of stay after spine surgery.,"

Background context

Following spine surgery, delays in referral to rehabilitation facilities lead to increased length of hospital stay (LOS), increased costs, greater risk of hospital-acquired complications, and decreased patient satisfaction.

Purpose

We sought to create a prediction calculator to determine the expected LOS after spine surgery and identify patients most likely to need postoperative nonhome discharge. The goal would be to facilitate earlier referral to rehabilitation and thereby ultimately shorten LOS, reduce costs, and improve patient satisfaction.

Study design

Retrospective.

Patient sample

We retrospectively reviewed all adult patients who underwent spine surgery for all indications between January and June 2018.

Outcome measures

Length of stay and discharge disposition.

Methods

Demographic variables, insurance status, baseline comorbidities, narcotic use, operative characteristics, as well as postoperative length of stay and discharge disposition data were collected. Univariable and multivariable analyses were performed to identify independent predictors of LOS and discharge disposition.

Results

Two hundred fifty-seven patients were included. Mean age was 59 years, 46% were females, and 52% had private insurance vs 7% with Medicaid and 41% with Medicare. The most commonly performed procedure was lumbar fusion (31.9%). Mean LOS after surgery was 4.8 days and 18% had prolonged LOS >7 days. Age, insurance type, marriage status, and surgical procedure were significantly associated with LOS and discharge disposition. The final model had an area under the curve of 89% with good discrimination. A web based calculator was developed: https://jhuspine1.shinyapps.io/RehabLOS/ CONCLUSIONS: This study established a novel pilot calculator to identify those patients most likely to be discharged to rehabilitation facilities and to predict LOS after spine surgery. Our calculator had a high predictive accuracy of 89% compared to others in the literature. With validation this tool may ultimately facilitate streamlining of the postoperative period to shorten LOS, optimize resource utilization, and improve patient care.",2020-03-13 +34309170,Ileorectal anastomosis in patients with Crohn's disease. Postoperative complications and functional outcome-a systematic review.,"

Aim

The objective of this systematic review was to investigate the outcomes of ileorectal anastomosis (IRA) in Crohn's disease and to clarify whether there are any time-related trends in outcome measures. The primary outcomes are risk of anastomotic leakage, death, clinical recurrence and subsequent diverting or permanent stoma and/or proctectomy. Secondary end-points are quality of life and functional outcome.

Methods

Systematic searches were conducted using the Cochrane Library, Embase and MEDLINE. The complete search strategy is uploaded online at http://www.crd.york.ac.uk/prospero/. Human studies in English with over five subjects were included and no limit was set regarding the date of publication. All relevant studies were screened by two reviewers. The web-based software platform www.covidence.org was used for primary screening of the title, abstract, full-text review and data extraction.

Results

The search identified 2231 unique articles. After the screening process, 37 remained. Key results were an overall anastomotic leak rate of 6.4%; cumulative rates of clinical recurrence of 43% and 67% at 5 and 10 years, respectively; an overall rate of proctectomy of 18.9%; and subsequent ileostomy required in 18.8%. Only one study presented useful data on quality of life. Recurrence rates remained stable over time. A small decline in the anastomotic leak rate was found.

Conclusions

Only minor improvements in the outcomes of IRA in patients with Crohn's disease have occurred during the past 50 years regarding anastomotic leakage and recurrence, except for a slight increase in the rate of a functioning IRA. These results call for implementation guidelines in patient selection for IRA and postoperative medical treatment and follow-up.",2021-08-10 +33076954,Predicted functional interactome of Caenorhabditis elegans and a web tool for the functional interpretation of differentially expressed genes.,"

Background

The nematode worm, Caenorhabditis elegans, is a saprophytic species that has been emerging as a standard model organism since the early 1960s. This species is useful in numerous fields, including developmental biology, neurobiology, and ageing. A high-quality comprehensive molecular interaction network is needed to facilitate molecular mechanism studies in C. elegans.

Results

We present the predicted functional interactome of Caenorhabditis elegans (FIC), which integrates functional association data from 10 public databases to infer functional gene interactions on diverse functional perspectives. In this work, FIC includes 108,550 putative functional associations with balanced sensitivity and specificity, which are expected to cover 21.42% of all C. elegans protein interactions, and 29.25% of these associations may represent protein interactions. Based on FIC, we developed a gene set linkage analysis (GSLA) web tool to interpret potential functional impacts from a set of differentially expressed genes observed in transcriptome analyses.

Conclusion

We present the predicted C. elegans interactome database FIC, which is a high-quality database of predicted functional interactions among genes. The functional interactions in FIC serve as a good reference interactome for GSLA to annotate differentially expressed genes for their potential functional impacts. In a case study, the FIC/GSLA system shows more comprehensive and concise annotations compared to other widely used gene set annotation tools, including PANTHER and DAVID. FIC and its associated GSLA are available at the website http://worm.biomedtzc.cn .",2020-10-19 +34126702,Machine Learning Model for Predicting Postoperative Survival of Patients with Colorectal Cancer.,"

Purpose

Machine learning (ML) is a strong candidate for making accurate predictions, as we can use large amounts of data with powerful computational algorithms. We developed an ML-based model to predict survival of patients with colorectal cancer (CRC) using data from two independent datasets.

Materials and methods

A total of 364,316 and 1,572 CRC patients were included from the Surveillance, Epidemiology, and End Results (SEER) and a Korean dataset, respectively. As SEER combines data from 18 cancer registries, internal validation was done using 18-Fold-Cross-Validation then external validation was performed by testing the trained model on the Korean dataset. Performance was evaluated using area under the receiver operating characteristic curve (AUROC), sensitivity and positive predictive values.

Results

Clinicopathological characteristics were significantly different between the two datasets and the SEER dataset showed a significantly lower 5-year survival rate compared to the Korean dataset (60.1% vs. 75.3%, p < 0.001). The ML-based model using the Light gradient boosting algorithm achieved a better performance in predicting 5-year-survival compared to American Joint Committee on Cancer stage (AUROC, 0.804 vs. 0.736; p < 0.001). The most important features which influenced model performance were age, number of examined lymph nodes, and tumor size. Sensitivity and positive predictive values of predicting 5-year-survival for classes including dead or alive were reported as 68.14%, 77.51% and 49.88%, 88.1% respectively in the validation set. Survival probability can be checked using the web-based survival predictor (http://colorectalcancer.pythonanywhere.com).

Conclusion

ML-based model achieved a much better performance compared to staging in individualized estimation of survival of patients with CRC.",2021-06-15 +27905880,The Lair: a resource for exploratory analysis of published RNA-Seq data.,"Increased emphasis on reproducibility of published research in the last few years has led to the large-scale archiving of sequencing data. While this data can, in theory, be used to reproduce results in papers, it is difficult to use in practice. We introduce a series of tools for processing and analyzing RNA-Seq data in the Sequence Read Archive, that together have allowed us to build an easily extendable resource for analysis of data underlying published papers. Our system makes the exploration of data easily accessible and usable without technical expertise. Our database and associated tools can be accessed at The Lair: http://pachterlab.github.io/lair .",2016-12-01 +30397019,iProteinDB: An Integrative Database of Drosophila Post-translational Modifications.,"Post-translational modification (PTM) serves as a regulatory mechanism for protein function, influencing their stability, interactions, activity and localization, and is critical in many signaling pathways. The best characterized PTM is phosphorylation, whereby a phosphate is added to an acceptor residue, most commonly serine, threonine and tyrosine in metazoans. As proteins are often phosphorylated at multiple sites, identifying those sites that are important for function is a challenging problem. Considering that any given phosphorylation site might be non-functional, prioritizing evolutionarily conserved phosphosites provides a general strategy to identify the putative functional sites. To facilitate the identification of conserved phosphosites, we generated a large-scale phosphoproteomics dataset from Drosophila embryos collected from six closely-related species. 
We built iProteinDB (https://www.flyrnai.org/tools/iproteindb/), a resource integrating these data with other high-throughput PTM datasets, including vertebrates, and manually curated information for Drosophila At iProteinDB, scientists can view the PTM landscape for any Drosophila protein and identify predicted functional phosphosites based on a comparative analysis of data from closely-related Drosophila species. Further, iProteinDB enables comparison of PTM data from Drosophila to that of orthologous proteins from other model organisms, including human, mouse, rat, Xenopus tropicalis, Danio rerio, and Caenorhabditis elegans.",2019-01-09 +32701164,WellExplorer: an integrative resource linking hydraulic fracturing chemicals with hormonal pathways and geographic location. ,"Exposure to hydraulic fracturing fluid in drinking water increases the risk of many adverse health outcomes. Unfortunately, most individuals and researchers are unaware of the health risks posed by a particular well due to the diversity of chemical ingredients used across sites. We constructed WellExplorer (http://WellExplorer.org), an interactive tool for researchers and community members to use for retrieving information regarding the hormonal, testosterone and estrogen modulators located at each well. We found that wells in Alabama use a disproportionately high number of ingredients targeting estrogen pathways, while Illinois, Ohio and Pennsylvania use a disproportionately high number of ingredients targeting testosterone pathways. Researchers can utilize WellExplorer to study health outcomes related to exposure to fracturing chemicals in their population-based cohorts. Community members can use this resource to search their home or work locations (e.g. town or zip code) to determine proximity between where they live or work and specific hormonal exposures.",2020-01-01 +32227202,annonex2embl: automatic preparation of annotated DNA sequences for bulk submissions to ENA.,"

Motivation

The submission of annotated sequence data to public sequence databases constitutes a central pillar in biological research. The surge of novel DNA sequences awaiting database submission due to the application of next-generation sequencing has increased the need for software tools that facilitate bulk submissions. This need has yet to be met with the concurrent development of tools to automate the preparatory work preceding such submissions.

Results

The author introduces annonex2embl, a Python package that automates the preparation of complete sequence flatfiles for large-scale sequence submissions to the European Nucleotide Archive. The tool enables the conversion of DNA sequence alignments that are co-supplied with sequence annotations and metadata to submission-ready flatfiles. Among other features, the software automatically accounts for length differences among the input sequences while maintaining correct annotations, automatically interlaces metadata to each record and displays a design suitable for easy integration into bioinformatic workflows. As proof of its utility, annonex2embl is employed in preparing a dataset of more than 1500 fungal DNA sequences for database submission.

Availability and implementation

annonex2embl is freely available via the Python package index at http://pypi.python.org/pypi/annonex2embl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +34822018,Establishment and validation of a nomogram and web calculator for the risk of new vertebral compression fractures and cement leakage after percutaneous vertebroplasty in patients with osteoporotic vertebral compression fractures.,"

Purpose

The aim of this work was to investigate the risk factors for cement leakage and new-onset OVCF after Percutaneous vertebroplasty (PVP) and to develop and validate a clinical prediction model (Nomogram).

Methods

Patients with Osteoporotic VCF (OVCF) treated with PVP at Liuzhou People's Hospital from June 2016 to June 2018 were reviewed and met the inclusion criteria. Relevant data affecting bone cement leakage and new onset of OVCF were collected. Predictors were screened using univariate and multi-factor logistic analysis to construct Nomogram and web calculators. The consistency of the prediction models was assessed using calibration plots, and their predictive power was assessed by tenfold cross-validation. Clinical value was assessed using Decision curve analysis (DCA) and clinical impact plots.

Results

Higher BMI was associated with lower bone mineral density (BMD). Higher BMI, lower BMD, multiple vertebral fractures, no previous anti-osteoporosis treatment, and steroid use were independent risk factors for new vertebral fractures. Cement injection volume, time to surgery, and multiple vertebral fractures were risk factors for cement leakage after PVP. The development and validation of the Nomogram also demonstrated the predictive ability and clinical value of the model.

Conclusions

The established Nomogram and web calculator (https://dr-lee.shinyapps.io/RefractureApp/) (https://dr-lee.shinyapps.io/LeakageApp/) can effectively predict the occurrence of cement leakage and new OVCF after PVP.",2021-11-25 +31813311,New discoveries for an old drug: a review of recent olanzapine research.,"Objective: Based on a substantial literature, olanzapine appears to be one of the most efficacious antipsychotics marketed in the United States, with only clozapine clearly more advantageous. However, olanzapine is marred by an equally substantial literature demonstrating a metabolic burden of olanzapine, particularly for weight gain. With the publication of successful strategies to limit olanzapine induced weight gain, a reassessment of the clinical utility of olanzapine appears warranted. The purpose of this paper is to review recent evidence for olanzapine, highlighting use in both schizophrenia and other conditions, safety and supporting the use of olanzapine above 20 mg/day, focusing on studies published since our previous reviews in 2008 and 2009.Data Sources: The US National Library of Medicine's PubMed resource (https://www.ncbi.nlm.nih.gov/pubmed/) was searched using the text word 'olanzapine' for all English-language articles published between 2008 to July 2019, inclusive with a specific focus on double-blind randomized controlled trials and meta-analyses. In addition, we examined the review articles for other reports of interest that may have been missed by our initial search.Data Extraction: The studies were evaluated based on efficacy and safety data.Results: Use of olanzapine may be decreasing but remains common overall. Evidence continues to support both the relative efficacy advantage and weight gain/metabolic disadvantages of olanzapine in schizophrenia, and recent research supports olanzapine's use in treating anorexia nervosa and chemotherapy-induced nausea. The evidence for high dose olanzapine dosages >20 mg remains limited. 
Non-pharmacological options, such as dietary counseling and exercise, appear to be efficacious in addressing antipsychotic-induced weight gain. Topiramate, metformin and possibly the olanzapine-samidorphan combination also appear helpful.Conclusions: Olanzapine remains a useful antipsychotic, but requires with careful monitoring. Further research is needed to compare the different options available to mitigate olanzapine-induced weight gain and to evaluate potential synergism between pharmacological and non-pharmacological treatments.",2020-01-03 +30874591,PAmiRDB: A web resource for plant miRNAs targeting viruses.,"MicroRNAs (miRNAs) have emerged to be essential constituents of host antiviral-defense mechanisms. The miRNA mediated antiviral mechanism was first experimentally established in animals, which proved that host miRNAs regulate viral gene expression by targeting the animal virus mRNAs. There are comparatively fewer reports about such interactions in plants, however, artificial miRNA studies prove that miRNAs play similar antiviral role in plants too. To explore the extent of this phenomenon in plant genomes, and in the absence of any publicly available resource for prediction of plant miRNAs targeting viruses, we were motivated to predict such interactions of plant miRNAs and viral genes. The intriguing results of the predictions are compiled as a database, which we have named as PAmiRDB. The current version of PAmiRDB includes more than 2600 plant miRNAs and their specific interactions with corresponding targets in approximately 500 viral species (predominantly from the major plant-infecting virus families of geminiviruses and potyviruses). PAmiRDB is a database of known plant miRNAs and their predicted targets in virus genomes. The innovative database query-interface enables global and comprehensive investigation of such predicted interactions between host miRNAs and viral genes. 
The database integrated-tools also helps researchers to design experiments to confirm such interactions. PAmiRDB is available at http://bioinfo.icgeb.res.in/pamirdb.",2019-03-15 +32510556,MetaFS: Performance assessment of biomarker discovery in metaproteomics. ,"Metaproteomics suffers from the issues of dimensionality and sparsity. Data reduction methods can maximally identify the relevant subset of significant differential features and reduce data redundancy. Feature selection (FS) methods were applied to obtain the significant differential subset. So far, a variety of feature selection methods have been developed for metaproteomic study. However, due to FS's performance depended heavily on the data characteristics of a given research, the well-suitable feature selection method must be carefully selected to obtain the reproducible differential proteins. Moreover, it is critical to evaluate the performance of each FS method according to comprehensive criteria, because the single criterion is not sufficient to reflect the overall performance of the FS method. Therefore, we developed an online tool named MetaFS, which provided 13 types of FS methods and conducted the comprehensive evaluation on the complex FS methods using four widely accepted and independent criteria. Furthermore, the function and reliability of MetaFS were systematically tested and validated via two case studies. In sum, MetaFS could be a distinguished tool for discovering the overall well-performed FS method for selecting the potential biomarkers in microbiome studies. The online tool is freely available at https://idrblab.org/metafs/.",2021-05-01 +34380590,The impact of liver disease on mortality in cystic fibrosis-A systematic review.,"

Background

There is conflicting evidence on the impact of liver disease (CFLD) on life expectancy in CF. Therefore the aim of this systematic review was to evaluate the impact of liver disease (CFLD) on mortality in CF.

Methods

The protocol was published at (https://hrbopenresearch.org/articles/3-44/v3) using PRISMA-P guidelines and registered in PROSPERO 2020 (CRD42020182885). Three databases were searched for publications (1938-2020) where the outcome was all-cause mortality (defined as death and transplantation) or CF-specific mortality in participants with CFLD. Studies with and without a comparator group were included. Studies were divided into 2 groups based on the definition of CFLD: Group 1 used 2 categories of liver disease (i) liver disease with portal hypertension (PH) (ii) non-specific abnormalities which did not meet the criteria for PH, Group 2 studies only included participants with PH.

Results

All 14 eligible studies were observational, with a moderate-high risk of bias. Six of the 14 studies directly compared mortality between those with CFLD and those with no liver disease, and 5/6 demonstrated that those with CFLD had at least 3 times the risk of death compared to those with no liver disease. Pulmonary complications were the primary cause of death.

Conclusion

This SR demonstrates that liver disease shortens life expectancy in CF, and that pulmonary complications are the primary cause of death in those with CFLD. There has been no improvement in survival for persons with CFLD despite significant improvements in life expectancy for persons with CF who have no evidence of liver disease.",2021-08-08 +33494695,GECO: gene expression clustering optimization app for non-linear data visualization of patterns.,"

Background

Due to continued advances in sequencing technology, the limitation in understanding biological systems through an ""-omics"" lens is no longer the generation of data, but the ability to analyze it. Importantly, much of this rich -omics data is publicly available waiting to be further investigated. Although many code-based pipelines exist, there is a lack of user-friendly and accessible applications that enable rapid analysis or visualization of data.

Results

GECO (Gene Expression Clustering Optimization; http://www.theGECOapp.com ) is a minimalistic GUI app that utilizes non-linear reduction techniques to rapidly visualize expression trends in many types of biological data matrices (such as bulk RNA-seq or proteomics). The required input is a data matrix with samples and any type of expression level of genes/protein/other with a unique ID. The output is an interactive t-SNE or UMAP analysis that clusters genes (or proteins/other unique IDs) based on their expression patterns across the multiple samples enabling visualization of expression trends. Customizable settings for dimensionality reduction, data normalization, along with visualization parameters including coloring and filters, ensure adaptability to a variety of user uploaded data.

Conclusion

This local and cloud-hosted web browser app enables investigation of any -omic data matrix in a rapid and code-independent manner. With the continued growth of available -omic data, the ability to quickly evaluate a dataset, including specific genes of interest, is more important than ever. GECO is intended to supplement traditional statistical analysis methods and is particularly useful when visualizing clusters of genes with similar trajectories across many samples (ex: multiple cell types, time course, dose response). Users will be empowered to investigate -omic data with a new lens of visualization and analysis that has the potential to uncover genes of interest, cohorts of co-regulated genes programs, and previously undetected patterns of expression.",2021-01-25 +33956141,CeLaVi: an interactive cell lineage visualization tool.,"Recent innovations in genetics and imaging are providing the means to reconstruct cell lineages, either by tracking cell divisions using live microscopy, or by deducing the history of cells using molecular recorders. A cell lineage on its own, however, is simply a description of cell divisions as branching events. A major goal of current research is to integrate this description of cell relationships with information about the spatial distribution and identities of the cells those divisions produce. Visualizing, interpreting and exploring these complex data in an intuitive manner requires the development of new tools. Here we present CeLaVi, a web-based visualization tool that allows users to navigate and interact with a representation of cell lineages, whilst simultaneously visualizing the spatial distribution, identities and properties of cells. 
CeLaVi's principal functions include the ability to explore and manipulate the cell lineage tree; to visualise the spatial distribution of cell clones at different depths of the tree; to colour cells in the 3D viewer based on lineage relationships; to visualise various cell qualities on the 3D viewer (e.g. gene expression, cell type) and to annotate selected cells/clones. All these capabilities are demonstrated with four different example data sets. CeLaVi is available at http://www.celavi.pro.",2021-07-01 +31714620,PEATmoss (Physcomitrella Expression Atlas Tool): a unified gene expression atlas for the model plant Physcomitrella patens.,"Physcomitrella patens is a bryophyte model plant that is often used to study plant evolution and development. Its resources are of great importance for comparative genomics and evo-devo approaches. However, expression data from Physcomitrella patens were so far generated using different gene annotation versions and three different platforms: CombiMatrix and NimbleGen expression microarrays and RNA sequencing. The currently available P. patens expression data are distributed across three tools with different visualization methods to access the data. Here, we introduce an interactive expression atlas, Physcomitrella Expression Atlas Tool (PEATmoss), that unifies publicly available expression data for P. patens and provides multiple visualization methods to query the data in a single web-based tool. Moreover, PEATmoss includes 35 expression experiments not previously available in any other expression atlas. To facilitate gene expression queries across different gene annotation versions, and to access P. patens annotations and related resources, a lookup database and web tool linked to PEATmoss was implemented. 
PEATmoss can be accessed at https://peatmoss.online.uni-marburg.de.",2020-01-11 +34735308,"Assessment of sports nutrition knowledge, dietary intake, and nutrition information source in female collegiate athletes: A descriptive feasibility study.","Objective: This descriptive feasibility study aimed to assess dietary intake, sports nutrition knowledge, and nutrition information source in collegiate athletes. Participants: Fourteen indoor volleyball female collegiate athletes from a National Collegiate Athletic Association Division I university. Methods: Participants completed a Nutrition for Sports Knowledge Questionnaire (NSKQ) once and dietary and body composition assessments over four time points. Results: Pre-season mean energy and carbohydrate intake were lower than the American College of Sports Medicine Recommendations (25 ± 6.4 vs 37-41 kcal/kg BW/day and 3 ± 0.9 vs 6-10 g/kg BW/day; respectively). Off-season carbohydrate intake followed similar trends. The average score on the NSKQ was 45 ± 9.6% out of 100. Athletic trainers were identified as a top nutrition source followed by strength and conditioning coaches and nutritionists. Conclusion: Female volleyball athletes have inadequate dietary intake and sports nutrition knowledge and may benefit from nutrition education and counseling by trained sports nutrition experts.Supplemental data for this article can be accessed online at https://doi.org/10.1080/07448481.2021.1987919.",2021-11-04 +34048582,LipidSig: a web-based tool for lipidomic data analysis.,"With the continuing rise of lipidomic studies, there is an urgent need for a useful and comprehensive tool to facilitate lipidomic data analysis. The most important features making lipids different from general metabolites are their various characteristics, including their lipid classes, double bonds, chain lengths, etc. 
Based on these characteristics, lipid species can be classified into different categories and, more interestingly, exert specific biological functions in a group. In an effort to simplify lipidomic analysis workflows and enhance the exploration of lipid characteristics, we have developed a highly flexible and user-friendly web server called LipidSig. It consists of five sections, namely, Profiling, Differential Expression, Correlation, Network and Machine Learning, and evaluates lipid effects on cellular or disease phenotypes. One of the specialties of LipidSig is the conversion between lipid species and characteristics according to a user-defined characteristics table. This function allows for efficient data mining for both individual lipids and subgroups of characteristics. To expand the server's practical utility, we also provide analyses focusing on fatty acid properties and multiple characteristics. In summary, LipidSig is expected to help users identify significant lipid-related features and to advance the field of lipid biology. The LipidSig webserver is freely available at http://chenglab.cmu.edu.tw/lipidsig.",2021-07-01 +,First Report of Colletotrichum tropicale Causing Anthracnose on Pomegranate in Brazil,"Anthracnose is a common disease that affects yield and quality of pomegranate (Punica granatum L.) fruit in different parts of world including Brazil. Colletotrichum gloeosporioides Penz. was considered to be the causal agent of pomegranate anthracnose because the identification of this species only relied on morphological characteristics and analysis of the ribosomal internal transcribed spacer region (ITS) (Rahimlou et al. 2014). Recently, multigene phylogenetic analyses revealed that C. gloeosporioides is a species complex (Weir et al. 2012). With an ApMat-based phylogeny, C. theobromicola Delacr. and an unresolved taxon were isolated from anthracnose lesions on pomegranate fruits (Sharma et al. 2015). 
Fruits of pomegranate showing dark, circular, and necrotic lesions were collected in the states of Alagoas and Bahia, in the northeastern region of Brazil, between August 2012 and January 2013. Small pieces of tissue taken from the lesions were surface disinfested in 70% ethanol for 30 s and in 1% NaClO for 1 min, rinsed in sterile water, plated on potato dextrose agar (PDA; Kasvi), and incubated at 25°C for 7 days. After 7 days on PDA at 25°C, colonies of the isolates varied between white and gray with a greenish reverse, and with a mean growth rate of 7.6 mm/day. Conidia were hyaline, cylindrical with rounded ends, and measured 14.3 (12.30 to 20.01) × 4.8 (3.36 to 6.11) μm. Appressoria were brown, clavate, and globose and measured 8.4 (5.28 to 12.12) × 6.3 (4.8 to 9.1) µm, which matched with those described for C. tropicale. Three single-spore isolates were obtained and stored in the Culture Collection of Phytopathogenic Fungi at the Universidade Federal de Alagoas. Genomic DNA was used as template for polymerase chain reaction amplifications of partial sequences of the glyceraldehyde-3-phosphate dehydrogenase (GAPDH) and β-tubulin (TUB2) genes and the ITS region (Weir et al. 2012). In the GenBank database, sequences of GAPDH (97 to 100%) and TUB2 (100%) were similar to those of C. tropicale E.I. Rojas, S.A. Rehner & Samuels, whereas those of ITS were 99 to 100% similar with C. siamense, C. tropicale, and C. gloeosporioides, all members of the C. gloeosporioides complex. Phylogenetic Bayesian inference analysis based in a combined data set (GAPDH, TUB2, and ITS) confirmed our isolates as C. tropicale. Partial sequences were deposited in GenBank (KY769889, KY769899, KY769905, KY769890, KY769891, MG647010, and MG647012), and the alignment and tree were deposited into TreeBASE (https://www.treebase.org/; accession no. 22650). 
To confirm the pathogenicity, 5-mm mycelial discs from a 7-day-old PDA culture were placed on healthy pomegranate fruits superficially wounded with a sterilized needle. In control fruits, only PDA discs were used. Fruits were kept in a humid chamber at 25°C. After 7 days, circular necrotic lesions were observed on inoculated fruits. To fulfill Koch’s postulates, C. tropicale was successfully reisolated from inoculated fruits. The control fruits showed no symptoms. Although C. tropicale was previously isolated from fruits of carnauba palm and mango in Brazil (Araújo et al. 2018; Lima et al. 2013), this is its first report on pomegranate fruits in the world.",2019-03-01 +29440217,Using big data to improve cardiovascular care and outcomes in China: a protocol for the CHinese Electronic health Records Research in Yinzhou (CHERRY) Study.,"INTRODUCTION:Data based on electronic health records (EHRs) are rich with individual-level longitudinal measurement information and are becoming an increasingly common data source for clinical risk prediction worldwide. However, few EHR-based cohort studies are available in China. Harnessing EHRs for research requires a full understanding of data linkages, management, and data quality in large data sets, which presents unique analytical opportunities and challenges. The purpose of this study is to provide a framework to establish a uniquely integrated EHR database in China for scientific research. METHODS AND ANALYSIS:The CHinese Electronic health Records Research in Yinzhou (CHERRY) Study will extract individual participant data within the regional health information system of an eastern coastal area of China to establish a longitudinal population-based ambispective cohort study for cardiovascular care and outcomes research. A total of 1 053 565 Chinese adults aged over 18 years were registered in the health information system in 2009, and there were 23 394 deaths from 1 January 2009 to 31 December 2015. 
The study will include information from multiple epidemiological surveys; EHRs for chronic disease management; and health administrative, clinical, laboratory, drug and electronic medical record (EMR) databases. Follow-up of fatal and non-fatal clinical events is achieved through records linkage to the regional system of disease surveillance, chronic disease management and EMRs (based on diagnostic codes from the International Classification of Diseases, tenth revision). The CHERRY Study will provide a unique platform and serve as a valuable big data resource for cardiovascular risk prediction and population management, for primary and secondary prevention of cardiovascular events in China. ETHICS AND DISSEMINATION:The CHERRY Study was approved by the Peking University Institutional Review Board (IRB00001052-16011) in April 2016. Results of the study will be disseminated through published journal articles, conferences and seminar presentations, and on the study website (http://www.cherry-study.org).",2018-02-12 +31696236,Mouse Phenome Database: a data repository and analysis suite for curated primary mouse phenotype data.,"The Mouse Phenome Database (MPD; https://phenome.jax.org) is a widely accessed and highly functional data repository housing primary phenotype data for the laboratory mouse accessible via APIs and providing tools to analyze and visualize those data. Data come from investigators around the world and represent a broad scope of phenotyping endpoints and disease-related traits in naïve mice and those exposed to drugs, environmental agents or other treatments. MPD houses rigorously curated per-animal data with detailed protocols. Public ontologies and controlled vocabularies are used for annotation. In addition to phenotype tools, genetic analysis tools enable users to integrate and interpret genome-phenome relations across the database. 
Strain types and populations include inbred, recombinant inbred, F1 hybrid, transgenic, targeted mutants, chromosome substitution, Collaborative Cross, Diversity Outbred and other mapping populations. Our new analysis tools allow users to apply selected data in an integrated fashion to address problems in trait associations, reproducibility, polygenic syndrome model selection and multi-trait modeling. As we refine these tools and approaches, we will continue to provide users a means to identify consistent, quality studies that have high translational relevance.",2020-01-01 +33691615,AgroSeek: a system for computational analysis of environmental metagenomic data and associated metadata.,"

Background

Metagenomics is gaining attention as a powerful tool for identifying how agricultural management practices influence human and animal health, especially in terms of potential to contribute to the spread of antibiotic resistance. However, the ability to compare the distribution and prevalence of antibiotic resistance genes (ARGs) across multiple studies and environments is currently impossible without a complete re-analysis of published datasets. This challenge must be addressed for metagenomics to realize its potential for helping guide effective policy and practice measures relevant to agricultural ecosystems, for example, identifying critical control points for mitigating the spread of antibiotic resistance.

Results

Here we introduce AgroSeek, a centralized web-based system that provides computational tools for analysis and comparison of metagenomic data sets tailored specifically to researchers and other users in the agricultural sector interested in tracking and mitigating the spread of ARGs. AgroSeek draws from rich, user-provided metagenomic data and metadata to facilitate analysis, comparison, and prediction in a user-friendly fashion. Further, AgroSeek draws from publicly-contributed data sets to provide a point of comparison and context for data analysis. To incorporate metadata into our analysis and comparison procedures, we provide flexible metadata templates, including user-customized metadata attributes to facilitate data sharing, while maintaining the metadata in a comparable fashion for the broader user community and to support large-scale comparative and predictive analysis.

Conclusion

AgroSeek provides an easy-to-use tool for environmental metagenomic analysis and comparison, based on both gene annotations and associated metadata, with this initial demonstration focusing on control of antibiotic resistance in agricultural ecosystems. Agroseek creates a space for metagenomic data sharing and collaboration to assist policy makers, stakeholders, and the public in decision-making. AgroSeek is publicly-available at https://agroseek.cs.vt.edu/ .",2021-03-10 +32696355,Exploring Protein Intrinsic Disorder with MobiDB.,"Nowadays, it is well established that many proteins or regions under physiological conditions lack a fixed three-dimensional structure and are intrinsically disordered. MobiDB is the main repository of protein disorder and mobility annotations, combining different data sources to provide an exhaustive overview of intrinsic disorder. MobiDB includes curated annotations from other databases, indirect disorder evidence from structural data, and disorder predictions from protein sequences. It provides an easy-to-use web server to visualize and explore disorder information. This chapter describes the data available in MobiDB, emphasizing how to use and access the intrinsic disorder data. MobiDB is available at URL http://mobidb.bio.unipd.it .",2020-01-01 +32469073,piNET: a versatile web platform for downstream analysis and visualization of proteomics data.,"Rapid progress in proteomics and large-scale profiling of biological systems at the protein level necessitates the continued development of efficient computational tools for the analysis and interpretation of proteomics data. Here, we present the piNET server that facilitates integrated annotation, analysis and visualization of quantitative proteomics data, with emphasis on PTM networks and integration with the LINCS library of chemical and genetic perturbation signatures in order to provide further mechanistic and functional insights. 
The primary input for the server consists of a set of peptides or proteins, optionally with PTM sites, and their corresponding abundance values. Several interconnected workflows can be used to generate: (i) interactive graphs and tables providing comprehensive annotation and mapping between peptides and proteins with PTM sites; (ii) high resolution and interactive visualization for enzyme-substrate networks, including kinases and their phospho-peptide targets; (iii) mapping and visualization of LINCS signature connectivity for chemical inhibitors or genetic knockdown of enzymes upstream of their target PTM sites. piNET has been built using a modular Spring-Boot JAVA platform as a fast, versatile and easy to use tool. The Apache Lucene indexing is used for fast mapping of peptides into UniProt entries for the human, mouse and other commonly used model organism proteomes. PTM-centric network analyses combine PhosphoSitePlus, iPTMnet and SIGNOR databases of validated enzyme-substrate relationships, for kinase networks augmented by DeepPhos predictions and sequence-based mapping of PhosphoSitePlus consensus motifs. Concordant LINCS signatures are mapped using iLINCS. For each workflow, a RESTful API counterpart can be used to generate the results programmatically in the json format. The server is available at http://pinet-server.org, and it is free and open to all users without login requirement.",2020-07-01 +33895377,A fusion of data science and feed-forward neural network-based modelling of COVID-19 outbreak forecasting in IRAQ.,"

Background

Iraq is among the countries affected by the COVID-19 pandemic. As of 2 August 2020, 129,151 COVID-19 cases were confirmed, including 91,949 recovered cases and 4,867 deaths. After the announcement of lockdown in early April 2020, the situation in Iraq was stabilizing until late May 2020, when daily COVID-19 infections rose suddenly due to the gradual easing of lockdown restrictions. In this context, it is important to develop a forecasting model to evaluate the COVID-19 outbreak in Iraq and so to guide future health policy.

Methods

COVID-19 lag data were made available by the University of Anbar through their online analytical platform (https://www.uoanbar.edu.iq/covid/), engaged with the day-to-day figures from the Iraqi health authorities. 154 days of patient data were provided covering the period from 2 March 2020 to 2 August 2020. An ensemble of feed-forward neural networks has been adopted to forecast the COVID-19 outbreak in Iraq. Also, this study highlights some key questions about this pandemic using data analytics.

Results

Forecasts were achieved with an accuracy of 87.6% for daily infections, 82.4% for daily recovered cases, and 84.3% for daily deaths. It is anticipated that COVID-19 infections in Iraq will reach about 308,996 cases by the end of September 2020, including 228,551 recoveries and 9,477 deaths.

Conclusion

The applications of artificial neural networks supported by advanced data analytics represent a promising solution through which to realise intelligent solutions, enabling the space of analytical operations to drive a national health policy to contain COVID-19 pandemic.",2021-04-22 +35865753,"Intra-Annual Variation of Eddy Diffusion (k zz ) in the MLT, From SABER and SCIAMACHY Atomic Oxygen Climatologies.","Atomic oxygen (O) in the mesosphere and lower thermosphere (MLT) results from a balance between production via photo-dissociation in the lower thermosphere and chemical loss by recombination in the upper mesosphere. The transport of O downward from the lower thermosphere into the mesosphere is preferentially driven by the eddy diffusion process that results from dissipating gravity waves and instabilities. The motivation here is to probe the intra-annual variability of the eddy diffusion coefficient (k zz ) and eddy velocity in the MLT based on the climatology of the region, initially accomplished by Garcia and Solomon (1985, https://doi.org/10.1029/JD090iD02p03850). In the current study, the intra-annual cycle was divided into 26 two-week periods for each of three zones: the northern hemisphere (NH), southern hemisphere (SH), and equatorial (EQ). Both 16 years of SABER (2002-2018) and 10 years of SCIAMACHY (2002-2012) O density measurements, along with NRLMSIS® 2.0 were used for calculation of atomic oxygen eddy diffusion velocities and fluxes. Our prominent findings include a dominant annual oscillation below 87 km in the NH and SH zones, with a factor of 3-4 variation between winter and summer at 83 km, and a dominant semiannual oscillation at all altitudes in the EQ zone. The measured global average k zz at 96 km lacks the intra-annual variability of upper atmosphere density data deduced by Qian et al. (2009, https://doi.org/10.1029/2008JA013643). 
The very large seasonal (and hemispherical) variations in k zz and O densities are important to separate and isolate in satellite analysis and to incorporate in MLT models.",2021-12-06 +34357513,Anti-Ebola: an initiative to predict Ebola virus inhibitors through machine learning.,"Ebola virus is a deadly pathogen responsible for a frequent series of outbreaks since 1976. Despite various efforts from researchers worldwide, its mortality and fatality are quite high. For antiviral drug discovery, the computational efforts are considered highly useful. Therefore, we have developed an 'anti-Ebola' web server, through quantitative structure-activity relationship information of available molecules with experimental anti-Ebola activities. Three hundred and five unique anti-Ebola compounds with their respective IC50 values were extracted from the 'DrugRepV' database. Later, the compounds were used to extract the molecular descriptors, which were subjected to regression-based model development. The robust machine learning techniques, namely support vector machine, random forest and artificial neural network, were employed using tenfold cross-validation. After a randomization approach, the best predictive model showed Pearson's correlation coefficient ranges from 0.83 to 0.98 on training/testing (T274) dataset. The robustness of the developed models was cross-evaluated using William's plot. The highly robust computational models are integrated into the web server. The 'anti-Ebola' web server is freely available at https://bioinfo.imtech.res.in/manojk/antiebola . We anticipate this will serve the scientific community for developing effective inhibitors against the Ebola virus.",2021-08-06 +33657805,LM-GlycomeAtlas Ver. 
2.0: An Integrated Visualization for Lectin Microarray-based Mouse Tissue Glycome Mapping Data with Lectin Histochemistry.,"Laser microdissection-assisted lectin microarray has been used to obtain quantitative and qualitative information on glycans on proteins expressed in microscopic regions of formalin-fixed paraffin-embedded tissue sections. For the effective visualization of this ""tissue glycome mapping"" data, a novel online tool, LM-GlycomeAtlas (https://glycosmos.org/lm_glycomeatlas/index), was launched in the freely available glycoscience portal, the GlyCosmos Portal (https://glycosmos.org). In LM-GlycomeAtlas Version 1.0, nine tissues from normal mice were used to provide one data set of glycomic profiles. Here we introduce an updated version of LM-GlycomeAtlas, which includes more spatial information. We designed it to deposit multiple data sets of glycomic profiles with high-resolution histological images, which included staining images with multiple lectins on the array. The additionally implemented interfaces allow users to display multiple histological images of interest (e.g., diseased and normal mice), thereby facilitating the evaluation of tissue glycomic profiling and glyco-pathological analysis. Using these updated interfaces, 451 glycomic profiling data and 42 histological images obtained from 14 tissues of normal and diseased mice were successfully visualized. 
By easy integration with other tools for glycoproteomic data and protein glycosylation machinery, LM-GlycomeAtlas will be one of the most valuable open resources that contribute to both glycoscience and proteomics communities.",2021-03-04 +34515619,The effects of yakson or gentle human touch training given to mothers with preterm babies on attachment levels and the responses of the baby: a randomized controlled trial.,"This study was carried out to determine the effects of the yakson or gentle human touch methods applied by mothers on their preterm new-borns on the mother's attachment levels and the baby's response. The yakson and gentle human touch methods are touch-based methods where stimuli with stress-reducing effects are given to preterm new-borns. This study was conducted with 90 women and their new-borns (Yakson = 30, gentle human touch = 30, control = 30) who were hospitalized at the neonatal intensive care unit of a state hospital in Turkey between August 2016 and August 2017. As a result, it was determined that the yakson and gentle human touch methods practiced on the new-borns by their mothers increased mother-baby attachment, contributed to sleep-calmness states, vital signs and weight gain of the baby and reduced their durations of hospital stay.Supplemental data for this article is available online at https://doi.org/10.1080/07399332.2021.1958817 .",2021-09-13 +33405381,Development and implementation of common data elements for venous thromboembolism research: on behalf of SSC Subcommittee on official Communication from the SSC of the ISTH.,"Clinical research in venous thromboembolism (VTE) is hindered by variability in the collection and reporting of data and outcomes. A consistent data language facilitates efficiencies, leads to higher quality data, and permits between-study comparisons and evidence synthesis. 
The International Society on Thrombosis and Haemostasis (ISTH) launched an international task force of more than 50 researchers to develop common data elements for clinical research in venous thromboembolism. The project was organized in seven working groups, each focusing on a topic area: General Core Data Elements; Anticoagulation and Other Therapies; Chronic VTE and Functional Outcomes; Diagnosis of VTE; Malignancy; Perioperative; and Predictors of VTE. The groups met via teleconference to collaboratively identify key data elements and develop definitions and data standards that were structured in a project-specific taxonomy. A Steering Committee met by teleconference and in-person to determine the overall scope of the project and resolve questions arising from the working groups. ISTH held an open public comment period to enable broader stakeholder involvement and feedback. The common data elements were then refined by the working groups to create a set of 512 unique data elements that are publicly available at http://isth.breakthrough.healthcare. The ISTH VTE Common Data Elements are intended to be a living project with ongoing curation, future expansion, and adaptation to meet the needs of the thrombosis and hemostasis research community.",2021-01-01 +28651001,PpTFDB: A pigeonpea transcription factor database for exploring functional genomics in legumes.,"Pigeonpea (Cajanus cajan L.), a diploid legume crop, is a member of the tribe Phaseoleae. This tribe is descended from the millettioid (tropical) clade of the subfamily Papilionoideae, which includes many important legume crop species such as soybean (Glycine max), mung bean (Vigna radiata), cowpea (Vigna ungiculata), and common bean (Phaseolus vulgaris). It plays major role in food and nutritional security, being rich source of proteins, minerals and vitamins. 
We have developed a comprehensive Pigeonpea Transcription Factors Database (PpTFDB) that encompasses information about 1829 putative transcription factors (TFs) and their 55 TF families. PpTFDB provides a comprehensive information about each of the identified TFs that includes chromosomal location, protein physicochemical properties, sequence data, protein functional annotation, simple sequence repeats (SSRs) with primers derived from their motifs, orthology with related legume crops, and gene ontology (GO) assignment to respective TFs. (PpTFDB: http://14.139.229.199/PpTFDB/Home.aspx) is a freely available and user friendly web resource that facilitates users to retrieve the information of individual members of a TF family through a set of query interfaces including TF ID or protein functional annotation. In addition, users can also get the information by browsing interfaces, which include browsing by TF Categories and by, GO Categories. This PpTFDB will serve as a promising central resource for researchers as well as breeders who are working towards crop improvement of legume crops.",2017-06-26 +30395283,"OrthoDB v10: sampling the diversity of animal, plant, fungal, protist, bacterial and viral genomes for evolutionary and functional annotations of orthologs.","OrthoDB (https://www.orthodb.org) provides evolutionary and functional annotations of orthologs. This update features a major scaling up of the resource coverage, sampling the genomic diversity of 1271 eukaryotes, 6013 prokaryotes and 6488 viruses. These include putative orthologs among 448 metazoan, 117 plant, 549 fungal, 148 protist, 5609 bacterial, and 404 archaeal genomes, picking up the best sequenced and annotated representatives for each species or operational taxonomic unit. OrthoDB relies on a concept of hierarchy of levels-of-orthology to enable more finely resolved gene orthologies for more closely related species. 
Since orthologs are the most likely candidates to retain functions of their ancestor gene, OrthoDB is aimed at narrowing down hypotheses about gene functions and enabling comparative evolutionary studies. Optional registered-user sessions allow on-line BUSCO assessments of gene set completeness and mapping of the uploaded data to OrthoDB to enable further interactive exploration of related annotations and generation of comparative charts. The accelerating expansion of genomics data continues to add valuable information, and OrthoDB strives to provide orthologs from the broadest coverage of species, as well as to extensively collate available functional annotations and to compute evolutionary annotations. The data can be browsed online, downloaded or assessed via REST API or SPARQL RDF compatible with both UniProt and Ensembl.",2019-01-01 +29059366,SBCDDB: Sleeping Beauty Cancer Driver Database for gene discovery in mouse models of human cancers.,"Large-scale oncogenomic studies have identified few frequently mutated cancer drivers and hundreds of infrequently mutated drivers. Defining the biological context for rare driving events is fundamentally important to increasing our understanding of the druggable pathways in cancer. Sleeping Beauty (SB) insertional mutagenesis is a powerful gene discovery tool used to model human cancers in mice. Our lab and others have published a number of studies that identify cancer drivers from these models using various statistical and computational approaches. Here, we have integrated SB data from primary tumor models into an analysis and reporting framework, the Sleeping Beauty Cancer Driver DataBase (SBCDDB, http://sbcddb.moffitt.org), which identifies drivers in individual tumors or tumor populations. Unique to this effort, the SBCDDB utilizes a single, scalable, statistical analysis method that enables data to be grouped by different biological properties. 
This allows for SB drivers to be evaluated (and re-evaluated) under different contexts. The SBCDDB provides visual representations highlighting the spatial attributes of transposon mutagenesis and couples this functionality with analysis of gene sets, enabling users to interrogate relationships between drivers. The SBCDDB is a powerful resource for comparative oncogenomic analyses with human cancer genomics datasets for driver prioritization.",2018-01-01 +35127198,Responsible research and innovation training programs: implementation and evaluation of the HEIRRI project.,"Responsible research and innovation, or RRI, is a concept that aims to bring together society and science for a better future. There are six key elements of RRI: public engagement, gender equality, science education, open access, ethics and governance. Higher Education Institutions and Responsible Research and Innovation (HEIRRI) project aimed to bring the concept of RRI into the educational system. Using state-of-the-art review of good practices, HEIRRI team developed 10 training programs on RRI for different higher education institution educational levels, including a summer school and a massive open online course (MOOC). We conducted pilot of the trainings and evaluated participants' experiences. Satisfaction with HEIRRI training programs on responsible research and innovation was high, both for participants and for the trainers, and trainings raised awareness of RRI. Participants' feedback was used to identify areas that need improvement and provided for recommendations for final versions of the HEIRRI training programs. In order to equip researchers with skills to recognize and apply RRI values, RRI should be included in their education. HEIRRI training is suitable for a range of different disciplines, including forensic science, and is free to use and adjust for specific contexts (available from: https://rri-tools.eu/heirri-training-programmes). 
Supplemental data for this article is available online at https://doi.org/10.1080/20961790.2021.1970319 .",2021-11-02 +,"Mitochondrial capture enriches mito‐DNA 100 fold, enabling PCR‐free mitogenomics biodiversity analysis","Biodiversity analyses based on next‐generation sequencing (NGS) platforms have developed by leaps and bounds in recent years. A PCR‐free strategy, which can alleviate taxonomic bias, was considered as a promising approach to delivering reliable species compositions of targeted environments. The major impediment of such a method is the lack of appropriate mitochondrial DNA enrichment ways. Because mitochondrial genomes (mitogenomes) make up only a small proportion of total DNA, PCR‐free methods will inevitably result in a huge excess of data (>99%). Furthermore, the massive volume of sequence data is highly demanding on computing resources. Here, we present a mitogenome enrichment pipeline via a gene capture chip that was designed by virtue of the mitogenome sequences of the 1000 Insect Transcriptome Evolution project (1KITE, www.1kite.org). A mock sample containing 49 species was used to evaluate the efficiency of the mitogenome capture method. We demonstrate that the proportion of mitochondrial DNA can be increased by approximately 100‐fold (from the original 0.47% to 42.52%). Variation in phylogenetic distances of target taxa to the probe set could in principle result in bias in abundance. However, the frequencies of input taxa were largely maintained after capture (R² = 0.81). We suggest that our mitogenome capture approach coupled with PCR‐free shotgun sequencing could provide ecological researchers an efficient NGS method to deliver reliable biodiversity assessment.",2016-03-01 +27173523,The Chinchilla Research Resource Database: resource for an otolaryngology disease model. ,"The long-tailed chinchilla (Chinchilla lanigera) is an established animal model for diseases of the inner and middle ear, among others. 
In particular, chinchilla is commonly used to study diseases involving viral and bacterial pathogens and polymicrobial infections of the upper respiratory tract and the ear, such as otitis media. The value of the chinchilla as a model for human diseases prompted the sequencing of its genome in 2012 and the more recent development of the Chinchilla Research Resource Database (http://crrd.mcw.edu) to provide investigators with easy access to relevant datasets and software tools to enhance their research. The Chinchilla Research Resource Database contains a complete catalog of genes for chinchilla and, for comparative purposes, human. Chinchilla genes can be viewed in the context of their genomic scaffold positions using the JBrowse genome browser. In contrast to the corresponding records at NCBI, individual gene reports at CRRD include functional annotations for Disease, Gene Ontology (GO) Biological Process, GO Molecular Function, GO Cellular Component and Pathway assigned to chinchilla genes based on annotations from the corresponding human orthologs. Data can be retrieved via keyword and gene-specific searches. Lists of genes with similar functional attributes can be assembled by leveraging the hierarchical structure of the Disease, GO and Pathway vocabularies through the Ontology Search and Browser tool. Such lists can then be further analyzed for commonalities using the Gene Annotator (GA) Tool. All data in the Chinchilla Research Resource Database is freely accessible and downloadable via the CRRD FTP site or using the download functions available in the search and analysis tools. 
The Chinchilla Research Resource Database is a rich resource for researchers using, or considering the use of, chinchilla as a model for human disease.Database URL: http://crrd.mcw.edu.",2016-05-12 +32650665,Classification of Physiologic Swallowing Impairment Severity: A Latent Class Analysis of Modified Barium Swallow Impairment Profile Scores.,"Purpose Our objectives were to (a) identify oral and pharyngeal physiologic swallowing impairment severity classes based on latent class analyses (LCAs) of the Modified Barium Swallow Impairment Profile (MBSImP) swallow task scores and (b) quantify the probability of severity class membership given composite MBSImP oral total (OT) and pharyngeal total (PT) scores. Method MBSImP scores were collected from a patient database of 319 consecutive modified barium swallow studies. Because of missing swallow task scores, LCA was performed using 25 multiply imputed data sets. Results LCA revealed a three-class structure for both oral and pharyngeal models. We identified OT and PT score intervals to assign subjects to oral and pharyngeal impairment latent severity classes, respectively, with high probability (probability of class membership ≥ 0.9 given OT or PT scores within specified ranges) and high confidence (95% credible interval [CI] widths ≤ 0.24 for all total scores within specified ranges). OT scores ranging from 0 to 10 and from 14 to 18 yielded assignments in Oral Latent Classes 1 and 2, respectively, while OT = 22 was assigned to Oral Latent Class 3. PT scores ranging from 0 to 13 and from 18 to 24 yielded assignments in Pharyngeal Latent Classes 1 and 2, respectively, while PT = 26 was assigned to Pharyngeal Latent Class 3. Conclusions LCA of MBSImP task-level data revealed significant underlying oral and pharyngeal ordinal class structures representing increasingly severe gradations of physiologic swallow impairment. Clinically meaningful OT and PT score ranges were derived facilitating latent class assignment. 
Supplemental Material https://doi.org/10.23641/asha.12315677.",2020-07-10 +33034552,Transcriptome-wide expression profiling of Sporothrix schenckii yeast and mycelial forms and the establishment of the Sporothrix Genome DataBase. ,"Sporothrix schenckii is a dimorphic fungus existing as mould in the environment and as yeast in the host. The morphological shift between mycelial/yeast phases is crucial for its virulence, but the transcriptional networks implicated in dimorphic transition are still not fully understood. Here, we report the global transcriptomic differences occurring between mould and yeast phases of S. schenckii, including changes in gene expression profiles associated with these distinct cellular phenotypes. Moreover, we also propose a new genome annotation, which reveals a more complex transcriptional architecture than previously assumed. Using RNA-seq, we identified a total of 17 307 genes, of which 11 217 were classified as protein-encoding genes, whereas 6090 were designated as non-coding RNAs (ncRNAs). Approximately ~71 % of all annotated genes were found to overlap and the different-strand overlapping type was the most common. Gene expression analysis revealed that 8795 genes were differentially regulated among yeast and mould forms. Differential gene expression was also observed for antisense ncRNAs overlapping neighbouring protein-encoding genes. The release of transcriptome-wide data and the establishment of the Sporothrix Genome DataBase (http://sporothrixgenomedatabase.unime.it) represent an important milestone for Sporothrix research, because they provide a strong basis for future studies on the molecular pathways involved in numerous biological processes.",2020-10-01 +32754758,"ConoMode, a database for conopeptide binding modes. ","ConoMode is a database for complex three-dimensional (3D) structures of conopeptides binding with their target proteins. 
Conopeptides, a large family of peptides from the venom of marine snails of the Conus genus, have exceptionally diverse sequences, and their high specificity to block ion channels makes them crucial as drug leads and tools for physiological studies. ConoMode is a specialized archive for the collection of 3D coordinate data for the conopeptides and their binding target proteins from published literature and the Protein Data Bank. These 3D structures can be determined using experimental methods such as X-ray crystallography and electron microscopy and computational methods including docking, homology modeling and molecular dynamics simulations. The binding modes for the conopeptides determined using computational modeling must be validated based on experimental data. The 3D coordinate data from ConoMode can be searched, visualized, downloaded and uploaded. Currently, ConoMode manages 19 conopeptide sequences (from 10 Conus species), 15 protein sequences and 37 3D structures. ConoMode utilizes a modern technical framework to provide a good user experience on mobile devices with touch interaction features. Furthermore, the database is fully optimized for unstructured data and flexible data models. Database URL: http://conomode.qnlm.ac/conomode/conomode/index.",2020-01-01 +,Global crop waste burning – micro-biochar; how a small community development organization learned experientially to address a huge problem one tiny field at a time,"The world’s 2.5 billion poorest people - small farmers living at the far fringe of the developing world – and their billion or so slightly better off neighbors burn 10.5 billion metric tonnes (tonnes) of crop waste annually. Smoke from their fires reddens the sun, closes airports, shuts schools and governments – and kills millions of people (World Health Organization (WHO). who.int/health-topics/air-pollution#tab=tab_1). 
Their fires release 16.6 billion tonnes of CO2, and emit 9.8 billion tonnes CO2e, 1.1 billion tonnes of smog precursors and 66 million tonnes of PM2.5. (Akagi et al., Atmospheric Chem Physics 4039-4071, 2011; Environmental Protection Agency, epa.gov/ghgemissions/understanding-global-warming-potentials; Food and Agriculture Organization, FAOSTAT, http://www.fao.org/faostat/en/#data) [See Attachments 1–3. For details of the Attachments, please see the section below entitled “Availability of data and materials.”]. No one yet has stopped the burning. Seminars, health warnings, bans, threats, jailings, shootings – nothing has worked, because not one has offered farmers a better alternative. This is the story of how Warm Heart, a small, community development NGO, learned enough about small farmers’ plight to collaborate with them to develop the technology, training and social organization to mobilize villages to form biochar social enterprises. These make it profitable for farmers to convert crop waste into biochar, reducing CO2e, smog precursor and PM2.5 emissions, improving health and generating new local income – in short, to address the big three SDGs (1, 2 and 3) from the bottom. Warm Heart, however, wanted more; it wanted a system so appealing that it would spread by imitation and not require outside intervention. Based on what it has learned, Warm Heart wants to teach others that the knowledge to stop the smoke and improve the quality of one’s life does not require outside experts and lots of money. It wants to teach that anyone can learn to create a more sustainable world by themselves. This article traces the experiential learning process by which Warm Heart and its partners achieved their goals and shares Warm Heart’s open source solution. It serves four purposes. The article closely explores an experiential learning process. It details the underlying logic, workings and consequences of crop waste burning in the developing world. 
It demonstrates the application of this knowledge to the development of a sustainable – even profitable – solution to this global problem that does not require costly outside intervention but can be undertaken by local communities and small NGOs anywhere. Finally, it models how local communities, small NGOs and social investors can turn this global problem into a profitable business opportunity.",2020-01-01 +33416829,"ProteomeExpert: a docker image based web-server for exploring, modeling, visualizing, and mining quantitative proteomic data sets. ","The rapid progresses of high throughput sequencing technology-based omics and mass spectrometry (MS)-based proteomics such as data-independent acquisition (DIA) and its penetration to clinical studies have generated increasing number of proteomic data sets containing 100 s-1000s samples. To analyze these quantitative proteomic data sets and other -omics data sets more efficiently and conveniently, we present a web server-based software tool ProteomeExpert implemented in Docker, which offers various analysis tools for experimental design, data mining, interpretation, and visualization of quantitative proteomic data sets. ProteomeExpert can be deployed on an operating system with Docker installed or with R language environment. The Docker image of ProteomeExpert is freely available from https://hub.docker.com/r/lifeinfo/proteomeexpert. The source code of ProteomeExpert is also openly accessible at http://www.github.com/lifeinfo/ProteomeExpert/. In addition, a demo server is provided at https://proteomic.shinyapps.io/peserver/. 
SUPPLEMENTARY DATA ARE AVAILABLE AT BIOINFORMATICS ONLINE.",2021-01-08 +34353196,Association of Emulsifier and Highly Processed Food Intake with Circulating Markers of Intestinal Permeability and Inflammation in the Cancer Prevention Study-3 Diet Assessment Sub-Study.,"Compelling animal studies report increased intestinal permeability, inflammation, and colorectal carcinogenesis with exposure to certain emulsifiers commonly added to processed foods, but human data are lacking. Highly processed food consumption is also associated with obesity and higher risk of chronic diseases. We cross-sectionally examined the association of emulsifier and highly processed food consumption estimated from six 24-h dietary recalls among 588 U.S. men and women over one year, with biomarkers of intestinal permeability and inflammation measured from two fasting blood samples collected six months apart. In multivariable-adjusted generalized linear models, greater emulsifier intake (g/d) was not associated with antibodies to flagellin (P-trend = 0.88), lipopolysaccharide (LPS) (P-trend = 0.56), or the combined total thereof (P-trend = 0.65) but was positively associated with an inflammatory biomarker, glycoprotein acetyls (GlycA) (P-trend = 0.02). Highly processed food intake (% kcal/d) was associated with higher anti-LPS antibodies (P-trend = 0.001) and total anti-flagellin and anti-LPS antibodies (P-trend = 0.005) but not with other biomarkers, whereas processed food intake expressed as % g/d was associated with higher GlycA (P-trend = 0.02). Our findings suggest that, broadly, highly processed food consumption may be associated with intestinal permeability biomarkers, and both emulsifier and highly processed food intakes may be associated with inflammation. 
Additional studies are warranted to further evaluate these relationships.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1957947.",2021-08-06 +33344750,Evaluating the Alzheimer's disease data landscape.,"

Introduction

Numerous studies have collected Alzheimer's disease (AD) cohort data sets. To achieve reproducible, robust results in data-driven approaches, an evaluation of the present data landscape is vital.

Methods

Previous efforts relied exclusively on metadata and literature. Here, we evaluate the data landscape by directly investigating nine patient-level data sets generated in major clinical cohort studies.

Results

The investigated cohorts differ in key characteristics, such as demographics and distributions of AD biomarkers. Analyzing the ethnoracial diversity revealed a strong bias toward White/Caucasian individuals. We described and compared the measured data modalities. Finally, the available longitudinal data for important AD biomarkers was evaluated. All results are explorable through our web application ADataViewer (https://adata.scai.fraunhofer.de).

Discussion

Our evaluation exposed critical limitations in the AD data landscape that impede comparative approaches across multiple data sets. Comparison of our results to those gained by metadata-based approaches highlights that thorough investigation of real patient-level data is imperative to assess a data landscape.",2020-12-16 +34351823,Nonstandardized Assessment of Cognitive-Communication Abilities Following Pediatric Traumatic Brain Injury: A Scoping Review.,"Purpose The purpose of this study is to describe and synthesize existing research on nonstandardized assessment of cognitive-communication abilities in children with traumatic brain injury (TBI) in order to improve the detection, diagnosis, and tracking of injury sequelae and guide appropriate service provision. Materials and Method A search of peer-reviewed journal databases revealed 504 unique articles published between January 2000 and August 2019. For full inclusion, articles had to report on empirical studies examining variables related to the nonstandardized assessment of cognitive-communication skills following TBI in children. Review articles, expert opinion pieces, and non-English language articles were excluded. The Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews guided this process. Results Results were tabulated for each of the 14 articles that met full inclusion criteria. Included studies presented five different types of nonstandardized assessment: discourse analysis (n = 3), systematic observation of child's performance during an instrumental activity of daily living (n = 4), virtual reality tasks (n = 3), structured cognitive tasks (n = 2), and functional rating scales (n = 2). The majority of included studies compared the outcomes of nonstandardized assessment against subtest scores and checklists drawn from a variety of existing standardized and criterion-referenced assessments. 
Targeted cognitive-communication skills included attention, working memory, self-regulation, planning, multitasking, social problem-solving, inferencing, and macrolevel discourse. Conclusions Preliminary research suggests that a well-designed and systematically implemented nonstandardized assessment can yield essential information about children's cognitive-communication abilities in real-world contexts. Further research is needed to validate these assessments and to determine in which settings and situations they may prove most effective. Supplemental Material https://doi.org/10.23641/asha.15079026.",2021-08-04 +32976910,Galaxy InteractoMIX: An Integrated Computational Platform for the Study of Protein-Protein Interaction Data.,"Protein interactions play a crucial role among the different functions of a cell and are central to our understanding of cellular processes both in health and disease. Here we present Galaxy InteractoMIX (http://galaxy.interactomix.com), a platform composed of 13 different computational tools each addressing specific aspects of the study of protein-protein interactions, ranging from large-scale cross-species protein-wide interactomes to atomic resolution level of protein complexes. Galaxy InteractoMIX provides an intuitive interface where users can retrieve consolidated interactomics data distributed across several databases or uncover links between diseases and genes by analyzing the interactomes underlying these diseases. The platform makes possible large-scale prediction and curation protein interactions using the conservation of motifs, interology, or presence or absence of key sequence signatures. The range of structure-based tools includes modeling and analysis of protein complexes, delineation of interfaces and the modeling of peptides acting as inhibitors of protein-protein interactions. Galaxy InteractoMIX includes a range of ready-to-use workflows to run complex analyses requiring minimal intervention by users. 
The potential range of applications of the platform covers different aspects of life science, biomedicine, biotechnology and drug discovery where protein associations are studied.",2020-09-23 +28854643,MSDB: A Comprehensive Database of Simple Sequence Repeats.,"Microsatellites, also known as Simple Sequence Repeats (SSRs), are short tandem repeats of 1-6 nt motifs present in all genomes, particularly eukaryotes. Besides their usefulness as genome markers, SSRs have been shown to perform important regulatory functions, and variations in their length at coding regions are linked to several disorders in humans. Microsatellites show a taxon-specific enrichment in eukaryotic genomes, and some may be functional. MSDB (Microsatellite Database) is a collection of >650 million SSRs from 6,893 species including Bacteria, Archaea, Fungi, Plants, and Animals. This database is by far the most exhaustive resource to access and analyze SSR data of multiple species. In addition to exploring data in a customizable tabular format, users can view and compare the data of multiple species simultaneously using our interactive plotting system. MSDB is developed using the Django framework and MySQL. It is freely available at http://tdb.ccmb.res.in/msdb.",2017-06-01 +32726198,A publicly accessible database for Clostridioides difficile genome sequences supports tracing of transmission chains and epidemics. ,"Clostridioides difficile is the primary infectious cause of antibiotic-associated diarrhea. Local transmissions and international outbreaks of this pathogen have been previously elucidated by bacterial whole-genome sequencing, but comparative genomic analyses at the global scale were hampered by the lack of specific bioinformatic tools. Here we introduce a publicly accessible database within EnteroBase (http://enterobase.warwick.ac.uk) that automatically retrieves and assembles C. 
difficile short-reads from the public domain, and calls alleles for core-genome multilocus sequence typing (cgMLST). We demonstrate that comparable levels of resolution and precision are attained by EnteroBase cgMLST and single-nucleotide polymorphism analysis. EnteroBase currently contains 18 254 quality-controlled C. difficile genomes, which have been assigned to hierarchical sets of single-linkage clusters by cgMLST distances. This hierarchical clustering is used to identify and name populations of C. difficile at all epidemiological levels, from recent transmission chains through to epidemic and endemic strains. Moreover, it puts newly collected isolates into phylogenetic and epidemiological context by identifying related strains among all previously published genome data. For example, HC2 clusters (i.e. chains of genomes with pairwise distances of up to two cgMLST alleles) were statistically associated with specific hospitals (P<10-4) or single wards (P=0.01) within hospitals, indicating they represented local transmission clusters. We also detected several HC2 clusters spanning more than one hospital that by retrospective epidemiological analysis were confirmed to be associated with inter-hospital patient transfers. In contrast, clustering at level HC150 correlated with k-mer-based classification and was largely compatible with PCR ribotyping, thus enabling comparisons to earlier surveillance data. EnteroBase enables contextual interpretation of a growing collection of assembled, quality-controlled C. difficile genome sequences and their associated metadata. Hierarchical clustering rapidly identifies database entries that are related at multiple levels of genetic distance, facilitating communication among researchers, clinicians and public-health officials who are combatting disease caused by C. difficile.",2020-07-29 +33369481,Community Approaches for Integrating Environmental Exposures into Human Models of Disease.,"

Background

A critical challenge in genomic medicine is identifying the genetic and environmental risk factors for disease. Currently, the available data links a majority of known coding human genes to phenotypes, but the environmental component of human disease is extremely underrepresented in these linked data sets. Without environmental exposure information, our ability to realize precision health is limited, even with the promise of modern genomics. Achieving integration of gene, phenotype, and environment will require extensive translation of data into a standard, computable form and the extension of the existing gene/phenotype data model. The data standards and models needed to achieve this integration do not currently exist.

Objectives

Our objective is to foster development of community-driven data-reporting standards and a computational model that will facilitate the inclusion of exposure data in computational analysis of human disease. To this end, we present a preliminary semantic data model and use cases and competency questions for further community-driven model development and refinement.

Discussion

There is a real desire by the exposure science, epidemiology, and toxicology communities to use informatics approaches to improve their research workflow, gain new insights, and increase data reuse. Critical to success is the development of a community-driven data model for describing environmental exposures and linking them to existing models of human disease. https://doi.org/10.1289/EHP7215.",2020-12-28 +27192119,dbCPG: A web resource for cancer predisposition genes.,"Cancer predisposition genes (CPGs) are genes in which inherited mutations confer highly or moderately increased risks of developing cancer. Identification of these genes and understanding the biological mechanisms that underlie them is crucial for the prevention, early diagnosis, and optimized management of cancer. Over the past decades, great efforts have been made to identify CPGs through multiple strategies. However, information on these CPGs and their molecular functions is scattered. To address this issue and provide a comprehensive resource for researchers, we developed the Cancer Predisposition Gene Database (dbCPG, Database URL: http://bioinfo.ahu.edu.cn:8080/dbCPG/index.jsp), the first literature-based gene resource for exploring human CPGs. It contains 827 human (724 protein-coding, 23 non-coding, and 80 unknown type genes), 637 rats, and 658 mouse CPGs. Furthermore, data mining was performed to gain insights into the understanding of the CPGs data, including functional annotation, gene prioritization, network analysis of prioritized genes and overlap analysis across multiple cancer types. A user-friendly web interface with multiple browse, search, and upload functions was also developed to facilitate access to the latest information on CPGs. 
Taken together, the dbCPG database provides a comprehensive data resource for further studies of cancer predisposition genes.",2016-06-01 +34040621,Easymap: A User-Friendly Software Package for Rapid Mapping-by-Sequencing of Point Mutations and Large Insertions.,"Mapping-by-sequencing strategies combine next-generation sequencing (NGS) with classical linkage analysis, allowing rapid identification of the causal mutations of the phenotypes exhibited by mutants isolated in a genetic screen. Computer programs that analyze NGS data obtained from a mapping population of individuals derived from a mutant of interest to identify a causal mutation are available; however, the installation and usage of such programs requires bioinformatic skills, modifying or combining pieces of existing software, or purchasing licenses. To ease this process, we developed Easymap, an open-source program that simplifies the data analysis workflows from raw NGS reads to candidate mutations. Easymap can perform bulked segregant mapping of point mutations induced by ethyl methanesulfonate (EMS) with DNA-seq or RNA-seq datasets, as well as tagged-sequence mapping for large insertions, such as transposons or T-DNAs. The mapping analyses implemented in Easymap have been validated with experimental and simulated datasets from different plant and animal model species. Easymap was designed to be accessible to all users regardless of their bioinformatics skills by implementing a user-friendly graphical interface, a simple universal installation script, and detailed mapping reports, including informative images and complementary data for assessment of the mapping results. Easymap is available at http://genetics.edu.umh.es/resources/easymap; its Quickstart Installation Guide details the recommended procedure for installation.",2021-05-07 +31373607,PIRD: Pan Immune Repertoire Database.,"

Motivation

T and B cell receptors (TCRs and BCRs) play a pivotal role in the adaptive immune system by recognizing an enormous variety of external and internal antigens. Understanding these receptors is critical for exploring the process of immunoreaction and exploiting potential applications in immunotherapy and antibody drug design. Although a large number of samples have had their TCR and BCR repertoires sequenced using high-throughput sequencing in recent years, very few databases have been constructed to store these kinds of data. To resolve this issue, we developed a database.

Results

We developed a database, the Pan Immune Repertoire Database (PIRD), located in China National GeneBank (CNGBdb), to collect and store annotated TCR and BCR sequencing data, including from Homo sapiens and other species. In addition to data storage, PIRD also provides functions of data visualization and interactive online analysis. Additionally, a manually curated database of TCRs and BCRs targeting known antigens (TBAdb) was also deposited in PIRD.

Availability and implementation

PIRD can be freely accessed at https://db.cngb.org/pird.",2020-02-01 +27899624,MicroScope in 2017: an expanding and evolving integrated resource for community expertise of microbial genomes.,"The annotation of genomes from NGS platforms needs to be automated and fully integrated. However, maintaining consistency and accuracy in genome annotation is a challenging problem because millions of protein database entries are not assigned reliable functions. This shortcoming limits the knowledge that can be extracted from genomes and metabolic models. Launched in 2005, the MicroScope platform (http://www.genoscope.cns.fr/agc/microscope) is an integrative resource that supports systematic and efficient revision of microbial genome annotation, data management and comparative analysis. Effective comparative analysis requires a consistent and complete view of biological data, and therefore, support for reviewing the quality of functional annotation is critical. MicroScope allows users to analyze microbial (meta)genomes together with post-genomic experiment results if any (i.e. transcriptomics, re-sequencing of evolved strains, mutant collections, phenotype data). It combines tools and graphical interfaces to analyze genomes and to perform the expert curation of gene functions in a comparative context. Starting with a short overview of the MicroScope system, this paper focuses on some major improvements of the Web interface, mainly for the submission of genomic data and on original tools and pipelines that have been developed and integrated in the platform: computation of pan-genomes and prediction of biosynthetic gene clusters. 
Today the resource contains data for more than 6000 microbial genomes, and among the 2700 personal accounts (65% of which are now from foreign countries), 14% of the users are performing expert annotations, on at least a weekly basis, contributing to improve the quality of microbial genome annotations.",2016-11-29 +34864847,DeepIDP-2L: protein intrinsically disordered region prediction by combining convolutional attention network and hierarchical attention network. ,"Intrinsically disordered regions (IDRs) are widely distributed in proteins. Accurate prediction of IDRs is critical for the protein structure and function analysis. The IDRs are divided into long disordered regions (LDRs) and short disordered regions (SDRs) according to their lengths. Previous studies have shown that LDRs and SDRs have different proprieties. However, the existing computational methods fail to extract different features for LDRs and SDRs separately. As a result, they achieve unstable performance on datasets with different ratios of LDRs and SDRs. In this study, a two-layer predictor was proposed called DeepIDP-2L. In the first layer, two kinds of attention-based models are used to extract different features for LDRs and SDRs, respectively. The hierarchical attention network (HAN) is used to capture the distribution pattern features of LDRs, and convolutional attention network (CAN) is used to capture the local correlation features of SDRs. The second layer of DeepIDP-2L maps the feature extracted in the first layer into a new feature space. Convolutional network (CNN) and bidirectional long short term memory (Bi-LSTM) are employed to capture the local and long-range information for predicting both SDRs and LDRs. Experimental results show that DeepIDP-2L can achieve more stable performance than other exiting predictors on independent test sets with different ratios of SDRs and LDRs. 
For the convenience of most experimental scientists, a user-friendly and publicly accessible web-server for the new predictor has been established at http://bliulab.net/DeepIDP-2L/. It is anticipated that DeepIDP-2L will become a very useful tool for identification of intrinsically disordered regions. Supplementary data are available at Bioinformatics online.",2021-12-02 +30357350,The Pfam protein families database in 2019.,"The last few years have witnessed significant changes in Pfam (https://pfam.xfam.org). The number of families has grown substantially to a total of 17,929 in release 32.0. New additions have been coupled with efforts to improve existing families, including refinement of domain boundaries, their classification into Pfam clans, as well as their functional annotation. We recently began to collaborate with the RepeatsDB resource to improve the definition of tandem repeat families within Pfam. We carried out a significant comparison to the structural classification database, namely the Evolutionary Classification of Protein Domains (ECOD) that led to the creation of 825 new families based on their set of uncharacterized families (EUFs). Furthermore, we also connected Pfam entries to the Sequence Ontology (SO) through mapping of the Pfam type definitions to SO terms. Since Pfam has many community contributors, we recently enabled the linking between authorship of all Pfam entries with the corresponding authors' ORCID identifiers. 
This effectively permits authors to claim credit for their Pfam curation and link them to their ORCID record.",2019-01-01 +34941243,Tracking PM2.5 and O3 Pollution and the Related Health Burden in China 2013-2020.,"Based on the exposure data sets from the Tracking Air Pollution in China (TAP, http://tapdata.org.cn/), we characterized the spatiotemporal variations in PM2.5 and O3 exposures and quantified the long- and short-term exposure related premature deaths during 2013-2020 with respect to the two-stage clean air actions (2013-2017 and 2018-2020). We find a 48% decrease in national PM2.5 exposure during 2013-2020, although the decrease rate has slowed after 2017. At the same time, O3 pollution worsened, with the average April-September O3 exposure increased by 17%. The improved air quality led to 308 thousand and 16 thousand avoided long- and short-term exposure related deaths, respectively, in 2020 compared to the 2013 level, which was majorly attributed to the reduction in ambient PM2.5 concentration. It is also noticed that with smaller PM2.5 reduction, the avoided long-term exposure associated deaths in 2017-2020 (13%) was greater than that in 2013-2017 (9%), because the exposure-response curve is nonlinear. As a result of the efforts in reducing PM2.5-polluted days with the daily average PM2.5 higher than 75 μg/m3 and the considerable increase in O3-polluted days with the daily maximum 8 h average O3 higher than 160 μg/m3, deaths attributable to the short-term O3 exposure were greater than those due to PM2.5 exposure since 2018. Future air quality improvement strategies for the coordinated control of PM2.5 and O3 are urgently needed.",2021-12-23 +34211562,Research on Diagnosis Prediction of Traditional Chinese Medicine Diseases Based on Improved Bayesian Combination Model.,"Traditional Chinese Medicine (TCM) clinical intelligent decision-making assistance has been a research hotspot in recent years. 
However, the recommendations of TCM disease diagnosis based on the current symptoms are difficult to achieve a good accuracy rate because of the ambiguity of the names of TCM diseases. The medical record data downloaded from ancient and modern medical records cloud platform developed by the Institute of Medical Information on TCM of the Chinese Academy of Chinese Medical Sciences (CACMC) and the practice guidelines data in the TCM clinical decision supporting system were utilized as the corpus. Based on the empirical analysis, a variety of improved Naïve Bayes algorithms are presented. The research findings show that the Naïve Bayes algorithm with main symptom weighted and equal probability has achieved better results, with an accuracy rate of 84.2%, which is 15.2% higher than the 69% of the classic Naïve Bayes algorithm (without prior probability). The performance of the Naïve Bayes classifier is greatly improved, and it has certain clinical practicability. The model is currently available at http://tcmcdsmvc.yiankb.com/.",2021-06-10 +34408729,Whole-Genome-Based Helicobacter pylori Geographic Surveillance: A Visualized and Expandable Webtool.,"Helicobacter pylori exhibit specific geographic distributions that are related to clinical outcomes. Despite the high infection rate of H. pylori throughout the world, the genetic epidemiology surveillance of H. pylori still needs to be improved. This study used the single nucleotide polymorphisms (SNPs) profiling approach based on whole genome sequencing (WGS) to facilitate genomic population analyses of H. pylori and encourage the dissemination of microbial genotyping strategies worldwide. A total number of 1,211 public H. pylori genomes were downloaded and used to construct the typing tool, named HpTT (H. pylori Typing Tool). Combined with the metadata, we developed two levels of genomic typing, including a continent-scale and a country scale that nested in the continent scale. 
Results showed that Asia was the largest isolate source in our dataset, while isolates from Europe and Oceania were comparatively more widespread. More specifically, Switzerland and Australia are the main sources of widespread isolates in their corresponding continents. To integrate all the typing information and enable researchers to compare their dataset against the existing global database easily and rapidly, a user-friendly website (https://db.cngb.org/HPTT/) was developed with both genomic typing tools and visualization tools. To further confirm the validity of the website, ten newly assembled genomes were downloaded and tested precisely located on the branch as we expected. In summary, the H. pylori typing tool (HpTT) is a novel genomic epidemiological tool that can achieve high-resolution analysis of genomic typing and visualizing simultaneously, providing insights into the genetic population structure, evolution analysis, and epidemiological surveillance of H. pylori.",2021-08-02 +34339006,The network map of Elabela signaling pathway in physiological and pathological conditions.,"Elabela (ELA; also called Apela and Toddler) is one of the recently discovered ligand among the two endogenous peptide ligands (Apelin and Elabela) of the apelin receptor (APLNR, also known as APJ). Elabela-induced signaling plays a crucial role in diverse biological processes, including formation of the embryonic cardiovascular system and early placental development by reducing the chances of occurrence of preeclampsia during pregnancy. It also plays the major role in the renoprotection by reducing kidney injury and the inflammatory response and regulation of gene expression associated with heart failure and fibrosis. Elabela may be processed into different active peptides, each of which binds to APLNR and predominantly activates the signals through PI3K/AKT pathway. 
Owing to its biomedical importance, we developed a consolidated signaling map of Elabela, in accordance with the NetPath criteria. The presented Elabela signaling map comprises 12 activation/inhibition events, 15 catalysis events, 1 molecular association, 34 gene regulation events and 32 protein expression events. The Elabela signaling pathway map is freely made available through the WikiPathways Database ( https://www.wikipathways.org/index.php/Pathway:WP5100 ).",2021-08-02 +31612957,DNAproDB: an expanded database and web-based tool for structural analysis of DNA-protein complexes.,"DNAproDB (https://dnaprodb.usc.edu) is a web-based database and structural analysis tool that offers a combination of data visualization, data processing and search functionality that improves the speed and ease with which researchers can analyze, access and visualize structural data of DNA-protein complexes. In this paper, we report significant improvements made to DNAproDB since its initial release. DNAproDB now supports any DNA secondary structure from typical B-form DNA to single-stranded DNA to G-quadruplexes. We have updated the structure of our data files to support complex DNA conformations, multiple DNA-protein complexes within a DNAproDB entry and model indexing for analysis of ensemble data. Support for chemically modified residues and nucleotides has been significantly improved along with the addition of new structural features, improved structural moiety assignment and use of more sequence-based annotations. We have redesigned our report pages and search forms to support these enhancements, and the DNAproDB website has been improved to be more responsive and user-friendly. DNAproDB is now integrated with the Nucleic Acid Database, and we have increased our coverage of available Protein Data Bank entries. 
Our database now contains 95% of all available DNA-protein complexes, making our tools for analysis of these structures accessible to a broad community.",2020-01-01 +34718416,CoCoPRED: coiled-coil protein structural feature prediction from amino acid sequence using deep neural networks. ,"Coiled-coil is composed of two or more helices that are wound around each other. It widely exists in proteins and has been discovered to play a variety of critical roles in biology processes. Generally, there are three types of structural features in coiled-coil: coiled-coil domain (CCD), oligomeric state, and register. However, most of the existing computational tools only focus on one of them. Here, we describe a new deep learning model, CoCoPRED, which is based on convolutional layers, bidirectional long short-term memory, and attention mechanism. It has three networks, i.e., CCD network, oligomeric state network, and register network, corresponding to the three types of structural features in coiled-coil. This means CoCoPRED has the ability of fulfilling comprehensive prediction for coiled-coil proteins. Through the 5-fold cross-validation experiment, we demonstrate that CoCoPRED can achieve better performance than the state-of-the-art models on both CCD prediction and oligomeric state prediction. Further analysis suggests the CCD prediction may be a performance indicator of the oligomeric state prediction in CoCoPRED. The attention heads in CoCoPRED indicate that registers a, b, and e are more crucial for the oligomeric state prediction. CoCoPRED is available at http://www.csbio.sjtu.edu.cn/bioinf/CoCoPRED. Supplementary data are available at Bioinformatics online.",2021-10-30 +31253075,JCD-DEA: a joint covariate detection tool for differential expression analysis on tumor expression profiles.,"

Background

Differential expression analysis on tumor expression profiles has always been a key issue for subsequent biological experimental validation. It is important to determine how to select features which best discriminate between different groups of patients. Despite the emergence of multivariate analysis approaches, prevailing feature selection methods primarily focus on multiple hypothesis testing on individual variables, and then combine them for an explanatory result. Besides, these methods, which are commonly based on hypothesis testing, view classification as a posterior validation of the selected variables.

Results

Based on previously provided A5 feature selection strategy, we develop a joint covariate detection tool for differential expression analysis on tumor expression profiles. This software combines hypothesis testing with testing according to classification results. A model selection approach based on Gaussian mixture model is introduced for automatic selection of features. Besides, a projection heatmap is proposed for the first time.

Conclusions

Joint covariate detection strengthens the viewpoint for selecting variables which are not only individually but also jointly significant. Experiments on simulation and realistic data show the effectiveness of the developed software, which enhances the reliability of joint covariate detection for differential expression analysis on tumor expression profiles. The software is available at http://bio-nefu.com/resource/jcd-dea .",2019-06-28 +32418327,Ligand-centered assessment of SARS-CoV-2 drug target models in the Protein Data Bank.,"A bright spot in the SARS-CoV-2 (CoV-2) coronavirus pandemic has been the immediate mobilization of the biomedical community, working to develop treatments and vaccines for COVID-19. Rational drug design against emerging threats depends on well-established methodology, mainly utilizing X-ray crystallography, to provide accurate structure models of the macromolecular drug targets and of their complexes with candidates for drug development. In the current crisis, the structural biological community has responded by presenting structure models of CoV-2 proteins and depositing them in the Protein Data Bank (PDB), usually without time embargo and before publication. Since the structures from the first-line research are produced in an accelerated mode, there is an elevated chance of mistakes and errors, with the ultimate risk of hindering, rather than speeding up, drug development. In the present work, we have used model-validation metrics and examined the electron density maps for the deposited models of CoV-2 proteins and a sample of related proteins available in the PDB as of April 1, 2020. We present these results with the aim of helping the biomedical community establish a better-validated pool of data. The proteins are divided into groups according to their structure and function. In most cases, no major corrections were necessary. 
However, in several cases significant revisions in the functionally sensitive area of protein-inhibitor complexes or for bound ions justified correction, re-refinement, and eventually reversioning in the PDB. The re-refined coordinate files and a tool for facilitating model comparisons are available at https://covid-19.bioreproducibility.org. DATABASE: Validated models of CoV-2 proteins are available in a dedicated, publicly accessible web service https://covid-19.bioreproducibility.org.",2020-06-24 +31713629,SpatialDB: a database for spatially resolved transcriptomes.,"Spatially resolved transcriptomic techniques allow the characterization of spatial organization of cells in tissues, which revolutionize the studies of tissue function and disease pathology. New strategies for detecting spatial gene expression patterns are emerging, and spatially resolved transcriptomic data are accumulating rapidly. However, it is not convenient for biologists to exploit these data due to the diversity of strategies and complexity in data analysis. Here, we present SpatialDB, the first manually curated database for spatially resolved transcriptomic techniques and datasets. The current version of SpatialDB contains 24 datasets (305 sub-datasets) from 5 species generated by 8 spatially resolved transcriptomic techniques. SpatialDB provides a user-friendly web interface for visualization and comparison of spatially resolved transcriptomic data. To further explore these data, SpatialDB also provides spatially variable genes and their functional enrichment annotation. SpatialDB offers a repository for research community to investigate the spatial cellular structure of tissues, and may bring new insights into understanding the cellular microenvironment in disease. 
SpatialDB is freely available at https://www.spatialomics.org/SpatialDB.",2020-01-01 +29899596,Cyanobacterial diversity held in microbial biological resource centers as a biotechnological asset: the case study of the newly established LEGE culture collection.,"Cyanobacteria are a well-known source of bioproducts which renders culturable strains a valuable resource for biotechnology purposes. We describe here the establishment of a cyanobacterial culture collection (CC) and present the first version of the strain catalog and its online database (http://lege.ciimar.up.pt/). The LEGE CC holds 386 strains, mainly collected in coastal (48%), estuarine (11%), and fresh (34%) water bodies, for the most part from Portugal (84%). By following the most recent taxonomic classification, LEGE CC strains were classified into at least 46 genera from six orders (41% belong to the Synechococcales), several of them are unique among the phylogenetic diversity of the cyanobacteria. For all strains, primary data were obtained and secondary data were surveyed and reviewed, which can be reached through the strain sheets either in the catalog or in the online database. An overview on the notable biodiversity of LEGE CC strains is showcased, including a searchable phylogenetic tree and images for all strains. With this work, 80% of the LEGE CC strains have now their 16S rRNA gene sequences deposited in GenBank. Also, based in primary data, it is demonstrated that several LEGE CC strains are a promising source of extracellular polymeric substances (EPS). Through a review of previously published data, it is exposed that LEGE CC strains have the potential or actual capacity to produce a variety of biotechnologically interesting compounds, including common cyanotoxins or unprecedented bioactive molecules. Phylogenetic diversity of LEGE CC strains does not entirely reflect chemodiversity. 
Further bioprospecting should, therefore, account for strain specificity of the valuable cyanobacterial holdings of LEGE CC.",2018-01-06 +31644494,Balancing Efficiency and Fairness in Liver Transplant Access: Tradeoff Curves for the Assessment of Organ Distribution Policies.,"

Background

Current distribution policies have resulted in persistent geographic disparity in access to donated livers across the country for waitlisted candidates.

Methods

Using mathematical optimization, and subsequently the Liver Simulation Allocation Model, the following organ distribution concepts were assessed: (1) current policy, (2) proposed alternative models, and (3) a novel continuous distribution model. A number of different scenarios for each policy distribution concept were generated and analyzed through efficiency-fairness tradeoff curves.

Results

The continuous distribution concept allowed both for the greatest reduction in patient deaths and for the most equitable geographic distribution across comparable organ transportation burden. When applied with an Optimized Prediction of Mortality allocation scheme, continuous distribution allowed for a significant reduction in number of deaths—on the order of 500 lives saved annually (https://livervis.github.io/).

Conclusions

Tradeoff curves allow for a visualized understanding on the efficiency/fairness balance, and have demonstrated that liver candidates awaiting transplant would benefit from a model employing continuous distribution as this holds the greatest advantage for mortality reduction. Development and implementation of continuous distribution models for all solid organ transplants may allow for minimization of the geographic disparity in organ distribution, and allow for efficient and fair access to a limited national resource for all candidates.",2020-05-01 +34043445,A Computational Model for Estimating the Speech Motor System's Sensitivity to Auditory Prediction Errors.,"Purpose The speech motor system uses feedforward and feedback control mechanisms that are both reliant on prediction errors. Here, we developed a state-space model to estimate the error sensitivity of the control systems. We examined (a) whether the model accounts for the error sensitivity of the control systems and (b) whether the two systems have similar error sensitivity. Method Participants (N = 50) completed an adaptation paradigm, in which their first and second formants were perturbed such that a participant's /ε/ would sound like her /ӕ/. We measured adaptive responses to the perturbations at early (0-80 ms) and late (220-300 ms) time points relative to the onset of the perturbations. As data-driven correlates of the error sensitivity of the feedforward and feedback systems, we used the average early responses and difference responses (i.e., late minus early responses), respectively. We fitted the state-space model to participants' adaptive responses and used the model's parameters as model-based estimates of error sensitivity. Results We found that the late responses were larger than the early responses. Additionally, the model-based estimates of error sensitivity strongly correlated with the data-driven estimates. 
However, the data-driven and model-based estimates of error sensitivity of the feedforward system did not correlate with those of the feedback system. Conclusions Overall, our results suggested that the dynamics of adaptive responses as well as error sensitivity of the control systems can be accurately predicted by the model. Furthermore, our results suggested that the feedforward and feedback control systems function independently. Supplemental Material https://doi.org/10.23641/asha.14669808.",2021-05-27 +33009914,A database resource and online analysis tools for coronaviruses on a historical and global scale. ,"The recent outbreak of COVID-19 caused by a new zoonotic origin coronavirus (SARS-CoV-2 or 2019-nCoV) has sound the alarm for the potential spread of epidemic coronavirus crossing species. With the urgent needs to assist disease control and to provide invaluable scientific information, we developed the coronavirus database (CoVdb), an online genomic, proteomic and evolutionary analysis platform. CoVdb has brought together genomes of more than 5000 coronavirus strains, which were collected from 1941 to 2020, in more than 60 countries and in hosts belonging to more than 30 species, ranging from fish to human. CoVdb presents comprehensive genomic information, such as gene function, subcellular localization, topology and protein structure. To facilitate coronavirus research, CoVdb also provides flexible search approaches and online tools to view and analyze protein structure, to perform multiple alignments, to automatically build phylogenetic trees and to carry on evolutionary analyses. CoVdb can be accessed freely at http://covdb.popgenetics.net. 
Hopefully, it will accelerate the progress to develop medicines or vaccines to control the pandemic of COVID-19.",2021-08-01 +34233068,A kaleidoscopic view of ovarian genes associated with premature ovarian insufficiency and senescence.,"Ovarian infertility and subfertility presenting with premature ovarian insufficiency (POI) and diminished ovarian reserve are major issues facing the developed world due to the trend of delaying childbirth. Ovarian senescence and POI represent a continuum of physiological/pathophysiological changes in ovarian follicle functions. Based on advances in whole exome sequencing, evaluation of gene copy variants, together with family-based and genome-wide association studies, we discussed genes responsible for POI and ovarian senescence. We used a gene-centric approach to sort out literature deposited in the Ovarian Kaleidoscope database (http://okdb.appliedbioinfo.net) by sub-categorizing candidate genes as ligand-receptor signaling, meiosis and DNA repair, transcriptional factors, RNA metabolism, enzymes, and others. We discussed individual gene mutations found in POI patients and verification of gene functions in gene-deleted model organisms. Decreased expression of some of the POI genes could be responsible for ovarian senescence, especially those essential for DNA repair, meiosis and mitochondrial functions. We propose to set up a candidate gene panel for targeted sequencing in POI patients together with studies on mitochondria-associated genes in middle-aged subfertile patients.",2021-08-01 +34532435,miRNA-218/FANCI is associated with metastasis and poor prognosis in lung adenocarcinoma: a bioinformatics analysis.,"

Background

In this study, tumor microarray analysis was used to screen the key messenger RNAs (mRNAs) and microRNAs related to the progression of lung adenocarcinoma (LUAD), in order to provide a theoretical basis for early diagnosis, therapeutic targets, and prognosis evaluation of patients with LUAD.

Methods

The mRNA and miRNA expression datasets came from the Gene Expression Omnibus (GEO) project database. Differentially expressed genes (DEGs) and microRNAs (DEMs) between LUAD tissues and adjacent lung tissue were obtained using GEO2R. The Search Tool for the Retrieval of Interacting Genes website was also employed to construct and visualize the interactions of overlapped DEGs. The overall survival of DEMs was investigated using the Kaplan-Meier plotter. The TargetScan website (http://www.targetscan.org/) was used to verify the relationship between FA Complementation Group I (FANCI) and the expression of miRNA-218 (miR-218). The expression of FANCI was verified using the GEO and Human Protein Atlas databases, as well as Real Time Quantitative PCR using our own samples. Next, we analyzed the relationship between the expression of FANCI and the clinicopathological characteristics as well as the prognosis of patients with LUAD. We also explored whether FANCI was related to immune cell infiltration in LUAD.

Results

FANCI was identified as a hub gene and associated with poor OS. We found that miR-218 negatively regulates FANCI mRNA expression. At the mRNA expression and protein level, FANCI was more highly expressed in LUAD tissues. The expression of FANCI in LUAD was related to tumor size (χ2=13.96, P<0.001), lymphatic metastasis (χ2=3.88, P<0.05), distant metastasis (χ2=45.39, P<0.001), and stage (χ2=11.03, P<0.05). In addition, the Cox regression model found that FANCI mRNA expression was an independent predictive factor of patient survival (P<0.05). FANCI expression was weakly related to both B cell and neutrophil infiltration in LUAD.

Conclusions

miR-218 may negatively regulate FANCI, and FANCI could promote metastasis via extracellular matrix (ECM) receptor interaction, leading to poor prognosis of LUAD. FANCI may be a key gene in determining metastasis and poor prognosis in patients with LUAD. Changes in the immune microenvironment may be the mechanism through which FANCI leads to poor prognosis of LUAD.",2021-08-01 +34415996,LINPS: a database for cancer-cell-specific perturbations of biological networks. ,"Screening for potential cancer therapies using existing large datasets of drug perturbations requires expertise and resources not available to all. This is often a barrier for lab scientists to tap into these valuable resources. To address these issues, one can take advantage of prior knowledge especially those coded in standard formats such as causal biological networks (CBN). Large datasets can be converted into appropriate structures, analyzed once and the results made freely available in easy-to-use formats. We used the Library of Integrated Cellular Signatures to model the cell-specific effect of hundreds of drug treatments on gene expression. These signatures were then used to predict the effect of the treatments on several CBN using the network perturbation amplitudes analysis. We packaged the pre-computed scores in a database with an interactive web interface. The intuitive user-friendly interface can be used to query the database for drug perturbations and quantify their effect on multiple key biological functions in cancer cell lines. In addition to describing the process of building the database and the interface, we provide a realistic use case to explain how to use and interpret the results. To sum, we pre-computed cancer-cell-specific perturbation amplitudes of several biological networks and made the output available in a database with an interactive web interface. 
Database URL https://mahshaaban.shinyapps.io/LINPSAPP/.",2021-08-01 +31396911,Modeling Biological Complexes Using Integrative Modeling Platform.,"Integrative structure modeling provides 3D models of macromolecular systems that are based on information from multiple types of experiments, physical principles, statistical inferences, and prior structural models. Here, we provide a hands-on realistic example of integrative structure modeling of the quaternary structure of the actin, tropomyosin, and gelsolin protein assembly based on electron microscopy, solution X-ray scattering, and chemical crosslinking data for the complex as well as excluded volume, sequence connectivity, and rigid atomic X-ray structures of the individual subunits. We follow the general four-stage process for integrative modeling, including gathering the input information, converting the input information into a representation of the system and a scoring function, sampling alternative model configurations guided by the scoring function, and analyzing the results. The computational aspects of this approach are implemented in our open-source Integrative Modeling Platform (IMP), a comprehensive and extensible software package for integrative modeling ( https://integrativemodeling.org ). In particular, we rely on the Python Modeling Interface (PMI) module of IMP that provides facile mixing and matching of macromolecular representations, restraints based on different types of information, sampling algorithms, and analysis including validations of the input data and output models. Finally, we also outline how to deposit an integrative structure and corresponding experimental data into PDB-Dev, the nascent worldwide Protein Data Bank (wwPDB) resource for archiving and disseminating integrative structures ( https://pdb-dev.wwpdb.org ). 
The example application provides a starting point for a user interested in using IMP for integrative modeling of other biomolecular systems.",2019-01-01 +34157880,QSAR analysis of the acute toxicity of avermectins towards Tetrahymena pyriformis.,"Avermectins have been effectively used in medicine, veterinary medicine, and agriculture as antiparasitic agents for many years. However, there are still no reliable data on the main ecotoxicological characteristics of most individual avermectins. Although many QSAR models have been proposed to describe the acute toxicity of organic compounds towards Tetrahymena pyriformis (T. pyriformis), avermectins are outside the applicability domain of these models. The influence of the molecular structures of various organic compounds on the acute toxicity towards T. pyriformis was studied using the OCHEM web platform (https://ochem.eu). A data set of 1792 toxicants was used to create models. The QSAR (Quantitative Structure-Activity Relationship) models were developed using the molecular descriptors Dragon, ISIDA, CDK, PyDescriptor, alvaDesc, and SIRMS and machine learning methods, such as Least Squares Support Vector Machine and Transformer Convolutional Neural Network. The HYBOT descriptors and Random Forest were used for a comparative QSAR investigation. Since the best predictive ability was demonstrated by the Transformer Convolutional Neural Network model, it was used to predict the toxicity of individual avermectins towards T. pyriformis. During a structural interpretation of the developed QSAR model, we determined the significant molecular transformations that increase and decrease the acute toxicity of organic compounds.",2021-07-01 +30701323,Gene co-expression network analysis identifies trait-related modules in Arabidopsis thaliana.,"

Main conclusion

A comprehensive network of the Arabidopsis transcriptome was analyzed and may serve as a valuable resource for candidate gene function investigations. A web tool to explore module information was also provided. Arabidopsis thaliana is a widely studied model plant whose transcriptome has been substantially profiled in various tissues, development stages and other conditions. These data can be reused for research on gene function through a systematic analysis of gene co-expression relationships. We collected microarray data from National Center for Biotechnology Information Gene Expression Omnibus, identified modules of co-expressed genes and annotated module functions. These modules were associated with experiments/traits, which provided potential signature modules for phenotypes. Novel heat shock proteins were implicated according to guilt by association. A higher-order module networks analysis suggested that the Arabidopsis network can be further organized into 15 meta-modules and that a chloroplast meta-module has a distinct gene expression pattern from the other 14 meta-modules. A comparison with the rice transcriptome revealed preserved modules and KEGG pathways. All the module gene information was available from an online tool at http://bioinformatics.fafu.edu.cn/arabi/ . Our findings provide a new source for future gene discovery in Arabidopsis.",2019-01-30 +34837785,Structural anomalies in a published NMR-derived structure of IRAK-M.,"Signaling by Toll-Like Receptors and the Interleukin-1 Receptor (IL1-R) involves intracellular binding of MyD88, followed by assembly of IL1-R Associated Kinases (IRAKs) into the so-called Myddosome. Using NMR, Nechama et al. determined the structure of the IRAK-M death domain monomer (PDBid: 5UKE). With this structure, they performed a docking study to model the location of IRAK-M in the Myddosome. Based on this, they present a molecular basis for selectivity of IRAK-M towards IRAK1 over IRAK2 binding. 
When we attempted to use 5UKE as a homology modeling template, we noticed that our 5UKE-based models had structural issues, such as disallowed torsion angles and solvent exposed tryptophans. We therefore analyzed the NMR ensemble of 5UKE using structure validation tools and we compared 5UKE with homologous high-resolution X-ray structures. We identified several structural anomalies in 5UKE, including packing issues, frayed helices and improbable side chain conformations. We used Yasara to build a homology model, based on two high resolution death domain crystal structures, as an alternative model for the IRAK-M death domain (atomic coordinates, modeling details and validation are available at https://swift.cmbi.umcn.nl/gv/service/5uke/). Our model agrees better with known death domain structure information than 5UKE and also with the chemical shift data that was deposited for 5UKE.",2021-11-05 +32997742,Colour deconvolution: stain unmixing in histological imaging.,"

Motivation

Microscopy images of stained cells and tissues play a central role in most biomedical experiments and routine histopathology. Storing colour histological images digitally opens the possibility to process numerically colour distribution and intensity to extract quantitative data. Among those numerical procedures is colour deconvolution, which enables decomposing an RGB image into channels representing the optical absorbance and transmittance of the dyes when their RGB representation is known. Consequently, a range of new applications become possible for morphological and histochemical segmentation, automated marker localization and image enhancement.

Availability and implementation

Colour deconvolution is presented here in two open-source forms: a MATLAB program/function and an ImageJ plugin written in Java. Both versions run in Windows, Macintosh and UNIX-based systems under the respective platforms. Source code and further documentation are available at: https://blog.bham.ac.uk/intellimic/g-landini-software/colour-deconvolution-2/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-06-01 +28985418,The SysteMHC Atlas project.,"Mass spectrometry (MS)-based immunopeptidomics investigates the repertoire of peptides presented at the cell surface by major histocompatibility complex (MHC) molecules. The broad clinical relevance of MHC-associated peptides, e.g. in precision medicine, provides a strong rationale for the large-scale generation of immunopeptidomic datasets and recent developments in MS-based peptide analysis technologies now support the generation of the required data. Importantly, the availability of diverse immunopeptidomic datasets has resulted in an increasing need to standardize, store and exchange this type of data to enable better collaborations among researchers, to advance the field more efficiently and to establish quality measures required for the meaningful comparison of datasets. Here we present the SysteMHC Atlas (https://systemhcatlas.org), a public database that aims at collecting, organizing, sharing, visualizing and exploring immunopeptidomic data generated by MS. The Atlas includes raw mass spectrometer output files collected from several laboratories around the globe, a catalog of context-specific datasets of MHC class I and class II peptides, standardized MHC allele-specific peptide spectral libraries consisting of consensus spectra calculated from repeat measurements of the same peptide sequence, and links to other proteomics and immunology databases. The SysteMHC Atlas project was created and will be further expanded using a uniform and open computational pipeline that controls the quality of peptide identifications and peptide annotations. 
Thus, the SysteMHC Atlas disseminates quality controlled immunopeptidomic information to the public domain and serves as a community resource toward the generation of a high-quality comprehensive map of the human immunopeptidome and the support of consistent measurement of immunopeptidomic sample cohorts.",2018-01-01 +33872868,Data sharing in PredRet for accurate prediction of retention time: Application to plant food bioactive compounds.,"Prediction of retention times (RTs) is increasingly considered in untargeted metabolomics to complement MS/MS matching for annotation of unidentified peaks. We tested the performance of PredRet (http://predret.org/) to predict RTs for plant food bioactive metabolites in a data sharing initiative containing entry sets of 29-103 compounds (totalling 467 compounds, >30 families) across 24 chromatographic systems (CSs). Between 27 and 667 predictions were obtained with a median prediction error of 0.03-0.76 min and interval width of 0.33-8.78 min. An external validation test of eight CSs showed high prediction accuracy. RT prediction was dependent on shape and type of LC gradient, and number of commonly measured compounds. Our study highlights PredRet's accuracy and ability to transpose RT data acquired from one CS to another CS. We recommend extensive RT data sharing in PredRet by the community interested in plant food bioactive metabolites to achieve a powerful community-driven open-access tool for metabolomics annotation.",2021-04-09 +34240108,HEMDAG: a family of modular and scalable hierarchical ensemble methods to improve Gene Ontology term prediction. ,"Automated protein function prediction is a complex multi-class, multi-label, structured classification problem in which protein functions are organized in a controlled vocabulary, according to the Gene Ontology (GO). 
""Hierarchy-unaware"" classifiers, also known as ""flat"" methods, predict GO terms without exploiting the inherent structure of the ontology, potentially violating the True-Path-Rule (TPR) that governs the GO, while ""hierarchy-aware"" approaches, even if they obey the TPR, do not always show clear improvements with respect to flat methods, or do not scale well when applied to the full GO. To overcome these limitations, we propose Hierarchical Ensemble Methods for Directed Acyclic Graphs (HEMDAG), a family of highly modular hierarchical ensembles of classifiers, able to build upon any flat method and to provide ""TPR-safe"" predictions, by leveraging a combination of isotonic regression and TPR learning strategies. Extensive experiments on synthetic and real data across several organisms firstly show that HEMDAG can be used as a general tool to improve the predictions of flat classifiers, and secondly that HEMDAG is competitive versus state-of-the-art hierarchy-aware learning methods proposed in the last CAFA international challenges. Fully-tested R code freely available at https://anaconda.org/bioconda/r-hemdag. Tutorial and documentation at https://hemdag.readthedocs.io. Supplementary data are available at Bioinformatics online.",2021-07-07 +34337975,Bibliometric Analysis of Microtia-Related Publications From 2006 to 2020.,"

Objective

Microtia is a congenital auricular malformation with a hypoplastic external ear that ranges in severity from a slightly smaller auricle to the complete absence of the auricle. The present study was conducted to identify and analyze the characteristics of microtia-related articles published from 2006 to 2020 by using bibliometric analyses.

Method

Microtia-related studies published from 2006 to 2020 were retrieved from the Web of Science Core Collection database. Keywords, first author, citations, date of publication, and publication journal were extracted and quantitatively analyzed using Bibliographic Item Co-Occurrence Matrix Builder software and the Bibliometric (https://bibliometric.com/app). VOSviewer was used to visualize research and form a network map on keywords and citations.

Results

A total of 1031 articles from 2006 to 2020 were included. The number of articles showed an overall trend of growth over time. The United States and China are the top 2 countries in terms of the number of microtia-related articles. From the analysis of keyword clustering, keywords could be mainly divided into 4 clusters in the field of microtia research: surgery, tissue engineering, epidemiology, and rehabilitation including hearing-related treatments, evaluation of effects, and quality of life after surgery. The top 10 most frequently cited papers from 2006 to 2020 were also extracted and analyzed.

Conclusion

A bibliometric research of microtia-related articles from 2006 to 2020 was conducted. This study may be helpful to understand the current research status of microtia and find the research trends in this field, thus proposing future directions for microtia research.",2021-07-31 +31100356,"Integrating Culture-based Antibiotic Resistance Profiles with Whole-genome Sequencing Data for 11,087 Clinical Isolates.","Emerging antibiotic resistance is a major global health threat. The analysis of nucleic acid sequences linked to susceptibility phenotypes facilitates the study of genetic antibiotic resistance determinants to inform molecular diagnostics and drug development. We collected genetic data (11,087 newly-sequenced whole genomes) and culture-based resistance profiles (10,991 out of the 11,087 isolates comprehensively tested against 22 antibiotics in total) of clinical isolates including 18 main species spanning a time period of 30 years. Species and drug specific resistance patterns were observed including increased resistance rates for Acinetobacter baumannii to carbapenems and for Escherichia coli to fluoroquinolones. Species-level pan-genomes were constructed to reflect the genetic repertoire of the respective species, including conserved essential genes and known resistance factors. Integrating phenotypes and genotypes through species-level pan-genomes allowed to infer gene-drug resistance associations using statistical testing. The isolate collection and the analysis results have been integrated into GEAR-base, a resource available for academic research use free of charge at https://gear-base.com.",2019-04-01 +30349509,PanGFR-HM: A Dynamic Web Resource for Pan-Genomic and Functional Profiling of Human Microbiome With Comparative Features.,"The conglomerate of microorganisms inhabiting various body-sites of human, known as the human microbiome, is one of the key determinants of human health and disease. 
Comprehensive pan-genomic and functional analysis approach for human microbiome components can enrich our understanding about impact of microbiome on human health. By utilizing this approach we developed PanGFR-HM (http://www.bioinfo.iicb.res.in/pangfr-hm/) - a novel dynamic web-resource that integrates genomic and functional characteristics of 1293 complete microbial genomes available from Human Microbiome Project. The resource allows users to explore genomic/functional diversity and genome-based phylogenetic relationships between human associated microbial genomes, not provided by any other resource. The key features implemented here include pan-genome and functional analysis of organisms based on taxonomy or body-site, and comparative analysis between groups of organisms. The first feature can also identify probable gene-loss events and significantly over/under represented KEGG/COG categories within pan-genome. The unique second feature can perform comparative genomic, functional and pathways analysis between 4 groups of microbes. The dynamic nature of this resource enables users to define parameters for orthologous clustering and to select any set of organisms for analysis. As an application for comparative feature of PanGFR-HM, we performed a comparative analysis with 67 Lactobacillus genomes isolated from human gut, oral cavity and urogenital tract, and therefore characterized the body-site specific genes, enzymes and pathways. Altogether, PanGFR-HM, being unique in its content and functionality, is expected to provide a platform for microbiome-based comparative functional and evolutionary genomics.",2018-10-08 +32964659,Pharmacoinformatic Investigation of Medicinal Plants from East Africa.,"Medicinal plants have widely been used in the traditional treatment of ailments and have been proven effective. Their contribution still holds an important place in modern drug discovery due to their chemical, and biological diversities. 
However, the poor documentation of traditional medicine, in developing African countries for instance, can lead to the loss of knowledge related to such practices. In this study, we present the Eastern Africa Natural Products Database (EANPDB) containing the structural and bioactivity information of 1870 unique molecules isolated from about 300 source species from the Eastern African region. This represents the largest collection of natural products (NPs) from this geographical region, covering literature data of the period from 1962 to 2019. The computed physicochemical properties and toxicity profiles of each compound have been included. A comparative analysis of some physico-chemical properties like molecular weight, H-bond donor/acceptor, logPo/w , etc. as well scaffold diversity analysis has been carried out with other published NP databases. EANPDB was combined with the previously published Northern African Natural Products Database (NANPDB), to form a merger African Natural Products Database (ANPDB), containing ∼6500 unique molecules isolated from about 1000 source species (freely available at http://african-compounds.org). As a case study, latrunculins A and B isolated from the sponge Negombata magnifica (Podospongiidae) with previously reported antitumour activities, were identified via substructure searching as molecules to be explored as putative binders of histone deacetylases (HDACs).",2020-10-08 +32436316,CoFly: A gene coexpression database for the fruit fly Drosophila melanogaster.,"The fruit fly Drosophila melanogaster can be used as a model organism for studying various problems in biomedicine and pest management. A large number of fruit fly transcriptomes have been profiled in various cell types, tissues, development stages, toxicological exposures, and other conditions by microarray. Until now, there are still no database developed for exploring those precious data. 
Microarray data for 4,367 samples from National Center for Biotechnology Information Gene Expression Omnibus was collected, and analyzed by weighted gene coexpression network analysis algorithm. Fifty one gene coexpression modules that are related to cell types, tissues, development stages, and other experimental conditions were identified. The high dimensional gene expression was reduced to tens of modules that were associated with experiments/traits, representing signatures for phenotypes. Six modules were enriched with genomic regions of clustered genes. Hub genes could also be screened by intramodule connectivity. By analyzing higher order module networks, we found that cell signaling modules are more connected than other modules. Module-based gene function identification may help to discover novel gene function. An easy-to-use database was developed, which provides a new source for gene function study in the fruit fly (http://bioinformatics.fafu.edu.cn/fly/).",2020-05-20 +34275625,Diagnosis of respiratory disease in preweaned dairy calves using sequential thoracic ultrasonography and clinical respiratory scoring: Temporal transitions and association with growth rates.,"Bovine respiratory disease (BRD) in dairy calves is a multifactorial condition, involving environmental, host, and pathogen factors. Thoracic ultrasound scoring (TUS) has recently been validated as an accurate method of detecting BRD-related lung pathology in dairy calves. Previous studies investigating the use of TUS in preweaned dairy calves have largely been based on cross-sectional data from all-year production systems. 
The objectives of this longitudinal observational study were to characterize the temporal transitions in TUS scores in dairy calves from pasture-based, seasonal-calving herds using sequential examinations during the preweaning period, and to investigate the relationship between the presence and temporal pattern of BRD, diagnosed by TUS or clinical respiratory scoring (CRS), and average daily gain (ADG). In spring of 2019, 317 preweaned calves from 7 commercial dairy farms were recruited at less than 4 wk old (ranging from 1-27 d of age). Each farm was examined on at least 3 occasions at 20- to 28-d intervals and housed indoors in group or individual pens. At each visit TUS scores, CRS scores based on the University of Wisconsin Calf Respiratory Score Chart (https://www.vetmed.wisc.edu/fapm/wp-content/uploads/2020/01/calf_respiratory_scoring_chart.pdf), and live weight using a dairy breed-specific weigh band were recorded. All data were recorded by the same 2 veterinarians over the course of the study. The final data set consisted of 966 TUS and CRS scores collected from 317 calves over a period of approximately 6 wk from 7 farms. The data were analyzed in multivariable, mixed effects, linear regression models, with separate models constructed for TUS and CRS scores. Random effects (intercepts) were included for calf, farm, and visit week. Additionally, a random slope was included for age at sampling by farm. Median farm TUS score ranged from 0 to 2.5 over the 3 visits (possible range: 0-5). The percentage of calves with a TUS score ≥3 (consolidation of the full thickness of 1 lung lobe), on each farm ranged from 0 to 50%. The median CRS in calves on individual farms ranged from 1 to 3 over the 3 visits (possible range: 0-12). The percentage of calves on each farm with a CRS score ≥5 (possible range: 0-12) ranged from 0 to 26%. The TUS and CRS scores were weakly correlated. The TUS was associated with reduced ADG. 
Calves with TUS scores ≥3 grew at 126 g/d less than unaffected calves over the 3-wk period before examination. The predicted effect on ADG was dependent on the age and duration over which the animal was affected. Calves affected later (i.e., between visits 2 and 3) had lower predicted weights at 63 d compared with calves with increased TUS scores earlier in the study period. Calves with a TUS score ≥3 at each of the 3 sampling points had the lowest weight at 63 d of age. There was no association of CRS with ADG. This study showed that in contrast to CRS, higher TUS scores are associated with lower ADG, with weight loss being more pronounced in chronic cases.",2021-07-16 +29328995,FilTer BaSe: A web accessible chemical database for small compound libraries.,"Finding novel chemical agents for targeting disease associated drug targets often requires screening of large number of new chemical libraries. In silico methods are generally implemented at initial stages for virtual screening. Filtering of such compound libraries on physicochemical and substructure ground is done to ensure elimination of compounds with undesired chemical properties. Filtering procedure, is redundant, time consuming and requires efficient bioinformatics/computer manpower along with high end software involving huge capital investment that forms a major obstacle in drug discovery projects in academic setup. We present an open source resource, FilTer BaSe- a chemoinformatics platform (http://bioinfo.net.in/filterbase/) that host fully filtered, ready to use compound libraries with workable size. The resource also hosts a database that enables efficient searching the chemical space of around 348,000 compounds on the basis of physicochemical and substructure properties. 
Ready to use compound libraries and database presented here is expected to aid a helping hand for new drug developers and medicinal chemists.",2018-01-06 +33227677,Radiocesium concentration ratios and radiation dose to wild rodents in Fukushima Prefecture.,"Radiocesium was dispersed from the Fukushima Dai-ichi disaster in March 2011, causing comparatively high radioactive contamination in nearby environments. Radionuclide concentrations in wild rodents (Apodemus argenteus, and Apodemus speciosus) within these areas were monitored from 2012 to 2016. However, whole-organism to soil transfer parameters (i.e., concentration ratio, CRwo-soil) for wild rodents at Fukushima were not determined and hence were lacking from the international transfer databases. We augmented the 2012-2016 data by collecting soil activity concentrations (Bq kg-1, dry mass) from five rodent sampling sites in Fukushima Prefecture, and developed corresponding CRwo-soil values for radiocesium (134Cs and 137Cs) based on rodent radioactivity concentrations (Bq kg-1, fresh mass). The CRwo-soil were added to the Wildlife Transfer Database (WTD; http://www.wildlifetransferdatabase.org/), supporting the development of the International Commission on Radiological Protection's (ICRP) environmental protection framework, and increasing the WTD from 84 to 477 entries for cesium and Muridae ('Reference Rat'). Significant variation occurred in CRwo-soil values between study sites within Fukushima Prefecture. The geometric mean CRwo-soil, in this paper, was higher than that reported for Muridae species for Chernobyl. Radiocaesium absorbed dose rates were also estimated for wild rodents inhabiting the five Fukushima study sites and ranged from 1.3 to 33 μGy h-1. Absorbed dose rates decreased by a factor of two from 2012 to 2016. 
Dose rates in highly contaminated areas were within the ICRP derived consideration reference level for Reference Rat (0.1-1 mGy d-1), suggesting the possible occurrence of deleterious effects and need for radiological effect studies in the Fukushima area.",2020-11-20 +33718550,Social network data of Swiss farmers related to agricultural climate change mitigation.,"We present social network data of Swiss farmers, focusing on exchange and advice relations regarding agricultural climate change mitigation. The data were generated via face-to-face interviews in 2019 using the survey software Network Canvas (https://networkcanvas.com). We interviewed 50 farmers, with 25 of these participating in a regional climate protection initiative in Switzerland as well as 25 farmers located in the same region who did not participate in the initiative. Farmers were asked to indicate the persons with whom they regularly exchanged on topics related to climate change and mitigation in agriculture. The farmers assessed the type and strength of their relationships and were asked to rate the knowledge of their contacts regarding climate change mitigation. We also collected data on the perceived influence of farmers and other persons on farming decisions. Information on farmers' adoption of climate change mitigation measures and behavioural characteristics was collected in a previous online survey. Farm characteristics were obtained from census data.",2021-02-19 +34730236,Accurate classification of plasma cell dyscrasias is achieved by combining artificial intelligence and flow cytometry.,"Monoclonal gammopathy of unknown significance (MGUS), smouldering multiple myeloma (SMM), and multiple myeloma (MM) are very common neoplasms. However, it is often difficult to distinguish between these entities. In the present study, we aimed to classify the most powerful markers that could improve diagnosis by multiparametric flow cytometry (MFC). 
The present study included 348 patients based on two independent cohorts. We first assessed how representative the data were in the discovery cohort (123 MM, 97 MGUS) and then analysed their respective plasma cell (PC) phenotype in order to obtain a set of correlations with a hypersphere visualisation. Cluster of differentiation (CD)27 and CD38 were differentially expressed in MGUS and MM (P < 0·001). We found by a gradient boosting machine method that the percentage of abnormal PCs and the ratio PC/CD117 positive precursors were the most influential parameters at diagnosis to distinguish MGUS and MM. Finally, we designed a decisional algorithm allowing a predictive classification ≥95% when PC dyscrasias were suspected, without any misclassification between MGUS and SMM. We validated this algorithm in an independent cohort of PC dyscrasias (n = 87 MM, n = 41 MGUS). This artificial intelligence model is freely available online as a diagnostic tool application website for all MFC centers worldwide (https://aihematology.shinyapps.io/PCdyscrasiasToolDg/).",2021-11-03 +30355343,Genome-wide analyses reveal the IRE1a-XBP1 pathway promotes T helper cell differentiation by resolving secretory stress and accelerating proliferation.,"

Background

The IRE1a-XBP1 pathway is a conserved adaptive mediator of the unfolded protein response. The pathway is indispensable for the development of secretory cells by facilitating protein folding and enhancing secretory capacity. In the immune system, it is known to function in dendritic cells, plasma cells, and eosinophil development and differentiation, while its role in T helper cell is unexplored. Here, we investigated the role of the IRE1a-XBP1 pathway in regulating activation and differentiation of type-2 T helper cell (Th2), a major T helper cell type involved in allergy, asthma, helminth infection, pregnancy, and tumor immunosuppression.

Methods

We perturbed the IRE1a-XBP1 pathway and interrogated its role in Th2 cell differentiation. We performed genome-wide transcriptomic analysis of differential gene expression to reveal IRE1a-XBP1 pathway-regulated genes and predict their biological role. To identify direct target genes of XBP1 and define XBP1's regulatory network, we performed XBP1 ChIPmentation (ChIP-seq). We validated our predictions by flow cytometry, ELISA, and qPCR. We also used a fluorescent ubiquitin cell cycle indicator mouse to demonstrate the role of XBP1 in the cell cycle.

Results

We show that Th2 lymphocytes induce the IRE1a-XBP1 pathway during in vitro and in vivo activation. Genome-wide transcriptomic analysis of differential gene expression by perturbing the IRE1a-XBP1 pathway reveals XBP1-controlled genes and biological pathways. Performing XBP1 ChIPmentation (ChIP-seq) and integrating with transcriptomic data, we identify XBP1-controlled direct target genes and its transcriptional regulatory network. We observed that the IRE1a-XBP1 pathway controls cytokine secretion and the expression of two Th2 signature cytokines, IL13 and IL5. We also discovered that the IRE1a-XBP1 pathway facilitates activation-dependent Th2 cell proliferation by facilitating cell cycle progression through S and G2/M phase.

Conclusions

We confirm and detail the critical role of the IRE1a-XBP1 pathway during Th2 lymphocyte activation in regulating cytokine expression, secretion, and cell proliferation. Our high-quality genome-wide XBP1 ChIP and gene expression data provide a rich resource for investigating XBP1-regulated genes. We provide a browsable online database available at http://data.teichlab.org .",2018-10-24 +34328049,Exercise for depression and depressive symptoms in older adults: an umbrella review of systematic reviews and Meta-analyses.,"

Objectives

We aimed to gather and update the evidence on the impact of exercise on late-life depression.

Method

We conducted an umbrella review of meta-analyses of randomized controlled trials (RCTs) that assessed the effects of an exercise intervention for depression in older adults (e.g. 60+). Searches were conducted in Scopus, Web of Science, Embase, PubMed, BIREME, LILACS, SciELO, Cochrane Library for Systematic Reviews, and Opengray.eu. Methodological quality was assessed using A MeaSurement Tool to Assess Systematic Reviews (AMSTAR 2). Data analysis was performed with RStudio (version 4.0.2) and the generic inverse-variance method was used to pool the effect sizes from the included studies.

Results

Twelve meta-analyses of 97 RCTs were included. The AMSTAR 2 rating was considered critically low in five studies, low in six studies, and high in one study. The effect size expressed by the standardized mean difference (SMD) varied between studies from -0.90 (95% CI = -1.51; -0.28) to -0.14 (95% CI = -0.36; 0.07) in favor of the exercise intervention. Pooling of the effect sizes produced a statistically significant moderate effect in which exercise was associated with lower levels of depression and depressive symptoms (OR = 2.24, 95% CI 1.77; 2.84).

Conclusion

Our findings suggest that exercise produces a moderate improvement in depression and depressive symptoms in older patients. We recommend providing physical activity for older adults.

Key-points

We investigated the effects of exercise interventions for depression in older adults.Supplemental data for this article can be accessed online at https://doi.org/10.1080/13607863.2021.1951660.",2021-07-30 +34297061,miRAnno-network-based functional microRNA annotation. ,"Functional annotation is a common part of microRNA-related research, typically carried as pathway enrichment analysis of the selected microRNA targets. Here we propose miRAnno, a fast and easy-to-use web application for microRNA annotation. miRAnno uses comprehensive molecular interaction network and random walks with restart to measure the association between microRNAs and individual pathways. Independent validation shows that miRAnno achieves higher signal-to-noise ratio compared to the standard enrichment analysis. miRAnno is freely available at https://ophid.utoronto.ca/miRAnno/. Supplementary data are available at Bioinformatics online.",2021-07-23 +34702131,Psychological Correlates of Perceived Physical Activity Engagement During the COVID-19 Pandemic Among Previously Active Individuals.,"The COVID-19 pandemic altered daily life in the United States and disrupted how people engage in routine health behaviors, such as physical activity (PA). This study investigates factors that may have helped people sustain recommended levels of moderate to vigorous PA (MVPA) during this time. Using a cross-sectional design, we recruited from Amazon's Mechanical Turk in April/May 2020 a sample of individuals who reported that they had met recommended PA guidelines (≥ 150 weekly MVPA minutes; N = 397) prior to structural changes brought about by COVID-19. We assessed via self-report whether these individuals were meeting recommended levels of MVPA during the COVID-19 pandemic, their intrinsic motivation and identified regulation for exercise, exercise self-efficacy, perceived disruption to their exercise routine, and access to resources for PA. 
Higher identified regulation, self-efficacy, access to PA resources, and lower perceived disruption were associated with meeting PA guidelines during COVID-19. These findings provide insight into factors that may be important for continued engagement in MVPA when one experiences major disruptions to their exercise routine.Supplemental data for this article is available online at https://doi.org/10.1080/08964289.2021.1929811 .",2021-10-26 +32683444,Machine Boss: rapid prototyping of bioinformatic automata.,"

Motivation

Many software libraries for using Hidden Markov Models in bioinformatics focus on inference tasks, such as likelihood calculation, parameter-fitting and alignment. However, construction of the state machines can be a laborious task, automation of which would be time-saving and less error-prone.

Results

We present Machine Boss, a software tool implementing not just inference and parameter-fitting algorithms, but also a set of operations for manipulating and combining automata. The aim is to make prototyping of bioinformatics HMMs as quick and easy as the construction of regular expressions, with one-line 'recipes' for many common applications. We report data from several illustrative examples involving protein-to-DNA alignment, DNA data storage and nanopore sequence analysis.

Availability and implementation

Machine Boss is released under the BSD-3 open source license and is available from http://machineboss.org/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-01 +29236308,Database of transcription factors in lung cancer (DBTFLC): A novel resource for exploring transcription factors associated with lung cancer.,"Lung cancer is considered as the most prevalent form of cancer and it is found to be frequent cause of cancer related death. Even though, approved molecular targeted therapies other than chemotherapy are currently unavailable, the mechanism of pathogenesis in lung cancer remains still unclear. Transcription factors (TFs) play a critical role in cancer cell processes, such as cell proliferation, apoptosis, migration, and regulate gene expression. Thus, the identification and characterization of transcription factors involved in lung cancer would provide valuable information for further elucidation of the mechanism(s) underlying pathogenesis and the identification of potential therapeutic target types, which are critical for the development of therapeutic strategies. Through an extensive literature survey, we have identified 349 transcription factors noted for their strong involvement in lung cancer. Database of Transcription Factors in Lung Cancer (DBTFLC) was constructed as a data repository and analytical platform for systematic collection, curation of TFs and their interacting partners. The database includes all pertinent information such as lung cancer related TFs, chromosomal location, family, lung cancer type, references, TF-TF interaction(s), and TF-target gene interaction(s); thus, it could serve as a valuable resource for therapeutic studies in lung cancer. The database is freely available at http://www.vit.ac.in/files/database/Home.php.",2018-04-17 +34251760,pr2-primers: An 18S rRNA primer database for protists.,"Metabarcoding of microbial eukaryotes (collectively known as protists) has developed tremendously in the last decade, almost solely relying on the 18S rRNA gene. 
As microbial eukaryotes are extremely diverse, many primers and primer pairs have been developed. To cover a relevant and representative fraction of the protist community in a given study system, an informed primer choice is necessary, as no primer pair can target all protists equally well. As such, a smart primer choice is very difficult even for experts and there are very few online resources available to list existing primers. We built a database listing 285 primers and 83 unique primer pairs that have been used for eukaryotic 18S rRNA gene metabarcoding. In silico performance of primer pairs was tested against two sequence databases: PR2 version 4.12.0 for eukaryotes and a subset of silva version 132 for bacteria and archaea. We developed an R-based web application enabling browsing of the database, visualization of the taxonomic distribution of the amplified sequences with the number of mismatches, and testing any user-defined primer or primer set (https://app.pr2-primers.org). Taxonomic specificity of primer pairs, amplicon size and location of mismatches can also be determined. We identified universal primer sets that matched the largest number of sequences and analysed the specificity of some primer sets designed to target certain groups. This tool enables guided primer choices that will help a wide range of researchers to include protists as part of their investigations.",2021-07-29 +34132752,"SANS serif: alignment-free, whole-genome based phylogenetic reconstruction. ","SANS serif is a novel software for alignment-free, whole-genome based phylogeny estimation that follows a pangenomic approach to efficiently calculate a set of splits in a phylogenetic tree or network. Implemented in C ++ and supported on Linux, MacOS, and Windows. The source code is freely available for download at https://gitlab.ub.uni-bielefeld.de/gi/sans. 
Supplementary data are available at Bioinformatics online.",2021-06-16 +31838187,International Severe Asthma Registry: Mission Statement.,"Regional and/or national severe asthma registries provide valuable country-specific information. However, they are often limited in scope within the broader definitions of severe asthma, have insufficient statistical power to answer many research questions, lack intraoperability to share lessons learned, and have fundamental differences in data collected, making cross comparisons difficult. What is missing is a worldwide registry which brings all severe asthma data together in a cohesive way, under a single umbrella, based on standardized data collection protocols, permitting data to be shared seamlessly. The International Severe Asthma Registry (ISAR; http://isaregistries.org/) is the first global adult severe asthma registry. It is a joint initiative where national registries (both newly created and preexisting) retain ownership of their own data but open their borders and share data with ISAR for ethically approved research purposes. Its strength comes from collection of patient-level, anonymous, longitudinal, real-life, standardized, high-quality data (using a core set of variables) from countries across the world, combined with organizational structure, database experience, inclusivity/openness, and clinical, academic, and database expertise. This gives ISAR sufficient statistical power to answer important research questions, sufficient data standardization to compare across countries and regions, and the structure and expertise necessary to ensure its continuance and the scientific integrity and clinical applicability of its research. ISAR offers a unique opportunity to implement existing knowledge, generate new knowledge, and identify the unknown, therefore promoting new research. 
The aim of this commentary is to fully describe how ISAR may improve our understanding of severe asthma.",2019-12-12 +,First Report of Sugarcane Yellow Leaf Disease in Mexico and Detection of ‘Candidatus Phytoplasma asteris’-Related Strains in Affected Plants,"Sugarcane is a common name for any of several tall perennial grass species of the genus Saccharum. As a major source for sugar production and an efficient feedstock for biofuel generation, sugarcane is widely cultivated in tropical and subtropical regions. Mexico alone produces over six million metric tons of cane sugar annually valued at 1.3 billion U.S. dollars. Although economically important, sugarcane is susceptible to devastating diseases caused by phloem-colonizing, cell wall-less bacteria known as phytoplasmas. Different sugarcane phytoplasmal diseases around the world have been attributed to diverse phytoplasmas belonging to six mutually distinct ‘Candidatus Phytoplasma’ species (Marcone 2002). During the 2015 to 2016 growing season, sugarcane plants exhibiting leaf discolorations (white and yellow streaks) indicative of sugarcane yellow leaf (ScYL) disease were observed in a sugarcane field in Cosamaloapan, Veracruz, Mexico, with less than 1% of the plants in the field being affected. Leaf samples were collected from three symptomatic and three asymptomatic plants in the same field. Total DNA was extracted from leaf midribs using a modified cetyltrimethylammonium bromide method (Pérez-López et al. 2016). A preliminary diagnostic assay was carried out using direct polymerase chain reactions (PCRs) with phytoplasma-specific primer pair R16F2n/R16R2. All PCR assays with DNA templates from symptomatic plants produced a phytoplasma-characteristic amplicon of 1.25 kb. No amplicon was detected from samples of asymptomatic plants. The DNA samples from symptomatic plants were subjected to further PCR analysis with primer pair P1A/16S-SR as previously described (Wei et al. 2011). 
All three samples were PCR positive, and each yielded a 1,539-bp amplicon. The amplicons were cloned and sequenced with at least 6× coverage per base position. DNA sequence analysis confirmed that the amplicon represented a near-full-length 16S rRNA gene and a partial 16S-23S RNA gene intergenic spacer. The obtained DNA sequences were deposited into GenBank (accession nos. MH891144 through MH891146). Analysis of the sequences through the iPhyClassifier (Zhao et al. 2009, https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi) revealed that the phytoplasmas detected in the ScYL-diseased plants were closely related to the reference strain of ‘Candidatus Phytoplasma asteris’ (aster yellows phytoplasma, >99.4% sequence similarity). Although the three 16S rRNA gene sequences differed from each other by two to five bases, all possessed the entire set of signature sequences (G196GGAGGA202, C444TGACGGTACC454, and C976ACAGTGGA GGTTATCAGTTG996) of ‘Ca. P. asteris’ (Lee et al. 2004), indicating the three ScYL phytoplasma strains are “sequevars” (Davis et al. 2015) affiliated with a single phytoplasma species. To our knowledge, this is the first report of phytoplasmal ScYL disease in Mexico. Associations of aster yellows phytoplasma with sugarcane diseases were previously reported in Cuba and Brazil (Arocha et al. 1999; Silva et al. 2009). Recently, infection of aster yellows phytoplasma in weedy grass growing near sugarcane fields in Mexico was noted (Pérez-López and Dumonceaux 2017). 
Findings of aster yellows phytoplasma infecting sugarcane in the countries of Brazil, Cuba, and now Mexico (this report) underscore the need for disease surveillance of sugarcane in neighboring countries, because insect vectors capable of spreading aster yellows phytoplasma strains are known to be present over wide areas, including the Caribbean countries and the United States.",2019-05-01 +30239683,ATD: a comprehensive bioinformatics resource for deciphering the association of autophagy and diseases. ,"Autophagy is the natural, regulated, destructive mechanism of the eukaryotes cell that disassembles unnecessary or dysfunctional components. In recent years, the association between autophagy and diseases has attracted more and more attention, but our understanding of the molecular mechanism about the association in the system perspective is limited and ambiguous. Hence, we developed the comprehensive bioinformatics resource Autophagy To Disease (ATD, http://auto2disease.nwsuaflmz.com) to archive autophagy-associated diseases. This resource provides bioinformatics annotation system about genes and chemicals about autophagy and human diseases by extracting results from previous studies with text mining technology. Based on the big data from ATD, we found that some classes of disease tend to be related with autophagy, including respiratory disease, cancer, urogenital disease and digestive system disease. We also found that some classes of autophagy-related diseases have a strong association among each other and constitute modules. Furthermore, we extracted the autophagy-disease-related genes (ADGs) from ATD and provided a novel algorithm Optimized Random Forest with Label model to predict potential ADGs. 
This bioinformatics annotation system about autophagy and human diseases may provide a basic resource for the further detection of the molecular mechanisms of autophagy pathway to disease.",2018-01-01 +,Applying an Intelligent Personal Agent on a Smart Home Using a Novel Dialogue Generator,"Nowadays, Intelligent Personal Agents include Natural Language Understanding (NLU) modules, that utilize Machine Learning (ML), which can be included in different kind of applications in order to enable the translation of users’ input into different kinds of actions, as well as ML modules that handle dialogue. This translation is attained by the matching of a user’s sentence with an intent contained in an Agent. This paper introduces the first generation of the CERTH Intelligent Personal Agent (CIPA) which is based on the RASA (https://rasa.com/) framework and utilizes two machine learning models for NLU and dialogue flow classification. Besides the architecture of CIPA—Generation A, a novel dialogue-story generator that is based on the idea of adjacency pairs is introduced. By utilizing on this novel-generator, the agent is able to create all the possible dialog trees in order to handle conversations without training on existing data in contrast with the majority of the current alternative solutions. CIPA supports multiple intents and it is capable of classifying complex sentences consisting of two user’s intents into two automatic operations from the part of the agent. 
The introduced CIPA—Generation A has been deployed and tested in a real-world scenario at Centre’s of Research & Technology Hellas (CERTH) nZEB Smart Home (https://smarthome.iti.gr/) in two different domains, energy and health domain.",2020-01-01 +34871897,Foot angular kinematics measured with inertial measurement units: A reliable criterion for real-time gait event detection.,"Accurate and reliable real-time detection of gait events using inertial measurement units (IMUs) is crucial for (1) developing clinically meaningful gait parameters to differentiate normal and impaired gait or (2) creating patient-tailored gait rehabilitation strategies or control of prosthetic devices using feedback from gait phases. However, most previous studies focused only on algorithms with high temporal accuracy and neglected the importance of (1) high reliability, i.e., detecting only and all true gait events, and (2) real-time implementation. Thus, in this study, we presented a novel approach for initial contact (IC) and terminal contact (TC) detection in real-time based on the measurement of the foot orientation. Unlike foot/shank angular velocity and acceleration, foot orientation provides physiologically meaningful kinematic features corresponding to our observational recognition of IC and TC, regardless of the walking modality. We conducted an experimental study to validate our algorithm, including seven participants performing four walking/running activities. By analyzing 5,555 ICs/TCs recorded during the tests, only our algorithm achieved a sensitivity and precision of 100%. Our obtained temporal accuracy (mean ± standard deviation of errors ranging from 0 ± 3 to 6 ± 5 time samples; sampling frequency: 100 Hz) was better than or comparable to those reported in the literature. 
Our algorithm's performance does not depend on thresholds and gait speed/modality, and it can be used for feedback-based therapeutic gait training or real-time control of assistive or prosthetic technologies. Nevertheless, its performance for pathological gait must be validated in the future. Finally, we shared the codes and sample data on https://www.ncbl.ualberta.ca/codes.",2021-11-27 +34837942,A multitask transfer learning framework for the prediction of virus-human protein-protein interactions.,"

Background

Viral infections are causing significant morbidity and mortality worldwide. Understanding the interaction patterns between a particular virus and human proteins plays a crucial role in unveiling the underlying mechanism of viral infection and pathogenesis. This could further help in prevention and treatment of virus-related diseases. However, the task of predicting protein-protein interactions between a new virus and human cells is extremely challenging due to scarce data on virus-human interactions and fast mutation rates of most viruses.

Results

We developed a multitask transfer learning approach that exploits the information of around 24 million protein sequences and the interaction patterns from the human interactome to counter the problem of small training datasets. Instead of using hand-crafted protein features, we utilize statistically rich protein representations learned by a deep language modeling approach from a massive source of protein sequences. Additionally, we employ an additional objective which aims to maximize the probability of observing human protein-protein interactions. This additional task objective acts as a regularizer and also allows to incorporate domain knowledge to inform the virus-human protein-protein interaction prediction model.

Conclusions

Our approach achieved competitive results on 13 benchmark datasets and the case study for the SARS-COV-2 virus receptor. Experimental results show that our proposed model works effectively for both virus-human and bacteria-human protein-protein interaction prediction tasks. We share our code for reproducibility and future research at https://git.l3s.uni-hannover.de/dong/multitask-transfer .",2021-11-27 +33720349,"Robust, flexible, and scalable tests for Hardy-Weinberg equilibrium across diverse ancestries.","Traditional Hardy-Weinberg equilibrium (HWE) tests (the χ2 test and the exact test) have long been used as a metric for evaluating genotype quality, as technical artifacts leading to incorrect genotype calls often can be identified as deviations from HWE. However, in data sets composed of individuals from diverse ancestries, HWE can be violated even without genotyping error, complicating the use of HWE testing to assess genotype data quality. In this manuscript, we present the Robust Unified Test for HWE (RUTH) to test for HWE while accounting for population structure and genotype uncertainty, and to evaluate the impact of population heterogeneity and genotype uncertainty on the standard HWE tests and alternative methods using simulated and real sequence data sets. Our results demonstrate that ignoring population structure or genotype uncertainty in HWE tests can inflate false-positive rates by many orders of magnitude. Our evaluations demonstrate different tradeoffs between false positives and statistical power across the methods, with RUTH consistently among the best across all evaluations. RUTH is implemented as a practical and scalable software tool to rapidly perform HWE tests across millions of markers and hundreds of thousands of individuals while supporting standard VCF/BCF formats. 
RUTH is publicly available at https://www.github.com/statgen/ruth.",2021-05-01 +32540200,BGVD: An Integrated Database for Bovine Sequencing Variations and Selective Signatures.,"Next-generation sequencing has yielded a vast amount of cattle genomic data for global characterization of population genetic diversity and identification of genomic regions under natural and artificial selection. However, efficient storage, querying, and visualization of such large datasets remain challenging. Here, we developed a comprehensive database, the Bovine Genome Variation Database (BGVD). It provides six main functionalities: gene search, variation search, genomic signature search, Genome Browser, alignment search tools, and the genome coordinate conversion tool. BGVD contains information on genomic variations comprising ~60.44 M SNPs, ~6.86 M indels, 76,634 CNV regions, and signatures of selective sweeps in 432 samples from modern cattle worldwide. Users can quickly retrieve distribution patterns of these variations for 54 cattle breeds through an interactive source of breed origin map, using a given gene symbol or genomic region for any of the three versions of the bovine reference genomes (ARS-UCD1.2, UMD3.1.1, and Btau 5.0.1). Signals of selection sweep are displayed as Manhattan plots and Genome Browser tracks. To further investigate and visualize the relationships between variants and signatures of selection, the Genome Browser integrates all variations, selection data, and resources, from NCBI, the UCSC Genome Browser, and Animal QTLdb. Collectively, all these features make the BGVD a useful archive for in-depth data mining and analyses of cattle biology and cattle breeding on a global scale. 
BGVD is publicly available at http://animal.nwsuaf.edu.cn/BosVar.",2020-04-01 +34323136,Food Quality Score and Risk of Breast Cancer among Iranian Women: Findings from a Case Control Study.,"The quality of foods we consume may be an important risk factor for breast cancer (BrCa); however, relations between quality of food metrics and BrCa risk have not been systematically investigated. The purpose of this study was to examine the association between food quality score (FQS) by assessing the intake of healthy and unhealthy food and the odds of (BrCa) among Iranian women. This hospital-based case-control study was carried out on 150 women with pathologically confirmed breast cancer within the past three months and 150 healthy controls that were age-match from the Cancer Research Center, Imam Khomeini hospital, Iran. Participants were interviewed to obtain data relating to diet (using a 147-item validated FFQ) and BrCa risk factors. We found a significant association between adherence to the FQS and odds of breast cancer in the fully adjusted model (OR: 0.58; P = 0.04) and in premenopausal women in the fully adjusted model (OR: 0.45; P = 0.02); however, we did not observe any association between postmenopausal women in the adjusted model (OR: 0.76; P = 0.5). We also failed to observe any association between healthy (p = 0.3) and unhealthy subgroups (p = 0.3) of FQS. Our findings suggest that adherence to FQS may be associated with an increased risk of breast cancer in crude and adjusted models in overall and premenopausal women. However, we did not see any association between FQS and BrCa risk in postmenopausal women. Prospective cohort studies are needed to confirm these findings.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1957136 .",2021-07-29 +34849578,GPA-Tree: Statistical Approach for Functional-Annotation-Tree-Guided Prioritization of GWAS Results.,"

Motivation

In spite of great success of genome-wide association studies (GWAS), multiple challenges still remain. First, complex traits are often associated with many single nucleotide polymorphisms (SNPs), each with small or moderate effect sizes. Second, our understanding of the functional mechanisms through which genetic variants are associated with complex traits is still limited. To address these challenges, we propose GPA-Tree and it simultaneously implements association mapping and identifies key combinations of functional annotations related to risk-associated SNPs by combining a decision tree algorithm with a hierarchical modeling framework.

Results

First, we implemented simulation studies to evaluate the proposed GPA-Tree method and compared its performance with existing statistical approaches. The results indicate that GPA-Tree outperforms existing statistical approaches in detecting risk-associated SNPs and identifying the true combinations of functional annotations with high accuracy. Second, we applied GPA-Tree to a systemic lupus erythematosus (SLE) GWAS and functional annotation data including GenoSkyline and GenoSkylinePlus. The results from GPA-Tree highlight the dysregulation of blood immune cells, including but not limited to primary B, memory helper T, regulatory T, neutrophils and CD8+ memory T cells in SLE. These results demonstrate that GPA-Tree can be a powerful tool that improves association mapping while facilitating understanding of the underlying genetic architecture of complex traits and potential mechanisms linking risk-associated SNPs with complex traits.

Availability

The GPATree software is available at https://dongjunchung.github.io/GPATree/.

Supplementary information

Supplementary information is available at Bioinformatics online.",2021-11-26 +33181822,"Color Data v2: a user-friendly, open-access database with hereditary cancer and hereditary cardiovascular conditions datasets. ","Publicly available genetic databases promote data sharing and fuel scientific discoveries for the prevention, treatment and management of disease. In 2018, we built Color Data, a user-friendly, open access database containing genotypic and self-reported phenotypic information from 50 000 individuals who were sequenced for 30 genes associated with hereditary cancer. In a continued effort to promote access to these types of data, we launched Color Data v2, an updated version of the Color Data database. This new release includes additional clinical genetic testing results from more than 18 000 individuals who were sequenced for 30 genes associated with hereditary cardiovascular conditions as well as polygenic risk scores for breast cancer, coronary artery disease and atrial fibrillation. In addition, we used self-reported phenotypic information to implement the following four clinical risk models: Gail Model for 5-year risk of breast cancer, Claus Model for lifetime risk of breast cancer, simple office-based Framingham Coronary Heart Disease Risk Score for 10-year risk of coronary heart disease and CHARGE-AF simple score for 5-year risk of atrial fibrillation. These new features and capabilities are highlighted through two sample queries in the database. We hope that the broad dissemination of these data will help researchers continue to explore genotype-phenotype correlations and identify novel variants for functional analysis, enabling scientific discoveries in the field of population genomics. Database URL: https://data.color.com/.",2020-01-01 +34037595,[The Pacemaker and Implantable Cardioverter-Defibrillator Registry of the Italian Association of Arrhythmology and Cardiac Pacing - Annual report 2019].,"

Background

The pacemaker (PM) and implantable cardioverter-defibrillator (ICD) Registry of the Italian Association of Arrhythmology and Cardiac Pacing (AIAC) monitors the main epidemiological data in real-world practice. The survey for the 2019 activity collects information about demographics, clinical characteristics, main indications for PM/ICD therapy and device types from the Italian collaborating centers.

Methods

The Registry collects prospectively national PM and ICD implantation activity on the basis of European cards.

Results

PM Registry: data about 22 889 PM implantations were collected (19 621 first implants and 3268 replacements). The number of collaborating centers was 173. Median age of treated patients was 81 years (75 quartile I; 87 quartile III). ECG indications included atrioventricular conduction disorders in 33.3% of first PM implants, sick sinus syndrome in 16.4%, atrial fibrillation plus bradycardia in 11.6%, other in 38.7%. Among atrioventricular conduction defects, third-degree atrioventricular block was the most common type (18.3% of first implants). Use of single-chamber PMs was reported in 25.5% of first implants, of dual-chamber PMs in 67.1%, of PMs with cardiac resynchronization therapy (CRT) in 1.5%, and of single lead atrial-synchronized ventricular stimulation (VDD/R PMs) in 5.8%. ICD Registry: data about 17 328 ICD implantations were collected (12 129 first implants and 5199 replacements). The number of collaborating centers was 425. Median age of treated patients was 71 years (62 quartile I; 77 quartile III). Primary prevention indication was reported in 83.1% of first implants, secondary prevention in 16.9% (cardiac arrest in 5.9%). A single-chamber ICD was used in 26.1% of first implants, dual-chamber ICD in 28.0% and biventricular ICD in 45.9%.

Conclusions

The PM and ICD Registry appears fundamental for monitoring PM and ICD utilization on a large national scale with rigorous examination of demographics and clinical indications. The PM Registry showed stable electrocardiographic and symptom indications, with an important prevalence of dual-chamber pacing. The use of CRT-PM regards a very limited number of patients. The ICD Registry documented a large use of prophylactic and biventricular ICD, reflecting a favorable adherence to trials and guidelines in clinical practice. In order to increase and optimize the cooperation of Italian implanting centers, online data entry (https://www.aiac.it/riprid) should be adopted at large scale.",2021-06-01 +29869221,Miami University deception detection database.,"In the present work, we introduce the Miami University Deception Detection Database (MU3D), a free resource containing 320 videos of target individuals telling truths and lies. Eighty (20 Black female, 20 Black male, 20 White female, and 20 White male) different targets were recorded speaking honestly and dishonestly about their social relationships. Each target generated four different videos (i.e., positive truth, negative truth, positive lie, negative lie), yielding 320 videos fully crossing target race, target gender, statement valence, and statement veracity. These videos were transcribed by trained research assistants and evaluated by naïve raters. Descriptive analyses of the video characteristics (e.g., length) and subjective ratings (e.g., target attractiveness) are provided. The stimuli and an information codebook can be accessed free of charge for academic research purposes from http://hdl.handle.net/2374.MIA/6067 . 
The MU3D offers scholars the ability to conduct research using standardized stimuli that can aid in building more comprehensive theories of interpersonal sensitivity, enhance replication among labs, facilitate the use of signal detection analyses, and promote consideration of race, gender, and their interactive effects in deception detection research.",2019-02-01 +34241550,Radiomic Features at CT Can Distinguish Pancreatic Cancer from Noncancerous Pancreas.,"Purpose To identify distinguishing CT radiomic features of pancreatic ductal adenocarcinoma (PDAC) and to investigate whether radiomic analysis with machine learning can distinguish between patients who have PDAC and those who do not. Materials and Methods This retrospective study included contrast material-enhanced CT images in 436 patients with PDAC and 479 healthy controls from 2012 to 2018 from Taiwan that were randomly divided for training and testing. Another 100 patients with PDAC (enriched for small PDACs) and 100 controls from Taiwan were identified for testing (from 2004 to 2011). An additional 182 patients with PDAC and 82 healthy controls from the United States were randomly divided for training and testing. Images were processed into patches. An XGBoost (https://xgboost.ai/) model was trained to classify patches as cancerous or noncancerous. Patients were classified as either having or not having PDAC on the basis of the proportion of patches classified as cancerous. For both patch-based and patient-based classification, the models were characterized as either a local model (trained on Taiwanese data only) or a generalized model (trained on both Taiwanese and U.S. data). Sensitivity, specificity, and accuracy were calculated for patch- and patient-based analysis for the models. Results The median tumor size was 2.8 cm (interquartile range, 2.0-4.0 cm) in the 536 Taiwanese patients with PDAC (mean age, 65 years ± 12 [standard deviation]; 289 men). 
Compared with normal pancreas, PDACs had lower values for radiomic features reflecting intensity and higher values for radiomic features reflecting heterogeneity. The performance metrics for the developed generalized model when tested on the Taiwanese and U.S. test data sets, respectively, were as follows: sensitivity, 94.7% (177 of 187) and 80.6% (29 of 36); specificity, 95.4% (187 of 196) and 100% (16 of 16); accuracy, 95.0% (364 of 383) and 86.5% (45 of 52); and area under the curve, 0.98 and 0.91. Conclusion Radiomic analysis with machine learning enabled accurate detection of PDAC at CT and could identify patients with PDAC. Keywords: CT, Computer Aided Diagnosis (CAD), Pancreas, Computer Applications-Detection/Diagnosis Supplemental material is available for this article. © RSNA, 2021.",2021-07-01 +,234. Reversal of Carbapenem and Amikacin Susceptibilities in Isogenic Klebsiella pneumoniae From a Patient with Persistent Bacteriuria,"Abstract

Background

Genomic tools permit a detailed analysis of antibiotic resistance determinants in bacteria, or resistome. Here we discuss variations in antibiotic resistance in K. pneumoniae (Kp) not explained by changes in the resistome.

Methods

We compared Kp strains with divergent carbapenem and aminoglycoside susceptibilities. After identification of bacteria, antibiotic susceptibility testing was performed according to CLSI guidelines. Draft genome sequences were generated using Illumina MiSeq (Nextera paired-end library) and assembled using CLC Genomics Workbench (CLC bio, Cambridge, MA). Resistome, plasmid types and MLST were investigated using the CGE platform (http://cge.cbs.dtu.dk), while capsular type and virulence genes were investigated using the Pasteur BIGsDB database (https://bigsdb.pasteur.fr).

Results

While receiving amoxicillin-clavulanate, a 44-year-old man with diabetes mellitus and paraplegia with neurogenic bladder grew Kp resistant to carbapenems and amikacin from urine. He was treated with fosfomycin and amikacin, followed by imipenem and plazomicin, prior to lithotripsy. Three months later, while off antibiotics, urine cultures grew Kp susceptible to carbapenems and amikacin (figure). Genetic comparison between resistant (November 20, 2018) and susceptible (January 30, 2019) strains revealed they were isogenic, only differing by 559 SNPs (table). Both were ST14, presented capsular type 16, and shared cephalosporinase (blaSHV-28, blaCTX-M-15, blaTEM-1B, blaOXA-1) and aminoglycoside modifying enzyme (AME) (aph(3’’)-Ib, aph(6)-Id, aac(6’)-Ib-cr) genes. Although both had mutations in the outer membrane porin OmpK36, these differed (stop AA125 and frameshift AA183, respectively).

Conclusion

Carbapenem resistance in the initial Kp is likely explained by overexpression of cephalosporinases in combination with changes in membrane permeability, while amikacin resistance is likely due to AMEs. Since no significant gene variation was observed in the susceptible Kp, reversal of resistance was likely due to decreased expression of cephalosporinases and AMEs after antibiotics were stopped. Incorporation of antibiotic history and host factors can explain clinically important changes in antibiotic resistance.

Disclosures

All authors: No reported disclosures.",2019-10-01 +35024355,Multilocus Sequence Typing Reveals Extensive Genetic Diversity of the Emerging Fungal Pathogen Scedosporium aurantiacum.,"Scedosporium spp. are the second most prevalent filamentous fungi after Aspergillus spp. recovered from cystic fibrosis (CF) patients in various regions of the world. Although invasive infection is uncommon prior to lung transplantation, fungal colonization may be a risk factor for invasive disease with attendant high mortality post-transplantation. Abundant in the environment, Scedosporium aurantiacum has emerged as an important fungal pathogen in a range of clinical settings. To investigate the population genetic structure of S. aurantiacum, a MultiLocus Sequence Typing (MLST) scheme was developed, screening 24 genetic loci for polymorphisms on a tester strain set. The six most polymorphic loci were selected to form the S. aurantiacum MLST scheme: actin (ACT), calmodulin (CAL), elongation factor-1α (EF1α), RNA polymerase subunit II (RPB2), manganese superoxide dismutase (SOD2), and β-tubulin (TUB). Among 188 global clinical, veterinary, and environmental strains, 5 to 18 variable sites per locus were revealed, resulting in 8 to 23 alleles per locus. MLST analysis observed a markedly high genetic diversity, reflected by 159 unique sequence types. Network analysis revealed a separation between Australian and non-Australian strains. Phylogenetic analysis showed two major clusters, indicating correlation with geographic origin. Linkage disequilibrium analysis revealed evidence of recombination. There was no clustering according to the source of the strains: clinical, veterinary, or environmental. The high diversity, especially amongst the Australian strains, suggests that S. 
aurantiacum may have originated within the Australian continent and was subsequently dispersed to other regions, as shown by the close phylogenetic relationships between some of the Australian sequence types and those found in other parts of the world. The MLST data are accessible at http://mlst.mycologylab.org. This is a joined publication of the ISHAM/ECMM working groups on ""Scedosporium/Pseudallescheria Infections"" and ""Fungal Respiratory Infections in Cystic Fibrosis"".",2021-12-27 +29243572,"Development, dissemination, and applications of a new terminological resource, the Q-Code taxonomy for professional aspects of general practice/family medicine.","

Background

While documentation of clinical aspects of General Practice/Family Medicine (GP/FM) is assured by the International Classification of Primary Care (ICPC), there is no taxonomy for the professional aspects (context and management) of GP/FM.

Objectives

To present the development, dissemination, applications, and resulting face validity of the Q-Codes taxonomy specifically designed to describe contextual features of GP/FM, proposed as an extension to the ICPC.

Development

The Q-Codes taxonomy was developed from Lamberts' seminal idea for indexing contextual content (1987) by a multi-disciplinary team of knowledge engineers, linguists and general practitioners, through a qualitative and iterative analysis of 1702 abstracts from six GP/FM conferences using Atlas.ti software. A total of 182 concepts, called Q-Codes, representing professional aspects of GP/FM were identified and organized in a taxonomy. Dissemination: The taxonomy is published as an online terminological resource, using semantic web techniques and web ontology language (OWL) ( http://www.hetop.eu/Q ). Each Q-Code is identified with a unique resource identifier (URI), and provided with preferred terms, and scope notes in ten languages (Portuguese, Spanish, English, French, Dutch, Korean, Vietnamese, Turkish, Georgian, German) and search filters for MEDLINE and web searches.

Applications

This taxonomy has already been used to support queries in bibliographic databases (e.g., MEDLINE), to facilitate indexing of grey literature in GP/FM as congress abstracts, master theses, websites and as an educational tool in vocational teaching. Conclusions: The rapidly growing list of practical applications provides face-validity for the usefulness of this freely available new terminological resource.",2017-12-15 +34592131,Men's Shame and Anger: Examining the Roles of Alexithymia and Psychological Distress.,"The psychological mechanisms connecting shame and anger among men remain underexplored. This study aimed to understand the potential roles of psychological distress and alexithymia in this pathway, both in the form of difficulty identifying and describing one's feelings. Self-report measures were completed by 1,000 men (age mean = 49.6 years; range = 19-86 years). Conditional process analysis investigated a moderated mediation effect to determine whether men's distress mediated the relationship between shame and anger, and whether this effect differed according to severity and type of alexithymia. Findings indicated moderated mediation, with psychological distress a significant mediator in the association between shame and anger. Furthermore, difficulties describing feelings (but not identifying feelings) moderated the relationship between shame and psychological distress. Men's shame can be expressed via anger when experiencing psychological distress, and the inability to express one's feelings exacerbates this pathway. 
Clinical and public health avenues to reduce the impact of alexithymia are discussed. Supplemental data for this article is available online at https://doi.org/10.1080/00223980.2021.1977598 .",2021-09-30 +31642487,AraPheno and the AraGWAS Catalog 2020: a major database update including RNA-Seq and knockout mutation data for Arabidopsis thaliana.,"Genome-wide association studies (GWAS) are integral for studying genotype-phenotype relationships and gaining a deeper understanding of the genetic architecture underlying trait variation. A plethora of genetic associations between distinct loci and various traits have been successfully discovered and published for the model plant Arabidopsis thaliana. This success and the free availability of full genomes and phenotypic data for more than 1,000 different natural inbred lines led to the development of several data repositories. AraPheno (https://arapheno.1001genomes.org) serves as a central repository of population-scale phenotypes in A. thaliana, while the AraGWAS Catalog (https://aragwas.1001genomes.org) provides a publicly available, manually curated and standardized collection of marker-trait associations for all available phenotypes from AraPheno. In this major update, we introduce the next generation of both platforms, including new data, features and tools. We included novel results on associations between knockout-mutations and all AraPheno traits. Furthermore, AraPheno has been extended to display RNA-Seq data for hundreds of accessions, providing expression information for over 28 000 genes for these accessions. All data, including the imputed genotype matrix used for GWAS, are easily downloadable via the respective databases.",2020-01-01 +34050762,ncFANs v2.0: an integrative platform for functional annotation of non-coding RNAs.,"Increasing evidence proves the essential regulatory roles of non-coding RNAs (ncRNAs) in biological processes. 
However, characterizing the specific functions of ncRNAs remains a challenging task, owing to the intensive consumption of the experimental approaches. Here, we present an online platform ncFANs v2.0 that is a significantly enhanced version of our previous ncFANs to provide multiple computational methods for ncRNA functional annotation. Specifically, ncFANs v2.0 was updated to embed three functional modules, including ncFANs-NET, ncFANs-eLnc and ncFANs-CHIP. ncFANs-NET is a new module designed for data-free functional annotation based on four kinds of pre-built networks, including the co-expression network, co-methylation network, long non-coding RNA (lncRNA)-centric regulatory network and random forest-based network. ncFANs-eLnc enables the one-stop identification of enhancer-derived lncRNAs from the de novo assembled transcriptome based on the user-defined or our pre-annotated enhancers. Moreover, ncFANs-CHIP inherits the original functions for microarray data-based functional annotation and supports more chip types. We believe that our ncFANs v2.0 carries sufficient convenience and practicability for biological researchers and facilitates unraveling the regulatory mechanisms of ncRNAs. The ncFANs v2.0 server is freely available at http://bioinfo.org/ncfans or http://ncfans.gene.ac.",2021-07-01 +33079977,UNRES-Dock-protein-protein and peptide-protein docking by coarse-grained replica-exchange MD simulations.,"

Motivation

The majority of the proteins in living organisms occur as homo- or hetero-multimeric structures. Although there are many tools to predict the structures of single-chain proteins or protein complexes with small ligands, peptide-protein and protein-protein docking is more challenging. In this work, we utilized multiplexed replica-exchange molecular dynamics (MREMD) simulations with the physics-based heavily coarse-grained UNRES model, which provides more than a 1000-fold simulation speed-up compared with all-atom approaches to predict structures of protein complexes.

Results

We present a new protein-protein and peptide-protein docking functionality of the UNRES package, which includes a variable degree of conformational flexibility. UNRES-Dock protocol was tested on a set of 55 complexes with size from 43 to 587 amino-acid residues, showing that structures of the complexes can be predicted with good quality, if the sampling of the conformational space is sufficient, especially for flexible peptide-protein systems. The developed automatized protocol has been implemented in the standalone UNRES package and in the UNRES server.

Availability and implementation

UNRES server: http://unres-server.chem.ug.edu.pl; UNRES package and data used in testing of UNRES-Dock: http://unres.pl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +35047007,Open Problems in Extracellular RNA Data Analysis: Insights From an ERCC Online Workshop.,"We now know RNA can survive the harsh environment of biofluids when encapsulated in vesicles or by associating with lipoproteins or RNA binding proteins. These extracellular RNA (exRNA) play a role in intercellular signaling, serve as biomarkers of disease, and form the basis of new strategies for disease treatment. The Extracellular RNA Communication Consortium (ERCC) hosted a two-day online workshop (April 19-20, 2021) on the unique challenges of exRNA data analysis. The goal was to foster an open dialog about best practices and discuss open problems in the field, focusing initially on small exRNA sequencing data. Video recordings of workshop presentations and discussions are available (https://exRNA.org/exRNAdata2021-videos/). There were three target audiences: experimentalists who generate exRNA sequencing data, computational and data scientists who work with those groups to analyze their data, and experimental and data scientists new to the field. Here we summarize issues explored during the workshop, including progress on an effort to develop an exRNA data analysis challenge to engage the community in solving some of these open problems.",2021-01-01 +34019643,eVITTA: a web-based visualization and inference toolbox for transcriptome analysis.,"Transcriptome profiling is essential for gene regulation studies in development and disease. Current web-based tools enable functional characterization of transcriptome data, but most are restricted to applying gene-list-based methods to single datasets, inefficient in leveraging up-to-date and species-specific information, and limited in their visualization options. Additionally, there is no systematic way to explore data stored in the largest transcriptome repository, NCBI GEO. 
To fill these gaps, we have developed eVITTA (easy Visualization and Inference Toolbox for Transcriptome Analysis; https://tau.cmmt.ubc.ca/eVITTA/). eVITTA provides modules for analysis and exploration of studies published in NCBI GEO (easyGEO), detailed molecular- and systems-level functional profiling (easyGSEA), and customizable comparisons among experimental groups (easyVizR). We tested eVITTA on transcriptomes of SARS-CoV-2 infected human nasopharyngeal swab samples, and identified a downregulation of olfactory signal transducers, in line with the clinical presentation of anosmia in COVID-19 patients. We also analyzed transcriptomes of Caenorhabditis elegans worms with disrupted S-adenosylmethionine metabolism, confirming activation of innate immune responses and feedback induction of one-carbon cycle genes. Collectively, eVITTA streamlines complex computational workflows into an accessible interface, thus filling the gap of an end-to-end platform capable of capturing both broad and granular changes in human and model organism transcriptomes.",2021-07-01 +31825307,"MouseBytes, an open-access high-throughput pipeline and database for rodent touchscreen-based cognitive assessment. ","Open Science has changed research by making data accessible and shareable, contributing to replicability to accelerate and disseminate knowledge. However, for rodent cognitive studies the availability of tools to share and disseminate data is scarce. Automated touchscreen-based tests enable systematic cognitive assessment with easily standardised outputs that can facilitate data dissemination. Here we present an integration of touchscreen cognitive testing with an open-access database public repository (mousebytes.ca), as well as a Web platform for knowledge dissemination (https://touchscreencognition.org). 
We complement these resources with the largest dataset of age-dependent high-level cognitive assessment of mouse models of Alzheimer's disease, expanding knowledge of affected cognitive domains from male and female mice of three strains. We envision that these new platforms will enhance sharing of protocols, data availability and transparency, allowing meta-analysis and reuse of mouse cognitive data to increase the replicability/reproducibility of datasets.",2019-12-11 +32023238,Predicting colorectal cancer risk from adenoma detection via a two-type branching process model.,"Despite advances in the modeling and understanding of colorectal cancer development, the dynamics of the progression from benign adenomatous polyp to colorectal carcinoma are still not fully resolved. To take advantage of adenoma size and prevalence data in the National Endoscopic Database of the Clinical Outcomes Research Initiative (CORI) as well as colorectal cancer incidence and size data from the Surveillance Epidemiology and End Results (SEER) database, we construct a two-type branching process model with compartments representing adenoma and carcinoma cells. To perform parameter inference we present a new large-size approximation to the size distribution of the cancer compartment and validate our approach on simulated data. By fitting the model to the CORI and SEER data, we learn biologically relevant parameters, including the transition rate from adenoma to cancer. The inferred parameters allow us to predict the individualized risk of the presence of cancer cells for each screened patient. We provide a web application which allows the user to calculate these individual probabilities at https://ccrc-eth.shinyapps.io/CCRC/. For example, we find a 1 in 100 chance of cancer given the presence of an adenoma between 10 and 20mm size in an average risk patient at age 50. 
We show that our two-type branching process model recapitulates the early growth dynamics of colon adenomas and cancers and can recover epidemiological trends such as adenoma prevalence and cancer incidence while remaining mathematically and computationally tractable.",2020-02-05 +33045068,COVID-Align: accurate online alignment of hCoV-19 genomes using a profile HMM.,"

Motivation

The first cases of the COVID-19 pandemic emerged in December 2019. Until the end of February 2020, the number of available genomes was below 1000 and their multiple alignment was easily achieved using standard approaches. Subsequently, the availability of genomes has grown dramatically. Moreover, some genomes are of low quality with sequencing/assembly errors, making accurate re-alignment of all genomes nearly impossible on a daily basis. A more efficient, yet accurate approach was clearly required to pursue all subsequent bioinformatics analyses of this crucial data.

Results

hCoV-19 genomes are highly conserved, with very few indels and no recombination. This makes the profile HMM approach particularly well suited to align new genomes, add them to an existing alignment and filter problematic ones. Using a core of ∼2500 high quality genomes, we estimated a profile using HMMER, and implemented this profile in COVID-Align, a user-friendly interface to be used online or as standalone via Docker. The alignment of 1000 genomes requires ∼50 minutes on our cluster. Moreover, COVID-Align provides summary statistics, which can be used to determine the sequencing quality and evolutionary novelty of input genomes (e.g. number of new mutations and indels).

Availability and implementation

https://covalign.pasteur.cloud, hub.docker.com/r/evolbioinfo/covid-align.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +30117424,"[Development of resources, technologies and services at the China Zebrafish Resource Center].","With the rapid growth of the Chinese zebrafish community, there is an increasing demand for various types of zebrafish-related resources and technologies. The China Zebrafish Resource Center (CZRC, web: http://zfish.cn) was established at the Institute of Hydrobiology (IHB), Chinese Academy of Sciences (CAS) in 2012. Till now, CZRC has built the largest zebrafish aquaculture unit in China, organized a resource bank containing more than 1200 zebrafish lines and more than 10 000 frozen sperm samples, among which over 200 mutant and transgenic lines were generated by CZRC. CZRC has established several technical supporting platforms, such as the zebrafish husbandry and health control program of international standard, a high-efficient gene manipulation technology platform, and a stable and efficient sperm cryopreservation technology platform. The main task of CZRC is to provide different types of services to zebrafish investigators in China and worldwide, such as resource services (e.g. zebrafish lines), technical services (e.g. gene knockout) and transgenic services, consultancy services (e.g. zebrafish husbandry and health consultation), and conference services [e.g. holding regular technical training courses and biennale Chinese Zebrafish Principal Investigator Meeting (CZPM)]. After five years' development, CZRC is now recognized as one of the three major resource centers in the global zebrafish community.",2018-08-01 +32312909,Optimizing Resources in Children's Surgical Care: An Update on the American College of Surgeons' Verification Program. ,"Surgical procedures are performed in the United States in a wide variety of clinical settings and with variation in clinical outcomes. 
In May 2012, the Task Force for Children's Surgical Care, an ad hoc multidisciplinary group comprising physicians representing specialties relevant to pediatric perioperative care, was convened to generate recommendations to optimize the delivery of children's surgical care. This group generated a white paper detailing the consensus opinions of the involved experts. Following these initial recommendations, the American College of Surgeons (ACS), Children's Hospital Association, and Task Force for Children's Surgical Care, with input from all related perioperative specialties, developed and published specific and detailed resource and quality standards designed to improve children's surgical care (https://www.facs.org/quality-programs/childrens-surgery/childrens-surgery-verification). In 2015, with the endorsement of the American Academy of Pediatrics (https://pediatrics.aappublications.org/content/135/6/e1538), the ACS established a pilot verification program. In January 2017, after completion of the pilot program, the ACS Children's Surgery Verification Quality Improvement Program was officially launched. Verified sites are listed on the program Web site at https://www.facs.org/quality-programs/childrens-surgery/childrens-surgery-verification/centers, and more than 150 are interested in verification. This report provides an update on the ACS Children's Surgery Verification Quality Improvement Program as it continues to evolve.",2020-04-20 +29106644,iUUCD 2.0: an update with rich annotations for ubiquitin and ubiquitin-like conjugations.,"Here, we described the updated database iUUCD 2.0 (http://iuucd.biocuckoo.org/) for ubiquitin-activating enzymes (E1s), ubiquitin-conjugating enzymes (E2s), ubiquitin-protein ligases (E3s), deubiquitinating enzymes (DUBs), ubiquitin/ubiquitin-like binding domains (UBDs) and ubiquitin-like domains (ULDs), which act as key regulators in modulating ubiquitin and ubiquitin-like (UB/UBL) conjugations. 
In total, iUUCD 2.0 contained 136 512 UB/UBL regulators, including 1230 E1s, 5636 E2s, 93 343 E3s, 9548 DUBs, 30 173 UBDs and 11 099 ULDs in 148 eukaryotic species. In particular, we provided rich annotations for regulators of eight model organisms, especially in humans, by compiling and integrating the knowledge from nearly 70 widely used public databases that cover cancer mutations, single nucleotide polymorphisms (SNPs), mRNA expression, DNA and RNA elements, protein-protein interactions, protein 3D structures, disease-associated information, drug-target relations, post-translational modifications, DNA methylation and protein expression/proteomics. Compared with our previously developed UUCD 1.0 (∼0.41 GB), iUUCD 2.0 has a size of ∼32.1 GB of data with a >75-fold increase in data volume. We anticipate that iUUCD 2.0 can be a more useful resource for further study of UB/UBL conjugations.",2018-01-01 +33674827,Gene-Set Integrative Analysis of Multi-Omics Data Using Tensor-based Association Test. ,"Facilitated by technological advances and the decrease in costs, it is feasible to gather subject data from several omics platforms. Each platform assesses different molecular events, and the challenge lies in efficiently analyzing these data to discover novel disease genes or mechanisms. A common strategy is to regress the outcomes on all omics variables in a gene set. However, this approach suffers from problems associated with high-dimensional inference. We introduce a tensor-based framework for variable-wise inference in multi-omics analysis. By accounting for the matrix structure of an individual's multi-omics data, the proposed tensor methods incorporate the relationship among omics effects, reduce the number of parameters, and boost the modeling efficiency. We derive the variable-specific tensor test and enhance computational efficiency of tensor modeling. 
Using simulations and data applications on the Cancer Cell Line Encyclopedia (CCLE), we demonstrate our method performs favorably over baseline methods and will be useful for gaining biological insights in multi-omics analysis. R function and instruction are available from the authors' website: https://www4.stat.ncsu.edu/∼jytzeng/Software/TR.omics/TRinstruction.pdf. Supplementary materials are available at Bioinformatics online.",2021-03-01 +34368755,The Face Image Meta-Database (fIMDb) & ChatLab Facial Anomaly Database (CFAD): Tools for research on face perception and social stigma. ,"Investigators increasingly need high quality face photographs that they can use in service of their scholarly pursuits-whether serving as experimental stimuli or to benchmark face recognition algorithms. Up to now, an index of known face databases, their features, and how to access them has not been available. This absence has had at least two negative repercussions: First, without alternatives, some researchers may have used face databases that are widely known but not optimal for their research. Second, a reliance on databases comprised only of young white faces will lead to science that isn't representative of all the people whose tax contributions, in many cases, make that research possible. The ""Face Image Meta-Database"" (fIMDb) provides researchers with the tools to find the face images best suited to their research, with filters to locate databases with people of a varied racial and ethnic backgrounds and ages. Problems of representation in face databases are not restricted to race and ethnicity or age - there is a dearth of databases with faces that have visible differences (e.g., scars, port wine stains, and cleft lip and palate). A well-characterized database is needed to support programmatic research into perceivers' attitudes, behaviors, and neural responses to anomalous faces. 
The ""ChatLab Facial Anomaly Database"" (CFAD) was constructed to fill this gap, with photographs of faces with visible differences of various types, etiologies, sizes, locations, and that depict individuals from various ethnic backgrounds and age groups. Both the fIMDb and CFAD are available from: https://cliffordworkman.com/resources/.",2021-07-24 +34328069,The GP-OH (General Practice - Organizational Health) Survey: Development and Validation of a Novel Instrument to Measure Organizational Health in General Practice.,"Primary care healthcare organizations are complex and multidimensional, and there has been much discussion about the potential dangers of focusing on outcomes as quality indicators in isolation without understanding the processes and system characteristics that drive them. Organizational health, as a concept, shifts the focus of measurement upstream and considers the elements needed for sustainable long-term success. This study has both designed and tested the first survey seeking to measure organizational health specifically within the context of primary care. A stepwise approach was taken to ensure that the validity and reliability of the survey was examined at multiple stages.Supplemental data for this article is available online at https://doi.org/10.1080/00185868.2021.1947164.",2021-07-30 +33401349,Introduction to the LIVECAT web-based computerized adaptive testing platform.,"This study introduces LIVECAT, a web-based computerized adaptive testing platform. This platform provides many functions, including writing item content, managing an item bank, creating and administering a test, reporting test results, and providing information about a test and examinees. The LIVECAT provides examination administrators with an easy and flexible environment for composing and managing examinations. It is available at http://www.thecatkorea.com/. 
Several tools were used to program LIVECAT, as follows: operating system, Amazon Linux; web server, nginx 1.18; WAS, Apache Tomcat 8.5; database, Amazon RDMS-Maria DB; and languages, JAVA8, HTML5/CSS, Javascript, and jQuery. The LIVECAT platform can be used to implement several item response theory (IRT) models such as the Rasch and 1-, 2-, 3-parameter logistic models. The administrator can choose a specific model of test construction in LIVECAT. Multimedia data such as images, audio files, and movies can be uploaded to items in LIVECAT. Two scoring methods (maximum likelihood estimation and expected a posteriori) are available in LIVECAT and the maximum Fisher information item selection method is applied to every IRT model in LIVECAT. The LIVECAT platform showed equal or better performance compared with a conventional test platform. The LIVECAT platform enables users without psychometric expertise to easily implement and perform computerized adaptive testing at their institutions. The most recent LIVECAT version only provides a dichotomous item response model and the basic components of CAT. Shortly, LIVECAT will include advanced functions, such as polytomous item response models, weighted likelihood estimation method, and content balancing method.",2020-09-29 +34750237,Radiation Safety Considerations and Clinical Advantages of α-Emitting Therapy Radionuclides.,"CE credit: For CE credit, you can access the test for this article, as well as additional JNMT CE tests, online at https://www.snmmilearningcenter.org Complete the test online no later than March 2025. Your online test will be scored immediately. You may make 3 attempts to pass the test and must answer 75% of the questions correctly to receive Continuing Education Hour (CEH) credit. Credit amounts can be found in the SNMMI Learning Center Activity. 
SNMMI members will have their CEH credit added to their VOICE transcript automatically; nonmembers will be able to print out a CE certificate upon successfully completing the test. The online test is free to SNMMI members; nonmembers must pay $15.00 by credit card when logging onto the website to take the test.α-emitting radionuclides provide an effective means of delivering large radiation doses to targeted treatment locations. 223RaCl2 is Food and Drug Administration-approved for treatment of metastatic castration-resistant prostate cancer, and 225Ac (225Ac-lintuzumab) radiolabeled antibodies have been shown to be beneficial for patients with acute myeloid leukemia. In recent years, there has been increasing use of α-emitters in theranostic agents with both small- and large-molecule constructs. The proper precautionary means for their use and surveying documentation of these isotopes in a clinical setting are an essential accompaniment to these treatments. Methods: Patient treatment data collected over a 3-y period, as well as regulatory requirements and safety practices, are described. Commonly used radiation instruments were evaluated for their ability to identify potential radioactive material spills and contamination events during a clinical administration of 225Ac. These instruments were placed at 0.32 cm from a 1.0-cm 225Ac disk source for measurement purposes. Radiation background values, efficiencies, and minimal detectable activities were measured and calculated for each type of detector. Results: The median external measured dose rate from 223RaCl2 patients (n = 611) was 2.5 μSv h-1 on contact and 0.2 μSv h-1 at 1 m immediately after administration. Similarly, 225Ac-lintuzumab (n = 19) patients had median external dose rates of 2.0 μSv h-1 on contact and 0.3 μSv h-1 at 1 m. 
For the measurement of 225Ac samples, a liquid scintillation counter was found to have the highest overall efficiency (97%), whereas a ZnS α-probe offered the lowest minimal detectable activity at 3 counts per minute. Conclusion: In this article, we report data from 630 patients who were undergoing treatment with the α-emitting isotopes 223Ra and 225Ac. Although α-emitters have the ability to deliver a higher internal radiation dose to the exposed tissues than can other unsealed radionuclides, they typically present minimal concerns about external dose rate. Additionally, α-radiation can be efficiently detected with appropriate radiation instrumentation, such as a liquid scintillation counter or ZnS probe, which should be prioritized when surveying for spills of α-emitters.",2021-11-08 +33789960,"A Bioinformatics Whole-Genome Sequencing Workflow for Clinical Mycobacterium tuberculosis Complex Isolate Analysis, Validated Using a Reference Collection Extensively Characterized with Conventional Methods and In Silico Approaches. ","The use of whole-genome sequencing (WGS) for routine typing of bacterial isolates has increased substantially in recent years. For Mycobacterium tuberculosis (MTB), in particular, WGS has the benefit of drastically reducing the time required to generate results compared to most conventional phenotypic methods. Consequently, a multitude of solutions for analyzing WGS MTB data have been developed, but their successful integration in clinical and national reference laboratories is hindered by the requirement for their validation, for which a consensus framework is still largely absent. 
We developed a bioinformatics workflow for (Illumina) WGS-based routine typing of MTB complex (MTBC) member isolates allowing complete characterization, including (sub)species confirmation and identification (16S, csb/RD, hsp65), single nucleotide polymorphism (SNP)-based antimicrobial resistance (AMR) prediction, and pathogen typing (spoligotyping, SNP barcoding, and core genome multilocus sequence typing). Workflow performance was validated on a per-assay basis using a collection of 238 in-house-sequenced MTBC isolates, extensively characterized with conventional molecular biology-based approaches supplemented with public data. For SNP-based AMR prediction, results from molecular genotyping methods were supplemented with in silico modified data sets, allowing us to greatly increase the set of evaluated mutations. The workflow demonstrated very high performance with performance metrics of >99% for all assays, except for spoligotyping, where sensitivity dropped to ∼90%. The validation framework for our WGS-based bioinformatics workflow can aid in the standardization of bioinformatics tools by the MTB community and other SNP-based applications regardless of the targeted pathogen(s). The bioinformatics workflow is available for academic and nonprofit use through the Galaxy instance of our institute at https://galaxy.sciensano.be.",2021-05-19 +34123499,ML-SIM: universal reconstruction of structured illumination microscopy images using transfer learning.,"Structured illumination microscopy (SIM) has become an important technique for optical super-resolution imaging because it allows a doubling of image resolution at speeds compatible with live-cell imaging. However, the reconstruction of SIM images is often slow, prone to artefacts, and requires multiple parameter adjustments to reflect different hardware or experimental conditions. 
Here, we introduce a versatile reconstruction method, ML-SIM, which makes use of transfer learning to obtain a parameter-free model that generalises beyond the task of reconstructing data recorded by a specific imaging system for a specific sample type. We demonstrate the generality of the model and the high quality of the obtained reconstructions by application of ML-SIM on raw data obtained for multiple sample types acquired on distinct SIM microscopes. ML-SIM is an end-to-end deep residual neural network that is trained on an auxiliary domain consisting of simulated images, but is transferable to the target task of reconstructing experimental SIM images. By generating the training data to reflect challenging imaging conditions encountered in real systems, ML-SIM becomes robust to noise and irregularities in the illumination patterns of the raw SIM input frames. Since ML-SIM does not require the acquisition of experimental training data, the method can be efficiently adapted to any specific experimental SIM implementation. We compare the reconstruction quality enabled by ML-SIM with current state-of-the-art SIM reconstruction methods and demonstrate advantages in terms of generality and robustness to noise for both simulated and experimental inputs, thus making ML-SIM a useful alternative to traditional methods for challenging imaging conditions. Additionally, reconstruction of a SIM stack is accomplished in less than 200 ms on a modern graphics processing unit, enabling future applications for real-time imaging. Source code and ready-to-use software for the method are available at http://ML-SIM.github.io.",2021-04-15 +,"Importance of building a digital species index (spindex) for entomology collections: A case study, results and recommendations","Abstract The Entomology Collection at the Academy of Natural Sciences of Drexel University (ANSP) contains approximately four million insect specimens including some of the oldest in the Western Hemisphere. 
Like most large entomology collections, no complete inventory of the species represented in the collection was available and even a physical search for a species could not ensure that all available specimens would be recovered for study. Between 2010 and 2014, we created a species-level index (called here spindex) of all species and their specimen counts at ANSP, along with each species’ location in the collection. Additional data captured during the project included the higher level classification of each species and type of specimen preparation. The spindex is searchable online: http://symbiont.ansp.org/entomology/. The spindex project documented 96,126 species in the ANSP Entomology Collection, representing about 10% of the described insect fauna. Additionally, over 900 putative primary types were discovered outside the Primary Type Collection. The completion of this project has improved access to the collection by enabling scientists and other users worldwide to search these collection holdings remotely and has facilitated staff in curation, research, collection management and funding proposals. A spindex is an important tool that is overlooked for planning and carrying out specimen level digitisation. This project is a case study for building a species-level index. A detailed protocol is provided, along with recommendations for other collections, including cost estimates and strategies for tracking progress and avoiding common obstacles.",2020-01-01 +28605773,"BioM2MetDisease: a manually curated database for associations between microRNAs, metabolites, small molecules and metabolic diseases. ","BioM2MetDisease is a manually curated database that aims to provide a comprehensive and experimentally supported resource of associations between metabolic diseases and various biomolecules. Recently, metabolic diseases such as diabetes have become one of the leading threats to people’s health. 
Metabolic disease associated with alterations of multiple types of biomolecules such as miRNAs and metabolites. An integrated and high-quality data source that collection of metabolic disease associated biomolecules is essential for exploring the underlying molecular mechanisms and discovering novel therapeutics. Here, we developed the BioM2MetDisease database, which currently documents 2681 entries of relationships between 1147 biomolecules (miRNAs, metabolites and small molecules/drugs) and 78 metabolic diseases across 14 species. Each entry includes biomolecule category, species, biomolecule name, disease name, dysregulation pattern, experimental technique, a brief description of metabolic disease-biomolecule relationships, the reference, additional annotation information etc. BioM2MetDisease provides a user-friendly interface to explore and retrieve all data conveniently. A submission page was also offered for researchers to submit new associations between biomolecules and metabolic diseases. BioM2MetDisease provides a comprehensive resource for studying biology molecules act in metabolic diseases, and it is helpful for understanding the molecular mechanisms and developing novel therapeutics for metabolic diseases. http://www.bio-bigdata.com/BioM2MetDisease/.",2017-01-01 +31950188,miR-TV: an interactive microRNA Target Viewer for microRNA and target gene expression interrogation for human cancer studies. ,"MicroRNAs (miRNAs) have been identified in many organisms, and they are essential for gene expression regulation in many critical cellular processes. The expression levels of these genes and miRNAs are closely associated with the progression of diseases such as cancers. Furthermore, survival analysis is a significant indicator for evaluating the criticality of these cellular processes in cancer progression. 
We established a web tool, miRNA Target Viewer (miR-TV), which integrates 5p-arm and 3p-arm miRNA expression profiles, mRNA target gene expression levels in healthy and cancer populations, and clinical data of cancer patients and their survival information. The developed miR-TV obtained miRNA-seq, mRNA-seq and clinical data from the Cancer Genome Atlas and potential miRNA target gene predictions from miRDB, targetScan and miRanda. The data presentation was implemented using the D3 javascript toolkit. The D3 toolkit is frequently used to provide an easy-to-use interactive interface. Our miR-TV provides a user-friendly and interactive interface, which can be beneficial for biomedical researchers to freely interrogate miRNA expression information and their potential target genes. We believe that such a data visualization bioinformatics tool is excellent for obtaining information from massive biological data. Database URL: http://mirtv.ibms.sinica.edu.tw.",2020-01-01 +31612915,MIBiG 2.0: a repository for biosynthetic gene clusters of known function.,"Fueled by the explosion of (meta)genomic data, genome mining of specialized metabolites has become a major technology for drug discovery and studying microbiome ecology. In these efforts, computational tools like antiSMASH have played a central role through the analysis of Biosynthetic Gene Clusters (BGCs). Thousands of candidate BGCs from microbial genomes have been identified and stored in public databases. Interpreting the function and novelty of these predicted BGCs requires comparison with a well-documented set of BGCs of known function. The MIBiG (Minimum Information about a Biosynthetic Gene Cluster) Data Standard and Repository was established in 2015 to enable curation and storage of known BGCs. Here, we present MIBiG 2.0, which encompasses major updates to the schema, the data, and the online repository itself. Over the past five years, 851 new BGCs have been added. 
Additionally, we performed extensive manual data curation of all entries to improve the annotation quality of our repository. We also redesigned the data schema to ensure the compliance of future annotations. Finally, we improved the user experience by adding new features such as query searches and a statistics page, and enabled direct link-outs to chemical structure databases. The repository is accessible online at https://mibig.secondarymetabolites.org/.",2020-01-01 +31691824,UCSC Genome Browser enters 20th year.,"The University of California Santa Cruz Genome Browser website (https://genome.ucsc.edu) enters its 20th year of providing high-quality genomics data visualization and genome annotations to the research community. In the past year, we have added a new option to our web BLAT tool that allows search against all genomes, a single-cell expression viewer (https://cells.ucsc.edu), a 'lollipop' plot display mode for high-density variation data, a RESTful API for data extraction and a custom-track backup feature. New datasets include Tabula Muris single-cell expression data, GeneHancer regulatory annotations, The Cancer Genome Atlas Pan-Cancer variants, Genome Reference Consortium Patch sequences, new ENCODE transcription factor binding site peaks and clusters, the Database of Genomic Variants Gold Standard Variants, Genomenon Mastermind variants and three new multi-species alignment tracks.",2020-01-01 +33313665,RCSB Protein Data Bank 1D Tools and Services. ,"Interoperability between polymer sequences and structural data is essential for providing a complete picture of protein and gene features and helping to understand biomolecular function. Herein, we present two resources designed to improve interoperability between the RCSB Protein Data Bank, the NCBI, and the UniProtKB data resources and visualize integrated data therefrom. 
The underlying tools provide a flexible means of mapping between the different coordinate spaces and an interactive tool allows convenient visualization of the 1-dimensional data over the web. https://1d-coordinates.rcsb.org and https://rcsb.github.io/rcsb-saguaro. Supplementary data are available at Bioinformatics online.",2020-12-12 +34050897,A Guideline-Based Decision Tree Achieves Better Glucose Control with Less Hypoglycemia at 3 Months in Chinese Diabetic Patients.,"

Introduction

China has the world's largest diabetes epidemic and has been facing a serious shortage of primary care providers for chronic diseases including diabetes. To help primary care physicians follow guidelines and mitigate the workload in primary care communities in China, we developed a guideline-based decision tree. This study aimed to validate it at 3 months with real-world data.

Methods

The decision tree was developed based on the 2017 Chinese Type 2 Diabetes (T2DM) guideline and 2018 guideline for primary care. It was validated with the data from two registry studies: the NEW2D and ORBIT studies. Patients' data were divided into two groups: the compliance and non-compliance group, depending on whether the physician's prescription was consistent with the decision tree or not. The primary outcome was the difference of change in HbA1c from baseline to 3 months between the two groups. The secondary outcomes included the difference in the proportion of patients achieving HbA1c < 7% at 3 months between the two groups, the incidence of self-reported hypoglycemia at 3 months, and the proportion of patients (baseline HbA1c ≥ 7%) with a HbA1c reduction ≥ 0.3%. The statistical analysis was performed using linear or logistic regression with inverse probability of treatment weighting with adjustments of confounding factors.

Results

There was a 0.9% reduction of HbA1c in the compliance group and a 0.8% reduction in the non-compliance group (P < 0.001); 61.1% of the participants in the compliance group and 44.3% of the participants in the non-compliance group achieved a HbA1c level < 7% at 3 months (P < 0.001). The hypoglycemic events occurred in 7.1% of patients in the compliance group vs. 9.4% in the non-compliance group (P < 0.001).

Conclusion

The decision tree can help physicians to treat their patients so that they achieve their glycemic targets with fewer hypoglycemic risks. ( http://www.clinicaltrials.gov NCT01525693 & NCT01859598).",2021-05-29 +33410471,"PM4NGS, a project management framework for next-generation sequencing data analysis. ","FAIR (Findability, Accessibility, Interoperability, and Reusability) next-generation sequencing (NGS) data analysis relies on complex computational biology workflows and pipelines to guarantee reproducibility, portability, and scalability. Moreover, workflow languages, managers, and container technologies have helped address the problem of data analysis pipeline execution across multiple platforms in scalable ways. Here, we present a project management framework for NGS data analysis called PM4NGS. This framework is composed of an automatic creation of a standard organizational structure of directories and files, bioinformatics tool management using Docker or Bioconda, and data analysis pipelines in CWL format. Pre-configured Jupyter notebooks with minimum Python code are included in PM4NGS to produce a project report and publication-ready figures. We present 3 pipelines for demonstration purposes including the analysis of RNA-Seq, ChIP-Seq, and ChIP-exo datasets. PM4NGS is an open source framework that creates a standard organizational structure for NGS data analysis projects. PM4NGS is easy to install, configure, and use by non-bioinformaticians on personal computers and laptops. It permits execution of the NGS data analysis on Windows 10 with the Windows Subsystem for Linux feature activated. The framework aims to reduce the gap between researcher in experimental laboratories producing NGS data and workflows for data analysis. 
PM4NGS documentation can be accessed at https://pm4ngs.readthedocs.io/.",2021-01-01 +34761331,Independently validated sex-specific nomograms for predicting survival in patients with newly diagnosed glioblastoma: NRG Oncology RTOG 0525 and 0825.,"

Background/purpose

Glioblastoma (GBM) is the most common primary malignant brain tumor. Sex has been shown to be an important prognostic factor for GBM. The purpose of this study was to develop and independently validate sex-specific nomograms for estimation of individualized GBM survival probabilities using data from 2 independent NRG Oncology clinical trials.

Methods

This analysis included information on 752 (NRG/RTOG 0525) and 599 (NRG/RTOG 0825) patients with newly diagnosed GBM. The Cox proportional hazard models by sex were developed using NRG/RTOG 0525 and significant variables were identified using a backward selection procedure. The final selected models by sex were then independently validated using NRG/RTOG 0825.

Results

Final nomograms were built by sex. Age at diagnosis, KPS, MGMT promoter methylation and location of tumor were common significant predictors of survival for both sexes. For both sexes, tumors in the frontal lobes had significantly better survival than tumors of multiple sites. Extent of resection, and use of corticosteroids were significant predictors of survival for males.

Conclusions

A sex specific nomogram that assesses individualized survival probabilities (6-, 12- and 24-months) for patients with GBM could be more useful than estimation of overall survival as there are factors that differ between males and females. A user friendly online application can be found here- https://npatilshinyappcalculator.shinyapps.io/SexDifferencesInGBM/ .",2021-11-10 +34099040,Psychometric validation and refinement of the Interoception Sensory Questionnaire (ISQ) in adolescents and adults on the autism spectrum.,"

Background

Individuals on the autism spectrum are reported to display alterations in interoception, the sense of the internal state of the body. The Interoception Sensory Questionnaire (ISQ) is a 20-item self-report measure of interoception specifically intended to measure this construct in autistic people. The psychometrics of the ISQ, however, have not previously been evaluated in a large sample of autistic individuals.

Methods

Using confirmatory factor analysis, we evaluated the latent structure of the ISQ in a large online sample of adults on the autism spectrum and found that the unidimensional model fit the data poorly. Using misspecification analysis to identify areas of local misfit and item response theory to investigate the appropriateness of the seven-point response scale, we removed redundant items and collapsed the response options to put forth a novel eight-item, five-response choice ISQ.

Results

The revised, five-response choice ISQ (ISQ-8) showed much improved fit while maintaining high internal reliability. Differential item functioning (DIF) analyses indicated that the items of the ISQ-8 were answered in comparable ways by autistic adolescents and adults and across multiple other sociodemographic groups.

Limitations

Our results were limited by the fact that we did not collect data for typically developing controls, preventing the analysis of DIF by diagnostic status. Additionally, while this study proposes a new 5-response scale for the ISQ-8, our data were not collected using this method; thus, the psychometric properties for the revised version of this instrument require further investigation.

Conclusion

The ISQ-8 shows promise as a reliable and valid measure of interoception in adolescents and adults on the autism spectrum, but additional work is needed to examine its psychometrics in this population. A free online score calculator has been created to facilitate the use of ISQ-8 latent trait scores for further studies of autistic adolescents and adults (available at https://asdmeasures.shinyapps.io/ISQ_score/ ).",2021-06-07 +31756036,Using MetaboAnalyst 4.0 for Comprehensive and Integrative Metabolomics Data Analysis.,"MetaboAnalyst (https://www.metaboanalyst.ca) is an easy-to-use web-based tool suite for comprehensive metabolomic data analysis, interpretation, and integration with other omics data. Since its first release in 2009, MetaboAnalyst has evolved significantly to meet the ever-expanding bioinformatics demands from the rapidly growing metabolomics community. In addition to providing a variety of data processing and normalization procedures, MetaboAnalyst supports a wide array of functions for statistical, functional, as well as data visualization tasks. Some of the most widely used approaches include PCA (principal component analysis), PLS-DA (partial least squares discriminant analysis), clustering analysis and visualization, MSEA (metabolite set enrichment analysis), MetPA (metabolic pathway analysis), biomarker selection via ROC (receiver operating characteristic) curve analysis, as well as time series and power analysis. The current version of MetaboAnalyst (4.0) features a complete overhaul of the user interface and significantly expanded underlying knowledge bases (compound database, pathway libraries, and metabolite sets). Three new modules have been added to support pathway activity prediction directly from mass peaks, biomarker meta-analysis, and network-based multi-omics data integration. 
To enable more transparent and reproducible analysis of metabolomic data, we have released a companion R package (MetaboAnalystR) to complement the web-based application. This article provides an overview of the main functional modules and the general workflow of MetaboAnalyst 4.0, followed by 12 detailed protocols: © 2019 by John Wiley & Sons, Inc. Basic Protocol 1: Data uploading, processing, and normalization Basic Protocol 2: Identification of significant variables Basic Protocol 3: Multivariate exploratory data analysis Basic Protocol 4: Functional interpretation of metabolomic data Basic Protocol 5: Biomarker analysis based on receiver operating characteristic (ROC) curves Basic Protocol 6: Time-series and two-factor data analysis Basic Protocol 7: Sample size estimation and power analysis Basic Protocol 8: Joint pathway analysis Basic Protocol 9: MS peaks to pathway activities Basic Protocol 10: Biomarker meta-analysis Basic Protocol 11: Knowledge-based network exploration of multi-omics data Basic Protocol 12: MetaboAnalystR introduction.",2019-12-01 +30476243,"STRING v11: protein-protein association networks with increased coverage, supporting functional discovery in genome-wide experimental datasets.","Proteins and their functional interactions form the backbone of the cellular machinery. Their connectivity network needs to be considered for the full understanding of biological phenomena, but the available information on protein-protein associations is incomplete and exhibits varying levels of annotation granularity and reliability. The STRING database aims to collect, score and integrate all publicly available sources of protein-protein interaction information, and to complement these with computational predictions. Its goal is to achieve a comprehensive and objective global network, including direct (physical) as well as indirect (functional) interactions. The latest version of STRING (11.0) more than doubles the number of organisms it covers, to 5090. 
The most important new feature is an option to upload entire, genome-wide datasets as input, allowing users to visualize subsets as interaction networks and to perform gene-set enrichment analysis on the entire input. For the enrichment analysis, STRING implements well-known classification systems such as Gene Ontology and KEGG, but also offers additional, new classification systems based on high-throughput text-mining as well as on a hierarchical clustering of the association network itself. The STRING resource is available online at https://string-db.org/.",2019-01-01 +30321400,EWASdb: epigenome-wide association study database.,"DNA methylation, the most intensively studied epigenetic modification, plays an important role in understanding the molecular basis of diseases. Furthermore, epigenome-wide association study (EWAS) provides a systematic approach to identify epigenetic variants underlying common diseases/phenotypes. However, there is no comprehensive database to archive the results of EWASs. To fill this gap, we developed the EWASdb, which is a part of 'The EWAS Project', to store the epigenetic association results of DNA methylation from EWASs. In its current version (v 1.0, up to July 2018), the EWASdb has curated 1319 EWASs associated with 302 diseases/phenotypes. There are three types of EWAS results curated in this database: (i) EWAS for single marker; (ii) EWAS for KEGG pathway and (iii) EWAS for GO (Gene Ontology) category. As the first comprehensive EWAS database, EWASdb has been searched or downloaded by researchers from 43 countries to date. We believe that EWASdb will become a valuable resource and significantly contribute to the epigenetic research of diseases/phenotypes and have potential clinical applications. 
EWASdb is freely available at http://www.ewas.org.cn/ewasdb or http://www.bioapp.org/ewasdb.",2019-01-01 +30933966,CRISPR screening using an expanded toolkit of autophagy reporters identifies TMEM41B as a novel autophagy factor.,"The power of forward genetics in yeast is the foundation on which the field of autophagy research firmly stands. Complementary work on autophagy in higher eukaryotes has revealed both the deep conservation of this process, as well as novel mechanisms by which autophagy is regulated in the context of development, immunity, and neuronal homeostasis. The recent emergence of new clustered regularly interspaced palindromic repeats/CRISPR-associated protein 9 (CRISPR/Cas9)-based technologies has begun facilitating efforts to define novel autophagy factors and pathways by forward genetic screening in mammalian cells. Here, we set out to develop an expanded toolkit of autophagy reporters amenable to CRISPR/Cas9 screening. Genome-wide screening of our reporters in mammalian cells recovered virtually all known autophagy-related (ATG) factors as well as previously uncharacterized factors, including vacuolar protein sorting 37 homolog A (VPS37A), transmembrane protein 251 (TMEM251), amyotrophic lateral sclerosis 2 (ALS2), and TMEM41B. To validate this data set, we used quantitative microscopy and biochemical analyses to show that 1 novel hit, TMEM41B, is required for phagophore maturation. TMEM41B is an integral endoplasmic reticulum (ER) membrane protein distantly related to the established autophagy factor vacuole membrane protein 1 (VMP1), and our data show that these two factors play related, albeit not fully overlapping, roles in autophagosome biogenesis. 
In sum, our work uncovers new ATG factors, reveals a malleable network of autophagy receptor genetic interactions, and provides a valuable resource (http://crispr.deniclab.com) for further mining of novel autophagy mechanisms.",2019-04-01 +34294084,Long non-coding RNA PSMA3-AS1 promotes glioma progression through modulating the miR-411-3p/HOXA10 pathway.,"

Background

Glioma is a common type of brain tumor and is classified as low and high grades according to morphology and molecules. Growing evidence has proved that long non-coding RNAs (lncRNAs) play pivotal roles in numerous tumors or diseases including glioma. Proteasome 20S subunit alpha 3 antisense RNA 1 (PSMA3-AS1), as a member of lncRNAs, has been disclosed to play a tumor-promoting role in cancer progression. However, the role of PSMA3-AS1 in glioma remains unknown. Therefore, we concentrated on researching the regulatory mechanism of PSMA3-AS1 in glioma.

Methods

PSMA3-AS1 expression was detected using RT-qPCR. Functional assays were performed to measure the effects of PSMA3-AS1 on glioma progression. After that, ENCORI ( http://starbase.sysu.edu.cn/ ) database was used to predict potential genes that could bind to PSMA3-AS1, and miR-411-3p was chosen for further studies. The interaction among PSMA3-AS1, miR-411-3p and homeobox A10 (HOXA10) were confirmed through mechanism assays.

Results

PSMA3-AS1 was verified to be up-regulated in glioma cells and promote glioma progression. Furthermore, PSMA3-AS1 could act as a competitive endogenous RNA (ceRNA) for miR-411-3p to regulate HOXA10 and thus affecting glioma progression.

Conclusion

PSMA3-AS1 stimulated glioma progression via the miR-411-3p/HOXA10 pathway, which might offer a novel insight for the therapy and treatment of glioma.",2021-07-22 +30371900,Ancestral Genomes: a resource for reconstructed ancestral genes and genomes across the tree of life.,"A growing number of whole genome sequencing projects, in combination with development of phylogenetic methods for reconstructing gene evolution, have provided us with a window into genomes that existed millions, and even billions, of years ago. Ancestral Genomes (http://ancestralgenomes.org) is a resource for comprehensive reconstructions of these 'fossil genomes'. Comprehensive sets of protein-coding genes have been reconstructed for 78 genomes of now-extinct species that were the common ancestors of extant species from across the tree of life. The reconstructed genes are based on the extensive library of over 15 000 gene family trees from the PANTHER database, and are updated on a yearly basis. For each ancestral gene, we assign a stable identifier, and provide additional information designed to facilitate analysis: an inferred name, a reconstructed protein sequence, a set of inferred Gene Ontology (GO) annotations, and a 'proxy gene' for each ancestral gene, defined as the least-diverged descendant of the ancestral gene in a given extant genome. On the Ancestral Genomes website, users can browse the Ancestral Genomes by selecting nodes in a species tree, and can compare an extant genome with any of its reconstructed ancestors to understand how the genome evolved.",2019-01-01 +32845473,Adverse Drug Events Observed with the Novel Sodium/Glucose Co-Transporter 2 Inhibitor Ipragliflozin for the Treatment of Patients with Type 2 Diabetes Mellitus: A Systematic Review and Meta-analysis of Randomized Studies.,"

Introduction

Type 2 diabetes mellitus (T2DM) is becoming a major issue worldwide. To effectively control the blood sugar of patients with T2DM, several novel oral hypoglycemic agents (OHAs) are being developed. Sodium/glucose co-transporter 2 (SGLT 2) inhibitors have recently shown beneficial outcomes in patients with T2DM. In this analysis, we aimed to systematically compare the adverse drug events observed with ipragliflozin versus placebo for the treatment of patients with T2DM.

Methods

http://www.ClinicalTrials.gov , the bibliographic database of life science and biomedical information MEDLINE, EMBASE and the Cochrane Central were searched for English publications satisfying the inclusion and exclusion criteria of this study. Adverse drug events were the end points in this analysis. The latest version (5.4) of the RevMan software was used to analyze the data, and risk ratios (RR) with 95% confidence intervals (CI) were used to represent the data post analysis.

Results

Eight randomized studies with a total of 1519 participants with T2DM were included in this analysis whereby total treatment-emergent adverse events (RR: 1.06, 95% CI: 0.96-1.16; P = 0.26), including mild (RR: 0.95, 95% CI: 0.79-1.13; P = 0.54), moderate (RR: 1.04, 95% CI: 0.72-1.51; P = 0.83) and severe treatment-emergent adverse events (RR: 0.72, 95% CI: 0.26-1.96; P = 0.52), were not significantly different in those patients who were assigned to ipragliflozin versus placebo for the treatment of T2DM. Moreover, drug-related adverse events (RR: 1.04, 95% CI: 0.69-1.58; P = 0.85), adverse events leading to drug discontinuation (RR: 1.09, 95% CI: 0.57-2.10; P = 0.79), urinary tract infection (RR: 1.03, 95% CI: 0.60-1.77; P = 0.91), naso-pharyngitis (RR: 0.54, 95% CI: 0.19-1.52; P = 0.25), constipation (RR: 1.94, 95% CI: 0.90-4.20; P = 0.09), dizziness (RR: 0.81, 95% CI: 0.20-3.23; P = 0.76), gastrointestinal disorders (RR: 0.96, 95% CI: 0.68-1.36; P = 0.82) and dehydration (RR: 2.26, 95% CI: 0.38-13.43; P = 0.37) were also not significantly different. However, genital infection (RR: 4.53, 95% CI: 1.48-13.85; P = 0.008) and hypoglycemia (RR: 1.68, 95% CI: 1.03-2.74; P = 0.04) rates were significantly higher in the ipragliflozin group.

Conclusions

The current analysis showed ipragliflozin to be associated with significantly higher genital infection rates compared to placebo, whereas no significant difference was observed compared to the other adverse drug events in these patients with T2DM. In addition, hypoglycemia was also not significantly different following sensitivity analysis.",2020-08-26 +31952477,Piphillin predicts metagenomic composition and dynamics from DADA2-corrected 16S rDNA sequences.,"BACKGROUND:Shotgun metagenomic sequencing reveals the potential in microbial communities. However, lower-cost 16S ribosomal RNA (rRNA) gene sequencing provides taxonomic, not functional, observations. To remedy this, we previously introduced Piphillin, a software package that predicts functional metagenomic content based on the frequency of detected 16S rRNA gene sequences corresponding to genomes in regularly updated, functionally annotated genome databases. Piphillin (and similar tools) have previously been evaluated on 16S rRNA data processed by the clustering of sequences into operational taxonomic units (OTUs). New techniques such as amplicon sequence variant error correction are in increased use, but it is unknown if these techniques perform better in metagenomic content prediction pipelines, or if they should be treated the same as OTU data in respect to optimal pipeline parameters. RESULTS:To evaluate the effect of 16S rRNA sequence analysis method (clustering sequences into OTUs vs amplicon sequence variant error correction into amplicon sequence variants (ASVs)) on the ability of Piphillin to predict functional metagenomic content, we evaluated Piphillin-predicted functional content from 16S rRNA sequence data processed through OTU clustering and error correction into ASVs compared to corresponding shotgun metagenomic data. We show a strong correlation between metagenomic data and Piphillin-predicted functional content resulting from both 16S rRNA sequence analysis methods. 
Differential abundance testing with Piphillin-predicted functional content exhibited a low false positive rate (< 0.05) while capturing a large fraction of the differentially abundant features resulting from corresponding metagenomic data. However, Piphillin prediction performance was optimal at different cutoff parameters depending on 16S rRNA sequence analysis method. Using data analyzed with amplicon sequence variant error correction, Piphillin outperformed comparable tools, for instance exhibiting 19% greater balanced accuracy and 54% greater precision compared to PICRUSt2. CONCLUSIONS:Our results demonstrate that raw Illumina sequences should be processed for subsequent Piphillin analysis using amplicon sequence variant error correction (with DADA2 or similar methods) and run using a 99% ID cutoff for Piphillin, while sequences generated on platforms other than Illumina should be processed via OTU clustering (e.g., UPARSE) and run using a 96% ID cutoff for Piphillin. Piphillin is publicly available for academic users (Piphillin server. http://piphillin.secondgenome.com/.).",2020-01-17 +,A spatio-temporal active-fire clustering approach for global burned area mapping at 250 m from MODIS data,"This paper presents the generation of a global burned area mapping algorithm using MODIS hotspots and near-infrared reflectance within ESA's Fire_cci project. The algorithm is based on a hybrid approach that combines MODIS highest resolution (250 m) near-infrared band and active fire information from thermal channels. The burned area is detected in two phases. In the first step, pixels with a high probability of being burned are selected in order to reduce commission errors. To do that, spatio-temporal active-fire clusters are created to determine adaptive thresholds. Finally, a contextual growing approach is applied from those pixels to the neighbouring area to fully detect the burned patch and reduce omission errors. 
The algorithm was used to obtain a time series of global burned area dataset (named FireCCI51), covering the 2001–2018 period. Validation based on 1200 sampled sites covering the period from 2003 to 2014 showed an average omission and commission errors of 67.1% and 54.4%. When using longer validation periods, the errors were found smaller (54.5% omission and 25.7% commission for the additional 1000 African sampled sites), which indicates that the product is negatively influenced by temporal reporting accuracy. The inter-comparison carried out with previous Fire_cci versions (FireCCI41 and FireCCI50), and NASA's standard burned area product (MCD64A1 c6) showed consistent spatial and temporal patterns. However, the new algorithm estimated an average BA of 4.63 Mkm², with a maximum of 5.19 Mkm² (2004) and a minimum of 3.94 Mkm² (in 2001), increasing current burned area estimations. Besides, the new product was found more sensitive to detect smaller burned patches. This new product, called FireCCI51, is publicly available at: http://cci.esa.int/data, last accessed on September 2019.",2020-01-01 +32611389,Proteus: An algorithm for proposing stabilizing mutation pairs based on interactions observed in known protein 3D structures.,"BACKGROUND:Protein engineering has many applications for industry, such as the development of new drugs, vaccines, treatment therapies, food, and biofuel production. A common way to engineer a protein is to perform mutations in functionally essential residues to optimize their function. However, the discovery of beneficial mutations for proteins is a complex task, with a time-consuming and high cost for experimental validation. Hence, computational approaches have been used to propose new insights for experiments narrowing the search space and reducing the costs. RESULTS:In this study, we developed Proteus (an acronym for Protein Engineering Supporter), a new algorithm for proposing mutation pairs in a target 3D structure. 
These suggestions are based on contacts observed in other known structures from Protein Data Bank (PDB). Proteus' basic assumption is that if a non-interacting pair of amino acid residues in the target structure is exchanged to an interacting pair, this could enhance protein stability. This trade is only allowed if the main-chain conformation of the residues involved in the contact is conserved. Furthermore, no steric impediment is expected between the proposed mutations and the surrounding protein atoms. To evaluate Proteus, we performed two case studies with proteins of industrial interests. In the first case study, we evaluated if the mutations suggested by Proteus for four protein structures enhance the number of inter-residue contacts. Our results suggest that most mutations proposed by Proteus increase the number of interactions into the protein. In the second case study, we used Proteus to suggest mutations for a lysozyme protein. Then, we compared Proteus' outcomes to mutations with available experimental evidence reported in the ProTherm database. Four mutations, in which our results agree with the experimental data, were found. This could be initial evidence that changes in the side-chain of some residues do not cause disturbances that harm protein structure stability. CONCLUSION:We believe that Proteus could be used combined with other methods to give new insights into the rational development of engineered proteins. Proteus user-friendly web-based tool is available at < http://proteus.dcc.ufmg.br >.",2020-07-01 +33564394,gprofiler2 -- an R package for gene list functional enrichment analysis and namespace conversion toolset g:Profiler. ,"g:Profiler ( https://biit.cs.ut.ee/gprofiler) is a widely used gene list functional profiling and namespace conversion toolset that has been contributing to reproducible biological data analysis already since 2007. 
Here we introduce the accompanying R package, gprofiler2, developed to facilitate programmatic access to g:Profiler computations and databases via REST API. The gprofiler2 package provides an easy-to-use functionality that enables researchers to incorporate functional enrichment analysis into automated analysis pipelines written in R. The package also implements interactive visualisation methods to help to interpret the enrichment results and to illustrate them for publications. In addition, gprofiler2 gives access to the versatile gene/protein identifier conversion functionality in g:Profiler enabling to map between hundreds of different identifier types or orthologous species. The gprofiler2 package is freely available at the CRAN repository.",2020-07-15 +34352654,Automatic multiclass intramedullary spinal cord tumor segmentation on MRI with deep learning.,"Spinal cord tumors lead to neurological morbidity and mortality. Being able to obtain morphometric quantification (size, location, growth rate) of the tumor, edema, and cavity can result in improved monitoring and treatment planning. Such quantification requires the segmentation of these structures into three separate classes. However, manual segmentation of three-dimensional structures is time consuming, tedious and prone to intra- and inter-rater variability, motivating the development of automated methods. Here, we tailor a model adapted to the spinal cord tumor segmentation task. Data were obtained from 343 patients using gadolinium-enhanced T1-weighted and T2-weighted MRI scans with cervical, thoracic, and/or lumbar coverage. The dataset includes the three most common intramedullary spinal cord tumor types: astrocytomas, ependymomas, and hemangioblastomas. The proposed approach is a cascaded architecture with U-Net-based models that segments tumors in a two-stage process: locate and label. The model first finds the spinal cord and generates bounding box coordinates. 
The images are cropped according to this output, leading to a reduced field of view, which mitigates class imbalance. The tumor is then segmented. The segmentation of the tumor, cavity, and edema (as a single class) reached 76.7 ± 1.5% of Dice score and the segmentation of tumors alone reached 61.8 ± 4.0% Dice score. The true positive detection rate was above 87% for tumor, edema, and cavity. To the best of our knowledge, this is the first fully automatic deep learning model for spinal cord tumor segmentation. The multiclass segmentation pipeline is available in the Spinal Cord Toolbox (https://spinalcordtoolbox.com/). It can be run with custom data on a regular computer within seconds.",2021-07-22 +31702008,Database Resources of the National Genomics Data Center in 2020.,"The National Genomics Data Center (NGDC) provides a suite of database resources to support worldwide research activities in both academia and industry. With the rapid advancements in higher-throughput and lower-cost sequencing technologies and accordingly the huge volume of multi-omics data generated at exponential scales and rates, NGDC is continually expanding, updating and enriching its core database resources through big data integration and value-added curation. In the past year, efforts for update have been mainly devoted to BioProject, BioSample, GSA, GWH, GVM, NONCODE, LncBook, EWAS Atlas and IC4R. Newly released resources include three human genome databases (PGG.SNV, PGG.Han and CGVD), eLMSG, EWAS Data Hub, GWAS Atlas, iSheep and PADS Arsenal. In addition, four web services, namely, eGPS Cloud, BIG Search, BIG Submission and BIG SSO, have been significantly improved and enhanced. All of these resources along with their services are publicly accessible at https://bigd.big.ac.cn.",2020-01-01 +34673265,A GO catalogue of human DNA-binding transcription factors.,"To control gene transcription, DNA-binding transcription factors recognise specific sequence motifs in gene regulatory regions. 
A complete and reliable GO annotation of all DNA-binding transcription factors is key to investigating the delicate balance of gene regulation in response to environmental and developmental stimuli. The need for such information is demonstrated by the many lists of transcription factors that have been produced over the past decade. The COST Action Gene Regulation Ensemble Effort for the Knowledge Commons (GREEKC) Consortium brought together experts in the field of transcription with the aim of providing high quality and interoperable gene regulatory data. The Gene Ontology (GO) Consortium provides strict definitions for gene product function, including factors that regulate transcription. The collaboration between the GREEKC and GO Consortia has enabled the application of those definitions to produce a new curated catalogue of over 1400 human DNA-binding transcription factors, that can be accessed at https://www.ebi.ac.uk/QuickGO/targetset/dbTF. This catalogue has facilitated an improvement in the GO annotation of human DNA-binding transcription factors and led to the GO annotation of almost sixty thousand DNA-binding transcription factors in over a hundred species. Thus, this work will aid researchers investigating the regulation of transcription in both biomedical and basic science.",2021-10-18 +28415075,HopBase: a unified resource for Humulus genomics. ,"Hop (Humulus lupulus L. var lupulus) is a dioecious plant of worldwide significance, used primarily for bittering and flavoring in brewing beer. Studies on the medicinal properties of several unique compounds produced by hop have led to additional interest from pharmacy and healthcare industries as well as livestock production as a natural antibiotic. Genomic research in hop has resulted a published draft genome and transcriptome assemblies. As research into the genomics of hop has gained interest, there is a critical need for centralized online genomic resources. 
To support the growing research community, we report the development of an online resource ""HopBase.org."" In addition to providing a gene annotation to the existing Shinsuwase draft genome, HopBase makes available genome assemblies and annotations for both the cultivar ""Teamaker"" and male hop accession number USDA 21422M. These genome assemblies, gene annotations, along with other common data, coupled with a genome browser and BLAST database enable the hop community to enter the genomic age. The HopBase genomic resource is accessible at http://hopbase.org and http://hopbase.cgrb.oregonstate.edu.",2017-01-01 +,"Phylogeny, character evolution and tribal classification in Crambinae and Scopariinae (Lepidoptera, Crambidae)","Crambinae (2047 spp.) and Scopariinae (577 spp.) are two major groups of pyraloid moths with a worldwide distribution. Their larvae feed predominantly on Poales and Bryophyta, with many cereal crop pests. We present the first molecular phylogeny of the two groups based on five nuclear genes and one mitochondrial gene (total = 4713 bp) sampled for 58 crambine species representing 56 genera and all tribes, 33 scopariine species representing 12 genera, and species in several other crambid lineages. Maximum likelihood and Bayesian analyses of the molecular data resolve suprageneric relationships in Crambinae and Scopariinae, whereas relationships between these and other subfamilies remain ambiguous. Crambinae and Scopariinae are each recovered as monophyletic groups, and Erupini, formerly regarded as an ingroup of Midilinae, is recovered as a possible sister group of Crambinae. The tree topology suggests the following two major changes within Crambinae: Prionapterygini Landry syn.n. of Ancylolomiini Ragonot stat. rev. and Myelobiini Minet syn.n. of Chiloini Heinemann. Argyriini Munroe is monophyletic after the transfer of Pseudocatharylla Bleszynski and Vaxi Bleszynski to Calamotrophini. 
Crambini, Diptychophorini and Haimbachiini are monophyletic after the exclusion of Ancylolomia Hübner, Euchromius Guenée, Micrelephas Dognin and Miyakea Marumo from Crambini, as well as Microchilo Okano from Diptychophorini. Euchromiini tribe n. is described for Euchromius. Microcramboides Bleszynski syn.n. and Tortriculladia Bleszynski syn.n. are synonymized with Microcrambus Bleszynski. In Scopariinae, Caradjaina Leraut syn.n. and Cholius Guenée syn.n. are synonymized with Scoparia Haworth, and, in addition, Dasyscopa Meyrick syn.n., Dipleurinodes Leraut syn.n. and Eudipleurina Leraut syn.n. are synonymized with Eudonia Billberg. Micraglossa melanoxantha (Turner) (Scoparia) comb.n. is proposed as a new combination. We analysed 27 morphological characters of wing venation, tympanal organs, male and female genitalia, as well as host plant data and egg‐laying behaviour. The ancestral character‐state reconstructions confirmed previous apomorphies and highlighted new apomorphies for some of the newly recovered clades. The derived, nonadhesive egg‐dropping behaviour is found to have evolved at least twice in Crambinae and is associated with the use of Pooideae as host plants. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:1A84282D‐930A‐4C32‐8340‐D681BFF27A12.",2019-10-01 +34448369,PrepFlow: A Toolkit for Chemical Library Preparation and Management for Virtual Screening.,"In the era of big data in Chemistry, the need for automated tools for virtual screening is compelling. Here, we present PrepFlow a toolkit for chemical library preparation and management. Starting from a list of compounds in SMILES or 2D molecular format, PrepFlow outputs a set of 3D molecular structures ready for use in subsequent drug discovery projects. 
Our development stands out for speed and robustness of execution, the efficient exploitation of HPC resources, and the implementation of an archiving strategy to save computer time, storage, and human intervention. Using a random selection of 600 compounds from available drug banks, we show that the preparation time per ligand on a desktop computer is 6.6 s. Thanks to these performances and the automatic parallelization on HPC, a chemical library of the size of ChEMBL (2 M) was prepared in around 3 days on a computer cluster. PrepFlow is freely distributed at the following link: https://ifm.chimie.unistra.fr/prepflow.",2021-08-27 +32730576,Co-phosphorylation networks reveal subtype-specific signaling modules in breast cancer.,"

Motivation

Protein phosphorylation is a ubiquitous mechanism of post-translational modification that plays a central role in cellular signaling. Phosphorylation is particularly important in the context of cancer, as downregulation of tumor suppressors and upregulation of oncogenes by the dysregulation of associated kinase and phosphatase networks are shown to have key roles in tumor growth and progression. Despite recent advances that enable large-scale monitoring of protein phosphorylation, these data are not fully incorporated into such computational tasks as phenotyping and subtyping of cancers.

Results

We develop a network-based algorithm, CoPPNet, to enable unsupervised subtyping of cancers using phosphorylation data. For this purpose, we integrate prior knowledge on evolutionary, structural and functional association of phosphosites, kinase-substrate associations and protein-protein interactions with the correlation of phosphorylation of phosphosites across different tumor samples (a.k.a co-phosphorylation) to construct a context-specific-weighted network of phosphosites. We then mine these networks to identify subnetworks with correlated phosphorylation patterns. We apply the proposed framework to two mass-spectrometry-based phosphorylation datasets for breast cancer (BC), and observe that (i) the phosphorylation pattern of the identified subnetworks are highly correlated with clinically identified subtypes, and (ii) the identified subnetworks are highly reproducible across datasets that are derived from different studies. Our results show that integration of quantitative phosphorylation data with network frameworks can provide mechanistic insights into the differences between the signaling mechanisms that drive BC subtypes. Furthermore, the reproducibility of the identified subnetworks suggests that phosphorylation can provide robust classification of disease response and markers.

Availability and implementation

CoPPNet is available at http://compbio.case.edu/coppnet/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-01 +33620450,VoroCNN: Deep convolutional neural network built on 3D Voronoi tessellation of protein structures. ,"Effective use of evolutionary information has recently led to tremendous progress in computational prediction of three-dimensional (3D) structures of proteins and their complexes. Despite the progress, the accuracy of predicted structures tends to vary considerably from case to case. Since the utility of computational models depends on their accuracy, reliable estimates of deviation between predicted and native structures are of utmost importance. For the first time, we present a deep convolutional neural network (CNN) constructed on a Voronoi tessellation of 3D molecular structures. Despite the irregular data domain, our data representation allows us to efficiently introduce both convolution and pooling operations and train the network in an end-to-end fashion without precomputed descriptors. The resultant model, VoroCNN, predicts local qualities of 3D protein folds. The prediction results are competitive to state of the art and superior to the previous 3D CNN architectures built for the same task. We also discuss practical applications of VoroCNN, for example, in recognition of protein binding interfaces. The model, data, and evaluation tests are available at https://team.inria.fr/nano-d/software/vorocnn/. Supplementary data are available at Bioinformatics online.",2021-02-23 +34165491,MutaFrame - an interpretative visualization framework for deleteriousness prediction of missense variants in the human exome. ,"High-throughput experiments are generating ever increasing amounts of various -omics data, so shedding new light on the link between human disorders, their genetic causes, and the related impact on protein behavior and structure. 
While numerous bioinformatics tools now exist that predict which variants in the human exome cause diseases, few tools predict the reasons why they might do so. Yet, understanding the impact of variants at the molecular level is a prerequisite for the rational development of targeted drugs or personalized therapies. We present the updated MutaFrame webserver, which aims to meet this need. It offers two deleteriousness prediction softwares, DEOGEN2 and SNPMuSiC, and is designed for bioinformaticians and medical researchers who want to gain insights into the origins of monogenic diseases. It contains information at two levels for each human protein: its amino acid sequence and its 3-dimensional structure; we used the experimental structures whenever available, and modeled structures otherwise. MutaFrame also includes higher-level information, such as protein essentiality and protein-protein interactions. It has a user-friendly interface for the interpretation of results and a convenient visualization system for protein structures, in which the variant positions introduced by the user and other structural information are shown. In this way, MutaFrame aids our understanding of the pathogenic processes caused by single-site mutations and their molecular and contextual interpretation. Mutaframe webserver at http://mutaframe.com/. Supplementary data is available at Bioinformatics online.",2021-06-24 +34349788,IntSplice2: Prediction of the Splicing Effects of Intronic Single-Nucleotide Variants Using LightGBM Modeling.,"Prediction of the effect of a single-nucleotide variant (SNV) in an intronic region on aberrant pre-mRNA splicing is challenging except for an SNV affecting the canonical GU/AG splice sites (ss). 
To predict pathogenicity of SNVs at intronic positions -50 (Int-50) to -3 (Int-3) close to the 3' ss, we developed light gradient boosting machine (LightGBM)-based IntSplice2 models using pathogenic SNVs in the human gene mutation database (HGMD) and ClinVar and common SNVs in dbSNP with 0.01 ≤ minor allelic frequency (MAF) < 0.50. The LightGBM models were generated using features representing splicing cis-elements. The average recall/sensitivity and specificity of IntSplice2 by fivefold cross-validation (CV) of the training dataset were 0.764 and 0.884, respectively. The recall/sensitivity of IntSplice2 was lower than the average recall/sensitivity of 0.800 of IntSplice that we previously made with support vector machine (SVM) modeling for the same intronic positions. In contrast, the specificity of IntSplice2 was higher than the average specificity of 0.849 of IntSplice. For benchmarking (BM) of IntSplice2 with IntSplice, we made a test dataset that was not used to train IntSplice. After excluding the test dataset from the training dataset, we generated IntSplice2-BM and compared it with IntSplice using the test dataset. IntSplice2-BM was superior to IntSplice in all of the seven statistical measures of accuracy, precision, recall/sensitivity, specificity, F1 score, negative predictive value (NPV), and matthews correlation coefficient (MCC). We made the IntSplice2 web service at https://www.med.nagoya-u.ac.jp/neurogenetics/IntSplice2.",2021-07-19 +34335757,All-Trans Retinoic Acid Enhances Chemosensitivity to 5-FU by Targeting miR-378c/E2F7 Axis in Colorectal Cancer.,"Colorectal carcinoma (CRC), a life-threatening malignancy, has been found to present resistance to 5-fluorouracil (5-FU) and cause a poor prognosis for patients. Previous studies have proved that all-trans retinoic acid (ATRA) could inhibit the development of CRC cells. In addition, miR-378c was discovered to exert a vital role in various cancers. 
In this study, we utilized MTT (3-(4,5-dimethylthiazol-2-yl)-2,5-diphenyltetrazolium bromide), transwell assay, and flow cytometry to confirm that ATRA was able to enhance the inhibitory effects of 5-FU on HCT116 cells effectively by promoting cell apoptosis. Then, ENCORI database (http://starbase.sysu.edu.cn/) was employed to predict that miR-378c was downregulated dramatically in CRC and E2F7 was the direct target of miR-378c. QRT-PCR (quantitative real-time polymerase chain reaction) was conducted to verify that the expression level of miR-378c was decreased while E2F7 expression was upregulated in CRC tissues compared with para-carcinoma tissues. Additionally, treatment of 5-FU combined with ATRA could increase miR-378c expression, whereas it decreased the expression of E2F7. Dual-Luciferase Reporter assay results revealed that miR-378c could regulate the load of E2F7 by binding to its 3'UTR directly. Furthermore, miR-378c inhibitor or vector with E2F7 partially counteracted the effects of 5-FU combined with ATRA on viability, migration, invasion, and apoptosis of HCT116 cells. In conclusion, our study aims to confirm that ATRA enhances chemosensitivity to 5-FU of patients with CRC and expound the potential molecular mechanisms.",2021-07-19 +31690707,Next-Generation Sequence Databases: RNA and Genomic Informatics Resources for Plants.,"We developed public web sites and resources for data access, display, and analysis of plant small RNAs. These web sites are interconnected with related data types. The current generation of these informatics tools was developed for Illumina data, evolving over more than 15 years of improvements. Our online databases have customized web interfaces to uniquely handle and display RNA-derived data from diverse plant species, ranging from Arabidopsis (Arabidopsis thaliana) to wheat (Triticum spp.), including many crop and model species. 
The web interface displays the abundance and genomic context of data for small RNAs, parallel analysis of RNA ends/degradome reads, RNA sequencing, and even chromatin immunoprecipitation sequencing data; it also provides information about potentially novel transcripts (antisense transcripts, alternative splice isoforms, and regulatory intergenic transcripts). Numerous options are included for downloading data as tables or via web services. Interpretation of these data is facilitated by the inclusion of extensive repeat or transposon data in our genome viewer. We have developed graphical and analytical tools, including a new viewer and a query page for the analysis of phased small RNAs; these are particularly useful for understanding the complex small RNA pathways of plants. These public databases are accessible at https://mpss.danforthcenter.org.",2019-11-05 +27899584,CATH: an expanded resource to predict protein function through structure and sequence.,"The latest version of the CATH-Gene3D protein structure classification database has recently been released (version 4.1, http://www.cathdb.info). The resource comprises over 300 000 domain structures and over 53 million protein domains classified into 2737 homologous superfamilies, doubling the number of predicted protein domains in the previous version. The daily-updated CATH-B, which contains our very latest domain assignment data, provides putative classifications for over 100 000 additional protein domains. 
This article describes developments to the CATH-Gene3D resource over the last two years since the publication in 2015, including: significant increases to our structural and sequence coverage; expansion of the functional families in CATH; building a support vector machine (SVM) to automatically assign domains to superfamilies; improved search facilities to return alignments of query sequences against multiple sequence alignments; the redesign of the web pages and download site.",2016-11-28 +32897080,METATRYP v 2.0: Metaproteomic Least Common Ancestor Analysis for Taxonomic Inference Using Specialized Sequence Assemblies-Standalone Software and Web Servers for Marine Microorganisms and Coronaviruses.,"We present METATRYP version 2 software that identifies shared peptides across the predicted proteomes of organisms within environmental metaproteomics studies to enable accurate taxonomic attribution of peptides during protein inference. Improvements include ingestion of complex sequence assembly data categories (metagenomic and metatranscriptomic assemblies, single cell amplified genomes, and metagenome assembled genomes), prediction of the least common ancestor (LCA) for a peptide shared across multiple organisms, increased performance through updates to the backend architecture, and development of a web portal (https://metatryp.whoi.edu). Major expansion of the marine METATRYP database with predicted proteomes from environmental sequencing confirms a low occurrence of shared tryptic peptides among disparate marine microorganisms, implying tractability for targeted metaproteomics. METATRYP was designed to facilitate ocean metaproteomics and has been integrated into the Ocean Protein Portal (https://oceanproteinportal.org); however, it can be readily applied to other domains. We describe the rapid deployment of a coronavirus-specific web portal (https://metatryp-coronavirus.whoi.edu/) to aid in use of proteomics on coronavirus research during the ongoing pandemic. 
A coronavirus-focused METATRYP database identified potential SARS-CoV-2 peptide biomarkers and indicated very few shared tryptic peptides between SARS-CoV-2 and other disparate taxa analyzed, sharing <1% peptides with taxa outside of the betacoronavirus group, establishing that taxonomic specificity is achievable using tryptic peptide-based proteomic diagnostic approaches.",2020-09-23 +33031965,An Online Calculator for Predicting Academic Career Trajectory in Neurosurgery in the United States.,"

Objective

Determining factors that predict a career in academic neurosurgery can help to improve neurosurgical training and faculty mentoring efforts. Although many academic career predictors have been established in the literature, no method has yet been developed to allow for individualized predictions of an academic career trajectory. The objective of the present study was to develop a Web-based calculator for predicting the probability of a career in academic neurosurgery.

Methods

The present study used data from neurosurgeons listed in the American Association of Neurological Surgeons database. A logistic regression model was used to predict probability of an academic career, and bootstrapping with 2000 samples was used to calculate an optimism-corrected C-statistic. P < 0.05 was considered statistically significant.

Results

A total of 1818 neurosurgeons were included in our analysis. Most surgeons were male (89.7%) and employed in nonacademic positions (60.2%). Factors independently associated with an academic career were female sex, attending a residency program affiliated with a top 10 U.S. News medical school, attaining a Doctor of Philosophy (PhD) degree, attaining a Master of Science (MS) degree, higher h-index during residency, more months of protected research time during residency, and completing a clinical fellowship. Our final model had an optimism-corrected C-statistic of 0.74. This model was incorporated into a Web-based calculator (https://neurooncsurgery.shinyapps.io/academic_calculator/).

Conclusions

The present study consolidates previous research investigating neurosurgery career predictors into a simple, open-access tool. Our work may serve to better clarify the many factors influencing trainees' likelihood of pursuing a career in academic neurosurgery.",2020-10-05 +34596533,Incomplete Multiple Kernel Alignment Maximization for Clustering. ,"Multiple kernel alignment (MKA) maximization criterion has been widely applied into multiple kernel clustering (MKC) and many variants have been recently developed. Though demonstrating superior clustering performance in various applications, it is observed that none of them can effectively handle incomplete MKC, where parts or all of the pre-specified base kernel matrices are incomplete. To address this issue, we propose to integrate the imputation of incomplete kernel matrices and MKA maximization for clustering into a unified learning framework. The clustering of MKA maximization guides the imputation of incomplete kernel elements, and the completed kernel matrices are in turn combined to conduct the subsequent MKC. These two procedures are alternately performed until convergence. By this way, the imputation and MKC processes are seamlessly connected, with the aim to achieve better clustering performance. Besides theoretically analyzing the clustering generalization error bound, we empirically evaluate the clustering performance on five multiple kernel learning (MKL) benchmark datasets, and the results indicate the superiority of our algorithm over existing state-of-the-art counterparts. Our codes and data are publicly available at https://xinwangliu.github.io/.",2021-10-01 +34793516,Influence of adiposity and physical activity on the cardiometabolic association pattern of lipoprotein subclasses to aerobic fitness in prepubertal children.,"Aerobic fitness (AF) and lipoprotein subclasses associate to each other and to cardiovascular health. 
Adiposity and physical activity (PA) influence the association pattern of AF to lipoproteins almost inversely making it difficult to assess their independent and joint influence on the association pattern. This study, including 841 children (50% boys) 10.2 ± 0.3 years old with BMI 18.0 ± 3.0 kg/m2 from rural Western Norway, aimed at examining the association pattern of AF to the lipoprotein subclasses and to estimate the independent and joint influence of PA and adiposity on this pattern. We used multivariate analysis to determine the association pattern of a profile of 26 lipoprotein features to AF with and without adjustment for three measures of adiposity and a high-resolution PA descriptor of 23 intensity intervals derived from accelerometry. For data not adjusted for adiposity or PA, we observed a cardioprotective lipoprotein pattern associating to AF. This pattern withstood adjustment for PA, but the strength of association to AF was reduced by 58%, while adjustment for adiposity weakened the association of AF to the lipoproteins by 85% and with strongest changes in the associations to a cardioprotective high-density lipoprotein subclass pattern. When adjusted for both adiposity and PA, the cardioprotective lipoprotein pattern still associated to AF, but the strength of association was reduced by 90%. Our results imply that the (negative) influence of adiposity on the cardioprotective association pattern of lipoproteins to AF is considerably stronger than the (positive) contribution of PA to this pattern. However, our analysis shows that PA contributes also indirectly through a strong inverse association to adiposity. The trial was registered 7 May, 2014 in clinicaltrials.gov with trial reg. 
no.: NCT02132494 and the URL is https://clinicaltrials.gov/ct2/results?term=NCT02132494&cntry=NO.",2021-11-18 +33553526,Polycyclic aromatic hydrocarbons and stable isotopes of carbon and nitrogen in Baltic Sea blue mussels: Time series data 1981-2016.,"Blue mussels are a target species in contaminant monitoring regarding Polycyclic Aromatic Hydrocarbons (PAHs) in biota, and also used as an isotope baseline for trophic position assessment in other biota. The latter is crucial for calculating biomagnification potential of environmental contaminants. This data set comprises long-term time series of PAHs (15 individual substances) in Baltic Sea blue mussels (Mytilus trossulus edulis) from Kvädöfjärden (collected from a depth of 5-10 m), a reference area along the Swedish coast in the Baltic Proper from 25 years during 1987-2016, and of stable isotopes in five individuals (2 cm mussels) per year during the time period 1981-2017. The data has been co-analysed with environmental (oceanographic) data in ""The importance of adjusting contaminant concentrations using environmental data: a retrospective study of 25 years data in Baltic blue mussels"" published in: Science of the Total Environment (https://doi.org/10.1016/j.scitotenv.2020.143913).",2021-01-18 +34889147,"Effects of reversible cognitive frailty on disability, quality of life, depression, and hospitalization: a prospective cohort study.","

Objectives

Cognitive frailty, a potentially reversible condition describing the concurrence of physical frailty and mild cognitive impairment (MCI), has been recently proposed to incorporate subjective cognitive decline (SCD), a reversible pre-MCI state with more readily available cognitive reserve, as well as pre-physical frailty. Reversible cognitive frailty has been associated with dementia and mortality. We aimed to examine the association of reversible cognitive frailty with other adverse outcomes including disability, poor quality of life (QOL), depression, and hospitalization.

Methods

This was a cohort study with 1-year follow-up among 735 Chinese community-dwelling older adults with intact cognition. Reversible cognitive frailty was operationalized with the presence of pre-physical or physical frailty identified by the Frailty Phenotype and SCD identified by the simplified SCD questionnaire including four self-report cognitive domains of memory, naming, orientation, and mathematical reasoning. Adverse outcomes included incident Activities of Daily Living (ADL)-Instrumental ADL (IADL) disability, poor physical, mental and overall QOL, depression, and hospitalization over 1-year follow-up.

Results

The prevalence of reversible cognitive frailty was 27.8%. Participants with reversible cognitive frailty had higher risk of the incidence of ADL-IADL disability, poor physical QOL, poor mental QOL, poor overall QOL, and depression (Odds Ratios: 1.67-4.38, P < 0.05), but not higher risk of hospitalization over 1-year follow-up.

Conclusion

Reversible cognitive frailty was not uncommon and associated with incident disability, poor QOL, and depression among community-dwelling older adults. Early identification of reversible cognitive frailty can facilitate targeted interventions and may promote independence in older adults. Supplemental data for this article is available online at http://dx.doi.org/10.1080/13607863.2021.2011835.",2021-12-10 +31725863,GRONS: a comprehensive genetic resource of nicotine and smoking. ,"Nicotine, the primary psychoactive component in tobacco, can exert a broad impact on both the central and peripheral nervous systems. During the past years, a tremendous amount of efforts has been put to exploring the molecular mechanisms underlying tobacco smoking related behaviors and diseases, and many susceptibility genes have been identified via various genomic approaches. For many human complex diseases, there is a trend towards collecting and integrating the data from genetic studies and the biological information related to them into a comprehensive resource for further investigation, but we have not found such an effort for nicotine addiction or smoking-related phenotypes yet. To collect, curate, and integrate cross-platform genetic data so as to make them interpretable and easily accessible, we developed Genetic Resources Of Nicotine and Smoking (GRONS), a comprehensive database for genes related to biological response to nicotine exposure, tobacco smoking related behaviors or diseases. GRONS deposits genes from nicotine addiction studies in the following four categories, i.e. association study, genome-wide linkage scan, expression analysis on genes/proteins via high-throughput technologies, as well as single gene/protein-based experimental studies via literature search. 
Moreover, GRONS not only provides tools for data browse, search and graphical presentation of gene prioritization, but also presents the results from comprehensive bioinformatics analyses for the prioritized genes associated with nicotine addiction. With more and more genetic data and analysis tools integrated, GRONS will become a useful resource for studies focusing on nicotine addiction or tobacco smoking. Database URL: http://bioinfo.tmu.edu.cn/GRONS/.",2017-01-01 +31874631,"JCDB: a comprehensive knowledge base for Jatropha curcas, an emerging model for woody energy plants.","

Background

Jatropha curcas is an oil-bearing plant, and has seeds with high oil content (~ 40%). Several advantages, such as easy genetic transformation and short generation duration, have led to the emergence of J. curcas as a model for woody energy plants. With the development of high-throughput sequencing, the genome of Jatropha curcas has been sequenced by different groups and a mass of transcriptome data was released. How to integrate and analyze these omics data is crucial for functional genomics research on J. curcas.

Results

By establishing pipelines for processing novel gene identification, gene function annotation, and gene network construction, we systematically integrated and analyzed a series of J. curcas transcriptome data. Based on these data, we constructed a J. curcas database (JCDB), which not only includes general gene information, gene functional annotation, gene interaction networks, and gene expression matrices but also provides tools for browsing, searching, and downloading data, as well as online BLAST, the JBrowse genome browser, ID conversion, heatmaps, and gene network analysis tools.

Conclusions

JCDB is the most comprehensive and well annotated knowledge base for J. curcas. We believe it will make a valuable contribution to the functional genomics study of J. curcas. The database is accessible at http://jcdb.xtbg.ac.cn.",2019-12-24 +32786900,Bioactive Conformational Ensemble Server and Database. A Public Framework to Speed Up In Silico Drug Discovery.,"Modern high-throughput structure-based drug discovery algorithms consider ligand flexibility, but typically with low accuracy, which results in a loss of performance in the derived models. Here we present the bioactive conformational ensemble (BCE) server and its associated database. The server creates conformational ensembles of drug-like ligands and stores them in the BCE database, where a variety of analyses are offered to the user. The workflow implemented in the BCE server combines enhanced sampling molecular dynamics with self-consistent reaction field quantum mechanics (SCRF/QM) calculations. The server automatizes all of the steps to transform one-dimensional (1D) or 2D representation of drugs into 3D molecules, which are then titrated, parametrized, hydrated, and optimized before being subjected to Hamiltonian replica-exchange (HREX) molecular dynamics simulations. Ensembles are collected and subjected to a clustering procedure to derive representative conformers, which are then analyzed at the SCRF/QM level of theory. All structural data are organized in a noSQL database accessible through a graphical interface and in a programmatic manner through a REST API. The server allows the user to define a private workspace and offers a deposition protocol as well as input files for ""in house"" calculations in those cases where confidentiality is a must. The database and the associated server are available at https://mmb.irbbarcelona.org/BCE.",2020-09-01 +33932985,SPEAQeasy: a scalable pipeline for expression analysis and quantification for R/bioconductor-powered RNA-seq analyses.,"

Background

RNA sequencing (RNA-seq) is a common and widespread biological assay, and an increasing amount of data is generated with it. In practice, there are a large number of individual steps a researcher must perform before raw RNA-seq reads yield directly valuable information, such as differential gene expression data. Existing software tools are typically specialized, only performing one step-such as alignment of reads to a reference genome-of a larger workflow. The demand for a more comprehensive and reproducible workflow has led to the production of a number of publicly available RNA-seq pipelines. However, we have found that most require computational expertise to set up or share among several users, are not actively maintained, or lack features we have found to be important in our own analyses.

Results

In response to these concerns, we have developed a Scalable Pipeline for Expression Analysis and Quantification (SPEAQeasy), which is easy to install and share, and provides a bridge towards R/Bioconductor downstream analysis solutions. SPEAQeasy is portable across computational frameworks (SGE, SLURM, local, docker integration) and different configuration files are provided ( http://research.libd.org/SPEAQeasy/ ).

Conclusions

SPEAQeasy is user-friendly and lowers the computational-domain entry barrier for biologists and clinicians to RNA-seq data processing as the main input file is a table with sample names and their corresponding FASTQ files. The goal is to provide a flexible pipeline that is immediately usable by researchers, regardless of their technical background or computing environment.",2021-05-01 +27899678,Virus Variation Resource - improved response to emergent viral outbreaks.,"The Virus Variation Resource is a value-added viral sequence data resource hosted by the National Center for Biotechnology Information. The resource is located at http://www.ncbi.nlm.nih.gov/genome/viruses/variation/ and includes modules for seven viral groups: influenza virus, Dengue virus, West Nile virus, Ebolavirus, MERS coronavirus, Rotavirus A and Zika virus Each module is supported by pipelines that scan newly released GenBank records, annotate genes and proteins and parse sample descriptors and then map them to controlled vocabulary. These processes in turn support a purpose-built search interface where users can select sequences based on standardized gene, protein and metadata terms. Once sequences are selected, a suite of tools for downloading data, multi-sequence alignment and tree building supports a variety of user directed activities. This manuscript describes a series of features and functionalities recently added to the Virus Variation Resource.",2016-11-28 +31638900,"Genetic diversity and population structure of the sweet leaf herb, Stevia rebaudiana B., cultivated and landraces germplasm assessed by EST-SSRs genotyping and steviol glycosides phenotyping.","BACKGROUND:Stevia rebaudiana (Asteraceae), native from Paraguay, accumulates steviol glycosides (SGs) into its leaves. These compounds exhibit acaloric intense sweet taste which answers to consumer demands for reducing daily sugar intake. Despite the developpement of S. 
rebaudiana cultivation all over the world, the development of new cultivars is very recent, in particular due to a colossal lack of (1) germplasm collection and breeding, (2) studies on genetic diversity and its structuring, (3) genomic tools. RESULTS:In this study, we developed 18 EST-SSR from 150,258 EST from The Compositae Genome Project of UC Davis ( http://compgenomics.ucdavis.edu/data/ ). We genotyped 145 S. rebaudiana individuals, issued from thirty-one cultivars and thirty-one landraces of various origins worldwide. Markers polymorphic information content (PIC) ranged between 0.60 and 0.84. An average of 12 alleles per locus and a high observed heterozygosity of 0.69 could be observed. The landraces revealed twice as many private alleles as cultivars. The genotypes could be clustered into 3 genetic populations. The landraces were grouped in the same cluster in which the oldest cultivars ""Eirete"" and ""MoritaIII"" type are also found. The other two clusters only include cultivated genotypes. One of them revealed an original genetic variability. SG phenotypes could not discriminate the three genetic clusters but phenotyping showed a wide range of composition in terms of bitter to sweet SGs. CONCLUSION:This is the first study of genetic diversity in Stevia rebaudiana involving 145 genotypes, including known cultivars as well as landrace populations of different origin. This study pointed out the structuration of S. rebaudiana germplasm and the resource of the landrace populations for genetic improvement, even on the trait of SG's composition.",2019-10-21 +31675495,Cellular Senescence: Defining a Path Forward.,"Cellular senescence is a cell state implicated in various physiological processes and a wide spectrum of age-related diseases. Recently, interest in therapeutically targeting senescence to improve healthy aging and age-related disease, otherwise known as senotherapy, has been growing rapidly. 
Thus, the accurate detection of senescent cells, especially in vivo, is essential. Here, we present a consensus from the International Cell Senescence Association (ICSA), defining and discussing key cellular and molecular features of senescence and offering recommendations on how to use them as biomarkers. We also present a resource tool to facilitate the identification of genes linked with senescence, SeneQuest (available at http://Senequest.net). Lastly, we propose an algorithm to accurately assess and quantify senescence, both in cultured cells and in vivo.",2019-10-01 +34887491,Randomized trial of two artificial intelligence coaching interventions to increase physical activity in cancer survivors.,"Physical activity (PA) has numerous health benefits. Personalized coaching may increase adherence to PA recommendations, but it is challenging to deliver personalized coaching in a scalable manner. The objective of our study was to determine whether novel artificially intelligent (AI) coaching interventions increase PA among overweight or obese, physically inactive cancer survivors compared to a control arm that receives health information. We conducted a single-center, three-arm randomized trial with equal allocation to (1) voice-assisted AI coaching delivered by smart speaker (MyCoach), (2) autonomous AI coaching delivered by text message (SmartText), and (3) control. Data collection was automated via sensors and voice technology, effectively masking outcome ascertainment. The primary outcome was change in mean steps per day from baseline to the end of follow-up at 4 weeks. Of the 42 randomized participants, 91% were female, and 36% were Black; mean age was 62.1 years, and mean BMI was 32.9 kg/m2. The majority were breast cancer survivors (85.7%). 
At the end of 4 weeks follow-up, steps increased in the MyCoach arm by an average of 3618.2 steps/day; the net gain in this arm was significantly greater [net difference = 3568.9 steps/day (95% CI: 1483-5655), P value <0.001] compared to control arm, and [net difference = 2160.6 steps/day (95% CI: 11-4310), P value 0.049] compared to SmartText. In conclusion, AI-based voice-assisted coaching shows promise as a practical method of delivering scalable, individualized coaching to increase physical activity in sedentary cancer survivors. Additional research is needed to replicate these findings in a broader population of cancer survivors and to investigate the effects of these interventions in the general population. ClinicalTrials.gov Identifier: NCT03212079, July 11, 2017, https://clinicaltrials.gov/ct2/show/NCT03212079 .",2021-12-09 +34508973,UICPC: Centrality-based clustering for scRNA-seq data analysis without user input.,"scRNA-seq data analysis enables new possibilities for identification of novel cells, specific characterization of known cells and study of cell heterogeneity. The performance of most clustering methods especially developed for scRNA-seq is greatly influenced by user input. We propose a centrality-clustering method named UICPC and compare its performance with 9 state-of-the-art clustering methods on 11 real-world scRNA-seq datasets to demonstrate its effectiveness and usefulness in discovering cell groups. Our method does not require user input. However, it requires settings of threshold, which are benchmarked after performing extensive experiments. We observe that most compared approaches show poor performance due to high heterogeneity and large dataset dimensions. However, UICPC shows excellent performance in terms of NMI, Purity and ARI, respectively. 
UICPC is available as an R package and can be downloaded by clicking the link https://sites.google.com/view/hussinchowdhury/software.",2021-09-03 +34896348,Predicting High-Value Care Outcomes After Surgery for Non-Skull Base Meningiomas.,"

Objective

A need exists to better understand the prognostic factors that influence high-value care outcomes after meningioma surgery. The goal of the present study was to develop predictive models to determine the patients at risk of experiencing an extended hospital length of stay (LOS), nonroutine discharge disposition, and/or a 90-day hospital readmission after non-skull base meningioma resection.

Methods

In the present study, we analyzed the data from 396 patients who had undergone surgical resection of non-skull base meningiomas at a single institution between January 1, 2005 and December 31, 2020. The Mann-Whitney U test was used for bivariate analysis of the continuous variables and the Fisher exact test for bivariate analysis of the categorical variables. A multivariate analysis was conducted using logistic regression models.

Results

Most patients had had a falcine or parasagittal meningioma (66.2%), with the remainder having convexity (31.8%) or intraventricular (2.0%) tumors. Nonelective surgery (P < 0.0001) and an increased tumor volume (P = 0.0022) were significantly associated with a LOS >4 days on multivariate analysis. The independent predictors of a nonroutine discharge disposition included male sex (P = 0.0090), nonmarried status (P = 0.024), nonelective surgery (P = 0.0067), tumor location within the parasagittal or intraventricular region (P = 0.0084), and an increased modified frailty index score (P = 0.039). Hospital readmission within 90 days was independently associated with nonprivate insurance (P = 0.010) and nonmarried status (P = 0.0081). Three models predicting for a prolonged LOS, nonroutine discharge disposition, and 90-day readmission were implemented in the form of an open-access, online calculator (available at: https://neurooncsurgery3.shinyapps.io/non_skull_base_meningiomas/).

Conclusions

After external validation, our open-access, online calculator could be useful for assessing the likelihood of adverse postoperative outcomes for patients undergoing surgery of non-skull base meningioma.",2021-12-08 +34734968,Density-based binning of gene clusters to infer function or evolutionary history using GeneGrouper. ,"Identifying variant forms of gene clusters of interest in phylogenetically proximate and distant taxa can help to infer their evolutionary histories and functions. Conserved gene clusters may differ by only a few genes, but these small differences can in turn induce substantial phenotypes, such as by the formation of pseudogenes or insertions interrupting regulation. Particularly as microbial genomes and metagenomic assemblies become increasingly abundant, unsupervised grouping of similar, but not necessarily identical, gene clusters into consistent bins can provide a population-level understanding of their gene content variation and functional homology. We developed GeneGrouper, a command-line tool that uses a density-based clustering method to group gene clusters into bins. GeneGrouper demonstrated high recall and precision in benchmarks for the detection of the 23-gene Salmonella enterica LT2 Pdu gene cluster and four-gene Pseudomonas aeruginosa PAO1 Mex gene cluster among 435 genomes spanning mixed taxa. In a subsequent application investigating the diversity and impact of gene-complete and -incomplete LT2 Pdu gene clusters in 1130 S. enterica genomes, GeneGrouper identified a novel, frequently occurring pduN pseudogene. When investigated in vivo, introduction of the pduN pseudogene negatively impacted microcompartment formation. We next demonstrated the versatility of GeneGrouper by clustering distant homologous gene clusters and variable gene clusters found in integrative and conjugative elements. GeneGrouper software and code are publicly available at https://pypi.org/project/GeneGrouper/. 
Supplementary data are available at Bioinformatics online.",2021-11-04 +31006799,"A comprehensive reference transcriptome resource for the Iberian ribbed newt Pleurodeles waltl, an emerging model for developmental and regeneration biology.","Urodele newts have unique biological properties, notably including prominent regeneration ability. The Iberian ribbed newt, Pleurodeles waltl, is a promising model amphibian distinguished by ease of breeding and efficient transgenic and genome editing methods. However, limited genetic information is available for P. waltl. We conducted an intensive transcriptome analysis of P. waltl using RNA-sequencing to build and annotate gene models. We generated 1.2 billion Illumina reads from a wide variety of samples across 12 different tissues/organs, unfertilized egg, and embryos at eight different developmental stages. These reads were assembled into 1,395,387 contigs, from which 202,788 non-redundant ORF models were constructed. The set is expected to cover a large fraction of P. waltl protein-coding genes, as confirmed by BUSCO analysis, where 98% of universal single-copy orthologs were identified. Ortholog analyses revealed the gene repertoire evolution of urodele amphibians. Using the gene set as a reference, gene network analysis identified regeneration-, developmental-stage-, and tissue-specific co-expressed gene modules. Our transcriptome resource is expected to enhance future research employing this emerging model animal for regeneration research as well as for investigations in other areas including developmental biology, stem cell biology, and cancer research. 
These data are available via our portal website, iNewt (http://www.nibb.ac.jp/imori/main/).",2019-06-01 +33411784,Optimal tuning of weighted kNN- and diffusion-based methods for denoising single cell genomics data.,"The analysis of single-cell genomics data presents several statistical challenges, and extensive efforts have been made to produce methods for the analysis of this data that impute missing values, address sampling issues and quantify and correct for noise. In spite of such efforts, no consensus on best practices has been established and all current approaches vary substantially based on the available data and empirical tests. The k-Nearest Neighbor Graph (kNN-G) is often used to infer the identities of, and relationships between, cells and is the basis of many widely used dimensionality-reduction and projection methods. The kNN-G has also been the basis for imputation methods using, e.g., neighbor averaging and graph diffusion. However, due to the lack of an agreed-upon optimal objective function for choosing hyperparameters, these methods tend to oversmooth data, thereby resulting in a loss of information with regard to cell identity and the specific gene-to-gene patterns underlying regulatory mechanisms. In this paper, we investigate the tuning of kNN- and diffusion-based denoising methods with a novel non-stochastic method for optimally preserving biologically relevant informative variance in single-cell data. The framework, Denoising Expression data with a Weighted Affinity Kernel and Self-Supervision (DEWÄKSS), uses a self-supervised technique to tune its parameters. 
We demonstrate that denoising with optimal parameters selected by our objective function (i) is robust to preprocessing methods using data from established benchmarks, (ii) disentangles cellular identity and maintains robust clusters over dimension-reduction methods, (iii) maintains variance along several expression dimensions, unlike previous heuristic-based methods that tend to oversmooth data variance, and (iv) rarely involves diffusion but rather uses a fixed weighted kNN graph for denoising. Together, these findings provide a new understanding of kNN- and diffusion-based denoising methods. Code and example data for DEWÄKSS is available at https://gitlab.com/Xparx/dewakss/-/tree/Tjarnberg2020branch.",2021-01-07 +30045691,CIGene: a literature-based online resource for cancer initiation genes.,"

Background

Cancer initiation genes (CIGs) are genes that can directly promote cell proliferation or induce cancer. There are thousands of published studies identifying various CIGs; however, no systematic collection or description is available.

Results

To construct a CIG reference for genetic screening, we have collected 177 human genes curated from 1507 PubMed abstracts. To facilitate data queries and browsing, the identified CIGs along with extensive bioinformatic annotations were stored in an online database called CIGene. Initial functional analysis revealed an overlooked role for cell motility in cancer initiation. Subsequent cross-referencing of known tumor suppressor genes and oncogenes against the 177 CIGs identified 96 and 81 CIGs with and without known oncogenic roles, respectively. Successive network analyses of all 177 CIGs determined that the two groups of genes were more likely to link within their group. The distinct molecular functions for these groups were also confirmed with functional studies. While the 96 known oncogenic genes had fundamental roles in gene regulation and signaling, the remaining 81 genes possessed more ancillary functions, such as enhancer binding. Further network and mutational analysis of the 96 known oncogenic genes revealed that mutations in these genes were highly prevalent in multiple cancers. By focusing on breast cancer, we found that 32 of the 96 genes with mutations in breast cancers were significantly associated with patient survival.

Conclusions

As the first literature-based online resource for CIGs, CIGene will serve as a useful gateway for the systematic analysis of cancer initiation. CIGene is freely available to all academic users at http://soft.bioinfo-minzhao.org/cigene/ .",2018-07-25 +34387143,"Links between Virginity Beliefs, Negative Feelings after Virginity Loss and Sexual Performance Anxiety in a Sample of German-Speaking Heterosexual-Identified Women and Men.","Gender norms can influence women and men adopting different beliefs toward their own virginity. The current online cross-sectional questionnaire study was applied in a sample of German-speaking heterosexual-identified women (n = 536) and men (n = 181; Mage = 23.6, SD = 3.7). In men negative virginity loss experiences and sexual performance anxiety were especially prevalent when virginity loss occurred at an age that was inconsistent with men's virginity beliefs. In women age at virginity loss was not linked to virginity loss experiences or sexual performance anxiety, but the holding of virginity beliefs that deviated from gender norms was associated with those variables.Supplemental data for this article is available online at https://doi.org/10.1080/0092623X.2021.1958963 .",2021-08-13 +33001472,The effect of immunonutrition in patients with acute pancreatitis: a systematic review and meta-analysis.,"

Background

The effect of immunonutrition is controversial compared to standard supplementation with respect to the management of patients with acute pancreatitis.

Methods

An online literature search on four databases (PubMed, Cochrane, Embase and Web of Science) was performed to identify all of the randomised controlled trials assessing the effects of enteral or parenteral immunonutrition in acute pancreatitis. A fixed or random effects model was chosen using revman, version 5.3 (https://revman.cochrane.org). The count data were analysed using the risk ratio (RR) and 95% confidence interval (CI).

Results

Five hundred and sixty-eight patients were included via our search in which 14 articles matched our criteria for enrolling the meta-analysis. Immunonutrition significantly reduced the risk of organ failure (RR = 0.42; 95% CI = 0.26-0.70, P = 0.0008), infectious complications (RR = 0.78; 95% CI = 0.62-0.99; P = 0.04) and mortality (RR = 0.37; 95% CI = 0.21-0.66; P = 0.006). Length of hospital stay was also shorter in patients who received immunonutrition (mean difference = -1.73 days; 95% CI = -2.36 to -1.10; P < 0.00001). Total interventions of patients were decreased (RR = 0.73; 95% CI = 0.55-0.97; P = 0.03). Body mass index in patients with immunonutrition was reduced more than standard nutrition (mean difference = -2.00; 95% CI = -3.96 to -0.04; P = 0.05).

Conclusions

Immunonutrition support such as glutamine and ω-3 fatty acids is potentially beneficial with respect to improving clinical outcomes in patients with acute pancreatitis.",2020-10-01 +33558695,Transcriptome-scale spatial gene expression in the human dorsolateral prefrontal cortex.,"We used the 10x Genomics Visium platform to define the spatial topography of gene expression in the six-layered human dorsolateral prefrontal cortex. We identified extensive layer-enriched expression signatures and refined associations to previous laminar markers. We overlaid our laminar expression signatures on large-scale single nucleus RNA-sequencing data, enhancing spatial annotation of expression-driven clusters. By integrating neuropsychiatric disorder gene sets, we showed differential layer-enriched expression of genes associated with schizophrenia and autism spectrum disorder, highlighting the clinical relevance of spatially defined expression. We then developed a data-driven framework to define unsupervised clusters in spatial transcriptomics data, which can be applied to other tissues or brain regions in which morphological architecture is not as well defined as cortical laminae. Last, we created a web application for the scientific community to explore these raw and summarized data to augment ongoing neuroscience and spatial transcriptomics research ( http://research.libd.org/spatialLIBD ).",2021-02-08 +34459349,Difficulties with Learning Musculoskeletal Physical Examination Skills: Student Perspectives and General Lessons Learned for Curricular Design.,"Phenomenon: The development of foundational clinical skills, such as physical examination, is essential to becoming a competent clinician. Musculoskeletal medicine is often considered a specialized area of practice despite the high prevalence of musculoskeletal conditions in the general population and presenting to general clinical practices. 
Prior work has shown that medical learners and practicing clinicians have low confidence in these skills but understanding of the student perspective on why these skills are more difficult to acquire is unclear. Approach: Our study was guided by social constructivist learning theory to explore the learner experience and present their perspectives. Qualitative analysis investigated the difference between learning musculoskeletal physical examination versus other body systems, using the voices from 11 semi-structured focus group interviews. Participants included third-year medical students across two academic cohorts at one institution. Our analysis was grounded in the principles of phenomenology and used triangulation and reflexivity to provide rigorous analysis. Findings: Students provided rich and insightful perspectives regarding their experiences in learning musculoskeletal physical examination techniques. Four themes were developed from our data: a) the need for opportunities for both supervised and self-directed practice; b) assessment and competence as motivations for learning; c) the need for a different approach to the content and structure of musculoskeletal medicine and its associated examination techniques; and d) the need for distinct expertise and technical skill from musculoskeletal examination teachers. Insights: This study provides a valuable lens to critically reflect on existing curriculum and pedagogical approaches to musculoskeletal examination skills. Lessons from this study may be applicable to curriculum design in general, especially the teaching of physical examination skills, such as how it is taught and integrated with other content (including anatomy), how much practice is required, who teaches physical examination skills, and what faculty development is needed to standardize teaching. 
Promoting a learner-centered approach to the teaching and learning of these clinical skills will be beneficial to all stakeholders, especially to our future physicians and their patients.Supplemental data for this article is available online at https://doi.org/10.1080/10401334.2021.1954930 .",2021-08-28 +30424756,X-search: an open access interface for cross-cohort exploration of the National Sleep Research Resource.,"

Background

The National Sleep Research Resource (NSRR) is a large-scale, openly shared, data repository of de-identified, highly curated clinical sleep data from multiple NIH-funded epidemiological studies. Although many data repositories allow users to browse their content, few support fine-grained, cross-cohort query and exploration at study-subject level. We introduce a cross-cohort query and exploration system, called X-search, to enable researchers to query patient cohort counts across a growing number of completed, NIH-funded studies in NSRR and explore the feasibility or likelihood of reusing the data for research studies.

Methods

X-search has been designed as a general framework with two loosely-coupled components: semantically annotated data repository and cross-cohort exploration engine. The semantically annotated data repository is comprised of a canonical data dictionary, data sources with a data dictionary, and mappings between each individual data dictionary and the canonical data dictionary. The cross-cohort exploration engine consists of five modules: query builder, graphical exploration, case-control exploration, query translation, and query execution. The canonical data dictionary serves as the unified metadata to drive the visual exploration interfaces and facilitate query translation through the mappings.

Results

X-search is publicly available at https://www.x-search.net/ with nine NSRR datasets consisting of over 26,000 unique subjects. The canonical data dictionary contains over 900 common data elements across the datasets. X-search has received over 1800 cross-cohort queries by users from 16 countries.

Conclusions

X-search provides a powerful cross-cohort exploration interface for querying and exploring heterogeneous datasets in the NSRR data repository, so as to enable researchers to evaluate the feasibility of potential research studies and generate potential hypotheses using the NSRR data.",2018-11-13 +34738573,Epidemiology of Tick-borne encephalitis in North-Eastern Italy (2017-2020): international insights from national notification reports.,"Sir, Italy is usually considered as a low-risk country for Tick Borne Encephalitis (TBE), a potentially severe disorder caused by the flavivirus TBE virus (TBEV) [1,2]. Endemicity for TBEV is historically restricted to the North-Eastern Regions of ""Triveneto"" (i.e. autonomous provinces of Trento [APT] and Bolzano [APB], and the regions of Veneto, and Friuli-Venezia-Giulia; total area 39,875.87 km2; total population 7,163,418 inhabitants according to 2020 census), with a notification rate estimated in 0.38 cases per 100,000 during the time period 2000-2013 [1,3].   Even though national estimates are substantially below the cut-off value of 5 cases per 100,000 that recommend active vaccination policies for the general population [1,2,4], overall incidence is on the rise, mirroring the pan-European trend [5,6]. The causes reasonably include a mixture of environmental (e.g. climate changes) and behavioral factors that eventually increase the likelihood of human interactions with a competent vector (i.e. Ixodes spp) from areas where the pathogen highly circulating in appropriate hosts (i.e. rodents and ungulates) [3,4].   In this regard, we think that a retrospective analysis of annual reports from Italian National Health Institute (Istituto Superiore di Sanità, or ISS; https://www.epicentro.iss.it/arbovirosi/bollettini) [7] may shed some insights on the ongoing epidemiology of TBEV in Italy.   
According to official figures, a total of 103 Italian cases occurred between 2017 and 2020, 100 of them in the Triveneto, with a pooled incidence rate (IR) of 0.35 per 100,000 [95%CI 0.28-0.42] (Figure 1). Annual estimates peaked in 2018 (0.54 per 100,000 [95%CI 0.39-0.74]), but overall figures remain quite low, in particular when compared to nearby countries likewise Austria (399 cases, mean IR 1.51 per 100,000) or Slovenia (366 cases, mean IR 4.61 per 100,000), and Switzerland (377 cases reported in 2018 alone; crude IR 4.41 per 100,000) [8]. However, such figures require some comments.   Firstly, ISS bulletins report only on TBE cases characterized by meningitis and/or encephalitis, similarly to the figures reported by Austria and Slovenia, while Switzerland authorities usually report on all newly diagnosed infections, irrespective of complained symptoms [7,8]. In facts, only 20% to 30% of all TBEV infections usually evolve in CNS involvement [1,2]. Second, the mandatory reporting systems reportedly failed to recall a large share of patients (up to 45%) if hospital discharge data were not appropriately integrated [2]. In other words, it is reasonable that Italian figures may largely underestimate actual epidemiology of TBEV, particularly for the endemic areas of Triveneto [1,2]. Supposing a dropout rate of 45%, and assuming that TBE cases with CNS impairment would represent no more than 30% of actual TBEV infections, actual Italian burden between 2017 and 2020 may be estimated to 152 cases/year (95%CI 59.7-243.3) for Triveneto alone, with an IR equals to 2.8 per 100,000, i.e. an estimate approximating aforementioned figures for Switzerland in 2018.   Third, pooled Italian figures mask something alike ""a tale of two stories"". On the one hand, during the time period 2018 - 2020, Veneto, Friuli-Venezia-Giulia, and APB, exhibited incidence rates were alike the overall estimates for 2000 - 2013 (Figure 1). 
For example, IR for Friuli-Venezia-Giulia was 0.14 per 100,000 [95%CI 0.04-0.32], with a corresponding incidence rate ratio (IRR) of 0.44 [95%CI 0.19-1.01] compared to overall figures for 2000-2013. On the other hand, despite the active vaccination campaigns put in place by the local Authorities [4], estimates for the APT peaked at 1.96 cases per 100,000 [95%CI 1.34-2.77], with an IRR equal to 5.63 [95%CI 4.02-7.76]. As available evidence suggests that the majority of APT cases are clustered in some foci of hyperendemicity for TBEV-infected ticks [1,9], a possible explanation for these results may be found through the ""one health approach"", i.e. by summarizing environmental data with evidence from human and veterinary medicine. In fact, the mountainous territories of the APT have become a popular holiday destination for Italian and foreign tourists [4], but mostly represent appropriate habitats for both tick vectors and usual hosts for TBEV, and particularly ungulates [9]. Interestingly, their number remained substantially stable in the APT until 2005, roughly doubling in the following decade [10]. That led to increasing interactions between humans and wild animals, and such a trend was somewhat mirrored by the increasing occurrence of TBEV infections [1,3,4,9].   Finally, the TBEV strain isolated in APT is only distantly related to those from other areas of Triveneto, rather belonging to the TBEV-Eu subtype that is highly circulating in Central Europe since 2012 [9]. TBEV-Eu has been identified in migratory birds, including those hosted on their route by the forests of APT. If the migratory birds are the key player in the spreading of TBEV-Eu across Europe, their migration could in turn explain the heterogeneity of APT compared not only to bordering countries of Austria and Slovenia, but also to the nearby APB. 
In turn, such features suggest that TBEV-Eu could rapidly spread even in areas not usually associated with TBEV endemicity, not only in Italy but also in Southern Europe.   Therefore, Italian data stress the potentially extensive underestimation for TBEV infections, at least in North-Eastern region, and the significance of TBEV-Eu strain in the epidemiology of TBE emphasize the importance for appropriate surveillance of TBE cases, also in terms of genetic analysis.",2021-11-03 +34141842,"Data on the present and future distribution of suitable niches of the black vanilla orchid (Nigritella nigra s.l., Orchidaceae) and its pollinators.","The black vanilla orchid (Nigritella nigra s.l.) is a perennial plant found in the main European mountain ranges. It occurs in large numbers in the Alps, but it has become a rare and endangered species in Scandinavia due to the loss of suitable habitats. Here we present occurrence data on the occurrence of N. nigra s.l. and pollinators of this species which were used to evaluate the impact of climate change on the future distribution of the black vanilla orchid and its pollen vectors. Moreover, the values of bioclimatic variables for each locality are provided. The binary distribution models of both, orchids and insects, created using ecological niche modeling (ENM) technique are presented together with the information about changes in the coverage of suitable niches of studied organisms. Our data were used to evaluate the impact of climate change on orchid and its pollinator (https://doi.org/10.1016/j.gecco.2021.e01560) and datasets can be reused in other research on past and future distribution of suitable niches of the black vanilla orchid and its pollinators as well as in other biogeographical studies. 
Moreover, presented outcomes of research can be useful in establishing conservation plans for montane orchids and their pollinators.",2021-05-30 +34095222,HEMNMA-3D: Cryo Electron Tomography Method Based on Normal Mode Analysis to Study Continuous Conformational Variability of Macromolecular Complexes.,"Cryogenic electron tomography (cryo-ET) allows structural determination of biomolecules in their native environment (in situ). Its potential of providing information on the dynamics of macromolecular complexes in cells is still largely unexploited, due to the challenges of the data analysis. The crowded cell environment and continuous conformational changes of complexes make difficult disentangling the data heterogeneity. We present HEMNMA-3D, which is, to the best of our knowledge, the first method for analyzing cryo electron subtomograms in terms of continuous conformational changes of complexes. HEMNMA-3D uses a combination of elastic and rigid-body 3D-to-3D iterative alignments of a flexible 3D reference (atomic structure or electron microscopy density map) to match the conformation, orientation, and position of the complex in each subtomogram. The elastic matching combines molecular mechanics simulation (Normal Mode Analysis of the 3D reference) and experimental, subtomogram data analysis. The rigid-body alignment includes compensation for the missing wedge, due to the limited tilt angle of cryo-ET. The conformational parameters (amplitudes of normal modes) of the complexes in subtomograms obtained through the alignment are processed to visualize the distribution of conformations in a space of lower dimension (typically, 2D or 3D) referred to as space of conformations. 
This allows a visually interpretable insight into the dynamics of the complexes, by calculating 3D averages of subtomograms with similar conformations from selected (densest) regions and by recording movies of the 3D reference's displacement along selected trajectories through the densest regions. We describe HEMNMA-3D and show its validation using synthetic datasets. We apply HEMNMA-3D to an experimental dataset describing in situ nucleosome conformational variability. HEMNMA-3D software is available freely (open-source) as part of ContinuousFlex plugin of Scipion V3.0 (http://scipion.i2pc.es).",2021-05-19 +32576650,GapMind: Automated Annotation of Amino Acid Biosynthesis. ,"GapMind is a Web-based tool for annotating amino acid biosynthesis in bacteria and archaea (http://papers.genomics.lbl.gov/gaps). GapMind incorporates many variant pathways and 130 different reactions, and it analyzes a genome in just 15 s. To avoid error-prone transitive annotations, GapMind relies primarily on a database of experimentally characterized proteins. GapMind correctly handles fusion proteins and split proteins, which often cause errors for best-hit approaches. To improve GapMind's coverage, we examined genetic data from 35 bacteria that grow in defined media without amino acids, and we filled many gaps in amino acid biosynthesis pathways. For example, we identified additional genes for arginine synthesis with succinylated intermediates in Bacteroides thetaiotaomicron, and we propose that Dyella japonica synthesizes tyrosine from phenylalanine. Nevertheless, for many bacteria and archaea that grow in minimal media, genes for some steps still cannot be identified. To help interpret potential gaps, GapMind checks if they match known gaps in related microbes that can grow in minimal media. GapMind should aid the identification of microbial growth requirements.IMPORTANCE Many microbes can make all of the amino acids (the building blocks of proteins). 
In principle, we should be able to predict which amino acids a microbe can make, and which it requires as nutrients, by checking its genome sequence for all of the necessary genes. However, in practice, it is difficult to check for all of the alternative pathways. Furthermore, new pathways and enzymes are still being discovered. We built an automated tool, GapMind, to annotate amino acid biosynthesis in bacterial and archaeal genomes. We used GapMind to list gaps: cases where a microbe makes an amino acid but a complete pathway cannot be identified in its genome. We used these gaps, together with data from mutants, to identify new pathways and enzymes. However, for most bacteria and archaea, we still do not know how they can make all of the amino acids.",2020-06-23 +33974106,"Natural history, trajectory, and management of mechanically ventilated COVID-19 patients in the United Kingdom.","

Purpose

The trajectory of mechanically ventilated patients with coronavirus disease 2019 (COVID-19) is essential for clinical decisions, yet the focus so far has been on admission characteristics without consideration of the dynamic course of the disease in the context of applied therapeutic interventions.

Methods

We included adult patients undergoing invasive mechanical ventilation (IMV) within 48 h of intensive care unit (ICU) admission with complete clinical data until ICU death or discharge. We examined the importance of factors associated with disease progression over the first week, implementation and responsiveness to interventions used in acute respiratory distress syndrome (ARDS), and ICU outcome. We used machine learning (ML) and Explainable Artificial Intelligence (XAI) methods to characterise the evolution of clinical parameters and our ICU data visualisation tool is available as a web-based widget ( https://www.CovidUK.ICU ).

Results

Data for 633 adults with COVID-19 who underwent IMV between 01 March 2020 and 31 August 2020 were analysed. Overall mortality was 43.3% and highest with non-resolution of hypoxaemia [60.4% vs 17.6%; P < 0.001; median PaO2/FiO2 on the day of death was 12.3 (8.9-18.4) kPa] and non-response to proning (69.5% vs. 31.1%; P < 0.001). Two ML models using weeklong data demonstrated an increased predictive accuracy for mortality compared to admission data (74.5% and 76.3% vs 60%, respectively). XAI models highlighted the increasing importance, over the first week, of PaO2/FiO2 in predicting mortality. Prone positioning improved oxygenation only in 45% of patients. A higher peak pressure (OR 1.42 [1.06-1.91]; P < 0.05), raised respiratory component (OR 1.71 [1.17-2.5]; P < 0.01) and cardiovascular component (OR 1.36 [1.04-1.75]; P < 0.05) of the sequential organ failure assessment (SOFA) score and raised lactate (OR 1.33 [0.99-1.79]; P = 0.057) immediately prior to application of prone positioning were associated with lack of oxygenation response. Prone positioning was not applied to 76% of patients with moderate hypoxemia and 45% of those with severe hypoxemia and patients who died without receiving proning interventions had more missed opportunities for prone intervention [7 (3-15.5) versus 2 (0-6); P < 0.001]. Despite the severity of gas exchange deficit, most patients received lung-protective ventilation with tidal volumes less than 8 mL/kg and plateau pressures less than 30 cmH2O. This was despite systematic errors in measurement of height and derived ideal body weight.

Conclusions

Refractory hypoxaemia remains a major association with mortality, yet evidence based ARDS interventions, in particular prone positioning, were not implemented and had delayed application with an associated reduced responsiveness. Real-time service evaluation techniques offer opportunities to assess the delivery of care and improve protocolised implementation of evidence-based ARDS interventions, which might be associated with improvements in survival.",2021-05-11 +34386083,Relationship between p53 status and the bioeffect of ionizing radiation.,"Radiotherapy is widely used in the clinical treatment of cancer patients and it may be used alone or in combination with surgery or chemotherapy to inhibit tumor development. However, radiotherapy may at times not kill all cancer cells completely, as certain cells may develop radioresistance that counteracts the effects of radiation. The emergence of radioresistance is associated with the genetic background and epigenetic regulation of cells. p53 is an important tumor suppressor gene that is expressed at low levels in cells. However, when cells are subjected to stress-induced stimulation, the expression level of p53 increases, thereby preventing genomic disruption. This mechanism has important roles in maintaining cell stability and inhibiting carcinogenesis. However, mutation and deletion destroy the anticancer function of p53 and may induce carcinogenesis. In tumor radiotherapy, the status of p53 expression in cancer cells has a close relationship with radiotherapeutic efficacy. Therefore, understanding how p53 expression affects the cellular response to radiation is of great significance for solving the problem of radioresistance and improving radiotherapeutic outcomes. 
For the present review, the literature was searched for studies published between 1979 and 2021 using the PubMed database (https://pubmed.ncbi.nlm.nih.gov/) with the following key words: Wild-type p53, mutant-type p53, long non-coding RNA, microRNA, gene mutation, radioresistance and radiosensitivity. From the relevant studies retrieved, the association between different p53 mutants and cellular radiosensitivity, as well as the molecular mechanisms of p53 affecting the radiosensitivity of cells, were summarized. The aim of the present study was to provide useful information for understanding and resolving radioresistance, to help clinical researchers develop more accurate treatment strategies and to improve radiotherapeutic outcomes for cancer patients with p53 mutations.",2021-07-14 +30381914,MedPServer: A database for identification of therapeutic targets and novel leads pertaining to natural products.,"Natural products have been the source of treatment for various human diseases from time immemorial. Interests in natural product-based scaffolds for the discovery of modern drugs have grown in recent years. However, research on exploring the traditional medicinal systems for modern therapeutics is severely limited due to our incomplete understanding of the therapeutic mechanism of action. One possible solution is to develop computational approaches, based on ligand- and structure-based screening tools, for fast and plausible target identification, leading to elucidation of the therapeutic mechanism. In the present work, we present two methods based on shape-based and pharmacophore search to predict targets of natural products and elucidate their mechanism, and to identify natural product-based leads. These methods were tested on an in-house developed database of medicinal plants that include information from a largely unexplored North-East region of India, known as one of the twelve mega biodiversity regions. 
However, depending on the choice of the lead molecules, any existing databases can be used for screening. MedPServer is an open access resource available at http://bif.uohyd.ac.in/medserver/.",2018-11-28 +29846728,Chemical-Induced Phenotypes at CTD Help Inform the Predisease State and Construct Adverse Outcome Pathways.,"The Comparative Toxicogenomics Database (CTD; http://ctdbase.org) is a public resource that manually curates the scientific literature to provide content that illuminates the molecular mechanisms by which environmental exposures affect human health. We introduce our new chemical-phenotype module that describes how chemicals can affect molecular, cellular, and physiological phenotypes. At CTD, we operationally distinguish between phenotypes and diseases, wherein a phenotype refers to a nondisease biological event: eg, decreased cell cycle arrest (phenotype) versus liver cancer (disease), increased fat cell proliferation (phenotype) versus morbid obesity (disease), etc. Chemical-phenotype interactions are expressed in a formal structured notation using controlled terms for chemicals, phenotypes, taxon, and anatomical descriptors. Combining this information with CTD's chemical-disease module allows inferences to be made between phenotypes and diseases, yielding potential insight into the predisease state. Integration of all 4 CTD modules furnishes unique opportunities for toxicologists to generate computationally predictive adverse outcome pathways, linking chemical-gene molecular initiating events with phenotypic key events, adverse diseases, and population-level health outcomes. As examples, we present 3 diverse case studies discerning the effect of vehicle emissions on altered leukocyte migration, the role of cadmium in influencing phenotypes preceding Alzheimer disease, and the connection of arsenic-induced glucose metabolic phenotypes with diabetes. 
To date, CTD contains over 165 000 interactions that connect more than 6400 chemicals to 3900 phenotypes for 760 anatomical terms in 215 species, from over 19 000 scientific articles. To our knowledge, this is the first comprehensive set of manually curated, literature-based, contextualized, chemical-induced, nondisease phenotype data provided to the public.",2018-09-01 +32393257,MyoMiner: explore gene co-expression in normal and pathological muscle.,"

Background

High-throughput transcriptomics measures mRNA levels for thousands of genes in a biological sample. Most gene expression studies aim to identify genes that are differentially expressed between different biological conditions, such as between healthy and diseased states. However, these data can also be used to identify genes that are co-expressed within a biological condition. Gene co-expression is used in a guilt-by-association approach to prioritize candidate genes that could be involved in disease, and to gain insights into the functions of genes, protein relations, and signaling pathways. Most existing gene co-expression databases are generic, amalgamating data for a given organism regardless of tissue-type.

Methods

To study muscle-specific gene co-expression in both normal and pathological states, publicly available gene expression data were acquired for 2376 mouse and 2228 human striated muscle samples, and separated into 142 categories based on species (human or mouse), tissue origin, age, gender, anatomic part, and experimental condition. Co-expression values were calculated for each category to create the MyoMiner database.

Results

Within each category, users can select a gene of interest, and the MyoMiner web interface will return all correlated genes. For each co-expressed gene pair, adjusted p-value and confidence intervals are provided as measures of expression correlation strength. A standardized expression-level scatterplot is available for every gene pair r-value. MyoMiner has two extra functions: (a) a network interface for creating a 2-shell correlation network, based either on the most highly correlated genes or from a list of genes provided by the user with the option to include linked genes from the database and (b) a comparison tool from which the users can test whether any two correlation coefficients from different conditions are significantly different.

Conclusions

These co-expression analyses will help investigators to delineate the tissue-, cell-, and pathology-specific elements of muscle protein interactions, cell signaling and gene regulation. Changes in co-expression between pathologic and healthy tissue may suggest new disease mechanisms and help define novel therapeutic targets. Thus, MyoMiner is a powerful muscle-specific database for the discovery of genes that are associated with related functions based on their co-expression. MyoMiner is freely available at https://www.sys-myo.com/myominer.",2020-05-11 +34665987,"ABCC4, ITPA, NUDT15, TPMT and their interaction as genetic predictors of 6-mercaptopurine intolerance in chinese patients with acute lymphoblastic leukemia.","Inter-individual variance in 6-mercaptopurine (6-MP) dose intensity is common in patients with acute lymphoblastic leukemia (ALL). We aimed to evaluate the association of common variants of ABCC4, ITPA, NUDT15, and TPMT with 6-MP dose intensity and toxicity in pediatric ALL patients. In this cohort, 13.8% of patients were intolerant to 6-MP with actual dosage less than 50% of scheduled dose. Twenty percent of patients were found to be heterozygous or homozygous mutated with NUDT15. NUDT15 c.415C > T and the genotype-predicted NUDT15 activity were significantly associated with 6-MP intolerance. TPMT*3C variants were not common in this cohort (2.8%). NUDT15 polymorphisms and genotype predicted NUDT15 activity were significantly associated with 6-MP dose intensity and leukopenia episodes. Combination of ABCC4 and ITPA variants (ABCC4 c.912G > T and ITPA c.94C > A) also showed significant positive association with 6-MP intolerance in Chinese children with ALL. Further study on pharmacogenetic screening for ALL patients to avoid 6-MP induced toxicity is recommended.Supplemental data for this article is available online at https://doi.org/10.1080/08880018.2021.1973628.",2021-10-19 +,"Protein, fat, moisture and cooking yields from a U.S. 
study of retail beef cuts","Nutrient data from the U.S. Department of Agriculture (USDA) are an important resource for U.S. and international databases. To ensure that data for retail beef cuts in USDA's National Nutrient Database for Standard Reference (SR) are current, a comprehensive, nationwide, multi-phase study was conducted. Samples were collected and analyzed in three phases based on primal category. Using a statistically based sampling plan, 72 beef carcasses per phase were obtained with nationally representative quality and yield grades, genders and genetic types. Retail cuts were fabricated, cooked and dissected to obtain component weights. Nutrient values were determined by validated laboratories using quality assurance procedures. Full nutrient profiles were made available in SR (http://www.ars.usda.gov/nutrientdata). Results for 16 beef retail cuts were compared for cooking yield and protein, fat and moisture concentrations. For example, cooked fat levels differed among three roasted cuts and among three grilled cuts from chuck, rib and loin (p<0.01). Cooking yield for roasted ribeye (76%) was lower (p<0.001) than for grilled ribeye (83%) or for chuck eye grilled (80%) or roasted (84%). This study demonstrates the importance of maintaining data for a variety of retail beef cuts due to their unique properties and different cooking methods.",2015-11-01 +33248183,Exploring natural products-based cancer therapeutics derived from egyptian flora.,"

Ethnopharmacological relevance

Egyptian plants are a rich source of natural molecules, representing considerable biodiversity due to climate variations between the Northern, Southern, Eastern and Western regions of the country. Sinai is considered a precious nature reserve, preserving flora, fauna, marine organisms, and historical habitats with ancient origins. Here, traditional medicinal approaches have been used for hundreds of years. Healthy lifestyles, low levels of stress and microbial infections, and a dependence on flora and herbal medicine might in combination explain why the burden of cancer is lower in some regions than in others.

Aim of the study

The primary aim of this review is to document the plants and natural products that are used as foods and medicines in Egypt, in general, and in Sinai, in particular, with a focus on those with demonstrated anticancer activities. The documented traditional uses of these plants are described, together with their chemical and pharmacological activities and the reported outcomes of clinical trials against cancer.

Materials and methods

A literature search was performed to identify texts describing the medicinal plants that are cultivated and grown in Egypt, including information found in textbooks, published articles, the plant list website (http://www.theplantlist.org/), the medicinal plant names services website (http://mpns.kew.org/mpns-portal/), and web databases (PubMed, Science Direct, and Google Scholar).

Results and discussion

We collected data for most of the plants cultivated or grown in Egypt that have been previously investigated for anticancer effects and reported their identified bioactive elements. Several plant species, belonging to different families and associated with 67 bioactive compounds, were investigated as potential anticancer agents (in vitro studies). The most potent cytotoxic activities were identified for the families Asteraceae, Lamiaceae, Chenopodiaceae, Apocynaceae, Asclepiadaceae, Euphorbiaceae, Gramineae, and Liliaceae. The anticancer activities of some species, such as Punica granatum L., Nerium oleander L., Olea europea L., Matricaria chamomilla L., Cassia acutifolia L., Nigella sativa L., Capsicum frutescens L., Withania somnifera L., and Zingiber officinale Roscoe, have been examined in clinical trials. Among the various Egyptian plant habitats, we found that most of these plants are grown in the North Sinai, New-Delta, and Giza Governorates.

Conclusion

In this review, we highlight the role played by Egyptian flora in current medicinal therapies and the possibility that these plants may be examined in further studies for the development of anticancer drugs. These bioactive plant extracts form the basis for the isolation of phytochemicals with demonstrated anticancer activities. Some active components derived from these plants have been applied to preclinical and clinical settings, including resveratrol, quercetin, isoquercetin, and rutin.",2020-11-25 +34951656,Multi-ethnic GWAS and fine-mapping of glycaemic traits identify novel loci in the PAGE Study.,"

Aims/hypothesis

Type 2 diabetes is a growing global public health challenge. Investigating quantitative traits, including fasting glucose, fasting insulin and HbA1c, that serve as early markers of type 2 diabetes progression may lead to a deeper understanding of the genetic aetiology of type 2 diabetes development. Previous genome-wide association studies (GWAS) have identified over 500 loci associated with type 2 diabetes, glycaemic traits and insulin-related traits. However, most of these findings were based only on populations of European ancestry. To address this research gap, we examined the genetic basis of fasting glucose, fasting insulin and HbA1c in participants of the diverse Population Architecture using Genomics and Epidemiology (PAGE) Study.

Methods

We conducted a GWAS of fasting glucose (n = 52,267), fasting insulin (n = 48,395) and HbA1c (n = 23,357) in participants without diabetes from the diverse PAGE Study (23% self-reported African American, 46% Hispanic/Latino, 40% European, 4% Asian, 3% Native Hawaiian, 0.8% Native American), performing transethnic and population-specific GWAS meta-analyses, followed by fine-mapping to identify and characterise novel loci and independent secondary signals in known loci.

Results

Four novel associations were identified (p < 5 × 10⁻⁹), including three loci associated with fasting insulin, and a novel, low-frequency African American-specific locus associated with fasting glucose. Additionally, seven secondary signals were identified, including novel independent secondary signals for fasting glucose at the known GCK locus and for fasting insulin at the known PPP1R3B locus in transethnic meta-analysis.

Conclusions/interpretation

Our findings provide new insights into the genetic architecture of glycaemic traits and highlight the continued importance of conducting genetic studies in diverse populations.

Data availability

Full summary statistics from each of the population-specific and transethnic results are available at NHGRI-EBI GWAS catalog ( https://www.ebi.ac.uk/gwas/downloads/summary-statistics ).",2021-12-24 +31173061,Protein contact prediction using metagenome sequence data and residual neural networks.,"

Motivation

Almost all protein residue contact prediction methods rely on the availability of deep multiple sequence alignments (MSAs). However, many proteins from the poorly populated families do not have sufficient number of homologs in the conventional UniProt database. Here we aim to solve this issue by exploring the rich sequence data from the metagenome sequencing projects.

Results

Based on the improved MSA constructed from the metagenome sequence data, we developed MapPred, a new deep learning-based contact prediction method. MapPred consists of two component methods, DeepMSA and DeepMeta, both trained with the residual neural networks. DeepMSA was inspired by the recent method DeepCov, which was trained on 441 matrices of covariance features. By considering the symmetry of contact map, we reduced the number of matrices to 231, which makes the training more efficient in DeepMSA. Experiments show that DeepMSA outperforms DeepCov by 10-13% in precision. DeepMeta works by combining predicted contacts and other sequence profile features. Experiments on three benchmark datasets suggest that the contribution from the metagenome sequence data is significant with P-values less than 4.04E-17. MapPred is shown to be complementary and comparable to the state-of-the-art methods. The success of MapPred is attributed to three factors: the deeper MSA from the metagenome sequence data, improved feature design in DeepMSA and optimized training by the residual neural networks.

Availability and implementation

http://yanglab.nankai.edu.cn/mappred/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +31081040,AnnoTree: visualization and exploration of a functionally annotated microbial tree of life.,"Bacterial genomics has revolutionized our understanding of the microbial tree of life; however, mapping and visualizing the distribution of functional traits across bacteria remains a challenge. Here, we introduce AnnoTree-an interactive, functionally annotated bacterial tree of life that integrates taxonomic, phylogenetic and functional annotation data from over 27 000 bacterial and 1500 archaeal genomes. AnnoTree enables visualization of millions of precomputed genome annotations across the bacterial and archaeal phylogenies, thereby allowing users to explore gene distributions as well as patterns of gene gain and loss in prokaryotes. Using AnnoTree, we examined the phylogenomic distributions of 28 311 gene/protein families, and measured their phylogenetic conservation, patchiness, and lineage-specificity within bacteria. Our analyses revealed widespread phylogenetic patchiness among bacterial gene families, reflecting the dynamic evolution of prokaryotic genomes. Genes involved in phage infection/defense, mobile elements, and antibiotic resistance dominated the list of most patchy traits, as well as numerous intriguing metabolic enzymes that appear to have undergone frequent horizontal transfer. We anticipate that AnnoTree will be a valuable resource for exploring prokaryotic gene histories, and will act as a catalyst for biological and evolutionary hypothesis generation. AnnoTree is freely available at http://annotree.uwaterloo.ca.",2019-05-01 +34042965,SAMF: a Self-adaptive Protein Modeling Framework. ,"Gradient descent-based protein modeling is a popular protein structure prediction approach that takes as input the predicted inter-residue distances and other necessary constraints and folds protein structures by minimizing protein-specific energy potentials. 
The constraints from multiple predicted protein properties provide redundant and sometime conflicting information that can trap the optimization process into local minima and impairs the modeling efficiency. To address these issues, we developed a self-adaptive protein modeling framework, SAMF. It eliminates redundancy of constraints and resolves conflicts, folds protein structures in an iterative way, and picks up the best structures by a deep quality analysis system. Without a large amount of complicated domain knowledge and numerous patches as barriers, SAMF achieves the state-of-the-art performance by exploiting the power of cutting-edge techniques of deep learning. SAMF has a modular design and can be easily customized and extended. As the quality of input constraints is ever growing, the superiority of SAMF will be amplified over time. The source code and data for reproducing the results is available at https://msracb.blob.core.windows.net/pub/psp/SAMF.zip. Supplementary data are available at Bioinformatics online.",2021-05-27 +35295737,Decoding the Role of Astrocytes in the Entorhinal Cortex in Alzheimer's Disease Using High-Dimensional Single-Nucleus RNA Sequencing Data and Next-Generation Knowledge Discovery Methodologies: Focus on Drugs and Natural Product Remedies for Dementia.,"Introduction: Alzheimer's disease (AD) is a major cause of the development of cognitive decline and dementia. AD and associated dementias (ADRD) are the major contributors to the enormous burden of morbidity and mortality worldwide. To date, there are no robust therapies to alleviate or cure this debilitating disease. Most drug treatments focus on restoring the normal function of neurons and the cells that cause inflammation, such as microglia in the brain. However, the role of astrocytes, the brain's housekeeping cells, in the development of AD and the initiation of dementia is still not well understood. 
Objective: To decipher the role of astrocytes in the entorhinal cortex of AD patients using single nuclear RNA sequencing (snRNASeq) datasets from the Single Cell RNA-seq Database for Alzheimer's Disease (scREAD). The datasets were originally derived from astrocytes, isolated from the entorhinal cortex of AD brain and healthy brain to decipher disease-specific signaling pathways as well as drugs and natural products that reverse AD-specific signatures in astrocytes. Methods: We used snRNASeq datasets from the scREAD database originally derived from astrocytes isolated from the entorhinal cortex of AD and healthy brains from the Gene Expression Omnibus (GEO) (GSE138852 and GSE147528) and analyzed them using next-generation knowledge discovery (NGKD) platforms. scREAD is a user-friendly open-source interface available at https://bmbls.bmi.osumc.edu/scread/ that enables more discovery-oriented strategies. snRNASeq data and metadata can also be visualized and downloaded via an interactive web application at adsn.ddnetbio.com. Differentially expressed genes (DEGs) for each snRNASeq dataset were analyzed using iPathwayGuide to compare and derive disease-specific pathways, gene ontologies, and in silico predictions of drugs and natural products that regulate AD-specific signatures in astrocytes. In addition, DEGs were analyzed using the L1000FWD and L1000CDS2 signature search programming interfaces (APIs) to identify additional drugs and natural products that mimic or reverse AD-specific gene signatures in astrocytes. Results: We found that PI3K/AKT signaling, Wnt signaling, neuroactive ligand-receptor interaction pathways, neurodegeneration pathways, etc. were significantly impaired in astrocytes from the entorhinal cortex of AD patients. 
Biological processes such as glutamate receptor signaling pathway, regulation of synapse organization, cell-cell adhesion via plasma membrane adhesion molecules, and chylomicrons were negatively enriched in the astrocytes from the entorhinal cortex of AD patients. Gene sets involved in cellular components such as postsynaptic membrane, synaptic membrane, postsynapse, and synapse part were negatively enriched (p < 0.01). Moreover, molecular functions such as glutamate receptor activity, neurotransmitter receptor activity, and extracellular ligand-gated ion channels were negatively regulated in the astrocytes of the entorhinal cortex of AD patients (p < 0.01). Moreover, the application of NGKD platforms revealed that antirheumatic drugs, vitamin-E, emetine, narciclasine, cephaeline, trichostatin A, withaferin A, dasatinib, etc. can potentially reverse gene signatures associated with AD. Conclusions: The present study highlights an innovative approach to use NGKD platforms to find unique disease-associated signaling pathways and specific synthetic drugs and natural products that can potentially reverse AD and ADRD-associated gene signatures.",2021-01-01 +32490075,Dataset of quantitative proteomic analysis to understand aging processes in rabbit liver.,"Here, we present a proteomics dataset of liver proteins to understand aging in rabbits, which complements the publication ""Quantitative proteomics to study aging in rabbit liver"" [1]. This dataset was generated to understand the molecular basis and metabolic changes of aging processes in liver, which is the main organ involved in metabolism, detoxification, transport, and signaling. Proteins from young, middle, and old age rabbits were extracted and digested. Generated peptides were labeled with light or heavy dimethyl groups at their N-termini, while lysine amines were labeled with TMT10-plex using a cPILOT workflow [2]. 
Labeled peptides were fractionated by basic pH reverse phase chromatography and analyzed with online reverse phase LC coupled with tandem mass spectrometry (MS/MS and MS3). The RAW files were generated using a Fusion Lumos Orbitrap mass spectrometer (Thermo Scientific) and processed with Proteome Discoverer (PD) version 2.2 to generate a list of identified and quantified proteins. Data was searched against the Rabbit UniProtKB redundant database. A total of 3,867 proteins were identified corresponding to 2,586 protein groups and 22,229 peptides. Dynamic levels of age-related proteins associated with fat metabolism, mitochondrial dysfunction, and protein degradation were detected. The mass spectrometry proteomics data (RAW files) and processed Proteome Discoverer 2.2 files (MSF files) have been deposited to the Proteomics Identification Database (PRIDE) ProteomeXchange Consortium and can be accessed with the dataset identifier PXD013220 (http://www.ebi.ac.uk/pride/archive/projects/PXD013220).",2020-05-15 +29565988,Hierarchical partitions of social networks between rivaling leaders.,"A model algorithm is proposed to imitate a series of consecutive conflicts between leaders in social groups. The leaders are represented by local hubs, i.e., nodes with highest node degrees. We simulate subsequent hierarchical partitions of a complex connected network which represents a social structure. The partitions are supposed to appear as actions of members of two conflicted groups surrounding two strongest leaders. According to the model, links at the shortest path between the rival leaders are successively removed. When the group is split into two disjoint parts then each part is further divided as the initial network. The algorithm is stopped, if in all parts a distance from a local leader to any node in his group is shorter than three links. 
The numerically calculated size distribution of resulting fragments of scale-free Barabási-Albert networks reveals one largest fragment which contains the original leader (hub of the network) and a number of small fragments with opponents that are described by two Weibull distributions. A mean field calculation of the size of the largest fragment is in a good agreement with numerical results. The model assumptions are validated by an application of the algorithm to the data on political blogs in U.S. (L. Adamic and N. Glance, Proc. WWW-2005). The obtained fragments are clearly polarized; either they belong to Democrats, or to Republicans. This result confirms that during conflicts, hubs are centers of polarization.",2018-03-22 +33541212,PhasiRNAnalyzer: an integrated analyser for plant phased siRNAs.,"Phased siRNAs (phasiRNAs) are a class of small interfering RNAs (siRNAs) which play essential roles in plant development and defence. However, only a few phasiRNAs have been extensively studied due to the difficulties in identifying and characterizing plant phasiRNAs by plant biologists. Herein, we describe a comprehensive and multi-functional web server termed PhasiRNAnalyzer, which is able to identify all crucial components in plant phasiRNA's regulatory pathway (phase-initiator→PHAS gene→phasiRNA cluster→target gene). Currently, PhasiRNAnalyzer exhibits the following advantages: I) It is the most comprehensive platform which hosts 170 plant species with 256 genome data, 438 cDNA data and 271 degradome data. II) It can identify all crucial components in phasiRNA's regulatory pathway, and verify the interactions between phasiRNAs and their target genes based on degradome data. III) It can perform differential expression analysis of phasiRNAs on each PHAS gene locus between different samples conveniently. 
IV) It provides the user-friendly interfaces and introduces several improvements, primarily by making more accurate and efficient analysis when dealing with deep sequencing data. In summary, PhasiRNAnalyzer is a comprehensive and systemic phasiRNA analysis server with high sensitivity and efficiency. It can be freely accessed at https://cbi.njau.edu.cn/PPSA/.",2021-02-04 +31980371,Resistance Sniffer: An online tool for prediction of drug resistance patterns of Mycobacterium tuberculosis isolates using next generation sequencing data.,"The effective control of multidrug resistant tuberculosis (MDR-TB) relies upon the timely diagnosis and correct treatment of all tuberculosis cases. Whole genome sequencing (WGS) has great potential as a method for the rapid diagnosis of drug resistant Mycobacterium tuberculosis (Mtb) isolates. This method overcomes most of the problems that are associated with current phenotypic drug susceptibility testing. However, the application of WGS in the clinical setting has been deterred by data complexities and skill requirements for implementing the technologies as well as clinical interpretation of the next generation sequencing (NGS) data. The proposed diagnostic application was drawn upon recent discoveries of patterns of Mtb clade-specific genetic polymorphisms associated with antibiotic resistance. A catalogue of genetic determinants of resistance to thirteen anti-TB drugs for each phylogenetic clade was created. A computational algorithm for the identification of states of diagnostic polymorphisms was implemented as an online software tool, Resistance Sniffer (http://resistance-sniffer.bi.up.ac.za/), and as a stand-alone software tool to predict drug resistance in Mtb isolates using complete or partial genome datasets in different file formats including raw Illumina fastq read files. 
The program was validated on sequenced Mtb isolates with data on antibiotic resistance trials available from GMTV database and from the TB Platform of South African Medical Research Council (SAMRC), Pretoria. The program proved to be suitable for probabilistic prediction of drug resistance profiles of individual strains and large sequence data sets.",2020-01-17 +33002137,A content-based dataset recommendation system for researchers-a case study on Gene Expression Omnibus (GEO) repository.,"It is a growing trend among researchers to make their data publicly available for experimental reproducibility and data reusability. Sharing data with fellow researchers helps in increasing the visibility of the work. On the other hand, there are researchers who are inhibited by the lack of data resources. To overcome this challenge, many repositories and knowledge bases have been established to date to ease data sharing. Further, in the past two decades, there has been an exponential increase in the number of datasets added to these dataset repositories. However, most of these repositories are domain-specific, and none of them can recommend datasets to researchers/users. Naturally, it is challenging for a researcher to keep track of all the relevant repositories for potential use. Thus, a dataset recommender system that recommends datasets to a researcher based on previous publications can enhance their productivity and expedite further research. This work adopts an information retrieval (IR) paradigm for dataset recommendation. We hypothesize that two fundamental differences exist between dataset recommendation and PubMed-style biomedical IR beyond the corpus. First, instead of keywords, the query is the researcher, embodied by his or her publications. Second, to filter the relevant datasets from non-relevant ones, researchers are better represented by a set of interests, as opposed to the entire body of their research. 
This second approach is implemented using a non-parametric clustering technique. These clusters are used to recommend datasets for each researcher using the cosine similarity between the vector representations of publication clusters and datasets. The maximum normalized discounted cumulative gain at 10 (NDCG@10), precision at 10 (p@10) partial and p@10 strict of 0.89, 0.78 and 0.61, respectively, were obtained using the proposed method after manual evaluation by five researchers. As per the best of our knowledge, this is the first study of its kind on content-based dataset recommendation. We hope that this system will further promote data sharing, offset the researchers' workload in identifying the right dataset and increase the reusability of biomedical datasets. Database URL: http://genestudy.org/recommends/#/.",2020-01-01 +31031921,The distribution of plant consumption traits across habitat types and the patterns of fruit availability suggest a mechanism of coexistence of two sympatric frugivorous mammals.,"Understanding the mechanisms governing the coexistence of organisms is an important question in ecology, and providing potential solutions contributes to conservation science. In this study, we evaluated the contribution of several mechanisms to the coexistence of two sympatric frugivores, using western lowland gorillas (Gorilla gorilla gorilla) and central chimpanzees (Pan troglodytes troglodytes) in a tropical rainforest of southeast Cameroon as a model system. We collected great ape fecal samples to determine and classify fruit species consumed; we conducted great ape nest surveys to evaluate seasonal patterns of habitat use; and we collected botanical data to investigate the distribution of plant species across habitat types in relation to their ""consumption traits"" (which indicate whether plants are preferred or fallback for either gorilla, chimpanzee, or both). 
We found that patterns of habitat use varied seasonally for both gorillas and chimpanzees and that gorilla and chimpanzee preferred and fallback fruits differed. Also, the distribution of plant consumption traits was influenced by habitat type and matched accordingly with the patterns of habitat use by gorillas and chimpanzees. We show that neither habitat selection nor fruit preference alone can explain the coexistence of gorillas and chimpanzees, but that considering together the distribution of plant consumption traits of fruiting woody plants across habitats as well as the pattern of fruit availability may contribute to explaining coexistence. This supports the assumptions of niche theory with dominant and subordinate species in heterogeneous landscapes, whereby a species may prefer nesting in habitats where it is less subject to competitive exclusion and where food availability is higher. To our knowledge, our study is the first to investigate the contribution of plant consumption traits, seasonality, and habitat heterogeneity to enabling the coexistence of two sympatric frugivores.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://datadryad.org/resource/doi:10.5061/dryad.ms65f29.",2019-04-01 +33735194,COMSUC: A web server for the identification of consensus molecular subtypes of cancer based on multiple methods and multi-omics data.,"Extensive amounts of multi-omics data and multiple cancer subtyping methods have been developed rapidly, and generate discrepant clustering results, which poses challenges for cancer molecular subtype research. Thus, the development of methods for the identification of cancer consensus molecular subtypes is essential. The lack of intuitive and easy-to-use analytical tools has posed a barrier. Here, we report on the development of the COnsensus Molecular SUbtype of Cancer (COMSUC) web server. With COMSUC, users can explore consensus molecular subtypes of more than 30 cancers based on eight clustering methods, five types of omics data from public reference datasets or users' private data, and three consensus clustering methods. The web server provides interactive and modifiable visualization, and publishable output of analysis results. Researchers can also exchange consensus subtype results with collaborators via project IDs. COMSUC is now publicly and freely available with no login requirement at http://comsuc.bioinforai.tech/ (IP address: http://59.110.25.27/). For a video summary of this web server, see S1 Video and S1 File.",2021-03-18 +32661425,GPCRmd uncovers the dynamics of the 3D-GPCRome.,"G-protein-coupled receptors (GPCRs) are involved in numerous physiological processes and are the most frequent targets of approved drugs. The explosion in the number of new three-dimensional (3D) molecular structures of GPCRs (3D-GPCRome) over the last decade has greatly advanced the mechanistic understanding and drug design opportunities for this protein family. 
Molecular dynamics (MD) simulations have become a widely established technique for exploring the conformational landscape of proteins at an atomic level. However, the analysis and visualization of MD simulations require efficient storage resources and specialized software. Here we present GPCRmd (http://gpcrmd.org/), an online platform that incorporates web-based visualization capabilities as well as a comprehensive and user-friendly analysis toolbox that allows scientists from different disciplines to visualize, analyze and share GPCR MD data. GPCRmd originates from a community-driven effort to create an open, interactive and standardized database of GPCR MD simulations.",2020-07-13 +34487137,TCRpair: prediction of functional pairing between HLA-A*02:01-restricted T cell receptor α and β chains. ,"The ability of a T cell to recognize foreign peptides is defined by a single α and a single β hypervariable complementarity determining region (CDR3), which together form the T cell receptor (TCR) heterodimer. In ∼30%-35% of T cells, two α chains are expressed at the mRNA level but only one α chain is part of the functional TCR. This effect can also be observed for β chains, although it is less common. The identification of functional α/β chain pairs is instrumental in high-throughput characterization of therapeutic TCRs. TCRpair is the first method that predicts whether an α and β chain pair forms a functional, HLA-A*02:01 specific TCR without requiring the sequence of a recognized peptide. By taking additional amino acids flanking the CDR3 regions into account, TCRpair achieves an AUC of 0.71. TCRpair is implemented in Python using TensorFlow 2.0 and is freely available at https://www.github.com/amoesch/TCRpair. Supplementary data are available at Bioinformatics online.",2021-09-06 +32810207,MELODI Presto: a fast and agile tool to explore semantic triples derived from biomedical literature.,"

Summary

The field of literature-based discovery is growing in step with the volume of literature being produced. From modern natural language processing algorithms to high quality entity tagging, the methods and their impact are developing rapidly. One annotation object that arises from these approaches, the subject-predicate-object triple, is proving to be very useful in representing knowledge. We have implemented efficient search methods and an application programming interface, to create fast and convenient functions to utilize triples extracted from the biomedical literature by SemMedDB. By refining these data, we have identified a set of triples that focus on the mechanistic aspects of the literature, and provide simple methods to explore both enriched triples from single queries, and overlapping triples across two query lists.

Availability and implementation

https://melodi-presto.mrcieu.ac.uk/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +32083041,Simulation-Based Summative Assessment of Neonatal Resuscitation Providers Using the RETAIN Serious Board Game-A Pilot Study.,"Background: Each year, 13-26 million newborn babies require help to breathe at birth. Healthcare professionals (HCPs) who provide neonatal resuscitative care must be frequently evaluated to maintain and improve the quality of healthcare delivered. While simulation-based competence assessment is preferred, resource constraints hinder uptake. We aimed to examine if the RETAIN simulation-based boardgame can be used to assess HCPs' neonatal resuscitation knowledge. Method: Twenty neonatal HCPs (19 females) from the Royal Alexandra Hospital (Edmonton, Canada) were recruited. First, they completed an open-answer written test of one neonatal resuscitation scenario. Then, they completed one neonatal resuscitation scenario of difficulty comparable to that of the open-answer written test, but this time using the RETAIN board game. In the RETAIN board game (https://playretain.com, RETAIN Labs Medical Inc, Edmonton, Canada), players perform simulated neonatal resuscitation scenarios based on real-life cases, using action cards, and equipment pieces. Sessions were video-recorded and scored using Neonatal Resuscitation Program 2015 guidelines. Data are reported as mean (standard deviation) for normally distributed continuous variables, and as median (interquartile range) for non-normal continuous variables. Results: Participants consisted of the following HCPs: 8 nurses, 4 respiratory therapists, 4 nurse practitioners, and 4 neonatal fellows with median (IQR) 10.5(3-17) years of clinical experience. Overall mean (SD) Open-answer test and Game Performance was 8.6(2.1) out of 16 possible points (53%) and 29(3.2) out of 40 possible points (74%), respectively. 
Out of the 10 actions shared between the open-answer test and game scenario, performance on the open-answer test was mean (SD) 7.2(1.3) (72%) and game performance was mean (SD) 8.8(1.4) (88%) (V = 17, p < 0.01). Conclusion: RETAIN may provide an enjoyable and standardized alternative toward summative assessment of neonatal resuscitation providers. RETAIN may be used to improve more frequent and ubiquitous uptake of simulation-based competence assessment in healthcare settings.",2020-01-31 +33588537,High-throughput design of Peierls and charge density wave phases in Q1D organometallic materials.,"Soft-phonon modes of an undistorted phase encode a material's preference for symmetry lowering. However, the evidence is sparse for the relationship between an unstable phonon wavevector's reciprocal and the number of formula units in the stable distorted phase. This ""1/q*-criterion"" holds great potential for the first-principles design of materials, especially in low-dimension. We validate the approach on the Q1D organometallic materials space containing 1199 ring-metal units and identify candidates that are stable in undistorted (1 unit), Peierls (2 units), charge density wave (3-5 units), or long wave (>5 units) phases. We highlight materials exhibiting gap-opening as well as an uncommon gap-closing Peierls transition and discuss an example case stabilized as a charge density wave insulator. We present the data generated for this study through an interactive publicly accessible Big Data analytics platform (https://moldis.tifrh.res.in/data/rmq1d) facilitating limitless and seamless data-mining explorations.",2021-02-01 +33871602,Improved diet quality is associated with decreased concentrations of inflammatory markers in adults with uncontrolled asthma.,"

Background

Asthma has become one of the major public health challenges, and recent studies show promising clinical benefits of dietary interventions, such as the Dietary Approaches to Stop Hypertension (DASH) diet.

Objective

The objective of this study was to examine whether changes in diet quality are associated with changes in inflammatory markers important in asthma pathophysiology.

Methods

In this exploratory study in patients with poorly controlled asthma participating in a randomized controlled trial of a DASH intervention study, changes in concentrations of a broad panel of serum proteins (51-plex Luminex assay, Affymetrix) were determined, and their relation to diet quality (DASH score) assessed by combining data of both intervention and usual-care control groups. Second, the relation between the serum proteins, other biomarkers of inflammation and nutrition, and Asthma Control Questionnaire (ACQ) was assessed.

Results

During the first 3 mo, diet quality (DASH scores) were inversely associated (P < 0.05, false discovery rate P < 0.09) with serum concentrations of a large number serum proteins, reflecting not only general proinflammatory markers such as IL-1β, transforming growth factor α (TGF-α), and IL-6 (r = -0.31 to -0.39) but also a number of proteins associated with asthmatic conditions, specifically several T-helper (Th) 2 (Th2; r = -0.29 to -0.34) and Th17 (r = -0.4) associated cytokines and growth factors. Monokine induced by gamma/chemokine (C-X-C motif) ligand 9 (CXCL9) (MIG/CXCL9), a T-cell attractant induced by IFN-γ previously linked to asthma exacerbations, appeared to be the marker most consistently associated with DASH diet quality for the entire 6-mo study period (r = -0.40 and -0.30 for 0-3 and 3-6 mo, respectively, and standardized coefficient loadings -0.13 in the partial least squares analyses). Decreases in 19 serum protein concentrations were also correlated with improved asthma control during the 6-mo study period.

Conclusions

Our data in adult patients with poorly controlled asthma suggest that dietary changes, like the introduction of DASH, may have beneficial effects on reducing inflammatory status. This trial was registered at http://www.clinicaltrials.gov as NCT01725945.",2021-09-01 +34710770,Effects of anti-seizure medications on sleep architecture and daytime sleepiness in patients with epilepsy: A literature review.,"Anti-seizure medications (ASMs) may improve or be detrimental to sleep. A literature review (as an update to the 2014 review by Jain and Glauser [https://doi.org/10.1111/epi.12478]) of 25 ASMs of interest (articles from 12 ASMs included) on the effect of ASMs/non-drug treatments on sleep in patients with epilepsy was conducted. The most common objective instrument was polysomnography, and the most common subjective measures were the Epworth Sleepiness Scale and the Pittsburgh Sleep Quality Index. Eslicarbazepine acetate, lacosamide, and perampanel improved or had no effect on sleep. Perampanel was associated with low incidence of insomnia, and lacosamide with low incidence of daytime sleepiness adverse events. Clonazepam, felbamate, lamotrigine, oxcarbazepine, and phenobarbital worsened or had no effect on sleep. Lamotrigine may be associated with insomnia risk and phenobarbital with daytime sleepiness. Data for valproic acid were mixed. Overall, cannabidiol, carbamazepine, and levetiracetam had no effect on sleep. Epilepsy surgery may benefit sleep in patients with a good surgical outcome. Some ASMs, and, possibly, epilepsy surgery, may have positive effects on sleep, possibly linked to achieving seizure control. Nonetheless, other ASMs may worsen sleep in some settings. 
Clinicians should consider such observations when making treatment decisions, particularly for patients with comorbid sleep disorders.",2021-10-08 +32266390,TOD-CUP: a gene expression rank-based majority vote algorithm for tissue origin diagnosis of cancers of unknown primary.,"Gene expression profiling holds great potential as a new approach to histological diagnosis and precision medicine of cancers of unknown primary (CUP). Batch effects and different data types greatly decrease the predictive performance of biomarker-based algorithms, and few methods have been widely applied to identify tissue origin of CUP up to now. To address this problem and assist in more precise diagnosis, we have developed a gene expression rank-based majority vote algorithm for tissue origin diagnosis of CUP (TOD-CUP) of most common cancer types. Based on massive tissue-specific RNA-seq data sets (10 553) found in The Cancer Genome Atlas (TCGA), 538 feature genes (biomarkers) were selected based on their gene expression ranks and used to predict tissue types. The top scoring pairs (TSPs) classifier of the tumor type was optimized by the TCGA training samples. To test the prediction accuracy of our TOD-CUP algorithm, we analyzed (1) two microarray data sets (1029 Agilent and 2277 Affymetrix/Illumina chips) and found 91% and 94% prediction accuracy, respectively, (2) RNA-seq data from five cancer types derived from 141 public metastatic cancer tumor samples and achieved 94% accuracy and (3) a total of 25 clinical cancer samples (including 14 metastatic cancer samples) were able to classify 24/25 samples correctly (96.0% accuracy). Taken together, the TOD-CUP algorithm provides a powerful and robust means to accurately identify the tissue origin of 24 cancer types across different data platforms. To make the TOD-CUP algorithm easily accessible for clinical application, we established a Web-based server for tumor tissue origin diagnosis (http://ibi. 
zju.edu.cn/todcup/).",2021-03-01 +34291951,Impact of Systemic versus Intratympanic Dexamethasone Administration on the Perilymph Proteome.,"Glucocorticoids are the first-line treatment for sensorineural hearing loss, but little is known about the mechanism of their protective effect or the impact of route of administration. The recent development of hollow microneedles enables safe and reliable sampling of perilymph for proteomic analysis. Using these microneedles, we investigate the effect of intratympanic (IT) versus intraperitoneal (IP) dexamethasone administration on guinea pig perilymph proteome. Guinea pigs were treated with IT dexamethasone (n = 6), IP dexamethasone (n = 8), or untreated for control (n = 8) 6 h prior to aspiration. The round window membrane (RWM) was accessed via a postauricular approach, and hollow microneedles were used to perforate the RWM and aspirate 1 μL of perilymph. Perilymph samples were analyzed by liquid chromatography-mass spectrometry-based label-free quantitative proteomics. Mass spectrometry raw data files have been deposited in an international public repository (MassIVE proteomics repository at https://massive.ucsd.edu/) under data set # MSV000086887. In the 22 samples of perilymph analyzed, 632 proteins were detected, including the inner ear protein cochlin, a perilymph marker. Of these, 14 proteins were modulated by IP, and three proteins were modulated by IT dexamethasone. In both IP and IT dexamethasone groups, VGF nerve growth factor inducible was significantly upregulated compared to control. The remaining adjusted proteins modulate neurons, inflammation, or protein synthesis. Proteome analysis facilitated by the use of hollow microneedles shows that route of dexamethasone administration impacts changes seen in perilymph proteome. 
Compared to IT administration, the IP route was associated with greater changes in protein expression, including proteins involved in neuroprotection, inflammatory pathway, and protein synthesis. Our findings show that microneedles can mediate safe and effective intracochlear sampling and hold promise for inner ear diagnostics.",2021-07-22 +34709858,"Trends in Cannabis Involvement and Risk of Alcohol Involvement in Motor Vehicle Crash Fatalities in the United States, 2000‒2018.","Objectives. To assess cannabis and alcohol involvement among motor vehicle crash (MVC) fatalities in the United States. Methods. In this repeated cross-sectional analysis, we used data from the Fatality Analysis Reporting System from 2000 to 2018. Fatalities were cannabis-involved if an involved driver tested positive for a cannabinoid and alcohol-involved based on the highest blood alcohol concentration (BAC) of an involved driver. Multinomial mixed-effects logistic regression models assessed cannabis as a risk factor for alcohol by BAC level. Results. While trends in fatalities involving alcohol have remained stable, the percentage of fatalities involving cannabis and cannabis and alcohol increased from 9.0% in 2000 to 21.5% in 2018, and 4.8% in 2000 to 10.3% in 2018, respectively. In adjusted analyses, fatalities involving cannabis had 1.56 (95% confidence interval [CI] = 1.48, 1.65), 1.62 (95% CI = 1.52, 1.72), and 1.46 (95% CI = 1.42, 1.50) times the odds of involving BACs of 0.01% to 0.049%, 0.05% to 0.079%, and 0.08% or higher, respectively. Conclusions. The percentage of fatalities involving cannabis and coinvolving cannabis and alcohol doubled from 2000 to 2018, and cannabis was associated with alcohol coinvolvement. Further research is warranted to understand cannabis- and alcohol-involved MVC fatalities. (Am J Public Health. 2021;111(11):1976-1985. 
https://doi.org/10.2105/AJPH.2021.306466).",2021-10-28 +34305610,The Association Between STX1B Polymorphisms and Treatment Response in Patients With Epilepsy.,"Background: Epilepsy is a debilitating brain disease with complex inheritance and frequent treatment resistance. However, the role of STX1B single nucleotide polymorphisms (SNPs) in epilepsy treatment remains unknown. Objective: This study aimed to explore the genetic association of STX1B SNPs with treatment response in patients with epilepsy in a Han Chinese population. Methods: We first examined the associations between STX1B SNPs and epilepsy in 1000 Han Chinese and the associations between STX1B SNPs and drug-resistant epilepsy in 450 subjects. Expression quantitative trait loci analysis was then conducted using 16 drug-resistant epileptic brain tissue samples and results from the BrainCloud database (http://eqtl.brainseq.org). Results: The allelic frequencies of rs140820592 were different between the epilepsy and control groups (p = 0.002) after Bonferroni correction. The rs140820592 was associated with significantly lower epilepsy risk among 1,000 subjects in the dominant model after adjusting for gender and age and Bonferroni correction (OR = 0.542, 95%CI = 0.358-0.819, p = 0.004). The rs140820592 also conferred significantly lower risk of drug-resistant epilepsy among 450 subjects using the same dominant model after adjusting for gender and age and Bonferroni correction (OR = 0.260, 95%CI = 0.103-0.653, p = 0.004). Expression quantitative trait loci analysis revealed that rs140820592 was associated with STX1B expression level in drug-resistant epileptic brain tissues (p = 0.012), and this result was further verified in the BrainCloud database (http://eqtl.brainseq.org) (p = 2.3214 × 10-5). 
Conclusion: The STX1B rs140820592 may influence the risks of epilepsy and drug-resistant epilepsy by regulating STX1B expression in brain tissues.",2021-07-09 +33176685,Avian Immunome DB: an example of a user-friendly interface for extracting genetic information.,"

Background

Genomic and genetic studies often require a target list of genes before conducting any hypothesis testing or experimental verification. With the ever-growing number of sequenced genomes and a variety of different annotation strategies, comes the potential for ambiguous gene symbols, making it cumbersome to capture the ""correct"" set of genes. In this article, we present and describe the Avian Immunome DB (AVIMM) for easy gene property extraction as exemplified by avian immune genes. The avian immune system is characterised by a cascade of complex biological processes underlaid by more than 1000 different genes. It is a vital trait to study particularly in birds considering that they are a significant driver in spreading zoonotic diseases. With the completion of phase II of the B10K (""Bird 10,000 Genomes"") consortium's whole-genome sequencing effort, we have included 363 annotated bird genomes in addition to other publicly available bird genome data which serve as a valuable foundation for AVIMM.

Construction and content

A relational database with avian immune gene evidence from Gene Ontology, Ensembl, UniProt and the B10K consortium has been designed and set up. The foundation stone or the ""seed"" for the initial set of avian immune genes is based on the well-studied model organism chicken (Gallus gallus). Gene annotations, different transcript isoforms, nucleotide sequences and protein information, including amino acid sequences, are included. Ambiguous gene names (symbols) are resolved within the database and linked to their canonical gene symbol. AVIMM is supplemented by a command-line interface and a web front-end to query the database.

Utility and discussion

The internal mapping of unique gene symbol identifiers to canonical gene symbols allows for an ambiguous gene property search. The database is organised within core and feature tables, which makes it straightforward to extend for future purposes. The database design is ready to be applied to other taxa or biological processes. Currently, the database contains 1170 distinct avian immune genes with canonical gene symbols and 612 synonyms across 363 bird species. While the command-line interface readily integrates into bioinformatics pipelines, the intuitive web front-end with download functionality offers sophisticated search functionalities and tracks the origin for each record. AVIMM is publicly accessible at https://avimm.ab.mpg.de .",2020-11-12 +34247194,Identification of a seven-long non-coding RNA signature associated with Jab1/CSN5 in predicting hepatocellular carcinoma.,"Hepatocellular carcinoma (HCC) is a leading cause of cancer death worldwide, accounting for over 700,000 deaths each year. The lack of predictive and prognostic biomarkers for HCC, with effective therapy, remains a significant challenge for HCC management. Long non-coding RNAs (lncRNAs) play a key role in tumorigenesis and have clinical value as potential biomarkers in the early diagnosis and prediction of HCC. Jun activation domain-binding protein 1 (Jab1, also known as COP9 signalosome subunit 5, CSN5) is a potential oncogene that plays a critical role in the occurrence of HCC. Here, we performed a comprehensive analysis for Jab1/CSN5-associated lncRNAs to predict the prognosis of HCC. The differentially expressed (DE) lncRNAs between in HCC were analyzed based on the TCGA RNA-seq data. We detected 1031 upregulated lncRNAs in 371 HCC tissues and identified a seven-lncRNA signature strongly correlated with Jab1/CSN5 (SNHG6, CTD3065J16.9, LINC01604, CTD3025N20.3, KB-1460A1.5, RP13-582O9.7, and RP11-29520.2). 
We further evaluated the prognostic significance of these lncRNAs by GEPIA ( http://gepia.cancer-pku.cn/ ). The expression data in 364 liver tumors indicated that this seven-lncRNA signature could better predict worse survival in HCC patients. Moreover, 35 clinical HCC samples were evaluated to assess the validity and reproducibility of the bioinformatic analysis. We found that the targeted lncRNAs were upregulated, with a strong association with Jab1/CSN5 and prognostic value in HCC. Functional enrichment analysis by Gene Ontology (GO) showed that these seven prognostic lncRNAs exhibit oncogenic properties and are associated with prominent hallmarks of cancer. Overall, our findings demonstrate the clinical implication of Jab1/CSN5 with the seven-lncRNAs in predicting survival for patients with HCC.",2021-07-10 +29029599,LiverWiki: a wiki-based database for human liver.,"

Background

Recent advances in omics technology have produced a large amount of liver-related data. A comprehensive and up-to-date source of liver-related data is needed to allow biologists to access the latest data. However, current liver-related data sources each cover only a specific part of the liver. It is difficult for them to keep pace with the rapid increase of liver-related data available at those data resources. Integrating diverse liver-related data is a critical yet formidable challenge, as it requires sustained human effort.

Results

We present LiverWiki, a first wiki-based database that integrates liver-related genes, homolog genes, gene expressions in microarray datasets and RNA-Seq datasets, proteins, protein interactions, post-translational modifications, associated pathways, diseases, metabolites identified in the metabolomics datasets, and literatures into an easily accessible and searchable resource for community-driven sharing. LiverWiki houses information in a total of 141,897 content pages, including 19,787 liver-related gene pages, 17,077 homolog gene pages, 50,251 liver-related protein pages, 36,122 gene expression pages, 2067 metabolites identified in the metabolomics datasets, 16,366 disease-related molecules, and 227 liver disease pages. Other than assisting users in searching, browsing, reviewing, refining the contents on LiverWiki, the most important contribution of LiverWiki is to allow the community to create and update biological data of liver in visible and editable tables. This integrates newly produced data with existing knowledge. Implemented in mediawiki, LiverWiki provides powerful extensions to support community contributions.

Conclusions

The main goal of LiverWiki is to provide the research community with comprehensive liver-related data, as well as to allow the research community to share their liver-related data flexibly and efficiently. It also enables rapid sharing new discoveries by allowing the discoveries to be integrated and shared immediately, rather than relying on expert curators. The database is available online at http://liverwiki.hupo.org.cn /.",2017-10-13 +,First Report of Soft Rot on Pleurotus eryngii Caused by Fusarium solani in China,"The king oyster mushroom (Pleurotus eryngii) possesses high medicinal, nutritional, and commercial values and is cultivated on a large scale in China. In 2016, fresh P. eryngii yield reached 966,872.45 t from all over China (China Edible Fungi Association statistics; http://www.cefa.org.cn/2017/10/24/10250.html). However, in June 2017, a soft rot with more than 5% incidence was observed on fruiting bodies of P. eryngii in a mushroom factory in Suqian, Jiangsu Province, China. At first, fruiting bodies were covered by white and cobweb-like mycelia and spread slowly from the stipe to pileus. Mushrooms eventually turned dark brown and became rotten. The fungus was isolated from the symptomatic tissues by rinsing in sterile water, plating on potato dextrose agar (PDA), and incubating at 25°C in the dark. The colonies grew rapidly on PDA, appeared fluffy to appressed, white to cream-colored, and produced yellowish coloration on the bottom of plates. Colonies produced both microconidia and macroconidia. Microconidia were oval to kidney shaped, single-celled, and measured 2.3 to 4.2 × 5.3 to 10.4 μm; macroconidia were fusiform, slightly curved, and measured 4.3 to 6.5 × 21.7 to 30.4 μm, with two to five septa. Chlamydospores were rounded, thick-walled, and measured 6 to 10 μm. Total genomic DNA of the isolated fungus was extracted with a Plant/Fungi DNA Isolation Kit (Sigma-Aldrich, Ontario, Canada). 
Translation elongation factor 1-alpha (EF1) (O’Donnell 2000) and internal transcribed spacer (ITS) (White et al. 1990) were amplified with primers EF1-728F/tef1rev and ITS1/ITS4. Results of sequences were deposited in GenBank (accession nos. MK410936 and MK402158). The FUSARIUM-ID database (Geiser et al. 2004) and NCBI GenBank searches revealed 98 to 100% sequence identity with EF1 and ITS sequence with Fusarium solani species complex (FSSC). In a neighbor-joining phylogenetic analysis based on EF1 sequence from FSSC with MEGA6 software, isolate FSA1 located on the same clade with all F. solani. Pathogenicity tests were performed by spraying 48 ml of spore suspension (1 × 107 conidia/ml) on 48 fruiting bodies of P. eryngii, and sterilized distilled water was used as a negative control (12 fruiting bodies). The inoculated fruiting bodies were maintained at 16 to 18°C and 90 to 95% relative humidity for 7 to 10 days in an artificial climate chamber, after which typical rot symptoms were observed for inoculation with the spore suspension. F. solani was successfully reisolated from artificially infected fruiting bodies based on morphology and molecular evidence. Meanwhile, the sterile water control P. eryngii grew normally without any symptoms. To our knowledge, this is the first report of F. solani causing soft rot on P. eryngii, and this pathogen has been reported to infect other economically important crops such as soybeans, potato, citrus, peppers, orchids, and peas (Leslie and Summerell 2006). P. eryngii is one of the high-demand mushrooms, and incidence of this new pathogen can be a significant threat for P. eryngii production. It is a concern to producers of this edible fungus.",2019-08-01 +33239692,"VolcaNoseR is a web app for creating, exploring, labeling and sharing volcano plots.","Comparative genome- and proteome-wide screens yield large amounts of data. 
To efficiently present such datasets and to simplify the identification of hits, the results are often presented in a type of scatterplot known as a volcano plot, which shows a measure of effect size versus a measure of significance. The data points with the largest effect size and a statistical significance beyond a user-defined threshold are considered as hits. Such hits are usually annotated in the plot by a label with their name. Volcano plots can represent ten thousands of data points, of which typically only a handful is annotated. The information of data that is not annotated is hardly or not accessible. To simplify access to the data and enable its re-use, we have developed an open source and online web tool with R/Shiny. The web app is named VolcaNoseR and it can be used to create, explore, label and share volcano plots ( https://huygens.science.uva.nl/VolcaNoseR ). When the data is stored in an online data repository, the web app can retrieve that data together with user-defined settings to generate a customized, interactive volcano plot. Users can interact with the data, adjust the plot and share their modified plot together with the underlying data. Therefore, VolcaNoseR increases the transparency and re-use of large comparative genome- and proteome-wide datasets.",2020-11-25 +31504780,miRDB: an online database for prediction of functional microRNA targets.,"MicroRNAs (miRNAs) are small noncoding RNAs that act as master regulators in many biological processes. miRNAs function mainly by downregulating the expression of their gene targets. Thus, accurate prediction of miRNA targets is critical for characterization of miRNA functions. To this end, we have developed an online database, miRDB, for miRNA target prediction and functional annotations. Recently, we have performed major updates for miRDB. 
Specifically, by employing an improved algorithm for miRNA target prediction, we now present updated transcriptome-wide target prediction data in miRDB, including 3.5 million predicted targets regulated by 7000 miRNAs in five species. Further, we have implemented the new prediction algorithm into a web server, allowing custom target prediction with user-provided sequences. Another new database feature is the prediction of cell-specific miRNA targets. miRDB now hosts the expression profiles of over 1000 cell lines and presents target prediction data that are tailored for specific cell models. At last, a new web query interface has been added to miRDB for prediction of miRNA functions by integrative analysis of target prediction and Gene Ontology data. All data in miRDB are freely accessible at http://mirdb.org.",2020-01-01 +28562632,DrugSig: A resource for computational drug repositioning utilizing gene expression signatures.,"Computational drug repositioning has been proved as an effective approach to develop new drug uses. However, currently existing strategies strongly rely on drug response gene signatures which scattered in separated or individual experimental data, and resulted in low efficient outputs. So, a fully drug response gene signatures database will be very helpful to these methods. We collected drug response microarray data and annotated related drug and targets information from public databases and scientific literature. By selecting top 500 up-regulated and down-regulated genes as drug signatures, we manually established the DrugSig database. Currently DrugSig contains more than 1300 drugs, 7000 microarray and 800 targets. Moreover, we developed the signature based and target based functions to aid drug repositioning. The constructed database can serve as a resource to quicken computational drug repositioning. 
Database URL: http://biotechlab.fudan.edu.cn/database/drugsig/.",2017-05-31 +34752205,"PASTEC - a prospective, single-center, randomized, cross-over trial of pure physical versus physical plus attentional training in children with cancer.","Despite recent improvements in survival rates in children with cancer, long-term toxicities remain a major concern. Physical activity could reduce the impact of long-term sequelae, notably in neuropsychological and physical areas. We report of a randomized trial of pure physical versus physical/attentional training in pediatric oncology patients. Twenty-two patients aged 6-18 y.o. were included, irrespective of their clinical diagnosis or treatment status, stratified by age and randomized 1:1 into pure physical vs. physical/attentional activity arms, with a cross-over at study midpoint. Neurological, motor and neuropsychological assessments were performed at inclusion, start, crossover and end of the program. Feasibility, defined as > 80% patients attending > 80% of sessions, was the primary endpoint. Secondary outcomes were improvements in neuropsychological and motor performance tests. While 68% of patients attended more than 80% of sessions during the pre-crossover phase of the study, this dropped to 36% post-crossover. Our study therefore failed to meet our primary endpoint. Nonetheless, significant improvements in anxiety (p<0.001), emotional control (p = 0.04), organization skills (p = 0.03), as well as motor deficit scores (p = 0.04) were observed. We noted no significant difference between the pure physical and the physical/attentional training arms, or when analyzing subgroups by age or sequence of intervention. We conclude that physical activity has a positive impact on anxiety, emotional and organizational aspects as well as motor deficits. 
Attendance dropped during the course of the study and motivational interventions should be included in future studies or equivalent programs.Supplemental data for this article is available online at https://doi.org/10.1080/08880018.2021.1994677 .",2021-11-09 +29175726,"The redesigned Forensic Research/Reference on Genetics-knowledge base, FROG-kb.","The Forensic Resource/Reference on Genetics-knowledge base (FROG-kb) web site was introduced in 2011 and in the five years since the previous publication ongoing research into how the database can better serve forensics has resulted in extensive redesign of the database interface and functionality. Originally designed as a prototype to support forensic use of single nucleotide polymorphisms (SNPs), FROG-kb provides a freely accessible web interface that facilitates forensic practice and can be useful for teaching and research. Based on knowledge gained through its use, the web interface has been redesigned for easier navigation through the multiple components. The site also has functional enhancements, extensive new documentation, and new reference panels of SNPs with new curated data. FROG-kb focuses on single nucleotide polymorphisms (SNPs) and provides reference population data for several published panels of individual identification SNPs (IISNPs) and several published panels of ancestry inference SNPs (AISNPs). For each of the various marker panels with reference population data, FROG-kb calculates random match probabilities (RMP) and relative likelihoods of ancestry for a user-entered genotype profile (either completely or partially specified). Example genotype profiles are available and the User's Manual presents interpretation guidelines for the calculations. The extensive documentation along with ongoing updates makes FROG-kb a comprehensive tool in facilitating use of SNPs in forensic practice and education. 
An overview of the new FROG-kb with examples and material explaining the results of its use are presented here.",2017-11-14 +34432000,MoMA-LoopSampler: A web server to exhaustively sample protein loop conformations. ,"MoMA-LoopSampler is a sampling method that globally explores the conformational space of flexible protein loops. It combines a large structural library of three-residue fragments and a novel reinforcement-learning-based approach to accelerate the sampling process while maintaining diversity. The method generates a set of statistically-likely loop states satisfying geometric constraints, and its ability to sample experimentally observed conformations has been demonstrated. This paper presents a web user interface to MoMA-LoopSampler through the illustration of a typical use-case. MoMA-LoopSampler is freely available at: https://moma.laas.fr/applications/LoopSampler/ We recommend users to create an account, but anonymous access is possible. In most cases, jobs are completed within a few minutes. The waiting time may increase depending on the server load, but it very rarely exceeds an hour. For users requiring more intensive use, binaries can be provided upon request. Supplementary data are available at Bioinformatics online.",2021-08-25 +32047888,RBPTD: a database of cancer-related RNA-binding proteins in humans. ,"RNA-binding proteins (RBPs) play important roles in regulating the expression of genes involved in human physiological and pathological processes, especially in cancers. Many RBPs have been found to be dysregulated in cancers; however, there was no tool to incorporate high-throughput data from different dimensions to systematically identify cancer-related RBPs and to explore their causes of abnormality and their potential functions. 
Therefore, we developed a database named RBPTD to identify cancer-related RBPs in humans and systematically explore their functions and abnormalities by integrating different types of data, including gene expression profiles, prognosis data and DNA copy number variation (CNV), among 28 cancers. We found a total of 454 significantly differentially expressed RBPs, 1970 RBPs with significant prognostic value, and 53 dysregulated RBPs correlated with CNV abnormality. Functions of 26 cancer-related RBPs were explored by analysing high-throughput RNA sequencing data obtained by crosslinking immunoprecipitation, and the remaining RBP functions were predicted by calculating their correlation coefficient with other genes. Finally, we developed the RBPTD for users to explore functions and abnormalities of cancer-related RBPs to improve our understanding of their roles in tumorigenesis. Database URL: http://www.rbptd.com.",2020-01-01 +34647497,Dietary Intake is Associated with miR-31 and miR-375 Expression in Patients with Head and Neck Squamous Cell Carcinoma.,"MicroRNAs (miRNAs) are important epigenetic regulators in head and neck squamous cell carcinoma (HNSCC), with miR-31 being considered an oncomir and miR-375, a tumor suppressor miR, which are up- and down-regulated in HNSCC, respectively. Nutrients are known to influence miRNA expression; however, this association is poorly explored in HNSCC. This work aimed to identify associations between dietary intake and the expression of miR-31 and miR-375 in patients newly diagnosed with HNSCC. The expression of miR-31 was positively associated with the consumption of iron (β = 16.65) and vitamin C (β = 0.37), and inversely associated with total sugar (β = -0.88), cholesterol (β= -0.23), vitamin B9 (β= -0.37) and zinc (β = -5.66) intake. 
The expression of miR-375 was positively associated with the consumption of selenium (β = 1.52), vitamin C (β = 0.17) and vitamin D (β = 13.01), and inversely associated with the consumption of added sugar (β = -0.49), phosphorus (β= -0.27) and vitamin B12 (β = -10.80). Our findings showed important associations between dietary intake and miR-31 and miR-375 expression in HNSCC, offering possible directions for further studies investigating how nutrients interfere with carcinogenesis.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1990972 .",2021-10-14 +31737449,Another look at the eigenvalues of a population matrix model.,"Population matrix models are important tools in resource management, in part because they are used to calculate the finite rate of growth (""dominant eigenvalue""). But understanding how a population matrix model converts life history traits into the finite rate of growth can be tricky. We introduce interactive software (""IsoPOPd"") that uses the characteristic equation to display how vital rates (survival and fertility) contribute to the finite rate of growth. Higher-order interactions among vital rates complicate the linkage between a management intervention and a population's growth rate. We illustrate the use of the software for investigating the consequences of three management interventions in a 3-stage model of white-tailed deer (Odocoileus virginianus). The software is applicable to any species with 2- or 3-stages, but the mathematical concepts underlying the software are applicable to a population matrix model of any size. The IsoPOPd software is available at: https://cwhl.vet.cornell.edu/tools/isopopd.",2019-11-11 +33202055,Low-dose CT image and projection dataset.,"

Purpose

To describe a large, publicly available dataset comprising computed tomography (CT) projection data from patient exams, both at routine clinical doses and simulated lower doses.

Acquisition and validation methods

The library was developed under local ethics committee approval. Projection and image data from 299 clinically performed patient CT exams were archived for three types of clinical exams: noncontrast head CT scans acquired for acute cognitive or motor deficit, low-dose noncontrast chest scans acquired to screen high-risk patients for pulmonary nodules, and contrast-enhanced CT scans of the abdomen acquired to look for metastatic liver lesions. Scans were performed on CT systems from two different CT manufacturers using routine clinical protocols. Projection data were validated by reconstructing the data using several different reconstruction algorithms and through use of the data in the 2016 Low Dose CT Grand Challenge. Reduced dose projection data were simulated for each scan using a validated noise-insertion method. Radiologists marked location and diagnosis for detected pathologies. Reference truth was obtained from the patient medical record, either from histology or subsequent imaging.

Data format and usage notes

Projection datasets were converted into the previously developed DICOM-CT-PD format, which is an extended DICOM format created to store CT projections and acquisition geometry in a nonproprietary format. Image data are stored in the standard DICOM image format and clinical data in a spreadsheet. Materials are provided to help investigators use the DICOM-CT-PD files, including a dictionary file, data reader, and user manual. The library is publicly available from The Cancer Imaging Archive (https://doi.org/10.7937/9npb-2637).

Potential applications

This CT data library will facilitate the development and validation of new CT reconstruction and/or denoising algorithms, including those associated with machine learning or artificial intelligence. The provided clinical information allows evaluation of task-based diagnostic performance.",2020-12-16 +,Computing uncertainty in the optimum nitrogen rate using a generalized cost function,"A Python package, “EONR”, was developed for computing the economic optimum nitrogen rate (EONR) and its profile-likelihood confidence intervals (CIs) under economic conditions defined by the user. This work was motivated by the need to improve nitrogen fertilizer recommendations using the maximum return to nitrogen approach, specifically to make it easier for researchers and other practitioners to calculate uncertainty and consider externalities to the cost function while computing the EONR. The “EONR” package fits yield response data to a re-parameterized quadratic-plateau model, which is generally accepted as the most appropriate model for describing yield response to nitrogen in maize (the package also supports the quadratic model). Although grain price and fertilizer cost are typically the only economic factors producers consider for determining the EONR, this package allows the user to also consider variable costs and/or externalities. A general cost function may be desired if the user wishes to consider costs to the farm operation (e.g., equipment, technology, labor, etc.) or environmental costs/penalties that may result from excess fertilizer application (e.g., water treatment or health costs that result from pollution) in addition to the traditional fertilizer to grain price ratio. In addition to the development of the “EONR” Python package, the objectives of this work were to: (i) design an algorithm that utilizes a general cost function for computing the EONR and its profile-likelihood CIs for any crop and (ii) clearly document the methodology and algorithms used. 
The “EONR” Python package can be downloaded from the Python Package Index (https://pypi.org/), and installation instructions, tutorials, and supplementary background information can be found in the online documentation (https://eonr.readthedocs.io).",2019-12-01 +34225374,MAGICPL: A Generic Process Description Language for Distributed Pseudonymization Scenarios.,"

Objectives

 Pseudonymization is an important aspect of projects dealing with sensitive patient data. Most projects build their own specialized, hard-coded, solutions. However, these overlap in many aspects of their functionality. As any re-implementation binds resources, we would like to propose a solution that facilitates and encourages the reuse of existing components.

Methods

 We analyzed already-established data protection concepts to gain an insight into their common features and the ways in which their components were linked together. We found that we could represent these pseudonymization processes with a simple descriptive language, which we have called MAGICPL, plus a relatively small set of components. We designed MAGICPL as an XML-based language, to make it human-readable and accessible to nonprogrammers. Additionally, a prototype implementation of the components was written in Java. MAGICPL makes it possible to reference the components using their class names, making it easy to extend or exchange the component set. Furthermore, there is a simple HTTP application programming interface (API) that runs the tasks and allows other systems to communicate with the pseudonymization process.

Results

 MAGICPL has been used in at least three projects, including the re-implementation of the pseudonymization process of the German Cancer Consortium, clinical data flows in a large-scale translational research network (National Network Genomic Medicine), and for our own institute's pseudonymization service.

Conclusions

 Putting our solution into productive use at both our own institute and at our partner sites facilitated a reduction in the time and effort required to build pseudonymization pipelines in medical research.",2021-05-01 +34085265,Fluorescence Anisotropy-Based Assay for Characterization of Ligand Binding Dynamics to GPCRs: The Case of Cy3B-Labeled Ligands Binding to MC4 Receptors in Budded Baculoviruses.,"During the past decade, fluorescence methods have become valuable tools for characterizing ligand binding to G protein-coupled receptors (GPCRs). However, only a few of the assays enable studying wild-type receptors and monitor the ligand binding in real time. One of the approaches that is inherently suitable for this purpose is the fluorescence anisotropy (FA) assay. In the FA assay, the change of ligand's rotational freedom connected with its binding to the receptor can be monitored with a conventional fluorescence plate reader equipped with suitable optical filters. To achieve the high receptor concentration required for the assay and the low autofluorescence levels essential for reliable results, budded baculoviruses that display GPCRs on their surfaces can be used. The monitoring process generates a substantial amount of kinetic data, which is usually stored as a proprietary file format limiting the flexibility of data analysis. To solve this problem, we propose the use of the data curation software Aparecium ( http://gpcr.ut.ee/aparecium.html ), which integrates experimental data with metadata in a Minimum Information for Data Analysis in Systems Biology (MIDAS) format. Aparecium enables data export to different software packages for fitting to suitable kinetic or equilibrium models. A combination of the FA assay with the novel data analysis strategy is suitable for screening new active compounds, but also for modeling complex systems of ligand binding to GPCRs. 
We present the proposed approach using different fluorescent probes and assay types to characterize ligand binding to melanocortin 4 (MC4) receptor.",2021-01-01 +34702851,Accurate prediction of protein torsion angles using evolutionary signatures and recurrent neural network.,"The amino acid sequence of a protein contains all the necessary information to specify its shape, which dictates its biological activities. However, it is challenging and expensive to experimentally determine the three-dimensional structure of proteins. The backbone torsion angles play a critical role in protein structure prediction, and accurately predicting the angles can considerably advance the tertiary structure prediction by accelerating efficient sampling of the large conformational space for low energy structures. Here we first time propose evolutionary signatures computed from protein sequence profiles, and a novel recurrent architecture, termed ESIDEN, that adopts a straightforward architecture of recurrent neural networks with a small number of learnable parameters. The ESIDEN can capture efficient information from both the classic and new features benefiting from different recurrent architectures in processing information. On the other hand, compared to widely used classic features, the new features, especially the Ramachandran basin potential, provide statistical and evolutionary information to improve prediction accuracy. On four widely used benchmark datasets, the ESIDEN significantly improves the accuracy in predicting the torsion angles by comparison to the best-so-far methods. As demonstrated in the present study, the predicted angles can be used as structural constraints to accurately infer protein tertiary structures. Moreover, the proposed features would pave the way to improve machine learning-based methods in protein folding and structure prediction, as well as function prediction. 
The source code and data are available at the website https://kornmann.bioch.ox.ac.uk/leri/resources/download.html .",2021-10-26 +34702133,"The Relevance of Competences for a Healthy, Physically Active Lifestyle in Persons with Multiple Sclerosis: a Path Analytical Approach.","To promote health and counteract the decline associated with the disease, persons with multiple sclerosis (pwMS) are advised to lead healthy, physically active lifestyles. The physical activity-related health competence (PAHCO) model posits that individuals must meet three integrated, person-related requirements for the adoption of such a lifestyle: movement competence, control competence, and self-regulation competence. To gain insights into the needs and challenges of pwMS, the goal of the present study was to empirically examine the roles of these competences within this target group. A total of 475 pwMS underwent a multidimensional, online-based assessment of PAHCO. These participants self-reported their amount of physical activity (PA), health status, disease-related, and sociodemographic information. We used a series of path analyses to investigate the relevance of the three competence areas for each individual's PA level and subjective health. Stepwise multivariate analyses revealed that self-regulation competence was significantly associated with overall PA volume. In contrast, movement competence did not contribute to this prediction. Control competence was also not related to PA level. However, in accordance with the PAHCO model, this factor exerted an independent, qualitative effect on participant health. In summary, self-regulation competence appears to play a crucial role with regard to PA volume. Specifically, control competence appears to be key for the qualitative aspect of PA promotion, characterizing the individual's application of an appropriate stimulus for the achievement of health. 
Integrating the promotion of self-regulation and control competences into rehabilitation practices can help to foster healthy, physically active lifestyles in pwMS.Supplemental data for this article is available online at https://doi.org/10.1080/08964289.2021.1935437 .",2021-10-26 +33437854,Datasets for recognition of aggressive interactions of children toward robotic toys.,"The data is related to unwanted interactions between a person and a small robotic toy based on acceleration sensor embedded within the robotic toy. Three toys were considered namely, a stuffed panda, a stuffed robot, and an excavator. Each toy was embedded with an accelerometer to record the interactions. Five different unwanted interactions were performed by adult participants and children. The considered interactions were hit, shake, throw, pickup, drop, and idle for the no interaction case. The collected data contains the magnitude of the resultant acceleration from the interactions. The data was processed by extracting the instances of interactions. A secondary dataset was created from the original one by creating artificial sequences. This data article contains the processed data that can be used to explore different machine learning models and techniques in classifying such interactions. Online repository contains the files: https://doi.org/10.7910/DVN/FHOO0Q.",2020-12-26 +33983436,eCOMPASS: evaluative comparison of multiple protein alignments by statistical score. ,"Detecting subtle biologically relevant patterns in protein sequences often requires the construction of a large and accurate multiple sequence alignment (MSA). Methods for constructing MSAs are usually evaluated using benchmark alignments, which, however, typically contain very few sequences and are therefore inappropriate when dealing with large numbers of proteins. 
eCOMPASS addresses this problem using a statistical measure of relative alignment quality based on direct coupling analysis (DCA): To maintain protein structural integrity over evolutionary time, substitutions at one residue position typically result in compensating substitutions at other positions. eCOMPASS computes the statistical significance of the congruence between high scoring directly coupled pairs and 3D contacts in corresponding structures, which depends upon properly aligned homologous residues. We illustrate eCOMPASS using both simulated and real MSAs. The eCOMPASS executable, C ++ open source code and input data sets are available at https://www.igs.umaryland.edu/labs/neuwald/software/compass. Supplementary data are available at Bioinformatics online.",2021-05-13 +32683440,Identifying disease-causing mutations with privacy protection.,"

Motivation

The use of genome data for diagnosis and treatment is becoming increasingly common. Researchers need access to as many genomes as possible to interpret the patient genome, to obtain some statistical patterns and to reveal disease-gene relationships. The sensitive information contained in the genome data and the high risk of re-identification increase the privacy and security concerns associated with sharing such data. In this article, we present an approach to identify disease-associated variants and genes while ensuring patient privacy. The proposed method uses secure multi-party computation to find disease-causing mutations under specific inheritance models without sacrificing the privacy of individuals. It discloses only variants or genes obtained as a result of the analysis. Thus, the vast majority of patient data can be kept private.

Results

Our prototype implementation performs analyses on thousands of genomic data in milliseconds, and the runtime scales logarithmically with the number of patients. We present the first inheritance model (recessive, dominant and compound heterozygous) based privacy-preserving analyses of genomic data to find disease-causing mutations. Furthermore, we re-implement the privacy-preserving methods (MAX, SETDIFF and INTERSECTION) proposed in a previous study. Our MAX, SETDIFF and INTERSECTION implementations are 2.5, 1122 and 341 times faster than the corresponding operations of the state-of-the-art protocol, respectively.

Availability and implementation

https://gitlab.com/DIFUTURE/privacy-preserving-genomic-diagnosis.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-01-01 +31245766,Identification of gene expression logical invariants in Arabidopsis.,"Numerous gene expression datasets from diverse tissue samples from the plant variety Arabidopsis thaliana have been already deposited in the public domain. There have been several attempts to do large scale meta-analyses of all of these datasets. Most of these analyses summarize pairwise gene expression relationships using correlation, or identify differentially expressed genes in two conditions. We propose here a new large scale meta-analysis of the publicly available Arabidopsis datasets to identify Boolean logical relationships between genes. Boolean logic is a branch of mathematics that deals with two possible values. In the context of gene expression datasets we use qualitative high and low expression values. A strong logical relationship between genes emerges if at least one of the quadrants is sparsely populated. We pointed out serious issues in the data normalization steps widely accepted and published recently in this context. We put together a web resource where gene expression relationships can be explored online which helps visualize the logical relationships between genes. We believe that this website will be useful in identifying important genes in different biological context. The web link is http://hegemon.ucsd.edu/plant/.",2019-03-20 +31220119,Interlaboratory study to validate a STR profiling method for intraspecies identification of mouse cell lines.,"The Consortium for Mouse Cell Line Authentication was formed to validate Short Tandem Repeat (STR) markers for intraspecies identification of mouse cell lines. The STR profiling method is a multiplex polymerase chain reaction (PCR) assay comprised of primers targeting 19 mouse STR markers and two human STR markers (for interspecies contamination screening). 
The goals of the Consortium were to perform an interlaboratory study to-(1) validate the mouse STR markers to uniquely identify mouse cell lines (intraspecies identification), (2) to provide a public database of mouse cell lines with the National Institute of Standards and Technology (NIST)-validated mouse STR profiles, and (3) to publish the results of the interlaboratory study. The interlaboratory study was an international effort that consisted of 12 participating laboratories representing institutions from academia, industry, biological resource centers, and government. The study was based on 50 of the most commonly used mouse cell lines obtained from the American Type Culture Collection (ATCC). Of the 50 mouse cell lines, 18 had unique STR profiles that were 100% concordant (match) among all Consortium laboratory members, and the remaining 32 cell lines had discordance that was resolved readily and led to improvement of the assay. The discordance was due to low signal and interpretation issues involving artifacts and genotyping errors. Although the total number of discordant STR profiles was relatively high in this study, the percent of labs agreeing on allele calls among the discordant samples was above 92%. The STR profiles, including electropherogram images, for NIST-validated mouse cell lines will be published on the NCBI BioSample Database (https://www.ncbi.nlm.nih.gov/biosample/). Overall, the interlaboratory study showed that the multiplex PCR method using 18 of the 19 mouse STR markers is capable of discriminating at the intraspecies level between mouse cell lines. Further studies are ongoing to refine the assay including (1) development of an allelic ladder for improving the accuracy of allele calling and (2) integration of stutter filters to identify true stutter.",2019-06-20 +32831056,Prognostic significance of autophagy-related genes within esophageal carcinoma.,"

Background

Several works suggest the importance of autophagy during esophageal carcinoma development. The aim of the study is to construct a scoring system according to the expression profiles of major autophagy-related genes (ARGs) among esophageal carcinoma cases.

Methods

The Cancer Genome Atlas was employed to obtain the esophageal carcinoma data. Thereafter, the online database Oncolnc ( http://www.oncolnc.org/ ) was employed to verify the accuracy of our results. According to our results, the included ARGs were related to overall survival (OS).

Results

We detected the expression patterns of ARG within esophageal carcinoma and normal esophageal tissues. In addition, we identified the autophagy related gene set, including 14 genes displaying remarkable significance in predicting the esophageal carcinoma prognosis. The Cox regression results showed that 7 ARGs (including TBK1, ATG5, HSP90AB1, VAMP7, DNAJB1, GABARAPL2, and MAP2K7) were screened to calculate the ARGs scores. Typically, patients with higher ARGs scores were associated with poorer OS. Moreover, the receiver operating characteristic (ROC) curve analysis suggested that ARGs accurately distinguished the healthy people from esophageal carcinoma patients, with the area under curve (AUC) value of > 0.6.

Conclusion

A scoring system is constructed in this study based on the main ARGs, which accurately predicts the outcomes for esophageal carcinoma.",2020-08-24 +34019098,Semi-supervised peak calling with SPAN and JBR Genome Browser.,"The widespread application of ChIP-seq led to a growing need for consistent analysis of multiple epigenetics profiles, for instance, in human studies where multiple replicates are a common element of design. Such multisamples experimental designs introduced analytical and computational challenges. For example, when peak calling is done independently for each sample, small differences in signal strength/quality lead to a very different number of peaks for individual samples, making group-level analysis difficult. On the other side, when samples are pooled together for joint analysis, individual-level statistical differences are averaged out. Recently we have demonstrated that a semi-supervised peak calling approach (SPAN) allows for robust analysis of multiple epigenetic profiles while preserving individual sample statistics. Here, we present this approach's implementation, centered around the JBR genome browser, a stand-alone tool that allows for accessible and streamlined annotation, analysis, and visualization. Specifically, JBR supports graphical interactive manual region selection and annotation, thereby addressing supervised learning's key procedural challenge. Furthermore, JBR includes the capability for peak optimization, i.e., calibration of sample-specific peak calling parameters by leveraging manual annotation. This procedure can be applied to a broad range of ChIP-seq datasets of different quality and chromatin accessibility ATAC-seq, including single-cell experiments. JBR was designed for efficient data processing, resulting in fast viewing and analysis of multiple replicates, up to thousands of tracks. 
Accelerated execution and integrated semi-supervised peak calling make JBR and SPAN next-generation visualization and analysis tools for multisample epigenetic data.

Availability

SPAN and JBR run on Linux, Mac OS, and Windows, and are freely available at https://research.jetbrains.org/groups/biolabs/tools/span-peak-analyzer and https://research.jetbrains.org/groups/biolabs/tools/jbr-genome-browser.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-21 +,Understanding Transmission Dynamics of COVID-19-Type Infections by Direct Numerical Simulations of Cough/Sneeze Flows,"The transmission dynamics of highly contagious respiratory diseases like COVID-19 (through coughing/sneezing) is an open problem in the epidemiological studies of such diseases (Bourouiba, JAMA. https://doi.org/10.1001/jama.2020.4756. 2020). The problem is basically the fluid dynamics of a transient turbulent jet/puff with buoyancy, laden with evaporating droplets carrying the pathogen. A turbulent flow of this nature does not lend itself to reliable estimates through modeling approaches such as RANS (Reynolds-Averaged Navier–Stokes equations) or other droplet-based models. However, direct numerical simulations (DNS) of what may be called “cough/sneeze flows” can play an important role in understanding the spread of the contagion. The objective of this work is to develop a DNS code for studying cough/sneeze flows by a suitable combination of the DNS codes available with the authors (developed to study cumulus cloud flows including thermodynamics of phase change and the dynamics of small water droplets) and to generate useful data on these flows. Recent results from the cumulus cloud simulations are included to highlight the effect of turbulent entrainment (which is one of the key processes in determining the spread of the expiratory flows) on the distribution of liquid water content in a moist plume. Furthermore, preliminary results on the temperature distribution in a “dry cough” (i.e., without inclusion of liquid droplets) are reported to illustrate the large spatial extent and time duration over which the cough flow can persist after the coughing has stopped. 
We believe that simulations of this kind can help to devise more accurate guidelines for separation distances between neighbors in a group, design better masks, and minimize the spread of respiratory diseases of the COVID-19 type.",2020-01-01 +30614601,UVEOGENE: An SNP database for investigations on genetic factors associated with uveitis and their relationship with other systemic autoimmune diseases.,"Uveitis is an intraocular inflammatory disease which can lead to serious visual impairment. Genetic factors have been shown to be involved in its development. However, few databases have focused on the information of associations between single nucleotide polymorphisms (SNPs) and uveitis. To discover the exact genetic background of uveitis, we developed an SNP database specific for uveitis, ""UVEOGENE,"" which includes 370 genes and 918 SNPs covering 14 uveitis entities and 40 populations from 286 PubMed English-language papers. Stratification analyses by gender, HLA status, and different clinical features were also extracted from the publications. As a result, 371 associations were judged as ""statistically significant."" These associations were also shared with Global Variome shared Leiden Open Variation Database (LOVD) (https://databases.lovd.nl/shared/genes). Based on these associations, we investigated the genetic relationship among three widely studied uveitis entities including Behcet's disease (BD), Vogt-Koyanagi-Harada (VKH) disease, and acute anterior uveitis (AAU). Furthermore, ""UVEOGENE"" can be used as a reliable and informative resource to identify similarities as well as differences in the genetic susceptibility among uveitis and other autoimmune diseases. 
UVEOGENE is freely accessible at http://www.uvogene.com.",2019-01-16 +34112823,The Komagataeibacter europaeus GqqA is the prototype of a novel bifunctional N-Acyl-homoserine lactone acylase with prephenate dehydratase activity.,"Previously, we reported the isolation of a quorum quenching protein (QQ), designated GqqA, from Komagataeibacter europaeus CECT 8546 that is highly homologous to prephenate dehydratases (PDT) (Valera et al. in Microb Cell Fact 15, 88. https://doi.org/10.1186/s12934-016-0482-y , 2016). GqqA strongly interfered with N-acyl-homoserine lactone (AHL) quorum sensing signals from Gram-negative bacteria and affected biofilm formation in its native host strain Komagataeibacter europaeus. Here we present and discuss data identifying GqqA as a novel acylase. ESI-MS-MS data showed unambiguously that GqqA hydrolyzes the amide bond of the acyl side-chain of AHL molecules, but not the lactone ring. Consistent with this observation the protein sequence does not carry a conserved Zn2+ binding motif, known to be essential for metal-dependent lactonases, but in fact harboring the typical periplasmatic binding protein domain (PBP domain), acting as catalytic domain. We report structural details for the native structure at 2.5 Å resolution and for a truncated GqqA structure at 1.7 Å. The structures obtained highlight that GqqA acts as a dimer and complementary docking studies indicate that the lactone ring of the substrate binds within a cleft of the PBP domain and interacts with polar residues Y16, S17 and T174. The biochemical and phylogenetic analyses imply that GqqA represents the first member of a novel type of QQ family enzymes.",2021-06-10 +34226607,The osseointegration and stability of dental implants with different surface treatments in animal models: a network meta-analysis.,"Dental implants are commonly used to repair missing teeth. The implant surface plays a critical role in promoting osseointegration and implant success. 
However, little information is available about which implant surface treatment technology best promotes osseointegration and implant stability. The aim of this network meta-analysis was to evaluate the osseointegration and stability of four commonly used dental implants (SLA, SLActive, TiUnite, and Osseotite). The protocol of the current meta-analysis is registered in PROSPERO (International Prospective Register of Systematic Reviews) under the code CRD42020190907 ( https://www.crd.york.ac.uk ). We conducted a systematic review following PRISMA and Cochrane Recommendations. Medline (PubMed), Cochrane Library, Embase, and the Web of Science databases were searched. Only randomized controlled trials were considered. Twelve studies were included in the current network meta-analysis, eleven studies were included concerning the osseointegration effect and five studies were included for stability analysis (four studies were used to assess both stability and osseointegration). Rank possibility shows that the SLActive surface best promoted bone formation at an early healing stage and TiUnite seemed to be the best surface for overall osseointegration. For stability, TiUnite seemed to be the best surface. The present network meta-analysis showed that the SLActive surface has the potential to promote osseointegration at an early stage. The TiUnite surface had the best effect on osseointegration regarding the overall healing period. The TiUnite surface also had the best effect in stability.",2021-07-05 +27899599,PubChem BioAssay: 2017 update.,"PubChem's BioAssay database (https://pubchem.ncbi.nlm.nih.gov) has served as a public repository for small-molecule and RNAi screening data since 2004 providing open access of its data content to the community. PubChem accepts data submission from worldwide researchers at academia, industry and government agencies. PubChem also collaborates with other chemical biology database stakeholders with data exchange. 
With over a decade's development effort, it becomes an important information resource supporting drug discovery and chemical biology research. To facilitate data discovery, PubChem is integrated with all other databases at NCBI. In this work, we provide an update for the PubChem BioAssay database describing several recent development including added sources of research data, redesigned BioAssay record page, new BioAssay classification browser and new features in the Upload system facilitating data sharing.",2016-11-29 +34542002,A normative study of the Czech Edinburgh Cognitive and Behavioural ALS Screen (ECAS): a brief report.,"Edinburgh Cognitive and Behavioural ALS Screen (ECAS) is a brief, standardized assessment of cognitive impairment in amyotrophic lateral sclerosis.

Objective

We aimed to create a normative dataset for the ECAS Czech version (ECAS-CZ) in order to make the assessment applicable for clinical settings.

Method

Included were 102 healthy participants (mean age: 54.92 ± 14.55; education: 14.52 ± 2.44; 54:48 females/males) that fulfilled rigorous exclusion criteria and controlled for depressive symptoms.

Results

The internal consistency of ECAS-CZ was acceptable (Cronbach's α = .69). We found medium correlations (rho ≈ .5) of age and education with ECAS-CZ Total score but not with gender. Cut-offs with -2 SD's threshold are presented for the differentiation of cognitive impairment. We report percentile values for ECAS-CZ Total including all subscales.

Conclusion

We provide normative values for ECAS-CZ that are well suited for the detection of cognitive impairment in clinical settings especially for patients with ALS. Supplemental data for this article is available online at https://doi.org/10.1080/13854046.2021.1978553 .",2021-09-18 +34522848,An interactive single cell web portal identifies gene and cell networks in COVID-19 host responses.,"Numerous studies have provided single-cell transcriptome profiles of host responses to SARS-CoV-2 infection. Critically lacking however is a data mine that allows users to compare and explore cell profiles to gain insights and develop new hypotheses. To accomplish this, we harmonized datasets from COVID-19 and other control condition blood, bronchoalveolar lavage, and tissue samples, and derived a compendium of gene signature modules per cell type, subtype, clinical condition, and compartment. We demonstrate approaches to interacting with, exploring, and functionally evaluating these modules via a new interactive web portal ToppCell (http://toppcell.cchmc.org/). As examples, we develop three hypotheses: (1) alternatively-differentiated monocyte-derived macrophages form a multicellular signaling cascade that drives T cell recruitment and activation; (2) COVID-19-generated platelet subtypes exhibit dramatically altered potential to adhere, coagulate, and thrombose; and (3) extrafollicular B maturation is driven by a multilineage cell activation network that expresses an ensemble of genes strongly associated with risk for developing post-viral autoimmunity.",2021-09-10 +32404014,PDB-2-PBv3.0: An updated protein block database.,"Our protein block (PB) sequence database PDB-2-PBv1.0 provides PB sequences and dihedral angles for 74,297 protein structures comprising of 103,252 protein chains of Protein Data Bank (PDB) as on 2011. 
Since there are a lot of practical applications of PB and also as the size of PDB database increases, it becomes necessary to provide the PB sequences for all PDB protein structures. The current updated PDB-2-PBv3.0 contains PB sequences for 147,602 PDB structures comprising of 400,355 protein chains as on October 2019. When compared to our previous version PDB-2-PBv1.0, the current PDB-2-PBv3.0 contains 2- and 4-fold increase in the number of protein structures and chains, respectively. Notably, it provides PB information for any protein chain, regardless of the missing atom records of protein structure data in PDB. It includes protein interaction information with DNA and RNA along with their corresponding functional classes from Nucleic Acid Database (NDB) and PDB. Now, the updated version allows the user to download multiple PB records by parameter search and/or by a given list. This database is freely accessible at http://bioinfo.bdu.ac.in/pb3.",2020-04-01 +32809301,"Clean Water: What is Acceptable Microbial Risk? This report is based on a colloquium, sponsored by the American Academy of Microbiology, convened October 6-8, 2006, in Tucson, Arizona","Access to clean water is essential for life. In recent decades, technology, civic progress, and an abundance of resources have enabled developed countries to cultivate high-quality water sources and distribution systems. As a result, people in these countries now enjoy lower infectious disease rates, higher hygiene standards, and a higher quality of life than has ever been witnessed in history. It is a familiar scenario: an outbreak of gastrointestinal illness suddenly emerges in a community, and no one knows where it came from or how to stop it. At the start of an outbreak, only a few people are affected with the uncomfortable consequences: nausea, vomiting, cramping, and diarrhea. Sick people trickle into doctors' offices and clinics for help. 
Among them are elderly patients and small children, some of whom are admitted to the hospital. As the outbreak worsens, more and more people fall ill, and individuals who were weak or unwell before they became infected may develop life-threatening illnesses. Outbreaks like these can originate from a source that most people in the U.S. and other developed countries trust unquestioningly—drinking water. Although drinking water quality in developed countries is high, a number of outbreaks of waterborne illness are still reported every year (Dziuban, et al., 2006). Worldwide, the statistics are even more alarming; two million people die every year from diarrheal illness, most of which can be attributed to waterborne pathogens (Pruss, et al., 2002; http://www.who.int/water_sanitation_health/publications/facts2004/). Even more alarming is the situation in the developing world, where access to clean water is far from guaranteed, and diarrheal illnesses claim roughly 2 million lives every year, 90% of them small children (http://www.who.int/water_sanitation_health/). Today, scientists have a relatively new tool for addressing the problems of waterborne infectious disease: microbial risk assessment (MRA), a formal process for quantifying the health risks from pathogenic microorganisms. MRA is guided by a framework, which defines the activities necessary to obtain information required to develop a quantitative model for calculating health risks. The American Academy of Microbiology convened a colloquium October 6-8, 2006, in Tucson, Arizona, to review the status of microbial risk assessment as it applies to waterborne disease. Experts from diverse fields—including microbiology, public health, engineering, epidemiology, medicine, and water science—discussed some of the controversial topics in microbial risk assessment, research subjects that could move the field forward, and the need for increased training and risk communication. 
The colloquium elicited intense discussion as there is still need to solidify approaches to the microbial risk assessment of water. Numerical water quality standards, for example, are useful in some circumstances, but they are sometimes misapplied or calculated using specious assumptions. The term “acceptable risk” is also controversial. “Acceptable risk” implies that injuries from waterborne illness are expected and fitting, and acceptable risk figures may be appropriately used to derive water quality standards in some instances. For a number of reasons, much of the data available on microbes in water is related to indicator organisms (microbes that denote the presence of fecal material or pathogens), but these organisms are not a substitute for counting the actual pathogen concentrations in water. Another difficult point is sometimes reconciling the approaches between microbial risk assessment and epidemiological studies. The approaches, although potentially harmonious, often lack coordination. However, epidemio-logical studies can be extremely useful in identifying risks and every effort must be made to reconcile epidemiological and microbial risk determinations. An accessible international database of pathogen occurrence in water would be extremely useful. Making data of this kind more widely available would inform microbial risk assessment and risk management and enable implementation of public health initiatives that could save lives. Microbial risk assessment of water is an evolving field, and a great deal of novel research is needed to fill gaps in the understanding of human exposure to pathogens in water, to determine the current rate of waterborne illness, the dose-response relationships between pathogens and human health, and the role of waterborne opportunistic pathogens in human health. 
Since the field of microbial risk assessment relies on the skill sets of professionals in many disciplines, education and research in microbial risk should be interdisciplinary and collaborative. Finally, there is a need to effectively communicate microbial risk principles to consumers and the general public because a lack of information can have serious implications for communities. Risk managers and public health authorities need to make increased efforts to educate the public on everyday matters, like the need to change the filters in water purifiers and the need to upgrade and maintain water and wastewater treatment facilities. For all the challenges that still exist to advancing the science and application of microbial risk assessment, the effort to do so will offer many benefits. A primary advantage of the iterative process is that it helps to identify data gaps and uncertainties, and it focuses limited research resources towards key parameters that will improve the understanding of risk. When applied correctly, microbial risk assessment can help guide water quality management decisions; identify sensitive subpopulations, spot critical pathogen control points, and aid in assessment of the adequacy of drinking water treatment barriers.",2020-08-19 +33684889,"Determination of the g-, hyperfine coupling- and zero-field splitting tensors in EPR and ENDOR using extended Matlab codes.","The analysis of single crystal electron magnetic resonance (EMR) data has traditionally been performed using software in programming languages that are difficult to update, are not easily available, or are obsolete. By using a modern script-language with tools for the analysis and graphical display of the data, three MatLab® codes were prepared to compute the g, zero-field splitting (zfs) and hyperfine coupling (hfc) tensors from roadmaps obtained by EPR or ENDOR measurements in three crystal planes. 
Schonland's original method was used to compute the g- and hfc -tensors by a least-squares fit to the experimental data in each plane. The modifications required for the analysis of the zfs of radical pairs with S = 1 were accounted for. A non-linear fit was employed in a second code to obtain the hfc -tensor from EPR measurements, taking the nuclear Zeeman interaction of an I = ½ nucleus into account. A previously developed method to calculate the g- and hfc -tensors by a simultaneous linear fit to all data was used in the third code. The validity of the methods was examined by comparison with results obtained experimentally, and by roadmaps computed by exact diagonalization. The probable errors were estimated using functions for regression analysis available in MatLab. The software will be published at https://doi.org/10.17632/ps24sw95gz.1, Input and output examples presented in this work can also be downloaded from https://old.liu.se/simarc/downloads?l=en.",2021-02-23 +34164647,DUI: the drug use insights web server. ,"Substance abuse constitutes one of the major contemporary health epidemics. Recently, the use of social media platforms has garnered interest as a novel source of data for drug addiction epidemiology. Often however, the language used in such forums comprises slang and jargon. Currently, there are no publicly available resources to automatically analyse the esoteric language-use in the social media drug-use sub-culture. This lacunae introduces critical challenges for interpreting, sensemaking and modeling of addiction epidemiology using social media. Drug-Use Insights (DUI) is a public and open-source web application to address the aforementioned deficiency. DUI is underlined by a hierarchical taxonomy encompassing 108 different addiction related categories consisting of over 9,000 terms, where each category encompasses a set of semantically related terms. 
These categories and terms were established by utilizing thematic analysis in conjunction with term embeddings generated from 7,472,545 Reddit posts made by 1,402,017 redditors. Given post(s) from social media forums such as Reddit and Twitter, DUI can be used foremost to identify constituent terms related to drug use. Furthermore, the DUI categories and integrated visualization tools can be leveraged for semantic- and exploratory analysis. To the best of our knowledge, DUI utilizes the largest number of substance use and recovery social media posts used in a study and represents the first significant online taxonomy of drug abuse terminology. The DUI web server and source code are available at: http://haddock9.sfsu.edu/insight/. Supplementary data are available at Bioinformatics online.",2021-06-23 +34950688,A Preparatory Study for a Randomized Controlled Trial of Dietary Fiber Intake During Adult Pelvic Radiotherapy.,"Background: Patients undergoing pelvic radiotherapy are often advised to omit fiber-rich foods from their diet to reduce the adverse effects of treatment. Scientific evidence supporting this recommendation is lacking, and recent studies on animals and humans have suggested that there is a beneficial effect of dietary fiber for the alleviation of symptoms. Randomized controlled studies on dietary fiber intake during pelvic radiotherapy of sufficient size and duration are needed. As preparation for such a large-scale study, we evaluated the feasibility, compliance, participation rate, and logistics and report our findings here in this preparatory study. Methods: In this preparatory study of a fiber intervention trial, Swedish gynecological cancer patients scheduled for radiotherapy were recruited between January 2019 and August 2020. During the intervention, the participants filled out questionnaires and used an application. They also consumed a fiber supplement at first in powder form, later in capsules. Blood- and fecal samples were collected. 
The study is registered in clinicaltrials.gov (https://clinicaltrials.gov/ct2/show/NCT04534075?cond=fidura&draw=2&rank=1). Results: Among 136 approached patients, 57 started the study and the participation rate for primary outcomes was 63% (third blood sample) and 65% (third questionnaire). Barely half of the participants provided fecal samples. Providing concise and relevant information to the patients at the right time was crucial in getting them to participate and stay in the study. The most common reasons for declining participation or dropping out were the expected burden of radiotherapy or acute side effects. Tailoring the ambition level to each patient concerning the collection of data beyond the primary endpoints was an important strategy to keep the dropout rate at an acceptable level. Using capsules rather than psyllium in powder form made it much easier to document intake and to create a control group. During the course of the preparatory study, we improved the logistics and for the last 12 participants included, the participation rate was 100% for the earliest primary outcome. Conclusion: A variety of adjustments in this preparatory study resulted in an improved participation rate, which allowed us to set a final protocol and proceed with the main study.",2021-12-07 +34266288,CGRP measurements in human plasma - a methodological study.,"

Background

Calcitonin gene-related peptide plasma levels have frequently been determined as a biomarker for primary headaches. However, published data is often inconsistent resulting from different methods that are not precisely described in most studies.

Methods

We applied a well-proven enzyme-linked immunosorbent assay to measure calcitonin gene-related peptide concentrations in human blood plasma, we modified parameters of plasma preparation and protein purification and used calcitonin gene-related peptide-free plasma for standard solutions, which are described in detail.

Results

Calcitonin gene-related peptide levels are stable in plasma with peptidase inhibitors and after deep-freezing. Calcitonin gene-related peptide standard solutions based on synthetic intercellular fluid or pooled plasma with pre-absorbed calcitonin gene-related peptide influenced the measurements but yielded both comprehensible results. In a sample of 56 healthy subjects the calcitonin gene-related peptide plasma levels varied considerably from low (<50 pg/mL) to very high (>500 pg/mL) values. After a 12-hour exposure of these subjects to normobaric hypoxia the individual calcitonin gene-related peptide levels remained stable.

Conclusion

Buffering with peptidase inhibitors and immediate freezing or processing of plasma samples is essential to achieve reliable measurements. Individuals show considerable differences and partly high calcitonin gene-related peptide plasma levels without detectable pathological reason. Thus plasma measurements are suited particularly to follow calcitonin gene-related peptide levels in longitudinal studies. The use of data for this study was approved by the Ethics Committee of the Medical University of Innsbruck (https://www.i-med.ac.at/ethikkommission/; EK Nr: 1242/2017).",2021-07-16 +33062408,The role of m6A-related genes in the prognosis and immune microenvironment of pancreatic adenocarcinoma.,"

Background

Pancreatic adenocarcinoma (PAAD) is among the most lethal diseases and has a dismal prognosis; however, efficient treatment is currently limited. Several studies have observed epigenetic variation during tumorigenesis, suggesting the potential role of RNA methylation, especially N6-methyladenosine (m6A) modification, as a novel epigenetic modification mediating PAAD prognosis.

Methods

The expression levels of m6A-related genes were downloaded from The Cancer Genome Atlas-Pancreatic Adenocarcinoma (TCGA) and Genotype-Tissue Expression (GTEx) projects, and the findings were validated in four Expression Omnibus (GEO) datasets. A predictive model was constructed using a lasso regression and evaluated by a survival analysis and receiver operating characteristic curve. Consensus clustering identified two distinct subgroups with different immune activity signatures based on the expression pattern of m6A-related genes. The relationship between the mutation state of m6A-related genes and infiltration of immune cells was established and visualized using Tumor Immune Estimation Resource (https://cistrome.shinyapps.io/timer/).

Results

Fourteen of twenty-one m6A-related genes were differentially expressed between PAAD and normal tissues in TCGA-GTEx cohort. Among these genes, HNRNPC, IGF2BP2 and YTHDF1 were further validated in four GEO datasets. Moreover, an m6A-based model exhibited moderate accuracy in predicting overall survival in PAAD samples. Additionally, potential m6A modification targets were screened by selecting genes from a set of 23,391 genes that not only harbored the most m6A-modified sites but also showed a robust correlation with PAAD survival. Moreover, we correlated the expression level of m6A-related genes with the immune microenvironment of pancreatic cancer for the first time. Specifically, both arm-level gain and deletion of ALKBH5 decreased the infiltration of CD8+T cells (P < 0.05 and P < 0.01, respectively).

Conclusion

Collectively, our findings suggest a novel anticancer strategy for restoring balanced RNA methylation in tumor cells and guide clinical physicians in developing a new practical approach for considering the impact of related genes on prognosis.",2020-09-28 +34546076,Monitoring Drinking Water Quality in Nationally Representative Household Surveys in Low- and Middle-Income Countries: Cross-Sectional Analysis of 27 Multiple Indicator Cluster Surveys 2014-2020.,"

Background

The 2030 Sustainable Development Goals (SDGs) set an ambitious new benchmark for safely managed drinking water services (SMDWs), but many countries lack national data on the availability and quality of drinking water.

Objectives

We quantified the availability and microbiological quality of drinking water, monitored SMDWs, and examined risk factors for Escherichia coli (E. coli) contamination in 27 low-and middle-income countries (LMICs).

Methods

A new water quality module for household surveys was implemented in 27 Multiple Indicator Cluster Surveys. Teams used portable equipment to measure E. coli at the point of collection (PoC, n=61,170) and at the point of use (PoU, n=64,900) and asked respondents about the availability and accessibility of drinking water. Households were classified as having SMDW services if they used an improved water source that was free of E. coli contamination at PoC, accessible on premises, and available when needed. Compliance with individual SMDW criteria was also assessed. Modified Poisson regression was used to explore household and community risk factors for E. coli contamination.

Results

E. coli was commonly detected at the PoC (range 16-90%) and was more likely at the PoU (range 19-99%). On average, 84% of households used an improved drinking water source, and 31% met all of the SMDW criteria. E. coli contamination was the primary reason SMDW criteria were not met (15 of 27 countries). The prevalence of E. coli in PoC samples was lower among households using improved water sources [risk ratio (RR)=0.74; 95% confidence interval (CI): 0.64, 0.85] but not for households with water accessible on premises (RR=0.99; 95% CI: 0.94, 1.05) or available when needed (RR=0.95; 95% CI: 0.88, 1.02). E. coli contamination of PoU samples was less common for households in the richest vs. poorest wealth quintile (RR=0.70; 95% CI: 0.55, 0.88) and in communities with high (>75%) improved sanitation coverage (RR=0.94; 95% CI: 0.90, 0.97). Livestock ownership (RR=1.08; 95% CI: 1.04, 1.13), rural vs. urban residence (RR=1.10; 95% CI: 1.04, 1.16), and wet vs. dry season sampling (RR=1.07; 95% CI: 1.01, 1.15) were positively associated with contamination at the PoU.

Discussion

Cross-sectional water quality data can be collected in household surveys and can be used to assess inequalities in service levels, to track the SDG indicator of SMDWs, and to examine risk factors for contamination. There is an urgent need for better risk management to reduce widespread exposure to fecal contamination through drinking water services in LMICs. https://doi.org/10.1289/EHP8459.",2021-09-21 +34217324,An easy-to-operate web-based calculator for predicting the progression of chronic kidney disease.,"

Background

This study aimed to establish and validate an easy-to-operate novel scoring system based on simple and readily available clinical indices for predicting the progression of chronic kidney disease (CKD).

Methods

We retrospectively evaluated 1045 eligible CKD patients from a publicly available database. Factors included in the model were determined by univariate and multiple Cox proportional hazard analyses based on the training set.

Results

Independent prognostic factors including etiology, hemoglobin level, creatinine level, proteinuria, and urinary protein/creatinine ratio were determined and contained in the model. The model showed good calibration and discrimination. The area under the curve (AUC) values generated to predict 1-, 2-, and 3-year progression-free survival in the training set were 0.947, 0.931, and 0.939, respectively. In the validation set, the model still revealed excellent calibration and discrimination, and the AUC values generated to predict 1-, 2-, and 3-year progression-free survival were 0.948, 0.933, and 0.915, respectively. In addition, decision curve analysis demonstrated that the model was clinically beneficial. Moreover, to visualize the prediction results, we established a web-based calculator ( https://ncutool.shinyapps.io/CKDprogression/ ).

Conclusion

An easy-to-operate model based on five relevant factors was developed and validated as a conventional tool to assist doctors with clinical decision-making and personalized treatment.",2021-07-03 +34224878,REVA as A Well-curated Database for Human Expression-modulating Variants.,"More than 90% of disease- and trait-associated human variants are noncoding. By systematically screening multiple large-scale studies, we compiled REVA, a manually curated database for over 11.8 million experimentally tested noncoding variants with expression-modulating potentials. We provided 2424 functional annotations that could be used to pinpoint the plausible regulatory mechanism of these variants. We further benchmarked multiple state-of-the-art computational tools and found that their limited sensitivity remains a serious challenge for effective large-scale analysis. REVA provides high-quality experimentally tested expression-modulating variants with extensive functional annotations, which will be useful for users in the noncoding variant community. REVA is freely available at http://reva.gao-lab.org.",2021-07-03 +33998366,Perioperative Anaphylaxis from a Perspective of Temperature.,"Perioperative anaphylaxis poses a special challenge due to its unique condition with the additive effects of surgery and anesthesia, which tends to be more difficult to recognize, diagnose, and manage, resulting in potentially fatal outcomes. Appropriate prevention and treatment benefits patients and reduces mortality and morbidity. Significant body temperature changes occur during anaphylaxis and/or anesthesia, which correlates with the outcomes. During the perioperative period, body temperature and anaphylaxis bidirectionally interact with each other, and anaphylaxis is generally deteriorated by hypothermia, which is usually required in cardiac surgeries. Perioperative factors, such as surgery and anesthesia, affect body temperature and anaphylaxis. 
The complicated role of body temperature and its application in the diagnosis of perioperative anaphylaxis and prediction of the outcomes are still unclear. To date, a profile of body temperature change during perioperative anaphylaxis is lacking, which requires further study. This literature review was conducted with updated data on perioperative anaphylaxis from the perspective of temperature as a component aiming to bring attention to and offer some cues for improving perioperative prevention and management for perioperative medical teams. Supplemental data for this article is available online at https://doi.org/10.1080/08941939.2021.1922553 .",2021-05-17 +34595352,OptM: estimating the optimal number of migration edges on population trees using Treemix.,"The software Treemix has become extensively used to estimate the number of migration events, or edges (m), on population trees from genome-wide allele frequency data. However, the appropriate number of edges to include remains unclear. Here, I show that an optimal value of m can be inferred from the second-order rate of change in likelihood (Δm) across incremental values of m. Repurposed from its original use to estimate the number of population clusters in the software Structure (ΔK), I show using simulated populations that Δm performs equally as well as current recommendations for Treemix. A demonstration of an empirical dataset from domestic dogs indicates that this method may be preferable in large, complex population histories and can prioritize migration events for subsequent investigation. 
The method has been implemented in a freely available R package called ""OptM"" and as a web application (https://rfitak.shinyapps.io/OptM/) to interface directly with the output files of Treemix.",2021-09-16 +33970822,Health Information National Trends Survey (HINTS.gov).,"As an initiative of the National Cancer Institute (NCI), the Health Information National Trends Survey (HINTS) is a recurring, cross-sectional national survey to assess how adults in the United States access and use health information, their perceptions of health risk, and their participation in health-promoting or health-risking behaviors with an emphasis on cancer-related topics. The HINTS website (https://hints.cancer.gov) gives free public access to all HINTS survey materials, data, and analyses. HINTS data can be used to inform the design and evaluation of cancer control and other health related communication programs, and serve as a starting point for further independent research. This article provides an overview of the resources available at (https://hints.cancer.gov) and suggests factors to bear in mind while navigating the site.",2021-04-01 +34116107,CtNorm: Real time PCR cycle of threshold (Ct) normalization algorithm.,"In relative quantification with Real Time PCR (qRT-PCR), accurate analysis requires equal amplification efficiency for both genes (Gene of interest and reference gene) and equal threshold values for all the samples. In this quantification method the expression level in treated samples will be calculated in comparison to the control group. We conducted the present study to design an algorithm for converting the data obtained from different runs containing identical standard samples into one run with the same amplification efficiency and threshold value. For this purpose, two formulas were designed; one to convert the amplification efficiency of each run to 100%, and the other one for converting data from different runs into one run. Utilizing these two formulas, an algorithm was developed and named CtNorm. 
The online version of CtNorm algorithm is available at http://ctnorm.sums.ac.ir/. We used qRT-PCR technique to validate the accuracy of the designed algorithm for the normalization of four different human internal control genes. Normalizing the Ct values obtained from separate runs with the CtNorm algorithm has eliminated the differences and the average of the Ct values has become similar to the condition in which all the samples were amplified in a single run. The CtNorm algorithm could be utilized for equalizing the Ct values of several qRT-PCR runs with the same standard samples. The algorithm has also the ability to convert the amplification efficiency to 100% which is useful in absolute and relative quantification.",2021-06-08 +33262425,Transfer learning with chest X-rays for ER patient classification.,One of the challenges with urgent evaluation of patients with acute respiratory distress syndrome (ARDS) in the emergency room (ER) is distinguishing between cardiac vs infectious etiologies for their pulmonary findings. We conducted a retrospective study with the collected data of 171 ER patients. ER patient classification for cardiac and infection causes was evaluated with clinical data and chest X-ray image data. We show that a deep-learning model trained with an external image data set can be used to extract image features and improve the classification accuracy of a data set that does not contain enough image data to train a deep-learning model. An analysis of clinical feature importance was performed to identify the most important clinical features for ER patient classification. The current model is publicly available with an interface at the web link: http://nbttranslationalresearch.org/ .,2020-12-01 +31970096,Online Advanced Analytical Service: Profiles for Dengue Hemorrhagic Fever Transmission in Southern Thailand.,"

Background

Southern Thailand has the highest Dengue Hemorrhagic Fever (DHF) incidence and fatality rate in Thailand. Geographic Information Systems (GIS) technology and spatial analysis techniques are powerful tools to describe epidemiological patterns. The aim of this study was to develop an Online Advanced Analytical Service: Profiles for Dengue Hemorrhagic Fever Transmission (OSD) in Southern Thailand.

Methods

The system was developed using JavaServer Pages (JSP) and Database Management System (DBMS) with Structured Query Language (SQL) technology as the web database tool for data entry and data access, web Mathematica technology for data analysis and Google Maps™ API technology for online data display as the map service implementing GIS technology.

Results

The OSD system has been available online at URL http://www.s-cm.co/dengue . Users performed data entry using the web-service with login by social network (i.e. Facebook) account, used data analysis tools with online real-time statistical analysis and data display with transparent color circles overlaid on Google Maps™.

Conclusion

The OSD system display represents the distribution of DHF cases with spatial information. This system enables health planners to provide interventions for DHF focusing on prevention, control, and strategic planning.",2019-11-01 +31829415,Expanding the capillary electrophoresis-based glucose unit database of the GUcal app.,GUcal is a standalone application for automatically calculating the glucose unit (GU) values for separated N-glycan components of interest in an electropherogram and suggests their tentative structures by utilizing an internal database. We have expanded the original database of GUcal by integrating all publicly available capillary electrophoresis (CE) data in the GlycoStore collection (https://www.glycostore.org) and with in-house measured GU values. The GUcal app is freely available online (https://www.gucal.hu) and readily facilitates CE-based high throughput GU value determination for first line structural elucidation.,2020-05-01 +34678076,"Workplace Leave and Breastfeeding Duration Among Postpartum Women, 2016-2018.","Objectives. To examine associations of workplace leave length with breastfeeding initiation and continuation at 1, 2, and 3 months. Methods. We analyzed 2016 to 2018 data for 10 sites in the United States from the Pregnancy Risk Assessment Monitoring System, a site-specific, population-based surveillance system that samples women with a recent live birth 2 to 6 months after birth. Using multivariable logistic regression, we examined associations of leave length (< 3 vs ≥ 3 months) with breastfeeding outcomes. Results. Among 12 301 postpartum women who planned to or had returned to the job they had during pregnancy, 42.1% reported taking unpaid leave, 37.5% reported paid leave, 18.2% reported both unpaid and paid leave, and 2.2% reported no leave. Approximately two thirds (66.2%) of women reported taking less than 3 months of leave. 
Although 91.2% of women initiated breastfeeding, 81.2%, 72.1%, and 65.3% of women continued breastfeeding at 1, 2, and 3 months, respectively. Shorter leave length (< 3 months), whether paid or unpaid, was associated with lower prevalence of breastfeeding at 2 and 3 months compared with 3 or more months of leave. Conclusions. Women with less than 3 months of leave reported shorter breastfeeding duration than did women with 3 or more months of leave. (Am J Public Health. 2021;111(11):2036-2045. https://doi.org/10.2105/AJPH.2021.306484).",2021-10-22 +28832569,SweGen: a whole-genome data resource of genetic variability in a cross-section of the Swedish population.,"Here we describe the SweGen data set, a comprehensive map of genetic variation in the Swedish population. These data represent a basic resource for clinical genetics laboratories as well as for sequencing-based association studies by providing information on genetic variant frequencies in a cohort that is well matched to national patient cohorts. To select samples for this study, we first examined the genetic structure of the Swedish population using high-density SNP-array data from a nation-wide cohort of over 10 000 Swedish-born individuals included in the Swedish Twin Registry. A total of 1000 individuals, reflecting a cross-section of the population and capturing the main genetic structure, were selected for whole-genome sequencing. Analysis pipelines were developed for automated alignment, variant calling and quality control of the sequencing data. This resulted in a genome-wide collection of aggregated variant frequencies in the Swedish population that we have made available to the scientific community through the website https://swefreq.nbis.se. A total of 29.2 million single-nucleotide variants and 3.8 million indels were detected in the 1000 samples, with 9.9 million of these variants not present in current databases. Each sample contributed with an average of 7199 individual-specific variants. 
In addition, an average of 8645 larger structural variants (SVs) were detected per individual, and we demonstrate that the population frequencies of these SVs can be used for efficient filtering analyses. Finally, our results show that the genetic diversity within Sweden is substantial compared with the diversity among continental European populations, underscoring the relevance of establishing a local reference data set.",2017-08-23 +34153027,pyTFM: A tool for traction force and monolayer stress microscopy.,"Cellular force generation and force transmission are of fundamental importance for numerous biological processes and can be studied with the methods of Traction Force Microscopy (TFM) and Monolayer Stress Microscopy. Traction Force Microscopy and Monolayer Stress Microscopy solve the inverse problem of reconstructing cell-matrix tractions and inter- and intra-cellular stresses from the measured cell force-induced deformations of an adhesive substrate with known elasticity. Although several laboratories have developed software for Traction Force Microscopy and Monolayer Stress Microscopy computations, there is currently no software package available that allows non-expert users to perform a full evaluation of such experiments. Here we present pyTFM, a tool to perform Traction Force Microscopy and Monolayer Stress Microscopy on cell patches and cell layers grown in a 2-dimensional environment. pyTFM was optimized for ease-of-use; it is open-source and well documented (hosted at https://pytfm.readthedocs.io/) including usage examples and explanations of the theoretical background. pyTFM can be used as a standalone Python package or as an add-on to the image annotation tool ClickPoints. 
In combination with the ClickPoints environment, pyTFM allows the user to set all necessary analysis parameters, select regions of interest, examine the input data and intermediary results, and calculate a wide range of parameters describing forces, stresses, and their distribution. In this work, we also thoroughly analyze the accuracy and performance of the Traction Force Microscopy and Monolayer Stress Microscopy algorithms of pyTFM using synthetic and experimental data from epithelial cell patches.",2021-06-21 +30598101,Constructing tissue-specific transcriptional regulatory networks via a Markov random field.,"

Background

Recent advances in sequencing technologies have enabled parallel assays of chromatin accessibility and gene expression for major human cell lines. Such innovation provides a great opportunity to decode phenotypic consequences of genetic variation via the construction of predictive gene regulatory network models. However, a computational method that systematically integrates chromatin accessibility information with gene expression data to recover complicated regulatory relationships between genes in a tissue-specific manner is still lacking.

Results

We propose a Markov random field (MRF) model for constructing tissue-specific transcriptional regulatory networks via integrative analysis of DNase-seq and RNA-seq data. Our method, named CSNets (cell-line specific regulatory networks), first infers regulatory networks for individual cell lines using chromatin accessibility information, and then fine-tunes these networks using the MRF based on pairwise similarity between cell lines derived from gene expression data. Using this method, we constructed regulatory networks specific to 110 human cell lines and 13 major tissues with the use of ENCODE data. We demonstrated the high quality of these networks via comprehensive statistical analysis based on ChIP-seq profiles, functional annotations, taxonomic analysis, and literature surveys. We further applied these networks to analyze GWAS data of Crohn's disease and prostate cancer. Results were either consistent with the literature or provided biological insights into regulatory mechanisms of these two complex diseases. The website of CSNets is freely available at http://bioinfo.au.tsinghua.edu.cn/jianglab/CSNETS/ .

Conclusions

CSNets demonstrated the power of joint analysis on epigenomic and transcriptomic data towards the accurate construction of gene regulatory network. Our work provides not only a useful resource of regulatory networks to the community, but also valuable experiences in methodology development for multi-omics data integration.",2018-12-31 +31286135,VeriNA3d: an R package for nucleic acids data mining.,"

Summary

veriNA3d is an R package for the analysis of nucleic acids structural data, with an emphasis in complex RNA structures. In addition to single-structure analyses, veriNA3d also implements functions to handle whole datasets of mmCIF/PDB structures that could be retrieved from public/local repositories. Our package aims to fill a gap in the data mining of nucleic acids structures to produce flexible and high throughput analysis of structural databases.

Availability and implementation

http://mmb.irbbarcelona.org/gitlab/dgallego/veriNA3d.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +31788512,An open dataset about georeferenced harmonized national agricultural censuses and surveys of seven mediterranean countries.,"The dataset presented in this paper is based on data gathered from several countries within the West Mediterranean area at the highest detailed scale regarding official statistics, with the aim of investigating land and food systems dynamics in the Mediterranean. Characterizing land and food systems dynamics is critical to reveal insights regarding interactions between current dynamics of agricultural practices, species diversity and local food systems. These interactions were analyzed, at multiple spatial scales, on a large part of the Mediterranean basin within the DIVERCROP Project (https://divercropblog.wordpress.com/). An harmonized dataset with the desired characteristics was not readily available from official sources and, therefore, it was necessary to build an ad hoc database that could: (1) cover the Mediterranean areas of seven countries, namely Algeria (DZ), France (FR), Italy (IT), Malta (MT), Portugal (PT), Spain (ES) and Tunisia (TN); (2) contain data referred to the most disaggregated level of administrative units for which data is available in each country; (3) contain data referred to at least two time points, including the latest available data, in each country; (4) contain data on number of farm holdings, on the physical areas covered by the main annual and permanent crops and on livestock (number of heads); (5) contain a primary key that allows joining the census and surveys database to a geographical dataset of administrative units covering the entire area; (6) have an associated complete geographical dataset of administrative units, to allow spatial data analyses.",2019-11-08 +32487232,YouTube™ as a source of information for Candida auris infection: a systematic review.,"BACKGROUND:Candida auris is a novel Candida species, and has emerged 
globally as a multidrug-resistant health care-associated fungal pathogen. YouTube™ (http://www.youtube.com) as the largest free video-sharing website is increasingly used to search health information. Thus, the aim of this study was to evaluate the content, reliability and quality of YouTube™ videos regarding Candida auris infection, and to identify whether it is a useful resource for people. METHODS:The YouTube™ was used to search systematically for videos using the keywords: ""Candida auris infection"" and ""Candida auris"". Strict inclusion and exclusion criteria were used to select the videos. The videos were reviewed and scored by two independent reviewers and recorded the ""title"", ""length"", ""views"", ""comments"", ""dislike"", ""like"", ""posted days"" and ""category of videos"". The videos were categorized as ""poor"", ""good"" and ""excellent"" by the score. The DISCERN tool was used to assess the reliability of the YouTube™ videos. RESULTS:Seventy-six videos were included in final analysis in our study. Most videos (59.2%, 55/76) had better quality. There were no statistically significant differences between groups in respect of the number of likes, dislikes, views, comments, percentage positivity, likebility, view rate and viewers' interaction. Length and posted days were significantly associated with the classification. The videos were categorized as ""educational video"", ""new report"", ""personal experience and blog entertainment"" and ""interview"". Significant differences were found in the source of videos and the characteristics of the individuals appearing in a video between the groups. 
CONCLUSION:YouTube™ has striking potential to be an effective user-friendly learning interface for people to obtain information of Candida auris infection.",2020-06-01 +34593810,A curated dataset for data-driven turbulence modelling.,"The recent surge in machine learning augmented turbulence modelling is a promising approach for addressing the limitations of Reynolds-averaged Navier-Stokes (RANS) models. This work presents the development of the first open-source dataset, curated and structured for immediate use in machine learning augmented corrective turbulence closure modelling. The dataset features a variety of RANS simulations with matching direct numerical simulation (DNS) and large-eddy simulation (LES) data. Four turbulence models are selected to form the initial dataset: k-ε, k-ε-ϕt-f, k-ω, and k-ω SST. The dataset consists of 29 cases per turbulence model, for several parametrically sweeping reference DNS/LES cases: periodic hills, square duct, parametric bumps, converging-diverging channel, and a curved backward-facing step. At each of the 895,640 points, various RANS features with DNS/LES labels are available. The feature set includes quantities used in current state-of-the-art models, and additional fields which enable the generation of new feature sets. The dataset reduces effort required to train, test, and benchmark new corrective RANS models. The dataset is available at  https://doi.org/10.34740/kaggle/dsv/2637500 .",2021-09-30 +33847541,A method for systematically ranking therapeutic drug candidates using multiple uncertain screening criteria.,"Multiple different screening tests for candidate leads in drug development may often yield conflicting or ambiguous results, sometimes making the selection of leads a nontrivial maximum-likelihood ranking problem. Here, we employ methods from the field of multiple criteria decision making (MCDM) to the problem of screening candidate antibody therapeutics. 
We employ the SMAA-TOPSIS method to rank a large cohort of antibodies using up to eight weighted screening criteria, in order to find lead candidate therapeutics for Alzheimer's disease, and determine their robustness to both uncertainty in screening measurements, as well as uncertainty in the user-defined weights of importance attributed to each screening criterion. To choose lead candidates and measure the confidence in their ranking, we propose two new quantities, the Retention Probability and the Topness, as robust measures for ranking. This method may enable more systematic screening of candidate therapeutics when it becomes difficult intuitively to process multi-variate screening data that distinguishes candidates, so that additional candidates may be exposed as potential leads, increasing the likelihood of success in downstream clinical trials. The method properly identifies true positives and true negatives from synthetic data, its predictions correlate well with known clinically approved antibodies vs. those still in trials, and it allows for ranking analyses using antibody developability profiles in the literature. We provide a webserver where users can apply the method to their own data: http://bjork.phas.ubc.ca.",2021-04-13 +31504189,MeLAD: an integrated resource for metalloenzyme-ligand associations.,"

Motivation

Metalloenzymes are attractive targets for therapeutic intervention owing to their central roles in various biological processes and pathological situations. The fast-growing body of structural data on metalloenzyme-ligand interactions is facilitating efficient drug discovery targeting metalloenzymes. However, there remains a shortage of specific databases that can provide centralized, interconnected information exclusive to metalloenzyme-ligand associations.

Results

We created a Metalloenzyme-Ligand Association Database (MeLAD), which is designed to provide curated structural data and information exclusive to metalloenzyme-ligand interactions, and more uniquely, present expanded associations that are represented by metal-binding pharmacophores (MBPs), metalloenzyme structural similarity (MeSIM) and ligand chemical similarity (LigSIM). MeLAD currently contains 6086 structurally resolved interactions of 1416 metalloenzymes with 3564 ligands, of which classical metal-binding, non-classical metal-binding, non-metal-binding and metal water-bridging interactions account for 63.0%, 2.3%, 34.4% and 0.3%, respectively. A total of 263 monodentate, 191 bidentate and 15 tridentate MBP chemotypes were included in MeLAD, which are linked to different active site metal ions and coordination modes. 3726 and 52 740 deductive metalloenzyme-ligand associations by MeSIM and LigSIM analyses, respectively, were included in MeLAD. An online server is provided for users to conduct metalloenzyme profiling prediction for small molecules of interest. MeLAD is searchable by multiple criteria, e.g. metalloenzyme name, ligand identifier, functional class, bioinorganic class, metal ion and metal-containing cofactor, which will serve as a valuable, integrative data source to foster metalloenzyme related research, particularly involved in drug discovery targeting metalloenzymes.

Availability and implementation

MeLAD is accessible at https://melad.ddtmlab.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +34430602,Identification of significant immune-related genes for epilepsy via bioinformatics analysis.,"

Background

Epilepsy is one of the most common neurological disorders, but its underlying mechanism has remained obscure, and the role of immune-related genes (IRGs) in epilepsy has not yet been investigated. Therefore, in this study, we explored the association between IRGs and epilepsy.

Methods

An IRG list was collected from the ImmPort database. The gene expression profiles of GSE143272 were collected from the Gene Expression Omnibus (GEO) database (https://www.ncbi.nlm.nih.gov/geo/). Differentially expressed genes (DEGs) between epilepsy and normal samples were analyzed, and the intersections between IRGs and DEGs were identified using the VennDiagram package, with the intersected genes subjected to further analysis. Enrichment function analysis was performed for the intersected genes, a protein-protein interaction (PPI) network was constructed via the Search Tool for the Retrieval of Interacting Genes/Proteins (STRING) database, and the hub genes (top 10) of the PPI network were calculated by the cytoHubba plug-in in Cytoscape. The top correlated genes were selected to perform correlation analysis with immune cells infiltration and expression levels. Finally, we performed validation of the top correlated genes transcriptional expression levels using an animal model.

Results

There were a total of 245 DEGs detected in GSE143272, among which 143 were upregulated and 102 were downregulated in epilepsy. A total of 44 differential IRGs were obtained via intersection of DEGs and IRGs. Enrichment function analysis of DEGs showed that they played a significant role in immune response. The gene CXCL1 was the most correlated with other differentially expressed IRGs via the PPI network. The results of immune cell infiltration analysis indicated that epilepsy patients had higher activated mast cells infiltration (P=0.021), but lower activated CD4 memory T cells (P=0.001), resting CD4 memory T cells (P=0.011), and gamma delta T cells (P=0.038) infiltration. It was revealed that CXCL1 had a positive correlation with activated mast cells (R=0.25, P=0.019) and neutrophils (R=0.3, P=0.0043), and a negative correlation with T cells gamma delta (R=-0.25, P=0.018). The levels of CXCL1 expression were significantly lower in epilepsy patients than those in normal samples.

Conclusions

In this study, the results showed that IRGs such as CXCL1 have a significant influence on epilepsy via regulation of immune cells infiltration.",2021-07-01 +34244719,TIDB: a comprehensive database of trained immunity. ,"Trained immunity is a newly emerging concept that defines the ability of the innate immune system to form immune memory and provide long-lasting protection against previously encountered antigens. Accumulating evidence reveals that trained immunity not only has broad benefits to host defense but is also harmful to the host in chronic inflammatory diseases. However, all trained immunity-related information is scattered in the literature and thus is difficult to access. Here, we describe Trained Immunity DataBase (TIDB), a comprehensive database that provides well-studied trained immunity-related genes from human, rat and mouse as well as the related literature evidence. Moreover, TIDB also provides three modules to analyze the function of the trained-immunity-related genes of interest, including Reactome pathway over-representation analysis, Gene Ontology enrichment analysis and protein-protein interaction subnetwork reconstruction. We believe TIDB will help developing valuable strategies for vaccine design and immune-mediated disease therapy. Database URL: http://www.ieom-tm.com/tidb.",2021-07-01 +34048578,GalaxyHeteromer: protein heterodimer structure prediction by template-based and ab initio docking.,"Protein-protein interactions play crucial roles in diverse biological processes, including various disease progressions. Atomistic structural details of protein-protein interactions may provide important information that can facilitate the design of therapeutic agents. GalaxyHeteromer is a freely available automatic web server (http://galaxy.seoklab.org/heteromer) that predicts protein heterodimer complex structures from two subunit protein sequences or structures. 
When subunit structures are unavailable, they are predicted by template- or distance-prediction-based modelling methods. Heterodimer complex structures can be predicted by both template-based and ab initio docking, depending on the template's availability. Structural templates are detected from the protein structure database based on both the sequence and structure similarities. The templates for heterodimers may be selected from monomer and homo-oligomer structures, as well as from hetero-oligomers, owing to the evolutionary relationships of heterodimers with domains of monomers or subunits of homo-oligomers. In addition, the server employs one of the best ab initio docking methods when heterodimer templates are unavailable. The multiple heterodimer structure models and the associated scores, which are provided by the web server, may be further examined by user to test or develop functional hypotheses or to design new functional molecules.",2021-07-01 +34023906,MyCLADE: a multi-source domain annotation server for sequence functional exploration.,"The ever-increasing number of genomic and metagenomic sequences accumulating in our databases requires accurate approaches to explore their content against specific domain targets. MyCLADE is a user-friendly webserver designed for targeted functional profiling of genomic and metagenomic sequences based on a database of a few million probabilistic models of Pfam domains. It uses the MetaCLADE multi-source domain annotation strategy, modelling domains based on multiple probabilistic profiles. MyCLADE takes a list of protein sequences and possibly a target set of domains/clans as input and, for each sequence, it provides a domain architecture built from the targeted domains or from all Pfam domains. It is linked to the Pfam and QuickGO databases in multiple ways for easy retrieval of domain and clan information. 
E-value, bit-score, domain-dependent probability scores and logos representing the match of the model with the sequence are provided to help the user to assess the quality of each annotation. Availability and implementation: MyCLADE is freely available at http://www.lcqb.upmc.fr/myclade.",2021-07-01 +34019664,DeepGOWeb: fast and accurate protein function prediction on the (Semantic) Web.,"Understanding the functions of proteins is crucial to understand biological processes on a molecular level. Many more protein sequences are available than can be investigated experimentally. DeepGOPlus is a protein function prediction method based on deep learning and sequence similarity. DeepGOWeb makes the prediction model available through a website, an API, and through the SPARQL query language for interoperability with databases that rely on Semantic Web technologies. DeepGOWeb provides accurate and fast predictions and ensures that predicted functions are consistent with the Gene Ontology; it can provide predictions for any protein and any function in Gene Ontology. DeepGOWeb is freely available at https://deepgo.cbrc.kaust.edu.sa/.",2021-07-01 +33978743,InterEvDock3: a combined template-based and free docking server with increased performance through explicit modeling of complex homologs and integration of covariation-based contact maps.,"The InterEvDock3 protein docking server exploits the constraints of evolution by multiple means to generate structural models of protein assemblies. The server takes as input either several sequences or 3D structures of proteins known to interact. It returns a set of 10 consensus candidate complexes, together with interface predictions to guide further experimental validation interactively. 
Three key novelties were implemented in InterEvDock3 to help obtain more reliable models: users can (i) generate template-based structural models of assemblies using close and remote homologs of known 3D structure, detected through an automated search protocol, (ii) select the assembly models most consistent with contact maps from external methods that implement covariation-based contact prediction with or without deep learning and (iii) exploit a novel coevolution-based scoring scheme at atomic level, which leads to significantly higher free docking success rates. The performance of the server was validated on two large free docking benchmark databases, containing respectively 230 unbound targets (Weng dataset) and 812 models of unbound targets (PPI4DOCK dataset). Its effectiveness has also been proven on a number of challenging examples. The InterEvDock3 web interface is available at http://bioserv.rpbs.univ-paris-diderot.fr/services/InterEvDock3/.",2021-07-01 +33941712,Is There a Role for Perioperative Pelvic Radiotherapy in Surgically Resected Stage IV Rectal Cancer?: A Propensity Score-matched Analysis.,"

Objectives

This study aimed to determine whether perioperative pelvic radiotherapy (RT) improves outcomes in stage IV rectal cancer patients treated with primary surgical resection and systemic chemotherapy and to identify predictive factors for selection of patients for these approaches.

Materials and methods

We searched the Surveillance, Epidemiology, and End Results (SEER) database for patients diagnosed between 2010 and 2015 with stage IV rectal cancer, but without brain or bone metastases. After applying the exclusion criteria, a total of 26,132 patients were included in the analysis; propensity score matching was used to balance their individual characteristics.

Results

Overall, 3283 (12.6%) patients received perioperative RT; the 3-year overall survival (OS) rates were 43.6% in the surgery group and 50.5% in the surgery with RT group (P<0.001). The survival benefit of RT was maintained after propensity score matching and multivariate adjustment (hazard ratio: 0.70; 95% confidence interval: 0.66-0.81; P<0.001). Interaction testing of the prognostic variables showed a significant interaction between RT and the presence of lung metastasis (P<0.001): the benefit of RT was observed only in patients without lung metastases (3 y OS 52.1% vs. 44.1%, P<0.001), but it was observed regardless of liver metastases. In addition, we developed a web-based calculator (http://bit.do/mRC_surv) to provide individualized estimates of OS benefit based on the receipt of perioperative pelvic RT.

Conclusions

Perioperative pelvic RT significantly improved OS rates, especially in patients without lung metastases. We successfully developed a nomogram and web-based calculator that could predict survival benefit with the addition of RT for these patients.",2021-07-01 +33212503,DeepBL: a deep learning-based approach for in silico discovery of beta-lactamases. ,"Beta-lactamases (BLs) are enzymes localized in the periplasmic space of bacterial pathogens, where they confer resistance to beta-lactam antibiotics. Experimental identification of BLs is costly yet crucial to understand beta-lactam resistance mechanisms. To address this issue, we present DeepBL, a deep learning-based approach by incorporating sequence-derived features to enable high-throughput prediction of BLs. Specifically, DeepBL is implemented based on the Small VGGNet architecture and the TensorFlow deep learning library. Furthermore, the performance of DeepBL models is investigated in relation to the sequence redundancy level and negative sample selection in the benchmark dataset. The models are trained on datasets of varying sequence redundancy thresholds, and the model performance is evaluated by extensive benchmarking tests. Using the optimized DeepBL model, we perform proteome-wide screening for all reviewed bacterium protein sequences available from the UniProt database. These results are freely accessible at the DeepBL webserver at http://deepbl.erc.monash.edu.au/.",2021-07-01 +34086933,catRAPID omics v2.0: going deeper and wider in the prediction of protein-RNA interactions.,"Prediction of protein-RNA interactions is important to understand post-transcriptional events taking place in the cell. Here we introduce catRAPID omics v2.0, an update of our web server dedicated to the computation of protein-RNA interaction propensities at the transcriptome- and RNA-binding proteome-level in 8 model organisms. 
The server accepts multiple input protein or RNA sequences and computes their catRAPID interaction scores on updated precompiled libraries. Additionally, it is now possible to predict the interactions between a custom protein set and a custom RNA set. Considerable effort has been put into the generation of a new database of RNA-binding motifs that are searched within the predicted RNA targets of proteins. In this update, the sequence fragmentation scheme of the catRAPID fragment module has been included, which allows the server to handle long linear RNAs and to analyse circular RNAs. For the top-scoring protein-RNA pairs, the web server shows the predicted binding sites in both protein and RNA sequences and reports whether the predicted interactions are conserved in orthologous protein-RNA pairs. The catRAPID omics v2.0 web server is a powerful tool for the characterization and classification of RNA-protein interactions and is freely available at http://service.tartaglialab.com/page/catrapid_omics2_group along with documentation and tutorial.",2021-07-01 +34023887,DomainViz: intuitive visualization of consensus domain distributions across groups of proteins.,"The prediction of functional domains is typically among the first steps towards understanding the function of new proteins and protein families. There are numerous databases of annotated protein domains that permit researchers to identify domains on individual proteins of interest. However, it is necessary to perform high-throughput domain searches to gain evolutionary insight into the functions of proteins and protein families. Unfortunately, at present, it is difficult to search for, and visualize domain conservation across multiple proteins and/or multiple groups of proteins in an intuitive manner. Here we present DomainViz, a new web-server that streamlines the identification and visualization of domains across multiple protein sequences. 
Currently, DomainViz uses the well-established PFAM and Prosite databases for domain searching and assembles intuitive, publication-ready 'monument valley' plots (mv-plots) that display the extent of domain conservation along two dimensions: positionality and frequency of occurrence in the input protein sequences. In addition, DomainViz produces a conventional domain-ordering figure. DomainViz can be used to explore the conservation of domains within a single protein family, across multiple families, and across families from different species to support studies into protein function and evolution. The web-server is publicly available at: https://uhrigprotools.biology.ualberta.ca/domainviz.",2021-07-01 +33320930,CyanoPATH: a knowledgebase of genome-scale functional repertoire for toxic cyanobacterial blooms. ,"CyanoPATH is a database that curates and analyzes the common genomic functional repertoire for cyanobacteria harmful algal blooms (CyanoHABs) in eutrophic waters. Based on the literature of empirical studies and genome/protein databases, it summarizes four types of information: common biological functions (pathways) driving CyanoHABs, customized pathway maps, classification of blooming type based on databases and the genomes of cyanobacteria. A total of 19 pathways are reconstructed, which are involved in the utilization of macronutrients (e.g. carbon, nitrogen, phosphorus and sulfur), micronutrients (e.g. zinc, magnesium, iron, etc.) and other resources (e.g. light and vitamins) and in stress resistance (e.g. lead and copper). These pathways, comprised of both transport and biochemical reactions, are reconstructed with proteins from NCBI and reactions from KEGG and visualized with self-created transport/reaction maps. The pathways are hierarchical and consist of subpathways, protein/enzyme complexes and constituent proteins. New cyanobacterial genomes can be annotated and visualized for these pathways and compared with existing species. 
This set of genomic functional repertoire is useful in analyzing aquatic metagenomes and metatranscriptomes in CyanoHAB research. Most importantly, it establishes a link between genome and ecology. All these reference proteins, pathways and maps and genomes are free to download at http://www.csbg-jlu.info/CyanoPATH.",2021-07-01 +34422967,Nomograms for predicting cancer-specific survival in patients with primary central nervous system lymphoma: a population-based analysis.,"

Background

This study identified the risk factors for survival in patients with primary central nervous system lymphoma (PCNSL). Nomograms were developed and validated to predict individualized overall survival (OS) and cancer-specific survival (CSS) in this particular cohort.

Methods

Patients diagnosed with PCNSL between 1975 and 2016 were selected from the Surveillance, Epidemiology, and End Results (SEER) database for this study. The Cox regression model, the Fine and Grey's model, and the backward method were applied to determine the risk factors for OS and CSS. Nomograms were established accordingly. Internal and external validation was performed in an Asian population to examine the accuracy of the nomograms.

Results

A total of 5,900 patients with PCNSL were identified from the SEER database. A further 163 patients with PCNSL from the Beijing Tiantan Hospital between 2004 and 2018 were included. Age at diagnosis, tumor site, pathological subtype, surgery, chemotherapy, coexisting malignancies, and HIV infection were independent risk factors of CSS. In addition to the risk factors of CSS, gender, marital status, and radiation were also independent factors of OS. Nomograms were developed to estimate the 1-, 3-, and 5-year OS and CSS. The discrimination and calibration of the nomograms performed well. The C-indexes of the nomograms for OS and CSS prediction were 0.728 [95% confidence interval (CI): 0.703-0.753] and 0.726 (95% CI: 0.696-0.756), respectively. In addition, compared with previously published OS nomograms, the newly established nomograms displayed superior prediction for OS.

Conclusions

Nomograms predicting the 1-, 3- and 5-year OS and CSS of patients with PCNSL were established in this study. The validated nomograms showed relatively good performance and may be used clinically to evaluate patients' individualized risk and prognosis with PCNSL. Free software for individualized survival prediction is provided at http://www.pcnsl-survivalprediction.cn.",2021-07-01 +34422602,"Narrative review of intraductal papillary mucinous neoplasms: pathogenesis, diagnosis, and treatment of a true precancerous lesion.","

Objective

Although considerable progress has been made in our understanding of intraductal papillary mucinous neoplasm (IPMN) of the pancreas, there are still some problems to be solved.

Background

IPMN is one of the most important precancerous lesions of pancreatic cancer, but the relationship between IPMN and pancreatic cancer, and the specific mechanism of the development from IPMN to invasive carcinoma, remain to be explored in depth. With the development of imaging, the detection rate of IPMN has been greatly improved. However, the degree of malignancy of IPMN is difficult to assess, and its classification criteria and surgical treatment strategies are still controversial. Therefore, there is an urgent need for the best treatment plan for IPMN and research that can better predict IPMN recurrence and tumor malignancy.

Methods

From the online database Web of Science (https://webofknowledge.com/) and PubMed (https://pubmed.ncbi.nlm.nih.gov/), we use specific retrieval strategies to retrieve relevant articles based on the topics we discussed, and we review and discuss them.

Conclusions

This paper discusses the related research and progress of IPMN in recent years to improve the understanding of the incidence, diagnosis, treatment, and prognosis of this disease. The follow-up and monitoring of IPMN is particularly important, but the specific strategy also remains controversial.",2021-07-01 +33395472,"Understanding Pathways Between Agriculture, Food Systems, and Nutrition: An Evidence and Gap Map of Research Tools, Metrics, and Methods in the Last 10 Years.","New tools, metrics, and methods in agriculture, food systems, and nutrition (A&N) research proliferated in the decade following the 2007-2008 food price crisis. We map these developments across themes derived from conceptual A&N pathways and expert consultations. We created an interactive Evidence and Gap Map (EGM) from a systematic search of published and gray literature since 2008, following Campbell Collaboration guidelines. We retrieved over 30,000 reports from published literature databases, and individually searched 20 online repositories. We systematically screened 24,359 reports by title and/or abstract, 1577 by full report, and included 904 eligible reports. The EGM consists of rows of thematic domains and columns of types of tools, metrics, and methods, as well as extensive coding applied as filters. Each cell of the map represents research surrounding a type of tool, metric, or method within a given theme. Reports in each cell are grouped by stage of development, which expand to a corresponding bibliography. Users can filter EGM reports by various characteristics. The 4 most populated domains were: diets, nutrition, and health; primary food production; water, sanitation, and hygiene; and environment and sustainability. The 4 most common types of metrics, methods, and tools were: diet metrics; footprint analysis (especially water); technology applications; and network or Bayesian analysis. Gaps represent areas of few or no reports of innovation between 2008 and 2018. 
There were gaps in reports and innovations related to: power or conflicts of interest; food environments; markets; private sector engagement; food loss and waste; conflict; study design and system-level tools, metrics, and methods. The EGM is a comprehensive tool to navigate advances in measurement in A&N research: to highlight trends and gaps, conduct further synthesis and development, and prioritize the agenda for future work. This narrative synthesis accompanies the EGM, which can be found at https://www.anh-academy.org/evidence-and-gap-map.",2021-07-01 +31581093,DORMAN: Database of Reconstructed MetAbolic Networks.,"Genome-scale reconstructed metabolic networks have provided an organism specific understanding of cellular processes and their relations to phenotype. As they are deemed essential to study metabolism, the number of organisms with reconstructed metabolic networks continues to increase. This everlasting research interest lead to the development of online systems/repositories that store existing reconstructions and enable new model generation, integration, and constraint-based analyses. While features that support model reconstruction are widely available, current systems lack the means to help users who are interested in analyzing the topology of the reconstructed networks. Here, we present the Database of Reconstructed Metabolic Networks - DORMAN. DORMAN is a centralized online database that stores SBML-based reconstructed metabolic networks published in the literature, and provides web-based computational tools for visualizing and analyzing the model topology. 
Novel features of DORMAN are (i) interactive visualization interface that allows rendering of the complete network as well as editing and exporting the model, (ii) hierarchical navigation that provides efficient access to connected entities in the model, (iii) built-in query interface that allows posing topological queries, and finally, (iv) model comparison tool that enables comparing models with different nomenclatures, using approximate string matching. DORMAN is online and freely accessible at http://ciceklab.cs.bilkent.edu.tr/dorman.",2021-07-01 +34422601,A narrative review of a type of pancreatitis worthy of attention: acute pancreatitis associated with pancreatic tumors-current problems and future thinking.,"

Objective

Our purpose is to explain the onset, diagnosis, and treatment of pancreatic tumor-associated pancreatitis (PTP), and inform clinicians about the management of PTP. It is hoped that clinicians can gain some experience and inspiration from this review, so that patients can obtain better treatment results.

Background

Acute pancreatitis (AP) is an inflammatory disease, and pancreatic tumors are one of the causes of pancreatitis. When pancreatic tumors and pancreatitis exist at the same time, and there is a ""connection"" between them, this type of pancreatitis is referred to as PTP. The manifestations of PTP can be as follows: (I) AP is the first symptom of pancreatic tumors; (II) pancreatitis is found in patients after pancreatic tumor diagnosis or during pancreatic tumor surgery. Because pancreatic tumors are not one of the most common causes of pancreatitis, PTP has not attracted the attention of researchers and clinicians, and there is no consistent and clear understanding of the diagnosis and treatment of PTP.

Methods

From the online database PubMed (https://pubmed.ncbi.nlm.nih.gov/) and Web of Science (https://webofknowledge.com/), we use specific retrieval strategies to retrieve relevant articles, and we review and discuss them.

Conclusions

What we need to realize is that PTP is different from ordinary AP. It has its own characteristics in terms of diagnosis and treatment, which requires the attention of clinicians. More importantly, future research should design the best diagnosis and treatment algorithms for PTP.",2021-07-01 +33381797,ChemGenerator: a web server for generating potential ligands for specific targets. ,"In drug discovery, one of the most important tasks is to find novel and biologically active molecules. Given that only a tip of iceberg of drugs was founded in nearly one-century's experimental exploration, it shows great significance to use in silico methods to expand chemical database and profile drug-target linkages. In this study, a web server named ChemGenerator was proposed to generate novel activates for specific targets based on users' input. The ChemGenerator relies on an autoencoder-based algorithm of Recurrent Neural Networks with Long Short-Term Memory by training of 7 million of molecular Simplified Molecular-Input Line-Entry System as the basic model, and further develops target guided generation by transfer learning. As results, ChemGenerator gains lower loss (<0.01) than existing reference model (0.2~0.4) and shows good performance in the case of Epidermal Growth Factor Receptor. Meanwhile, ChemGenerator is now freely accessible to the public by http://smiles.tcmobile.org. In proportion to endless molecular enumeration and time-consuming expensive experiments, this work demonstrates an efficient alternative way for the first virtual screening in drug discovery.",2021-07-01 +34019636,pLannotate: engineered plasmid annotation.,"Engineered plasmids are widely used in the biological sciences. Since many plasmids contain DNA sequences that have been reused and remixed by researchers for decades, annotation of their functional elements is often incomplete. 
Missing information about the presence, location, or precise identity of a plasmid feature can lead to unintended consequences or failed experiments. Many engineered plasmids contain sequences-such as recombinant DNA from all domains of life, wholly synthetic DNA sequences, and engineered gene expression elements-that are not predicted by microbial genome annotation pipelines. Existing plasmid annotation tools have limited feature libraries and do not detect incomplete fragments of features that are present in many plasmids for historical reasons and may impact their newly designed functions. We created the open source pLannotate web server so users can quickly and comprehensively annotate plasmid features. pLannotate is powered by large databases of genetic parts and proteins. It employs a filtering algorithm to display only the most relevant feature matches and also reports feature fragments. Finally, pLannotate displays a graphical map of the annotated plasmid, explains the provenance of each feature prediction, and allows results to be downloaded in a variety of formats. The webserver for pLannotate is accessible at: http://plannotate.barricklab.org/.",2021-07-01 +34550811,Finding the Middle Ground with the Clinical Laboratory's Role in SARS-CoV-2 Genomic Surveillance.,"Continued replacement of the dominant severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) lineages, and associated surges, highlights the importance of genomic surveillance to identify the next possible threats. Despite concerted efforts between clinical laboratories and public health to generate sequence data, the United States has lagged in percentage of SARS-CoV-2 cases sequenced. A more simple and cost-effective option is needed to allow front-line clinical laboratories to perform high-throughput surveillance and refer important samples for slow and expensive next-generation sequencing (NGS). In this issue of the Journal of Clinical Microbiology, A. Babiker, K. Immergluck, S. D. 
Stampfer, A. Rao, et al. (J Clin Microbiol 59:e01446-21, 2021, https://doi.org/10.1128/JCM.01446-21) describe a rapid and flexible multiplex single-nucleotide polymorphism (SNP) assay targeting mutations associated with Alpha, Beta/Gamma, and, added later, Delta variants. They show 100% accuracy in characterized variant pools and clinical samples confirmed by NGS. Such an approach could be a happy medium in the role of front-line laboratories to assist with critically needed high-throughput genomic surveillance.",2021-09-22 +31504765,GMrepo: a database of curated and consistently annotated human gut metagenomes.,"GMrepo (data repository for Gut Microbiota) is a database of curated and consistently annotated human gut metagenomes. Its main purpose is to facilitate the reusability and accessibility of the rapidly growing human metagenomic data. This is achieved by consistently annotating the microbial contents of collected samples using state-of-art toolsets and by manual curation of the meta-data of the corresponding human hosts. GMrepo organizes the collected samples according to their associated phenotypes and includes all possible related meta-data such as age, sex, country, body-mass-index (BMI) and recent antibiotics usage. To make relevant information easier to access, GMrepo is equipped with a graphical query builder, enabling users to make customized, complex and biologically relevant queries. For example, to find (1) samples from healthy individuals of 18 to 25 years old with BMIs between 18.5 and 24.9, or (2) projects that are related to colorectal neoplasms, with each containing >100 samples and both patients and healthy controls. Precomputed species/genus relative abundances, prevalence within and across phenotypes, and pairwise co-occurrence information are all available at the website and accessible through programmable interfaces. 
So far, GMrepo contains 58 903 human gut samples/runs (including 17 618 metagenomes and 41 285 amplicons) from 253 projects concerning 92 phenotypes. GMrepo is freely available at: https://gmrepo.humangut.info.",2020-01-01 +33538809,Narrative Scientific Data Visualization in an Immersive Environment. ,"Narrative visualization for scientific data explorations can help users better understand the domain knowledge, because narrative visualizations often present a sequence of facts and observations linked together by a unifying theme or argument. Narrative visualization in immersive environments can provide users with an intuitive experience to interactively explore the scientific data, because immersive environments provide a brand new strategy for interactive scientific data visualization and exploration. However, it is challenging to develop narrative scientific visualization in immersive environments. In this paper, we propose an immersive narrative visualization tool to create and customize scientific data explorations for ordinary users with little knowledge about programming on scientific visualization. They are allowed to define POIs (points of interest) conveniently by the handler of an immersive device. Automatic exploration animations with narrative annotations can be generated by the gradual transitions between consecutive POI pairs. Besides, interactive slicing can be also controlled by device handler. Evaluations including user study and case study are designed and conducted to show the usability and effectiveness of the proposed tool. Related information can be accessed at: https://dabigtou.github.io/richenliu/.",2021-02-04 +32735322,GeoMine: interactive pattern mining of protein-ligand interfaces in the Protein Data Bank.,"

Summary

The searching of user-defined 3D queries in molecular interfaces is a computationally challenging problem that is not satisfactorily solved so far. Most of the few existing tools focused on that purpose are desktop based and not openly available. Besides that, they show a lack of query versatility, search efficiency and user-friendliness. We address this issue with GeoMine, a publicly available web application that provides textual, numerical and geometrical search functionality for protein-ligand binding sites derived from structural data contained in the Protein Data Bank (PDB). The query generation is supported by a 3D representation of a start structure that provides interactively selectable elements like atoms, bonds and interactions. GeoMine gives full control over geometric variability in the query while performing a deterministic, precise search. Reasonably selective queries are processed on the entire set of protein-ligand complexes in the PDB within a few minutes. GeoMine offers an interactive and iterative search process of successive result analyses and query adaptations. From the numerous potential applications, we picked two from the field of side-effect analysis, showcasing the usefulness of GeoMine.

Availability and implementation

GeoMine is part of the ProteinsPlus web application suite and freely available at https://proteins.plus.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-01 +31647100,Bovine Genome Database: new annotation tools for a new reference genome.,"The Bovine Genome Database (BGD) (http://bovinegenome.org) has been the key community bovine genomics database for more than a decade. To accommodate the increasing amount and complexity of bovine genomics data, BGD continues to advance its practices in data acquisition, curation, integration and efficient data retrieval. BGD provides tools for genome browsing (JBrowse), genome annotation (Apollo), data mining (BovineMine) and sequence database searching (BLAST). To augment the BGD genome annotation capabilities, we have developed a new Apollo plug-in, called the Locus-Specific Alternate Assembly (LSAA) tool, which enables users to identify and report potential genome assembly errors and structural variants. BGD now hosts both the newest bovine reference genome assembly, ARS-UCD1.2, as well as the previous reference genome, UMD3.1.1, with cross-genome navigation and queries supported in JBrowse and BovineMine, respectively. Other notable enhancements to BovineMine include the incorporation of genomes and gene annotation datasets for non-bovine ruminant species (goat and sheep), support for multiple assemblies per organism in the Regions Search tool, integration of additional ontologies and development of many new template queries. To better serve the research community, we continue to focus on improving existing tools, developing new tools, adding new datasets and encouraging researchers to use these resources.",2020-01-01 +34252963,Statistical approaches for differential expression analysis in metatranscriptomics.,"

Motivation

Metatranscriptomics (MTX) has become an increasingly practical way to profile the functional activity of microbial communities in situ. However, MTX remains underutilized due to experimental and computational limitations. The latter are complicated by non-independent changes in both RNA transcript levels and their underlying genomic DNA copies (as microbes simultaneously change their overall abundance in the population and regulate individual transcripts), genetic plasticity (as whole loci are frequently gained and lost in microbial lineages) and measurement compositionality and zero-inflation. Here, we present a systematic evaluation of and recommendations for differential expression (DE) analysis in MTX.

Results

We designed and assessed six statistical models for DE discovery in MTX that incorporate different combinations of DNA and RNA normalization and assumptions about the underlying changes of gene copies or species abundance within communities. We evaluated these models on multiple simulated and real multi-omic datasets. Models adjusting transcripts relative to their encoding gene copies as a covariate were significantly more accurate in identifying DE from MTX in both simulated and real datasets. Moreover, we show that when paired DNA measurements (metagenomic data) are not available, models normalizing MTX measurements within-species while also adjusting for total-species RNA balance sensitivity, specificity and interpretability of DE detection, as does filtering likely technical zeros. The efficiency and accuracy of these models pave the way for more effective MTX-based DE discovery in microbial communities.

Availability and implementation

The analysis code and synthetic datasets used in this evaluation are available online at http://huttenhower.sph.harvard.edu/mtx2021.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +34935432,Maternal Phthalates Exposure and Blood Pressure during and after Pregnancy in the PROGRESS Study.,"

Background

Phthalate exposure is ubiquitous and may affect biological pathways related to regulators of blood pressure. Given the profound changes in vasculature during pregnancy, pregnant women may be particularly susceptible to the potential effects of phthalates on blood pressure.

Objectives

We examined associations of phthalate exposure during pregnancy with maternal blood pressure trajectories from mid-pregnancy through 72 months postpartum.

Methods

Women with singleton pregnancies delivering a live birth in Mexico City were enrolled during the second trimester (n=892). Spot urine samples from the second and third trimesters were analyzed for 15 phthalate metabolites. Blood pressure and covariate data were collected over nine visits through 72 months postpartum. We used linear, logistic, and linear mixed models; latent class growth models (LCGMs); and Bayesian kernel machine regression to estimate the relationship of urinary phthalate biomarkers with maternal blood pressure.

Results

As a joint mixture, phthalate biomarker concentrations during pregnancy were associated with higher blood pressure rise during mid-to-late gestation. With respect to individual biomarkers, second trimester concentrations of monobenzyl phthalate (MBzP) and di(2-ethylhexyl) phthalate biomarkers (ΣDEHP) were associated with higher third trimester blood pressure. Two trajectory classes were identified by LCGM, characterized by increasing blood pressure through 72 months postpartum (""increase-increase"") or decreased blood pressure through 18 months postpartum with a gradual increase thereafter (""decrease-increase""). Increasing exposure to phthalate mixtures during pregnancy was associated with higher odds of being in the increase-increase class. Similar associations were observed for mono-2-ethyl-5-carboxypentyl terephthalate (MECPTP) and dibutyl phthalate (ΣDBP) biomarkers. When specific time periods were examined, specific temporal relationships were observed for ΣDEHP, MECPTP, MBzP, and ΣDBP.

Discussion

In our cohort of pregnant women from Mexico City, exposure to phthalates and phthalate biomarkers was associated with higher blood pressure during late pregnancy, as well as with long-term changes in blood pressure trajectories. https://doi.org/10.1289/EHP8562.",2021-12-22 +34604485,COVID-19 patients and Dementia: Frontal cortex transcriptomic data.,"Since the association of SARS-Cov-2 infection with Nervous System (NS) manifestations, we performed RNA-sequencing analysis in Frontal Cortex of COVID-19 positive or negative individuals and affected or not by Dementia individuals. We examined gene expression differences in individuals with COVID-19 and Dementia compared to Dementia only patients by collecting transcript counts in each sample and performing Differential Expression analysis. We found eleven genes satisfying our significance criteria, all of them being protein coding genes. These data are suitable for integration with supplemental samples and for analysis according to different individuals' classification. Also, differential expression evaluation may be implemented with other scientific purposes, such as research of unannotated genes, mRNA splicing and genes isoforms. The analysis of Differential Expressed genes in COVID-19 positive patients compared to non-COVID-19 patients is published in: S. Gagliardi, E.T. Poloni, C. Pandini, M. Garofalo, F. Dragoni, V. Medici, A. Davin, S.D. Visonà, M. Moretti, D. Sproviero, O. Pansarasa, A. Guaita, M. Ceroni, L. Tronconi, C. Cereda, Detection of SARS-CoV-2 genome and whole transcriptome sequencing in frontal cortex of COVID-19 patients., Brain. Behav. Immun. (2021). https://doi.org/10.1016/j.bbi.2021.05.012.",2021-09-29 +33537380,Circle drawing and tracing dataset for evaluation of fine motor control.,"We introduce a motion dataset from healthy human subjects (n = 125) performing two fine motor control tasks on a graphic tablet, namely circle drawing and circle tracing. 
The article reports the methods and materials used to capture the motion data. The method for data acquisition is the same as the one used to investigate some aspects of fine motor control in healthy subjects in the paper by Cohen et al. (2018) ""Precision in drawing and tracing tasks: Different measures for different aspects of fine motor control"" (https://doi.org/10.1016/j.humov.2018.08.004) [1]. The dataset shared here contains new raw files of the two-dimensional motion data, as well information on the participants (gender, age, laterality index). These data could be instrumental for assessing other aspects of fine motor control, such as speed-accuracy tradeoff, speed-curvature power law, etc., and/or test machine learning algorithms for e.g., task classification.",2021-01-16 +35734106,"The super-food Manuka honey, a comprehensive review of its analysis and authenticity approaches.","Manuka honey (MH) stands out from other honey types as a unique super-food with clinically proven antimicrobial and wound healing activities. Its unique traits and the broad range of applications (i.e. food, cosmetics, nutraceuticals /natural health products) have marked up its price 6 to 25 times than other honey types. Concurrent to the increased market demand, more fraudulence of MH emerged. This urged for the employment of analytical tools for the authenticity and quality assessment of MH and has been the focus of many researchers during the last decades. Our main focus was to review the literature dealing with MH authenticity during the period from 2010 to mid-2021 comprehensively via the Scifinder (https://sinfinder.cas.org) and Web of Science (https://webofknowledge.com) research engines. We used ""manuka honey analysis"", ""manuka honey quality control"", and ""manuka honey authenticity"" as a search terms, applied Boolean operators 'AND/OR' combination, performing in Jan 2017 from the following electronic databases. 
The state-of-the-art analytical approaches and respective chemical markers of MH are highlighted. The present study capitalizes on the most updated methodologies employed for the quality control and analysis of MH to ensure its authenticity and adulteration detection. The unique constituents of MH allowed for its successful discrimination through various analytical platforms, including mass spectrometry coupled to suitable chromatographic separation (i.e. GC-MS and LC-MS), nuclear magnetic resonance (NMR), and fluorescence analysis. Moreover, chemometric tools present potential for MH discrimination and has yet to be capitalized more upon for MH quality control analysis.",2021-06-30 +31642496,Gene4Denovo: an integrated database and analytic platform for de novo mutations in humans.,"De novo mutations (DNMs) significantly contribute to sporadic diseases, particularly in neuropsychiatric disorders. Whole-exome sequencing (WES) and whole-genome sequencing (WGS) provide effective methods for detecting DNMs and prioritizing candidate genes. However, it remains a challenge for scientists, clinicians, and biologists to conveniently access and analyse data regarding DNMs and candidate genes from scattered publications. To fill the unmet need, we integrated 580 799 DNMs, including 30 060 coding DNMs detected by WES/WGS from 23 951 individuals across 24 phenotypes and prioritized a list of candidate genes with different degrees of statistical evidence, including 346 genes with false discovery rates <0.05. We then developed a database called Gene4Denovo (http://www.genemed.tech/gene4denovo/), which allowed these genetic data to be conveniently catalogued, searched, browsed, and analysed. In addition, Gene4Denovo integrated data from >60 genomic sources to provide comprehensive variant-level and gene-level annotation and information regarding the DNMs and candidate genes. 
Furthermore, Gene4Denovo provides end-users with limited bioinformatics skills to analyse their own genetic data, perform comprehensive annotation, and prioritize candidate genes using custom parameters. In conclusion, Gene4Denovo conveniently allows for the accelerated interpretation of DNM pathogenicity and the clinical implication of DNMs in humans.",2020-01-01 +32248222,GPS-Palm: a deep learning-based graphic presentation system for the prediction of S-palmitoylation sites in proteins.,"As an important reversible lipid modification, S-palmitoylation mainly occurs at specific cysteine residues in proteins, participates in regulating various biological processes and is associated with human diseases. Besides experimental assays, computational prediction of S-palmitoylation sites can efficiently generate helpful candidates for further experimental consideration. Here, we reviewed the current progress in the development of S-palmitoylation site predictors, as well as training data sets, informative features and algorithms used in these tools. Then, we compiled a benchmark data set containing 3098 known S-palmitoylation sites identified from small- or large-scale experiments, and developed a new method named data quality discrimination (DQD) to distinguish data quality weights (DQWs) between the two types of the sites. Besides DQD and our previous methods, we encoded sequence similarity values into images, constructed a deep learning framework of convolutional neural networks (CNNs) and developed a novel algorithm of graphic presentation system (GPS) 6.0. We further integrated nine additional types of sequence-based and structural features, implemented parallel CNNs (pCNNs) and designed a new predictor called GPS-Palm. Compared with other existing tools, GPS-Palm showed a >31.3% improvement of the area under the curve (AUC) value (0.855 versus 0.651) for general prediction of S-palmitoylation sites. 
We also produced two species-specific predictors, with corresponding AUC values of 0.900 and 0.897 for predicting human- and mouse-specific sites, respectively. GPS-Palm is free for academic research at http://gpspalm.biocuckoo.cn/.",2021-03-01 +,Harmonizing Landsat 8 and Sentinel-2: A time-series-based reflectance adjustment approach,"We developed a Time-series-based Reflectance Adjustment (TRA) approach for reducing the reflectance differences between Landsat 8 and Sentinel-2 observations. This TRA approach used the time series of matched Landsat 8 and Sentinel-2 observations to build linear regression models to adjust reflectance differences between the two sensors for each individual pixel and each spectral band. We evaluated this approach for the NASA harmonized Landsat and Sentinel-2 (HLS) surface reflectance product (V1.4; https://hls.gsfc.nasa.gov/data/v1.4/) and top-of-atmosphere (TOA) reflectance with approximately 4 years of temporal coverage at five Military Grid Reference System (MGRS) tiles. Using this approach, the surface reflectance difference between Landsat 8 and Sentinel-2 in the HLS product reduced 45% for the blue band, 42% for the green band, 38% for the red band, 30% for the Near Infrared (NIR) band, 37% for the Shortwave Infrared (SWIR) 1 band, and 32% for the SWIR2 band. The TRA approach also reduced TOA reflectance difference between Landsat 8 and Sentinel-2 substantially, in which the blue band reduced 46%, the green and NIR bands reduced 42%, the red band reduced 48%, and the SWIR1 and SWIR2 bands reduced 44%. If the high aerosol observations were screened, the reflectance differences in the HLS product could be further reduced by 2–4% and the TOA reflectance differences could be further reduced by 3–6% for the six spectral bands. The TRA approach has also shown good results in reserving the spatial patterns and the heterogeneity of land surface. 
The transformation parameters estimated from the TRA approach can be directly used for future Landsat 8 and Sentinel-2 reflectance adjustment, with slightly lower (5%) reduction of reflectance difference.",2019-12-01 +31867418,Daily dataset of oil prices and stock prices for the top oil exporting and importing countries from the region of Asia.,"This data presented in this article is specifically employed from the Asian region based on the top position in the list of oil exporting and oil-importing countries around the world. Asia as the biggest continent on the earth had high consumption of energy [1]. Here we employed the daily prices of crude oil and seven oil trading countries, out of which three are oil exporting (Saudi Arabia, United Arab Emirates, Iraq) and four are oil-importing countries (China, Japan, South Korea, India), from the time period of 1-09-2009 to 31-08-2018. The data is collected from an authentic database Bloomberg. This data is related to the research paper ""Volatility spillover impact of world oil prices on leading Asian energy exporting and importing economies' stock returns. Energy, 188 (2019), 116002, https://doi.org/10.1016/j.energy.2019.116002 [2]"". This data is useful to compare the oil prices impact on the leading oil trading countries and also compare a set of countries affected most by oil prices' fluctuations, oil-exporting countries or oil-importing countries. Since this data covers the period of latest oil-crisis, so the impact of oil-crisis could also be analysed.",2019-11-23 +31728519,SyntDB: defining orthologues of human long noncoding RNAs across primates.,"SyntDB (http://syntdb.amu.edu.pl/) is a collection of data on long noncoding RNAs (lncRNAs) and their evolutionary relationships in twelve primate species, including humans. This is the first database dedicated to primate lncRNAs, thousands of which are uniquely stored in SyntDB. 
The lncRNAs were predicted with our computational pipeline using publicly available RNA-Seq data spanning diverse tissues and organs. Most of the species included in SyntDB still lack lncRNA annotations in public resources. In addition to providing users with unique sets of lncRNAs and their characteristics, SyntDB provides data on orthology relationships between the lncRNAs of humans and other primates, which are not available on this scale elsewhere. Keeping in mind that only a small fraction of currently known human lncRNAs have been functionally characterized and that lncRNA conservation is frequently used to identify the most relevant lncRNAs for functional studies, we believe that SyntDB will contribute to ongoing research aimed at deciphering the biological roles of lncRNAs.",2020-01-01 +31670377,NPInter v4.0: an integrated database of ncRNA interactions.,"Noncoding RNAs (ncRNAs) play crucial regulatory roles in a variety of biological circuits. To document regulatory interactions between ncRNAs and biomolecules, we previously created the NPInter database (http://bigdata.ibp.ac.cn/npinter). Since the last version of NPInter was issued, a rapidly growing number of studies have reported novel interactions and accumulated numerous high-throughput interactome data. We have therefore updated NPInter to its fourth edition in which are integrated 600 000 new experimentally identified ncRNA interactions. ncRNA-DNA interactions derived from ChIRP-seq data and circular RNA interactions have been included in the database. Additionally, disease associations were annotated to the interacting molecules. The database website has also been redesigned with a more user-friendly interface and several additional functional modules. 
Overall, NPInter v4.0 now provides more comprehensive data and services for researchers working on ncRNAs and their interactions with other biomolecules.",2020-01-01 +30204114,[HLA ALLELES IN KAZAKHSTAN AND IN THE GLOBAL GENOFUND].,"The HLA alleles in the Kazakhstan genofund and in the global genofund were compared. The study was carried out on 2283 Kazakhstan donors, 4 566 antigens by five locuses A, B, C, DRB1, DQB1. Comparison with the global HLA alleles was carried out using http://www.allelefrequencies.net open internet resource created by the Royal Liverpool University Hospital. The database included information on 8 locuses (A, B, C, DRB1, DPA1, DPB1, DQA1, DQB1), collected in different programs for HLA genofund studies (anthropological research, blood and bone marrow donorship, HLA-associated diseases, etc.). The results demonstrated the need in further development of the hemopoietic stem cell (HSC) donor register in the Republic for more effective search for compatible donors.",2018-07-01 +33612881,The Impact of COVID-19 on Weather Forecasts: A Balanced View.,"Aircraft reports are an important source of information for numerical weather prediction (NWP). From March 2020, the COVID-19 pandemic resulted in a large loss of aircraft data but despite this it is difficult to see any evidence of significant degradation in the forecast skill of global NWP systems. This apparent discrepancy is partly because forecast skill is very variable, showing both day-to-day noise and lower frequency dependence on the mean state of the atmosphere. The definitive way to cleanly assess aircraft impact is using a data denial experiment, which shows that the largest impact is in the upper troposphere. The method used by Chen (2020, https://doi.org/10.1029/2020gl088613) to estimate the impact of COVID-19 is oversimplistic. 
Chen understates the huge importance of satellite data for modern weather forecasts and raises more alarm than necessary about a drop in forecast accuracy.",2021-02-12 +34187366,Physcraper: a Python package for continually updated phylogenetic trees using the Open Tree of Life.,"

Background

Phylogenies are a key part of research in many areas of biology. Tools that automate some parts of the process of phylogenetic reconstruction, mainly molecular character matrix assembly, have been developed for the advantage of both specialists in the field of phylogenetics and non-specialists. However, interpretation of results, comparison with previously available phylogenetic hypotheses, and selection of one phylogeny for downstream analyses and discussion still pose difficulties for anyone who is not a specialist in either phylogenetic methods or a particular study group.

Results

Physcraper is a command-line Python program that automates the update of published phylogenies by adding public DNA sequences to underlying alignments of previously published phylogenies. It also provides a framework for straightforward comparison of published phylogenies with their updated versions, by leveraging upon tools from the Open Tree of Life project to link taxonomic information across databases. The program can be used by the nonspecialist, as a tool to generate phylogenetic hypotheses based on publicly available expert phylogenetic knowledge. Phylogeneticists and taxonomic group specialists will find it useful as a tool to facilitate molecular dataset gathering and comparison of alternative phylogenetic hypotheses (topologies).

Conclusion

The Physcraper workflow showcases the benefits of doing open science for phylogenetics, encouraging researchers to strive for better scientific sharing practices. Physcraper can be used with any OS and is released under an open-source license. Detailed instructions for installation and usage are available at https://physcraper.readthedocs.io.",2021-06-29 +34185575,"Voice Rehabilitation by Voice Prostheses After Total Laryngectomy: A Systematic Review and Network Meta-Analysis for 11,918 Patients.","Purpose Our aim was to assess the different voice prostheses (VPs) to identify the most efficient, safest, patient-tailored, longest lifetime, and inexpensive VPs and assess the different factors affecting their quality. Method In September 2017, 15 databases were searched to include all randomized controlled trials. A new search was done in May 2019 to include all other study design articles, which include all the new-era VPs subtypes. Network meta-analysis (NMA) was applied to all 27 outcomes, besides NMA overall and partial order setting was done by using Hasse scatter plots. p values were used in NMA, where the best VPs are approaching one and the least approaches zero. Meta-analysis was done for the rest of the outcomes. Results Two hundred one articles were eligible for inclusion in our study (N = 11,918). Provox-2 was significantly the most efficient and safest device concerning the most patient preference (odds ratio [OR] = 33.88 [0.65, 1762.24]; p = .92), the least dislodgement (risk ratio [RR] = 0.27 [0.13, 0.57]; p = .79), the least airflow resistance (RR = 0.42 [0.08, 2.11]; p = .84), the least granulation formation (RR = 0.73 [0.02, 26.32]; p = .60), and the least VPs' inaccurate size (RR = 0.77 (0.23, 2.61); p = .66). Heat and moisture exchanger addition showed a significant increase in maximum phonation time and breathing experience, with p values (1 and .59), respectively. 
While heat and moisture exchanger addition showed a significant decline in stoma cleaning frequency, coughing frequency, forced expectoration, sputum production, sleeping problems, and loosening of adhesive, with p values (.99, .72, .69, .96, 1, and 0.96), respectively, Groningen low resistance and Nijdam were considered the worst devices with both overall mean p value of .44. Conclusions Provox-2 is considered the best choice as being the most preferable for patients, with the least airflow resistance, dislodgment, granulation formation, and prosthesis inaccurate size. Groningen low resistance and Nijdam were considered the worst devices according to our analysis. Supplemental Material https://doi.org/10.23641/asha.14802903.",2021-06-29 +34188160,NOD: a web server to predict New use of Old Drugs to facilitate drug repurposing.,"Computational methods accelerate the drug repurposing pipelines that are a quicker and cost-effective alternative to discovering new molecules. However, there is a paucity of web servers to conduct fast, focussed, and customized investigations for identifying new uses of old drugs. We present the NOD web server, which has the mentioned characteristics. NOD uses a sensitive sequence-guided approach to identify close and distant homologs of a protein of interest. NOD then exploits this evolutionary information to suggest potential compounds from the DrugBank database that can be repurposed against the input protein. NOD also allows expansion of the chemical space of the potential candidates through similarity searches. We have validated the performance of NOD against available experimental and/or clinical reports. In 65.6% of the investigated cases in a control study, NOD is able to identify drugs more effectively than the searches made in DrugBank. 
NOD is freely-available at http://pauling.mbu.iisc.ac.in/NOD/NOD/ .",2021-06-29 +30365026,Victors: a web-based knowledge base of virulence factors in human and animal pathogens.,"Virulence factors (VFs) are molecules that allow microbial pathogens to overcome host defense mechanisms and cause disease in a host. It is critical to study VFs for better understanding microbial pathogenesis and host defense mechanisms. Victors (http://www.phidias.us/victors) is a novel, manually curated, web-based integrative knowledge base and analysis resource for VFs of pathogens that cause infectious diseases in human and animals. Currently, Victors contains 5296 VFs obtained via manual annotation from peer-reviewed publications, with 4648, 179, 105 and 364 VFs originating from 51 bacterial, 54 viral, 13 parasitic and 8 fungal species, respectively. Our data analysis identified many VF-specific patterns. Within the global VF pool, cytoplasmic proteins were more common, while adhesins were less common compared to findings on protective vaccine antigens. Many VFs showed homology with host proteins and the human proteins interacting with VFs represented the hubs of human-pathogen interactions. All Victors data are queriable with a user-friendly web interface. The VFs can also be searched by a customized BLAST sequence similarity searching program. These VFs and their interactions with the host are represented in a machine-readable Ontology of Host-Pathogen Interactions. Victors supports the 'One Health' research as a vital source of VFs in human and animal pathogens.",2019-01-01 +34859731,Creatine supplementation and VO2max: a systematic review and meta-analysis.,"Although creatine supplementation is well-known to increase exercise performance in acute high-intensity exercises, its role in aerobic performance based on VO2max is more controversial. Thus, we performed a systematic review and meta-analysis on the effects of creatine supplementation on VO2max. 
PubMed, Cochrane, Embase, and ScienceDirect were searched for randomized controlled trials (RCTs) reporting VO2max in creatine supplementation and placebo groups before and after supplementation. We computed a random-effects meta-analysis on VO2max at baseline, within groups following supplementation, on changes on VO2max between groups, and after supplementation between groups. Sensitivity analyses and meta-regression were conducted. We included 19 RCTs for a total of 424 individuals (mean age 30 years old, 82% men). VO2max did not differ at baseline between groups (creatine and placebo). Participants in both groups were engaged in exercise interventions in most studies (80%). Using changes in VO2max, VO2max increased in both groups but increased less after creatine supplementation than placebo (effect size [ES] = -0.32, 95%CI = -0.51 to -0.12, p = 0.002). Comparisons after creatine supplementation confirmed a lower VO2max in the creatine group compared to the placebo group (ES= -0.20, 95%CI = -0.39 to -0.001, p = 0.049). Meta-analysis after exclusion from meta-funnel resulted in similar outcomes in a subgroup of young and healthy participants. Meta-regressions on characteristics of supplementation, physical training, or sociodemographic were not statistically significant. Creatine supplementation has a negative effect on VO2max, regardless of the characteristics of training, supplementation, or population characteristics.Supplemental data for this article is available online at https://doi.org/10.1080/10408398.2021.2008864 .",2021-12-03 +31711193,CR10-A PUBLIC DATABASE OF COSMIC RADIATION MEASUREMENTS AT AVIATION ALTITUDES OF ABOUT 10 KM.,"Long-term measurements using silicon radiation spectrometer Liulin on board commercial aircraft have been performed since 2001; results were put into a new database, which covers more than 4500 flights with more than 130 000 measurements. 
Methodology and tools were developed to normalize the data with respect to latitude and altitude and thus enable comparison with other radiation detectors and with model calculations. This capability is demonstrated using data from the neutron monitor at Lomnický štít. Instead of providing data files for individual measurement period, two software solutions are delivered. First is a web-based user interface for visualizing and downloading arbitrary time window of interest from the database hosted at http://cr10.odz.ujf.cas.cz. The second is a set of interactive Python notebooks available at GitHub. Those implement the calibration, normalization and visualization methods-so the outputs can be tailored to user needs. The software and data are provided under GNU/CC license.",2019-12-01 +32776266,The International NERSH Data Pool of Health Professionals' Attitudes Toward Religiosity and Spirituality in 12 Countries.,"The amount of research concerned with the values of health professionals (HPs) is steadily growing. Around the world HPs face similar challenges when patients express their existential and spiritual views. How HPs engage these views, and the degree of embedment into consultations, differ across cultures. Today, more than ever before, researchers in this field need to share experiences and build new knowledge upon local findings. To meet this demand, we founded the international collaboration ""Network for Research on Spirituality and Health"" ( https://NERSH.org ). One of the central projects of our network has been to build a large international data pool of health professionals' attitudes toward religiosity and spirituality. Today the data pool hosts answers from more than 6,000 health professionals from 17 separate surveys derived from 12 countries. Data were gathered by either the questionnaire ""Religion and Spirituality in Medicine, Perspectives of Physicians"" (RSMPP) or its successor 'NERSH Questionnaire'. 
In this article we describe the methodology behind the construction of the data pool. We also present an overview of five available scales related to HP religiosity and spirituality, including a description of scale reliability and dimensionality.",2021-02-01 +34423492,KEGG mapping tools for uncovering hidden features in biological data.,"In contrast to artificial intelligence and machine learning approaches, KEGG (https://www.kegg.jp) has relied on human intelligence to develop ""models"" of biological systems, especially in the form of KEGG pathway maps that are manually created by capturing knowledge from published literature. The KEGG models can then be used in biological big data analysis, for example, for uncovering systemic functions of an organism hidden in its genome sequence through the simple procedure of KEGG mapping. Here we present an updated version of KEGG Mapper, a suite of KEGG mapping tools reported previously (Kanehisa and Sato, Protein Sci 2020; 29:28-35), together with the new versions of the KEGG pathway map viewer and the BRITE hierarchy viewer. Significant enhancements have been made for BRITE mapping, where the mapping result can be examined by manipulation of hierarchical trees, such as pruning and zooming. The tree manipulation feature has also been implemented in the taxonomy mapping tool for linking KO (KEGG Orthology) groups and modules to phenotypes.",2021-08-26 +34125674,Subjective and Objective Quality Assessment of 2D and 3D Foveated Video Compression in Virtual Reality.,"In Virtual Reality (VR), the requirements of much higher resolution and smooth viewing experiences under rapid and often real-time changes in viewing direction, leads to significant challenges in compression and communication. To reduce the stresses of very high bandwidth consumption, the concept of foveated video compression is being accorded renewed interest. 
By exploiting the space-variant property of retinal visual acuity, foveation has the potential to substantially reduce video resolution in the visual periphery, with hardly noticeable perceptual quality degradations. Accordingly, foveated image / video quality predictors are also becoming increasingly important, as a practical way to monitor and control future foveated compression algorithms. Towards advancing the development of foveated image / video quality assessment (FIQA / FVQA) algorithms, we have constructed 2D and (stereoscopic) 3D VR databases of foveated / compressed videos, and conducted a human study of perceptual quality on each database. Each database includes 10 reference videos and 180 foveated videos, which were processed by 3 levels of foveation on the reference videos. Foveation was applied by increasing compression with increased eccentricity. In the 2D study, each video was of resolution 7680×3840 and was viewed and quality-rated by 36 subjects, while in the 3D study, each video was of resolution 5376×5376 and rated by 34 subjects. Both studies were conducted on top of a foveated video player having low motion-to-photon latency (~50ms). We evaluated different objective image and video quality assessment algorithms, including both FIQA / FVQA algorithms and non-foveated algorithms, on our so called LIVE-Facebook Technologies Foveation-Compressed Virtual Reality (LIVE-FBT-FCVR) databases. We also present a statistical evaluation of the relative performances of these algorithms. 
The LIVE-FBT-FCVR databases have been made publicly available and can be accessed at https://live.ece.utexas.edu/research/LIVEFBTFCVR/index.html.",2021-06-28 +30395277,PlantPAN3.0: a new and updated resource for reconstructing transcriptional regulatory networks from ChIP-seq experiments in plants.,"The Plant Promoter Analysis Navigator (PlantPAN; http://PlantPAN.itps.ncku.edu.tw/) is an effective resource for predicting regulatory elements and reconstructing transcriptional regulatory networks for plant genes. In this release (PlantPAN 3.0), 17 230 TFs were collected from 78 plant species. To explore regulatory landscapes, genomic locations of TFBSs have been captured from 662 public ChIP-seq samples using standard data processing. A total of 1 233 999 regulatory linkages were identified from 99 regulatory factors (TFs, histones and other DNA-binding proteins) and their target genes across seven species. Additionally, this new version added 2449 matrices extracted from ChIP-seq peaks for cis-regulatory element prediction. In addition to integrated ChIP-seq data, four major improvements were provided for more comprehensive information of TF binding events, including (i) 1107 experimentally verified TF matrices from the literature, (ii) gene regulation network comparison between two species, (iii) 3D structures of TFs and TF-DNA complexes and (iv) condition-specific co-expression networks of TFs and their target genes extended to four species. The PlantPAN 3.0 can not only be efficiently used to investigate critical cis- and trans-regulatory elements in plant promoters, but also to reconstruct high-confidence relationships among TF-targets under specific conditions.",2019-01-01 +30371878,COSMIC: the Catalogue Of Somatic Mutations In Cancer.,"COSMIC, the Catalogue Of Somatic Mutations In Cancer (https://cancer.sanger.ac.uk) is the most detailed and comprehensive resource for exploring the effect of somatic mutations in human cancer. 
The latest release, COSMIC v86 (August 2018), includes almost 6 million coding mutations across 1.4 million tumour samples, curated from over 26 000 publications. In addition to coding mutations, COSMIC covers all the genetic mechanisms by which somatic mutations promote cancer, including non-coding mutations, gene fusions, copy-number variants and drug-resistance mutations. COSMIC is primarily hand-curated, ensuring quality, accuracy and descriptive data capture. Building on our manual curation processes, we are introducing new initiatives that allow us to prioritize key genes and diseases, and to react more quickly and comprehensively to new findings in the literature. Alongside improvements to the public website and data-download systems, new functionality in COSMIC-3D allows exploration of mutations within three-dimensional protein structures, their protein structural and functional impacts, and implications for druggability. In parallel with COSMIC's deep and broad variant coverage, the Cancer Gene Census (CGC) describes a curated catalogue of genes driving every form of human cancer. Currently describing 719 genes, the CGC has recently introduced functional descriptions of how each gene drives disease, summarized into the 10 cancer Hallmarks.",2019-01-01 +34381873,An automated approach to identify scientific publications reporting pharmacokinetic parameters.,"Pharmacokinetic (PK) predictions of new chemical entities are aided by prior knowledge from other compounds. The development of robust algorithms that improve preclinical and clinical phases of drug development remains constrained by the need to search, curate and standardise PK information across the constantly-growing scientific literature. 
The lack of centralised, up-to-date and comprehensive repositories of PK data represents a significant limitation in the drug development pipeline.In this work, we propose a machine learning approach to automatically identify and characterise scientific publications reporting PK parameters from in vivo data, providing a centralised repository of PK literature. A dataset of 4,792 PubMed publications was labelled by field experts depending on whether in vivo PK parameters were estimated in the study. Different classification pipelines were compared using a bootstrap approach and the best-performing architecture was used to develop a comprehensive and automatically-updated repository of PK publications. The best-performing architecture encoded documents using unigram features and mean pooling of BioBERT embeddings obtaining an F1 score of 83.8% on the test set. The pipeline retrieved over 121K PubMed publications in which in vivo PK parameters were estimated and it was scheduled to perform weekly updates on newly published articles. All the relevant documents were released through a publicly available web interface (https://app.pkpdai.com) and characterised by the drugs, species and conditions mentioned in the abstract, to facilitate the subsequent search of relevant PK data. This automated, open-access repository can be used to accelerate the search and comparison of PK results, curate ADME datasets, and facilitate subsequent text mining tasks in the PK domain.",2021-04-21 +,First Report of Rust Caused by Aecial Stage of Puccinia klugkistiana on Ligustrum japonicum in Korea,"Ligustrum japonicum Thunb. (Oleraceae), called Japanese privet or waxleaf privet, is native to Japan and southern Korea. Owing to its attractive evergreen foliage, it is widely cultivated in other regions with warm temperate climates. This shrub is also naturalized in California and in the southeastern United States from Texas to Virginia (https://www.invasiveplantatlas.org/). 
In June 2016, hundreds of L. japonicum shrubs were found affected by a rust at a disease incidence of 50% in a public garden in Daegu (35°53′25″N; 128°35′01″E), Korea. Similar symptoms with typical rust pustules on this plant species were also found in Jinju (35°10′46″N; 128°05′48″E), Korea. Examination of an affected plant revealed that orange-yellow rust pustules were formed on the lower leaf surface with corresponding yellowish to chlorotic lesions on the upper surface. Four representative voucher specimens have been deposited in the Korea University herbarium (KUS-F29287, F29290, F29781, and F29797), Seoul, Korea. Morphological characteristics of the aecial stage were as follows. Spermogonia were epiphyllous, aggregated, initially yellow and becoming brown, subepidermal, and flask shaped. Aecia were hypophyllous, densely clustered, on leaves and occasionally also on branches, causing distortion and hypertrophy of the host, cupulate, deeply immersed in host mesophyll, 120 to 200 µm diameter, and yellowish. Peridia were prominent, easily splitting vertically, peridial cells oblong to polygonal, 22 to 36 × 16 to 22 µm, inner wall 4 to 6 µm thick, verrucose, outer wall 6 to 10 µm thick, smooth to finely verrucose. Aeciospores were angularly globose to ellipsoid, densely and minutely verrucose, subhyaline to pale yellowish, 16 to 28 × 16 to 22 µm, walls 1 to 2 µm thick. Morphological characteristics of the collections were in good agreement with the aecial stage of Puccinia klugkistiana (Dietel) J.X. Ji & Kakish. (Hiratsuka et al. 1992; Ji et al. 2017). To confirm the identification, genomic DNA was extracted from aeciospores of two dried herbarium specimens. The internal transcribed spacer (ITS) and large subunit (LSU) regions of rRNA gene were polymerase chain reaction (PCR) amplified using specific primer pairs ITS5u/ITS4rust and universal primers LR0R/LR6 as described by Beenken et al. (2017). 
PCR products were sequenced and deposited in GenBank (MH644179 and MH644180 for ITS; MH644181 and MH644182 for LSU). No ITS sequence of P. klugkistiana is available in the NCBI database, because this is the first sequence data for ITS of this rust species. LSU sequences from two representative isolates had 100% homology with that of Aecidium klugkistianum (HQ699078) from China. The aecial stage of P. klugkistiana has been reported on eight species/varieties of Ligustrum from East Asia including China, Japan, and Korea (Farr and Rossman 2018; Hiratsuka et al. 1992). In Korea, L. ibota and L. obtusifolium have been recorded as host plants of the aecial stage of P. klugkistiana (Hiratsuka 1942). To our knowledge, this is the first report of the aecial stage of P. klugkistiana on L. japonicum in Korea. Rust infection caused necrotic lesions, greatly detracting from the beauty of the glossy leaves.",2019-01-01 +22715306,Database for vegetable phytochemicals and their mechanism of action.,"

Unlabelled

In an endeavor to screen bioactive compounds present in vegetables with effective mechanism using in silico method lead us to develop a vegetable phytochemicals and their target database (VPTD). The VPTD is a unique bioinformatics resource that compiles information about phytochemicals from vegetables and their mechanism. VPTD contains 2496 phytochemicals from 27 vegetables, their 3D images and their 1337 possible biological mechanism. Each phytochemical contain records of seven data fields providing detailed information on name, source, amount present, structure and mechanistic information. This information has been manually extracted and manually verified from numerous sources, including other electronic databases, textbooks and scientific journals. VPTD is fully searchable and supports extensive text search. The main focus of the VPTD is on providing possible mechanism of phytochemicals, which will help in discovery of potential drugs from one of the common bioresource-vegetable. VPTD is freely available.

Availability

The database is available for free at http://www.vptd.in.",2012-05-31 +30723964,Shared polygenic risk and causal inferences in amyotrophic lateral sclerosis.,"

Objective

To identify shared polygenic risk and causal associations in amyotrophic lateral sclerosis (ALS).

Methods

Linkage disequilibrium score regression and Mendelian randomization were applied in a large-scale, data-driven manner to explore genetic correlations and causal relationships between >700 phenotypic traits and ALS. Exposures consisted of publicly available genome-wide association studies (GWASes) summary statistics from MR Base and LD-hub. The outcome data came from the recently published ALS GWAS involving 20,806 cases and 59,804 controls. Multivariate analyses, genetic risk profiling, and Bayesian colocalization analyses were also performed.

Results

We have shown, by linkage disequilibrium score regression, that ALS shares polygenic risk genetic factors with a number of traits and conditions, including positive correlations with smoking status and moderate levels of physical activity, and negative correlations with higher cognitive performance, higher educational attainment, and light levels of physical activity. Using Mendelian randomization, we found evidence that hyperlipidemia is a causal risk factor for ALS and localized putative functional signals within loci of interest.

Interpretation

Here, we have developed a public resource (https://lng-nia.shinyapps.io/mrshiny) which we hope will become a valuable tool for the ALS community, and that will be expanded and updated as new data become available. Shared polygenic risk exists between ALS and educational attainment, physical activity, smoking, and tenseness/restlessness. We also found evidence that elevated low-density lipoprotein cholesterol is a causal risk factor for ALS. Future randomized controlled trials should be considered as a proof of causality. Ann Neurol 2019;85:470-481.",2019-03-13 +33338655,Road to effective data curation for translational research.,"Translational research today is data-intensive and requires multi-stakeholder collaborations to generate and pool data together for integrated analysis. This leads to the challenge of harmonization of data from different sources with different formats and standards, which is often overlooked during project planning and thus becomes a bottleneck of the research progress. We report on our experience and lessons learnt about data curation for translational research garnered over the course of the European Translational Research Infrastructure & Knowledge management Services (eTRIKS) program (https://www.etriks.org), a unique, 5-year, cross-organizational, cross-cultural collaboration project funded by the Innovative Medicines Initiative of the EU. Here, we discuss the obstacles and suggest what steps are needed for effective data curation in translational research, especially for projects involving multiple organizations from academia and industry.",2020-12-15 +33914722,"Health Status and Health Care Use Among Adolescents Identified With and Without Autism in Early Childhood - Four U.S. Sites, 2018-2020.","Persons identified in early childhood as having autism spectrum disorder (autism) often have co-occurring health problems that extend into adolescence (1-3).
Although only limited data exist on their health and use of health care services as they transition to adolescence, emerging data suggest that a minority of these persons receive recommended guidance* from their primary care providers (PCPs) starting at age 12 years to ensure a planned transition from pediatric to adult health care (4,5). To address this gap in data, researchers analyzed preliminary data from a follow-up survey of parents and guardians of adolescents aged 12-16 years who previously participated in the Study to Explore Early Development (https://www.cdc.gov/ncbddd/autism/seed.html). The adolescents were originally studied at ages 2-5 years and identified at that age as having autism (autism group) or as general population controls (control group). Adjusted prevalence ratios (aPRs) that accounted for differences in demographic characteristics were used to compare outcomes between groups. Adolescents in the autism group were more likely than were those in the control group to have physical difficulties (21.2% versus 1.6%; aPR = 11.6; 95% confidence interval [CI] = 4.2-31.9), and to have additional mental health or other conditions (one or more condition: 63.0% versus 28.9%; aPR = 1.9; 95% CI = 1.5-2.5). Adolescents in the autism group were more likely to receive mental health services (41.8% versus 22.1%; aPR = 1.8, 95% CI = 1.3-2.6) but were also more likely to have an unmet medical or mental health service need§ (11.0% versus 3.2%; aPR = 3.1; 95% CI = 1.1-8.8). In both groups, a small percentage of adolescents (autism, 7.5%; control, 14.1%) received recommended health care transition (transition) guidance. These findings are consistent with previous research (4,5) indicating that few adolescents receive the recommended transition guidance and suggest that adolescents identified with autism in early childhood are more likely than adolescents in the general population to have unmet health care service needs. 
Improved provider training on the heath care needs of adolescents with autism and coordination of comprehensive programs to meet their needs can improve delivery of services and adherence to recommended guidance for transitioning from pediatric to adult health care.",2021-04-30 +30222561,Large-Scale Study of Perceptual Video Quality. ,"The great variations of videographic skills in videography, camera designs, compression and processing protocols, communication and bandwidth environments, and displays leads to an enormous variety of video impairments. Current noreference (NR) video quality models are unable to handle this diversity of distortions. This is true in part because available video quality assessment databases contain very limited content, fixed resolutions, were captured using a small number of camera devices by a few videographers and have been subjected to a modest number of distortions. As such, these databases fail to adequately represent real world videos, which contain very different kinds of content obtained under highly diverse imaging conditions and are subject to authentic, complex and often commingled distortions that are difficult or impossible to simulate. As a result, NR video quality predictors tested on real-world video data often perform poorly. Towards advancing NR video quality prediction, we have constructed a largescale video quality assessment database containing 585 videos of unique content, captured by a large number of users, with wide ranges of levels of complex, authentic distortions. We collected a large number of subjective video quality scores via crowdsourcing. A total of 4776 unique participants took part in the study, yielding more than 205000 opinion scores, resulting in an average of 240 recorded human opinions per video. We demonstrate the value of the new resource, which we call the LIVE Video Quality Challenge Database (LIVE-VQC for short), by conducting a comparison of leading NR video quality predictors on it. 
This study is the largest video quality assessment study ever conducted along several key dimensions: number of unique contents, capture devices, distortion types and combinations of distortions, study participants, and recorded subjective scores. The database is available for download on this link: http://live.ece.utexas.edu/research/LIVEVQC/index.html.",2018-09-12 +34260694,Refget: standardised access to reference sequences.,"

Motivation

Reference sequences are essential in creating a baseline of knowledge for many common bioinformatics methods, especially those using genomic sequencing.

Results

We have created refget, a Global Alliance for Genomics and Health API specification to access reference sequences and sub-sequences using an identifier derived from the sequence itself. We present four reference implementations across in-house and cloud infrastructure, a compliance suite and a web report used to ensure specification conformity across implementations.

Availability

The Refget specification can be found at: https://w3id.org/ga4gh/refget.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-14 +30247654,HACER: an atlas of human active enhancers to interpret regulatory variants.,"Recent studies have shown that disease-susceptibility variants frequently lie in cell-type-specific enhancer elements. To identify, interpret, and prioritize such risk variants, we must identify the enhancers active in disease-relevant cell types, their upstream transcription factor (TF) binding, and their downstream target genes. To address this need, we built HACER (http://bioinfo.vanderbilt.edu/AE/HACER/), an atlas of Human ACtive Enhancers to interpret Regulatory variants. The HACER atlas catalogues and annotates in-vivo transcribed cell-type-specific enhancers, as well as placing enhancers within transcriptional regulatory networks by integrating ENCODE TF ChIP-Seq and predicted/validated chromatin interaction data. We demonstrate the utility of HACER in (i) offering a mechanistic hypothesis to explain the association of SNP rs614367 with ER-positive breast cancer risk, (ii) exploring tumor-specific enhancers in selective MYC dysregulation and (iii) prioritizing/annotating non-coding regulatory regions targeting CCND1. HACER provides a valuable resource for studies of GWAS, non-coding variants, and enhancer-mediated regulation.",2019-01-01 +33304952,Hydrothermal plume detection dataset from Chinese cruises to the equatorial East Pacific Rise.,"In this data article, a dataset from hydrothermal plume investigations on East Pacific Rise collected during Chinese cruises from 2008 to 2011 is reported. The dataset is related to the research article entitled ""Abundance of low-temperature axial venting at the equatorial East Pacific Rise"" published in the journal Deep-Sea Research I by Chen et al. (2020). 
In the dataset, continuous strings of time-series sensor data were obtained by Miniature Autonomous Plume Recorders (MAPR) and an Oxidation-Reduction Potential (ORP) sensor, while the underwater position data was derived using Ultra Short Base Line (USBL) navigation. In this contribution, general characteristics of the data are summarized and shown here. All the data are stored in separate Microsoft Excel spreadsheets that are available for researchers and a link is provided to the full data at http://dx.doi.org/10.17632/jckyj5vyjx.1. The data will be of comparative value to those investigating hydrothermal activities along mid-ocean ridges, worldwide.",2020-11-20 +33185649,"A system-level analysis of patient disease trajectories based on clinical, phenotypic and molecular similarities.","

Motivation

Incorporating the temporal dimension into multimorbidity studies has shown to be crucial for achieving a better understanding of the disease associations. Furthermore, due to the multifactorial nature of human disease, exploring disease associations from different perspectives can provide a holistic view to support the study of their aetiology.

Results

In this work, a temporal systems-medicine approach is proposed for identifying time-dependent multimorbidity patterns from patient disease trajectories, by integrating data from electronic health records with genetic and phenotypic information. Specifically, the disease trajectories are clustered using an unsupervised algorithm based on dynamic time warping and three disease similarity metrics: clinical, genetic and phenotypic. An evaluation method is also presented for quantitatively assessing, in the different disease spaces, both the cluster homogeneity and the respective similarities between the associated diseases within individual trajectories. The latter can facilitate exploring the origin(s) in the identified disease patterns. The proposed integrative methodology can be applied to any longitudinal cohort and disease of interest. In this article, prostate cancer is selected as a use case of medical interest to demonstrate, for the first time, the identification of temporal disease multimorbidities in different disease spaces.

Availability and implementation

https://gitlab.com/agiannoula/diseasetrajectories.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-06-01 +31813095,CytoMegaloVirus Infection Database: A Public Omics Database for Systematic and Comparable Information of CMV.,"CytoMegaloVirus (CMV) is known to cause infection in humans and may remain dormant throughout the life span of an individual. CMV infection has been reported to be fatal in patients with weak immunity. It is transmitted through blood, saliva, urine, semen and breast milk. Although medications are available to treat the infected patients, there is no cure for CMV. This concern prompted us to construct a comprehensive database having exhaustive information regarding CMV, its infections and therapies to be available on a single platform. Thus, we propose a newly designed database that includes all the information from various public resources such as biological databases, virus taxonomy databanks, viral databases, and drug bank, integrated into this database, named as cytomegalovirus database (CMVdb). It features all the relevant data regarding the strains of CMV, genes, expressed proteins, the genomic sequence of CMV and drugs used in the treatment of cytomegalovirus infection. CMVdb has a unique feature of in-house data analysis, so all the data obtained from various resources are processed within the system. The user interface is more responsive because of the integrated platform that will highly facilitate the researchers. Based on CMVdb functionality and quality of the data, it will accelerate the research and development in the field of infectious diseases and immunology with a special focus on CMV. The obtained data would be useful in designing better therapeutic strategies and agents for the treatment of CMV infections. 
The proposed database (CMVdb) is freely accessible at http://shaktisahislab.com/include/CMV/ or http://weislab.com/WeiDOCK/include/content/CMV/.",2019-12-07 +33997645,Naltrexone Initiation in the Inpatient Setting for Alcohol Use Disorder: A Systematic Review of Clinical Outcomes.,"Alcohol use disorder (AUD) is a highly prevalent health issue in the United States. The number of those receiving medication-assisted treatment (MAT) is limited, despite strong evidence for their effectiveness. The inpatient setting may represent an important opportunity to initiate MAT. The goal of this study was to summarize the data on naltrexone initiation in the emergency department or inpatient setting for the management of AUDs. We searched ClinicalTrials.gov, Ovid EBM Reviews, Ovid Embase, Ovid Medline, Ovid PsycINFO, Scopus, and Web of Science from inception through October 31, 2019. Search strategies were created using a combination of keywords (Supplemental Appendix 1, available online at http://www.mcpiqojournal.org) and standardized index terms related to naltrexone therapy for medically hospitalized patients with AUD. Two uncontrolled pre-post study designs evaluated naltrexone prescription rates, 30-day readmission rates, and rehospitalization rates. Two authors independently abstracted data on study characteristics, results, and study-level risk of bias. The research team collaborated to assess the strength of evidence across studies. Two studies reported that implementing a protocol for naltrexone initiation increased MAT rates, with one study noting a substantial decrease in 30-day hospital readmissions. Overall, we found that there is a paucity of data on naltrexone initiation in the inpatient setting for AUDs. This likely reflects the nature of current clinical practice and prescriber comfortability. There is a need for further studies evaluating MAT initiation in the inpatient setting. 
Furthermore, efforts to increase provider knowledge of these therapeutic options are in need of further exploration.",2021-04-08 +34162705,Precipitation isotope time series predictions from machine learning applied in Europe. ,"Hydrogen and oxygen isotope values of precipitation are critically important quantities for applications in Earth, environmental, and biological sciences. However, direct measurements are not available at every location and time, and existing precipitation isotope models are often not sufficiently accurate for examining features such as long-term trends or interannual variability. This can limit applications that seek to use these values to identify the source history of water or to understand the hydrological or meteorological processes that determine these values. We developed a framework using machine learning to calculate isotope time series at monthly resolution using available climate and location data in order to improve precipitation isotope model predictions. Predictions from this model are currently available for any location in Europe for the past 70 y (1950-2019), which is the period for which all climate data used as predictor variables are available. This approach facilitates simple, user-friendly predictions of precipitation isotope time series that can be generated on demand and are accurate enough to be used for exploration of interannual and long-term variability in both hydrogen and oxygen isotopic systems. These predictions provide important isotope input variables for ecological and hydrological applications, as well as powerful targets for paleoclimate proxy calibration, and they can serve as resources for probing historic patterns in the isotopic composition of precipitation with a high level of meteorological accuracy. 
Predictions from our modeling framework, Piso.AI, are available at https://isotope.bot.unibas.ch/PisoAI/.",2021-06-01 +34197276,From education to empowerment: Redesigning the role of students in college health promotion.,"College health promotion departments frequently employ peer health educators to disseminate relevant education and conduct outreach to their student body. While there are certainly benefits to these programs, this approach is outdated and does little to empower students or engage them in the process of health promotion. In this viewpoint article, I describe the Community Health Organizer model, which expands students' role in peer engagement and advocacy beyond the traditional peer educator or peer counseling programs. Finally, I provide recommendations for college health practitioners interested in implementing a similar model on their campuses.Supplemental data for this article can be accessed online at https://doi.org/10.1080/07448481.2021.1920603.",2021-07-01 +28713666,Gramene Database: Navigating Plant Comparative Genomics Resources.,"Gramene (http://www.gramene.org) is an online, open source, curated resource for plant comparative genomics and pathway analysis designed to support researchers working in plant genomics, breeding, evolutionary biology, system biology, and metabolic engineering. It exploits phylogenetic relationships to enrich the annotation of genomic data and provides tools to perform powerful comparative analyses across a wide spectrum of plant species. It consists of an integrated portal for querying, visualizing and analyzing data for 44 plant reference genomes, genetic variation data sets for 12 species, expression data for 16 species, curated rice pathways and orthology-based pathway projections for 66 plant species including various crops. 
Here we briefly describe the functions and uses of the Gramene database.",2016-11-01 +32510568,CCLA: an accurate method and web server for cancer cell line authentication using gene expression profiles. ,"Cancer cell lines (CCLs) as important model systems play critical roles in cancer research. The misidentification and contamination of CCLs are serious problems, leading to unreliable results and waste of resources. Current methods for CCL authentication are mainly based on the CCL-specific genetic polymorphism, whereas no method is available for CCL authentication using gene expression profiles. Here, we developed a novel method and homonymic web server (CCLA, Cancer Cell Line Authentication, http://bioinfo.life.hust.edu.cn/web/CCLA/) to authenticate 1291 human CCLs of 28 tissues using gene expression profiles. CCLA showed an excellent speed advantage and high accuracy for CCL authentication, a top 1 accuracy of 96.58 or 92.15% (top 3 accuracy of 100 or 95.11%) for microarray or RNA-Seq validation data (719 samples, 461 CCLs), respectively. To the best of our knowledge, CCLA is the first approach to authenticate CCLs using gene expression data. Users can freely and conveniently authenticate CCLs using gene expression profiles or NCBI GEO accession on CCLA website.",2021-05-01 +34001434,TaxonKit: A practical and efficient NCBI taxonomy toolkit.,"The National Center for Biotechnology Information (NCBI) Taxonomy is widely applied in biomedical and ecological studies. Typical demands include querying taxonomy identifier (TaxIds) by taxonomy names, querying complete taxonomic lineages by TaxIds, listing descendants of given TaxIds, and others. However, existed tools are either limited in functionalities or inefficient in terms of runtime. In this work, we present TaxonKit, a command-line toolkit for comprehensive and efficient manipulation of NCBI Taxonomy data. 
TaxonKit comprises seven core subcommands providing functions, including TaxIds querying, listing, filtering, lineage retrieving and reformatting, lowest common ancestor computation, and TaxIds change tracking. The practical functions, competitive processing performance, scalability with different scales of datasets and good accessibility can facilitate taxonomy data manipulations. TaxonKit provides free access under the permissive MIT license on GitHub, Brewsci, and Bioconda. The documents are also available at https://bioinf.shenwei.me/taxonkit/.",2021-04-15 +29899502,Development of a consent resource for genomic data sharing in the clinical setting.,"

Purpose

Data sharing between clinicians, laboratories, and patients is essential for improvements in genomic medicine, but obtaining consent for individual-level data sharing is often hindered by a lack of time and resources. To address this issue, the Clinical Genome Resource (ClinGen) developed tools to facilitate consent, including a one-page consent form and online supplemental video with information on key topics, such as risks and benefits of data sharing.

Methods

To determine whether the consent form and video accurately conveyed key data sharing concepts, we surveyed 5,162 members of the general public. We measured comprehension at baseline, after reading the form and watching the video. Additionally, we assessed participants' attitudes toward genomic data sharing.

Results

Participants' performance on comprehension questions significantly improved over baseline after reading the form and continued to improve after watching the video.

Conclusion

Results suggest reading the form alone provided participants with important knowledge regarding broad data sharing, and watching the video allowed for broader comprehension. These materials are now available at http://www.clinicalgenome.org/share . These resources will provide patients a straightforward way to share their genetic and health information, and improve the scientific community's access to data generated through routine healthcare.",2018-06-13 +35386855,DMNet: Dual-Stream Marker Guided Deep Network for Dense Cell Segmentation and Lineage Tracking.,"Accurate segmentation and tracking of cells in microscopy image sequences is extremely beneficial in clinical diagnostic applications and biomedical research. A continuing challenge is the segmentation of dense touching cells and deforming cells with indistinct boundaries, in low signal-to-noise-ratio images. In this paper, we present a dual-stream marker-guided network (DMNet) for segmentation of touching cells in microscopy videos of many cell types. DMNet uses an explicit cell marker-detection stream, with a separate mask-prediction stream using a distance map penalty function, which enables supervised training to focus attention on touching and nearby cells. For multi-object cell tracking we use M2Track tracking-by-detection approach with multi-step data association. Our M2Track with mask overlap includes short term track-to-cell association followed by track-to-track association to re-link tracklets with missing segmentation masks over a short sequence of frames. Our combined detection, segmentation and tracking algorithm has proven its potential on the IEEE ISBI 2021 6th Cell Tracking Challenge (CTC-6) where we achieved multiple top three rankings for diverse cell types. 
Our team name is MU-Ba-US, and the implementation of DMNet is available at, http://celltrackingchallenge.net/participants/MU-Ba-US/.",2021-10-01 +31263870,ValTrendsDB: bringing Protein Data Bank validation information closer to the user.,"SUMMARY:Structures in PDB tend to contain errors. This is a very serious issue for authors that rely on such potentially problematic data. The community of structural biologists develops validation methods as countermeasures, which are also included in the PDB deposition system. But how are these validation efforts influencing the structure quality of subsequently published data? Which quality aspects are improving, and which remain problematic? We developed ValTrendsDB, a database that provides the results of an extensive exploratory analysis of relationships between quality criteria, size and metadata of biomacromolecules. Key input data are sourced from PDB. The discovered trends are presented via precomputed information-rich plots. ValTrendsDB also supports the visualization of a set of user-defined structures on top of general quality trends. Therefore, ValTrendsDB enables users to see the quality of structures published by selected author, laboratory or journal, discover quality outliers, etc. ValTrendsDB is updated weekly. AVAILABILITY AND IMPLEMENTATION:Freely accessible at http://ncbr.muni.cz/ValTrendsDB. The web interface was implemented in JavaScript. The database was implemented in C++. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-12-01 +34743679,"The Role of COVID-19, Race and Social Factors in Pregnancy Experiences in New York State: The CAP Study.","Given that New York State's (NYS) was the first epicenter of the COVID-19 pandemic in the United States (US), we were interested in potential racial/ethnic differences in pregnancy-related experiences among women pregnant during versus prior to the pandemic. 
We surveyed 1,525 women (18-44 years) proportionate to geographic and sociodemographic distribution between June 9, 2020 and July 21, 2020. We carried out bivariate analysis of various social and pregnancy-related factors by racial/ethnic identity (White, Black, Hispanic) and binary logistic and linear regression assessing the association between race/ethnicity, pregnancy prior to/during the pandemic, demographic characteristics, health and social wellbeing, and employment as an essential worker with pregnancy-related healthcare delays and changes. Overall, Black and Hispanic women were significantly more likely to experience a host of negative prenatal and postpartum experiences. In general, multivariate analyses revealed that individuals who were pregnant during the pandemic, lived in NYC, participated in social welfare programs, lacked health insurance, and/or were essential workers were more likely to report delays in prenatal and postpartum care and/or more changes/negative experiences. In light of previous evidence of racial disparities in birth experiences, the higher rates of negative pregnancy/birth-care and postpartum/newborn-care experiences among Black and Hispanic women in bivariate analysis warrant further inspection given that their aggregation for multivariate analysis may have obscured differences at the level of individual events. Findings support continued efforts for universal health insurance and improved social welfare programs. Guidelines are needed to protect essential workers' access to health services, particularly related to pregnancy given the time-sensitive nature of this care.Supplemental data for this article is available online at https://doi.org/10.1080/08964289.2021.1997893 .",2021-11-08 +34285772,YPIBP: A repository for phosphoinositide-binding proteins in yeast.,"Phosphoinositides (PIs) are a family of eight lipids consisting of phosphatidylinositol (PtdIns) and its seven phosphorylated forms.
PIs have important regulatory functions in the cell including lipid signaling, protein transport, and membrane trafficking. Yeast has been recognized as a eukaryotic model system to study lipid-protein interactions. Hundreds of yeast PI-binding proteins have been identified, but this research knowledge remains scattered. Besides, the complete PI-binding spectrum and potential PI-binding domains have not been interlinked. No comprehensive databases are available to support the lipid-protein interaction research on phosphoinositides. Here we constructed the first knowledgebase of Yeast Phosphoinositide-Binding Proteins (YPIBP), a repository consisting of 679 PI-binding proteins collected from high-throughput proteome-array and lipid-array studies, QuickGO, and a rigorous literature mining. The YPIBP also contains protein domain information in categories of lipid-binding domains, lipid-related domains and other domains. The YPIBP provides search and browse modes along with two enrichment analyses (PI-binding enrichment analysis and domain enrichment analysis). An interactive visualization is given to summarize the PI-domain-protein interactome. Finally, three case studies were given to demonstrate the utility of YPIBP. The YPIBP knowledgebase consolidates the present knowledge and provides new insights of the PI-binding proteins by bringing comprehensive and in-depth interaction network of the PI-binding proteins. YPIBP is available at http://cosbi7.ee.ncku.edu.tw/YPIBP/.",2021-06-24 +34748390,The Index of Productive Syntax: Psychometric Properties and Suggested Modifications.,"

Purpose

The Index of Productive Syntax (IPSyn) is a well-known language sample analysis tool. However, its psychometric properties have not been assessed across a wide sample of typically developing preschool-age children and children with language disorders. We sought to determine the profile of IPSyn scores by age over early childhood. We additionally explored whether the IPSyn could be shortened to fewer items without loss of information and whether the required language sample could be shortened from a current required number of 100 utterances to 50.

Method

We used transcripts from the Child Language Data Exchange System, including 1,051 samples of adult-child conversational play with toys within the theoretical framework of item response theory. Samples included those from typically developing children as well as children with hearing loss, Down syndrome, and late language emergence.

Results

The Verb Phrase and Sentence Structure subscales showed more stable developmental trajectories over the preschool years and greater differentiation between typical and atypical cohorts than did the Noun Phrase and Question/Negation subscales. A number of current IPSyn scoring items can be dropped without loss of information, and 50-utterance samples demonstrate most of the same psychometric properties of longer samples.

Discussion

Our findings suggest ways in which the IPSyn can be automated and streamlined (proposed IPSyn-C) so as to provide useful clinical guidance with fewer items and a shorter required language sample. Reference values for the IPSyn-C are provided. Trajectories for one subscale (Question/Negation) appear inherently unstable and may require structured elicitation. Potential limitations, ramifications, and future directions are discussed.

Supplemental material

https://doi.org/10.23641/asha.16915690.",2021-11-08 +28605771,CHOmine: an integrated data warehouse for CHO systems biology and modeling. ,"The last decade has seen a surge in published genome-scale information for Chinese hamster ovary (CHO) cells, which are the main production vehicles for therapeutic proteins. While a single access point is available at www.CHOgenome.org, the primary data is distributed over several databases at different institutions. Currently research is frequently hampered by a plethora of gene names and IDs that vary between published draft genomes and databases making systems biology analyses cumbersome and elaborate. Here we present CHOmine, an integrative data warehouse connecting data from various databases and links to other ones. Furthermore, we introduce CHOmodel, a web based resource that provides access to recently published CHO cell line specific metabolic reconstructions. Both resources allow to query CHO relevant data, find interconnections between different types of data and thus provides a simple, standardized entry point to the world of CHO systems biology. http://www.chogenome.org.",2017-01-01 +34791992,Development and Psychometric Properties of the Sleep Parenting Scale for Infants.,"Although infants' sleep behaviors are shaped by their interactions with parents at bedtime, few tools exist to capture parents' sleep parenting practices. This study developed a Sleep Parenting Scale for Infants (SPS-I) and aimed to (1) explore and validate its factorial structure, (2) examine its measurement invariance across mothers and fathers, and (3) investigate its reliability and concurrent and convergent validity. SPS-I was developed via a combination of items modified from existing scales and the development of novel items. Participants included 188 mothers and 152 mother-father dyads resulting in 340 mothers and 152 fathers; about half were non-Hispanic white. 
Mothers and fathers completed a 14-item SPS-I for their 12-month-old infant. Exploratory factor analysis (EFA) and confirmatory factor analysis (CFA) were used to explore and validate SPS-I's underlying structure. Multigroup CFA was used to examine measurement invariance across mothers and fathers. Reliability was examined using Cronbach's alpha. Concurrent validity was assessed using linear regressions examining associations between SPS-I factors and parent-reported infants nighttime sleep duration. Convergent validity was assessed using paired-sample t-tests to test whether the SPS-I subscale scores were similar between mothers and fathers in the same household. EFA and CFA confirmed a 3-factor, 12-item model: sleep routines, sleep autonomy, and screen media in the sleep environment. SPS-I was invariant across mothers and fathers and was reliable. Concurrent and convergent validity were established. SPS-I has good psychometric properties, supporting its use for characterizing sleep routines, sleep autonomy, and screen media in the sleep environment by mothers and fathers.Supplemental data for this article is available online at https://doi.org/10.1080/08964289.2021.2002799 .",2021-11-18 +34260994,Unique neurocircuitry activation profiles during fear conditioning and extinction among women with posttraumatic stress disorder.,"

Background

Neurocircuitry models of posttraumatic stress disorder (PTSD) suggest specific alterations in brain structures linked with fear conditioning and extinction. Most models assume a unitary pattern of neurocircuitry dysfunction in PTSD and little attention has focused on defining unique profiles of neurocircuitry engagement (i.e., biotypes), despite known clinical heterogeneity in PTSD. Here, we aim to address this gap using a data-driven approach to characterize unique neurocircuitry profiles among women with PTSD.

Methods

Seventy-six women with PTSD related to assaultive violence exposure completed a task during fMRI that alternated between fear conditioning, where a geometric shape predicted the occurrence of an electric shock, and fear extinction, where the geometric shape no longer predicted electric shock. A multivariate clustering analysis was applied to neurocircuitry patterns constrained within an a priori mask of structures linked with emotion processing. Resulting biotypes were compared on clinical measures of neurocognition, trauma exposure, general mental health symptoms, and PTSD symptoms and on psychophysiological responding during the task.

Results

The clustering analysis identified three biotypes (BT), differentiated by patterns of engagement within salience, default mode, and visual processing networks. BT1 was characterized by higher working memory, fewer general mental health symptoms, and low childhood sexual abuse, and lower PTSD symptom severity. BT2 was characterized by lower verbal IQ but better extinction learning as defined by psychophysiology and threat expectancy. BT3 was characterized by low childhood sexual abuse, anxious arousal, and re-experiencing symptoms.

Conclusion

This data demonstrates unique profiles of neurocircuitry engagement in PTSD, each associated with different clinical characteristics, and suggests further research defining distinct biotypes of PTSD. Clinicaltrials.gov, https://clinicaltrials.gov/ct2/home, NCT02560389.",2021-07-06 +31927706,Web-Based Dashboard for the Interactive Visualization and Analysis of National Risk-Standardized Mortality Rates of Sepsis in the US.,"Sepsis mortality is heavily influenced by the quality of care in hospitals. Comparing risk-standardized mortality rate (RSMR) of sepsis patients in different states in the United States has potentially important clinical and policy implications. In the current study, we aimed to compare national sepsis RSMR using an interactive web-based dashboard. We analyzed sepsis mortality using the National Inpatient Sample Database of the US. The RSMR was calculated by the hierarchical logistic regression model. We wrote the interactive web-based dashboard using the Shiny framework, an R package that integrates R-based statistics computation and graphics generation. Visual summarizations (e.g., heat map, and time series chart), and interactive tools (e.g., year selection, automatic year play, map zoom, copy or print data, ranking data by name or value, and data search) were implemented to enhance user experience. The web-based dashboard (https://sepsismap.shinyapps.io/index2/) is cross-platform and publicly available to anyone with interest in sepsis outcomes, health inequality, and administration of state/federal healthcare. After extrapolation to the national level, approximately 35 million hospitalizations were analyzed for sepsis mortality each year. Eight years of sepsis mortality data were summarized into four easy to understand dimensions: Sepsis Identification Criteria; Sepsis Mortality Predictors; RSMR Map; RSMR Trend. Substantial variation in RSMR was observed for different states in the US. 
This web-based dashboard allows anyone to visualize the substantial variation in RSMR across the whole US. Our work has the potential to support healthcare transparency, information diffusion, health decision-making, and the formulation of new public policies.",2020-01-11 +34466411,LZerD Protein-Protein Docking Webserver Enhanced With de novo Structure Prediction.,"Protein-protein docking is a useful tool for modeling the structures of protein complexes that have yet to be experimentally determined. Understanding the structures of protein complexes is a key component for formulating hypotheses in biophysics regarding the functional mechanisms of complexes. Protein-protein docking is an established technique for cases where the structures of the subunits have been determined. While the number of known structures deposited in the Protein Data Bank is increasing, there are still many cases where the structures of individual proteins that users want to dock are not determined yet. Here, we have integrated the AttentiveDist method for protein structure prediction into our LZerD webserver for protein-protein docking, which enables users to simply submit protein sequences and obtain full-complex atomic models, without having to supply any structure themselves. We have further extended the LZerD docking interface with a symmetrical homodimer mode. The LZerD server is available at https://lzerd.kiharalab.org/.",2021-08-12 +33796850,"ResistoXplorer: a web-based tool for visual, statistical and exploratory data analysis of resistome data.","The study of resistomes using whole metagenomic sequencing enables high-throughput identification of resistance genes in complex microbial communities, such as the human microbiome. Over recent years, sophisticated and diverse pipelines have been established to facilitate raw data processing and annotation. Despite the progress, there are no easy-to-use tools for comprehensive visual, statistical and functional analysis of resistome data. 
Thus, exploration of the resulting large complex datasets remains a key bottleneck requiring robust computational resources and technical expertise, which creates a significant hurdle for advancements in the field. Here, we introduce ResistoXplorer, a user-friendly tool that integrates recent advancements in statistics and visualization, coupled with extensive functional annotations and phenotype collection, to enable high-throughput analysis of common outputs generated from metagenomic resistome studies. ResistoXplorer contains three modules-the 'Antimicrobial Resistance Gene Table' module offers various options for composition profiling, functional profiling and comparative analysis of resistome data; the 'Integration' module supports integrative exploratory analysis of resistome and microbiome abundance profiles derived from metagenomic samples; finally, the 'Antimicrobial Resistance Gene List' module enables users to intuitively explore the associations between antimicrobial resistance genes and the microbial hosts using network visual analytics to gain biological insights. ResistoXplorer is publicly available at http://www.resistoxplorer.no.",2021-03-24 +32821400,Kiwifruit Genome Database (KGD): a comprehensive resource for kiwifruit genomics.,"Kiwifruit (Actinidia spp.) plants produce economically important fruits containing abundant, balanced phytonutrients with extraordinarily high vitamin C contents. Since the release of the first kiwifruit reference genome sequence in 2013, large volumes of genome and transcriptome data have been rapidly accumulated for a handful of kiwifruit species. To efficiently store, analyze, integrate, and disseminate these large-scale datasets to the research community, we constructed the Kiwifruit Genome Database (KGD; http://kiwifruitgenome.org/). 
The database currently contains all publicly available genome and gene sequences, gene annotations, biochemical pathways, transcriptome profiles derived from public RNA-Seq datasets, and comparative genomic analysis results such as syntenic blocks and homologous gene pairs between different kiwifruit genome assemblies. A set of user-friendly query interfaces, analysis tools and visualization modules have been implemented in KGD to facilitate translational and applied research in kiwifruit, which include JBrowse, a popular genome browser, and the NCBI BLAST sequence search tool. Other notable tools developed within KGD include a genome synteny viewer and tools for differential gene expression analysis as well as gene ontology (GO) term and pathway enrichment analysis.",2020-08-01 +32703317,Roles of hsa-miR-12462 and SLC9A1 in acute myeloid leukemia.,"MicroRNAs (miRNAs) play important roles in cell proliferation, differentiation, and survival and may be useful for acute myeloid leukemia (AML) diagnosis and prognosis. In this study, we defined a novel miRNA, hsa-miR-12462, through small RNA sequencing of the bone marrow (BM) cells from 128 AML patients. Overexpression of hsa-miR-12462 in AML cells (U937 and HL-60) significantly decreased their growth rate when compared with those of the wild-type and MOCK controls. In a xenograft mouse model, tumor weight and size in the mice bearing the U937 cells with hsa-miR-12462 overexpression were significantly reduced when compared with those bearing the mock cells. The AML cells overexpressing hsa-miR-12462 had increased sensitivity to cytarabine chemotherapy. Combining the data from the MiRDB, an online microRNA database ( http://mirdb.org ), with the RNA-sequencing results, SLC9A1 was predicted to be one of the targets of hsa-miR-12462. hsa-miR-12462 was further confirmed to bind exclusively to the 3'UTR of SLC9A1 in U937 cells, leading to downregulation of SLC9A1. 
In summary, a higher level of hsa-miR-12462 in AML cells is associated with increased sensitivity to cytarabine chemotherapy via downregulation of SLC9A1.",2020-07-23 +34586500,Antarctica as a reservoir of planetary analogue environments.,"One of the main objectives of astrobiological research is the investigation of the habitability of other planetary bodies. Since space exploration missions are expensive and require long-term organization, the preliminary study of terrestrial environments is an essential step to prepare and support exploration missions. The Earth hosts a multitude of extreme environments whose characteristics resemble celestial bodies in our Solar System. In these environments, the physico-chemical properties partly match extraterrestrial environments and could clarify limits and adaptation mechanisms of life, the mineralogical or geochemical context, and support and interpret data sent back from planetary bodies. One of the best terrestrial analogues is Antarctica, whose conditions lie on the edge of habitability. It is characterized by a cold and dry climate (Onofri et al., Nova Hedwigia 68:175-182, 1999), low water availability, strong katabatic winds, salt concentration, desiccation, and high radiation. Thanks to the harsh conditions like those in other celestial bodies, Antarctica offers good terrestrial analogues for celestial body (Mars or icy moons; Léveillé, CR Palevol 8:637-648, https://doi.org/10.1016/j.crpv.2009.03.005 , 2009). The continent could be distinguished into several habitats, each with characteristics similar to those existing on other bodies. Here, we reported a description of each simulated parameter within the habitats, in relation to each of the simulated extraterrestrial environments.",2021-09-29 +30994884,Graph-based data integration from bioactive peptide databases of pharmaceutical interest: toward an organized collection enabling visual network analysis.,"

Motivation

Bioactive peptides have gained great attention in the academy and pharmaceutical industry since they play an important role in human health. However, the increasing number of bioactive peptide databases is causing the problem of data redundancy and duplicated efforts. Even worse is the fact that the available data is non-standardized and often dirty with data entry errors. Therefore, there is a need for a unified view that enables a more comprehensive analysis of the information on this topic residing at different sites.

Results

After collecting web pages from a large variety of bioactive peptide databases, we organized the web content into an integrated graph database (starPepDB) that holds a total of 71 310 nodes and 348 505 relationships. In this graph structure, there are 45 120 nodes representing peptides, and the rest of the nodes are connected to peptides for describing metadata. Additionally, to facilitate a better understanding of the integrated data, a software tool (starPep toolbox) has been developed for supporting visual network analysis in a user-friendly way; providing several functionalities such as peptide retrieval and filtering, network construction and visualization, interactive exploration and exporting data options.

Availability and implementation

Both starPepDB and starPep toolbox are freely available at http://mobiosd-hub.com/starpep/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +33833657,ATSAS 3.0: expanded functionality and new tools for small-angle scattering data analysis.,"The ATSAS software suite encompasses a number of programs for the processing, visualization, analysis and modelling of small-angle scattering data, with a focus on the data measured from biological macromolecules. Here, new developments in the ATSAS 3.0 package are described. They include IMSIM, for simulating isotropic 2D scattering patterns; IMOP, to perform operations on 2D images and masks; DATRESAMPLE, a method for variance estimation of structural invariants through parametric resampling; DATFT, which computes the pair distance distribution function by a direct Fourier transform of the scattering data; PDDFFIT, to compute the scattering data from a pair distance distribution function, allowing comparison with the experimental data; a new module in DATMW for Bayesian consensus-based concentration-independent molecular weight estimation; DATMIF, an ab initio shape analysis method that optimizes the search model directly against the scattering data; DAMEMB, an application to set up the initial search volume for multiphase modelling of membrane proteins; ELLLIP, to perform quasi-atomistic modelling of liposomes with elliptical shapes; NMATOR, which models conformational changes in nucleic acid structures through normal mode analysis in torsion angle space; DAMMIX, which reconstructs the shape of an unknown intermediate in an evolving system; and LIPMIX and BILMIX, for modelling multilamellar and asymmetric lipid vesicles, respectively. In addition, technical updates were deployed to facilitate maintainability of the package, which include porting the PRIMUS graphical interface to Qt5, updating SASpy - a PyMOL plugin to run a subset of ATSAS tools - to be both Python 2 and 3 compatible, and adding utilities to facilitate mmCIF compatibility in future ATSAS releases. 
All these features are implemented in ATSAS 3.0, freely available for academic users at https://www.embl-hamburg.de/biosaxs/software.html.",2021-02-01 +34643684,BERT-Kcr: Prediction of lysine crotonylation sites by a transfer learning method with pre-trained BERT models. ,"As one of the most important post-translational modifications (PTMs), protein lysine crotonylation (Kcr) has attracted wide attention, which involves in important physiological activities, such as cell differentiation and metabolism. However, experimental methods are expensive and time-consuming for Kcr identification. Instead, computational methods can predict Kcr sites in silico with high efficiency and low cost. In this study, we proposed a novel predictor, BERT-Kcr, for protein Kcr sites prediction, which was developed by using a transfer learning method with pre-trained bidirectional encoder representations from transformers (BERT) models. These models were originally used for natural language processing (NLP) tasks, such as sentence classification. Here, we transferred each amino acid into a word as the input information to the pre-trained BERT model. The features encoded by BERT were extracted and then fed to a BiLSTM network to build our final model. Compared with the models built by other machine learning and deep learning classifiers, BERT-Kcr achieved the best performance with AUROC of 0.983 for 10-fold cross-validation. Further evaluation on the independent test set indicates that BERT-Kcr outperforms the state-of-the-art model Deep-Kcr with an improvement of about 5% for AUROC. The results of our experiment indicate that the direct use of sequence information and advanced pre-trained models of natural language processing could be an effective way for identifying post-translational modification sites of proteins. The BERT-Kcr model is publicly available on http://zhulab.org.cn/BERT-Kcr_models/. 
Supplementary data are available at Bioinformatics online.",2021-10-13 +27899679,AtPID: a genome-scale resource for genotype-phenotype associations in Arabidopsis.,"AtPID (Arabidopsis thaliana Protein Interactome Database, available at http://www.megabionet.org/atpid) is an integrated database resource for protein interaction network and functional annotation. In the past few years, we collected 5564 mutants with significant morphological alterations and manually curated them to 167 plant ontology (PO) morphology categories. These single/multiple-gene mutants were indexed and linked to 3919 genes. After integrated these genotype-phenotype associations with the comprehensive protein interaction network in AtPID, we developed a Naïve Bayes method and predicted 4457 novel high confidence gene-PO pairs with 1369 genes as the complement. Along with the accumulated novel data for protein interaction and functional annotation, and the updated visualization toolkits, we present a genome-scale resource for genotype-phenotype associations for Arabidopsis in AtPID 5.0. In our updated website, all the new genotype-phenotype associations from mutants, protein network, and the protein annotation information can be vividly displayed in a comprehensive network view, which will greatly enhance plant protein function and genotype-phenotype association studies in a systematical way.",2016-11-28 +33385041,Highlights of the 2019 American Joint Replacement Registry Annual Report.,"The 2019 American Joint Replacement Registry shows continued growth in cases and data recorded. There are several trends noted in the registry that have been highlighted in this brief communication. More granular data collection is projected for future reports that may shed light on specific procedure and device survivorship and patient-reported outcomes. 
The authors encourage you to read the full report, available at the following link: http://ajrr.net/publications-data/annual-reports.",2020-12-22 +34244710,UCSC Cell Browser: Visualize Your Single-Cell Data.,"

Summary

As the use of single-cell technologies has grown, so has the need for tools to explore these large, complicated datasets. The UCSC Cell Browser is a tool that allows scientists to visualize gene expression and metadata annotation distribution throughout a single-cell dataset or multiple datasets.

Availability and implementation

We provide the UCSC Cell Browser as a free website where scientists can explore a growing collection of single-cell datasets and a freely available python package for scientists to create stable, self-contained visualizations for their own single-cell datasets. Learn more at https://cells.ucsc.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-09 +31494246,SliceIt: A genome-wide resource and visualization tool to design CRISPR/Cas9 screens for editing protein-RNA interaction sites in the human genome.,"Several protein-RNA cross linking protocols have been established in recent years to delineate the molecular interaction of an RNA Binding Protein (RBP) and its target RNAs. However, functional dissection of the role of the RBP binding sites in modulating the post-transcriptional fate of the target RNA remains challenging. CRISPR/Cas9 genome editing system is being commonly employed to perturb both coding and noncoding regions in the genome. With the advancements in genome-scale CRISPR/Cas9 screens, it is now possible to not only perturb specific binding sites but also probe the global impact of protein-RNA interaction sites across cell types. Here, we present SliceIt (http://sliceit.soic.iupui.edu/), a database of in silico sgRNA (single guide RNA) library to facilitate conducting such high throughput screens. SliceIt comprises of ~4.8 million unique sgRNAs with an estimated range of 2-8 sgRNAs designed per RBP binding site, for eCLIP experiments of >100 RBPs in HepG2 and K562 cell lines from the ENCODE project. SliceIt provides a user friendly environment, developed using advanced search engine framework, Elasticsearch. It is available in both table and genome browser views facilitating the easy navigation of RBP binding sites, designed sgRNAs, exon expression levels across 53 human tissues along with prevalence of SNPs and GWAS hits on binding sites. Exon expression profiles enable examination of locus specific changes proximal to the binding sites. Users can also upload custom tracks of various file formats directly onto genome browser, to navigate additional genomic features in the genome and compare with other types of omics profiles. 
All the binding site-centric information is dynamically accessible via ""search by gene"", ""search by coordinates"" and ""search by RBP"" options and readily available to download. Validation of the sgRNA library in SliceIt was performed by selecting RBP binding sites in Lipt1 gene and designing sgRNAs. Effect of CRISPR/Cas9 perturbations on the selected binding sites in HepG2 cell line, was confirmed based on altered proximal exon expression levels using qPCR, further supporting the utility of the resource to design experiments for perturbing protein-RNA interaction networks. Thus, SliceIt provides a one-stop repertoire of guide RNA library to perturb RBP binding sites, along with several layers of functional information to design both low and high throughput CRISPR/Cas9 screens, for studying the phenotypes and diseases associated with RBP binding sites.",2019-09-05 +33191981,A survey of Big Data dimensions vs Social Networks analysis.,"The pervasive diffusion of Social Networks (SN) produced an unprecedented amount of heterogeneous data. Thus, traditional approaches quickly became unpractical for real life applications due their intrinsic properties: large amount of user-generated data (text, video, image and audio), data heterogeneity and high speed generation rate. More in detail, the analysis of user generated data by popular social networks (i.e Facebook (https://www.facebook.com/), Twitter (https://www.twitter.com/), Instagram (https://www.instagram.com/), LinkedIn (https://www.linkedin.com/)) poses quite intriguing challenges for both research and industry communities in the task of analyzing user behavior, user interactions, link evolution, opinion spreading and several other important aspects. This survey will focus on the analyses performed in last two decades on these kind of data w.r.t. 
the dimensions defined for Big Data paradigm (the so called Big Data 6 V's).",2020-11-09 +36303778,InterPepRank: Assessment of Docked Peptide Conformations by a Deep Graph Network.,"Peptide-protein interactions between a smaller or disordered peptide stretch and a folded receptor make up a large part of all protein-protein interactions. A common approach for modeling such interactions is to exhaustively sample the conformational space by fast-Fourier-transform docking, and then refine a top percentage of decoys. Commonly, methods capable of ranking the decoys for selection fast enough for larger scale studies rely on first-principle energy terms such as electrostatics, Van der Waals forces, or on pre-calculated statistical potentials. We present InterPepRank for peptide-protein complex scoring and ranking. InterPepRank is a machine learning-based method which encodes the structure of the complex as a graph; with physical pairwise interactions as edges and evolutionary and sequence features as nodes. The graph network is trained to predict the LRMSD of decoys by using edge-conditioned graph convolutions on a large set of peptide-protein complex decoys. InterPepRank is tested on a massive independent test set with no targets sharing CATH annotation nor 30% sequence identity with any target in training or validation data. On this set, InterPepRank has a median AUC of 0.86 for finding coarse peptide-protein complexes with LRMSD < 4Å. This is an improvement compared to other state-of-the-art ranking methods that have a median AUC between 0.65 and 0.79. When included as a selection-method for selecting decoys for refinement in a previously established peptide docking pipeline, InterPepRank improves the number of medium and high quality models produced by 80% and 40%, respectively. 
The InterPepRank program as well as all scripts for reproducing and retraining it are available from: http://wallnerlab.org/InterPepRank.",2021-10-25 +32992101,"Bioinformatic analysis of proteomic data for iron, inflammation, and hypoxic pathways in restless legs syndrome.","

Objective/background

We performed bioinformatic analysis of proteomic data to identify the biomarkers of restless legs syndrome (RLS) and provide insights into the putative pathomechanisms, including iron deficiency, inflammation, and hypoxic pathways.

Patients/methods

Patients with drug-naïve idiopathic RLS were recruited at a university hospital from June 2017 to February 2018. Serum samples from patients with RLS (n = 7) and healthy sex- and age-matched controls (n = 6) were evaluated by proteomic analysis. For differentially expressed proteins (DEPs) in patients with RLS, compared to those in controls, the expression profiles and protein-protein interaction (PPI) network were characterized between dysregulated proteins and extracted proteins involved in iron deficiency, hypoxia, and inflammation responses using the String database (http://string-DB.org). The PPI network was visualized by Cytoscape ver. 3.7.1. Statistical analyses of the validation Western blot assays were performed using a Student's t-test.

Results

Interactome network analysis revealed a relationship among the eight proteins, their associated genes, and 150, 47, and 11 proteins related to iron deficiency, inflammation, and hypoxic pathways, respectively. All DEPs were well associated with inflammation, and complement 3, complement C4A, alpha-2 HS glycoprotein, and alpha-2 macroglobulin precursor were found to be in hub positions of networks involved in PPIs including iron deficiency, hypoxia pathway, and inflammation. C3 and C4A were verified using western blotting.

Conclusions

We identified key molecules that represent the selected cellular pathways as protein biomarkers by PPI network analysis. Changes in inflammation can mediate or affect the pathomechanism of RLS and can thus act as systemic biomarkers.",2020-09-13 +34152205,"First Report of Xanthomonas citri subsp. citri causing Citrus Canker on lime in Rio Grande do Norte, Brazil. ","Citrus canker caused by Xanthomonas citri subsp. citri is one of the most important citrus diseases in the world (Gottwald et al. 2002), mainly for citrus-producing countries with humid sub-tropical regions such as United States, Argentina, and Brazil, where losses may be significant (Behlau et al. 2020). In the state of Rio Grande do Norte (RN), Brazil, citrus production is expanding and shows social and economic importance for small farmers, which produced approximately 297 tons of lime in this state in 2019 (IBGE 2021). In December 2019, we observed symptoms of erumpent lesions with margins surrounded by yellow haloes on leaves and fruit of the lime (Citrus aurantifolia cv. 'Galego') (about 5% incidence) in a plantation located in the municipality of Mossoró, RN (05°12'21.1""S, 37°19'16""W). Samples were collected from the lime orchard, and five bacterial strains (CCRMXC01 to CCRMXC05) showing yellow, convex, mucoid colonies were isolated in a nutrient-yeast-dextrose-agar medium (NYDA). Pathogenicity tests were performed on sweet orange (C. sinensis cv. 'Pêra') and lime (C. latifolia cv. 'Tahiti') seedlings. Four wounds per leaf (upper side) were carried out with an entomological pin and 10 µl of a bacterial suspension (108 CFU mL-1) were deposited on each wound. The negative control consisted of leaves treated with sterile distilled water (SDW). For each citrus species, we used four replicates per strain and one leaf with four wounds per replicate. 
Inoculated leaves developed erumpent lesions with margins surrounded by yellow haloes six days after inoculation (DAI) in both citrus species, while leaves treated with SDW remained symptomless. Nine DAI, we reisolated the pathogen and performed rep-PCR (REP, ERIC, and BOX-PCR) analyses (Gama et al. 2018) with the strains inoculated and reisolated to confirm the identity of the strains and to fulfill Koch's postulates. The strains were stored at the Culture Collection Rosa Mariano (CCRM) of the Phytobacteriology Laboratory at the Universidade Federal Rural de Pernambuco. The five strains reisolated showed the same REP, ERIC, and BOX-PCR profiles as the strains used for inoculations. The molecular identification was performed sequencing the dnaK, fyuA, gyrB, and rpoD genes (Young et al. 2008). Each fragment was sequenced in both the forward and reverse directions. Using the BLASTn tool, we observed that sequences of the dnaK (GenBank MW218913 to MW218917), fyuA (GenBank MW218918 to MW218922), and rpoD (GenBank MW218928 to MW218932) genes of the strains CCRMXC01 to CCRMXC05 showed 100% of identity with the sequences of these genes from the type strain (ICMP 24T) and of other strains of X. citri subsp. citri (ICMP 21 and ICMP 7493), while sequences of gryB (GenBank MW218923 to MW218927) of the former strains showed 100% identity with the gyrB sequence of the strains ICMP 24T and ICMP 7493 and 99,85% identity with strain ICMP 21. This short variation in the sequence of the gyrB gene also may be observed among strains of X. citri subsp. citri available in NCBI database (https://www.ncbi.nlm.nih.gov/). The phylogenetic analysis performed using Bayesian inference and the concatenated sequence of all the type or representative strains of species and pathovars of Xanthomonas available in GenBank showed that the strains CCRMXC01 to CCRMXC05 clustered together with strain ICMP 24T with 1.0 posterior probability. To our information, this is the first report of X. citri subsp. 
citri causing citrus canker on lime in RN state, Brazil.",2021-06-21 +34154618,Zinc transporter SLC39A13/ZIP13 facilitates the metastasis of human ovarian cancer cells via activating Src/FAK signaling pathway.,"

Background

Zinc transporters have been found to be associated with the pathogenesis of numerous human diseases including cancer. As the most lethal gynecologic malignancy, ovarian cancer is characterized by rapid progression and widespread metastases. However, the function and underlying mechanism of zinc transporters in ovarian cancer metastasis remain unclear.

Methods

The relationship between zinc transporter gene expressions and clinical outcomes of ovarian cancer was assessed with the online database Kaplan-Meier plotter ( http://kmplot.com/analysis/ ). Immunohistochemistry was performed to investigate the prognostic importance of ZIP13. The expression of ZIP13 in ovarian cancer cell lines was depleted to explore its effect on proliferation, adhesion, migration, and invasion both in vitro and in vivo assays. RNA-Seq, quantitative RT-PCR, and western blot analysis were performed to explore ZIP13-regulated downstream target genes.

Results

The expressions of several zinc transporters were highly associated with the clinical outcomes of ovarian cancer patients. Among them, high ZIP13 expression was an independent prognostic factor for poor survival in patients with ovarian cancer. ZIP13 knockout suppressed the malignant phenotypes of ovarian cancer cells both in vitro and in vivo. Further investigation revealed that ZIP13 regulated intracellular zinc distribution and then affected the expressions of genes involved in extracellular matrix organization and cytokine-mediated signaling pathway. This led to the activation of Src/FAK pathway with increased expressions of pro-metastatic genes but decreased expressions of tumor suppressor genes.

Conclusions

ZIP13 is shown to be a novel driver of metastatic progression by modulating the Src/FAK signaling pathway, which may serve as a promising biomarker for prognostic evaluation and targeted therapy in ovarian cancer.",2021-06-21 +34406359,purgeR: Inbreeding and purging in pedigreed populations. ,"Inbreeding depression and genetic purging are important processes shaping the survivability and evolution of small populations. However, detecting purging is challenging in practice, in part because there are limited tools dedicated to it. I present a new R package to assist population analyses on detection and quantification of the inbreeding depression and genetic purging of biological fitness in pedigreed populations. It includes a collection of methods to estimate different measurements of inbreeding (Wright's, partial and ancestral inbreeding coefficients) as well as purging parameters (purged inbreeding, and opportunity of purging coefficients). Additional functions are also included to estimate population parameters, allowing to contextualise inbreeding and purging these results in terms of the population demographic history. purgeR is a valuable tool to gain insight into processes related to inbreeding and purging, and to better understand fitness and inbreeding load evolution in small populations. purgeR is an R package available at CRAN, and can be installed via install.packages('purgeR'). Source code is maintained at a GitLab repository (https://gitlab.com/elcortegano/purgeR). Supplementary data are available at Bioinformatics online.",2021-08-18 +35063224,Phase I single center trial of ketogenic diet for adults with traumatic brain injury.,"

Background

Traumatic Brain injury (TBI) is a major cause of mortality and morbidity in the United States. Ketogenic diet (KD) has been shown to have neuroprotective effects in acute brain injury, but limited data about its use in adult TBI patients is available. The objective of this study is to investigate the feasibility and safety of ketogenic diet (KD) for adult TBI patients in the Neuroscience Intensive Care Unit (NSICU).

Methods

TBI patients admitted to NSICU between June 2019 and March 2021 were enrolled in this single-center, open label, single-arm prospective intervention study. The primary feasibility outcome was achievement of ketosis (detection and maintenance of serum beta-hydroxybutyrate (BOB) levels above normal); secondary outcomes included laboratory and clinical adverse effects related to KD.

Results

10 adults with TBI with Abbreviated Injury Score (AIS)-Head ≥3 and ventriculostomy catheter to monitor intracranial pressure met inclusion/exclusion criteria and were placed on KD. Mean age was 47 years, and all patients were male. Eight out of 10 patients achieved ketosis within mean 2.2 days. KD was initiated within 8-33 h (average 23 h) of hospital admission. No clinical adverse effects were noted, 2 patients developed hypertriglyceridemia and 1 patient developed hypoglycemia. Serum glucose showed a decreasing trend in most patients.

Conclusions

This pilot study shows that KD is feasible in the management of TBI patients. A randomized controlled trial (RCT) is justified to further understand the optimal serum BOB levels, dose and duration of KD in TBI and its effect on the outcome.

ClinicalTrials.gov identifier

NCT03982602, Registered 06/11/2019, https://clinicaltrials.gov/ct2/show/NCT03982602?term=brain+injury&cond=ketogenic+diet&draw=2&rank=3.",2021-11-16 +33476182,When Will He Talk? An Evidence-Based Tutorial for Measuring Progress Toward Use of Spoken Words in Preverbal Children With Autism Spectrum Disorder.,"Purpose Professionals face substantial challenges determining whether and when children with autism spectrum disorder (ASD) who are not yet using spoken words will use spoken language as their primary means of communication. This tutorial provides speech-language pathologists with practical guidance on how to measure expressive language predictors for progress monitoring and making intervention decisions for children with ASD who are preverbal. Method This tutorial is a repackaging effort that seeks to make the research accessible to clinicians wishing to implement evidence-based practice. Results We describe intentional communication, consonant inventory in communication acts, and responding to joint attention as particularly valuable prelinguistic skills to measure. We explain how and when to efficiently assess progress using published assessments periodically and using brief (5-min) communication samples for more frequent progress monitoring. Conclusions Communication samples can be used to show how a child performs within a therapeutic setting during teaching (treatment data) and outside of the therapeutic setting (generalization probe data). Both types of data are critical for determining whether the child is exhibiting progress and which aspects of intervention are facilitating progress toward use of spoken words. These recommendations also balance the evidence for best practices for progress monitoring and the demands on clinicians' time and effort. 
To encourage the measurement of prelinguistic skills of children with ASD who are preverbal in clinical practice, we include (a) example data collection documents, (b) examples with hypothetical data and interpretation, and (c) guidance on communication sampling procedures. Supplemental Material https://doi.org/10.23641/asha.13557836.",2021-01-21 +33304972,"Data on three-year monitoring of benthic macroinvertebrates in ditches of the orchard region of Altes Land, Germany.","The data presented in this article are related to the research article 'Chemical and biological monitoring of the load of plant protection products and of zoocoenoses in ditches of the orchard region Altes Land' (Süß et al., 2006) [1], which is only available in German. The benthic macro invertebrate data presented here were acquired from four ditches (three ditches were located in apple orchards, and one ditch was located in a grassland region) between 2001 and 2003 (Süß & Lorenz, 2020) [2]. This article describes the methods used to record the benthic macro invertebrate species. The field data set is publicly available at the OpenAgrar repository under https://doi.org/10.5073/20201029-170047[2]. It is related to two field data sets, in which pesticide monitoring data (Lorenz et al., 2018) [3] and zooplankton monitoring data (Lorenz & Mueller, 2019) [4] from the same ditches and time period have been presented.",2020-11-26 +32692836,Differential Gene Set Enrichment Analysis: a statistical approach to quantify the relative enrichment of two gene sets.,"

Motivation

Gene Set Enrichment Analysis (GSEA) is an algorithm widely used to identify statistically enriched gene sets in transcriptomic data. However, GSEA cannot examine the enrichment of two gene sets or pathways relative to one another. Here we present Differential Gene Set Enrichment Analysis (DGSEA), an adaptation of GSEA that quantifies the relative enrichment of two gene sets.

Results

After validating the method using synthetic data, we demonstrate that DGSEA accurately captures the hypoxia-induced coordinated upregulation of glycolysis and downregulation of oxidative phosphorylation. We also show that DGSEA is more predictive than GSEA of the metabolic state of cancer cell lines, including lactate secretion and intracellular concentrations of lactate and AMP. Finally, we demonstrate the application of DGSEA to generate hypotheses about differential metabolic pathway activity in cellular senescence. Together, these data demonstrate that DGSEA is a novel tool to examine the relative enrichment of gene sets in transcriptomic data.

Availability and implementation

DGSEA software and tutorials are available at https://jamesjoly.github.io/DGSEA/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-01-01 +33951459,Hotspot identifies informative gene modules across modalities of single-cell genomics.,"Two fundamental aims that emerge when analyzing single-cell RNA-seq data are identifying which genes vary in an informative manner and determining how these genes organize into modules. Here, we propose a general approach to these problems, called ""Hotspot,"" that operates directly on a given metric of cell-cell similarity, allowing for its integration with any method (linear or non-linear) for identifying the primary axes of transcriptional variation between cells. In addition, we show that when using multimodal data, Hotspot can be used to identify genes whose expression reflects alternative notions of similarity between cells, such as physical proximity in a tissue or clonal relatedness in a cell lineage tree. In this manner, we demonstrate that while Hotspot is capable of identifying genes that reflect nuanced transcriptional variability between T helper cells, it can also identify spatially dependent patterns of gene expression in the cerebellum as well as developmentally heritable expression programs during embryogenesis. Hotspot is implemented as an open-source Python package and is available for use at http://www.github.com/yoseflab/hotspot. A record of this paper's transparent peer review process is included in the supplemental information.",2021-05-04 +31738401,SPDI: data model for variants and applications at NCBI.,"

Motivation

Normalizing sequence variants on a reference, projecting them across congruent sequences and aggregating their diverse representations are critical to the elucidation of the genetic basis of disease and biological function. Inconsistent representation of variants among variant callers, local databases and tools result in discrepancies that complicate analysis. NCBI's genetic variation resources, dbSNP and ClinVar, require a robust, scalable set of principles to manage asserted sequence variants.

Results

The SPDI data model defines variants as a sequence of four attributes: sequence, position, deletion and insertion, and can be applied to nucleotide and protein variants. NCBI web services convert representations among HGVS, VCF and SPDI and provide two functions to aggregate variants. One, based on the NCBI Variant Overprecision Correction Algorithm, returns a unique, normalized representation termed the 'Contextual Allele'. The SPDI data model, with its four operations, defines exactly the reference subsequence affected by the variant, even in repeat regions, such as homopolymer and other sequence repeats. The second function projects variants across congruent sequences and depends on an alignment dataset of non-assembly NCBI RefSeq sequences (prefixed NM, NR and NG), as well as inter- and intra-assembly-associated genomic sequences (NCs, NTs and NWs), supporting robust projection of variants across congruent sequences and assembly versions. The variant is projected to all congruent Contextual Alleles. One of these Contextual Alleles, typically the allele based on the latest assembly version, represents the entire set, is designated the unique 'Canonical Allele' and is used directly to aggregate variants across congruent sequences.

Availability and implementation

The SPDI services are available for open access at: https://api.ncbi.nlm.nih.gov/variation/v0.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +32307725,"A database for inventory of proteoform profiles: ""2DE-pattern"".","The human proteome is composed of a diverse and heterogeneous range of gene products/proteoforms/protein species. Because of the growing amount of information about proteoforms generated by different methods, we need a convenient approach to make an inventory of the data. Here, we present a database of proteoforms that is based on information obtained by separation of proteoforms using 2DE followed by shotgun ESI-LC-MS/MS. The database's principles and structure are described. The database is called ""2DE-pattern"" as it contains multiple isoform-centric patterns of proteoforms separated according to 2DE principles. The database can be freely used at http://2de-pattern.pnpi.nrcki.ru.",2020-04-27 +32159764,"MMHub, a database for the mulberry metabolome. ","Mulberry is an important economic crop plant and traditional medicine. It contains a huge array of bioactive metabolites such as flavonoids, amino acids, alkaloids and vitamins. Consequently, mulberry has received increasing attention in recent years. MMHub (version 1.0) is the first open public repository of mass spectra of small chemical compounds (<1000 Da) in mulberry leaves. The database contains 936 electrospray ionization tandem mass spectrometry (ESI-MS2) data and lists the specific distribution of compounds in 91 mulberry resources with two biological duplicates. ESI-MS2 data were obtained under non-standardized and independent experimental conditions. In total, 124 metabolites were identified or tentatively annotated and details of 90 metabolites with associated chemical structures have been deposited in the database. Supporting information such as PubChem compound information, molecular formula and metabolite classification are also provided in the MS2 spectral tag library. 
The MMHub provides important and comprehensive metabolome data for scientists working with mulberry. This information will be useful for the screening of quality resources and specific metabolites of mulberry. Database URL: https://biodb.swu.edu.cn/mmdb/.",2020-01-01 +34139436,VICTOR: A visual analytics web application for comparing cluster sets.,"Clustering is the process of grouping different data objects based on similar properties. Clustering has applications in various case studies from several fields such as graph theory, image analysis, pattern recognition, statistics and others. Nowadays, there are numerous algorithms and tools able to generate clustering results. However, different algorithms or parameterizations may produce quite dissimilar cluster sets. In this way, the user is often forced to manually filter and compare these results in order to decide which of them generate the ideal clusters. To automate this process, in this study, we present VICTOR, the first fully interactive and dependency-free visual analytics web application which allows the visual comparison of the results of various clustering algorithms. VICTOR can handle multiple cluster set results simultaneously and compare them using ten different metrics. Clustering results can be filtered and compared to each other with the use of data tables or interactive heatmaps, bar plots, correlation networks, sankey and circos plots. We demonstrate VICTOR's functionality using three examples. In the first case, we compare five different network clustering algorithms on a Yeast protein-protein interaction dataset whereas in the second example, we test four different parameters of the MCL clustering algorithm on the same dataset. Finally, as a third example, we compare four different meta-analyses with hierarchically clustered differentially expressed genes found to be involved in myocardial infarction. 
VICTOR is available at http://victor.pavlopouloslab.info or http://bib.fleming.gr:3838/VICTOR.",2021-06-08 +,First Report of Stem and Root Rot of Chinese Kale Caused by Fusarium incarnatum-equiseti Species Complex in China,"Chinese kale (Brassica oleracea var. alboglabra) is an important green leaf crop in China. In October 2018, plants of cultivar ‘Sujie’ in the experimental field of the Vegetable Research Institute at the Guangdong Academy of Agricultural Sciences (VRI-GAAS) in Guangzhou, China, exhibited stem rot symptoms. Disease incidence was up to 30%. Symptom development was first observed on 2-month-old plants. Symptoms appeared as wilting and yellowing of leaves, followed by rotting of stem and root tissues. Infected plants had reduced growth, and severely infected plants died. Ten symptomatic plants were removed for pathogen isolation. Infected plant tissues were cut into pieces (5 × 5 mm) and surface sterilized with 1% NaOCl for 3 to 4 min followed by several washings with sterilized distilled water (SDW). Fungi were isolated by aseptically placing symptomatic plant tissues onto potato dextrose agar (PDA). The representative pathogen colonies were purified by single-spore technique onto 2% water agar followed by successive subculturing on PDA and carnation leaf agar (CLA) (Leslie and Summerell 2006). Cultures were incubated in a growth chamber at 25 ± 2°C with a 12-h photoperiod under fluorescent light for 2 weeks. Afterward, macroconidia and chlamydospores of isolates grown on CLA were examined. On PDA, colonies grew as white-colored, profuse, fuzzy mycelium with pale to dark brown pigmentation on the back side of the medium. Chlamydospores were globose, produced singly or in chains, measuring 7.1 to 9.6 μm in diameter with smooth outer walls. Macroconidia were relatively slender, 17 to 49 × 3 to 7 μm, bearing 3 to 5 septations, with a foot-shaped basal cell and a tapered or elongated apical cell. 
Microconidia were obovate, 8 to 11 × 3 to 4 μm, with a single septum. Mesoconidia were fusoid and had 1 to 4 septations. Morphological characters of the four fungi were consistent with F. semitectum Berkeley and Ravenel (syn. F. incarnatum) (Leslie and Summerell 2006). Molecular identification was performed by amplifying the internal transcribed spacer (ITS) region, the translation elongation factor 1a (EF-1α) gene, and the RNA polymerase II beta subunit (RPB2) gene using primer pairs ITS1/ITS4 (White et al. 1990), EF1/EF2 (Geiser et al. 2004), and RPB2AM-1bf/RPB2AM-7R (Miller and Huhndorf 2005), respectively. The ITS, EF-1α, and RPB2 sequences were deposited in GenBank (MK351313, MK439478, and MK487767). The ITS and EF-1α sequences had 99% homology with members of both F. incarnatum and F. equiseti in NCBI database; BLAST analysis of these in Fusarium-ID database (http://isolate.fusariumdb.org) showed 100 and 99% similarity with F. incarnatum-equiseti species complex (FIESC) (NRRL45997 [ITS] and NRRL36323 [EF-1α]), respectively. The RPB2 sequence shared 99% similarity with F. incarnatum strain ITEM6748 (GenBank LN901618) and FIESC (Fusarium-ID NRRL26417). The pathogen was identified as F. incarnatum, a member of FIESC, based on morphological and molecular evidence. Pathogenicity tests were conducted with all four isolates individually. Plants were grown in sterilized 15-cm-diameter plastic pots containing autoclaved commercial potting mix, and five replicate plants were included in each treatment. Sujie Chinese kale plants at four-leaf stage were inoculated by root-dip method in conidial suspension (1 × 106 conidia/ml) in SDW. Fungi were grown on CLA for 10 days at 25 ± 2°C under 12-h light/dark cycle. Control plants were dipped in SDW. Symptoms developed within 10 days postinoculation. Pathogen reisolations fulfilled Koch’s postulates, and identification was confirmed by morphological and molecular methods. 
The pathogen induced similar symptoms on the inoculated plants as in the field. The isolates were maintained in the VRI-GAAS culture collection (VRI 01 to 04). This is the first report of FIESC causing stem and root rot on Chinese kale in China. As the stem is the economically important part of the plant, serious measures must be taken to control this disease.",2019-07-01 +31050720,British Cardiovascular Intervention Society registry framework: a quality improvement initiative on behalf of the National Institute of Cardiovascular Outcomes Research (NICOR).,"The British Cardiovascular Intervention Society (BCIS) percutaneous coronary intervention (PCI) registry is hosted by the National Institute of Cardiovascular Outcomes Research (NICOR) at Bart's Heart Centre and collects clinical characteristics, indications, procedural details, and outcomes of all patients undergoing PCI in the UK. The data are used for audit and research to monitor and improve PCI practices and patient outcomes. Bespoke live data analysis and structured monthly reports are used to provide real-time feedback to all participating hospitals about the provision of care. Risk-adjusted analyses are used as a quality metric and benchmarking PCI practices. The consecutive patients undergoing PCI in all PCI performing hospitals in the UK from 1994 to present. One hundred and thirteen variables encompassing patient demographics, indication, procedural details, complications, and in-hospital outcomes are recorded. Prospective data are collected electronically and encrypted before transfer to central database servers. Data are validated locally and further range checks, sense checks, and assessments of internal consistency are applied during data uploads. Analyses of uploaded data including an assessment of data completeness are provided to all hospitals for validation, with repeat validation rounds prior to public reporting. Endpoints are in-hospital PCI complications, bleeding and mortality. 
All-cause mortality is obtained via linkage to the Office of National Statistics. No other linkages are available at present. Available for research by application to NICOR at http://www.nicor.org.uk/ using a data sharing agreement.",2019-10-01 +,"JcZFP8, a C2H2 zinc finger protein gene from Jatropha curcas, influences plant development in transgenic tobacco","Jatropha curcas L., as an important strategic biofuel resource with considerable economic potential, has attracted worldwide attention. However, J. curcas has yet to be domesticated. Plant height, an important agronomic trait of J. curcas, has not been sufficiently improved, and the genetic regulation of this trait in J. curcas is not fully understood. Zinc finger proteins (ZFPs), a class of transcription factors, have previously been shown to play critical roles in regulating multiple aspects of plant growth and development and may accordingly be implicated in the genetic regulation of plant height in J. curcas.In this study, we cloned JcZFP8, a C2H2 ZFP gene in J. curcas. We found that the JcZFP8 protein was localized in the nucleus and contained a conserved QALGGH motif in its C2H2 structure. Furthermore, ectopic expression of JcZFP8 under the control of the 35S promoter in transgenic tobacco resulted in dwarf plants with malformed leaves. However, when JcZFP8 was knocked out, the transgenic tobacco did not show the dwarf phenotype. After treatment with the gibberellic acid (GA) biosynthesis inhibitor paclobutrazol (PAC), the dwarf phenotype was more severe than plants that did not receive the PAC treatment, whereas application of exogenous gibberellin3 (GA3) reduced the dwarf phenotype in transgenic plants.The results of this study indicate that JcZFP8 may play a role in J. curcas plant phenotype through GA-related pathways. Our findings may help us to understand the genetic regulation of plant development in J. 
curcas and to accelerate breeding progress through engineering of the GA metabolic pathway in this plant.How to cite: Shi X, Wu Y, Dai T, et al. JcZFP8, a C2H2 zinc-finger protein gene from Jatropha curcas, influences plant development in transgenic tobacco. Electron J Biotechnol 2018;34. https://doi.org/10.1016/j.ejbt.2018.05.008.",2018-07-01 +,First Report of Xanthomonas hortorum pv. hederae Causing Bacterial Leaf Spot of Hedera helix in Taiwan,"Hedera helix (ivy) is a popular evergreen commonly grown worldwide as outdoor and indoor plants. In 2018, symptoms similar to bacterial leaf spot were observed on potted ivies in two nurseries, one located in Tianwei Township, Changhua County (observed in January) and the other in Nantun District, Taichung City, Taiwan (observed in May). In both locations, leaf spot symptoms were seen on over 90% of the plants. The lesions were irregularly shaped and were necrotic or water-soaked. A total of seven infected plants were sampled and brought to the laboratory, one collected in Changhua and six sampled in Taichung. Diseased tissues were cut, and bacterial streaming was observed under a light microscope. The samples were streaked onto nutrient agar (NA) and onto NA with 0.2% yeast extract and then were incubated at 25°C. Round yellow colonies were recovered from all seven plants, and seven independent bacterial strains (Hed1 to Hed7) were isolated (each from a different plant). Hed1 was obtained from the plant collected in Changhua, whereas the others were isolated from plants collected in Taichung. All seven strains produced yellow mucoid colonies on yeast dextrose calcium carbonate agar, similar to most known xanthomonads (Schaad et al. 2001). They tested positive for esculin degradation, weak positive for casein hydrolysis, and negative for the abilities to degrade Tween 80 and starch (Schaad et al. 2001). Infiltration of these strains’ suspensions into tomato leaves (cv. Known-you 301) induced hypersensitive responses. 
Identification of Hed1 to Hed7 was conducted via multilocus sequence analysis targeting the fusA, gyrB, gapA, gltA, lacF, and lepA genes (Almeida et al. 2010). Gene fragments were sequenced for all seven strains. For every fragment tested, Hed1 to Hed7 all had the same sequences (GenBank accession nos. MK124762 to MK124767). Comparing the concatenated 2,745-bp sequences of Hed1 to Hed7 among each other and against sequences included in the Plant-Associated Microbes Database (http://genome.ppws.vt.edu/cgi-bin/MLST/home.pl) (Almeida et al. 2010) revealed that all seven strains shared identical sequences with the type strain of Xanthomonas hortorum pv. hederae (LMG733) and had lower than 98% identity with the other Xanthomonas species and pathovars. To fulfill Koch’s postulates, three strains (Hed1, Hed2, and Hed6) were randomly selected and spray inoculated onto potted ivies at a concentration of approximately 1 × 108 CFU/ml (in 0.02% Silwet L-77). Each strain was inoculated onto three plants. Three additional plants sprayed with 0.02% Silwet L-77 solution without bacteria served as controls. The plants were then bagged in plastic bags for 3 days to maintain high humidity. Within 3 weeks, necrotic leaf spots developed on all bacteria-inoculated plants but not on the control plants. Bacterial strains were reisolated from the plants inoculated with Hed1, Hed2, and Hed6, and all reisolates shared the same gapA sequence with the original strains. Bacterial leaf spot on ivy has been reported in Greece, Japan, and other parts of the world (Pirc et al. 2012; Suzuki et al. 2002; Trantas et al. 2016). The present study is, to our knowledge, the first report of the occurrence of this disease in Taiwan. Ivies are often planted in shaded/indoor areas at high densities and are watered frequently in nurseries. 
These conditions could favor the development of bacterial leaf spot and the transmission of its pathogen.",2019-07-01 +,First Report of Anthracnose Crown Rot of Strawberry Caused by Colletotrichum siamense in Taiwan,"In Taiwan, strawberry (Fragaria × ananassa Duch.) is a high-value crop with an average annual cultivated area of ∼500 ha in the last 5 years. Over 90% of strawberry cultivation is in Miaoli County, with ‘Taoyuan No. 1’ as the predominant cultivar for more than 30 years. Anthracnose has become more destructive over the past decade. Although Colletotrichum gloeosporioides, C. dematium, C. fragariae, and C. acutatum were mentioned as the causal agents of strawberry anthracnose in Taiwan (Plant Protection Information System; https://otserv2.tactri.gov.tw/ppm/), we lack information on the isolation, pathogenicity, and morphological or molecular identification of the pathogen. From 2010 to 2016, we surveyed anthracnose in strawberries in Miaoli County; more than 50% of diseased plants showed typical anthracnose crown rot (ACR) symptoms (McInnes et al. 1992). ACR caused up to 30 to 40% plant loss during the seedling stage and ∼20% after transplanting. Infected crown tissue initially showed red and white marbling and then gradually brown rot, followed by rapid wilting of the entire plant. Anthracnose symptoms were observed in other parts of the plant, including leaves, petioles, runners, fruits, and roots. Symptoms appeared as circular black spots on the leaves and withering and girdling on runners. To isolate the causal agent, approximately 0.2 × 0.2-cm fragments of diseased crowns were surface disinfested with 1% sodium hypochlorite, triple rinsed with sterile water, and then placed onto 1.5% water agar. After 2 to 3 days, extended single hyphal tips from tissues were transferred to potato dextrose agar and incubated for 7 days at 25°C under a 12-h/12-h photoperiod. 
Colonies were initially white, later became somewhat zonate, velvety, light gray on the upper side and gray on the reverse side of plates, with concentric rings of salmon sporodochia. Conidia were 9.68 to 17.95 × 3.88 to 5.84 μm (14.53 ± 0.31 × 4.99 ± 0.08 μm, n = 90), hyaline, oblong to cylindrical, with round obtuse ends. Morphological characteristics of the causal agent resembled species belonging to the C. gloeosporioides species complex (Weir et al. 2012). To confirm the species identification, we extracted genomic DNA from 10 isolates by using the Plant Genomic DNA Extraction Miniprep System (Viogene, Taipei) and polymerase chain reaction–amplified the internal transcribed spacer (ITS) region, chitin synthase (CHS-1), actin (ACT), β-tubulin 2 (TUB2), calmodulin (CAL), and intergenic region of Apn2 and MAT1-2-1 (ApMAT) with published primers (Carbone and Kohn 1999; O’Donnell and Cigelnik 1997; Silva et al. 2012; Weir et al. 2012; White et al. 1990). Sequences were submitted to GenBank (accession nos. MK174223 [ITS], MK174224 [CHS-1], MK174225 [ACT], MK174226 [TUB2], MK174227 [CAL], and MK174228 [ApMAT]). The ITS, CHS-1, ACT, TUB2, CAL, and ApMAT sequences were compared with the GenBank nr database, restricted to type material. Results showed 98 to 99% identity to C. siamense (syn. C. dianesei and C. melanocaulon) (Liu et al. 2016; Prihastuti et al. 2009), which belongs to the C. gloeosporioides species complex, with the corresponding sequences (ITS: NR_144800; CHS-1: KX094094; ApMAT: KX094304 [Lima et al. 2013]; ACT: KX093987; TUB2: KX094290; and CAL: KX094036 [Doyle et al. 2013]). Koch’s postulates were fulfilled for two isolates (ML133 and ML612) by spraying 1 × 106 conidia/ml suspension on seedlings until run-off at the four- to five-leaf stage (two trials per isolate, n = 5 seedlings per trial). 
Inoculated plants were covered with plastic bags (>90% relative humidity) for 24 h at 30°C and then maintained in a growth chamber at 30°C, 70% relative humidity, under a 12-h/12-h photoperiod. After 7 days, all inoculated plants showed typical necrotic leaf spots and wilt symptoms similar to those in the field. Control plants sprayed with sterile water had no symptoms (n = 5 per trial). Longitudinal sections of the inoculated crown showed reddish-brown and white-marbled necrosis. The fungi were reisolated from lesions of diseased leaves or crowns with 100% frequency (n ≥ 3 isolates per trial), and morphological characteristics and gene sequences were identical to the original isolates. To our knowledge, this is the first report of C. siamense causing ACR of strawberry in Taiwan. The disease has the potential for causing serious losses to the strawberry industry in Taiwan, and research is needed on management strategies to minimize losses.",2019-07-01 +,Identification and pathogenicity of Phytophthora species in pear commercial orchards in Argentina,"ʻBartlettʼ pear cultivar accounts for 40% of the pear production area of Río Negro and Neuquén provinces, Argentina. During fall and spring 2014–2016, symptoms associated with Phytophthora rots were observed in declining commercial orchards. The aims of this survey were: (i) to identify the Phytophthora species associated with these irrigated commercial orchards, using different isolation strategies; and (ii) to investigate the pathogenic potential of Phytophthora isolates on pear fruit and tree. Several strategies were used to isolate Phytophthora spp. from soil/roots and collar rot. Fifty-two isolates were grouped by morphological and cultural characters, corresponding mainly to P. cactorum and other species of Clade 6. P. cactorum was the main species obtained from collar wood and root/soil by fruit bating. 
The identity of the species was confirmed by sequencing of the internal transcribed spacers (ITS, ITS1, and ITS4 primers) and comparison with sequences available in GenBank and Phytophthora Database (http://phytophthoradb.org). Sequences of P. inundata, P. rosacearum, P. lacustris, P. thermophila and P. cactorum were deposited in GenBank. Pathogenicity tests were made for each species on 1-year-old shoots and fruit of ʻBartlettʼ and ʻd’Anjouʼ cultivars, and then on 2-year-old rootstock roots of ʻBartlettʼ cultivar. All Phytophthora species were pathogenic, with different levels of severity among isolates. P. cactorum was the most aggressive species on pear rootstock roots and 1-year-old shoots.",2019-07-01 +33242730,Review of the evidence for oceans and human health relationships in Europe: A systematic map.,"

Background

Globally, there is increasing scientific evidence of critical links between the oceans and human health, with research into issues such as pollution, harmful algal blooms and nutritional contributions. However, Oceans and Human Health (OHH) remains an emerging discipline. As such these links are poorly recognized in policy efforts such as the Sustainable Development Goals, with OHH not included in either marine (SDG14) or health (SDG3) goals. This is arguably short-sighted given recent development strategies such as the EU Blue Growth Agenda.

Objectives

In this systematic map we aim to build on recent efforts to enhance OHH in Europe by setting a baseline of existing evidence, asking: What links have been researched between marine environments and the positive and negative impacts to human health and wellbeing?

Methods

We searched eight bibliographic databases and queried 57 organizations identified through stakeholder consultation. Results include primary research and systematic reviews which were screened double blind against pre-defined inclusion criteria as per a published protocol. Studies were limited to Europe, US, Australia, New Zealand and Canada. Data were extracted according to a stakeholder-defined code book. A narrative synthesis explores the current evidence for relationships between marine exposures and human health outcomes, trends in knowledge gaps and change over time in the OHH research landscape. The resulting database is available on the Seas, Oceans and Public Health in Europe website (https://sophie2020.eu/).

Results

A total of 1,542 unique articles were included in the database, including those examined within 56 systematic reviews. Research was dominated by a US focus representing 50.1% of articles. A high number of articles were found to link: marine biotechnology and cardiovascular or immune conditions, consumption of seafood and cardiovascular health, chemical pollution and neurological conditions, microbial pollution and gastrointestinal or respiratory health, and oil industry occupations with mental health. A lack of evidence relates to direct impacts of plastic pollution and work within a number of industries identified as relevant by stakeholders. Research over time is dominated by marine biotechnology, though this is narrow in focus. Pollution, food and disease/injury research follow similar trajectories. Wellbeing and climate change have emerged more recently as key topics but lag behind other categories in volume of evidence.

Conclusions

The evidence base for OHH of relevance to European policy is growing but remains patchy and poorly co-ordinated. Considerable scope for future evidence synthesis exists to better inform policy-makers, though reviews need to better incorporate complex exposures. Priorities for future research include: proactive assessments of chemical pollutants, measurable impacts arising from climate change, effects of emerging marine industries, and regional and global assessments for OHH interactions. Understanding of synergistic effects across multiple exposures and outcomes using systems approaches is recommended to guide policies within the Blue Growth Strategy. Co-ordination of research across Europe and dedicated centres of research would be effective first steps.",2020-11-23 +34676802,"Fabrication, depiction, DNA interaction, anti-bacterial, DFT and molecular docking studies of Co(II) and Cu(II) complexes of 3-methyl-1-phenyl-4-[(E)-(pyridin-2-yl)diazenyl]-1H-pyrazol-5-ol ligand.","Cobalt(II) and copper(II) complexes of the (3-methyl-1-phenyl-4-[(E)-(pyridin-2-yl)diazenyl]-1H-pyrazol-5-ol) ligand were obtained by the diazotization reaction of 5-methyl-2-phenyl-2,4-dihydro-3H-pyrazol-3-one with 2-amino pyridine. The synthesized compounds were confirmed by analytical and spectroscopic analyses (such as UV-Visible, FT-IR, NMR, and mass spectroscopy). Calf thymus DNA interaction with metal complexes is inspected by UV-Visible spectra, viscosity measurements, and thermal denaturation techniques. The intrinsic binding constant (Kb) was found to be 1.17 × 10^6 M^-1 and 0.98 × 10^6 M^-1 for Co(II) and Cu(II) complexes, respectively. The cleavage of pUC-19 DNA was monitored by gel electrophoresis. In silico molecular docking studies of the compounds with the target receptor Glu-6p showed that the compounds are potent drugs for the target enzyme. 
Further, the optimized structure of the azo dye ligand was obtained by density functional theory (DFT) using the Gaussian09 program at the RB3LYP level with the 6-311 G (++, g, d, p) basis set. Furthermore, the compounds were screened for antibacterial activity against the pathogenic organisms gram-negative Klebsiella pneumoniae and gram-positive Bacillus subtilis by a diffusion method. Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1991373 .",2021-10-22 +34400655,"Open-access quantitative MRI data of the spinal cord and reproducibility across participants, sites and manufacturers.","In a companion paper by Cohen-Adad et al. we introduce the spine generic quantitative MRI protocol that provides valuable metrics for assessing spinal cord macrostructural and microstructural integrity. This protocol was used to acquire a single subject dataset across 19 centers and a multi-subject dataset across 42 centers (for a total of 260 participants), spanning the three main MRI manufacturers: GE, Philips and Siemens. Both datasets are publicly available via git-annex. Data were analysed using the Spinal Cord Toolbox to produce normative values as well as inter/intra-site and inter/intra-manufacturer statistics. Reproducibility for the spine generic protocol was high across sites and manufacturers, with an average inter-site coefficient of variation of less than 5% for all the metrics. Full documentation and results can be found at https://spine-generic.rtfd.io/ . The datasets and analysis pipeline will help pave the way towards accessible and reproducible quantitative MRI in the spinal cord.",2021-08-16 +31603498,dbInDel: a database of enhancer-associated insertion and deletion variants by analysis of H3K27ac ChIP-Seq.,"

Summary

Cancer hallmarks rely on its specific transcriptional programs, which are dysregulated by multiple mechanisms, including genomic aberrations in the DNA regulatory regions. Genome-wide association studies have shown many variants are found within putative enhancer elements. To provide insights into the regulatory role of enhancer-associated non-coding variants in cancer epigenome, and to facilitate the identification of functional non-coding mutations, we present dbInDel, a database where we have comprehensively analyzed enhancer-associated insertion and deletion variants for both human and murine samples using ChIP-Seq data. Moreover, we provide the identification and visualization of upstream TF binding motifs in InDel-containing enhancers. Downstream target genes are also predicted and analyzed in the context of cancer biology. The dbInDel database promotes the investigation of functional contributions of non-coding variants in cancer epigenome.

Availability and implementation

The database, dbInDel, can be accessed from http://enhancer-indel.cam-su.org/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +31560645,"Data for Decision-Making: Exploring the Division of Nutrition, Physical Activity, and Obesity's Data, Trends, and Maps.","Public health practitioners need quick and easy access to reliable surveillance data to monitor states' progress over time, compare benchmarks nationally or among states, and make strategic decisions about priorities and resources. Data, Trends, and Maps (DTM) at https://www.cdc.gov/nccdphp/dnpao/data-trends-maps/index.html is a free, online interactive database that houses and displays data on nutrition, physical activity, breastfeeding, and obesity that practitioners can use for public health action. Created in 2015 by the Centers for Disease Control and Prevention's (CDC) Division of Nutrition, Physical Activity, and Obesity, DTM was updated and relaunched in April 2017 with the capability to customize and download data sets directly; DTM also has other user-friendly features, such as visualization options. Since its relaunch, DTM has received more than 386,000 page views from approximately 110,000 unique visitors. However, the potential exists for more widespread use of DTM if more public health practitioners understood what the site offered and how others have used it in the field. Here, we explain how public health practitioners can explore the most recent state-level data on nutrition, physical activity, breastfeeding, and obesity and use this data to inform programmatic and policy efforts to prevent and control chronic diseases. We demonstrate 3 different ways practitioners can visualize data (ie, Explore by Location, Explore by Topic, and the Open Data Portal) and present 3 real-world examples to highlight DTM's utility as a public health tool.",2019-09-26 +34144671,Online database for brain cancer-implicated genes: exploring the subtype-specific mechanisms of brain cancer.,"

Background

Brain cancer is one of the eight most common cancers occurring in people aged 40+ and is the fifth-leading cause of cancer-related deaths for males aged 40-59. Accurate subtype identification is crucial for precise therapeutic treatment, which largely depends on understanding the biological pathways and regulatory mechanisms associated with different brain cancer subtypes. Unfortunately, the subtype-implicated genes that have been identified are scattered in thousands of published studies. So, systematic literature curation and cross-validation could provide a solid base for comparative genetic studies about major subtypes.

Results

Here, we constructed a literature-based brain cancer gene database (BCGene). In the current release, we have a collection of 1421 unique human genes gathered through an extensive manual examination of over 6000 PubMed abstracts. We comprehensively annotated those curated genes to facilitate biological pathway identification, cancer genomic comparison, and differential expression analysis in various anatomical brain regions. By curating cancer subtypes from the literature, our database provides a basis for exploring the common and unique genetic mechanisms among 40 brain cancer subtypes. By further prioritizing the relative importance of those curated genes in the development of brain cancer, we identified 33 top-ranked genes with evidence mentioned only once in the literature, which were significantly associated with survival rates in a combined dataset of 2997 brain cancer cases.

Conclusion

BCGene provides a useful tool for exploring the genetic mechanisms of and gene priorities in brain cancer. BCGene is freely available to academic users at http://soft.bioinfo-minzhao.org/bcgene/ .",2021-06-18 +34092780,"Transcultural Adaptation of Tibetan Nursing Trainees: A Case Study of ""9+3"" Vocational Technical Students in Sichuan Province, China.","BACKGROUND Nursing education is an important part of the ""9+3"" vocational education program led by Sichuan Province. In the internship stage, nursing students of Tibetan ethnicity may have problems of intercultural adaptation in the process of getting along with patients, which may affect the effective nursing outcome. The purpose of this study was to clarify the current situation of transcultural adaptation of Tibetan trainee nurses and to provide more theoretical support and guidance. MATERIAL AND METHODS We collected 237 valid survey questionnaires, based on Ward's acculturation process model, from a total of 363 Tibetan trainee nurses in the ""9+3"" free vocational education program in Chengdu, Luzhou, and Nanchong of Sichuan Province. The SPSSAU project (2020), an online application software retrieved from https://www.spssau.com, was used for data coding and archiving. RESULTS The results of questionnaire and data analysis showed that the overall level of transcultural adaptation of Tibetan trainee nurses was that the number of people with poor adaptation was slightly higher than those with good adaptation, and most Tibetan trainee nurses were in the middle level. Meanwhile, sociocultural adaptation was better than psychological adaptation. There were no statistically significant differences among the 4 grouping variables: gender, student home region, the city where the internship hospital was located, and whether they were from a single-child family or not. 
CONCLUSIONS The results revealed that there was still transcultural maladjustment among Tibetan nurses in the internship stage, and the psychological maladjustment was more obvious than the sociocultural maladjustment. We provide countermeasures and suggestions to solve the problems of transcultural adaptation reflected in the research.",2021-06-06 +32467965,AOP4EUpest: mapping of pesticides in adverse outcome pathways using a text mining tool.,"

Motivation

Exposure to pesticides may lead to adverse health effects in human populations, in particular vulnerable groups. The main long-term health concerns are neurodevelopmental disorders, carcinogenicity as well as endocrine disruption possibly leading to reproductive and metabolic disorders. Adverse outcome pathways (AOPs) consist of linear representations of mechanistic perturbations at different levels of the biological organization. Although AOPs are chemical-agnostic, they can provide a better understanding of the Mode of Action of pesticides and can support a rational identification of effect markers.

Results

With the increasing amount of scientific literature and the development of biological databases, investigation of putative links between pesticides, from various chemical groups and AOPs using the biological events present in the AOP-Wiki database is now feasible. To identify co-occurrence between a specific pesticide and a biological event in scientific abstracts from the PubMed database, we used an updated version of the artificial intelligence-based AOP-helpFinder tool. This allowed us to decipher multiple links between the studied substances and molecular initiating events, key events and adverse outcomes. These results were collected, structured and presented in a web application named AOP4EUpest that can support regulatory assessment of the prioritized pesticides and trigger new epidemiological and experimental studies.

Availability and implementation

http://www.biomedicale.parisdescartes.fr/aop4EUpest/home.php.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +29194536,circlncRNAnet: an integrated web-based resource for mapping functional networks of long or circular forms of noncoding RNAs.,"

Background

Despite their lack of protein-coding potential, long noncoding RNAs (lncRNAs) and circular RNAs (circRNAs) have emerged as key determinants in gene regulation, acting to fine-tune transcriptional and signaling output. These noncoding RNA transcripts are known to affect expression of messenger RNAs (mRNAs) via epigenetic and post-transcriptional regulation. Given their widespread target spectrum, as well as extensive modes of action, a complete understanding of their biological relevance will depend on integrative analyses of systems data at various levels.

Findings

While a handful of publicly available databases have been reported, existing tools do not fully capture, from a network perspective, the functional implications of lncRNAs or circRNAs of interest. Through an integrated and streamlined design, circlncRNAnet aims to broaden the understanding of ncRNA candidates by testing in silico several hypotheses of ncRNA-based functions, on the basis of large-scale RNA-seq data. This web server is implemented with several features that represent advances in the bioinformatics of ncRNAs: (1) a flexible framework that accepts and processes user-defined next-generation sequencing-based expression data; (2) multiple analytic modules that assign and productively assess the regulatory networks of user-selected ncRNAs by cross-referencing extensively curated databases; (3) an all-purpose, information-rich workflow design that is tailored to all types of ncRNAs. Outputs on expression profiles, co-expression networks and pathways, and molecular interactomes, are dynamically and interactively displayed according to user-defined criteria.

Conclusions

In short, users may apply circlncRNAnet to obtain, in real time, multiple lines of functionally relevant information on circRNAs/lncRNAs of their interest. In summary, circlncRNAnet provides a ""one-stop"" resource for in-depth analyses of ncRNA biology. circlncRNAnet is freely available at http://app.cgu.edu.tw/circlnc/.",2018-01-01 +34473084,New restraints and validation approaches for nucleic acid structures in PDB-REDO.,"The quality of macromolecular structure models crucially depends on refinement and validation targets, which optimally describe the expected chemistry. Commonly used software for these two procedures has been designed and developed in a protein-centric manner, resulting in relatively few established features for the refinement and validation of nucleic acid-containing structure models. Here, new nucleic acid-specific approaches implemented in PDB-REDO are described, including a new restraint model using noncovalent geometries (base-pair hydrogen bonding and base-pair stacking) as refinement targets. New validation routines are also presented, including a metric for Watson-Crick base-pair geometry normality (ZbpG). Applying the PDB-REDO pipeline with the new restraint model to the whole Protein Data Bank (PDB) demonstrates an overall positive effect on the quality of nucleic acid-containing structure models. Finally, we discuss examples of improvements in the geometry of specific nucleic acid structures in the PDB. The new PDB-REDO models and pipeline are available at https://pdb-redo.eu/.",2021-08-24 +33646849,"Development of Severe COVID-19 Adaptive Risk Predictor (SCARP), a Calculator to Predict Severe Disease or Death in Hospitalized Patients With COVID-19.","

Background

Predicting the clinical trajectory of individual patients hospitalized with coronavirus disease 2019 (COVID-19) is challenging but necessary to inform clinical care. The majority of COVID-19 prognostic tools use only data present upon admission and do not incorporate changes occurring after admission.

Objective

To develop the Severe COVID-19 Adaptive Risk Predictor (SCARP) (https://rsconnect.biostat.jhsph.edu/covid_trajectory/), a novel tool that can provide dynamic risk predictions for progression from moderate disease to severe illness or death in patients with COVID-19 at any time within the first 14 days of their hospitalization.

Design

Retrospective observational cohort study.

Settings

Five hospitals in Maryland and Washington, D.C.

Patients

Patients who were hospitalized between 5 March and 4 December 2020 with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) confirmed by nucleic acid test and symptomatic disease.

Measurements

A clinical registry for patients hospitalized with COVID-19 was the primary data source; data included demographic characteristics, admission source, comorbid conditions, time-varying vital signs, laboratory measurements, and clinical severity. Random forest for survival, longitudinal, and multivariate (RF-SLAM) data analysis was applied to predict the 1-day and 7-day risks for progression to severe disease or death for any given day during the first 14 days of hospitalization.

Results

Among 3163 patients admitted with moderate COVID-19, 228 (7%) became severely ill or died in the next 24 hours; an additional 355 (11%) became severely ill or died in the next 7 days. The area under the receiver-operating characteristic curve (AUC) for 1-day risk predictions for progression to severe disease or death was 0.89 (95% CI, 0.88 to 0.90) and 0.89 (CI, 0.87 to 0.91) during the first and second weeks of hospitalization, respectively. The AUC for 7-day risk predictions for progression to severe disease or death was 0.83 (CI, 0.83 to 0.84) and 0.87 (CI, 0.86 to 0.89) during the first and second weeks of hospitalization, respectively.

Limitation

The SCARP tool was developed by using data from a single health system.

Conclusion

Using the predictive power of RF-SLAM and longitudinal data from more than 3000 patients hospitalized with COVID-19, an interactive tool was developed that rapidly and accurately provides the probability of an individual patient's progression to severe illness or death on the basis of readily available clinical information.

Primary funding source

Hopkins inHealth and COVID-19 Administrative Supplement for the HHS Region 3 Treatment Center from the Office of the Assistant Secretary for Preparedness and Response.",2021-03-02 +33307973,The National School Health Data Set: Every Student Counts! New Data Platform.,The National Association of School Nurses' (NASN's) data initiative The National School Health Data Set: Every Student Counts! (Every Student Counts!) is getting a new platform! This article reviews what Every Student Counts! is and shares some of the new features of the platform. For more information on NASN's initiative and to learn how school nurses can join the data revolution go to http://nasn.org/everystudentcounts.,2020-12-14 +32470107,Detecting Gene Ontology misannotations using taxon-specific rate ratio comparisons.,"

Motivation

Many protein function databases are built on automated or semi-automated curations and can contain various annotation errors. The correction of such misannotations is critical to improving the accuracy and reliability of the databases.

Results

We proposed a new approach to detect potentially incorrect Gene Ontology (GO) annotations by comparing the ratio of annotation rates (RAR) for the same GO term across different taxonomic groups, where those with a relatively low RAR usually correspond to incorrect annotations. As an illustration, we applied the approach to 20 commonly studied species in two recent UniProt-GOA releases and identified 250 potential misannotations in the 2018-11-6 release, where only 25% of them were corrected in the 2019-6-3 release. Importantly, 56% of the misannotations are 'Inferred from Biological aspect of Ancestor (IBA)' which is in contradiction with previous observations that attributed misannotations mainly to 'Inferred from Sequence or structural Similarity (ISS)', probably reflecting an error source shift due to the new developments of function annotation databases. The results demonstrated a simple but efficient misannotation detection approach that is useful for large-scale comparative protein function studies.

Availability and implementation

https://zhanglab.ccmb.med.umich.edu/RAR.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +33181826,WGVD: an integrated web-database for wheat genome variation and selective signatures. ,"Bread wheat is one of the most important crops worldwide. With the release of the complete wheat reference genome and the development of next-generation sequencing technology, a mass of genomic data from bread wheat and its progenitors has been generated and has provided genomic resources for wheat genetics research. To conveniently and effectively access and use these data, we established the Wheat Genome Variation Database (WGVD), an integrated web-database including genomic variations from whole-genome resequencing and exome-capture data for bread wheat and its progenitors, as well as selective signatures during the process of wheat domestication and improvement. In this version, WGVD contains 7 346 814 single nucleotide polymorphisms (SNPs) and 1 044 400 indels focusing on genic regions and upstream or downstream regions. We provide allele frequency distribution patterns of these variations for 5 ploidy wheat groups or 17 worldwide bread wheat groups, the annotation of the variant types and the genotypes of all individuals for 2 versions of bread wheat reference genome (IWGSC RefSeq v1.0 and IWGSC RefSeq v2.0). Selective footprints for Aegilops tauschii, wild emmer, domesticated emmer, bread wheat landrace and bread wheat variety are evaluated with two statistical tests (FST and Pi) based on SNPs from whole-genome resequencing data. In addition, we provide the Genome Browser to visualize the genomic variations, the selective footprints, the genotype patterns and the read coverage depth, and the alignment tool Blast to search the homologous regions between sequences. All of these features of WGVD will promote wheat functional studies and wheat breeding. 
http://animal.nwsuaf.edu.cn/code/index.php/Wheat.",2020-01-01 +34002774,nhKcr: a new bioinformatics tool for predicting crotonylation sites on human nonhistone proteins based on deep learning.,"Lysine crotonylation (Kcr) is a newly discovered type of protein post-translational modification and has been reported to be involved in various pathophysiological processes. High-resolution mass spectrometry is the primary approach for identification of Kcr sites. However, experimental approaches for identifying Kcr sites are often time-consuming and expensive when compared with computational approaches. To date, several predictors for Kcr site prediction have been developed, most of which are capable of predicting crotonylation sites on either histones alone or mixed histone and nonhistone proteins together. These methods exhibit high diversity in their algorithms, encoding schemes, feature selection techniques and performance assessment strategies. However, none of them were designed for predicting Kcr sites on nonhistone proteins. Therefore, it is desirable to develop an effective predictor for identifying Kcr sites from the large amount of nonhistone sequence data. For this purpose, we first provide a comprehensive review on six methods for predicting crotonylation sites. Second, we develop a novel deep learning-based computational framework termed as CNNrgb for Kcr site prediction on nonhistone proteins by integrating different types of features. We benchmark its performance against multiple commonly used machine learning classifiers (including random forest, logitboost, naïve Bayes and logistic regression) by performing both 10-fold cross-validation and independent test. The results show that the proposed CNNrgb framework achieves the best performance with high computational efficiency on large datasets. 
Moreover, to facilitate users' efforts to investigate Kcr sites on human nonhistone proteins, we implement an online server called nhKcr and compare it with other existing tools to illustrate the utility and robustness of our method. The nhKcr web server and all the datasets utilized in this study are freely accessible at http://nhKcr.erc.monash.edu/.",2021-11-01 +32221612,QuartataWeb: Integrated Chemical-Protein-Pathway Mapping for Polypharmacology and Chemogenomics.,"

Summary

QuartataWeb is a user-friendly server developed for polypharmacological and chemogenomics analyses. Users can easily obtain information on experimentally verified (known) and computationally predicted (new) interactions between 5494 drugs and 2807 human proteins in DrugBank, and between 315 514 chemicals and 9457 human proteins in the STITCH database. In addition, QuartataWeb links targets to KEGG pathways and GO annotations, completing the bridge from drugs/chemicals to function via protein targets and cellular pathways. It allows users to query a series of chemicals, drug combinations or multiple targets, to enable multi-drug, multi-target, multi-pathway analyses, toward facilitating the design of polypharmacological treatments for complex diseases.

Availability and implementation

QuartataWeb is freely accessible at http://quartata.csb.pitt.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +28923016,5-hydroxymethylcytosine is highly dynamic across human fetal brain development.,"

Background

Epigenetic processes play a key role in orchestrating transcriptional regulation during the development of the human central nervous system. We previously described dynamic changes in DNA methylation (5mC) occurring during human fetal brain development, but other epigenetic processes operating during this period have not been extensively explored. Of particular interest is DNA hydroxymethylation (5hmC), a modification that is enriched in the human brain and hypothesized to play an important role in neuronal function, learning and memory. In this study, we quantify 5hmC across the genome of 71 human fetal brain samples spanning 23 to 184 days post-conception.

Results

We identify widespread changes in 5hmC occurring during human brain development, notable sex-differences in 5hmC in the fetal brain, and interactions between 5mC and 5hmC at specific sites. Finally, we identify loci where 5hmC in the fetal brain is associated with genetic variation.

Conclusions

This study represents the first systematic analysis of dynamic changes in 5hmC across human neurodevelopment and highlights the potential importance of this modification in the human brain. A searchable database of our fetal brain 5hmC data is available as a resource to the research community at http://www.epigenomicslab.com/online-data-resources .",2017-09-18 +28977518,qPrimerDB: a thermodynamics-based gene-specific qPCR primer database for 147 organisms.,"Real-time quantitative polymerase chain reaction (qPCR) is one of the most important methods for analyzing the expression patterns of target genes. However, successful qPCR experiments rely heavily on the use of high-quality primers. Various qPCR primer databases have been developed to address this issue, but these databases target only a few important organisms. Here, we developed the qPrimerDB database, founded on an automatic gene-specific qPCR primer design and thermodynamics-based validation workflow. The qPrimerDB database is the most comprehensive qPCR primer database available to date, with a web front-end providing gene-specific and pre-computed primer pairs across 147 important organisms, including human, mouse, zebrafish, yeast, thale cress, rice and maize. In this database, we provide 3331426 of the best primer pairs for each gene, based on primer pair coverage, as well as 47760359 alternative gene-specific primer pairs, which can be conveniently batch downloaded. The specificity and efficiency was validated for qPCR primer pairs for 66 randomly selected genes, in six different organisms, through qPCR assays and gel electrophoresis. The qPrimerDB database represents a valuable, timesaving resource for gene expression analysis. This resource, which will be routinely updated, is publically accessible at http://biodb.swu.edu.cn/qprimerdb.",2018-01-01 +34840384,Predicting the Relation between Biopsychosocial Factors and Type of Childbirth using the Decision Tree Method: A Cohort Study.,"

Background

With the growing rate of cesarean sections, rising morbidity and mortality thereafter is an important health issue. Predictive models can identify individuals with a higher probability of cesarean section, and help them make better decisions. This study aimed to investigate the biopsychosocial factors associated with the method of childbirth and designed a predictive model using the decision tree C4.5 algorithm.

Methods

In this cohort study, the sample included 170 pregnant women in the third trimester of pregnancy referring to Shahroud Health Care Centers (Semnan, Iran), from 2018 to 2019. Blood samples were taken from mothers to measure the estrogen hormone at baseline. Birth information was recorded at the follow-up time per 30-42 days postpartum. Chi square, independent samples t test, and Mann-Whitney were used for comparisons between the two groups. Modeling was performed with the help of MATLAB software and C4.5 decision tree algorithm using input variables and target variable (childbirth method). The data were divided into training and testing datasets using the 70-30% method. In both stages, sensitivity, specificity, and accuracy were evaluated by the decision tree algorithm.

Results

Previous method of childbirth, maternal body mass index at childbirth, maternal age, and estrogen were the most significant factors predicting the childbirth method. The decision tree model's sensitivity, specificity, and accuracy were 85.48%, 94.34%, and 89.57% in the training stage, and 82.35%, 83.87%, and 83.33% in the testing stage, respectively.

Conclusion

The decision tree model was designed with high accuracy successfully predicted the method of childbirth. By recognizing the contributing factors, policymakers can take preventive action.It should be noted that this article was published in preprint form on the website of research square (https://www.researchsquare.com/article/rs-34770/v1).",2021-11-01 +34556767,Computational identification of multiple lysine PTM sites by analyzing the instance hardness and feature importance.,"Identification of post-translational modifications (PTM) is significant in the study of computational proteomics, cell biology, pathogenesis, and drug development due to its role in many bio-molecular mechanisms. Though there are several computational tools to identify individual PTMs, only three predictors have been established to predict multiple PTMs at the same lysine residue. Furthermore, detailed analysis and assessment on dataset balancing and the significance of different feature encoding techniques for a suitable multi-PTM prediction model are still lacking. This study introduces a computational method named 'iMul-kSite' for predicting acetylation, crotonylation, methylation, succinylation, and glutarylation, from an unrecognized peptide sample with one, multiple, or no modifications. After successfully eliminating the redundant data samples from the majority class by analyzing the hardness of the sequence-coupling information, feature representation has been optimized by adopting the combination of ANOVA F-Test and incremental feature selection approach. The proposed predictor predicts multi-label PTM sites with 92.83% accuracy using the top 100 features. It has also achieved a 93.36% aiming rate and 96.23% coverage rate, which are much better than the existing state-of-the-art predictors on the validation test. This performance indicates that 'iMul-kSite' can be used as a supportive tool for further K-PTM study. 
For the convenience of the experimental scientists, 'iMul-kSite' has been deployed as a user-friendly web-server at http://103.99.176.239/iMul-kSite .",2021-09-23 +34618598,Vaccine Coverage Across the Life Course in Michigan During the COVID-19 Pandemic: January‒September 2020.,"Objectives. To assess the impact of the COVID-19 pandemic on immunization services across the life course. Methods. In this retrospective study, we used Michigan immunization registry data from 2018 through September 2020 to assess the number of vaccine doses administered, number of sites providing immunization services to the Vaccines for Children population, provider location types that administer adult vaccines, and vaccination coverage for children. Results. Of 12 004 384 individual vaccine doses assessed, 48.6%, 15.6%, and 35.8% were administered to children (aged 0-8 years), adolescents (aged 9-18 years), and adults (aged 19‒105 years), respectively. Doses administered overall decreased beginning in February 2020, with peak declines observed in April 2020 (63.3%). Overall decreases in adult doses were observed in all settings except obstetrics and gynecology provider offices and pharmacies. Local health departments reported a 66.4% decrease in doses reported. For children, the total number of sites administering pediatric vaccines decreased while childhood vaccination coverage decreased 4.4% overall and 5.8% in Medicaid-enrolled children. Conclusions. The critical challenge is to return to prepandemic levels of vaccine doses administered as well as to catch up individuals for vaccinations missed. (Am J Public Health. 2021;111(11):2027-2035. https://doi.org/10.2105/AJPH.2021.306474).",2021-10-07 +29718389,An update on PUG-REST: RESTful interface for programmatic access to PubChem.,"PubChem (https://pubchem.ncbi.nlm.nih.gov) is one of the largest open chemical information resources available. 
It currently receives millions of unique users per month on average, serving as a key resource for many research fields such as cheminformatics, chemical biology, medicinal chemistry, and drug discovery. PubChem provides multiple programmatic access routes to its data and services. One of them is PUG-REST, a Representational State Transfer (REST)-like web service interface to PubChem. On average, PUG-REST receives more than a million requests per day from tens of thousands of unique users. The present paper provides an update on PUG-REST since our previous paper published in 2015. This includes access to new kinds of data (e.g. concise bioactivity data, table of contents headings, etc.), full implementation of synchronous fast structure search, support for assay data retrieval using accession identifiers in response to the deprecation of NCBI's GI numbers, data exchange between PUG-REST and NCBI's E-Utilities through the List Gateway, implementation of dynamic traffic control through throttling, and enhanced usage policies. In addition, example Perl scripts are provided, which the user can easily modify, run, or translate into another scripting language.",2018-07-01 +32940483,"""Political affiliation and employment screening decisions: The role of similarity and identification processes"": Correction to Roth et al. (2020).","Reports an error in ""Political affiliation and employment screening decisions: The role of similarity and identification processes"" by Philip L. Roth, Jason B. Thatcher, Philip Bobko, Kevin D. Matthews, Jill E. Ellingson and Caren B. Goldberg (Journal of Applied Psychology, 2020[May], Vol 105[5], 472-486). In the article ""Political Affiliation and Employment Screening Decisions: The Role of Similarity and Identification Processes,"" by Philip L. Roth, Jason B. Thatcher, Philip Bobko, Kevin D. Matthews, Jill E. Ellingson, and Caren B. Goldberg (Journal of Applied Psychology, Vol. 105, No.5, pp. 472- 486. 
http://dx.doi.org/10.1037/apl0000422), Kevin Matthews gathered data and administered study materials, not Philip Bobko. (The following abstract of the original article appeared in record 2019-56106-001.) Recent research in political science, along with theory in applied psychology, has suggested that political affiliation may be associated with substantial levels of affect and, thus, might influence employment decision-makers. We designed 2 experiments using social media screening tasks to examine the effects of political affiliation similarity on ratings of hireability. Our findings in both studies suggest that the identification (capturing positive affect) and disidentification (capturing negative affect) of a decision-maker with a job applicant's political affiliation were important variables that influenced perceived similarity. Consistent with the similarity-attraction paradigm, perceived similarity was related to liking and, in turn, liking was related to expected levels of applicant task and organizational citizenship behavior performance. Further, in both studies, political affiliation related variables influenced hireability decisions over and above job-relevant individuating information. Future research should continue to examine political affiliation similarity, particularly in light of its frequent availability to decision-makers (e.g., via social media websites). (PsycInfo Database Record (c) 2020 APA, all rights reserved).",2020-09-17 +30428831,funcExplorer: a tool for fast data-driven functional characterisation of high-throughput expression data.,"

Background

A widely applied approach to extract knowledge from high-throughput genomic data is clustering of gene expression profiles followed by functional enrichment analysis. This type of analysis, when done manually, is highly subjective and has limited reproducibility. Moreover, this pipeline can be very time-consuming and resource-demanding as enrichment analysis is done for tens to hundreds of clusters at a time. Thus, the task often needs programming skills to form a pipeline of different software tools or R packages to enable an automated approach. Furthermore, visualising the results can be challenging.

Results

We developed a web tool, funcExplorer, which automatically combines hierarchical clustering and enrichment analysis to detect functionally related gene clusters. The functional characterisation is achieved using structured knowledge from data sources such as Gene Ontology, KEGG and Reactome pathways, Human Protein Atlas, and Human Phenotype Ontology. funcExplorer includes various measures for finding biologically meaningful clusters, provides a modern graphical user interface, and has wide-ranging data export and sharing options as well as software transparency by open-source code. The results are presented in a visually compact and interactive format, enabling users to explore the biological essence of the data. We compared our results with previously published gene clusters to demonstrate that funcExplorer can perform the data characterisation equally well, but without requiring labour-intensive manual interference.

Conclusions

The open-source web tool funcExplorer enables scientists with high-throughput genomic data to obtain a preliminary interactive overview of the expression patterns, gene names, and shared functionalities in their dataset in a visually pleasing format. funcExplorer is publicly available at https://biit.cs.ut.ee/funcexplorer.",2018-11-14 +34774803,Temporal Transcript Profiling Identifies a Role for Unfolded Protein Stress in Human Gut Ischemia-Reperfusion Injury.,"

Background & aims

Intestinal ischemia-reperfusion injury is a serious and life-threatening condition. A better understanding of molecular mechanisms related to intestinal ischemia-reperfusion injury in human beings is imperative to find therapeutic targets and improve patient outcome.

Methods

First, the in vivo dynamic modulation of mucosal gene expression of the ischemia-reperfusion-injured human small intestine was studied. Based on functional enrichment analysis of the changing transcriptome, one of the predominantly regulated pathways was selected for further investigation in an in vitro human intestinal organoid model.

Results

Ischemia-reperfusion massively changed the transcriptional landscape of the human small intestine. Functional enrichment analysis based on gene ontology and pathways pointed to the response to unfolded protein as a predominantly regulated process. In addition, regulatory network analysis identified hypoxia-inducing factor 1A as one of the key mediators of ischemia-reperfusion-induced changes, including the unfolded protein response (UPR). Differential expression of genes involved in the UPR was confirmed using quantitative polymerase chain reaction analysis. Electron microscopy showed signs of endoplasmic reticulum stress. Collectively, these findings point to a critical role for unfolded protein stress in intestinal ischemia-reperfusion injury in human beings. In a human intestinal organoid model exposed to hypoxia-reoxygenation, attenuation of UPR activation with integrated stress response inhibitor strongly reduced pro-apoptotic activating transcription factor 4 (ATF4)-CCAAT/enhancer-binding protein homologous protein (CHOP) signaling.

Conclusions

Transcriptome analysis showed a crucial role for unfolded protein stress in the response to ischemia-reperfusion in human small intestine. UPR inhibition during hypoxia-reoxygenation in an intestinal organoid model suggests that downstream protein kinase R-like ER kinase (PERK) signaling may be a promising target to reduce intestinal ischemia-reperfusion injury. Microarray data are available in GEO (https://www.ncbi.nlm.nih.gov/gds, accession number GSE37013).",2021-11-11 +32938375,Computationally identifying hot spots in protein-DNA binding interfaces using an ensemble approach.,"

Background

Protein-DNA interaction governs a large number of cellular processes, and it can be altered by a small fraction of interface residues, i.e., the so-called hot spots, which account for most of the interface binding free energy. Accurate prediction of hot spots is critical to understand the principle of protein-DNA interactions. There are already some computational methods that can accurately and efficiently predict a large number of hot residues. However, the insufficiency of experimentally validated hot-spot residues in protein-DNA complexes and the low diversity of the employed features limit the performance of existing methods.

Results

Here, we report a new computational method for effectively predicting hot spots in protein-DNA binding interfaces. This method, called PreHots (the abbreviation of Predicting Hotspots), adopts an ensemble stacking classifier that integrates different machine learning classifiers to generate a robust model with 19 features selected by a sequential backward feature selection algorithm. To this end, we constructed two new and reliable datasets (one benchmark for model training and one independent dataset for validation), which totally consist of 123 hot spots and 137 non-hot spots from 89 protein-DNA complexes. The data were manually collected from the literature and existing databases with a strict process of redundancy removal. Our method achieves a sensitivity of 0.813 and an AUC score of 0.868 in 10-fold cross-validation on the benchmark dataset, and a sensitivity of 0.818 and an AUC score of 0.820 on the independent test dataset. The results show that our approach outperforms the existing ones.

Conclusions

PreHots, which is based on stack ensemble of boosting algorithms, can reliably predict hot spots at the protein-DNA binding interface on a large scale. Compared with the existing methods, PreHots can achieve better prediction performance. Both the webserver of PreHots and the datasets are freely available at: http://dmb.tongji.edu.cn/tools/PreHots/ .",2020-09-17 +34181500,Upper Extremity Task-Specific Training: Manual Development and Implementation Research within Inpatient Rehabilitation.,"A structured program of manualization and implementation of neurologic upper extremity task-specific training was developed at an inpatient rehabilitation hospital. The study used the Consolidated Framework for Implementation Research and engaged 31 stakeholders in manual refinement and examination of barriers after a year of training and use. Occupational therapists, occupational therapy assistants, and clinical educators provided input for manual revisions until consensus was achieved on usability, applicability, and implementation. Practitioners reported barriers such as intervention complexity, insufficient idea sharing for implementation, and a lack of motivators outside of the organization. The results can inform future implementation research in occupational therapy.Supplemental data for this article is available online at https://doi.org/10.1080/07380577.2021.1938338 .",2021-06-28 +34129355,First report of Fusarium commune causing root and crown rot on maize in Italy. ,"Maize (Zea mays L.) is a cereal crop of great economic importance in Italy; production is currently of 62,587,469 t, with an area that covers 628,801 ha, concentrated in northern Italy (ISTAT 2020). Fusarium species are associated with root and crown rot causing failures in crop establishment under high soil moisture. 
In 2019 maize seedlings collected in a farm located in San Zenone degli Ezzelini (VI, Italy) showed root and crown rot symptoms with browning of the stem tissues, wilting of the seedling, and collapsing due to the rotting tissues at the base of the stem. The incidence of diseased plants was approximately 15%. Seedlings were cleaned thoroughly from soil residues under tap water. Portions (about 3-5 mm) of tissue from roots and crowns of the diseased plants were cut and surface disinfected with a water solution of NaClO at 0.5% for 2 minutes and rinsed in sterile H20. The tissue fragments were plated on Potato Dextrose Agar (PDA) amended with 50 mg/l of streptomycin sulfate and incubated for 48-72 hours at 25oC. Over the 80 tissue fragments plated, 5% were identified as Fusarium verticillioides, 60% as Fusarium spp., 35% developed saprophytes. Fusarium spp. isolates that showed morphological characteristics not belonging to known pathogenic species on maize were selected and used for further investigation while species belonging to F. oxysporum were discarded. Single conidia of the Fusarium spp. colonies were cultured on PDA and Carnation Leaf Agar (CLA) for pathogenicity tests, morphological and molecular identification. The colonies showed white to pink, abundant, densely floccose to fluffy aerial mycelium. Colony reverse showed light violet pigmentation, in rings on PDA. On CLA the isolates produced slightly curved macronidia with 3 septa 28.1 - 65.5 µm long and 2.8-6.3 µm wide (n=50). Microconidia were cylindrical, aseptate, 4.5 -14.0 µm long and 1.5-3.9 µm wide (n=50). Spherical clamydospores were 8.8 ± 2.5 µm size (n=30), produced singly or in pairs on the mycelium, according to the description by Skovgaard et al. (2003) for F. commune. The identity of two single-conidia strains was confirmed by sequence comparison of the translation elongation factor-1α (tef-1α), and RNA polymerase II subunit (rpb2) gene fragments (O'Donnell et al. 2010). 
BLASTn searches of GenBank, and Fusarium-ID database, using the partial tef-1α (MW419921, MW419922) and rpb2 (MW419923, MW419924) sequences of representative isolate DB19lug07 and DB19lug20, revealed 99% identity for tef-1α and 100% identity to F. commune NRRL 28387(AF246832, AF250560). Pathogenicity tests were carried out by suspending conidia from a 10-days old culture on PDA in sterile H2O to 5×104 CFU/ml. Fifty seeds were immersed in 50 ml of the conidial suspension of each isolate for 24 hours and in sterile water (Koch et al. 2020). The seeds were drained, dried at room temperature, and sown in trays filled with a steamed mix of white peat and perlite, 80:20 v/v, and maintained at 25°C and RH of 80-85% for 14 days with 12 hours photoperiod. Seedlings were extracted from the substrate, washed under tap water, and observed for the presence of root and crown rots like the symptoms observed on the seedlings collected in the field. Control seedlings were healthy and F. commune was reisolated from the symptomatic ones and identified by resequencing of tef-1α gene. F. commune has been already reported on maize (Xi et al. 2019) and other plant species, like soybean (Ellis et al. 2013), sugarcane (Wang et al. 2018), potato (Osawa et al. 2020), indicating that some attention must be paid in crop rotation and residue management strategies. To our knowledge this is the first report of F. commune as a pathogen of maize in Italy. References Ellis M L et al. 2013. Plant Disease, 97, doi: 10.1094/PDIS-07-12-0644-PDN. ISTAT. 2020. http://dati.istat.it/Index.aspx?QueryId=33702. Accessed December 28, 2020. Koch, E. et al. 2020. Journal of Plant Diseases and Protection. 127, 883-893 doi: 10.1007/s41348-020-00350-w O'Donnell K et al. 2010. J. Clin. Microbiol. 48:3708. https://doi.org/10.1128/JCM.00989-10 Osawa H et al. 2020. Journal of General Plant Pathology, doi.org/10.1007/s10327-020-00969-5. Skovgaard K 2003. Mycologia, 95:4, 630-636, DOI: 10.1080/15572536.2004.11833067. 
Wang J et al. 2018. Plant Disease, 102, doi/10.1094/PDIS-07-17-1011-PDN Xi K et al. 2019. Plant Disease, 103, doi/10.1094/PDIS-09-18-1674-PDN.",2021-06-15 +30445567,OncoBase: a platform for decoding regulatory somatic mutations in human cancers.,"Whole-exome and whole-genome sequencing have revealed millions of somatic mutations associated with different human cancers, and the vast majority of them are located outside of coding sequences, making it challenging to directly interpret their functional effects. With the rapid advances in high-throughput sequencing technologies, genome-scale long-range chromatin interactions were detected, and distal target genes of regulatory elements were determined using three-dimensional (3D) chromatin looping. Herein, we present OncoBase (http://www.oncobase.biols.ac.cn/), an integrated database for annotating 81 385 242 somatic mutations in 68 cancer types from more than 120 cancer projects by exploring their roles in distal interactions between target genes and regulatory elements. OncoBase integrates local chromatin signatures, 3D chromatin interactions in different cell types and reconstruction of enhancer-target networks using state-of-the-art algorithms. It employs informative visualization tools to display the integrated local and 3D chromatin signatures and effects of somatic mutations on regulatory elements. Enhancer-promoter interactions estimated from chromatin interactions are integrated into a network diffusion system that quantitatively prioritizes somatic mutations and target genes from a large pool. Thus, OncoBase is a useful resource for the functional annotation of regulatory noncoding regions and systematically benchmarking the regulatory effects of embedded noncoding somatic mutations in human carcinogenesis.",2019-01-01 +34181220,"The First Nations Food, Nutrition and Environment Study (2008-2018)-rationale, design, methods and lessons learned.","

Objective

To describe the rationale, the participatory nature of the methodology, and the lessons learned during the First Nations Food, Nutrition and Environment Study (FNFNES), a community-based participatory research project implemented in eight Assembly of First Nations regions, which includes the entirety of Canada south of the 60th parallel.

Methods

FNFNES respected the First Nations principles of Ownership, Control, Access and Possession (OCAP®) ( https://fnigc.ca/ocap ). A random sampling strategy based on an ecosystem framework comprising 11 ecozones was adopted to collect representative nutritional and environmental health results for all First Nations adults living on-reserve south of the 60th parallel. Data collection occurred during the fall months from 2008 to 2016. Respective First Nations were involved in the planning and implementation of data collection for the five principal components: household interviews, tap water sampling for metals, surface water sampling for pharmaceuticals, hair sampling for mercury, and traditional food sampling for contaminants.

Results

A total of 6487 adults from 92 First Nations participated in the Study (participation rate 78%). A higher percentage of females (66%) participated than males (34%). The average age of males and females was similar (44 and 45 years, respectively). This study offers a novel body of coherent and regionally representative evidence on the human dimension of the ongoing environmental degradation affecting First Nations.

Conclusion

FNFNES serves as a good example of participatory research. We encourage public health professionals to develop policy and programs building on the participatory dimension of the research as well as on its results. The information collected by the FNFNES is also important for community empowerment, environmental stewardship and the general promotion of good health by and for First Nations peoples in Canada.",2021-06-28 +33661646,Triqler for MaxQuant: Enhancing Results from MaxQuant by Bayesian Error Propagation and Integration.,"Error estimation for differential protein quantification by label-free shotgun proteomics is challenging due to the multitude of error sources, each contributing uncertainty to the final results. We have previously designed a Bayesian model, Triqler, to combine such error terms into one combined quantification error. Here we present an interface for Triqler that takes MaxQuant results as input, allowing quick reanalysis of already processed data. We demonstrate that Triqler outperforms the original processing for a large set of both engineered and clinical/biological relevant data sets. Triqler and its interface to MaxQuant are available as a Python module under an Apache 2.0 license from https://pypi.org/project/triqler/.",2021-03-04 +34469179,Unisensory and Multisensory Stroop Effects Modulate Gender Differences in Verbal and Nonverbal Emotion Perception.,"Purpose This study aimed to examine the Stroop effects of verbal and nonverbal cues and their relative impacts on gender differences in unisensory and multisensory emotion perception. Method Experiment 1 investigated how well 88 normal Chinese adults (43 women and 45 men) could identify emotions conveyed through face, prosody and semantics as three independent channels. 
Experiments 2 and 3 further explored gender differences during multisensory integration of emotion through a cross-channel (prosody-semantics) and a cross-modal (face-prosody-semantics) Stroop task, respectively, in which 78 participants (41 women and 37 men) were asked to selectively attend to one of the two or three communication channels. Results The integration of accuracy and reaction time data indicated that paralinguistic cues (i.e., face and prosody) of emotions were consistently more salient than linguistic ones (i.e., semantics) throughout the study. Additionally, women demonstrated advantages in processing all three types of emotional signals in the unisensory task, but only preserved their strengths in paralinguistic processing and showed greater Stroop effects of nonverbal cues on verbal ones during multisensory perception. Conclusions These findings demonstrate clear gender differences in verbal and nonverbal emotion perception that are modulated by sensory channels, which have important theoretical and practical implications. Supplemental Material https://doi.org/10.23641/asha.16435599.",2021-09-22 +30329142,CancerSEA: a cancer single-cell state atlas.,"High functional heterogeneity of cancer cells poses a major challenge for cancer research. Single-cell sequencing technology provides an unprecedented opportunity to decipher diverse functional states of cancer cells at single-cell resolution, and cancer scRNA-seq datasets have been largely accumulated. This emphasizes the urgent need to build a dedicated resource to decode the functional states of cancer single cells. Here, we developed CancerSEA (http://biocc.hrbmu.edu.cn/CancerSEA/ or http://202.97.205.69/CancerSEA/), the first dedicated database that aims to comprehensively explore distinct functional states of cancer cells at the single-cell level. 
CancerSEA portrays a cancer single-cell functional state atlas, involving 14 functional states (including stemness, invasion, metastasis, proliferation, EMT, angiogenesis, apoptosis, cell cycle, differentiation, DNA damage, DNA repair, hypoxia, inflammation and quiescence) of 41 900 cancer single cells from 25 cancer types. It allows querying which functional states are associated with the gene (or gene list) of interest in different cancers. CancerSEA also provides functional state-associated PCG/lncRNA repertoires across all cancers, in specific cancers, and in individual cancer single-cell datasets. In summary, CancerSEA provides a user-friendly interface for comprehensively searching, browsing, visualizing and downloading functional state activity profiles of tens of thousands of cancer single cells and the corresponding PCGs/lncRNAs expression profiles.",2019-01-01 +34133192,Sequence-Specific Model for Predicting Peptide Collision Cross Section Values in Proteomic Ion Mobility Spectrometry.,"The contribution of peptide amino acid sequence to collision cross section values (CCS) has been investigated using a dataset of ∼134 000 peptides of four different charge states (1+ to 4+). The migration data were acquired using a two-dimensional liquid chromatography (LC)/trapped ion mobility spectrometry/quadrupole/time-of-flight mass spectrometry (MS) analysis of HeLa cell digests created using seven different proteases and was converted to CCS values. Following the previously reported modeling approaches using intrinsic size parameters (ISP), we extended this methodology to encode the position of individual residues within a peptide sequence. A generalized prediction model was built by dividing the dataset into eight groups (four charges for both tryptic/nontryptic peptides). Position-dependent ISPs were independently optimized for the eight subsets of peptides, resulting in prediction accuracy of ∼0.981 for the entire population of peptides. 
We find that ion mobility is strongly affected by the peptide's ability to solvate the positively charged sites. Internal positioning of polar residues and proline leads to decreased CCS values as they improve charge solvation; conversely, this ability decreases with increasing peptide charge due to electrostatic repulsion. Furthermore, higher helical propensity and peptide hydrophobicity result in a preferential formation of extended structures with higher than predicted CCS values. Finally, acidic/basic residues exhibit position-dependent ISP behavior consistent with electrostatic interaction with the peptide macrodipole, which affects the peptide helicity. The MS raw data files have been deposited with the ProteomeXchange Consortium via the jPOST partner repository (http://jpostdb.org) with the dataset identifiers PXD021440/JPST000959, PXD022800/JPST001017, and PXD026087/ JPST001176.",2021-06-16 +34061826,"Galaxy-ML: An accessible, reproducible, and scalable machine learning toolkit for biomedicine.","Supervised machine learning is an essential but difficult to use approach in biomedical data analysis. The Galaxy-ML toolkit (https://galaxyproject.org/community/machine-learning/) makes supervised machine learning more accessible to biomedical scientists by enabling them to perform end-to-end reproducible machine learning analyses at large scale using only a web browser. Galaxy-ML extends Galaxy (https://galaxyproject.org), a biomedical computational workbench used by tens of thousands of scientists across the world, with a suite of tools for all aspects of supervised machine learning.",2021-06-01 +34399622,A Widespread Bacterial Secretion System with Diverse Substrates.,"In host-associated bacteria, surface and secreted proteins mediate acquisition of nutrients, interactions with host cells, and specificity of tissue localization. In Gram-negative bacteria, the mechanism by which many proteins cross and/or become tethered to the outer membrane remains unclear. 
The domain of unknown function 560 (DUF560) occurs in outer membrane proteins throughout Proteobacteria and has been implicated in host-bacterium interactions and lipoprotein surface exposure. We used sequence similarity networking to reveal three subfamilies of DUF560 homologs. One subfamily includes those DUF560 proteins experimentally characterized thus far: NilB, a host range determinant of the nematode-mutualist Xenorhabdus nematophila, and the surface lipoprotein assembly modulators Slam1 and Slam2, which facilitate lipoprotein surface exposure in Neisseria meningitidis (Y. Hooda, C. C. Lai, A. Judd, C. M. Buckwalter, et al., Nat Microbiol 1:16009, 2016, https://doi.org/10.1038/nmicrobiol.2016.9; Y. Hooda, C. C. L. Lai, T. F. Moraes, Front Cell Infect Microbiol 7:207, 2017, https://doi.org/10.3389/fcimb.2017.00207). We show that DUF560 proteins from a second subfamily facilitate secretion of soluble, nonlipidated proteins across the outer membrane. Using in silico analysis, we demonstrate that DUF560 gene complement correlates with bacterial environment at a macro level and host association at a species level. The DUF560 protein superfamily represents a newly characterized Gram-negative secretion system capable of lipoprotein surface exposure and soluble protein secretion with conserved roles in facilitating symbiosis. In light of these data, we propose that it be titled the type 11 secretion system (TXISS). IMPORTANCE The microbial constituency of a host-associated microbiome emerges from a complex physical and chemical interplay of microbial colonization factors, host surface conditions, and host immunological responses. To fill unique niches within a host, bacteria encode surface and secreted proteins that enable interactions with and responses to the host and co-occurring microbes. 
Bioinformatic predictions of putative bacterial colonization factor localization and function facilitate hypotheses about the potential of bacteria to engage in pathogenic, mutualistic, or commensal activities. This study uses publicly available genome sequence data alongside experimental results from Xenorhabdus nematophila to demonstrate a role for DUF560 family proteins in secretion of bacterial effectors of host interactions. Our research delineates a broadly distributed family of proteins and enables more accurate predictions of the localization of colonization factors throughout Proteobacteria.",2021-08-17 +34661530,"Early prediction of in-hospital death of COVID-19 patients: a machine-learning model based on age, blood analyses, and chest x-ray score. ","An early-warning model to predict in-hospital mortality on admission of COVID-19 patients at an emergency department (ED) was developed and validated using a machine-learning model. In total, 2782 patients were enrolled between March 2020 and December 2020, including 2106 patients (first wave) and 676 patients (second wave) in the COVID-19 outbreak in Italy. The first-wave patients were divided into two groups with 1474 patients used to train the model, and 632 to validate it. The 676 patients in the second wave were used to test the model. Age, 17 blood analytes, and Brescia chest X-ray score were the variables processed using a random forests classification algorithm to build and validate the model. Receiver operating characteristic (ROC) analysis was used to assess the model performances. A web-based death-risk calculator was implemented and integrated within the Laboratory Information System of the hospital. 
The final score was constructed by age (the most powerful predictor), blood analytes (the strongest predictors were lactate dehydrogenase, D-dimer, neutrophil/lymphocyte ratio, C-reactive protein, lymphocyte %, ferritin std, and monocyte %), and Brescia chest X-ray score (https://bdbiomed.shinyapps.io/covid19score/). The areas under the ROC curve obtained for the three groups (training, validating, and testing) were 0.98, 0.83, and 0.78, respectively. The model predicts in-hospital mortality on the basis of data that can be obtained in a short time, directly at the ED on admission. It functions as a web-based calculator, providing a risk score which is easy to interpret. It can be used in the triage process to support the decision on patient allocation.",2021-10-18 +34908495,"Disparities in Air Pollution Exposure in the United States by Race/Ethnicity and Income, 1990-2010.","

Background

Few studies have investigated air pollution exposure disparities by race/ethnicity and income across criteria air pollutants, locations, or time.

Objective

The objective of this study was to quantify exposure disparities by race/ethnicity and income throughout the contiguous United States for six criteria air pollutants, during the period 1990 to 2010.

Methods

We quantified exposure disparities among racial/ethnic groups (non-Hispanic White, non-Hispanic Black, Hispanic (any race), non-Hispanic Asian) and by income for multiple spatial units (contiguous United States, states, urban vs. rural areas) and years (1990, 2000, 2010) for carbon monoxide (CO), nitrogen dioxide (NO2), ozone (O3), particulate matter with aerodynamic diameter ≤2.5μm (PM2.5; excluding year-1990), particulate matter with aerodynamic diameter ≤10μm (PM10), and sulfur dioxide (SO2). We used census data for demographic information and a national empirical model for ambient air pollution levels.

Results

For all years and pollutants, the racial/ethnic group with the highest national average exposure was a racial/ethnic minority group. In 2010, the disparity between the racial/ethnic group with the highest vs. lowest national-average exposure was largest for NO2 [54% (4.6 ppb)], smallest for O3 [3.6% (1.6 ppb)], and intermediate for the remaining pollutants (13%-19%). The disparities varied by U.S. state; for example, for PM2.5 in 2010, exposures were at least 5% higher than average in 63% of states for non-Hispanic Black populations; in 33% and 26% of states for Hispanic and for non-Hispanic Asian populations, respectively; and in no states for non-Hispanic White populations. Absolute exposure disparities were larger among racial/ethnic groups than among income categories (range among pollutants: between 1.1 and 21 times larger). Over the period studied, national absolute racial/ethnic exposure disparities declined by between 35% (0.66μg/m3; PM2.5) and 88% (0.35 ppm; CO); relative disparities declined to between 0.99× (PM2.5; i.e., nearly zero change) and 0.71× (CO; i.e., a ∼29% reduction).

Discussion

As air pollution concentrations declined during the period 1990 to 2010, absolute (and to a lesser extent, relative) racial/ethnic exposure disparities also declined. However, in 2010, racial/ethnic exposure disparities remained across income levels, in urban and rural areas, and in all states, for multiple pollutants. https://doi.org/10.1289/EHP8584.",2021-12-15 +32709339,A mass spectrometry database for identification of saponins in plants.,"Saponins constitute an important class of secondary metabolites of the plant kingdom. Here, we present a mass spectrometry-based database for rapid and easy identification of saponins henceforth referred to as saponin mass spectrometry database (SMSD). With a total of 4196 saponins, 214 of which were obtained from commercial sources. Through liquid chromatography-tandem high-resolution/mass spectrometry (HR/MS) analysis under negative ion mode, the fragmentation behavior for all parent fragment ions almost conformed to successive losses of sugar moieties, α-dissociation and McLafferty rearrangement of aglycones in high-energy collision induced dissociation. The saccharide moieties produced sugar fragment ions from m/z (monosaccharide) to m/z (polysaccharides). The parent and sugar fragment ions of other saponins were predicted using the above mentioned fragmentation pattern. The SMSD is freely accessible at http://47.92.73.208:8082/ or http://cpu-smsd.com (preferrably using google). It provides three search modes (""CLASSIFY"", ""SEARCH"" and ""METABOLITE""). Under the ""CLASSIFY"" function, saponins are classified with high predictive accuracies from all metabolites by establishment of logistic regression model through their mass data from HR/MS input as a csv file, where the first column is ID and the second column is mass. For the ""SEARCH"" function, saponins are searched against parent ions with certain mass tolerance in ""MS Ion Search"". 
Then, daughter ions with certain mass tolerance are input into ""MS/MS Ion Search"". The optimal candidates were screened out according to the match count and match rate values in comparison with fragment data in database. Additionally, another logistic regression model completely differentiated between parent and sugar fragment ions. This function designed in front web is conducive to search and recheck. With the ""METABOLITE"" function, saponins are searched using their common names, where both full and partial name searches are supported. With these modes, saponins of diverse chemical composition can be explored, grouped and identified with a high degree of predictive accuracy. This specialized database would aid in the identification of saponins in complex matrices particular in the study of traditional Chinese medicines or plant metabolomics.",2020-06-03 +34815621,"Cognitive appraisal, Coping, Stress and Fear Contracting Covid-19 in Working People in Pakistan.","The present study aimed to examine the relationship and prediction of cognitive appraisal and coping with Stress and Fear contracting COVID-19 among the working population of Pakistan. Cross-sectional research design was employed. The data was collected from 980 participants of almost 39 different professions using the purposive sampling technique. Stress Appraisal Measure (Peacock et al., in Stress Med 6:227-236, 1990, http://www.drpaulwong.com/wp-content/uploads/2018/03/Stress-Appraisal-Measure-SAM-Peacock-Wong-1990-Paper.pdf). Brief COPE Inventory (Caver, in Int J Behav Med 4:92-100, 1997), and Perceived Stress Scale (Cohen et al., in J Health Soc Behav 24:385-396, 1983) were used to measure cognitive appraisal, coping, and stress, respectively. Fear was measured by using Fear contracting COVID-19 questionnaire (Ali et al., in J Pakistan Soc Int Med 2(2):140-144, 2021). Age, education, and previously attended stress management training were significantly positively correlated with stress and fear. 
Females were more stressed and fearful than males. Average time spent on social media was significantly positively correlated with stress. Participants, who were employed, had family members of the older age group above 50 years and had family members with the history of biological diseases were more fearful. Results of hierarchical multiple regression analyses showed that threat, centrality, stressfulness appraisal, and avoidant emotional coping significantly positively predicted stress, whereas control-self appraisal and active emotional coping significantly negatively predicted stress. Moreover, threat, challenge, centrality, stressfulness appraisal, and problem-focused coping significantly positively predicted fear contracting COVID-19, whereas control-self appraisal and active emotional coping significantly negatively predicted fear contracting COVID-19 after controlling for covariates. This study will address the administrative authorities and government institutions to provide first-aid mental health services for emergencies, epidemics, or pandemics in the future.",2021-11-19 +34609634,Prevalence of and Relationship Between Caregiver Adversity Scores and Child Client Eco-systemic Structural Family Therapy (ESFT) Outcome: Implications for Family Based Mental Health Services (FBMHS).,"Adverse childhood experiences, especially with primary caregivers, impacts the mental, physical, and relational health of individuals (Felitti et al. in Am J Prev Med, 14(4):245-258. https://doi.org/10.1016/s0749-3797(98)00017-8 , 1998). Therefore, caregiver adversity is important to consider when delivering therapeutic interventions to children (Gardner et al. in Clin Soc Work J 42(1):81-89. https://doi.org/10.1007/s10615-012-0428-8 , 2014; Eslinger et al. in J Child Fam Stud 24(9):2757. https://doi.org/10.1007/s10826-014-0079-1 , 2015; Hagan et al. in J Trauma Stress 30(6):690-697, 2017). 
This study analyzed archival data to understand the role of caregiver adversity in Eco-Systemic Structural Family Therapy (ESFT) outcomes, within Family Based Mental Health Services. Results indicate caregiver lifetime adversity score did not predict treatment outcome. However, caregiver current adversity and family length of stay were negatively correlated as were length of stay and client discharge level of care. These findings suggest that ESFT benefits families regardless of caregiver childhood adversity level and that clinician attention to caregiver current adversity is important to ensure families receive the full benefits of ESFT. Implications for optimizing ESFT and future directions for ESFT clinical research are discussed.",2021-10-05 +,First Report of ‘Candidatus Phytoplasma ziziphi’ Subgroup 16SrV-B Associated with Prunus salicina Witches’-Broom in China,"The plum is one of the traditional fruit trees in China, with production reaching 6,791,974 metric tons in 2017, accounting for 57.8% of the world production (FAOSTAT, http://www.fao.org/faostat/en/#data/QC). In June 2018, Chinese plum trees (Prunus salicina L.) showing small leaves, witches’-broom, and shortened branches were found in two orchards (only two trees per orchard) located in the city of Qufu, Shandong Province, China. Leaf and stem tissues were collected from symptomatic and two symptomless trees. Under transmission electron microscopy, wall-less prokaryotes with pleomorphic shapes, comprising spheroidal, ovoid, dumbbell, and irregular tubular, mostly 150 to 500 nm across were observed in the phloem sieve tubes of symptomatic tissues. Indirect enzyme-linked immunosorbent assays were performed using the antibodies against immunodominant membrane protein (Imp) of jujube witches’-broom nky isolate. The Imp antibody can react with antigens from symptomatic plants but not with those from symptomless plants. 
The associated phytoplasma was designated as Prunus salicina witches’-broom (PSWB) phytoplasma. To further confirm the classification of PSWB phytoplasma, total DNA was extracted from each sample using the cetyltrimethylammonium bromide method. The 16S rRNA gene was amplified using phytoplasma-specific universal primers R16mF2/R16mR1 (Lee et al. 1998). Specific fragments, approximately 1.4 kb, were amplified from DNA samples isolated from four diseased plants tested but not from healthy-looking plum plants nor from the blank control (distilled water as a DNA template). The resultant fragments were then cloned into cloning vectors pMD18-T (Takara Bio, Dalian, China) and sequenced. All obtained sequences (1,433 bp) were identical, and one representative sequence was deposited in GenBank (accession no. MN080143). The 16S rRNA of PSWB phytoplasma was identical to those of the sweet cherry virescence phytoplasma (KF268424) and jujube witches’-broom isolate nky (CP025121) (Wang et al. 2018). Phylogenetic analysis of phytoplasma 16S rDNA sequences based on the maximum likelihood method using MEGA 6.0 software indicated the PSWB phytoplasma within the 16SrV-B and 16SrV-G cluster. The results of iPhyClassifier analysis (Zhao et al. 2009) showed that the R16F2n/R16R2 primed fragment patterns of PSWB phytoplasma, which were taken from the R16mF2/R16mR1 sequences, were identical and had a similarity coefficient (F) of 1.0 with that of the representative strain ‘Candidatus Phytoplasma ziziphi’ of 16SrV-B. European plum (Prunus domestica) witches’-broom was reported recently in Poland, and the etiological agent was ‘Ca. P. asteris’ representing subgroup I-B/L (Zwolińska et al. 2019). The 16SrV-B phytoplasma has not been previously associated with Chinese plum witches’-broom symptoms, although infection has been reported in cherry plum (Prunus cerasifera), causing small and rolled leaf symptoms in China (Hong et al. 2011). 
To our best knowledge, this is the first report of plums (P. salicina) infected by ‘Ca. P. ziziphi’ representing subgroup V-B in China. Although the disease was discovered by chance and sporadic, as the new natural plant host of phytoplasma, the plum plants could facilitate spread of the phytoplasma to other fruits crops, which has great significance in the pathogen’s epidemiology.",2020-02-01 +30357390,ALEdb 1.0: a database of mutations from adaptive laboratory evolution experimentation.,"Adaptive Laboratory Evolution (ALE) has emerged as an experimental approach to discover causal mutations that confer desired phenotypic functions. ALE not only represents a controllable experimental approach to systematically discover genotype-phenotype relationships, but also allows for the revelation of the series of genetic alterations required to acquire the new phenotype. Numerous ALE studies have been published, providing a strong impetus for developing databases to warehouse experimental evolution information and make it retrievable for large-scale analysis. Here, the first step towards establishing this resource is presented: ALEdb (http://aledb.org). This initial release contains over 11 000 mutations that have been discovered from eleven ALE publications. ALEdb (i) is a web-based platform that comprehensively reports on ALE acquired mutations and their conditions, (ii) reports key mutations using previously established trends, (iii) enables a search-driven workflow to enhance user mutation functional analysis through mutation cross-reference, (iv) allows exporting of mutation query results for custom analysis, (v) includes a bibliome describing the databased experiment publications and (vi) contains experimental evolution mutations from multiple model organisms. 
Thus, ALEdb is an informative platform which will become increasingly revealing as the number of reported ALE experiments and identified mutations continue to expand.",2019-01-01 +26400175,PubChem Substance and Compound databases.,"PubChem (https://pubchem.ncbi.nlm.nih.gov) is a public repository for information on chemical substances and their biological activities, launched in 2004 as a component of the Molecular Libraries Roadmap Initiatives of the US National Institutes of Health (NIH). For the past 11 years, PubChem has grown to a sizable system, serving as a chemical information resource for the scientific research community. PubChem consists of three inter-linked databases, Substance, Compound and BioAssay. The Substance database contains chemical information deposited by individual data contributors to PubChem, and the Compound database stores unique chemical structures extracted from the Substance database. Biological activity data of chemical substances tested in assay experiments are contained in the BioAssay database. This paper provides an overview of the PubChem Substance and Compound databases, including data sources and contents, data organization, data submission using PubChem Upload, chemical structure standardization, web-based interfaces for textual and non-textual searches, and programmatic access. It also gives a brief description of PubChem3D, a resource derived from theoretical three-dimensional structures of compounds in PubChem, as well as PubChemRDF, Resource Description Framework (RDF)-formatted PubChem data for data sharing, analysis and integration with information contained in other databases.",2015-09-22 +,Preferences for Outcomes Among Adults with Type 1 Diabetes and Caregivers of Children with Type 1 Diabetes,"

Purpose

Hemoglobin A1c (HbA1c) is the accepted measure of effectiveness for type 1 diabetes therapies. We investigated preferences for measures of diabetes control in addition to HbA1c among adults with type 1 diabetes and caregivers of children with type 1 diabetes.

Methods

Using discrete-choice experiment methodology, surveys for adults with type 1 diabetes and caregivers presented choices between hypothetical treatments described by six attributes with varying levels: HbA1c, time in optimal glucose range, weekly number and severity of hypoglycemic and hyperglycemic events, additional disease management time, and additional treatment cost. Choice data were analyzed using random-parameters logit.

Results

A total of 300 adults with type 1 diabetes and 400 caregivers completed the survey. Adults and caregivers placed the most importance on reducing hypoglycemic and hyperglycemic events. For adults, avoiding 1–5 mild-to-moderate hypoglycemic events (glucose 54–69 mg/dL)/week was five times more important than being a half-point above target HbA1c. Avoiding 1–5 hyperglycemic events (glucose >180 mg/dL)/week was seven times more important than being a half-point above target HbA1c. Additional time in optimal glucose range was as important as a reduction greater than a half-point in HbA1c. Avoiding hyperglycemic and hypoglycemic events was more important than all other outcomes for caregivers of younger children. Caregivers of children >12 years placed relatively more weight on avoiding hypoglycemic events <54 mg/dL than those with younger children and preferred avoiding additional costs.

Conclusion

Adults with type 1 diabetes and caregivers prioritize controlling hypoglycemic and hyperglycemic events, including mild-to-moderate events. These preferences should be considered in drug development and regulatory decisions. Video Abstract Point your SmartPhone at the code above. If you have a QR code reader, the video abstract will appear. Or use: https://youtu.be/zPrBA6mK5hM",2020-01-01 +31318409,PTM-Logo: a program for generation of sequence logos based on position-specific background amino-acid probabilities.,"

Summary

Identification of the amino-acid motifs in proteins that are targeted for post-translational modifications (PTMs) is of great importance in understanding regulatory networks. Information about targeted motifs can be derived from mass spectrometry data that identify peptides containing specific PTMs such as phosphorylation, ubiquitylation and acetylation. Comparison of input data against a standardized 'background' set allows identification of over- and under-represented amino acids surrounding the modified site. Conventionally, calculation of targeted motifs assumes a random background distribution of amino acids surrounding the modified position. However, we show that probabilities of amino acids depend on (i) the type of the modification and (ii) their positions relative to the modified site. Thus, software that identifies such over- and under-represented amino acids should make appropriate adjustments for these effects. Here we present a new program, PTM-Logo, that generates representations of these amino acid preferences ('logos') based on position-specific amino-acid probability backgrounds calculated either from user-input data or curated databases.

Availability and implementation

PTM-Logo is freely available online at http://sysbio.chula.ac.th/PTMLogo/ or https://hpcwebapps.cit.nih.gov/PTMLogo/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +34913495,The markerless lung target tracking AAPM Grand Challenge (MATCH) results.,"

Purpose

Lung stereotactic ablative body radiotherapy (SABR) is a radiation therapy success story with level 1 evidence demonstrating its efficacy. To provide real-time respiratory motion management for lung SABR, several commercial and preclinical markerless lung target tracking (MLTT) approaches have been developed. However, these approaches have yet to be benchmarked using a common measurement methodology. This knowledge gap motivated the MArkerless lung target Tracking CHallenge (MATCH). The aim was to localize lung targets accurately and precisely in a retrospective in silico study and a prospective experimental study.

Methods

MATCH was an American Association of Physicists in Medicine sponsored Grand Challenge. Common materials for the in silico and experimental studies were the experiment setup including an anthropomorphic thorax phantom with two targets within the lungs, and a lung SABR planning protocol. The phantom was moved rigidly with patient-measured lung target motion traces, which also acted as ground truth motion. In the retrospective in silico study a volumetric modulated arc therapy treatment was simulated and a dataset consisting of treatment planning data and intra-treatment kilovoltage (kV) and megavoltage (MV) images for four blinded lung motion traces was provided to the participants. The participants used their MLTT approach to localize the moving target based on the dataset. In the experimental study, the participants received the phantom experiment setup and five patient-measured lung motion traces. The participants used their MLTT approach to localize the moving target during an experimental SABR phantom treatment. The challenge was open to any participant, and participants could complete either one or both parts of the challenge. For both the in silico and experimental studies the MLTT results were analyzed and ranked using the prospectively defined metric of the percentage of the tracked target position being within 2 mm of the ground truth.

Results

A total of 30 institutions registered and 15 result submissions were received, four for the in silico study and 11 for the experimental study. The participating MLTT approaches were: Accuray CyberKnife (2), Accuray Radixact (2), BrainLab Vero, C-RAD, and preclinical MLTT (5) on a conventional linear accelerator (Varian TrueBeam). For the in silico study the percentage of the 3D tracking error within 2 mm ranged from 50% to 92%. For the experimental study, the percentage of the 3D tracking error within 2 mm ranged from 39% to 96%.

Conclusions

A common methodology for measuring the accuracy of MLTT approaches has been developed and used to benchmark preclinical and commercial approaches retrospectively and prospectively. Several MLTT approaches were able to track the target with sub-millimeter accuracy and precision. The study outcome paves the way for broader clinical implementation of MLTT. MATCH is live, with datasets and analysis software being available online at https://www.aapm.org/GrandChallenge/MATCH/ to support future research.",2021-12-29 +32822422,Deciphering the functional diversity of DNA-binding transcription factors in Bacteria and Archaea organisms.,"DNA-binding Transcription Factors (TFs) play a central role in regulation of gene expression in prokaryotic organisms, and similarities at the sequence level have been reported. These proteins are predicted with different abundances as a consequence of genome size, where small organisms contain a low proportion of TFs and large genomes contain a high proportion of TFs. In this work, we analyzed a collection of 668 experimentally validated TFs across 30 different species from diverse taxonomical classes, including Escherichia coli K-12, Bacillus subtilis 168, Corynebacterium glutamicum, and Streptomyces coelicolor, among others. This collection of TFs, together with 111 hidden Markov model profiles associated with DNA-binding TFs collected from diverse databases such as PFAM and DBD, was used to identify the repertoire of proteins putatively devoted to gene regulation in 1321 representative genomes of Archaea and Bacteria. The predicted regulatory proteins were posteriorly analyzed in terms of their genomic context, allowing the prediction of functions for TFs and their neighbor genes, such as genes involved in virulence, enzymatic functions, phosphorylation mechanisms, and antibiotic resistance. 
The functional analysis associated with PFAM groups showed diverse functional categories were significantly enriched in the collection of TFs and the proteins encoded by the neighbor genes, in particular, small-molecule binding and amino acid transmembrane transporter activities associated with the LysR family and proteins devoted to cellular aromatic compound metabolic processes or responses to drugs, stress, or abiotic stimuli in the MarR family. We consider that with the increasing data derived from new technologies, novel TFs can be identified and help improve the predictions for this class of proteins in complete genomes. The complete collection of experimentally characterized and predicted TFs is available at http://web.pcyt.unam.mx/EntrafDB/.",2020-08-21 +33674830,COVID-19 SignSym: a fast adaptation of a general clinical NLP tool to identify and normalize COVID-19 signs and symptoms to OMOP common data model.,"The COVID-19 pandemic swept across the world rapidly, infecting millions of people. An efficient tool that can accurately recognize important clinical concepts of COVID-19 from free text in electronic health records (EHRs) will be valuable to accelerate COVID-19 clinical research. To this end, this study aims at adapting the existing CLAMP natural language processing tool to quickly build COVID-19 SignSym, which can extract COVID-19 signs/symptoms and their 8 attributes (body location, severity, temporal expression, subject, condition, uncertainty, negation, and course) from clinical text. The extracted information is also mapped to standard concepts in the Observational Medical Outcomes Partnership common data model. A hybrid approach of combining deep learning-based models, curated lexicons, and pattern-based rules was applied to quickly build the COVID-19 SignSym from CLAMP, with optimized performance. 
Our extensive evaluation using 3 external sites with clinical notes of COVID-19 patients, as well as the online medical dialogues of COVID-19, shows COVID-19 SignSym can achieve high performance across data sources. The workflow used for this study can be generalized to other use cases, where existing clinical natural language processing tools need to be customized for specific information needs within a short time. COVID-19 SignSym is freely accessible to the research community as a downloadable package (https://clamp.uth.edu/covid/nlp.php) and has been used by 16 healthcare organizations to support clinical research of COVID-19.",2021-06-01 +34118870,Predicting biological pathways of chemical compounds with a profile-inspired approach.,"

Background

Assignment of chemical compounds to biological pathways is a crucial step to understand the relationship between the chemical repertory of an organism and its biology. Protein sequence profiles are very successful in capturing the main structural and functional features of a protein family, and can be used to assign new members to it based on matching of their sequences against these profiles. In this work, we extend this idea to chemical compounds, constructing a profile-inspired model for a set of related metabolites (those in the same biological pathway), based on a fragment-based vectorial representation of their chemical structures.

Results

We use this representation to predict the biological pathway of a chemical compound with good overall accuracy (AUC 0.74-0.90 depending on the database tested), and analyzed some factors that affect performance. The approach, which is compared with equivalent methods, can in addition detect those molecular fragments characteristic of a pathway.

Conclusions

The method is available as a graphical interactive web server http://csbg.cnb.csic.es/iFragMent .",2021-06-12 +33875291,Assessment of ANG variants in Parkinson's disease.,"Genetic risk factors are occasionally shared between different neurodegenerative diseases. Previous studies have linked ANG, a gene encoding angiogenin, to both Parkinson's disease (PD) and amyotrophic lateral sclerosis (ALS). Functional studies suggest ANG plays a neuroprotective role in both PD and amyotrophic lateral sclerosis by reducing cell death. We further explored the genetic association between ANG and PD by analyzing genotype data from the International Parkinson's Disease Genomics Consortium (14,671 cases and 17,667 controls) and whole genome sequencing data from the Accelerating Medicines Partnership - Parkinson's disease initiative (AMP-PD, https://amp-pd.org/) (1,647 cases and 1,050 controls). Our analysis did not replicate the findings of previous studies and identified no significant association between ANG variants and PD risk.",2021-03-24 +34655366,"A Method for More Accurate Determination of Resonance Frequency of the Cardiovascular System, and Evaluation of a Program to Perform It.","This study validated a more exact automated method of determining cardiovascular resonance frequency (RF) against the ""stepped"" protocol described by Lehrer et al. (Appl Psychophysiol Biofeedback 25(3):177-191, https://doi.org/10.1023/a:1009554825745 , 2000; in Foundations of heart rate variability biofeedback: A book of readings, The Association for Applied Psychophysiology and Biofeedback, pp 9-19, 2016). Thirteen participants completed a 15-min RF determination session by each method. The ""stepped"" protocol assesses HRV in five 3-min stationary windows from 4.5 to 6.5 breaths per minute (bpm), decreasing in 0.5 bpm steps. Multiple criteria, subjectively weighted by the clinician, determines RF. 
For this study, the proposed method used a sliding window with a fixed rate of change (67.04 ms per breath) at each of 78 breath cycles ranging from 4.25 to 6.75 bpm. Its algorithm analyzes IBI to locate the midpoint of the 1-min region of stable maximum peak-trough variability. RF is quantified from breath duration at that point. The software generates a visual display of superimposed HR and breathing data. Thus, the new method fully automates RF determination. Eleven of the 13 matched pairs fell within the 0.5 bpm resolution of the stepped method. Comparisons of LF power generated by the autoregressive (AR) spectral method showed a strong correlation in LF power production by the stepped and sliding methods (R = 0.751, p = 0.000). The ""sliding"" pacing protocol was favored by 69% of participants (p < 0.02). The new, fully-automated, method may facilitate both in-person and remote HRV biofeedback training. Software is available open-source.",2021-10-16 +30389920,Improved estimation of cancer dependencies from large-scale RNAi screens using model-based normalization and data integration.,"The availability of multiple datasets comprising genome-scale RNAi viability screens in hundreds of diverse cancer cell lines presents new opportunities for understanding cancer vulnerabilities. Integrated analyses of these data to assess differential dependency across genes and cell lines are challenging due to confounding factors such as batch effects and variable screen quality, as well as difficulty assessing gene dependency on an absolute scale. To address these issues, we incorporated cell line screen-quality parameters and hierarchical Bayesian inference into DEMETER2, an analytical framework for analyzing RNAi screens ( https://depmap.org/R2-D2 ). This model substantially improves estimates of gene dependency across a range of performance measures, including identification of gold-standard essential genes and agreement with CRISPR/Cas9-based viability screens. 
It also allows us to integrate information across three large RNAi screening datasets, providing a unified resource representing the most extensive compilation of cancer cell line genetic dependencies to date.",2018-11-02 +34374965,High-throughput Analysis of Synaptic Activity in Electrically Stimulated Neuronal Cultures.,"Synaptic dysfunction is a hallmark of various neurodegenerative and neurodevelopmental disorders. To interrogate synapse function in a systematic manner, we have established an automated high-throughput imaging pipeline based on fluorescence microscopy acquisition and image analysis of electrically stimulated synaptic transmission in neuronal cultures. Identification and measurement of synaptic signal fluctuations is achieved by means of an image analysis algorithm based on singular value decomposition. By exploiting the synchronicity of the evoked responses, the algorithm allows disentangling distinct temporally correlated patterns of firing synapse populations or cell types that are present in the same recording. We demonstrate the performance of the analysis with a pilot compound screen and show that the multiparametric readout allows classifying treatments by their spatiotemporal fingerprint. The image analysis and visualization software has been made publicly available on Github ( https://www.github.com/S3Toolbox ). The streamlined automation of multi-well image acquisition, electrical stimulation, analysis, and meta-data warehousing facilitates large-scale synapse-oriented screens and, in doing so, it will accelerate the drug discovery process.",2021-08-10 +30150996,AromaDb: A Database of Medicinal and Aromatic Plant's Aroma Molecules With Phytochemistry and Therapeutic Potentials.,"In traditional, herbal medicine, and aromatherapy, use of essential oils and their aroma compounds have been known since long, for the management of various human diseases. 
The essential oil is a mixture of highly complex, naturally occurring volatile aroma compounds synthesized by medicinal and aromatic plants as secondary metabolites. Essential oils widely used in pharmaceutical, cosmetic, sanitary, food industry and agriculture for their antibacterial, antiviral, antifungal, antiparasitic, insecticidal, anticancer, neuroprotective, psychophysiological, and anti-aging activities. Moreover, volatile aroma compounds comprise a chemically diverse class of low molecular weight organic compounds with significant vapor pressure. However, aroma compounds produced by plants, mainly attract pollinators, seed dispersers and provide defense against pests or pathogens. However, in humans, about 300 active olfactory receptor genes are involved to detect thousands of different aroma compounds and modulates expression of different metabolic genes regulating human psychophysiological activity, brain function, pharmacological signaling, and therapeutic potential. Keeping in mind this importance, present database, namely, AromaDb (http://bioinfo.cimap.res.in/aromadb/) covers information of plant varieties/chemotypes, essential oils, chemical constituents, GC-MS profile, yield variations due to agro-morphological parameters, trade data, aroma compounds, fragrance type, and bioactivity details. The database includes 1,321 aroma chemical structures, bioactivities of essential oil/aroma compounds, 357 fragrance type, 166 commercially used plants, and their high yielding 148 varieties/chemotypes. Also includes calculated cheminformatics properties related to identification, physico-chemical properties, pharmacokinetics, toxicological, and ecological information. Also comprises interacted human genes affecting various diseases related cell signaling pathways correlating the use of aromatherapy. 
This database could be a useful resource to the plant's growers/producers, an aroma/fragrance industrialist, health professionals, and researchers exploring the potential of essential oils and aroma compounds in the development of novel formulations against human diseases.",2018-08-13 +34406356,coronaSPAdes: from biosynthetic gene clusters to RNA viral assemblies. ,"The COVID-19 pandemic has ignited a broad scientific interest in viral research in general and coronavirus research in particular. The identification and characterization of viral species in natural reservoirs typically involves de novo assembly. However, existing genome, metagenome and transcriptome assemblers often are not able to assemble many viruses (including coronaviruses) into a single contig. Coverage variation between datasets and within dataset, presence of close strains, splice variants and contamination set a high bar for assemblers to process viral datasets with diverse properties. We developed coronaSPAdes, a novel assembler for RNA viral species recovery in general and coronaviruses in particular. coronaSPAdes leverages the knowledge about viral genome structures to improve assembly extending ideas initially implemented in biosyntheticSPAdes. We have shown that coronaSPAdes outperforms existing SPAdes modes and other popular short-read metagenome and viral assemblers in the recovery of full-length RNA viral genomes. coronaSPAdes version used in this article is a part of SPAdes 3.15 release and is freely available at http://cab.spbu.ru/software/spades. Supplementary data are available at Bioinformatics.",2021-08-18 +34345604,"Phytochemical screening, antimalarial activities, and genetic relationship of 16 indigenous Thai Asteraceae medicinal plants: A combinatorial approach using phylogeny and ethnobotanical bioprospecting in antimalarial drug discovery.","Emergence of artemisinin resistance leads the people to discover the new candidate for antimalarial drug. 
Combinatorial phylogeny and ethnobotanical approach may be useful to minimize the expenditure and time in laboratory testing. Seven hundred and thirty-three ethnomedicinal plants were listed from literature search. Obtained 340 internal transcribed spacer (ITS) sequences of plant list which met criteria were retrieved from GenBank NCBI and analyzed by MUSCLE and maximum likelihood phylogenetic test to generate the phylogenetic tree. Interactive phylogenetic tree was generated by Interactive Tree of Life (ITOL, https://itol.embl.de) and showed strong clustered pattern on Asteraceae. Afterward, 16 species of Asteraceae were selected to investigate the antimalarial activity, phytochemical, and genetic diversity. The presence of phytochemical was determined by standard method. DNA fluorescence-based assay was performed to determine the antimalarial activity against 3D7 Plasmodium falciparum. IC50μg/mL was used to categorize antimalarial activity. On the other hand, ITS universal primer was used to amplify and sequence the obtained extracted DNA of tested plant by cetyltrimethylammonium bromide method. Phylogenetic analyses were performed by MAFFT and RAxML with automatic bootstrapping. ITOL and Adobe Illustrator were used to generate interactive phylogenetic tree. All species tested showed the presence of phenolics and flavonoids, whereas alkaloids and terpenoids were shown vary among tested extracts. Among 16 species tested, 1 species exhibited good-moderate (Sphaeranthus indicus, IC506.59 μg/mL), 4 weak (Artemisia chinensis, Artemisia vulgaris, Tridax procumbens, and Blumea balsamifera), and 3 very weak (Eupatorium capillifolium, Wedelia trilobata, and Vernonia cinerea). Generated phylogenetic tree by ITS data was able to separate the tested species into their tribal classification. In addition, new medicinal properties of A. chinensis were discovered. 
Combining phylogeny approach with ethnobotanical data is useful to narrow down the selection of antimalarial plants candidate.",2021-07-16 +34705567,Auditory-Motor Mapping Training Facilitates Speech and Word Learning in Tone Language-Speaking Children With Autism: An Early Efficacy Study.,"

Purpose

It has been reported that tone language-speaking children with autism demonstrate speech-specific lexical tone processing difficulty, although they have intact or even better-than-normal processing of nonspeech/melodic pitch analogues. In this early efficacy study, we evaluated the therapeutic potential of Auditory-Motor Mapping Training (AMMT) in facilitating speech and word output for Mandarin-speaking nonverbal and low-verbal children with autism, in comparison with a matched non-AMMT-based control treatment.

Method

Fifteen Mandarin-speaking nonverbal and low-verbal children with autism spectrum disorder participated and completed all the AMMT-based treatment sessions by intoning (singing) and tapping the target words delivered via an app, whereas another 15 participants received control treatment. Generalized linear mixed-effects models were created to evaluate speech production accuracy and word production intelligibility across different groups and conditions.

Results

Results showed that the AMMT-based treatment provided a more effective training approach in accelerating the rate of speech (especially lexical tone) and word learning in the trained items. More importantly, the enhanced training efficacy on lexical tone acquisition remained at 2 weeks after therapy and generalized to untrained tones that were not practiced. Furthermore, the low-verbal participants showed higher improvement compared to the nonverbal participants.

Conclusions

These data provide the first empirical evidence for adopting the AMMT-based training to facilitate speech and word learning in Mandarin-speaking nonverbal and low-verbal children with autism. This early efficacy study holds promise for improving lexical tone production in Mandarin-speaking children with autism but should be further replicated in larger scale randomized studies. Supplemental Material https://doi.org/10.23641/asha.16834627.",2021-10-27 +33500680,The sequence of amino acids as the basis for the model of biological activity of peptides.,"The algorithm of building up a model for the biological activity of peptides as a mathematical function of a sequence of amino acids is suggested. The general scheme is the following: The total set of available data is distributed into the active training set, passive training set, calibration set, and validation set. The training (both active and passive) and calibration sets are a system of generation of a model of biological activity where each amino acid obtains special correlation weight. The numerical data on the correlation weights calculated by the Monte Carlo method using the CORAL software (http://www.insilico.eu/coral). The target function aimed to give the best result for the calibration set (not for the training set). The final checkup of the model is carried out with data on the validation set (peptides, which are not visible during the creation of the model). Described computational experiments confirm the ability of the approach to be a tool for the design of predictive models for the biological activity of peptides (expressed by pIC50).",2021-01-22 +31942977,ChIPSummitDB: a ChIP-seq-based database of human transcription factor binding sites and the topological arrangements of the proteins bound to them. ,"ChIP-seq reveals genomic regions where proteins, e.g. transcription factors (TFs) interact with DNA. 
A substantial fraction of these regions, however, do not contain the cognate binding site for the TF of interest. This phenomenon might be explained by protein-protein interactions and co-precipitation of interacting gene regulatory elements. We uniformly processed 3727 human ChIP-seq data sets and determined the cistrome of 292 TFs, as well as the distances between the TF binding motif centers and the ChIP-seq peak summits. ChIPSummitDB enables the analysis of ChIP-seq data using multiple approaches. The 292 cistromes and corresponding ChIP-seq peak sets can be browsed in GenomeView. Overlapping SNPs can be inspected in dbSNPView. Most importantly, the MotifView and PairShiftView pages show the average distance between motif centers and overlapping ChIP-seq peak summits and distance distributions thereof, respectively. In addition to providing a comprehensive human TF binding site collection, the ChIPSummitDB database and web interface allows for the examination of the topological arrangement of TF complexes genome-wide. ChIPSummitDB is freely accessible at http://summit.med.unideb.hu/summitdb/. The database will be regularly updated and extended with the newly available human and mouse ChIP-seq data sets.",2020-01-01 +,Primate Infectious Disease Ecology: Insights and Future Directions at the Human-Macaque Interface,"Global population expansion has increased interactions and conflicts between humans and nonhuman primates over shared ecological space and resources. Such ecological overlap, along with our shared evolutionary histories, makes human-nonhuman primate interfaces hot spots for the acquisition and transmission of parasites. In this chapter, we bring to light the importance of human-macaque interfaces in particular as hot spots for infectious disease ecological and epidemiological assessments. We first outline the significance and broader objectives behind research related to the subfield of primate infectious disease ecology and epidemiology. 
We then reveal how members of the genus Macaca, being among the most socioecologically flexible and invasive of all primate taxa, live under varying degrees of overlap with humans in anthropogenic landscapes. Thus, human-macaque interfaces may favor the bidirectional exchange of parasites. We then review studies that have isolated various types of parasites at human-macaque interfaces, using information from the Global Mammal Parasite Database (GMPD: http://www.mammalparasites.org/). Finally, we elaborate on avenues through which the implementation of both novel conceptual frameworks (e.g., Coupled Systems, One Health) and quantitative network-based approaches (e.g., social and bipartite networks, agent-based modeling) may potentially address some of the critical gaps in our current knowledge of infectious disease ecology at human-primate interfaces.",2019-07-23 +32892224,ProtFold-DFG: protein fold recognition by combining Directed Fusion Graph and PageRank algorithm. ,"As one of the most important tasks in protein structure prediction, protein fold recognition has attracted more and more attention. In this regard, some computational predictors have been proposed with the development of machine learning and artificial intelligence techniques. However, these existing computational methods are still suffering from some disadvantages. In this regard, we propose a new network-based predictor called ProtFold-DFG for protein fold recognition. We propose the Directed Fusion Graph (DFG) to fuse the ranking lists generated by different methods, which employs the transitive closure to incorporate more relationships among proteins and uses the KL divergence to calculate the relationship between two proteins so as to improve its generalization ability. Finally, the PageRank algorithm is performed on the DFG to accurately recognize the protein folds by considering the global interactions among proteins in the DFG. 
Tested on a widely used and rigorous benchmark data set, LINDAHL dataset, experimental results show that the ProtFold-DFG outperforms the other 35 competing methods, indicating that ProtFold-DFG will be a useful method for protein fold recognition. The source code and data of ProtFold-DFG can be downloaded from http://bliulab.net/ProtFold-DFG/download.",2021-05-01 +28529082,Rice Expression Database (RED): An integrated RNA-Seq-derived gene expression database for rice.,"Rice is one of the most important stable food as well as a monocotyledonous model organism for the plant research community. Here, we present RED (Rice Expression Database; http://expression.ic4r.org), an integrated database of rice gene expression profiles derived entirely from RNA-Seq data. RED features a comprehensive collection of 284 high-quality RNA-Seq experiments, integrates a large number of gene expression profiles and covers a wide range of rice growth stages as well as various treatments. Based on massive expression profiles, RED provides a list of housekeeping and tissue-specific genes and dynamically constructs co-expression networks for gene(s) of interest. Besides, it provides user-friendly web interfaces for querying, browsing and visualizing expression profiles of concerned genes. Together, as a core resource in BIG Data Center, RED bears great utility for characterizing the function of rice genes and better understanding important biological processes and mechanisms underlying complex agronomic traits in rice.",2017-05-04 +34157649,Effective and direct control of neural TTS prosody by removing interactions between different attributes.,"End-to-end TTS advancement has shown that synthesized speech prosody can be controlled by conditioning the decoder with speech prosody attribute labels. However, to annotate quantitatively the prosody patterns of a large set of training data is both time consuming and expensive. 
To use unannotated data, variational autoencoder (VAE) has been proposed to model individual prosody attribute as a random variable in the latent space. The VAE is an unsupervised approach and the corresponding latent variables are in general correlated with each other. For more effective and direct control of speech prosody along each attribute dimension, it is highly desirable to disentangle the correlated latent variables. Additionally, being able to interpret the disentangled attributes as speech perceptual cues is useful for designing more efficient prosody control of TTS. In this paper, we propose two attribute separation schemes: (1) using 3 separate VAEs to model the real-valued, different prosodic features, i.e., F0, energy and duration; (2) minimizing mutual information between different prosody attributes to remove their mutual correlations, for facilitating more direct prosody control. Experimental results confirm that the two proposed schemes can indeed make individual prosody attributes more interpretable and direct TTS prosody control more effective. The improvements are measured objectively by F0 Frame Error (FFE) and subjectively with MOS and A/B comparison listening tests, respectively. The scatter diagrams of t-SNE also demonstrate the correlations between prosody attributes, which are well disentangled by minimizing their mutual information. Synthesized TTS samples can be found at https://xiaochunan.github.io/prosody/index.html.",2021-06-11 +31389497,Self-injurious behavior and related mortality in children under 10 years of age: a retrospective health record study in Brazil.,"

Objective

To describe and analyze data on self-injurious behavior (SIB) and related mortality in children under 10 years old in Brazil.

Methods

A descriptive study was performed using secondary public health care data extracted from the Hospital Information System (Sistema de Informações Hospitalares, SIH) and Mortality Information System (Sistema de Informações sobre Mortalidade, SIM) in Brazil. The databases are available for online access at http://datasus.saude.gov.br/.

Results

In Brazil, according to SIH data, 11,312 hospitalizations of patients under 10 years of age were recorded from 1998 to 2018 as resulting from SIB (ICD-10 X60-X84 codes). Of these, 65 resulted in death. According to the SIM, from 1996 to 2016, 91 deaths related to SIB were recorded, 81 (89%) in children aged 5 to 9 years, nine (9.9%) in children aged 1 to 4 years, and one (1.1%) in a child below 1 year of age.

Conclusion

These results highlight the relevance of creating measures to better understand SIB and related mortality in this age group. They also reveal the vulnerability of children in Brazil and warrant further studies to address these issues.",2020-01-01 +26581084,Transcriptator: An Automated Computational Pipeline to Annotate Assembled Reads and Identify Non Coding RNA.,"RNA-seq is a new tool to measure RNA transcript counts, using high-throughput sequencing at an extraordinary accuracy. It provides quantitative means to explore the transcriptome of an organism of interest. However, interpreting this extremely large data into biological knowledge is a problem, and biologist-friendly tools are lacking. In our lab, we developed Transcriptator, a web application based on a computational Python pipeline with a user-friendly Java interface. This pipeline uses the web services available for BLAST (Basis Local Search Alignment Tool), QuickGO and DAVID (Database for Annotation, Visualization and Integrated Discovery) tools. It offers a report on statistical analysis of functional and Gene Ontology (GO) annotation's enrichment. It helps users to identify enriched biological themes, particularly GO terms, pathways, domains, gene/proteins features and protein-protein interactions related informations. It clusters the transcripts based on functional annotations and generates a tabular report for functional and gene ontology annotations for each submitted transcript to the web server. The implementation of QuickGo web-services in our pipeline enable the users to carry out GO-Slim analysis, whereas the integration of PORTRAIT (Prediction of transcriptomic non coding RNA (ncRNA) by ab initio methods) helps to identify the non coding RNAs and their regulatory role in transcriptome. In summary, Transcriptator is a useful software for both NGS and array data. 
It helps the users to characterize the de-novo assembled reads, obtained from NGS experiments for non-referenced organisms, while it also performs the functional enrichment analysis of differentially expressed transcripts/genes for both RNA-seq and micro-array experiments. It generates easy to read tables and interactive charts for better understanding of the data. The pipeline is modular in nature, and provides an opportunity to add new plugins in the future. Web application is freely available at: http://www-labgtp.na.icar.cnr.it/Transcriptator.",2015-11-18 +32548701,Knockout of vascular smooth muscle EGF receptor in a mouse model prevents obesity-induced vascular dysfunction and renal damage in vivo.,"

Aims/hypothesis

Obesity causes type 2 diabetes leading to vascular dysfunction and finally renal end-organ damage. Vascular smooth muscle (VSM) EGF receptor (EGFR) modulates vascular wall homeostasis in part via serum response factor (SRF), a major regulator of VSM differentiation and a sensor for glucose. We investigated the role of VSM-EGFR during obesity-induced renovascular dysfunction, as well as EGFR-hyperglycaemia crosstalk.

Methods

The role of VSM-EGFR during high-fat diet (HFD)-induced type 2 diabetes was investigated in a mouse model with inducible, VSM-specific EGFR-knockout (KO). Various structural and functional variables as well as transcriptome changes, in vivo and ex vivo, were assessed. The impact of hyperglycaemia on EGFR-induced signalling and SRF transcriptional activity and the underlying mechanisms were investigated at the cellular level.

Results

We show that VSM-EGFR mediates obesity/type 2 diabetes-induced vascular dysfunction, remodelling and transcriptome dysregulation preceding renal damage and identify an EGFR-glucose synergism in terms of SRF activation, matrix dysregulation and mitochondrial function. EGFR deletion protects the animals from HFD-induced endothelial dysfunction, creatininaemia and albuminuria. Furthermore, we show that HFD leads to marked changes of the aortic transcriptome in wild-type but not in KO animals, indicative of EGFR-dependent SRF activation, matrix dysregulation and mitochondrial dysfunction, the latter confirmed at the cellular level. Studies at the cellular level revealed that high glucose potentiated EGFR/EGF receptor 2 (ErbB2)-induced stimulation of SRF activity, enhancing the graded signalling responses to EGF, via the EGFR/ErbB2-ROCK-actin-MRTF pathway and promoted mitochondrial dysfunction.

Conclusions/interpretation

VSM-EGFR contributes to HFD-induced vascular and subsequent renal alterations. We propose that a potentiated EGFR/ErbB2-ROCK-MRTF-SRF signalling axis and mitochondrial dysfunction underlie the role of EGFR. This advanced working hypothesis will be investigated in mechanistic depth in future studies. VSM-EGFR may be a therapeutic target in cases of type 2 diabetes-induced renovascular disease.

Data availability

The datasets generated during and/or analysed during the current study are available in: (1) share_it, the data repository of the academic libraries of Saxony-Anhalt ( https://doi.org/10.25673/32049.2 ); and (2) in the gene expression omnibus database with the study identity GSE144838 ( https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE144838 ). Graphical abstract.",2020-06-17 +32142429,Deep Neural Network-Based Sinogram Super-Resolution and Bandwidth Enhancement for Limited-Data Photoacoustic Tomography.,"Photoacoustic tomography (PAT) is a noninvasive imaging modality combining the benefits of optical contrast at ultrasonic resolution. Analytical reconstruction algorithms for photoacoustic (PA) signals require a large number of data points for accurate image reconstruction. However, in practical scenarios, data are collected using the limited number of transducers along with data being often corrupted with noise resulting in only qualitative images. Furthermore, the collected boundary data are band-limited due to limited bandwidth (BW) of the transducer, making the PA imaging with limited data being qualitative. In this work, a deep neural network-based model with loss function being scaled root-mean-squared error was proposed for super-resolution, denoising, as well as BW enhancement of the PA signals collected at the boundary of the domain. The proposed network has been compared with traditional as well as other popular deep-learning methods in numerical as well as experimental cases and is shown to improve the collected boundary data, in turn, providing superior quality reconstructed PA image. 
The improvement obtained in the Pearson correlation, structural similarity index metric, and root-mean-square error was as high as 35.62%, 33.81%, and 41.07%, respectively, for phantom cases and signal-to-noise ratio improvement in the reconstructed PA images was as high as 11.65 dB for in vivo cases compared with reconstructed image obtained using original limited BW data. Code is available at https://sites.google.com/site/sercmig/home/dnnpat.",2020-11-24 +26476454,KEGG as a reference resource for gene and protein annotation.,"KEGG (http://www.kegg.jp/ or http://www.genome.jp/kegg/) is an integrated database resource for biological interpretation of genome sequences and other high-throughput data. Molecular functions of genes and proteins are associated with ortholog groups and stored in the KEGG Orthology (KO) database. The KEGG pathway maps, BRITE hierarchies and KEGG modules are developed as networks of KO nodes, representing high-level functions of the cell and the organism. Currently, more than 4000 complete genomes are annotated with KOs in the KEGG GENES database, which can be used as a reference data set for KO assignment and subsequent reconstruction of KEGG pathways and other molecular networks. As an annotation resource, the following improvements have been made. First, each KO record is re-examined and associated with protein sequence data used in experiments of functional characterization. Second, the GENES database now includes viruses, plasmids, and the addendum category for functionally characterized proteins that are not represented in complete genomes. Third, new automatic annotation servers, BlastKOALA and GhostKOALA, are made available utilizing the non-redundant pangenome data set generated from the GENES database. As a resource for translational bioinformatics, various data sets are created for antimicrobial resistance and drug interaction networks.",2015-10-17 +27275187,Advanced SPARQL querying in small molecule databases.,"

Background

In recent years, the Resource Description Framework (RDF) and the SPARQL query language have become more widely used in the area of cheminformatics and bioinformatics databases. These technologies allow better interoperability of various data sources and powerful searching facilities. However, we identified several deficiencies that make usage of such RDF databases restrictive or challenging for common users.

Results

We extended a SPARQL engine to be able to use special procedures inside SPARQL queries. This allows the user to work with data that cannot be simply precomputed and thus cannot be directly stored in the database. We designed an algorithm that checks a query against data ontology to identify possible user errors. This greatly improves query debugging. We also introduced an approach to visualize retrieved data in a user-friendly way, based on templates describing visualizations of resource classes. To integrate all of our approaches, we developed a simple web application.

Conclusions

Our system was implemented successfully, and we demonstrated its usability on the ChEBI database transformed into RDF form. To demonstrate procedure call functions, we employed compound similarity searching based on OrChem. The application is publicly available at https://bioinfo.uochb.cas.cz/projects/chemRDF.",2016-06-06 +34623878,"Mental Disorders, Gun Ownership, and Gun Carrying Among Soldiers After Leaving the Army, 2016-2019.","Objectives. To examine associations of current mental and substance use disorders with self-reported gun ownership and carrying among recently separated US Army soldiers. Veterans have high rates of both gun ownership and mental disorders, the conjunction of which might contribute to the high suicide rate in this group. Methods. Cross-sectional survey data were collected in 2018-2019 from 5682 recently separated personnel who took part in the Army Study to Assess Risk and Resilience in Servicemembers. Validated measures assessed recent mood, anxiety, substance use, and externalizing disorders. Logistic regression models examined associations of sociodemographic characteristics, service characteristics, and mental disorders with gun ownership and carrying. Results. Of the participants, 50% reported gun ownership. About half of owners reported carrying some or most of the time. Mental disorders were not associated significantly with gun ownership. However, among gun owners, major depressive disorder, panic disorder, posttraumatic stress disorder, and intermittent explosive disorder were associated with significantly elevated odds of carrying at least some of the time. Conclusions. Mental disorders are not associated with gun ownership among recently separated Army personnel, but some mental disorders are associated with carrying among gun owners. (Am J Public Health. 2021;111(10):1855-1864. 
https://doi.org/10.2105/AJPH.2021.306420).",2021-10-01 +31850800,Identifying and Prioritizing Chemicals with Uncertain Burden of Exposure: Opportunities for Biomonitoring and Health-Related Research.,"

Background

The National Institutes of Health's Environmental influences on Child Health Outcomes (ECHO) initiative aims to understand the impact of environmental factors on childhood disease. Over 40,000 chemicals are approved for commercial use. The challenge is to prioritize chemicals for biomonitoring that may present health risk concerns.

Objectives

Our aim was to prioritize chemicals that may elicit child health effects of interest to ECHO but that have not been biomonitored nationwide and to identify gaps needing additional research.

Methods

We searched databases and the literature for chemicals in environmental media and in consumer products that were potentially toxic. We selected chemicals that were not measured in the National Health and Nutrition Examination Survey. From over 700 chemicals, we chose 155 chemicals and created eight chemical panels. For each chemical, we compiled biomonitoring and toxicity data, U.S. Environmental Protection Agency exposure predictions, and annual production usage. We also applied predictive modeling to estimate toxicity. Using these data, we recommended chemicals either for biomonitoring, to be deferred pending additional data, or as low priority for biomonitoring.

Results

For the 155 chemicals, 97 were measured in food or water, 67 in air or house dust, and 52 in biospecimens. We found in vivo endocrine, developmental, reproductive, and neurotoxic effects for 61, 74, 47, and 32 chemicals, respectively. Eighty-six had data from high-throughput in vitro assays. Positive results for endocrine, developmental, neurotoxicity, and obesity were observed for 32, 11, 35, and 60 chemicals, respectively. Predictive modeling results suggested 90% are toxicants. Biomarkers were reported for 76 chemicals. Thirty-six were recommended for biomonitoring, 108 deferred pending additional research, and 11 as low priority for biomonitoring.

Discussion

The 108 deferred chemicals included those lacking biomonitoring methods or toxicity data, representing an opportunity for future research. Our evaluation was, in general, limited by the large number of unmeasured or untested chemicals. https://doi.org/10.1289/EHP5133.",2019-12-18 +,"First Report of ‘Candidatus Phytoplasma trifolii’–Related Strain Associated with a New Disease on Garlic in Zacatecas, Mexico","The state of Zacatecas is considered the largest producer of garlic (Allium sativum L.) in Mexico. In February 2016, symptoms of plant stunting, leaf yellowing, leaf malformation, and bright and “waxy” appearance of the leaves were observed in 40% of garlic plants in a commercial field in the municipality of Fresnillo in Zacatecas, Mexico (N: 23° 22.676, W: 102° 59.535 2017 masl). Total DNA was extracted from 15 symptomatic and five asymptomatic garlic plants (collected in the same commercial field) by a modified version of the Dellaporta method (Dellaporta et al. 1983). Direct and nested polymerase chain reaction (PCR) assays targeting the 16S rDNA gene were used to confirm the association of phytoplasma with the disease. The oligonucleotide primers used were P1 (5′-AAGAGTTTGATCCTGGCTCAGGATT-3′) and Tint (5′-TCAGGCGTGTGCTCTAACCAGC-3′) for direct PCR (Smart et al. 1996) and R16F2n (5′-GAAACGACTGCTAAGACTGG-3′) and R16R2 (5′-TGACGGGCGGTGTGTACAAACCCCG-3′) for nested PCR (Gundersen and Lee 1996). No PCR products were obtained from the five asymptomatic plants. The nested PCR amplicon (1.2 kb) amplified from each one of the 15 symptomatic plants was cloned separately into pGEM-T Easy Vector (Promega, Madison, WI) and directly sequenced. BLAST analysis of the 16S rDNA sequences revealed that they shared 100% sequence identity to each other and 99.0% sequence identity to ‘Candidatus Phytoplasma trifolii’ (Cpt) isolate Tomato-Zac (KX092011). 
Computer-simulated restriction fragment length polymorphism (RFLP) analysis of the 16S rDNA sequence from the Zacatecas garlic phytoplasma sequence (GenBank accession no. MH259307) using iPhyClassifier (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi) and RFLP profiles was compared with each 16S phytoplasma group and subgroup. The garlic phytoplasma 16S rDNA sequence shared 98.8% identity to the reference strain, Cpt subgroup A (accession no. AY390261). To our knowledge, this is the first report of Cpt associated with a new disease of garlic in Mexico. Also, adults of the beet leafhopper (Circulifer tenellus Baker) were collected from the sampled garlic field, and total DNA was extracted from a pool of 20 beet leafhoppers. Amplification of phytoplasma DNA using the same primers as mentioned previously was carried out, and further sequencing of the PCR products confirmed the presence of Cpt DNA with a nucleotide identity of 100% to the phytoplasma sequence detected in symptomatic garlic plants. Cpt has been associated to the big bud disease of pepper (Mauricio-Castillo et al. 2015) and dwarfing and yellowing of tomato plants in Zacatecas (Salas-Muñoz et al. 2016). Infection of members of different botanical families, Solanaceae (pepper and tomato) and Alliaceae (garlic), by Cpt in this area of Mexico may reflect the polyphagous habit of C. tenellus, and consequently a wider host range may be expected.",2018-12-01 +34323642,In silico strategies for modeling RNA aptamers and predicting binding sites of their molecular targets.,"RNA aptamers are single-stranded nucleic acids of 20-100 nucleotides, with high sensitivity and specificity against particular molecular targets. In vitro production and selection of aptamers can be performed using the SELEX method. However, this procedure requires considerable time and cost. 
In this sense, bioinformatics tools play an important role in reducing the time and cost associated with development and production of aptamers. In this article, we propose bioinformatics strategies for modeling and analysis of the interaction with molecular targets for two RNA aptamers: ATP binding RNA aptamer and iSpinach aptamer. For this purpose, molecular modeling of the tertiary structure of the aptamers was performed with two servers (SimRNA and RNAComposer); and AutoDock Vina and rDock programs were used to dock their respective ligands. The predictions developed with these methods could be used for in silico design of RNA aptamers, through a simple and accessible methodology.Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1951754 .",2021-07-29 +32215567,CRiSP: accurate structure prediction of disulfide-rich peptides with cystine-specific sequence alignment and machine learning.,"

Motivation

High-throughput sequencing discovers many naturally occurring disulfide-rich peptides or cystine-rich peptides (CRPs) with diversified bioactivities. However, their structure information, which is very important to peptide drug discovery, is still very limited.

Results

We have developed a CRP-specific structure prediction method called Cystine-Rich peptide Structure Prediction (CRiSP), based on a customized template database with cystine-specific sequence alignment and three machine-learning predictors. The modeling accuracy is significantly better than several popular general-purpose structure modeling methods, and our CRiSP can provide useful model quality estimations.

Availability and implementation

The CRiSP server is freely available on the website at http://wulab.com.cn/CRISP.

Contact

wuyd@pkusz.edu.cn or jiangfan@pku.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +34320635,Accurate Large-scale Phylogeny-Aware Alignment using BAli-Phy. ,"BAli-Phy, a popular Bayesian method that co-estimates multiple sequence alignments and phylogenetic trees, is a rigorous statistical method, but due to its computational requirements, it has generally been limited to relatively small datasets (at most about 100 sequences). Here we repurpose BAli-Phy as a ``phylogeny-aware"" alignment method: we estimate the phylogeny from the input of unaligned sequences, and then use that as a fixed tree within BAli-Phy. We show that this approach achieves high accuracy, greatly superior to Prank, the current most popular phylogeny-aware alignment method, and is even more accurate than MAFFT, one of the top performing alignment methods in common use. Furthermore, this approach can be used to align very large datasets (up to 1000 sequences in this study). See https://doi.org/10.13012/B2IDB-7863273_V1 for datasets used in this study. Supplementary data are available at Bioinformatics online.",2021-07-28 +33497488,The Parkinson's Disease DNA Variant Browser.,"

Background

Parkinson's disease (PD) is a genetically complex neurodegenerative disease with ~20 genes known to contain mutations that cause PD or atypical parkinsonism. Large-scale next-generation sequencing projects have revolutionized genomics research. Applying these data to PD, many genes have been reported to contain putative disease-causing mutations. In most instances, however, the results remain quite limited and rather preliminary. Our aim was to assist researchers on their search for PD-risk genes and variant candidates with an easily accessible and open summary-level genomic data browser for the PD research community.

Methods

Sequencing and imputed genotype data were obtained from multiple sources and harmonized and aggregated.

Results

In total, we included 102,127 participants, including 28,453 PD cases, 1650 proxy cases, and 72,024 controls.

Conclusions

We present here the Parkinson's Disease Sequencing Browser: a Shiny-based web application that presents comprehensive summary-level frequency data from multiple large-scale genotyping and sequencing projects https://pdgenetics.shinyapps.io/VariantBrowser/. Published © 2021 This article is a U.S. Government work and is in the public domain in the USA. Movement Disorders published by Wiley Periodicals LLC on behalf of International Parkinson and Movement Disorder Society.",2021-01-26 +28779078,"Aspergillus Secondary Metabolite Database, a resource to understand the Secondary metabolome of Aspergillus genus.","Aspergillus is a genus of ubiquitous fungi that are pathologically & therapeutically important. Aspergillus Secondary Metabolites Database (A2MDB) is a curated compendium of information on Aspergillus & its secondary metabolome. A2MDB catalogs 807 unique non-redundant secondary metabolites derived from 675 Aspergillus species. A2MDB has a compilation of 100 cellular targets of secondary metabolites, 44 secondary metabolic pathways, 150 electron and light microscopy images of various Aspergillus species. A phylogenetic representation of over 2500 strains has been provided. A2MDB presents a detailed chemical information of secondary metabolites and their mycotoxins. Molecular docking models of metabolite-target protein interactions have been put together. A2MDB also has epidemiological data representing Aspergillosis and global occurrence of Aspergillus species. Furthermore a novel classification of Aspergillosis along with 370 case reports with images, were made available. For each metabolite catalogued, external links to related databases have been provided. All this data is available on A2MDB, launched through Indian Institute of Chemical Technology, Hyderabad, India, as an open resource http://www.iictindia.org/A2MDB . 
We believe A2MDB is of practical relevance to the scientific community that is in pursuit of novel therapeutics.",2017-08-04 +33855981,Competence assessment of the clinical tutor: a multicentric observational study.,"

Background and aim of the study

In the international literature there are no validated tools which investigate clinical tutors' skills. The main objective of the study has been to describe the clinical nurse tutor's skills, required to properly train nursing students during their educational path.

Methods

In this observational study a non-probability sampling has been used. The study was led in two centers: the AOUPR of Parma and the AUSL of Parma, after obtaining the favorable opinion from the Ethics Committee of the Northern Emilia Large Section. The data have been collected by using a structured and self-given survey that investigated three areas. Each item has a 4-point Likert scale, in which 1 indicates ""for nothing"" and 4 ""very much"". The data have been analyzed with the statistical software IBM SPSS v.26 ® and with the open-source statistical software Jamovi v.1.6.9 (https://www.jamovi.org.). The number of factors in the original model was reduced using several established research steps and then evaluated for data quality and construct validity using principal component analysis and confirmatory factor analysis.

Results

Among 397 administered questionnaires, only 300, which were considered valid, have been filled. The psychometric properties of the investigation tool turned out to be good in all the areas analyzed with a Cronbach alpha higher than 0.70. The extensive process resulted in a version with 4 factors.

Conclusions

Nurses' answers have allowed to draw the required profile of the clinical tutors in the different organizational contexts. The results can target possible training proposals to create opportunities for the clinical tutors.",2021-03-31 +33983377,Evaluation of Categorical Matrix Completion Algorithms: Towards Improved Active Learning for Drug Discovery. ,"High throughput and high content screening are extensively used to determine the effect of small molecule compounds and other potential therapeutics upon particular targets as part of the early drug development process. However, screening is typically used to find compounds that have a desired effect but not to identify potential undesirable side effects. This is because the size of the search space precludes measuring the potential effect of all compounds on all targets. Active machine learning has been proposed as a solution to this problem. In this article, we describe an improved imputation method, Impute By Committee, for completion of matrices containing categorical values. We compare this method to existing approaches in the context of modeling the effects of many compounds on many targets using latent similarities between compounds and conditions. We also compare these methods for the task of driving active learning in well-characterized settings for synthetic and real datasets. Our new approach performed the best overall both in the accuracy of matrix completion itself and in the number of experiments needed to train an accurate predictive model compared to random selection of experiments. We further improved upon the performance of our new method by developing an adaptive switching strategy for active learning that iteratively chooses between different matrix completion methods. A Reproducible Research Archive containing all data and code will be made available upon acceptance at http://murphylab.cbd.cmu.edu/software. 
Supplementary data are available at Bioinformatics online.",2021-05-13 +31228159,Networking in Biology: The Hybrid Rat Diversity Panel.,"One of the most fruitful resources for systems genetic studies of nonhuman mammals is a panel of inbred strains that exhibits significant genetic diversity between strains but genetic stability (isogenicity) within strains. These characteristics allow for fine mapping of complex phenotypes (QTLs) and provide statistical power to identify loci which contribute nominally to the phenotype. This type of resource also allows the planning and performance of investigations using the same genetic backgrounds over several generations of the test animals. Often, rats are preferred over mice for physiologic and behavioral studies because of their larger size and more distinguishable anatomy (particularly for their central nervous system). The Hybrid Rat Diversity Panel (HRDP) is a panel of inbred rat strains, which combines two recombinant inbred panels (the HXB/BXH, 30 strains; the LEXF/FXLE, 34 strains and 35 more strains of inbred rats which were selected for genetic diversity, based on their fully sequenced genomes and/or thorough genotyping). The genetic diversity and statistical power of this panel for mapping studies rivals or surpasses currently available panels in mouse. The genetic stability of this panel makes it particularly suitable for collection of high-throughput omics data as relevant technology becomes available for engaging in truly integrative systems biology. The PhenoGen website ( http://phenogen.org ) is the repository for the initial transcriptome data, making the raw data, the processed data, and the analysis results, e.g., organ-specific protein coding and noncoding transcripts, isoform analysis, expression quantitative trait loci, and co-expression networks, available to the research public. 
The data sets and tools being developed will complement current efforts to analyze the human transcriptome and its genetic controls (the Genotype-Tissue Expression Project (GTEx)) and allow for dissection of genetic networks that predispose to particular phenotypes and gene-by-environment interactions that are difficult or even impossible to study in humans. The HRDP is an essential population for exploring truly integrative systems genetics.",2019-01-01 +34963094,Target Actionability Review: a systematic evaluation of replication stress as a therapeutic target for paediatric solid malignancies.,"

Background

Owing to the high numbers of paediatric cancer-related deaths, advances in therapeutic options for childhood cancer is a heavily studied field, especially over the past decade. Classical chemotherapy offers some therapeutic benefit but has proven long-term complications in survivors, and there is an urgent need to identify novel target-driven therapies. Replication stress is a major cause of genomic instability in cancer, triggering the stalling of the replication fork. Failure of molecular response by DNA damage checkpoints, DNA repair mechanisms and restarting the replication forks can exacerbate replication stress and initiate cell death pathways, thus presenting as a novel therapeutic target. To bridge the gap between preclinical evidence and clinical utility thereof, we apply the literature-driven systematic target actionability review methodology to published proof-of-concept (PoC) data related to the process of replication stress.

Methods

A meticulous PubMed literature search was performed to gather replication stress-related articles (published between 2014 and 2021) across 16 different paediatric solid tumour types. Articles that fulfilled inclusion criteria were uploaded into the R2 informatics platform [r2.amc.nl] and assessed by critical appraisal. Key evidence based on nine pre-established PoC modules was summarised, and scores based on the quality and outcome of each study were assigned by two separate reviewers. Articles with discordant modules/scores were re-scored by a third independent reviewer, and a final consensus score was agreed upon by adjudication between all three reviewers. To visualise the final scores, an interactive heatmap summarising the evidence and scores associated with each PoC module across all, including paediatric tumour types, were generated.

Results and conclusions

145 publications related to targeting replication stress in paediatric tumours were systematically reviewed with an emphasis on DNA repair pathways and cell cycle checkpoint control. Although various targets in these pathways have been studied in these diseases to different extents, the results of this extensive literature search show that ATR, CHK1, PARP or WEE1 are the most promising targets using either single agents or in combination with chemotherapy or radiotherapy in neuroblastoma, osteosarcoma, high-grade glioma or medulloblastoma. Targeting these pathways in other paediatric malignancies may work as well, but here, the evidence was more limited. The evidence for other targets (such as ATM and DNA-PK) was also limited but showed promising results in some malignancies and requires more studies in other tumour types. Overall, we have created an extensive overview of targeting replication stress across 16 paediatric tumour types, which can be explored using the interactive heatmap on the R2 target actionability review platform [https://hgserver1.amc.nl/cgi-bin/r2/main.cgi?option=imi2_targetmap_v1].",2021-12-25 +32757615,Speech Prosody Interventions for Persons With Autism Spectrum Disorders: A Systematic Review.,"Purpose Persons with autism spectrum disorder (ASD) may demonstrate abnormal prosodic patterns in conversational speech, which can negatively affect social interactions. The purpose of this systematic review was to identify interventions measuring the improvement of expressive speech prosody in persons with ASD in order to support clinician's evidence-based decision making. Method We used 13 electronic databases to search for relevant articles using terms related to autism, intervention, and speech prosody. The databases identified a total of nine articles for the title, abstract, and full-text reviews. Five more articles were included after performing descendant and reference searches. 
One peer-reviewed article was excluded due to insufficient data received from the authors. We coded the resulting 13 articles for report, setting, intervention, outcome, and results characteristics and methodological quality. Results Results showed that interventions specifically targeting speech prosody using established and emerging evidence-based practices across more than 1 treatment day resulted in moderate to large improvements in speech prosody in persons with ASD. Interventions that indirectly targeted prosody or were very short resulted in small or nonsignificant effects. Discussion The results of this literature review suggest that interventions that directly target speech prosody using established evidence-based practices for ASD may be most effective for increasing typical prosodic patterns during speech for persons with ASD. Further research is needed to establish which interventions are most effective for each age range and context. Supplemental Material https://doi.org/10.23641/asha.12735926.",2020-08-03 +31228198,DEUS: an R package for accurate small RNA profiling based on differential expression of unique sequences.,"

Summary

Despite their fundamental role in various biological processes, the analysis of small RNA sequencing data remains a challenging task. Major obstacles arise when short RNA sequences map to multiple locations in the genome, align to regions that are not annotated or underwent post-transcriptional changes which hamper accurate mapping. In order to tackle these issues, we present a novel profiling strategy that circumvents the need for read mapping to a reference genome by utilizing the actual read sequences to determine expression intensities. After differential expression analysis of individual sequence counts, significant sequences are annotated against user defined feature databases and clustered by sequence similarity. This strategy enables a more comprehensive and concise representation of small RNA populations without any data loss or data distortion.

Availability and implementation

Code and documentation of our R package at http://ibis.helmholtz-muenchen.de/deus/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +31958638,BoMiProt: A database of bovine milk proteins.,"Bovine milk has become an important biological fluid for proteomic research due to its nutritional and immunological benefits. To date, over 300 publications have reported changes in bovine milk protein composition based on seasons, lactation stages, breeds, health status and milk fractions while there are no reports on consolidation or overlap of data between studies. Thus, we have developed a literature-based, manually curated open online database of bovine milk proteome, BoMiProt (http://bomiprot.org), with over 3100 proteins from whey, fat globule membranes and exosomes. Each entry in the database is thoroughly cross-referenced including 397 proteins with well-defined information on protein function, biochemical properties, post-translational modifications and significance in milk from different publications. Of 397 proteins, over 199 have been reported with a structural gallery of homology models and crystal structures in the database. The proteome data can be retrieved using several search parameters such as protein name, accession IDs, FASTA sequence. Furthermore, the proteome data can be filtered based on milk fractions, post-translational modifications and/or structures. Taken together, BoMiProt represents an extensive compilation of bovine milk proteins from literature, providing a foundation for future studies to identify specific milk proteins which may be linked to mammary gland pathophysiology. BIOLOGICAL SIGNIFICANCE: Protein data identified from different previously published proteomic studies on bovine milk samples (21 publications) were gathered in the BoMiProt database. Unification of the identified proteins will give researchers an initial reference database on bovine milk proteome to understand the complexities of milk as a biological fluid. 
BoMiProt has a user-friendly interface with several useful features, including different search criteria for primary and secondary information of proteins along with cross-references to external databases. The database will provide insights into the existing literature and possible future directions to investigate further and improve the beneficial effects of bovine milk components and dairy products on human health.",2020-01-17 +34251878,Health Effects of Naphthalene Exposure: A Systematic Evidence Map and Analysis of Potential Considerations for Dose-Response Evaluation.,"

Background

Naphthalene is a polycyclic aromatic hydrocarbon that has been associated with health effects, including cancer. As the state of the science on naphthalene toxicity continues to evolve, updated toxicity reference value(s) may be required to support human health risk assessment.

Objectives

We present a systematic evidence map of studies that could be used to derive toxicity reference value(s) for naphthalene.

Methods

Human and animal health effect studies and physiologically based pharmacokinetic (PBPK) models were identified from a literature search based on populations, exposures, comparators, and outcomes (PECO) criteria. Human and animal studies meeting PECO criteria were refined to a smaller subset considered most informative for deriving chronic reference value(s), which are preferred for assessing risk to the general public. This subset was evaluated for risk of bias and sensitivity, and the suitability of each study for dose-response analysis was qualitatively assessed. Lowest observed adverse effect levels (LOAELs) were extracted and summarized. Other potentially relevant studies (e.g., mechanistic and toxicokinetic studies) were tracked as supplemental information but not evaluated further. Existing reference values for naphthalene are also summarized.

Results

We identified 26 epidemiology studies and 16 animal studies that were considered most informative for further analysis. Eleven PBPK models were identified. The available epidemiology studies generally had significant risk of bias and/or sensitivity concerns and were mostly found to have low suitability for dose-response analysis due to the nature of the exposure measurements. The animal studies had fewer risk of bias and sensitivity concerns and were mostly found to be suitable for dose-response analysis.

Conclusion

Although both epidemiological and animal studies of naphthalene provide weight of evidence for hazard identification, the available animal studies appear more suitable for reference value derivation. PBPK models and mechanistic and toxicokinetic data can be applied to extrapolate these animal data to humans, considering mode of action and interspecies metabolic differences. https://doi.org/10.1289/EHP7381.",2021-07-12 +27789697,"Protein Data Bank Japan (PDBj): updated user interfaces, resource description framework, analysis tools for large structures.","The Protein Data Bank Japan (PDBj, http://pdbj.org), a member of the worldwide Protein Data Bank (wwPDB), accepts and processes the deposited data of experimentally determined macromolecular structures. While maintaining the archive in collaboration with other wwPDB partners, PDBj also provides a wide range of services and tools for analyzing structures and functions of proteins. We herein outline the updated web user interfaces together with RESTful web services and the backend relational database that support the former. To enhance the interoperability of the PDB data, we have previously developed PDB/RDF, PDB data in the Resource Description Framework (RDF) format, which is now a wwPDB standard called wwPDB/RDF. We have enhanced the connectivity of the wwPDB/RDF data by incorporating various external data resources. Services for searching, comparing and analyzing the ever-increasing large structures determined by hybrid methods are also described.",2016-10-26 +34632512,COVID-19: On the Disparity in Outcomes Between Military and Civilian Populations. ,"The CoronaVirus Disease 2019 (COVID-19) pandemic remains a formidable threat to populations around the world. The U.S. Military, in particular, represents a unique and distinguishable subset of the population, primarily due to the age and gender of active duty personnel. 
Current investigations have focused on health outcome forecasts for civilian populations, making them of limited value for military planning. We have developed and applied an age-structured susceptible, exposed, infectious, recovered, or dead compartmental model for both civilian and military populations, driven by estimates of the time-dependent reproduction number, R(t), which can be both fit to available data and also forecast future cases, intensive care unit (ICU) patients, and deaths. We show that the expected health outcomes for active duty military populations are substantially different than for civilian populations of the same size. Specifically, while the number of cases is not expected to differ dramatically, severity, both in terms of ICU burdens and deaths, is substantially lower. Our results confirm that the burden placed on military health centers will be substantially lower than that for equivalent-sized civilian populations. More practically, the tool we have developed to investigate this (https://q.predsci.com/covid19/) can be used by military health planners to estimate the resources needed in particular locations based on current estimates of the transmission profiles of COVID-19 within the surrounding civilian population in which the military installation is embedded. As this tool continues to be developed, it can be used to assess the likely impact of different intervention strategies, as well as vaccine policies; both for the current pandemic as well as future ones.",2021-10-11 +31600197,"The NIH Open Citation Collection: A public access, broad coverage resource.","Citation data have remained hidden behind proprietary, restrictive licensing agreements, which raises barriers to entry for analysts wishing to use the data, increases the expense of performing large-scale analyses, and reduces the robustness and reproducibility of the conclusions. 
For the past several years, the National Institutes of Health (NIH) Office of Portfolio Analysis (OPA) has been aggregating and enhancing citation data that can be shared publicly. Here, we describe the NIH Open Citation Collection (NIH-OCC), a public access database for biomedical research that is made freely available to the community. This dataset, which has been carefully generated from unrestricted data sources such as MedLine, PubMed Central (PMC), and CrossRef, now underlies the citation statistics delivered in the NIH iCite analytic platform. We have also included data from a machine learning pipeline that identifies, extracts, resolves, and disambiguates references from full-text articles available on the internet. Open citation links are available to the public in a major update of iCite (https://icite.od.nih.gov).",2019-10-10 +34523928,Rewinding the Molecular Clock: Looking at Pioneering Molecular Phylogenetics Experiments in the Light of Proteomics.,"Science is full of overlooked and undervalued research waiting to be rediscovered. Proteomics is no exception. In this perspective, we follow the ripples from a 1960 study of Zuckerkandl, Jones, and Pauling comparing tryptic peptides across animal species. This pioneering work directly led to the molecular clock hypothesis and the ensuing explosion in molecular phylogenetics. In the decades following, proteins continued to provide essential clues on evolutionary history. While technology has continued to improve, contemporary proteomics has strayed from this larger biological context, rarely comparing species or asking how protein structure, function, and interactions have evolved. Here we recombine proteomics with molecular phylogenetics, highlighting the value of framing proteomic results in a larger biological context and how almost forgotten research, though technologically surpassed, can still generate new ideas and illuminate our work from a different perspective. 
Though it is infeasible to read all research published on a large topic, looking up older papers can be surprisingly rewarding when rediscovering a ""gem"" at the end of a long citation chain, aided by digital collections and perpetually helpful librarians. Proper literature study reduces unnecessary repetition and allows research to be more insightful and impactful by truly standing on the shoulders of giants. All data was uploaded to MassIVE (https://massive.ucsd.edu/) as dataset MSV000087993.",2021-09-15 +33971107,The Effects of Chronic Exposure to Ambient Traffic-Related Air Pollution on Alzheimer's Disease Phenotypes in Wildtype and Genetically Predisposed Male and Female Rats.,"

Background

Epidemiological data link traffic-related air pollution (TRAP) to increased risk of Alzheimer's disease (AD). Preclinical data corroborating this association are largely from studies of male animals exposed acutely or subchronically to high levels of isolated fractions of TRAP. What remains unclear is whether chronic exposure to ambient TRAP modifies AD risk and the influence of sex on this interaction.

Objectives

This study sought to assess effects of chronic exposure to ambient TRAP on the time to onset and severity of AD phenotypes in a preclinical model and to determine whether sex or genetic susceptibility influences outcomes.

Methods

Male and female TgF344-AD rats that express human AD risk genes and wildtype littermates were housed in a vivarium adjacent to a heavily trafficked tunnel in Northern California and exposed for up to 14 months to filtered air (FA) or TRAP drawn from the tunnel and delivered to animals unchanged in real time. Refractive particles in the brain and AD phenotypes were quantified in 3-, 6-, 10-, and 15-month-old animals using hyperspectral imaging, behavioral testing, and neuropathologic measures.

Results

Particulate matter (PM) concentrations in TRAP exposure chambers fluctuated with traffic flow but remained below 24-h PM with aerodynamic diameter less than or equal to 2.5 micrometers (PM2.5) U.S. National Ambient Air Quality Standards limits. Ultrafine PM was a predominant component of TRAP. Nano-sized refractive particles were detected in the hippocampus of TRAP animals. TRAP-exposed animals had more amyloid plaque deposition, higher hyperphosphorylated tau levels, more neuronal cell loss, and greater cognitive deficits in an age-, genotype-, and sex-dependent manner. TRAP-exposed animals also had more microglial cell activation, but not astrogliosis.

Discussion

These data demonstrate that chronic exposure to ambient TRAP promoted AD phenotypes in wildtype and genetically susceptible rats. TRAP effects varied according to age, sex, and genotype, suggesting that AD progression depends on complex interactions between environment and genetics. These findings suggest current PM2.5 regulations are insufficient to protect the aging brain. https://doi.org/10.1289/EHP8905.",2021-05-10 +33338203,MuscleAtlasExplorer: a web service for studying gene expression in human skeletal muscle. ,"MuscleAtlasExplorer is a freely available web application that allows for the exploration of gene expression data from human skeletal muscle. It draws from an extensive publicly available dataset of 1654 skeletal muscle expression microarray samples. Detailed, manually curated, patient phenotype data, with information such as age, sex, BMI and disease status, are combined with skeletal muscle gene expression to provide insights into gene function in skeletal muscle. It aims to facilitate easy exploration of the data using powerful data visualization functions, while allowing for sample selection, in-depth inspection and further analysis using external tools. Availability: MuscleAtlasExplorer is available at https://mae.crc.med.lu.se/mae2 (username 'muscle' and password 'explorer' pre-publication).",2020-12-01 +29140469,SuperDRUG2: a one stop resource for approved/marketed drugs.,"Regular monitoring of drug regulatory agency web sites and similar resources for information on new drug approvals and changes to legal status of marketed drugs is impractical. It requires navigation through several resources to find complete information about a drug as none of the publicly accessible drug databases provide all features essential to complement in silico drug discovery. Here, we propose SuperDRUG2 (http://cheminfo.charite.de/superdrug2) as a comprehensive knowledge-base of approved and marketed drugs. 
We provide the largest collection of drugs (containing 4587 active pharmaceutical ingredients) which include small molecules, biological products and other drugs. The database is intended to serve as a one-stop resource providing data on: chemical structures, regulatory details, indications, drug targets, side-effects, physicochemical properties, pharmacokinetics and drug-drug interactions. We provide a 3D-superposition feature that facilitates estimation of the fit of a drug in the active site of a target with a known ligand bound to it. Apart from multiple other search options, we introduced pharmacokinetics simulation as a unique feature that allows users to visualise the 'plasma concentration versus time' profile for a given dose of drug with few other adjustable parameters to simulate the kinetics in a healthy individual and poor or extensive metabolisers.",2018-01-01 +29106626,PAMDB: a comprehensive Pseudomonas aeruginosa metabolome database.,"The Pseudomonas aeruginosa Metabolome Database (PAMDB, http://pseudomonas.umaryland.edu) is a searchable, richly annotated metabolite database specific to P. aeruginosa. P. aeruginosa is a soil organism and significant opportunistic pathogen that adapts to its environment through a versatile energy metabolism network. Furthermore, P. aeruginosa is a model organism for the study of biofilm formation, quorum sensing, and bioremediation processes, each of which are dependent on unique pathways and metabolites. The PAMDB is modelled on the Escherichia coli (ECMDB), yeast (YMDB) and human (HMDB) metabolome databases and contains >4370 metabolites and 938 pathways with links to over 1260 genes and proteins. The database information was compiled from electronic databases, journal articles and mass spectrometry (MS) metabolomic data obtained in our laboratories. 
For each metabolite entered, we provide detailed compound descriptions, names and synonyms, structural and physiochemical information, nuclear magnetic resonance (NMR) and MS spectra, enzymes and pathway information, as well as gene and protein sequences. The database allows extensive searching via chemical names, structure and molecular weight, together with gene, protein and pathway relationships. The PAMDB and its future iterations will provide a valuable resource to biologists, natural product chemists and clinicians in identifying active compounds, potential biomarkers and clinical diagnostics.",2018-01-01 +33390764,SHERPA-city: A web application to assess the impact of traffic measures on NO2 pollution in cities.,"This paper presents SHERPA-City, a web application to assess the potential of traffic measures to abate NO2 air pollution in cities. The application is developed by the Joint Research Centre. It is freely available (https://integrated-assessment.jrc.ec.europa.eu) and allows the user to perform a fast screening of possible NO2 abatement measures addressing traffic in European cities. SHERPA-City results depend on the quality of the default input data. It is therefore important to stress that the SHERPA-City default traffic flows, emission factors, fleet composition, road network topology, NO2 pollution from other sources and meteorological data are based on EU-wide datasets that may not always represent perfectly a particular local situation. This is why the SHERPA-City allows the default data to be substituted by local data, to better reflect local features. This tool must be considered as a first step in exploring options to abate NO2 air pollution through transport measures. The final decisions should be based, wherever possible, on full-scale modelling studies incorporating local knowledge.",2021-01-01 +34252929,Model learning to identify systemic regulators of the peripheral circadian clock.,"

Motivation

Personalized medicine aims at providing patient-tailored therapeutics based on multi-type data toward improved treatment outcomes. Chronotherapy that consists in adapting drug administration to the patient's circadian rhythms may be improved by such approach. Recent clinical studies demonstrated large variability in patients' circadian coordination and optimal drug timing. Consequently, new eHealth platforms allow the monitoring of circadian biomarkers in individual patients through wearable technologies (rest-activity, body temperature), blood or salivary samples (melatonin, cortisol) and daily questionnaires (food intake, symptoms). A current clinical challenge involves designing a methodology predicting from circadian biomarkers the patient peripheral circadian clocks and associated optimal drug timing. The mammalian circadian timing system being largely conserved between mouse and humans yet with phase opposition, the study was developed using available mouse datasets.

Results

We investigated at the molecular scale the influence of systemic regulators (e.g. temperature, hormones) on peripheral clocks, through a model learning approach involving systems biology models based on ordinary differential equations. Using as prior knowledge our existing circadian clock model, we derived an approximation for the action of systemic regulators on the expression of three core-clock genes: Bmal1, Per2 and Rev-Erbα. These time profiles were then fitted with a population of models, based on linear regression. Best models involved a modulation of either Bmal1 or Per2 transcription most likely by temperature or nutrient exposure cycles. This agreed with biological knowledge on temperature-dependent control of Per2 transcription. The strengths of systemic regulations were found to be significantly different according to mouse sex and genetic background.

Availability and implementation

https://gitlab.inria.fr/julmarti/model-learning-mb21eccb.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +31665439,MaGenDB: a functional genomics hub for Malvaceae plants.,"Malvaceae is a family of flowering plants containing many economically important plant species including cotton, cacao and durian. Recently, the genomes of several Malvaceae species have been decoded, and many omics data were generated for individual species. However, no integrative database of multiple species, enabling users to jointly compare and analyse relevant data, is available for Malvaceae. Thus, we developed a user-friendly database named MaGenDB (http://magen.whu.edu.cn) as a functional genomics hub for the plant community. We collected the genomes of 13 Malvaceae species, and comprehensively annotated genes from different perspectives including functional RNA/protein element, gene ontology, KEGG orthology, and gene family. We processed 374 sets of diverse omics data with the ENCODE pipelines and integrated them into a customised genome browser, and designed multiple dynamic charts to present gene/RNA/protein-level knowledge such as dynamic expression profiles and functional elements. We also implemented a smart search system for efficiently mining genes. In addition, we constructed a functional comparison system to help comparative analysis between genes on multiple features in one species or across closely related species. This database and associated tools will allow users to quickly retrieve large-scale functional information for biological discovery.",2020-01-01 +31624839,T-psi-C: user friendly database of tRNA sequences and structures.,"tRNAs have been widely studied for their role as genetic code decoders in the ribosome during translation, but have recently received new attention due to the discovery of novel roles beyond decoding, often in connection with human diseases. 
Yet, existing tRNA databases have not been updated for more than a decade, so they do not contain this new functional information and have not kept pace with the rate of discovery in this field. Therefore, a regularly updated database that contains information about newly discovered characteristics of tRNA molecules and can be regularly updated is strongly needed. Here, we report the creation of the T-psi-C database (http://tpsic.igcz.poznan.pl), an up-to-date collection of tRNA sequences that contains data obtained from high-throughput tRNA sequencing, e.g. all isoacceptors and isodecoders for human HEK293 cells. This database also contains 3D tRNA structures obtained from Protein Data Bank and generated using homology modeling. The T-psi-C database can be continuously updated by any member of the scientific community, and contains its own application programming interface (API), which allows users to retrieve or upload data in JSON format. Altogether, T-psi-C is user-friendly, easy to develop and an up-to-date source of knowledge about tRNAs.",2020-01-01 +31584086,PGG.Han: the Han Chinese genome database and analysis platform.,"As the largest ethnic group in the world, the Han Chinese population is nonetheless underrepresented in global efforts to catalogue the genomic variability of natural populations. Here, we developed the PGG.Han, a population genome database to serve as the central repository for the genomic data of the Han Chinese Genome Initiative (Phase I). In its current version, the PGG.Han archives whole-genome sequences or high-density genome-wide single-nucleotide variants (SNVs) of 114 783 Han Chinese individuals (a.k.a. the Han100K), representing geographical sub-populations covering 33 of the 34 administrative divisions of China, as well as Singapore. 
The PGG.Han provides: (i) an interactive interface for visualization of the fine-scale genetic structure of the Han Chinese population; (ii) genome-wide allele frequencies of hierarchical sub-populations; (iii) ancestry inference for individual samples and controlling population stratification based on nested ancestry informative markers (AIMs) panels; (iv) population-structure-aware shared control data for genotype-phenotype association studies (e.g. GWASs) and (v) a Han-Chinese-specific reference panel for genotype imputation. Computational tools are implemented into the PGG.Han, and an online user-friendly interface is provided for data analysis and results visualization. The PGG.Han database is freely accessible via http://www.pgghan.org or https://www.hanchinesegenomes.org.",2020-01-01 +32478594,"IMABASE: A new set of 313 colourised line drawings standardised in French for name agreement, image agreement, conceptual familiarity, age-of-acquisition, and imageability.","We provide normative data for a new set of 313 colourised line drawings. The drawings were standardised on name agreement (N = 60 participants), image agreement (N = 34), conceptual familiarity (N = 36), age of acquisition (N = 35), and imageability (N = 35). Objective visual complexity measures are given for the pictures, and objective word frequencies are provided for the modal names of the drawings. Reliability measures for the collected norms are very high. There are high levels of agreement between the names given by the participants and the drawings and comparative analyses indicate that the distribution of name agreement scores is very similar in both our own database and the MultiPic database (Duñabeitia et al., 2018). A novel ""picture-choice task"" used to assess name-image agreement (N = 30) reveals that the great majority of the IMABASE pictures that are also present in MultiPic are rated as providing better pictorial representations of the corresponding concepts. 
Finally, most of the correlations are comparable with those reported in other normative studies on colourised drawings. The whole set of pictures is freely available from https://leadserv.u-bourgogne.fr/~lead/imabase/ and the norms are available as Supplementary Material.",2020-07-03 +33831310,A Cadaveric and Magnetic Resonance Imaging Investigation of the Salpingopharyngeus.,"Purpose The aim of the study was to update our information regarding the salpingopharyngeus (SP) muscle using cadaveric and in vivo magnetic resonance imaging (MRI) data. Primary objectives were to (a) observe the presence/absence of the muscle and (b) quantify and describe its dimensions and course. Method SP specimens from 19 cadavers (10 women, nine men) were analyzed. Following head bisection, measurements of SP, including width of the cartilaginous attachment (CW) and width of the superior muscle base (SMW), were taken before and after removal of the overlying mucosa. In addition, SP was analyzed in 15 healthy subjects (eight men, seven women) using high-resolution three-dimensional MRI data. CW and SMW measures were replicated in the paraxial MRI view. Results The presence of the salpingopharyngeal fold and muscle was confirmed bilaterally in all cadaveric and living subjects. Following mucosa removal, mean cadaveric CW and SMW measurements were 5.6 and 3.8 mm, respectively. Mean in vivo CW and SMW were 6.1 and 3.7 mm, respectively. Results from the hierarchical regression analyses revealed that, in both cadaveric and living groups, SMW is dependent on the relationship between age and body weight, after controlling for sex. Conclusions The salpingopharyngeal fold and SP muscle are always present bilaterally and can be quantified at the superior origin using both cadaveric and in vivo three-dimensional MRI data. 
Though both the superior origin and inferior course of SP are highly variable, the size of the SP muscle is dependent on characteristics known to affect muscle fibers, such as the relationship between age and body weight. Given the consistent and quantifiable presence of the SP muscle, its potential role in velopharyngeal function for speech and swallowing is reconsidered. Supplemental Material https://doi.org/10.23641/asha.14347859.",2021-04-08 +34580386,AGO CLIP-based imputation of potent siRNA sequences targeting SARS-CoV-2 with antifibrotic miRNA-like activity.,"Coronavirus disease 2019 (COVID-19), caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2), is associated with fatal pulmonary fibrosis. Small interfering RNAs (siRNAs) can be developed to induce RNA interference against SARS-CoV-2, and their susceptible target sites can be inferred by Argonaute crosslinking immunoprecipitation sequencing (AGO CLIP). Here, by reanalysing AGO CLIP data in RNA viruses, we delineated putative AGO binding in the conserved non-structural protein 12 (nsp12) region encoding RNA-dependent RNA polymerase (RdRP) in SARS-CoV-2. We utilised the inferred AGO binding to optimise the local RNA folding parameter to calculate target accessibility and predict all potent siRNA target sites in the SARS-CoV-2 genome, avoiding sequence variants. siRNAs loaded onto AGO also repressed seed (positions 2-8)-matched transcripts by acting as microRNAs (miRNAs). To utilise this, we further screened 13 potential siRNAs whose seed sequences were matched to known antifibrotic miRNAs and confirmed their miRNA-like activity. A miR-27-mimicking siRNA designed to target the nsp12 region (27/RdRP) was validated to silence a synthesised nsp12 RNA mimic in lung cell lines and function as an antifibrotic miR-27 in regulating target transcriptomes related to TGF-β signalling. 
siRNA sequences with an antifibrotic miRNA-like activity that could synergistically treat COVID-19 are available online ( http://clip.korea.ac.kr/covid19 ).",2021-09-27 +32921304,Review of medical image recognition technologies to detect melanomas using neural networks.,"

Background

Melanoma is one of the most aggressive types of cancer that has become a world-class problem. According to the World Health Organization estimates, 132,000 cases of the disease and 66,000 deaths from malignant melanoma and other forms of skin cancer are reported annually worldwide ( https://apps.who.int/gho/data/?theme=main ) and those numbers continue to grow. In our opinion, due to the increasing incidence of the disease, it is necessary to find new, easy to use and sensitive methods for the early diagnosis of melanoma in a large number of people around the world. Over the last decade, neural networks show highly sensitive, specific, and accurate results.

Objective

This study presents a review of PubMed papers including requests «melanoma neural network» and «melanoma neural network dermatoscopy». We review recent researches and discuss their opportunities acceptable in clinical practice.

Methods

We searched the PubMed database for systematic reviews and original research papers on the requests «melanoma neural network» and «melanoma neural network dermatoscopy» published in English. Only papers that reported results, progress and outcomes are included in this review.

Results

We found 11 papers that match our requests that observed convolutional and deep-learning neural networks combined with fuzzy clustering or World Cup Optimization algorithms in analyzing dermatoscopic images. All of them require an ABCD (asymmetry, border, color, and differential structures) algorithm and its derivates (in combination with ABCD algorithm or separately). Also, they require a large dataset of dermatoscopic images and optimized estimation parameters to provide high specificity, accuracy and sensitivity.

Conclusions

According to the analyzed papers, neural networks show higher specificity, accuracy and sensitivity than dermatologists. Neural networks are able to evaluate features that might be unavailable to the naked human eye. Despite that, we need more datasets to confirm those statements. Nowadays machine learning becomes a helpful tool in early diagnosing skin diseases, especially melanoma.",2020-09-14 +34102270,"The traditional uses, phytochemistry, and pharmacological properties of Paris L. (Liliaceae): A review.","

Ethnopharmacological relevance

Paris L. (Liliaceae) consisted of 33 species, of which the study focused on Paris polyphylla Smith, P. polyphylla var. chinensis (Franch.) Hara, and P. polyphylla Smith var. yunnanensis (Franch.) Hand.-Mazz. Owing to its good analgesic and hemostatic effects, it was traditionally used to treat trauma by folk herbalists.

Aim of this review

This study summarized the traditional uses, distributions, phytochemical components, pharmacological properties, and toxicity evaluation of the genus Paris, and reviewed the economic value of cultivated P. polyphylla. The aim was to provide a new and comprehensive recognition of these medicinal plants for the further utilization of Paris plants.

Materials and methods

The literature about traditional and folk uses of genus Paris was obtained from Duxiu Search, and China National Knowledge Infrastructure (CNKI). The other literature about genus Paris was searched online on Web of Science, PubMed, Google Scholar, Baidu Scholar, Scifinder database, and Springer research. The Scientific Database of China Plant Species (DCP) (http://db.kib.ac.cn/Default.aspx) databases were used to check the scientific names and provide species, varieties, and distribution of genus Paris. The botany studies information of genus Paris was available online from Plant Plus of China (www.iplant.cn). All the molecular structures of chemical compounds displayed in the text were produced by ChemBioDraw Ultra 14.0.

Results

The plants of genus Paris, containing about 33 species and 15 varieties, are mainly distributed in Southwest China (Yunnan, Sichuan, and Guizhou provinces). More than 320 chemical components have been isolated from genus Paris since 2020, including steroidal saponins, C-21 steroids, phytosterols, insect hormones, pentacyclic triterpenes, flavonoids, and other compounds. Arrays of pharmacological investigations revealed that compounds and extracts of Paris species possess a wide spectrum of pharmacological effects, such as antitumor, cytotoxic, antimicrobial, antifungal, hemostatic, and anti-inflammatory activities. The studies about toxicity evaluation suggested that Rhizome Paridis had slight liver toxicity.

Conclusions

The dried rhizomes of P. polyphylla, P. polyphylla var. chinensis, and P. polyphylla var. yunnanensis were used to treat wound, bleeding, and stomachache, etc. in folk medicine. Phytochemistry researches showed that different species had pretty similarities especially in terms of chemical constituents. Pharmacological studies witnessed that Rhizome Paridis has various activities. Among these activities, steroidal saponins were the main active ingredients. Furthermore, an important aspect responsible for increasing interest in genus Paris is the use of antifertility-nonhormonal contraceptives by women. Also, the development of TCM (Traditional Chinese medicine) planting industry can improve the income of ethnic minorities and promote economic development.",2021-06-05 +34111777,PID: An integrative and comprehensive platform of plant intron.,"Intron is a non-coding sequence of a broken gene and participates in important biological processes, such as transcription regulation, alternative splicing, and nuclear export. With the development of plant genomes, a comprehensive platform for intron analysis in plants must be established. Plant Intron Database (PID), a publicly available searchable database, was developed to efficiently store, query, analyze, and integrate intron resources in plants. The information of intron, exon, and gene can be searched by key words in PID. Users cannot only view intron length distribution pie chart and 5' and 3' splice site sequence feature maps in a statistical interface but can also browse intron information in a graphical visualization interface through JBrowse. ViroBlast for sequence homology searches, Intron detection and sequence interception tools were also provided. PID contains annotated genes from 118 sequenced plants, 24,782,048 introns, 30,843,049 exons, and 414 visual maps. This tool will greatly accelerate research on the distribution, length characteristics, and functions of introns in plants. 
PID is accessible at http://biodb.sdau.edu.cn/PID/index.php.",2021-06-05 +26360909,VirtualMicroscopy: ultra-fast interactive microscopy of gigapixel/terapixel images over internet.,"As digital imaging technology advances, gigapixel or terapixel super resolution microscopic images become available. We have built a real time virtual microscopy technique for interactive analysis of super high resolution microscopic images over internet on standard desktops, laptops or mobile devices. The presented virtual microscopy technique is demonstrated to perform as fast as using a microscopy locally without any delay to assess gigapixel ultra high resolution image data through wired or wireless internet by a Tablet or a standard PC. More importantly, the presented technology enables analysis of super high resolution microscopic image across sites and time and allows multi-person analysis at the same time, which greatly speed up data analysis process and reduces miscommunication among scientists and doctors. A web site has been created for illustration purposes. (http://www-o.ntust.edu.tw/~cweiwang/VirtualMicroscopy).",2015-09-11 +32450240,Sensitivity and Safety of Electromagnetic Navigation Bronchoscopy for Lung Cancer Diagnosis: Systematic Review and Meta-analysis.,"

Background

Bronchoscopy is a useful tool for the diagnosis of lesions near central airways; however, the diagnostic accuracy of these procedures for peripheral pulmonary lesions (PPLs) is a matter of ongoing debate. In this setting, electromagnetic navigation bronchoscopy (ENB) is a technique used to navigate and obtain samples from these lesions. This systematic review and meta-analysis aims to explore the sensitivity of ENB in patients with PPLs suspected of lung cancer.

Research question

In patients with peripheral pulmonary lesions suspected of lung cancer, what are the sensitivity and safety of electromagnetic navigation bronchoscopy compared to surgery or longitudinal follow up?

Study design and methods

A comprehensive search of several databases was performed. Extracted data included sensitivity of ENB for malignancy, adequacy of the tissue sample, and complications. The study quality was assessed using the QUADAS-2 tool, and the combined data were meta-analyzed using a bivariate method model. A summary receiver operating characteristic curve (sROC) was created. Finally, the quality of evidence was rated using the Grading of Recommendations Assessment, Development and Evaluation approach.

Results

Forty studies with a total of 3,342 participants were included in our analysis. ENB reported a pooled sensitivity of 77% (95% CI, 72%-82%; I2 = 80.6%) and a specificity of 100% (95% CI, 99%-100%; I2 = 0%) for malignancy. The sROC showed an area under the curve of 0.955 (P = .03). ENB achieved a sufficient sample for ancillary tests in 90.9% (95% CI, 84.8%-96.9%; I2 = 80.7%). Risk of pneumothorax was 2.0% (95% CI, 1.0-3.0; I2 = 45.2%). We found subgroup differences according to the risk of bias and the number of sampling techniques. Meta-regression showed an association between sensitivity and the mean distance of the sensor tip to the center of the nodule, the number of tissue sampling techniques, and the cancer prevalence in the study.

Interpretation

ENB is very safe with good sensitivity for diagnosing malignancy in patients with PPLs. The applicability of our findings is limited because most studies were done with the superDimension navigation system and heterogeneity was high.

Trial registry

PROSPERO; No.: CRD42019109449; URL: https://www.crd.york.ac.uk/prospero/.",2020-05-23 +34121800,Testing for overall and cluster convergence of housing rents using robust methodology: evidence from Polish provincial capitals.,"The aim of this paper is to test for overall and cluster convergence of housing rents across Polish provincial capitals and to identify drivers of convergence club formation. In order to achieve the goal of the study, several novel convergence tests were used, including the Kong et al. (J Econom 209:185-207, 2019. https://doi.org/10.1016/j.jeconom.2018.12.022) and Phillips and Sul (Econometrica 75:1771-1855, 2007. https://doi.org/10.1111/j.1468-0262.2007.00811.x) approaches. Moreover, club convergence analysis was carried out in four different configurations, varying in the technique of trend component extraction from the data. In particular, three well-known methods of time series decomposition were used, i.e. the Hodrick-Prescott, Butterworth and Christiano-Fitzgerald filters, as well as the most recent boosted Hodrick-Prescott filter. The results indicated that rental prices across the studied cities do not share a common path in the long run. It is possible, however, to identify convergence clubs where rents are moving towards a club-specific steady state. Detailed analysis of the structure of estimated clusters showed that data filtering using the boosted Hodrick-Prescott method leads to the most reliable allocation of cities to convergence clubs. Moreover, the estimation of logit models revealed that the likelihood of any two cities belonging to the same convergence club depends mainly on similar levels in terms of the unemployment rate, housing stock, city area, and the number of students. 
Finally, recommendations for local and national policy-makers concerning the development of the rental market have been formulated, particularly in the areas of urban land-use planning policy, housing legislation and public-private partnerships.",2021-06-06 +33166013,"Using Integrative Modeling Platform to compute, validate, and archive a model of a protein complex structure.","Biology is advanced by producing structural models of biological systems, such as protein complexes. Some systems are recalcitrant to traditional structure determination methods. In such cases, it may still be possible to produce useful models by integrative structure determination that depends on simultaneous use of multiple types of data. An ensemble of models that are sufficiently consistent with the data is produced by a structural sampling method guided by a data-dependent scoring function. The variation in the ensemble of models quantified the uncertainty of the structure, generally resulting from the uncertainty in the input information and actual structural heterogeneity in the samples used to produce the data. Here, we describe how to generate, assess, and interpret ensembles of integrative structural models using our open source Integrative Modeling Platform program (https://integrativemodeling.org).",2020-12-03 +32392342,"EnzymeMiner: automated mining of soluble enzymes with diverse structures, catalytic properties and stabilities.","Millions of protein sequences are being discovered at an incredible pace, representing an inexhaustible source of biocatalysts. Despite genomic databases growing exponentially, classical biochemical characterization techniques are time-demanding, cost-ineffective and low-throughput. Therefore, computational methods are being developed to explore the unmapped sequence space efficiently. Selection of putative enzymes for biochemical characterization based on rational and robust analysis of all available sequences remains an unsolved problem. 
To address this challenge, we have developed EnzymeMiner-a web server for automated screening and annotation of diverse family members that enables selection of hits for wet-lab experiments. EnzymeMiner prioritizes sequences that are more likely to preserve the catalytic activity and are heterologously expressible in a soluble form in Escherichia coli. The solubility prediction employs the in-house SoluProt predictor developed using machine learning. EnzymeMiner reduces the time devoted to data gathering, multi-step analysis, sequence prioritization and selection from days to hours. The successful use case for the haloalkane dehalogenase family is described in a comprehensive tutorial available on the EnzymeMiner web page. EnzymeMiner is a universal tool applicable to any enzyme family that provides an interactive and easy-to-use web interface freely available at https://loschmidt.chemi.muni.cz/enzymeminer/.",2020-07-01 +33505707,GRACy: A tool for analysing human cytomegalovirus sequence data.,"Modern DNA sequencing has instituted a new era in human cytomegalovirus (HCMV) genomics. A key development has been the ability to determine the genome sequences of HCMV strains directly from clinical material. This involves the application of complex and often non-standardized bioinformatics approaches to analysing data of variable quality in a process that requires substantial manual intervention. To relieve this bottleneck, we have developed GRACy (Genome Reconstruction and Annotation of Cytomegalovirus), an easy-to-use toolkit for analysing HCMV sequence data. GRACy automates and integrates modules for read filtering, genotyping, genome assembly, genome annotation, variant analysis, and data submission. These modules were tested extensively on simulated and experimental data and outperformed generic approaches. GRACy is written in Python and is embedded in a graphical user interface with all required dependencies installed by a single command. 
It runs on the Linux operating system and is designed to allow the future implementation of a cross-platform version. GRACy is distributed under a GPL 3.0 license and is freely available at https://bioinformatics.cvr.ac.uk/software/ with the manual and a test dataset.",2020-12-30 +32402084,MISCAST: MIssense variant to protein StruCture Analysis web SuiTe.,"Human genome sequencing efforts have greatly expanded, and a plethora of missense variants identified both in patients and in the general population is now publicly accessible. Interpretation of the molecular-level effect of missense variants, however, remains challenging and requires a particular investigation of amino acid substitutions in the context of protein structure and function. Answers to questions like 'Is a variant perturbing a site involved in key macromolecular interactions and/or cellular signaling?', or 'Is a variant changing an amino acid located at the protein core or part of a cluster of known pathogenic mutations in 3D?' are crucial. Motivated by these needs, we developed MISCAST (missense variant to protein structure analysis web suite; http://miscast.broadinstitute.org/). MISCAST is an interactive and user-friendly web server to visualize and analyze missense variants in protein sequence and structure space. Additionally, a comprehensive set of protein structural and functional features have been aggregated in MISCAST from multiple databases, and displayed on structures alongside the variants to provide users with the biological context of the variant location in an integrated platform. 
We further made the annotated data and protein structures readily downloadable from MISCAST to foster advanced offline analysis of missense variants by a wide biological community.",2020-07-01 +32673043,Power contours: Optimising sample size and precision in experimental psychology and human neuroscience.,"When designing experimental studies with human participants, experimenters must decide how many trials each participant will complete, as well as how many participants to test. Most discussion of statistical power (the ability of a study design to detect an effect) has focused on sample size, and assumed sufficient trials. Here we explore the influence of both factors on statistical power, represented as a 2-dimensional plot on which iso-power contours can be visualized. We demonstrate the conditions under which the number of trials is particularly important, that is, when the within-participant variance is large relative to the between-participants variance. We then derive power contour plots using existing data sets for 8 experimental paradigms and methodologies (including reaction times, sensory thresholds, fMRI, MEG, and EEG), and provide example code to calculate estimates of the within- and between-participants variance for each method. In all cases, the within-participant variance was larger than the between-participants variance, meaning that the number of trials has a meaningful influence on statistical power in commonly used paradigms. An online tool is provided (https://shiny.york.ac.uk/powercontours/) for generating power contours, from which the optimal combination of trials and participants can be calculated when designing future studies. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2020-07-16 +34158935,Global earth mineral inventory: A data legacy.,"Minerals contain important clues to understanding the complex geologic history of Earth and other planetary bodies. 
Therefore, geologists have been collecting mineral samples and compiling data about these samples for centuries. These data have been used to better understand the movement of continental plates, the oxidation of Earth's atmosphere and the water regime of ancient martian landscapes. Datasets found at 'RRUFF.info/Evolution' and 'mindat.org' have documented a wealth of mineral occurrences around the world. One of the main goals in geoinformatics has been to facilitate discovery by creating and merging datasets from various scientific fields and using statistical methods and visualization tools to inspire and test hypotheses applicable to modelling Earth's past environments. To help achieve this goal, we have compiled physical, chemical and geological properties of minerals and linked them to the above-mentioned mineral occurrence datasets. As a part of the Deep Time Data Infrastructure, funded by the W.M. Keck Foundation, with significant support from the Deep Carbon Observatory (DCO) and the A.P. Sloan Foundation, GEMI ('Global Earth Mineral Inventory') was developed from the need of researchers to have all of the required mineral data visible in a single portal, connected by a robust, yet easy to understand schema. Our data legacy integrates these resources into a digestible format for exploration and analysis and has allowed researchers to gain valuable insights from mineralogical data. GEMI can be considered a network, with every node representing some feature of the datasets, for example, a node can represent geological parameters like colour, hardness or lustre. Exploring subnetworks gives the researcher a specific view of the data required for the task at hand. GEMI is accessible through the DCO Data Portal (https://dx.deepcarbon.net/11121/6200-6954-6634-8243-CC). 
We describe our efforts in compiling GEMI, the Data Policies for usage and sharing, and the evaluation metrics for this data legacy.",2020-11-11 +34082700,"Cruxome: a powerful tool for annotating, interpreting and reporting genetic variants.","

Background

Next-generation sequencing (NGS) is an efficient tool used for identifying pathogenic variants that cause Mendelian disorders. However, the lack of bioinformatics training of researchers makes the interpretation of identified variants a challenge in terms of precision and efficiency. In addition, the non-standardized phenotypic description of human diseases also makes it difficult to establish an integrated analysis pathway for variant annotation and interpretation. Solutions to these bottlenecks are urgently needed.

Results

We develop a tool named ""Cruxome"" to automatically annotate and interpret single nucleotide variants (SNVs) and small insertions and deletions (InDels). Our approach greatly simplifies the current burdensome task of clinical geneticists and scientists to identify the causative pathogenic variants and build personal knowledge reference bases. The integrated architecture of Cruxome offers key advantages such as an interactive and user-friendly interface and the assimilation of electronic health records of the patient. By combining a natural language processing algorithm, Cruxome can efficiently process the clinical description of diseases to HPO standardized vocabularies. By using machine learning, in silico predictive algorithms, integrated multiple databases and supplementary tools, Cruxome can automatically process SNVs and InDels variants (trio-family or proband-only cases) and clinical diagnosis records, then annotate, score, identify and interpret pathogenic variants to finally generate a standardized clinical report following American College of Medical Genetics and Genomics/ Association for Molecular Pathology (ACMG/AMP) guidelines. Cruxome also provides supplementary tools to examine and visualize the genes or variations in historical cases, which can help to better understand the genetic basis of the disease.

Conclusions

Cruxome is an efficient tool for annotation and interpretation of variations and dramatically reduces the workload for clinical geneticists and researchers to interpret NGS results, simplifying their decision-making processes. We present an online version of Cruxome, which is freely available to academics and clinical researchers. The site is accessible at http://114.251.61.49:10024/cruxome/ .",2021-06-03 +29129553,MitoepigenomeKB a comprehensive resource for human mitochondrial epigenetic data.,"Epigenetic modifications in the mitochondrial genome has been an emerging area of interest in the recent years in the field of mitochondrial biology. The renewed interest in the area has been largely fueled by a number of reports in the recent years suggesting the presence of epigenetic modifications in human mitochondrial genome and their associations with exposure to environmental factors and human diseases and or traits. Nevertheless there has been no systematic effort to curate, organize this information to enable cross-comparison between studies and datasets. We compiled 62 datasets from 9 studies on the epigenetic modifications in human mitochondrial genome to create a comprehensive catalog. This catalog is available as a user friendly interface - mitoepigenomeKB, where the data could be searched, browsed or visualized. The resource is available at URL: http://clingen.igib.res.in/mitoepigenome/. We hope mitoepigenomeKB would emerge as a central resource for datasets on epigenetic modifications in human mitochondria and would serve as the starting point to understanding the biology of human mitochondrial epigenome.",2017-11-09 +34428401,Spatial transcriptional mapping of the human nephrogenic program.,"Congenital abnormalities of the kidney and urinary tract are among the most common birth defects, affecting 3% of newborns. The human kidney forms around a million nephrons from a pool of nephron progenitors over a 30-week period of development. 
To establish a framework for human nephrogenesis, we spatially resolved a stereotypical process by which equipotent nephron progenitors generate a nephron anlage, then applied data-driven approaches to construct three-dimensional protein maps on anatomical models of the nephrogenic program. Single-cell RNA sequencing identified progenitor states, which were spatially mapped to the nephron anatomy, enabling the generation of functional gene networks predicting interactions within and between nephron cell types. Network mining identified known developmental disease genes and predicted targets of interest. The spatially resolved nephrogenic program made available through the Human Nephrogenesis Atlas (https://sckidney.flatironinstitute.org/) will facilitate an understanding of kidney development and disease and enhance efforts to generate new kidney structures.",2021-08-01 +26432828,"MaizeGDB update: new tools, data and interface for the maize model organism database.","MaizeGDB is a highly curated, community-oriented database and informatics service to researchers focused on the crop plant and model organism Zea mays ssp. mays. Although some form of the maize community database has existed over the last 25 years, there have only been two major releases. In 1991, the original maize genetics database MaizeDB was created. In 2003, the combined contents of MaizeDB and the sequence data from ZmDB were made accessible as a single resource named MaizeGDB. Over the next decade, MaizeGDB became more sequence driven while still maintaining traditional maize genetics datasets. This enabled the project to meet the continued growing and evolving needs of the maize research community, yet the interface and underlying infrastructure remained unchanged. 
In 2015, the MaizeGDB team completed a multi-year effort to update the MaizeGDB resource by reorganizing existing data, upgrading hardware and infrastructure, creating new tools, incorporating new data types (including diversity data, expression data, gene models, and metabolic pathways), and developing and deploying a modern interface. In addition to coordinating a data resource, the MaizeGDB team coordinates activities and provides technical support to the maize research community. MaizeGDB is accessible online at http://www.maizegdb.org.",2015-10-01 +34754049,Three-dimensional semi-automated volumetric assessment of the pulp space of teeth following regenerative dental procedures.,"The volumetric change that occurs in the pulp space over time represents a critical measure when it comes to determining the secondary outcomes of regenerative endodontic procedures (REPs). However, to date, only a few studies have investigated the accuracy of the available domain-specialized medical imaging tools with regard to three-dimensional (3D) volumetric assessment. This study sought to compare the accuracy of two different artificial intelligence-based medical imaging programs namely OsiriX MD (v 9.0, Pixmeo SARL, Bernex Switzerland, https://www.osirix-viewer.com ) and 3D Slicer ( http://www.slicer.org ), in terms of estimating the volume of the pulp space following a REP. An Invitro assessment was performed to check the reliability and sensitivity of the two medical imaging programs in use. For the subsequent clinical application, pre- and post-procedure cone beam computed tomography scans of 35 immature permanent teeth with necrotic pulp and periradicular pathosis that had been treated with a cell-homing concept-based REP were processed using the two biomedical DICOM software programs (OsiriX MD and 3D Slicer). The volumetric changes in the teeth's pulp spaces were assessed using semi-automated techniques in both programs. 
The data were statistically analyzed using t-tests and paired t-tests (P = 0.05). The pulp space volumes measured using both programs revealed a statistically significant decrease in the pulp space volume following the REP (P < 0.05), with no significant difference being found between the two programs (P > 0.05). The mean decreases in the pulp space volumes measured using OsiriX MD and 3D Slicer were 25.06% ± 19.45% and 26.10% ± 18.90%, respectively. The open-source software (3D Slicer) was found to be as accurate as the commercially available software with regard to the volumetric assessment of the post-REP pulp space. This study was the first to demonstrate the step-by-step application of 3D Slicer, a user-friendly and easily accessible open-source multiplatform software program for the segmentation and volume estimation of the pulp spaces of teeth treated with REPs.",2021-11-09 +33750289,Y-LineageTracker: a high-throughput analysis framework for Y-chromosomal next-generation sequencing data.,"

Background

Y-chromosome DNA (Y-DNA) has been used for tracing paternal lineages and offers a clear path from an individual to a known, or likely, direct paternal ancestor. The advance of next-generation sequencing (NGS) technologies increasingly improves the resolution of the non-recombining region of the Y-chromosome (NRY). However, a lack of suitable computer tools prevents the use of NGS data from the Y-DNA studies.

Results

We developed Y-LineageTracker, a high-throughput analysis framework that not only utilizes state-of-the-art methodologies to automatically determine NRY haplogroups and identify microsatellite variants of Y-chromosome on a fine scale, but also optimizes comprehensive Y-DNA analysis methods for NGS data. Notably, Y-LineageTracker integrates the NRY haplogroup and Y-STR analysis modules with recognized strategies to robustly suggest an interpretation for paternal genetics and evolution. NRY haplogroup module mainly covers haplogroup classification, clustering analysis, phylogeny construction, and divergence time estimation of NRY haplogroups, and Y-STR module mainly includes Y-STR genotyping, statistical calculation, network analysis, and estimation of time to the most recent common ancestor (TMRCA) based on Y-STR haplotypes. Performance comparison indicated that Y-LineageTracker outperformed existing Y-DNA analysis tools for the high performance and satisfactory visualization effect.

Conclusions

Y-LineageTracker is an open-source and user-friendly command-line tool that provides multiple functions to efficiently analyze Y-DNA from NGS data at both Y-SNP and Y-STR levels. Additionally, Y-LineageTracker supports various formats of input data and produces high-quality figures suitable for publication. Y-LineageTracker is coded with Python3 and supports Windows, Linux, and macOS platforms, and can be installed manually or via the Python Package Index (PyPI). The source code, examples, and manual of Y-LineageTracker are freely available at https://www.picb.ac.cn/PGG/resource.php or CodeOcean ( https://codeocean.com/capsule/7424381/tree ).",2021-03-09 +33866021,"Loss of Claudin-3 Impairs Hepatic Metabolism, Biliary Barrier Function, and Cell Proliferation in the Murine Liver.","

Background & aims

Tight junctions in the liver are essential to maintain the blood-biliary barrier, however, the functional contribution of individual tight junction proteins to barrier and metabolic homeostasis remains largely unexplored. Here, we describe the cell type-specific expression of tight junction genes in the murine liver, and explore the regulation and functional importance of the transmembrane protein claudin-3 in liver metabolism, barrier function, and cell proliferation.

Methods

The cell type-specific expression of hepatic tight junction genes is described using our mouse liver single-cell sequencing data set. Differential gene expression in Cldn3-/- and Cldn3+/+ livers was assessed in young and aged mice by RNA sequencing (RNA-seq), and hepatic tissue was analyzed for lipid content and bile acid composition. A surgical model of partial hepatectomy was used to induce liver cell proliferation.

Results

Claudin-3 is a highly expressed tight junction protein found in the liver and is expressed predominantly in hepatocytes and cholangiocytes. The histology of Cldn3-/- livers showed no overt phenotype, and the canalicular tight junctions appeared intact. Nevertheless, by RNA-seq we detected a down-regulation of metabolic pathways in the livers of Cldn3-/- young and aged mice, as well as a decrease in lipid content and a weakened biliary barrier for primary bile acids, such as taurocholic acid, taurochenodeoxycholic acid, and taurine-conjugated muricholic acid. Coinciding with defects in the biliary barrier and lower lipid metabolism, there was a diminished hepatocyte proliferative response in Cldn3-/- mice after partial hepatectomy.

Conclusions

Our data show that, in the liver, claudin-3 is necessary to maintain metabolic homeostasis, retention of bile acids, and optimal hepatocyte proliferation during liver regeneration. The RNA-seq data set can be accessed at: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE159914.",2021-04-15 +33104790,"PhycoCosm, a comparative algal genomics resource.","Algae are a diverse, polyphyletic group of photosynthetic eukaryotes spanning nearly all eukaryotic lineages of life and collectively responsible for ∼50% of photosynthesis on Earth. Sequenced algal genomes, critical to understanding their complex biology, are growing in number and require efficient tools for analysis. PhycoCosm (https://phycocosm.jgi.doe.gov) is an algal multi-omics portal, developed by the US Department of Energy Joint Genome Institute to support analysis and distribution of algal genome sequences and other 'omics' data. PhycoCosm provides integration of genome sequence and annotation for >100 algal genomes with available multi-omics data and interactive web-based tools to enable algal research in bioenergy and the environment, encouraging community engagement and data exchange, and fostering new sequencing projects that will further these research goals.",2021-01-01 +33618772,MAIP: a web service for predicting blood-stage malaria inhibitors.,"Malaria is a disease affecting hundreds of millions of people across the world, mainly in developing countries and especially in sub-Saharan Africa. It is the cause of hundreds of thousands of deaths each year and there is an ever-present need to identify and develop effective new therapies to tackle the disease and overcome increasing drug resistance. Here, we extend a previous study in which a number of partners collaborated to develop a consensus in silico model that can be used to identify novel molecules that may have antimalarial properties. 
The performance of machine learning methods generally improves with the number of data points available for training. One practical challenge in building large training sets is that the data are often proprietary and cannot be straightforwardly integrated. Here, this was addressed by sharing QSAR models, each built on a private data set. We describe the development of an open-source software platform for creating such models, a comprehensive evaluation of methods to create a single consensus model and a web platform called MAIP available at https://www.ebi.ac.uk/chembl/maip/ . MAIP is freely available for the wider community to make large-scale predictions of potential malaria inhibiting compounds. This project also highlights some of the practical challenges in reproducing published computational methods and the opportunities that open-source software can offer to the community.",2021-02-22 +31724725,oRNAment: a database of putative RNA binding protein target sites in the transcriptomes of model species.,"Protein-RNA interactions are essential for controlling most aspects of RNA metabolism, including synthesis, processing, trafficking, stability and degradation. In vitro selection methods, such as RNAcompete and RNA Bind-n-Seq, have defined the consensus target motifs of hundreds of RNA-binding proteins (RBPs). However, readily available information about the distribution features of these motifs across full transcriptomes was hitherto lacking. Here, we introduce oRNAment (o RNA motifs enrichment in transcriptomes), a database that catalogues the putative motif instances of 223 RBPs, encompassing 453 motifs, in a transcriptome-wide fashion. The database covers 525 718 complete coding and non-coding RNA species across the transcriptomes of human and four prominent model organisms: Caenorhabditis elegans, Danio rerio, Drosophila melanogaster and Mus musculus. 
The unique features of oRNAment include: (i) hosting of the most comprehensive mapping of RBP motif instances to date, with 421 133 612 putative binding sites described across five species; (ii) options for the user to filter the data according to a specific threshold; (iii) a user-friendly interface and efficient back-end allowing the rapid querying of the data through multiple angles (i.e. transcript, RBP, or sequence attributes) and (iv) generation of several interactive data visualization charts describing the results of user queries. oRNAment is freely available at http://rnabiology.ircm.qc.ca/oRNAment/.",2020-01-01 +31701128,DriverDBv3: a multi-omics database for cancer driver gene research.,"An integrative multi-omics database is needed urgently, because focusing only on analysis of one-dimensional data falls far short of providing an understanding of cancer. Previously, we presented DriverDB, a cancer driver gene database that applies published bioinformatics algorithms to identify driver genes/mutations. The updated DriverDBv3 database (http://ngs.ym.edu.tw/driverdb) is designed to interpret cancer omics' sophisticated information with concise data visualization. To offer diverse insights into molecular dysregulation/dysfunction events, we incorporated computational tools to define CNV and methylation drivers. Further, four new features, CNV, Methylation, Survival, and miRNA, allow users to explore the relations from two perspectives in the 'Cancer' and 'Gene' sections. The 'Survival' panel offers not only significant survival genes, but gene pairs synergistic effects determine. A fresh function, 'Survival Analysis' in 'Customized-analysis,' allows users to investigate the co-occurring events in user-defined gene(s) by mutation status or by expression in a specific patient group. 
Moreover, we redesigned the web interface and provided interactive figures to interpret cancer omics' sophisticated information, and also constructed a Summary panel in the 'Cancer' and 'Gene' sections to visualize the features on multi-omics levels concisely. DriverDBv3 seeks to improve the study of integrative cancer omics data by identifying driver genes and contributes to cancer biology.",2020-01-01 +31680168,TSEA-DB: a trait-tissue association map for human complex traits and diseases.,"Assessing the causal tissues of human traits and diseases is important for better interpreting trait-associated genetic variants, understanding disease etiology, and improving treatment strategies. Here, we present a reference database for trait-associated tissue specificity based on genome-wide association study (GWAS) results, named Tissue-Specific Enrichment Analysis DataBase (TSEA-DB, available at https://bioinfo.uth.edu/TSEADB/). We collected GWAS summary statistics data for a wide range of human traits and diseases followed by rigorous quality control. The current version of TSEA-DB includes 4423 data sets from the UK Biobank (UKBB) and 596 from other resources (GWAS Catalog and literature mining), totaling 5019 unique GWAS data sets and 15 770 trait-associated gene sets. TSEA-DB aims to provide reference tissue(s) enriched with the genes from GWAS. To this end, we systematically performed a tissue-specific enrichment analysis using our recently developed tool deTS and gene expression profiles from two reference tissue panels: the GTEx panel (47 tissues) and the ENCODE panel (44 tissues). The comprehensive trait-tissue association results can be easily accessed, searched, visualized, analyzed, and compared across the studies and traits through our web site. 
TSEA-DB represents one of the many timely and comprehensive approaches in exploring human trait-tissue association.",2020-01-01 +33733367,Genetic Interaction Network Interpretation: A Tidy Data Science Perspective.,"As practitioners, we aim to provide a consolidated introduction of tidy data science along with routine packages for relational data representation and interpretation, with the focus on analytics related to human genetic interactions. We describe three showcases (also made available at https://23verse.github.io/gini ), all done so via the R one-liner, in this chapter defined as a sequential pipeline of elementary functions chained together achieving a complex task. We guide the readers through step-by-step instructions on (case 1) performing network module analysis of genetic interactions, followed by visualization and interpretation; (case 2) implementing a practical strategy of how to identify and interpret tissue-specific genetic interactions; and (case 3) carrying out interaction-based tissue clustering and differential interaction analysis. All showcases demonstrate simplistic beauty and efficient nature of this analytics. We anticipate that mastering a dozen of one-liners to efficiently interpret genetic interactions is very timely now; opportunities for computational translational research are arising for data scientists to harness therapeutic potential of human genetic interaction data that are ever-increasingly available.",2021-01-01 +33096244,iODA: An integrated tool for analysis of cancer pathway consistency from heterogeneous multi-omics data.,"The latest advances in the next generation sequencing technology have greatly facilitated the extensive research of genomics and transcriptomics, thereby promoting the decoding of carcinogenesis with unprecedented resolution. 
Considering the contribution of analyzing high-throughput multi-omics data to the exploration of cancer molecular mechanisms, an integrated tool for heterogeneous multi-omics data analysis (iODA) is proposed for the systems-level interpretation of multi-omics data, i.e., transcriptomic profiles (mRNA or miRNA expression data) and protein-DNA interactions (ChIP-Seq data). Considering the data heterogeneity, iODA can compare six statistical algorithms in differential analysis for the selected sample data and assist users in choosing the globally optimal one for dysfunctional mRNA or miRNA identification. Since molecular signatures are more consistent at the pathway level than at the gene level, the tool is able to enrich the identified dysfunctional molecules onto the KEGG pathways and extracted the consistent items as key components for further pathogenesis investigation. Compared with other tools, iODA is multi-functional for the systematic analysis of different level of omics data, and its analytical power was demonstrated through case studies of single and cross-level prostate cancer omics data. iODA is open source under GNU GPL and can be downloaded from http://www.sysbio.org.cn/iODA.",2020-10-20 +34289011,MDContactCom: a tool to identify differences of protein molecular dynamics from two MD simulation trajectories in terms of interresidue contacts. ,"Comparing results from multiple MD simulations performed under different conditions is essential during the initial stages of analysis. We propose a tool called MD Contact Comparison (MDContactCom) that compares residue-residue contact fluctuations of two MD trajectories, quantifies the differences, identifies sites that exhibit large differences, and visualizes those sites on the protein structure. 
Using this method, it is possible to identify sites affected by varying simulation conditions and reveal the path of propagation of the effect even when differences between the 3D structure of the molecule and the fluctuation RMSF of each residue is unclear. MDContactCom can monitor differences in complex protein dynamics between two MD trajectories and identify candidate sites to be analyzed in more detail. As such, MDContactCom is a versatile software package for analyzing most MD simulations. MDContactCom is freely available for download on GitLab. The software is implemented in Python3. https://gitlab.com/chiemotono/mdcontactcom. Supplementary data are available at Bioinformatics online.",2021-07-21 +34156447,mPPI: a database extension to visualize structural interactome in a one-to-many manner. ,"Protein-protein interaction (PPI) databases with structural information are useful to investigate biological functions at both systematic and atomic levels. However, most existing PPI databases only curate binary interactome. From the perspective of the display and function of PPI, as well as the structural binding interface, the related database and resources are summarized. We developed a database extension, named mPPI, for PPI structural visualization. Comparing with the existing structural interactomes that curate resolved PPI conformation in pairs, mPPI can visualize target protein and its multiple interactors simultaneously, which facilitates multi-target drug discovery and structure prediction of protein macro-complexes. By employing a protein-protein docking algorithm, mPPI largely extends the coverage of structural interactome from experimentally resolved complexes. mPPI is designed to be a customizable and convenient plugin for PPI databases. It possesses wide potential applications for various PPI databases, and it has been used for a neurodegenerative disease-related PPI database as demonstration. 
Scripts and implementation guidelines of mPPI are documented at the database tool website. Database URL  http://bis.zju.edu.cn/mppi/.",2021-06-01 +34289555,[SERPINE2 promotes cellular migration and invasion in esophageal squamous cell carcinoma by activating β-catenin].,"Objective: To clarify the function and molecular mechanisms of serpin family E member 2 (SERPINE2) in cellular migration and invasion of esophageal squamous cell carcinoma (ESCC). Methods: The expression of SERPINE2 in ESCC was analyzed by using online databases TCGA (http: //gepia.cancer-pku.cn/detail.php and http: //ualcan.path.uab. edu/index.html). The expressions of SERPINE2 mRNA in normal human esophageal epithelial cell line NE2, human ESCC cell lines KYSE30 and KYSE150 were detected by quantitative reverse transcriptase-polymerase chain reaction (qRT-PCR). SERPINE2-konckdown or SERPINE2-overexpressed plasmid was transfected into KYSE30 cells, and the efficiencies of the knockdown and overexpression system were tested by qRT-PCR. The relationships of SERPINE2 and ESCC migration and invasion were determined by migration and invasion assays in vitro. The associations between SERPINE2 expression and β-catenin as well as its target genes including c-Myc, cyclin D1 and CD44 were analyzed by immunofluorescence, qRT-PCR and western blot, respectively. Results: The expressions of SERPINE2 were significantly upregulated in both esophageal cancer (ESCA) and ESCC tissues compared to normal tissues by analyzing 182 and 95 cases, respectively (P<0.01). SERPINE2 is highly expressed in both KYSE30 and KYSE150 cells (P<0.05). The number of migrating and invading cells in control group were (212.66±24.11)/field and (136.00±14.42)/field, while were (88.33±9.71)/field and (77.00±9.53)/field in SERPINE2-knockdown 1 group, and (66.00±8.00)/field and (45.66±3.78)/field in SERPINE2-knockdown 2 group, respectively, and the differences were dramatically significant compared with the control group (P<0.01). 
The number of migrating and invading cells in control group were (250.00±30.00)/field and (203.33±15.27)/field, while were (383.33±35.11)/field and (246.66±25.16)/field in SERPINE2-overpressed group, and the differences were strikingly significant compared with the control group (P<0.01). The protein expression of β-catenin was upregulated while phosphorylated β-catenin protein expression was downregulated in SERPINE2-overexpressed KYSE30 cells when compared to control cells.The transcription activity of β-catenin was significantly upregulated and the mRNA expressions of its target genes including c-Myc, cyclin D1 and CD44 were all increased. After treated with 25 μM iCRT14, the number of migrated cells in the control and SERPINE2-overpressed groups were (200.00±36.05)/field and (258.33±22.54)/field, and the number of invaded cells were (160.00±17.32)/field and (188.33±25.65)/field, respectively, the differences were dramatically significant compared with the group without iCRT14 treatment (P<0.01). Conclusion: SERPINE2 is significantly upregulated in ESCC cells and can promote cellular migration and invasion by activating β-catenin, which may provide a potential therapeutic target for patients with ESCC.",2021-06-01 +34277802,Evaluation of the reporting quality of clinical practice guidelines on gliomas using the RIGHT checklist.,"

Background

The reporting quality of clinical practice guidelines (CPGs) for gliomas has not yet been thoroughly assessed. The International Reporting Items for Practice Guidelines in Healthcare (RIGHT) statement developed in 2016 provides a reporting framework to improve the quality of CPGs. We aimed to estimate the reporting quality of glioma guidelines using the RIGHT checklist and investigate how the reporting quality differs by selected characteristics.

Methods

We systematically searched electronic databases, guideline databases, and medical society websites to retrieve CPGs on glioma published between 2018 and 2020. We calculated the compliance of the CPGs to individual items, domains and the RIGHT checklist overall. We performed stratified analyses by publication year, country of development, reporting of funding, and impact factor (IF) of the journal.

Results

Our search revealed 20 eligible guidelines. Mean overall adherence to the RIGHT statement was 54.6%. Eight CPGs reported more than 60% of the items, and five reported less than 50%. All guidelines adhered to the items 1a, 3, 7a, 13a, while no guidelines reported the items 17 or 18b (see http://www.right-statement.org/right-statement/checklist for a description of the items). Two of the seven domains, ""Basic information"" and ""Background"", had mean reporting rates above 60%. The ""Review and quality assurance"" domain had the lowest mean reporting rate, 12.5%. The reporting quality of guidelines published in 2020, guidelines developed in the United States, and guidelines that reported funding tended to be above average.

Conclusions

The reporting quality of CPGs on gliomas is low and needs improvement. Particular attention should be paid on reporting the external review and quality assurance process. The use of the RIGHT criteria should be encouraged to guide the development, reporting and evaluation of CPGs.",2021-06-01 +34124923,"""Should I keep studying? Consequences of a decision to stop learning in young and older adults."" Correction to Krogulska et al. (2021).","Reports an error in ""Should I keep studying? Consequences of a decision to stop learning in young and older adults"" by Aleksandra Krogulska, Karolina Golik, Krystian Barzykowski, Jennifer Cox, Agata Jakubiak and Elizabeth A. Maylor (Psychology and Aging, 2021[Mar], Vol 36[2], 158-171). In the article (https:// doi.org/10.1037/pag0000594), there was a typographical error in the grant number awarded to Aleksandra Krogulska. The correct grant number is UMO-2016/21/N/HS6/02953. The online version of this article has been corrected. (The following abstract of the original article appeared in record 2021-24143-001.) In situations of cognitive overload, the role of a metacognitive decision to stop learning is of utmost importance. We investigated how young and older adults decide to stop learning as a strategy for maximizing memory performance when they face to-be-learned material exceeding their memory capability. People may decide to stop learning for two main reasons: they experience a growing feeling of disfluency as a learning episode progresses and/or they perceive such a decision to be beneficial for future memory performance. In Experiments 1 and 2, participants studied lists of 50 words. The majority of young and older adults stopped learning in conditions where they were allowed to do so. This decision, counterintuitively, decreased the number of recalled words. 
Crucially, a similar number of young and older adults stopped the presentation of to-be-remembered material, and both age groups suffered comparable consequences in their memory performance. In Experiments 3a and 3b, participants read an experimental scenario and decided whether they would stop learning based on this description alone. People in different age groups predicted their metacognitive decisions similarly. However, participants' forecasted performance did not reflect the negative influence of these decisions. Regardless of their age, people tend to make a suboptimal decision to stop learning, unaware of its negative consequences. Together, our results suggest that young and older adults can exert metamemory control to similar degrees even though their decisions may not be beneficial for memory performance. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-06-01 +33242091,"CSDB_GT, a curated glycosyltransferase database with close-to-full coverage on three most studied nonanimal species.","We report the accomplishment of the first stage of the development of a novel manually curated database on glycosyltransferase (GT) activities, CSDB_GT. CSDB_GT (http://csdb.glycoscience.ru/gt.html) has been supplemented with GT activities from Saccharomyces cerevisiae. Now it provides the close-to-complete coverage on experimentally confirmed GTs from the three most studied model organisms from the three kingdoms: plantae (Arabidopsis thaliana, ca. 930 activities), bacteria (Escherichia coli, ca. 820 activities) and fungi (S. cerevisiae, ca. 270 activities).",2021-06-01 +34098669,[High-fat diet promotes the impact of periodontitis on gut microbiota and glucose metabolism].,"Objective: To study whether high-fat diet could aggravate the effect of periodontitis on gut microbiota and glucose metabolism. 
Methods: Twenty-four male SD rats were randomly and equally divided into four groups based on table of random numbers (n=6 in each group): control group, in which rats were given normal chow diet; periodontitis group, in which periodontitis was induced by ligating bilateral maxillary second molars with 5-0 silk thread; high-fat diet group, in which rats were given high-fat diet; high-fat diet+periodontitis group, in which rats were given high-fat diet and periodontitis was induced at the end of the 8th week. Fasting blood glucose and glucose tolerance were measured at the end of the 12th week. Then the rats were euthanized and the cecum content was collected. The microbial 16S rRNA gene sequencing was performed on the Illumina MiSeq platform. The taxonomy of the sequences was analyzed through RDP Classifier (http://rdp.cme.msu.edu/) against the SILVA (SSU123) 16S rRNA database. Pearson correlation analysis was performed to analyze the correlation between changes in gut microbiota and blood glucose. Results: After 4 weeks of periodontitis induction, the fasting blood glucose levels of the periodontitis group and the high-fat diet group were (4.93±0.28) and (5.25±0.24) mmol/L, respectively, which were significantly higher than that of the control group [(4.56±0.20) mmol/L] (P<0.05) with glucose intolerance. The fasting blood glucose level of high-fat diet+periodontitis group [(5.53±0.14) mmol/L] was significantly higher than that of periodontitis group and high-fat diet group, respectively (P<0.05), with the glucose tolerance curve higher than that of periodontitis group. The 16S rRNA gene analysis revealed that the Bacteroides/Firmicutes ratio in the periodontitis group is (0.37±0.23), which was significantly lower than that of the control group (0.68±0.05) (P<0.05). The relative abundance of Lachnospiraceae_NK4A136_group in the periodontitis group was (14.03±6.38)%, which was significantly lower than that of the control group [(28.21±4.82)%] (P<0.05). 
The relative abundance of Allobaculum [(4.27±2.67)%], Ruminococcaceae_UCG_005 [(3.70±0.90)%], Blautia [(0.63±0.45)%] in the periodontitis group were significantly higher than those of the control group [(0.60±0.72) %, (0.43±0.16) %, (0.13±0.13) %, respectively](P<0.05). Compared with periodontitis group, the relative abundance of Proteobacteria in high-fat diet+periodontitis group [(3.06±0.90)%] was significantly higher than that of the periodontitis group [(1.40±0.98)%] (P<0.05). The principal coordinate analysis and similarity analysis based on the Bray-Curtis distance showed that samples of the high-fat diet+periodontitis group clustered separately from the periodontitis group and the high-fat diet group. The results of correlation analysis showed that the abundance of Lachnospiraceae_NK4A136_group was negatively correlated with fasting blood glucose and glucose levels after loading for 60 and 120 minutes (r=-0.56, -0.50, -0.42, respectively) (P<0.05). The abundance of Allobaculum, [Eubacterium]_coprostanoligenes_group, Peptococcaceae_uncultured, [Ruminococcus]_torques_group, and several genera belonging to the Proteobacteria were positively correlated with glucose levels after loading for 120 minutes (P<0.05). Conclusions: Periodontitis might be closely related to impaired gut microbiota and glucose metabolism, and the effect could be aggravated by high-fat diet.",2021-06-01 +34059664,Comprehensive analysis of SSRs and database construction using all complete gene-coding sequences in major horticultural and representative plants.,"Simple sequence repeats (SSRs) are one of the most important genetic markers and widely exist in most species. Here, we identified 249,822 SSRs from 3,951,919 genes in 112 plants. Then, we conducted a comprehensive analysis of these SSRs and constructed a plant SSR database (PSSRD). Interestingly, more SSRs were found in lower plants than in higher plants, showing that lower plants needed to adapt to early extreme environments. 
Four specific enriched functional terms in the lower plant Chlamydomonas reinhardtii were detected when it was compared with seven other higher plants. In addition, Guanylate_cyc existed in more genes of lower plants than of higher plants. In our PSSRD, we constructed an interactive plotting function in the chart interface, and users can easily view the detailed information of SSRs. All SSR information, including sequences, primers, and annotations, can be downloaded from our database. Moreover, we developed Web SSR Finder and Batch SSR Finder tools, which can be easily used for identifying SSRs. Our database was developed using PHP, HTML, JavaScript, and MySQL, which are freely available at http://www.pssrd.info/ . We conducted an analysis of the Myb gene families and flowering genes as two applications of the PSSRD. Further analysis indicated that whole-genome duplication and whole-genome triplication played a major role in the expansion of the Myb gene families. These SSR markers in our database will greatly facilitate comparative genomics and functional genomics studies in the future.",2021-06-01 +33137190,Ensembl 2021.,"The Ensembl project (https://www.ensembl.org) annotates genomes and disseminates genomic data for vertebrate species. We create detailed and comprehensive annotation of gene structures, regulatory elements and variants, and enable comparative genomics by inferring the evolutionary history of genes and genomes. Our integrated genomic data are made available in a variety of ways, including genome browsers, search interfaces, specialist tools such as the Ensembl Variant Effect Predictor, download files and programmatic interfaces. Here, we present recent Ensembl developments including two new website portals. Ensembl Rapid Release (http://rapid.ensembl.org) is designed to provide core tools and services for genomes as soon as possible and has been deployed to support large biodiversity sequencing projects. 
Our SARS-CoV-2 genome browser (https://covid-19.ensembl.org) integrates our own annotation with publicly available genomic data from numerous sources to facilitate the use of genomics in the international scientific response to the COVID-19 pandemic. We also report on other updates to our annotation resources, tools and services. All Ensembl data and software are freely available without restriction.",2021-01-01 +34295551,Identification of differentially expressed proteins in the locoregional recurrent esophageal squamous cell carcinoma by quantitative proteomics.,"

Background

This study aimed to identify potential biomarkers associated with locoregional recurrence in patients with esophageal squamous cell carcinoma (ESCC) after radical resection.

Methods

We performed a quantitative proteomics analysis using isobaric tags for relative and absolute quantification (iTRAQ) with reversed-phase liquid chromatography-mass spectrometry (RPLC-MS) to identify differential expression proteins (DEPs) between a locoregional recurrence group and good prognosis group of ESCC after radical esophagectomy. The bioinformatics analysis was performed with ingenuity pathway analysis software (IPA) and Gene Ontology (GO) database using the software of MAS 3.0. Kaplan-Meier (KM) Plotter Online Tool (http://www.kmplot.com) was used to evaluate the relationship between the differential expression of proteins and survival in patients with ESCC.

Results

More than 400 proteins were quantitated of which 27 proteins had upregulated expression and 55 proteins had downregulated expression in the locoregional recurrence group compared to the good prognosis group. These 82 DEPs were associated with biological procession of cancer development including cellular movement, cellular assembly and organization, cellular function and maintenance, cellular growth and proliferation, cell death and survival, DNA replication recombination and repair, and so on. Of these DEPs, SPTAN1 and AGT proteins were identified to be associated with RFS in ESCC. SPTAN1 was positively associated with RFS and AGT was negatively associated with RFS. Expression of SPTAN1 tended to have favorable OS while expression of AGT tended to have poor OS.

Conclusions

Our results demonstrated that quantitative proteomics is an effective discovery tool to identify biomarkers for prognosis prediction in ESCC. However, it needs more studies with large populations of ESCC to validate these potential biomarkers.",2021-06-01 +31680159,EPD in 2020: enhanced data visualization and extension to ncRNA promoters.,"The Eukaryotic Promoter Database (EPD), available online at https://epd.epfl.ch, provides accurate transcription start site (TSS) information for promoters of 15 model organisms plus corresponding functional genomics data that can be viewed in a genome browser, queried or analyzed via web interfaces, or exported in standard formats (FASTA, BED, CSV) for subsequent analysis with other tools. Recent work has focused on the improvement of the EPD promoter viewers, which use the UCSC Genome Browser as visualization platform. Thousands of high-resolution tracks for CAGE, ChIP-seq and similar data have been generated and organized into public track hubs. Customized, reproducible promoter views, combining EPD-supplied tracks with native UCSC Genome Browser tracks, can be accessed from the organism summary pages or from individual promoter entries. Moreover, thanks to recent improvements and stabilization of ncRNA gene catalogs, we were able to release promoter collections for certain classes of ncRNAs from human and mouse. Furthermore, we developed automatic computational protocols to assign orphan TSS peaks to downstream genes based on paired-end (RAMPAGE) TSS mapping data, which enabled us to add nearly 9000 new entries to the human promoter collection. 
Since our last article in this journal, EPD was extended to five more model organisms: rhesus monkey, rat, dog, chicken and Plasmodium falciparum.",2020-01-01 +31511885,SNP2APA: a database for evaluating effects of genetic variants on alternative polyadenylation in human cancers.,"Alternative polyadenylation (APA) is an important post-transcriptional regulation that recognizes different polyadenylation signals (PASs), resulting in transcripts with different 3' untranslated regions, thereby influencing a series of biological processes and functions. Recent studies have revealed that some single nucleotide polymorphisms (SNPs) could contribute to tumorigenesis and development through dysregulating APA. However, the associations between SNPs and APA in human cancers remain largely unknown. Here, using genotype and APA data of 9082 samples from The Cancer Genome Atlas (TCGA) and The Cancer 3'UTR Altas (TC3A), we systematically identified SNPs affecting APA events across 32 cancer types and defined them as APA quantitative trait loci (apaQTLs). As a result, a total of 467 942 cis-apaQTLs and 30 721 trans-apaQTLs were identified. By integrating apaQTLs with survival and genome-wide association studies (GWAS) data, we further identified 2154 apaQTLs associated with patient survival time and 151 342 apaQTLs located in GWAS loci. In addition, we designed an online tool to predict the effects of SNPs on PASs by utilizing PAS motif prediction tool. Finally, we developed SNP2APA, a user-friendly and intuitive database (http://gong_lab.hzau.edu.cn/SNP2APA/) for data browsing, searching, and downloading. SNP2APA will significantly improve our understanding of genetic variants and APA in human cancers.",2020-01-01 +30903186,BrAPI-an application programming interface for plant breeding applications.,"

Motivation

Modern genomic breeding methods rely heavily on very large amounts of phenotyping and genotyping data, presenting new challenges in effective data management and integration. Recently, the size and complexity of datasets have increased significantly, with the result that data are often stored on multiple systems. As analyses of interest increasingly require aggregation of datasets from diverse sources, data exchange between disparate systems becomes a challenge.

Results

To facilitate interoperability among breeding applications, we present the public plant Breeding Application Programming Interface (BrAPI). BrAPI is a standardized web service API specification. The development of BrAPI is a collaborative, community-based initiative involving a growing global community of over a hundred participants representing several dozen institutions and companies. Development of such a standard is recognized as critical to a number of important large breeding system initiatives as a foundational technology. The focus of the first version of the API is on providing services for connecting systems and retrieving basic breeding data including germplasm, study, observation, and marker data. A number of BrAPI-enabled applications, termed BrAPPs, have been written, that take advantage of the emerging support of BrAPI by many databases.

Availability and implementation

More information on BrAPI, including links to the specification, test suites, BrAPPs, and sample implementations is available at https://brapi.org/. The BrAPI specification and the developer tools are provided as free and open source.",2019-10-01 +34499147,SplicingFactory-splicing diversity analysis for transcriptome data. ,"Alternative splicing contributes to the diversity of RNA found in biological samples. Current tools investigating patterns of alternative splicing check for coordinated changes in the expression or relative ratio of RNA isoforms where specific isoforms are up- or downregulated in a condition. However, the molecular process of splicing is stochastic and changes in RNA isoform diversity for a gene might arise between samples or conditions. A specific condition can be dominated by a single isoform, while multiple isoforms with similar expression levels can be present in a different condition. These changes might be the result of mutations, drug treatments or differences in the cellular or tissue environment. Here, we present a tool for the characterization and analysis of RNA isoform diversity using isoform level expression measurements. We developed an R package called SplicingFactory, to calculate various RNA isoform diversity metrics, and compare them across conditions. Using the package, we tested the effect of RNA-seq quantification tools, quantification uncertainty, gene expression levels, and isoform numbers on the isoform diversity calculation. We analyzed a set of CD34+ hematopoietic stem cells and myelodysplastic syndrome samples and found a set of genes whose isoform diversity change is associated with SF3B1 mutations. The SplicingFactory package is freely available under the GPL-3.0 license from Bioconductor for the Windows, MacOS and Linux operating systems (https://www.bioconductor.org/packages/release/bioc/html/SplicingFactory.html). 
Supplementary data are available at Bioinformatics online.",2021-09-09 +,Mapping the risk of evaporated milk spoilage in the Mediterranean region based on the effect of temperature conditions on Geobacillus stearothermophilus growth,"A predictive model for the effect of storage temperature on the growth of Geobacillus stearothermophilus was applied in order to assess the risk of evaporated milk spoilage in the markets of the Mediterranean region. The growth of G. stearothermophilus in evaporated milk was evaluated during a shelf life of one year based on historical temperature profiles (hourly) covering 23 Mediterranean capitals for five years over the period 2012–2016 obtained from the Weather Underground database (http://www.wunderground.com/). In total, 115 scenarios were tested simulating the distribution and storage conditions of evaporated milk in the Mediterranean region. The highest growth of G. stearothermophilus was predicted for Marrakech, Damascus and Cairo over the period 2012–2016 with mean values of 7.2, 7.4 and 5.5 log CFU/ml, respectively, followed by Tunis, Podgorica and Tripoli with mean growth of 2.8, 2.4 and 2.3 log CFU/ml, respectively. For the rest 17 capitals the mean growth of the spoiler was <1.5 log CFU/ml. The capitals Podgorica, Cairo, Tunis and Ankara showed the highest variability in the growth during the 5 years examined with standard deviation values for growth of 2.01, 1.79, 1.77 and 1.25 log CFU/ml, respectively. The predicted extent and the variability of growth during the shelf life were used to assess the risk of spoilage which was visualised in a geographical risk map. The growth model of G. stearothermophilus was also used to evaluate adjustments of the evaporated milk expiration date which can reduce the risk of spoilage. 
The quantitative data provided in the present study can assist the food industry to effectively evaluate the microbiological stability of these products throughout distribution and storage at a reduced cost (by reducing sampling quality control) and assess whether and under which conditions (e.g. expiration date) will be able to export a product to a country without spoilage problems. This decision support may lead to a significant benefit for both the competitiveness of the food industry and the consumer.",2018-09-01 +25717189,rSeqNP: a non-parametric approach for detecting differential expression and splicing from RNA-Seq data.,"

Unlabelled

High-throughput sequencing of transcriptomes (RNA-Seq) has become a powerful tool to study gene expression. Here we present an R package, rSeqNP, which implements a non-parametric approach to test for differential expression and splicing from RNA-Seq data. rSeqNP uses permutation tests to assess statistical significance and can be applied to a variety of experimental designs. By combining information across isoforms, rSeqNP is able to detect more differentially expressed or spliced genes from RNA-Seq data.

Availability and implementation

The R package with its source code and documentation are freely available at http://www-personal.umich.edu/~jianghui/rseqnp/.

Contact

jianghui@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-24 +34498538,Quercetin Antagonizes Esophagus Cancer by Modulating miR-1-3p/TAGLN2 Pathway-Dependent Growth and Metastasis.,"The progression of esophagus cancer (EC) is associated with the alterative expressions of multiple microRNAs (miRs). MiR-1-3p is reported to inhibit the development of EC by targeting TAGLN2. Quercetin (Que) is a natural compound capable of antagonizing esophagus carcinoma (EC). In the current study, the role of miR-1-3p/TAGLN2 axis in the anti-EC function of Que was explored. Human EC cell lines KYSE-510 and TE-7 were treated with Que. Then the effects of Que on the growth and metastasis of EC cells, and on the activity of miR-1-3p/TAGLN2 axis were detected. The interaction between Que and miR-1-3p axis was further assessed by inhibiting miR-1-3p level in EC cells. The results showed that the treatment of Que impaired the growth and induced cell apoptosis in EC cells. The invasive ability of EC cells was also suppressed by Que. At molecular level, the expression of miR-1-3p was induced, while the expression of TAGLN2 was suppressed by Que. Moreover, the anti-EC effects of Que were blocked by miR-1-3p inhibition, which was represented by the restored growth and invasion of EC cells. Collectively, the current study demonstrated that Que exerted inhibitory effects on EC cells by inducing miR-1-3p. Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1972125.",2021-09-09 +34454307,HOXD8 inhibits the proliferation and migration of triple-negative breast cancer cells and induces apoptosis in them through regulation of AKT/mTOR pathway.,"HOXD8 (Homeobox D8) functions as an apoptotic inducer to suppress tumor progression. However, the role of HOXD8 in triple-negative breast cancer (TNBC) has not been fully understood. 
Firstly, HOXD8 was found to be reduced in TNBC tissues based on the TCGA samples through Ualcan (http://ualcan.path.uab.edu/analysis.html) prediction. Moreover, data from qRT-PCR and western blot confirmed the lower expression of HOXD8 in the TNBC tissues or cells than that in paracancerous tissues or human mammary epithelial cell line (MCF10A), respectively. Secondly, pcDNA-mediated over-expression of HOXD8 were conducted in TNBC cells, and the gain-of functional assays showed that over-expression of HOXD8 promoted TNBC cell progression with repressed cell apoptosis and induced proliferation, migration and invasion. Moreover, xenografted mouse model was constructed by injection of tumor cell line with stable over-expression of HOXD8 to assess the in vivo tumor growth, and the results revealed that over-expression of HOXD8 inhibited tumor growth. Lastly, our results showed that AKT and mTOR phosphorylation were repressed by HOXD8 over-expression in TNBC cells. In conclusion, HOXD8 functioned as an apoptotic inducer to suppress TNBC cell growth and progression by inhibition of AKT/mTOR pathway.",2021-08-25 +34431435,Combinatorial Effect of Temozolomide and Naringenin in Human Glioblastoma Multiforme Cell Lines.,"Glioblastoma multiforme (GBM) is a grade IV, lethal, and the most common type of brain tumor. GBM can acquire resistance to temozolomide (TMZ) recommended for its treatment. Naringenin (NAG), a flavonoid generally found in grapefruit, has antioxidant, anti-proliferative, and anti-inflammatory properties. It has been reported that phytochemicals can reduce resistance and improve the efficacy of a chemo-resistant drug. The combinatorial effect of TMZ and NAG on cell proliferation was evaluated using 3-4,5-dimethylthiazol-2-yl-2,5-diphenyltetrazolium bromide (MTT) assay, and the apoptosis in the U87MG and LN229 GBM cells were evaluated by change in fluorescence intensity. 
The effect of NAG and TMZ on anchorage-independent single-cell colony formation and cell migration was investigated. NAG and TMZ demonstrated enhanced cytotoxic effects on U87MG and LN229 cell lines. The combination index value being less than one indicated the synergistic action of the two drugs in restricting the growth of the cells. The NAG and TMZ together resulted in higher fluorescence intensity as compared to either drug alone. Further, the study showed a marked reduction in the migration of the cells and the formation of a single cell colony. Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1952438.",2021-08-25 +31853471,Panel data to investigate pricing behavior in the Spanish retail fuel market.,"The data described in this article were collected daily over the period 18 August 2014 to 15 June 2015 from the website of the Spanish Ministry of Industry, Energy and Tourism http://geoportalgasolineras.es. The database includes information on almost all gas stations located in Spain that sell to the public. For each gas station we have information of daily diesel prices (both posted price and net of taxes), brands and locations (latitude and longitude) and the Brent price. We also share a Stata program file to identify the nearest competitors of each gas station and their brand. The program also computes the distance to the nearest refinery and its brand. The data base can be used for analyzing firms pricing behavior focusing, for example, on asymmetric pricing, cartels or vertical integration, among other topics. 
This data base is used in the paper [2] ""Effects of antitrust prosecution on retail fuel prices"" that analyze the impact on prices of an antitrust sanction imposed to several brands on February 2015.",2019-11-27 +34056737,Comprehensive search filters for retrieving publications on nonhuman primates for literature reviews (filterNHP).,"Nonhuman primates (NHPs) are widely studied across many scientific disciplines using a variety of techniques in diverse environments. Due to the wide scope of NHP research, substantial overlap in research topics and questions can occur, whose resulting scientific evidence is synthesized by literature reviews. Identifying all relevant research on a particular topic involving NHPs can be difficult and time consuming. By adopting objective search development techniques from systematic reviews, we developed search filters to detect all scientific publications involving NHPs in PubMed, PsycINFO (via EBSCOhost), and Web of Science. We compared the performance of our comprehensive NHP search filters to search strings typical of a novice database user (i.e., NHP simple search strings) and validated their sensitivity by combining these searches with a topic search of cortisol related studies. For all comparisons, our comprehensive NHP search filters retrieved considerably more scientific publications than the NHP simple search strings. Importantly, our comprehensive NHP search filters are easy to use (text can be copied and pasted into the database search engine) and detect the most recent publications that have yet to be indexed by the bibliographic databases queried. Additionally, we developed filterNHP, an R package and web-based application (https://filterNHP.dpz.eu), for researchers interested in literature searches involving a taxonomic sub-group of NHPs. filterNHP alleviates time necessary for adapting our comprehensive NHP search filters for a particular NHP sub-group by automatizing the creation of these search filters. 
Altogether, our comprehensive NHP search filters and those for taxonomic sub-groups generated by filterNHP will enable swift and easy retrieval of the available scientific literature involving NHPs, and thereby help enhance the quality of new NHP literature reviews that guide future scientific research (new experiments) and public policy (e.g., on welfare and conservation).",2021-05-31 +,"First Detection of Wheat streak mosaic virus in Two Perennial Weed Species, Agropyron cristatum and Hordeum jubatum subsp. intermedium, in Canada","Wheat streak mosaic virus (WSMV) causes wheat streak mosaic disease, a potentially devastating infection of wheat with a worldwide distribution. WSMV is a positive-sense, single-stranded, monopartite RNA virus in the family Potyviridae. WSMV is vectored by the wheat curl mite (Aceria tosichella Keifer), and seed-transmission has been reported at a very low rate (Jones et al. 2005). This virus infects all varieties of wheat (Triticum aestivum L.), and most isolates of WSMV can infect barley (Hordeum vulgare L.), oats (Avena sativa L.), some varieties of maize (Zea mays L.), millet (Panicum, Setaria, and Echinochloa L. spp.), and a number of grassy weed species (Singh et al. 2018). WSMV is known for causing damage to wheat in western Canada. The most recent outbreak of WSMV in western Canada occurred in southern Alberta and Saskatchewan in 2016 to 2017 (Harding et al. 2017), with some fields showing total crop failure. Typical symptoms of infection are stunting, pale-green to yellow streaking in a mosaic pattern, and fewer and smaller heads. Severe infection can lead to shriveled grains and plant death. Both the wheat curl mite and WSMV can survive the winter on fall-seeded winter wheat, volunteer wheat, and some perennial grasses. Control of living hosts such as volunteer wheat plants and grassy weed hosts at least 2 weeks before planting the new crop of wheat is recommended for managing the diseases (Watkins et al. 1989). 
Knowledge of the host status of grassy weed species is important for making evidence-based management decisions. More than 40 species of grassy weeds have been reported as host of WSMV (Singh et al. 2018). During the 2017 WSMV survey in Alberta, plants belonging to two species of commonly occurring perennial weeds, crested wheatgrass (Agropyron cristatum [L.] Gaertn.) and foxtail barley (Hordeum jubatum subsp. intermedium Bowden), were sampled from the edges of a WSMV-positive wheat field. Morphological identities of both weed species were confirmed using description and keys available in the Flora of North America North of Mexico (Barkworth 2007; Bothmer et al. 2007). Weed samples were tested for WSMV by polymerase chain reaction targeting amplification of 144-bp NIb-specific fragment using primers WS5-7750 (5′-CTTATCAATGCCGACACAAAGGA-3′) and WS3-7895 (5′-GCTTCATGAATGTGTGTGACATGTA-3′) (Schubert et al. 2015). Out of four crested wheatgrass plants and two foxtail barley plants, one plant of each weed species was found to be infected with WSMV. As a further confirmation, whole coat protein (CP) gene was amplified from the positive samples using primers WSMV-CP-AM-F2 (5′-CTGGGACCCGAACGGATTTAG-3′) and WSMV-CP-AM-R (5′-GCTCACGCAAGAGCGTTTAC-3′) and cloned in pMiniT vector (New England Biolabs, Ipswich, MA). Cloned inserts were sequenced at The Centre for Applied Genomics (Toronto, ON, Canada). BLASTN analysis of the obtained CP gene sequences showed ≥98% sequence identity to CP gene sequences of WSMV isolates CK93 (AF511598), WO93 (AF511644), and KY0083SV (AF511624) present in NCBI nucleotide database. A. cristatum has been previously reported to be immune to WSMV (Slykhuis 1952). More recent surveys of grassy weeds either did not detect (Ito et al. 2012) or rarely detected WSMV in A. cristatum (Brey 1998). This is the first report of WSMV detection in A. cristatum in Canada. H. jubatum has been listed as nonhost to WSMV (http://www.dpvweb.net/dpv/showdpv.php?dpvno=393). 
To the best of our knowledge, this is the first report of detection of WSMV from H. jubatum subsp. intermedium. Further studies are needed to evaluate the role of these weed species in WSMV epidemiology under Canadian growing conditions.",2019-06-01 +34293476,CoVrimer: A tool for aligning SARS-CoV-2 primer sequences and selection of conserved/degenerate primers.,"As mutations in SARS-CoV-2 virus accumulate rapidly, novel primers that amplify this virus sensitively and specifically are in demand. We have developed a webserver named CoVrimer by which users can search for and align existing or newly designed conserved/degenerate primer pair sequences against the viral genome and assess the mutation load of both primers and amplicons. CoVrimer uses mutation data obtained from an online platform established by NGDC-CNCB (12 May 2021) to identify genomic regions, either conserved or with low levels of mutations, from which potential primer pairs are designed and provided to the user for filtering based on generalized and SARS-CoV-2 specific parameters. Alignments of primers and probes can be visualized with respect to the reference genome, indicating variant details and the level of conservation. Consequently, CoVrimer is likely to help researchers with the challenges posed by viral evolution and is freely available at http://konulabapps.bilkent.edu.tr:3838/CoVrimer/.",2021-07-20 +31972373,MMPdb and MitoPredictor: Tools for facilitating comparative analysis of animal mitochondrial proteomes.,"Data on experimentally-characterized animal mitochondrial proteomes (mt-proteomes) are limited to a few model organisms and are scattered across multiple databases, impeding a comparative analysis. We developed two resources to address these problems. 
First, we re-analyzed proteomic data from six species with experimentally characterized mt-proteomes: animals (Homo sapiens, Mus musculus, Caenorhabditis elegans, and Drosophila melanogaster), and outgroups (Acanthamoeba castellanii and Saccharomyces cerevisiae) and created the Metazoan Mitochondrial Proteome Database (MMPdb) to host the results. Second, we developed a novel pipeline, ""MitoPredictor"" that uses a Random Forest classifier to infer mitochondrial localization of proteins based on orthology, mitochondrial targeting signal prediction, and protein domain analyses. Both tools generate an R Shiny applet that can be used to visualize and interact with the results and can be used on a personal computer. MMPdb is also available online at https://mmpdb.eeob.iastate.edu/.",2020-01-20 +33704425,DCI: Learning Causal Differences between Gene Regulatory Networks. ,"Designing interventions to control gene regulation necessitates modeling a gene regulatory network by a causal graph. Currently, large-scale expression datasets from different conditions, cell types, disease states and developmental time points are being collected. However, application of classical causal inference algorithms to infer gene regulatory networks based on such data is still challenging, requiring high sample sizes and computational resources. Here, we describe an algorithm that efficiently learns the differences in gene regulatory mechanisms between different conditions. Our difference causal inference (DCI) algorithm infers changes (i.e., edges that appeared, disappeared or changed weight) between two causal graphs given gene expression data from the two conditions. This algorithm is efficient in its use of samples and computation since it infers the differences between causal graphs directly without estimating each possibly large causal graph separately. 
We provide a user-friendly Python implementation of DCI and also enable the user to learn the most robust difference causal graph across different tuning parameters via stability selection. Finally, we show how to apply DCI to single-cell RNA-seq data from different conditions and cell states, and we also validate our algorithm by predicting the effects of interventions. Python package freely available at http://uhlerlab.github.io/causaldag/dci. Supplementary information is available at Bioinformatics online.",2021-03-11 +34899577,"A National Representative, Cross-Sectional Study by the Hellenic Academy of NeuroImmunology (HEL.A.NI.) on COVID-19 and Multiple Sclerosis: Overall Impact and Willingness Toward Vaccination.","Background: In the context of the coronavirus disease 2019 (COVID-19) pandemic, the constant needs of people with multiple sclerosis (PwMS) and their caregivers were urgently highlighted. Aim: The present study aims to capture the effects of the COVID-19 pandemic in several aspects of the quality of life of PwMS, in perception and behavior to COVID-19 and multiple sclerosis (MS), as well as concerning healthcare, working conditions, and the willingness toward COVID-19 vaccination. Methods: This study is an initiative of the Hellenic Academy of Neuroimmunology (HEL.A.NI.) and it has been included in the MS Data Alliance (MSDA) Catalog, which can be accessed after creating an account on https://msda.emif-catalogue.eu/login. Two online questionnaires were administered: (i) impact of the COVID-19 pandemic on the quality of life, behavior, and healthcare of PwMS (Questionnaire A) and (ii) vaccination against COVID-19 (Questionnaire B). People with MS were invited to participate by the Hellenic Federation of Persons with Multiple Sclerosis (HFoPwMS). Results: Three-hundred-ninety PwMS responded to Questionnaire A, whereas 176 PwMS provided answers for Questionnaire B. 
Older age, longer disease duration, and higher MS-related disability were associated with the increased perceived sensitivity toward severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) infection, as well as the increased perceived severity of COVID-19 upon potential infection. A significant proportion of PwMS experienced restricted access to MS-related health professionals, disease-modifying therapy (DMT) prescription, and/or to MS-related laboratory examination due to the pandemic. Subgroups of PwMS reported exacerbated symptoms (i.e., chronic MS-related symptoms, fatigue and/or worsening of pre-existing fatigue, and sexual dysfunction and/or worsening of pre-existing sexual dysfunction). Overall, the majority of the participants reported either a strong willingness to get vaccinated against COVID-19 or a likeliness to undergo vaccination. Being aware of the HEL.A.NI. recommendations regarding COVID-19 vaccination for PwMS was reported to increase the willingness of the participants to receive the vaccine. Conclusions: Our results highlight the necessity of scientific and patient organizations in taking joint action to increase awareness on health-related issues during the pandemic and to provide accurate and up-to-date guidance for PwMS. Online information and communications technology (ICT) tools for polling public belief and behavior may prove valuable as means of retaining active routes of communication between stakeholders.",2021-11-25 +33100615,Health Heatmap of India: An Open Data Platform.,"Health Heatmap of India is an open data platform built for bringing together data from diverse sources and facilitating visualization, analysis, and insight building from such data. In this paper, we describe the context and need for such an open data platform and describe the technical aspects of building it. 
The beta site of the portal is available at https://healthheatmapindia.org.",2020-10-18 +,"10.4 MAPPING NEURO-BEHAVIORAL RELATIONSHIPS IN DIMENSIONAL GEOMETRIC EMBEDDING (N-BRIDGE) VIA PHARMACOLOGY, COMPUTATION AND CLINICAL NEUROIMAGING: UNIFYING CATEGORIES AND DIMENSIONS ALONG THE PSYCHOSIS SPECTRUM","Abstract

Background

A key challenge in the field of neuropsychiatry lies in matching patients with effective treatments. Most studies in psychiatry operate under the canonical assumption that categorical diagnostic clinical grouping and/or pre-existing clinical assessments are the ‘gold standard’ for describing behavioral - and therefore neural - variation in patients. Attempts to robustly characterize the neural substrates of these predefined variables have yielded limited success, suggesting an inadequate mapping to neurobiologically meaningful variation. Notably, a great deal of heterogeneity exists even within groups of patients with the same categorical diagnosis. Thus, understanding the mapping between specific behaviors and clinically-meaningful variation in neural properties is critical to develop and ultimately administer effective individualized neurobehavioral treatments.

Methods

Here, we describe a multivariate neuro-behavioral framework under which behavioral variation can be mapped to features of specific neural systems in a data-driven way. We leverage neural (fMRI-derived) and behavioral data from 436 psychosis-spectrum patients across genders that were publicly available via the NIMH Data Archive as part of the Bipolar & Schizophrenia Consortium for Parsing Intermediate Phenotypes study (https://ndar.nih.gov/edit_collection.html?id=2274). We relate these findings to effects from two pharmacological neuroimaging experiments manipulating the NMDA glutamate receptor via ketamine (N=40, both genders) and the 5-HT receptor via LSD (N=24, both genders).

Results

We first identify dimensions of maximal behavioral variation in patients by performing a principal component analysis across all behavioral measures. Importantly, these dimensions are not parallel to traditional clinical symptom scales derived from pre-existing clinical instruments used in psychiatry, and do not reflect conventional categorical diagnostic boundaries. We then demonstrate that variation along our identified behavioral dimensions relates to variation in specific neural systems, using a data-driven measure of functional connectivity (global brain connectivity) (p<.05 whole-brain corrected). Critically, these robust neuro-behavioral relationships were not observed using either traditional diagnostic groups or a priori clinical scales. We also demonstrate the flexibility to embed both categorical and continuous/dimensional features within the same generalized multivariate geometry. We further show that this framework can inform the identification of pharmacological targets for developing drugs for specific symptom profiles and may provide an assisted selection of behavioral measures that precisely pinpoint variation in a specific neural circuit at the individual subject-level in relation to ketamine and LSD effects.

Conclusions

Characterizing how and which specific sets of symptoms map to neural circuitry is a key step towards developing targeted and effective treatments for psychiatric disorders. We propose the Neuro-Behavioral Relationships In Dimensional Geometric Embedding (N-BRIDGE) framework as a key step towards unified mapping between the geometry of data-driven behavioral variation and the geometry of data-driven neural variation, thus integrating both categories and continuous dimensions in psychiatry.",2019-04-01 +32754757,CerealsDB-new tools for the analysis of the wheat genome: update 2020. ,"CerealsDB (www.cerealsdb.uk.net) is an online repository of mainly hexaploid wheat (Triticum aestivum) single nucleotide polymorphisms (SNPs) and genotyping data. The CerealsDB website has been designed to enable wheat breeders and scientists to select the appropriate markers for research breeding tasks, such as marker-assisted selection. We report a large update of genotyping information for over 6000 wheat accessions and describe new webtools for exploring and visualizing the data. We also describe a new database of quantitative trait loci that links phenotypic traits to CerealsDB SNP markers and allelic scores for each of those markers. CerealsDB is an open-access website that hosts information on wheat SNPs considered useful for both plant breeders and research scientists. The latest CerealsDB database is available at https://www.cerealsdb.uk.net/cerealgenomics/CerealsDB/indexNEW.php.",2020-01-01 +32860142,Early Prediction of Post-hepatectomy Liver Failure in Patients Undergoing Major Hepatectomy Using a PHLF Prognostic Nomogram.,"

Background

Liver resection (LR) is the main modality of treatment for hepatocellular carcinoma (HCC) and colorectal liver metastasis (CRLM). Post-hepatectomy liver failure (PHLF) remains the most dreaded complication. We aim to create a prognostic score for early risk stratification of patients undergoing LR.

Methodology

Clinical and operative data of 472 patients between 2000 and 2016 with HCC or CRLM undergoing major hepatectomy were extracted and analysed from a prospectively maintained database. PHLF was defined using the 50-50 criteria.

Results

Liver cirrhosis and fatty liver were histologically confirmed in 35.6% and 53% of patients. 4.7% (n = 22) of patients had PHLF. A 90-day mortality was 5.1% (n = 24). Pre-operative albumin-bilirubin score (p = 0.0385), prothrombin time (p < 0.0001) and the natural logarithm of the ratio of post-operative day 1 to pre-operative serum bilirubin (SB) (ln(POD1Bil/pre-opBil); p < 0.0001) were significantly independent predictors of PHLF. The PHLF prognostic nomogram was developed using these factors with receiver operating curve showing area under curve of 0.88. Excellent sensitivity (94.7%) and specificity (95.7%) for the prediction of PHLF (50-50 criteria) were achieved at cut-offs of 9 and 11 points on this model. This score was also predictive of PHLF according to PeakBil > 7 and International Study Group for Liver Surgery criteria, intensive care unit admissions, length of stay, all complications, major complications, re-admissions and mortality (p < 0.05).

Conclusions

The PHLF nomogram ( https://tinyurl.com/SGH-PHLF-Risk-Calculator ) can serve as a useful tool for early identification of patients at high risk of PHLF before the 'point of no return'. This allows enforcement of closer monitoring, timely intervention and mitigation of adverse outcomes.",2020-08-28 +34007002,Severity modeling of propionic acidemia using clinical and laboratory biomarkers.,"

Purpose

To conduct a proof-of-principle study to identify subtypes of propionic acidemia (PA) and associated biomarkers.

Methods

Data from a clinically diverse PA patient population ( https://clinicaltrials.gov/ct2/show/NCT02890342 ) were used to train and test machine learning models, identify PA-relevant biomarkers, and perform validation analysis using data from liver-transplanted participants. k-Means clustering was used to test for the existence of PA subtypes. Expert knowledge was used to define PA subtypes (mild and severe). Given expert classification, supervised machine learning (support vector machine with a polynomial kernel, svmPoly) performed dimensional reduction to define relevant features of each PA subtype.

Results

Forty participants enrolled in the study; five underwent liver transplant. Analysis with k-means clustering indicated that several PA subtypes may exist on the biochemical continuum. The conventional PA biomarkers, plasma total 2-methylcitrate and propionylcarnitine, were not statistically significantly different between nontransplanted and transplanted participants motivating us to search for other biomarkers. Unbiased dimensional reduction using svmPoly revealed that plasma transthyretin, alanine:serine ratio, GDF15, FGF21, and in vivo 1-13C-propionate oxidation, play roles in defining PA subtypes.

Conclusion

Support vector machine prioritized biomarkers that helped classify propionic acidemia patients according to severity subtypes, with important ramifications for future clinical trials and management of PA.",2021-05-18 +29618898,An Integrated Molecular Database on Indian Insects.,"MOlecular Database on Indian Insects (MODII) is an online database linking several databases like Insect Pest Info, Insect Barcode Information System (IBIn), Insect Whole Genome sequence, Other Genomic Resources of National Bureau of Agricultural Insect Resources (NBAIR), Whole Genome sequencing of Honey bee viruses, Insecticide resistance gene database and Genomic tools. This database was developed with a holistic approach for collecting information about phenomic and genomic information of agriculturally important insects. This insect resource database is available online for free at http://cib.res.in.

Availability

http://cib.res.in/.",2018-02-28 +30355619,The NCI Transcriptional Pharmacodynamics Workbench: A Tool to Examine Dynamic Expression Profiling of Therapeutic Response in the NCI-60 Cell Line Panel.,": The intracellular effects and overall efficacies of anticancer therapies can vary significantly by tumor type. To identify patterns of drug-induced gene modulation that occur in different cancer cell types, we measured gene-expression changes across the NCI-60 cell line panel after exposure to 15 anticancer agents. The results were integrated into a combined database and set of interactive analysis tools, designated the NCI Transcriptional Pharmacodynamics Workbench (NCI TPW), that allows exploration of gene-expression modulation by molecular pathway, drug target, and association with drug sensitivity. We identified common transcriptional responses across agents and cell types and uncovered gene-expression changes associated with drug sensitivity. We also demonstrated the value of this tool for investigating clinically relevant molecular hypotheses and identifying candidate biomarkers of drug activity. The NCI TPW, publicly available at https://tpwb.nci.nih.gov, provides a comprehensive resource to facilitate understanding of tumor cell characteristics that define sensitivity to commonly used anticancer drugs. SIGNIFICANCE: The NCI Transcriptional Pharmacodynamics Workbench represents the most extensive compilation to date of directly measured longitudinal transcriptional responses to anticancer agents across a thoroughly characterized ensemble of cancer cell lines.",2018-10-24 +34533009,[The Russian registry of chronic hypoparathyroidism and clinical decision support system integration].,"According to available research, chronic hypoparathyroidism is a relatively rare disease characterized by low serum calcium levels and the absence or deficiency of parathyroid hormone. 
The chronic course of the disease is associated with the multicomponent medical therapy, careful dynamic monitoring to reduce the risks of various complications in different organs and systems as well as disability and mortality.The Russian registry of patients with chronic postsurgical and nonsurgical hypoparathyroidism has started its work in 2020, based on data of the Endocrinology Research Centre. The main goals of the Registry are the assessment of the actual prevalence, incidence of hypoparathyroidism, the key epidemiological characteristics, the analysis of the clinical features and medical therapy of chronic hypoparathyroidism in Russian Federation.This article covers all objectives of this project, the methodology for maintaining the registry of chronic postsurgical and nonsurgical hypoparathyroidism, the analytical possibilities of its use, including the integration of a decision support system designed to help specialists in real clinical practice follow the algorithms for diagnosis and treatment of the disease, approved by clinical guidelines.The registry of chronic postsurgical and nonsurgical hypoparathyroidism is located on a single platform for the registers of endocrinopathies, regulated by the Endocrinology Research Centre (http://gipopt.clin-reg.ru/).",2021-08-06 +34330087,ARTS: A novel In-vivo classifier of arteriolosclerosis for the older adult brain.,"Brain arteriolosclerosis, one of the main pathologies of cerebral small vessel disease, is common in older adults and has been linked to lower cognitive and motor function and higher odds of dementia. In spite of its frequency and associated morbidity, arteriolosclerosis can only be diagnosed at autopsy. Therefore, the purpose of this work was to develop an in-vivo classifier of arteriolosclerosis based on brain MRI. 
First, an ex-vivo classifier of arteriolosclerosis was developed based on features related to white matter hyperintensities, diffusion anisotropy and demographics by applying machine learning to ex-vivo MRI and pathology data from 119 participants of the Rush Memory and Aging Project (MAP) and Religious Orders Study (ROS), two longitudinal cohort studies of aging that recruit non-demented older adults. The ex-vivo classifier showed good performance in predicting the presence of arteriolosclerosis, with an average area under the receiver operating characteristic curve AUC = 0.78. The ex-vivo classifier was then translated to in-vivo based on available in-vivo and ex-vivo MRI data on the same participants. The in-vivo classifier was named ARTS (short for ARTerioloSclerosis), is fully automated, and provides a score linked to the likelihood a person suffers from arteriolosclerosis. The performance of ARTS in predicting the presence of arteriolosclerosis in-vivo was tested in a separate, 91% dementia-free group of 79 MAP/ROS participants and exhibited an AUC = 0.79 in persons with antemortem intervals shorter than 2.4 years. This level of performance in mostly non-demented older adults is notable considering that arteriolosclerosis can only be diagnosed at autopsy. The scan-rescan reproducibility of the ARTS score was excellent, with an intraclass correlation of 0.99, suggesting that application of ARTS in longitudinal studies may show high sensitivity in detecting small changes. Finally, higher ARTS scores in non-demented older adults were associated with greater decline in cognition two years after baseline MRI, especially in perceptual speed which has been linked to arteriolosclerosis and small vessel disease. 
This finding was shown in a separate group of 369 non-demented MAP/ROS participants and was validated in 72 non-demented Black participants of the Minority Aging Research Study (MARS) and also in 244 non-demented participants of the Alzheimer's Disease Neuroimaging Initiative 2 and 3. The results of this work suggest that ARTS may have broad implications in the advancement of diagnosis, prevention and treatment of arteriolosclerosis. ARTS is publicly available at https://www.nitrc.org/projects/arts/.",2021-07-24 +32551569,A Comprehensive Gene Inventory for Glucosinolate Biosynthetic Pathway in Arabidopsis thaliana.,"Glucosinolates (GSLs) are plant secondary metabolites comprising sulfur and nitrogen mainly found in plants from the order of Brassicales, such as broccoli, cabbage, and Arabidopsis thaliana. The activated forms of GSL play important roles in fighting against pathogens and have health benefits to humans. The increasing amount of data on A. thaliana generated from various omics technologies can be investigated more deeply in search of new genes or compounds involved in GSL biosynthesis and metabolism. This review describes a comprehensive inventory of A. thaliana GSLs identified from published literature and databases such as KNApSAcK, KEGG, and AraCyc. A total of 113 GSL genes encoding for 23 transcription components, 85 enzymes, and five protein transporters were experimentally characterized in the past two decades. Continuous efforts are still on going to identify all molecules related to the production of GSLs. A manually curated database known as SuCCombase (http://plant-scc.org) was developed to serve as a comprehensive GSL inventory. Realizing lack of information on the regulation of GSL biosynthesis and degradation mechanisms, this review also includes relevant information and their connections with crosstalk among various factors, such as light, sulfur metabolism, and nitrogen metabolism, not only in A. 
thaliana but also in other crucifers.",2020-07-01 +34512120,Toward integrating software defined networks with the Internet of Things: a review.,"Due to the outbreak of Covid-19 pandemic, activities in most sectors- be it business, education or even healthcare- are taking place in an online rather than in an inline style, and as a result, Internet traffic has increased drastically. Recent studies have highlighted that internet traffic has grown by 70% to 300% since March 2020. According to a recent CNN news article (https://www.cnn.com/2020/03/19/tech/netflix-internet-overload-eu/index.html), popular content providers such as Netflix and YouTube are slowing down in North-America and Europe to keep the internet from breaking. With that being addressed, the existing network deployment and solutions, even with the fifth generation mobile communication (5G) partial deployment, are currently under a huge burden. This work intends to review the integration of two of the most innovative network research areas, Software-defined Networks (SDN) and the Internet of Things (IoT). The IoT aims to interface questions over the Internet while the SDN offers orchestration for network management by decoupling the control plane and the data plane. In this article, we present the state of the art of Software-defined networking and the Internet of Things discussing the integrated architectures, challenges, and designs. Also, we discuss two proposals targeting the QoS Key Performance Indicators (KPIs) in IoT via SDN mobile edge computing along with a few directions of possible research that could fill in gaps in these domains.",2021-09-07 +33604423,HANDS: an RGB-D dataset of static hand-gestures for human-robot interaction.,"The HANDS dataset has been created for human-robot interaction research, and it is composed of spatially and temporally aligned RGB and Depth frames. 
It contains 12 static single-hand gestures performed with both the right-hand and the left-hand, and 3 static two-hands gestures for a total of 29 unique classes. Five actors (two females and three males) have been acquired performing the gestures, each of them adopting a different background and light conditions. For each actor, 150 RGB frames and their corresponding 150 Depth frames per gesture have been collected, for a total of 2400 RGB frames and 2400 Depth frames per actor. Data has been collected using a Kinect v2 camera intrinsically calibrated to spatially align RGB data to Depth data. The temporal alignment has been performed offline using MATLAB, aligning frames with a maximum temporal distance of 66  ms. This dataset has been used in [1] and it is freely available at http://dx.doi.org/10.17632/ndrczc35bt.1.",2021-01-30 +31042284,Microbiome Learning Repo (ML Repo): A public repository of microbiome regression and classification tasks. ,"The use of machine learning in high-dimensional biological applications, such as the human microbiome, has grown exponentially in recent years, but algorithm developers often lack the domain expertise required for interpretation and curation of the heterogeneous microbiome datasets. We present Microbiome Learning Repo (ML Repo, available at https://knights-lab.github.io/MLRepo/), a public, web-based repository of 33 curated classification and regression tasks from 15 published human microbiome datasets. We highlight the use of ML Repo in several use cases to demonstrate its wide application, and we expect it to be an important resource for algorithm developers.",2019-05-01 +33663384,DeltaNeTS+: elucidating the mechanism of drugs and diseases using gene expression and transcriptional regulatory networks.,"

Background

Knowledge on the molecular targets of diseases and drugs is crucial for elucidating disease pathogenesis and mechanism of action of drugs, and for driving drug discovery and treatment formulation. In this regard, high-throughput gene transcriptional profiling has become a leading technology, generating whole-genome data on the transcriptional alterations caused by diseases or drug compounds. However, identifying direct gene targets, especially in the background of indirect (downstream) effects, based on differential gene expressions is difficult due to the complexity of gene regulatory network governing the gene transcriptional processes.

Results

In this work, we developed a network analysis method, called DeltaNeTS+, for inferring direct gene targets of drugs and diseases from gene transcriptional profiles. DeltaNeTS+ uses a gene regulatory network model to identify direct perturbations to the transcription of genes using gene expression data. Importantly, DeltaNeTS+ is able to combine both steady-state and time-course expression profiles, as well as leverage information on the gene network structure. We demonstrated the power of DeltaNeTS+ in predicting gene targets using gene expression data in complex organisms, including Caenorhabditis elegans and human cell lines (T-cell and Calu-3). More specifically, in an application to time-course gene expression profiles of influenza A H1N1 (swine flu) and H5N1 (avian flu) infection, DeltaNeTS+ shed light on the key differences of dynamic cellular perturbations caused by the two influenza strains.

Conclusion

DeltaNeTS+ is a powerful network analysis tool for inferring gene targets from gene expression profiles. As demonstrated in the case studies, by incorporating available information on gene network structure, DeltaNeTS+ produces accurate predictions of direct gene targets from a small sample size (~ 10 s). Integrating static and dynamic expression data with transcriptional network structure extracted from genomic information, as enabled by DeltaNeTS+, is crucial toward personalized medicine, where treatments can be tailored to individual patients. DeltaNeTS+ can be freely downloaded from http://www.github.com/cabsel/deltanetsplus .",2021-03-04 +33824477,Sunglasses to hide behind may also prevent melanoma of the eyes.,"In 1967, Sandy Posey pronounced that sunglasses are essential beachwear ( https://www.youtube.com/watch?v=4HVBEb-GA1Y ). Now, whole-genome sequencing reveals that ultraviolet radiation (UVR) can contribute to melanomas in the iris and conjunctiva, data that provide a molecular explanation for why it is important to protect our eyes from exposure to UVR.",2021-04-06 +34601984,Anti-Gastric Cancer Effect of Purified Omphalia lapidescens Protein via Regulating the JAK/STAT3 Signaling Pathway.,"Gastric cancer is the leading cause of cancer-related death worldwide. The aim of present study was to investigate the anti-tumor effect of purified Omphalia lapidescens protein (pPeOp) in gastric cancer. Microarray analysis was performed to find out differentially expressed genes in pPeOp-treated MC-4 gastric cancer cells. The Janus kinase (JAK)/signal transducer and activator of transcription (STAT) three signaling pathway was most likely to be altered based on bioinformatics analysis. Interleukin-6 (IL-6) and NSC74859 were used as the agonist and inhibitor of the JAK/STAT3 signaling pathway, respectively. Flow cytometry and MTS assay were used for cell proliferation and viability analysis in pPeOp-treated gastric cancer cell lines with IL-6 or NSC74859. 
The anti-tumor effect was increased when pPeOp were co-treated with IL-6, while decreased in inhibitor treatment. The expression of the crucial members in the pathway of MC-4 cells, including glycoprotein 130 (GP130), JAK1, JAK2, STAT3, p-STAT3, suppressor of cytokine signaling SOCS1 and SOCS3, was detected by western blotting. pPeOp exhibited promising anticancer effect in the xenograft nude mice model, established by STAT3 knock down gastric cancer cells. Thus, JAK/STAT3 inhibition partially contributed to the anticancer effect of pPeOp, which may serve as a novel strategy for gastric cancer. Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1960385.",2021-10-03 +33929906,CATMoS: Collaborative Acute Toxicity Modeling Suite.,"

Background

Humans are exposed to tens of thousands of chemical substances that need to be assessed for their potential toxicity. Acute systemic toxicity testing serves as the basis for regulatory hazard classification, labeling, and risk management. However, it is cost- and time-prohibitive to evaluate all new and existing chemicals using traditional rodent acute toxicity tests. In silico models built using existing data facilitate rapid acute toxicity predictions without using animals.

Objectives

The U.S. Interagency Coordinating Committee on the Validation of Alternative Methods (ICCVAM) Acute Toxicity Workgroup organized an international collaboration to develop in silico models for predicting acute oral toxicity based on five different end points: Lethal Dose 50 (LD50) value, U.S. Environmental Protection Agency hazard (four) categories, Globally Harmonized System for Classification and Labeling hazard (five) categories, very toxic chemicals [LD50 (LD50≤50mg/kg)], and nontoxic chemicals (LD50>2,000mg/kg).

Methods

An acute oral toxicity data inventory for 11,992 chemicals was compiled, split into training and evaluation sets, and made available to 35 participating international research groups that submitted a total of 139 predictive models. Predictions that fell within the applicability domains of the submitted models were evaluated using external validation sets. These were then combined into consensus models to leverage strengths of individual approaches.

Results

The resulting consensus predictions, which leverage the collective strengths of each individual model, form the Collaborative Acute Toxicity Modeling Suite (CATMoS). CATMoS demonstrated high performance in terms of accuracy and robustness when compared with in vivo results.

Discussion

CATMoS is being evaluated by regulatory agencies for its utility and applicability as a potential replacement for in vivo rat acute oral toxicity studies. CATMoS predictions for more than 800,000 chemicals have been made available via the National Toxicology Program's Integrated Chemical Environment tools and data sets (ice.ntp.niehs.nih.gov). The models are also implemented in a free, standalone, open-source tool, OPERA, which allows predictions of new and untested chemicals to be made. https://doi.org/10.1289/EHP8495.",2021-04-30 +33624747,PICS2: Next-generation fine mapping via probabilistic identification of causal SNPs. ,"The Probabilistic Identification of Causal SNPs (PICS) algorithm and web application was developed as a fine-mapping tool to determine the likelihood that each single nucleotide polymorphism (SNP) in LD with a reported index SNP is a true causal polymorphism. PICS is notable for its ability to identify candidate causal SNPs within a locus using only the index SNP, which are widely available from published GWAS, whereas other methods require full summary statistics or full genotype data. However, the original PICS web application operates on a single SNP at a time, with slow performance, severely limiting its usability. We have developed a next-generation PICS tool, PICS2, which enables performance of PICS analyses of large batches of index SNPs with much faster performance. Additional updates and extensions include use of LD reference data generated from 1000 Genomes phase 3; annotation of variant consequences; annotation of GTEx eQTL genes and downloadable PICS SNPs from GTEx eQTLs; the option of generating PICS probabilities from experimental summary statistics; and generation of PICS SNPs from all SNPs of the GWAS catalog, automatically updated weekly. These free and easy-to-use resources will enable efficient determination of candidate loci for biological studies to investigate the true causal variants underlying disease processes. 
PICS2 is available at https://pics2.ucsf.edu. Supplementary data are available at Bioinformatics online.",2021-02-24 +31979981,CPAD 2.0: a repository of curated experimental data on aggregating proteins and peptides.,"The Curated Protein Aggregation Database (CPAD) is a manually curated and open-access database dedicated to providing comprehensive information related to mechanistic, kinetic and structural aspects of protein and peptide aggregation. The database has been updated to CPAD 2.0 by significantly expanding datasets and improving the user-interface. Key features of CPAD 2.0 are (i) 83,098 data points on aggregation kinetics experiments, (ii) 565 structures related to aggregation, which are classified into proteins, fibrils, and protein-ligand complexes, (iii) 2031 aggregating/non-aggregating peptides with pre-calculated aggregation properties, and (iv) 912 aggregation-prone regions in amyloidogenic proteins. This database will help the scientific community (a) by facilitating research leading to improved understanding of protein aggregation, (b) by helping develop, validate and benchmark mechanistic and kinetic models of protein aggregation, and (c) by assisting experimentalists with design of their investigations and dissemination of data generated by their studies. CPAD 2.0 can be accessed at https://web.iitm.ac.in/bioinfo2/cpad2/index.html.",2020-01-24 +,Improving the precision of sea level data from satellite altimetry with high-frequency and regional sea state bias corrections,"The sea state bias (SSB) is a large source of uncertainty in the estimation of sea level from satellite altimetry. It is still unclear to what extent it depends on errors in parameter estimations (numerical source) or to the wave physics (physical source).By improving the application of this correction we compute 20-Hz sea level anomalies that are about 30% more precise (i.e. less noisy) than the current standards. 
The improvement is two-fold: first we prove that the SSB correction should be applied directly to the 20-Hz data (12 to 19% noise decrease); secondly, we show that by recomputing a regional SSB model (based on the 20-Hz estimations) even a simple parametric relation is sufficient to further improve the correction (further 15 to 19% noise decrease).We test our methodology using range, wave height and wind speed estimated with two retrackers applied to Jason-1 waveform data: the MLE4 retracked-data available in the Sensor Geophysical Data Records of the mission and the ALES retracked-data available in the OpenADB repository (https://openadb.dgfi.tum.de/). The regional SSB models are computed parametrically by means of a crossover analysis in the Mediterranean Sea and North Sea.Correcting the high-rate data for the SSB reduces the correlation between retracked parameters. Regional variations in the proposed models might be due to differences in wave climate and remaining sea-state dependent residual errors. The variations in the empirical model with respect to the retracker used recall the need for a specific SSB correction for any retracker.This study, while providing a significantly more precise solution to exploit high-rate sea level data, calls for a re-thinking of the SSB correction in both its physical and numerical component, gives robustness to previous theories and provides an immediate improvement for the application of satellite altimetry in the regions of study.",2018-12-01 +32597311,OncotRF: an online resource for exploration of tRNA-derived fragments in human cancers.,"Transfer RNA-derived fragments (tRFs) are a new class of small non-coding RNAs whose biological roles in cancers are not well understood. Emerging evidence suggests that tRFs are involved in gene regulation at multiple levels. 
In this study, we constructed an integrative database, OncotRF (http://bioinformatics.zju.edu.cn/OncotRF), for in silico exploration of tRF functions, and identification of diagnostic and prognostic biomarkers in cancers. The database contains an analysis pipeline for tRF identification and characterization, analysis results of 11,211 small RNA sequencing samples and 8,776 RNA sequencing samples, and clinicopathologic annotation data from The Cancer Genome Atlas (TCGA). The results include: tRF identification and quantification across 33 cancers, abnormally expressed tRFs and genes, tRF-gene correlations, tRF-gene networks, survival analyses, and tRF-related functional enrichment analyses. Users are also able to identify differentially expressed tRFs, predict their functions, and assess the relevance of the tRF expression levels to the clinical outcome according to user-defined groups. Additionally, an online Kaplan-Meier plotter is available in OncotRF for plotting survival curves according to user-defined groups. OncotRF will be a valuable online database and functional annotation tool for researchers studying the roles, functions, and mechanisms of tRFs in human cancers.",2020-06-28 +33736724,"A ""resistance calculator"": Simple stewardship intervention for refining empiric practices of antimicrobials in acute-care hospitals.","

Objective

In the era of widespread resistance, there are 2 time points at which most empiric prescription errors occur among hospitalized adults: (1) upon admission (UA) when treating patients at risk of multidrug-resistant organisms (MDROs) and (2) during hospitalization, when treating patients at risk of extensively drug-resistant organisms (XDROs). These errors adversely influence patient outcomes and the hospital's ecology.

Design and setting

Retrospective cohort study, Shamir Medical Center, Israel, 2016.

Patients

Adult patients (aged >18 years) hospitalized with sepsis.

Methods

Logistic regressions were used to develop predictive models for (1) MDRO UA and (2) nosocomial XDRO. Their performances on the derivation data sets, and on 7 other validation data sets, were assessed using the area under the receiver operating characteristic curve (ROC AUC).

Results

In total, 4,114 patients were included: 2,472 patients with sepsis UA and 1,642 with nosocomial sepsis. The MDRO UA score included 10 parameters, and with a cutoff of ≥22 points, it had an ROC AUC of 0.85. The nosocomial XDRO score included 7 parameters, and with a cutoff of ≥36 points, it had an ROC AUC of 0.87. The range of ROC AUCs for the validation data sets was 0.7-0.88 for the MDRO UA score and was 0.66-0.75 for nosocomial XDRO score. We created a free web calculator (https://assafharofe.azurewebsites.net).

Conclusions

A simple electronic calculator could aid with empiric prescription during an encounter with a septic patient. Future implementation studies are needed to evaluate its utility in improving patient outcomes and in reducing overall resistances.",2021-03-19 +32717064,Transcriptor: a comprehensive platform for annotation of the enzymatic functions of transcripts.,"

Motivation

Rapid advances in sequencing technology have resulted in huge increases in the accessibility of sequencing data. Moreover, researchers are focusing more on organisms that lack a reference genome. However, few easy-to-use web servers focusing on annotations of enzymatic functions are available. Accordingly, in this study, we describe Transcriptor, a novel platform for annotating transcripts encoding enzymes.

Results

The transcripts were evaluated using more than 300 000 in-house enzymatic reactions through bridges of Enzyme Commission numbers. Transcriptor also enabled ontology term identification along with associated enzymes, visualization and prediction of domains, and annotation of regulatory structures, such as long noncoding RNAs, which could facilitate the discovery of new functions in model or nonmodel species. Transcriptor may have applications in elucidation of the roles of organ transcriptomes and secondary metabolite biosynthesis in organisms lacking a reference genome.

Availability and implementation

Transcriptor is available at http://design.rxnfinder.org/transcriptor/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-01 +32512488,TTRMDB: A database for structural and functional analysis on the impact of SNPs over transthyretin (TTR) using bioinformatic tools.,"Hereditary Transthyretin-associated amyloidosis (ATTR) is an autosomal dominant protein-folding disorder with adult-onset caused by mutation of transthyretin (TTR). TTR is characterized by extracellular deposition of amyloid, leading to loss of autonomy and finally, death. More than 100 distinct mutations in TTR gene have been reported from variable age of onset, clinical expression and penetrance data. Besides, the cure for the disease remains still obscure. Further, the prioritizing of mutations concerning the characteristic features governing the stability and pathogenicity of TTR mutant proteins remains unanswered, to date and thus, a complex state of study for researchers. Herein, we provide a full report encompassing the effects of every reported mutant model of TTR protein about the stability, functionality and pathogenicity using various computational tools. In addition, the results obtained from our study were used to create TTRMDB (Transthyretin mutant database), which could be easy access to researchers at http://vit.ac.in/ttrmdb.",2020-05-25 +33471088,Genome-scale de novo assembly using ALGA. ,"There are very few methods for de novo genome assembly based on the overlap graph approach. It is considered as giving more exact results than the so-called de Bruijn graph approach but in much greater time and of much higher memory usage. It is not uncommon that assembly methods involving the overlap graph model are not able to successfully compute greater data sets, mainly due to memory limitation of a computer. This was the reason for developing in last decades mainly de Bruijn-based assembly methods, fast and fairly accurate. 
However, the latter methods can fail for longer or more repetitive genomes, as they decompose reads to shorter fragments and lose a part of information. An efficient assembler for processing big data sets and using the overlap graph model is still looked out. We propose a new genome-scale de novo assembler based on the overlap graph approach, designed for short-read sequencing data. The method, ALGA, incorporates several new ideas resulting in more exact contigs produced in short time. Among these ideas we have creation of a sparse but quite informative graph, reduction of the graph including a procedure referring to the problem of minimum spanning tree of a local subgraph, and graph traversal connected with simultaneous analysis of contigs stored so far. What is rare in genome assembly, the algorithm is almost parameter-free, with only one optional parameter to be set by a user. ALGA was compared with nine state-of-the-art assemblers in tests on genome-scale sequencing data obtained from real experiments on six organisms, differing in size, coverage, GC content, and repetition rate. ALGA produced best results in the sense of overall quality of genome reconstruction, understood as a good balance between genome coverage, accuracy, and length of resulting sequences. The algorithm is one of tools involved in processing data in currently realized national project Genomic Map of Poland. ALGA is available at http://alga.put.poznan.pl. Supplementary material is available at Bioinformatics online.",2021-01-20 +34118316,Development of knowledge-based clinical decision support system for patients included in colorectal screening program.,"

Background & aims

Colorectal cancer (CRC) screening programs represent a large volume of procedures that need a follow-up endoscopy. A knowledge-based clinical decision support system (K-CDSS) is a technology which contains clinical rules and associations of compiled data that assist with clinical decision-making tasks. We developed a K-CDSS for management of patients included in CRC screening and surveillance of colorectal polyps.

Methods

We collected information on 48 variables from hospital colonoscopy records. Using DILEMMA Solutions Platform © (https://www.dilemasolution.com) we designed a prototype K-CDSS (PoliCare CDSS), to provide tailored recommendations by combining patient data and current guideline recommendations. The accuracy of rules was verified using four scenarios (normal colonoscopy, lesions other than polyps, non-advanced adenomas and advanced adenomas). We studied the degree of agreement between the clinical assessments made by expert doctors and nurses equipped with PoliCare CDSS. Two experts confirmed a correlation between guidelines and PoliCare recommendations.

Results

56 consecutive endoscopy cases from colorectal screening program were included (62.8 years; range 53-71). Colonoscopy results were: absence of colon lesions (n=7, 12.5%), lesions in the colon that are not polyps (n=3, 5.4%) and resected colonic polyps (n=46, 82.1%; 100% R0 resection). Patients with resected polyps presented non-advanced adenoma (n=21, 45.6%) or advanced lesions (n=25, 54.4%). There were no differences in erroneous orders with PoliCare CDSS (Kappa value 1.0).

Conclusions

PoliCare CDSS can easily be integrated into the workflow for improving the overall efficiency and better adherence to evidence-based guidelines.",2021-06-09 +32931552,AFid: a tool for automated identification and exclusion of autofluorescent objects from microscopy images.,"

Motivation

Autofluorescence is a long-standing problem that has hindered the analysis of images of tissues acquired by fluorescence microscopy. Current approaches to mitigate autofluorescence in tissue are lab-based and involve either chemical treatment of sections or specialized instrumentation and software to 'unmix' autofluorescent signals. Importantly, these approaches are pre-emptive and there are currently no methods to deal with autofluorescence in acquired fluorescence microscopy images.

Results

To address this, we developed Autofluorescence Identifier (AFid). AFid identifies autofluorescent pixels as discrete objects in multi-channel images post-acquisition. These objects can then be tagged for exclusion from downstream analysis. We validated AFid using images of FFPE human colorectal tissue stained for common immune markers. Further, we demonstrate its utility for image analysis where its implementation allows the accurate measurement of HIV-Dendritic cell interactions in a colorectal explant model of HIV transmission. Therefore, AFid represents a major leap forward in the extraction of useful data from images plagued by autofluorescence by offering an approach that is easily incorporated into existing workflows and that can be used with various samples, staining panels and image acquisition methods. We have implemented AFid in ImageJ, Matlab and R to accommodate the diverse image analysis community.

Availability and implementation

AFid software is available at https://ellispatrick.github.io/AFid.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +33007423,Effect of melatonin supplementation on oxidative stress parameters: A systematic review and meta-analysis.,"

Background

Oxidative stress, defined as an imbalance between pro-oxidants and neutralizing antioxidants within the body, is a growing public health concern. Oxidative stress is involved in the progression of nearly all chronic diseases. Melatonin has been suggested to reduce oxidative stress by its potential radical scavenging properties.

Objective

To determine the efficacy and safety of melatonin as a therapy for the improvement of oxidative stress parameters in randomized controlled trials.

Methods

A systematic database search using Scopus, PubMed/Medline, EMBASE, Web of Science, the Cochrane Controlled Register of Trials and clinicaltrials.gov (https://clinicaltrials.gov) for studies published up to July 2020 was conducted. We included studies which investigated the effect of supplemental melatonin compared to placebo on oxidative stress parameters in unhealthy patients. Quantitative data synthesis was conducted using a random-effects model with standard mean difference (SMD) and 95 % confidence intervals (CI). Cochrane's Q and I2 values were used to evaluate heterogeneity.

Results

A total of 12 randomized controlled trials (RCTs) were eligible. The meta-analysis indicated an association between melatonin intake and a significant increase in total antioxidant capacity (TAC) (SMD: 0.76; 95 % CI: 0.30, 1.21; I2 = 80.1 %), glutathione (GSH) levels (SMD: 0.57; 95 % CI: 0.32, 0.83; I2 = 15.1 %), superoxide dismutase (SOD) (SMD: 1.38; 95 % CI: 0.13, 2.62; I2 = 86.9 %), glutathione peroxidase (GPx) (SMD: 1.36; 95 % CI: 0.46, 2.30; I2 = 89.3 %), glutathione reductase (GR) (SMD: 1.21; 95 % CI: 0.65, 1.77; I2 = 00.0 %) activities, and a significant reduction in malondialdehyde (MDA) levels (SMD: -0.79; 95 % CI: -1.19, -0.39; I2 = 73.1 %). Melatonin intake was not shown to significantly affect nitric oxide (NO) levels (SMD: -0.24; 95 % CI: -0.61, 0.14; I2 = 00.0 %) or catalase (CAT) activity (SMD: -1.38; 95 % CI: -1.42, 4.18; I2 = 96.6 %).

Conclusion

Melatonin intake was shown to have a significant impact on improving oxidative stress parameters. However, future research through large, well-designed randomized controlled trials is required to determine the effect of melatonin on oxidative stress parameters in different age groups and different disease types.",2020-09-29 +33010165,"Newt: a comprehensive web-based tool for viewing, constructing and analyzing biological maps.","

Motivation

Visualization of cellular processes and pathways is a key recurring requirement for effective biological data analysis. There is a considerable need for sophisticated web-based pathway viewers and editors operating with widely accepted standard formats, using the latest visualization techniques and libraries.

Results

We developed a web-based tool named Newt for viewing, constructing and analyzing biological maps in standard formats such as SBGN, SBML and SIF.

Availability and implementation

Newt's source code is publicly available on GitHub and freely distributed under the GNU LGPL. Ample documentation on Newt can be found on http://newteditor.org and on YouTube.",2021-06-01 +34039967,Structure-based protein function prediction using graph convolutional networks.,"The rapid increase in the number of proteins in sequence databases and the diversity of their functions challenge computational approaches for automated function prediction. Here, we introduce DeepFRI, a Graph Convolutional Network for predicting protein functions by leveraging sequence features extracted from a protein language model and protein structures. It outperforms current leading methods and sequence-based Convolutional Neural Networks and scales to the size of current sequence repositories. Augmenting the training set of experimental structures with homology models allows us to significantly expand the number of predictable functions. DeepFRI has significant de-noising capability, with only a minor drop in performance when experimental structures are replaced by protein models. Class activation mapping allows function predictions at an unprecedented resolution, allowing site-specific annotations at the residue-level in an automated manner. We show the utility and high performance of our method by annotating structures from the PDB and SWISS-MODEL, making several new confident function predictions. DeepFRI is available as a webserver at https://beta.deepfri.flatironinstitute.org/ .",2021-05-26 +34136652,Mixology: a tool for calculating required masses and volumes for laboratory solutions.,"Laboratory work often requires making up solutions with defined concentrations of various components. Mixology is a tool we have created to simplify calculation of the masses and volumes required to obtain particular concentrations. 
It operates with many kinds of volumetric, mass and concentration units, including conversion between molarity- and mass-based concentrations using molecular masses retrieved from the Chemical Entities of Biological Interest (ChEBI) database. Mixology can be accessed at https://mixology.science.",2021-05-26 +34124457,Combination of UPLC-Q-TOF/MS and Network Pharmacology to Reveal the Mechanism of Qizhen Decoction in the Treatment of Colon Cancer.,"Traditional Chinese medicine (TCM) has been utilized for the treatment of colon cancer. Qizhen decoction (QZD), a potential compound prescription of TCM, possesses multiple biological activities. It has been proven clinically effective in the treatment of colon cancer. However, the molecular mechanism of anticolon cancer activity is still not clear. This study aimed to identify the chemical composition of QZD. Furthermore, a collaborative analysis strategy of network pharmacology and cell biology was used to further explore the critical signaling pathway of QZD anticancer activity. First, ultraperformance liquid chromatography-quadrupole time-of-flight/mass spectrometry (UPLC-Q-TOF/MS) was performed to identify the chemical composition of QZD. Then, the chemical composition database of QZD was constructed based on a systematic literature search and review of chemical constituents. Moreover, the common and indirect targets of chemical components of QZD and colon cancer were searched by multiple databases. A protein-protein interaction (PPI) network was constructed using the String database (https://www.string-db.org/). All of the targets were analyzed by Gene Ontology (GO) bioanalysis and Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway analysis, and the visual network topology diagram of ""Prescription-TCM-Chemical composition-Direct target-Indirect target-Pathway"" was constructed by Cytoscape software (v3.7.1). 
The top molecular pathway ranked by statistical significance was further verified by molecular biology methods. The results of UPLC-Q-TOF/MS showed that QZD had 111 kinds of chemical components, of which 103 were unique components and 8 were common components. Ten pivotal targets of QZD in the treatment of colon cancer were screened by the PPI network. Targets of QZD involve many biological processes, such as the signaling pathway, immune system, gene expression, and so on. QZD may interfere with biological pathways such as cell replication, oxygen-containing compounds, or organic matter by protein binding, regulation of signal receptors or enzyme binding, and affect cytoplasm and membrane-bound organelles. The main antitumor core pathways were the apoptosis metabolic pathway, the PI3K-Akt signal pathway, and so on. Expression of the PI3K-Akt signal pathway was significantly downregulated after the intervention of QZD, which was closely related to the inhibition of proliferation and migration of colon cancer cells by cell biology methods. The present work may facilitate a better understanding of the effective components, therapeutic targets, biological processes, and signaling pathways of QZD in the treatment of colon cancer and provide useful information about the utilization of QZD.",2021-05-26 +34038437,Genome-wide comparative analyses of GATA transcription factors among 19 Arabidopsis ecotype genomes: Intraspecific characteristics of GATA transcription factors.,"GATA transcription factors (TFs) are widespread eukaryotic regulators whose DNA-binding domain is a class IV zinc finger motif (CX2CX17-20CX2C) followed by a basic region. Due to the low cost of genome sequencing, multiple strains of specific species have been sequenced: e.g., number of plant genomes in the Plant Genome Database (http://www.plantgenome.info/) is 2,174 originated from 713 plant species. 
Thus, we investigated GATA TFs of 19 Arabidopsis thaliana genome-widely to understand intraspecific features of Arabidopsis GATA TFs with the pipeline of GATA database (http://gata.genefamily.info/). Numbers of GATA genes and GATA TFs of each A. thaliana genome range from 29 to 30 and from 39 to 42, respectively. Four cases of different pattern of alternative splicing forms of GATA genes among 19 A. thaliana genomes are identified. 22 of 2,195 amino acids (1.002%) from the alignment of GATA domain amino acid sequences display variations across 19 ecotype genomes. In addition, maximally four different amino acid sequences per each GATA domain identified in this study indicate that these position-specific amino acid variations may invoke intraspecific functional variations. Among 15 functionally characterized GATA genes, only five GATA genes display variations of amino acids across ecotypes of A. thaliana, implying variations of their biological roles across natural isolates of A. thaliana. PCA results from 28 characteristics of GATA genes display the four groups, same to those defined by the number of GATA genes. Topologies of bootstrapped phylogenetic trees of Arabidopsis chloroplasts and common GATA genes are mostly incongruent. Moreover, no relationship between geographical distribution and their phylogenetic relationships was found. Our results present that intraspecific variations of GATA TFs in A. thaliana are conserved and evolutionarily neutral along with 19 ecotypes, which is congruent to the fact that GATA TFs are one of the main regulators for controlling essential mechanisms, such as seed germination and hypocotyl elongation.",2021-05-26 +32572450,Dr AFC: drug repositioning through anti-fibrosis characteristic. ,"Fibrosis is a key component in the pathogenic mechanism of a variety of diseases. 
These diseases involving fibrosis may share common mechanisms and therapeutic targets, and therefore common intervention strategies and medicines may be applicable for these diseases. For this reason, deliberately introducing anti-fibrosis characteristics into predictive modeling may lead to more success in drug repositioning. In this study, anti-fibrosis knowledge base was first built by collecting data from multiple resources. Both structural and biological profiles were then derived from the knowledge base and used for constructing machine learning models including Structural Profile Prediction Model (SPPM) and Biological Profile Prediction Model (BPPM). Three external public data sets were employed for validation purpose and further exploration of potential repositioning drugs in wider chemical space. The resulting SPPM and BPPM models achieve area under the receiver operating characteristic curve (area under the curve) of 0.879 and 0.972 in the training set, and 0.814 and 0.874 in the testing set. Additionally, our results also demonstrate that substantial amount of multi-targeting natural products possess notable anti-fibrosis characteristics and might serve as encouraging candidates in fibrosis treatment and drug repositioning. To leverage our methodology and findings, we developed repositioning prediction platform, drug repositioning based on anti-fibrosis characteristic that is freely accessible via https://www.biosino.org/drafc.",2021-05-01 +34104357,Screening of world approved drugs against highly dynamical spike glycoprotein of SARS-CoV-2 using CaverDock and machine learning.,"The new severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) causes pathological pulmonary symptoms. Most efforts to develop vaccines and drugs against this virus target the spike glycoprotein, particularly its S1 subunit, which is recognised by angiotensin-converting enzyme 2. 
Here we use the in-house developed tool CaverDock to perform virtual screening against spike glycoprotein using a cryogenic electron microscopy structure (PDB-ID: 6VXX) and the representative structures of five most populated clusters from a previously published molecular dynamics simulation. The dataset of ligands was obtained from the ZINC database and consists of drugs approved for clinical use worldwide. Trajectories for the passage of individual drugs through the tunnel of the spike glycoprotein homotrimer, their binding energies within the tunnel, and the duration of their contacts with the trimer's three subunits were computed for the full dataset. Multivariate statistical methods were then used to establish structure-activity relationships and select top candidate for movement inhibition. This new protocol for the rapid screening of globally approved drugs (4359 ligands) in a multi-state protein structure (6 states) showed high robustness in the rate of finished calculations. The protocol is universal and can be applied to any target protein with an experimental tertiary structure containing protein tunnels or channels. The protocol will be implemented in the next version of CaverWeb (https://loschmidt.chemi.muni.cz/caverweb/) to make it accessible to the wider scientific community.",2021-05-26 +33430980,SmartGraph: a network pharmacology investigation platform.,"

Motivation

Drug discovery investigations need to incorporate network pharmacology concepts while navigating the complex landscape of drug-target and target-target interactions. This task requires solutions that integrate high-quality biomedical data, combined with analytic and predictive workflows as well as efficient visualization. SmartGraph is an innovative platform that utilizes state-of-the-art technologies such as a Neo4j graph-database, Angular web framework, RxJS asynchronous event library and D3 visualization to accomplish these goals.

Results

The SmartGraph framework integrates high quality bioactivity data and biological pathway information resulting in a knowledgebase comprised of 420,526 unique compound-target interactions defined between 271,098 unique compounds and 2018 targets. SmartGraph then performs bioactivity predictions based on the 63,783 Bemis-Murcko scaffolds extracted from these compounds. Through several use-cases, we illustrate the use of SmartGraph to generate hypotheses for elucidating mechanism-of-action, drug-repurposing and off-target prediction.

Availability

https://smartgraph.ncats.io/.",2020-01-21 +31901979,Genome-wide somatic copy number alteration analysis and database construction for cervical cancer.,"Cervical cancer is a common gynecological malignancy with high incidence and mortality. Somatic copy number alterations (CNAs) play an important role in identifying tumor suppressor genes and oncogenes and are a useful diagnostic indicator for many cancer types. However, the genomic landscape of CNAs in cervical cancer has not yet been comprehensively characterized. In the present study, we collected 974 cervical cancer samples from different data sources. All samples were analyzed by genomic arrays to obtain high-resolution CNAs. Focal genomic regions with CNA events and potential cancer driver genes were identified by GISTIC2.0. Meanwhile, we constructed a comprehensive cervical cancer database by PHP and self-written Perl and R scripts. In total, 54 recurrent regions of amplification and deletion were detected. Frequently altered tumor suppressor genes were found in these regions, including PIK3CA, ERBB2, EP300 and FBXW7. CNA hotspots and related enriched functional categories were also identified. The incidence of chromothripsis in cervical cancer was estimated to be 6.06%, and the chromosome pulverization hotspot regions were detected. Based on the curated data, we developed CNAdbCC (http://cailab.labshare.cn/CNAdbCC/), a comprehensive database for copy number alterations in cervical cancer. We provide a user-friendly Web interface for data mining and visualization. It is the most comprehensive public database devoted exclusively to genomic alterations in cervical cancer. These results extend our molecular understanding of cervical cancer. 
The database will enable researchers to explore specific CNA patterns in this lethal cancer and facilitate the discovery of therapeutic candidates.",2020-01-04 +34607397,The Dehgolan Prospective Cohort Study (DehPCS) on non-communicable diseases in a Kurdish community in the west of Iran.,"The Dehgolan Prospective Cohort Study (DehPCS) was conducted to examine and identify risk factors for the most prevalent non-communicable diseases (NCDs). In addition, in order to examine participants' health status, socioeconomic status, behavioral factors, nutritional status, and environmental exposures, the DehPCS collected, analyzed, and stored blood, urine, nail, and hair samples to conduct genetic studies and identify biomarkers and other biological determinants of NCDs. In total, 3,996 adults aged 35 to 70 from the general population participated in the study from February 2018 to March 2019. Of them, 43.7% were women. The first follow-up wave was conducted with 3,995 participants. Information on a wide range of variables was collected, including on socioeconomic status, lifestyle, nutritional status, habits, physical examination findings, medication use, and medical history. Proxy variables such as body mass index, metabolic equivalent task score, wealth index, and macronutrients and micronutrients were calculated. The most common self-reported diseases in descending order were kidney stones, hypertension, and fatty liver. The prevalence of diabetes and hypertension was 9.3% and 33.4%, respectively. All data, samples, and measurements will be collected again at 5-year intervals. Thus, it will be possible to examine time-dependent changes in the risk factors of NCDs. The DehPCS can be used to study the relationships among genetics, lifestyle, socioeconomic status, and environmental risk factors and the most prevalent NCDs in case-cohort studies using a nested case-control design that will be applied to the cohort infrastructure. 
Researchers can also submit pre-proposals via the following web address: http://c.ddrc.ac.ir/persianaccess/Account/Login.",2021-10-01 +34772729,Nocturnal Dipping and Left Ventricular Mass Index in the Chronic Kidney Disease in Children Cohort.,"

Background and objectives

The physiologic nocturnal BP decline is often blunted in patients with CKD; however, the consequences of BP nondipping in children are largely unknown. Our objective was to determine risk factors for nondipping and to investigate if nondipping is associated with higher left ventricular mass index in children with CKD.

Design, setting, participants, & measurements

We conducted a cross-sectional analysis of ambulatory BP monitoring and echocardiographic data in participants of the Chronic Kidney Disease in Children study. Multivariable linear and spline regression analyses were used to evaluate the relationship of risk factors with dipping and of dipping with left ventricular mass index.

Results

Within 552 participants, mean age was 11 (±4) years, mean eGFR was 53 (±20) ml/min per 1.73 m2, and 41% were classified as nondippers. In participants with nonglomerular CKD, female sex and higher sodium intake were significantly associated with less systolic and diastolic dipping (P≤0.05). In those with glomerular CKD, Black race and greater proteinuria were significantly associated with less systolic and diastolic dipping (P≤0.05). Systolic dipping and diastolic dipping were not significantly associated with left ventricular mass index; however, in spline regression plots, diastolic dipping appeared to have a nonlinear relationship with left ventricular mass index. As compared with diastolic dipping of 20%-25%, dipping of <20% was associated with 1.41-g/m2.7-higher left ventricular mass index (95% confidence interval, -0.47 to 3.29), and dipping of >25% was associated with 1.98-g/m2.7-higher left ventricular mass index (95% confidence interval, -0.77 to 4.73), although these relationships did not achieve statistical significance.

Conclusions

Black race, female sex, and greater proteinuria and sodium intake were significantly associated with blunted dipping in children with CKD. We did not find a statistically significant association between dipping and left ventricular mass index.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2021_12_20_CJN09810721.mp3.",2021-11-12 +33740068,Epidemiological analysis of injury occurrence and current prevention strategies on international amateur football level during the UEFA Regions Cup 2019.,"

Introduction

Football is the most popular sport worldwide and results in a high frequency of injuries. So far, mainly injuries in professional football have been investigated, and the literature lacks data regarding detailed injury epidemiology and current prevention data in amateur football tournaments.

Materials and methods

A prospective cohort study investigated an international amateur football tournament, the UEFA Regions' Cup, which took place in 2019 in Germany. Injury epidemiology, current prevention strategies of the teams and the implementation of the UEFA concussion protocol were investigated in detail by means of standardized injury definitions and data samples for football (Fuller et al., Scand J Med Sci Sports 16:83-92, https://doi.org/10.1111/j.1600-0838.2006.00528.x , 2006).

Results

138 players from 8 teams participated in this study, while 39 players were excluded. Overall injury incidence was 12.5 per 1000 h total football exposure, 43.5 per 1000 h for match exposure. No injuries were registered during training. Injury prevalence was 14.1% per player and 1.1 injuries per match were registered. The lower extremity was predominantly affected by injuries (71.4%) and the majority of injuries (78.6%) were non-severe injury types like contusions (50%) and sprains (18.2%). Two head injuries, one contusion and one skin lesion, were handled according to the guidelines of the UEFA concussion protocol. 44.4% of the players indicated at least one previous injury before the tournament, 45.3% of them during the last two football seasons before the start of the tournament. Injury prevention performance was included in all participating teams during the tournament by warm up or training strategies (100%). During the warm-up program, only 5 exercises of the FIFA 11+ program were found to be performed by more than half of the participating teams. Running exercises were the most frequently performed exercises, while trunk muscle exercises were less represented (14.3%).

Conclusion

This study presents for the first time epidemiological injury and prevention data of the UEFA Regions Cup. Injury incidence was higher compared to injury reports of regular seasons, but lower compared to other amateur football tournaments. Currently used prevention programs revealed trunk muscle exercises as often neglected.",2021-03-19 +33859339,Assessment of phylo-functional coherence along the bacterial phylogeny and taxonomy.,"In this report we use available curated phylogenies, taxonomy, and genome annotations to assess the phylogenetic and gene content similarity associated with each different taxon and taxonomic rank. Subsequently, we employ the same data to assess the frontiers of functional coherence along the bacterial phylogeny. Our results show that within-group phylogenetic and gene content similarity of taxa in the same rank are not homogenous, and that these values show extensive overlap between ranks. Functional coherence along the 16S rRNA gene-based phylogeny was limited to 44 particular nodes presenting large variations in phylogenetic depth. For instance, the deep subtree affiliated to class Actinobacteria presented functional coherence, while the shallower family Enterobacteriaceae-affiliated subtree did not. On the other hand, functional coherence along the genome-based phylogeny delimited deep subtrees affiliated to phyla Actinobacteriota, Deinococcota, Chloroflexota, Firmicutes, and a subtree containing the rest of the bacterial phyla. The results presented here can be used to guide the exploration of results in many microbial ecology and evolution research scenarios. Moreover, we provide dedicated scripts and files that can be used to continue the exploration of functional coherence along the bacterial phylogeny employing different parameters or input data ( https://git.io/Jec5U ).",2021-04-15 +34103383,Assessing Adverse Drug Reactions Reported for New Respiratory Medications in the FDA Adverse Event Reporting System Database.,"

Background

Between 2012 and 2017, 25 new medications or combination products were approved by the Food and Drug Administration (FDA) for use in treatment of chronic lower respiratory diseases (CLRDs). With limited data on post-marketing patient exposure to these drugs, their safety profiles remain unknown. This study aims to provide post-marketing surveillance of these medications.

Methods

A list of new CLRD medications approved between 2012 and 2017 was generated through searches on Drugs.com (https://www.drugs.com), FDA.gov (https://www.fda.gov), and IBM Micromedex (https://www.micromedexsolutions.com/home/dispatch/ssl/true). Data describing adverse drug reactions (ADRs) were collected from the FDA Adverse Event Reporting System for analysis. Of the 25 identified medications, we selected 4 medications indicated for asthma or COPD with at least 500 reports. Only ADRs catalogued with these medications as the primary suspect were analyzed. Reporting odds ratios were calculated for the top 10 ADRs of each CLRD medication.

Results

A total of 61,682 ADR reports were collected for newly approved CLRD medications (n = 27,190 older adults; n = 30,502 male). Reports of COPD medications (umeclidinium and umeclidinium/vilanterol) indicate that umeclidinium/vilanterol yielded a higher reporting odds ratio than umeclidinium alone for reports of pain. Fluticasone furoate/vilanterol had higher reporting odds ratios for cough, pain, and dizziness than budesonide/formoterol and fluticasone propionate/salmeterol.

Conclusions

Our findings suggest that the incidence of different adverse events experienced by patients in post-marketing reports resembles the incidence reported in pre-marketing clinical trials for COPD medications, except for fluticasone furoate/vilanterol, which has several differences.",2021-06-08 +34034663,Secretome characterization of clinical isolates from the Mycobacterium abscessus complex provides insight into antigenic differences.,"

Background

Mycobacterium abscessus (MAB) is a widely disseminated pathogenic non-tuberculous mycobacterium (NTM). Like with the M. tuberculosis complex (MTBC), excreted / secreted (ES) proteins play an essential role for its virulence and survival inside the host. Here, we used a robust bioinformatics pipeline to predict the secretome of the M. abscessus ATCC 19977 reference strain and 15 clinical isolates belonging to all three MAB subspecies, M. abscessus subsp. abscessus, M. abscessus subsp. bolletii, and M. abscessus subsp. massiliense.

Results

We found that ~ 18% of the proteins encoded in the MAB genomes were predicted as secreted and that the three MAB subspecies shared > 85% of the predicted secretomes. MAB isolates with a rough (R) colony morphotype showed larger predicted secretomes than isolates with a smooth (S) morphotype. Additionally, proteins exclusive to the secretomes of MAB R variants had higher antigenic densities than those exclusive to S variants, independent of the subspecies. For all investigated isolates, ES proteins had a significantly higher antigenic density than non-ES proteins. We identified 337 MAB ES proteins with homologues in previously investigated M. tuberculosis secretomes. Among these, 222 have previous experimental support of secretion, and some proteins showed homology with protein drug targets reported in the DrugBank database. The predicted MAB secretomes showed a higher abundance of proteins related to quorum-sensing and Mce domains as compared to MTBC indicating the importance of these pathways for MAB pathogenicity and virulence. Comparison of the predicted secretome of M. abscessus ATCC 19977 with the list of essential genes revealed that 99 secreted proteins corresponded to essential proteins required for in vitro growth.

Conclusions

This study represents the first systematic prediction and in silico characterization of the MAB secretome. Our study demonstrates that bioinformatics strategies can help to broadly explore mycobacterial secretomes including those of clinical isolates and to tailor subsequent, complex and time-consuming experimental approaches accordingly. This approach can support systematic investigation exploring candidate proteins for new vaccines and diagnostic markers to distinguish between colonization and infection. All predicted secretomes were deposited in the Secret-AAR web-server ( http://microbiomics.ibt.unam.mx/tools/aar/index.php ).",2021-05-25 +33877974,Towards Perceptually Optimized Adaptive Video Streaming-A Realistic Quality of Experience Database.,"Measuring Quality of Experience (QoE) and integrating these measurements into video streaming algorithms is a multi-faceted problem that fundamentally requires the design of comprehensive subjective QoE databases and objective QoE prediction models. To achieve this goal, we have recently designed the LIVE-NFLX-II database, a highly-realistic database which contains subjective QoE responses to various design dimensions, such as bitrate adaptation algorithms, network conditions and video content. Our database builds on recent advancements in content-adaptive encoding and incorporates actual network traces to capture realistic network variations on the client device. The new database focuses on low bandwidth conditions which are more challenging for bitrate adaptation algorithms, which often must navigate tradeoffs between rebuffering and video quality. Using our database, we study the effects of multiple streaming dimensions on user experience and evaluate video quality and quality of experience models and analyze their strengths and weaknesses. We believe that the tools introduced here will help inspire further progress on the development of perceptually-optimized client adaptation and video streaming strategies. 
The database is publicly available at http://live.ece.utexas.edu/research/LIVE_NFLX_II/live_nflx_plus.html.",2021-05-25 +32881579,Correction to Geschwind et al. (2020).,"Reports an error in ""Positivity pays off: Clients' perspectives on positive compared with traditional cognitive behavioral therapy for depression"" by Nicole Geschwind, Emke Bosgraaf, Fredrike Bannink and Frenk Peeters (Psychotherapy, Advanced Online Publication, Feb 20, 2020, np). In the article (http://dx.doi.org/10.1037/pst0000288), the second to last sentence does not appear correctly and should appear instead as follows: The conclusion emerging from this study is that exploring better moments and building positivity efficiently counters depressive symptoms and builds well-being. (The following abstract of the original article appeared in record 2020-12346-001.) In this qualitative study, we explored the experiences of clients receiving cognitive behavioral therapy (CBT) for major depressive disorder. All participants received 8 sessions of traditional CBT (based on Beck, Rush, Shaw, & Emery, 1979) and 8 sessions of positive CBT (order counterbalanced). The aim of the study was to examine clients' experience of positive CBT and to contrast this with their experience of traditional CBT. Positive CBT structurally and selectively focuses on better moments (exceptions to the problem as opposed to the problem), strengths, and positive emotions and integrates traditional CBT with solution-focused brief therapy and positive psychology. In addition to conducting interviews with 12 individuals, the second author attended all therapy sessions of 4 clients and observed biweekly supervision sessions as further methods of data collection. Qualitative analysis showed that, despite initial skepticism, clients preferred positive CBT and indicated experiencing a steeper learning curve during positive, compared with traditional, CBT for depression. 
The popularity of positive CBT was attributable to 4 influences: feeling good and empowered, benefitting from upward spiral effects of positive emotions, learning to appreciate baby steps, and (re)discovering optimism as a personal strength. Qualitative analysis showed that, despite better moments and building positivity efficiently counters depressive symptoms and builds well-being. Clients perceived positive CBT's upbeat tone as stimulating and as motivating for change. (PsycInfo Database Record (c) 2020 APA, all rights reserved).",2020-09-01 +34765102,Diagnostic Accuracy and Usability of the ECG247 Smart Heart Sensor Compared to Conventional Holter Technology.,"

Background

Heart rhythm disorders, especially atrial fibrillation (AF), are increasing global health challenges. Conventional diagnostic tools for assessment of rhythm disorders suffer from limited availability, limited test duration time, and usability challenges. There is also a need for out-of-hospital investigation of arrhythmias. Therefore, the Norwegian ECG247 Smart Heart Sensor has been developed to simplify the assessment of heart rhythm disorders. The current study aimed to evaluate the diagnostic accuracy and usability of the ECG247 Smart Heart Sensor compared to conventional Holter monitors.

Methods

Parallel tests with ECG247 Smart Heart Sensor and a Holter monitor were performed in 151 consecutive patients referred for out-of-hospital long-term ECG recording at Sorlandet Hospital Arendal, Norway. All ECG data were automatically analysed by both systems and evaluated by hospital physicians. Participants were asked to complete a questionnaire scoring usability parameters after the test.

Results

A total of 150 patients (62% men, age 54 (±17) years) completed the study. The ECG quality from both monitors was considered satisfactory for rhythm analysis in all patients. AF was identified in 9 (6%) patients during the period with parallel tests. The diagnostic accuracy for automatic AF detection was 95% (95% CI 91-98) for the ECG247 Smart Heart Sensor and 81% (95% CI 74-87) for the Holter system. The proportion of false-positive AF was 4% in tests analysed by the ECG247 algorithm and 16% in tests analysed by the Holter algorithm. Other arrhythmias were absent/rare. The system usability score was significantly better for ECG247 Smart Heart Sensor compared to traditional Holter technology (score 87.4 vs. 67.5, p < 0.001).

Conclusions

The ECG247 Smart Heart Sensor showed at least comparable diagnostic accuracy for AF and improved usability compared to conventional Holter technology. ECG247 allows for prolonged monitoring and may improve detection of AF. This trial is registered with https://clinicaltrials.gov/ct2/show/NCT04700865.",2021-11-02 +29630066,"The Microbe Directory: An annotated, searchable inventory of microbes' characteristics.","The Microbe Directory is a collective research effort to profile and annotate more than 7,500 unique microbial species from the MetaPhlAn2 database that includes bacteria, archaea, viruses, fungi, and protozoa. By collecting and summarizing data on various microbes' characteristics, the project comprises a database that can be used downstream of large-scale metagenomic taxonomic analyses, allowing one to interpret and explore their taxonomic classifications to have a deeper understanding of the microbial ecosystem they are studying. Such characteristics include, but are not limited to: optimal pH, optimal temperature, Gram stain, biofilm-formation, spore-formation, antimicrobial resistance, and COGEM class risk rating. The database has been manually curated by trained student-researchers from Weill Cornell Medicine and CUNY-Hunter College, and its analysis remains an ongoing effort with open-source capabilities so others can contribute. Available in SQL, JSON, and CSV (i.e. Excel) formats, the Microbe Directory can be queried for the aforementioned parameters by a microorganism's taxonomy. In addition to the raw database, The Microbe Directory has an online counterpart ( https://microbe.directory/) that provides a user-friendly interface for storage, retrieval, and analysis into which other microbial database projects could be incorporated. 
The Microbe Directory was primarily designed to serve as a resource for researchers conducting metagenomic analyses, but its online web interface should also prove useful to any individual who wishes to learn more about any particular microbe.",2018-01-05 +29800349,"GDA, a web-based tool for Genomics and Drugs integrated analysis.","Several major screenings of genetic profiling and drug testing in cancer cell lines proved that the integration of genomic portraits and compound activities is effective in discovering new genetic markers of drug sensitivity and clinically relevant anticancer compounds. Despite most genetic and drug response data are publicly available, the availability of user-friendly tools for their integrative analysis remains limited, thus hampering an effective exploitation of this information. Here, we present GDA, a web-based tool for Genomics and Drugs integrated Analysis that combines drug response data for >50 800 compounds with mutations and gene expression profiles across 73 cancer cell lines. Genomic and pharmacological data are integrated through a modular architecture that allows users to identify compounds active towards cancer cell lines bearing a specific genomic background and, conversely, the mutational or transcriptional status of cells responding or not-responding to a specific compound. Results are presented through intuitive graphical representations and supplemented with information obtained from public repositories. As both personalized targeted therapies and drug-repurposing are gaining increasing attention, GDA represents a resource to formulate hypotheses on the interplay between genomic traits and drug response in cancer. GDA is freely available at http://gda.unimore.it/.",2018-07-01 +33978686,Full-length de novo protein structure determination from cryo-EM maps using deep learning. ,"Advances in microscopy instruments and image processing algorithms have led to an increasing number of cryo-EM maps. 
However, building accurate models for the EM maps at 3-5 Å resolution remains a challenging and time-consuming process. With the rapid growth of deposited EM maps, there is an increasing gap between the maps and reconstructed/modeled 3-dimensional (3D) structures. Therefore, automatic reconstruction of atomic-accuracy full-atom structures from EM maps is pressingly needed. We present a semi-automatic de novo structure determination method using a deep learning-based framework, named as DeepMM, which builds atomic-accuracy all-atom models from cryo-EM maps at near-atomic resolution. In our method, the main-chain and Cα positions as well as their amino acid and secondary structure types are predicted in the EM map using Densely Connected Convolutional Networks. DeepMM was extensively validated on 40 simulated maps at 5 Å resolution and 30 experimental maps at 2.6-4.8 Å resolution as well as an EMDB-wide data set of 2931 experimental maps at 2.6-4.9 Å resolution, and compared with state-of-the-art algorithms including RosettaES, MAINMAST, and Phenix. Overall, our DeepMM algorithm obtained a significant improvement over existing methods in terms of both accuracy and coverage in building full-length protein structures on all test sets, demonstrating the efficacy and general applicability of DeepMM. http://huanglab.phys.hust.edu.cn/DeepMM. Supplementary data are available at Bioinformatics online.",2021-05-12 +28529078,HCSGD: An integrated database of human cellular senescence genes.,"Cellular senescence is an irreversible cell cycle arrest program in response to various exogenous and endogenous stimuli like telomere dysfunction and DNA damage. It has been widely accepted as an anti-tumor program and is also found closely related to embryo development, tissue repair, organismal aging and age-related degenerative diseases. In the past decades, numerous efforts have been made to uncover the gene regulatory mechanisms of cellular senescence. 
There is a strong demand to integrate these data from various resources into one open platform. To facilitate researchers on cellular senescence, we have developed Human Cellular Senescence Gene Database (HCSGD) by integrating multiple online published data sources into a comprehensive senescence gene annotation platform (http://bioinfo.au.tsinghua.edu.cn/member/xwwang/HCSGD). Potential Human Cellular Senescence Genes (HCSGS) were collected by combining information from published literatures, gene expression profiling data and Protein-Protein Interaction networks. Additionally, genes are annotated with gene ontology annotation and microRNA/drug/compound target information. HCSGD provides a valuable resource to visualize cellular senescence gene networks, browse annotated functional information, and retrieve senescence-associated genes with a user-friendly web interface.",2017-04-29 +32248093,TE141K: Artistic Text Benchmark for Text Effect Transfer.,"Text effects are combinations of visual elements such as outlines, colors and textures of text, which can dramatically improve its artistry. Although text effects are extensively utilized in the design industry, they are usually created by human experts due to their extreme complexity; this is laborious and not practical for normal users. In recent years, some efforts have been made toward automatic text effect transfer; however, the lack of data limits the capabilities of transfer models. To address this problem, we introduce a new text effects dataset, TE141K (project page: https://daooshee.github.io/TE141K/), with 141,081 text effect/glyph pairs in total. Our dataset consists of 152 professionally designed text effects rendered on glyphs, including English letters, Chinese characters, and Arabic numerals. To the best of our knowledge, this is the largest dataset for text effect transfer to date. 
Based on this dataset, we propose a baseline approach called text effect transfer GAN (TET-GAN), which supports the transfer of all 152 styles in one model and can efficiently extend to new styles. Finally, we conduct a comprehensive comparison in which 14 style transfer models are benchmarked. Experimental results demonstrate the superiority of TET-GAN both qualitatively and quantitatively and indicate that our dataset is effective and challenging.",2021-09-02 +34693926,Concordance Among 10 Different Anticholinergic Burden Scales in At-Risk Older Populations.,"

Objective

The aim of the study was to evaluate the concordance among 10 anticholinergic scales for the measurement of anticholinergic drug exposure in at-risk elderly complex chronic patients in primary care.

Methods

An 8-month cross-sectional, multicenter study was carried out in a cohort of complex chronic patients older than 65 years in treatment with at least 1 drug with anticholinergic activity. Demographic, pharmacological, and clinical data were collected. Anticholinergic burden and risk were detected using the 10 scales included on the anticholinergic burden calculator (http://www.anticholinergicscales.es/). We used κ statistics to evaluate the concordance 2 to 2 (according to risk: high, medium, low or without risk) among the included scales.

Results

Four hundred seventy-three patients were recruited (60.3% female, median age of 84 years [interquartile range = 10]). Eighty was the total number of anticholinergic drugs with any scale (1197 prescriptions), with a median of 2 drugs with anticholinergic activity per patient (interquartile range = 2). The κ statistics comparing all the 10 scales ranged from -0.175 (Drug Burden Index versus Chew Scale) to 0.708 (Anticholinergic Activity Scale [AAS] versus Chew Scale). The best concordance was obtained between AAS and Chew Scale (κ = 0.708), followed by Clinician-Rated Anticholinergic Scale and Duran Scale (κ = 0.632) and AAS and Anticholinergic Cognitive Burden Scale (κ = 0.618), being considered substantial strengths of concordance.

Conclusions

The agreement among the 10 scales in elderly patients with complex chronic conditions was highly variable. Great care should be taken when assessing anticholinergic drug exposure using existing scales because of the wide variability among them. The only scales that showed agreement were the AAS-Chew, Clinician-Rated Anticholinergic Scale-Duran, and AAS-Anticholinergic Cognitive Burden Scale pairs. In the rest of the cases, the scales are not interchangeable.",2021-10-22 +34184499,Genetic profile for the detection of susceptibility to poisoning by exposure to pesticides.,"

Introduction

In humans, there are sets of genes that encode enzymes that decrease or increase the risks derived from exposure to pesticides. These include DNA repair genes (XRCC1, OGG1 and XRCC4); pesticide metabolizers (GSTP1 and PON1), and genes that act against oxidative stress (SOD2 and NQO1).

Objective

The aim of this literature review is to provide information about the genes involved in the defence systems against exposure to pesticides, as well as their polymorphisms, functions, and general characteristics of the encoded enzymes.

Material and methods

Information was obtained from scientific articles published between 2015-2020 in the PubMed database (https://pubmed.ncbi.nlm.nih.gov).

Results

Genes related to the defence processes against pesticides present single-nucleotide polymorphisms (SNPs) with allelic variants that affect the expressions or structures of the encoded enzymes, negatively altering their activities. If we knew the genetic profile that includes polymorphisms of DNA-repairing genes, metabolizing genes, and genes against oxidative stress in subjects exposed to pesticides, we would also know about their susceptibility to poisoning caused by these chemicals.

Conclusions

The genes could be used to propose a genetic profile in farmers exposed to various pesticides, including 10 gene polymorphisms involved in susceptibility to various pathologies related to DNA repair, xenobiotic metabolism, and oxidative stress. It could also be useful as a preventive measure to identify susceptibility to pesticide poisoning.",2021-05-24 +34029128,Orthographic Support for Word Learning in Clinical Populations: A Systematic Review.,"Purpose A systematic review was performed to determine the extent to which orthographic facilitation, a strategy to improve word learning, has been demonstrated in the literature for children and adolescents from clinical categories such as developmental language disorders (DLD), autism spectrum disorders (ASD), Down syndrome, dyslexia, hearing impairment, intellectual disability, and cerebral palsy. Method Five databases were searched for all studies published through December 2019. Eligible studies included participants from a clinical population (DLD, ASD, dyslexia, cerebral palsy, Down syndrome, hearing impairment, etc.) and compared word learning with and without orthography. Selected studies were extracted for pertinent information. In addition, assessment of the methodological rigor was performed for each study. Results The review yielded five studies that targeted word learning with orthographic facilitation for children from various clinical populations including DLD, verbal children with autism, Down syndrome, and dyslexia. All studied populations showed a benefit for word learning in picture naming posttests when words were trained in the presence of orthography. Conclusions For the studied populations, training words in the presence of orthography will improve word learning accuracy and retention. The review highlights the need for more research in this area across other clinical populations. 
Supplemental Material https://doi.org/10.23641/asha.14632791.",2021-05-24 +34111737,"Release LTP_12_2020, featuring a new ARB alignment and improved 16S rRNA tree for prokaryotic type strains.","The new release of the All-Species Living Tree Project (LTP) represents an important step forward in the reconstruction of 16S rRNA gene phylogenies, since we not only provide an updated set of type strain sequences until December 2020, but also a series of improvements that increase the quality of the database. An improved universal alignment has been introduced that is implemented in the ARB format. In addition, all low-quality sequences present in the previous releases have been substituted by new entries with higher quality, many of them as a result of whole genome sequencing. Altogether, the improvements in the dataset and 16S rRNA sequence alignment allowed us to reconstruct robust phylogenies. The trees made available through this current LTP release feature the best topologies currently achievable. The given nomenclature and taxonomic hierarchy reflect all the changes available up to December 2020. The aim is to regularly update the validly published nomenclatural classification changes and new taxa proposals. The new release can be found at the following URL: https://imedea.uib-csic.es/mmg/ltp/.",2021-05-24 +29512401,GENIPAC: A Genomic Information Portal for Head and Neck Cancer Cell Systems.,"Head and neck cancer (HNC)-derived cell lines represent fundamental models for studying the biological mechanisms underlying cancer development and precision therapies. However, mining the genomic information of HNC cells from available databases requires knowledge on bioinformatics and computational skill sets. Here, we developed a user-friendly web resource for exploring, visualizing, and analyzing genomics information of commonly used HNC cell lines. We populated the current version of GENIPAC with 44 HNC cell lines from 3 studies: ORL Series, OPC-22, and H Series. 
Specifically, the mRNA expressions for all the 3 studies were derived with RNA-seq. The copy number alterations analysis of ORL Series was performed on the Genome Wide Human Cytoscan HD array, while copy number alterations for OPC-22 were derived from whole exome sequencing. Mutations from ORL Series and H Series were derived from RNA-seq information, while OPC-22 was based on whole exome sequencing. All genomic information was preprocessed with customized scripts and underwent data validation and correction through data set validator tools provided by cBioPortal. The clinical and genomic information of 44 HNC cell lines are easily assessable in GENIPAC. The functional utility of GENIPAC was demonstrated with some of the genomic alterations that are commonly reported in HNC, such as TP53, EGFR, CCND1, and PIK3CA. We showed that these genomic alterations as reported in The Cancer Genome Atlas database were recapitulated in the HNC cell lines in GENIPAC. Importantly, genomic alterations within pathways could be simultaneously visualized. We developed GENIPAC to create access to genomic information on HNC cell lines. This cancer omics initiative will help the research community to accelerate better understanding of HNC and the development of new precision therapeutic options for HNC treatment. GENIPAC is freely available at http://genipac.cancerresearch.my/ .",2018-03-07 +34558272,"The risk of blood-borne infections in Poland - opportunities and threats to public health, nationwide qualitative research.","

Introduction

The article presents the diagnosis of the problem of blood-borne infections in Poland from perspectives of experts' opinions at the voivodship level. The evaluation became the basis for subsequent analysis, aimed at creating assumptions for the proposed strategies to prevent blood-borne infections in Poland.

Material and methods

Diagnosis was based on the expertise of practitioners in epidemiology and service providers. Analysis covered assessment of service safety, examples, recommended practices, forms and scope of education. Also covered is information for the general public from different aspects: points of view of recipients and service providers, legislation, system organization, and finances. The SWOT method was used in analysis. The opportunities and threats concerning the risk of blood-borne diseases for the service sector are presented, as well as data gathered from 42 representative experts from across Poland.

Results

Databases on health indicators, covered by the reporting obligation of all diagnosed cases, are a crucial element of the surveillance system in public health (e.g. sentinel). Additional information on health care management (risk management) is not a common and routine approach. The study fills a gap in knowledge about risk management in the medical and non-medical services sector. The information also enriches education programmes (e.g. http://www.hcv.pzh.gov.pl/).

Conclusions

Currently, the evidence-based approaches in medicine and in public health are standard. Experts emphasize that the opportunities for the prevention of infection risk are linked to technological advances and innovations, while threats are seen in both financial and organizational constraints, and the non-normalized, dynamically developing service market.",2020-07-16 +34406374,The role of CpG island methylator phenotype in the clinical course of hepatocellular carcinoma. ,"Aberrant DNA methylation is strongly associated with heterogeneity in tumors. This study investigated the prognostic value of CpG island methylator phenotype (CIMP) in hepatocellular carcinoma (HCC). A total of 319 HCC samples with 21,121 CpG sites were included in this study and 215 disease-free survival (DFS) and overall survival (OS)-related CpG sites were identified. These CpG sites were divided into 7 clusters by using consensus clustering method. Cluster 4, which constructed the prognostic prediction model as the seed cluster to evaluate survival risk for DFS and OS of HCC patients, had the lowest methylation level with the worse prognosis. The low-risk group patients had a significantly prolonged DFS and OS than the patients in the high-risk group (p = 0.008 and p < 0.001, respectively). A receiver operating characteristic curve results for predicting DFS and OS was 0.691 and 0.695, respectively. These results suggested that the CpG site methylation appears to be an informative prognostic biomarker in HCC. The CpG site methylation-related prognostic model may be an innovative insight to evaluate clinical outcomes for HCC patients. The code of the analysis is available at https://www.bioconductor.org. Supplementary data are available at Bioinformatics online.",2021-08-18 +,A tutorial of diverse genome analysis tools found in the CoGe web-platform using Plasmodium spp. 
as a model,"Abstract Integrated platforms for storage, management, analysis and sharing of large quantities of omics data have become fundamental to comparative genomics. CoGe (https://genomevolution.org/coge/) is an online platform designed to manage and study genomic data, enabling both data- and hypothesis-driven comparative genomics. CoGe’s tools and resources can be used to organize and analyse both publicly available and private genomic data from any species. Here, we demonstrate the capabilities of CoGe through three example workflows using 17 Plasmodium genomes as a model. Plasmodium genomes present unique challenges for comparative genomics due to their rapidly evolving and highly variable genomic AT/GC content. These example workflows are intended to serve as templates to help guide researchers who would like to use CoGe to examine diverse aspects of genome evolution. In the first workflow, trends in genome composition and amino acid usage are explored. In the second, changes in genome structure and the distribution of synonymous (Ks) and non-synonymous (Kn) substitution values are evaluated across species with different levels of evolutionary relatedness. In the third workflow, microsyntenic analyses of multigene families’ genomic organization are conducted using two Plasmodium-specific gene families—serine repeat antigen, and cytoadherence-linked asexual gene—as models. In general, these example workflows show how to achieve quick, reproducible and shareable results using the CoGe platform. We were able to replicate previously published results, as well as leverage CoGe’s tools and resources to gain additional insight into various aspects of Plasmodium genome evolution. Our results highlight the usefulness of the CoGe platform, particularly in understanding complex features of genome evolution. 
Database URL: https://genomevolution.org/coge/",2018-01-01 +,Occurrence of the Stunt Nematode Neodolichorhynchus sulcatus as Pathogen of Pepper (Capsicum annuum) in Israel,"Stunt nematodes, family Dolichodoridae, are migratory ectoparasites of roots. They feed on epidermal cells by inserting only the stylet tip into the cell’s surface tissue. As one of the less common stunt nematodes, little is known about Neodolichorhynchus sulcatus (de Guiran, 1967) Jairajpuri & Hunt, 1984. To date, it has been reported from Morocco (de Guiran 1967), Spain (Tobar-Jiménez 1970), Cameroon (Sakwe and Geraert 1991), India (Sultan et al. 1995), and Iran (Pourjam et al. 2011). This is the first detection of N. sulcatus infecting pepper (variety 1204 Alef Beit Zeraim) root from a farm in Ein Yahav, Arava Rift, Israel (30°39′54.3′′N, 35°15′02.9′′E). In March 2019, we observed decline in development of pepper plants. We recovered nematodes from the pepper root zone at high population density. Nematodes were extracted from the soil using the Baermann tray technique. Identification was based on a combination of molecular and morphological methods. Genomic DNA was extracted from a single fresh nematode; amplicons from 18S rRNA, 28S D2 to D3 rRNA, and the intergenic spacer (ITS) region were generated following the procedure detailed in Qing et al. (2019); and sequencing was performed. The sequences were submitted to GenBank with accession numbers MK96525 to MK965256 (28S), MK965252 (18S), and MK965249 and MK965250 (ITS), providing the first rRNA data for N. sulcatus. Phylogenetic analysis placed this species as sister to Bitylenchus iphilus in 18S (98.15% similar in BLAST), sister to Paratrophurus bhutanensis in 28S (91.68% similar in BLAST), or sister to a well-supported clade containing B. iphilus (91.35% similar in BLAST, highest match), B. maximus, B. hispaniensis, P. bhutanensis, and P. 
bursifer in ITS (figures for phylogeny trees available at https://photos.app.goo.gl/2Y7fToEftBopbnB67). Although our species were not clustered with other Neodolichorhynchus species, morphology and morphometry confirmed its identity as N. sulcatus, including the cephalic region offset, basal bulb pyriform, a cuticle with 16 longitudinal ridges including the lateral fields, irregularly areolated lateral fields with three ridges, bursa not notched at tail tip, gubernaculum with smooth proximal end, and vulva lacking lateral flaps, tail cylindroid-conical with rounded and smooth terminus. Measurements (mean ± standard error, range, in μm) for female (n = 10): body length 801 ± 49.4 (731 to 901), anterior end to vulva 435 ± 34.8 (410 to 520), V value 54.2 ± 1.69 (52.4 to 57.7), lip height 4.53 ± 0.31 (4.39 to 5.41), stylet length 21.4 ± 0.37 (20.6 to 21.8), cone of stylet 11.4 ± 0.63 (10.7 to 12.8), anterior end to center of median bulb 86.0 ± 3.53 (78.7 to 91.3), pharynx length 146 ± 4.05 (139 to 153), maximum body width 25.6 ± 1.00 (24 to 27), anus/cloacal width 15.7 ± 1.39 (14.3 to 17.9), tail length 47.2 ± 3.85 (43.4 to 56.5); for male (n = 7): body length 786 ± 64.0 (704 to 874), lip height 4.34 ± 0.55 (3.67 to 5.24), stylet length 20.4 ± 0.73 (19.7 to 21.7), cone of stylet 10.9 ± 0.82 (10.3 to 12.7), anterior end to median bulb 81.2 ± 3.54 (75.2 to 87), pharynx length 138 ± 6.66 (132 to 148), maximum body width 22.6 ± 2.15 (20.4 to 25.8), anus/cloacal width 17.7 ± 1.50 (15.2 to 19.9), tail length 54.7 ± 5.67 (46.6 to 62.6), spicule length 26.3 ± 1.21 (24.2 to 27.6). To confirm pathogenicity, we performed inoculation assays in greenhouse conditions. Individual nematodes were manually picked, and 300 juveniles were inoculated onto healthy pepper (variety Maccabi) roots growing in sterile soil in a greenhouse. Three pots with four pepper plants per pot were inoculated, and three noninoculated pots served as a control. 
Nematodes were harvested from the root and soil 40 days after inoculation, yielding an average of 483 ± 75 nematodes per pot. We observed a reduction in plant growth and necrotic spots on the roots similar to those infected pepper in Ein Yahav. These results confirmed the nematode’s pathogenicity to pepper. This is the first report of N. sulcatus infecting pepper plants in Israel.",2020-02-01 +26919060,MSeqDR: A Centralized Knowledge Repository and Bioinformatics Web Resource to Facilitate Genomic Investigations in Mitochondrial Disease.,"MSeqDR is the Mitochondrial Disease Sequence Data Resource, a centralized and comprehensive genome and phenome bioinformatics resource built by the mitochondrial disease community to facilitate clinical diagnosis and research investigations of individual patient phenotypes, genomes, genes, and variants. A central Web portal (https://mseqdr.org) integrates community knowledge from expert-curated databases with genomic and phenotype data shared by clinicians and researchers. MSeqDR also functions as a centralized application server for Web-based tools to analyze data across both mitochondrial and nuclear DNA, including investigator-driven whole exome or genome dataset analyses through MSeqDR-Genesis. MSeqDR-GBrowse genome browser supports interactive genomic data exploration and visualization with custom tracks relevant to mtDNA variation and mitochondrial disease. MSeqDR-LSDB is a locus-specific database that currently manages 178 mitochondrial diseases, 1,363 genes associated with mitochondrial biology or disease, and 3,711 pathogenic variants in those genes. MSeqDR Disease Portal allows hierarchical tree-style disease exploration to evaluate their unique descriptions, phenotypes, and causative variants. Automated genomic data submission tools are provided that capture ClinVar compliant variant annotations. PhenoTips will be used for phenotypic data submission on deidentified patients using human phenotype ontology terminology. 
The development of a dynamic informed patient consent process to guide data access is underway to realize the full potential of these resources.",2016-03-21 +31605615,LeGOO: An Expertized Knowledge Database for the Model Legume Medicago truncatula.,"Medicago truncatula was proposed, about three decades ago, as a model legume to study the Rhizobium-legume symbiosis. It has now been adopted to study a wide range of biological questions, including various developmental processes (in particular root, symbiotic nodule and seed development), symbiotic (nitrogen-fixing and arbuscular mycorrhizal endosymbioses) and pathogenic interactions, as well as responses to abiotic stress. With a number of tools and resources set up in M. truncatula for omics, genetics and reverse genetics approaches, massive amounts of data have been produced, as well as four genome sequence releases. Many of these data were generated with heterogeneous tools, notably for transcriptomics studies, and are consequently difficult to integrate. This issue is addressed by the LeGOO (for Legume Graph-Oriented Organizer) knowledge base (https://www.legoo.org), which finds the correspondence between the multiple identifiers of the same gene. Furthermore, an important goal of LeGOO is to collect and represent biological information from peer-reviewed publications, whatever the technical approaches used to obtain this information. The information is modeled in a graph-oriented database, which enables flexible representation, with currently over 200,000 relations retrieved from 298 publications. LeGOO also provides the user with mining tools, including links to the Mt5.0 genome browser and associated information (on gene functional annotation, expression, methylome, natural diversity and available insertion mutants), as well as tools to navigate through different model species. 
LeGOO is, therefore, an innovative database that will be useful to the Medicago and legume community to better exploit the wealth of data produced on this model species.",2020-01-01 +34404338,The Development of the Love and Respect Marriage Scale.,"The constructs of love and respect have been known to be essential ingredients contributing positively to marital satisfaction, but to-date they have mostly been measured using separate scales. However, given the overlap between both constructs this study set out, using self-report methodologies, to develop a comprehensive scale which measures both love and respect known as The Love and Respect Marriage Scale. Using a nonclinical community Singapore sample (n = 400), an initial item pool was developed, and through exploratory factor analysis, a robust factor structure emerged that consisted of eight subscales and 46 items. This factor structure was shown to be a consistent and cross-culturally acceptable model using samples from USA, n = 396, South Africa, n = 390, Nigeria, n = 364, and India, n = 306. Good reliability values were achieved. Construct, convergent, divergent, and incremental validity were also demonstrated as comparisons were made with shorter established marriage scales. Implications and advantages of a longer marital scale were discussed.Supplemental data for this article is available online at https://doi.org/10.1080/0092623X.2021.1963362 .",2021-08-17 +,First Report of Shoot Blight Caused by Neoscytalidium dimidiatum on Citrus in Jordan,"In October 2017, a new disease was reported to the National Agricultural Research Center by a citrus farmer in Northern Shoneh, Irbid, Jordan. Symptoms were noticed on different citrus trees grafted on two rootstocks (sour orange [SO] and Citrus volkameriana [V]). The trees were clementine (C. clementina), grapefruit (C. paradisi), and pummelo (shaddock) (C. maxima); 300 trees were affected and were removed (2 of 22 ha) to reduce the inoculum. 
Affected trees suffered from dryness and death of branches (shoot dieback), discoloration of the vascular tissue, and sometimes gummosis. A primary survey showed that ∼10% of citrus trees in that area exhibited symptoms. Samples from different infected citrus types showing shoot dieback were collected. Shoots were cut into small pieces, disinfected with 70% ethanol for 1 min, 5% NaOCl for 3 min, and sterile distilled water for 3 min. Pieces were plated on PDA and incubated for 10 days at 25°C. For morphological characterization, 10 isolates from different citrus hosts were used. Colonies were olive green to greyish, grew rapidly, and colonized the PDA plate within 3 to 5 days. Hyphae were branched and septate. Conidia were hyaline, ellipsoid to ovoid, without septa or with 1 septum (Fernández-Herrera et al. 2017; Hajlaoui et al. 2018; Nouri et al. 2018). For molecular identification, one representative isolate from clementine grafted on SO was used. Mycelia of a single conidial culture were harvested and DNA extracted using a CTAB protocol (Doyle and Doyle 1990). Internal transcribed spacer (ITS1/ITS4) (White et al. 1990) and β-tubulin (BT) (Bt2a/Bt2b) (Glass and Donaldson 1995) gene regions were amplified and sent for sequencing at Macrogen, South Korea. Sequences were edited via Geneious 6.1.6 (https://www.geneious.com) and BLASTn at the NCBI website. Similar accessions were considered and aligned with the sequence of the fungus, and a phylogenic tree was created using Geneious 6.1.6. DNA sequences were deposited in GenBank (ITS, MK530327; BT, MK830061). The matched sequences were from Neoscytalidium dimidiatum (MH861121.1 for ITS and MH643766.1 for BT, with 92.33 and 100% homology, respectively). Based on morphological and molecular data, the isolates were identified as N. dimidiatum (Penz.) Crous & Slippers. 
Pathogenicity tests were conducted on 24 two-year-old potted citrus cultivars (lemon, navel orange, clementine, and pummelo) grafted on two rootstocks (SO and V). Three replicates/cultivar/rootstock and one control/cultivar/rootstock were used. On each test plant, three branches were inoculated. Agar plugs (5 mm diam.) of the fungus were inoculated under the bark using a 2-cm T-shaped wound and then Parafilmed three times (Nouri et al. 2018). Plants were placed at environmental temperature and >70% RH. Controls were inoculated with PDA plugs only. Branch dryness and discoloration were observed within 6 months on all inoculated plants. Shoot blight (dryness) was noticed on both rootstocks and on different citrus cultivars; however, death percentage varied significantly. The overall average death percentage was 64.58 and 83.33% for SO and V rootstocks, respectively. For SO rootstock it was 66.70, 75.00, 58.30, and 58.30% and for V rootstock 66.70, 83.30, 91.67, and 91.67% for lemon, mandarin, pummelo, and orange, respectively. Control plants remained healthy. N. dimidiatum was reisolated from the infected plants and identified morphologically. This is the first record of a disease caused by N. dimidiatum on citrus in Jordan. The disease was first reported on citrus in Italy (Polizzi et al. 2009). Citrus trees are important in the Jordan Valley; the total area planted with different types exceeds 6,400 ha, and production is >125,000 metric tons. This represents ∼10% of total fruit tree area and 22% of fruit tree production. 
Future studies are needed to survey the northern Jordan Valley and nurseries for prevalence and severity, study the genetic diversity of the pathogen, and implement management strategies.",2020-02-01 +34183376,"Development of Robust Quantitative Structure-Activity Relationship Models for CYP2C9, CYP2D6, and CYP3A4 Catalysis and Inhibition.","Cytochrome P450 enzymes are responsible for the metabolism of >75% of marketed drugs, making it essential to identify the contributions of individual cytochromes P450 to the total clearance of a new candidate drug. Overreliance on one cytochrome P450 for clearance levies a high risk of drug-drug interactions; and considering that several human cytochrome P450 enzymes are polymorphic, it can also lead to highly variable pharmacokinetics in the clinic. Thus, it would be advantageous to understand the likelihood of new chemical entities to interact with the major cytochrome P450 enzymes at an early stage in the drug discovery process. Typical screening assays using human liver microsomes do not provide sufficient information to distinguish the specific cytochromes P450 responsible for clearance. In this regard, we experimentally assessed the metabolic stability of ∼5000 compounds for the three most prominent xenobiotic metabolizing human cytochromes P450, i.e., CYP2C9, CYP2D6, and CYP3A4, and used the data sets to develop quantitative structure-activity relationship models for the prediction of high-clearance substrates for these enzymes. Screening library included the NCATS Pharmaceutical Collection, comprising clinically approved low-molecular-weight compounds, and an annotated library consisting of drug-like compounds. To identify inhibitors, the library was screened against a luminescence-based cytochrome P450 inhibition assay; and through crossreferencing hits from the two assays, we were able to distinguish substrates and inhibitors of these enzymes. 
The best substrate and inhibitor models (balanced accuracies ∼0.7), as well as the data used to develop these models, have been made publicly available (https://opendata.ncats.nih.gov/adme) to advance drug discovery across all research groups. SIGNIFICANCE STATEMENT: In drug discovery and development, drug candidates with indiscriminate cytochrome P450 metabolic profiles are considered advantageous, since they provide less risk of potential issues with cytochrome P450 polymorphisms and drug-drug interactions. This study developed robust substrate and inhibitor quantitative structure-activity relationship models for the three major xenobiotic metabolizing cytochromes P450, i.e., CYP2C9, CYP2D6, and CYP3A4. The use of these models early in drug discovery will enable project teams to strategize or pivot when necessary, thereby accelerating drug discovery research.",2021-06-28 +32150667,Development and validation of a 10-gene prognostic signature for acute myeloid leukaemia.,"Acute myeloid leukaemia (AML) is the most common type of adult acute leukaemia and has a poor prognosis. Thus, optimal risk stratification is of greatest importance for reasonable choice of treatment and prognostic evaluation. For our study, a total of 1707 samples of AML patients from three public databases were divided into meta-training, meta-testing and validation sets. The meta-training set was used to build risk prediction model, and the other four data sets were employed for validation. By log-rank test and univariate COX regression analysis as well as LASSO-COX, AML patients were divided into high-risk and low-risk groups based on AML risk score (AMLRS) which was constituted by 10 survival-related genes. 
In meta-training, meta-testing and validation sets, the patient in the low-risk group all had a significantly longer OS (overall survival) than those in the high-risk group (P < .001), and the area under ROC curve (AUC) by time-dependent ROC was 0.5854-0.7905 for 1 year, 0.6652-0.8066 for 3 years and 0.6622-0.8034 for 5 years. Multivariate COX regression analysis indicated that AMLRS was an independent prognostic factor in four data sets. Nomogram combining the AMLRS and two clinical parameters performed well in predicting 1-year, 3-year and 5-year OS. Finally, we created a web-based prognostic model to predict the prognosis of AML patients (https://tcgi.shinyapps.io/amlrs_nomogram/).",2020-03-09 +33866367,usDSM: a novel method for deleterious synonymous mutation prediction using undersampling scheme. ,"Although synonymous mutations do not alter the encoded amino acids, they may impact protein function by interfering with the regulation of RNA splicing or altering transcript splicing. New progress on next-generation sequencing technologies has put the exploration of synonymous mutations at the forefront of precision medicine. Several approaches have been proposed for predicting the deleterious synonymous mutations specifically, but their performance is limited by imbalance of the positive and negative samples. In this study, we firstly expanded the number of samples greatly from various data sources and compared six undersampling strategies to solve the problem of the imbalanced datasets. The results suggested that cluster centroid is the most effective scheme. Secondly, we presented a computational model, undersampling scheme based method for deleterious synonymous mutation (usDSM) prediction, using 14-dimensional biology features and random forest classifier to detect the deleterious synonymous mutation. 
The results on the test datasets indicated that the proposed usDSM model can attain superior performance in comparison with other state-of-the-art machine learning methods. Lastly, we found that the deep learning model did not play a substantial role in deleterious synonymous mutation prediction through a lot of experiments, although it achieves superior results in other fields. In conclusion, we hope our work will contribute to the future development of computational methods for a more accurate prediction of the deleterious effect of human synonymous mutation. The web server of usDSM is freely accessible at http://usdsm.xialab.info/.",2021-09-01 +27899644,The dbGaP data browser: a new tool for browsing dbGaP controlled-access genomic data.,"The database of Genotypes and Phenotypes (dbGaP) Data Browser (https://www.ncbi.nlm.nih.gov/gap/ddb/) was developed in response to requests from the scientific community for a resource that enable view-only access to summary-level information and individual-level genotype and sequence data associated with phenotypic features maintained in the controlled-access tier of dbGaP. Until now, the dbGaP controlled-access environment required investigators to submit a data access request, wait for Data Access Committee review, download each data set and locally examine them for potentially relevant information. Existing unrestricted-access genomic data browsing resources (e.g. http://evs.gs.washington.edu/EVS/, http://exac.broadinstitute.org/) provide only summary statistics or aggregate allele frequencies. The dbGaP Data Browser serves as a third solution, providing researchers with view-only access to a compilation of individual-level data from general research use (GRU) studies through a simplified controlled-access process. 
The National Institutes of Health (NIH) will continue to improve the Browser in response to user feedback and believes that this tool may decrease unnecessary download requests, while still facilitating responsible genomic data-sharing.",2016-11-29 +29106642,"MSDD: a manually curated database of experimentally supported associations among miRNAs, SNPs and human diseases.","The MiRNA SNP Disease Database (MSDD, http://www.bio-bigdata.com/msdd/) is a manually curated database that provides comprehensive experimentally supported associations among microRNAs (miRNAs), single nucleotide polymorphisms (SNPs) and human diseases. SNPs in miRNA-related functional regions such as mature miRNAs, promoter regions, pri-miRNAs, pre-miRNAs and target gene 3'-UTRs, collectively called 'miRSNPs', represent a novel category of functional molecules. miRSNPs can lead to miRNA and its target gene dysregulation, and resulting in susceptibility to or onset of human diseases. A curated collection and summary of miRSNP-associated diseases is essential for a thorough understanding of the mechanisms and functions of miRSNPs. Here, we describe MSDD, which currently documents 525 associations among 182 human miRNAs, 197 SNPs, 153 genes and 164 human diseases through a review of more than 2000 published papers. Each association incorporates information on the miRNAs, SNPs, miRNA target genes and disease names, SNP locations and alleles, the miRNA dysfunctional pattern, experimental techniques, a brief functional description, the original reference and additional annotation. MSDD provides a user-friendly interface to conveniently browse, retrieve, download and submit novel data. MSDD will significantly improve our understanding of miRNA dysfunction in disease, and thus, MSDD has the potential to serve as a timely and valuable resource.",2018-01-01 +33602119,"MeltingPlot, a user-friendly online tool for epidemiological investigation using High Resolution Melting data.","

Background

The rapid identification of pathogen clones is pivotal for effective epidemiological control strategies in hospital settings. High Resolution Melting (HRM) is a molecular biology technique suitable for fast and inexpensive pathogen typing protocols. Unfortunately, the mathematical/informatics skills required to analyse HRM data for pathogen typing likely limit the application of this promising technique in hospital settings.

Results

MeltingPlot is the first tool specifically designed for epidemiological investigations using HRM data, easing the application of HRM typing to large real-time surveillance and rapid outbreak reconstructions. MeltingPlot implements a graph-based algorithm designed to discriminate pathogen clones on the basis of HRM data, producing portable typing results. The tool also merges typing information with isolates and patients metadata to create graphical and tabular outputs useful in epidemiological investigations and it runs in a few seconds even with hundreds of isolates.

Availability

https://skynet.unimi.it/index.php/tools/meltingplot/ .

Conclusions

The analysis and result interpretation of HRM typing protocols can be not trivial and this likely limited its application in hospital settings. MeltingPlot is a web tool designed to help the user to reconstruct epidemiological events by combining HRM-based clustering methods and the isolate/patient metadata. The tool can be used for the implementation of HRM based real time large scale surveillance programs in hospital settings.",2021-02-18 +33822870,ATSE: a peptide toxicity predictor by exploiting structural and evolutionary information based on graph neural network and attention mechanism. ,"Peptides have recently emerged as promising therapeutic agents against various diseases. For both research and safety regulation purposes, it is of high importance to develop computational methods to accurately predict the potential toxicity of peptides within the vast number of candidate peptides. In this study, we proposed ATSE, a peptide toxicity predictor by exploiting structural and evolutionary information based on graph neural networks and attention mechanism. More specifically, it consists of four modules: (i) a sequence processing module for converting peptide sequences to molecular graphs and evolutionary profiles, (ii) a feature extraction module designed to learn discriminative features from graph structural information and evolutionary information, (iii) an attention module employed to optimize the features and (iv) an output module determining a peptide as toxic or non-toxic, using optimized features from the attention module. Comparative studies demonstrate that the proposed ATSE significantly outperforms all other competing methods. We found that structural information is complementary to the evolutionary information, effectively improving the predictive performance. Importantly, the data-driven features learned by ATSE can be interpreted and visualized, providing additional information for further analysis. 
Moreover, we present a user-friendly online computational platform that implements the proposed ATSE, which is now available at http://server.malab.cn/ATSE. We expect that it can be a powerful and useful tool for researchers of interest.",2021-09-01 +33901284,Atomic-level evolutionary information improves protein-protein interface scoring. ,"The crucial role of protein interactions and the difficulty in characterising them experimentally strongly motivates the development of computational approaches for structural prediction. Even when protein-protein docking samples correct models, current scoring functions struggle to discriminate them from incorrect decoys. The previous incorporation of conservation and coevolution information has shown promise for improving protein-protein scoring. Here, we present a novel strategy to integrate atomic-level evolutionary information into different types of scoring functions to improve their docking discrimination. : We applied this general strategy to our residue-level statistical potential from InterEvScore and to two atomic-level scores, SOAP-PP and Rosetta interface score (ISC). Including evolutionary information from as few as ten homologous sequences improves the top 10 success rates of individual atomic-level scores SOAP-PP and Rosetta ISC by respectively 6 and 13.5 percentage points, on a large benchmark of 752 docking cases. The best individual homology-enriched score reaches a top 10 success rate of 34.4%. A consensus approach based on the complementarity between different homology-enriched scores further increases the top 10 success rate to 40%. All data used for benchmarking and scoring results, as well as a Singularity container of the pipeline, are available at http://biodev.cea.fr/interevol/interevdata/. 
Supplementary data are available at Bioinformatics online.",2021-04-26 +33900090,Pathway-Based Drug Repurposing with DPNetinfer: A Method to Predict Drug-Pathway Associations via Network-Based Approaches.,"Identification of drug-pathway associations plays an important role in pathway-based drug repurposing. However, it is time-consuming and costly to uncover new drug-pathway associations experimentally. The drug-induced transcriptomics data provide a global view of cellular pathways and tell how these pathways change under different treatments. These data enable computational approaches for large-scale prediction of drug-pathway associations. Here we introduced DPNetinfer, a novel computational method to predict potential drug-pathway associations based on substructure-drug-pathway networks via network-based approaches. The results demonstrated that DPNetinfer performed well in a pan-cancer network with an AUC (area under curve) = 0.9358. Meanwhile, DPNetinfer was shown to have a good capability of generalization on two external validation sets (AUC = 0.8519 and 0.7494, respectively). As a case study, DPNetinfer was used in pathway-based drug repurposing for cancer therapy. Unexpected anticancer activities of some nononcology drugs were then identified on the PI3K-Akt pathway. Considering tumor heterogeneity, seven primary site-based models were constructed by DPNetinfer in different drug-pathway networks. In a word, DPNetinfer provides a powerful tool for large-scale prediction of drug-pathway associations in pathway-based drug repurposing. A web tool for DPNetinfer is freely available at http://lmmd.ecust.edu.cn/netinfer/.",2021-04-26 +31742321,KofamKOALA: KEGG Ortholog assignment based on profile HMM and adaptive score threshold.,"

Summary

KofamKOALA is a web server to assign KEGG Orthologs (KOs) to protein sequences by homology search against a database of profile hidden Markov models (KOfam) with pre-computed adaptive score thresholds. KofamKOALA is faster than existing KO assignment tools with its accuracy being comparable to the best performing tools. Function annotation by KofamKOALA helps linking genes to KEGG resources such as the KEGG pathway maps and facilitates molecular network reconstruction.

Availability and implementation

KofamKOALA, KofamScan and KOfam are freely available from GenomeNet (https://www.genome.jp/tools/kofamkoala/).

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +31909114,Transcriptomic and proteomic data in developing tomato fruit.,"Transcriptomic and proteomic analyses were performed on three replicates of tomato fruit pericarp samples collected at nine developmental stages, each replicate resulting from the pooling of at least 15 fruits. For transcriptome analysis, Illumina-sequenced libraries were mapped on the tomato genome with the aim to obtain absolute quantification of mRNA abundance. To achieve this, spikes were added at the beginning of the RNA extraction procedure. From 34,725 possible transcripts identified in the tomato, 22,877 were quantified in at least one of the nine developmental stages. For the proteome analysis, label-free liquid chromatography coupled to tandem mass spectrometry (LC-MS/MS) was used. Peptide ions, and subsequently the proteins from which they were derived, were quantified by integrating the signal intensities obtained from extracted ion currents (XIC) with the MassChroQ software. Absolute concentrations of individual proteins were estimated for 2375 proteins by using a mixed effects model from log10-transformed intensities and normalized to the total protein content. Transcriptomics data are available via GEO repository with accession number GSE128739. The raw MS output files and identification data were deposited on-line using the PROTICdb database (http://moulon.inra.fr/protic/tomato_fruit_development) and MS proteomics data have also been deposited to the ProteomeXchange with the dataset identifier PXD012877. The main added value of these quantitative datasets is their use in a mathematical model to estimate protein turnover in developing tomato fruit.",2019-12-17 +34128961,PAX2GRAPHML: a Python library for large-scale regulation network analysis using BIOPAX. 
,"PAX2GRAPHML is an open source Python library that allows to easily manipulate BioPAX source files as regulated reaction graphs described in .graphml format. The concept of regulated reactions, which allows connecting regulatory, signaling and metabolic levels, has been used. Biochemical reactions and regulatory interactions are homogeneously described by regulated reactions involving substrates, products, activators and inhibitors as elements. PAX2GRAPHML is highly flexible and allows generating graphs of regulated reactions from a single BioPAX source or by combining and filtering BioPAX sources. Supported by the graph exchange format .graphml, the large-scale graphs produced from one or more data sources can be further analyzed with PAX2GRAPHML or standard Python and R graph libraries. https://pax2graphml.genouest.org.",2021-06-15 +,"First Report of ‘Candidatus Phytoplasma trifolii’-Related Strain Associated With a New Disease in Tomatillo Plants in Zacatecas, Mexico","Tomatillo (Physalis ixocarpa) is an important crop in Mexico with a total of 698,016 tons on 42,882 ha in 2016. In July 2015, symptoms of yellowing, stunted growth, foliar deformation, and phyllody were observed in approximately 12% of tomatillo plants from 25 ha of commercial crops in Zacatecas, Mexico (22°43′17.1′′ N, 102°41′06.5′′ W). Total DNA was extracted from nine symptomatic and four symptomless tomatillo plants. Direct and nested PCR assays targeting the 16S rRNA gene were used to confirm the association of phytoplasma with the symptomatology. The primers used for direct PCR were P1 5′-AAGAGTTTGATCCTGGCTCAGGATT-3′ and Tint 5′-TCAGGCGTGTGCTCTAACCAGC-3′ (Smart et al. 1996), and for nested PCR, R16F2n 5′-GAAACGACTGCTAAGACTGG-3′ and R16R2 5′-TGACGGGCGGTGTGTACAAACCCCG-3′ (Gundersen and Lee 1996). No PCR products were obtained from the symptomless plants. The nested PCR amplicons (1.2 kb) amplified from all symptomatic plants were cloned separately and directly sequenced. 
BLAST analysis of the 16S rDNA sequences revealed that they shared 100% sequence identity to each other and 99.0% sequence identity with those of the 16SrVI group, ‘Candidatus Phytoplasma trifolii’ strains. Computer simulated RFLP analysis of the Zacatecas tomatillo phytoplasma sequence (GenBank accession no. MG775048) was performed using iPhyClassifier (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi) and RFLP profiles were compared with each phytoplasma group and subgroup (Zhao et al. 2013) confirming that the analyzed sequence shared 98.9% identity with the reference strain (AY390261). Finally, virtual RFLP patterns were identical to those of the group 16SrVI, subgroup A. Although some reports associate phytoplasmas of the group 16SrVI with diseases in other solanaceous crops such as chili pepper and tomato in Mexico (Mauricio-Castillo et al. 2015; Salas-Muñoz et al. 2016), there are no reports about the presence of Ca. P. trifolii related to tomatillo diseases. To our knowledge, this is the first report of Ca. P. trifolii associated with symptomatic tomatillo plants. The presence of phytoplasmas belonging to 16SrVI group on tomatillo could therefore represent a serious threat for this valuable crop in Mexico.",2018-08-01 +33718542,Cover cropped and tilled table grape vineyard: Data on leaves and stems.,"Data presented are on mass, length, SPAD and some physiological parameters of leaves and stems in a table grape vineyard of Italia variety grafted onto 1103 Paulsen, covered with a plastic sheet to advance ripening and managed with two soil systems in the Puglia region, South-eastern Italy in 2015 and 2016. The two systems differed for the soil management since in one area of the vineyard a cover crop was used (Trifolium repens L.), whereas in the other area only soil tillage was adopted. 
The data of the two seasons include: (a) mass of leaves of primary shoot, secondary shoot and opposite the cluster; (b) length of secondary shoots; (c) number of both secondary shoots and leaves of secondary shoots; (d) SPAD values and area of leaves opposite both first and second cluster on the primary shoot; (e) mass of stems of both primary and secondary shoots; and (f) some physiological parameters (Ψstem, temperature, Fv/Fm). The data in this article support and augment information presented in the research article 'Cover crops in the inter-row of a table grape vineyard managed with irrigation sensors: effects on yield, quality and glutamine synthetase activity in leaves' (Sci. Hortic. 281, 2021 https://doi.org/10.1016/j.scienta.2021.109963).",2021-02-11 +34665841,"Molecular species delimitation of shrub frogs of the genus Pseudophilautus (Anura, Rhacophoridae).","Sri Lanka is an amphibian hotspot of global significance. Its anuran fauna is dominated by the shrub frogs of the genus Pseudophilautus. Except for one small clade of four species in Peninsular India, these cool-wet adapted frogs, numbering some 59 extant species, are distributed mainly across the montane and lowland rain forests of the island. With species described primarily by morphological means, the diversification has never yet been subjected to a molecular species delimitation analysis, a procedure now routinely applied in taxonomy. Here we test the species boundaries of Pseudophilautus in the context of the phylogenetic species concept (PSC). We use all the putative species for which credible molecular data are available (nDNA-Rag-1; mt-DNA- 12S rRNA, 16S rRNA) to build a well resolved phylogeny, which is subjected to species delimitation analyses. The ABGD, bPTP, mPTP and bGMYC species delimitation methods applied to the 16S rRNA frog barcoding gene (for all species), 12S rRNA and Rag-1 nDNA grouped P. procax and P. abundus; P. hallidayi and P. fergusonianus; P. reticulatus and P. 
pappilosus; P. pleurotaenia and P. hoipolloi; P. hoffmani and P. asankai; P. silvaticus and P. limbus; P. dilmah and P. hankeni; P. fulvus and P. silus. Surprisingly, all analyses recovered 14 unidentified potential new species as well. The geophylogeny affirms a distribution across the island's aseasonal 'wet zone' and its three principal hill ranges, suggestive of allopatric speciation playing a dominant role, especially between mountain masses. Among the species that are merged by the delimitation analyses, a pattern leading towards a model of parapatric speciation emerges-ongoing speciation in the presence of gene flow. This delimitation analysis reinforces the species hypotheses, paving the way to a reasonable understanding of Sri Lankan Pseudophilautus, enabling both deeper analyses and conservation efforts of this remarkable diversification. http://zoobank.org/urn:lsid:zoobank.org:pub:DA869B6B-870A-4ED3-BF5D-5AA3F69DDD27.",2021-10-19 +33203349,MADA: a web service for analysing DNA methylation array data.,"

Background

DNA methylation in the human genome is acknowledged to be widely associated with biological processes and complex diseases. The Illumina Infinium methylation arrays have been approved as one of the most efficient and universal technologies to investigate the whole genome changes of methylation patterns. As methylation arrays may still be the dominant method for detecting methylation in the anticipated future, it is crucial to develop a reliable workflow to analyze methylation array data.

Results

In this study, we develop a web service MADA for the whole process of methylation arrays data analysis, which includes the steps of a comprehensive differential methylation analysis pipeline: pre-processing (data loading, quality control, data filtering, and normalization), batch effect correction, differential methylation analysis, and downstream analysis. In addition, we provide the visualization of pre-processing, differentially methylated probes or regions, gene ontology, pathway and cluster analysis results. Moreover, a customization function for users to define their own workflow is also provided in MADA.

Conclusions

With the analysis of two case studies, we have shown that MADA can complete the whole procedure of methylation array data analysis. MADA provides a graphical user interface and enables users with no computational skills and limited bioinformatics background to carry out complicated methylation array data analysis. The web server is available at: http://120.24.94.89:8080/MADA.",2020-11-18 +33449291,High-Dimensional Immune Monitoring for Chimeric Antigen Receptor T Cell Therapies.,"

Purpose of review

High-dimensional flow cytometry experiments have become a method of choice for high-throughput integration and characterization of cell populations. Here, we present a summary of state-of-the-art R-based pipelines used for differential analyses of cytometry data, largely based on chimeric antigen receptor (CAR) T cell therapies. These pipelines are based on publicly available R libraries, put together in a systematic and functional fashion, therefore free of cost.

Recent findings

In recent years, existing tools tailored to analyze complex high-dimensional data such as single-cell RNA sequencing (scRNAseq) have been successfully ported to cytometry studies due to the similar nature of flow cytometry and scRNAseq platforms. Existing environments like Cytobank (Kotecha et al., 2010), FlowJo (FlowJo™ Software) and FCS Express (https://denovosoftware.com) already offer a variety of these ported tools, but they either come at a premium or are fairly complicated to manage by an inexperienced user. To mitigate these limitations, experienced cytometrists and bioinformaticians usually incorporate these functions into an RShiny (https://shiny.rstudio.com) application that ultimately offers a user-friendly, intuitive environment that can be used to analyze flow cytometry data. Computational tools and Shiny-based tools are the perfect answer to the ever-growing dimensionality and complexity of flow cytometry data, by offering a dynamic, yet user-friendly exploratory space, tailored to bridge the space between the lab experimental world and the computational, machine learning space.",2021-01-15 +27940610,RPAN: rice pan-genome browser for ∼3000 rice genomes.,"A pan-genome is the union of the gene sets of all the individuals of a clade or a species and it provides a new dimension of genome complexity with the presence/absence variations (PAVs) of genes among these genomes. With the progress of sequencing technologies, pan-genome study is becoming affordable for eukaryotes with large-sized genomes. The Asian cultivated rice, Oryza sativa L., is one of the major food sources for the world and a model organism in plant biology. Recently, the 3000 Rice Genome Project (3K RGP) sequenced more than 3000 rice genomes with a mean sequencing depth of 14.3×, which provided a tremendous resource for rice research. In this paper, we present a genome browser, Rice Pan-genome Browser (RPAN), as a tool to search and visualize the rice pan-genome derived from 3K RGP. 
RPAN contains a database of the basic information of 3010 rice accessions, including genomic sequences, gene annotations, PAV information and gene expression data of the rice pan-genome. At least 12 000 novel genes absent in the reference genome were included. RPAN also provides multiple search and visualization functions. RPAN can be a rich resource for rice biology and rice breeding. It is available at http://cgm.sjtu.edu.cn/3kricedb/ or http://www.rmbreeding.cn/pan3k.",2016-12-10 +30083972,"Description, characterization, and evaluation of an online social networking community: the American Cancer Society's Cancer Survivors Network®.","

Purpose

To describe (a) the conceptualization, purpose, and features of The American Cancer Society's Cancer Survivors Network® (CSN; http://csn.cancer.org ), (b) the ongoing two-phase evaluation process of CSN, and (c) the characteristics of CSN members.

Methods

An online opt-in self-report survey of CSN members (N = 4762) was conducted and digital metrics of site use were collected.

Results

Annually, CSN attracts over 3.6 million unique users from over 200 countries/territories. Most commonly used site features are discussion boards (81.1%), the search function (63.8%), and the member resource library (50.2%). The survey sample is mostly female (69.6%), non-Hispanic white (84.1%), and self-identified as a cancer survivor (49.8%), or both cancer survivor and cancer caregiver (31.9%). A larger number of survey respondents reported head and neck cancer (12.5%), relative to cancer incidence/prevalence data.

Conclusions

The volume of CSN traffic suggests high demand among cancer survivors and caregivers for informational and/or emotional support from other cancer survivors and caregivers. CSN may be particularly beneficial for individuals with rare cancers. Furthermore, this study documents a group of individuals whose cancer experience is multifaceted (e.g., survivors became caregivers or vice versa), and for whom CSN has the capacity to provide support at multiple points during their cancer experiences.

Implications for cancer survivors

CSN is a free, internet-based social networking site available to all cancer survivors and caregivers, worldwide. Evaluation of the site is ongoing and will be used to inform improvements to usability, reach, recruitment, retention, and potential health impact(s) of this valuable resource.",2018-08-06 +32845940,A multilevel analysis of short birth interval and its determinants among reproductive age women in developing regions of Ethiopia.,"

Background

Short Birth Interval negatively affects the health of both mothers and children in developing nations, like, Ethiopia. However, studies conducted to date in Ethiopia upon short birth interval were inconclusive and they did not show the extent and determinants of short birth interval in developing (Afar, Somali, Gambella, and Benishangul-Gumuz) regions of the country. Thus, this study was intended to assess the short birth interval and its determinants in the four developing regions of the country.

Methods

Data were retrieved from the Demographic and Health Survey program official database website (http://dhsprogram.com). A sample of 2683 women of childbearing age group (15-49) who had at least two alive consecutive children in the four developing regions of Ethiopia was included in this study. A multilevel multivariable logistic regression model was fitted to identify the independent predictors of short birth interval and Akaike's Information Criterion (AIC) was used during the model selection procedure.

Results

In this study, the prevalence of short birth interval was 46% [95% CI; 43.7%, 47.9%]. The multilevel multivariable logistic regression model showed women living in rural area [AOR = 1.52, CI: 1.12, 2.05], women attended secondary education and above level [AOR = 0.27, CI: 0.05, 0.54], have no media exposure [AOR = 1.35, CI: 1.18, 1.56], female sex of the index child [AOR = 1.13, CI:1.07,1.20], breastfeeding duration [AOR = 0.79, CI: 0.77, 0.82], having six and more ideal number of children [AOR = 1.14, CI: 1.09, 1.20] and having preferred waiting time to birth two years and above [AOR = 0.86, CI: 0.78, 0.95] were the predictors of short birth interval.

Conclusions

The prevalence of short birth intervals in the developing regions of Ethiopia is still high. Therefore, the government of Ethiopia should work on the access of family planning and education in rural parts of the developing regions where more than 90% of the population in these regions is pastoral.",2020-08-26 +33504796,HD-EEG for tracking sub-second brain dynamics during cognitive tasks.,"This work provides the community with high-density Electroencephalography (HD-EEG, 256 channels) datasets collected during task-free and task-related paradigms. It includes forty-three healthy participants performing visual naming and spelling tasks, visual and auditory naming tasks and a visual working memory task in addition to resting state. The HD-EEG data are furnished in the Brain Imaging Data Structure (BIDS) format. These datasets can be used to (i) track brain networks dynamics and their rapid reconfigurations at sub-second time scale in different conditions, (naming/spelling/rest) and modalities, (auditory/visual) and compare them to each other, (ii) validate several parameters involved in the methods used to estimate cortical brain networks through scalp EEG, such as the open question of optimal number of channels and number of regions of interest and (iii) allow the reproducibility of results obtained so far using HD-EEG. We hope that delivering these datasets will lead to the development of new methods that can be used to estimate brain cortical networks and to better understand the general functioning of the brain during rest and task. Data are freely available from https://openneuro.org .",2021-01-27 +34516542,A statistical model for describing and simulating microbial community profiles.,"Many methods have been developed for statistical analysis of microbial community profiles, but due to the complex nature of typical microbiome measurements (e.g. 
sparsity, zero-inflation, non-independence, and compositionality) and of the associated underlying biology, it is difficult to compare or evaluate such methods within a single systematic framework. To address this challenge, we developed SparseDOSSA (Sparse Data Observations for the Simulation of Synthetic Abundances): a statistical model of microbial ecological population structure, which can be used to parameterize real-world microbial community profiles and to simulate new, realistic profiles of known structure for methods evaluation. Specifically, SparseDOSSA's model captures marginal microbial feature abundances as a zero-inflated log-normal distribution, with additional model components for absolute cell counts and the sequence read generation process, microbe-microbe, and microbe-environment interactions. Together, these allow fully known covariance structure between synthetic features (i.e. ""taxa"") or between features and ""phenotypes"" to be simulated for method benchmarking. Here, we demonstrate SparseDOSSA's performance for 1) accurately modeling human-associated microbial population profiles; 2) generating synthetic communities with controlled population and ecological structures; 3) spiking-in true positive synthetic associations to benchmark analysis methods; and 4) recapitulating an end-to-end mouse microbiome feeding experiment. Together, these represent the most common analysis types in assessment of real microbial community environmental and epidemiological statistics, thus demonstrating SparseDOSSA's utility as a general-purpose aid for modeling communities and evaluating quantitative methods. 
An open-source implementation is available at http://huttenhower.sph.harvard.edu/sparsedossa2.",2021-09-13 +33490324,UrbangEnCy: An emergency events dataset based on citizen sensors for monitoring urban scenarios in Ecuador.,"Recently, the use of the citizen-sensors (people generating and sharing real data by social media) for detecting and disseminating emergency events in real-time have shown a considerable increase because people at the place of the event, as well as elsewhere, can quickly post relevant information on this type of alerts. Here, we present an emergency events dataset called UrbangEnCy. The dataset contains over 25500 texts in Spanish posted on Twitter from January 19th to August 19th, 2020, with emergencies and non-emergencies related content in Ecuador. We obtained, cleaned and, filtered these tweets and, then we selected the location and temporal data as well as tweet content. Besides, the data set includes annotations regarding the type of tweet (emergency / non-emergency) as well as additional nomenclature used to describe emergencies in the Center for immediate response service to emergencies (ECU 911) of Ecuador and international emergency services agencies (ESAs). UrbangEnCy dataset facilitates evaluating data science performance, machine learning, and natural language processing algorithms used with supervised and unsupervised problems related to text mining and pattern recognition. The dataset is freely and publicly available at https://doi.org/10.17632/4x37zz82k8.",2020-12-24 +34767312,"Development and multicenter validation of FIB-6: A novel, machine learning, simple bedside score to rule out liver cirrhosis and compensated advanced chronic liver disease in patients with chronic hepatitis C.","

Background

Non-invasive tests (NITs), such as Fibrosis-4 index (FIB-4) and the aspartate aminotransferase-to-platelet ratio index (APRI), developed using classical statistical methods, are increasingly used for determining liver fibrosis stages and recommended in treatment guidelines replacing the liver biopsy. Application of conventional cutoffs of FIB-4 and APRI resulted in high rates of misclassification of fibrosis stages.

Aim

There is an unmet need for more accurate NITs that can overcome the limitations of FIB-4 and APRI.

Patients and methods

Machine learning with the random forest algorithm was used to develop a non-invasive index using retrospective data of 7238 patients with biopsy-proven chronic hepatitis C from two centers in Egypt; derivation dataset (n = 1821) and validation set in the second center (n = 5417). Receiver operator curve analysis was used to define cutoffs for different stages of fibrosis. Performance of the new score was externally validated in cohorts from two other sites in Egypt (n = 560) and seven different countries (n = 1317). Fibrosis stages were determined using the METAVIR score. Results were also compared with three established tools (FIB-4, APRI, and the aspartate aminotransferase-to-alanine aminotransferase ratio [AAR]).

Results

Age in addition to readily available laboratory parameters such as aspartate, and alanine aminotransferases, alkaline phosphatase, albumin (g/dl), and platelet count (/cm3 ) correlated with the biopsy-derived stage of liver fibrosis in the derivation cohort and were used to construct the model for predicting the fibrosis stage by applying the random forest algorithm, resulting in an FIB-6 index, which can be calculated easily at http://fib6.elriah.info. Application of the cutoff values derived from the derivation group on the validation groups yielded very good performance in ruling out cirrhosis (negative predictive value [NPV] = 97.7%), compensated advance liver disease (NPV = 90.2%), and significant fibrosis (NPV = 65.7%). In the external validation groups from different countries, FIB-6 demonstrated higher sensitivity and NPV than FIB-4, APRI, and AAR.

Conclusion

FIB-6 score is a non-invasive, simple, and accurate test for ruling out liver cirrhosis and compensated advance liver disease in patients with chronic hepatitis C and performs better than APRI, FIB-4, and AAR.",2021-11-24 +,First Report of the Leafhoppers Ceratagallia nitidula and Empoasca abrupta (Hemiptera: Cicadellidae) as Vectors of ‘Candidatus Phytoplasma trifolii’,"In May 2016, leafhoppers (Cicadellidae) of two species, Ceratagallia nitidula Oman and Empoasca abrupta DeLong, were collected in the municipality of Calera de V.R., Zacatecas (22°57′55.03″ N, 102°41′04.79″ W) and identified using diagnostic characters of the male genitalia (Hamilton 1998). To test for their ability to transmit phytoplasmas, the two leafhopper species were allowed to feed on ‘Candidatus Phytoplasma trifolii’–infected chili pepper plants for 1 week. Inoculations were done in duplicate, allowing groups of 25 leafhoppers of each species to feed on each of 10 healthy chili pepper plants exposed separately for 48 h; five healthy plants were used as negative controls (no insect infestation). After this period, the leafhopper populations were recovered from every cage and tested for ‘Ca. P. trifolii’ presence. Infested chili pepper plants and negative controls were maintained separately at 22°C, and symptoms were evaluated 40 days after leafhopper infestation. No symptoms were observed in negative controls, whereas six out 10 and seven out 10 plants inoculated by C. nitidula and E. abrupta, respectively, showed symptoms including foliar deformation, long internodes, fall of flowers, and yellowing. Symptomatic and asymptomatic plants as well as pools of 20 insects of each species were tested for phytoplasma infection by direct (Smart et al. 1996) and nested (Gundersen and Lee 1996) polymerase chain reaction (PCR) assays. The nested PCR amplicons (1.2 kb) obtained from all the positive extracts were cloned separately and directly sequenced. 
No PCR products were obtained from negative controls and symptomless plants. BLAST analyses of the amplified sequences confirmed the presence of ‘Ca. P. trifolii’ strains in C. nitidula, E. abrupta, and symptomatic chili pepper. Computer-simulated restriction fragment length polymorphism analysis was performed using iPhyClassifier (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi), and the patterns obtained from the aligned 16S rDNA sequence (MG958642) isolated from C. nitidula, E. abrupta, and chili pepper were most closely related (98.7%) to the reference strain (AY390261) of the group 16SrVI (‘Ca. P. trifolii’), subgroup A (Hiruki and Wang 2004). Although there are reports about the presence of ‘Ca. P. trifolii’ affecting crops of economic interest in Mexico (Salas-Muñoz et al. 2016), the information on their vectors is limited. To our knowledge, this is the first report of these two leafhopper species as vectors of ‘Ca. P. trifolii’. Only one other species of Empoasca, E. papayae Oman from the Caribbean region, has been reported as a phytoplasma vector (Sein and Adsuar 1947). Austroagallia torrida (Evans), which belongs to the same leafhopper tribe (Agalliini) as Ceratagallia, was reported as a vector of the phytoplasma associated with rugose leaf curl disease of clovers in Australia (Grylls et al. 1974), but no other species of this tribe have been identified as phytoplasma vectors. The identification of these two new vectors of phytoplasmas can help to establish strategies to avoid the spread of diseases caused by ‘Ca. P. trifolii’ in Mexico and other countries.",2018-12-01 +30097821,PubChem chemical structure standardization.,"

Background

PubChem is a chemical information repository, consisting of three primary databases: Substance, Compound, and BioAssay. When individual data contributors submit chemical substance descriptions to Substance, the unique chemical structures are extracted and stored into Compound through an automated process called structure standardization. The present study describes the PubChem standardization approaches and analyzes them for their success rates, reasons that cause structures to be rejected, and modifications applied to structures during the standardization process. Furthermore, the PubChem standardization is compared to the structure normalization of the IUPAC International Chemical Identifier (InChI) software, as manifested by conversion of the InChI back into a chemical structure.

Results

The observed rejection rate for substances processed by PubChem standardization was 0.36%, which is predominantly attributed to structures with invalid atom valences that cannot be readily corrected without additional information from contributors. Of all structures that pass standardization, 44% are modified in the process, reducing the count of unique structures from 53,574,724 in substance to 45,808,881 in compound as identified by de-aromatized canonical isomeric SMILES. Even though the processing time is very low on average (only 0.4% of structures have individual standardization time above 0.1 s), total standardization time is completely dominated by edge cases: 90% of the time to standardize all structures in PubChem substance is spent on the 2.05% of structures with the highest individual standardization time. It is worth noting that 60% of the structures obtained from PubChem structure standardization are not identical to the chemical structure resulting from the InChI (primarily due to preferences for a different tautomeric form).

Conclusions

Standardization of chemical structures is complicated by the diversity of chemical information and their representations approaches. The PubChem standardization is an effective and efficient tool to account for molecular diversity and to eliminate invalid/incomplete structures. Further development will concentrate on improved tautomer consideration and an expanded stereocenter definition. Modifications are difficult to thoroughly validate, with slight changes often affecting many thousands of structures and various edge cases. The PubChem structure standardization service is accessible as a public resource ( https://pubchem.ncbi.nlm.nih.gov/standardize ), and via programmatic interfaces.",2018-08-10 +34749182,Charting the human amygdala development across childhood and adolescence: Manual and automatic segmentation.,"The developmental pattern of the amygdala throughout childhood and adolescence has been inconsistently reported in previous neuroimaging studies. Given the relatively small size of the amygdala on full brain MRI scans, discrepancies may be partly due to methodological differences in amygdalar segmentation. To investigate the impact of volume extraction methods on amygdala volume, we compared FreeSurfer, FSL and volBrain segmentation measurements with those obtained by manual tracing. The manual tracing method, which we used as the 'gold standard', exhibited almost perfect intra- and inter-rater reliability. We observed systematic differences in amygdala volumes between automatic (FreeSurfer and volBrain) and manual methods. Specifically, compared with the manual tracing, FreeSurfer estimated larger amygdalae, and volBrain produced smaller amygdalae while FSL demonstrated a mixed pattern. The tracing bias was not uniform, but higher for smaller amygdalae. We further modeled amygdalar growth curves using accelerated longitudinal cohort data from the Chinese Color Nest Project (http://deepneuro.bnu.edu.cn/?p=163). 
Trajectory modeling and statistical assessments of the manually traced amygdalae revealed linearly increasing and parallel developmental patterns for both girls and boys, although the amygdalae of boys were larger than those of girls. Compared to these trajectories, the shapes of developmental curves were similar when using the volBrain derived volumes. FreeSurfer derived trajectories had more nonlinearities and appeared flatter. FSL derived trajectories demonstrated an inverted U shape and were significantly different from those derived from manual tracing method. The use of amygdala volumes adjusted for total gray-matter volumes, but not intracranial volumes, resolved the shape discrepancies and led to reproducible growth curves between manual tracing and the automatic methods (except FSL). Our findings revealed steady growth of the human amygdala, mirroring its functional development across the school age. Methodological improvements are warranted for current automatic tools to achieve more accurate amygdala structure at school age, calling for next generation tools.",2021-10-28 +33571101,Quantifying Intensities of Transcription Factor-DNA Binding by Learning From an Ensemble of Protein Binding Microarrays.,"The control of the coordinated expression of genes is primarily regulated by the interactions between transcription factors (TFs) and their DNA binding sites, which are an integral part of transcriptional regulatory networks. There are many computational tools focused on determining TF binding or unbinding to a DNA sequence. However, other tools focused on further determining the relative preference of such binding are needed. Here, we propose a regression model with deep learning, called SemanticBI, to predict intensities of TF-DNA binding. SemanticBI is a convolutional neural network (CNN)-recurrent neural network (RNN) architecture model that was trained on an ensemble of protein binding microarray data sets that covered multiple TFs. 
Using this approach, SemanticBI exhibited superior accuracy in predicting binding intensities compared to other popular methods. Moreover, SemanticBI uncovered vectorized sequence-oriented features using its CNN-RNN architecture, which is an abstract representation of the original DNA sequences. Additionally, the use of SemanticBI raises the question of whether motifs are necessary for computational models of TF binding. The online SemanticBI service can be accessed at http://qianglab.scst.suda.edu.cn/semantic/.",2021-07-27 +34180600,Primary antibiotic resistance of Helicobacter pylori isolates is twofold more frequent in HIV-positive than HIV-negative individuals: A descriptive observational study.,"The antimicrobial susceptibility of Helicobacter pylori strains isolated from HIV-positive individuals is not well characterized. This study aimed to measure the prevalence and long-term trends associated with primary H. pylori antibiotic resistance, evaluate correlations with antibiotic consumption, and compare predictors for H. pylori antibiotic resistance between HIV-positive and HIV-negative individuals. In this longitudinal registry study, we evaluated consecutive adults with and without HIV infection, naïve to H. pylori treatment, who underwent upper gastrointestinal endoscopy and had a positive H. pylori culture, with susceptibility testing available, between 2004 and 2015. Outpatient antibiotic consumption data were based on nationwide aggregated numbers. H. pylori was isolated from gastric biopsies of 3008/8321 patients, 181/477 (37.9%) were HIV-positive and 2827/7844 (36.0%) HIV-negative. Overall cohort mean prevalence of H. pylori primary antibiotic resistance was 11.1% for clarithromycin, 17.8% levofloxacin, and 39.4% metronidazole. The prevalence of H. pylori primary resistance was significantly higher for these three drugs in HIV-positive individuals across the study period. 
Linear regression showed that the prevalence of clarithromycin and levofloxacin resistance correlated with the country aggregate daily dose consumption of macrolides and quinolones, respectively. Multivariable regression analysis showed that HIV infection is a strong independent risk factor for multiple H. pylori antibiotic resistance. In summary, HIV infection is a risk factor for carrying multi-resistant H. pylori strains and this is correlated with antibiotic consumption. Empirical therapies should be avoided in HIV-positive individuals. These data highlight the need to implement ongoing monitoring of H. pylori antimicrobial susceptibility among HIV-positive individuals. The study is registered at ISRCTN registry, number 13466428: https://www.isrctn.com/ISRCTN13466428.",2021-06-01 +34058905,Dynamic Nomogram for Predicting Lateral Cervical Lymph Node Metastasis in Papillary Thyroid Carcinoma.,"

Objective

To establish a dynamic nomogram based on preoperative clinical data for prediction of lateral lymph node metastasis (LLNM) of papillary thyroid carcinoma.

Study design

Retrospective study.

Setting

The Sixth Affiliated Hospital of Sun Yat-Sen University.

Methods

The data of 477 patients from 2 centers formed the training group and validation group and were retrospectively reviewed. Preoperative clinical factors influencing LLNM were identified by univariable and multivariable analysis and were used to construct a predictive dynamic nomogram for LLNM. Receiver operating characteristic analysis and calibration curves were used to evaluate the predictive power of the nomogram.

Results

The following were identified as independent risk factors for LLNM: male sex (odds ratio [OR] = 4.6, P = .04), tumor size ≥10.5 mm (OR = 7.9, P = .008), thyroid nodules (OR = 6.1, P = .013), irregular tumor shape (OR = 24.6, P = .001), rich lymph node vascularity (OR = 9.7, P = .004), and lymph node location. The dynamic nomogram constructed with these factors is available at https://zxh1119.shinyapps.io/DynNomapp/. The nomogram showed good performance, with an area under the curve of 0.956 (95% CI, 0.925-0.986), a sensitivity of 0.87, and a specificity of 0.91, if high-risk patients were defined as those with a predicted probability ≥0.3 or total score ≥200. The nomogram performed well in the external validation cohort (area under the curve, 0.915; 95% CI, 0.862-0.967).

Conclusions

The dynamic nomogram for preoperative prediction of LLNM in papillary thyroid carcinoma can help surgeons identify high-risk patients and develop individualized treatment plans.",2021-06-01 +34878310,A Cold Chain-Independent Specimen Collection and Transport Medium Improves Diagnostic Sensitivity and Minimizes Biosafety Challenges of COVID-19 Molecular Diagnosis.,"Equitable and timely access to COVID-19-related care has emerged as a major challenge, especially in developing and low-income countries. In India, ∼65% of the population lives in villages where infrastructural constraints limit the access to molecular diagnostics of COVID-19 infection. Especially, the requirement of a cold chain transport for sustained sample integrity and associated biosafety challenges pose major bottlenecks to the equitable access. Here, we developed an innovative clinical specimen collection medium, named SupraSens microbial transport medium (SSTM). SSTM allowed a cold chain-independent transport at a wide temperature range (15°C to 40°C) and directly inactivated SARS-CoV-2 (<15 min). Evaluation of SSTM compared to commercial viral transport medium (VTM) in field studies (n = 181 patients) highlighted that, for the samples from same patients, SSTM could capture more symptomatic (∼26.67%, 4/15) and asymptomatic (52.63%, 10/19) COVID-19 patients. Compared to VTM, SSTM yielded significantly lower quantitative PCR (qPCR) threshold cycle (Ct) values (mean ΔCt > -3.50), thereby improving diagnostic sensitivity of SSTM (18.79% [34/181]) versus that of VTM (11.05% [20/181]). Overall, SSTM had detection of COVID-19 patients 70% higher than that of VTM. Since the logistical and infrastructural constraints are not unique to India, our study highlights the invaluable global utility of SSTM as a key to accurately identify those infected and control COVID-19 transmission. 
Taken together, our data provide a strong justification to the adoption of SSTM for sample collection and transport during the pandemic. IMPORTANCE Approximately forty-four percent of the global population lives in villages, including 59% in Africa (https://unhabitat.org/World%20Cities%20Report%202020). The fast-evolving nature of SARS-CoV-2 and its extremely contagious nature warrant early and accurate COVID-19 diagnostics across rural and urban population as a key to prevent viral transmission. Unfortunately, lack of adequate infrastructure, including the availability of biosafety-compliant facilities and an end-to-end cold chain availability for COVID-19 molecular diagnosis, limits the accessibility of testing in these countries. Here, we fulfill this urgent unmet need by developing a sample collection and transport medium, SSTM, that does not require cold chain, neutralizes the virus quickly, and maintains the sample integrity at broad temperature range without compromising sensitivity. Further, we observed that use of SSTM in field studies during pandemic improved the diagnostic sensitivity, thereby establishing the feasibility of molecular testing even in the infrastructural constraints of remote, hilly, or rural communities in India and elsewhere.",2021-12-08 +30050107,Developmental and genetic regulation of the human cortex transcriptome illuminate schizophrenia pathogenesis.,"Genome-wide association studies have identified 108 schizophrenia risk loci, but biological mechanisms for individual loci are largely unknown. Using developmental, genetic and illness-based RNA sequencing expression analysis in human brain, we characterized the human brain transcriptome around these loci and found enrichment for developmentally regulated genes with novel examples of shifting isoform usage across pre- and postnatal life. 
We found widespread expression quantitative trait loci (eQTLs), including many with transcript specificity and previously unannotated sequence that were independently replicated. We leveraged this general eQTL database to show that 48.1% of risk variants for schizophrenia associate with nearby expression. We lastly found 237 genes significantly differentially expressed between patients and controls, which replicated in an independent dataset, implicated synaptic processes, and were strongly regulated in early development. These findings together offer genetics- and diagnosis-related targets for better modeling of schizophrenia risk. This resource is publicly available at http://eqtl.brainseq.org/phase1 .",2018-07-26 +31705629,VariCarta: A Comprehensive Database of Harmonized Genomic Variants Found in Autism Spectrum Disorder Sequencing Studies.,"Recent years have seen a boom in the application of the next-generation sequencing technology to the study of human disorders, including Autism Spectrum Disorder (ASD), where the focus has been on identifying rare, possibly causative genomic variants in ASD individuals. Because of the high genetic heterogeneity of ASD, a large number of subjects is needed to establish evidence for a variant or gene ASD-association, thus aggregating data across cohorts and studies is necessary. However, methodological inconsistencies and subject overlap across studies complicate data aggregation. Here we present VariCarta, a web-based database developed to address these challenges by collecting, reconciling, and consistently cataloging literature-derived genomic variants found in ASD subjects using ongoing semi-manual curation. The careful manual curation combined with a robust data import pipeline rectifies errors, converts variants into a standardized format, identifies and harmonizes cohort overlaps, and documents data provenance. 
The harmonization aspect is especially important since it prevents the potential double counting of variants, which can lead to inflation of gene-based evidence for ASD-association. The database currently contains 170,416 variant events from 10,893 subjects, collected across 61 publications, and reconciles 16,202 variants that have been reported in literature multiple times. VariCarta is freely accessible at http://varicarta.msl.ubc.ca. Autism Res 2019, 12: 1728-1736. © 2019 International Society for Autism Research, Wiley Periodicals, Inc. LAY SUMMARY: The search for genetic factors underlying Autism Spectrum Disorder (ASD) yielded numerous studies reporting potentially causative genomic variants found in ASD individuals. However, methodological differences and subject overlap across studies complicate the assembly of these data, diminishing its utility and accessibility. We developed VariCarta, a web-based database that aggregates carefully curated, annotated, and harmonized literature-derived variants identified in individuals with ASD using ongoing semi-manual curation.",2019-11-09 +34146585,Quantitative analysis of mitochondrial ATP synthesis.,"We present a computational framework for analyzing and simulating mitochondrial ATP synthesis using basic thermodynamic and kinetic principles. The framework invokes detailed descriptions of the thermodynamic driving forces associated with the processes of the electron transport chain, mitochondrial ATP synthetase, and phosphate and adenine nucleotide transporters. Assembling models of these discrete processes into an integrated model of mitochondrial ATP synthesis, we illustrate how to analyze and simulate in vitro respirometry experiments and how models identified from in vitro experimental data effectively explain cardiac respiratory control in vivo. Computer codes for these analyses are embedded as Python scripts in a Jupyter Book to facilitate easy adoption and modification of the concepts developed here. 
This accessible framework may also prove useful in supporting educational applications. All source codes are available on at https://beards-lab.github.io/QAMAS_book/.",2021-06-17 +34058973,GLEANER: a web server for GermLine cycle Expression ANalysis and Epigenetic Roadmap visualization.,"

Background

Germline cells are important carriers of genetic and epigenetic information transmitted across generations in mammals. During the mammalian germline cell development cycle (i.e., the germline cycle), cell potency changes cyclically, accompanied by dynamic transcriptional changes and epigenetic reprogramming. Recently, to understand these dynamic and regulatory mechanisms, multiomic analyses, including transcriptomic and epigenomic analyses of DNA methylation, chromatin accessibility and histone modifications of germline cells, have been performed for different stages in human and mouse germline cycles. However, the long time span of the germline cycle and material scarcity of germline cells have largely limited the understanding of these dynamic characteristic changes. A tool that integrates the existing multiomics data and visualizes the overall continuous dynamic trends in the germline cycle can partially overcome such limitations.

Results

Here, we present GLEANER, a web server for GermLine cycle Expression ANalysis and Epigenetics Roadmap visualization. GLEANER provides a comprehensive collection of the transcriptome, DNA methylome, chromatin accessibility, and H3K4me3, H3K27me3, and H3K9me3 histone modification characteristics in human and mouse germline cycles. For each input gene, GLEANER shows the integrative analysis results of its transcriptional and epigenetic features, the genes with correlated transcriptional changes, and the overall continuous dynamic trends in the germline cycle. We further used two case studies to demonstrate the detailed functionality of GLEANER and highlighted that it can provide valuable clues to the epigenetic regulation mechanisms in the genetic and epigenetic information transmitted during the germline cycle.

Conclusions

To the best of our knowledge, GLEANER is the first web server dedicated to the analysis and visualization of multiomics data related to the mammalian germline cycle. GLEANER is freely available at http://compbio-zhanglab.org/GLEANER .",2021-05-31 +,A genetic analysis of the European hedgehog (Erinaceus europaeus): an applicative case study to support its eradication from Pianosa Island (Tuscan Archipelago),"The introduction of allochthonous species represents a serious threat for the native gene pools and ecosystem biodiversity. The effect is particularly disastrous for insular biocoenoses, such as in the Tuscan archipelago, one of the most important biodiversity hotspot in the Mediterranean area. The EU tool LIFE + has funded an eradication project involving a set of allochthonous species on Pianosa Island (http://www.restoconlife.eu), including the European hedgehog (Erinaceus europaeus). Since eradication projects should not leave out of consideration a genetic analysis of the target species, the aim of our study was to characterize the genetic profile of the Pianosa hedgehog population. In particular, the data obtained had to help assessing the most compatible area for the release of all captured individuals. In the present work, eleven microsatellite loci and two mitochondrial gene portions (COXI and 16S) were characterized in individuals of E. europaeus from Pianosa, Elba, Sardinia Islands and mainland Italy. Both mtDNA and microsatellite data confirmed that the present-day population of Pianosa has an extremely low genetic diversity and a profile very similar to that of Elba. 
Consequently, our results do suggest that the Pianosa hedgehogs originated from a pool of individuals moved by human from Elba in recent times and could be relocated there.",2019-04-01 +34349282,NovoSpaRc: flexible spatial reconstruction of single-cell gene expression with optimal transport.,"Single-cell RNA-sequencing (scRNA-seq) technologies have revolutionized modern biomedical sciences. A fundamental challenge is to incorporate spatial information to study tissue organization and spatial gene expression patterns. Here, we describe a detailed protocol for using novoSpaRc, a computational framework that probabilistically assigns cells to tissue locations. At the core of this framework lies a structural correspondence hypothesis, that cells in physical proximity share similar gene expression profiles. Given scRNA-seq data, novoSpaRc spatially reconstructs tissues based on this hypothesis, and optionally, by including a reference atlas of marker genes to improve reconstruction. We describe the novoSpaRc algorithm, and its implementation in an open-source Python package ( https://pypi.org/project/novosparc ). NovoSpaRc maps a scRNA-seq dataset of 10,000 cells onto 1,000 locations in <5 min. We describe results obtained using novoSpaRc to reconstruct the mouse organ of Corti de novo based on the structural correspondence assumption and human osteosarcoma cultured cells based on marker gene information, and provide a step-by-step guide to Drosophila embryo reconstruction in the Procedure to demonstrate how these two strategies can be combined.",2021-08-04 +35784406,Anthropological analyses of 30 insertion/deletion autosomal markers in five major ethnic groups of Pakistan.,"We investigated the forensic efficacy of the 30 insertion/deletion (Indel) markers included in the Qiagen Investigator® DIPplex kit in 529 Pakistani individuals from five major subpopulations in Pakistan (Punjabi, Pashtun, Sindhi, Saraiki, and Baloch). 
In the Sindhi population, the distribution of HLD81 and HLD97 alleles deviated from Hardy-Weinberg equilibrium after Bonferroni correction. The combined match probability ranged from 2.0E-12 (Pashtun and Baloch) to 1.0E-12 (Sindhi), and the mean paternity exclusion power varied from 0.995 (Punjabi, Sindhi, and Saraiki) to 0.996 (Pashtun and Baloch). The high combined power of discrimination (0.999 999 999 999 97) and low combined match probability (1.7E-12) for all subpopulations studied support the utility of the 30 Indel markers for forensic identification in the studied subpopulations. The allele frequencies of the Indel markers in the Pakistani subpopulations were compared with those from 18 other populations. The results show that the populations clustered according to geography. The subpopulations investigated in this work showed a close genetic relationship with others from Pakistan, as well as with South Central Asian and Middle Eastern populations. The results suggest that the Investigator® DIPplex kit can be useful as a supplementary tool for human identification in the five Pakistani subpopulations investigated in this study. Supplemental data for this article is available online at https://doi.org/10.1080/20961790.2021.1933366 .",2021-08-28 +36003898,Improving access to geodetic imaging crustal deformation data using GeoGateway.,"GeoGateway (http://geo-gateway.org) is a web-based interface for analysis and modeling of geodetic imaging data and to support response to related disasters. Geodetic imaging data product currently supported by GeoGateway include Global Navigation Satellite System (GNSS) daily position time series and derived velocities and displacements and airborne Interferometric Synthetic Aperture Radar (InSAR) from NASA's UAVSAR platform. GeoGateway allows users to layer data products in a web map interface and extract information from various tools. Extracted products can be downloaded for further analysis. 
GeoGateway includes overlays of California fault traces, seismicity from user selected search parameters, and user supplied map files. GeoGateway also provides earthquake nowcasts and hazard maps as well as products created for related response to natural disasters. A user guide is present in the GeoGateway interface. The GeoGateway development team is also growing the user base through workshops, webinars, and video tutorials. GeoGateway is used in the classroom and for research by experts and non-experts including by students.",2021-01-18 +32543078,Binding of a negative allosteric modulator and competitive antagonist can occur simultaneously at the ionotropic glutamate receptor GluA2.,"Ionotropic glutamate receptors are ligand-gated ion channels governing neurotransmission in the central nervous system. Three major types of antagonists are known for the AMPA-type receptor GluA2: competitive, noncompetitive (i.e., negative allosteric modulators; NAMs) used for treatment of epilepsy, and uncompetitive antagonists. We here report a 4.65 Å resolution X-ray structure of GluA2, revealing that four molecules of the competitive antagonist ZK200775 and four molecules of the NAM GYKI53655 are capable of binding at the same time. Using negative stain electron microscopy, we show that GYKI53655 alone or ZK200775/GYKI53655 in combination predominantly results in compact receptor forms. The agonist AMPA provides a mixed population of compact and bulgy shapes of GluA2 not impacted by addition of GYKI53655. Taken together, this suggests that the two different mechanisms of antagonism that lead to channel closure are independent and that the distribution between bulgy and compact receptors primarily depends on the ligand bound in the glutamate binding site. DATABASE: The atomic coordinates and structure factors from the crystal structure determination have been deposited in the Protein Data Bank under accession code https://doi.org/10.2210/pdb6RUQ/pdb. 
The electron microscopy 3D reconstruction volumes have been deposited in EMDB (EMD-4875: Apo; EMD-4920: ZK200775/GYKI53655; EMD-4921: AMPA compact; EMD-4922: AMPA/GYKI53655 bulgy; EMD-4923: GYKI53655; EMD-4924: AMPA bulgy; EMD-4925: AMPA/GYKI53655 compact).",2020-07-08 +33196836,OrthoDB in 2020: evolutionary and functional annotations of orthologs.,"OrthoDB provides evolutionary and functional annotations of orthologs, inferred for a vast number of available organisms. OrthoDB is leading in the coverage and genomic diversity sampling of Eukaryotes, Prokaryotes and Viruses, and the sampling of Bacteria is further set to increase three-fold. The user interface has been enhanced in response to the massive growth in data. OrthoDB provides three views on the data: (i) a list of orthologous groups related to a user query, which are now arranged to visualize their hierarchical relations, (ii) a detailed view of an orthologous group, now featuring a Sankey diagram to facilitate navigation between the levels of orthology, from more finely-resolved to more general groups of orthologs, as well as an arrangement of orthologs into an interactive organism taxonomy structure, and (iii) we added a gene-centric view, showing the gene functional annotations and the pair-wise orthologs in example species. The OrthoDB standalone software for delineation of orthologs, Orthologer, is freely available. Online BUSCO assessments and mapping to OrthoDB of user-uploaded data enable interactive exploration of related annotations and generation of comparative charts. OrthoDB strives to predict orthologs from the broadest coverage of species, as well as to extensively collate available functional annotations, and to compute evolutionary annotations such as evolutionary rate and phyletic profile. 
OrthoDB data can be assessed via SPARQL RDF, REST API, downloaded or browsed online from https://orthodb.org.",2021-01-01 +34132767,VoroContacts: a tool for the analysis of interatomic contacts in macromolecular structures. ,"VoroContacts is a versatile tool for computing and analyzing contact surface areas (CSAs) and solvent accessible surface areas (SASAs) for 3 D structures of proteins, nucleic acids and their complexes at the atomic resolution. CSAs and SASAs are derived using Voronoi tessellation of 3 D structure, represented as a collection of atomic balls. VoroContacts web server features a highly configurable query interface, which enables on-the-fly analysis of contacts for selected set of atoms and allows filtering interatomic contacts by their type, surface areas, distance between contacting atoms and sequence separation between contacting residues. The VoroContacts functionality is also implemented as part of the standalone Voronota package, enabling batch processing. https://bioinformatics.lt/wtsam/vorocontacts. Supplementary data are available at Bioinformatics online.",2021-06-16 +34379574,"Lessons learned about post-secondary institution tobacco policy change by Tobacco-Free Generation Campus Initiative Grantees, 2018-2020.","

Objective

The Tobacco-Free Generation Campus Initiative (TFGCI) granted funding to U.S. post-secondary institutions to assist with creating 100% smoke- and tobacco-free campus policies to prevent young adult tobacco use. This study assessed grantee experiences working on campus tobacco policy change. Participants: Sixty U.S. post-secondary institutions completing TFGCI funded work between 2018 and 2020. Methods: An online survey assessment was completed by each institution at the end of their 18-month grant period to share facilitators and barriers to policy change, lessons learned, and advice for future efforts. Results: Many institutions faced challenges of time and capital constraints and pushback from campus constituents. Collaboration, diverse constituent engagement, and educational efforts throughout the advocacy process were important facilitators of policy change. Conclusions: Adopting and implementing comprehensive tobacco policy on college campuses is challenging. Regardless of institution type, commitment to the long-term goal and diverse stakeholder support guided movement toward 100% smoke- and tobacco-free campus policies.Supplemental data for this article can be accessed online at https://doi.org/10.1080/07448481.2021.1953032 .",2021-08-11 +33999713,"First Report of Collar Rot in Purple Passion Fruit (Passiflora edulis) Caused by Neocosmospora solani in Yunnan province, China. ","Purple passion fruit (Passiflora edulis Sims) is a perennial climbing vine native to South America that is grown worldwide as an edible tropical fruit with excellent nutritional value and high economic value (Zibadi et al. 2007). With the increasing expansion of the plantation area in China, considerable economic loss caused by collar rot has attracted wide attention. From 2018-2020, collar rot resulted in the death of many plants of P. edulis 'Mantianxing', a commercial cultivar in China, in southwest China's Yunnan province. 
The disease spread quickly, and field incidence reached more than 50%. Stem rot symptoms were observed at the base of the stem, about 5-10 cm from the ground, resulting in wilting, defoliation, and death of plants. Representative symptomatic samples were collected from the base of five plants, surface disinfested for 30 seconds with 75% ethanol and 15 min with 10% hypochlorite, washed three times with sterile distilled water, then transferred to potato dextrose agar (PDA) dishes. After 2 days in the dark at 28℃, emerging fungal colonies were purified on new PDA dishes cultured at 28℃ for 7 days. The mycelia were flocculent. The color of the surface and the reverse colony was white and cream, respectively. On synthetic nutrient agar (SNA) medium, microconidia were oval, ellipsoidal or reniform, 0- or 1-septate, and 6.7-23.1 μm in length (n>30); macroconidia were straight to slightly curved, 3- or 5-septate, and 30.8-53.9 μm in length (n>30). Genomic DNA, extracted from six isolates, was amplified with three pairs of primers, ITS1 and ITS4 (White et al. 1990) , EF1-728F and EF1-986R (Carbone and Kohn 1999), and fRPB2-5F and fRPB2-7cR (Liu et al. 1999). The amplicons from all six isolates were sequenced and identical sequences obtained. The sequence of one representative isolate was uploaded to NCBI (National Center for Biotechnology Information) and analyzed with BLASTn in the Fusarium MLST database (https://fusarium.mycobank.org). The sequence of the internal transcribed spacer 1 (ITS1) region (GenBank MN944550) showed 99.1% (449/453 bp) identity to Fusarium solani strain NRRL 53667 (syn: Neocosmospora solani, GenBank MH582405). The sequence of the translation elongation factor-1 (EF-1) gene (GenBank MN938933) showed 97.8% identity (263/269 bp) to F. solani strain NRRL 32828 (GenBank DQ247135). The sequence of the second largest subunit of RNA polymerase Ⅱ (RPB2) gene (GenBank MW002686) showed 98.7% identity (810/821 bp) to F. 
solani strain NRRL 43441 (GenBank MH582407). Based on a multilocus phylogenetic analysis of the ITS1, EF-1 and RPB2 sequences, coupled with the morphological characteristics, the isolate (designated as NsPed1) was considered to be Neocosmospora solani (syn: Fusarium solani) (Crespo et al. 2019). Subsequently, three-month-old healthy seedlings and 45-day-old cuttings of P. edulis 'Mantianxing' plants were inoculated with the isolate NsPed1 to test its pathogenicity. Stems were wounded, approximately 1-2 mm deep, in the collar region of plants at 2 cm above the soil. A disk (9 mm in diameter) of NsPed1-colonized PDA was placed on the wound. Sterile PDA served as controls. All plants were kept in a growth chamber with 28-30°C, 60% relative humidity, and 16/8-h light/dark photoperiod. Fifteen plants were used for each treatment and replicated three times. Two weeks after inoculation, the stems of the inoculated plants turned brown with a lesion, 2-5 cm in length, and the leaves wilted. These symptoms were similar to those of the diseased plants in the field. The control plants were asymptomatic. N. solani NsPed1 was re-isolated from the infected plants, satisfying Koch's postulates. Taken together, N. solani NsPed1 was identified as the causal pathogen of collar rot in P. edulis 'Mantianxing'. Knowledge of the causal organism of collar rot in purple passion fruit will lead to improved measures to prevent and control the disease in China and other countries.",2021-05-17 +33875000,A community-driven resource for genomic epidemiology and antimicrobial resistance prediction of Neisseria gonorrhoeae at Pathogenwatch.,"

Background

Antimicrobial-resistant (AMR) Neisseria gonorrhoeae is an urgent threat to public health, as strains resistant to at least one of the two last-line antibiotics used in empiric therapy of gonorrhoea, ceftriaxone and azithromycin, have spread internationally. Whole genome sequencing (WGS) data can be used to identify new AMR clones and transmission networks and inform the development of point-of-care tests for antimicrobial susceptibility, novel antimicrobials and vaccines. Community-driven tools that provide an easy access to and analysis of genomic and epidemiological data is the way forward for public health surveillance.

Methods

Here we present a public health-focussed scheme for genomic epidemiology of N. gonorrhoeae at Pathogenwatch ( https://pathogen.watch/ngonorrhoeae ). An international advisory group of experts in epidemiology, public health, genetics and genomics of N. gonorrhoeae was convened to inform on the utility of current and future analytics in the platform. We implement backwards compatibility with MLST, NG-MAST and NG-STAR typing schemes as well as an exhaustive library of genetic AMR determinants linked to a genotypic prediction of resistance to eight antibiotics. A collection of over 12,000 N. gonorrhoeae genome sequences from public archives has been quality-checked, assembled and made public together with available metadata for contextualization.

Results

AMR prediction from genome data revealed specificity values over 99% for azithromycin, ciprofloxacin and ceftriaxone and sensitivity values around 99% for benzylpenicillin and tetracycline. A case study using the Pathogenwatch collection of N. gonorrhoeae public genomes showed the global expansion of an azithromycin-resistant lineage carrying a mosaic mtr over at least the last 10 years, emphasising the power of Pathogenwatch to explore and evaluate genomic epidemiology questions of public health concern.

Conclusions

The N. gonorrhoeae scheme in Pathogenwatch provides customised bioinformatic pipelines guided by expert opinion that can be adapted to public health agencies and departments with little expertise in bioinformatics and lower-resourced settings with internet connection but limited computational infrastructure. The advisory group will assess and identify ongoing public health needs in the field of gonorrhoea, particularly regarding gonococcal AMR, in order to further enhance utility with modified or new analytic methods.",2021-04-19 +34215160,Language Profiles and Their Relation to Cognitive and Motor Skills at 30 Months of Age: An Online Investigation of Low-Risk Preterm and Full-Term Children.,"Purpose Wide interindividual variability characterizes language development in the general and at-risk populations of up to 3 years of age. We adopted a complex approach that considers multiple aspects of lexical and grammatical skills to identify language profiles in low-risk preterm and full-term children. We also investigated biological and environmental predictors and relations between language profiles and cognitive and motor skills. Method We enrolled 200 thirty-month-old Italian-speaking children-consisting of 100 low-risk preterm and 100 comparable full-term children. Parents filled out the Italian version of the MacArthur-Bates Communicative Development Inventories Infant and Toddler Short Forms (word comprehension, word production, and incomplete and complete sentence production), Parent Report of Children's Abilities-Revised (cognitive score), and Early Motor Questionnaire (fine motor, gross motor, perception-action, and total motor scores) questionnaires. 
Results A latent profile analysis identified four profiles: poor (21%), with lowest receptive and expressive vocabulary and absent or limited word combination and phonological accuracy; weak (22.5%), with average receptive but limited expressive vocabulary, incomplete sentences, and absent or limited phonological accuracy; average (25%), with average receptive and expressive vocabulary, use of incomplete and complete sentences, and partial phonological accuracy; and advanced (31.5%), with highest expressive vocabulary, complete sentence production, and phonological accuracy. Lower cognitive and motor scores characterized the poor profile, and lower cognitive and perception-action scores characterized the weak profile. Having a nonworking mother and a father with lower education increased the probability of a child's assignment to the poor profile, whereas being small for gestational age at birth increased it for the weak profile. Conclusions These findings suggest a need for a person-centered and cross-domain approach to identifying children with language weaknesses and implementing timely interventions. An online procedure for data collection and data-driven analyses based on multiple lexical and grammatical skills appear to be promising methodological innovations. Supplemental Material https://doi.org/10.23641/asha.14818179.",2021-07-02 +28838067,Zebrafish Models of Human Disease: Gaining Insight into Human Disease at ZFIN.,"The Zebrafish Model Organism Database (ZFIN; https://zfin.org) is the central resource for genetic, genomic, and phenotypic data for zebrafish (Danio rerio) research. ZFIN continuously assesses trends in zebrafish research, adding new data types and providing data repositories and tools that members of the research community can use to navigate data. 
The many research advantages and flexibility of manipulation of zebrafish have made them an increasingly attractive animal to model and study human disease.To facilitate disease-related research, ZFIN developed support to provide human disease information as well as annotation of zebrafish models of human disease. Human disease term pages at ZFIN provide information about disease names, synonyms, and references to other databases as well as a list of publications reporting studies of human diseases in which zebrafish were used. Zebrafish orthologs of human genes that are implicated in human disease etiology are routinely studied to provide an understanding of the molecular basis of disease. Therefore, a list of human genes involved in the disease with their corresponding zebrafish ortholog is displayed on the disease page, with links to additional information regarding the genes and existing mutations. Studying human disease often requires the use of models that recapitulate some or all of the pathologies observed in human diseases. Access to information regarding existing and published models can be critical, because they provide a tractable way to gain insight into the phenotypic outcomes of the disease. ZFIN annotates zebrafish models of human disease and supports retrieval of these published models by listing zebrafish models on the disease term page as well as by providing search interfaces and data download files to access the data. The improvements ZFIN has made to annotate, display, and search data related to human disease, especially zebrafish models for disease and disease-associated gene information, should be helpful to researchers and clinicians considering the use of zebrafish to study human disease.",2017-07-01 +32868357,"Determining the long-term health burden and risk of sequelae for 14 foodborne infections in British Columbia, Canada: protocol for a retrospective population-based cohort study.","

Introduction

Over one in eight Canadians is affected by a foodborne infection annually; however, the long-term consequences, including the risks and costs of sequelae, are unclear. We aim to estimate the health burden and direct costs of 14 infections commonly transmitted by food, considering the acute illness and subsequent sequelae and mortality, for the population of British Columbia, Canada (~4.7 million).

Methods and analysis

We will conduct a population-based retrospective cohort study of the British Columbia provincial population, over a 10-year study period (1 January 2005 to 31 December 2014). Exposure is defined as a provincially reported illness caused by Clostridium botulinum, Campylobacter, Cryptosporidium, Cyclospora, Giardia, hepatitis A virus, Listeria, non-typhoidal Salmonella spp, Salmonella Typhi, Salmonella Paratyphi, Shiga toxin-producing Escherichia coli, Shigella, Vibrio parahaemolyticus or Yersinia (excluding pestis). We will link individual-level longitudinal data from eight province-wide administrative health and reportable disease databases that include physician visits, hospitalisations and day surgeries, deaths, stillbirths, prescription medications (except those to treat HIV) and reportable foodborne diseases. Using these linked databases, we will investigate the likelihood of various sequelae and death. Hazard models will be used to estimate the risk of outcomes and their association with the type of foodborne infection. Epidemiological analyses will be conducted to determine the progression of illness and the fraction of sequelae attributable to specific foodborne infections. Economic analyses will assess the consequent direct healthcare costs.

Ethics and dissemination

This study has been approved by a University of Waterloo Research Ethics Committee (no 30645), the University of British Columbia Behavioral Research Ethics Board (no H16-00021) and McGill University's Institutional Review Board (no A03-M12-19A). Results will be disseminated via presentations to academics, public health practitioners and knowledge users, and publication in peer-reviewed journals. Where such publications are not open access, manuscripts will also be available via the University of Waterloo's Institutional Repository (https://uwspace.uwaterloo.ca).",2020-08-31 +34735297,Bidirectional Associations of Childhood Stuttering With Behavior and Temperament.,"

Purpose

Behavior and temperament (e.g., emotional reactivity, self-regulation) have been considered relevant to stuttering and its developmental course, but the direction of this relation is still unknown. Knowledge of behavior difficulties and temperament in childhood stuttering can improve screening and intervention. The current study examined both directions of the relationship between stuttering and behavior difficulties and temperament and between persistent stuttering and behavior difficulties and temperament across childhood.

Method

This study was embedded in the Generation R Study, a population-based cohort from fetal life onward in the Netherlands. We analyzed data from 145 children (4.2%) with a history of stuttering (118 recovered, 27 persistent) and 3,276 children without such a history. Behavior and temperament were repeatedly assessed using parental questionnaires (Child Behavior Checklist) and Infant/Child Behavior Questionnaire between 0.5 and 9 years of age. Multiple logistic and linear regression analyses were performed.

Results

Six-month-old children who were less able to ""recover from distress,"" indicating poor self-regulation, were more likely to develop persistent stuttering later in childhood (odds ratio = 2.05, 95% confidence interval (CI) [1.03, 4.05], p = .04). In the opposite direction, children with a history of stuttering showed more negative affectivity (β = 0.19, 95% CI [0.02, 0.37], p = .03) at 6 years of age than children without such a history. Stuttering persistence was associated with increased internalizing behaviors (β = 0.38, 95% CI [0.03, 0.74], p = .04) and higher emotional reactivity (β = 0.53, 95% CI [0.09, 0.89], p = .02) at the age of 9 years.

Conclusions

Behavior and temperament were associated with stuttering persistency-seemingly as both predictor and consequence-but did not predict a history of stuttering. We suggest that children who persist in stuttering should be carefully monitored, and if behavioral or temperamental problems appear, treatment for these problems should be offered. Supplemental Material https://doi.org/10.23641/asha.16869479.",2021-11-04 +34694908,Treatment of Underlying Forms: A Bayesian Meta-Analysis of the Effects of Treatment and Person-Related Variables on Treatment Response.,"Purpose This meta-analysis synthesizes published studies using ""treatment of underlying forms"" (TUF) for sentence-level deficits in people with aphasia (PWA). The study aims were to examine group-level evidence for TUF efficacy, to characterize the effects of treatment-related variables (sentence structural family and complexity; treatment dose) in relation to the Complexity Account of Treatment Efficacy (CATE) hypothesis, and to examine the effects of person-level variables (aphasia severity, sentence comprehension impairment, and time postonset of aphasia) on TUF response. Method Data from 13 single-subject, multiple-baseline TUF studies, including 46 PWA, were analyzed. Bayesian generalized linear mixed-effects interrupted time series models were used to assess the effect of treatment-related variables on probe accuracy during baseline and treatment. The moderating influence of person-level variables on TUF response was also investigated. Results The results provide group-level evidence for TUF efficacy demonstrating increased probe accuracy during treatment compared with baseline phases. Greater amounts of TUF were associated with larger increases in accuracy, with greater gains for treated than untreated sentences. The findings revealed generalization effects for sentences that were of the same family but less complex than treated sentences. 
Aphasia severity may moderate TUF response, with people with milder aphasia demonstrating greater gains compared with people with more severe aphasia. Sentence comprehension performance did not moderate TUF response. Greater time postonset of aphasia was associated with smaller improvements for treated sentences but not for untreated sentences. Conclusions Our results provide generalizable group-level evidence of TUF efficacy. Treatment and generalization responses were consistent with the CATE hypothesis. Model results also identified person-level moderators of TUF (aphasia severity, time postonset of aphasia) and preliminary estimates of the effects of varying amounts of TUF for treated and untreated sentences. Taken together, these findings add to the TUF evidence and may guide future TUF treatment-candidate selection. Supplemental Material https://doi.org/10.23641/asha.16828630.",2021-10-25 +26615199,ELM 2016--data update and new functionality of the eukaryotic linear motif resource.,"The Eukaryotic Linear Motif (ELM) resource (http://elm.eu.org) is a manually curated database of short linear motifs (SLiMs). In this update, we present the latest additions to this resource, along with more improvements to the web interface. ELM 2016 contains more than 240 different motif classes with over 2700 experimentally validated instances, manually curated from more than 2400 scientific publications. In addition, more data have been made available as individually searchable pages and are downloadable in various formats.",2015-11-28 +32280941,Maternal outcomes and birth interventions among women who begin labour intending to give birth at home compared to women of low obstetrical risk who intend to give birth in hospital: A systematic review and meta-analyses.,"

Background

We previously concluded that risk of stillbirth, neonatal mortality or morbidity is not different whether birth is intended at home or hospital. Here, we compare the occurrence of birth interventions and maternal outcomes among low-risk women who begin labour intending to birth at home compared to women intending to birth in hospital.

Methods

We used our registered protocol (PROSPERO, http://www.crd.york.ac.uk, No.CRD42013004046) and searched five databases from 1990-2018. Using R, we obtained pooled estimates of effect (accounting for study design, study setting and parity).

Findings

16 studies provided data from ~500,000 intended home births for the meta-analyses. There were no reported maternal deaths. When controlling for parity in well-integrated settings we found women intending to give birth at home compared to hospital were less likely to experience: caesarean section OR 0.58(0.44,0.77); operative vaginal birth OR 0.42(0.23,0.76); epidural analgesia OR 0.30(0.24,0.38); episiotomy OR 0.45(0.28,0.73); 3rd or 4th degree tear OR 0.57(0.43,0.75); oxytocin augmentation OR 0.37(0.26,0.51) and maternal infection OR 0.23(0.15,0.35). Pooled results for postpartum haemorrhage showed women intending home births were either less likely or did not differ from those intending hospital birth [OR 0.66(0.54,0.80) and RR 1.30(0.79,2.13) from 2 studies that could not be pooled with the others]. Similar results were found when data were stratified by parity and by degree of integration into health systems.

Interpretation

Among low-risk women, those intending to birth at home experienced fewer birth interventions and untoward maternal outcomes. These findings along with earlier work reporting neonatal outcomes inform families, health care providers and policy makers around the safety of intended home births.

Funding

Partial funding: Association of Ontario Midwives open peer reviewed grant.",2020-04-05 +27924022,Expanded national database collection and data coverage in the FINDbase worldwide database for clinically relevant genomic variation allele frequencies.,"FINDbase (http://www.findbase.org) is a comprehensive data repository that records the prevalence of clinically relevant genomic variants in various populations worldwide, such as pathogenic variants leading mostly to monogenic disorders and pharmacogenomics biomarkers. The database also records the incidence of rare genetic diseases in various populations, all in well-distinct data modules. Here, we report extensive data content updates in all data modules, with direct implications to clinical pharmacogenomics. Also, we report significant new developments in FINDbase, namely (i) the release of a new version of the ETHNOS software that catalyzes development curation of national/ethnic genetic databases, (ii) the migration of all FINDbase data content into 90 distinct national/ethnic mutation databases, all built around Microsoft's PivotViewer (http://www.getpivot.com) software (iii) new data visualization tools and (iv) the interrelation of FINDbase with DruGeVar database with direct implications in clinical pharmacogenomics. The abovementioned updates further enhance the impact of FINDbase, as a key resource for Genomic Medicine applications.",2016-10-18 +35308974,Epilepsy-Connect: An Integrated Knowledgebase for Characterizing Alterations in Consciousness State of Pharmacoresistant Epilepsy Patients.,"Alterations in consciousness state are a defining characteristic of focal epileptic seizures. Consequently, understanding the complex changes in neurocognitive networks which underpin seizure-induced alterations in consciousness state is important for advancement in seizure classification. 
Comprehension of these changes is complicated by a lack of data standardization; however, the use of a common terminological system or ontology in a patient registry minimizes this issue. In this paper, we introduce an integrated knowledgebase called Epilepsy-Connect to improve the understanding of changes in consciousness states during focal seizures of pharmacoresistant epilepsy patients. This registry catalogues over 809 seizures from 70 patients at University Hospital's Epilepsy Center who were undergoing stereotactic electroencephalography (SEEG) monitoring as part of an evaluation for surgical intervention. Although Epilepsy-Connect focuses on consciousness states, it aims to enable users to leverage data from an informatics platform to analyze epilepsy data in a streamlined manner. Epilepsy-Connect is available at https://bmhinformatics.case.edu/Epilepsyconnect/login/.",2021-01-01 +,GENE-42. EXOME SEQUENCING ANALYSIS TO IDENTIFY POSSIBLE GENOMIC DRIVERS OF METASTATIC INVASION INTO THE SPINE,"Abstract The spine is a common site of metastatic spread of many cancers, causing debilitating pain and suffering. Treatment of spinal metastases is limited by resistance to radiation, chemotherapy, and proximity to the spinal cord. It is currently not known what causes the high incidence of spinal metastases, yet theories have been proposed, like the venous metastatic spread theory (Batson, 1940) and the “seed and soil” hypothesis (Paget, 1889), postulating that factors intrinsic to the tumor cells or to the microenvironment determine the location of cancer dissemination. Recent advances in high throughput sequencing allow for in depth analyses of the molecular signatures of tumors. To identify if there are intrinsic genetic alterations common to cancer cells that disseminate into the spine, we sequenced the exome of metastatic tumor cells harvested from the vertebrae of 9 patients with different primary tumors: carcinomas and sarcomas. 
Exome sequencing was performed using the HiSeq 4000 (Illumina) platform on DNA from tumor cell lines and control blood or bone marrow. Data was analyzed at the University of Michigan Bioinformatics Core and variants were called using the VarScan method (http://varscan.sourceforge.net/) in somatic mode. This analysis identified a total of 2366 genes with high impact mutations (888–1026 per sample); of these, 232 genes are common to all patients analyzed. Ninety-six of the identified genes (4%) are included in the Catalogue of Somatic Mutations in Cancer, and of these, seven: ACSL6, ACVR1, ALK, FGFR2, HSP90AA1, PTPN6 and PTPRB, have high impact mutations in all 9 patients with spinal metastatic disease. Pathway analysis of genes mutated in 5 or more patients shows significant overrepresentation of 41 KEGG pathways including TGFb, HIF1-a, VEGF, Wnt and Estrogen signaling pathways. Ongoing experiments are performed to validate the sequencing analysis and characterize functional consequences of the common mutations identified.",2019-11-01 +32539086,ForageGrassBase: molecular resource for the forage grass meadow fescue (Festuca pratensis Huds.). ,"Meadow fescue (Festuca pratensis Huds.) is one of the most important forage grasses in temperate regions. It is a diploid (2n = 14) outbreeding species that belongs to the genus Festuca. Together with Lolium perenne, they are the most important genera of forage grasses. Meadow fescue has very high quality of yield with good winter survival and persistency. However, extensive genomic resources for meadow fescue have not become available so far. To address this lack of comprehensive publicly available datasets, we have developed functionally annotated draft genome sequences of two meadow fescue genotypes, 'HF7/2' and 'B14/16', and constructed the platform ForageGrassBase, available at http://foragegrass.org/, for data visualization, download and querying. 
This is the first open-access platform that provides extensive genomic resources related to this forage grass species. The current database provides the most up-to-date draft genome sequence along with structural and functional annotations for genes that can be accessed using Genome Browser (GBrowse), along with comparative genomic alignments to Arabidopsis, L. perenne, barley, rice, Brachypodium and maize genomes. We have integrated homologous search tool BLAST also for the users to analyze their data. Combined, GBrowse, BLAST and downloadable data gives a user-friendly access to meadow fescue genomic resources. To our knowledge, ForageGrassBase is the first genome database dedicated to forage grasses. The current forage grass database provides valuable resources for a range of research fields related to meadow fescue and other forage crop species, as well as for plant research communities in general. The genome database can be accessed at http://foragegrass.org.",2020-01-01 +31665429,DrugCombDB: a comprehensive database of drug combinations toward the discovery of combinatorial therapy.,"Drug combinations have demonstrated high efficacy and low adverse side effects compared to single drug administration in cancer therapies and thus have drawn intensive attention from researchers and pharmaceutical enterprises. Due to the rapid development of high-throughput screening (HTS), the number of drug combination datasets available has increased tremendously in recent years. Therefore, there is an urgent need for a comprehensive database that is crucial to both experimental and computational screening of synergistic drug combinations. In this paper, we present DrugCombDB, a comprehensive database devoted to the curation of drug combinations from various data sources: (i) HTS assays of drug combinations; (ii) manual curations from the literature; and (iii) FDA Orange Book and external databases. 
Specifically, DrugCombDB includes 448 555 drug combinations derived from HTS assays, covering 2887 unique drugs and 124 human cancer cell lines. In particular, DrugCombDB has more than 6000 000 quantitative dose responses from which we computed multiple synergy scores to determine the overall synergistic or antagonistic effects of drug combinations. In addition to the combinations extracted from existing databases, we manually curated 457 drug combinations from thousands of PubMed publications. To benefit the further experimental validation and development of computational models, multiple datasets that are ready to train prediction models for classification and regression analysis were constructed and other significant related data were gathered. A website with a user-friendly graphical visualization has been developed for users to access the wealth of data and download prebuilt datasets. Our database is available at http://drugcombdb.denglab.org/.",2020-01-01 +34429017,Association of SERPINE1 rs1799889 polymorphism with arterial ischemic stroke in children: a systematic review and meta-analysis.,"Inherited thrombophilias are well-established predisposing factors for venous thromboembolism, but their role in arterial ischemic stroke (AIS) in children, remains unclear. The association between SERPINE1 rs1799889 polymorphism and AIS in children was evaluated by several studies, whereas the results were conflicting. Thus, we performed this meta-analysis to combine and analyze the available studies in order to provide a more accurate result on the association. PubMed, Scopus, EMBASE, SciELO, MedRxiv, China Biology Medicine Disk, DeepDyve, CNKI, and Web of Science were used to identify all relevant articles published up to 30 November 2020, without any restrictions on ethnicity. Summary odds ratios (ORs) with 95% confidence intervals (CIs) were used to determine the strength of the associations. 
A total of eight case-control studies with 600 cases and 2,156 controls were selected. No significant association between SERPINE1 rs1799889 polymorphism and AIS in children susceptibility was noted. In the stratified analyses by ethnicity, source of controls, genotyping methods, and age groups, there was still no significant association between SERPINE1 rs1799889 polymorphism and AIS risk in children. This study suggested that SERPINE1 rs1799889 polymorphism might be not related to etiology of AIS in children. Moreover, well-designed, large-scale and multicenter clinical studies are required to improve and validate these results. Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1966798 .",2021-08-25 +,EVAPO: A smartphone application to estimate potential evapotranspiration using cloud gridded meteorological data from NASA-POWER system,"In this study a new android app for smartphones to estimate potential evapotranspiration (PET) in real time, using gridded data from NASA-POWER, to any location in the world, would result in a more efficient irrigation and increase irrigation water conservation. The smartphone app called EVAPO uses meteorological data to calculate PET using the Penman–Monteith (FAO56) method. To evaluate performance of the proposed method, we compared PET estimated by the EVAPO with that estimated from climatic data from conventional surface meteorological stations. The accuracy, tendency and precision of the models were determined using the Willmott et al. (1985) concordance index (d), systematic root mean square error (RMSEs) and determination index (R2), respectively. The results obtained were satisfactory for all studied locations with mean values of 0.67, 0.95 (mm) and 0.72 for d, RMSEs and R2, respectively. 
The app can be accessed in the Play Store (free): https://play.google.com/store/apps/details?id=br.com.maldonado.instantet0.",2019-01-01 +,The ECOSTRESS spectral library version 1.0,"In June 2018, the ECOsystem Spaceborne Thermal Radiometer Experiment on Space Station (ECOSTRESS) mission was launched to measure plant temperatures and better understand how they respond to stress. While the ECOSTRESS mission delivers imagery with ~60 m spatial resolution, it is often useful to have spectra at the leaf level in order to explain variability seen at the pixel level. As it was originally titled, the Advanced Spaceborne Thermal Emission Reflection Radiometer (ASTER) spectral library version 2.0 has been expanded to support ECOSTRESS studies by including major additions of laboratory measured vegetation and non-photosynthetic vegetation (NPV) spectra. The library now contains 541 leaf visible shortwave infrared (VIS/SWIR) spectra, 472 leaf thermal infrared (TIR) spectra, and 51 NPV VIS/SWIR and TIR spectra. Previously, the library primarily contained VSWIR and TIR laboratory spectra of minerals, rocks, and man-made materials. This new library, containing over 3000 spectra, was renamed the ECOSTRESS spectral library version 1.0 and is publicly available (http://speclib.jpl.nasa.gov). It should be noted that as with the prior versions of the library, the VSWIR and TIR measurements were made with separate instruments with different calibration sources. Care should be taken when combining the data into a seamless spectrum to cover the entire spectral range. The ECOSTRESS spectral library provides a comprehensive collection of natural and man-made laboratory collected spectra covering the wavelength range of 0.35–15.4 μm.",2019-09-01 +34484239,PopCover-2.0. 
Improved Selection of Peptide Sets With Optimal HLA and Pathogen Diversity Coverage.,"The use of minimal peptide sets offers an appealing alternative for design of vaccines and T cell diagnostics compared to conventional whole protein approaches. T cell immunogenicity towards peptides is contingent on binding to human leukocyte antigen (HLA) molecules of the given individual. HLA is highly polymorphic, and each variant typically presents a different repertoire of peptides. This polymorphism combined with pathogen diversity challenges the rational selection of peptide sets with broad immunogenic potential and population coverage. Here we propose PopCover-2.0, a simple yet highly effective method, for resolving this challenge. The method takes as input a set of (predicted) CD8 and/or CD4 T cell epitopes with associated HLA restriction and pathogen strain annotation together with information on HLA allele frequencies, and identifies peptide sets with optimal pathogen and HLA (class I and II) coverage. PopCover-2.0 was benchmarked on historic data in the context of HIV and SARS-CoV-2. Further, the immunogenicity of the selected SARS-CoV-2 peptides was confirmed by experimentally validating the peptide pools for T cell responses in a panel of SARS-CoV-2 infected individuals. In summary, PopCover-2.0 is an effective method for rational selection of peptide subsets with broad HLA and pathogen coverage. The tool is available at https://services.healthtech.dtu.dk/service.php?PopCover-2.0.",2021-08-17 +30006920,A Web-Based Atlas Combining MRI and Histology of the Squirrel Monkey Brain.,"The squirrel monkey (Saimiri sciureus) is a commonly-used surrogate for humans in biomedical research. In the neuroimaging community, MRI and histological atlases serve as valuable resources for anatomical, physiological, and functional studies of the brain; however, no digital MRI/histology atlas is currently available for the squirrel monkey. 
This paper describes the construction of a web-based multi-modal atlas of the squirrel monkey brain. The MRI-derived information includes anatomical MRI contrast (i.e., T2-weighted and proton-density-weighted) and diffusion MRI metrics (i.e., fractional anisotropy and mean diffusivity) from data acquired both in vivo and ex vivo on a 9.4 Tesla scanner. The histological images include Nissl and myelin stains, co-registered to the corresponding MRI, allowing identification of cyto- and myelo-architecture. In addition, a bidirectional neuronal tracer, biotinylated dextran amine (BDA) was injected into the primary motor cortex, enabling highly specific identification of regions connected to the injection location. The atlas integrates the results of common image analysis methods including diffusion tensor imaging glyphs, labels of 57 white-matter tracts identified using DTI-tractography, and 18 cortical regions of interest identified from Nissl-revealed cyto-architecture. All data are presented in a common space, and all image types are accessible through a web-based atlas viewer, which allows visualization and interaction of user-selectable contrasts and varying resolutions. By providing an easy to use reference system of anatomical information, our web-accessible multi-contrast atlas forms a rich and convenient resource for comparisons of brain findings across subjects or modalities. The atlas is called the Combined Histology-MRI Integrated Atlas of the Squirrel Monkey (CHIASM). All images are accessible through our web-based viewer ( https://chiasm.vuse.vanderbilt.edu /), and data are available for download at ( https://www.nitrc.org/projects/smatlas/ ).",2019-01-01 +34165986,BlackSheep: A Bioconductor and Bioconda Package for Differential Extreme Value Analysis.,"Unbiased assays such as shotgun proteomics and RNA-seq provide high-resolution molecular characterization of tumors. 
These assays measure molecules with highly varied distributions, making interpretation and hypothesis testing challenging. Samples with the most extreme measurements for a molecule can reveal the most interesting biological insights yet are often excluded from analysis. Furthermore, rare disease subtypes are, by definition, underrepresented in cancer cohorts. To provide a strategy for identifying molecules aberrantly enriched in small sample cohorts, we present BlackSheep, a package for nonparametric description and differential analysis of genome-wide data, available from Bioconductor (https://www.bioconductor.org/packages/release/bioc/html/blacksheepr.html) and Bioconda (https://bioconda.github.io/recipes/blksheep/README.html). BlackSheep is a complementary tool to other differential expression analysis methods, which is particularly useful when analyzing small subgroups in a larger cohort.",2021-06-24 +34198016,Normalization of breast MRIs using cycle-consistent generative adversarial networks.,"

Objectives

Dynamic Contrast Enhanced-Magnetic Resonance Imaging (DCE-MRI) is widely used to complement ultrasound examinations and x-ray mammography for early detection and diagnosis of breast cancer. However, images generated by various MRI scanners (e.g., GE Healthcare, and Siemens) differ both in intensity and noise distribution, preventing algorithms trained on MRIs from one scanner to generalize to data from other scanners. In this work, we propose a method to solve this problem by normalizing images between various scanners.

Methods

MRI normalization is challenging because it requires normalizing intensity values and mapping noise distributions between scanners. We utilize a cycle-consistent generative adversarial network to learn a bidirectional mapping and perform normalization between MRIs produced by GE Healthcare and Siemens scanners in an unpaired setting. Initial experiments demonstrate that the traditional CycleGAN architecture struggles to preserve the anatomical structures of the breast during normalization. Thus, we propose two technical innovations in order to preserve both the shape of the breast as well as the tissue structures within the breast. First, we incorporate mutual information loss during training in order to ensure anatomical consistency. Second, we propose a modified discriminator architecture that utilizes a smaller field-of-view to ensure the preservation of finer details in the breast tissue.

Results

Quantitative and qualitative evaluations show that the second innovation consistently preserves the breast shape and tissue structures while also performing the proper intensity normalization and noise distribution mapping.

Conclusion

Our results demonstrate that the proposed model can successfully learn a bidirectional mapping and perform normalization between MRIs produced by different vendors, potentially enabling improved diagnosis and detection of breast cancer. All the data used in this study are publicly available at https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=70226903.",2021-06-08 +28387841,"TimeTree: A Resource for Timelines, Timetrees, and Divergence Times.","Evolutionary information on species divergence times is fundamental to studies of biodiversity, development, and disease. Molecular dating has enhanced our understanding of the temporal patterns of species divergences over the last five decades, and the number of studies is increasing quickly due to an exponential growth in the available collection of molecular sequences from diverse species and large number of genes. Our TimeTree resource is a public knowledge-base with the primary focus to make available all species divergence times derived using molecular sequence data to scientists, educators, and the general public in a consistent and accessible format. Here, we report a major expansion of the TimeTree resource, which more than triples the number of species (>97,000) and more than triples the number of studies assembled (>3,000). Furthermore, scientists can access not only the divergence time between two species or higher taxa, but also a timetree of a group of species and a timeline that traces a species' evolution through time. The new timetree and timeline visualizations are integrated with display of events on earth and environmental history over geological time, which will lead to broader and better understanding of the interplay of the change in the biosphere with the diversity of species on Earth. 
The next generation TimeTree resource is publicly available online at http://www.timetree.org.",2017-07-01 +31805847,BEST: a web server for brain expression Spatio-temporal pattern analysis.,"BACKGROUND:Dysregulated gene expression patterns have been reported in several mental disorders. Limited by the difficulty of obtaining samples, psychiatric molecular mechanism research still relies heavily on clues from genetics studies. By using reference data from brain expression studies, multiple types of comprehensive gene expression pattern analysis have been performed on psychiatric genetic results. These systems-level spatial-temporal expression pattern analyses provided evidence on specific brain regions, developmental stages and molecular pathways that are possibly involved in psychiatric pathophysiology. At present, there is no online tool for such systematic analysis, which hinders the applications of analysis by non-informatics researchers such as experimental biologists and clinical molecular biologists. RESULTS:We developed the BEST web server to support Brain Expression Spatio-Temporal pattern analysis. There are three highlighted features of BEST: 1) visualization: it generates user-friendly visual results that are easy to interpret, including heatmaps, Venn diagrams, gene co-expression networks and cluster-based Manhattan gene plots; these results illustrate the complex spatio-temporal expression patterns, including expression quantification and correlation between genes; 2) integration: it provides comprehensive human brain spatio-temporal expression patterns by integrating data from currently available databases; 3) multi-dimensionality: it analyses input genes as both a whole set and several subsets (clusters) which are enriched according to co-expression patterns, and it also presents the correlation between genetic and expression data. 
CONCLUSIONS:To the best of our knowledge, BEST is the first data tool to support comprehensive human brain spatial-temporal expression pattern analysis. It helps to bridge disease-related genetic studies and mechanism studies, provides clues for key gene and molecular system identification, and supports the analysis of disease sensitive brain region and age stages. BEST is freely available at http://best.psych.ac.cn.",2019-12-05 +,Global database of plants with root‐symbiotic nitrogen fixation: NodDB,"Plants associated with symbiotic N‐fixing bacteria play important roles in early successional, riparian and semi‐dry ecosystems. These so‐called N‐fixing plants are widely used for reclamation of disturbed vegetation and improvement of soil fertility in agroforestry. Yet, available information about plants that are capable of establishing nodulation is fragmented and somewhat outdated. This article introduces the NodDB database of N‐fixing plants based on morphological and phylogenetic evidence (available at https://doi.org/10.15156/bio/587469) and discusses plant groups with conflicting reports and interpretation, such as certain legume clades and the Zygophyllaceae family. During angiosperm evolution, N‐fixing plants became common in the fabid rather than in the ‘nitrogen‐fixing’ clade. The global GBIF plant species distribution data indicated that N‐fixing plants tend to be relatively more diverse in savanna and semi‐desert biomes. The compiled and re‐interpreted information about N‐fixing plants enables accurate analyses of biogeography and community ecology of biological N fixation.",2018-05-01 +33865096,"Prediction of eye, hair and skin colour in Latin Americans.","Here we evaluate the accuracy of prediction for eye, hair and skin pigmentation in a dataset of > 6500 individuals from Mexico, Colombia, Peru, Chile and Brazil (including genome-wide SNP data and quantitative/categorical pigmentation phenotypes - the CANDELA dataset CAN). 
We evaluated accuracy in relation to different analytical methods and various phenotypic predictors. As expected from statistical principles, we observe that quantitative traits are more sensitive to changes in the prediction models than categorical traits. We find that Random Forest or Linear Regression are generally the best performing methods. We also compare the prediction accuracy of SNP sets defined in the CAN dataset (including 56, 101 and 120 SNPs for eye, hair and skin colour prediction, respectively) to the well-established HIrisPlex-S SNP set (including 6, 22 and 36 SNPs for eye, hair and skin colour prediction respectively). When training prediction models on the CAN data, we observe remarkably similar performances for HIrisPlex-S and the larger CAN SNP sets for the prediction of hair (categorical) and eye (both categorical and quantitative), while the CAN sets outperform HIrisPlex-S for quantitative, but not for categorical skin pigmentation prediction. The performance of HIrisPlex-S, when models are trained in a world-wide sample (although consisting of 80% Europeans, https://hirisplex.erasmusmc.nl), is lower relative to training in the CAN data (particularly for hair and skin colour). Altogether, our observations are consistent with common variation of eye and hair colour having a relatively simple genetic architecture, which is well captured by HIrisPlex-S, even in admixed Latin Americans (with partial European ancestry). By contrast, since skin pigmentation is a more polygenic trait, accuracy is more sensitive to prediction SNP set size, although here this effect was only apparent for a quantitative measure of skin pigmentation. 
Our results support the use of HIrisPlex-S in the prediction of categorical pigmentation traits for forensic purposes in Latin America, while illustrating the impact of training datasets on its accuracy.",2021-04-06 +34160842,Does transcriptional heterogeneity facilitate the development of genetic drug resistance?,"Non-genetic forms of antimicrobial (drug) resistance can result from cell-to-cell variability that is not encoded in the genetic material. Data from recent studies also suggest that non-genetic mechanisms can facilitate the development of genetic drug resistance. We speculate on how the interplay between non-genetic and genetic mechanisms may affect microbial adaptation and evolution during drug treatment. We argue that cellular heterogeneity arising from fluctuations in gene expression, epigenetic modifications, as well as genetic changes contribute to drug resistance at different timescales, and that the interplay between these mechanisms enhance pathogen resistance. Accordingly, developing a better understanding of the role of non-genetic mechanisms in drug resistance and how they interact with genetic mechanisms will enhance our ability to combat antimicrobial resistance. Also see the video abstract here: https://youtu.be/aefGpdh-bgU.",2021-06-23 +34398696,Associations between subjective social status and psychological well-being among college students.,"Background:Higher subjective social status (SSS) is associated with better mental health among youth; however, few studies have examined youth's perceptions of past (childhood) or future (adulthood) SSS. Methods:Utilizing latent profile analysis, we examined unique profiles of past, present, and future SSS among 401 college students in the United States and tested associations between these profiles and psychological well-being (ie, depressive symptoms, negative affect, positive affect, and flourishing), controlling for family socioeconomic status (SES). 
Results:Results revealed four profiles: Low SSS (8%), Upward SSS (18%), Moderate SSS (43%), and High SSS (31%). Youth in the High SSS profile had the best psychological well-being, and those in the Low SSS profile had the worst. While the Upward SSS profile was associated with depressive symptoms and negative affect, it was protective in terms of positive affect. Discussion: Findings highlight unique effects of upward SSS mobility.Supplemental data for this article can be accessed online at https://doi.org/10.1080/07448481.2021.1954010.",2021-08-16 +33983762,"""Anosognosia for prospective and retrospective memory deficits: Assessment and theoretical considerations"": Correction to Chapman et al. (2019).","Reports an error in ""Anosognosia for prospective and retrospective memory deficits: Assessment and theoretical considerations"" by Silvia Chapman, Nicoletta Beschin, Stephanie Cosentino, Mitchell S. V. Elkind, Sergio Della Sala and Gianna Cocchini (Neuropsychology, 2019[Oct], Vol 33[7], 1020-1031). In the article (http://dx.doi.org/10.1037/neu0000568), the racial and ethnic description of the participants was missing. The following text has been added to the first paragraph under the ""Participants"" heading in the ""Method"" section: ""The racial and ethnic distribution of the participants was 76.5% (n = 39) White, 13.7% (n = 7) Black, 5.9% (n = 3) Hispanic, and 3.9% (n = 2) Asian."" The online version of this article has been corrected. (The following abstract of the original article appeared in record 2019-33671-001.) Objective: Patients who suffer from memory loss after an Acquired Brain Injury (ABI) may also suffer from anosognosia, or unawareness of their memory loss. How we define and measure anosognosia can have critical implications for its study and clinical assessment. Commonly used measures often lack standardization and reliability checks for responses. 
Moreover, these methods rely heavily on cognitive abilities (e.g., language abilities) that are often affected after brain injury. The aim of this study is to elucidate how to best conceptualize and detect anosognosia for memory loss by introducing a new method of assessment, the Visual-Analogue Test for Anosognosia for memory impairment (VATAmem). Method: A total of 51 patients (M = 61 years, M = 13 years of education) with memory difficulties after ABI were recruited from outpatient clinics. A total of 73 informants were also recruited (M = 51 years old, M = 13 years of education). Both patients and informants evaluated the severity of patients' everyday memory mistakes on the VATAmem, for prospective and retrospective memory deficits by using visual analogue scales, vignettes, and check questions to ensure reliability. Results and Conclusion: A total of 30% of the patients were deemed unaware of their memory deficits. Patients were less aware of their prospective (29%) than their retrospective memory difficulties (18%). The new method of assessment provided by the VATAmem reduced possible false positives and enhanced reliability. We conclude that careful consideration of methodology is a key step to interpreting anosognosia findings within a theoretical framework. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-05-13 +33983784,"""Spiteful and contemptuous: A new look at the emotional experiences related to psychopathy"": Correction to Garofalo et al. (2019).","Reports an error in ""Spiteful and contemptuous: A new look at the emotional experiences related to psychopathy"" by Carlo Garofalo, Craig S. Neumann, Virgil Zeigler-Hill and J. Reid Meloy (Personality Disorders: Theory, Research, and Treatment, 2019[Mar], Vol 10[2], 173-184). In the article (http://dx.doi.org/10.1037/per0000310), there was an error in the placement of the correlation coefficients in Table 4. 
In the right-most column, the first row should have read "".20***"", and each subsequent coefficient should have been shifted down one row. The bottom row should remain blank. The online version of this article has been corrected. (The following abstract of the original article appeared in record 2018-62012-001.) Psychopathy has long been conceptualized in terms of an absence of emotion. Yet, recent studies have suggested that the experience of other-directed negative emotions may be more intimately linked to psychopathy than previously acknowledged, although there is limited knowledge concerning the experience of such emotions. The present study examined the disposition to experience two other-directed emotions, spitefulness and contempt, that are conceptually linked with psychopathy but currently are limited in empirical support. Across 2 studies with 3 nonclinical samples (Ns = 1,237, 239, 521), we found evidence that psychopathic traits-as assessed via the Self-Report Psychopathy Scale (SRP; Paulhus, Neumann, & Hare, 2016; Study 1 and Study 2) and the Triarchic Psychopathy Measure (TriPM; Patrick, 2010; Study 2)-were positively associated with spitefulness (Study 1) and contempt (Study 2). These associations were consistent across psychopathy instruments (SRP and TriPM) and dimensions (i.e., the SRP Interpersonal, Affective, Lifestyle, and Antisocial facets, and the TriPM Meanness and Disinhibition dimensions), were stronger for the interpersonal and affective traits of psychopathy, and held when accounting for several theoretically relevant covariates. The only exception concerned the TriPM Boldness scale, which had less consistent associations with contempt. The present findings further our understanding of the emotional experiences related to psychopathy, highlighting the relevance of focusing on other-directed negative emotions, especially those that are interpersonal in nature and share an antagonistic component. 
(PsycInfo Database Record (c) 2022 APA, all rights reserved).",2021-05-13 +33722144,Guilt and Burnout in Medical Students.,"

Theory

Burnout is prevalent among medical students and is correlated with negative feelings, behaviors, and outcomes. Empathy is a desired trait for medical students that has been correlated with reduced burnout. The concept of guilt is closely related to concern about the well-being of others; therefore, feelings of guilt may be associated with empathy. Excessive guilt poses an increased risk for internalized distress, symptoms such as anhedonia, and may be related to burnout. The relationship between pathogenic guilt and burnout in medical students is unknown.

Hypothesis

We hypothesize that pathogenic guilt is present and related to both burnout and empathy in medical students.

Methods

We conducted a cross-sectional survey study of all students in one medical school. Data were collected in February 2020. The Oldenburg Burnout Inventory (OBLI), Toronto Empathy Questionnaire (TEQ), and Interpersonal Guilt Questionnaire-67 (IGQ-67) were used. A modified version of IGQ-67 was used to measure four subscales of pathogenic guilt: survival guilt, separation guilt, omnipotence guilt, and self-hate guilt. Data analyses for this study, including screening, evaluation of assumptions, descriptive statistics, reliabilities, one-way ANOVA, and correlation coefficients, were conducted using SPSS version 26.

Results

Of 300, 168 (56.0%) students participated in the study. Survival, omnipotence, and self-hate classes of pathogenic guilt were positively correlated with burnout. Empathy was correlated with two classes of pathogenic guilt: survival and omnipotence. Empathy was inversely related to burnout (disengagement).

Conclusions

Pathogenic guilt may be a contributor to burnout in medical students. Guilt should be a target of prevention and treatment in burnout in medical students.Supplemental data for this article is available online at https://doi.org/10.1080/10401334.2021.1891544.",2021-03-15 +34596487,Patients' Priorities for Surrogate Decision-Making: Possible Influence of Misinformed Beliefs.,"

Background

Many patients have three primary goals for how treatment decisions are made for them in the event of decisional incapacity. They want to be treated consistent with their preferences and values, they want their family to be involved in making decisions, and they want to minimize the stress on their family. The present paper investigates how patients' beliefs about surrogate decision-making influence which of these three goals they prioritize. Methods: Quantitative survey of 1,169 U.S. patients to assess their beliefs about surrogate decision-making, and how these beliefs influence patients' priorities for surrogate decision-making. Results: Most patients believed that families in general (68.8%) and their own family in particular (83.4%) frequently, almost always, or always know which treatments the patient would want in the event of incapacity. Patients with these beliefs were more likely to prioritize the goal of involving their family in treatment decision-making over the goal of minimizing family stress. Most patients (77.4%) also believed their family would experience significant stress from helping to make treatment decisions. However, patients' priorities were largely unchanged by this belief. Conclusions: Prior reports suggest that patients overestimate the extent to which their family knows which treatments they want in the event of decisional incapacity. The present analysis adds that these patients might be more likely to prioritize the goal of involving their family in treatment decision-making, even when this results in the family experiencing significant distress. 
This finding highlights that patients' misinformed beliefs about their family's knowledge might influence patients' priorities for surrogate decision-making, raising important questions for clinical practice, policy, and future research.Supplemental data for this article is available online at https://doi.org/10.1080/23294515.2021.1983665.",2021-10-01 +33711505,EasyMAP: A user-friendly online platform for analyzing 16S ribosomal DNA sequencing data.,"As next-generation sequencing technology has become more advanced, research on microbial 16S ribosomal DNA sequences has developed rapidly. Sequencing of 16S ribosomal DNA allows the composition of bacteria and archaea in a sample to be obtained and many analytical tools related to 16S ribosomal DNA sequences have been proposed; however, most do not include a user-friendly platform with a graphical user interface. Here, a comprehensive and easy-to-use online platform, Easy Microbiome Analysis Platform (EasyMAP), has been developed for analysis of 16S ribosomal DNA sequencing data. EasyMAP integrates the QIIME2, LefSe, and PICRUSt pipelines and includes temporal profiling analysis. Users can perform quality checks, taxonomy differential abundance analysis, microbial gene function prediction and longitudinal analysis with step-by-step guidance. EasyMAP is a user-friendly tool for comprehensive analysis of 16S ribosomal DNA sequencing data. The web server and documentation are freely available at http://easymap.cgm.ntu.edu.tw/.",2021-03-09 +32679723,Comparative Assessment of Protein Kinase Inhibitors in Public Databases and in PKIDB. ,"Since the first approval of a protein kinase inhibitor (PKI) by the Food and Drug Administration (FDA) in 2001, 55 new PKIs have reached the market, and many inhibitors are currently being evaluated in clinical trials. This is a clear indication that protein kinases still represent major drug targets for the pharmaceutical industry. 
In a previous work, we have introduced PKIDB, a publicly available database, gathering PKIs that have already been approved (Phase 4), as well as those currently in clinical trials (Phases 0 to 3). This database is updated frequently, and an analysis of the new data is presented here. In addition, we compared the set of PKIs present in PKIDB with the PKIs in early preclinical studies found in ChEMBL, the largest publicly available chemical database. For each dataset, the distribution of physicochemical descriptors related to drug-likeness is presented. From these results, updated guidelines to prioritize compounds for targeting protein kinases are proposed. The results of a principal component analysis (PCA) show that the PKIDB dataset is fully encompassed within all PKIs found in the public database. This observation is reinforced by a principal moments of inertia (PMI) analysis of all molecules. Interestingly, we notice that PKIs in clinical trials tend to explore new 3D chemical space. While a great majority of PKIs is located on the area of ""flatland"", we find few compounds exploring the 3D structural space. Finally, a scaffold diversity analysis of the two datasets, based on frequency counts was performed. The results give insight into the chemical space of PKIs, and can guide researchers to reach out new unexplored areas. PKIDB is freely accessible from the following website: http://www.icoa.fr/pkidb.",2020-07-15 +34700730,How Significant is the Delineation Bias in CT Radiomics Prognostic Power?,"

Purpose/objective(s)

Radiomics is referred to as quantitative image biomarkers for medical image analysis. Recently, Welch et al. and Traverso, Kazmierski et al. showed that radiomics' predictive power might be caused by its correlation with tumor volume. In this work, we investigated how tumor delineation affects the radiomic values and their prognostic power.

Materials/methods

Data. Three hundred sixty-one patient CT images from the Lung1 dataset [https://xnat.bmia.nl] and 210 patient images from the Lung2 dataset were used in the study. The gross tumor volume (GTV) ROI of each patient was eroded by applying a kernel of size one eight times. The GLCM, GLSZM, and GLRLM features were extracted from the original and the eroded images.

Analysis

The feature-volume relations were assessed by calculating the Spearman correlation for each patient using the eroded ROIs. Based on the criterion mean(|ρSpearman|) < 0.3, we selected five features. We built two models to stratify the patients into two survival groups and generated their survival curves with the Kaplan-Meier estimator: 1) the first model was fit with features calculated from the original ROIs; 2) the second model was fit with features from the eroded ROIs. We used the log-rank test to evaluate the significance of the stratification, calculating their P-values.

Results

The stratification was significant in the Lung1 and Lung2 datasets, with P-values of 0.02 and 0.05, respectively, using the features extracted from the original ROIs; however, there were multiple intersections, making the P-values inaccurate. In contrast, the stratification was significant in Lung1 and Lung2, with P-values of 0.03 and less than 0.005, respectively, using features extracted from the eroded ROIs.

Conclusion

Radiomic features extracted from routinely delineated GTVs can be biased by the tumor-air edge in CT. Our results show that features extracted from eroded GTVs can achieve better results than the features from the original GTVs. In addition, five features used in our analysis were selected to be unbiased by tumor volume.",2021-11-01 +34723644,Texture Consumption Patterns of 8- to 12-Month-Old Infants: A Reflection of Typical Feeding Development.,"Purpose The lack of age-appropriate expectations for the acquisition of feeding skills and consumption of textured food in early childhood inhibits early and accurate identification of developmental delay in feeding and pediatric feeding disorder. The objective of this study was to describe texture intake patterns in a cohort of typically developing infants between 8 and 12 months of age, with the aim of informing future research to establish targets for feeding skill acquisition. Method Using cross-sectional methodology, we studied the presence of liquid and solid textures and drinking methods in the diet, consumption patterns by texture and drinking methods, and caloric intake by texture via caregiver questionnaire and 3-day dietary intake record in 63 healthy infants between 8 and 12 months of age. Descriptive statistics and a one-way analysis of variance were conducted to compare the effect of age on texture intake patterns. Results Findings reveal rapid advancement of intake patterns for texture overall and for energy intake by texture between 8 and 12 months of age. Whereas liquids continue to provide a large proportion of total energy through this time, solids contribute an equal proportion of energy by 12 months of age. Conclusions This study describes texture intake patterns in a cohort of typically developing infants between 8 and 12 months of age by examining the presence of texture and drinking methods, liquid and solid consumption patterns, and energy intake by texture. 
When applied to data from a future population sample, findings will provide a threshold for age expectations for typical and disordered feeding development to aid in the detection of developmental delay in feeding and pediatric feeding disorder. What Is Known: Expectations regarding early feeding development have been focused on nutrition parameters. Lack of standardized, age-appropriate expectations for texture progression in infancy and early childhood inhibits early and accurate identification and treatment of pediatric feeding disorder. What Is New: We have described changes in dietary composition by texture and drinking method in healthy infants. Together with nutritional composition, this study describes a more comprehensive assessment of infant feeding, particularly to clinicians who need to diagnose feeding skill deficits. Supplemental Material https://doi.org/10.23641/asha.16879615.",2021-11-01 +32371094,Micronutrient status during paediatric critical illness: A scoping review.,"

Background

No evidence based recommendations for micronutrient requirements during paediatric critical illness are available, other than those arising from recommended nutrient intakes (RNI) for healthy children and expert opinion.

Objectives

The objective of this review is to examine the available evidence from micronutrient status in critically ill children considering studies which describe 1) micronutrient levels, 2) associations between micronutrient levels and clinical outcome, and 3) impact on clinical outcome with micronutrient supplementation during PICU admission.

Design

Scoping review.

Eligibility criteria

Any study which used a qualitative and quantitative design considering causes and consequences of micronutrient levels or micronutrient supplementation during paediatric critical illness.

Sources of evidence

NICE Healthcare Databases Advanced Search website (https://hdas.nice.org.uk/) was used as a tool for multiple searches, with a content analysis and charting of data extracted.

Results

711 records were identified, of which 35 were included in the review. In the studies evaluated, serum micronutrient status was determined on admission day in the majority of patients. A content analysis identified (n = 49) initial codes, (n = 14) sub-categories and (n = 5) overarching themes during critical illness, which were identified as: i) low levels of micronutrients, ii) causes of aberrant micronutrient levels, iii) associations between micronutrient levels and outcome, iv) supplementation of micronutrients.

Conclusion

During critical illness, micronutrients should be provided in sufficient amounts to meet reference nutrient intakes for age. Although, there is insufficient data to recommend routine supplementations of micronutrients at higher doses during critical illness, the 'absence of evidence should not imply evidence of absence', and well designed prospective studies are urgently needed to elucidate paediatric micronutrient requirements during critical illness. The absence of reliable biomarkers make it challenging to determine whether low serum levels are reflective of a true deficiency or as a result redistribution, particularly during the acute phase of critical illness. As more children continue to survive a PICU admission, particularly those with complex diseases micronutrient supplementation research should also be inclusive of the recovery phase following critical illness.",2020-04-22 +27924042,PlantTFDB 4.0: toward a central hub for transcription factors and regulatory interactions in plants.,"With the goal of providing a comprehensive, high-quality resource for both plant transcription factors (TFs) and their regulatory interactions with target genes, we upgraded plant TF database PlantTFDB to version 4.0 (http://planttfdb.cbi.pku.edu.cn/). In the new version, we identified 320 370 TFs from 165 species, presenting a more comprehensive genomic TF repertoires of green plants. Besides updating the pre-existing abundant functional and evolutionary annotation for identified TFs, we generated three new types of annotation which provide more directly clues to investigate functional mechanisms underlying: (i) a set of high-quality, non-redundant TF binding motifs derived from experiments; (ii) multiple types of regulatory elements identified from high-throughput sequencing data; (iii) regulatory interactions curated from literature and inferred by combining TF binding motifs and regulatory elements. 
In addition, we upgraded previous TF prediction server, and set up four novel tools for regulation prediction and functional enrichment analyses. Finally, we set up a novel companion portal PlantRegMap (http://plantregmap.cbi.pku.edu.cn) for users to access the regulation resource and analysis tools conveniently.",2016-10-24 +33689356,XlinkCyNET: A Cytoscape Application for Visualization of Protein Interaction Networks Based on Cross-Linking Mass Spectrometry Identifications.,"Software tools that allow the visualization and analysis of protein interaction networks are essential for studies in systems biology. One of the most popular network visualization tools in biology is Cytoscape, which offers a great selection of plug-ins for the interpretation of network data. Chemical cross-linking coupled to mass spectrometry (XL-MS) is an increasingly important source for protein interaction data; however, to date, no Cytoscape tools are available to analyze XL-MS results. In light of the suitability of the Cytoscape platform and to expand its toolbox, here we introduce XlinkCyNET, an open-source Cytoscape Java plug-in for exploring large-scale XL-MS-based protein interaction networks. XlinkCyNET offers the rapid and easy visualization of intra- and interprotein cross-links in a rectangular-bar style as well as on the 3D structure, allowing the interrogation of protein interaction networks at the residue level. XlinkCyNET is freely available from the Cytoscape App Store (http://apps.cytoscape.org/apps/xlinkcynet) and at the Liu lab webpage (https://www.theliulab.com/software/xlinkcynet).",2021-03-09 +28150237,Navigating the Glycome Space and Connecting the Glycoproteome.,"UniCarbKB ( http://unicarbkb.org ) is a comprehensive resource for mammalian glycoprotein and annotation data. In particular, the database provides information on the oligosaccharides characterized from a glycoprotein at either the global or site-specific level. 
This evidence is accumulated from a peer-reviewed and manually curated collection of information on oligosaccharides derived from membrane and secreted glycoproteins purified from biological fluids and/or tissues. This information is further supplemented with experimental method descriptions that summarize important sample preparation and analytical strategies. A new release of UniCarbKB is published every three months, each includes a collection of curated data and improvements to database functionality. In this Chapter, we outline the objectives of UniCarbKB, and describe a selection of step-by-step workflows for navigating the information available. We also provide a short description of web services available and future plans for improving data access. The information presented in this Chapter supplements content available in our knowledgebase including regular updates on interface improvements, new features, and revisions to the database content ( http://confluence.unicarbkb.org ).",2017-01-01 +33448030,Diagnostic value of cutaneous manifestation of SARS-CoV-2 infection.,"

Background

One of the challenging aspects of SARS-CoV-2 infection is its diverse multisystemic disease presentation.

Objectives

To evaluate the diagnostic value of cutaneous manifestations of SARS-CoV-2 infection and investigate their duration and timing in relation to other COVID-19 symptoms.

Methods

We used data from 336 847 UK users of the COVID Symptom Study app to assess the diagnostic value of body rash or an acral rash in SARS-CoV-2 infection, and data from an independent online survey of 11 544 respondents to investigate skin-specific symptoms and collect their photographs.

Results

Using data from the app, we show significant association between skin rashes and a positive swab test result (odds ratio 1·67, 95% confidence interval 1·42-1·97). Strikingly, among the respondents of the independent online survey, we found that 17% of SARS-CoV-2-positive cases reported skin rashes as the first presentation, and 21% as the only clinical sign of COVID-19. Together with the British Association of Dermatologists, we have compiled a catalogue of images of the most common skin manifestations of COVID-19 from 400 individuals (https://covidskinsigns.com), which we have made publicly available to assist clinicians in recognition of this early clinical feature of COVID-19.

Conclusions

Skin rashes cluster with other COVID-19 symptoms, are predictive of a positive swab test, and occur in a significant number of cases, either alone or before other classical symptoms. Recognizing rashes is important in identifying new and earlier cases of COVID-19.",2021-03-02 +34749531,Restraint of Fumarate Accrual by HIF-1α Preserves miR-27a-Mediated Limitation of Interleukin 10 during Infection of Macrophages by Histoplasma capsulatum.,"Hypoxia-inducible factor 1α (HIF-1α) regulates the immunometabolic phenotype of macrophages, including the orchestration of inflammatory and antimicrobial processes. Macrophages deficient in HIF-1α produce excessive quantities of the anti-inflammatory cytokine interleukin 10 (IL-10) during infection with the intracellular fungal pathogen Histoplasma capsulatum (R. A. Fecher, M. C. Horwath, D. Friedrich, J. Rupp, G. S. Deepe, J Immunol 197:565-579, 2016, https://doi.org/10.4049/jimmunol.1600342). Thus, the macrophage fails to become activated in response to proinflammatory cytokines and remains the intracellular niche of the pathogen. Here, we identify the tricarboxylic acid (TCA) cycle metabolite fumarate as the driver of IL-10 during macrophage infection with H. capsulatum in the absence of HIF-1α. Accumulation of fumarate reduced expression of a HIF-1α-dependent microRNA (miRNA), miR-27a, known to mediate decay of Il10 mRNA. Inhibition of fumarate accrual in vivo limited IL-10 and fungal growth. Our data demonstrate the critical role of HIF-1α in shaping appropriate TCA cycle activity in response to infection and highlight the consequences of a dysregulated immunometabolic response. IMPORTANCE Histoplasma capsulatum and related Histoplasma species are intracellular fungal pathogens endemic to broad regions of the globe, including the Americas, Africa, and Asia. While most infections resolve with mild or no symptoms, failure of the host to control fungal growth produces severe disease. 
Previously, we reported that loss of a key transcriptional regulator, hypoxia-inducible factor 1α (HIF-1α), in macrophages led to a lethal failure to control growth of Histoplasma (R. A. Fecher, M. C. Horwath, D. Friedrich, J. Rupp, G. S. Deepe, J Immunol 197:565-579, 2016, https://doi.org/10.4049/jimmunol.1600342). Inhibition of phagocyte activation due to excessive interleukin 10 by HIF-1α-deficient macrophages drove this outcome. In this study, we demonstrate that HIF-1α maintains contextually appropriate TCA cycle metabolism within Histoplasma-infected macrophages. The absence of HIF-1α results in excessive fumarate production that alters miRNA-27a regulation of interleukin-10. HIF-1α thus preserves the capacity of macrophages to transition from a permissive intracellular niche to the site of pathogen killing.",2021-11-09 +33347479,Synaptic polarity and sign-balance prediction using gene expression data in the Caenorhabditis elegans chemical synapse neuronal connectome network.,"Graph theoretical analyses of nervous systems usually omit the aspect of connection polarity, due to data insufficiency. The chemical synapse network of Caenorhabditis elegans is a well-reconstructed directed network, but the signs of its connections are yet to be elucidated. Here, we present the gene expression-based sign prediction of the ionotropic chemical synapse connectome of C. elegans (3,638 connections and 20,589 synapses total), incorporating available presynaptic neurotransmitter and postsynaptic receptor gene expression data for three major neurotransmitter systems. We made predictions for more than two-thirds of these chemical synapses and observed an excitatory-inhibitory (E:I) ratio close to 4:1 which was found similar to that observed in many real-world networks. 
Our open source tool (http://EleganSign.linkgroup.hu) is simple but efficient in predicting polarities by integrating neuronal connectome and gene expression data.",2020-12-21 +34050758,GEPIA2021: integrating multiple deconvolution-based analysis into GEPIA.,"In 2017, we released GEPIA (Gene Expression Profiling Interactive Analysis) webserver to facilitate the widely used analyses based on the bulk gene expression datasets in the TCGA and the GTEx projects, providing the biologists and clinicians with a handy tool to perform comprehensive and complex data mining tasks. Recently, the deconvolution tools have led to revolutionary trends to resolve bulk RNA datasets at cell type-level resolution, interrogating the characteristics of different cell types in cancer and controlled cohorts became an important strategy to investigate the biological questions. Thus, we present GEPIA2021, a standalone extension of GEPIA, allowing users to perform multiple interactive analysis based on the deconvolution results, including cell type-level proportion comparison, correlation analysis, differential expression, and survival analysis. With GEPIA2021, experimental biologists could easily explore the large TCGA and GTEx datasets and validate their hypotheses in an enhanced resolution. GEPIA2021 is publicly accessible at http://gepia2021.cancer-pku.cn/.",2021-07-01 +29487113,Panorama Public: A Public Repository for Quantitative Data Sets Processed in Skyline.,"To address the growing need for a centralized, community resource of published results processed with Skyline, and to provide reviewers and readers immediate visual access to the data behind published conclusions, we present Panorama Public (https://panoramaweb.org/public.url), a repository of Skyline documents supporting published results. Panorama Public is built on Panorama, an open source data management system for mass spectrometry data processed with the Skyline targeted mass spectrometry environment. 
The Panorama web application facilitates viewing, sharing, and disseminating results contained in Skyline documents via a web-browser. Skyline users can easily upload their documents to a Panorama server and allow other researchers to explore uploaded results in the Panorama web-interface through a variety of familiar summary graphs as well as annotated views of the chromatographic peaks processed with Skyline. This makes Panorama ideal for sharing targeted, quantitative results contained in Skyline documents with collaborators, reviewers, and the larger proteomics community. The Panorama Public repository employs the full data visualization capabilities of Panorama which facilitates sharing results with reviewers during manuscript review.",2018-02-27 +33905501,Recognizing and validating ligands with CheckMyBlob.,"Structure-guided drug design depends on the correct identification of ligands in crystal structures of protein complexes. However, the interpretation of the electron density maps is challenging and often burdened with confirmation bias. Ligand identification can be aided by automatic methods such as CheckMyBlob, a machine learning algorithm that learns to generalize ligand descriptions from sets of moieties deposited in the Protein Data Bank. Here, we present the CheckMyBlob web server, a platform that can identify ligands in unmodeled fragments of electron density maps or validate ligands in existing models. The server processes PDB/mmCIF and MTZ files and returns a ranking of 10 most likely ligands for each detected electron density blob along with interactive 3D visualizations. Additionally, for each prediction/validation, a plugin script is generated that enables users to conduct a detailed analysis of the server results in Coot. 
The CheckMyBlob web server is available at https://checkmyblob.bioreproducibility.org.",2021-07-01 +31605112,Identification of metabolites from tandem mass spectra with a machine learning approach utilizing structural features.,"

Motivation

Untargeted mass spectrometry (MS/MS) is a powerful method for detecting metabolites in biological samples. However, fast and accurate identification of the metabolites' structures from MS/MS spectra is still a great challenge.

Results

We present a new analysis method, called SubFragment-Matching (SF-Matching) that is based on the hypothesis that molecules with similar structural features will exhibit similar fragmentation patterns. We combine information on fragmentation patterns of molecules with shared substructures and then use random forest models to predict whether a given structure can yield a certain fragmentation pattern. These models can then be used to score candidate molecules for a given mass spectrum. For rapid identification, we pre-compute such scores for common biological molecular structure databases. Using benchmarking datasets, we find that our method has similar performance to CSI: FingerID and those very high accuracies can be achieved by combining our method with CSI: FingerID. Rarefaction analysis of the training dataset shows that the performance of our method will increase as more experimental data become available.

Availability and implementation

SF-Matching is available from http://www.bork.embl.de/Docu/sf_matching.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +34019657,ProteinTools: a toolkit to analyze protein structures.,"The experimental characterization and computational prediction of protein structures has become increasingly rapid and precise. However, the analysis of protein structures often requires researchers to use several software packages or web servers, which complicates matters. To provide long-established structural analyses in a modern, easy-to-use interface, we implemented ProteinTools, a web server toolkit for protein structure analysis. ProteinTools gathers four applications so far, namely the identification of hydrophobic clusters, hydrogen bond networks, salt bridges, and contact maps. In all cases, the input data is a PDB identifier or an uploaded structure, whereas the output is an interactive dynamic web interface. Thanks to the modular nature of ProteinTools, the addition of new applications will become an easy task. Given the current need to have these tools in a single, fast, and interpretable interface, we believe that ProteinTools will become an essential toolkit for the wider protein research community. The web server is available at https://proteintools.uni-bayreuth.de.",2021-07-01 +,Transcriptome and proteome analyses of resistant preharvest peanut seed coat in response to Aspergillus flavus infection,"The infection of peanut (Arachis hypogaea L.) seed coat by the pathogenic fungus Aspergillus flavus has highly negative economic and health impacts. However, the molecular mechanism underlying such defense response remains poorly understood. This study aims to address this issue by profiling the transcriptomic and proteomic changes that occur during the infection of the resistant peanut cultivar J11 by A. flavus.Transcriptomic study led to the detection of 13,539 genes, among which 663 exhibited differential expression. 
Further functional analysis found the differentially expressed genes to encode a wide range of pathogenesis- and/or defense-related proteins such as transcription factors, pathogenesis-related proteins, and chitinases. Changes in the expression patterns of these genes might contribute to peanut resistance to A. flavus. On the other hand, the proteomic profiling showed that 314 of the 1382 detected protein candidates were aberrantly expressed as a result of A. flavus invasion. However, the correlation between the transcriptomic and proteomic data was poor. We further demonstrated by in vitro fungistasis tests that hevamine-A, which was enriched at both transcript and protein levels, could directly inhibit the growth of A. flavus.The results demonstrate the power of complementary transcriptomic and proteomic analyses in the study of pathogen defense and resistance in plants and the chitinase could play an important role in the defense response of peanut to A. flavus. The current study also constitutes the first step toward building an integrated omics data platform for the development of Aspergillus-resistant peanut cultivars.How to cite: Zhao X, Li C, Yan C, et al. Transcriptome and proteome analyses of resistant preharvest peanut seed-coat in response to Aspergillus flavus infection. Electron J Biotechnol 2019;39. https://doi.org/10.1016/j.ejbt.2019.03.003.",2019-05-01 +,First Report of Weir’s Cushion Rust on Colorado Blue Spruce Caused by Chrysomyxa weirii in Michigan,"Weir’s cushion rust caused by Chrysomyxa weirii, an autoecious microcyclic rust fungus, is known to affect trees in the genus Picea in North America and south-central Asia (Crane et al. 2000). This species, although placed in Chrysomyxa, belongs phylogenetically to Melampsora (Feau et al. 2011). Trees affected by this disease initially exhibit nondiagnostic yellowish-orange spots and chlorotic bands with blister-like pustules developing throughout the summer on current-year needles. 
Infection with C. weirii results in discoloration and shedding of year-old needles, but it rarely causes mortality. However, nursery stock infected with C. weirii is restricted from sale. Weir’s cushion rust on Picea pungens (Colorado blue spruce) has been observed in nursery, landscape, and ornamental trees in Michigan. Since the first detection of the disease at the Upper Peninsula in 2012, it has spread to the Lower Peninsula of Michigan, including northern (Grand Traverse, Manistee, and Wexford), western (Allegan), and eastern (Oakland) counties (pers. comm. Jill O’Donnell). In May 2018, a teliospore mass was observed on the current year’s needles of an approximately 40-year-old P. pungens in Cadillac, MI (43°11′37.2′′N, 85°22′28.8′′W). The teliospore mass was removed from symptomatic needles and viewed under a Leica DM750 microscope and photographed with a Leica LAS X software imaging system (version 4.9.0; Leica Microsystems, Buffalo Grove, IL). The teliospores (n = 30) were typical of C. weirii, which are variable (from irregular to cylindrical or rhomboidal) in shape and were measured using the open access GNU image manipulation program (GIMP version 2.8.20, https://www.gimp.org), 21.1 ± SE 0.5 (15.4 to 27.8) × 8.2 ± SE 0.1 (6.4 to 9.0) μm, similar in size of teliospores from a previous study (Crane et al. 2000). To confirm the morphological identification, genomic DNA was extracted from the teliospore mass and symptomatic needles using a modified Qiagen DNeasy Plant Mini extraction kit (Qiagen, Valencia, CA) protocol. The internal transcribed spacer (ITS) regions including 5.8S rRNA gene were amplified for each sample using the universal fungal specific forward primer ITS4F and the rust-specific reverse primer ITS4BR (Feau et al. 2011). Purified polymerase chain reaction products were sequenced, and chromatograms were manually edited and submitted to the NCBI BLAST database. Samples were identified with 100% match (788/788 bp) to C. 
weirii in GenBank (accession no. GU049472). The sequences from this study were deposited in GenBank (MH801151). To our knowledge, this is the first report of C. weirii causing Weir’s cushion rust on P. pungens in Michigan. P. pungens is a popular species planted for urban gardens and landscaping along roadways in Midwestern cities (Wade 2010). P. pungens is also the main spruce species used for Christmas tree production in Michigan (USDA NASS 2014). In the last 6 years, C. weirii has spread from the Upper Peninsula of Michigan throughout the Lower Peninsula, most likely by wind-driven rain. Further research is currently underway in the rapid diagnosis of the disease at the chlorotic band stage to improve timing of disease management strategies.",2019-05-01 +34583798,Affective and anxiety disorders in patients with different rare chronic diseases: a systematic review and meta-analysis.,"We aimed to identify the prevalence of affective and anxiety disorders across different rare disease and identify correlates of psychopathology. We performed a systematic review and meta-analysis. We systematically searched Medline, PSYNDEX, PsycINFO for observational studies examining clinically diagnosed affective and/or anxiety disorders in adults with rare chronic diseases. Two researchers reviewed titles and abstracts independently and, for eligible studies, independently extracted data. The prevalence rates were pooled using a random intercept logistic regression model. We published a review protocol (http://www.crd.york.ac.uk/PROSPERO/display_record.php?ID=CRD42018106614CRD42018106614). We identified and screened 34 402 records for eligibility and considered 39 studies in the qualitative and 37 studies in the quantitative analysis, including N = 5951 patients with 24 different rare diseases. Heterogeneity between studies was large. 
Prevalence rates ranged widely between studies, with pooled prevalence estimates of 13.1% (95% CI 9.6-17.7%; I2 = 87%, p < 0.001) for current and 39.3% (95% CI 31.7-47.4%; I2 = 84%, p < 0.001) for lifetime major depressive disorder, 21.2% (95% CI 15.4-28.6%; I2 = 90%, p < 0.001) for current and 46.1% (95% CI 35.8-56.8%; I2 = 90%, p < 0.001) for lifetime affective disorders, and 39.6% (95% CI 25.5-55.6%; I2 = 96%, p < 0.001) for current and 44.2% (95% CI 27.0-62.9%; I2 = 94%, p < 0.001) for lifetime anxiety disorders. Sensitivity analyses excluding studies of low quality revealed nearly the same results. We conducted the first systematic review examining affective and anxiety disorders in adults with different rare diseases and found high prevalence rates. Supporting patients in disease adjustment can be crucial for their overall health and well-being.",2021-09-29 +33677494,ASpli: Integrative analysis of splicing landscapes through RNA-Seq assays. ,"Genome-wide analysis of alternative splicing has been a very active field of research since the early days of Next Generation Sequencing technologies. Since then, ever-growing data availability and the development of increasingly sophisticated analysis methods have uncovered the complexity of the general splicing repertoire. A large number of splicing analysis methodologies exist, each of them presenting its own strengths and weaknesses. For instance methods exclusively relying on junction information do not take advantage of the large majority of reads produced in an RNA-seq assay, isoform reconstruction methods might not detect novel intron retention events, some solutions can only handle canonical splicing events, and many existing methods can only perform pairwise comparisons. 
In this contribution, we present ASpli, a computational suite implemented in R statistical language, that allows the identification of changes in both, annotated and novel alternative splicing events and can deal with simple, multi-factor or paired experimental designs. Our integrative computational workflow considers the same GLM model, applied to different sets of reads and junctions, in order to compute complementary splicing signals.Analyzing simulated and real data we found that the consolidation of these signals resulted in a robust proxy of the occurrence of splicing alterations. While the analysis of junctions allowed us to uncover annotated as well as non-annotated events, read coverage signals notably increased recall capabilities at a very competitive performance when compared against other state-of-the-art splicing analysis algorithms. ASpli is freely available from the Bioconductor project site https://www.bioconductor.org/packages/ASpli. Supplementary data are available at Bioinformatics online.",2021-03-02 +33363571,Analysis of Pan-omics Data in Human Interactome Network (APODHIN).,"Analysis of Pan-omics Data in Human Interactome Network (APODHIN) is a platform for integrative analysis of transcriptomics, proteomics, genomics, and metabolomics data for identification of key molecular players and their interconnections exemplified in cancer scenario. APODHIN works on a meta-interactome network consisting of human protein-protein interactions (PPIs), miRNA-target gene regulatory interactions, and transcription factor-target gene regulatory relationships. In its first module, APODHIN maps proteins/genes/miRNAs from different omics data in its meta-interactome network and extracts the network of biomolecules that are differentially altered in the given scenario. 
Using this context specific, filtered interaction network, APODHIN identifies topologically important nodes (TINs) implementing graph theory based network topology analysis and further justifies their role via pathway and disease marker mapping. These TINs could be used as prospective diagnostic and/or prognostic biomarkers and/or potential therapeutic targets. In its second module, APODHIN attempts to identify cross pathway regulatory and PPI links connecting signaling proteins, transcription factors (TFs), and miRNAs to metabolic enzymes via utilization of single-omics and/or pan-omics data and implementation of mathematical modeling. Interconnections between regulatory components such as signaling proteins/TFs/miRNAs and metabolic pathways need to be elucidated more elaborately in order to understand the role of oncogene and tumor suppressors in regulation of metabolic reprogramming during cancer. APODHIN platform contains a web server component where users can upload single/multi omics data to identify TINs and cross-pathway links. Tabular, graphical and 3D network representations of the identified TINs and cross-pathway links are provided for better appreciation. Additionally, this platform also provides few example data analysis of cancer specific, single and/or multi omics dataset for cervical, ovarian, and breast cancers where meta-interactome networks, TINs, and cross-pathway links are provided. APODHIN platform is freely available at http://www.hpppi.iicb.res.in/APODHIN/home.html.",2020-12-08 +28415074,NaviCom: a web application to create interactive molecular network portraits using multi-level omics data. ,"Human diseases such as cancer are routinely characterized by high-throughput molecular technologies, and multi-level omics data are accumulated in public databases at increasing rate. 
Retrieval and visualization of these data in the context of molecular network maps can provide insights into the pattern of regulation of molecular functions reflected by an omics profile. In order to make this task easy, we developed NaviCom, a Python package and web platform for visualization of multi-level omics data on top of biological network maps. NaviCom is bridging the gap between cBioPortal, the most used resource of large-scale cancer omics data and NaviCell, a data visualization web service that contains several molecular network map collections. NaviCom proposes several standardized modes of data display on top of molecular network maps, allowing addressing specific biological questions. We illustrate how users can easily create interactive network-based cancer molecular portraits via NaviCom web interface using the maps of Atlas of Cancer Signalling Network (ACSN) and other maps. Analysis of these molecular portraits can help in formulating a scientific hypothesis on the molecular mechanisms deregulated in the studied disease. NaviCom is available at https://navicom.curie.fr.",2017-01-01 +30282372,Tetrabenazine Treatment Patterns and Outcomes for Chorea Associated with Huntington Disease: A Retrospective Chart Review.,"

Background

Huntington disease (HD) is a neurodegenerative disorder characterized by motor impairments (including chorea), along with behavioral, psychiatric, and cognitive symptoms. Tetrabenazine was the first US Food and Drug Administration (FDA)-approved treatment for chorea related to HD.

Objective

To examine pharmacologic treatment patterns among patients using tetrabenazine, including reasons for treatment initiation, non-initiation, dose adjustments, and discontinuation, and to quantify the burden of chorea based on healthcare resource utilization.

Methods

In this retrospective patient chart review, neurologists were recruited from the Medefield (http://www.medefield.com) opt-in panel, and selected ≤5 medical charts based on the criteria provided and abstracted data on demographics, disease history, healthcare resource use, and treatment patterns.

Results

138 neurologists participated and 512 HD patient charts were reviewed. Among these patients, 26.4% did not initiate tetrabenazine. Most HD patients (66.5%) received a tetrabenazine dose ≤50 mg. The most common reasons for stopping upward titration were optimal chorea control (55.5%), intolerability of higher doses (31.2%), and reaching the maximum recommended dosage despite suboptimal chorea control (11.4%). Chorea severity and non-persistence to tetrabenazine were associated with increased emergency room visits, hospitalizations, and days hospitalized.

Conclusions

Although tetrabenazine was the sole FDA-approved treatment for HD chorea until April 2017, more than one-quarter of respondents never initiated therapy. Tetrabenazine dosing was lower than predicted, and many patients experienced adverse symptoms of intolerability at high doses. New safer and more tolerable treatment options, such as deutetrabenazine, may improve treatment outcomes and reduce healthcare resource use.",2018-01-01 +27987162,"Ensembl Plants: Integrating Tools for Visualizing, Mining, and Analyzing Plant Genomic Data.","Ensembl Plants ( http://plants.ensembl.org ) is an integrative resource presenting genome-scale information for 39 sequenced plant species. Available data includes genome sequence, gene models, functional annotation, and polymorphic loci; for the latter, additional information including population structure, individual genotypes, linkage, and phenotype data is available for some species. Comparative data is also available, including genomic alignments and ""gene trees,"" which show the inferred evolutionary history of each gene family represented in the resource. Access to the data is provided through a genome browser, which incorporates many specialist interfaces for different data types, through a variety of programmatic interfaces, and via a specialist data mining tool supporting rapid filtering and retrieval of bulk data. 
Genomic data from many non-plant species, including those of plant pathogens, pests, and pollinators, is also available via the same interfaces through other divisions of Ensembl.Ensembl Plants is updated 4-6 times a year and is developed in collaboration with our international partners in the Gramene ( http://www.gramene.org ) and transPLANT projects ( http://www.transplantdb.eu ).",2017-01-01 +31427789,A global overview of pleiotropy and genetic architecture in complex traits.,"After a decade of genome-wide association studies (GWASs), fundamental questions in human genetics, such as the extent of pleiotropy across the genome and variation in genetic architecture across traits, are still unanswered. The current availability of hundreds of GWASs provides a unique opportunity to address these questions. We systematically analyzed 4,155 publicly available GWASs. For a subset of well-powered GWASs on 558 traits, we provide an extensive overview of pleiotropy and genetic architecture. We show that trait-associated loci cover more than half of the genome, and 90% of these overlap with loci from multiple traits. We find that potential causal variants are enriched in coding and flanking regions, as well as in regulatory elements, and show variation in polygenicity and discoverability of traits. Our results provide insights into how genetic variation contributes to trait variation. All GWAS results can be queried and visualized at the GWAS ATLAS resource ( https://atlas.ctglab.nl ).",2019-08-19 +31163709,"De Novo Sequencing, Assembly, and Annotation of Four Threespine Stickleback Genomes Based on Microfluidic Partitioned DNA Libraries. ",": The threespine stickleback is a geographically widespread and ecologically highly diverse fish that has emerged as a powerful model system for evolutionary genomics and developmental biology. 
Investigations in this species currently rely on a single high-quality reference genome, but would benefit from the availability of additional, independently sequenced and assembled genomes. We present here the assembly of four new stickleback genomes, based on the sequencing of microfluidic partitioned DNA libraries. The base pair lengths of the four genomes reach 92-101% of the standard reference genome length. Together with their de novo gene annotation, these assemblies offer a resource enhancing genomic investigations in stickleback. The genomes and their annotations are available from the Dryad Digital Repository (https://doi.org/10.5061/dryad.113j3h7).",2019-06-03 +33434514,Exploration of Coding and Non-coding Variants in Cancer Using GenomePaint.,"GenomePaint (https://genomepaint.stjude.cloud/) is an interactive visualization platform for whole-genome, whole-exome, transcriptome, and epigenomic data of tumor samples. Its design captures the inter-relatedness between DNA variations and RNA expression, supporting in-depth exploration of both individual cancer genomes and full cohorts. Regulatory non-coding variants can be inspected and analyzed along with coding variants, and their functional impact further explored by examining 3D genome data from cancer cell lines. Further, GenomePaint correlates mutation and expression patterns with patient outcomes, and supports custom data upload. We used GenomePaint to unveil aberrant splicing that disrupts the RING domain of CREBBP, discover cis activation of the MYC oncogene by duplication of the NOTCH1-MYC enhancer in B-lineage acute lymphoblastic leukemia, and explore the inter- and intra-tumor heterogeneity at EGFR in adult glioblastomas. 
These examples demonstrate that deep multi-omics exploration of individual cancer genomes enabled by GenomePaint can lead to biological insights for follow-up validation.",2021-01-01 +30980347,"Motor content norms for 4,565 verbs in Spanish.","Embodiment theory suggests that, during the processing of words related to movement, as in the case of action verbs, somatotopic activation is produced in the motor and premotor cortices. In the same way, some studies have demonstrated that patients with frontal-lobe damage, such as Parkinson's patients, have difficulties processing that kind of stimulus. At the moment, no standardized data exist concerning the motor content of Spanish verbs. Therefore, the aim of the present research was to develop a database of 4,565 verbs in Spanish through a survey filled out by 152 university students. The value for the motor content was obtained by calculating the average value from the answers of the participants. In addition, the reliability of the results was estimated, as well as their convergent validity, using diverse correlation coefficients. The database and the raw responses of the participants can be downloaded from this website: https://inco.grupos.uniovi.es/enlaces.",2020-04-01 +31978149,Performance of Qure.ai automatic classifiers against a large annotated database of patients with diverse forms of tuberculosis.,"Availability of trained radiologists for fast processing of CXRs in regions burdened with tuberculosis always has been a challenge, affecting both timely diagnosis and patient monitoring. The paucity of annotated images of lungs of TB patients hampers attempts to apply data-oriented algorithms for research and clinical practices. The TB Portals Program database (TBPP, https://TBPortals.niaid.nih.gov) is a global collaboration curating a large collection of the most dangerous, hard-to-cure drug-resistant tuberculosis (DR-TB) patient cases. 
TBPP, with 1,179 (83%) DR-TB patient cases, is a unique collection that is well positioned as a testing ground for deep learning classifiers. As of January 2019, the TBPP database contains 1,538 CXRs, of which 346 (22.5%) are annotated by a radiologist and 104 (6.7%) by a pulmonologist-leaving 1,088 (70.7%) CXRs without annotations. The Qure.ai qXR artificial intelligence automated CXR interpretation tool, was blind-tested on the 346 radiologist-annotated CXRs from the TBPP database. Qure.ai qXR CXR predictions for cavity, nodule, pleural effusion, hilar lymphadenopathy was successfully matching human expert annotations. In addition, we tested the 12 Qure.ai classifiers to find whether they correlate with treatment success (information provided by treating physicians). Ten descriptors were found as significant: abnormal CXR (p = 0.0005), pleural effusion (p = 0.048), nodule (p = 0.0004), hilar lymphadenopathy (p = 0.0038), cavity (p = 0.0002), opacity (p = 0.0006), atelectasis (p = 0.0074), consolidation (p = 0.0004), indicator of TB disease (p = < .0001), and fibrosis (p = < .0001). We conclude that applying fully automated Qure.ai CXR analysis tool is useful for fast, accurate, uniform, large-scale CXR annotation assistance, as it performed well even for DR-TB cases that were not used for initial training. Testing artificial intelligence algorithms (encapsulating both machine learning and deep learning classifiers) on diverse data collections, such as TBPP, is critically important toward progressing to clinically adopted automatic assistants for medical data analysis.",2020-01-24 +34236848,ePharmaLib: A Versatile Library of e-Pharmacophores to Address Small-Molecule (Poly-)Pharmacology.,"Bioactive compounds oftentimes bind to several target proteins, thereby exhibiting polypharmacology. 
Experimentally determining these interactions is however laborious, and structure-based virtual screening (SBVS) of bioactive compounds could expedite drug discovery by prioritizing hits for experimental validation. Here, we present ePharmaLib, a library of 15,148 e-pharmacophores modeled from solved structures of pharmaceutically relevant protein-ligand complexes of the screening Protein Data Bank (sc-PDB). ePharmaLib can be used for target fishing of phenotypic hits, side effect predictions, drug repurposing, and scaffold hopping. In retrospective SBVS, a good balance was obtained between computational efficiency and predictive accuracy. As a proof of concept, we carried out prospective SBVS in conjunction with a photometric assay, which inferred that the mechanism of action of neopterin (an endogenous immunomodulator) putatively stems from its inhibition (IC50 = 18 μM) of the human purine nucleoside phosphorylase. This ready-to-use library is freely available at http://www.pharmbioinf.uni-freiburg.de/epharmalib.",2021-07-08 +34842444,Prenatal and Postnatal Household Air Pollution Exposure and Infant Growth Trajectories: Evidence from a Rural Ghanaian Pregnancy Cohort.,"

Background

The exposure-response association between prenatal and postnatal household air pollution (HAP) and infant growth trajectories is unknown.

Objectives

To evaluate associations between prenatal and postnatal HAP exposure and stove interventions on growth trajectories over the first year of life.

Methods

The Ghana Randomized Air Pollution and Health Study enrolled n=1,414 pregnant women at ≤24wk gestation from Kintampo, Ghana, and randomized them to liquefied petroleum gas (LPG), improved biomass, or open fire (control) stoves. We quantified HAP exposure by repeated, personal prenatal and postnatal carbon monoxide (CO) and, in a subset, fine particulate matter [PM with an aerodynamic diameter of ≤2.5μm (PM2.5)] assessments. Length, weight, mid-upper arm circumference (MUAC) and head circumference (HC) were measured at birth, 3, 6, 9, and 12 months; weight-for-age, length-for-age (LAZ), and weight-for-length z (WLZ)-scores were calculated. For each anthropometric measure, we employed latent class growth analysis to generate growth trajectories over the first year of life and assigned each child to a trajectory group. We then employed ordinal logistic regression to determine associations between HAP exposures and growth trajectory assignments. Associations with stove intervention arm were also considered.

Results

Of the 1,306 live births, 1,144 had valid CO data and anthropometric variables measured at least once. Prenatal HAP exposure increased risk for lower length [CO odds ratio (OR)= 1.17, 95% CI: 1.01, 1.35 per 1-ppm increase; PM2.5 OR= 1.07, 95% CI: 1.02, 1.13 per 10-μg/m3 increase], lower LAZ z-score (CO OR= 1.15, 95% CI: 1.01, 1.32 per 1-ppm increase) and stunting (CO OR= 1.25, 95% CI: 1.08, 1.45) trajectories. Postnatal HAP exposure increased risk for smaller HC (CO OR= 1.09, 95% CI: 1.04, 1.13 per 1-ppm increase), smaller MUAC and lower WLZ-score (PM2.5 OR= 1.07, 95% CI: 1.00, 1.14 and OR= 1.09, 95% CI: 1.01, 1.19 per 10-μg/m3 increase, respectively) trajectories. Infants in the LPG arm had decreased odds of having smaller HC and MUAC trajectories as compared with those in the open fire stove arm (OR= 0.58, 95% CI: 0.37, 0.92 and OR= 0.45, 95% CI: 0.22, 0.90, respectively).

Discussion

Higher early life HAP exposure (during pregnancy and through the first year of life) was associated with poorer infant growth trajectories among children in rural Ghana. A cleaner-burning stove intervention may have improved some growth trajectories. https://doi.org/10.1289/EHP8109.",2021-11-29 +34473559,"State-Level Sexism and Women's Health Care Access in the United States: Differences by Race/Ethnicity, 2014-2019.","Objectives. To quantify racial/ethnic differences in the relationship between state-level sexism and barriers to health care access among non-Hispanic White, non-Hispanic Black, and Hispanic women in the United States. Methods. We merged a multidimensional state-level sexism index compiled from administrative data with the national Consumer Survey of Health Care Access (2014-2019; n = 10 898) to test associations between exposure to state-level sexism and barriers to access, availability, and affordability of health care. Results. Greater exposure to state-level sexism was associated with more barriers to health care access among non-Hispanic Black and Hispanic women, but not non-Hispanic White women. Affordability barriers (cost of medical bills, health insurance, prescriptions, and tests) appeared to drive these associations. More frequent need for care exacerbated the relationship between state-level sexism and barriers to care for Hispanic women. Conclusions. The relationship between state-level sexism and women's barriers to health care access differs by race/ethnicity and frequency of needing care. Public Health Implications. State-level policies may be used strategically to promote health care equity at the intersection of gender and race/ethnicity. (Am J Public Health. 2021;111(10):1796-1805. 
https://doi.org/10.2105/AJPH.2021.306455).",2021-09-02 +31904820,BCdatabaser: on-the-fly reference database creation for (meta-)barcoding.,"SUMMARY:DNA barcoding and meta-barcoding have become irreplaceable in research and applications, where identification of taxa alone or within a mixture, respectively, becomes relevant. Pioneering studies were in the microbiological context, yet nowadays also plants and animals become targeted. Given the variety of markers used, formatting requirements for classifiers and constant growth of primary databases, there is a need for dedicated reference database creation. We developed a web and command-line interface to generate such on-the-fly for any applicable marker and taxonomic group with optional filtering, formatting and restriction specific for (meta-)barcoding purposes. Also, databases optionally receive a DOI, making them well-documented with meta-data, publicly sharable and citable. AVAILABILITY AND IMPLEMENTATION:source code: https://www.github.com/molbiodiv/bcdatabaser, webservice: https://bcdatabaser.molecular.eco, documentation: https://molbiodiv.github.io/bcdatabaser.",2020-04-01 +34343273,SAMPDI-3D: predicting the effects of protein and DNA mutations on protein-DNA interactions. ,"Mutations that alter protein-DNA interactions may be pathogenic and cause diseases. Therefore, it is extremely important to quantify the effect of mutations on protein-DNA binding free energy to reveal the molecular origin of diseases and to assist the development of treatments. Although several methods that predict the change of protein-DNA binding affinity upon mutations in the binding protein were developed, the effect of DNA mutations was not considered yet. Here, we report a new version of SAMPDI, the SAMPDI-3D, which is a gradient boosting decision tree machine learning method to predict the change of the protein-DNA binding free energy caused by mutations in both the binding protein and the bases of the corresponding DNA. 
The method is shown to achieve Pearson correlation coefficient of 0.76 and 0.80 in a benchmarking test against experimentally determined change of the binding free energy caused by mutations in the binding protein or DNA, respectively. Furthermore, three datasets collected from literature were used to do blind benchmark for SAMPDI-3D and it is shown that it outperforms all existing state-of-the-art methods. The method is very fast allowing for genome-scale investigations. It is available as a web server and a stand-code at http://compbio.clemson.edu/SAMPDI-3D/. Supplementary data are available at Bioinformatics online.",2021-08-03 +34237253,Molecular topography of an entire nervous system.,"We have produced gene expression profiles of all 302 neurons of the C. elegans nervous system that match the single-cell resolution of its anatomy and wiring diagram. Our results suggest that individual neuron classes can be solely identified by combinatorial expression of specific gene families. For example, each neuron class expresses distinct codes of ∼23 neuropeptide genes and ∼36 neuropeptide receptors, delineating a complex and expansive ""wireless"" signaling network. To demonstrate the utility of this comprehensive gene expression catalog, we used computational approaches to (1) identify cis-regulatory elements for neuron-specific gene expression and (2) reveal adhesion proteins with potential roles in process placement and synaptic specificity. Our expression data are available at https://cengen.org and can be interrogated at the web application CengenApp. We expect that this neuron-specific directory of gene expression will spur investigations of underlying mechanisms that define anatomy, connectivity, and function throughout the C. elegans nervous system.",2021-07-07 +34740890,Personalised risk prediction following emergency department assessment for syncope.,"

Background

Published risk tools do not provide possible management options for syncope in the emergency department (ED). Using the 30-day observed risk estimates based on the Canadian Syncope Risk Score (CSRS), we developed personalised risk prediction to guide management decisions.

Methods

We pooled previously reported data from two large cohort studies, the CSRS derivation and validation cohorts, that prospectively enrolled adults (≥16 years) with syncope at 11 Canadian EDs between 2010 and 2018. Using this larger cohort, we calculated the CSRS calibration and discrimination, and determined with greater precision than in previous studies the 30-day risk of adjudicated serious outcomes not identified during the index ED evaluation depending on the CSRS and the risk category. Based on these findings, we developed an on-line calculator and pictorial decision aids.

Results

8233 patients were included of whom 295 (3.6%, 95% CI 3.2% to 4.0%) experienced 30-day serious outcomes. The calibration slope was 1.0, and the area under the curve was 0.88 (95% CI 0.87 to 0.91). The observed risk increased from 0.3% (95% CI 0.2% to 0.5%) in the very-low-risk group (CSRS -3 to -2) to 42.7% (95% CI 35.0% to 50.7%), in the very-high-risk (CSRS≥+6) group (Cochrane-Armitage trend test p<0.001). Among the very-low and low-risk patients (score -3 to 0), ≤1.0% had any serious outcome, there was one death due to sepsis and none suffered a ventricular arrhythmia. Among the medium-risk patients (score +1 to+3), 7.8% had serious outcomes, with <1% death, and a serious outcome was present in >20% of high/very-high-risk patients (score +4 to+11) including 4%-6% deaths. The online calculator and the pictorial aids can be found at: https://teamvenk.com/csrs CONCLUSIONS: 30-day observed risk estimates from a large cohort of patients can be obtained for management decision-making. Our work suggests very-low-risk and low-risk patients may be discharged, discussion with patients regarding investigations and disposition are needed for medium-risk patients, and high-risk patients should be hospitalised. The online calculator, accompanied by pictorial decision aids for the CSRS, may assist in discussion with patients.",2021-11-05 +32500917,Obtaining extremely large and accurate protein multiple sequence alignments from curated hierarchical alignments. ,"For optimal performance, machine learning methods for protein sequence/structural analysis typically require as input a large multiple sequence alignment (MSA), which is often created using query-based iterative programs, such as PSI-BLAST or JackHMMER. However, because these programs align database sequences using a query sequence as a template, they may fail to detect or may tend to misalign sequences distantly related to the query. 
More generally, automated MSA programs often fail to align sequences correctly due to the unpredictable nature of protein evolution. Addressing this problem typically requires manual curation in the light of structural data. However, curated MSAs tend to contain too few sequences to serve as input for statistically based methods. We address these shortcomings by making publicly available a set of 252 curated hierarchical MSAs (hiMSAs), containing a total of 26 212 066 sequences, along with programs for generating from these extremely large MSAs. Each hiMSA consists of a set of hierarchically arranged MSAs representing individual subgroups within a superfamily along with template MSAs specifying how to align each subgroup MSA against MSAs higher up the hierarchy. Central to this approach is the MAPGAPS search program, which uses a hiMSA as a query to align (potentially vast numbers of) matching database sequences with accuracy comparable to that of the curated hiMSA. We illustrate this process for the exonuclease-endonuclease-phosphatase superfamily and for pleckstrin homology domains. A set of extremely large MSAs generated from the hiMSAs in this way is available as input for deep learning, big data analyses. MAPGAPS, auxiliary programs CDD2MGS, AddPhylum, PurgeMSA and ConvertMSA and links to National Center for Biotechnology Information data files are available at https://www.igs.umaryland.edu/labs/neuwald/software/mapgaps/.",2020-01-01 +34093642,"Machine Learning Assisted Prediction of Prognostic Biomarkers Associated With COVID-19, Using Clinical and Proteomics Data.","With the availability of COVID-19-related clinical data, healthcare researchers can now explore the potential of computational technologies such as artificial intelligence (AI) and machine learning (ML) to discover biomarkers for accurate detection, early diagnosis, and prognosis for the management of COVID-19. 
However, the identification of biomarkers associated with survival and deaths remains a major challenge for early prognosis. In the present study, we have evaluated and developed AI-based prediction algorithms for predicting a COVID-19 patient's survival or death based on a publicly available dataset consisting of clinical parameters and protein profile data of hospital-admitted COVID-19 patients. The best classification model based on clinical parameters achieved a maximum accuracy of 89.47% for predicting survival or death of COVID-19 patients, with a sensitivity and specificity of 85.71 and 92.45%, respectively. The classification model based on normalized protein expression values of 45 proteins achieved a maximum accuracy of 89.01% for predicting the survival or death, with a sensitivity and specificity of 92.68 and 86%, respectively. Interestingly, we identified 9 clinical and 45 protein-based putative biomarkers associated with the survival/death of COVID-19 patients. Based on our findings, few clinical features and proteins correlate significantly with the literature and reaffirm their role in the COVID-19 disease progression at the molecular level. The machine learning-based models developed in the present study have the potential to predict the survival chances of COVID-19 positive patients in the early stages of the disease or at the time of hospitalization. However, this has to be verified on a larger cohort of patients before it can be put to actual clinical practice. We have also developed a webserver CovidPrognosis, where clinical information can be uploaded to predict the survival chances of a COVID-19 patient. 
The webserver is available at http://14.139.62.220/covidprognosis/.",2021-05-20 +34618599,"Assessing Language in Unstructured Conversation in People With Aphasia: Methods, Psychometric Integrity, Normative Data, and Comparison to a Structured Narrative Task.","Purpose This study evaluated interrater reliability (IRR) and test-retest stability (TRTS) of seven linguistic measures (percent correct information units, relevance, subject-verb-[object], complete utterance, grammaticality, referential cohesion, global coherence), and communicative success in unstructured conversation and in a story narrative monologue (SNM) in persons with aphasia (PWAs) and matched participants without aphasia (M-PWoAs). Furthermore, the relationship of language in unstructured conversation and SNM was investigated for these measures. Methods Twenty PWAs and 20 M-PWoAs participated in two unstructured conversations on different days with different speech-language pathologists trained as social conversation partners. An 8- to 12-min segment of each conversation was analyzed. Additionally, a wordless picture book was used to elicit an SNM sample at each visit. Correlational analyses were conducted to address the primary research questions. Normative range and minimal detectable change data were also calculated for the measures in both conditions. Results IRR and TRTS were moderate to good for parametric measures and moderate to excellent for nonparametric measures for both groups, except for TRTS for referential cohesion for the PWAs in conversation. Furthermore, in PWAs, a strong correlation was demonstrated for three of eight measures across conditions. Moderate or weaker correlations were demonstrated for three of eight measures, and correlations for two of eight measures were not significant. An ancillary finding was no significant differences occurred for sample-to-sample variability between the two conditions for any measure. 
Conclusions This study replicates previous research demonstrating the feasibility to reliably measure language in unstructured conversation in PWAs. Furthermore, this study provides preliminary evidence that language production varies for some measures between unstructured conversation and SNM, contributing to a literature base that demonstrates language variation between different types of monologue. Thus, these findings suggest that inclusion of the specific types of discourse of interest to the PWA may be important for comprehensive assessment of aphasia. Supplemental Material https://doi.org/10.23641/asha.16569360.",2021-10-07 +32519765,A novel nonsense mutation of ZEB2 gene in a Chinese patient with Mowat-Wilson syndrome.,"

Background

Mowat-Wilson syndrome (MWS) is a rare genetic disorder characterized by intellectual disability, distinctive facial features, and multiple anomalies caused by haploinsufficiency of the ZEB2 gene. We investigated the genetic causes of MWS in a 14-year-old girl who had characteristic features of MWS.

Methods

Clinical data and peripheral blood DNA samples were collected from the proband. Following extraction of genomic DNA, whole-exome sequencing was conducted to detect genetic variants. Bioinformatics analysis was carried out to predict the function of the mutant gene.

Results

Mutation analysis of the proband identified a novel nonsense mutation (c.250G > T, p.E84*) within exon 3 of the ZEB2 gene. This novel alteration resulted in a termination codon at amino acid position 84, which was predicted to encode a truncated protein. This variant was not present in unrelated healthy control samples that were obtained from the exome sequence databases ExAc browser (http://exac.broadinstitute.org/) and gnomAD browser (http://gnomad.broadinstitute.org/). It is a novel variant that was determined to be a deleterious mutation according to the variant interpretation guidelines of the ACMG. The results of our study suggest that the p.E84* mutation in the ZEB2 gene was probably the pathogenic mutation that caused MWS in the proband.

Conclusions

This study reports the novel mutation in the proband will provide a basic foundation for further investigations to elucidate the ZEB2-related mechanisms of MWS.",2020-06-10 +34468237,Dysfunctional Bronchial Cilia Are a Feature of Chronic Obstructive Pulmonary Disease (COPD).,"Impaired mucociliary clearance may increase COPD exacerbation risk. We aimed to compare bronchial ciliary function and epithelial ultrastructure of COPD patients to healthy controls and explore its relationship to exacerbator phenotypes (frequent [FE] and infrequent [IFE] exacerbator). In this cross-sectional study, 16 COPD patients and 12 controls underwent bronchial brushings. Ciliary beat frequency (CBF) and dyskinesia index (DI; % of dyskinetic cilia) were assessed using digital high-speed video microscopy, and epithelial ultrastructure using transmission electron microscopy (TEM). Bronchial epithelium in COPD showed lower CBF and higher DI, compared to controls (median [IQR] CBF: 6.8 (6.1-7.2) Hz vs 8.5 (7.7-8.9) Hz, p<0.001 and DI: 73.8 (60.7-89.8) % vs 14.5 (11.2-16.9) %, p<0.001, respectively). This was true for FE and IFE phenotypes of COPD, which were similar in terms of bronchial CBF or DI. Subgroup analyses demonstrated lower CBF and higher DI in FE and IFE COPD phenotypes compared to controls, irrespective of smoking status. TEM showed more loss of cilia, extrusion of cells, cytoplasmic blebs and dead cells in COPD patients versus controls. Profound dysfunction of bronchial cilia is a feature of COPD irrespective of exacerbation phenotype and smoking status, which is likely to contribute to poor mucus clearance in COPD.Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.1963695 .",2021-09-01 +27513924,Efficient Gene Tree Correction Guided by Genome Evolution.,"

Motivations

Gene trees inferred solely from multiple alignments of homologous sequences often contain weakly supported and uncertain branches. Information for their full resolution may lie in the dependency between gene families and their genomic context. Integrative methods, using species tree information in addition to sequence information, often rely on a computationally intensive tree space search which forecloses an application to large genomic databases.

Results

We propose a new method, called ProfileNJ, that takes a gene tree with statistical supports on its branches, and corrects its weakly supported parts by using a combination of information from a species tree and a distance matrix. Its low running time enabled us to use it on the whole Ensembl Compara database, for which we propose an alternative, arguably more plausible set of gene trees. This allowed us to perform a genome-wide analysis of duplication and loss patterns on the history of 63 eukaryote species, and predict ancestral gene content and order for all ancestors along the phylogeny.

Availability

A web interface called RefineTree, including ProfileNJ as well as a other gene tree correction methods, which we also test on the Ensembl gene families, is available at: http://www-ens.iro.umontreal.ca/~adbit/polytomysolver.html. The code of ProfileNJ as well as the set of gene trees corrected by ProfileNJ from Ensembl Compara version 73 families are also made available.",2016-08-11 +33288955,Tutorial: guidelines for the computational analysis of single-cell RNA sequencing data.,"Single-cell RNA sequencing (scRNA-seq) is a popular and powerful technology that allows you to profile the whole transcriptome of a large number of individual cells. However, the analysis of the large volumes of data generated from these experiments requires specialized statistical and computational methods. Here we present an overview of the computational workflow involved in processing scRNA-seq data. We discuss some of the most common tasks and the tools available for addressing central biological questions. In this article and our companion website ( https://scrnaseq-course.cog.sanger.ac.uk/website/index.html ), we provide guidelines regarding best practices for performing computational analyses. This tutorial provides a hands-on guide for experimentalists interested in analyzing their data as well as an overview for bioinformaticians seeking to develop new computational methods.",2020-12-07 +33285150,Multi-assignment clustering: Machine learning from a biological perspective.,"A common approach for analyzing large-scale molecular data is to cluster objects sharing similar characteristics. This assumes that genes with highly similar expression profiles are likely participating in a common molecular process. Biological systems are extremely complex and challenging to understand, with proteins having multiple functions that sometimes need to be activated or expressed in a time-dependent manner. 
Thus, the strategies applied for clustering of these molecules into groups are of key importance for translation of data to biologically interpretable findings. Here we implemented a multi-assignment clustering (MAsC) approach that allows molecules to be assigned to multiple clusters, rather than single ones as in commonly used clustering techniques. When applied to high-throughput transcriptomics data, MAsC increased power of the downstream pathway analysis and allowed identification of pathways with high biological relevance to the experimental setting and the biological systems studied. Multi-assignment clustering also reduced noise in the clustering partition by excluding genes with a low correlation to all of the resulting clusters. Together, these findings suggest that our methodology facilitates translation of large-scale molecular data into biological knowledge. The method is made available as an R package on GitLab (https://gitlab.com/wolftower/masc).",2020-12-04 +27899658,The BIG Data Center: from deposition to integration to translation.,"Biological data are generated at unprecedentedly exponential rates, posing considerable challenges in big data deposition, integration and translation. 
The BIG Data Center, established at Beijing Institute of Genomics (BIG), Chinese Academy of Sciences, provides a suite of database resources, including (i) Genome Sequence Archive, a data repository specialized for archiving raw sequence reads, (ii) Gene Expression Nebulas, a data portal of gene expression profiles based entirely on RNA-Seq data, (iii) Genome Variation Map, a comprehensive collection of genome variations for featured species, (iv) Genome Warehouse, a centralized resource housing genome-scale data with particular focus on economically important animals and plants, (v) Methylation Bank, an integrated database of whole-genome single-base resolution methylomes and (vi) Science Wikis, a central access point for biological wikis developed for community annotations. The BIG Data Center is dedicated to constructing and maintaining biological databases through big data integration and value-added curation, conducting basic research to translate big data into big knowledge and providing freely open access to a variety of data resources in support of worldwide research activities in both academia and industry. All of these resources are publicly available and can be found at http://bigd.big.ac.cn.",2016-11-28 +34500318,Developing and validating a prediction model for lymphedema detection in breast cancer survivors.,"

Purpose

Early detection and intervention of lymphedema is essential for improving the quality of life of breast cancer survivors. Previous studies have shown that patients have symptoms such as arm tightness and arm heaviness before experiencing obvious limb swelling. Thus, this study aimed to develop a symptom-warning model for the early detection of breast cancer-related lymphedema.

Methods

A cross-sectional study was conducted at a tertiary hospital in Beijing between April 2017 and December 2018. A total of 24 lymphedema-associated symptoms were identified as candidate predictors. Circumferential measurements were used to diagnose lymphedema. The data were randomly split into training and validation sets with a 7:3 ratio to derive and evaluate six machine learning models. Both the discrimination and calibration of each model were assessed on the validation set.

Results

A total of 533 patients were included in the study. The logistic regression model showed the best performance for early detection of lymphedema, with AUC = 0.889 (0.840-0.938), sensitivity = 0.771, specificity = 0.883, accuracy = 0.825, and Brier scores = 0.141. Calibration was also acceptable. It has been deployed as an open-access web application, allowing users to estimate the probability of lymphedema individually in real time. The application can be found at https://apredictiontoolforlymphedema.shinyapps.io/dynnomapp/.

Conclusion

The symptom-warning model developed by logistic regression performed well in the early detection of lymphedema. Integrating this model into an open-access web application is beneficial to patients and healthcare providers to monitor lymphedema status in real-time.",2021-08-31 +32389610,Physical activity for older Australians with mild cognitive impairment or subjective cognitive decline - A narrative review to support guideline development.,"

Objectives

This review informed development of the first national Physical Activity (PA) Guidelines for Older Australians with Mild Cognitive Impairment (MCI) or Subjective Cognitive Decline (SCD) (http://www.dementiaresearch.org.au/images/dcrc/output-files/1567-pa_guidelines_for_mci_or_scd_full_report_final.pdf). These guidelines are directed at healthcare professionals and aim to encourage older adults with SCD/MCI to engage in PA to enhance cognitive, mental and physical health.

Design

A narrative review was undertaken to inform the guideline adaptation process.

Methods

A systematic search of existing PA guidelines for older adults was performed and evaluated using the Appraisal of Guidelines for Research and Evaluation II Instrument. The guideline assessed as most appropriate was adapted to the population with SCD/MCI using the Guideline Adaptation Resource Toolkit, supported by the narrative review.

Results

The search for existing PA guidelines for older adults yielded 22 guidelines, none of which specifically considered older adults with SCD/MCI. The Canadian Physical Activity Guidelines for Older Adults were selected for adaptation to the population with SCD/MCI. The narrative review found 24 high-quality randomised controlled trials and 17 observational studies. These supported the four guideline recommendations that address aerobic PA, progressive resistance training, balance exercises and consultation with healthcare professionals to tailor PA to the individual.

Conclusions

This review found evidence to support the four guideline recommendations. These recommendations provide specific guidance for older adults with SCD/MCI, their families, health professionals, community organisations and government to obtain benefits from undertaking PA. The review also highlights important future research directions, including the need for targeted translation and implementation research for diverse consumers.",2020-03-19 +26989148,Wikidata as a semantic framework for the Gene Wiki initiative. ,"Open biological data are distributed over many resources making them challenging to integrate, to update and to disseminate quickly. Wikidata is a growing, open community database which can serve this purpose and also provides tight integration with Wikipedia. In order to improve the state of biological data, facilitate data management and dissemination, we imported all human and mouse genes, and all human and mouse proteins into Wikidata. In total, 59,721 human genes and 73,355 mouse genes have been imported from NCBI and 27,306 human proteins and 16,728 mouse proteins have been imported from the Swissprot subset of UniProt. As Wikidata is open and can be edited by anybody, our corpus of imported data serves as the starting point for integration of further data by scientists, the Wikidata community and citizen scientists alike. The first use case for these data is to populate Wikipedia Gene Wiki infoboxes directly from Wikidata with the data integrated above. This enables immediate updates of the Gene Wiki infoboxes as soon as the data in Wikidata are modified. Although Gene Wiki pages are currently only on the English language version of Wikipedia, the multilingual nature of Wikidata allows for usage of the data we imported in all 280 different language Wikipedias. Apart from the Gene Wiki infobox use case, a SPARQL endpoint and exporting functionality to several standard formats (e.g. JSON, XML) enable use of the data by scientists. 
In summary, we created a fully open and extensible data resource for human and mouse molecular biology and biochemistry data. This resource enriches all the Wikipedias with structured information and serves as a new linking hub for the biological semantic web. Database URL: https://www.wikidata.org/.",2016-03-17 +33971164,Accurate prediction of protein-ATP binding residues using position-specific frequency matrix.,"Knowledge of protein-ATP interaction can help for protein functional annotation and drug discovery. Accurately identifying protein-ATP binding residues is an important but challenging task to gain the knowledge of protein-ATP interactions, especially for the case where only protein sequence information is given. In this study, we propose a novel method, named DeepATPseq, to predict protein-ATP binding residues without using any information about protein three-dimension structure or sequence-derived structural information. In DeepATPseq, the HHBlits-generated position-specific frequency matrix (PSFM) profile is first employed to extract the feature information of each residue. Then, for each residue, the PSFM-based feature is fed into two prediction models, which are generated by the algorithms of deep convolutional neural network (DCNN) and support vector machine (SVM) separately. The final ATP-binding probability of the corresponding residue is calculated by the weighted sum of the outputted values of DCNN-based and SVM-based models. Experimental results on the independent validation data set demonstrate that DeepATPseq could achieve an accuracy of 77.71%, covering 57.42% of all ATP-binding residues, while achieving a Matthew's correlation coefficient value (0.655) that is significantly higher than that of existing sequence-based methods and comparable to that of the state-of-the-art structure-based predictors. 
Detailed data analysis show that the major advantage of DeepATPseq lies at the combination utilization of DCNN and SVM that helps dig out more discriminative information from the PSFM profiles. The online server and standalone package of DeepATPseq are freely available at: https://jun-csbio.github.io/DeepATPseq/for academic use.",2021-05-07 +31754718,MiST 3.0: an updated microbial signal transduction database with an emphasis on chemosensory systems.,"Bacteria and archaea employ dedicated signal transduction systems that modulate gene expression, second-messenger turnover, quorum sensing, biofilm formation, motility, host-pathogen and beneficial interactions. The updated MiST database provides a comprehensive classification of microbial signal transduction systems. This update is a result of a substantial scaling to accommodate constantly growing microbial genomic data. More than 125 000 genomes, 516 million genes and almost 100 million unique protein sequences are currently stored in the database. For each bacterial and archaeal genome, MiST 3.0 provides a complete signal transduction profile, thus facilitating theoretical and experimental studies on signal transduction and gene regulation. New software infrastructure and distributed pipeline implemented in MiST 3.0 enable regular genome updates based on the NCBI RefSeq database. A novel MiST feature is the integration of unique profile HMMs to link complex chemosensory systems with corresponding chemoreceptors in bacterial and archaeal genomes. The data can be explored online or via RESTful API (freely available at https://mistdb.com).",2020-01-01 +31918459,OSdlbcl: An online consensus survival analysis web server based on gene expression profiles of diffuse large B-cell lymphoma.,"Diffuse large B-cell lymphoma (DLBCL) is the most common subtype of non-Hodgkin lymphoma (NHL) and is a clinical, pathological, and molecular heterogeneous disease with highly variable clinical outcomes. 
Currently, valid prognostic biomarkers in DLBCL are still lacking. To optimize targeted therapy and improve the prognosis of DLBCL, the performance of proposed biomarkers needs to be evaluated in multiple cohorts, and new biomarkers need to be investigated in large datasets. Here, we developed a consensus Online Survival analysis web server for Diffuse Large B-Cell Lymphoma, abbreviated OSdlbcl, to assess the prognostic value of individual gene. To build OSdlbcl, we collected 1100 samples with gene expression profiles and clinical follow-up information from The Cancer Genome Atlas (TCGA) and Gene Expression Omnibus (GEO) databases. In addition, DNA mutation data were also collected from the TCGA database. Overall survival (OS), progression-free survival (PFS), disease-specific survival (DSS), disease-free interval (DFI), and progression-free interval (PFI) are important endpoints to reflect the survival rate in OSdlbcl. Moreover, clinical features were integrated into OSdlbcl to allow data stratifications according to the user's special needs. By inputting an official gene symbol and selecting desired criteria, the survival analysis results can be graphically presented by the Kaplan-Meier (KM) plot with hazard ratio (HR) and log-rank p value. As a proof-of-concept demonstration, the prognostic value of 23 previously reported survival associated biomarkers, such as transcription factors FOXP1 and BCL2, was evaluated in OSdlbcl and found to be significantly associated with survival as reported (HR = 1.73, P < .01; HR = 1.47, P = .03, respectively). In conclusion, OSdlbcl is a new web server that integrates public gene expression, gene mutation data, and clinical follow-up information to provide prognosis evaluations for biomarker development for DLBCL. 
The OSdlbcl web server is available at https://bioinfo.henu.edu.cn/DLBCL/DLBCLList.jsp.",2020-01-09 +33395322,Risk Characterization and Probabilistic Concentration-Response Modeling of Complex Environmental Mixtures Using New Approach Methodologies (NAMs) Data from Organotypic in Vitro Human Stem Cell Assays.,"

Background

Risk assessment of chemical mixtures or complex substances remains a major methodological challenge due to lack of available hazard or exposure data. Therefore, risk assessors usually infer hazard or risk from data on the subset of constituents with available toxicity values.

Objectives

We evaluated the validity of the widely used traditional mixtures risk assessment paradigms, Independent Action (IA) and Concentration Addition (CA), with new approach methodologies (NAMs) data from human cell-based in vitro assays.

Methods

A diverse set of 42 chemicals was tested both individually and as mixtures for functional and cytotoxic effects in vitro. A panel of induced pluripotent stem cell (iPSCs)-derived models (hepatocytes, cardiomyocytes, endothelial, and neurons) and one primary cell type (HUVEC) were used. Bayesian concentration-response modeling of individual chemicals or their mixtures was performed for a total of 47 phenotypes to derive point-of-departure (POD) values. Probabilistic IA or CA was conducted to estimate the mixture effects based on the bioactivity profiles from the individual chemicals and compared with mixture bioactivity.

Results

All mixtures showed significant bioactivity, even though some were constructed using individual chemical concentrations considered ""low"" or ""safe."" Even though CA is much more accurate as a predictor of mixture effects in comparison with IA, with CA-based POD typically within an order of magnitude of the actual mixture, in some cases, the bioactivity of the mixtures appeared to be much greater than that of their components under either additivity assumption.

Discussion

These results suggest that CA is a preferred first approximation for predicting mixture toxicity when data for all constituents are available. However, because the accuracy of additivity assumptions varies greatly across phenotypes, we posit that mixtures and complex substances need to be directly tested for their hazard potential. NAMs provide a practical solution that rapidly yields highly informative data for mixtures risk assessment. https://doi.org/10.1289/EHP7600.",2021-01-04 +,The oldest known mastotermitids (Blattodea: Termitoidae) and phylogeny of basal termites,"Five finely preserved termites from the mid‐Cretaceous (Cenomanian) amber of Myanmar provide new information allowing a reanalysis of the phylogeny of basal termites. The Mastotermitidae family is recovered as monophyletic, and a redefined Hodotermitidae sensu lato is also monophyletic to include Archotermopsidae, Hodotermitidae and Stolotermitidae. Such a phylogenetic relationship agrees with the results from previous molecular phylogeny. Alongside these findings, there are many taxa that can only be shown to be termites with no other phylogenetically informative data. These form a comb of ‘grade groups’ emerging in the Late Jurassic. The new amber specimens are described as two new species. Anisotermes xiai gen. et sp.n. is described from multiple castes and has symplesiomorphic characteristics: large body size, a broad pronotum, well‐developed reticulated veins, and a large anal lobe of the hindwings. It shares wing features with the other new species, Mastotermes monostichus sp.n. Both new taxa are assigned to the Mastotermitidae, as they are shown to have synapomorphies that unite the family. 
This published work has been registered on ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:1AD5CECA‐27B7‐48D5‐88DC‐CEC5150962D7.",2019-07-01 +33997237,Comparative proteomics of Brucella melitensis is a useful toolbox for developing prophylactic interventions in a One-Health context.,"Brucellosis caused by Brucella melitensis is a zoonosis frequently reported in the Mediterranean and Middle-East regions and responsible for important economic losses and reduced animal welfare. To date, current strategies applied to control or eradicate the disease relies on diagnostic tests that suffer from limited specificity in non-vaccinated animals; while prophylactic measures, when applied, use a live attenuated bacterial strain characterized by residual virulence on adult pregnant animals and difficulties in distinguishing vaccinated from infected animals. To overcome these issues, studies are desired to elucidate the bacterial biology and the pathogenetic mechanisms of both the vaccinal strain and the pathogenic strains. Proteomics has a potential in tackling issues of One-Health concern; here, we employed label-free shotgun proteomics to investigate the protein repertoire of the vaccinal strain B. melitensis Rev.1 and compare it with the proteome of the Brucella melitensis 16 M, a reference strain representative of B. melitensis field strains. Comparative proteomics profiling underlines common and diverging traits between the two strains. Common features suggest the potential biochemical routes responsible for the residual virulence of the vaccinal strain, whilst the diverging traits are suggestive biochemical signatures to be further investigated to provide an optimized diagnostic capable of discriminating the vaccinated from infected animals. 
The data presented in this study are openly available in PRIDE data repository at https://www.ebi.ac.uk/pride/, reference number PXD022472.",2021-04-23 +28765725,"An Online Database of the Immatures of Coleoptera (Arthopoda, Insecta) Described from Brazil.","

Background

An online database of the described immature beetles from Brazil is presented for the first time based on published literature. The main purpose of this online database is to ensure accessibility to data associated with the described immature Coleoptera from Brazil, which will be useful for future biological, ecological, conservational and biogeographical studies.

New information

More than 9,486 specimens of 248 genera, 282 species and 4 subspecies of 76 Coleoptera families from 15 states and the Federal District of Brazil were found. Taxonomical and ecological information about each species, when available, are given. The dataset of Immatures of Coleoptera described from Brazil are available and can be accessed through the portals of GBIF at http://www.gbif.org/dataset/8e0e9330-e1b2-475a-9891-4fa8e5c6f57f and the SiBBr at http://ipt.sibbr.gov.br/sibbr/resource?r=coleoptera_immature_of_brazil.",2017-04-06 +32681998,Molecular characterisation of Trichomonas vaginalis isolates in Southwest Turkey with multilocus sequence typing and genetic structure analysis in relation to different countries.,"Trichomonas vaginalis, a flagellated protozoan parasite, is among the most common sexually transmitted pathogens in the world. The present study aimed to identify the genetic profiles of T. vaginalis in the southwest of Turkey with multilocus sequence typing (MLST) and to analyse the genetic structure of the parasite in a collection of isolates from different countries. The study included 27 T. vaginalis isolates from symptomatic females in Aydin, Turkey. Seven housekeeping genes of T. vaginalis were partially amplified and sequenced after genomic DNA extraction from in vitro cultures. The allele profiles and sequence types (STs) of the isolates were determined by using the MLST database (https://pubmlst.org/tvaginalis). The genetic structure and differentiation of the parasite were analysed in relation to findings from other countries by assembling the available MLST sequences. When referred to the database, a total of 22 STs, including 18 new STs were found; besides, there were two new allele types. The genetic analysis of MLST data demonstrated the presence of two main genetic structures: Type I and Type II. In addition, the neighbor-joining method also revealed that the isolates were clustered into two groups. 
The genetic types distributed almost equally in the Netherlands and the USA, however, the predominance of Type I was noted in Turkey and the UK. The genetic differentiation among four countries was significant (p < .05), the gene flow was relatively high between the Netherlands and the USA, in contrast to Turkey. Finally, genetic variations were originated within populations (93.8%) rather than among populations (6.2%). In conclusion, we studied the genetic diversity of T. vaginalis isolates with MLST in the southwest of Turkey and showed the origin of genetic differentiation of the parasite among different countries. The presentation of MLST profiles and genetic variance of T. vaginalis isolates will contribute to the development of new diagnostic and treatment options for the parasite.",2020-07-16 +33950233,Fundamental gene network rewiring at the second order within and across mammalian systems. ,"Genetic or epigenetic events can rewire molecular networks to induce extraordinary phenotypical divergences. Among the many network rewiring approaches, no model-free statistical methods can differentiate gene-gene pattern changes not attributed to marginal changes. This may obscure fundamental rewiring from superficial changes. Here we introduce a model-free Sharma-Song test to determine if patterns differ in the second order, meaning that the deviation of the joint distribution from the product of marginal distributions is unequal across conditions. We prove an asymptotic chi-squared null distribution for the test statistic. Simulation studies demonstrate its advantage over alternative methods in detecting second-order differential patterns. Applying the test on three independent mammalian developmental transcriptome datasets, we report a lower frequency of co-expression network rewiring between human and mouse for the same tissue group than the frequency of rewiring between tissue groups within the same species. 
We also find second-order differential patterns between microRNA promoters and genes contrasting cerebellum and liver development in mice. These patterns are enriched in the spliceosome pathway regulating tissue specificity. Complementary to previous mammalian comparative studies mostly driven by first-order effects, our findings contribute an understanding of system-wide second-order gene network rewiring within and across mammalian systems. Second-order differential patterns constitute evidence for fundamentally rewired biological circuitry due to evolution, environment, or disease. The generic Sharma-Song test is available from the R package 'DiffXTables' at https://cran.r-project.org/package=DiffXTables. Other code and data are described in Methods. Supplementary data are available at Bioinformatics online.",2021-05-05 +27903906,EuPathDB: the eukaryotic pathogen genomics database resource.,"The Eukaryotic Pathogen Genomics Database Resource (EuPathDB, http://eupathdb.org) is a collection of databases covering 170+ eukaryotic pathogens (protists & fungi), along with relevant free-living and non-pathogenic species, and select pathogen hosts. To facilitate the discovery of meaningful biological relationships, the databases couple preconfigured searches with visualization and analysis tools for comprehensive data mining via intuitive graphical interfaces and APIs. All data are analyzed with the same workflows, including creation of gene orthology profiles, so data are easily compared across data sets, data types and organisms. EuPathDB is updated with numerous new analysis tools, features, data sets and data types. New tools include GO, metabolic pathway and word enrichment analyses plus an online workspace for analysis of personal, non-public, large-scale data. 
Expanded data content is mostly genomic and functional genomic data while new data types include protein microarray, metabolic pathways, compounds, quantitative proteomics, copy number variation, and polysomal transcriptomics. New features include consistent categorization of searches, data sets and genome browser tracks; redesigned gene pages; effective integration of alternative transcripts; and a EuPathDB Galaxy instance for private analyses of a user's data. Forthcoming upgrades include user workspaces for private integration of data with existing EuPathDB data and improved integration and presentation of host-pathogen interactions.",2016-11-29 +34013078,Host pharmacogenetic factors that may affect liver neoplasm incidence upon using direct-acting antivirals for treating hepatitis C infection.,"

Introduction

Direct-acting antivirals (DAAs) represent a breakthrough in hepatitis C virus (HCV) treatment as they directly inhibit HCV nonstructural (NS) proteins (NS3/4A, NS5A, and NS5B). However, ongoing debates exist regarding their relationship with hepatocellular carcinoma (HCC) whose incidence is widely debated among investigators. This study was conducted to identify host pharmacogenetic factors that may influence HCC incidence upon using HCV DAAs.

Materials and methods

Details regarding 16 HCV DAAs were collected from literature and DrugBank database. Digital structures of these drugs were fed into the pharmacogenomics/pharmacovigilance in - silico pipeline (PHARMIP) to predict the genetic factors that may underpin HCC development.

Results

We identified 184 unique genes and 40 unique variants that may have key answers for the DAA/HCC paradox. These findings could be used in different methods to aid in the precise application of HCV DAAs and minimize the proposed risk for HCC. All results could be accessed at: https://doi.org/10.17632/8ws8258hn3.2.

Discussion

All the identified factors are evidence related to HCC and significantly predicted by PHARMIP as DAA targets. We discuss some examples of the methods of using these results to address the DAA/HCC controversy based on the following three primary levels: 1 - individual DAA drug, 2 - DAA subclass, and 3 - the entire DAA class. Further wet laboratory investigation is required to evaluate these results.",2021-05-03 +33957235,White matter hyperintensities segmentation using the ensemble U-Net with multi-scale highlighting foregrounds.,"White matter hyperintensities (WMHs) are abnormal signals within the white matter region on the human brain MRI and have been associated with aging processes, cognitive decline, and dementia. In the current study, we proposed a U-Net with multi-scale highlighting foregrounds (HF) for WMHs segmentation. Our method, U-Net with HF, is designed to improve the detection of the WMH voxels with partial volume effects. We evaluated the segmentation performance of the proposed approach using the Challenge training dataset. Then we assessed the clinical utility of the WMH volumes that were automatically computed using our method and the Alzheimer's Disease Neuroimaging Initiative database. We demonstrated that the U-Net with HF significantly improved the detection of the WMH voxels at the boundary of the WMHs or in small WMH clusters quantitatively and qualitatively. Up to date, the proposed method has achieved the best overall evaluation scores, the highest dice similarity index, and the best F1-score among 39 methods submitted on the WMH Segmentation Challenge that was initially hosted by MICCAI 2017 and is continuously accepting new challengers. 
The evaluation of the clinical utility showed that the WMH volume that was automatically computed using U-Net with HF was significantly associated with cognitive performance and improves the classification between cognitive normal and Alzheimer's disease subjects and between patients with mild cognitive impairment and those with Alzheimer's disease. The implementation of our proposed method is publicly available using Dockerhub (https://hub.docker.com/r/wmhchallenge/pgs).",2021-05-03 +34481398,Mangrove's rhizospheric engineering with bacterial inoculation improve degradation of diesel contamination.,"Mangroves (Avicennia marina) growing in intertidal areas are often exposed to diesel spills, adversely damaging the ecosystem. Herein, we showed for the first time that mangrove seedlings' associations with bacteria could reprogram host-growth, physiology, and ability to degrade diesel. We found four bacterial strains [Sphingomonas sp.-LK11, Rhodococcus corynebacterioides-NZ1, Bacillus subtilis-EP1 Bacillus safensis-SH10] exhibiting significant growth during diesel degradation (2% and 5%, v/v) and higher expression of alkane monooxygenase compared to control. This is in synergy with reduced long-chain n-alkanes (C24-C30) during microbe-diesel interactions in the bioreactor. Among individual strains, SH10 exhibited significantly higher potential to improve mangrove seedling's morphology, anatomy and growth during diesel treatment in rhizosphere compared to control. This was also evidenced by reduced activities and gene expression of antioxidant enzymes (catalases, peroxidases, ascorbic peroxidases, superoxide dismutases and polyphenol peroxidases) and lipid peroxidation during microbe-diesel interactions. Interestingly, we noticed significantly higher soil-enzyme activities (phosphatases and glucosidases) and essential metabolites in seedling's rhizosphere after bacteria and diesel treatments. 
Degradation of longer n-alkane chains in the rhizosphere also revealed a potential pathway that benefits mangroves by bacterial strains during diesel contaminations. Current results support microbes' application to rhizoengineer plant growth, responses, and phytoextraction abilities in environments contaminated with diesel spills. AVAILABILITY OF DATA AND MATERIALS: The datasets generated during the current study are available in the NCBI GenBank (https://www.ncbi.nlm.nih.gov).",2021-08-28 +33970229,ChemHub: a knowledgebase of functional chemicals for synthetic biology studies. ,"The field of synthetic biology lacks a comprehensive knowledgebase for selecting synthetic target molecules according to their functions, economic applications, and known biosynthetic pathways. We implemented ChemHub, a knowledgebase containing >90,000 chemicals and their functions, along with related biosynthesis information for these chemicals that was manually extracted from >600,000 published studies by more than 100 people over the past 10 years. Multiple algorithms were implemented to enable biosynthetic pathway design and precursor discovery, which can support investigation of the biosynthetic potential of these functional chemicals. ChemHub is freely available at: http://www.rxnfinder.org/chemhub/. Supplementary data are available at Bioinformatics online.",2021-05-10 +33659582,THP1 proteomics in response to mycobacterium tuberculosis infection.,"Temporal data on how the mycobacterium infection establishes itself inside the host cell is not available. We differentiated human THP1 cell line with PMA and infected them with different laboratory (H37Ra and H37Rv) and clinical strains (BND433 and JAL2287) of mycobacterium tuberculosis (Mtb). Uninfected differentiated THP1 cells were used as infection control. Host proteome was investigated at four different time points to understand the dynamics of host response to mycobacterial infection with time. 
The investigated time points included 6 hrs, 18 hrs, 30 hrs and 42 hrs of infection with all the Mtb strains. SWATH-MS method was used to quantitate the host proteome in response to Mtb infection and the data thus obtained are available via PRIDE repository with the dataset identifier PXD022352 (https://www.ebi.ac.uk/pride/archive/projects/PXD022352).",2021-01-30 +33722115,What Were You Thinking? Medical Students' Metacognition and Perceptions of Self-Regulated Learning.,"

Phenomenon

As a component of self-regulated learning, metacognition is gaining attention in the medical education research community. Metacognition, simply put, is thinking about one's thinking. Having a metacognitive habit of mind is essential for healthcare professionals. This study identified the metacognitive competencies of medical students as they completed a conceptual learning task, and provided insight into students' perceptions of self-regulated learning in their curriculum. Approach: Eleven third-year medical students from a Dutch University were purposively sampled to participate in this qualitative study. The study design included a think-aloud assignment followed by a semi-structured interview. During the assignment, participants were instructed to think aloud while solving questions about medical physiological concepts such as blood flow, pressure, and resistance. Think-aloud data were collected through audiotaping and used to identify participants' metacognitive competencies. The assignment also served as a prompt for an interview in which participants were questioned about metacognitive knowledge, monitoring, experiences, and perceptions of self-regulated learning in their curriculum. All data were transcribed verbatim and analyzed iteratively using a template analysis. Findings: Students differed in their use of metacognitive skills, with an overall focus on monitoring and, to a lesser extent, on planning and evaluation. Additionally, differences were found in students' metacognitive knowledge and metacognitive experiences. There was apparent use of inefficient, superficial predictive cues. Regarding perceptions of self-regulated learning skills, some students felt no need to develop such skills as they perceived medical education as an exercise in memorizing facts. Others emphasized the need for more insight into their actual level of knowledge and competence. 
Insights: Pre-clinical medical students require explicit teaching of metacognitive skills to facilitate self-regulated learning. Educators should aim to integrate metacognition in the everyday discourse of the classroom to foster an environment in which students discuss their own learning. Supplemental data for this article is available online at https://doi.org/10.1080/10401334.2021.1889559.",2021-03-15 +34117883,MecCog: A knowledge representation framework for genetic disease mechanism. ,"Experimental findings on genetic disease mechanisms are scattered throughout the literature and represented in many ways, including unstructured text, cartoons, pathway diagrams, and network graphs. Integration and structuring of such mechanistic information greatly enhances its utility. MecCog is a graphical framework for building integrated representations (mechanism schemas) of mechanisms by which a genetic variant causes a disease phenotype. A MecCog mechanism schema displays the propagation of system perturbations across stages of biological organization, using graphical notations to symbolize perturbed entities and activities, hyperlinked evidence tagging, a mechanism ontology, and depiction of knowledge gaps, ambiguities, and uncertainties. The web platform enables a user to construct, store, publish, browse, query, and comment on schemas. MecCog facilitates the identification of potential biomarkers, therapeutic intervention sites, and critical future experiments. The MecCog framework is freely available at http://www.meccog.org. Supplementary data are available at Bioinformatics online.",2021-06-12 +34158830,Psycho-oncology service provisions for hospitalised cancer patients before and during the COVID-19 pandemic in an oncology centre in eastern India.,"

Background

Addressing the mental health needs of cancer patients and their caregivers improves the quality of care the patient receives in any cancer care ecosystem. International practice currently encourages integrated care for physical and mental health in oncology. The coronavirus disease (COVID-19) pandemic has affected the delivery of healthcare services across the world. The current research paper is on the psycho-oncology service provision for hospitalised cancer patients before and during the COVID-19 pandemic.

Methods

All patients who were referred to psycho-oncology services during the study period of 1 month, in the two successive years of 2019 and 2020, were included in the study. Retrospective data were collected from the centralised electronic medical records for patients. Data included cancer diagnosis, reason for admission, admitting team and reason for a psychiatric referral. Other parameters that were measured were the timing of the psychiatric assessment, psychiatric diagnosis and psycho-oncology care provided, which included psychological interventions carried out and medications prescribed. The overall institutional data on cancer care provision are also presented in brief to provide context to the psycho-oncology services.

Results

Integrated psycho-oncology services reviewed and managed patients round the year in the hospital where the study was conducted. During the 1-month study period, in 2019 and 2020, the total number of hospitalised cancer patients managed by the services was 74 and 52, respectively. During the study period of 2020, 292 patients with cancer who were being treated in the hospital had tested positive for severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) tested on reverse transcription-polymerase chain reaction (RT-PCR) and 50 members of healthcare staff also tested positive. The most common diagnosis of patients was found to be stress-related adjustment disorder [16/74 (21.6%) in 2019 and 16/52 (30.8%) in 2020]. The paper discusses the common stressors voiced by the patients and their caregivers during the COVID-19 pandemic. Several challenges of providing psychological services were overcome by the team and the paper touches upon the common strategies that were used during the pandemic. Most patients did not need medications, but a significant minority did benefit from treatment with psychotropic medications. Simple psychological interventions such as sleep hygiene, supportive therapy sessions and psycho-education benefited many patients and were feasible even during the pandemic.

Conclusion

The provision of psycho-oncology services to cancer patients and their caregivers was important before and during the COVID-19 pandemic. Watch a video which illustrates the psycho-oncology service provisions in an oncology centre in Eastern India during the COVID-19 pandemic here: https://ecancer.org/en/video/9707-psycho-oncology-service-provisions-for-hospitalised-cancer-patients-before-and-during-the-covid19-pandemic.",2021-05-10 +28299908,The DrugAge database of aging-related drugs.,"Aging is a major worldwide medical challenge. Not surprisingly, identifying drugs and compounds that extend lifespan in model organisms is a growing research area. Here, we present DrugAge (http://genomics.senescence.info/drugs/), a curated database of lifespan-extending drugs and compounds. At the time of writing, DrugAge contains 1316 entries featuring 418 different compounds from studies across 27 model organisms, including worms, flies, yeast and mice. Data were manually curated from 324 publications. Using drug-gene interaction data, we also performed a functional enrichment analysis of targets of lifespan-extending drugs. Enriched terms include various functional categories related to glutathione and antioxidant activity, ion transport and metabolic processes. In addition, we found a modest but significant overlap between targets of lifespan-extending drugs and known aging-related genes, suggesting that some but not most aging-related pathways have been targeted pharmacologically in longevity studies. 
DrugAge is freely available online for the scientific community and will be an important resource for biogerontologists.",2017-03-16 +33175160,The European Nucleotide Archive in 2020.,"The European Nucleotide Archive (ENA; https://www.ebi.ac.uk/ena), provided by the European Molecular Biology Laboratory's European Bioinformatics Institute (EMBL-EBI), has for almost forty years continued in its mission to freely archive and present the world's public sequencing data for the benefit of the entire scientific community and for the acceleration of the global research effort. Here we highlight the major developments to ENA services and content in 2020, focussing in particular on the recently released updated ENA browser, modernisation of our release process and our data coordination collaborations with specific research communities.",2021-01-01 +33938221,D3DistalMutation: a Database to Explore the Effect of Distal Mutations on Enzyme Activity.,"Enzyme activity is affected by amino acid mutations, particularly mutations near the active site. Increasing evidence has shown that distal mutations more than 10 Å away from the active site may significantly affect enzyme activity. However, it is difficult to study the enzyme regulation mechanism of distal mutations due to the lack of a systematic collection of three-dimensional (3D) structures, highlighting distal mutation site and the corresponding enzyme activity change. Therefore, we constructed a distal mutation database, namely, D3DistalMutation, which relates the distal mutation to enzyme activity. As a result, we observed that approximately 80% of distal mutations could affect enzyme activity and 72.7% of distal mutations would decrease or abolish enzyme activity in D3DistalMutation. Only 6.6% of distal mutations in D3DistalMutation could increase enzyme activity, which have great potential to the industrial field. 
Among these mutations, the Y to F, S to D, and T to D mutations are most likely to increase enzyme activity, which sheds some light on industrial catalysis. Distal mutations decreasing enzyme activity in the allosteric pocket play an indispensable role in allosteric drug design. In addition, the pockets in the enzyme structures are provided to explore the enzyme regulation mechanism of distal mutations. D3DistalMutation is accessible free of charge at https://www.d3pharma.com/D3DistalMutation/index.php.",2021-05-02 +33934337,Outcomes of treatments for keratomalacia in dogs and cats: a systematic review of the published literature including non-randomised controlled and non-controlled studies.,"

Objectives

The aim of this review was to interrogate the evidence base for treatment of keratomalacia in dogs and cats, through examination of the applicable literature.

Materials and methods

Studies were screened for evidence to answer the following question: Which of the treatment options for keratomalacia in dogs and cats offers the best chance of globe survival, the fastest time to resolution with globe survival, and the best visual outcome? The search utilised the PubMed (http://www.pubmed.gov/) and ISI Web of Science (http://wok.mimas.ac.uk/) databases. Databases were searched using the following terms: (keratomalacia OR corneal melt OR corneal malacia) AND (dog OR canine OR canid OR cat OR feline OR felid) AND (treatment OR outcome OR morbidity OR complications). Studies were assessed by one author (CH) and excluded if they related to less than three keratomalacia cases, experimental treatments, in vitro studies, or did not provide information regarding outcome. Studies were classified to a level of evidence according to the system described by the Oxford Centre for Evidence-Based Medicine.

Results

Eighteen (18) studies were identified as providing information to answer the proposed question, one as level 3, 10 as level 4 and seven as level 5 evidence. Only one study compared two treatments; the remaining were prospective or retrospective case series of a single treatment intervention. Study design was highly variable with respect to population size, follow-up and outcome assessment, making direct comparison difficult, and meta-analysis was not applied.

Clinical significance

Overall, the evidence for improved outcome of one proposed treatment over another proposed treatment for keratomalacia in dogs and/or cats is very weak.",2021-05-02 +34973417,"GranatumX: A Community-engaging, Modularized, and Flexible Webtool for Single-cell Data Analysis.","We present GranatumX, a next-generation software environment for single-cell RNA sequencing (scRNA-seq) data analysis. GranatumX is inspired by the interactive webtool Granatum. GranatumX enables biologists to access the latest scRNA-seq bioinformatics methods in a web-based graphical environment. It also offers software developers the opportunity to rapidly promote their own tools with others in customizable pipelines. The architecture of GranatumX allows for easy inclusion of plugin modules, named Gboxes, which wrap around bioinformatics tools written in various programming languages and on various platforms. GranatumX can be run on the cloud or private servers and generate reproducible results. It is a community-engaging, flexible, and evolving software ecosystem for scRNA-seq analysis, connecting developers with bench scientists. GranatumX is freely accessible at http://garmiregroup.org/granatumx/app.",2021-06-01 +,126 The Canadian Neonatal Brain Platform: A three-pillar approach,"Abstract

Background

Brain injury and abnormal maturation in the neonatal period is associated with long-term changes underlying significant cognitive, motor, language and behavioural deficiencies. Our understanding of clear cerebral disruptors of brain development and the extent of their impact are still limited, mostly due to the lack of robust non-invasive biomarkers, difficulties in conducting studies in newborns and the use of small isolated cohorts. To address all these issues, we have created the Canadian Neonatal Brain Platform (CNBP).

Objectives

The aims of CNBP are threefold: 1) To create and manage a national neonatal MRI registry to support prospective multicenter trials; 2) To make accessible to the community standardized sets of tools and protocols for image acquisition and analysis specifically designed for neonatology; 3) To better identify the concerns of parents of preterm infants by building a robust partnership.

Design/Methods

Aim 1: We modelled our architecture with a semi-decentralized overall structure composed of a site-specific infrastructure for initial localized data aggregation and anonymization, and a central server for post-processing and ensuring long-term scalability. Aim 2: We conducted experiments to simulate human rater image quality assessment with machine learning. In addition, we performed brain segmentation using various open source neonatal neuroimaging analysis software to qualitatively validate their results against the human rater gold standard. Aim 3: We deployed Mieux Agir au Quotidien (http://developpementenfant.ca), a web-based educational and support program for parents of preterm infants that incorporates state-of-the-art teaching modules on developmentally supportive care.

Results

Aim 1: Our infrastructure (see Figure 1) was successfully deployed in Compute Canada. We used DICOMTransit to collect, aggregate, anonymize and centralize data from MRI scanners, Canadian Neonatal Network and Canadian Neonatal Follow-up Network. We implemented LORIS (https://dev.cnbp.ca) to build a clinical neonatal imaging registry. Aim 2: Our pipeline reached a 75% sensitivity and 85% positive predictive value to control for quality. Our assessment of the segmentation tools revealed that MANTIS provides the most robust segmentation results. Aim 3: Mieux Agir au Quotidien reached over 700K visits in 2017. We have established a special partnership with parents of preterm infants, which has enabled us to gather first-rate information on parents’ concerns and knowledge about the disorders of preterm infants, now available in English.

Conclusion

CNBP has successfully progressed towards achieving its aims by establishing an online data processing and integration portal integrated with numerous neonatal specific analysis software while providing social and knowledge transfer to the general public.",2019-05-31 +33247935,An informatics research platform to make public gene expression time-course datasets reusable for more scientific discoveries.,"The exponential growth of genomic/genetic data in the era of Big Data demands new solutions for making these data findable, accessible, interoperable and reusable. In this article, we present a web-based platform named Gene Expression Time-Course Research (GETc) Platform that enables the discovery and visualization of time-course gene expression data and analytical results from the NIH/NCBI-sponsored Gene Expression Omnibus (GEO). The analytical results are produced from an analytic pipeline based on the ordinary differential equation model. Furthermore, in order to extract scientific insights from these results and disseminate the scientific findings, close and efficient collaborations between domain-specific experts from biomedical and scientific fields and data scientists is required. Therefore, GETc provides several recommendation functions and tools to facilitate effective collaborations. GETc platform is a very useful tool for researchers from the biomedical genomics community to present and communicate large numbers of analysis results from GEO. It is generalizable and broadly applicable across different biomedical research areas. 
GETc is a user-friendly and efficient web-based platform freely accessible at http://genestudy.org/.",2020-11-01 +30496475,PlanMine 3.0-improvements to a mineable resource of flatworm biology and biodiversity.,"Flatworms (Platyhelminthes) are a basally branching phylum that harbours a wealth of fascinating biology, including planarians with their astonishing regenerative abilities and the parasitic tape worms and blood flukes that exert a massive impact on human health. PlanMine (http://planmine.mpi-cbg.de/) has the mission objective of providing both a mineable sequence repository for planarians and also a resource for the comparative analysis of flatworm biology. While the original PlanMine release was entirely based on transcriptomes, the current release transitions to a more genomic perspective. Building on the recent availability of a high quality genome assembly of the planarian model species Schmidtea mediterranea, we provide a gene prediction set that now assign existing transcripts to defined genomic coordinates. The addition of recent single cell and bulk RNA-seq datasets greatly expands the available gene expression information. Further, we add transcriptomes from a broad range of other flatworms and provide a phylogeny-aware interface that makes evolutionary species comparisons accessible to non-experts. At its core, PlanMine continues to utilize the powerful InterMine framework and consistent data annotations to enable meaningful inter-species comparisons. 
Overall, PlanMine 3.0 thus provides a host of new features that makes the fascinating biology of flatworms accessible to the wider research community.",2019-01-01 +30535108,"Systematic domain-based aggregation of protein structures highlights DNA-, RNA- and other ligand-binding positions.","Domains are fundamental subunits of proteins, and while they play major roles in facilitating protein-DNA, protein-RNA and other protein-ligand interactions, a systematic assessment of their various interaction modes is still lacking. A comprehensive resource identifying positions within domains that tend to interact with nucleic acids, small molecules and other ligands would expand our knowledge of domain functionality as well as aid in detecting ligand-binding sites within structurally uncharacterized proteins. Here, we introduce an approach to identify per-domain-position interaction 'frequencies' by aggregating protein co-complex structures by domain and ascertaining how often residues mapping to each domain position interact with ligands. We perform this domain-based analysis on ∼91000 co-complex structures, and infer positions involved in binding DNA, RNA, peptides, ions or small molecules across 4128 domains, which we refer to collectively as the InteracDome. Cross-validation testing reveals that ligand-binding positions for 2152 domains are highly consistent and can be used to identify residues facilitating interactions in ∼63-69% of human genes. Our resource of domain-inferred ligand-binding sites should be a great aid in understanding disease etiology: whereas these sites are enriched in Mendelian-associated and cancer somatic mutations, they are depleted in polymorphisms observed across healthy populations. 
The InteracDome is available at http://interacdome.princeton.edu.",2019-01-01 +34384217,UPLC-MS-Based Serum Metabolic Profiling Reveals Potential Biomarkers for Predicting Propofol Responsiveness in Females.,"Although previous studies have shown that certain factors interfere with the sensitivity of propofol, the mechanisms for interindividual variability in response to propofol remain unclear. This study aimed to screen the metabolites to predict patients' sensitivity to propofol and to identify metabolic pathways to explore possible mechanisms associated with propofol resistance. Sera from 40 female patients undergoing elective hysteroscopic surgery in a prospective cohort propofol study were obtained before the administration of propofol. The patients' responsiveness to propofol was differentiated based on propofol effect-site concentration. Serum samples from two sets, a discovery set (n = 24) and an independent validation set (n = 16), were analyzed using ultraperformance liquid chromatography coupled with mass spectrometry based untargeted metabolomics. In the discovery set, 494 differential metabolites were screened out, and then 391 potential candidate biomarkers with the area under receiver operating characteristic curve >0.80 were selected. Pathway analysis showed that the pathway of glycerophospholipid metabolism was the most influential pathway. In the independent validation set, six potential biomarkers enabled the discrimination of poor responders from good and intermediate responders, which might be applied to predict propofol sensitivity. The mass spectrometry data are available via MetaboLights (http://www.ebi.ac.uk/metabolights/login) with the identifier MTBLS2311.",2021-08-12 +34164199,"Construction, validation and, visualization of a web-based nomogram for predicting the overall survival and cancer-specific survival of leiomyosarcoma patients with lung metastasis.","

Background

This study sought to assess the prognostic factors for leiomyosarcoma (LMS) patients with lung metastasis and construct web-based nomograms to predict overall survival (OS) and cancer-specific survival (CSS).

Method

Patients diagnosed with LMS combined with lung metastasis between 2010 and 2016 were identified in the Surveillance, Epidemiology, and End Results (SEER) database. The patients were randomly divided into a training set and a testing set. The X-tile analysis provides the best age and tumor size cut-off point, and changes continuous variables into categorical variables. The independent prognostic factors were determined by Cox regression analysis, and 2 nomograms were established. Receiver operating characteristic curves and calibration curves were used to evaluate the nomograms. Based on the nomograms, 2 web-based nomograms were established.

Results

Two hundred and twenty-eight cases were included in the OS nomogram construction, and were randomly divided into a training set (n=160) and a validation set (n=68). Age, T stage, bone metastasis, surgery, chemotherapy, marital status, tumor size, and tumor site were found to be correlated with OS. One hundred and eighty-three cases were enrolled in the CSS nomogram construction, and randomly divided into a training set (n=129) and a validation set (n=54). Age, bone metastasis, surgery, chemotherapy, tumor size, and tumor site were found to be correlated with CSS. Two nomograms were established to predict OS and CSS. In the training set, the areas under the curve of the nomogram for predicting 1-, 2-, and 3-year OS were 0.783, 0.830, and 0.832, respectively, and those for predicting 1-, 2-, and 3-year CSS were 0.889, 0.777, and 0.884, respectively. Two web-based nomograms were established to predict OS (https://wenn23.shinyapps.io/lmslmosapp/), and CSS (https://wenn23.shinyapps.io/lmslmcssapp/).

Conclusion

The developed web-based nomogram is a useful tool for accurately analyzing the prognosis of LMS patients with lung metastasis, and could help clinical doctors to make personalized clinical decisions.",2021-05-01 +34020544,Ori-Finder 3: a web server for genome-wide prediction of replication origins in Saccharomyces cerevisiae. ,"DNA replication is a fundamental process in all organisms; this event initiates at sites termed origins of replication. The characteristics of eukaryotic replication origins are best understood in Saccharomyces cerevisiae. For this species, origin prediction algorithms or web servers have been developed based on the sequence features of autonomously replicating sequences (ARSs). However, their performances are far from satisfactory. By utilizing the Z-curve methodology, we present a novel pipeline, Ori-Finder 3, for the computational prediction of replication origins in S. cerevisiae at the genome-wide level based solely on DNA sequences. The ARS exhibiting both an AT-rich stretch and ARS consensus sequence element can be predicted at the single-nucleotide level. For the identified ARSs in the S. cerevisiae reference genome, 83 and 60% of the top 100 and top 300 predictions matched the known ARS records, respectively. Based on Ori-Finder 3, we subsequently built a database of the predicted ARSs identified in more than a hundred S. cerevisiae genomes. Consequently, we developed a user-friendly web server including the ARS prediction pipeline and the predicted ARSs database, which can be freely accessed at http://tubic.tju.edu.cn/Ori-Finder3.",2021-05-01 +31114869,MEXPRESS update 2019.,"The recent growth in the number of publicly available cancer omics databases has been accompanied by the development of various tools that allow researchers to visually explore these data. 
In 2015, we built MEXPRESS, an online tool for the integration and visualization of gene expression, DNA methylation and clinical data from The Cancer Genome Atlas (TCGA), a large collection of publicly available multi-omics cancer data. MEXPRESS addresses the need for an easy-to-use, interactive application that allows researchers to identify dysregulated genes and their clinical relevance in cancer. Furthermore, while other tools typically do not support integrated visualization of expression and DNA methylation data in combination with the precise genomic location of the methylation, MEXPRESS is unique in how it depicts these diverse data types together. Motivated by the large number of users MEXPRESS has managed to attract over the past 3 years and the recent migration of all TCGA data to a new data portal, we developed a new version of MEXPRESS (https://mexpress.be). It contains the latest TCGA data, additional types of omics and clinical data and extra functionality, allowing users to explore mechanisms of gene dysregulation beyond expression and DNA methylation.",2019-07-01 +34164315,A systematic review and meta-analysis of the effects of general anesthesia combined with continuous paravertebral block in breast cancer surgery and postoperative analgesia.,"

Background

This study aimed to compare the effects of general anesthesia (GA) combined with continuous paravertebral block (CPVB) in breast cancer surgery via systematic review and meta-analysis, in order to provide a theoretical basis for the clinical use of CPVB surgical analgesia.

Methods

A search of the PubMed, Embase, Medline, Ovid, Springer, and Web of Science databases was conducted to screen clinical trials on GA + CPVB for breast cancer surgery published before December 31, 2020. The Cochrane Handbook for Systematic Reviews of Intervention 5.0.2 was adopted for bias risk assessment, and Review Manager 5.3 software (RevMan, The Cochrane Collaboration, http://tech.cochrane.org/revman) was applied for meta-analysis of the literature.

Results

A total of 15 studies that satisfied the requirements were included, involving a total of 1,435 research subjects. The results of our meta-analysis showed the following: the visual analogue scale (VAS) score of the observation group (group A) was significantly reduced [mean difference (MD) =-0.68; 95% confidence interval (CI): -1.04 - -0.33; Z=3.80; P=0.0001]; the level of monocyte chemoattractant protein-1 (MCP-1) was notably decreased (MD =-18.64; 95% CI: -29.68 - -7.61; Z=3.31; P=0.0009); the level of tumor necrosis factor-α (TNF-α) was markedly lower (MD =-1.89; 95% CI: -2.66 - -1.13; Z=4.87; P<0.00001); the interleukin-6 (IL-6) level was obviously reduced (MD =-12.10; 95% CI: -19.22 - -4.99; Z=3.33; P=0.0009); and the incidence of postoperative adverse reactions was substantially decreased (MD = 0.16; 95% CI: 0.07-0.36; Z=4.47; P<0.00001). Compared with group B, the differences of the above five indicators showed statistical significance. In addition, the heart rate (HR) (MD =-1.56; 95% CI: -6.20 - 3.08; Z=0.66; P=0.51), mean arterial pressure (MAP) (MD = 4.66; 95% CI: -0.12 -9.43; Z=1.91; P=0.06), Ramsay score (MD =0.44; 95% CI: -0.06-0.93; Z=1.73; P=0.08) of patients in group A showed no statistical differences compared to group B.

Conclusions

GA + CPVB applied to breast cancer surgery for analgesia can reduce the levels of MCP-1, TNF-α, and IL-6 in patients, thereby providing good postoperative analgesia. Therefore, GA + CPVB could effectively reduce the incidence of pain and adverse reactions in patients, and is effective for analgesia in breast cancer surgery.",2021-05-01 +32778891,MolAICal: a soft tool for 3D drug design of protein targets by artificial intelligence and classical algorithm. ,"Deep learning is an important branch of artificial intelligence that has been successfully applied into medicine and two-dimensional ligand design. The three-dimensional (3D) ligand generation in the 3D pocket of protein target is an interesting and challenging issue for drug design by deep learning. Here, the MolAICal software is introduced to supply a way for generating 3D drugs in the 3D pocket of protein targets by combining with merits of deep learning model and classical algorithm. The MolAICal software mainly contains two modules for 3D drug design. In the first module of MolAICal, it employs the genetic algorithm, deep learning model trained by FDA-approved drug fragments and Vinardo score fitting on the basis of PDBbind database for drug design. In the second module, it uses deep learning generative model trained by drug-like molecules of ZINC database and molecular docking invoked by Autodock Vina automatically. Besides, the Lipinski's rule of five, Pan-assay interference compounds (PAINS), synthetic accessibility (SA) and other user-defined rules are introduced for filtering out unwanted ligands in MolAICal. To show the drug design modules of MolAICal, the membrane protein glucagon receptor and non-membrane protein SARS-CoV-2 main protease are chosen as the investigative drug targets. The results show MolAICal can generate the various and novel ligands with good binding scores and appropriate XLOGP values. 
We believe that MolAICal can use the advantages of deep learning model and classical programming for designing 3D drugs in protein pocket. MolAICal is freely for any nonprofit purpose and accessible at https://molaical.github.io.",2021-05-01 +27097230,SignaFish: A Zebrafish-Specific Signaling Pathway Resource.,"Understanding living systems requires an in-depth knowledge of the signaling networks that drive cellular homeostasis, regulate intercellular communication, and contribute to cell fates during development. Several resources exist to provide high-throughput data sets or manually curated interaction information from human or invertebrate model organisms. We previously developed SignaLink, a uniformly curated, multi-layered signaling resource containing information for human and for the model organisms nematode Caenorhabditis elegans and fruit fly Drosophila melanogaster. Until now, the use of the SignaLink database for zebrafish pathway analysis was limited. To overcome this limitation, we created SignaFish ( http://signafish.org ), a fish-specific signaling resource, built using the concept of SignaLink. SignaFish contains more than 200 curation-based signaling interactions, 132 further interactions listed in other resources, and it also lists potential miRNA-based regulatory connections for seven major signaling pathways. From the SignaFish website, users can reach other web resources, such as ZFIN. SignaFish provides signaling or signaling-related interactions that can be examined for each gene or downloaded for each signaling pathway. We believe that the SignaFish resource will serve as a novel navigating point for experimental design and evaluation for the zebrafish community and for researchers focusing on nonmodel fish species, such as cyclids.",2016-04-20 +33346815,FireProtASR: A Web Server for Fully Automated Ancestral Sequence Reconstruction. 
,"There is a great interest in increasing proteins' stability to widen their usability in numerous biomedical and biotechnological applications. However, native proteins cannot usually withstand the harsh industrial environment, since they are evolved to function under mild conditions. Ancestral sequence reconstruction is a well-established method for deducing the evolutionary history of genes. Besides its applicability to discover the most probable evolutionary ancestors of the modern proteins, ancestral sequence reconstruction has proven to be a useful approach for the design of highly stable proteins. Recently, several computational tools were developed, which make the ancestral reconstruction algorithms accessible to the community, while leaving the most crucial steps of the preparation of the input data on users' side. FireProtASR aims to overcome this obstacle by constructing a fully automated workflow, allowing even the unexperienced users to obtain ancestral sequences based on a sequence query as the only input. FireProtASR is complemented with an interactive, easy-to-use web interface and is freely available at https://loschmidt.chemi.muni.cz/fireprotasr/.",2021-07-01 +26581408,ERAIZDA: a model for holistic annotation of animal infectious and zoonotic diseases. ,"There is an urgent need for a unified resource that integrates trans-disciplinary annotations of emerging and reemerging animal infectious and zoonotic diseases. Such data integration will provide wonderful opportunity for epidemiologists, researchers and health policy makers to make data-driven decisions designed to improve animal health. Integrating emerging and reemerging animal infectious and zoonotic disease data from a large variety of sources into a unified open-access resource provides more plausible arguments to achieve better understanding of infectious and zoonotic diseases. We have developed a model for interlinking annotations of these diseases. 
These diseases are of particular interest because of the threats they pose to animal health, human health and global health security. We demonstrated the application of this model using brucellosis, an infectious and zoonotic disease. Preliminary annotations were deposited into VetBioBase database (http://vetbiobase.igbb.msstate.edu). This database is associated with user-friendly tools to facilitate searching, retrieving and downloading of disease-related information. Database URL: http://vetbiobase.igbb.msstate.edu.",2015-11-18 +34033509,Preliminary Evidence on the Impact of Hearing Aid Use on Falls Risk in Individuals With Self-Reported Hearing Loss.,"Purpose Falls are considered a significant public health issue, and hearing loss has been shown to be an independent risk factor for falls. The primary objective of this study was to determine if hearing aid use modified (reduced) the association. We hypothesized that routine hearing aid use would reduce the impact of hearing loss on the odds of falling. If hearing aid users have reduced odds of falling, then that would have an important impact on falls prevention health care. Method Data from 8,091 individuals 40 years of age and older who completed National Health and Nutrition Examination Survey (NHANES) cycles 1999-2004 were used. NHANES comprises a series of cross-sectional studies, each of which is representative of the total civilian noninstitutionalized population of children and adults in the United States, enabling unbiased national estimates of health that can be independently reproduced. Self-reported hearing, hearing aid status, falls history, and comorbidities were extracted and analyzed using regression modeling. Results The 8,091 individuals were grouped based on a self-reported history of falls in the last year. Self-reported hearing loss was significantly associated with odds of falling. 
Categorizing individuals based on routine hearing aid use was included as an interaction term in the fully adjusted models and was not significant, suggesting no difference in falls based on hearing aid status. Conclusions The unique results of the current study show that when examining self-reported hearing in a nationally representative sample, hearing aid use does not appear to mitigate or modify the association between self-reported hearing and falls. Future research designs are highlighted to address limitations identified using NHANES data for this research and focus on the use of experimental designs to further understand the association between hearing loss and falls, including whether hearing loss may be a modifiable risk factor for falls. Supplemental Material https://doi.org/10.23641/asha.14642784.",2021-05-25 +34318330,Risk scorecard to minimize impact of COVID-19 when reopening. ,"We present a novel approach for exiting coronavirus disease 2019 (COVID-19) lockdowns using a 'risk scorecard' to prioritize activities to resume whilst allowing safe reopening. We modelled cases generated in the community/week, incorporating parameters for social distancing, contact tracing and imported cases. We set thresholds for cases and analysed the effect of varying parameters. An online tool to facilitate country-specific use including the modification of parameters (https://sshsphdemos.shinyapps.io/covid_riskbudget/) enables visualization of effects of parameter changes and trade-offs. Local outbreak investigation data from Singapore illustrate this. Setting a threshold of 0.9 mean number of secondary cases arising from a case to keep R < 1, we showed that opening all activities excluding high-risk ones (e.g. nightclubs) allows cases to remain within threshold; while opening high-risk activities would exceed the threshold and result in escalating cases. An 80% reduction in imported cases per week (141 to 29) reduced steady-state cases by 30% (295 to 205). 
One-off surges in cases (due to superspreading) had no effect on the steady state if the R remains <1. Increasing the effectiveness of contact tracing (probability of a community case being isolated when infectious) by 33% (0.6 to 0.8) reduced cases by 22% (295 to 231). Cases grew exponentially if the product of the mean number of secondary cases arising from a case and (1-probability of case being isolated) was >1. Countries can utilize a 'risk scorecard' to balance relaxations for travel and domestic activity depending on factors that reduce disease impact, including hospital/ICU capacity, contact tracing, quarantine and vaccination. The tool enabled visualization of the combinations of imported cases and activity levels on the case numbers and the trade-offs required. For vaccination, a reduction factor should be applied both for likelihood of an infected case being present and a close contact getting infected.",2021-10-01 +33929850,Identification of the Core Chemical Structure in SureChEMBL Patents.,"The SureChEMBL database provides open access to 17 million chemical entities mentioned in 14 million patents published since 1970. However, alongside with molecules covered by patent claims, the database is full of starting materials and intermediate products of little pharmacological relevance. Herein, we introduce a new filtering protocol to automatically select the core chemical structures best representing a congeneric series of pharmacologically relevant molecules in patents. The protocol is first validated against a selection of 890 SureChEMBL patents for which a total of 51,738 manually curated molecules are deposited in ChEMBL. Our protocol was able to select 92.5% of the molecules in ChEMBL from all 270,968 molecules in SureChEMBL for those patents. Subsequently, the protocol was applied to all 240,988 US pharmacological patents for which 9,111,706 molecules are available in SureChEMBL. 
The unsupervised filtering process selected 5,949,214 molecules (65.3% of the total number of molecules) that form highly congeneric chemical series in 188,795 of those patents (78.3% of the total number of patents). A SureChEMBL version enriched with molecules of pharmacological relevance is available for download at https://ftp.ebi.ac.uk/pub/databases/chembl/SureChEMBLccs.",2021-04-30 +30603993,Dynamics of a consumer-resource reaction-diffusion model : Homogeneous versus heterogeneous environments.,"We study the dynamics of a consumer-resource reaction-diffusion model, proposed recently by Zhang et al. (Ecol Lett 20(9):1118-1128, 2017), in both homogeneous and heterogeneous environments. For homogeneous environments we establish the global stability of constant steady states. For heterogeneous environments we study the existence and stability of positive steady states and the persistence of time-dependent solutions. Our results illustrate that for heterogeneous environments there are some parameter regions in which the resources are only partially limited in space, a unique feature which does not occur in homogeneous environments. Such difference between homogeneous and heterogeneous environments seems to be closely connected with a recent finding by Zhang et al. (2017), which says that in consumer-resource models, homogeneously distributed resources could support higher population abundance than heterogeneously distributed resources. This is opposite to the prediction by Lou (J Differ Equ 223(2):400-426, 2006. https://doi.org/10.1016/j.jde.2005.05.010 ) for logistic-type models. For both small and high yield rates, we also show that when a consumer exists in a region with a heterogeneously distributed input of exploitable renewed limiting resources, the total population abundance at equilibrium can reach a greater abundance when it diffuses than when it does not. 
In contrast, such phenomenon may fail for intermediate yield rates.",2019-01-02 +33589839,Joint probabilistic modeling of single-cell multi-omic data with totalVI.,"The paired measurement of RNA and surface proteins in single cells with cellular indexing of transcriptomes and epitopes by sequencing (CITE-seq) is a promising approach to connect transcriptional variation with cell phenotypes and functions. However, combining these paired views into a unified representation of cell state is made challenging by the unique technical characteristics of each measurement. Here we present Total Variational Inference (totalVI; https://scvi-tools.org ), a framework for end-to-end joint analysis of CITE-seq data that probabilistically represents the data as a composite of biological and technical factors, including protein background and batch effects. To evaluate totalVI's performance, we profiled immune cells from murine spleen and lymph nodes with CITE-seq, measuring over 100 surface proteins. We demonstrate that totalVI provides a cohesive solution for common analysis tasks such as dimensionality reduction, the integration of datasets with different measured proteins, estimation of correlations between molecules and differential expression testing.",2021-02-15 +31728526,TerrestrialMetagenomeDB: a public repository of curated and standardized metadata for terrestrial metagenomes.,"Microbiome studies focused on the genetic potential of microbial communities (metagenomics) became standard within microbial ecology. MG-RAST and the Sequence Read Archive (SRA), the two main metagenome repositories, contain over 202 858 public available metagenomes and this number has increased exponentially. However, mining databases can be challenging due to misannotated, misleading and decentralized data. The main goal of TerrestrialMetagenomeDB is to make it easier for scientists to find terrestrial metagenomes of interest that could be compared with novel datasets in meta-analyses. 
We defined terrestrial metagenomes as those that do not belong to marine environments. Further, we curated the database using text mining to assign potential descriptive keywords that better contextualize environmental aspects of terrestrial metagenomes, such as biomes and materials. TerrestrialMetagenomeDB release 1.0 includes 15 022 terrestrial metagenomes from SRA and MG-RAST. Together, the downloadable data amounts to 68 Tbp. In total, 199 terrestrial terms were divided into 14 categories. These metagenomes span 83 countries, 30 biomes and 7 main source materials. The TerrestrialMetagenomeDB is publicly available at https://webapp.ufz.de/tmdb.",2020-01-01 +34556911,Evaluation of White Matter Tracts Fractional Anisotropy Using Tract-Based Spatial Statistics and Its correlation with Amyotrophic Lateral Sclerosis Functional Rating Scale Score in Patients with Motor Neuron Disease.,"Background Motor neuron diseases cause progressive degeneration of upper and lower motor neurons. No Indian studies are available on diffusion tensor imaging (DTI) findings in these patients. Aims This study was done to identify white matter tracts that have reduced fractional anisotropy (FA) in motor neuron disease (MND) patients using tract-based spatial statistics and to correlate FA values with Amyotrophic Lateral Sclerosis Functional Rating Scale (ALSFRS-R) score. Settings and Design A case-control study in a tertiary care hospital. Materials and Methods We did DTI sequence (20 gradient directions, b -value 1,000) in 15 MND patients (10 men and 5 women; mean age: 46.5 ± 16.5 years; 11 amyotrophic lateral sclerosis [ALS], 2 monomelic amyotrophy, 1 progressive muscular atrophy, and 1 bulbar ALS) and 15 age- and sex-matched controls. The data set from each subject was postprocessed using FSL downloaded from the FMRIB Software Library, Oxford, United Kingdom (http://www.fmrib.ox.ac.uk/fsl). 
Statistical Analysis The statistical permutation tool ""randomize"" with 5,000 permutations was used to identify voxels that were different between the patient data set and the control data set. Mean FA values of these voxels were obtained separately for each tract as per ""JHU white-matter tractography atlas."" SPSS was used to look to correlate tract-wise mean FA value with ALSFRS-R score. Results We found clusters of reduced FA values in multiple tracts in the brain of patients with MND. Receiver operating characteristic curves plotted for individual tracts, showed that bilateral corticospinal tract, bilateral anterior thalamic radiation, bilateral uncinate fasciculus, and right superior longitudinal fasciculus were the best discriminators (area under the curve > 0.8, p < 0.01). FA values did not correlate with ALFRS-R severity score. Conclusion In MND patients, not only the motor tracts, but several nonmotor association tracts are additionally affected, reflecting nonmotor pathological processes in ALS.",2021-04-01 +31850066,"Identifying Common Genes, Cell Types and Brain Regions Between Diseases of the Nervous System.","Background: Diseases of the nervous system are widely considered to be caused by genetic mutations, and they have been shown to share pathogenic genes. Discovering the shared mechanisms of these diseases is useful for designing common treatments. Method: In this study, by reviewing 518 articles published after 2007 on 20 diseases of the nervous system, we compiled data on 1607 mutations occurring in 365 genes, totals that are 1.9 and 3.2 times larger than those collected in the Clinvar database, respectively. A combination with the Clinvar data gives 2434 pathogenic mutations and 424 genes. Using this information, we measured the genetic similarities between the diseases according to the number of genes causing two diseases simultaneously. Further detection was carried out on the similarity between diseases in terms of cell types. 
Disease-related cell types were defined as those with disease-related gene enrichment among the marker genes of cells, as ascertained by analyzing single-cell sequencing data. Enrichment profiles of the disease-related genes over 25 cell types were constructed. The disease similarity in terms of cell types was obtained by calculating the distances between the enrichment profiles of these genes. The same strategy was applied to measure the disease similarity in terms of brain regions by analyzing the gene expression data from 10 brain regions. Results: The disease similarity was first measured in terms of genes. The result indicated that the proportions of overlapped genes between diseases were significantly correlated to the DMN scores (phenotypic similarity), with a Pearson correlation coefficient of 0.40 and P-value = 6.0×10-3. The disease similarity analysis for cell types identified that the distances between enrichment profiles of the disease-related genes were negatively correlated to the DMN scores, with Spearman correlation coefficient = -0.26 (P-value = 1.5 × 10-2). However, the brain region enrichment profile distances of the disease-related genes were not significantly correlated with the DMN score. Besides the similarity of diseases, this study identified novel relationships between diseases and cell types. Conclusion: We manually constructed the most comprehensive dataset to date for genes with mutations related to 20 nervous system diseases. By using this dataset, the similarities between diseases in terms of genes and cell types were found to be significantly correlated to their phenotypic similarity. However, the disease similarities in terms of brain regions were not significantly correlated with the phenotypic similarities. Thus, the phenotypic similarity between the diseases is more likely to be caused by dysfunctions of the same genes or the same types of neurons rather than the same brain regions. 
The data are collected into the database NeurodisM, which is available at http://biomed-ai.org/neurodism.",2019-11-29 +32487016,ncRI: a manually curated database for experimentally validated non-coding RNAs in inflammation.,"

Background

Inflammation has been considered to be central to the onset, progression, and outcome of infectious diseases, especially as one of the hallmarks of cancer. Non-coding RNAs (ncRNAs), such as miRNAs and lncRNAs, have emerged as vital regulators in control of immune and inflammatory processes, and also play important roles in the inflammatory disease and immunotherapy.

Results

In this study, we presented a database ncRI, which documented experimentally verified ncRNAs in inflammatory diseases, from published articles. Each entry contained the detailed information about ncRNA name, inflammatory diseases, mechanism, experimental techniques (e.g., microarray, RNA-seq, qRT-PCR), experimental samples (cell line and/or tissue), expression patterns of ncRNA (up-regulated or down-regulated), reference information (PubMed ID, year of publication, title of paper) and so on. Collectively, ncRI recorded 11,166 entries that include 1976 miRNAs, 1377 lncRNAs and 107 other ncRNAs across 3 species (human, mouse, and rat) from more than 2000 articles. All these data are free for users to search, browse and download.

Conclusion

In summary, the presented database ncRI provides a relatively comprehensive credible repository about ncRNAs and their roles in inflammatory diseases, and will be helpful for research on immunotherapy. The ncRI is now freely available to all users at http://www.jianglab.cn/ncRI/.",2020-06-01 +32960948,Text mining for modeling of protein complexes enhanced by machine learning.,"

Motivation

Procedures for structural modeling of protein-protein complexes (protein docking) produce a number of models which need to be further analyzed and scored. Scoring can be based on independently determined constraints on the structure of the complex, such as knowledge of amino acids essential for the protein interaction. Previously, we showed that text mining of residues in freely available PubMed abstracts of papers on studies of protein-protein interactions may generate such constraints. However, absence of post-processing of the spotted residues reduced usability of the constraints, as a significant number of the residues were not relevant for the binding of the specific proteins.

Results

We explored filtering of the irrelevant residues by two machine learning approaches, Deep Recursive Neural Network (DRNN) and Support Vector Machine (SVM) models with different training/testing schemes. The results showed that the DRNN model is superior to the SVM model when training is performed on the PMC-OA full-text articles and applied to classification (interface or non-interface) of the residues spotted in the PubMed abstracts. When both training and testing is performed on full-text articles or on abstracts, the performance of these models is similar. Thus, in such cases, there is no need to utilize computationally demanding DRNN approach, which is computationally expensive especially at the training stage. The reason is that SVM success is often determined by the similarity in data/text patterns in the training and the testing sets, whereas the sentence structures in the abstracts are, in general, different from those in the full text articles.

Availabilityand implementation

The code and the datasets generated in this study are available at https://gitlab.ku.edu/vakser-lab-public/text-mining/-/tree/2020-09-04.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +29897426,Connectome verification: inter-rater and connection reliability of tract-tracing-based intrinsic hypothalamic connectivity.,"MOTIVATION:Structural connectomics supports understanding aspects of neuronal dynamics and brain functions. Conducting metastudies of tract-tracing publications is one option to generate connectome databases by collating neuronal connectivity data. Meanwhile, it is a common practice that the neuronal connections and their attributes of such retrospective data collations are extracted from tract-tracing publications manually by experts. As the description of tract-tracing results is often not clear-cut and the documentation of interregional connections is not standardized, the extraction of connectivity data from tract-tracing publications could be complex. This might entail that different experts interpret such non-standardized descriptions of neuronal connections from the same publication in variable ways. Hitherto, no investigation is available that determines the variability of extracted connectivity information from original tract-tracing publications. A relatively large variability of connectivity information could produce significant misconstructions of adjacency matrices with faults in network and graph analyzes. The objective of this study is to investigate the inter-rater and inter-observation variability of tract-tracing-based documentations of neuronal connections. To demonstrate the variability of neuronal connections, data of 16 publications which describe neuronal connections of subregions of the hypothalamus have been assessed by way of example. RESULTS:A workflow is proposed that allows detecting variability of connectivity at different steps of data processing in connectome metastudies. 
Variability between three blinded experts was found by comparing the connection information in a sample of 16 publications that describe tract-tracing-based neuronal connections in the hypothalamus. Furthermore, observation scores, matrix visualizations of discrepant connections and weight variations in adjacency matrices are analyzed. AVAILABILITY:The resulting data and software are available at http://neuroviisas.med.uni-rostock.de/neuroviisas.shtml.",2019-09-01 +32548237,Improving preclinical to clinical translation in Alzheimer's disease research.,"

Introduction

Preclinical testing in animal models is a critical component of the drug discovery and development process. While hundreds of interventions have demonstrated preclinical efficacy for ameliorating cognitive impairments in animal models, none have confirmed efficacy in Alzheimer's disease (AD) clinical trials. Critically this lack of translation to the clinic points in part to issues with the animal models, the preclinical assays used, and lack of scientific rigor and reproducibility during execution. In an effort to improve this translation, the Preclinical Testing Core (PTC) of the Model Organism Development and Evaluation for Late-onset AD (MODEL-AD) consortium has established a rigorous screening strategy with go/no-go decision points that permits unbiased assessments of therapeutic agents.

Methods

An initial screen evaluates drug stability, formulation, and pharmacokinetics (PK) to confirm appreciable brain exposure in the disease model at the pathologically relevant ages, followed by pharmacodynamics (PD) and predictive PK/PD modeling to inform the dose regimen for long-term studies. The secondary screen evaluates target engagement and disease modifying activity using non-invasive positron emission tomography/magnetic resonance imaging (PET/MRI). Provided the compound meets its ""go"" criteria for these endpoints, evaluation for efficacy on behavioral endpoints are conducted.

Results

Validation of this pipeline using tool compounds revealed the importance of critical quality control (QC) steps that researchers need to be aware of when executing preclinical studies. These include confirmation of the active pharmaceutical ingredient and at the precise concentration expected; and an experimental design that is well powered and in line with the Animal Research Reporting of In vivo Experiments (ARRIVE) guidelines.

Discussion

Taken together our experience executing a rigorous screening strategy with QC checkpoints provides insight to the challenges of conducting translational studies in animal models. The PTC pipeline is a National Institute on Aging (NIA)-supported resource accessible to the research community for investigators to nominate compounds for testing (https://stopadportal.synapse.org/), and these resources will ultimately enable better translational studies to be conducted.",2020-06-14 +32163115,The Glycine Receptor Allosteric Ligands Library (GRALL).,"MOTIVATION:Glycine receptors (GlyRs) mediate fast inhibitory neurotransmission in the brain and have been recognized as key pharmacological targets for pain. A large number of chemically diverse compounds that are able to modulate GlyR function both positively and negatively have been reported, which provides useful information for the development of pharmacological strategies and models for the allosteric modulation of these ion channels. RESULTS:Based on existing literature, we have collected 218 unique chemical entities with documented modulatory activities at homomeric GlyR-α1 and -α3 and built a database named GRALL. This collection includes agonists, antagonists, positive and negative allosteric modulators and a number of experimentally inactive compounds. Most importantly, for a large fraction of them a structural annotation based on their putative binding site on the receptor is provided. This type of annotation, which is currently missing in other drug banks, along with the availability of cooperativity factors from radioligand displacement experiments are expected to improve the predictivity of in silico methodologies for allosteric drug discovery and boost the development of conformation-based pharmacological approaches. AVAILABILITY AND IMPLEMENTATION:The GRALL library is distributed as a web-accessible database at the following link: https://ifm.chimie.unistra.fr/grall. 
For each molecular entry, it provides information on the chemical structure, the ligand-binding site, the direction of modulation, the potency, the 3D molecular structure and quantum-mechanical charges as determined by our in-house pipeline. CONTACT:mcecchini@unistra.fr. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-06-01 +33707775,Plasma metabolites to profile pathways in noncommunicable disease multimorbidity.,"Multimorbidity, the simultaneous presence of multiple chronic conditions, is an increasing global health problem and research into its determinants is of high priority. We used baseline untargeted plasma metabolomics profiling covering >1,000 metabolites as a comprehensive readout of human physiology to characterize pathways associated with and across 27 incident noncommunicable diseases (NCDs) assessed using electronic health record hospitalization and cancer registry data from over 11,000 participants (219,415 person years). We identified 420 metabolites shared between at least 2 NCDs, representing 65.5% of all 640 significant metabolite-disease associations. We integrated baseline data on over 50 diverse clinical risk factors and characteristics to identify actionable shared pathways represented by those metabolites. Our study highlights liver and kidney function, lipid and glucose metabolism, low-grade inflammation, surrogates of gut microbial diversity and specific health-related behaviors as antecedents of common NCD multimorbidity with potential for early prevention. We integrated results into an open-access webserver ( https://omicscience.org/apps/mwasdisease/ ) to facilitate future research and meta-analyses.",2021-03-11 +34170194,Long-term cerebrovascular dysfunction in the offspring from maternal electronic cigarette use during pregnancy.,"Electronic cigarettes (E-cigs) have been promoted as harm-free or less risky than smoking, even for women during pregnancy. 
These claims are made largely on E-cig aerosol having fewer number of toxic chemicals compared with cigarette smoke. Given that even low levels of smoking are found to produce adverse birth outcomes, we sought to test the hypothesis that vaping during pregnancy (with or without nicotine) would not be harm-free and would result in vascular dysfunction that would be evident in offspring during adolescent and/or adult life. Pregnant female Sprague Dawley rats were exposed to E-cig aerosol (1 h/day, 5 days/wk, starting on gestational day 2 until pups were weaned) using e-liquid with 0 mg/mL (E-cig0) or 18 mg/mL nicotine (E-cig18) and compared with ambient air-exposed controls. Body mass at birth and at weaning were not different between groups. Assessment of middle cerebral artery (MCA) reactivity revealed a 51%-56% reduction in endothelial-dependent dilation response to acetylcholine (ACh) for both E-cig0 and E-cig18 in 1-mo, 3-mo (adolescent), and 7-mo-old (adult) offspring (P < 0.05 compared with air, all time points). MCA responses to sodium nitroprusside (SNP) and myogenic tone were not different across groups, suggesting that endothelial-independent responses were not altered. The MCA vasoconstrictor response (5-hydroxytryptamine, 5-HT) was also not different across treatment and age groups. These data demonstrate that maternal vaping during pregnancy is not harm-free and confers significant cerebrovascular health risk/dysfunction to offspring that persists into adult life. NEW & NOTEWORTHY These data established that vaping electronic cigarettes during pregnancy, with or without nicotine, is not safe and confers significant risk potential to the cerebrovascular health of offspring in early and adult life. 
A key finding is that vaping without nicotine does not protect offspring from cerebrovascular dysfunction and results in the same level of cerebrovascular dysfunction (compared with maternal vaping with nicotine), indicating that the physical and/or chemical properties from the base solution (other than nicotine) are responsible for the cerebrovascular dysfunction that we observed. Listen to this article's corresponding podcast at https://ajpheart.podbean.com/e/maternal-vaping-impairs-vascular-function-in-theoffspring/.",2021-06-25 +33969254,TOMATOMET: A metabolome database consists of 7118 accurate mass values detected in mature fruits of 25 tomato cultivars.,"The total number of low-molecular-weight compounds in the plant kingdom, most of which are secondary metabolites, is hypothesized to be over one million, although only a limited number of plant compounds have been characterized. Untargeted analysis, especially using mass spectrometry (MS), has been useful for understanding the plant metabolome; however, due to the limited availability of authentic compounds for MS-based identification, the identities of most of the ion peaks detected by MS remain unknown. Accurate mass values of peaks obtained by high accuracy mass measurement and, if available, MS/MS fragmentation patterns provide abundant annotation for each peak. Here, we carried out an untargeted analysis of compounds in the mature fruit of 25 tomato cultivars using liquid chromatography-Orbitrap MS for accurate mass measurement, followed by manual curation to construct the metabolome database TOMATOMET (http://metabolites.in/tomato-fruits/). The database contains 7,118 peaks with accurate mass values, in which 1,577 ion peaks are annotated as members of a chemical group. 
Remarkably, 71% of the mass values are not found in the accurate masses detected previously in Arabidopsis thaliana, Medicago truncatula or Jatropha curcas, indicating significant chemical diversity among plant species that remains to be solved. Interestingly, substantial chemical diversity exists also among tomato cultivars, indicating that chemical profiling from distinct cultivars contributes towards understanding the metabolome, even in a single organ of a species, and can prioritize some desirable metabolic targets for further applications such as breeding.",2021-04-29 +34238385,OUTBREAK: a user-friendly georeferencing online tool for disease surveillance.,"The current COVID-19 pandemic has already claimed more than 3.7 million victims and it will cause more deaths in the coming months. Tools that track the number and locations of cases are critical for surveillance and help in making policy decisions for controlling the outbreak. However, the current surveillance web-based dashboards run on proprietary platforms, which are often expensive and require specific computational knowledge. We developed a user-friendly web tool, named OUTBREAK, that facilitates epidemic surveillance by showing in an animated graph the timeline and geolocations of cases of an outbreak. It permits even non-specialist users to input data most conveniently and track outbreaks in real-time. We applied our tool to visualize the SARS 2003, MERS, and COVID19 epidemics, and provided them as examples on the website. Through the zoom feature, it is also possible to visualize cases at city and even neighborhood levels. We made the tool freely available at https://outbreak.sysbio.tools/ . 
OUTBREAK has the potential to guide and help health authorities to intervene and minimize the effects of outbreaks.",2021-07-08 +35023998,HTTP-level e-commerce data based on server access logs for an online store.,"Web server logs have been extensively used as a source of data on the characteristics of Web traffic and users' navigational patterns. In particular, Web bot detection and online purchase prediction using methods from artificial intelligence (AI) are currently key areas of research. However, in reality, it is hard to obtain logs from actual online stores and there is no common dataset that can be used across different studies. Moreover, there is a lack of studies exploring Web traffic over a longer period of time, due to the unavailability of long-term data from server logs. The need to develop reliable models of Web traffic, Web user navigation, and e-customer behaviour calls for an up-to-date, large-volume e-commerce dataset on Web traffic. Similarly, AI problems require a sufficient amount of solid, real-life data to train and validate new models and methods. Thus, to meet a demand of a publicly available long-term e-commerce dataset, we collected access log data describing the operation of an online store over a six-month period. Using a program written in the C# language, data were aggregated, transformed, and anonymized. As a result, we release this EClog dataset in CSV format, which covers 183 days of HTTP-level e-commerce traffic. The data will be beneficial for research in many areas, including computer science, data science, management, and sociology.",2020-10-07 +31706268,Genome-wide prediction and prioritization of human aging genes by data fusion: a machine learning approach.,"BACKGROUND:Machine learning can effectively nominate novel genes for various research purposes in the laboratory. On a genome-wide scale, we implemented multiple databases and algorithms to predict and prioritize the human aging genes (PPHAGE). 
RESULTS:We fused data from 11 databases, and used Naïve Bayes classifier and positive unlabeled learning (PUL) methods, NB, Spy, and Rocchio-SVM, to rank human genes in respect with their implication in aging. The PUL methods enabled us to identify a list of negative (non-aging) genes to use alongside the seed (known age-related) genes in the ranking process. Comparison of the PUL algorithms revealed that none of the methods for identifying a negative sample were advantageous over other methods, and their simultaneous use in a form of fusion was critical for obtaining optimal results (PPHAGE is publicly available at https://cbb.ut.ac.ir/pphage). CONCLUSION:We predict and prioritize over 3,000 candidate age-related genes in human, based on significant ranking scores. The identified candidate genes are associated with pathways, ontologies, and diseases that are linked to aging, such as cancer and diabetes. Our data offer a platform for future experimental research on the genetic and biological aspects of aging. Additionally, we demonstrate that fusion of PUL methods and data sources can be successfully used for aging and disease candidate gene prioritization.",2019-11-09 +31874614,Predicting drug-target interactions from drug structure and protein sequence using novel convolutional neural networks.,"

Background

Accurate identification of potential interactions between drugs and protein targets is a critical step to accelerate drug discovery. Despite many relative experimental researches have been done in the past decades, detecting drug-target interactions (DTIs) remains to be extremely resource-intensive and time-consuming. Therefore, many computational approaches have been developed for predicting drug-target associations on a large scale.

Results

In this paper, we proposed an deep learning-based method to predict DTIs only using the information of drug structures and protein sequences. The final results showed that our method can achieve good performance with the accuracies up to 92.0%, 90.0%, 92.0% and 90.7% for the target families of enzymes, ion channels, GPCRs and nuclear receptors of our created dataset, respectively. Another dataset derived from DrugBank was used to further assess the generalization of the model, which yielded an accuracy of 0.9015 and an AUC value of 0.9557.

Conclusion

It was elucidated that our model shows improved performance in comparison with other state-of-the-art computational methods on the common benchmark datasets. Experimental results demonstrated that our model successfully extracted more nuanced yet useful features, and therefore can be used as a practical tool to discover new drugs.

Availability

http://deeplearner.ahu.edu.cn/web/CnnDTI.htm.",2019-12-24 +32976074,First Report of grapevine red globe virus in grapevines in Washington State. ,"Grapevine red globe virus (GRGV; genus Maculavirus, family Tymoviridae) has been reported in grapevines (Vitis spp.) from Italy, Greece, France, China, Spain and Germany and in California, U.S.A. (Sabanadzovic et al. 2000; Cretazzo et al. 2017; Fan et al. 2016; Ruiz-Garcia et al., 2018). During surveys of grapevine nurseries, a total of 241 composite samples, each consisting of four petioles from mature leaves/vine from five asymptomatic grapevines, from 33 grapevine (Vitis vinifera) cultivars were collected. Total RNA isolated from these samples using Spectrum Total RNA isolation kit (Sigma-Aldrich, St. Louis, MO) was subjected to high-throughput sequencing (HTS) on an Illumina HiSeq2500 or Novaseq 6000 platforms in paired-end mode (Genomics Core Facility, Huntsman Cancer Institute, Utah University, Salt Lake City, UT). After trimming raw reads based on quality and ambiguity, the paired-end quality reads of approximately 120 (HiSeq) or 145 (Novaseq) base pair (bp) length were assembled de novo into a pool of contigs (CLC Genomics workbench 12). These contigs were subjected to BLASTn analysis against the nonredundant virus database from GenBank (http://www.ncbi.nlm.nih.gov/blast). A total of 49 contig sequences, ranging from 200 to 1645 bp in length with an average coverage ranging up to 418.7, aligning with GRGV genome were detected in cvs. Aglianico, Cabernet franc, Pinot gris and Riesling. BLASTn analysis of contigs greater than 500 bp length showed sequence identity between 88.5% and 95% with corresponding GRGV sequences reported from other countries. These results indicated the presence of genetically distinct isolates of GRGV. 
HTS data also revealed coinfection of GRGV in all samples with one or more of the following virus and/or viroids: grapevine rupestris stem pitting associated virus, grapevine rupestris vein feathering virus, hop stunt viroid or grapevine yellow speckle viroid-1. To further confirm infection by GRGV, total RNA was extracted from two asymptomatic Pinot gris vines previously tested positive in HTS using Spectrum Total RNA isolation kit and subjected to reverse transcription-PCR using primers specific to the replicase polyprotein gene of the virus (RG4847F: 5'-TGGTCTGTTGTTCGCATCTT-3' and RG6076R: 5' CGGAAGGGGAAGCATTGATCT-3', Cretazzo et al., 2017). Sequence analysis of the approximately1,250 bp amplicons (accession number MT749359) showed 91.2 % nt sequence identity with corresponding sequence of GRGV isolate from Brazil (KX828704.1). To our knowledge, this is the first report of GRGV in Washington State. Together with the report of the occurrence of GRGV in California (Sabanadzovic et al. 2000), these/span> results indicate wide geographical distribution of the virus. Although GRGV can cause asymptomatic infections in grapevines (Martelli et al. 2002), the economic importance of GRGV as single or coinfections with other viruses needs to be examined to assess the potential significance of the virus to grape production and grapevine certification programs.",2020-09-25 +34486947,ATP2B1 genotypes rs2070759 and rs2681472 polymorphisms and risk of hypertension in Saudi population.,"This study examined an association of ATP2B1 gene polymorphism and hypertension in the Saudi population. The 246 hypertensive cases and 300 healthy human controls were genotyped. The results showed that genotypes rs.207075 (CA + AA) [p = 0.05; OR: 95% CI, 1.5:(1.0 to 2.4) and p = 0.001, OR: 95% CI, 2.4: (1.5 to 4.0) and rs2681472 (CT + TT) [p = 0.05; OR: 95% CI, 1.5 (1.0 to 2.4) and p = 0.006 OR: 95% CI, 2.0 (1.2 to 3.1) respectively] associated with the risk of hypertension. 
Cases carrying the recessive models: [(CA + AA)/(CT + TT)] and [(AA)/(TT)] genotypes confer a strong susceptibility risk of hypertension [p = 0.002; OR: (95%CI) 1.8 (1.2 to 2.6) and p = 0.001; OR: (95%CI) 2.6 (1.5 to 4.7) respectively]. However, cases with body-mass-index (BMI)<25, carrying homozygous mutant genotypes [AA, rs2070759, p = 0.007; OR: (95%CI) 2.75(1.37 to 5.5) and (TT, rs2681472, p = 0.05; OR: (95%CI) 1.96 (1.03 to 3.72)] as well as A allele of rs2070759 [p = 0.006; OR: (95%CI) 1.62 (1.16 to 2.25)] and T allele of rs2681472, p = 0.04, 1.43(1.03 to 1.98)] showed a significant association with high risk of hypertension. In short, a significant association between ATP2B1 gene polymorphism and risk of hypertension was noticed. In addition, individuals carrying recessive genotypes have greater risk in developing hypertension than those carrying dominant genotypes. Moreover, cases with high-risk BMI associated with ATP2B1 variants may play a critical role in developing hypertension.Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1973034 .",2021-09-06 +28605765,Workflow and web application for annotating NCBI BioProject transcriptome data. ,"The volume of transcriptome data is growing exponentially due to rapid improvement of experimental technologies. In response, large central resources such as those of the National Center for Biotechnology Information (NCBI) are continually adapting their computational infrastructure to accommodate this large influx of data. New and specialized databases, such as Transcriptome Shotgun Assembly Sequence Database (TSA) and Sequence Read Archive (SRA), have been created to aid the development and expansion of centralized repositories. Although the central resource databases are under continual development, they do not include automatic pipelines to increase annotation of newly deposited data. Therefore, third-party applications are required to achieve that aim. 
Here, we present an automatic workflow and web application for the annotation of transcriptome data. The workflow creates secondary data such as sequencing reads and BLAST alignments, which are available through the web application. They are based on freely available bioinformatics tools and scripts developed in-house. The interactive web application provides a search engine and several browser utilities. Graphical views of transcript alignments are available through SeqViewer, an embedded tool developed by NCBI for viewing biological sequence data. The web application is tightly integrated with other NCBI web applications and tools to extend the functionality of data processing and interconnectivity. We present a case study for the species Physalis peruviana with data generated from BioProject ID 67621. URL: http://www.ncbi.nlm.nih.gov/projects/physalis/.",2017-01-01 +33112191,Assessing United States County-Level Exposure for Research on Tropical Cyclones and Human Health.,"

Background

Tropical cyclone epidemiology can be advanced through exposure assessment methods that are comprehensive and consistent across space and time, as these facilitate multiyear, multistorm studies. Further, an understanding of patterns in and between exposure metrics that are based on specific hazards of the storm can help in designing tropical cyclone epidemiological research.

Objectives

a) Provide an open-source data set for tropical cyclone exposure assessment for epidemiological research; and b) investigate patterns and agreement between county-level assessments of tropical cyclone exposure based on different storm hazards.

Methods

We created an open-source data set with data at the county level on exposure to four tropical cyclone hazards: peak sustained wind, rainfall, flooding, and tornadoes. The data cover all eastern U.S. counties for all land-falling or near-land Atlantic basin storms, covering 1996-2011 for all metrics and up to 1988-2018 for specific metrics. We validated measurements against other data sources and investigated patterns and agreement among binary exposure classifications based on these metrics, as well as compared them to use of distance from the storm's track, which has been used as a proxy for exposure in some epidemiological studies.

Results

Our open-source data set was typically consistent with data from other sources, and we present and discuss areas of disagreement and other caveats. Over the study period and area, tropical cyclones typically brought different hazards to different counties. Therefore, when comparing exposure assessment between different hazard-specific metrics, agreement was usually low, as it also was when comparing exposure assessment based on a distance-based proxy measurement and any of the hazard-specific metrics.

Discussion

Our results provide a multihazard data set that can be leveraged for epidemiological research on tropical cyclones, as well as insights that can inform the design and analysis for tropical cyclone epidemiological research. https://doi.org/10.1289/EHP6976.",2020-10-28 +31599923,BiomeNet: a database for construction and analysis of functional interaction networks for any species with a sequenced genome.,"

Motivation

Owing to advanced DNA sequencing and genome assembly technology, the number of species with sequenced genomes is rapidly increasing. The aim of the recently launched Earth BioGenome Project is to sequence genomes of all eukaryotic species on Earth over the next 10 years, making it feasible to obtain genomic blueprints of the majority of animal and plant species by this time. Genetic models of the sequenced species will later be subject to functional annotation, and a comprehensive molecular network should facilitate functional analysis of individual genes and pathways. However, network databases are lagging behind genome sequencing projects as even the largest network database provides gene networks for less than 10% of sequenced eukaryotic genomes, and the knowledge gap between genomes and interactomes continues to widen.

Results

We present BiomeNet, a database of 95 scored networks comprising over 8 million co-functional links, which can build and analyze gene networks for any species with the sequenced genome. BiomeNet transfers functional interactions between orthologous proteins from source networks to the target species within minutes and automatically constructs gene networks with the quality comparable to that of existing networks. BiomeNet enables assembly of the first-in-species gene networks not available through other databases, which are highly predictive of diverse biological processes and can also provide network analysis by extracting subnetworks for individual biological processes and network-based gene prioritizations. These data indicate that BiomeNet could enhance the benefits of decoding the genomes of various species, thus improving our understanding of the Earth's biodiversity.

Availability and implementation

The BiomeNet is freely available at http://kobic.re.kr/biomenet/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +33957557,Disentangle domain features for cross-modality cardiac image segmentation.,"Unsupervised domain adaptation (UDA) generally learns a mapping to align the distribution of the source domain and target domain. The learned mapping can boost the performance of the model on the target data, of which the labels are unavailable for model training. Previous UDA methods mainly focus on domain-invariant features (DIFs) without considering the domain-specific features (DSFs), which could be used as complementary information to constrain the model. In this work, we propose a new UDA framework for cross-modality image segmentation. The framework first disentangles each domain into the DIFs and DSFs. To enhance the representation of DIFs, self-attention modules are used in the encoder which allows attention-driven, long-range dependency modeling for image generation tasks. Furthermore, a zero loss is minimized to enforce the information of target (source) DSFs, contained in the source (target) images, to be as close to zero as possible. These features are then iteratively decoded and encoded twice to maintain the consistency of the anatomical structure. To improve the quality of the generated images and segmentation results, several discriminators are introduced for adversarial learning. Finally, with the source data and their DIFs, we train a segmentation network, which can be applicable to target images. We validated the proposed framework for cross-modality cardiac segmentation using two public datasets, and the results showed our method delivered promising performance and compared favorably to state-of-the-art approaches in terms of segmentation accuracies. 
The source code of this work will be released via https://zmiclab.github.io/projects.html, once this manuscript is accepted for publication.",2021-04-16 +33259462,The Use of Time to Pregnancy for Estimating and Monitoring Human Fecundity From Demographic and Health Surveys.,"

Background

Available studies on the prevalence of infertility have proved to have certain limitations, with a scarcity of population-based studies and inconsistent reporting from surveys in countries at all income levels. We wanted to test the applicability of the current duration approach to data from the important Demographic and Health Surveys (DHS) program, funded by USAID since its inception in 1985, https://dhsprogram.com/.

Methods

The current duration approach assumes that there is a well-defined time of initiation of attempts to get pregnant and defines the current duration of a still ongoing pregnancy attempt as the time interval from initiation to interview. The DHS interviews do not have an explicit question about initiation. We focused on nullipari and substituted date of ""establishment of relationship with current partner"" for initiation. Our study used the current duration approach on 15 datasets from DHS during 2002-2016 in eight different countries from sub-Saharan Africa, Asia, and Latin America.

Results

Well-established statistical techniques for current duration data yielded results that for some countries postulated surprisingly long median times to pregnancy and surprisingly high estimates of infertility prevalence. Further study of the data structures revealed serious deviations from expected patterns, in contrast to our earlier experience from surveys in France and the United States where participants were asked explicitly about time of initiation of attempts to become pregnant.

Conclusions

Using cohabitation as a proxy for the initiation of attempts to get pregnant is too crude. Using the current duration approach with DHS data will require more explicit questions during the DHS interviews about initiation of pregnancy attempt.",2021-01-01 +31274860,"Executive Summary: Nurses Specialized in Wound, Ostomy and Continence Canada (NSWOCC) Nursing Best Practice Recommendations: Enterocutaneous Fistula and Enteroatmospheric Fistula.","Enterocutaneous fistulas (ECF) and enteroatmospheric fistulas (EAF) pose significant quality-of-life concerns for patients, and management challenges for the interprofessional healthcare team. In 2009, the Canadian Association for Enterostomal Therapy developed best practice recommendations for the management of ECF. Over time, evidence and practice evolve, and the Nurses Specialized in Wound, Ostomy and Continence Canada performed a comprehensive review of the literature and revised the practice recommendation document. The revised recommendations provide evidence-based management guidance for ECF and EAF in the adult population whether in acute care, community/home care, or long-term/residential settings, and are specifically created for nurses. The revised recommendations include organizational support, assessment, nutrition, pharmaceutical management, education, and local fistula management. This article serves as an executive summary for this clinical resource; the full guideline is available at http://nswoc.ca/ecf-best-practices/.",2019-07-01 +32818252,The optimal discovery procedure for significance analysis of general gene expression studies.,"

Motivation

Analysis of biological data often involves the simultaneous testing of thousands of genes. This requires two key steps: the ranking of genes and the selection of important genes based on a significance threshold. One such testing procedure, called the optimal discovery procedure (ODP), leverages information across different tests to provide an optimal ranking of genes. This approach can lead to substantial improvements in statistical power compared to other methods. However, current applications of the ODP have only been established for simple study designs using microarray technology. Here, we extend this work to the analysis of complex study designs and RNA-sequencing studies.

Results

We apply our extended framework to a static RNA-sequencing study, a longitudinal study, an independent sampling time-series study, and an independent sampling dose-response study. Our method shows improved performance compared to other testing procedures, finding more differentially expressed genes and increasing power for enrichment analysis. Thus, the extended ODP enables a favorable significance analysis of genome-wide gene expression studies.

Availability and implementation

The algorithm is implemented in our freely available R package called edge and can be downloaded at https://www.bioconductor.org/packages/release/bioc/html/edge.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-01 +32777814,BSAseq: an interactive and integrated web-based workflow for identification of causal mutations in bulked F2 populations.,"

Summary

With the advance of next-generation sequencing technologies and reductions in the costs of these techniques, bulked segregant analysis (BSA) has become not only a powerful tool for mapping quantitative trait loci but also a useful way to identify causal gene mutations underlying phenotypes of interest. However, due to the presence of background mutations and errors in sequencing, genotyping, and reference assembly, it is often difficult to distinguish true causal mutations from background mutations. In this study, we developed the BSAseq workflow, which includes an automated bioinformatics analysis pipeline with a probabilistic model for estimating the linked region (the region linked to the causal mutation) and an interactive Shiny web application for visualizing the results. We deeply sequenced a sorghum male-sterile parental line (ms8) to capture the majority of background mutations in our bulked F2 data. We applied the workflow to 11 bulked sorghum F2 populations and 1 rice F2 population and identified the true causal mutation in each population. The workflow is intuitive and straightforward, facilitating its adoption by users without bioinformatics analysis skills. We anticipate that the BSAseq workflow will be broadly applicable to the identification of causal mutations for many phenotypes of interest.

Availability and implementation

BSAseq is freely available on https://www.sciapps.org/page/bsa.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-01 +34616218,Ferns at the digital herbarium of the Central Siberian Botanical Garden SB RAS.,"

Background

According to the data in Index Herbariorum as of 1 December 2020, there are 3426 active herbaria in the world, containing 396,204,891 specimens and 124 herbaria in Russia with more than 16,175,000 specimens. The Central Siberian Botanical Garden of the Siberian Branch of the Russian Academy of Sciences (CSBG SB RAS, Novosibirsk), founded in 1946, historically has two herbarium collections (NS and NSK). Currently these collections contain about 800,000 herbarium specimens comprising vascular plants, mosses, lichens and fungi gathered from all over the world. Digitisation of the NSK type specimens of vascular plants began in 2014 by using the special scanner Herbscan. In 2018, we started digitisation of the NS and NSK collections by using ObjectScan 1600. Pteridophytes (ferns, lycophytes and their extinct free-sporing relatives) are a diverse group of plants that today comprises approximately 12,900 species and plays a major role in terrestrial ecosystems. All herbarium specimens of ferns, collected over 170 years between 1851 and 2021 and stored in the NS and NSK collections, were digitised in 2021, placed at the CSBG SB RAS digital Herbarium (http://herb.csbg.nsc.ru:8081) and published through GBIF. Twenty families of Polypodiopsida, but not Equisetaceae, were included in this dataset. Family Ophioglossaceae was digitised and published in GBIF as a separate dataset.

New information

By August 2021, more than 62,600 specimens with good quality images and fully-captured label transcriptions had been placed at CSBG SB RAS Digital Herbarium. A total of 7,758 records of fern occurrences of 363 taxa in the world with 92% geolocations including 5100 records from Russia with 98.7% geolocations that are new for GBIF.org in 2021 were entered. In the dataset specimens from 43 countries of Europe, Asia, America, Africa and Australia (Oceania), 89% of them from Russia, are presented.",2021-09-17 +32401507,Estimating the Effect of Single-Point Mutations on Protein Thermodynamic Stability and Analyzing the Mutation Landscape of the p53 Protein.,"Nonsynonymous single-nucleotide polymorphisms often result in altered protein stability while playing crucial roles both in the evolution process and in the development of human diseases. Prediction of change in the thermodynamic stability due to such missense mutations will help in protein engineering endeavors and will contribute to a better understanding of different disease conditions. Here, we develop a machine-learning-based framework, viz., ProTSPoM, to estimate the change in protein thermodynamic stability arising out of single-point mutations (SPMs). ProTSPoM outperforms existing methods on the S2648 and S1925 databases and reports a Pearson correlation coefficient of 0.82 (0.88) and a root-mean-squared-error of 0.92 (1.06) kcal/mol between the predicted and experimental ΔΔG values on the long-established S350 (tumor suppressor p53 protein) data set. Further, we estimate the change in thermodynamic stability for all possible SPMs in the DNA binding domain of the p53 protein. We identify single-nucleotide polymorphisms in p53 which are plausibly detrimental to its structural integrity and interaction affinity with the DNA molecule. ProTSPoM with its reliable estimates and time-efficient prediction is well suited to be integrated with existing protein engineering techniques. 
The ProTSPoM web server is accessible at http://cosmos.iitkgp.ac.in/ProTSPoM/.",2020-05-21 +34387910,Molecular basis of mucopolysaccharidosis IVA (Morquio A syndrome): A review and classification of GALNS gene variants and reporting of 68 novel variants.,"Mucopolysaccharidosis IVA (MPS IVA, Morquio A syndrome) is a rare autosomal recessive lysosomal storage disorder caused by mutations in the N-acetylgalactosamine-6-sulfatase (GALNS) gene. We collected, analyzed, and uniformly summarized all published GALNS gene variants, thus updating the previous mutation review (published in 2014). In addition, new variants were communicated by seven reference laboratories in Europe, the Middle East, Latin America, Asia, and the United States. All data were analyzed to determine common alleles, geographic distribution, level of homozygosity, and genotype-phenotype correlation. Moreover, variants were classified according to their pathogenicity as suggested by ACMG. Including those previously published, we assembled 446 unique variants, among which 68 were novel, from 1190 subjects (including newborn screening positive subjects). Variants' distribution was missense (65.0%), followed by nonsense (8.1%), splicing (7.2%), small frameshift deletions(del)/insertions(ins) (7.0%), intronic (4.0%), and large del/ins and complex rearrangements (3.8%). Half (50.4%) of the subjects were homozygous, 37.1% were compound heterozygous, and 10.7% had only one variant detected. The novel variants underwent in silico analysis to evaluate their pathogenicity. All variants were submitted to ClinVar (https://www.ncbi.nlm.nih.gov/clinvar/) to make them publicly available. 
Mutation updates are essential for the correct molecular diagnoses, genetic counseling, prenatal and preimplantation diagnosis, and disease management.",2021-08-23 +33605125,[Influence of present weather conditions on the appeal of Bishkek and Osh residents for emergency care because of cardiovascular diseases].,"Purpose of the study. To determine the influence of air temperature, atmospheric pressure, precipitation, fast weather changes on the number of emergency calls to patients with heart disease in the cities of Bishkek and Osh.

Materials and methods

The data of calls to the ambulance station of the cities of Bishkek and Osh for a 20-year period (1998-2018) were analyzed by classes of diseases in accordance with the ICD 10th revision: A00-R99 - from all causes; I00-I99 - diseases of the circulatory system. More than 450 thousand ambulance calls were analyzed. To assess the meteorological indicators, the archived data of the site https://www.gismeteo.ru/diary/5327 were used. Correlation analysis was performed using the SPSS program. Results and conclusion. The data obtained indicate the presence of moderate and strong statistically significant relationships in the number of patients with cardiovascular diseases seeking medical care with periods of prolonged heat, days with a fast weather change, days with increased atmospheric pressure after the invasion of the cold air front and precipitation. The necessity of seasonal prophylaxis of meteopathic reactions taking into account weather changes is shown.",2021-01-01 +33297937,So you think you can PLS-DA?,"

Background

Partial Least-Squares Discriminant Analysis (PLS-DA) is a popular machine learning tool that is gaining increasing attention as a useful feature selector and classifier. In an effort to understand its strengths and weaknesses, we performed a series of experiments with synthetic data and compared its performance to its close relative from which it was initially invented, namely Principal Component Analysis (PCA).

Results

We demonstrate that even though PCA ignores the information regarding the class labels of the samples, this unsupervised tool can be remarkably effective as a feature selector. In some cases, it outperforms PLS-DA, which is made aware of the class labels in its input. Our experiments range from looking at the signal-to-noise ratio in the feature selection task, to considering many practical distributions and models encountered when analyzing bioinformatics and clinical data. Other methods were also evaluated. Finally, we analyzed an interesting data set from 396 vaginal microbiome samples where the ground truth for the feature selection was available. All the 3D figures shown in this paper as well as the supplementary ones can be viewed interactively at http://biorg.cs.fiu.edu/plsda CONCLUSIONS: Our results highlighted the strengths and weaknesses of PLS-DA in comparison with PCA for different underlying data models.",2020-12-09 +34528868,Philips IntelliSpace Cognition digital test battery: Equivalence and measurement invariance compared to traditional analog test versions.,"Objective: To collect evidence of validity for a selection of digital tests on the Philips IntelliSpace Cognition (ISC) platform.Method: A total of 200 healthy participants (age 50-80) completed both the ISC battery and an analog version of the battery during separate visits. The battery included the following screeners and cognitive tests: Mini-Mental State Examination (2nd edition), Clock Drawing Test, Trail-Making Test (TMT), Rey Auditory Verbal Learning Test (RAVLT), Rey-Osterrieth Complex Figure Test (ROCFT), Letter Fluency, Star Cancellation Test, and Digit Span Test. The ISC tests were administered on an iPad Pro and were automatically scored using designated algorithms. The analog tests were administered in line with existing guidelines and scored by trained neuropsychologists. Criterion validity was established through relative agreement coefficients and raw score equivalence tests. 
In addition, measurement invariance analysis was used to compare the factor structures of both versions. Finally, we explored effects of demographics and experience with digital devices on performance. Results: We found fair to excellent relative agreement between test versions. Absolute equivalence was found for RAVLT, Letter Fluency, Star Cancellation Test, and Digit Span Test. Importantly, we demonstrated equal loadings of the digital and analog test versions on the same set of underlying cognitive domains. Demographic effects were mostly comparable between modalities, and people's experience with digital devices was found to only influence performance on TMT B. Conclusions: This study provides several sources of evidence for the validity of the ISC test battery, offering an important step in validating ISC for clinical use. Supplemental data for this article is available online at https://doi.org/10.1080/13854046.2021.1974565.",2021-09-16 +,OneTwoTree: An online tool for phylogeny reconstruction,"Phylogeny reconstruction is a key instrument in numerous biological analyses, ranging from evolutionary and ecology research, to conservation and systems biology. The increasing accumulation of genomic data makes it possible to reconstruct phylogenies with both high accuracy and at increasingly finer resolution. Yet, taking advantage of the enormous amount of sequence data available requires the use of computational tools for efficient data retrieval and processing, or else the process could quickly become an error‐prone endeavour. Here, we present OneTwoTree (http://onetwotree.tau.ac.il/), a Web‐based tool for tree reconstruction based on the supermatrix paradigm. 
Given a list of taxa names of interest as the sole input requirement, OneTwoTree retrieves all available sequence data from NCBI GenBank, clusters these into orthology groups, identifies the most informative set of markers, searches for an appropriate outgroup, and assembles a partitioned sequence matrix that is then used for the final phylogeny reconstruction step. OneTwoTree further allows users to control various steps of the process, such as the merging of sequences from similar clusters, or phylogeny reconstruction based on markers from a specific genome type. By comparing the performance of OneTwoTree to a manually reconstructed phylogeny of the Antirrhineae tribe, we show that the use of OneTwoTree resulted in substantially higher data coverage in terms of both taxon sampling and the number of informative markers assembled. OneTwoTree provides a flexible online tool for species‐tree reconstruction, aimed to assist researchers ranging in their level of prior expertise in the task of phylogeny reconstruction.",2018-11-01 +33002005,"RNAthor - fast, accurate normalization, visualization and statistical analysis of RNA probing data resolved by capillary electrophoresis.","RNAs adopt specific structures to perform their functions, which are critical to fundamental cellular processes. For decades, these structures have been determined and modeled with strong support from computational methods. Still, the accuracy of the latter ones depends on the availability of experimental data, for example, chemical probing information that can define pseudo-energy constraints for RNA folding algorithms. At the same time, diverse computational tools have been developed to facilitate analysis and visualization of data from RNA structure probing experiments followed by capillary electrophoresis or next-generation sequencing. 
RNAthor, a new software tool for the fully automated normalization of SHAPE and DMS probing data resolved by capillary electrophoresis, has recently joined this collection. RNAthor automatically identifies unreliable probing data. It normalizes the reactivity information to a uniform scale and uses it in the RNA secondary structure prediction. Our web server also provides tools for fast and easy RNA probing data visualization and statistical analysis that facilitates the comparison of multiple data sets. RNAthor is freely available at http://rnathor.cs.put.poznan.pl/.",2020-10-01 +27651464,LNCediting: a database for functional effects of RNA editing in lncRNAs.,"RNA editing is a widespread post-transcriptional mechanism that can make a single base change on specific nucleotide sequence in an RNA transcript. RNA editing events can result in missense codon changes and modulation of alternative splicing in mRNA, and modification of regulatory RNAs and their binding sites in noncoding RNAs. Recent computational studies accurately detected more than 2 million A-to-I RNA editing sites from next-generation sequencing (NGS). However, the vast majority of these RNA editing sites have unknown functions and are in noncoding regions of the genome. To provide a useful resource for the functional effects of RNA editing in long noncoding RNAs (lncRNAs), we systematically analyzed the A-to-I editing sites in lncRNAs across human, rhesus, mouse, and fly, and observed an appreciable number of RNA editing sites which can significantly impact the secondary structures of lncRNAs and lncRNA-miRNA interactions. All the data were compiled into LNCediting, a user-friendly database (http://bioinfo.life.hust.edu.cn/LNCediting/). LNCediting provides customized tools to predict functional effects of novel editing sites in lncRNAs. 
We hope that it will become an important resource for exploring functions of RNA editing sites in lncRNAs.",2016-09-19 +34450382,Computer-aided prediction of inhibitors against STAT3 for managing COVID-19 associated cytokine storm.,"

Background

Proinflammatory cytokines are correlated with the severity of disease in patients with COVID-19. IL6-mediated activation of STAT3 proliferates proinflammatory responses that lead to cytokine storm promotion. Thus, STAT3 inhibitors may play a crucial role in managing the COVID-19 pathogenesis. The present study discusses a method for predicting inhibitors against the STAT3 signaling pathway.

Method

The main dataset comprises 1565 STAT3 inhibitors and 1671 non-inhibitors used for training, testing, and evaluation of models. A number of machine learning classifiers have been implemented to develop the models.

Results

The outcomes of the data analysis show that rings and aromatic groups are significantly abundant in STAT3 inhibitors compared to non-inhibitors. First, we developed models using 2-D and 3-D chemical descriptors and achieved a maximum AUC of 0.84 and 0.73, respectively. Second, fingerprints are used to build predictive models and achieved 0.86 AUC with an accuracy of 78.70% on the validation dataset. Finally, models were developed using hybrid descriptors, which achieved a maximum of 0.87 AUC with 78.55% accuracy on the validation dataset.

Conclusion

We used the best model to identify STAT3 inhibitors in FDA-approved drugs and found few drugs (e.g., Tamoxifen and Perindopril) to manage the cytokine storm in COVID-19 patients. A webserver ""STAT3In"" (https://webs.iiitd.edu.in/raghava/stat3in/) has been developed to predict and design STAT3 inhibitors.",2021-08-21 +34042834,Motivating Developers to Use Interoperable Standards for Data in Pandemic Health Apps.,"The COVID-19 pandemic has brought along a massive increase in app development. However, most of these apps are not using interoperable data. The COMPASS project of the German COVID-19 Research Network of University Medicine (""Netzwerk Universitätsmedizin (NUM)"") tackles this issue, by offering open-source technology, best practice catalogues, and suggestions for designing interoperable pandemic health applications (https://www.netzwerk-universitaetsmedizin.de/projekte/compass). Therefore, COMPASS conceived a framework that includes automated conformity checks as well as reference implementations for more efficient and pandemic-tailored app developments. It further aims to motivate and support developers to use interoperable standards.",2021-05-01 +29931156,RBPMetaDB: a comprehensive annotation of mouse RNA-Seq datasets with perturbations of RNA-binding proteins. ,"RNA-binding proteins (RBPs) may play a critical role in gene regulation in various diseases or biological processes by controlling post-transcriptional events such as polyadenylation, splicing and mRNA stabilization via binding activities to RNA molecules. Owing to the importance of RBPs in gene regulation, a great number of studies have been conducted, resulting in a large amount of RNA-Seq datasets. However, these datasets usually do not have structured organization of metadata, which limits their potentially wide use. 
To bridge this gap, the metadata of a comprehensive set of publicly available mouse RNA-Seq datasets with perturbed RBPs were collected and integrated into a database called RBPMetaDB. This database contains 292 mouse RNA-Seq datasets for a comprehensive list of 187 RBPs. These RBPs account for only ∼10% of all known RBPs annotated in Gene Ontology, indicating that most are still unexplored using high-throughput sequencing. This negative information provides a great pool of candidate RBPs for biologists to conduct future experimental studies. In addition, we found that DNA-binding activities are significantly enriched among RBPs in RBPMetaDB, suggesting that prior studies of these DNA- and RNA-binding factors focus more on DNA-binding activities instead of RNA-binding activities. This result reveals the opportunity to efficiently reuse these data for investigation of the roles of their RNA-binding activities. A web application has also been implemented to enable easy access and wide use of RBPMetaDB. It is expected that RBPMetaDB will be a great resource for improving understanding of the biological roles of RBPs.Database URL: http://rbpmetadb.yubiolab.org.",2018-01-01 +34418982,LncRNA MANCR positively affects the malignant progression of lung adenocarcinoma.,"

Background

LncRNA MANCR (mitosis-related lncRNA, LINC00704) is deemed as a pivotal regulator in various cancers, yet the biological function it performs in lung adenocarcinoma (LUAD) was rarely reported. We made an in-depth study to clarify its effect during the progression of this cancer.

Methods

Expression data and clinical information were first accessed from TCGA LUAD dataset ( https://portal.gdc.cancer.gov/repository ). Differentially expressed lncRNAs were identified. R package ""survival"" determined the survival significance of the lncRNA MANCR. GSEA software was applied to conduct single sample enrichment analysis. qRT-PCR was used to examine MANCR expression. The expression levels of related proteins were tested using Western blot assay. The impact of MANCR on cancer cell biological behaviors was investigated via cell function experiments.

Results

MANCR was significantly upregulated in LUAD cells. It also resulted in a poor prognosis. When MANCR expression was down-regulated, the expression of proteins related to invasion and migration, cell cycle and proliferation was decreased, while the expression of proteins associated with apoptosis was elevated. Furthermore, in vitro experiments revealed that silencing MANCR inhibited cancer cell functions, blocked cell cycle progression while promoting cell apoptosis.

Conclusion

LncRNA MANCR can lead to enhanced proliferative, invasive and migratory abilities of cancer cells while reducing cell apoptosis. Hence, MANCR might be a novel biomarker of LUAD.",2021-08-21 +34402371,Evaluation of fMRI activation in hemiparetic stroke patients after rehabilitation with low-frequency repetitive transcranial magnetic stimulation and intensive occupational therapy.,"Purpose: To evaluate activity changes associated with the intervention of low-frequency repetitive transcranial magnetic stimulation (rTMS) and intensive occupational therapy (OT) after stroke using functional magnetic resonance (fMRI). Methods: Seventy stroke patients were scanned while performing finger tapping tasks twice, before and 12 days after the intervention. Recovery of motor functions was assessed using Fugl-Meyer Assessment (FMA) and Wolf Motor Function Test-Functional Ability Scale (WMFT-FAS) for upper extremity at each time point. An fMRI analysis was performed, and a region of interest (ROI) analysis was conducted using percentage signal changes (% SC) to determine the magnitude of activation. Results: FMA and WMFT-FAS were significantly increased from pre-intervention to post-intervention. Intervention related activations were seen in the ipsilesional premotor cortex (PMC) and primary motor cortex (M1), thalamo-cortico regions with the paretic hand movements. With the unaffected hand movements, significant clusters in the contralesional primary somatosensory cortex (S1), superior parietal cortex, and bilateral cerebellum were observed. 
The ROI-based analysis revealed that ipsilesional M1, contralesional PMC, and supplementary motor area (SMA) showed significantly higher results with the paretic hand movements, a trend toward a significant decrease in the contralesional S1 with the unaffected hand movements from the pre-intervention to post-intervention. Conclusions: Our findings suggest that gains in motor functions produced by the intervention of rTMS and intensive OT in hemiparesis stroke patients may be associated with the ipsilesional hemisphere and contralesional hemisphere as well. Identifying rTMS and OT intervention based on cortical patterns may help to implement rTMS in motor rehabilitation after stroke. Supplementary data for this article is available online at https://doi.org/10.1080/00207454.2021.1968858 .",2021-09-15 +33288787,Meta-analysis of viscosity of aqueous deep eutectic solvents and their components.,"Deep eutectic solvents (DES) formed by quaternary ammonium salts and hydrogen bond donors are a promising green alternative to organic solvents. Their high viscosity at ambient temperatures can limit biocatalytic applications and therefore requires fine-tuning by adjusting water content and temperature. Here, we performed a meta-analysis of the impact of water content and temperature on the viscosities of four deep eutectic solvents (glyceline, reline, N,N-diethylethanol ammonium chloride-glycerol, N,N-diethylethanol ammonium chloride-ethylene glycol), their components (choline chloride, urea, glycerol, ethylene glycol), methanol, and pure water. We analyzed the viscosity data by an automated workflow, using Arrhenius and Vogel-Fulcher-Tammann-Hesse models. The consistency and completeness of experimental data and metadata were used as an essential criterion of data quality. We found that viscosities were reported for different temperature ranges, half the time without specifying a method of desiccation, and in almost half of the reports without specifying experimental errors. 
We found that the viscosity of the pure components varied widely, but that all aqueous mixtures (except for reline) have similar excess activation energy of viscous flow [Formula: see text]= 3-5 kJ/mol, whereas reline had a negative excess activation energy ([Formula: see text]= - 19 kJ/mol). The data and workflows used are accessible at  https://doi.org/10.15490/FAIRDOMHUB.1.STUDY.767.1 .",2020-12-07 +35002013,Development of High Performance Computing Tools for Estimation of High-Resolution Surface Energy Balance Products Using sUAS Information. ,"sUAS (small-Unmanned Aircraft System) and advanced surface energy balance models allow detailed assessment and monitoring (at plant scale) of different (agricultural, urban, and natural) environments. Significant progress has been made in the understanding and modeling of atmosphere-plant-soil interactions and numerical quantification of the internal processes at plant scale. Similarly, progress has been made in ground truth information comparison and validation models. An example of this progress is the application of sUAS information using the Two-Source Surface Energy Balance (TSEB) model in commercial vineyards by the Grape Remote sensing Atmospheric Profile and Evapotranspiration eXperiment - GRAPEX Project in California. With advances in frequent sUAS data collection for larger areas, sUAS information processing becomes computationally expensive on local computers. Additionally, fragmentation of different models and tools necessary to process the data and validate the results is a limiting factor. For example, in the referred GRAPEX project, commercial software (ArcGIS and MS Excel) and Python and Matlab code are needed to complete the analysis. There is a need to assess and integrate research conducted with sUAS and surface energy balance models in a sharing platform to be easily migrated to high performance computing (HPC) resources. 
This research, sponsored by the National Science Foundation FAIR Cyber Training Fellowships, is integrating disparate software and code under a unified language (Python). The Python code for estimating the surface energy fluxes using TSEB2T model as well as the EC footprint analysis code for ground truth information comparison were hosted in myGeoHub site https://mygeohub.org/ to be reproducible and replicable.",2021-04-12 +26974042,A benchmark for comparison of dental radiography analysis algorithms.,"Dental radiography plays an important role in clinical diagnosis, treatment and surgery. In recent years, efforts have been made on developing computerized dental X-ray image analysis systems for clinical usages. A novel framework for objective evaluation of automatic dental radiography analysis algorithms has been established under the auspices of the IEEE International Symposium on Biomedical Imaging 2015 Bitewing Radiography Caries Detection Challenge and Cephalometric X-ray Image Analysis Challenge. In this article, we present the datasets, methods and results of the challenge and lay down the principles for future uses of this benchmark. The main contributions of the challenge include the creation of the dental anatomy data repository of bitewing radiographs, the creation of the anatomical abnormality classification data repository of cephalometric radiographs, and the definition of objective quantitative evaluation for comparison and ranking of the algorithms. With this benchmark, seven automatic methods for analysing cephalometric X-ray image and two automatic methods for detecting bitewing radiography caries have been compared, and detailed quantitative evaluation results are presented in this paper. Based on the quantitative evaluation results, we believe automatic dental radiography analysis is still a challenging and unsolved problem. 
The datasets and the evaluation software will be made available to the research community, further encouraging future developments in this field. (http://www-o.ntust.edu.tw/~cweiwang/ISBI2015/).",2016-02-28 +33242243,Multiscale Cross-Domain Thermochemical Knowledge-Graph.,"In this paper, we develop a set of software agents which improve a knowledge-graph containing thermodynamic data of chemical species by means of quantum chemical calculations and error-canceling balanced reactions. The knowledge-graph represents species-associated information by making use of the principles of linked data, as employed in the Semantic Web, where concepts correspond to vertices and relationships between the concepts correspond to edges of the graph. We implement this representation by means of ontologies, which formalize the definition of concepts and their relationships, as a critical step to achieve interoperability between heterogeneous data formats and software. The agents, which conduct quantum chemical calculations and derive the estimates of standard enthalpies of formation, update the knowledge-graph with newly obtained results, improving data values, and adding nodes and connections between them. A key distinguishing feature of our approach is that it extends an existing, general-purpose knowledge-graph, called J-Park Simulator (http://theworldavatar.com), and its ecosystem of autonomous agents, thus enabling seamless cross-domain applications in wider contexts. To this end, we demonstrate how quantum calculations can directly affect the atmospheric dispersion of pollutants in an industrial emission use-case.",2020-11-26 +33730865,Development of a Gestational and Lactational Physiologically Based Pharmacokinetic (PBPK) Model for Perfluorooctane Sulfonate (PFOS) in Rats and Humans and Its Implications in the Derivation of Health-Based Toxicity Values.,"

Background

There is a great concern on potential adverse effects of exposure to perfluorooctane sulfonate (PFOS) in sensitive subpopulations, such as pregnant women, fetuses, and neonates, due to its reported transplacental and lactational transfer and reproductive and developmental toxicities in animals and humans.

Objectives

This study aimed to develop a gestational and lactational physiologically based pharmacokinetic (PBPK) model in rats and humans for PFOS to aid risk assessment in sensitive human subpopulations.

Methods

Based upon existing PBPK models for PFOS, the present model addressed a data gap of including a physiologically based description of basolateral and apical membrane transporter-mediated renal reabsorption and excretion in kidneys during gestation and lactation. The model was calibrated with published rat toxicokinetic and human biomonitoring data and was independently evaluated with separate data. Monte Carlo simulation was used to address the interindividual variability.

Results

Model simulations were generally within 2-fold of observed PFOS concentrations in maternal/fetal/neonatal plasma and liver in rats and humans. Estimated fifth percentile human equivalent doses (HEDs) based on selected critical toxicity studies in rats following U.S. Environmental Protection Agency (EPA) guidelines ranged from 0.08 to 0.91 μg/kg per day. These values are lower than the HEDs estimated in U.S. EPA guidance (0.51-1.6 μg/kg per day) using an empirical toxicokinetic model in adults.

Conclusions

The results support the importance of renal reabsorption/excretion during pregnancy and lactation in PFOS dosimetry and suggest that the derivation of health-based toxicity values based on developmental toxicity studies should consider gestational/lactational dosimetry estimated from a life stage-appropriate PBPK model. This study provides a quantitative tool to aid risk reevaluation of PFOS, especially in sensitive human subpopulations, and it provides a basis for extrapolating to other per- and polyfluoroalkyl substances (PFAS). All model codes and detailed tutorials are provided in the Supplemental Materials to allow readers to reproduce our results and to use this model. https://doi.org/10.1289/EHP7671.",2021-03-17 +33997360,PCPD: Plant cytochrome P450 database and web-based tools for structural construction and ligand docking.,"Plant cytochrome P450s play key roles in the diversification and functional modification of plant natural products. Although over 200,000 plant P450 gene sequences have been recorded, only seven crystalized P450 genes severely hampered the functional characterization, gene mining and engineering of important P450s. Here, we combined Rosetta homologous modeling and MD-based refinement to construct a high-resolution P450 structure prediction process (PCPCM), which was applied to 181 plant P450s with identified functions. Furthermore, we constructed a ligand docking process (PCPLD) that can be applied for plant P450s virtual screening. 10 examples of virtual screening indicated the process can reduce about 80% screening space for next experimental verification. Finally, we constructed a plant P450 database (PCPD: http://p450.biodesign.ac.cn/), which includes the sequences, structures and functions of the 181 plant P450s, and a web service based on PCPCM and PCPLD. 
Our study not only developed methods for the P450-specific structure analysis, but also introduced a universal approach that can assist the mining and functional analysis of P450 enzymes.",2021-04-24 +31960022,Building a pipeline to solicit expert knowledge from the community to aid gene summary curation. ,"Brief summaries describing the function of each gene's product(s) are of great value to the research community, especially when interpreting genome-wide studies that reveal changes to hundreds of genes. However, manually writing such summaries, even for a single species, is a daunting task; for example, the Drosophila melanogaster genome contains almost 14 000 protein-coding genes. One solution is to use computational methods to generate summaries, but this often fails to capture the key functions or express them eloquently. Here, we describe how we solicited help from the research community to generate manually written summaries of D. melanogaster gene function. Based on the data within the FlyBase database, we developed a computational pipeline to identify researchers who have worked extensively on each gene. We e-mailed these researchers to ask them to draft a brief summary of the main function(s) of the gene's product, which we edited for consistency to produce a 'gene snapshot'. This approach yielded 1800 gene snapshot submissions within a 3-month period. We discuss the general utility of this strategy for other databases that capture data from the research literature. Database URL: https://flybase.org/.",2020-01-01 +31777312,12 Components of a Strong Vision Health System of Care: Part 3-Standardized Approach for Rescreening.,"Strong school-based vision and eye health systems include 12 key components to be implemented before, during, and after the actual vision screening event. 
The National Center for Children's Vision and Eye Health (NCCVEH) at Prevent Blindness partnered with the National Association of School Nurses (NASN) to provide guidance for school nurses for each of the 12 key components via a Vision and Eye Health webpage on the NASN website ( https://www.nasn.org/nasn-resources/practice-topics/vision-health ). This online resource is designed to support school nurses accountable for vision screening and maintaining the eye health of preschool- and school-age children. This NCCVEH/NASN webpage addresses key activities that provide overall support for a child's vision and eye health-beginning with parent/caregiver education and ending with an annual evaluation of the school's vision and eye health system. NASN School Nurse is publishing information about each of these 12 components. The May 2019 installment provided details about the 12 Components approach as a whole and Components 1 and 2: Family Education and a Comprehensive Communication/Approval Process. The July 2019 edition described Components 3 and 4: Vision Screening Tools and Procedures and Vision Health for Children With Special Health Care Needs. This article describes Component 5: Standardized Approach for Rescreening.",2019-11-28 +33489005,FIREcaller: Detecting frequently interacting regions from Hi-C data.,"Hi-C experiments have been widely adopted to study chromatin spatial organization, which plays an essential role in genome function. We have recently identified frequently interacting regions (FIREs) and found that they are closely associated with cell-type-specific gene regulation. However, computational tools for detecting FIREs from Hi-C data are still lacking. In this work, we present FIREcaller, a stand-alone, user-friendly R package for detecting FIREs from Hi-C data. FIREcaller takes raw Hi-C contact matrices as input, performs within-sample and cross-sample normalization, and outputs continuous FIRE scores, dichotomous FIREs, and super-FIREs. 
Applying FIREcaller to Hi-C data from various human tissues, we demonstrate that FIREs and super-FIREs identified, in a tissue-specific manner, are closely related to gene regulation, are enriched for enhancer-promoter (E-P) interactions, tend to overlap with regions exhibiting epigenomic signatures of cis-regulatory roles, and aid the interpretation of GWAS variants. The FIREcaller package is implemented in R and freely available at https://yunliweb.its.unc.edu/FIREcaller.",2020-12-29 +31501868,Diurnal.plant.tools: Comparative Transcriptomic and Co-expression Analyses of Diurnal Gene Expression of the Archaeplastida Kingdom.,"Almost all organisms coordinate some aspects of their biology through the diurnal cycle. Photosynthetic organisms, and plants especially, have established complex programs that coordinate physiological, metabolic and developmental processes with the changing light. The diurnal regulation of the underlying transcriptional processes is observed when groups of functionally related genes (gene modules) are expressed at a specific time of the day. However, studying the diurnal regulation of these gene modules in the plant kingdom was hampered by the large amount of data required for the analyses. To meet this need, we used gene expression data from 17 diurnal studies spanning the whole Archaeplastida kingdom (Plantae kingdom in the broad sense) to make an online diurnal database. We have equipped the database with tools that allow user-friendly cross-species comparisons of gene expression profiles, entire co-expression networks, co-expressed clusters (involved in specific biological processes), time-specific gene expression and others. We exemplify how these tools can be used by studying three important biological questions: (i) the evolution of cell division, (ii) the diurnal control of gene modules in algae and (iii) the conservation of diurnally controlled modules across species. 
The database is freely available at http://diurnal.plant.tools.",2020-01-01 +31491771,Data for the dosimetry of low- and medium-energy kV x rays.,"Following the publication of the ICRU Report 90 (2016) on key data for measurement standards in radiation dosimetry, where ionometric air-kerma standards for kilovoltage (kV) x-ray beams are estimated to change by up to about 0.5%, an update of the backscatter factors and water/air ratios of mass energy-absorption coefficients in kV dosimetry protocols was deemed necessary for consistency through the entire dosimetry chain. In addition, numerical methods and Monte Carlo (MC) systems that did not exist at the time when air-kerma protocols were developed, are currently available. Calculations of the chamber-independent quantities required for the dosimetry of low- and medium-energy kV x rays were carried out using a consistent set of key data throughout the complete process. The quantities were based on MC calculations of a database for a dense grid of monoenergetic photons for different beam diameters and source-to-surface distances, followed by an averaging procedure to compute water/air energy-absorption coefficient ratios and backscatter factors for 342 experimental and calculated kV spectra. It was found that for a given HVL and field size the variation of backscatter factors for different kVs can be up to about 5%, a trend confirmed with independent calculations that shows the limitation of using only the HVL for the beam quality specification of kV x rays. Extensive tables as a function of beam quality in terms of kV and HVL were developed for configurations that might be encountered in clinical practice; the data are also available in the form of a GUI web app at http://52.233.195.208. 
Results were compared with data used at PTB for deriving low-energy [Formula: see text] ion chamber calibration coefficients, finding agreement within about [Formula: see text]0.5%, and with independent full MC kerma calculations that agreed within better than about 1%. Compared with the data in the AAPM TG-61 protocol (Ma et al 2001 Med. Phys. 28 868-93) there was in general good agreement for the ratios of mass energy-absorption coefficients, although differences of up to 1.5% resulted when both kV and HVL were taken into account; more significant discrepancies, within about 2%-6%, were obtained for backscatter factors, the present values being generally higher.",2019-10-21 +33840048,Rank-preserving biclustering algorithm: a case study on miRNA breast cancer.,"Effective biomarkers aid in the early diagnosis and monitoring of breast cancer and thus play an important role in the treatment of patients suffering from the disease. Growing evidence indicates that alteration of expression levels of miRNA is one of the principal causes of cancer. We analyze breast cancer miRNA data to discover a list of biclusters as well as breast cancer miRNA biomarkers which can help to understand better this critical disease and take important clinical decisions for treatment and diagnosis. In this paper, we propose a pattern-based parallel biclustering algorithm termed Rank-Preserving Biclustering (RPBic). The key strategy is to identify rank-preserved rows under a subset of columns based on a modified version of all substrings common subsequence (ALCS) framework. To illustrate the effectiveness of the RPBic algorithm, we consider synthetic datasets and show that RPBic outperforms relevant biclustering algorithms in terms of relevance and recovery. For breast cancer data, we identify 68 biclusters and establish that they have strong clinical characteristics among the samples. The differentially co-expressed miRNAs are found to be involved in KEGG cancer related pathways. 
Moreover, we identify frequency-based biomarkers (hsa-miR-410, hsa-miR-483-5p) and network-based biomarkers (hsa-miR-454, hsa-miR-137) which we validate to have strong connectivity with breast cancer. The source code and the datasets used can be found at http://agnigarh.tezu.ernet.in/~rosy8/Bioinformatics_RPBic_Data.rar . Graphical Abstract.",2021-04-11 +26519400,KEGG Bioinformatics Resource for Plant Genomics and Metabolomics.,"In the era of high-throughput biology it is necessary to develop not only elaborate computational methods but also well-curated databases that can be used as reference for data interpretation. KEGG ( http://www.kegg.jp/ ) is such a reference knowledge base with two specific aims. One is to compile knowledge on high-level functions of the cell and the organism in terms of the molecular interaction and reaction networks, which is implemented in KEGG pathway maps, BRITE functional hierarchies, and KEGG modules. The other is to expand knowledge on genes and proteins involved in the molecular networks from experimentally observed organisms to other organisms using the concept of orthologs, which is implemented in the KEGG Orthology (KO) system. Thus, KEGG is a generic resource applicable to all organisms and enables interpretation of high-level functions from genomic and molecular data. Here we first present a brief overview of the entire KEGG resource, and then give an introduction of how to use KEGG in plant genomics and metabolomics research.",2016-01-01 +30208340,CCGD-ESCC: A Comprehensive Database for Genetic Variants Associated with Esophageal Squamous Cell Carcinoma in Chinese Population.,"Esophageal squamous-cell carcinoma (ESCC) is one of the most lethal malignancies in the world and occurs at particularly higher frequency in China. 
While several genome-wide association studies (GWAS) of germline variants and whole-genome or whole-exome sequencing studies of somatic mutations in ESCC have been published, there is no comprehensive database publicly available for this cancer. Here, we developed the Chinese Cancer Genomic Database-Esophageal Squamous Cell Carcinoma (CCGD-ESCC) database, which contains the associations of 69,593 single nucleotide polymorphisms (SNPs) with ESCC risk in 2022 cases and 2039 controls, survival time of 1006 ESCC patients (survival GWAS) and gene expression (expression quantitative trait loci, eQTL) in 94 ESCC patients. Moreover, this database also provides the associations between 8833 somatic mutations and survival time in 675 ESCC patients. Our user-friendly database is a resource useful for biologists and oncologists not only in identifying the associations of genetic variants or somatic mutations with the development and progression of ESCC but also in studying the underlying mechanisms for tumorigenesis of the cancer. CCGD-ESCC is freely accessible at http://db.cbi.pku.edu.cn/ccgd/ESCCdb.",2018-08-01 +34281453,The time to diagnosis and survival in children with solid tumors and lymphoma: results from a single center in Turkey.,"The longer diagnostic intervals in low- and middle-income countries have been proposed among the possible causes of poorer outcomes in children with cancer. In this single-center study from Turkey, the diagnostic intervals and survival status of 138 children with solid tumors and lymphoma (excluding leukemia) were prospectively evaluated. 
The median total interval (from the beginning of the first cancer-related symptom to the first day of the cancer-specific therapy), the median patient interval (the time interval from the notification of the first cancer-related symptom to the first admission to a healthcare facility), and the median physician interval (the time interval between the first healthcare admission to the first pediatric oncology visit) were 65, 26, and 24 days, respectively. The estimated 5-year overall survival and event-free survival rates were 80.7% and 69.1%, respectively. The longer time intervals were correlated with age, paternal education, localization, and tumor type. Interestingly, none of the time parameters were found to be associated with survival on regression analysis. In conclusion, the diagnostic delay in children with cancer is multifactorial, and the patient- and disease-related factors are as important as the time intervals on survival. Supplemental data for this article is available online at https://doi.org/10.1080/08880018.2021.1951903.",2021-07-20 +31807141,SEQdata-BEACON: a comprehensive database of sequencing performance and statistical tools for performance evaluation and yield simulation in BGISEQ-500.,"

Background

The sequencing platform BGISEQ-500 is based on DNBSEQ technology and provides high throughput with low costs. This sequencer has been widely used in various areas of scientific and clinical research. A better understanding of the sequencing process and performance of this system is essential for stabilizing the sequencing process, accurately interpreting sequencing results and efficiently solving sequencing problems. To address these concerns, a comprehensive database, SEQdata-BEACON, was constructed to accumulate the run performance data in BGISEQ-500.

Results

A total of 60 BGISEQ-500 instruments in the BGI-Wuhan lab were used to collect sequencing performance data. Lanes in paired-end 100 (PE100) sequencing using 10 bp barcode were chosen, and each lane was assigned a unique entry number as its identification number (ID). From November 2018 to April 2019, 2236 entries were recorded in the database containing 65 metrics about sample, yield, quality, machine state and supplies information. Using a correlation matrix, 52 numerical metrics were clustered into three groups signifying yield-quality, machine state and sequencing calibration. The distributions of the metrics also delivered information about patterns and rendered clues for further explanation or analysis of the sequencing process. Using the data of a total of 200 cycles, a linear regression model well simulated the final outputs. Moreover, the predicted final yield could be provided in the 15th cycle of the early stage of sequencing, and the corresponding R2 of the 200th and 15th cycle models were 0.97 and 0.81, respectively. The model was run with the test sets obtained from May 2019 to predict the yield, which resulted in an R2 of 0.96. These results indicate that our simulation model was reliable and effective.

Conclusions

Data sources, statistical findings and application tools provide a constantly updated reference for BGISEQ-500 users to comprehensively understand DNBSEQ technology, solve sequencing problems and optimize run performance. These resources are available on our website http://seqBEACON.genomics.cn:443/home.html.",2019-11-15 +33633365,ArchR is a scalable software package for integrative single-cell chromatin accessibility analysis.,"The advent of single-cell chromatin accessibility profiling has accelerated the ability to map gene regulatory landscapes but has outpaced the development of scalable software to rapidly extract biological meaning from these data. Here we present a software suite for single-cell analysis of regulatory chromatin in R (ArchR; https://www.archrproject.com/ ) that enables fast and comprehensive analysis of single-cell chromatin accessibility data. ArchR provides an intuitive, user-focused interface for complex single-cell analyses, including doublet removal, single-cell clustering and cell type identification, unified peak set generation, cellular trajectory identification, DNA element-to-gene linkage, transcription factor footprinting, mRNA expression level prediction from chromatin accessibility and multi-omic integration with single-cell RNA sequencing (scRNA-seq). 
Enabling the analysis of over 1.2 million single cells within 8 h on a standard Unix laptop, ArchR is a comprehensive software suite for end-to-end analysis of single-cell chromatin accessibility that will accelerate the understanding of gene regulation at the resolution of individual cells.",2021-02-25 +34411491,Expanding Availability of Speech-Generating Device Evaluation and Treatment to People With Amyotrophic Lateral Sclerosis (pALS) Through Telepractice: Perspectives of pALS and Communication Partners.,"Purpose To examine the experiences of people with ALS (pALS) and their communication partners (cALS) regarding receiving speech-generating device (SGD) evaluation and treatment via telepractice. Method Eight pALS along with a primary cALS participated in telepractice SGD evaluation and treatment with an augmentative and alternative communication (AAC) specialist and representatives from multiple SGD vendors. Participants were interviewed postevaluation and post-SGD training to examine their experiences. Mixed methods data were collected through Likert scale responses and qualitative interviews. Results Telepractice SGD evaluation and training were feasible and resulted in all pALS receiving SGDs they were able to use to communicate. In both Likert rating items and qualitative interviews, participants rated the telepractice experience very highly in terms of giving them access to AAC services via an AAC specialist that they would not have otherwise been able to access, and doing so in a format that was possible given their limitations in mobility, endurance, and caregiver availability. Suggestions for improving the telepractice experience were provided. Conclusions Telepractice should be considered as an option to provide vital SGD services to patients who are geographically remote, mobility impaired, unable to leave their home, experience fatigue with travel, or otherwise would not have access to these specialized services. 
Telepractice allows patients to preserve their time and energy for the assessment and treatment sessions, resulting in perhaps deeper and more frequent engagement in evaluation and training. Telepractice could serve as an alternative to outpatient, in-person evaluations, or be utilized in conjunction with in-person appointments. Supplemental Material https://doi.org/10.23641/asha.15094257.",2021-08-19 +33124659,A molecular map of lung neuroendocrine neoplasms. ,"Lung neuroendocrine neoplasms (LNENs) are rare solid cancers, with most genomic studies including a limited number of samples. Recently, generating the first multi-omic dataset for atypical pulmonary carcinoids and the first methylation dataset for large-cell neuroendocrine carcinomas led us to the discovery of clinically relevant molecular groups, as well as a new entity of pulmonary carcinoids (supra-carcinoids). To promote the integration of LNENs molecular data, we provide here detailed information on data generation and quality control for whole-genome/exome sequencing, RNA sequencing, and EPIC 850K methylation arrays for a total of 84 patients with LNENs. We integrate the transcriptomic data with other previously published data and generate the first comprehensive molecular map of LNENs using the Uniform Manifold Approximation and Projection (UMAP) dimension reduction technique. We show that this map captures the main biological findings of previous studies and can be used as reference to integrate datasets for which RNA sequencing is available. The generated map can be interactively explored and interrogated on the UCSC TumorMap portal (https://tumormap.ucsc.edu/?p=RCG_lungNENomics/LNEN). 
The data, source code, and compute environments used to generate and evaluate the map as well as the raw data are available, respectively, in a Nextjournal interactive notebook (https://nextjournal.com/rarecancersgenomics/a-molecular-map-of-lung-neuroendocrine-neoplasms/) and at the EMBL-EBI European Genome-phenome Archive and Gene Expression Omnibus data repositories. We provide data and all resources needed to integrate them with future LNENs transcriptomic studies, allowing meaningful conclusions to be drawn that will eventually lead to a better understanding of this rare understudied disease.",2020-10-01 +28402545,ORIO (Online Resource for Integrative Omics): a web-based platform for rapid integration of next generation sequencing data.,"Established and emerging next generation sequencing (NGS)-based technologies allow for genome-wide interrogation of diverse biological processes. However, accessibility of NGS data remains a problem, and few user-friendly resources exist for integrative analysis of NGS data from different sources and experimental techniques. Here, we present Online Resource for Integrative Omics (ORIO; https://orio.niehs.nih.gov/), a web-based resource with an intuitive user interface for rapid analysis and integration of NGS data. To use ORIO, the user specifies NGS data of interest along with a list of genomic coordinates. Genomic coordinates may be biologically relevant features from a variety of sources, such as ChIP-seq peaks for a given protein or transcription start sites from known gene models. ORIO first iteratively finds read coverage values at each genomic feature for each NGS dataset. Data are then integrated using clustering-based approaches, giving hierarchical relationships across NGS datasets and separating individual genomic features into groups. 
In focusing its analysis on read coverage, ORIO makes limited assumptions about the analyzed data; this allows the tool to be applied across data from a variety of experiments and techniques. Results from analysis are presented in dynamic displays alongside user-controlled statistical tests, supporting rapid statistical validation of observed results. We emphasize the versatility of ORIO through diverse examples, ranging from NGS data quality control to characterization of enhancer regions and integration of gene expression information. Easily accessible on a public web server, we anticipate wide use of ORIO in genome-wide investigations by life scientists.",2017-06-01 +31882993,ShinyGO: a graphical gene-set enrichment tool for animals and plants.,"

Motivation

Gene lists are routinely produced from various omic studies. Enrichment analysis can link these gene lists with underlying molecular pathways and functional categories such as gene ontology (GO) and other databases.

Results

To complement existing tools, we developed ShinyGO based on a large annotation database derived from Ensembl and STRING-db for 59 plant, 256 animal, 115 archeal and 1678 bacterial species. ShinyGO's novel features include graphical visualization of enrichment results and gene characteristics, and application program interface access to KEGG and STRING for the retrieval of pathway diagrams and protein-protein interaction networks. ShinyGO is an intuitive, graphical web application that can help researchers gain actionable insights from gene-sets.

Availability and implementation

http://ge-lab.org/go/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +33346833,Minimally-overlapping words for sequence similarity search. ,"Analysis of genetic sequences is usually based on finding similar parts of sequences, e.g. DNA reads and/or genomes. For big data, this is typically done via ""seeds"": simple similarities (e.g. exact matches) that can be found quickly. For huge data, sparse seeding is useful, where we only consider seeds at a subset of positions in a sequence. Here we study a simple sparse-seeding method: using seeds at positions of certain ""words"" (e.g. ac, at, gc, or gt). Sensitivity is maximized by using words with minimal overlaps. That is because, in a random sequence, minimally-overlapping words are anti-clumped. We provide evidence that this is often superior to acclaimed ""minimizer"" sparse-seeding methods. Our approach can be unified with design of inexact (spaced and subset) seeds, further boosting sensitivity. Thus, we present a promising approach to sequence similarity search, with open questions on how to optimize it. Software to design and test minimally-overlapping words is freely available at https://gitlab.com/mcfrith/noverlap. Supplementary data are available at Bioinformatics online.",2020-12-21 +27679478,Influenza Research Database: An integrated bioinformatics resource for influenza virus research.,"The Influenza Research Database (IRD) is a U.S. National Institute of Allergy and Infectious Diseases (NIAID)-sponsored Bioinformatics Resource Center dedicated to providing bioinformatics support for influenza virus research. IRD facilitates the research and development of vaccines, diagnostics and therapeutics against influenza virus by providing a comprehensive collection of influenza-related data integrated from various sources, a growing suite of analysis and visualization tools for data mining and hypothesis generation, personal workbench spaces for data storage and sharing, and active user community support. 
Here, we describe the recent improvements in IRD including the use of cloud and high performance computing resources, analysis and visualization of user-provided sequence data with associated metadata, predictions of novel variant proteins, annotations of phenotype-associated sequence markers and their predicted phenotypic effects, hemagglutinin (HA) clade classifications, an automated tool for HA subtype numbering conversion, linkouts to disease event data and the addition of host factor and antiviral drug components. All data and tools are freely available without restriction from the IRD website at https://www.fludb.org.",2016-09-26 +29718411,BUSCA: an integrative web server to predict subcellular localization of proteins.,"Here, we present BUSCA (http://busca.biocomp.unibo.it), a novel web server that integrates different computational tools for predicting protein subcellular localization. BUSCA combines methods for identifying signal and transit peptides (DeepSig and TPpred3), GPI-anchors (PredGPI) and transmembrane domains (ENSEMBLE3.0 and BetAware) with tools for discriminating subcellular localization of both globular and membrane proteins (BaCelLo, MemLoci and SChloro). Outcomes from the different tools are processed and integrated for annotating subcellular localization of both eukaryotic and bacterial protein sequences. We benchmark BUSCA against protein targets derived from recent CAFA experiments and other specific data sets, reporting performance at the state-of-the-art. BUSCA scores better than all other evaluated methods on 2732 targets from CAFA2, with a F1 value equal to 0.49 and among the best methods when predicting targets from CAFA3. 
We propose BUSCA as an integrated and accurate resource for the annotation of protein subcellular localization.",2018-07-01 +27899660,CEBS: a comprehensive annotated database of toxicological data.,"The Chemical Effects in Biological Systems database (CEBS) is a comprehensive and unique toxicology resource that compiles individual and summary animal data from the National Toxicology Program (NTP) testing program and other depositors into a single electronic repository. CEBS has undergone significant updates in recent years and currently contains over 11 000 test articles (exposure agents) and over 8000 studies including all available NTP carcinogenicity, short-term toxicity and genetic toxicity studies. Study data provided to CEBS are manually curated, accessioned and subject to quality assurance review prior to release to ensure high quality. The CEBS database has two main components: data collection and data delivery. To accommodate the breadth of data produced by NTP, the CEBS data collection component is an integrated relational design that allows the flexibility to capture any type of electronic data (to date). The data delivery component of the database comprises a series of dedicated user interface tables containing pre-processed data that support each component of the user interface. The user interface has been updated to include a series of nine Guided Search tools that allow access to NTP summary and conclusion data and larger non-NTP datasets. The CEBS database can be accessed online at http://www.niehs.nih.gov/research/resources/databases/cebs/.",2016-11-28 +33304957,Thermomechanical and microhardness data of melamine-formaldehyde-based self-healing resin film able to undergo reversible crosslinking via Diels-Alder reaction.,"The data presented in this article characterize the thermomechanical and microhardness properties of a novel melamine-formaldehyde resin (MF) intended for the use as a self-healing surface coating. 
The investigated MF resin is able to undergo reversible crosslinking via Diels Alder reactive groups. The microhardness data were obtained from nanoindentation measurements performed on solid resin film samples at different stages of the self-healing cycle. Thermomechanical analysis was performed under dynamic load conditions. The data provide supplemental material to the manuscript published by Urdl et al. 2020 (http://doi.org/10.1016/j.eurpolymj.2020.109601, [1]) on the self-healing performance of this resin, where a more thorough discussion on the preparation, the properties of this coating material and its application in impregnated paper-based decorative laminates can be found [1].",2020-11-21 +,Novel genes in response to varying water deficit in oil crop Camelina sativa,"Camelina [Camelina sativa (L.) Crtz.] of the Brassicaceae family is an annual oilseed crop that has received increasing attention as a biofuel feedstock because of its excellent adaptability to low fertility drylands. Even though moisture is one of the critical factors affecting plant growth and seed yield, genetic studies on response to water deficit in camelina have been limited. This study aimed at identifying camelina genes responding to water deficit and validating their expression pattern. Camelina plants were subjected to 1.9 kPa (well-watered), 4.6 kPa (3 days after water stop), 73.2 kPa (17 days after water stop), and 2.0 kPa (rehydrated) soil water regimes. The transcriptome of test and control plants was analyzed using RNA sequences for de novo sequence assembly followed by gene annotation, which was performed against 39 plant species of the Phytozome database (http://www.phytozome.net). 
Six metabolic novel genes that responded to water deficit in camelina were selected for further analysis: PYRIMIDINE 4, CYP89A9, FASCICLIN-like arabinogalactan, eukaryotic aspartyl protease family protein, photosystem II light harvesting complex gene 2.3, and adenosylmethionine decarboxylase family protein. The expression pattern of these genes obtained by real-time reverse transcription PCR and quantitative reverse transcription PCR analysis were similar to that obtained by transcriptome analysis. The up-regulated genes are known to function in protecting cells against water deficit, whereas the genes with reduced expression were found to be suppressed by lower soil water causing a decrease in their function. The information obtained from the molecular validation of the changes in expression patterns of these six metabolic genes presents insight as to which genes of the camelina may account for actively responding to soil water deficit, which will be target loci for investigating the allelic variations or SNP screening in the camelina germplasm for future breeding program.",2019-04-01 +28203705,Pln24NT: a web resource for plant 24-nt siRNA producing loci.,"

Abstract

In plants, 24 nucleotide small interfering RNAs (24-nt siRNAs) account for a large percentage of the total siRNA pool, and they play an important role in guiding plant-specific RNA-directed DNA methylation (RdDM), which transcriptionally silences transposon elements, transgenes, repetitive sequences and some endogenous genes. Several loci in plant genomes produce clusters of 24-nt RNAs, and these loci are receiving increasing attention from the research community. However, at present there is no bioinformatics resource dedicated to 24-nt siRNA loci and their derived 24-nt siRNAs. Thus, in this study, Pln24NT, a freely available web resource, was created to centralize 24-nt siRNA loci and 24-nt siRNA information, including fundamental locus information, expression profiles and annotation of transposon elements, from next-generation sequencing (NGS) data for 10 popular plant species. An intuitive web interface was also developed for convenient searching and browsing, and analytical tools were included to help users flexibly analyze their own siRNA NGS data. Pln24NT will help the plant research community to discover and characterize 24-nt siRNAs, and may prove useful for studying the roles of siRNA in RNA-directed DNA methylation in plants.

Availability and implementation

http://bioinformatics.caf.ac.cn/Pln24NT .

Contact

suxh@caf.ac.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +32700975,Cytotoxicity Burst? Differentiating Specific from Nonspecific Effects in Tox21 in Vitro Reporter Gene Assays.,"

Background

High-throughput screening of chemicals with in vitro reporter gene assays in Tox21 has produced a large database on cytotoxicity and specific modes of action. However, the validity of some of the reported activities is questionable due to the ""cytotoxicity burst,"" which refers to the supposition that many stress responses are activated in a nonspecific way at concentrations close to cell death.

Objectives

We propose a pragmatic method to identify whether reporter gene activation is specific or cytotoxicity-triggered by comparing the measured effects with baseline toxicity.

Methods

Baseline toxicity, also termed narcosis, is the minimal toxicity any chemical causes. Quantitative structure-activity relationships (QSARs) developed for baseline toxicity in mammalian reporter gene cell lines served as anchors to define the chemical-specific threshold for the cytotoxicity burst and to evaluate the degree of specificity of the reporter gene activation. Measured 10% effect concentrations were related to measured or QSAR-predicted 10% cytotoxicity concentrations yielding specificity ratios (SR). We applied this approach to our own experimental data and to ∼8,000 chemicals that were tested in six of the high-throughput Tox21 reporter gene assays.

Results

Confirmed baseline toxicants activated reporter gene activity around cytotoxic concentrations triggered by the cytotoxicity burst. In six Tox21 assays, 37%-87% of the active hits were presumably caused by the cytotoxicity burst (SR<1) and only 2%-14% were specific with SR≥10 against experimental cytotoxicity but 75%-97% were specific against baseline toxicity. This difference was caused by a large fraction of chemicals showing excess cytotoxicity.

Conclusions

The specificity analysis for measured in vitro effects identified whether a cytotoxicity burst had likely occurred. The SR-analysis not only prevented false positives, but it may also serve as measure for relative effect potency and can be used for quantitative in vitro-in vivo extrapolation and risk assessment of chemicals. https://doi.org/10.1289/EHP6664.",2020-07-23 +32773643,Genomic variance of Open Reading Frames (ORFs) and Spike protein in severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).,"

Background

The outbreak of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has caused severe pneumonia at December 2019. Since then, it has been wildly spread from Wuhan, China, to Asia, European, and United States to become the pandemic worldwide. Now coronavirus disease 2019 were globally diagnosed over 3 084 740 cases with mortality of 212 561 toll. Current reports variants are found in SARS-CoV-2, majoring in functional ribonucleic acid (RNA) to transcribe into structural proteins as transmembrane spike (S) glycoprotein and the nucleocapsid (N) protein holds the virus RNA genome; the envelope (E) and membrane (M) alone with spike protein form viral envelope. The nonstructural RNA genome includes ORF1ab, ORF3, ORF6, 7a, 8, and ORF10 with highly conserved information for genome synthesis and replication in ORF1ab.

Methods

We apply genomic alignment analysis to observe SARS-CoV-2 sequences from GenBank (http://www.ncbi.nim.nih.gov/genebank/): MN908947 (China, C1); MN985325 (United States: WA, UW); MN996527 (China, C2); MT007544 (Australia: Victoria, A1); MT027064 (United States: CA, UC); MT039890 (South Korea, K1); MT066175 (Taiwan, T1); MT066176 (Taiwan, T2); LC528232 (Japan, J1); and LC528233 (Japan, J2) and Global Initiative on Sharing All Influenza Data database (https://www.gisaid.org). We adopt Multiple Sequence Alignments web from Clustalw (https://www.genome.jp/tools-bin/clustalw) and Geneious web (https://www.geneious.com).

Results

We analyze database by genome alignment search for nonstructural ORFs and structural E, M, N, and S proteins. Mutations in ORF1ab, ORF3, and ORF6 are observed; specific variants in spike region are detected.

Conclusion

We perform genomic analysis and comparative multiple sequence of SARS-CoV-2. Large scaling sequence alignments trace to localize and catch different mutant strains in United possibly to transmit severe deadly threat to humans. Studies about the biological symptom of SARS-CoV-2 in clinic animal and humans will be applied and manipulated to find mechanisms and shield the light for understanding the origin of pandemic crisis.",2020-08-01 +,First Report of Phytophthora cinnamomi Causing Root and Crown Rot of Ficus carica in Turkey,"Turkey is the largest fig (Ficus carica L.) producing country; production consists of more than 50,000 ha and 300 kt of fruit annually. In April 2017, about 20% of 2-year-old fig trees (cv. Bursa Siyahı) exhibited wilting, foliar chlorosis, and rapid tree decline in an experimental orchard of 1,500 trees at the Serince Station of the GAP Agricultural Research Institute, Şanlıurfa Province, southeastern Turkey (36°53ʹ09ʺ N; 38°55ʹ29ʺ E). The initial symptoms evident in affected plants were lack of vigor and foliar chlorosis, which developed into wilting and whole tree decline within 7 to 10 days. By this stage, dark-brown to black necrosis had developed under the bark of the crown, extending from the tap root and other large roots, and feeder roots had decayed. Crown and root samples of 10 symptomatic plants were surface sterilized in 70% ethanol and cultured on Phytophthora-selective PARPH-GACMA medium (Türkölmez et al. 2015). After incubation in the dark at 26°C for 7 to 10 days, hyphal tips of Phytophthora-like colonies growing from the crown and root tissues were transferred to GACMA. These isolates developed coralloid hyphae, with spherical lateral and terminal swellings 34 ± 11 μm in diameter, single or in clusters, and globose, terminal, and thin-walled chlamydospores 32 ± 13 μm in diameter. 
Sporangia, which were produced in nonsterilized soil extract (10 g/liter), were nonpapillate and ovoid, obpyriform, or ellipsoidal with an inconspicuous apical thickening, 60.1 ± 9.6 × 39.5 ± 3.5 µm and a length-to-width ratio of 1.5. These morphological characteristics are consistent with those of Phytophthora cinnamomi (Erwin and Ribeiro 2005). The rDNA internal transcribed spacer (ITS) and β-tubulin gene of two isolates (PH101 and PH102) were amplified with ITS6/ITS4 and Bt2a/Bt2b primer pairs, respectively, and the amplicons sequenced. The 650- and 662-bp ITS nucleotide sequences (GenBank accessions MH680960 and MH680961) had 99.69 and 100% identity with the sequences of P. cinnamomi isolates PD_01134 and PD_02690, respectively, in the Phytophthora Database (http://www.phytophthoradb.org/blast.php). The sequences MH680962 (434 bp) and MH680963 (1,140 bp) for β-tubulin had 100 and 99.73% identity with P. cinnamomi isolates PD_00138 and PD_00138, respectively, thus confirming the morphological identification. Pathogenicity tests were performed on 1-year-old potted F. carica ‘Bursa Siyahı’ seedlings. A 5-mm-diameter colonized GACMA disk was used for inoculating a wound (5-mm diameter) made in the bark of one of the taproots of each of the five seedlings. Five control plants received sterile GACMA disks. Inoculation points were sealed with sterile moist cotton and Parafilm and were covered with sterile soil. Treated plants were maintained in a growth chamber with a 16-h/8-h (light/dark) photoperiod at 26°C and 55% relative humidity and were watered daily. The experiment was repeated. Within 6 weeks, inoculated plants in both experiments developed extensive root necrosis, root loss, and discoloration and death of leaves similar to those observed in naturally affected trees. Control plants remained healthy. P. cinnamomi was only reisolated from inoculated plants, thus fulfilling Koch’s postulates. In Florida, a similar fig disease caused by P. 
cinnamomi has been reported (Alfieri et al. 1984). In Turkey, P. cinnamomi has previously been reported on walnut (Kurbetli 2013) and protea (Tok and Avci 2015). This is the first report of P. cinnamomi infection of figs in Turkey. This polyphagous pathogen represents a potential threat to commercial fig plantings as well as to other crops in this region.",2019-04-01 +34038548,Estimage: a webserver hub for the computation of methylation age.,"Methylage is an epigenetic marker of biological age that exploits the correlation between the methylation state of specific CG dinucleotides (CpGs) and chronological age (in years), gestational age (in weeks), cellular age (in cell cycles or as telomere length, in kilobases). Using DNA methylation data, methylage is measurable via the so called epigenetic clocks. Importantly, alterations of the correlation between methylage and age (age acceleration or deceleration) have been stably associated with pathological states and occur long before clinical signs of diseases become overt, making epigenetic clocks a potentially disruptive tool in preventive, diagnostic and also in forensic applications. Nevertheless, methylage dependency from CpGs selection, mathematical modelling, tissue specificity and age range, still makes the potential of this biomarker limited. In order to enhance model comparisons, interchange, availability, robustness and standardization, we organized a selected set of clocks within a hub webservice, EstimAge (Estimate of methylation Age, http://estimage.iac.rm.cnr.it), which intuitively and informatively enables quick identification, computation and comparison of available clocks, with the support of standard statistics.",2021-07-01 +33367483,A non-linear regression method for estimation of gene-environment heritability. ,"Gene-environment (GxE) interactions are one of the least studied aspects of the genetic architecture of human traits and diseases. 
The environment of an individual is inherently high dimensional, evolves through time and can be expensive and time consuming to measure. The UK Biobank study, with all 500,000 participants having undergone an extensive baseline questionnaire, represents a unique opportunity to assess GxE heritability for many traits and diseases in a well powered setting. We have developed a randomized Haseman-Elston non-linear regression method applicable when many environmental variables have been measured on each individual. The method (GPLEMMA) simultaneously estimates a linear environmental score (ES) and its GxE heritability. We compare the method via simulation to a whole-genome regression approach (LEMMA) for estimating GxE heritability. We show that GPLEMMA is more computationally efficient than LEMMA on large datasets, and produces results highly correlated with those from LEMMA when applied to simulated data and real data from the UK Biobank. Software implementing the GPLEMMA method is available from https://jmarchini.org/gplemma/. Supplementary data are available at Bioinformatics online.",2020-12-26 +33765910,BugSeq: a highly accurate cloud platform for long-read metagenomic analyses.,"

Background

As the use of nanopore sequencing for metagenomic analysis increases, tools capable of performing long-read taxonomic classification (ie. determining the composition of a sample) in a fast and accurate manner are needed. Existing tools were either designed for short-read data (eg. Centrifuge), take days to analyse modern sequencer outputs (eg. MetaMaps) or suffer from suboptimal accuracy (eg. CDKAM). Additionally, all tools require command line expertise and do not scale in the cloud.

Results

We present BugSeq, a novel, highly accurate metagenomic classifier for nanopore reads. We evaluate BugSeq on simulated data, mock microbial communities and real clinical samples. On the ZymoBIOMICS Even and Log communities, BugSeq (F1 = 0.95 at species level) offers better read classification than MetaMaps (F1 = 0.89-0.94) in a fraction of the time. BugSeq significantly improves on the accuracy of Centrifuge (F1 = 0.79-0.93) and CDKAM (F1 = 0.91-0.94) while offering competitive run times. When applied to 41 samples from patients with lower respiratory tract infections, BugSeq produces greater concordance with microbiological culture and qPCR compared with ""What's In My Pot"" analysis.

Conclusion

BugSeq is deployed to the cloud for easy and scalable long-read metagenomic analyses. BugSeq is freely available for non-commercial use at https://bugseq.com/free .",2021-03-25 +34242538,Loneliness and psychological distress in everyday life among Latinx college students.,"

Objective

Changes in surroundings and social relationships may heighten feelings of loneliness, suggesting the need to measure as a state. This study tested whether loneliness fluctuates within and across days and the resultant associations with psychological distress. Further it tested familism as a moderator as endorsing this cultural value may buffer the negative effects of state loneliness.

Participants

Participants (n = 220) were Latinx undergraduate students.

Methods

Students reported their loneliness levels and psychological distress twice a day for two weeks using an ecological momentary assessment approach.

Results

Results showed that experiencing a higher than usual level of loneliness predicted greater sadness, stress, and anxiety at both the moment-to-moment and day-to-day level. Familism, measured at baseline, only moderated the relationship between loneliness and sadness.

Conclusions

The findings suggest being in a lonely moment may lead to the initiation or amplification of psychological distress immediately and the effects may linger over the day. Supplemental data for this article can be accessed online at https://doi.org/10.1080/07448481.2021.1927051.",2021-07-09 +34140592,Multivariable mortality risk prediction using machine learning for COVID-19 patients at admission (AICOVID).,"In Coronavirus disease 2019 (COVID-19), early identification of patients with a high risk of mortality can significantly improve triage, bed allocation, timely management, and possibly, outcome. The study objective is to develop and validate individualized mortality risk scores based on the anonymized clinical and laboratory data at admission and determine the probability of Deaths at 7 and 28 days. Data of 1393 admitted patients (Expired-8.54%) was collected from six Apollo Hospital centers (from April to July 2020) using a standardized template and electronic medical records. 63 Clinical and Laboratory parameters were studied based on the patient's initial clinical state at admission and laboratory parameters within the first 24 h. The Machine Learning (ML) modelling was performed using eXtreme Gradient Boosting (XGB) Algorithm. 'Time to event' using Cox Proportional Hazard Model was used and combined with XGB Algorithm. The prospective validation cohort was selected of 977 patients (Expired-8.3%) from six centers from July to October 2020. The Clinical API for the Algorithm is  http://20.44.39.47/covid19v2/page1.php being used prospectively. 
Out of the 63 clinical and laboratory parameters, Age [adjusted hazard ratio (HR) 2.31; 95% CI 1.52-3.53], Male Gender (HR 1.72, 95% CI 1.06-2.85), Respiratory Distress (HR 1.79, 95% CI 1.32-2.53), Diabetes Mellitus (HR 1.21, 95% CI 0.83-1.77), Chronic Kidney Disease (HR 3.04, 95% CI 1.72-5.38), Coronary Artery Disease (HR 1.56, 95% CI - 0.91 to 2.69), respiratory rate > 24/min (HR 1.54, 95% CI 1.03-2.3), oxygen saturation below 90% (HR 2.84, 95% CI 1.87-4.3), Lymphocyte% in DLC (HR 1.99, 95% CI 1.23-2.32), INR (HR 1.71, 95% CI 1.31-2.13), LDH (HR 4.02, 95% CI 2.66-6.07) and Ferritin (HR 2.48, 95% CI 1.32-4.74) were found to be significant. The performance parameters of the current model is at AUC ROC Score of 0.8685 and Accuracy Score of 96.89. The validation cohort had the AUC of 0.782 and Accuracy of 0.93. The model for Mortality Risk Prediction provides insight into the COVID Clinical and Laboratory Parameters at admission. It is one of the early studies, reflecting on 'time to event' at the admission, accurately predicting patient outcomes.",2021-06-17 +33862060,"Genomic epidemiology of group B streptococci spanning 10 years in an Irish maternity hospital, 2008-2017.","

Objectives

The genomic epidemiology of group B streptococcal (GBS) isolates from the Rotunda maternity hospital, Dublin, 2008-2017, was investigated.

Methods

Whole genome sequences of isolates (invasive, n = 114; non-invasive, n = 76) from infants and women were analysed using the PubMLST database (https://pubmlst.org/sagalactiae/).

Results

Serotypes III (36%), Ia (18%), V (17%), II (11%) and Ib (9%) and sequence types (ST) 17 (23%), ST-23 (14%), ST-1 (12%) and ST-19 (7%) were most common. Core genome MLST (cgMLST) differentiated isolates of the same ST, grouped STs into five lineages congruent with known clonal complexes and identified known mother-baby pairs and suspected linked infant cases. Clonal complex (CC) 17 accounted for 40% and 22% of infant and maternal invasive cases, respectively, and 21% of non-invasive isolates. CC23 and CC19 were associated with maternal disease (30%) and carriage (24%), respectively. Erythromycin (26%) and clindamycin (18%) resistance increased over the study period and was associated with presence of the erm(B) gene (55%), CC1 (33%) and CC19 (24%). A multi-resistant integrative conjugative element incorporated in the PI-1 locus was detected in CC17, an ST-12 and ST-23 isolate confirming the global dissemination of this element. All isolates possessed one or more pilus islands. Genes encoding other potential protective proteins including Sip, C5a peptidase and Srr1 were present in 100%, 99.5% and 65.8% of isolates, respectively. The srr2 gene was unique to CC17.

Conclusions

The PubMLST.org website provides a valuable framework for genomic GBS surveillance to inform on local and global GBS epidemiology, preventive and control measures.",2021-04-20 +34042047,A high rate of COVID-19 vaccine hesitancy in a large-scale survey on Arabs. ,"Vaccine hesitancy can limit the benefits of available vaccines in halting the spread of COVID-19 pandemic. Previously published studies paid little attention to Arab countries, which has a population of over 440 million. In this study, we present the results of the first large-scale multinational study that measures vaccine hesitancy among Arab-speaking subjects. An online survey in Arabic was conducted from 14 January 2021 to 29 January 2021. It consisted of 17 questions capturing demographic data, acceptance of COVID-19 vaccine, attitudes toward the need for COVID-19 vaccination and associated health policies, and reasons for vaccination hesitancy. R software v.4.0.2 was used for data analysis and visualization. The survey recruited 36,220 eligible participants (61.1% males, 38.9% females, mean age 32.6 ± 10.8 years) from all the 23 Arab countries and territories (83.4%) and 122 other countries (16.6%). Our analysis shows a significant rate of vaccine hesitancy among Arabs in and outside the Arab region (83% and 81%, respectively). The most cited reasons for hesitancy are concerns about side effects and distrust in health care policies, vaccine expedited production, published studies and vaccine producing companies. We also found that female participants, those who are 30-59 years old, those with no chronic diseases, those with lower level of academic education, and those who do not know the type of vaccine authorized in their countries are more hesitant to receive COVID-19 vaccination. On the other hand, participants who regularly receive the influenza vaccine, health care workers, and those from countries with higher rates of COVID-19 infections showed more vaccination willingness. 
Interactive representation of our results is posted on our project website at https://mainapp.shinyapps.io/CVHAA. Our results show higher vaccine hesitancy and refusal among Arab subjects, related mainly to distrust and concerns about side effects. Health authorities and Arab scientific community have to transparently address these concerns to improve vaccine acceptance. This study received no funding.",2021-05-27 +33426407,EPA's DSSTox database: History of development of a curated chemistry resource supporting computational toxicology research. ,"The US Environmental Protection Agency's (EPA) Distributed Structure-Searchable Toxicity (DSSTox) database, launched publicly in 2004, currently exceeds 875 K substances spanning hundreds of lists of interest to EPA and environmental researchers. From its inception, DSSTox has focused curation efforts on resolving chemical identifier errors and conflicts in the public domain towards the goal of assigning accurate chemical structures to data and lists of importance to the environmental research and regulatory community. Accurate structure-data associations, in turn, are necessary inputs to structure-based predictive models supporting hazard and risk assessments. In 2014, the legacy, manually curated DSSTox_V1 content was migrated to a MySQL data model, with modern cheminformatics tools supporting both manual and automated curation processes to increase efficiencies. This was followed by sequential auto-loads of filtered portions of three public datasets: EPA's Substance Registry Services (SRS), the National Library of Medicine's ChemID, and PubChem. This process was constrained by a key requirement of uniquely mapped identifiers (i.e., CAS RN, name and structure) for each substance, rejecting content where any two identifiers were conflicted either within or across datasets. 
This rejected content highlighted the degree of conflicting, inaccurate substance-structure ID mappings in the public domain, ranging from 12% (within EPA SRS) to 49% (across ChemID and PubChem). Substances successfully added to DSSTox from each auto-load were assigned to one of five qc_levels, conveying curator confidence in each dataset. This process enabled a significant expansion of DSSTox content to provide better coverage of the chemical landscape of interest to environmental scientists, while retaining focus on the accuracy of substance-structure-data associations. Currently, DSSTox serves as the core foundation of EPA's CompTox Chemicals Dashboard [https://comptox.epa.gov/dashboard], which provides public access to DSSTox content in support of a broad range of modeling and research activities within EPA and, increasingly, across the field of computational toxicology.",2019-11-01 +34551023,Collective conceptualization of parental support of dual career athletes: The EMPATIA framework.,"

Background

This study aimed to use a concept mapping methodology to develop a European framework of the needs of parents/guardians (P/G) for supporting athletes combining sport and education (dual career, DC).

Methods

By means of a concept mapping methodology, 337 French, Irish, Italian, Portuguese, and Slovenian parents sorted and rated 80 potential statements associated with parenting DC athletes.

Results

Five distinct clusters emerged: 1. P/G' roles, needs and awareness to support athletes, including 22 statements (mean:3.7; range: 3.2-4.2 pt); 2. Requirements for effective planning of DC pathway, including 19 statements (mean:3.7; range: 3.2-4.5 pt); 3. Educational opportunity, including 13 statements (mean:3.5; range: 3.1-4.0 pt); 4. Policy and provision for DC, including 19 statements (mean:3.7; range: 3.1-4.2 pt); and 5. Athletes' lifestyle & self-management, including 7 statements (mean:4.0; range: 3.5-4.5 pt). Estimates of effect size (Partial eta-squared) were calculated for ANOVAs to assess the degree of variability on the statement importance ranking as the dependent variable accounted for by the demographic data. The concept mapping showed good validity (stress value: 0.11) and high reliability (rSHT: 0.99, rSHM: 0.98; rRR:0.98). One-third of the statements indicated differences (p<0.05) in relation to the P/Gs' gender and the athletes' education level, competition level and sport typology.

Conclusion

In synthesizing the opinions, experience and needs of P/Gs of DC athletes the present framework provided sound theoretical underpinnings to inform the development of an online educational programme for empowering parenting DC athletes (https://edu.empatiasport.eu/eng/), as well as be a foundation for future Pan-European DC research on how these statements interact with each other, in different European contexts.",2021-09-22 +32477399,Coexpression Analysis Reveals Dynamic Modules Regulating the Growth and Development of Cirri in the Rattans (Calamus simplicifolius and Daemonorops jenkinsiana).,"Rattan is regarded as one of the major non-timber forest products, second only to wood and bamboo, worldwide. Although the published genomes of Calamus simplicifolius and Daemonorops jenkinsiana have facilitated genome-wide gene functional analyses, coexpression networks (CENs) provide more comprehensive and complete annotations of gene function at the transcriptome level. Thus, we analyzed the CENs of the two rattans, C. simplicifolius and D. jenkinsiana, by integrating the genome sequences and analyzing in-house transcriptome data from different development stages of their cirri using a well-developed strategy. A total of 3,504 and 3,027 functional modules were identified in C. simplicifolius and D. jenkinsiana, respectively, based on a combination of CENs, gene family classification, and function enrichment tools. These modules covered the major developmental processes, including photosynthesis, lignin biosynthesis, flavonoid biosynthesis, and phenylpropanoid biosynthesis. Reference annotations were refined using CENs and functional modules. Moreover, we obtained novel insights into the regulation of cirrus growth and development in rattans. 
Furthermore, Rattan-NET (http://rattan.bamboogdb.org/), an online database with analysis tools for gene set enrichment analysis, module enrichment, network comparison analysis, and cis-element analysis, was constructed for the easy analysis of gene function and regulation modules involved in the growth and development of cirri in rattans.",2020-05-12 +32761211,"A citizen science initiative for open data and visualization of COVID-19 outbreak in Kerala, India.","

Objective

India reported its first coronavirus disease 2019 (COVID-19) case in the state of Kerala and an outbreak initiated subsequently. The Department of Health Services, Government of Kerala, initially released daily updates through daily textual bulletins for public awareness to control the spread of the disease. However, these unstructured data limit upstream applications, such as visualization, and analysis, thus demanding refinement to generate open and reusable datasets.

Materials and methods

Through a citizen science initiative, we leveraged publicly available and crowd-verified data on COVID-19 outbreak in Kerala from the government bulletins and media outlets to generate reusable datasets. This was further visualized as a dashboard through a front-end Web application and a JSON (JavaScript Object Notation) repository, which serves as an application programming interface for the front end.

Results

From the sourced data, we provided real-time analysis, and daily updates of COVID-19 cases in Kerala, through a user-friendly bilingual dashboard (https://covid19kerala.info/) for nonspecialists. To ensure longevity and reusability, the dataset was deposited in an open-access public repository for future analysis. Finally, we provide outbreak trends and demographic characteristics of the individuals affected with COVID-19 in Kerala during the first 138 days of the outbreak.

Discussion

We anticipate that our dataset can form the basis for future studies, supplemented with clinical and epidemiological data from the individuals affected with COVID-19 in Kerala.

Conclusions

We reported a citizen science initiative on the COVID-19 outbreak in Kerala to collect and deposit data in a structured format, which was utilized for visualizing the outbreak trend and describing demographic characteristics of affected individuals.",2020-12-01 +34536568,SmProt: A Reliable Repository with Comprehensive Annotation of Small Proteins Identified from Ribosome Profiling.,"Small proteins specifically refer to proteins consisting of less than 100 amino acids translated from small open reading frames (sORFs), which were usually missed in previous genome annotation. The significance of small proteins has been revealed in current years, along with the discovery of their diverse functions. However, systematic annotation of small proteins is still insufficient. SmProt was specially developed to provide valuable information on small proteins for scientific community. Here we present the update of SmProt, which emphasizes reliability of translated sORFs, genetic variants in translated sORFs, disease-specific sORF translation events or sequences, and remarkably increased data volume. More components such as non-ATG translation initiation, function, and new sources are also included. SmProt incorporated 638,958 unique small proteins curated from 3,165,229 primary records, which were computationally predicted from 419 ribosome profiling (Ribo-seq) datasets or collected from literature and other sources from 370 cell lines or tissues in 8 species (Homo sapiens, Mus musculus, Rattus norvegicus, Drosophila melanogaster, Danio rerio, Saccharomyces cerevisiae, Caenorhabditis elegans, and Escherichia coli). In addition, small protein families identified from human microbiomes were also collected. 
All datasets in SmProt are free to access, and available for browse, search, and bulk downloads at http://bigdata.ibp.ac.cn/SmProt/.",2021-08-01 +33613986,PhenoSpace: A Shiny application to visualize trait data in the phenotypic space of the global spectrum of plant form and function.,"A recent analysis of variation in six major traits conducted on a large worldwide sample of vascular plant species showed that three-quarters of trait variation was captured by a two-dimensional global spectrum of plant form and function (""global spectrum"" hereafter). We developed the PhenoSpace application, whose aim is to visualize and export the position of any individual/population/species in the phenotypic space of the global spectrum. PhenoSpace is a Shiny application that helps users to manipulate and visualize data pertaining to the global spectrum of plant form and function. It is freely accessible at the following URL: https://shiny.cefe.cnrs.fr/PhenoSpace/. PhenoSpace has three main functionalities. First, it allows users to visualize the phenotypic space of the global spectrum using different combinations of traits and growth forms. Second, trait data from any new user-defined dataset can be projected onto the phenotypic space of the global spectrum, provided that at least two of the six traits are available. Finally, figures produced and loadings of the imported data on the PCA axes can be downloaded, allowing users to conduct further analyses. PhenoSpace fulfills the practical goal of positioning plants in the phenotypic space of the global spectrum, making it possible to compare trait variation at any level of organization against the worldwide background. 
This serves a major aim of comparative plant ecology, which is to put specific sets of individuals, populations or species into a broader context, facilitating comparison and synthesis of results across different continents and environments using relevant indicators of plant design and function.",2021-01-26 +34555697,QSPR modeling of absorption maxima of dyes used in dye sensitized solar cells (DSSCs).,"Dye-sensitized solar cells (DSSCs) have recently received a significant attention as possible sources of renewable energy. As a result, a significant effort is being made to develop organic dyes for highly power conversion efficient DSSCs, in order to overcome the disadvantages of previous solar cell systems, such as cost reduction, weight reduction, and production methods that minimize environmental pollution. As shown by multiple recent research publications, computational techniques such as quantitative structure-property relationship (QSPR) modeling may aid in the development of suitable dyes for DSSCs satisfying many fundamental desired characteristics. The current report provides robust, externally verified QSPR models for five chemical classes of organic dyes (Triphenylamines, Phenothiazines, Indolines, Porphyrins and Coumarins) based on experimentally determined absorption maxima values. The size of the dye data points utilized to develop the models is the largest known to date. The QSPR models were constructed using only two-dimensional descriptors with clear physicochemical meaning. Using the best subset selection approach, we built 5, 3, 4, 3 and 2 descriptor models for the Triphenylamine, Phenothiazine, Indoline, Porphyrin and Coumarin classes, respectively. 
The models were validated both internally and externally, and then consensus predictions were made for specific categories of dyes using the developed partial least squares (PLS) models, and the ""Intelligent consensus predictor"" tool (http://teqip.jdvu.ac.in/QSAR_Tools/) was used to determine whether the quality of test set compound predictions can be improved through the ""intelligent"" selection of multiple PLS models. We identified from the insights gained from the developed models several chemical attributes that are important in enhancing the absorption maxima. Thus, our study may be utilized to predict the λmax values of novel or untested organic dyes and to give insights that will aid in the development of new dyes for use in solar cells with increased λmax values and enhanced power conversion efficiency.",2021-09-21 +33493161,PASA: Proteomic analysis of serum antibodies web server.,"

Motivation

A comprehensive characterization of the humoral response towards a specific antigen requires quantification of the B-cell receptor repertoire by next-generation sequencing (BCR-Seq), as well as the analysis of serum antibodies against this antigen, using proteomics. The proteomic analysis is challenging since it necessitates the mapping of antigen-specific peptides to individual B-cell clones.

Results

The PASA web server provides a robust computational platform for the analysis and integration of data obtained from proteomics of serum antibodies. PASA maps peptides derived from antibodies raised against a specific antigen to corresponding antibody sequences. It then analyzes and integrates proteomics and BCR-Seq data, thus providing a comprehensive characterization of the humoral response. The PASA web server is freely available at https://pasa.tau.ac.il and open to all users without a login requirement.",2021-01-25 +32006276,Using Dali for Protein Structure Comparison.,"The exponential growth in the number of newly solved protein structures makes correlating and classifying the data an important task. Distance matrix alignment (Dali) is used routinely by crystallographers worldwide to screen the database of known structures for similarity to newly determined structures. Dali is easily accessible through the web server ( http://ekhidna.biocenter.helsinki.fi/dali ). Alternatively, the program may be downloaded and pairwise comparisons performed locally on Linux computers.",2020-01-01 +32394607,Evaluation of inhibitor of apoptosis genes as targets for RNAi-mediated control of insect pests.,"Apoptosis has been widely studied from mammals to insects. Inhibitor of apoptosis (IAP) protein is a negative regulator of apoptosis. Recent studies suggest that iap genes could be excellent targets for RNA interference (RNAi)-mediated control of insect pests. However, not much is known about iap genes in one of the well-known insect model species, Tribolium castaneum. The orthologues of five iap genes were identified in T. castaneum by searching its genome at NCBI (https://www.ncbi.nlm.nih.gov/) and UniProt (https://www.uniprot.org/) databases using Drosophila melanogaster and Aedes aegypti IAP protein sequences as queries. RNAi assays were performed in T. castaneum cell line (TcA) and larvae. 
The knockdown of iap1 gene induced a distinct apoptotic phenotype in TcA cells and induced 91% mortality in T. castaneum larvae. Whereas, knockdown of iap5 resulted in a decrease in cell proliferation in TcA cells and developmental defects in T. castaneum larvae which led to 100% mortality. Knockdown of the other three iap genes identified did not cause a significant effect on cells or insects. These data increase our understanding of iap genes in insects and provide opportunities for developing iap1 and iap5 as targets for RNAi-based insect pest control.",2020-05-11 +32506918,Proteomic Analysis Reveals Proteins and Pathways Associated with Lactation in Bovine Mammary Epithelial Cell-Derived Exosomes.,"Milk-derived exosomes have been reported, which are involved in many biological processes. The exosomes derived from mammary glands are not known yet, and their relationship with mammary gland lactation and the origin of milk-derived exosomes are largely unclear. The present study aimed to investigate the proteome of exosomes derived from bovine mammary epithelial cells (BMECs) and compare them with milk-derived exosomes in the database. BMEC-derived exosomes were successfully separated from the culture supernatant of BMECs by a combined ultracentrifugation approach, and the purity of exosomes was identified by western blot analysis. Liquid chromatography with tandem mass spectrometry identified 638 proteins in BMEC-derived exosomes. The MS data were deposited into the PUBLIC repository ProteomeXchange, dataset identifier(s): https://www.iprox.org/page/PSV023.html;?url=1590961453176tKpa. Gene Ontology annotation and Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway analysis showed that these proteins were associated with specific biological processes and molecular functions of metabolism. Cross comparison of these proteins with the protein database of milk exosomes showed that 77 common expressed proteins (CEPs) were in both BMEC- and milk-derived exosomes. 
The KEGG pathway analysis for these CEPs showed that they were mainly involved in signaling pathways associated with milk biosynthesis in BMECs. Among these CEPs, six proteins have been previously reported to be associated with the lactation function. The western blot analysis detected that expression of these six proteins in BMEC-derived exosomes was increased after the stimulation of methionine and β-estradiol on BMECs. In summary, the proteome of BMEC-derived exosomes reveals that they are associated with milk biosynthesis in BMECs and might be a source of milk-derived exosomes.",2020-06-15 +31919937,Real-world evidence of secukinumab in psoriasis treatment - a meta-analysis of 43 studies.,"Real-world evidence (RWE) meta-analyses provide valuable insights from patients in routine clinical practice. Secukinumab, the first fully human monoclonal antibody that neutralizes IL-17A, has shown long-lasting effectiveness and safety in plaque psoriasis (PsO). Since its licence approval in 2015, many RWE studies have been published. The objective of this study was to review all available literature on RWE studies with secukinumab and the secukinumab arm of comparator studies in patients with moderate-to-severe PsO to evaluate its effectiveness, drug survival and safety. https://www.embase.com and https://clinicaltrials.gov databases were searched using prespecified inclusion criteria between 1 January 2015 and 31 May 2019. Using a meta-package and R statistical software to analyse data, key outcomes were measured at 3, 6 and 12 months. PASI and DLQI score data were recorded for patients who remained on secukinumab treatment. Overall, 43 studies were included. Drug survival was 90% at 3 and 6 months, and 80% at 12 months. At 12 months, 8% of patients had discontinued treatment due to lack of effectiveness. At 3, 6 and 12 months, Psoriasis Area and Severity Index (PASI) 90 scores were as follows: 50%, 53% and 60%, and PASI 100 scores were 36%, 46% and 51%, respectively. 
At 3, 6 and 12 months, 57%, 55% and 65% of patients achieved a Dermatology Life Quality Index (DLQI) score of 0 or 1, respectively. Adverse events were consistent with rates observed in clinical trials with no new safety signals. This meta-analysis strengthens existing evidence on the clinical effectiveness of secukinumab in patients with moderate-to-severe PsO, demonstrating high drug survival rates, high levels of patient-reported outcomes, and good tolerance.",2020-02-18 +33492549,The transplant cohort of the German center for infection research (DZIF Tx-Cohort): study design and baseline characteristics.,"Infectious complications are the major cause of morbidity and mortality after solid organ and stem cell transplantation. To better understand host and environmental factors associated with an increased risk of infection as well as the effect of infections on function and survival of transplanted organs, we established the DZIF Transplant Cohort, a multicentre prospective cohort study within the organizational structure of the German Center for Infection Research. At time of transplantation, heart-, kidney-, lung-, liver-, pancreas- and hematopoetic stem cell- transplanted patients are enrolled into the study. Follow-up visits are scheduled at 3, 6, 9, 12 months after transplantation, and annually thereafter; extracurricular visits are conducted in case of infectious complications. Comprehensive standard operating procedures, web-based data collection and monitoring tools as well as a state of the art biobanking concept for blood, purified PBMCs, urine, and faeces samples ensure high quality of data and biosample collection. By collecting detailed information on immunosuppressive medication, infectious complications, type of infectious agent and therapy, as well as by providing corresponding biosamples, the cohort will establish the foundation for a broad spectrum of studies in the field of infectious diseases and transplant medicine. 
By January 2020, baseline data and biosamples of about 1400 patients have been collected. We plan to recruit 3500 patients by 2023, and continue follow-up visits and the documentation of infectious events at least until 2025. Information about the DZIF Transplant Cohort is available at https://www.dzif.de/en/working-group/transplant-cohort .",2021-01-25 +34790910,CellPAINT: Turnkey Illustration of Molecular Cell Biology.,"CellPAINT is an interactive digital tool that allows non-expert users to create illustrations of the molecular structure of cells and viruses. We present a new release with several key enhancements, including the ability to generate custom ingredients from structure information in the Protein Data Bank, and interaction, grouping, and locking functions that streamline the creation of assemblies and illustration of large, complex scenes. An example of CellPAINT as a tool for hypothesis generation in the interpretation of cryoelectron tomograms is presented. CellPAINT is freely available at http://ccsb.scripps.edu/cellpaint.",2021-03-29 +33754940,Weight Status and Effects of Non-Tobacco Flavors on E-Cigarette Product Appeal.,"

Background

Flavors in tobacco products may be salient drivers of tobacco product use among people with overweight or obesity. Yet, whether perceived appeal of e-cigarettes with different flavors varies as a function of weight status is unknown. Purpose: To conduct secondary data analyses of a laboratory experiment to examine whether weight moderates differences in perceived appeal of fruit, menthol, and tobacco flavored e-cigarettes in young adults who vape. Methods: Using a within-subjects experimental design, young adults in Los Angeles, CA, USA (Mage = 25.36 ± 4.42; range: 18-35) with normal weight (n = 48) or overweight/obesity (n = 51) were administered standardized doses of e-cigarette solutions varying in flavor (fruit, menthol, tobacco). Following each administration, participants rated the appeal of the solution (range: 0-100). Results: The extent to which menthol (vs. tobacco)-flavored e-cigarettes were rated more appealing was amplified among participants with overweight or obesity versus normal weight participants (flavor × weight interaction Estimate = 7.54, p = .01 95% CI = [2.30, 12.80]). There were no differences in the effects of fruit (vs. tobacco) flavored e-cigarettes on appeal as a function of weight status. Conclusions: Menthol flavors in e-cigarettes may be disproportionately appealing to young adults with overweight and obesity. Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1899229.",2021-03-23 +30721922,G4Hunter web application: a web server for G-quadruplex prediction.,"MOTIVATION:Expanding research highlights the importance of guanine quadruplex structures. Therefore, easy-accessible tools for quadruplex analyses in DNA and RNA molecules are important for the scientific community. RESULTS:We developed a web version of the G4Hunter application. This new web-based server is a platform-independent and user-friendly application for quadruplex analyses. 
It allows retrieval of gene/nucleotide sequence entries from NCBI databases and provides complete characterization of localization and quadruplex propensity of quadruplex-forming sequences. The G4Hunter web application includes an interactive graphical data representation with many useful options including visualization, sorting, data storage and export. AVAILABILITY AND IMPLEMENTATION:G4Hunter web application can be accessed at: http://bioinformatics.ibp.cz. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-09-01 +34180617,[Vaccine against COVID-19].,"The COVID-19 pandemic has produced a huge health, economic and psychological collapse in our society. Health workers have had to face one of the greatest challenges in history, trying to show the population how to deal with this disease. We have learned that vaccines are the great instrument for the fight against infectious diseases and a large number of them began to appear, not as a product of chance but as a product of the enormous progress experienced in recent years with vaccines against new infectious diseases, against other diseases such as Alzheimer's and especially against cancer. All this knowledge has been applied to this disease. Practitioners lamented the little information available to them when asked questions from patients. This document wanted to be a response to these concerns, with a scientific desire, with evidence that put aside unverified data and hoaxes. Faced with an avalanche of information, most of it without the appropriate ""peer review"" as indicated in the introduction, any publication becomes obsolete at the time of publication, and we opted for an ""online"" publication, with the incorporation of versions. 
This online publication has been published in the documents of Spanish Society of Chemotherapy, at https://seq.es/vacunacion-covid-19.",2021-06-28 +33976825,Automated location invariant animal detection in camera trap images using publicly available data sources.,"A time-consuming challenge faced by camera trap practitioners is the extraction of meaningful data from images to inform ecological management. An increasingly popular solution is automated image classification software. However, most solutions are not sufficiently robust to be deployed on a large scale due to lack of location invariance when transferring models between sites. This prevents optimal use of ecological data resulting in significant expenditure of time and resources to annotate and retrain deep learning models. We present a method ecologists can use to develop optimized location invariant camera trap object detectors by (a) evaluating publicly available image datasets characterized by high intradataset variability in training deep learning models for camera trap object detection and (b) using small subsets of camera trap images to optimize models for high accuracy domain-specific applications. We collected and annotated three datasets of images of striped hyena, rhinoceros, and pigs, from the image-sharing websites FlickR and iNaturalist (FiN), to train three object detection models. We compared the performance of these models to that of three models trained on the Wildlife Conservation Society and Camera CATalogue datasets, when tested on out-of-sample Snapshot Serengeti datasets. We then increased FiN model robustness by infusing small subsets of camera trap images into training. In all experiments, the mean Average Precision (mAP) of the FiN trained models was significantly higher (82.33%-88.59%) than that achieved by the models trained only on camera trap datasets (38.5%-66.74%). 
Infusion further improved mAP by 1.78%-32.08%. Ecologists can use FiN images for training deep learning object detection solutions for camera trap image processing to develop location invariant, robust, out-of-the-box software. Models can be further optimized by infusion of 5%-10% camera trap images into training data. This would allow AI technologies to be deployed on a large scale in ecological applications. Datasets and code related to this study are open source and available on this repository: https://doi.org/10.5061/dryad.1c59zw3tx.",2021-03-10 +31665425,TFBSshape: an expanded motif database for DNA shape features of transcription factor binding sites.,"TFBSshape (https://tfbsshape.usc.edu) is a motif database for analyzing structural profiles of transcription factor binding sites (TFBSs). The main rationale for this database is to be able to derive mechanistic insights in protein-DNA readout modes from sequencing data without available structures. We extended the quantity and dimensionality of TFBSshape, from mostly in vitro to in vivo binding and from unmethylated to methylated DNA. This new release of TFBSshape improves its functionality and launches a responsive and user-friendly web interface for easy access to the data. The current expansion includes new entries from the most recent collections of transcription factors (TFs) from the JASPAR and UniPROBE databases, methylated TFBSs derived from in vitro high-throughput EpiSELEX-seq binding assays and in vivo methylated TFBSs from the MeDReaders database. TFBSshape content has increased to 2428 structural profiles for 1900 TFs from 39 different species. The structural profiles for each TFBS entry now include 13 shape features and minor groove electrostatic potential for standard DNA and four shape features for methylated DNA. 
We improved the flexibility and accuracy for the shape-based alignment of TFBSs and designed new tools to compare methylated and unmethylated structural profiles of TFs and methods to derive DNA shape-preserving nucleotide mutations in TFBSs.",2020-01-01 +32541556,Interactive Web Application for Plotting Personalized Prognosis Prediction Curves in Allogeneic Hematopoietic Cell Transplantation Using Machine Learning.,"

Background

Allogeneic hematopoietic cell transplantation (allo-HCT) is a curative treatment option for malignant hematological disorders. Transplant clinicians estimate patient-specific prognosis empirically in clinical practice based on previous studies on similar patients. However, this approach does not provide objective data. The present study primarily aimed to develop a tool capable of providing accurate personalized prognosis prediction after allo-HCT in an objective manner.

Methods

We developed an interactive web application tool with a graphical user interface capable of plotting the personalized survival and cumulative incidence prediction curves after allo-HCT adjusted by 8 patient-specific factors, which are known as prognostic predictors, and assessed their predictive performances. A random survival forest model using the data of patients who underwent allo-HCT at our institution was applied to develop this application.

Results

We succeeded in showing the personalized prognosis prediction curves of 1-year overall survival, progression-free survival, relapse/progression, and nonrelapse mortality (NRM) interactively using our web application (https://predicted-os-after-transplantation.shinyapps.io/RSF_model/). To assess its predictive performance, the entire cohort (363 cases) was split into a training cohort (70%) and a test cohort (30%) time-sequentially based on the patients' transplant dates. The areas under the receiver-operating characteristic curves for 1-year overall survival, progression-free survival, relapse/progression, and nonrelapse mortality in test cohort were 0.70, 0.72, 0.73, and 0.77, respectively.

Conclusions

The new web application could allow transplant clinicians to inform a new allo-HCT candidate of the objective personalized prognosis prediction and facilitate decision-making.",2021-05-01 +34387199,Development and External Validation of a Model to Predict Overall Survival in Patients with Resected Gallbladder Cancer. ,"The aim of this study was to develop and validate a clinical prediction model to predict overall survival in patients with non-metastatic, resected gallbladder cancer (GBC). Although several tools are available, no optimal method has been identified to assess survival in patients with resected GBC. Data from a Dutch, nation-wide cohort of patients with resected GBC was used to develop a prediction model for overall survival. The model was internally validated and a cohort of Australian GBC patients who underwent resection was used for external validation. The performance of the AJCC staging system and the present model were compared. In total, 446 patients were included; 380 patients in the development cohort, and 66 patients in the validation cohort. In the development cohort median survival was 22 months (median follow-up 75 months). Age, T/N classification, resection margin, differentiation grade and vascular invasion were independent predictors of survival. The externally validated c-index was 0.75 (95%CI 0.69-0.80), implying good discriminatory capacity. The discriminative ability of the present model after internal validation was superior to the ability of the AJCC staging system (Harrell's C-index 0.71, (95%CI 0.69-0.72) versus 0.59 (95%CI 0.57-0.60)). The proposed model for the prediction of overall survival in patients with resected GBC demonstrates good discriminatory capacity, reasonable calibration and outperforms the authoritative AJCC staging system. 
This model can be a useful tool for physicians and patients to obtain information about survival after resection and is available from https://gallbladderresearch.shinyapps.io/Predict_GBC_survival/.",2021-08-13 +30024243,"Procedural Frames in Negotiations: How Offering My Resources Versus Requesting Yours Impacts Perception, Behavior, and Outcomes: Correction to Trötschel et al. (2015).","Reports an error in ""Procedural frames in negotiations: How offering my resources versus requesting yours impacts perception, behavior, and outcomes"" by Roman Trötschel, David D. Loschelder, Benjamin P. Höhne and Johann M. Majer (Journal of Personality and Social Psychology, 2015[Mar], Vol 108[3], 417-435). In the article ""Procedural Frames in Negotiations: How Offering My Resources Versus Requesting Yours Impacts Perception, Behavior, and Outcomes"" by Roman Trötschel, David D. Loschelder, Benjamin P. Höhne, and Johann M. Majer (Journal of Personality and Social Psychology, 2015, Vol. 108, No. 3, pp. 417-435. http://dx.doi.org/10.1037/pspi0000009), rounding errors in p values occur in the Results under the Concession rate section of Experiment 4a and in the Outcome profits section of Experiment 5. The second sentence of the Discussion section of Experiment 4a should read as follows: Averaged across roles (i.e., buyers and sellers) parties made lower concessions and achieved higher individual outcomes when offering rather requesting resources. The last sentence of the Concession rates section of Experiment 5 should read as follows: This pattern was reversed when animals from zoo Y were addressed first, although this contrast effect did not reach significance. (The following abstract of the original article appeared in record 2015-09924-002.) Although abundant negotiation research has examined outcome frames, little is known about the procedural framing of negotiation proposals (i.e., offering my vs. requesting your resources). 
In a series of 8 experiments, we tested the prediction that negotiators would show a stronger concession aversion and attain better individual outcomes when their own resource, rather than the counterpart's, is the accentuated reference resource in a transaction. First, senders of proposals revealed a stronger concession aversion when they offered their own rather than requested the counterpart's resources-both in buyer-seller (Experiment 1a) and in classic transaction negotiations (Experiment 2a). Expectedly, this effect reversed for recipients: When receiving requests rather than offers, recipients experienced a stronger concession aversion in buyer-seller (Experiment 1b) and transaction negotiations (Experiment 2b). Experiments 3-5 investigated procedural frames in the interactive process of negotiations-with elementary schoolchildren (Experiment 3), in a buyer-seller context (Experiments 4a and 4b), and in a computer-mediated transaction negotiation void of buyer and seller roles (Experiment 5). In summary, 8 experiments showed that negotiators are more concession averse and claim more individual value when negotiation proposals are framed to highlight their own rather than the counterpart's resources. (PsycINFO Database Record",2018-08-01 +34188785,Eleven routine clinical features predict COVID-19 severity uncovered by machine learning of longitudinal measurements.,"Severity prediction of COVID-19 remains one of the major clinical challenges for the ongoing pandemic. Here, we have recruited a 144 COVID-19 patient cohort, resulting in a data matrix containing 3,065 readings for 124 types of measurements over 52 days. A machine learning model was established to predict the disease progression based on the cohort consisting of training, validation, and internal test sets. A panel of eleven routine clinical factors constructed a classifier for COVID-19 severity prediction, achieving accuracy of over 98% in the discovery set. 
Validation of the model in an independent cohort containing 25 patients achieved accuracy of 80%. The overall sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) were 0.70, 0.99, 0.93, and 0.93, respectively. Our model captured predictive dynamics of lactate dehydrogenase (LDH) and creatine kinase (CK) while their levels were in the normal range. This model is accessible at https://www.guomics.com/covidAI/ for research purpose.",2021-06-17 +33163148,HMST-Seq-Analyzer: A new python tool for differential methylation and hydroxymethylation analysis in various DNA methylation sequencing data.,"DNA methylation (5mC) and hydroxymethylation (5hmC) are chemical modifications of cytosine bases which play a crucial role in epigenetic gene regulation. However, cost, data complexity and unavailability of comprehensive analytical tools is one of the major challenges in exploring these epigenetic marks. Hydroxymethylation-and Methylation-Sensitive Tag sequencing (HMST-seq) is one of the most cost-effective techniques that enables simultaneous detection of 5mC and 5hmC at single base pair resolution. We present HMST-Seq-Analyzer as a comprehensive and robust method for performing simultaneous differential methylation analysis on 5mC and 5hmC data sets. HMST-Seq-Analyzer can detect Differentially Methylated Regions (DMRs), annotate them, give a visual overview of methylation status and also perform preliminary quality check on the data. In addition to HMST-Seq, our tool can be used on whole-genome bisulfite sequencing (WGBS) and reduced representation bisulfite sequencing (RRBS) data sets as well. 
The tool is written in Python with capacity to process data in parallel and is available at (https://hmst-seq.github.io/hmst/).",2020-10-10 +34435916,Predicting physical distancing over time during COVID-19: testing an integrated model.,"Objective: We applied an integrated social cognition model to predict physical distancing behavior, a key COVID-19 preventive behavior, over a four-month period. Design: A three-wave longitudinal survey design. Methods: Australian and US residents (N = 601) completed self-report measures of social cognition constructs (attitude, subjective norm, moral norm, perceived behavioral control [PBC]), intention, habit, and physical distancing behavior on an initial occasion (T1) and on two further occasions one week (T2) and four months (T3) later. Results: A structural equation model revealed that subjective norm, moral norm, and PBC, were consistent predictors of physical distancing intention on all three occasions. Intention and habit at T1 and T2 predicted physical distancing behavior at T2 and T3, respectively. Intention at T2 mediated effects of subjective norm, moral norm, and PBC at T2 on physical distancing behavior at T3, and habit at T1 and T2 mediated effects of behavior at T1 and T2 on follow-up behavior at T2 and T3, respectively. Conclusion: Normative (subjective and moral norms) and capacity (PBC) constructs were consistent predictors of physical distancing intention, and intention and habit were consistent predictors of physical distancing behavior. 
Interventions promoting physical distancing should target change in normative and personal capacity beliefs, and habit.Supplemental data for this article is available online at https://doi.org/10.1080/08870446.2021.1968397 .",2021-08-26 +30032609,KELM-CPPpred: Kernel Extreme Learning Machine Based Prediction Model for Cell-Penetrating Peptides.,"Cell-penetrating peptides (CPPs) facilitate the transport of pharmacologically active molecules, such as plasmid DNA, short interfering RNA, nanoparticles, and small peptides. The accurate identification of new and unique CPPs is the initial step to gain insight into CPP activity. Experiments can provide detailed insight into the cell-penetration property of CPPs. However, the synthesis and identification of CPPs through wet-lab experiments is both resource- and time-expensive. Therefore, the development of an efficient prediction tool is essential for the identification of unique CPP prior to experiments. To this end, we developed a kernel extreme learning machine (KELM) based CPP prediction model called KELM-CPPpred. The main data set used in this study consists of 408 CPPs and an equal number of non-CPPs. The input features, used to train the proposed prediction model, include amino acid composition, dipeptide amino acid composition, pseudo amino acid composition, and the motif-based hybrid features. We further used an independent data set to validate the proposed model. In addition, we have also tested the prediction accuracy of KELM-CPPpred models with the existing artificial neural network (ANN), random forest (RF), and support vector machine (SVM) approaches on respective benchmark data sets used in the previous studies. Empirical tests showed that KELM-CPPpred outperformed existing prediction approaches based on SVM, RF, and ANN. 
We developed a web interface named KELM-CPPpred, which is freely available at http://sairam.people.iitgn.ac.in/KELM-CPPpred.html.",2018-08-13 +32788408,Interplay between Position-Dependent Codon Usage Bias and Hydrogen Bonding at the 5' End of ORFeomes. ,"Codon usage bias exerts control over a wide variety of molecular processes. The positioning of synonymous codons within coding sequences (CDSs) dictates protein expression by mechanisms such as local translation efficiency, mRNA Gibbs free energy, and protein cotranslational folding. In this work, we explore how codon usage affects the position-dependent content of hydrogen bonding, which in turn influences energy requirements for unwinding double-stranded DNA (dsDNA). We categorized codons according to their hydrogen bond content and found differential effects on hydrogen bonding encoded by codon variants. The specific positional disposition of codon variants within CDSs creates a ramp of hydrogen bonding at the 5' end of the ORFeome in Escherichia coli CDSs occupying the first position of operons are subjected to selective pressure that reduces their hydrogen bonding compared to internal CDSs, and highly transcribed CDSs demand a lower maximum capacity of hydrogen bonds per codon, suggesting that the energetic requirement for unwinding the dsDNA in highly transcribed CDSs has evolved to be minimized in E. coli Subsequent analysis of over 14,000 ORFeomes showed a pervasive ramp of hydrogen bonding at the 5' end in Bacteria and Archaea that positively correlates with the probability of mRNA secondary structure formation. 
Both the ramp and the correlation were not found in Fungi The position-dependent hydrogen bonding might be part of the mechanism that contributes to the coordination between transcription and translation in Bacteria and Archaea A Web-based application to analyze the position-dependent hydrogen bonding of ORFeomes has been developed and is publicly available (https://juanvillada.shinyapps.io/hbonds/).IMPORTANCE Redundancy of the genetic code creates a vast space of alternatives to encode a protein. Synonymous codons exert control over a variety of molecular and physiological processes of cells mainly through influencing protein biosynthesis. Recent findings have shown that synonymous codon choice affects transcription by controlling mRNA abundance, mRNA stability, transcription termination, and transcript biosynthesis cost. In this work, by analyzing thousands of Bacteria, Archaea, and Fungi genomes, we extend recent findings by showing that synonymous codon choice, corresponding to the number of hydrogen bonds in a codon, can also have an effect on the energetic requirements for unwinding double-stranded DNA in a position-dependent fashion. This report offers new perspectives on the mechanism behind the transcription-translation coordination and complements previous hypotheses on the resource allocation strategies used by Bacteria and Archaea to manage energy efficiency in gene expression.",2020-08-11 +34026466,"The EU-TOPIA evaluation tool: An online modelling-based tool for informing breast, cervical, and colorectal cancer screening decisions in Europe.","

Background

Aiming to support European countries in improving their breast, cervical, and colorectal cancer (CRC) screening programmes, the EU-TOPIA consortium has developed an online user-friendly tool (the EU-TOPIA evaluation tool; https://miscan.eu-topia.org) based on the Microsimulation Screening Analysis (MISCAN) model.

Methods

We designed an online platform that allows stakeholders to use their country-specific data (demographic, epidemiological, and cancer screening information) to quantify future harms and benefits of different cancer screening scenarios in their country. Current cancer screening programmes and impacts of potential changes in screening protocols (such as extending target ages or increasing screening attendance) can be simulated. Results are scaled to the country-specific population. To illustrate the tool, we used the tool to simulate two different CRC screening scenarios in the Netherlands: biennial fecal immunochemical testing (FIT) in ages 55-75 and colonoscopy every ten years in ages 55-75. Data from the Dutch screening programme was used to inform both scenarios.

Results

A total of 482,700 CRC cases and 178,000 CRC deaths were estimated in the Netherlands with FIT screening (for individuals aged 40-100 years, 2018-2050), with 47.3 million FITs performed (1.92 million positives of which 1.64 million adhered to diagnostic colonoscopy). With colonoscopy screening, CRC incidence and mortality were, respectively, up to 17% and 14% lower than in the current FIT screening programme, requiring, however, a colonoscopy demand that was 7-fold higher.

Conclusions

Our study presents an essential online tool for stakeholders and medical societies to quantify estimates of benefits and harms of early cancer detection in Europe.",2021-04-30 +27899582,"The Zebrafish Model Organism Database: new support for human disease models, mutation details, gene expression phenotypes and searching.","The Zebrafish Model Organism Database (ZFIN; http://zfin.org) is the central resource for zebrafish (Danio rerio) genetic, genomic, phenotypic and developmental data. ZFIN curators provide expert manual curation and integration of comprehensive data involving zebrafish genes, mutants, transgenic constructs and lines, phenotypes, genotypes, gene expressions, morpholinos, TALENs, CRISPRs, antibodies, anatomical structures, models of human disease and publications. We integrate curated, directly submitted, and collaboratively generated data, making these available to zebrafish research community. Among the vertebrate model organisms, zebrafish are superbly suited for rapid generation of sequence-targeted mutant lines, characterization of phenotypes including gene expression patterns, and generation of human disease models. The recent rapid adoption of zebrafish as human disease models is making management of these data particularly important to both the research and clinical communities. Here, we describe recent enhancements to ZFIN including use of the zebrafish experimental conditions ontology, 'Fish' records in the ZFIN database, support for gene expression phenotypes, models of human disease, mutation details at the DNA, RNA and protein levels, and updates to the ZFIN single box search.",2016-11-28 +34585603,Drinking Water Disinfection by-Products and Congenital Malformations: A Nationwide Register-Based Prospective Study.,"

Background

Drinking water chlorination by-products have been associated with adverse reproductive outcomes, although the findings for congenital malformations are still inconclusive.

Objective

We conducted a nationwide register-based prospective study to assess whether first trimester maternal exposure to the four most common trihalomethanes [total trihalomethanes (TTHM)] via municipal drinking water was associated with risk of congenital malformation among newborns.

Methods

We included all births during 2005-2015 (live and stillbirths) of mothers residing in Swedish localities having >10,000 inhabitants, two or fewer operating water works, and sufficient municipal TTHM monitoring data. Individual maternal first trimester exposure was obtained by linking TTHM measurements to residential information, categorized into no chlorination and <5, 5-15, and >15μg TTHM/L. We also made chlorination treatment-specific analyses (exclusive use of chloramine or hypochlorite). Outcomes and covariates were obtained via linkage to health care and administrative registers. Odds ratios (ORs) and 95% confidence intervals (CIs) were estimated by logistic regression.

Results

Based on 623,468 births and a prevalence of congenital malformation of ∼2 cases/100 births, we observed associations between TTHM exposure in areas using chloramine and malformations of the nervous system (OR=1.82; 95% CI: 1.07, 3.12), urinary system (OR=2.06; 95% CI: 1.53, 2.78), genitals (OR=1.77; 95% CI: 1.38, 2.26), and limbs (OR=1.34; 95% CI: 1.10, 1.64), comparing the highest exposed category with the unexposed. No associations were observed in areas using exclusively hypochlorite as the primary water treatment method. By contrast, for malformations of the heart, a significant inverse association was observed only in areas using hypochlorite.

Discussion

TTHM exposure was associated with the increased risk of malformations of the nervous system, urinary system, genitals, and limbs in areas exclusively using chloramine. An association between chloramine-related chlorination by-products and congenital malformations has not previously been highlighted and needs further attention. https://doi.org/10.1289/EHP9122.",2021-09-29 +34435882,Utilizing a Biology-Driven Approach to Map the Exposome in Health and Disease: An Essential Investment to Drive the Next Generation of Environmental Discovery.,"

Background

Recent developments in technologies have offered opportunities to measure the exposome with unprecedented accuracy and scale. However, because most investigations have targeted only a few exposures at a time, it is hypothesized that the majority of the environmental determinants of chronic diseases remain unknown.

Objectives

We describe a functional exposome concept and explain how it can leverage existing bioassays and high-resolution mass spectrometry for exploratory study. We discuss how such an approach can address well-known barriers to interpret exposures and present a vision of next-generation exposomics.

Discussion

The exposome is vast. Instead of trying to capture all exposures, we can reduce the complexity by measuring the functional exposome-the totality of the biologically active exposures relevant to disease development-through coupling biochemical receptor-binding assays with affinity purification-mass spectrometry. We claim the idea of capturing exposures with functional biomolecules opens new opportunities to solve critical problems in exposomics, including low-dose detection, unknown annotations, and complex mixtures of exposures. Although novel, biology-based measurement can make use of the existing data processing and bioinformatics pipelines. The functional exposome concept also complements conventional targeted and untargeted approaches for understanding exposure-disease relationships.

Conclusions

Although measurement technology has advanced, critical technological, analytical, and inferential barriers impede the detection of many environmental exposures relevant to chronic-disease etiology. Through biology-driven exposomics, it is possible to simultaneously scale up discovery of these causal environmental factors. https://doi.org/10.1289/EHP8327.",2021-08-26 +27493588,Recognizing millions of consistently unidentified spectra across hundreds of shotgun proteomics datasets.,"Mass spectrometry (MS) is the main technology used in proteomics approaches. However, on average 75% of spectra analysed in an MS experiment remain unidentified. We propose to use spectrum clustering at a large-scale to shed a light on these unidentified spectra. PRoteomics IDEntifications database (PRIDE) Archive is one of the largest MS proteomics public data repositories worldwide. By clustering all tandem MS spectra publicly available in PRIDE Archive, coming from hundreds of datasets, we were able to consistently characterize three distinct groups of spectra: 1) incorrectly identified spectra, 2) spectra correctly identified but below the set scoring threshold, and 3) truly unidentified spectra. Using a multitude of complementary analysis approaches, we were able to identify less than 20% of the consistently unidentified spectra. The complete spectrum clustering results are available through the new version of the PRIDE Cluster resource (http://www.ebi.ac.uk/pride/cluster). This resource is intended, among other aims, to encourage and simplify further investigation into these unidentified spectra.",2016-06-27 +35295098,Ecological Thresholds of Toxicological Concern: A Review.,"The ecological threshold of toxicological concern (ecoTTC) is analogous to traditional human health-based TTCs but with derivation and application to ecological species. 
An ecoTTC is computed from the probability distribution of predicted no effect concentrations (PNECs) derived from either chronic or extrapolated acute toxicity data for toxicologically or chemically similar groups of chemicals. There has been increasing interest in using ecoTTCs in screening level environmental risk assessments and a computational platform has been developed for derivation with aquatic species toxicity data (https://envirotoxdatabase.org/). Current research and development areas include assessing mode of action-based chemical groupings, conservatism in estimated PNECs and ecoTTCs compared to existing regulatory values, and the influence of taxa (e.g., algae, invertebrates, and fish) composition in the distribution of PNEC values. The ecoTTC continues to develop as a valuable alternative strategy within the toolbox of traditional and new approach methods for ecological chemical assessment. This brief review article describes the ecoTTC concept and potential applications in ecological risk assessment, provides an overview of the ecoTTC workflow and how the values can be derived, and highlights recent developments and ongoing research. Future applications of ecoTTC concept in different disciplines are discussed along with opportunities for its use.",2021-03-05 +32362126,TaxIt: An Iterative Computational Pipeline for Untargeted Strain-Level Identification Using MS/MS Spectra from Pathogenic Single-Organism Samples.,"Untargeted accurate strain-level classification of a priori unidentified organisms using tandem mass spectrometry is a challenging task. Reference databases often lack taxonomic depth, limiting peptide assignments to the species level. However, the extension with detailed strain information increases runtime and decreases statistical power. In addition, larger databases contain a higher number of similar proteomes. 
We present TaxIt, an iterative workflow to address the increasing search space required for MS/MS-based strain-level classification of samples with unknown taxonomic origin. TaxIt first applies reference sequence data for initial identification of species candidates, followed by automated acquisition of relevant strain sequences for low level classification. Furthermore, proteome similarities resulting in ambiguous taxonomic assignments are addressed with an abundance weighting strategy to increase the confidence in candidate taxa. For benchmarking the performance of our method, we apply our iterative workflow on several samples of bacterial and viral origin. In comparison to noniterative approaches using unique peptides or advanced abundance correction, TaxIt identifies microbial strains correctly in all examples presented (with one tie), thereby demonstrating the potential for untargeted and deeper taxonomic classification. TaxIt makes extensive use of public, unrestricted, and continuously growing sequence resources such as the NCBI databases and is available under open-source BSD license at https://gitlab.com/rki_bioinformatics/TaxIt.",2020-05-15 +27170236,Advancing Exposure Science through Chemical Data Curation and Integration in the Comparative Toxicogenomics Database.,"

Background

Exposure science studies the interactions and outcomes between environmental stressors and human or ecological receptors. To augment its role in understanding human health and the exposome, we aimed to centralize and integrate exposure science data into the broader biological framework of the Comparative Toxicogenomics Database (CTD), a public resource that promotes understanding of environmental chemicals and their effects on human health.

Objectives

We integrated exposure data within the CTD to provide a centralized, freely available resource that facilitates identification of connections between real-world exposures, chemicals, genes/proteins, diseases, biological processes, and molecular pathways.

Methods

We developed a manual curation paradigm that captures exposure data from the scientific literature using controlled vocabularies and free text within the context of four primary exposure concepts: stressor, receptor, exposure event, and exposure outcome. Using data from the Agricultural Health Study, we have illustrated the benefits of both centralization and integration of exposure information with CTD core data.

Results

We have described our curation process, demonstrated how exposure data can be accessed and analyzed in the CTD, and shown how this integration provides a broad biological context for exposure data to promote mechanistic understanding of environmental influences on human health.

Conclusions

Curation and integration of exposure data within the CTD provides researchers with new opportunities to correlate exposures with human health outcomes, to identify underlying potential molecular mechanisms, and to improve understanding about the exposome.

Citation

Grondin CJ, Davis AP, Wiegers TC, King BL, Wiegers JA, Reif DM, Hoppin JA, Mattingly CJ. 2016. Advancing exposure science through chemical data curation and integration in the Comparative Toxicogenomics Database. Environ Health Perspect 124:1592-1599; http://dx.doi.org/10.1289/EHP174.",2016-05-12 +27845739,Impact of SNPs on Protein Phosphorylation Status in Rice (Oryza sativa L.). ,"Single nucleotide polymorphisms (SNPs) are widely used in functional genomics and genetics research work. The high-quality sequence of rice genome has provided a genome-wide SNP and proteome resource. However, the impact of SNPs on protein phosphorylation status in rice is not fully understood. In this paper, we firstly updated rice SNP resource based on the new rice genome Ver. 7.0, then systematically analyzed the potential impact of Non-synonymous SNPs (nsSNPs) on the protein phosphorylation status. There were 3,897,312 SNPs in Ver. 7.0 rice genome, among which 9.9% was nsSNPs. Whilst, a total 2,508,261 phosphorylated sites were predicted in rice proteome. Interestingly, we observed that 150,197 (39.1%) nsSNPs could influence protein phosphorylation status, among which 52.2% might induce changes of protein kinase (PK) types for adjacent phosphorylation sites. We constructed a database, SNP_rice, to deposit the updated rice SNP resource and phosSNPs information. It was freely available to academic researchers at http://bioinformatics.fafu.edu.cn. As a case study, we detected five nsSNPs that potentially influenced heterotrimeric G proteins phosphorylation status in rice, indicating that genetic polymorphisms showed impact on the signal transduction by influencing the phosphorylation status of heterotrimeric G proteins. 
The results in this work could be a useful resource for future experimental identification and provide interesting information for better rice breeding.",2016-11-11 +34868711,Impact of smoking cannabidiol (CBD)-rich marijuana on driving ability.,"To investigate effects of smoking cannabidiol (CBD)-rich marijuana on driving ability and determine free CBD and Δ9-tetrahydrocannabinol (THC) concentrations in capillary blood samples, a randomised, double-blind, placebo-controlled, two-way crossover pilot study was conducted with 33 participants. Participants smoked a joint containing 500 mg of tobacco and either 500 mg of CBD-rich marijuana (16.6% total CBD; 0.9% total THC) or 500 mg of a placebo substance, then performed three different dimensions of the Vienna Test System TRAFFIC examining reaction time, behaviour under stress, and concentration performance. For further assessment of participants' fitness to drive, three tests of balance and coordination were evaluated and vital signs (blood pressure and pulse) were measured. Dried blood spot samples of capillary blood were taken after smoking and after completion of the tests to determine the cannabinoid concentrations (CBD, THC and THC-metabolites). The results revealed no significant differences between the effects of smoking CBD-rich marijuana and placebo on reaction time, motor time, behaviour under stress, or concentration performance. Maximum free CBD and THC concentrations in capillary blood were detected shortly after smoking, ranging between 2.6-440.0 ng/mL and 6.7-102.0 ng/mL, respectively. After 45 min, capillary blood concentrations had already declined and were in the range of 1.9-135.0 ng/mL (free CBD) and 0.9-38.0 ng/mL (free THC). Although the observed levels of free THC concentrations have been reported to cause symptoms of impairment in previous studies in which THC-rich marijuana was smoked, no signs of impairment were found in the current study. 
This finding suggests that higher CBD concentrations cause a negative allosteric effect in the endocannabinoid system, preventing the formation of such symptoms. Nevertheless, it is recommended that consumers refrain from driving for several hours after smoking CBD-rich marijuana, as legal THC concentration limits may be exceeded. Supplemental data for this article is available online at https://doi.org/10.1080/20961790.2021.1946924 .",2021-09-28 +34432983,Perceptions of African American English by Students in Speech-Language Pathology Programs.,"Purpose Despite the increased awareness that all dialects are valid linguistic forms, perceptions of African American English (AAE) use are often negative in the general population. Students training for careers as speech-language pathologists (SLPs) are required to have coursework relating to cultural and linguistic diversity. However, little is known about the perceptions of AAE among students in SLP programs. Method Seventy-three students from 46 randomly selected university programs in the United States completed an online survey including explicit statements regarding the validity of AAE and a matched-guide task assessing participants' implicit perceptions of AAE. Participants were randomly assigned to one of four audio pairings that differed in terms of the dialect spoken and the formality of the conversational context. Participants rated the speaker on 11 attributes (e.g., literate/illiterate, rich/poor) using the Revised Speech Dialect Attitudinal Scale. Results Participants indicated positive opinions of statements on the validity of AAE. However, across three categories of personal attributes-sociointellectual, aesthetic, and dynamism-participants who heard the Mainstream American English recordings rated the speaker differently than recordings including AAE. Conclusions Students in SLP programs express positive opinions regarding AAE, and yet, they rate speakers who speak AAE lower in personal attributes. 
The results highlight the importance of expanding training for future SLPs to include not only explicit statements about the value of AAE but also activities addressing implicit perceptions of dialect use. We provide a brief discussion of how the current data can be implemented for such an activity. Lesson plans and materials are provided as supplemental materials. Supplemental Material https://doi.org/10.23641/asha.15241638.",2021-08-25 +30161123,Interactive implementations of thermodynamics-based RNA structure and RNA-RNA interaction prediction approaches for example-driven teaching.,"The investigation of RNA-based regulation of cellular processes is becoming an increasingly important part of biological or medical research. For the analysis of this type of data, RNA-related prediction tools are integrated into many pipelines and workflows. In order to correctly apply and tune these programs, the user has to have a precise understanding of their limitations and concepts. Within this manuscript, we provide the mathematical foundations and extract the algorithmic ideas that are core to state-of-the-art RNA structure and RNA-RNA interaction prediction algorithms. To allow the reader to change and adapt the algorithms or to play with different inputs, we provide an open-source web interface to JavaScript implementations and visualizations of each algorithm. The conceptual, teaching-focused presentation enables a high-level survey of the approaches, while providing sufficient details for understanding important concepts. This is boosted by the simple generation and study of examples using the web interface available at http://rna.informatik.uni-freiburg.de/Teaching/. 
In combination, we provide a valuable resource for teaching, learning, and understanding the discussed prediction tools and thus enable a more informed analysis of RNA-related effects.",2018-08-30 +34251276,Discourses Around Male IPV Related Systemic Biases on Reddit.,"To date research on intimate partner violence (IPV) has focused on the experience of females. The limited studies on male IPV survivors have shown that they are less likely to disclose their IPV experiences. Systemic biases may marginalize and silence male IPV survivors.The current study sought to explore the discourse around perceived systemic biases that may be present for male IPV survivors.A widely used social networking site (http://www.reddit.com/) was scraped for submissions relating to male IPV. Search was carried out using three keywords resulting in 917 submissions, out of which 82 met inclusion criteria. Submissions were included in final analysis if they consisted of more than half a page of data pertaining to male IPV. Thematic content analysis was utilized to analyze the data.Responses reflect common experiences with participants identifying multiple sources of perceived systemic biases: (1) social norms, (2) legal system, (3) social services, (4) media, and (5) government.The sources of potential support for male IPV survivors exhibit substantial pervasive biases against males as victims of IPV. Findings from current study can inform policies across multiple systems.",2021-07-12 +34465994,Power Spectral Changes of Quantitative EEG in the Subjective Cognitive Decline: Comparison of Community Normal Control Groups.,"

Purpose

The purpose of this study is to compare and analyze the power spectral changes between subjective cognitive decline (SCD) subjects and normal controls (NC) while checking the preclinical stage of AD in the SCD subjects and to use the derived data for biomarker research that can diagnose early-stage AD in the future.

Methods

We recruited 23 SCD patients and 23 normal control subjects and QEEG analysis including power spectral density (PSD) and source-level analysis were performed. An automated preprocessing procedure and statistical analysis were performed by iSync Brain® (iMediSync Inc., Republic of Korea) (https://isyncbrain.com/) using the international standard 10-20 system (19 electrodes).

Results

In the absolute PSD analysis, there was no statistically significant difference in any of the EEG power measurements of the 19 channels. In the relative PSD analysis, the average delta band power of the SCD group was significantly higher in Fp2, F4, and F8 than in NC. Alpha1 band power of the O1 channel was 22.56±16.05 for the SCD group and 33.19±19.05 for the NC (p-value <0.05). Source-level analysis did not show a statistically significant difference.

Conclusion

SCD subjects showed a partial increase of delta waves in the frontal lobe region and a partial decrease in alpha1, a fast wave in the occipital region, compared to the NC. SCD is considered one of the earliest clinical symptoms of AD and it is predicted to be related to minor nerve damage. We were able to observe the power spectral changes in SCD subjects in this cross-sectional study, a large number of subjects and longitudinal studies are needed to evaluate their predictability for future deterioration such as conversion to MCI.",2021-08-24 +33404525,CERES: a cryo-EM re-refinement system for continuous improvement of deposited models.,"The field of electron cryomicroscopy (cryo-EM) has advanced quickly in recent years as the result of numerous technological and methodological developments. This has led to an increase in the number of atomic structures determined using this method. Recently, several tools for the analysis of cryo-EM data and models have been developed within the Phenix software package, such as phenix.real_space_refine for the refinement of atomic models against real-space maps. Also, new validation metrics have been developed for low-resolution cryo-EM models. To understand the quality of deposited cryo-EM structures and how they might be improved, models deposited in the Protein Data Bank that have map resolutions of better than 5 Å were automatically re-refined using current versions of Phenix tools. The results are available on a publicly accessible web page (https://cci.lbl.gov/ceres). The implementation of a Cryo-EM Re-refinement System (CERES) for the improvement of models deposited in the wwPDB, and the results of the re-refinements, are described. Based on these results, contents are proposed for a `cryo-EM Table 1', which summarizes experimental details and validation metrics in a similar way to `Table 1' in crystallography. 
The consistent use of robust metrics for the evaluation of cryo-EM models and data should accompany every structure deposition and be reported in scientific publications.",2021-01-01 +32454857,UNaProd: A Universal Natural Product Database for Materia Medica of Iranian Traditional Medicine.,"

Background

Iranian traditional medicine (ITM) is a holistic medical system that uses a wide range of medicinal substances to treat disease. Reorganization and standardization of the data on ITM concepts is a necessity for optimal use of this rich source. In an initial step towards this goal, we created a database of ITM materia medica. Main Body. Primarily based on Makhzan al-Advieh, which is the most recent encyclopedia of materia medica in ITM with the largest number of monographs, a database of natural medicinal substances was created using both text mining methods and manual editing. UNaProd, a Universal Natural Product database for materia medica of ITM, is currently host to 2696 monographs, from herbal to animal to mineral compounds in 16 diverse attributes such as origin and scientific name. Currently, systems biology, and more precisely systems medicine and pharmacology, can be an aid in providing rationalizations for many traditional medicines and elucidating a great deal of knowledge they can offer to guide future research in medicine.

Conclusions

A database of materia medica is a stepping stone in creating a systems pharmacology platform of ITM that encompasses the relationships between the drugs, their targets, and diseases. UNaProd is hyperlinked to IrGO and CMAUP databases for Mizaj and molecular features, respectively, and it is freely available at http://jafarilab.com/unaprod/.",2020-05-13 +33764612,Systematic investigation of PRMT6 substrate recognition reveals broad specificity with a preference for an RG motif or basic and bulky residues.,"Protein arginine methyltransferase 6 (PRMT6) catalyses the asymmetric dimethylation of arginines on numerous substrate proteins within the human cell. In particular, PRMT6 methylates histone H3 arginine 2 (H3R2) which affects both gene repression and activation. However, the substrate specificity of PRMT6 has not been comprehensively analysed. Here, we systematically characterise the substrate recognition motif of PRMT6, finding that it has broad specificity and recognises the RG motif. Working with a H3 tail peptide as a template, on which we made 204 amino acid substitutions, we use targeted mass spectrometry to measure their effect on PRMT6 in vitro activity. We first show that PRMT6 methylates R2 and R8 in the H3 peptide, although H3R8 is methylated with lower efficiency and is not an in vivo PRMT6 substrate. We then quantify the effect of 194 of these amino acid substitutions on methylation at both H3R2 and H3R8. In both cases, we find that PRMT6 tolerates essentially any amino acid substitution in the H3 peptide, but that positively charged and bulky residues are preferred near the target arginine. We show that PRMT6 also has preference for glycine, but only in the position immediately following the target arginine. This indicates that PRMT6 recognises the RG motif rather than the RGG motif. We further confirm this preference for the RG motif on another PRMT6 substrate, histone H4R3. 
This broad specificity and recognition of RG rather than RGG are distinctive among the PRMT family and has implications for the development of drugs to selectively target PRMT6. DATABASES: Panorama Public (https://panoramaweb.org/PRMT6motif.url); ProteomeXchange (PXD016711).",2021-04-14 +33887543,SLC6A3 as a potential circulating biomarker for gastric cancer detection and progression monitoring.,"

Background

Gastric cancer is a malignant tumor originating from the gastric mucosal epithelium, with no obvious symptoms at the early stage. The dopamine transporter gene (SLC6A3) is involved in the metabolism of dopamine and catecholamine and is a potential gene for Parkinson's disease and alcoholism. But the role of SLC6A3 in gastric cancer is still unknown. The aim of our study is to investigate the potential diagnostic value of SLC6A3 on gastric cancer.

Methods

Quantitative real-time PCR (RT-qPCR) was used to detect the expression of SLC6A3 in clinical samples and cells. A total of 246 samples were enrolled in this study (26 pairs of tissue samples; serum of 113 patients with gastric cancer, 51 patients with polyps and 56 healthy controls). The diagnostic value of SLC6A3 was evaluated by the ROC curve, and the changes of SLC6A3 expression before and after surgery were analyzed. The prognostic value, interacting proteins and related pathways of SLC6A3 were evaluated by TCGA analysis in the UALCAN database (http://ualcan.path.uab.edu/).

Results

The expression level of SLC6A3 in gastric cancer was significantly higher than that in controls. Further, the area under the ROC curve (AUC) for SLC6A3, CEA and CA19-9 was 0.818 (95 % confidence interval [CI]: 0.754 to 0.883, P < 0.001), and the expression level of SLC6A3 in the serum of patients with gastric cancer decreased significantly after surgery (P < 0.001). Bioinformatic enrichment analysis of SLC6A3 displayed the relevant metabolic pathways involved in its interacting proteins.

Conclusion

SLC6A3 is involved in the occurrence and development of gastric cancer and can be used as a potential diagnostic indicator for gastric cancer.",2021-04-14 +33237325,CATH: increased structural coverage of functional space.,"CATH (https://www.cathdb.info) identifies domains in protein structures from wwPDB and classifies these into evolutionary superfamilies, thereby providing structural and functional annotations. There are two levels: CATH-B, a daily snapshot of the latest domain structures and superfamily assignments, and CATH+, with additional derived data, such as predicted sequence domains, and functionally coherent sequence subsets (Functional Families or FunFams). The latest CATH+ release, version 4.3, significantly increases coverage of structural and sequence data, with an addition of 65,351 fully-classified domains structures (+15%), providing 500 238 structural domains, and 151 million predicted sequence domains (+59%) assigned to 5481 superfamilies. The FunFam generation pipeline has been re-engineered to cope with the increased influx of data. Three times more sequences are captured in FunFams, with a concomitant increase in functional purity, information content and structural coverage. FunFam expansion increases the structural annotations provided for experimental GO terms (+59%). We also present CATH-FunVar web-pages displaying variations in protein sequences and their proximity to known or predicted functional sites. We present two case studies (1) putative cancer drivers and (2) SARS-CoV-2 proteins. Finally, we have improved links to and from CATH including SCOP, InterPro, Aquaria and 2DProt.",2021-01-01 +34427156,Frailty Impact during and after Pulmonary Rehabilitation.,"Frailty is a condition of reduced physiologic reserve common in COPD candidates to pulmonary rehabilitation, however no study has investigated whether frailty impacts the decline that a great part of COPD patients face after the completion of the rehabilitation program. 
Study objectives are to verify frailty impact on pulmonary rehabilitation outcomes during and after the program. This is a secondary analysis of a longitudinal study. Stable COPD patients GOLD I-III were randomized to a three-month endurance versus endurance and resistance training. Participants performed a multidimensional assessment at baseline, at the end of the rehabilitation program and after six months. Frailty was defined using a two-step approach including PRISMA-7 and Timed ""Up and Go"" test. Frailty interaction with time was evaluated using generalized least-squared regression models for repeated measures, correcting for potential confounders. Of the 53 participants with a mean age of 73 (SD:8) years 38 (72%) were frail. The mean 6MWD and V'O2peak increased in frail and no frail patients during pulmonary rehabilitation and declined after its completion, while CAT score showed a steep decline during the training, and a mild decline later. Frailty showed a significant interaction with time in terms of 6MWD variation during (β adj:43.6 meters, p-value:0.01) and after (β adj:-47 meters, p-value:0.02) pulmonary rehabilitation; no significant interaction was found in terms of V'O2peak and CAT score variation. In conclusion, frail COPD patients have a higher potential to benefit from pulmonary rehabilitation, but a higher risk to have a steeper decline later.Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.1967915 .",2021-08-24 +33835461,WHISTLE: A Functionally Annotated High-Accuracy Map of Human m6A Epitranscriptome.,"N6-Methyladenosine (m6A) is the most prevalent posttranscriptional modification in eukaryotes and plays a pivotal role in various biological processes, such as splicing, RNA degradation, and RNA-protein interaction. Accurately identification of the location of m6A is essential for related downstream studies. 
In this chapter, we introduce a prediction framework WHISTLE, which enables us to acquire so far the most accurate map of the transcriptome-wide human m6A RNA-methylation sites (with an average AUC: 0.948 and 0.880 under the full transcript or mature messenger RNA models, respectively, when tested on independent datasets). Besides, each individual m6A site was also functionally annotated according to the ""guilt-by-association"" principle by integrating RNA methylation data, gene expression data and protein-protein interaction data. A web server was constructed for conveniently querying the predicted RNA methylation sites and their putative biological functions. The website supports the query by genes, by GO function, table view, and the download of all the functionally annotated map of predicted map of human m6A epitranscriptome. The WHISTLE web server is freely available at: www.xjtlu.edu.cn/biologicalsciences/whistle and http://whistle-epitranscriptome.com .",2021-01-01 +34426108,The association between the incidence risk of pneumonitis and PD-1/PD-L1 inhibitors in advanced NSCLC: A meta-analysis of randomized controlled trials.,"

Objective

Immune checkpoint inhibitors (ICIs) have shown a significant efficacy for patients with non-small cell lung cancer (NSCLC). However, checkpoint inhibitor pneumonitis (CIP) is a rare but severe and life-threatening adverse event. Hence, we performed a systematic review and meta-analysis to evaluate the incidence and risk of CIP in patients with NSCLC.

Methods

Pubmed, Embase, Cochrane Library and ClinicalTrials.gov (http://clinicaltrials.gov/) were searched up to December 15, 2020. Studies regarding all-grade and high-grade pneumonitis were included. The data was analyzed using meta-packages of R 3.6.0.

Results

A total of sixteen randomized controlled trials including 9500 patients were identified for further evaluation. The overall incidence of all-grade and high-grade CIP was 4.17% and 2.02%, respectively. Compared with conventional chemotherapy, patients treated with ICIs had a significantly increased risk of all-grade (RR: 4.11, p < 0.0001) and high-grade (RR: 3.16, p < 0.0001) pneumonitis. Subgroup analysis showed that ICIs combined with chemotherapy were associated with a higher incidence of CIP than monotherapy alone (6.03% vs 3.32%, p = 0.01). In addition, the rate of death owing to CIP was higher than that owing to chemotherapy-mediated pneumonitis.

Conclusion

There were a higher incidence and risk of pneumonitis with the application of ICIs when compared with chemotherapy. Higher mortality rate of pneumonitis was more frequent in ICIs group. Thus, early detection, proper administration and optimal management are needed for physicians prevent potentially CIP deterioration.",2021-08-10 +32228437,iMarmot: an integrative platform for comparative and functional genomics of marmots.,"

Background

Marmots are large Holarctic rodents with unique biological features, making them potential animal models in various research fields. Due to the rapid accumulation of the genetic data in marmots, a highly integrative database is urgently needed.

Description

iMarmot is freely available on the web at http://www.marmotdb.org/ and currently contains the biological information of 14 marmots, genomic sequence of 6 marmots, syntenic relationship and orthologs among 3 marmots, and expression profiles of several hibernators and plague hosts. To assist with the genomic and transcriptomic analysis, we also integrated a set of analysis and visualization tools, such as KEGG or GO enrichment analysis, PCA, Blast, Muscle, GeneWise, Lastz, and JBrowse. Particularly, one DEGs (differentially expressed genes) module has been implemented in this database to visualize the gene expression changes in hibernators and plague hosts.

Conclusion

This database will provide comprehensive information and analysis platform for researchers interested in understanding the biological features of marmots.",2020-03-30 +29873704,A comparative synteny analysis tool for target-gene SNP marker discovery: connecting genomics data to breeding in Solanaceae. ,"It is necessary for molecular breeders to overcome the difficulties in applying abundant genomic information to crop breeding. Candidate orthologs would be discovered more efficiently in less-studied crops if the information gained from studies of related crops were used. We developed a comparative analysis tool and web-based genome viewer to identify orthologous genes based synteny as well as sequence similarity between tomato, pepper and potato. The tool has a step-by-step interface with multiple viewing levels to support the easy and accurate exploration of functional orthologs. Furthermore, it provides access to single nucleotide-polymorphism markers from the massive genetic resource pool in order to accelerate the development of molecular markers for candidate orthologs in the Solanaceae. This tool provides a bridge between genome data and breeding by supporting effective marker development, data utilization and communication.Database URL: http://tgsol.seeders.co.kr/scomp/.",2018-01-01 +34236662,GO Enrichment Analysis for Differential Proteomics Using ProteoRE.,"With the increased simplicity of producing proteomics data, the bottleneck has now shifted to the functional analysis of large lists of proteins to translate this primary level of information into meaningful biological knowledge. Tools implementing such approach are a powerful way to gain biological insights related to their samples, provided that biologists/clinicians have access to computational solutions even when they have little programming experience or bioinformatics support. 
To achieve this goal, we designed ProteoRE (Proteomics Research Environment), a unified online research service that provides end-users with a set of tools to interpret their proteomics data in a collaborative and reproducible manner. ProteoRE is built upon the Galaxy framework, a workflow system allowing for data and analysis persistence, and providing user interfaces to facilitate the interaction with tools dedicated to the functional and the visual analysis of proteomics datasets. A set of tools relying on computational methods selected for their complementarity in terms of functional analysis was developed and made accessible via the ProteoRE web portal. In this chapter, a step-by-step protocol linking these tools is designed to perform a functional annotation and GO-based enrichment analyses applied to a set of differentially expressed proteins as a use case. Analytical practices, guidelines as well as tips related to this strategy are also provided. Tools, datasets, and results are freely available at http://www.proteore.org , allowing researchers to reuse them.",2021-01-01 +28361715,SheddomeDB: the ectodomain shedding database for membrane-bound shed markers.,"

Background

A number of membrane-anchored proteins are known to be released from cell surface via ectodomain shedding. The cleavage and release of membrane proteins has been shown to modulate various cellular processes and disease pathologies. Numerous studies revealed that cell membrane molecules of diverse functional groups are subjected to proteolytic cleavage, and the released soluble form of proteins may modulate various signaling processes. Therefore, in addition to the secreted protein markers that undergo secretion through the secretory pathway, the shed membrane proteins may comprise an additional resource of noninvasive and accessible biomarkers. In this context, identifying the membrane-bound proteins that will be shed has become important in the discovery of clinically noninvasive biomarkers. Nevertheless, a data repository for biological and clinical researchers to review the shedding information, which is experimentally validated, for membrane-bound protein shed markers is still lacking.

Results

In this study, the database SheddomeDB was developed to integrate publicly available data of the shed membrane proteins. A comprehensive literature survey was performed to collect the membrane proteins that were verified to be cleaved or released in the supernatant by immunological-based validation experiments. From 436 studies on shedding, 401 validated shed membrane proteins were included, among which 199 shed membrane proteins have not been annotated or validated yet by existing cleavage databases. SheddomeDB attempted to provide a comprehensive shedding report, including the regulation of shedding machinery and the related function or diseases involved in the shedding events. In addition, our published tool ShedP was embedded into SheddomeDB to support researchers for predicting the shedding event on unknown or unrecorded membrane proteins.

Conclusions

To the best of our knowledge, SheddomeDB is the first database for the identification of experimentally validated shed membrane proteins and currently may provide the most number of membrane proteins for reviewing the shedding information. The database included membrane-bound shed markers associated with numerous cellular processes and diseases, and some of these markers are potential novel markers because they are not annotated or validated yet in other databases. SheddomeDB may provide a useful resource for discovering membrane-bound shed markers. The interactive web of SheddomeDB is publicly available at http://bal.ym.edu.tw/SheddomeDB/ .",2017-03-14 +33848195,What Influences Speech-Language Pathologists' Use of Different Types of Language Assessments for Elementary School-Age Children?,"Purpose This study reports on data from a survey of speech-language pathologists' (SLPs) language assessment practices for elementary school-age children. The objective was to investigate the regularity with which SLPs use different types of assessments (described across data types, task types, environmental contexts, and dynamic features). This study also investigated factors that influence assessment practice, the main sources from which SLPs obtain information on language assessment and the main challenges reported by SLPs in relation to language assessment. Method A web-based survey was used to collect information from 407 Australian SLPs regarding the types of assessments they use. Factors that influenced the regularity with which different types of assessments were used were investigated using regression analysis. Results Most SLPs regularly used assessments that are norm-referenced, decontextualized, and conducted in a clinical context and less regularly used other types of assessments. Service agency, Australian state, and SLPs' years of experience were found to influence the regularity with which some types of assessments were used. 
Informal discussions with colleagues were the most frequently identified source of information on assessment practice. Main challenges related to limited time, lack of assessment materials, and lack of confidence in assessing children from culturally and linguistically diverse backgrounds. Conclusions SLPs could improve current language assessment practice for elementary school-age children through more regular use of some types of assessments. Actions to facilitate evidence-based assessment practice should consider the contextual differences that exist between service agencies and states and address challenges that SLPs experience in relation to language assessment. Supplemental Material https://doi.org/10.23641/asha.14378948.",2021-04-13 +31680157,ChimerDB 4.0: an updated and expanded database of fusion genes.,"Fusion genes represent an important class of biomarkers and therapeutic targets in cancer. ChimerDB is a comprehensive database of fusion genes encompassing analysis of deep sequencing data (ChimerSeq) and text mining of publications (ChimerPub) with extensive manual annotations (ChimerKB). In this update, we present all three modules substantially enhanced by incorporating the recent flood of deep sequencing data and related publications. ChimerSeq now covers all 10 565 patients in the TCGA project, with compilation of computational results from two reliable programs of STAR-Fusion and FusionScan with several public resources. In sum, ChimerSeq includes 65 945 fusion candidates, 21 106 of which were predicted by multiple programs (ChimerSeq-Plus). ChimerPub has been upgraded by applying a deep learning method for text mining followed by extensive manual curation, which yielded 1257 fusion genes including 777 cases with experimental supports (ChimerPub-Plus). ChimerKB includes 1597 fusion genes with publication support, experimental evidences and breakpoint information. 
Importantly, we implemented several new features to aid estimation of functional significance, including the fusion structure viewer with domain information, gene expression plot of fusion positive versus negative patients and a STRING network viewer. The user interface also was greatly enhanced by applying responsive web design. ChimerDB 4.0 is available at http://www.kobic.re.kr/chimerdb/.",2020-01-01 +34420484,Genetic association study of CTLA4 and FCεRIα polymorphisms in asthmatic patients in the southwestern region of Iran.,"Asthma is a heterogeneous chronic pulmonary disease that develops due to the interaction of genetic and environmental factors. This study aimed to investigate the polymorphisms of CTLA4(SNP-318C > T, SNP + 49A > G) and FCεRIα(SNP-344T > C) genes in asthmatic patients in Southwest Iran. The study enrolled 200 patients with asthma of Arab and Bakhtiary descent and 200 healthy controls, where asthmatic patients and healthy controls were selected based on a spirometry test. Genomic DNA from whole blood samples using the TaqMan assay was used to study the genotypes of patients and healthy controls.The results indicated no statistically significant difference between cases and controls for the SNP-344C > T of the FCεR1α gene and the SNP + 49A > G, SNP-318C > T of the CTLA4 gene. There was a significant correlation between the CTLA4-318C > T allele frequency in both the case and control groups (OR = 1.83; 95%CI, 1.14-2.94; P = 0.01). We stratified genotypes according to age, gender, ethnicity, and smoking status and discovered a significant suggestive association between the SNP + 49A > G of the CTLA4 gene and smoking. Additionally, SNP + 49A > G was found to be associated with gender and age. The results indicated that the SNP-318C > T polymorphism in the CTLA4 gene might contribute to the development of asthma in the studied population. 
Meanwhile, smoking can exacerbate asthma in individuals with SNP + 49A > G of the CTLA4 gene.Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1964525 .",2021-08-23 +33535061,ES-ARCNN: Predicting enhancer strength by using data augmentation and residual convolutional neural network.,"Enhancers are non-coding DNA sequences bound by proteins called transcription factors. They function as distant regulators of gene transcription and participate in the development and maintenance of cell types and tissues. Since experimental validation of enhancers is expensive and time-consuming, many computational methods have been developed to predict enhancers and their strength. However, most of these methods still lack good performance in the prediction of enhancer strength. Here, we present a method to predict Enhancers Strength (i.e., strong and weak) by using Augmented data and Residual Convolutional Neural Network (ES-ARCNN). To train ES-ARCNN, we used two data augmentation tricks (i.e., reverse complement and shift) to previously identified enhancers for enlarging a previously identified dataset of enhancers. We further employed a residual convolutional neural network and trained it using the augmented dataset. Compared with other state-of-the-art methods in the 10-fold cross-validation (CV) test, ES-ARCNN has the best performance with the accuracy of 66.17%, and the tricks of data augmentation can effectively improve the prediction performance. We further tested ES-ARCNN on an independent dataset and obtained 65.5% accuracy, which has more than 4% improvement over the other three existing methods. The results in 10CV and independent tests show that ES-ARCNN can effectively predict the enhancer strength. The transcription factor binding sites (TFBSs) enrichment analysis shows that from the mechanistic perspective, enhancer strength is associated with a higher density of important TFBSs in a tissue. 
A user-friendly web-application is also provided at http://compgenomics.utsa.edu/ES-ARCNN/.",2021-01-31 +31701126,PlantRegMap: charting functional regulatory maps in plants.,"With the goal of charting plant transcriptional regulatory maps (i.e. transcription factors (TFs), cis-elements and interactions between them), we have upgraded the TF-centred database PlantTFDB (http://planttfdb.cbi.pku.edu.cn/) to a plant regulatory data and analysis platform PlantRegMap (http://plantregmap.cbi.pku.edu.cn/) over the past three years. In this version, we updated the annotations for the previously collected TFs and set up a new section, 'extended TF repertoires' (TFext), to allow users prompt access to the TF repertoires of newly sequenced species. In addition to our regular TF updates, we are dedicated to updating the data on cis-elements and functional interactions between TFs and cis-elements. We established genome-wide conservation landscapes for 63 representative plants and then developed an algorithm, FunTFBS, to screen for functional regulatory elements and interactions by coupling the base-varied binding affinities of TFs with the evolutionary footprints on their binding sites. Using the FunTFBS algorithm and the conservation landscapes, we further identified over 20 million functional TF binding sites (TFBSs) and two million functional interactions for 21 346 TFs, charting the functional regulatory maps of these 63 plants. These resources are publicly available at PlantRegMap (http://plantregmap.cbi.pku.edu.cn/) and a cloud-based mirror (http://plantregmap.gao-lab.org/), providing the plant research community with valuable resources for decoding plant transcriptional regulatory systems.",2020-01-01 +,First Report of ‘Candidatus Phytoplasma asteris’ Associated with Witches’ Brooms on Sharp-Flowered Rush (Juncus acutiflorus) in Poland,"Juncus acutiflorus L. (Juncaceae) is a species of rush widespread in Europe, North Africa, and Southwest Asia. 
The health of sharp-flowered rush is threatened by witches’ brooms occurrence. Typical symptoms are dense masses of shoots, usually growing from a single point, with the resulting structure resembling a broom. Witches’ brooms are typically formed in response to Livia junci (Liviinae, Hemiptera) feeding, oligofag, which typically feeds on Juncus sp. These symptoms are similar to those observed in Juncus articulatus L. (Juncaceae) related to phytoplasma infection in Silesia, Poland (Jarzembowski et al. 2015). In August 2017, samples were collected from nine symptomatic and four asymptomatic plants growing in Jagniątków, Poland. DNA from 100 mg of inflorescence and leaf samples was extracted using a DNeasy Plant Mini Kit (Qiagen, Syngen Biotech, Wrocław, Poland) according to the manufacturer’s protocol. Additionally, 10 L. junci specimens (adult and the last larva stage) were collected from symptomatic plants and preserved in ethanol (75%). DNA from the insects (treated as one sample) was extracted using a DNeasy Blood and Tissue Kit (Qiagen, Syngen Biotech). The extracts were used as the template in polymerase chain reaction (PCR) assays with the universal phytoplasma primers P1/P7 followed by R16F2/R16R2 (Lee et al. 1998). Additionally, primers rp1/rp2 followed by rp3/rp4, allowing amplification of fragments of rpl22 and rps3 genes (Nakamura et al. 1996), and primers AYsecYF1/AYsec for amplification of secY gene were applied. Water blank samples were included as negative controls. PCR products of expected size for each primer set were amplified from the insects and six symptomatic plants that were assayed. No amplification was observed in symptomless J. acutiflorus samples or in water blanks. Amplicons representing three genetic loci were sequenced at Genomed S.A., Warsaw, Poland. DNA samples were sequenced at least twice in both directions. Sequencing results of the PCR products confirmed that six symptomatic plants and the insects were infected by a phytoplasma. 
The obtained sequences were nearly identical, and representative sequences of 16S rDNA fragments (accession nos. MG976242 to MG976246), secY gene (MG983765 to MG983769), and ribosomal protein gene (MG983760 to MG983764) from four plants and one insect sample were deposited in GenBank. Determination of the phytoplasma classification group was based on the nucleotide sequence of the F2n/R2 PCR fragment within the 16S gene. Using iPhyClassifier, the online tool for phytoplasma classification (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier_legacy.cgi), the sequence of J. acutiflorus phytoplasma shared 99.9% identity with that of the ‘Candidatus Phytoplasma asteris’ reference strain (GenBank accession no. M30790). BLAST analysis performed for ribosomal proteins and secY genes confirmed the highest identity (99%) of analyzed sequences with those of ‘Ca. P. asteris’ (CP015149 and KJ394491, respectively). To our knowledge, this is the first report of a phytoplasma of group 16SrI affecting J. acutiflorus in Poland and in the world. The impact of the detected phytoplasma in the regional ecosystem is being assessed. It is highly probable that L. junci is a possible vector for ‘Ca. P. asteris’.",2018-10-01 +33988713,DoChaP: the domain change presenter.,"Alternative splicing results in multiple transcripts of the same gene, possibly encoding for different protein isoforms with different domains. Whereas it is possible to manually determine the effect of alternative splicing on the domain composition for a single event, the process requires the tedious integration of several data sources; it is error prone and not feasible for genome-wide characterization of domains affected by differential splicing. To fulfill the need for an automated solution, we developed the Domain Change Presenter (DoChaP, https://dochap.bgu.ac.il/), a web server for the visualization of exon-domain associations. 
DoChaP visualizes all transcripts of a given gene, the encoded proteins and their domains, and enables a comparison between the transcripts and between their protein products. The colors and organization make the structural effect of alternative splicing events on protein structures easily identified. To enable the study of the conservation of exons structure, alternative splicing, and the effect of alternative splicing on protein domains, DoChaP also provides a two-species comparison of exon-domain associations. DoChaP thus provides a unique and easy-to-use visualization of the exon-domain association and conservation, and will facilitate the study of the structural effects of alternative splicing in health and disease.",2021-07-01 +29158627,Development of transcriptome based web genomic resources of yellow mosaic disease in Vigna mungo.,"Vigna mungo (Urdbean) is cultivated in the tropical and sub-tropical continental region of Asia. It is not only important source of dietary protein and nutritional elements, but also of immense value to human health due to medicinal properties. Yellow mosaic disease caused by Mungbean Yellow Mosaic India Virus is known to incur huge loss to crop, adversely affecting crop yield. Contrasting genotypes are ideal source for knowledge discovery of plant defence mechanism and associated candidate genes for varietal improvement. Whole genome sequence of this crop is yet to be completed. Moreover, genomic resources are also not freely accessible, thus available transcriptome data can be of immense use. V. mungo Transcriptome database, accessible at http://webtom.cabgrid.res.in/vmtdb/ has been developed using available data of two contrasting varieties viz., cv. VM84 (resistant) and cv. T9 (susceptible). De novo assembly was carried out using Trinity and CAP3. Out of total 240,945 unigenes, 165,894 (68.8%) showed similarity with known genes against NR database, and remaining 31.2% were found to be novel. 
We found 22,101 differentially expressed genes in all datasets, 44,335 putative genic SSR markers, 4105 SNPs and Indels, 64,964 transcriptional factor, 546 mature miRNA target prediction in 703 differentially expressed unigenes and 137 pathways. MAPK, salicylic acid-binding protein 2-like, pathogenesis-related protein and NBS-LRR domain were found which may play an important role in defence against pathogens. This is the first web genomic resource of V. mungo for future genome annotation as well as ready to use markers for future variety improvement program.",2017-09-18 +33893808,MutationTaster2021.,"Here we present an update to MutationTaster, our DNA variant effect prediction tool. The new version uses a different prediction model and attains higher accuracy than its predecessor, especially for rare benign variants. In addition, we have integrated many sources of data that only became available after the last release (such as gnomAD and ExAC pLI scores) and changed the splice site prediction model. To more easily assess the relevance of detected known disease mutations to the clinical phenotype of the patient, MutationTaster now provides information on the diseases they cause. Further changes represent a major overhaul of the interfaces to increase user-friendliness whilst many changes under the hood have been designed to accelerate the processing of uploaded VCF files. We also offer an API for the rapid automated query of smaller numbers of variants from within other software. MutationTaster2021 integrates our disease mutation search engine, MutationDistiller, to prioritise variants from VCF files using the patient's clinical phenotype. The novel version is available at https://www.genecascade.org/MutationTaster2021/. 
This website is free and open to all users and there is no login requirement.",2021-07-01 +27899569,MEGARes: an antimicrobial resistance database for high throughput sequencing.,"Antimicrobial resistance has become an imminent concern for public health. As methods for detection and characterization of antimicrobial resistance move from targeted culture and polymerase chain reaction to high throughput metagenomics, appropriate resources for the analysis of large-scale data are required. Currently, antimicrobial resistance databases are tailored to smaller-scale, functional profiling of genes using highly descriptive annotations. Such characteristics do not facilitate the analysis of large-scale, ecological sequence datasets such as those produced with the use of metagenomics for surveillance. In order to overcome these limitations, we present MEGARes (https://megares.meglab.org), a hand-curated antimicrobial resistance database and annotation structure that provides a foundation for the development of high throughput acyclical classifiers and hierarchical statistical analysis of big data. MEGARes can be browsed as a stand-alone resource through the website or can be easily integrated into sequence analysis pipelines through download. Also via the website, we provide documentation for AmrPlusPlus, a user-friendly Galaxy pipeline for the analysis of high throughput sequencing data that is pre-packaged for use with the MEGARes database.",2016-11-28 +30143675,ESCC ATLAS: A population wide compendium of biomarkers for Esophageal Squamous Cell Carcinoma.,"Esophageal cancer (EC) is the eighth most aggressive malignancy and its treatment remains a challenge due to the lack of biomarkers that can facilitate early detection. EC is identified in two major histological forms namely - Adenocarcinoma (EAC) and Squamous cell carcinoma (ESCC), each showing differences in the incidence among populations that are geographically separated. 
Hence the detection of potential drug target and biomarkers demands a population-centric understanding of the molecular and cellular mechanisms of EC. To provide an adequate impetus to the biomarker discovery for ESCC, which is the most prevalent esophageal cancer worldwide, here we have developed ESCC ATLAS, a manually curated database that integrates genetic, epigenetic, transcriptomic, and proteomic ESCC-related genes from the published literature. It consists of 3475 genes associated to molecular signatures such as, altered transcription (2600), altered translation (560), contain copy number variation/structural variations (233), SNPs (102), altered DNA methylation (82), Histone modifications (16) and miRNA based regulation (261). We provide a user-friendly web interface ( http://www.esccatlas.org , freely accessible for academic, non-profit users) that facilitates the exploration and the analysis of genes among different populations. We anticipate it to be a valuable resource for the population specific investigation and biomarker discovery for ESCC.",2018-08-24 +33846395,Thyroid hormone receptor α1 acts as a new squamous cell lung cancer diagnostic marker and poor prognosis predictor.,"Lung cancer is considered the major cause of cancer-related deaths worldwide. Unfortunately, all chemotherapy regimens used in lung cancer treatment showed nearly the same efficacy. Finding a new therapeutic target that can be used as an alternative after the failure of or in association with chemotherapy to improve the prognosis is an urgent demand. Up to date, it is Known that thyroid hormones (THs) and Thyroid hormone receptors (THRs) control the progression of several types of tumours. Nevertheless, their role in non-small cell lung cancer (NSCLC) is unknown. This study investigated the expression of THRα1 in NSCLC cases and its correlation to tumour clinicopathological parameters to shed new light on the relevance of THRα1 in lung cancer. 
Immunohistochemistry utilizing THRα1 antibody was performed on tissue sections obtained from 80 patients diagnosed with NSCLC. We also investigated the expression of THRα gene in Microarrays of lung squamous cell carcinoma (SCC) and adenocarcinoma (AC) patients by using GEO data sets on https://www.ncbi.nlm.nih.gov . We showed, for the first time, the expression of THRα1 in NSCLC. Intermediate and high THRα1 expressions were detected in (25% and 66.7%) of SCC cases respectively. High THRα1 expression was associated with shorter OS. On the other hand, 86.7% of AC cases revealed low THRα1 expression. Inflammatory cells in SCC cases showed high THRα1 expression. By analysing GEO data sets, a significant increase in THRα gene expression was found in SCC compared to AC cases. Our study underscores the possibility of using THRα1 expression not only as a prognostic marker, but also as an innovative diagnostic additive tool for lung SCC, which could be tested as a potential therapeutic target for SCC in the future.",2021-04-12 +34602293,The effect of pH on the growth rate of Bacillus cereus sensu lato: Quantifying strain variability and modelling the combined effects of temperature and pH.,"In this study, the effect of pH, alone or in combination with temperature, on the maximum growth rate (μmax) of B. cereus sensu lato was investigated. In phase 1, the effect of pH at 30 °C was studied for 16 mesophilic strains and 2 psychrotrophic strains of Bacillus cereus sensu lato. The μmax vs. pH relationship was found to show a similar pattern for all the strains. Several pH models from literature were evaluated and the best performing 'growth rate vs. pH' model selected. A stochastic model was then developed to predict the maximum specific growth rate of mesophilic B. cereus at 30 °C as a function of pH, the intra-species variability being incorporated via considering the model parameters (e.g. pHmin) randomly distributed. 
The predicted maximum specific growth rates were acceptably close to independent published data. In phase 2, the combined effects of temperature and pH were studied. Growth rates were also generated at 15, 20 and 40 °C for a selection of strains and the pH model was fitted at each temperature. Interestingly, the results showed that the estimates for the pHmin parameter for mesophilic strains were lower at 20-30 °C than near the optimum temperature (40 °C), suggesting that experiments for the determination of this parameter should be conducted at lower-than-optimum temperatures. New equations were proposed for the relationship between temperature and the minimum pH-values, which were also consistent with the experimental growth boundaries. The parameters defining this equation quantify the minimum temperature for growth observed experimentally, the temperature of maximum enzyme stability and the maximum temperature for growth. Deviations from the Gamma hypothesis (multiplicative effects of environmental factors on the maximum specific growth rate) were observed near the growth limits, especially at 40 °C. To improve model performance, two approaches, one based on a minimum pH-term (doi: https://doi.org/10.3389/fmicb.2019.01510) and one based on an interaction term (doi: http://dx.doi.org/10.1016/S0168-1605(01)00640-7) were evaluated.",2021-09-24 +34270397,Assessing motivations and gender as factors in college students' views of nonmedical prescription stimulant use.,"Introduction: Two prominent motivation categories of college student nonmedical prescription stimulant use (NMUPS) are for academic and recreational purposes. However, little research focuses on these motivations' association with college students' NMUPS views. Further, limited research assesses if user gender influences views. 
Methods: The current online scenario study implemented a 2 × 2 factorial design assessing 148 college undergraduates' (75% females; Mage = 19.18; SDage = 1.30) NMUPS views based on user motivation and gender. Participants reported their drug use stigmatization, prescription stimulant expectancies (ie, anticipated drug use beliefs and outcomes), and personal substance use. Results and Discussion: Results showed that user gender did not influence participants' NMUPS views; however, participants viewed academic use less negatively compared to recreational use, thus highlighting the need to educate students on the negative consequences of NMUPS, even when use is for academically related tasks. Furthermore, exploratory analyses showed drug use stigmatization and prescription stimulant expectancies predicted participant NMUPS views.Supplemental data for this article can be accessed online at https://doi.org/10.1080/07448481.2021.1942005.",2021-07-16 +30625451,The 'virtual DBS population': five realistic computational models of deep brain stimulation patients for electromagnetic MR safety studies.,"We design, develop, and disseminate a 'virtual population' of five realistic computational models of deep brain stimulation (DBS) patients for electromagnetic (EM) analysis. We found five DBS patients in our institution' research patient database who received high quality post-DBS surgery computer tomography (CT) examinations of the head and neck. Three patients have a single implanted pulse generator (IPG) and the two others have two IPGs (one for each lead). Moreover, one patient has two abandoned leads on each side of the head. For each patient, we combined the head and neck volumes into a 'virtual CT', from which we extracted the full-length DBS path including the IPG, extension cables, and leads. We corrected topology errors in this path, such as self-intersections, using a previously published optimization procedure. 
We segmented the virtual CT volume into bones, internal air, and soft tissue classes and created two-manifold, watertight surface meshes of these distributions. In addition, we added a segmented model of the brain (grey matter, white matter, eyes and cerebrospinal fluid) to one of the model (nickname Freddie) that was derived from a T1-weighted MR image obtained prior to the DBS implantation. We simulated the EM fields and specific absorption rate (SAR) induced at 3 Tesla by a quadrature birdcage body coil in each of the five patient models using a co-simulation strategy. We found that inter-subject peak SAR variability across models was independent of the target averaging mass and equal to ~45%. In our simulations of the full brain segmentation and six simplified versions of the Freddie model, the error associated with incorrect dielectric property assignment around the DBS electrodes was greater than the error associated with modeling the whole model as a single tissue class. Our DBS patient models are freely available on our lab website (Webpage of the Martinos Center Phantom Resource 2018 https://phantoms.martinos.org/Main_Page).",2019-02-04 +34558969,Exposure to Air Pollution in Relation to Risk of Dementia and Related Outcomes: An Updated Systematic Review of the Epidemiological Literature.,"

Background

Dementia is a devastating neurologic condition that is common in older adults. We previously reviewed the epidemiological evidence examining the hypothesis that long-term exposure to air pollution affects dementia risk. Since then, the evidence base has expanded rapidly.

Objectives

With this update, we collectively review new and previously identified epidemiological studies on air pollution and late-life cognitive health, highlighting new developments and critically discussing the merits of the evidence.

Methods

Using a registered protocol (PROSPERO 2020 CRD42020152943), we updated our literature review to capture studies published through 31 December 2020, extracted data, and conducted a bias assessment.

Results

We identified 66 papers (49 new) for inclusion in this review. Cognitive level remained the most commonly considered outcome, and particulate matter (PM) remained the most commonly considered air pollutant. Since our prior review, exposure estimation methods in this research have improved, and more papers have looked at cognitive change, neuroimaging, and incident cognitive impairment/dementia, though methodological concerns remain common. Many studies continue to rely on administrative records to ascertain dementia, have high potential for selection bias, and adjust for putative mediating factors in primary models. A subset of 35 studies met strict quality criteria. Although high-quality studies of fine particulate matter with aerodynamic diameter ≤2.5μm (PM2.5) and cognitive decline generally supported an adverse association, other findings related to PM2.5 and findings related to particulate matter with aerodynamic diameter ≤10μm (PM10, NO2, and NOx) were inconclusive, and too few papers reported findings with ozone to comment on the likely direction of association. Notably, only a few findings on dementia were included for consideration on the basis of quality criteria.

Discussion

Strong conclusions remain elusive, although the weight of the evidence suggests an adverse association between PM2.5 and cognitive decline. However, we note a continued need to confront methodological challenges in this line of research. https://doi.org/10.1289/EHP8716.",2021-09-24 +34208327,"Deep-Framework: A Distributed, Scalable, and Edge-Oriented Framework for Real-Time Analysis of Video Streams. ","Edge computing is the best approach for meeting the exponential demand and the real-time requirements of many video analytics applications. Since most of the recent advances regarding the extraction of information from images and video rely on computation heavy deep learning algorithms, there is a growing need for solutions that allow the deployment and use of new models on scalable and flexible edge architectures. In this work, we present Deep-Framework, a novel open source framework for developing edge-oriented real-time video analytics applications based on deep learning. Deep-Framework has a scalable multi-stream architecture based on Docker and abstracts away from the user the complexity of cluster configuration, orchestration of services, and GPU resources allocation. It provides Python interfaces for integrating deep learning models developed with the most popular frameworks and also provides high-level APIs based on standard HTTP and WebRTC interfaces for consuming the extracted video data on clients running on browsers or any other web-based platform.",2021-06-11 +27189610,"BioSharing: curated and crowd-sourced metadata standards, databases and data policies in the life sciences. ","BioSharing (http://www.biosharing.org) is a manually curated, searchable portal of three linked registries. These resources cover standards (terminologies, formats and models, and reporting guidelines), databases, and data policies in the life sciences, broadly encompassing the biological, environmental and biomedical sciences. 
Launched in 2011 and built by the same core team as the successful MIBBI portal, BioSharing harnesses community curation to collate and cross-reference resources across the life sciences from around the world. BioSharing makes these resources findable and accessible (the core of the FAIR principle). Every record is designed to be interlinked, providing a detailed description not only on the resource itself, but also on its relations with other life science infrastructures. Serving a variety of stakeholders, BioSharing cultivates a growing community, to which it offers diverse benefits. It is a resource for funding bodies and journal publishers to navigate the metadata landscape of the biological sciences; an educational resource for librarians and information advisors; a publicising platform for standard and database developers/curators; and a research tool for bench and computer scientists to plan their work. BioSharing is working with an increasing number of journals and other registries, for example linking standards and databases to training material and tools. Driven by an international Advisory Board, the BioSharing user-base has grown by over 40% (by unique IP address), in the last year thanks to successful engagement with researchers, publishers, librarians, developers and other stakeholders via several routes, including a joint RDA/Force11 working group and a collaboration with the International Society for Biocuration. In this article, we describe BioSharing, with a particular focus on community-led curation.Database URL: https://www.biosharing.org.",2016-05-17 +34041327,Data on the mechanisms of antidiarrhoeal activity of methanol leaf extract of Combretum hypopilinum Diels (Combretaceae): Involvement of opioidergic and (α1 and β)-adrenergic pathways.,"This article describes the dataset for the elucidation of the possible mechanisms of antidiarrhoeal actions of methanol leaves extract of Combretum hypopilinum (Diels) Combretaceae in mice. 
The plant has been used in traditional medicine to treat diarrhoea in Nigeria and other African countries. We introduce the data for the antidiarrhoeal activity of the methanol leaf extract of Combretum hypopilinum at 1,000 mg/kg investigated using charcoal meal test in mice with loperamide (5 mg/kg) as the standard antidiarrhoeal agent. To elucidate the possible mechanisms of its antidiarrhoeal action, naloxone (2 mg/kg), prazosin (1 mg/kg), yohimbine (2 mg/kg), propranolol (1 mg/kg), pilocarpine (1 mg/kg) and isosorbide dinitrate (150 mg/kg) were separately administered to different groups of mice 30 minutes before administration of the extract. Each mouse was dissected using dissecting set, and the small intestine was immediately removed from pylorus to caecum, placed lengthwise on moist filter paper and measured the distance travelled by charcoal relative to the length of the intestine using a calibrated ruler in centimetre. Besides, the peristaltic index and inhibition of charcoal movement of each animal were calculated and recorded. The methods for the data collection is similar to the one used to investigate the possible pathways involved in the antidiarrhoeal action of Combretum hypopilinum in mice in the research article by Ahmad et al. (2020) ""Mechanisms of Antidiarrhoeal Activity of Methanol Leaf Extract of Combretum hypopilinum Diels (Combretaceae): Involvement of Opioidergic and (α1 and β)-Adrenergic Pathways"" (https://doi.org/10.1016/j.jep.2020.113750) [1]. Therefore, this datasets could form a basis for in-depth research to elucidate further the pharmacological properties of the plant Combretum hypopilinum and its bioactive compounds to develop standardized herbal product and novel compound for management of diarrhoea. 
It could also be instrumental for evaluating the plant's pharmacological potentials using other computational-based and artificial intelligence approaches, including predictive modelling and simulation.",2021-05-17 +34558968,Key Characteristics of Cardiovascular Toxicants.,"

Background

The concept of chemical agents having properties that confer potential hazard called key characteristics (KCs) was first developed to identify carcinogenic hazards. Identification of KCs of cardiovascular (CV) toxicants could facilitate the systematic assessment of CV hazards and understanding of assay and data gaps associated with current approaches.

Objectives

We sought to develop a consensus-based synthesis of scientific evidence on the KCs of chemical and nonchemical agents known to cause CV toxicity along with methods to measure them.

Methods

An expert working group was convened to discuss mechanisms associated with CV toxicity.

Results

The group identified 12 KCs of CV toxicants, defined as exogenous agents that adversely interfere with function of the CV system. The KCs were organized into those primarily affecting cardiac tissue (numbers 1-4 below), the vascular system (5-7), or both (8-12), as follows: 1) impairs regulation of cardiac excitability, 2) impairs cardiac contractility and relaxation, 3) induces cardiomyocyte injury and death, 4) induces proliferation of valve stroma, 5) impacts endothelial and vascular function, 6) alters hemostasis, 7) causes dyslipidemia, 8) impairs mitochondrial function, 9) modifies autonomic nervous system activity, 10) induces oxidative stress, 11) causes inflammation, and 12) alters hormone signaling.

Discussion

These 12 KCs can be used to help identify pharmaceuticals and environmental pollutants as CV toxicants, as well as to better understand the mechanistic underpinnings of their toxicity. For example, evidence exists that fine particulate matter [PM ≤2.5μm in aerodynamic diameter (PM2.5)] air pollution, arsenic, anthracycline drugs, and other exogenous chemicals possess one or more of the described KCs. In conclusion, the KCs could be used to identify potential CV toxicants and to define a set of test methods to evaluate CV toxicity in a more comprehensive and standardized manner than current approaches. https://doi.org/10.1289/EHP9321.",2021-09-24 +34473228,Improved estimation of model quality using predicted inter-residue distance. ,"Protein model quality assessment (QA) is an essential component in protein structure prediction, which aims to estimate the quality of a structure model and/or select the most accurate model out from a pool of structure models, without knowing the native structure. QA remains a challenging task in protein structure prediction. Based on the inter-residue distance predicted by the recent deep learning-based structure prediction algorithm trRosetta, we developed QDistance, a new approach to the estimation of both global and local qualities. QDistance works for both single-model and multi-models inputs. We designed several distance-based features to assess the agreement between the predicted and model-derived inter-residue distances. Together with a few widely used features, they are fed into a simple yet powerful linear regression model to infer the global QA scores. The local QA scores for each structure model are predicted based on a comparative analysis with a set of selected reference models. For multi-models input, the reference models are selected from the input based on the predicted global QA scores. For single-model input, the reference models are predicted by trRosetta. 
With the informative distance-based features, QDistance can predict the global quality with satisfactory accuracy. Benchmark tests on the CASP13 and the CAMEO structure models suggested that QDistance was competitive with other methods. Blind tests in the CASP14 experiments showed that QDistance was robust and ranked among the top predictors. Especially, QDistance was the top 3 local QA method and made the most accurate local QA prediction for unreliable local regions. Analysis showed that this superior performance can be attributed to the inclusion of the predicted inter-residue distance. http://yanglab.nankai.edu.cn/QDistance. Supplementary data are available at Bioinformatics online.",2021-09-02 +33557754,Visual4DTracker: a tool to interact with 3D + t image stacks.,"

Background

Biological phenomena usually evolve over time, and recent advances in high-throughput microscopy have made it possible to collect multiple 3D images over time, generating 3D + t (or 4D) datasets. To extract useful information there is the need to extract spatial and temporal data on the particles that are in the images, but particle tracking and feature extraction need some kind of assistance.

Results

This manuscript introduces our new freely downloadable toolbox, the Visual4DTracker. It is a MATLAB package implementing several useful functionalities to navigate, analyse and proof-read the track of each particle detected in any 3D + t stack. Furthermore, it allows users to proof-read and to evaluate the traces with respect to a given gold standard. The Visual4DTracker toolbox permits the users to visualize and save all the generated results through a user-friendly graphical user interface. This tool has been successfully used in three applicative examples. The first processes synthetic data to show all the software functionalities. The second shows how to process a 4D image stack showing the time-lapse growth of Drosophila cells in an embryo. The third example presents the quantitative analysis of insulin granules in living beta-cells, showing that such particles have two main dynamics that coexist inside the cells.

Conclusions

Visual4DTracker is a software package for MATLAB to visualize, handle and manually track 3D + t stacks of microscopy images containing objects such as cells, granules, etc. With its unique set of functions, it remarkably permits the user to analyze and proof-read 4D data in a friendly 3D fashion. The tool is freely available at https://drive.google.com/drive/folders/19AEn0TqP-2B8Z10kOavEAopTUxsKUV73?usp=sharing.",2021-02-08 +30368470,AAPL Practice Resource for the Forensic Psychiatric Evaluation of Competence to Stand Trial.,"Full Document: Wall BW, Ash P, Keram E, et al: AAPL Practice Resource for the Forensic Psychiatric Evaluation of Competence to Stand Trial Update 2018. Journal of the American Academy of Psychiatry and the Law Online Supplement 2018, 46 (3). Available at: http://www.jaapl.org/content/46/3_Supplement.",2018-09-01 +34554815,Racial Disparities of E-Cigarette Use Among US Youths: 2014‒2019.,"Objectives To evaluate disparities in youth e-cigarette use patterns and flavor use by race/ethnicity over time. Methods We used data from the US 2014-2019 National Youth Tobacco Survey (NYTS) to examine trends in dual use (co-use of e-cigarettes and cigarettes or other tobacco products), occasional (≤ 5 days) versus frequent use (≥ 20 days) in the past 30 days, and flavor use among current (past-30-day) e-cigarette users (n = 13 178) across racial/ethnic groups (non-Hispanic Whites, non-Hispanic Blacks, Hispanics/Latinos, and non-Hispanic others). Results Among current e-cigarette users, dual use and occasional use decreased significantly from 2014 to 2019 across racial and ethnic groups except for non-Hispanic Blacks; frequent use and flavored e-cigarette use increased among non-Hispanic Whites, Hispanics/Latinos, and non-Hispanic others but not among non-Hispanic Blacks. 
In 2019, non-Hispanic Black e-cigarette users were more likely to report dual use (adjusted odds ratio [AOR] = 2.2; 95% confidence interval [CI] = 1.5, 3.2; P < .001) and occasional use of e-cigarettes (AOR = 3.7; 95% CI = 2.3, 5.9; P < .001) but less likely to report frequent use (AOR = 0.2; 95% CI = 0.1, 0.4; P < .001) and flavored e-cigarette use (AOR = 0.4; 95% CI = 0.3, 0.5; P < .001) than their White peers. Conclusions Youth e-cigarette use patterns differed considerably across racial/ethnic groups, and tailored strategies to address disparities in e-cigarette use are needed. (Am J Public Health. 2021;111(11):2050-2058. https://doi.org/10.2105/AJPH.2021.306448).",2021-09-23 +34232697,"Reporting and Reproducibility of Meta-Analysis in Speech, Language, and Hearing Research.","Purpose The purposes of this meta-review are to (a) articulate the importance of transparency and reproducibility in meta-analysis, (b) assess the transparency and reproducibility of meta-analyses published in journals of the American Speech-Language-Hearing Association (ASHA), and (c) discuss the implications of our findings and recommendations for future research. Method We conducted a meta-review of all meta-analyses published in ASHA journals through December 31, 2020. Our systematic review yielded 47 meta-analyses for inclusion in this review. We coded all eligible reports on the core elements of transparency and reproducibility in meta-analysis. Results Our findings suggest that though reporting tendencies have improved over time, much work is needed to promote transparency and reproducibility in meta-analytic work. Key areas for future accountability include preregistering study protocol, using Preferred Reporting in Systematic Reviews and Meta-Analysis checklists, providing full data sets, and publishing analytic codes. Conclusions The state of reporting in meta-analysis is improving over time. 
We conclude with a discussion of specific areas that need further attention, and recommendations for researchers to consider when conducting future meta-analyses. Supplemental Material https://doi.org/10.23641/asha.14888481.",2021-07-07 +34316703,MutViz 2.0: visual analysis of somatic mutations and the impact of mutational signatures on selected genomic regions.,"Patterns of somatic single nucleotide variants observed in human cancers vary widely between different tumor types. They depend not only on the activity of diverse mutational processes, such as exposure to ultraviolet light and the deamination of methylated cytosines, but largely also on the sequence content of different genomic regions on which these processes act. With MutViz (http://gmql.eu/mutviz/), we have presented a user-friendly web tool for the identification of mutation enrichments that offers preloaded mutations from public datasets for a variety of cancer types, well organized within an effective database architecture. Somatic mutation patterns can be visually and statistically analyzed within arbitrary sets of small, user-provided genomic regions, such as promoters or collections of transcription factor binding sites. Here, we present MutViz 2.0, a largely extended and consolidated version of the tool: we took into account the immediate (trinucleotide) sequence context of mutations, improved the representation of clinical annotation of tumor samples and devised a method for signature refitting on limited genomic regions to infer the contribution of individual mutational processes to the mutation patterns observed in these regions. We described both the features of MutViz 2.0, concentrating on the novelties, and the substantial re-engineering of the cloud-based architecture.",2021-04-09 +32383136,Role of microRNAs in epidermal growth factor receptor signaling pathway in cervical cancer.,"Cervical cancer is one of the most common disorders in females all around the world. 
Similar to other types of cancer, several signaling pathways are demonstrated to be involved in the progression of this cancer including ERK/MAPK, PI3K/AKT, apoptotic signaling pathways, Wnt, and epidermal growth factor receptor (EGFR). Various microRNAs (miRNAs) and their target genes involved in cervical cancer have been extracted from the kinds of literature of Scopus, Pubmed and Google scholar databases. Regarding the targets, some of them were found to belong in EGFR signaling pathways. The regulation patterns of these miRNA are different in cervical cancer; however, their main aim is to trigger EGFR signaling to proceed with cancer. Moreover, several predicted miRNAs were found to have some interactions with the differentially expressed genes of cervical cancer which are the members of the EGFR signaling pathway by using miRWalk 3.0 (https://mirwalk.umm.uni-heidelberg.de/) and TargetScan 7.1 (https://www.targetscan.org/vert_71/). Also, the microarray data were obtained from the NCBI-Gene Expression Omnibus (GEO) datasets of cervical cancer. In the present review, we highlight the miRNAs involved in cervical cancer and the role of their targets in the EGFR signaling pathway. Furthermore, some predicted miRNAs were the candidate to target EGFR signaling pathway members differentially expressed in cervical cancer samples compared to normal samples.",2020-05-07 +27924020,LincSNP 2.0: an updated database for linking disease-associated SNPs to human long non-coding RNAs and their TFBSs.,"We describe LincSNP 2.0 (http://bioinfo.hrbmu.edu.cn/LincSNP), an updated database that is used specifically to store and annotate disease-associated single nucleotide polymorphisms (SNPs) in human long non-coding RNAs (lncRNAs) and their transcription factor binding sites (TFBSs). 
In LincSNP 2.0, we have updated the database with more data and several new features, including (i) expanding disease-associated SNPs in human lncRNAs; (ii) identifying disease-associated SNPs in lncRNA TFBSs; (iii) updating LD-SNPs from the 1000 Genomes Project; and (iv) collecting more experimentally supported SNP-lncRNA-disease associations. Furthermore, we developed three flexible online tools to retrieve and analyze the data. Linc-Mart is a convenient way for users to customize their own data. Linc-Browse is a tool for all data visualization. Linc-Score predicts the associations between lncRNA and disease. In addition, we provided users a newly designed, user-friendly interface to search and download all the data in LincSNP 2.0 and we also provided an interface to submit novel data into the database. LincSNP 2.0 is a continually updated database and will serve as an important resource for investigating the functions and mechanisms of lncRNAs in human diseases.",2016-10-23 +30048518,YARG: A repository for arsenic-related genes in yeast.,"Arsenic is a toxic metalloid. Moderate levels of arsenic exposure from drinking water can cause various human health problems such as skin lesions, circulatory disorders and cancers. Thus, arsenic toxicity is a key focus area for environmental and toxicological investigations. Many arsenic-related genes in yeast have been identified by experimental strategies such as phenotypic screening and transcriptional profiling. These identified arsenic-related genes are valuable information for studying arsenic toxicity. However, the literature about these identified arsenic-related genes is widely dispersed and cannot be easily acquired by researchers. This prompts us to develop YARG (Yeast Arsenic-Related Genes) database, which comprehensively collects 3396 arsenic-related genes in the literature. 
For each arsenic-related gene, the number and types of experimental evidence (phenotypic screening and/or transcriptional profiling) are provided. Users can use both search and browse modes to query arsenic-related genes in YARG. We used two case studies to show that YARG can return biologically meaningful arsenic-related information for the query gene(s). We believe that YARG is a useful resource for arsenic toxicity research. YARG is available at http://cosbi4.ee.ncku.edu.tw/YARG/.",2018-07-26 +26496946,Colorectal cancer atlas: An integrative resource for genomic and proteomic annotations from colorectal cancer cell lines and tissues.,"In order to advance our understanding of colorectal cancer (CRC) development and progression, biomedical researchers have generated large amounts of OMICS data from CRC patient samples and representative cell lines. However, these data are deposited in various repositories or in supplementary tables. A database which integrates data from heterogeneous resources and enables analysis of the multidimensional data sets, specifically pertaining to CRC is currently lacking. Here, we have developed Colorectal Cancer Atlas (http://www.colonatlas.org), an integrated web-based resource that catalogues the genomic and proteomic annotations identified in CRC tissues and cell lines. The data catalogued to-date include sequence variations as well as quantitative and non-quantitative protein expression data. The database enables the analysis of these data in the context of signaling pathways, protein-protein interactions, Gene Ontology terms, protein domains and post-translational modifications. Currently, Colorectal Cancer Atlas contains data for >13 711 CRC tissues, >165 CRC cell lines, 62 251 protein identifications, >8.3 million MS/MS spectra, >18 410 genes with sequence variations (404 278 entries) and 351 pathways with sequence variants. 
Overall, Colorectal Cancer Atlas has been designed to serve as a central resource to facilitate research in CRC.",2015-10-22 +27738138,"The Candida Genome Database (CGD): incorporation of Assembly 22, systematic identifiers and visualization of high throughput sequencing data.","The Candida Genome Database (CGD, http://www.candidagenome.org/) is a freely available online resource that provides gene, protein and sequence information for multiple Candida species, along with web-based tools for accessing, analyzing and exploring these data. The mission of CGD is to facilitate and accelerate research into Candida pathogenesis and biology, by curating the scientific literature in real time, and connecting literature-derived annotations to the latest version of the genomic sequence and its annotations. Here, we report the incorporation into CGD of Assembly 22, the first chromosome-level, phased diploid assembly of the C. albicans genome, coupled with improvements that we have made to the assembly using additional available sequence data. We also report the creation of systematic identifiers for C. albicans genes and sequence features using a system similar to that adopted by the yeast community over two decades ago. Finally, we describe the incorporation of JBrowse into CGD, which allows online browsing of mapped high throughput sequencing data, and its implementation for several RNA-Seq data sets, as well as the whole genome sequencing data that was used in the construction of Assembly 22.",2016-10-13 +33901274,TIMEx: tumor-immune microenvironment deconvolution web-portal for bulk transcriptomics using pan-cancer scRNA-seq signatures.,"

Summary

The heterogeneous cell types of the tumor-immune microenvironment (TIME) play key roles in determining cancer progression, metastasis, and response to treatment. We report the development of TIMEx, a novel tumor-immune microenvironment deconvolution method emphasizing the estimation of infiltrating immune cells for bulk transcriptomics using pan-cancer single-cell RNA-seq signatures. We also implemented a comprehensive, user-friendly web-portal for users to evaluate TIMEx and other deconvolution methods with bulk transcriptomic profiles.

Availability

TIMEx web portal is freely accessible at http://timex.moffitt.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-26 +33691010,TreeFix-TP: Phylogenetic Error-Correction for Infectious Disease Transmission Network Inference.,"Many existing methods for estimation of infectious disease transmission networks use a phylogeny of the infecting strains as the basis for transmission network inference, and accurate network inference relies on accuracy of this underlying evolutionary history. However, phylogenetic reconstruction can be highly error prone and more sophisticated methods can fail to scale to larger outbreaks, negatively impacting downstream transmission network inference.We introduce a new method, TreeFix-TP, for accurate and scalable reconstruction of transmission phylogenies based on an error-correction framework. Our method uses intra-host strain diversity and host information to balance a parsimonious evaluation of the implied transmission network with statistical hypothesis testing on sequence data likelihood. The reconstructed tree minimizes the number of required disease transmissions while being as well supported by sequence data as the maximum likelihood phylogeny. Using a simulation framework for viral transmission and evolution and real data from ten HCV outbreaks, we demonstrate that error-correction with TreeFix-TP improves phylogenetic accuracy and outbreak source detection. Our results show that using TreeFix-TP can lead to significant improvement in transmission phylogeny inference and that its performance is robust to variations in transmission and evolutionary parameters. 
TreeFix-TP is freely available open-source from https://compbio.engr.uconn.edu/software/treefix-tp/.",2021-01-01 +31651301,Teaching a difficult topic using a problem-based concept resembling a computer game: development and evaluation of an e-learning application for medical molecular genetics.,"BACKGROUND:Genetic testing rapidly penetrates into all medical specialties and medical students must acquire skills in this area. However, many of them consider it difficult. Furthermore, many find these topics less appealing and not connected to their future specialization in different fields of clinical medicine. Student-centred strategies such as problem-based learning, gamification and the use of real data can increase the appeal of a difficult topic such as genetic testing, a field at the crossroads of genetics, molecular biology and bioinformatics. METHODS:We designed an electronic teaching application which students registered in the undergraduate Medical Biology course can access online. A study was carried out to assess the influence of implementation of the new method. We performed pretest/posttest evaluation and analyzed the results using the sign test with median values. We also collected students' personal comments. RESULTS:The newly developed interactive application simulates the process of molecular genetic diagnostics of a hereditary disorder in a family. Thirteen tasks guide students through clinical and laboratory steps needed to reach the final diagnosis. Genetics and genomics are fields strongly dependent on electronic databases and computer-based data analysis tools. The tasks employ publicly available internet bioinformatic resources used routinely in medical genetics departments worldwide. Authenticity is assured by the use of modified and de-identified clinical and laboratory data from real families analyzed in our previous research projects. Each task contains links to databases and data processing tools needed to solve the task, and an answer box. 
If the entered answer is correct, the system allows the user to proceed to the next task. The solving of consecutive tasks arranged into a single narrative resembles a computer game, making the concept appealing. There was a statistically significant improvement of knowledge and skills after the practical class, and most comments on the application were positive. A demo version is available at https://medbio.lf2.cuni.cz/demo_m/ . Full version is available on request from the authors. CONCLUSIONS:Our concept proved to be appealing to the students and effective in teaching medical molecular genetics. It can be modified for training in the use of electronic information resources in other medical specialties.",2019-10-24 +31775077,"Mitochondrial DNA variation in Sub-Saharan Africa: Forensic data from a mixed West African sample, Côte d'Ivoire (Ivory Coast), and Rwanda.","This study provides 398 novel complete mitochondrial control region sequences that augment the still underrepresented data from Africa by three datasets: a mixed West African sample set deriving from 12 countries (n = 145) and datasets from Côte d'Ivoire (Ivory Coast) (n = 100) as well as Rwanda (n = 153). The analysis of mtDNA variation and genetic comparisons with published data revealed low random match probabilities in all three datasets and typical West African and East African diversity, respectively. Genetic parameters indicate that the presented mixed West African dataset may serve as first forensic mtDNA control region database for West Africa in general. In addition, a strategy for responsible forensic application of precious mtDNA population samples potentially containing close maternal relatives is outlined. 
The datasets will be uploaded to the forensic mtDNA database EMPOP (https://empop.online) upon publication.",2019-11-07 +34286635,Docosahexaenoic Acid in the Inhibition of Tumor Cell Growth in Preclinical Models of Ovarian Cancer.,"There is a strong rationale for investigating nutritional interventions with docosahexaenoic acid (DHA) in cancer prevention and therapy; however, the effects of DHA on ovarian cancer (OC) have not been well studied. Here, we investigated if DHA alone and in combination with carboplatin reduces OC cell growth in vitro. In vivo, we used a high-grade serous OC patient-derived xenograft (PDX) mouse model to investigate if DHA affects OC growth and enhances the anticancer actions of carboplatin. We showed synergistic cell killing by DHA and carboplatin in DHA-resistant Kuramochi and SKOV3 OC cells, which corresponded with increased DHA incorporation into whole-cell membrane phospholipids (P < 0.05). In vivo, feeding mice a diet supplemented with 3.9% (w/w of fat) DHA resulted in a significant reduction in PDX growth with and without carboplatin (P < 0.05). This reduction in tumor growth was accompanied by an increased tumor necrotic region (P < 0.05) and improved survival. Plasma membranes in tumors and livers excised from mice fed a DHA diet had ∼ twofold increase in DHA incorporation as compared with mice fed a control diet. Our findings indicate that DHA supplementation reduces cancer cell growth and enhances the efficacy of carboplatin in preclinical models of OC through increased apoptosis and necrosis.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1952453.",2021-07-21 +33256598,Highly diversified core promoters in the human genome and their effects on gene expression and disease predisposition.,"

Background

The core promoter controls transcription initiation. However, little is known about core promoter diversity in the human genome and its relationship with diseases. We hypothesized that, as a functionally important component of the genome, the core promoter in the human genome could be under evolutionary selection, as reflected by its high diversification in order to adjust gene expression for better adaptation to different environments.

Results

Applying the ""Exome-based Variant Detection in Core-promoters"" method, we analyzed human core-promoter diversity by using the 2682 exome data sets of 25 worldwide human populations sequenced by the 1000 Genome Project. Collectively, we identified 31,996 variants in the core promoter region (- 100 to + 100) of 12,509 human genes ( https://dbhcpd.fhs.um.edu.mo ). Analyzing the rich variation data identified highly ethnic-specific patterns of core promoter variation between different ethnic populations, the genes with highly variable core promoters, the motifs affected by the variants, and their involved functional pathways. eQTL test revealed that 12% of core promoter variants can significantly alter gene expression level. Comparison with GWAS data we located 163 variants as the GWAS identified traits associated with multiple diseases, half of these variants can alter gene expression.

Conclusion

Data from our study reveals the highly diversified nature of the core promoter in the human genome, and highlights that core promoter variation could play important roles not only in gene expression regulation but also in disease predisposition.",2020-11-30 +34255609,Concerns about starting antiretroviral treatment among pregnant women in Lesotho.,"The recommendation to start antiretroviral treatment independently of CD4 or viral load count is adopted as a strategy for reducing HIV/AIDS incidence rates in countries with a high prevalence rate, such as Lesotho. For example, the number of new HIV infections has lowered from 20,000 in 2010 to 11,000 in 2018 [UNAIDS Country Factsheets. (2019). https://www.unaids.org/en/regionscountries/countries/lesotho]. Lesotho introduced the 'test and treat' strategy in 2013 to address the HIV/AIDS pandemic, representing a shift from the provider-initiated HIV testing and counselling guidelines. The purpose of this paper was to understand pregnant women's concerns about starting antiretroviral treatment to limit risks of mother-to-child HIV-transmission during the implementation of 'test and treat' protocol in Lesotho. The study used a qualitative research approach and collected information from Lesotho public antenatal clinics. In-depth interviews were conducted with eighteen pregnant women living with HIV/AIDS and data were analysed manually following the constructivist grounded theory. Findings reveal the sadness experienced at the diagnosis stage, concerns about accessing treatment and maintaining adherence, and concerns about disclosure. 
It was concluded that these factors stemmed from fears about triggering enacted stigma in the illness experience of pregnant women, which could hamper the implementation of the 'test and treat' protocol in Lesotho.",2021-07-13 +32810235,PCAT: an integrated portal for genomic and preclinical testing data of pediatric cancer patient-derived xenograft models.,"Although cancer is the leading cause of disease-related mortality in children, the relative rarity of pediatric cancers poses a significant challenge for developing novel therapeutics to further improve prognosis. Patient-derived xenograft (PDX) models, which are usually developed from high-risk tumors, are a useful platform to study molecular driver events, identify biomarkers and prioritize therapeutic agents. Here, we develop PDX for Childhood Cancer Therapeutics (PCAT), a new integrated portal for pediatric cancer PDX models. Distinct from previously reported PDX portals, PCAT is focused on pediatric cancer models and provides intuitive interfaces for querying and data mining. The current release comprises 324 models and their associated clinical and genomic data, including gene expression, mutation and copy number alteration. Importantly, PCAT curates preclinical testing results for 68 models and 79 therapeutic agents manually collected from individual agent testing studies published since 2008. To facilitate comparisons of patterns between patient tumors and PDX models, PCAT curates clinical and molecular data of patient tumors from the TARGET project. In addition, PCAT provides access to gene fusions identified in nearly 1000 TARGET samples. PCAT was built using R-shiny and MySQL. The portal can be accessed at http://pcat.zhenglab.info or http://www.pedtranscriptome.org.",2021-01-01 +31329252,sAOP: linking chemical stressors to adverse outcomes pathway networks.,"

Motivation

Adverse outcome pathway (AOP) is a toxicological concept proposed to provide a mechanistic representation of biological perturbation over different layers of biological organization. Although AOPs are by definition chemical-agnostic, many chemical stressors can putatively interfere with one or several AOPs and such information would be relevant for regulatory decision-making.

Results

With the recent development of AOP networks aiming to facilitate the identification of interactions among AOPs, we developed a stressor-AOP network (sAOP). Using the 'cytotoxicity burst' (CTB) approach, we mapped bioactive compounds from the ToxCast data to a list of AOPs reported in the AOP-Wiki database. With this analysis, a variety of relevant connections between chemicals and AOP components can be identified, suggesting multiple effects not observed in the simplified 'one-biological perturbation to one-adverse outcome' model. The results may assist in the prioritization of chemicals to assess risk-based evaluations in the context of human health.

Availability and implementation

sAOP is available at http://saop.cpr.ku.dk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +33997188,SkullBreak / SkullFix - Dataset for automatic cranial implant design and a benchmark for volumetric shape learning tasks.,"The article introduces two complementary datasets intended for the development of data-driven solutions for cranial implant design, which remains to be a time-consuming and laborious task in current clinical routine of cranioplasty. The two datasets, referred to as the SkullBreak and SkullFix in this article, are both adapted from a public head CT collection CQ500 (http://headctstudy.qure.ai/dataset) with CC BY-NC-SA 4.0 license. The SkullBreak contains 114 and 20 complete skulls, each accompanied by five defective skulls and the corresponding cranial implants, for training and evaluation respectively. The SkullFix contains 100 triplets (complete skull, defective skull and the implant) for training and 110 triplets for evaluation. The SkullFix dataset was first used in the MICCAI 2020 AutoImplant Challenge (https://autoimplant.grand-challenge.org/) and the ground truth, i.e., the complete skulls and the implants in the evaluation set are held private by the organizers. The two datasets are not overlapping and differ regarding data selection and synthetic defect creation and each serves as a complement to the other. Besides cranial implant design, the datasets can be used for the evaluation of volumetric shape learning algorithms, such as volumetric shape completion. This article gives a description of the two datasets in detail.",2021-02-24 +32183752,Bacterial otitis media in sub-Saharan Africa: a systematic review and meta-analysis.,"BACKGROUND:Otitis media is inflammation of the middle ear, comprising a spectrum of diseases. It is the commonest episode of infection in children, which often occurs after an acute upper respiratory tract infection. 
Otitis media is ranked as the second most important cause of hearing loss and the fifth global burden of disease with a higher incidence in developing worlds like Sub-Saharan Africa and South Asia. Therefore, this systematic review is aimed to quantitatively estimate the current status of bacterial otitis media, bacterial etiology and their susceptibility profile in sub-Saharan Africa. METHODS:A literature search was conducted from major databases and indexing services including EMBASE (Ovid interface), PubMed/MEDLINE, Google Scholar, ScienceDirect, Cochrane Library, WHO African Index-Medicus and others. All studies (published and unpublished) addressing the prevalence of otitis media and clinical isolates conducted in sub-Saharan Africa were included. Format prepared in Microsoft Excel was used to extract the data and data was exported to Stata version 15 software for the analyses. Der-Simonian-Laird random-effects model at a 95% confidence level was used for pooled estimation of outcomes. The degree of heterogeneity was presented with I2 statistics. Publication bias was presented with funnel plots of standard error supplemented by Begg's and Egger's tests. The study protocol is registered on PROSPERO with reference number ID: CRD42018102485 and the published methodology is available from http://www.crd.york.ac.uk/CRD42018102485. RESULTS:A total of 33 studies with 6034 patients were included in this study. All studies have collected ear swab/discharge samples for bacterial isolation. The pooled isolation rate of bacterial agents from the CSOM subgroup was 98%, patients with otitis media subgroup 87% and pediatric otitis media 86%. A univariate meta-regression analysis indicated the type of otitis media was a possible source of heterogeneity (p-value = 0.001). The commonest isolates were P. aeruginosa (23-25%), S. aureus (18-27%), Proteus species (11-19%) and Klebsiella species. 
High level of resistance was observed against Ampicillin, Amoxicillin-clavulanate, Cotrimoxazole, Amoxicillin, and Cefuroxime. CONCLUSION:The analysis revealed that bacterial pathogens like P. aeruginosa and S. aureus are majorly responsible for otitis media in sub-Saharan Africa. The isolates have a high level of resistance to commonly used drugs for the management of otitis media.",2020-03-17 +34055638,Integrated Analysis of ceRNA Network Reveals Prognostic and Metastasis Associated Biomarkers in Breast Cancer.,"

Background

Breast cancer is a malignancy and lethal tumor in women. Metastasis of breast cancer is one of the causes of poor prognosis. Increasing evidences have suggested that the competing endogenous RNAs (ceRNAs) were associated with the metastasis of breast cancer. Nonetheless, potential roles of ceRNAs in regulating the metastasis of breast cancer remain unclear.

Methods

The RNA expression (3 levels) and follow-up data of breast cancer and noncancerous tissue samples were downloaded from the Cancer Genome Atlas (TCGA). Differentially expressed and metastasis associated RNAs were identified for functional analysis and constructing the metastasis associated ceRNA network by comprehensively bioinformatic analysis. The Kaplan-Meier (K-M) survival curve was utilized to screen the prognostic RNAs in metastasis associated ceRNA network. Moreover, we further identified the metastasis associated biomarkers with operating characteristic (ROC) curve. Ultimately, the data of Cancer Cell Line Encyclopedia (CCLE, https://portals.broadinstitute.org/ccle) website were selected to obtained the reliable metastasis associated biomarkers.

Results

1005 mRNAs, 22 miRNAs and 164 lncRNAs were screened as differentially expressed and metastasis associated RNAs. The results of GO function and KEGG pathway enrichment analysis showed that these RNAs are mainly associated with the metabolic processes and stress responses. Next, a metastasis associated ceRNA (including 104 mRNAs, 19 miRNAs, and 16 lncRNAs) network was established, and 12 RNAs were found to be related to the overall survival (OS) of patients. In addition, 3 RNAs (hsa-miR-105-5p, BCAR1, and PANX2) were identified to serve as reliable metastasis associated biomarkers. Eventually, the results of mechanism analysis suggested that BCAR1 might promote the metastasis of breast cancer by facilitating Rap 1 signaling pathway.

Conclusion

In the present research, we identified 3 RNAs (hsa-miR-105-5p, BCAR1 and PANX2) might associated with prognosis and metastasis of breast cancer, which might be provide a new perspective for metastasis of breast cancer and contributed to the treatment of breast cancer.",2021-05-13 +30481262,PhastWeb: a web interface for evolutionary conservation scoring of multiple sequence alignments using phastCons and phyloP.,"

Summary

The Phylogenetic Analysis with Space/Time models (PHAST) package is a widely used software package for comparative genomics that has been freely available for download since 2002. Here, we introduce a web interface (phastWeb) that makes it possible to use two of the most popular programs in PHAST, phastCons and phyloP, without downloading and installing the PHAST software. This interface allows users to upload a sequence alignment and either upload a corresponding phylogeny or have one estimated from the alignment. After processing, users can visualize alignments and conservation scores as genome browser tracks and download estimated tree models and raw scores for further analysis. Altogether, this resource makes key features of the PHAST package conveniently available to a broad audience.

Availability and implementation

PhastWeb is freely available on the web at http://compgen.cshl.edu/phastweb/. The website provides instructions as well as examples.",2019-07-01 +34282687,Glucosinolate-Enriched Fractions from Maca (Lepidium meyenii) Exert Myrosinase-Dependent Cytotoxic Effects against HepG2/C3A and HT29 Tumor Cell Lines.,"The consumption of glucosinolate (GL)-rich foods, including Brassica vegetables, such as mustard, broccoli, and maca, is associated with decreased risk of developing cancer. The GL content in maca, which is recognized as a ""superfood"", is approximately 100-times higher than that in other brassicas. Although maca is a potential dietary source of GLs, limited studies have examined the bioactivity of maca GLs using the combination of chemical characterization and bioassays. In this study, the fractions (Lm-II and Lm-III) rich in intact GLs (glucotropaeolin and glucolimnanthin) were isolated and characterized from maca ethanolic extracts using chromatography and mass spectrometry. Additionally, the growth-inhibitory effects of Lm-II and Lm-II fractions against hepatocellular carcinoma (HepG2/C3A) and colon adenocarcinoma (HT29) cell lines were examined in the absence or presence of myrosinase (MYR). Fractions lacking low molecular weight sugars dose-dependently exerted cytotoxic effects in the presence of MYR. The half-maximal inhibitory concentration values of Lm-II and Lm-III against HepG2/C3A were 118.8 and 69.9 µg/mL, respectively, while those against HT29 were 102.6 and 71.5 µg/mL, respectively. These results suggest that the anticancer properties of maca can be attributed to GLs and corroborate the categorization of maca as a ""superfood.""Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1952444.",2021-07-20 +34441149,A Hybrid Analysis-Based Approach to Android Malware Family Classification. ,"With the popularity of Android, malware detection and family classification have also become a research focus. 
Many excellent methods have been proposed by previous authors, but static and dynamic analyses inevitably require complex processes. A hybrid analysis method for detecting Android malware and classifying malware families is presented in this paper, and is partially optimized for multiple-feature data. For static analysis, we use permissions and intent as static features and use three feature selection methods to form a subset of three candidate features. Compared with various models, including k-nearest neighbors and random forest, random forest is the best, with a detection rate of 95.04%, while the chi-square test is the best feature selection method. After using feature selection to explore the critical static features contained in this dataset, we analyzed a subset of important features to gain more insight into the malware. In a dynamic analysis based on network traffic, unlike those that focus on a one-way flow of traffic and work on HTTP protocols and transport layer protocols, we focused on sessions and retained protocol layers. The Res7LSTM model is then used to further classify the malicious and partially benign samples detected in the static detection. The experimental results show that our approach can not only work with fewer static features and guarantee sufficient accuracy, but also improve the detection rate of Android malware family classification from 71.48% in previous work to 99% when cutting the traffic in terms of the sessions and protocols of all layers.",2021-08-03 +34254220,Predictors of the Quality of Life of Informal Carers of Adults on the Autism Spectrum.,"Carers of adults on the autism spectrum often experience high levels of stress, worry, and caregiver burden. There are few studies identifying the predictors of carer mental well-being and none have been conducted in Australia. 
Data from the Autism Cooperative Research Centre for Living with Autism's Australian Longitudinal Study of Autism in Adulthood was used to test the conceptual model by Sonido et al. (Rev J Autism Dev Disord, 2019, https://doi.org/10.1007/s40489-019-00177-8 ) by (a) identifying the predictors of mental well-being for carers of adults on the spectrum, (b) using model selection to determine which predictors contribute to the model of best fit, and (c) testing for mediating relationships between the predictors. Several predictors were directly associated with carer psychological quality of life, including carer age, care recipient intellectual disability, and carer intolerance of uncertainty. Model selection strongly supported the inclusion of most clusters from the conceptual model. Some mediating relationships were found, such as care recipient depressive behaviours mediating the relationships between caregiver burden and psychological quality of life. Future studies of the conceptual model will improve understanding of the predictors of carer mental well-being and enable tailored interventions to improve the psychological health of carers of adults on the autism spectrum.",2021-07-12 +33760053,STREME: Accurate and versatile sequence motif discovery. ,"Sequence motif discovery algorithms can identify novel sequence patterns that perform biological functions in DNA, RNA and protein sequences-for example, the binding site motifs of DNA-and RNA-binding proteins. The STREME algorithm presented here advances the state-of-the-art in ab initio motif discovery in terms of both accuracy and versatility. Using in vivo DNA (ChIP-seq) and RNA (CLIP-seq) data, and validating motifs with reference motifs derived from in vitro data, we show that STREME is more accurate, sensitive and thorough than several widely used algorithms (DREME, HOMER, MEME, Peak-motifs) and two other representative algorithms (ProSampler and Weeder). 
STREME's capabilities include the ability to find motifs in datasets with hundreds of thousands of sequences, to find both short and long motifs (from 3 to 30 positions), to perform differential motif discovery in pairs of sequence datasets, and to find motifs in sequences over virtually any alphabet (DNA, RNA, protein and user-defined alphabets). Unlike most motif discovery algorithms, STREME reports a useful estimate of the statistical significance of each motif it discovers. STREME is easy to use individually via its web server or via the command line, and is completely integrated with the widely-used MEME Suite of sequence analysis tools. The name STREME stands for ""Simple, Thorough, Rapid, Enriched Motif Elicitation"". The STREME web server and source code are provided freely for non-commercial use at http://meme-suite.org.",2021-03-24 +31888639,Genome analysis and knowledge-driven variant interpretation with TGex.,"BACKGROUND:The clinical genetics revolution ushers in great opportunities, accompanied by significant challenges. The fundamental mission in clinical genetics is to analyze genomes, and to identify the most relevant genetic variations underlying a patient's phenotypes and symptoms. The adoption of Whole Genome Sequencing requires novel capacities for interpretation of non-coding variants. RESULTS:We present TGex, the Translational Genomics expert, a novel genome variation analysis and interpretation platform, with remarkable exome analysis capacities and a pioneering approach of non-coding variants interpretation. TGex's main strength is combining state-of-the-art variant filtering with knowledge-driven analysis made possible by VarElect, our highly effective gene-phenotype interpretation tool. VarElect leverages the widely used GeneCards knowledgebase, which integrates information from > 150 automatically-mined data sources. 
Access to such a comprehensive data compendium also facilitates TGex's broad variant annotation, supporting evidence exploration, and decision making. TGex has an interactive, user-friendly, and easy adaptive interface, ACMG compliance, and an automated reporting system. Beyond comprehensive whole exome sequence capabilities, TGex encompasses innovative non-coding variants interpretation, towards the goal of maximal exploitation of whole genome sequence analyses in the clinical genetics practice. This is enabled by GeneCards' recently developed GeneHancer, a novel integrative and fully annotated database of human enhancers and promoters. Examining use-cases from a variety of TGex users world-wide, we demonstrate its high diagnostic yields (42% for single exome and 50% for trios in 1500 rare genetic disease cases) and critical actionable genetic findings. The platform's support for integration with EHR and LIMS through dedicated APIs facilitates automated retrieval of patient data for TGex's customizable reporting engine, establishing a rapid and cost-effective workflow for an entire range of clinical genetic testing, including rare disorders, cancer predisposition, tumor biopsies and health screening. CONCLUSIONS:TGex is an innovative tool for the annotation, analysis and prioritization of coding and non-coding genomic variants. It provides access to an extensive knowledgebase of genomic annotations, with intuitive and flexible configuration options, allows quick adaptation, and addresses various workflow requirements. It thus simplifies and accelerates variant interpretation in clinical genetics workflows, with remarkable diagnostic yield, as exemplified in the described use cases. TGex is available at http://tgex.genecards.org/.",2019-12-30 +34042953,Using Interpretable Deep Learning to Model Cancer Dependencies. ,"Cancer dependencies provide potential drug targets. Unfortunately, dependencies differ among cancers and even individuals. 
To this end, visible neural networks (VNNs) are promising due to robust performance and the interpretability required for the biomedical field. We design Biological VNN (BioVNN) using pathway knowledge to predict cancer dependencies. Despite having fewer parameters, BioVNN marginally outperforms traditional neural networks and converges faster. BioVNN also outperforms a neural network based on randomized pathways. More importantly, dependency predictions can be explained by correlating with the neuron output states of relevant pathways, which suggest dependency mechanisms. In feature importance analysis, BioVNN recapitulates known reaction partners and proposes new ones. Such robust and interpretable VNNs may facilitate the understanding of cancer dependency and the development of targeted therapies. Code and data are available at http://static.lichtargelab.org/BioVNN/. See Bioinformatics online.",2021-05-27 +26582924,enviPath--The environmental contaminant biotransformation pathway resource.,"The University of Minnesota Biocatalysis/Biodegradation Database and Pathway Prediction System (UM-BBD/PPS) has been a unique resource covering microbial biotransformation pathways of primarily xenobiotic chemicals for over 15 years. This paper introduces the successor system, enviPath (The Environmental Contaminant Biotransformation Pathway Resource), which is a complete redesign and reimplementation of UM-BBD/PPS. enviPath uses the database from the UM-BBD/PPS as a basis, extends the use of this database, and allows users to include their own data to support multiple use cases. Relative reasoning is supported for the refinement of predictions and to allow its extensions in terms of previously published, but not implemented machine learning models. User access is simplified by providing a REST API that simplifies the inclusion of enviPath into existing workflows. An RDF database is used to enable simple integration with other databases. 
enviPath is publicly available at https://envipath.org with free and open access to its core data.",2015-11-17 +32096819,Phage protein receptors have multiple interaction partners and high expressions.,"MOTIVATION:Receptors on host cells play a critical role in viral infection. How phages select receptors is still unknown. RESULTS:Here, we manually curated a high-quality database named phageReceptor, including 427 pairs of phage-host receptor interactions, 341 unique viral species or sub-species and 69 bacterial species. Sugars and proteins were most widely used by phages as receptors. The receptor usage of phages in Gram-positive bacteria was different from that in Gram-negative bacteria. Most protein receptors were located on the outer membrane. The phage protein receptors (PPRs) were highly diverse in their structures, and had little sequence identity and no common protein domain with mammalian virus receptors. Further functional characterization of PPRs in Escherichia coli showed that they had larger node degrees and betweennesses in the protein-protein interaction network, and higher expression levels, than other outer membrane proteins, plasma membrane proteins or other intracellular proteins. These findings were consistent with what observed for mammalian virus receptors reported in previous studies, suggesting that viral protein receptors tend to have multiple interaction partners and high expressions. The study deepens our understanding of virus-host interactions. AVAILABILITY AND IMPLEMENTATION:phageReceptor is publicly available from: http://www.computationalbiology.cn/phageReceptor/index.html. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +31432762,tRic: a user-friendly data portal to explore the expression landscape of tRNAs in human cancers.,"Transfer RNAs (tRNAs) play critical roles in human cancer. 
Currently, no database provides the expression landscape and clinical relevance of tRNAs across a variety of human cancers. Utilizing miRNA-seq data from The Cancer Genome Atlas, we quantified the relative expression of tRNA genes and merged them into the codon level and amino level across 31 cancer types. The expression of tRNAs is associated with clinical features of patient smoking history and overall survival, and disease stage, subtype, and grade. We further analysed codon frequency and amino acid frequency for each protein coding gene and linked alterations of tRNA expression with protein translational efficiency. We include these data resources in a user-friendly data portal, tRic (tRNA in cancer, https://hanlab.uth.edu/tRic/ or http://bioinfo.life.hust.edu.cn/tRic/), which can be of significant interest to the research community.",2019-08-25 +34453674,Cloning of Mn-SOD gene and its mRNA expression difference and antioxidant enzyme activities under hypoxia stress of cobia Rachycentron canadum.,"

Background

Environmental hypoxia affects the survival and development of organisms. It is also an important environmental factor that leads to oxidative damage. Hypoxia is a condition in which tissues are deprived of oxygen; reoxygenation is the phenomenon in which hypoxic tissues are exposed to oxygen. Hypoxia-reoxygenation is vital in pathogenesis, where the production of reactive oxygen species and antioxidant disparity significantly contribute to disease progression, and it is one of the most common physiological stressors in the aquaculture industry.

Methods and results

In this study, the full length of complementary DNA (cDNA) of the manganese superoxide dismutase (Mn-SOD) gene of healthy cobia Rachycentron canadum was analysed using rapid amplification of cDNA ends. The real-time quantitative Polymerase Chain Reaction was used to measure the expression levels of Mn-SOD mRNAs in various tissues (heart, muscle, brain, liver, kidney, gill, intestine, and spleen). The 2-ΔΔCT method was used to performed the expression analysis. The experimental data were analysed using SPSS ver. 19.0 ( https://spss.software.informer.com/19.0/ ). P < 0.05 and P < 0.01 were set as significant differences. The values were articulated as mean ± standard deviation. The Mn-SOD gene cDNA sequence was 1209 bp long, including a 684 bp open reading frame, 42 bp 5'UTR and 483 bp 3'UTR, encoding 227 amino acids. Under hypoxia-reoxygen stress, the expression of Mn-SOD in brain tissue was significantly lower than in the control group after 8 h of reoxygenation and higher than the control group after 24 h. Hypoxia and subsequent reoxygenation triggered a disturbance in antioxidant homeostasis, displayed in the modification of GPx expression/activity in the liver: GPx was improved.

Conclusions

These results provide valuable information on the role of Mn-SOD regulation in oxidative stress caused by hypoxia.",2021-08-28 +34022612,A stochastic approach for modelling the effects of temperature on the growth rate of Bacillus cereus sensu lato.,"A stochastic model that predicts the maximum specific growth rate (μmax) of Bacillus cereus sensu lato as a function of temperature was developed. The model integrates the intra-species variability by incorporating distributions of cardinal parameters (Tmin, Topt, Tmax) in the model. Growth rate data were generated for 22 strains, covering 5 major phylogenetic groups of B. cereus, and their cardinal temperatures identified. Published growth rate data were also incorporated in the model fitting, resulting in a set of 33 strains. Based on their cardinal temperatures, we identified clusters of Bacillus cereus strains that show similar response to temperature and these clusters were considered separately in the stochastic model. Interestingly, the μopt values for psychrotrophic strains were found to be significantly lower than those obtained for mesophilic strains. The model developed within this work takes into account some correlations existing between parameters (μopt, Tmin, Topt, Tmax). In particular, the relationship highlighted between the b-slope of the Ratkowsky model and Tmin (doi: https://doi.org/10.3389/fmicb.2017.01890) was adapted to the case of the popular Cardinal Temperature Model. This resulted in a reduced model in which μopt is replaced by a function of Tmin, Topt and 2 strain-independent parameters. A correlation between the Tmin parameter and the experimental minimal growth temperature was also highlighted and integrated in the model for improved predictions near the temperature growth limits. Compared to the classical approach, the model developed in this study leads to improved predictions for temperatures around Tmin and more realistic tails for the predicted distributions of μmax. 
It can be useful for describing the variability of the Bacillus cereus Group in Quantitative Microbial Risk Assessment (QMRA). An example of application of the stochastic model to Reconstituted Infant Formulae (RIF) was proposed.",2021-05-11 +34582261,The Complex Epidemiological Relationship between Flooding Events and Human Outbreaks of Mosquito-Borne Diseases: A Scoping Review.,"

Background

Climate change is expected to increase the frequency of flooding events. Although rainfall is highly correlated with mosquito-borne diseases (MBD) in humans, less research focuses on understanding the impact of flooding events on disease incidence. This lack of research presents a significant gap in climate change-driven disease forecasting.

Objectives

We conducted a scoping review to assess the strength of evidence regarding the potential relationship between flooding and MBD and to determine knowledge gaps.

Methods

PubMed, Embase, and Web of Science were searched through 31 December 2020 and supplemented with review of citations in relevant publications. Studies on rainfall were included only if the operationalization allowed for distinction of unusually heavy rainfall events. Data were abstracted by disease (dengue, malaria, or other) and stratified by post-event timing of disease assessment. Studies that conducted statistical testing were summarized in detail.

Results

From 3,008 initial results, we included 131 relevant studies (dengue n=45, malaria n=61, other MBD n=49). Dengue studies indicated short-term (<1 month) decreases and subsequent (1-4 month) increases in incidence. Malaria studies indicated post-event incidence increases, but the results were mixed, and the temporal pattern was less clear. Statistical evidence was limited for other MBD, though findings suggest that human outbreaks of Murray Valley encephalitis, Ross River virus, Barmah Forest virus, Rift Valley fever, and Japanese encephalitis may follow flooding.

Discussion

Flooding is generally associated with increased incidence of MBD, potentially following a brief decrease in incidence for some diseases. Methodological inconsistencies significantly limit direct comparison and generalizability of study results. Regions with established MBD and weather surveillance should be leveraged to conduct multisite research to a) standardize the quantification of relevant flooding, b) study nonlinear relationships between rainfall and disease, c) report outcomes at multiple lag periods, and d) investigate interacting factors that modify the likelihood and severity of outbreaks across different settings. https://doi.org/10.1289/EHP8887.",2021-09-28 +33975493,"Sociometric Risk Network Structure, HIV Prevalence, and Drug Injection-Related Norms among People Who Inject Drugs (PWID) in Athens, Greece.","Background: Structural properties of sociometric networks have been associated with behaviors related to HIV transmission. Very few studies, however, have explored the correlation between sociometric network factors and drug injection-related norms. Methods: This exploratory work: (i) describes basic structural qualities of a sociometric risk network of participants in the Transmission Reduction Intervention Project (TRIP) in Athens, Greece, in the context of a large HIV outbreak among people who inject drugs (PWID); (ii) measures HIV prevalence within specific structures within the sociometric risk network of PWID in TRIP; and (iii) explores the association of structural properties of the sociometric risk network in TRIP with drug injection-related norms. Results: The sociometric risk network in TRIP consisted of a large component (n=241, 67.8%), a few small components (n=36, 10.1%) with 2-10 individuals each, and some isolates (n=79, 22.2%). HIV prevalence was significantly higher in the large component (55.6%), the 2-core (59.1%) and 3-core (66.3%) of the large component, and the 3-cliques of the cores. 
Drug injection-related norms were significantly associated with structural characteristics of the sociometric risk network. A safe behavioral pattern (use of unclean cooker/filter/rinse water was never encouraged) was significantly (p=0.03) less normative among people who TRIP participants of the 2-core injected with (40.5%) than among network contacts of TRIP participants outside the 2-core (55.6%). On the contrary, at drug-using venues, 2-core members reported that safer behaviors were normative compared to what was reported by those without 2-core membership. Conclusions: Sociometric network data can give useful insights into HIV transmission dynamics and inform prevention strategies.Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1914103 .",2021-05-11 +34659645,Commentary: Does Twitter have a role in improving Family Planning messages and services in Low-and-Middle-Income Countries (LMICs)?,"Stakeholders are coming together to develop a vision for increasing access to family planning (FP) by 2030. Of the 923 million women in the developing world who wish to avoid a pregnancy, 218 million women are not using a modern contraceptive (Guttmacher Institute, 2020). In 2016, over 3.4 billion people were using the internet (https://ourworldindata.org/internet 2016). Moreover, internet users in the developing world use social media more frequently than Internet users in the U.S. and Europe. Of the many proposed actions to accelerate progress in family planning, the use of Twitter should be a key component. In this commentary, we describe the use of Twitter in a select group of low-and-middle-income countries that have made commitments to the family planning 2020 initiative (FP2020 countries and have the potential to leverage Twitter with current and potential family planning users. 
We examine Twitter feeds in eight key FP2020 countries, and we look at the content of Tweets issued by the ministries of health in most of these same countries. Our view is that it is feasible and easy to access Twitter feeds in low-and -middle income countries. We base our view on the types of reproductive health and family planning terms discussed in a public forum such as Twitter by current and potential users and their partners and ministries of health. We highlight two broad considerations that merit discussion among interested stakeholders, including policy makers, program designers, and health advocates. The first relates to the use of Twitter within family planning programs, and the second relates to themes that require more significant research. Data coupled with analytical capacity will help policy makers and program designers to effectively leverage Twitter for expanding the reach of family planning services and influencing social media policy. Our aim is to not only to contribute to the body of knowledge but also to spur greater engagement by program personnel, researchers, health advocates and contraceptive users.",2021-09-08 +33814007,Parents' experiences of caring for a young child with type 1 diabetes: a systematic review and synthesis of qualitative evidence.,"

Aims

To synthesise the qualitative evidence on parents' experiences of caring for a child aged ≤8 years with type 1 diabetes to identify: the challenges they encounter; their views about support received; ways in which support could be improved; and, directions for future research.

Methods

We searched Medline, EMBASE, CINAHL, PsycINFO and Web of Science databases to identify qualitative studies reporting parents' views and experiences of caring for a child with type 1 diabetes aged ≤8 years. Key analytical themes were identified using thematic synthesis.

Results

Fourteen studies were included. The synthesis resulted in the generation of two overarching themes. Monopolisation of life describes the all-encompassing impact diabetes could have on parents due to the constant worry they experienced and the perceived need for vigilance. It describes how parents' caring responsibilities could affect their wellbeing, relationships and finances, and how a lack of trusted sources of childcare and a desire to enable a 'normal' childhood constrained personal choices and activities. However, use of diabetes technologies could lessen some of these burdens. Experiences of professional and informal support describes how encounters with healthcare professionals, while generally perceived as helpful, could lead to frustration and anxiety, and how connecting with other parents caring for a child with type 1 diabetes provided valued emotional and practical support.

Conclusions

This synthesis outlines the challenges parents encounter, their views about support received and ways in which support might be improved. It also highlights significant limitations in the current literature and points to important areas for future research, including how sociodemographic factors and use of newer diabetes technologies influence parents' diabetes management practices and experiences. PROSPERO: https://www.crd.york.ac.uk/prospero/display_record.php?ID=CRD42019128710.",2021-04-04 +31749633,OSacc: Gene Expression-Based Survival Analysis Web Tool For Adrenocortical Carcinoma.,"Gene expression profiling data with long-term clinical follow-up information are great resources to screen, develop, evaluate and validate prognostic biomarkers in translational cancer research. However, an easy-to-use interactive online tool is needed to analyze these profiling and clinical data. In the current work, we developed OSacc (Online consensus Survival analysis of ACC), a web tool that provides rapid and user-friendly survival analysis based on seven independent transcriptomic profiles with long-term clinical follow-up information of 259 ACC patients gathered from The Cancer Genome Atlas (TCGA) and Gene Expression Omnibus (GEO) databases. OSacc allows researchers and clinicians to evaluate the prognostic value of genes of interest by Kaplan-Meier (KM) survival plot with hazard ratio (HR) and log-rank test in ACC. OSacc is freely available at http://bioinfo.henu.edu.cn/ACC/ACCList.jsp.",2019-10-24 +34017677,A property-response perspective on modern toxicity assessment and drug toxicity index (DTI).,"Toxicity related failures in drug discovery and clinical development have motivated scientists and regulators to develop a wide range of in-vitro, in-silico tools coupled with data science methods. Older drug discovery rules are being constantly modified to churn out any hidden predictive value. Nonetheless, the dose-response concepts remain central to all these methods. 
Over the last 2 decades medicinal chemists, and pharmacologists have observed that different physicochemical, and pharmacological properties capture trends in toxic responses. We propose that these observations should be viewed in a comprehensive property-response framework where dose is only a factor that modifies the inherent toxicity potential. We then introduce the recently proposed ""Drug Toxicity Index (DTI)"" and briefly summarize its applications. A webserver is available to calculate DTI values (https://all-tool-kit.github.io/Web-Tool.html).",2021-05-15 +33889684,Dataset for transient 3D simulations of turbulent premixed flames of Gas-to-Liquid (GTL) fuel.,"A fan-stirred combustion vessel is used to study the premixed turbulent combustion of diesel, Gas to Liquids (GTL) and 50/50 diesel-GTL and to generate these datasets. A numerical simulation approach is implemented for modelling the premixed combustion of the three fuels under different thermodynamics and turbulence initial conditions, using Zimont Turbulent Flame Speed Closure (Zimont TFC) model. Different parameters are obtained from these simulation runs such as turbulent eddy viscosity (µ), turbulent kinetic energy (k), Damkohler number (Da), Reynolds number (ReT) and turbulent flame speed (St). The raw, filtered and pre-processed data are imported from ANSYS Fluent and then listed on filtered tables for the ease of accessibility. These datasets can be then used to perform research in different related areas such as chemical kinetic mechanisms, ignition delay time, flame ignition mechanisms and flame extinction and diffusion. Also, they can be employed to further understand trends, patterns, and anomalies in data. In addition, they can be compared with other numerical models to establish a robust knowledge about the modelling of premixed turbulent combustion. For more information and discussion of the dataset creation, the reader is directed to the full-length article, ""Abdellatif M. 
Sadeq, Samer F. Ahmed, Ahmad K. Sleiti, Transient 3D simulations of turbulent premixed flames of gas-to-liquid (GTL) fuel in a fan-stirred combustion vessel, Fuel, Volume 291, 2021, 120,184, ISSN 0016 2361, https://doi.org/10.1016/j.fuel.2021.120184"" [1].",2021-03-23 +28654729,SIGNOR: A Database of Causal Relationships Between Biological Entities-A Short Guide to Searching and Browsing.,"SIGNOR (http://signor.uniroma2.it), the SIGnaling Network Open Resource, is a database designed to store experimentally validated causal interactions, i.e., interactions where a source entity has a regulatory effect (up-regulation, down-regulation, etc.) on a second target entity. SIGNOR acts both as a source of signaling information and a support for data analysis, modeling, and prediction. A user-friendly interface features the ability to search entries for any given protein or group of proteins and to display their interactions graphically in a network view. At the time of writing, SIGNOR stores approximately 16,000 manually curated interactions connecting more than 4,000 biological entities (proteins, chemicals, protein complexes, etc.) that play a role in signal transduction. SIGNOR also offers a collection of 37 signaling pathways. SIGNOR can be queried by three search tools: ""single-entity"" search, ""multiple-entity"" search, and ""pathway"" search. This manuscript describes two basic protocols detailing how to navigate and search the SIGNOR database and how to download the annotated dataset for local use. Finally, the support protocol reviews the utilities of the graphic visualizer. © 2017 by John Wiley & Sons, Inc.",2017-06-27 +33693483,MixTwice: large-scale hypothesis testing for peptide arrays by variance mixing. ,"Peptide microarrays have emerged as a powerful technology in immunoproteomics as they provide a tool to measure the abundance of different antibodies in patient serum samples. 
The high dimensionality and small sample size of many experiments challenge conventional statistical approaches, including those aiming to control the false discovery rate (FDR). Motivated by limitations in reproducibility and power of current methods, we advance an empirical Bayesian tool that computes local false discovery rate statistics and local false sign rate statistics when provided with data on estimated effects and estimated standard errors from all the measured peptides. As the name suggests, the MixTwice tool involves the estimation of two mixing distributions, one on underlying effects and one on underlying variance parameters. Constrained optimization techniques provide for model fitting of mixing distributions under weak shape constraints (unimodality of the effect distribution). Numerical experiments show that MixTwice can accurately estimate generative parameters and powerfully identify non-null peptides. In a peptide array study of rheumatoid arthritis (RA), MixTwice recovers meaningful peptide markers in one case where the signal is weak, and has strong reproducibility properties in one case where the signal is strong. Availability: MixTwice is available as an R software package at https://cran.r-project.org/web/packages/MixTwice/. Supplementary information: Supplementary data are available at Bioinformatics online.",2021-03-08 +30458705,Dissecting clinical outcome of porcine circovirus type 2 with in vivo derived transcriptomic signatures of host tissue responses.,"

Background

Porcine Circovirus Type 2 (PCV2) is a pathogen that has the ability to cause often devastating disease manifestations in pig populations with major economic implications. How PCV2 establishes subclinical persistence and why certain individuals progress to lethal lymphoid depletion remain to be elucidated.

Results

Here we present PorSignDB, a gene signature database describing in vivo porcine tissue physiology that we generated from a large compendium of in vivo transcriptional profiles and that we subsequently leveraged for deciphering the distinct physiological states underlying PCV2-affected lymph nodes. This systems genomics approach indicated that subclinical PCV2 infections suppress a myeloid leukocyte mediated immune response. However, in contrast an inflammatory myeloid cell activation is promoted in PCV2 patients with clinical manifestations. Functional genomics further uncovered STAT3 as a druggable PCV2 host factor candidate. Moreover, IL-2 supplementation of primary lymphocytes enabled ex vivo study of PCV2 replication in its target cell, the lymphoblast.

Conclusion

Our systematic dissection of the mechanistic basis of PCV2 reveals that subclinical and clinical PCV2 display two diametrically opposed immunotranscriptomic recalibrations that represent distinct physiological states in vivo, which suggests a paradigm shift in this field. Finally, our PorSignDB signature database is publicly available as a community resource ( http://www.vetvirology.ugent.be/PorSignDB/ , included in Gene Sets from Community Contributors http://software.broadinstitute.org/gsea/msigdb/contributed_genesets.jsp ) and provides systems biologists with a valuable tool for catalyzing studies of human and veterinary disease. Finally, a primary porcine lymphoblast cell culture system paves the way for unraveling the impact of host genetics on PCV2 replication.",2018-11-20 +31724711,The SCOP database in 2020: expanded classification of representative family and superfamily domains of known protein structures.,"The Structural Classification of Proteins (SCOP) database is a classification of protein domains organised according to their evolutionary and structural relationships. We report a major effort to increase the coverage of structural data, aiming to provide classification of almost all domain superfamilies with representatives in the PDB. We have also improved the database schema, provided a new API and modernised the web interface. This is by far the most significant update in coverage since SCOP 1.75 and builds on the advances in schema from the SCOP 2 prototype. The database is accessible from http://scop.mrc-lmb.cam.ac.uk.",2020-01-01 +31828101,Bioinformatics Analysis Identified Key Molecular Changes in Bladder Cancer Development and Recurrence.,"Background and Objectives: Bladder cancer (BC) is a complex tumor associated with high recurrence and mortality. To discover key molecular changes in BC, we analyzed next-generation sequencing data of BC and surrounding tissue samples from clinical specimens. Methods. 
Gene expression profiling datasets of bladder cancer were analyzed online. The Database for Annotation, Visualization, and Integrated Discovery (DAVID, https://david.ncifcrf.gov/) was used to perform Gene Ontology (GO) functional and KEGG pathway enrichment analyses. Molecular Complex Detection (MCODE) in Cytoscape software (Cytoscape_v3.6.1) was applied to identify hub genes. Protein expression and survival data were downloaded from OncoLnc (http://www.oncolnc.org/). Gene expression data were obtained from the ONCOMINE website (https://www.oncomine.org/). Results. We identified 4211 differentially expressed genes (DEGs) by analysis of surrounding tissue vs. cancer tissue (SC analysis) and 410 DEGs by analysis of cancer tissue vs. recurrent tissue cluster (CR analysis). GO function analysis revealed enrichment of DEGs in genes related to the cytoplasm and nucleoplasm for both clusters, and KEGG pathway analysis showed enrichment of DEGs in the PI3K-Akt signaling pathway. We defined the 20 genes with the highest degree of connectivity as the hub genes. Cox regression revealed CCNB1, ESPL1, CENPM, BLM, and ASPM were related to overall survival. The expression levels of CCNB1, ESPL1, CENPM, BLM, and ASPM were 4.795-, 5.028-, 8.691-, 2.083-, and 3.725-fold higher in BC than the levels in normal tissues, respectively. Conclusions. The results suggested that the functions of CCNB1, ESPL1, CENPM, BLM, and ASPM may contribute to BC development and the functions of CCNB1, ESPL1, CENPM, and BLM may also contribute to BC recurrence.",2019-11-16 +34037796,"PlantDeepSEA, a deep learning-based web service to predict the regulatory effects of genomic variants in plants.","Characterizing regulatory effects of genomic variants in plants remains a challenge. 
Although several tools based on deep-learning models and large-scale chromatin-profiling data have been available to predict regulatory elements and variant effects, no dedicated tools or web services have been reported in plants. Here, we present PlantDeepSEA as a deep learning-based web service to predict regulatory effects of genomic variants in multiple tissues of six plant species (including four crops). PlantDeepSEA provides two main functions. One is called Variant Effector, which aims to predict the effects of sequence variants on chromatin accessibility. Another is Sequence Profiler, a utility that performs 'in silico saturated mutagenesis' analysis to discover high-impact sites (e.g., cis-regulatory elements) within a sequence. When validated on independent test sets, the area under receiver operating characteristic curve of deep learning models in PlantDeepSEA ranges from 0.93 to 0.99. We demonstrate the usability of the web service with two examples. PlantDeepSEA could help to prioritize regulatory causal variants and might improve our understanding of their mechanisms of action in different tissues in plants. PlantDeepSEA is available at http://plantdeepsea.ncpgr.cn/.",2021-07-01 +28425468,Transcriptome Analysis of Mango (Mangifera indica L.) Fruit Epidermal Peel to Identify Putative Cuticle-Associated Genes.,"Mango fruit (Mangifera indica L.) are highly perishable and have a limited shelf life, due to postharvest desiccation and senescence, which limits their global distribution. Recent studies of tomato fruit suggest that these traits are influenced by the expression of genes that are associated with cuticle metabolism. However, studies of these phenomena in mango fruit are limited by the lack of genome-scale data. In order to gain insight into the mango cuticle biogenesis and identify putative cuticle-associated genes, we analyzed the transcriptomes of peels from ripe and overripe mango fruit using RNA-Seq. 
Approximately 400 million reads were generated and de novo assembled into 107,744 unigenes, with a mean length of 1,717 bp and with this information an online Mango RNA-Seq Database (http://bioinfo.bti.cornell.edu/cgi-bin/mango/index.cgi) which is a valuable genomic resource for molecular research into the biology of mango fruit was created. RNA-Seq analysis suggested that the pathway leading to biosynthesis of the cuticle component, cutin, is up-regulated during overripening. This data was supported by analysis of the expression of several putative cuticle-associated genes and by gravimetric and microscopic studies of cuticle deposition, revealing a complex continuous pattern of cuticle deposition during fruit development and involving substantial accumulation during ripening/overripening.",2017-04-20 +33866464,Body mass index and leptin levels in serum and cerebrospinal fluid in relation to delayed cerebral ischemia and outcome after aneurysmal subarachnoid hemorrhage.,"Aneurysmal subarachnoid hemorrhage (SAH) is associated with a high mortality rate and may leave surviving patients severely disabled. After the initial hemorrhage, clinical outcome is further compromised by the occurrence of delayed cerebral ischemia (DCI). Overweight and obesity have previously been associated with protective effects in the post-bleeding phase. The aim of this study was to assess the effects of a patient's body mass index (BMI) and leptin levels on the occurrence of DCI, DCI-related cerebral infarction, and clinical outcome. In total, 263 SAH patients were included of which leptin levels were assessed in 24 cases. BMI was recorded along disease severity documented by the Hunt and Hess and modified Fisher scales. The occurrence of clinical or functional DCI (neuromonitoring, CT Perfusion) was assessed. Long-term clinical outcome was documented after 12 months (extended Glasgow outcome scale). 
A total of 136 (51.7%) patients developed DCI of which 72 (27.4%) developed DCI-related cerebral infarctions. No association between BMI and DCI occurrence (P = .410) or better clinical outcome (P = .643) was identified. Early leptin concentration in serum (P = .258) and CSF (P = .159) showed no predictive value in identifying patients at risk of unfavorable outcomes. However, a significant increase of leptin levels in CSF occurred from 326.0 pg/ml IQR 171.9 prior to DCI development to 579.2 pg/ml IQR 211.9 during ongoing DCI (P = .049). In our data, no association between obesity and clinical outcome was detected. After DCI development, leptin levels in CSF increased either by an upsurge of active transport or disruption of the blood-CSF barrier. This trial has been registered at ClinicalTrials.gov (NCT02142166) as part of a larger-scale prospective data collection. BioSAB: https://clinicaltrials.gov/ct2/show/NCT02142166.",2021-04-17 +34191593,Microrisk Lab: An Online Freeware for Predictive Microbiology.,"Microrisk Lab is an R-based online modeling freeware designed to realize parameter estimation and model simulation in predictive microbiology. A total of 36 peer-reviewed models were integrated for parameter estimation (including primary models of bacterial growth/inactivation under static and nonisothermal conditions, secondary models of specific growth rate, and competition models of two-flora growth) and model simulation (including integrated models of deterministic or stochastic bacterial growth/inactivation under static and nonisothermal conditions) in Microrisk Lab. Each modeling section was designed to provide numerical and graphical results with comprehensive statistical indicators depending on the appropriate data set and/or parameter setting. In this study, six case studies were reproduced in Microrisk Lab and compared in parallel with DMFit, GInaFiT, IPMP 2013/GraphPad Prism, Bioinactivation FE, and @Risk, respectively. 
The estimated and simulated results demonstrated that the performance of Microrisk Lab was statistically equivalent to that of other existing modeling systems. Microrisk Lab allows for a friendly user experience when modeling microbial behaviors owing to its interactive interfaces, high integration, and interconnectivity. Users can freely access this application at https://microrisklab.shinyapps.io/english/ or https://microrisklab.shinyapps.io/chinese/.",2021-06-30 +34242532,Transition readiness and quality of life in emerging adult college students.,"

Objective

To examine the relative contribution of transition readiness (i.e., healthcare self-management) to health-related quality of life (HRQoL) among emerging adult (EA) college students without a chronic medical condition (CMC). Participants: College students (n = 2372; Mage = 19.32, SD = 1.26) from a Midwestern university. Methods: Participants completed online measures of demographics, HRQoL, and transition readiness. Results: Hierarchical regression analyses found transition readiness accounted an additional 3-4% of the variability in mental and physical HRQoL (p < .001), beyond demographic factors. 11.3% of EAs reported overall mastery of transition readiness, with navigating health insurance being the weakest area. Conclusions: Findings support the consensus that transition readiness is relevant to HRQoL for all EAs, including those without a CMC. EAs without a CMC demonstrate relatively weak transition readiness skills. Primary and university-based healthcare might consider programs supporting transition readiness and HRQoL among underresourced EAs. Supplemental data for this article can be accessed online at https://doi.org/10.1080/07448481.2021.1923507 .",2021-07-09 +33824725,Genetic variability in COVID-19-related genes in the Brazilian population.,"SARS-CoV-2 utilizes the angiotensin-converting enzyme 2 (ACE2) receptor and transmembrane serine protease (TMPRSS2) to infect human lung cells. Previous studies have suggested that different host ACE2 and TMPRSS2 genetic backgrounds might contribute to differences in the rate of SARS-CoV-2 infection or COVID-19 severity. Recent studies have also shown that variants in 15 genes related to type I interferon immunity to influenza virus might predispose patients toward life-threatening COVID-19 pneumonia. Other genes (SLC6A20, LZTFL1, CCR9, FYCO1, CXCR6, XCR1, IL6, CTSL, ABO, and FURIN) and HLA alleles have also been implicated in the response to infection with SARS-CoV-2.
Currently, Brazil has recorded the third-highest number of COVID-19 cases worldwide. We aimed to investigate the genetic variation present in COVID-19-related genes in the Brazilian population. We analyzed 27 candidate genes and HLA alleles in 954 admixed Brazilian exomes. We used the information available in two public databases (http://www.bipmed.org and http://abraom.ib.usp.br/) and additional exomes from individuals born in southeast Brazil, the region of the country with the highest number of COVID-19 patients. Variant allele frequencies were compared with the 1000 Genomes Project phase 3 (1KGP) and gnomAD databases. We detected 395 nonsynonymous variants; of these, 325 were also found in the 1KGP and/or gnomAD. Six of these variants were previously reported to influence the rate of infection or clinical prognosis of COVID-19. The remaining 70 variants were identified exclusively in the Brazilian sample, with a mean allele frequency of 0.0025. In silico analysis revealed that seven of these variants are predicted to affect protein function. Furthermore, we identified HLA alleles previously associated with the COVID-19 response at loci DQB1 and DRB1. Our results showed genetic variability common to other populations and rare and ultrarare variants exclusively found in the Brazilian population. These findings might lead to differences in the rate of infection or response to infection by SARS-CoV-2 and should be further investigated in patients with this disease.",2021-04-02 +32525671,XenoNet: Inference and Likelihood of Intermediate Metabolite Formation.,"Drug metabolism is a common cause of adverse drug reactions. Drug molecules can be metabolized into reactive metabolites, which can conjugate to biomolecules, like protein and DNA, in a process termed bioactivation. To mitigate adverse reactions caused by bioactivation, both experimental and computational screening assays are utilized. 
Experimental assays for assessing the formation of reactive metabolites are low throughput and expensive to perform, so they are often reserved until later stages of the drug development pipeline when the drug candidate pools are already significantly narrowed. In contrast, computational methods are high throughput and cheap to perform to screen thousands to millions of compounds for potentially toxic molecules during the early stages of the drug development pipeline. Commonly used computational methods focus on detecting and structurally characterizing reactive metabolite-biomolecule adducts or predicting sites on a drug molecule that are liable to form reactive metabolites. However, such methods are often only concerned with the structure of the initial drug molecule or of the adduct formed when a biomolecule conjugates to a reactive metabolite. Thus, these methods are likely to miss intermediate metabolites that may lead to subsequent reactive metabolite formation. To address these shortcomings, we create XenoNet, a metabolic network predictor, that can take a pair of a substrate and a target product as input and (1) enumerate pathways, or sequences of intermediate metabolite structures, between the pair, and (2) compute the likelihood of those pathways and intermediate metabolites. We validate XenoNet on a large, chemically diverse data set of 17 054 metabolic networks built from a literature-derived reaction database. Each metabolic network has a defined substrate molecule that has been experimentally observed to undergo metabolism into a defined product metabolite. XenoNet can predict experimentally observed pathways and intermediate metabolites linking the input substrate and product pair with a recall of 88 and 46%, respectively. Using likelihood scoring, XenoNet also achieves a top-one pathway and intermediate metabolite accuracy of 93.6 and 51.9%, respectively. We further validate XenoNet against prior methods for metabolite prediction. 
XenoNet significantly outperforms all prior methods across multiple metrics. XenoNet is available at https://swami.wustl.edu/xenonet.",2020-06-29 +,Development of a Sentinel-2 burned area algorithm: Generation of a small fire database for sub-Saharan Africa,"A locally-adapted multitemporal two-phase burned area (BA) algorithm has been developed using as inputs Sentinel-2 MSI reflectance measurements in the short and near infrared wavebands plus the active fires detected by Terra and Aqua MODIS sensors. An initial burned area map is created in the first step, from which tile dependent statistics are extracted for the second step. The whole Sub-Saharan Africa (around 25 M km2) was processed with this algorithm at a spatial resolution of 20 m, from January to December 2016. This period covers two half fire seasons on the Northern Hemisphere and an entire fire season in the South. The area was selected as existing BA products account it to include around 70% of global BA. Validation of this product was based on a two-stage stratified random sampling of Landsat multitemporal images. Higher accuracy values than existing global BA products were observed, with Dice coefficient of 77% and omission and commission errors of 26.5% and 19.3% respectively. The standard NASA BA product (MCD64A1 c6) showed a similar commission error (20.4%), but much higher omission errors (59.6%), with a lower Dice coefficient (53.6%). The BA algorithm was processed over >11,000 Sentinel-2 images to create a database that would also include small fires (<100 ha). This is the first time a continental BA product is generated from medium resolution sensors (spatial resolution = 20 m), showing their operational potential for improving our current understanding of global fire impacts. Total BA estimated from our product was 4.9 M km2, around 80% larger area than what the NASA BA product (MCD64A1 c6) detected in the same period (2.7 M km2). 
The main differences between the two products were found in regions where small fires (<100 ha) account for a significant proportion of total BA, as global products based on coarse pixel sizes (500 m for MCD64A1) unlikely detect them. On the negative side, Sentinel-2 based products have lower temporal resolution and consequently are more affected by cloud/cloud shadows and have less temporal reporting accuracy than global BA products. The product derived from S2 imagery would greatly contribute to better understanding the impacts of small fires in global fire regimes, particularly in tropical regions, where such fires are frequent. This product is named FireCCISFD11 and it is publicly available at: https://www.esa-fire-cci.org/node/262, last accessed on November 2018.",2019-03-01 +28960889,eSnail: A transcriptome-based molecular resource of the central nervous system for terrestrial gastropods.,"To expand on emerging terrestrial gastropod molecular resources, we have undertaken transcriptome-based sequencing of the central nervous system (CNS) from six ecologically invasive terrestrial gastropods. Focusing on snail species Cochlicella acuta and Helix aspersa and reticulated slugs Deroceras invadens, Deroceras reticulatum, Lehmannia nyctelia and Milax gagates, we obtained a total of 367,869,636 high-quality reads and compared them with existing CNS transcript resources for the invasive Mediterranean snail, Theba pisana. In total, we obtained 419,289 unique transcripts (unigenes) from 1,410,569 assembled contigs, with blast search analysis of multiple protein databases leading to the annotation of 124,268 unigenes, of which 92,544 mapped to ncbi nonredundant protein databases. 
We found that these transcriptomes have representatives in most biological functions, based on comparison of gene ontology, kegg pathway and protein family contents, demonstrating a high range of transcripts responsible for regulating metabolic activities and molecular functions occurring within the CNS. To provide an accessible genetic resource, we also demonstrate the presence of 66,687 microsatellites and 304,693 single-nucleotide variants, which can be used for the design of potentially thousands of unique primers for functional screening. An online ""eSnail"" database with a user-friendly web interface was implemented to query all the information obtained herein (http://soft.bioinfo-minzhao.org/esnail). We demonstrate the usefulness of the database through the mining of molluscan neuropeptides. As the most comprehensive CNS transcriptome resource for terrestrial gastropods, eSnail may serve as a useful gateway for researchers to explore gastropod CNS function for multiple purposes, including for the development of biocontrol approaches.",2017-11-12 +31304209,Dataset of lithium phosphate recovery from a low concentrated lithium-containing solution.,"The lithium-containing solution is also rich in lithium after preparation of lithium carbonate. With the depletion of primary lithium resource, it is necessary to recovery lithium from a low concentrated lithium-containing solution which can solve the shortage of lithium resources and avoid the waste of lithium. In this article, the lithium phosphate is recovered from lithium-containing solution with a concentration of 2 g/L after preparation of lithium carbonate. The results show that by the application of ultrasound, the lithium recovery rate can be increased. The concentration of lithium is less than 0.3 g/L after preparation of lithium phosphate. 
For lithium carbonate recovery by ultrasound, please refer to the full length article entitled ""Lithium carbonate recovery from lithium-containing solution by ultrasound assisted precipitation"", https://doi.org/10.1016/j.ultsonch.2018.12.025 (Chunlong Zhao et al., 2019) [1].",2019-05-24 +27899627,"Improvements to PATRIC, the all-bacterial Bioinformatics Database and Analysis Resource Center.","The Pathosystems Resource Integration Center (PATRIC) is the bacterial Bioinformatics Resource Center (https://www.patricbrc.org). Recent changes to PATRIC include a redesign of the web interface and some new services that provide users with a platform that takes them from raw reads to an integrated analysis experience. The redesigned interface allows researchers direct access to tools and data, and the emphasis has changed to user-created genome-groups, with detailed summaries and views of the data that researchers have selected. Perhaps the biggest change has been the enhanced capability for researchers to analyze their private data and compare it to the available public data. Researchers can assemble their raw sequence reads and annotate the contigs using RASTtk. PATRIC also provides services for RNA-Seq, variation, model reconstruction and differential expression analysis, all delivered through an updated private workspace. Private data can be compared by 'virtual integration' to any of PATRIC's public data. The number of genomes available for comparison in PATRIC has expanded to over 80 000, with a special emphasis on genomes with antimicrobial resistance data. 
PATRIC uses this data to improve both subsystem annotation and k-mer classification, and tags new genomes as having signatures that indicate susceptibility or resistance to specific antibiotics.",2016-11-29 +,MON-166 Excess Cardiovascular Events Despite Good Glycaemic Control,"Abstract The Royal Australasian College of General Practitioners (RACGP) have published standards of care for patients with type II diabetes. Data from rural Australia are limited. In the light of recent trials such as EMPA-REG standards of care in routine clinical practice in regional Queensland were compared to RACGP standards and the presence of cardiovascular complications noted. Methods: We retrospectively analyzed data from 258 patients in a single, primary care setting in regional Queensland, Australia. Data were collected for glycaemic control, lipids, BMI,blood pressure, lipids, microalbuminuria and presence of cardiovascular complications. In total 258 patients were identified. The average BMI was 32.4 kg/m2, age 63, 120 female, 138 male. Only 3 smoked but 67 (25%) consumed more than 2 standard drinks per day. Average BP was 136/81 though 42% exceeded the systolic BP target of 140 mmHg and 19% exceeded the diastolic target of 90 mmHg. Average HbA1c achieved was 6.8% with 53% at or below the target of 7%. Average total cholesterol was 4.4 (170), HDL 1.2 (46), LDL 2.3 (89) and triglyceride 2.0 (77) mmol/L (mg/dL). Microalbumin was elevated in 20% of female and 40% of male subjects. The prevalence of ischaemic heart disease was 25%, cerebrovascular disease 5%, peripheral vascular disease 6% and chronic kidney disease 7%. The majority were treated with sulphonylureas, metformin and insulin with relatively little use of SGLT2 inhibitors or GLP1 analogues. Glycaemic control was better than reported in the Australian National Diabetes Audit (ANDA) (HbA1c 8.1%). Smoking rates were low however blood pressure targets were often not met. 
Females suffered 26% of IHD burden, compared to 2% in the general female population aged 45-54. The prevalence of microalbuminuria in male subjects is twice the typical rate for Australia. In summary, despite achieving target levels of glycaemic control or exceeding them, there remains a high burden of cardiovascular complications when diabetes is treated with traditional agents. Ischaemic heart disease in female patients and microalbuminuria in male subjects may be suitable targets for SGLT2 inhibition given the published evidence of benefits. We intend to repeat our survey after a targeted intervention in these areas of interest. References: https://www.racgp.org.au/your-practice/guidelines/diabetes, http://www.health.gov.au/internet/main/publishingAlcohol, http://www.abs.gov.au/ausstats, https://www.aihw.gov.au/reports, Zinman, C. Wanner, J.M. Lachin, D. Fitchett, E. Bluhmki, S. Hantel, et al. Empagliflozin, cardiovascular outcomes, and mortality in type 2 diabetes. N Engl J Med, 373 (22) (2015), pp. 2117-2128 [Epub 2015/09/18].",2019-04-15 +34270697,Phosphate binding sites prediction in phosphorylation-dependent protein-protein interactions. ,"Phosphate binding plays an important role in modulating protein-protein interactions, which are ubiquitous in various biological processes. Accurate prediction of phosphate binding sites is an important but challenging task. Small size and diversity of phosphate binding sites lead to a substantial challenge for developing accurate prediction methods. Here we present the phosphate binding site predictor (PBSP), a novel and accurate approach to identifying phosphate binding sites from protein structures. PBSP combines an energy-based ligand-binding sites identification method with reverse focused docking using a phosphate probe. We show that PBSP outperforms not only general ligand binding sites predictors but also other existing phospholigand-specific binding sites predictors. 
It achieves ∼95% success rate for top 10 predicted sites with an average Matthews correlation coefficient (MCC) value of 0.84 for successful predictions. PBSP can accurately predict phosphate binding modes, with average position error of 1.4 Å and 2.4 Å in bound and unbound datasets, respectively. Lastly, visual inspection of the predictions is conducted. Reasons for failed predictions are further analyzed and possible ways to improve the performance are provided. These results demonstrate a novel and accurate approach to phosphate binding sites identification in protein structures. The software and benchmark datasets are freely available at http://web.pkusz.edu.cn/wu/PBSP/. Supplementary data are available at Bioinformatics online.",2021-07-16 +31258549,L1000 Viewer: A Search Engine and Web Interface for the LINCS Data Repository.,"The LINCS L1000 data repository contains almost two million gene expression profiles for thousands of small molecules and drugs. However, due to the complexity and the size of the data repository and a lack of an interoperable interface, the creation of pharmacologically meaningful workflows utilizing these data is severely hampered. In order to overcome this limitation, we developed the L1000 Viewer, a search engine and graphical web interface for the LINCS data repository. The web interface serves as an interactive platform allowing the user to select different forms of perturbation profiles, e.g., for specific cell lines, drugs, dosages, time points and combinations thereof. At its core, our method has a database we created from inferring and utilizing the intricate dependency graph structure among the data files. 
The L1000 Viewer is accessible via http://L1000viewer.bio-complexity.com/.",2019-06-14 +28751672,"The LncRNA Connectivity Map: Using LncRNA Signatures to Connect Small Molecules, LncRNAs, and Diseases.","Well characterized the connections among diseases, long non-coding RNAs (lncRNAs) and drugs are important for elucidating the key roles of lncRNAs in biological mechanisms in various biological states. In this study, we constructed a database called LNCmap (LncRNA Connectivity Map), available at http://www.bio-bigdata.com/LNCmap/ , to establish the correlations among diseases, physiological processes, and the action of small molecule therapeutics by attempting to describe all biological states in terms of lncRNA signatures. By reannotating the microarray data from the Connectivity Map database, the LNCmap obtained 237 lncRNA signatures of 5916 instances corresponding to 1262 small molecular drugs. We provided a user-friendly interface for the convenient browsing, retrieval and download of the database, including detailed information and the associations of drugs and corresponding affected lncRNAs. Additionally, we developed two enrichment analysis methods for users to identify candidate drugs for a particular disease by inputting the corresponding lncRNA expression profiles or an associated lncRNA list and then comparing them to the lncRNA signatures in our database. Overall, LNCmap could significantly improve our understanding of the biological roles of lncRNAs and provide a unique resource to reveal the connections among drugs, lncRNAs and diseases.",2017-07-27 +33879674,Identification of key genes and pathways in discoid lupus skin via bioinformatics analysis.,"

Abstract

Discoid lupus erythematosus (DLE) is the most common skin manifestation of lupus; however, the molecular mechanisms underlying DLE remain unknown. Therefore, we aimed to identify key differentially expressed genes (DEGs) in discoid lupus skin and investigate their potential pathways.To identify candidate genes involved in the occurrence and development of the disease, we downloaded the microarray datasets GSE52471 and GSE72535 from the Gene Expression Database (GEO). DEGs between discoid lupus skin and normal controls were selected using the GEO2R tool and Venn diagram software (http://bioinformatics.psb.ugent.be/webtools/Venn/). The Database for Annotation, Visualization, and Integrated Discovery (DAVID), Enrichr, and Cytoscape ClueGo were used to analyze the Kyoto Encyclopedia of Gene and Genome pathways and gene ontology. Protein-protein interactions (PPIs) of these DEGs were further assessed using the Search Tool for the Retrieval Interacting Genes version 10.0.Seventy three DEGs were co-expressed in both datasets. DEGs were predominantly upregulated in receptor signaling pathways of the immune response. In the PPI network, 69 upregulated genes were selected. Furthermore, 4 genes (CXCL10, ISG15, IFIH1, and IRF7) were found to be significantly upregulated in the RIG-I-like receptor signaling pathway, from analysis of Enrichr and Cytoscape ClueGo.The results of this study may provide new insights into the potential molecular mechanisms of DLE. However, further experimentation is required to confirm these findings.",2021-04-01 +33939447,Correction to Ding et al. (2021).,"Reports an error in ""Interpersonal Regulation Questionnaire (IRQ): Psychometric properties and gender differences in Chinese young adolescents"" by Ruyi Ding, Wei He, Jin Liu, Tuo Liu, Dan Zhang and Shiguang Ni (Psychological Assessment, Advanced Online Publication, Mar 18, 2021, np). 
In the article ""Interpersonal Regulation Questionnaire (IRQ): Psychometric Properties and Gender Differences in Chinese Young Adolescents,"" by Ruyi Ding, Wei He, Jin Liu, Tuo Liu, Dan Zhang, and Shiguang Ni (Psychological Assessment, 2021, Vol. 33, No. 4, pp. e13-e28, https://doi.org/10.1037/ pas0000997), the following funding information was missing from the author note: ""This study was funded by the Shenzhen Humanities & Social Sciences Key Research Bases, Tsinghua SIGS Overseas Research Cooperation Foundation (Grant No. HW2020004), National Philosophy and Social Sciences Foundation of China (Grant No. 20AZD085) and the Guangdong Natural Science Foundation (Grant No. 2020A1515010949)."" All versions of this article have been corrected. (The following abstract of the original article appeared in record 2021-27042-001.) The Interpersonal Regulation Questionnaire (IRQ) is a scale developed to measure the tendency and efficacy of intrinsic interpersonal emotion regulation across positive and negative affective states. As the psychometric properties of the IRQ across cultures and different ages have not been well established, the current study was conducted to examine the applicability of the translated IRQ in a sample of Chinese young adolescents (initial n = 487; 50.20% are males; M = 14.52 years old, SD = .75). The original four-factor structure of the IRQ (i.e., negative-tendency, negative-efficacy, positive-tendency, and positive-efficacy) and other parsimonious models were examined and compared using confirmatory factor analysis. The results demonstrated that only the correlated-four-factor model had acceptable model fit indices. The internal consistencies of the four sub-scales were all above .70. Strict measurement invariance (i.e., configural, metric, and scalar) was achieved between males and females. 
In addition, latent mean comparison showed that females reported higher negative-efficacy and positive-tendency than males, while no gender variations were found for the remaining two factors. The validity of the IRQ was further supported by its convergent-discriminant associations with emotional well-being and distress, emotional expressivity, social competence, empathic responding, cognitive reappraisal, and delinquent behavior. Taken together, the IRQ is a reliable and valid measure for Chinese young adolescents' intrinsic interpersonal emotion regulation. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-04-01 +33030962,Preoperative Exercise Training to Prevent Postoperative Pulmonary Complications in Adults Undergoing Major Surgery. A Systematic Review and Meta-analysis with Trial Sequential Analysis.,"Rationale: Poor preoperative physical fitness and respiratory muscle weakness are associated with postoperative pulmonary complications (PPCs) that result in prolonged hospital length of stay and increased mortality.Objectives: To examine the effect of preoperative exercise training on the risk of PPCs across different surgical settings.Methods: We searched MEDLINE, Web of Science, Embase, the Physiotherapy Evidence Database, and the Cochrane Central Register, without language restrictions, for studies from inception to July 2020. We included randomized controlled trials that compared patients receiving exercise training with those receiving usual care or sham training before cardiac, lung, esophageal, or abdominal surgery. PPCs were the main outcome; secondary outcomes were preoperative functional changes and postoperative mortality, cardiovascular complications, and hospital length of stay. The study was registered with PROSPERO (International Prospective Register of Systematic Reviews).Results: From 29 studies, 2,070 patients were pooled for meta-analysis. 
Compared with the control condition, preoperative exercise training was associated with a lower incidence of PPCs (23 studies, 1,864 patients; relative risk, 0.52; 95% confidence interval [CI], 0.41 to 0.66; grading of evidence, moderate); Trial Sequential Analysis confirmed effectiveness, and there was no evidence of difference of effect across surgeries, type of training (respiratory muscles, endurance or combined), or preoperative duration of training. At the end of the preoperative period, exercise training resulted in increased peak oxygen uptake (weighted mean difference [WMD], +2 ml/kg/min; 99% CI, 0.3 to 3.7) and higher maximal inspiratory pressure (WMD, +12.2 cm H2O; 99% CI, 6.3 to 18.2). Hospital length of stay was shortened (WMD, -2.3 d; 99% CI, -3.82 to -0.75) in the intervention group, whereas no difference was found in postoperative mortality.Conclusions: Preoperative exercise training improves physical fitness and reduces the risk of developing PPCs while minimizing hospital resources use, regardless of the type of intervention and surgery performed.Systematic review registered with https://www.crd.york.ac.uk/prospero/ (CRD 42018096956).",2021-04-01 +34060868,The magical orbitofrontal cortex.,"This special issue, commissioned after the 4th Quadrennial Meeting on Orbitofrontal Cortex Function held in Paris in November of 2019 (https://ofc2019.sciencesconf.org/), is intended to provide a snapshot of this ongoing transformation; we hope that the ideas presented herein will provide a foundation for the next stage in the evolution of our understanding of this magical brain region. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-04-01 +34383568,Neural Speech Encoding in Infancy Predicts Future Language and Communication Difficulties.,"Purpose This study aimed to construct an objective and cost-effective prognostic tool to forecast the future language and communication abilities of individual infants. 
Method Speech-evoked electroencephalography (EEG) data were collected from 118 infants during the first year of life during the exposure to speech stimuli that differed principally in fundamental frequency. Language and communication outcomes, namely four subtests of the MacArthur-Bates Communicative Development Inventories (MCDI)-Chinese version, were collected between 3 and 16 months after initial EEG testing. In the two-way classification, children were classified into those with future MCDI scores below the 25th percentile for their age group and those above the same percentile, while the three-way classification classified them into < 25th, 25th-75th, and > 75th percentile groups. Machine learning (support vector machine classification) with cross validation was used for model construction. Statistical significance was assessed. Results Across the four MCDI measures of early gestures, later gestures, vocabulary comprehension, and vocabulary production, the areas under the receiver-operating characteristic curve of the predictive models were respectively .92 ± .031, .91 ± .028, .90 ± .035, and .89 ± .039 for the two-way classification, and .88 ± .041, .89 ± .033, .85 ± .047, and .85 ± .050 for the three-way classification (p < .01 for all models). Conclusions Future language and communication variability can be predicted by an objective EEG method that indicates the function of the auditory neural pathway foundational to spoken language development, with precision sufficient for individual predictions. Longer-term research is needed to assess predictability of categorical diagnostic status. Supplemental Material https://doi.org/10.23641/asha.15138546.",2021-08-12 +31558563,Modeling Cellular Response in Large-Scale Radiogenomic Databases to Advance Precision Radiotherapy.,"Radiotherapy is integral to the care of a majority of patients with cancer. 
Despite differences in tumor responses to radiation (radioresponse), dose prescriptions are not currently tailored to individual patients. Recent large-scale cancer cell line databases hold the promise of unravelling the complex molecular arrangements underlying cellular response to radiation, which is critical for novel predictive biomarker discovery. Here, we present RadioGx, a computational platform for integrative analyses of radioresponse using radiogenomic databases. We fit the dose-response data within RadioGx to the linear-quadratic model. The imputed survival across a range of dose levels (AUC) was a robust radioresponse indicator that correlated with biological processes known to underpin the cellular response to radiation. Using AUC as a metric for further investigations, we found that radiation sensitivity was significantly associated with disruptive mutations in genes related to nonhomologous end joining. Next, by simulating the effects of different oxygen levels, we identified putative genes that may influence radioresponse specifically under hypoxic conditions. Furthermore, using transcriptomic data, we found evidence for tissue-specific determinants of radioresponse, suggesting that tumor type could influence the validity of putative predictive biomarkers of radioresponse. Finally, integrating radioresponse with drug response data, we found that drug classes impacting the cytoskeleton, DNA replication, and mitosis display similar therapeutic effects to ionizing radiation on cancer cell lines. In summary, RadioGx provides a unique computational toolbox for hypothesis generation to advance preclinical research for radiation oncology and precision medicine. SIGNIFICANCE: The RadioGx computational platform enables integrative analyses of cellular response to radiation with drug responses and genome-wide molecular data. 
GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/79/24/6227/F1.large.jpg.See related commentary by Spratt and Speers, p. 6076.",2019-09-26 +32805011,HaploGrouper: a generalized approach to haplogroup classification.,"

Motivation

We introduce HaploGrouper, a versatile software to classify haplotypes into haplogroups on the basis of a known phylogenetic tree. A typical use case for this software is the assignment of haplogroups to human mitochondrial DNA (mtDNA) or Y-chromosome haplotypes. Existing state-of-the-art haplogroup-calling software is typically hard-wired to work only with either mtDNA or Y-chromosome haplotypes from humans.

Results

HaploGrouper exhibits comparable accuracy in these instances and has the advantage of being able to assign haplogroups to any kind of haplotypes from any species-given an extant annotated phylogenetic tree defined by sequence variants.

Availability and implementation

The software is available at the following URL https://gitlab.com/bio_anth_decode/haploGrouper.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +35382104,Building an Open Resources Repository for COVID-19 Research.,"The COVID-19 outbreak is a global pandemic declared by the World Health Organization, with rapidly increasing cases in most countries. A wide range of research is urgently needed for understanding the COVID-19 pandemic, such as transmissibility, geographic spreading, risk factors for infections, and economic impacts. Reliable data archive and sharing are essential to jump-start innovative research to combat COVID-19. This research is a collaborative and innovative effort in building such an archive, including the collection of various data resources relevant to COVID-19 research, such as daily cases, social media, population mobility, health facilities, climate, socioeconomic data, research articles, policy and regulation, and global news. Due to the heterogeneity between data sources, our effort also includes processing and integrating different datasets based on GIS (Geographic Information System) base maps to make them relatable and comparable. To keep the data files permanent, we published all open data to the Harvard Dataverse (https://dataverse.harvard.edu/dataverse/2019ncov), an online data management and sharing platform with a permanent Digital Object Identifier number for each dataset. Finally, preliminary studies are conducted based on the shared COVID-19 datasets and revealed different spatial transmission patterns among mainland China, Italy, and the United States.",2020-09-01 +34624638,Minimum detectable spinal cord atrophy with automatic segmentation: Investigations using an open-access dataset of healthy participants.,"Spinal cord atrophy is a well-known biomarker in multiple sclerosis (MS) and other diseases. It is measured by segmenting the spinal cord on an MRI image and computing the average cross-sectional area (CSA) over a few slices. 
Introduced about 25 years ago, this procedure is highly sensitive to the quality of the segmentation and is prone to rater-bias. Recently, fully-automated spinal cord segmentation methods, which remove the rater-bias and enable the automated analysis of large populations, have been introduced. A lingering question related to these automated methods is: How reliable are they at detecting atrophy? In this study, we evaluated the precision and accuracy of automated atrophy measurements by simulating scan-rescan experiments. Spinal cord MRI data from the open-access spine-generic project were used. The dataset aggregates 42 sites worldwide and consists of 260 healthy subjects and includes T1w and T2w contrasts. To simulate atrophy, each volume was globally rescaled at various scaling factors. Moreover, to simulate patient repositioning, random rigid transformations were applied. Using the DeepSeg algorithm from the Spinal Cord Toolbox, the spinal cord was segmented and vertebral levels were identified. Then, the average CSA between C3-C5 vertebral levels was computed for each Monte Carlo sample, allowing us to derive measures of atrophy, intra/inter-subject variability, and sample-size calculations. The minimum sample size required to detect an atrophy of 2% between unpaired study arms, commonly seen in MS studies, was 467 +/- 13.9 using T1w and 467 +/- 3.2 using T2w images. The minimum sample size to detect a longitudinal atrophy (between paired study arms) of 0.8% was 60 +/- 25.1 using T1w and 10 +/- 1.2 using T2w images. At the intra-subject level, the estimated CSA, observed in this study, showed good precision compared to other studies with COVs (across Monte Carlo transformations) of 0.8% for T1w and 0.6% for T2w images. 
While these sample sizes seem small, we would like to stress that these results correspond to a ""best case"" scenario, in that the dataset used here was of particularly good quality and the model for simulating atrophy does not encompass all the variability met in real-life datasets. The simulated atrophy and scan-rescan variability may over-simplify the biological reality. The proposed framework is open-source and available at https://csa-atrophy.readthedocs.io/.",2021-10-04 +33357225,Learning to detect the onset of slow activity after a generalized tonic-clonic seizure.,"

Background

Sudden death in epilepsy (SUDEP) is a rare disease in US, however, they account for 8-17% of deaths in people with epilepsy. This disease involves complicated physiological patterns and it is still not clear what are the physio-/bio-makers that can be used as an indicator to predict SUDEP so that care providers can intervene and treat patients in a timely manner. For this sake, UTHealth School of Biomedical Informatics (SBMI) organized a machine learning Hackathon to call for advanced solutions https://sbmi.uth.edu/hackathon/archive/sept19.htm .

Methods

In recent years, deep learning has become state of the art for many domains with large amounts data. Although healthcare has accumulated a lot of data, they are often not abundant enough for subpopulation studies where deep learning could be beneficial. Taking these limitations into account, we present a framework to apply deep learning to the detection of the onset of slow activity after a generalized tonic-clonic seizure, as well as other EEG signal detection problems exhibiting data paucity.

Results

We conducted ten training runs for our full method and seven model variants, statistically demonstrating the impact of each technique used in our framework with a high degree of confidence.

Conclusions

Our findings point toward deep learning being a viable method for detection of the onset of slow activity provided approperiate regularization is performed.",2020-12-24 +33905618,The PhenX Toolkit: Establishing Standard Measures for COVID-19 Research.,"The PhenX (consensus measures for Phenotypes and eXposures) Toolkit (https://www.phenxtoolkit.org/) is a publicly available, web-based catalog of recommended, well-established measurement protocols of phenotypes and exposures. The goal of PhenX is to facilitate the use of standard measures, enhance data interoperability, and promote collaborative and translational research. PhenX is driven by the scientific community and historically has depended on working groups of experts to recommend measures for release in the PhenX Toolkit. The urgent need for recommended, standard measures for COVID-19 research triggered the development of a ""rapid release"" process for releasing new content in the PhenX Toolkit. Initially, PhenX collaborated with the National Institutes of Health (NIH) Office of Behavioral and Social Sciences Research, the National Human Genome Research Institute, and the NIH Disaster Research Response (DR2) program to create a library of COVID-19 measurement protocols. With additional support from NIH, PhenX adapted crowdsourcing techniques to accelerate prioritization and recommendation of protocols for release in the PhenX Toolkit. Prioritized COVID-19-specific protocols were used to anchor and define specialty collections of protocols that were subject to review and approval by the PhenX Steering Committee. In addition to the COVID-19-specific protocols, the specialty collections include existing, well-established PhenX protocols, use of which will further enhance data interoperability and cross-study analysis. 
The COVID-19 specialty collections are Behaviors and Risks; Ethnicity, Race and Demographics; History, Treatment and Outcomes; Information Resources; Psychosocial and Mental Health; and Socioeconomic. The development and usage of PhenX COVID-19 specialty collections are described in this article. © 2021 The Authors. Basic Protocol: Selecting COVID-19 protocols.",2021-04-01 +34379029,"Synthesis, spectroscopic and biological activity evaluation of Ni(II), Cu(II) and Zn(II) complexes of schiff base derived from pyridoxal and 4-fluorobenzohydrazide.","A novel Schiff base ligand, 4-fluoro-N-((3-hydroxy-5-(hydroxymethyl)-2-methylpyridin-4-yl)methylene)benzohydrazide (PLFBH) was synthesized by condensationof pyridoxal and 4-fluorobenzohydrazide. Its complexes with Ni(II), Cu(II), and Zn(II) metal ionswere prepared and characterized by spectroscopic IR, 1H-NMR, UV, LC-MS, ESR, and powder XRD studies and by elemental analysis and thermal analysis, molar conductance, and magnetic susceptibility measurements. The results indicate the geometry of the complexes to be hexa coordinate distorted octahedral. Based on the electronic absorption and fluorescence emission spectra and viscosity studies, an intercalative mode of binding of the complexes with CT-DNA was suggested, which was also supported by DNA docking studies. The docking studies of metal complexes with DNA were carried out using Autodock 4.2. The in vitro anticancer assay for the Cu(II)-PLFBH complex was performed to assess the ability of the complex to inhibit human cell proliferation on HeLa human cervical carcinoma cells, MCF-7 human breast carcinoma cells, and A549 human lung carcinoma cells. The Cu(II)-PLFBH complex exhibited moderate to good inhibitory effect on the cancer cell lines studied. The complexes showed good cleavageability toward plasmid pBR322 DNA. 
The metal complexes were found to show good antibacterial activity against gram positive bacteria, Staphylococcus aureus and Bacillus cereus and gram negative bacteria Escherichia coli and Pseudomonas aeruginosa cultures,while the ligand showed marginal activity.Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1961271 .",2021-08-11 +33763122,Origin Sample Prediction and Spatial Modeling of Antimicrobial Resistance in Metagenomic Sequencing Data.,"The steady elaboration of the Metagenomic and Metadesign of Subways and Urban Biomes (MetaSUB) international consortium project raises important new questions about the origin, variation, and antimicrobial resistance of the collected samples. CAMDA (Critical Assessment of Massive Data Analysis, http://camda.info/) forum organizes annual challenges where different bioinformatics and statistical approaches are tested on samples collected around the world for bacterial classification and prediction of geographical origin. This work proposes a method which not only predicts the locations of unknown samples, but also estimates the relative risk of antimicrobial resistance through spatial modeling. We introduce a new component in the standard analysis as we apply a Bayesian spatial convolution model which accounts for spatial structure of the data as defined by the longitude and latitude of the samples and assess the relative risk of antimicrobial resistance taxa across regions which is relevant to public health. We can then use the estimated relative risk as a new measure for antimicrobial resistance. We also compare the performance of several machine learning methods, such as Gradient Boosting Machine, Random Forest, and Neural Network to predict the geographical origin of the mystery samples. All three methods show consistent results with some superiority of Random Forest classifier. 
In our future work we can consider a broader class of spatial models and incorporate covariates related to the environment and climate profiles of the samples to achieve more reliable estimation of the relative risk related to antimicrobial resistance.",2021-03-04 +28365734,MiDAS 2.0: an ecosystem-specific taxonomy and online database for the organisms of wastewater treatment systems expanded for anaerobic digester groups. ,"Wastewater is increasingly viewed as a resource, with anaerobic digester technology being routinely implemented for biogas production. Characterising the microbial communities involved in wastewater treatment facilities and their anaerobic digesters is considered key to their optimal design and operation. Amplicon sequencing of the 16S rRNA gene allows high-throughput monitoring of these systems. The MiDAS field guide is a public resource providing amplicon sequencing protocols and an ecosystem-specific taxonomic database optimized for use with wastewater treatment facility samples. The curated taxonomy endeavours to provide a genus-level-classification for abundant phylotypes and the online field guide links this identity to published information regarding their ecology, function and distribution. This article describes the expansion of the database resources to cover the organisms of the anaerobic digester systems fed primary sludge and surplus activated sludge. The updated database includes descriptions of the abundant genus-level-taxa in influent wastewater, activated sludge and anaerobic digesters. Abundance information is also included to allow assessment of the role of emigration in the ecology of each phylotype. MiDAS is intended as a collaborative resource for the progression of research into the ecology of wastewater treatment, by providing a public repository for knowledge that is accessible to all interested in these biotechnologically important systems. 
http://www.midasfieldguide.org.",2017-01-01 +33757926,[Genitourinary menopause syndrome. Postmenopausal women management: CNGOF and GEMVi clinical practice guidelines].,"

Introduction

Genitourinary menopause syndrome (SGUM) is defined as a set of symptoms associated with a decrease of estrogen and other sexual steroids during menopause. The main symptoms are vulvovaginal (dryness, burning, itching), sexual (dyspareunia), and urinary (urinary infections, pollakiuria, nycturia, pain, urinary incontinence by urgenturia). SGUM leads to an alteration of the quality of life, and affects especially women's sexuality.

Objective

The objective of this review was to elaborate guidelines for clinical practice regarding the management of SGUM in postmenopausal women, and in particular, in women with a history of breast cancer, treated or not with hormone therapy.

Materials and methods

A systematic review of the literature on SGUM management was conducted on Pubmed, Medline and Cochrane Library. Recommendations from international scholarly societies were also taken into account: International Menopause Society (IMS) https://www.imsociety.org, The North American Menopause Society (NAMS) https://www.menopause.org, Canadian Menopause Society https://www.sigmamenopause.com, European Menopause and Andropause Society (EMAS) https://www.emas-online.org, International Society for the Study of Women's Sexual Health (ISSWSH) https://www.isswsh.org.

Results

Vaginal use of lubricants, moisturizers and hyaluronic acid improves the symptoms of SGUM and may be offered to all patients. For postmenopausal women, local estrogen will be preferred to the oral route because of their safety and efficacy on all symptoms of SGUM during low-dose use. Prasterone is a local treatment that can be proposed as an effective alternative for the management of dyspareunia and sexual function disorder. Current data on oral testosterone, tibolone, oral or transdermal DHEA and herbal medicine are currently limited. Ospemifène, which has shown a significant improvement in sexual symptoms, is not currently marketed in France. In the particular case of women with a history of breast cancer, non-hormonal regimens are a first-line therapy. Current data on the risk of breast cancer recurrence when administering low-dose local estrogen are reassuring but do not support a conclusion that this treatment is safe.

Conclusion

SGUM is a common symptom that can affect the quality of life of postmenopausal women. A treatment should be systematically proposed. Local non-hormonal treatment may be offered in all women. Local low-dose estrogen therapy and Prasterone has shown an interest in the management of symptoms. In women before a history of breast cancer, local non-hormonal treatment should be offered first-line. The safety of low-dose local estrogen therapy and Prasterone cannot be established at this time. Other alternatives exist but are not currently recommended in France due to lack of data.",2021-03-20 +32590376,Data-driven discovery of 3D and 2D thermoelectric materials.,"In this work, we first perform a systematic search for high-efficiency three-dimensional (3D) and two-dimensional (2D) thermoelectric materials by combining semiclassical transport techniques with density functional theory (DFT) calculations and then train machine-learning models on the thermoelectric data. Out of 36 000 three-dimensional and 900 two-dimensional materials currently in the publicly available JARVIS-DFT database, we identify 2932 3D and 148 2D promising thermoelectric materials using a multi-steps screening procedure, where specific thresholds are chosen for key quantities like bandgaps, Seebeck coefficients and power factors. We compute the Seebeck coefficients for all the materials currently in the database and validate our calculations by comparing our results, for a subset of materials, to experimental and existing computational datasets. We also investigate the effect of chemical, structural, crystallographic and dimensionality trends on thermoelectric performance. We predict several classes of efficient 3D and 2D materials such as Ba(MgX)2 (X = P, As, Bi), X2YZ6 (X = K, Rb, Y=Pd, Pt, Z = Cl, Br), K2PtX2 (X = S, Se), NbCu3X4 (X = S, Se, Te), Sr2XYO6 (X = Ta, Zn, Y=Ga, Mo), TaCu3X4 (X = S, Se, Te), and XYN (X = Ti, Zr, Y=Cl, Br). 
Finally, as high-throughput DFT is computationally expensive, we train machine learning models using gradient boosting decision trees and classical force-field inspired descriptors for n-and p-type Seebeck coefficients and power factors, to quickly pre-screen materials for guiding the next set of DFT calculations. The dataset and tools are made publicly available at the websites: https://www.ctcms.nist.gov/~knc6/JVASP.html, https://www.ctcms.nist.gov/jarvisml/and https://jarvis.nist.gov/.",2020-06-26 +33788592,Coaching Childcare Providers to Support Toddlers' Gesture Use With Children Experiencing Early Childhood Poverty.,"Purpose The purpose of this study is to examine the impact of an intervention in which childcare providers (CCPs) are coached to support toddlers' gesture use during every day classroom routines. Method This study uses a multiple-baseline across strategies single-case experimental design to examine the impact of a coaching intervention on three CCPs' use of communication strategies with toddlers experiencing early childhood poverty. The CCPs were coached with a systematic framework called Setting the Stage, Observation and Opportunities to Embed, Problem-solving and Planning, Reflection and Review as they learned to implement three strategies to support toddlers' gesture use-modeling gestures with a short phrase, opportunities to gesture, and responding/expanding child gestures. CCPs were coached during book sharing and another classroom routine of their choice. Social validity data on the coaching approach and on the intervention strategies were gathered from postintervention interviews. Results The visual analysis and nonoverlap of all pairs' effect size indicates that the coaching intervention had a functional relation with CCPs' use of modeling gestures and responding/expanding gestures during book sharing, play, and circle time. 
Social validity data indicate that CCPs found the coaching framework supportive of their learning and feelings of self-efficacy, and that the intervention strategies supported their toddlers' communication. Conclusions The coaching framework was used to increase CCP strategy use during everyday classroom routines with toddlers. CCPs endorsed the coaching approach and the intervention strategies. This study adds to the literature supporting efforts to enhance children's earliest language learning environments. Supplemental Material https://doi.org/10.23641/asha.14044055.",2021-03-31 +33656437,Validation strategy of a bioinformatics whole genome sequencing workflow for Shiga toxin-producing Escherichia coli using a reference collection extensively characterized with conventional methods. ,"Whole genome sequencing (WGS) enables complete characterization of bacterial pathogenic isolates at single nucleotide resolution, making it the ultimate tool for routine surveillance and outbreak investigation. The lack of standardization, and the variation regarding bioinformatics workflows and parameters, however, complicates interoperability among (inter)national laboratories. We present a validation strategy applied to a bioinformatics workflow for Illumina data that performs complete characterization of Shiga toxin-producing Escherichia coli (STEC) isolates including antimicrobial resistance prediction, virulence gene detection, serotype prediction, plasmid replicon detection and sequence typing. The workflow supports three commonly used bioinformatics approaches for the detection of genes and alleles: alignment with blast+, kmer-based read mapping with KMA, and direct read mapping with SRST2. A collection of 131 STEC isolates collected from food and human sources, extensively characterized with conventional molecular methods, was used as a validation dataset. 
Using a validation strategy specifically adopted to WGS, we demonstrated high performance with repeatability, reproducibility, accuracy, precision, sensitivity and specificity above 95 % for the majority of all assays. The WGS workflow is publicly available as a 'push-button' pipeline at https://galaxy.sciensano.be. Our validation strategy and accompanying reference dataset consisting of both conventional and WGS data can be used for characterizing the performance of various bioinformatics workflows and assays, facilitating interoperability between laboratories with different WGS and bioinformatics set-ups.",2021-03-03 +31195415,"[The Onkonet database: taking stock of an Internet-based, multi-centre database on surgical prostate cancer treatment].","

Background

 The Onkonet database has been developed and coordinated by the Berliner Tumorzentrum e. V. (http://www.prostata-ca.net) and contains data on pre-, peri- and postoperative parameters of radical prostatectomy documented since January 2005. With its user-friendly interface and its integrated benchmarking tool, the main goal of Onkonet was to outline and improve the surgical care of prostate cancer patients in Germany. This study aimed to analyse all Onkonet data documented from the beginning of the project until June 2018. We focused on the completeness and plausibility of data to investigate and define the possibilities and limits of further analyses.

Patients and methods

 All patients who underwent radical prostatectomy in one of the urological clinics participating in this project until June 2018 were included in this retrospective study. The completeness of all documented patient data was analysed using Excel 2013. The statistical analysis was descriptive.

Results

 A total of 21 474 patients were documented in Onkonet. 58,6 % (12 591) of them had a complete dataset including date of birth, date of surgery, dates of hospitalisation and discharge, initial PSA value, Gleason score of the biopsy, clinical T stage, pathological T stage, pathological Gleason score, as well as information on the surgical technique. Mean completeness of pre-operative parameters was 26,8 %, of hospitalisation parameters 64,5 %, and of pathological parameters 58,1 %. Amongst these, the documentation of the pathological T stage was complete in 80,1 %, documentation of N stage in 78,8 %, of M stage in 74,8 %, of pathological Gleason Score in 78,7 %, and of R1 status in 78,7 %. Completeness of follow-up data was 8,1 %, with PSA data being available in 27,2 %, continence data in 23,0 %, and potency data in 13,9 %.

Conclusions

 Comprising 21 474 documented patients and over 200 parameters, Onkonet is one of the most comprehensive clinical registers for the documentation of prostate cancer patients in Germany. The data analysis showed that the limitations of such a database are mainly due to the high number of parameters and the high susceptibility to errors due to manual data submission.",2019-06-13 +33775147,Collective knowledge: organizing research projects as a database of reusable components and portable workflows with common interfaces.,"This article provides the motivation and overview of the Collective Knowledge Framework (CK or cKnowledge). The CK concept is to decompose research projects into reusable components that encapsulate research artifacts and provide unified application programming interfaces (APIs), command-line interfaces (CLIs), meta descriptions and common automation actions for related artifacts. The CK framework is used to organize and manage research projects as a database of such components. Inspired by the USB 'plug and play' approach for hardware, CK also helps to assemble portable workflows that can automatically plug in compatible components from different users and vendors (models, datasets, frameworks, compilers, tools). Such workflows can build and run algorithms on different platforms and environments in a unified way using the customizable CK program pipeline with software detection plugins and the automatic installation of missing packages. This article presents a number of industrial projects in which the modular CK approach was successfully validated in order to automate benchmarking, auto-tuning and co-design of efficient software and hardware for machine learning and artificial intelligence in terms of speed, accuracy, energy, size and various costs. 
The CK framework also helped to automate the artifact evaluation process at several computer science conferences as well as to make it easier to reproduce, compare and reuse research techniques from published papers, deploy them in production, and automatically adapt them to continuously changing datasets, models and systems. The long-term goal is to accelerate innovation by connecting researchers and practitioners to share and reuse all their knowledge, best practices, artifacts, workflows and experimental results in a common, portable and reproducible format at https://cKnowledge.io/. This article is part of the theme issue 'Reliability and reproducibility in computational science: implementing verification, validation and uncertainty quantification in silico'.",2021-03-29 +31693302,Molecular insight into a new low-affinity xylan binding module from the xylanolytic gut symbiont Roseburia intestinalis.,"Efficient capture of glycans, the prime metabolic resources in the human gut, confers a key competitive advantage for gut microbiota members equipped with extracellular glycoside hydrolases (GHs) to target these substrates. The association of glycans to the bacterial cell surface is typically mediated by carbohydrate binding modules (CBMs). Here, we report the structure of RiCBM86 appended to a GH family 10 xylanase from Roseburia intestinalis. This CBM represents a new family of xylan binding CBMs present in xylanases from abundant and prevalent healthy human gut Clostridiales. RiCBM86 adopts a canonical β-sandwich fold, but shows structural divergence from known CBMs. The structure of RiCBM86 has been determined with a bound xylohexaose, which revealed an open and shallow binding site. RiCBM86 recognizes only a single xylosyl ring with direct hydrogen bonds. This mode of recognition is unprecedented amongst previously reported xylan binding type-B CBMs that display more extensive hydrogen-bonding patterns to their ligands or employ Ca2+ to mediate ligand-binding. 
The architecture of RiCBM86 is consistent with an atypically low binding affinity (K about 0.5 mm for xylohexaose) compared to most xylan binding CBMs. Analyses using NMR spectroscopy corroborated the observations from the complex structure and the preference of RiCBM86 to arabinoxylan over glucuronoxylan, consistent with the largely negatively charged surface flanking the binding site. Mutational analysis and affinity electrophoresis established the importance of key binding residues, which are conserved in the family. This study provides novel insight into the structural features that shape low-affinity CBMs that mediate extended bacterial glycan capture in the human gut niche. DATABASES: Structural data are available in the protein data bank database under the accession number 6SGF. Sequence data are available in the GenBank database under the accession number EEV01588.1. The assignment of the Roseburia intestinalis xylan binding module into the CBM86 new family is available in the CAZy database (http://www.cazy.org/CBM86.html).",2019-11-20 +33779260,First Report of Bacterial Canker Caused by Pseudomonas syringae pv. morsprunorum Race 1 on Cherry in Chile. ,"Chile is the main exporter of sweet cherries (Prunus avium), with a total of 228.6 thousand tons exported in the 2019-20 season, and a production from the Coquimbo to the Aysén region (http://www.iqonsulting.com/yb/). In January 2019, cherry trees from a commercial orchard located near Osorno city (40°37'S, 72°54'W), Region de Los Lagos, Chile, showed symptoms such as the presence of wood cankers, necrotic spots in leaves, and premature defoliation, with a mean disease incidence near 40%. 
Symptomatic leaves with necrotic spots were collected for analysis, from which all the necrotic spots were extracted by incision with a sterile scalpel, macerated in 30 mL of AFT buffer and subsequently, 100 µL of the suspension was plated on King's B (KB) agar and incubated for 48 to 72 h at 27°C, obtaining a total of two bacterial colonies identified as 7684.1 and 7684.2. Afterward, each colony was stroked in a new KB agar plate, incubated for 16 h at 27°C, and the obtained biomass was used in subsequent experiments. In KB agar, both colonies exhibited fluorescence under UV light and, according to the LOPAT method (Lelliott et al., 1966), they were gram negative, positive to levan and tobacco hypersensitivity tests and negative to oxidase, potato soft rot, arginine dihydrolase and gelatin tests, and were confirmed as Pseudomonas syringae. Then, the 16s and gyrB genes of each isolate were amplified by PCR, sequenced, and compared with the NCBI Genbank database (Weisburg et al., 1991; Sarkar and Guttman, 2004), finding a 99,93% genetic similarity (1064/1065) with a previously reported 16s sequence of a Pseudomonas syringae pv. morsprunorum (Psm) isolate (accession number CP026558.1), and a 99,69% (636/638) with a previously reported gyrB gene of Psm (accession number LC364094.1), respectively. Additionally, the closest pathovar different to morsprunorum aligned with our gyrB sequence was P. syringae pv. aesculin, with 97,8% of identity (624/638). Our sequences were deposited in Genbank with the accession numbers MN528473 (16s), MN535696 (gyrB) for 7684.1, and MN528474 (16s), MN535697 (gyrB) for 7684.2. To identify if the isolates correspond to Psm races 1 (Psm1) or 2 (Psm2), race-specific conventional PCRs and qPCRs assays were carried out using the specific primers described by Kaluzna et al., (2016), showing that the two isolates were positive to Psm1 in both PCR assays. Pathogenicity was tested by inoculating immature cherry fruitlets (cv. 
Sweetheart) with bacterial suspension at 108 CFU/mL. For each strain, ten fruitlets were inoculated by pricking with a sterile needle previously immersed in the bacterial suspension (Ruinelli et al., 2019). Sterile distilled water was used as negative control. Seven to fourteen days post-inoculation, necrotic and water-soaked brown lesions with yellow margins were observed on the fruits inoculated with bacterial strains. The pathogen was reisolated and confirmed as Pseudomonas syringae pv. morsprunorum by 16s and gyrB sequencing, and as race 1 by race-specific PCRs. Our results were confirmed by the National Plant Protection Organization, (Servicio Agrícola y Ganadero de Chile, SAG), generating the first report of Psm race 1 in Chile. Thus, SAG established new protocols for quarantine of absent pests in the national territory (Resol. N°3080, SAG, Chile), and an immediate phytosanitary program for Psm (Resol. Exenta N°8948/2019, SAG, Chile). In conclusion, our discovery contributes to the monitoring and control of the disease in Chile.",2021-03-29 +34176369,Validating ADME QSAR Models Using Marketed Drugs.,"Problems with drug ADME are responsible for many clinical failures. By understanding the ADME properties of marketed drugs and modeling how chemical structure contributes to these inherent properties, we can help new projects reduce their risk profiles. Kinetic aqueous solubility, the parallel artificial membrane permeability assay (PAMPA), and rat liver microsomal stability constitute the Tier I ADME assays at the National Center for Advancing Translational Sciences (NCATS). Using recent data generated from in-house lead optimization Tier I studies, we update quantitative structure-activity relationship (QSAR) models for these three endpoints and validate in silico performance against a set of marketed drugs (balanced accuracies range between 71% and 85%). 
Improved models and experimental datasets are of direct relevance to drug discovery projects and, together with the prediction services that have been made publicly available at the ADME@NCATS web portal (https://opendata.ncats.nih.gov/adme/), provide important tools for the drug discovery community. The results are discussed in light of our previously reported ADME models and state-of-the-art models from scientific literature.Graphical Abstract[Figure: see text].",2021-06-26 +,6ER-009 Can tolerability and safety of daa-2 for hepatitis c be estimated only by randomised clinical trials? a systematic review with meta-analysis,"

Background

Every year an increase in new cases of patients with chronic hepatitis C (CHC) from HCV has been registered. The availability of second-generation DAA (DAA-2) has permitted a rise of SVR rates compatible with a good safety profile.

Purpose

To research literature evidence regarding existence of tolerability and safety data obtained from a comparison between DAA-2 and standard of care.

Material and methods

The review included RCT and other CT concluded and published until 20 June 2017, related to patients with CHC treated with DAA-2 (sofosbuvir; simeprevir; ledipasvir; daclatasvir; ombitasvir; paritaprevir; dasabuvir) in monotherapy or combined therapy, compared with the gold standard (PegIFN ± ribavirin (RBV) ± first-generation DAA (DAA-1)). Adverse reactions (ADR) data were searched during the treatment period and not beyond 30 days from the end of it. Databases Cochrane Central Register of Controlled Trials/Central, Embase and Pubmed were consulted: the research methodology adopted was the one with MeSH Terms when available. For included studies the meta-analysis with R was made.

Results

The articles identified were 174. Some (nine) were recognised by more databases and the articles (168) that did not find correspondence with the primary endpoint and did not belong to inclusion criteria were discarded. The studies included were six: five RCT and one observational study. The serious adverse events (SAE) and interruptions of therapy data between exposed (treated) and not-exposed (controls) patients were used for meta-analysis. One study that did not report the SAE numbers for controls was excluded from the meta-analysis. No differences in the effect between treated and controls were observed, neither for SAE incidence nor for treatment interruption incidence. The 95% CI of the OR around the evaluation of the overall effect included the value 1: OR: 0.702 (95% CI: 0.381 to 1.295) and OR: 0.769 (95% CI: 0.277 to 2.138), respectively. The overall effect for SAE and interruptions resulted with P = 0.257 and P = 0.615, respectively.

Conclusion

No substantial differences remained in SAE and the interruptions rate between the two treatments, DAA-2 and gold standard. Furthermore, a significant heterogeneity between studies was observed. The introduction of large registries would be useful in evaluating the risk of ADRs, their nature and the real frequency of SAE in the population, which can barely be estimated by RCT alone.

References and/or Acknowledgements

R Core Team. http://www.R-project.org/ + No conflict of interest",2018-01-01 +28968821,SiNoPsis: Single Nucleotide Polymorphisms selection and promoter profiling.,"

Motivation

The selection of a single nucleotide polymorphism (SNP) using bibliographic methods can be a very time-consuming task. Moreover, a SNP selected in this way may not be easily visualized in its genomic context by a standard user hoping to correlate it with other valuable information. Here we propose a web form built on top of Circos that can assist SNP-centered screening, based on their location in the genome and the regulatory modules they can disrupt. Its use may allow researchers to prioritize SNPs in genotyping and disease studies.

Results

SiNoPsis is bundled as a web portal. It focuses on the different structures involved in the genomic expression of a gene, especially those found in the core promoter upstream region. These structures include transcription factor binding sites (for promoter and enhancer signals), histones and promoter flanking regions. Additionally, the tool provides eQTL and linkage disequilibrium (LD) properties for a given SNP query, yielding further clues about other indirectly associated SNPs. Possible disruptions of the aforementioned structures affecting gene transcription are reported using multiple resource databases. SiNoPsis has a simple user-friendly interface, which allows single queries by gene symbol, genomic coordinates, Ensembl gene identifiers, RefSeq transcript identifiers and SNPs. It is the only portal providing useful SNP selection based on regulatory modules and LD with functional variants in both textual and graphic modes (by properly defining the arguments and parameters needed to run Circos).

Availability and implementation

SiNoPsis is freely available at https://compgen.bio.ub.edu/SiNoPsis/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +33900807,Differences in Daily Voice Use Measures Between Female Patients With Nonphonotraumatic Vocal Hyperfunction and Matched Controls.,"Purpose The purpose of this study was to obtain a more comprehensive understanding of the pathophysiology and impact on daily voice use of nonphonotraumatic vocal hyperfunction (NPVH). Method An ambulatory voice monitor collected 1 week of data from 36 patients with NPVH and 36 vocally healthy matched controls. A subset of 11 patients with NPVH were monitored after voice therapy. Daily voice use measures included neck-skin acceleration magnitude, fundamental frequency (f o), cepstral peak prominence (CPP), and the difference between the first and second harmonic magnitudes (H1-H2). Additional comparisons included 118 patients with phonotraumatic vocal hyperfunction (PVH) and 89 additional vocally healthy controls. Results The NPVH group, compared to the matched control group, exhibited increased f o (Cohen's d = 0.6), reduced CPP (d = -0.9), and less positive H1-H2 skewness (d = -1.1). Classifiers used CPP mean and H1-H2 mode to maximally differentiate the NPVH and matched control groups (area under the receiver operating characteristic curve of 0.78). Classifiers performed well on unseen data: the logit decreased in patients with NPVH after therapy; ≥ 85% of the control and PVH groups were identified as ""normal"" or ""not NPVH,"" respectively. Conclusions The NPVH group's daily voice use is less periodic (CPP), is higher pitched (f o), and has less abrupt vocal fold closure (H1-H2 skew) compared to the matched control group. The combination of CPP mean and H1-H2 mode appears to reflect a pathophysiological continuum in NPVH patients of inefficient phonation with minimal potential for phonotrauma. Further validation of the classification model is needed to better understand potential clinical uses. 
Supplemental Material https://doi.org/10.23641/asha.14390771.",2021-04-23 +34365791,The Novel Interplay between Commensal Gut Bacteria and Metabolites in Diet-Induced Hyperlipidemic Rats Treated with Simvastatin.,"Hyperlipidemia is one kind of metabolic syndrome for which the treatment commonly includes simvastatin (SV). Individuals vary widely in statin responses, and growing evidence implicates gut microbiome involvement in this variability. However, the associated molecular mechanisms between metabolic improvement and microbiota composition following SV treatment are still not fully understood. In this study, combinatory approaches using ultrahigh-performance liquid chromatography coupled with hybrid triple quadrupole time-of-flight mass spectrometry (UHPLC-Q-TOF MS/MS)-based metabolomic profiling, PCR-denaturing gradient gel electrophoresis (PCR-DGGE), quantitative PCR (qPCR), and 16S rRNA gene sequencing-based gut microbiota profiling were performed to investigate the interplay of endogenous metabolites and the gut microbiota related to SV treatment. A total of 6 key differential endogenous metabolites were identified that affect the metabolism of amino acids (phenylalanine and tyrosine), unsaturated fatty acids (linoleic acid and 9-hydroxyoctadecadienoic acid (9-HODE)), and the functions of gut microbial metabolism. Moreover, a total of 22 differentially abundant taxa were obtained following SV treatment. Three bacterial taxa were identified to be involved in SV treatment, namely, Bacteroidaceae, Prevotellaceae, and Porphyromonadaceae. These findings suggested that the phenylalanine and tyrosine-associated amino acid metabolism pathways, as well as the linoleic acid and 9-HODE-associated unsaturated fatty acid metabolism pathways, which are involved in gut flora interactions, might be potential therapeutic targets for improvement in SV hypolipidemic efficacy. 
The mass spectrometric data have been deposited to MassIVE (https://massive.ucsd.edu/ProteoSAFe/static/massive.jsp). Username: MSV000087842_reviewer. Password: hardworkingzsr.",2021-08-09 +34312695,Progress of 3D Printing Techniques for Nasal Cartilage Regeneration.,"Once cartilage is damaged, its self-repair capacity is very limited. The strategy of tissue engineering has brought a new idea for repairing cartilage defect and cartilage regeneration. In particular, nasal cartilage regeneration is a challenge because of the steady increase in nasal reconstruction after oncologic resection, trauma, or rhinoplasty. From this perspective, three-dimensional (3D) printing has emerged as a promising technology to address the complexity of nasal cartilage regeneration, using patient's image data and computer-aided deposition of cells and biomaterials to precisely fabricate complex, personalized tissue-engineered constructs. In this review, we summarized the major progress of three prevalent 3D printing approaches, including inkjet-based printing, extrusion-based printing and laser-assisted printing. Examples are highlighted to illustrate 3D printing for nasal cartilage regeneration, with special focus on the selection of seeded cell, scaffolds and growth factors. The purpose of this paper is to systematically review recent research about the challenges and progress and look forward to the future of 3D printing techniques for nasal cartilage regeneration.Level of Evidence III This journal requires that authors assign a level of evidence to each submission to which Evidence-Based Medicine rankings are applicable. This excludes Review Articles, Book Reviews, and manuscripts that concern Basic Science, Animal Studies, Cadaver Studies, and Experimental Studies. 
For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors https://www.springer.com/00266 .",2021-07-26 +,Integration of ecosystem-based models into an existing interactive web-based tool for improved aquaculture decision-making,"Proper site selection is critical to the development and expansion of marine aquaculture. Major considerations for site selection include: potential for competing uses, environmental interactions, and animal productivity. Two types of existing site selection tools, mapping and modeling, have proven useful independently, and in some recent studies have proven useful when used together. GIS-based mapping tools have become important in the decision-making process. These tools provide access to marine and coastal datasets allowing farmers and extension agents to gather information on availability of cultivation sites. They are also used by resource managers to assess potential use conflicts (e.g. existence of commercial fishing, mooring areas, fixed fishing gear) and possible environmental interactions (e.g. presence of seagrasses, contaminants, threatened or endangered species). Models have been used separately to predict animal growth, farm productivity, and farm-related effects on the surrounding water and sediment quality.The integration of the Farm Aquaculture Resource Management (FARM) model (http://www.farmscale.org) into the U.S. state of Connecticut's Aquaculture Mapping Atlas (http://seagrant.uconn.edu/whatwedo/aquaculture/shellmap.php) was tested in three geographically distinct waterbodies within Connecticut (CT) waters of Long Island Sound. Nearshore waters within the towns of Mystic, Milford, and Westport were selected as pilot locations to determine usability and capability of the combined tools. 
Data from two long-term offshore sampling stations adjacent to existing shellfish leases were used to test spatial and temporal sampling variability impacts on model results. Partnerships with local monitoring programs and growers were important for acquisition of water quality data, oyster measurement data, and information about local culture practices. All sites were deemed suitable for oyster aquaculture based on model results that predicted Moderate to High growth based on estimated time to reach harvest size from one in (2.54cm) seed oysters (Crassostrea virginica). Time to harvest varied from 282days (High growth) to 645days (Moderate growth) among the 22 stations in the three nearshore sites, and 724–956days (Moderate growth) at the two offshore sites. Results from the two long-term offshore stations indicate that data from the same year must be used when comparing production-based suitability of sites. Addition of potential production estimates improved the ability to select between suitable mapping-based sites. This mapping and modeling combination should be encouraged to provide a strong basis for successful siting and expansion of aquaculture while minimizing user conflict and adverse environmental interactions. This approach may be particularly useful in waterbodies where shellfish aquaculture is possible but is not well established.",2016-02-01 +34310195,Aphasia and Friendship: The Role and Perspectives of Speech-Language Pathologists.,"Purpose Speech-language pathologists (SLPs) who work with people with aphasia focus on assessment and intervention to support improved communication outcomes for their clients. Friendship, a key component of quality of life, often depends on communicative interaction, and many people with aphasia report having reduced social circles. The purpose of this study was to explore the perceptions of SLPs working with clients with aphasia on their role in supporting friendship development and maintenance. 
Method An online survey composed of questions addressing SLP perspectives and goal setting, assessment, and treatment practices related to aphasia and friendship was distributed to SLPs across the United States. Survey data were analyzed using both quantitative and qualitative methods. Results Forty-seven SLPs completed the survey. While many SLPs reported that the friendships of their clients with aphasia were impacted by aphasia and that it was within their scope of practice to support friendship development and maintenance, many did not specifically assess or target friendship and friendship outcomes in the treatment plan. SLPs identified barriers and facilitators to focusing on friendship within the context of speech and language therapy. Conclusions Findings suggest the majority of participating SLPs were interested in addressing friendship with clients with aphasia; however, they experienced barriers in practice. Further examination of SLP perspectives and clinical practice regarding friendship and aphasia is warranted. Additionally, research investigating effective assessment and therapeutic methods that target friendship in aphasia is needed to support clinical practice and the well-being of clients with aphasia. Supplemental Material https://doi.org/10.23641/asha.15032217.",2021-07-26 +32735679,Planet Microbe: a platform for marine microbiology to discover and analyze interconnected 'omics and environmental data.,"In recent years, large-scale oceanic sequencing efforts have provided a deeper understanding of marine microbial communities and their dynamics. These research endeavors require the acquisition of complex and varied datasets through large, interdisciplinary and collaborative efforts. However, no unifying framework currently exists for the marine science community to integrate sequencing data with physical, geological, and geochemical datasets. 
Planet Microbe is a web-based platform that enables data discovery from curated historical and on-going oceanographic sequencing efforts. In Planet Microbe, each 'omics sample is linked with other biological and physiochemical measurements collected for the same water samples or during the same sample collection event, to provide a broader environmental context. This work highlights the need for curated aggregation efforts that can enable new insights into high-quality metagenomic datasets. Planet Microbe is freely accessible from https://www.planetmicrobe.org/.",2021-01-01 +29220464,KiPho: malaria parasite kinome and phosphatome portal. ,"The Plasmodium kinases and phosphatases play an essential role in the regulation of substrate reversible-phosphorylation and overall cellular homeostasis. Reversible phosphorylation is one of the key post-translational modifications (PTMs) essential for parasite survival. Thus, a complete and comprehensive information of malarial kinases and phosphatases as a single web resource will not only aid in systematic and better understanding of the PTMs, but also facilitate efforts to look for novel drug targets for malaria. In the current work, we have developed KiPho, a comprehensive and one step web-based information resource for Plasmodium kinases and phosphatases. To develop KiPho, we have made use of search methods to retrieve, consolidate and integrate predicted as well as annotated information from several publically available web repositories. Additionally, we have incorporated relevant and manually curated data, which will be updated from time to time with the availability of new information. 
The KiPho (Malaria Parasite Kinome-Phosphatome) resource is freely available at http://bioinfo.icgeb.res.in/kipho.",2017-01-01 +31696036,FlavoDb: a web-based chemical repository of flavonoid compounds.,"There are many online resources that focus on chemical diversity of natural compounds, but only handful of resources exist that focus solely on flavonoid compounds and integrate structural and functional properties; however, extensive collated flavonoid literature is still unavailable to scientific community. Here we present an open access database 'FlavoDb' that is focused on providing physicochemical properties as well as topological descriptors that can be effectively implemented in deducing large scale quantitative structure property models of flavonoid compounds. In the current version of database, we present data on 1,19,400 flavonoid compounds, thereby covering most of the known structural space of flavonoid class of compounds. Moreover, effective structure searching tool presented here is expected to provide an interactive and easy-to-use tool for obtaining flavonoid-based literature and allied information. Data from FlavoDb can be freely accessed via its intuitive graphical user interface made available at following web address: http://bioinfo.net.in/flavodb/home.html.",2019-10-31 +33547313,"The Cuban Human Brain Mapping Project, a young and middle age population-based EEG, MRI, and cognition dataset.","The Cuban Human Brain Mapping Project (CHBMP) repository is an open multimodal neuroimaging and cognitive dataset from 282 young and middle age healthy participants (31.9 ± 9.3 years, age range 18-68 years). This dataset was acquired from 2004 to 2008 as a subset of a larger stratified random sample of 2,019 participants from La Lisa municipality in La Habana, Cuba. The exclusion criteria included the presence of disease or brain dysfunctions. 
Participant data that is being shared comprises i) high-density (64-120 channels) resting-state electroencephalograms (EEG), ii) magnetic resonance images (MRI), iii) psychological tests (MMSE, WAIS-III, computerized go-no go reaction time), as well as iv) demographic information (age, gender, education, ethnicity, handedness, and weight). The EEG data contains recordings with at least 30 minutes in duration including the following conditions: eyes closed, eyes open, hyperventilation, and subsequent recovery. The MRI consists of anatomical T1 as well as diffusion-weighted (DWI) images acquired on a 1.5 Tesla system. The dataset presented here is hosted by Synapse.org and available at https://chbmp-open.loris.ca .",2021-02-05 +29084591,Using DIVAN to assess disease/trait-associated single nucleotide variants in genome-wide scale.,"

Objective

The majority of sequence variants identified by Genome-wide association studies (GWASs) fall outside of the protein-coding regions. Unlike coding variants, it is challenging to connect these noncoding variants to the pathophysiology of complex diseases/traits due to the lack of functional annotations in the non-coding regions. To overcome this, by leveraging the rich collection of genomic and epigenomic profiles, we have developed DIVAN, or Disease/trait-specific Variant ANnotation, which enables the assignment of a measurement (D-score) for each base of the human genome in a disease/trait-specific manner. To facilitate the utilization of DIVAN, we pre-computed D-scores for every base of the human genome (hg19) for 45 different diseases/traits.

Results

In this work, we present a detailed protocol on how to utilize DIVAN software toolkit to retrieve D-scores either by variant identifiers or by genomic regions for a disease/trait of interest. We also demonstrate the utilities of the D-scores using real data examples. We believe that the pre-computed D-scores for 45 diseases/traits is a useful resource to follow up on the discoveries made by GWASs, and the DIVAN software toolkit provides a convenient way to access this resource. DIVAN is freely available at https://sites.google.com/site/emorydivan/software .",2017-10-30 +33771184,Circulating long noncoding RNAs as potential biomarkers for stomach cancer: a systematic review and meta-analysis.,"

Background

Recent researches have suggested that long noncoding RNA (lncRNA) is involved in the tumorigenesis and development of stomach cancer (SC). This meta-analysis aimed to identify the diagnostic performance of circulating lncRNAs in SC.

Methods

All relevant studies were systematically searched through PubMed, Web of Science, Cochrane Library, and EMBASE databases. The diagnostic values of lncRNAs were mainly assessed by pooled sensitivity, specificity, and summary receiver operating characteristic area under the curve (SROC AUC). Meta-DiSc 1.4, Review Manager 5.3, and STATA 12.0 were used for statistical analysis. The protocol for this systematic review was registered on INPLASY (INPLASY202120079) and is available in full on the inplasy.com ( https://doi.org/10.37766/inplasy2021.2.0079 ).

Results

A total of 42 eligible studies were included in this meta-analysis. The pooled sensitivity, specificity, and SROC AUC were 0.78 (95%CI 0.75-0.81), 0.75 (95%CI 0.71-0.78), and 0.83 (95%CI 0.80-0.86), respectively, suggesting that the lncRNAs test had a high accuracy for the diagnosis of SC. Obvious heterogeneity might come from the type of lncRNA through subgroup and meta-regression analysis. Fagan diagram shows the clinical value of lncRNAs test in SC.

Conclusions

Abnormal expression of circulating lncRNAs exhibits a high efficacy for diagnosing SC, which is promising in clinical application.",2021-03-26 +31305071,WinProphet: A User-Friendly Pipeline Management System for Proteomics Data Analysis Based on Trans-Proteomic Pipeline.,"Protein and peptide identification and quantitation are essential tasks in proteomics research and involve a series of steps in analyzing mass spectrometry data. Trans-Proteomic Pipeline (TPP) provides a wide range of useful tools through its web interfaces for analyses such as sequence database search, statistical validation, and quantitation. To utilize the powerful functionality of TPP without the need for manual intervention to launch each step, we developed a software tool, called WinProphet, to create and automatically execute a pipeline for proteomic analyses. It seamlessly integrates with TPP and other external command-line programs, supporting various functionalities, including database search for protein and peptide identification, spectral library construction and search, data-independent acquisition (DIA) data analysis, and isobaric labeling and label-free quantitation. WinProphet is a standalone, installation-free tool with graphical interfaces for users to configure, manage, and automatically execute pipelines. The constructed pipelines can be exported as XML files with all of the parameter settings for reusability and portability. The executable files, user manual, and sample data sets of WinProphet are freely available at  http://ms.iis.sinica.edu.tw/COmics/Software_WinProphet.html .",2019-07-29 +34490294,Development of an Early Warning Model for Predicting the Death Risk of Coronavirus Disease 2019 Based on Data Immediately Available on Admission.,"Introduction: COVID-19 has overloaded worldwide medical facilities, leaving some potentially high-risk patients trapped in outpatient clinics without sufficient treatment. 
However, there is still a lack of a simple and effective tool to identify these patients early. Methods: A retrospective cohort study was conducted to develop an early warning model for predicting the death risk of COVID-19. Seventy-five percent of the cases were used to construct the prediction model, and the remaining 25% were used to verify the prediction model based on data immediately available on admission. Results: From March 1, 2020, to April 16, 2020, a total of 4,711 COVID-19 patients were included in our study. The average age was 63.37 ± 16.70 years, of which 1,148 (24.37%) died. Finally, age, SpO2, body temperature (T), and mean arterial pressure (MAP) were selected for constructing the model by univariate analysis, multivariate analysis, and a review of the literature. We used five common methods for constructing the model and finally found that the full model had the best specificity and higher accuracy. The area under the ROC curve (AUC), specificity, sensitivity, and accuracy of full model in train cohort were, respectively, 0.798 (0.779, 0.816), 0.804, 0.656, and 0.768, and in the validation cohort were, respectively, 0.783 (0.751, 0.815), 0.800, 0.616, and 0.755. Visualization tools of the prediction model included a nomogram and an online dynamic nomogram (https://wanghai.shinyapps.io/dynnomapp/). Conclusion: We developed a prediction model that might aid in the early identification of COVID-19 patients with a high probability of mortality on admission. However, further research is required to determine whether this tool can be applied for outpatient or home-based COVID-19 patients.",2021-08-19 +32568117,Medical safety reporting system neccessity and analysis of Turkey 2016 data: A health policy report.,"

Background

The National Safety Reporting System, which is developed for Turkey, aims to classify medical errors with a coding methodology that handles errors in subcategories. Error entries done via the system are added to the statistics immediately by advanced live data reporting capabilities of the software.

Objective

Our aim was to provide information about the Turkey local reporting system to ensure patient safety by detecting medical errors.

Methods

The data used for analyses were obtained from https://www.grs.saglik.gov.tr and the web service used by hospital information systems. The error reporting time, most commonly reported errors, errors by professions and errors by location were examined under the major error categories and percentages that have been used in relevant data.

Results

In total, 53,477 errors were submitted to the National Safety Reporting System in 2016. When these entries were split into relevant categories such as drug errors, laboratory errors, surgical errors and patient safety errors, the most common errors were wrong dosage order, hemolyzed sample, not marking the side to be operated on and patient fall (patient/caretaker related), respectively.

Conclusion

In order to reduce medical errors and provide patient safety, every institution must first of all do its own self-assessment. New user-friendly systems can be developed in order to increase medical error notifications and thus institutions can improve their healthcare quality.",2021-01-01 +35845286,"An open-source, expert-designed decision tree application to support accurate diagnosis of myeloid malignancies.","Accurate, reproducible diagnoses can be difficult to make in haemato-oncology due to multi-parameter clinical data, complex diagnostic criteria and time-pressured environments. We have designed a decision tree application (DTA) that reflects WHO diagnostic criteria to support accurate diagnoses of myeloid malignancies. The DTA returned the correct diagnoses in 94% of clinical cases tested. The DTA maintained a high level of accuracy in a second validation using artificially generated clinical cases. Optimisations have been made to the DTA based on the validations, and the revised version is now publicly available for use at http://bit.do/ADAtool.",2021-03-26 +31562099,Cross-Species Protein Function Prediction with Asynchronous-Random Walk.,"Protein function prediction is a fundamental task in the post-genomic era. Available functional annotations of proteins are incomplete and the annotations of two homologous species are complementary to each other. However, how to effectively leverage mutually complementary annotations of different species to further boost the prediction performance is still not well studied. In this paper, we propose a cross-species protein function prediction approach by performing Asynchronous Random Walk on a heterogeneous network (AsyRW). AsyRW first constructs a heterogeneous network to integrate multiple functional association networks derived from different biological data, established homology-relationships between proteins from different species, known annotations of proteins and Gene Ontology (GO). 
To account for the intrinsic structures of intra- and inter-species of proteins and that of GO, AsyRW quantifies the individual walk lengths of each network node using the gravity-like theory, and then performs asynchronous-random walk with the individual length to predict associations between proteins and GO terms. Experiments on annotations archived in different years show that individual walk length and asynchronous-random walk can effectively leverage the complementary annotations of different species, AsyRW has a significantly improved performance to other related and competitive methods. The codes of AsyRW are available at: http://mlda.swu.edu.cn/codes.php?name=AsyRW.",2021-07-01 +33869975,Quantum Mechanical Methods Predict Accurate Thermodynamics of Biochemical Reactions.,"Thermodynamics plays a crucial role in regulating the metabolic processes in all living organisms. Accurate determination of biochemical and biophysical properties is important to understand, analyze, and synthetically design such metabolic processes for engineered systems. In this work, we extensively performed first-principles quantum mechanical calculations to assess its accuracy in estimating free energy of biochemical reactions and developed automated quantum-chemistry (QC) pipeline (https://appdev.kbase.us/narrative/45710) for the prediction of thermodynamics parameters of biochemical reactions. We benchmark the QC methods based on density functional theory (DFT) against different basis sets, solvation models, pH, and exchange-correlation functionals using the known thermodynamic properties from the NIST database. Our results show that QC calculations when combined with simple calibration yield a mean absolute error in the range of 1.60-2.27 kcal/mol for different exchange-correlation functionals, which is comparable to the error in the experimental measurements. 
This accuracy over a diverse set of metabolic reactions is unprecedented and near the benchmark chemical accuracy of 1 kcal/mol that is usually desired from DFT calculations.",2021-03-25 +33491257,LINC02288 promotes chondrocyte apoptosis and inflammation through miR-374a-3p targeting RTN3.,"

Background

Dysregulation of long non-coding RNAs (lncRNAs) is related to the occurrence of osteoarthritis (OA). In the present study, we explored the role of LINC02288 and its regulatory mechanism in OA development.

Methods

GSE113825 was obtained from Gene Expression Omnibus (GEO) database and analyzed to identify the differentially expressed lncRNAs in OA. Gene enrichment analyses and Kyoto Encyclopedia of Genes and Genomes biological process analysis were performed through Metascape (http://metascape.org/gp). The interactions among LINC02288, miR-374a-3p and RTN3 were determined using RNA immunoprecipitation (RIP) assays and dual luciferase reporter assays. Chondrocyte apoptosis was examined using flow cytometry. Western blot assays were conducted to assess the pro-apoptotic and anti-apoptotic markers.

Results

We identified a total of 4,491 differentially expressed lncRNAs. We focused on LINC02288 as the top-ranked up-regulated lncRNA in OA as indicated by a significant p-value. LINC02288 was significantly up-regulated, which was further verified by a real-time polymerase chain reaction. Down-regulation of LINC02288 significantly reduced the apoptosis of OA chondrocytes induced by interleukin-1β and the production of pro-inflammatory cytokines. These effects were further verified in an OA rat model. An RIP assay and dual luciferase assay further confirmed that LINC02288 served as a sponge of miR-374a-3p. Moreover, the overexpression of RTN3 could partially reverse the effects of LINC02288 knockdown, mediating inhibitory effects on chondrocyte apoptosis and the inflammatory response. Down-regulation of LINC02288 alleviated OA development in an in vivo OA animal model.

Conclusions

Our findings indicate that LINC02288 contributes to OA progression by targeting the miR-374a-3p/RTN3 axis, which might provide a promising molecular therapy strategy for OA.",2021-03-25 +,GENE-12. THE CHILDREN’S BRAIN TUMOR TISSUE CONSORTIUM (CBTTC) INFRASTRUCTURE FACILITATES COLLABORATIVE RESEARCH IN PEDIATRIC CENTRAL NERVOUS SYSTEM TUMORS,"Abstract INTRODUCTION: The Children’s Brain Tumor Tissue Consortium (CBTTC) is a multi-institutional, international research collaboration comprised of 13 institutions utilizing an infrastructure of web based open source tools to accelerate pediatric brain tumor research. The CBTTC mission is to provide the largest accessible, de-identified, longitudinal clinical data set linked to available biospecimens and -omic data in the world. METHODS: Clinical data collection and protection of Personal Health Information (PHI) is a major research regulatory hurdle. The CBTTC database is protected by a custom designed electronic honest broker that maintains links from a subject to multiple research records in the physical biobank and data collection tool. Currently the CBTTC utilizes three interconnected open source tools; the biorepository portal, electronic honest broker and the harvest query tool. CBTTC biospecimen and clinical records are also connected via web-protocols to the pedCBioPortal, a genomic data visualization tool, and Cavatica, a cloud based infrastructure genomic data storage and analysis. These tools work concurrently and communicate over https protocols and complement well known research tools such as an enterprise laboratory management systems (LIMS) and REDCap for data management. They are further expanded to include imaging tools, pathology slide review, genomic analysis and file repository resources. RESULTS: The CBTTC integrates phenotypic and genomic data for pediatric brain tumors and associated biospecimens. 
The platform facilitates open ended longitudinal data collection currently reflecting 1,900 subjects and 9,140 specimens available for research. The CBTTC has empowered 16 unique hypothesis driven collaborative research projects to date (See: www.cbttc.org). Remarkably, researchers are able to link molecular biology findings with clinical information such as overall survival through the web-based interface. DISCUSSION: The web-platform based approach facilitates real-time collaboration with researchers around the world. The CBTTC continues to grow, with additional collaborating sites and data generation added each year.",2017-05-31 +,First Report of Fusarium cerealis Causing Root Rot on Soybean,"In 2017, soybean plants (cultivar 24-10RY, R4 growth stage) with poor standing, stunting, and leaf chlorosis were collected at Carman, Manitoba. They had few nitrogen-fixing nodules and lateral roots left on the root system and reddish brown-to-black lesions on tap and lateral roots. Roots and lower stems had cortical decay symptoms when split in half. Root pieces (1 to 2 cm) from 48 plants were surface sterilized in 0.5% NaOCl, rinsed twice in sterilized water, air dried on sterilized filter papers, and placed on potato dextrose agar (PDA) amended with 100 mg of streptomycin sulfate. Plates were incubated for 3 days at 26°C and 12 h light/12 h dark. The growing hyphae were transferred using the hyphal tip method to new PDA plates. Out of 240 single spore isolates, 12 were identified as Fusarium cerealis based on morphological characteristics and microscopic examination. On PDA, cultures grew and produced profuse, fuzzy, aerial mycelium with dark red and yellowish color around the center of the cultures within 5 days. On Spezieller Nährstoffarmer agar medium at 25°C for 10 days, macroconidia were stout, thick walled, apical and basal cells curved, usually with four to six septa, and 23.9 to 41.2 × 4.8 to 7.3 μm (Leslie and Summerell 2006). 
The identity of all isolates was confirmed by sequencing the translational elongation factor 1-alpha (TEF1) gene. TEF1 was amplified by polymerase chain reaction using the universal primers EF1 and EF2 (O’Donnell et al. 1998). The online resource Basic Local Alignment Search Tool (BLAST; https://www.ncbi.nlm.nih.gov/BLAST) confirmed the fungus identity as 100% F. cerealis. The TEF1 sequence for the original F. cerealis strain Carm17 isolated from field-infected soybean plants was deposited to GenBank with accession number MH151080. To confirm pathogenicity, three 5-mm plugs from 1-week-old culture of isolate Carm17 were placed in 1-liter conical flask containing a sterilized potato and soil mix (500 g of loamy soil, 50 g of finely chopped potatoes, and 100 ml of sterilized H₂O to keep the mix fairly wet) (Ko and Hora 1971). The inoculum was kept at 25°C with 12 h light/12 h dark for 7 days and shaken once after 4 days to allow the fungus to grow throughout the medium. The inoculum was air dried in laminar flow cabinet overnight, ground with mortar and pestle, and sieved with 2-mm sterilized sieves. Five grams of this inoculum was placed 3 cm below the surface of pasteurized soil in 6-in pots. In control pots, 5 g of noninoculated sterilized potato and soil mix was used. Five seeds of cultivar 24-10RY were seeded per pot, and five pots were used for inoculated and control pots. Pots were kept in the greenhouse with 24/16°C day/night temperature, 13 h light/11 h dark cycle, and 80% relative humidity. Disease symptoms similar to those observed in the field were visible on the root system of all inoculated soybean plants after 40 days. Noninoculated plants had root rot symptoms. The pathogen was reisolated from infected roots and identified as F. cerealis (strain Carm20) as described above. TEF1 gene has been sequenced. The TEF1 gene sequence of Carm20 (accession no. MH151081) was identical to that of Carm17. The experiment was repeated two times. 
To our knowledge, this is the first report of F. cerealis causing root rot on soybean anywhere on the globe. This information will have an impact on future scientific research on soybean root rots and on F. cerealis, especially with regard to this pathogen’s prevalence in soybean fields, its effects on crop productivity, mycotoxin production, and further interaction with other species to form root rot pathogen complexes.",2018-12-01 +34355632,Uptake of Clinical Prognostic Tools in COPD Exacerbations Requiring Hospitalisation.,"Clinical prognostic tools are used to objectively predict outcomes in many fields of medicine. Whilst over 400 have been developed for use in chronic obstructive pulmonary disease (COPD), only a minority have undergone full external validation and just one, the DECAF score, has undergone an implementation study supporting use in clinical practice. Little is known about how such tools are used in the UK. We distributed surveys at two time points, in 2017 and 2019, to hospitals included in the Royal College of Physicians of London national COPD secondary care audit program. The survey assessed the use of prognostic tools in routine care of hospitalized COPD patients. Hospital response rates were 71/196 in 2017 and 72/196 in 2019. The use of the DECAF and PEARL scores more than doubled in decisions about unsupported discharge (7%-15.3%), admission avoidance (8.1%-17%) and readmission avoidance (4.8%-13.1%); it more than tripled (8.8%-27.8%) in decisions around hospital-at-home or early supported discharge schemes. In other areas, routine use of clinical prognostic tools was uncommon. In palliative care decisions, the use of the Gold Standards Framework Prognostic Indicator Guidance fell (5.6%-1.4%). In 2017, 43.7% of hospitals used at least one clinical prognostic tool in routine COPD care, increasing to 52.1% in 2019. Such tools can help challenge prognostic pessimism and improve care. 
To integrate these further into routine clinical care, future research should explore current barriers to their use and focus on implementation studies.Supplemental data for this article is available online at https://dx.doi.org/10.1080/15412555.2021.1959540.",2021-08-06 +33055239,A systematic evaluation of bioinformatics tools for identification of long noncoding RNAs.,"High-throughput RNA sequencing unveiled the complexity of transcriptome and significantly increased the records of long noncoding RNAs (lncRNAs), which were reported to participate in a variety of biological processes. Identification of lncRNAs is a key step in lncRNA analysis, and a bunch of bioinformatics tools have been developed for this purpose in recent years. While these tools allow us to identify lncRNA more efficiently and accurately, they may produce inconsistent results, making selection a confusing issue. We compared the performance of 41 analysis models based on 14 software packages and different data sets, including high-quality data and low-quality data from 33 species. In addition, computational efficiency, robustness, and joint prediction of the models were explored. As a practical guidance, key points for lncRNA identification under different situations were summarized. In this investigation, no one of these models could be superior to others under all test conditions. The performance of a model relied to a great extent on the source of transcripts and the quality of assemblies. As general references, FEELnc_all_cl, CPC, and CPAT_mouse work well in most species while COME, CNCI, and lncScore are good choices for model organisms. Since these tools are sensitive to different factors such as the species involved and the quality of assembly, researchers must carefully select the appropriate tool based on the actual data. Alternatively, our test suggests that joint prediction could behave better than any single model if proper models were chosen. 
All scripts/data used in this research can be accessed at http://bioinfo.ihb.ac.cn/elit.",2020-10-14 +33902704,The impact of sequencing depth on the inferred taxonomic composition and AMR gene content of metagenomic samples.,"

Background

Shotgun metagenomics is increasingly used to characterise microbial communities, particularly for the investigation of antimicrobial resistance (AMR) in different animal and environmental contexts. There are many different approaches for inferring the taxonomic composition and AMR gene content of complex community samples from shotgun metagenomic data, but there has been little work establishing the optimum sequencing depth, data processing and analysis methods for these samples. In this study we used shotgun metagenomics and sequencing of cultured isolates from the same samples to address these issues. We sampled three potential environmental AMR gene reservoirs (pig caeca, river sediment, effluent) and sequenced samples with shotgun metagenomics at high depth (~ 200 million reads per sample). Alongside this, we cultured single-colony isolates of Enterobacteriaceae from the same samples and used hybrid sequencing (short- and long-reads) to create high-quality assemblies for comparison to the metagenomic data. To automate data processing, we developed an open-source software pipeline, 'ResPipe'.

Results

Taxonomic profiling was much more stable to sequencing depth than AMR gene content. 1 million reads per sample was sufficient to achieve < 1% dissimilarity to the full taxonomic composition. However, at least 80 million reads per sample were required to recover the full richness of different AMR gene families present in the sample, and additional allelic diversity of AMR genes was still being discovered in effluent at 200 million reads per sample. Normalising the number of reads mapping to AMR genes using gene length and an exogenous spike of Thermus thermophilus DNA substantially changed the estimated gene abundance distributions. While the majority of genomic content from cultured isolates from effluent was recoverable using shotgun metagenomics, this was not the case for pig caeca or river sediment.

Conclusions

Sequencing depth and profiling method can critically affect the profiling of polymicrobial animal and environmental samples with shotgun metagenomics. Both sequencing of cultured isolates and shotgun metagenomics can recover substantial diversity that is not identified using the other methods. Particular consideration is required when inferring AMR gene content or presence by mapping metagenomic reads to a database. ResPipe, the open-source software pipeline we have developed, is freely available ( https://gitlab.com/hsgweon/ResPipe ).",2019-10-24 +32531869,Personalised biopsy schedules based on risk of Gleason upgrading for patients with low-risk prostate cancer on active surveillance.,"

Objective

To develop a model and methodology for predicting the risk of Gleason upgrading in patients with prostate cancer on active surveillance (AS) and using the predicted risks to create risk-based personalised biopsy schedules as an alternative to one-size-fits-all schedules (e.g. annually). Furthermore, to assist patients and doctors in making shared decisions on biopsy schedules, by providing them quantitative estimates of the burden and benefit of opting for personalised vs any other schedule in AS. Lastly, to externally validate our model and implement it along with personalised schedules in a ready to use web-application.

Patients and methods

Repeat prostate-specific antigen (PSA) measurements, timing and results of previous biopsies, and age at baseline from the world's largest AS study, Prostate Cancer Research International Active Surveillance (PRIAS; 7813 patients, 1134 experienced upgrading). We fitted a Bayesian joint model for time-to-event and longitudinal data to this dataset. We then validated our model externally in the largest six AS cohorts of the Movember Foundation's third Global Action Plan (GAP3) database (>20 000 patients, 27 centres worldwide). Using the model predicted upgrading risks; we scheduled biopsies whenever a patient's upgrading risk was above a certain threshold. To assist patients/doctors in the choice of this threshold, and to compare the resulting personalised schedule with currently practiced schedules, along with the timing and the total number of biopsies (burden) planned, for each schedule we provided them with the time delay expected in detecting upgrading (shorter is better).

Results

The cause-specific cumulative upgrading risk at the 5-year follow-up was 35% in PRIAS, and at most 50% in the GAP3 cohorts. In the PRIAS-based model, PSA velocity was a stronger predictor of upgrading (hazard ratio [HR] 2.47, 95% confidence interval [CI] 1.93-2.99) than the PSA level (HR 0.99, 95% CI 0.89-1.11). Our model had a moderate area under the receiver operating characteristic curve (0.6-0.7) in the validation cohorts. The prediction error was moderate (0.1-0.2) in the GAP3 cohorts where the impact of the PSA level and velocity on upgrading risk was similar to PRIAS, but large (0.2-0.3) otherwise. Our model required re-calibration of baseline upgrading risk in the validation cohorts. We implemented the validated models and the methodology for personalised schedules in a web-application (http://tiny.cc/biopsy).

Conclusions

We successfully developed and validated a model for predicting upgrading risk, and providing risk-based personalised biopsy decisions in AS of prostate cancer. Personalised prostate biopsies are a novel alternative to fixed one-size-fits-all schedules, which may help to reduce unnecessary prostate biopsies, while maintaining cancer control. The model and schedules made available via a web-application enable shared decision-making on biopsy schedules by comparing fixed and personalised schedules on total biopsies and expected time delay in detecting upgrading.",2020-08-01 +33761868,m7GDisAI: N7-methylguanosine (m7G) sites and diseases associations inference based on heterogeneous network.,"

Background

Recent studies have confirmed that N7-methylguanosine (m7G) modification plays an important role in regulating various biological processes and has associations with multiple diseases. Wet-lab experiments are cost and time ineffective for the identification of disease-associated m7G sites. To date, tens of thousands of m7G sites have been identified by high-throughput sequencing approaches and the information is publicly available in bioinformatics databases, which can be leveraged to predict potential disease-associated m7G sites using a computational perspective. Thus, computational methods for m7G-disease association prediction are urgently needed, but none are currently available at present.

Results

To fill this gap, we collected association information between m7G sites and diseases, genomic information of m7G sites, and phenotypic information of diseases from different databases to build an m7G-disease association dataset. To infer potential disease-associated m7G sites, we then proposed a heterogeneous network-based model, m7G Sites and Diseases Associations Inference (m7GDisAI) model. m7GDisAI predicts the potential disease-associated m7G sites by applying a matrix decomposition method on heterogeneous networks which integrate comprehensive similarity information of m7G sites and diseases. To evaluate the prediction performance, 10 runs of tenfold cross validation were first conducted, and m7GDisAI got the highest AUC of 0.740(± 0.0024). Then global and local leave-one-out cross validation (LOOCV) experiments were implemented to evaluate the model's accuracy in global and local situations respectively. AUC of 0.769 was achieved in global LOOCV, while 0.635 in local LOOCV. A case study was finally conducted to identify the most promising ovarian cancer-related m7G sites for further functional analysis. Gene Ontology (GO) enrichment analysis was performed to explore the complex associations between host gene of m7G sites and GO terms. The results showed that m7GDisAI identified disease-associated m7G sites and their host genes are consistently related to the pathogenesis of ovarian cancer, which may provide some clues for pathogenesis of diseases.

Conclusion

The m7GDisAI web server can be accessed at http://180.208.58.66/m7GDisAI/ , which provides a user-friendly interface to query disease associated m7G. The list of top 20 m7G sites predicted to be associated with 177 diseases can be achieved. Furthermore, detailed information about specific m7G sites and diseases are also shown.",2021-03-24 +33707582,A two-stream convolutional neural network for microRNA transcription start site feature integration and identification.,"MicroRNAs (miRNAs) play important roles in post-transcriptional gene regulation and phenotype development. Understanding the regulation of miRNA genes is critical to understand gene regulation. One of the challenges to study miRNA gene regulation is the lack of condition-specific annotation of miRNA transcription start sites (TSSs). Unlike protein-coding genes, miRNA TSSs can be tens of thousands of nucleotides away from the precursor miRNAs and they are hard to be detected by conventional RNA-Seq experiments. A number of studies have been attempted to computationally predict miRNA TSSs. However, high-resolution condition-specific miRNA TSS prediction remains a challenging problem. Recently, deep learning models have been successfully applied to various bioinformatics problems but have not been effectively created for condition-specific miRNA TSS prediction. Here we created a two-stream deep learning model called D-miRT for computational prediction of condition-specific miRNA TSSs ( http://hulab.ucf.edu/research/projects/DmiRT/ ). D-miRT is a natural fit for the integration of low-resolution epigenetic features (DNase-Seq and histone modification data) and high-resolution sequence features. Compared with alternative computational models on different sets of training data, D-miRT outperformed all baseline models and demonstrated high accuracy for condition-specific miRNA TSS prediction tasks. 
Comparing with the most recent approaches on cell-specific miRNA TSS identification using cell lines that were unseen to the model training processes, D-miRT also showed superior performance.",2021-03-11 +33975943,Controls on Interspecies Electron Transport and Size Limitation of Anaerobically Methane-Oxidizing Microbial Consortia. ,"About 382 Tg yr-1 of methane rising through the seafloor is oxidized anaerobically (W. S. Reeburgh, Chem Rev 107:486-513, 2007, https://doi.org/10.1021/cr050362v), preventing it from reaching the atmosphere, where it acts as a strong greenhouse gas. Microbial consortia composed of anaerobic methanotrophic archaea and sulfate-reducing bacteria couple the oxidation of methane to the reduction of sulfate under anaerobic conditions via a syntrophic process. Recent experimental studies and modeling efforts indicate that direct interspecies electron transfer (DIET) is involved in this syntrophy. Here, we explore a fluorescent in situ hybridization-nanoscale secondary ion mass spectrometry data set of large, segregated anaerobic oxidation of methane (AOM) consortia that reveal a decline in metabolic activity away from the archaeal-bacterial interface and use a process-based model to identify the physiological controls on rates of AOM. Simulations reproducing the observational data reveal that ohmic resistance and activation loss are the two main factors causing the declining metabolic activity, where activation loss dominated at a distance of <8 μm. These voltage losses limit the maximum spatial distance between syntrophic partners with model simulations, indicating that sulfate-reducing bacterial cells can remain metabolically active up to ∼30 μm away from the archaeal-bacterial interface. 
Model simulations further predict that a hybrid metabolism that combines DIET with a small contribution of diffusive exchange of electron donors can offer energetic advantages for syntrophic consortia. IMPORTANCE Anaerobic oxidation of methane is a globally important, microbially mediated process reducing the emission of methane, a potent greenhouse gas. In this study, we investigate the mechanism of how a microbial consortium consisting of archaea and bacteria carries out this process and how these organisms interact with each other through the sharing of electrons. We present a process-based model validated by novel experimental measurements of the metabolic activity of individual, phylogenetically identified cells in very large (>20-μm-diameter) microbial aggregates. Model simulations indicate that extracellular electron transfer between archaeal and bacterial cells within a consortium is limited by potential losses and suggest that a flexible use of electron donors can provide energetic advantages for syntrophic consortia.",2021-05-11 +31584078,Genus for biomolecules.,"The 'Genus for biomolecules' database (http://genus.fuw.edu.pl) collects information about topological structure and complexity of proteins and RNA chains, which is captured by the genus of a given chain and its subchains. For each biomolecule, this information is shown in the form of a genus trace plot, as well as a genus matrix diagram. We assemble such information for all protein and RNA structures deposited in the Protein Data Bank (PDB). This database presents also various statistics and extensive information about the biological function of the analyzed biomolecules. The database is regularly self-updating, once new structures are deposited in the PDB. 
Moreover, users can analyze their own structures.",2020-01-01 +32074470,CoMPARA: Collaborative Modeling Project for Androgen Receptor Activity.,"BACKGROUND:Endocrine disrupting chemicals (EDCs) are xenobiotics that mimic the interaction of natural hormones and alter synthesis, transport, or metabolic pathways. The prospect of EDCs causing adverse health effects in humans and wildlife has led to the development of scientific and regulatory approaches for evaluating bioactivity. This need is being addressed using high-throughput screening (HTS) in vitro approaches and computational modeling. OBJECTIVES:In support of the Endocrine Disruptor Screening Program, the U.S. Environmental Protection Agency (EPA) led two worldwide consortiums to virtually screen chemicals for their potential estrogenic and androgenic activities. Here, we describe the Collaborative Modeling Project for Androgen Receptor Activity (CoMPARA) efforts, which follows the steps of the Collaborative Estrogen Receptor Activity Prediction Project (CERAPP). METHODS:The CoMPARA list of screened chemicals built on CERAPP's list of 32,464 chemicals to include additional chemicals of interest, as well as simulated ToxCast™ metabolites, totaling 55,450 chemical structures. Computational toxicology scientists from 25 international groups contributed 91 predictive models for binding, agonist, and antagonist activity predictions. Models were underpinned by a common training set of 1,746 chemicals compiled from a combined data set of 11 ToxCast™/Tox21 HTS in vitro assays. RESULTS:The resulting models were evaluated using curated literature data extracted from different sources. To overcome the limitations of single-model approaches, CoMPARA predictions were combined into consensus models that provided averaged predictive accuracy of approximately 80% for the evaluation set. 
DISCUSSION:The strengths and limitations of the consensus predictions were discussed with example chemicals; then, the models were implemented into the free and open-source OPERA application to enable screening of new chemicals with a defined applicability domain and accuracy assessment. This implementation was used to screen the entire EPA DSSTox database of ∼875,000 chemicals, and their predicted AR activities have been made available on the EPA CompTox Chemicals dashboard and National Toxicology Program's Integrated Chemical Environment. https://doi.org/10.1289/EHP5580.",2020-02-07 +31598610,Building a Science Gateway For Processing and Modeling Sequencing Data Via Apache Airavata. ,"The amount of DNA sequencing data has been exponentially growing during the past decade due to advances in sequencing technology. Processing and modeling large amounts of sequencing data can be computationally intractable for desktop computing platforms. High performance computing (HPC) resources offer advantages in terms of computing power, and can be a general solution to these problems. Using HPCs directly for computational needs requires skilled users who know their way around HPCs and acquiring such skills takes time. Science gateways act as the middle layer between users and HPCs, providing users with the resources to accomplish compute-intensive tasks without requiring specialized expertise. We developed a web-based computing platform for genome biologists by customizing the PHP Gateway for Airavata (PGA) framework that accesses publicly accessible HPC resources via Apache Airavata. This web computing platform takes advantage of the Extreme Science and Engineering Discovery Environment (XSEDE) which provides the resources for gateway development, including access to CPU, GPU, and storage resources. We used this platform to develop a gateway for the dREG algorithm, an online computing tool for finding functional regions in mammalian genomes using nascent RNA sequencing data. 
The dREG gateway provides its users a free, powerful and user-friendly GPU computing resource based on XSEDE, circumventing the need of specialized knowledge about installation, configuration, and execution on an HPC for biologists. The dREG gateway is available at: https://dREG.dnasequence.org/.",2018-07-01 +33741167,Scoping review on clinical definition of bovine respiratory disease complex and related clinical signs in dairy cows.,"Bovine respiratory disease complex (BRD) is a worldwide multifactorial infectious disease. Antimicrobials are commonly used for treating BRD because bacteria are often involved. The clinical diagnosis of BRD is a challenge, especially in adult dairy cows, where information on this syndrome is scant. Having a definition based on consistent and reliable clinical signs would improve the accuracy of BRD diagnosis and could help to develop an optimal treatment approach by an early detection. The aim of this scoping review was to review clinical signs that could be recognized by producers in dairy cattle suffering from naturally occurring infectious respiratory disease, as reported in the literature. A review of the literature was performed for articles published between January 1, 1990 and January 1, 2020. The search of literature in English, French, and Italian languages included 2 different databases (Pubmed, https://pubmed.ncbi.nlm.nih.gov/; CAB abstract, https://www.cabi.org/publishing-products/cab-abstracts/). Clinical signs were categorized as follows: (1) ""general manifestations of disease,"" which included behavioral changes or fever; (2) ""alterations in respiratory function,"" which included clinical signs specifically associated with the respiratory tract examination; and (3) ""clinical signs of other body systems,"" which included clinical signs related to other systems such as diarrhea or subcutaneous emphysema. The focus of the review was on clinical signs that could be monitored by animal handlers and producers. 
A total of 1,067 titles were screened, and 23 studies were finally included. The most common general clinical signs were increased body temperature (reported in 83% of studies, n = 19), change in feed intake (26%, n = 6), altered mentation (22%, n = 5), and decreased milk production (17%, n = 4). The alterations in respiratory function noted were nasal discharge (74%, n = 17), cough (65%, n = 15), altered respiratory dynamic or dyspnea (61%, n = 14), increased respiratory rate (43%, n = 10), and ocular discharge or lacrimation (30%, n = 7). The clinical signs associated with infectious respiratory disease reported in the 23 studies generally lacked a clear description of what constitutes a deviation from normality (0-50% of studies clearly reported what was considered normal versus abnormal depending on the clinical signs). This limitation prevented any comparison between studies that apparently reported the same ""clinical sign,"" but possibly referred to a different assessment and definition of what was considered normal versus abnormal. Therefore, the definition of clinical signs in a repeatable way with validated interobserver agreement to determine the optimal combination for the diagnosis of BRD in dairy cows is needed. This could lead to a more judicious use of antimicrobials for respiratory disease in adult dairy cows.",2021-03-23 +33782607,Martini 3: a general purpose force field for coarse-grained molecular dynamics.,"The coarse-grained Martini force field is widely used in biomolecular simulations. Here we present the refined model, Martini 3 ( http://cgmartini.nl ), with an improved interaction balance, new bead types and expanded ability to include specific interactions representing, for example, hydrogen bonding and electronic polarizability. 
The updated model allows more accurate predictions of molecular packing and interactions in general, which is exemplified with a vast and diverse set of applications, ranging from oil/water partitioning and miscibility data to complex molecular systems, involving protein-protein and protein-lipid interactions and material science applications as ionic liquids and aedamers.",2021-03-29 +28488387,Open-access evidence database of controlled trials and systematic reviews in youth mental health.,"

Aim

To present an update to an evidence-mapping project that consolidates the evidence base of interventions in youth mental health. To promote dissemination of this resource, the evidence map has been translated into a free online database (https://orygen.org.au/Campus/Expert-Network/Evidence-Finder or https://headspace.org.au/research-database/). Included studies are extensively indexed to facilitate searching.

Methods

A systematic search for prevention and treatment studies in young people (mean age 6-25 years) is conducted annually using Embase, MEDLINE, PsycINFO and the Cochrane Library. Included studies are restricted to controlled trials and systematic reviews published since 1980.

Results

To date, 221 866 publications have been screened, of which 2680 have been included in the database. Updates are conducted annually.

Conclusions

This shared resource can be utilized to substantially reduce the amount of time involved with conducting literature searches. It is designed to promote the uptake of evidence-based practice and facilitate research to address gaps in youth mental health.",2017-05-10 +34079055,Mammary cell gene expression atlas links epithelial cell remodeling events to breast carcinogenesis.,"The female mammary epithelium undergoes reorganization during development, pregnancy, and menopause, linking higher risk with breast cancer development. To characterize these periods of complex remodeling, here we report integrated 50 K mouse and 24 K human mammary epithelial cell atlases obtained by single-cell RNA sequencing, which covers most lifetime stages. Our results indicate a putative trajectory that originates from embryonic mammary stem cells which differentiates into three epithelial lineages (basal, luminal hormone-sensing, and luminal alveolar), presumably arising from unipotent progenitors in postnatal glands. The lineage-specific genes infer cells of origin of breast cancer using The Cancer Genome Atlas data and single-cell RNA sequencing of human breast cancer, as well as the association of gland reorganization to different breast cancer subtypes. This comprehensive mammary cell gene expression atlas ( https://mouse-mammary-epithelium-integrated.cells.ucsc.edu ) presents insights into the impact of the internal and external stimuli on the mammary epithelium at an advanced resolution.",2021-06-02 +26438538,dbSUPER: a database of super-enhancers in mouse and human genome.,"Super-enhancers are clusters of transcriptional enhancers that drive cell-type-specific gene expression and are crucial to cell identity. Many disease-associated sequence variations are enriched in super-enhancer regions of disease-relevant cell types. Thus, super-enhancers can be used as potential biomarkers for disease diagnosis and therapeutics. 
Current studies have identified super-enhancers in more than 100 cell types and demonstrated their functional importance. However, a centralized resource to integrate all these findings is not currently available. We developed dbSUPER (http://bioinfo.au.tsinghua.edu.cn/dbsuper/), the first integrated and interactive database of super-enhancers, with the primary goal of providing a resource for assistance in further studies related to transcriptional control of cell identity and disease. dbSUPER provides a responsive and user-friendly web interface to facilitate efficient and comprehensive search and browsing. The data can be easily sent to Galaxy instances, GREAT and Cistrome web-servers for downstream analysis, and can also be visualized in the UCSC genome browser where custom tracks can be added automatically. The data can be downloaded and exported in variety of formats. Furthermore, dbSUPER lists genes associated with super-enhancers and also links to external databases such as GeneCards, UniProt and Entrez. dbSUPER also provides an overlap analysis tool to annotate user-defined regions. We believe dbSUPER is a valuable resource for the biology and genetic research communities.",2015-10-04 +34180678,Genetic Variability of the SARS-CoV-2 Pocketome.,"In the absence of effective treatment, COVID-19 is likely to remain a global disease burden. Compounding this threat is the near certainty that novel coronaviruses with pandemic potential will emerge in years to come. Pan-coronavirus drugs-agents active against both SARS-CoV-2 and other coronaviruses-would address both threats. A strategy to develop such broad-spectrum inhibitors is to pharmacologically target binding sites on SARS-CoV-2 proteins that are highly conserved in other known coronaviruses, the assumption being that any selective pressure to keep a site conserved across past viruses will apply to future ones. 
Here we systematically mapped druggable binding pockets on the experimental structure of 15 SARS-CoV-2 proteins and analyzed their variation across 27 α- and β-coronaviruses and across thousands of SARS-CoV-2 samples from COVID-19 patients. We find that the two most conserved druggable sites are a pocket overlapping the RNA binding site of the helicase nsp13 and the catalytic site of the RNA-dependent RNA polymerase nsp12, both components of the viral replication-transcription complex. We present the data on a public web portal (https://www.thesgc.org/SARSCoV2_pocketome/), where users can interactively navigate individual protein structures and view the genetic variability of drug-binding pockets in 3D.",2021-06-28 +36311989,"Comparison of the number of live births, maternal age at childbirth, and weight of live births between Korean women and immigrant women in 2018.","

Purpose

This study compared maternal age at childbirth, the number of live births, and the weight of live births between Korean women and immigrant women using statistical data from the Republic of Korea for the period of 2008-2018.

Methods

The analysis was conducted using data from the Microdata Integrated Service of Statistics Korea (https://mdis.kostat.go.kr/index.do).

Results

Korean women and immigrant women showed a higher age at childbirth in 2018 than in 2008. The percentage of newborns of Korean women with a birth weight of less than 2.5 kg increased slightly for 3 consecutive years from 2016 to 2018, whereas for immigrant women, this percentage increased in 2017 compared to 2016 and then decreased again in 2018. Very low birth weight (less than 1.5 kg) became more common among immigrant women from 2016 to 2018. Birth at a gestational age of fewer than 37 weeks increased both among Korean and immigrant women from 2016 to 2018. In both groups, the percentage of women who had their first child within their first 2 years of marriage decreased from 2008 to 2018.

Conclusion

Immigrant women had higher birth rates than Korean women, while both groups showed an increasing trend in preterm birth. Greater attention should be paid to the pregnancy and birth needs of immigrant women, and steps are needed to ensure health equity and access in order to prevent preterm births. It is also necessary to identify factors that affect preterm birth and birth of very low birth weight infants among immigrant women in the future.",2021-03-23 +36303781,Antibody Class(es) Predictor for Epitopes (AbCPE): A Multi-Label Classification Algorithm.,"Development of vaccines and therapeutic antibodies to deal with infectious and other diseases are the most perceptible scientific interventions that have had huge impact on public health including that in the current Covid-19 pandemic. From inactivation methodologies to reverse vaccinology, vaccine development strategies of 21st century have undergone several transformations and are moving towards rational design approaches. These developments are driven by data as the combinatorials involved in antigenic diversity of pathogens and immune repertoire of hosts are enormous. The computational prediction of epitopes is central to these developments and numerous B-cell epitope prediction methods developed over the years in the field of immunoinformatics have contributed enormously. Most of these methods predict epitopes that could potentially bind to an antibody regardless of its type and only a few account for antibody class specific epitope prediction. Recent studies have provided evidence of more than one class of antibodies being associated with a particular disease. Therefore, it is desirable to predict and prioritize 'peptidome' representing B-cell epitopes that can potentially bind to multiple classes of antibodies, as an open problem in immunoinformatics. 
To address this, AbCPE, a novel algorithm based on multi-label classification approach has been developed for prediction of antibody class(es) to which an epitope can potentially bind. The epitopes binding to one or more antibody classes (IgG, IgE, IgA and IgM) have been used as a knowledgebase to derive features for prediction. Multi-label algorithms, Binary Relevance and Label Powerset were applied along with Random Forest and AdaBoost. Classifier performance was assessed using evaluation measures like Hamming Loss, Precision, Recall and F1 score. The Binary Relevance model based on dipeptide composition, Random Forest and AdaBoost achieved the best results with Hamming Loss of 0.1121 and 0.1074 on training and test sets respectively. The results obtained by AbCPE are promising. To the best of our knowledge, this is the first multi-label method developed for prediction of antibody class(es) for sequential B-cell epitopes and is expected to bring a paradigm shift in the field of immunoinformatics and immunotherapeutic developments in synthetic biology. The AbCPE web server is available at http://bioinfo.unipune.ac.in/AbCPE/Home.html.",2021-09-07 +33751898,Rate of Communicative Gestures and Developmental Outcomes in Toddlers With and Without Autism Spectrum Disorder During a Home Observation.,"Purpose Most toddlers with autism spectrum disorder and other developmental delays receive early intervention at home and may not participate in a clinic-based communication evaluation. However, there is limited research that has prospectively examined communication in very young children with and without autism in a home-based setting. This study used granular observational coding to document the communicative acts performed by toddlers with autism, developmental delay, and typical development in the home environment. Method Children were selected from the archival database of the FIRST WORDS Project (N = 211). 
At approximately 20 months of age, each child participated in everyday activities with a caregiver during an hour-long, video-recorded, naturalistic home observation. Inventories of unique gestures, rates per minute, and proportions of types of communicative acts and communicative functions were coded and compared using a one-way analysis of variance. Concurrent and prospective relationships between rate of communication and measures of social communication, language development, and autism symptoms were examined. Results A total of 40,738 communicative acts were coded. Children with autism, developmental delay, and typical development used eight, nine, and 12 unique gestures on average, respectively. Children with autism used deictic gestures, vocalizations, and communicative acts for behavior regulation at significantly lower rates than the other groups. Statistically significant correlations were observed between rate of communication and several outcome measures. Conclusion Observation of social communication in the natural environment may improve early identification of children with autism and communication delays, complement clinic-based assessments, and provide useful information about a child's social communication profile and the family's preferred activities and intervention priorities. Supplemental Material https://doi.org/10.23641/asha.14204522.",2021-03-22 +34240787,Linking patterns of infant eye movements to a neural network model of the ventral stream using representational similarity analysis.,"Little is known about the development of higher-level areas of visual cortex during infancy, and even less is known about how the development of visually guided behavior is related to the different levels of the cortical processing hierarchy. 
As a first step toward filling these gaps, we used representational similarity analysis (RSA) to assess links between gaze patterns and a neural network model that captures key properties of the ventral visual processing stream. We recorded the eye movements of 4- to 12-month-old infants (N = 54) as they viewed photographs of scenes. For each infant, we calculated the similarity of the gaze patterns for each pair of photographs. We also analyzed the images using a convolutional neural network model in which the successive layers correspond approximately to the sequence of areas along the ventral stream. For each layer of the network, we calculated the similarity of the activation patterns for each pair of photographs, which was then compared with the infant gaze data. We found that the network layers corresponding to lower-level areas of visual cortex accounted for gaze patterns better in younger infants than in older infants, whereas the network layers corresponding to higher-level areas of visual cortex accounted for gaze patterns better in older infants than in younger infants. Thus, between 4 and 12 months, gaze becomes increasingly controlled by more abstract, higher-level representations. These results also demonstrate the feasibility of using RSA to link infant gaze behavior to neural network models. A video abstract of this article can be viewed at https://youtu.be/K5mF2Rw98Is.",2021-07-21 +28775335,NuBBEDB: an updated database to uncover chemical and biological information from Brazilian biodiversity.,"The intrinsic value of biodiversity extends beyond species diversity, genetic heritage, ecosystem variability and ecological services, such as climate regulation, water quality, nutrient cycling and the provision of reproductive habitats; it is also an inexhaustible source of molecules and products beneficial to human well-being. 
To uncover the chemistry of Brazilian natural products, the Nuclei of Bioassays, Ecophysiology and Biosynthesis of Natural Products Database (NuBBEDB) was created as the first natural product library from Brazilian biodiversity. Since its launch in 2013, the NuBBEDB has proven to be an important resource for new drug design and dereplication studies. Consequently, continuous efforts have been made to expand its contents and include a greater diversity of natural sources to establish it as a comprehensive compendium of available biogeochemical information about Brazilian biodiversity. The content in the NuBBEDB is freely accessible online (https://nubbe.iq.unesp.br/portal/nubbedb.html) and provides validated multidisciplinary information, chemical descriptors, species sources, geographic locations, spectroscopic data (NMR) and pharmacological properties. Herein, we report the latest advancements concerning the interface, content and functionality of the NuBBEDB. We also present a preliminary study on the current profile of the compounds present in Brazilian territory.",2017-08-03 +,Chromosome numbers of the flora of Germany—a new online database of georeferenced chromosome counts and flow cytometric ploidy estimates,"Chromosomal speciation processes gain increasing attention in plant systematics and evolution, and new approaches revealed a high diversity in chromosome numbers even within recognized taxa. Reliable counts linked to known accessions are thus needed yet often hardly available. We present a new online database for chromosome counts and ploidy estimates of the flora of Germany with a detailed documentation of the examined material, and its sampling locality. The chromosome database builds upon a relational database and includes standardized taxon identification, study date, georeferenced locality and additional collection as well as publication details from which the karyological information was extracted. 
In order to reach the best compatibility with other botanical publications of the study region, taxonomic concepts and nomenclature follow the “Rothmaler”, a widely accepted field flora of vascular plants in Germany. Our online database is available at http://chromosomes.senckenberg.de . The site consists of the main page with project information, a search tool, an interactive map display, a contact and a data submission form. The zoomable map shows the localities of the search result, allows to refine the geographic search as well as to select individual data points.",2017-10-01 +31120982,TB DEPOT (Data Exploration Portal): A multi-domain tuberculosis data analysis resource.,"The NIAID TB Portals Program (TBPP) established a unique and growing database repository of socioeconomic, geographic, clinical, laboratory, radiological, and genomic data from patient cases of drug-resistant tuberculosis (DR-TB). Currently, there are 2,428 total cases from nine country sites (Azerbaijan, Belarus, Moldova, Georgia, Romania, China, India, Kazakhstan, and South Africa), 1,611 (66%) of which are multidrug- or extensively-drug resistant and 1,185 (49%), 863 (36%), and 952 (39%) of which contain X-ray, computed tomography (CT) scan, and genomic data, respectively. We introduce the Data Exploration Portal (TB DEPOT, https://depot.tbportals.niaid.nih.gov) to visualize and analyze these multi-domain data. The TB DEPOT leverages the TBPP integration of clinical, socioeconomic, genomic, and imaging data into standardized formats and enables user-driven, repeatable, and reproducible analyses. It furthers the TBPP goals to provide a web-enabled analytics platform to countries with a high burden of multidrug-resistant TB (MDR-TB) but limited IT resources and inaccessible data, and enables the reusability of data, in conformity with the NIH's Findable, Accessible, Interoperable, and Reusable (FAIR) principles. 
TB DEPOT provides access to ""analysis-ready"" data and the ability to generate and test complex clinically-oriented hypotheses instantaneously with minimal statistical background and data processing skills. TB DEPOT is also promising for enhancing medical training and furnishing well annotated, hard to find, MDR-TB patient cases. TB DEPOT, as part of TBPP, further fosters collaborative research efforts to better understand drug-resistant tuberculosis and aid in the development of novel diagnostics and personalized treatment regimens.",2019-05-23 +32221380,WeiBI (web-based platform): Enriching integrated interaction network with increased coverage and functional proteins from genome-wide experimental OMICS data.,"Many molecular system biology approaches recognize various interactions and functional associations of proteins that occur in cellular processing. Further understanding of the characterization technique reveals noteworthy information. These types of known and predicted interactions, gained through multiple resources, are thought to be important for experimental data to satisfy comprehensive and quality needs. The current work proposes the ""WeiBI (WeiBiologicalInteractions)"" database that clarifies direct and indirect partnerships associated with biological interactions. This database contains information concerning protein's functional partnerships and interactions along with their integration into a statistical model that can be computationally predicted for humans. This novel approach in WeiBI version 1.0 collects information using an improved algorithm by transferring interactions between more than 115570 entries, allowing statistical analysis with the automated background for the given inputs for functional enrichment. This approach also allows the input of an entity's list from a database along with the visualization of subsets as an interaction network and successful performance of the enrichment analysis for a gene set. 
This wisely improved algorithm is user-friendly, and its accessibility and higher accuracy make it the best database for exploring interactions among genomes' network and reflects the importance of this study. The proposed server ""WeiBI"" is accessible at http://weislab.com/WeiDOCK/?page=PKPD.",2020-03-27 +29059408,PopHuman: the human population genomics browser.,"The 1000 Genomes Project (1000GP) represents the most comprehensive world-wide nucleotide variation data set so far in humans, providing the sequencing and analysis of 2504 genomes from 26 populations and reporting >84 million variants. The availability of this sequence data provides the human lineage with an invaluable resource for population genomics studies, allowing the testing of molecular population genetics hypotheses and eventually the understanding of the evolutionary dynamics of genetic variation in human populations. Here we present PopHuman, a new population genomics-oriented genome browser based on JBrowse that allows the interactive visualization and retrieval of an extensive inventory of population genetics metrics. Efficient and reliable parameter estimates have been computed using a novel pipeline that faces the unique features and limitations of the 1000GP data, and include a battery of nucleotide variation measures, divergence and linkage disequilibrium parameters, as well as different tests of neutrality, estimated in non-overlapping windows along the chromosomes and in annotated genes for all 26 populations of the 1000GP. PopHuman is open and freely available at http://pophuman.uab.cat.",2018-01-01 +33407091,Cell-specific characterization of the placental methylome.,"

Background

DNA methylation (DNAm) profiling has emerged as a powerful tool for characterizing the placental methylome. However, previous studies have focused primarily on whole placental tissue, which is a mixture of epigenetically distinct cell populations. Here, we present the first methylome-wide analysis of first trimester (n = 9) and term (n = 19) human placental samples of four cell populations: trophoblasts, Hofbauer cells, endothelial cells, and stromal cells, using the Illumina EPIC methylation array, which quantifies DNAm at > 850,000 CpGs.

Results

The most distinct DNAm profiles were those of placental trophoblasts, which are central to many pregnancy-essential functions, and Hofbauer cells, which are a rare fetal-derived macrophage population. Cell-specific DNAm occurs at functionally-relevant genes, including genes associated with placental development and preeclampsia. Known placental-specific methylation marks, such as those associated with genomic imprinting, repetitive element hypomethylation, and placental partially methylated domains, were found to be more pronounced in trophoblasts and often absent in Hofbauer cells. Lastly, we characterize the cell composition and cell-specific DNAm dynamics across gestation.

Conclusions

Our results provide a comprehensive analysis of DNAm in human placental cell types from first trimester and term pregnancies. This data will serve as a useful DNAm reference for future placental studies, and we provide access to this data via download from GEO (GSE159526), through interactive exploration from the web browser ( https://robinsonlab.shinyapps.io/Placental_Methylome_Browser/ ), and through the R package planet, which allows estimation of cell composition directly from placental DNAm data.",2021-01-06 +33715003,POKY: a software suite for multidimensional NMR and 3D structure calculation of biomolecules. ,"The need for an efficient and cost-effective method is compelling in biomolecular NMR. To tackle this problem, we have developed the Poky suite, the revolutionized platform with boundless possibilities for advancing research and technology development in signal detection, resonance assignment, structure calculation, and relaxation studies with the help of many automation and user interface tools. This software is extensible and scalable by scripting and batching as well as providing modern graphical user interfaces and a diverse range of modules right out of the box. Poky is freely available to non-commercial users at https://poky.clas.ucdenver.edu. Supplementary data are available at Bioinformatics online.",2021-03-14 +32510176,ProTrack: An Interactive Multi-Omics Data Browser for Proteogenomic Studies.,"The Clinical Proteomic Tumor Analysis Consortium (CPTAC) initiative has generated extensive multi-omics data resources of deep proteogenomic profiles for multiple cancer types. To enable the broader community of biological and medical researchers to intuitively query, explore, and download data and analysis results from various CPTAC projects, a prototype user-friendly web application called ""ProTrack"" is built with the CPTAC clear cell renal cell carcinoma (ccRCC) data set (http://ccrcc.cptac-data-view.org). 
Here the salient features of this application which provides a dynamic, comprehensive, and granular visualization of the rich proteogenomic data is described.",2020-08-06 +30304987,An integrated framework for identification of effective and synergistic anti-cancer drug combinations.,"Combination drug therapy is considered a better treatment option for various diseases, such as cancer, HIV, hypertension, and infections as compared to targeted drug therapies. Combination or synergism helps to overcome drug resistance, reduction in drug toxicity and dosage. Considering the complexity and heterogeneity among cancer types, drug combination provides promising treatment strategy. Increase in drug combination data raises a challenge for developing a computational approach that can effectively predict drugs synergism. There is a need to model the combination drug screening data to predict new synergistic drug combinations for successful cancer treatment. In such a scenario, machine learning approaches can be used to alleviate the process of drugs synergy prediction. Experimental data from a single-agent or multi-agent drug screens provides feature data for model training. On the contrary, identification of effective drug combination using clinical trials is a time consuming and resource intensive task. This paper attempts to address the aforementioned challenges by developing a computational approach to effectively predict drug synergy. Single-drug efficacy is used for predicting drug synergism. Our approach obviates the need to understand the underlying drug mechanism to predict drug combination synergy. For this purpose, nine machine learning algorithms are trained. It is observed that the Random forest models, in comparison to other models, have shown significant performance. The K -fold cross-validation is performed to evaluate the robustness of the best predictive model. 
The proposed approach is applied to mutant-BRAF melanoma and further validated using melanoma cell-lines from AstraZeneca-Sanger Drug Combination Prediction DREAM Challenge dataset.",2018-06-28 +34700656,Mature Outcomes of 61.2 Gy Concomitant Boost (CB) Thoracic Radiotherapy (TRT) in Limited Stage Small Cell Lung Cancer (LSCLC): CALGB 30610 (Alliance) / RTOG 0538.,"

Purpose/objective(s)

We report mature outcomes of LSCLC patients randomized to the 61.2 Gy CB TRT arm of CALGB 30610/RTOG 0538. The study initially included 2 experimental arms, 61.2 Gy CB TRT over 5 weeks and 70 Gy daily (QD) over 7 weeks, both with higher predicted biologic efficacy than standard 45 Gy twice-daily (BID) TRT. The CB arm was discontinued after planned early interim analysis comparing toxicity in the experimental arms, but the study provides the largest prospective dataset assessing 61.2 Gy CB TRT in LSCLC. MATERIALS/METHODS: Eligible patients had LSCLC with regional lymph node involvement, excluding contralateral hilar and supraclavicular nodes, and ECOG PS 0-2. TRT began with either the first or 2nd (of 4 total) cycle of cisplatin-based chemotherapy, and prophylactic cranial radiotherapy was recommended in cases of complete (or near complete) response. In the initial phase of the trial, patients were randomized with a 1:1:1 allocation to 45 Gy BID, 70 Gy QD and 61.2 Gy CB TRT. A decision to drop an experimental arm was mandated after toxicity data was available for approximately 70 patients in each cohort. Although toxicity was similar in both experimental arms, a decision was made to discontinue the 61.2 Gy cohort after discussion with the DSMB.

Results

From March 2008 until March 2013 (when the cohort was discontinued), 93 patients were assigned to receive 61.2 Gy CB TRT. The median age was 62 years (range 41 to 77), the majority of patients were Caucasian (86%), male (51%), ECOG PS 0-1 (95%). Most patients started TRT with the first chemotherapy cycle (65%) and had 3D planning (63%). After median follow up of 7 years for surviving patients, median overall survival (OS) and progression free survival were 32.3 months (95% CI: 21.1-44.8) and 15.6 months (95% CI: 10.0-27.5), respectively. Two-year OS was 57% (95% CI: 0.47-0.68), with 28% OS (95% CI: 0.2-0.4) at 5 years. Rates of grade 3 and 4 hematologic adverse events (AEs) were 20.5% and 59.1%, with 40.9% and 25% Grade 3 and 4 non-hematologic AEs. Grade 3+ dysphagia and dyspnea were 16% and 7%, respectively. There were 4 Grade 5 AEs, including one patient with febrile neutropenia.

Conclusion

Outcomes are similar to the contemporaneous CONVERT trial, and compare favorably to the prior phase II trial examining 61.2 Gy CB TRT (RTOG 0239) in LSCLC, despite the allowance of early stage (N0) disease in those studies. The improvement in OS compared with RTOG 0239 is likely attributed to improved staging and advances in radiotherapy planning and delivery, though additional factors may have contributed. Support: U10CA180821, U10CA180882; U10CA180868 (NRG), U10CA180888 (SWOG); https://acknowledgments.alliancefound.org Clinicaltrials.gov Id: NCT00632853.",2021-11-01 +31096452,Online community collaborative map: A geospatial and data visualization tool for cancer data.,"The aim of this study was to develop an online collaborative map to enable researchers to locate, explore, and share cancer data.This 2-scale (global and country-level) cancer map adopts a database-driven model, which was implemented using the Google Map Application Programming Interface (API) and asynchronous JavaScript and XML (AJAX) technology. Seven visualization techniques were used to present data. Data on worldwide cancer mortality between 1950 and 2013 were taken from the International Agency for Research on Cancer (IARC) database. Incidence data were from the IARC CI5plus database. Survival data were from the IARC SURVCAN study. Prevalence data between 1990 and 2017 were from the Institute for Health Metrics and Evaluation's (IHME) catalog while demographic data were from the World Bank Data Catalog. Cancer data for Taiwan between 1991 and 2016 were obtained from the Department of Health and Welfare. 
This study used visualization techniques that included: a choropleth map to display the prevalence of cancer; a tornado diagram to show the age-standardized mortality rates of all cancers among men and women in 2013; a treemap to show a ranking of cancer mortality data; a sunburst chart to show mortality rates of all cancers by gender; a line chart to show mortality trends for all cancers; a bar chart to show mortality and incidence rates and a heatmap to show variations in cancer across different countries. The world cancer map generated by this study can be accessed at http://worldmap.csmu-liawyp.tw. Country-level mortality data are presented as crude and age-standardized rates. We used visualization methodologies and constructed an easily maintainable web-based user interface with cancer data from administrative regions in 150 countries. This serves as a platform that allows researchers to manage and disseminate cancer data.",2019-05-01 +34180339,Fluorescent labeling of s2T-incorporated DNA and m5s2U-modified RNA.,"We report herein comprehensive investigations of alkylation/sulfur exchange reactions of sulfur-containing substrates including nucleosides such as s2U, m5s2U, s4U, s2A and s2T-incorporated DNA enabled by comprehensive screenings of the reagents (2a-2h). It has been proven that iodoacetamide (2a) displays the most promising feasibility toward sulfur-containing substrates including s2T, s2U, m5s2U, s4U and s2A. In sharp contrast, the alkylation process with S-benzyl methanethiosulfonate (BMTS, 2h) displays the best application potential only for s4U. Based on these results, the fluorescent labeling of s2T-incorporated DNA and m5s2U-modified RNA has been achieved. Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1942044 .",2021-06-26 +32127419,Systematic Localization of Escherichia coli Membrane Proteins. 
,"The molecular architecture and function of the Gram-negative bacterial cell envelope are dictated by protein composition and localization. Proteins that localize to the inner membranes (IM) and outer membranes (OM) of Gram-negative bacteria play critical and distinct roles in cellular physiology; however, approaches to systematically interrogate their distribution across both membranes and the soluble cell fraction are lacking. Here, we employed multiplexed quantitative mass spectrometry using tandem mass tag (TMT) labeling to assess membrane protein localization in a proteome-wide fashion by separating IM and OM vesicles from exponentially growing Escherichia coli K-12 cells on a sucrose density gradient. The migration patterns for >1,600 proteins were classified in an unbiased manner, accurately recapitulating decades of knowledge in membrane protein localization in E. coli. For 559 proteins that are currently annotated as peripherally associated with the IM (G. Orfanoudaki and A. Economou, Mol Cell Proteomics 13:3674-3687, 2014, https://doi.org/10.1074/mcp.O114.041137) and that display potential for dual localization to either the IM or cytoplasm, we could allocate 110 proteins to the IM and 206 proteins to the soluble cell fraction based on their fractionation patterns. In addition, we uncovered 63 cases, in which our data disagreed with current localization annotation in protein databases. For 42 of these cases, we were able to find supportive evidence for our localization findings in the literature. We anticipate that our systems-level analysis of the E. coli membrane proteome will serve as a useful reference data set to query membrane protein localization, as well as to provide a novel methodology to rapidly and systematically map membrane protein localization in more poorly characterized Gram-negative species. IMPORTANCE Current knowledge of protein localization, particularly outer membrane proteins, is highly dependent on bioinformatic predictions. 
To date, no systematic experimental studies have directly compared protein localization spanning the inner and outer membranes of E. coli. By combining sucrose density gradient fractionation of inner membrane (IM) and outer membrane (OM) proteins with multiplex quantitative proteomics, we systematically quantified localization patterns for >1,600 proteins, providing high-confidence localization annotations for 1,368 proteins. Of these proteins, we resolve the predominant localization of 316 proteins that currently have dual annotation (cytoplasmic and IM) in protein databases and identify new annotations for 42 additional proteins. Overall, we present a novel quantitative methodology to systematically map membrane proteins in Gram-negative bacteria and use it to unravel the biological complexity of the membrane proteome architecture in E. coli.",2020-03-03 +32804993,minMLST: machine learning for optimization of bacterial strain typing.,"

Motivation

High-resolution microbial strain typing is essential for various clinical purposes, including disease outbreak investigation, tracking of microbial transmission events and epidemiological surveillance of bacterial infections. The widely used approach for multilocus sequence typing (MLST) that is based on the core genome, cgMLST, has the advantage of a high level of typeability and maximal discriminatory power. Yet, the transition from a seven loci-based scheme to cgMLST involves several challenges, that include the need by some users to maintain backward compatibility, growing difficulties in the day-to-day communication within the microbiology community with respect to nomenclature and ontology, issues with typeability, especially if a more stringent approach to loci presence is used, and computational requirements concerning laboratory data management and sharing with end-users. Hence, methods for optimizing cgMLST schemes through careful reduction of the number of loci are expected to be beneficial for practical needs in different settings.

Results

We present a new machine learning-based methodology, minMLST, for minimizing the number of genes in cgMLST schemes by identifying subsets of informative genes and analyzing the trade-off between gene reduction and typing performance. The results achieved with minMLST over eight bacterial species show that despite the reduction in the number of genes up to a factor of 10, the typing performance remains very high and significant with an Adjusted Rand Index that ranges between 0.4 and 0.93 in different species and a P-value < 10⁻³. The identification of such optimized MLST schemes for bacterial strain typing is expected to improve the implementation of cgMLST by improving interlaboratory agreement and communication.

Availability and implementation

The python package minMLST is available at https://PyPi.org/project/minmlst/PyPI and supported on Linux and Windows.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-01 +34170294,Toward comprehensive functional analysis of gene lists weighted by gene essentiality scores. ,"Gene functional enrichment analysis represents one of the most popular bioinformatics methods for annotating the pathways and function categories of a given gene list. Current algorithms for enrichment computation such as Fisher's exact test and hypergeometric test totally depend on the category count numbers of the gene list and one gene set. In this case, whatever the genes are, they were treated equally. However, actually genes show different scores in their essentiality in a gene list and in a gene set. It is thus hypothesized that the essentiality scores could be important and should be considered in gene functional analysis. For this purpose, here we proposed WEAT (https://www.cuilab.cn/weat/), a weighted gene set enrichment algorithm and online tool by weighting genes using essentiality scores. We confirmed the usefulness of WEAT using three case studies, the functional analysis of one aging-related gene list, one gene list involved in Lung Squamous Cell Carcinoma (LUSC), and one cardiomyopathy gene list from Drosophila model. Finally, we believe that the WEAT method and tool could provide more possibilities for further exploring the functions of given gene lists. The datasets generated and analyzed during the current study are available on our website at https://www.cuilab.cn/weat/. Supplementary data are available at Bioinformatics online.",2021-06-25 +32022785,Therapeutic Drug Monitoring of Asparaginase: Intra-individual Variability and Predictivity in Children With Acute Lymphoblastic Leukemia Treated With PEG-Asparaginase in the AIEOP-BFM Acute Lymphoblastic Leukemia 2009 Study.,"

Background

Therapeutic drug monitoring (TDM) can identify patients with subtherapeutic asparaginase (ASNase) activity [silent inactivation (SI)] and prospectively guide therapeutic adaptation. However, limited intra-individual variability is a precondition for targeted dosing and the diagnosis of SI.

Methods

In the AIEOP-BFM acute lymphoblastic leukemia (ALL) 2009 trial, 2771 children with ALL were included and underwent ASNase-TDM in a central laboratory in Münster. Two biweekly administrations of pegylated ASNase during induction and a third dose during reinduction or the high-risk block, which was administered several weeks later, were monitored. We calculated (1) the incidence of SI; and (2) the predictivity of SI for SI after the subsequent administration. ASNase activities monitored during induction were categorized into percentiles at the respective sampling time points. These percentiles were used to calculate the intra-individual range of percentiles as a surrogate for intrapatient variability and to evaluate the predictivity of ASNase activity for the subsequent administration.

Results

The overall incidence of SI was low (4.9%). The positive predictive value of SI identified by one sample was ≤21%. Confirmation of SI by a second sample indicated a high positive predictive value of 100% for biweekly administrations, but not for administration more than 17 weeks later. Sampling and/or documentation errors were risks for misdiagnosis of SI. High intra-individual variability in ASNase activities, with ranges of percentiles over more than 2 quartiles and low predictivity, was observed in approximately 25% of the patients. These patients were likely to fail dose individualization based on TDM data.

Conclusions

To use TDM as a basis for clinical decisions, standardized clinical procedures are required and high intra-individual variability should be taken into account. Details of the treatment are available in the European Clinical Trials Database at https://www.clinicaltrialsregister.eu/ctr-search/trial/2007-004270-43/DE.",2020-06-01 +32541506,Complications during hospitalization and at 30 days in the intensive cardiac care unit for patients with ST-elevation versus non-ST-elevation acute coronary syndrome: A protocol for systematic review and meta analysis.,"

Background

In this meta-analysis, we aimed to systematically compare the complications during hospitalization and at 30 days respectively, in intensive cardiac care unit (ICCU) for patients with ST elevation (STE) vs non-STE acute coronary syndrome (NSTE ACS).

Methods

Electronic search databases including http://www.ClinicalTrials.gov, EMBASE, Cochrane Central, Google Scholar, Web of Science, and MEDLINE were searched for publications comparing complications observed in STE ACS vs NSTE ACS patients admitted in ICCU, intensive care unit (ICU) or coronary care unit (CCU). This is a meta-analysis and risk ratios (RR) with 95% confidence intervals (CI) were used to illustrate the data following analysis by the RevMan 5.3 software.

Results

Six studies consisting of a total number of 25,604 participants (12,880 participants admitted due to STE ACS and 12,724 participants admitted due to NSTE ACS) were included. Our results showed that the total outcomes including severely abnormal electrocardiography (ECG) (RR: 1.48, 95% CI: 1.27-1.73; P = .00001) and mortality (RR: 1.83, 95% CI: 1.64-2.04; P = .00001) were significantly higher in patients with STE ACS. Re-infarction (RR: 0.86, 95% CI: 0.62-1.19; P = .37) and heart failure (RR: 1.04, 95% CI: 0.88-1.23; P = .62) were similarly manifested in those patients with ACS. However, the risk for recurrent angina was significantly higher with NSTE ACS (RR: 0.65, 95% CI: 0.46-0.92; P = .01).

Conclusions

Patients with STE ACS were at a higher risk for in-hospital and 30 days mortality in this analysis. In hospital, severely abnormal ECG was also significantly higher in this category of patients compared to NSTE ACS. However, re-admission for heart failure and re-infarction was similar in both groups. Future studies should be able to confirm this hypothesis.",2020-06-01 +,First Report of Powdery Mildew Caused by Golovinomyces spadiceus on Industrial Hemp (Cannabis sativa) in Kentucky,"Industrial hemp (Cannabis sativa) was reintroduced to the United States as a pilot research program under the 2014 Farm Bill. By 2017, there were over 25,000 acres of industrial hemp in the United States, with Kentucky having the second highest acreage in the United States (https://www.votehemp.com/resources/publications/). Hemp is processed for fiber (4%), grain/seed (18%), and cannabidiol (CBD, 62%), and grosses $7.5M for Kentucky growers. Between 2014 and 2018, powdery mildew was observed in numerous greenhouses, in multiple locations, and on several varieties including but not limited to Cherry Wine, Endurance, Otto, proprietary CBD varieties, and fiber and grain breeding lines. Symptoms initially appeared as small, inconspicuous white patches on the adaxial side of leaves. Colonies expanded in size, often coalescing and infecting entire leaves and succulent stems. The disease spread readily to asymptomatic plants. Pathogenicity of three isolates collected from hemp specimens was confirmed through touch inoculation where conidia from infected leaves were pressed onto asymptomatic leaves. Inoculated plants were moist chambered for 48 h and maintained in the greenhouse. Symptoms appeared within 6 to 7 days, and morphological features were identical to the original; noninoculated control plants did not develop symptoms. Mycelia were amphigenous and occasionally caulicolous; hyphae were septate with septations 5 to 6 μm apart. 
Foot cells were cylindrical, measuring (42 to) 57 to 107 (to 120) μm × 9 to 11 μm, followed by one to two shorter cells. Conidiophores were hyaline, singular, and erect, measuring (80 to) 115 to 187 (to 209) μm in length, followed by two to three immature conidia forming a crenate outline. Conidia were catenescent and ellipsoid to ovoid, measuring (29 to) 30 to 39 (to 41) μm × (13 to) 15 to 20 (to 22) μm. Chasmothecia readily formed during autumn, were round and dark brown at maturity, measured (96 to) 109 to 138 (to 159) μm in diameter, and displayed nondescript myceloid appendages. Mature chasmothecia contained five to 15 ovoid-saccate asci, most with short stalks. Asci measured (52 to) 56 to 75 (to 78) μm × (25 to) 29 to 43 (to 50) μm, and each ascus contained two ovoid ascospores measuring (15 to) 18 to 27 (to 32) × (9 to) 11 to 18 (to 19) μm. Morphological characteristics were consistent with descriptions of Golovinomyces spadiceus except that foot cells from these isolates had a wider range of lengths compared with previous reports, 30 to 80 × 9 to 15 μm versus 42 to 116 × 9 to 11 μm (Braun and Cook 2012). Conidial measurements were similar but not identical to G. ambrosiae, which is reported to have a longer conidial length/width ratio (2.0) than G. spadiceus (1.5 to 2.0); these isolates had conidial length/width ratios consistent with G. spadiceus (Braun and Cook 2012). Identification was confirmed by sequencing the 28S and internal transcribed spacer (ITS) regions with primers PM5G/NLP2 for the 3′ half of ITS and 28S and ITS5/PM6G for the 5′ half of ITS (Bradshaw et al. 2017). Sequence data were deposited into GenBank (MK305282). A GenBank BLAST search resulted in a 100% similarity to G. spadiceus (GenBank accession AB769427) and 97% similarity to G. asterum, G. orontii, and G. cichoracearum. The latter species were eliminated based on strong inconsistences in morphological comparisons. This species was determined to be G. 
spadiceus based on morphological features and sequence data. This determination is consistent with those of powdery mildew fungi collected from Cannabis in Canada (Pépin et al. 2018). All samples collected during this period were consistent; no other powdery mildew species was identified. This is the first documented report of G. spadiceus causing powdery mildew on hemp in the United States. With recent legalization of hemp in the United States, it is important to document the species and distribution of powdery mildew fungi affecting this crop.",2019-07-01 +27605102,PHYLOViZ 2.0: providing scalable data integration and visualization for multiple phylogenetic inference methods.,"High Throughput Sequencing provides a cost effective means of generating high resolution data for hundreds or even thousands of strains, and is rapidly superseding methodologies based on a few genomic loci. The wealth of genomic data deposited on public databases such as Sequence Read Archive/European Nucleotide Archive provides a powerful resource for evolutionary analysis and epidemiological surveillance. However, many of the analysis tools currently available do not scale well to these large datasets, nor provide the means to fully integrate ancillary data. Here we present PHYLOViZ 2.0, an extension of PHYLOViZ tool, a platform independent Java tool that allows phylogenetic inference and data visualization for large datasets of sequence based typing methods, including Single Nucleotide Polymorphism (SNP) and whole genome/core genome Multilocus Sequence Typing (wg/cgMLST) analysis. PHYLOViZ 2.0 incorporates new data analysis algorithms and new visualization modules, as well as the capability of saving projects for subsequent work or for dissemination of results.

Availability and implementation

http://www.phyloviz.net/ (licensed under GPLv3).

Contact

cvaz@inesc-id.pt Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-06 +34458138,A 20-Year Research Trend Analysis of the Influence of Anesthesia on Tumor Prognosis Using Bibliometric Methods.,"

Background

Bibliometric analysis is used to gain a systematic understanding of developments in the field of the influence of anesthesia on tumor prognosis and changes in research hot spots over the past 20 years.

Methods

Relevant publications from the Web of Science Core Collection (WoSCC) were downloaded on May 5, 2021. Acquired data were then analyzed using the Online Analysis Platform of Literature Metrology (http://biblimetric.com) and the CiteSpace software was used to analyze and predict trends and hot spots in this field.

Results

1,521 publications on the influence of anesthesia on tumor prognosis were identified and 1494 qualifying records were included in the final analysis. The leading country in this field was the United States of America (USA). The University of Texas MD Anderson Cancer Center (Houston, TX, USA) and Pennsylvania State University (State College, PA, USA) featured the highest number of publications among all institutions. Co-citation cluster labels revealed characteristics of ten main clusters: total intravenous anesthesia, opioid growth factor receptor, gastric cancer cell, opioid receptor, murine model, natural killer cell activity, health-related quality, glioma cell, opioid switching and mu-type opioid receptor. Keyword burst detection indicated that randomized controlled trials (RCTs), volatile anesthetics, and ropivacaine were the newly emerging research hot spots.

Conclusions

This study compiled 1494 publications covering anesthesia and tumor prognosis research and showed that the direction of these studies is likely in transition from opioids and their receptors to other anesthetics, and from retrospective studies to prospective randomized controlled trials. It provides guidance for further research and clinical applications on choosing anesthetic methods and drugs.",2021-08-12 +32626466,Application of data science in risk assessment and early warning.,"The food supply chain has been recognised by the EU as a critical infrastructure, and its complexity is the main cause of vulnerability. Depending on the food matrix, natural and/or deliberate contamination, food-borne diseases or even food fraud incidents may occur worldwide. Consequently, robust predictive models and/or software tools are needed to support decision-making and mitigating risks in an efficient and timely manner. In this frame, the fellow participated in data collection and analysis tasks, so as to provide additional predictive models. The working programme, covered a wide range of aspects related to risk assessment including identification of emerging risks (quantitative), microbiological risk assessment, authenticity assessment, spatio-temporal epidemiological modelling and database formation for hosting predictive microbial models. The training and close integration, in the open-source, in-house (German Federal Institute for Risk Assessment (BfR)) developed software tools under the framework of FoodRisk-Labs (https://foodrisklabs.bfr.bund.de.) for data analysis, predictive microbiology, quantitative microbiological risk assessment and automatic data retrieval purposes allowed for the independent use. Moreover, the fellow actively contributed to the update of the upcoming Yersinia enterocolitica risk assessment, and also in authenticity assessment of edible oils. 
Over the course of the year, the fellow was closely involved in international and national research projects with experts in the above-mentioned disciplines. Lastly, he consolidated his acquired knowledge by presenting his scientific work to conferences, and BfR-internal meetings.",2019-09-17 +31838261,Comparative analysis of genomic variability for drug-resistant strains of Mycobacterium tuberculosis: The special case of Belarus.,"Mycobacterium tuberculosis (M.tb) is the leading cause of death from an infectious disease. Drug resistant tuberculosis (DR-TB) threatens to exacerbate challenges in diagnostics and treatment. It is important to monitor strains circulating in countries with heavy burden of DR-TB, to make informed decisions about treatment, and because in these countries there is an elevated probability that DR-TB may advance to the totally drug resistant form. The TB Portals Program (TBPP, https://TBPortals.niaid.nih.gov) formed a global network of participating institutions and hospitals collecting and analyzing de-identified clinical, imaging and socioeconomic data, augmenting these with genomic sequencing results. TB Portals database includes complete M.tb genomes, with the information about spoligotypes, strains, and genomic variants related to drug resistance. Within the framework of TB Portals, we created Data Exploration Portal (DEPOT), to facilitate visualization and statistical analysis of user-defined cohorts from the entire TB Portals database. A continuing TB Portals research objective is to actively monitor and examine genomic variability that may account for observed differences in DR-TB incident rates and/or difficulties with diagnosis and treatment. Our analysis identified that several genomic variants implicated in drug resistance or improved fitness of the pathogen, were significantly more frequent in M.tb strains circulating in Belarus in comparison with other countries. 
Further studies are necessary to reveal whether the corresponding genomic variants may explain unusually high burden of drug-resistant M.tb in Belarus and suggest improvements for diagnostic and drug therapies.",2019-12-12 +30664776,An online resource for GPCR structure determination and analysis.,"G-protein-coupled receptors (GPCRs) transduce physiological and sensory stimuli into appropriate cellular responses and mediate the actions of one-third of drugs. GPCR structural studies have revealed the general bases of receptor activation, signaling, drug action and allosteric modulation, but so far cover only 13% of nonolfactory receptors. We broadly surveyed the receptor modifications/engineering and methods used to produce all available GPCR crystal and cryo-electron microscopy (cryo-EM) structures, and present an interactive resource integrated in GPCRdb ( http://www.gpcrdb.org ) to assist users in designing constructs and browsing appropriate experimental conditions for structure studies.",2019-01-21 +30027541,Designing An Individualized EHR Learning Plan For Providers.,"Electronic Health Records (EHRs) have been quickly implemented for meaningful use incentives; however these implementations have been associated with provider dissatisfaction and burnout. There are no previously reported instances of a comprehensive EHR educational program designed to engage providers and assist in improving efficiency and understanding of the EHR. Utilizing adult learning theory as a framework, Stanford Children's Health designed a tailored provider efficiency program with various inputs from: (1) provider specific EHR data; (2) provider survey data; and (3) structured observation sessions. This case report outlines the design of this individualized training program including team structure, resource requirements, and early provider response.

Citation

Stevens LA, DiAngi YT, Schremp JD, Martorana MJ, Miller RE, Lee TC, Pageler NM. Designing An Individualized EHR Learning Plan. Appl Clin Inform 2017; 8:924-935 https://doi.org/10.4338/040054.",2017-12-20 +32120139,"FRCD: A comprehensive food risk component database with molecular scaffold, chemical diversity, toxicity, and biodegradability analysis.","The presence of natural toxins, pesticide residues, and illegal additives in food products has been associated with a range of potential health hazards. However, no systematic database exists that comprehensively includes and integrates all research information on these compounds, and valuable information remains scattered across numerous databases and extensive literature reports. Thus, using natural language processing technology, we curated 12,018 food risk components from 152,737 literature reports, 12 authoritative databases, and numerous related regulatory documents. Data on molecular structures, physicochemical properties, chemical taxonomy, absorption, distribution, metabolism, excretion, toxicity properties, and physiological targets within the human body were integrated to afford the comprehensive food risk component database (FRCD, http://www.rxnfinder.org/frcd/). We also analyzed the molecular scaffold and chemical diversity, in addition to evaluating the toxicity and biodegradability of the food risk components. The FRCD could be considered a highly promising tool for future food safety studies.",2020-02-24 +33434383,The Ocean barcode atlas: A web service to explore the biodiversity and biogeography of marine organisms.,"The Ocean Barcode Atlas (OBA) is a user friendly web service designed for biologists who wish to explore the biodiversity and biogeography of marine organisms locked in otherwise difficult to mine planetary scale DNA metabarcode data sets. 
Using just a web browser, a comprehensive picture of the diversity of a taxon or a barcode sequence is visualized graphically on world maps and interactive charts. Interactive results panels allow dynamic threshold adjustments and the display of diversity results in their environmental context measured at the time of sampling (temperature, oxygen, latitude, etc). Ecological analyses such as alpha and beta-diversity plots are produced via publication quality vector graphics representations. Currently, the Ocean Barcode Atlas is deployed online with the (i) Tara Oceans eukaryotic 18S-V9 rDNA metabarcodes; (ii) Tara Oceans 16S/18S rRNA mi Tags; and (iii) 16S-V4 V5 metabarcodes collected during the Malaspina-2010 expedition. Additional prokaryotic or eukaryotic plankton barcode data sets will be added upon availability, given they provide the required complement of barcodes (including raw reads to compute barcode abundance) associated with their contextual environmental variables. Ocean Barcode Atlas is a freely-available web service at: http://oba.mio.osupytheas.fr/ocean-atlas/.",2021-02-10 +34379042,"Urinary 8-oxo-7,8-dihydroguanosine levels are elevated in HCV-infected patients.","HCV patients are usually under substantial oxidative stress because of viral infection. A total of 177 patients with HCV infection and 198 age- and sex-matched healthy controls were enrolled in this study. We evaluated the urinary levels of 8-oxo-7, 8-dihydro-2'deoxyguanosine (8-oxodGuo) and 8-oxo-7, 8-dihydroguanosine (8-oxoGuo) in patients with HCV infection and explored the factors affecting the urinary 8-oxodGuo or 8-oxoGuo levels. Biomarkers of liver function, cancer, and inflammation were determined. Nonparametric correlations were used to evaluate the correlation between 8-oxoGuo or 8-oxodGuo and various laboratory biochemical indicators. 
Results showed that the levels of urinary 8-oxoGuo both in male and female patients with HCV infection were significantly higher than those in healthy controls (both p < 0.0001), while the urinary 8-oxodGuo levels only in male patients with HCV infection were significantly higher than those in healthy controls (p < 0.01). Urinary 8-oxoGuo was significantly associated with the white blood cell count, C-reactive protein level, and 8-oxodGuo level (p = 0.016, p = 0.003, and p = 0.000, respectively). Urinary 8-oxodGuo was significantly associated with the white blood cell count and 8-oxoGuo level (p = 0.018 and p = 0.000, respectively). A regression equation of urinary 8-oxoGuo or 8-oxodGuo was also established using the biomarkers in plasma. The results suggested that patients with a high C-reactive protein level are likely to have high urinary 8-oxoGuo levels as well, which may be useful for assessing the level of inflammation and oxidative stress in HCV patients. Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1961272 .",2021-08-11 +32105730,ncEP: A Manually Curated Database for Experimentally Validated ncRNA-encoded Proteins or Peptides.,"Noncoding RNAs (ncRNAs), such as lncRNAs, circRNAs and pri-miRNAs, play important roles in physiological and pathological processes. Recently, it was demonstrated that they could encode proteins or peptides. However, relevant information is scattered across numerous published articles, which is inconvenient for the exploration of ncRNA translation by researchers. In this study, we presented an ncEP database, which records the low-throughput experimentally validated (LTEV) proteins or peptides encoded by ncRNAs, from published articles. Collectively, ncEP contains 80 entries including 74 proteins or peptides, 22 lncRNAs, 11 circRNAs, 9 pri-miRNAs and 37 other ncRNAs across 18 species from more than 50 articles of over 2000 candidate articles. 
We have provided a user-friendly interface for users to search, browse, visualize, download and submit data. In summary, ncEP provides a relatively comprehensive repository of the LTEV proteins or peptides encoded by ncRNAs and will enrich the knowledge for translation process. ncEP is freely available at http://www.jianglab.cn/ncEP/.",2020-02-24 +34024112,[Assessment of adherence to the Mediterranean diet in university Health Sciences students and its relationship with level of physical activity].,"

Introduction

Objective: to assess the degree of adherence to the Mediterranean diet and the practice of physical activity in university Health Sciences students in Castile-La Mancha. Methods: this was a cross-sectional, observational study by means of a dietary and physical activity survey. The sample consisted of 575 university students (77.7 % women). An initial data collection survey was developed using the Google Forms platform (https://www.google.com/forms/about/). Adherence to the Mediterranean diet was assessed with the Mediterranean Diet Adherence Screener (MEDAS) questionnaire and the modified Prevention with Mediterranean Diet (PREDIMED) questionnaire. The Rapid Assessment of Physical Activity Scale (RAPA) questionnaire was used to measure physical activity. Results: we found a 58.3 % adherence to Mediterranean diet among Health Sciences students, with 38.6 % of average adherence, and 5.0 % of poor adherence, with a low consumption of fruits with no gender differences, and a high consumption of red or processed meat and butter or cream with significant differences between women and men. There is also a high consumption of carbonated beverages (more frequent in women). Likewise, a high percentage of students (22.5 %) do practically no physical activity. As for physical exercise, it is always higher in men, with significant differences (p > 0.05). Conclusion: this study suggests that the sample of university Health Sciences students in Castile-La Mancha shows an acceptable adherence to the Mediterranean diet and insufficient levels of physical activity.",2021-07-01 +34048577,Mergeomics 2.0: a web server for multi-omics data integration to elucidate disease networks and predict therapeutics.,"The Mergeomics web server is a flexible online tool for multi-omics data integration to derive biological pathways, networks, and key drivers important to disease pathogenesis and is based on the open source Mergeomics R package. 
The web server takes summary statistics of multi-omics disease association studies (GWAS, EWAS, TWAS, PWAS, etc.) as input and features four functions: Marker Dependency Filtering (MDF) to correct for known dependency between omics markers, Marker Set Enrichment Analysis (MSEA) to detect disease relevant biological processes, Meta-MSEA to examine the consistency of biological processes informed by various omics datasets, and Key Driver Analysis (KDA) to identify essential regulators of disease-associated pathways and networks. The web server has been extensively updated and streamlined in version 2.0 including an overhauled user interface, improved tutorials and results interpretation for each analytical step, inclusion of numerous disease GWAS, functional genomics datasets, and molecular networks to allow for comprehensive omics integrations, increased functionality to decrease user workload, and increased flexibility to cater to user-specific needs. Finally, we have incorporated our newly developed drug repositioning pipeline PharmOmics for prediction of potential drugs targeting disease processes that were identified by Mergeomics. Mergeomics is freely accessible at http://mergeomics.research.idre.ucla.edu and does not require login.",2021-07-01 +32817611,"Overview and Methods for the Youth Risk Behavior Surveillance System - United States, 2019.","Health risk behaviors practiced during adolescence often persist into adulthood and contribute to the leading causes of morbidity and mortality in the United States. Youth health behavior data at the national, state, territorial, tribal, and local levels help monitor the effectiveness of public health interventions designed to promote adolescent health. The Youth Risk Behavior Surveillance System (YRBSS) is the largest public health surveillance system in the United States, monitoring a broad range of health-related behaviors among high school students. 
YRBSS includes a nationally representative Youth Risk Behavior Survey (YRBS) and separate state, local school district, territorial, and tribal school-based YRBSs. This overview report describes the surveillance system and the 2019 survey methodology, including sampling, data collection procedures, response rates, data processing, weighting, and analyses presented in this MMWR Supplement. A 2019 YRBS participation map, survey response rates, and student demographic characteristics are included. In 2019, a total of 78 YRBSs were administered to high school student populations across the United States (national and 44 states, 28 local school districts, three territories, and two tribal governments), the greatest number of participating sites with representative data since the surveillance system was established in 1991. The nine reports in this MMWR Supplement are based on national YRBS data collected during August 2018-June 2019. A full description of 2019 YRBS results and downloadable data are available (https://www.cdc.gov/healthyyouth/data/yrbs/index.htm). Efforts to improve YRBSS and related data are ongoing and include updating reliability testing for the national questionnaire, transitioning to electronic survey administration (e.g., pilot testing for a tablet platform), and exploring innovative analytic methods to stratify data by school-level socioeconomic status and geographic location. Stakeholders and public health practitioners can use YRBS data (comparable across national, state, tribal, territorial, and local jurisdictions) to estimate the prevalence of health-related behaviors among different student groups, identify student risk behaviors, monitor health behavior trends, guide public health interventions, and track progress toward national health objectives.",2020-08-21 +33851388,Acquired FXIII Deficiency is Associated with High Morbidity.,"

Background

A factor XIII (FXIII) level >30% is considered necessary to prevent spontaneous bleeding. Bleeding is also a risk in patients with acquired FXIII deficiency, but the hemostatic level of FXIII in this context remains to be determined.

Methods

We retrospectively analyzed all patients diagnosed with acquired FXIII deficiency at a large hospital over 3 years (study ID NCT04416594, http://www.clinicaltrials.gov) and assessed clinical data to identify the best cut-off point for FXIII activity to distinguish between low and high risk of major bleeding in a mixed medical and surgical population.

Results

Of the 97 patients who experienced bleeding despite a normal coagulation test, 43.2% had FXIII activity <70%. FXIII activity was significantly lower in surgical patients and patients admitted to the intensive care unit (ICU). Low FXIII activity was significantly associated with long ICU stays and a high incidence of major bleeding.

Conclusion

Acquired FXIII deficiency is associated with high morbidity. The hemostatic level of FXIII in the setting of acquired FXIII deficiency might be above 30%.",2021-06-06 +33815435,"High-Resolution Transcriptome Atlas and Improved Genome Assembly of Common Buckwheat, Fagopyrum esculentum.","Common buckwheat (Fagopyrum esculentum) is an important non-cereal grain crop and a prospective component of functional food. Despite this, the genomic resources for this species and for the whole family Polygonaceae, to which it belongs, are scarce. Here, we report the assembly of the buckwheat genome using long-read technology and a high-resolution expression atlas including 46 organs and developmental stages. We found that the buckwheat genome has an extremely high content of transposable elements, including several classes of recently (0.5-1 Mya) multiplied TEs (""transposon burst"") and gradually accumulated TEs. The difference in TE content is a major factor contributing to the three-fold increase in the genome size of F. esculentum compared with its sister species F. tataricum. Moreover, we detected the differences in TE content between the wild ancestral subspecies F. esculentum ssp. ancestrale and buckwheat cultivars, suggesting that TE activity accompanied buckwheat domestication. Expression profiling allowed us to test a hypothesis about the genetic control of petaloidy of tepals in buckwheat. We showed that it is not mediated by B-class gene activity, in contrast to the prediction from the ABC model. Based on a survey of expression profiles and phylogenetic analysis, we identified the MYB family transcription factor gene tr_18111 as a potential candidate for the determination of conical cells in buckwheat petaloid tepals. The information on expression patterns has been integrated into the publicly available database TraVA: http://travadb.org/browse/Species=Fesc/. 
The improved genome assembly and transcriptomic resources will enable research on buckwheat, including practical applications.",2021-03-16 +34196933,In silico prediction of chemical-induced hematotoxicity with machine learning and deep learning methods.,"Chemical-induced hematotoxicity is an important concern in the drug discovery, since it can often be fatal when it happens. It is quite useful for us to give special attention to chemicals which can cause hematotoxicity. In the present study, we focused on in silico prediction of chemical-induced hematotoxicity with machine learning (ML) and deep learning (DL) methods. We collected a large data set contained 632 hematotoxic chemicals and 1525 approved drugs without hematotoxicity. Computational models were built using several different machine learning and deep learning algorithms integrated on the Online Chemical Modeling Environment (OCHEM). Based on the three best individual models, a consensus model was developed. It yielded the prediction accuracy of 0.83 and balanced accuracy of 0.77 on external validation. The consensus model and the best individual model developed with random forest regression and classification algorithm (RFR) and QNPR descriptors were made available at https://ochem.eu/article/135149 , respectively. The relevance of 8 commonly used molecular properties and chemical-induced hematotoxicity was also investigated. Several molecular properties have an obvious differentiating effect on chemical-induced hematotoxicity. Besides, 12 structural alerts responsible for chemical hematotoxicity were identified using frequency analysis of substructures from Klekota-Roth fingerprint. These results should provide meaningful knowledge and useful tools for hematotoxicity evaluation in drug discovery and environmental risk assessment.",2021-07-01 +34050760,MTR3D: identifying regions within protein tertiary structures under purifying selection.,"The identification of disease-causal variants is non-trivial. 
By mapping population variation from over 448,000 exome and genome sequences to over 81,000 experimental structures and homology models of the human proteome, we have calculated both regional intolerance to missense variation (Missense Tolerance Ratio, MTR), using a sliding window of 21-41 codons, and introduce a new 3D spatial intolerance to missense variation score (3D Missense Tolerance Ratio, MTR3D), using spheres of 5-8 Å. We show that the MTR3D is less biased by regions with limited data and more accurately identifies regions under purifying selection than estimates relying on the sequence alone. Intolerant regions were highly enriched for both ClinVar pathogenic and COSMIC somatic missense variants (Mann-Whitney U test P < 2.2 × 10^-16). Further, we combine sequence- and spatial-based scores to generate a consensus score, MTRX, which distinguishes pathogenic from benign variants more accurately than either score separately (AUC = 0.85). The MTR3D server enables easy visualisation of population variation, MTR, MTR3D and MTRX scores across the entire gene and protein structure for >17,000 human genes and >42,000 alternative transcripts, including both Ensembl and RefSeq transcripts. MTR3D is freely available by user-friendly web-interface and API at http://biosig.unimelb.edu.au/mtr3d/.",2021-07-01 +32614398,Using AnABlast for intergenic sORF prediction in the Caenorhabditis elegans genome.,"

Motivation

Short bioactive peptides encoded by small open reading frames (sORFs) play important roles in eukaryotes. Bioinformatics prediction of ORFs is an early step in a genome sequence analysis, but sORFs encoding short peptides, often using non-AUG initiation codons, are not easily discriminated from false ORFs occurring by chance.

Results

AnABlast is a computational tool designed to highlight putative protein-coding regions in genomic DNA sequences. This protein-coding finder is independent of ORF length and reading frame shifts, thus making of AnABlast a potentially useful tool to predict sORFs. Using this algorithm, here, we report the identification of 82 putative new intergenic sORFs in the Caenorhabditis elegans genome. Sequence similarity, motif presence, expression data and RNA interference experiments support that the underlined sORFs likely encode functional peptides, encouraging the use of AnABlast as a new approach for the accurate prediction of intergenic sORFs in annotated eukaryotic genomes.

Availability and implementation

AnABlast is freely available at http://www.bioinfocabd.upo.es/ab/. The C.elegans genome browser with AnABlast results, annotated genes and all data used in this study is available at http://www.bioinfocabd.upo.es/celegans.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +34157250,The Contribution of Socioeconomic Status to Children's Performance on Three Grammatical Measures in the Edmonton Narrative Norms Instrument.,"Purpose The purpose of this study was to evaluate the relative contribution of socioeconomic status (SES) on three grammatical measures-finite verb morphology composite (FVMC), percent grammatical utterances (PGU), and clausal density-in children between the ages of 4 and 9 years. Method Data for this study were from the normative sample in the Edmonton Narrative Norms Instrument. For 359 children, hierarchical linear regression was performed to evaluate the amount of variance in FVMC, PGU, and clausal density that was uniquely explained by SES after accounting for child chronological age and language status (typical, impaired). Results After child age and language status were controlled, SES was a significant predictor of PGU and clausal density scores, but not of FVMC scores. SES uniquely accounted for 0.5% of variance in PGU scores and 0.8% of variance in clausal density scores. Conclusions Consistent with maturational accounts of children's development of tense markers, results of this study offer evidence that, among grammatical measures, FVMC is uniquely robust to variation in SES. Although significant, the variance of PGU and clausal density scores uniquely accounted for by SES was close to minimum. Clinicians can therefore include these three grammatical measures for assessing children of different socioeconomic backgrounds. Supplemental Material https://doi.org/10.23641/asha.14810484.",2021-06-22 +29985390,Subclonal mutation selection in mouse lymphomagenesis identifies known cancer loci and suggests novel candidates.,"Determining whether recurrent but rare cancer mutations are bona fide driver mutations remains a bottleneck in cancer research. 
Here we present the most comprehensive analysis of murine leukemia virus-driven lymphomagenesis produced to date, sequencing 700,000 mutations from >500 malignancies collected at time points throughout tumor development. This scale of data allows novel statistical approaches for identifying selected mutations and yields a high-resolution, genome-wide map of the selective forces surrounding cancer gene loci. We also demonstrate negative selection of mutations that may be deleterious to tumor development indicating novel avenues for therapy. Screening of two BCL2 transgenic models confirmed known drivers of human non-Hodgkin lymphoma, and implicates novel candidates including modifiers of immunosurveillance and MHC loci. Correlating mutations with genotypic and phenotypic features independently of local variance in mutation density also provides support for weakly evidenced cancer genes. An online resource http://mulv.lms.mrc.ac.uk allows customized queries of the entire dataset.",2018-07-09 +34468180,"Changes in Latina Women's Exposure to Cleaning Chemicals Associated with Switching from Conventional to ""Green"" Household Cleaning Products: The LUCIR Intervention Study.","

Background

Household cleaning products may be a significant source of chemical exposures, including carcinogens and suspected endocrine disruptors.

Objectives

We characterized exposures during routine household cleaning and tested an intervention to reduce exposures to cleaning product chemicals.

Methods

The Lifting Up Communities with Interventions and Research (LUCIR) Study is a youth-led, community-based intervention project. Youth researchers conducted personal air monitoring with 50 Latina women while they cleaned their homes with their regular cleaning products (preintervention visit) and then 1 week later while they used ""green"" cleaning products provided by the study (postintervention visit). Air samples were analyzed for volatile and semivolatile organic compounds using gas chromatography-mass spectrometry and high-performance liquid chromatography. We compared pre- and postintervention air concentrations of 47 chemicals of concern, selected because they were on California's Proposition 65 list of carcinogens or reproductive/developmental toxicants or were suspected endocrine disruptors. Youth researchers were integrally involved in the study design, data collection, interpretation, and dissemination of findings.

Results

We observed statistically significant decreases in air concentrations of 17 chemicals of concern when participants switched to green cleaning products, including decreases in geometric mean concentrations of 1,4-dioxane (-46.4%), chloroform (-86.7%), benzene (-24.8%), naphthalene (-40.3%), toluene (-24.2%), and hexane (-35.5%). We observed significant increases in air concentrations of three fragrance compounds: the plant-derived terpene, beta-myrcene (221.5%), and the synthetic musks celestolide (31.0%) and galaxolide (79.6%). Almost all participants (98%) said the replacement products worked as well as their original products, and 90% said that they would consider buying the replacement products in the future.

Discussion

This study demonstrates that choosing cleaning products that are marketed as green may reduce exposure to several carcinogens and endocrine disruptors. Future studies should determine whether use of unscented green products would further reduce exposure to terpenes and musks. https://doi.org/10.1289/EHP8831.",2021-09-01 +34124451,Solubility Prediction from Molecular Properties and Analytical Data Using an In-phase Deep Neural Network (Ip-DNN).,"Materials informatics is an emerging field that allows us to predict the properties of materials and has been applied in various research and development fields, such as materials science. In particular, solubility factors such as the Hansen and Hildebrand solubility parameters (HSPs and SP, respectively) and Log P are important values for understanding the physical properties of various substances. In this study, we succeeded at establishing a solubility prediction tool using a unique machine learning method called the in-phase deep neural network (ip-DNN), which starts exclusively from the analytical input data (e.g., NMR information, refractive index, and density) to predict solubility by predicting intermediate elements, such as molecular components and molecular descriptors, in the multiple-step method. For improving the level of accuracy of the prediction, intermediate regression models were employed when performing in-phase machine learning. In addition, we developed a website dedicated to the established solubility prediction method, which is freely available at ""http://dmar.riken.jp/matsolca/"".",2021-05-17 +32009518,RATEmiRs: the rat atlas of tissue-specific and enriched miRNAs for discerning baseline expression exclusivity of candidate biomarkers.,"MicroRNAs (miRNAs) are small RNAs that regulate mRNA expression and have been targeted as biomarkers of organ damage and disease. To explore the utility of miRNAs to assess injury to specific tissues, a tissue atlas of miRNA abundance was constructed. 
The Rat Atlas of Tissue-specific and Enriched miRNAs (RATEmiRs) catalogues miRNA sequencing data from 21 and 23 tissues in male and female Sprague-Dawley rats, respectively. RATEmiRs identifies tissue-enriched (TE), tissue-specific (TS), or organ-specific (OS) miRNAs via comparisons of one or more tissue or organ vs others. We provide a brief overview of RATEmiRs and present how to use it to detect miRNA expression abundance of candidate biomarkers as well as to compare the expression of miRNAs between rat and human. The database is available at https://www.niehs.nih.gov/ratemirs/.",2020-02-12 +34314651,Data-driven identification of complex disease phenotypes.,"Disease interaction in multimorbid patients is relevant to treatment and prognosis, yet poorly understood. In the present work, we combine approaches from network science, machine learning and computational phenotyping to assess interactions between two or more diseases in a transparent way across the full diagnostic spectrum. We demonstrate that health states of hospitalized patients can be better characterized by including higher-order features capturing interactions between more than two diseases. We identify a meaningful set of higher-order diagnosis features that account for synergistic disease interactions in a population-wide (N = 9 M) medical claims dataset. We construct a generalized disease network where (higher-order) diagnosis features are linked if they predict similar diagnoses across the whole diagnostic spectrum. The fact that specific diagnoses are generally represented multiple times in the network allows for the identification of putatively different disease phenotypes that may reflect different disease aetiologies. At the example of obesity, we demonstrate the purely data-driven detection of two complex phenotypes of obesity. 
As indicated by a matched comparison between patients having these phenotypes, we show that these phenotypes show specific characteristics of what has been controversially discussed in the medical literature as metabolically healthy and unhealthy obesity, respectively. The findings also suggest that metabolically healthy patients show some progression towards more unhealthy obesity over time, a finding that is consistent with longitudinal studies indicating a transient nature of metabolically healthy obesity. The disease network is available for exploration at https://disease.network/.",2021-07-28 +33667985,Antibodies against HLA cross-reactivity groups: From single antigen bead assay to immunoinformatics interpretation of epitopes.,"Identification of anti-human leukocyte antigen (HLA) antibodies (Abs) is based on Luminex™ technology. We used bioinformatics to (i) study the correlations of mean fluorescence intensities (MFIs) for all the possible allele pairs, and (ii) determine the degree of epitope homology between HLA antigens. Using MFI data on anti-HLA Abs from 6000 Luminex™ assays, we provide an updated overview of class I and II HLA antigen cross-reactivity in which each node corresponded to an allele and each link corresponded to a strong correlation between two alleles (Spearman's ρ > 0.8). We compared these correlations with the serological groups and the results of an epitope analysis. The strongest correlations concerned allele-specific Abs directed against the same antigen. For the HLA-A locus, the highest values of Spearman's ρ reflected broad specificity. For the HLA-B locus, graphs defined the HLA-Bw4 public epitope, and correlations between HLA-A and -B alleles were only present for beads with the same Bw4 public epitope. For the HLA-C locus, we identified two groups that differed with regard to their KIR ligand subclassification. Lastly, the HLA-DRB1 subgroups were part of a network. 
In the epitope analysis, Spearman's ρ was related to the number of matched epitopes within pairs of alleles. The combination of Spearman's ρ with simple, undirected graphing constitutes an effective tool for understanding routinely encountered cross-reactivity profiles. Based on this model, we have implemented an online data visualization tool available at http://cusureau.pythonanywhere.com/.",2021-03-02 +31589313,WASPS: web-assisted symbolic plasmid synteny server.,"

Motivation

Comparative plasmid genome analyses require complex tools, the manipulation of large numbers of sequences and constitute a daunting task for the wet bench experimentalist. Dedicated plasmid databases are sparse, only comprise bacterial plasmids and provide exclusively access to sequence similarity searches.

Results

We have developed Web-Assisted Symbolic Plasmid Synteny (WASPS), a web service granting protein and DNA sequence similarity searches against a database comprising all completely sequenced natural plasmids from bacterial, archaeal and eukaryal origin. This database pre-calculates orthologous protein clustering and enables WASPS to generate fully resolved plasmid synteny maps in real time using internal and user-provided DNA sequences.

Availability and implementation

WASPS queries befit all current browsers such as Firefox, Edge or Safari while the best functionality is achieved with Chrome. Internet Explorer is not supported. WASPS is freely accessible at https://archaea.i2bc.paris-saclay.fr/wasps/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +33468588,A First Look at the Essential Genes of Pseudomonas protegens. ,"Transposon insertion sequencing is a useful tool to identify the genes that are essential for a bacterial species to grow and divide effectively. In this issue of Journal of Bacteriology, Fabian et al. present the first set of transposon insertion sequencing data highlighting the genes essential to the plant-commensal species Pseudomonas protegens strain Pf-5 and describe comparative analyses with other pseudomonads (B. K. Fabian, C. Foster, A. J. Asher, L. D. Elbourne, et al., J Bacteriol 203:e00432-20, 2021, https://doi.org/10.1128/JB.00432-20).",2021-03-08 +34643443,"Low and High Ambient Temperatures during Pregnancy and Birth Weight among 624,940 Singleton Term Births in Israel (2010-2014): An Investigation of Potential Windows of Susceptibility.","

Background

Exposure to heat during pregnancy has been associated with reduced fetal growth. Less is known about associations with cold and the potential for critical time windows of exposure.

Objectives

We aimed to evaluate, in a national retrospective cohort, critical windows of susceptibility during pregnancy to extreme temperatures (low and high) and fetal growth, among 624,940 singleton term births in Israel during the period 2010-2014.

Methods

Temperature exposures were estimated using a spatially refined gridded climate data set with a 1-h and 1-km2 resolution. Percentiles of temperature were categorized by climatic zone for the entire pregnancy and by trimesters and weeks. Generalized additive models with the distributed lag nonlinear model framework were used to estimate unadjusted and adjusted associations between percentiles and categories of temperature and fetal growth markers: term [births after 36 weeks of gestational age (GA)] mean birth weight and term low birth weight (tLBW, term infants with birth weight below 2,500g).

Results

After adjustment, extreme temperatures (percentiles) during the entire pregnancy were associated with a lower mean birth weight {≤10th vs. 41st-50th percentile: -56g [95% confidence interval (CI): -63g, -50g]; >90th vs. 41st-50th percentile: -65g; 95% CI: -72g, -58g}. Similar inverse U-shaped patterns were observed for all trimesters, with stronger associations for heat than for cold and for exposures during the third trimester. For heat, results suggest critical windows between 3-9 and 19-34 GA-weeks, with the strongest association estimated at 3 GA-weeks (temperature >90th vs. 41st-50th percentiles: -3.8g; 95% CI: -7.1g, -0.4g). For cold, there was a consistent trend of null associations early in pregnancy and stronger inverse associations over time, with the strongest association at 36 GA-weeks (≤10th vs. 41st-50th percentiles: -2.9g; 95% CI: -6.5g, 0.7g). For tLBW, U-shape patterns were estimated for the entire pregnancy and third trimester exposures, as well as nonsignificant associations with heat for 29-36 GA-weeks. Generally, the patterns of associations with temperatures during the entire pregnancy were consistent when stratified by urbanicity and geocoding hierarchy, when estimated for daily minimum and maximum temperatures, when exposures were classified based on temperature distributions in 49 natural regions, and when estimated for all live births.

Discussion

Findings from our study of term live births in Israel (2010-2014) suggest that exposure to extreme temperatures, especially heat, during specific time windows may result in reduced fetal growth. https://doi.org/10.1289/EHP8117.",2021-10-13 +31593245,BCSExplorer: a customized biosynthetic chemical space explorer with multifunctional objective function analysis.,"

Summary

The biosynthetic ability of living organisms has important applications in producing bulk chemicals, biofuels and natural products. Based on the most comprehensive biosynthesis knowledgebase, a computational system, BCSExplorer, is proposed to discover the unexplored chemical space using nature's biosynthetic potential. BCSExplorer first integrates the most comprehensive biosynthetic reaction database with 280 000 biochemical reactions and 60 000 chemicals biosynthesized globally over the past 130 years. Second, in this study, a biosynthesis tree is computed for a starting chemical molecule based on a comprehensive biotransformation rule library covering almost all biosynthetic possibilities, in which redundant rules are removed using a new algorithm. Moreover, biosynthesis feasibility, drug-likeness and toxicity analysis of a new generation of compounds will be pursued in further studies to meet various needs. BCSExplorer represents a novel method to explore biosynthetically available chemical space.

Availability and implementation

BCSExplorer is available at: http://www.rxnfinder.org/bcsexplorer/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +31793066,Practical recommendations of the EAU-ESPU guidelines committee for monosymptomatic enuresis-Bedwetting.,"

Background and aims

The objective of this update of the EAU-ESPU guidelines recommendations for nocturnal enuresis was to review the recent published literature of studies, reviews, guidelines regarding the etiology, diagnosis and treatment options of nocturnal enuresis and transform the information into a practical recommendation strategy for the general practitioner, pediatrician, pediatric urologist and urologist.

Material and methods

Since 2012 a monthly literature search using Scopus® was performed and the relevant literature was reviewed and prospectively registered on the European Urology bedwetting enuresis resource center (http://bedwetting.europeanurology.com/). In addition, guideline papers and statements of the European Society for Paediatric Urology (ESPU), the European Association of Urology (EAU), the National Institute for Health and Care Excellence (NICE) and the International Children Continence Society (ICCS) were used to update the knowledge and evidence resulting in this practical recommendation strategy. Recommendations have been discussed and agreed within the working group of the EAU-ESPU guidelines committee members.

Results

The recommendations focus on placing the child and his family in a position of control. A pragmatic analysis of the bedwetting problem is made by collecting voiding and drinking habits during the day, measuring nighttime urine production and identifying possible risk factors such as high-volume evening drinking, nighttime overactive bladder, behavioral or psychological problems or sleep disordered breathing. A questionnaire will help to identify those risk factors.

Conclusion

Motivation of the child is important for success. Continuous involvement of the child and the family in the treatment will improve treatment compliance, success and patient satisfaction.",2019-12-02 +32016318,"Variation benchmark datasets: update, criteria, quality and applications. ","Development of new computational methods and testing their performance has to be carried out using experimental data. Only in comparison to existing knowledge can method performance be assessed. For that purpose, benchmark datasets with known and verified outcome are needed. High-quality benchmark datasets are valuable and may be difficult, laborious and time consuming to generate. VariBench and VariSNP are the two existing databases for sharing variation benchmark datasets used mainly for variation interpretation. They have been used for training and benchmarking predictors for various types of variations and their effects. VariBench was updated with 419 new datasets from 109 papers containing altogether 329 014 152 variants; however, there is plenty of redundancy between the datasets. VariBench is freely available at http://structure.bmc.lu.se/VariBench/. The contents of the datasets vary depending on information in the original source. The available datasets have been categorized into 20 groups and subgroups. There are datasets for insertions and deletions, substitutions in coding and non-coding region, structure mapped, synonymous and benign variants. Effect-specific datasets include DNA regulatory elements, RNA splicing, and protein property for aggregation, binding free energy, disorder and stability. Then there are several datasets for molecule-specific and disease-specific applications, as well as one dataset for variation phenotype effects. Variants are often described at three molecular levels (DNA, RNA and protein) and sometimes also at the protein structural level including relevant cross references and variant descriptions. 
The updated VariBench facilitates development and testing of new methods and comparison of obtained performances to previously published methods. We compared the performance of the pathogenicity/tolerance predictor PON-P2 to several benchmark studies, and show that such comparisons are feasible and useful, however, there may be limitations due to lack of provided details and shared data. Database URL: http://structure.bmc.lu.se/VariBench.",2020-01-01 +34645500,Periappendiceal fat-stranding models for discriminating between complicated and uncomplicated acute appendicitis: a diagnostic and validation study.,"

Background

Recent studies have reported promising outcomes of non-operative treatment for uncomplicated appendicitis; however, the preoperative prediction of complicated appendicitis is challenging. We developed models by incorporating fat stranding (FS), which is commonly observed in perforated appendicitis.

Material and methods

We reviewed the data of 402 consecutive patients with confirmed acute appendicitis from our prospective registry. Multivariate logistic regression was performed to select clinical and radiographic factors predicting complicated acute appendicitis in our model 1 (involving backward elimination) and model 2 (involving stepwise selection). We compared c statistics among scoring systems developed by Bröker et al. (in J Surg Res 176(1):79-83. https://doi.org/10.1016/j.jss.2011.09.049 , 2012), Imaoka et al. (in World J Emerg Surg 11(1):1-5, 2016), Khan et al. (in Cureus. https://doi.org/10.7759/cureus.4765 , 2019), Kim et al. (in Ann Coloproctol 31(5):192, 2015), Kang et al. (in Medicine 98(23): e15768, 2019), Atema et al. (in Br J Surg 102(8):979-990. https://doi.org/10.1002/bjs.9835 , 2015), Avanesov et al. (in Eur Radiol 28(9):3601-3610, 2018), and Kim et al. (in Abdom Radiol 46:1-12, 2020). Finally, we examined our models by performing the integrated discrimination improvement (IDI) test.

Results

Among enrolled patients, 64 (15.9%) had complicated acute appendicitis. We developed new 10-point scoring models by including the following variables: C-reactive protein, neutrophil to lymphocyte ratio, and computed tomography features of FS, ascites, and appendicolith. A cutoff score of ≥ 6 exhibited a high sensitivity of 82.8% and a specificity of 82.8% for model 1 and 81.3% and 82.3% for model 2, respectively, with c statistics of 0.878 (model 1) and 0.879 (model 2). Compared with the model developed by Bröker et al. which included C-reactive protein and the abdominal pain duration (c statistic: 0.778), the models developed by Atema et al. (c statistic: 0.826, IDI: 5.92%, P = 0.0248), H.Y Kim et al. (c statistics: 0.838, IDI: 13.82%, P = 0.0248), and our two models (IDI: 18.29%, P < 0.0001) demonstrated a significantly higher diagnostic accuracy.

Conclusion

Our models and the scoring systems developed by Atema et al. and Kim et al. were validated to have a high diagnostic accuracy; moreover, our models included the lowest number of variables.",2021-10-13 +32599617,DeepVF: a deep learning-based hybrid framework for identifying virulence factors using the stacking strategy. ,"Virulence factors (VFs) enable pathogens to infect their hosts. A wealth of individual, disease-focused studies has identified a wide variety of VFs, and the growing mass of bacterial genome sequence data provides an opportunity for computational methods aimed at predicting VFs. Despite their attractive advantages and performance improvements, the existing methods have some limitations and drawbacks. Firstly, as the characteristics and mechanisms of VFs are continually evolving with the emergence of antibiotic resistance, it is more and more difficult to identify novel VFs using existing tools that were previously developed based on the outdated data sets; secondly, few systematic feature engineering efforts have been made to examine the utility of different types of features for model performances, as the majority of tools only focused on extracting very few types of features. By addressing the aforementioned issues, the accuracy of VF predictors can likely be significantly improved. This, in turn, would be particularly useful in the context of genome wide predictions of VFs. In this work, we present a deep learning (DL)-based hybrid framework (termed DeepVF) that is utilizing the stacking strategy to achieve more accurate identification of VFs. Using an enlarged, up-to-date dataset, DeepVF comprehensively explores a wide range of heterogeneous features with popular machine learning algorithms. 
Specifically, four classical algorithms, including random forest, support vector machines, extreme gradient boosting and multilayer perceptron, and three DL algorithms, including convolutional neural networks, long short-term memory networks and deep neural networks are employed to train 62 baseline models using these features. In order to integrate their individual strengths, DeepVF effectively combines these baseline models to construct the final meta model using the stacking strategy. Extensive benchmarking experiments demonstrate the effectiveness of DeepVF: it achieves a more accurate and stable performance compared with baseline models on the benchmark dataset and clearly outperforms state-of-the-art VF predictors on the independent test. Using the proposed hybrid ensemble model, a user-friendly online predictor of DeepVF (http://deepvf.erc.monash.edu/) is implemented. Furthermore, its utility, from the user's viewpoint, is compared with that of existing toolkits. We believe that DeepVF will be exploited as a useful tool for screening and identifying potential VFs from protein-coding gene sequences in bacterial genomes.",2021-05-01 +27903890,Pharos: Collating protein information to shed light on the druggable genome.,"The 'druggable genome' encompasses several protein families, but only a subset of targets within them have attracted significant research attention and thus have information about them publicly available. The Illuminating the Druggable Genome (IDG) program was initiated in 2014, has the goal of developing experimental techniques and a Knowledge Management Center (KMC) that would collect and organize information about protein targets from four families, representing the most common druggable targets with an emphasis on understudied proteins. 
Here, we describe two resources developed by the KMC: the Target Central Resource Database (TCRD) which collates many heterogeneous gene/protein datasets and Pharos (https://pharos.nih.gov), a multimodal web interface that presents the data from TCRD. We briefly describe the types and sources of data considered by the KMC and then highlight features of the Pharos interface designed to enable intuitive access to the IDG knowledgebase. The aim of Pharos is to encourage 'serendipitous browsing', whereby related, relevant information is made easily discoverable. We conclude by describing two use cases that highlight the utility of Pharos and TCRD.",2016-11-29 +31482727,Drug-drug interaction prediction using PASS.,"Simultaneous use of the drugs may lead to undesirable Drug-Drug Interactions (DDIs) in the human body. Many DDIs are associated with changes in drug metabolism that performed by Drug-Metabolizing Enzymes (DMEs). In this case, DDI manifests itself as a result of the effect of one drug on the biotransformation of other drug(s), its slowing down (in the case of inhibiting DME) or acceleration (in case of induction of DME), which leads to a change in the pharmacological effect of the drugs combination. We used OpeRational ClassificAtion (ORCA) system for categorizing DDIs. ORCA divides DDIs into five classes: contraindicated (class 1), provisionally contraindicated (class 2), conditional (class 3), minimal risk (class 4), no interaction (class 5). We collected a training set consisting of several thousands of drug pairs. Algorithm of PASS program was used for the first, second and third classes DDI prediction. Chemical descriptors called PoSMNA (Pairs of Substances Multilevel Neighbourhoods of Atoms) were developed and implemented in PASS software to describe in a machine-readable format drug substances pairs instead of the single molecules. The average accuracy of DDI class prediction is about 0.84. 
A freely available web resource for DDI prediction was developed (http://way2drug.com/ddi/).",2019-09-04 +33663847,Predicting sperm production of young dairy bulls using collection history and management factors.,"Selection of elite young dairy bulls by using genomic data shortened the generation interval and increased pressure to collect and market germplasm at an early age. The objectives of this study were (1) develop prediction models for daily, weekly, and monthly total sperm (TSp) production from collection history, health status, and management factors, and (2) assess the ability of these models to forecast future TSp production, as well as differences in prediction accuracy by seasonality or age of bull. Data consisted of 43,918 daily processing records from 1,037 Holstein and Jersey bulls between 10 and 28 mo of age at collection. Potential explanatory variables included year and season of collection, barn location, collection frequency, breed, scrotal circumference, TSp in previous months, health events, and age at arrival, first collection, and current collection. Linear regression, random forest (RF), Bayesian regularized neural network, model tree, multilayer perceptron neural network with multiple layers, and extreme learning machine were used to predict daily, weekly, and monthly TSp (R v3.5.1, https://www.r-project.org/). In the additive approach, all prior data were used for training; however, in the fixed-window approach, records from 3 previous months were used for age-based prediction, records from 4 previous months or 1 yr were used for the monthly date-based analyses, and records from 1 previous month or year were used for the weekly date-based analyses. Model performance was measured by root mean squared error (RMSE) and the correlation (r) between actual and predicted TSp in testing sets. 
In monthly analyses, RF with additive training performed best in age-based (RMSE = 13.6 billion cells, r = 0.93) and date-based (RMSE = 11.9, r = 0.94) prediction, compared with linear regression (age-based RMSE = 16.6, r = 0.89; date-based RMSE = 15.5, r = 0.90) and Bayesian regularized neural network (age-based RMSE = 14.1, r = 0.92). On average, RMSE was 0.93 or 0.14 billion cells greater with fixed 4-mo or 1-yr training windows, respectively, than in the additive analyses. The most important management variables affecting TSp were collection frequency, TSp in previous months, and age at collection. Results indicate RF models with additive training can predict TSp output of individual bulls with ≥85% accuracy up to 4 mo into the future. Spikes in accuracy were associated with sire summary times and company processing changes, and accuracy tended to stabilize when bulls reached 19 to 20 mo of age.",2021-03-02 +33712023,A prognostic model for hepatocellular carcinoma based on apoptosis-related genes.,"

Background

Dysregulation of the balance between proliferation and apoptosis is the basis for human hepatocarcinogenesis. In many malignant tumors, such as hepatocellular carcinoma (HCC), there is a correlation between apoptotic dysregulation and poor prognosis. However, the prognostic values of apoptosis-related genes (ARGs) in HCC have not been elucidated.

Methods

To screen for differentially expressed ARGs, the expression levels of 161 ARGs from The Cancer Genome Atlas (TCGA) database ( https://cancergenome.nih.gov/ ) were analyzed. Gene Ontology (GO) enrichment and the Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway analyses were performed to evaluate the underlying molecular mechanisms of differentially expressed ARGs in HCC. The prognostic values of ARGs were established using Cox regression, and subsequently, a prognostic risk model for scoring patients was developed. Kaplan-Meier (K-M) and receiver operating characteristic (ROC) curves were plotted to determine the prognostic value of the model.

Results

Compared with normal tissues, 43 highly upregulated and 8 downregulated ARGs in HCC tissues were screened. GO analysis results revealed that these 51 genes are indeed related to the apoptosis function. KEGG analysis revealed that these 51 genes were correlated with MAPK, P53, TNF, and PI3K-AKT signaling pathways, while Cox regression revealed that 5 ARGs (PPP2R5B, SQSTM1, TOP2A, BMF, and LGALS3) were associated with prognosis and were, therefore, obtained to develop the prognostic model. Based on the median risk scores, patients were categorized into high-risk and low-risk groups. Patients in the low-risk groups exhibited significantly elevated 2-year or 5-year survival probabilities (p < 0.0001). The risk model had a better clinical potency than the other clinical characteristics, with the area under the ROC curve (AUC = 0.741). The prognosis of HCC patients was established from a plotted nomogram.

Conclusion

Based on the differential expression of ARGs, we established a novel risk model for predicting HCC prognosis. This model can also be used to inform the individualized treatment of HCC patients.",2021-03-12 +34494856,"Ozone Exposure, Outdoor Physical Activity, and Incident Type 2 Diabetes in the SALSA Cohort of Older Mexican Americans.","

Background

Type 2 diabetes is a leading contributor to the global burden of morbidity and mortality. Ozone (O3) exposure has previously been linked to diabetes.

Objective

We studied the impact of O3 exposure on incident diabetes risk in elderly Mexican Americans and investigated whether outdoor physical activity modifies the association.

Methods

We selected 1,090 Mexican American participants from the Sacramento Area Latino Study on Aging conducted from 1998 to 2007. Ambient O3 exposure levels were modeled with a land-use regression built with saturation monitoring data collected at 49 sites across the Sacramento metropolitan area. Using Cox proportional hazard models, we estimated the risk of developing incident diabetes based on average O3 exposure modeled for 5-y prior to incident diabetes diagnosis or last follow-up. Further, we estimated outdoor leisure-time physical activity at baseline and investigated whether higher vs. lower levels modified the association between O3 exposure and diabetes.

Results

In total, 186 incident diabetes cases were identified during 10-y follow-up. Higher levels of physical activity were negatively associated with incident diabetes [hazard ratio (HR)=0.64 (95% CI: 0.43, 0.95)]. The estimated HR for incident diabetes was 1.13 (95% CI: 1.00, 1.28) per 10-ppb increment of 5-y average O3 exposure; also, this association was stronger among those physically active outdoors [HR=1.52 (95% CI: 1.21, 1.90)], and close to null for those reporting lower levels of outdoor activity [HR=1.04 (95% CI: 0.90, 1.20), p for interaction=0.01].

Conclusions

Our findings suggest that ambient O3 exposure contributes to the development of type 2 diabetes, particularly among those with higher levels of leisure-time outdoor physical activity. Policies and strategies are needed to reduce O3 exposure to guarantee that the health benefits of physical activity are not diminished by higher levels of O3 pollution in susceptible populations such as older Hispanics. https://doi.org/10.1289/EHP8620.",2021-09-08 +34009014,DNA Methylation in Babies Born to Nonsmoking Mothers Exposed to Secondhand Smoke during Pregnancy: An Epigenome-Wide Association Study.,"

Background

Maternal smoking during pregnancy is related to altered DNA methylation in infant umbilical cord blood. The extent to which low levels of smoke exposure among nonsmoking pregnant women relates to offspring DNA methylation is unknown.

Objective

This study sought to evaluate relationships between maternal prenatal plasma cotinine levels and DNA methylation in umbilical cord blood in newborns using the Infinium HumanMethylation 450K BeadChip.

Methods

Participants from the Newborn Epigenetics Study cohort who reported not smoking during pregnancy had verified low levels of cotinine from maternal prenatal plasma (0 ng/mL to <4 ng/mL), and offspring epigenetic data from umbilical cord blood were included in this study (n=79). Multivariable linear regression models were fit to the data, controlling for cell proportions, age, race, education, and parity. Estimates represent changes in response to any 1-ng/mL unit increase in exposure.

Results

Multivariable linear regression models yielded 29,049 CpGs that were differentially methylated in relation to increases in cotinine at a 5% false discovery rate. Top CpGs were within or near genes involved in neuronal functioning (PRKG1, DLGAP2, BSG), carcinogenesis (FHIT, HSPC157) and inflammation (AGER). Kyoto Encyclopedia of Genes and Genomes (KEGG) analyses suggest cotinine was related to methylation of gene pathways controlling neuronal signaling, metabolic regulation, cell signaling and regulation, and cancer. Further, enhancers associated with transcription start sites were enriched in altered CpGs. Using an independent sample from the same study population (n=115), bisulfite pyrosequencing was performed with infant cord blood DNA for two genes within our top 20 hits (AGER and PRKG1). Results from pyrosequencing replicated epigenome results for PRKG1 (cg17079497, estimate=-1.09, standard error (SE)=0.45, p=0.018) but not for AGER (cg09199225; estimate=-0.16, SE=0.21, p=0.44).

Discussion

Secondhand smoke exposure among nonsmoking women may alter DNA methylation in regions involved in development, carcinogenesis, and neuronal functioning. These novel findings suggest that even low levels of smoke exposure during pregnancy may be sufficient to alter DNA methylation in distinct sites of mixed umbilical cord blood leukocytes in pathways that are known to be altered in cord blood from pregnant active smokers. https://doi.org/10.1289/EHP8099.",2021-05-19 +,A14 Estimating time since HIV infection using next-generation sequencing data: A unique tool to help understand HIV prevention among high-risk young women in Ukraine,"Abstract The transitions study examines HIV risk among adolescent girls and young women through their sexual life course from first sex, to past and current engagement in casual sex, transactional sex, and, for some, formal sex work (FSW). Understanding the timing of HIV infection and the circumstances around early infection in young females is critical to HIV prevention interventions. We inferred time since HIV infection using next-generation sequencing (NGS) of the HIV pol gene isolated from cross-sectional samples among high-risk young women in Dnipro, Ukraine. Dried blood spots were collected on Whatman 903 cards from young women aged 14–24 engaged in casual sex (n = 894), transactional sex (n = 464), and FSW (n = 452). The HIV pol gene was sequenced using an in-house NGS HIV drug resistance mutation genotyping assay. Time since HIV infection was inferred using an online tool as described by Puller et al. (2017) freely available at https://hiv.biozentrum.unibas.ch/ETI/. The mean estimated time since HIV infection (ETI) for participants engaged in casual sex, transactional sex, and FSW is 1.98, 1.84, and 3.01 years, respectively. ETI was used to determine the duration of HIV infection for each participant and compared to the number of sexually active years prior to FSW. 
Among FSW, 61 per cent of participants were infected with HIV prior to entry into sex work. In general, ETI from NGS data suggests that FSWs were infected with HIV before entry into FSW. Expansion of targeted prevention programs beyond FSW could play an important role in mitigating HIV transmission at the population level.",2019-08-01 +33026997,A Testing Environment for Continuous Colormaps.,"Many computer science disciplines (e.g., combinatorial optimization, natural language processing, and information retrieval) use standard or established test suites for evaluating algorithms. In visualization, similar approaches have been adopted in some areas (e.g., volume visualization), while user testimonies and empirical studies have been the dominant means of evaluation in most other areas, such as designing colormaps. In this paper, we propose to establish a test suite for evaluating the design of colormaps. With such a suite, the users can observe the effects when different continuous colormaps are applied to planar scalar fields that may exhibit various characteristic features, such as jumps, local extrema, ridge or valley lines, different distributions of scalar values, different gradients, different signal frequencies, different levels of noise, and so on. The suite also includes an expansible collection of real-world data sets including the most popular data for colormap testing in the visualization literature. The test suite has been integrated into a web-based application for creating continuous colormaps (https://ccctool.com/), facilitating close inter-operation between design and evaluation processes. 
This new facility complements traditional evaluation methods such as user testimonies and empirical studies.",2021-01-28 +34186186,"In silico modelling of acute toxicity of 1, 2, 4-triazole antifungal agents towards zebrafish (Danio rerio) embryos: Application of the Small Dataset Modeller tool.","Nowadays, there is a widespread use of triazole antifungal agents to kill broad classes of fungi in farming lands and to protect herbs, fruits and grains. These agents further deposit into the aquatic systems causing toxicity to the living aquatic creatures, which can then affect human beings. Considering this issue, risk assessment of these toxic chemicals is a very essential task. Due to the inadequate experimental data on acute toxicity of antifungal agents containing the 1, 2, 4-triazole ring, higher testing costs along with the regulatory restrictions and the international regulations to lessen animal testing emphasize on in silico techniques such as quantitative structure-activity relationship (QSAR) studies. The application of QSAR modelling has created an easier avenue to predict activity/property/toxicity of newly synthesized compounds. In the present study, we have used 23 antifungal agents containing the 1, 2, 4-triazole ring to develop 2D-QSAR models and explored their structural attributes crucial for acute toxicity towards embryonic phase of zebrafish (Danio rerio). Here, we have employed simple 2D descriptors to develop the QSAR models. The models were evolved by executing the Small Dataset Modeller tool (https://dtclab.webs.com/software-tools), and the validation of the models was achieved by employing different precise validation principles. 
The statistical validation metrics confirm that built models are robust, useful and well predictive to forecast the acute toxicity of new compounds.",2021-06-26 +33976872,Analysis of tRNA Cys processing under salt stress in Bacillus subtilis spore outgrowth using RNA sequencing data.,"Background: In spore-forming bacteria, the molecular mechanisms of accumulation of transfer RNA (tRNA) during sporulation must be a priority as tRNAs play an essential role in protein synthesis during spore germination and outgrowth. However, tRNA processing has not been extensively studied in these conditions, and knowledge of these mechanisms is important to understand long-term stress survival.    Methods:To gain further insight into tRNA processing during spore germination and outgrowth, the expression of the single copy tRNA Cys gene was analyzed in the presence and absence of 1.2 M NaCl in Bacillus subtilis using RNA-Seq data obtained from the Gene Expression Omnibus (GEO) database. The CLC Genomics work bench 12.0.2 (CLC Bio, Aarhus, Denmark, https://www.qiagenbioinformatics.com/) was used to analyze reads from the tRNA Cys gene.  Results:The results show that spores store different populations of tRNA Cys-related molecules.  One such population, representing 60% of total tRNA Cys, was composed of tRNA Cys fragments.  Half of these fragments (3´-tRF) possessed CC, CCA or incorrect additions at the 3´end. tRNA Cys with correct CCA addition at the 3´end represented 23% of total tRNA Cys, while with CC addition represented 9% of the total and with incorrect addition represented 7%. While an accumulation of tRNA Cys precursors was induced by upregulation of the rrnD operon under the control of  σ A -dependent promoters under both conditions investigated, salt stress produced only a modest effect on tRNA Cys expression and the accumulation of tRNA Cys related species. 
Conclusions:The results demonstrate that tRNA Cys molecules resident in spores undergo dynamic processing to produce functional molecules that may play an essential role during protein synthesis.",2020-06-03 +32647500,MicroRNA-377 Alleviates Myocardial Injury Induced by Hypoxia/Reoxygenation via Downregulating LILRB2 Expression.,"

Background

miR-377 is closely related to myocardial regeneration. miR-377-adjusted mesenchymal stem cells abducted ischemic cardiac angiogenesis. Nevertheless, there were rarely reports about the impact of miR-377 on myocardial ischemia injury. The purpose of this work is that whether miR-377 can protect against myocardial injury caused by hypoxia/reoxygenation (H/R).

Methods

Gene expression omnibus database (http://www.ncbi.nlm.nih.gov/geo/; no. GSE53211) was utilized to study the differential expression of miR-377 in patients with an acute ST-segment elevation myocardial infarction and healthy controls. The luciferase activity was determined utilizing the dual-luciferase reporter system. Quantitative real-time polymerase chain reaction and Western blotting were used to measure the messenger RNA and protein level.

Results

Low expression of miR-377 and high expression of leukocyte immunoglobulin-like receptor B2 (LILRB2) were identified in patients with myocardial infarction from analyzing the Gene Expression Omnibus data set. Besides, miR-377 expression was downregulated in cardiomyocyte exposed to H/R. Additionally, overexpression of miR-377 could visibly improve cardiomyocyte injury by regulating cell activity and apoptosis.

Conclusions

In short, our findings suggested that miR-377/LILRB2 might regard as a hopeful therapeutic target for myocardial ischemic.",2020-04-01 +33709443,LIMONADA: A database dedicated to the simulation of biological membranes.,"Cellular membranes are composed of a wide diversity of lipid species in varying proportions and these compositions are representative of the organism, cellular type and organelle to which they belong. Because models of these molecular systems simulated by MD steadily gain in size and complexity, they are increasingly representative of specific compositions and behaviors of biological membranes. Due to the number of lipid species involved, of force fields and topologies and because of the complexity of membrane objects that have been simulated, LIMONADA has been developed as an open database allowing to handle the various aspects of lipid membrane simulation. LIMONADA presents published membrane patches with their simulation files and the cellular membrane it models. Their compositions are then detailed based on the lipid identification from LIPID MAPS database plus the lipid topologies and the force field used. LIMONADA is freely accessible on the web at https://limonada.univ-reims.fr/.",2021-03-11 +33707181,Identification of Patients with CKD in Medical Databases: A Comparison of Different Algorithms.,"

Background and objectives

Despite CKD consensus definitions, epidemiologic studies use multiple different algorithms to identify CKD. We aimed to elucidate if this affects the patient characteristics and the estimated prevalence and prognosis of CKD by applying six different algorithms to identify CKD in population-based medical databases and compare the cohorts.

Design, setting, participants, & measurements

Patients with CKD in Northern Denmark (2009-2016) were identified using six different algorithms: five were laboratory based defined by (1) one measured outpatient eGFR <60 ml/min per 1.73 m2 (single test, n=103,435), (2) two such findings ≥90 days apart (Kidney Disease Improving Global Outcomes, n=84,688), (3) two such findings ≥90 days apart with no eGFR >60 ml/min per 1.73 m2 observed in-between (Kidney Disease Improving Global Outcomes, persistent, n=68,994), (4) two such findings ≥90 and <365 days apart (Kidney Disease Improving Global Outcomes, time limited, n=75,031), and (5) two eGFRs <60 ml/min per 1.73 m2 or two urine albumin-creatinine ratios >30 mg/g ≥90 days apart (Kidney Disease Improving Global Outcomes, eGFR/albuminuria, n=100,957). The sixth included patients identified by reported in- and outpatient hospital International Classification of Diseases diagnoses of CKD (hospital-diagnosed, n=27,947). For each cohort, we estimated baseline eGFR, CKD prevalence, and 1-year mortality using the Kaplan-Meier method.

Results

The five different laboratory-based algorithms resulted in large differences in the estimated prevalence of CKD from 4637-8327 per 100,000 population. In contrast, 1-year mortality varied only slightly (7%-9%). Baseline eGFR levels at diagnosis were comparable (53-56 ml/min per 1.73 m2), whereas median time since first recorded eGFR <60 ml/min per 1.73 m2 varied from 0 months (single-test) to 17 months (Kidney Disease Improving Global Outcomes, persistent). The hospital-diagnosed algorithm yielded markedly lower CKD prevalence (775 per 100,000 population), a lower baseline eGFR (47 ml/min per 1.73 m2), longer time since first eGFR <60 ml/min per 1.73 m2 (median 70 months), and much higher 1-year mortality (22%).

Conclusions

Population prevalence of CKD identified in medical databases greatly depends on the applied algorithm to define CKD. Despite these differences, laboratory-based algorithms produce cohorts with similar prognosis.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2021_03_11_CJN15691020_final.mp3.",2021-03-11 +33912980,Physical activity attenuates postprandial hyperglycaemia in homozygous TBC1D4 loss-of-function mutation carriers.,"

Aims/hypothesis

The common muscle-specific TBC1D4 p.Arg684Ter loss-of-function variant defines a subtype of non-autoimmune diabetes in Arctic populations. Homozygous carriers are characterised by elevated postprandial glucose and insulin levels. Because 3.8% of the Greenlandic population are homozygous carriers, it is important to explore possibilities for precision medicine. We aimed to investigate whether physical activity attenuates the effect of this variant on 2 h plasma glucose levels after an oral glucose load.

Methods

In a Greenlandic population cohort (n = 2655), 2 h plasma glucose levels were obtained after an OGTT, physical activity was estimated as physical activity energy expenditure and TBC1D4 genotype was determined. We performed TBC1D4-physical activity interaction analysis, applying a linear mixed model to correct for genetic admixture and relatedness.

Results

Physical activity was inversely associated with 2 h plasma glucose levels (β[main effect of physical activity] -0.0033 [mmol/l] / [kJ kg-1 day-1], p = 6.5 × 10-5), and significantly more so among homozygous carriers of the TBC1D4 risk variant compared with heterozygous carriers and non-carriers (β[interaction] -0.015 [mmol/l] / [kJ kg-1 day-1], p = 0.0085). The estimated effect size suggests that 1 h of vigorous physical activity per day (compared with resting) reduces 2 h plasma glucose levels by an additional ~0.7 mmol/l in homozygous carriers of the risk variant.

Conclusions/interpretation

Physical activity improves glucose homeostasis particularly in homozygous TBC1D4 risk variant carriers via a skeletal muscle TBC1 domain family member 4-independent pathway. This provides a rationale to implement physical activity as lifestyle precision medicine in Arctic populations.

Data repository

The Greenlandic Cardio-Metabochip data for the Inuit Health in Transition study has been deposited at the European Genome-phenome Archive ( https://www.ebi.ac.uk/ega/dacs/EGAC00001000736 ) under accession EGAD00010001428.",2021-04-29 +33984158,Cancer progression as a sequence of atavistic reversions.,"It has long been recognized that cancer onset and progression represent a type of reversion to an ancestral quasi-unicellular phenotype. This general concept has been refined into the atavistic model of cancer that attempts to provide a quantitative analysis and testable predictions based on genomic data. Over the past decade, support for the multicellular-to-unicellular reversion predicted by the atavism model has come from phylostratigraphy. Here, we propose that cancer onset and progression involve more than a one-off multicellular-to-unicellular reversion, and are better described as a series of reversionary transitions. We make new predictions based on the chronology of the unicellular-eukaryote-to-multicellular-eukaryote transition. We also make new predictions based on three other evolutionary transitions that occurred in our lineage: eukaryogenesis, oxidative phosphorylation and the transition to adaptive immunity. We propose several modifications to current phylostratigraphy to improve age resolution to test these predictions. Also see the video abstract here: https://youtu.be/3unEu5JYJrQ.",2021-05-13 +32812285,Aom2 S: A new web-based application for DNA/RNA tandem mass spectrometry data interpretation.,"

Rationale

The Analysis of Oligonucleotide Modifications from Mass Spectra (Aom2 S) was created to support the analysis of oligonucleotide mass spectra. This application complements the existing software tools by providing a comprehensive analysis of oligonucleotide fragments from high-resolution tandem mass spectrometry (HR-MS/MS) data in a flexible and user-friendly manner, directly accessible through a web browser without any need for installation.

Methods

MS measurements of aminoC6-DNA and inosine-RNA were performed using an LTQ Orbitrap FT-MS instrument. The obtained data were analyzed by our newly developed open-source package Aom2 S accessible from the ms.epfl.ch web page or directly at https://mstools.epfl.ch/am2s/ to demonstrate the various functionalities of this tool, notably the possibility to identify different product ions from a nucleotide sequence with any fixed/variable modification by matching theoretical isotopic patterns to any experimental mass spectra with similarity scores ranking.

Results

A detailed description of the Aom2 S tool with its user-friendly interface is exemplified using HR-MS/MS data of modified DNA and RNA oligonucleotides. Explanations of analysis parameters and tool workflow, as well as multiple options for viewing and exporting the results, are provided. Product ion assignment and modification localization can be achieved in seconds, and results can be exported as tables, matched mass spectra, and fragmentation maps.

Conclusions

A new open source tool (Aom2 S) for the analysis of HR-MS/MS data for modified DNA and RNA oligonucleotides is described. Aom2 S is fast, highly flexible, and versatile, allowing automatic precursor and product ion assignment in a comprehensive manner, including internal fragments and variable modification localization, with clear graphical representation of the results.",2020-12-01 +34491799,Listening in 2020: A Survey of Adults' Experiences With Pandemic-Related Disruptions.,"

Purpose

The COVID-19 pandemic has introduced lifestyle changes that may negatively impact communication, including the pervasive use of face masks and videoconferencing technology. Here, we examine the effects of age and self-rated hearing on subjective measures of speech understanding via a survey accessed by adults residing in the United States.

Method

Responses to an online survey were obtained from adults (21 years of age and older) during the summer and fall of 2020. The survey included questions about hearing and speech understanding in a variety of scenarios and different listening conditions, including when communicating with people using face masks in quiet and noisy environments and when using videoconferencing.

Results

Data from 1,703 surveys were analyzed. In general, the use of face masks led to the perception of poorer speech understanding and greater need for concentration, especially in noisy environments. When responses from all participants were considered, poorer self-rated communication ability was noted as age increased. However, among people who categorized their overall hearing as ""Excellent"" or ""Good,"" younger adults rated their speech understanding ability in noisy situations as poorer than middle-age or older adults. Among people who rated their overall hearing as ""Fair"" or ""Poor,"" middle-age adults indicated having more difficulty communicating with people using face masks, as compared with older adults. Examination of open-ended responses suggested that the strategies individuals use when communicating with people wearing face masks vary by age and self-rated hearing. Notably, middle-age and older adults were more likely to report using strategies that could put them at risk (e.g., asking others to remove their face masks).

Conclusions

Even younger adults with self-perceived good hearing are not immune to communication challenges brought about by face masks. Among individuals with similar degrees of self-rated hearing, the expected increase in communication difficulty with age was not noted among our respondents.

Supplemental material

https://doi.org/10.23641/asha.16528431.",2021-09-07 +31114916,WebGestalt 2019: gene set analysis toolkit with revamped UIs and APIs.,"WebGestalt is a popular tool for the interpretation of gene lists derived from large scale -omics studies. In the 2019 update, WebGestalt supports 12 organisms, 342 gene identifiers and 155 175 functional categories, as well as user-uploaded functional databases. To address the growing and unique need for phosphoproteomics data interpretation, we have implemented phosphosite set analysis to identify important kinases from phosphoproteomics data. We have completely redesigned result visualizations and user interfaces to improve user-friendliness and to provide multiple types of interactive and publication-ready figures. To facilitate comprehension of the enrichment results, we have implemented two methods to reduce redundancy between enriched gene sets. We introduced a web API for other applications to get data programmatically from the WebGestalt server or pass data to WebGestalt for analysis. We also wrapped the core computation into an R package called WebGestaltR for users to perform analysis locally or in third party workflows. WebGestalt can be freely accessed at http://www.webgestalt.org.",2019-07-01 +34344612,Recent advances in immunotherapy for hepatocellular carcinoma.,"

Background

Treatment of hepatocellular carcinoma (HCC) is challenging as most patients are diagnosed at advanced stage with underlying chronic liver conditions. Conventional systemic chemotherapy has failed in HCC, and the clinical efficacy of FDA-approved molecular targeted agents such as sorafenib and lenvatinib remains unsatisfactory.

Data sources

Literature search was conducted in PubMed for relevant articles published before January 2021. The search aimed to identify recent developments in immune-based treatment approaches for HCC. Information of clinical trials was obtained from https://clinicaltrials.gov/.

Results

Two immune checkpoint inhibitors (ICIs), nivolumab and pembrolizumab were approved as monotherapies, which has revolutionized HCC treatment. Besides, combination ICIs have also got accelerated FDA approval recently. Immune-based therapies have challenged targeted drugs owing to their safety, tolerability, and survival benefits. In addition to the significant success in ICIs, other immunotherapeutic strategies such as cancer vaccine, chimeric antigen receptor T-cells, natural killer cells, cytokines, and combination therapy, have also shown promising outcomes in clinical trials. Various diagnostic and prognostic biomarkers have been identified which can help in clinical decision making when starting treatment with ICIs.

Conclusions

Immunotherapy has emerged as one of the mainstream treatment modalities for advanced HCC in recent years. However, challenges such as low response rate and acquired resistance in previously respondent patients still exist. Further research is needed to understand the unique resistance mechanism to immunotherapy and to discover more predictive biomarkers to guide clinical decision making.",2021-07-24 +32347334,Retrograde intrarenal surgery of renal stones: a critical multi-aspect evaluation of the outcomes by the Turkish Academy of Urology Prospective Study Group (ACUP Study).,"

Aims

To outline and evaluate the incidence, management and follow-up of the residual fragments (RFs) following retrograde intrarenal surgery (RIRS) of renal stones by the Turkish Academy of Urology Prospective Study Group (ACUP Study).

Methods

Following the ethical committee approval, 15 centers providing data regarding the incidence, management, and follow-up of RFs after RIRS were included and all relevant information was recorded into the same electronic database program ( https://acup.uroturk.org.tr/ ) created by Turkish Urology Academy for Residual Stone Study.

Results

A total of 1112 cases underwent RIRS for renal calculi and RFs were observed in 276 cases (24.8%). Of all the parameters evaluated, our results demonstrated no statistically significant relation between preoperative DJ stenting and the presence of RFs (χ2 (1) = 158.418; p = 0.099). RFs were significantly higher in patients treated with UAS (82 patients, 29.3%) during the procedure compared to the cases who did not receive UAS (194 patients, 23.3%) (χ2 (1) = 3.999; p = 0.046). The mean period for a secondary intervention after RIRS was 28.39 (± 12.52) days. Regarding the procedures applied for RF removal, re-RIRS was the most commonly performed approach (56%).

Conclusions

Despite the reported safe and successful outcomes, the incidence of RFs is higher, after the RIRS procedure particularly in cases with relatively larger calculi. Such cases need to be followed in a close manner and although a second flexible ureteroscopy is the treatment of choice for fragment removal in the majority of these patients, shock wave lithotripsy and percutaneous nephrolithotomy may also be preferred in selected cases.",2020-04-28 +33692117,Deceased-Donor Acute Kidney Injury and BK Polyomavirus in Kidney Transplant Recipients.,"

Background and objectives

BK polyomavirus (BKV) infection commonly complicates kidney transplantation, contributing to morbidity and allograft failure. The virus is often donor-derived and influenced by ischemia-reperfusion processes and disruption of structural allograft integrity. We hypothesized that deceased-donor AKI associates with BKV infection in recipients.

Design, setting, participants, & measurements

We studied 1025 kidney recipients from 801 deceased donors transplanted between 2010 and 2013, at 13 academic centers. We fitted Cox proportional-hazards models for BKV DNAemia (detectable in recipient blood by clinical PCR testing) within 1 year post-transplantation, adjusting for donor AKI and other donor- and recipient-related factors. We validated findings from this prospective cohort with analyses for graft failure attributed to BKV within the Organ Procurement and Transplantation Network (OPTN) database.

Results

The multicenter cohort mean kidney donor profile index was 49±27%, and 26% of donors had AKI. Mean recipient age was 54±13 years, and 25% developed BKV DNAemia. Donor AKI was associated with lower risk for BKV DNAemia (adjusted hazard ratio, 0.53; 95% confidence interval, 0.36 to 0.79). In the OPTN database, 22,537 (25%) patients received donor AKI kidneys, and 272 (0.3%) developed graft failure from BKV. The adjusted hazard ratio for the outcome with donor AKI was 0.7 (95% confidence interval, 0.52 to 0.95).

Conclusions

In a well-characterized, multicenter cohort, contrary to our hypothesis, deceased-donor AKI independently associated with lower risk for BKV DNAemia. Within the OPTN database, donor AKI was also associated with lower risk for graft failure attributed to BKV.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2021_03_10_CJN18101120_final.mp3.",2021-03-10 +28968812,MetSigDis: a manually curated resource for the metabolic signatures of diseases.,"Complex diseases cannot be understood only on the basis of single gene, single mRNA transcript or single protein but the effect of their collaborations. The combination consequence in molecular level can be captured by the alterations of metabolites. With the rapidly developing of biomedical instruments and analytical platforms, a large number of metabolite signatures of complex diseases were identified and documented in the literature. Biologists' hardship in the face of this large amount of papers recorded metabolic signatures of experiments' results calls for an automated data repository. Therefore, we developed MetSigDis aiming to provide a comprehensive resource of metabolite alterations in various diseases. MetSigDis is freely available at http://www.bio-annotation.cn/MetSigDis/. By reviewing hundreds of publications, we collected 6849 curated relationships between 2420 metabolites and 129 diseases across eight species involving Homo sapiens and model organisms. All of these relationships were used in constructing a metabolite disease network (MDN). This network displayed scale-free characteristics according to the degree distribution (power-law distribution with R2 = 0.909), and the subnetwork of MDN for interesting diseases and their related metabolites can be visualized in the Web. The common alterations of metabolites reflect the metabolic similarity of diseases, which is measured using Jaccard index. We observed that metabolite-based similar diseases are inclined to share semantic associations of Disease Ontology. A human disease network was then built, where a node represents a disease, and an edge indicates similarity of pair-wise diseases. 
The network validated the observation that linked diseases based on metabolites should have more overlapped genes.",2019-01-01 +32240307,A Systematic Review of Collection and Analysis of Human Milk for Macronutrient Composition.,"

Background

As human milk (HM) composition varies by time and across even a single feed, methods of sample collection can significantly affect the results of compositional analyses and complicate comparisons between studies.

Objective

The aim was to compare the results obtained for HM macronutrient composition between studies utilizing different sampling methodologies. The results will be used as a basis to identify the most reliable HM sampling approach.

Methods

EMBASE, MEDLINE/PubMed, Cochrane Library, Scopus, Web of Science, and ProQuest databases were searched for relevant articles. Observational and interventional studies were included, and at least 2 authors screened studies and undertook data extraction. Quality assessment was conducted using the Newcastle-Ottawa scale and previously published pragmatic score.

Results

A total of 5301 publications were identified from our search, of which 101 studies were included (n = 5049 breastfeeding women). Methods used for HM collection were divided into 3 categories: collection of milk from all feeds over 24 h (32 studies, n = 1309 participants), collection at one time point (62 studies, n = 3432 participants), and ""other methods"" (7 studies, n = 308 participants). Fat and protein concentrations varied between collection methods within lactation stage, but there were no obvious differences in lactose concentrations. There was substantial variability between studies in other factors potentially impacting HM composition, including stage of lactation, gestational age, and analytical method, which complicated direct comparison of methods.

Conclusions

This review describes the first systematic evaluation of sampling methodologies used in studies reporting HM composition and highlights the wide range of collection methods applied in the field. This information provides an important basis for developing recommendations for best practices for HM collection for compositional analysis, which will ultimately allow combination of information from different studies and thus strengthen the body of evidence relating to contemporary HM composition. This trial was registered at PROSPERO as CRD42017072563, https://www.crd.york.ac.uk/prospero/display_record.php?ID=CRD42017072563.",2020-06-01 +33074067,Spatiotemporal dynamics of the COVID-19 pandemic in the arctic: early data and emerging trends.,"Since February 2020 the COVID-19 pandemic has been unfolding in the Arctic, placing many communities at risk due to remoteness, limited healthcare options, underlying health issues and other compounding factors. Preliminary analysis of available COVID-19 data in the Arctic at the regional (subnational) level suggests that COVID-19 infections and mortality were highly variable, but generally remained below respective national levels. Based on the trends and magnitude of the pandemic through July, we classify Arctic regions into four groups: Iceland, Faroe Islands, Northern Norway, and Northern Finland with elevated early incidence rates, but where strict quarantines and other measures promptly curtailed the pandemic; Northern Sweden and Alaska, where the initial wave of infections persisted amid weak (Sweden) or variable (Alaska) quarantine measures; Northern Russia characterised by the late start and subsequent steep growth of COVID-19 cases and fatalities and multiple outbreaks; and Northern Canada and Greenland with no significant proliferation of the pandemic. Despite limitations in available data, further efforts to track and analyse the pandemic at the pan-Arctic, regional and local scales are crucial. 
This includes understanding of the COVID-19 patterns, mortality and morbidity, the relationships with public-health conditions, socioeconomic characteristics, policies, and experiences of the Indigenous Peoples. Data used in this paper are available at https://arctic.uni.edu/arctic-covid-19.",2020-12-01 +29893885,EviNet: a web platform for network enrichment analysis with flexible definition of gene sets.,"The new web resource EviNet provides an easily run interface to network enrichment analysis for exploration of novel, experimentally defined gene sets. The major advantages of this analysis are (i) applicability to any genes found in the global network rather than only to those with pathway/ontology term annotations, (ii) ability to connect genes via different molecular mechanisms rather than within one high-throughput platform, and (iii) statistical power sufficient to detect enrichment of very small sets, down to individual genes. The users' gene sets are either defined prior to upload or derived interactively from an uploaded file by differential expression criteria. The pathways and networks used in the analysis can be chosen from the collection menu. The calculation is typically done within seconds or minutes and the stable URL is provided immediately. The results are presented in both visual (network graphs) and tabular formats using jQuery libraries. Uploaded data and analysis results are kept in separated project directories not accessible by other users. EviNet is available at https://www.evinet.org/.",2018-07-01 +34396393,DAMA-a method for computing multiple alignments of protein structures using local structure descriptors. ,"The well-known fact that protein structures are more conserved than their sequences forms the basis of several areas of computational structural biology. Methods based on the structure analysis provide more complete information on residue conservation in evolutionary processes. 
This is crucial for the determination of evolutionary relationships between proteins and for the identification of recurrent structural patterns present in biomolecules involved in similar functions. However, algorithmic structural alignment is much more difficult than multiple sequence alignment. This study is devoted to the development and applications of DAMA-a novel effective environment capable to compute and analyze multiple structure alignments. DAMA is based on local structural similarities, using local 3D structure descriptors and thus accounts for nearest-neighbor molecular environments of aligned residues. It is constrained neither by protein topology nor by its global structure. DAMA is an extension of our previous study (DEDAL) which demonstrated the applicability of local descriptors to pairwise alignment problems (Daniluk and Lesyng, 2011). Since the multiple alignment problem is NP-complete (Daniluk and Lesyng, 2014), an effective heuristic approach has been developed without imposing any artificial constraints. The alignment algorithm searches for the largest, consistent ensemble of similar descriptors. The new method is capable to capture most of the biologically significant similarities present in canonical test sets and is discriminatory enough to prevent the emergence of larger, but meaningless, solutions. Tests performed on the test sets, including protein kinases, demonstrate DAMA's capability of identifying equivalent residues, which should be very useful in discovering the biological nature of proteins similarity. Performance profiles show the advantage of DAMA over other methods, in particular when using a strict similarity measure QC, which is the ratio of correctly aligned columns, and when applying the methods to more difficult cases. DAMA is available online at http://dworkowa.imdik.pan.pl/EP/DAMA. Linux binaries of the software are available upon request. 
Supplementary data are available at Bioinformatics online.",2021-08-16 +33677478,Bali-Phy version 3: Model-based co-estimation of alignment and phylogeny. ,"We describe improvements to BAli-Phy, a Markov chain Monte Carlo (MCMC) program that jointly estimates phylogeny, alignment, and other parameters from unaligned sequence data. Version 3 is substantially faster for large trees, and implements covarion models, additional codon models, and other new models. It implements ancestral state reconstruction, allows prior selection for all model parameters, and can also analyze multiple genes simultaneously. Software is available for download at http://www.bali-phy.org. C++ source code is freely available on Github under the GPL2 License.",2021-03-02 +34348529,Genetic Evidence on the Association of Interleukin (IL)-1-Mediated Chronic Inflammation with Airflow Obstruction: A Mendelian Randomization Study.,"Preclinical studies suggest interleukin (IL)-1α/β is involved in the pathogenesis of chronic obstructive pulmonary disease (COPD). However, recent trials of anti-IL-1 therapies showed limited benefit for COPD. To clarify, we primarily examined total and direct effects of IL-1 and its receptors/coreceptors/receptor antagonists (IL-1/IL-1Rs) on airflow obstruction (AO) using Mendelian randomization (MR), and secondarily explored reverse causation using bidirectional MR. We selected independent cis protein quantitative trait loci (cis-pQTLs) as genetic instruments for IL-1/IL-1Rs from two proteomic genome-wide association studies (n = 11,594) of European ancestry (mean age ∼47 years). We applied those cis-pQTLs to the International COPD Genetics Consortium (n = 15,256 cases, 47,936 controls) of ∼81.9% European descent (∼57 years). No IL-1/IL-1Rs were significantly associated with AO after correction for multiple testing. 
However, a higher genetically predicted IL-1 receptor antagonist (IL-1Ra) was nominally associated with a 20% reduction in AO risk using univariable MR, with a larger direct effect (∼31%, i.e. not via IL-1α/β) using multivariable MR. Furthermore, higher total IL-18 binding protein (IL-18BP) was nominally associated with lower AO. Nominal total effects were also noted for higher IL-1α with lower AO and higher IL-1R1 with higher AO. Higher IL-1Ra and IL-18BP might have a role in preventing AO, but need to be contextualized. Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.1955848 .",2021-08-04 +33789926,A Novel Three-Choice Touchscreen Task to Examine Spatial Attention and Orienting Responses in Rodents. ,"Mammalian orienting behavior consists of coordinated movements of the eyes, head, pinnae, vibrissae, or body to attend to an external stimulus. The present study aimed to develop a novel operant task using a touch-screen system to measure spatial attention. In this task, rats were trained to nose-poke a light stimulus presented in one of three locations. The stimulus was presented more frequently in the center location to develop spatial attention bias toward the center stimulus. Changes in orienting responses were detected by measuring the animals' response accuracy and latency to stimuli at the lateral locations, following reversible unilateral chemogenetic inactivation of the superior colliculus (SC). Additionally, spontaneous turning and rotation behavior was measured using an open-field test (OFT). Our results show that right SC inactivation significantly increased the whole body turn angle in the OFT, in line with previous literature that indicated an ipsiversive orientating bias and the presence of contralateral neglect following unilateral SC lesions. 
In the touch screen orienting task, unilateral SC inactivation significantly increased bias toward the ipsilateral side, as measured by response frequency in various experimental conditions, and a very large left-shift of a respective psychometric function. Our results demonstrate that this novel touchscreen task is able to detect changes in spatial attention and orienting responses because of e.g. experimental manipulations or injury with very high sensitivity, while taking advantage of the touch screen technology that allows for high transferability of the task between labs and for open-source data sharing through https://www.mousebytes.ca.",2021-07-09 +32926121,CHESPA/CHESCA-SPARKY: automated NMR data analysis plugins for SPARKY to map protein allostery.,"

Motivation

Correlated Nuclear Magnetic Resonance (NMR) chemical shift changes identified through the CHEmical Shift Projection Analysis (CHESPA) and CHEmical Shift Covariance Analysis (CHESCA) reveal pathways of allosteric transitions in biological macromolecules. To address the need for an automated platform that implements CHESPA and CHESCA and integrates them with other NMR analysis software packages, we introduce here integrated plugins for NMRFAM-SPARKY that implement the seamless detection and visualization of allosteric networks.

Availability and implementation

CHESCA-SPARKY and CHESPA-SPARKY are available in the latest version of NMRFAM-SPARKY from the National Magnetic Resonance Facility at Madison (http://pine.nmrfam.wisc.edu/download_packages.html), the NMRbox Project (https://nmrbox.org) and to subscribers to the SBGrid (https://sbgrid.org). The assigned spectra involved in this study and tutorial videos using this dataset are available at https://sites.google.com/view/chescachespa-sparky.

Supplementary information

Supplementary data are available at Bioinformatics Online.",2021-05-01 +34291724,"N-3 Long Chain Fatty Acids Supplementation, Fatty Acids Desaturase Activity, and Colorectal Cancer Risk: A Randomized Controlled Trial.","

Introduction

n-3 long-chain polyunsaturated fatty acids (LCPUFA) have anti-inflammatory effects and may reduce colorectal cancer risk. The purpose of this study was to evaluate the effects of n-3 LCPUFA supplementation on markers of rectal cell proliferation and apoptosis and examine how genetic variation in desaturase enzymes might modify this effect.

Methods

We conducted a randomized, double-blind, control six-month trial of 2.5 grams of n-3 LCPUFA per day compared to olive oil. Study participants had a history of colorectal adenomas. Randomization was stratified based on the gene variant rs174535 in the fatty acid desaturase 1 enzyme (FADS1). Our primary outcome was change in markers of rectal epithelial proliferation and apoptosis.

Results

A total of 141 subjects were randomized. We found no difference in apoptosis markers between participants randomized to n-3 LCPUFA compared to olive oil (P = 0.41). N-3 LCPUFA supplementation increased cell proliferation in the lower colonic crypt compared to olive oil (P = 0.03) however baseline indexes of proliferation were different between the groups at randomization. We found no evidence that genotype modified the effect.

Conclusions

Our study did not show evidence of a proliferative or pro-apoptotic effect on n-3 LCPUFA supplementation on rectal mucosa regardless of the FADS genotype. ClinicalTrials.gov Identifier: NCT01661764. Supplemental data for this article is available online at https://dx.doi.org/10.1080/01635581.2021.1955286.",2021-07-22 +33719512,Information and Communication Technology in Schools: Its Contribution to Equitable Speech-Language Therapy Services in an Underserved Small Island Developing State.,"Purpose Access to speech-language therapy services for children with communication difficulties is limited in vulnerable countries within the Majority world, such as Small Island Developing States. The use of information and communication technology (ICT) has been identified as a possible solution to provide equitable access to services in Minority world countries. This study explored ICT-related conditions in remote schools of the Maldives, a Small Island Developing State, in order to identify potential service delivery approaches. Method A mixed methods approach was used, involving (a) an online survey of 107 teachers, (b) observational data from four schools, (c) interviews with 31 teachers and the four principals of participating schools, and (d) 13 relevant online documents. Content analysis was used to analyze and integrate data from all sources. Results Teachers' access to ICT devices and fixed broadband Internet varied across schools. The government had limited funds to provide adequate fixed broadband Internet for them. However, favorable prospects were also discovered, including high access to 4G mobile broadband Internet in islands, high levels of confidence among teachers to use ICT, a variety of ICT uses currently employed by teachers, and the presence of financial aid for students with special education needs. 
Conclusions The findings of the study support the potential for using mobile broadband Internet, available ICT devices, and teachers as agents of service delivery in remote schools to enhance speech-language therapy service delivery in the Maldives. The creation of relevant digital educational content for teachers could further support children with communication difficulties in the country. Supplemental Material https://doi.org/10.23641/asha.14143910.",2021-03-09 +34289789,Development and field testing of a neuro psychomotor multidimensional grid for the assessment of children with cns tumor.,"Central Nervous System (CNS) tumors are the most common pediatric solid tumor and development neuro psychomotor (DNPM) therapy can contribute to the rehabilitation of these children. This paper describes the development of a DNPM multidimensional assessment grid for children with CNS tumor (DNPM-CNS grid).The development process included 4 phases: (P1) literature review and grid development (Version 1.0), (P2) two rounds consultations with experts (Version 1.1 and 2.0), (P3) field testing, (P4) final revision (Version 3.0).(P1) The DNPM-CNS grid was developed based on previous tools and manuals and on clinical experience with this patient population. (P2) A total of 52 questionnaires were filled in by experts about relevance of assessment areas, pertinence, comprehensibility and feasibility of the grid. Average scores ranged from 7.6 to 10. (P3) At case level, good inter-rater agreement scores (78%) and limited non-evaluability rates (18%) emerged. At item level, 27% of items reached high disagreement and 26% high not-evaluability rates. The qualitative assessment was judged clinically useful for planning the neuro-oncology rehabilitation treatment and a good feasibility of the DNPM-CNS grid emerged both for preschool and school-age children. 
(P4) The final version of the grid consists of 8 assessment areas with 133 items. The DNPM-CNS grid is a comprehensive tool that can guide the overall DNPM assessment in a limited amount of time. It can be used as a screening tool to customize more specific assessments. Further research is needed to better characterize grid psychometric properties. Supplemental data for this article is available online at https://doi.org/10.1080/08880018.2021.1948648 .",2021-07-22 +32565674,PSI-MOUSE: Predicting Mouse Pseudouridine Sites From Sequence and Genome-Derived Features.,"Pseudouridine (Ψ) is the first discovered and the most prevalent posttranscriptional modification, which has been widely studied during the past decades. Pseudouridine was observed in almost all kinds of RNAs and shown to have important biological functions. Currently, the time-consuming and high-cost procedures of experimental approaches limit its uses in real-life Ψ site detection. Alternatively, by taking advantage of the explosive growth of Ψ sequencing data, the computational methods may provide a more cost-effective avenue. To date, the existing mouse Ψ site predictors were all developed based on sequence-derived features, and their performance can be further improved by adding the domain knowledge derived feature. Therefore, it is highly desirable to propose a genomic feature-based computational method to increase the accuracy and efficiency of the identification of Ψ RNA modification in the mouse transcriptome. In our study, a predictive framework PSI-MOUSE was built. Besides the conventional sequence-based features, PSI-MOUSE first introduced 38 additional genomic features derived from the mouse genome, which achieved a satisfactory improvement in the prediction performance, compared with other existing models. 
Moreover, PSI-MOUSE also features in automatically annotating the putative Ψ sites with diverse types of posttranscriptional regulations (RNA-binding protein [RBP]-binding regions, miRNA-RNA interactions, and splicing sites), which can serve as a useful research tool for the study of Ψ RNA modification in the mouse genome. Finally, 3282 experimentally validated mouse Ψ sites were also collected in a database with customized query functions. For the convenience of academic users, a website was built to provide a user-friendly interface for the query and analysis on the database. The website is freely accessible at www.xjtlu.edu.cn/biologicalsciences/psimouse and http://psimouse.rnamd.com. We introduced the genome-derived features to mouse for the first time, and we achieved a good performance in mouse Ψ site prediction. Compared with the existing state-of-art methods, our newly developed approach PSI-MOUSE obtained a substantial improvement in prediction accuracy, marking the reliable contributions of genomic features for the prediction of RNA modifications in a species other than human.",2020-06-09 +33590873,Converting disease maps into heavyweight ontologies: general methodology and application to Alzheimer's disease. ,"Omics technologies offer great promises for improving our understanding of diseases. The integration and interpretation of such data pose major challenges, calling for adequate knowledge models. Disease maps provide curated knowledge about disorders' pathophysiology at the molecular level adapted to omics measurements. However, the expressiveness of disease maps could be increased to help in avoiding ambiguities and misinterpretations and to reinforce their interoperability with other knowledge resources. Ontology is an adequate framework to overcome this limitation, through their axiomatic definitions and logical reasoning properties. We introduce the Disease Map Ontology (DMO), an ontological upper model based on systems biology terms. 
We then propose to apply DMO to Alzheimer's disease (AD). Specifically, we use it to drive the conversion of AlzPathway, a disease map devoted to AD, into a formal ontology: Alzheimer DMO. We demonstrate that it allows one to deal with issues related to redundancy, naming, consistency, process classification and pathway relationships. Furthermore, we show that it can store and manage multi-omics data. Finally, we expand the model using elements from other resources, such as clinical features contained in the AD Ontology, resulting in an enriched model called ADMO-plus. The current versions of DMO, ADMO and ADMO-plus are freely available at http://bioportal.bioontology.org/ontologies/ADMO.",2021-02-01 +30917137,An analysis and metric of reusable data licensing practices for biomedical resources.,"Data are the foundation of science, and there is an increasing focus on how data can be reused and enhanced to drive scientific discoveries. However, most seemingly ""open data"" do not provide legal permissions for reuse and redistribution. The inability to integrate and redistribute our collective data resources blocks innovation and stymies the creation of life-improving diagnostic and drug selection tools. To help the biomedical research and research support communities (e.g. libraries, funders, repositories, etc.) understand and navigate the data licensing landscape, the (Re)usable Data Project (RDP) (http://reusabledata.org) assesses the licensing characteristics of data resources and how licensing behaviors impact reuse. We have created a ruleset to determine the reusability of data resources and have applied it to 56 scientific data resources (e.g. databases) to date. The results show significant reuse and interoperability barriers. 
Inspired by game-changing projects like Creative Commons, the Wikipedia Foundation, and the Free Software movement, we hope to engage the scientific community in the discussion regarding the legal use and reuse of scientific data, including the balance of openness and how to create sustainable data resources in an increasingly competitive environment.",2019-03-27 +34288828,Peer victimization and relationships to approach and avoidance coping to health and health behaviors.,"Peer victimization during high school is a common experience associated with engagement in risky health behaviors and elevated depressive symptoms. Mechanisms linking peer victimization to health outcomes remain inadequately understood. In the current study, latent class analysis was used to identify latent subclasses of college students who display similar patterns of responses to frequent peer victimization experiences during high school. We also examined moderating and mediating effects of coping (approach/avoidance) on relationships between victimization class and health outcomes (i.e., binge drinking, current smoking, depressive symptoms). College students completed questionnaire measures of peer victimization, approach and avoidance coping, binge drinking, smoking, and depressive symptoms. Four distinct patterns of peer victimization were identified among college students (Low, High, Moderate, and Social/Verbal). Moderation models revealed significant interactions of moderate victimization x approach coping on depressive symptoms and high victimization x avoidance coping on binge drinking. Mediation models revealed a significant indirect effect of avoidance coping on depressive symptoms for those in the high victimization class. Findings provide a greater understanding of the complex patterns of peer victimization. Coping efforts among varying peer victimization classes had different relationships with health outcomes during the college years. 
Interventions aimed at reducing health-risk and depressive symptoms among college students might benefit from increased attention to high school victimization experiences and current coping processes. Supplemental data for this article is available online at https://doi.org/10.1080/08964289.2021.1946468 .",2021-07-21 +33311457,Single-nucleus transcriptomics reveals functional compartmentalization in syncytial skeletal muscle cells.,"Syncytial skeletal muscle cells contain hundreds of nuclei in a shared cytoplasm. We investigated nuclear heterogeneity and transcriptional dynamics in the uninjured and regenerating muscle using single-nucleus RNA-sequencing (snRNAseq) of isolated nuclei from muscle fibers. This revealed distinct nuclear subtypes unrelated to fiber type diversity, previously unknown subtypes as well as the expected ones at the neuromuscular and myotendinous junctions. In fibers of the Mdx dystrophy mouse model, distinct subtypes emerged, among them nuclei expressing a repair signature that were also abundant in the muscle of dystrophy patients, and a nuclear population associated with necrotic fibers. Finally, modifications of our approach revealed the compartmentalization in the rare and specialized muscle spindle. Our data identifies nuclear compartments of the myofiber and defines a molecular roadmap for their functional analyses; the data can be freely explored on the MyoExplorer server ( https://shiny.mdc-berlin.de/MyoExplorer/ ).",2020-12-11 +34289311,A Qualitative Evidence Synthesis of Parental Experiences and Perceptions of Parent-Child Interaction Therapy for Preschool Children With Communication Difficulties.,"Purpose Parent-child interaction therapy refers to a number of interventions mediated by trained parents to treat developmental difficulties, including speech, language, and communication. 
Understanding the experiences of parents who take part in parent-child interaction therapy is a key aspect of determining how this intervention can be implemented successfully. However, to date, there has been limited work on synthesizing parental views of this intervention. Method We used qualitative evidence synthesis that involved searching the literature for qualitative studies addressing the experiences and perceptions of parent-child interaction therapy for parents of preschool children with communication difficulties. We identified 27 studies (from 32 publications) and synthesized the data using thematic synthesis. We appraised the quality of included studies using Critical Appraisal Skills Programme (CASP) and assessed our confidence in the review findings using GRADE Confidence in the Evidence from Reviews of Qualitative research (CERQual). Results At the beginning of this intervention, parents may have competing demands and varied expectations about the intervention. Their engagement is facilitated when the intervention is tailored to their individual family, their preferences for learning, and when they have a trusting relationship with the clinician. At the end of the intervention, although most parents perceive an improvement in their child's communication and feel empowered to facilitate this, they have concerns about their child's future needs. Conclusions It is important that clinicians explore parents' readiness for this intervention by discussing their needs and preferences openly, and that they facilitate their engagement through a supportive relationship. They also need to consider how parents will transition out of the intervention and continue to support their child's language development. 
Supplemental Material https://doi.org/10.23641/asha.14978454.",2021-07-21 +33453123,Aethionema arabicum genome annotation using PacBio full-length transcripts provides a valuable resource for seed dormancy and Brassicaceae evolution research.,"Aethionema arabicum is an important model plant for Brassicaceae trait evolution, particularly of seed (development, regulation, germination, dormancy) and fruit (development, dehiscence mechanisms) characters. Its genome assembly was recently improved but the gene annotation was not updated. Here, we improved the Ae. arabicum gene annotation using 294 RNA-seq libraries and 136 307 full-length PacBio Iso-seq transcripts, increasing BUSCO completeness by 11.6% and featuring 5606 additional genes. Analysis of orthologs showed a lower number of genes in Ae. arabicum than in other Brassicaceae, which could be partially explained by loss of homeologs derived from the At-α polyploidization event and by a lower occurrence of tandem duplications after divergence of Aethionema from the other Brassicaceae. Benchmarking of MADS-box genes identified orthologs of FUL and AGL79 not found in previous versions. Analysis of full-length transcripts related to ABA-mediated seed dormancy discovered a conserved isoform of PIF6-β and antisense transcripts in ABI3, ABI4 and DOG1, among other cases found of different alternative splicing between Turkey and Cyprus ecotypes. The presented data allow alternative splicing mining and proposition of numerous hypotheses to research evolution and functional genomics. Annotation data and sequences are available at the Ae. arabicum DB (https://plantcode.online.uni-marburg.de/aetar_db).",2021-02-08 +32683441,proABC-2: PRediction of AntiBody contacts v2 and its application to information-driven docking.,"

Motivation

Monoclonal antibodies are essential tools in the contemporary therapeutic armory. Understanding how these recognize their antigen is a fundamental step in their rational design and engineering. The rising amount of publicly available data is catalyzing the development of computational approaches able to offer valuable, faster and cheaper alternatives to classical experimental methodologies used for the study of antibody-antigen complexes.

Results

Here, we present proABC-2, an update of the original random-forest antibody paratope predictor, based on a convolutional neural network algorithm. We also demonstrate how the predictions can be fruitfully used to drive the docking in HADDOCK.

Availability and implementation

The proABC-2 server is freely available at: https://wenmr.science.uu.nl/proabc2/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +31189880,A pan-cancer analysis of synonymous mutations.,"Synonymous mutations have been viewed as silent mutations, since they only affect the DNA and mRNA, but not the amino acid sequence of the resulting protein. Nonetheless, recent studies suggest their significant impact on splicing, RNA stability, RNA folding, translation or co-translational protein folding. Hence, we compile 659194 synonymous mutations found in human cancer and characterize their properties. We provide the user-friendly, comprehensive resource for synonymous mutations in cancer, SynMICdb ( http://SynMICdb.dkfz.de ), which also contains orthogonal information about gene annotation, recurrence, mutation loads, cancer association, conservation, alternative events, impact on mRNA structure and a SynMICdb score. Notably, synonymous and missense mutations are depleted at the 5'-end of the coding sequence as well as at the ends of internal exons independent of mutational signatures. For patient-derived synonymous mutations in the oncogene KRAS, we indicate that single point mutations can have a relevant impact on expression as well as on mRNA secondary structure.",2019-06-12 +31811943,GliomaDB: A Web Server for Integrating Glioma Omics Data and Interactive Analysis.,"Gliomas are one of the most common types of brain cancers. Numerous efforts have been devoted to studying the mechanisms of glioma genesis and identifying biomarkers for diagnosis and treatment. To help further investigations, we present a comprehensive database named GliomaDB. 
GliomaDB includes 21,086 samples from 4303 patients and integrates genomic, transcriptomic, epigenomic, clinical, and gene-drug association data regarding glioblastoma multiforme (GBM) and low-grade glioma (LGG) from The Cancer Genome Atlas (TCGA), Gene Expression Omnibus (GEO), the Chinese Glioma Genome Atlas (CGGA), the Memorial Sloan Kettering Cancer Center Integrated Mutation Profiling of Actionable Cancer Targets (MSK-IMPACT), the US Food and Drug Administration (FDA), and PharmGKB. GliomaDB offers a user-friendly interface for two main types of functionalities. The first comprises queries of (i) somatic mutations, (ii) gene expression, (iii) microRNA (miRNA) expression, and (iv) DNA methylation. In addition, queries can be executed at the gene, region, and base level. Second, GliomaDB allows users to perform survival analysis, coexpression network visualization, multi-omics data visualization, and targeted drug recommendations based on personalized variations. GliomaDB bridges the gap between glioma genomics big data and the delivery of integrated information for end users, thus enabling both researchers and clinicians to effectively use publicly available data and empowering the progression of precision medicine in glioma. GliomaDB is freely accessible at http://bigd.big.ac.cn/gliomaDB.",2019-08-01 +33937454,Dataset of the next-generation sequencing of variable 16S rRNA from bacteria and ITS2 regions from fungi and plants derived from honeybees kept under anthropogenic landscapes.,"Forager Apis mellifera honeybees were collected from four localities located in Europe, i.e.: London, UK; Athens, Greece; Marchamalo, Spain and Lublin, Poland. Furthermore, from Asia we have collected A. mellifera as well as A. cerana foragers from Chiang Mai in Thailand. We used next generation sequencing (NGS) to analyse the 16S rRNA bacterial gene amplicons based on the V3-V4 region and the ITS2 region from fungi and plants derived from honeybee samples. 
Amplicon libraries, were prepared using the 16S Metagenomic Sequencing Library Preparation, Preparing 16S Ribosomal RNA Gene Amplicons for the Illumina MiSeq System (Illumina®) protocol. NGS raw data are available at https://www.ncbi.nlm.nih.gov/bioproject/PRJNA686953. Furthermore, isolated DNA was used as the template for screening pathogens: Nosema apis, N. ceranae, N. bombi, tracheal mite (Acarapis woodi), any organism in the parasitic order Trypanosomatida, including Crithidia spp. (i.e., Crithidia mellificae), neogregarines including Mattesia and Apicystis spp. (i.e., Apicistis bombi). The presented data can be used to compare the metagenomic samples from different honeybee population all over the world. A higher load of fungi, and bacteria groups such as: Firmicutes (Lactobacillus); γ- proteobacteria, Neisseriaceae, and other unidentified bacteria was observed for Nosema cearana and neogregarines infected honeybees. Healthy honeybees had a higher load of plant pollens, and bacteria groups such as: Orbales, Gilliamella, Snodgrassella, and Enterobacteriaceae. More details can be found in research article [1] Ptaszyńska et al. 2021.",2021-04-02 +25161242,Polytomy refinement for the correction of dubious duplications in gene trees.,"

Motivation

Large-scale methods for inferring gene trees are error-prone. Correcting gene trees for weakly supported features often results in non-binary trees, i.e. trees with polytomies, thus raising the natural question of refining such polytomies into binary trees. A feature pointing toward potential errors in gene trees are duplications that are not supported by the presence of multiple gene copies.

Results

We introduce the problem of refining polytomies in a gene tree while minimizing the number of created non-apparent duplications in the resulting tree. We show that this problem can be described as a graph-theoretical optimization problem. We provide a bounded heuristic with guaranteed optimality for well-characterized instances. We apply our algorithm to a set of ray-finned fish gene trees from the Ensembl database to illustrate its ability to correct dubious duplications.

Availability and implementation

The C++ source code for the algorithms and simulations described in the article are available at http://www-ens.iro.umontreal.ca/~lafonman/software.php.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +31665262,mCSM-AB2: guiding rational antibody design using graph-based signatures.,"

Motivation

A lack of accurate computational tools to guide rational mutagenesis has made affinity maturation a recurrent challenge in antibody (Ab) development. We previously showed that graph-based signatures can be used to predict the effects of mutations on Ab binding affinity.

Results

Here we present an updated and refined version of this approach, mCSM-AB2, capable of accurately modelling the effects of mutations on Ab-antigen binding affinity, through the inclusion of evolutionary and energetic terms. Using a new and expanded database of over 1800 mutations with experimental binding measurements and structural information, mCSM-AB2 achieved a Pearson's correlation of 0.73 and 0.77 across training and blind tests, respectively, outperforming available methods currently used for rational Ab engineering.

Availability and implementation

mCSM-AB2 is available as a user-friendly and freely accessible web server providing rapid analysis of both individual mutations or the entire binding interface to guide rational antibody affinity maturation at http://biosig.unimelb.edu.au/mcsm_ab2.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +32587063,Microwave-Generated Steam Decontamination of N95 Respirators Utilizing Universally Accessible Materials. ,"The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) pandemic has caused a severe, international shortage of N95 respirators, which are essential to protect health care providers from infection. Given the contemporary limitations of the supply chain, it is imperative to identify effective means of decontaminating, reusing, and thereby conserving N95 respirator stockpiles. To be effective, decontamination must result in sterilization of the N95 respirator without impairment of respirator filtration or user fit. Although numerous methods of N95 decontamination exist, none are universally accessible. In this work, we describe a microwave-generated steam decontamination protocol for N95 respirators for use in health care systems of all sizes, geographies, and means. Using widely available glass containers, mesh from commercial produce bags, a rubber band, and a 1,100-W commercially available microwave, we constructed an effective, standardized, and reproducible means of decontaminating N95 respirators. Employing this methodology against MS2 phage, a highly conservative surrogate for SARS-CoV-2 contamination, we report an average 6-log10 plaque-forming unit (PFU) (99.9999%) and a minimum 5-log10 PFU (99.999%) reduction after a single 3-min microwave treatment. Notably, quantified respirator fit and function were preserved, even after 20 sequential cycles of microwave steam decontamination. This method provides a valuable means of effective decontamination and reuse of N95 respirators by frontline providers facing urgent need.IMPORTANCE Due to the rapid spread of coronavirus disease 2019 (COVID-19), there is an increasing shortage of protective gear necessary to keep health care providers safe from infection. 
As of 9 April 2020, the CDC reported 9,282 cumulative cases of COVID-19 among U.S. health care workers (CDC COVID-19 Response Team, MMWR Morb Mortal Wkly Rep 69:477-481, 2020, https://doi.org/10.15585/mmwr.mm6915e6). N95 respirators are recommended by the CDC as the ideal method of protection from COVID-19. Although N95 respirators are traditionally single use, the shortages have necessitated the need for reuse. Effective methods of N95 decontamination that do not affect the fit or filtration ability of N95 respirators are essential. Numerous methods of N95 decontamination exist; however, none are universally accessible. In this study, we describe an effective, standardized, and reproducible means of decontaminating N95 respirators using widely available materials. The N95 decontamination method described in this work will provide a valuable resource for hospitals, health care centers, and outpatient practices that are experiencing increasing shortages of N95 respirators due to the COVID-19 pandemic.",2020-06-25 +34155277,Newly diagnosed diabetes has high risk for cardiovascular outcome in ischemic stroke patients.,"We investigated cardiovascular outcomes in ischemic stroke patients with newly diagnosed diabetes mellitus (DM) compared with those of patients with previously known DM and no DM using the glycosylated hemoglobin (HbA1c) criteria. The relationship between new DM diagnosis and cardiovascular risk remains unclear to date. We performed post hoc analysis using the data of participants from the Prevention of Cardiovascular events in iSchemic Stroke patients with high risk of cerebral hemOrrhage (PICASSO) trial. Newly diagnosed DM was defined as HbA1c of ≥ 6.5% without known DM history. The outcome was the incidence of composite cardiovascular events, including stroke (ischemic and hemorrhagic), myocardial infarction, and cardiovascular death. 
In total, 1306 patients were included; 38 patients (2.9%) had newly diagnosed DM; 438 patients (33.5%), known DM; and 830 patients (63.6%), no DM. In patients with newly diagnosed DM, known DM, and no DM, the incidence of ischemic stroke was 8.93, 3.79, and 2.64 per 100 person-years (log-rank test; p = 0.0092), while that of composite cardiovascular events was 8.93, 5.92, and 3.87 per 100 person-years (p = 0.025), respectively. Newly diagnosed DM was an important risk factor for ischemic stroke and composite cardiovascular events after ischemic stroke.Registration: URL: https://www.clinicaltrials.gov . Unique identifier: NCT01013532.",2021-06-21 +33565189,Cancer SIGVAR: A semiautomated interpretation tool for germline variants of hereditary cancer-related genes.,"Cancer is one of the most important health issues globally and the accuracy of interpretation of cancer-related variants is critical for the clinical management of hereditary cancer. ClinGen Sequence Variant Interpretation Working Groups have developed many adaptations of American College of Medical Genetics and Genomics and the Association of Molecular Pathologists guidelines to improve the consistency of interpretation. We combined the most recent adaptations to expand the number of the criteria from 28 to 48 and developed a tool called Cancer SIGVAR to help genetic counselors interpret the clinical significance of cancer germline variants. Our tool can accept VCF files as input and realize fully automated interpretation based on 21 criteria and semiautomated interpretation based on 48 criteria. We validated the performance of our tool with the ClinVar and CLINVITAE benchmark databases, achieving an average consistency for pathogenic and benign assessment up to 93.71% and 79.38%, respectively. We compared Cancer SIGVAR with two similar tools, InterVar and PathoMAN, and analyzed the main differences in criteria and implementation. 
Furthermore, we selected 911 variants from another two in-house benchmark databases, and semiautomated interpretation reached an average classification consistency of 98.35%. Our findings highlight the need to optimize automated interpretation tools based on constantly updated guidelines. Cancer SIGVAR is publicly available at http://cancersigvar.bgi.com/.",2021-03-06 +33586340,Myogenesis modelled by human pluripotent stem cells: a multi-omic study of Duchenne myopathy early onset.,"

Background

Duchenne muscular dystrophy (DMD) causes severe disability of children and death of young men, with an incidence of approximately 1/5000 male births. Symptoms appear in early childhood, with a diagnosis made mostly around 4 years old, a time where the amount of muscle damage is already significant, preventing early therapeutic interventions that could be more efficient at halting disease progression. In the meantime, the precise moment at which disease phenotypes arise-even asymptomatically-is still unknown. Thus, there is a critical need to better define DMD onset as well as its first manifestations, which could help identify early disease biomarkers and novel therapeutic targets.

Methods

We have used both human tissue-derived myoblasts and human induced pluripotent stem cells (hiPSCs) from DMD patients to model skeletal myogenesis and compared their differentiation dynamics with that of healthy control cells by a comprehensive multi-omic analysis at seven time points. Results were strengthened with the analysis of isogenic CRISPR-edited human embryonic stem cells and through comparisons against published transcriptomic and proteomic datasets from human DMD muscles. The study was completed with DMD knockdown/rescue experiments in hiPSC-derived skeletal muscle progenitor cells and adenosine triphosphate measurement in hiPSC-derived myotubes.

Results

Transcriptome and miRnome comparisons combined with protein analyses demonstrated that hiPSC differentiation (i) leads to embryonic/foetal myotubes that mimic described DMD phenotypes at the differentiation endpoint and (ii) homogeneously and robustly recapitulates key developmental steps-mesoderm, somite, and skeletal muscle. Starting at the somite stage, DMD dysregulations concerned almost 10% of the transcriptome. These include mitochondrial genes whose dysregulations escalate during differentiation. We also describe fibrosis as an intrinsic feature of DMD skeletal muscle cells that begins early during myogenesis. All the omics data are available online for exploration through a graphical interface at https://muscle-dmd.omics.ovh/.

Conclusions

Our data argue for an early developmental manifestation of DMD whose onset is triggered before the entry into the skeletal muscle compartment, data leading to a necessary reconsideration of dystrophin roles during muscle development. This hiPSC model of skeletal muscle differentiation offers the possibility to explore these functions as well as find earlier DMD biomarkers and therapeutic targets.",2021-02-14 +26510927,The HAND Database: a gateway to understanding the role of HIV in HIV-associated neurocognitive disorders.,"

Background

Despite an augmented research effort and scale-up of highly active antiretroviral therapy, a high prevalence of HIV-1-associated neurocognitive disorders (HAND) persists in the HIV-infected population. Nearly 50 % of all HIV-1-infected individuals suffer from a neurocognitive disorder due to neural and synaptodendritic damage. Challenges in HAND research, including limited availability of brain tissue from HIV patients, variation in HAND study protocols, and virus genotyping inconsistency and errors, however, have resulted in studies with insufficient power to delineate molecular mechanisms underlying HAND pathogenesis. There exists, therefore, a great need for a reliable and centralized resource specific to HAND research, particularly for epidemiological study and surveillance in resource-limited countries where severe forms of HAND persist.

Description

To address the aforementioned imperative need, here we present the HAND Database, a resource containing well-curated and up-to-date HAND virus information and associated clinical and epidemiological data. This database provides information on 5,783 non-redundant HIV-1 sequences from global HAND research published to date, representing a total of 163 unique individuals that have been assessed for HAND. A user-friendly interface allows for flexible searching, filtering, browsing, and downloading of data. The most comprehensive database of its kind, the HAND Database not only bolsters current HAND research by increasing sampling power and reducing study biases caused by protocol variation and genotyping inconsistency, it allows for comparison between HAND studies across different dimensions. Development of the HAND Database has also revealed significant knowledge gaps in HIV-driven neuropathology. These gaps include inadequate sequencing of viral genes beyond env, lack of HAND viral data from HIV epidemiologically important regions including Asian and Sub-Saharan African countries, and biased sampling toward the male gender, all factors that impede efforts toward providing an improved quality of life to HIV-infected individuals, and toward elimination of viruses in the brain.

Conclusion

Our aim with the HAND database is to provide researchers in both the HIV and neuroscience fields a comprehensive and rigorous data source toward better understanding virus compartmentalization and to help in design of improved strategies against HAND viruses. We also expect this resource, which will be updated on a regular basis, to be useful as a reliable reference for further HAND epidemiology studies. The HAND Database is freely available and accessible online at http://www.handdatabase.org .",2015-10-28 +27997202,Peptigram: A Web-Based Application for Peptidomics Data Visualization.,"Tandem mass spectrometry (MS/MS) techniques, developed for protein identification, are increasingly being applied in the field of peptidomics. Using this approach, the set of protein fragments observed in a sample of interest can be determined to gain insights into important biological processes such as signaling and other bioactivities. As the peptidomics era progresses, there is a need for robust and convenient methods to inspect and analyze MS/MS derived data. Here, we present Peptigram, a novel tool dedicated to the visualization and comparison of peptides detected by MS/MS. The principal advantage of Peptigram is that it provides visualizations at both the protein and peptide level, allowing users to simultaneously visualize the peptide distributions of one or more samples of interest, mapped to their parent proteins. In this way rapid comparisons between samples can be made in terms of their peptide coverage and abundance. Moreover, Peptigram integrates and displays key sequence features from external databases and links with peptide analysis tools to offer the user a comprehensive peptide discovery resource. Here, we illustrate the use of Peptigram on a data set of milk hydrolysates. 
For convenience, Peptigram is implemented as a web application, and is freely available for academic use at http://bioware.ucd.ie/peptigram .",2016-12-20 +27789686,GenomeCRISPR - a database for high-throughput CRISPR/Cas9 screens.,"Over the past years, CRISPR/Cas9 mediated genome editing has developed into a powerful tool for modifying genomes in various organisms. In high-throughput screens, CRISPR/Cas9 mediated gene perturbations can be used for the systematic functional analysis of whole genomes. Discoveries from such screens provide a wealth of knowledge about gene to phenotype relationships in various biological model systems. However, a database resource to query results efficiently has been lacking. To this end, we developed GenomeCRISPR (http://genomecrispr.org), a database for genome-scale CRISPR/Cas9 screens. Currently, GenomeCRISPR contains data on more than 550 000 single guide RNAs (sgRNA) derived from 84 different experiments performed in 48 different human cell lines, comprising all screens in human cells using CRISPR/Cas published to date. GenomeCRISPR provides data mining options and tools, such as gene or genomic region search. Phenotypic and genome track views allow users to investigate and compare the results of different screens, or the impact of different sgRNAs on the gene of interest. An Application Programming Interface (API) allows for automated data access and batch download. As more screening data will become available, we also aim at extending the database to include functional genomic data from other organisms and enable cross-species comparisons.",2016-10-26 +33122990,A Benchmark Dataset for RSVP-Based Brain-Computer Interfaces.,"This paper reports on a benchmark dataset acquired with a brain-computer interface (BCI) system based on the rapid serial visual presentation (RSVP) paradigm. 
The dataset consists of 64-channel electroencephalogram (EEG) data from 64 healthy subjects (sub1,…, sub64) while they performed a target image detection task. For each subject, the data contained two groups (""A"" and ""B""). Each group contained two blocks, and each block included 40 trials that corresponded to 40 stimulus sequences. Each sequence contained 100 images presented at 10 Hz (10 images per second). The stimulus images were street-view images of two categories: target images with human and non-target images without human. Target images were presented randomly in the stimulus sequence with a probability of 1∼4%. During the stimulus presentation, subjects were asked to search for the target images and ignore the non-target images in a subjective manner. To keep all original information, the dataset was the raw continuous data without any processing. On one hand, the dataset can be used as a benchmark dataset to compare the algorithms for target identification in RSVP-based BCIs. On the other hand, the dataset can be used to design new system diagrams and evaluate their BCI performance without collecting any new data through offline simulation. Furthermore, the dataset also provides high-quality data for characterizing and modeling event-related potentials (ERPs) and steady-state visual evoked potentials (SSVEPs) in RSVP-based BCIs. The dataset is freely available from http://bci.med.tsinghua.edu.cn/download.html.",2020-10-02 +33793551,Predicting the cumulative medical load of COVID-19 outbreaks after the peak in daily fatalities.,"The distinct ways the COVID-19 pandemic has been unfolding in different countries and regions suggest that local societal and governmental structures play an important role not only for the baseline infection rate, but also for short and long-term reactions to the outbreak. 
We propose to investigate the question of how societies as a whole, and governments in particular, modulate the dynamics of a novel epidemic using a generalization of the SIR model, the reactive SIR (short-term and long-term reaction) model. We posit that containment measures are equivalent to a feedback between the status of the outbreak and the reproduction factor. Short-term reaction to an outbreak corresponds in this framework to the reaction of governments and individuals to daily cases and fatalities. The reaction to the cumulative number of cases or deaths, and not to daily numbers, is captured in contrast by long-term reaction. We present the exact phase space solution of the controlled SIR model and use it to quantify containment policies for a large number of countries in terms of short and long-term control parameters. We find increased contributions of long-term control for countries and regions in which the outbreak was suppressed substantially together with a strong correlation between the strength of societal and governmental policies and the time needed to contain COVID-19 outbreaks. Furthermore, for numerous countries and regions we identified a predictive relation between the number of fatalities within a fixed period before and after the peak of daily fatality counts, which allows to gauge the cumulative medical load of COVID-19 outbreaks that should be expected after the peak. These results suggest that the proposed model is applicable not only for understanding the outbreak dynamics, but also for predicting future cases and fatalities once the effectiveness of outbreak suppression policies is established with sufficient certainty. 
Finally, we provide a web app (https://itp.uni-frankfurt.de/covid-19/) with tools for visualising the phase space representation of real-world COVID-19 data and for exporting the preprocessed data for further analysis.",2021-04-01 +27638885,The international Genome sample resource (IGSR): A worldwide collection of genome variation incorporating the 1000 Genomes Project data.,"The International Genome Sample Resource (IGSR; http://www.internationalgenome.org) expands in data type and population diversity the resources from the 1000 Genomes Project. IGSR represents the largest open collection of human variation data and provides easy access to these resources. IGSR was established in 2015 to maintain and extend the 1000 Genomes Project data, which has been widely used as a reference set of human variation and by researchers developing analysis methods. IGSR has mapped all of the 1000 Genomes sequence to the newest human reference (GRCh38), and will release updated variant calls to ensure maximal usefulness of the existing data. IGSR is collecting new structural variation data on the 1000 Genomes samples from long read sequencing and other technologies, and will collect relevant functional data into a single comprehensive resource. IGSR is extending coverage with new populations sequenced by collaborating groups. Here, we present the new data and analysis that IGSR has made available. We have also introduced a new data portal that increases discoverability of our data-previously only browseable through our FTP site-by focusing on particular samples, populations or data sets of interest.",2016-09-15 +33667090,ZINC Express: A Virtual Assistant for Purchasing Compounds Annotated in the ZINC Database.,"Many laboratories working in the field of drug discovery use the ZINC database to identify and then acquire commercially available chemicals. 
However, finding the best deal for a given compound is often time-intensive and laborious, as the process involves searching for all vendors selling the desired compound, comparing prices, and interacting with the preferred vendor. To streamline this process, we have developed ZINC Express, a web application that simplifies the online purchase of chemicals annotated in the ZINC database. For any compound with a known ZINC ID, ZINC Express finds a list of vendors offering that compound and for each such vendor returns the available package quantities, the price of each package, and the price per milligram along with a link to that vendor. We expect that ZINC Express will be of use to both computational and experimental researchers. ZINC Express is freely accessible online at https://zincexpress.mml.unc.edu/.",2021-03-05 +33900211,"Effect of Diameter and Number of Hepatocellular Carcinomas on Survival After Resection, Transarterial Chemoembolization, and Ablation.","

Introduction

Most studies predicting survival after resection, transarterial chemoembolization (TACE), and ablation analyzed diameter and number of hepatocellular carcinomas (HCCs) as dichotomous variables, resulting in an underestimation of risk variation. We aimed to develop and validate a new prognostic model for patients with HCC using largest diameter and number of HCCs as continuous variables.

Methods

The prognostic model was developed using data from patients undergoing resection, TACE, and ablation in 645 Japanese institutions. The model results were shown after balanced using the inverse probability of treatment-weighted analysis and were externally validated in an international multi-institution cohort.

Results

Of 77,268 patients, 43,904 patients, including 15,313 (34.9%) undergoing liver resection, 13,375 (30.5%) undergoing TACE, and 15,216 (34.7%) undergoing ablation, met the inclusion criteria. Our model (http://www.u-tokyo-hbp-transplant-surgery.jp/about/calculation.html) showed that the 5-year overall survival (OS) in patients with HCC undergoing these procedures decreased with progressive incremental increases in diameter and number of HCCs. For patients undergoing resection, the inverse probability of treatment-weighted-adjusted 5-year OS probabilities were 10%-20% higher compared with patients undergoing TACE for 1-6 HCC lesions <10 cm and were also 10%-20% higher compared with patients undergoing ablation when the HCC diameter was 2-3 cm. For patients undergoing resection and TACE, the model performed well in the external cohort.

Discussion

Our novel prognostic model performed well in predicting OS after resection and TACE for HCC and demonstrated that resection may have a survival benefit over TACE and ablation based on the diameter and number of HCCs.",2021-08-01 +31608124,Predicting circRNA-Disease Associations Based on Improved Collaboration Filtering Recommendation System With Multiple Data.,"With the development of high-throughput techniques, various biological molecules are discovered, which includes the circular RNAs (circRNAs). Circular RNA is a novel endogenous noncoding RNA that plays significant roles in regulating gene expression, moderating the microRNAs transcription as sponges, diagnosing diseases, and so on. Based on the circRNA particular molecular structures that are closed-loop structures with neither 5'-3' polarities nor polyadenylated tails, circRNAs are more stable and conservative than the normal linear coding or noncoding RNAs, which makes circRNAs a biomarker of various diseases. Although some conventional experiments are used to identify the associations between circRNAs and diseases, almost the techniques and experiments are time-consuming and expensive. In this study, we propose a collaboration filtering recommendation system-based computational method, which handles the ""cold start"" problem to predict the potential circRNA-disease associations, which is named ICFCDA. All the known circRNA-disease associations data are downloaded from circR2Disease database (http://bioinfo.snnu.edu.cn/CircR2Disease/). Based on these data, multiple data are extracted from different databases to calculate the circRNA similarity networks and the disease similarity networks. The collaboration filtering recommendation system algorithm is first employed to predict circRNA-disease associations. Then, the leave-one-out cross validation mechanism is adopted to measure the performance of our proposed computational method. 
ICFCDA achieves the areas under the curve of 0.946, which is better than other existing methods. In order to further illustrate the performance of ICFCDA, case studies of some common diseases are made, and the results are confirmed by other databases. The experimental results show that ICFCDA is competent in predicting the circRNA-disease associations.",2019-09-25 +,3339 Development of a Competency-based Informatics Course for Translational Researchers,"OBJECTIVES/SPECIFIC AIMS: Translational researchers often require the use of informatics methods in their work. Lack of an understanding of key informatics principles and methods limits the abilities of translational researchers to successfully implement Findable, Accessible, Interoperable, Reusable (FAIR) principles in grant proposal submissions and performed studies. In this study we describe our work in addressing this limitation in the workforce by developing a competency-based, modular course in informatics to meet the needs of diverse translational researchers. METHODS/STUDY POPULATION: We established a Translational Research Informatics Education Collaborative (TRIEC) consisting of faculty at the University of Utah (UU) with different primary expertise in informatics methods, and working in different tiers of the translational spectrum. The TRIEC, in collaboration with the Foundation of Workforce Development of the Utah Center for Clinical and Translational Science (CCTS), gathered informatics needs of early investigators by consolidating requests for informatics services, assistance provided in grant writing, and consultations. We then reviewed existing courses and literature for informatics courses that focused on clinical and translational researchers [3–9]. 
Using the structure and content of the identified courses, we developed an initial draft of a syllabus for a Translational Research Informatics (TRI) course which included key informatics topics to be covered and learning activities, and iteratively refined it through discussions. The course was approved by the UU Department of Biomedical Informatics, UU Graduate School and the CCTS. RESULTS/ANTICIPATED RESULTS: The TRI course introduces informatics PhD students, clinicians, and public health practitioners who have a demonstrated interest in research, to fundamental principles and tools of informatics. At the completion of the course, students will be able to describe and identify informatics tools and methods relevant to translational research and demonstrate inter-professional collaboration in the development of a research proposal addressing a relevant translational science question that utilizes the state-of-the-art in informatics. TRI covers a diverse set of informatics content presented as modules: genomics and bioinformatics, electronic health records, exposomics, microbiomics, molecular methods, data integration and fusion, metadata management, semantics, software architectures, mobile computing, sensors, recruitment, community engagement, secure computing environments, data mining, machine learning, deep learning, artificial intelligence and data science, open source informatics tools and platforms, research reproducibility, and uncertainty quantification. The teaching methods for TRI include (1) modular didactic learning consisting of presentations and readings and face-to-face discussions of the content, (2) student presentations of informatics literature relevant to their final project, and (3) a final project consisting of the development, critique and chalk talk and formal presentations of informatics methods and/or aims of an National Institutes of Health style K or R grant proposal. 
For (3), the student presents their translational research proposal concept at the beginning of the course, and works with members of the TRIEC with corresponding expertise. The final course grade is a combination of the final project, paper presentations and class participation. We offered TRI to a first cohort of students in the Fall semester of 2018. DISCUSSION/SIGNIFICANCE OF IMPACT: Translational research informatics is a sub-domain of biomedical informatics that applies and develops informatics theory and methods for translational research. TRI covers a diverse set of informatics topics that are applicable across the translational spectrum. It covers both didactic material and hands-on experience in using the material in grant proposals and research studies. TRI’s course content, teaching methodology and learning activities enable students to initially learn factual informatics knowledge and skills for translational research correspond to the ‘Remember, Understand, and Apply’ levels of the Bloom’s taxonomy [10]. The final project provides opportunity for applying these informatics concepts corresponding to the ‘Analyze, Evaluate, and Create’ levels of the Bloom’s taxonomy [10]. This inter-professional, competency-based, modular course will develop an informatics-enabled workforce trained in using state-of-the-art informatics solutions, increasing the effectiveness of translational science and precision medicine, and promoting FAIR principles in research data management and processes. Future work includes opening the course to all Clinical and Translational Science Award hubs and publishing the course material as a reference book. While student evaluations for the first cohort will be available end of the semester, true evaluation of TRI will be the number of trainees taking the course and successful grant proposal submissions. References: 1. Wilkinson MD, Dumontier M, et al. The FAIR Guiding Principles for scientific data management and stewardship. Sci Data. 
2016 Mar 15. 2. National Center for Advancing Translational Sciences. Translational Science Spectrum. National Center for Advancing Translational Sciences. 2015 [cited 2018 Nov 15]. Available from: https://ncats.nih.gov/translation/spectrum 3. Hu H, Mural RJ, Liebman MN. Biomedical Informatics in Translational Research. 1 edition. Boston: Artech House; 2008. 264 p. 4. Payne PRO, Embi PJ, Niland J. Foundational biomedical informatics research in the clinical and translational science era: a call to action. J Am Med Inform Assoc JAMIA. 2010;17(6):615–6. 5. Payne PRO, Embi PJ, editors. Translational Informatics: Realizing the Promise of Knowledge-Driven Healthcare. Softcover reprint of the original 1st ed. 2015 edition. Springer; 2016. 196 p. 6. Richesson R, Andrews J, editors. Clinical Research Informatics. 2nd ed. Springer International Publishing; 2019. (Health Informatics). 7. Robertson D, MD GHW, editors. Clinical and Translational Science: Principles of Human Research. 2 edition. Amsterdam: Academic Press; 2017. 808 p. 8. Shen B, Tang H, Jiang X, editors. Translational Biomedical Informatics: A Precision Medicine Perspective. Softcover reprint of the original 1st ed. 2016 edition. S.l.: Springer; 2018. 340 p. 9. Valenta AL, Meagher EA, Tachinardi U, Starren J. Core informatics competencies for clinical and translational scientists: what do our customers and collaborators need to know? J Am Med Inform Assoc. 2016 Jul 1;23(4):835–9. 10. Anderson LW, Krathwohl DR, Airasian PW, Cruikshank KA, Mayer RE, Pintrich PR, Raths J, Wittrock MC. A Taxonomy for Learning, Teaching, and Assessing: A Revision of Bloom’s Taxonomy of Educational Objectives, Abridged Edition. 1 edition. 
New York: Pearson; 2000.",2019-03-01 +31745164,Hunter-Gatherers Harvested and Heated Microbial Biogenic Iron Oxides to Produce Rock Art Pigment.,"Red mineral pigment use is recognized as a fundamental component of a series of traits associated with human evolutionary development, social interaction, and behavioral complexity. Iron-enriched mineral deposits have been collected and prepared as pigment for use in rock art, personal adornment, and mortuary practices for millennia, yet little is known about early developments in mineral processing techniques in North America. Microanalysis of rock art pigments from the North American Pacific Northwest reveals a sophisticated use of iron oxide produced by the biomineralizing bacterium Leptothrix ochracea; a keystone species of chemolithotroph recognized in recent advances in the development of thermostable, colorfast biomaterial pigments. Here we show evidence for human engagement with this bacterium, including nanostructural and magnetic properties evident of thermal enhancement, indicating that controlled use of pyrotechnology was a key feature of how biogenic iron oxides were prepared into paint. Our results demonstrate that hunter-gatherers in this area of study prepared pigments by harvesting aquatic microbial iron mats dominated by iron-oxidizing bacteria, which were subsequently heated in large open hearths at a controlled range of 750 °C to 850 °C. This technical gesture was performed to enhance color properties, and increase colorfastness and resistance to degradation. This skilled production of highly thermostable and long-lasting rock art paint represents a specialized technological innovation. 
Our results contribute to a growing body of knowledge on historical-ecological resource use practices in the Pacific Northwest during the Late Holocene.Figshare link to figures: https://figshare.com/s/9392a0081632c20e9484.",2019-11-19 +32322110,Criteria of the German Consortium for Hereditary Breast and Ovarian Cancer for the Classification of Germline Sequence Variants in Risk Genes for Hereditary Breast and Ovarian Cancer.,"More than ten years ago, the German Consortium for Hereditary Breast and Ovarian Cancer (GC-HBOC) set up a panel of experts (VUS Task Force) which was tasked with reviewing the classifications of genetic variants reported by individual centres of the GC-HBOC to the central database in Leipzig and reclassifying them, where necessary, based on the most recent data. When it evaluates variants, the VUS Task Force must arrive at a consensus. The resulting classifications are recorded in a central database where they serve as a basis for ensuring the consistent evaluation of previously known and newly identified variants in the different centres of the GC-HBOC. The standardised VUS evaluation by the VUS Task Force is a key element of the recall system which has also been set up by the GC-HBOC. The system will be used to pass on information to families monitored and managed by GC-HBOC centres in the event that previously classified variants are reclassified based on new information. The evaluation algorithm of the VUS Task Force was compiled using internationally established assessment methods (IARC, ACMG, ENIGMA) and is presented here together with the underlying evaluation criteria used to arrive at the classification decision using a flow chart. In addition, the characteristics and special features of specific individual risk genes associated with breast and/or ovarian cancer are discussed in separate subsections. 
The URLs of relevant databases have also been included together with extensive literature references to provide additional information and cover the scope and dynamism of the current state of knowledge on the evaluation of genetic variants. In future, if criteria are updated based on new information, the update will be published on the website of the GC-HBOC ( https://www.konsortium-familiaerer-brustkrebs.de/ ).",2020-04-21 +33295052,Supporting habitat conservation with automated change detection in Google Earth Engine.,"A significant limitation in biodiversity conservation has been the effective implementation of laws and regulations that protect species' habitats from degradation. Flexible, efficient, and effective monitoring and enforcement methods are needed to help conservation policies realize their full benefit. As remote sensing data become more numerous and accessible, they can be used to identify and quantify land-cover changes and habitat loss. However, these data remain underused for systematic conservation monitoring in part because of a lack of simple tools. We adapted 2 algorithms that automatically identify differences between pairs of images. We used free, publicly available satellite data to evaluate their ability to rapidly detect land-cover changes in a variety of land-cover types. We compared algorithm predictions with ground-truthed results at 100 sites of known change in the United States. We also compared algorithm predictions to manually created polygons delineating anthropogenic change in 4 case studies involving imperiled species' habitat: oil and gas development in the range of the Greater Sage Grouse (Centrocercus urophasianus); sand mining operations in the range of the dunes sagebrush lizard (Sceloporus arenicolus); loss of Piping Plover (Charadrius melodus) coastal habitat after Hurricane Michael (2018); and residential development in St. Andrew beach mouse (Peromyscus polionotus peninsularis) habitat. 
Both algorithms effectively discriminated between pixels corresponding to land-cover change and unchanged pixels as indicated by area under a receiver operating characteristic curve >0.90. The algorithm that was most effective differed among the case-study habitat types, and both effectively delineated habitat loss as indicated by low omission (min. = 0.0) and commission (min. = 0.0) rates, and moderate polygon overlap (max. = 47%). Our results showed how these algorithms can be used to help close the implementation gap of monitoring and enforcement in biodiversity conservation. We provide a free online tool that can be used to run these analyses (https://conservationist.io/habitatpatrol).",2021-01-13 +33963730,Correlates of serum IGF-1 in young children with moderate acute malnutrition: a cross-sectional study in Burkina Faso.,"

Background

Serum insulin-like growth factor 1 (sIGF-1) is an important growth factor in childhood. However, studies on sIGF-1 among children from low-income countries are few, and the role of body composition is unknown.

Objectives

To assess the associations of anthropometry, body composition, inflammation, and breastfeeding with sIGF-1 among children with moderate acute malnutrition (MAM).

Methods

A cross-sectional study based on admission data from 6- to 23-mo-old children with MAM participating in a nutrition intervention trial (Treatfood) in Burkina Faso. Linear regression analysis was used to identify correlates of sIGF-1.

Results

Among 1546 children, the median (IQR) sIGF-1 was 12 (8.2-18.3) ng/mL. sIGF-1 was highest at 6 mo, with a nadir ∼10-11 mo, and higher in girls than boys. Length-for-age z score (LAZ), weight-for-length z score (WLZ), and midupper arm circumference were positively associated with sIGF-1 (P ≤ 0.001). Fat-free mass (FFM) was also positively associated, as sIGF-1 increased 1.5 (95% CI: 0.5, 2.5) ng/mL for each 1-kg increase in FFM. However, the association disappeared after adjustment for height. Elevated serum C-reactive protein and α1-acid glycoprotein were negatively associated with sIGF-1 (P ≤ 0.001), as was fever (P < 0.001) but not a positive malaria test per se (P = 0.15). Children never breastfed had lower sIGF-1 (-5.1; 95% CI: -9.8, -0.3).

Conclusions

LAZ and WLZ were positively and inflammation negatively associated with sIGF-1. As all children were moderately malnourished and many had inflammation, this probably explains the very low median sIGF-1. The association of FFM with sIGF-1 was fully explained by height. There was a marked age pattern, with a nadir in late infancy, confirming findings from smaller studies from well-nourished populations. There is a need for prospective studies to disentangle the role of sIGF-1 in growth and health. This trial was registered at https://www.isrctn.com as ISRCTN42569496.",2021-09-01 +33663541,Weighted gene co-expression network analysis to identify key modules and hub genes related to hyperlipidaemia.,"

Background

The purpose of this study was to explore the potential molecular targets of hyperlipidaemia and the related molecular mechanisms.

Methods

The microarray dataset of GSE66676 obtained from patients with hyperlipidaemia was downloaded. Weighted gene co-expression network (WGCNA) analysis was used to analyse the gene expression profile, and the royal blue module was considered to have the highest correlation. Gene Ontology (GO) functional and Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway enrichment analyses were implemented for the identification of genes in the royal blue module using the Database for Annotation, Visualization and Integrated Discovery (DAVID) online tool (version 6.8; http://david.abcc.ncifcrf.gov ). A protein-protein interaction (PPI) network was established by using the online STRING tool. Then, several hub genes were identified by the MCODE and cytoHubba plug-ins in Cytoscape software.

Results

The significant module (royal blue) identified was associated with TC, TG and non-HDL-C. GO and KEGG enrichment analyses revealed that the genes in the royal blue module were associated with carbon metabolism, steroid biosynthesis, fatty acid metabolism and biosynthesis pathways of unsaturated fatty acids. SQLE (degree = 17) was revealed as a key molecule associated with hypercholesterolaemia (HCH), and SCD was revealed as a key molecule associated with hypertriglyceridaemia (HTG). RT-qPCR analysis also confirmed the above results based on our HCH/HTG samples.

Conclusions

SQLE and SCD are related to hyperlipidaemia, and SQLE/SCD may be new targets for cholesterol-lowering or triglyceride-lowering therapy, respectively.",2021-03-04 +33304964,"Tissue, urine and serum NMR metabolomics dataset from a 5/6 nephrectomy rat model of chronic kidney disease.","Serum, urine and tissue from a rat model of chronic kidney disease (CKD) were analysed using nuclear magnetic resonance (NMR) spectroscopy-based metabolomics methods, and compared with samples from sham operated rats. Both urine and serum were sampled at multiple timepoints, and the results have been reported elsewhere (https://doi.org/10.1007/s11306-019-1569-3[1]). The data could be useful to researchers working with human CKD or rat models of the disease. In addition, several different types of NMR spectra were recorded, including 1D NOESY, CPMG, and 2D J-resolved spectra, and the data could be useful for method comparison and algorithm development, both in terms of NMR spectroscopy and multivariate analysis.",2020-11-23 +32499815,"LuluDB-The Database Created Based on Small RNA, Transcriptome, and Degradome Sequencing Shows the Wide Landscape of Non-coding and Coding RNA in Yellow Lupine (Lupinus luteus L.) Flowers and Pods.","Yellow lupine (Lupinus luteus L.) belongs to a legume family that benefits from symbiosis with nitrogen-fixing bacteria. Its seeds are rich in protein, which makes it a valuable food source for animals and humans. Yellow lupine is also the model plant for basic research on nodulation or abscission of organs. Nevertheless, the knowledge about the molecular regulatory mechanisms of its generative development is still incomplete. The RNA-Seq technique is becoming more prominent in high-throughput identification and expression profiling of both coding and non-coding RNA sequences. However, the huge amount of data generated with this method may discourage other scientific groups from making full use of them. 
To overcome this inconvenience, we have created a database containing analysis-ready information about non-coding and coding L. luteus RNA sequences (LuluDB). LuluDB was created on the basis of RNA-Seq analysis of small RNA, transcriptome, and degradome libraries obtained from yellow lupine cv. Taper flowers, pod walls, and seeds in various stages of development, flower pedicels, and pods undergoing abscission or maintained on the plant. It contains sequences of miRNAs and phased siRNAs identified in L. luteus, information about their expression in individual samples, and their target sequences. LuluDB also contains identified lncRNAs and protein-coding RNA sequences with their organ expression and annotations to widely used databases like GO, KEGG, NCBI, Rfam, Pfam, etc. The database also provides sequence homology search by BLAST using, e.g., an unknown sequence as a query. To present the full capabilities offered by our database, we performed a case study concerning transcripts annotated as DCL 1-4 (DICER LIKE 1-4) homologs involved in small non-coding RNA biogenesis and identified miRNAs that most likely regulate DCL1 and DCL2 expression in yellow lupine. LuluDB is available at http://luluseqdb.umk.pl/basic/web/index.php.",2020-05-15 +34123358,Automated generation of context-specific gene regulatory networks with a weighted approach in Drosophila melanogaster.,"The regulation of gene expression is a key factor in the development and maintenance of life in all organisms. Even so, little is known at whole genome scale for most genes and contexts. We propose a method, Tool for Weighted Epigenomic Networks in Drosophila melanogaster (Fly T-WEoN), to generate context-specific gene regulatory networks starting from a reference network that contains all known gene regulations in the fly. Unlikely regulations are removed by applying a series of knowledge-based filters. 
Each of these filters is implemented as an independent module that considers a type of experimental evidence, including DNA methylation, chromatin accessibility, histone modifications and gene expression. Fly T-WEoN is based on heuristic rules that reflect current knowledge on gene regulation in D. melanogaster obtained from the literature. Experimental data files can be generated with several standard procedures and used solely when and if available. Fly T-WEoN is available as a Cytoscape application that permits integration with other tools and facilitates downstream network analysis. In this work, we first demonstrate the reliability of our method to then provide a relevant application case of our tool: early development of D. melanogaster. Fly T-WEoN together with its step-by-step guide is available at https://weon.readthedocs.io.",2021-06-11 +34020445,Effect on gut microbiota of a 1-y lifestyle intervention with Mediterranean diet compared with energy-reduced Mediterranean diet and physical activity promotion: PREDIMED-Plus Study.,"

Background

The Mediterranean diet is a well-recognized healthy diet that has shown to induce positive changes in gut microbiota. Lifestyle changes such as diet along with physical activity could aid in weight loss and improve cardiovascular risk factors.

Objectives

To investigate the effect of an intensive lifestyle weight loss intervention on gut microbiota.

Methods

This is a substudy of the PREDIMED-Plus (Prevención con Dieta Mediterránea-Plus), a randomized controlled trial conducted in overweight/obese men and women (aged 55-75 y) with metabolic syndrome. The intervention group (IG) underwent an intensive weight loss lifestyle intervention based on an energy-restricted Mediterranean diet (MedDiet) and physical activity promotion, and the control group (CG) underwent a non-energy-restricted MedDiet for 1 y. Anthropometric, biochemical, and gut microbial 16S rRNA sequencing data were analyzed at baseline (n = 362) and 1-y follow-up (n = 343).

Results

IG participants had a weight loss of 4.2 (IQR, -6.8, -2.5) kg compared with 0.2 (IQR, -2.1, 1.4) kg in the CG (P < 0.001). Reductions in BMI, fasting glucose, glycated hemoglobin, and triglycerides and an increase in HDL cholesterol were greater in IG than in CG participants (P < 0.05). We observed a decrease in Butyricicoccus, Haemophilus, Ruminiclostridium 5, and Eubacterium hallii in the IG compared with the CG. Many genera shifted in the same direction within both intervention groups, indicating an overall effect of the MedDiet. Decreases in Haemophilus, Coprococcus 3, and few other genera were associated with a decrease in adiposity parameters in both intervention groups. Changes in Lachnospiraceae NK4A136 were positively associated with changes in MedDiet adherence.

Conclusions

Weight loss induced by an energy-restricted MedDiet and physical activity induce changes in gut microbiota. The role of MedDiet-induced changes on the host might be via short-chain fatty acid producing bacteria, whereas with energy restriction, these changes might be modulated with other mechanisms, which need to be explored in future studies. This trial was registered at http://www.isrctn.com/ISRCTN89898870 as ISRCT 89898870.",2021-09-01 +34378468,Strategies to Prevent Readmissions to Hospital for COPD: A Systematic Review.,"Patients with chronic obstructive pulmonary disease (COPD) experience high rates of hospital readmissions, placing substantial clinical and economic strain on the healthcare system. Therefore, it is essential to implement evidence-based strategies for preventing these readmissions. The primary objective of our systematic review was to identify and describe the domains of existing primary research on strategies aimed at reducing hospital readmissions among adult patients with COPD. We also aimed to identify existing gaps in the literature to facilitate future research efforts. A total of 843 studies were captured by the initial search and 96 were included in the final review (25 randomized controlled trials, 37 observational studies, and 34 non-randomized interventional studies). Of the included studies, 72% (n = 69) were considered low risk of bias. The majority of included studies (n = 76) evaluated patient-level readmission prevention strategies (medication and other treatments (n = 25), multi-modal (n = 19), follow-up (n = 16), telehealth (n = 8), education and coaching (n = 8)). Fewer assessed broader system- (n = 13) and policy-level (n = 7) strategies. We observed a trend toward reduced all-cause readmissions with the use of medication and other treatments, as well as a trend toward reduced COPD-related readmissions with the use of multi-modal and broader scale system-level interventions. 
Notably, much of this evidence supported shorter-term (30-day) readmission outcomes, while little evidence was available for longer-term outcomes. These findings should be interpreted with caution, as considerable between-study heterogeneity was also identified. Overall, this review identified several evidence-based interventions for reducing readmissions among patients with COPD that should be targeted for future research.Supplemental data for this article is available online at https://doi.org/10.1080/15412555.2021.1955338 .",2021-08-11 +31304213,OakEcol: A database of Oak-associated biodiversity within the UK.,"Globally there is increasing concern about the decline in the health of oak Quercus trees. The impact of a decline in oak trees on associated biodiversity, species that utilize oak trees, is unknown. Here we collate a database of all known birds, bryophytes, fungi, invertebrates, lichens and mammals that use oak (Quercus petraea and Q. robur) in the UK. In total 2300 species are listed in the database. For each species we provide a level of association with oak, ranging from obligate (only found on oak) to cosmopolitan (found on a wide range of other tree species). Data on the ecology of each oak associated species was collated: part of tree used, use made of tree (feeding, roosting, breeding), age of tree, woodland type, tree form (coppice, pollarded, or natural growth form) and season when the tree was used. Data on use or otherwise by each of the 2300 species of 30 other tree species was also collated. A complete list of data sources is provided. For further insights into how this data can be used see Collapsing foundations: The ecology of the British oak, implications of its decline and mitigation options [1]. 
Data can be found at EIDC https://doi.org/10.5285/22b3d41e-7c35-4c51-9e55-0f47bb845202.",2019-06-10 +31026199,Poorer Speech Reception Threshold in Noise Is Associated With Lower Brain Volume in Auditory and Cognitive Processing Regions.,"Purpose Hearing loss is associated with changes in brain volume in regions supporting auditory and cognitive processing. The purpose of this study was to determine whether there is a systematic association between hearing ability and brain volume in cross-sectional data from a large nonclinical cohort of middle-aged adults available from the UK Biobank Resource ( http://www.ukbiobank.ac.uk ). Method We performed a set of regression analyses to determine the association between speech reception threshold in noise (SRTn) and global brain volume as well as predefined regions of interest (ROIs) based on T1-weighted structural images, controlling for hearing-related comorbidities and cognition as well as demographic factors. In a 2nd set of analyses, we additionally controlled for hearing aid (HA) use. We predicted statistically significant associations globally and in ROIs including auditory and cognitive processing regions, possibly modulated by HA use. Results Whole-brain gray matter volume was significantly lower for individuals with poorer SRTn. Furthermore, the volume of 9 predicted ROIs including both auditory and cognitive processing regions was lower for individuals with poorer SRTn. The greatest percentage difference (-0.57%) in ROI volume relating to a 1 SD worsening of SRTn was found in the left superior temporal gyrus. HA use did not substantially modulate the pattern of association between brain volume and SRTn. Conclusions In a large middle-aged nonclinical population, poorer hearing ability is associated with lower brain volume globally as well as in cortical and subcortical regions involved in auditory and cognitive processing, but there was no conclusive evidence that this effect is moderated by HA use. 
This pattern of results supports the notion that poor hearing leads to reduced volume in brain regions recruited during speech understanding under challenging conditions. These findings should be tested in future longitudinal, experimental studies. Supplemental Material https://doi.org/10.23641/asha.7949357.",2019-04-01 +,Improvement of the alkali stability of Penicillium cyclopium lipase by error-prone PCR,"Lipases are extensively exploited in lots of industrial fields; cold-adapted lipases with alkali-resistance are especially desired in detergent industry. Penicillium cyclopium lipase I (PCL) might be suitable for applications of detergent industry due to its high catalytic efficiency at low temperature and relatively good alkali stability. In this study, to better meet the requirements, the alkali stability of PCL was further improved via directed evolution with error-prone PCR.The mutant PCL (N157F) with an improved alkali stability was selected based on a high-throughput activity assay. After incubating at pH 11.0 for 120 min, N157F retained 70% of its initial activity, which was 23% higher than that of wild type PCL. Combined with the three-dimensional structure analysis, N157F exhibited an improved alkali stability under the high pH condition due to the interactions of hydrophilicity and β-strand propensity.This work provided the theoretical foundation and preliminary data for improving alkali stability of PCL to meet the industrial requirements, which is also beneficial to improving alkali-tolerance ability of other industrial enzymes via molecular modification.How to cite: Huang L, Zheng D, Zhao Y, et al. Improvement of the alkali stability of Penicillium cyclopium lipase by error-prone PCR. Electron J Biotechnol 2019;39. https://doi.org/10.1016/j.ejbt.2019.04.002",2019-05-01 +33532821,Detecting High Scoring Local Alignments in Pangenome Graphs. ,"Increasing amounts of individual genomes sequenced per species motivate the usage of pangenomic approaches. 
Pangenomes may be represented as graphical structures, e.g. compacted colored de Bruijn graphs, which offer a low memory usage and facilitate reference-free sequence comparisons. While sequence-to-graph mapping to graphical pangenomes has been studied for some time, no local alignment search tool in the vein of BLAST has been proposed yet. We present a new heuristic method to find maximum scoring local alignments of a DNA query sequence to a pangenome represented as a compacted colored de Bruijn graph. Our approach additionally allows a comparison of similarity among sequences within the pangenome. We show that local alignment scores follow an exponential-tail distribution similar to BLAST scores, and we discuss how to estimate its parameters to separate local alignments representing sequence homology from spurious findings. An implementation of our method is presented, and its performance and usability are shown. Our approach scales sublinearly in running time and memory usage with respect to the number of genomes under consideration. This is an advantage over classical methods that do not make use of sequence similarity within the pangenome. Source code and test data are available from https://gitlab.ub.uni-bielefeld.de/gi/plast. Supplementary data are available at Bioinformatics online.",2021-02-03 +33786017,The incubation period of coronavirus disease (COVID-19): A tremendous public health threat-Forecasting from publicly available case data in India.,"The World Health Organization (WHO) declared the Coronavirus Disease (COVID-19) a pandemic due to the huge upsurge in the number of reported cases worldwide. The COVID-19 pandemic in India has become a public health threat, and if we go by the number of confirmed cases then the situation seems to be a matter of grave concern. According to real-time data, the numbers of confirmed cases are growing exponentially. 
No doubt, substantial public health interventions both at the national and state levels are implemented immediately by the Government of India; there is a need for improved preparedness plans and mitigation strategies along with accurate forecasting. The present study aims to forecast the COVID-19 outbreak infected cases in India. The data have been obtained from https://www.covid19india.org, https://www.worldometers.info/coronavirus, and ICMR reported publicly available information about COVID-19 confirmation cases. We have used the double exponential smoothing method for forecasting the trends in terms of confirmed, active, recovered and death cases from COVID-19 for emergency preparedness and future predictions. Findings reveal that the estimated value of point forecast is just 8.22% of the total number of confirmed cases reported on a daily basis across the country. It was observed that the deaths were lower for the states and union territories with a higher detection rate. It is suggested that by keeping in view the limited healthcare resources in the country, accurate forecasting, early detection, and avoidance of acute care for the majority of infected cases is indispensable.",2021-02-03 +30964323,The Carcinogenome Project: In Vitro Gene Expression Profiling of Chemical Perturbations to Predict Long-Term Carcinogenicity.,"

Background

Most chemicals in commerce have not been evaluated for their carcinogenic potential. The de facto gold-standard approach to carcinogen testing adopts the 2-y rodent bioassay, a time-consuming and costly procedure. High-throughput in vitro assays are a promising alternative for addressing the limitations in carcinogen screening.

Objectives

We developed a screening process for predicting chemical carcinogenicity and genotoxicity and characterizing modes of actions (MoAs) using in vitro gene expression assays.

Methods

We generated a large toxicogenomics resource comprising [Formula: see text] expression profiles corresponding to 330 chemicals profiled in HepG2 (human hepatocellular carcinoma cell line) at multiple doses and replicates. Predictive models of carcinogenicity and genotoxicity were built using a random forest classifier. Differential pathway enrichment analysis was performed to identify pathways associated with carcinogen exposure. Signatures of carcinogenicity and genotoxicity were compared with external sources, including Drugmatrix and the Connectivity Map.

Results

Among profiles with sufficient bioactivity, our classifiers achieved 72.2% Area Under the ROC Curve (AUC) for predicting carcinogenicity and 82.3% AUC for predicting genotoxicity. Chemical bioactivity, as measured by the strength and reproducibility of the transcriptional response, was not significantly associated with long-term carcinogenicity in doses up to [Formula: see text]. However, sufficient bioactivity was necessary for a chemical to be used for prediction of carcinogenicity. Pathway enrichment analysis revealed pathways consistent with known pathways that drive cancer, including DNA damage and repair. The data is available at https://clue.io/CRCGN_ABC , and a portal for query and visualization of the results is accessible at https://carcinogenome.org .

Discussion

We demonstrated an in vitro screening approach using gene expression profiling to predict carcinogenicity and infer MoAs of chemical perturbations. https://doi.org/10.1289/EHP3986.",2019-04-01 +31641158,"Diat.barcode, an open-access curated barcode library for diatoms.","Diatoms (Bacillariophyta) are ubiquitous microalgae which produce a siliceous exoskeleton and which make a major contribution to the productivity of oceans and freshwaters. They display a huge diversity, which makes them excellent ecological indicators of aquatic ecosystems. Usually, diatoms are identified using characteristics of their exoskeleton morphology. DNA-barcoding is an alternative to this and the use of High-Throughput-Sequencing enables the rapid analysis of many environmental samples at a lower cost than analyses under microscope. However, to identify environmental sequences correctly, an expertly curated reference library is needed. Several curated libraries for protists exists; none, however are dedicated to diatoms. Diat.barcode is an open-access library dedicated to diatoms which has been maintained since 2012. Data come from two sources (1) the NCBI nucleotide database and (2) unpublished sequencing data of culture collections. Since 2017, several experts have collaborated to curate this library for rbcL, a chloroplast marker suitable for species-level identification of diatoms. For the latest version of the database (version 7), 605 of the 3482 taxonomical names originally assigned by the authors of the rbcL sequences were modified after curation. The database is accessible at https://www6.inra.fr/carrtel-collection_eng/Barcoding-database .",2019-10-22 +34456567,Nomogram for the Prediction of Intrahospital Mortality Risk of Patients with ST-Segment Elevation Myocardial Infarction Complicated with Hyperuricemia: A Multicenter Retrospective Study.,"

Purpose

This study aimed to establish an accurate and easy predictive model for ST-segment elevation myocardial infarction (STEMI) patients with hyperuricemia, using readily available features to estimate intrahospital mortality risk.

Patients and methods

This was a multicenter retrospective study involving the development of risk prediction models for intrahospital mortality among all STEMI patients with hyperuricemia from Zunyi Medical University Chest Pain Center's specialized alliance between January 1, 2016 and June 30, 2020. The primary outcome was intrahospital mortality. A total of 48 candidate variables were considered from demographic and clinical data. The least absolute shrinkage and selection operator (LASSO) was used to develop a nomogram. Concordance index values, decision curve analysis, the area under the curve (AUC), and clinical impact curves were examined. In this study, 489 patients with STEMI were included in the training dataset and an additional 209 patients from the 44 chest pain centers were included in the test cohort. B-type natriuretic peptides, α-hydroxybutyrate dehydrogenase (α-HBDH), cystatin C, out-of-hospital cardiac arrest (OHCA), shock index, and neutrophil-to-lymphocyte ratio were associated with intrahospital mortality and included in the nomogram.

Results

The model showed good discrimination power, and the AUC generated to predict survival in the training set was 0.875 (95% confidence interval, 0.825-0.925). In the validation set, the AUC of survival predictions was 0.87 (95% confidence interval, 0.792-0.947). Calibration plots and decision curve analysis showed good model performance in both datasets. A web-based calculator (https://bzxzmu.shinyapps.io/STEMI-with-Hyperuricemia-intrahospital-mortality/) was established based on the nomogram model, which was used to measure the levels of OHCA, neutrophil-to-lymphocyte ratio, shock index, α-HBDH, cystatin C, and B-type natriuretic peptides.

Conclusion

For practical applications, this model may prove clinically useful for personalized therapy management in patients with STEMI with hyperuricemia.",2021-08-21 +33137091,netboxr: Automated discovery of biological process modules by network analysis in R.,"

Summary

Large-scale sequencing projects, such as The Cancer Genome Atlas (TCGA) and the International Cancer Genome Consortium (ICGC), have generated high throughput sequencing and molecular profiling data sets, but it is still challenging to identify potentially causal changes in cellular processes in cancer as well as in other diseases in an automated fashion. We developed the netboxr package written in the R programming language, which makes use of the NetBox algorithm to identify candidate cancer-related functional modules. The algorithm makes use of a data-driven, network-based approach that combines prior knowledge with a network clustering algorithm, obviating the need for and the limitation of independently curated functionally labeled gene sets. The method can combine multiple data types, such as mutations and copy number alterations, leading to more reliable identification of functional modules. We make the tool available in the Bioconductor R ecosystem for applications in cancer research and cell biology.

Availability and implementation

The netboxr package is free and open-sourced under the GNU GPL-3 license R package available at https://www.bioconductor.org/packages/release/bioc/html/netboxr.html.",2020-11-02 +27701074,"CeNDR, the Caenorhabditis elegans natural diversity resource.","Studies in model organisms have yielded considerable insights into the etiology of disease and our understanding of evolutionary processes. Caenorhabditis elegans is among the most powerful model organisms used to understand biology. However, C. elegans is not used as extensively as other model organisms to investigate how natural variation shapes traits, especially through the use of genome-wide association (GWA) analyses. Here, we introduce a new platform, the C. elegans Natural Diversity Resource (CeNDR) to enable statistical genetics and genomics studies of C. elegans and to connect the results to human disease. CeNDR provides the research community with wild strains, genome-wide sequence and variant data for every strain, and a GWA mapping portal for studying natural variation in C. elegans. Additionally, researchers outside of the C. elegans community can benefit from public mappings and integrated tools for comparative analyses. CeNDR uses several databases that are continually updated through the addition of new strains, sequencing data, and association mapping results. The CeNDR data are accessible through a freely available web portal located at http://www.elegansvariation.org or through an application programming interface.",2016-10-03 +33663822,Screening of specific nucleic acid targets for Cronobacter sakazakii and visual detection by loop-mediated isothermal amplification and lateral flow dipstick method in powdered infant formula.,"Due to the lack of specific genes for rapid detection methods of Cronobacter sakazakii in food samples, whole genome sequence analysis was performed in this investigation using the basic local alignment search tool. Forty-two DNA fragments unique to C. 
sakazakii were mined, then primers were designed and screened by PCR and loop-mediated isothermal amplification (LAMP). Two primer sets, CS1 and CS31, were found as specific and stable primers, with their corresponding nucleic acid targets the CSK29544_00235 gene and CSK29544_03484 gene, respectively. Furthermore, compared with 3 genes reported previously, these 2 genes were verified as more specific to C. sakazakii among Cronobacter species, by sequence similarity alignment using Cronobacter MLST databases (http://pubmlst.org/cronobacter). The specificity of the LAMP reaction approached 100% by using 48 bacterial strains, which included 22 C. sakazakii strains. Subsequently, LAMP was combined with visual lateral flow dipstick (LFD) based on the above 2 nucleic acid targets, and was demonstrated as a rapid, efficient method with high specificity. Finally, the detection sensitivity of this assay system for pure cultures and artificially contaminated milk was measured as 4.5 × 10^0 cfu/mL and 5.7 × 10^1 cfu/g, respectively. Total time to detection for this assay was within 2 h. Thus, the establishment of this LAMP-LFD method shows great significance and potential for rapid detection of C. sakazakii in powdered infant formula.",2021-03-02 +33982382,New perspectives on the calculation of bioaccumulation metrics for active substances in living organisms.,"Today, only few ready-to-use and convenient decision-making tools are available in ecotoxicology concerning accumulation and effects of chemical substances on organisms, accounting for exposure situations that are known to be complex (routes of exposure, metabolism, mixtures, etc.). This paper presents new perspectives on the generic calculation of bioaccumulation metrics via the innovative web tool MOSAICbioacc (http://mosaic.univ-lyon1.fr/bioacc). MOSAICbioacc provides all kinds of bioaccumulation metrics associated with their uncertainty whatever the species-compound combination. 
MOSAICbioacc expects accumulation-depuration data as inputs, even with complex exposure and clearance patterns, to quickly perform their relevant analysis. MOSAICbioacc intends to facilitate the daily work of regulators, or any ecotoxicologist, who will freely benefit from a user-friendly online interface that automatically fits toxicokinetic models without need for users to invest in the technical aspects to get bioaccumulation metrics estimates. MOSAICbioacc also provides all results in a fully transparent way to ensure reproducibility. Integr Environ Assess Manag 2022;18:10-18. © 2021 SETAC.",2021-06-01 +33119754,HeRA: an atlas of enhancer RNAs across human tissues.,"Enhancer RNA (eRNA) is a type of long non-coding RNA transcribed from DNA enhancer regions. Despite critical roles of eRNA in gene regulation, the expression landscape of eRNAs in normal human tissue remains unexplored. Using numerous samples from the Genotype-Tissue Expression project, we characterized 45 411 detectable eRNAs and identified tens of thousands of associations between eRNAs and traits, including gender, race, and age. We constructed a co-expression network to identify millions of putative eRNA regulators and target genes across different tissues. We further constructed a user-friendly data portal, Human enhancer RNA Atlas (HeRA, https://hanlab.uth.edu/HeRA/). In HeRA, users can search, browse, and download the eRNA expression profile, trait-related eRNAs, and eRNA co-expression network by searching the eRNA ID, gene symbol, and genomic region in one or multiple tissues. 
HeRA is the first data portal to characterize eRNAs from 9577 samples across 54 human tissues and facilitates functional and mechanistic investigations of eRNAs.",2021-01-01 +33983414,CNVfilteR: an R/bioconductor package to identify false positives produced by germline NGS CNV detection tools.,"Germline copy-number variants (CNVs) are relevant mutations for multiple genetics fields, such as the study of hereditary diseases. However, available benchmarks show that all next-generation sequencing (NGS) CNV calling tools produce false positives. We developed CNVfilteR, an R package that uses the single nucleotide variant calls usually obtained in germline NGS pipelines to identify those false positives. The package can detect both false deletions and false duplications. We evaluated CNVfilteR performance on callsets generated by 13 CNV calling tools on 3 whole-genome sequencing and 541 panel samples, showing a decrease of up to 44.8% in false positives and consistent F1-score increase. Using CNVfilteR to detect false-positive calls can improve the overall performance of existing CNV calling pipelines.

Availability

CNVfilteR is released under Artistic-2.0 License. Source code and documentation are freely available at Bioconductor (http://www.bioconductor.org/packages/CNVfilteR).

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-13 +33461215,Ligand-based approach for predicting drug targets and for virtual screening against COVID-19.,"Discovering efficient drugs and identifying target proteins are still an unmet but urgent need for curing coronavirus disease 2019 (COVID-19). Protein structure-based docking is a widely applied approach for discovering active compounds against drug targets and for predicting potential targets of active compounds. However, this approach has its inherent deficiency caused by e.g. various different conformations with largely varied binding pockets adopted by proteins, or the lack of true target proteins in the database. This deficiency may result in false negative results. As a complementary approach to the protein structure-based platform for COVID-19, termed as D3Docking in our previous work, we developed in this study a ligand-based method, named D3Similarity, which is based on the molecular similarity evaluation between the submitted molecule(s) and those in an active compound database. The database is constituted by all the reported bioactive molecules against the coronaviruses, viz., severe acute respiratory syndrome coronavirus (SARS), Middle East respiratory syndrome coronavirus (MERS), severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2), human betacoronavirus 2c EMC/2012 (HCoV-EMC), human CoV 229E (HCoV-229E) and feline infectious peritonitis virus (FIPV), some of which have target or mechanism information but some do not. Based on the two-dimensional (2D) and three-dimensional (3D) similarity evaluation of molecular structures, virtual screening and target prediction could be performed according to similarity ranking results. With two examples, we demonstrated the reliability and efficiency of D3Similarity by using 2D × 3D value as score for drug discovery and target prediction against COVID-19. 
The database, which will be updated regularly, is available free of charge at https://www.d3pharma.com/D3Targets-2019-nCoV/D3Similarity/index.php.",2021-03-01 +32911539,Anticoagulant therapy for splanchnic vein thrombosis: a systematic review and meta-analysis.,"Treatment of splanchnic vein thrombosis (SVT) is challenging, and evidence to guide therapeutic decisions remains scarce. The objective of this systematic review and meta-analysis was to determine the efficacy and safety of anticoagulant therapy for SVT. MEDLINE, EMBASE, and clinicaltrials.gov were searched from inception through December 2019, without language restrictions, to include observational studies and randomized controlled trials reporting radiological or clinical outcomes in patients with SVT. Pooled proportions and risk ratios (RRs) with 95% confidence intervals (CIs) were calculated in a random-effects model. Of 4312 records identified by the search, 97 studies including 7969 patients were analyzed. In patients receiving anticoagulation, the rates of SVT recanalization, SVT progression, recurrent venous thromboembolism (VTE), major bleeding, and overall mortality were 58% (95% CI, 51-64), 5% (95% CI, 3-7), 11% (95% CI, 8-15), 9% (95% CI, 7-12), and 11% (95% CI, 9-14), respectively. The corresponding values in patients without anticoagulation were 22% (95% CI, 15-31), 15% (95% CI, 8-27), 14% (95% CI, 9-21), 16% (95% CI, 13-20), and 25% (95% CI, 20-31). Compared with no treatment, anticoagulant therapy obtained higher recanalization (RR, 2.39; 95% CI, 1.66-3.44) and lower thrombosis progression (RR, 0.24; 95% CI, 0.13-0.42), major bleeding (RR, 0.73; 95% CI, 0.58-0.92), and overall mortality (RR, 0.45; 95% CI, 0.33-0.60). These results demonstrate that anticoagulant therapy improves SVT recanalization and reduces the risk of thrombosis progression without increasing major bleeding. The incidence of recurrent VTE remained substantial in patients receiving anticoagulation, as well. 
Effects were consistent across the different subgroups of patients. This trial was registered on the PROSPERO database at (https://www.crd.york.ac.uk/prospero//display_record.php?ID=CRD42019127870) as #CRD42019127870.",2021-03-01 +34177338,MassBase: A large-scaled depository of mass spectrometry datasets for metabolome analysis.,"Depository of low-molecular-weight compounds or metabolites detected in various organisms in a non-targeted manner is indispensable for metabolomics research. Due to the diverse chemical compounds, various mass spectrometry (MS) setups with state-of-the-art technologies have been used. Over the past two decades, we have analyzed various biological samples by using gas chromatography-mass spectrometry, liquid chromatography-mass spectrometry, or capillary electrophoresis-mass spectrometry, and archived the datasets in the depository MassBase (http://webs2.kazusa.or.jp/massbase/). As the format of MS datasets depends on the MS setup used, we converted each raw binary dataset of the mass chromatogram to text file format, and thereafter, information of the chromatograph peak was extracted in the text file from the converted file. In total, the depository comprises 46,493 datasets, of which 38,750 belong to the plant species and 7,743 are authentic or mixed chemicals as well as other sources (microorganisms, animals, and foods), as on August 1, 2020. All files in the depository can be downloaded in bulk from the website. Mass chromatograms of 90 plant species obtained by LC-Fourier transform ion cyclotron resonance MS or Orbitrap MS, which detect the ionized molecules with high accuracy allowing speculation of chemical compositions, were converted to text files by the software PowerGet, and the chemical annotation of each peak was added. The processed datasets were deposited in the annotation database KomicMarket2 (http://webs2.kazusa.or.jp/km2/). 
The archives provide fundamental resources for comparative metabolomics and functional genomics, which may result in deeper understanding of living organisms.",2021-03-01 +33970665,Correction to Scarpina et al. (2020).,"Reports an error in ""Is bimanual interference affected in the case of a central proprioceptive loss? New insight from a left-brain-damaged single-case study"" by Federica Scarpina, Sofia Tagini, Marco Rabuffetti, Giovanni Albani, Francesca Garbarini and Alessandro Mauro (Neuropsychology, 2020[May], Vol 34[4], 479-492). In the article ""Is Bimanual Interference Affected in the Case of a Central Proprioceptive Loss? New Insight From a Left-Brain-Damaged Single-Case Study,"" by Federica Scarpina, Sofia Tagini, Marco Rabuffetti, Giovanni Albani, Francesca Garbarini, and Alessandro Mauro (Neuropsychology, 2020, Vol. 34, No. 4, pp. 479-492, https://doi.org/10.1037/neu0000624), in the author note, the Scientific Institute for Research, Hospitalization and Healthcare was incorrectly listed as the Scientific Hospitalization and Treatment Institute in the affiliations for Federica Scarpina, Giovanni Albani, and Alessandro Mauro, and was missing from the affiliation for Marco Rabuffetti. The online version of the article has been corrected. (The following abstract of the original article appeared in record 2020-15658-001.) Objective: It was suggested that the bimanual coupling effect might be linked to motor intentionality and planning, which are the top-down components of motor execution. However, previous results in pathological and healthy individuals have also underlined the pivotal role of bottom-up sensorimotor information.

Method

In this single-case study, the Circles-Lines Coupling Task was administered to a left-parietal-brain-damaged individual. The cerebral lesion caused a central proprioceptive loss, relative to the impaired right hand, when out of the visual control. For the 1st time in literature, we sought to investigate whether the movement of the unaffected hand induced an efficient coupling effect on the movement of the affected one. The bimanual task was performed in the presence and absence of visual input. The patient's performance was compared with that of healthy controls.

Results

We observed the traditional bimanual coupling effect in healthy controls. Moreover, we also replicated the effect when they performed the task blindfolded. In the case of the patient, both hands showed the typical ovalization of the line trajectory when the task was performed in visual modality. It is interesting that when the patient performed the task blindfolded, the trajectories of the impaired right hand seemed to be not influenced by the concomitant circular movement of the spared left hand.

Conclusions

The movement of the unaffected hand induced a bimanual coupling effect on the movement of the affected one only when the visual input was available. In absence of a visual feedback, the aberrant proprioceptive information might preclude the emerging of bimanual coupling, even in the case of a preserved motor intentionality and planning. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-03-01 +33647211,Cultural Adaptation and Validation of Speech Handicap Index: A Scoping Review.,"Objectives The Speech Handicap Index (SHI) is a self-reported speech-related quality of life assessment originally developed for measuring the psychosocial speech impact in patients with oral or pharyngeal cancer. This review article provides a scoping review of the literature on the validated SHI, with the purpose of identifying and documenting available studies and procedures for the cultural adaption and validation of SHI. Method Prime databases including PubMed, EMBASE, and Google Scholar were searched for journal publications reporting validation of the SHI. Reviews and reference cross-checking were performed using a priori selection criteria. A body of literature related to SHI was scoped and publication quality was categorized independently by two investigators. After applying all the screening criteria, articles that met the eligibility criteria were included in the review. Results The scoping review yielded 10 articles that met the inclusion criteria presenting the SHI in eight different languages, including Dutch, U.K. English, French, Korean, Simplified Chinese (Mandarin), Lithuanian, Italian, and European Portuguese. All of them reported validity, reliability, and translation method. Discussion and Conclusions High reliability and validity between various language versions of the SHI were identified. The current scoping review provides a useful summary and could be a helpful precursor to a systematic review on SHI in the future. 
Supplemental Material https://doi.org/10.23641/asha.14082704.",2021-03-01 +33416848,SARS-CoV-2 3D database: understanding the coronavirus proteome and evaluating possible drug targets.,"The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) is a rapidly growing infectious disease, widely spread with high mortality rates. Since the release of the SARS-CoV-2 genome sequence in March 2020, there has been an international focus on developing target-based drug discovery, which also requires knowledge of the 3D structure of the proteome. Where there are no experimentally solved structures, our group has created 3D models with coverage of 97.5% and characterized them using state-of-the-art computational approaches. Models of protomers and oligomers, together with predictions of substrate and allosteric binding sites, protein-ligand docking, SARS-CoV-2 protein interactions with human proteins, impacts of mutations, and mapped solved experimental structures are freely available for download. These are implemented in SARS CoV-2 3D, a comprehensive and user-friendly database, available at https://sars3d.com/. This provides essential information for drug discovery, both to evaluate targets and design new potential therapeutics.",2021-03-01 +33787871,Wormicloud: a new text summarization tool based on word clouds to explore the C. elegans literature. ,"Finding relevant information from newly published scientific papers is becoming increasingly difficult due to the pace at which articles are published every year as well as the increasing amount of information per paper. Biocuration and model organism databases provide a map for researchers to navigate through the complex structure of the biomedical literature by distilling knowledge into curated and standardized information. 
In addition, scientific search engines such as PubMed and text-mining tools such as Textpresso allow researchers to easily search for specific biological aspects from newly published papers, facilitating knowledge transfer. However, digesting the information returned by these systems-often a large number of documents-still requires considerable effort. In this paper, we present Wormicloud, a new tool that summarizes scientific articles in a graphical way through word clouds. This tool is aimed at facilitating the discovery of new experimental results not yet curated by model organism databases and is designed for both researchers and biocurators. Wormicloud is customized for the Caenorhabditis  elegans literature and provides several advantages over existing solutions, including being able to perform full-text searches through Textpresso, which provides more accurate results than other existing literature search engines. Wormicloud is integrated through direct links from gene interaction pages in WormBase. Additionally, it allows analysis on the gene sets obtained from literature searches with other WormBase tools such as SimpleMine and Gene Set Enrichment. Database URL: https://wormicloud.textpressolab.com.",2021-03-01 +33842609,Hot spots and trends in knee revision research since the 21st century: a bibliometric analysis.,"

Background

With the popularization of knee replacement surgery in the treatment of the advanced lesions of knee joint, the amount of knee revision surgery is increasing unceasingly. Meanwhile, the continuous introduction of new clinical concepts and new technology poses a challenge to researchers and surgeons. Our study aims to inform the future scientific research and clinical treatment, by investigating the hot spots and trends of the knee revision research field with the method of bibliometric analysis.

Methods

Publications on knee revision included in the database of Web of Science Core Collection (WoSCC) between 2000 and 2018 were reviewed and MeSH terms of them were extracted from PubMed. Online bibliometric analysis website (http://bibliometric.com/), two pieces of software called ""CiteSpace"" and ""Bibliographic Item Co-Occurrence Matrix Builder"" (BICOMB) were used to analyze the publications reviewed at quantitative level. Another piece of software called ""gCLUTO"", was used to investigate the hot spots with visualization techniques at qualitative level.

Results

A total of 906 publications were retrieved between 2000 and 2018. There is an increasing number of publications, from 15 in 2000 to 86 in 2018. Journal of Arthroplasty is the leading journal which has the most publications on knee revision. The United States has been the biggest contributor. Mayo Clinic became the leader among the institutions which have conducted correlational researches. David G. Lewallen, Robert L. Barrack and Michael A. Mont should be regarded as the scholars who have made outstanding contribution. Hot spots were summed up in six clusters, respectively, the solutions for infection, prostheses, the adverse effects, the surgical techniques, epidemiological characters, and the pathophysiology of the revision knee.

Conclusions

We found a growing trend in knee revision research and extracted the most contributive researchers, institutions, countries, journals, and most-cited articles worldwide. The solutions for complications, surgical applications and analysis for epidemiological characters have been the hot spots. Multi-disciplinary integration is becoming the time-trend of hot spots. Minimally invasive and navigation are directions of revision surgery. They together constitute a solid foundation and set up a fingerpost for the future scientific research and clinical treatment.",2021-03-01 +33693668,CMBD: a manually curated cancer metabolic biomarker knowledge database. ,"The pathogenesis of cancer is influenced by interactions among genes, proteins, metabolites and other small molecules. Understanding cancer progression at the metabolic level is propitious to the visual decoding of changes in living organisms. To date, a large number of metabolic biomarkers in cancer have been measured and reported, which provide an alternative method for cancer precision diagnosis, treatment and prognosis. To systematically understand the heterogeneity of cancers, we developed the database CMBD to integrate the cancer metabolic biomarkers scattered over literatures in PubMed. At present, CMBD contains 438 manually curated relationships between 282 biomarkers and 76 cancer subtypes of 18 tissues reported in 248 literatures. Users can access the comprehensive metabolic biomarker information about cancers, references, clinical samples and their relationships from our online database. As case studies, pathway analysis was performed on the metabolic biomarkers of breast and prostate cancers, respectively. 'Phenylalanine, tyrosine and tryptophan biosynthesis', 'phenylalanine metabolism' and 'primary bile acid biosynthesis' were identified as playing key roles in breast cancer. 
'Glyoxylate and dicarboxylate metabolism', 'citrate cycle (TCA cycle)', and 'alanine, aspartate and glutamate metabolism' have important functions in prostate cancer. These findings provide us with an understanding of the metabolic pathway of cancer initiation and progression. Database URL: http://www.sysbio.org.cn/CMBD/.",2021-03-01 +33517377,Postdischarge interventions for children hospitalized with severe acute malnutrition: a systematic review and meta-analysis.,"

Background

Children hospitalized with severe acute malnutrition (SAM) have poor long-term outcomes following discharge, with high rates of mortality, morbidity, and impaired neurodevelopment. There is currently minimal guidance on how to support children with SAM following discharge from inpatient treatment.

Objectives

This systematic review and meta-analysis aimed to examine whether postdischarge interventions can improve outcomes in children recovering from complicated SAM.

Methods

Systematic searches of 4 databases were undertaken to identify studies of interventions delivered completely or partially after hospital discharge in children aged 6-59 mo, following inpatient treatment of SAM. The main outcome of interest was mortality. Random-effects meta-analysis was undertaken where ≥2 studies were sufficiently similar in intervention and outcome.

Results

Ten studies fulfilled the inclusion criteria, recruiting 39-1781 participants in 7 countries between 1975 and 2015. Studies evaluated provision of zinc (2 studies), probiotics or synbiotics (2 studies), antibiotics (1 study), pancreatic enzymes (1 study), and psychosocial stimulation (4 studies). Six studies had unclear or high risk of bias in ≥2 domains. Compared with standard care, pancreatic enzyme supplementation reduced inpatient mortality (37.8% compared with 18.6%, P < 0.05). In meta-analysis there was some evidence that prebiotics or synbiotics reduced mortality (RR: 0.72; 95% CI: 0.51, 1.00; P = 0.049). Psychosocial stimulation reduced mortality in meta-analysis of the 2 trials reporting deaths (RR: 0.36; 95% CI: 0.15, 0.87), and improved neurodevelopmental scores in ≥1 domain in all studies. There was no evidence that zinc reduced mortality in the single study reporting deaths. Antibiotics reduced infectious morbidity but did not reduce mortality.

Conclusions

Several biological and psychosocial interventions show promise in improving outcomes in children following hospitalization for SAM and require further exploration in larger randomized mortality trials. This study was registered with PROSPERO as CRD42018111342 (https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=111342).",2021-03-01 +33034338,Computer-aided prediction and design of IL-6 inducing peptides: IL-6 plays a crucial role in COVID-19.,"Interleukin 6 (IL-6) is a pro-inflammatory cytokine that stimulates acute phase responses, hematopoiesis and specific immune reactions. Recently, it was found that the IL-6 plays a vital role in the progression of COVID-19, which is responsible for the high mortality rate. In order to facilitate the scientific community to fight against COVID-19, we have developed a method for predicting IL-6 inducing peptides/epitopes. The models were trained and tested on experimentally validated 365 IL-6 inducing and 2991 non-inducing peptides extracted from the immune epitope database. Initially, 9149 features of each peptide were computed using Pfeature, which were reduced to 186 features using the SVC-L1 technique. These features were ranked based on their classification ability, and the top 10 features were used for developing prediction models. A wide range of machine learning techniques has been deployed to develop models. Random Forest-based model achieves a maximum AUROC of 0.84 and 0.83 on training and independent validation dataset, respectively. We have also identified IL-6 inducing peptides in different proteins of SARS-CoV-2, using our best models to design vaccine against COVID-19. 
A web server named as IL-6Pred and a standalone package has been developed for predicting, designing and screening of IL-6 inducing peptides (https://webs.iiitd.edu.in/raghava/il6pred/).",2021-03-01 +32810205,"Development of Proxy and Self-report Burn Model System Pediatric Itch Interference Scales: A National Institute on Disability, Independent Living, and Rehabilitation Research Burn Model System Study.","Pruritus is a commonly reported symptom after burn injury. Valid and reliable scales to measure itch in pediatric burn survivors are important for treatment and epidemiological studies. This study sought to develop psychometrically sound, publicly available self- and proxy-report measures of itch for use in pediatric burn survivors suitable for use in research and clinical practice. A panel of burn experts developed a definition of itch interference and a set of parallel self- and proxy-report candidate items that covered important activities affected by itch. Candidate items were evaluated in cognitive interviews with pediatric burn survivors (n = 4) and proxies (n = 2). Items were translated to Spanish and administered in both English and Spanish to a sample (N = 264) of pediatric burn survivors and/or their proxy enrolled in the Burn Model System (BMS) longitudinal database. The mean age of the pediatric sample was 13 years and average time since burn 5 years. The final itch interference measures each included 5 parallel items calibrated using a one-parameter graded response item response theory model, with a mean of 50 representing the average itch interference of the sample. Reliability of the scores is excellent between the mean and two standard deviations above. Initial analyses provide support for validity of the score. Concordance between the self- and proxy-report scores was moderate (ICC = 0.68). The results support the reliability and validity of the itch scale in children and youth with burn injury. 
The new BMS Pediatric Itch Interference scales are freely and publicly available at https://burndata.washington.edu/itch.",2021-03-01 +32807955,ReDU: a framework to find and reanalyze public mass spectrometry data.,"We present ReDU ( https://redu.ucsd.edu/ ), a system for metadata capture of public mass spectrometry-based metabolomics data, with validated controlled vocabularies. Systematic capture of knowledge enables the reanalysis of public data and/or co-analysis of one's own data. ReDU enables multiple types of analyses, including finding chemicals and associated metadata, comparing the shared and different chemicals between groups of samples, and metadata-filtered, repository-scale molecular networking.",2020-08-17 +33604756,"GW-SEM 2.0: Efficient, Flexible, and Accessible Multivariate GWAS.","Most genome-wide association study (GWAS) analyses test the association between single-nucleotide polymorphisms (SNPs) and a single trait or outcome. While valuable second-step analyses of these associations (e.g., calculating genetic correlations between traits) are common, single-step multivariate analyses of GWAS data are rarely performed. This is unfortunate because multivariate analyses can reveal information which is irrevocably obscured in multi-step analysis. One simple example is the distinction between variance common to a set of measures, and variance specific to each. Neither GWAS of sum- or factor-scores, nor GWAS of the individual measures will deliver a clean picture of loci associated with each measure's specific variance. While multivariate GWAS opens up a broad new landscape of feasible and informative analyses, its adoption has been slow, likely due to the heavy computational demands and difficulties specifying models it requires. Here we describe GW-SEM 2.0, which is designed to simplify model specification and overcome the inherent computational challenges associated with multivariate GWAS. 
In addition, GW-SEM 2.0 allows users to accurately model ordinal items, which are common in behavioral and psychological research, within a GWAS context. This new release enhances computational efficiency, allows users to select the fit function that is appropriate for their analyses, expands compatibility with standard genomic data formats, and outputs results for seamless reading into other standard post-GWAS processing software. To demonstrate GW-SEM's utility, we conducted (1) a series of GWAS using three substance use frequency items from data in the UK Biobank, (2) a timing study for several predefined GWAS functions, and (3) a Type I Error rate study. Our multivariate GWAS analyses emphasize the utility of GW-SEM for identifying novel patterns of associations that vary considerably between genomic loci for specific substances, highlighting the importance of differentiating between substance-specific use behaviors and polysubstance use. The timing studies demonstrate that the analyses take a reasonable amount of time and show the cost of including additional items. The Type I Error rate study demonstrates that hypothesis tests for genetic associations with latent variable models follow the hypothesized uniform distribution. Taken together, we suggest that GW-SEM may provide substantially deeper insights into the underlying genomic architecture for multivariate behavioral and psychological systems than is currently possible with standard GWAS methods. The current release of GW-SEM 2.0 is available on CRAN (stable release) and GitHub (beta release), and tutorials are available on our github wiki ( https://jpritikin.github.io/gwsem/ ).",2021-02-19 +27747157,Brain transcriptomes of honey bees (Apis mellifera) experimentally infected by two pathogens: Black queen cell virus and Nosema ceranae.,"Regulation of gene expression in the brain plays an important role in behavioral plasticity and decision making in response to external stimuli. 
However, both can be severely affected by environmental factors, such as parasites and pathogens. In honey bees, the emergence and re-emergence of pathogens and potential for pathogen co-infection and interaction have been suggested as major components that significantly impaired social behavior and survival. To understand how the honey bee is affected and responds to interacting pathogens, we co-infected workers with two prevalent pathogens of different nature, the positive single strand RNA virus Black queen cell virus (BQCV), and the Microsporidia Nosema ceranae, and explored gene expression changes in brains upon single infections and co-infections. Our data provide an important resource for research on honey bee diseases, and more generally on insect host-pathogen and pathogen-pathogen interactions. Raw and processed data are publicly available in the NCBI/GEO database: (http://www.ncbi.nlm.nih.gov/geo/) under accession number GSE81664.",2016-09-28 +33313775,Prediction of protein-carbohydrate complex binding affinity using structural features. ,"Protein-carbohydrate interactions play a major role in several cellular and biological processes. Elucidating the factors influencing the binding affinity of protein-carbohydrate complexes and predicting their free energy of binding provide deep insights for understanding the recognition mechanism. In this work, we have collected the experimental binding affinity data for a set of 389 protein-carbohydrate complexes and derived several structure-based features such as contact potentials, interaction energy, number of binding residues and contacts between different types of atoms. Our analysis on the relationship between binding affinity and structural features revealed that the important factors depend on the type of the complex based on number of carbohydrate and protein chains. 
Specifically, binding site residues, accessible surface area, interactions between various atoms and energy contributions are important to understand the binding affinity. Further, we have developed multiple regression equations for predicting the binding affinity of protein-carbohydrate complexes belonging to six categories of protein-carbohydrate complexes. Our method showed an average correlation and mean absolute error of 0.731 and 1.149 kcal/mol, respectively, between experimental and predicted binding affinities on a jackknife test. We have developed a web server PCA-Pred, Protein-Carbohydrate Affinity Predictor, for predicting the binding affinity of protein-carbohydrate complexes. The web server is freely accessible at https://web.iitm.ac.in/bioinfo2/pcapred/. The web server is implemented using HTML and Python and supports recent versions of major browsers such as Chrome, Firefox, IE10 and Opera.",2021-07-01 +32901028,A global-scale data set of mining areas.,"The area used for mineral extraction is a key indicator for understanding and mitigating the environmental impacts caused by the extractive sector. To date, worldwide data products on mineral extraction do not report the area used by mining activities. In this paper, we contribute to filling this gap by presenting a new data set of mining extents derived by visual interpretation of satellite images. We delineated mining areas within a 10 km buffer from the approximate geographical coordinates of more than six thousand active mining sites across the globe. The result is a global-scale data set consisting of 21,060 polygons that add up to 57,277 km2. The polygons cover all mining above-ground features that could be identified from the satellite images, including open cuts, tailings dams, waste rock dumps, water ponds, and processing infrastructure. 
The data set is available for download from https://doi.org/10.1594/PANGAEA.910894 and visualization at www.fineprint.global/viewer .",2020-09-08 +34009334,Vaxign2: the second generation of the first Web-based vaccine design program using reverse vaccinology and machine learning.,"Vaccination is one of the most significant inventions in medicine. Reverse vaccinology (RV) is a state-of-the-art technique to predict vaccine candidates from pathogen's genome(s). To promote vaccine development, we updated Vaxign2, the first web-based vaccine design program using reverse vaccinology with machine learning. Vaxign2 is a comprehensive web server for rational vaccine design, consisting of predictive and computational workflow components. The predictive part includes the original Vaxign filtering-based method and a new machine learning-based method, Vaxign-ML. The benchmarking results using a validation dataset showed that Vaxign-ML had superior prediction performance compared to other RV tools. Besides the prediction component, Vaxign2 implemented various post-prediction analyses to significantly enhance users' capability to refine the prediction results based on different vaccine design rationales and considerably reduce user time to analyze the Vaxign/Vaxign-ML prediction results. Users provide proteome sequences as input data, select candidates based on Vaxign outputs and Vaxign-ML scores, and perform post-prediction analysis. Vaxign2 also includes precomputed results from approximately 1 million proteins in 398 proteomes of 36 pathogens. As a demonstration, Vaxign2 was used to effectively analyse SARS-CoV-2, the coronavirus causing COVID-19. The comprehensive framework of Vaxign2 can support better and more rational vaccine design. Vaxign2 is publicly accessible at http://www.violinet.org/vaxign2.",2021-07-01 +33048108,A comprehensive comparison of residue-level methylation levels with the regression-based gene-level methylation estimations by ReGear. 
,"DNA methylation is a biological process impacting the gene functions without changing the underlying DNA sequence. The DNA methylation machinery usually attaches methyl groups to some specific cytosine residues, which modify the chromatin architectures. Such modifications in the promoter regions will inactivate some tumor-suppressor genes. DNA methylation within the coding region may significantly reduce the transcription elongation efficiency. The gene function may be tuned through some cytosines are methylated. This study hypothesizes that the overall methylation level across a gene may have a better association with the sample labels like diseases than the methylations of individual cytosines. The gene methylation level is formulated as a regression model using the methylation levels of all the cytosines within this gene. A comprehensive evaluation of various feature selection algorithms and classification algorithms is carried out between the gene-level and residue-level methylation levels. A comprehensive evaluation was conducted to compare the gene and cytosine methylation levels for their associations with the sample labels and classification performances. The unsupervised clustering was also improved using the gene methylation levels. Some genes demonstrated statistically significant associations with the class label, even when no residue-level methylation features have statistically significant associations with the class label. So in summary, the trained gene methylation levels improved various methylome-based machine learning models. Both methodology development of regression algorithms and experimental validation of the gene-level methylation biomarkers are worth of further investigations in the future studies. 
The source code, example data files and manual are available at http://www.healthinformaticslab.org/supp/.",2021-07-01 +33367506,Deep forest ensemble learning for classification of alignments of non-coding RNA sequences based on multi-view structure representations. ,"Non-coding RNAs (ncRNAs) play crucial roles in multiple biological processes. However, only a few ncRNAs' functions have been well studied. Given the significance of ncRNAs classification for understanding ncRNAs' functions, more and more computational methods have been introduced to improve the classification automatically and accurately. In this paper, based on a convolutional neural network and a deep forest algorithm, multi-grained cascade forest (GcForest), we propose a novel deep fusion learning framework, GcForest fusion method (GCFM), to classify alignments of ncRNA sequences for accurate clustering of ncRNAs. GCFM integrates a multi-view structure feature representation including sequence-structure alignment encoding, structure image representation and shape alignment encoding of structural subunits, enabling us to capture the potential specificity between ncRNAs. For the classification of pairwise alignment of two ncRNA sequences, the F-value of GCFM improves 6% than an existing alignment-based method. Furthermore, the clustering of ncRNA families is carried out based on the classification matrix generated from GCFM. Results suggest better performance (with 20% accuracy improved) than existing ncRNA clustering methods (RNAclust, Ensembleclust and CNNclust). Additionally, we apply GCFM to construct a phylogenetic tree of ncRNA and predict the probability of interactions between RNAs. Most ncRNAs are located correctly in the phylogenetic tree, and the prediction accuracy of RNA interaction is 90.63%. 
A web server (http://bmbl.sdstate.edu/gcfm/) is developed to maximize its availability, and the source code and related data are available at the same URL.",2021-07-01 +32753501,Bactopia: a Flexible Pipeline for Complete Analysis of Bacterial Genomes. ,"Sequencing of bacterial genomes using Illumina technology has become such a standard procedure that often data are generated faster than can be conveniently analyzed. We created a new series of pipelines called Bactopia, built using Nextflow workflow software, to provide efficient comparative genomic analyses for bacterial species or genera. Bactopia consists of a data set setup step (Bactopia Data Sets [BaDs]), which creates a series of customizable data sets for the species of interest, the Bactopia Analysis Pipeline (BaAP), which performs quality control, genome assembly, and several other functions based on the available data sets and outputs the processed data to a structured directory format, and a series of Bactopia Tools (BaTs) that perform specific postprocessing on some or all of the processed data. BaTs include pan-genome analysis, computing average nucleotide identity between samples, extracting and profiling the 16S genes, and taxonomic classification using highly conserved genes. It is expected that the number of BaTs will increase to fill specific applications in the future. As a demonstration, we performed an analysis of 1,664 public Lactobacillus genomes, focusing on Lactobacillus crispatus, a species that is a common part of the human vaginal microbiome. Bactopia is an open source system that can scale from projects as small as one bacterial genome to ones including thousands of genomes and that allows for great flexibility in choosing comparison data sets and options for downstream analysis. 
Bactopia code can be accessed at https://www.github.com/bactopia/bactopia. IMPORTANCE It is now relatively easy to obtain a high-quality draft genome sequence of a bacterium, but bioinformatic analysis requires organization and optimization of multiple open source software tools. We present Bactopia, a pipeline for bacterial genome analysis, as an option for processing bacterial genome data. Bactopia also automates downloading of data from multiple public sources and species-specific customization. Because the pipeline is written in the Nextflow language, analyses can be scaled from individual genomes on a local computer to thousands of genomes using cloud resources. As a usage example, we processed 1,664 Lactobacillus genomes from public sources and used comparative analysis workflows (Bactopia Tools) to identify and analyze members of the L. crispatus species.",2020-08-04 +29893754,Pharmacological and genomic profiling of neurofibromatosis type 1 plexiform neurofibroma-derived schwann cells.,"Neurofibromatosis type I (NF1) is an autosomal dominant genetic condition characterized by peripheral nervous system tumors (PNSTs), including plexiform neurofibromas (pNFs) that cause nerve dysfunction, deformity, pain damage to adjacent structures, and can undergo malignant transformation. There are no effective therapies to prevent or treat pNFs. Drug discovery efforts are slowed by the 'benign' nature of the Schwann cells that are the progenitor cells of pNF. In this work we characterize a set of pNF-derived cell lines at the genomic level (via SNP Arrays, RNAseq, and Whole Exome- Sequencing), and carry out dose response-based quantitative high-throughput screening (qHTS) with a collection of 1,912 oncology-focused compounds in a 1536-well microplate cell proliferation assays. 
Through the characterization and screening of NF1-/-, NF1+/+ and NF1+/- Schwann cell lines, this resource introduces novel therapeutic avenues for the development for NF1 associated pNF as well as all solid tumors with NF1 somatic mutations. The integrated data sets are openly available for further analysis at http://www.synapse.org/pnfCellCulture.",2018-06-12 +27899673,DiseaseMeth version 2.0: a major expansion and update of the human disease methylation database.,"The human disease methylation database (DiseaseMeth, http://bioinfo.hrbmu.edu.cn/diseasemeth/) is an interactive database that aims to present the most complete collection and annotation of aberrant DNA methylation in human diseases, especially various cancers. Recently, the high-throughput microarray and sequencing technologies have promoted the production of methylome data that contain comprehensive knowledge of human diseases. In this DiseaseMeth update, we have increased the number of samples from 3610 to 32 701, the number of diseases from 72 to 88 and the disease-gene associations from 216 201 to 679 602. DiseaseMeth version 2.0 provides an expanded comprehensive list of disease-gene associations based on manual curation from experimental studies and computational identification from high-throughput methylome data. Besides the data expansion, we also updated the search engine and visualization tools. In particular, we enhanced the differential analysis tools, which now enable online automated identification of DNA methylation abnormalities in human disease in a case-control or disease-disease manner. To facilitate further mining of the disease methylome, three new web tools were developed for cluster analysis, functional annotation and survival analysis. DiseaseMeth version 2.0 should be a useful resource platform for further understanding the molecular mechanisms of human diseases.",2016-11-29 +27766955,e-GRASP: an integrated evolutionary and GRASP resource for exploring disease associations.,"

Background

Genome-wide association studies (GWAS) have become a mainstay of biological research concerned with discovering genetic variation linked to phenotypic traits and diseases. Both discrete and continuous traits can be analyzed in GWAS to discover associations between single nucleotide polymorphisms (SNPs) and traits of interest. Associations are typically determined by estimating the significance of the statistical relationship between genetic loci and the given trait. However, the prioritization of bona fide, reproducible genetic associations from GWAS results remains a central challenge in identifying genomic loci underlying common complex diseases. Evolutionary-aware meta-analysis of the growing GWAS literature is one way to address this challenge and to advance from association to causation in the discovery of genotype-phenotype relationships.

Description

We have created an evolutionary GWAS resource to enable in-depth query and exploration of published GWAS results. This resource uses the publicly available GWAS results annotated in the GRASP2 database. The GRASP2 database includes results from 2082 studies, 177 broad phenotype categories, and ~8.87 million SNP-phenotype associations. For each SNP in e-GRASP, we present information from the GRASP2 database for convenience as well as evolutionary information (e.g., rate and timespan). Users can, therefore, identify not only SNPs with highly significant phenotype-association P-values, but also SNPs that are highly replicated and/or occur at evolutionarily conserved sites that are likely to be functionally important. Additionally, we provide an evolutionary-adjusted SNP association ranking (E-rank) that uses cross-species evolutionary conservation scores and population allele frequencies to transform P-values in an effort to enhance the discovery of SNPs with a greater probability of biologically meaningful disease associations.

Conclusion

By adding an evolutionary dimension to the GWAS results available in the GRASP2 database, our e-GRASP resource will enable a more effective exploration of SNPs not only by the statistical significance of trait associations, but also by the number of studies in which associations have been replicated, and the evolutionary context of the associated mutations. Therefore, e-GRASP will be a valuable resource for aiding researchers in the identification of bona fide, reproducible genetic associations from GWAS results. This resource is freely available at http://www.mypeg.info/egrasp .",2016-10-17 +32360208,Interactions between antiretroviral therapy and complementary and alternative medicine: a narrative review.,"

Background

The use of complementary and alternative medicine including herbal medicine (phytotherapy), vitamins, minerals and food supplements is frequent among people living with HIV/AIDS (PLWHAs) who take antiretroviral (ARV) drugs, but is often not known by their prescribing physicians. Some drug-supplement combinations may result in clinically meaningful interactions.

Aims

In this literature review, we aimed to investigate the evidence for complementary and alternative medicine interactions with ARVs.

Sources

A bibliographic search of all in vitro, human studies and case reports of the PubMed database was performed to assess the risk of interactions between complementary and alternative self-medication products and ARVs. The 'HIV drug interaction' (https://www.hiv-druginteractions.org) and 'Natural medicines comprehensive database' (https://naturalmedicines.therapeuticresearch.com) interaction checkers were also analysed.

Content

St John's wort, some forms of garlic, grapefruit and red rice yeast are known to have significant interaction and thus should not be co-administered, or should be used with caution with certain ARV classes. Data on other plant-based supplements come from in vitro studies or very small size in vivo studies and are thus insufficient to conclude the real in vivo impact in case of concomitant administration with ARVs. Some polyvalent minerals such as calcium, magnesium, and iron salts can reduce the absorption of integrase inhibitors by chelation. Potential interactions with vitamin C and quercetin with some ARVs should be noted and efficacy and tolerance of the treatment should be monitored.

Implications

This review shows the importance of screening all PLWHAs for complementary and alternative medicine use to prevent treatment failure or adverse effects related to an interaction with ARVs. Further human studies are warranted to describe the clinical significance of in vitro interactions between numerous complementary and alternative medicine and ARVs.",2020-04-28 +31684815,Finite Verb Morphology Composite Between Age 4 and Age 9 for the Edmonton Narrative Norms Instrument: Reference Data and Psychometric Properties.,"Purpose The purpose of this study was to provide reference data and evaluate the psychometric properties for the finite verb morphology composite (FVMC) measure in children between 4 and 9 years of age from the database of the Edmonton Narrative Norms Instrument (ENNI; Schneider, Dubé, & Hayward, 2005). Method Participants included 377 children between age 4 and age 9, including 300 children with typical language and 77 children with language impairment (LI). Narrative samples were collected using a story generation task. FVMC scores were computed from the samples. Split-half reliability, concurrent criterion validity, and diagnostic accuracy for FVMC were further evaluated. Results Children's performance on FVMC increased significantly between age 4 and age 9 in the typical language and LI groups. Moreover, the correlation coefficients for the split-half reliability and concurrent criterion validity of FVMC were medium to large (rs ≥ .429, ps < .001) at each age level. The diagnostic accuracy of FVMC was good or acceptable from age 4 to age 7, but it dropped to a poor level at age 8 and age 9. Conclusion With the empirical evidence, FVMC is appropriate for identifying children with LI between age 4 and age 7. The reference data of FVMC could also be used for monitoring treatment progress. 
Supplemental Material https://doi.org/10.23641/asha.10073183.",2019-11-04 +31324861,"SMAC, a computational system to link literature, biomedical and expression data.","High-throughput technologies have produced a large amount of experimental and biomedical data creating an urgent need for comprehensive and automated mining approaches. To meet this need, we developed SMAC (SMart Automatic Classification method): a tool to extract, prioritise, integrate and analyse biomedical and molecular data according to user-defined terms. The robust ranking step performed on Medical Subject Headings (MeSH) ensures that papers are prioritised based on specific user requirements. SMAC then retrieves any related molecular data from the Gene Expression Omnibus and performs a wide range of bioinformatics analyses to extract biological insights. These features make SMAC a robust tool to explore the literature around any biomedical topic. SMAC can easily be customised/expanded and is distributed as a Docker container ( https://hub.docker.com/r/hfx320/smac ) ready-to-use on Windows, Mac and Linux OS. SMAC's functionalities have already been adapted and integrated into the Breast Cancer Now Tissue Bank bioinformatics platform and the Pancreatic Expression Database.",2019-07-19 +27671474,ANTISTAPHYBASE: database of antimicrobial peptides (AMPs) and essential oils (EOs) against methicillin-resistant Staphylococcus aureus (MRSA) and Staphylococcus aureus.,"Staphylococcus aureus and methicillin-resistant S. aureus are major pathogens. The antimicrobial peptides and essential oils (EOs) display narrow- or broad-spectrum activity against bacteria including these strains. A centralized resource, such as a database, designed specifically for anti-S. aureus/anti-methicillin-resistant S. aureus antimicrobial peptides and EOs is therefore needed to facilitate the comprehensive investigation of their structure/activity associations and combinations. 
The database ANTISTAPHYBASE is created to facilitate access to important information on antimicrobial peptides and essential peptides against methicillin-resistant S. aureus and S. aureus. At the moment, the database contains 596 sequences of antimicrobial peptides produced by diverse organisms and 287 essential oil records. It permits a quick and easy search of peptides based on their activity as well as their general, physicochemical properties and literature data. These data are very useful to perform further bioinformatic or chemometric analysis and would certainly be useful for the development of new drugs for medical use. The ANTISTAPHYBASE database is freely available at: https://www.antistaphybase.com/ .",2016-09-26 +33884222,Chemo-Preventive Effect of Vegetables and Fruits Consumption on the COVID-19 Pandemic.,"Coronavirus disease 2019 (COVID-19) is a new disease caused by the novel severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). It is a global pandemic that has claimed the death of 1,536,957 human beings worldwide including 287,842 deaths in the United States as of December 3, 2020. It has become a major threat to the medical community and the entire healthcare system in every part of the world. Recently, the Food and Drug Administration (FDA) has approved the emergency use of Pfizer and Moderna COVID-19 vaccine on December 12, 2020. However, there are concern about the new COVID-19 vaccine safety, efficacy, and immunity after the vaccination. In addition, both coronavirus and COVID-19 vaccine are new at this point and there is no scientific evidence to know whether people who are vaccinated can still carry the COVID 19 pathogens and pass them along to others. Therefore, many people all over the world have an increased interest in consuming more VF for the purpose of maintaining their health and boosting their immune system. 
Identifying novel antiviral agents for COVID-19 is of critical importance, and VF is an excellent source for drug discovery and therapeutic development. The objective of this study is to test the hypothesis that a high intake of vegetables and/or fruits prevents COVID-19 incidence and reduces the mortality rate. To achieve this objective, we collected the diet data of COVID-19 from Kaggle (https://www.kaggle.com/mariaren/covid19-healthy-diet-dataset), and used a machine-learning algorithm to examine the effects of different food types on COVID-19 incidences and deaths. Specifically, we used the feature selection method to identify the factors (e.g., diet-related factors) that contribute to COVID-19 morbidity and mortality. Data generated from the study demonstrated that VF intake can help to combat the SARS-CoV-2. Taken together, VF may be potential chemopreventive agents for COVID-19 due to their antiviral properties and their ability to boost the human body immune system.",2021-03-25 +,PSIII-22 Performance of DNA 600 Duroc sired pigs when split sex fed with commercial diets with or without a blend of phytonutrients (Lean Fuel),"Abstract The objective was to evaluate the effect of a blend of phytonutrients (Lean Fuel, LF) on performance of pigs in late finishing in a commercial research barn. A total of 590 DNA 600 Duroc finishing pigs (BW=75.7 + 1.0 kg) were blocked by weight and sex and allocated across two dietary treatments with 6 replications per treatment and 21 to 26 pigs per pen. Dietary treatments were: barrow (B) diets with and without LF and gilt (G) diets with and without LF. Diets were formulated to split-sex requirements (CON) for each period and LF diets were control diets + 0.125% LF. The experiment was 44-d where d 0 was 98-d post-wean. All data were analyzed using the MIXED procedure of SAS as a randomized complete block design as a 2 x 2 (diet x sex) factorial arrangement. Pen served as the experimental unit. 
Overall (d 0-44), the B gained more weight (P = 0.0282) and consumed more feed (P > 0.10) in ADFI. There was no diet x sex interaction (P > 0.10) for ADG. There was a tendency for an interaction (P = 0.0545) for ADFI, where LF B consumed more feed compared to CON B and CON G consumed more feed compared to LF G. There was an interaction for G:F (P = 0.0028) where G on LF had higher G:F compared to G on CON whereas G:F for B was not different to B on LF. In conclusion, LF improved ADG and G:F, but did so differently for G and B. http://www.conferenceharvester.com/",2019-07-01 +31527858,Estimating cumulative point prevalence of rare diseases: analysis of the Orphanet database.,"Rare diseases, an emerging global public health priority, require an evidence-based estimate of the global point prevalence to inform public policy. We used the publicly available epidemiological data in the Orphanet database to calculate such a prevalence estimate. Overall, Orphanet contains information on 6172 unique rare diseases; 71.9% of which are genetic and 69.9% which are exclusively pediatric onset. Global point prevalence was calculated using rare disease prevalence data for predefined geographic regions from the 'Orphanet Epidemiological file' (http://www.orphadata.org/cgi-bin/epidemio.html). Of the 5304 diseases defined by point prevalence, 84.5% of those analysed have a point prevalence of <1/1 000 000. However 77.3-80.7% of the population burden of rare diseases is attributable to the 4.2% (n = 149) diseases in the most common prevalence range (1-5 per 10 000). Consequently national definitions of 'Rare Diseases' (ranging from prevalence of 5 to 80 per 100 000) represent a variable number of rare disease patients despite sharing the majority of rare disease in their scope. Our analysis yields a conservative, evidence-based estimate for the population prevalence of rare diseases of 3.5-5.9%, which equates to 263-446 million persons affected globally at any point in time. 
This figure is derived from data from 67.6% of the prevalent rare diseases; using the European definition of 5 per 10 000; and excluding rare cancers, infectious diseases, and poisonings. Future registry research and the implementation of rare disease codification in healthcare systems will further refine the estimates.",2019-09-16 +27822553,mockrobiota: a Public Resource for Microbiome Bioinformatics Benchmarking. ,"Mock communities are an important tool for validating, optimizing, and comparing bioinformatics methods for microbial community analysis. We present mockrobiota, a public resource for sharing, validating, and documenting mock community data resources, available at http://caporaso-lab.github.io/mockrobiota/. The materials contained in mockrobiota include data set and sample metadata, expected composition data (taxonomy or gene annotations or reference sequences for mock community members), and links to raw data (e.g., raw sequence data) for each mock community data set. mockrobiota does not supply physical sample materials directly, but the data set metadata included for each mock community indicate whether physical sample materials are available. At the time of this writing, mockrobiota contains 11 mock community data sets with known species compositions, including bacterial, archaeal, and eukaryotic mock communities, analyzed by high-throughput marker gene sequencing. IMPORTANCE The availability of standard and public mock community data will facilitate ongoing method optimizations, comparisons across studies that share source data, and greater transparency and access and eliminate redundancy. These are also valuable resources for bioinformatics teaching and training. This dynamic resource is intended to expand and evolve to meet the changing needs of the omics community.",2016-09-01 +32805048,mixtureS: a novel tool for bacterial strain genome reconstruction from reads.,"

Motivation

It is essential to study bacterial strains in environmental samples. Existing methods and tools often depend on known strains or known variations, cannot work on individual samples, are not reliable, or are not easy to use, etc. It is thus important to develop more user-friendly tools that can identify bacterial strains more accurately.

Results

We developed a new tool called mixtureS that can de novo identify bacterial strains from shotgun reads of a clonal or metagenomic sample, without prior knowledge about the strains and their variations. Tested on 243 simulated datasets and 195 experimental datasets, mixtureS reliably identified the strains, their numbers and their abundance. Compared with three tools, mixtureS showed better performance in almost all simulated datasets and the vast majority of experimental datasets.

Availability and implementation

The source code and tool mixtureS is available at http://www.cs.ucf.edu/~xiaoman/mixtureS/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +34363617,A randomized study of IV prochlorperazine plus diphenhydramine versus IV hydromorphone for migraine-associated symptoms: A post hoc analysis.,"

Objective

We conducted a randomized trial among emergency department patients with migraine to determine the relative impact on migraine-associated symptoms of hydromorphone, an opioid, versus prochlorperazine, an antidopaminergic antiemetic.

Methods

This was a post hoc analysis of data from a double-blind study registered at http://clinicaltrials.gov (NCT02389829). Patients who met International Classification of Headache Disorders, 3rd edition criteria for migraine without aura or for probable migraine without aura were eligible for participation. Participants received either hydromorphone 1 mg IV or prochlorperazine 10 mg IV plus diphenhydramine 25 mg IV and could receive a second dose of the same medication 1 h later if needed. The outcomes were sustained relief of nausea, photophobia, and phonophobia.

Results

A total of 127 patients were enrolled, of whom 63 received prochlorperazine and 64 received hydromorphone. Of 49 patients in the prochlorperazine arm who reported nausea at baseline, 34 (69.4%) reported complete resolution without relapse versus 15/49 (30.6%) in the hydromorphone arm (absolute risk reduction [ARR] = 38.8%, 95% CI: 20.5%-57.0%, p < 0.001). Of 55 patients in the prochlorperazine arm who reported photophobia at baseline, 23 (41.8%) reported complete resolution without relapse versus 13/62 (20.9%) patients treated with hydromorphone (ARR = 20.8%, 95% CI: 4.3%-37.3%, p = 0.014). Of 56 patients in the prochlorperazine arm who reported phonophobia at baseline, 25 (44.6%) reported complete resolution without relapse versus 16/59 (27.1%) in the hydromorphone arm (ARR = 17.5%, 95% CI: 0.3%-34.8%, p = 0.049). For adverse events, three patients in the prochlorperazine arm reported anxiety or restlessness, and nine patients in the hydromorphone arm reported dizziness or weakness.

Conclusions

Prochlorperazine plus diphenhydramine is more efficacious than hydromorphone for the treatment of migraine-associated symptoms.",2021-08-07 +31443048,MILAMP: Multiple Instance Prediction of Amyloid Proteins.,"Amyloid proteins are implicated in several diseases such as Parkinson's, Alzheimer's, prion diseases, etc. In order to characterize the amyloidogenicity of a given protein, it is important to locate the amyloid forming hotspot regions within the protein as well as to analyze the effects of mutations on these proteins. The biochemical and biological assays used for this purpose can be facilitated by computational means. This paper presents a machine learning method that can predict hotspot amyloidogenic regions within proteins and characterize changes in their amyloidogenicity due to point mutations. The proposed method called MILAMP (Multiple Instance Learning of AMyloid Proteins) achieves high accuracy for identification of amyloid proteins, hotspot localization, and prediction of mutation effects on amyloidogenicity by integrating heterogenous data sources and exploiting common predictive patterns across these tasks through multiple instance learning. The paper presents comprehensive benchmarking experiments to test the predictive performance of MILAMP in comparison to previously published state of the art techniques for amyloid prediction. The python code for the implementation and webserver for MILAMP is available at the URL: http://faculty.pieas.edu.pk/fayyaz/software.html#MILAMP.",2021-05-01 +27239230,Badapple: promiscuity patterns from noisy evidence.,"

Background

Bioassay data analysis continues to be an essential, routine, yet challenging task in modern drug discovery and chemical biology research. The challenge is to infer reliable knowledge from big and noisy data. Some aspects of this problem are general with solutions informed by existing and emerging data science best practices. Some aspects are domain specific, and rely on expertise in bioassay methodology and chemical biology. Testing compounds for biological activity requires complex and innovative methodology, producing results varying widely in accuracy, precision, and information content. Hit selection criteria involve optimizing such that the overall probability of success in a project is maximized, and resource-wasteful ""false trails"" are avoided. This ""fail-early"" approach is embraced both in pharmaceutical and academic drug discovery, since follow-up capacity is resource-limited. Thus, early identification of likely promiscuous compounds has practical value.

Results

Here we describe an algorithm for identifying likely promiscuous compounds via associated scaffolds which combines general and domain-specific features to assist and accelerate drug discovery informatics, called Badapple: bioassay-data associative promiscuity pattern learning engine. Results are described from an analysis using data from MLP assays via the BioAssay Research Database (BARD) http://bard.nih.gov. Specific examples are analyzed in the context of medicinal chemistry, to illustrate associations with mechanisms of promiscuity. Badapple has been developed at UNM, released and deployed for public use two ways: (1) BARD plugin, integrated into the public BARD REST API and BARD web client; and (2) public web app hosted at UNM.

Conclusions

Badapple is a method for rapidly identifying likely promiscuous compounds via associated scaffolds. Badapple generates a score associated with a pragmatic, empirical definition of promiscuity, with the overall goal to identify ""false trails"" and streamline workflows. Unlike methods reliant on expert curation of chemical substructure patterns, Badapple is fully evidence-driven, automated, self-improving via integration of additional data, and focused on scaffolds. Badapple is robust with respect to noise and errors, and skeptical of scanty evidence.",2016-05-28 +34267778,Identifying miRNA-mRNA Integration Set Associated With Survival Time.,"In the ""personalized medicine"" era, one of the most difficult problems is identification of combined markers from different omics platforms. Many methods have been developed to identify candidate markers for each type of omics data, but few methods facilitate the identification of multiple markers on multi-omics platforms. microRNAs (miRNAs) is well known to affect only indirectly phenotypes by regulating mRNA expression and/or protein translation. To take into account this knowledge into practice, we suggest a miRNA-mRNA integration model for survival time analysis, called mimi-surv, which accounts for the biological relationship, to identify such integrated markers more efficiently. Through simulation studies, we found that the statistical power of mimi-surv be better than other models. Application to real datasets from Seoul National University Hospital and The Cancer Genome Atlas demonstrated that mimi-surv successfully identified miRNA-mRNA integrations sets associated with progression-free survival of pancreatic ductal adenocarcinoma (PDAC) patients. Only mimi-surv found miR-96, a previously unidentified PDAC-related miRNA in these two real datasets. Furthermore, mimi-surv was shown to identify more PDAC related miRNAs than other methods because it used the known structure for miRNA-mRNA regularization. 
An implementation of mimi-surv is available at http://statgen.snu.ac.kr/software/mimi-surv.",2021-06-29 +33647438,PepTherDia: database and structural composition analysis of approved peptide therapeutics and diagnostics.,"As of 2020, there were >100 approved peptides with therapeutic or diagnostic applications. However, a complete database providing information on marketed peptides is not freely available, making the peptide chemists' job of designing future peptide drug candidates challenging. Unlike the rules for small-molecule drugs, there is no general set of guidelines for designing a successful peptide-based drug. In this review, together with our freely available database (PepTherDia, http://peptherdia.herokuapp.com), we provide insights into what a successful peptide therapeutic or diagnostic agent looks like and lay the foundation for establishing a set of rules to help future medicinal chemists to design peptide candidates with increased approval rates.",2021-02-26 +34047888,"Advancing the Psychometric Study of Human Life History Indicators : K Does Not Measure Life History Speed, but Theory and Evidence Suggest It Deserves Further Attention.","In this article we attend to recent critiques of psychometric applications of life history (LH) theory to variance among humans and develop theory to advance the study of latent LH constructs. We then reanalyze data (n = 4,244) previously examined by Richardson et al. (Evolutionary Psychology, 15(1), 2017, https://doi.org/10.1177/1474704916666840) to determine whether (a) previously reported evidence of multidimensionality is robust to the modeling approach employed and (b) the structure of LH indicators is invariant by sex. Findings provide further evidence that a single LH dimension is implausible and that researchers should cease interpreting K-factor scores as empirical proxies for LH speed. 
In contrast to the original study, we detected a small inverse correlation between mating competition and Super-K that is consistent with a trade-off. Tests of measurement invariance across the sexes revealed evidence of metric invariance (i.e., equivalence of factor loadings), consistent with the theory that K is a proximate cause of its indicators; however, evidence of partial scalar invariance suggests use of scores likely introduces bias when the sexes are compared. We discuss limitations and identify approaches that researchers may use to further evaluate the validity of the K-factor and other applications of LH to human variation.",2021-05-28 +27899567,Expansion of the Gene Ontology knowledgebase and resources.,"The Gene Ontology (GO) is a comprehensive resource of computable knowledge regarding the functions of genes and gene products. As such, it is extensively used by the biomedical research community for the analysis of -omics and related data. Our continued focus is on improving the quality and utility of the GO resources, and we welcome and encourage input from researchers in all areas of biology. In this update, we summarize the current contents of the GO knowledgebase, and present several new features and improvements that have been made to the ontology, the annotations and the tools. Among the highlights are 1) developments that facilitate access to, and application of, the GO knowledgebase, and 2) extensions to the resource as well as increasing support for descriptions of causal models of biological systems and network biology. To learn more, visit http://geneontology.org/.",2016-11-29 +31803240,Towards the Complete Goat Pan-Genome by Recovering Missing Genomic Segments From the Reference Genome.,"It is broadly expected that next generation sequencing will ultimately generate a complete genome as is the latest goat reference genome (ARS1), which is considered to be one of the most continuous assemblies in livestock. 
However, the rich diversity of worldwide goat breeds indicates that a genome from one individual would be insufficient to represent the whole genomic contents of goats. By comparing nine de novo assemblies from seven sibling species of domestic goat with ARS1 and using resequencing and transcriptome data from goats for verification, we identified a total of 38.3 Mb sequences that were absent in ARS1. The pan-sequences contain genic fractions with considerable expression. Using the pan-genome (ARS1 together with the pan-sequences) as a reference genome, variation calling efficacy can be appreciably improved. A total of 56,657 spurious SNPs per individual were repressed and 24,414 novel SNPs per individual on average were recovered as a result of better reads mapping quality. The transcriptomic mapping rate was also increased by ∼1.15%. Our study demonstrated that comparing de novo assemblies from closely related species is an efficient and reliable strategy for finding missing sequences from the reference genome and could be applicable to other species. Pan-genome can serve as an improved reference genome in animals for a better exploration of the underlying genomic variations and could increase the probability of finding genotype-phenotype associations assessed by a comprehensive variation database containing much more differences between individuals. We have constructed a goat pan-genome web interface for data visualization (http://animal.nwsuaf.edu.cn/panGoat).",2019-11-15 +34251031,PRO-C3 and ADAPT algorithm accurately identify patients with advanced fibrosis due to alcohol-related liver disease.,"

Background

Alcohol is a main cause of preventable deaths and frequently leads to the development of alcohol-related liver disease. Due to the lack of diagnostics, patients are commonly diagnosed after developing clinical manifestations. Recently, the biomarker PRO-C3 was shown to accurately identify fibrosis due to non-alcoholic fatty liver disease.

Aim

To assess the diagnostic accuracy of PRO-C3, the ADAPT score and best-performing non-patented serological test to detect advanced alcohol-related liver fibrosis.

Methods

We enrolled 426 patients with alcohol overuse in a prospective biopsy-controlled study. We evaluated the accuracy of PRO-C3 and the PRO-C3-based algorithm ADAPT to detect advanced liver fibrosis.

Results

The accuracy of PRO-C3 was good with an AUROC of 0.85 (95% CI 0.79-0.90). The best-performing non-patented test was the Forns index with an AUROC of 0.83 (95% CI 0.78-0.89). The ADAPT algorithm performed better as compared to both the Forns index and PRO-C3 alone with an AUROC = 0.88 (95% CI 0.83-0.93).

Conclusion

PRO-C3 is a new marker with high accuracy to detect advanced alcohol-related liver fibrosis. The diagnostic accuracy of PRO-C3 can be further improved by using the ADAPT algorithm in which the test outperforms currently available non-patented serological fibrosis markers. The study is registered in the Odense Patient Data Exploratory Network (OPEN) under study identification numbers OP_040 (https://open.rsyd.dk/OpenProjects/da/openProject.jsp?openNo=40) and OP_239 (https://open.rsyd.dk/OpenProjects/openProject.jsp?openNo=239&lang=da).",2021-07-12 +32853931,DatAC: A visual analytics platform to explore climate and air quality indicators associated with the COVID-19 pandemic in Spain.,"The coronavirus disease 2019 (COVID-19) pandemic has caused an unprecedented global health crisis, with several countries imposing lockdowns to control the coronavirus spread. Important research efforts are focused on evaluating the association of environmental factors with the survival and spread of the virus and different works have been published, with contradictory results in some cases. Data with spatial and temporal information is a key factor to get reliable results and, although there are some data repositories for monitoring the disease both globally and locally, an application that integrates and aggregates data from meteorological and air quality variables with COVID-19 information has not been described so far to the best of our knowledge. Here, we present DatAC (Data Against COVID-19), a data fusion project with an interactive web frontend that integrates COVID-19 and environmental data in Spain. DatAC is provided with powerful data analysis and statistical capabilities that allow users to explore and analyze individual trends and associations among the provided data. 
Using the application, we have evaluated the impact of the Spanish lockdown on the air quality, observing that NO2, CO, PM2.5, PM10 and SO2 levels decreased drastically in the entire territory, while O3 levels increased. We observed similar trends in urban and rural areas, although the impact has been more important in the former. Moreover, the application allowed us to analyze correlations among climate factors, such as ambient temperature, and the incidence of COVID-19 in Spain. Our results indicate that temperature is not the driving factor and without effective control actions, outbreaks will appear and warm weather will not substantially limit the growth of the pandemic. DatAC is available at https://covid19.genyo.es.",2020-08-04 +27789705,CARD 2017: expansion and model-centric curation of the comprehensive antibiotic resistance database.,"The Comprehensive Antibiotic Resistance Database (CARD; http://arpcard.mcmaster.ca) is a manually curated resource containing high quality reference data on the molecular basis of antimicrobial resistance (AMR), with an emphasis on the genes, proteins and mutations involved in AMR. CARD is ontologically structured, model centric, and spans the breadth of AMR drug classes and resistance mechanisms, including intrinsic, mutation-driven and acquired resistance. It is built upon the Antibiotic Resistance Ontology (ARO), a custom built, interconnected and hierarchical controlled vocabulary allowing advanced data sharing and organization. Its design allows the development of novel genome analysis tools, such as the Resistance Gene Identifier (RGI) for resistome prediction from raw genome sequence. Recent improvements include extensive curation of additional reference sequences and mutations, development of a unique Model Ontology and accompanying AMR detection models to power sequence analysis, new visualization tools, and expansion of the RGI for detection of emergent AMR threats. 
CARD curation is updated monthly based on an interplay of manual literature curation, computational text mining, and genome analysis.",2016-10-26 +34352094,The susceptibility of attaining and maintaining DMARD-free remission in different (rheumatoid) arthritis phenotypes. ,"To compare (sustained) DMARD-free remission rates((S)DFR), defined as respectively ≥6 months and >1 year, after 2 and 5 years between three clinical arthritis phenotypes; undifferentiated arthritis(UA), autoantibody-negative(RA-) and positive rheumatoid arthritis(RA+). All UA(n = 130), RA-(n = 176) and RA + (n = 331) patients from the tREACH trial, a stratified single-blinded trial with a treat-to-target approach, were used. (S)DFR comparisons between phenotypes after 2 and 5 years were performed with Logistic regression. Medication use and early and late flares(DAS ≥ 2.4), respectively defined as < 12 and >12 months after reaching DFR, were also compared. Cox proportional hazard models were used to evaluate potential predictors for (S)DFR. Within 2 and 5 years less DFR was seen in RA + (17.2-25.7%), followed by RA-(28.4-42.1%) and UA patients(43.1-58.5%). This also applied for SDFR within 2 and 5 years (respectively 7.6% and 21.4%; 20.5% and 38.1%; and 35.4% and 55.4%). A flare during tapering was seen in 22.7% of patients. Of the patients in DFR 7.5% had an early flare and 3.4% a late flare. Also more treatment intensifications occurred in RA+ compared with RA- and UA. We found that higher baseline DAS, ACPA positivity, BMI and smoking were negatively associated with (S)DFR, while clinical phenotype(reference RA+), short symptom duration(<6 months) and remission within 6 months were positively associated. (Long-term) clinical outcomes differ between undifferentiated arthritis, autoantibody-negative and positive rheumatoid arthritis(RA). 
These data reconfirm that RA can be subdivided into aforementioned clinical phenotypes and that treatment might be stratified upon these phenotypes, although validation is needed. ISRCTN, https://www.isrctn.com/, ISRCTN26791028.",2021-08-05 +,LONGITUDINAL ATTRITION AND RETENTION IN THE MIDLIFE IN THE U.S. STUDY,"Abstract The MIDUS study offers unique opportunities for examining the nature of longitudinal retention and the challenges of retaining participants in a national study of health and well-being across time. Begun in 1995, MIDUS (Midlife in the U.S.) investigates aging as an integrated bio-psycho-social process (http://midus.wisc.edu/) and completed three waves of survey data collection as of 2015; retention characteristics of the sample will be modelled and profiled for waves 2 and 3. Additionally, the presentation will discuss a new MIDUS project designed to convert living participants who have dropped out, examine the extent to which attrition is ignorable, and test the effectiveness of different imputation strategies for modelling missing data due to attrition. Finally, administrative and outreach strategies for maintaining contact with longitudinal participants will be reviewed.",2018-11-01 +34181169,SARS-CoV-2 signaling pathway map: A functional landscape of molecular mechanisms in COVID-19.,"Coronavirus disease (COVID-19) is caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). COVID-19 has been declared a pandemic by WHO. The clinical manifestation and disease progression in COVID-19 patients varies from minimal symptoms to severe respiratory issues with multiple organ failure. Understanding the mechanism of SARS-CoV-2 interaction with host cells will provide key insights into the effective molecular targets for the development of novel therapeutics. 
Recent studies have identified virus-mediated phosphorylation or activation of some major signaling pathways, such as ERK1/2, JNK, p38, PI3K/AKT and NF-κB signaling, that potentially elicit the cytokine storm that serves as a major cause of tissue injuries. Several studies highlight the aggressive inflammatory response particularly 'cytokine storm' in SARS-CoV-2 patients. A depiction of host molecular dynamics triggered by SARS-CoV-2 in the form of a network of signaling molecules will be helpful for COVID-19 research. Therefore, we developed the signaling pathway map of SARS-CoV-2 infection using data mined from the recently published literature. This integrated signaling pathway map of SARS-CoV-2 consists of 326 proteins and 73 reactions. These include information pertaining to 1,629 molecular association events, 30 enzyme catalysis events, 43 activation/inhibition events, and 8,531 gene regulation events. The pathway map is publicly available through WikiPathways: https://www.wikipathways.org/index.php/Pathway:WP5115 .",2021-06-28 +34121543,Calvatia Lilacina Extracts Exert Anti-Breast-Cancer Bioactivity through the Apoptosis Induction Dependent on Mitochondrial Reactive Oxygen Species and Caspase Activation.,"Puffballs are a class of fungi widely distributed worldwide and associated with various bioactivities. This research mainly showed the antitumor bioactivity of extracts from Calvatia lilacina (CL), which is a common variety of puffballs. NMR and high-performance liquid chromatography methods are used to characterize the extracts. Results showed that CL extracts obtained with petroleum ether, ethyl acetate, ethanol, and water elicited obvious inhibitory effects on the proliferation of A549, Caco-2, and MDA-MB-231. Among these extracts, petroleum ether extract demonstrated the highest performance. This extract was then separated into seven sub-fractions (SFs). 
Three of these SFs (3#, 6#, and 7#) induced a decrease in the viability of MDA-MB-231 cells in which 7# SF exhibited the highest cytotoxicity, where the major component was found to be ergosta-7,22-dien-3-one. Further tests revealed that 7# SF from petroleum ether extract could trigger severe cell death in human breast cancer cells (MDA-MB-231) by activating the apoptotic pathway dependent on mitochondrial reactive oxygen species and caspase activation. All these results in combination indicate that the mechanism of extract-potentiated apoptosis associates closely with ROS-dependent mitochondrial dysfunction events which further induces mitochondria-mediated intrinsic cytochrome C-caspase-related pathway of apoptosis. Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1936576.",2021-06-12 +27899613,NSDNA: a manually curated database of experimentally supported ncRNAs associated with nervous system diseases.,"The Nervous System Disease NcRNAome Atlas (NSDNA) (http://www.bio-bigdata.net/nsdna/) is a manually curated database that provides comprehensive experimentally supported associations about nervous system diseases (NSDs) and noncoding RNAs (ncRNAs). NSDs represent a common group of disorders, some of which are characterized by high morbidity and disabilities. The pathogenesis of NSDs at the molecular level remains poorly understood. ncRNAs are a large family of functionally important RNA molecules. Increasing evidence shows that diverse ncRNAs play a critical role in various NSDs. Mining and summarizing NSD-ncRNA association data can help researchers discover useful information. Hence, we developed an NSDNA database that documents 24 713 associations between 142 NSDs and 8593 ncRNAs in 11 species, curated from more than 1300 articles. This database provides a user-friendly interface for browsing and searching and allows for data downloading flexibility. 
In addition, NSDNA offers a submission page for researchers to submit novel NSD-ncRNA associations. It represents an extremely useful and valuable resource for researchers who seek to understand the functions and molecular mechanisms of ncRNA involved in NSDs.",2016-11-28 +,Germinate 3: Development of a Common Platform to Support the Distribution of Experimental Data on Crop Wild Relatives,"Conservation and exploitation of crop wild relative species is an important component in ensuring food security and improving current agricultural output. By identifying agriculturally important characteristics that express favorable response to both biotic and abiotic stress currently unused by breeders, the incorporation of this new genetic material into genetic background stocks may help mitigate problems imposed by climate change, land degradation, and population pressure. This is particularly important in countries that will be more severely affected by the threat of reduced yields. The ability to effectively manage genetic resources collections and integrate unique and diverse data types is crucial in exploring, understanding, and exploiting the diversity contained within genebanks. Providing a common interface through which experimental and background data can be disseminated to both researchers and breeders will bring focus and facilitate community building into research communities. We have taken wild barley (Hordeum spp.) and potato (Solanum spp.) collections along with wheat (Triticum spp.) and maize (Zea mays subsp. mays) and their wild relatives and incorporated this data into web-based information resources built using the Germinate platform (https://ics.hutton.ac.uk/get-germinate, accessed 4 Apr. 2017). We have tailored these to better meet the demands of researchers by developing both new data visualization tools and integration with current software such as Helium, Flapjack, and CurlyWhirly (https://ics.hutton.ac.uk/software, accessed 4 Apr. 
2017) and presented the data in a common platform. While the underlying species differ, the approach taken ensures that tools are compatible across all database instances. We will describe these database instances and show that Germinate offers a common platform that will aid in the exploration and wider use of these species.",2017-05-01 +30016397,dbCID: a manually curated resource for exploring the driver indels in human cancer.,"While recent advances in next-generation sequencing technologies have enabled the creation of a multitude of databases in cancer genomic research, there is no comprehensive database focusing on the annotation of driver indels (insertions and deletions) yet. Therefore, we have developed the database of Cancer driver InDels (dbCID), which is a collection of known coding indels that likely to be engaged in cancer development, progression or therapy. dbCID contains experimentally supported and putative driver indels derived from manual curation of literature and is freely available online at http://bioinfo.ahu.edu.cn:8080/dbCID. Using the data deposited in dbCID, we summarized features of driver indels in four levels (gene, DNA, transcript and protein) through comparing with putative neutral indels. We found that most of the genes containing driver indels in dbCID are known cancer genes playing a role in tumorigenesis. Contrary to the expectation, the sequences affected by driver frameshift indels are not larger than those by neutral ones. In addition, the frameshift and inframe driver indels prefer to disrupt high-conservative regions both in DNA sequences and protein domains. Finally, we developed a computational method for discriminating cancer driver from neutral frameshift indels based on the deposited data in dbCID. The proposed method outperformed other widely used non-cancer-specific predictors on an external test set, which demonstrated the usefulness of the data deposited in dbCID. 
We hope dbCID will be a benchmark for improving and evaluating prediction algorithms, and the characteristics summarized here may assist with investigating the mechanism of indel-cancer association.",2019-09-01 +33626541,Genetic Polymorphisms in Activating Transcription Factor 3 Binding Site and the Prognosis of Early-Stage Non-Small Cell Lung Cancer.,"

Background

Activating transcription factor 3 (ATF3) plays a significant role in cancer development and progression. We investigated the association between variants in expression quantitative trait loci (eQTLs) within ATF3 binding regions and the prognosis of non-small cell lung cancer (NSCLC) after surgery.

Methods

A total of 772 patients with NSCLC who underwent curative surgery were enrolled. Using a public database (http://galaxyproject.org), we selected 104 single nucleotide polymorphisms (SNPs) in eQTLs in the ATF3 binding regions. The association of those SNPs with disease-free survival (DFS) was evaluated.

Results

Among those SNPs, HAX1 rs11265425T>G was associated with significantly worse DFS (aHR = 1.30, 95% CI = 1.00-1.69, p = 0.05), and ME3 rs10400291C>A was associated with significantly better DFS (aHR = 0.66, 95% CI = 0.46-0.95, p = 0.03). Regarding HAX1 rs11265425T>G, the significant association remained only in adenocarcinoma, and the association was significant only in squamous cell carcinoma regarding ME3 rs10400291C>A. ChIP-qPCR assays showed that the two variants reside in active enhancers where H3K27Ac and ATF3 binding occurs. Promoter assays showed that rs11265425 G allele had significantly higher HAX1 promoter activity than T allele. HAX1 RNA expression was significantly higher in tumor than in normal lung, and higher in rs11265425 TG+GG genotypes than in TT genotype. Conversely, ME3 expression was significantly lower in tumor than in normal lung, and higher in rs10400291 AA genotype than in CC+CA genotypes.

Conclusions

In conclusion, this study shows that the functional polymorphisms in ATF3 binding sites, HAX1 rs11265425T>G and ME3 rs10400291C>A are associated with the clinical outcomes of patients in surgically resected NSCLC.",2021-02-24 +31218882,SeqScrub: a web tool for automatic cleaning and annotation of FASTA file headers for bioinformatic applications.,"Data consistency is necessary for effective bioinformatic analysis. SeqScrub is a web tool that parses and maintains consistent information about protein and DNA sequences in FASTA file format, checks if records are current, and adds taxonomic information by matching identifiers against entries in authoritative biological sequence databases. SeqScrub provides a powerful, yet simple workflow for managing, enriching and exchanging data, which is crucial to establish a record of provenance for sequences found from broad and varied searches; for example, using BLAST on continually updated genome sequence sets. Headers standardized using SeqScrub can be parsed by a majority of bioinformatic tools, stay uniformly named between collaborators and contain informative labels to aid management of reproducible, scientific data. SeqScrub is available at http://bioinf.scmb.uq.edu.au/seqscrub.",2019-06-20 +33752689,Should policy makers trust composite indices? A commentary on the pitfalls of inappropriate indices for policy formation.,"

Background

This paper critically discusses the use and merits of global indices, in particular, the Global Health Security Index (GHSI; Cameron et al. https://www.ghsindex.org/#l-section--map ) in times of an imminent crisis, such as the current pandemic. This index ranked 195 countries according to their expected preparedness in the case of a pandemic or other biological threat. The coronavirus disease 2019 (Covid-19) pandemic provides the background to compare each country's predicted performance from the GHSI with the actual performance. In general, there is an inverted relation between predicted versus actual performance, i.e. the predicted top performers are among those that are the worst hit. Obviously, this reflects poorly on the potential policy uses of this index in imminent crisis management.

Methods

The paper analyses the GHSI and identifies why it may have struggled to predict actual pandemic preparedness as evidenced by the Covid-19 pandemic. The paper also uses two different data sets, one from the Worldmeter on the spread of the Covid-19 pandemic, and the other from the International Network for Government Science Advice (INGSA) Evidence-to-Policy Tracker, to draw comparisons between the actual introduction of pandemic response policies and the corresponding death rate in 29 selected countries.

Results

This paper analyses the reasons for the poor match between prediction and reality in the index, and mentions six general observations applying to global indices in this respect. These observations are based on methodological and conceptual analyses. The level of abstraction in these global indices builds uncertainties upon uncertainties and hides implicit value assumptions, which potentially removes them from the policy needs on the ground.

Conclusions

From the analysis, the question is raised if the policy community might have better tools for decision-making in a pandemic. On the basis of data from the INGSA Evidence-to-Policy Tracker, and with backing in studies from social psychology and philosophy of science, some simple heuristics are suggested, which may be more useful than a global index.",2021-03-22 +33622334,Identification of major depressive disorder disease-related genes and functional pathways based on system dynamic changes of network connectivity.,"

Background

Major depressive disorder (MDD) is a leading psychiatric disorder that involves complex abnormal biological functions and neural networks. This study aimed to compare the changes in the network connectivity of different brain tissues under different pathological conditions, analyze the biological pathways and genes that are significantly related to disease progression, and further predict the potential therapeutic drug targets.

Methods

Expression of differentially expressed genes (DEGs) was analyzed with postmortem anterior cingulate cortex (ACC) and prefrontal cortex (PFC) mRNA expression profile datasets downloaded from the Gene Expression Omnibus (GEO) database, including 76 MDD patients and 76 healthy subjects in ACC and 63 MDD patients and 63 healthy subjects in PFC. The co-expression network construction was based on system network analysis. The function of the genes was annotated by Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway analysis. The Human Protein Reference Database (HPRD, http://www.hprd.org/ ) was used for gene interaction relationship mapping.

Results

We filtered 586 DEGs in ACC and 616 DEGs in PFC for further analysis. By constructing the co-expression network, we found that the gene connectivity was significantly reduced under disease conditions (P = 0.04 in PFC and P = 1.227e-09 in ACC). Crosstalk analysis showed that CD19, PTDSS2 and NDST2 were significantly differentially expressed in ACC and PFC of MDD patients. Among them, CD19 and PTDSS2 have been targeted by several drugs in the Drugbank database. KEGG pathway analysis demonstrated that the function of CD19 and PTDSS2 were enriched with the pathway of Glycerophospholipid metabolism and T cell receptor signaling pathway.

Conclusion

Co-expression network and tissue comparing analysis can identify signaling pathways and cross talk genes related to MDD, which may provide novel insight for understanding the molecular mechanisms of MDD.",2021-02-23 +33732693,pCysMod: Prediction of Multiple Cysteine Modifications Based on Deep Learning Framework.,"Thiol groups on cysteines can undergo multiple post-translational modifications (PTMs), acting as a molecular switch to maintain redox homeostasis and regulating a series of cell signaling transductions. Identification of sophistical protein cysteine modifications is crucial for dissecting its underlying regulatory mechanism. Instead of a time-consuming and labor-intensive experimental method, various computational methods have attracted intense research interest due to their convenience and low cost. Here, we developed the first comprehensive deep learning based tool pCysMod for multiple protein cysteine modification prediction, including S-nitrosylation, S-palmitoylation, S-sulfenylation, S-sulfhydration, and S-sulfinylation. Experimentally verified cysteine sites curated from literature and sites collected by other databases and predicting tools were integrated as benchmark dataset. Several protein sequence features were extracted and united into a deep learning model, and the hyperparameters were optimized by particle swarm optimization algorithms. Cross-validations indicated our model showed excellent robustness and outperformed existing tools, which was able to achieve an average AUC of 0.793, 0.807, 0.796, 0.793, and 0.876 for S-nitrosylation, S-palmitoylation, S-sulfenylation, S-sulfhydration, and S-sulfinylation, demonstrating pCysMod was stable and suitable for protein cysteine modification prediction. 
Besides, we constructed a comprehensive protein cysteine modification prediction web server based on this model to benefit the researches finding the potential modification sites of their interested proteins, which could be accessed at http://pcysmod.omicsbio.info. This work will undoubtedly greatly promote the study of protein cysteine modification and contribute to clarifying the biological regulation mechanisms of cysteine modification within and among the cells.",2021-02-23 +,Genetic and physical mapping of a new rice blast resistance specificity Pi-67 from a broad spectrum resistant genotype Tetep,"The Vietnamese rice landrace Tetep is known world-wide for its exceptional broad-spectrum and durable resistance to blast disease caused by Pyricularia oryzae. In present study, we report on identification and mapping of a new blast resistance gene Pi-67 from a doubled haploid line TDH251 that derives its resistance from Tetep. Using a multipronged mapping strategy combining bulked segregant analysis, recessive class approach and conventional pathotype analysis, the resistance gene was fine mapped to a 0.4 cM interval flanked by markers YL87/155 and RRS8 near the centromere of chromosome 12. By projecting the sequences of flanking markers on the reference sequence of cv. Nipponbare, a 2.03 Mb region extending from position 10.60 to 12.63 Mb near the centromere of rice chromosome 12 was delineated as the region of blast resistance locus. A total of 106 predicted genes were identified in Pi-67 region by surveying the equivalent genomic region of cv. Nipponbare in Rice Annotation Project Database (RAP-DB) (http://rapdbbeta.dna.affrc.go.jp). Out of these, Os12g0281600 encoding nucleotide-binding site and leucine-rich repeat (NBS–LRR) protein was short listed as a potential candidate for the blast resistance gene identified from TDH251. 
Based on conventional pathotype analysis and allele sequencing, Pi-67(t) could be differentiated from other two blast resistance genes Pi19 and Pi-42(t) that occupy syntenic position on chromosome 12. The physical localization of Pi-67 and identification of a NBS–LRR gene Os12g0281600 as a potential R-gene candidate has set the stage for cloning and functional characterization of this resistance gene.",2019-01-01 +34034755,Gene Ontology Meta Annotator for Plants (GOMAP).,"Annotating gene structures and functions to genome assemblies is necessary to make assembly resources useful for biological inference. Gene Ontology (GO) term assignment is the most used functional annotation system, and new methods for GO assignment have improved the quality of GO-based function predictions. The Gene Ontology Meta Annotator for Plants (GOMAP) is an optimized, high-throughput, and reproducible pipeline for genome-scale GO annotation of plants. We containerized GOMAP to increase portability and reproducibility and also optimized its performance for HPC environments. Here we report on the pipeline's availability and performance for annotating large, repetitive plant genomes and describe how GOMAP was used to annotate multiple maize genomes as a test case. Assessment shows that GOMAP expands and improves the number of genes annotated and annotations assigned per gene as well as the quality (based on [Formula: see text]) of GO assignments in maize. GOMAP has been deployed to annotate other species including wheat, rice, barley, cotton, and soy. Instructions and access to the GOMAP Singularity container are freely available online at https://bioinformapping.com/gomap/ . 
A list of annotated genomes and links to data is maintained at https://dill-picl.org/projects/gomap/ .",2021-05-25 +34392755,Subjective memory complaints and social participation among older adults: results from the health and retirement study.,"Objectives:This study aims to examine whether subjective memory complaints (SMC) contribute to social participation among older adults.Method:The study sample was 4,713 community-dwelling older adults aged 65 years and older from four waves (2010, 2012, 2014, 2016) of the Health and Retirement Study. Hierarchical linear modeling analysis was used to examine the association of SMC with social participation after controlling for factors influencing social participation. Demographic factors (i.e. age, gender, and perceived socioeconomic status) were entered in block 1, health-related factors (i.e. health conditions, perceived health, instrumental activities of daily living, memory-immediate and delayed, and depressive symptoms) were entered in block 2, environmental factors (i.e. perceived social support and strain from spouse, child, family, and friend) were entered in block 3, and SMC was entered in block 4.Results:The result showed that factors significantly contributing to social participation are age (standardized β = -0.08, p < 0.01), perceived socioeconomic status (β = 0.16, p < 0.001), perceived health (β = 0.15, p < 0.001), instrumental activities of daily living (β = 0.12, p < 0.001), memory-immediate and delayed (β = 0.09, p < 0.001; β = 0.08, p < 0.001, respectively), social support from spouse and friend (β = 0.04, p < 0.05; β = 0.13, p < 0.001, respectively), social strain from friend (β = 0.07, p < 0.001), and SMC (β = -0.05, p < 0.001). 
The demographic factors explained 9.5%, health-related factors explained 8.5%, environmental factors explained 2.4%, and SMC explained 0.1% of the variance in social participation.Conclusion: This finding suggests that SMC may contribute to social participation in older adults.Supplemental data for this article can be accessed online at https://doi.org/10.1080/13607863.2021.1961123 .",2021-08-14 +33245691,NonClasGP-Pred: robust and efficient prediction of non-classically secreted proteins by integrating subset-specific optimal models of imbalanced data. ,"Non-classically secreted proteins (NCSPs) are proteins that are located in the extracellular environment, although there is a lack of known signal peptides or secretion motifs. They usually perform different biological functions in intracellular and extracellular environments, and several of their biological functions are linked to bacterial virulence and cell defence. Accurate protein localization is essential for all living organisms, however, the performance of existing methods developed for NCSP identification has been unsatisfactory and in particular suffer from data deficiency and possible overfitting problems. Further improvement is desirable, especially to address the lack of informative features and mining subset-specific features in imbalanced datasets. In the present study, a new computational predictor was developed for NCSP prediction of gram-positive bacteria. First, to address the possible prediction bias caused by the data imbalance problem, ten balanced subdatasets were generated for ensemble model construction. Then, the F-score algorithm combined with sequential forward search was used to strengthen the feature representation ability for each of the training subdatasets. 
Third, the subset-specific optimal feature combination process was adopted to characterize the original data from different aspects, and all subdataset-based models were integrated into a unified model, NonClasGP-Pred, which achieved an excellent performance with an accuracy of 93.23 %, a sensitivity of 100 %, a specificity of 89.01 %, a Matthew's correlation coefficient of 87.68 % and an area under the curve value of 0.9975 for ten-fold cross-validation. Based on assessment on the independent test dataset, the proposed model outperformed state-of-the-art available toolkits. For availability and implementation, see: http://lab.malab.cn/~wangchao/softwares/NonClasGP/.",2020-11-27 +31713145,Multilocus variable number tandem repeat analysis (MLVA)-typing of Brucella abortus isolates of India reveals limited genetic diversity.,"Multilocus variable number tandem repeat analysis (MLVA) technique has wide applications in studying phylogenies and short-term epidemiology of pathogens. The technique has been extensively used worldwide in molecular epidemiology of Brucella genus. Only one study on this aspect is reported from India despite its economic and public health significance in country. The present study isolated B. abortus from domesticated bovines of Jammu region of Jammu and Kashmir state, India, and applied MLVA for 16 loci (MLVA-16). MLVA results were compared with the results of a previous study and with MLVA data of Indian isolates present in http://microbesgenotyping.i2bc.paris-saclay.fr/database. In the study, 136 samples from bovines (cattle and buffaloes) of 47 farms of Jammu region were processed for isolation. Eleven isolates of B. abortus biovar 1 from 6 farms were obtained. In MLVA-16 analysis, although the isolates were classified in a single cluster, 5 genotypes were obtained with a specific genotype being prevalent on each farm. The study identifies that MLVA-16 is capable to differentiate B. 
abortus strains in an area having high genetic similarity among isolates. On comparing the results with previous study and database, the isolates were found to have high genetic similarity indicating that the genetic diversity of B. abortus in India is very limited. It probably indicates that India is contaminated recently with B. abortus. To test this hypothesis, analysis of whole genome sequencing data of diverse collection of Indian B. abortus strains is required.",2019-11-11 +34170200,Assessing the Protective Effect of Moringa oleifera Extract against Bone Metastasis: An In Vitro Simulated Digestion Approach.,"Moringa oleifera possesses numerous advantageous effects like anti-microbial, antioxidant, and anti-inflammatory, leaves contain a high multiplicity of the bioactive compound; however, little is identified about its bioaccessibility. The objective of this study was to assess the bioefficacy, bioaccessible and anticancer activity of Moringa oleifera in a PC3 cell line before and after simulated in vitro digestion. Digested and non-digested extracts were prepared and evaluated for total polyphenols, flavonoids, and total antioxidant capacity by spectrophotometric analysis and LCMS analysis. Cell viability, apoptosis, colony formation, cell cycle, Glutathione level, and gene expression study were tested with Moringa oleifera (MO) and digested Moringa oleifera (DMO). Results revealed that total polyphenols, total flavonoids, and TAC were significantly (P < 0.05) reduced after in vitro digestion. Furthermore, biological activity against the PC3 cell line showed that DMO extracts significant cytotoxic and reduced cell vitality compared to the MO. In addition, DMO extract had a noteworthy effect in apoptosis and inhibiting the colony formation ability; while cell cycle was blocked in S phase by both extracts but significant effect showed in DMO. 
These studies have increased understanding of the influence of in vitro simulation digestion on the biological activity effect of M. oleifera against prostate cancer bone metastasis.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1933099 .",2021-06-25 +34304977,"Development and test-retest reliability assessment of a low-cost, 3D printed tool for assessing different aspects of hand dexterity. ","Hand dexterity assessments related to fine motor movements are routinely administered in clinical settings to ascertain an individual's hand function. However, to perform a detailed assessment multiple devices are needed which can be time-consuming and costly to administer. We designed and assessed the test-retest reliability of a 3D printed dexterity device in a cohort of healthy young adults and community-dwelling older adults. This study examines the reliability of the device, association between perceived fine motor gripping and manipulation dexterity components, and dominant hand outperformance during both tasks. Test-retest study of a clinical measurement tool. A convenience sample of thirty-six healthy community-dwelling older and young adults was included in our study. The device was used to collect data at two testing sessions to establish test-retest reliability. Fine motor manipulation dexterity was assessed by lifting notched pegs over a vertical barrier and inserting them into randomly oriented holes sequentially. Fine motor gripping dexterity was assessed by taking these notched pegs out of the holes, lifting them over the barrier and dropping them into a large container. Intraclass correlation coefficient (ICC)2,1 showed good to excellent test-retest reliability on the dominant and nondominant hands when using the device. Only modest association was found within-hand for the gripping versus manipulation dexterity tests. 
The between-hand motor gripping dexterity test demonstrated a strong association; however, between-hand the motor manipulation dexterity test was only moderately associated. The device was reliable, discriminated between the motor gripping and motor manipulating dexterity tasks, and was sensitive to handedness during the motor manipulating dexterity task. It shows promise as a hand dexterity assessment device which may provide efficiency and cost advantages. It is freely available via http://www.rehabtools.org/dexterity.html.",2021-07-22 +33555348,FutureTox IV Workshop Summary: Predictive Toxicology for Healthy Children.,"FutureTox IV, a Society of Toxicology Contemporary Concepts in Toxicology workshop, was held in November 2018. Building upon FutureTox I, II, and III, this conference focused on the latest science and technology for in vitro profiling and in silico modeling as it relates to predictive developmental and reproductive toxicity (DART). Publicly available high-throughput screening data sets are now available for broad in vitro profiling of bioactivities across large inventories of chemicals. Coupling this vast amount of mechanistic data with a deeper understanding of molecular embryology and post-natal development lays the groundwork for using new approach methodologies (NAMs) to evaluate chemical toxicity, drug efficacy, and safety assessment for embryo-fetal development. NAM is a term recently adopted in reference to any technology, methodology, approach, or combination thereof that can be used to provide information on chemical hazard and risk assessment to avoid the use of intact animals (U.S. Environmental Protection Agency [EPA], Strategic plan to promote the development and implementation of alternative test methods within the tsca program, 2018, https://www.epa.gov/sites/production/files/2018-06/documents/epa_alt_strat_plan_6-20-18_clean_final.pdf). 
There are challenges to implementing NAMs to evaluate chemicals for developmental toxicity compared with adult toxicity. This forum article reviews the 2018 workshop activities, highlighting challenges and opportunities for applying NAMs for adverse pregnancy outcomes (eg, preterm labor, malformations, low birth weight) as well as disorders manifesting postnatally (eg, neurodevelopmental impairment, breast cancer, cardiovascular disease, fertility). DART is an important concern for different regulatory statutes and test guidelines. Leveraging advancements in such approaches and the accompanying efficiencies to detecting potential hazards to human development are the unifying concepts toward implementing NAMs in DART testing. Although use of NAMs for higher level regulatory decision making is still on the horizon, the conference highlighted novel testing platforms and computational models that cover multiple levels of biological organization, with the unique temporal dynamics of embryonic development, and novel approaches for estimating toxicokinetic parameters essential in supporting in vitro to in vivo extrapolation.",2021-04-01 +29764369,VAReporter: variant reporter for cancer research of massive parallel sequencing.,"BACKGROUND:High throughput sequencing technologies have been an increasingly critical aspect of precision medicine owing to a better identification of disease targets, which contributes to improved health care cost and clinical outcomes. In particular, disease-oriented targeted enrichment sequencing is becoming a widely-accepted application for diagnostic purposes, which can interrogate known diagnostic variants as well as identify novel biomarkers from panels of entire human coding exome or disease-associated genes. 
RESULTS:We introduce a workflow named VAReporter to facilitate the management of variant assessment in disease-targeted sequencing, the identification of pathogenic variants, the interpretation of biological effects and the prioritization of clinically actionable targets. State-of-art algorithms that account for mutation phenotypes are used to rank the importance of mutated genes through visual analytic strategies. We established an extensive annotation source by integrating a wide variety of biomedical databases and followed the American College of Medical Genetics and Genomics (ACMG) guidelines for interpretation and reporting of sequence variations. CONCLUSIONS:In summary, VAReporter is the first web server designed to provide a ""one-stop"" resource for individual's diagnosis and large-scale cohort studies, and is freely available at http://rnd.cgu.edu.tw/vareporter .",2018-05-09 +33137644,PlacentaCellEnrich: A tool to characterize gene sets using placenta cell-specific gene enrichment analysis.,"Single-cell RNA-Sequencing (scRNA-Seq) has improved our understanding of individual cell types in the human placenta. However, placental scRNA-Seq data is not readily accessible when trying to understand how expression patterns in model systems correspond to those from first trimester human placenta. Therefore, we developed PlacentaCellEnrich, a tool that takes a gene set as input, and then reports if the input set is enriched for genes with placenta cell-specific expression patterns, based on human placenta scRNA-Seq data. The PlacentaCellEnrich tool is freely available at https://placentacellenrich.gdcb.iastate.edu/ for non-profit academic use under the MIT license.",2020-10-27 +34388006,Psychometrics of the Pragmatic Rating Scale for School-Age Children With a Range of Linguistic and Social Communication Skills.,"Purpose Social communication or pragmatic skills are continuously distributed in the general population. 
Impairment in these skills is associated with two clinical disorders, autism spectrum disorder (ASD) and social (pragmatic) communication disorder. Such impairment can impact a child's peer acceptance, school performance, and current and later mental health. Valid, reliable, examiner-rated observational measures of social communication from a semistructured language sample are needed to detect social communication impairment. We evaluated the psychometrics of an examiner-rated measure of social (pragmatic) communication, the Pragmatic Rating Scale-School Age (PRS-SA). Method The analytic sample consisted of 130 children, ages 7-12 years, from five mutually exclusive groups: ASD (n = 25), language concern (LC; n = 5), ASD + LC (n = 10), social communication impairment only (n = 22), and typically developing (TD; n = 68). All children received language and autism assessments. The PRS-SA was rated separately using video-recorded communication samples from the Autism Diagnostic Observation Schedule. Assessment data were employed to evaluate the psychometrics of the PRS-SA. Analysis of covariance models were used to assess whether the PRS-SA would detect differences in social communication functioning across the five groups. Results The PRS-SA demonstrated strong internal reliability, concurrent validity, and interrater reliability. PRS-SA scores were significantly higher in all groups compared to the TD group and differed significantly in most pairwise comparisons; the ASD + LC group had the highest (more atypical) scores. Conclusions The PRS-SA shows promise as a measure of social communication skills in school-age verbally fluent children with a range of social and language abilities. More research is needed with a larger sample, including a wider age range and geographical diversity, to replicate findings. Supplemental Material https://doi.org/10.23641/asha.15138240.",2021-08-13 +35543572,Syphilis in Poland in 2019.,"

Purpose

The aim of the study was to assess the epidemiological situation of syphilis cases in Poland in 2019 in comparison to previous years.

Material and methods

Analysis of the epidemiological situation was based on case-based data from reports of newly detected syphilis cases received from doctors and laboratories. Additionally, aggregated data from MZ-56 reports on infectious diseases, infections and poisoning from 2013 to 2018, sent from Sanitary Inspections to NIPH NIH - NRI, were used. Also, data about patients treated in dermatology/venereology clinics in 2019, reported on MZ-14 forms and published in the statistical bulletin of the Ministry of Health on the e-health system website (currently: https://e-zdrowie.gov.pl; https://cez.gov.pl) and on the NIPH NIH - NRI website, were used.

Results

In 2019 in Poland 1,511 syphilis cases were recognized (diagnosis rate was 3.96 per 100,000), including 79 cases among non-Polish citizens. The frequency of newly detected syphilis cases increased by 5% compared to the previous year and was higher by 13% compared to the median for the years 2013-2017. The syphilis cases were most often detected in the age group between 30 and 34 (20.7%) and among men (86.9%). Most cases were recognized among men who have sex with men (42%).

Conclusion

In 2019, the number of newly detected syphilis cases increased compared to the previous year. Preventive initiatives should be taken, especially among key populations such as young people, men who have sexual contact with men, and people who have risky sexual behaviors. The low syphilis diagnosis rate compared to other European countries and the large differences in diagnosis rates between regions of Poland indicate a problem with recognition and reporting. Improving the functioning of national surveillance is essential for adequate assessment of the epidemiological situation.",2021-01-01 +33786050,"Microsatellite-Based Genotyping, Analysis of Population Structure, Presence of Trichomonas vaginalis Virus (TVV) and Mycoplasma hominis in T. vaginalis Isolates from Southwest of Turkey.","

Background

The present study aimed to determine genetic diversity of Trichomonas vaginalis (T. vaginalis) isolates with microsatellite markers in Turkey (Nov 2015 to 2016) and to create a web-based microsatellite typing (MT) approach for the global interpretation of the data. In addition, the endosymbiosis of Mycoplasma hominis (M. hominis) and T. vaginalis virus (TVV) in the isolates was also examined.

Methods

The allele sizes for each locus were calculated and microsatellite types were determined according to the allele profiles. The population structure was examined with a Bayesian clustering method. A website (http://mttype.adu.edu.tr) was created for collection and sharing of microsatellite data. The presence of TVV and M. hominis in T. vaginalis isolates was investigated with electrophoresis and PCR.

Results

Of 630 vaginal samples T. vaginalis was detected in 30 (4.7%) and those were used for further analysis. The structure produced by a clustering algorithm revealed eight genetic groups. The typing of isolates according to microsatellites revealed 23 different microsatellite types. Three clones were determined among isolates (MT10 16.7%; MT18 10% and MT3 6.7%). The frequency of TVV and M. hominis was 16.6% (n=5) and 20% (n=6), respectively.

Conclusion

Presence of three clones among 30 T. vaginalis isolates indicated that microsatellite-based genotyping was efficient to determine the clonal distribution of T. vaginalis isolates. Therefore, a promising tool might be developed further and adapted to the studies dealing with molecular epidemiology of T. vaginalis. Microsatellite data from forthcoming studies will be deposited and presented on the website. In addition, we also presented the frequency of two endosymbionts in T. vaginalis isolates for the first time in Turkey.",2021-01-01 +34419752,Metabolic variables associated with response to cognitive behavioural therapy for depression in females: A Canadian biomarker integration network for depression (CAN-BIND) study.,"

Introduction

Cognitive behavioural therapy (CBT) is an established first-line treatment for depression; however, it remains unclear which factors predict a positive outcome with this approach. Prior work suggests that co-morbid obesity predicts a poorer response to antidepressant medication. The current study examined whether there is an association between weight parameters and improvement of depressive symptoms with CBT.

Methods

This was a secondary analysis of data from the ""Clinical and Biological Markers of Response to Cognitive Behavioural Therapy for Depression - 6"" (CANBIND-6; https://clinicaltrials.gov/ct2/show/NCT02883257) study. Adult participants (n = 41) with a diagnosis of Major Depressive Disorder (MDD) or Persistent Depressive Disorder (PDD) were recruited from an outpatient tertiary psychiatric centre in Canada. Participants completed 20 individual sessions of CBT over 16 weeks. The primary measure for treatment outcome was the Montgomery-Åsberg Depression Rating Scale (MADRS) total score at week 16.

Results

Thirty-seven participants completed assessments pre and post CBT. Baseline weight parameters were not correlated with treatment response to CBT in the entire group. There was a significant sex*waist circumference (WC) (B:-1.34; p = 0.004) and sex*body mass index (BMI) interaction (B:-2.03; p:0.009). In female participants, baseline waist circumference, but not BMI, significantly predicted week 16 MADRS after controlling for age and baseline MADRS (B:0.422 p:0.049).

Limitations

The major limitation of our preliminary finding is the small sample size.

Conclusion

Our preliminary findings suggest that higher waist circumference may be associated with a better treatment response to CBT for depression in females. This result could be of clinical relevance and warrants further investigation in larger and independent samples.",2021-07-21 +35116615,Primary bladder schwannoma: a case report and literature review.,"Primary bladder schwannoma is an extremely rare bladder tumor that originates from Schwann cells in the nerve sheath and often associated with von Reichnhausen's disease. Isolated cases of urinary bladder schwannoma are incredibly rare with no more than 1/1,000 of bladder tumours. We report a 33-year-old female patient who did not have any symptoms and was found by computed tomography (CT). Preoperative cystoscopy revealed a large sessile and smooth-surfaced mass on the anterior top of the bladder. Then she was successfully managed by partial cystectomy. Hematoxylin-eosin (HE) staining and immunohistochemistry (IHC) confirmed the mass was schwannoma. She was discharged 16 days after admission. In addition, she was followed up without intravesical recurrence or metastases for 29 months. Subsequently, literatures in PubMed (https://pubmed.ncbi.nlm.nih.gov/) accessed to bladder schwannoma since 1993 are searched and reviewed, more clinical data are provided to better assist in the diagnosis and treatment. In summary, bladder schwannoma is a rare benign tumor of the urinary system. Imaging examination and cystoscopy have a hint on the disease to a certain extent. The first choice of treatment is surgical resection, pathology is the gold standard and S-100 is usually positive. On account of the possibility of malignant transformation of the disease, Long-term follow-up is necessary.",2021-06-01 +33966515,"Developing Risk Assessment Criteria and Predicting High- and Low-Dengue Risk Villages for Strengthening Dengue Prevention Activities: Community Participatory Action Research, Thailand.","

Background

Risk assessment criteria for predicting dengue outbreak must be appropriated at village levels. We aimed to develop risk dengue village prediction criteria, predict village dengue risk, and strengthen dengue prevention based on community participation.

Methods

This participatory research, conducted in Southern Thailand, included the following 5 phases: (i) preparing communities in 3 districts; (ii) developing risk dengue village prediction criteria; (iii) applying a computer program; (iv) predicting village dengue risk with 75 public health providers in 39 PCUs; and (v) utilizing findings to strengthen dengue prevention activities in 220 villages. Data collection for prediction used secondary data from primary care units for the past 5 years and the current year. Descriptive statistics were used to calculate the criteria and compare them with the standard level to adjust the risk score.

Results

Risk dengue village assessment criteria had 2 aspects: dengue severity (3 factors) and dengue outbreak opportunity (3 factors). Total scores were 33 points and cut-off of 17 points for high and low dengue risks villages. All criteria were applied using computer program (http://surat.denguelim.com). Risk prediction involved stakeholder participation in 220 villages, and used for strengthening dengue prevention activities. The concept of integrated vector management included larval indices surveillance system, garbage management, larval indices level lower than the standard, community capacity activities for dengue prevention, and school-based dengue prevention. The risk prediction criteria and process mobilized villages for dengue prevention activities to decrease morbidity rate.

Conclusion

Dengue risk assessment criteria were appropriated within the village, with its smallest unit, the household, included. The data can be utilized at village levels for evaluating dengue outbreak risks.",2021-01-01 +32942983,PAWER: protein array web exploreR.,"

Background

Protein microarray is a well-established approach for characterizing activity levels of thousands of proteins in a parallel manner. Analysis of protein microarray data is complex and time-consuming, while existing solutions are either outdated or challenging to use without programming skills. The typical data analysis pipeline consists of a data preprocessing step, followed by differential expression analysis, which is then put into context via functional enrichment. Normally, biologists would need to assemble their own workflow by combining a set of unrelated tools to analyze experimental data. Provided that most of these tools are developed independently by various bioinformatics groups, making them work together could be a real challenge.

Results

Here we present PAWER, the online web tool dedicated solely to protein microarray analysis. PAWER enables biologists to carry out all the necessary analysis steps in one go. PAWER provides access to state-of-the-art computational methods through the user-friendly interface, resulting in publication-ready illustrations. We also provide an R package for more advanced use cases, such as bespoke analysis workflows.

Conclusions

PAWER is freely available at https://biit.cs.ut.ee/pawer .",2020-09-17 +31592084,The Grass Carp Genomic Visualization Database (GCGVD): an informational platform for genome biology of grass carp.,"With the release of the draft genome of the grass carp, researches on the grass carp from the genetic level and the further molecular mechanisms of economically valuable physiological behaviors have gained great attention. In this paper, we integrated a large number of genomic, genetic and some other data resources and established a web-based grass carp genomic visualization database (GCGVD). To view these data more effectively, we visualized grass carp and zebrafish gene collinearity and genetic linkage map using Scalable Vector Graphics (SVG) format in the browser, and genomic annotations by JBrowse. Furthermore, we carried out some preliminary study on a whole-genome alternative splicing (AS)of the grass carp. The RNA-seq reads of 15 samples were aligned to the reference genome of the grass carp by Bowtie2 software. RNA-seq reads of each sample and density map of reads were also exhibited in JBrowse. Additionally, we designed a universal grass carp genome annotation data model to improve the retrieval speed and scalability. Compared with the published database GCGD previously, we newly added the visualization of some more genomic annotations, conserved domain and RNA-seq reads aligned to the reference genome. GCGVD can be accessed at http://122.112.216.104.",2019-08-07 +32011235,Transcriptional landscape of myogenesis from human pluripotent stem cells reveals a key role of TWIST1 in maintenance of skeletal muscle progenitors. ,"Generation of skeletal muscle cells with human pluripotent stem cells (hPSCs) opens new avenues for deciphering essential, but poorly understood aspects of transcriptional regulation in human myogenic specification. 
In this study, we characterized the transcriptional landscape of distinct human myogenic stages, including OCT4::EGFP+ pluripotent stem cells, MSGN1::EGFP+ presomite cells, PAX7::EGFP+ skeletal muscle progenitor cells, MYOG::EGFP+ myoblasts, and multinucleated myotubes. We defined signature gene expression profiles from each isolated cell population with unbiased clustering analysis, which provided unique insights into the transcriptional dynamics of human myogenesis from undifferentiated hPSCs to fully differentiated myotubes. Using a knock-out strategy, we identified TWIST1 as a critical factor in maintenance of human PAX7::EGFP+ putative skeletal muscle progenitor cells. Our data revealed a new role of TWIST1 in human skeletal muscle progenitors, and we have established a foundation to identify transcriptional regulations of human myogenic ontogeny (online database can be accessed in http://www.myogenesis.net/).",2020-02-03 +31034103,Functional analysis tools for post-translational modification: a post-translational modification database for analysis of proteins and metabolic pathways.,"Post-translational modifications (PTMs) are critical regulators of protein function, and nearly 200 different types of PTM have been identified. Advances in high-resolution mass spectrometry have led to the identification of an unprecedented number of PTM sites in numerous organisms, potentially facilitating a more complete understanding of how PTMs regulate cellular behavior. While databases have been created to house the resulting data, most of these resources focus on individual types of PTM, do not consider quantitative PTM analyses or do not provide tools for the visualization and analysis of PTM data. 
Here, we describe the Functional Analysis Tools for Post-Translational Modifications (FAT-PTM) database (https://bioinformatics.cse.unr.edu/fat-ptm/), which currently supports eight different types of PTM and over 49 000 PTM sites identified in large-scale proteomic surveys of the model organism Arabidopsis thaliana. The FAT-PTM database currently supports tools to visualize protein-centric PTM networks, quantitative phosphorylation site data from over 10 different quantitative phosphoproteomic studies, PTM information displayed in protein-centric metabolic pathways and groups of proteins that are co-modified by multiple PTMs. Overall, the FAT-PTM database provides users with a robust platform to share and visualize experimentally supported PTM data, develop hypotheses related to target proteins or identify emergent patterns in PTM data for signaling and metabolic pathways.",2019-05-31 +34124628,Uncovering transmission patterns of COVID-19 outbreaks: A region-wide comprehensive retrospective study in Hong Kong.,"

Background

Given the dynamism and heterogeneity of COVID-19 transmission patterns, determining the most effective yet timely strategies for specific regions remains a severe challenge for public health decision-makers.

Methods

In this work, we proposed a spatiotemporal connectivity analysis method for discovering transmission patterns across geographic locations and age-groups throughout different COVID-19 outbreak phases. First, we constructed the transmission networks of the confirmed cases during different phases by considering the spatiotemporal connectivity of any two cases. Then, for each case and those cases immediately pointed from it, we characterized the corresponding cross-district/population transmission pattern by counting their district-to-district and age-to-age occurrences. By summating the cross-district/population transmission patterns of all cases during a given period, we obtained the aggregated cross-district and cross-population transmission patterns.

Findings

We conducted a region-wide comprehensive retrospective study in Hong Kong based on the complete data report of COVID-19 cases, covering all 18 districts between January 23, 2020, and January 8, 2021 (https://data.gov.hk/en-data/dataset/hk-dh-chpsebcddr-novel-infectious-agent). The spatiotemporal connectivity analysis clearly unveiled the quantitative differences among various outbreak waves in their transmission scales, durations, and patterns. Moreover, for the statistically similar waves, their cross-district/population transmission patterns could be quite different (e.g., the cross-district transmission of the fourth wave was more diverse than that of the third wave, while the transmission over age-groups of the fourth wave was more concentrated than that of the third wave). At an overall level, super-spreader individuals (highly connected cases in the transmission networks) were usually concentrated in only a few districts (2 out of 18 in our study) or age-groups (3 out of 11 in our study).

Interpretation

With the discovered cross-district or cross-population transmission patterns, all of the waves of COVID-19 outbreaks in Hong Kong can be systematically scrutinized. Among all districts, quite a few (e.g., the Yau Tsim Mong district) were instrumental in spreading the virus throughout the pandemic. Aside from being exceptionally densely populated, these districts were also social-economic centers. With a variety of situated public venues, such as restaurants and singing/dancing clubs, these districts played host to all kinds of social gathering events, thereby providing opportunities for widespread and rapid transmission of the virus. Thus, these districts should be given the highest priority when deploying district-specific social distancing or intervention strategies, such as lockdown and stringent mandatory coronavirus testing for identifying and obstructing the chain of transmission. We also observed that most of the reported cases and the highly connected cases were middle-aged and elderly people (40- to 69-year-olds). People in these age-groups were active in various public places and social activities, and thus had high chances of being infected by or infecting others.

Funding

General research fund of the Hong Kong research grants council.",2021-06-06 +33376807,Concomitant prediction of environmental fate and toxicity of chemical compounds.,"The environmental fate of many functional molecules that are produced on a large scale as precursors or as additives to specialty goods (plastics, fibers, construction materials, etc.), let alone those synthesized by the pharmaceutical industry, is generally unknown. Assessing their environmental fate is crucial when taking decisions on the manufacturing, handling, usage, and release of these substances, as is the evaluation of their toxicity in humans and other higher organisms. While this data are often hard to come by, the experimental data already available on the biodegradability and toxicity of many unusual compounds (including genuinely xenobiotic molecules) make it possible to develop machine learning systems to predict these features. As such, we have created a predictor of the ""risk"" associated with the use and release of any chemical. This new system merges computational methods to predict biodegradability with others that assess biological toxicity. The combined platform, named BiodegPred (https://sysbiol.cnb.csic.es/BiodegPred/), provides an informed prognosis of the chance a given molecule can eventually be catabolized in the biosphere, as well as of its eventual toxicity, all available through a simple web interface. While the platform described does not give much information about specific degradation kinetics or particular biodegradation pathways, BiodegPred has been instrumental in anticipating the probable behavior of a large number of new molecules (e.g. 
antiviral compounds) for which no biodegradation data previously existed.",2020-11-13 +29092939,"Explore, Visualize, and Analyze Functional Cancer Proteomic Data Using the Cancer Proteome Atlas.","Reverse-phase protein arrays (RPPA) represent a powerful functional proteomic approach to elucidate cancer-related molecular mechanisms and to develop novel cancer therapies. To facilitate community-based investigation of the large-scale protein expression data generated by this platform, we have developed a user-friendly, open-access bioinformatic resource, The Cancer Proteome Atlas (TCPA, http://tcpaportal.org), which contains two separate web applications. The first one focuses on RPPA data of patient tumors, which contains >8,000 samples of 32 cancer types from The Cancer Genome Atlas and other independent patient cohorts. The second application focuses on the RPPA data of cancer cell lines and contains >650 independent cell lines across 19 lineages. Many of these cell lines have publicly available, high-quality DNA, RNA, and drug screening data. TCPA provides various analytic and visualization modules to help cancer researchers explore these datasets and generate testable hypotheses in an effective and intuitive manner. Cancer Res; 77(21); e51-54. ©2017 AACR.",2017-11-01 +,Giant taxon‐character matrices II: a response to Laing et al. (2017),"The trend towards big data analyses in evolutionary biology has been observed in phylogenetics via the assembly of giant datasets composed of genomic and phenotypic data. We recently (Simões et al., 2017. Giant taxon‐character matrices: Quality of character constructions remains critical regardless of size. Cladistics 33, 198–219) presented a critique of the phylogenetic character concepts used in current morphological datasets, with the caution that giant datasets did not obviate the empirical requirement of rigor in character construction. Laing et al. (2017. 
Giant taxon‐character matrices: The future of morphological systematics. Cladistics, https://doi.org/10.1111/cla.12197) have since argued that we had ‘suggested’ that large datasets inherently contain flawed characters, and that we had presented a substandard methodology of phylogenetic analysis. Laing et al. concluded by discussing their approach to phylogenetic signal, total evidence and the inevitability of large datasets. We here reply to Laing et al. by reviewing what we actually wrote regarding dataset size, characters and methodology. We show that Laing et al.'s. central premise is unsupported, thus characterizing a Straw Man argument, and deeply misrepresents our original study. In part two, we discuss total evidence and phylogenetic signal issues raised by Laing et al. that are of major consequence to the appropriate construction of large morphological datasets.",2018-12-01 +32068469,Analysis of Kansas Water Well Policies and Proposal of Nonpublic Household Water Well Recommendations.,"BACKGROUND:Many nonpublic water well users unknowingly consume contaminated groundwater containing unsafe levels of pollutants. This has important implications for more than 13 million households in the United States that rely upon nonpublic water wells for drinking, cooking, and other household uses. Although public water quality is regulated through the Safe Drinking Water Act, there are no drinking water standards for nonpublic water well quality in Kansas, nor is there an adequate public health infrastructure in place to prevent or address potential exposures to contamination. OBJECTIVES:This project was conducted to identify promising action steps that would protect Kansans relying on nonpublic water wells for drinking, cooking, and other household purposes. METHODS:The project team consisted of public health, environmental health, and legal professionals with experience working on groundwater quality issues impacting nonpublic water wells in Kansas. 
From 2015 through 2018, the team established and convened an advisory group; reviewed relevant state statutes and regulations, all Kansas county environmental codes, and a representative sample of 23 city water well codes; conducted an extensive review of academic literature to identify best practices; conducted dozens of key informant interviews; proposed recommendations; engaged dozens of stakeholders through a survey of these proposed recommendations; and conducted interactive webinars to identify which organizations need to lead each of the recommendations. DISCUSSION:The project team developed 18 recommendations. The recommendations are organized by survey respondents' perceptions of potential public health impact. There are very few standard practices in Kansas that ensure safe water for nonpublic household water wells. Although not all of the 18 recommendations may be applicable to other communities and states, many likely would be useful for governmental agencies, academic institutions, nonprofit organizations, and others to consider. These recommendations offer more protections for nonpublic household water well users than any resource we have found. https://doi.org/10.1289/EHP5507.",2020-02-18 +31380747,SafePredict: A Meta-Algorithm for Machine Learning That Uses Refusals to Guarantee Correctness.,"SafePredict is a novel meta-algorithm that works with any base prediction algorithm for online data to guarantee an arbitrarily chosen correctness rate, 1-ϵ, by allowing refusals. Allowing refusals means that the meta-algorithm may refuse to emit a prediction produced by the base algorithm so that the error rate on non-refused predictions does not exceed ϵ. The SafePredict error bound does not rely on any assumptions on the data distribution or the base predictor. When the base predictor happens not to exceed the target error rate ϵ, SafePredict refuses only a finite number of times. 
When the error rate of the base predictor changes through time SafePredict makes use of a weight-shifting heuristic that adapts to these changes without knowing when the changes occur yet still maintains the correctness guarantee. Empirical results show that (i) SafePredict compares favorably with state-of-the-art confidence-based refusal mechanisms which fail to offer robust error guarantees; and (ii) combining SafePredict with such refusal mechanisms can in many cases further reduce the number of refusals. Our software is included in the supplementary material, which can be found on the Computer Society Digital Library at http://doi.ieeecomputersociety.org/10.1109/TPAMI.2019.2932415.",2021-01-08 +27490710,The Evolution of Digital Chemistry at Southampton.,"In this paper we take a historical view of e-Science and e-Research developments within the Chemical Sciences at the University of Southampton, showing the development of several stages of the evolving data ecosystem as Chemistry moves into the digital age of the 21(st) Century. We cover our research on aspects of the representation of chemical information in the context of the world wide web (WWW) and its semantic enhancement (the Semantic Web) and illustrate this with the example of the representation of quantities and units within the Semantic Web. We explore the changing nature of laboratories as computing power becomes increasing powerful and pervasive and specifically look at the function and role of electronic or digital notebooks. 
Having focussed on the creation of chemical data and information in context, we finish the paper by following the use and reuse of this data as facilitated by the features provided by digital repositories and their importance in facilitating the exchange of chemical information touching on the issues of open and or intelligent access to the data.",2015-09-01 +32595658,BRIDGE - A Visual Analytics Web Tool for Barley Genebank Genomics.,"Genebanks harbor a large treasure trove of untapped plant genetic diversity. A growing world population and a changing climate require an increase in the production and development of stress resistant plant cultivars while decreasing the acreage. These requirements for improved plant cultivars can be supported by the broader exploitation of plant genetic resources (PGR) as inputs for genomics-assisted breeding. To support this process we have developed BRIDGE, a data warehouse and exploratory data analysis tool for genebank genomics of barley (Hordeum vulgare L.). Using efficient technologies for data storage, data transfer and web development, we facilitate access to digital genebank resources of barley by prioritizing the interactive and visual analysis of integrated genotypic and phenotypic data. The underlying data resulted from a barley genebank genomics study cataloging sequence and morphological data of 22,626 barley accessions, mainly from the German Federal ex situ genebank. BRIDGE consists of interactively coupled modules to visualize integrated, curated and quality checked data, such as variation data, results of dimensionality reduction and genome wide association studies (GWAS), phenotyping results, passport data as well as the geographic distribution of germplasm samples. The core component is a manager for custom collections of germplasm. 
A search module to find and select germplasm by passport and phenotypic attributes is included as well as modules to export genotypic data in gzip-compressed variant call format (VCF) files and phenotypic data in MIAPPE-compliant ISA-Tab files. BRIDGE is accessible at the following URL: https://bridge.ipk-gatersleben.de.",2020-06-11 +29855811,Procura-PALavras (P-PAL): A Web-based interface for a new European Portuguese lexical database.,"In this article, we present Procura-PALavras (P-PAL), a Web-based interface for a new European Portuguese (EP) lexical database. Based on a contemporary printed corpus of over 227 million words, P-PAL provides a broad range of word attributes and statistics, including several measures of word frequency (e.g., raw counts, per-million word frequency, logarithmic Zipf scale), morpho-syntactic information (e.g., parts of speech [PoSs], grammatical gender and number, dominant PoS, and frequency and relative frequency of the dominant PoS), as well as several lexical and sublexical orthographic (e.g., number of letters; consonant-vowel orthographic structure; density and frequency of orthographic neighbors; orthographic Levenshtein distance; orthographic uniqueness point; orthographic syllabification; and trigram, bigram, and letter type and token frequencies), and phonological measures (e.g., pronunciation, number of phonemes, stress, density and frequency of phonological neighbors, transposed and phonographic neighbors, syllabification, and biphone and phone type and token frequencies) for ~53,000 lemmatized and ~208,000 nonlemmatized EP word forms. To obtain these metrics, researchers can choose between two word queries in the application: (i) analyze words previously selected for specific attributes and/or lexical and sublexical characteristics, or (ii) generate word lists that meet word requirements defined by the user in the menu of analyses. 
For the measures it provides and the flexibility it allows, P-PAL will be a key resource to support research in all cognitive areas that use EP verbal stimuli. P-PAL is freely available at http://p-pal.di.uminho.pt/tools .",2018-08-01 +27436706,OpenSlice: Quantitative data sharing from HyperPeaks to global ion chromatograms (GICs).,"Data sharing in the field of MS has advanced greatly thanks to innovations such as the standardized formats, data repositories, and publications guidelines. However, there is currently no data sharing mechanism that enables real-time data browsing and deep linking on a large scale: unrestricted data access (particularly at the quantitative level) ultimately requires the user to download a local copy of the relevant data files (e.g., in order to generate extracted ion chromatograms [XICs]). In this technical resource, we present a set of technologies (collectively termed OpenSlice) that enable the user to quantitatively query hundreds of hours of proteomics discovery data (i.e., nontargeted acquisition) in real time: the user is able to effectively generate XICs for arbitrary masses on the fly and across the entire dataset (so-called global ion chromatograms), interacting with the results through a very intuitive browser-based interface. A key design consideration underlying the OpenSlice approach is the notion that every aspect of the acquired data must be accessible through a RESTful uniform resource locator based application programming interface, up to and including individual chromatographic peaks (hence HyperPeaks). A publicly accessible demonstration of this technology based on the Clinical Proteomics Tumor Analysis Consortium CompRef dataset is made available at http://compref.fenyolab.org.",2016-09-01 +31215090,Identification of 17 mRNAs and a miRNA as an integrated prognostic signature for lung squamous cell carcinoma.,"

Background

Gene signatures for predicting the outcome of lung squamous cell carcinoma (LUSC) have been employed for many years. However, various signatures have been applied in clinical practice. Therefore, in the present study, we aimed to filter out an effective LUSC prognostic gene signature by simultaneously integrating mRNA and microRNA (miRNA).

Methods

First, based on data from the Cancer Genome Atlas (TCGA) (https://www.cancer.gov/tcga), mRNAs and miRNAs that were related to overall survival of LUSC were obtained by the least absolute shrinkage and selection operator method. Subsequently, the predicting effect was tested by time-dependent receiver operating characteristic curve analysis and Kaplan-Meier survival analysis. Next, related clinical indices were added to evaluate the efficiency of the selected gene signatures. Finally, validation and comparison using three independent gene signatures were performed using data from the Gene Expression Omnibus database (https://www.ncbi.nlm.nih.gov/geo).

Results

Our data showed that the prognostic index (PI) contained 17 mRNAs and one miRNA. According to the best normalized cut-off of PI (0.0247), the hazard ratio of the PI was 3.40 (95% confidence interval = 2.33-4.96). Moreover, when clinical factors were introduced, the PI was still the most significant index. In addition, only two Gene Ontology terms with p < 0.05 were reported. Furthermore, validation implied that, using our 18-gene signature, only hazard ratio = 1.36 (95% confidence interval = 1.01-1.83) was significant compared to the other three groups of gene biomarkers.

Conclusions

The 18-gene signature selected based on data from the TCGA database had an effective prognostic value for LUSC patients.",2019-08-01 +33779967,Pharmacokinetics of Eltrombopag in Healthy Chinese Subjects and Effect of Sex and Genetic Polymorphism on its Pharmacokinetic and Pharmacodynamic Variability.,"

Background and objective

Eltrombopag is the first oral, small-molecule, non-peptide thrombopoietin receptor agonist for the treatment of idiopathic thrombocytopenic purpura. This study investigated the pharmacokinetics of eltrombopag in healthy Chinese subjects and evaluated the effect of sex and genetic polymorphisms on its variability.

Methods

Forty-eight healthy subjects were administered a single dose of eltrombopag (25 mg). Plasma concentrations of eltrombopag were determined using a validated liquid chromatography-tandem mass spectrometry method, and platelet counts were determined by blood tests. CYP1A2 rs762551, CYP2C8*3 rs10509681, CYP2C8*3 rs11572080, UGT1A1 rs887829, UGT1A3 rs3806596, and BCRP rs2231142 polymorphisms were genotyped by Sanger sequencing. A back-propagation artificial neural network (BP-ANN) model was constructed to predict pharmacokinetics based on physiological factors and genetic polymorphism data.

Results

Compared with male subjects, female subjects who received a single 25-mg dose of eltrombopag exhibited a significantly increased mean maximum plasma concentration (Cmax) and significantly decreased apparent clearance. Additionally, CYP1A2 rs762551 C>A single nucleotide polymorphism influenced distribution and elimination. C-allele carriers exhibited 30% higher systemic exposure and 20% lower apparent clearance compared with homozygous A-allele carriers. Mean percentage increases in platelet counts from baseline to Day 5 were 9.38% and 17.06% in male and female subjects, respectively. The BP-ANN model had a high goodness-of-fit index and good coherence between predicted and measured concentrations (R = 0.98979).

Conclusion

Sex and CYP1A2 rs762551 C>A were associated with the pharmacokinetic variability of eltrombopag in healthy Chinese subjects. Females exhibited a better platelet-elevating effect compared with males administered the same dosage. The developed BP-ANN model based on physiological factors and genetic polymorphism data could be promising for applications in pharmacokinetic studies.

Trial registrations

https://www.Chinadrugtrials.org.cn CTR20190898.",2021-03-29 +31777112,How the Human Brain Sleeps: Direct Cortical Recordings of Normal Brain Activity.,"OBJECTIVE:Regional variations in oscillatory activity during human sleep remain unknown. Using the unique ability of intracranial electroencephalography to study in situ brain physiology, this study assesses regional variations of electroencephalographic sleep activity and creates the first atlas of human sleep using recordings from the first sleep cycle. METHODS:Intracerebral electroencephalographic recordings with channels displaying physiological activity from nonlesional tissue were selected from 91 patients of 3 tertiary epilepsy centers. Sections during non-rapid eye movement sleep (stages N2 and N3) and rapid eye movement sleep (stage R) were selected from the first sleep cycle for oscillatory and nonoscillatory signal analysis. Results of 1,468 channels were grouped into 38 regions covering all cortical areas. RESULTS:We found regional differences in the distribution of sleep transients and spectral content during all sleep stages. There was a caudorostral gradient, with more slow frequencies and fewer spindles in temporoparieto-occipital than in frontal cortex. Moreover, deep-seated structures showed spectral peaks differing from the baseline electroencephalogram. The regions with >60% of channels presenting significant rhythmic activity were either mesial or temporal basal structures that contribute minimally to the scalp electroencephalogram. Finally, during deeper sleep stages, electroencephalographic analysis revealed a more homogeneous spatial distribution, with increased coupling between high and low frequencies. INTERPRETATION:This study provides a better understanding of the regional variability of sleep, and establishes a baseline for human sleep in all cortical regions during the first sleep cycle. 
Furthermore, the open-access atlas will be a unique resource for research (https://mni-open-ieegatlas. RESEARCH:mcgill.ca). ANN NEUROL 2020;87:289-301.",2019-12-13 +34135895,Transcriptomic Signature Differences Between SARS-CoV-2 and Influenza Virus Infected Patients.,"The reason why most individuals with COVID-19 have relatively limited symptoms while other develop respiratory distress with life-threatening complications remains unknown. Increasing evidence suggests that COVID-19 associated adverse outcomes mainly rely on dysregulated immunity. Here, we compared transcriptomic profiles of blood cells from 103 patients with different severity levels of COVID-19 with that of 27 healthy and 22 influenza-infected individuals. Data provided a complete overview of SARS-CoV-2-induced immune signature, including a dramatic defect in IFN responses, a reduction of toxicity-related molecules in NK cells, an increased degranulation of neutrophils, a dysregulation of T cells, a dramatic increase in B cell function and immunoglobulin production, as well as an important over-expression of genes involved in metabolism and cell cycle in patients infected with SARS-CoV-2 compared to those infected with influenza viruses. These features also differed according to COVID-19 severity. Overall and specific gene expression patterns across groups can be visualized on an interactive website (https://bix.unil.ch/covid/). Collectively, these transcriptomic host responses to SARS-CoV-2 infection are discussed in the context of current studies, thereby improving our understanding of COVID-19 pathogenesis and shaping the severity level of COVID-19.",2021-05-31 +33610623,Survival after palliative radiation therapy for cancer: The METSSS model.,"

Background

We propose a predictive model that identifies patients at greatest risk of death after palliative radiotherapy, and subsequently, can help medical professionals choose treatments that better align with patient choice and prognosis.

Methods

The National Cancer Database was queried for recipients of palliative radiotherapy during first course of treatment. Cox regression models and adjusted hazard ratios with 95% confidence intervals were used to evaluate survival predictors. The mortality risk index was calculated using predictors from the estimated Cox regression model, with higher values indicating higher mortality risk. Based on tertile cutpoints, patients were divided into low, medium, and high risk groups.

Results

A total of 68,505 patients were included from 2010-2014, median age 65.7 years. Several risk factors were found to predict survival: (1) location of metastases (liver, bone, lung, and brain); (2) age; (3) tumor primary (prostate, breast, lung, other); (4) gender; (5) Charlson-Deyo comorbidity score; and (6) radiotherapy site. The median survival times were 11.66 months, 5.09 months, and 3.28 months in the low (n=22,621), medium (n=22,638), and high risk groups (n=22,611), respectively. A nomogram was created and validated to predict survival, available online, https://tinyurl.com/METSSSmodel. Harrel's C-index was 0.71 and receiver operator characteristic area under the curve was 0.76 at 4 years.

Conclusion

We created a predictive nomogram for survival of patients receiving palliative radiotherapy during their first course of treatment (named METSSS), based on Metastases location, Elderly (age), Tumor primary, Sex, Sickness/comorbidity, and Site of radiotherapy.",2021-02-19 +32321114,Neuropsychiatric Lupus Erythematosus: Future Directions and Challenges; a Systematic Review and Survey.,"This study aimed to systematically review neuropsychiatric lupus erythematosus (NPSLE) and establish a simplified diagnostic criterion for NPSLE. Publications from 1994 to 2018 in the database (Wanfang data (http://www.wanfangdata.com.cn/index.html) and China National Knowledge Internet (http://www.cnki.net)) were included. In total, 284 original case reports and 24 unpublished cases were collected, and clinical parameters were analyzed. An attempt was made to develop a set of simplified diagnostic criteria for NPSLE based on cases described in the survey and literature; moreover, and pathophysiology and management guidelines were studied. The incidence rate of NPSLE was estimated to be 12.4% of SLE patients in China. A total of 408 NPSLE patients had 652 NP events, of which 91.2% affected the central nervous system and 8.8% affected the peripheral nervous system. Five signs (manifestations, disease activity, antibodies, thrombosis, and skin lesions) showed that negative and positive predictive values were more than 70%, included in the diagnostic criteria. The specificity, accuracy, and positive predictive value (PPV) of the revised diagnostic criteria were significantly higher than those of the American College of Rheumatology (ACR) criteria (χ2=13.642, 15.591, 65.010, p<0.001). The area under the curve (AUC) for revised diagnostic criteria was 0.962 (standard error=0.015, 95% confidence intervals [CI] =0.933-0.990), while the AUC for the ACR criteria was 0.900 (standard error=0.024, 95% CI=0.853-0.946). 
The AUC for the revised diagnostic criteria was different from that for the ACR criteria (Z=2.19, p<0.05). Understanding the pathophysiologic mechanisms leading to NPSLE is essential for the evaluation and design of effective interventions. The set of diagnostic criteria proposed here represents a simplified, reliable, and cost-effective approach used to diagnose NPSLE. The revised diagnostic criteria may improve the accuracy rate for diagnosing NPSLE compared to the ACR criteria.",2020-04-17 +32246585,Structural analysis of β-L-arabinobiose-binding protein in the metabolic pathway of hydroxyproline-rich glycoproteins in Bifidobacterium longum.,"Bifidobacterium longum is a symbiotic human gut bacterium that has a degradation system for β-arabinooligosaccharides, which are present in the hydroxyproline-rich glycoproteins of edible plants. Whereas microbial degradation systems for α-linked arabinofuranosyl carbohydrates have been extensively studied, little is understood about the degradation systems targeting β-linked arabinofuranosyl carbohydrates. We functionally and structurally analyzed a substrate-binding protein (SBP) of a putative ABC transporter (BLLJ_0208) in the β-arabinooligosaccharide degradation system. Thermal shift assays and isothermal titration calorimetry revealed that the SBP specifically bound Araf-β1,2-Araf (β-Ara2 ) with a Kd of 0.150 μm, but did not bind L-arabinose or methyl-β-Ara2 . Therefore, the SBP was termed β-arabinobiose-binding protein (BABP). Crystal structures of BABP complexed with β-Ara2 were determined at resolutions of up to 1.78 Å. The findings showed that β-Ara2 was bound to BABP within a short tunnel between two lobes as an α-anomeric form at its reducing end. BABP forms extensive interactions with β-Ara2 , and its binding mode was unique among SBPs. 
A molecular dynamics simulation revealed that the closed conformation of substrate-bound BABP is stable, whereas substrate-free form can adopt a fully open and two distinct semi-open states. The importer system specific for β-Ara2 may contribute to microbial survival in biological niches with limited amounts of digestible carbohydrates. DATABASE: Atomic coordinates and structure factors (codes 6LCE and 6LCF) have been deposited in the Protein Data Bank (http://wwpdb.org/).",2020-04-17 +34279167,Self-Directed Learning in Medical Education: Training for a Lifetime of Discovery.,"Issue: Life-long learning is a skill that is central to competent health professionals, and medical educators have sought to understand how adult professionals learn, adapt to new information, and independently seek to learn more. Accrediting bodies now mandate that training programs teach in ways that promote self-directed learning (SDL) but do not provide adequate guidance on how to address this requirement. Evidence: The model for the SDL mandate in physician training is based mostly on early childhood and secondary education evidence and literature, and may not capture the unique environment of medical training and clinical education. Furthermore, there is uncertainty about how medical schools and postgraduate training programs should implement and evaluate SDL educational interventions. The Shapiro Institute for Education and Research, in conjunction with the Association of American Medical Colleges, convened teams from eight medical schools from North America to address the challenge of defining, implementing, and evaluating SDL and the structures needed to nurture and support its development in health professional training. Implications: In this commentary, the authors describe SDL in Medical Education, (SDL-ME), which is a construct of learning and pedagogy specific to medical students and physicians in training. 
SDL-ME builds on the foundations of SDL and self-regulated learning theory, but is specifically contextualized for the unique responsibilities of physicians to patients, inter-professional teams, and society. Through consensus, the authors offer suggestions for training programs to teach and evaluate SDL-ME. To teach self-directed learning requires placing the construct in the context of patient care and of an obligation to society at large. The SDL-ME construct builds upon SDL and SRL frameworks and suggests SDL as foundational to health professional identity formation.KEYWORDSself-directed learning; graduate medical education; undergraduate medical education; theoretical frameworksSupplemental data for this article is available online at https://doi.org/10.1080/10401334.2021.1938074 .",2021-07-19 +33539619,A scoring system to predict a prolonged length of stay after surgery for Crohn's disease.,"

Aim

Many factors influence the postoperative length of stay (LOS) in Crohn's disease (CD). This study aims to identify the factors associated with a prolonged LOS after ileocolic resection (ICR) for CD and to develop a scoring system to predict the postoperative LOS in CD.

Method

Patient data were collected from St Marks Hospital, London, UK, and the Humanitas Clinical and Research Center Milan, Italy, for all patients who underwent an ICR for CD from 2005 to 2017. Logistic regression was used for multivariate analysis. The scoring system was developed from the logistic regression model. The performance of the scoring system was evaluated using the area under the receiver operating characteristic curve (AUROC).

Results

A total of 628 surgeries were included in the analysis. Eighty eight surgeries were excluded due to missing data. The remaining 543 were divided into two cohorts for the development (n = 418) and validation (n = 125) of the scoring system. The regression model was statistically significant (p < 0.0001). The statistically significant independent variables included the time since diagnosis, American Society of Anesthesiologists (ASA) grade, perioperative use of steroids, surgical access, strictureplasty and platelet count. The AUROCs for the development and validation cohorts were 0.732 and 0.7, respectively (p < 0.0001). The cut-off score suggested by Youden's index was 50, with a sensitivity of 65.6% and a specificity of 73.3%.

Conclusion

The time since diagnosis, ASA grade, steroid use, surgical access, strictureplasty and platelet count were associated with a prolonged LOS and were used to develop a scoring system. The calculator is available online at https://rebrand.ly/Crohnscal.",2021-02-20 +34250319,Review of Early Immune Response to SARS-CoV-2 Vaccination Among Patients With CKD.,"The effects of the coronavirus disease-2019 (COVID-19) pandemic, particularly among those with chronic kidney disease (CKD), who commonly have defects in humoral and cellular immunity, and the efficacy of vaccinations against severe acute respiratory syndrome coronavirus-2 (SARS-CoV-2) are uncertain. To inform public health and clinical practice, we synthesized published studies and preprints evaluating surrogate measures of immunity after SARS-CoV-2 vaccination in patients with CKD, including those receiving dialysis or with a kidney transplant. We found 35 studies (28 published, 7 preprints), with sample sizes ranging from 23 to 1140 participants and follow-up ranging from 1 week to 1 month after vaccination. Seventeen of these studies enrolled a control group. In the 22 studies of patients receiving dialysis, the development of antibodies was observed in 18% to 53% after 1 dose and in 70% to 96% after 2 doses of mRNA vaccine. In the 14 studies of transplant recipients, 3% to 59% mounted detectable humoral or cellular responses after 2 doses of mRNA vaccine. After vaccination, there were a few reported cases of relapse or de novo glomerulonephritis, and acute transplant rejection, suggesting a need for ongoing surveillance. Studies are needed to better evaluate the effectiveness of SARS-CoV-2 vaccination in these populations. Rigorous surveillance is necessary for detection of long-term adverse effects in patients with autoimmune disease and transplant recipients. For transplant recipients and those with suboptimal immune responses, alternate vaccination platforms and strategies should be considered. 
As additional data arise, the NephJC COVID-19 page will continue to be updated (http://www.nephjc.com/news/covid-vaccine).",2021-07-06 +32650717,Escher-Trace: a web application for pathway-based visualization of stable isotope tracing data.,"

Background

Stable isotope tracing has become an invaluable tool for probing the metabolism of biological systems. However, data analysis and visualization from metabolic tracing studies often involve multiple software packages and lack pathway architecture. A deep understanding of the metabolic contexts from such datasets is required for biological interpretation. Currently, there is no single software package that allows researchers to analyze and integrate stable isotope tracing data into annotated or custom-built metabolic networks.

Results

We built a standalone web-based software, Escher-Trace, for analyzing tracing data and communicating results. Escher-Trace allows users to upload baseline corrected mass spectrometer (MS) tracing data and correct for natural isotope abundance, generate publication quality graphs of metabolite labeling, and present data in the context of annotated metabolic pathways. Here we provide a detailed walk-through of how to incorporate and visualize 13C metabolic tracing data into the Escher-Trace platform.

Conclusions

Escher-Trace is an open-source software for analysis and interpretation of stable isotope tracing data and is available at https://escher-trace.github.io/ .",2020-07-10 +33746592,Carboxypeptidase A4 negatively correlates with p53 expression and regulates the stemness of breast cancer cells.,"Background: Triple-negative breast cancer (TNBC) is an aggressive cancer subtype lacking effective treatment options, and p53 is the most frequently mutated or deleted gene. Carboxypeptidase A4 (CPA4) is an extracellular metallocarboxypeptidase, which was closely associated with aggressiveness. Although a recent study indicated that CPA4 could induce epithelial‑mesenchymal transition in breast cancer cells, no studies investigated its stemness-related function and the correlation between CPA4 and p53 in TNBC. In this study, we aimed to investigate the CPA4 levels in breast cancer tissues and analyze its association with p53, and study its roles in cancer stemness maintenance. Methods: CPA4 mRNA level and its prognostic value were analyzed by using online database UALCAN (http://ualcan.path.uab.edu) and Kaplan-Meier plotter (www.kmplot.com), respectively. The expression of CPA4, p53 and ALDH1A1 in breast cancer and adjacent normal tissues were evaluated by IHC using the corresponding primary antibodies on a commercial tissue array (Shanghai Biochip Co., Ltd., Shanghai, China). siRNA knockdown was used to study the function of proliferation, colony formation assay and sphere formation in serum-free medium. Results: Analysis of the UALCAN datasets identified that CPA4 mRNA levels were elevated in TNBC, especially in the TP53-mutant subgroup. Furthermore, high levels of CPA4 mRNA were significantly associated with unfavourable overall survival OS in breast cancer patients. 
Immunohistochemistical analysis demonstrated that CPA4 levels were elevated in 32.1% of breast cancer samples (45/140), and the positive rates of ALDH1A1 and p53 in the breast cancer tissues were 25% (35/140) and 50% (70/140), respectively. Statistical analysis revealed high levels of CPA4 was significantly associated with TNBC phenotype. Correlation analysis indicated that CPA4 over-expression was positively associated with ALDH1A1 (P<0.01) and negatively correlated with p53 (P<0.05). In Kaplan-Meier survival analysis, either high CPA4 or ALDH1A1 levels was significantly correlated with poor survival in breast cancer patients. Functional studies demonstrated that down-regulation of CPA4 significantly inhibited TNBC cell proliferation, colony-formation assays in soft agar and sphere formation in serum-free medium. Conclusion: This study demonstrated for the first time that CPA4 was negatively correlates with p53 expression and inhibition of CPA4 could reduce the number of breast cancer cells with stemness property. It might be a potential target for the TNBC treatment.",2021-02-18 +33600198,Gambling fallacies: Predicting problem gambling in a national sample.,"Objective: The relationship between the level of gambling fallacy endorsement and type of gambler (nongambler, recreational gambler, at-risk gambler, and problem/pathological gambler) was assessed both concurrently and prospectively in a large national cohort of Canadian adults. Method: This cohort (n = 10,199 at baseline; 18-24 years, n = 481, 43% female; 25-34 years, n = 1,335, 62% female; 35-44 years, n = 1,543, 55% female, 45-54 years, n = 1,985, 58% female; 55-64 years, n = 2,459, 55% female; 65-74 years, n = 1,865, 44% female, 75+ years, n = 531, 43% female) was recruited from LEO, Leger Opinion's registered online panelists. The follow-up survey was completed by 55.9% of the cohort, 1 year after baseline. The full survey can be viewed at https://www.ucalgary.ca/research/national-gambling-study/. 
For the current study, scores on the Gambling Fallacies Measure, the Problem and Pathological Gambling Measure, Gambling Participation Instrument, and Impulsivity were analyzed. Results: There were three main findings. The first is that gambling fallacies are common in all categories of gamblers but somewhat more prevalent in problem and pathological gamblers. Second, the multivariate analysis determined that gambling fallacies are significant concurrent and prospective predictors of the problem/pathological gambling category, but not strong predictors relative to other variables. Third, problem gambling and heavier gambling involvement are also predictors of a future higher level of gambling fallacies. Conclusions: Collectively, these results show that gambling fallacies have some etiological relationship to problem gambling but are not the main cause of problem gambling and should not be the exclusive focus of problem gambling treatment. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-02-18 +26097180,"ZFIN, The zebrafish model organism database: Updates and new directions.","The Zebrafish Model Organism Database (ZFIN; http://zfin.org) is the central resource for genetic and genomic data from zebrafish (Danio rerio) research. ZFIN staff curate detailed information about genes, mutants, genotypes, reporter lines, sequences, constructs, antibodies, knockdown reagents, expression patterns, phenotypes, gene product function, and orthology from publications. Researchers can submit mutant, transgenic, expression, and phenotype data directly to ZFIN and use the ZFIN Community Wiki to share antibody and protocol information. Data can be accessed through topic-specific searches, a new site-wide search, and the data-mining resource ZebrafishMine (http://zebrafishmine.org). Data download and web service options are also available. 
ZFIN collaborates with major bioinformatics organizations to verify and integrate genomic sequence data, provide nomenclature support, establish reciprocal links, and participate in the development of standardized structured vocabularies (ontologies) used for data annotation and searching. ZFIN-curated gene, function, expression, and phenotype data are available for comparative exploration at several multi-species resources. The use of zebrafish as a model for human disease is increasing. ZFIN is supporting this growing area with three major projects: adding easy access to computed orthology data from gene pages, curating details of the gene expression pattern changes in mutant fish, and curating zebrafish models of human diseases.",2015-07-08 +,"PSIV-13 Basal endogenous loss, standardized total tract digestibility, and retention of Ca in sows change throughout gestation, but microbial phytase reduces basal endogenous loss of Ca by gestating sows","Abstract The objective was to test the hypothesis that standardized total tract digestibility (STTD) of Ca and Ca and P retention and the response to microbial phytase in diets fed to sows are constant throughout gestation. Thirty-six sows (parity = 3.3) were allotted to 4 diets on d 7 post-breeding. Two corn-based diets in which calcium carbonate was the sole source of Ca and 2 Ca-free diets were formulated without or with phytase (500 units per kg). Sows were housed individually in metabolism crates during early-gestation, mid-gestation, and late-gestation, and feces and urine were quantitatively collected. Data were analyzed by repeated measures using a model that included phytase, period of gestation, and the interaction between phytase and period as fixed effects, and block and replicate as random effects. Interactions between period and phytase were not observed. 
The basal endogenous loss (BEL) of Ca was greater (P < 0.05) in early-gestation than in mid- and late-gestation, but phytase reduced (P = 0.002) BEL of Ca and tended (P = 0.099) to increase apparent total tract digestibility (ATTD) of P in the Ca-free diet. Phytase did not affect ATTD of DM, STTD of Ca, ATTD of P, or Ca and P retention in sows fed the diet containing calcium carbonate (Table 1). The ATTD of DM was not affected by period, but Ca retention and ATTD of Ca and P were least (P < 0.05) in mid-gestation, followed by early- and late-gestation, and the STTD of Ca in mid-gestation was also reduced (P < 0.05) compared with early- or late-gestation. Phosphorus retention was greater (P < 0.05) in late-gestation than in the earlier periods. In conclusion, BEL of Ca, STTD of Ca, ATTD of P, and Ca and P retention in sows change throughout gestation regardless of use of phytase. http://www.conferenceharvester.com/",2019-07-01 +29329522,A-GAME: improving the assembly of pooled functional metagenomics sequence data.,"

Background

Expression screening of environmental DNA (eDNA) libraries is a popular approach for the identification and characterization of novel microbial enzymes with promising biotechnological properties. In such ""functional metagenomics"" experiments, inserts, selected on the basis of activity assays, are sequenced with high throughput sequencing technologies. Assembly is followed by gene prediction, annotation and identification of candidate genes that are subsequently evaluated for biotechnological applications.

Results

Here we present A-GAME (A GAlaxy suite for functional MEtagenomics), a web service incorporating state of the art tools and workflows for the analysis of eDNA sequence data. We illustrate the potential of A-GAME workflows using real functional metagenomics data, showing that they outperform alternative metagenomics assemblers. Dedicated tools available in A-GAME allow efficient analysis of pooled libraries and rapid identification of candidate genes, reducing sequencing costs and saving the need for laborious manual annotation.

Conclusion

In conclusion, we believe A-GAME will constitute a valuable resource for the functional metagenomics community. A-GAME is publicly available at http://beaconlab.it/agame.",2018-01-12 +,A21 Retrospectively describing hepatitis C virus transmission dynamics and tracking HCV transmission networks in real-time for strategic elimination interventions,"Abstract Despite impressive uptake of direct acting antivirals for hepatitis C virus (HCV) in the Netherlands among HIV/HCV co-infected men who have sex with men (MSM), HCV transmission continues, especially among patients previously successfully treated for HCV. The incidence of reinfection occurs at the extremely high rate of 15 per 100 person-years. Clearly, more sophisticated methods are necessary to identify the sources and timing of new HCV infections among MSM. The aim of this research is to phylogenetically characterize HCV transmission dynamics within MSM-specific networks in order to provide a solid base for targeted interventions to monitor, control, and eventually stop the ongoing transmission of HCV among HIV-infected MSM and to prevent further spread of HCV to the community at large. The methodology that will be used is two-fold. Firstly, it concerns setting up a real-time monitoring system to track the HCV epidemic using phylogenetic tools and open-source software from http://nextstrain.org. Secondly, several phylogenetic methods will be used to retrospectively identify transmission clusters in Amsterdam and define epidemiological characteristics, including the directionality of transmission and the size and introduction dates of the clusters. This means that cluster cut-off points will have to be calculated. This research will result in a web-based molecular surveillance tool to monitor the persistence of endemic clades, emergence of new clades, and transmission clusters in ‘real time’, which, combined with clinical and epidemiological data, will be used for targeted interventions. 
The surveillance tool will be based on the open-source software from nextstrain.org. Secondly, by retrospectively describing the HCV transmission clusters in terms of introduction dates and subsequent dynamics, we may be able to better predict the future dynamics of the different clusters. High-resolution viral sequencing will allow us to identify the source and timing of (new) HCV infections and follow the trajectory of these MSM-specific lineages through the MSM population. Real-time insight in transmission networks using a web-based molecular surveillance tool will identify key targets for rapid interventions, awareness campaigns, and testing strategies. This can be used to prevent further spread to HIV-negative MSM and to control and eventually eliminate HCV from the MSM population.",2019-08-01 +26325390,A Geometric Representation of Collective Attention Flows.,"With the fast development of Internet and WWW, ""information overload"" has become an overwhelming problem, and collective attention of users will play a more important role nowadays. As a result, knowing how collective attention distributes and flows among different websites is the first step to understand the underlying dynamics of attention on WWW. In this paper, we propose a method to embed a large number of web sites into a high dimensional Euclidean space according to the novel concept of flow distance, which both considers connection topology between sites and collective click behaviors of users. With this geometric representation, we visualize the attention flow in the data set of Indiana university clickstream over one day. It turns out that all the websites can be embedded into a 20 dimensional ball, in which, close sites are always visited by users sequentially. The distributions of websites, attention flows, and dissipations can be divided into three spherical crowns (core, interim, and periphery). 20% popular sites (Google.com, Myspace.com, Facebook.com, etc.) 
attracting 75% attention flows with only 55% dissipations (log off users) locate in the central layer with the radius 4.1. While 60% sites attracting only about 22% traffics with almost 38% dissipations locate in the middle area with radius between 4.1 and 6.3. Other 20% sites are far from the central area. All the cumulative distributions of variables can be well fitted by ""S""-shaped curves. And the patterns are stable across different periods. Thus, the overall distribution and the dynamics of collective attention on websites can be well exhibited by this geometric representation.",2015-09-01 +25260700,HTSeq--a Python framework to work with high-throughput sequencing data.,"

Motivation

A large choice of tools exists for many standard tasks in the analysis of high-throughput sequencing (HTS) data. However, once a project deviates from standard workflows, custom scripts are needed.

Results

We present HTSeq, a Python library to facilitate the rapid development of such scripts. HTSeq offers parsers for many common data formats in HTS projects, as well as classes to represent data, such as genomic coordinates, sequences, sequencing reads, alignments, gene model information and variant calls, and provides data structures that allow for querying via genomic coordinates. We also present htseq-count, a tool developed with HTSeq that preprocesses RNA-Seq data for differential expression analysis by counting the overlap of reads with genes.

Availability and implementation

HTSeq is released as an open-source software under the GNU General Public Licence and available from http://www-huber.embl.de/HTSeq or from the Python Package Index at https://pypi.python.org/pypi/HTSeq.",2014-09-25 +28135259,Building ProteomeTools based on a complete synthetic human proteome.,"We describe ProteomeTools, a project building molecular and digital tools from the human proteome to facilitate biomedical research. Here we report the generation and multimodal liquid chromatography-tandem mass spectrometry analysis of >330,000 synthetic tryptic peptides representing essentially all canonical human gene products, and we exemplify the utility of these data in several applications. The resource (available at http://www.proteometools.org) will be extended to >1 million peptides, and all data will be shared with the community via ProteomicsDB and ProteomeXchange.",2017-01-30 +34013642,Efficient study design to estimate population means with multiple measurement instruments.,"Outcomes from studies assessing exposure often use multiple measurements. In previous work, using a model first proposed by Buonoccorsi (1991), we showed that combining direct (eg, biomarkers) and indirect (eg, self-report) measurements provides a more accurate picture of true exposure than estimates obtained when using a single type of measurement. In this article, we propose a tool for efficient design of studies that include both direct and indirect measurements of a relevant outcome. 
Based on data from a pilot or preliminary study, the tool, which is available online as a shiny app at https://michalbitan.shinyapps.io/shinyApp/, can be used to compute: (1) the sample size required for a statistical power analysis, while optimizing the percent of participants who should provide direct measures of exposure (biomarkers) in addition to the indirect (self-report) measures provided by all participants; (2) the ideal number of replicates; and (3) the allocation of resources to intervention and control arms. In addition we show how to examine the sensitivity of results to underlying assumptions. We illustrate our analysis using studies of tobacco smoke exposure and nutrition. In these examples, a near-optimal allocation of the resources can be found even if the assumptions are not precise.",2021-05-20 +31738385,DeepMSA: constructing deep multiple sequence alignment to improve contact prediction and fold-recognition for distant-homology proteins.,"

Motivation

The success of genome sequencing techniques has resulted in rapid explosion of protein sequences. Collections of multiple homologous sequences can provide critical information to the modeling of structure and function of unknown proteins. There are however no standard and efficient pipeline available for sensitive multiple sequence alignment (MSA) collection. This is particularly challenging when large whole-genome and metagenome databases are involved.

Results

We developed DeepMSA, a new open-source method for sensitive MSA construction, which has homologous sequences and alignments created from multi-sources of whole-genome and metagenome databases through complementary hidden Markov model algorithms. The practical usefulness of the pipeline was examined in three large-scale benchmark experiments based on 614 non-redundant proteins. First, DeepMSA was utilized to generate MSAs for residue-level contact prediction by six coevolution and deep learning-based programs, which resulted in an accuracy increase in long-range contacts by up to 24.4% compared to the default programs. Next, multiple threading programs are performed for homologous structure identification, where the average TM-score of the template alignments has over 7.5% increases with the use of the new DeepMSA profiles. Finally, DeepMSA was used for secondary structure prediction and resulted in statistically significant improvements in the Q3 accuracy. It is noted that all these improvements were achieved without re-training the parameters and neural-network models, demonstrating the robustness and general usefulness of the DeepMSA in protein structural bioinformatics applications, especially for targets without homologous templates in the PDB library.

Availability and implementation

https://zhanglab.ccmb.med.umich.edu/DeepMSA/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +27899630,European Nucleotide Archive in 2016.,"The European Nucleotide Archive (ENA; http://www.ebi.ac.uk/ena) offers a rich platform for data sharing, publishing and archiving and a globally comprehensive data set for onward use by the scientific community. With a broad scope spanning raw sequencing reads, genome assemblies and functional annotation, the resource provides extensive data submission, search and download facilities across web and programmatic interfaces. Here, we outline ENA content and major access modalities, highlight major developments in 2016 and outline a number of examples of data reuse from ENA.",2016-11-29 +33594931,"Knowledge and Perceptions of Tobacco-Related Harm Associated with Intention to Quit among Cigarette Smokers, e-Cigarette Users, and Dual Users: Findings from the US Population Assessment of Tobacco and Health (PATH) Wave 1.","

Objective

To study the association between knowledge of diseases caused by smoking, perceptions of harm of cigarettes and intention to quit among cigarettes and e-cigarettes users. Methods: Using US Population Assessment of Tobacco and Health (PATH) Wave 1 data (2013-2014), we investigated the mean knowledge of diseases due to smoking and perceptions of harm of cigarettes scores among cigarette smokers (n = 8,263), e-cigarette users (n = 829), and dual users (n = 745) and examined the association between knowledge, perceptions of harm and intention to quit. Results: E-cigarette users had the highest scores in both knowledge and perceptions of harm items. We found a stronger association between knowledge and intention to quit among females (aOR: 1.25; 95% CI: 1.18, 1.34) compared to males (aOR: 1.11; 95% CI: 1.05, 1.18). We observed a strong association between perceptions of harm and intention to quit among cigarette smokers (p < 0.0001) and dual users (p = 0.0001), but not e-cigarette users. Conclusions: Our study indicates it is urgent for federal and state governments to develop comprehensive guidelines for targeted health messaging regarding the harms of cigarettes, noncombustible tobacco products, and dual use, and the benefits of cessation. Further, findings suggest that effective health education should include tobacco product-specific risks and the comprehensive negative health impacts of tobacco given the strong positive association of perceptions of harm and intention to quit.Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1879145.",2021-02-17 +34139900,Assessment of biogrowth assemblages with depth in a seawater intake system of a coastal power station.,"Marine biogrowth infestation of a seawater intake system was investigated. A digital camera fixed onto a skid was used to record the biogrowth at intervals of 5m up to a depth of 55m. 
Divers inspected the intake shaft and collected the biogrowth samples for biomass estimation. A biomass density of 7.5kg m-2 and 28.2kg m-2 was recorded at 5 and 30m depths respectively. Inspection by the divers revealed that hard-shelled organisms such as oysters and brown and green mussels were observed in plenty up to a thickness of 15cm and bryozoans grew as epibionts. At lower depths (<40m), hydroids grew on the shells of green mussels along with silt accumulation. The biofouling community was composed of 46 organisms, exhibiting variation in distribution and abundance. The study explains the extent and type of marine biogrowth phenomena with depth and describes biofouling preventive methods.Supplemental data for this article is available online at https://doi.org/10.1080/08927014.2021.1933457 .",2021-05-01 +32840559,SOLQC: Synthetic Oligo Library Quality Control tool.,"

Motivation

Recent years have seen a growing number and an expanding scope of studies using synthetic oligo libraries for a range of applications in synthetic biology. As experiments are growing by numbers and complexity, analysis tools can facilitate quality control and support better assessment and inference.

Results

We present a novel analysis tool, called SOLQC, which enables fast and comprehensive analysis of synthetic oligo libraries, based on NGS analysis performed by the user. SOLQC provides statistical information such as the distribution of variant representation, different error rates and their dependence on sequence or library properties. SOLQC produces graphical reports from the analysis, in a flexible format. We demonstrate SOLQC by analyzing literature libraries. We also discuss the potential benefits and relevance of the different components of the analysis.

Availability and implementation

SOLQC is a free software for non-commercial use, available at https://app.gitbook.com/@yoav-orlev/s/solqc/. For commercial use please contact the authors.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +33109075,HiCHap: a package to correct and analyze the diploid Hi-C data.,"

Background

In diploid cells, it is important to construct maternal and paternal Hi-C contact maps respectively since the two homologous chromosomes can differ in chromatin three-dimensional (3D) organization. Though previous softwares could construct diploid (maternal and paternal) Hi-C contact maps by using phased genetic variants, they all neglected the systematic biases in diploid Hi-C contact maps caused by variable genetic variant density in the genome. In addition, few of softwares provided quantitative analyses on allele-specific chromatin 3D organization, including compartment, topological domain and chromatin loop.

Results

In this work, we revealed the feature of allele-assignment bias caused by the variable genetic variant density, and then proposed a novel strategy to correct the systematic biases in diploid Hi-C contact maps. Based on the bias correction, we developed an integrated tool, called HiCHap, to perform read mapping, contact map construction, whole-genome identification of compartments, topological domains and chromatin loops, and allele-specific testing for diploid Hi-C data. Our results show that the correction on allele-assignment bias in HiCHap does significantly improve the quality of diploid Hi-C contact maps, which subsequently facilitates the whole-genome identification of diploid chromatin 3D organization, including compartments, topological domains and chromatin loops. Finally, HiCHap also supports the data analysis for haploid Hi-C maps without distinguishing two homologous chromosomes.

Conclusions

We provided an integrated package HiCHap to perform the data processing, bias correction and structural analysis for diploid Hi-C data. The source code and tutorial of software HiCHap are freely available at https://pypi.org/project/HiCHap/ .",2020-10-27 +34824725,Does Balancing a Total Hip Arthroplasty Require a New Paradigm? Functional 3-Dimensional Balancing in Total Hip Arthroplasty.,"

Background

Traditional principles for successful outcomes in Total Hip Arthroplasty (THA) have relied largely on placing the socket in the native position and trying to restore static anatomical femoral parameters gauged on X-rays or intra-operative measurement. Stability is conventionally achieved by making appropriate changes during the time of trial reduction. Post-operative complications of dislocation and significant Limb Length Discrepancy (LLD) requiring foot wear modification represents opposite ends of the spectrum from a biomechanical perspective and these continue to be relatively high. A move towards giving more importance to functional dynamic parameters rather than static anatomical parameters and less reliance on stability testing at trial reduction is warranted.

Methods

Intraoperative 3D functional balancing of THA without stability testing at trial reduction was practiced in all subjects undergoing THA in our unit from April 2014. To date 1019 patients have had their hips replaced with the same technique. They were followed up till April 2020 for post-operative complications of dislocation and significant LLD needing footwear modification. A secondary cohort of 114 patients from 1st January to December 31st 2017 within this primary group were analyzed clinically and radiologically to ascertain the implications of functional 3D balancing on X-ray parameters, clinical outcome scores (Harris Hip Score and Oxford Hip Score), ability to squat, and subtle subjective post-operative perception of limb lengthening (POPLL).

Results

In the primary group of 1019 patients, there were only two dislocations and no patient needed footwear modification for LLD. In the detailed analysis of the secondary cohort of 114 patients, the correlation with restoration of static radiological parameters was inconsistent. 40 patients could not squat and 4 patients had subtle subjective post-operative perceived limb lengthening (POPLL). Measured outcomes such as HHS and OHS were improved in all patients with significant statistical significance (P < 0.001).

Conclusion

This study underlines the fact that more importance must be given to functional dynamic parameters by 3D balancing of the THA and not on static anatomical X-rays parameters and stability testing during trial reduction. This represents a paradigm shift in the evolution of total hip arthroplasty.

Level of evidence

A Level II study. (Data collected from the ongoing prospective study) (http://www.spine.org/Documents/LevelsofEvidenceFinal.pdf).

Supplementary information

The online version contains supplementary material available at 10.1007/s43465-021-00505-3.",2021-09-05 +33135068,Protein residues determining interaction specificity in paralogous families.,"

Motivation

Predicting the residues controlling a protein's interaction specificity is important not only to better understand its interactions but also to design mutations aimed at fine-tuning or swapping them as well.

Results

In this work, we present a methodology that combines sequence information (in the form of multiple sequence alignments) with interactome information to detect that kind of residues in paralogous families of proteins. The interactome is used to define pairwise similarities of interaction contexts for the proteins in the alignment. The method looks for alignment positions with patterns of amino-acid changes reflecting the similarities/differences in the interaction neighborhoods of the corresponding proteins. We tested this new methodology in a large set of human paralogous families with structurally characterized interactions, and discuss in detail the results for the RasH family. We show that this approach is a better predictor of interfacial residues than both, sequence conservation and an equivalent 'unsupervised' method that does not use interactome information.

Availability and implementation

http://csbg.cnb.csic.es/pazos/Xdet/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +34085565,Patterns of Nutrient Intake in Relation to Gastric Cancer: A Case Control Study.,"Gastric Cancer (GC) is the most common cancer among Iranian men. We conducted a case-control study to investigate the association between patterns of nutrient intake and the risk of GC in Iran. We enrolled 178 GC patients and 271 controls matched for age and sex. We collected dietary intakes using a validated diet history questionnaire. We performed factor analysis on 28 nutrients using multivariate logistic regression models on tertiles of factor scores and estimated odds ratios (OR) and 95% confidence intervals (95% CI). We identified three nutrient patterns. The first pattern included pantothenic acid, riboflavin, zinc, animal protein, and calcium. Selenium, thiamin, carbohydrate, vegetable protein, niacin and low intake of vitamin E loaded the second pattern, and the third pattern was abundant in fiber, carotene, vitamin C and A. We found no significant association between GC and any of the dietary patterns. However, in the first patterns, men in the highest tertile had significantly higher odds of GC than the lowest (OR = 2.15, 95% CI: 1.13-4.09, p trend = 0.02). A dietary pattern loaded by animal products may increase the risk of GC among Iranian men. Larger studies are required to approve these findings in overall and in different subgroups.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1931697.",2021-06-04 +29234333,Putative Microsatellite DNA Marker-Based Wheat Genomic Resource for Varietal Improvement and Management.,"Wheat fulfills 20% of global caloric requirement. World needs 60% more wheat for 9 billion population by 2050 but climate change with increasing temperature is projected to affect wheat productivity adversely. Trait improvement and management of wheat germplasm requires genomic resource. 
Simple Sequence Repeats (SSRs) being highly polymorphic and ubiquitously distributed in the genome, can be a marker of choice but there is no structured marker database with options to generate primer pairs for genotyping on desired chromosome/physical location. Previously associated markers with different wheat trait are also not available in any database. Limitations of in vitro SSR discovery can be overcome by genome-wide in silico mining of SSR. Triticum aestivum SSR database (TaSSRDb) is an integrated online database with three-tier architecture, developed using PHP and MySQL and accessible at http://webtom.cabgrid.res.in/wheatssr/. For genotyping, Primer3 standalone code computes primers on user request. Chromosome-wise SSR calling for all the three sub genomes along with choice of motif types is provided in addition to the primer generation for desired marker. We report here a database of highest number of SSRs (476,169) from complex, hexaploid wheat genome (~17 GB) along with previously reported 268 SSR markers associated with 11 traits. Highest (116.93 SSRs/Mb) and lowest (74.57 SSRs/Mb) SSR densities were found on 2D and 3A chromosome, respectively. To obtain homozygous locus, e-PCR was done. Such 30 loci were randomly selected for PCR validation in panel of 18 wheat Advance Varietal Trial (AVT) lines. TaSSRDb can be a valuable genomic resource tool for linkage mapping, gene/QTL (Quantitative trait locus) discovery, diversity analysis, traceability and variety identification. Varietal specific profiling and differentiation can supplement DUS (Distinctiveness, Uniformity, and Stability) testing, EDV (Essentially Derived Variety)/IV (Initial Variety) disputes, seed purity and hybrid wheat testing. 
All these are required in germplasm management as well as also in the endeavor of wheat productivity.",2017-11-28 +,DomeTree: a canonical toolkit for mitochondrial DNA analyses in domesticated animals,"Mitochondrial DNA (mtDNA) is widely used in various genetic studies of domesticated animals. Many applications require comprehensive knowledge about the phylogeny of mtDNA variants. Herein, we provide the most up‐to‐date mtDNA phylogeny (i.e. haplogroup tree or matrilineal genealogy) and a standardized hierarchical haplogroup nomenclature system for domesticated cattle, dogs, goats, horses, pigs, sheep, yaks and chickens. These high‐resolution mtDNA haplogroup trees based on 1240 complete or near‐complete mtDNA genome sequences are available in open resource DomeTree (http://www.dometree.org). In addition, we offer the software MitoToolPy (http://www.mitotool.org/mp.html) to facilitate the mtDNA data analyses. We will continuously and regularly update DomeTree and MitoToolPy.",2015-09-01 +31629694,PerMemDB: A database for eukaryotic peripheral membrane proteins.,"The majority of all proteins in cells interact with membranes either permanently or temporarily. Peripheral membrane proteins form transient complexes with membrane proteins and/or lipids, via non-covalent interactions and are of outmost importance, due to numerous cellular functions in which they participate. In an effort to collect data regarding this heterogeneous group of proteins we designed and constructed a database, called PerMemDB. PerMemDB is currently the most complete and comprehensive repository of data for eukaryotic peripheral membrane proteins deposited in UniProt or predicted with the use of MBPpred - a computational method that specializes in the detection of proteins that interact non-covalently with membrane lipids, via membrane binding domains. The first version of the database contains 231,770 peripheral membrane proteins from 1009 organisms. 
All entries have cross-references to other databases, literature references and annotation regarding their interactions with other proteins. Moreover, additional sequence annotation of the characteristic domains that allow these proteins to interact with membranes is available, due to the application of MBPpred. Through the web interface of PerMemDB, users can browse the contents of the database, submit advanced text searches and BLAST queries against the protein sequences deposited in PerMemDB. We expect this repository to serve as a source of information that will allow the scientific community to gain a deeper understanding of the evolution and function of peripheral membrane proteins via the enhancement of proteome-wide analyses. The database is available at: http://bioinformatics.biol.uoa.gr/db=permemdb.",2019-10-17 +33326076,Detection and Characterization of Ribosome-Associated Long Noncoding RNAs.,"Ribosome profiling shows potential for studying the function of long noncoding RNAs (lncRNAs). We introduce a bioinformatics pipeline for detecting ribosome-associated lncRNAs (ribo-lncRNAs) from ribosome profiling data. Further, we describe a machine-learning approach for the characterization of ribo-lncRNAs based on their sequence features. Scripts for ribo-lncRNA analysis can be accessed at ( https://ribolnc.hamadalab.com/ ).",2021-01-01 +32089838,Identifying orthologs with OMA: A primer.,"The Orthologous Matrix (OMA) is a method and database that allows users to identify orthologs among many genomes. OMA provides three different types of orthologs: pairwise orthologs, OMA Groups and Hierarchical Orthologous Groups (HOGs). This Primer is organized in two parts. In the first part, we provide all the necessary background information to understand the concepts of orthology, how we infer them and the different subtypes of orthology in OMA, as well as what types of analyses they should be used for. 
In the second part, we describe protocols for using the OMA browser to find a specific gene and its various types of orthologs. By the end of the Primer, readers should be able to (i) understand homology and the different types of orthologs reported in OMA, (ii) understand the best type of orthologs to use for a particular analysis; (iii) find particular genes of interest in the OMA browser; and (iv) identify orthologs for a given gene.  The data can be freely accessed from the OMA browser at https://omabrowser.org.",2020-01-17 +34274525,Predicting critical illness on initial diagnosis of COVID-19 based on easily obtained clinical variables: development and validation of the PRIORITY model.,"

Objectives

We aimed to develop and validate a prediction model, based on clinical history and examination findings on initial diagnosis of coronavirus disease 2019 (COVID-19), to identify patients at risk of critical outcomes.

Methods

We used data from the SEMI-COVID-19 Registry, a cohort of consecutive patients hospitalized for COVID-19 from 132 centres in Spain (23rd March to 21st May 2020). For the development cohort, tertiary referral hospitals were selected, while the validation cohort included smaller hospitals. The primary outcome was a composite of in-hospital death, mechanical ventilation, or admission to intensive care unit. Clinical signs and symptoms, demographics, and medical history ascertained at presentation were screened using least absolute shrinkage and selection operator, and logistic regression was used to construct the predictive model.

Results

There were 10 433 patients, 7850 in the development cohort (primary outcome 25.1%, 1967/7850) and 2583 in the validation cohort (outcome 27.0%, 698/2583). The PRIORITY model included: age, dependency, cardiovascular disease, chronic kidney disease, dyspnoea, tachypnoea, confusion, systolic blood pressure, and SpO2 ≤93% or oxygen requirement. The model showed high discrimination for critical illness in both the development (C-statistic 0.823; 95% confidence interval (CI) 0.813, 0.834) and validation (C-statistic 0.794; 95%CI 0.775, 0.813) cohorts. A freely available web-based calculator was developed based on this model (https://www.evidencio.com/models/show/2344).

Conclusions

The PRIORITY model, based on easily obtained clinical information, had good discrimination and generalizability for identifying COVID-19 patients at risk of critical outcomes.",2021-07-15 +33505795,Bacterial meta-analysis of chicken cecal microbiota.,"Poultry production is an industry that generates 90,000 metric tons of chicken meat worldwide. Thus, optimizing chicken growth and sustainable production is of great importance. A central factor determining not only production parameters, but also stability of the immune system and chicken health, is the diversity and variability of the microbiota present throughout the gastrointestinal tract. To date, several studies have investigated the relationship between bacterial communities and the gut microbiome, with limited data to compare. This study aims to create a bacterial meta-analysis based on studies using amplicon sequencing with Illumina sequencing technologies in order to build a baseline for comparison in future analyses of the cecal bacterial composition in chicken. A systematic literature review was performed (SYRF ID: e84f0468-e418-4eec-9da4-b517f1b4809d. Full project URL: https://app.syrf.org.uk/projects/e84f0468-e418-4eec-9da4-b517f1b4809d/detail). From all the available and analyzed manuscripts only nine contained full raw-sequence data available and the corresponding metadata. A total of 324 samples, comprising three different regions within the 16S rRNA gene, were analyzed. Due to the heterogeneity of the data, each region was analyzed independently and an effort for a joint analysis was performed as well. Taxonomic profiling revealed 11 phyla, with Firmicutes as the most prevalent phylum, followed by Bacteroidetes and Proteobacteria. At genus level, 109 genera were found. Shannon metric for alpha diversity showed that factors like type of chickens (Commercial or experimental) and 16S rRNA gene subregion have negligible effect on diversity. 
Despite the large number of parameters that were taken into account, the identification of common bacteria showed five genera to be common for all sets in at least 50% of the samples. These genera are highly associated to cellulose degradation and short chain fatty acids synthesis. In general, it was possible to identify some commonalities in the bacterial cecal microbial community despite the extensive variability and factors differing from one study to another.",2021-01-05 +30774152,Using the Dietary Supplement Label Database to Identify Potentially Harmful Dietary Supplement Ingredients.,"Over half of young adults, athletes, and Military Service Members self-report using at least one dietary supplement (DS) 1 or more times per week. DS may be consumed to improve health, provide more energy, increase muscle strength, and/or enhance performance. The United States Food and Drug Administration (FDA) has raised concerns regarding adulteration, safety, and adverse events associated with DS marketed for brain health and bodybuilding. Some DS products may compromise health as well as lead to a serious adverse event. The National Institutes of Health (NIH) Dietary Supplement Label Database (DSLD), available at https://dsld.nlm.nih.gov/, can be freely accessed and used by researchers, providers, and consumers alike to screen for potentially harmful DS. It was developed to serve the research community and as a resource for health care providers and the public. Herein we provide two examples of how the database can be used to identify DS ingredients of concern in products marketed for brain health and bodybuilding. The search for DS marketed for brain health returned 49 unique DS, and the search on DS marketed for bodybuilding returned 18 unique DS. 
Search results were cross-referenced with the Operation Supplement Safety High-Risk Supplement List, the FDA Tainted Products Marketed as Dietary Supplements list, the Natural Medicines database, and NIH Office of Dietary Supplements Fact Sheets. Three ingredients found in DS marketed for brain health and two ingredients in DS marketed for bodybuilding were identified as ""of concern"". Educational tools, including the DSLD, can help consumers and providers make informed decisions regarding DS.",2018-09-01 +33517358,Repurposing approved drugs for cancer therapy.,"

Background

Many drugs approved for other indications can control the growth of tumor cells and limit adverse events (AE).

Data sources

Literature searches with keywords 'repurposing and cancer' books, websites: https://clinicaltrials.gov/, for drug structures: https://pubchem.ncbi.nlm.nih.gov/.

Areas of agreement

Introducing approved drugs, such as those developed to treat diabetes (Metformin) or inflammation (Thalidomide), identified to have cytostatic activity, can enhance chemotherapy or even replace more cytotoxic drugs. Also, anti-inflammatory compounds, cytokines and inhibitors of proteolysis can be used to control the side effects of chemo- and immuno-therapies or as second-line treatments for tumors resistant to kinase inhibitors (KI). Drugs specifically developed for cancer therapy, such as interferons (IFN), the tyrosine KI abivertinib TKI (tyrosine kinase inhibitor) and interleukin-6 (IL-6) receptor inhibitors, may help control symptoms of Covid-19.

Areas of controversy

Better knowledge of mechanisms of drug activities is essential for repurposing. Chemotherapies induce ER stress and enhance mutation rates and chromosome alterations, leading to resistance that cannot always be related to mutations in the target gene. Metformin, thalidomide and cytokines (IFN, tumor necrosis factor (TNF), interleukin-2 (IL-2) and others) have pleiomorphic activities, some of which can enhance tumorigenesis. The small and fragile patient pools available for clinical trials can cloud the data on the usefulness of cotreatments.

Growing points

Better understanding of drug metabolism and mechanisms should aid in repurposing drugs for primary, adjuvant and adjunct treatments.

Areas timely for developing research

Optimizing drug combinations, reducing cytotoxicity of chemotherapeutics and controlling associated inflammation.",2021-03-01 +33325500,DeMaSk: a deep mutational scanning substitution matrix and its use for variant impact prediction. ,"Accurately predicting the quantitative impact of a substitution on a protein's molecular function would be a great aid in understanding the effects of observed genetic variants across populations. While this remains a challenging task, new approaches can leverage data from the increasing numbers of comprehensive deep mutational scanning (DMS) studies that systematically mutate proteins and measure fitness. We introduce DeMaSk, an intuitive and interpretable method based only upon DMS datasets and sequence homologs that predicts the impact of missense mutations within any protein. DeMaSk first infers a directional amino acid substitution matrix from DMS datasets and then fits a linear model that combines these substitution scores with measures of per-position evolutionary conservation and variant frequency across homologs. Despite its simplicity, DeMaSk has state-of-the-art performance in predicting the impact of amino acid substitutions, and can easily and rapidly be applied to any protein sequence. https://demask.princeton.edu generates fitness impact predictions and visualizations for any user-submitted protein sequence. Supplementary data are available at Bioinformatics online.",2020-12-16 +34196977,"A high-need, high-impact educational session on firearms and death by suicide.","

Background

The suicide rate among Veterans is 1.5 times greater than that for civilians, partly a result of the high use of firearms as the means for suicide. One effective strategy to reduce Veteran suicide by firearms is to provide counseling on firearm safety as a method of means reduction. However, many clinicians do not discuss firearm safety with Veterans.

Aims

This study evaluates a one-hour educational seminar for clinicians on lethal means safety.

Materials and methods

One hundred and ninety clinicians from the Veterans Health Administration, including social workers, psychologists, psychiatrists, and nurses, participated in the training. The seminar was streamed during the South Central Mental Health Illness Research, Education and Clinical Center's Community-Based Outpatient Clinic Mental Health Grand Rounds, for which participants were eligible for continuing education units. Data were collected immediately after the training and four months later.

Results

After completing the training, participants believed that it was important to speak with Veterans about firearm safety and felt knowledgeable and empowered to do so. Four months after the seminar, participants had counseled, on average, over half of Veterans treated about lethal means safety.

Discussion

Participants reported the value of normalizing discussions about firearm safety with their Veterans and focusing on the safety aspect while discussing firearms.

Conclusions

This webinar was effective in providing clinicians the skills to talk with Veterans about firearm safety and continued to impact their practice four months after training. The training is available for free on our website at https://www.mirecc.va.gov/visn16/public-health-approach-to-firearms-and-death-by-suicide.asp.",2021-07-01 +32882005,A standardized framework for testing the performance of sleep-tracking technology: step-by-step guidelines and open-source code. ,"Sleep-tracking devices, particularly within the consumer sleep technology (CST) space, are increasingly used in both research and clinical settings, providing new opportunities for large-scale data collection in highly ecological conditions. Due to the fast pace of the CST industry combined with the lack of a standardized framework to evaluate the performance of sleep trackers, their accuracy and reliability in measuring sleep remains largely unknown. Here, we provide a step-by-step analytical framework for evaluating the performance of sleep trackers (including standard actigraphy), as compared with gold-standard polysomnography (PSG) or other reference methods. The analytical guidelines are based on recent recommendations for evaluating and using CST from our group and others (de Zambotti and colleagues; Depner and colleagues), and include raw data organization as well as critical analytical procedures, including discrepancy analysis, Bland-Altman plots, and epoch-by-epoch analysis. Analytical steps are accompanied by open-source R functions (depicted at https://sri-human-sleep.github.io/sleep-trackers-performance/AnalyticalPipeline_v1.0.0.html). In addition, an empirical sample dataset is used to describe and discuss the main outcomes of the proposed pipeline. The guidelines and the accompanying functions are aimed at standardizing the testing of CSTs performance, to not only increase the replicability of validation studies, but also to provide ready-to-use tools to researchers and clinicians. 
All in all, this work can help to increase the efficiency, interpretation, and quality of validation studies, and to improve the informed adoption of CST in research and clinical settings.",2021-02-01 +34478338,A Comprehensive Assessment of Associations between Prenatal Phthalate Exposure and the Placental Transcriptomic Landscape.,"

Background

Phthalates are commonly used endocrine-disrupting chemicals that are ubiquitous in the general population. Prenatal phthalate exposure may alter placental physiology and fetal development, leading to adverse perinatal and childhood health outcomes.

Objective

We examined associations between prenatal phthalate exposure in the second and third trimesters and the placental transcriptome at birth, including genes and long noncoding RNAs (lncRNAs), to gain insight into potential mechanisms of action during fetal development.

Methods

The ECHO PATHWAYs consortium quantified 21 urinary phthalate metabolites from 760 women enrolled in the CANDLE study (Shelby County, TN) using high-performance liquid chromatography-tandem mass spectrometry. Placental transcriptomic data were obtained using paired-end RNA sequencing. Linear models were fitted to estimate separate associations between maternal urinary phthalate metabolite concentration during the second and third trimester and placental gene expression at birth, adjusted for confounding variables. Genes were considered differentially expressed at a Benjamini-Hochberg false discovery rate (FDR) p<0.05. Associations between phthalate metabolites and biological pathways were identified using self-contained gene set testing and considered significantly altered with an FDR-adjusted p<0.2.

Results

We observed significant associations between second-trimester phthalate metabolites mono (carboxyisooctyl) phthalate (MCIOP), mono-2-ethyl-5-carboxypentyl phthalate, and mono-2-ethyl-5-oxohexyl phthalate and 18 genes in total, including four lncRNAs. Specifically, placental expression of NEAT1 was associated with multiple phthalate metabolites. Third-trimester MCIOP and mono-isobutyl phthalate concentrations were significantly associated with placental expression of 18 genes and two genes, respectively. Expression of genes within 27 biological pathways was associated with mono-methyl phthalate, MCIOP, and monoethyl phthalate concentrations.

Discussion

To our knowledge, this is the first genome-wide assessment of the relationship between the placental transcriptome at birth and prenatal phthalate exposure in a large and diverse birth cohort. We identified numerous genes and lncRNAs associated with prenatal phthalate exposure. These associations mirror findings from other epidemiological and in vitro analyses and may provide insight into biological pathways affected in utero by phthalate exposure. https://doi.org/10.1289/EHP8973.",2021-09-03 +32190163,The Prospective Lynch Syndrome Database reports enable evidence-based personal precision health care.,"The aims of the Prospective Lynch Syndrome Database (PLSD) are to provide empirical prospectively observed data on the incidences of cancer in different organs, survival following cancer and the effects of interventions in carriers of pathogenic variants of the mismatch repair genes (path_MMR) categorized by age, gene and gender. Although PLSD is assumption-free, as with any study the ascertainment procedures used to identify the study cohort will introduce selection biases which have to be declared and considered in detail in order to provide robust and valid results. This paper provides a commentary on the methods used and considers how results from the PLSD reports should be interpreted. A number of the results from PLSD were novel and some in conflict with previous assumptions. Notably, colonoscopic surveillance did not prevent colo-rectal cancer, survival after colo-rectal, endometrial and ovarian cancer was good, no survival gain was observed with more frequent colonoscopy, new causes of cancer-related death were observed in survivors of first cancers due to later cancers in other organs, variants in the different MMR genes caused distinct multi-cancer syndromes characterized by different penetrance and phenotypes. 
The www.PLSD.eu website together with the InSiGHT database website (https://www.insight-group.org/variants/databases/) now facilitate evidence-based personalized precision health care for individual carriers at increased risk of cancer. The arguments are summarized in a final discussion on how to conceptualize current knowledge for the different practical purposes of treating cancers, genetic counselling and prevention, and for understanding /research on carcinogenetic mechanisms.",2020-03-14 +33596448,Succinyl-CoA:3-oxoacid coenzyme A transferase (SCOT) deficiency: A rare and potentially fatal metabolic disease.,"Succinyl-CoA:3-oxoacid coenzyme A transferase deficiency (SCOTD) is a rare autosomal recessive disorder of ketone body utilization caused by mutations in OXCT1. We performed a systematic literature search and evaluated clinical, biochemical and genetic data on 34 previously published and 10 novel patients with SCOTD. Structural mapping and in silico analysis of protein variants is also presented. All patients presented with severe ketoacidotic episodes. Age at first symptoms ranged from 36 h to 3 years (median 7 months). About 70% of patients manifested in the first year of life, approximately one quarter already within the neonatal period. Two patients died, while the remainder (95%) were alive at the time of the report. Almost all the surviving patients (92%) showed normal psychomotor development and no neurologic abnormalities. A total of 29 missense mutations are reported. Analysis of the published crystal structure of the human SCOT enzyme, paired with both sequence-based and structure-based methods to predict variant pathogenicity, provides insight into the biochemical consequences of the reported variants. Pathogenic variants cluster in SCOT protein regions that affect certain structures of the protein. The described pathogenic variants can be viewed in an interactive map of the SCOT protein at https://michelanglo.sgc.ox.ac.uk/r/oxct. 
This comprehensive data analysis provides a systematic overview of all cases of SCOTD published to date. Although SCOTD is a rather benign disorder with often favourable outcome, metabolic crises can be life-threatening or even fatal. As the diagnosis can only be made by enzyme studies or mutation analyses, SCOTD may be underdiagnosed.",2021-02-14 +33929145,[Russian version of PEACH scale (validation and normative data)].,"PEACH is an important tool for evaluation of children's hearing development, used in age 2-7 years. It is also appropriate for amplification outcomes measurements. PEACH scale includes 13 questions. Parents fill the questionnaire after week observation of child's hearing behavior in different situations. The goal of the study was validation of Russian version of PEACH scale. Translation and cross-cultural adaptation were performed following international guidelines. 50 children with normal hearing and 50 hearing impaired children were involved in the validation process. All of the hearing-impaired children used hearing aids or cochlear implants. PEACH scores of the children with normal hearing have strong correlation with data of original version (ρ=0.998; p<0.05) and can be used as a normative data for Russian version. PEACH scores of the hearing-impaired children were worse in higher degrees of hearing loss, which shows sensitivity of the method. Test-retest reliability in children with normal hearing was ρ=1.0 (p<0.05), in hearing impaired children ρ=0.976 (p<0.05). Russian PEACH scale is free available at the official site of Center of Pediatric Audiology: https://dgsc.kzdrav.gov.spb.ru.",2021-01-01 +33016997,ADACT: a tool for analysing (dis)similarity among nucleotide and protein sequences using minimal and relative absent words.,"

Motivation

Researchers and practitioners use a number of popular sequence comparison tools that use many alignment-based techniques. Due to high time and space complexity and length-related restrictions, researchers often seek alignment-free tools. Recently, some interesting ideas, namely, Minimal Absent Words (MAW) and Relative Absent Words (RAW), have received much interest among the scientific community as distance measures that can give us alignment-free alternatives. This drives us to structure a framework for analysing biological sequences in an alignment-free manner.

Results

In this application note, we present Alignment-free Dissimilarity Analysis & Comparison Tool (ADACT), a simple web-based tool that computes the analogy among sequences using a varied number of indexes through the distance matrix, species relation list and phylogenetic tree. This tool basically combines absent word (MAW or RAW) computation, dissimilarity measures, species relationship and thus brings all required software in one platform for the ease of researchers and practitioners alike in the field of bioinformatics. We have also developed a restful API.

Availability and implementation

ADACT has been hosted at http://research.buet.ac.bd/ADACT/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-06-01 +34193950,Identification of subtypes of anticancer peptides based on sequential features and physicochemical properties.,"Anticancer peptides (ACPs) are a kind of bioactive peptides which could be used as a novel type of anticancer drug that has several advantages over chemistry-based drug, including high specificity, strong tumor penetration capacity, and low toxicity to normal cells. As the number of experimentally verified bioactive peptides has increased significantly, various of in silico approaches are imperative for investigating the characteristics of ACPs. However, the lack of methods for investigating the differences in physicochemical properties of ACPs. In this study, we compared the N- and C-terminal amino acid composition for each peptide, there are three major subtypes of ACPs that are defined based on the distribution of positively charged residues. For the first time, we were motivated to develop a two-step machine learning model for identification of the subtypes of ACPs, which classify the input data into the corresponding group before applying the classifier. Further, to improve the predictive power, the hybrid feature sets were considered for prediction. Evaluation by five-fold cross-validation showed that the two-step model trained with sequence-based features and physicochemical properties was most effective in discriminating between ACPs and non-ACPs. The two-step model trained with the hybrid features performed well, with a sensitivity of 86.75%, a specificity of 85.75%, an accuracy of 86.08%, and a Matthews Correlation Coefficient value of 0.703. Furthermore, the model also consistently provides the effective performance in independent testing set, with sensitivity of 77.6%, specificity of 94.74%, accuracy of 88.99% and the MCC value reached 0.75. 
Finally, the two-step model has been implemented as a web-based tool, namely iDACP, which is now freely available at http://mer.hc.mmh.org.tw/iDACP/ .",2021-06-30 +33579190,A graph-based algorithm for detecting rigid domains in protein structures.,"

Background

Conformational transitions are implicated in the biological function of many proteins. Structural changes in proteins can be described approximately as the relative movement of rigid domains against each other. Despite previous efforts, there is a need to develop new domain segmentation algorithms that are capable of analysing the entire structure database efficiently and do not require the choice of protein-dependent tuning parameters such as the number of rigid domains.

Results

We develop a graph-based method for detecting rigid domains in proteins. Structural information from multiple conformational states is represented by a graph whose nodes correspond to amino acids. Graph clustering algorithms allow us to reduce the graph and run the Viterbi algorithm on the associated line graph to obtain a segmentation of the input structures into rigid domains. In contrast to many alternative methods, our approach does not require knowledge about the number of rigid domains. Moreover, we identified default values for the algorithmic parameters that are suitable for a large number of conformational ensembles. We test our algorithm on examples from the DynDom database and illustrate our method on various challenging systems whose structural transitions have been studied extensively.

Conclusions

The results strongly suggest that our graph-based algorithm forms a novel framework to characterize structural transitions in proteins via detecting their rigid domains. The web server is available at http://azifi.tz.agrar.uni-goettingen.de/webservice/ .",2021-02-12 +27899579,SNP2TFBS - a database of regulatory SNPs affecting predicted transcription factor binding site affinity.,"SNP2TFBS is a computational resource intended to support researchers investigating the molecular mechanisms underlying regulatory variation in the human genome. The database essentially consists of a collection of text files providing specific annotations for human single nucleotide polymorphisms (SNPs), namely whether they are predicted to abolish, create or change the affinity of one or several transcription factor (TF) binding sites. A SNP's effect on TF binding is estimated based on a position weight matrix (PWM) model for the binding specificity of the corresponding factor. These data files are regenerated at regular intervals by an automatic procedure that takes as input a reference genome, a comprehensive SNP catalogue and a collection of PWMs. SNP2TFBS is also accessible over a web interface, enabling users to view the information provided for an individual SNP, to extract SNPs based on various search criteria, to annotate uploaded sets of SNPs or to display statistics about the frequencies of binding sites affected by selected SNPs. Homepage: http://ccg.vital-it.ch/snp2tfbs/.",2016-11-28 +31256756,12 Components of a Strong Vision Health System of Care: Part 2-Vision Screening Tools and Procedures and Vision Health for Children With Special Health Care Needs.,Successful vision screening efforts require the implementation of 12 key components of a strong vision health system of care. 
The National Center for Children's Vision and Eye Health (NCCVEH) at Prevent Blindness partnered with the National Association of School Nurses (NASN) to provide guidance around these 12 components via a Vision and Eye Health webpage on the NASN website ( https://www.nasn.org/nasn-resources/practice-topics/vision-health ). This online resource is organized according to the 12 Components of a Strong Vision Health System of Care to support school nurses accountable for screening the vision of preschool and K-12 students. This NCCVEH/NASN webpage addresses key activities that support a child's vision health-beginning with parent/caregiver education and ending with an annual evaluation of the school's vision health system. Each of these 12 components will be described in NASN School Nurse. The May 2019 installment provided information about the 12 components approach as a whole and details on Family Education and a Comprehensive Communication/Approval Process. This installment describes Components 3 and 4: Vision Screening Tools and Procedures and Vision Health for Children with Special Health Care Needs.,2019-07-01 +32031718,Metastable decomposition at the peptide C-terminus: Possible use in protein identification.,"

Rationale

The $b_{n-1}$ ion of a peptide, as well as a $[b_{n-1} + 18]$ ion, can be observed not only as normal product ions, but also as prominent metastable ions in a reflectron-embedded matrix-assisted laser desorption ionization time-of-flight spectrometer. The m/z values for the peaks are slightly shifted compared with the ordinary product ions and appear as relatively broad peaks, which permits them to be discriminated from other ions.

Methods

A standard protein mixture and gel-derived proteins digested with LysN protease, which cleaves peptide linkages in proteins at the N-terminal side of Lys residues, were examined. The collected data were used for protein identification using in-house software, iD-plus (http://coco.protein.osaka-u.ac.jp/id-plus/), which was developed for searching for proteins in the peptide database, based on enzyme specificity (N-terminal Lys in this study), peptide masses and C-terminal amino acids.

Results

The $b_{n-1}$ as well as $[b_{n-1} + 18]$ ions were observed as broad ion peaks for all of the peptides (86 peptides) examined in this study. In silico calculations using the database of LysN digested peptides (11 969 470), created from 553 941 protein sequences (SwissProt: 2017_03), indicate that the use of no less than four peptides permits a protein to be identified without the need of any probability-based scoring.

Conclusions

The preference for b n-1 ion formation is probably due to the higher propensity of the C-terminal peptide bond to be cleaved than other internal bonds. The fact that such C-terminal fragmentation takes place for most of the peptides examined suggests that the use of an N-terminal specific enzyme would allow the C-terminal amino acids to be more reliably read out than other internal sequences, information that could be efficiently used for protein identification.",2020-05-01 +33866624,An Arabidopsis lipid map reveals differences between tissues and dynamic changes throughout development.,"Mass spectrometry is the predominant analytical tool used in the field of plant lipidomics. However, there are many challenges associated with the mass spectrometric detection and identification of lipids because of the highly complex nature of plant lipids. Studies into lipid biosynthetic pathways, gene functions in lipid metabolism, lipid changes during plant growth and development, and the holistic examination of the role of plant lipids in environmental stress responses are often hindered. Here, we leveraged a robust pipeline that we previously established to extract and analyze lipid profiles of different tissues and developmental stages from the model plant Arabidopsis thaliana. We analyzed seven tissues at several different developmental stages and identified more than 200 lipids from each tissue analyzed. The data were used to create a web-accessible in silico lipid map that has been integrated into an electronic Fluorescent Pictograph (eFP) browser. This in silico library of Arabidopsis lipids allows the visualization and exploration of the distribution and changes of lipid levels across selected developmental stages. Furthermore, it provides information on the characteristic fragments of lipids and adducts observed in the mass spectrometer and their retention times, which can be used for lipid identification. 
The Arabidopsis tissue lipid map can be accessed at http://bar.utoronto.ca/efp_arabidopsis_lipid/cgi-bin/efpWeb.cgi.",2021-05-24 +31424526,HiLight-PTM: an online application to aid matching peptide pairs with isotopically labelled PTMs.,"

Motivation

Database searching of isotopically labelled PTMs can be problematic and we frequently find that only one, or neither in a heavy/light pair are assigned. In such cases, having a pair of MS/MS spectra that differ due to an isotopic label can assist in identifying the relevant m/z values that support the correct peptide annotation or can be used for de novo sequencing.

Results

We have developed an online application that identifies matching peaks and peaks differing by the appropriate mass shift (difference between heavy and light PTM) between two MS/MS spectra. Furthermore, the application predicts, from the exact-match peaks, the mass of their complementary ions and highlights these as high confidence matches between the two spectra. The result is a tool to visually compare two spectra, and downloadable peaks lists that can be used to support de novo sequencing.

Availability and implementation

HiLight-PTM is released using shinyapps.io by RStudio, and can be accessed from any internet browser at https://harrywhitwell.shinyapps.io/hilight-ptm/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +33451280,Accucopy: accurate and fast inference of allele-specific copy number alterations from low-coverage low-purity tumor sequencing data.,"

Background

Copy number alterations (CNAs), due to their large impact on the genome, have been an important contributing factor to oncogenesis and metastasis. Detecting genomic alterations from the shallow-sequencing data of a low-purity tumor sample remains a challenging task.

Results

We introduce Accucopy, a method to infer total copy numbers (TCNs) and allele-specific copy numbers (ASCNs) from challenging low-purity and low-coverage tumor samples. Accucopy adopts many robust statistical techniques such as kernel smoothing of coverage differentiation information to discern signals from noise and combines ideas from time-series analysis and the signal-processing field to derive a range of estimates for the period in a histogram of coverage differentiation information. Statistical learning models such as the tiered Gaussian mixture model, the expectation-maximization algorithm, and sparse Bayesian learning were customized and built into the model. Accucopy is implemented in C++ /Rust, packaged in a docker image, and supports non-human samples, more at http://www.yfish.org/software/ .

Conclusions

We describe Accucopy, a method that can predict both TCNs and ASCNs from low-coverage low-purity tumor sequencing data. Through comparative analyses in both simulated and real-sequencing samples, we demonstrate that Accucopy is more accurate than Sclust, ABSOLUTE, and Sequenza.",2021-01-15 +28426719,Transcriptome of interstitial cells of Cajal reveals unique and selective gene signatures.,"Transcriptome-scale data can reveal essential clues into understanding the underlying molecular mechanisms behind specific cellular functions and biological processes. Transcriptomics is a continually growing field of research utilized in biomarker discovery. The transcriptomic profile of interstitial cells of Cajal (ICC), which serve as slow-wave electrical pacemakers for gastrointestinal (GI) smooth muscle, has yet to be uncovered. Using copGFP-labeled ICC mice and flow cytometry, we isolated ICC populations from the murine small intestine and colon and obtained their transcriptomes. In analyzing the transcriptome, we identified a unique set of ICC-restricted markers including transcription factors, epigenetic enzymes/regulators, growth factors, receptors, protein kinases/phosphatases, and ion channels/transporters. This analysis provides new and unique insights into the cellular and biological functions of ICC in GI physiology. Additionally, we constructed an interactive ICC genome browser (http://med.unr.edu/physio/transcriptome) based on the UCSC genome database. To our knowledge, this is the first online resource that provides a comprehensive library of all known genetic transcripts expressed in primary ICC. 
Our genome browser offers a new perspective into the alternative expression of genes in ICC and provides a valuable reference for future functional studies.",2017-04-20 +31624847,Beware the Jaccard: the choice of similarity measure is important and non-trivial in genomic colocalisation analysis.,"The generation and systematic collection of genome-wide data is ever-increasing. This vast amount of data has enabled researchers to study relations between a variety of genomic and epigenomic features, including genetic variation, gene regulation and phenotypic traits. Such relations are typically investigated by comparatively assessing genomic co-occurrence. Technically, this corresponds to assessing the similarity of pairs of genome-wide binary vectors. A variety of similarity measures have been proposed for this problem in other fields like ecology. However, while several of these measures have been employed for assessing genomic co-occurrence, their appropriateness for the genomic setting has never been investigated. We show that the choice of similarity measure may strongly influence results and propose two alternative modelling assumptions that can be used to guide this choice. On both simulated and real genomic data, the Jaccard index is strongly altered by dataset size and should be used with caution. The Forbes coefficient (fold change) and tetrachoric correlation are less influenced by dataset size, but one should be aware of increased variance for small datasets. All results on simulated and real data can be inspected and reproduced at https://hyperbrowser.uio.no/sim-measure.",2020-09-01 +33793301,"Quantitative Microbial Risk Assessment for Airborne Transmission of SARS-CoV-2 via Breathing, Speaking, Singing, Coughing, and Sneezing.","

Background

Evidence for indoor airborne transmission of SARS-CoV-2 is accumulating.

Objectives

We assessed the risk of illness due to airborne SARS-CoV-2 particles from breathing, speaking, singing, coughing, and sneezing in indoor environments.

Methods

A risk assessment model, AirCoV2, for exposure to SARS-CoV-2 particles in aerosol droplets was developed. Previously published data on droplets expelled by breathing, speaking, singing, coughing, and sneezing by an infected person were used as inputs. Scenarios encompassed virus concentration, exposure time, and ventilation. Newly collected data of virus RNA copies in mucus from patients are presented.

Results

The expelled volume of aerosols was highest for a sneeze, followed by a cough, singing, speaking, and breathing. After 20 min of exposure, at $10^{7}$ RNA copies/mL in mucus, all mean illness risks were largely estimated to be below 0.001, except for the ""high"" sneeze scenario. At virus concentrations above $10^{8}$ RNA copies/mL, and after 2 h of exposure, in the high and ""low"" sneeze scenarios, the high cough scenario and the singing scenario, risks exceeded 0.01 and may become very high, whereas the low coughing scenario, the high and low speaking scenarios and the breathing scenario remained below 0.1. After 2 h of exposure, singing became the second highest risk scenario. One air exchange per hour reduced risk of illness by about a factor of 2. Six air exchanges per hour reduced risks of illness by a factor of 8-13 for the sneeze and cough scenarios and by a factor of 4-9 for the other scenarios.

Discussion

The large variation in the volume of expelled aerosols is discussed. The model calculations indicated that SARS-CoV-2 transmission via aerosols outside of the 1.5-m social distancing norm can occur. Virus concentrations in aerosols and/or the amount of expelled aerosol droplets need to be high for substantial transmission via this route. AirCoV2 is made available as interactive computational tool. https://doi.org/10.1289/EHP7886.",2021-04-01 +32761141,SPDB: a specialized database and web-based analysis platform for swine pathogens. ,"The rapid and accurate diagnosis of swine diseases is indispensable for reducing their negative impacts on the pork industry. Next-generation sequencing (NGS) is a promising diagnostic tool for swine diseases. To support the application of NGS in the diagnosis of swine disease, we established the Swine Pathogen Database (SPDB). The SPDB represents the first comprehensive and highly specialized database and analysis platform for swine pathogens. The current version features an online genome search tool, which now contains 26 148 genomes of swine, swine pathogens and phylogenetically related species. This database offers a comprehensive bioinformatics analysis pipeline for the identification of 4403 swine pathogens and their related species in clinical samples, based on targeted 16S rRNA gene sequencing and metagenomic NGS data. The SPDB provides a powerful and user-friendly service for veterinarians and researchers to support the applications of NGS in swine disease research. Database URL: http://spdatabase.com:2080/.",2020-01-01 +31733062,ParameciumDB 2019: integrating genomic data across the genus for functional and evolutionary biology.,"ParameciumDB (https://paramecium.i2bc.paris-saclay.fr) is a community model organism database for the genome and genetics of the ciliate Paramecium. ParameciumDB development relies on the GMOD (www.gmod.org) toolkit. The ParameciumDB web site has been publicly available since 2006 when the P. 
tetraurelia somatic genome sequence was released, revealing that a series of whole genome duplications punctuated the evolutionary history of the species. The genome is linked to available genetic data and stocks. ParameciumDB has undergone major changes in its content and website since the last update published in 2011. Genomes from multiple Paramecium species, especially from the P. aurelia complex, are now included in ParameciumDB. A new modern web interface accompanies this transition to a database for the whole Paramecium genus. Gene pages have been enriched with orthology relationships, among the Paramecium species and with a panel of model organisms across the eukaryotic tree. This update also presents expert curation of Paramecium mitochondrial genomes.",2020-01-01 +34424052,Air Pollution Particulate Matter Exposure and Chronic Cerebral Hypoperfusion and Measures of White Matter Injury in a Murine Model.,"

Background

Exposure to ambient air pollution particulate matter (PM) is associated with increased risk of dementia and accelerated cognitive loss. Vascular contributions to cognitive impairment are well recognized. Chronic cerebral hypoperfusion (CCH) promotes neuroinflammation and blood-brain barrier weakening, which may augment neurotoxic effects of PM.

Objectives

This study examined interactions of nanoscale particulate matter (nPM; fine particulate matter with aerodynamic diameter ≤200 nm) and CCH secondary to bilateral carotid artery stenosis (BCAS) in a murine model to produce white matter injury. Based on other air pollution interactions, we predicted synergies of nPM with BCAS.

Methods

nPM was collected using a particle sampler near a Los Angeles, California, freeway. Mice were exposed to 10 wk of reaerosolized nPM or filtered air (FA) for 150 h. CCH was induced by BCAS surgery. Mice (C57BL/6J males) were randomized to four exposure paradigms: a) FA, b) nPM, c) FA + BCAS, and d) nPM + BCAS. Behavioral outcomes, white matter injury, glial cell activation, inflammation, and oxidative stress were assessed.

Results

The joint nPM + BCAS group exhibited synergistic effects on white matter injury (2.3× the additive nPM and FA + BCAS scores) with greater loss of corpus callosum volume on T2 magnetic resonance imaging (MRI) (30% smaller than FA group). Histochemical analyses suggested potential microglial-specific inflammatory responses with synergistic effects on corpus callosum C5 immunofluorescent density and whole brain nitrate concentrations (2.1× and 3.9× the additive nPM and FA + BCAS effects, respectively) in the joint exposure group. Transcriptomic responses (RNA-Seq) showed greater impact of nPM + BCAS than individual additive effects, consistent with changes in proinflammatory pathways. Although nPM exposure alone did not alter working memory, the nPM + BCAS cohort demonstrated impaired working memory when compared to the FA + BCAS group.

Discussion

Our data suggest that nPM and CCH contribute to white matter injury in a synergistic manner in a mouse model. Adverse neurological effects may be aggravated in a susceptible population exposed to air pollution. https://doi.org/10.1289/EHP8792.",2021-08-23 +34056907,Combined Metabolomics with Transcriptomics Reveals Important Serum Biomarkers Correlated with Lung Cancer Proliferation through a Calcium Signaling Pathway.,"Lung cancer (LC) is one of the most malignant cancers in the world, but currently, it lacks effective noninvasive biomarkers to assist its early diagnosis. Our study aims to discover potential serum diagnostic biomarkers for LC. In our study, untargeted serum metabolomics of a discovery cohort and targeted analysis of a test cohort were performed based on gas chromatography-mass spectrometry. Both univariate and multivariate statistical analyses were employed to screen for differential metabolites between LC and healthy control (HC), followed by the selection of candidate biomarkers through multiple algorithms. The results showed that 15 metabolites were significantly dysregulated between LC and HC, and a panel, comprising cholesterol, oleic acid, myo-inositol, 2-hydroxybutyric acid, and 4-hydroxybutyric acid, was demonstrated to have excellent differentiating capability for LC based on multiple classification modelings. In addition, the molecular interaction analysis combined with transcriptomics revealed a close correlation between the candidate biomarkers and LC proliferation via a Ca2+ signaling pathway. Our study discovered that cholesterol, oleic acid, myo-inositol, 2-hydroxybutyric acid, and 4-hydroxybutyric acid in combination could be a promising diagnostic biomarker for LC, and most importantly, our results will shed some light on the pathophysiological mechanism underlying LC to understand it deeply. 
The data that support the findings of this study are openly available in MetaboLights at https://www.ebi.ac.uk/metabolights/, reference number MTBLS1517.",2021-05-30 +31586392,APAatlas: decoding alternative polyadenylation across human tissues.,"Alternative polyadenylation (APA) is an RNA-processing mechanism on the 3' terminus that generates distinct isoforms of mRNAs and/or other RNA polymerase II transcripts with different 3'UTR lengths. Widespread APA affects post-transcriptional gene regulation in mRNA translation, stability, and localization, and exhibits strong tissue specificity. However, no existing database provides comprehensive information about APA events in a large number of human normal tissues. Using the RNA-seq data from the Genotype-Tissue Expression project, we systematically identified APA events from 9475 samples across 53 human tissues and examined their associations with multiple traits and gene expression across tissues. We further developed APAatlas, a user-friendly database (https://hanlab.uth.edu/apa/) for searching, browsing and downloading related information. APAatlas will help the biomedical research community elucidate the functions and mechanisms of APA events in human tissues.",2020-01-01 +27543076,RNALocate: a resource for RNA subcellular localizations.,"Increasing evidence has revealed that RNA subcellular localization is a very important feature for deeply understanding RNA's biological functions after being transported into intra- or extra-cellular regions. RNALocate is a web-accessible database that aims to provide a high-quality RNA subcellular localization resource and facilitate future researches on RNA function or structure. The current version of RNALocate documents more than 37 700 manually curated RNA subcellular localization entries with experimental evidence, involving more than 21 800 RNAs with 42 subcellular localizations in 65 species, mainly including Homo sapiens, Mus musculus and Saccharomyces cerevisiae etc. 
Besides, RNA homology, sequence and interaction data have also been integrated into RNALocate. Users can access these data through online search, browse, blast and visualization tools. In conclusion, RNALocate will be of help in elucidating the entirety of RNA subcellular localization, and developing new prediction methods. The database is available at http://www.rna-society.org/rnalocate/.",2016-08-19 +31568988,Systematic screening of protein-coding gene expression identified HMMR as a potential independent indicator of unfavorable survival in patients with papillary muscle-invasive bladder cancer.,"Papillary and non-papillary are two histological patterns of bladder carcinogenesis and are considered as dual-track oncogenic pathways, which have different genetic alterations. The TCGA-bladder cancer (BLCA) database contains clinicopathological, genomic and survival data from over 400 muscle-invasive bladder cancer patients. In this study, using data from this database, we performed a systematic screening of gene expression to identify the protein-coding gene that might have prognostic value in papillary and non-papillary muscle-invasive bladder cancer (MIBC). The data of patients with primary MIBC in TCGA-BLCA was acquired from the UCSC Xena project (http://xena.ucsc.edu) for re-analysis. By setting |log2 fold change|≥2 and adjusted p value <0.01 as the screening criteria, we found 751 significantly dysregulated genes, including 183 overexpressed and 568 downregulated genes. HMMR was identified as a potential prognostic marker with unique expression. Multivariate analysis showed that its expression was an independent prognostic indicator of shorter progression-free survival (PFS) (HR: 1.400, 95%CI: 1.021-1.920, p = 0.037) in the papillary subtype. ENST00000393915.8 and ENST00000358715.3, two transcripts that contain all 18 exons and encode the full length of HMMR, were significantly upregulated in cancer tissues compared with normal bladder tissues. 
None of the 17 CpG sites in its DNA locus was relevant to HMMR expression. 26/403 (6.5%) MIBC cases had HMMR gene-level amplification, which was associated with upregulated HMMR expression compared with the copy-neutral and deletion groups. Gene set enrichment analysis (GSEA) in papillary MIBC found that the high HMMR expression group was associated with upregulated genes enriched in multiple gene sets with well-established role in BC development, including G2M checkpoint, E2F Targets, Myc Targets V1, Myc Targets V2 and Glycolysis. Based on these findings, we infer that HMMR expression might be a specific prognostic marker in terms of PFS in papillary MIBC. DNA amplification might be an important mechanism of its elevation.",2019-09-27 +34019771,Practice Mediates Bidirectional Dual-Task Interference When Performing a Novel Sequential Nonword Repetition Task.,"Introduction The current study examined the extent to which practice amount mediates dual-task interference patterns associated with concurrent performance of a novel speech task and attention-demanding visuomotor task. Method A Sequential Nonword Repetition Task was used to examine the effect of practice on interference associated with concurrent performance of a Visuomotor Pursuit Task. Twenty-five young adult participants were assigned to either an Extended Practice Group or a Limited Practice Group and performed a novel Sequential Nonword Repetition Task in isolation and while performing a concurrent visuomotor pursuit rotor task. Results Participants in the Limited Practice Group who were afforded a limited amount of practice exhibited dual-task interference (i.e., dual-task performance reductions) for both the speech and visuomotor tasks (i.e., bidirectional dual-task interference). Conversely, participants in the Extended Practice Group who were afforded extended practice exhibited little-to-no observable dual-task interference on the nonword repetition task. 
Conclusion Data from the current investigation suggest that the amount of initial practice mediates the degree of dual-task interference observed when a novel speech production task is performed with an attention-demanding Visuomotor Pursuit Task. Supplemental Material https://doi.org/10.23641/asha.14608071.",2021-05-21 +33633572,TOXPANEL: A Gene-Set Analysis Tool to Assess Liver and Kidney Injuries.,"Gene-set analysis is commonly used to identify trends in gene expression when cells, tissues, organs, or organisms are subjected to conditions that differ from those within the normal physiological range. However, tools for gene-set analysis to assess liver and kidney injury responses are less common. Furthermore, most websites for gene-set analysis lack the option for users to customize their gene-set database. Here, we present the ToxPanel website, which allows users to perform gene-set analysis to assess liver and kidney injuries using activation scores based on gene-expression fold-change values. The results are graphically presented to assess constituent injury phenotypes (histopathology), with interactive result tables that identify the main contributing genes to a given signal. In addition, ToxPanel offers the flexibility to analyze any set of custom genes based on gene fold-change values. ToxPanel is publicly available online at https://toxpanel.bhsai.org. ToxPanel allows users to access our previously developed liver and kidney injury gene sets, which we have shown in previous work to yield robust results that correlate with the degree of injury. Users can also test and validate their customized gene sets using the ToxPanel website.",2021-02-09 +33450251,ELASPIC2 (EL2): Combining Contextualized Language Models and Graph Neural Networks to Predict Effects of Mutations.,"The ELASPIC web server allows users to evaluate the effect of mutations on protein folding and protein-protein interaction on a proteome-wide scale. 
It uses homology models of proteins and protein-protein interactions, which have been precalculated for several proteomes, and machine learning models, which integrate structural information with sequence conservation scores, in order to make its predictions. Since the original publication of the ELASPIC web server, several advances have motivated a revisiting of the problem of mutation effect prediction. First, progress in neural network architectures and self-supervised pre-training has resulted in models which provide more informative embeddings of protein sequence and structure than those used by the original version of ELASPIC. Second, the amount of training data has increased several-fold, largely driven by advances in deep mutation scanning and other multiplexed assays of variant effect. Here, we describe two machine learning models which leverage the recent advances in order to achieve superior accuracy in predicting the effect of mutation on protein folding and protein-protein interaction. The models incorporate features generated using pre-trained transformer- and graph convolution-based neural networks, and are trained to optimize a ranking objective function, which permits the use of heterogeneous training data. The outputs from the new models have been incorporated into the ELASPIC web server, available at http://elaspic.kimlab.org.",2021-01-13 +27899622,UniProt: the universal protein knowledgebase.,"The UniProt knowledgebase is a large resource of protein sequences and associated detailed annotation. The database contains over 60 million sequences, of which over half a million sequences have been curated by experts who critically review experimental and predicted data for each protein. The remainder are automatically annotated based on rule systems that rely on the expert curated knowledge. Since our last update in 2014, we have more than doubled the number of reference proteomes to 5631, giving a greater coverage of taxonomic diversity. 
We implemented a pipeline to remove redundant highly similar proteomes that were causing excessive redundancy in UniProt. The initial run of this pipeline reduced the number of sequences in UniProt by 47 million. For our users interested in the accessory proteomes, we have made available sets of pan proteome sequences that cover the diversity of sequences for each species that is found in its strains and sub-strains. To help interpretation of genomic variants, we provide tracks of detailed protein information for the major genome browsers. We provide a SPARQL endpoint that allows complex queries of the more than 22 billion triples of data in UniProt (http://sparql.uniprot.org/). UniProt resources can be accessed via the website at http://www.uniprot.org/.",2016-11-29 +31263866,The NEW ESID online database network.,"SUMMARY:Primary Immunodeficiencies (PIDs) belong to the group of rare diseases. The European Society for Immunodeficiencies (ESID) operates an international research database application for continuous long-term documentation of patient data. The system is a web application which runs in a standard browser. Therefore, the system is easy to access from any location. Technically, the system is based on Gails backed by MariaDB with high standard security features to comply with the demands of a modern research platform. AVAILABILITY AND IMPLEMENTATION:The ESID Online Database is accessible via the official website: https://esid.org/Working-Parties/Registry-Working-Party/ESID-Registry. A demo system is available via: https://cci-esid-reg-demo-app.uniklinik-freiburg.de/EERS with user demouser and password Demo-2019.",2019-12-01 +27736745,Exploring human disease using the Rat Genome Database.,"Rattus norvegicus, the laboratory rat, has been a crucial model for studies of the environmental and genetic factors associated with human diseases for over 150 years. 
It is the primary model organism for toxicology and pharmacology studies, and has features that make it the model of choice in many complex-disease studies. Since 1999, the Rat Genome Database (RGD; http://rgd.mcw.edu) has been the premier resource for genomic, genetic, phenotype and strain data for the laboratory rat. The primary role of RGD is to curate rat data and validate orthologous relationships with human and mouse genes, and make these data available for incorporation into other major databases such as NCBI, Ensembl and UniProt. RGD also provides official nomenclature for rat genes, quantitative trait loci, strains and genetic markers, as well as unique identifiers. The RGD team adds enormous value to these basic data elements through functional and disease annotations, the analysis and visual presentation of pathways, and the integration of phenotype measurement data for strains used as disease models. Because much of the rat research community focuses on understanding human diseases, RGD provides a number of datasets and software tools that allow users to easily explore and make disease-related connections among these datasets. RGD also provides comprehensive human and mouse data for comparative purposes, illustrating the value of the rat in translational research. This article introduces RGD and its suite of tools and datasets to researchers - within and beyond the rat community - who are particularly interested in leveraging rat-based insights to understand human diseases.",2016-10-01 +32766702,The Male Fertility Gene Atlas: a web tool for collecting and integrating OMICS data in the context of male infertility.,"

Study question

How can one design and implement a system that provides a comprehensive overview of research results in the field of epi-/genetics of male infertility and germ cells?

Summary answer

Working at the interface of literature search engines and raw data repositories, the newly developed Male Fertility Gene Atlas (MFGA) provides a system that can represent aggregated results from scientific publications in a standardized way and perform advanced searches, for example based on the conditions (phenotypes) and genes related to male infertility.

What is known already

PubMed and Google Scholar are established search engines for research literature. Additionally, repositories like Gene Expression Omnibus and Sequence Read Archive provide access to raw data. Selected processed data can be accessed by visualization tools like the ReproGenomics Viewer.

Study design, size, duration

The MFGA was developed in a time frame of 18 months under a rapid prototyping approach.

Participants/materials, setting, methods

In the context of the Clinical Research Unit 'Male Germ Cells' (CRU326), a group of around 50 domain experts in the fields of male infertility and germ cells helped to develop the requirements engineering and feedback loops. They provided a set of 39 representative and heterogeneous publications to establish a basis for the system requirements.

Main results and the role of chance

The MFGA is freely available online at https://mfga.uni-muenster.de. To date, it contains 115 data sets corresponding to 54 manually curated publications and provides an advanced search function based on study conditions, meta-information and genes, whereby it returns the publications' exact tables and figures that fit the search request as well as a list of the most frequently investigated genes in the result set. Currently, study data for 31 different tissue types, 32 different cell types and 20 conditions are available. Also, ∼8000 and ∼1000 distinct genes have been found to be mentioned in at least 10 and 15 of the publications, respectively.

Large scale data

Not applicable because no novel data were produced.

Limitations, reasons for caution

For the most part, the content of the system currently includes the selected publications from the development process. However, a structured process for the prospective literature search and inclusion into the MFGA has been defined and is currently implemented.

Wider implications of the findings

The technical implementation of the MFGA allows for accommodating a wide range of heterogeneous data from aggregated research results. This implementation can be transferred to other diseases to establish comparable systems and generally support research in the medical field.

Study funding/competing interest(s)

This work was carried out within the frame of the German Research Foundation (DFG) Clinical Research Unit 'Male Germ Cells: from Genes to Function' (CRU326). The authors declare no conflicts of interest.",2020-09-01 +33592504,TAP 1.0: A robust immunoinformatic tool for the prediction of tumor T-cell antigens based on AAindex properties.,"Immunotherapy is a research area with great potential in drug discovery for cancer treatment. Because of the capacity of tumor antigens to activate the immune response and promote the destruction of tumor cells, they are considered excellent immunotherapeutic drugs. In this work, we evaluated fifteen machine learning algorithms for the classification of tumor antigens. For this purpose, we build robust datasets, carefully selected from the TANTIGEN and IEDB databases. The feature computation of all antigens in this study was performed by developing a script written in Python 3.8, which allowed the calculation of 544 physicochemical and biochemical properties extracted from the AAindex database. All classifiers were subjected to the training, 10-fold cross-validation, and testing on an independent dataset. The results of this study showed that the quadratic discriminant classifier presented the best performance measures over the independent dataset, accuracy = 0.7384, AUC = 0.817, recall = 0.676, precision = 0.7857, F1 = 0.713, kappa = 0.4764, and Matthews correlation coefficient = 0.4834, outperforming common machine learning classifiers used in the bioinformatics area. We believe that our prediction model could be of great importance in the field of cancer immunotherapy for the search of potential tumor antigens. 
Taking all aspects mentioned before, we developed an immunoinformatic tool called TAP 1.0 with a friendly interface for tumor antigens prediction, available at https://tapredictor.herokuapp.com/.",2021-02-08 +33500498,Multi-Q 2 software facilitates isobaric labeling quantitation analysis with improved accuracy and coverage.,"Mass spectrometry-based proteomics using isobaric labeling for multiplex quantitation has become a popular approach for proteomic studies. We present Multi-Q 2, an isobaric-labeling quantitation tool which can yield the largest quantitation coverage and improved quantitation accuracy compared to three state-of-the-art methods. Multi-Q 2 supports identification results from several popular proteomic data analysis platforms for quantitation, offering up to 12% improvement in quantitation coverage for accepting identification results from multiple search engines when compared with MaxQuant and PatternLab. It is equipped with various quantitation algorithms, including a ratio compression correction algorithm, and results in up to 336 algorithmic combinations. Systematic evaluation shows different algorithmic combinations have different strengths and are suitable for different situations. We also demonstrate that the flexibility of Multi-Q 2 in customizing algorithmic combination can lead to improved quantitation accuracy over existing tools. Moreover, the use of complementary algorithmic combinations can be an effective strategy to enhance sensitivity when searching for biomarkers from differentially expressed proteins in proteomic experiments. Multi-Q 2 provides interactive graphical interfaces to process quantitation and to display ratios at protein, peptide, and spectrum levels. It also supports a heatmap module, enabling users to cluster proteins based on their abundance ratios and to visualize the clustering results. 
Multi-Q 2 executable files, sample data sets, and user manual are freely available at http://ms.iis.sinica.edu.tw/COmics/Software_Multi-Q2.html .",2021-01-26 +33601085,FAD-BERT: Improved prediction of FAD binding sites using pre-training of deep bidirectional transformers.,"The electron transport chain is a series of protein complexes embedded in the process of cellular respiration, which is an important process to transfer electrons and other macromolecules throughout the cell. Identifying Flavin Adenine Dinucleotide (FAD) binding sites in the electron transport chain is vital since it helps biological researchers precisely understand how electrons are produced and are transported in cells. This study distills and analyzes the contextualized word embedding from pre-trained BERT models to explore similarities in natural language and protein sequences. Thereby, we propose a new approach based on Pre-training of Bidirectional Encoder Representations from Transformers (BERT), Position-specific Scoring Matrix profiles (PSSM), Amino Acid Index database (AAIndex) to predict FAD-binding sites from the transport proteins which are found in nature recently. Our proposed approach achieves 85.14% accuracy and improves accuracy by 11%, with Matthew's correlation coefficient of 0.39 compared to the previous method on the same independent set. We also deploy a web server that identifies FAD-binding sites in electron transporters available for academics at http://140.138.155.216/fadbert/.",2021-02-08 +33557954,HMD-ARG: hierarchical multi-task deep learning for annotating antibiotic resistance genes.,"

Background

The spread of antibiotic resistance has become one of the most urgent threats to global health, which is estimated to cause 700,000 deaths each year globally. Its surrogates, antibiotic resistance genes (ARGs), are highly transmittable between food, water, animal, and human to mitigate the efficacy of antibiotics. Accurately identifying ARGs is thus an indispensable step to understanding the ecology, and transmission of ARGs between environmental and human-associated reservoirs. Unfortunately, the previous computational methods for identifying ARGs are mostly based on sequence alignment, which cannot identify novel ARGs, and their applications are limited by currently incomplete knowledge about ARGs.

Results

Here, we propose an end-to-end Hierarchical Multi-task Deep learning framework for ARG annotation (HMD-ARG). Taking raw sequence encoding as input, HMD-ARG can identify, without querying against existing sequence databases, multiple ARG properties simultaneously, including if the input protein sequence is an ARG, and if so, what antibiotic family it is resistant to, what resistant mechanism the ARG takes, and if the ARG is an intrinsic one or acquired one. In addition, if the predicted antibiotic family is beta-lactamase, HMD-ARG further predicts the subclass of beta-lactamase that the ARG is resistant to. Comprehensive experiments, including cross-fold validation, third-party dataset validation in human gut microbiota, wet-experimental functional validation, and structural investigation of predicted conserved sites, demonstrate not only the superior performance of our method over the state-of-art methods, but also the effectiveness and robustness of the proposed method.

Conclusions

We propose a hierarchical multi-task method, HMD-ARG, which is based on deep learning and can provide detailed annotations of ARGs from three important aspects: resistant antibiotic class, resistant mechanism, and gene mobility. We believe that HMD-ARG can serve as a powerful tool to identify antibiotic resistance genes and, therefore mitigate their global threat. Our method and the constructed database are available at http://www.cbrc.kaust.edu.sa/HMDARG/ . Video abstract (MP4 50984 kb).",2021-02-08 +28969593,BioCarian: search engine for exploratory searches in heterogeneous biological databases.,"

Background

There are a large number of biological databases publicly available for scientists on the web. Also, there are many private databases generated in the course of research projects. These databases are in a wide variety of formats. Web standards have evolved in recent times and semantic web technologies are now available to interconnect diverse and heterogeneous sources of data. Therefore, integration and querying of biological databases can be facilitated by techniques used in semantic web. Heterogeneous databases can be converted into Resource Description Format (RDF) and queried using SPARQL language. Searching for exact queries in these databases is trivial. However, exploratory searches need customized solutions, especially when multiple databases are involved. This process is cumbersome and time consuming for those without a sufficient background in computer science. In this context, a search engine facilitating exploratory searches of databases would be of great help to the scientific community.

Results

We present BioCarian, an efficient and user-friendly search engine for performing exploratory searches on biological databases. The search engine is an interface for SPARQL queries over RDF databases. We note that many of the databases can be converted to tabular form. We first convert the tabular databases to RDF. The search engine provides a graphical interface based on facets to explore the converted databases. The facet interface is more advanced than conventional facets. It allows complex queries to be constructed, and has additional features like ranking of facet values based on several criteria, visually indicating the relevance of a facet value and presenting the most important facet values when a large number of choices are available. For the advanced users, SPARQL queries can be run directly on the databases. Using this feature, users will be able to incorporate federated searches of SPARQL endpoints. We used the search engine to do an exploratory search on previously published viral integration data and were able to deduce the main conclusions of the original publication. BioCarian is accessible via http://www.biocarian.com .

Conclusions

We have developed a search engine to explore RDF databases that can be used by both novice and advanced users.",2017-10-02 +28077565,MAHMI database: a comprehensive MetaHit-based resource for the study of the mechanism of action of the human microbiota. ,"The Mechanism of Action of the Human Microbiome (MAHMI) database is a unique resource that provides comprehensive information about the sequence of potential immunomodulatory and antiproliferative peptides encrypted in the proteins produced by the human gut microbiota. Currently, MAHMI database contains over 300 hundred million peptide entries, with detailed information about peptide sequence, sources and potential bioactivity. The reference peptide data section is curated manually by domain experts. The in silico peptide data section is populated automatically through the systematic processing of publicly available exoproteomes of the human microbiome. Bioactivity prediction is based on the global alignment of the automatically processed peptides with experimentally validated immunomodulatory and antiproliferative peptides, in the reference section. MAHMI provides researchers with a comparative tool for inspecting the potential immunomodulatory or antiproliferative bioactivity of new amino acidic sequences and identifying promising peptides to be further investigated. Moreover, researchers are welcome to submit new experimental evidence on peptide bioactivity, namely, empiric and structural data, as a proactive, expert means to keep the database updated and improve the implemented bioactivity prediction method. Bioactive peptides identified by MAHMI have a huge biotechnological potential, including the manipulation of aberrant immune responses and the design of new functional ingredients/foods based on the genetic sequences of the human microbiome. 
Hopefully, the resources provided by MAHMI will be useful to those researching gastrointestinal disorders of autoimmune and inflammatory nature, such as Inflammatory Bowel Diseases. MAHMI database is routinely updated and is available free of charge. Database URL: http://mahmi.org/.",2017-01-10 +33755125,Probabilistic Thermodynamic Analysis of Metabolic Networks. ,"Random sampling of metabolic fluxes can provide a comprehensive description of the capabilities of a metabolic network. However, current sampling approaches do not model thermodynamics explicitly, leading to inaccurate predictions of an organism's potential or actual metabolic operations. We present a probabilistic framework combining thermodynamic quantities with steady-state flux constraints to analyze the properties of a metabolic network. It includes methods for probabilistic metabolic optimization and for joint sampling of thermodynamic and flux spaces. Applied to a model of E. coli, we use the methods to reveal known and novel mechanisms of substrate channeling, and to accurately predict reaction directions and metabolite concentrations. Interestingly, predicted flux distributions are multimodal, leading to discrete hypotheses on E. coli's metabolic capabilities. Python and MATLAB packages available at https://gitlab.com/csb.ethz/pta. Supplementary data are available at Bioinformatics online.",2021-03-23 +34517753,Bacteroidetocins Target the Essential Outer Membrane Protein BamA of Bacteroidales Symbionts and Pathogens.,"Bacteroidetocins are a family of antibacterial peptide toxins that are produced by and target members of the phylum Bacteroidetes. To date, 19 bacteroidetocins have been identified, and four have been tested and shown to kill diverse Bacteroidales species (M. J. Coyne, N. Béchon, L. M. Matano, V. L. McEneany, et al., Nat Commun 10:3460, 2019, https://doi.org/10.1038/s41467-019-11494-1). Here, we identify the target and likely mechanism of action of the bacteroidetocins. 
We selected seven spontaneous mutants of four different genera, all resistant to bacteroidetocin A (Bd-A) and found that all contained mutations in a single gene, bamA. Construction of three of these bamA mutants in the wild-type (WT) strains confirmed they confer resistance to Bd-A as well as to other bacteroidetocins. We identified an aspartate residue of BamA at the beginning of exterior loop 3 (eL3) that, when altered, renders strains resistant to Bd-A. Analysis of a panel of diverse Bacteroidales strains showed a correlation between the presence of this aspartate residue and Bd-A sensitivity. Fluorescence microscopy and transmission electron microscopy (TEM) analysis of Bd-A-treated cells showed cellular morphological changes consistent with a BamA defect. Transcriptomic analysis of Bd-A-treated cells revealed gene expression changes indicative of cell envelope stress. Studies in mice revealed that bacteroidetocin-resistant mutants are outcompeted by their WT strain in vivo. Analyses of longitudinal human gut isolates showed that bamA mutations leading to bacteroidetocin resistance do not become fixed in the human gut, even in bacteroidetocin-producing strains and nonproducing coresident strains. Together, these data lend further support to the applicability of the bacteroidetocins as therapeutic peptides in the treatment of maladies involving Bacteroidales species. IMPORTANCE The bacteroidetocins are a newly discovered class of bacteriocins specific to Bacteroidetes with a spectrum of targets extending from symbiotic gut Bacteroides, Parabacteroides, and Prevotella species to pathogenic oral and vaginal Prevotella species. We previously showed that one such bacteroidetocin, Bd-A, is active at nanomolar concentrations, is water soluble, and is bactericidal, all desirable features in a therapeutic antibacterial peptide. Here, we identify the target of several of the bacteroidetocins as the essential outer membrane protein BamA. 
Although mutations in bamA can be selected in bacteria grown in vitro, we show both in a mouse model and in human gut ecosystems that bamA mutants leading to Bd-A resistance are fitness attenuated and are not selected. These features further support the potential usefulness of the bacteroidetocins as therapeutics for maladies associated with pathogenic Prevotella species, such as recurrent bacterial vaginosis, for which there are few effective treatments.",2021-09-14 +27924014,"The STRING database in 2017: quality-controlled protein-protein association networks, made broadly accessible.","A system-wide understanding of cellular function requires knowledge of all functional interactions between the expressed proteins. The STRING database aims to collect and integrate this information, by consolidating known and predicted protein-protein association data for a large number of organisms. The associations in STRING include direct (physical) interactions, as well as indirect (functional) interactions, as long as both are specific and biologically meaningful. Apart from collecting and reassessing available experimental data on protein-protein interactions, and importing known pathways and protein complexes from curated databases, interaction predictions are derived from the following sources: (i) systematic co-expression analysis, (ii) detection of shared selective signals across genomes, (iii) automated text-mining of the scientific literature and (iv) computational transfer of interaction knowledge between organisms based on gene orthology. In the latest version 10.5 of STRING, the biggest changes are concerned with data dissemination: the web frontend has been completely redesigned to reduce dependency on outdated browser technologies, and the database can now also be queried from inside the popular Cytoscape software framework. Further improvements include automated background analysis of user inputs for functional enrichments, and streamlined download options. 
The STRING resource is available online, at http://string-db.org/.",2016-10-18 +29396322,A Landscape of Metabolic Variation across Tumor Types.,"Tumor metabolism is reorganized to support proliferation in the face of growth-related stress. Unlike the widespread profiling of changes to metabolic enzyme levels in cancer, comparatively less attention has been paid to the substrates/products of enzyme-catalyzed reactions, small-molecule metabolites. We developed an informatic pipeline to concurrently analyze metabolomics data from over 900 tissue samples spanning seven cancer types, revealing extensive heterogeneity in metabolic changes relative to normal tissue across cancers of different tissues of origin. Despite this heterogeneity, a number of metabolites were recurrently differentially abundant across many cancers, such as lactate and acyl-carnitine species. Through joint analysis of metabolomic data alongside clinical features of patient samples, we also identified a small number of metabolites, including several polyamines and kynurenine, which were associated with aggressive tumors across several tumor types. Our findings offer a glimpse onto common patterns of metabolic reprogramming across cancers, and the work serves as a large-scale resource accessible via a web application (http://www.sanderlab.org/pancanmet).",2018-01-27 +32765964,WHONDRS-GUI: a web application for global survey of surface water metabolites.,"

Background

The Worldwide Hydrobiogeochemistry Observation Network for Dynamic River Systems (WHONDRS) is a consortium that aims to understand complex hydrologic, biogeochemical, and microbial connections within river corridors experiencing perturbations such as dam operations, floods, and droughts. For one ongoing WHONDRS sampling campaign, surface water metabolite and microbiome samples are collected through a global survey to generate knowledge across diverse river corridors. Metabolomics analysis and a suite of geochemical analyses have been performed for collected samples through the Environmental Molecular Sciences Laboratory (EMSL). The obtained knowledge and data package inform mechanistic and data-driven models to enhance predictions of outcomes of hydrologic perturbations and watershed function, one of the most critical components in model-data integration. To support efforts of the multi-domain integration and make the ever-growing data package more accessible for researchers across the world, a Shiny/R Graphical User Interface (GUI) called WHONDRS-GUI was created.

Results

The web application can be run on any modern web browser without any programming or operational system requirements, thus providing an open, well-structured, discoverable dataset for WHONDRS. Together with a context-aware dynamic user interface, the WHONDRS-GUI has functionality for searching, compiling, integrating, visualizing and exporting different data types that can easily be used by the community. The web application and data package are available at https://data.ess-dive.lbl.gov/view/doi:10.15485/1484811, which enables users to simultaneously obtain access to the data and code and to subsequently run the web app locally. The WHONDRS-GUI is also available for online use at Shiny Server (https://xmlin.shinyapps.io/whondrs/).",2020-07-22 +32343964,Longitudinal Competence Programs for Basic Point-of-Care Ultrasound in Critical Care: A Systematic Review.,"

Background

Competence in point-of-care ultrasound (PoCUS) is widely recommended by several critical care societies. Despite numerous introductory short courses, very few doctors attain PoCUS competence because of the challenges in establishing longitudinal competence programs.

Research question

To evaluate the methodologic quality of the literature on basic PoCUS competence processes in critical care.

Study design and methods

A systematic review to identify manuscripts meeting predefined inclusion criteria was performed using three medical databases (PubMed, OVID Embase, and Web of Science); using extra references from original articles, review articles, and expert panel guidelines; and by directly contacting authors for further information if required. The objectives, domains, and inclusion and exclusion criteria of the review were determined during discussions between experienced PoCUS educators. Data extraction and analyses were performed independently by three reviewers.

Results

Of the 5,408 abstracts extracted, 42 met the inclusion criteria for longitudinal PoCUS competence. Each study was described along four broad categories: general information, study design, and trainee characteristics; description of introductory course; description of longitudinal competence program; and grading of overall methodologic quality on a 4-point Likert scale. Thirty-nine studies (92.9%) were from a single center. Most studies lacked important details on study methodology such as prior ultrasound experience, pre- and postcourse tests, models for hands-on sessions, ratio of instructors to trainees, competence assessment criteria, number of scans performed by individual trainees, and formative and summative assessments. The studies were rated as follows: poor = 19 (45.2%), average = 15 (35.7%), good = 4 (9.5%), and excellent = 4 (9.5%).

Interpretation

There is very little high-quality evidence on PoCUS competence. To help frame policy guidelines to improve PoCUS education, there is a need for well-designed longitudinal studies on PoCUS competence.

Trial registry

PROSPERO database; No.: CRD42018094033; URL: https://www.crd.york.ac.uk/PROSPERO/.",2020-04-25 +33392267,A Semi-automatic Diagnosis of Hip Dysplasia on X-Ray Films.,"Background: Diagnosis of hip joint plays an important role in early screening of hip diseases such as coxarthritis, heterotopic ossification, osteonecrosis of the femoral head, etc. Early detection of hip dysplasia on X-ray films may probably conduce to early treatment of patients, which can help to cure patients or relieve their pain as much as possible. There has been no method or tool for automatic diagnosis of hip dysplasia till now. Results: A semi-automatic method for diagnosis of hip dysplasia is proposed. Considering the complexity of medical imaging, the contour of acetabulum, femoral head, and the upper side of thigh-bone are manually marked. Feature points are extracted according to marked contours. Traditional knowledge-driven diagnostic criteria is abandoned. Instead, a data-driven diagnostic model for hip dysplasia is presented. Angles including CE, sharp, and Tonnis angle which are commonly measured in clinical diagnosis, are automatically obtained. Samples, each of which consists of these three angle values, are used for clustering according to their densities in a descending order. A three-dimensional normal distribution derived from the cluster is built and regarded as the parametric model for diagnosis of hip dysplasia. Experiments on 143 X-ray films including 286 samples (i.e., 143 left and 143 right hip joints) demonstrate the effectiveness of our method. According to the method, a computer-aided diagnosis tool is developed for the convenience of clinicians, which can be downloaded at http://www.bio-nefu.com/HIPindex/. The data used to support the findings of this study are available from the corresponding authors upon request. Conclusions: This data-driven method provides a more objective measurement of the angles. 
Besides, it provides a new criterion for diagnosis of hip dysplasia other than doctors' experience deriving from knowledge-driven clinical manual, which actually corresponds to very different way for clinical diagnosis of hip dysplasia.",2020-12-17 +35935888,"Cortinarius subgenus Leprocybe, unexpected diversity and significant differences in species compositions between western and eastern North America.","The focus of this paper is the North American species of Cortinarius in subg. Leprocybe. Eighteen species, including twelve new ones, and two tentative (aff.) species, are delimited based on morphological and molecular data (DNA ITS-LSU sequences). Existing type specimens of species in subg. Leprocybe were also studied, and neo- or epitypes designated for C. cotoneus, C. melanotus, C. phrygianus and C. venetus to stabilize the nomenclature. In addition, to improve the infrasubgeneric classification of Leprocybe three new sections are proposed: sect. Fuscotomentosi, sect. Melanoti and sect. Squamiveneti. This study adds substantial information to the knowledge of subg. Leprocybe in North America against a background of European species. To date only two species, C. phrygianus and C. squamivenetus have been reported from both continents. Citation: Ammirati J, Liimatainen K, Bojantchev D, et al. 2021. Cortinarius subgenus Leprocybe, unexpected diversity and significant differences in species compositions between western and eastern North America. Persoonia 46: 216-239. https://doi.org/10.3767/persoonia.2021.46.08.",2021-05-27 +34148448,A rapid benchtop method to assess biofilm on marine fouling control coatings.,"A rapid benchtop method to measure the torque associated with minidiscs rotating in water using a sensitive analytical rheometer has been used to monitor the drag caused by marine fouling on coated discs. The method was calibrated using sandpaper surfaces of known roughness. 
Minidiscs coated with commercial fouling control coatings, plus an inactive control, were exposed in an estuarine harbour. After 176 days the drag on the fouling control-coated discs, expressed as a moment coefficient, was between 73% and 90% less than the drag on the control coating. The method has potential use as a screen for novel antifouling and drag reducing coatings and surfaces. Roughness functions derived using Granville's indirect similarity law are similar to patterns found in the general hydrodynamics literature, and so rotational minidisc results can be considered with reference to other fouling drag datasets.Supplemental data for this article is available online at https://doi.org/10.1080/08927014.2021.1929937 .",2021-04-01 +33289896,Evolutionary Sequence Analysis and Visualization with Wasabi.,"Wasabi is an open-source, web-based graphical environment for evolutionary sequence analysis and visualization, designed to work with multiple sequence alignments within their phylogenetic context. Its interactive user interface provides convenient access to external data sources and computational tools and is easily extendable with custom tools and pipelines using a plugin system. Wasabi stores intermediate editing and analysis steps as workflow histories and provides direct-access web links to datasets, allowing for reproducible, collaborative research, and easy dissemination of the results. In addition to shared analyses and installation-free usage, the web-based design allows Wasabi to be run as a cross-platform, stand-alone application and makes its integration to other web services straightforward.This chapter gives a detailed description and guidelines for the use of Wasabi's analysis environment. Example use cases will give step-by-step instructions for practical application of the public Wasabi, from quick data visualization to branched analysis pipelines and publishing of results. 
We end with a brief discussion of advanced usage of Wasabi, including command-line communication, interface extension, offline usage, and integration to local and public web services. The public Wasabi application, its source code, documentation, and other materials are available at http://wasabiapp.org.",2021-01-01 +32096630,Breaking Down Structural Diversity for Comprehensive Prediction of Ion-Neutral Collision Cross Sections.,"Identification of unknowns is a bottleneck for large-scale untargeted analyses like metabolomics or drug metabolite identification. Ion mobility-mass spectrometry (IM-MS) provides rapid two-dimensional separation of ions based on their mobility through a neutral buffer gas. The mobility of an ion is related to its collision cross section (CCS) with the buffer gas, a physical property that is determined by the size and shape of the ion. This structural dependency makes CCS a promising characteristic for compound identification, but this utility is limited by the availability of high-quality reference CCS values. CCS prediction using machine learning (ML) has recently shown promise in the field, but accurate and broadly applicable models are still lacking. Here we present a novel ML approach that employs a comprehensive collection of CCS values covering a wide range of chemical space. Using this diverse database, we identified the structural characteristics, represented by molecular quantum numbers (MQNs), that contribute to variance in CCS and assessed the performance of a variety of ML algorithms in predicting CCS. We found that by breaking down the chemical structural diversity using unsupervised clustering based on the MQNs, specific and accurate prediction models for each cluster can be trained, which showed superior performance than a single model trained with all data. Using this approach, we have robustly trained and characterized a CCS prediction model with high accuracy on diverse chemical structures. 
An all-in-one web interface (https://CCSbase.net) was built for querying the CCS database and accessing the predictive model to support unknown compound identifications.",2020-03-06 +,"TBIO-29. PedcBioPortal, A CANCER DATA VISUALIZATION TOOL FOR INTEGRATIVE PEDIATRIC CANCER ANALYSES","Abstract The pediatric cancer genome is severely under-represented in genomic warehouses as existing data portals have primarily focused on adult malignancies. This is markedly more pronounced for pediatric brain tumor data as large-scale pediatric initiatives like TARGET do not capture these tumors. To address this unmet need, we have developed a new cancer visualization tool (http://pedcbioportal.org). PedcBioPortal is an instance of cBioPortal, supporting pan-cancer integrative analysis of published pediatric cancer data and consortia-based efforts including the Children’s Brain Tumor Tissue Consortium (CBTTC), the Pediatric NeuroOncology Consortium (PNOC), and the St. Baldrick’s Pediatric Stand Up 2 Cancer Dream Team. The aim of PedcBioPortal is to lower the barrier to large-scale pediatric and pan-cancer genomics analysis by providing rich visualizations and detailed statistics/summaries that integrate multi-dimensional datasets. Key functionalities include the ability to view genetic lesions along the genome, correlate genomic and clinical attributes, and obtain study level summary statistics. Importantly, clinical trials data, pharmacological agent information, pathology reports, and tissue images are all incorporated, demonstrating the platform’s ability to house both structured and unstructured data. Additionally, application program interfaces (APIs) supporting bioinformatics/data scientist access for large-scale analytics are available. 
PedcBioPortal is part of an evolving cloud-based ecosystem of applications dedicated to the empowered use of large-scale pediatric data that flows from biospecimen availability, to the processing of raw data, and finally to interpretation of summarized data. The cBioPortal software is developed and maintained by the cBio Consortium a multi-institutional team (http://www.cbioportal.org).",2018-06-01 +33792640,"Gene Tracer: A smart, interactive, voice-controlled Alexa skill for gene information retrieval and browsing, mutation annotation, and network visualization. ","Traditionally, an individual can only query and retrieve information from a genome browser by using accessories such as a mouse and keyboard. However, technology has changed the way that people interact with their screens. We hypothesized that we could leverage technological advances to use voice recognition as an interactive input to query and visualize genomic information. We developed an Amazon Alexa skill called Gene Tracer that allows users to use their voice to find disease-associated gene information, deleterious mutations, and gene networks, while simultaneously enjoy a genome browser-like visualization experience on their screen. As the voice can be well recognized and understood, Gene Tracer provides users with more flexibility to acquire knowledge and is broadly applicable to other scenarios. Alexa skill store (https://www.amazon.com/LT-Gene-tracer/dp/B08HCL1V68/) and a demonstration video (https://youtu.be/XbDbx7JDKmI). Supplementary data are available at Bioinformatics online.",2021-04-01 +32395534,The expression change of OTUD3-PTEN signaling axis in glioma cells.,"

Background

OTU domain-containing protein 3 (OTUD3), as a deubiquitinase (DUB) belonging to the ovarian tumor protease (OTU) family, has been reported to suppress tumors via the OTUD3-PTEN signaling axis. Glioma is the most common primary intracranial tumor with high invasiveness and poor prognosis. Although less than half of the patients have phosphatase and tensin homologue deleted on chromosome 10 (PTEN) mutations or homozygous deletions, two-thirds of gliomas possess diminished PTEN expression. Hence, it is conceivable that other obscure mechanisms may cause the decreased expression of the PTEN protein.

Methods

OTUD3 expression was assessed in human normal and glioma tissues at The Cancer Genome Atlas (TCGA) database (https://www.cancer.gov/) and Genotype-Tissue Expression (GTEx) database (https://commonfund.nih.gov/GTex). The mRNA levels of OTUD3 in C6 cells and primary astrocytes were detected using real-time fluorescence quantitative PCR. Western blot was performed to assay PTEN and OTUD3 protein expression in C6 cells and primary astrocytes. By generating Kaplan-Meier curves, we predicted the association between OTUD3 expression and prognosis in glioma patients.

Results

(I) OTUD3 transcription was markedly downregulated in glioma based on microarray data for gene expression between human gliomas and normal brain samples. (II) The mRNA level of OTUD3 in C6 cells was significantly lower than that in primary astrocytes. (III) The expression of PTEN and OTUD3 proteins in C6 cells was significantly decreased when compared with primary astrocytes. (IV) Glioma patients with high expression of OTUD3 had a longer survival time than patients with low expression.

Conclusions

Our present findings demonstrated that low expression of OTUD3 in glioma may be involved in PTEN related glioma and may contribute to patient survival.",2020-04-01 +33826413,"Rare, Protein-Altering Variants in AS3MT and Arsenic Metabolism Efficiency: A Multi-Population Association Study.","

Background

Common genetic variation in the arsenic methyltransferase (AS3MT) gene region is known to be associated with arsenic metabolism efficiency (AME), measured as the percentage of dimethylarsinic acid (DMA%) in the urine. Rare, protein-altering variants in AS3MT could have even larger effects on AME, but their contribution to AME has not been investigated.

Objectives

We estimated the impact of rare, protein-coding variation in AS3MT on AME using a multi-population approach to facilitate the discovery of population-specific and shared causal rare variants.

Methods

We generated targeted DNA sequencing data for the coding regions of AS3MT for three arsenic-exposed cohorts with existing data on arsenic species measured in urine: Health Effects of Arsenic Longitudinal Study (HEALS, n=2,434), Strong Heart Study (SHS, n=868), and New Hampshire Skin Cancer Study (NHSCS, n=666). We assessed the collective effects of rare (allele frequency <1%), protein-altering AS3MT variants on DMA%, using multiple approaches, including a test of the association between rare allele carrier status (yes/no) and DMA% using linear regression (adjusted for common variants in 10q24.32 region, age, sex, and population structure).

Results

We identified 23 carriers of rare, protein-altering AS3MT variants across all cohorts (13 in HEALS and 5 each in SHS and NHSCS), including 6 carriers of predicted loss-of-function variants. DMA% was 6-10% lower in carriers compared with noncarriers in HEALS [β=-9.4 (95% CI: -13.9, -4.8)], SHS [β=-6.9 (95% CI: -13.6, -0.2)], and NHSCS [β=-8.7 (95% CI: -15.6, -2.2)]. In meta-analyses across cohorts, DMA% was 8.7% lower in carriers [β=-8.7 (95% CI: -11.9, -5.4)].

Discussion

Rare, protein-altering variants in AS3MT were associated with lower mean DMA%, an indicator of reduced AME. Although a small percentage of the population (0.5-0.7%) carry these variants, they are associated with a 6-10% decrease in DMA% that is consistent across multiple ancestral and environmental backgrounds. https://doi.org/10.1289/EHP8152.",2021-04-07 +29939204,Landscape of the long non-coding RNA transcriptome in human heart.,"Long non-coding RNAs (lncRNAs) have been revealed to play essential roles in the human cardiovascular system. However, information about their mechanisms is limited, and a comprehensive view of cardiac lncRNAs is lacking from a multiple tissues perspective to date. Here, the landscape of the lncRNA transcriptome in human heart was summarized. We summarized all lncRNA transcripts from publicly available human transcriptome resources (156 heart samples and 210 samples from 29 other tissues) and systematically analysed all annotated and novel lncRNAs expressed in heart. A total of 7485 lncRNAs whose expression was elevated in heart (HE lncRNAs) and 453 lncRNAs expressed in all 30 analysed tissues (EIA lncRNAs) were extracted. Using various bioinformatics resources, methods and tools, the features of these lncRNAs were discussed from various perspectives, including genomic structure, conservation, dynamic variation during heart development, cis-regulation, differential expression in cardiovascular diseases and cancers as well as regulation at transcriptional and post-transcriptional levels. Afterwards, all the features discussed above were integrated into a user-friendly resource named CARDIO-LNCRNAS (http://bio-bigdata.hrbmu.edu.cn/CARDIO-LNCRNAS/ or http://www.bio-bigdata.net/CARDIO-LNCRNAS/). 
This study represents the first global view of lncRNAs in the human cardiovascular system based on multiple tissues and sheds light on the role of lncRNAs in developments and heart disorders.",2019-09-01 +31099399,epiTAD: a web application for visualizing chromosome conformation capture data in the context of genetic epidemiology.,"

Summary

Complementary advances in genomic technology and public data resources have created opportunities for researchers to conduct multifaceted examination of the genome on a large scale. To meet the need for integrative genome wide exploration, we present epiTAD. This web-based tool enables researchers to compare genomic 3D organization and annotations across multiple databases in an interactive manner to facilitate in silico discovery.

Availability and implementation

epiTAD can be accessed at https://apps.gerkelab.com/epiTAD/ where we have additionally made publicly available the source code and a Docker containerized version of the application.",2019-11-01 +30844057,Bacterial Feature Finder (BaFF)-a system for extracting features overrepresented in sets of prokaryotic organisms.,"

Motivation

The results of some experimental and computational techniques are given in terms of large sets of organisms, especially prokaryotic. While their distinctive features can provide useful data regarding specific phenomena, there are no automated tools for extracting them.

Results

We present here the Bacterial Feature Finder web server, a tool to automatically interrogate sets of prokaryotic organisms provided by the user to evaluate their specific biological features. At the core of the system is a searchable database of qualitative and quantitative features compiled for more than 23 000 prokaryotic organisms. Both the input set of organisms and the background set used to calculate the enriched features can be directly provided by the user, or they can be obtained by searching the database. The results are presented via an interactive graphical interface, with links to external resources.

Availability and implementation

The web server is freely available at http://csbg.cnb.csic.es/BaFF. It has been tested in the main web browsers and does not require any especial plug-ins or additional software.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +32073269,COLMAR Lipids Web Server and Ultrahigh-Resolution Methods for Two-Dimensional Nuclear Magnetic Resonance- and Mass Spectrometry-Based Lipidomics.,"Accurate identification of lipids in biological samples is a key step in lipidomics studies. Multidimensional nuclear magnetic resonance (NMR) spectroscopy is a powerful analytical tool for this purpose as it provides comprehensive structural information on lipid composition at atomic resolution. However, the interpretation of NMR spectra of complex lipid mixtures is currently hampered by limited spectral resolution and the absence of a customized lipid NMR database along with user-friendly spectral analysis tools. We introduce a new two-dimensional (2D) NMR metabolite database ""COLMAR Lipids"" that was specifically curated for hydrophobic metabolites presently containing 501 compounds with accurate experimental 2D 13C-1H heteronuclear single quantum coherence (HSQC) chemical shift data measured in CDCl3. A new module in the public COLMAR suite of NMR web servers was developed for the (semi)automated analysis of complex lipidomics mixtures (http://spin.ccic.osu.edu/index.php/colmarm/index2). To obtain 2D HSQC spectra with the necessary high spectral resolution along both 13C and 1H dimensions, nonuniform sampling in combination with pure shift spectroscopy was applied allowing the extraction of an abundance of unique cross-peaks belonging to hydrophobic compounds in complex lipidomics mixtures. As shown here, this information is critical for the unambiguous identification of underlying lipid molecules by means of the new COLMAR Lipids web server, also in combination with mass spectrometry, as is demonstrated for Caco-2 cell and lung tissue cell extracts.",2020-03-04 +31559753,[Mouse liver proteome database].,"The liver is the metabolic center of mammalian body. 
Systematic study on liver's proteome expression under different physiological and pathological conditions helps us understand the functional mechanisms of the liver. With the rapid development of liquid chromatography tandem mass spectrometry technique, numerous studies on liver physiology and pathology features produced a large number of proteomics data. In this paper, 834 proteomics experiments of mouse liver were systematically collected and the mouse liver proteome database (Mouse Liver Portal, http://mouseliver.com) was established. The Mouse Liver Portal contains the liver's proteomics data under different physiology and pathology conditions, such as different gender, age, circadian rhythm, cell type and different phase of partial hepatectomy, non-alcoholic fatty liver. This portal provides the changes in proteins' expression in different conditions of the liver, differently expressed proteins and the biological processes which they are involved in, potential signal transduction and regulatory networks. As the most comprehensive mouse liver proteome database, it can provide important resources and clues for liver biology research.",2019-09-01 +27328919,TP53 Variations in Human Cancers: New Lessons from the IARC TP53 Database and Genomics Data.,"TP53 gene mutations are one of the most frequent somatic events in cancer. The IARC TP53 Database (http://p53.iarc.fr) is a popular resource that compiles occurrence and phenotype data on TP53 germline and somatic variations linked to human cancer. The deluge of data coming from cancer genomic studies generates new data on TP53 variations and attracts a growing number of database users for the interpretation of TP53 variants. Here, we present the current contents and functionalities of the IARC TP53 Database and perform a systematic analysis of TP53 somatic mutation data extracted from this database and from genomic data repositories. 
This analysis showed that IARC has more TP53 somatic mutation data than genomic repositories (29,000 vs. 4,000). However, the more complete screening achieved by genomic studies highlighted some overlooked facts about TP53 mutations, such as the presence of a significant number of mutations occurring outside the DNA-binding domain in specific cancer types. We also provide an update on TP53 inherited variants including the ones that should be considered as neutral frequent variations. We thus provide an update of current knowledge on TP53 variations in human cancer as well as inform users on the efficient use of the IARC TP53 Database.",2016-07-08 +33693405,Dashboard of Sentiment in Austrian Social Media During COVID-19.,"To track online emotional expressions on social media platforms close to real-time during the COVID-19 pandemic, we built a self-updating monitor of emotion dynamics using digital traces from three different data sources in Austria. This allows decision makers and the interested public to assess dynamics of sentiment online during the pandemic. We used web scraping and API access to retrieve data from the news platform derstandard.at, Twitter, and a chat platform for students. We documented the technical details of our workflow to provide materials for other researchers interested in building a similar tool for different contexts. Automated text analysis allowed us to highlight changes of language use during COVID-19 in comparison to a neutral baseline. We used special word clouds to visualize that overall difference. Longitudinally, our time series showed spikes in anxiety that can be linked to several events and media reporting. Additionally, we found a marked decrease in anger. The changes lasted for remarkably long periods of time (up to 12 weeks). We have also discussed these and more patterns and connect them to the emergence of collective emotions. 
The interactive dashboard showcasing our data is available online at http://www.mpellert.at/covid19_monitor_austria/. Our work is part of a web archive of resources on COVID-19 collected by the Austrian National Library.",2020-10-26 +33481113,Association between chronic kidney disease and COVID-19-related mortality in New York.,"

Purpose

To evaluate mortality risk of CKD patients infected with COVID-19, and assess shared characteristics associated with health disparities in CKD outcome.

Methods

We extracted the data from a case series of 7624 patients presented at Mount Sinai Health System, in New York for testing between 3/28/2020 and 4/16/2020. De-identified patient data set is being produced by the Scientific Computing department and made available to the Mount Sinai research community at the following website: https://msdw.mountsinai.org/ .

Results

Of 7624 COVID-19 patients, 7.8% (n = 597) had CKD on hospital admission, and 11.2% (n = 856) died of COVID-19 infection. CKD patients were older, more likely to have diabetes, hypertension, and chronic obstructive pulmonary disease (COPD), were current or former smokers, had a longer time to discharge, and had worse survival compared to non-CKD patients (p < 0.05). COVID-19 mortality rate was significantly higher in CKD patients (23.1% vs 10.2%) with a 1.51 greater odds of dying (95% CI: 1.19-1.90). Controlling for demographic, behavioral, and clinical covariates, the logistic regression analysis showed significant and consistent effects of CKD, older age, male gender, and hypertension with mortality (p < 0.05).

Conclusion

CKD was a significant independent predictor of COVID-19 mortality, along with older age, male gender, and hypertension. Future research will investigate the effects of COVID-19 on long-term renal function.",2021-01-22 +28386528,De novo transcriptome assembly and its annotation for the aposematic wood tiger moth (Parasemia plantaginis).,"In this paper we report the public availability of transcriptome resources for the aposematic wood tiger moth (Parasemia plantaginis). A comprehensive assembly methods, quality statistics, and annotation are provided. This reference transcriptome may serve as a useful resource for investigating functional gene activity in aposematic Lepidopteran species. All data is freely available at the European Nucleotide Archive (http://www.ebi.ac.uk/ena) under study accession number: PRJEB14172.",2017-03-21 +34512505,An Externally Validated Dynamic Nomogram for Predicting Unfavorable Prognosis in Patients With Aneurysmal Subarachnoid Hemorrhage.,"Background: Aneurysmal subarachnoid hemorrhage (aSAH) leads to severe disability and functional dependence. However, no reliable method exists to predict the clinical prognosis after aSAH. Thus, this study aimed to develop a web-based dynamic nomogram to precisely evaluate the risk of poor outcomes in patients with aSAH. Methods: Clinical patient data were retrospectively analyzed at two medical centers. One center with 126 patients was used to develop the model. Least absolute shrinkage and selection operator (LASSO) analysis was used to select the optimal variables. Multivariable logistic regression was applied to identify independent prognostic factors and construct a nomogram based on the selected variables. The C-index and Hosmer-Lemeshow p-value and Brier score was used to reflect the discrimination and calibration capacities of the model. 
Receiver operating characteristic curve and calibration curve (1,000 bootstrap resamples) were generated for internal validation, while another center with 84 patients was used to validate the model externally. Decision curve analysis (DCA) and clinical impact curves (CICs) were used to evaluate the clinical usefulness of the nomogram. Results: Unfavorable prognosis was observed in 46 (37%) patients in the training cohort and 24 (29%) patients in the external validation cohort. The independent prognostic factors of the nomogram, including neutrophil-to-lymphocyte ratio (NLR) (p = 0.005), World Federation of Neurosurgical Societies (WFNS) grade (p = 0.002), and delayed cerebral ischemia (DCI) (p = 0.0003), were identified using LASSO and multivariable logistic regression. A dynamic nomogram (https://hu-ping.shinyapps.io/DynNomapp/) was developed. The nomogram model demonstrated excellent discrimination, with a bias-corrected C-index of 0.85, and calibration capacities (Hosmer-Lemeshow p-value, 0.412; Brier score, 0.12) in the training cohort. Application of the model to the external validation cohort yielded a C-index of 0.84 and a Brier score of 0.13. Both DCA and CIC showed a superior overall net benefit over the entire range of threshold probabilities. Conclusion: This study identified that NLR on admission, WFNS grade, and DCI independently predicted unfavorable prognosis in patients with aSAH. These factors were used to develop a web-based dynamic nomogram application to calculate the precise probability of a poor patient outcome. This tool will benefit personalized treatment and patient management and help neurosurgeons make better clinical decisions.",2021-08-26 +32985502,Vegetation traits of pre-Alpine grasslands in southern Germany.,"The data set contains information on aboveground vegetation traits of > 100 georeferenced locations within ten temperate pre-Alpine grassland plots in southern Germany. 
The grasslands were sampled in April 2018 for the following traits: bulk canopy height; weight of fresh and dry biomass; dry weight percentage of the plant functional types (PFT) non-green vegetation, legumes, non-leguminous forbs, and graminoids; total green area index (GAI) and PFT-specific GAI; plant water content; plant carbon and nitrogen content (community values and PFT-specific values); as well as leaf mass per area (LMA) of PFT. In addition, a species specific inventory of the plots was conducted in June 2020 and provides plot-level information on grassland type and plant species composition. The data set was obtained within the framework of the SUSALPS project (""Sustainable use of alpine and pre-alpine grassland soils in a changing climate""; https://www.susalps.de/ ) to provide in-situ data for the calibration and validation of remote sensing based models to estimate grassland traits.",2020-09-28 +26589523,The Resource Identification Initiative: A Cultural Shift in Publishing.,"A central tenet in support of research reproducibility is the ability to uniquely identify research resources, i.e., reagents, tools, and materials that are used to perform experiments. However, current reporting practices for research resources are insufficient to identify the exact resources that are reported or to answer basic questions such as ""How did other studies use resource X?"" To address this issue, the Resource Identification Initiative was launched as a pilot project to improve the reporting standards for research resources in the methods sections of papers and thereby improve identifiability and scientific reproducibility. The pilot engaged over 25 biomedical journal editors from most major publishers, as well as scientists and funding officials. Authors were asked to include Research Resource Identifiers (RRIDs) in their manuscripts prior to publication for three resource types: antibodies, model organisms, and tools (i.e., software and databases). 
RRIDs are assigned by an authoritative database, for example a model organism database, for each type of resource. To make it easier for authors to obtain RRIDs, resources were aggregated from the appropriate databases and their RRIDs made available in a central web portal ( http://scicrunch.org/resources ). RRIDs meet three key criteria: they are machine readable, free to generate and access, and are consistent across publishers and journals. The pilot was launched in February of 2014 and over 300 papers have appeared that report RRIDs. The number of journals participating has expanded from the original 25 to more than 40 with RRIDs appearing in 62 different journals to date. Here, we present an overview of the pilot project and its outcomes to date. We show that authors are able to identify resources and are supportive of the goals of the project. Identifiability of the resources post-pilot showed a dramatic improvement for all three resource types, suggesting that the project has had a significant impact on identifiability of research resources.",2016-04-01 +30942864,Protein multiple alignments: sequence-based versus structure-based programs.,"

Motivation

Multiple sequence alignment programs have proved to be very useful and have already been evaluated in the literature yet not alignment programs based on structure or both sequence and structure. In the present article we wish to evaluate the added value provided through considering structures.

Results

We compared the multiple alignments resulting from 25 programs either based on sequence, structure or both, to reference alignments deposited in five databases (BALIBASE 2 and 3, HOMSTRAD, OXBENCH and SISYPHUS). On the whole, the structure-based methods compute more reliable alignments than the sequence-based ones, and even than the sequence+structure-based programs whatever the databases. Two programs lead, MAMMOTH and MATRAS, nevertheless the performances of MUSTANG, MATT, 3DCOMB, TCOFFEE+TM_ALIGN and TCOFFEE+SAP are better for some alignments. The advantage of structure-based methods increases at low levels of sequence identity, or for residues in regular secondary structures or buried ones. Concerning gap management, sequence-based programs set less gaps than structure-based programs. Concerning the databases, the alignments of the manually built databases are more challenging for the programs.

Availability and implementation

All data and results presented in this study are available at: http://wwwabi.snv.jussieu.fr/people/mathilde/download/AliMulComp/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +29209336,DRDB: An Online Date Palm Genomic Resource Database.,"Background: Date palm (Phoenix dactylifera L.) is a cultivated woody plant with agricultural and economic importance in many countries around the world. With the advantages of next generation sequencing technologies, genome sequences for many date palm cultivars have been released recently. Short sequence repeat (SSR) and single nucleotide polymorphism (SNP) can be identified from these genomic data, and have been proven to be very useful biomarkers in plant genome analysis and breeding. Results: Here, we first improved the date palm genome assembly using 130X of HiSeq data generated in our lab. Then 246,445 SSRs (214,901 SSRs and 31,544 compound SSRs) were annotated in this genome assembly; among the SSRs, mononucleotide SSRs (58.92%) were the most abundant, followed by di- (29.92%), tri- (8.14%), tetra- (2.47%), penta- (0.36%), and hexa-nucleotide SSRs (0.19%). The high-quality PCR primer pairs were designed for most (174,497; 70.81% out of total) SSRs. We also annotated 6,375,806 SNPs with raw read depth≥3 in 90% cultivars. To further reduce false positive SNPs, we only kept 5,572,650 (87.40% out of total) SNPs with at least 20% cultivars support for downstream analyses. The high-quality PCR primer pairs were also obtained for 4,177,778 (65.53%) SNPs. We reconstructed the phylogenetic relationships among the 62 cultivars using these variants and found that they can be divided into three clusters, namely North Africa, Egypt - Sudan, and Middle East - South Asian, with Egypt - Sudan being the admixture of North Africa and Middle East - South Asian cultivars; we further confirmed these clusters using principal component analysis. Moreover, 34,346 SSRs and 4,177,778 SNPs with PCR primers were assigned to shared cultivars for cultivar classification and diversity analysis. 
All these SSRs, SNPs and their classification are available in our database, and can be used for cultivar identification, comparison, and molecular breeding. Conclusion:DRDB is a comprehensive genomic resource database of date palm. It can serve as a bioinformatics platform for date palm genomics, genetics, and molecular breeding. DRDB is freely available at http://drdb.big.ac.cn/home.",2017-11-02 +34157249,The Effect of Deep Brain Stimulation of the Subthalamic Nucleus on Language Function in Parkinson's Disease: A Systematic Review.,"Purpose This systematic review focuses on the effect of bilateral deep brain stimulation (DBS) of the subthalamic nucleus (STN) on language function in Parkinson's disease (PD). It fills an important gap in recent reviews by considering other language tasks in addition to verbal fluency. Method We critically and systematically reviewed the literature on studies that investigated the effect of bilateral STN-DBS on language function in PD. All studies included a matched PD control group who were on best medical treatment, with language testing at similar baseline and follow-up intervals as the DBS PD group. Results Thirteen identified studies included a form of a verbal fluency task, seven studies included picture naming, and only two studies included more language-oriented tasks. We found that verbal fluency was negatively affected after DBS, whereas picture naming was unaffected. Studies investigating individual change patterns using reliable change indices showed that individual variability is larger for picture naming than for verbal fluency. Conclusions Verbal fluency is the most frequently investigated aspect of language function. Our analysis showed a pattern of decline in verbal fluency across multiple studies after STN-DBS, whereas picture naming was unaffected. Data on more language-oriented tests in a large DBS sample and best medical treatment control group are sparse. 
The investigation of language function in PD after DBS requires sensitive language tests (with and without time pressure) and experimental designs as used in the studies reviewed here. Reliable change index statistics are a promising tool for investigating individual differences in performance after DBS. Supplemental Material https://doi.org/10.23641/asha.14794458.",2021-06-22 +33532841,BAGET 2.0: an updated web tool for the effortless retrieval of prokaryotic gene context and sequence. ,"The retrieval of a single gene sequence and context from completely sequenced bacterial and archaeal genomes constitutes an intimidating task for the wet bench biologist. Existing web-based genome browsers are either too complex for routine use or only provide a subset of the available prokaryotic genomes. We have developed BAGET 2.0 (Bacterial and Archaeal Gene Exploration Tool), an updated web service granting access in just three mouse clicks to the sequence and synteny of any gene from completely sequenced bacteria and archaea. User-provided annotated genomes can be processed as well. BAGET 2.0 relies on a local database updated on a daily basis. BAGET 2.0 befits all current browsers such as Chrome, Firefox, Edge, Opera and Safari. Internet Explorer 11 is supported. BAGET 2.0 is freely accessible at https://archaea.i2bc.paris-saclay.fr/baget/.",2021-02-03 +33313647,Prediction of bio-sequence modifications and the associations with diseases.,"Modifications of protein, RNA and DNA play an important role in many biological processes and are related to some diseases. Therefore, accurate identification and comprehensive understanding of protein, RNA and DNA modification sites can promote research on disease treatment and prevention. With the development of sequencing technology, the number of known sequences has continued to increase. In the past decade, many computational tools that can be used to predict protein, RNA and DNA modification sites have been developed. 
In this review, we comprehensively summarized the modification site predictors for three different biological sequences and the association with diseases. The relevant web server is accessible at http://lab.malab.cn/∼acy/PTM_data/ some sample data on protein, RNA and DNA modification can be downloaded from that website.",2021-03-01 +31681951,GenCLiP 3: mining human genes' functions and regulatory networks from PubMed based on co-occurrences and natural language processing. ,"We present a web server, GenCLiP 3, which is an updated version of GenCLiP 2.0 to enhance analysis of human gene functions and regulatory networks, with the following improvements: i) accurate recognition of molecular interactions with polarity and directionality from the entire PubMed database; ii) support for Boolean search to customize multiple-term search and to quickly retrieve function related genes; iii) strengthened association between gene and keyword by a new scoring method; and iv) daily updates following literature release at PubMed FTP. The server is freely available for academic use at: http://ci.smu.edu.cn/genclip3/. Supplementary data are available at Bioinformatics online.",2019-11-04 +32294195,ASFVdb: an integrative resource for genomic and proteomic analyses of African swine fever virus. ,"The recent outbreaks of African swine fever (ASF) in China and Europe have threatened the swine industry globally. To control the transmission of ASF virus (ASFV), we developed the African swine fever virus database (ASFVdb), an online data visualization and analysis platform for comparative genomics and proteomics. On the basis of known ASFV genes, ASFVdb reannotates the genomes of every strain and newly annotates 5352 possible open reading frames (ORFs) of 45 strains. Moreover, ASFVdb performs a thorough analysis of the population genetics of all the published genomes of ASFV strains and performs functional and structural predictions for all genes. 
Users can obtain not only basic information for each gene but also its distribution in strains and conserved or high mutation regions, possible subcellular location and topology. In the genome browser, ASFVdb provides a sliding window for results of population genetic analysis, which facilitates genetic and evolutionary analyses at the genomic level. The web interface was constructed based on SWAV 1.0. ASFVdb is freely accessible at http://asfvdb.popgenetics.net.",2020-01-01 +32427338,RiboToolkit: an integrated platform for analysis and annotation of ribosome profiling data to decode mRNA translation at codon resolution.,"Ribosome profiling (Ribo-seq) is a powerful technology for globally monitoring RNA translation; ranging from codon occupancy profiling, identification of actively translated open reading frames (ORFs), to the quantification of translational efficiency under various physiological or experimental conditions. However, analyzing and decoding translation information from Ribo-seq data is not trivial. Although there are many existing tools to analyze Ribo-seq data, most of these tools are designed for specific or limited functionalities and an easy-to-use integrated tool to analyze Ribo-seq data is lacking. Fortunately, the small size (26-34 nt) of ribosome protected fragments (RPFs) in Ribo-seq and the relatively small amount of sequencing data greatly facilitates the development of such a web platform, which is easy to manipulate for users with or without bioinformatic expertise. Thus, we developed RiboToolkit (http://rnabioinfor.tch.harvard.edu/RiboToolkit), a convenient, freely available, web-based service to centralize Ribo-seq data analyses, including data cleaning and quality evaluation, expression analysis based on RPFs, codon occupancy, translation efficiency analysis, differential translation analysis, functional annotation, translation metagene analysis, and identification of actively translated ORFs. 
Besides, easy-to-use web interfaces were developed to facilitate data analysis and intuitively visualize results. Thus, RiboToolkit will greatly facilitate the study of mRNA translation based on ribosome profiling.",2020-07-01 +31047795,Metabolomics-Driven Exploration of the Chemical Drug Space to Predict Combination Antimicrobial Therapies.,"Alternative to the conventional search for single-target, single-compound treatments, combination therapies can open entirely new opportunities to fight antibiotic resistance. However, combinatorial complexity prohibits experimental testing of drug combinations on a large scale, and methods to rationally design combination therapies are lagging behind. Here, we developed a combined experimental-computational approach to predict drug-drug interactions using high-throughput metabolomics. The approach was tested on 1,279 pharmacologically diverse drugs applied to the gram-negative bacterium Escherichia coli. Combining our metabolic profiling of drug response with previously generated metabolic and chemogenomic profiles of 3,807 single-gene deletion strains revealed an unexpectedly large space of inhibited gene functions and enabled rational design of drug combinations. This approach is applicable to other therapeutic areas and can unveil unprecedented insights into drug tolerance, side effects, and repurposing. The compendium of drug-associated metabolome profiles is available at https://zampierigroup.shinyapps.io/EcoPrestMet, providing a valuable resource for the microbiological and pharmacological communities.",2019-04-29 +34454123,Treatment of Neovascular Age-Related Macular Degeneration: An Economic Cost-Risk Analysis of Anti-Vascular Endothelial Growth Factor Agents.,"

Purpose

To find the best cost-effective neovascular age-related macular degeneration (nAMD) treatment to improve vision while avoiding complications. The model is based on a cost-risk tradeoff analysis from policymakers' perspective.

Design

A powerful and flexible simulation modeled outcomes of 2 years of treatment with the 4 commonly used anti-vascular endothelial growth factor drugs (bevacizumab, ranibizumab, aflibercept, and brolucizumab) across 3 injection protocols, building on prior findings that these drugs are noninferior. The model incorporates blinding complications, their management, and associated costs to society. Each option and several what-if scenarios were simulated 1,000 times with 100,000 hypothetical patients.

Participants

One hundred thousand simulated patients using data from published clinical trials.

Method

Case- and eye-specific cost-risk economic analysis.

Main outcome measures

Costs of nAMD treatment per patient and number of eyes that become blind as a result of treatment over 2 years.

Results

Using published prices and fees, the injection protocol that follows published clinical studies, results showed that the mean±standard deviation cost per patient were $16,859 ± $3.65, $32,949 ± $3.27, $39,831 ± $3.80, and $53,056 ± $2.99 for bevacizumab, brolucizumab, aflibercept, and ranibizumab, respectively. The numbers±standard deviations of treated eyes that became blind were 108 ± 10.18, 694 ± 26.66, 168 ± 12.83, and 108 ± 10.52, respectively. We further provide a lower bound (when all patients are maximally extended) and upper bound (when no patient is extended) to these numbers. For brolucizumab, the upper bound is the 2-month interval injection protocol.

Conclusions

Taking a policymaking perspective, this study suggested that bevacizumab is the preferred first-line therapy. Recommendation for second-line therapy depends on the extent of the policymaker's risk aversion because of the tradeoff between cost and risk of blindness as a result of treatment. If risk neutral, the least expensive option (brolucizumab) is preferred. But if policymakers are moderately to highly risk averse, then aflibercept or ranibizumab are preferred. Because medical advances and different costs may change our findings, we provide a free application (https://eye-inj.shinyapps.io/calc/) for readers who wish to use different cost structures. Simulating outcomes is an innovative approach, unique in ophthalmology, and presents a significant opportunity because it can be adapted easily to different settings (using different costs, risks, and protocols) and to other diseases (e.g., diabetic macular edema), to ultimately improve wide-scale decision-making and use of funds.",2021-08-25 +31588507,"VDJdb in 2019: database extension, new analysis infrastructure and a T-cell receptor motif compendium.","Here, we report an update of the VDJdb database with a substantial increase in the number of T-cell receptor (TCR) sequences and their cognate antigens. The update further provides a new database infrastructure featuring two additional analysis modes that facilitate database querying and real-world data analysis. The increased yield of TCR specificity identification methods and the overall increase in the number of studies in the field has allowed us to expand the database more than 5-fold. Furthermore, several new analysis methods are included. For example, batch annotation of TCR repertoire sequencing samples allows for annotating large datasets on-line. 
Using recently developed bioinformatic methods for TCR motif mining, we have built a reduced set of high-quality TCR motifs that can be used for both training TCR specificity predictors and matching against TCRs of interest. These additions enhance the versatility of the VDJdb in the task of exploring T-cell antigen specificities. The database is available at https://vdjdb.cdr3.net.",2020-01-01 +31950190,PCaLiStDB: a lifestyle database for precision prevention of prostate cancer. ,"The interaction between genes, lifestyles and environmental factors makes the genesis and progress of prostate cancer (PCa) very heterogeneous. Positive lifestyle is important to the prevention and controlling of PCa. To investigate the relationship between PCa and lifestyle at systems level, we established a PCa related lifestyle database (PCaLiStDB) and collected the PCa-related lifestyles including foods, nutrients, life habits and social and environmental factors as well as associated genes and physiological and biochemical indexes together with the disease phenotypes and drugs. Data format standardization was implemented for the future Lifestyle-Wide Association Studies of PCa (PCa_LWAS). Currently, 2290 single-factor lifestyles and 856 joint effects of two or more lifestyles were collected. Among these, 394 are protective factors, 556 are risk factors, 45 are no-influencing factors, 52 are factors with contradictory views and 1977 factors are lacking effective literatures support. PCaLiStDB is expected to facilitate the prevention and control of PCa, as well as the promotion of mechanistic study of lifestyles on PCa. Database URL: http://www.sysbio.org.cn/pcalistdb/.",2020-01-01 +34153203,Accuracy Assessment of Two Electromagnetic Articulographs: Northern Digital Inc. WAVE and Northern Digital Inc. VOX.,"Purpose This study compares two electromagnetic articulographs manufactured by Northern Digital, Inc.: the NDI Wave System (from 2008) and the NDI Vox-EMA System (from 2020). 
Method Four experiments were completed: (a) comparison of statically positioned sensors, (b) tracking dynamic movements of sensors manipulated using a motor-driven LEGO apparatus, (c) tracking small and large movements of sensors mounted in a rigid bar manipulated by hand, and (d) tracking movements of sensors rotated on a circular disc. We assessed spatial variability for statically positioned sensors, variability in the transduced Euclidean distances between sensor pairs, and missing data rates. For sensors tracking circular movements, we compared the fit between fitted ideal circles and actual trajectories. Results The average sensor pair tracking error (i.e., the standard deviation of the Euclidean distances) was 1.37 mm for the WAVE and 0.12 mm for the VOX during automated trials at the fastest speed, and 0.35 mm for the WAVE and 0.14 mm for the VOX during the tracking of large manual movements. The average standard deviation of the fitted circle radii charted by manual circular disc movements was 0.72 mm for the WAVE sensors and 0.14 mm for the VOX sensors. There was no significant difference between the WAVE and the VOX in the number of missing frames. Conclusions In general, the VOX system significantly outperformed the WAVE on measures of both static precision and dynamic accuracy (automated and manual). For both systems, positional precision and spatial variability were influenced by the sensors' position relative to the field generator unit (worse when further away). Supplemental Material https://doi.org/10.23641/asha.14787846.",2021-06-21 +33568007,Endovascular aortic repair with EndoAnchors demonstrate good mid-term outcomes in physician-initiated multicenter analysis-The PERU registry.,"

Objectives

We aim to describe real-world outcomes from multicenter data about the efficacy of adjunct Heli-FX EndoAnchor usage in preventing or repairing failures during infrarenal endovascular aneurysm repair (EVAR), so-called EndoSutured-aneurysm-repair (ESAR).

Methods

The current study has been assigned an identifier (NCT04100499) at the US National Library of Medicine (https://ClinicalTrials.gov). It is an observational retrospective study of prospectively collected data from seven vascular surgery departments between June 2010 and December 2019. Patients included in the ANCHOR registry were excluded from this analysis. The decision for the use of EndoAnchors was made by the treating surgeon or multidisciplinary aortic committee according to each center's practice. Follow-up imaging was scheduled according to each center's protocol, which necessarily included either abdominal ultrasound or radiography or computed tomographic scan imaging. The main outcomes analyzed were technical success, freedom from type Ia endoleaks (IaEL), all-cause and aneurysm-related mortality, and sac variation and trends evaluated for those with at least six months imaging follow-up.

Results

Two hundred and seventy-five patients underwent ESAR in participating centers during the study period. After exclusions, 221 patients (184 males, 37 females, mean age 75 ± 8.3 years) were finally included for analysis. Median follow-up for the cohort was 27 (interquartile range 12-48) months. A median 6 (interquartile range 3) EndoAnchors were deployed at ESAR, 175 (79%) procedures were primary and 46 (21%) revision cases, 40 associated with type IaEL. Technical success at operation (initial), 30-day, and overall success were 89, 95.5, and 96.8%, respectively; the 30-day success was higher due to those with subsequent spontaneous proximal endoleak seal. At two years, freedom from type IaEL was 94% for the whole series; 96% and 86% for the primary and revision groups, respectively; whereas freedom from all-cause mortality, aneurysm-related mortality, and reintervention was 89%, 98%, and 87%, respectively. Sac evolution pre-ESAR was 66 ± 15.1 vs. post ESAR 61 ± 17.5 (p < 0.001) and for 180 patients with at least six-month follow-up, 92.2% of them being in a stable (51%) or regression (41%) situation.

Conclusions

This real-world registry demonstrates that adjunct EndoAnchor usage at EVAR achieves high rates of freedom from type IaEL at mid-term including in a high number of patients with hostile neck anatomy, with positive trends in sac-size evolution. Further data with longer follow-up may help to establish EndoAnchor usage as a routine adjunct to EVAR, especially in hostile necks.",2021-02-10 +33532815,eHSCPr discriminating the cell identity involved in endothelial to hematopoietic transition. ,"Hematopoietic stem cells (HSCs) give rise to all blood cells and play a vital role throughout the whole lifespan through their pluripotency and self-renewal properties. Accurately identifying the stages of early HSCs is extremely important, as it may open up new prospects for extracorporeal blood research. Existing experimental techniques for identifying the early stages of HSCs development are time-consuming and expensive. Machine learning has shown its excellence in massive single-cell data processing and it is desirable to develop related computational models as good complements to experimental techniques. In this study, we presented a novel predictor called eHSCPr specifically for predicting the early stages of HSCs development. To reveal the distinct genes at each developmental stage of HSCs, we compared F-score with three state-of-art differential gene selection methods (limma, DESeq2, edgeR) and evaluated their performance. F-score captured the more critical surface markers of endothelial cells and hematopoietic cells, and the area under receiver operating characteristic curve (ROC) value was 0.987. Based on SVM, the 10-fold cross-validation accuracy of eHSCpr in the independent dataset and the training dataset reached 94.84% and 94.19%, respectively. Importantly, we performed transcription analysis on the F-score gene set, which indeed further enriched the signal markers of HSCs development stages. 
eHSCPr can be a powerful tool for predicting early stages of HSCs development, facilitating hypothesis-driven experimental design and providing crucial clues for the in vitro blood regeneration studies. http://bioinfor.imu.edu.cn/ehscpr. Supplementary data are available at Bioinformatics online.",2021-02-03 +33904368,Multidimensional healthy life expectancy of the older population in China.,"Research on healthy life expectancy (HLE) that considers cognitive impairment has been inadequate, particularly in the context of less developed countries. Using data from the China Health and Retirement Longitudinal Study, our study fills this research gap by computing active life expectancy (ALE), cognitive-impairment-free life expectancy (CIFLE), and active and cognitive-impairment-free life expectancy (ACIFLE) for China's older population, using multistate life tables. Results show that at age 60, the three life expectancies were 19.4 years (ALE), 9.5 years (CIFLE), and 8.8 years (ACIFLE) during the period 2011-13. HLE exhibits significant differentials by sex, urban/rural residence, educational level, marital status, and health status at age 60. Among China's older people, males and those living in urban areas experience higher CIFLE, and those who live with a spouse, are more educated, and are healthy at age 60 expect more years in good health according to all three HLE measures.Supplementary material for this article is available at: https://doi.org/10.1080/00324728.2021.1914854.",2021-04-27 +33068114,NanoGalaxy: Nanopore long-read sequencing data analysis in Galaxy. ,"Long-read sequencing can be applied to generate very long contigs and even completely assembled genomes at relatively low cost and with minimal sample preparation. As a result, long-read sequencing platforms are becoming more popular. 
In this respect, the Oxford Nanopore Technologies-based long-read sequencing ""nanopore"" platform is becoming a widely used tool with a broad range of applications and end-users. However, the need to explore and manipulate the complex data generated by long-read sequencing platforms necessitates accompanying specialized bioinformatics platforms and tools to process the long-read data correctly. Importantly, such tools should additionally help democratize bioinformatics analysis by enabling easy access and ease-of-use solutions for researchers. The Galaxy platform provides a user-friendly interface to computational command line-based tools, handles the software dependencies, and provides refined workflows. The users do not have to possess programming experience or extended computer skills. The interface enables researchers to perform powerful bioinformatics analysis, including the assembly and analysis of short- or long-read sequence data. The newly developed ""NanoGalaxy"" is a Galaxy-based toolkit for analysing long-read sequencing data, which is suitable for diverse applications, including de novo genome assembly from genomic, metagenomic, and plasmid sequence reads. A range of best-practice tools and workflows for long-read sequence genome assembly has been integrated into a NanoGalaxy platform to facilitate easy access and use of bioinformatics tools for researchers. NanoGalaxy is freely available at the European Galaxy server https://nanopore.usegalaxy.eu with supporting self-learning training material available at https://training.galaxyproject.org.",2020-10-01 +26582919,EBI metagenomics in 2016--an expanding and evolving resource for the analysis and archiving of metagenomic data.,"EBI metagenomics (https://www.ebi.ac.uk/metagenomics/) is a freely available hub for the analysis and archiving of metagenomic and metatranscriptomic data. 
Over the last 2 years, the resource has undergone rapid growth, with an increase of over five-fold in the number of processed samples and consequently represents one of the largest resources of analysed shotgun metagenomes. Here, we report the status of the resource in 2016 and give an overview of new developments. In particular, we describe updates to data content, a complete overhaul of the analysis pipeline, streamlining of data presentation via the website and the development of a new web based tool to compare functional analyses of sequence runs within a study. We also highlight two of the higher profile projects that have been analysed using the resource in the last year: the oceanographic projects Ocean Sampling Day and Tara Oceans.",2015-11-17 +34353186,Associations of spousal and non-spousal caregiving with six-year trajectories of depressive symptoms among older women in the Caregiver-Study of Osteoporotic Fractures study.,"Objectives:Caregiving and becoming widowed are risk factors for depression in older adults, but few studies have examined their combined effect on depressive symptom trajectories. In a cohort of older women (mean age = 80.7 years) from the Caregiver-Study of Osteoporotic Fractures, we used latent class growth curve modeling to identify trajectories of depressive symptoms over approximately six years.Method:We used multinomial logistic regression to assess the relative odds of four depressive symptom trajectories (consistently low, consistently moderate, moderate/increasing, and consistently high), among three groups: spousal caregivers (n = 149), non-spousal caregivers (n = 157), and non-caregivers (n = 422). We also repeated this analysis with combined caregiving status and widowhood as the exposure.Results:Compared to non-caregivers, spousal caregivers had greater relative odds of consistently high versus consistently low depressive symptoms (adjusted odds ratio [aOR] = 3.6, 95% confidence interval [CI]: 1.9, 6.5). 
Non-spousal caregivers did not differ from non-caregivers in depressive trajectories. Compared to non-caregivers who did not become widowed, both widowed and non-widowed spousal caregivers had greater relative odds of consistently high versus consistently low depressive symptoms (aOR = 4.9, 95% CI: 1.9, 12.7 and aOR = 3.0, 95% CI: 1.5, 6.0, respectively). Non-widowed spousal caregivers, but not widowed spousal caregivers, had a non-statistically-significant trend toward increased relative odds of moderate/increasing depressive symptoms (aOR = 1.5, 95% CI: 0.7, 3.4).Conclusion:Spousal caregiving and widowhood, but not non-spousal caregiving, are associated with trajectories reflecting greater depressive symptoms over time. Informal caregiving is common among older women, and women caring for spouses should be monitored for depression, both during caregiving and after spousal loss.Supplemental data for this article can be accessed online at https://doi.org/10.1080/13607863.2021.1950611.",2021-08-06 +31551865,"Creative Flexibility Performance Is Neither Related to Anxiety, Nor to Self-Control Strength, Nor to Their Interaction.","Previous research has reliably found that self-control strength moderates the anxiety-performance relationship for cognitive and perceptual-motor tasks that involve executive functioning. In the present preregistered experiment (N = 200; https://aspredicted.org/a775h.pdf), we investigated whether the interaction of anxiety and self-control also predicts creative flexibility performance. According to the Attentional Control Theory, anxiety can impair executive functioning. In the case that creative flexibility relies on executive functions, anxiety should therefore interfere with creative flexibility performance. However, self-control strength has been demonstrated to serve as a buffer against the negative effects of anxiety on executive functioning. 
Therefore, we assumed that there will be a negative relationship between anxiety and creative flexibility performance, and that this negative relationship would be more pronounced for participants who are low compared to high in momentary self-control strength. Analogous to the previous studies, we manipulated the participants' self-control strength (ego depletion vs. no depletion) and subsequently induced a potentially threatening test situation. The participants then completed a measure of their state anxiety and a standardized test of creative flexibility. Contrary to our expectation, self-control strength, state anxiety, and their interaction did not predict creative flexibility performance. Complementary Bayesian hypothesis testing revealed strong support for the null hypothesis. Therefore, we conclude that, at least under certain conditions, creative flexibility performance may be unrelated to resource-dependent executive functions.",2019-08-28 +26989145,An integrative data analysis platform for gene set analysis and knowledge discovery in a data warehouse framework. ,"Data analysis is one of the most critical and challenging steps in drug discovery and disease biology. A user-friendly resource to visualize and analyse high-throughput data provides a powerful medium for both experimental and computational biologists to understand vastly different biological data types and obtain a concise, simplified and meaningful output for better knowledge discovery. We have previously developed TargetMine, an integrated data warehouse optimized for target prioritization. Here we describe how upgraded and newly modelled data types in TargetMine can now survey the wider biological and chemical data space, relevant to drug discovery and development. To enhance the scope of TargetMine from target prioritization to broad-based knowledge discovery, we have also developed a new auxiliary toolkit to assist with data analysis and visualization in TargetMine. 
This toolkit features interactive data analysis tools to query and analyse the biological data compiled within the TargetMine data warehouse. The enhanced system enables users to discover new hypotheses interactively by performing complicated searches with no programming and obtaining the results in an easy to comprehend output format. Database URL: http://targetmine.mizuguchilab.org.",2016-03-17 +29258817,A Completely Reimplemented MPI Bioinformatics Toolkit with a New HHpred Server at its Core.,"The MPI Bioinformatics Toolkit (https://toolkit.tuebingen.mpg.de) is a free, one-stop web service for protein bioinformatic analysis. It currently offers 34 interconnected external and in-house tools, whose functionality covers sequence similarity searching, alignment construction, detection of sequence features, structure prediction, and sequence classification. This breadth has made the Toolkit an important resource for experimental biology and for teaching bioinformatic inquiry. Recently, we replaced the first version of the Toolkit, which was released in 2005 and had served around 2.5 million queries, with an entirely new version, focusing on improved features for the comprehensive analysis of proteins, as well as on promoting teaching. For instance, our popular remote homology detection server, HHpred, now allows pairwise comparison of two sequences or alignments and offers additional profile HMMs for several model organisms and domain databases. Here, we introduce the new version of our Toolkit and its application to the analysis of proteins.",2017-12-16 +33355345,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline on Neuroablative Procedures for Patients With Cancer Pain.,"

Background

Managing cancer pain once it is refractory to conventional treatment continues to challenge caregivers committed to serving those who are suffering from a malignancy. Although neuromodulation has a role in the treatment of cancer pain for some patients, these therapies may not be suitable for all patients. Therefore, neuroablative procedures, which were once a mainstay in treating intractable cancer pain, are again on the rise. This guideline serves as a systematic review of the literature of the outcomes following neuroablative procedures.

Objective

To establish clinical practice guidelines for the use of neuroablative procedures to treat patients with cancer pain.

Methods

A systematic review of neuroablative procedures used to treat patients with cancer pain from 1980 to April 2019 was performed using the United States National Library of Medicine PubMed database, EMBASE, and Cochrane CENTRAL. After inclusion criteria were established, full text articles that met the inclusion criteria were reviewed by 2 members of the task force and the quality of the evidence was graded.

Results

In total, 14 646 relevant abstracts were identified by the literature search, from which 189 met initial screening criteria. After full text review, 58 of the 189 articles were included and subdivided into 4 different clinical scenarios. These include unilateral somatic nociceptive/neuropathic body cancer pain, craniofacial cancer pain, midline subdiaphragmatic visceral cancer pain, and disseminated cancer pain. Class II and III evidence was available for these 4 clinical scenarios. Level III recommendations were developed for the use of neuroablative procedures to treat patients with cancer pain.

Conclusion

Neuroablative procedures may be an option for treating patients with refractory cancer pain. Serious adverse events were reported in some studies, but were relatively uncommon. Improved imaging, refinements in technique and the availability of new lesioning modalities may minimize the risks of neuroablation even further.The full guidelines can be accessed at https://www.cns.org/guidelines/browse-guidelines-detail/guidelines-on-neuroablative-procedures-patients-wi.",2021-02-01 +33482697,A vasculature-centric approach to developing novel treatment options for glioblastoma.,"Introduction: Glioblastoma is invariably deadly and is characterized by extensive vascularization and macrophage-dominant immunosuppression; nevertheless, anti-angiogenesis has so far failed to prolong overall survival of patients. Regardless of the problems in clinical development, the rationale for the application of anti-angiogenics in glioblastoma remains.Areas covered: Resistance to anti-angiogenics is discussed, including vessel co-option and amplification of hypoxic signaling in response to vessel destruction. The modulation of GSC and tumor-associated macrophages by dysfunctional tumor vessels and by hypoxia are outlined. Pharmacologic approaches to sensitizing glioblastomas to anti-angiogenics and evidence for the cooperation of anti-angiogenics with immunotherapies are summarized. Database search: https://pubmed.ncbi.nlm.nih.gov prior to December 12, 2020.Expert opinion: Despite drawbacks in the clinical development of vascular endothelial growth factor A (VEGF)-targeted agents, there is still rationale for the use of anti-angiogenics. The better understanding of vascular co-option and adverse effects of blood vessel destruction guides to improve strategies for vascular targeting. 
The pivotal role of the vasculature and of angiogenic factors such as VEGF for the induction and maintenance of immunosuppression in glioblastoma supports the use of anti-angiogenics in combination with immunotherapy. Proinflammatory repolarization of perivascular and perinecrotic tumor-associated macrophages is probably paramount for overcoming treatment resistance to virtually any treatment.",2021-02-01 +33523580,Assessment of the anti-rheumatoid arthritis activity of Gastrodia elata (tian-ma) and Radix aconitic lateralis preparata (fu-zi) via network pharmacology and untargeted metabolomics analyses.,"

Aim

Gastrodia elata and Radix aconiti lateralis preparrata are respectively named as Tian-Ma and Fu-Zi (TF) in Chinese. We explored the active components against rheumatoid arthritis (RA) from an extensively used couplet of Chinese herbs, Gastrodia elata and Radix aconiti lateralis preparata (TF) via untargeted metabolomics and network pharmacological approaches.

Methods

Water extracts of TF were mixed at ratios 1:1, 3:2 and 2:3 (w/w). Ultra-performance liquid chromatography/tandem mass spectrometry (UPLC-MS/MS) was then utilized as metabolomics screening. Human Metabolome (http://www.hmdb.ca/) and Lipidmaps (http://www.lipidmaps.org/) databases were used to annotate detected compounds. Further identification of vital genes and important pathways associated with the anti-RA properties of the TF preparations was done via network pharmacology, and verified by real-time quantitative polymerase chain reaction (RT-qPCR).

Results

Four key compounds involved in unsaturated fatty acid biosynthesis and isoflavonoid biosynthesis were identified through metabolomics analyses. Three key components of TF associated with anti-RA activity were linoleic acid, daidzein, and daidzin. Results of RT-qPCR revealed that all 3 tested TF couplets (1:1, 3:2, and 2:3) markedly suppressed the transcription of PTGS2. These results were consistent with our network pharmacological predictions.

Conclusions

The anti-RA properties of Tian-Ma and Fu-Zi are associated with the inhibition of arachidonic acid metabolism pathway.",2021-02-01 +32737813,FANCat: French affective norms for ten emotional categories.,"The present study develops key research for French word norms that combines the predominant theories of dimensional and discrete (or categorical) emotions. As a result, we provide the database FANCat, affective norms for a set of 1031 French words on ten discrete emotion categories: fear, anger, disgust, sadness, anxiety, awe, excitement, contentment, amusement, and serenity. FANCat complements a previous word set, FAN, which provides only the dimensional norms, valence, and arousal (Monnier & Syssau, 2014). Herein, we introduce five discrete positive emotions in efforts to differentiate positive emotions at higher resolution and specificity. Although ten emotional categories were considered in FANCat norms, results showed a high degree of inter-rater reliability and a good external validity. Then, distributional analyses of words into the ten emotion categories revealed that positive words evoked mainly the emotions awe, contentment, and amusement, and principally evoked either one positive emotion only (""pure"" words) or two (mixed words). This study contributes to a deeper understanding of the relationship between language, and negative and positive emotions. It is also currently the only norms database in French that analyses ten discrete emotions as well as including valence and arousal. FANCat is available at https://www.researchgate.net/publication/338622765_FANCat_database .",2021-02-01 +33850986,"Data on the geology and structure of the Copper Cliff embayment and offset dyke, Sudbury Igneous Complex, Canada.","This contribution describes maps of the Copper Cliff Embayment (CCE) and Offset (CCO) dyke. 
The associated study attempts to unravel the mode of melt emplacement and the role of pre-impact faults in the deformation of the southern part of the Sudbury Igneous Complex (SIC). This contribution summarizes field observations (maps and images) and structural measurements. In addition, perspective views of the 3D Move model of the CCE and CCO dyke are provided. This data can be used by researchers and exploration geologists working in the Sudbury mining camp as a basis for future mapping, research and exploration efforts in the Copper Cliff area. This article is a co-submission to the following article: L. Mathieu, U. Riller, L. Gibson, P. Lightfoot (2021) Structural controls on the localization of the mineralized Copper Cliff embayment and the Copper Cliff offset dyke, Sudbury Igneous Complex, Canada, Ore Geol. Rev., https://doi.org/10.1016/j.oregeorev.2021.104071.",2021-03-16 +34151658,Some Bryophytes Trigger Cytotoxicity of Stem Cell-like Population in 5-Fluorouracil Resistant Colon Cancer Cells.,"Colorectal cancer is the third most common cancer worldwide. Cancer stem cells are known to play an important role in relapse, and metastases of the disease after chemotherapy. Investigation of new drugs, and their combinations targeting these cells and thus eliminating cancer is one of the most urgent needs of today's chemotherapy. The aim of the present study was to evaluate the effects of Bryophytes like Abietinella abietina (AA), Homolothecium sericeum (HS), Tortella tortuosa (TT), Syntrichia ruralis (SR), and Bryoerythrophyllum rubrum (BR) species extracted with ethyl alcohol on 5-fluorouracil(5-FU) resistant colorectal cancer cell lines (HCT116 and HT29). After extraction, stock solutions of bryophytes were prepared, and IC50 values were detected in drug-resistant cells obtained with 5-FU application. CD24+, CD44+/CD133+ surface markers and P-glycoprotein (P-gp) mediated efflux were isolated from both 5-FU treated cells and analyzed using the flow cytometry. 
In all bryophyte-treated groups, the binding Rho123low (low Rho fluorescence) and Rhohigh (high Rho fluorescence) were sorted from 5-FU resistant HCT116, and HT-29 cells. All types of bryophytes were found cytotoxic. Bryophyte extract reduced the percentage of Rholow cells in cultures incubated with 5-FU. In summary, the implementation of these bryophytes might be regarded as an effective approach for treatment of colorectal cancer due to their cytotoxic effect that decreases the recurrence of the disease.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1933098.",2021-06-20 +32255260,Reverse protein engineering of a novel 4-domain copper nitrite reductase reveals functional regulation by protein-protein interaction.,"Cu-containing nitrite reductases that convert NO2 - to NO are critical enzymes in nitrogen-based energy metabolism. Among organisms in the order Rhizobiales, we have identified two copies of nirK, one encoding a new class of 4-domain CuNiR that has both cytochrome and cupredoxin domains fused at the N terminus and the other, a classical 2-domain CuNiR (Br2D NiR). We report the first enzymatic studies of a novel 4-domain CuNiR from Bradyrhizobium sp. ORS 375 (BrNiR), its genetically engineered 3- and 2-domain variants, and Br2D NiR revealing up to ~ 500-fold difference in catalytic efficiency in comparison with classical 2-domain CuNiRs. Contrary to the expectation that tethering would enhance electron delivery by restricting the conformational search by having a self-contained donor-acceptor system, we demonstrate that 4-domain BrNiR utilizes N-terminal tethering for downregulating enzymatic activity instead. Both Br2D NiR and an engineered 2-domain variant of BrNiR (Δ(Cytc-Cup) BrNiR) have 3 to 5% NiR activity compared to the well-characterized 2-domain CuNiRs from Alcaligenes xylosoxidans (AxNiR) and Achromobacter cycloclastes (AcNiR). 
Structural comparison of Δ(Cytc-Cup) BrNiR and Br2D NiR with classical 2-domain AxNiR and AcNiR reveals structural differences of the proton transfer pathway that could be responsible for the lowering of activity. Our study provides insights into unique structural and functional characteristics of naturally occurring 4-domain CuNiR and its engineered 3- and 2-domain variants. The reverse protein engineering approach utilized here has shed light onto the broader question of the evolution of transient encounter complexes and tethered electron transfer complexes. ENZYME: Copper-containing nitrite reductase (CuNiR) (EC 1.7.2.1). DATABASE: The atomic coordinate and structure factor of Δ(Cytc-Cup) BrNiR and Br2D NiR have been deposited in the Protein Data Bank (http://www.rcsb.org/) under the accession code 6THE and 6THF, respectively.",2020-04-28 +31853227,GBDTCDA: Predicting circRNA-disease Associations Based on Gradient Boosting Decision Tree with Multiple Biological Data Fusion.,"Circular RNA (circRNA) is a closed-loop structural non-coding RNA molecule which plays a significant role during the gene regulation processes. There are many previous studies shown that circRNAs can be regarded as the sponges of miRNAs. Thus, circRNA is also a key point for disease diagnosing, treating and inferring. However, traditional experimental approaches to verify the associations between the circRNA and disease are time-consuming and money-consuming. There are few computational models to predict potential circRNA-disease associations, which become our motivation to propose a new computational model. In this study, we propose a machine learning based computational model named Gradient Boosting Decision Tree with multiple biological data to predict circRNA-disease associations (GBDTCDA). The known circRNA-disease associations' data are downloaded from cricR2Disease database (http://bioinfo.snnu.edu.cn/CircR2Disease/). 
The feature vector of each circRNA-disease association pair is composed of four parts, which are the statistics information of different biological networks, the graph theory information of different biological networks, circRNA-disease associations' network information and circRNA nucleotide sequence information, respectively. Therefore, we use those feature vectors to train the gradient boosting decision tree regression model. Then, the leave one out cross validation (LOOCV) is adopted to evaluate the performance of our computational model. As for predicting some common diseases related circRNAs, our method GBDTCDA also obtains the better results. The Area under the ROC Curve (AUC) values of Basal cell carcinoma, Non-small cell lung cancer and cervical cancer are 95.8%, 88.3% and 93.5%, respectively. For further illustrating the performance of GBDTCDA, a case study of breast cancer is also supplemented in this study. Thus, our proposed method GBDTCDA is a powerful tool to predict potential circRNA-disease associations based on experimental results and analyses.",2019-11-08 +32838752,"Machine learning models predicting multidrug resistant urinary tract infections using ""DsaaS"".","

Background

The scope of this work is to build a Machine Learning model able to predict patients risk to contract a multidrug resistant urinary tract infection (MDR UTI) after hospitalization. To achieve this goal, we used different popular Machine Learning tools. Moreover, we integrated an easy-to-use cloud platform, called DSaaS (Data Science as a Service), well suited for hospital structures, where healthcare operators might not have specific competences in using programming languages but still, they do need to analyze data as a continuous process. Moreover, DSaaS allows the validation of data analysis models based on supervised Machine Learning regression and classification algorithms.

Results

We used DSaaS on a real antibiotic stewardship dataset to make predictions about antibiotic resistance in the Clinical Pathology Operative Unit of the Principe di Piemonte Hospital in Senigallia, Marche, Italy. Data related to a total of 1486 hospitalized patients with nosocomial urinary tract infection (UTI). Sex, age, age class, ward and time period, were used to predict the onset of a MDR UTI. Machine Learning methods such as Catboost, Support Vector Machine and Neural Networks were utilized to build predictive models. Among the performance evaluators, already implemented in DSaaS, we used accuracy (ACC), area under receiver operating characteristic curve (AUC-ROC), area under Precision-Recall curve (AUC-PRC), F1 score, sensitivity (SEN), specificity and Matthews correlation coefficient (MCC). Catboost exhibited the best predictive results (MCC 0.909; SEN 0.904; F1 score 0.809; AUC-PRC 0.853, AUC-ROC 0.739; ACC 0.717) with the highest value in every metric.

Conclusions

the predictive model built with DSaaS may serve as a useful support tool for physicians treating hospitalized patients with a high risk to acquire MDR UTIs. We obtained these results using only five easy and fast predictors accessible for each patient hospitalization. In future, DSaaS will be enriched with more features like unsupervised Machine Learning techniques, streaming data analysis, distributed calculation and big data storage and management to allow researchers to perform a complete data analysis pipeline. The DSaaS prototype is available as a demo at the following address: https://dsaas-demo.shinyapps.io/Server/.",2020-08-21 +31114876,"BEERE: a web server for biomedical entity expansion, ranking and explorations.","BEERE (Biomedical Entity Expansion, Ranking and Explorations) is a new web-based data analysis tool to help biomedical researchers characterize any input list of genes/proteins, biomedical terms or their combinations, i.e. 'biomedical entities', in the context of existing literature. Specifically, BEERE first aims to help users examine the credibility of known entity-to-entity associative or semantic relationships supported by database or literature references from the user input of a gene/term list. Then, it will help users uncover the relative importance of each entity-a gene or a term-within the user input by computing the ranking scores of all entities. At last, it will help users hypothesize new gene functions or genotype-phenotype associations by an interactive visual interface of constructed global entity relationship network. The output from BEERE includes: a list of the original entities matched with known relationships in databases; any expanded entities that may be generated from the analysis; the ranks and ranking scores reported with statistical significance for each entity; and an interactive graphical display of the gene or term network within data provenance annotations that link to external data sources. 
The web server is free and open to all users with no login requirement and can be accessed at http://discovery.informatics.uab.edu/beere/.",2019-07-01 +33529731,Deep learning for the radiographic diagnosis of proximal femur fractures: Limitations and programming issues.,"

Introduction

Radiology is one of the domains where artificial intelligence (AI) yields encouraging results, with diagnostic accuracy that approaches that of experienced radiologists and physicians. Diagnostic errors in traumatology are rare but can have serious functional consequences. Using AI as a radiological diagnostic aid may be beneficial in the emergency room. Thus, an effective, low-cost software that helps with making radiographic diagnoses would be a relevant tool for current clinical practice, although this concept has rarely been evaluated in orthopedics for proximal femur fractures (PFF). This led us to conduct a prospective study with the goals of: 1) programming deep learning software to help make the diagnosis of PFF on radiographs and 2) to evaluate its performance.

Hypothesis

It is possible to program an effective deep learning software to help make the diagnosis of PFF based on a limited number of radiographs.

Methods

Our database consisted of 1309 radiographs: 963 had a PFF, while 346 did not. The sample size was increased 8-fold (resulting in 10,472 radiographs) using a validated technique. Each radiograph was evaluated by an orthopedic surgeon using RectLabel™ software (https://rectlabel.com), by differentiating between healthy and fractured zones. Fractures were classified according to the AO system. The deep learning algorithm was programmed on Tensorflow™ software (Google Brain, Santa Clara, Ca, USA, tensorflow.org). In all, 9425 annotated radiographs (90%) were used for the training phase and 1074 (10%) for the test phase.

Results

The sensitivity of the algorithm was 61% for femoral neck fractures and 67% for trochanteric fractures. The specificity was 67% and 69%, the positive predictive value was 55% and 56%, while the negative predictive value was 74% and 78%, respectively.

Conclusion

Our results are not good enough for our algorithm to be used in current clinical practice. Programming of deep learning software with sufficient diagnostic accuracy can only be done with several tens of thousands of radiographs, or by using transfer learning.

Level of evidence

III; Diagnostic studies, Study of nonconsecutive patients, without consistently applied reference ""gold"" standard.",2021-01-30 +31524396,HybridMolDB: A Manually Curated Database Dedicated to Hybrid Molecules for Chemical Biology and Drug Discovery.,"Hybrid-molecule-based drug design is the combination of two or more bioactive molecules into a single chemical entity. This strategy may be used to achieve better affinity and efficacy or improved properties compared with the parent molecules, to interact with two or multiple targets, to reduce undesirable side effects, to decrease drug-drug interactions, or to reduce the emergence of drug resistance. The approach offers the prospect of better drugs for the treatment of many human diseases. Research activity in this area is increasing and has attracted many practitioners worldwide. To accelerate the design and discovery of new hybrid-molecule-based drugs, it is essential to properly collect and annotate experimental data obtained from known hybrid molecules. To address this need, we have developed HybridMolDB ( http://www.idruglab.com/HybridMolDB/index.php ), a manually curated database dedicated to hybrid molecules for chemical biology and drug discovery. It contains structures, manually annotated design protocols, pharmacological data, some physicochemical properties, ligand efficiency, drug-likeness, and ADMET characteristics, and the biological targets of known hybrid molecules. HybridMolDB supports a range of query types, including searches by text, protein sequence, chemical structure similarity, and property ranges. 
The database serves as an open source facilitating the development and/or optimization of related in silico tools for the design and discovery of hybrid-molecule-based drugs and chemical probes.",2019-09-25 +30107613,Deep genome annotation of the opportunistic human pathogen Streptococcus pneumoniae D39.,"A precise understanding of the genomic organization into transcriptional units and their regulation is essential for our comprehension of opportunistic human pathogens and how they cause disease. Using single-molecule real-time (PacBio) sequencing we unambiguously determined the genome sequence of Streptococcus pneumoniae strain D39 and revealed several inversions previously undetected by short-read sequencing. Significantly, a chromosomal inversion results in antigenic variation of PhtD, an important surface-exposed virulence factor. We generated a new genome annotation using automated tools, followed by manual curation, reflecting the current knowledge in the field. By combining sequence-driven terminator prediction, deep paired-end transcriptome sequencing and enrichment of primary transcripts by Cappable-Seq, we mapped 1015 transcriptional start sites and 748 termination sites. We show that the pneumococcal transcriptional landscape is complex and includes many secondary, antisense and internal promoters. Using this new genomic map, we identified several new small RNAs (sRNAs), RNA switches (including sixteen previously misidentified as sRNAs), and antisense RNAs. In total, we annotated 89 new protein-encoding genes, 34 sRNAs and 165 pseudogenes, bringing the S. pneumoniae D39 repertoire to 2146 genetic elements. We report operon structures and observed that 9% of operons are leaderless. The genome data are accessible in an online resource called PneumoBrowse (https://veeninglab.com/pneumobrowse) providing one of the most complete inventories of a bacterial genome to date. 
PneumoBrowse will accelerate pneumococcal research and the development of new prevention and treatment strategies.",2018-11-01 +34017114,Erectile dysfunction and prostate diseases are the predominant Google search terms amongst men's health topics.,"Patients are becoming increasingly active consumers of health information on the internet with urologic concerns being no exception. Our objective was to explore online search trends for topics related to men's health and identify information-seeking patterns related to news and media coverage of these topics. We used Google Trends ( http://google.com/trends ) to explore search trends for various search terms related to men's health in the United States over a 5-year period. Search queries provided graphs depicting search volume as a function of time, geographical data, and related topics and queries. Isolated spikes in search volume were further explored to identify a related event. Erectile dysfunction was the most-searched topic over the last 5 years in the United States. Prostate cancer and benign prostatic hyperplasia were the second and third most-searched topics, respectively. Other popular topics involved symptoms or pathologies of the testicles and penis. Most topics had relatively stable search volumes, with the exceptions of premature ejaculation and Peyronie's disease. Several observed spikes in search volume were attributable to singular events, mostly in the form of online article publications or social media posts. We believe it may be helpful for providers to stay informed of cultural events relating to medical conditions to anticipate patient concerns.",2021-05-20 +29136092,microRPM: a microRNA prediction model based only on plant small RNA sequencing data.,"Motivation:MicroRNAs (miRNAs) are endogenous non-coding small RNAs (of about 22 nucleotides), which play an important role in the post-transcriptional regulation of gene expression via either mRNA cleavage or translation inhibition. 
Several machine learning-based approaches have been developed to identify novel miRNAs from next generation sequencing (NGS) data. Typically, precursor/genomic sequences are required as references for most methods. However, the non-availability of genomic sequences is often a limitation in miRNA discovery in non-model plants. A systematic approach to determine novel miRNAs without reference sequences is thus necessary. Results:In this study, an effective method was developed to identify miRNAs from non-model plants based only on NGS datasets. The miRNA prediction model was trained with several duplex structure-related features of mature miRNAs and their passenger strands using a support vector machine algorithm. The accuracy of the independent test reached 96.61% and 93.04% for dicots (Arabidopsis) and monocots (rice), respectively. Furthermore, true small RNA sequencing data from orchids was tested in this study. Twenty-one predicted orchid miRNAs were selected and experimentally validated. Significantly, 18 of them were confirmed in the qRT-PCR experiment. This novel approach was also compiled as a user-friendly program called microRPM (miRNA Prediction Model). Availability and implementation:This resource is freely available at http://microRPM.itps.ncku.edu.tw. Contact:nslin@sinica.edu.tw or sarah321@mail.ncku.edu.tw. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +33079985,SeqEditor: an application for primer design and sequence analysis with or without GTF/GFF files.,"

Motivation

Sequence analyses oriented to investigate specific features, patterns and functions of protein and DNA/RNA sequences usually require tools based on graphic interfaces whose main characteristic is their intuitiveness and interactivity with the user's expertise, especially when curation or primer design tasks are required. However, interface-based tools usually pose certain computational limitations when managing large sequences or complex datasets, such as genome and transcriptome assemblies. Having these requirements in mind, we have developed SeqEditor, an interactive software tool for nucleotide and protein sequences' analysis.

Result

SeqEditor is a cross-platform desktop application for the analysis of nucleotide and protein sequences. It is managed through a Graphical User Interface and can work either as a graphical sequence browser or as a fasta task manager for multi-fasta files. SeqEditor has been optimized for the management of large sequences, such as contigs, scaffolds or even chromosomes, and includes a GTF/GFF viewer to visualize and manage annotation files. In turn, this allows for content mining from reference genomes and transcriptomes with similar efficiency to that of command line tools. SeqEditor also incorporates a set of tools for singleplex and multiplex PCR primer design and pooling that uses a newly optimized and validated search strategy for target and species-specific primers. All these features make SeqEditor a flexible application that can be used to analyse complex sequences, design primers in PCR assays oriented for diagnosis, and/or manage, edit and personalize reference sequence datasets.

Availability and implementation

SeqEditor was developed in Java using Eclipse Rich Client Platform and is publicly available at https://gpro.biotechvana.com/download/SeqEditor as binaries for Windows, Linux and Mac OS. The user manual and tutorials are available online at https://gpro.biotechvana.com/tool/seqeditor/manual.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-07-01 +33514746,"Nelumbo genome database, an integrative resource for gene expression and variants of Nelumbo nucifera.","Sacred lotus (Nelumbo nucifera, or lotus) is one of the most widely grown aquatic plant species with important uses, such as in water gardening and in vegetable and herbal medicine. A public genomic database of lotus would facilitate studies of lotus and other aquatic plant species. Here, we constructed an integrative database: the Nelumbo Genome Database (NGD, http://nelumbo.biocloud.net ). This database is a collection of the most updated lotus genome assembly and contains information on both gene expression in different tissues and coexpression networks. In the NGD, we also integrated genetic variants and key traits from our 62 newly sequenced lotus cultivars and 26 previously reported cultivars, which are valuable for lotus germplasm studies. As applications including BLAST, BLAT, Primer, Annotation Search, Variant and Trait Search are deployed, users can perform sequence analyses and gene searches via the NGD. Overall, the valuable genomic resources provided in the NGD will facilitate future studies on population genetics and molecular breeding of lotus.",2021-01-29 +33626204,Machine learning-based analysis of alveolar and vascular injury in SARS-CoV-2 acute respiratory failure.,"Severe acute respiratory syndrome-coronavirus-2 (SARS-CoV-2) pneumopathy is characterized by a complex clinical picture and heterogeneous pathological lesions, both involving alveolar and vascular components. The severity and distribution of morphological lesions associated with SARS-CoV-2 and how they relate to clinical, laboratory, and radiological data have not yet been studied systematically. The main goals of the present study were to objectively identify pathological phenotypes and factors that, in addition to SARS-CoV-2, may influence their occurrence. 
Lungs from 26 patients who died from SARS-CoV-2 acute respiratory failure were comprehensively analysed. Robust machine learning techniques were implemented to obtain a global pathological score to distinguish phenotypes with prevalent vascular or alveolar injury. The score was then analysed to assess its possible correlation with clinical, laboratory, radiological, and tissue viral data. Furthermore, an exploratory random forest algorithm was developed to identify the most discriminative clinical characteristics at hospital admission that might predict pathological phenotypes of SARS-CoV-2. Vascular injury phenotype was observed in most cases being consistently present as pure form or in combination with alveolar injury. Phenotypes with more severe alveolar injury showed significantly more frequent tracheal intubation; longer invasive mechanical ventilation, illness duration, intensive care unit or hospital ward stay; and lower tissue viral quantity (p < 0.001). Furthermore, in this phenotype, superimposed infections, tumours, and aspiration pneumonia were also more frequent (p < 0.001). Random forest algorithm identified some clinical features at admission (body mass index, white blood cells, D-dimer, lymphocyte and platelet counts, fever, respiratory rate, and PaCO2 ) to stratify patients into different clinical clusters and potential pathological phenotypes (a web-app for score assessment has also been developed; https://r-ubesp.dctv.unipd.it/shiny/AVI-Score/). In SARS-CoV-2 positive patients, alveolar injury is often associated with other factors in addition to viral infection. Identifying phenotypical patterns at admission may enable a better stratification of patients, ultimately favouring the most appropriate management. © 2021 The Pathological Society of Great Britain and Ireland. 
Published by John Wiley & Sons, Ltd.",2021-03-30 +31335342,Weekend Admission to Inpatient Rehabilitation Facilities Is Associated With Transfer to Acute Care in a Nationwide Sample of Patients With Stroke.,"

Objective

The aim of the study was to determine the impact of weekend versus weekday admission to an inpatient rehabilitation facility on the risk of acute care transfer in patients with stroke.

Design

This was a retrospective analysis using the Uniform Data System for Medical Rehabilitation, a national database comprising data from 70% of US inpatient rehabilitation facilities. A total of 1,051,436 adult (age ≥18 yrs) stroke cases were identified between 2002 and 2014 that met inclusion criteria. Logistic regression models were developed to test for associations between weekend (Friday-Sunday) versus weekday (Monday-Thursday) inpatient rehabilitation facility admission and transfer to acute care (primary outcome) and inpatient rehabilitation facility length of stay (secondary outcome), adjusting for relevant patient, medical, and facility variables. A secondary analysis examined acute care transfer from 2002 to 2009 before passage of the Affordable Care Act (ACA), 2010 to 2012 post-Affordable Care Act, and 2013 to 2014 after implementation of the Hospital Readmissions Reduction Program.

Results

Weekend inpatient rehabilitation facility admission was associated with increased odds of acute care transfer (odds ratio = 1.06, 95% confidence interval = 1.04-1.08) and slightly shorter inpatient rehabilitation facility length of stay (P < 0.001). Overall, the risk of acute care transfer decreased after the ACA and Hospital Readmissions Reduction Program.

Conclusions

Weekend admission to inpatient rehabilitation facility may pose a modest increase in the risk of transfer to acute care in patients with stroke.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) Understand disparities in obesity rates among adolescents with mobility disabilities; (2) Describe limitations of current clinical screening methods of obesity in children with mobility disabilities; and (3) Identify potential alternatives for obesity screening in children with mobility disabilities.

Level

Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2020-01-01 +34192986,Low Abundance of Lactococcus lactis in Human Colorectal Cancer Is Associated with Decreased Natural Killer Cells.,"A limited number of studies have demonstrated the role of Lactococcus lactis (L. lactis) in human colorectal cancers (CRCs). The association of L. lactis abundance with the density of natural killer (NK) cells has not been investigated before. In this study, the L. lactis abundance in 60 CRC specimens, 20 adenoma (AD) specimens, and 29 normal colorectal tissues (NCs) specimens was investigated using the fluorescence in situ hybridization of 16S ribosomal RNA. The density of NK cells was detected using immunofluorescence in 28 CRC specimens, 12 AD specimens, and 22 NC specimens. The presence of L. lactis in NCs (48.28%) was detected significantly higher than that in the AD (20.00%, P = .044) and CRC (23.33%, P = .018) specimens. The abundance of L. lactis in NCs (32.73 ± 7.24) was also found to be significantly higher than that in AD (8.91 ± 5.89, P = .029) and CRC (5.63 ± 1.67, P = .003) specimens. In addition, the density of NKp30+ NK cells in NCs (51.14 ± 4.84) was significantly higher than that in the AD (6.10 ± 1.31) and CRC (1.72 ± 0.40) specimens (P < .001). Moreover, a positive association of L. lactis abundance with NKp30+ NK cells density in the colorectal samples (P < .001) was observed. The low abundance of L. 
lactis in the CRC tissues was associated with the decreased NK cells, which suggested that this might contribute to the progression of CRC by decreasing the number of NK cells.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1944649.",2021-06-30 +31417899,Ultra-Low-Cost 3D Bioprinting: Modification and Application of an Off-the-Shelf Desktop 3D-Printer for Biofabrication.,"3D bioprinting has become a versatile and powerful method in tissue engineering and regenerative medicine and is increasingly adapted by other disciplines due to its tremendous potential beyond its typical applications. However, commercially available 3D bioprinting systems are typically expensive circumventing the broad implementation, including laboratories in low-resource settings. To address the limitations of conventional and commercially available technology, we developed a 3D bioprinter by modification of an off-the-shelf 3D desktop printer, that can be installed within a single day, is of handy size to fit into a standard laminar flow hood, customizable, ultra-low cost and thus, affordable to a broad range of research labs, or educational institutions. We evaluate accuracy and reproducibility of printing results using alginate and alginate/gelatin-hydrogels and demonstrate its potential for biomedical use by printing of various two-and three-dimensional cell-free and mammalian cell-laden objects using recombinant HEKYFP cells, stably expressing yellow fluorescent protein (YFP) as a model system and high-content imaging. We further provide a parts list and 3D design files in STL and STEP format for reconstructing the device. 
A time-lapse video of the custom-built device during operation is available at https://vimeo.com/274482794.",2019-07-31 +27794041,NGSmethDB 2017: enhanced methylomes and differential methylation.,"The 2017 update of NGSmethDB stores whole genome methylomes generated from short-read data sets obtained by bisulfite sequencing (WGBS) technology. To generate high-quality methylomes, stringent quality controls were integrated with third-part software, adding also a two-step mapping process to exploit the advantages of the new genome assembly models. The samples were all profiled under constant parameter settings, thus enabling comparative downstream analyses. Besides a significant increase in the number of samples, NGSmethDB now includes two additional data-types, which are a valuable resource for the discovery of methylation epigenetic biomarkers: (i) differentially methylated single-cytosines; and (ii) methylation segments (i.e. genome regions of homogeneous methylation). The NGSmethDB back-end is now based on MongoDB, a NoSQL hierarchical database using JSON-formatted documents and dynamic schemas, thus accelerating sample comparative analyses. Besides conventional database dumps, track hubs were implemented, which improved database access, visualization in genome browsers and comparative analyses to third-part annotations. In addition, the database can be also accessed through a RESTful API. Lastly, a Python client and a multiplatform virtual machine allow for program-driven access from user desktop. This way, private methylation data can be compared to NGSmethDB without the need to upload them to public servers. 
Database website: http://bioinfo2.ugr.es/NGSmethDB.",2016-10-27 +31292807,Bioactivity-explorer: a web application for interactive visualization and exploration of bioactivity data.,"To better leverage the accumulated bioactivity data in the ChEMBL database, we have developed Bioactivity-explorer, a web application for interactive visualization and exploration of the large-scale bioactivity data in ChEMBL. Mining and integration of the Therapeutic Target Database disease-target mapping into the ChEMBL database has enabled Bioactivity-explorer to include 493,430 scaffolds, 31,400,000 matched molecular pairs, 1330,220 target-target interactions in terms of shared active compounds, 4526,718 target-target interactions in terms of shared active scaffolds, 97,041,700 molecule-molecule interactions and 14,974 disease-target mappings. This web tool is available at http://cadd.pharmacy.nankai.edu.cn/b17r . The source codes of the front end and back end, released under MIT license, can be found at GitHub.",2019-07-10 +34216777,Editorial: Toward Neurobiological-Based Treatments of Depression and Anxiety: A Potential Case for the Nucleus Accumbens.,"Depression and anxiety disorders together account for the majority of mental health disorders in childhood and adolescence, and are often comorbid.1 The frequent co-occurrence of these disorders has motivated clinicians and researchers to consider dimensional taxonomy models that focus on neurobiological substrates that explain transdiagnostic constructs of functioning (eg, reward processing abnormalities). Such an approach would redefine not only depression and anxiety disorders but could also revolutionize clinical care, as such biobehavioral targets, rather than a traditional primary diagnosis, could serve as the basis for treatment planning. 
In this issue of the Journal, Auerbach et al.2 examined whether and how a key structure involved in reward processing, the nucleus accumbens (NAcc), is altered in adolescents aged 14 to 17 years with depression and/or anxiety (including generalized anxiety, separation anxiety, social anxiety, specific phobia, agoraphobia, and panic) disorders, and whether NAcc morphometry and function would improve prediction of 6-month symptomatology. As part of the Boston Adolescent Neuroimaging of Depression and Anxiety (BANDA) initiative,3 the researchers compared 129 adolescents with primary diagnoses of depression and/or anxiety and 64 psychiatrically healthy controls on gray matter volumes of the NAcc and on functional activation of the NAcc during a monetary incentive delay task using magnetic resonance imaging (MRI) protocols harmonized with the Human Connectome project (http://www.humanconnectomeproject.com/). Compared to healthy adolescents, depressed/anxious adolescents exhibited significantly smaller volumes of the NAcc and blunted NAcc responses to reward receipt. Among the 88 depressed/anxious adolescents and 57 healthy controls who provided symptom data 6 months later, the researchers also found that inclusion of NAcc volumes, but not reward-related responses of the NAcc on the task, significantly improved statistical prediction of subsequent depression symptoms.",2021-06-30 +33968128,The Molecular Functions of MeCP2 in Rett Syndrome Pathology.,"MeCP2 protein, encoded by the MECP2 gene, binds to DNA and affects transcription. Outside of this activity the true range of MeCP2 function is still not entirely clear. As MECP2 gene mutations cause the neurodevelopmental disorder Rett syndrome in 1 in 10,000 female births, much of what is known about the biologic function of MeCP2 comes from studying human cell culture models and rodent models with Mecp2 gene mutations. 
In this review, the full scope of MeCP2 research available in the NIH Pubmed (https://pubmed.ncbi.nlm.nih.gov/) data base to date is considered. While not all original research can be mentioned due to space limitations, the main aspects of MeCP2 and Rett syndrome research are discussed while highlighting the work of individual researchers and research groups. First, the primary functions of MeCP2 relevant to Rett syndrome are summarized and explored. Second, the conflicting evidence and controversies surrounding emerging aspects of MeCP2 biology are examined. Next, the most obvious gaps in MeCP2 research studies are noted. Finally, the most recent discoveries in MeCP2 and Rett syndrome research are explored with a focus on the potential and pitfalls of novel treatments and therapies.",2021-04-23 +35424427,DiaNat-DB: a molecular database of antidiabetic compounds from medicinal plants.,"Natural products are an invaluable source of molecules with a large variety of biological activities. Interest in natural products in drug discovery is documented in an increasing number of publications of bioactive secondary metabolites. Among those, medicinal plants are one of the most studied for this endeavor. An ever thriving area of opportunity within the field concerns the discovery of antidiabetic natural products. As a result, a vast amount of secondary metabolites are isolated from medicinal plants used against diabetes mellitus but whose information has not been organized systematically yet. Several research articles enumerate antidiabetic compounds, but the lack of a chemical database for antidiabetic metabolites limits their application in drug development. In this work, we present DiaNat-DB, a comprehensive collection of 336 molecules from medicinal plants reported to have in vitro or in vivo antidiabetic activity. We also discuss a chemoinformatic analysis of DiaNat-DB to compare antidiabetic drugs and natural product databases. 
To further explore the antidiabetic chemical space based on DiaNat compounds, we searched for analogs in ZINC15, an extensive database listing commercially available compounds. This work will help future analyses, design, and development of new antidiabetic drugs. DiaNat-DB and its ZINC15 analogs are freely available at http://rdu.iquimica.unam.mx/handle/20.500.12214/1186.",2021-01-28 +32516398,Tropical principal component analysis on the space of phylogenetic trees.,"

Motivation

Due to new technology for efficiently generating genome data, machine learning methods are urgently needed to analyze large sets of gene trees over the space of phylogenetic trees. However, the space of phylogenetic trees is not Euclidean, so ordinary machine learning methods cannot be directly applied. In 2019, Yoshida et al. introduced the notion of tropical principal component analysis (PCA), a statistical method for visualization and dimensionality reduction using a tropical polytope with a fixed number of vertices that minimizes the sum of tropical distances between each data point and its tropical projection. However, their work focused on the tropical projective space rather than the space of phylogenetic trees. We focus here on tropical PCA for dimension reduction and visualization over the space of phylogenetic trees.

Results

Our main results are 2-fold: (i) theoretical interpretations of the tropical principal components over the space of phylogenetic trees, namely, the existence of a tropical cell decomposition into regions of fixed tree topology; and (ii) the development of a stochastic optimization method to estimate tropical PCs over the space of phylogenetic trees using a Markov Chain Monte Carlo approach. This method performs well with simulation studies, and it is applied to three empirical datasets: Apicomplexa and African coelacanth genomes as well as sequences of hemagglutinin for influenza from New York.

Availability and implementation

Dataset: http://polytopes.net/Data.tar.gz. Code: http://polytopes.net/tropica_MCMC_codes.tar.gz.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-11-01 +33502607,Missense3D-DB web catalogue: an atom-based analysis and repository of 4M human protein-coding genetic variants.,"The interpretation of human genetic variation is one of the greatest challenges of modern genetics. New approaches are urgently needed to prioritize variants, especially those that are rare or lack a definitive clinical interpretation. We examined 10,136,597 human missense genetic variants from GnomAD, ClinVar and UniProt. We were able to perform large-scale atom-based mapping and phenotype interpretation of 3,960,015 of these variants onto 18,874 experimental and 84,818 in house predicted three-dimensional coordinates of the human proteome. We demonstrate that 14% of amino acid substitutions from the GnomAD database that could be structurally analysed are predicted to affect protein structure (n = 568,548, of which 566,439 rare or extremely rare) and may, therefore, have a yet unknown disease-causing effect. The same is true for 19.0% (n = 6266) of variants of unknown clinical significance or conflicting interpretation reported in the ClinVar database. The results of the structural analysis are available in the dedicated web catalogue Missense3D-DB ( http://missense3d.bc.ic.ac.uk/ ). For each of the 4 M variants, the results of the structural analysis are presented in a friendly concise format that can be included in clinical genetic reports. A detailed report of the structural analysis is also available for the non-experts in structural biology. Population frequency and predictions from SIFT and PolyPhen are included for a more comprehensive variant interpretation. This is the first large-scale atom-based structural interpretation of human genetic variation and offers geneticists and the biomedical community a new approach to genetic variant interpretation.",2021-01-27 +30329036,GeneLab: Omics database for spaceflight experiments.,"

Motivation

To curate and organize expensive spaceflight experiments conducted aboard space stations and maximize the scientific return of investment, while democratizing access to vast amounts of spaceflight related omics data generated from several model organisms.

Results

The GeneLab Data System (GLDS) is an open access database containing fully coordinated and curated 'omics' (genomics, transcriptomics, proteomics, metabolomics) data, detailed metadata and radiation dosimetry for a variety of model organisms. GLDS is supported by an integrated data system allowing federated search across several public bioinformatics repositories. Archived datasets can be queried using full-text search (e.g. keywords, Boolean and wildcards) and results can be sorted in multifactorial manner using assistive filters. GLDS also provides a collaborative platform built on GenomeSpace for sharing files and analyses with collaborators. It currently houses 172 datasets and supports standard guidelines for submission of datasets, MIAME (for microarray), ENCODE Consortium Guidelines (for RNA-seq) and MIAPE Guidelines (for proteomics).

Availability and implementation

https://genelab.nasa.gov/.",2019-05-01 +33680356,Maize specialized metabolome networks reveal organ-preferential mixed glycosides.,"Despite the scientific and economic importance of maize, little is known about its specialized metabolism. Here, five maize organs were profiled using different reversed-phase liquid chromatography-mass spectrometry methods. The resulting spectral metadata, combined with candidate substrate-product pair (CSPP) networks, allowed the structural characterization of 427 of the 5,420 profiled compounds, including phenylpropanoids, flavonoids, benzoxazinoids, and auxin-related compounds, among others. Only 75 of the 427 compounds were already described in maize. Analysis of the CSPP networks showed that phenylpropanoids are present in all organs, whereas other metabolic classes are rather organ-enriched. Frequently occurring CSPP mass differences often corresponded with glycosyl- and acyltransferase reactions. The interplay of glycosylations and acylations yields a wide variety of mixed glycosides, bearing substructures corresponding to the different biochemical classes. For example, in the tassel, many phenylpropanoid and flavonoid-bearing glycosides also contain auxin-derived moieties. The characterized compounds and mass differences are an important step forward in metabolic pathway discovery and systems biology research. The spectral metadata of the 5,420 compounds is publicly available (DynLib spectral database, https://bioit3.irc.ugent.be/dynlib/).",2021-01-26 +33496608,First Report of Fusarium wilt of Coleus forskohlii Caused by Fusarium oxysporum in China. ,"Coleus forskohlii (Wild) Briq. is an aromatic plant in the Lamiaceae family cultivated primarily in India, Sri Lanka, Nepal and China (Yunnan Province). This herb is considered to have medicinal properties and the whole plant can be used to treat asthma, cancer and other diseases with remarkable efficacy. Due to the high medicinal and economic value of C. 
forskohlii, it has been introduced to Tongcheng (N29°18'12.24″, E113°53'59.36″), Hubei Province for cultivation. However, severe Fusarium wilt disease of C. forskohlii has been epidemic in Tongcheng since 2018 with a disease incidence of 5 to 30% in surveyed fields. This disease is characterized typically by root rot, vascular discoloration and leaf wilting of C. forskohlii (Fig 1), resulting in progressive plant death. Ten diseased plants were collected from the fields and the roots and stems were rinsed in 70% ethanol for 5 min and samples at the junction of disease and healthy tissues (0.5 × 0.5 cm2) were cutted and placed on potato dextrose agar (PDA) for fungal isolation in a dark chamber at 28°C. Eventually, ten pure isolates were obtained from hyphal-tip followed by single-spore purification on PDA. Seven of the purified isolates showed white aerial mycelium initially and secreted orange-brown pigment 8 days after incubation. Macroconidia were falciform, hyaline, three to five septate, ranging from 2.02 to 4.17 (mean 2.98 µm) × 10.05 to 21.90 µm (mean 12.04 µm) in size (n = 30) (Fig 2). These morphological characteristics resembled Fusarium oxysporum. (Leslie and Summerell 2006) and we selected one of them for molecular identification. Genome DNA was extracted from isolate (RS-4) using the CTAB method (Mahadevakumar et al. 2018). The translation elongation factor 1 alpha (EF-1α) DNA sequence was amplified using primers EF1/EF2 (Geiser et al. 2004), and the second largest subunit of RNA polymerase II (RPB2) DNA sequence was amplified using primers fRPB2-5F/fRPB2-7cR (Liu et al. 1999). The obtained EF-1α sequence of RS-4 (MW219142) showed 100% identity with that of F. oxysporum (FD_01376) (FUSARIUM-ID database). RPB2 sequences of RS-4 (MW219143) showed 100% identity with F. oxysporum (FD_01679) (FUSARIUM-ID database). 
Moreover, a phylogenetic tree of the EF-1α gene sequence of RS-4 was constructed based on the Neighbor-Joining method in MEGA7 software (Tamura et al. 2013) and revealed that strain RS-4 was closest to F. oxysporum (Fig 2). To test the pathogenicity of RS-4, six healthy leaves of C. forskohlii were collected and inoculated either with the colonized PDA discs (diameter, 5 mm) of RS-4 or control PDA discs, in a moist chamber at 25 ± 2°C. Five days later, brown-black lesions were observed on all inoculated leaves. However, the non-inoculated leaves were maintained asymptomatic. For in vivo pathogenicity test, twenty-day-old C. forskohlii plants (n=3) were inoculated with 106 spores/ml of the RS-4 at a position approximately 1 cm above the soil. Three seedlings treated with sterile water were used as controls. These inoculated and control seedlings were incubated in a moist chamber (25 ± 2 °C, RH 85%). Three days later, typical Fusarium rot symptoms were observed on all inoculated seedlings with rotten stems and withering leaves (Fig 2). Fungal pathogens were re-isolated from the inoculated sites of in vitro and in vivo inoculations by repeating the above isolating operation, and were reconfirmed through morphological features. This is the first report of F. oxysporum causing root rot on C. forskohlii in China. F. oxysporum is one of the most economically important fungal pathogens causing vascular wilt on a wide range of plants worldwide (Dean et al. 2012). The identification of F. oxysporum as the causal agent of the observed Fusarium wilt on C. forskohlii, is critical to the prevention and control of this disease in the future. Acknowledgement This research was supported by funding from the Key project at the central government level titled, ""The ability to establish sustainable uses for valuable Chinese medicinale resources"" (2060302) Reference Dean, R., et al. 2012. Mol. Plant. Pathol. 13: 414. https://doi.org/10.1111/j.1364-3703.2011.00783.x. Geiser, D. 
M., et al. 2004. Eur. J. Plant Pathol. 110: 473. https://doi.org/10.1023/B:EJPP.0000032386.75915.a0. Leslie, J. F. and Summerell, B. A. 2006. The Fusarium Laboratory Manual. Blackwell Publishing, Oxford, U.K. Liu, Y. J., et al. 1999. Mol. Biol. Evol. 16: 1799. https://doi.org/10.1093/oxfordjournals.molbev.a026092 Mahadevakumar, S. et al. 2018. Eur. J. Plant Pathol. 151:1081. https://doi.org/10.1007/s10658-017-1415-2. Tamura, K., et al. 2013. Mol. Biol. Evol. 30: 2725. https://doi.org/10.1093/molbev/msw054.",2021-01-26 +35837577,Universal Epidemic Curve for COVID-19 and Its Usage for Forecasting.,"We construct a universal epidemic curve for COVID-19 using the epidemic curves of eight nations that have reached saturation for the first phase and then fit an eight-degree polynomial that passes through the universal curve. We take India's epidemic curve up to January 1, 2021 and match it with the universal curve by minimizing square-root error between the model prediction and actual value. The constructed curve has been used to forecast epidemic evolution up to February 25, 2021. The predictions of our model and those of supermodel for India (Agrawal et al. in Indian J Med Res, 2020; Vidyasagar et al. in https://www.iith.ac.in/~m_vidyasagar/arXiv/Super-Model.pdf, 2020) are reasonably close to each other considering the uncertainties in data fitting.",2021-02-27 +,PSV-3 Effects of space allowance and marketing strategy on growth performance of pigs raised to heavy market weights,"Abstract A total of 976 pigs (PIC 327×L42, initially 22 ± 1.5 kg BW) were used in a 160-d study to determine the influence of space allowance and marketing strategy on performance of pigs raised to heavy market weights (165 kg). Pens were blocked by location and allotted to 1 of 6 treatments with 8 pens/treatment. 
The first four treatments reduced space allowance/pig via initial pen stocking density: 14 pigs/pen (1.20 m2/pig), 17 pigs/pen (0.98 m2/pig), 20 pigs/pen (0.84 m2/pig), or 23 pigs/pen (0.73 m2/pig). The fifth treatment began with 25 pigs/pen (0.67 m2/pig) and the heaviest 3 pigs/pen were removed on d 93, then on d 122 pens were marketed to a common inventory of 20 pigs/pen, and on d 147 marketed to a common pen inventory of 17 pigs/pen. The sixth treatment began with 23 pigs/pen (0.73 m2/pig) and were marketed to a common inventory of 20 pigs/pen on d 108 and marketed to a common inventory of 17 pigs/pen on d 147. Data were analyzed using PROC GLIMMIX with pen as the experimental unit. Overall (d 0 to 160) ADG, ADFI, and final BW decreased (linear, P < 0.001) and G:F increased (quadratic, P = 0.042) as space allowance decreased. When comparing treatments with multiple marketing events (treatments 5 and 6) to treatment 4, there was no evidence for differences (P > 0.05) for overall ADG or ADFI; however, overall G:F was improved (P < 0.05) for pigs initially stocked at 0.67 m2/pig and marketed four times compared to both treatments that initially allowed 0.73 m2/pig, regardless of marketing structure. These results indicate that decreasing space allowance of heavy weight pigs reduces growth, feed intake and final BW, although use of multiple marketing events prior to final marketing may allow for increased number of pigs marketed/pen while balancing reduced growth performance often associated with increased stocking density. http://www.conferenceharvester.com/",2019-07-01 +30066211,HAMdb: a database of human autophagy modulators with specific pathway and disease information.,"Autophagy is an important homeostatic cellular recycling mechanism responsible for degrading unnecessary or dysfunctional cellular organelles and proteins in all living cells. 
In addition to its vital homeostatic role, this degradation pathway also involves in various human disorders, including metabolic conditions, neurodegenerative diseases, cancers and infectious diseases. Therefore, the comprehensive understanding of autophagy process, autophagy-related modulators and corresponding pathway and disease information will be of great help for identifying the new autophagy modulators, potential drug candidates, new diagnostic and therapeutic targets. In recent years, some autophagy databases providing structural and functional information were developed, but the specific databases covering autophagy modulator (proteins, chemicals and microRNAs)-related target, pathway and disease information do not exist. Hence, we developed an online resource, Human Autophagy Modulator Database (HAMdb, http://hamdb.scbdd.com ), to provide researchers related pathway and disease information as many as possible. HAMdb contains 796 proteins, 841 chemicals and 132 microRNAs. Their specific effects on autophagy, physicochemical information, biological information and disease information were manually collected and compiled. Additionally, lots of external links were available for more information covering extensive biomedical knowledge. HAMdb provides a user-friendly interface to query, search, browse autophagy modulators and their comprehensive related information. HAMdb will help researchers understand the whole autophagy process and provide detailed information about related diseases. Furthermore, it can give hints for the identification of new diagnostic and therapeutic targets and the discovery of new autophagy modulators. 
In a word, we hope that HAMdb has the potential to promote the autophagy research in pharmacological and pathophysiological area.",2018-07-31 +,Molecular phylogenetic and morphological studies on the systematic position of Heracula discivitta reveal a new subfamily of Pseudobistonidae (Lepidoptera: Geometroidea),"Heracula discivitta Moore is an uncommon moth species currently recorded from India, Nepal and China. Although this species has traditionally been placed in Lymantriinae, its systematic position in Macroheterocera has been enigmatic due to its unique morphological features. Here we used molecular and morphological data to explore the systematic position of H. discivitta. Our molecular phylogenetic analyses indicate that this species is sister to Pseudobiston pinratanai Inoue, a member of a recently established monotypic family Pseudobistonidae. The examinations of morphological features further show that H. discivitta shares synapomorphies with Pseudobistonidae. Based on the analysis results, we propose a new subfamily of Pseudobistonidae (Heraculinae subfam.n.) to accommodate H. discivitta. The resemblance of the habitus to that of the brahmaeid genus Calliprogonos Mell & Hering is discussed. This published work has been registered on ZooBank, http://zoobank.org/urn:lsid:urn:lsid:zoobank.org:pub:63D17850‐6D51‐4E03‐A5D6‐F9EF6E7AF402.",2019-01-01 +31260629,Computational Model To Predict the Fraction of Unbound Drug in the Brain.,"Knowing the value of the unbound drug fraction in the brain (fu,brain) is essential in estimating its effects and toxicity on the central nervous system (CNS); however, no model to predict fu,brain without experimental procedures is publicly available. In this study, we collected 253 measurements from the literature and an open database and built in silico models to predict fu,brain using only freely available software. 
By selecting appropriate descriptors, training, and evaluation, our model showed an acceptable performance on a test data set (R2 = 0.630, percentage of compounds predicted within a 3-fold error: 69.4%) using chemical structure alone. Our model is available at https://drumap.nibiohn.go.jp/fubrain/ , and all of our data sets can be obtained from the Supporting Information.",2019-07-11 +28961690,ncDR: a comprehensive resource of non-coding RNAs involved in drug resistance.,"

Summary

As a promising field of individualized therapy, non-coding RNA pharmacogenomics promotes the understanding of different individual responses to certain drugs and acts as a reasonable reference for clinical treatment. However, relevant information is scattered across the published literature, which is inconvenient for researchers to explore non-coding RNAs that are involved in drug resistance. To address this, we systemically identified validated and predicted drug resistance-associated microRNAs and long non-coding RNAs through manual curation and computational analysis. Subsequently, we constructed an omnibus repository named ncDR, which furnishes a user-friendly interface that allows for convenient browsing, visualization, querying and downloading of data. Given the rapidly increasing interest in precision medicine, ncDR will significantly improve our understanding of the roles of regulatory non-coding RNAs in drug resistance and has the potential to be a timely and valuable resource.

Availability and implementation

http://www.jianglab.cn/ncDR/.

Contact

jiangwei@hrbmu.edu.cn or lw2247@yeah.net.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +28521733,NFPscanner: a webtool for knowledge-based deciphering of biomedical networks.,"

Background

Many biological pathways have been created to represent different types of knowledge, such as genetic interactions, metabolic reactions, and gene-regulating and physical-binding relationships. Biologists are using a wide range of omics data to elaborately construct various context-specific differential molecular networks. However, they cannot easily gain insight into unfamiliar gene networks with the tools that are currently available for pathways resource and network analysis. They would benefit from the development of a standardized tool to compare functions of multiple biological networks quantitatively and promptly.

Results

To address this challenge, we developed NFPscanner, a web server for deciphering gene networks with pathway associations. Adapted from a recently reported knowledge-based framework called network fingerprint, NFPscanner integrates the annotated pathways of 7 databases, 4 algorithms, and 2 graphical visualization modules into a webtool. It implements 3 types of network analysis: Fingerprint: Deciphering gene networks and highlighting inherent pathway modules Alignment: Discovering functional associations by finding optimized node mapping between 2 gene networks Enrichment: Calculating and visualizing gene ontology (GO) and pathway enrichment for genes in networks Users can upload gene networks to NFPscanner through the web interface and then interactively explore the networks' functions.

Conclusions

NFPscanner is open-source software for non-commercial use, freely accessible at http://biotech.bmi.ac.cn/nfs .",2017-05-18 +27924023,MethSMRT: an integrative database for DNA N6-methyladenine and N4-methylcytosine generated by single-molecular real-time sequencing.,"DNA methylation is an important type of epigenetic modifications, where 5- methylcytosine (5mC), 6-methyadenine (6mA) and 4-methylcytosine (4mC) are the most common types. Previous efforts have been largely focused on 5mC, providing invaluable insights into epigenetic regulation through DNA methylation. Recently developed single-molecule real-time (SMRT) sequencing technology provides a unique opportunity to detect the less studied DNA 6mA and 4mC modifications at single-nucleotide resolution. With a rapidly increased amount of SMRT sequencing data generated, there is an emerging demand to systematically explore DNA 6mA and 4mC modifications from these data sets. MethSMRT is the first resource hosting DNA 6mA and 4mC methylomes. All the data sets were processed using the same analysis pipeline with the same quality control. The current version of the database provides a platform to store, browse, search and download epigenome-wide methylation profiles of 156 species, including seven eukaryotes such as Arabidopsis, C. elegans, Drosophila, mouse and yeast, as well as 149 prokaryotes. It also offers a genome browser to visualize the methylation sites and related information such as single nucleotide polymorphisms (SNP) and genomic annotation. Furthermore, the database provides a quick summary of statistics of methylome of 6mA and 4mC and predicted methylation motifs for each species. 
MethSMRT is publicly available at http://sysbio.sysu.edu.cn/methsmrt/ without use restriction.",2016-10-18 +29040681,20 years of the SMART protein domain annotation resource.,"SMART (Simple Modular Architecture Research Tool) is a web resource (http://smart.embl.de) for the identification and annotation of protein domains and the analysis of protein domain architectures. SMART version 8 contains manually curated models for more than 1300 protein domains, with approximately 100 new models added since our last update article (1). The underlying protein databases were synchronized with UniProt (2), Ensembl (3) and STRING (4), doubling the total number of annotated domains and other protein features to more than 200 million. In its 20th year, the SMART analysis results pages have been streamlined again and its information sources have been updated. SMART's vector based display engine has been extended to all protein schematics in SMART and rewritten to use the latest web technologies. The internal full text search engine has been redesigned and updated, resulting in greatly increased search speed.",2018-01-01 +34121525,Music for your mental health? The development and evaluation of a group mental health intervention in subacute rehabilitation.,"Objectives: Short-stay residents of nursing homes experience high rates of mental health (MH) distress compared to community dwelling counterparts, yet MH interventions are difficult to implement and sustain. We modified a music therapy framework to Effective Music in Psychotherapy. Using the modified model, we integrated music listening into MH group intervention and evaluated MH outcomes. This pilot study reports the development and evaluation of the Mental Health and Music Group for short-stay nursing homes residents.Method: The group was developed and refined to be non-sequential and non-cumulative, specific to the needs of short-stay nursing home residents. 
Pre-/post-session ratings examined affect, quality of life, and pain. Leaders monitored engagement across and between sessions. Qualitative interviews were conducted with a selection of attendees.Results: Findings indicated decreases in irritation and worry, and increases in mood, energy, and self-as-a-whole from pre- to post-session. There were no changes in pain, perception of physical health or life-as-a-whole, energetic, sad, or happy affect, or differences in engagement. Qualitative interviews demonstrated benefits of group modality and music to group cohesion and highlighted the relevance of music for mental health outcomes.Conclusion: The Mental Health and Music group was designed around a framework of Effective Music in Psychotherapy, for short-stay nursing home settings, and demonstrated promising results. Future research can solidify the interventions generalizeability to medical and rehabilitation settings addressing the specific population needs and preferences.Supplemental data for this article is available online at https://doi.org/10.1080/13607863.2021.1935463 .",2021-06-14 +34236625,Prediction of Protein Solubility Based on Sequence Feature Fusion and DDcCNN.,"

Background

Prediction of protein solubility is an indispensable prerequisite for pharmaceutical research and production. The general and specific objective of this work is to design a new model for predicting protein solubility by using protein sequence feature fusion and deep dual-channel convolutional neural networks (DDcCNN) to improve the performance of existing prediction models.

Methods

The redundancy of raw protein is reduced by CD-HIT. The four subsequences are built from protein sequence: one global and three locals. The global subsequence is the entire protein sequence, and these local subsequences are obtained by moving a sliding window with some rules. Using G-gap to extract the features of the above four subsequences, a mixed matrix is constructed as the input of one channel which is composed of three-layer convolutional operating. Additional features are extracted by SCRATCH tool as input of another channel, which is consist of a single convolution in order to find hidden relationships and improve the accuracy of predictor. The outputs of two parallel channels are concatenated as the input of the hidden layer. And the prediction of protein solubility is obtained in the output layer. The best protein solubility prediction model is obtained by doing some comparative experiments of different frameworks.

Results

The performance indicators of DDcCNN model (our designed) are as follows: accuracy of 77.82%, Matthew's correlation coefficient of 0.57, sensitivity of 76.13% and specificity of 79.32%. The results of some comparative experiments show that the overall performance of DDcCNN model is better than existing models (GCNN, LCNN and PCNN). The related models and data are publicly deposited at http://www.ddccnn.wang .

Conclusion

The satisfactory performance of DDcCNN model reveals that these features and flexible computational methodologies can reinforce the existing prediction models for better prediction of protein solubility could be applied in several applications, such as to preselect initial targets that are soluble or to alter solubility of target proteins, thus can help to reduce the production cost.",2021-07-08 +32753773,hubViz: A Novel Tool for Hub-centric Visualization. ,"Visualization algorithms have been widely used for intuitive interrogation of genomic data and popularly used tools include MDS, t-SNE, and UMAP. However, these algorithms are not tuned for the visualization of binary data and none of them consider the hubness of observations for the visualization. In order to address these limitations, here we propose hubViz, a novel tool for hub-centric visualization of binary data. We evaluated the performance of hubViz with its application to the gene expression data measured in multiple brain regions of rats exposed to cocaine, the single-cell RNA-seq data of peripheral blood mononuclear cells treated with interferon beta, and the literature mining data to investigate relationships among diseases. We further evaluated the performance of hubViz using simulation studies. We showed that hubViz provides effective visual inspection by locating the hub in the center and the contrasting elements in the opposite sides around the center. We believe that hubViz and its software can be powerful tools that can improve visualizations of various genomic data. The hubViz is implemented as an R package hubviz, which is publicly available at https://dongjunchung.github.io/hubviz/.",2020-06-07 +29087517,TFClass: expanding the classification of human transcription factors to their mammalian orthologs.,"TFClass is a resource that classifies eukaryotic transcription factors (TFs) according to their DNA-binding domains (DBDs), available online at http://tfclass.bioinf.med.uni-goettingen.de. 
The classification scheme of TFClass was originally derived for human TFs and is expanded here to the whole taxonomic class of mammalia. Combining information from different resources, checking manually the retrieved mammalian TFs sequences and applying extensive phylogenetic analyses, >39 000 TFs from up to 41 mammalian species were assigned to the Superclasses, Classes, Families and Subfamilies of TFClass. As a result, TFClass now provides the corresponding sequence collection in FASTA format, sequence logos and phylogenetic trees at different classification levels, predicted TF binding sites for human, mouse, dog and cow genomes as well as links to several external databases. In particular, all those TFs that are also documented in the TRANSFAC® database (FACTOR table) have been linked and can be freely accessed. TRANSFAC® FACTOR can also be queried through an own search interface.",2018-01-01 +33113273,A novel workflow to improve genotyping of multigene families in wildlife species: An experimental set-up with a known model system.,"Genotyping complex multigene families in novel systems is particularly challenging. Target primers frequently amplify simultaneously multiple loci leading to high PCR and sequencing artefacts such as chimeras and allele amplification bias. Most genotyping pipelines have been validated in nonmodel systems whereby the real genotype is unknown and the generation of artefacts may be highly repeatable. Further hindering accurate genotyping, the relationship between artefacts and genotype complexity (i.e. number of alleles per genotype) within a PCR remains poorly described. Here, we investigated the latter by experimentally combining multiple known major histocompatibility complex (MHC) haplotypes of a model organism (chicken, Gallus gallus, 43 artificial genotypes with 2-13 alleles per amplicon). 
In addition to well-defined 'optimal' primers, we simulated a nonmodel species situation by designing 'cross-species' primers based on sequence data from closely related Galliform species. We applied a novel open-source genotyping pipeline (ACACIA; https://gitlab.com/psc_santos/ACACIA), and compared its performance with another, previously published pipeline (AmpliSAS). Allele calling accuracy was higher when using ACACIA (98.5% versus 97% and 77.8% versus 75% for the 'optimal' and 'cross-species' data sets, respectively). Systematic allele dropout of three alleles owing to primer mismatch in the 'cross-species' data set explained high allele calling repeatability (100% when using ACACIA) despite low accuracy, demonstrating that repeatability can be misleading when evaluating genotyping workflows. Genotype complexity was positively associated with nonchimeric artefacts, chimeric artefacts (nonlinearly by levelling when amplifying more than 4-6 alleles) and allele amplification bias. Our study exemplifies and demonstrates pitfalls researchers should avoid to reliably genotype complex multigene families.",2020-11-21 +32577440,"Long duration underwater glider dataset: Indian Ocean from Perth, Australia to Mirissa, Sri Lanka.","This data was collected using an underwater research vehicle, Slocum glider. The glider is an autonomous robot that is able to measure several water properties from surface to 1000 m depth. The duration of missions for underwater gliders are on the order of 1 month to over a year. Detailed here is the live satellite telemetered dataset as transmitted during mission. Dataset includes positional data, vehicle engineering, attitude, temperature, salinity, and depth averaged currents. Raw data as well as some derived variables from the raw data are included in this dataset. This data is transmitted every couple hours and comprises of a subset of the data stored on the vehicle's internal memory. 
The data is returned in segmented files which comprises of an underwater segment bookended by GPS positions. Because most analysis require longer time series, effort has been taken to conglomerate the segment datasets into a single continuous dataset stitching together the segments. The platform chosen for this is currently MATLAB. This data can provide the community with an example dataset of underwater glider data pertinent to a long duration and low energy glider mission. It also includes ocean measurements of temperature, salinity, and ocean currents. Built into the dataset object are various functions designed to help the user navigate and display a glider's collected data. The effort is being made to serve future datasets, 2017 and onward, via ERDAPP at the following location: http://slocum-data.marine.rutgers.edu/erddap/index.html Analyzed by the Applied Ocean Research article: Modeling for the Performance of Navigation, Control and Data Post-Processing of Underwater Gliders where it is used for flight efficiency analysis as well as ocean model comparisons.",2020-05-21 +35935896,"A re-evaluation of Penicillium section Canescentia, including the description of five new species.","A survey of Penicillium in the fynbos biome from South Africa resulted in the isolation of 61 species of which 29 were found to be new. In this study we focus on Penicillium section Canescentia, providing a phylogenetic re-evaluation based on the analysis of partial beta-tubulin (BenA), calmodulin (CaM) and RNA polymerase II second largest subunit (RPB2) sequence data. Based on phylogenies we show that five fynbos species are new and several previously assigned synonyms of P. canescens and P. janczewskii should be considered as distinct species. As such, we provide descriptions for the five new species and introduce the new name P. elizabethiae for the illegitimate P. echinatum. 
We also update the accepted species list and synonymies of section Canescentia species and provide a review of extrolites produced by these species. Citation: Visagie CM, Frisvad JC, Houbraken J, et al. 2021. A re-evaluation of Penicillium section Canescentia, including the description of five new species. Persoonia 46: 163-187. https://doi.org/10.3767/persoonia.2021.46.06.",2021-05-06 +33269445,Implicit consequentiality bias in English: A corpus of 300+ verbs.,"This study provides implicit verb consequentiality norms for a corpus of 305 English verbs, for which Ferstl et al. (Behavior Research Methods, 43, 124-135, 2011) previously provided implicit causality norms. An online sentence completion study was conducted, with data analyzed from 124 respondents who completed fragments such as ""John liked Mary and so…"". The resulting bias scores are presented in an Appendix, with more detail in supplementary material in the University of Sussex Research Data Repository (via https://doi.org/10.25377/sussex.c.5082122 ), where we also present lexical and semantic verb features: frequency, semantic class and emotional valence of the verbs. We compare our results with those of our study of implicit causality and with the few published studies of implicit consequentiality. As in our previous study, we also considered effects of gender and verb valence, which requires stable norms for a large number of verbs. The corpus will facilitate future studies in a range of areas, including psycholinguistics and social psychology, particularly those requiring parallel sentence completion norms for both causality and consequentiality.",2020-12-02 +28178937,SPANG: a SPARQL client supporting generation and reuse of queries for distributed RDF databases.,"

Background

Toward improved interoperability of distributed biological databases, an increasing number of datasets have been published in the standardized Resource Description Framework (RDF). Although the powerful SPARQL Protocol and RDF Query Language (SPARQL) provides a basis for exploiting RDF databases, writing SPARQL code is burdensome for users including bioinformaticians. Thus, an easy-to-use interface is necessary.

Results

We developed SPANG, a SPARQL client that has unique features for querying RDF datasets. SPANG dynamically generates typical SPARQL queries according to specified arguments. It can also call SPARQL template libraries constructed in a local system or published on the Web. Further, it enables combinatorial execution of multiple queries, each with a distinct target database. These features facilitate easy and effective access to RDF datasets and integrative analysis of distributed data.

Conclusions

SPANG helps users to exploit RDF datasets by generation and reuse of SPARQL queries through a simple interface. This client will enhance integrative exploitation of biological RDF datasets distributed across the Web. This software package is freely available at http://purl.org/net/spang .",2017-02-08 +34055810,DeepOMe: A Web Server for the Prediction of 2'-O-Me Sites Based on the Hybrid CNN and BLSTM Architecture.,"2'-O-methylations (2'-O-Me or Nm) are one of the most important layers of regulatory control over gene expression. With increasing attentions focused on the characteristics, mechanisms and influences of 2'-O-Me, a revolutionary technique termed Nm-seq were established, allowing the identification of precise 2'-O-Me sites in RNA sequences with high sensitivity. However, as the costs and complexities involved with this new method, the large-scale detection and in-depth study of 2'-O-Me is still largely limited. Therefore, the development of a novel computational method to identify 2'-O-Me sites with adequate reliability is urgently needed at the current stage. To address the above issue, we proposed a hybrid deep-learning algorithm named DeepOMe that combined Convolutional Neural Networks (CNN) and Bidirectional Long Short-term Memory (BLSTM) to accurately predict 2'-O-Me sites in human transcriptome. Validating under 4-, 6-, 8-, and 10-fold cross-validation, we confirmed that our proposed model achieved a high performance (AUC close to 0.998 and AUPR close to 0.880). When testing in the independent data set, DeepOMe was substantially superior to NmSEER V2.0. To facilitate the usage of DeepOMe, a user-friendly web-server was constructed, which can be freely accessed at http://deepome.renlab.org.",2021-05-14 +26243198,miRegulome: a knowledge-base of miRNA regulomics and analysis.,"

Unlabelled

miRNAs regulate post transcriptional gene expression by targeting multiple mRNAs and hence can modulate multiple signalling pathways, biological processes, and patho-physiologies. Therefore, understanding of miRNA regulatory networks is essential in order to modulate the functions of a miRNA. The focus of several existing databases is to provide information on specific aspects of miRNA regulation. However, an integrated resource on the miRNA regulome is currently not available to facilitate the exploration and understanding of miRNA regulomics. miRegulome attempts to bridge this gap. The current version of miRegulome v1.0 provides details on the entire regulatory modules of miRNAs altered in response to chemical treatments and transcription factors, based on validated data manually curated from published literature. Modules of miRegulome (upstream regulators, downstream targets, miRNA regulated pathways, functions, diseases, etc) are hyperlinked to an appropriate external resource and are displayed visually to provide a comprehensive understanding. Four analysis tools are incorporated to identify relationships among different modules based on user specified datasets. miRegulome and its tools are helpful in understanding the biology of miRNAs and will also facilitate the discovery of biomarkers and therapeutics. With added features in upcoming releases, miRegulome will be an essential resource to the scientific community.

Availability

http://bnet.egr.vcu.edu/miRegulome.",2015-08-05 +28383342,The Children's Health Exposure Analysis Resource: enabling research into the environmental influences on children's health outcomes.,"

Purpose of review

The Children's Health Exposure Analysis Resource (CHEAR) is a new infrastructure supported by the National Institute of Environmental Health Sciences to expand the ability of children's health researchers to include analysis of environmental exposures in their research and to incorporate the emerging concept of the exposome.

Recent findings

There is extensive discussion of the potential of the exposome to advance understanding of the totality of environmental influences on human health. Children's health is a logical choice to demonstrate the exposome concept due to the extensive existing knowledge of individual environmental exposures affecting normal health and development and the short latency between exposures and observable phenotypes. Achieving this demonstration will require access to extensive analytical capabilities to measure a suite of exposures through traditional biomonitoring approaches and to cross-validate these with emerging exposomic approaches.

Summary

CHEAR is a full-service exposure assessment resource, linking up-front consultation with both laboratory and data analysis. Analyses of biological samples are intended to enhance studies by including targeted analysis of specific exposures and untargeted analysis of small molecules associated with phenotypic endpoints. Services provided by CHEAR are made available without cost but require a brief application and adherence to policies detailed on the CHEAR web page at https://chearprogram.org/.",2017-06-01 +31409791,"DRAMP 2.0, an updated data repository of antimicrobial peptides.","Data Repository of Antimicrobial Peptides (DRAMP, http://dramp.cpu-bioinfor.org/ ) is an open-access comprehensive database containing general, patent and clinical antimicrobial peptides (AMPs). Currently DRAMP has been updated to version 2.0, it contains a total of 19,899 entries (newly added 2,550 entries), including 5,084 general entries, 14,739 patent entries, and 76 clinical entries. The update covers new entries, structures, annotations, classifications and downloads. Compared with APD and CAMP, DRAMP contains 14,040 (70.56% in DRAMP) non-overlapping sequences. In order to facilitate users to trace original references, PubMed_ID of references have been contained in activity information. The data of DRAMP can be downloaded by dataset and activity, and the website source code is also available on dedicatedly designed download webpage. Although thousands of AMPs have been reported, only a few parts have entered clinical stage. In the paper, we described several AMPs in clinical trials, including their properties, indications and clinicaltrials.gov identifiers. Finally, we provide the applications of DRAMP in the development of AMPs.",2019-08-13 +27008011,The Gene Expression Omnibus Database.,"The Gene Expression Omnibus (GEO) database is an international public repository that archives and freely distributes high-throughput gene expression and other functional genomics data sets. 
Created in 2000 as a worldwide resource for gene expression studies, GEO has evolved with rapidly changing technologies and now accepts high-throughput data for many other data applications, including those that examine genome methylation, chromatin structure, and genome-protein interactions. GEO supports community-derived reporting standards that specify provision of several critical study elements including raw data, processed data, and descriptive metadata. The database not only provides access to data for tens of thousands of studies, but also offers various Web-based tools and strategies that enable users to locate data relevant to their specific interests, as well as to visualize and analyze the data. This chapter includes detailed descriptions of methods to query and download GEO data and use the analysis and visualization tools. The GEO homepage is at http://www.ncbi.nlm.nih.gov/geo/.",2016-01-01 +29539190,"MSeqDR mvTool: A mitochondrial DNA Web and API resource for comprehensive variant annotation, universal nomenclature collation, and reference genome conversion.","Accurate mitochondrial DNA (mtDNA) variant annotation is essential for the clinical diagnosis of diverse human diseases. Substantial challenges to this process include the inconsistency in mtDNA nomenclatures, the existence of multiple reference genomes, and a lack of reference population frequency data. Clinicians need a simple bioinformatics tool that is user-friendly, and bioinformaticians need a powerful informatics resource for programmatic usage. Here, we report the development and functionality of the MSeqDR mtDNA Variant Tool set (mvTool), a one-stop mtDNA variant annotation and analysis Web service. mvTool is built upon the MSeqDR infrastructure (https://mseqdr.org), with contributions of expert curated data from MITOMAP (https://www.mitomap.org) and HmtDB (https://www.hmtdb.uniba.it/hmdb). 
mvTool supports all mtDNA nomenclatures, converts variants to standard rCRS- and HGVS-based nomenclatures, and annotates novel mtDNA variants. Besides generic annotations from dbNSFP and Variant Effect Predictor (VEP), mvTool provides allele frequencies in more than 47,000 germline mitogenomes, and disease and pathogenicity classifications from MSeqDR, Mitomap, HmtDB and ClinVar (Landrum et al., 2013). mvTools also provides mtDNA somatic variants annotations. ""mvTool API"" is implemented for programmatic access using inputs in VCF, HGVS, or classical mtDNA variant nomenclatures. The results are reported as hyperlinked html tables, JSON, Excel, and VCF formats. MSeqDR mvTool is freely accessible at https://mseqdr.org/mvtool.php.",2018-04-06 +31688932,Taxonomic weighting improves the accuracy of a gap-filling algorithm for metabolic models.,"

Motivation

The increasing availability of annotated genome sequences enables construction of genome-scale metabolic networks, which are useful tools for studying organisms of interest. However, due to incomplete genome annotations, draft metabolic models contain gaps that must be filled in a time-consuming process before they are usable. Optimization-based algorithms that fill these gaps have been developed; however, gap-filling algorithms show significant error rates and often introduce incorrect reactions.

Results

Here, we present a new gap-filling method that computes the costs of candidate gap-filling reactions from a universal reaction database (MetaCyc) based on taxonomic information. When gap-filling a metabolic model for an organism M (such as Escherichia coli), the cost for reaction R is based on the frequency with which R occurs in other organisms within the phylum of M (in this case, Proteobacteria). The assumption behind this method is that different taxonomic groups are biased toward using different metabolic reactions. Evaluation of the new gap-filler on randomly degraded variants of the EcoCyc metabolic model for E.coli showed an increase in the average F1-score to 99.0 (when using the variable weights by frequency method at the phylum level), compared to 91.0 using the previous MetaFlux gap-filler and 80.3 using a basic gap-filler. Evaluation on two other microbial metabolic models showed similar improvements.

Availability and implementation

The Pathway Tools software (including MetaFlux) is free for academic use and is available at http://pathwaytools.com. Additional code for reproducing the results presented here is available at www.ai.sri.com/pkarp/pubs/taxgap/supplementary.zip.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +32907531,IonCRAM: a reference-based compression tool for ion torrent sequence files.,"

Background

Ion Torrent is one of the major next generation sequencing (NGS) technologies and it is frequently used in medical research and diagnosis. The built-in software for the Ion Torrent sequencing machines delivers the sequencing results in the BAM format. In addition to the usual SAM/BAM fields, the Ion Torrent BAM file includes technology-specific flow signal data. The flow signals occupy a big portion of the BAM file (about 75% for the human genome). Compressing SAM/BAM into CRAM format significantly reduces the space needed to store the NGS results. However, the tools for generating the CRAM formats are not designed to handle the flow signals. This missing feature has motivated us to develop a new program to improve the compression of the Ion Torrent files for long term archiving.

Results

In this paper, we present IonCRAM, the first reference-based compression tool to compress Ion Torrent BAM files for long term archiving. For the BAM files, IonCRAM could achieve a space saving of about 43%. This space saving is superior to what is achieved with the CRAM format by about 8-9%.

Conclusions

Reducing the space consumption of NGS data reduces the cost of storage and data transfer. Therefore, developing efficient compression software for clinical NGS data goes beyond the computational interest; as it ultimately contributes to the overall cost reduction of the clinical test. The space saving achieved by our tool is a practical step in this direction. The tool is open source and available at Code Ocean, github, and http://ioncram.saudigenomeproject.com .",2020-09-09 +32490072,In situ soil moisture and temperature network in genhe watershed and saihanba area in China.,"The dataset presented in this article is related to the work ""Evaluation and Analysis of SMAP, AMSR2, and MEaSUREs Freeze/Thaw Products in China [1]"". Soil moisture and temperature are important variables of land-atmosphere energy exchange, monitoring vegetation growth, predicting drought disasters and climate and hydrological modelling [2], [3], [4], [5], [6]. This work provides detailed information on in situ soil moisture and temperature data network established in the Genhe watershed and Saihanba area in China, respectively. The Genhe watershed represents the complex surface heterogeneity in Northeast China. Therefore, data from 22 in situ sites were established in the Genhe watershed since March 2016 to improve the dynamic analysis and modeling of remotely sensed information for complex land surfaces. Saihanba is currently China's largest manmade forest and has a unique alpine wetland and a complete aquatic ecosystem. There are 29 in situ sites deployed in Saihanba since August 2018 for studying the cold temperate continental monsoon climate and estimating forest carbon storage capacity and carbon emissions from manmade forests. 
Soil temperature and permittivity data in the network were measured using ECH2O EC-5TM probes (Decagon Devices, Inc., Washington, USA, https://www.metergroup.com/) and XingShiTu (XST) probes (BEIJING XST Co., Ltd., www.xingshitu.com) every 30 min at depths of 3, 5, and 10 cm for the Genhe watershed continuous automatic observation network, and depths of 5 and 10 cm for the Saihanba continuous automatic observation network. In the Genhe watershed, soil moisture and soil temperature data in the network were automatically collected using the EM50 data collection system. The Saihanba area has the XST data collection system to record soil temperature and permittivity. The permittivity data collected with the XST data collector were transformed to soil moisture data (volumetric water content) based on the formula developed by [7]. The datasets of the Genhe watershed and Saihanba area consist of raw data acquired by the data collector and processed data of soil moisture and temperature. The Saihanba dataset also includes the calibration data based on soil texture. The result of temporal variations analysis in observed data in the Genhe Watershed and the processing in observed data in the saihanba area show that the long-term in situ soil moisture and temperature datasets can be used for the validation/calibration and improvement of the soil moisture and soil freeze/thaw algorithm.",2020-05-19 +32336249,A novel pattern matching algorithm for genomic patterns related to protein motifs.,"Background: Patterns on proteins and genomic sequences are vastly analyzed, extracted and collected in databases. Although protein patterns originate from genomic coding regions, very few works have directly or indirectly dealt with coding region patterns induced from protein patterns. Results: In this paper, we have defined a new genomic pattern structure suitable for representing induced patterns from proteins. 
The provided pattern structure, which is called ""Consecutive Positions Scoring Matrix (CPSSM)"", is a replacement for protein patterns and profiles in the genomic context. CPSSMs can be identified, discovered, and searched in genomes. Then, we have presented a novel pattern matching algorithm between the defined genomic pattern and genomic sequences based on dynamic programming. In addition, we have modified the provided algorithm to support intronic gaps and huge sequences. We have implemented and tested the provided algorithm on real data. The results on Saccharomyces cerevisiae's genome show 132% more true positives and no false negatives and the results on human genome show no false negatives and 10 times as many true positives as those in previous works. Conclusion: CPSSM and provided methods could be used for open reading frame detection and gene finding. The application is available with source codes to run and download at http://app.foroughmand.ir/cpssm/.",2020-02-01 +34126890,Corrigendum to: A Review on Lung Cancer Diagnosis Using Data Mining Algorithms.,"Due to an oversight of the author, the principal author's name was published incorrectly in the author list and in the Fig. (3): Shakeel et al. (2019) have been changed to Shakeel et al. (2020) and Palani and Venkata-lakshmi (2019) have been changed to Palani and Venkatalakshmi (2018) in the article entitled ""A Review on Lung Cancer Diagnosis Using Data Mining Algorithms"" in ""Current Medical Imaging"", 2021; 17(1), [1]. The original article can be found online at https://www.eurekaselect.com/183148/article.",2021-01-01 +,snpReady: a tool to assist breeders in genomic analysis,"The snpReady R package is a new instrument developed to help breeders in genomic projects such as genomic prediction and association studies. 
This package offers three different methods to build the genomic relationship matrix, a new imputation method for missing markers based on Wright’s theory, and a population genetic overview. Therefore, we implemented three functions (raw.data, G.matrix, and popgen). Hence, this tool allows the raw data to be transformed from different genotyping platforms to numeric matrices and performs quality control (missing data and allele frequency). Moreover, the package generates and exports four different relationship matrices (proposed by Yang et al. (N 569:565–569, 2010), VanRaden (JDS 91:4414–23, 2008), and the Gaussian kernel) depending on the purpose and software to be used in further analysis. Finally, based on the genotypic matrix, the package estimates the genetic variability, effective population size, and endogamy, among other population genetic parameters. Empirical comparisons between the method of imputation proposed and other well-known approaches have shown a lower accuracy of imputation, however, with no significant impact on the genome prediction accuracies when a lower amount of missing data is allowed. The functions and arguments were designed to carry out the preparation of genomic datasets in a straightforward, fast, and more computationally efficient way.The package and its details are available at CRAN or http://www.github.com/italo-granato/snpReady .",2018-08-01 +31192369,VarMap: a web tool for mapping genomic coordinates to protein sequence and structure and retrieving protein structural annotations.,"

Motivation

Understanding the protein structural context and patterning on proteins of genomic variants can help to separate benign from pathogenic variants and reveal molecular consequences. However, mapping genomic coordinates to protein structures is non-trivial, complicated by alternative splicing and transcript evidence.

Results

Here we present VarMap, a web tool for mapping a list of chromosome coordinates to canonical UniProt sequences and associated protein 3D structures, including validation checks, and annotating them with structural information.

Availability and implementation

https://www.ebi.ac.uk/thornton-srv/databases/VarMap.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +33546593,Targeted enrichment of novel chloroplast-based probes reveals a large-scale phylogeny of 412 bamboos.,"

Background

The subfamily Bambusoideae belongs to the grass family Poaceae and has significant roles in culture, economy, and ecology. However, the phylogenetic relationships based on large-scale chloroplast genomes (CpGenomes) were elusive. Moreover, most of the chloroplast DNA sequencing methods cannot meet the requirements of large-scale CpGenome sequencing, which greatly limits and impedes the in-depth research of plant genetics and evolution.

Results

To develop a set of bamboo probes, we used 99 high-quality CpGenomes with 6 bamboo CpGenomes as representative species for the probe design, and assembled 15 M unique sequences as the final pan-chloroplast genome. A total of 180,519 probes for chloroplast DNA fragments were designed and synthesized by a novel hybridization-based targeted enrichment approach. Another 468 CpGenomes were selected as test data to verify the quality of the newly synthesized probes and the efficiency of the probes for chloroplast capture. We then successfully applied the probes to synthesize, enrich, and assemble 358 non-redundant CpGenomes of woody bamboo in China. Evaluation analysis showed the probes may be applicable to chloroplasts in Magnoliales, Pinales, Poales et al. Moreover, we reconstructed a phylogenetic tree of 412 bamboos (358 in-house and 54 published), supporting a non-monophyletic lineage of the genus Phyllostachys. Additionally, we shared our data by uploading a dataset of bamboo CpGenome into CNGB ( https://db.cngb.org/search/project/CNP0000502/ ) to enrich resources and promote the development of bamboo phylogenetics.

Conclusions

The development of the CpGenome enrichment pipeline and its performance on bamboos recommended an inexpensive, high-throughput, time-saving and efficient CpGenome sequencing strategy, which can be applied to facilitate the phylogenetics analysis of most green plants.",2021-02-05 +32984460,"Dataset on ten-years monitoring of MSWI bottom ashes in six MSWI plants in the Canton of Zürich, Switzerland.","The dataset presented in this article is the supplementary data for the research article ""Ten-years monitoring of MSWI bottom ashes with focus on TOC development and leaching behaviour"" (https://doi.org/10.1016/j.wasman.2020.07.038) by Glauser et al. (2020) [1]. From 2008-2018 bottom ashes have been monitored in six MSWI plants in the Canton of Zürich with regular sampling campaigns and analysis of important species defined in the Swiss Waste Legislation [2]. Both the size of the dataset and the long period of consistent and representative monitoring are unique for Switzerland. Relevant aspects of the monitoring data are discussed and interpreted in the above mentioned research article and complemented by simple emission forecast modelling. While only selected species were discussed in the research article, this data article covers all the monitoring data. The focus of the monitoring was laid on carbon-species with the analysis of total carbon (TC), total organic carbon (TOC), total inorganic carbon (TIC), degradable organic carbon (OC) and elemental carbon (EC). Total contents of nitrogen (N), sulphur (S), phosphorus (P), selected heavy metals (As, Cd, Cr, Cu, Ni, Pb, Sb and Zn) and loss on ignition (LOI) complete the solid chemical analysis. In addition, particulate ferrous (Fe) and non-ferrous (NF) metals and unburnt material were determined manually. 
Batch eluate tests according to Swiss Waste Legislation [3] were performed and analysed for dissolved organic carbon (DOC), ammonium (NH4 +), nitrite (NO2 -), fluoride (F-), sulphite (SO3 2-), sulphide (S2 -), chromate Cr(IV) and the heavy metals Cu (aq) and Zn (aq) and Cr(IV). In addition, data on the biochemical oxygen demand (BOD) and the physical parameters pH and electrical conductivity complete the eluate analysis.",2020-09-04 +29963332,The Lake-Catchment (LakeCat) Dataset: characterizing landscape features for lake basins within the conterminous USA.,"Natural and human-related landscape features influence the ecology and water quality of lakes. Summarizing these features in a hydrologically meaningful way is critical to understanding and managing lake ecosystems. Such summaries are often done by delineating watershed boundaries of individual lakes. However, many technical challenges are associated with delineating hundreds or thousands of lake watersheds at broad spatial extents. These challenges can limit the application of analyses and models to new, unsampled locations. We present the Lake-Catchment (LakeCat) Dataset (https://www.epa.gov/national-aquatic-resource-surveys/lakecat) of watershed features for 378,088 lakes within the conterminous USA. We describe the methods we used to: 1) delineate lake catchments, 2) hydrologically connect nested lake catchments, and 3) generate several hundred watershed-level metrics that summarize both natural (e.g., soils, geology, climate, and land cover) and anthropogenic (e.g., urbanization, agriculture, and mines) features. We illustrate how this data set can be used with a random forest model to predict the probability of lake eutrophication by combining LakeCat with data from US Environmental Protection Agency's National Lakes Assessment (NLA). 
This model correctly predicted the trophic state of 72% of NLA lakes, and we applied the model to predict the probability of eutrophication at 297,071 unsampled lakes across the conterminous USA. The large suite of LakeCat metrics could be used to improve analyses of lakes at broad spatial extents, improve the applicability of analyses to unsampled lakes, and ultimately improve the management of these important ecosystems.",2018-06-01 +27515824,"Ricebase: a breeding and genetics platform for rice, integrating individual molecular markers, pedigrees and whole-genome-based data. ","Ricebase (http://ricebase.org) is an integrative genomic database for rice (Oryza sativa) with an emphasis on combining datasets in a way that maintains the key links between past and current genetic studies. Ricebase includes DNA sequence data, gene annotations, nucleotide variation data and molecular marker fragment size data. Rice research has benefited from early adoption and extensive use of simple sequence repeat (SSR) markers; however, the majority of rice SSR markers were developed prior to the latest rice pseudomolecule assembly. Interpretation of new research using SNPs in the context of literature citing SSRs requires a common coordinate system. A new pipeline, using a stepwise relaxation of stringency, was used to map SSR primers onto the latest rice pseudomolecule assembly. The SSR markers and experimentally assayed amplicon sizes are presented in a relational database with a web-based front end, and are available as a track loaded in a genome browser with links connecting the browser and database. 
The combined capabilities of Ricebase link genetic markers, genome context, allele states across rice germplasm and potentially user curated phenotypic interpretations as a community resource for genetic discovery and breeding in rice.",2016-08-10 +33552037,BioMaster: An Integrated Database and Analytic Platform to Provide Comprehensive Information About BioBrick Parts.,"Synthetic biology seeks to create new biological parts, devices, and systems, and to reconfigure existing natural biological systems for custom-designed purposes. The standardized BioBrick parts are the foundation of synthetic biology. The incomplete and flawed metadata of BioBrick parts, however, are a major obstacle for designing genetic circuit easily, quickly, and accurately. Here, a database termed BioMaster http://www.biomaster-uestc.cn was developed to extensively complement information about BioBrick parts, which includes 47,934 items of BioBrick parts from the international Genetically Engineered Machine (iGEM) Registry with more comprehensive information integrated from 10 databases, providing corresponding information about functions, activities, interactions, and related literature. Moreover, BioMaster is also a user-friendly platform for retrieval and analyses of relevant information on BioBrick parts.",2021-01-21 +34043983,Prediction of vaginal birth after cesarean delivery in term gestations: a calculator without race and ethnicity.,"

Background

Investigators have attempted to derive tools that could provide clinicians with an easily obtainable estimate of the chance of vaginal birth after cesarean delivery for those who undertake trial of labor after cesarean delivery. One tool that has been validated externally was derived from data from the Maternal-Fetal Medicine Units Cesarean Registry. However, concern has been raised that this tool includes the socially constructed variables of race and ethnicity.

Objective

This study aimed to develop an accurate tool to predict vaginal birth after cesarean delivery, using data easily obtainable early in pregnancy, without the inclusion of race and ethnicity.

Study design

This was a secondary analysis of the Cesarean Registry of the Maternal-Fetal Medicine Units Network. The approach to the current analysis is similar to that of the analysis in which the previous vaginal birth after cesarean delivery prediction tool was derived. Specifically, individuals were included in this analysis if they were delivered on or after 37 0/7 weeks' gestation with a live singleton cephalic fetus at the time of labor and delivery admission, had a trial of labor after cesarean delivery, and had a history of 1 previous low-transverse cesarean delivery. Information was only considered for inclusion in the model if it was ascertainable at an initial prenatal visit. Model selection and internal validation were performed using a cross-validation procedure, with the dataset randomly and equally divided into a training set and a test set. The training set was used to identify factors associated with vaginal birth after cesarean delivery and build the logistic regression predictive model using stepwise backward elimination. A final model was generated that included all variables found to be significant (P<.05). The accuracy of the model to predict vaginal birth after cesarean delivery was assessed using the concordance index. The independent test set was used to estimate classification errors and validate the model that had been developed from the training set, and calibration was assessed. The final model was then applied to the overall analytical population.

Results

Of the 11,687 individuals who met the inclusion criteria for this secondary analysis, 8636 (74%) experienced vaginal birth after cesarean delivery. The backward elimination variable selection yielded a model from the training set that included maternal age, prepregnancy weight, height, indication for previous cesarean delivery, obstetrical history, and chronic hypertension. Vaginal birth after cesarean delivery was significantly more likely for women who were taller and had a previous vaginal birth, particularly if that vaginal birth had occurred after a previous cesarean delivery. Conversely, vaginal birth after cesarean delivery was significantly less likely for women whose age was older, whose weight was heavier, whose indication for previous cesarean delivery was arrest of dilation or descent, and who had a history of medication-treated chronic hypertension. The model had excellent calibration between predicted and empirical probabilities and, when applied to the overall analytical population, an area under the receiver operating characteristic curve of 0.75 (95% confidence interval, 0.74-0.77), which is similar to the area under the receiver operating characteristic curve of the previous model (0.75) that included race and ethnicity.

Conclusion

We successfully derived an accurate model (available at https://mfmunetwork.bsc.gwu.edu/web/mfmunetwork/vaginal-birth-after-cesarean-calculator), which did not include race or ethnicity, for the estimation of the probability of vaginal birth after cesarean delivery.",2021-05-24 +34043002,Identification of evolutionarily stable functional and immunogenic sites across the SARS-CoV-2 proteome and greater coronavirus family.,"

Motivation

Since the first recognized case of COVID-19, more than 100 million people have been infected worldwide. Global efforts in drug and vaccine development to fight the disease have yielded vaccines and drug candidates to cure COVID-19. However, the spread of SARS-CoV-2 variants threatens the continued efficacy of these treatments. In order to address this, we interrogate the evolutionary history of the entire SARS-CoV-2 proteome to identify evolutionarily conserved functional sites that can inform the search for treatments with broader coverage across the coronavirus family.

Results

Combining coronavirus family sequence information with the mutations observed in the current COVID-19 outbreak, we systematically and comprehensively define evolutionarily stable sites that may provide useful drug and vaccine targets and which are less likely to be compromised by the emergence of new virus strains. Several experimentally-validated effective drugs interact with these proposed target sites. In addition, the same evolutionary information can prioritize cross reactive antigens that are useful in directing multi-epitope vaccine strategies to elicit broadly neutralizing immune responses to the betacoronavirus family. Although the results are focused on SARS-CoV-2, these approaches stem from evolutionary principles that are agnostic to the organism or infective agent.

Availability

The results of this work are made interactively available at http://cov.lichtargelab.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-27 +33976128,The RNA landscape of the human placenta in health and disease.,"The placenta is the interface between mother and fetus and inadequate function contributes to short and long-term ill-health. The placenta is absent from most large-scale RNA-Seq datasets. We therefore analyze long and small RNAs (~101 and 20 million reads per sample respectively) from 302 human placentas, including 94 cases of preeclampsia (PE) and 56 cases of fetal growth restriction (FGR). The placental transcriptome has the seventh lowest complexity of 50 human tissues: 271 genes account for 50% of all reads. We identify multiple circular RNAs and validate 6 of these by Sanger sequencing across the back-splice junction. Using large-scale mass spectrometry datasets, we find strong evidence of peptides produced by translation of two circular RNAs. We also identify novel piRNAs which are clustered on Chr1 and Chr14. PE and FGR are associated with multiple and overlapping differences in mRNA, lincRNA and circRNA but fewer consistent differences in small RNAs. Of the three protein coding genes differentially expressed in both PE and FGR, one encodes a secreted protein FSTL3 (follistatin-like 3). Elevated serum levels of FSTL3 in pregnant women are predictive of subsequent PE and FGR. To aid visualization of our placenta transcriptome data, we develop a web application ( https://www.obgyn.cam.ac.uk/placentome/ ).",2021-05-11 +27087309,Kalium: a database of potassium channel toxins from scorpion venom. ,"Kalium (http://kaliumdb.org/) is a manually curated database that accumulates data on potassium channel toxins purified from scorpion venom (KTx). This database is an open-access resource, and provides easy access to pages of other databases of interest, such as UniProt, PDB, NCBI Taxonomy Browser, and PubMed. 
General achievements of Kalium are a strict and easy regulation of KTx classification based on the unified nomenclature supported by researchers in the field, removal of peptides with partial sequence and entries supported by transcriptomic information only, classification of β-family toxins, and addition of a novel λ-family. Molecules presented in the database can be processed by the Clustal Omega server using a one-click option. Molecular masses of mature peptides are calculated and available activity data are compiled for all KTx. We believe that Kalium is not only of high interest to professional toxinologists, but also of general utility to the scientific community.Database URL:http://kaliumdb.org/.",2016-04-17 +32479601,CReSCENT: CanceR Single Cell ExpressioN Toolkit.,"CReSCENT: CanceR Single Cell ExpressioN Toolkit (https://crescent.cloud), is an intuitive and scalable web portal incorporating a containerized pipeline execution engine for standardized analysis of single-cell RNA sequencing (scRNA-seq) data. While scRNA-seq data for tumour specimens are readily generated, subsequent analysis requires high-performance computing infrastructure and user expertise to build analysis pipelines and tailor interpretation for cancer biology. CReSCENT uses public data sets and preconfigured pipelines that are accessible to computational biology non-experts and are user-editable to allow optimization, comparison, and reanalysis for specific experiments. Users can also upload their own scRNA-seq data for analysis and results can be kept private or shared with other users.",2020-07-01 +31978578,Genetic diversity of Neisseria gonorrhoeae multi-antigen sequence types in Russia and Europe.,"OBJECTIVES:The goal of this work was to assess the genetic diversity of Neisseria gonorrhoeae isolates in Russia and Europe and to compare the distribution of the N. gonorrhoeae multi-antigen sequencing types (NG-MAST) of Russian isolates with that of isolates from European countries. 
METHODS:NG-MAST typing was performed for 804 N. gonorrhoeae isolates collected in Russia in 2013-2018. For isolates from European countries, data from the https://pathogen.watch/collection/eurogasp2013 database were used. RESULTS:Among the isolates from Russia, 296 NG-MAST types were found. A maximum likelihood phylogenetic tree was constructed. Phylogenetic analysis revealed seven major genogroups uniting the most frequent Russian sequence types: G807, G1993, G9476, G14942, G1152, G9486, and G12531. CONCLUSIONS:The NG-MAST type distribution in Russia differed from that in European countries. Most of the Russian isolates had sequence types that were not found in Europe. Only 33% of the Russian isolates belonged to genogroups established for European countries, and the widespread European genogroup G1407 was represented by only nine isolates. Analysis of the Russian isolates belonging to phylogenetically close European genogroups indicated similarities in drug resistance, although no epidemically dangerous drug-resistant clones were found among the Russian isolates.",2020-01-21 +33117076,"Arachnids (Araneae, Opiliones) from grass stand and forest litter in the Urals, Russia.","

Background

Since the late 1980s, long-term monitoring of various components of natural ecosystems under conditions of industrial pollution has been carried out in the Central Urals. In the mid-2000s, similar programmes were started in the Southern Urals. As a part of these monitoring programmes, the data on invertebrates in different types of biotopes, collected with different methods and at different time intervals, continue to be gathered. Amongst the most well-studied groups of invertebrates are spiders and harvestmen whose communities are a convenient indicator of the environment. The data collected through these monitoring programmes can also be used to study natural local biodiversity.

New information

The dataset, presented here, includes information from a long-term monitoring programme for Araneae and Opiliones that inhabit grass stands of secondary dry meadows and litter of spruce-fir, aspen-birch and pine-birch forests in the Central and Southern Urals. The dataset (available from the GBIF network at https://www.gbif.org/dataset/e170dbd1-a67f-4514-841c-5296b290ca90) describes the assemblage structure of spiders and harvestmen (list of species and their abundance), age-sex composition and seasonal and inter-annual dynamics for two large areas in the southern taiga zone of the Ural Mountains. The dataset includes 1,351 samples, which correspond to 5,462 occurrences identified during 2004-2009, 2013 and 2018. In total, we collected 10,433 specimens, representing 178 species (36% of arachnofauna of the Urals), 115 genera (54%) and 23 families (100%). Most of the data (4,939 of 5,462 occurrences, 90%) were collected in the western macro-slope of the Ural Mountains (European part of Russia), the rest in the eastern macro-slope (Asian part). All represented data were sampled in industrially undisturbed areas and are used as a local reference for ecotoxicological monitoring. The dataset provides new useful information for recording the state of biodiversity for the Central and Southern Urals and contributes to the study of biodiversity conservation.",2020-10-08 +33527888,Life expectancy inequalities in Hungary over 25 years: The role of avoidable deaths.,"Using mortality registers and administrative data on income and population, we develop new evidence on the magnitude of life expectancy inequality in Hungary and the scope for health policy in mitigating this. We document considerable inequalities in life expectancy at age 45 across settlement-level income groups, and show that these inequalities have increased between 1991-96 and 2011-16 for both men and women. We show that avoidable deaths play a large role in life expectancy inequality. 
Income-related inequalities in health behaviours, access to care, and healthcare use are all closely linked to the inequality in life expectancy. Supplementary material for this article is available at: https://doi.org/10.1080/00324728.2021.1877332.",2021-02-02 +34347531,Fine Particulate Matter and Dementia Incidence in the Adult Changes in Thought Study.,"

Background

Air pollution may be associated with elevated dementia risk. Prior research has limitations that may affect reliability, and no studies have evaluated this question in a population-based cohort of men and women in the United States.

Objectives

We evaluated the association between time-varying, 10-y average fine particulate matter (PM2.5) exposure and hazard of all-cause dementia. An additional goal was to understand how to adequately control for age and calendar-time-related confounding through choice of the time axis and covariate adjustment.

Methods

Using the Adult Changes in Thought (ACT) population-based prospective cohort study in Seattle, we linked spatiotemporal model-based PM2.5 exposures to participant addresses from 1978 to 2018. Dementia diagnoses were made using high-quality, standardized, consensus-based protocols at biennial follow-ups. We conducted multivariable Cox proportional hazards regression to evaluate the association between time-varying, 10-y average PM2.5 exposure and time to event in a model with age as the time axis, stratified by apolipoprotein E (APOE) genotype, and adjusted for sex, education, race, neighborhood median household income, and calendar time. Alternative models used calendar time as the time axis.

Results

We report 1,136 cases of incident dementia among 4,166 individuals with nonmissing APOE status. Mean [mean ± standard deviation (SD)] 10-y average PM2.5 was 10.1 (±2.9) μg/m3. Each 1-μg/m3 increase in the moving average of 10-y PM2.5 was associated with a 16% greater hazard of all-cause dementia [1.16 (95% confidence interval: 1.03, 1.31)]. Results using calendar time as the time axis were similar.

Discussion

In this prospective cohort study with extensive exposure data and consensus-based outcome ascertainment, elevated long-term exposure to PM2.5 was associated with increased hazard of all-cause dementia. We found that optimal control of age and time confounding could be achieved through use of either age or calendar time as the time axis in our study. Our results strengthen evidence on the neurodegenerative effects of PM2.5. https://doi.org/10.1289/EHP9018.",2021-08-04 +25638809,SPARQL-enabled identifier conversion with Identifiers.org.,"

Motivation

On the semantic web, in life sciences in particular, data is often distributed via multiple resources. Each of these sources is likely to use their own International Resource Identifier for conceptually the same resource or database record. The lack of correspondence between identifiers introduces a barrier when executing federated SPARQL queries across life science data.

Results

We introduce a novel SPARQL-based service to enable on-the-fly integration of life science data. This service uses the identifier patterns defined in the Identifiers.org Registry to generate a plurality of identifier variants, which can then be used to match source identifiers with target identifiers. We demonstrate the utility of this identifier integration approach by answering queries across major producers of life science Linked Data.

Availability and implementation

The SPARQL-based identifier conversion service is available without restriction at http://identifiers.org/services/sparql.",2015-01-31 +32660628,Autologous cell-free serum preparations in the management of knee osteoarthritis: what is the current clinical evidence?,"

Background

There is paucity in the current literature regarding clinical outcomes of autologous cell-free serum preparations. The objective of this paper is to collate the clinical evidence and review the results of intraarticular injections of autologous cell-free serum preparations in the management of knee osteoarthritis (OA).

Methods

A comprehensive English literature search was undertaken using the healthcare database website (https://hdas.nice.org.uk/). The PubMed, Medline, CINAHL, Embase and the Cochrane library databases were searched to identify all studies of autologous protein solution/autologous conditioned serum (ACS/APS) in the management of knee OA. We evaluated the reported clinical outcomes with respect to pain, function, morbidity, adverse effects and complications.

Results

Fifteen relevant articles were identified in the current literature. Outcomes following injection of ACS/APS have been reported in patients with age range (34-87 years) and unilateral or bilateral knee OA. Seven studies reported improvement in visual analog scale (VAS) whereas the Western Ontario and McMaster Universities osteoarthritis instrument (WOMAC) score improved in nine studies. Considerable variation was noted in the injection technique and duration of post-procedure assessment with only one study reporting long-term follow-up beyond 24 months. Joint swelling and injection-site pain were reported to be the most common complications; only one study reported a case of septic arthritis. However, no evidence is available to clearly identify factors that may predict the outcomes following this procedure.

Conclusion

Current data from the clinical studies would suggest that the intraarticular administration of autologous cell-free serum preparations, such as ACS/APS, in patients with knee OA may improve pain and function, with limited morbidity. High-quality clinical trials with stratified patient cohorts, longer follow-up duration and robust reporting of outcome measures are essential to improve the understanding of the indications and clinical effectiveness of these novel products.",2020-03-23 +28381244,DisBind: A database of classified functional binding sites in disordered and structured regions of intrinsically disordered proteins.,"

Background

Intrinsically unstructured or disordered proteins function via interacting with other molecules. Annotation of these binding sites is the first step for mapping functional impact of genetic variants in coding regions of human and other genomes, considering that a significant portion of eukaryotic genomes code for intrinsically disordered regions in proteins.

Results

DisBind (available at http://biophy.dzu.edu.cn/DisBind ) is a collection of experimentally supported binding sites in intrinsically disordered proteins and proteins with both structured and disordered regions. There are a total of 226 IDPs with functional site annotations. These IDPs contain 465 structured regions (ORs) and 428 IDRs according to annotation by DisProt. The database contains a total of 4232 binding residues (from UniProt and PDB structures) in which 2836 residues are in ORs and 1396 in IDRs. These binding sites are classified according to their interacting partners including proteins, RNA, DNA, metal ions and others with 2984, 258, 383, 350, and 262 annotated binding sites, respectively. Each entry contains site-specific annotations (structured regions, intrinsically disordered regions, and functional binding regions) that are experimentally supported according to PDB structures or annotations from UniProt.

Conclusion

The searchable DisBind provides a reliable data resource for functional classification of intrinsically disordered proteins at the residue level.",2017-04-05 +26138588,SmedGD 2.0: The Schmidtea mediterranea genome database.,"Planarians have emerged as excellent models for the study of key biological processes such as stem cell function and regulation, axial polarity specification, regeneration, and tissue homeostasis among others. The most widely used organism for these studies is the free-living flatworm Schmidtea mediterranea. In 2007, the Schmidtea mediterranea Genome Database (SmedGD) was first released to provide a much needed resource for the small, but growing planarian community. SmedGD 1.0 has been a depository for genome sequence, a draft assembly, and related experimental data (e.g., RNAi phenotypes, in situ hybridization images, and differential gene expression results). We report here a comprehensive update to SmedGD (SmedGD 2.0) that aims to expand its role as an interactive community resource. The new database includes more recent, and up-to-date transcription data, provides tools that enhance interconnectivity between different genome assemblies and transcriptomes, including next-generation assemblies for both the sexual and asexual biotypes of S. mediterranea. SmedGD 2.0 (http://smedgd.stowers.org) not only provides significantly improved gene annotations, but also tools for data sharing, attributes that will help both the planarian and biomedical communities to more efficiently mine the genomics and transcriptomics of S. mediterranea.",2015-07-17 +34032963,Mental Health Challenges Related to Neoliberal Capitalism in the United States.,"Rates of mental illness have increased dramatically over the past 15 years in the United States [Products-Data Briefs-Number 283-August 2017. Centers for Disease Control and Prevention. https://www.cdc.gov/nchs/products/databriefs/db283.htm . Published August 15, 2017]. 
Additionally, life expectancy has fallen over the past several years due to increases in death from suicide, opioid overdose, and alcoholic liver cirrhosis as reported by Case and Deaton [Deaths of despair and the future of capitalism. Princeton University Press, 2020]. Over the last decade some have questioned whether these changes are due to neoliberal capitalist policies and ideologies. Neoliberal capitalism incorporates theories of eliminating all restrictions on the market and decreasing government assistance programs as reported by Harvey [A brief history of neoliberalism, Oxford University Press, 2005]. Since then these policies have led to income inequality, disempowerment of workers, outsourcing of manufacturing jobs, inadequate social services, mass incarceration and an expensive and ineffective healthcare system as reported by Case and Deaton [Deaths of despair and the future of capitalism. Princeton University Press, 2020] and Nkansah-Amankra et al. [International Journal of Health Services 43(2):217-240, 2013]. Studies have shown that the consequences of these policies and ideologies likely have a role in increasing rates of mental illness. This paper will discuss how these factors increase mental distress and postulate ways that mental health professionals can advocate for change.",2021-05-25 +,Iron Physiological Requirements of Pregnant Women Assessed by the Stable Isotope Tracer Technique (P24-062-19),"Abstract

Objectives

Iron physiological requirement, the core index to formulate the dietary reference intakes (DRIs), is of great importance for the health of pregnant women and fetus and can help the mother to accurately plan iron supplement. While the direct measurement data of iron physiological requirements in pregnancy is still lacking worldwide, the objective of this study is to assess this value in Chinese pregnant women by stable isotope tracer technique.

Methods

11 women of reproductive age who had been pregnant during the study from Hebei province in 2015 were included in the final analysis. Subjects participated in a two-week metabolic test with oral intake of 50 mg 58Fe and were followed up for about 2 years. The abundance of 58Fe and the total iron concentration were detected by MC-ICP-MS and AAS. The iron physiological requirements in pregnancy were calculated by the formula.

Results

The average iron physiological requirement of 11 subjects in the whole pregnancy was 3054.12 µg/d in total and 43.99 µg/(kg.d) after weight adjusted. The iron physiological requirement in the first, second and third trimester was 2039.07 µg/d, 3258.90 µg/d and 4134.47 µg/d, respectively. The iron physiological requirement adjusted by weight in the first, second and third trimester was 32.28 µg/(kg.d), 46.90 µg/(kg.d) and 55.74 µg/(kg.d), respectively. Comparing the results of each pregnancy period, there was no significant difference (P > 0.05).

Conclusions

Pregnant women need more than twice as much iron as non-pregnant women. The iron physiological requirements in the different trimesters showed no significant difference, although there was an increasing trend in requirements as pregnancy progressed.

Funding Sources

National Natural Science Foundation of China (https://isisn.nsfc.gov.cn/egrantindex/funcindex/prjsearch-list, grant no.:81,330,066)",2019-06-01 +27733503,"Ontobee: A linked ontology data server to support ontology term dereferencing, linkage, query and integration.","Linked Data (LD) aims to achieve interconnected data by representing entities using Unified Resource Identifiers (URIs), and sharing information using Resource Description Frameworks (RDFs) and HTTP. Ontologies, which logically represent entities and relations in specific domains, are the basis of LD. Ontobee (http://www.ontobee.org/) is a linked ontology data server that stores ontology information using RDF triple store technology and supports query, visualization and linkage of ontology terms. Ontobee is also the default linked data server for publishing and browsing biomedical ontologies in the Open Biological Ontology (OBO) Foundry (http://obofoundry.org) library. Ontobee currently hosts more than 180 ontologies (including 131 OBO Foundry Library ontologies) with over four million terms. Ontobee provides a user-friendly web interface for querying and visualizing the details and hierarchy of a specific ontology term. Using the eXtensible Stylesheet Language Transformation (XSLT) technology, Ontobee is able to dereference a single ontology term URI, and then output RDF/eXtensible Markup Language (XML) for computer processing or display the HTML information on a web browser for human users. Statistics and detailed information are generated and displayed for each ontology listed in Ontobee. 
In addition, a SPARQL web interface is provided for custom advanced SPARQL queries of one or multiple ontologies.",2016-10-12 +33465314,The Influence of Quantitative Intervention Dosage on Oral Language Outcomes for Children With Developmental Language Disorder: A Systematic Review and Narrative Synthesis.,"Purpose The aim of this study was to examine the degree to which quantitative aspects of dosage (dose, dose frequency, and total intervention duration) have been examined in intervention studies for children with developmental language disorder (DLD). Additionally, to establish the optimal quantitative dosage characteristics for phonology, vocabulary, and morphosyntax outcomes. Method This registered review (PROSPERO ID CRD42017076663) adhered to PRISMA guidelines. Search terms were included in seven electronic databases. We included peer-reviewed quasi-experimental, randomized controlled trial or cohort analytical studies, published in any language between January 2006 and May 2020. Included articles reported on participants with DLD (M = 3-18 years); oral language interventions with phonology, vocabulary, or morphosyntax outcomes; and experimental manipulation or statistical analysis of any quantitative aspect of dosage. Studies were appraised using the Cochrane risk-of-bias tool. Results Two hundred forty-four articles reported on oral language interventions with children with DLD in the domains of interest; 13 focused on experimentally/statistically manipulating quantitative aspects of dosage. No article reported phonological outcomes, three reported vocabulary, and eight reported morphosyntax. Dose frequency was the most common characteristic manipulated. Conclusions Research is in its infancy, and significant further research is required to inform speech-language pathologists in practice. Dosage characteristics are rarely adequately controlled for their individual effects to be identified. 
Findings to date suggest that there is a point in vocabulary and morphosyntax interventions after which there are diminishing returns from additional dosage. If dose is high (number of learning opportunities within a session), then the literature suggests that session frequency can be reduced. Frequent, short sessions (2/3 × per week, approximately 2 min) and less frequent, long sessions (1 × per week, approximately 20 min) have yielded the best outcomes when composite language measures have been used; however, replication and further research are required before clinicians can confidently integrate these findings into clinical practice. Supplemental Material https://doi.org/10.23641/asha.13570934.",2021-01-19 +31640730,NARD: whole-genome reference panel of 1779 Northeast Asians improves imputation accuracy of rare and low-frequency variants.,"Here, we present the Northeast Asian Reference Database (NARD), including whole-genome sequencing data of 1779 individuals from Korea, Mongolia, Japan, China, and Hong Kong. NARD provides the genetic diversity of Korean (n = 850) and Mongolian (n = 384) ancestries that were not present in the 1000 Genomes Project Phase 3 (1KGP3). We combined and re-phased the genotypes from NARD and 1KGP3 to construct a union set of haplotypes. This approach established a robust imputation reference panel for Northeast Asians, which yields the greatest imputation accuracy of rare and low-frequency variants compared with the existing panels. NARD imputation panel is available at https://nard.macrogen.com/ .",2019-10-22 +33588676,Adverse School Outcomes and Risky Sexual Health Behaviors among High School Students with E-Cigarette and Marijuana Use.,"

Background

While several health risks of e-cigarette and marijuana use have been described, little is known about their associations with school-related outcomes and risky sexual behaviors in adolescents. Objectives: To determine the odds of adverse school outcomes and risky sexual behaviors among youth with single or dual use of e-cigarettes and marijuana. Methods: We used data from the 2015 and 2017 waves of the Youth Risk Behavior Survey, a nationally representative survey of high school students in the US. Participants (N=30,389) were divided into four exposure groups for single or dual use of e-cigarettes and marijuana. We compared rates of e-cigarette and/or marijuana use for different demographic characteristics using chi-square tests and performed multivariate logistic regressions exploring associations among e-cigarette and marijuana use and adverse school outcomes and risky sexual behaviors adjusting for confounding factors. Results: Participants reported e-cigarette-only (7.7%), marijuana-only (8.5%), and dual e-cigarette/marijuana (9.2%) use. Youth in all three use categories had higher odds of reporting grades that were mostly C's or lower than youth with no use, but no difference was found between youth with e-cigarette-only vs marijuana-only use. Increased odds of having sex without a condom were seen in youth with marijuana-only use (vs. e-cigarette-only use or no use) but not in youth with e-cigarette-only use or dual use. 
Conclusions: We found increased odds of adverse school-related outcomes and contrasting sexual risk profiles among youth with single or dual e-cigarette and marijuana use.Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1883659.",2021-02-15 +32392297,BIOMEX: an interactive workflow for (single cell) omics data interpretation and visualization.,"The amount of biological data, generated with (single cell) omics technologies, is rapidly increasing, thereby exacerbating bottlenecks in the data analysis and interpretation of omics experiments. Data mining platforms that facilitate non-bioinformatician experimental scientists to analyze a wide range of experimental designs and data types can alleviate such bottlenecks, aiding in the exploration of (newly generated or publicly available) omics datasets. Here, we present BIOMEX, a browser-based software, designed to facilitate the Biological Interpretation Of Multi-omics EXperiments by bench scientists. BIOMEX integrates state-of-the-art statistical tools and field-tested algorithms into a flexible but well-defined workflow that accommodates metabolomics, transcriptomics, proteomics, mass cytometry and single cell data from different platforms and organisms. The BIOMEX workflow is accompanied by a manual and video tutorials that provide the necessary background to navigate the interface and get acquainted with the employed methods. BIOMEX guides the user through omics-tailored analyses, such as data pretreatment and normalization, dimensionality reduction, differential and enrichment analysis, pathway mapping, clustering, marker analysis, trajectory inference, meta-analysis and others. BIOMEX is fully interactive, allowing users to easily change parameters and generate customized plots exportable as high-quality publication-ready figures. 
BIOMEX is open source and freely available at https://www.vibcancer.be/software-tools/biomex.",2020-07-01 +31640808,PGG.SNV: understanding the evolutionary and medical implications of human single nucleotide variations in diverse populations.,"Despite the tremendous growth of the DNA sequencing data in the last decade, our understanding of the human genome is still in its infancy. To understand the implications of genetic variants in the light of population genetics and molecular evolution, we developed a database, PGG.SNV ( https://www.pggsnv.org ), which gives much higher weight to previously under-investigated indigenous populations in Asia. PGG.SNV archives 265 million SNVs across 220,147 present-day genomes and 1018 ancient genomes, including 1009 newly sequenced genomes, representing 977 global populations. Moreover, estimation of population genetic diversity and evolutionary parameters is available in PGG.SNV, a unique feature compared with other databases.",2019-10-22 +31701148,JASPAR 2020: update of the open-access database of transcription factor binding profiles.,"JASPAR (http://jaspar.genereg.net) is an open-access database of curated, non-redundant transcription factor (TF)-binding profiles stored as position frequency matrices (PFMs) for TFs across multiple species in six taxonomic groups. In this 8th release of JASPAR, the CORE collection has been expanded with 245 new PFMs (169 for vertebrates, 42 for plants, 17 for nematodes, 10 for insects, and 7 for fungi), and 156 PFMs were updated (125 for vertebrates, 28 for plants and 3 for insects). These new profiles represent an 18% expansion compared to the previous release. JASPAR 2020 comes with a novel collection of unvalidated TF-binding profiles for which our curators did not find orthogonal supporting evidence in the literature. This collection has a dedicated web form to engage the community in the curation of unvalidated TF-binding profiles. 
Moreover, we created a Q&A forum to ease the communication between the user community and JASPAR curators. Finally, we updated the genomic tracks, inference tool, and TF-binding profile similarity clusters. All the data is available through the JASPAR website, its associated RESTful API, and through the JASPAR2020 R/Bioconductor package.",2020-01-01 +31665416,IMG-ABC v.5.0: an update to the IMG/Atlas of Biosynthetic Gene Clusters Knowledgebase.,"Microbial secondary metabolism is a reservoir of bioactive compounds of immense biotechnological and biomedical potential. The biosynthetic machinery responsible for the production of these secondary metabolites (SMs) (also called natural products) is often encoded by collocated groups of genes called biosynthetic gene clusters (BGCs). High-throughput genome sequencing of both isolates and metagenomic samples combined with the development of specialized computational workflows is enabling systematic identification of BGCs and the discovery of novel SMs. In order to advance exploration of microbial secondary metabolism and its diversity, we developed the largest publicly available database of predicted BGCs combined with experimentally verified BGCs, the Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters (IMG-ABC) (https://img.jgi.doe.gov/abc-public). Here we describe the first major content update of the IMG-ABC knowledgebase, since its initial release in 2015, refreshing the BGC prediction pipeline with the latest version of antiSMASH (v5) as well as presenting the data in the context of underlying environmental metadata sourced from GOLD (https://gold.jgi.doe.gov/). This update has greatly improved the quality and expanded the types of predicted BGCs compared to the previous version.",2020-01-01 +34034357,Talking about post-injury sexual functioning: The views of people with spinal cord injuries-A qualitative interview study.,"

Aim

This study aimed to explore perceptions of people with spinal cord injuries regarding the information they received during their rehabilitation programme on post-injury sexual functioning.

Background

Spinal cord injury is a traumatic, life-altering event that is associated with loss of motor and sensory function and sexual impairment. Existing evidence suggests that sexual issues are poorly handled during the rehabilitation phase of the patient's journey.

Design

A descriptive qualitative design was utilized in this study.

Methods

Twenty-nine people with spinal cord injury participated in qualitative in-depth interviews between November 2017 and April 2018, and data were analysed using the Burnard (1991, https://doi.org/10.1016/0260-6917(91)90009-y) thematic analysis framework.

Results

Some participants indicated they were sexually inactive prior to their spinal cord injury. They testified that they had not received information on post-injury sexual functioning. Many participants who received post-injury information on sexual functioning reported dissatisfaction with the content and timing of this information.

Conclusion

Personal conversations between spinal cord injured patients and dedicated members of the interdisciplinary health team can enhance the quality of rehabilitation care and patients' satisfaction with rehabilitation care. Nurses are central clinicians in the rehabilitation programme of spinal cord injured patients and should engage in individually designed conversations about post-injury sexual functioning.",2021-05-25 +32392295,CausalMGM: an interactive web-based causal discovery tool.,"High-throughput sequencing and the availability of large online data repositories (e.g. The Cancer Genome Atlas and Trans-Omics for Precision Medicine) have the potential to revolutionize systems biology by enabling researchers to study interactions between data from different modalities (i.e. genetic, genomic, clinical, behavioral, etc.). Currently, data mining and statistical approaches are confined to identifying correlates in these datasets, but researchers are often interested in identifying cause-and-effect relationships. Causal discovery methods were developed to infer such cause-and-effect relationships from observational data. Though these algorithms have had demonstrated successes in several biomedical applications, they are difficult to use for non-experts. So, there is a need for web-based tools to make causal discovery methods accessible. Here, we present CausalMGM (http://causalmgm.org/), the first web-based causal discovery tool that enables researchers to find cause-and-effect relationships from observational data. Web-based CausalMGM consists of three data analysis tools: (i) feature selection and clustering; (ii) automated identification of cause-and-effect relationships via a graphical model; and (iii) interactive visualization of the learned causal (directed) graph. 
We demonstrate how CausalMGM enables an end-to-end exploratory analysis of biomedical datasets, giving researchers a clearer picture of its capabilities.",2020-07-01 +32842359,[Integrated bioinformatics analysis of key genes in allergic rhinitis].,"Objective: To obtain biomarkers of allergic rhinitis (AR) by performing bioinformatics analysis on gene chips related to allergic rhinitis in the Gene Expression Database (GEO). Methods: From June 2018 to December 2019, we downloaded data (GSE46171) involving 3 control individuals and 6 AR patients from the publicly available Gene Expression Omnibus database (GEO, http://www.ncbi.nlm.nih.gov/geo), and differentially expressed genes (DEGs) were screened between AR and normal tissues by using the GEO2R online tool comprehensively. Then, we used the bioinformatics methods, including Gene Ontology (GO) analysis and Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway analysis and protein-protein interaction (PPI) network construction to identify key genes in AR. In the same period, the inferior turbinate mucosa tissues of 15 AR patients and 15 healthy controls were collected during operation in the Department of Otolaryngology Head and Neck Surgery of the People's Hospital of Wuhan University to further verify important genes and pathways and perform real-time quantitative PCR. SPSS9.0 statistical software was used for statistical analysis. Results: Two hundred and seventeen DEGs were selected, of which 112 were down-regulated genes and 105 were up-regulated genes. Among them, the five up-regulated genes with the most significant differences were KLK7, TMPRSS11A, SPRR2C, TPSAB1, and TXLNGY; the five down-regulated genes with the most significant differences were: XIST, CTAG1A, PRB1, CXCL11 and PRB2. By constructing a PPI network among 217 DEGs, the 15 hub genes obtained were IFIH1, CCR2, CD80, TLR7, EIF1AY, DDX3Y, RSAD2, RPS4Y2, RPS4Y1, XAF1, KDM5D, ZFY, NLGN4Y, IFIT5 and DDX60L; these genes were at a hub in a gene network. 
We collected inferior turbinate mucosa tissue during surgery, and these 15 genes were verified, and the expressions of IFIH1, CCR2, CD80, TLR7, RSAD2, XAF1, IFIT5 and DDX60L were reduced, whereas the expressions of EIF1AY, DDX3Y, RPS4Y2, RPS4Y1, KDM5D, ZFY and NLGN4Y were increased; the differences were statistically significant (all P<0.05). Conclusions: The study finds 217 genes closely related to allergic rhinitis and obtains 15 hub genes through the PPI network. These genes may be involved in the pathogenesis of allergic rhinitis and are expected to become new biomarkers for allergic rhinitis.",2020-05-01 +30637945,"The Quetzal Coalescence template library: A C++ programmers resource for integrating distributional, demographic and coalescent models.","Genetic samples can be used to understand and predict the behaviour of species living in a fragmented and temporally changing environment. In this regard, models of coalescence conditioned to an environment through an explicit modelling of population growth and migration have been developed in recent years, and simulators implementing these models have been developed, enabling biologists to estimate parameters of interest with Approximate Bayesian Computation techniques. However, model choice remains limited, and developing new coalescence simulators is extremely time consuming because code re-use is limited. We present Quetzal, a C++ library composed of re-usable components, which is sufficiently general to efficiently implement a wide range of spatially explicit coalescence-based environmental models of population genetics and to embed the simulation in an Approximate Bayesian Computation framework. Quetzal is not a simulation program, but a toolbox for programming simulators aimed at the community of scientific coders and research software engineers in molecular ecology and phylogeography. 
This new code resource is open-source and available at https://becheler.github.io/pages/quetzal.html along with other documentation resources.",2019-05-01 +32570384,"Heimdall, a Computer Program for Electronic Health Records Data Visualization.","

Introduction

Electronic health records (EHR) comprehend structured and unstructured data, that are usually time dependent, enabling the use of timelines. However, it is often difficult to display all data without inducing information overload. In both clinical usual care and medical research, users should be able to quickly find relevant information, with minimal cognitive overhead. Our goal was to devise simple visualization techniques for handling medical data in both contexts.

Methods

An abstraction layer for structured EHR data was devised after an informal literature review and discussions between authors. The ""Heimdall"" prototype was developed. Two experts evaluated the tool by answering 5 questions on 24 clinical cases.

Results

Temporal data was abstracted in three simple types: events, states and measures, with appropriate visual representations for each type. Heimdall can load and display complex heterogeneous structured temporal data in a straightforward way. The main view can display events, states and measures along a shared timeline. Users can summarize data using temporal, hierarchical compression and filters. Default and custom views can be used to work in problem-oriented ways. The evaluation found conclusive results.

Conclusion

The ""Heimdall"" prototype provides a comprehensive and efficient graphical interface for EHR data visualization. It is open source, can be used with an R package, and is available at https://koromix.dev/files/R.",2020-06-01 +30718155,Utilising a Data Capture Tool to Populate a Cardiac Rehabilitation Registry: A Feasibility Study.,"BACKGROUND:Clinical registries are effective for monitoring clinical practice, yet manual data collection can limit their implementation and sustainability. The objective of this study was to assess the feasibility of using a data capture tool to collect cardiac rehabilitation (CR) minimum variables from electronic hospital administration databases to populate a new CR registry in Australia. METHODS:Two CR facilities located in Melbourne, Australia participated, providing data on 42 variables including: patient socio-demographics, risk factors and co-morbidities, CR program information (e.g. number of CR sessions), process indicators (e.g. wait time) and patient outcomes (e.g. change in exercise capacity). A pre-programmed, automated data capture tool (GeneRic Health Network Information for the Enterprise [20]: https://www.grhanite.com/) (GRHANITE™) was installed at the sites to extract data available in an electronic format from hospital sites. Additionally, clinicians entered data on CR patients into a purpose-built web-based tool (Research Electronic Data Capture: https://www.project-redcap.org/) (REDCap). Formative evaluation including staff feedback was collected. RESULTS:The GRHANITE™ tool was successfully installed at the two CR sites and data from 176 patients (median age = 67 years, 76% male) were securely extracted between September-December 2017. Data pulled electronically from hospital databases was limited to seven of the 42 requested variables. This is due to CR sites only capturing basic patient information (e.g. socio-demographics, CR appointment bookings) in hospital administrative databases. 
The remaining clinical information required for the CR registry was collected in formats (e.g. paper-based, scanned or Excel spreadsheet) deemed unusable for electronic data capture. Manually entered data into the web-tool enabled data collection on all remaining variables. Compared to historical methods of data collection, CR staff reported that the REDCap tool reduced data entry time. CONCLUSIONS:The key benefits of a scalable, automated data capture tool like GRHANITE™ cannot be fully realised in settings with under-developed electronic health infrastructure. While this approach remains promising for creating and maintaining a registry that monitors the quality of CR provided to patients, further investment is required in the digital platforms underpinning this approach.",2019-01-28 +32889703,Six- and Twelve-Month Follow-up Results of a Cluster Randomized Controlled Trial of a CBT-Based Group Course.,"In the current study, we examined the durability of intervention gains over a 6- and 12-month follow-up period after the implementation of a CBT-based group intervention ""Adolescent Coping with Depression Course"" (ACDC) for adolescents with subclinical or mild-to-moderate depression. Data were collected from 228 youth, 133 of whom were allocated to the 14-week ACDC intervention and 95 to the usual care (UC) control condition. Analyses for the main outcome variable of depressive symptoms were performed using a random effects repeated measures piecewise growth model to estimate trajectory shape over time on an intention-to-treat basis. Results revealed that the reduction in depressive symptoms achieved during the intervention phase continued across the follow-up period for both ACDC and UC (i.e., depressive symptoms showed a significantly decreasing trend in both groups in intervention and follow-up phases); however, no differential effects between conditions were found during the follow-up phase. 
The direct and indirect effects of the intervention on the other outcome variables' follow-up results were also presented. ISRCTN registry ISRCTN19700389. Registered 6 October 2015. https://doi.org/10.1186/ISRCTN19700389 . Full Protocol: https://doi.org/10.1186/s12888-016-0954-y.",2021-05-01 +32393982,iPiDi-PUL: identifying Piwi-interacting RNA-disease associations based on positive unlabeled learning. ,"Accumulated researches have revealed that Piwi-interacting RNAs (piRNAs) are regulating the development of germ and stem cells, and they are closely associated with the progression of many diseases. As the number of the detected piRNAs is increasing rapidly, it is important to computationally identify new piRNA-disease associations with low cost and provide candidate piRNA targets for disease treatment. However, it is a challenging problem to learn effective association patterns from the positive piRNA-disease associations and the large amount of unknown piRNA-disease pairs. In this study, we proposed a computational predictor called iPiDi-PUL to identify the piRNA-disease associations. iPiDi-PUL extracted the features of piRNA-disease associations from three biological data sources, including piRNA sequence information, disease semantic terms and the available piRNA-disease association network. Principal component analysis (PCA) was then performed on these features to extract the key features. The training datasets were constructed based on known positive associations and the negative associations selected from the unknown pairs. Various random forest classifiers trained with these different training sets were merged to give the predictive results via an ensemble learning approach. 
Finally, the web server of iPiDi-PUL was established at http://bliulab.net/iPiDi-PUL to help the researchers to explore the associated diseases for newly discovered piRNAs.",2021-05-01 +31500643,OPA1: 516 unique variants and 831 patients registered in an updated centralized Variome database.,"BACKGROUND:The dysfunction of OPA1, a dynamin GTPase involved in mitochondrial fusion, is responsible for a large spectrum of neurological disorders, each of which includes optic neuropathy. The database dedicated to OPA1 ( https://www.lovd.nl/OPA1 ), created in 2005, has now evolved towards a centralized and more reliable database using the Global Variome shared Leiden Open-source Variation Database (LOVD) installation. RESULTS:The updated OPA1 database, which registers all the patients from our center as well as those reported in the literature, now covers a total of 831 patients: 697 with isolated dominant optic atrophy (DOA), 47 with DOA ""plus"", and 83 with asymptomatic or unclassified DOA. It comprises 516 unique OPA1 variants, of which more than 80% (414) are considered pathogenic. Full clinical data for 118 patients are documented using the Human Phenotype Ontology, a standard vocabulary for referencing phenotypic abnormalities. Contributors may now make online submissions of phenotypes related to OPA1 mutations, giving clinical and molecular descriptions together with detailed ophthalmological and neurological data, according to an international thesaurus. 
CONCLUSIONS:The evolution of the OPA1 database towards the LOVD, using unified nomenclature, should ensure its interoperability with other databases and prove useful for molecular diagnoses based on gene-panel sequencing, large-scale mutation statistics, and genotype-phenotype correlations.",2019-09-10 +31106372,Yvis: antibody high-density alignment visualization and analysis platform with an integrated database.,"As antibodies are a very important tool for diagnosis, therapy, and experimental biology, a large number of antibody structures and sequences have become available in recent years. Therefore, tools that allow the analysis, comparison, and visualization of this large amount of antibody data are crucially needed. We developed the antibody high-density alignment visualization and analysis (Yvis) platform to provide an innovative, robust and high-density data visualization of antibody sequence alignments, called Collier de Diamants. The Yvis platform also provides an integrated structural database, which is updated weekly, and many different search and filter options. This platform can help to formulate hypotheses concerning the key residues in antibody structures or interactions to improve the understanding of antibody properties. The Yvis platform is available at http://bioinfo.icb.ufmg.br/yvis/.",2019-07-01 +32685972,FoldRec-C2C: protein fold recognition by combining cluster-to-cluster model and protein similarity network. ,"As a key for studying the protein structures, protein fold recognition is playing an important role in predicting the protein structures associated with COVID-19 and other important structures. However, the existing computational predictors only focus on the protein pairwise similarity or the similarity between two groups of proteins from 2-folds. However, the homology relationship among proteins is in a hierarchical structure. The global protein similarity network will contribute to the performance improvement. 
In this study, we proposed a predictor called FoldRec-C2C to globally incorporate the interactions among proteins into the prediction. For the FoldRec-C2C predictor, the protein fold recognition problem is treated as an information retrieval task in natural language processing. The initial ranking results were generated by a supervised ranking algorithm Learning to Rank, and then three re-ranking algorithms were performed on the ranking lists to adjust the results globally based on the protein similarity network, including seq-to-seq model, seq-to-cluster model and cluster-to-cluster model (C2C). When tested on a widely used and rigorous benchmark dataset LINDAHL dataset, FoldRec-C2C outperforms 34 other state-of-the-art methods in this field. The source code and data of FoldRec-C2C can be downloaded from http://bliulab.net/FoldRec-C2C/download.",2021-05-01 +31208323,Laboratory information management software for engineered mini-protein therapeutic workflow.,"

Background

Protein based therapeutics are one of the fastest growing classes of novel medical interventions in areas such as cancer, infectious disease, and inflammation. Protein engineering plays an important role in the optimization of desired therapeutic properties such as reducing immunogenicity, increasing stability for storage, increasing target specificity, etc. One category of protein therapeutics is nature-inspired bioengineered cystine-dense peptides (CDPs) for various biological targets. These engineered proteins are often further modified by synthetic chemistry. For example, candidate mini-proteins can be conjugated into active small molecule drugs. We refer to modified mini-proteins as ""Optides"" (Optimized peptides). To efficiently serve the multidisciplinary lab scientists with varied therapeutic portfolio research goals in a non-commercial setting, a cost-effective extendable laboratory information management system (LIMS) was needed.

Results

We have developed a LIMS named Optide-Hunter for a generalized engineered protein compounds workflow that tracks entities and assays from creation to preclinical experiments. The implementation and custom modules are built using LabKey server, which is an Open Source platform for scientific data integration and analysis. Optide-Hunter contains a compound registry, in-silico assays, high throughput production, large-scale production, in vivo assays and data extraction from a specimen-tracking database. It is used to store, extract, and view data for various therapeutics projects. Optide-Hunter also includes external processing stand-alone software (HPLCPeakClassifierApp) for automated chromatogram classification. The HPLCPeakClassifierApp is used for pre-processing of HPLC data prior to loading to Optide-Hunter. The custom implementation is done using data transformation modules in R, SQL, javascript, and java and is Open Source to assist new users in customizing it for their unique workflows. Instructions for exploring a deployed version of Optide-Hunter can be found at https://www.labkey.com/case%20study/optide-hunter CONCLUSION: The Optide-Hunter LIMS system is designed and built to track the process of engineering, producing and prioritizing protein therapeutic candidates. It can be easily adapted and extended for use in small or large research laboratories where multidisciplinary scientists are collaborating to engineer compounds for potential therapeutic or protein science applications. Open Source exploration of Optide-Hunter can help any bioinformatics scientist adapt, extend, and deploy an equivalent system tailored to each laboratory's workflow.",2019-06-17 +32639365,A transcriptional toolbox for exploring peripheral neuroimmune interactions.,"

Abstract

Correct communication between immune cells and peripheral neurons is crucial for the protection of our bodies. Its breakdown is observed in many common, often painful conditions, including arthritis, neuropathies, and inflammatory bowel or bladder disease. Here, we have characterised the immune response in a mouse model of neuropathic pain using flow cytometry and cell-type-specific RNA sequencing (RNA-seq). We found few striking sex differences, but a very persistent inflammatory response, with increased numbers of monocytes and macrophages up to 3 1/2 months after the initial injury. This raises the question of whether the commonly used categorisation of pain into ""inflammatory"" and ""neuropathic"" is one that is mechanistically appropriate. Finally, we collated our data with other published RNA-seq data sets on neurons, macrophages, and Schwann cells in naive and nerve injury states. The result is a practical web-based tool for the transcriptional data mining of peripheral neuroimmune interactions. http://rna-seq-browser.herokuapp.com/.",2020-09-01 +34029472,Thermodynamics-Based Molecular Modeling of α-Helices in Membranes and Micelles.,"The Folding of Membrane-Associated Peptides (FMAP) method was developed for modeling α-helix formation by linear peptides in micelles and lipid bilayers. FMAP 2.0 identifies locations of α-helices in the amino acid sequence, generates their three-dimensional models in planar bilayers or spherical micelles, and estimates their thermodynamic stabilities and tilt angles, depending on temperature and pH. The method was tested for 723 peptides (926 data points) experimentally studied in different environments and for 170 single-pass transmembrane (TM) proteins with available crystal structures. FMAP 2.0 detected more than 95% of experimentally observed α-helices with an average error in helix end determination of around 2, 3, 4, and 5 residues per helix for peptides in water, micelles, bilayers, and TM proteins, respectively. 
Helical and nonhelical residue states were predicted with an accuracy from 0.86 to 0.96, and the Matthews correlation coefficient was from 0.64 to 0.88 depending on the environment. Experimental micelle- and membrane-binding energies and tilt angles of peptides were reproduced with a root-mean-square deviation of around 2 kcal/mol and 7°, respectively. The TM and non-TM states of hydrophobic and pH-triggered α-helical peptides in various lipid bilayers were reproduced in more than 95% of cases. The FMAP 2.0 web server (https://membranome.org/fmap) is publicly available to explore the structural polymorphism of antimicrobial, cell-penetrating, fusion, and other membrane-binding peptides, which is important for understanding the mechanisms of their biological activities.",2021-05-24 +32145010,nanoTRON: a Picasso module for MLP-based classification of super-resolution data.,"MOTIVATION:Classification of images is an essential task in higher-level analysis of biological data. By bypassing the diffraction limit of light, super-resolution microscopy opened up a new way to look at molecular details using light microscopy, producing large amounts of data with exquisite spatial detail. Statistical exploration of data usually needs initial classification, which is up to now often performed manually. RESULTS:We introduce nanoTRON, an interactive open-source tool, which allows super-resolution data classification based on image recognition. It extends the software package Picasso with the first deep learning tool with a graphic user interface. AVAILABILITY AND IMPLEMENTATION:nanoTRON is written in Python and freely available under the MIT license as a part of the software collection Picasso on GitHub (http://www.github.com/jungmannlab/picasso). All raw data can be obtained from the authors upon reasonable request. CONTACT:jungmann@biochem.mpg.de. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-06-01 +33677518,DeepS: A web server for image optical sectioning and super resolution microscopy based on a deep learning framework. ,"Microscopy technology plays important roles in many biological research fields. Solvent-cleared brain high-resolution (HR) 3 D image reconstruction is an important microscopy application. However, 3 D microscopy image generation is time-consuming and expensive. Therefore, we have developed a deep learning framework (DeepS) for both image optical sectioning and super resolution microscopy. Using DeepS to perform super resolution solvent-cleared mouse brain microscopy 3 D image yields improved performance in comparison with the standard image processing workflow. We have also developed a web server to allow online usage of DeepS. Users can train their own models with only one pair of training images using the transfer learning function of the web server. http://deeps.cibr.ac.cn. Supplementary data are available at Bioinformatics online.",2021-03-02 +32301980,"Galaxy HiCExplorer 3: a web server for reproducible Hi-C, capture Hi-C and single-cell Hi-C data analysis, quality control and visualization.","The Galaxy HiCExplorer provides a web service at https://hicexplorer.usegalaxy.eu. It enables the integrative analysis of chromosome conformation by providing tools and computational resources to pre-process, analyse and visualize Hi-C, Capture Hi-C (cHi-C) and single-cell Hi-C (scHi-C) data. Since the last publication, Galaxy HiCExplorer has been expanded considerably with new tools to facilitate the analysis of cHi-C and to provide an in-depth analysis of Hi-C data. Moreover, it supports the analysis of scHi-C data by offering a broad range of tools. 
With the help of the standard graphical user interface of Galaxy, presented workflows, extensive documentation and tutorials, novices as well as Hi-C experts are supported in their Hi-C data analysis with Galaxy HiCExplorer.",2020-07-01 +30101316,Processing of big heterogeneous genomic datasets for tertiary analysis of Next Generation Sequencing data.,"

Motivation

We previously proposed a paradigm shift in genomic data management, based on the Genomic Data Model (GDM) for mediating existing data formats and on the GenoMetric Query Language (GMQL) for supporting, at a high level of abstraction, data extraction and the most common data-driven computations required by tertiary data analysis of Next Generation Sequencing datasets. Here, we present a new GMQL-based system with enhanced accessibility, portability, scalability and performance.

Results

The new system has a well-designed modular architecture featuring: (i) an intermediate representation supporting many different implementations (including Spark, Flink and SciDB); (ii) a high-level technology-independent repository abstraction, supporting different repository technologies (e.g., local file system, Hadoop File System, database or others); (iii) several system interfaces, including a user-friendly Web-based interface, a Web Service interface, and a programmatic interface for Python language. Biological use case examples, using public ENCODE, Roadmap Epigenomics and TCGA datasets, demonstrate the relevance of our work.

Availability and implementation

The GMQL system is freely available for non-commercial use as open source project at: http://www.bioinformatics.deib.polimi.it/GMQLsystem/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +,O1.8. UNDERSTANDING THE HERITABILITY OF SCHIZOPHRENIA,"Abstract

Background

Schizophrenia is highly heritable as shown by MZ/DZ twin comparisons and is calculated to be at about 80%, although if familial transmission were Mendelian, it would be considered 100%. Since the field of genetics has produced the needed technology to examine DNA at the molecular level (beginning in the 1980’s), researchers have attempted to find a major gene for schizophrenia, without success. Once it was realized that the most likely underlying genetic mechanism is multiple gene variants of small individual effect, it was realized that large sample sizes would be needed to detect them. Thus, international collaborations were organized, the most important being the Psychiatric Genomics Consortium (PGC), now over 900 investigators having accumulated over 100,000 individual DNA samples.

Methods

The most current analyses of these data comparing patients with schizophrenia to controls have now yielded over 250 genome-wide significant loci producing modest elevation in the risk for schizophrenia (unpublished data presented at the World Congress of Psychiatric Genetics 10/18; https://www.med.unc.edu/pgc/pgc-workgroups). However, when taken together the heritability due to these loci or SNPs (SNP-Based Heritability) is calculated to be only about 23% (Lee et al., 2012). The difference between the overall heritability and the contribution of modest effect loci is thus 57%. This latter figure is the so-called “missing heritability” and is likely due to mutations of large effect that segregate with illness within some families. Recently we performed a sequencing study of families multiply affected with schizophrenia.

Results

We found mutations present in all affecteds and no unaffecteds in 7 of 9 families (Homann et al., 2016). These mutations were unique to each family but were within genes relevant to neuronal differentiation and migration, such as the SHANK-2 gene in one large family. Other investigators have similar findings in different families.

Discussion

In summary, the genetic basis for schizophrenia is heterogeneous, while some individuals inherit a large number of common risk genes, others inherit mutations that are of high risk within their individual families. This heterogeneity makes it unlikely that molecular genetics will be useful for prediction of who will develop schizophrenia, but on the other hand, classes of genes involved may lead to novel drug targets for developing future medications. References: Lee SH, DeCandia TR, Ripke S, Yang J; Schizophrenia Psychiatric Genome-Wide Association Study Consortium (PGC-SCZ); International Schizophrenia Consortium (ISC); Molecular Genetics of Schizophrenia Collaboration (MGS), Sullivan PF, Goddard ME, Keller MC, Visscher PM, Wray NR. Estimating the proportion of variation in susceptibility to schizophrenia captured by common SNPs. Nat Genet. 2012 Feb 19;44(3):247–50. doi: 10.1038/ng.1108. Homann OR, Misura K, Lamas E, Sandrock RW, Nelson P, McDonough SI, DeLisi LE. Whole-genome sequencing in multiplex families with psychoses reveals mutations in the SHANK2 and SMARCA1 genes segregating with illness..Mol Psychiatry. 2016; 21: 1690–1695. doi: 10.1038/mp.2016.24. PMID: 27001614",2019-04-01 +34338552,"Long-Term Exposure to Road Traffic Noise and Air Pollution, and Incident Atrial Fibrillation in the Danish Nurse Cohort.","

Background

Associations between long-term exposure to air pollution and road traffic noise have been established for ischemic heart disease, but findings have been mixed for atrial fibrillation (AF).

Objectives

The goal of the study was to examine associations of long-term exposure to road traffic noise and air pollution with AF.

Methods

Time-varying Cox regression models were used to estimate associations of 1-, 3-, and 23-y mean road traffic noise and air pollution exposures with AF incidence in 23,528 women enrolled in the Danish Nurse Cohort (age >44y at baseline in 1993 or 1999). AF diagnoses were ascertained via the Danish National Patient Register. Annual mean weighted 24-h average road traffic noise levels (Lden) at the nurses' residences, since 1970, were estimated using the Nord2000 model, and annual mean levels of particulate matter with a diameter <2.5μm (PM2.5) and nitrogen dioxide (NO2) were estimated using the DEHM/UBM/AirGIS model.

Results

Of 23,528 nurses with no prior AF diagnosis at the cohort baseline, 1,522 developed AF during follow-up. In a fully adjusted model (including PM2.5), the estimated risk of AF was 18% higher [hazard ratio (HR); 95% confidence interval (CI): 1.18; 1.02, 1.36] in nurses with residential 3-y mean Lden levels >58 dB vs. <48 dB, with similar findings for 1-y mean exposures. A 3.9-μg/m3 increase in 3-y mean PM2.5 was associated with incident AF before and after adjustment for concurrent exposure to road traffic noise (HR 1.09; 95% CI: 1.00, 1.20 and 1.08; 95% CI: 0.97, 1.19, respectively). Associations with 1-y mean PM2.5 exposures were positive but closer to the null and not significant. Associations with NO2 were null for all time periods before and after adjustment for road traffic noise and inverse when adjusted for concurrent PM2.5.

Conclusion

Our analysis of prospective data from a cohort of Danish female nurses followed for up to 14 y provided suggestive evidence of independent associations between incident AF and 1- and 3-y exposures to road traffic noise and PM2.5. https://doi.org/10.1289/EHP8090.",2021-08-02 +33982118,Serum Erythroferrone During Pregnancy Is Related to Erythropoietin but Does Not Predict the Risk of Anemia.,"

Background

Maintaining adequate iron status during pregnancy is important for the mother and her developing fetus. Iron homeostasis is influenced by 3 regulatory hormones: erythropoietin (EPO), hepcidin, and erythroferrone (ERFE). To date, normative data on ERFE across pregnancy and its relations to other hormones and iron status indicators are limited.

Objectives

The objective of this study was to characterize maternal ERFE across pregnancy and at delivery and evaluate the utility of hepcidin, ERFE, and EPO in identifying women with increased iron needs.

Methods

ERFE was measured in extant serum samples collected from 2 longitudinal cohorts composed of women carrying multiple fetuses (n = 79) and pregnant adolescents (n = 218) at midgestation (∼26 wk) and delivery (∼39 wk). Receiver operating characteristic curves were generated to characterize the predictive ability of serum ERFE, hepcidin, and EPO and their ratios to identify women at increased risk of iron deficiency and anemia.

Results

In these pregnant women, mean ERFE was 0.48 ng/mL at both ∼25 wk of gestation and at delivery. ERFE was positively associated with EPO at midgestation (β = 0.14, P = 0.002, n = 202) and delivery (β = 0.12, P < 0.001, n = 225) but was not significantly associated with maternal hepcidin at any time point surveyed. Of all hormones measured at midgestation and delivery, EPO was best able to identify women with anemia (AUC: 0.86 and 0.75, respectively) and depleted iron stores (AUC: 0.77 and 0.84), whereas the hepcidin-to-EPO ratio was best able to identify women with iron deficiency anemia (AUC: 0.85 and 0.84).

Conclusions

Maternal ERFE was significantly associated with EPO but was not able to identify women with gestational iron deficiency. At term, the hepcidin-to-EPO ratio, an index that accounts for both iron status and erythropoietic demand, and EPO were the strongest indicators of maternal iron deficiency and anemia. This trial was registered at clinicaltrials.gov as NCT04517734 (https://clinicaltrials.gov/ct2/show/NCT04517734).",2021-07-01 +31375602,Impact of maternal education on response to lifestyle interventions to reduce gestational weight gain: individual participant data meta-analysis.,"

Objectives

To identify if maternal educational attainment is a prognostic factor for gestational weight gain (GWG), and to determine the differential effects of lifestyle interventions (diet based, physical activity based or mixed approach) on GWG, stratified by educational attainment.

Design

Individual participant data meta-analysis using the previously established International Weight Management in Pregnancy (i-WIP) Collaborative Group database (https://iwipgroup.wixsite.com/collaboration). Preferred Reporting Items for Systematic reviews and Meta-Analysis of Individual Participant Data Statement guidelines were followed.

Data sources

Major electronic databases, from inception to February 2017.

Eligibility criteria

Randomised controlled trials on diet and physical activity-based interventions in pregnancy. Maternal educational attainment was required for inclusion and was categorised as higher education (≥tertiary) or lower education (≤secondary).

Risk of bias

Cochrane risk of bias tool was used.

Data synthesis

Principle measures of effect were OR and regression coefficient.

Results

Of the 36 randomised controlled trials in the i-WIP database, 21 trials and 5183 pregnant women were included. Women with lower educational attainment had an increased risk of excessive (OR 1.182; 95% CI 1.008 to 1.385, p =0.039) and inadequate weight gain (OR 1.284; 95% CI 1.045 to 1.577, p =0.017). Among women with lower education, diet based interventions reduced risk of excessive weight gain (OR 0.515; 95% CI 0.339 to 0.785, p = 0.002) and inadequate weight gain (OR 0.504; 95% CI 0.288 to 0.884, p=0.017), and reduced kg/week gain (B -0.055; 95% CI -0.098 to -0.012, p=0.012). Mixed interventions reduced risk of excessive weight gain for women with lower education (OR 0.735; 95% CI 0.561 to 0.963, p=0.026). Among women with high education, diet based interventions reduced risk of excessive weight gain (OR 0.609; 95% CI 0.437 to 0.849, p=0.003), and mixed interventions reduced kg/week gain (B -0.053; 95% CI -0.069 to -0.037, p<0.001). Physical activity based interventions did not impact GWG when stratified by education.

Conclusions

Pregnant women with lower education are at an increased risk of excessive and inadequate GWG. Diet based interventions seem the most appropriate choice for these women, and additional support through mixed interventions may also be beneficial.",2019-08-01 +34737066,Ground truth generalizability affects performance of the artificial intelligence model in automated vertebral fracture detection on plain lateral radiographs of the spine.,"

Background context

Computer-aided diagnosis with artificial intelligence (AI) has been used clinically, and ground truth generalizability is important for AI performance in medical image analyses. An AI model trained on one specific group of older adults (aged≧60) has not yet been shown to work equally well in a younger adult group (aged 18-59).

Purpose

To compare the performance of the developed AI model with ensemble method trained with the ground truth for those aged 60 years or older in identifying vertebral fractures (VFs) on plain lateral radiographs of spine (PLRS) between younger and older adult populations.

Study design/setting

Retrospective analysis of PLRS in a single medical institution.

Outcome measures

Accuracy, sensitivity, specificity, and interobserver reliability (kappa value) were used to compare diagnostic performance of the AI model and subspecialists' consensus between the two groups.

Methods

Between January 2016 and December 2018, the ground truth of 941 patients (one PLRS per person) aged 60 years and older with 1101 VFs and 6358 normal vertebrae was used to set up the AI model. The framework of the developed AI model includes: object detection with You Only Look Once Version 3 (YOLOv3) at T0-L5 levels in the PLRS, data pre-preprocessing with image-size and quality processing, and AI ensemble model (ResNet34, DenseNet121, and DenseNet201) for identifying or grading VFs. The reported overall accuracy, sensitivity and specificity were 92%, 91% and 93%, respectively, and external validation was also performed. Thereafter, patients diagnosed as VFs and treated in our institution during October 2019 to August 2020 were the study group regardless of age. In total, 258 patients (339 VFs and 1725 normal vertebrae) in the older adult population (mean age 78±10.4; range, 60-106) were enrolled. In the younger adult population (mean age 36±9.43; range, 20-49), 106 patients (120 VFs and 728 normal vertebrae) were enrolled. After identification and grading of VFs based on the Genant method with consensus between two subspecialists', VFs in each PLRS with human labels were defined as the testing dataset. The corresponding CT or MRI scan was used for labeling in the PLRS. The bootstrap method was applied to the testing dataset.

Results

The model for clinical application, Digital Imaging and Communications in Medicine (DICOM) format, is uploaded directly (available at: http://140.113.114.104/vght_demo/svf-model (grading) and http://140.113.114.104/vght_demo/svf-model2 (labeling)). Overall accuracy, sensitivity and specificity in the older adult population were 93.36% (95% CI 93.34%-93.38%), 88.97% (95% CI 88.59%-88.99%) and 94.26% (95% CI 94.23%-94.29%), respectively. Overall accuracy, sensitivity and specificity in the younger adult population were 93.75% (95% CI 93.7%-93.8%), 65.00% (95% CI 64.33%-65.67%) and 98.49% (95% CI 98.45%-98.52%), respectively. Accuracy reached 100% in VFs grading once the VFs were labeled accurately. The unique pattern of limbus-like VFs, 43 (35.8%) were investigated only in the younger adult population. If limbus-like VFs from the dataset were not included, the accuracy increased from 93.75% (95% CI 93.70%-93.80%) to 95.78% (95% CI 95.73%-95.82%), sensitivity increased from 65.00% (95% CI 64.33%-65.67%) to 70.13% (95% CI 68.98%-71.27%) and specificity remained unchanged at 98.49% (95% CI 98.45%-98.52%), respectively. The main causes of false negative results in older adults were patients' lung markings, diaphragm or bowel airs (37%, n=14) followed by type I fracture (29%, n=11). The main causes of false negatives in younger adults were limbus-like VFs (45%, n=19), followed by type I fracture (26%, n=11). The overall kappa between AI discrimination and subspecialists' consensus in the older and younger adult populations were 0.77 (95% CI, 0.733-0.805) and 0.72 (95% CI, 0.6524-0.80), respectively.

Conclusions

The developed VF-identifying AI ensemble model based on ground truth of older adults achieved better performance in identifying VFs in older adults and non-fractured thoracic and lumbar vertebrae in the younger adults. Different age distribution may have potential disease diversity and implicate the effect of ground truth generalizability on the AI model performance.",2021-11-01 +33508080,Associations between Maternal Dietary Patterns and Perinatal Outcomes: A Systematic Review and Meta-Analysis of Cohort Studies.,"The aim was to systematically review and meta-analyze prospective cohort studies investigating the relation between maternal dietary patterns during pregnancy with pregnancy and birth outcomes. PubMed, Scopus, and ISI Web of Science were searched from inception until October 2019 for eligible studies. Studies reporting relative risk, ORs, or incidences (for binary data) or means ± SDs or B-coefficients (for continuous outcomes) comparing the highest and lowest adherence with maternal dietary patterns were included. Dietary patterns were categorized as ""healthy,"" ""unhealthy,"" or ""mixed."" No language restrictions were applied. Study-specific effect sizes with SEs for outcomes of interest were pooled using a random-effects model. Quality of evidence was assessed using Grading of Recommendations Assessment, Development, and Evaluation (GRADE). Sixty-six relevant publications were included. A higher maternal adherence to a healthy diet was associated with a reduced risk of gestational hypertension (14%, P < 0.001), maternal depression (40%, P = 0.004), low birth weight (28%, P = 0.001), preterm birth (56%, P < 0.001), higher gestational weight gain (Hedges' g: 0.15; P = 0.01), and birth weight (Hedges' g: 0.19; P = 0.007). Higher maternal adherence to an unhealthy or a mixed diet was associated with higher odds of gestational hypertension (23%, P < 0.001 for unhealthy, and 8%, P = 0.01 for mixed diet). 
In stratified analyses, a higher healthy eating index was associated with reduced odds of being large based on gestational age (31%, P = 0.02) and a higher head circumference at birth (0.23 cm, P = 0.02). The Mediterranean and ""prudent"" dietary patterns were related to lower odds of being small based on gestational age (46%, P = 0.04) and preterm birth (52%, P = 0.03), respectively. The overall GRADE quality of the evidence for most associations was low or very low, indicating that future high-quality research is warranted. This study was registered at http://www.crd.york.ac.uk/PROSPERO as CRD42018089756.",2021-07-01 +33756618,"WCO-Lite version 1.1: an online nomenclatural catalogue of harvestmen of the world (Arachnida, Opiliones) curated in TaxonWorks.","The ""World Catalogue of Opiliones"" (WCO) is a collaborative effort to comprehensively index the Earth's species of harvestmen. This paper announces one component of the WCO, ""WCO-Lite"" a website available at https://wcolite.com/. WCO-Lite provides a graphic user interface for a second component of the WCO, ""Opiliones of the World"", a database on the taxonomy of the harvestmen curated in TaxonWorks (TW). WCO-Lite interfaces include: (1) a checklist of all valid taxa of the arachnid Opiliones, exhaustive up to December 2018; (2) a taxonomic tree; (3) a search engine comprising two modules; and (4) a counter of species diversity for each taxon. An e-Book companion was launched simultaneously with WCO-Lite version 1.1 on September 12, 2020 to account for the formal publication of mandatory nomenclatural changes and availability of taxonomic names. The collective components of the WCO are also being summarized in a forthcoming conventional paper-form catalogue, currently in manuscript stage.",2021-01-15 +32664861,Seamless integration of image and molecular analysis for spatial transcriptomics workflows.,"

Background

Recent advancements in in situ gene expression technologies constitute a new and rapidly evolving field of transcriptomics. With the recent launch of the 10x Genomics Visium platform, such methods have started to become widely adopted. The experimental protocol is conducted on individual tissue sections collected from a larger tissue sample. The two-dimensional nature of this data requires multiple consecutive sections to be collected from the sample in order to construct a comprehensive three-dimensional map of the tissue. However, there is currently no software available that lets the user process the images, align stacked experiments, and finally visualize them together in 3D to create a holistic view of the tissue.

Results

We have developed an R package named STUtility that takes 10x Genomics Visium data as input and provides features to perform standardized data transformations, alignment of multiple tissue sections, regional annotation, and visualizations of the combined data in a 3D model framework.

Conclusions

STUtility lets the user process, analyze and visualize multiple samples of spatially resolved RNA sequencing and image data from the 10x Genomics Visium platform. The package builds on the Seurat framework and uses familiar APIs and well-proven analysis methods. An introduction to the software package is available at https://ludvigla.github.io/STUtility_web_site/ .",2020-07-14 +32663247,TE-greedy-nester: structure-based detection of LTR retrotransposons and their nesting.,"

Motivation

Transposable elements (TEs) in eukaryotes often get inserted into one another, forming sequences that become a complex mixture of full-length elements and their fragments. The reconstruction of full-length elements and the order in which they have been inserted is important for genome and transposon evolution studies. However, the accumulation of mutations and genome rearrangements over evolutionary time makes this process error-prone and decreases the efficiency of software aiming to recover all nested full-length TEs.

Results

We created software that uses a greedy recursive algorithm to mine increasingly fragmented copies of full-length LTR retrotransposons in assembled genomes and other sequence data. The software called TE-greedy-nester considers not only sequence similarity but also the structure of elements. This new tool was tested on a set of natural and synthetic sequences and its accuracy was compared to similar software. We found TE-greedy-nester to be superior in a number of parameters, namely computation time and full-length TE recovery in highly nested regions.

Availability and implementation

http://gitlab.fi.muni.cz/lexa/nested.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +34746714,Novel genetic variants associated with mortality after unrelated donor allogeneic hematopoietic cell transplantation.,"

Background

Identification of non-human leukocyte antigen (HLA) genetic risk factors could improve survival after allogeneic blood or marrow transplant (BMT) through matching at additional loci or individualizing risk prediction. We hypothesized that non-HLA loci contributed significantly to 1-year overall survival (OS), disease related mortality (DRM) or transplant related mortality (TRM) after unrelated donor (URD)BMT.

Methods

We performed a genome-wide association study (GWAS) in 2,887 acute myeloid leukemia (AML), myelodysplastic syndrome (MDS) and acute lymphoblastic leukemia (ALL) patients and their ≥8/8 HLA-matched URDs comprising two independent cohorts treated from 2000-2011.

Findings

Using meta-analyses of both cohorts, genome-wide significant associations (p < 5 × 10^-8) were identified in: recipient genomes with OS at MBNL1 (rs9990017, HR = 1.4, 95% CI 1.24-1.56, p = 3.3 × 10^-8) and donor-recipient genotype mismatch with OS at LINC02774 (rs10927108, HR = 1.34, 95% CI 1.21-1.48, p = 2.0 × 10^-8); donor genomes with DRM at PCNX4 (rs79076914, HR = 1.7, 95% CI 1.41-2.05, p = 3.15 × 10^-8), LINC01194 (rs79498125, HR = 1.86, 95% CI 1.49-2.31, p = 2.84 × 10^-8), ARID5B (rs2167710, HR = 1.5, 95% CI 1.31-1.73, p = 6.9 × 10^-9) and CT49 (rs32250, HR = 1.44, 95% CI 1.26-1.64, p = 2.6 × 10^-8); recipient genomes at PILRB with TRM (rs141591562, HR = 2.33, 95% CI 1.74-3.12, p = 1.26 × 10^-8) and donor-recipient genotype mismatch between EPGN and MTHF2DL with TRM (rs75868097, HR = 2.66, 95% CI 1.92-3.58, p = 4.6 × 10^-9). Results publicly available at https://fuma.ctglab.nl/browse.

Interpretation

These data provide the first evidence that non-HLA common genetic variation at novel loci with biochemical function significantly impacts 1-year URD-BMT survival. Our findings have implications for donor selection, could guide treatment strategies and provide individualized risk prediction after future validation and functional studies.

Funding

This project was funded by grants from the National Institutes of Health, USA.",2021-08-25 +30596994,MoMo: discovery of statistically significant post-translational modification motifs.,"

Motivation

Post-translational modifications (PTMs) of proteins are associated with many significant biological functions and can be identified in high throughput using tandem mass spectrometry. Many PTMs are associated with short sequence patterns called 'motifs' that help localize the modifying enzyme. Accordingly, many algorithms have been designed to identify these motifs from mass spectrometry data. Accurate statistical confidence estimates for discovered motifs are critically important for proper interpretation and in the design of downstream experimental validation.

Results

We describe a method for assigning statistical confidence estimates to PTM motifs, and we demonstrate that this method provides accurate P-values on both simulated and real data. Our methods are implemented in MoMo, a software tool for discovering motifs among sets of PTMs that we make available as a web server and as downloadable source code. MoMo re-implements the two most widely used PTM motif discovery algorithms-motif-x and MoDL-while offering many enhancements. Relative to motif-x, MoMo offers improved statistical confidence estimates and more accurate calculation of motif scores. The MoMo web server offers more proteome databases, more input formats, larger inputs and longer running times than the motif-x web server. Finally, our study demonstrates that the confidence estimates produced by motif-x are inaccurate. This inaccuracy stems in part from the common practice of drawing 'background' peptides from an unshuffled proteome database. Our results thus suggest that many of the papers that use motif-x to find motifs may be reporting results that lack statistical support.

Availability and implementation

The MoMo web server and source code are provided at http://meme-suite.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +32786838,Visible Post-Data Analysis Protocol for Natural Mycotoxin Production.,"Fungal natural products are routinely analyzed using target detection protocols by comparing to commercial standards. However, discovery of new products suffers from a lack of high-throughput analytical techniques. Post-data process techniques have become popular tools for natural product confirmations and mycotoxin family analysis. In this work, a visible post-data process procedure with MZmine, GNPS, and Xcalibur was used for efficient analysis of high-resolution mass spectrometry. Conjugated products were screened with an optimized diagnostic fragmentation filtering module in MZmine and further confirmed with Xcalibur by comparing to unconjugated commercial standards. MS/MS spectral data were processed and used to establish a feature based on a molecular networking map in GNPS (Global Natural Products Social Molecular Networking; https://gnps.ucsd.edu), for visualization of fungal natural product families. The results demonstrate the potential of combining MZmine-, GNPS-, and Xcalibur-based methods for visible analysis of fungal natural products.",2020-08-21 +33598546,Development and Validation of Three Regional Microsimulation Models for Predicting Colorectal Cancer Screening Benefits in Europe.,"Background. Validated microsimulation models have been shown to be useful tools in providing support for colorectal cancer (CRC) screening decisions. Aiming to assist European countries in reducing CRC mortality, we developed and validated three regional models for evaluating CRC screening in Europe. Methods. Microsimulation Screening Analysis-Colon (MISCAN-Colon) model versions for Italy, Slovenia, and Finland were quantified using data from different national institutions. 
These models were validated against the best available evidence for the effectiveness of screening from their region (when available): the Screening for COlon REctum (SCORE) trial and the Florentine fecal immunochemical test (FIT) screening study for Italy; the Norwegian Colorectal Cancer Prevention (NORCCAP) trial and the guaiac fecal occult blood test (gFOBT) Finnish population-based study for Finland. When published evidence was not available (Slovenia), the model was validated using cancer registry data. Results. Our three models reproduced age-specific CRC incidence rates and stage distributions in the prescreening period. Moreover, the Italian and Finnish models replicated CRC mortality reductions (reasonably) well against the best available evidence. CRC mortality reductions were predicted slightly larger than those observed (except for the Florentine FIT study), but consistently within the corresponding 95% confidence intervals. Conclusions. Our findings corroborate the MISCAN-Colon reliability in supporting decision making on CRC screening. Furthermore, our study provides the model structure for an additional tool (EU-TOPIA CRC evaluation tool: http://miscan.eu-topia.org) that aims to help policymakers and researchers monitoring or improving CRC screening in Europe.",2021-01-29 +31681276,Tumor-Infiltrating Immune Cells Act as a Marker for Prognosis in Colorectal Cancer.,"Tumor-infiltrating immune cells (TIICs) play essential roles in cancer development and progression. However, the association of TIICs with prognosis in colorectal cancer (CRC) patients remains elusive. Infiltration of TIICs was assessed using ssGSEA and CIBERSORT tools. The association of TIICs with prognosis was analyzed in 1,802 CRC data downloaded from the GEO (https://www.ncbi.nlm.nih.gov/geo/) and TCGA (https://portal.gdc.cancer.gov/) databases. 
Three populations of TIICs, including CD66b+ tumor-associated neutrophils (TANs), FoxP3+ Tregs, and CD163+ tumor-associated macrophages (TAMs) were selected for immunohistochemistry (IHC) validation analysis in 1,008 CRC biopsies, and their influence on clinical features and prognosis of CRC patients was analyzed. Prognostic models were constructed based on the training cohort (359 patients). The models were further tested and verified in testing (249 patients) and validation cohorts (400 patients). Based on ssGSEA and CIBERSORT analysis, the correlation between TIICs and CRC prognosis was inconsistent in different datasets. Moreover, the results with disease-free survival (DFS) and overall survival (OS) data in the same dataset also differed. The high abundance of TIICs found by ssGSEA or CIBERSORT tools can be used for prognostic evaluation effectively. IHC results showed that TANs, Tregs, TAMs were significantly correlated with prognosis in CRC patients and were independent prognostic factors (P DFS ≤ 0.001; P OS ≤ 0.023). The prognostic predictive models were constructed based on the numbers of TANs, Tregs, TAMs (C-indexDFS&OS = 0.86; AICDFS = 448.43; AICOS = 184.30) and they were more reliable than traditional indicators for evaluating prognosis in CRC patients. Besides, TIICs may affect the response to chemotherapy. In conclusion, TIICs were correlated with clinical features and prognosis in patients with CRC and thus can be used as markers.",2019-10-17 +33444113,"CALR-ETdb, the database of calreticulin variants diversity in essential thrombocythemia.","Essential thrombocythemia (ET) is a blood cancer defined by a strong increase of platelet numbers. A quarter of patients suffering from ET show mutations in the last exon of calreticulin (CALR) gene. Two variants named type 1 and type 2 represent 85% of these patients. However, a large number of other variants have been determined. 
In this study, we have compiled variants taken from COSMIC database and literature leading to 155 different variants. This large number of variants allowed redefining 5 new classes extending the classification of type 1-like and type 2-like to a finer description. These analyses showed that last class, named E, corresponding to more than 10% of CALR variants seemed not attached to ET. Structural properties analyzed showed that CALR variants associated to ET have common features. All the compiled and refined information had been included into a freely dedicated database CALR-ETdb (https://www.dsimb.inserm.fr/CALR-ET).",2021-01-14 +29155231,RTFAdb: A database of computationally predicted associations between retrotransposons and transcription factors in the human and mouse genomes.,"In recent years, retrotransposons have gained increasing attention as a source of binding motifs for transcription factors (TFs). Despite the substantial roles of these mobile genetic elements in the regulation of gene expression, a comprehensive resource enabling the investigation of retrotransposon species that are bound by TFs is still lacking. Herein, I introduce for the first time a novel database called RTFAdb, which allows exploring computationally predicted associations between retrotransposons and TFs in diverse cell lines and tissues of human and mouse. My database, using over 3.000 TF ChIP-seq binding profiles collected from human and mouse samples, makes possible searching more than 1.500 retrotransposon species in the binding sites of a total of 596 TFs. 
RTFAdb is freely available at http://tools.ibg.deu.edu.tr/rtfa/ and has the potential to offer novel insights into mammalian transcriptional networks by providing an additional layer of information regarding the regulatory roles of retrotransposons.",2017-11-17 +34310282,FloorLevel-Net: Recognizing Floor-Level Lines With Height-Attention-Guided Multi-Task Learning.,"The ability to recognize the position and order of the floor-level lines that divide adjacent building floors can benefit many applications, for example, urban augmented reality (AR). This work tackles the problem of locating floor-level lines in street-view images, using a supervised deep learning approach. Unfortunately, very little data is available for training such a network - current street-view datasets contain either semantic annotations that lack geometric attributes, or rectified facades without perspective priors. To address this issue, we first compile a new dataset and develop a new data augmentation scheme to synthesize training samples by harnessing (i) the rich semantics of existing rectified facades and (ii) perspective priors of buildings in diverse street views. Next, we design FloorLevel-Net, a multi-task learning network that associates explicit features of building facades and implicit floor-level lines, along with a height-attention mechanism to help enforce a vertical ordering of floor-level lines. The generated segmentations are then passed to a second-stage geometry post-processing to exploit self-constrained geometric priors for plausible and consistent reconstruction of floor-level lines. Quantitative and qualitative evaluations conducted on assorted facades in existing datasets and street views from Google demonstrate the effectiveness of our approach. Also, we present context-aware image overlay results and show the potentials of our approach in enriching AR-related applications. 
Project website: https://wumengyangok.github.io/Project/FloorLevelNet.",2021-01-01 +34244991,Decoding and comprehension skills mediate the link between a small-group reading programme and English national literacy assessments.,"

Background

Despite the fact that literacy instruction is a main focus of primary education, many children struggle to meet nationally set standards.

Aims

We aimed to test which components of a comprehensive reading programme (ABRACADABRA: https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdoi.org%2F10.1186%2FISRCTN18254678&data=04%7C01%7Cjanet.vousden%40ntu.ac.uk%7C880280e0b00749df855308d94068a0bb%7C8acbc2c5c8ed42c78169ba438a0dbe2f%7C1%7C0%7C637611640381216902%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=%2B4U9sGfofkyCPEY7lWz8n3TPoMOAeJMXyFwdhW6EpUw%3D&reserved=0) mediated the effect of the programme on nationally assessed literacy outcomes.

Sample

Following blind allocation, 516 Year 1 pupils from 40 schools were randomized to the programme group, and 908 Year 1 pupils, to a control condition.

Methods

Pupils in the programme completed 20 weeks of instruction in grapheme/phoneme knowledge, decoding, and comprehension. Control children received regular classroom instruction.

Results

Children in the programme group were significantly better at these taught skills after the programme finished (effect sizes: grapheme/phoneme knowledge, β = .33, 95% CI [0.09-0.57]; decoding, β = .26, 95% CI [0.09-0.43]; and comprehension, β = .26, 95% CI [0.05-0.47]). Improvements in the programme group's decoding and comprehension skills fully mediated the improvements in national literacy assessments serving as a delayed post-test 12 months after the programme. Programme group pupils were 2.3 (95% CI [1.4-4.1]) times more likely to achieve/exceed the expected standard in reading, and 1.8 (95% CI [1.2-2.6]) times more likely to achieve/exceed the expected standard in writing due to an increase in the trained skills.

Conclusions

These results provide strong evidence that a programme that incorporates decoding and comprehension instruction for typically developing beginning readers improves distal educational outcomes in reading and writing through increasing proficiencies targeted by the reading programme.",2021-07-10 +33443662,Prognostic model for patients with advanced cancer using a combination of routine blood test values.,"

Purpose

The purpose of this study was to develop a simple prognostic model based on objective indicators alone, i.e., routine blood test data, without using any subjective variables such as patient's symptoms and physician's prediction.

Methods

The subjects of this retrospective study were patients at the palliative care unit of Tohoku University Hospital, Japan. Eligible patients were over 20 years old and had advanced cancer (n = 225). The model for predicting survival was developed based on Cox proportional hazards regression models for univariable and multivariable analyses of 20 items selected from routine blood test data. All the analyses were performed according to the TRIPOD statement ( https://www.tripod-statement.org/ ).

Results

The univariable and multivariable regression analyses identified total bilirubin, creatinine, urea/creatinine ratio, aspartate aminotransferase, albumin, total leukocyte count, differential lymphocyte count, and platelet/lymphocyte ratio as significant risk factors for mortality. Based on the hazard ratios, the area under the curve for the new risk model was 0.87 for accuracy, 0.83 for sensitivity, and 0.74 for specificity. Diagnostic accuracy was higher than provided by the Palliative Prognostic Score and the Palliative Prognostic Index. The Kaplan-Meier analysis demonstrated a survival significance of classifying patients according to their score into low-, medium-, and high-mortality risk groups having median survival times of 67 days, 34 days, and 11 days, respectively (p < 0.001).

Conclusions

We developed a simple and accurate prognostic model for predicting the survival of patients with advanced cancer based on routine blood test values alone that may be useful for appropriate advanced care planning in a palliative care setting.",2021-01-14 +32221846,Aspirin Versus Clopidogrel Monotherapy for the Secondary Prevention of Recurrent Cerebrovascular Attack Following Previous Ischemic Stroke in Patients with Type 2 Diabetes Mellitus: A Systematic Review and Meta-Analysis.,"

Introduction

Type 2 diabetes mellitus (T2DM) and stroke are two different diseases, but have many aspects in common. Aspirin is recommended as an initial treatment for the secondary prevention of recurrent ischemic stroke in patients with T2DM. However, clopidogrel is an oral antiplatelet drug that might be another choice in case of aspirin intolerance. In this analysis, we aimed to systematically compare aspirin versus clopidogrel monotherapy for the secondary prevention of recurrent cerebrovascular attack following previous ischemic stroke in patients with T2DM.

Methods

Online medical databases including Web of Science, MEDLINE, Cochrane central, EMBASE and http://www.ClinicalTrials.com were searched for published articles that satisfied the inclusion and exclusion criteria of this study. Recurrent stroke, fatal stroke, cerebral hemorrhage, myocardial infarction and mortality were considered the main end points in these patients with T2DM. RevMan 5.3 software was used to statistically analyze the data representing each subgroup. Risk ratios (RRs) with 95% confidence intervals (CIs) were used to represent the results following analysis.

Results

A total of 9218 participants with T2DM who were previously affected by ischemic stroke were included in this analysis, whereby 4917 were assigned to aspirin and 4301 to clopidogrel. This current analysis showed that there was no significant difference in recurrent stroke rate (RR: 0.79, 95% CI: 0.61-1.02; P = 0.07) observed with aspirin versus clopidogrel in these patients with T2DM. The risk of fatal stroke (RR: 0.88, 95% CI: 0.39-1.98; P = 0.76), cerebral hemorrhage (RR: 0.65, 95% CI: 0.38-1.11; P = 0.12), myocardial infarction (RR: 0.88, 95% CI: 0.43-1.79; P = 0.71) and mortality (RR: 1.07, 95% CI: 0.90-1.27; P = 0.44) were also similarly manifested.

Conclusion

Clopidogrel monotherapy was neither inferior nor superior to aspirin monotherapy for the secondary prevention of recurrent cerebrovascular attack following previous ischemic stroke in patients with T2DM. Hence, clopidogrel or aspirin monotherapy is equally safe and effective in these patients with T2DM.",2020-03-27 +32904338,Vertical distributions of soil microbial biomass carbon: a global dataset.,"Soil microbial biomass carbon (SMBC) is important in regulating soil organic carbon (SOC) dynamics along soil profiles by mediating the decomposition and formation of SOC. The dataset (VDMBC) is about the vertical distributions of SOC, SMBC, and soil microbial quotient (SMQ = SMBC/SOC) and their relations to environmental factors across five continents. Data were collected from literature, with a total of 289 soil profiles and 1040 observations in different soil layers compiled. The associated environment data collected include climate, ecosystem types, and edaphic factors. We developed this dataset by searching the Web of Science and the China National Knowledge Infrastructure from the year of 1970 to 2019. All the data in this dataset met two criteria: 1) there were at least three mineral soil layers along a soil profile, and 2) SMBC was measured using the fumigation extraction method. The data in tables and texts were obtained from literature directly, and the data in figures were extracted by using the GetData Graph digitizer software version 2.25. When climate and soil properties were not available from publications, we obtained the data from the World Weather Information Service (https://worldweather.wmo.int/en/home.html) and SoilGrids at a spatial resolution of 250 meters (version 0.5.3, https://soilgrids.org). The units of all the variables were converted to the standard international units or commonly used ones and the values were transformed correspondingly. 
For example, the value of soil organic matter (SOM) was converted to SOC by using the equation (SOC = SOM × 0.58). This dataset can be used in predicting global SOC changes along soil profiles by using the multi-layer soil carbon models. It can also be used to analyse how soil microbial biomass changes with plant roots as well as the composition, structure, and functions of soil microbial communities along soil profiles at large spatial scales. This dataset offers opportunities to improve our prediction of SOC dynamics under global changes and to advance our understanding of the environmental controls.",2020-08-08 +30407520,Building a livestock genetic and genomic information knowledgebase through integrative developments of Animal QTLdb and CorrDB.,"Successful development of biological databases requires accommodation of the burgeoning amounts of data from high-throughput genomics pipelines. As the volume of curated data in Animal QTLdb (https://www.animalgenome.org/QTLdb) increases exponentially, the resulting challenges must be met with rapid infrastructure development to effectively accommodate abundant data curation and make metadata analysis more powerful. The development of Animal QTLdb and CorrDB for the past 15 years has provided valuable tools for researchers to utilize a wealth of phenotype/genotype data to study the genetic architecture of livestock traits. We have focused our efforts on data curation, improved data quality maintenance, new tool developments, and database co-developments, in order to provide convenient platforms for users to query and analyze data. The database currently has 158 499 QTL/associations, 10 482 correlations and 1977 heritability data as a result of an average 32% data increase per year. In addition, we have made >14 functional improvements or new tool implementations since our last report. 
Our ultimate goals of database development are to provide infrastructure for data collection, curation, and annotation, and more importantly, to support innovated data structure for new types of data mining, data reanalysis, and networked genetic analysis that lead to the generation of new knowledge.",2019-01-01 +29618542,AAPL Practice Resource for the Forensic Evaluation of Psychiatric Disability.,"Full Document: Anfang SA, Gold LH, Meyer DJ: AAPL practice resource for the forensic evaluation of psychiatric disability. Journal of the American Academy of Psychiatry and the Law Online Supplement 2018, 46 (1). Available at: http://www.jaapl.org/content/46/1_Supplement.",2018-03-01 +31405382,The bio.tools registry of software tools and data resources for the life sciences.,"Bioinformaticians and biologists rely increasingly upon workflows for the flexible utilization of the many life science tools that are needed to optimally convert data into knowledge. We outline a pan-European enterprise to provide a catalogue ( https://bio.tools ) of tools and databases that can be used in these workflows. bio.tools not only lists where to find resources, but also provides a wide variety of practical information.",2019-08-12 +34323617,A Data-Driven Transcriptional Taxonomy of Adipogenic Chemicals to Identify White and Brite Adipogens.,"

Background

Chemicals in disparate structural classes activate specific subsets of the transcriptional programs of peroxisome proliferator-activated receptor-γ (PPARγ) to generate adipocytes with distinct phenotypes.

Objectives

Our objectives were to a) establish a novel classification method to predict PPARγ ligands and modifying chemicals; and b) create a taxonomy to group chemicals on the basis of their effects on PPARγ's transcriptome and downstream metabolic functions. We tested the hypothesis that environmental adipogens highly ranked by the taxonomy, but segregated from therapeutic PPARγ ligands, would induce white but not brite adipogenesis.

Methods

3T3-L1 cells were differentiated in the presence of 76 chemicals (negative controls, nuclear receptor ligands known to influence adipocyte biology, potential environmental PPARγ ligands). Differentiation was assessed by measuring lipid accumulation. mRNA expression was determined by RNA-sequencing (RNA-Seq) and validated by reverse transcription-quantitative polymerase chain reaction. A novel classification model was developed using an amended random forest procedure. A subset of environmental contaminants identified as strong PPARγ agonists were analyzed by their effects on lipid handling, mitochondrial biogenesis, and cellular respiration in 3T3-L1 cells and human preadipocytes.

Results

We used lipid accumulation and RNA-Seq data to develop a classification system that a) identified PPARγ agonists; and b) sorted chemicals into likely white or brite adipogens. Expression of Cidec was the most efficacious indicator of strong PPARγ activation. 3T3-L1 cells treated with two known environmental PPARγ ligands, tetrabromobisphenol A and triphenyl phosphate, which sorted distinctly from therapeutic ligands, had higher expression of white adipocyte genes but no difference in Pgc1a and Ucp1 expression, and higher fatty acid uptake but not mitochondrial biogenesis. Moreover, cells treated with two chemicals identified as highly ranked PPARγ agonists, tonalide and quinoxyfen, induced white adipogenesis without the concomitant health-promoting characteristics of brite adipocytes in mouse and human preadipocytes.

Discussion

A novel classification procedure accurately identified environmental chemicals as PPARγ ligands distinct from known PPARγ-activating therapeutics.

Conclusion

The computational and experimental framework has general applicability to the classification of as-yet uncharacterized chemicals. https://doi.org/10.1289/EHP6886.",2021-07-29 +34183340,Cohort profile: the Westlake BioBank for Chinese (WBBC) pilot project.,"

Purpose

The Westlake BioBank for Chinese (WBBC) pilot cohort is a population-based prospective study with its major purpose to better understand the effect of genetic and environmental factors on growth and development from adolescents to adults.

Participants

A total of 14 726 participants (4751 males and 9975 females) aged 14-25 years were recruited and the baseline survey was carried out from 2017 to 2019. The pilot cohort contains a rich range of information regarding demographics and anthropometric measurements, lifestyle and sleep patterns, clinical and health outcomes. Visit the WBBC website for more information (https://wbbc.westlake.edu.cn/index.html).

Findings to date

The mean age of the study samples were 18.6 years for males and 18.5 years for females, respectively. The mean height and weight were 172.9 cm and 65.81 kg for males, and 160.1 cm and 52.85 kg for females. Results indicated that the prevalence of underweight in female was much higher than male, but the prevalence of overweight and obesity in female was lower than male. The mean serum 25(OH)D level in the 14 726 young participants was 22.4±5.3 ng/mL, and male had a higher level of serum 25(OH)D than female, overall, 33.5% of the participants had vitamin D deficiency and even more participants suffered from vitamin D insufficiency (58.2%). The proportion of deficiency in females was much higher than that in males (41.8 vs 16.4%). The issue of underweight and vitamin D deficiency in young people should be paid attention, especially in females. These results reflected the fact that thinness and paler skin are preferred in modern aesthetics of Chinese culture.

Future plans

WBBC pilot is designed as a prospective cohort study and provides a unique and rich data set analysing health trajectories from adolescents to young adults. WBBC will continue to collect samples with old age.",2021-06-28 +34181083,A detailed analysis of anatomical plausibility of crossed and uncrossed streamline rendition of the dentato-rubro-thalamic tract (DRT(T)) in a commercial stereotactic planning system.,"

Background

An increasing number of neurosurgeons use display of the dentato-rubro-thalamic tract (DRT) based on diffusion weighted imaging (dMRI) as basis for their routine planning of stimulation or lesioning approaches in stereotactic tremor surgery. An evaluation of the anatomical validity of the display of the DRT with respect to modern stereotactic planning systems and across different tracking environments has not been performed.

Methods

Distinct dMRI and anatomical magnetic resonance imaging (MRI) data of high and low quality from 9 subjects were used. Six subjects had repeated MRI scans and therefore entered the analysis twice. Standardized DICOM structure templates for volume of interest definition were applied in native space for all investigations. For tracking BrainLab Elements (BrainLab, Munich, Germany), two tensor deterministic tracking (FT2), MRtrix IFOD2 ( https://www.mrtrix.org ), and a global tracking (GT) approach were used to compare the display of the uncrossed (DRTu) and crossed (DRTx) fiber structure after transformation into MNI space. The resulting streamlines were investigated for congruence, reproducibility, anatomical validity, and penetration of anatomical way point structures.

Results

In general, the DRTu can be depicted with good quality (as judged by waypoints). FT2 (surgical) and GT (neuroscientific) show high congruence. While GT shows partly reproducible results for DRTx, the crossed pathway cannot be reliably reconstructed with the other (iFOD2 and FT2) algorithms.

Conclusion

Since a direct anatomical comparison is difficult in the individual subjects, we chose a comparison with two research tracking environments as the best possible ""ground truth."" FT2 is useful especially because of its manual editing possibilities of cutting erroneous fibers on the single subject level. An uncertainty of 2 mm as mean displacement of DRTu is expectable and should be respected when using this approach for surgical planning. Tractographic renditions of the DRTx on the single subject level still seem to be elusive.",2021-06-28 +33861422,Metastases from metastases: comparative metastatic potential of human cancer cell lines originated from primary tumors or metastases in various tissues.,"Although metastases from original (primary) tumors are highly studied, metastases from metastatic sites (secondary tumors) are far less studied. Here, using data from metastasis map (MetMap) project reported in a recent study (Jin et al. in Nature 588(7837): 331-336. https://doi.org/10.1038/s41586-020-2969-2 , 2020), we found that human cancer cell lines isolated from metastatic sites have higher potential to metastasize to another site in mice, compared to human cancer cell lines isolated from primary sites, for certain types of cancer including liver, lung and pancreas cancer. In contrast, for cancer types such as ovarian and skin cancer, human cancer cell lines originated from primary tumors have increased metastatic potential in mice, compared to human cancer cell lines originated from metastatic sites. 
This preliminary analysis points that the potential of metastases to further metastasize compared to that of primary tumors might be cancer type-dependent, and further research is needed to understand why certain cancer cell lines isolated from metastatic sites are more likely to spread to other organs.",2021-04-16 +32909142,Democratizing Access to Community-Based Survey Findings Through Dynamic Data Visualizations.,"OurStats ( https://www.cbrc.net/ourstats ) is a data visualization dashboard developed by the Community-Based Research Centre (CBRC) to increase access to data from the Sex Now surveys-Canada's largest community-based surveillance study of gay and bisexual men. An evaluation of the OurStats dashboard was conducted using an online survey distributed through the CBRC and Advance Alliance-an alliance of Canada's leading HIV and queer men's health organizations. Since being launched in November 2019 (through December 2019), 350 unique visitors used the OurStats Dashboard (5.8 per day). Based on responses from 10 community partners, all respondents said they would probably/definitely use OurStats again and would probably/definitely recommend it to colleagues; nine felt it was much/somewhat better than traditional academic outputs (e.g., poster presentations, journal articles); and seven felt it was much/somewhat better than traditional knowledge translation outputs (e.g., fliers, posters, and social media posts). Respondents said they would use OurStats to identify needs of gay and bisexual men (n = 9), prepare grant/funding applications (n = 9), prepare presentations about Sex Now data (n = 7), and evaluate the impact of local programs (n = 4). Overall, half felt that OurStats was somewhat/extremely easy to use and half felt that it was somewhat difficult to use. The most commonly identified requested improvement was to provide help documentation that explained how each of the display settings changed the visualizations. 
From these findings, we conclude that dynamic visualizations for community-based survey data are highly feasible and acceptable, provided appropriate support is available to help community partners use these tools.",2020-09-09 +30854545,Brain annotation toolbox: exploring the functional and genetic associations of neuroimaging results.,"

Motivation

Advances in neuroimaging and sequencing techniques provide an unprecedented opportunity to map the function of brain regions and identify the roots of psychiatric diseases. However, the results from most neuroimaging studies, i.e. activated clusters/regions or functional connectivities between brain regions, frequently cannot be conveniently and systematically interpreted, rendering the biological meaning unclear.

Results

We describe a brain annotation toolbox that generates functional and genetic annotations for neuroimaging results. The voxel-level functional description from the Neurosynth database and gene expression profile from the Allen Human Brain Atlas are used to generate functional/genetic information for region-level neuroimaging results. The validity of the approach is demonstrated by showing that the functional and genetic annotations for specific brain regions are consistent with each other; and further the region by region functional similarity network and genetic similarity network are highly correlated for major brain atlases. One application of brain annotation toolbox is to help provide functional/genetic annotations for newly discovered regions with unknown functions, e.g. the 97 new regions identified in the Human Connectome Project. Importantly, this toolbox can help understand differences between psychiatric patients and controls, and this is demonstrated using schizophrenia and autism data, for which the functional and genetic annotations for the neuroimaging changes in patients are consistent with each other and help interpret the results.

Availability and implementation

BAT is implemented as a free and open-source MATLAB toolbox and is publicly available at http://123.56.224.61:1313/post/bat.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +31424530,Multi-scale structural analysis of proteins by deep semantic segmentation.,"

Motivation

Recent advances in computational methods have facilitated large-scale sampling of protein structures, leading to breakthroughs in protein structural prediction and enabling de novo protein design. Establishing methods to identify candidate structures that can lead to native folds or designable structures remains a challenge, since few existing metrics capture high-level structural features such as architectures, folds and conformity to conserved structural motifs. Convolutional Neural Networks (CNNs) have been successfully used in semantic segmentation-a subfield of image classification in which a class label is predicted for every pixel. Here, we apply semantic segmentation to protein structures as a novel strategy for fold identification and structure quality assessment.

Results

We train a CNN that assigns each residue in a multi-domain protein to one of 38 architecture classes designated by the CATH database. Our model achieves a high per-residue accuracy of 90.8% on the test set (95.0% average per-class accuracy; 87.8% average per-structure accuracy). We demonstrate that individual class probabilities can be used as a metric that indicates the degree to which a randomly generated structure assumes a specific fold, as well as a metric that highlights non-conformative regions of a protein belonging to a known class. These capabilities yield a powerful tool for guiding structural sampling for both structural prediction and design.

Availability and implementation

The trained classifier network, parser network, and entropy calculation scripts are available for download at https://git.io/fp6bd, with detailed usage instructions provided at the download page. A step-by-step tutorial for setup is provided at https://goo.gl/e8GB2S. All Rosetta commands, RosettaRemodel blueprints, and predictions for all datasets used in the study are available in the Supplementary Information.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +26590264,MutationAligner: a resource of recurrent mutation hotspots in protein domains in cancer.,"The MutationAligner web resource, available at http://www.mutationaligner.org, enables discovery and exploration of somatic mutation hotspots identified in protein domains in currently (mid-2015) more than 5000 cancer patient samples across 22 different tumor types. Using multiple sequence alignments of protein domains in the human genome, we extend the principle of recurrence analysis by aggregating mutations in homologous positions across sets of paralogous genes. Protein domain analysis enhances the statistical power to detect cancer-relevant mutations and links mutations to the specific biological functions encoded in domains. We illustrate how the MutationAligner database and interactive web tool can be used to explore, visualize and analyze mutation hotspots in protein domains across genes and tumor types. We believe that MutationAligner will be an important resource for the cancer research community by providing detailed clues for the functional importance of particular mutations, as well as for the design of functional genomics experiments and for decision support in precision medicine. MutationAligner is slated to be periodically updated to incorporate additional analyses and new data from cancer genomics projects.",2015-11-20 +33972020,REP2: A Web Server to Detect Common Tandem Repeats in Protein Sequences.,"Ensembles of tandem repeats (TRs) in protein sequences expand rapidly to form domains well suited for interactions with proteins. For this reason, they are relatively frequent. Some TRs have known structures and therefore it is advantageous to predict their presence in a protein sequence. However, since most TRs diverge quickly, their detection by classical sequence comparison algorithms is not very accurate. 
Previously, we developed a method and a web server that used curated profiles and thresholds for the detection of 11 common TRs. Here we present a new web server (REP2) that allows the analysis of TRs in both individual and aligned sequences. We provide currently precomputed analyses for a selection of 78 UniProt reference proteomes. We illustrate how these data can be used to study the evolution of TRs using comparative genomics. REP2 can be accessed at http://cbdm-01.zdv.uni-mainz.de/~munoz/rep/.",2021-02-24 +30307528,A global transcriptomic pipeline decoding core network of genes involved in stages leading to acquisition of drug-resistance to cisplatin in osteosarcoma cells.,"

Motivation

Traditional cancer therapy is focused on eradicating fast proliferating population of tumor cells. However, existing evidences suggest survival of sub-population of cancer cells that can resist chemotherapy by entering a 'persister' state of minimal growth. These cells eventually survive to produce cells resistant to drugs. The identifying of appropriate targets that can eliminate the drug-tolerant 'persisters' remains a challenge. Hence, a deeper understanding of the distinctive genetic signatures that lead to resistance is of utmost importance to design an appropriate therapy.

Results

In this study, deep-sequencing of mRNA was performed in osteosarcoma (OS) cells, exposed to the widely used drug, cisplatin which is an integral part of current treatment regime for OS. Transcriptomic analysis was performed in (i) untreated OS; (ii) persister sub-population of cells post-drug shock; (iii) cells which evade growth bottleneck and (iv) drug-resistant cells obtained after several rounds of drug shock and revival. The transcriptomic signatures and pathways regulated in each group were compared; the transcriptomic pipeline to the acquisition of resistance was analyzed and the core network of genes altered during the process was delineated. Additionally, our transcriptomic data were compared with OS patient data obtained from Gene Expression Omnibus. We observed a sub-set of genes to be commonly expressed in both data sets with a high correlation (0.81) in expression pattern. To the best of our knowledge, this study is uniquely designed to understand the series of genetic changes leading to the emergence of drug-resistant cells, and implications from this study have a potential therapeutic impact.

Availability and implementation

All raw data can be accessed from GEO database (https://www.ncbi.nlm.nih.gov/geo/) under the GEO accession number GSE86053.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +31406905,"KFuji RGB-DS database: Fuji apple multi-modal images for fruit detection with color, depth and range-corrected IR data.","This article contains data related to the research article entitled ""Multi-modal Deep Learning for Fruit Detection Using RGB-D Cameras and their Radiometric Capabilities"" [1]. The development of reliable fruit detection and localization systems is essential for future sustainable agronomic management of high-value crops. RGB-D sensors have shown potential for fruit detection and localization since they provide 3D information with color data. However, the lack of substantial datasets is a barrier for exploiting the use of these sensors. This article presents the KFuji RGB-DS database which is composed of 967 multi-modal images of Fuji apples on trees captured using Microsoft Kinect v2 (Microsoft, Redmond, WA, USA). Each image contains information from 3 different modalities: color (RGB), depth (D) and range corrected IR intensity (S). Ground truth fruit locations were manually annotated, labeling a total of 12,839 apples in all the dataset. The current dataset is publicly available at http://www.grap.udl.cat/publicacions/datasets.html.",2019-07-19 +34003326,Surgical Algorithms in Rhinoplasty: A Scoping Review of the Current Status.,"

Background

Algorithms define a sequence to approaching a subject. This study represents a scoping review seeking to define the role of surgical algorithms in rhinoplasty.

Methods

A scoping review was conducted. Pubmed/MEDLINE, Web of Science, and Google Scholar, as well as a citation searching was performed to find eligible studies for review. Eligibility criteria included studies published in English, human subjects ≥ 15 years old, and all included surgical algorithms were for primary cosmetic rhinoplasty.

Results

In total, 514 studies included the key search terms. Thirty-nine studies were evaluated for data extraction. The majority of the algorithms were from USA-based publications (22/39). Flow-chart type algorithm was used in 23 and text-based algorithms in 15 of the 39 studies. Algorithms related to tip shape and/or position were most frequent (19/39), followed by those for crooked nose and dorsal height. Only 7 of the algorithms described outcomes for utilizing the algorithm.

Conclusions

Very few surgical algorithms have been published for cosmetic rhinoplasty. A minority of these studies have published outcomes. This study provides a description and summary of these algorithms and also shows that future studies could be done to further develop surgical algorithms for rhinoplasty and evaluate outcomes.

Level of evidence iii

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com.",2021-05-18 +29077795,Group spike-and-slab lasso generalized linear models for disease prediction and associated genes detection by incorporating pathway information.,"

Motivation

Large-scale molecular data have been increasingly used as an important resource for prognostic prediction of diseases and detection of associated genes. However, standard approaches for omics data analysis ignore the group structure among genes encoded in functional relationships or pathway information.

Results

We propose new Bayesian hierarchical generalized linear models, called group spike-and-slab lasso GLMs, for predicting disease outcomes and detecting associated genes by incorporating large-scale molecular data and group structures. The proposed model employs a mixture double-exponential prior for coefficients that induces self-adaptive shrinkage amount on different coefficients. The group information is incorporated into the model by setting group-specific parameters. We have developed a fast and stable deterministic algorithm to fit the proposed hierarchal GLMs, which can perform variable selection within groups. We assess the performance of the proposed method on several simulated scenarios, by varying the overlap among groups, group size, number of non-null groups, and the correlation within group. Compared with existing methods, the proposed method provides not only more accurate estimates of the parameters but also better prediction. We further demonstrate the application of the proposed procedure on three cancer datasets by utilizing pathway structures of genes. Our results show that the proposed method generates powerful models for predicting disease outcomes and detecting associated genes.

Availability and implementation

The methods have been implemented in a freely available R package BhGLM (http://www.ssg.uab.edu/bhglm/).

Contact

nyi@uab.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-03-01 +26553803,Gramene 2016: comparative plant genomics and pathway resources.,"Gramene (http://www.gramene.org) is an online resource for comparative functional genomics in crops and model plant species. Its two main frameworks are genomes (collaboration with Ensembl Plants) and pathways (The Plant Reactome and archival BioCyc databases). Since our last NAR update, the database website adopted a new Drupal management platform. The genomes section features 39 fully assembled reference genomes that are integrated using ontology-based annotation and comparative analyses, and accessed through both visual and programmatic interfaces. Additional community data, such as genetic variation, expression and methylation, are also mapped for a subset of genomes. The Plant Reactome pathway portal (http://plantreactome.gramene.org) provides a reference resource for analyzing plant metabolic and regulatory pathways. In addition to ∼ 200 curated rice reference pathways, the portal hosts gene homology-based pathway projections for 33 plant species. Both the genome and pathway browsers interface with the EMBL-EBI's Expression Atlas to enable the projection of baseline and differential expression data from curated expression studies in plants. Gramene's archive website (http://archive.gramene.org) continues to provide previously reported resources on comparative maps, markers and QTL. To further aid our users, we have also introduced a live monthly educational webinar series and a Gramene YouTube channel carrying video tutorials.",2015-11-08 +33963845,COVIDOUTCOME-estimating COVID severity based on mutation signatures in the SARS-CoV-2 genome. ,"Numerous studies demonstrate frequent mutations in the genome of SARS-CoV-2. Our goal was to statistically link mutations to severe disease outcome. 
We used an automated machine learning approach where 1594 viral genomes with available clinical follow-up data were used as the training set (797 'severe' and 797 'mild'). The best algorithm, based on random forest classification combined with the LASSO feature selection algorithm, was employed to the training set to link mutation signatures and outcome. The performance of the final model was estimated by repeated, stratified, 10-fold cross validation (CV) and then adjusted for multiple testing with Bootstrap Bias Corrected CV. We identified 26 protein and Untranslated Region (UTR) mutations significantly linked to severe outcome. The best classification algorithm uses a mutation signature of 22 mutations as well as the patient's age as the input and shows high classification efficiency with an area under the curve (AUC) of 0.94 [confidence interval (CI): [0.912, 0.962]] and a prediction accuracy of 87% (CI: [0.830, 0.903]). Finally, we established an online platform (https://covidoutcome.com/) that is capable to use a viral sequence and the patient's age as the input and provides a percentage estimation of disease severity. We demonstrate a statistical association between mutation signatures of SARS-CoV-2 and severe outcome of COVID-19. The established analysis platform enables a real-time analysis of new viral genomes.",2021-05-01 +32898222,SMI-BLAST: a novel supervised search framework based on PSI-BLAST for protein remote homology detection.,"

Motivation

PSI-BLAST is one of the most important and widely used mainstream iterative search tools for protein sequence search, and an accurate Position-Specific Scoring Matrix (PSSM) is the key to PSI-BLAST. However, PSSMs containing non-homologous information obviously reduce the performance of PSI-BLAST for protein remote homology detection.

Results

To further study this problem, we summarize three types of Incorrectly Selected Homology (ISH) errors in PSSMs. A new search tool Supervised-Manner-based Iterative BLAST (SMI-BLAST) is proposed based on PSI-BLAST for solving these errors. SMI-BLAST obviously outperforms PSI-BLAST on the Structural Classification of Proteins-extended (SCOPe) dataset. Compared with PSI-BLAST on the ISH error subsets of SCOPe dataset, SMI-BLAST detects 1.6-2.87 folds more remote homologous sequences, and outperforms PSI-BLAST by 35.66% in terms of ROC1 scores. Furthermore, this framework is applied to JackHMMER, DELTA-BLAST and PSI-BLASTexB, and their performance is further improved.

Availability and implementation

User-friendly webservers for SMI-BLAST, JackHMMER, DELTA-BLAST and PSI-BLASTexB are established at http://bliulab.net/SMI-BLAST/, by which the users can easily get the results without the need to go through the mathematical details.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +29688375,CITGeneDB: a comprehensive database of human and mouse genes enhancing or suppressing cold-induced thermogenesis validated by perturbation experiments in mice. ,"Cold-induced thermogenesis increases energy expenditure and can reduce body weight in mammals, so the genes involved in it are thought to be potential therapeutic targets for treating obesity and diabetes. In the quest for more effective therapies, a great deal of research has been conducted to elucidate the regulatory mechanism of cold-induced thermogenesis. Over the last decade, a large number of genes that can enhance or suppress cold-induced thermogenesis have been discovered, but a comprehensive list of these genes is lacking. To fill this gap, we examined all of the annotated human and mouse genes and curated those demonstrated to enhance or suppress cold-induced thermogenesis by in vivo or ex vivo experiments in mice. The results of this highly accurate and comprehensive annotation are hosted on a database called CITGeneDB, which includes a searchable web interface to facilitate broad public use. The database will be updated as new genes are found to enhance or suppress cold-induced thermogenesis. It is expected that CITGeneDB will be a valuable resource in future explorations of the molecular mechanism of cold-induced thermogenesis, helping pave the way for new obesity and diabetes treatments.Database URL: http://citgenedb.yubiolab.org.",2018-01-01 +33820995,Integrative microbiomics in bronchiectasis exacerbations.,"Bronchiectasis, a progressive chronic airway disease, is characterized by microbial colonization and infection. We present an approach to the multi-biome that integrates bacterial, viral and fungal communities in bronchiectasis through weighted similarity network fusion ( https://integrative-microbiomics.ntu.edu.sg ). 
Patients at greatest risk of exacerbation have less complex microbial co-occurrence networks, reduced diversity and a higher degree of antagonistic interactions in their airway microbiome. Furthermore, longitudinal interactome dynamics reveals microbial antagonism during exacerbation, which resolves following treatment in an otherwise stable multi-biome. Assessment of the Pseudomonas interactome shows that interaction networks, rather than abundance alone, are associated with exacerbation risk, and that incorporation of microbial interaction data improves clinical prediction models. Shotgun metagenomic sequencing of an independent cohort validated the multi-biome interactions detected in targeted analysis and confirmed the association with exacerbation. Integrative microbiomics captures microbial interactions to determine exacerbation risk, which cannot be appreciated by the study of a single microbial group. Antibiotic strategies probably target the interaction networks rather than individual microbes, providing a fresh approach to the understanding of respiratory infection.",2021-04-05 +33010151,A machine learning-based method for prediction of macrocyclization patterns of polyketides and non-ribosomal peptides.,"

Motivation

Even though genome mining tools have successfully identified large numbers of non-ribosomal peptide synthetase (NRPS) and polyketide synthase (PKS) biosynthetic gene clusters (BGCs) in bacterial genomes, currently no tool can predict the chemical structure of the secondary metabolites biosynthesized by these BGCs. Lack of algorithms for predicting complex macrocyclization patterns of linear PK/NRP biosynthetic intermediates has been the major bottleneck in deciphering the final bioactive chemical structures of PKs/NRPs by genome mining.

Results

Using a large dataset of known chemical structures of macrocyclized PKs/NRPs, we have developed a machine learning (ML) algorithm for distinguishing the correct macrocyclization pattern of PKs/NRPs from the library of all theoretically possible cyclization patterns. Benchmarking of this ML classifier on completely independent datasets has revealed ROC-AUC and PR-AUC values of 0.82 and 0.81, respectively. This cyclization prediction algorithm has been used to develop SBSPKSv3, a genome mining tool for completely automated prediction of macrocyclized structures of NRPs/PKs. SBSPKSv3 has been extensively benchmarked on a dataset of over 100 BGCs with known PKs/NRPs products.

Availability and implementation

The macrocyclization prediction pipeline and all the datasets used in this study are freely available at http://www.nii.ac.in/sbspks3.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +32960943,A novel sequence alignment algorithm based on deep learning of the protein folding code.,"

Motivation

From evolutionary inference and function annotation to structural prediction, protein sequence comparison has provided crucial biological insights. While many sequence alignment algorithms have been developed, existing approaches often cannot detect hidden structural relationships in the 'twilight zone' of low sequence identity. To address this critical problem, we introduce a computational algorithm that performs protein Sequence Alignments from deep-Learning of Structural Alignments (SAdLSA, silent 'd'). The key idea is to implicitly learn the protein folding code from many thousands of structural alignments using experimentally determined protein structures.

Results

To demonstrate that the folding code was learned, we first show that SAdLSA trained on pure α-helical proteins successfully recognizes pairs of structurally related pure β-sheet protein domains. Subsequent training and benchmarking on larger, highly challenging datasets show significant improvement over established approaches. For challenging cases, SAdLSA is ∼150% better than HHsearch for generating pairwise alignments and ∼50% better for identifying the proteins with the best alignments in a sequence library. The time complexity of SAdLSA is O(N) thanks to GPU acceleration.

Availability and implementation

Datasets and source codes of SAdLSA are available free of charge for academic users at http://sites.gatech.edu/cssb/sadlsa/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +34426074,Identifying Early Vascular Ageing in Patients With Metabolic Syndrome: Unresolved Issues and a Proposed Novel VAmets Score.,"

Purpose

To identify the features of early vascular ageing (EVA) in patients with metabolic syndrome (MetS), to assess the accuracy of existing methods for determining vascular age in MetS, and to derive a new score (VAmets) for the calculation of vascular age and predicting EVA in patients with MetS.

Methods

Prospective open cohort study using routinely collected data from general practice. A total of 750 patients (age, 35-80 yrs old) with MetS were examined. EVA syndrome was detected in 484 patients with MetS and carotid-femoral pulse wave velocity (cfPWV) values exceeding average expected for age values by 2 or more standard deviations (SD).

Results

The presence of type 2 diabetes and insulin resistance (IR) were associated with greater risk of EVA in MetS patients; the odds ratios were 2.75 (95% confidence interval [CI]: 2.34, 3.35) and 1.57 (95% CI: 1.16, 2.00), respectively. In addition, the risk of EVA increased by 76% with an increase in homeostatic model assessment of insulin resistance (HOMA-IR) by 1 unit, by 17% with an increase in high-sensitivity C-reactive protein (hs-CRP) by 1 mg/L, by 4% with an increase in diastolic blood pressure (DBP) by 1 mmHg, and by 1% with each (1) μmol/L increase in the level of uric acid (UA). The area under the curve (AUC) for predicting EVA in patients with MetS was 0.949 (95% CI: 0.936-0.963), 0.630 (95% CI: 0.589-0.671), 0.697 (95% CI: 0.659-0.736) and 0.686 (95% CI: 0.647-0.726), for vascular age calculated from carotid-femoral pulse wave velocity (cfPWV), Systematic COronary Risk Evaluation (SCORE) scale, QRESEARCH cardiovascular risk algorithm (QRISK-3) scale, and Framingham scale, respectively. Diabetes mellitus and clinical markers of IR (yes/no), HOMA-IR and UA level were used to develop a new VAmets score for EVA prediction providing a total accuracy of 0.830 (95% CI: 0.799-0.860). Based on the results of the study, a VAmets calculator was developed for diagnosing EVA in patients with MetS. (The calculator is available online at https://apps.medhub.pro/evams/) CONCLUSION: Carotid-femoral pulse wave velocity is at present the most widely studied index of arterial stiffness and fulfils most of the stringent criteria for a clinically useful biomarker of EVA in patients with MetS. There are parallel efforts for the effective identification and integration of a simple clinical score into clinical practice. 
Our score (VAmets) may accurately identify patients with MetS and EVA on the basis of widely available clinical variables and classic cardiovascular risk factors, and may assist in prioritising the calculation and use of vascular age in routine care.",2021-08-20 +30823695,Prevalence of Tobacco Use and Overweight/Obesity in Rhode Island: Comparisons of Survey and Claims Data.,"Many states, including Rhode Island, have begun to collect insurance claims data to better understand healthcare spending and local health outcomes. In this study, we sought to determine whether or not the prevalence of tobacco use and overweight/obesity in the Rhode Island All-Payer Claims Database (APCD) was comparable to that predicted by national behavioral survey data. We found that the prevalence of these lifestyle-related health problems was lower in local claims data than in survey data, suggesting that this database should be used with caution when exploring issues related to the prevalence of tobacco use and overweight/ obesity in Rhode Island. [Full article available at http://rimed.org/rimedicaljournal-2019-03.asp].",2019-03-01 +32997632,Interhemispheric Functional Reorganization and its Structural Base After BCI-Guided Upper-Limb Training in Chronic Stroke.,"Brain-computer interface (BCI)-guided robot-assisted upper-limb training has been increasingly applied to stroke rehabilitation. However, the induced long-term neuroplasticity modulation still needs to be further characterized. This study investigated the functional reorganization and its structural base after BCI-guided robot-assisted training using resting-state fMRI, task-based fMRI, and diffusion tensor imaging (DTI) data. The clinical improvement and the neurological changes before, immediately after, and six months after 20-session BCI-guided robot hand training were explored in 14 chronic stroke subjects. The structural base of the induced functional reorganization and motor improvement were also investigated using DTI. 
Repeated measure ANOVA indicated long-term motor improvement was found (F[2, 26] = 6.367, p = 0.006). Significantly modulated functional connectivity (FC) was observed between ipsilesional motor regions (M1 and SMA) and some contralesional areas (SMA, PMd, SPL) in the seed-based analysis. Modulated FC with ipsilesional M1 was significantly correlated with motor function improvement (r = 0.6455, p = 0.0276). Besides, increased interhemispheric FC among the sensorimotor area from resting-state data and increased laterality index from task-based data together indicated the re-balance of the two hemispheres during the recovery. Multiple linear regression models suggested that both motor function improvement and the functional change between ipsilesional M1 and contralesional premotor area were significantly associated with the ipsilesional corticospinal tract integrity. The results in the current study provided solid support for stroke recovery mechanism in terms of interhemispheric interaction and its structural substrates, which could further enhance the understanding of BCI training in stroke rehabilitation. This study was registered at https://clinicaltrials.gov (NCT02323061).",2020-11-06 +29226803,Quantitative Missense Variant Effect Prediction Using Large-Scale Mutagenesis Data.,"Large datasets describing the quantitative effects of mutations on protein function are becoming increasingly available. Here, we leverage these datasets to develop Envision, which predicts the magnitude of a missense variant's molecular effect. Envision combines 21,026 variant effect measurements from nine large-scale experimental mutagenesis datasets, a hitherto untapped training resource, with a supervised, stochastic gradient boosting learning algorithm. Envision outperforms other missense variant effect predictors both on large-scale mutagenesis data and on an independent test dataset comprising 2,312 TP53 variants whose effects were measured using a low-throughput approach. 
This dataset was never used for hyperparameter tuning or model training and thus serves as an independent validation set. Envision prediction accuracy is also more consistent across amino acids than other predictors. Finally, we demonstrate that Envision's performance improves as more large-scale mutagenesis data are incorporated. We precompute Envision predictions for every possible single amino acid variant in human, mouse, frog, zebrafish, fruit fly, worm, and yeast proteomes (https://envision.gs.washington.edu/).",2017-12-06 +32490097,Ethical issues in poultry production - Datasets from a German consumer survey.,"The killing of day-old chicks is controversially discussed in poultry keeping, science, politics, and society. The present survey data contributes to understand consumers´ attitudes towards ethical issues in chicken production, especially the killing practice and dual purpose chickens as alternative to avoid such killing. Information on the various topics is provided: Consumer purchase pattern of eggs and chicken meat, perception of animal welfare and protection issues, knowledge and perception of killing day-old chicks, attitudes towards dual purpose chickens as an alternative to killing day-old chicks, and socio-demographic data. The data set contains standardized responds of 1000 telephone interviews. These interviews were conducted with German consumers in spring 2016. The survey data were in part analysed with cluster analysis to categorize consumers according to their purchasing criteria for dual chicken products, and assessing which socio-economic variables best described each of the consumer categories. 
The survey raw data, a file with the questionnaire and the codes, the analysed data, and additional files for understanding the cluster analysis are hosted in the public repository Open Research Data https://www.doi.org/10.4228/ZALF.DK.106.",2020-05-20 +34271916,Predictive models for chronic kidney disease after radical or partial nephrectomy in renal cell cancer using early postoperative serum creatinine levels.,"

Background

Several predictive factors for chronic kidney disease (CKD) following radical nephrectomy (RN) or partial nephrectomy (PN) have been identified. However, early postoperative laboratory values were infrequently considered as potential predictors. Therefore, this study aimed to develop predictive models for CKD 1 year after RN or PN using early postoperative laboratory values, including serum creatinine (SCr) levels, in addition to preoperative and intraoperative factors. Moreover, the optimal SCr sampling time point for the best prediction of CKD was determined.

Methods

Data were retrospectively collected from patients with renal cell cancer who underwent laparoscopic or robotic RN (n = 557) or PN (n = 999). Preoperative, intraoperative, and postoperative factors, including laboratory values, were incorporated during model development. We developed 8 final models using information collected at different time points (preoperative, postoperative day [POD] 0 to 5, and postoperative 1 month). Lastly, we combined all possible subsets of the developed models to generate 120 meta-models. Furthermore, we built a web application to facilitate the implementation of the model.

Results

The magnitude of postoperative elevation of SCr and history of CKD were the most important predictors for CKD at 1 year, followed by RN (compared to PN) and older age. Among the final models, the model using features of POD 4 showed the best performance for correctly predicting the stages of CKD at 1 year compared to other models (accuracy: 79% of POD 4 model versus 75% of POD 0 model, 76% of POD 1 model, 77% of POD 2 model, 78% of POD 3 model, 76% of POD 5 model, and 73% in postoperative 1 month model). Therefore, POD 4 may be the optimal sampling time point for postoperative SCr. A web application is hosted at https://dongy.shinyapps.io/aki_ckd .

Conclusions

Our predictive model, which incorporated postoperative laboratory values, especially SCr levels, in addition to preoperative and intraoperative factors, effectively predicted the occurrence of CKD 1 year after RN or PN and may be helpful for comprehensive management planning.",2021-07-16 +34165340,Application of Text Mining in Risk Assessment of Chemical Mixtures: A Case Study of Polycyclic Aromatic Hydrocarbons (PAHs).,"

Background

Cancer risk assessment of complex exposures, such as exposure to mixtures of polycyclic aromatic hydrocarbons (PAHs), is challenging due to the diverse biological activities of these compounds. With the help of text mining (TM), we have developed TM tools-the latest iteration of the Cancer Risk Assessment using Biomedical literature tool (CRAB3) and a Cancer Hallmarks Analytics Tool (CHAT)-that could be useful for automatic literature analyses in cancer risk assessment and research. Although CRAB3 analyses are based on carcinogenic modes of action (MOAs) and cover almost all the key characteristics of carcinogens, CHAT evaluates literature according to the hallmarks of cancer referring to the alterations in cellular behavior that characterize the cancer cell.

Objectives

The objective was to evaluate the usefulness of these tools to support cancer risk assessment by performing a case study of 22 European Union and U.S. Environmental Protection Agency priority PAHs and diesel exhaust and a case study of PAH interactions with silica.

Methods

We analyzed PubMed literature, comprising 57,498 references concerning priority PAHs and complex PAH mixtures, using CRAB3 and CHAT.

Results

CRAB3 analyses correctly identified similarities and differences in genotoxic and nongenotoxic MOAs of the 22 priority PAHs and grouped them according to their known carcinogenic potential. CHAT had the same capacity and complemented the CRAB output when comparing, for example, benzo[a]pyrene and dibenzo[a,l]pyrene. Both CRAB3 and CHAT analyses highlighted potentially interacting mechanisms within and across complex PAH mixtures and mechanisms of possible importance for interactions with silica.

Conclusion

These data suggest that our TM approach can be useful in the hazard identification of PAHs and mixtures including PAHs. The tools can assist in grouping chemicals and identifying similarities and differences in carcinogenic MOAs and their interactions. https://doi.org/10.1289/EHP6702.",2021-06-24 +32423767,Genetic Basis of Inherited Retinal Disease in a Molecularly Characterized Cohort of More Than 3000 Families from the United Kingdom.,"

Purpose

In a large cohort of molecularly characterized inherited retinal disease (IRD) families, we investigated proportions with disease attributable to causative variants in each gene.

Design

Retrospective study of electronic patient records.

Participants

Patients and relatives managed in the Genetics Service of Moorfields Eye Hospital in whom a molecular diagnosis had been identified.

Methods

Genetic screening used a combination of single-gene testing, gene panel testing, whole exome sequencing, and more recently, whole genome sequencing. For this study, genes listed in the Retinal Information Network online resource (https://sph.uth.edu/retnet/) were included. Transcript length was extracted for each gene (Ensembl, release 94).

Main outcome measures

We calculated proportions of families with IRD attributable to variants in each gene in the entire cohort, a cohort younger than 18 years, and a current cohort (at least 1 patient encounter between January 1, 2017, and August 2, 2019). Additionally, we explored correlation between numbers of families and gene transcript length.

Results

We identified 3195 families with a molecular diagnosis (variants in 135 genes), including 4236 affected individuals. The pediatric cohort comprised 452 individuals from 411 families (66 genes). The current cohort comprised 2614 families (131 genes; 3130 affected individuals). The 20 most frequently implicated genes overall (with prevalence rates per families) were as follows: ABCA4 (20.8%), USH2A (9.1%), RPGR (5.1%), PRPH2 (4.6%), BEST1 (3.9%), RS1 (3.5%), RP1 (3.3%), RHO (3.3%), CHM (2.7%), CRB1 (2.1%), PRPF31 (1.8%), MYO7A (1.7%), OPA1 (1.6%), CNGB3 (1.4%), RPE65 (1.2%), EYS (1.2%), GUCY2D (1.2%), PROM1 (1.2%), CNGA3 (1.1%), and RDH12 (1.1%). These accounted for 71.8% of all molecularly diagnosed families. Spearman coefficients for correlation between numbers of families and transcript length were 0.20 (P = 0.025) overall and 0.27 (P = 0.017), -0.17 (P = 0.46), and 0.71 (P = 0.047) for genes in which variants exclusively cause recessive, dominant, or X-linked disease, respectively.

Conclusions

Our findings help to quantify the burden of IRD attributable to each gene. More than 70% of families showed pathogenic variants in 1 of 20 genes. Transcript length (relevant to gene delivery strategies) correlated significantly with numbers of affected families (but not for dominant disease).",2020-04-16 +32202787,Structural Analysis and Identification of False Positive Hits in Luciferase-Based Assays.,"Luciferase-based bioluminescence detection techniques are highly favored in high-throughput screening (HTS), in which the firefly luciferase (FLuc) is the most commonly used variant. However, FLuc inhibitors can interfere with the activity of luciferase, which may result in false positive signals in HTS assays. In order to reduce the unnecessary cost of time and money, an in silico prediction model for FLuc inhibitors is highly desirable. In this study, we built an extensive data set consisting of 20 888 FLuc inhibitors and 198 608 noninhibitors, and then developed a group of classification models based on the combination of three machine learning (ML) algorithms and four types of molecular representations. The best prediction model based on XGBoost and ECFP4 and MOE2d descriptors yielded a balanced accuracy (BA) of 0.878 and an area under the receiver operating characteristic curve (AUC) value of 0.958 for the validation set, and a BA of 0.886 and an AUC of 0.947 for the test set. Three external validation sets, including set 1 (3231 FLuc inhibitors and 69 783 noninhibitors), set 2 (695 FLuc inhibitors and 75 913 noninhibitors), and set 3 (1138 FLuc inhibitors and 8155 noninhibitors), were used to verify the predictive ability of our models. The BA values for the three external validation sets given by the best model are 0.864, 0.845, and 0.791, respectively. 
In addition, the important features or structural fragments related to FLuc inhibitors were recognized by the Shapley additive explanations (SHAP) method along with their influences on predictions, which may provide valuable clues to detecting undesirable luciferase inhibitors. Based on the important and explanatory features, 16 rules were proposed for detecting FLuc inhibitors, which can achieve a correction rate of 70% for FLuc inhibitors. Furthermore, a comparison with existing prediction rules and models for FLuc inhibitors used in virtual screening verified the high reliability of the models and rules proposed in this study. We also used the model to screen three curated chemical databases, and almost 10% of the molecules in the evaluated databases were predicted as inhibitors, highlighting the potential risk of false positives in luciferase-based assays. Finally, a public web server called ChemFLuc was developed (http://admet.scbdd.com/chemfluc/index/), and it offers a free available service to predict potential FLuc inhibitors.",2020-03-30 +33125734,Technical Note: Implications of using EGSnrc instead of EGS4 for extracting electron stopping powers from measured energy spectra.,"

Purpose

NRC Report PIRS-0626 (https://doi.org/10.4224/40000364) describes how measured electron energy deposition spectra can be used to determine the electronic stopping power. The stopping power is obtained by comparing measured spectra with spectra calculated using Monte Carlo techniques. The stopping powers reported in PIRS-0626 were obtained using the EGS4 Monte Carlo code. Since then, the EGSnrc code has been released which has more accurate electron transport algorithms. We calculate the effect on the measured stopping powers of using EGSnrc instead of EGS4.

Method

The EGS4 spectra calculated in PIRS-0626 were based on 4×10^5 primary electron histories. We first show that those spectra, calculated in 1997, are consistent with current EGS4 spectra calculated using 10^8 histories. EGSnrc spectra are also calculated using 10^8 histories and these high-precision spectra are compared to extract any energy difference. The energy differences between the spectra are used to estimate the effect on the measured electronic stopping powers.

Results

The energy differences depend on the absorber material, the absorber thickness and the beam energy. The improved electron elastic scattering cross section of EGSnrc accounts for only part of the difference between the two codes. The effect on the extracted stopping power is largest for the lowest electron energies and can be as large as 0.9%. The calculated spectra show differences for lower energies, with the EGSnrc spectra having a larger proportion of low-energy electrons.

Conclusion

The differences introduced by using EGSnrc instead of EGS4 can affect the estimated stopping power by almost 1% in the worst case but generally the effect is much smaller. We report corrections that can be applied to all the stopping power data in PIRS-0626. An experiment to measure the average energy to create an ion pair in air, Wair , using aluminum detectors will provide an interesting test of the aluminum stopping power data as reported in PIRS-0626 and revised by this work.",2021-02-16 +30395289,The PRIDE database and related tools and resources in 2019: improving support for quantification data.,"The PRoteomics IDEntifications (PRIDE) database (https://www.ebi.ac.uk/pride/) is the world's largest data repository of mass spectrometry-based proteomics data, and is one of the founding members of the global ProteomeXchange (PX) consortium. In this manuscript, we summarize the developments in PRIDE resources and related tools since the previous update manuscript was published in Nucleic Acids Research in 2016. In the last 3 years, public data sharing through PRIDE (as part of PX) has definitely become the norm in the field. In parallel, data re-use of public proteomics data has increased enormously, with multiple applications. We first describe the new architecture of PRIDE Archive, the archival component of PRIDE. PRIDE Archive and the related data submission framework have been further developed to support the increase in submitted data volumes and additional data types. A new scalable and fault tolerant storage backend, Application Programming Interface and web interface have been implemented, as a part of an ongoing process. Additionally, we emphasize the improved support for quantitative proteomics data through the mzTab format. 
At last, we outline key statistics on the current data contents and volume of downloads, and how PRIDE data are starting to be disseminated to added-value resources including Ensembl, UniProt and Expression Atlas.",2019-01-01 +33510575,"Vascular plants dataset of the herbarium (HSS) of Agrarian Research Institute Finca ""La Orden-Valdesequera"" (CICYTEX), Extremadura, Spain.","The HSS herbarium database includes 69,397 records of vascular plant taxa, representing 91.1% of the herbarium's specimens as for December, 2019, which are available through the Global Biodiversity Information Facility (GBIF) website (accessible at https://doi.org/10.15468/siye1z). The database represents 4,343 species and 787 infraspecific taxa (530 subspecies, 130 varieties and 127 notho-species or hybrids) of 196 families and 1,164 genera, and 105 type sheets. So far, 97.7% of the databased records are georeferenced (geographic coordinates or MRGS coordinates) and the geographic area with the largest number of specimens is the southwest quadrant of the Iberian Peninsula (Spain and Portugal).",2021-01-07 +30052772,The Terabase Search Engine: a large-scale relational database of short-read sequences.,"

Motivation

DNA sequencing archives have grown to enormous scales in recent years, and thousands of human genomes have already been sequenced. The size of these data sets has made searching the raw read data infeasible without high-performance data-query technology. Additionally, it is challenging to search a repository of short-read data using relational logic and to apply that logic across samples from multiple whole-genome sequencing samples.

Results

We have built a compact, efficiently-indexed database that contains the raw read data for over 250 human genomes, encompassing trillions of bases of DNA, and that allows users to search these data in real-time. The Terabase Search Engine enables retrieval from this database of all the reads for any genomic location in a matter of seconds. Users can search using a range of positions or a specific sequence that is aligned to the genome on the fly.

Availability and implementation

Public access to the Terabase Search Engine database is available at http://tse.idies.jhu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +33406856,"First Report of Choanephora cucurbitarum Causing Leaf Wilt, Flower Rot, and Stem Necrosis on Crotalaria breviflora. ","Crotalaria breviflora (Fabaceae) is used as green manure crop because of its nitrogen fixation and nematode control (Nascimento et al. 2020). In April 2018, leaf wilting, flower rot, and stem necrosis symptoms were observed on C. breviflora with 100% incidence, in Sorriso (12° 33' 31″ S, 55º 42' 51″ W), Santa Carmem (11° 55' 52″ S, 55º 16' 47″ W), and Sapezal (12º 59' 22″ S, 58º 45' 52″ W) counties in the state of Mato Grosso, Brazil. Three monosporic isolates were isolated from symptomatic leaves, cultivated in potato dextrose agar (PDA) medium, and deposited at the Cultures Collection of the University of Brasilia (codes CCUB 1293, CCUB 1667, CCUB 1668). Colonies on PDA were white and cottony with presence of hyaline and coenocytic hyphae. The mycelia later became pale yellow with abundant reproductive structures. Sporangiophores were hyaline, aseptate, unbranched, and apically dilated to form a clavate vesicle, which produced secondary vesicles bearing sporangiola. Secondary vesicles were clavate, light brown, and 37 to 51 µm in diameter. Sporangia were brown to dark brown, globular to ellipsoid, 115 to 140 µm long, and 96 to 122 µm wide. Sporangiospores (n=30) were brown to reddish-brown, ellipsoid to ovoid, with longitudinal striae, 14 to 19 µm long, and 8 to 12 µm wide. Some with hyaline appendages at both ends. Their morphological characteristics were consistent with the descriptions of Choanephora cucurbitarum (Kirk 1984). To confirm the identity, the DNA of the three isolates was extracted and the sequences of Small Subunit (SSU), Large Subunit (LSU), and complete Internal Transcribed Spacer (ITS) of rDNA were amplified using V9G, ITS3, and LR5 primers (GenBank acc. no: MN897836, MN897837 and MN897838). The sequences were aligned with the MAFFT software. 
The alignment matrix was subjected to Maximum Likelihood (ML) analysis using RAxML v. 8 and Bayesian Inference performed in MrBayes v.3.1.2. The tree was edited in the FigTree software. The sequences showed 100% identity with the sequences from C. cucurbitarum found on the GenBank. To confirm pathogenicity, a suspension at 5.4 ×106 spores/ml was prepared from a 15-day-old culture grown at 25°C and sprayed on asymptomatic plants of C. breviflora. Sterilized water was sprayed as the control. Plants were kept in a humid chamber at 20°C for 48 h. Initial symptoms were visualized 16 days after inoculation. Complete necrosis of leaves and stems with spore mass on infected tissue was observed 19 days after inoculation. To satisfy the Koch's postulates, the fungus was successfully reisolated from the infected tissues. No symptoms were observed on the control plants. In Brazil, this pathogen has been reported on Brassica oleracea var. capitata, Capsicum annuum, Crotalaria spectabilis, Cucurbita sp., and Vigna unguiculata (Alfenas et al. 2018; Mendes and Urben, 2019). C. cucurbitarum has been reported to have a wide range of hosts (Farr and Rossman, 2020). It can infect the crops grown in rotation or in succession, including common bean, corn, cotton, quinoa, soybean, and sunflower. Therefore, this pathogen is of epidemiological importance and poses a threat to the croplands where environmental conditions are conducive to the disease to develop and spread. To our knowledge, this is the first report of C. cucurbitarum causing leaf and flower wilt, and stem rot on C. breviflora in the world. Acknowledgment We thank the Environmental Sciences Graduate Program, Federal University of Mato Grosso, University of Brasilia, PROPeq/PROPG-UFMT, EMBRAPA, CODEX/UFMT, Institute of Agricultural and Environmental Sciences (ICAA)/UFMT and CAPES for providing the Master's scholarship. References Alfenas, R. F., et al. 2018. Plant Dis.102:1456. 
https://doi.org/10.1094/PDIS-10-17-1610-PDN, Google Scholar. Farr, D. F., and Rossman, A. Y. 2020. Fungal Databases, Syst. Mycol. Microbiol. Lab., ARS, USDA. Retrieved May 26, 2020 from https://nt.ars-grin.gov/fungaldatabases/, Google Scholar. Kirk, P. M. 1984. Mycol Paper. 152:1. Google Scholar. Mendes, M. A. S., and Urben, A. F. 2020. Fungos relatados em plantas no Brasil, Retrived May 26, 2020 from http://pragawall.cenargen.embrapa.br/aiqweb/michtml/fgbanco01.asp, Google Scholar. Nascimento, D. D. et al. 2020. Bioscience Journal. 36:713. https://doi.org/10.14393/BJ-v36n3a2020-42248, Google Scholar.",2021-01-06 +32176258,M2IA: a web server for microbiome and metabolome integrative analysis.,"MOTIVATION:Microbiome-metabolome association studies have experienced exponential growth for an in-depth understanding of the impact of microbiota on human health over the last decade. However, analyzing the resulting multi-omics data and their correlations remains a significant challenge due to the lack of a comprehensive computational tool that can facilitate data integration and interpretation. In this study, an automated microbiome and metabolome integrative analysis pipeline (M2IA) has been developed to meet the urgent needs for tools that can effectively integrate microbiome and metabolome data to derive biological insights. RESULTS:M2IA streamlines the integrative data analysis between metabolome and microbiome, from data preprocessing, univariate and multivariate statistical analyses, advanced functional analysis for biological interpretation, to a summary report. The functionality of M2IA was demonstrated using TwinsUK cohort datasets consisting of 1116 fecal metabolites and 16s rRNA microbiome from 786 individuals. Moreover, two important metabolic pathways, i.e. benzoate degradation and phosphotransferase system, were identified to be closely associated with obesity. AVAILABILITY AND IMPLEMENTATION:M2IA is public available at http://m2ia.met-bioinformatics.cn. 
CONTACT:yanni617@zju.edu.cn or fjf68@zju.edu.cn. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-06-01 +27527702,HCVIVdb: The hepatitis-C IRES variation database.,"

Background

Sequence variability in the hepatitis C virus (HCV) genome has led to the development and classification of six genotypes and a number of subtypes. The HCV 5' untranslated region mainly comprises an internal ribosomal entry site (IRES) responsible for cap-independent synthesis of the viral polyprotein and is conserved among all HCV genotypes.

Description

Considering the possible high impact of variations in HCV IRES on viral protein production and thus virus replication, we decided to collect the available data on known nucleotide variants in the HCV IRES and their impact on IRES function in translation initiation. The HCV IRES variation database (HCVIVdb) is a collection of naturally occurring and engineered mutation entries for the HCV IRES. Each entry contains contextual information pertaining to the entry such as the HCV genotypic background and links to the original publication. Where available, quantitative data on the IRES efficiency in translation have been collated along with details on the reporter system used to generate the data. Data are displayed both in a tabular and graphical formats and allow direct comparison of results from different experiments. Together the data provide a central resource for researchers in the IRES and hepatitis C-oriented fields.

Conclusion

The collation of over 1900 mutations enables systematic analysis of the HCV IRES. The database is mainly dedicated to detailed comparative and functional analysis of all the HCV IRES domains, which can further lead to the development of site-specific drug designs and provide a guide for future experiments. HCVIVdb is available at http://www.hcvivdb.org .",2016-08-15 +27733502,SZGR 2.0: a one-stop shop of schizophrenia candidate genes.,"SZGR 2.0 is a comprehensive resource of candidate variants and genes for schizophrenia, covering genetic, epigenetic, transcriptomic, translational and many other types of evidence. By systematic review and curation of multiple lines of evidence, we included almost all variants and genes that have ever been reported to be associated with schizophrenia. In particular, we collected ∼4200 common variants reported in genome-wide association studies, ∼1000 de novo mutations discovered by large-scale sequencing of family samples, 215 genes spanning rare and replication copy number variations, 99 genes overlapping with linkage regions, 240 differentially expressed genes, 4651 differentially methylated genes and 49 genes as antipsychotic drug targets. To facilitate interpretation, we included various functional annotation data, especially brain eQTL, methylation QTL, brain expression featured in deep categorization of brain areas and developmental stages and brain-specific promoter and enhancer annotations. Furthermore, we conducted cross-study, cross-data type and integrative analyses of the multidimensional data deposited in SZGR 2.0, and made the data and results available through a user-friendly interface. In summary, SZGR 2.0 provides a one-stop shop of schizophrenia variants and genes and their function and regulation, providing an important resource in the schizophrenia and other mental disease community. 
SZGR 2.0 is available at https://bioinfo.uth.edu/SZGR/.",2016-10-12 +33290432,Cell segmentation and tracking using CNN-based distance predictions and a graph-based matching strategy.,"The accurate segmentation and tracking of cells in microscopy image sequences is an important task in biomedical research, e.g., for studying the development of tissues, organs or entire organisms. However, the segmentation of touching cells in images with a low signal-to-noise-ratio is still a challenging problem. In this paper, we present a method for the segmentation of touching cells in microscopy images. By using a novel representation of cell borders, inspired by distance maps, our method is capable to utilize not only touching cells but also close cells in the training process. Furthermore, this representation is notably robust to annotation errors and shows promising results for the segmentation of microscopy images containing in the training data underrepresented or not included cell types. For the prediction of the proposed neighbor distances, an adapted U-Net convolutional neural network (CNN) with two decoder paths is used. In addition, we adapt a graph-based cell tracking algorithm to evaluate our proposed method on the task of cell tracking. The adapted tracking algorithm includes a movement estimation in the cost function to re-link tracks with missing segmentation masks over a short sequence of frames. Our combined tracking by detection method has proven its potential in the IEEE ISBI 2020 Cell Tracking Challenge (http://celltrackingchallenge.net/) where we achieved as team KIT-Sch-GE multiple top three rankings including two top performances using a single segmentation model for the diverse data sets.",2020-12-08 +33257449,Validation of a Community-Acquired Pneumonia Score To Improve Empiric Antibiotic Selection at an Academic Medical Center. 
,"The 2019 American Thoracic Society and the Infectious Diseases Society of America community-acquired pneumonia (CAP) guidelines recommend that drug-resistant pathogens (DRP) be empirically covered if locally validated risk factors are present. This retrospective case-control validation study evaluated the performance of the drug resistance in pneumonia (DRIP) clinical prediction score. Two hundred seventeen adult patients with ICD-10 (https://www.who.int/classifications/classification-of-diseases) pneumonia diagnosis, positive confirmed microbiologic data, and clinical signs and symptoms were included. A DRIP score of ≥4 was used to assess model performance. Logistic regression was used to select for significant predictors and create a modified DRIP score, which was evaluated to define clinical application. The DRIP score predicted pneumonia due to a DRP with a sensitivity of 67% and specificity of 73%. The area under the receiver operating characteristic (AUROC) curve was 0.76 (95% confidence interval [CI], 0.69 to 0.82). From regression analysis, prior infection with a DRP and antibiotics in the last 60 days, yielding scores of 2 points and 1 point, respectively, remained local risk factors in predicting drug-resistant pneumonia. Sensitivity (47%) and specificity (94%) were maximized at a threshold of ≥2 in the modified DRIP model. Therefore, prior infection with a DRP remained the only clinically relevant predictor for drug-resistant pneumonia. The original DRIP score demonstrated a decreased performance in our patient population and behaved similarly to other clinical prediction models. Empiric CAP therapy without anti-methicillin-resistant Staphylococcus aureus and antipseudomonal coverage should be considered for noncritically ill patients without a drug resistant pathogen infection in the past year. 
Our data support the necessity of local validation to authenticate clinical risk predictors for drug-resistant pneumonia.",2021-01-20 +33258916,MOSGA: Modular Open-Source Genome Annotator.,"

Motivation

The generation of high-quality assemblies, even for large eukaryotic genomes, has become a routine task for many biologists thanks to recent advances in sequencing technologies. However, the annotation of these assemblies-a crucial step toward unlocking the biology of the organism of interest-has remained a complex challenge that often requires advanced bioinformatics expertise.

Results

Here, we present MOSGA (Modular Open-Source Genome Annotator), a genome annotation framework for eukaryotic genomes with a user-friendly web-interface that generates and integrates annotations from various tools. The aggregated results can be analyzed with a fully integrated genome browser and are provided in a format ready for submission to NCBI. MOSGA is built on a portable, customizable and easily extendible Snakemake backend, and thus, can be tailored to a wide range of users and projects.

Availability and implementation

We provide MOSGA as a web service at https://mosga.mathematik.uni-marburg.de and as a docker container at registry.gitlab.com/mosga/mosga:latest. Source code can be found at https://gitlab.com/mosga/mosga.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-01 +28673048,Taming the BEAST-A Community Teaching Material Resource for BEAST 2.,"Phylogenetics and phylodynamics are central topics in modern evolutionary biology. Phylogenetic methods reconstruct the evolutionary relationships among organisms, whereas phylodynamic approaches reveal the underlying diversification processes that lead to the observed relationships. These two fields have many practical applications in disciplines as diverse as epidemiology, developmental biology, palaeontology, ecology, and linguistics. The combination of increasingly large genetic data sets and increases in computing power is facilitating the development of more sophisticated phylogenetic and phylodynamic methods. Big data sets allow us to answer complex questions. However, since the required analyses are highly specific to the particular data set and question, a black-box method is not sufficient anymore. Instead, biologists are required to be actively involved with modeling decisions during data analysis. The modular design of the Bayesian phylogenetic software package BEAST 2 enables, and in fact enforces, this involvement. At the same time, the modular design enables computational biology groups to develop new methods at a rapid rate. A thorough understanding of the models and algorithms used by inference software is a critical prerequisite for successful hypothesis formulation and assessment. In particular, there is a need for more readily available resources aimed at helping interested scientists equip themselves with the skills to confidently use cutting-edge phylogenetic analysis software. These resources will also benefit researchers who do not have access to similar courses or training at their home institutions. 
Here, we introduce the ""Taming the Beast"" (https://taming-the-beast.github.io/) resource, which was developed as part of a workshop series bearing the same name, to facilitate the usage of the Bayesian phylogenetic software package BEAST 2.",2018-01-01 +32915977,A novel algorithm comprehensively characterizes human RH genes using whole-genome sequencing data.,"RHD and RHCE genes encode Rh blood group antigens and exhibit extensive single-nucleotide polymorphisms and chromosome structural changes in patients with sickle cell disease (SCD). RH variation can drive loss of antigen epitopes or expression of new epitopes, predisposing patients with SCD to Rh alloimmunization. Serologic antigen typing is limited to common Rh antigens, necessitating a genetic approach to detect variant antigen expression. We developed a novel algorithm termed RHtyper for RH genotyping from existing whole-genome sequencing (WGS) data. RHtyper determined RH genotypes in an average of 3.4 and 3.3 minutes per sample for RHD and RHCE, respectively. In a validation cohort consisting of 57 patients with SCD, RHtyper achieved 100% accuracy for RHD and 98.2% accuracy for RHCE, when compared with genotypes obtained by RH BeadChip and targeted molecular assays and after verification by Sanger sequencing and independent next-generation sequencing assays. RHtyper was next applied to WGS data from an additional 827 patients with SCD. In the total cohort of 884 patients, RHtyper identified 38 RHD and 28 RHCE distinct alleles, including a novel RHD DAU allele, RHD* 602G, 733C, 744T 1136T. RHtyper provides comprehensive and high-throughput RH genotyping from WGS data, facilitating deconvolution of the extensive RH genetic variation among patients with SCD. 
We have implemented RHtyper as a cloud-based public access application in DNAnexus (https://platform.dnanexus.com/app/RHtyper), enabling clinicians and researchers to perform RH genotyping with next-generation sequencing data.",2020-09-01 +33399015,"A comprehensive review on carotenoids in foods and feeds: status quo, applications, patents, and research needs.","Carotenoids are isoprenoids widely distributed in foods that have been always part of the diet of humans. Unlike the other so-called food bioactives, some carotenoids can be converted into retinoids exhibiting vitamin A activity, which is essential for humans. Furthermore, they are much more versatile as they are relevant in foods not only as sources of vitamin A, but also as natural pigments, antioxidants, and health-promoting compounds. Lately, they are also attracting interest in the context of nutricosmetics, as they have been shown to provide cosmetic benefits when ingested in appropriate amounts. In this work, resulting from the collaborative work of participants of the COST Action European network to advance carotenoid research and applications in agro-food and health (EUROCAROTEN, www.eurocaroten.eu, https://www.cost.eu/actions/CA15136/#tabs|Name:overview) research on carotenoids in foods and feeds is thoroughly reviewed covering aspects such as analysis, carotenoid food sources, carotenoid databases, effect of processing and storage conditions, new trends in carotenoid extraction, daily intakes, use as human, and feed additives are addressed. Furthermore, classical and recent patents regarding the obtaining and formulation of carotenoids for several purposes are pinpointed and briefly discussed. Lastly, emerging research lines as well as research needs are highlighted.",2021-01-05 +34305284,Machine learning-based heart disease prediction system for Indian population: An exploratory study done in South India.,"

Background

In India, huge mortality occurs due to cardiovascular diseases (CVDs) as these diseases are not diagnosed in early stages. Machine learning (ML) algorithms can be used to build efficient and economical prediction system for early diagnosis of CVDs in India.

Methods

A total of 1670 anonymized medical records were collected from a tertiary hospital in South India. Seventy percent of the collected data were used to train the prediction system. Five state-of-the-art ML algorithms (k-Nearest Neighbours, Naïve Bayes, Logistic Regression, AdaBoost and Random Forest [RF]) were applied using Python programming language to develop the prediction system. The performance was evaluated over remaining 30% of data. The prediction system was later deployed in the cloud for easy accessibility via Internet.

Results

ML effectively predicted the risk of heart disease. The best performing (RF) prediction system correctly classified 470 out of 501 medical records thus attaining a diagnostic accuracy of 93.8%. Sensitivity and specificity were observed to be 92.8% and 94.6%, respectively. The prediction system attained positive predictive value of 94% and negative predictive value of 93.6%. The prediction model developed in this study can be accessed at http://das.southeastasia.cloudapp.azure.com/predict/.

Conclusions

ML-based prediction system developed in this study performs well in early diagnosis of CVDs and can be accessed via Internet. This study offers promising results suggesting potential use of ML-based heart disease prediction system as a screening tool to diagnose heart diseases in primary healthcare centres in India, which would otherwise get undetected.",2021-01-06 +33418450,Discovery of new enzymatic functions and metabolic pathways using genomic enzymology web tools.,"The continuing expansion of protein and genome sequence databases is an opportunity to identify novel enzymes with biotechnological applications. Whether applied to enzymology, chemical biology, systems biology, and microbiology, database mining must be 'user-friendly' so that experimentalists can devise focused strategies to discover the in vitro activities and in vivo functions of uncharacterized enzymes. We developed a suite of genomic enzymology tools (https://efi.igb.illinois.edu/) to (1) generate sequence similarity networks (SSNs) for exploration of sequence-function space in protein families (EFI-EST) and (2) provide genome context for members of protein families (EFI-GNT). Integrated analysis of this complementary information allows to generate testable hypotheses about new functions. After a brief overview of EFI-EST and EFI-GNT, we describe applications that illustrate their use.",2021-01-05 +33214771,HumDLoc: Human Protein Subcellular Localization Prediction Using Deep Neural Network.,"

Aims

To develop a tool that can annotate subcellular localization of human proteins.

Background

With the progression of high throughput human proteomics projects, an enormous amount of protein sequence data has been discovered in the recent past. All these raw sequence data require precise mapping and annotation for their respective biological role and functional attributes. The functional characteristics of protein molecules are highly dependent on the subcellular localization/compartment. Therefore, a fully automated and reliable protein subcellular localization prediction system would be very useful for current proteomic research.

Objective

To develop a machine learning-based predictive model that can annotate the subcellular localization of human proteins with high accuracy and precision.

Methods

In this study, we used the PSI-CD-HIT homology criterion and utilized the sequence-based features of protein sequences to develop a powerful subcellular localization predictive model. The dataset used to train the HumDLoc model was extracted from a reliable data source, Uniprot knowledge base, which helps the model to generalize on the unseen dataset.

Results

The proposed model, HumDLoc, was compared with two of the most widely used techniques: CELLO and DeepLoc, and other machine learning-based tools. The result demonstrated promising predictive performance of HumDLoc model based on various machine learning parameters such as accuracy (≥97.00%), precision (≥0.86), recall (≥0.89), MCC score (≥0.86), ROC curve (0.98 square unit), and precision-recall curve (0.93 square unit).

Conclusion

In conclusion, HumDLoc was able to outperform several alternative tools for correctly predicting subcellular localization of human proteins. The HumDLoc has been hosted as a web-based tool at https://bioserver.iiita.ac.in/HumDLoc/.",2020-11-01 +28925997,Precision annotation of digital samples in NCBI's gene expression omnibus.,"The Gene Expression Omnibus (GEO) contains more than two million digital samples from functional genomics experiments amassed over almost two decades. However, individual sample meta-data remains poorly described by unstructured free text attributes preventing its largescale reanalysis. We introduce the Search Tag Analyze Resource for GEO as a web application (http://STARGEO.org) to curate better annotations of sample phenotypes uniformly across different studies, and to use these sample annotations to define robust genomic signatures of disease pathology by meta-analysis. In this paper, we target a small group of biomedical graduate students to show rapid crowd-curation of precise sample annotations across all phenotypes, and we demonstrate the biological validity of these crowd-curated annotations for breast cancer. STARGEO.org makes GEO data findable, accessible, interoperable and reusable (i.e., FAIR) to ultimately facilitate knowledge discovery. Our work demonstrates the utility of crowd-curation and interpretation of open 'big data' under FAIR principles as a first step towards realizing an ideal paradigm of precision medicine.",2017-09-19 +29378221,Canine sarcomas as a surrogate for the human disease.,"Pet dogs are becoming increasingly recognized as a population with the potential to inform medical research through their treatment for a variety of maladies by veterinary health professionals. This is the basis of the One Health initiative, supporting the idea of collaboration between human and animal health researchers and clinicians to study spontaneous disease processes and treatment in animals to inform human health. 
Cancer is a major health burden in pet dogs, accounting for approximately 30% of deaths across breeds. As such, pet dogs with cancer are becoming increasingly recognized as a resource for studying the pharmacology and therapeutic potential of anticancer drugs and therapies under development. This was recently highlighted by a National Academy of Medicine Workshop on Comparative Oncology that took place in mid-2015 (http://www.nap.edu/21830). One component of cancer burden in dogs is their significantly higher incidence of sarcomas as compared to humans. This increased incidence led to canine osteosarcoma being an important component in the development of surgical approaches for osteosarcoma in children. Included in this review of sarcomas in dogs is a description of the incidence, pathology, molecular characteristics and previous translational therapeutic studies associated with these tumors. An understanding of the patho-physiological and molecular characteristics of these naturally occurring canine sarcomas holds great promise for effective incorporation into drug development schemas, for evaluation of target modulation or other pharmacodynamic measures associated with therapeutic response. These data could serve to supplement other preclinical data and bolster clinical investigations in tumor types for which there is a paucity of human patients for clinical trials.",2018-03-09 +32510565,GreenCircRNA: a database for plant circRNAs that act as miRNA decoys. ,"Circular RNAs (circRNAs) are endogenous non-coding RNAs that form a covalently closed continuous loop, are widely distributed and play important roles in a series of developmental processes. In plants, an increasing number of studies have found that circRNAs can regulate plant metabolism and are involved in plant responses to biotic or abiotic stress. Acting as miRNA decoys is a critical way for circRNAs to perform their functions. 
Therefore, we developed GreenCircRNA-a database for plant circRNAs acting as miRNA decoys that is dedicated to providing a plant-based platform for detailed exploration of plant circRNAs and their potential decoy functions. This database includes over 210 000 circRNAs from 69 species of plants; the main data sources of circRNAs in this database are NCBI, EMBL-EBI and Phytozome. To investigate the function of circRNAs as competitive endogenous RNAs, the possibility of circRNAs from 38 plants to act as miRNA decoys was predicted. Moreover, we provide basic information for the circRNAs in the database, including their locations, host genes and relative expression levels, as well as full-length sequences, host gene GO (Gene Ontology) numbers and circRNA visualization. GreenCircRNA is the first database for the prediction of circRNAs that act as miRNA decoys and contains the largest number of plant species. Database URL: http://greencirc.cn.",2020-01-01 +32367111,UPCLASS: a deep learning-based classifier for UniProtKB entry publications. ,"In the UniProt Knowledgebase (UniProtKB), publications providing evidence for a specific protein annotation entry are organized across different categories, such as function, interaction and expression, based on the type of data they contain. To provide a systematic way of categorizing computationally mapped bibliographies in UniProt, we investigate a convolutional neural network (CNN) model to classify publications with accession annotations according to UniProtKB categories. The main challenge of categorizing publications at the accession annotation level is that the same publication can be annotated with multiple proteins and thus be associated with different category sets according to the evidence provided for the protein. We propose a model that divides the document into parts containing and not containing evidence for the protein annotation. 
Then, we use these parts to create different feature sets for each accession and feed them to separate layers of the network. The CNN model achieved a micro F1-score of 0.72 and a macro F1-score of 0.62, outperforming baseline models based on logistic regression and support vector machine by up to 22 and 18 percentage points, respectively. We believe that such an approach could be used to systematically categorize the computationally mapped bibliography in UniProtKB, which represents a significant set of the publications, and help curators to decide whether a publication is relevant for further curation for a protein accession. Database URL: https://goldorak.hesge.ch/bioexpclass/upclass/.",2020-01-01 +31691816,miRPathDB 2.0: a novel release of the miRNA Pathway Dictionary Database.,"Since the initial release of miRPathDB, tremendous progress has been made in the field of microRNA (miRNA) research. New miRNA reference databases have emerged, a vast amount of new miRNA candidates has been discovered and the number of experimentally validated target genes has increased considerably. Hence, the demand for a major upgrade of miRPathDB, including extended analysis functionality and intuitive visualizations of query results has emerged. Here, we present the novel release 2.0 of the miRNA Pathway Dictionary Database (miRPathDB) that is freely accessible at https://mpd.bioinf.uni-sb.de/. miRPathDB 2.0 comes with a ten-fold increase of pre-processed data. In total, the updated database provides putative associations between 27 452 (candidate) miRNAs, 28 352 targets and 16 833 pathways for Homo sapiens, as well as interactions of 1978 miRNAs, 24 898 targets and 6511 functional categories for Mus musculus. Additionally, we analyzed publications citing miRPathDB to identify common use-cases and further extensions. Based on this evaluation, we added new functionality for interactive visualizations and down-stream analyses of bulk queries. 
In summary, the updated version of miRPathDB, with its new custom-tailored features, is one of the most comprehensive and advanced resources for miRNAs and their target pathways.",2020-01-01 +34101125,Reviewing Challenges of Predicting Protein Melting Temperature Change Upon Mutation Through the Full Analysis of a Highly Detailed Dataset with High-Resolution Structures.,"Predicting the effects of mutations on protein stability is a key problem in fundamental and applied biology, still unsolved even for the relatively simple case of small, soluble, globular, monomeric, two-state-folder proteins. Many articles discuss the limitations of prediction methods and of the datasets used to train them, which result in low reliability for actual applications despite globally capturing trends. Here, we review these and other issues by analyzing one of the most detailed, carefully curated datasets of melting temperature change (ΔTm) upon mutation for proteins with high-resolution structures. After examining the composition of this dataset to discuss imbalances and biases, we inspect several of its entries assisted by an online app for data navigation and structure display and aided by a neural network that predicts ΔTm with accuracy close to that of programs available to this end. We pose that the ΔTm predictions of our network, and also likely those of other programs, account only for a baseline-like general effect of each type of amino acid substitution which then requires substantial corrections to reproduce the actual stability changes. The corrections are very different for each specific case and arise from fine structural details which are not well represented in the dataset and which, despite appearing reasonable upon visual inspection of the structures, are hard to encode and parametrize. 
Based on these observations, additional analyses, and a review of recent literature, we propose recommendations for developers of stability prediction methods and for efforts aimed at improving the datasets used for training. We leave our interactive interface for analysis available online at http://lucianoabriata.altervista.org/papersdata/proteinstability2021/s1626navigation.html so that users can further explore the dataset and baseline predictions, possibly serving as a tool useful in the context of structural biology and protein biotechnology research and as material for education in protein biophysics.",2021-06-08 +32219413,FerrDb: a manually curated resource for regulators and markers of ferroptosis and ferroptosis-disease associations. ,"Ferroptosis is a mode of regulated cell death that depends on iron. Cells die from the toxic accumulation of lipid reactive oxygen species. Ferroptosis is tightly linked to a variety of human diseases, such as cancers and degenerative diseases. The ferroptotic process is complicated and consists of a wide range of metabolites and biomolecules. Although great progress has been achieved, the mechanism of ferroptosis remains enigmatic. We have currently entered an era of extensive knowledge advancement, and thus, it is important to find ways to organize and utilize data efficiently. We have observed a high-quality knowledge base of ferroptosis research is lacking. In this study, we downloaded 784 ferroptosis articles from the PubMed database. Ferroptosis regulators and markers and associated diseases were extracted from these articles and annotated. In summary, 253 regulators (including 108 drivers, 69 suppressors, 35 inducers and 41 inhibitors), 111 markers and 95 ferroptosis-disease associations were found. We then developed FerrDb, the first manually curated database for regulators and markers of ferroptosis and ferroptosis-disease associations. 
The database has a user-friendly interface, and it will be updated every 6 months to offer long-term service. FerrDb is expected to help researchers acquire insights into ferroptosis. Database URL: http://www.zhounan.org/ferrdb.",2020-01-01 +31298694,DeePaC: predicting pathogenic potential of novel DNA with reverse-complement neural networks.,"

Motivation

We expect novel pathogens to arise due to their fast-paced evolution, and new species to be discovered thanks to advances in DNA sequencing and metagenomics. Moreover, recent developments in synthetic biology raise concerns that some strains of bacteria could be modified for malicious purposes. Traditional approaches to open-view pathogen detection depend on databases of known organisms, which limits their performance on unknown, unrecognized and unmapped sequences. In contrast, machine learning methods can infer pathogenic phenotypes from single NGS reads, even though the biological context is unavailable.

Results

We present DeePaC, a Deep Learning Approach to Pathogenicity Classification. It includes a flexible framework allowing easy evaluation of neural architectures with reverse-complement parameter sharing. We show that convolutional neural networks and LSTMs outperform the state-of-the-art based on both sequence homology and machine learning. Combining a deep learning approach with integrating the predictions for both mates in a read pair results in cutting the error rate almost in half in comparison to the previous state-of-the-art.

Availability and implementation

The code and the models are available at: https://gitlab.com/rki_bioinformatics/DeePaC.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +30993051,Ozymandias: a biodiversity knowledge graph.,"Enormous quantities of biodiversity data are being made available online, but much of this data remains isolated in silos. One approach to breaking these silos is to map local, often database-specific identifiers to shared global identifiers. This mapping can then be used to construct a knowledge graph, where entities such as taxa, publications, people, places, specimens, sequences, and institutions are all part of a single, shared knowledge space. Motivated by the 2018 GBIF Ebbe Nielsen Challenge I explore the feasibility of constructing a ""biodiversity knowledge graph"" for the Australian fauna. The data cleaning and reconciliation steps involved in constructing the knowledge graph are described in detail. Examples are given of its application to understanding changes in patterns of taxonomic publication over time. A web interface to the knowledge graph (called ""Ozymandias"") is available at https://ozymandias-demo.herokuapp.com.",2019-04-08 +29077939,lncRNASNP2: an updated database of functional SNPs and mutations in human and mouse lncRNAs.,"Long non-coding RNAs (lncRNAs) are emerging as important regulators in different biological processes through various ways. Because the related data, especially mutations in cancers, increased sharply, we updated the lncRNASNP to version 2 (http://bioinfo.life.hust.edu.cn/lncRNASNP2). lncRNASNP2 provides comprehensive information of SNPs and mutations in lncRNAs, as well as their impacts on lncRNA structure and function. lncRNASNP2 contains 7260238 SNPs on 141353 human lncRNA transcripts and 3921448 SNPs on 117405 mouse lncRNA transcripts. Besides the SNP information in the first version, the following new features were developed to improve the lncRNASNP2. 
(i) noncoding variants from COSMIC cancer data (859534) in lncRNAs and their effects on lncRNA structure and function; (ii) TCGA cancer mutations (315234) in lncRNAs and their impacts; (iii) lncRNA expression profiling of 20 cancer types in both tumor and its adjacent samples; (iv) expanded lncRNA-associated diseases; (v) optimized the results about lncRNAs structure change induced by variants; (vi) reduced false positives in miRNA and lncRNA interaction results. Furthermore, we developed online tools for users to analyze new variants in lncRNA. We aim to maintain the lncRNASNP as a useful resource for lncRNAs and their variants.",2018-01-01 +29905762,LeptoDB: an integrated database of genomics and proteomics resource of Leptospira. ,"Leptospirosis is a potentially fatal zoo-anthroponosis caused by pathogenic species of Leptospira belonging to the family of Leptospiraceae, with a worldwide distribution and effect, in terms of its burden and risk to human health. The 'LeptoDB' is a single window dedicated architecture (5 948 311 entries), modeled using heterogeneous data as a core resource for global Leptospira species. LeptoDB facilitates well-structured knowledge of genomics, proteomics and therapeutic aspects with more than 500 assemblies including 17 complete and 496 draft genomes encoding 1.7 million proteins for 23 Leptospira species with more than 250 serovars comprising pathogenic, intermediate and saprophytic strains. Also, it seeks to be a dynamic compendium for therapeutically essential components such as epitope, primers, CRISPR/Cas9 and putative drug targets. Integration of JBrowse provides elaborated locus centric description of sequence or contig. Jmol for structural visualization of protein structures, MUSCLE for interactive multiple sequence alignment annotation and analysis. The data on genomic islands will definitely provide an understanding of virulence and pathogenicity. 
Phylogenetics analysis integrated suggests the evolutionary division of strains. Easily accessible on a public web server, we anticipate wide use of this metadata on Leptospira for the development of potential therapeutics.Database URL: http://leptonet.org.in.",2018-01-01 +30818354,"GenomeGraphR: A user-friendly open-source web application for foodborne pathogen whole genome sequencing data integration, analysis, and visualization.","Food safety risk assessments and large-scale epidemiological investigations have the potential to provide better and new types of information when whole genome sequence (WGS) data are effectively integrated. Today, the NCBI Pathogen Detection database WGS collections have grown significantly through improvements in technology, coordination, and collaboration, such as the GenomeTrakr and PulseNet networks. However, high-quality genomic data is not often coupled with high-quality epidemiological or food chain metadata. We have created a set of tools for cleaning, curation, integration, analysis and visualization of microbial genome sequencing data. It has been tested using Salmonella enterica and Listeria monocytogenes data sets provided by NCBI Pathogen Detection (160,000 sequenced isolates in 2018). GenomeGraphR presents foodborne pathogen WGS data and associated curated metadata in a user-friendly interface that allows a user to query a variety of research questions such as, transmission sources and dynamics, global reach, and persistence of genotypes associated with contamination in the food supply and foodborne illness across time or space. 
The application is freely available (https://fda-riskmodels.foodrisk.org/genomegraphr/).",2019-02-28 +31433664,Percent Grammatical Utterances Between 4 and 9 Years of Age for the Edmonton Narrative Norms Instrument: Reference Data and Psychometric Properties.,"Purpose The purpose of this article was to provide the reference data and evaluate psychometric properties for the percent grammatical utterances (PGU; Eisenberg & Guo, 2013) in children between 4 and 9 years of age from the database of the Edmonton Narrative Norms Instrument (ENNI; Schneider, Dubé, & Hayward, 2005). Method Participants were 377 children who were between 4 and 9 years of age, including 300 children with typical language (TL) and 77 children with language impairment (LI). Narrative samples were collected using the ENNI protocol (i.e., a story generation task). PGU was computed from the samples. Split-half reliability, concurrent criterion validity, and diagnostic accuracy for PGU were further evaluated. Results PGU increased significantly in children between 4 and 9 years of age in both the TL and LI groups. In addition, the correlation coefficients for the split-half reliability and concurrent criterion validity of PGU were all large (rs ≥ .557, ps < .001). The diagnostic accuracy of PGU was also good or acceptable from ages 4 to 9 years. Conclusions With the attested psychometric properties, PGU computed from the ENNI could be used as an assessment tool for identifying children with LI between 4 and 9 years of age. The reference data of PGU could also be used for monitoring treatment progress. Supplemental Material https://doi.org/10.23641/asha.9630590.",2019-08-21 +33190468,Determining Public Opinion of the COVID-19 Pandemic in South Korea and Japan: Social Network Mining on Twitter.,"

Objectives

This study analyzed the perceptions and emotions of Korean and Japanese citizens regarding coronavirus disease 2019 (COVID-19). It examined the frequency of words used in Korean and Japanese tweets regarding COVID-19 and the corresponding changes in their interests.

Methods

This cross-sectional study analyzed Twitter posts (Tweets) from February 1, 2020 to April 30, 2020 to determine public opinion of the COVID-19 pandemic in Korea and Japan. We collected data from Twitter (https://twitter.com/), a major social media platform in Korea and Japan. Python 3.7 Library was used for data collection. Data analysis included KR-WordRank and frequency analyses in Korea and Japan, respectively. Heat diagrams, word clouds, and rank flowcharts were also used.

Results

Overall, 1,470,673 and 4,195,457 tweets were collected from Korea and Japan, respectively. The word trend in Korea and Japan was analyzed every 5 days. The word cloud analysis revealed ""COVID-19"", ""Shinchonji"", ""Mask"", ""Daegu"", and ""Travel"" as frequently used words in Korea. While in Japan, ""COVID-19"", ""Mask"", ""Test"", ""Impact"", and ""China"" were identified as high-frequency words. They were divided into four categories: social distancing, prevention, issue, and emotion for the rank flowcharts. Concerning emotion, ""Overcome"" and ""Support"" increased from February in Korea, while ""Worry"" and ""Anxiety"" decreased in Japan from April 1.

Conclusions

As a result of the trend, people's interests in the economy were high in both countries, indicating their reservations on the economic downturn. Therefore, focusing policies toward economic stability is essential. Although the interest in prevention increased since April in both countries, the general public's relaxation regarding COVID-19 was also observed.",2020-10-31 +33329482,Human Gut Microbiome-Based Knowledgebase as a Biomarker Screening Tool to Improve the Predicted Probability for Colorectal Cancer.,"Colorectal cancer (CRC) is a common clinical malignancy globally ranked as the fourth leading cause of cancer mortality. Some microbes are known to contribute to adenoma-carcinoma transition and possess diagnostic potential. Advances in high-throughput sequencing technology and functional studies have provided significant insights into the landscape of the gut microbiome and the fundamental roles of its components in carcinogenesis. Integration of scattered knowledge is highly beneficial for future progress. In this study, literature review and information extraction were performed, with the aim of integrating the available data resources and facilitating comparative research. A knowledgebase of the human CRC microbiome was compiled to facilitate understanding of diagnosis, and the global signatures of CRC microbes, sample types, algorithms, differential microorganisms and various panels of markers plus their diagnostic performance were evaluated based on statistical and phylogenetic analyses. Additionally, prospects about current changelings and solution strategies were outlined for identifying future research directions. 
This type of data integration strategy presents an effective platform for inquiry and comparison of relevant information, providing a tool for further study about CRC-related microbes and exploration of factors promoting clinical transformation (available at: http://gsbios.com/index/experimental/dts_ mben?id=1).",2020-11-19 +32715038,"Dataset for estimating occurrence probability of causations for plugged, abandoned and decommissioned oil and gas wells.","This article contains the dataset on the failure frequencies of the barrier and mechanical plugs in place within the hydrocarbon-containing wellbore during plugging and abandonment operation. The interpretation and application of this data can be found in the research article (""https://doi.org/10.1016/j.psep.2019.09.015"" Babaleye et al., 2019). These datasets were collected through a comprehensive hazard identification technique workshop involving 10 engineers and academics with considerable years of field experience. The data were collected based on how likely it is for each causation to occur and these likelihoods are ranked from 1 to 10. The process is experience-driven and is complemented by a 1-10 rating of the duration of leak of hydrocarbon before remediation, should the leak reach the mudline. The ranked data was a representative of raw failure data (failure rate or mean time to failure (MTTF)) for each causation and are coded in MATLAB using gamma distribution based on hierarchical Bayesian analysis. The dataset offers unique opportunity for reuse due to its accessibility and discreteness.",2020-07-05 +32964081,Dataset on cigarette smokers in six South African townships.,"A total of 2453 smokers were interviewed in townships over two rounds of data collection. Townships are low-income, urban areas characterised by overpopulation, poor service delivery, crime, and poor socioeconomic outcomes. Township residents typically live in poverty. 
Data were collected from six townships in four of South Africa's nine provinces, namely Gauteng (Eldorado Park and Ivory Park), Western Cape (Khayelitsha and Mitchell's Plain), Free State (Thabong) and KwaZulu-Natal (Umlazi). These townships were chosen to represent both the geographical and racial spread of low socioeconomic areas in South Africa. Round 1 data (n = 1260) were collected from October to November 2017, and round 2 data (n = 1193) were collected from July to August 2018. The sample includes two of South Africa's four population groups: African and mixed race (locally referred to as ""Coloured"", which describes people of mixed Khoisan, Malay, European, and black African ancestry). Since few Whites and Asians live in townships, they were not sampled. Households were selected via a random walk through each township. One smoker per household was interviewed (if a household contained at least one available smoker). We aimed to interview 200 adult smokers (aged 18+ years) per township per round. If a household had more than one smoker, a random selection determined which smoker to interview. Respondents were asked about their most recent cigarette purchase, specifically packaging type (single stick, pack, or carton), number of items purchased, brand, type of outlet where the cigarettes were bought, and the total amount paid for cigarettes. Respondents were also asked about other tobacco use in the household, and about their perceptions regarding illegal cigarettes. Socioeconomic and demographic information was collected at the individual and household level. 
The data has been used to estimate illicit trade (https://tobaccocontrol.bmj.com/content/early/2020/03/10/tobaccocontrol-2019-055136.info), and to analyse the determinants of smoking intensity (https://www.sciencedirect.com/science/article/pii/S2211335520300590).",2020-09-02 +30417254,RNA sequencing-based transcriptomic profiles of embryonic lens development for cataract gene discovery.,"Isolated or syndromic congenital cataracts are heterogeneous developmental defects, making the identification of the associated genes challenging. In the past, mouse lens expression microarrays have been successfully applied in bioinformatics tools (e.g., iSyTE) to facilitate human cataract-associated gene discovery. To develop a new resource for geneticists, we report high-throughput RNA sequencing (RNA-seq) profiles of mouse lens at key embryonic stages (E)10.5 (lens pit), E12.5 (primary fiber cell differentiation), E14.5 and E16.5 (secondary fiber cell differentiation). These stages capture important events as the lens develops from an invaginating placode into a transparent tissue. Previously, in silico whole-embryo body (WB)-subtraction-based ""lens-enriched"" expression has been effective in prioritizing cataract-linked genes. To apply an analogous approach, we generated new mouse WB RNA-seq datasets and show that in silico WB subtraction of lens RNA-seq datasets successfully identifies key genes based on lens-enriched expression. At ≥2 counts-per-million expression, ≥1.5 log2 fold-enrichment (p < 0.05) cutoff, E10.5 lens exhibits 1401 enriched genes (17% lens-expressed genes), E12.5 lens exhibits 1937 enriched genes (22% lens-expressed genes), E14.5 lens exhibits 2514 enriched genes (31% lens-expressed genes), and E16.5 lens exhibits 2745 enriched genes (34% lens-expressed genes). Biological pathway analysis identified genes associated with lens development, transcription regulation and signaling pathways, among other functional groups. 
Furthermore, these new RNA-seq data confirmed high expression of established cataract-linked genes and identified new potential regulators in the lens. Finally, we developed new lens stage-specific UCSC Genome Brower annotation tracks and made these publicly accessible through iSyTE ( https://research.bioinformatics.udel.edu/iSyTE/ ) for user-friendly visualization of lens gene expression/enrichment to prioritize genes from high-throughput data from cataract cases.",2018-11-11 +31029262,EMIF Catalogue: A collaborative platform for sharing and reusing biomedical data.,"

Objective

The collaboration and knowledge exchange between researchers are often hindered by the nonexistence of accurate information about which databases may support research studies. Even though a considerable amount of patient health information does exist, it is usually distributed and hidden in many institutions. The goal of this project is to provide, for any research community, a holistic view of biomedical datasets of interests, from which researchers can explore several distinct levels of granularity.

Methods

We developed a community-centered approach to facilitate data sharing while ensuring privacy. A dynamic schema allows exposing any metadata model about existing repositories. The framework was developed following a modular plugin-based architecture that facilitates the integration of internal and external tools.

Results

The EMIF Catalogue, a web platform for sharing and reusing biomedical data. Through this system, data custodians can publish and share different levels of information, while the researchers can search for databases that fulfill research requirements.

Conclusions

The EMIF Catalogue currently fosters several distinct research communities, with different levels of data governance, combining, for instance, data available in pan-European EHR and Alzheimer cohorts. This portal is publicly available at https://emif-catalogue.eu.",2019-03-13 +30933541,Nonanimal Models for Acute Toxicity Evaluations: Applying Data-Driven Profiling and Read-Across.,"BACKGROUND:Low-cost, high-throughput in vitro bioassays have potential as alternatives to animal models for toxicity testing. However, incorporating in vitro bioassays into chemical toxicity evaluations such as read-across requires significant data curation and analysis based on knowledge of relevant toxicity mechanisms, lowering the enthusiasm of using the massive amount of unstructured public data. OBJECTIVE:We aimed to develop a computational method to automatically extract useful bioassay data from a public repository (i.e., PubChem) and assess its ability to predict animal toxicity using a novel bioprofile-based read-across approach. METHODS:A training database containing 7,385 compounds with diverse rat acute oral toxicity data was searched against PubChem to establish in vitro bioprofiles. Using a novel subspace clustering algorithm, bioassay groups that may inform on relevant toxicity mechanisms underlying acute oral toxicity were identified. These bioassays groups were used to predict animal acute oral toxicity using read-across through a cross-validation process. Finally, an external test set of over 600 new compounds was used to validate the resulting model predictivity. RESULTS:Several bioassay clusters showed high predictivity for acute oral toxicity (positive prediction rates range from 62-100%) through cross-validation. After incorporating individual clusters into an ensemble model, chemical toxicants in the external test set were evaluated for putative acute toxicity (positive prediction rate equal to 76%). 
Additionally, chemical fragment -in vitro-in vivo relationships were identified to illustrate new animal toxicity mechanisms. CONCLUSIONS:The in vitro bioassay data-driven profiling strategy developed in this study meets the urgent needs of computational toxicology in the current big data era and can be extended to develop predictive models for other complex toxicity end points. https://doi.org/10.1289/EHP3614.",2019-04-01 +33604189,6mA-Pred: identifying DNA N6-methyladenine sites based on deep learning.,"With the accumulation of data on 6mA modification sites, an increasing number of scholars have begun to focus on the identification of 6mA sites. Despite the recognized importance of 6mA sites, methods for their identification remain lacking, with most existing methods being aimed at their identification in individual species. In the present study, we aimed to develop an identification method suitable for multiple species. Based on previous research, we propose a method for 6mA site recognition. Our experiments prove that the proposed 6mA-Pred method is effective for identifying 6mA sites in genes from taxa such as rice, Mus musculus, and human. A series of experimental results show that 6mA-Pred is an excellent method. We provide the source code used in the study, which can be obtained from http://39.100.246.211:5004/6mA_Pred/.",2021-02-03 +33291027,Vienna soil organic matter modeler 2 (VSOMM2).,"Soil Organic Matter (SOM) plays an important role in several biogeochemical processes by directly affecting the microbial activity, soil aggregation, plant growth and carbon storage. Despite of its importance, our understanding of its composition and structure is still incomplete. Several experiments using elemental analysis, nuclear magnetic resonance (NMR) and mass spectrometry (MS) shed light on the structure of organic matter. 
In this context, the Vienna Soil-Organic-Matter Modeler (https://somm.boku.ac.at/) is a website that generates condensed phase computer models of Soil-Organic-Matter (SOM). Most of the data comes from standardized samples by the International Humic Substances Association (IHSS), which uses a specific methodology to extract organic compounds from soil, called humic substances. We have improved the modeler by increasing the pool of elemental units that compose our SOM molecules called building blocks, and also by implementing a genetic algorithm that increases the chemical and geometric diversity of the models. This allowed us to create models using the IHSS data as well as different types of soil. The webserver uses as an input principally the elemental and organic composition and offers input files needed to run molecular dynamic (MD) simulations of solvated and neutralized SOM within the framework of the GROMOS 54A7 forcefield and the GROMOS and GROMACS simulation packages.",2020-11-26 +33922568,Hfinger: Malware HTTP Request Fingerprinting. ,"Malicious software utilizes HTTP protocol for communication purposes, creating network traffic that is hard to identify as it blends into the traffic generated by benign applications. To this aim, fingerprinting tools have been developed to help track and identify such traffic by providing a short representation of malicious HTTP requests. However, currently existing tools do not analyze all information included in the HTTP message or analyze it insufficiently. To address these issues, we propose Hfinger, a novel malware HTTP request fingerprinting tool. It extracts information from the parts of the request such as URI, protocol information, headers, and payload, providing a concise request representation that preserves the extracted information in a form interpretable by a human analyst. 
For the developed solution, we have performed an extensive experimental evaluation using real-world data sets and we also compared Hfinger with the most related and popular existing tools such as FATT, Mercury, and p0f. The conducted effectiveness analysis reveals that on average only 1.85% of requests fingerprinted by Hfinger collide between malware families, what is 8-34 times lower than existing tools. Moreover, unlike these tools, in default mode, Hfinger does not introduce collisions between malware and benign applications and achieves it by increasing the number of fingerprints by at most 3 times. As a result, Hfinger can effectively track and hunt malware by providing more unique fingerprints than other standard tools.",2021-04-23 +28383659,The Proteins API: accessing key integrated protein and genome information.,"The Proteins API provides searching and programmatic access to protein and associated genomics data such as curated protein sequence positional annotations from UniProtKB, as well as mapped variation and proteomics data from large scale data sources (LSS). Using the coordinates service, researchers are able to retrieve the genomic sequence coordinates for proteins in UniProtKB. This, the LSS genomics and proteomics data for UniProt proteins is programmatically only available through this service. A Swagger UI has been implemented to provide documentation, an interface for users, with little or no programming experience, to 'talk' to the services to quickly and easily formulate queries with the services and obtain dynamically generated source code for popular programming languages, such as Java, Perl, Python and Ruby. Search results are returned as standard JSON, XML or GFF data objects. 
The Proteins API is a scalable, reliable, fast, easy to use RESTful services that provides a broad protein information resource for users to ask questions based upon their field of expertise and allowing them to gain an integrated overview of protein annotations available to aid their knowledge gain on proteins in biological processes. The Proteins API is available at (http://www.ebi.ac.uk/proteins/api/doc).",2017-07-01 +33245779,PheLiGe: an interactive database of billions of human genotype-phenotype associations.,"Genome-wide association studies have provided a vast array of publicly available SNP × phenotype association results. However, they are often in disparate repositories and formats, making downstream analyses difficult and time consuming. PheLiGe (https://phelige.com) is a database that provides easy access to such results via a web interface. The underlying database currently stores >75 billion genotype-phenotype associations from 7347 genome-wide and 1.2 million region-wide (e.g. cis-eQTL) association scans. The web interface allows for investigation of regional genotype-phenotype associations across many phenotypes, giving insights into the biological function affected by the variant in question. Furthermore, PheLiGe can compare regional patterns of association between different traits. This analysis can ascertain whether a co-association is due to pleiotropy or linkage. Moreover, comparison of association patterns for a complex trait of interest and gene expression and protein levels can implicate causal genes.",2021-01-01 +33237299,GreenPhylDB v5: a comparative pangenomic database for plant genomes.,"Comparative genomics is the analysis of genomic relationships among different species and serves as a significant base for evolutionary and functional genomic studies. 
GreenPhylDB (https://www.greenphyl.org) is a database designed to facilitate the exploration of gene families and homologous relationships among plant genomes, including staple crops critically important for global food security. GreenPhylDB is available since 2007, after the release of the Arabidopsis thaliana and Oryza sativa genomes and has undergone multiple releases. With the number of plant genomes currently available, it becomes challenging to select a single reference for comparative genomics studies but there is still a lack of databases taking advantage several genomes by species for orthology detection. GreenPhylDBv5 introduces the concept of comparative pangenomics by harnessing multiple genome sequences by species. We created 19 pangenes and processed them with other species still relying on one genome. In total, 46 plant species were considered to build gene families and predict their homologous relationships through phylogenetic-based analyses. In addition, since the previous publication, we rejuvenated the website and included a new set of original tools including protein-domain combination, tree topologies searches and a section for users to store their own results in order to support community curation efforts.",2021-01-01 +33196798,PROMISCUOUS 2.0: a resource for drug-repositioning.,"The development of new drugs for diseases is a time-consuming, costly and risky process. In recent years, many drugs could be approved for other indications. This repurposing process allows to effectively reduce development costs, time and, ultimately, save patients' lives. During the ongoing COVID-19 pandemic, drug repositioning has gained widespread attention as a fast opportunity to find potential treatments against the newly emerging disease. 
In order to expand this field to researchers with varying levels of experience, we made an effort to open it to all users (meaning novices as well as experts in cheminformatics) by significantly improving the entry-level user experience. The browsing functionality can be used as a global entry point to collect further information with regards to small molecules (∼1 million), side-effects (∼110 000) or drug-target interactions (∼3 million). The drug-repositioning tab for small molecules will also suggest possible drug-repositioning opportunities to the user by using structural similarity measurements for small molecules using two different approaches. Additionally, using information from the Promiscuous 2.0 Database, lists of candidate drugs for given indications were precomputed, including a section dedicated to potential treatments for COVID-19. All the information is interconnected by a dynamic network-based visualization to identify new indications for available compounds. Promiscuous 2.0 is unique in its functionality and is publicly available at http://bioinformatics.charite.de/promiscuous2.",2021-01-01 +33152092,Genomes OnLine Database (GOLD) v.8: overview and updates.,"The Genomes OnLine Database (GOLD) (https://gold.jgi.doe.gov/) is a manually curated, daily updated collection of genome projects and their metadata accumulated from around the world. The current version of the database includes over 1.17 million entries organized broadly into Studies (45 770), Organisms (387 382) or Biosamples (101 207), Sequencing Projects (355 364) and Analysis Projects (283 481). These four levels contain over 600 metadata fields, which includes 76 controlled vocabulary (CV) tables containing 3873 terms. GOLD provides an interactive web user interface for browsing and searching by a wide range of project and metadata fields. 
Users can enter details about their own projects in GOLD, which acts as a gatekeeper to ensure that metadata is accurately documented before submitting sequence information to the Integrated Microbial Genomes (IMG) system for analysis. In order to maintain a reference dataset for use by members of the scientific community, GOLD also imports projects from public repositories such as GenBank and SRA. The current status of the database, along with recent updates and improvements are described in this manuscript.",2021-01-01 +33035346,tRFtarget: a database for transfer RNA-derived fragment targets.,"Transfer RNA-derived fragments (tRFs) are a new class of small non-coding RNAs and play important roles in biological and physiological processes. Prediction of tRF target genes and binding sites is crucial in understanding the biological functions of tRFs in the molecular mechanisms of human diseases. We developed a publicly accessible web-based database, tRFtarget (http://trftarget.net), for tRF target prediction. It contains the computationally predicted interactions between tRFs and mRNA transcripts using the two state-of-the-art prediction tools RNAhybrid and IntaRNA, including location of the binding sites on the target, the binding region, and free energy of the binding stability with graphic illustration. tRFtarget covers 936 tRFs and 135 thousand predicted targets in eight species. It allows researchers to search either target genes by tRF IDs or tRFs by gene symbols/transcript names. We also integrated the manually curated experimental evidence of the predicted interactions into the database. Furthermore, we provided a convenient link to the DAVID® web server to perform downstream functional pathway analysis and gene ontology annotation on the predicted target genes. 
This database provides useful information for the scientific community to experimentally validate tRF target genes and facilitate the investigation of the molecular functions and mechanisms of tRFs.",2021-01-01 +33393872,International Harmonization of Nomenclature and Diagnostic Criteria (INHAND): Nonproliferative and Proliferative Lesions of the Minipig.,"The INHAND (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions) Project (www.toxpath.org/inhand.asp) is a joint initiative of the Societies of Toxicologic Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP), and North America (STP) to develop an internationally accepted nomenclature for proliferative and nonproliferative lesions in laboratory animals. The purpose of this publication is to provide a standardized nomenclature for classifying microscopic lesions observed in most tissues and organs from the minipig used in nonclinical safety studies. Some of the lesions are illustrated by color photomicrographs. The standardized nomenclature presented in this document is also available electronically on the internet (http://www.goreni.org/). Sources of material included histopathology databases from government, academia, and industrial laboratories throughout the world. Content includes spontaneous lesions as well as lesions induced by exposure to test materials. Relevant infectious and parasitic lesions are included as well. 
A widely accepted and utilized international harmonization of nomenclature for lesions in laboratory animals will provide a common language among regulatory and scientific research organizations in different countries and increase and enrich international exchanges of information among toxicologists and pathologists.",2021-01-01 +33270901,RefSeq: expanding the Prokaryotic Genome Annotation Pipeline reach with protein family model curation.,"The Reference Sequence (RefSeq) project at the National Center for Biotechnology Information (NCBI) contains nearly 200 000 bacterial and archaeal genomes and 150 million proteins with up-to-date annotation. Changes in the Prokaryotic Genome Annotation Pipeline (PGAP) since 2018 have resulted in a substantial reduction in spurious annotation. The hierarchical collection of protein family models (PFMs) used by PGAP as evidence for structural and functional annotation was expanded to over 35 000 protein profile hidden Markov models (HMMs), 12 300 BlastRules and 36 000 curated CDD architectures. As a result, >122 million or 79% of RefSeq proteins are now named based on a match to a curated PFM. Gene symbols, Enzyme Commission numbers or supporting publication attributes are available on over 40% of the PFMs and are inherited by the proteins and features they name, facilitating multi-genome analyses and connections to the literature. In adherence with the principles of FAIR (findable, accessible, interoperable, reusable), the PFMs are available in the Protein Family Models Entrez database to any user. Finally, the reference and representative genome set, a taxonomically diverse subset of RefSeq prokaryotic genomes, is now recalculated regularly and available for download and homology searches with BLAST. 
RefSeq is found at https://www.ncbi.nlm.nih.gov/refseq/.",2021-01-01 +33196814,"RBP2GO: a comprehensive pan-species database on RNA-binding proteins, their interactions and functions.","RNA-protein complexes have emerged as central players in numerous key cellular processes with significant relevance in health and disease. To further deepen our knowledge of RNA-binding proteins (RBPs), multiple proteome-wide strategies have been developed to identify RBPs in different species leading to a large number of studies contributing experimentally identified as well as predicted RBP candidate catalogs. However, the rapid evolution of the field led to an accumulation of isolated datasets, hampering the access and comparison of their valuable content. Moreover, tools to link RBPs to cellular pathways and functions were lacking. Here, to facilitate the efficient screening of the RBP resources, we provide RBP2GO (https://RBP2GO.DKFZ.de), a comprehensive database of all currently available proteome-wide datasets for RBPs across 13 species from 53 studies including 105 datasets identifying altogether 22 552 RBP candidates. These are combined with the information on RBP interaction partners and on the related biological processes, molecular functions and cellular compartments. RBP2GO offers a user-friendly web interface with an RBP scoring system and powerful advanced search tools allowing forward and reverse searches connecting functions and RBPs to stimulate new research directions.",2021-01-01 +33174598,"LectomeXplore, an update of UniLectin for the discovery of carbohydrate-binding proteins based on a new lectin classification.","Lectins are non-covalent glycan-binding proteins mediating cellular interactions but their annotation in newly sequenced organisms is lacking. The limited size of functional domains and the low level of sequence similarity challenge usual bioinformatics tools. 
The identification of lectin domains in proteomes requires the manual curation of sequence alignments based on structural folds. A new lectin classification is proposed. It is built on three levels: (i) 35 lectin domain folds, (ii) 109 classes of lectins sharing at least 20% sequence similarity and (iii) 350 families of lectins sharing at least 70% sequence similarity. This information is compiled in the UniLectin platform that includes the previously described UniLectin3D database of curated lectin 3D structures. Since its first release, UniLectin3D has been updated with 485 additional 3D structures. The database is now complemented by two additional modules: PropLec containing predicted β-propeller lectins and LectomeXplore including predicted lectins from sequences of the NBCI-nr and UniProt for every curated lectin class. UniLectin is accessible at https://www.unilectin.eu/.",2021-01-01 +33167031,"COG database update: focus on microbial diversity, model organisms, and widespread pathogens.","The Clusters of Orthologous Genes (COG) database, also referred to as the Clusters of Orthologous Groups of proteins, was created in 1997 and went through several rounds of updates, most recently, in 2014. The current update, available at https://www.ncbi.nlm.nih.gov/research/COG, substantially expands the scope of the database to include complete genomes of 1187 bacteria and 122 archaea, typically, with a single genome per genus. 
In addition, the current version of the COGs includes the following new features: (i) the recently deprecated NCBI's gene index (gi) numbers for the encoded proteins are replaced with stable RefSeq or GenBank\ENA\DDBJ coding sequence (CDS) accession numbers; (ii) COG annotations are updated for >200 newly characterized protein families with corresponding references and PDB links, where available; (iii) lists of COGs grouped by pathways and functional systems are added; (iv) 266 new COGs for proteins involved in CRISPR-Cas immunity, sporulation in Firmicutes and photosynthesis in cyanobacteria are included; and (v) the database is made available as a web page, in addition to FTP. The current release includes 4877 COGs. Future plans include further expansion of the COG collection by adding archaeal COGs (arCOGs), splitting the COGs containing multiple paralogs, and continued refinement of COG annotations.",2021-01-01 +33119734,DescribePROT: database of amino acid-level protein structure and function predictions.,"We present DescribePROT, the database of predicted amino acid-level descriptors of structure and function of proteins. DescribePROT delivers a comprehensive collection of 13 complementary descriptors predicted using 10 popular and accurate algorithms for 83 complete proteomes that cover key model organisms. The current version includes 7.8 billion predictions for close to 600 million amino acids in 1.4 million proteins. The descriptors encompass sequence conservation, position specific scoring matrix, secondary structure, solvent accessibility, intrinsic disorder, disordered linkers, signal peptides, MoRFs and interactions with proteins, DNA and RNAs. Users can search DescribePROT by the amino acid sequence and the UniProt accession number and entry name. The pre-computed results are made available instantaneously. 
The predictions can be accesses via an interactive graphical interface that allows simultaneous analysis of multiple descriptors and can be also downloaded in structured formats at the protein, proteome and whole database scale. The putative annotations included by DescriPROT are useful for a broad range of studies, including: investigations of protein function, applied projects focusing on therapeutics and diseases, and in the development of predictors for other protein sequence descriptors. Future releases will expand the coverage of DescribePROT. DescribePROT can be accessed at http://biomine.cs.vcu.edu/servers/DESCRIBEPROT/.",2021-01-01 +33104797,REDIportal: millions of novel A-to-I RNA editing events from thousands of RNAseq experiments.,"RNA editing is a relevant epitranscriptome phenomenon able to increase the transcriptome and proteome diversity of eukaryotic organisms. ADAR mediated RNA editing is widespread in humans in which millions of A-to-I changes modify thousands of primary transcripts. RNA editing has pivotal roles in the regulation of gene expression or modulation of the innate immune response or functioning of several neurotransmitter receptors. Massive transcriptome sequencing has fostered the research in this field. Nonetheless, different aspects of the RNA editing biology are still unknown and need to be elucidated. To support the study of A-to-I RNA editing we have updated our REDIportal catalogue raising its content to about 16 millions of events detected in 9642 human RNAseq samples from the GTEx project by using a dedicated pipeline based on the HPC version of the REDItools software. REDIportal now allows searches at sample level, provides overviews of RNA editing profiles per each RNAseq experiment, implements a Gene View module to look at individual events in their genic context and hosts the CLAIRE database. 
Starting from this novel version, REDIportal will start collecting non-human RNA editing changes for comparative genomics investigations. The database is freely available at http://srv00.recas.ba.infn.it/atlas/index.html.",2021-01-01 +33104791,DrugSpaceX: a large screenable and synthetically tractable database extending drug space.,"One of the most prominent topics in drug discovery is efficient exploration of the vast drug-like chemical space to find synthesizable and novel chemical structures with desired biological properties. To address this challenge, we created the DrugSpaceX (https://drugspacex.simm.ac.cn/) database based on expert-defined transformations of approved drug molecules. The current version of DrugSpaceX contains >100 million transformed chemical products for virtual screening, with outstanding characteristics in terms of structural novelty, diversity and large three-dimensional chemical space coverage. To illustrate its practical application in drug discovery, we used a case study of discoidin domain receptor 1 (DDR1), a kinase target implicated in fibrosis and other diseases, to show DrugSpaceX performing a quick search of initial hit compounds. Additionally, for ligand identification and optimization purposes, DrugSpaceX also provides several subsets for download, including a 10% diversity subset, an extended drug-like subset, a drug-like subset, a lead-like subset, and a fragment-like subset. In addition to chemical properties and transformation instructions, DrugSpaceX can locate the position of transformation, which will enable medicinal chemists to easily integrate strategy planning and protection design.",2021-01-01 +33084862,BastionHub: a universal platform for integrating and analyzing substrates secreted by Gram-negative bacteria.,"Gram-negative bacteria utilize secretion systems to export substrates into their surrounding environment or directly into neighboring cells. 
These substrates are proteins that function to promote bacterial survival: by facilitating nutrient collection, disabling competitor species or, for pathogens, to disable host defenses. Following a rapid development of computational techniques, a growing number of substrates have been discovered and subsequently validated by wet lab experiments. To date, several online databases have been developed to catalogue these substrates but they have limited user options for in-depth analysis, and typically focus on a single type of secreted substrate. We therefore developed a universal platform, BastionHub, that incorporates extensive functional modules to facilitate substrate analysis and integrates the five major Gram-negative secreted substrate types (i.e. from types I-IV and VI secretion systems). To our knowledge, BastionHub is not only the most comprehensive online database available, it is also the first to incorporate substrates secreted by type I or type II secretion systems. By providing the most up-to-date details of secreted substrates and state-of-the-art prediction and visualized relationship analysis tools, BastionHub will be an important platform that can assist biologists in uncovering novel substrates and formulating new hypotheses. BastionHub is freely available at http://bastionhub.erc.monash.edu/.",2021-01-01 +32882008,TBDB: a database of structurally annotated T-box riboswitch:tRNA pairs.,"T-box riboswitches constitute a large family of tRNA-binding leader sequences that play a central role in gene regulation in many gram-positive bacteria. Accurate inference of the tRNA binding to T-box riboswitches is critical to predict their cis-regulatory activity. However, there is no central repository of information on the tRNA binding specificities of T-box riboswitches, and de novo prediction of binding specificities requires advanced knowledge of computational tools to annotate riboswitch secondary structure features. 
Here, we present the T-box Riboswitch Annotation Database (TBDB, https://tbdb.io), an open-access database with a collection of 23,535 T-box riboswitch sequences, spanning the major phyla of 3,632 bacterial species. Among structural predictions, the TBDB also identifies specifier sequences, cognate tRNA binding partners, and downstream regulatory targets. To our knowledge, the TBDB presents the largest collection of feature, sequence, and structural annotations carried out on this important family of regulatory RNA.",2021-01-01 +33996073,Drug-drug interaction database for safe prescribing of systemic antifungal agents.,"

Introduction

A drug-drug interaction (DDI) describes the influence of one drug upon another or the change in a drug's effect on the body when the drug is taken together with a second drug. A DDI can delay, decrease or enhance absorption or metabolism of either drug. Several antifungal agents have a large number of potentially deleterious DDIs.

Methods

The antifungal drug interactions database https://antifungalinteractions.org/ was first launched in 2012 and is updated regularly. It is available as web and app versions to allow information on potential drug interactions with antifungals with a version for patients and another for health professionals. A new and updated database and interface with apps was created in 2019. This allows clinicians and patients to rapidly check for DDIs. The database is fully referenced to allow the user to access further information if needed. Currently DDIs for fluconazole, itraconazole, voriconazole, posaconazole, isavuconazole, terbinafine, amphotericin B, caspofungin, micafungin and anidulafungin are cross-referenced against 2398 other licensed drugs, a total of nearly 17,000 potential DDIs.

Results

The database records 541 potentially severe DDIs, 1129 moderate and 1015 mild DDIs, a total of 2685 (15.9%).

Conclusion

As the online database and apps are free to use, we hope that widespread acceptance and usage will reduce medical misadventure and iatrogenic harm from unconsidered DDIs.",2021-01-01 +33759118,Investigation of the Click-Chemical Space for Drug Design Using ZINClick.,"This chapter provides a brief overview of the applications of ZINClick virtual library. In the last years, we have investigated the click-chemical space covered by molecules containing the triazole ring and generated a database of 1,2,3-triazoles called ZINClick, starting from literature reported alkynes and azides synthesizable in no more than three synthetic steps from commercially available products. This combinatorial database contains millions of 1,4-disubstituted 1,2,3-triazoles that are easily synthesizable. The library is regularly updated and can be freely downloaded from http://www.ZINClick.org . This virtual library is a good starting point to explore a new portion of chemical space.",2021-01-01 +33507271,SinEx DB 2.0 update 2020: database for eukaryotic single-exon coding sequences. ,"Single-exon coding sequences (CDSs), also known as 'single-exon genes' (SEGs), are defined as nuclear, protein-coding genes that lack introns in their CDSs. They have been studied not only to determine their origin and evolution but also because their expression has been linked to several types of human cancers and neurological/developmental disorders, and many exhibit tissue-specific transcription. We developed SinEx DB that houses DNA and protein sequence information of SEGs from 10 mammalian genomes including human. SinEx DB includes their functional predictions (KOG (euKaryotic Orthologous Groups)) and the relative distribution of these functions within species. Here, we report SinEx 2.0, a major update of SinEx DB that includes information of the occurrence, distribution and functional prediction of SEGs from 60 completely sequenced eukaryotic genomes, representing animals, fungi, protists and plants. 
The information is stored in a relational database built with MySQL Server 5.7, and the complete dataset of SEG sequences and their GO (Gene Ontology) functional assignations are available for downloading. SinEx DB 2.0 was built with a novel pipeline that helps disambiguate single-exon isoforms from SEGs. SinEx DB 2.0 is the largest available database for SEGs and provides a rich source of information for advancing our understanding of the evolution, function of SEGs and their associations with disorders including cancers and neurological and developmental diseases. Database URL: http://v2.sinex.cl/.",2021-01-01 +33393871,International Harmonization of Nomenclature and Diagnostic Criteria (INHAND): Nonproliferative and Proliferative Lesions of the Dog.,"The INHAND (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions) Project (www.toxpath.org/inhand.asp) is a joint initiative of the societies of toxicologic Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP), and North America (STP) to develop an internationally accepted nomenclature for proliferative and nonproliferative lesions in laboratory animals. The purpose of this publication is to provide a standardized nomenclature for classifying lesions observed in most tissues and organs from the dog used in nonclinical safety studies. Some of the lesions are illustrated by color photomicrographs. The standardized nomenclature presented in this document is also available electronically on the internet (http://www.goreni.org/). Sources of material included histopathology databases from government, academia, and industrial laboratories throughout the world. Content includes spontaneous lesions, lesions induced by exposure to test materials, and relevant infectious and parasitic lesions. 
A widely accepted and utilized international harmonization of nomenclature for lesions in laboratory animals will provide a common language among regulatory and scientific research organizations in different countries and increase and enrich international exchanges of information among toxicologists and pathologists.",2021-01-01 +33021671,RMVar: an updated database of functional variants involved in RNA modifications.,"Distinguishing the few disease-related variants from a massive number of passenger variants is a major challenge. Variants affecting RNA modifications that play critical roles in many aspects of RNA metabolism have recently been linked to many human diseases, such as cancers. Evaluating the effect of genetic variants on RNA modifications will provide a new perspective for understanding the pathogenic mechanism of human diseases. Previously, we developed a database called 'm6AVar' to host variants associated with m6A, one of the most prevalent RNA modifications in eukaryotes. To host all RNA modification (RM)-associated variants, here we present an updated version of m6AVar renamed RMVar (http://rmvar.renlab.org). In this update, RMVar contains 1 678 126 RM-associated variants for 9 kinds of RNA modifications, namely m6A, m6Am, m1A, pseudouridine, m5C, m5U, 2'-O-Me, A-to-I and m7G, at three confidence levels. Moreover, RBP binding regions, miRNA targets, splicing events and circRNAs were integrated to assist investigations of the effects of RM-associated variants on posttranscriptional regulation. In addition, disease-related information was integrated from ClinVar and other genome-wide association studies (GWAS) to investigate the relationship between RM-associated variants and diseases. 
We expect that RMVar may boost further functional studies on genetic variants affecting RNA modifications.",2021-01-01 +32976581,STAB: a spatio-temporal cell atlas of the human brain.,"The human brain is the most complex organ consisting of billions of neuronal and non-neuronal cells that are organized into distinct anatomical and functional regions. Elucidating the cellular and transcriptome architecture underlying the brain is crucial for understanding brain functions and brain disorders. Thanks to the single-cell RNA sequencing technologies, it is becoming possible to dissect the cellular compositions of the brain. Although great effort has been made to explore the transcriptome architecture of the human brain, a comprehensive database with dynamic cellular compositions and molecular characteristics of the human brain during the lifespan is still not available. Here, we present STAB (a Spatio-Temporal cell Atlas of the human Brain), a database consists of single-cell transcriptomes across multiple brain regions and developmental periods. Right now, STAB contains single-cell gene expression profiling of 42 cell subtypes across 20 brain regions and 11 developmental periods. With STAB, the landscape of cell types and their regional heterogeneity and temporal dynamics across the human brain can be clearly seen, which can help to understand both the development of the normal human brain and the etiology of neuropsychiatric disorders. STAB is available at http://stab.comp-sysbio.org.",2021-01-01 +33444047,Editorial.,"It is impossible to write this editorial without recognizing that we are living in challenging times. Unprecedented changes in how, when, where, and with whom we work have occurred in response to the COVID-19 pandemic. 
In addition to the threat to human life, the pandemic is expected to increase poverty and deepen preexisting inequalities for vulnerable groups such as women (United Nations, 2020) and individuals living in poorer countries (United Nations Development Programme, 2020). In the United States, the pandemic has disproportionately negatively affected racial and ethnic minority group members (https://www.cdc.gov/coronavirus/2019-ncov/community/health-equity/race-ethnicity.html). For example, in the United States infection and mortality rates are especially high among African Americans (Yancy, 2020). These sobering realities, along with the recent deaths of George Floyd, Ahmaud Arbery, and Breonna Taylor, and so many others, are vivid and wrenching reminders of longstanding social injustice and systematic racism, both in the United States and around the globe. When preparing my candidate statement and vision for the journal, a global pandemic and widespread social protest were the furthest thing from my mind. However, several aspects of my vision for JAP are highly relevant to the current context. This includes increasing representation and supporting diversity, as well as improving the translation of our science for the public good. Other elements of my vision for the journal include enhancing the review process and promoting open science. (PsycInfo Database Record (c) 2021 APA, all rights reserved).",2021-01-01 +33211851,WikiPathways: connecting communities.,"WikiPathways (https://www.wikipathways.org) is a biological pathway database known for its collaborative nature and open science approaches. With the core idea of the scientific community developing and curating biological knowledge in pathway models, WikiPathways lowers all barriers for accessing and using its content. Increasingly more content creators, initiatives, projects and tools have started using WikiPathways. 
Central in this growth and increased use of WikiPathways are the various communities that focus on particular subsets of molecular pathways such as for rare diseases and lipid metabolism. Knowledge from published pathway figures helps prioritize pathway development, using optical character and named entity recognition. We show the growth of WikiPathways over the last three years, highlight the new communities and collaborations of pathway authors and curators, and describe various technologies to connect to external resources and initiatives. The road toward a sustainable, community-driven pathway database goes through integration with other resources such as Wikidata and allowing more use, curation and redistribution of WikiPathways content.",2021-01-01 +33196801,NONCODEV6: an updated database dedicated to long non-coding RNA annotation in both animals and plants.,"NONCODE (http://www.noncode.org/) is a comprehensive database of collection and annotation of noncoding RNAs, especially long non-coding RNAs (lncRNAs) in animals. NONCODEV6 is dedicated to providing the full scope of lncRNAs across plants and animals. The number of lncRNAs in NONCODEV6 has increased from 548 640 to 644 510 since the last update in 2017. The number of human lncRNAs has increased from 172 216 to 173 112. The number of mouse lncRNAs increased from 131 697 to 131 974. The number of plant lncRNAs is 94 697. The relationship between lncRNAs in human and cancer were updated with transcriptome sequencing profiles. Three important new features were also introduced in NONCODEV6: (i) updated human lncRNA-disease relationships, especially cancer; (ii) lncRNA annotations with tissue expression profiles and predicted function in five common plants; iii) lncRNAs conservation annotation at transcript level for 23 plant species. 
NONCODEV6 is accessible through http://www.noncode.org/.",2021-01-01 +33155923,"Design, Synthesis and Biological Evaluation of Novel Heterocyclic Fluoroquinolone Citrate Conjugates as Potential Inhibitors of Topoisomerase IV: A Computational Molecular Modeling Study.","

Background & objective

A facile and efficient method for the synthesis of novel derivatives of FQ citrate conjugates with 1,2,4-triazoles and 1,3,4-oxadiazole scaffolds 8-11 using conventional, as well as microwave irradiation methods, was reported. Based on these original building blocks, the new derivatives of 3, 7-disubstituted fluoroquinolones bearing the oxadiazolyl-triazole groups were obtained. These invaluable derivatives are of great interest in medicinal and pharmaceutical studies because of their important biological properties.

Methods

All the reactions were examined under conventional as well as microwave mediated conditions. The structures of obtained compounds were confirmed by 1H NMR, 13C NMR, IR HRMS spectroscopy, and elemental analysis. The antibacterial and antifungal activities of these compounds were screened against Gram-positive, Gram-negative bacteria, and fungal stains by the agar well diffusion method. Cytotoxic assay of the title compounds was evaluated against cervical carcinoma cell line (HeLa) by using the MTT assay. The crystal structure of the Quinolone-DNA cleavage complex of type IV topoisomerase from S. pneumoniae (PDB ID: 3RAE) complex was obtained from the Protein Database (PDB, http:// www.rcsb.org). Molecular properties prediction-drug likeness was studied by Molinspiration and Molsoft software, while lipophilicity and solubility parameters were studied using the Osiris program.

Results

A novel approach for the synthesis of benzylthio-1,2,4-triazole and 1,3,4-oxadiazoles core with regioisomeric norfloxacin citrate conjugates was developed. Among the title compounds, 11b, 10a reveal pronounced activity against S. pneumoniae with minimum inhibitory concentrations of 0.89, 0.96 mg/mL and MBCs of 2.95, 2.80 mg/mL, respectively. Minimum Fungicidal Concentration (MFC) has been determined for each compound against two fungal strains. Compound 11b showed maximum anti-cancer activity against HeLa cell line with IC50 value 11.3 ± 0.41 comparable to standard drug DXN. For binding mode, active site residues and docking energies (ΔG =-7.9 Kcal/mol) for ligand 9b exhibited the highest hydrogen bonding (3.59274 A˚), Pi- Alkyl (5.14468 A˚) interactions with amino acid LEU479 of 3RAE protein. The compounds following the Lipinski 'Rule of five' were synthesized for antimicrobial and anti-cancer screening as oral bioavailable drugs/leads. Maximum drug likeness model score 1.52, 1.41 was found for compounds 10d, 11b.

Conclusion

The present work, through simple synthetic approaches, led to the development of novel hybrids of fluoroquinolone containing citrate-triazole-oxadiazole pharmacophores that exhibited remarkable biological activities against different microorganisms and cell lines. The compounds showed suitable druglike properties and are expected to present good bioavailability profile. An efficient combination of molecular modeling and biological activity provided an insight into QSAR guidelines that could aid in further development and optimization of the norfloxacin derivatives.",2021-01-01 +33125077,MASI: microbiota-active substance interactions database.,"Xenobiotic and host active substances interact with gut microbiota to influence human health and therapeutics. Dietary, pharmaceutical, herbal and environmental substances are modified by microbiota with altered bioavailabilities, bioactivities and toxic effects. Xenobiotics also affect microbiota with health implications. Knowledge of these microbiota and active substance interactions is important for understanding microbiota-regulated functions and therapeutics. Established microbiota databases provide useful information about the microbiota-disease associations, diet and drug interventions, and microbiota modulation of drugs. However, there is insufficient information on the active substances modified by microbiota and the abundance of gut bacteria in humans. Only ∼7% drugs are covered by the established databases. To complement these databases, we developed MASI, Microbiota-Active Substance Interactions database, for providing the information about the microbiota alteration of various substances, substance alteration of microbiota, and the abundance of gut bacteria in humans. These include 1,051 pharmaceutical, 103 dietary, 119 herbal, 46 probiotic, 142 environmental substances interacting with 806 microbiota species linked to 56 diseases and 784 microbiota-disease associations. 
MASI covers 11 215 bacteria-pharmaceutical, 914 bacteria-herbal, 309 bacteria-dietary, 753 bacteria-environmental substance interactions and the abundance profiles of 259 bacteria species in 3465 patients and 5334 healthy individuals. MASI is freely accessible at http://www.aiddlab.com/MASI.",2021-01-01 +33084874,OGEE v3: Online GEne Essentiality database with increased coverage of organisms and human cell lines.,"OGEE is an Online GEne Essentiality database. Gene essentiality is not a static and binary property, rather a context-dependent and evolvable property in all forms of life. In OGEE we collect not only experimentally tested essential and non-essential genes, but also associated gene properties that contributes to gene essentiality. We tagged conditionally essential genes that show variable essentiality statuses across datasets to highlight complex interplays between gene functions and environmental/experimental perturbations. OGEE v3 contains gene essentiality datasets for 91 species; almost doubled from 48 species in previous version. To accommodate recent advances on human cancer essential genes (as known as tumor dependency genes) that could serve as targets for cancer treatment and/or drug development, we expanded the collection of human essential genes from 16 cell lines in previous to 581. These human cancer cell lines were tested with high-throughput experiments such as CRISPR-Cas9 and RNAi; in total, 150 of which were tested by both techniques. We also included factors known to contribute to gene essentiality for these cell lines, such as genomic mutation, methylation and gene expression, along with extensive graphical visualizations for ease of understanding of these factors. OGEE v3 can be accessible freely at https://v3.ogee.info.",2021-01-01 +33045745,SilencerDB: a comprehensive database of silencers.,"Gene regulatory elements, including promoters, enhancers, silencers, etc., control transcriptional programs in a spatiotemporal manner. 
Though these elements are known to be able to induce either positive or negative transcriptional control, the community has been mostly studying enhancers which amplify transcription initiation, with less emphasis given to silencers which repress gene expression. To facilitate the study of silencers and the investigation of their potential roles in transcriptional control, we developed SilencerDB (http://health.tsinghua.edu.cn/silencerdb/), a comprehensive database of silencers by manually curating silencers from 2300 published articles. The current version, SilencerDB 1.0, contains (1) 33 060 validated silencers from experimental methods, and (ii) 5 045 547 predicted silencers from state-of-the-art machine learning methods. The functionality of SilencerDB includes (a) standardized categorization of silencers in a tree-structured class hierarchy based on species, organ, tissue and cell line and (b) comprehensive annotations of silencers with the nearest gene and potential regulatory genes. SilencerDB, to the best of our knowledge, is the first comprehensive database at this scale dedicated to silencers, with reliable annotations and user-friendly interactive database features. We believe this database has the potential to enable advanced understanding of silencers in regulatory mechanisms and to empower researchers to devise diverse applications of silencers in disease development.",2021-01-01 +31568186,Spinal Fusion Surgery and Local Antibiotic Administration: A Systematic Review on Key Points From Preclinical and Clinical Data.,"

Study design

Systematic review.

Objective

The present review of clinical and preclinical in vivo studies focused on the local antibiotic administration for surgical site infection (SSI) in spinal fusion procedures and identifying new approaches or research direction able to release antibiotics in the infected environment.

Summary of background data

SSI is a severe complication of spinal fusion procedures that represents a challenging issue for orthopedic surgeons. SSIs can range from 0.7% to 2.3% without instrumentation up to 6.7% with the use of instrumentation with significant implications in health care costs and patient management.

Method

A systematic search was carried out by two independent researchers according to the PRISMA statement in three databases (www.pubmed.com, www.scopus.com and www.webofknowledge.com) to identify preclinical in vivo and clinical reports in the last 10 years. Additionally, to evaluate ongoing clinical trials, three of the major clinical registry websites were also checked (www.clinicaltrials.gov, www.who.int/ictrp, https://www.clinicaltrialsregister.eu).

Results

After screening, a total of 43 articles were considered eligible for the review: 36 clinical studies and seven preclinical studies. In addition, six clinical trials were selected from the clinical registry websites.

Conclusion

The results reported that the topical vancomycin application seem to represent a strategy to reduce SSI incidence in spine surgery. However, the use of local vancomycin as a preventive approach for SSIs in spine surgery is mostly based on retrospective studies with low levels of evidence and moderate/severe risk of bias that do not allow to draw a clear conclusion. This review also underlines that several key points concerning the local use of antibiotics in spinal fusion still remains to be defined to allow this field to make a leap forward that would lead to the identification of specific approaches to counteract the onset of SSIs.

Level of evidence

4.",2020-03-01 +33835459,MODOMICS: An Operational Guide to the Use of the RNA Modification Pathways Database.,"MODOMICS is an established database of RNA modifications that provides comprehensive information concerning chemical structures of modified ribonucleosides, their biosynthetic pathways, the location of modified residues in RNA sequences, and RNA-modifying enzymes. This chapter covers the resources available on MODOMICS web server and the basic steps that can be undertaken by the user to explore them. MODOMICS is available at http://www.genesilico.pl/modomics .",2021-01-01 +33609383,Combined APRI/ALBI score to predict mortality after hepatic resection. ,"Aspartate aminotransferase/platelet ratio index (APRI) and albumin-bilirubin grade (ALBI) are validated prognostic indices implicated as predictors of postoperative liver dysfunction after hepatic resection. The aim of this study was to evaluate the relevance of the combined APRI/ALBI score for postoperative clinically meaningful outcomes. Patients undergoing hepatectomy were included from the American College of Surgeons National Surgical Quality Improvement Program database. The association between APRI/ALBI score and postoperative grade C liver dysfunction, liver dysfunction-associated and overall 30-day mortality was assessed. A total of 12 055 patients undergoing hepatic resection from 2014 to 2017 with preoperative blood values and detailed 30-day postoperative outcomes were included (exploration cohort: January 2014 to December 2016; validation cohort: 2017). In the exploration cohort (8538 patients), the combination of both scores (APRI/ALBI) was significantly associated with postoperative grade C liver dysfunction, 30-day mortality, and liver dysfunction-associated 30-day mortality, and was superior to either score alone. The association with postoperative 30-day mortality was confirmed in multivariable analysis. A predictive model was generated using the exploration cohort. 
The predicted incidence of events closely followed the observed incidence in the validation cohort (3517 patients). Subgroup analyses of tumour types were used to generate disease-specific risk models to assess risk in different clinical scenarios. These findings informed development of a smartphone application (https://tellaprialbi.37binary.com). The predictive potential of the combined APRI/ALBI score for clinically relevant outcomes such as mortality was demonstrated. An evidence-based smartphone application will allow clinical translation and facilitation of risk assessment before hepatic resection using routine laboratory parameters.",2021-01-01 +33509748,[Network pharmacology-based study of the therapeutic mechanism of resveratrol for Alzheimer's disease].,"

Objective

To investigate the therapeutic mechanism of resveratrol (RES) for Alzheimer's disease (AD) in light of network pharmacology.

Methods

We searched PubChem, BATMAN-TCM, Genecards, AD, TTD, String 11.0, AlzData, SwissTargetPrediction, Metascape and other databases for the therapeutic targets of RES and human AD-related targets. The intersection was determined using Venny 2.1 to obtain the therapeutic targets of RES for AD. The protein-protein interaction (PPI) network was constructed, the gene ontology (GO) was enriched and the Kyoto Encyclopedia of Genes and Genomes pathway (KEGG pathway) were analyzed. Cytoscape 3.7.1 software was used to construct a target-signaling pathway network of RES in the treatment of AD. Molecular docking verification was carried out on SwissDock (http://www.swissdock.ch/docking). We examined a 293Tau cell model of AD for changes in protein levels of pS396, pS199, Tau5, CDK5, glycogen synthase kinase 3β (GSK3β) and p-GSK3β in response to RES treatment using Western blotting.

Results

We obtained 182 targets of RES, 525 targets related to AD, and 36 targets of RES for AD treatment, among which 34.6% of the targets were protein-modifying enzymes, 27.7% were metabolite invertase, 13.8% were gene-specific transcriptional regulators, and 10.3% were transporters. The core key targets of RES in the treatment of AD included INS, APP, ESR1, MMP9, IGF1R, CACNA1C, MAPT (microtubule- associated protein Tau), MMP2, TGFB1 and GSK3B. Enrichment analysis of GO biological process suggested that the biological function of RES in AD treatment mainly involved the response to β-amyloid protein, positive regulation of transferase activity, the transmembrane receptor protein tyrosine kinase signaling pathway, regulation of behavior, learning or memory, aging, and transmembrane transport. KEGG pathway enrichment analysis showed that the most significantly enriched signaling pathways were AD pathway, PI3K-AKT signaling pathway, cGMP-PKG signaling pathway, and MAPK signaling pathway. Molecular docking results showed that RES had strong binding with ESR1, GSK3B, MMP9, IGF1R, APP and INS. In the cell model of AD, treatment with 50 μmol/L RES for 12 h significantly reduced the levels of pS396 and pS199 by regulating CDK5 and GSK3β activity (P < 0.001).

Conclusions

RES produces therapeutic effects on AD by acting on multiple targets and affecting multiple signaling pathways and improves AD-associated pathologies via a direct action on Aβ and Tau pathological processes.",2021-01-01 +33438548,Texture Analysis in the Evaluation of COVID-19 Pneumonia in Chest X-Ray Images: A Proof of Concept Study.,"

Background

One of the most challenging aspects related to Covid-19 is to establish the presence of infection in an early phase of the disease. Texture analysis might be an additional tool for the evaluation of Chest X-ray in patients with clinical suspicion of Covid-19 related pneumonia.

Objective

To evaluate the diagnostic performance of texture analysis and machine learning models for the diagnosis of Covid-19 interstitial pneumonia in Chest X-ray images.

Methods

Chest X-ray images were accessed from a publicly available repository(https://www.kaggle. com/tawsifurrahman/covid19-radiography-database). Lung areas were manually segmented using a polygonal region of interest covering both lung areas, using MaZda, a freely available software for texture analysis. A total of 308 features per ROI was extracted. One hundred-ten Covid-19 Chest X-ray images were selected for the final analysis.

Results

Six models, namely NB, GLM, DL, GBT, ANN, and PLS-DA were selected and ensembled. According to Youden's index, the Covid-19 Ensemble Machine Learning Score showing the highest area under the curve (0.971±0.015) was 132.57. Assuming this cut-off the Ensemble model performance was estimated by evaluating both true and false positive/negative, resulting in 91.8% accuracy with 93% sensitivity and 90% specificity. Moving the cut-off value to -100, although the accuracy resulted lower (90.6%), the Ensemble Machine Learning showed 100% sensitivity, with 80% specificity.

Conclusion

Texture analysis of Chest X-ray images and machine learning algorithms may help in differentiating patients with Covid-19 pneumonia. Despite several limitations, this study can lay the ground for future research works in this field and help to develop more rapid and accurate screening tools for these patients.",2021-01-01 +33396976,The 2021 Nucleic Acids Research database issue and the online molecular biology database collection.,"The 2021 Nucleic Acids Research database Issue contains 189 papers spanning a wide range of biological fields and investigation. It includes 89 papers reporting on new databases and 90 covering recent changes to resources previously published in the Issue. A further ten are updates on databases most recently published elsewhere. Seven new databases focus on COVID-19 and SARS-CoV-2 and many others offer resources for studying the virus. Major returning nucleic acid databases include NONCODE, Rfam and RNAcentral. Protein family and domain databases include COG, Pfam, SMART and Panther. Protein structures are covered by RCSB PDB and dispersed proteins by PED and MobiDB. In metabolism and signalling, STRING, KEGG and WikiPathways are featured, along with returning KLIFS and new DKK and KinaseMD, all focused on kinases. IMG/M and IMG/VR update in the microbial and viral genome resources section, while human and model organism genomics resources include Flybase, Ensembl and UCSC Genome Browser. Cancer studies are covered by updates from canSAR and PINA, as well as newcomers CNCdatabase and Oncovar for cancer drivers. Plant comparative genomics is catered for by updates from Gramene and GreenPhylDB. The entire Database Issue is freely available online on the Nucleic Acids Research website (https://academic.oup.com/nar). The NAR online Molecular Biology Database Collection has been substantially updated, revisiting nearly 1000 entries, adding 90 new resources and eliminating 86 obsolete databases, bringing the current total to 1641 databases. 
It is available at https://www.oxfordjournals.org/nar/database/c/.",2021-01-01 +33219670,MolluscDB: an integrated functional and evolutionary genomics database for the hyper-diverse animal phylum Mollusca.,"Mollusca represents the second largest animal phylum but remains poorly explored from a genomic perspective. While the recent increase in genomic resources holds great promise for a deep understanding of molluscan biology and evolution, access and utilization of these resources still pose a challenge. Here, we present the first comprehensive molluscan genomics database, MolluscDB (http://mgbase.qnlm.ac), which compiles and integrates current molluscan genomic/transcriptomic resources and provides convenient tools for multi-level integrative and comparative genomic analyses. MolluscDB enables a systematic view of genomic information from various aspects, such as genome assembly statistics, genome phylogenies, fossil records, gene information, expression profiles, gene families, transcription factors, transposable elements and mitogenome organization information. Moreover, MolluscDB offers valuable customized datasets or resources, such as gene coexpression networks across various developmental stages and adult tissues/organs, core gene repertoires inferred for major molluscan lineages, and macrosynteny analysis for chromosomal evolution. MolluscDB presents an integrative and comprehensive genomics platform that will allow the molluscan community to cope with ever-growing genomic resources and will expedite new scientific discoveries for understanding molluscan biology and evolution.",2021-01-01 +33152079,The antiSMASH database version 3: increased taxonomic coverage and new query features for modular enzymes.,"Microorganisms produce natural products that are frequently used in the development of antibacterial, antiviral, and anticancer drugs, pesticides, herbicides, or fungicides. 
In recent years, genome mining has evolved into a prominent method to access this potential. antiSMASH is one of the most popular tools for this task. Here, we present version 3 of the antiSMASH database, providing a means to access and query precomputed antiSMASH-5.2-detected biosynthetic gene clusters from representative, publicly available, high-quality microbial genomes via an interactive graphical user interface. In version 3, the database contains 147 517 high quality BGC regions from 388 archaeal, 25 236 bacterial and 177 fungal genomes and is available at https://antismash-db.secondarymetabolites.org/.",2021-01-01 +33093428,Clinical and Epidemiologic Analysis of COVID-19 Children Cases in Colombia PEDIACOVID.,"

Objective

The COVID pandemic has affected Colombia with a high number of cases and deceases; however, no studies have been published regarding pediatric population. An epidemiologic analysis of the nationwide COVID register, therefore, is necessary to outline and describe the impact in such population.

Methods

A retrospective analysis was made of the characteristics of a cohort of 5062 patients <18 years of age, until June 16, 2020, reported at the National Institute of Health-INS (https://www.ins.gov.co/News./Pages/Coronavirus.aspx), through the national public access database, with all subjects confirmed with COVID-19 or severe acute respiratory syndrome-CoV-2.

Results

Reviewed on June 16, 2020, a total of 54,971 confirmed cases were reported nationwide for COVID-19, of which 5062 (9.2%) are cases in patients under 18 years of age. There was a statistically significant difference between groups; age was statistically significantly higher in the asymptomatic, compared with: deceased, severe and moderate cases; moreover, age was statistically significantly higher in the mild, compared with: deceased, severe and moderate. Statistically significant difference determined with one-way ANOVA was found between groups (F = 16.08, P < 0.001). Post hoc analysis reveals significant differences between groups, the age of patients at home (9.39 years) and those recovered (9.3 years) being significantly higher than those in intensive care unit (4.9 years), in hospital (6.1 years), or than the deceased (2.9 years).

Conclusion

The results of this study show that, at the nationwide level, patients in more severe states (deceased, severe and moderate), are significantly younger than those in the milder state (asymptomatic and mild).",2021-01-01 +33045729,GIMICA: host genetic and immune factors shaping human microbiota.,"Besides the environmental factors having tremendous impacts on the composition of microbial community, the host factors have recently gained extensive attentions on their roles in shaping human microbiota. There are two major types of host factors: host genetic factors (HGFs) and host immune factors (HIFs). These factors of each type are essential for defining the chemical and physical landscapes inhabited by microbiota, and the collective consideration of both types have great implication to serve comprehensive health management. However, no database was available to provide the comprehensive factors of both types. Herein, a database entitled 'Host Genetic and Immune Factors Shaping Human Microbiota (GIMICA)' was constructed. Based on the 4257 microbes confirmed to inhabit nine sites of human body, 2851 HGFs (1368 single nucleotide polymorphisms (SNPs), 186 copy number variations (CNVs), and 1297 non-coding ribonucleic acids (RNAs)) modulating the expression of 370 microbes were collected, and 549 HIFs (126 lymphocytes and phagocytes, 387 immune proteins, and 36 immune pathways) regulating the abundance of 455 microbes were also provided. All in all, GIMICA enabled the collective consideration not only between different types of host factor but also between the host and environmental ones, which is freely accessible without login requirement at: https://idrblab.org/gimica/.",2021-01-01 +32975768,Multilocus Sequence Typing (MLST) and Whole Genome Sequencing (WGS) of Listeria monocytogenes and Listeria innocua.,Nucleotide sequence-based methods focusing on the single-nucleotide polymorphisms (SNPs) of Listeria monocytogenes and L. 
innocua housekeeping genes (multilocus sequence typing) and in the core genome (core genome MLST) facilitate the rapid and interlaboratory comparison in open accessible databases as provided by Institute Pasteur ( https://bigsdb.web.pasteur.fr/listeria/listeria.html ). Strains can be compared on a global level and help to track forward and trace backward pathogen contamination events in food processing facilities and in outbreak scenarios.,2021-01-01 +34453204,Omega-3 fatty acid blood levels are inversely associated with cardiometabolic risk factors in HFpEF patients: the Aldo-DHF randomized controlled trial.,"

Objectives

To evaluate associations of omega-3 fatty acid (O3-FA) blood levels with cardiometabolic risk markers, functional capacity and cardiac function/morphology in patients with heart failure with preserved ejection fraction (HFpEF).

Background

O3-FA have been linked to reduced risk for HF and associated phenotypic traits in experimental/clinical studies.

Methods

This is a cross-sectional analysis of data from the Aldo-DHF-RCT. From 422 patients, the omega-3-index (O3I = EPA + DHA) was analyzed at baseline in n = 404 using the HS-Omega-3-Index® methodology. Patient characteristics were; 67 ± 8 years, 53% female, NYHA II/III (87/13%), ejection fraction ≥ 50%, E/e' 7.1 ± 1.5; median NT-proBNP 158 ng/L (IQR 82-298). Pearson's correlation coefficient and multiple linear regression analyses, using sex and age as covariates, were used to describe associations of the O3I with metabolic phenotype, functional capacity, echocardiographic markers for LVDF, and neurohumoral activation at baseline/12 months.

Results

The O3I was below (< 8%), within (8-11%), and higher (> 11%) than the target range in 374 (93%), 29 (7%), and 1 (0.2%) patients, respectively. Mean O3I was 5.7 ± 1.7%. The O3I was inversely associated with HbA1c (r = - 0.139, p = 0.006), triglycerides-to-HDL-C ratio (r = - 0.12, p = 0.017), triglycerides (r = - 0.117, p = 0.02), non-HDL-C (r = - 0.101, p = 0.044), body-mass-index (r = - 0.149, p = 0.003), waist circumference (r = - 0.121, p = 0.015), waist-to-height ratio (r = - 0.141, p = 0.005), and positively associated with submaximal aerobic capacity (r = 0.113, p = 0.023) and LVEF (r = 0.211, p < 0.001) at baseline. Higher O3I at baseline was predictive of submaximal aerobic capacity (β = 15.614, p < 0,001), maximal aerobic capacity (β = 0.399, p = 0.005) and LVEF (β = 0.698, p = 0.007) at 12 months.

Conclusions

Higher O3I was associated with a more favorable cardiometabolic risk profile and predictive of higher submaximal/maximal aerobic capacity and lower BMI/truncal adiposity in HFpEF patients. Omega-3 fatty acid blood levels are inversely associated with cardiometabolic risk factors in HFpEF patients. Higher O3I was associated with a more favorable cardiometabolic risk profile and aerobic capacity (left) but did not correlate with echocardiographic markers for left ventricular diastolic function or neurohumoral activation (right). An O3I-driven intervention trial might be warranted to answer the question whether O3-FA in therapeutic doses (with the target O3I 8-11%) impact on echocardiographic markers for left ventricular diastolic function and neurohumoral activation in patients with HFpEF. This figure contains modified images from Servier Medical Art ( https://smart.servier.com ) licensed by a Creative Commons Attribution 3.0 Unported License.",2021-08-28 +30944327,Collected mass spectrometry data on monoterpene indole alkaloids from natural product chemistry research.,"This Data Descriptor announces the submission to public repositories of the monoterpene indole alkaloid database (MIADB), a cumulative collection of 172 tandem mass spectrometry (MS/MS) spectra from multiple research projects conducted in eight natural product chemistry laboratories since the 1960s. All data have been annotated and organized to promote reuse by the community. Being a unique collection of these complex natural products, these data can be used to guide the dereplication and targeting of new related monoterpene indole alkaloids within complex mixtures when applying computer-based approaches, such as molecular networking. Each spectrum has its own accession number from CCMSLIB00004679916 to CCMSLIB00004680087 on the GNPS. 
The MIADB is available for download from MetaboLights under the identifier: MTBLS142 ( https://www.ebi.ac.uk/metabolights/MTBLS142 ).",2019-04-03 +33416865,Interactive gene networks with KNIT. ,"KNIT is a web application that provides a hierarchical, directed graph on how a set of genes is connected to a particular gene of interest. Its primary aim is to aid researchers in discerning direct from indirect effects that a gene might have on the expression of other genes and molecular pathways, a very common problem in omics analysis. As such, KNIT provides deep contextual information for experiments where gene or protein expression might be changed, such as gene knock-out and overexpression experiments. KNIT is publicly available at http://knit.ims.bio. It is implemented with Django and Nuxtjs, with all major browsers supported. Supplementary information: Supplementary data are available at Bioinformatics online.",2021-01-08 +33263329,Structural variability of CG-rich DNA 18-mers accommodating double T-T mismatches.,"Solution and crystal data are reported for DNA 18-mers with sequences related to those of bacterial noncoding single-stranded DNA segments called repetitive extragenic palindromes (REPs). Solution CD and melting data showed that the CG-rich, near-palindromic REPs from various bacterial species exhibit dynamic temperature-dependent and concentration-dependent equilibria, including architectures compatible with not only hairpins, which are expected to be biologically relevant, but also antiparallel duplexes and bimolecular tetraplexes. Three 18-mer oligonucleotides named Hpar-18 (PDB entry 6rou), Chom-18 (PDB entry 6ros) and its brominated variant Chom-18Br (PDB entry 6ror) crystallized as isomorphic right-handed A-like duplexes. The low-resolution crystal structures were solved with the help of experimental phases for Chom-18Br. The center of the duplexes is formed by two successive T-T noncanonical base pairs (mismatches). 
They do not deform the double-helical geometry. The presence of T-T mismatches prompted an analysis of the geometries of these and other noncanonical pairs in other DNA crystals in terms of their fit to the experimental electron densities (RSCC) and their geometric fit to the NtC (dinucleotide conformational) classes (https://dnatco.datmos.org/). Throughout this work, knowledge of the NtC classes was used to refine and validate the crystal structures, and to analyze the mismatches.",2020-11-24 +33300819,"A Case-Crossover Analysis of Indoor Heat Exposure on Mortality and Hospitalizations among the Elderly in Houston, Texas.","

Background

Despite the substantial role indoor exposure has played in heat wave-related mortality, few epidemiological studies have examined the health effects of exposure to indoor heat. As a result, knowledge gaps regarding indoor heat-health thresholds, vulnerability, and adaptive capacity persist.

Objective

We evaluated the role of indoor heat exposure on mortality and morbidity among the elderly (≥65 years of age) in Houston, Texas.

Methods

Mortality and emergency hospital admission data were obtained through the Texas Department of State Health Services. Summer indoor heat exposure was modeled at the U.S. Census block group (CBG) level using building energy models, outdoor weather data, and building characteristic data. Indoor heat-health associations were examined using time-stratified case-crossover models, controlling for temporal trends and meteorology, and matching on CBG of residence, year, month, and weekday of the adverse health event. Separate models were fitted for three indoor exposure metrics, for individual lag days 0-6, and for 3-d moving averages (lag 0-2). Effect measure modification was explored via stratification on individual- and area-level vulnerability factors.

Results

We estimated positive associations between short-term changes in indoor heat exposure and cause-specific mortality and morbidity [e.g., circulatory deaths, odds ratio per 5°C increase=1.16 (95% CI: 1.03, 1.30)]. Associations were generally positive for earlier lag periods and weaker across later lag periods. Stratified analyses suggest stronger associations between indoor heat and emergency hospital admissions among African Americans compared with Whites.

Discussion

Findings suggest excess mortality among certain elderly populations in Houston who are likely exposed to high indoor heat. We developed a novel methodology to estimate indoor heat exposure that can be adapted to other U.S. locations. In locations with high air conditioning prevalence, simplified modeling approaches may adequately account for indoor heat exposure in vulnerable neighborhoods. Accounting for indoor heat exposure may improve the estimation of the total impact of heat on health. https://doi.org/10.1289/EHP6340.",2020-12-10 +33729791,Epigenetic Target Profiler: A Web Server to Predict Epigenetic Targets of Small Molecules.,"The identification of protein targets of small molecules is essential for drug discovery. With the increasing amount of chemogenomic data in the public domain, multiple ligand-based models for target prediction have emerged. However, these models are generally biased by the number of known ligands for different targets, which involves an under-representation of epigenetic targets, and despite the increasing importance of epigenetic targets in drug discovery, there are no open tools for epigenetic target prediction. In this work, we introduce Epigenetic Target Profiler (ETP), a freely accessible and easy-to-use web application for the prediction of epigenetic targets of small molecules. For a query compound, ETP predicts its bioactivity profile over a panel of 55 different epigenetic targets. To that aim, ETP uses a consensus model based on two binary classification models for each target, relying on support vector machines and built on molecular fingerprints of different design. A distance-to-model parameter related to the reliability of the predictions is included to facilitate their interpretability and assist in the identification of small molecules with potential epigenetic activity. 
Epigenetic Target Profiler is freely available at http://www.epigenetictargetprofiler.com.",2021-03-17 +26578574,"Ensembl Genomes 2016: more genomes, more complexity.","Ensembl Genomes (http://www.ensemblgenomes.org) is an integrating resource for genome-scale data from non-vertebrate species, complementing the resources for vertebrate genomics developed in the context of the Ensembl project (http://www.ensembl.org). Together, the two resources provide a consistent set of programmatic and interactive interfaces to a rich range of data including reference sequence, gene models, transcriptional data, genetic variation and comparative analysis. This paper provides an update to the previous publications about the resource, with a focus on recent developments. These include the development of new analyses and views to represent polyploid genomes (of which bread wheat is the primary exemplar); and the continued up-scaling of the resource, which now includes over 23 000 bacterial genomes, 400 fungal genomes and 100 protist genomes, in addition to 55 genomes from invertebrate metazoa and 39 genomes from plants. This dramatic increase in the number of included genomes is one part of a broader effort to automate the integration of archival data (genome sequence, but also associated RNA sequence data and variant calls) within the context of reference genomes and make it available through the Ensembl user interfaces.",2015-11-17 +34825131,Application of Machine Learning Algorithms in Breast Cancer Diagnosis and Classification.,"Breast cancer continues to be the most frequent cancer in females, affecting about one in 8 women and causing the highest number of cancer-related deaths in females worldwide despite remarkable progress in early diagnosis, screening, and patient management. All breast lesions are not malignant, and all the benign lesions do not progress to cancer. 
However, the accuracy of diagnosis can be increased by a combination of preoperative tests such as physical examination, mammography, fine-needle aspiration cytology, and core needle biopsy. Despite some limitations, these procedures are more accurate, reliable, and acceptable, when compared with a single adopted diagnostic procedure. Recent studies have shown that breast cancer can be accurately predicted and diagnosed using machine learning (ML) technology. The objective of this study was to explore the application of ML approaches to classify breast cancer based on feature values generated from a digitized image of a fine-needle aspiration (FNA) of a breast mass. To achieve this objective, we used ML algorithms, collected a scientific dataset of 569 breast cancer patients from Kaggle (https://www.kaggle.com/uciml/breast-cancer-wisconsin-data), analyzed and interpreted the data based on ten real-valued features of a breast mass FNA including the radius, texture, perimeter, area, smoothness, compactness, concavity, concave points, symmetry, and fractal dimension. Among the 569 patients tested, 63% were diagnosed with benign breast cancer and 37% were diagnosed with malignant breast cancer. Benign tumors grow slowly and do not spread while malignant tumors grow rapidly and spread to other parts of the body.",2021-01-01 +33206635,Stability of SARS-CoV-2 phylogenies.,"The SARS-CoV-2 pandemic has led to unprecedented, nearly real-time genetic tracing due to the rapid community sequencing response. Researchers immediately leveraged these data to infer the evolutionary relationships among viral samples and to study key biological questions, including whether host viral genome editing and recombination are features of SARS-CoV-2 evolution. This global sequencing effort is inherently decentralized and must rely on data collected by many labs using a wide variety of molecular and bioinformatic techniques. 
There is thus a strong possibility that systematic errors associated with lab-or protocol-specific practices affect some sequences in the repositories. We find that some recurrent mutations in reported SARS-CoV-2 genome sequences have been observed predominantly or exclusively by single labs, co-localize with commonly used primer binding sites and are more likely to affect the protein-coding sequences than other similarly recurrent mutations. We show that their inclusion can affect phylogenetic inference on scales relevant to local lineage tracing, and make it appear as though there has been an excess of recurrent mutation or recombination among viral lineages. We suggest how samples can be screened and problematic variants removed, and we plan to regularly inform the scientific community with our updated results as more SARS-CoV-2 genome sequences are shared (https://virological.org/t/issues-with-sars-cov-2-sequencing-data/473 and https://virological.org/t/masking-strategies-for-sars-cov-2-alignments/480). We also develop tools for comparing and visualizing differences among very large phylogenies and we show that consistent clade- and tree-based comparisons can be made between phylogenies produced by different groups. These will facilitate evolutionary inferences and comparisons among phylogenies produced for a wide array of purposes. Building on the SARS-CoV-2 Genome Browser at UCSC, we present a toolkit to compare, analyze and combine SARS-CoV-2 phylogenies, find and remove potential sequencing errors and establish a widely shared, stable clade structure for a more accurate scientific inference and discourse.",2020-11-18 +34246172,Dependency of lower limb joint reaction forces on femoral version.,"

Background

Musculoskeletal (MSK) models based on literature data are meant to represent a generic anatomy and are a popular tool employed by biomechanists to estimate the internal loads occurring in the lower limb joints, such as joint reaction forces (JRFs). However, since these models are normally just linearly scaled to an individual's anthropometry, it is unclear how their estimations would be affected by the personalization of key features of the MSK anatomy, one of which is the femoral version angle.

Research question

How are the lower limb JRF magnitudes computed through a generic MSK model affected by changes in the femoral version?

Methods

We developed a bone-deformation tool in MATLAB (shared at https://simtk.org/projects/bone_deformity) and used it to create a set of seven OpenSim models spanning from 2˚ femoral retroversion to 40˚ anteversion. We used these models to simulate the gait of an elderly individual with an instrumented prosthesis implanted at their knee joint (5th Grand Challenge dataset) and quantified both the changes in JRFs magnitude due to varying the skeletal anatomy and their accuracy against the correspondent in vivo measurements at the knee joint.

Results

Hip and knee JRF magnitudes were affected by the femoral version with variations from the unmodified generic model up to 17.9 ± 4.5% at the hip and 43.4 ± 27.1% at the knee joint. The ankle joint was unaffected by the femoral geometry. The MSK models providing the most accurate knee JRFs (root mean squared error: 0.370 ± 0.068 body weight, coefficient of determination: 0.757 ± 0.104, peak error range: 0.09-0.42 body weight) were those with femoral anteversion angle closer to that measured on the segmented bone of the individual.

Significance

Femoral version substantially affects hip and knee JRFs estimated with generic MSK models, suggesting that personalizing key MSK anatomical features might be necessary for accurate estimation of JRFs with these models.",2021-06-16 +32437538,scTPA: a web tool for single-cell transcriptome analysis of pathway activation signatures.,"

Motivation

At present, a fundamental challenge in single-cell RNA-sequencing data analysis is functional interpretation and annotation of cell clusters. Biological pathways in distinct cell types have different activation patterns, which facilitates the understanding of cell functions using single-cell transcriptomics. However, no effective web tool has been implemented for single-cell transcriptome data analysis based on prior biological pathway knowledge.

Results

Here, we present scTPA, a web-based platform for pathway-based analysis of single-cell RNA-seq data in human and mouse. scTPA incorporates four widely-used gene set enrichment methods to estimate the pathway activation scores of single cells based on a collection of available biological pathways with different functional and taxonomic classifications. The clustering analysis and cell-type-specific activation pathway identification were provided for the functional interpretation of cell types from a pathway-oriented perspective. An intuitive interface allows users to conveniently visualize and download single-cell pathway signatures. Overall, scTPA is a comprehensive tool for the identification of pathway activation signatures for the analysis of single cell heterogeneity.

Availability and implementation

http://sctpa.bio-data.cn/sctpa.

Contact

sujz@wmu.edu.cn or yufulong421@gmail.com or zgj@zjut.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +27819351,PEPlife: A Repository of the Half-life of Peptides.,"Short half-life is one of the key challenges in the field of therapeutic peptides. Various studies have reported enhancement in the stability of peptides using methods like chemical modifications, D-amino acid substitution, cyclization, replacement of labile amino acids, etc. In order to study this scattered data, there is a pressing need for a repository dedicated to the half-life of peptides. To fill this lacuna, we have developed PEPlife (http://crdd.osdd.net/raghava/peplife), a manually curated resource of experimentally determined half-life of peptides. PEPlife contains 2229 entries covering 1193 unique peptides. Each entry provides detailed information of the peptide, like its name, sequence, half-life, modifications, the experimental assay for determining half-life, biological nature and activity of the peptide. We also maintain SMILES and structures of peptides. We have incorporated web-based modules to offer user-friendly data searching and browsing in the database. PEPlife integrates numerous tools to perform various types of analysis such as BLAST, Smith-Waterman algorithm, GGSEARCH, Jalview and MUSTANG. PEPlife would augment the understanding of different factors that affect the half-life of peptides like modifications, sequence, length, route of delivery of the peptide, etc. We anticipate that PEPlife will be useful for the researchers working in the area of peptide-based therapeutics.",2016-11-07 +33441155,The variant call format provides efficient and robust storage of GWAS summary statistics.,"GWAS summary statistics are fundamental for a variety of research applications yet no common storage format has been widely adopted. 
Existing tabular formats ambiguously or incompletely store information about genetic variants and associations, lack essential metadata and are typically not indexed yielding poor query performance and increasing the possibility of errors in data interpretation and post-GWAS analyses. To address these issues, we adapted the variant call format to store GWAS summary statistics (GWAS-VCF) and developed open-source tools to use this format in downstream analyses. We provide open access to over 10,000 complete GWAS summary datasets converted to this format ( https://gwas.mrcieu.ac.uk ).",2021-01-13 +32467963,AlphaFamImpute: high-accuracy imputation in full-sib families from genotype-by-sequencing data.,"

Summary

AlphaFamImpute is an imputation package for calling, phasing and imputing genome-wide genotypes in outbred full-sib families from single nucleotide polymorphism (SNP) array and genotype-by-sequencing (GBS) data. GBS data are increasingly being used to genotype individuals, especially when SNP arrays do not exist for a population of interest. Low-coverage GBS produces data with a large number of missing or incorrect naïve genotype calls, which can be improved by identifying shared haplotype segments between full-sib individuals. Here, we present AlphaFamImpute, an algorithm specifically designed to exploit the genetic structure of full-sib families. It performs imputation using a two-step approach. In the first step, it phases and imputes parental genotypes based on the segregation states of their offspring (i.e. which pair of parental haplotypes the offspring inherited). In the second step, it phases and imputes the offspring genotypes by detecting which haplotype segments the offspring inherited from their parents. With a series of simulations, we find that AlphaFamImpute obtains high-accuracy genotypes, even when the parents are not genotyped and individuals are sequenced at <1x coverage.

Availability and implementation

AlphaFamImpute is available as a Python package from the AlphaGenes website http://www.AlphaGenes.roslin.ed.ac.uk/AlphaFamImpute.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +31706309,Quantitative lung morphology: semi-automated measurement of mean linear intercept.,"BACKGROUND:Quantifying morphologic changes is critical to our understanding of the pathophysiology of the lung. Mean linear intercept (MLI) measures are important in the assessment of clinically relevant pathology, such as emphysema. However, qualitative measures are prone to error and bias, while quantitative methods such as mean linear intercept (MLI) are manually time consuming. Furthermore, a fully automated, reliable method of assessment is nontrivial and resource-intensive. METHODS:We propose a semi-automated method to quantify MLI that does not require specialized computer knowledge and uses a free, open-source image-processor (Fiji). We tested the method with a computer-generated, idealized dataset, derived an MLI usage guide, and successfully applied this method to a murine model of particulate matter (PM) exposure. Fields of randomly placed, uniform-radius circles were analyzed. Optimal numbers of chords to assess based on MLI were found via receiver-operator-characteristic (ROC)-area under the curve (AUC) analysis. Intraclass correlation coefficient (ICC) measured reliability. RESULTS:We demonstrate high accuracy (AUCROC > 0.8 for MLIactual > 63.83 pixels) and excellent reliability (ICC = 0.9998, p < 0.0001). We provide a guide to optimize the number of chords to sample based on MLI. Processing time was 0.03 s/image. We showed elevated MLI in PM-exposed mice compared to PBS-exposed controls. We have also provided the macros that were used and have made an ImageJ plugin available free for academic research use at https://med.nyu.edu/nolanlab. CONCLUSIONS:Our semi-automated method is reliable, equally fast as fully automated methods, and uses free, open-source software. 
Additionally, we quantified the optimal number of chords that should be measured per lung field.",2019-11-09 +31906602,LLPSDB: a database of proteins undergoing liquid-liquid phase separation in vitro.,"Liquid-liquid phase separation (LLPS) leads to a conversion of homogeneous solution into a dense phase that often resembles liquid droplets, and a dilute phase. An increasing number of investigations have shown that biomolecular condensates formed by LLPS play important roles in both physiology and pathology. It has been suggested the phase behavior of proteins would be not only determined by sequences, but controlled by micro-environmental conditions. Here, we introduce LLPSDB (http://bio-comp.ucas.ac.cn/llpsdb or http://bio-comp.org.cn/llpsdb), a web-accessible database providing comprehensive, carefully curated collection of proteins involved in LLPS as well as corresponding experimental conditions in vitro from published literatures. The current release of LLPSDB incorporates 1182 entries with 273 independent proteins and 2394 specific conditions. The database provides a variety of data including biomolecular information (protein sequence, protein modification, nucleic acid, etc.), specific phase separation information (experimental conditions, phase behavior description, etc.) and comprehensive annotations. To our knowledge, LLPSDB is the first available database designed for LLPS related proteins specifically. It offers plenty of valuable resources for exploring the relationship between protein sequence and phase behavior, and will enhance the development of phase separation prediction methods, which may further provide more insights into a comprehensive understanding of LLPS in cellular function and related diseases.",2020-01-01 +31598702,VISDB: a manually curated database of viral integration sites in the human genome.,"Virus integration into the human genome occurs frequently and represents a key driving event in human disease. 
Many studies have reported viral integration sites (VISs) proximal to structural or functional regions of the human genome. Here, we systematically collected and manually curated all VISs reported in the literature and publicly available data resources to construct the Viral Integration Site DataBase (VISDB, https://bioinfo.uth.edu/VISDB). Genomic information including target genes, nearby genes, nearest transcription start site, chromosome fragile sites, CpG islands, viral sequences and target sequences were integrated to annotate VISs. We further curated VIS-involved oncogenes and tumor suppressor genes, virus-host interactions involved in non-coding RNA (ncRNA), target gene and microRNA expression in five cancers, among others. Moreover, we developed tools to visualize single integration events, VIS clusters, DNA elements proximal to VISs and virus-host interactions involved in ncRNA. The current version of VISDB contains a total of 77 632 integration sites of five DNA viruses and four RNA retroviruses. VISDB is currently the only active comprehensive VIS database, which provides broad usability for the study of disease, virus related pathophysiology, virus biology, host-pathogen interactions, sequence motif discovery and pattern recognition, molecular evolution and adaption, among others.",2020-01-01 +31680137,ANISEED 2019: 4D exploration of genetic data for an extended range of tunicates.,"ANISEED (https://www.aniseed.cnrs.fr) is the main model organism database for the worldwide community of scientists working on tunicates, the vertebrate sister-group. Information provided for each species includes functionally-annotated gene and transcript models with orthology relationships within tunicates, and with echinoderms, cephalochordates and vertebrates. Beyond genes the system describes other genetic elements, including repeated elements and cis-regulatory modules. 
Gene expression profiles for several thousand genes are formalized in both wild-type and experimentally-manipulated conditions, using formal anatomical ontologies. These data can be explored through three complementary types of browsers, each offering a different view-point. A developmental browser summarizes the information in a gene- or territory-centric manner. Advanced genomic browsers integrate the genetic features surrounding genes or gene sets within a species. A Genomicus synteny browser explores the conservation of local gene order across deuterostome. This new release covers an extended taxonomic range of 14 species, including for the first time a non-ascidian species, the appendicularian Oikopleura dioica. Functional annotations, provided for each species, were enhanced through a combination of manual curation of gene models and the development of an improved orthology detection pipeline. Finally, gene expression profiles and anatomical territories can be explored in 4D online through the newly developed Morphonet morphogenetic browser.",2020-01-01 +31701131,CancerTracer: a curated database for intrapatient tumor heterogeneity.,"Comprehensive genomic analyses of cancers have revealed substantial intrapatient molecular heterogeneities that may explain some instances of drug resistance and treatment failures. Examination of the clonal composition of an individual tumor and its evolution through disease progression and treatment may enable identification of precise therapeutic targets for drug design. Multi-region and single-cell sequencing are powerful tools that can be used to capture intratumor heterogeneity. Here, we present a database we've named CancerTracer (http://cailab.labshare.cn/cancertracer): a manually curated database designed to track and characterize the evolutionary trajectories of tumor growth in individual patients. We collected over 6000 tumor samples from 1548 patients corresponding to 45 different types of cancer. 
Patient-specific tumor phylogenetic trees were constructed based on somatic mutations or copy number alterations identified in multiple biopsies. Using the structured heterogeneity data, researchers can identify common driver events shared by all tumor regions, and the heterogeneous somatic events present in different regions of a tumor of interest. The database can also be used to investigate the phylogenetic relationships between primary and metastatic tumors. It is our hope that CancerTracer will significantly improve our understanding of the evolutionary histories of tumors, and may facilitate the identification of predictive biomarkers for personalized cancer therapies.",2020-01-01 +32100133,Elective neck dissection in T1/T2 oral squamous cell carcinoma with N0 neck: essential or not? A systematic review and meta-analysis.,"

Introduction

Oral squamous cell carcinoma (SCC) is characterized by a high risk of cervical lymph node metastasis with a high incidence of occult metastasis. A strong debate is still present regarding the best treatment for early oral cavity cancer with N0 neck.

Objective

The aim of the present study was to compare the results of elective neck dissection (END) and watchful waiting (observation or therapeutic neck dissection) in patients with early-stage (T1/T2) oral squamous cell carcinoma with N0 neck.

Data sources

Medline database (https://www.pubmed.com), Google Scholar and Scopus.

Patients and methods

A systematic review and meta-analysis for the evaluation of regional recurrence rate and 5-year survival rate after elective neck dissection (END) or watchful waiting in early oral cancers were conducted. This study included published English medical articles (which met our predetermined inclusion criteria) in the last 30 years, concerning early oral SCC with N0 neck. 24 articles were included (4 randomized studies and 20 observational ""retrospective"" studies) with a total number of 2190 patients who underwent END and 1619 who underwent watchful waiting. Regarding the 5-year survival rate, (10) studies were included with a total number of 1211 patients who underwent END and 948 who underwent watchful waiting.

Results

Regarding the regional recurrence rate, (END) was associated with significantly lower risk of recurrence when compared with observation. Regarding the 5-year survival rate, END was associated with a better survival rate than the observational group.

Conclusions

Elective neck dissection is better than watchful waiting in early (T1/T2) stage oral cavity squamous cell carcinoma with N0 neck, regarding regional recurrence and 5-year survival rate.",2020-02-25 +31598675,KnockTF: a comprehensive human gene expression profile database with knockdown/knockout of transcription factors.,"Transcription factors (TFs) and their target genes have important functions in human diseases and biological processes. Gene expression profile analysis before and after knockdown or knockout is one of the most important strategies for obtaining target genes of TFs and exploring TF functions. Human gene expression profile datasets with TF knockdown and knockout are accumulating rapidly. Based on the urgent need to comprehensively and effectively collect and process these data, we developed KnockTF (http://www.licpathway.net/KnockTF/index.html), a comprehensive human gene expression profile database of TF knockdown and knockout. KnockTF provides a number of resources for human gene expression profile datasets associated with TF knockdown and knockout and annotates TFs and their target genes in a tissue/cell type-specific manner. The current version of KnockTF has 570 manually curated RNA-seq and microarray datasets associated with 308 TFs disrupted by different knockdown and knockout techniques and across multiple tissue/cell types. KnockTF collects upstream pathway information of TFs and functional annotation results of downstream target genes. It provides details about TFs binding to promoters, super-enhancers and typical enhancers of target genes. KnockTF constructs a TF-differentially expressed gene network and performs network analyses for genes of interest. KnockTF will help elucidate TF-related functions and potential biological effects.",2020-01-01 +28111366,Orchidstra 2.0-A Transcriptomics Resource for the Orchid Family.,"Orchidaceae, the orchid family, encompasses more than 25,000 species and five subfamilies. 
Due to their beautiful and exotic flowers, distinct biological and ecological features, orchids have aroused wide interest among both researchers and the general public. We constructed the Orchidstra database, a resource for orchid transcriptome assembly and gene annotations. The Orchidstra database has been under active development since 2013. To accommodate the increasing amount of orchid transcriptome data and house more comprehensive information, Orchidstra 2.0 has been built with a new database system to store the annotations of 510,947 protein-coding genes and 161,826 noncoding transcripts, covering 18 orchid species belonging to 12 genera in five subfamilies of Orchidaceae. We have improved the N50 size of protein-coding genes, provided new functional annotations (including protein-coding gene annotations, protein domain/family information, pathways analysis, Gene Ontology term assignments, orthologous genes across orchid species, cross-links to the database of model species, and miRNA information), and improved the user interface with better website performance. We also provide new database functionalities for database searching and sequence retrieval. Moreover, the Orchidstra 2.0 database incorporates detailed RNA-Seq gene expression data from various tissues and developmental stages in different orchid species. The database will be useful for gene prediction and gene family studies, and for exploring gene expression in orchid species. The Orchidstra 2.0 database is freely accessible at http://orchidstra2.abrc.sinica.edu.tw.",2017-01-01 +30208844,Web of microbes (WoM): a curated microbial exometabolomics database for linking chemistry and microbes.,"

Background

As microbiome research becomes increasingly prevalent in the fields of human health, agriculture and biotechnology, there exists a need for a resource to better link organisms and environmental chemistries. Exometabolomics experiments now provide assertions of the metabolites present within specific environments and how the production and depletion of metabolites is linked to specific microbes. This information could be broadly useful, from comparing metabolites across environments, to predicting competition and exchange of metabolites between microbes, and to designing stable microbial consortia. Here, we introduce Web of Microbes (WoM; freely available at: http://webofmicrobes.org ), the first exometabolomics data repository and visualization tool.

Description

WoM provides manually curated, direct biochemical observations on the changes to metabolites in an environment after exposure to microorganisms. The web interface displays a number of key features: (1) the metabolites present in a control environment prior to inoculation or microbial activation, (2) heatmap-like displays showing metabolite increases or decreases resulting from microbial activities, (3) a metabolic web displaying the actions of multiple organisms on a specified metabolite pool, (4) metabolite interaction scores indicating an organism's interaction level with its environment, potential for metabolite exchange with other organisms and potential for competition with other organisms, and (5) downloadable datasets for integration with other types of -omics datasets.

Conclusion

We anticipate that Web of Microbes will be a useful tool for the greater research community by making available manually curated exometabolomics results that can be used to improve genome annotations and aid in the interpretation and construction of microbial communities.",2018-09-12 +27974320,From 20th century metabolic wall charts to 21st century systems biology: database of mammalian metabolic enzymes.,"The organization of the mammalian genome into gene subsets corresponding to specific functional classes has provided key tools for systems biology research. Here, we have created a web-accessible resource called the Mammalian Metabolic Enzyme Database (https://hpcwebapps.cit.nih.gov/ESBL/Database/MetabolicEnzymes/MetabolicEnzymeDatabase.html) keyed to the biochemical reactions represented on iconic metabolic pathway wall charts created in the previous century. Overall, we have mapped 1,647 genes to these pathways, representing ~7 percent of the protein-coding genome. To illustrate the use of the database, we apply it to the area of kidney physiology. In so doing, we have created an additional database (Database of Metabolic Enzymes in Kidney Tubule Segments: https://hpcwebapps.cit.nih.gov/ESBL/Database/MetabolicEnzymes/), mapping mRNA abundance measurements (mined from RNA-Seq studies) for all metabolic enzymes to each of 14 renal tubule segments. We carry out bioinformatics analysis of the enzyme expression pattern among renal tubule segments and mine various data sources to identify vasopressin-regulated metabolic enzymes in the renal collecting duct.",2016-12-14 +34273447,Adipose and non-adipose perspectives of plant derived natural compounds for mitigation of obesity.,"

Ethnopharmacological relevance

Phyto-preparations and phyto-compounds, by their natural origin, easy availability, cost-effectiveness, and fruitful traditional uses based on accumulated experiences, have been extensively explored to mitigate the global burden of obesity.

Aim of this review

The review aimed to analyse and critically summarize the prospect of future anti-obesity drug leads from the extant array of phytochemicals for mitigation of obesity, using adipose related targets (adipocyte formation, lipid metabolism, and thermogenesis) and non-adipose targets (hepatic lipid metabolism, appetite, satiety, and pancreatic lipase activity). Phytochemicals as inhibitors of adipocyte differentiation, modulators of lipid metabolism, and thermogenic activators of adipocytes are specifically discussed with their non-adipose anti-obesogenic targets.

Materials and methods

PubMed, Google Scholar, Scopus, and SciFinder were accessed to collect data on traditional medicinal plants, compounds derived from plants, their reported anti-obesity mechanisms, and therapeutic targets. The taxonomically accepted name of each plant in this review has been vetted from ""The Plant List"" (www.theplantlist.org) or MPNS (http://mpns.kew.org).

Results

Available knowledge of a large number of phytochemicals, across a range of adipose and non-adipose targets, has been critically analysed and delineated by graphical and tabular depictions, towards mitigation of obesity. Neuro-endocrinal modulation in non-adipose targets brought into sharp dual focus, both non-adipose and adipose targets as the future of anti-obesity research. Numerous phytochemicals (Berberine, Xanthohumol, Ursolic acid, Guggulsterone, Tannic acid, etc.) have been found to be effectively reducing weight through lowered adipocyte formation, increased lipolysis, decreased lipogenesis, and enhanced thermogenesis. They have been affirmed as potential anti-obesity drugs of future because of their effectiveness yet having no threat to adipose or systemic insulin sensitivity.

Conclusion

Due to high molecular diversity and a greater ratio of benefit to risk, plant derived compounds hold high therapeutic potential to tackle obesity and associated risks. This review has been able to generate fresh perspectives on the anti-diabetic/anti-hyperglycemic/anti-obesity effect of phytochemicals. It has also brought into the focus that many phytochemicals demonstrating in vitro anti-obesogenic effects are yet to undergo in vivo investigation which could lead to potential phyto-molecules for dedicated anti-obesity action.",2021-07-15 +33649760,Annual incidence and severity of acute episodes in hereditary thrombotic thrombocytopenic purpura.,"Hereditary thrombotic thrombocytopenic purpura (hTTP) is a rare thrombotic microangiopathy characterized by severe congenital ADAMTS13 deficiency and recurring acute episodes causing morbidity and premature death. Information on the annual incidence and severity of acute episodes in patients with hTTP is largely lacking. This study reports prospective data on 87 patients from the Hereditary TTP Registry (clinicaltrials.gov #NCT01257269) for survival, frequency, and severity of acute episodes from enrollment until December 2019. The 87 patients, followed up for a median of 4.2 years (range, 0.01-15 years), had a median age at overt disease onset and at clinical diagnosis of 4.6 years and 18 years (range, 0.0-70 years for both), respectively. Forty-three patients received regular plasma prophylaxis, whereas 22 did not, and treatment changed over time or was unknown in the remaining 22. Forty-three patients experienced 131 acute episodes, of which 91 (69%) occurred in patients receiving regular prophylaxis. This resulted in an annual incidence of acute episodes of 0.36 (95% confidence interval [CI], 0.29-0.44) with regular plasma treatment and of 0.41 (95% CI, 0.30-0.56) without regular plasma treatment. 
More than one-third of acute episodes (n = 51) were documented in children <10 years of age at enrollment and were often triggered by infections. Their annual incidence of acute episodes was significantly higher than in patients aged >40 years (1.18 [95% CI, 0.88-1.55] vs 0.14 [95% CI, 0.08-0.23]). The prophylactic plasma infusion regimens used were insufficient to prevent acute episodes in many patients. Such regimens are burdensome, and caregivers, patients, and their guardians are reluctant to start regular plasma infusions, from which children particularly would benefit.",2021-06-01 +26599696,The Resource Identification Initiative: A Cultural Shift in Publishing.,"A central tenet in support of research reproducibility is the ability to uniquely identify research resources, i.e., reagents, tools, and materials that are used to perform experiments. However, current reporting practices for research resources are insufficient to identify the exact resources that are reported or to answer basic questions such as ""How did other studies use resource X?"" To address this issue, the Resource Identification Initiative was launched as a pilot project to improve the reporting standards for research resources in the Methods sections of articles and thereby improve identifiability and scientific reproducibility. The pilot engaged over 25 biomedical journal editors from most major publishers, as well as scientists and funding officials. Authors were asked to include Research Resource Identifiers (RRIDs) in their articles prior to publication for three resource types: antibodies, model organisms, and tools (i.e., software and databases). RRIDs are assigned by an authoritative database, for example, a model organism database for each type of resource. To make it easier for authors to obtain RRIDs, resources were aggregated from the appropriate databases and their RRIDs made available in a central Web portal (http://scicrunch.org/resources). 
RRIDs meet three key criteria: they are machine-readable, free to generate and access, and are consistent across publishers and journals. The pilot was launched in February of 2014 and over 300 articles have appeared that report RRIDs. The number of journals participating has expanded from the original 25 to more than 40, with RRIDs appearing in 62 different journals to date. Here we present an overview of the pilot project and its outcomes to date. We show that authors are able to identify resources and are supportive of the goals of the project. Identifiability of the resources post-pilot showed a dramatic improvement for all three resource types, suggesting that the project has had a significant impact on identifiability of research resources.",2016-01-01 +33087120,Pan-cancer analysis of differential DNA methylation patterns.,"

Background

DNA methylation is a key epigenetic regulator contributing to cancer development. To understand the role of DNA methylation in tumorigenesis, it is important to investigate and compare differential methylation (DM) patterns between normal and case samples across different cancer types. However, current pan-cancer analyses call DM separately for each cancer, which suffers from lower statistical power and fails to provide a comprehensive view for patterns across cancers.

Methods

In this work, we propose a rigorous statistical model, PanDM, to jointly characterize DM patterns across diverse cancer types. PanDM uses the hidden correlations in the combined dataset to improve statistical power through joint modeling. PanDM takes summary statistics from separate analyses as input and performs methylation site clustering, differential methylation detection, and pan-cancer pattern discovery. We demonstrate the favorable performance of PanDM using simulation data. We apply our model to 12 cancer methylome data collected from The Cancer Genome Atlas (TCGA) project. We further conduct ontology- and pathway-enrichment analyses to gain new biological insights into the pan-cancer DM patterns learned by PanDM.

Results

PanDM outperforms two types of separate analyses in the power of DM calling in the simulation study. Application of PanDM to TCGA data reveals 37 pan-cancer DM patterns in the 12 cancer methylomes, including both common and cancer-type-specific patterns. These 37 patterns are in turn used to group cancer types. Functional ontology and biological pathways enriched in the non-common patterns not only underpin the cancer-type-specific etiology and pathogenesis but also unveil the common environmental risk factors shared by multiple cancer types. Moreover, we also identify PanDM-specific DM CpG sites that the common strategy fails to detect.

Conclusions

PanDM is a powerful tool that provides a systematic way to investigate aberrant methylation patterns across multiple cancer types. Results from real data analyses suggest a novel angle for us to understand the common and specific DM patterns in different cancers. Moreover, as PanDM works on the summary statistics for each cancer type, the same framework can in principle be applied to pan-cancer analyses of other functional genomic profiles. We implement PanDM as an R package, which is freely available at http://www.sta.cuhk.edu.hk/YWei/PanDM.html .",2020-10-22 +31887789,ETph: enhancers and their targets in pig and human database.,"Enhancers, as the genomic non-coding sequences, play a key role in the activation of gene expression. They have been widely identified in the human genome. Pig is an important biomedical model for human health. Few studies have been performed to explore the enhancers in the pig genome. The human enhancer information may be useful to identify enhancers in the pig genome. In addition, the genetic background of pig traits could be useful to annotate human enhancers and diseases. Thus, in order to further study enhancers and their potential roles in human and pig, we developed a public database, ETph (Enhancers and their Targets in pig and human). ETph integrates the information on human enhancers, pig putative enhancers, target genes, pig QTL terms, human diseases, GO terms and the KEGG pathway. A total of 25 182 enhancers were identified in the pig genome using the human homology sequence information. Among them, 6232 high-confidence enhancers were used to build the ETph. ETph provides a convenient platform to search, browse and download data. Moreover, a web-based analytical tool was designed to visualize networks and topology graphs among pig putative enhancers, target genes, pig QTL traits and human diseases. ETph might provide a useful tool for researchers to investigate the genetic background of pig traits and human diseases. 
ETph is freely accessible at http://klab.sjtu.edu.cn/enhancer/.",2019-12-30 +33427013,A narrative systematic review of randomised controlled trials that compare cannulation techniques for haemodialysis.,"

Background

Cannulation of arteriovenous access for haemodialysis affects longevity of the access, associates with complications and affects patients' experiences of haemodialysis. Buttonhole and rope ladder techniques were developed to reduce complications. However, studies that compare these two techniques report disparate results. This systematic review performs an in-depth exploration of RCTs, with a specific focus on cannulation as a complex intervention.

Methods

A PICO question and protocol was developed as per PRISMA-P guidance and registered on PROSPERO (CRD42018094656 https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=112895). The systematic review included any RCT performed on adult patients with end-stage kidney disease undergoing cannulation of arteriovenous fistulae or grafts for in-centre haemodialysis, as performed by healthcare staff. Assessment of quality of RCTs and data extraction were performed by two co-authors independently. Data were extracted on the study design, intervention and comparator and outcomes, including patency, infection and patients' experiences.

Results

The literature search identified 241 records. Ten records met inclusion criteria, which described five different RCTs that compared buttonhole to either rope ladder or usual practice. Results were disparate, with patency and infection results varying. Pain Visual Analogue scores were the only measure used to capture patients' experiences and results were inconclusive. All RCTs had differences and limitations in study design that could explain the disparity in results.

Conclusion

Current evidence does not allow definitive conclusions as to whether buttonhole or rope ladder needling technique is superior. Future RCTs should describe interventions and comparators with adequate detail, embed process evaluation, use standardised outcome measures and build on feasibility studies to produce definitive results.",2021-01-10 +32427908,"The Ontario Climate Data Portal, a user-friendly portal of Ontario-specific climate projections.","An easily accessible climate data portal, http://yorku.ca/ocdp, was developed and officially launched in 2018 to disseminate a super ensemble of high-resolution regional climate change projections for the province of Ontario, Canada. The spatial resolution is ~10 km × ~10 km and temporal resolution is one day, UTC. The data covers 120 years from 1981 to 2100. This user-friendly portal provides users with thousands of static and interactive maps, decadal variation trend lines, summary tables, reports and terabytes of bias-corrected downscaled data. The data portal was generated with an emphasis on interactive visualization of climate change information for researchers and the public to understand to what extent climate could change locally under different emission scenarios in the future. This paper presents an introduction to the portal structure and functions, the large extent of the datasets available and the data development methodology.",2020-05-19 +31641782,The IPD Project: a centralised resource for the study of polymorphism in genes of the immune system.,"The Immuno Polymorphism Database (IPD), https://www.ebi.ac.uk/ipd/, is a set of specialist databases that enable the study of polymorphic genes which function as part of the vertebrate immune system. The major focus is on the hyperpolymorphic major histocompatibility complex (MHC) genes and the killer-cell immunoglobulin-like receptor (KIR) genes, by providing the official repository and primary source of sequence data. 
Databases are centred around humans as well as animals important for food security, for companionship and as disease models. The IPD project works with specialist groups or nomenclature committees who provide and manually curate individual sections before they are submitted for online publication. To reflect the recent advance of allele sequencing technologies and the increasing demands of novel tools for the analysis of genomic variation, the IPD project is undergoing a progressive redesign and reorganisation. In this review, recent updates and future developments are discussed, with a focus on the core concepts to better future-proof the project.",2019-10-22 +30416381,BERMP: a cross-species classifier for predicting m6A sites by integrating a deep learning algorithm and a random forest approach.,"N6-methyladenosine (m6A) is a prevalent RNA methylation modification involved in several biological processes. Hundreds or thousands of m6A sites identified from different species using high-throughput experiments provides a rich resource to construct in-silico approaches for identifying m6A sites. The existing m6A predictors are developed using conventional machine-learning (ML) algorithms and most are species-centric. In this paper, we develop a novel cross-species deep-learning classifier based on bidirectional Gated Recurrent Unit (BGRU) for the prediction of m6A sites. In comparison with conventional ML approaches, BGRU achieves outstanding performance for the Mammalia dataset that contains over fifty thousand m6A sites but inferior for the Saccharomyces cerevisiae dataset that covers around a thousand positives. The accuracy of BGRU is sensitive to the data size and the sensitivity is compensated by the integration of a random forest classifier with a novel encoding of enhanced nucleic acid content. 
The integrated approach dubbed as BGRU-based Ensemble RNA Methylation site Predictor (BERMP) has competitive performance in both cross-validation test and independent test. BERMP also outperforms existing m6A predictors for different species. Therefore, BERMP is a novel multi-species tool for identifying m6A sites with high confidence. This classifier is freely available at http://www.bioinfogo.org/bermp.",2018-09-07 +30076945,Use of Google Trends to Track Online Behavior and Interest in Kidney Stone Surgery.,"

Objective

To explore internet search trends data as a unique resource for monitoring online health information-seeking behavior. We utilized Google trends to sample population interest and search inquiries into surgical treatment options of kidney stones, and we examined the relative frequency of searches across the United States.

Methods

Google trends was queried via the Google Insights for Search (http://google.com/trends) using terms related to kidney stone surgeries from 2011 to 2017. ""Kidney stone surgery"" was identified to be the most common term. The search volume index graph for the U.S. was recorded as well as regional distribution and related queries. For specific surgical treatment modalities, the following medical terminology was used: percutaneous nephrolithotomy, extracorporeal shockwave lithotripsy, ureteroscopy, and laser lithotripsy. Each query was then compared and analyzed to assess changes in the interest in these search terms over time.

Results

The research trends for the search ""kidney stone surgery"" as well as specific surgical modalities remained constant over time. extracorporeal shockwave lithotripsy had the highest search volume index and laser lithotripsy had the lowest. The top 5 US states with greatest search volume was Tennessee, Indiana, Ohio, Michigan, and North Carolina, whereas the top 5 metropolitan regions were Atlanta, Tampa, Detroit, Philadelphia, and Boston.

Conclusion

Google trends data is a useful tool to analyze online health information-seeking behavior. Despite an increase in stone prevalence over the past decade, search rates have remained relatively constant. High relative search volume was seen from states within and outside the traditional stone belt, suggesting a possible changing trend.",2018-08-01 +,First Report of Tomato Mottle Mosaic Virus in Tomato Crops in China,"Tomato (Solanum lycopersicum L.), an economically important vegetable crop, is widely cultivated in China. Tomato can be infected with viruses in single or mixed infections, producing severe symptoms that result in significant yield reduction. To identify viruses infecting tomato, a provincial-wide virus survey was carried out in seven open-field tomato-producing areas of Hainan province during November 2016 to February 2017. A total of 170 tomato leaf samples showing foliar mottle, stunting, leaf distortion, and necrosis symptoms were collected. Eight randomly selected leaf samples were combined into one sample, and total RNA was extracted from the pooled sample with TRIzol Reagent (Invitrogen, Carlsbad, CA). A small RNA library was prepared with TruSeq Small RNA Library Preparation Kits (Illumina, San Diego, CA) and sequenced on an Illumina HiSeq 4000 sequencer. De novo assembly of the single-end reads was performed using Velvet assembler. The contigs were analyzed using NCBI’s BLAST program (https://www.ncbi.nlm.nih.gov/blast) against the viral RefSeq database. The whole next-generation sequencing procedure was performed by Biomarker Technologies Corporation (Beijing, China). The analysis generated 28,783,437 raw reads, apart from 15.09% of raw reads mapping to tomato mottle mosaic virus (ToMMV), there were sequence reads mapping to tomato yellow leaf curl virus, tobacco mosaic virus, southern tomato virus, cucumber mosaic virus, and tomato mosaic virus. 
To further confirm the presence of ToMMV in the eight leaf samples, reverse transcription polymerase chain reaction (RT-PCR) was performed using primer pair ToMMV-4491 (TAAAGGGGCGTTTTGTGGTG) and ToMMV-6309 (GCGTTCCAAGACAAAACCCT) designed based on eight available ToMMV full genomic sequences (GenBank accession nos. KU594507, KX898033, KR824951, KR824950, KT810183, KP202857, KF477193, and KX898034). The expected ≈1,820-bp PCR fragments (partial sequence of RNA dependent RNA polymerase, complete sequence of the movement protein and coat protein coding region of ToMMV) were amplified from three randomly selected symptomatic tomato samples. These fragments were cloned into the pMD18-T Vector (TaKaRa, Dalian, China) and sequenced (Beijing Genomics Institute, Shenzhen, China). BLASTn searches of the clones (GenBank accession nos. MG920804, MG920805, and MG920806) revealed that all sequences had 99.3 to 99.8% identity with currently available ToMMV full genomic sequences. To study the presence of ToMMV in the samples, RT-PCR assays using specific primers (ToMMV-4491/ToMMV-6309) were performed. Thirty-one out of the 170 samples tested were positive for ToMMV, suggesting that the virus was widespread in the field. ToMMV, a species in the genus Tobamovirus, was first reported from a tomato sample collected in Mexico in 2013 (Li et al. 2013). In just a few years, ToMMV has been reported to infect tomato and pepper in several countries. ToMMV also could infect plants in the Brassicaceae by sap transmission (Li et al. 2017). Previously, ToMMV was only found to infect the pepper grown in the field in China (Li et al. 2014). Our results confirmed the infection of tomato by ToMMV in Hainan, China. To the best of our knowledge, this is the first report of ToMMV naturally infecting tomato in China, thus providing further characterization of ToMMV and highlighting its potential as a worldwide threat to solanaceous vegetable crop production. 
Attention should be paid to this emerging viral disease, and measures should be taken to control the spread of ToMMV.",2018-10-01 +31404401,miRDRN-miRNA disease regulatory network: a tool for exploring disease and tissue-specific microRNA regulatory networks.,"

Background

MicroRNA (miRNA) regulates cellular processes by acting on specific target genes, and cellular processes proceed through multiple interactions often organized into pathways among genes and gene products. Hundreds of miRNAs and their target genes have been identified, as are many miRNA-disease associations. These, together with huge amounts of data on gene annotation, biological pathways, and protein-protein interactions are available in public databases. Here, using such data we built a database and web service platform, miRNA disease regulatory network (miRDRN), for users to construct disease and tissue-specific miRNA-protein regulatory networks, with which they may explore disease related molecular and pathway associations, or find new ones, and possibly discover new modes of drug action.

Methods

Data on disease-miRNA association, miRNA-target association and validation, gene-tissue association, gene-tumor association, biological pathways, human protein interaction, gene ID, gene ontology, gene annotation, and product were collected from publicly available databases and integrated. A large set of miRNA target-specific regulatory sub-pathways (RSPs) having the form (T, G 1, G 2) was built from the integrated data and stored, where T is a miRNA-associated target gene, G 1 (G 2) is a gene/protein interacting with T (G 1). Each sequence (T, G 1, G 2) was assigned a p-value weighted by the participation of the three genes in molecular interactions and reaction pathways.

Results

A web service platform, miRDRN (http://mirdrn.ncu.edu.tw/mirdrn/), was built. The database part of miRDRN currently stores 6,973,875 p-valued RSPs associated with 116 diseases in 78 tissue types built from 207 diseases-associated miRNA regulating 389 genes. miRDRN also provides facilities for the user to construct disease and tissue-specific miRNA regulatory networks from RSPs it stores, and to download and/or visualize parts or all of the product. User may use miRDRN to explore a single disease, or a disease-pair to gain insights on comorbidity. As demonstrations, miRDRN was applied: to explore the single disease colorectal cancer (CRC), in which 26 novel potential CRC target genes were identified; to study the comorbidity of the disease-pair Alzheimer's disease-Type 2 diabetes, in which 18 novel potential comorbid genes were identified; and, to explore possible causes that may shed light on recent failures of late-phase trials of anti-AD, BACE1 inhibitor drugs, in which genes downstream to BACE1 whose suppression may affect signal transduction were identified.",2019-08-06 +26342919,"Geroprotectors.org: a new, structured and curated database of current therapeutic interventions in aging and age-related disease.","As the level of interest in aging research increases, there is a growing number of geroprotectors, or therapeutic interventions that aim to extend the healthy lifespan and repair or reduce aging-related damage in model organisms and, eventually, in humans. There is a clear need for a manually-curated database of geroprotectors to compile and index their effects on aging and age-related diseases and link these effects to relevant studies and multiple biochemical and drug databases. Here, we introduce the first such resource, Geroprotectors (http://geroprotectors.org). Geroprotectors is a public, rapidly explorable database that catalogs over 250 experiments involving over 200 known or candidate geroprotectors that extend lifespan in model organisms. 
Each compound has a comprehensive profile complete with biochemistry, mechanisms, and lifespan effects in various model organisms, along with information ranging from chemical structure, side effects, and toxicity to FDA drug status. These are presented in a visually intuitive, efficient framework fit for casual browsing or in-depth research alike. Data are linked to the source studies or databases, providing quick and convenient access to original data. The Geroprotectors database facilitates cross-study, cross-organism, and cross-discipline analysis and saves countless hours of inefficient literature and web searching. Geroprotectors is a one-stop, knowledge-sharing, time-saving resource for researchers seeking healthy aging solutions.",2015-09-01 +32875815,Mapping Human Vulnerability to Extreme Heat: A Critical Assessment of Heat Vulnerability Indices Created Using Principal Components Analysis.,"

Background

Extreme heat poses current and future risks to human health. Heat vulnerability indices (HVIs), commonly developed using principal components analysis (PCA), are mapped to identify populations vulnerable to extreme heat. Few studies critically assess implications of analytic choices made when employing this methodology for fine-scale vulnerability mapping.

Objective

We investigated sensitivity of HVIs created by applying PCA to input variables and whether training input variables on heat-health data produced HVIs with similar spatial vulnerability patterns for Detroit, Michigan, USA.

Methods

We acquired 2010 Census tract and block group level data, land cover data, daily ambient apparent temperature, and all-cause mortality during May-September, 2000-2009. We used PCA to construct HVIs using: a) ""unsupervised""-PCA applied to variables selected a priori as risk factors for heat-related health outcomes; b) ""supervised""-PCA applied only to variables significantly correlated with proportion of all-cause mortality occurring on extreme heat days (i.e., days with 2-d mean apparent temperature above month-specific 95th percentiles).

Results

Unsupervised and supervised HVIs yielded differing spatial vulnerability patterns, depending on selected land cover input variables. Supervised PCA explained 62% of variance in the input variables and was applied on half the variables used in the unsupervised method. Census tract-level supervised HVI values were positively associated with increased proportion of mortality occurring on extreme heat days; supervised PCA could not be applied to block group data. Unsupervised HVI values were not associated with extreme heat mortality for either tracts or block groups.

Discussion

HVIs calculated using PCA are sensitive to input data and scale. Supervised HVIs may provide marginally more specific indicators of heat vulnerability than unsupervised HVIs. PCA-derived HVIs address correlation among vulnerability indicators, although the resulting output requires careful contextual interpretation beyond generating epidemiological research questions. Methods with reliably stable outputs should be leveraged for prioritizing heat interventions. https://doi.org/10.1289/EHP4030.",2020-09-02 +,Towards holomorphology in entomology: rapid and cost‐effective adult–larva matching using NGS barcodes,"In many taxa the morphology of females and immatures is poorly known because species descriptions and identification tools have a male bias. The root causes are problems with matching life‐history stages and genders belonging to the same species. Such matching is time‐consuming when conventional methods are used (e.g. rearing) and expensive when the stages are matched with DNA barcodes. Unfortunately, the lack of associations is not a trivial problem because it renders a large part of the phenome of insects unexplored, although larvae and females are useful sources of characters for descriptive and phylogenetic purposes. In addition, many collectors intentionally avoid females and immature stages, which skews survey results, interferes with collecting life‐history information, and makes it less likely that rare species are discovered. These problems even exist for well‐studied taxa like Odonata, where obtaining adult–larva matches relies largely on rearing. Here we demonstrate how the matching problem can be addressed with cost‐effective tagged amplicon sequencing of a 313‐bp segment of cox1 with next‐generation sequencing (NGS) (‘NGS barcoding’). We illustrate the value of this approach based on Singapore's odonate fauna which is of a similar size as the European fauna (Singapore, 122 extant species; Europe, 138 recorded species). 
We match the larvae and adults of 59 species by first creating a barcode database for 338 identified adult specimens representing 83 species. We then sequence 1178 larvae from a wide range of sources. We successfully barcode 1123 specimens, which leads to adult–larva matches for 59 species based on our own barcodes (55) and online barcode databases (4). With these additions, 84 of the 131 species recorded in Singapore have now been associated with a species name. Most common species are now matched (83%), and good progress has been made for vulnerable/near‐threatened (55%), endangered (53%), and critically endangered species (38%). We used nondestructive DNA extraction methods in order to be able to use high‐resolution imaging of matched larvae to establish a publicly available digital reference collection for odonates which is incorporated into ‘Biodiversity of Singapore’ (https://singapore.biodiversity.online/). We suggest that the methods described here are suitable for many insect taxa because NGS barcoding allows for fast and low‐cost matching of well‐studied life‐history stages with neglected semaphoronts (eggs, larvae, females). We estimate that the specimen‐specific amplicons in this study (c. 1500 specimens) can now be obtained within eight working days and that the laboratory and sequencing cost is c. US$600 (< US$0.40 per specimen).",2018-10-01 +32591635,Detection of copy-number variations from NGS data using read depth information: a diagnostic performance evaluation.,"The detection of copy-number variations (CNVs) from NGS data is underexploited as chip-based or targeted techniques are still commonly used. We assessed the performances of a workflow centered on CANOES, a bioinformatics tool based on read depth information. We applied our workflow to gene panel (GP) and whole-exome sequencing (WES) data, and compared CNV calls to quantitative multiplex PCR of short fluorescent fragments (QMSPF) or array comparative genomic hybridization (aCGH) results. 
From GP data of 3776 samples, we reached an overall positive predictive value (PPV) of 87.8%. This dataset included a complete comprehensive QMPSF comparison of four genes (60 exons) on which we obtained 100% sensitivity and specificity. From WES data, we first compared 137 samples with aCGH and filtered comparable events (exonic CNVs encompassing enough aCGH probes) and obtained an 87.25% sensitivity. The overall PPV was 86.4% following the targeted confirmation of candidate CNVs from 1056 additional WES. In addition, our CANOES-centered workflow on WES data allowed the detection of CNVs with a resolution of single exons, allowing the detection of CNVs that were missed by aCGH. Overall, switching to an NGS-only approach should be cost-effective as it allows a reduction in overall costs together with likely stable diagnostic yields. Our bioinformatics pipeline is available at: https://gitlab.bioinfo-diag.fr/nc4gpm/canoes-centered-workflow .",2020-06-26 +33651768,Can a Deep-learning Model for the Automated Detection of Vertebral Fractures Approach the Performance Level of Human Subspecialists?,"

Background

Vertebral fractures are the most common osteoporotic fractures in older individuals. Recent studies suggest that the performance of artificial intelligence is equal to humans in detecting osteoporotic fractures, such as fractures of the hip, distal radius, and proximal humerus. However, whether artificial intelligence performs as well in the detection of vertebral fractures on plain lateral spine radiographs has not yet been reported.

Questions/purposes

(1) What is the accuracy, sensitivity, specificity, and interobserver reliability (kappa value) of an artificial intelligence model in detecting vertebral fractures, based on Genant fracture grades, using plain lateral spine radiographs compared with values obtained by human observers? (2) Do patients' clinical data, including the anatomic location of the fracture (thoracic or lumbar spine), T-score on dual-energy x-ray absorptiometry, or fracture grade severity, affect the performance of an artificial intelligence model? (3) How does the artificial intelligence model perform on external validation?

Methods

Between 2016 and 2018, 1019 patients older than 60 years were treated for vertebral fractures in our institution. Seventy-eight patients were excluded because of missing CT or MRI scans (24% [19]), poor image quality in plain lateral radiographs of spines (54% [42]), multiple myeloma (5% [4]), and prior spine instrumentation (17% [13]). The plain lateral radiographs of 941 patients (one radiograph per person), with a mean age of 76 ± 12 years, and 1101 vertebral fractures between T7 and L5 were retrospectively evaluated for training (n = 565), validating (n = 188), and testing (n = 188) of an artificial intelligence deep-learning model. The gold standard for diagnosis (ground truth) of a vertebral fracture is the interpretation of the CT or MRI reports by a spine surgeon and a radiologist independently. If there were any disagreements between human observers, the corresponding CT or MRI images would be rechecked by them together to reach a consensus. For the Genant classification, the injured vertebral body height was measured in the anterior, middle, and posterior third. Fractures were classified as Grade 1 (< 25%), Grade 2 (26% to 40%), or Grade 3 (> 40%). The framework of the artificial intelligence deep-learning model included object detection, data preprocessing of radiographs, and classification to detect vertebral fractures. Approximately 90 seconds was needed to complete the procedure and obtain the artificial intelligence model results when applied clinically. The accuracy, sensitivity, specificity, interobserver reliability (kappa value), receiver operating characteristic curve, and area under the curve (AUC) were analyzed. The bootstrapping method was applied to our testing dataset and external validation dataset. The accuracy, sensitivity, and specificity were used to investigate whether fracture anatomic location or T-score in dual-energy x-ray absorptiometry report affected the performance of the artificial intelligence model. 
The receiver operating characteristic curve and AUC were used to investigate the relationship between the performance of the artificial intelligence model and fracture grade. External validation with a similar age population and plain lateral radiographs from another medical institute was also performed to investigate the performance of the artificial intelligence model.

Results

The artificial intelligence model with ensemble method demonstrated excellent accuracy (93% [773 of 830] of vertebrae), sensitivity (91% [129 of 141]), and specificity (93% [644 of 689]) for detecting vertebral fractures of the lumbar spine. The interobserver reliability (kappa value) of the artificial intelligence performance and human observers for thoracic and lumbar vertebrae were 0.72 (95% CI 0.65 to 0.80; p < 0.001) and 0.77 (95% CI 0.72 to 0.83; p < 0.001), respectively. The AUCs for Grades 1, 2, and 3 vertebral fractures were 0.919, 0.989, and 0.990, respectively. The artificial intelligence model with ensemble method demonstrated poorer performance for discriminating normal osteoporotic lumbar vertebrae, with a specificity of 91% (260 of 285) compared with nonosteoporotic lumbar vertebrae, with a specificity of 95% (222 of 234). There was a higher sensitivity 97% (60 of 62) for detecting osteoporotic (dual-energy x-ray absorptiometry T-score ≤ -2.5) lumbar vertebral fractures, implying easier detection, than for nonosteoporotic vertebral fractures (83% [39 of 47]). The artificial intelligence model also demonstrated better detection of lumbar vertebral fractures compared with detection of thoracic vertebral fractures based on the external dataset using various radiographic techniques. Based on the dataset for external validation, the overall accuracy, sensitivity, and specificity on bootstrapping method were 89%, 83%, and 95%, respectively.

Conclusion

The artificial intelligence model detected vertebral fractures on plain lateral radiographs with high accuracy, sensitivity, and specificity, especially for osteoporotic lumbar vertebral fractures (Genant Grades 2 and 3). The rapid reporting of results using this artificial intelligence model may improve the efficiency of diagnosing vertebral fractures. The testing model is available at http://140.113.114.104/vght_demo/corr/. One or multiple plain lateral radiographs of the spine in the Digital Imaging and Communications in Medicine format can be uploaded to see the performance of the artificial intelligence model.

Level of evidence

Level II, diagnostic study.",2021-07-01 +29890119,FlyXCDB-A Resource for Drosophila Cell Surface and Secreted Proteins and Their Extracellular Domains.,"Genomes of metazoan organisms possess a large number of genes encoding cell surface and secreted (CSS) proteins that carry out crucial functions in cell adhesion and communication, signal transduction, extracellular matrix establishment, nutrient digestion and uptake, immunity, and developmental processes. We developed the FlyXCDB database (http://prodata.swmed.edu/FlyXCDB) that provides a comprehensive resource to investigate extracellular (XC) domains in CSS proteins of Drosophila melanogaster, the most studied insect model organism in various aspects of animal biology. More than 300 Drosophila XC domains were discovered in Drosophila CSS proteins encoded by over 2500 genes through analyses of computational predictions of signal peptide, transmembrane (TM) segment, and GPI-anchor signal sequence, profile-based sequence similarity searches, gene ontology, and literature. These domains were classified into six classes mainly based on their molecular functions, including protein-protein interactions (class P), signaling molecules (class S), binding of non-protein molecules or groups (class B), enzyme homologs (class E), enzyme regulation and inhibition (class R), and unknown molecular function (class U). Main cellular functions such as cell adhesion, cell signaling, and extracellular matrix composition were described for the most abundant domains in each functional class. 
We assigned cell membrane topology categories (E, secreted; S, type I/III single-pass TM; T, type II single-pass TM; M, multi-pass TM; and G, GPI-anchored) to the products of genes with XC domains and investigated their regulation by mechanisms such as alternative splicing and stop codon readthrough.",2018-06-08 +32613038,Data on differentially expressed proteins in rock inhibitor-treated human trabecular meshwork cells using SWATH-based proteomics.,"Rho-associated coiled coil-forming protein kinase (ROCK) inhibitors represent a novel class of anti-glaucoma drugs because of their ocular hypotensive effects. However, the underlying mechanisms responsible for lowering intraocular pressure (IOP) are not completely clear. The protein profile changes in primary human trabecular meshwork (TM) cells after two days treatment with a ROCK inhibitor were studied using label-free SWATH acquisition. These results provided significant data of key protein candidates underlying the effect of ROCK inhibitor. Using the sensitive label-free mass spectrometry approach with data-independent acquisition (SWATH-MS), we established a comprehensive TM proteome library. All raw data generated from IDA and SWATH acquisitions were uploaded and published in the Peptide Atlas public repository (http://www.peptideatlas.org/) for general release (Data ID PASS01254).",2020-06-12 +31130993,DiVenn: An Interactive and Integrated Web-Based Visualization Tool for Comparing Gene Lists.,"Gene expression data generated from multiple biological samples (mutant, double mutant, and wild-type) are often compared via Venn diagram tools. It is of great interest to know the expression pattern between overlapping genes and their associated gene pathways or gene ontology (GO) terms. 
We developed DiVenn (Dive into the Venn diagram and create a force directed graph)-a novel web-based tool that compares gene lists from multiple RNA-Seq experiments in a force-directed graph, which shows the gene regulation levels for each gene and integrated KEGG pathway and gene ontology knowledge for the data visualization. DiVenn has four key features: (1) informative force-directed graph with gene expression levels to compare multiple data sets; (2) interactive visualization with biological annotations and integrated pathway and GO databases, which can be used to subset or highlight gene nodes to pathway or GO terms of interest in the graph; (3) Pathway and GO enrichment analysis of all or selected genes in the graph; and (4) high resolution image and gene-associated information export. DiVenn is freely available at http://divenn.noble.org/.",2019-05-03 +33830015,"CEM500K, a large-scale heterogeneous unlabeled cellular electron microscopy image dataset for deep learning. ","Automated segmentation of cellular electron microscopy (EM) datasets remains a challenge. Supervised deep learning (DL) methods that rely on region-of-interest (ROI) annotations yield models that fail to generalize to unrelated datasets. Newer unsupervised DL algorithms require relevant pre-training images, however, pre-training on currently available EM datasets is computationally expensive and shows little value for unseen biological contexts, as these datasets are large and homogeneous. To address this issue, we present CEM500K, a nimble 25 GB dataset of 0.5 × 10^6 unique 2D cellular EM images curated from nearly 600 three-dimensional (3D) and 10,000 two-dimensional (2D) images from >100 unrelated imaging projects. We show that models pre-trained on CEM500K learn features that are biologically relevant and resilient to meaningful image augmentations. 
Critically, we evaluate transfer learning from these pre-trained models on six publicly available and one newly derived benchmark segmentation task and report state-of-the-art results on each. We release the CEM500K dataset, pre-trained models and curation pipeline for model building and further expansion by the EM community. Data and code are available at https://www.ebi.ac.uk/pdbe/emdb/empiar/entry/10592/ and https://git.io/JLLTz.",2021-04-08 +31077305,Entrezpy: a Python library to dynamically interact with the NCBI Entrez databases.,"

Summary

Entrezpy is a Python library that automates the querying and downloading of data from the Entrez databases at National Center for Biotechnology Information by interacting with E-Utilities. Entrezpy implements complex queries by automatically creating E-Utility parameters from the results obtained that can then be used directly in subsequent queries. Entrezpy also allows the user to cache and retrieve results locally, implements interactions with all Entrez databases as part of an analysis pipeline and adjusts parameters within an ongoing query or using prior results. Entrezpy's modular design enables it to easily extend and adjust existing E-Utility functions.

Availability and implementation

Entrezpy is implemented in Python 3 (≥3.6) and depends only on the Python Standard Library. It is available via PyPi (https://pypi.org/project/entrezpy/) and at https://gitlab.com/ncbipy/entrezpy.git. Entrezpy is licensed under the LGPLv3 and also at http://entrezpy.readthedocs.io/.",2019-11-01 +29040751,EpiDenovo: a platform for linking regulatory de novo mutations to developmental epigenetics and diseases.,"De novo mutations (DNMs) have been shown to be a major cause of severe early-onset genetic disorders such as autism spectrum disorder and intellectual disability. Over one million DNMs have been identified in developmental disorders by next generation sequencing, but linking these DNMs to the genes that they impact remains a challenge, as the majority of them are embedded in non-coding regions. As most developmental diseases occur in the early stages of development or during childhood, it is crucial to clarify the details of epigenetic regulation in early development in order to interpret the mechanisms underlying developmental disorders. Here, we develop EpiDenovo, a database that is freely available at http://www.epidenovo.biols.ac.cn/, and which provides the associations between embryonic epigenomes and DNMs in developmental disorders, including several neuropsychiatric disorders and congenital heart disease. EpiDenovo provides an easy-to-use web interface allowing users rapidly to find the epigenetic signatures of DNMs and the expression patterns of the genes that they regulate during embryonic development. In summary, EpiDenovo is a useful resource for selecting candidate genes for further functional studies in embryonic development, and for investigating regulatory DNMs as well as other genetic variants causing or underlying developmental disorders.",2018-01-01 +33608514,Robust inference of kinase activity using functional networks.,"Mass spectrometry enables high-throughput screening of phosphoproteins across a broad range of biological contexts. 
When complemented by computational algorithms, phospho-proteomic data allows the inference of kinase activity, facilitating the identification of dysregulated kinases in various diseases including cancer, Alzheimer's disease and Parkinson's disease. To enhance the reliability of kinase activity inference, we present a network-based framework, RoKAI, that integrates various sources of functional information to capture coordinated changes in signaling. Through computational experiments, we show that phosphorylation of sites in the functional neighborhood of a kinase are significantly predictive of its activity. The incorporation of this knowledge in RoKAI consistently enhances the accuracy of kinase activity inference methods while making them more robust to missing annotations and quantifications. This enables the identification of understudied kinases and will likely lead to the development of novel kinase inhibitors for targeted therapy of many diseases. RoKAI is available as web-based tool at http://rokai.io .",2021-02-19 +33828806,Two hours in Hollywood: A manually annotated ground truth data set of eye movements during movie clip watching. ,"In this short article we present our manual annotation of the eye movement events in a subset of the large-scale eye tracking data set Hollywood2. Our labels include fixations, saccades, and smooth pursuits, as well as a noise event type (the latter representing either blinks, loss of tracking, or physically implausible signals). In order to achieve more consistent annotations, the gaze samples were labelled by a novice rater based on rudimentary algorithmic suggestions, and subsequently corrected by an expert rater. Overall, we annotated eye movement events in the recordings corresponding to 50 randomly selected test set clips and 6 training set clips from Hollywood2, which were viewed by 16 observers and amount to a total of approximately 130 minutes of gaze data. 
In these labels, 62.4% of the samples were attributed to fixations, 9.1% - to saccades, and, notably, 24.2% - to pursuit (the remainder marked as noise). After evaluation of 15 published eye movement classification algorithms on our newly collected annotated data set, we found that the most recent algorithms perform very well on average, and even reach human-level labelling quality for fixations and saccades, but all have a much larger room for improvement when it comes to smooth pursuit classification. The data set is made available at https://gin.g-node.org/ioannis.agtzidis/hollywood2_em.",2020-07-27 +30548723,CropSNPdb: a database of SNP array data for Brassica crops and hexaploid bread wheat.,"Advances in sequencing technology have led to a rapid rise in the genomic data available for plants, driving new insights into the evolution, domestication and improvement of crops. Single nucleotide polymorphisms (SNPs) are a major component of crop genomic diversity, and are invaluable as genetic markers in research and breeding programs. High-throughput SNP arrays, or 'SNP chips', can generate reproducible sets of informative SNP markers and have been broadly adopted. Although there are many public repositories for sequencing data, which are routinely uploaded, there are no formal repositories for crop SNP array data. To make SNP array data more easily accessible, we have developed CropSNPdb (http://snpdb.appliedbioinformatics.com.au), a database for SNP array data produced by the Illumina Infinium™ hexaploid bread wheat (Triticum aestivum) 90K and Brassica 60K arrays. We currently host SNPs from datasets covering 526 Brassica lines and 309 bread wheat lines, and provide search, download and upload utilities for users. CropSNPdb provides a useful repository for these data, which can be applied for a range of genomics and molecular crop-breeding activities.",2019-01-28 +28875065,BioFuelDB: a database and prediction server of enzymes involved in biofuels production.,"

Background

In light of the rapid decrease in fossil fuel reserves and an increasing demand for energy, novel methods are required to explore alternative biofuel production processes to alleviate these pressures. A wide variety of molecules which can either be used as biofuels or as biofuel precursors are produced using microbial enzymes. However, the common challenges in the industrial implementation of enzyme catalysis for biofuel production are the unavailability of a comprehensive biofuel enzyme resource, low efficiency of known enzymes, and limited availability of enzymes which can function under extreme conditions in the industrial processes.

Methods

We have developed a comprehensive database of known enzymes with proven or potential applications in biofuel production through text mining of PubMed abstracts and other publicly available information. A total of 131 enzymes with a role in biofuel production were identified and classified into six enzyme classes and four broad application categories namely 'Alcohol production', 'Biodiesel production', 'Fuel Cell' and 'Alternate biofuels'. A prediction tool 'Benz' was developed to identify and classify novel homologues of the known biofuel enzyme sequences from sequenced genomes and metagenomes. 'Benz' employs a hybrid approach incorporating HMMER 3.0 and RAPSearch2 programs to provide high accuracy and high speed for prediction.

Results

Using the Benz tool, 153,754 novel homologues of biofuel enzymes were identified from 23 diverse metagenomic sources. The comprehensive data of curated biofuel enzymes, their novel homologs identified from diverse metagenomes, and the hybrid prediction tool Benz are presented as a web server which can be used for the prediction of biofuel enzymes from genomic and metagenomic datasets. The database and the Benz tool are publicly available at http://metabiosys.iiserb.ac.in/biofueldb and http://metagenomics.iiserb.ac.in/biofueldb.",2017-08-28 +32481589,CSI NGS Portal: An Online Platform for Automated NGS Data Analysis and Sharing. ,"Next-generation sequencing (NGS) has been a widely-used technology in biomedical research for understanding the role of molecular genetics of cells in health and disease. A variety of computational tools have been developed to analyse the vastly growing NGS data, which often require bioinformatics skills, tedious work and a significant amount of time. To facilitate data processing steps minding the gap between biologists and bioinformaticians, we developed CSI NGS Portal, an online platform which gathers established bioinformatics pipelines to provide fully automated NGS data analysis and sharing in a user-friendly website. The portal currently provides 16 standard pipelines for analysing data from DNA, RNA, smallRNA, ChIP, RIP, 4C, SHAPE, circRNA, eCLIP, Bisulfite and scRNA sequencing, and is flexible to expand with new pipelines. The users can upload raw data in FASTQ format and submit jobs in a few clicks, and the results will be self-accessible via the portal to view/download/share in real-time. The output can be readily used as the final report or as input for other tools depending on the pipeline. Overall, CSI NGS Portal helps researchers rapidly analyse their NGS data and share results with colleagues without the aid of a bioinformatician. 
The portal is freely available at: https://csibioinfo.nus.edu.sg/csingsportal.",2020-05-28 +33032116,A novel variable selection method based on combined moving window and intelligent optimization algorithm for variable selection in chemical modeling.,"We propose a new wavelength selection algorithm based on combined moving window (CMW) and variable dimension particle swarm optimization (VDPSO) algorithm. CMW retains the advantages of the moving window algorithm, and different windows can overlap each other to realize automatic optimization of spectral interval width and number. VDPSO algorithms improve the PSO algorithm. They can search the data space in different dimensions, and reduce the risk of limited local extrema and overfitting. Four different high-performance variable selection algorithms-BOSS, VCPA, iVISSA and IRF-are compared in three NIR data sets (corn, beer and fuel). The results show that VDPSO-CMW has better performance. The Matlab codes for implementing PSO-CMW and VDPSO-CMW are freely available on the website: https://www.mathworks.com/matlabcentral/fileexchange/75828-a-variable-selection-method.",2020-09-25 +33135062,RNA inter-nucleotide 3D closeness prediction by deep residual neural networks.,"

Motivation

Recent years have witnessed that the inter-residue contact/distance in proteins could be accurately predicted by deep neural networks, which significantly improve the accuracy of predicted protein structure models. In contrast, fewer studies have been done for the prediction of RNA inter-nucleotide 3D closeness.

Results

We proposed a new algorithm named RNAcontact for the prediction of RNA inter-nucleotide 3D closeness. RNAcontact was built based on the deep residual neural networks. The covariance information from multiple sequence alignments and the predicted secondary structure were used as the input features of the networks. Experiments show that RNAcontact achieves the respective precisions of 0.8 and 0.6 for the top L/10 and L (where L is the length of an RNA) predictions on an independent test set, significantly higher than other evolutionary coupling methods. Analysis shows that about 1/3 of the correctly predicted 3D closenesses are not base pairings of secondary structure, which are critical to the determination of RNA structure. In addition, we demonstrated that the predicted 3D closeness could be used as distance restraints to guide RNA structure folding by the 3dRNA package. More accurate models could be built by using the predicted 3D closeness than the models without using 3D closeness.

Availability and implementation

The webserver and a standalone package are available at: http://yanglab.nankai.edu.cn/RNAcontact/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +31398129,Identifying Drug Resistant miRNAs Using Entropy Based Ranking.,"MicroRNAs play an important role in controlling drug sensitivity and resistance in cancer. Identification of responsible miRNAs for drug resistance can enhance the effectiveness of treatment. A new set theoretic entropy measure (SPEM) is defined to determine the relevance and level of confidence of miRNAs in deciding their drug resistant nature. Here, a pattern is represented by a pair of values. One of them implies the degree of its belongingness (fuzzy membership) to a class and the other represents the actual class of origin (crisp membership). A measure, called granular probability, is defined that determines the confidence level of having a particular pair of membership values. The granules used to compute the said probability are formed by a histogram based method where each bin of a histogram is considered as one granule. The width and number of the bins are automatically determined by the algorithm. The set thus defined, comprising a pair of membership values and the confidence level for having them, is used for the computation of SPEM and thereby identifying the drug resistant miRNAs. The efficiency of SPEM is demonstrated extensively on six data sets. While the achieved F-score in classifying sensitive and resistant samples ranges between 0.31 & 0.50 using all the miRNAs by SVM classifier, the same score varies from 0.67 to 0.94 using only the top 1 percent drug resistant miRNAs. Superiority of the proposed method as compared to some existing ones is established in terms of F-score. The significance of the top 1 percent miRNAs in corresponding cancer is also verified by the different articles based on biological investigations. 
Source code of SPEM is available at http://www.jayanta.droppages.com/SPEM.html.",2021-05-01 +31510641,"ADAPTIVE: leArning DAta-dePendenT, concIse molecular VEctors for fast, accurate metabolite identification from tandem mass spectra.","

Motivation

Metabolite identification is an important task in metabolomics to enhance the knowledge of biological systems. There have been a number of machine learning-based methods proposed for this task, which predict a chemical structure of a given spectrum through an intermediate (chemical structure) representation called molecular fingerprints. They usually have two steps: (i) predicting fingerprints from spectra; (ii) searching chemical compounds (in database) corresponding to the predicted fingerprints. Fingerprints are feature vectors, which are usually very large to cover all possible substructures and chemical properties, and therefore heavily redundant, in the sense of having many molecular (sub)structures irrelevant to the task, causing limited predictive performance and slow prediction.

Results

We propose ADAPTIVE, which has two parts: learning two mappings (i) from structures to molecular vectors and (ii) from spectra to molecular vectors. The first part learns molecular vectors for metabolites from given data, to be consistent with both spectra and chemical structures of metabolites. In more detail, molecular vectors are generated by a model, being parameterized by a message passing neural network, and parameters are estimated by maximizing the correlation between molecular vectors and the corresponding spectra in terms of Hilbert-Schmidt Independence Criterion. Molecular vectors generated by this model are compact and importantly adaptive (specific) to both given data and task of metabolite identification. The second part uses input output kernel regression (IOKR), the current cutting-edge method of metabolite identification. We empirically confirmed the effectiveness of ADAPTIVE by using a benchmark data, where ADAPTIVE outperformed the original IOKR in both predictive performance and computational efficiency.

Availability and implementation

The code will be accessed through http://www.bic.kyoto-u.ac.jp/pathway/tools/ADAPTIVE after the acceptance of this article.",2019-07-01 +33539279,BSGatlas: a unified Bacillus subtilis genome and transcriptome annotation atlas with enhanced information access. ,"A large part of our current understanding of gene regulation in Gram-positive bacteria is based on Bacillus subtilis, as it is one of the most well studied bacterial model systems. The rapid growth in data concerning its molecular and genomic biology is distributed across multiple annotation resources. Consequently, the interpretation of data from further B. subtilis experiments becomes increasingly challenging in both low- and large-scale analyses. Additionally, B. subtilis annotation of structured RNA and non-coding RNA (ncRNA), as well as the operon structure, is still lagging behind the annotation of the coding sequences. To address these challenges, we created the B. subtilis genome atlas, BSGatlas, which integrates and unifies multiple existing annotation resources. Compared to any of the individual resources, the BSGatlas contains twice as many ncRNAs, while improving the positional annotation for 70 % of the ncRNAs. Furthermore, we combined known transcription start and termination sites with lists of known co-transcribed gene sets to create a comprehensive transcript map. The combination with transcription start/termination site annotations resulted in 717 new sets of co-transcribed genes and 5335 untranslated regions (UTRs). In comparison to existing resources, the number of 5' and 3' UTRs increased nearly fivefold, and the number of internal UTRs doubled. The transcript map is organized in 2266 operons, which provides transcriptional annotation for 92 % of all genes in the genome compared to the at most 82 % by previous resources. We predicted an off-target-aware genome-wide library of CRISPR-Cas9 guide RNAs, which we also linked to polycistronic operons. 
We provide the BSGatlas in multiple forms: as a website (https://rth.dk/resources/bsgatlas/), an annotation hub for display in the UCSC genome browser, supplementary tables and standardized GFF3 format, which can be used in large scale -omics studies. By complementing existing resources, the BSGatlas supports analyses of the B. subtilis genome and its molecular biology with respect to not only non-coding genes but also genome-wide transcriptional relationships of all genes.",2021-02-01 +32840574,KORP-PL: a coarse-grained knowledge-based scoring function for protein-ligand interactions.,"

Motivation

Despite the progress made in studying protein-ligand interactions and the widespread application of docking and affinity prediction tools, improving their precision and efficiency still remains a challenge. Computational approaches based on the scoring of docking conformations with statistical potentials constitute a popular alternative to more accurate but costly physics-based thermodynamic sampling methods. In this context, a minimalist and fast sidechain-free knowledge-based potential with a high docking and screening power can be very useful when screening a big number of putative docking conformations.

Results

Here, we present a novel coarse-grained potential defined by a 3D joint probability distribution function that only depends on the pairwise orientation and position between protein backbone and ligand atoms. Despite its extreme simplicity, our approach yields very competitive results with the state-of-the-art scoring functions, especially in docking and screening tasks. For example, we observed a twofold improvement in the median 5% enrichment factor on the DUD-E benchmark compared to Autodock Vina results. Moreover, our results prove that a coarse sidechain-free potential is sufficient for a very successful docking pose prediction.

Availability and implementation

The standalone version of KORP-PL with the corresponding tests and benchmarks are available at https://team.inria.fr/nano-d/korp-pl/ and https://chaconlab.org/modeling/korp-pl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +32580745,FSBC: fast string-based clustering for HT-SELEX data.,"

Background

The combination of systematic evolution of ligands by exponential enrichment (SELEX) and deep sequencing is termed high-throughput (HT)-SELEX, which enables searching aptamer candidates from a massive amount of oligonucleotide sequences. A clustering method is an important procedure to identify sequence groups including aptamer candidates for evaluation with experimental analysis. In general, aptamer includes a specific target binding region, which is necessary for binding to the target molecules. The length of the target binding region varies depending on the target molecules and/or binding styles. Currently available clustering methods for HT-SELEX only estimate clusters based on the similarity of full-length sequences or limited length of motifs as target binding regions. Hence, a clustering method considering the target binding region with different lengths is required. Moreover, to handle such huge data and to save sequencing cost, a clustering method with fast calculation from a single round of HT-SELEX data, not multiple rounds, is also preferred.

Results

We developed fast string-based clustering (FSBC) for HT-SELEX data. FSBC was designed to estimate clusters by searching various lengths of over-represented strings as target binding regions. FSBC was also designed for fast calculation with search space reduction from a single round, typically the final round, of HT-SELEX data considering imbalanced nucleobases of the aptamer selection process. The calculation time and clustering accuracy of FSBC were compared with those of four conventional clustering methods, FASTAptamer, AptaCluster, APTANI, and AptaTRACE, using HT-SELEX data (>15 million oligonucleotide sequences). FSBC, AptaCluster, and AptaTRACE could complete the clustering for all sequence data, and FSBC and AptaTRACE performed higher clustering accuracy. FSBC showed the highest clustering accuracy and had the second fastest calculation speed among all methods compared.

Conclusion

FSBC is applicable to a large HT-SELEX dataset, which can facilitate the accurate identification of groups including aptamer candidates.

Availability of data and materials

FSBC is available at http://www.aoki.ecei.tohoku.ac.jp/fsbc/.",2020-06-24 +33854526,GeenaR: A Web Tool for Reproducible MALDI-TOF Analysis.,"Mass spectrometry is a widely applied technology with a strong impact in the proteomics field. MALDI-TOF is a combined technology in mass spectrometry with many applications in characterizing biological samples from different sources, such as the identification of cancer biomarkers, the detection of food frauds, the identification of doping substances in athletes' fluids, and so on. The massive quantity of data, in the form of mass spectra, are often biased and altered by different sources of noise. Therefore, extracting the most relevant features that characterize the samples is often challenging and requires combining several computational methods. Here, we present GeenaR, a novel web tool that provides a complete workflow for pre-processing, analyzing, visualizing, and comparing MALDI-TOF mass spectra. GeenaR is user-friendly, provides many different functionalities for the analysis of the mass spectra, and supports reproducible research since it produces a human-readable report that contains function parameters, results, and the code used for processing the mass spectra. First, we illustrate the features available in GeenaR. Then, we describe its internal structure. Finally, we prove its capabilities in analyzing oncological datasets by presenting two case studies related to ovarian cancer and colorectal cancer. GeenaR is available at http://proteomics.hsanmartino.it/geenar/.",2021-03-29 +33361798,OCTAD: an open workspace for virtually screening therapeutics targeting precise cancer patient groups using gene expression features.,"As the field of precision medicine progresses, treatments for patients with cancer are starting to be tailored to their molecular as well as their clinical features. 
The emerging cancer subtypes defined by these molecular features require that dedicated resources be used to assist the discovery of drug candidates for preclinical evaluation. Voluminous gene expression profiles of patients with cancer have been accumulated in public databases, enabling the creation of cancer-specific expression signatures. Meanwhile, large-scale gene expression profiles of cellular responses to chemical compounds have also recently become available. By matching the cancer-specific expression signature to compound-induced gene expression profiles from large drug libraries, researchers can prioritize small molecules that present high potency to reverse expression of signature genes for further experimental testing of their efficacy. This approach has proven to be an efficient and cost-effective way to identify efficacious drug candidates. However, the success of this approach requires multiscale procedures, imposing considerable challenges to many labs. To address this, we developed Open Cancer TherApeutic Discovery (OCTAD; http://octad.org ): an open workspace for virtually screening compounds targeting precise groups of patients with cancer using gene expression features. Its database includes 19,127 patient tissue samples covering more than 50 cancer types and expression profiles for 12,442 distinct compounds. The program is used to perform deep-learning-based reference tissue selection, disease gene expression signature creation, drug reversal potency scoring and in silico validation. OCTAD is available as a web portal and a standalone R package to allow experimental and computational scientists to easily navigate the tool.",2020-12-23 +29178828,CrusTF: a comprehensive resource of transcriptomes for evolutionary and functional studies of crustacean transcription factors.,"

Background

Crustacea, the second largest subphylum of Arthropoda, includes species of major ecological and economic importance, such as crabs, lobsters, crayfishes, shrimps, and barnacles. With the rapid development of crustacean aquaculture and biodiversity loss, understanding the gene regulatory mechanisms of growth, reproduction, and development of crustaceans is crucial to both aquaculture development and biodiversity conservation of this group of organisms. In these biological processes, transcription factors (TFs) play a vital role in regulating gene expression. However, crustacean transcription factors are still largely unknown, because the lack of complete genome sequences of most crustacean species hampers the studies on their transcriptional regulation on a system-wide scale. Thus, the current TF databases derived from genome sequences contain TF information for only a few crustacean species and are insufficient to elucidate the transcriptional diversity of such a large animal group.

Results

Our database CrusTF ( http://qinlab.sls.cuhk.edu.hk/CrusTF ) provides comprehensive information for evolutionary and functional studies on the crustacean transcriptional regulatory system. CrusTF fills the knowledge gap of transcriptional regulation in crustaceans by exploring publicly available and newly sequenced transcriptomes of 170 crustacean species and identifying 131,941 TFs within 63 TF families. CrusTF features three categories of information: sequence, function, and evolution of crustacean TFs. The database enables searching, browsing and downloading of crustacean TF sequences. CrusTF infers DNA binding motifs of crustacean TFs, thus facilitating the users to predict potential downstream TF targets. The database also presents evolutionary analyses of crustacean TFs, which improve our understanding of the evolution of transcriptional regulatory systems in crustaceans.

Conclusions

Given the importance of TF information in evolutionary and functional studies on transcriptional regulatory systems of crustaceans, this database will constitute a key resource for the research community of crustacean biology and evolutionary biology. Moreover, CrusTF serves as a model for the construction of TF database derived from transcriptome data. A similar approach could be applied to other groups of organisms, for which transcriptomes are more readily available than genomes.",2017-11-25 +26519407,WheatGenome.info: A Resource for Wheat Genomics Resource.,"An integrated database with a variety of Web-based systems named WheatGenome.info hosting wheat genome and genomic data has been developed to support wheat research and crop improvement. The resource includes multiple Web-based applications, which are implemented as a variety of Web-based systems. These include a GBrowse2-based wheat genome viewer with BLAST search portal, TAGdb for searching wheat second generation genome sequence data, wheat autoSNPdb, links to wheat genetic maps using CMap and CMap3D, and a wheat genome Wiki to allow interaction between diverse wheat genome sequencing activities. This portal provides links to a variety of wheat genome resources hosted at other research organizations. This integrated database aims to accelerate wheat genome research and is freely accessible via the web interface at http://www.wheatgenome.info/ .",2016-01-01 +32729218,Interactive Web-based Data Visualization and Analysis Tool for Synthetizing on-farm Research Networks Data.,"The on-farm research network concept enables a group of farmers to test new agricultural management practices under local conditions with support from local researchers or agronomists. Different on-farm trials based on the same experimental design are conducted over several years and sites to test the effectiveness of different innovative management practices aimed at increasing crop productivity and profitability. 
As a larger amount of historical trial data are being accumulated, data of all the trials require analyses and summarization. Summaries of on-farm trials are usually presented to farmers as individual field reports, which are not optimal for the dissemination of results and decision making. A more practical communication method is needed to enhance result communication and decision making. R Shiny is a new rapidly developing technology for turning R data analyses into interactive web applications. For the first time for on-farm research networks, we developed and launched an interactive web tool called ISOFAST using R Shiny. ISOFAST simultaneously reports all trial results about the same management practice to simplify interpretation of multi-site and multi-year summaries. We used a random-effects model to synthetize treatment differences at both the individual trial and network levels and generate new knowledge for farmers and agronomists. The friendly interface enables users to explore trial summaries, access model outputs, and perform economic analysis at their fingertips. This paper describes a case-study to illustrate how to use the tool and make agronomic management decisions based on the on-farm trial data. We also provided technical details and guidance for developing a similar interactive visualization tool customized for on-farm research network. ISOFAST is currently available at https://analytics.iasoybeans.com/cool-apps/ISOFAST/.",2020-08-18 +32649184,Software Requirements for the Analysis and Interpretation of Native Ion Mobility Mass Spectrometry Data.,"The past few years have seen a dramatic increase in applications of native mass and ion mobility spectrometry, especially for the study of proteins and protein complexes. This increase has been catalyzed by the availability of commercial instrumentation capable of carrying out such analyses. As in most fields, however, the software to process the data generated from new instrumentation lags behind. 
Recently, a number of research groups have started addressing this by developing software, but further improvements are still required in order to realize the full potential of the data sets generated. In this perspective, we describe practical aspects as well as challenges in processing native mass spectrometry (MS) and ion mobility-MS data sets and provide a brief overview of currently available tools. We then set out our vision of future developments that would bring the community together and lead to the development of a common platform to expedite future computational developments, provide standardized processing approaches, and serve as a location for the deposition of data for this emerging field. This perspective has been written by members of the European Cooperation in Science and Technology Action on Native MS and Related Methods for Structural Biology (EU COST Action BM1403) as an introduction to the software tools available in this area. It is intended to serve as an overview for newcomers and to stimulate discussions in the community on further developments in this field, rather than being an in-depth review. Our complementary perspective (http://dx.doi.org/10.1021/acs.analchem.9b05791) focuses on computational approaches used in this field.",2020-07-24 +31740563,ADAPTABLE: a comprehensive web platform of antimicrobial peptides tailored to the user's research. ,"Antimicrobial peptides (AMPs) are part of the innate immune response to pathogens in all of the kingdoms of life. They have received significant attention because of their extraordinary variety of activities, in particular, as candidate drugs against the threat of super-bacteria. A systematic study of the relation between the sequence and the mechanism of action is urgently needed, given the thousands of sequences already in multiple web resources. 
ADAPTABLE web platform (http://gec.u-picardie.fr/adaptable) introduces the concept of ""property alignment"" to create families of property and sequence-related peptides (SR families). This feature provides the researcher with a tool to select those AMPs meaningful to their research from among more than 40,000 nonredundant sequences. Selectable properties include the target organism and experimental activity concentration, allowing selection of peptides with multiple simultaneous actions. This is made possible by ADAPTABLE because it not only merges sequences of AMP databases but also merges their data, thereby standardizing values and handling non-proteinogenic amino acids. In this unified platform, SR families allow the creation of peptide scaffolds based on common traits in peptides with similar activity, independently of their source.",2019-11-18 +34281399,Visualization and Analysis of the Dynamic Assembly of a Heterologous Lantibiotic Biosynthesis Complex in Bacillus subtilis.,"A membrane-associated lanthipeptide synthetase complex, consisting of the dehydratase NisB, the cyclase NisC, and the ABC transporter NisT, has been described for nisin biosynthesis in the coccoid bacterium Lactococcus lactis. Here, we used advanced fluorescence microscopy to visualize the functional nisin biosynthesis machinery in rod-shaped cells and analyzed its spatial distribution and dynamics employing a platform we developed for heterologous production of nisin in Bacillus subtilis. We observed that NisT, as well as NisB and NisC, were all distributed in a punctate pattern along the cell periphery, opposed to the situation in coccoid cells. NisBTC proteins were found to be highly colocalized, being visualized at the same spots by dual fluorescence microscopy. In conjunction with the successful isolation of the biosynthetic complex NisBTC from the cell membrane, this corroborated that the visual bright foci were the sites for nisin maturation and transportation. 
A strategy of differential timing of expression was employed to demonstrate the in vivo dynamic assembly of NisBTC, revealing the recruitment by NisT of NisBC to the membrane. Additionally, by use of mutated proteins, the nucleotide binding domain (NBD) of NisT was found to function as a membrane anchor for NisB and/or NisC. We also show that the nisin biosynthesis sites are static and likely associated with proteins residing in lipid rafts. Based on these data, we propose a model for a three-phase production of modified precursor nisin in rod-shaped bacteria, presenting the assembly dynamics of NisBTC and emphasizing the crucial role of NisBC, next to NisT, in the process of precursor nisin translocation. IMPORTANCE Nisin is a model antimicrobial peptide for LanBC-modified lantibiotics that are modified and transported by a membrane synthetase complex. Although the subcellular localization and the assembly process of such a complex in L. lactis have been described in our recent work (J. Chen, A. J. van Heel, and O. P. Kuipers, mBio 11:e02825-20, 2020, https://doi.org/10.1128/mBio.02825-20), it proved difficult to gain a more detailed insight into the exact LanBTC assembly in the L. lactis system. Rod-shaped cells, especially B. subtilis, are better suited to study the assembly dynamics of these protein complexes. In this work, we present evidence for the existence of the lanthipeptide biosynthetic complex by visualizing and isolating the machinery in vivo. The dynamic behavior of the modification machinery and the transporter within the cells was characterized in depth, revealing the dependence of first LanB and LanC on each other and subsequent recruitment of them by LanT during the machinery assembly. 
Importantly, the elucidation of the dynamic assembly of the complex will facilitate future studies of lanthipeptide transport mechanisms and the structural characterization of the complete complex.",2021-07-20 +33391720,FastD: Fast detection of insecticide target-site mutations and overexpressed detoxification genes in insect populations from RNA-Seq data.,"Target-site mutations and detoxification gene overexpression are two major mechanisms conferring insecticide resistance. Molecular assays applied to detect these resistance genetic markers are time-consuming and with high false-positive rates. RNA-Seq data contains information on the variations within expressed genomic regions and expression of detoxification genes. However, there is no corresponding method to detect resistance markers at present. Here, we collected 66 reported resistance mutations of four insecticide targets (AChE, VGSC, RyR, and nAChR) from 82 insect species. Next, we obtained 403 sequences of the four target genes and 12,665 sequences of three kinds of detoxification genes including P450s, GSTs, and CCEs. Then, we developed a Perl program, FastD, to detect target-site mutations and overexpressed detoxification genes from RNA-Seq data and constructed a web server for FastD (http://www.insect-genome.com/fastd). The estimation of FastD on simulated RNA-Seq data showed high sensitivity and specificity. We applied FastD to detect resistant markers in 15 populations of six insects, Plutella xylostella, Aphis gossypii, Anopheles arabiensis, Musca domestica, Leptinotarsa decemlineata and Apis mellifera. Results showed that 11 RyR mutations in P. xylostella, one nAChR mutation in A. gossypii, one VGSC mutation in A. arabiensis and five VGSC mutations in M. domestica were found to be with frequency difference >40% between resistant and susceptible populations including previously confirmed mutations G4946E in RyR, R81T in nAChR and L1014F in VGSC. 
And 49 detoxification genes were found to be overexpressed in resistant populations compared with susceptible populations including previously confirmed detoxification genes CYP6BG1, CYP6CY22, CYP6CY13, CYP6P3, CYP6M2, CYP6P4 and CYP4G16. The candidate target-site mutations and detoxification genes were worth further validation. Resistance estimates according to confirmed markers were consistent with population phenotypes, confirming the reliability of this program in predicting population resistance at omics-level.",2020-11-21 +31870277,MADOKA: an ultra-fast approach for large-scale protein structure similarity searching.,"

Background

Protein comparative analysis and similarity searches play essential roles in structural bioinformatics. A couple of algorithms for protein structure alignments have been developed in recent years. However, facing the rapid growth of protein structure data, improving overall comparison performance and running efficiency with massive sequences is still challenging.

Results

Here, we propose MADOKA, an ultra-fast approach for massive structural neighbor searching using a novel two-phase algorithm. Initially, we apply a fast alignment between pairwise structures. Then, we employ a score to select pairs with more similarity to carry out a more accurate fragment-based residue-level alignment. MADOKA performs about 6-100 times faster than existing methods, including TM-align and SAL, in massive alignments. Moreover, the quality of structural alignment of MADOKA is better than the existing algorithms in terms of TM-score and number of aligned residues. We also develop a web server to search structural neighbors in PDB database (About 360,000 protein chains in total), as well as additional features such as 3D structure alignment visualization. The MADOKA web server is freely available at: http://madoka.denglab.org/ CONCLUSIONS: MADOKA is an efficient approach to search for protein structure similarity. In addition, we provide a parallel implementation of MADOKA which exploits massive power of multi-core CPUs.",2019-12-24 +33351632,BCL::Conf: Improved Open-Source Knowledge-Based Conformation Sampling Using the Crystallography Open Database.,"We previously described BCL::Conf, a knowledge-based conformation sampling algorithm utilizing a small molecule fragment rotamer library derived from the Cambridge Structural Database (CSD, license required), as a component of the BioChemical Library (BCL) cheminformatics toolkit. This paper describes substantial improvements made to the BCL::Conf algorithm and a transition to a rotamer library derived from molecules in the Crystallography Open Database (COD, no license required). We demonstrate the performance of the new BCL::Conf on native conformer recovery in the Platinum dataset of high-quality protein-ligand complexes. 
This set of 2859 structures has previously been used to assess the performance of over a dozen conformer generation algorithms, including the Conformator, Balloon, RDKit DG, ETKDG, Confab, Frog2, MultiConf-DOCK, CSD conformer generator, ConfGenX-OPSL3 force field, Omega, excalc, iCon, and MOE. These benchmarks suggest that the CSD conformer generator is at the state of the art of reported conformer generators. Our results indicate that the improved BCL::Conf significantly outperforms the CSD conformer generation algorithm at binding conformer recovery across a range of ensemble sizes and with similarly fast rates of conformer generation. BCL::Conf is now distributed with the COD-derived rotamer library and is free for academic use. The BCL can be downloaded at http://meilerlab.org/bclcommons for Windows, Linux, or Apple operating systems. BCL::Conf can now also be accessed via webserver at http://meilerlab.org/bclconf.",2020-12-22 +34278891,Pulmonary hypoxia and venous admixture correlate linearly to the kinetic energy from porcine high velocity projectile behind armor blunt trauma.,"Purpose. Behind armor blunt trauma (BABT) is a non-penetrating injury caused by the rapid deformation of body armor, by a projectile, which may in extreme circumstances cause death. The understanding of the mechanisms is still low, in relation to what is needed for safety threshold levels. High velocity projectile BABT causes immediate and severe hypoxia by increased venous admixture (Q's/Q't), but it is not known whether the level of hypoxia correlates to the kinetic energy (Ek) of the projectile. Materials and Methods. We constructed a 65 mm BABT-simulator to measure the Ek absorbed by the thorax. The simulator was validated to 7.62 mm high velocity BABT (swine with removed organs) for 7.62 mm (n = 7) and 65 mm (n = 12). 
Physiological measurements during 60 minutes were performed in 40 anesthetized swine in groups control (n = 9), 7.62 mm (n = 7), 65 mm weight variation (n = 24), 65 mm speed variation (n = 12, included in the weight variation group). New calculations were done for a previously studied group of 7.62 mm with backing (n = 9). Results. 65 mm BABT simulation and 7.62 mm BABT had similar back-face signatures (24 mm), and maximum thoracic impression speed (24-34 m/s). Back-face signatures correlated linearly to Ek (R2=0.20). Rib fractures had a 50% likelihood at back-face signature 23.0 mm (95% CI 18.5 to 29.0 mm, area under ROC curve 0.93). Ek correlated linearly to pO2 (R2=0.34, p = 0.0026) and venous admixture (R2=0.37, p = 0.0046). The extrapolated Ek at 5 minutes for pO2=0 kPa was 587 J and for venous admixture = 100% 574 J. Conclusions. Hypoxia and venous admixture correlated linearly to Ek, allowing for a calculated predicted lethal Ek to ≥574 J, which should be verified in survival studies. Lethality predictions from lung physiology is an alternative to clay impressions and may facilitate the development of ballistic safety equipment and new BABT safety criteria. Supplemental data for this article is available online at https://doi.org/10.1080/01902148.2021.1950869 .",2021-07-19 +32110491,DISNET: a framework for extracting phenotypic disease information from public sources.,"

Background

Within the global endeavour of improving population health, one major challenge is the identification and integration of medical knowledge spread through several information sources. The creation of a comprehensive dataset of diseases and their clinical manifestations based on information from public sources is an interesting approach that allows one not only to complement and merge medical knowledge but also to increase it and thereby to interconnect existing data and analyse and relate diseases to each other. In this paper, we present DISNET (http://disnet.ctb.upm.es/), a web-based system designed to periodically extract the knowledge from signs and symptoms retrieved from medical databases, and to enable the creation of customisable disease networks.

Methods

We here present the main features of the DISNET system. We describe how information on diseases and their phenotypic manifestations is extracted from Wikipedia and PubMed websites; specifically, texts from these sources are processed through a combination of text mining and natural language processing techniques.

Results

We further present the validation of our system on Wikipedia and PubMed texts, obtaining the relevant accuracy. The final output includes the creation of a comprehensive symptoms-disease dataset, shared (free access) through the system's API. We finally describe, with some simple use cases, how a user can interact with it and extract information that could be used for subsequent analyses.

Discussion

DISNET allows retrieving knowledge about the signs, symptoms and diagnostic tests associated with a disease. It is not limited to a specific category (all the categories that the selected sources of information offer us) and clinical diagnosis terms. It further allows to track the evolution of those terms through time, being thus an opportunity to analyse and observe the progress of human knowledge on diseases. We further discussed the validation of the system, suggesting that it is good enough to be used to extract diseases and diagnostically-relevant terms. At the same time, the evaluation also revealed that improvements could be introduced to enhance the system's reliability.",2020-02-17 +31131402,BioUML: an integrated environment for systems biology and collaborative analysis of biomedical data.,"BioUML (homepage: http://www.biouml.org, main public server: https://ict.biouml.org) is a web-based integrated environment (platform) for systems biology and the analysis of biomedical data generated by omics technologies. The BioUML vision is to provide a computational platform to build virtual cell, virtual physiological human and virtual patient. BioUML spans a comprehensive range of capabilities, including access to biological databases, powerful tools for systems biology (visual modelling, simulation, parameters fitting and analyses), a genome browser, scripting (R, JavaScript) and a workflow engine. Due to integration with the Galaxy platform and R/Bioconductor, BioUML provides powerful possibilities for the analyses of omics data. The plug-in-based architecture allows the user to add new functionalities using plug-ins. To facilitate a user focus on a particular task or database, we have developed several predefined perspectives that display only those web interface elements that are needed for a specific task. To support collaborative work on scientific projects, there is a central authentication and authorization system (https://bio-store.org). 
The diagram editor enables several remote users to simultaneously edit diagrams.",2019-07-01 +32299716,Is There an Association Between Schizophrenia and Sexual Dysfunction in Both Sexes? A Systematic Review and Meta-Analysis.,"

Background

Mounting clinical studies have reported patients with schizophrenia are at high risk of developing sexual dysfunction (SD), but a directly calculated prevalence of SD is currently lacking.

Aim

To further quantify the association between schizophrenia and SD.

Methods

MEDLINE (PubMed), Embase (OVID), the Cochrane Library databases, and the PsycINFO were systematically searched for eligible studies reporting the sexual functioning in patients with schizophrenia. This meta-analysis has been registered on PROSPERO (ID: CRD42019121720, http://www.crd.york.ac.uk/PROSPERO).

Outcomes

The relationship between schizophrenia and SD was detected by calculating the relative risk (RR) with a 95% confidence interval (CI). The GRADE-profiler was employed to rank the quality of the evidence.

Results

10 observational studies (3 case-control studies and 7 cross-sectional studies) were finally included, enrolling a total of 3,570 participants (mean age 28.6-46.2 years), of whom 1,161 had schizophrenia and the remainders were the healthy control subjects. Synthetic results indicated that schizophrenia was significantly associated with an increased risk of SD regardless of gender (3 studies reporting both sexes: RR = 2.24, 95%CI: 1.66-3.03, P < .001, heterogeneity: I2 = 0.0%, P = .431; 7 studies reporting men: RR = 2.63, 95%CI: 1.68-4.13, P < .001, heterogeneity: I2 = 82.7%, P < .001; 5 studies reporting women: RR = 2.07, 95%CI: 1.46-2.94, P < .001; heterogeneity: I2 = 79.7%, P = .001). In accordance with the GRADE-profiler, the quality of the evidence of primary outcomes was LOW, MODERATE, and LOW in studies including both sexes, men, and women, respectively.

Clinical implications

Our findings confirmed the potential link between schizophrenia and SD. Clinicians should routinely assess the sexual functioning for those patients with schizophrenia and further recommend the preferred antipsychotics for them.

Strengths & limitations

This is the first meta-analysis investigating the association between schizophrenia and the risks of SD in both sexes. Nonetheless, substantial heterogeneities were identified across the selected studies.

Conclusion

Robust data from this meta-analysis showed increased rates of SD in patients with schizophrenia compared with the general populations. Therefore, more specific psychological and pharmaceutical interventions are needed to help patients with schizophrenia gain a better sexual life. Zhao S, Wang X, Qiang X, et al. Is There an Association Between Schizophrenia and Sexual Dysfunction in Both Sexes? A Systematic Review and Meta-Analysis. J Sex Med 2020;17:1476-1488.",2020-04-14 +33560568,"Update of the Pompe variant database for the prediction of clinical phenotypes: Novel disease-associated variants, common sequence variants, and results from newborn screening.","Pompe disease is an inherited disorder caused by disease-associated variants in the acid α-glucosidase gene (GAA). The Pompe disease GAA variant database (http://www.pompevariantdatabase.nl) is a curated, open-source, disease-specific database, and lists disease-associated GAA variants, in silico predictions, and clinical phenotypes reported until 2016. Here, we provide an update to include 226 disease-associated variants that were published until 2020. We also listed 148 common GAA sequence variants that do not cause Pompe disease. GAA variants with unknown severity that were identified only in newborn screening programs were listed as a new feature to indicate the reason why phenotypes were still unknown. Expression studies were performed for common missense variants to predict their severity. The updated Pompe disease GAA variant database now includes 648 disease-associated variants, 26 variants from newborn screening, and 237 variants with unknown severity. Regular updates of the Pompe disease GAA variant database will be required to improve genetic counseling and the study of genotype-phenotype relationships.",2020-12-21 +34246829,Assessment of the fetal thymus gland: Comparing MRI-acquired thymus volumes with 2D ultrasound measurements.,"

Objectives

The fetal thymus gland has been shown to involute in response to intrauterine infection, and therefore could be used as a non-invasive marker of fetal compartment infection. The objective of this study was to evaluate how accurately 2D ultrasound-derived measurements of the fetal thymus reflect the 3D volume of the gland derived from motion corrected MRI images.

Study design

A retrospective study was performed using paired ultrasound and MRI datasets from the iFIND project (http://www.ifindproject.com). To obtain 3D volumetry of the thymus gland, T2-weighted single shot turbo spin echo (ssTSE) sequences of the fetal thorax were acquired. Thymus volumes were manually segmented from deformable slice-to-volume reconstructed images. To obtain 2D ultrasound measurements, previously stored fetal cine loops were used and measurements obtained at the 3-vessel-view (3VV) and 3-vessel-trachea view (3VT): anterior-posterior diameter (APD), intrathoracic diameter (ITD), transverse diameter (TD), perimeter and 3-vessel-edge (3VE). Inter-observer and intra-observer reliability (ICC) was calculated for both MRI and ultrasound measurements. Pearson correlation coefficients (PCC) were used to compare 2D-parameters with acceptable ICC to TV.

Results

38 participants were identified. Adequate visualisation was possible on 37 MRI scans and 31 ultrasound scans. Of the 30 datasets where both MRI and ultrasound data were available, MRI had good interobserver reliability (ICC 0.964) and all ultrasound 3VV 2D-parameters and 3VT 3VE had acceptable ICC (>0.75). Four 2D parameters were reflective of the 3D thymus volume: 3VV TD r = 0.540 (P = 0.002); 3VV perimeter r = 0.446 (P = 0.013); 3VV APD r = 0.435 (P = 0.110) and 3VT TD r = 0.544 (P = 0.002).

Conclusions

MRI appeared superior to ultrasound for visualization of the thymus gland and reproducibility of measurements. Three 2D US parameters, 3VV TD, perimeter and 3VT APD, correlated well with TV. Therefore, these represent a more accurate reflection of the true size of the gland than other 2D measurements, where MRI is not available.",2021-06-30 +31545554,The modular structure of α/β-hydrolases.,"The α/β-hydrolase fold family is highly diverse in sequence, structure and biochemical function. To investigate the sequence-structure-function relationships, the Lipase Engineering Database (https://led.biocatnet.de) was updated. Overall, 280 638 protein sequences and 1557 protein structures were analysed. All α/β-hydrolases consist of the catalytically active core domain, but they might also contain additional structural modules, resulting in 12 different architectures: core domain only, additional lids at three different positions, three different caps, additional N- or C-terminal domains and combinations of N- and C-terminal domains with caps and lids respectively. In addition, the α/β-hydrolases were distinguished by their oxyanion hole signature (GX-, GGGX- and Y-types). The N-terminal domains show two different folds, the Rossmann fold or the β-propeller fold. The C-terminal domains show a β-sandwich fold. The N-terminal β-propeller domain and the C-terminal β-sandwich domain are structurally similar to carbohydrate-binding proteins such as lectins. The classification was applied to the newly discovered polyethylene terephthalate (PET)-degrading PETases and MHETases, which are core domain α/β-hydrolases of the GX- and the GGGX-type respectively. To investigate evolutionary relationships, sequence networks were analysed. The degree distribution followed a power law with a scaling exponent γ = 1.4, indicating a highly inhomogeneous network which consists of a few hubs and a large number of less connected sequences. 
The hub sequences have many functional neighbours and therefore are expected to be robust toward possible deleterious effects of mutations. The cluster size distribution followed a power law with an extrapolated scaling exponent τ = 2.6, which strongly supports the connectedness of the sequence space of α/β-hydrolases. DATABASE: Supporting data about domains from other proteins with structural similarity to the N- or C-terminal domains of α/β-hydrolases are available in Data Repository of the University of Stuttgart (DaRUS) under doi: https://doi.org/10.18419/darus-458.",2019-10-10 +27987171,A Guide to the PLAZA 3.0 Plant Comparative Genomic Database.,"PLAZA 3.0 is an online resource for comparative genomics and offers a versatile platform to study gene functions and gene families or to analyze genome organization and evolution in the green plant lineage. Starting from genome sequence information for over 35 plant species, precomputed comparative genomic data sets cover homologous gene families, multiple sequence alignments, phylogenetic trees, and genomic colinearity information within and between species. Complementary functional data sets, a Workbench, and interactive visualization tools are available through a user-friendly web interface, making PLAZA an excellent starting point to translate sequence or omics data sets into biological knowledge. PLAZA is available at http://bioinformatics.psb.ugent.be/plaza/ .",2017-01-01 +30165538,SKmDB: an integrated database of next generation sequencing information in skeletal muscle.,"

Motivation

Skeletal muscles have indispensable functions and also possess prominent regenerative ability. The rapid emergence of Next Generation Sequencing (NGS) data in recent years offers us an unprecedented perspective to understand gene regulatory networks governing skeletal muscle development and regeneration. However, the data from public NGS database are often in raw data format or processed with different procedures, causing obstacles to make full use of them.

Results

We provide SKmDB, an integrated database of NGS information in skeletal muscle. SKmDB not only includes all NGS datasets available in the human and mouse skeletal muscle tissues and cells, but also provides preliminary data analyses including gene/isoform expression levels, gene co-expression subnetworks, as well as assembly of putative lincRNAs, typical and super enhancers and transcription factor hotspots. Users can efficiently search, browse and visualize the information with the well-designed user interface and server side. SKmDB thus will offer wet lab biologists useful information to study gene regulatory mechanisms in the field of skeletal muscle development and regeneration.

Availability and implementation

Freely available on the web at http://sunlab.cpy.cuhk.edu.hk/SKmDB.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +27899596,ChiTaRS-3.1-the enhanced chimeric transcripts and RNA-seq database matched with protein-protein interactions.,"Discovery of chimeric RNAs, which are produced by chromosomal translocations as well as the joining of exons from different genes by trans-splicing, has added a new level of complexity to our study and understanding of the transcriptome. The enhanced ChiTaRS-3.1 database (http://chitars.md.biu.ac.il) is designed to make widely accessible a wealth of mined data on chimeric RNAs, with easy-to-use analytical tools built-in. The database comprises 34 922 chimeric transcripts along with 11 714 cancer breakpoints. In this latest version, we have included multiple cross-references to GeneCards, iHop, PubMed, NCBI, Ensembl, OMIM, RefSeq and the Mitelman collection for every entry in the 'Full Collection'. In addition, for every chimera, we have added a predicted Chimeric Protein-Protein Interaction (ChiPPI) network, which allows for easy visualization of protein partners of both parental and fusion proteins for all human chimeras. The database contains a comprehensive annotation for 34 922 chimeric transcripts from eight organisms, and includes the manual annotation of 200 sense-antiSense (SaS) chimeras. The current improvements in the content and functionality to the ChiTaRS database make it a central resource for the study of chimeric transcripts and fusion proteins.",2016-11-29 +33978561,Habit Facilitates Actioning Sun Protective Behavior Intentions.,"Skin cancer is highly burdensome, but preventable with regular engagement in sun protective behaviors. Despite modest effectiveness of sun-protective behavior promotional efforts thus far, rates of engagement in sun-protective behaviors remain low. More is needed to understand motivation for using sunscreen, wearing sun-protective clothing, and seeking shade. 
This study tested whether the links of intention and habit strength with behavior differed between sun-protective behaviors. It was hypothesized that sun protective behaviors would be predicted by both habit and intention and that intention-behavior associations would be weaker for people with stronger habits. Participants residing in Queensland, Australia (N = 203; 75.96% female; M age = 37.16 years, SD = 14.67) self-reported their intentions and habit strength about sun-protective behavior for the next 7 days. Participants were followed-up 7 days later to self-report their sun-protective behavior. Multilevel modeling, accounting for nesting of multiple behaviors within-person, revealed that habit moderated the intention strength - behavior association and this moderation effect did not differ as a function of which behavior was being predicted. People with strong or moderate habit strength tended to act in line with their intentions; however, for people with very weak habits (2 SD < M), there was less alignment between their intention and behavior. These findings suggest that habit plays a facilitative role in the implementation of strong sun protective behavior intentions. Interventions should consider how to encourage intention and habit to enhance sun-protective behaviors and reduce the burden of skin cancer from sun exposure. Supplemental data for this article is available online at https://doi.org/10.1080/08964289.2021.1903380 .",2021-05-12 +27423255,The Neuro Bureau ADHD-200 Preprocessed repository.,"In 2011, the ""ADHD-200 Global Competition"" was held with the aim of identifying biomarkers of attention-deficit/hyperactivity disorder from resting-state functional magnetic resonance imaging (rs-fMRI) and structural MRI (s-MRI) data collected on 973 individuals. 
Statisticians and computer scientists were potentially the most qualified for the machine learning aspect of the competition, but generally lacked the specialized skills to implement the necessary steps of data preparation for rs-fMRI. Realizing this barrier to entry, the Neuro Bureau prospectively collaborated with all competitors by preprocessing the data and sharing these results at the Neuroimaging Informatics Tools and Resources Clearinghouse (NITRC) (http://www.nitrc.org/frs/?group_id=383). This ""ADHD-200 Preprocessed"" release included multiple analytical pipelines to cater to different philosophies of data analysis. The processed derivatives included denoised and registered 4D fMRI volumes, regional time series extracted from brain parcellations, maps of 10 intrinsic connectivity networks, fractional amplitude of low frequency fluctuation, and regional homogeneity, along with grey matter density maps. The data was used by several teams who competed in the ADHD-200 Global Competition, including the winning entry by a group of biostatisticians. To the best of our knowledge, the ADHD-200 Preprocessed release was the first large public resource of preprocessed resting-state fMRI and structural MRI data, and remains to this day the only resource featuring a battery of alternative processing paths.",2016-07-15 +32437515,BioPAX-Parser: parsing and enrichment analysis of BioPAX pathways.,"

Summary

Biological pathways are fundamental for learning about healthy and disease states. Many existing formats support automatic software analysis of biological pathways, e.g. BioPAX (Biological Pathway Exchange). Although some algorithms are available as web application or stand-alone tools, no general graphical application for the parsing of BioPAX pathway data exists. Also, very few tools can perform pathway enrichment analysis (PEA) using pathway encoded in the BioPAX format. To fill this gap, we introduce BiP (BioPAX-Parser), an automatic and graphical software tool aimed at performing the parsing and accessing of BioPAX pathway data, along with PEA by using information coming from pathways encoded in BioPAX.

Availability and implementation

BiP is freely available for academic and non-profit organizations at https://gitlab.com/giuseppeagapito/bip under the LGPL 2.1, the GNU Lesser General Public License.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +34185592,Benefit of Musical Training for Speech Perception and Cognition Later in Life.,"Purpose The aim of this study was to determine the long-term associations of musical training with speech perception in adverse conditions and cognition in a longitudinal cohort study of middle-age to older adults. Method This study is based on Epidemiology of Hearing Loss Study participants. We asked participants at baseline (1993-1995) about their musical training. Speech perception (word recognition in competing message; Northwestern University Auditory Test Number 6), cognitive function (cognitive test battery), and impairment (self-report or surrogate report of Alzheimer's disease or dementia, and/or a Mini-Mental State Examination score ≤ 24) were assessed up to 5 times over the 20-year follow-up. We included 2,938 Epidemiology of Hearing Loss Study participants who had musical training data and at least one follow-up of speech perception and/or cognitive assessment. We used linear mixed-effects models to determine associations between musicianship and decline in speech perception and cognitive function over time and Cox regression models to evaluate associations of musical training with 20-year cumulative incidence of speech perception and cognitive impairment. Models were adjusted for age, sex, and occupation and repeated with additional adjustment for health-related confounders and education. Results Musicians showed less speech perception decline over time with stronger effects in women (0.16% difference, 95% confidence interval [CI] [0.05, 0.26]). Among men, musicians had, on average, better speech perception than nonmusicians (3.41% difference, 95% CI [0.62, 6.20]) and were less likely to develop a cognitive impairment than nonmusicians (hazard ratio = 0.58, 95% CI [0.37, 0.91]). 
Conclusions Musicians showed an advantage in speech perception abilities and cognition later in life and less decline over time with different magnitudes of effect sizes in men and women. Associations remained with further adjustment, indicating that some degree of the advantage of musical training is independent of socioeconomic or health differences. If confirmed, these findings could have implications for developing speech perception intervention and prevention strategies. Supplemental Material https://doi.org/10.23641/asha.14825454.",2021-06-28 +25877637,DisGeNET: a discovery platform for the dynamical exploration of human diseases and their genes.,"DisGeNET is a comprehensive discovery platform designed to address a variety of questions concerning the genetic underpinning of human diseases. DisGeNET contains over 380,000 associations between >16,000 genes and 13,000 diseases, which makes it one of the largest repositories currently available of its kind. DisGeNET integrates expert-curated databases with text-mined data, covers information on Mendelian and complex diseases, and includes data from animal disease models. It features a score based on the supporting evidence to prioritize gene-disease associations. It is an open access resource available through a web interface, a Cytoscape plugin and as a Semantic Web resource. The web interface supports user-friendly data exploration and navigation. DisGeNET data can also be analysed via the DisGeNET Cytoscape plugin, and enriched with the annotations of other plugins of this popular network analysis software suite. Finally, the information contained in DisGeNET can be expanded and complemented using Semantic Web technologies and linked to a variety of resources already present in the Linked Data cloud. 
Hence, DisGeNET offers one of the most comprehensive collections of human gene-disease associations and a valuable set of tools for investigating the molecular mechanisms underlying diseases of genetic origin, designed to fulfill the needs of different user profiles, including bioinformaticians, biologists and health-care practitioners. Database URL: http://www.disgenet.org/",2015-04-15 +31663781,"Long-Term Exposure to Particulate Air Pollution, Black Carbon, and Their Source Components in Relation to Ischemic Heart Disease and Stroke.","

Background

Long-term exposure to particulate matter (PM) in ambient air has been associated with cardiovascular mortality, but few studies have considered incident disease in relation to PM from different sources.

Objectives

We aimed to study associations between long-term exposure to different types of PM and sources, and incident ischemic heart disease (IHD) and stroke in three Swedish cities.

Methods

Based on detailed emission databases, monitoring data, and high-resolution dispersion models, we calculated source contributions to PM with aerodynamic diameter ≤10μm (PM10), PM with aerodynamic diameter ≤2.5μm (PM2.5), and black carbon (BC) from road wear, traffic exhaust, residential heating, and other sources in Gothenburg, Stockholm, and Umeå. Registry data for participants from four cohorts were used to obtain incidence of IHD and stroke for first hospitalization or death. We constructed time windows of exposure for same-year, 1- to 5-y, and 6- to 10-y averages preceding incidence from annual averages at residential addresses. Risk estimates were based on random effects meta-analyses of cohort-specific Cox proportional hazard models.

Results

We observed 5,166 and 3,119 incident IHD and stroke cases, respectively, in 114,758 participants. Overall, few consistent associations were observed between the different air pollution measures and IHD or stroke incidence. However, same-year levels of ambient locally emitted BC (range: 0.01-4.6 μg/m3) were associated with a 4.0% higher risk of incident stroke per interquartile range (IQR), 0.30 μg/m3 [95% confidence interval (CI): 0.04, 7.8]. This association was primarily related to BC from traffic exhaust. PM10 (range: 4.4-52 μg/m3) and PM2.5 (range: 2.9-22 μg/m3) were not associated with stroke. Associations with incident IHD were observed only for PM2.5 exposure from residential heating.

Discussion

Few consistent associations were observed between different particulate components and IHD or stroke. However, long-term residential exposure to locally emitted BC from traffic exhaust was associated with stroke incidence. The comparatively low exposure levels may have contributed to the paucity of associations. https://doi.org/10.1289/EHP4757.",2019-10-30 +26673098,Interactome of the hepatitis C virus: Literature mining with ANDSystem.,"A study of the molecular genetics mechanisms of host-pathogen interactions is of paramount importance in developing drugs against viral diseases. Currently, the literature contains a huge amount of information that describes interactions between HCV and human proteins. In addition, there are many factual databases that contain experimentally verified data on HCV-host interactions. The sources of such data are the original data along with the data manually extracted from the literature. However, the manual analysis of scientific publications is time consuming and, because of this, databases created with such an approach often do not have complete information. One of the most promising methods to provide actualisation and completeness of information is text mining. Here, with the use of a previously developed method by the authors using ANDSystem, an automated extraction of information on the interactions between HCV and human proteins was conducted. As a data source for the text mining approach, PubMed abstracts and full text articles were used. Additionally, external factual databases were analyzed. On the basis of this analysis, a special version of ANDSystem, extended with the HCV interactome, was created. The HCV interactome contains information about the interactions between 969 human and 11 HCV proteins. Among the 969 proteins, 153 'new' proteins were found not previously referred to in any external databases of protein-protein interactions for HCV-host interactions. 
Thus, the extended ANDSystem possesses a more comprehensive detailing of HCV-host interactions versus other existing databases. It was interesting that HCV proteins preferentially interact with human proteins that were already involved in a large number of protein-protein interactions as well as those associated with many diseases. Among human proteins of the HCV interactome, there were a large number of proteins regulated by microRNAs. It turned out that the results obtained for protein-protein interactions and microRNA-regulation did not depend on how well the proteins were studied, while protein-disease interactions appeared to be dependent on the level of study. In particular, the mean number of diseases linked to well-studied proteins (proteins were considered well-studied if they were mentioned in 50 or more PubMed publications) from the HCV interactome was 20.8, significantly exceeding the mean number of associations with diseases (10.1) for the total set of well-studied human proteins present in ANDSystem. For poorly-studied proteins from the HCV interactome (each protein was referred to in less than 50 publications), the distribution of the number of diseases associated with them had no statistically significant differences from the distribution of the number of diseases associated with poorly-studied proteins based on the total set of human proteins stored in ANDSystem. With this, the average number of associations with diseases for the HCV interactome and the total set of human proteins were 0.3 and 0.2, respectively. Thus, ANDSystem, extended with the HCV interactome, can be helpful in a wide range of issues related to analyzing HCV-host interactions in the search for anti-HCV drug targets. 
The demo version of the extended ANDSystem covered here containing only interactions between human proteins, genes, metabolites, diseases, miRNAs and molecular-genetic pathways, as well as interactions between human proteins/genes and HCV proteins, is freely available at the following web address: http://www-bionet.sscc.ru/psd/andhcv/.",2015-12-07 +34096110,'What matters to you?'-a qualitative study on the views of nursing home residents with dementia regarding the health care they receive.,"

Aims and objective

This study's aim is to examine what matters to nursing home residents with dementia by exploring their perceptions of nursing home health care through the conceptual lens of person-centred care.

Background

Dementia is a major contributor to nursing home placement. To understand the meaning of living with dementia, the inclusion of persons with dementia in research studies is essential.

Methods

In total, 35 in-depth qualitative interviews were conducted with people who have dementia and live in nursing homes. A thematic analysis was applied to analyse the data. Checklist for qualitative studies: Consolidated Criteria for Reporting Qualitative Research (COREQ) https://www.equator-network.org/reporting-guidelines/coreq/

Results

The analysis revealed one overarching theme with four sub-themes. The overarching theme was the different matchings of person-centred care and routines in health care. The four sub-themes were as follows: (a) understanding of the interplay between disabilities and ageing; (b) participating based on one's own preferences and needs; (c) incongruence between the person with dementia's preferences and needs and health-care support; and (d) working conditions: the relationship between residents and health-care providers. Despite the substantive focus of researchers on person-centred care and the positive impact on the nursing home health care of those who receive it, the results showed that nursing home residents still want more person-centred care.

Conclusions

The results indicate that the incongruence between general routines and individual preferences and needs, as well as the demand to operationalise the person-centred dimensions of health-care behaviour in nursing homes, must be resolved. Health care in nursing homes must focus on enabling residents to participate in daily activities and sustain their personhood and sense of self.

Relevance to clinical practice

Based on the residents' statements, the results contribute to the fields of dementia education, health-care provision and policy-making and may be used to achieve person-centredness and governance.",2021-06-06 +32569358,COVID-19 TestNorm: A tool to normalize COVID-19 testing names to LOINC codes.,"Large observational data networks that leverage routine clinical practice data in electronic health records (EHRs) are critical resources for research on coronavirus disease 2019 (COVID-19). Data normalization is a key challenge for the secondary use of EHRs for COVID-19 research across institutions. In this study, we addressed the challenge of automating the normalization of COVID-19 diagnostic tests, which are critical data elements, but for which controlled terminology terms were published after clinical implementation. We developed a simple but effective rule-based tool called COVID-19 TestNorm to automatically normalize local COVID-19 testing names to standard LOINC (Logical Observation Identifiers Names and Codes) codes. COVID-19 TestNorm was developed and evaluated using 568 test names collected from 8 healthcare systems. Our results show that it could achieve an accuracy of 97.4% on an independent test set. COVID-19 TestNorm is available as an open-source package for developers and as an online Web application for end users (https://clamp.uth.edu/covid/loinc.php). We believe that it will be a useful tool to support secondary use of EHRs for research on COVID-19.",2020-07-01 +31504188,Phylogenetic tree-based microbiome association test.,"

Motivation

Ecological patterns of the human microbiota exhibit high inter-subject variation, with few operational taxonomic units (OTUs) shared across individuals. To overcome these issues, non-parametric approaches, such as the Mann-Whitney U-test and Wilcoxon rank-sum test, have often been used to identify OTUs associated with host diseases. However, these approaches only use the ranks of observed relative abundances, leading to information loss, and are associated with high false-negative rates. In this study, we propose a phylogenetic tree-based microbiome association test (TMAT) to analyze the associations between microbiome OTU abundances and disease phenotypes. Phylogenetic trees illustrate patterns of similarity among different OTUs, and TMAT provides an efficient method for utilizing such information for association analyses. The proposed TMAT provides test statistics for each node, which are combined to identify mutations associated with host diseases.

Results

Power estimates of TMAT were compared with existing methods using extensive simulations based on real absolute abundances. Simulation studies showed that TMAT preserves the nominal type-1 error rate, and estimates of its statistical power generally outperformed existing methods in the considered scenarios. Furthermore, TMAT can be used to detect phylogenetic mutations associated with host diseases, providing more in-depth insight into bacterial pathology.

Availability and implementation

The 16S rRNA amplicon sequencing metagenomics datasets for colorectal carcinoma and myalgic encephalomyelitis/chronic fatigue syndrome are available from the European Nucleotide Archive (ENA) database under project accession number PRJEB6070 and PRJEB13092, respectively. TMAT was implemented in the R package. Detailed information is available at http://healthstat.snu.ac.kr/software/tmat.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +32670572,Metabolite AutoPlotter - an application to process and visualise metabolite data in the web browser.,"

Background

Metabolomics is gaining popularity as a standard tool for the investigation of biological systems. Yet, parsing metabolomics data in the absence of in-house computational scientists can be overwhelming and time-consuming. As a consequence of manual data processing, the results are often not analysed in full depth, so potential novel findings might get lost.

Methods

To tackle this problem, we developed Metabolite AutoPlotter, a tool to process and visualise quantified metabolite data. Other than with bulk data visualisations, such as heat maps, the aim of the tool is to generate single plots for each metabolite. For this purpose, it reads as input pre-processed metabolite-intensity tables and accepts different experimental designs, with respect to the number of metabolites, conditions and replicates. The code was written in the R-scripting language and wrapped into a shiny application that can be run online in a web browser on https://mpietzke.shinyapps.io/autoplotter.

Results

We demonstrate the main features and the ease of use with two different metabolite datasets, for quantitative experiments and for stable isotope tracing experiments. We show how the plots generated by the tool can be interactively modified with respect to plot type, colours, text labels and the shown statistics. We also demonstrate the application towards 13C-tracing experiments and the seamless integration of natural abundance correction, which facilitates the better interpretation of stable isotope tracing experiments. The output of the tool is a zip-file containing one single plot for each metabolite as well as restructured tables that can be used for further analysis.

Conclusion

With the help of Metabolite AutoPlotter, it is now possible to simplify data processing and visualisation for a wide audience. High-quality plots from complex data can be generated in a short time by pressing a few buttons. This offers dramatic improvements over manual analysis. It is significantly faster and allows researchers to spend more time interpreting the results or to perform follow-up experiments. Further, this eliminates potential copy-and-paste errors or tedious repetitions when things need to be changed. We are sure that this tool will help to improve and speed up scientific discoveries.",2020-07-10 +28557712,An Integrated Chemical Environment to Support 21st-Century Toxicology.,"SUMMARY: Access to high-quality reference data is essential for the development, validation, and implementation of in vitro and in silico approaches that reduce and replace the use of animals in toxicity testing. Currently, these data must often be pooled from a variety of disparate sources to efficiently link a set of assay responses and model predictions to an outcome or hazard classification. To provide a central access point for these purposes, the National Toxicology Program Interagency Center for the Evaluation of Alternative Toxicological Methods developed the Integrated Chemical Environment (ICE) web resource. The ICE data integrator allows users to retrieve and combine data sets and to develop hypotheses through data exploration. Open-source computational workflows and models will be available for download and application to local data. ICE currently includes curated in vivo test data, reference chemical information, in vitro assay data (including Tox21™/ToxCast™ high-throughput screening data), and in silico model predictions. Users can query these data collections focusing on end points of interest such as acute systemic toxicity, endocrine disruption, skin sensitization, and many others. ICE is publicly accessible at https://ice.ntp.niehs.nih.gov. 
https://doi.org/10.1289/EHP1759.",2017-05-25 +33761846,"The necropolitics of COVID-19: Race, class and slow death in an ongoing pandemic.","Achille Mbembe states that 'the ultimate expression of sovereignty resides, to a large degree, in the power and the capacity to dictate who may live and who must die […]. To exercise sovereignty is to exercise control over mortality and to define life as the deployment and manifestation of power' (Mbembe, 2003. Necropolitics. Public Culture, 15(1), 11-40. https://doi.org/10.1215/08992363-15-1-11). For Mbembe a key question is 'under what practical conditions is the right to kill, to allow to live, or to expose to death exercised?' (Mbembe, 2003. Necropolitics. Public Culture, 15(1), 11-40. https://doi.org/10.1215/08992363-15-1-11). This article will map the necropolitical underpinnings of racial and class-based health disparities and vulnerabilities in the current COVID-19 pandemic. The article will directly engage with the question of 'under what practical conditions are the right to expose to death' unfolding in the current COVID-19 pandemic. Drawing on news media representations and public health data in the UK and the U.S, the article will provide a disciplinary conjecture arguing for the importance of looking at what I call a 'state of acceptance' plays into the necropolitical dynamics of the COVID-19 pandemic.",2021-03-24 +32040319,The Metabolic Rainbow: Deep Learning Phase I Metabolism in Five Colors.,"Metabolism of drugs affects their absorption, distribution, efficacy, excretion, and toxicity profiles. Metabolism is routinely assessed experimentally using recombinant enzymes, human liver microsome, and animal models. Unfortunately, these experiments are expensive, time-consuming, and often extrapolate poorly to humans because they fail to capture the full breadth of metabolic reactions observed in vivo. 
As a result, metabolic pathways leading to the formation of toxic metabolites are often missed during drug development, giving rise to costly failures. To address some of these limitations, computational metabolism models can rapidly and cost-effectively predict sites of metabolism-the atoms or bonds which undergo enzymatic modifications-on thousands of drug candidates, thereby improving the likelihood of discovering metabolic transformations forming toxic metabolites. However, current computational metabolism models are often unable to predict the specific metabolites formed by metabolism at certain sites. Identification of reaction type is a key step toward metabolite prediction. Phase I enzymes, which are responsible for the metabolism of more than 90% of FDA approved drugs, catalyze highly diverse types of reactions and produce metabolites with substantial structural variability. Without knowledge of potential metabolite structures, medicinal chemists cannot differentiate harmful metabolic transformations from beneficial ones. To address this shortcoming, we propose a system for simultaneously labeling sites of metabolism and reaction types, by classifying them into five key reaction classes: stable and unstable oxidations, dehydrogenation, hydrolysis, and reduction. These classes unambiguously identify 21 types of phase I reactions, which cover 92.3% of known reactions in our database. We used this labeling system to train a neural network model of phase I metabolism on a literature-derived data set encompassing 20 736 human phase I metabolic reactions. Our model, Rainbow XenoSite, was able to identify reaction-type specific sites of metabolism with a cross-validated accuracy of 97.1% area under the receiver operator curve. Rainbow XenoSite with five-color and combined output is available for use free and online through our secure server at http://swami.wustl.edu/xenosite/p/phase1_rainbow.",2020-02-24 +33590861,Twelve years of SAMtools and BCFtools. 
,"SAMtools and BCFtools are widely used programs for processing and analysing high-throughput sequencing data. They include tools for file format conversion and manipulation, sorting, querying, statistics, variant calling, and effect analysis amongst other methods. The first version appeared online 12 years ago and has been maintained and further developed ever since, with many new features and improvements added over the years. The SAMtools and BCFtools packages represent a unique collection of tools that have been used in numerous other software projects and countless genomic pipelines. Both SAMtools and BCFtools are freely available on GitHub under the permissive MIT licence, free for both non-commercial and commercial use. Both packages have been installed >1 million times via Bioconda. The source code and documentation are available from https://www.htslib.org.",2021-02-01 +31127704,VCF-Server: A web-based visualization tool for high-throughput variant data mining and management.,"

Background

Next-generation sequencing (NGS) has been widely used in both clinics and research. It has become the most powerful tool for diagnosing genetic disorders and investigating disease etiology through the discovery of genetic variants. Variants identified by NGS are stored in variant call format (VCF) files. However, querying and filtering VCF files are extremely difficult for researchers without programming skills. Furthermore, as the mutation data are increasing exponentially, there is an urgent need to develop tools to manage these variant data in a centralized way.

Methods

The VCF-Server was developed as a web-based visualization tool to support the interactive analysis of genetic variant data. It allows researchers and medical geneticists to manage, annotate, filter, query, and export variants in a fast and effective way.

Results

In this study, we developed the VCF-Server, a powerful and easily accessible tool for researchers and medical geneticists to perform variant analysis. Users can query VCFs, annotate, and filter variants without knowing programming code. Once the VCF file is uploaded, VCF-Server allows users to annotate the VCF with commonly used databases or user-defined variant annotations (including variant blacklist and whitelist). Variant information in the VCF is shown visually via the interactive graphical interface. Users can filter the variants with flexible filtering rules, and the prioritized variants can be exported locally for further analysis. As VCF-Server adopts a web file system, files in the VCF-Server can be stored and managed in a centralized way. Moreover, VCF-Server allows direct web-based analysis (accessible through either desktop computers or mobile devices) as well as local deployment.

Conclusions

With an easy-to-use graphical interface, VCF-Server allows researchers with little bioinformatics background to explore and mine mutation data, which may broaden the application of NGS technology in clinics and research. The tool is freely available for use at https://www.diseasegps.org/VCF-Server?lan=eng.",2019-05-24 +30351394,VCPA: genomic variant calling pipeline and data management tool for Alzheimer's Disease Sequencing Project.,"

Summary

We report VCPA, our SNP/Indel Variant Calling Pipeline and data management tool used for the analysis of whole genome and exome sequencing (WGS/WES) for the Alzheimer's Disease Sequencing Project. VCPA consists of two independent but linkable components: pipeline and tracking database. The pipeline, implemented using the Workflow Description Language and fully optimized for the Amazon elastic compute cloud environment, includes steps from aligning raw sequence reads to variant calling using GATK. The tracking database allows users to view job running status in real time and visualize >100 quality metrics per genome. VCPA is functionally equivalent to the CCDG/TOPMed pipeline. Users can use the pipeline and the dockerized database to process large WGS/WES datasets on Amazon cloud with minimal configuration.

Availability and implementation

VCPA is released under the MIT license and is available for academic and nonprofit use for free. The pipeline source code and step-by-step instructions are available from the National Institute on Aging Genetics of Alzheimer's Disease Data Storage Site (http://www.niagads.org/VCPA).

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +34180418,Association Analysis of Chromosome X to Identify Genetic Modifiers of Huntington's Disease.,"

Background

Huntington's disease (HD) is caused by an expanded (>35) CAG trinucleotide repeat in huntingtin (HTT). Age-at-onset of motor symptoms is inversely correlated with the size of the inherited CAG repeat, which expands further in brain regions due to somatic repeat instability. Our recent genetic investigation focusing on autosomal SNPs revealed that age-at-onset is also influenced by genetic variation at many loci, the majority of which encode genes involved in DNA maintenance/repair processes and repeat instability.

Objective

We performed a complementary association analysis to determine whether variants in the X chromosome modify HD.

Methods

We imputed SNPs on chromosome X for ∼9,000 HD subjects of European ancestry and performed an X chromosome-wide association study (XWAS) to test for association with age-at-onset corrected for inherited CAG repeat length.

Results

In a mixed effects model XWAS analysis of all subjects (males and females), assuming random X-inactivation in females, no genome-wide significant onset modification signal was found. However, suggestive significant association signals were detected at Xq12 (top SNP, rs59098970; p-value, 1.4E-6), near moesin (MSN), in a region devoid of DNA maintenance genes. Additional suggestive signals not involving DNA repair genes were observed in male- and female-only analyses at other locations.

Conclusion

Although not genome-wide significant, potentially due to small effect size compared to the power of the current study, our data leave open the possibility of modification of HD by a non-DNA repair process. Our XWAS results are publicly available at the updated GEM EURO 9K website hosted at https://www.hdinhd.org/ for browsing, pathway analysis, and data download.",2021-01-01 +33970805,Medicinal plant compounds as promising inhibitors of coronavirus (COVID-19) main protease: an in silico study.,"The novel Coronavirus (COVID-19) has spread rapidly across the globe and has involved more than 215 countries and territories. Due to a lack of effective therapy or vaccine, urgent and concerted efforts are needed to identify therapeutic targets and medications. COVID-19 main protease represents a major target for drug treatment to inhibit viral function. The present study sought to evaluate medicinal plant compounds as potential inhibitors of the COVID-19 main protease using molecular docking and molecular dynamic analysis. The PDB files of COVID-19 main protease and some medicinal plant compounds were retrieved from the Protein Data Bank (http://www.rcsb.org) and Pubchem server, respectively. The Gromacs software was used for simulation studies, and molecular docking analysis was done using Autodock 4.2. The COVID-19 main protease simulation, compared with some phytochemicals docked to the COVID-19 main protease, were analyzed. Glabridin, catechin, and fisetin had the greatest tendency to interact with the COVID-19 main protease by hydrogen and hydrophobic interactions. Docking of these phytochemicals to COVID-19 main protease led to an increase in the radius of gyration (Rg), decrease in the Root mean square fluctuation (RMSF), and induced variation in COVID-19 main protease secondary structure. The high tendency interaction of glabridin, catechin, and fisetin to COVID-19 main protease induced conformational changes on this enzyme. 
These interactions can lead to enzyme inhibition. This simulated study indicates that these phytochemicals may be considered as potent inhibitors of the viral protease; however, more investigations are required to explore their potential medicinal use. Communicated by Ramaswamy H. Sarma.",2021-05-10 +32946256,Evaluation of Hearing Aids in Everyday Life Using Ecological Momentary Assessment: What Situations Are We Missing?,"Background Ecological momentary assessment (EMA) is a method to evaluate hearing aids in everyday life that uses repeated smartphone-based questionnaires to assess a situation as it happens. Although being ecologically valid and avoiding memory bias, this method may be prone to selection biases due to questionnaires being skipped or the phone not being carried along in certain situations. Purpose This investigation analyzed which situations are underrepresented in questionnaire responses and physically measured objective EMA data (e.g., sound level), and how such underrepresentation may depend on different triggers. Method In an EMA study, 20 subjects with hearing impairment provided daily information on reasons for missed data, that is, skipped questionnaires or missing connections between their phone and hearing aids. Results Participants often deliberately did not bring the study phone to social situations or skipped questionnaires because they considered it inappropriate, for example, during church service or when engaging in conversation. They answered fewer questions in conversations with multiple partners and were more likely to postpone questionnaires when not in quiet environments. Conclusion Data for social situations will likely be underrepresented in EMA. However, these situations are particularly important for the evaluation of hearing aids, as individuals with hearing impairment often have difficulties communicating in noisy situations. 
Thus, it is vital to optimize the design of the study to find a balance between avoiding memory bias and enabling subjects to report retrospectively on situations where phone usage may be difficult. The implications for several applications of EMA are discussed. Supplemental Material https://doi.org/10.23641/asha.12746849.",2020-09-18 +32652015,Augmented base pairing networks encode RNA-small molecule binding preferences.,"RNA-small molecule binding is a key regulatory mechanism which can stabilize 3D structures and activate molecular functions. The discovery of RNA-targeting compounds is thus a current topic of interest for novel therapies. Our work is a first attempt at bringing the scalability and generalization abilities of machine learning methods to the problem of RNA drug discovery, as well as a step towards understanding the interactions which drive binding specificity. Our tool, RNAmigos, builds and encodes a network representation of RNA structures to predict likely ligands for novel binding sites. We subject ligand predictions to virtual screening and show that we are able to place the true ligand in the 71st-73rd percentile in two decoy libraries, showing a significant improvement over several baselines, and a state of the art method. Furthermore, we observe that augmenting structural networks with non-canonical base pairing data is the only representation able to uncover a significant signal, suggesting that such interactions are a necessary source of binding specificity. We also find that pre-training with an auxiliary graph representation learning task significantly boosts performance of ligand prediction. This finding can serve as a general principle for RNA structure-function prediction when data is scarce. RNAmigos shows that RNA binding data contains structural patterns with potential for drug discovery, and provides methodological insights for possible applications to other structure-function learning tasks. 
The source code, data and a Web server are freely available at http://rnamigos.cs.mcgill.ca.",2020-08-01 +35082675,In Silico Prediction and Insights Into the Structural Basis of Drug Induced Nephrotoxicity.,"Drug induced nephrotoxicity is a major clinical challenge, and it is always associated with higher costs for the pharmaceutical industry and due to detection during the late stages of drug development. It is desirable for improving the health outcomes for patients to distinguish nephrotoxic structures at an early stage of drug development. In this study, we focused on in silico prediction and insights into the structural basis of drug induced nephrotoxicity, based on reliable data on human nephrotoxicity. We collected 565 diverse chemical structures, including 287 nephrotoxic drugs on humans in the real world, and 278 non-nephrotoxic approved drugs. Several different machine learning and deep learning algorithms were employed for in silico model building. Then, a consensus model was developed based on three best individual models (RFR_QNPR, XGBOOST_QNPR, and CNF). The consensus model performed much better than individual models on internal validation and it achieved prediction accuracy of 86.24% external validation. The results of analysis of molecular properties differences between nephrotoxic and non-nephrotoxic structures indicated that several key molecular properties differ significantly, including molecular weight (MW), molecular polar surface area (MPSA), AlogP, number of hydrogen bond acceptors (nHBA), molecular solubility (LogS), the number of rotatable bonds (nRotB), and the number of aromatic rings (nAR). These molecular properties may be able to play an important part in the identification of nephrotoxic chemicals. Finally, 87 structural alerts for chemical nephrotoxicity were mined with f-score and positive rate analysis of substructures from Klekota-Roth fingerprint (KRFP). 
These structural alerts can well identify nephrotoxic drug structures in the data set. The in silico models and the structural alerts could be freely accessed via https://ochem.eu/article/140251 and http://www.sapredictor.cn, respectively. We hope the results should provide useful tools for early nephrotoxicity estimation in drug development.",2021-01-01 +26800248,The Salmonella In Silico Typing Resource (SISTR): An Open Web-Accessible Tool for Rapidly Typing and Subtyping Draft Salmonella Genome Assemblies.,"For nearly 100 years serotyping has been the gold standard for the identification of Salmonella serovars. Despite the increasing adoption of DNA-based subtyping approaches, serotype information remains a cornerstone in food safety and public health activities aimed at reducing the burden of salmonellosis. At the same time, recent advances in whole-genome sequencing (WGS) promise to revolutionize our ability to perform advanced pathogen characterization in support of improved source attribution and outbreak analysis. We present the Salmonella In Silico Typing Resource (SISTR), a bioinformatics platform for rapidly performing simultaneous in silico analyses for several leading subtyping methods on draft Salmonella genome assemblies. In addition to performing serovar prediction by genoserotyping, this resource integrates sequence-based typing analyses for: Multi-Locus Sequence Typing (MLST), ribosomal MLST (rMLST), and core genome MLST (cgMLST). We show how phylogenetic context from cgMLST analysis can supplement the genoserotyping analysis and increase the accuracy of in silico serovar prediction to over 94.6% on a dataset comprised of 4,188 finished genomes and WGS draft assemblies. In addition to allowing analysis of user-uploaded whole-genome assemblies, the SISTR platform incorporates a database comprising over 4,000 publicly available genomes, allowing users to place their isolates in a broader phylogenetic and epidemiological context. 
The resource incorporates several metadata driven visualizations to examine the phylogenetic, geospatial and temporal distribution of genome-sequenced isolates. As sequencing of Salmonella isolates at public health laboratories around the world becomes increasingly common, rapid in silico analysis of minimally processed draft genome assemblies provides a powerful approach for molecular epidemiology in support of public health investigations. Moreover, this type of integrated analysis using multiple sequence-based methods of sub-typing allows for continuity with historical serotyping data as we transition towards the increasing adoption of genomic analyses in epidemiology. The SISTR platform is freely available on the web at https://lfz.corefacility.ca/sistr-app/.",2016-01-22 +33581337,PM2RA: A Framework for Detecting and Quantifying Relationship Alterations in Microbial Community.,"The dysbiosis of gut microbiota is associated with the pathogenesis of human diseases. However, observing shifts in the microbe abundance cannot fully reveal underlying perturbations. Examining the relationship alterations (RAs) in the microbiome between health and disease statuses provides additional hints about the pathogenesis of human diseases, but no methods were designed to detect and quantify the RAs between different conditions directly. Here, we present profile monitoring for microbial relationship alteration (PM2RA), an analysis framework to identify and quantify the microbial RAs. The performance of PM2RA was evaluated with synthetic data, and it showed higher specificity and sensitivity than the co-occurrence-based methods. Analyses of real microbial datasets showed that PM2RA was robust for quantifying microbial RAs across different datasets in several diseases. By applying PM2RA, we identified several novel or previously reported microbes implicated in multiple diseases. 
PM2RA is now implemented as a web-based application available at http://www.pm2ra-xingyinliulab.cn/.",2021-02-11 +34141462,Estimation of the probability of daily fluctuations of incidence of COVID-19 according to official data.,"When studying the dynamics of morbidity and mortality, one should not limit oneself to analyzing general trends. Interesting information can be obtained from the analysis of deviations in morbidity and mortality from the general dynamics. Comparison of the cases of morbidity or death for adjacent time intervals allows us to find out whether the changes in conditions were for short periods of time and whether the cases of morbidity or death were independent. The article consists of two parts: Study of the probability distribution (CDF) of the difference between two independent observations of the Poisson distribution; Application of the results to analyze the morbidity and mortality trends by day for the new coronavirus infection. For the distribution function of the module of difference between two independent observations of the Poisson distribution, an analytical expression has been obtained that allows to get an exact solution. A program has been created, whose software can be downloaded at http://1mgmu.com/nau/DeltaPoisson/DeltaPoisson.zip. An approximate solution that does not require complex calculations has also been obtained, which can be used for an average of more than 20. If real difference is greater than expected, it may be in the following cases: morbidity or mortality varies considerably during the day. That could happen, for example, if the registered number of morbidity on Saturday and Sunday is less than on weekdays due to the management model of the health system, or if the cases are not independent; for example, due to the active identification of infected people among those who have come into contact with the patient. 
If the difference is less than expected, it may be due to external limiting factors, such as a shortage of test systems for making a diagnosis, a limited number of pathologists to determine the cause of death, and so on. In the analysis of the actual data for COVID-19 it was found that for Poland and Russia, excluding Moscow, the difference in the number of cases and deaths is greater than expected, while for Moscow-less than expected. This may be due to the information policy-the effort to somehow reassure Moscow's population, which in the spring of 2020 had a high incidence rate of the new coronavirus infection.",2021-06-04 +29069403,"PLAZA 4.0: an integrative resource for functional, evolutionary and comparative plant genomics.","PLAZA (https://bioinformatics.psb.ugent.be/plaza) is a plant-oriented online resource for comparative, evolutionary and functional genomics. The PLAZA platform consists of multiple independent instances focusing on different plant clades, while also providing access to a consistent set of reference species. Each PLAZA instance contains structural and functional gene annotations, gene family data and phylogenetic trees and detailed gene colinearity information. A user-friendly web interface makes the necessary tools and visualizations accessible, specific for each data type. Here we present PLAZA 4.0, the latest iteration of the PLAZA framework. This version consists of two new instances (Dicots 4.0 and Monocots 4.0) providing a large increase in newly available species, and offers access to updated and newly implemented tools and visualizations, helping users with the ever-increasing demands for complex and in-depth analyzes. The total number of species across both instances nearly doubles from 37 species in PLAZA 3.0 to 71 species in PLAZA 4.0, with a much broader coverage of crop species (e.g. wheat, palm oil) and species of evolutionary interest (e.g. spruce, Marchantia). 
The new PLAZA instances can also be accessed by a programming interface through a RESTful web service, thus allowing bioinformaticians to optimally leverage the power of the PLAZA platform.",2018-01-01 +27110440,The Resource Identification Initiative: a cultural shift in publishing.,"A central tenet in support of research reproducibility is the ability to uniquely identify research resources, that is, reagents, tools, and materials that are used to perform experiments. However, current reporting practices for research resources are insufficient to identify the exact resources that are reported or to answer basic questions such as ""How did other studies use resource X?"" To address this issue, the Resource Identification Initiative was launched as a pilot project to improve the reporting standards for research resources in the methods sections of papers and thereby improve identifiability and scientific reproducibility. The pilot engaged over 25 biomedical journal editors from most major publishers, as well as scientists and funding officials. Authors were asked to include Research Resource Identifiers (RRIDs) in their manuscripts prior to publication for three resource types: antibodies, model organisms, and tools (i.e., software and databases). RRIDs are assigned by an authoritative database, for example, a model organism database for each type of resource. To make it easier for authors to obtain RRIDs, resources were aggregated from the appropriate databases and their RRIDs made available in a central web portal ( http://scicrunch.org/resources). RRIDs meet three key criteria: they are machine readable, free to generate and access, and are consistent across publishers and journals. The pilot was launched in February of 2014 and over 300 papers have appeared that report RRIDs. The number of journals participating has expanded from the original 25 to more than 40 with RRIDs appearing in 62 different journals to date. 
Here, we present an overview of the pilot project and its outcomes to date. We show that authors are able to identify resources and are supportive of the goals of the project. Identifiability of the resources post-pilot showed a dramatic improvement for all three resource types, suggesting that the project has had a significant impact on identifiability of research resources.",2015-12-08 +33681984,Characterizing protein conformers by cross-linking mass spectrometry and pattern recognition. ,"Chemical cross-linking coupled to mass spectrometry (XLMS) emerged as a powerful technique for studying protein structures and large-scale protein-protein interactions. Nonetheless, XLMS lacks software tailored toward dealing with multiple conformers; this scenario can lead to high-quality identifications that are mutually exclusive. This limitation hampers the applicability of XLMS in structural experiments of dynamic protein systems, where less abundant conformers of the target protein are expected in the sample. We present QUIN-XL, a software that uses unsupervised clustering to group cross-link identifications by their quantitative profile across multiple samples. QUIN-XL highlights regions of the protein or system presenting changes in its conformation when comparing different biological conditions. We demonstrate our software's usefulness by revisiting the HSP90 protein, comparing three of its different conformers. QUIN-XL's clusters correlate directly to known protein 3D structures of the conformers and therefore validates our software. QUIN-XL and a user tutorial are freely available at http://patternlabforproteomics.org/quinxl for academic users. Supplementary data are available at Bioinformatics online.",2021-03-03 +32573648,BioModels Parameters: a treasure trove of parameter values from published systems biology models.,"

Motivation

One of the major bottlenecks in building systems biology models is identification and estimation of model parameters for model calibration. Searching for model parameters from published literature and models is an essential, yet laborious task.

Results

We have developed a new service, BioModels Parameters, to facilitate search and retrieval of parameter values from the Systems Biology Markup Language models stored in BioModels. Modellers can now directly search for a model entity (e.g. a protein or drug) to retrieve the rate equations describing it; the associated parameter values (e.g. degradation rate, production rate, Kcat, Michaelis-Menten constant, etc.) and the initial concentrations. Currently, BioModels Parameters contains entries from over 84,000 reactions and 60 different taxa with cross-references. The retrieved rate equations and parameters can be used for scanning parameter ranges, model fitting and model extension. Thus, BioModels Parameters will be a valuable service for systems biology modellers.

Availability and implementation

The data are accessible via web interface and API. BioModels Parameters is free to use and is publicly available at https://www.ebi.ac.uk/biomodels/parameterSearch.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-11-01 +31667506,SEA version 3.0: a comprehensive extension and update of the Super-Enhancer archive.,"Super-enhancers (SEs) are critical for the transcriptional regulation of gene expression. We developed the super-enhancer archive version 3.0 (SEA v. 3.0, http://sea.edbc.org) to extend SE research. SEA v. 3.0 provides the most comprehensive archive to date, consisting of 164 545 super-enhancers. Of these, 80 549 are newly identified from 266 cell types/tissues/diseases using an optimized computational strategy, and 52 have been experimentally confirmed with manually curated references. We now support super-enhancers in 11 species including 7 new species (zebrafish, chicken, chimp, rhesus, sheep, Xenopus tropicalis and stickleback). To facilitate super-enhancer functional analysis, we added several new regulatory datasets including 3 361 785 typical enhancers, chromatin interactions, SNPs, transcription factor binding sites and SpCas9 target sites. We also updated or developed new criteria query, genome visualization and analysis tools for the archive. This includes a tool based on Shannon Entropy to evaluate SE cell type specificity, a new genome browser that enables the visualization of SE spatial interactions based on Hi-C data, and an enhanced enrichment analysis interface that provides online enrichment analyses of SE related genes. SEA v. 3.0 provides a comprehensive database of all available SE information across multiple species, and will facilitate super-enhancer research, especially as related to development and disease.",2020-01-01 +34259569,Gene-Environment Interactions in Developmental Neurotoxicity: a Case Study of Synergy between Chlorpyrifos and CHD8 Knockout in Human BrainSpheres.,"

Background

Autism spectrum disorder (ASD) is a major public health concern caused by complex genetic and environmental components. Mechanisms of gene-environment (G×E) interactions and reliable biomarkers associated with ASD are mostly unknown or controversial. Induced pluripotent stem cells (iPSCs) from patients or with clustered regularly interspaced short palindromic repeats and CRISPR-associated protein 9 (CRISPR/Cas9)-introduced mutations in candidate ASD genes provide an opportunity to study (G×E) interactions.

Objectives

In this study, we aimed to identify a potential synergy between mutation in the high-risk autism gene encoding chromodomain helicase DNA binding protein 8 (CHD8) and environmental exposure to an organophosphate pesticide (chlorpyrifos; CPF) in an iPSC-derived human three-dimensional (3D) brain model.

Methods

This study employed human iPSC-derived 3D brain organoids (BrainSpheres) carrying a heterozygote CRISPR/Cas9-introduced inactivating mutation in CHD8 and exposed to CPF or its oxon-metabolite (CPO). Neural differentiation, viability, oxidative stress, and neurite outgrowth were assessed, and levels of main neurotransmitters and selected metabolites were validated against human data on ASD metabolic derangements.

Results

Expression of CHD8 protein was significantly lower in CHD8 heterozygous knockout (CHD8+/-) BrainSpheres compared with CHD8+/+ ones. Exposure to CPF/CPO treatment further reduced CHD8 protein levels, showing the potential (G×E) interaction synergy. A novel approach for validation of the model was chosen: from the literature, we identified a panel of metabolic biomarkers in patients and assessed them by targeted metabolomics in vitro. A synergistic effect was observed on the cholinergic system, S-adenosylmethionine, S-adenosylhomocysteine, lactic acid, tryptophan, kynurenic acid, and α-hydroxyglutaric acid levels. Neurite outgrowth was perturbed by CPF/CPO exposure. Heterozygous knockout of CHD8 in BrainSpheres led to an imbalance of excitatory/inhibitory neurotransmitters and lower levels of dopamine.

Discussion

This study pioneered (G×E) interaction in iPSC-derived organoids. The experimental strategy enables biomonitoring and environmental risk assessment for ASD. Our findings reflected some metabolic perturbations and disruption of neurotransmitter systems involved in ASD. The increased susceptibility of CHD8+/- BrainSpheres to chemical insult establishes a possibly broader role of (G×E) interaction in ASD. https://doi.org/10.1289/EHP8580.",2021-07-14 +33055180,Meningococcal Deduced Vaccine Antigen Reactivity (MenDeVAR) Index: a Rapid and Accessible Tool That Exploits Genomic Data in Public Health and Clinical Microbiology Applications. ,"As microbial genomics makes increasingly important contributions to clinical and public health microbiology, the interpretation of whole-genome sequence data by nonspecialists becomes essential. In the absence of capsule-based vaccines, two protein-based vaccines have been used for the prevention of invasive serogroup B meningococcal disease (IMD) since their licensure in 2013 and 2014. These vaccines have different components and different levels of coverage of meningococcal variants. Hence, decisions regarding which vaccine to use in managing serogroup B IMD outbreaks require information about the index case isolate, including (i) the presence of particular vaccine antigen variants, (ii) the expression of vaccine antigens, and (iii) the likely susceptibility of its antigen variants to antibody-dependent bactericidal killing. To obtain this information requires a multitude of laboratory assays, impractical in real-time clinical settings, where the information is most urgently needed. To facilitate assessment for public health and clinical purposes, we synthesized genomic and experimental data from published sources to develop and implement the Meningococcal Deduced Vaccine Antigen Reactivity (MenDeVAR) Index, which is publicly available on PubMLST (https://pubmlst.org). 
Using whole-genome sequences or individual gene sequences obtained from IMD isolates or clinical specimens, the MenDeVAR Index provides rapid evidence-based information on the presence and possible immunological cross-reactivity of different meningococcal vaccine antigen variants. The MenDeVAR Index enables practitioners who are not genomics specialists to assess the likely reactivity of vaccines for individual cases, outbreak management, or the assessment of public health vaccine programs. The MenDeVAR Index has been developed in consultation with, but independently of, both the 4CMenB (Bexsero; GSK) and rLP2086 (Trumenba; Pfizer, Inc.) vaccine manufacturers.",2020-12-17 +33019308,BigO: A public health decision support system for measuring obesogenic behaviors of children in relation to their local environment.,"Obesity is a complex disease and its prevalence depends on multiple factors related to the local socioeconomic, cultural and urban context of individuals. Many obesity prevention strategies and policies, however, are horizontal measures that do not depend on context-specific evidence. In this paper we present an overview of BigO (http://bigoprogram.eu), a system designed to collect objective behavioral data from children and adolescent populations as well as their environment in order to support public health authorities in formulating effective, context-specific policies and interventions addressing childhood obesity. We present an overview of the data acquisition, indicator extraction, data exploration and analysis components of the BigO system, as well as an account of its preliminary pilot application in 33 schools and 2 clinics in four European countries, involving over 4,200 participants.",2020-07-01 +29036529,ITSoneDB: a comprehensive collection of eukaryotic ribosomal RNA Internal Transcribed Spacer 1 (ITS1) sequences.,"A holistic understanding of environmental communities is the new challenge of metagenomics. 
Accordingly, the amplicon-based or metabarcoding approach, largely applied to investigate bacterial microbiomes, is moving to the eukaryotic world too. Indeed, the analysis of metabarcoding data may provide a comprehensive assessment of both bacterial and eukaryotic composition in a variety of environments, including human body. In this respect, whereas hypervariable regions of the 16S rRNA are the de facto standard barcode for bacteria, the Internal Transcribed Spacer 1 (ITS1) of ribosomal RNA gene cluster has shown a high potential in discriminating eukaryotes at deep taxonomic levels. As metabarcoding data analysis rely on the availability of a well-curated barcode reference resource, a comprehensive collection of ITS1 sequences supplied with robust taxonomies, is highly needed. To address this issue, we created ITSoneDB (available at http://itsonedb.cloud.ba.infn.it/) which in its current version hosts 985 240 ITS1 sequences spanning over 134 000 eukaryotic species. Each ITS1 is mapped on the NCBI reference taxonomy with its start and end positions precisely annotated. ITSoneDB has been developed in agreement to the FAIR guidelines by enabling the users to query and download its content through a simple web-interface and access relevant metadata by cross-linking to European Nucleotide Archive.",2018-01-01 +33385002,DGCR8/miR-106 Axis Enhances Radiosensitivity of Head and Neck Squamous Cell Carcinomas by Downregulating RUNX3.,"Purpose: Head and neck squamous cell carcinoma (HNSCC) is the sixth most prevalent malignant tumor worldwide, and the radiotherapy effect is strongly associated with human papillomavirus (HPV) infection. Therefore, the aim of our study was to analyze the mechanism of HPV E7 and its effects on radiosensitivity in HNSCC cells. Methods: The mRNA expression of DiGeorge syndrome critical region gene 8 (DGCR8), has-miR-106a, and Runt-related transcription factor 3 (RUNX3) was examined by quantitative real-time PCR (RT-qPCR). 
The protein expression of DGCR8, E7, RUNX3, caspase-3/cleaved caspase-3, poly(ADP-ribose) polymerase (PARP)/cleaved PARP, and γH2AX was measured by Western blot. The expression level of DGCR8 was measured by immunofluorescence assay. Starbase database (http://starbase.sysu.edu.cn/) was used to analyze the correlation between has-miR-106a-5p and DGCR8. TargetScan database (http://www.targetscan.org/vert_72/) was adopted to calculate the prediction of binding sites. Radiosensitivity was evaluated through clone formation assays and Cell Counting Kit-8 (CCK-8) assays. Results: In our study, we found that the mRNA and protein expression levels of HPV E7 and DGCR8 in HPV-positive HNSCC cells were higher than those in HPV-negative cells. The expression of DGCR8 was increased in FaDu and UM-SCC-4 with E7 overexpression, while the expression of DGCR8 was decreased in UM-SCC-47 and UPCI-SCC-090 with E7 silence. The miR-106a expression was increased after DGCR8 overexpression in FaDu and UM-SCC-4. However, the miR-106a expression was decreased in UM-SCC-47 and UPCI-SCC-090 with E7 silence. In radiation conditions, clone formation assays found that less clones formed in FaDu and UM-SCC-4 cells subsequent to silencing DGCR8 or miR-106a than that in the control group, and more clones were formed in UM-SCC-47 and UPCI-SCC-090 cells overexpressing DGCR8 or miR-106a than that in the control group. Luciferase reporter gene assays verified that miR-106a targeted the 3' untranslated region (UTR) of RUNX3 mRNA. MiR-106a overexpression resulted in a decrease in RUNX3 expression, and miR-106a silence increased RUNX3 expression. Rescue experiments conducted with miR-106a inhibitor restored radiation resistance and reduced DNA damage in radiation condition. 
Conclusions: Our study indicated that HPV E7 activated DGCR8/miR-106a/RUNX3 axis to enhance radiation sensitivity and provided directions for targeted therapeutic interventions.",2020-12-15 +33236927,Quantitative in Vitro to in Vivo Extrapolation (QIVIVE) for Predicting Reduced Anogenital Distance Produced by Anti-Androgenic Pesticides in a Rodent Model for Male Reproductive Disorders.,"

Background

Many pesticides can antagonize the androgen receptor (AR) or inhibit androgen synthesis in vitro but their potential to cause reproductive toxicity related to disruption of androgen action during fetal life is difficult to predict. Currently no approaches for using in vitro data to anticipate such in vivo effects exist. Prioritization schemes that limit unnecessary in vivo testing are urgently needed.

Objectives

The aim was to develop a quantitative in vitro to in vivo extrapolation (QIVIVE) approach for predicting in vivo anti-androgenicity arising from gestational exposures and manifesting as a shortened anogenital distance (AGD) in male rats.

Methods

We built a physiologically based pharmacokinetic (PBK) model to simulate concentrations of chemicals in the fetus resulting from maternal dosing. The predicted fetal levels were compared with analytically determined concentrations, and these were judged against in vitro active concentrations for AR antagonism and androgen synthesis suppression.

Results

We first evaluated our model by using in vitro and in vivo anti-androgenic data for procymidone, vinclozolin, and linuron. Our PBK model described the measured fetal concentrations of parent compounds and metabolites quite accurately (within a factor of five). We applied the model to nine current-use pesticides, all with in vitro evidence for anti-androgenicity but missing in vivo data. Seven pesticides (fludioxonil, cyprodinil, dimethomorph, imazalil, quinoxyfen, fenhexamid, o-phenylphenol) were predicted to produce a shortened AGD in male pups, whereas two (λ-cyhalothrin, pyrimethanil) were anticipated to be inactive. We tested these expectations for fludioxonil, cyprodinil, and dimethomorph and observed shortened AGD in male pups after gestational exposure. The measured fetal concentrations agreed well with PBK-modeled predictions.

Discussion

Our QIVIVE model newly identified fludioxonil, cyprodinil, and dimethomorph as in vivo anti-androgens. With the examples investigated, our approach shows great promise for predicting in vivo anti-androgenicity (i.e., AGD shortening) for chemicals with in vitro activity and for minimizing unnecessary in vivo testing. https://doi.org/10.1289/EHP6774.",2020-11-25 +32756939,IDRMutPred: predicting disease-associated germline nonsynonymous single nucleotide variants (nsSNVs) in intrinsically disordered regions.,"

Motivation

Despite of the lack of folded structure, intrinsically disordered regions (IDRs) of proteins play versatile roles in various biological processes, and many nonsynonymous single nucleotide variants (nsSNVs) in IDRs are associated with human diseases. The continuous accumulation of nsSNVs resulted from the wide application of NGS has driven the development of disease-association prediction methods for decades. However, their performance on nsSNVs in IDRs remains inferior, possibly due to the domination of nsSNVs from structured regions in training data. Therefore, it is highly demanding to build a disease-association predictor specifically for nsSNVs in IDRs with better performance.

Results

We present IDRMutPred, a machine learning-based tool specifically for predicting disease-associated germline nsSNVs in IDRs. Based on 17 selected optimal features that are extracted from sequence alignments, protein annotations, hydrophobicity indices and disorder scores, IDRMutPred was trained using three ensemble learning algorithms on the training dataset containing only IDR nsSNVs. The evaluation on the two testing datasets shows that all the three prediction models outperform 17 other popular general predictors significantly, achieving the ACC between 0.856 and 0.868 and MCC between 0.713 and 0.737. IDRMutPred will prioritize disease-associated IDR germline nsSNVs more reliably than general predictors.

Availability and implementation

The software is freely available at http://www.wdspdb.com/IDRMutPred.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +31568974,Phasic dopamine release identification using convolutional neural network.,"Dopamine has a major behavioral impact related to drug dependence, learning and memory functions, as well as pathologies such as schizophrenia and Parkinson's disease. Phasic release of dopamine can be measured in vivo with fast-scan cyclic voltammetry. However, even for a specialist, manual analysis of experiment results is a repetitive and time consuming task. This work aims to improve the automatic dopamine identification from fast-scan cyclic voltammetry data using convolutional neural networks (CNN). The best performance obtained in the experiments achieved an accuracy of 98.31% using a combined CNN approach. The end-to-end object detection system using YOLOv3 achieved an accuracy of 97.66%. Also, a new public dopamine release dataset was presented, and it is available at https://web.inf.ufpr.br/vri/databases/phasicdopaminerelease/.",2019-09-25 +33890543,The latest development of the DELAD project for sharing corpora of speech disorders.,"Corpora of speech of individuals with communication disorders (CSD) are invaluable resources for education and research, but they are costly and hard to build and difficult to share for various reasons. DELAD, which means 'shared' in Swedish, is a project initiated by Professors Nicole Müller and Martin Ball in 2015 that aims to address this issue by establishing a platform for researchers to share datasets of speech disorders with interested audiences. To date four workshops have been held, where selected participants, covering various expertise including researchers in clinical phonetics and linguistics, speech and language therapy, infrastructure specialists, and ethics and legal specialists, participated to discuss relevant issues in setting up such an archive. 
Positive and steady progress has been made since 2015, including refurbishing the DELAD website (http://delad.net/) with information and application forms for researchers to join and share their datasets and linking with the CLARIN K-Centre for Atypical Communication Expertise (https://ace.ruhosting.nl/) where CSD can be hosted and accessed through the CLARIN B-Centres, The Language Archive (https://tla.mpi.nl/tools/tla-tools/) and TalkBank (https://talkbank.org/). The latest workshop, which was funded by CLARIN (Common Language Resources and Technology Infrastructure) was held as an online event in January 2021 on topics including Data Protection Impact Assessments, reviewing changes in ethics perspectives in academia on sharing CSD, and voice conversion as a mean to pseudonomise speech. This paper reports the latest progress of DELAD and discusses the directions for further advance of the initiative, with information on how researchers can contribute to the repository.",2021-04-23 +31134776,Nationwide General Hospital Morbidity Study as a source of data about Polish population health,"The objective of this article is description of the important source of data on hospitalised morbidity collected in Poland within the frameworks of public statistics, and also underlying the significance of the quality of data collected at the hospital level for the purpose of the practical application of them. +The Nationwide General Hospital Morbidity Study has been conducted by the Department of Population Health Monitoring and Analysis of the NIPH-NIH for more than 40 years within the frameworks of the Programme of Statistical Surveys of Official Statistics. 
Since the year 2000, it has had a complete character, collecting individual data for all cases of hospitalisation in Poland within the scope compliant with the MZ/Szp-11 form, among others, sex, age, place of residence of a patient, data on hospital, information about the course of treatment (length of stay in hospital, principal diagnosis and comorbidity, applied medical procedures, the mode of admission and discharge). The collected data are anonymous. In the recent years, annually, more than 8 million of the cases of hospitalisation have been documented. +Almost all obliged hospitals (96%) participate in the study. Some weak point is the completeness of submitted data, in particular, regarding information about the external causes of injuries and poisonings (in the year 2017, 27% of the required data were missing). Interestingly, a high level of missing data is generated by a comparatively small number of hospitals. Significant differences are observed in the quality of data depending on the voivodship of hospital location. +The collected data render it possible to, among others, determine the frequency of hospitalisations due to particular causes taking under consideration the localisation of a hospital, and also the sex, age and the place of residence of patients, the analysis of the duration of hospital stay or in-hospital fatality. The aggregated results are reported to international databases (WHO, OECD, Eurostat), and support the administrative authorities of various levels. They are also a basis for numerous published scientific papers. 
The most important results of routine analyses are presented of the web page of the Study (http://www.statystyka1.medstat.waw.pl/).",2019-01-01 +30462320,COXPRESdb v7: a gene coexpression database for 11 animal species supported by 23 coexpression platforms for technical evaluation and evolutionary inference.,"The advent of RNA-sequencing and microarray technologies has led to rapid growth of transcriptome data generated for a wide range of organisms, under various cellular, organ and individual conditions. Since the number of possible combinations of intercellular and extracellular conditions is almost unlimited, cataloging all transcriptome conditions would be an immeasurable challenge. Gene coexpression refers to the similarity of gene expression patterns under various conditions, such as disease states, tissue types, and developmental stages. Since the quality of gene coexpression data depends on the quality and quantity of transcriptome data, timely usage of the growing data is key to promoting individual research in molecular biology. COXPRESdb (http://coxpresdb.jp) is a database providing coexpression information for 11 animal species. One characteristic feature of COXPRESdb is its ability to compare multiple coexpression data derived from different transcriptomics technologies and different species, which strongly reduces false positive relationships in individual gene coexpression data. Here, we summarized the current version of this database, including 23 coexpression platforms with the highest-level quality till date. Using various functionalities in COXPRESdb, the new coexpression data would support a broader area of research from molecular biology to medical sciences.",2019-01-01 +33084388,The 'SELection End points in Communities of bacTeria' (SELECT) Method: A Novel Experimental Assay to Facilitate Risk Assessment of Selection for Antimicrobial Resistance in the Environment.,"

Background

Antimicrobial resistance (AMR) is one of the most significant health threats to society. A growing body of research demonstrates selection for AMR likely occurs at environmental concentrations of antibiotics. However, no standardized experimental approaches for determining selective concentrations of antimicrobials currently exist, preventing appropriate environmental and human health risk assessment of AMR.

Objectives

We aimed to design a rapid, simple, and cost-effective novel experimental assay to determine selective effect concentrations of antibiotics and to generate the largest experimental data set of selective effect concentrations of antibiotics to date.

Methods

Previously published methods and data were used to validate the assay, which determines the effect concentration based on reduction of bacterial community (wastewater) growth. Risk quotients for test antibiotics were generated to quantify risk.

Results

The assay (SELection End points in Communities of bacTeria, or the SELECT method) was used to rapidly determine selective effect concentrations of antibiotics. These were in good agreement with quantitative polymerase chain reaction effect concentrations determined within the same experimental system. The SELECT method predicted no effect concentrations were minimally affected by changes in the assay temperature, growth media, or microbial community used as the inoculum. The predicted no effect concentrations for antibiotics tested ranged from 0.05μg/L for ciprofloxacin to 1,250μg/L for erythromycin.

Discussion

The lack of evidence demonstrating environmental selection for AMR, and of associated human health risks, is a primary reason for the lack of action in the mitigation of release of antibiotics into the aquatic environment. We present a novel method that can reliably and rapidly fill this data gap to enable regulation and subsequent mitigation (where required) to lower the risk of selection for, and human exposure to, AMR in aquatic environments. In particular, ciprofloxacin and, to a lesser extent, azithromycin, cefotaxime, and trimethoprim all pose a significant risk for selection of AMR in the environment. https://doi.org/10.1289/EHP6635.",2020-10-21 +33231322,Soybean transporter database: A comprehensive database for identification and exploration of natural variants in soybean transporter genes.,"Transporters, a class of membrane proteins that facilitate exchange of solutes including diverse molecules and ions across the cellular membrane, are vital component for the survival of all organisms. Understanding plant transporters is important to get insight of the basic cellular processes, physiology, and molecular mechanisms including nutrient uptake, signaling, response to external stress, and many more. In this regard, extensive analysis of transporters predicted in soybean and other plant species was performed. In addition, an integrated database for soybean transporter protein, SoyTD, was developed that will facilitate the identification, classification, and extensive characterization of transporter proteins by integrating expression, gene ontology, conserved domain and motifs, gene structure organization, and chromosomal distribution features. A comprehensive analysis was performed to identify highly confident transporters by integrating various prediction tools. 
Initially, 7541 transmembrane (TM) proteins were predicted in the soybean genome; out of these, 3306 non-redundant transporter genes carrying two or more transmembrane domains were selected for further analysis. The identified transporter genes were classified according to a standard transporter classification (TC) system. Comparative analysis of transporter genes among 47 plant genomes provided insights into expansion and duplication of transporter genes in land plants. The whole genome resequencing (WGRS) and tissue-specific transcriptome datasets of soybean were integrated to investigate the natural variants and expression profile associated with transporter(s) of interest. Overall, SoyTD provides a comprehensive interface to study genetic and molecular function of soybean transporters. SoyTD is publicly available at http://artemis.cyverse.org/soykb_dev/SoyTD/.",2020-12-14 +33794377,Identifying Novel Drug Targets by iDTPnd: A Case Study of Kinase Inhibitors.,"Current FDA-approved kinase inhibitors cause diverse adverse effects, some of which are due to the mechanism-independent effects of these drugs. Identifying these mechanism-independent interactions could improve drug safety and support drug repurposing. Here, we develop iDTPnd (integrated Drug Target Predictor with negative dataset), a computational approach for large-scale discovery of novel targets for known drugs. For a given drug, we construct a positive structural signature as well as a negative structural signature that captures the weakly conserved structural features of drug-binding sites. To facilitate assessment of unintended targets, iDTPnd also provides a docking-based interaction score and its statistical significance. We confirm the interactions of sorafenib, imatinib, dasatinib, sunitinib, and pazopanib with their known targets at a sensitivity of 52% and a specificity of 55%. We also validate 10 predicted novel targets by using in vitro experiments. 
Our results suggest that proteins other than kinases, such as nuclear receptors, cytochrome P450, and MHC class I molecules, can also be physiologically relevant targets of kinase inhibitors. Our method is general and broadly applicable for the identification of protein-small molecule interactions, when sufficient drug-target 3D data are available. The code for constructing the structural signatures is available at https://sfb.kaust.edu.sa/Documents/iDTP.zip.",2021-03-29 +30864352,GenCoNet - A Graph Database for the Analysis of Comorbidities by Gene Networks. ,"The prevalence of comorbid diseases poses a major health issue for millions of people worldwide and an enormous socio-economic burden for society. The molecular mechanisms for the development of comorbidities need to be investigated. For this purpose, a workflow system was developed to aggregate data on biomedical entities from heterogeneous data sources. The process of integrating and merging all data sources of the workflow system was implemented as a semi-automatic pipeline that provides the import, fusion, and analysis of the highly connected biomedical data in a Neo4j database GenCoNet. As a starting point, data on the common comorbid diseases essential hypertension and bronchial asthma was integrated. GenCoNet (https://genconet.kalis-amts.de) is a curated database that provides a better understanding of hereditary bases of comorbidities.",2018-12-25 +33516897,A Hilbert-based method for processing respiratory timeseries.,"In this technical note, we introduce a new method for estimating changes in respiratory volume per unit time (RVT) from respiratory bellows recordings. By using techniques from the electrophysiological literature, in particular the Hilbert transform, we show how we can better characterise breathing rhythms, with the goal of improving physiological noise correction in functional magnetic resonance imaging (fMRI). 
Specifically, our approach leads to a representation with higher time resolution and better captures atypical breathing events than current peak-based RVT estimators. Finally, we demonstrate that this leads to an increase in the amount of respiration-related variance removed from fMRI data when used as part of a typical preprocessing pipeline. Our implementation is publicly available as part of the PhysIO package, which is distributed as part of the open-source TAPAS toolbox (https://translationalneuromodeling.org/tapas).",2021-01-28 +30314311,"Combinations of Small RNA, RNA, and Degradome Sequencing Uncovers the Expression Pattern of microRNA⁻mRNA Pairs Adapting to Drought Stress in Leaf and Root of Dactylis glomerata L. ","Drought stress is a global problem, and the lack of water is a key factor that leads to agricultural shortages. MicroRNAs play a crucial role in the plant drought stress response; however, the microRNAs and their targets involved in drought response have not been well elucidated. In the present study, we used Illumina platform (https://www.illumina.com/) and combined data from miRNA, RNA, and degradome sequencing to explore the drought- and organ-specific miRNAs in orchardgrass (Dactylis glomerata L.) leaf and root. We aimed to find potential miRNA⁻mRNA regulation patterns responding to drought conditions. In total, 519 (486 conserved and 33 novel) miRNAs were identified, of which, 41 miRNAs had significant differential expression among the comparisons (p < 0.05). We also identified 55,366 unigenes by RNA-Seq, where 12,535 unigenes were differently expressed. Finally, our degradome analysis revealed that 5950 transcripts were targeted by 487 miRNAs. A correlation analysis identified that miRNA ata-miR164c-3p and its target heat shock protein family A (HSP70) member 5 gene comp59407_c0 (BIPE3) may be essential in organ-specific plant drought stress response and/or adaptation in orchardgrass. 
Additionally, Gene ontology (GO) and Kyoto encyclopedia of genes and genomes (KEGG) analyses found that ""antigen processing and presentation"" was the most enriched downregulated pathway in adaptation to drought conditions. Taken together, we explored the genes and miRNAs that may be involved in drought adaptation of orchardgrass and identified how they may be regulated. These results serve as a valuable genetic resource for future studies focusing on how plants adapted to drought conditions.",2018-10-11 +31261733,3D-PP: A Tool for Discovering Conserved Three-Dimensional Protein Patterns. ,"Discovering conserved three-dimensional (3D) patterns among protein structures may provide valuable insights into protein classification, functional annotations or the rational design of multi-target drugs. Thus, several computational tools have been developed to discover and compare protein 3D-patterns. However, most of them only consider previously known 3D-patterns such as orthosteric binding sites or structural motifs. This fact makes necessary the development of new methods for the identification of all possible 3D-patterns that exist in protein structures (allosteric sites, enzyme-cofactor interaction motifs, among others). In this work, we present 3D-PP, a new free access web server for the discovery and recognition all similar 3D amino acid patterns among a set of proteins structures (independent of their sequence similarity). This new tool does not require any previous structural knowledge about ligands, and all data are organized in a high-performance graph database. The input can be a text file with the PDB access codes or a zip file of PDB coordinates regardless of the origin of the structural data: X-ray crystallographic experiments or in silico homology modeling. The results are presented as lists of sequence patterns that can be further analyzed within the web page. 
We tested the accuracy and suitability of 3D-PP using two sets of proteins coming from the Protein Data Bank: (a) Zinc finger containing and (b) Serotonin target proteins. We also evaluated its usefulness for the discovering of new 3D-patterns, using a set of protein structures coming from in silico homology modeling methodologies, all of which are overexpressed in different types of cancer. Results indicate that 3D-PP is a reliable, flexible and friendly-user tool to identify conserved structural motifs, which could be relevant to improve the knowledge about protein function or classification. The web server can be freely utilized at https://appsbio.utalca.cl/3d-pp/.",2019-06-28 +28725787,NDER: A Novel Web Application for Teaching Histology to Medical Students.,"Medical students require a strong foundation in normal histology. However, current trends in medical school curricula have diminished time devoted to histology. Thus, there is a need for more efficient methods of teaching histology. We have developed a novel software program (Novel Diagnostic Educational Resource; https://pcs-webtest0.pathology.washington.edu/academics/pattern/) that uses annotated whole slide images to teach normal histology. Whole slide images of a wide variety of tissues were annotated by a trainee and validated by an experienced pathologist. Still images were extracted and transferred to the Novel Diagnostic Educational Resource web application. In Novel Diagnostic Educational Resource, an image was displayed briefly and the user was forced to identify the tissue type. The display time changed inversely based on cumulative accuracy to challenge the user and maintain engagement. A total of 129 second-year medical students completed the 30-minute Novel Diagnostic Educational Resource module. 
Surveys showed an increase in confidence from premodule (0% extremely confident, 4% very, 47% somewhat, and 49% not) to postmodule (9% extremely confident, 57% very, 32% somewhat, and 2% not), P < .0001. Accuracy increased from 72.6% pretest to 95.7% posttest, P < .002. The effect size (Cohen d = 2.30) was very large, where 0.2 is a small effect, 0.5 moderate, and 0.8 large. Ninety-six percent of students would recommend Novel Diagnostic Educational Resource to other medical students, and 98% would use Novel Diagnostic Educational Resource to further enhance their histology knowledge. Novel Diagnostic Educational Resource drastically improved medical student accuracy in classifying normal histology and improved confidence. Additional study is needed to determine knowledge retention, but Novel Diagnostic Educational Resource has great potential for efficient teaching of histology given the curriculum time constraints in medical education.",2017-01-01 +34022195,A Schistosoma mansoni tri- and tetramer microsatellite catalog for genetic population diversity and differentiation.,"All Schistosoma mansoni tri- and tetranucleotide repeat microsatellites published as of December 2018 were identified. All 52 were evaluated for autosomal location, strength of amplification, scorability and behavior as single-copy loci by polyacrylamide and capillary gel electrophoresis. Of these, 27 were unique, autosomal, polymorphic, easily scored and single copy as assessed on pooled adult worm DNA from two different continental origins and adult worm clones. These microsatellites were distributed across all seven autosomal chromosomes. On laboratory strains their heterozygosity ranged from 0.22 to 0.77. Individual markers had 5-13 alleles, allelic richness of 2-10 and an effective allele number of 1.3-8.14. 
Those infected by Schistosoma mansoni carry many genetically distinct, sexually reproducing parasites, therefore, for an individual infection the complete allele frequency profile of their progeny consists of a pool of DNA from multiple diploid eggs. Using a set of 25 microsatellites, we calculated allele frequency profiles of eggs in fecal samples from people in two Brazilian communities separated by 6 km: Jenipapo (n = 80) and Volta do Rio (n = 38). There were no a priori characteristics that could predict the performance of markers in natural infections based on their performance with laboratory strains. Increasing marker number did not change accuracy for differentiation and diversity but did improve precision. Our data suggest that using a random set of 10-20 microsatellites appears to result in values that exhibit low standard deviations for diversity and differentiation indices. All identified microsatellites as well as PCR conditions, allele size, primer sequences and references for all tri- and tetramer microsatellites markers presented in this work are available at: https://sites.google.com/case.edu/cwru-and-fiocruz-wdrc/home.",2021-05-19 +26582922,"SureChEMBL: a large-scale, chemically annotated patent document database.","SureChEMBL is a publicly available large-scale resource containing compounds extracted from the full text, images and attachments of patent documents. The data are extracted from the patent literature according to an automated text and image-mining pipeline on a daily basis. SureChEMBL provides access to a previously unavailable, open and timely set of annotated compound-patent associations, complemented with sophisticated combined structure and keyword-based search capabilities against the compound repository and patent document corpus; given the wealth of knowledge hidden in patent documents, analysis of SureChEMBL data has immediate applications in drug discovery, medicinal chemistry and other commercial areas of chemical science. 
Currently, the database contains 17 million compounds extracted from 14 million patent documents. Access is available through a dedicated web-based interface and data downloads at: https://www.surechembl.org/.",2015-11-17 +32402076,InteractomeSeq: a web server for the identification and profiling of domains and epitopes from phage display and next generation sequencing data.,"High-Throughput Sequencing technologies are transforming many research fields, including the analysis of phage display libraries. The phage display technology coupled with deep sequencing was introduced more than a decade ago and holds the potential to circumvent the traditional laborious picking and testing of individual phage rescued clones. However, from a bioinformatics point of view, the analysis of this kind of data was always performed by adapting tools designed for other purposes, thus not considering the noise background typical of the 'interactome sequencing' approach and the heterogeneity of the data. InteractomeSeq is a web server allowing data analysis of protein domains ('domainome') or epitopes ('epitome') from either Eukaryotic or Prokaryotic genomic phage libraries generated and selected by following an Interactome sequencing approach. InteractomeSeq allows users to upload raw sequencing data and to obtain an accurate characterization of domainome/epitome profiles after setting the parameters required to tune the analysis. The release of this tool is relevant for the scientific and clinical community, because InteractomeSeq will fill an existing gap in the field of large-scale biomarkers profiling, reverse vaccinology, and structural/functional studies, thus contributing essential information for gene annotation or antigen identification. 
InteractomeSeq is freely available at https://InteractomeSeq.ba.itb.cnr.it/.",2020-07-01 +32547380,Sammba-MRI: A Library for Processing SmAll-MaMmal BrAin MRI Data in Python.,"Small-mammal neuroimaging offers incredible opportunities to investigate structural and functional aspects of the brain. Many tools have been developed in the last decade to analyse small animal data, but current software is less mature than the available tools that process human brain data. The Python package Sammba-MRI (SmAll-MaMmal BrAin MRI in Python; http://sammba-mri.github.io) allows flexible and efficient use of existing methods and enables fluent scriptable analysis workflows, from raw data conversion to multimodal processing.",2020-05-28 +31920755,Altered Expression of a Unique Set of Genes Reveals Complex Etiology of Schizophrenia.,"Background: The etiology of schizophrenia is extensively debated, and multiple factors have been contended to be involved. A panoramic view of the contributing factors in a genome-wide study can be an effective strategy to provide a comprehensive understanding of its causality. Materials and Methods: GSE53987 dataset downloaded from GEO-database, which comprised mRNA expression data of post-mortem brain tissue across three regions from control (C) and age-matched subjects (T) of schizophrenia (N = Hippocampus [HIP]: C-15, T-18, Prefrontal cortex [PFC]: C-15, T-19, Associative striatum [STR]: C-18, T-18). Bio-conductor-affy-package used to compute mRNA expression, and further t-test applied to investigate differential gene expression. The analysis of the derived genes performed using the PANTHER Classification System and NCBI database. Further, a protein interactome analysis of the derived gene set was performed using STRING v10 database (https://string-db.org/). Results: A set of 40 genes showed significantly altered (p < 0.01) expression across all three brain regions. 
The analyses unraveled genes implicated in biological processes and events, and molecular pathways relating basic neuronal functions. Conclusions: The aberrant expression of genes maintaining basic cell machinery explains compromised neuronal processing in SCZ.",2019-12-12 +32834595,A data driven epidemic model to analyse the lockdown effect and predict the course of COVID-19 progress in India.,"We propose a data driven epidemic model using the real data on the infection, recovery and death cases for the analysis of COVID-19 progression in India. The model assumes continuation of existing control measures such as lockdown and quarantines, the suspected and confirmed cases and does not consider the scenario of 2nd surge of the epidemic due to any reason. The model is arrived after least square fitting of epidemic behaviour model based on theoretical formulation to the real data of cumulative infection cases reported between 24 March 2020 and 30 May 2020. The predictive capability of the model has been validated with real data of infection cases reported during June 1-10, 2020. A detailed analysis of model predictions in terms of future trend of COVID-19 progress individually in 18 states of India and India as a whole has been attempted. Infection rate in India, as a whole, is continuously decreasing with time and has reached 3 times lower than the initial infection rate after 6 weeks of lock down suggesting the effectiveness of the lockdown in containing the epidemic. Results suggest that India, as a whole, could see the peak and end of the epidemic in the month of July 2020 and March 2021 respectively as per the current trend in the data. Active infected cases in India may touch 2 lakhs or little above at the peak time and total infected cases may reach over 19 lakhs as per current trend. State-wise results have been discussed in the manuscript. 
However, the prediction may deviate particularly for longer dates, as assumptions of model cannot be met always in a real scenario. In view of this, a real time application (COV-IND Predictor) has been developed which automatically syncs the latest data from the national COVID19 dash board on daily basis and updates the model input parameters and predictions instantaneously. This real time application can be accessed from the link: https://docs.google.com/spreadsheets/d/1fCwgnQ-dz4J0YWVDHUcbEW1423wOJjdEXm8TqJDWNAk/edit?usp=sharing and can serve as a practical tool for policy makers to track peak time and maximum active infected cases based on latest trend in data for medical readiness and taking epidemic management decisions.",2020-06-20 +33381557,An Integrated Autophagy-Related Long Noncoding RNA Signature as a Prognostic Biomarker for Human Endometrial Cancer: A Bioinformatics-Based Approach.,"Endometrial cancer is one of the most common malignant tumors, lowering the quality of life among women worldwide. Autophagy plays dual roles in these malignancies. To search for prognostic markers for endometrial cancer, we mined The Cancer Genome Atlas and the Human Autophagy Database for information on endometrial cancer and autophagy-related genes and identified five autophagy-related long noncoding RNAs (lncRNAs) (LINC01871, SCARNA9, SOS1-IT1, AL161618.1, and FIRRE). Based on these autophagy-related lncRNAs, samples were divided into high-risk and low-risk groups. Survival analysis showed that the survival rate of the high-risk group was significantly lower than that of the low-risk group. Univariate and multivariate independent prognostic analyses showed that patients' age, pathological grade, and FIGO stage were all risk factors for poor prognosis. A clinical correlation analysis of the relationship between the five autophagy-related lncRNAs and patients' age, pathological grade, and FIGO stage was also performed (https://orcid.org/0000-0001-7090-1750). 
Histopathological assessment of the tumor microenvironment showed that the ESTIMATE, immune, and stromal scores in the high-risk group were lower than those in the low-risk group. Principal component analysis and functional annotation were performed to confirm the correlations. To further evaluate the effect of the model constructed on prognosis, samples were divided into training (60%) and validation (40%) groups, regarding the risk status as an independent prognostic risk factor. A prognostic nomogram was constructed using patients' age, pathological grade, FIGO stage, and risk status to estimate the patients' survival rate. C-index and multi-index ROC curves were generated to verify the stability and accuracy of the nomogram. From this analysis, we concluded that the five lncRNAs identified in this study could affect the incidence and development of endometrial cancer by regulating the autophagy process. Therefore, these molecules may have the potential to serve as novel therapeutic targets and biomarkers.",2020-12-12 +34448164,Co-expression for Genotype-Phenotype Function Annotation in Potato Research.,"Here we will show a recipe for running the online version of CoExpNetViz, which is available as a Cytoscape plug-in, and as a web tool. After choosing bait genes and transcriptome datasets in the Cytoscape plug-in, the analysis is run and the resulting network is displayed immediately. 
Using the web tool, the user can download the Cytoscape files and import them manually into the program. The easiest way to calculate correlations for your data is to use the graphical interface of the online version of CoExpNetViz for the comparative co-expression construction; see http://bioinformatics.psb.ugent.be/webtools/coexpr/index.php . By providing a user-friendly web interface, CoExpNetViz makes comparative transcriptomics analysis accessible to plant researchers without specialized bioinformatics knowledge or programming skills.",2021-01-01 +26078228,An integrated database of wood-formation related genes in plants.,"Wood, which consists mainly of plant cell walls, is an extremely important resource in daily lives. Genes whose products participate in the processes of cell wall and wood formation are therefore major subjects of plant science research. The Wood-Formation Related Genes database (WFRGdb, http://me.lzu.edu.cn/woodformation/) serves as a data resource center for genes involved in wood formation. To create this database, we collected plant genome data published in other online databases and predicted all cell wall and wood formation related genes using BLAST and HMMER. To date, 47 gene families and 33 transcription factors from 57 genomes (28 herbaceous, 22 woody and 7 non-vascular plants) have been covered and more than 122,000 genes have been checked and recorded. To provide easy access to these data, we have developed several search methods, which make it easy to download targeted genes or groups of genes free of charge in FASTA format. Sequence and phylogenetic analyses are also available online. WFRGdb brings together cell wall and wood formation related genes from all available plant genomes, and provides an integrative platform for gene inquiry, downloading and analysis. 
This database will therefore be extremely useful for those who focus on cell wall and wood research.",2015-06-16 +32963084,CRISpy-Pop: A Web Tool for Designing CRISPR/Cas9-Driven Genetic Modifications in Diverse Populations.,"CRISPR/Cas9 is a powerful tool for editing genomes, but design decisions are generally made with respect to a single reference genome. With population genomic data becoming available for an increasing number of model organisms, researchers are interested in manipulating multiple strains and lines. CRISpy-pop is a web application that generates and filters guide RNA sequences for CRISPR/Cas9 genome editing for diverse yeast and bacterial strains. The current implementation designs and predicts the activity of guide RNAs against more than 1000 Saccharomyces cerevisiae genomes, including 167 strains frequently used in bioenergy research. Zymomonas mobilis, an increasingly popular bacterial bioenergy research model, is also supported. CRISpy-pop is available as a web application (https://CRISpy-pop.glbrc.org/) with an intuitive graphical user interface. CRISpy-pop also cross-references the human genome to allow users to avoid the selection of guide RNAs with potential biosafety concerns. Additionally, CRISpy-pop predicts the strain coverage of each guide RNA within the supported strain sets, which aids in functional population genetic studies. Finally, we validate how CRISpy-pop can accurately predict the activity of guide RNAs across strains using population genomic data.",2020-11-05 +26481356,Lnc2Cancer: a manually curated database of experimentally supported lncRNAs associated with various human cancers.,"Lnc2Cancer (http://www.bio-bigdata.net/lnc2cancer) is a manually curated database of cancer-associated long non-coding RNAs (lncRNAs) with experimental support that aims to provide a high-quality and integrated resource for exploring lncRNA deregulation in various human cancers. 
LncRNAs represent a large category of functional RNA molecules that play a significant role in human cancers. A curated collection and summary of deregulated lncRNAs in cancer is essential to thoroughly understand the mechanisms and functions of lncRNAs. Here, we developed the Lnc2Cancer database, which contains 1057 manually curated associations between 531 lncRNAs and 86 human cancers. Each association includes lncRNA and cancer name, the lncRNA expression pattern, experimental techniques, a brief functional description, the original reference and additional annotation information. Lnc2Cancer provides a user-friendly interface to conveniently browse, retrieve and download data. Lnc2Cancer also offers a submission page for researchers to submit newly validated lncRNA-cancer associations. With the rapidly increasing interest in lncRNAs, Lnc2Cancer will significantly improve our understanding of lncRNA deregulation in cancer and has the potential to be a timely and valuable resource.",2015-10-19 +30693153,EyeChrom and CCDBcurator: Visualizing chromosome count data from plants.,"

Premise of the study

Chromosome count data are available for hundreds of plant species and can be explored in text-only format at the Chromosome Counts Database (http://ccdb.tau.ac.il). CCDBcurator and EyeChrom are an R package and a web application, respectively, that first curate and then visualize these data graphically, so that intra- and interspecific variation of chromosome numbers can be easily summarized and displayed for a given genus.

Methods and results

We developed R code to clean, summarize, and display in several formats the chromosome count data for a selected genus or set of species present in the Chromosome Counts Database. These data and figures can be exported for use in analyses, publications, or teaching.

Conclusions

Chromosome count data are critical for a number of evolutionary studies in plant biology, and their importance is underscored by the increasing appreciation of the prevalence of polyploidy in land plants. CCDBcurator and EyeChrom provide a fast, easy, and reproducible means of cleaning, curating, and then visualizing the chromosome count data currently available for plants.",2019-01-04 +30365027,"gcMeta: a Global Catalogue of Metagenomics platform to support the archiving, standardization and analysis of microbiome data.","Meta-omics approaches have been increasingly used to study the structure and function of the microbial communities. A variety of large-scale collaborative projects are being conducted to encompass samples from diverse environments and habitats. This change has resulted in enormous demands for long-term data maintenance and capacity for data analysis. The Global Catalogue of Metagenomics (gcMeta) is a part of the 'Chinese Academy of Sciences Initiative of Microbiome (CAS-CMI)', which focuses on studying the human and environmental microbiome, establishing depositories of samples, strains and data, as well as promoting international collaboration. To accommodate and rationally organize massive datasets derived from several thousands of human and environmental microbiome samples, gcMeta features a database management system for archiving and publishing data in a standardized way. Another main feature is the integration of more than ninety web-based data analysis tools and workflows through a Docker platform which enables data analysis by using various operating systems. This platform has been rapidly expanding, and now hosts data from the CAS-CMI and a number of other ongoing research projects. In conclusion, this platform presents a powerful and user-friendly service to support worldwide collaborative efforts in the field of meta-omics research. 
This platform is freely accessible at https://gcmeta.wdcm.org/.",2019-01-01 +29520288,TBC2target: A Resource of Predicted Target Genes of Tea Bioactive Compounds.,"Tea is one of the most popular non-alcoholic beverages consumed worldwide. Numerous bioactive constituents of tea were confirmed to possess healthy benefits via the mechanisms of regulating gene expressions or protein activities. However, a complete interacting profile between tea bioactive compounds (TBCs) and their target genes is lacking, which put an obstacle in the study of healthy function of tea. To fill this gap, we developed a database of target genes of TBCs (TBC2target, http://camellia.ahau.edu.cn/TBC2target) based on a pharmacophore mapping approach. In TBC2target, 6,226 interactions between 240 TBCs and 673 target genes were documented. TBC2target contains detailed information about each interacting entry, such as TBC, CAS number, PubChem CID, source of compound (e.g., green, black), compound type, target gene(s) of TBC, gene symbol, gene ID, ENSEMBL ID, PDB ID, TBC bioactivity and the reference. Using the TBC-target associations, we constructed a bipartite network and provided users the global network and local sub-network visualization and topological analyses. The entire database is free for online browsing, searching and downloading. In addition, TBC2target provides a BLAST search function to facilitate use of the database. The particular strengths of TBC2target are the inclusion of the comprehensive TBC-target interactions, and the capacity to visualize and analyze the interacting networks, which may help uncovering the beneficial effects of tea on human health as a central resource in tea health community.",2018-02-22 +32186171,[Using the big data of internet to understand the characteristics of coronavirus disease 2019: a big data study].,"Objective: To analyze the symptom characteristics of Coronavirus Disease 2019(COVID-19) and to improve its prevention by using big data. 
Methods: Using Baidu Index Platform (http://index.baidu.com) and the website of Chinese Center for Disease Control and Prevention as data resources, we obtained the search volume (SV) of keywords for symptoms associated with COVID-19 from January 1 to February 20 in each year from 2017 to 2020, in Hubei province and other top 10 impacted provinces in China and the epidemic data. Data of 2020 were compared with the previous three years. Data of Hubei province were compared with confirmed cases. The differences and characteristics of the SV of COVID-19-related symptoms, and the correlation between the SV of COVID-19 and new confirmed or suspected cases were analyzed and the hysteresis effects were discussed. R3.6.2 software was used to analyze the data. Results: Compared the data from January 1 to February 20, 2020, with the SV for the same period of previous three years, Hubei's SV for cough, fever, diarrhea, chest tightness, dyspnea and other symptoms were significantly increased. The total SV of lower respiratory symptoms was significantly higher than that of upper respiratory symptoms (P<0.001). The SV of COVID-19 in Hubei province was significantly correlated with new confirmed or suspected cases (r(confirmed)=0.723, r(suspected)=0.863, all P<0.001). The results of the distributed lag model suggested that the patients who retrieved relevant symptoms on the internet may begin to see a doctor in 2-3 days later and be diagnosed in 3-4 days later. Conclusions: The total SV of lower respiratory symptoms is higher than that of upper respiratory symptoms, and the SV of diarrhea also increases significantly. It warns us to pay attention to not only the symptoms of lower respiratory tract, but also the gastrointestinal symptoms, especially diarrhea in patients with COVID-19. There is a relationship between internet retrieval behavior and the number of new confirmed or suspected cases. 
Big data have a certain role in the early warning of infectious diseases.",2020-06-01 +33310018,FunRich enables enrichment analysis of OMICs datasets.,"High-throughput methods to profile the genome, transcriptome, proteome and metabolome of various systems has become a routine in multiple research laboratories around the world. Hence, to analyse and interpret these heterogenous datasets user-friendly bioinformatics tools are needed. Here, we discuss FunRich tool that enables biologists to perform functional enrichment analysis on the generated datasets. Users can perform enrichment analysis with a variety of background databases and have complete control in updating or modifying the content in most of the databases. Specifically, users can download and update the background database from UniProt at any time thereby allowing a robust background database that can support annotations from >18 taxonomies. Users can create customizable Venn diagrams, pie charts, bar graphs and heatmaps of publication quality for their datasets using FunRich (http://www.funrich.org). Overall, FunRich tool is user-friendly and enables users to perform various analysis on their datasets with minimal or no aid from bioinformaticians.",2020-12-11 +33959153,Amino Acid Reduction Can Help to Improve the Identification of Antimicrobial Peptides and Their Functional Activities.,"Antimicrobial peptides (AMPs) are considered as potential substitutes of antibiotics in the field of new anti-infective drug design. There have been several machine learning algorithms and web servers in identifying AMPs and their functional activities. However, there is still room for improvement in prediction algorithms and feature extraction methods. The reduced amino acid (RAA) alphabet effectively solved the problems of simplifying protein complexity and recognizing the structure conservative region. 
This article goes into details about evaluating the performances of more than 5,000 amino acid reduced descriptors generated from 74 types of amino acid reduced alphabet in the first stage and the second stage to construct an excellent two-stage classifier, Identification of Antimicrobial Peptides by Reduced Amino Acid Cluster (iAMP-RAAC), for identifying AMPs and their functional activities, respectively. The results show that the first stage AMP classifier is able to achieve the accuracy of 97.21 and 97.11% for the training data set and independent test dataset. In the second stage, our classifier still shows good performance. At least three of the four metrics, sensitivity (SN), specificity (SP), accuracy (ACC), and Matthews correlation coefficient (MCC), exceed the calculation results in the literature. Further, the ANOVA with incremental feature selection (IFS) is used for feature selection to further improve prediction performance. The prediction performance is further improved after the feature selection of each stage. At last, a user-friendly web server, iAMP-RAAC, is established at http://bioinfor.imu.edu.cn/iampraac.",2021-04-20 +25990557,DSigDB: drug signatures database for gene set analysis.,"

Unlabelled

We report the creation of Drug Signatures Database (DSigDB), a new gene set resource that relates drugs/compounds and their target genes, for gene set enrichment analysis (GSEA). DSigDB currently holds 22 527 gene sets, consists of 17 389 unique compounds covering 19 531 genes. We also developed an online DSigDB resource that allows users to search, view and download drugs/compounds and gene sets. DSigDB gene sets provide seamless integration to GSEA software for linking gene expressions with drugs/compounds for drug repurposing and translational research.

Availability and implementation

DSigDB is freely available for non-commercial use at http://tanlab.ucdenver.edu/DSigDB.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

aikchoon.tan@ucdenver.edu.",2015-05-19 +31138816,"Catalysis-Hub.org, an open electronic structure database for surface reactions.","We present a new open repository for chemical reactions on catalytic surfaces, available at https://www.catalysis-hub.org . The featured database for surface reactions contains more than 100,000 chemisorption and reaction energies obtained from electronic structure calculations, and is continuously being updated with new datasets. In addition to providing quantum-mechanical results for a broad range of reactions and surfaces from different publications, the database features a systematic, large-scale study of chemical adsorption and hydrogenation on bimetallic alloy surfaces. The database contains reaction specific information, such as the surface composition and reaction energy for each reaction, as well as the surface geometries and calculational parameters, essential for data reproducibility. By providing direct access via the web-interface as well as a Python API, we seek to accelerate the discovery of catalytic materials for sustainable energy applications by enabling researchers to efficiently use the data as a basis for new calculations and model generation.",2019-05-28 +31240416,The REAL (REctal Anastomotic Leak) score for prediction of anastomotic leak after rectal cancer surgery.,"

Background

Anastomotic leak after rectal cancer surgery is a severe complication associated with poorer oncologic outcome and quality of life. Preoperative assessment of the risk for anastomotic leak is a key component of surgical planning, including the opportunity to create a defunctioning stoma.

Objective

The purpose of this study was to identify and quantify the risk factors for anastomotic leak to minimize risk by either not restoring bowel continuity or protecting the anastomosis with a temporary diverting stoma.

Methods

Potentially relevant studies were identified from the following databases: PubMed, Embase and Cochrane Library. This meta-analysis included studies on transabdominal resection for rectal cancer that reported data about anastomotic leak. The risk for anastomotic leak after rectal cancer surgery was investigated. Preoperative, intraoperative, and postoperative factors were extracted and used to compare anastomotic leak rates. All variables demonstrating a p value < 0.1 in the univariate analysis were entered into a multivariate logistic regression model to determine the risk factors for anastomotic leak.

Results

Twenty-six centers provided individual data on 9735 patients. Selected preoperative covariates (time before surgery, age, gender, smoking, previous abdominal surgery, BMI, diabetes, ASA, hemoglobin level, TNM classification stage, anastomotic distance) were used as independent factors in a logistic regression model with anastomotic leak as dependent variable. With a threshold value of the receiver operating characteristics (ROC) curve corresponding to 0.0791 in the training set, the area under the ROC curve (AUC) was 0.585 (p < 0.0001). Sensitivity and specificity of the model's probability > 0.0791 to identify anastomotic leak were 79.1% and 32.9%, respectively. Accuracy of the threshold value was confirmed in the validation set with 77.8% sensitivity and 35.2% specificity.

Conclusions

We trust that, with further refinement using prospective data, this nomogram based on preoperative risk factors may assist surgeons in decision making. The score is now available online ( http://www.real-score.org ).",2019-06-25 +25113421,GlycoMaster DB: software to assist the automated identification of N-linked glycopeptides by tandem mass spectrometry.,"Glycosylation is one of the most commonly observed post-translational modifications (PTMs) in eukaryotes. It is believed that more than 50% eukaryotic proteins are glycosylated. To reveal the biological functions of protein-linked glycans involved in numerous biological processes, the high-throughput identification of both glycoproteins and the attached glycan structures becomes fundamentally important. Tandem mass spectrometry (MS/MS) is an effective method for glycoproteomic analysis because of its high sensitivity and selectivity. Two experimental approaches exist to obtain MS/MS spectral data of glycopeptides. One consists of isolating glycans from glycopeptides and generating MS/MS spectra of the glycans and peptides separately. The other approach produces spectra directly from intact glycopeptides. The latter approach has the advantage of retaining the glycosylation site information. However, the spectral data cannot be readily analyzed because of the lack of software specifically designed for the identification of intact glycopeptides. To address this need, we developed a novel software tool, GlycoMaster DB, to assist the automated and high-throughput identification of intact N-linked glycopeptides from MS/MS spectra. The software simultaneously searches a protein sequence database and a glycan structure database to find the best pair of peptide and glycan for each input spectrum. GlycoMaster DB can analyze mass spectral data produced with HCD/ETD mixed fragmentation, where HCD spectra are used to identify glycans and ETD spectra are used to determine peptide sequences. 
When only HCD spectra are available, GlycoMaster DB can still help to identify the glycans, and a list of possible peptide sequences is reported according to the accurate precursor mass and the N-linked glycopeptide sequon. GlycoMaster DB is freely accessible at http://www-novo.cs.uwaterloo.ca:8080/GlycoMasterDB .",2014-08-25 +31557052,Generating the Blood Exposome Database Using a Comprehensive Text Mining and Database Fusion Approach.,"

Background

Blood chemicals are routinely measured in clinical or preclinical research studies to diagnose diseases, assess risks in epidemiological research, or use metabolomic phenotyping in response to treatments. A vast volume of blood-related literature is available via the PubMed database for data mining.

Objectives

We aimed to generate a comprehensive blood exposome database of endogenous and exogenous chemicals associated with the mammalian circulating system through text mining and database fusion.

Methods

Using NCBI resources, we retrieved PubMed abstracts, PubChem chemical synonyms, and PMC supplementary tables. We then employed text mining and PubChem crowdsourcing to associate phrases relating to blood with PubChem chemicals. False positives were removed by a phrase pattern and a compound exclusion list.

Results

A query to identify blood-related publications in the PubMed database yielded 1.1 million papers. Matching a total of 15 million synonyms from 6.5 million relevant PubChem chemicals against all blood-related publications yielded 37,514 chemicals and 851,999 publications records. Mapping PubChem compound identifiers to the PubMed database yielded 49,940 unique chemicals linked to 676,643 papers. Analysis of open-access metabolomics papers related to blood phrases in the PMC database yielded 4,039 unique compounds and 204 papers. Consolidating these three approaches summed up to a total of 41,474 achiral structures that were linked to 65,957 PubChem CIDs and to over 878,966 PubMed articles. We mapped these compounds to 50 databases such as those covering metabolites and pathways, governmental and toxicological databases, pharmacology resources, and bioassay repositories. In comparison, HMDB, the Human Metabolome Database, links 1,075 compounds to blood-related primary publications.

Conclusion

This new Blood Exposome Database can be used for prioritizing chemicals for systematic reviews, developing target assays in exposome research, identifying compounds in untargeted mass spectrometry, and biological interpretation in metabolomics data. The database is available at http://bloodexposome.org. https://doi.org/10.1289/EHP4713.",2019-09-26 +30864317,PLATYPUS: A Multiple-View Learning Predictive Framework for Cancer Drug Sensitivity Prediction.,"Cancer is a complex collection of diseases that are to some degree unique to each patient. Precision oncology aims to identify the best drug treatment regime using molecular data on tumor samples. While omics-level data is becoming more widely available for tumor specimens, the datasets upon which computational learning methods can be trained vary in coverage from sample to sample and from data type to data type. Methods that can 'connect the dots' to leverage more of the information provided by these studies could offer major advantages for maximizing predictive potential. We introduce a multi-view machine learning strategy called PLATYPUS that builds 'views' from multiple data sources that are all used as features for predicting patient outcomes. We show that a learning strategy that finds agreement across the views on unlabeled data increases the performance of the learning methods over any single view. We illustrate the power of the approach by deriving signatures for drug sensitivity in a large cancer cell line database. Code and additional information are available from the PLATYPUS website https://sysbiowiki.soe.ucsc.edu/platypus.",2019-01-01 +30953757,A computational system for identifying operons based on RNA-seq data.,"An operon is a set of neighboring genes in a genome that is transcribed as a single polycistronic message. Genes that are part of the same operon often have related functional roles or participate in the same metabolic pathways. 
The majority of all bacterial genes are co-transcribed with one or more other genes as part of a multi-gene operon. Thus, accurate identification of operons is important in understanding co-regulation of genes and their functional relationships. Here, we present a computational system that uses RNA-seq data to determine operons throughout a genome. The system takes the name of a genome and one or more files of RNA-seq data as input. Our method combines primary genomic sequence information with expression data from the RNA-seq files in a unified probabilistic model in order to identify operons. We assess our method's ability to accurately identify operons in a range of species through comparison to external databases of operons, both experimentally confirmed and computationally predicted, and through focused experiments that confirm new operons identified by our method. Our system is freely available at https://cs.wellesley.edu/~btjaden/Rockhopper/.",2019-04-04 +31809863,PsyMuKB: An Integrative De Novo Variant Knowledge Base for Developmental Disorders.,"De novo variants (DNVs) are one of the most significant contributors to severe early-onset genetic disorders such as autism spectrum disorder, intellectual disability, and other developmental and neuropsychiatric (DNP) disorders. Presently, a plethora of DNVs have been identified using next-generation sequencing, and many efforts have been made to understand their impact at the gene level. However, there has been little exploration of the effects at the isoform level. The brain contains a high level of alternative splicing and regulation, and exhibits a more divergent splicing program than other tissues. Therefore, it is crucial to explore variants at the transcriptional regulation level to better interpret the mechanisms underlying DNP disorders. To facilitate a better usage and improve the isoform-level interpretation of variants, we developed NeuroPsychiatric Mutation Knowledge Base (PsyMuKB). 
It contains a comprehensive, carefully curated list of DNVs with transcriptional and translational annotations to enable identification of isoform-specific mutations. PsyMuKB allows a flexible search of genes or variants and provides both table-based descriptions and associated visualizations, such as expression, transcript genomic structures, protein interactions, and the mutation sites mapped on the protein structures. It also provides an easy-to-use web interface, allowing users to rapidly visualize the locations and characteristics of mutations and the expression patterns of the impacted genes and isoforms. PsyMuKB thus constitutes a valuable resource for identifying tissue-specific DNVs for further functional studies of related disorders. PsyMuKB is freely accessible at http://psymukb.net.",2019-08-01 +32514178,Analysis of task-based functional MRI data preprocessed with fMRIPrep.,"Functional magnetic resonance imaging (fMRI) is a standard tool to investigate the neural correlates of cognition. fMRI noninvasively measures brain activity, allowing identification of patterns evoked by tasks performed during scanning. Despite the long history of this technique, the idiosyncrasies of each dataset have led to the use of ad-hoc preprocessing protocols customized for nearly every different study. This approach is time consuming, error prone and unsuitable for combining datasets from many sources. Here we showcase fMRIPrep (http://fmriprep.org), a robust tool to prepare human fMRI data for statistical analysis. This software instrument addresses the reproducibility concerns of the established protocols for fMRI preprocessing. By leveraging the Brain Imaging Data Structure to standardize both the input datasets (MRI data as stored by the scanner) and the outputs (data ready for modeling and analysis), fMRIPrep is capable of preprocessing a diversity of datasets without manual intervention. 
In support of the growing popularity of fMRIPrep, this protocol describes how to integrate the tool in a task-based fMRI investigation workflow.",2020-06-08 +33522297,"""I Have Nothing Else to Give"": A Qualitative Exploration of Emergency Medicine Residents' Perceptions of Burnout.","

Phenomenon:

Resident physicians experience high degrees of burnout. Medical educators are tasked with implementing burnout interventions, however they possess an incomplete understanding of residents' lived experiences with this phenomenon. Attempts to understand burnout using quantitative methods may insufficiently capture the complexities of resident burnout and limit our ability to implement meaningful specialty-specific interventions. Qualitative studies examining how residents conceptualize burnout have been briefly examined in other specialties, however the specific stressors that characterize emergency medicine training may lead residents to experience burnout differently. This study used qualitative methodology to explore emergency medicine trainees' perceptions of the complex phenomenon of burnout during their residency training years. Approach: In order to evaluate a novel wellness intervention at their emergency medicine residency program, the authors conducted four semi-structured focus groups with residents and recent alumni from May 2018 to August 2018. After the focus groups concluded, the authors noted that they lacked an insightful understanding of their residents' own experiences with physician burnout. Thus, they performed a secondary analysis of data initially gathered for the curricular evaluation. They followed a reflexive thematic analysis approach, analyzing all focus group transcripts in an iterative manner, discussing and refining codes, and developing thematic categories. Findings: Residents described individual-level manifestations of burnout in their day-to-day lives, a calloused view of patient suffering in the clinical environment, and a fatalistic view toward burnout during their training. They experienced a pervasive negativity, emotional fragility, and neglect of self that bled into their social environments. Clinically, burnout contributed to the erosion of the therapeutic physician-patient relationship. 
Residents perceived burnout as an inevitable and necessary element of their residency training years. Insights: Residents' lived experiences with burnout include nonclinical manifestations that challenge existing frameworks suggesting that burnout is restricted to the work domain. Burnout interventions in emergency medicine training programs may be more effective if educators inculcate habitual practices of self-monitoring in trainees and explicitly set resident expectations of patient acuity in the clinical environment. Supplemental data for this article is available online at https://doi.org/10.1080/10401334.2021.1875833.",2021-01-30 +32817122,"""Mind the Gap"": Hi-C Technology Boosts Contiguity of the Globe Artichoke Genome in Low-Recombination Regions.","Globe artichoke (Cynara cardunculus var. scolymus; 2n=2x=34) is cropped largely in the Mediterranean region, with Italy being the leading world producer; however, over time, its cultivation has spread to the Americas and China. In 2016, we released the first (v1.0) globe artichoke genome sequence (http://www.artichokegenome.unito.it/). Its assembly was generated using ∼133-fold Illumina sequencing data, covering 725 of the 1,084 Mb genome, of which 526 Mb (73%) were anchored to 17 chromosomal pseudomolecules. Based on v1.0 sequencing data, we generated a new genome assembly (v2.0), obtained from a Hi-C (Dovetail) genomic library, and which improves the scaffold N50 from 126 kb to 44.8 Mb (∼356-fold increase) and N90 from 29 kb to 17.8 Mb (∼685-fold increase). While the L90 of the v1.0 sequence included 6,123 scaffolds, the new v2.0 just 15 super-scaffolds, a number close to the haploid chromosome number of the species. The newly generated super-scaffolds were assigned to pseudomolecules using reciprocal blast procedures. The cumulative size of unplaced scaffolds in v2.0 was reduced by 165 Mb, increasing to 94% the anchored genome sequence. 
The marked improvement is mainly attributable to the ability of the proximity ligation-based approach to deal with both heterochromatic (e.g.: peri-centromeric) and euchromatic regions during the assembly procedure, which allowed to physically locate low recombination regions. The new high-quality reference genome enhances the taxonomic breadth of the data available for comparative plant genomics and led to a new accurate gene prediction (28,632 genes), thus promoting the map-based cloning of economically important genes.",2020-10-05 +33202375,Crop breeding - From experience-based selection to precision design.,"Crops are the foundation of human society, not only by providing needed nutrition, but also by feeding livestock and serving as raw materials for industry. Cereal crops, which supply most of our calories, have been supporting humans for thousands of years. However food security is facing many challenges nowadays, including growing populations, water shortage, and increased incidence of biotic and abiotic stresses. According to statistical data from the Food and Agriculture Organization of the United Nations (FAO, http://www.fao.org/), the people suffering severe food insecurity increased from 7.9 % in 2015 to 9.7 % in 2019 and the number of people exposed to moderate or severe food insecurity have increased by 400 million over the same time period. Although there are many ways to cope with these challenges, crop breeding remains the most crucial and direct manner. With the development of molecular genetics, the speed of cloning genetic variations underlying corresponding phenotypes of agricultural importance is considerably more rapid. As a consequence breeding methods have evolved from phenotype-based to genome-based selection. 
In the future, knowledge-driven crop design, which integrates multi-omics data to reveal the connections between genotypes and phenotypes and to build selection models, will undoubtedly become the most efficient way to shape plants, to improve crops, and to ensure food security.",2020-11-02 +33589834,Single-cell transcriptomic analysis of the adult mouse spinal cord reveals molecular diversity of autonomic and skeletal motor neurons.,"The spinal cord is a fascinating structure that is responsible for coordinating movement in vertebrates. Spinal motor neurons control muscle activity by transmitting signals from the spinal cord to diverse peripheral targets. In this study, we profiled 43,890 single-nucleus transcriptomes from the adult mouse spinal cord using fluorescence-activated nuclei sorting to enrich for motor neuron nuclei. We identified 16 sympathetic motor neuron clusters, which are distinguishable by spatial localization and expression of neuromodulatory signaling genes. We found surprising skeletal motor neuron heterogeneity in the adult spinal cord, including transcriptional differences that correlate with electrophysiologically and spatially distinct motor pools. We also provide evidence for a novel transcriptional subpopulation of skeletal motor neuron (γ*). Collectively, these data provide a single-cell transcriptional atlas ( http://spinalcordatlas.org ) for investigating the organizing molecular logic of adult motor neuron diversity, as well as the cellular and molecular basis of motor neuron function in health and disease.",2021-02-15 +32786543,GLORYx: Prediction of the Metabolites Resulting from Phase 1 and Phase 2 Biotransformations of Xenobiotics.,"Predicting the structures of metabolites formed in humans can provide advantageous insights for the development of drugs and other compounds. 
Here we present GLORYx, which integrates machine learning-based site of metabolism (SoM) prediction with reaction rule sets to predict and rank the structures of metabolites that could potentially be formed by phase 1 and/or phase 2 metabolism. GLORYx extends the approach from our previously developed tool GLORY, which predicted metabolite structures for cytochrome P450-mediated metabolism only. A robust approach to ranking the predicted metabolites is attained by using the SoM probabilities predicted by the FAME 3 machine learning models to score the predicted metabolites. On a manually curated test data set containing both phase 1 and phase 2 metabolites, GLORYx achieves a recall of 77% and an area under the receiver operating characteristic curve (AUC) of 0.79. Separate analysis of performance on a large amount of freely available phase 1 and phase 2 metabolite data indicates that achieving a meaningful ranking of predicted metabolites is more difficult for phase 2 than for phase 1 metabolites. GLORYx is freely available as a web server at https://nerdd.zbh.uni-hamburg.de/ and is also provided as a software package upon request. The data sets as well as all the reaction rules from this work are also made freely available.",2020-08-26 +31161212,RNA-align: quick and accurate alignment of RNA 3D structures based on size-independent TM-scoreRNA.,"

Motivation

Comparison of RNA 3D structures can be used to infer functional relationship of RNA molecules. Most of the current RNA structure alignment programs are built on size-dependent scales, which complicate the interpretation of structure and functional relations. Meanwhile, the low speed prevents the programs from being applied to large-scale RNA structural database search.

Results

We developed an open-source algorithm, RNA-align, for RNA 3D structure alignment which has the structure similarity scaled by a size-independent and statistically interpretable scoring metric. Large-scale benchmark tests show that RNA-align significantly outperforms other state-of-the-art programs in both alignment accuracy and running speed. The major advantage of RNA-align lies at the quick convergence of the heuristic alignment iterations and the coarse-grained secondary structure assignment, both of which are crucial to the speed and accuracy of RNA structure alignments.

Availability and implementation

https://zhanglab.ccmb.med.umich.edu/RNA-align/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +32861089,A three-dimensional parametric adult head model with representation of scalp shape variability under hair.,"Modeling the shape of the scalp and face is essential for the design of protective helmets and other head-borne equipment. However, head anthropometry studies using optical scanning rarely capture scalp shape because of hair interference. Data on scalp shape is available from bald men, but female data are generally not available. To address this issue, scalp shape was digitized in an ethnically diverse sample of 100 adult women, age 18-59, under a protocol that included whole head surface scanning and scalp measurement using a three-dimensional (3D) coordinate digitizer. A combined male and female sample was created by adding 3D surface scans of a similarly diverse sample of 80 bald men. A statistical head shape model was created by standardizing the head scan data. A total of 58 anatomical head landmarks and 12 head dimensions were obtained from each scan and processed along with the scans. A parametric model accounting for the variability of the head shape under the hair as a function of selected head dimensions was developed. The full-variable model has a mean shape error of 3.8 mm; the 95th percentile error was 7.4 mm, which were measured at the vertices. The model will be particularly useful for generating a series of representing a target population as well as for generating subject-specific head shapes along with predicted landmarks and dimensions. The model is publicly available online at http://humanshape.org/head/.",2020-08-26 +31648087,AdditiveChem: A comprehensive bioinformatics knowledge-base for food additive chemicals.,"Food additives are considered to be the catalysts and headstones of the modern food industry, affecting every step of food production, processing, and storage. 
The urgent need for a comprehensive curation of food additives, including their molecular structures, biological activities, and precise toxicological evaluations, prompted the creation of the AdditiveChem database (http://www.rxnfinder.org/additivechem/). This database has curated >9064 types of food additives, along with their molecular structure, chemical and physical properties, absorption, distribution, metabolism, excretion and toxicity properties, biosynthesis and biodegradation methods, usage specifications, toxicological and risk assessment data, and targets in the human body from 16 databases to construct an efficient search platform for in silico preliminary evaluations. AdditiveChem database will enable an exploration of the relationship between the structure and function of food additives.",2019-09-11 +33882348,WIKIBrainStem: An online atlas to manually segment the human brainstem at the mesoscopic scale from ultrahigh field MRI.,"The brainstem is one of the most densely packed areas of the central nervous system in terms of gray, but also white, matter structures and, therefore, is a highly functional hub. It has mainly been studied by the means of histological techniques, which requires several hundreds of slices with a loss of the 3D coherence of the whole specimen. Access to the inner structure of the brainstem is possible using Magnetic Resonance Imaging (MRI), but this method has a limited spatial resolution and contrast in vivo. Here, we scanned an ex vivo specimen using an ultra-high field (11.7T) preclinical MRI scanner providing data at a mesoscopic scale for anatomical T2-weighted (100 µm and 185 µm isotropic) and diffusion-weighted imaging (300 µm isotropic). We then proposed a hierarchical segmentation of the inner gray matter of the brainstem and defined a set of rules for each segmented anatomical class. 
These rules were gathered in a freely accessible web-based application, WIKIBrainStem (https://fibratlas.univ-tours.fr/brainstems/index.html), for 99 structures, from which 13 were subdivided into 29 substructures. This segmentation is, to date, the most detailed one developed from ex vivo MRI of the brainstem. This should be regarded as a tool that will be complemented by future results of alternative methods, such as Optical Coherence Tomography, Polarized Light Imaging or histology… This is a mandatory step prior to segmenting multiple specimens, which will be used to create a probabilistic automated segmentation method of ex vivo, but also in vivo, brainstem and may be used for targeting anatomical structures of interest in managing some degenerative or psychiatric disorders.",2021-04-18 +31927521,Presentation of 14 alkaptonuria patients from Turkey.,"Background Alkaptonuria (OMIM: 203500) is an inborn error of metabolism due to homogentisate 1,2-dioxygenase homogentisic acid 1,2 dioxygenase (HGD) enzyme deficiency. Due to the enzyme deficiency, homogentisic acid cannot be converted to maleylacetoacetate and it accumulates in body fluids. Increased homogentisic acid is converted to benzoquinones, the resulting benzoquinones are converted to melanin-like pigments, and these pigments are deposited in collagen - this process is called ochronosis. In patients with alkaptonuria, the urine is darkened, which is misinterpreted as hematuria, the incidences of renal stones, arthritis and cardiac valve calcification are increased, and spontaneous tendon ruptures, prostatitis and prostate stones can be encountered. The present study aimed to evaluate the HGD gene mutations in 14 patients with alkaptonuria. Methods Fourteen patients diagnosed with alkaptonuria and followed up from 1990 to 2014 were retrospectively evaluated. Their demographic, clinical and treatment-related data were retrieved from hospital files. 
For mutation analysis, genomic DNAs of the patients were isolated from their peripheral blood samples. Variations in the HGD gene were scanned on the HGD-mutation database (http://hgddatabase.cvtisr.sk). Results Among 14 patients, the female/male ratio was 1/1 and the median age was 9 years (range, 6-59 years). All patients were symptomatic at their first visit and the most common symptom was dark urine (71%) followed by arthralgia. Independent of the urinary homogentisic acid concentrations, patients with the presenting symptom of arthralgia were older. Nine different mutations including p.Ser59AlafsX52, p.Gly161Arg, p.Asn219Ser, p.Gly251Asp, p.Pro274Leu, p.Arg330Ser, p.Gly372Ala, c.656_657insAATCAA and a novel mutation of p.Val316Ile were detected. All of the pediatric-age patients (n = 13) were treated with ascorbic acid at a dose of 250-1000 mg/day. Conclusions Nine different HGD gene mutations with a novel one, p.Val316Ile, were detected. The most common mutation was p.Ser59AlafsX52 for the HGD gene followed by p.Gly161Arg and p.Asn219Ser, which can be considered specific to the Turkish population.",2020-02-01 +34042467,Component Parts of Bacteriophage Virions Accurately Defined by a Machine-Learning Approach Built on Evolutionary Features.,"Antimicrobial resistance (AMR) continues to evolve as a major threat to human health, and new strategies are required for the treatment of AMR infections. Bacteriophages (phages) that kill bacterial pathogens are being identified for use in phage therapies, with the intention to apply these bactericidal viruses directly into the infection sites in bespoke phage cocktails. Despite the great unsampled phage diversity for this purpose, an issue hampering the roll out of phage therapy is the poor quality annotation of many of the phage genomes, particularly for those from infrequently sampled environmental sources. 
We developed a computational tool called STEP3 to use the ""evolutionary features"" that can be recognized in genome sequences of diverse phages. These features, when integrated into an ensemble framework, achieved a stable and robust prediction performance when benchmarked against other prediction tools using phages from diverse sources. Validation of the prediction accuracy of STEP3 was conducted with high-resolution mass spectrometry analysis of two novel phages, isolated from a watercourse in the Southern Hemisphere. STEP3 provides a robust computational approach to distinguish specific and universal features in phages to improve the quality of phage cocktails and is available for use at http://step3.erc.monash.edu/. IMPORTANCE In response to the global problem of antimicrobial resistance, there are moves to use bacteriophages (phages) as therapeutic agents. Selecting which phages will be effective therapeutics relies on interpreting features contributing to shelf-life and applicability to diagnosed infections. However, the protein components of the phage virions that dictate these properties vary so much in sequence that best estimates suggest failure to recognize up to 90% of them. We have utilized this diversity in evolutionary features as an advantage, to apply machine learning for prediction accuracy for diverse components in phage virions. We benchmark this new tool showing the accurate recognition and evaluation of phage component parts using genome sequence data of phages from undersampled environments, where the richest diversity of phage still lies.",2021-05-27 +31160582,"CFTI5Med, the new release of the catalogue of strong earthquakes in Italy and in the Mediterranean area.","A key element for assessing seismic hazard and risk is the availability of a comprehensive dataset on past earthquakes. 
Here we present the rationale, structure and contents of CFTI5Med ( https://doi.org/10.6092/ingv.it-cfti5 ), the 2018 version of the Catalogue of Strong Earthquakes in Italy: a large multidisciplinary effort including historians, seismologists and geologists. It was conceived in 1989, following the inception of GIS technology, and first published in 1995 to offer a full account of Italy's strongest earthquakes, of their territorial impact and associated social and economic upheaval. Subsequent versions (1997, 2000, 2007) entailed a fine tuning of research methodologies, included additional research on Italian earthquakes, and were extended to large earthquakes of the Mediterranean area. CFTI5Med comprised an opportunity to streamline the structure of the Catalogue database and propose a renovated user interface. The new front-end (1) grants an easier, intuitive access to the data, including earthquake effects on the environment, and (2) allows all data to be displayed jointly with relevant topographic, geological and seismological overlays published as web services.",2019-06-03 +29040670,Virus taxonomy: the database of the International Committee on Taxonomy of Viruses (ICTV).,"The International Committee on Taxonomy of Viruses (ICTV) is charged with the task of developing, refining, and maintaining a universal virus taxonomy. This task encompasses the classification of virus species and higher-level taxa according to the genetic and biological properties of their members; naming virus taxa; maintaining a database detailing the currently approved taxonomy; and providing the database, supporting proposals, and other virus-related information from an open-access, public web site. The ICTV web site (http://ictv.global) provides access to the current taxonomy database in online and downloadable formats, and maintains a complete history of virus taxa back to the first release in 1971. The ICTV has also published the ICTV Report on Virus Taxonomy starting in 1971. 
This Report provides a comprehensive description of all virus taxa covering virus structure, genome structure, biology and phylogenetics. The ninth ICTV report, published in 2012, is available as an open-access online publication from the ICTV web site. The current, 10th report (http://ictv.global/report/), is being published online, and is replacing the previous hard-copy edition with a completely open access, continuously updated publication. No other database or resource exists that provides such a comprehensive, fully annotated compendium of information on virus taxa and taxonomy.",2018-01-01 +34194512,Mortality and Excess Mortality: Improving FluMOMO.,"FluMOMO is a universal formula to forecast mortality in 27 European countries and was developed on EuroMOMO context, http://www.euromomo.eu. The model has a trigonometric baseline and considers any upwards deviation from that to come from flu or extreme temperatures. To measure it, the model considers two variables: influenza activity and extreme temperatures. With the former, the model gives the number of deaths because of flu and with the latter the number of deaths because of extreme temperatures. In this article, we show that FluMOMO lacks important variables to be an accurate measure of all-cause mortality and flu mortality. Indeed, we found, as expected, that population ageing and exposure to the risk of death cannot be excluded from the linear predictor. We model weekly deaths as an autoregressive process (lag of one together with a lead of one week). This step allowed us to avoid FluMOMO trigonometric baseline and have a fit to weekly deaths through demographic variables. Our model uses data from Portugal between 2009 and 2020, on ISO-week basis. We use negative binomial-generalized linear models to estimate the weekly number of deaths as an alternative to traditional overdispersion Poisson. 
As explanatory variables were found to be statistically significant, we registered the number of deaths from the previous week, the influenza activity index, the population average age, the heat waves, the flu season, the number of deaths with COVID-19, and the population exposed to the risk of dying. Considering as excess mortality the number of deaths above the best estimate of deaths from our model, we conclude that excess mortality in 2020 (net of COVID-19 deaths, heat wave of July, and ageing) is low or inexistent. The model also allows us to have the number of deaths arising from flu and we conclude that FluMOMO is overestimating deaths from flu by 78%. Averages from the probability of dying are obtained as well as the probability of dying from flu. The latter is shown to be decreasing over time, probably due to the increase of flu vaccination. Higher mortality detected with the start of COVID-19, in March-April 2020, was probably due to COVID-19 deaths not recognized as COVID-19 deaths.",2021-06-07 +33287903,KVarPredDB: a database for predicting pathogenicity of missense sequence variants of keratin genes associated with genodermatoses.,"

Background

Germline variants of ten keratin genes (K1, K2, K5, K6A, K6B, K9, K10, K14, K16, and K17) have been reported for causing different types of genodermatoses with an autosomal dominant mode of inheritance. Among all the variants of these ten keratin genes, most of them are missense variants. Unlike pathogenic and likely pathogenic variants, understanding the clinical importance of novel missense variants or variants of uncertain significance (VUS) is the biggest challenge for clinicians or medical geneticists. Functional characterization is the only way to understand the clinical association of novel missense variants or VUS but it is time consuming, costly, and depends on the availability of patient's samples. Existing databases report the pathogenic variants of the keratin genes, but never emphasize the systematic effects of these variants on keratin protein structure and genotype-phenotype correlation.

Results

To address this need, we developed a comprehensive database KVarPredDB, which contains information of all ten keratin genes associated with genodermatoses. We integrated and curated 400 reported pathogenic missense variants as well as 4629 missense VUS. KVarPredDB predicts the pathogenicity of novel missense variants as well as to understand the severity of disease phenotype, based on four criteria; firstly, the difference in physico-chemical properties between the wild type and substituted amino acids; secondly, the loss of inter/intra-chain interactions; thirdly, evolutionary conservation of the wild type amino acids and lastly, the effect of the substituted amino acids in the heptad repeat. Molecular docking simulations based on resolved crystal structures were adopted to predict stability changes and get the binding energy to compare the wild type protein with the mutated one. We use this basic information to determine the structural and functional impact of novel missense variants on the keratin coiled-coil heterodimer. KVarPredDB was built under the integrative web application development framework SSM (SpringBoot, Spring MVC, MyBatis) and implemented in Java, Bootstrap, React-mutation-mapper, MySQL, Tomcat. The website can be accessed through http://bioinfo.zju.edu.cn/KVarPredDB . The genomic variants and analysis results are freely available under the Creative Commons license.

Conclusions

KVarPredDB provides an intuitive and user-friendly interface with computational analytical investigation for each missense variant of the keratin genes associated with genodermatoses.",2020-12-07 +33107905,A representation model for biological entities by fusing structured axioms with unstructured texts.,"

Motivation

Structured semantic resources, for example, biological knowledge bases and ontologies, formally define biological concepts, entities and their semantic relationships, manifested as structured axioms and unstructured texts (e.g. textual definitions). The resources contain accurate expressions of biological reality and have been used by machine-learning models to assist intelligent applications like knowledge discovery. The current methods use both the axioms and definitions as plain texts in representation learning (RL). However, since the axioms are machine-readable while the natural language is human-understandable, difference in meaning of token and structure impedes the representations to encode desirable biological knowledge.

Results

We propose ERBK, a RL model of bio-entities. Instead of using the axioms and definitions as a textual corpus, our method uses knowledge graph embedding method and deep convolutional neural models to encode the axioms and definitions respectively. The representations could not only encode more underlying biological knowledge but also be further applied to zero-shot circumstance where existing approaches fall short. Experimental evaluations show that ERBK outperforms the existing methods for predicting protein-protein interactions and gene-disease associations. Moreover, it shows that ERBK still maintains promising performance under the zero-shot circumstance. We believe the representations and the method have certain generality and could extend to other types of bio-relation.

Availability and implementation

The source code is available at the gitlab repository https://gitlab.com/BioAI/erbk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +28187703,iHMS: a database integrating human histone modification data across developmental stages and tissues.,"

Background

Differences in chromatin states are critical to the multiplicity of cell states. Recently genome-wide histone modification maps of diverse human developmental stages and tissues have been charted.

Description

To facilitate the investigation of epigenetic dynamics and regulatory mechanisms in cellular differentiation processes, we developed iHMS, an integrated human histone modification database that incorporates massive histone modification maps spanning different developmental stages, lineages and tissues ( http://www.tongjidmb.com/human/index.html ). It also includes genome-wide expression data of different conditions, reference gene annotations, GC content and CpG island information. By providing an intuitive and user-friendly query interface, iHMS enables comprehensive query and comparative analysis based on gene names, genomic region locations, histone modification marks and cell types. Moreover, it offers an efficient browser that allows users to visualize and compare multiple genome-wide histone modification maps and related expression profiles across different developmental stages and tissues.

Conclusion

iHMS is of great helpfulness to understand how global histone modification state transitions impact cellular phenotypes across different developmental stages and tissues in the human genome. This extensive catalog of histone modification states thus presents an important resource for epigenetic and developmental studies.",2017-02-11 +30395284,Haemopedia RNA-seq: a database of gene expression during haematopoiesis in mice and humans.,"During haematopoiesis, haematopoietic stem cells differentiate into restricted potential progenitors before maturing into the many lineages required for oxygen transport, wound healing and immune response. We have updated Haemopedia, a database of gene-expression profiles from a broad spectrum of haematopoietic cells, to include RNA-seq gene-expression data from both mice and humans. The Haemopedia RNA-seq data set covers a wide range of lineages and progenitors, with 57 mouse blood cell types (flow sorted populations from healthy mice) and 12 human blood cell types. This data set has been made accessible for exploration and analysis, to researchers and clinicians with limited bioinformatics experience, on our online portal Haemosphere: https://www.haemosphere.org. Haemosphere also includes nine other publicly available high-quality data sets relevant to haematopoiesis. We have added the ability to compare gene expression across data sets and species by curating data sets with shared lineage designations or to view expression gene vs gene, with all plots available for download by the user.",2019-01-01 +33151143,The National Center for Health Statistics' 2015 and 2016 Research and Development Surveys.,"Objective: This report provides a general description of the background and operation of the first two rounds of the Research and Development Survey (RANDS), a series of cross-sectional surveys from probability-sampled commercial survey panels. 
The Division of Research and Methodology of the National Center for Health Statistics (NCHS) conducted the first two rounds of RANDS in 2015 and 2016. RANDS 1 and 2 are being used primarily for question design evaluation and for investigating statistical methodologies for estimation. +Methods: NCHS contracted with Gallup, Inc. to conduct RANDS 1 in Fall 2015 and RANDS 2 in Spring 2016. RANDS 1 and 2 were conducted using a web survey mode and included survey questions from the National Health Interview Survey (NHIS) that were specifically chosen to provide comparison and evaluation of the survey methodology properties of web surveys and traditional household surveys. In this report, some demographic and health estimates are provided from both sources to describe the RANDS data. +Results: In RANDS 1, 2,304 out of the original 9,809 invited panel members completed the survey, for a completion rate of 23.5%. In RANDS 2, 2,480 of the initial 8,231 invited respondents completed the survey, for a completion rate of 30.1%. RANDS 1 and 2 participants were similar to the quarterly NHIS participants with respect to sex, census region, and whether they had worked for pay in the previous week. Other characteristics varied, including age, race and ethnicity, and income. Most health estimates differed between RANDS and NHIS. Public-use versions of the RANDS data can be found at: https://www.cdc.gov/nchs/rands. 
+Conclusion: RANDS is an ongoing platform for research to understand the properties of probability-sampled recruited panels of primarily web users, investigating and developing statistical methods for using such data in conjunction with large nationally representative health surveys, and for extending question-design evaluations.",2020-10-01 +34113636,Development and Validation of a Predictive Model for Severe COVID-19: A Case-Control Study in China.,"Background: Predicting the risk of progression to severe coronavirus disease 2019 (COVID-19) could facilitate personalized diagnosis and treatment options, thus optimizing the use of medical resources. Methods: In this prospective study, 206 patients with COVID-19 were enrolled from regional medical institutions between December 20, 2019, and April 10, 2020. We collated a range of data to derive and validate a predictive model for COVID-19 progression, including demographics, clinical characteristics, laboratory findings, and cytokine levels. Variation analysis, along with the least absolute shrinkage and selection operator (LASSO) and Boruta algorithms, was used for modeling. The performance of the derived models was evaluated by specificity, sensitivity, area under the receiver operating characteristic (ROC) curve (AUC), Akaike information criterion (AIC), calibration plots, decision curve analysis (DCA), and Hosmer-Lemeshow test. Results: We used the LASSO algorithm and logistic regression to develop a model that can accurately predict the risk of progression to severe COVID-19. The model incorporated alanine aminotransferase (ALT), interleukin (IL)-6, expectoration, fatigue, lymphocyte ratio (LYMR), aspartate transaminase (AST), and creatinine (CREA). The model yielded a satisfactory predictive performance with an AUC of 0.9104 and 0.8792 in the derivation and validation cohorts, respectively. 
The final model was then used to create a nomogram that was packaged into an open-source and predictive calculator for clinical use. The model is freely available online at https://severeconid-19predction.shinyapps.io/SHINY/. Conclusion: In this study, we developed an open-source and free predictive calculator for COVID-19 progression based on ALT, IL-6, expectoration, fatigue, LYMR, AST, and CREA. The validated model can effectively predict progression to severe COVID-19, thus providing an efficient option for early and personalized management and the allocation of appropriate medical resources.",2021-05-25 +31467713,Integrated Analysis of Oncogenic Networks in Colorectal Cancer Identifies GUCA2A as a Molecular Marker.,"Colorectal cancer (CRC) is one of the most common and deadly malignancies in the world. In China, the morbidity rate of CRC has increased during the period 2000 to 2011. Biomarker detection for early CRC diagnosis can effectively reduce the mortality of patients with CRC. To explore the underlying mechanisms of effective biomarkers and identify more of them, we performed weighted correlation network analysis (WGCNA) on a GSE68468 dataset generated from 378 CRC tissue samples. We screened the gene set (module), which was significantly associated with CRC histology, and analyzed the hub genes. The key genes were identified by obtaining six colorectal raw data (i.e., GSE25070, GSE44076, GSE44861, GSE21510, GSE9348, and GSE21815) from the GEO database (https://www.ncbi.nlm.nih.gov/geo). The robust differentially expressed genes (DEGs) in all six datasets were calculated and obtained using the library ""RobustRankAggreg"" package in R 3.5.1. An integrated analysis of CRC based on the top 50 downregulated DEGs and hub genes in the red module from WGCNA was conducted, and the intersecting genes were screened. The Kaplan-Meier plot was further analyzed, and the genes associated with CRC prognosis based on patients from the TCGA database were determined. 
Finally, we validated the candidate gene in our clinical CRC specimens. We postulated that the candidate genes screened from the database and verified by our clinical pathological data may contribute to understanding the molecular mechanisms of tumorigenesis and may serve as potential biomarkers for CRC diagnosis and treatment.",2019-07-28 +31606900,VarSite: Disease variants and protein structure.,"VarSite is a web server mapping known disease-associated variants from UniProt and ClinVar, together with natural variants from gnomAD, onto protein 3D structures in the Protein Data Bank. The analyses are primarily image-based and provide both an overview for each human protein, as well as a report for any specific variant of interest. The information can be useful in assessing whether a given variant might be pathogenic or benign. The structural annotations for each position in the protein include protein secondary structure, interactions with ligand, metal, DNA/RNA, or other protein, and various measures of a given variant's possible impact on the protein's function. The 3D locations of the disease-associated variants can be viewed interactively via the 3dmol.js JavaScript viewer, as well as in RasMol and PyMOL. Users can search for specific variants, or sets of variants, by providing the DNA coordinates of the base change(s) of interest. Additionally, various agglomerative analyses are given, such as the mapping of disease and natural variants onto specific Pfam or CATH domains. The server is freely accessible to all at: https://www.ebi.ac.uk/thornton-srv/databases/VarSite.",2019-10-27 +33975522,"Cannabis Use, Screen Time, and Internalizing Symptoms among Canadian Youth: Testing Mediation Pathways.","Background: Existing research suggests positive correlations between screen time sedentary behaviors (STSB) and substance use, including cannabis use, among youth. 
However, little research has examined what factors mediate these relationships. Methods: This study examined mediating pathways among STSB, internalizing symptoms (IS), and cannabis use in a linked longitudinal sample of 28 269 Canadian youth who participated in the COMPASS study over a two-year period (2017/18 to 2018/19). Structural equation modeling examined two main hypotheses cross-sectionally and over time: 1) if IS mediated associations between STSB and cannabis use frequency, and 2) if STSB mediated associations between IS and cannabis use frequency. Results: Results demonstrated significant partial mediation effects for both hypotheses. For example, indirect effects indicated that IS mediated the association between STSB and cannabis use both cross sectionally (95% CI: 0.021, 0.029) and longitudinally (95% CI: 0.006, 0.010). STSB also mediated associations between IS and cannabis use cross sectionally (95% CI: 0.015, 0.023) and longitudinally (95% CI: 0.010, 0.014). This study demonstrated that the associations between STSB, internalizing symptoms and cannabis use are complex, involving mediation in both directions. Discussion: These findings can be used to inform public health initiatives that aim to take a comprehensive approach to addressing negative health behaviors and outcomes, as it is clear that the multi-directional relationships between STSB and mental health may in-turn impact other health behaviors. Future research should continue to examine mediating factors between STSB and substance use among youth, including exploration of associations with other substances. Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1922455.",2021-05-12 +,"Estimate, a New iPad Application for Assessment of Plant Disease Severity Using Photographic Standard Area Diagrams","Assessment of disease severity is a foundational component of plant pathology and essential for robust disease management. 
Researchers often estimate disease severity using standard area diagrams (SADs) that are reference images representing disease severity in percentage increments. SADs provide assessments of disease severity that are more accurate, precise, and reliable than other methods. Although specific SADs have been constructed for many plant diseases, they often depict severity in unrealistic black-and-white or grayscale illustrations. SADs are also usually printed, static references that can burden data collection in the field and require data to be transferred manually to a computer spreadsheet for manipulation. This data entry process and verification are prone to errors and require additional inputs of time and labor. We developed a new iPad application (app) called Estimate for researchers and crop managers for their use on a mobile device at the field-level for assessing plant disease severity in order to collect data or aid in treatment decisions. The app is a repository for digital, photographic SADs and offers savings in time for data collection and processing. Estimate allows users to select a disease from a prepopulated list and specify the reference disease images in either logarithmic or linear intervals. Data may be collected as the midpoint of an interval (ordinal) or as 1% increments (continuous). Users then select among photographic images by touching those that best match the observed disease severity on successive samples. Estimate allows data entry at the plant and leaf hierarchical levels within plots and subplots. Alternatively, data may be collected on single sampling units with an undefined experimental design (i.e., 1 to x leaves). The user may inspect and e-mail the final data in comma-separated values format for analysis using conventional spreadsheet software. Estimate was released with SADs for assessing the severity of Cercospora leaf spot in red and yellow table beet cultivars. 
A list of collaborators and up-to-date list of SADs included in Estimate is available at http://evade.pppmb.cals.cornell.edu/estimate/. SADs for other diseases will be added to Estimate as they become available. Estimate is available for free download from iTunes (https://itunes.apple.com/WebObjects/MZStore.woa/wa/viewSoftware?id=1193605571&mt=8) and is compatible with an iPad Air 2 or equivalent using iOS 9.0 or greater.",2018-02-01 +28184254,NPCARE: database of natural products and fractional extracts for cancer regulation.,"

Background

Natural products have increasingly attracted much attention as a valuable resource for the development of anticancer medicines due to the structural novelty and good bioavailability. This necessitates a comprehensive database for the natural products and the fractional extracts whose anticancer activities have been verified.

Description

NPCARE (http://silver.sejong.ac.kr/npcare) is a publicly accessible online database of natural products and fractional extracts for cancer regulation. At NPCARE, one can explore 6578 natural compounds and 2566 fractional extracts isolated from 1952 distinct biological species including plants, marine organisms, fungi, and bacteria whose anticancer activities were validated with 1107 cell lines for 34 cancer types. Each entry in NPCARE is annotated with the cancer type, genus and species names of the biological resource, the cell line used for demonstrating the anticancer activity, PubChem ID, and a wealth of information about the target gene or protein. Besides the augmentation of plant entries up to 743 genus and 197 families, NPCARE is further enriched with the natural products and the fractional extracts of diverse non-traditional biological resources.

Conclusions

NPCARE is anticipated to serve as a dominant gateway for the discovery of new anticancer medicines due to the inclusion of a large number of the fractional extracts as well as the natural compounds isolated from a variety of biological resources.",2017-01-05 +32003691,An Update on Glutamatergic System in Suicidal Depression and on the Role of Esketamine.,"

Background

A research on mood disorder pathophysiology has hypothesized abnormalities in glutamatergic neurotransmission, by suggesting further investigation on glutamatergic N-methyl-D-aspartate (NMDA) receptor modulators in treating Major Depressive Disorder (MDD). Esketamine (ESK), an NMDA receptor antagonist able to modulate glutamatergic neurotransmission has been recently developed as an intranasal formulation for treatment-resistant depression (TRD) and for rapid reduction of depressive symptomatology, including suicidal ideation in MDD patients at imminent risk for suicide.

Objective

The present study aims at investigating recent clinical findings on research on the role of the glutamatergic system and ESK in treating suicidal depression in MDD and TRD.

Methods

A systematic review was here carried out on PubMed/Medline, Scopus and the database on U.S. N.I.H. Clinical Trials (https://clinicaltrials.gov) and the European Medicines Agency (EMA) (https://clinicaltrialsregister.eu) from inception until October 2019.

Results

Intravenous infusion of ESK is reported to elicit rapid-acting and sustained antidepressant activity in refractory patients with MDD and TRD. In phase II studies, intranasal ESK demonstrated a rapid onset and a persistent efficacy in patients with TRD as well as in MDD patients at imminent risk for suicide. However, some data discrepancies have emerged in phase III studies.

Conclusion

The U.S. Food and Drug Administration (FDA) granted fast track and Breakthrough Therapy Designation to Janssen Pharmaceuticals®, Inc. for intranasal ESK in 2013 for treatment-resistant depression (TRD) and in 2016 for the treatment of MDD with an imminent risk of suicide. However, further studies should be implemented to investigate the long-term efficacy and safety of intranasal ESK.",2020-01-01 +33743125,Mapping Domain- and Age-Specific Functional Brain Activity for Children's Cognitive and Affective Development.,"The human brain undergoes rapid development during childhood, with significant improvement in a wide spectrum of cognitive and affective functions. Mapping domain- and age-specific brain activity patterns has important implications for characterizing the development of children's cognitive and affective functions. The current mainstay of brain templates is primarily derived from structural magnetic resonance imaging (MRI), and thus is not ideal for mapping children's cognitive and affective brain development. By integrating task-dependent functional MRI data from a large sample of 250 children (aged 7 to 12) across multiple domains and the latest easy-to-use and transparent preprocessing workflow, we here created a set of age-specific brain functional activity maps across four domains: attention, executive function, emotion, and risky decision-making. Moreover, we developed a toolbox named Developmental Brain Functional Activity maps across multiple domains that enables researchers to visualize and download domain- and age-specific brain activity maps for various needs. This toolbox and maps have been released on the Neuroimaging Informatics Tools and Resources Clearinghouse website ( http://www.nitrc.org/projects/dbfa ). 
Our study provides domain- and age-specific brain activity maps for future developmental neuroimaging studies in both healthy and clinical populations.",2021-03-20 +29637199,BiOnIC: A Catalog of User Interactions with Biomedical Ontologies.,"BiOnIC is a catalog of aggregated statistics of user clicks, queries, and reuse counts for access to over 200 biomedical ontologies. BiOnIC also provides anonymized sequences of classes accessed by users over a period of four years. To generate the statistics, we processed the access logs of BioPortal, a large open biomedical ontology repository. We publish the BiOnIC data using DCAT and SKOS metadata standards. The BiOnIC catalog has a wide range of applicability, which we demonstrate through its use in three different types of applications. To our knowledge, this type of interaction data stemming from a real-world, large-scale application has not been published before. We expect that the catalog will become an important resource for researchers and developers in the Semantic Web community by providing novel insights into how ontologies are explored, queried and reused. The BiOnIC catalog may ultimately assist in the more informed development of intelligent user interfaces for semantic resources through interface customization, prediction of user browsing and querying behavior, and ontology summarization. The BiOnIC catalog is available at: http://onto-apps.stanford.edu/bionic.",2017-10-04 +,5PSQ-138 Best practice of ward-based reconstitution in paediatric hospitals,"

Background

In our country, we use a national system for paediatric drug data management called ePed.1 It provides a unique identifier (ePed-ID) for each reconstitution connected to the national drug-ID. This system also contains a full description for the reconstitution with e.g. administration time, shelf-life, common indication/dose, a dose range check and references.

Purpose

This study investigates additional risk classification and best practice video instructions to each unique ePed-ID, with vancomycin as an example.

Material and methods

With regards to vancomycin, this study consists of: High–risk classification developed by the European Directorate for the Quality of Medicines and Healthcare (EDQM).2 Use of video recording to assess reconstitution in six different paediatric settings. Identification of best practice by the Delphi process. Recording of professional videos for instruction purposes.

Results

Six major paediatric centres contributed to the investigation. All hospitals use vancomycin in standard concentration 5 mg/mL and it is commonly regarded as a high-risk drug due to a multistep reconstitution practice. In the risk evaluation, two centres used pre-diluted vancomycin to lower the residual risk. Four centres used closed-systems, and three centres added risk-reducing strategies from a hood or forced ventilation. By observing the recorded videos, different strategies were present, e.g. additional protective clothing and processes in centres with non-validated closed systems. The Delphi process had a 100% agreement for best practice depending on the risk assessment, resulting in three videos for instruction purposes regarding vancomycin: Pharmacy prepared. Validated closed–system reconstitution with minimal recommendation of protective clothing. Non–validated closed–system reconstitution with recommendation of protective clothing and forced ventilation.

Conclusion

High-risk drugs identified by the EDQM resolution allows hospitals to act differently. The residual risk of high-risk drug reconstitution can be captured by video imaging, to better understand the process of reconstitution. This method will be used in a national project for all instructions in the ePed database to provide risk classification and record video instructions.

References and/or Acknowledgements

1. https://www.eped.se 2. EDQM, Resolution CM/Res(2016)2 On Good Reconstitution Practices in Health-Care Establishments. 3. Financial support from The Swedish National Pharmaceutical Strategy No conflict of interest",2018-01-01 +31250882,Isoform function prediction based on bi-random walks on a heterogeneous network.,"MOTIVATION:Alternative splicing contributes to the functional diversity of protein species and the proteoforms translated from alternatively spliced isoforms of a gene actually execute the biological functions. Computationally predicting the functions of genes has been studied for decades. However, how to distinguish the functional annotations of isoforms, whose annotations are essential for understanding developmental abnormalities and cancers, is rarely explored. The main bottleneck is that functional annotations of isoforms are generally unavailable and functional genomic databases universally store the functional annotations at the gene level. RESULTS:We propose IsoFun to accomplish Isoform Function prediction based on bi-random walks on a heterogeneous network. IsoFun firstly constructs an isoform functional association network based on the expression profiles of isoforms derived from multiple RNA-seq datasets. Next, IsoFun uses the available Gene Ontology annotations of genes, gene-gene interactions and the relations between genes and isoforms to construct a heterogeneous network. After this, IsoFun performs a tailored bi-random walk on the heterogeneous network to predict the association between GO terms and isoforms, thus accomplishing the prediction of GO annotations of isoforms. Experimental results show that IsoFun significantly outperforms the state-of-the-art algorithms and improves the area under the receiver-operating curve (AUROC) and the area under the precision-recall curve (AUPRC) by 17% and 44% at the gene-level, respectively. We further validated the performance of IsoFun on the genes ADAM15 and BCL2L1. 
IsoFun accurately differentiates the functions of respective isoforms of these two genes. AVAILABILITY AND IMPLEMENTATION:The code of IsoFun is available at http://mlda.swu.edu.cn/codes.php? name=IsoFun. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-01-01 +30926570,Genome Annotator Light (GAL): A Docker-based package for genome analysis and visualization.,"Next generation sequencing techniques produce enormous data but its analysis and visualization remains a big challenge. To address this, we have developed Genome Annotator Light(GAL), a Docker based package for genome analysis and data visualization. GAL integrated several existing tools and in-house programs inside a Docker Container for systematic analysis and visualization of genomes through web browser. GAL takes varieties of input types ranging from raw Fasta files to fully annotated files, processes them through a standard annotation pipeline and visualizes on a web browser. Comparative genomic analysis is performed automatically within a given taxonomic class. GAL creates interactive genome browser with clickable genomic feature tracks; local BLAST-able database; query page, on-fly downstream data analysis using EMBOSS etc. Overall, GAL is an extremely convenient, portable and platform independent. Fully integrated web-resources can be easily created and deployed, e.g. www.eumicrobedb.org/cglab, for our in-house genomes. GAL is freely available at https://hub.docker.com/u/cglabiicb/.",2019-03-26 +32186698,Redundancy-weighting the PDB for detailed secondary structure prediction using deep-learning models.,"

Motivation

The Protein Data Bank (PDB), the ultimate source for data in structural biology, is inherently imbalanced. To alleviate biases, virtually all structural biology studies use nonredundant (NR) subsets of the PDB, which include only a fraction of the available data. An alternative approach, dubbed redundancy-weighting (RW), down-weights redundant entries rather than discarding them. This approach may be particularly helpful for machine-learning (ML) methods that use the PDB as their source for data. Methods for secondary structure prediction (SSP) have greatly improved over the years with recent studies achieving above 70% accuracy for eight-class (DSSP) prediction. As these methods typically incorporate ML techniques, training on RW datasets might improve accuracy, as well as pave the way toward larger and more informative secondary structure classes.

Results

This study compares the SSP performances of deep-learning models trained on either RW or NR datasets. We show that training on RW sets consistently results in better prediction of 3- (HCE), 8- (DSSP) and 13-class (STR2) secondary structures.

Availability and implementation

The ML models, the datasets used for their derivation and testing, and a stand-alone SSP program for DSSP and STR2 predictions, are freely available under LGPL license in http://meshi1.cs.bgu.ac.il/rw.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +29530937,Predicted Arabidopsis Interactome Resource and Gene Set Linkage Analysis: A Transcriptomic Analysis Resource.,"An advanced functional understanding of omics data is important for elucidating the design logic of physiological processes in plants and effectively controlling desired traits in plants. We present the latest versions of the Predicted Arabidopsis Interactome Resource (PAIR) and of the gene set linkage analysis (GSLA) tool, which enable the interpretation of an observed transcriptomic change (differentially expressed genes [DEGs]) in Arabidopsis (Arabidopsis thaliana) with respect to its functional impact for biological processes. PAIR version 5.0 integrates functional association data between genes in multiple forms and infers 335,301 putative functional interactions. GSLA relies on this high-confidence inferred functional association network to expand our perception of the functional impacts of an observed transcriptomic change. GSLA then interprets the biological significance of the observed DEGs using established biological concepts (annotation terms), describing not only the DEGs themselves but also their potential functional impacts. This unique analytical capability can help researchers gain deeper insights into their experimental results and highlight prospective directions for further investigation. We demonstrate the utility of GSLA with two case studies in which GSLA uncovered how molecular events may have caused physiological changes through their collective functional influence on biological processes. Furthermore, we showed that typical annotation-enrichment tools were unable to produce similar insights to PAIR/GSLA. 
The PAIR version 5.0-inferred interactome and GSLA Web tool both can be accessed at http://public.synergylab.cn/pair/.",2018-03-12 +27789693,3DSNP: a database for linking human noncoding SNPs to their three-dimensional interacting genes.,"The vast noncoding portion of the human genome harbors a rich array of functional elements and disease-causing regulatory variants. Recent high-throughput chromosome conformation capture studies have outlined the principles of these elements interacting and regulating the expression of distal target genes through three-dimensional (3D) chromatin looping. Here we present 3DSNP, an integrated database for annotating human noncoding variants by exploring their roles in the distal interactions between genes and regulatory elements. 3DSNP integrates 3D chromatin interactions, local chromatin signatures in different cell types and linkage disequilibrium (LD) information from the 1000 Genomes Project. 3DSNP provides informative visualization tools to display the integrated local and 3D chromatin signatures and the genetic associations among variants. Data from different functional categories are integrated in a scoring system that quantitatively measures the functionality of SNPs to help select important variants from a large pool. 3DSNP is a valuable resource for the annotation of human noncoding genome sequence and investigating the impact of noncoding variants on clinical phenotypes. The 3DSNP database is available at http://biotech.bmi.ac.cn/3dsnp/.",2016-10-26 +33543686,"Electronic cigarette solvents, pulmonary irritation, and endothelial dysfunction: role of acetaldehyde and formaldehyde.","After more than a decade of electronic cigarette (E-cig) use in the United States, uncertainty persists regarding E-cig use and long-term cardiopulmonary disease risk. 
As all E-cigs use propylene glycol and vegetable glycerin (PG-VG) and generate abundant saturated aldehydes, mice were exposed by inhalation to PG-VG-derived aerosol, formaldehyde (FA), acetaldehyde (AA), or filtered air. Biomarkers of exposure and cardiopulmonary injury were monitored by mass spectrometry (urine metabolites), radiotelemetry (respiratory reflexes), isometric myography (aorta), and flow cytometry (blood markers). Acute PG-VG exposure significantly affected multiple biomarkers including pulmonary reflex (decreased respiratory rate, -50%), endothelium-dependent relaxation (-61.8 ± 4.2%), decreased WBC (-47 ± 7%), and, increased RBC (+6 ± 1%) and hemoglobin (+4 ± 1%) versus air control group. Notably, FA exposure recapitulated the prominent effects of PG-VG aerosol on pulmonary irritant reflex and endothelial dysfunction, whereas AA exposure did not. To attempt to link PG-VG exposure with FA or AA exposure, urinary formate and acetate levels were measured by GC-MS. Although neither FA nor AA exposure altered excretion of their primary metabolite, formate or acetate, respectively, compared with air-exposed controls, PG-VG aerosol exposure significantly increased post-exposure urinary acetate but not formate. These data suggest that E-cig use may increase cardiopulmonary disease risk independent of the presence of nicotine and/or flavorings. This study indicates that FA levels in tobacco product-derived aerosols should be regulated to levels that do not induce biomarkers of cardiopulmonary harm. There remains a need for reliable biomarkers of exposure to inhaled FA and AA.NEW & NOTEWORTHY Use of electronic cigarettes (E-cig) induces endothelial dysfunction (ED) in healthy humans, yet the specific constituents in E-cig aerosols that contribute to ED are unknown. Our study implicates formaldehyde that is formed in heating of E-cig solvents (propylene glycol, PG; vegetable glycerin, VG). 
Exposure to formaldehyde or PG-VG-derived aerosol alone stimulated ED in female mice. As ED was independent of nicotine and flavorants, these data reflect a ""universal flaw"" of E-cigs that use PG-VG.Listen to this article's corresponding podcast at https://ajpheart.podbean.com/e/e-cigarettes-aldehydes-and-endothelial-dysfunction/.",2021-02-05 +28453687,GDISC: a web portal for integrative analysis of gene-drug interaction for survival in cancer.,"

Summary

Survival analysis has been applied to The Cancer Genome Atlas (TCGA) data. Although drug exposure records are available in TCGA, existing survival analyses typically did not consider drug exposure, partly due to naming inconsistencies in the data. We have spent extensive effort to standardize the drug exposure data, which enabled us to perform survival analysis on drug-stratified subpopulations of cancer patients. Using this strategy, we integrated gene copy number data, drug exposure data and patient survival data to infer gene-drug interactions that impact survival. The collection of all analyzed gene-drug interactions in 32 cancer types are organized and presented in a searchable web-portal called gene-drug Interaction for survival in cancer (GDISC). GDISC allows biologists and clinicians to interactively explore the gene-drug interactions identified in the context of TCGA, and discover interactions associated to their favorite cancer, drug and/or gene of interest. In addition, GDISC provides the standardized drug exposure data, which is a valuable resource for developing new methods for drug-specific analysis.

Availability and implementation

GDISC is available at https://gdisc.bme.gatech.edu/.

Contact

peng.qiu@bme.gatech.edu.",2017-05-01 +33279858,Transcriptional network modulated by the prognostic signature transcription factors and their long noncoding RNA partners in primary prostate cancer.,"

Background

Transcriptional regulators are seminal players in the onset and progression of prostate cancer. However, clarification of their underlying regulatory circuits and mechanisms demands considerable effort.

Methods

Integrated analyses were performed on genomic, transcriptomic, and clinicopathological profiles of primary prostate cancer and transcription factor-binding profiles, which included estimating transcription factor activity, identifying transcription factors of prognostic values, and discovering cis- and trans-regulations by long noncoding RNAs. Interactions between transcription factors and long noncoding RNAs were validated by RNA immunoprecipitation quantitative PCR. RNA interference assays were performed to explore roles of the selected transcription regulators.

Findings

Sixteen transcription factors, namely, ETS1, ARID4B, KLF12, GMEB1, HBP1, MXI1, MYC, MAX, PGR, BCL11A, AR, KLF4, SRF, HIF1A, EHF, and ATOH1, were jointly identified as a prognostic signature. Candidate long noncoding RNAs interplaying with the prognostic signature constituent transcription factors were further discovered. Their interactions were randomly checked, and many of them were experimentally proved. Transcription regulation by MYC and its long noncoding RNA partner AL590617.2 was further validated on their candidate targets. Moreover, the regulatory network governed by the transcription factors and their interacting long noncoding RNA partners is illustrated and stored in our LNCTRN database (https://navy.shinyapps.io/lnctrn).

Interpretation

The prognostic signature constituent transcription factors and their interacting long noncoding RNAs may represent promising biomarkers and/or therapeutic targets for prostate cancer. Furthermore, the computational framework proposed in the present study can be utilized to explore critical transcriptional regulators in other types of cancer.

Funding

This work was supported by National Natural Science Foundation of China and Fudan University.",2020-12-03 +31508797,Benchmarking database systems for Genomic Selection implementation. ,"With high-throughput genotyping systems now available, it has become feasible to fully integrate genotyping information into breeding programs. To make use of this information effectively requires DNA extraction facilities and marker production facilities that can efficiently deploy the desired set of markers across samples with a rapid turnaround time that allows for selection before crosses needed to be made. In reality, breeders often have a short window of time to make decisions by the time they are able to collect all their phenotyping data and receive corresponding genotyping data. This presents a challenge to organize information and utilize it in downstream analyses to support decisions made by breeders. In order to implement genomic selection routinely as part of breeding programs, one would need an efficient genotyping data storage system. We selected and benchmarked six popular open-source data storage systems, including relational database management and columnar storage systems. We found that data extract times are greatly influenced by the orientation in which genotype data is stored in a system. HDF5 consistently performed best, in part because it can more efficiently work with both orientations of the allele matrix. http://gobiin1.bti.cornell.edu:6083/projects/GBM/repos/benchmarking/browse.",2019-01-01 +33845483,Multilevel proteomics reveals host perturbations by SARS-CoV-2 and SARS-CoV.,"The emergence and global spread of SARS-CoV-2 has resulted in the urgent need for an in-depth understanding of molecular functions of viral proteins and their interactions with the host proteome. Several individual omics studies have extended our knowledge of COVID-19 pathophysiology1-10. 
Integration of such datasets to obtain a holistic view of virus-host interactions and to define the pathogenic properties of SARS-CoV-2 is limited by the heterogeneity of the experimental systems. Here we report a concurrent multi-omics study of SARS-CoV-2 and SARS-CoV. Using state-of-the-art proteomics, we profiled the interactomes of both viruses, as well as their influence on the transcriptome, proteome, ubiquitinome and phosphoproteome of a lung-derived human cell line. Projecting these data onto the global network of cellular interactions revealed crosstalk between the perturbations taking place upon infection with SARS-CoV-2 and SARS-CoV at different levels and enabled identification of distinct and common molecular mechanisms of these closely related coronaviruses. The TGF-β pathway, known for its involvement in tissue fibrosis, was specifically dysregulated by SARS-CoV-2 ORF8 and autophagy was specifically dysregulated by SARS-CoV-2 ORF3. The extensive dataset (available at https://covinet.innatelab.org ) highlights many hotspots that could be targeted by existing drugs and may be used to guide rational design of virus- and host-directed therapies, which we exemplify by identifying inhibitors of kinases and matrix metalloproteases with potent antiviral effects against SARS-CoV-2.",2021-04-12 +26249811,SurvCurv database and online survival analysis platform update.,"

Unlabelled

Understanding the biology of ageing is an important and complex challenge. Survival experiments are one of the primary approaches for measuring changes in ageing. Here, we present a major update to SurvCurv, a database and online resource for survival data in animals. As well as a substantial increase in data and additions to existing graphical and statistical survival analysis features, SurvCurv now includes extended mathematical mortality modelling functions and survival density plots for more advanced representation of groups of survival cohorts.

Availability and implementation

The database is freely available at https://www.ebi.ac.uk/thornton-srv/databases/SurvCurv/. All data are published under the Creative Commons Attribution License.

Contact

matthias.ziehm@ebi.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-06 +26691201,An ultra-high-density map as a community resource for discerning the genetic basis of quantitative traits in maize.,"

Background

To safeguard the food supply for the growing human population, it is important to understand and exploit the genetic basis of quantitative traits. Next-generation sequencing technology performs advantageously and effectively in genetic mapping and genome analysis of diverse genetic resources. Hence, we combined re-sequencing technology and a bin map strategy to construct an ultra-high-density bin map with thousands of bin markers to precisely map a quantitative trait locus.

Results

In this study, we generated a linkage map containing 1,151,856 high quality SNPs between Mo17 and B73, which were verified in the maize intermated B73 × Mo17 (IBM) Syn10 population. This resource is an excellent complement to existing maize genetic maps available in an online database  (iPlant, http://data.maizecode.org/maize/qtl/syn10/ ). Moreover, in this population combined with the IBM Syn4 RIL population, we detected 135 QTLs for flowering time and plant height traits across the two populations. Eighteen known functional genes and twenty-five candidate genes for flowering time and plant height trait were fine-mapped into a 2.21-4.96 Mb interval. Map expansion and segregation distortion were also analyzed, and evidence for inadvertent selection of early flowering time in the process of mapping population development was observed. Furthermore, an updated integrated map with 1,151,856 high-quality SNPs, 2,916 traditional markers and 6,618 bin markers was constructed. The data were deposited into the iPlant Discovery Environment (DE), which provides a fundamental resource of genetic data for the maize genetic research community.

Conclusions

Our findings provide basic essential genetic data for the maize genetic research community. An updated IBM Syn10 population and a reliable, verified high-quality SNP set between Mo17 and B73 will aid in future molecular breeding efforts.",2015-12-21 +25414324,Araport: the Arabidopsis information portal.,"The Arabidopsis Information Portal (https://www.araport.org) is a new online resource for plant biology research. It houses the Arabidopsis thaliana genome sequence and associated annotation. It was conceived as a framework that allows the research community to develop and release 'modules' that integrate, analyze and visualize Arabidopsis data that may reside at remote sites. The current implementation provides an indexed database of core genomic information. These data are made available through feature-rich web applications that provide search, data mining, and genome browser functionality, and also by bulk download and web services. Araport uses software from the InterMine and JBrowse projects to expose curated data from TAIR, GO, BAR, EBI, UniProt, PubMed and EPIC CoGe. The site also hosts 'science apps,' developed as prototypes for community modules that use dynamic web pages to present data obtained on-demand from third-party servers via RESTful web services. Designed for sustainability, the Arabidopsis Information Portal strategy exploits existing scientific computing infrastructure, adopts a practical mixture of data integration technologies and encourages collaborative enhancement of the resource by its user community.",2014-11-20 +33613609,Distinguishing HapMap Accessions Through Recursive Set Partitioning in Hierarchical Decision Trees.,"The HapMap (haplotype map) projects have produced valuable genetic resources in life science research communities, allowing researchers to investigate sequence variations and conduct genome-wide association study (GWAS) analyses. 
A typical HapMap project may require sequencing hundreds, even thousands, of individual lines or accessions within a species. Due to limitations in current sequencing technology, the genotype values for some accessions cannot be clearly called. Additionally, allelic heterozygosity can be very high in some lines, causing genetic and sometimes phenotypic segregation in their descendants. Genetic and phenotypic segregation degrades the original accession's specificity and makes it difficult to distinguish one accession from another. Therefore, it is vitally important to determine and validate HapMap accessions before one conducts a GWAS analysis. However, to the best of our knowledge, there are no prior methodologies or tools that can readily distinguish or validate multiple accessions in a HapMap population. We devised a bioinformatics approach to distinguish multiple HapMap accessions using only a minimum number of genetic markers. First, we assign each candidate marker with a distinguishing score (DS), which measures its capability in distinguishing accessions. The DS score prioritizes those markers with higher percentages of homozygous genotypes (allele combinations), as they can be stably passed on to offspring. Next, we apply the ""set-partitioning"" concept to select optimal markers by recursively partitioning accession sets. Subsequently, we build a hierarchical decision tree in which a specific path represents the selected markers and the homogenous genotypes that can be used to distinguish one accession from others in the HapMap population. Based on these algorithms, we developed a web tool named MAD-HiDTree (Multiple Accession Distinguishment-Hierarchical Decision Tree), designed to analyze a user-input genotype matrix and construct a hierarchical decision tree. Using genetic marker data extracted from the Medicago truncatula HapMap population, we successfully constructed hierarchical decision trees by which the original 262 M. 
truncatula accessions could be efficiently distinguished. PCR experiments verified our proposed method, confirming that MAD-HiDTree can be used for the identification of a specific accession. MAD-HiDTree was developed in C/C++ in Linux. Both the source code and test data are publicly available at https://bioinfo.noble.org/MAD-HiDTree/.",2021-02-03 +34092528,Deep Learning-based Recalibration of the CUETO and EORTC Prediction Tools for Recurrence and Progression of Non-muscle-invasive Bladder Cancer.,"Despite being standard tools for decision-making, the European Organisation for Research and Treatment of Cancer (EORTC), European Association of Urology (EAU), and Club Urologico Espanol de Tratamiento Oncologico (CUETO) risk groups provide moderate performance in predicting recurrence-free survival (RFS) and progression-free survival (PFS) in non-muscle-invasive bladder cancer (NMIBC). In this retrospective combined-cohort data-mining study, the training group consisted of 3570 patients with de novo diagnosed NMIBC. Predictors included gender, age, T stage, histopathological grading, tumor burden and diameter, EORTC and CUETO scores, and type of intravesical treatment. The models developed were externally validated using an independent cohort of 322 patients. Models were trained using Cox proportional-hazards deep neural networks (deep learning; DeepSurv) with a proprietary grid search of hyperparameters. For patients treated with surgery and bacillus Calmette-Guérin-treated patients, the models achieved a c index of 0.650 (95% confidence interval [CI] 0.649-0.650) for RFS and 0.878 (95% CI 0.873-0.874) for PFS in the training group. In the validation group, the c index was 0.651 (95% CI 0.648-0.654) for RFS and 0.881 (95% CI 0.878-0.885) for PFS. After inclusion of patients treated with mitomycin C, the c index for RFS models was 0.6415 (95% CI 0.6412-0.6417) for the training group and 0.660 (95% CI 0.657-0.664) for the validation group. 
Models for PFS achieved a c index of 0.885 (95% CI 0.885-0.885) for the training set and 0.876 (95% CI 0.873-0.880) for the validation set. Our tool outperformed standard-of-care risk stratification tools and showed no evidence of overfitting. The application is open source and available at https://biostat.umed.pl/deepNMIBC/. PATIENT SUMMARY: We created and validated a new tool to predict recurrence and progression of early-stage bladder cancer. The application uses advanced artificial intelligence to combine state-of-the-art scales, outperforms these scales for prediction, and is freely available online.",2021-06-03 +33717804,Endonasal Odontoidectomy in Basilar Invagination.,"Objective  The endoscopic endonasal odontoidectomy (EEO) is emerging as a feasible surgical alternative to conventional microscopic transoral approach. In this article, we show EEO in the basilar invagination (BI) and describe in detail the technical aspects, advantages, and disadvantages of this approach ( Fig. 1 ). Methods  We describe EEO using audiovisual material from the neurosurgical department of Hospital Universitari i Politècnic La Fe Valencia database. Results  We present the case of a 61-year-old male patient with BI. Initially, we performed suboccipital decompression and occipitocervical fusion. Subsequently, after a no significant neurological improvement and persistent anterior compression, EEO was performed. The postoperative evolution was uneventful and the preoperative neurological deficits were recovered rapidly after surgery Discussion  EEO technique enables complete odontoid resection, preventing invasion of aggressive oral bacterial flora, and it is not limited by the mouth opening. As well, it avoids manipulation of the soft palate, therefore evades the risk of velopalatal insufficiency, facilitates immediate oral tolerance, and early extubation. The rostral position of C1-C2 complex in BI could suppose a great advantage in favor the endonasal approaches. 
Mucoperichondrial vascularized flaps could be obtained to avoid a postoperative cerebrospinal fluid (CSF) leak and facilitate the reepithelization process of the surgical bed. Conclusion  EEO may provide a significant anatomic and technical advantage over the trans-oral approach. The link to the video can be found at: https://youtu.be/Td6MDcjCNKk .",2020-12-02 +33261662,dbNSFP v4: a comprehensive database of transcript-specific functional predictions and annotations for human nonsynonymous and splice-site SNVs.,"Whole exome sequencing has been increasingly used in human disease studies. Prioritization based on appropriate functional annotations has been used as an indispensable step to select candidate variants. Here we present the latest updates to dbNSFP (version 4.1), a database designed to facilitate this step by providing deleteriousness prediction and functional annotation for all potential nonsynonymous and splice-site SNVs (a total of 84,013,093) in the human genome. The current version compiled 36 deleteriousness prediction scores, including 12 transcript-specific scores, and other variant and gene-level functional annotations. The database is available at http://database.liulab.science/dbNSFP with a downloadable version and a web-service.",2020-12-02 +32990218,Lef1 expression in fibroblasts maintains developmental potential in adult skin to regenerate wounds. ,"Scars are a serious health concern for burn victims and individuals with skin conditions associated with wound healing. Here, we identify regenerative factors in neonatal murine skin that transforms adult skin to regenerate instead of only repairing wounds with a scar, without perturbing development and homeostasis. Using scRNA-seq to probe unsorted cells from regenerating, scarring, homeostatic, and developing skin, we identified neonatal papillary fibroblasts that form a transient regenerative cell type that promotes healthy skin regeneration in young skin. 
These fibroblasts are defined by the expression of a canonical Wnt transcription factor Lef1 and using gain- and loss of function genetic mouse models, we demonstrate that Lef1 expression in fibroblasts primes the adult skin macroenvironment to enhance skin repair, including regeneration of hair follicles with arrector pili muscles in healed wounds. Finally, we share our genomic data in an interactive, searchable companion website (https://skinregeneration.org/). Together, these data and resources provide a platform to leverage the regenerative abilities of neonatal skin to develop clinically tractable solutions that promote the regeneration of adult tissue.",2020-09-29 +34090677,Establishment of a nomogram prediction model for long diameter 10-15 mm gallbladder polyps with malignant tendency.,"

Background

Surgical indications for the treatment of gallbladder polyps are controversial. Evaluation of gallbladder polyps with malignant tendency and indications for cholecystectomy in patients with long diameter polyps of 10 to 15 mm require further analysis and discussion. In this study, our objective was to re-evaluate indications for the surgical resection of gallbladder polyps and construct a nomogram model for the prediction of gallbladder polyps with malignant tendency.

Methods

Clinicopathologic data of 2,272 patients who had undergone cholecystectomy for gallbladder polyps were collected from 11 medical centers in China. Risk factor analyses and nomogram prediction model for gallbladder polyps with malignant tendency were conducted.

Results

Excluding 311 patients with cholelithiasis and 488 patients with long diameter polyps ≤5 and >15 mm, factors that differed significantly among patients with gallbladder polyps having a long diameter of 6 to 9 mm (885 cases) and 10 to 15 mm (588 cases) were polyp detection time, CEA and CA19-9 levels, number of polyps, fundus, echogenicity, gallbladder wall thickness and postoperative pathologic features (P < .05). Among 588 patients with gallbladder polyps with a long diameter of 10 of 15 mm, multivariate analysis indicated the following independent risk factors of gallbladder polyps with malignant tendency: single polyps (OR = 0.286/P < .001), polyps with broad base (OR = 2.644/P = .001), polyps with medium/low echogenicity (OR = 2.387/P = .003), and polyps with short diameter of 7 to 9 or 10 to 15 mm (OR = 3.820/P = .005; OR = 2.220/P = .048, respectively). The C-index of the nomogram model and internal validation were .778 and .768, respectively. In addition, a sample online calculator for the nomogram prediction model had been created (https://docliqi.shinyapps.io/dynnom/).

Conclusion

Indications for cholecystectomy in patients with gallbladder polyps with a long diameter of 10 to 15 mm should be assessed by combining the information on short diameter, number of polyps, fundus, and echogenicity. The nomogram model can be used to predict the risk for the development of gallbladder polyps with malignant tendency.",2021-06-02 +33815388,Single Cell Analysis of Blood Mononuclear Cells Stimulated Through Either LPS or Anti-CD3 and Anti-CD28.,"Immune cell activation assays have been widely used for immune monitoring and for understanding disease mechanisms. However, these assays are typically limited in scope. A holistic study of circulating immune cell responses to different activators is lacking. Here we developed a cost-effective high-throughput multiplexed single-cell RNA-seq combined with epitope tagging (CITE-seq) to determine how classic activators of T cells (anti-CD3 coupled with anti-CD28) or monocytes (LPS) alter the cell composition and transcriptional profiles of peripheral blood mononuclear cells (PBMCs) from healthy human donors. Anti-CD3/CD28 treatment activated all classes of lymphocytes either directly (T cells) or indirectly (B and NK cells) but reduced monocyte numbers. Activated T and NK cells expressed senescence and effector molecules, whereas activated B cells transcriptionally resembled autoimmune disease- or age-associated B cells (e.g., CD11c, T-bet). In contrast, LPS specifically targeted monocytes and induced two main states: early activation characterized by the expression of chemoattractants and a later pro-inflammatory state characterized by expression of effector molecules. 
These data provide a foundation for future immune activation studies with single cell technologies (https://czi-pbmc-cite-seq.jax.org/).",2021-03-17 +35935886,Evaluating species in Botryosphaeriales.,"The Botryosphaeriales (Dothideomycetes) includes numerous endophytic, saprobic, and plant pathogenic species associated with a wide range of symptoms, most commonly on woody plants. In a recent phylogenetic treatment of 499 isolates in the culture collection (CBS) of the Westerdijk Institute, we evaluated the families and genera accommodated in this order of important fungi. The present study presents multigene phylogenetic analyses for an additional 230 isolates, using ITS, tef1, tub2, LSU and rpb2 loci, in combination with morphological data. Based on these data, 58 species are reduced to synonymy, and eight novel species are described. They include Diplodia afrocarpi (Afrocarpus, South Africa), Dothiorella diospyricola (Diospyros, South Africa), Lasiodiplodia acaciae (Acacia, Indonesia), Neofusicoccum podocarpi (Podocarpus, South Africa), N. rapaneae (Rapanea, South Africa), Phaeobotryon ulmi (Ulmus, Germany), Saccharata grevilleae (Grevillea, Australia) and S. hakeiphila (Hakea, Australia). The results have clarified the identity of numerous isolates that lacked Latin binomials or had been deposited under incorrect names in the CBS collection in the past. They also provide a solid foundation for more in-depth future studies on taxa in the order. Sequences of the tef1, tub2 and rpb2 genes proved to be the most reliable markers. At the species level, results showed that the most informative genes were inconsistent, but that a combination of four candidate barcodes (ITS, tef1, tub2 and rpb2) provided reliable resolution. Furthermore, given the large number of additional isolates included in this study, and newly generated multigene DNA datasets, several species could also be reduced to synonymy. 
The study illustrates the value of reassessing the identity of older collections in culture collections utilising modern taxonomic frameworks and methods. Citation: Zhang W, Groenewald JZ, Lombard L, et al. 2021. Evaluating species in Botryosphaeriales. Persoonia 46: 63-115. https://doi.org/10.3767/persoonia.2021.46.03.",2021-02-02 +33283212,MitoEM Dataset: Large-scale 3D Mitochondria Instance Segmentation from EM Images.,"Electron microscopy (EM) allows the identification of intracellular organelles such as mitochondria, providing insights for clinical and scientific studies. However, public mitochondria segmentation datasets only contain hundreds of instances with simple shapes. It is unclear if existing methods achieving human-level accuracy on these small datasets are robust in practice. To this end, we introduce the MitoEM dataset, a 3D mitochondria instance segmentation dataset with two (30μm)3 volumes from human and rat cortices respectively, 3, 600× larger than previous benchmarks. With around 40K instances, we find a great diversity of mitochondria in terms of shape and density. For evaluation, we tailor the implementation of the average precision (AP) metric for 3D data with a 45× speedup. On MitoEM, we find existing instance segmentation methods often fail to correctly segment mitochondria with complex shapes or close contacts with other instances. Thus, our MitoEM dataset poses new challenges to the field. We release our code and data: https://donglaiw.github.io/page/mitoEM/index.html.",2020-09-29 +31635584,"Increasing deceased organ donor numbers in Johannesburg, South Africa: 18-month results of the Wits Transplant Procurement Model.","In 2016, deceased-donor organ procurement at Wits Transplant, based at Wits Donald Gordon Medical Centre in Johannesburg, South Africa (SA), was in a state of crisis. 
As it is the largest-volume solid-organ transplant unit in SA, and as we aspire to provide transplant services of an international standard, the time to address our procurement practice had come. The number of deceased donors consented through our centre was very low, and we needed a radical change to improve our performance. This article describes the Wits Transplant Procurement Model - the result of our work to improve procurement at our centre. The model has two core phases, one to increase referrals and the other to improve our consent rates. Within these phases there are several initiatives. To improve referrals, the threefold approach of procurement management, acknowledgement and resource utilisation was developed. In order to 'convert' referrals into consents, we established the Wits Transplant 'Family Approach to Consent for Transplant Strategy' (FACTS). Since initiation of the Wits Transplant Procurement Model, both our referral numbers from targeted hospitals and our conversion rates have increased. Referrals from targeted hospitals increased by 54% (from 31 to 57). Our consent rate increased from 25% (n=6) to 73% (n=35) after the initiation of Wits Transplant FACTS. We hope that other transplant centres in SA and further afield in the region will find this article helpful, and to this end we have created a handbook on the Wits Transplant Procurement Model that is freely available for download (http://www.dgmc.co.za/docs/Wits-Transplant-Procurement-Handbook.pdf).",2019-08-28 +33335890,(Re)Defining the Proline-Rich Antimicrobial Peptide Family and the Identification of Putative New Members.,"As we rapidly approach a post-antibiotic era in which multi-drug resistant bacteria are ever-pervasive, antimicrobial peptides (AMPs) represent a promising class of compounds to help address this global issue. AMPs are best-known for their membrane-disruptive mode of action leading to bacteria cell lysis and death. 
However, many AMPs are also known to be non-lytic and have intracellular modes of action. Proline-rich AMPs (PrAMPs) are one such class, that are generally membrane permeable and inhibit protein synthesis leading to a bactericidal outcome. PrAMPs are highly effective against Gram-negative bacteria and yet show very low toxicity against eukaryotic cells. Here, we review both the PrAMP family and the past and current definitions for this class of peptides. Computational analysis of known AMPs within the DRAMP database (http://dramp.cpu-bioinfor.org/) and assessment of their PrAMP-like properties have led us to develop a revised definition of the PrAMP class. As a result, we subsequently identified a number of unknown and unclassified peptides containing motifs of striking similarity to known PrAMP-based DnaK inhibitors and propose a series of new sequences for experimental evaluation and subsequent addition to the PrAMP family.",2020-12-01 +32394182,LexOPS: An R package and user interface for the controlled generation of word stimuli.,"LexOPS is an R package and user interface designed to facilitate the generation of word stimuli for use in research. Notably, the tool permits the generation of suitably controlled word lists for any user-specified factorial design and can be adapted for use with any language. It features an intuitive graphical user interface, including the visualization of both the distributions within and relationships among variables of interest. An inbuilt database of English words is also provided, including a range of lexical variables commonly used in psycholinguistic research. This article introduces LexOPS, outlining the features of the package and detailing the sources of the inbuilt dataset. We also report a validation analysis, showing that, in comparison to stimuli of existing studies, stimuli optimized with LexOPS generally demonstrate greater constraint and consistency in variable manipulation and control. 
Current instructions for installing and using LexOPS are available at https://JackEdTaylor.github.io/LexOPSdocs/ .",2020-12-01 +33301344,"""Group therapy for schizophrenia: A meta-analysis"": Correction to Burlingame et al. (2020).","Reports an error in ""Group therapy for schizophrenia: A meta-analysis"" by Gary M. Burlingame, Hal Svien, Lars Hoppe, Isaac Hunt and Jenny Rosendahl (Psychotherapy, 2020[Jun], Vol 57[2], 219-236). In the article, the Orfanos et al. (2015) meta-analysis was missing from Burlingame et al. (2020) and should have appeared as Footnote 1 at the end of the abstract. Consistent with Orfanos et al. (2015), the Burlingame et al. (2020) findings support the notion that group treatments can improve negative symptoms of schizophrenia, across active and passive controls. Unlike Orfanos et al.'s (2015) study, Burlingame et al. (2020) also found a significant effect size for positive symptoms. Reference Orfanos, S., Banks, C., & Priebe, S. (2015). Are group psychotherapeutic treatments effective for patients with schizophrenia? A systematic review and meta-analysis. Psychotherapy and Psychosomatics, 84, 241-249. https://doi.org/10.1159/ 000377705. Footnote 2 was missing from the end of the first sentence in the Method section. This meta-analysis is not registered with PROSPERO, and the PROSPERO protocol (CRD42013004419) does not include the disorder of schizophrenia... (The following abstract of the original article appeared in record 2020-37337-001.) The effectiveness of group treatments for people with schizophrenia has not been examined on symptom-specific (positive and negative symptoms) outcomes, and the differential effects of the most popular group treatments remain unknown. 
We conducted a meta-analysis of randomized controlled trials that tested (a) the effectiveness of 7 frequently used group treatments on positive and negative symptoms and (b) if treatment-specific outcome improvement was associated with improvement on schizophrenia symptoms. Major databases were searched from 1990 to 2018 for randomized controlled trials of group treatment for people with schizophrenia, including first-episode psychosis. A random effects meta-analysis and meta-regression was conducted on 52 studies representing 4,156 individuals that produced a significant, small effect on symptom-specific outcomes (g = 0.30), with 4 group treatments (cognitive remediation, multifamily, psychoeducational, and social skills training) posting significant improvement. In addition, change on treatment-specific outcomes explained 16% of schizophrenia symptom and 44% of general functioning improvement. Results are discussed with respect to how they replicate past meta-analytic findings and possible revision of practice guidelines to incorporate evidence-based group treatments for schizophrenia. (PsycInfo Database Record (c) 2020 APA, all rights reserved).",2020-12-01 +32529017,Practical estimation of cloud storage costs for clinical genomic data.,"

Background

Laboratories performing clinical high-throughput sequencing for oncology and germline testing are increasingly migrating their data storage to cloud-based solutions. Cloud-based storage has several advantages, such as low per-GB prices, scalability, and minimal fixed costs; however, while these solutions tout ostensibly simple usage-based pricing plans, practical cost analysis of cloud storage for NGS data storage is not straightforward.

Methods

We developed an easy-to-use tool designed specifically for cost and usage estimation for laboratories performing clinical NGS testing (https://ngscosts.info). Our tool enables quick exploration of dozens of storage options across three major cloud providers, and provides complex cost and usage forecasts over 1-20 year timeframes. Parameters include current test volumes, growth rate, data compression, data retention policies, and case re-access rates. Outputs include an easy-to-visualize chart of total data stored, yearly and lifetime costs, and a ""cost per test"" estimate.

Results

Two factors were found to markedly decrease the average cost per test: 1) reducing total file size, including through the use of compression, 2) rapid transfer to ""cold"" or archival storage. In contrast, re-access of data from archival storage tiers was not found to dramatically increase the cost of storage per test.

Conclusions

Steady declines in cloud storage pricing, as well as new options for storage and retrieval, make storing clinical NGS data on the cloud economical and friendly to laboratory workflows. Our web-based tool makes it possible to explore and compare cloud storage solutions and provide forecasts specifically for clinical NGS laboratories.",2020-05-15 +33437765,The predictors and prognosis for unexpected reocclusion after mechanical thrombectomy: a meta-analysis.,"

Background

Mechanical thrombectomy (MT) is the cornerstone for treating acute ischemic stroke (AIS) in emergency cases. However, 3-9% of patients display reocclusion in the recanalized vessels within 24 hours after performing MT. This meta-analysis aimed to further identify the predictors and prognosis of unexpected reocclusion after MT.

Methods

According to the Preferred Reporting Items for Systematic Reviews and Meta-Analyses statement, we searched several literature databases, including PubMed, Embase, and Cochrane, for publications related to the subject term ""thrombectomy"" that were published prior to March 2020. Pooled analysis was performed with the fixed-effects model using the Mantel-Haenszel method if the heterogeneity was expected to be available (I2≤50%). Otherwise, the random-effects model computed by the DerSimonian-Laird method was used (I2>50%). R software (http://www.r-project.org) was used for analysis in this study.

Results

A total of five articles comprising 1,883 patients (126 patients with reocclusion, 1,757 patients without reocclusion) who were confirmed to have AIS and who underwent emergency MT were finally included in this study. The pooled analysis (reocclusion versus non-reocclusion) showed that atrial fibrillation [odds ratio (OR), 0.36; 95% confidence interval (CI), 0.20-0.63], cardiogenic embolism (OR, 0.35; 95% CI, 0.20-0.63), long-term statin use (OR, 0.39; 95% CI, 0.21-0.75), long-term antiplatelet use (OR, 0.53; 95% CI, 0.31-0.92), and target occlusion at middle cerebral artery-M1 (MCA-M1) (OR, 0.39; 95% CI, 0.19-0.77) might prevent reocclusion and longer onset-to-reperfusion time (mean difference, 66.51; 95% CI, 36.66-96.35) might promote reocclusion after MT performance. Furthermore, the clinical outcomes including early neurological deterioration (OR, 4.87; 95% CI, 2.08-11.40), 90-day modified Rankin Scale score ≤2 (OR, 0.28; 95% CI, 0.18-0.45), and 90-day death rate (OR, 1.85; 95% CI, 1.04-3.29) were also associated with reocclusion after MT performance.

Conclusions

Atrial fibrillation, cardiogenic embolism, long-term statin use, long-term antiplatelet use, and target occlusion at MCA-M1 might prevent reocclusion, and longer onset-to-reperfusion time seemed to promote reocclusion after MT. Reocclusion after MT results in a high risk of poor prognosis.",2020-12-01 +31774482,FluReassort: a database for the study of genomic reassortments among influenza viruses.,"Genomic reassortment is an important genetic event in the generation of emerging influenza viruses, which can cause numerous serious flu endemics and epidemics within hosts or even across different hosts. However, there is no dedicated and comprehensive repository for reassortment events among influenza viruses. Here, we present FluReassort, a database for understanding the genomic reassortment events in influenza viruses. Through manual curation of thousands of literature references, the database compiles 204 reassortment events among 56 subtypes of influenza A viruses isolated in 37 different countries. FluReassort provides an interface for the visualization and evolutionary analysis of reassortment events, allowing users to view the events through the phylogenetic analysis with varying parameters. The reassortment networks in FluReassort graphically summarize the correlation and causality between different subtypes of the influenza virus and facilitate the description and interpretation of the reassortment preference among subtypes. We believe FluReassort is a convenient and powerful platform for understanding the evolution of emerging influenza viruses. FluReassort is freely available at https://www.jianglab.tech/FluReassort.",2020-12-01 +33490146,"HER2-targeted regimens after prior trastuzumab for patients with HER2-positive unresectable, locally advanced or metastatic breast cancer: a network meta-analysis of randomized controlled trials.","

Background

Several human epidermal growth factor receptor 2 (HER2)-targeted regimens (anti-HER2 target agent combined chemotherapy) have been introduced for the treatment of HER2-positive locally advanced or metastatic breast cancer progressed after trastuzumab. We therefore conducted a network meta-analysis to compare and rank HER2-targeted regimens in this population after trastuzumab therapy.

Methods

The electronic databases of PubMed, EmBase, Cochrane Central Register of Controlled Trials, and the websites of http://clinicaltrials.gov/ (US NIH) were systematically searched for published and unpublished randomized controlled trials (RCTs) from their inception to October, 2020. Nine treatment regimens were eligible to be included in this analysis. The primary outcomes were overall response rate (ORR), progression-free survival (PFS) and overall survival (OS), while the secondary outcomes were grade ≥3 adverse events.

Results

A total of 2,104 citations were identified and 12 RCTs comprising 3,769 patients were selected for final analysis. For HER2 positive unresectable, locally advanced or metastatic patients progressed after trastuzumab therapy pyrotinib plus capecitabine ranked the highest surface under the cumulative ranking area (SUCRA) in PFS, ORR and its SUCRA in OS was higher than Trastuzumab emtansine (T-DM1). T-DM1 plus atezolizumab, pyrotinib plus capecitabine, and pertuzumab plus trastuzumab plus capecitabine had comparable SUCRA in OS (76.1% vs. 74.5% vs. 71.2%). Six of included studies reported any grade ≥3 adverse events, the prevalence of any grade ≥3 adverse events in lapatinib plus capecitabine (353/683), T-DM1 (213/558), trastuzumab plus capecitabine (130/218), pertuzumab plus trastuzumab plus capecitabine (118/228), pyrotinib plus capecitabine (220/384), T-DM1 plus atezolizumab (43/132) and capecitabine (24/94) were 51.7%, 38.2%, 59.6%, 51.8%, 57.3%, 32.6% and 25.5%, respectively. Specific adverse event characteristics related to different HER2-targeted regimens need to be well known ahead and managed during the therapy.

Conclusions

The results indicated that for HER2 positive breast cancer with previous trastuzumab therapy pyrotinib plus capecitabine was probably more efficacious in PFS and ORR. T-DM1 plus atezolizumab, pyrotinib plus capecitabine and pertuzumab plus trastuzumab plus capecitabine have comparable effect on OS improvement and all of them were likely better than T-DM1. The risk of grade ≥3 adverse events for specific treatment regimens were also provided.",2020-12-01 +33259604,HAHmiR.DB: a server platform for high-altitude human miRNA-gene coregulatory networks and associated regulatory circuits. ,"Around 140 million people live in high-altitude (HA) conditions, and an even larger number visit such places for tourism, adventure-seeking or sports training. Rapid ascent to HA can cause severe damage to the body organs and may lead to many fatal disorders. During induction to HA, human body undergoes various physiological, biochemical, hematological and molecular changes to adapt to the extreme environmental conditions. Several literature references hint that gene-expression-regulation and regulatory molecules like miRNAs and transcription factors (TFs) control adaptive responses during HA stress. These biomolecules are known to interact in a complex combinatorial manner to fine-tune the gene expression and help in controlling the molecular responses during this stress and ultimately help in acclimatization. High-Altitude Human miRNA Database (HAHmiR.DB) is a unique, comprehensive and curated collection of miRNAs that have been experimentally validated to be associated with HA stress, their level of expression in different altitudes, fold change, experiment duration, biomarker association, disease and drug association, tissue-specific expression level, Gene Ontology (GO) and Kyoto Encyclopaedia of Gene and Genomes (KEGG) pathway associations. 
As a server platform, it also uniquely constructs and analyses interactive miRNA-TF-gene coregulatory networks and extracts regulatory circuits/feed-forward loops (FFLs). These regulatory circuits help to offer mechanistic insights into complex regulatory mechanisms during HA stress. The server can also build these regulatory networks between two and more miRNAs of the database and also identify the regulatory circuits from this network. Hence, HAHmiR.DB is the first-of-its-kind database in HA research, which is a reliable platform to explore, compare, analyse and retrieve miRNAs associated with HA stress, their coregulatory networks and FFL regulatory-circuits. HAHmiR.DB is freely accessible at http://www.hahmirdb.in.",2020-12-01 +32807888,Addressing the batch effect issue for LC/MS metabolomics data in data preprocessing.,"With the growth of metabolomics research, more and more studies are conducted on large numbers of samples. Due to technical limitations of the Liquid Chromatography-Mass Spectrometry (LC/MS) platform, samples often need to be processed in multiple batches. Across different batches, we often observe differences in data characteristics. In this work, we specifically focus on data generated in multiple batches on the same LC/MS machinery. Traditional preprocessing methods treat all samples as a single group. Such practice can result in errors in the alignment of peaks, which cannot be corrected by post hoc application of batch effect correction methods. In this work, we developed a new approach that address the batch effect issue in the preprocessing stage, resulting in better peak detection, alignment and quantification. It can be combined with down-stream batch effect correction methods to further correct for between-batch intensity differences. The method is implemented in the existing workflow of the apLCMS platform. 
Analyzing data with multiple batches, both generated from standardized quality control (QC) plasma samples and from real biological studies, the new method resulted in feature tables with better consistency, as well as better down-stream analysis results. The method can be a useful addition to the tools available for large studies involving multiple batches. The method is available as part of the apLCMS package. Download link and instructions are at https://mypage.cuhk.edu.cn/academics/yutianwei/apLCMS/ .",2020-08-17 +32338757,MetagenoNets: comprehensive inference and meta-insights for microbial correlation networks.,"Microbial association networks are frequently used for understanding and comparing community dynamics from microbiome datasets. Inferring microbial correlations for such networks and obtaining meaningful biological insights, however, requires a lengthy data management workflow, choice of appropriate methods, statistical computations, followed by a different pipeline for suitably visualizing, reporting and comparing the associations. The complexity is further increased with the added dimension of multi-group 'meta-data' and 'inter-omic' functional profiles that are often associated with microbiome studies. This not only necessitates the need for categorical networks, but also integrated and bi-partite networks. Multiple options of network inference algorithms further add to the efforts required for performing correlation-based microbiome interaction studies. We present MetagenoNets, a web-based application, which accepts multi-environment microbial abundance as well as functional profiles, intelligently segregates 'continuous and categorical' meta-data and allows inference as well as visualization of categorical, integrated (inter-omic) and bi-partite networks. Modular structure of MetagenoNets ensures logical flow of analysis (inference, integration, exploration and comparison) in an intuitive and interactive personalized dashboard driven framework. 
Dynamic choice of filtration, normalization, data transformation and correlation algorithms ensures, that end-users get a one-stop solution for microbial network analysis. MetagenoNets is freely available at https://web.rniapps.net/metagenonets.",2020-07-01 +33304948,Combined retinal proteome datasets in response to atropine treatment using iTRAQ and SWATH-MS based proteomics approaches in guinea pig myopia model.,"Atropine, a non-selective muscarinic antagonist, is known to slow down myopia progression in human adolescents and in several animal models. However, its underlying molecular mechanism is unclear. The present work built a monocular form-deprivation myopia (FDM) guinea pig model, using facemasks as well as atropine treatment on FDM eyes for 2 and 4 weeks. Retinal protein changes in response to the FDM and effects of topical administration of atropine were screened for the two periods using fractionated isobaric tags for a relative and absolute quantification (iTRAQ) approach coupled with nano-liquid chromatography-tandem mass spectrometry (nano-LC-MS/MS) (n=24, 48 eyes). Retinal tissues from another cohort receiving 4-weeks FDM with atropine treatment (n=12, 24 eyes) with more significant changes were subjected to sequential window acquisition of all theoretical mass spectra (SWATH-MS) proteomics for further protein target confirmation. A total of 1695 proteins (8875 peptides) and 5961 proteins (51871 peptides) were identified using iTRAQ and SWATH approaches, respectively. Using the Paragon algorithm in the ProteinPilotTM software, the three most significantly up-regulated and down-regulated proteins that were commonly found in both ITRAQ and SWATH experiments are presented. All raw data generated from the work were submitted and published in the Peptide Atlas public repository (http://www.peptideatlas.org/) for general release (Data ID PASS01507).",2020-11-17 +32609328,Network analysis of synonymous codon usage.,"

Motivation

Most amino acids are encoded by multiple synonymous codons, some of which are used more rarely than others. Analyses of positions of such rare codons in protein sequences revealed that rare codons can impact co-translational protein folding and that positions of some rare codons are evolutionarily conserved. Analyses of their positions in protein 3-dimensional structures, which are richer in biochemical information than sequences alone, might further explain the role of rare codons in protein folding.

Results

We model protein structures as networks and use network centrality to measure the structural position of an amino acid. We first validate that amino acids buried within the structural core are network-central, and those on the surface are not. Then, we study potential differences between network centralities and thus structural positions of amino acids encoded by conserved rare, non-conserved rare and commonly used codons. We find that in 84% of proteins, the three codon categories occupy significantly different structural positions. We examine protein groups showing different codon centrality trends, i.e. different relationships between structural positions of the three codon categories. We see several cases of all proteins from our data with some structural or functional property being in the same group. Also, we see a case of all proteins in some group having the same property. Our work shows that codon usage is linked to the final protein structure and thus possibly to co-translational protein folding.

Availability and implementation

https://nd.edu/∼cone/CodonUsage/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +32592462,DeepAntigen: a novel method for neoantigen prioritization via 3D genome and deep sparse learning.,"

Motivation

The mutations of cancers can encode the seeds of their own destruction, in the form of T-cell recognizable immunogenic peptides, also known as neoantigens. It is computationally challenging, however, to accurately prioritize the potential neoantigen candidates according to their ability to activate the T-cell immunoresponse, especially when the somatic mutations are abundant. Although a few neoantigen prioritization methods have been proposed to address this issue, an advanced machine learning model that is specifically designed to tackle this problem is still lacking. Moreover, none of the existing methods considers the original DNA loci of the neoantigens from the perspective of the 3D genome, which may provide key information for inferring neoantigens' immunogenicity.

Results

In this study, we discovered that DNA loci of the immunopositive and immunonegative MHC-I neoantigens have distinct spatial distribution patterns across the genome. We therefore used the 3D genome information along with an ensemble pMHC-I coding strategy, and developed a group feature selection-based deep sparse neural network model (DNN-GFS) that is optimized for neoantigen prioritization. DNN-GFS demonstrated increased neoantigen prioritization power compared with existing sequence-based approaches. We also developed a webserver named deepAntigen (http://yishi.sjtu.edu.cn/deepAntigen) that implements the DNN-GFS as well as other machine learning methods. We believe that this work provides a new perspective toward more accurate neoantigen prediction, which eventually contributes to personalized cancer immunotherapy.

Availability and implementation

Data and implementation are available on webserver: http://yishi.sjtu.edu.cn/deepAntigen.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +31337975,SAROTUP: a suite of tools for finding potential target-unrelated peptides from phage display data.,"SAROTUP (Scanner And Reporter Of Target-Unrelated Peptides) 3.1 is a significant upgrade to the widely used SAROTUP web server for the rapid identification of target-unrelated peptides (TUPs) in phage display data. At present, SAROTUP has gathered a suite of tools for finding potential TUPs and other purposes. Besides the TUPScan, the motif-based tool, and three tools based on the BDB database, i.e., MimoScan, MimoSearch, and MimoBlast, three predictors based on support vector machine, i.e., PhD7Faster, SABinder and PSBinder, are integrated into SAROTUP. The current version of SAROTUP contains 27 TUP motifs and 823 TUP sequences. We also developed the standalone SAROTUP application with graphical user interface (GUI) and command line versions for processing deep sequencing phage display data and distributed it as an open source package, which can perform perfectly locally on almost all systems that support C++ with little or no modification. The web interfaces of SAROTUP have also been redesigned to be more self-evident and user-friendly. The latest version of SAROTUP is freely available at http://i.uestc.edu.cn/sarotup3.",2019-06-02 +30274705,"DynBench3D, a Web-Resource to Dynamically Generate Benchmark Sets of Large Heteromeric Protein Complexes.","Multi-protein machines are responsible for most cellular tasks, and many efforts have been invested in the systematic identification and characterization of thousands of these macromolecular assemblies. However, unfortunately, the (quasi) atomic details necessary to understand their function are available only for a tiny fraction of the known complexes. 
The computational biology community is developing strategies to integrate structural data of different nature, from electron microscopy to X-ray crystallography, to model large molecular machines, as it has been done for individual proteins and interactions with remarkable success. However, unlike for binary interactions, there is no reliable gold-standard set of three-dimensional (3D) complexes to benchmark the performance of these methodologies and detect their limitations. Here, we present a strategy to dynamically generate non-redundant sets of 3D heteromeric complexes with three or more components. By changing the values of sequence identity and component overlap between assemblies required to define complex redundancy, we can create sets of representative complexes with known 3D structure (i.e., target complexes). Using an identity threshold of 20% and imposing a fraction of component overlap of <0.5, we identify 495 unique target complexes, which represent a real non-redundant set of heteromeric assemblies with known 3D structure. Moreover, for each target complex, we also identify a set of assemblies, of varying degrees of identity and component overlap, that can be readily used as input in a complex modeling exercise (i.e., template subcomplexes). We hope that resources like this will significantly help the development and progress assessment of novel methodologies, as docking benchmarks and blind prediction contests did. The interactive resource is accessible at https://DynBench3D.irbbarcelona.org.",2018-09-22 +29843599,JCDSA: a joint covariate detection tool for survival analysis on tumor expression profiles.,"

Background

Survival analysis on tumor expression profiles has always been a key issue for subsequent biological experimental validation. It is crucial how to select features which closely correspond to survival time. Furthermore, it is important how to select features which best discriminate between low-risk and high-risk group of patients. Common features derived from the two aspects may provide variable candidates for prognosis of cancer.

Results

Based on the provided two-step feature selection strategy, we develop a joint covariate detection tool for survival analysis on tumor expression profiles. Significant features, which are not only consistent with survival time but also associated with the categories of patients with different survival risks, are chosen. Using the miRNA expression data (Level 3) of 548 patients with glioblastoma multiforme (GBM) as an example, miRNA candidates for prognosis of cancer are selected. The reliability of selected miRNAs using this tool is demonstrated by 100 simulations. Furthermore, it is discovered that significant covariates are not directly composed of individually significant variables.

Conclusions

Joint covariate detection provides a viewpoint for selecting variables which are not individually but jointly significant. Besides, it helps to select features which are not only consistent with survival time but also associated with prognosis risk. The software is available at http://bio-nefu.com/resource/jcdsa .",2018-05-29 +32442274,"NanoSPC: a scalable, portable, cloud compatible viral nanopore metagenomic data processing pipeline.","Metagenomic sequencing combined with Oxford Nanopore Technology has the potential to become a point-of-care test for infectious disease in public health and clinical settings, providing rapid diagnosis of infection, guiding individual patient management and treatment strategies, and informing infection prevention and control practices. However, publicly available, streamlined, and reproducible pipelines for analyzing Nanopore metagenomic sequencing data are still lacking. Here we introduce NanoSPC, a scalable, portable and cloud compatible pipeline for analyzing Nanopore sequencing data. NanoSPC can identify potentially pathogenic viruses and bacteria simultaneously to provide comprehensive characterization of individual samples. The pipeline can also detect single nucleotide variants and assemble high quality complete consensus genome sequences, permitting high-resolution inference of transmission. We implement NanoSPC using Nextflow manager within Docker images to allow reproducibility and portability of the analysis. Moreover, we deploy NanoSPC to our scalable pathogen pipeline platform, enabling elastic computing for high throughput Nanopore data on HPC cluster as well as multiple cloud platforms, such as Google Cloud, Amazon Elastic Computing Cloud, Microsoft Azure and OpenStack. 
Users could either access our web interface (https://nanospc.mmmoxford.uk) to run cloud-based analysis, monitor process, and visualize results, as well as download Docker images and run command line to analyse data locally.",2020-07-01 +32479517,Ten tips for a text-mining-ready article: How to improve automated discoverability and interpretability.,"Data-driven research in biomedical science requires structured, computable data. Increasingly, these data are created with support from automated text mining. Text-mining tools have rapidly matured: although not perfect, they now frequently provide outstanding results. We describe 10 straightforward writing tips-and a web tool, PubReCheck-guiding authors to help address the most common cases that remain difficult for text-mining tools. We anticipate these guides will help authors' work be found more readily and used more widely, ultimately increasing the impact of their work and the overall benefit to both authors and readers. PubReCheck is available at http://www.ncbi.nlm.nih.gov/research/pubrecheck.",2020-06-01 +28365733,miRnalyze: an interactive database linking tool to unlock intuitive microRNA regulation of cell signaling pathways. ,"The various pathophysiological processes occurring in living systems are known to be orchestrated by delicate interplays and cross-talks between different genes and their regulators. Among the various regulators of genes, there is a class of small non-coding RNA molecules known as microRNAs. Although, the relative simplicity of miRNAs and their ability to modulate cellular processes make them attractive therapeutic candidates, their presence in large numbers make it challenging for experimental researchers to interpret the intricacies of the molecular processes they regulate. Most of the existing bioinformatic tools fail to address these challenges. 
Here, we present a new web resource 'miRnalyze' that has been specifically designed to directly identify the putative regulation of cell signaling pathways by miRNAs. The tool integrates miRNA-target predictions with signaling cascade members by utilizing TargetScanHuman 7.1 miRNA-target prediction tool and the KEGG pathway database, and thus provides researchers with in-depth insights into modulation of signal transduction pathways by miRNAs. miRnalyze is capable of identifying common miRNAs targeting more than one gene in the same signaling pathway-a feature that further increases the probability of modulating the pathway and downstream reactions when using miRNA modulators. Additionally, miRnalyze can sort miRNAs according to the seed-match types and TargetScan Context ++ score, thus providing a hierarchical list of most valuable miRNAs. Furthermore, in order to provide users with comprehensive information regarding miRNAs, genes and pathways, miRnalyze also links to expression data of miRNAs (miRmine) and genes (TiGER) and proteome abundance (PaxDb) data. To validate the capability of the tool, we have documented the correlation of miRnalyze's prediction with experimental confirmation studies. http://www.mirnalyze.in.",2017-01-01 +32392343,Oviz-Bio: a web-based platform for interactive cancer genomics data visualization.,"Genetics data visualization plays an important role in the sharing of knowledge from cancer genome research. Many types of visualization are widely used, most of which are static and require sufficient coding experience to create. Here, we present Oviz-Bio, a web-based platform that provides interactive and real-time visualizations of cancer genomics data. Researchers can interactively explore visual outputs and export high-quality diagrams. 
Oviz-Bio supports a diverse range of visualizations on common cancer mutation types, including annotation and signatures of small scale mutations, haplotype view and focal clusters of copy number variations, split-reads alignment and heatmap view of structural variations, transcript junction of fusion genes and genomic hotspot of oncovirus integrations. Furthermore, Oviz-Bio allows landscape view to investigate multi-layered data in samples cohort. All Oviz-Bio visual applications are freely available at https://bio.oviz.org/.",2020-07-01 +35135136,"Precision Trial Drawer, a Computational Tool to Assist Planning of Genomics-Driven Trials in Oncology.","

Purpose

Trials that accrue participants on the basis of genetic biomarkers are a powerful means of testing targeted drugs, but they are often complicated by the rarity of the biomarker-positive population. Umbrella trials circumvent this by testing multiple hypotheses to maximize accrual. However, bigger trials have higher chances of conflicting treatment allocations because of the coexistence of multiple actionable alterations; allocation strategies greatly affect the efficiency of enrollment and should be carefully planned on the basis of relative mutation frequencies, leveraging information from large sequencing projects.

Methods

We developed software named Precision Trial Drawer (PTD) to estimate parameters that are useful for designing precision trials, most importantly, the number of patients needed to molecularly screen (NNMS) and the allocation rule that maximizes patient accrual on the basis of mutation frequency, systematically assigning patients with conflicting allocations to the drug associated with the rarer mutation. We used data from The Cancer Genome Atlas to show their potential in a 10-arm imaginary trial of multiple cancers on the basis of genetic alterations suggested by the past Molecular Analysis for Personalised Therapy (MAP) conference. We validated PTD predictions versus real data from the SHIVA (A Randomized Phase II Trial Comparing Therapy Based on Tumor Molecular Profiling Versus Conventional Therapy in Patients With Refractory Cancer) trial.

Results

In the MAP imaginary trial, PTD-optimized allocation reduces number of patients needed to molecularly screen by up to 71.8% (3.5 times) compared with nonoptimal trial designs. In the SHIVA trial, PTD correctly predicted the fraction of patients with actionable alterations (33.51% [95% CI, 29.4% to 37.6%] in imaginary v 32.92% [95% CI, 28.2% to 37.6%] expected) and allocation to specific treatment groups (RAS/MEK, PI3K/mTOR, or both).

Conclusion

PTD correctly predicts crucial parameters for the design of multiarm genetic biomarker-driven trials. PTD is available as a package in the R programming language and as an open-access Web-based app. It represents a useful resource for the community of precision oncology trialists. The Web-based app is available at https://gmelloni.github.io/ptd/shinyapp.html.",2018-11-01 +,T189. PEPTIDE SHARING BETWEEN SCHIZOPHRENIA-RELATED PROTEINS AND THE INFLUENZA A VIRUS MAY OFFER A WINDOW INTO THE IMMUNE AETIOLOGY OF PSYCHOTIC DISORDERS,"Abstract

Background

Schizophrenia is a complex disorder in which infection and immune mechanisms are thought to play a role. Epidemiological and ecological studies have implicated influenza infection in particular and it is possible that cross-reactivity, or molecular mimicry, between the influenza virus and brain proteins underlies this association. Proteins might share amino acid sequences, which could thus provide the basis for an autoimmune response that targets endogenous proteins. This study is the first to characterise sequence alignment between schizophrenia-related brain proteins and the proteome of the influenza A virus, and to compare it with sequence alignment in proteins not implicated in schizophrenia.

Methods

The software Peptide Match Service (https://research.bioinformatics.udel.edu/peptidematch/index.jsp; Protein Information Resource, University of Delaware and Georgetown University Medical Center) was used to obtain sequence alignments between protein sequences. A case-control study design was used to compare schizophrenia-related proteins to proteins not involved in schizophrenia. Schizophrenia-related proteins were operationalised as proteins found significant in the Psychiatric Genomics Consortium schizophrenia genome-wide association studies (GWAS). The control group consisted of null proteins (p-value > .75) in the GWAS. Null proteins were also selected to represent genes expressed in tissues other than central nervous system tissues. Both groups were equalised for the total amino acid count. Perfect pentapeptide matches (i.e. 5 amino acids) in proteins and the influenza proteome were explored.

Results

There was a link between schizophrenia-related (GWAS-significant) proteins and presence of perfect matches between proteins and the influenza proteins polymerase acidic protein (χ2 (1) = 5.284, p = .022, two-sided) and RNA-directed RNA polymerase catalytic subunit (χ2 (1) = 6.132, p = .013, two-sided). Pentapeptide-sharing was found to be highly significant between schizophrenia-related proteins and the hemagglutinin precursor (χ2 (1) = 17.723, p = .000026, two-sided). There was no significant difference (p > .05) between schizophrenia-related proteins and proteins not implicated in schizophrenia (GWAS-null proteins) in the frequency of proteins having perfect matches with the influenza A proteins PB2-S1, polymerase basic protein 2, matrix protein 1 and 2, and neuraminidase. However, the result for matrix protein 1 approached statistical significance (χ2 (1) = 3.319, p = .068, two-sided).

Discussion

We find evidence to suggest there is significant overlap between the linear structures of proteins involved in schizophrenia and those integral to the influenza virus. Future research should establish the biological relevance of this finding, particularly regarding the antigenicity of the peptide sequences which we have identified. Extra studies should also go beyond sequences and address structural homologies. Future research could assess whether an immune reaction against particular schizophrenia-related proteins is a plausible mechanism contributing to psychotic disorders. Also, exploring peptide sharing in different influenza strains could offer insights into links between influenza pandemics, maternal infection, and psychosis. Elucidating peptide sharing might have implications for schizophrenia risk management and safe influenza prevention.",2018-04-01 +33190499,LigMate: A Multifeature Integration Algorithm for Ligand-Similarity-Based Virtual Screening.,"Ligand-similarity-based virtual screening is one of the most applicable computer-aided drug design techniques. The current methodology relies heavily on several descriptors of molecular features, including atoms (zero-dimensional, 0D), the presence or absence of structural features (one-dimensional, 1D), topological descriptors (two-dimensional, 2D), geometry and volume (three-dimensional, 3D), or stereoelectronic and stereodynamic properties (four-dimensional, 4D). These descriptors have been frequently used in virtual screening; however, they are usually used independently without integration, which may hinder effective and precise virtual screening. 
In this study, we developed a multifeature integration algorithm named LigMate, which employs a Hungarian algorithm-based matching and a machine learning-based nonlinear combination of various descriptors, including the new relevant descriptors focusing on the maximum common substructures (maximum common substructure score, MCSS), the relative distance of atoms from the ligand mass center (intraligand distance score, ILDS), as well as the ring differences (ring score, RS). In the benchmark tests, LigMate achieved an overall enrichment factor of the first percent (EF1) of 36.14 and an area under the curve (AUC) value of 0.81 on the DUD-E data set, as well as an EF1 of 15.44 and an AUC of 0.69 on the maximum unbiased validation (MUV) data set, outperforming the control methods that are based on single descriptors. Thus, our study provides a new framework for multiple feature integration, which can benefit ligand-similarity-based virtual screening. LigMate is freely available for noncommercial users at http://cao.labshare.cn/ligmate/.",2020-11-16 +33492704,PRIASE 2021 guidelines for reporting animal studies in Endodontology: explanation and elaboration.,"Laws and ethics require that before conducting human clinical trials, a new material, device or drug may have to undergo testing in animals in order to minimize health risks to humans, unless suitable supporting grandfather data already exist. The Preferred Reporting Items for Animal Studies in Endodontology (PRIASE) 2021 guidelines were developed exclusively for the specialty of Endodontology by integrating and adapting the ARRIVE (Animals in Research: Reporting In Vivo Experiments) guidelines and the Clinical and Laboratory Images in Publications (CLIP) principles using a validated consensus-based methodology. 
Implementation of the PRIASE 2021 guidelines will reduce potential sources of bias and thus improve the quality, accuracy, reproducibility, completeness and transparency of reports describing animal studies in Endodontology. The PRIASE 2021 guidelines consist of a checklist with 11 domains and 43 individual items and a flowchart. The aim of the current document is to provide an explanation for each item in the PRIASE 2021 checklist and flowchart and is supplemented with examples from the literature in order for readers to understand their significance and to provide usage guidance. A link to the PRIASE 2021 explanation and elaboration document and PRIASE 2021 checklist and flowchart is available on the Preferred Reporting Items for study Designs in Endodontology (PRIDE) website (http://pride-endodonticguidelines.org/priase/).",2021-03-07 +32315392,LeafCutterMD: an algorithm for outlier splicing detection in rare diseases.,"

Motivation

Next-generation sequencing is rapidly improving diagnostic rates in rare Mendelian diseases, but even with whole genome or whole exome sequencing, the majority of cases remain unsolved. Increasingly, RNA sequencing is being used to solve many cases that evade diagnosis through sequencing alone. Specifically, the detection of aberrant splicing in many rare disease patients suggests that identifying RNA splicing outliers is particularly useful for determining causal Mendelian disease genes. However, there is as yet a paucity of statistical methodologies to detect splicing outliers.

Results

We developed LeafCutterMD, a new statistical framework that significantly improves the previously published LeafCutter in the context of detecting outlier splicing events. Through simulations and analysis of real patient data, we demonstrate that LeafCutterMD has better power than the state-of-the-art methodology while controlling false-positive rates. When applied to a cohort of disease-affected probands from the Mayo Clinic Center for Individualized Medicine, LeafCutterMD recovered all aberrantly spliced genes that had previously been identified by manual curation efforts.

Availability and implementation

The source code for this method is available under the opensource Apache 2.0 license in the latest release of the LeafCutter software package available online at http://davidaknowles.github.io/leafcutter.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-11-01 +33594534,Fishing in a Puddle of Doubt and Disbelief?: A Rejoinder to the Speed et al. Commentary.,"In the article ""Religiously/Spiritually Involved, but in Doubt or Disbelief-Why? Healthy?"", Mrdjenovich (in J Relig Health  https://doi.org/10.1007/s10943-018-0711-2 , 2018) explored the practices of religious attendance and prayer among atheists and agnostic theists. Speed et al. (in J Relig Health https://doi.org/10.1007/s10943-020-01109-1 , 2020) offered a commentary regarding Mrdjenovich's (2018) article with attention to moderators of associations between religious/spiritual constructs and health outcomes. In this rejoinder, I review Speed et al.'s (2020) commentary and I identify a number of concerns, both with their observations and ostensive oversights involving qualitative research methodology, the utility of survey data, the domain of belief, and the impact of calls for a pluralistic approach in the religion-health research field. I conclude that Mrdjenovich does not misunderstand mechanisms of the (non)religion-health relationship as much as Speed et al. seem to misinterpret Mrdjenovich's (2018) purpose, perspective, and default position on the issues. I reiterate that a concerted effort is required to study health outcomes among religious minorities.",2021-02-16 +35117422,High-mobility group A1 (HMGA1) gene expressions in various colorectal cancer cell lines and correlation with prognosis.,"

Background

The high-mobility group A1 gene (HMGA1) plays a major role in the development of malignant cancers. However, the mechanisms underlying the correlation between HMGA1 expression level and patients' overall survival rate in various malignant cancers are unclear.

Methods

We used The Cancer Genome Atlas (TCGA) database (https://genome-cancer.ucsc.edu/) to search for mRNA expression levels of HMGA1 in tumor patients and grouped them by receiver operating characteristic (ROC) curve. This divided patients into a high expression cohort and low expression cohort, and Kaplan-Meier analysis revealed the overall survival of the cancer patients. We also used real-time quantitative PCR (qPCR) to detect the expression of the HMGA1, CBX7, E-cadherin, and β-catenin genes, normalized to the expression of β-actin, in colorectal cancer cell lines.

Results

High expression group correlated with worse survival prognosis statistically significant (P<0.05), and scatter plots showed HMGA1 high expression in the different cancers (lung cancers; lung adenocarcinoma and lung squamous cell carcinoma; stomach and colorectal cancers; liver and pancreatic cancer; kidney papillary cell carcinoma; kidney clear cell carcinoma, brain lower grade glioma; adrenocortical cancer; acute myeloid leukemia; and sarcoma; head and neck squamous cell carcinoma, cholangio and bladder urothelial cancers). Further, we also found that the mRNA expressions of HMGA1, CBX7, E-cadherin, and β-catenin genes significantly in colorectal cancer cell lines (P value: 0.0005), consistent with the results of HMGA1 in TCGA database.

Conclusions

HMGA1 is highly expressed in various cancers than normal tissues, and high expression levels of HMGA1 correlated with a worse prognosis. The gene expressions and the TCGA data clearly supports that targeting HMGA1 in the management of cancers increases the survival rate of cancer patients.",2020-02-01 +32343490,QIIME 2 Enables Comprehensive End-to-End Analysis of Diverse Microbiome Data and Comparative Studies with Publicly Available Data.,"QIIME 2 is a completely re-engineered microbiome bioinformatics platform based on the popular QIIME platform, which it has replaced. QIIME 2 facilitates comprehensive and fully reproducible microbiome data science, improving accessibility to diverse users by adding multiple user interfaces. QIIME 2 can be combined with Qiita, an open-source web-based platform, to re-use available data for meta-analysis. The following basic protocol describes how to install QIIME 2 on a single computer and analyze microbiome sequence data, from processing of raw DNA sequence reads through generating publishable interactive figures. These interactive figures allow readers of a study to interact with data with the same ease as its authors, advancing microbiome science transparency and reproducibility. We also show how plug-ins developed by the community to add analysis capabilities can be installed and used with QIIME 2, enhancing various aspects of microbiome analyses-e.g., improving taxonomic classification accuracy. Finally, we illustrate how users can perform meta-analyses combining different datasets using readily available public data through Qiita. In this tutorial, we analyze a subset of the Early Childhood Antibiotics and the Microbiome (ECAM) study, which tracked the microbiome composition and development of 43 infants in the United States from birth to 2 years of age, identifying microbiome associations with antibiotic exposure, delivery mode, and diet. For more information about QIIME 2, see https://qiime2.org. 
To troubleshoot or ask questions about QIIME 2 and microbiome analysis, join the active community at https://forum.qiime2.org. © 2020 The Authors. Basic Protocol: Using QIIME 2 with microbiome data Support Protocol: Further microbiome analyses.",2020-06-01 +26248563,ITS2 Database V: Twice as Much.,"The internal transcribed spacer 2 (ITS2) is a well-established marker for phylogenetic analyses in eukaryotes. A reliable resource for reference sequences and their secondary structures is the ITS2 database (http://its2.bioapps.biozentrum.uni-wuerzburg.de/). However, the database was last updated in 2011. Here, we present a major update of the underlying data almost doubling the number of entities. This increases the number of taxa represented within all major eukaryotic clades. Moreover, additional data has been added to underrepresented groups and some new groups have been added. The broader coverage across the tree of life improves phylogenetic analyses and the capability of ITS2 as a DNA barcode.",2015-08-06 +31420998,The N-terminal peptide of the transglutaminase-activating metalloprotease inhibitor from Streptomyces mobaraensis accommodates both inhibition and glutamine cross-linking sites.,"Streptomyces mobaraensis is a key player for the industrial production of the protein cross-linking enzyme microbial transglutaminase (MTG). Extra-cellular activation of MTG by the transglutaminase-activating metalloprotease (TAMP) is regulated by the TAMP inhibitory protein SSTI that belongs to the large Streptomyces subtilisin inhibitor (SSI) family. Despite decades of SSI research, the binding site for metalloproteases such as TAMP remained elusive in most of the SSI proteins. Moreover, SSTI is a MTG substrate, and the preferred glutamine residues for SSTI cross-linking are not determined. 
To address both issues, that is, determination of the TAMP and the MTG glutamine binding sites, SSTI was modified by distinct point mutations as well as elongation or truncation of the N-terminal peptide by six and three residues respectively. Structural integrity of the mutants was verified by the determination of protein melting points and supported by unimpaired subtilisin inhibitory activity. While exchange of single amino acids could not disrupt decisively the SSTI TAMP interaction, the N-terminally shortened variants clearly indicated the highly conserved Leu40-Tyr41 as binding motif for TAMP. Moreover, enzymatic biotinylation revealed that an adjacent glutamine pair, upstream from Leu40-Tyr41 in the SSTI precursor protein, is the preferred binding site of MTG. This extension peptide disturbs the interaction with TAMP. The structure of SSTI was furthermore determined by X-ray crystallography. While no structural data could be obtained for the N-terminal peptide due to flexibility, the core structure starting from Tyr41 could be determined and analysed, which superposes well with SSI-family proteins. ENZYMES: Chymotrypsin, EC3.4.21.1; griselysin (SGMPII, SgmA), EC3.4.24.27; snapalysin (ScNP), EC3.4.24.77; streptogrisin-A (SGPA), EC3.4.21.80; streptogrisin-B (SGPB), EC3.4.21.81; subtilisin BPN', EC3.4.21.62; transglutaminase, EC2.3.2.13; transglutaminase-activating metalloprotease (TAMP), EC3.4.-.-; tri-/tetrapeptidyl aminopeptidase, EC3.4.11.-; trypsin, EC3.4.21.4. DATABASES: The atomic coordinates and structure factors (PDB 6I0I) have been deposited in the Protein Data Bank (http://www.rcsb.org).",2019-08-29 +33887150,"The Neural Circuitry Underlying the ""Rhythm Effect"" in Stuttering.","Purpose Stuttering is characterized by intermittent speech disfluencies, which are dramatically reduced when speakers synchronize their speech with a steady beat. 
The goal of this study was to characterize the neural underpinnings of this phenomenon using functional magnetic resonance imaging. Method Data were collected from 16 adults who stutter and 17 adults who do not stutter while they read sentences aloud either in a normal, self-paced fashion or paced by the beat of a series of isochronous tones (""rhythmic""). Task activation and task-based functional connectivity analyses were carried out to compare neural responses between speaking conditions and groups after controlling for speaking rate. Results Adults who stutter produced fewer disfluent trials in the rhythmic condition than in the normal condition. Adults who stutter did not have any significant changes in activation between the rhythmic condition and the normal condition, but when groups were collapsed, participants had greater activation in the rhythmic condition in regions associated with speech sequencing, sensory feedback control, and timing perception. Adults who stutter also demonstrated increased functional connectivity among cerebellar regions during rhythmic speech as compared to normal speech and decreased connectivity between the left inferior cerebellum and the left prefrontal cortex. Conclusions Modulation of connectivity in the cerebellum and prefrontal cortex during rhythmic speech suggests that this fluency-inducing technique activates a compensatory timing system in the cerebellum and potentially modulates top-down motor control and attentional systems. These findings corroborate previous work associating the cerebellum with fluency in adults who stutter and indicate that the cerebellum may be targeted to enhance future therapeutic interventions. Supplemental Material https://doi.org/10.23641/asha.14417681.",2021-04-22 +28038678,Human splicing diversity and the extent of unannotated splice junctions across human RNA-seq samples on the Sequence Read Archive.,"

Background

Gene annotations, such as those in GENCODE, are derived primarily from alignments of spliced cDNA sequences and protein sequences. The impact of RNA-seq data on annotation has been confined to major projects like ENCODE and Illumina Body Map 2.0.

Results

We aligned 21,504 Illumina-sequenced human RNA-seq samples from the Sequence Read Archive (SRA) to the human genome and compared detected exon-exon junctions with junctions in several recent gene annotations. We found 56,861 junctions (18.6%) in at least 1000 samples that were not annotated, and their expression associated with tissue type. Junctions well expressed in individual samples tended to be annotated. Newer samples contributed few novel well-supported junctions, with the vast majority of detected junctions present in samples before 2013. We compiled junction data into a resource called intropolis available at http://intropolis.rail.bio . We used this resource to search for a recently validated isoform of the ALK gene and characterized the potential functional implications of unannotated junctions with publicly available TRAP-seq data.

Conclusions

Considering only the variation contained in annotation may suffice if an investigator is interested only in well-expressed transcript isoforms. However, genes that are not generally well expressed and nonetheless present in a small but significant number of samples in the SRA are likelier to be incompletely annotated. The rate at which evidence for novel junctions has been added to the SRA has tapered dramatically, even to the point of an asymptote. Now is perhaps an appropriate time to update incomplete annotations to include splicing present in the now-stable snapshot provided by the SRA.",2016-12-30 +32966098,"Interrater Reliability for a Two-Interval, Observer-Based Procedure for Measuring Hearing in Young Children.","Purpose To overcome methodology limitations for studying auditory development in young children, we have recently developed an observer-based procedure that uses a conditioned, play-based, motor response (see Bonino & Leibold, 2017). The purpose of this article was to examine interrater reliability for the method. Method Video recordings of test sessions of 2- to 4-year-old children (n = 17) were examined. Detection of a 1000-Hz warble tone was measured with the Play Observer-Based, Two-Interval (PlayO2I) method in each of two conditions: for a fixed intensity level (30 dB SPL) or for a variable intensity level signal (0-30 dB SPL). All test sessions were scored independently by three observers (one real-time, two offline). Observer consensus was evaluated with Fleiss' kappa statistic. To determine if summary data were similar across the observers of each test session, the proportion of correct trials (fixed-level condition) or threshold (variable-level condition) were computed. Results The strength of observer consensus was classified as ""almost perfect"" and ""substantial"" for the fixed-level and variable-level conditions, respectively. 
Follow-up analysis of the variable-level data indicated that differences in observer consensus were seen based on the signal level, the type of response behavior provided by the child, and the confidence level of the real-time observer. Resulting summary data were similar across the three observers of each test session: no significant differences for estimates of the proportion of correct trials or threshold. Conclusions Results from this study confirm strong interrater reliability for the method. The PlayO2I method is a powerful tool for measuring detection and discrimination abilities in young children. Supplemental Material https://doi.org/10.23641/asha.12978197.",2020-09-23 +27189608,A web resource for mining HLA associations with adverse drug reactions: HLA-ADR. ,"Human leukocyte antigens (HLA) are an important family of genes involved in the immune system. Their primary function is to allow the host immune system to be able to distinguish between self and non-self peptides-e.g. derived from invading pathogens. However, these genes have also been implicated in immune-mediated adverse drug reactions (ADRs), presenting a problem to patients, clinicians and pharmaceutical companies. We have previously developed the Allele Frequency Net Database (AFND) that captures the allelic and haplotype frequencies for these HLA genes across many healthy populations from around the world. Here, we report the development and release of the HLA-ADR database that captures data from publications where HLA alleles and haplotypes have been associated with ADRs (e.g. Stevens-Johnson Syndrome/toxic epidermal necrolysis and drug-induced liver injury). HLA-ADR was created by using data obtained through systematic review of the literature and semi-automated literature mining. The database also draws on data already present in AFND allowing users to compare and analyze allele frequencies in both ADR patients and healthy populations. 
The HLA-ADR database provides clinicians and researchers with a centralized resource from which to investigate immune-mediated ADRs.Database URL: http://www.allelefrequencies.net/hla-adr/.",2016-05-17 +34001030,A web-based dynamic Nomogram for predicting instrumental activities of daily living disability in older adults: a nationally representative survey in China.,"

Background

Instrumental Activities of Daily Living (IADL) disability is a common health burden in aging populations. The identification of high-risk individuals is essential for timely targeted interventions. Although predictors for IADL disability have been well described, studies constructing prediction tools for IADL disability among older adults were not adequately explored. Our study aims to develop and validate a web-based dynamic nomogram for individualized IADL disability prediction in older adults.

Methods

Data were obtained from the China Health and Retirement Longitudinal Study (CHARLS). We included 4791 respondents aged 60 years and over, without IADL disability at baseline in the 2011 to 2013 cohort (training cohort) and 371 respondents in the 2013 to 2015 cohort (validation cohort). Here, we defined IADL disability as needing any help in any items of the Lawton and Brody's scale. A web-based dynamic nomogram was built based on a logistic regression model in the training cohort. We validated the nomogram internally with 1000 bootstrap resamples and externally in the validation cohort. The discrimination and calibration ability of the nomogram was assessed using the concordance index (C-index) and calibration plots, respectively.

Results

The nomogram incorporated ten predictors, including age, education level, social activity frequency, drinking frequency, smoking frequency, comorbidity condition, self-report health condition, gait speed, cognitive function, and depressive symptoms. The C-index values in the training and validation cohort were 0.715 (bootstrap-corrected C-index = 0.702) and 0.737, respectively. The internal and external calibration plots for predictions of IADL disability were in excellent agreement. An online web server was built ( https://lilizhang.shinyapps.io/DynNomapp/ ) to facilitate the use of the nomogram.

Conclusions

We developed a dynamic nomogram to evaluate the risk of IADL disability precisely and expediently. The application of this nomogram would be helpful for health care physicians in decision-making.",2021-05-17 +33532827,RAD: a web application to identify region associated differentially expressed genes. ,"With the advance of genomic sequencing techniques, chromatin accessible regions, transcription factor binding sites and epigenetic modifications can be identified at genome-wide scale. Conventional analyses focus on the gene regulation at proximal regions; however, distal regions are usually less focused, largely due to the lack of reliable tools to link these regions to coding genes. In this study, we introduce RAD (Region Associated Differentially expressed genes), a user-friendly web tool to identify both proximal and distal region associated differentially expressed genes (DEGs). With DEGs and genomic regions of interest (gROI) as input, RAD maps the up- and down-regulated genes associated with any gROI and helps researchers to infer the regulatory function of these regions based on the distance of gROI to differentially expressed genes. RAD includes visualization of the results and statistical inference for significance. RAD is implemented with Python 3.7 and run on a Nginx server. RAD is freely available at http://labw.org/rad as online web service. Supplementary data are available at Bioinformatics online.",2021-02-03 +30407532,MaizeGDB 2018: the maize multi-genome genetics and genomics database.,"Since its 2015 update, MaizeGDB, the Maize Genetics and Genomics database, has expanded to support the sequenced genomes of many maize inbred lines in addition to the B73 reference genome assembly. Curation and development efforts have targeted high quality datasets and tools to support maize trait analysis, germplasm analysis, genetic studies, and breeding. 
MaizeGDB hosts a wide range of data including recent support of new data types including genome metadata, RNA-seq, proteomics, synteny, and large-scale diversity. To improve access and visualization of data types several new tools have been implemented to: access large-scale maize diversity data (SNPversity), download and compare gene expression data (qTeller), visualize pedigree data (Pedigree Viewer), link genes with phenotype images (MaizeDIG), and enable flexible user-specified queries to the MaizeGDB database (MaizeMine). MaizeGDB also continues to be the community hub for maize research, coordinating activities and providing technical support to the maize research community. Here we report the changes MaizeGDB has made within the last three years to keep pace with recent software and research advances, as well as the pan-genomic landscape that cheaper and better sequencing technologies have made possible. MaizeGDB is accessible online at https://www.maizegdb.org.",2019-01-01 +31752633,"A Systematic Review and Meta-Analysis of Dyadic Psychological Interventions for BPSD, Quality of Life and/or Caregiver Burden in Dementia or MCI.","Objectives: This systematic review and meta-analysis assesses the effectiveness of psychological interventions that involve people with dementia or mild cognitive impairment (MCI) and their informal caregivers, and target improvements in the management of the behavioral and psychological symptoms of dementia (BPSD); quality of life; and/or burden reduction for people with either dementia or MCI and their informal caregivers.Methods: Studies were identified through database searches (Cochrane Library, CENTRAL, CINAHL, EMBASE, MEDLINE and PsychINFO) and clinical trials registers (ClinicalTrials.gov and http://apps.who.int/trialsearch/). Data were pooled for meta-analysis.Results: Database and reference list searches identified 1,878 references, of which fourteen studies were included. 
Positive effects were found on the anxiety symptoms of people with dementia on the RAID scale; on the quality of life of people with dementia on the self-rated QoL-AD scale; and on informal caregiver burden on the Zarit Burden Interview.Conclusions: Psychological interventions involving whole dyads have some promise for both people with dementia and informal caregivers, but are still far from uniformly effective across BPSD, quality of life, and caregiver burden. Further research directions are discussed.Clinical Implications: The results suggest that clinicians should routinely involve both halves of the dyad when delivering psychological interventions targeting anxiety or quality of life for people with dementia, or burden for informal caregivers.",2019-11-22 +32271863,gplas: a comprehensive tool for plasmid analysis using short-read graphs.,"

Summary

Plasmids can horizontally transmit genetic traits, enabling rapid bacterial adaptation to new environments and hosts. Short-read whole-genome sequencing data are often applied to large-scale bacterial comparative genomics projects but the reconstruction of plasmids from these data is facing severe limitations, such as the inability to distinguish plasmids from each other in a bacterial genome. We developed gplas, a new approach to reliably separate plasmid contigs into discrete components using sequence composition, coverage, assembly graph information and network partitioning based on a pruned network of plasmid unitigs. Gplas facilitates the analysis of large numbers of bacterial isolates and allows a detailed analysis of plasmid epidemiology based solely on short-read sequence data.

Availability and implementation

Gplas is written in R, Bash and uses a Snakemake pipeline as a workflow management system. Gplas is available under the GNU General Public License v3.0 at https://gitlab.com/sirarredondo/gplas.git.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +32434476,An integrative investigation on significant mutations and their down-stream pathways in lung squamous cell carcinoma reveals CUL3/KEAP1/NRF2 relevant subtypes.,"

Background

Molecular mechanism of lung squamous cell carcinoma (LUSC) remains poorly understood, hampering effective targeted therapies or precision diagnosis about LUSC. We devised an integrative framework to investigate on the molecular patterns of LUSC by systematically mining the genomic, transcriptional and clinical information.

Methods

We utilized the genomics and transcriptomics data for the LUSC cohorts in The Cancer Genome Atlas.. Both kinds of omics data for 33 types of cancers were downloaded from The NCI's Genomic Data Commons (GDC) (https://gdc.cancer.gov/about-data/publications/pancanatlas). The genomics data were processed in mutation annotation format (maf), and the transcriptomics data were determined by RNA-seq method. Mutation significance was estimated by MutSigCV. Prognosis analysis was based on the cox proportional hazards regression (Coxph) model.

Results

Significant somatic mutated genes (SMGs) like NFE2L2, RASA1 and COL11A1 and their potential down-stream pathways were recognized. Furthermore, two LUSC-specific and prognosis-meaningful subtypes were identified. Interestingly, the good prognosis subtype was enriched with mutations in CUL3/KEAP1/NRF2 pathway and with markedly suppressed expressions of multiple down-stream pathways like epithelial mesenchymal transition. The subtypes were verified by the other two cohorts. Additionally, primarily regulated down-stream elements of different SMGs were also estimated. NFE2L2, KEAP1 and RASA1 mutations showed remarkable effects on the subtype-determinant gene expressions, especially for the inflammatory relevant genes.

Conclusions

This study supplies valuable references on potential down-stream processes of SMGs and an alternative way to classify LUSC.",2020-05-20 +29499865,Ten-year clinical experience of humanitarian cardiothoracic surgery in Rwanda: Building a platform for ultimate sustainability in a resource-limited setting.,"OBJECTIVE:Despite its near complete eradication in resource-rich countries, rheumatic heart disease remains the most common acquired cardiovascular disease in sub-Saharan Africa. With a ratio of physicians/population of 1 per 10,500, including only 4 cardiologists for a population of 11.4 million, Rwanda represents a resource-limited setting lacking the local capacity to detect and treat early cases of strep throat and perform lifesaving operations for advanced rheumatic heart disease. Humanitarian surgical outreach in this region can improve the delivery of cardiovascular care by providing sustainability through mentorship, medical expertise, training, and knowledge transfer, and ultimately the creation of a cardiac center. METHODS:We describe the experience of consecutive annual visits to Rwanda since 2008 and report the outcomes of a collaborative approach to enable sustainable cardiac surgery in the region. The Ferrans and Powers Quality of Life Index tool's Cardiac Version (http://www.uic.edu/orgs/qli/) was administered to assess the postoperative quality of life. RESULTS:Ten visits have been completed, performing 149 open procedures, including 200 valve implantations, New York Heart Association class III or IV, with 4.7% 30-day mortality. All procedures were performed with the participation of local Rwandan personnel, expatriate physicians, nurses, residents, and support staff. Early complications included cerebrovascular accident (n = 4), hemorrhage requiring reoperation (n = 6), and death (n = 7). Quality of life was assessed to further understand challenges encountered after cardiac surgery in this resource-limited setting. 
Four major domains were considered: health and functioning, social and economic, psychologic/spiritual, and family. The mean total quality of life index was 20.79 ± 4.07 on a scale from 0 to 30, for which higher scores indicated higher quality of life. Women had significantly lower ""social and economic"" subscores (16.81 ± 4.17) than men (18.64 ± 4.10) (P < .05). Patients who reported receiving their follow-up care in rural health centers also had significantly lower ""social and economic"" subscores (15.67 ± 3.81) when compared with those receiving follow-up care in urban health facilities (18.28 ± 4.16) (P < .005). Value afforded to family and psychologic factors remained high among all groups. Major postsurgical challenges faced included barriers to follow-up and systemic anticoagulation. CONCLUSIONS:This report represents the first account of a long-term humanitarian effort to develop sustainability in cardiac surgery in a resource-limited setting, Rwanda. With the use of volunteer teams to deliver care, transfer knowledge, and mentor local personnel, the results demonstrate superior outcomes and favorable indices of quality of life. The credibility gained over a decade of effort has created the opportunity for a partnership with Rwanda to establish a dedicated center of cardiac care to assist in mitigating the burden of cardiovascular disease throughout sub-Saharan Africa.",2018-02-27 +31598633,SPARSim single cell: a count data simulator for scRNA-seq data.,"

Motivation

Single cell RNA-seq (scRNA-seq) count data show many differences compared with bulk RNA-seq count data, making the application of many RNA-seq pre-processing/analysis methods not straightforward or even inappropriate. For this reason, the development of new methods for handling scRNA-seq count data is currently one of the most active research fields in bioinformatics. To help the development of such new methods, the availability of simulated data could play a pivotal role. However, only few scRNA-seq count data simulators are available, often showing poor or not demonstrated similarity with real data.

Results

In this article we present SPARSim, a scRNA-seq count data simulator based on a Gamma-Multivariate Hypergeometric model. We demonstrate that SPARSim allows to generate count data that resemble real data in terms of count intensity, variability and sparsity, performing comparably or better than one of the most used scRNA-seq simulator, Splat. In particular, SPARSim simulated count matrices well resemble the distribution of zeros across different expression intensities observed in real count data.

Availability and implementation

SPARSim R package is freely available at http://sysbiobig.dei.unipd.it/? q=SPARSim and at https://gitlab.com/sysbiobig/sparsim.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +33823117,Exploring Parental Perspectives of Childhood Speech and Language Disorders Across 10 Countries: A Pilot Qualitative Study.,"Purpose Although researchers have explored parental perspectives of childhood speech and language disorders, most studies have been conducted in English-speaking countries. Little is known about parental experiences across countries, where procedures of language screening and services for language disorders differ. The authors participated in the COST 1 Action network IS1406, ""Enhancing Children's Oral Language Skills Across Europe and Beyond,"" which provided an opportunity to conduct cross-country qualitative interviews with parents. The aim of this pilot study was to explore ways in which parents construed and described speech and language disorders across countries. Method Semistructured qualitative interviews were conducted with parents from 10 families in 10 different countries. The data were analyzed using thematic analysis. Findings The overall theme was ""acknowledging parental expertise."" The parents described, in detail, ways in which their children's speech and language (dis)abilities had an impact on the children's everyday life. Three subthemes were identified: impairment, disability, and changes over time. Conclusions The findings suggest that, across a range of countries, parents demonstrated contextualized understandings of their children's speech and language (dis)abilities, along with the everyday functional implications of the disorders. Hence, despite not holding professional knowledge about language disorders, the voices, views, understandings, and personal experiences of parents in relation to their child's disorder should be listened to when planning therapy services. 
Supplemental Material https://doi.org/10.23641/asha.14109881.",2021-04-06 +26578568,dbPTM 2016: 10-year anniversary of a resource for post-translational modification of proteins.,"Owing to the importance of the post-translational modifications (PTMs) of proteins in regulating biological processes, the dbPTM (http://dbPTM.mbc.nctu.edu.tw/) was developed as a comprehensive database of experimentally verified PTMs from several databases with annotations of potential PTMs for all UniProtKB protein entries. For this 10th anniversary of dbPTM, the updated resource provides not only a comprehensive dataset of experimentally verified PTMs, supported by the literature, but also an integrative interface for accessing all available databases and tools that are associated with PTM analysis. As well as collecting experimental PTM data from 14 public databases, this update manually curates over 12 000 modified peptides, including the emerging S-nitrosylation, S-glutathionylation and succinylation, from approximately 500 research articles, which were retrieved by text mining. As the number of available PTM prediction methods increases, this work compiles a non-homologous benchmark dataset to evaluate the predictive power of online PTM prediction tools. An increasing interest in the structural investigation of PTM substrate sites motivated the mapping of all experimental PTM peptides to protein entries of Protein Data Bank (PDB) based on database identifier and sequence identity, which enables users to examine spatially neighboring amino acids, solvent-accessible surface area and side-chain orientations for PTM substrate sites on tertiary structures. Since drug binding in PDB is annotated, this update identified over 1100 PTM sites that are associated with drug binding. The update also integrates metabolic pathways and protein-protein interactions to support the PTM network analysis for a group of proteins. 
Finally, the web interface is redesigned and enhanced to facilitate access to this resource.",2015-11-17 +,First Report of Alternaria alternata Causing Leaf Spot of Senecio cannabifolius in China,"Senecio cannabifolius Less. is a perennial herb belongs to the family Compositae. The plant has been widely used as a folk traditional medicine for the treatment of inflammation, pneumonia, bronchitis, and viral respiratory tract infection (Chen et al. 2015; Yan and Li 1997). In summer 2018, severe leaf spot disease was observed in Chahaer city, Inner Mongolia, China. At the beginning of June, the bottom leaves were the first to become infected. Symptoms on leaves initially appeared as small circular, dark brown, necrotic spots. Then, they gradually enlarged in size, becoming irregular. Later, severe infections were seen as coalesced lesions and blighted leaves and stems. Yield losses as much as 80% were reported on approximately 1,000 acres in various S. cannabifolius growing regions. Leaf tissues (5 × 5 mm), cut from the margins of lesions, were surface disinfected by 1% NaClO for 1 min, placed on potato dextrose agar, and incubated at 25°C with a 12-h photoperiod. Fungus Alternaria alternata was consistently isolated and preliminarily identified on the basis of morphological characteristics. Dark brown conidia produced in long chains by conidiophores. Conidia had short beaks and ranged from 15.86 to 33.62 × 12.67 to 18.39 μm and had three to six transverse and zero to three longitudinal septa (n = 60). To confirm the identification, DNA from five single-spore isolates was extracted, and ITS1-5.8-ITS2, the endopolygalacturonase (endoPG), and Alternaria major allergen (Alta1) genes were amplified and further sequenced using primers ITS1/ITS4, PG3/PG2b (Andrew et al. 2009), and Alt-for/Alt-rev (Woudenberg et al. 2015), respectively. A GenBank BLAST search showed 99 to 100% identity to the type species A. alternata (accession nos. 
MG025876 for ITS1-5.8-ITS2, AY295030 for endoPG, and KP123956 for Alta1) for all isolates, and sequence data were submitted to GenBank (accession nos. MH728994 [ITS1-5.8-ITS2], MH728996 [endoPG], and MH728995 [Alta1]). Pathogenicity tests were conducted to confirm Koch’s postulates, by spraying leaves of 10 healthy, 3-month-old potted S. cannabifolius plants with a spore suspension of 106 conidia/ml. A. alternata spores were suspended in 0.1% Tween 80 and sprayed onto leaves until runoff. Control plants were sprayed with a sterile 0.1% Tween 80 mixture until runoff. Plants were covered by polyethylene bags for 3 days to achieve high humidity levels and incubated in a greenhouse at 25°C. After 3 days, spots similar to those observed in the field appeared on the leaves of inoculated plants, whereas control plants remained symptomless. Isolations made from diseased spots consistently yielded A. alternata. The pathogenicity test was repeated twice under the same conditions. To our knowledge, this is the first report of A. alternata causing leaf spot on S. cannabifolius. A. alternata has also been reported infecting other Senecio genus plants, for example, S. cineraria in Germany and S. skirrhodon in New Zealand (Woudenberg et al. 2015). Other than Senecio, A. alternata has a wide host range of more than 700 documented plant species (https://nt.ars-grin.gov/fungaldatabases/). Future research will focus primarily on management of this disease.",2019-05-01 +30717659,BioReader: a text mining tool for performing classification of biomedical literature.,"BACKGROUND:Scientific data and research results are being published at an unprecedented rate. Many database curators and researchers utilize data and information from the primary literature to populate databases, form hypotheses, or as the basis for analyses or validation of results. 
These efforts largely rely on manual literature surveys for collection of these data, and while querying the vast amounts of literature using keywords is enabled by repositories such as PubMed, filtering relevant articles from such query results can be a non-trivial and highly time consuming task. RESULTS:We here present a tool that enables users to perform classification of scientific literature by text mining-based classification of article abstracts. BioReader (Biomedical Research Article Distiller) is trained by uploading article corpora for two training categories - e.g. one positive and one negative for content of interest - as well as one corpus of abstracts to be classified and/or a search string to query PubMed for articles. The corpora are submitted as lists of PubMed IDs and the abstracts are automatically downloaded from PubMed, preprocessed, and the unclassified corpus is classified using the best performing classification algorithm out of ten implemented algorithms. CONCLUSION:BioReader supports data and information collection by implementing text mining-based classification of primary biomedical literature in a web interface, thus enabling curators and researchers to take advantage of the vast amounts of data and information in the published literature. BioReader outperforms existing tools with similar functionalities and expands the features used for mining literature in database curation efforts. The tool is freely available as a web service at http://www.cbs.dtu.dk/services/BioReader.",2019-02-04 +31950388,Generation of fruit postharvest gene datasets and a novel motif analysis tool for functional studies: uncovering links between peach fruit heat treatment and cold storage responses.,"

Main conclusion

A survey of developed fruit gene-specific datasets and the implementation of a novel cis-element analysis tool indicate specific transcription factors as novel regulatory actors under HT response and CI protection. Heat treatment (HT) prior to cold storage (CS) has been successfully applied to ameliorate fruit chilling injury (CI) disorders. Molecular studies have identified several HT-driven benefits and putative CI-protective molecules and mechanisms. However, bioinformatic tools and analyses able to integrate fruit-specific information are necessary to begin functional studies and breeding projects. In this work, a HT-responsive gene dataset (HTds) and four fruit expression datasets (FEds), containing gene-specific information from several species and postharvest conditions, were developed and characterized. FEds provided information about HT-responsive genes, not only validating their sensitivity to HT in different systems but also revealing most of them as CS-responsive. A special focus was given to peach heat treatment-sensitive transcriptional regulation by the development of a novel Perl motif analysis software (cisAnalyzer) and a curated plant cis-elements dataset (PASPds). cisAnalyzer is able to assess sequence motifs presence, localization, enrichment and discovery on biological sequences. Its implementation for the enrichment analysis of PASPds motifs on the promoters of HTds genes rendered particular cis-elements that indicate certain transcription factor (TF) families as responsible of fruit HT-sensitive transcription regulation. Phylogenetic and postharvest expression data of these TFs showed a functional diversity of TF families, with members able to fulfil roles under HT, CS and/or both treatments. 
All integrated datasets and cisAnalyzer tool were deposited in FruitGeneDB (https://www.cefobi-conicet.gov.ar/FruitGeneDB/search1.php), a new available database with a great potential for fruit gene functional studies, including the markers of HT and CS responses whose study will contribute to unravel HT-driven CI-protection and select tolerant cultivars.",2020-01-16 +27297221,sHSPdb: a database for the analysis of small Heat Shock Proteins.,"

Background

small Heat Shock Proteins (sHSP) is a wide proteins family. SHSP are found in all kingdoms and they play critical roles in plant stress tolerance mechanisms (as well as in pathogenic microorganisms and are implicated in human diseases).

Results

sHSPdb (small Heat Shock Proteins database) is an integrated resource containing non-redundant, full-length and curated sequences of sHSP, classified on the basis of amino acids motifs and physico-chemical properties. sHSPdb gathers data about sHSP defined by various databases (Uniprot, PFAM, CDD, InterPro). It provides a browser interface for retrieving information from the whole database and a search interface using various criteria for retrieving a refined subset of entries. Physicochemical properties, amino acid composition and combinations are calculated for each entry. sHSPdb provides automatic statistical analysis of all sHSP properties. Among various possibilities, sHSPdb allows BLAST searches, alignment of selected sequences and submission of sequences.

Conclusions

sHSPdb is a new database containing information about sHSP from all kingdoms. sHSPdb provides a classification of sHSP, as well as tools and data for the analysis of the structure - function relationships of sHSP. Data are mainly related to various physico-chemical properties of the amino acids sequences of sHSP. sHSPdb is accessible at http://forge.info.univ-angers.fr/~gh/Shspdb/index.php .",2016-06-13 +28977473,dbCoRC: a database of core transcriptional regulatory circuitries modeled by H3K27ac ChIP-seq signals.,"Core transcription regulatory circuitry (CRC) is comprised of a small group of self-regulated transcription factors (TFs) and their interconnected regulatory loops. Studies from embryonic stem cells and other cellular models have revealed the elementary roles of CRCs in transcriptional control of cell identity and cellular fate. Systematic identification and subsequent archiving of CRCs across diverse cell types and tissues are needed to explore both cell/tissue type-specific and disease-associated transcriptional networks. Here, we present a comprehensive and interactive database (dbCoRC, http://dbcorc.cam-su.org) of CRC models which are computationally inferred from mapping of super-enhancer and prediction of TF binding sites. The current version of dbCoRC contains CRC models for 188 human and 50 murine cell lines/tissue samples. In companion with CRC models, this database also provides: (i) super enhancer, typical enhancer, and H3K27ac landscape for individual samples, (ii) putative binding sites of each core TF across the super-enhancer regions within CRC and (iii) expression of each core TF in normal or cancer cells/tissues. 
The dbCoRC will serve as a valuable resource for the scientific community to explore transcriptional control and regulatory circuitries in biological processes related to, but not limited to lineage specification, tissue homeostasis and tumorigenesis.",2018-01-01 +29059379,CirGRDB: a database for the genome-wide deciphering circadian genes and regulators.,"Circadian rhythms govern various kinds of physiological and behavioral functions of the living organisms, and disruptions of the rhythms are highly detrimental to health. Although several databases have been built for circadian genes, a resource for comprehensive post-transcriptional regulatory information of circadian RNAs and expression patterns of disease-related circadian RNAs is still lacking. Here, we developed CirGRDB (http://cirgrdb.biols.ac.cn) by integrating more than 4936 genome-wide assays, with the aim of fulfilling the growing need to understand the rhythms of life. CirGRDB presents a friendly web interface that allows users to search and browse temporal expression patterns of interested genes in 37 human/mouse tissues or cell lines, and three clinical disorders including sleep disorder, aging and tumor. More importantly, eight kinds of potential transcriptional and post-transcriptional regulators involved in the rhythmic expression of the specific genes, including transcription factors, histone modifications, chromatin accessibility, enhancer RNAs, miRNAs, RNA-binding proteins, RNA editing and RNA methylation, can also be retrieved. Furthermore, a regulatory network could be generated based on the regulatory information. 
In summary, CirGRDB offers a useful repository for exploring disease-related circadian RNAs, and deciphering the transcriptional and post-transcriptional regulation of circadian rhythms.",2018-01-01 +32424025,PolySTest: Robust Statistical Testing of Proteomics Data with Missing Values Improves Detection of Biologically Relevant Features.,"Statistical testing remains one of the main challenges for high-confidence detection of differentially regulated proteins or peptides in large-scale quantitative proteomics experiments by mass spectrometry. Statistical tests need to be sufficiently robust to deal with experiment intrinsic data structures and variations and often also reduced feature coverage across different biological samples due to ubiquitous missing values. A robust statistical test provides accurate confidence scores of large-scale proteomics results, regardless of instrument platform, experimental protocol and software tools. However, the multitude of different combinations of experimental strategies, mass spectrometry techniques and informatics methods complicate the decision of choosing appropriate statistical approaches. We address this challenge by introducing PolySTest, a user-friendly web service for statistical testing, data browsing and data visualization. We introduce a new method, Miss test, that simultaneously tests for missingness and feature abundance, thereby complementing common statistical tests by rescuing otherwise discarded data features. We demonstrate that PolySTest with integrated Miss test achieves higher confidence and higher sensitivity for artificial and experimental proteomics data sets with known ground truth. Application of PolySTest to mass spectrometry based large-scale proteomics data obtained from differentiating muscle cells resulted in the rescue of 10-20% additional proteins in the identified molecular networks relevant to muscle differentiation. 
We conclude that PolySTest is a valuable addition to existing tools and instrument enhancements that improve coverage and depth of large-scale proteomics experiments. A fully functional demo version of PolySTest and Miss test is available via http://computproteomics.bmb.sdu.dk/Apps/PolySTest.",2020-05-18 +33491712,Gauging van der Waals interactions in aqueous solutions of 2D MOFs: when water likes organic linkers more than open-metal sites.,"Molecular dynamics simulations combined with periodic electronic structure calculations are performed to decipher structural, thermodynamical and dynamical properties of the interfaced vs. confined water adsorbed in hexagonal 1D channels of the 2D layered electrically conductive Cu3(HHTP)2 and Cu3(HTTP)2 metal-organic frameworks (HHTP = 2,3,6,7,10,11-hexahydroxytriphenylene and HTTP = 2,3,6,7,10,11-hexathiotriphenylene). Comparing water adsorption in bulk vs. slab models of the studied 2D MOFs shows that water is preferentially adsorbed on the framework walls via forming hydrogen bonds to the organic linkers rather than by coordinating to the coordinatively unsaturated open-Cu2+ sites. Theory predicts that in Cu3(HTTP)2 the van der Waals interactions are stronger which helps the MOF maintain its layered morphology with allowing very little water molecules to diffuse into the interlayer space. Data presented in this work are general and helpful in implementing new strategies for preserving the integrity as well as electrical conductivity of porous materials in aqueous solutions.",2021-02-01 +32109569,Purification and characterization of seven bioactive compounds from the newly isolated Streptomyces cavourensis TN638 strain via solid-state fermentation.,"The strain TN638 was isolated from Tunisian soil contaminated with industrial wastewater and selected for its potent antimicrobial activity against the tested Gram positive bacteria: Staphylococcus aureus (S. aureus) ATCC 6538 and Listeria monocytogenes (L. 
monocytogenes) ATCCC 19117, and Gram negative bacteria: Agrobacterium tumefaciens (A. tumefaciens) ATCC 23308 and Salmonella typhimurium (S. typhimurium) ATCC 14028 and fungi: Candida albicans (C. albicans) ATCC 10231, Rhizoctonia solani (R. solani) ATCC 58938 and Fusarium sp. Solide-state fermentation (SSF) dry crude extract of the TN638 strain presents a strong inhibitory activity notably against the phytopathogenic microorganism A. tumefaciens ATCC 23308 and the two pathogenic bacteria S. aureus ATCC 6538 and L. monocytogenes ATCCC 19117 with a zone of inhibition of 48, 34 and 34 mm respectively. According to the morphological characteristic, the complete 16S rRNA gene nucleotide sequence determination [1492 bp deposited in National Center of Biotechnology Information (NCBI) database under the accession no. LN854629.1; https://www.ncbi.nlm.nih.gov/nuccore/LN854629.1/], and the phylogenetic analysis, we can deduce that our isolate is an actinomycete bacterium belonging to the genus Streptomyces and the most closely related strain was Streptomyces cavourensis (S. cavourensis) NRRL 2740T (99.9%). We propose the assignment of our strain as Streptomyces cavourensis (S. cavourensis) TN638 strain. Work-up and purification of the strain extract using different chromatographic techniques afforded seven bio-compounds namely: Cyclo-(Leu-Pro) (1), Cyclo-(Val-Pro) (2), Cyclo-(Phe-Pro) (3), nonactin (4), monactin (5), dinactin (6) and trinactin (7). The chemical structures of compounds 1-7 were confirmed by nuclear magnetic resonance (NMR) 1D and 2D spectroscopy, mass spectrometry, and comparison with literature data. The three purified diketopiperazine (DKP) derivatives (1-3), demonstrated significant antibacterial activity against A. tumefaciens ATCC 23308 and S. typhimurium ATCC 14028. The four pure macrotetrolides (4-7), exhibited strong inhibitory effect against all tested Gram positive and Gram negative bacteria notably against A. tumefaciens ATCC 23308 and S. 
typhimurium ATCC 14028 with a minimum inhibitory concentration (MIC) around 8 μg/mL quite similar to that of ampicillin. Thus, we propose the use of the (SSF) active extract of the S. cavourensis TN638 strain as safe biological product to control disease caused by plant pathogen A. tumefaciens. Also, the purified active molecules produced by this strain could be used in pharmaceutical field.",2020-02-26 +33073936,Exploring the Molecular Mechanism and Biomarker of Recurrent Spontaneous Abortion Based on RNA Sequencing Analysis. ,"Recurrent spontaneous abortion (RSA) is defined as the failure of two or more consecutive clinical pregnancies before 20 weeks of gestation. It is a hot issue in contemporary obstetrics. The etiology of RSA is complicated. Exploring the molecular mechanisms of RSA will be helpful for the prevention and precise therapy at the molecular level. This study aimed to provide novel insights into the biological characteristics and related pathways of differentially expressed genes (DEGs) in RSA. The data set GSE121950 was obtained from GEO data sets. We identified the DEGs using the affy pack-age in R programming software. Gene set enrichment analysis (GESA) and GenePattern tools were performed to examine the gene expression differences between RSA and control group. Protein-protein interaction (PPI) analysis was performed using STRING online tool (https://string-db.org/). qRT-PCR was carried out to validate the expression levels of DEGs in 16 villus tissue samples from patients with induced abortion and 16 villus tissue samples from RSA patients. A total of 628 DEGs with adjPval < 0.05 and |logFC| > 1 were obtained, including 155 up-regulated genes and 473 down-regulated genes. Ten gene ontology (GO) terms and 10 Kyoto Encyclopedia of Genes and Genomes (KEGG) pathways were screened out by comparing the genome-wide gene set expression patterns of normal and RSA tissues. 
Eight genes involved in RSA were identified from the hippo signaling pathway, cytokine-cytokine receptor interaction pathway, and allograft rejection pathway. Present findings demonstrated that several cytokine regulation processes have a deep impact on RSA. A number of genes involved in the hippo signaling pathway, cytokine-cytokine receptor interaction pathway, and allograft rejection pathway may be critical mediators or participators in the pathogenesis of RSA. Although further in vivo and in vitro validations are required, our data may provide an important theoretical basis to elucidate the pathogenesis of RSA.",2020-10-01 +33235280,An integrative atlas of chicken long non-coding genes and their annotations across 25 tissues.,"Long non-coding RNAs (LNC) regulate numerous biological processes. In contrast to human, the identification of LNC in farm species, like chicken, is still lacunar. We propose a catalogue of 52,075 chicken genes enriched in LNC ( http://www.fragencode.org/ ), built from the Ensembl reference extended using novel LNC modelled here from 364 RNA-seq and LNC from four public databases. The Ensembl reference grew from 4,643 to 30,084 LNC, of which 59% and 41% with expression ≥ 0.5 and ≥ 1 TPM respectively. Characterization of these LNC relatively to the closest protein coding genes (PCG) revealed that 79% of LNC are in intergenic regions, as in other species. Expression analysis across 25 tissues revealed an enrichment of co-expressed LNC:PCG pairs, suggesting co-regulation and/or co-function. As expected LNC were more tissue-specific than PCG (25% vs. 10%). Similarly to human, 16% of chicken LNC hosted one or more miRNA. We highlighted a new chicken LNC, hosting miR155, conserved in human, highly expressed in immune tissues like miR155, and correlated with immunity-related PCG in both species. 
Among LNC:PCG pairs tissue-specific in the same tissue, we revealed an enrichment of divergent pairs with the PCG coding transcription factors, as for example LHX5, HXD3 and TBX4, in both human and chicken.",2020-11-24 +33235230,Qtlizer: comprehensive QTL annotation of GWAS results.,"Exploration of genetic variant-to-gene relationships by quantitative trait loci such as expression QTLs is a frequently used tool in genome-wide association studies. However, the wide range of public QTL databases and the lack of batch annotation features complicate a comprehensive annotation of GWAS results. In this work, we introduce the tool ""Qtlizer"" for annotating lists of variants in human with associated changes in gene expression and protein abundance using an integrated database of published QTLs. Features include incorporation of variants in linkage disequilibrium and reverse search by gene names. Analyzing the database for base pair distances between best significant eQTLs and their affected genes suggests that the commonly used cis-distance limit of 1,000,000 base pairs might be too restrictive, implicating a substantial amount of wrongly and yet undetected eQTLs. We also ranked genes with respect to the maximum number of tissue-specific eQTL studies in which a most significant eQTL signal was consistent. For the top 100 genes we observed the strongest enrichment with housekeeping genes (P = 2 × 10-6) and with the 10% highest expressed genes (P = 0.005) after grouping eQTLs by r2 > 0.95, underlining the relevance of LD information in eQTL analyses. Qtlizer can be accessed via https://genehopper.de/qtlizer or by using the respective Bioconductor R-package ( https://doi.org/10.18129/B9.bioc.Qtlizer ).",2020-11-24 +33735954,Pharmacogenetics of common SNP affecting drug metabolizing enzymes: comparison of allele frequencies between European and Malaysian/Singaporean. 
,"Compared to Europe, data on genetic variation in genes transcribing drug metabolizing enzymes among Asian is limited due to ethnic diversity. Here we compare frequencies for clinically relevant single nucleotide polymorphism (SNP) commonly observed in drug metabolizing enzymes between European and Malaysian/Singaporean. Minor allele frequencies (MAF) for the indicated SNPs for European, South Asian and East Asian populations were obtained from the NCBI website (https://www.ncbi.nlm.nih.gov/snp). The SNP prevalence among Malaysian/Singaporean was characterized from gene association studies. Generally, some SNPs in CYP2D6 and CYP2C19 do not show good agreement between the two populations as to the MAF value obtained. CYP2D6*4 tends to be more common among European, whereas CYP2D6*10 is more common in Malays and Chinese among Singaporean. Regardless of different phenotype, MAF of CYP2D6*4 for Indians is similar to that seen by the European. Singaporeans show smaller MAF for CYP2C19*17 but higher CYP2C19*2 frequencies as opposed to European ones. Following growing attention to the contribution of CYP3A4/5, N-acetyltransferases (NAT2), thiopurine methyltransferase (TPMT) and uridine diphosphate glucuronosyltransferases (UGT)2B7 in predicting drug response across Europe, there are limited pharmacogenetics (PGx) studies examining the gene-drug interaction among Malaysian/Singaporean. To better understand the heterogeneity of the drug response, PGx studies for the abovementioned enzymes between ethnics in Malaysian/Singaporean should be identified.",2021-03-18 +30398643,ChEMBL: towards direct deposition of bioassay data.,"ChEMBL is a large, open-access bioactivity database (https://www.ebi.ac.uk/chembl), previously described in the 2012, 2014 and 2017 Nucleic Acids Research Database Issues. In the last two years, several important improvements have been made to the database and are described here. 
These include more robust capture and representation of assay details; a new data deposition system, allowing updating of data sets and deposition of supplementary data; and a completely redesigned web interface, with enhanced search and filtering capabilities.",2019-01-01 +31659794,The Parkinson's Disease Mendelian Randomization Research Portal.,"

Background

Mendelian randomization is a method for exploring observational associations to find evidence of causality.

Objective

To apply Mendelian randomization between risk factors/phenotypic traits (exposures) and PD in a large, unbiased manner, and to create a public resource for research.

Methods

We used two-sample Mendelian randomization in which the summary statistics relating to single-nucleotide polymorphisms from 5,839 genome-wide association studies of exposures were used to assess causal relationships with PD. We selected the highest-quality exposure genome-wide association studies for this report (n = 401). For the disease outcome, summary statistics from the largest published PD genome-wide association studies were used. For each exposure, the causal effect on PD was assessed using the inverse variance weighted method, followed by a range of sensitivity analyses. We used a false discovery rate of 5% from the inverse variance weighted analysis to prioritize exposures of interest.

Results

We observed evidence for causal associations between 12 exposures and risk of PD. Of these, nine were effects related to increasing adiposity and decreasing risk of PD. The remaining top three exposures that affected PD risk were tea drinking, time spent watching television, and forced vital capacity, but these may have been biased and were less convincing. Other exposures at nominal statistical significance included inverse effects of smoking and alcohol.

Conclusions

We present a new platform which offers Mendelian randomization analyses for a total of 5,839 genome-wide association studies versus the largest PD genome-wide association studies available (https://pdgenetics.shinyapps.io/MRportal/). Alongside, we report further evidence to support a causal role for adiposity on lowering the risk of PD. © 2019 The Authors. Movement Disorders published by Wiley Periodicals, Inc. on behalf of International Parkinson and Movement Disorder Society.",2019-10-28 +29145608,MeDReaders: a database for transcription factors that bind to methylated DNA.,"Understanding the molecular principles governing interactions between transcription factors (TFs) and DNA targets is one of the main subjects for transcriptional regulation. Recently, emerging evidence demonstrated that some TFs could bind to DNA motifs containing highly methylated CpGs both in vitro and in vivo. Identification of such TFs and elucidation of their physiological roles now become an important stepping-stone toward understanding the mechanisms underlying the methylation-mediated biological processes, which have crucial implications for human disease and disease development. Hence, we constructed a database, named as MeDReaders, to collect information about methylated DNA binding activities. A total of 731 TFs, which could bind to methylated DNA sequences, were manually curated in human and mouse studies reported in the literature. In silico approaches were applied to predict methylated and unmethylated motifs of 292 TFs by integrating whole genome bisulfite sequencing (WGBS) and ChIP-Seq datasets in six human cell lines and one mouse cell line extracted from ENCODE and GEO database. MeDReaders database will provide a comprehensive resource for further studies and aid related experiment designs. The database implemented unified access for users to most TFs involved in such methylation-associated binding actives. 
The website is available at http://medreader.org/.",2018-01-01 +33222632,In silico design of influenza a virus artificial epitope-based T-cell antigens and the evaluation of their immunogenicity in mice.,"The polyepitope strategy is promising approach for successfully creating a broadly protective flu vaccine, which targets T-lymphocytes (both CD4+ and CD8+) to recognise the most conserved epitopes of viral proteins. In this study, we employed a computer-aided approach to develop several artificial antigens potentially capable of evoking immune responses to different virus subtypes. These antigens included conservative T-cell epitopes of different influenza A virus proteins. To design epitope-based antigens we used experimentally verified information regarding influenza virus T-cell epitopes from the Immune Epitope Database (IEDB) (http://www.iedb.org). We constructed two ""human"" and two ""murine"" variants of polyepitope antigens. Amino acid sequences of target polyepitope antigens were designed using our original TEpredict/PolyCTLDesigner software. Immunogenic and protective features of DNA constructs encoding ""murine"" target T-cell immunogens were studied in BALB/c mice. We showed that mice groups immunised with a combination of computer-generated ""murine"" DNA immunogens had a 37.5% survival rate after receiving a lethal dose of either A/California/4/2009 (H1N1) virus or A/Aichi/2/68 (H3N2) virus, while immunisation with live flu H1N1 and H3N2 vaccine strains provided protection against homologous viruses and failed to protect against heterologous viruses. These results demonstrate that mechanisms of cross-protective immunity may be associated with the stimulation of specific T-cell responses. 
This study demonstrates that our computer-aided approach may be successfully used for rational designing artificial polyepitope antigens capable of inducing virus-specific T-lymphocyte responses and providing partial protection against two different influenza virus subtypes.Communicated by Ramaswamy H. Sarma.",2020-11-23 +33391898,The Impact of Connective Tissue Diseases on the Inpatient Outcomes of Congestive Heart Failure Patients.,"Background Rheumatoid arthritis (RA) and systemic lupus erythematosus (SLE) are autoimmune diseases with chronically elevated inflammatory activity. Treatments typically have been aimed at decreasing inflammation. While RA and SLE are known to have a high incidence of congestive heart failure (HF), the mechanism behind this remains elusive. We sought to assess the outcomes of HF patients with either RA or SLE as opposed to HF patients without RA or SLE. Methods We conducted a retrospective analysis of the Healthcare Utilization Project - National Inpatient Sample Database from 2010 to 2015 (third quarter). Patients with a primary admitting diagnosis of HF were queried, and those with or without a diagnosis of either SLE or RA were separated into two groups. In-hospital mortality, total charges (TOTCHG), and length of stay (LOS) were analyzed with a multivariate regression model adjusted for demographical and comorbidity variables, using generalized linear models with family binomial, gamma, and negative-binomial, respectively. A p-value smaller than 0.05 was deemed statistically significant. All the statistical analyses were performed in R 3.5.5 (R Core Team, 2013, http://www.R-project.org/). Results  The in-hospital mortality (3.4% v/s 4.43%), mean TOTCHG ($46k v/s $51k), and mean LOS (5.79 v/s 6.12 days) were significantly lower in HF patients with RA/SLE when compared with HF patients without RA/SLE. A younger age (70.5 v/s 72.6 years) and a female preponderance (75% v/s 51%) were evident in the RA/SLE group. 
Both groups consistently showed a significant disparity in the rates of hospitalization, which was inversely related to household income. p-value was less than 0.001 for all the above outcomes. Conclusions  RA/SLE patients are associated with better in-hospital outcomes of HF. The underlying mechanism is unclear in terms of this paradox. Given the fact that the majority of RA/SLE patients are treated with agents aimed at decreasing inflammation, this may shed light on the role of inflammation being an important contributor to HF and implicate a future therapeutic direction.",2020-11-23 +31448721,Renal length and volume prediction in healthy children.,"

Introduction

Little information is available regarding the evaluation of renal volume in healthy Latin-American children of different ages. The objective of this work was to establish a predictive model of renal size (volume and length) and develop a web-based calculator.

Materials and methods

A selective and representative sample was obtained randomly from the database of healthy children living in Resistencia city, Chaco, Argentina: a) the National Health Program for children under 6 years old; b) school children until 18 years old (primary and middle education). Renal dimensions were obtained by ultrasonography via a single experienced operator at the indicated site (schools or primary health care centers). Renal volume was calculated using Dinkel's formula. A multiple linear regression model was applied using potential predictors. The final model was implemented in a free web-based application.

Results

Random selection was made from the database to include 882 subjects with ages between 0.03 and 230.63 months. The data was divided into two sets (one for training and the other for model testing). The training set (423) included 212 (50%) females. Significant predictors included age, height, current weight and birth weight, and the interaction between age and present weight. Using the test dataset, the root mean square errors for renal volume and length were 5.06 cm³ and 0.59 cm, respectively.

Conclusion

The prediction model was accurate and allowed for the development of a freely-available web app: Renal size prediction (https://porbm28.shinyapps.io/RenalVolume/). Once the models are validated by additional studies, the app could be a useful tool to predict renal volume and length in pediatric clinical practice.",2020-01-01 +32866236,SAAMBE-SEQ: a sequence-based method for predicting mutation effect on protein-protein binding affinity.,"

Motivation

The vast majority of human genetic disorders are associated with mutations that affect protein-protein interactions by altering wild-type binding affinity. Therefore, it is extremely important to assess the effect of mutations on protein-protein binding free energy to assist the development of therapeutic solutions. Currently, the most popular approaches use structural information to deliver the predictions, which precludes their application to genome-scale investigations. Indeed, with the progress of genomic sequencing, researchers are frequently dealing with assessing the effect of mutations for which there is no structure available.

Results

Here, we report a Gradient Boosting Decision Tree machine learning algorithm, the SAAMBE-SEQ, which is completely sequence-based and does not require structural information at all. SAAMBE-SEQ utilizes 80 features representing evolutionary information, sequence-based features and change of physical properties upon mutation at the mutation site. The approach is shown to achieve Pearson correlation coefficient (PCC) of 0.83 in 5-fold cross validation in a benchmarking test against experimentally determined binding free energy change (ΔΔG). Further, a blind test (no-STRUC) is compiled collecting experimental ΔΔG upon mutation for protein complexes for which structure is not available and used to benchmark SAAMBE-SEQ resulting in PCC in the range of 0.37-0.46. The accuracy of SAAMBE-SEQ method is found to be either better or comparable to most advanced structure-based methods. SAAMBE-SEQ is very fast, available as webserver and stand-alone code, and indeed utilizes only sequence information, and thus it is applicable for genome-scale investigations to study the effect of mutations on protein-protein interactions.

Availability and implementation

SAAMBE-SEQ is available at http://compbio.clemson.edu/saambe_webserver/indexSEQ.php#started.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +31633777,A comparison and assessment of computational method for identifying recombination hotspots in Saccharomyces cerevisiae.,"Meiotic recombination is one of the most important driving forces of biological evolution, which is initiated by double-strand DNA breaks. Recombination has important roles in genome diversity and evolution. This review firstly provides a comprehensive survey of the 15 computational methods developed for identifying recombination hotspots in Saccharomyces cerevisiae. These computational methods were discussed and compared in terms of underlying algorithms, extracted features, predictive capability and practical utility. Subsequently, a more objective benchmark data set was constructed to develop a new predictor iRSpot-Pse6NC2.0 (http://lin-group.cn/server/iRSpot-Pse6NC2.0). To further demonstrate the generalization ability of these methods, we compared iRSpot-Pse6NC2.0 with existing methods on the chromosome XVI of S. cerevisiae. The results of the independent data set test demonstrated that the new predictor is superior to existing tools in the identification of recombination hotspots. The iRSpot-Pse6NC2.0 will become an important tool for identifying recombination hotspot.",2020-09-01 +32993478,TRIP - T cell receptor/immunoglobulin profiler.,"

Background

Antigen receptors are characterized by an extreme diversity of specificities, which poses major computational and analytical challenges, particularly in the era of high-throughput immunoprofiling by next generation sequencing (NGS). The T cell Receptor/Immunoglobulin Profiler (TRIP) tool offers the opportunity for an in-depth analysis based on the processing of the output files of the IMGT/HighV-Quest tool, a standard in NGS immunoprofiling, through a number of interoperable modules. These provide detailed information about antigen receptor gene rearrangements, including variable (V), diversity (D) and joining (J) gene usage, CDR3 amino acid and nucleotide composition and clonality of both T cell receptors (TR) and B cell receptor immunoglobulins (BcR IG), and characteristics of the somatic hypermutation within the BcR IG genes. TRIP is a web application implemented in R shiny.

Results

Two sets of experiments have been performed in order to evaluate the efficiency and performance of the TRIP tool. The first used a number of synthetic datasets, ranging from 250k to 1M sequences, and established the linear response time of the tool (about 6 h for 1M sequences processed through the entire BcR IG data pipeline). The reproducibility of the tool was tested comparing the results produced by the main TRIP workflow with the results from a previous pipeline used on the Galaxy platform. As expected, no significant differences were noted between the two tools; although the preselection process seems to be stricter within the TRIP pipeline, about 0.1% more rearrangements were filtered out, with no impact on the final results.

Conclusions

TRIP is a software framework that provides analytical services on antigen receptor gene sequence data. It is accurate and contains functions for data wrangling, cleaning, analysis and visualization, enabling the user to build a pipeline tailored to their needs. TRIP is publicly available at https://bio.tools/TRIP_-_T-cell_Receptor_Immunoglobulin_Profiler .",2020-09-29 +33293426,Targeting MARCO and IL37R on Immunosuppressive Macrophages in Lung Cancer Blocks Regulatory T Cells and Supports Cytotoxic Lymphocyte Function.,"The progression and metastatic capacity of solid tumors are strongly influenced by immune cells in the tumor microenvironment. In non-small cell lung cancer (NSCLC), accumulation of anti-inflammatory tumor-associated macrophages (TAM) is associated with worse clinical outcome and resistance to therapy. Here we investigated the immune landscape of NSCLC in the presence of protumoral TAMs expressing the macrophage receptor with collagenous structure (MARCO). MARCO-expressing TAM numbers correlated with increased occurrence of regulatory T cells and effector T cells and decreased natural killer (NK) cells in these tumors. Furthermore, transcriptomic data from the tumors uncovered a correlation between MARCO expression and the anti-inflammatory cytokine IL37. In vitro studies subsequently showed that lung cancer cells polarized macrophages to express MARCO and gain an immune-suppressive phenotype through the release of IL37. MARCO-expressing TAMs blocked cytotoxic T-cell and NK-cell activation, inhibiting their proliferation, cytokine production, and tumor killing capacity. Mechanistically, MARCO+ macrophages enhanced regulatory T (Treg) cell proliferation and IL10 production and diminished CD8 T-cell activities. 
Targeting MARCO or IL37 receptor (IL37R) by antibody or CRISPR knockout of IL37 in lung cancer cell lines repolarized TAMs, resulting in recovered cytolytic activity and antitumoral capacity of NK cells and T cells and downmodulated Treg cell activities. In summary, our data demonstrate a novel immune therapeutic approach targeting human TAMs immune suppression of NK- and T-cell antitumor activities. SIGNIFICANCE: This study defines tumor-derived IL37 and the macrophage scavenger receptor MARCO as potential therapeutic targets to remodel the immune-suppressive microenvironment in patients with lung cancer. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/4/956/F1.large.jpg.",2020-12-08 +30615063,"MMseqs2 desktop and local web server app for fast, interactive sequence searches.","

Summary

The MMseqs2 desktop and web server app facilitates interactive sequence searches through custom protein sequence and profile databases on personal workstations. By eliminating MMseqs2's runtime overhead, we reduced response times to a few seconds at sensitivities close to BLAST.

Availability and implementation

The app is easy to install for non-experts. GPLv3-licensed code, pre-built desktop app packages for Windows, MacOS and Linux, Docker images for the web server application and a demo web server are available at https://search.mmseqs.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +26117828,Evolutionary annotation of conserved long non-coding RNAs in major mammalian species.,"Mammalian genomes contain tens of thousands of long non-coding RNAs (lncRNAs) that have been implicated in diverse biological processes. However, the lncRNA transcriptomes of most mammalian species have not been established, limiting the evolutionary annotation of these novel transcripts. Based on RNA sequencing data from six tissues of nine species, we built comprehensive lncRNA catalogs (4,142-42,558 lncRNAs) covering the major mammalian species. Compared to protein- coding RNAs, expression of lncRNAs exhibits striking lineage specificity. Notably, although 30%-99% human lncRNAs are conserved across different species on DNA locus level, only 20%-27% of these conserved lncRNA loci are detected to transcription, which represents a stark contrast to the proportion of conserved protein-coding genes (48%-80%). This finding provides a valuable resource for experimental scientists to study the mechanisms of lncRNAs. Moreover, we constructed lncRNA expression phylogenetic trees across nine mammals and demonstrated that lncRNA expression profiles can reliably determine phylogenic placement in a manner similar to their coding counterparts. Our data also reveal that the evolutionary rate of lncRNA expression varies among tissues and is significantly higher than those for protein-coding genes. To streamline the processes of browsing lncRNAs and detecting their evolutionary statuses, we integrate all the data produced in this study into a database named PhyloNONCODE (http://www.bioinfo.org/phyloNoncode). 
Our work starts to place mammalian lncRNAs in an evolutionary context and represent a rich resource for comparative and functional analyses of this critical layer of genome.",2015-06-27 +33293425,Optimal Timing for Cancer Screening and Adaptive Surveillance Using Mathematical Modeling.,"Cancer screening and early detection efforts have been partially successful in reducing incidence and mortality, but many improvements are needed. Although current medical practice is informed by epidemiologic studies and experts, the decisions for guidelines are ultimately ad hoc. We propose here that quantitative optimization of protocols can potentially increase screening success and reduce overdiagnosis. Mathematical modeling of the stochastic process of cancer evolution can be used to derive and optimize the timing of clinical screens so that the probability is maximal that a patient is screened within a certain ""window of opportunity"" for intervention when early cancer development may be observable. Alternative to a strictly empirical approach or microsimulations of a multitude of possible scenarios, biologically based mechanistic modeling can be used for predicting when best to screen and begin adaptive surveillance. We introduce a methodology for optimizing screening, assessing potential risks, and quantifying associated costs to healthcare using multiscale models. As a case study in Barrett's esophagus, these methods were applied for a model of esophageal adenocarcinoma that was previously calibrated to U.S. cancer registry data. Optimal screening ages for patients with symptomatic gastroesophageal reflux disease were older (58 for men and 64 for women) than what is currently recommended (age > 50 years). These ages are in a cost-effective range to start screening and were independently validated by data used in current guidelines. 
Collectively, our framework captures critical aspects of cancer evolution within patients with Barrett's esophagus for a more personalized screening design. SIGNIFICANCE: This study demonstrates how mathematical modeling of cancer evolution can be used to optimize screening regimes, with the added potential to improve surveillance regimes. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/4/1123/F1.large.jpg.",2020-12-08 +34128690,Cold Spells and Cause-Specific Mortality in 47 Japanese Prefectures: A Systematic Evaluation.,"

Background

Many studies have investigated the devastating health effects of heat waves, but less is known about health risks related to cold spells, despite evidence that extreme cold may contribute to a larger proportion of deaths.

Objectives

We aimed to systematically investigate the association between cold spells and mortality in Japan.

Methods

Daily data for weather conditions and 12 common causes of death during the 1972-2015 cold seasons (November-March) were obtained from 47 Japanese prefectures. Cold spells were defined as ≥2 consecutive days with daily mean temperatures ≤5th percentile for the cold season in each prefecture. Quasi-Poisson regression was combined with a distributed lag model to estimate prefecture-specific associations, and pooled associations at the national level were obtained through random-effects meta-analysis. The potential influence of cold spell characteristics (intensity, duration, and timing in season) on associations between cold spells and mortality was examined using a similar two-stage approach. Temporal trends were investigated using a meta-regression model.

Results

A total of 18,139,498 deaths were recorded during the study period. Mortality was significantly higher during cold spell days vs. other days for all selected causes of death. Mortality due to age-related physical debilitation was more strongly associated with cold spells than with other causes of death. Associations between cold spells and mortality from all causes and several more specific outcomes were stronger for longer and more intense cold spells and for cold spells earlier in the cold season. However, although all outcomes were positively associated with cold spell duration, findings for cold spell intensity and seasonal timing were heterogeneous across the outcomes. Associations between cold spells and mortality due to cerebrovascular disease, cerebral infarction, and age-related physical debility decreased in magnitude over time, whereas temporal trends were relatively flat for all-cause mortality and other outcomes.

Discussion

Our findings may have implications for establishing tailored public health strategies to prevent avoidable cold spell-related health consequences. https://doi.org/10.1289/EHP7109.",2021-06-15 +32913254,A gene prioritization method based on a swine multi-omics knowledgebase and a deep learning model.,"The analyses of multi-omics data have revealed candidate genes for objective traits. However, they are integrated poorly, especially in non-model organisms, and they pose a great challenge for prioritizing candidate genes for follow-up experimental verification. Here, we present a general convolutional neural network model that integrates multi-omics information to prioritize the candidate genes of objective traits. By applying this model to Sus scrofa, which is a non-model organism, but one of the most important livestock animals, the model precision was 72.9%, recall 73.5%, and F1-Measure 73.4%, demonstrating a good prediction performance compared with previous studies in Arabidopsis thaliana and Oryza sativa. Additionally, to facilitate the use of the model, we present ISwine ( http://iswine.iomics.pro/ ), which is an online comprehensive knowledgebase in which we incorporated almost all the published swine multi-omics data. Overall, the results suggest that the deep learning strategy will greatly facilitate analyses of multi-omics integration in the future.",2020-09-10 +33164522,Comprehensive Study on Molecular Supervised Learning with Graph Neural Networks.,"This work considers strategies to develop accurate and reliable graph neural networks (GNNs) for molecular property predictions. Prediction performance of GNNs is highly sensitive to the change in various parameters due to the inherent challenges in molecular machine learning, such as a deficient amount of data samples and bias in data distribution. Comparative studies with well-designed experiments are thus important to clearly understand which GNNs are powerful for molecular supervised learning. 
Our work presents a number of ablation studies along with a guideline to train and utilize GNNs for both molecular regression and classification tasks. First, we validate that using both atomic and bond meta-information improves the prediction performance in the regression task. Second, we find that the graph isomorphism hypothesis proposed by [Xu, K.; et al How powerful are graph neural networks? 2018, arXiv:1810.00826. arXiv.org e-Print archive. https://arxiv.org/abs/1810.00826] is valid for the regression task. Surprisingly, however, the findings above do not hold for the classification tasks. Beyond the study on model architectures, we test various regularization methods and Bayesian learning algorithms to find the best strategy to achieve a reliable classification system. We demonstrate that regularization methods penalizing predictive entropy might not give well-calibrated probability estimation, even though they work well in other domains, and Bayesian learning methods are capable of developing reliable prediction systems. Furthermore, we argue the importance of Bayesian learning in virtual screening by showing that well-calibrated probability estimation may lead to a higher success rate.",2020-11-08 +33128395,"Determination of complex type free, non-conjugated oligosaccharide glucose unit values in tomato xylem sap for early detection of nutrient deficiency.","Although knowledge on glycan biosynthesis and processing is continuously maturing, there are still a limited number of studies that examine biological functions of N-glycan structures in plants, which remain virtually unknown. Here, the statistical correlation between nutrient (nitrogen) deficiency symptoms of crops and changes in 8-aminopyrene-1,3,6-trisulfonic acid (APTS)-labeled complex type free oligosaccharides is reported. 
While deficiency symptoms are predicted by multispectral images and Kjeldahl digestion, APTS-labeled complex type free oligosaccharides are identified by their glucose unit (GU) values in tomato xylem sap, using capillary electrophoresis with laser induced fluorescence detection (CE-LIF). Given the limited number of structures obtained from plants, archived in the literature, in the future, it is intended to create an open access database of promising indicators, namely, glycan structures that are presumably responsible for the nutrient deficiency caused stress in plants (http://glycoplants.org).",2020-11-22 +26073932,Pedican: an online gene resource for pediatric cancers with literature evidence.,"Pediatric cancer (PC), that is cancer occurring in children, is the leading cause of death among children worldwide, with an incidence of 175,000 per year. Elucidating the genetic abnormalities and underlying cellular mechanisms may provide less toxic curative treatments. Therefore, it is important to understand the pathology of pediatric cancer at the genetic, genomic and epigenetic level. To unveil the cellular complexity of PC, we have developed a database of pediatric cancers (Pedican), the first literature-based pediatric gene data resource by comprehensive literature curation and data integration. In the current release, Pedican contains 735 human genes, 88 gene fusion and 24 chromosome abnormal events curated from 2245 PubMed abstracts. Pedican provides detailed annotations for each gene, such as Entrez gene information, involved pathways, protein-protein interactions, mutations, gene expression, methylation sites, TF regulation, and post-translational modification. Additionally Pedican has a user-friendly web interface, which allows sophisticated text query, sequence searches, and browsing by highlighted literature evidence and hundreds of cancer types. 
Overall, our curated pediatric cancer-related gene list maps the genomic and cellular landscape for various pediatric cancers, providing a valuable resource for further experiment design. The Pedican is available at http://pedican.bioinfo-minzhao.org/.",2015-06-15 +33042605,LibMI: An Open Source Library for Efficient Histopathological Image Processing.,"

Background

Whole-slide images (WSIs) as a kind of image data are rapidly growing in the digital pathology domain. With their unusually high resolution, these images are hard to support with conventional tools or file formats, which obstructs data sharing and automated analysis. Here, we propose a library, LibMI, along with its open and standardized image file format. They can be used together to efficiently read, write, modify, and annotate large images.

Materials and methods

LibMI utilizes the concept of pyramid image structure and lazy propagation from a segment tree algorithm to support reading and modifying and to guarantee that both operations have linear time complexity. Further, a cache mechanism was introduced to speed up the program.

Results

LibMI is an open and efficient library for histopathological image processing. To demonstrate its functions, we applied it to several tasks including image thresholding, microscopic color correction, and storing pixel-wise information on WSIs. The result shows that libMI is particularly suitable for modifying large images. Furthermore, compared with congeneric libraries and file formats, libMI and modifiable multiscale image (MMSI) run 18.237 times faster on read-only tasks.

Conclusions

The combination of libMI library and MMSI file format enables developers to efficiently read and modify WSIs, thus can assist in pixel-wise image processing on extremely large images to promote building image processing pipeline. The library together with the data schema is freely available on GitLab: https://gitlab.com/BioAI/libMI.",2020-08-21 +32224841,Bioinformatics services for analyzing massive genomic datasets.,"The explosive growth of next-generation sequencing data has resulted in ultra-large-scale datasets and ensuing computational problems. In Korea, the amount of genomic data has been increasing rapidly in the recent years. Leveraging these big data requires researchers to use large-scale computational resources and analysis pipelines. A promising solution for addressing this computational challenge is cloud computing, where CPUs, memory, storage, and programs are accessible in the form of virtual machines. Here, we present a cloud computing-based system, Bio-Express, that provides user-friendly, cost-effective analysis of massive genomic datasets. Bio-Express is loaded with predefined multi-omics data analysis pipelines, which are divided into genome, transcriptome, epigenome, and metagenome pipelines. Users can employ predefined pipelines or create a new pipeline for analyzing their own omics data. We also developed several web-based services for facilitating downstream analysis of genome data. Bio-Express web service is freely available at https://www.bioexpress.re.kr/.",2020-03-31 +30323887,Functional and taxonomic classification of a greenhouse water drain metagenome.,"Microbiome sequencing has become the standard procedure in the study of new ecological and human-constructed niches. To our knowledge, this is the first report of a metagenome from the water of a greenhouse drain. We found that the greenhouse is not a diverse niche, mainly dominated by Rhizobiales and Rodobacterales. 
The analysis of the functions encoded in the metagenome showed enrichment of characteristic features of soil and root-associated bacteria such as ABC-transporters and hydrolase enzymes. Additionally, we found antibiotic resistances genes principally for spectinomycin, tetracycline, and aminoglycosides. This study aimed to identify the bacteria and functional gene composition of a greenhouse water drain sample and also provide a genomic resource to search novel proteins from a previously unexplored niche. All the metagenome proteins and their annotations are available to the scientific community via http://microbiomics.ibt.unam.mx/tools/metagreenhouse/.",2018-10-05 +27374121,HPIDB 2.0: a curated database for host-pathogen interactions. ,"Identification and analysis of host-pathogen interactions (HPI) is essential to study infectious diseases. However, HPI data are sparse in existing molecular interaction databases, especially for agricultural host-pathogen systems. Therefore, resources that annotate, predict and display the HPI that underpin infectious diseases are critical for developing novel intervention strategies. HPIDB 2.0 (http://www.agbase.msstate.edu/hpi/main.html) is a resource for HPI data, and contains 45, 238 manually curated entries in the current release. Since the first description of the database in 2010, multiple enhancements to HPIDB data and interface services were made that are described here. Notably, HPIDB 2.0 now provides targeted biocuration of molecular interaction data. As a member of the International Molecular Exchange consortium, annotations provided by HPIDB 2.0 curators meet community standards to provide detailed contextual experimental information and facilitate data sharing. Moreover, HPIDB 2.0 provides access to rapidly available community annotations that capture minimum molecular interaction information to address immediate researcher needs for HPI network analysis. 
In addition to curation, HPIDB 2.0 integrates HPI from existing external sources and contains tools to infer additional HPI where annotated data are scarce. Compared to other interaction databases, our data collection approach ensures HPIDB 2.0 users access the most comprehensive HPI data from a wide range of pathogens and their hosts (594 pathogen and 70 host species, as of February 2016). Improvements also include enhanced search capacity, addition of Gene Ontology functional information, and implementation of network visualization. The changes made to HPIDB 2.0 content and interface ensure that users, especially agricultural researchers, are able to easily access and analyse high quality, comprehensive HPI data. All HPIDB 2.0 data are updated regularly, are publicly available for direct download, and are disseminated to other molecular interaction resources. Database URL: http://www.agbase.msstate.edu/hpi/main.html.",2016-07-03 +31337335,Pathogenic Protist Transmembranome database (PPTdb): a web-based platform for searching and analysis of protist transmembrane proteins.,"

Background

Pathogenic protist membrane transporter proteins play important roles not only in exchanging molecules into and out of cells but also in acquiring nutrients and biosynthetic compounds from their hosts. Currently, there is no centralized protist membrane transporter database published, which makes system-wide comparisons and studies of host-pathogen membranomes difficult to achieve.

Results

We analyzed over one million protein sequences from 139 protists with full or partial genome sequences. Putative transmembrane proteins were annotated by primary sequence alignments, conserved secondary structural elements, and functional domains. We have constructed the PPTdb (Pathogenic Protist Transmembranome database), a comprehensive membrane transporter protein portal for pathogenic protists and their human hosts. The PPTdb is a web-based database with a user-friendly searching and data querying interface, including hierarchical transporter classification (TC) numbers, protein sequences, functional annotations, conserved functional domains, batch sequence retrieving and downloads. The PPTdb also serves as an analytical platform to provide useful comparison/mining tools, including transmembrane ability evaluation, annotation of unknown proteins, informative visualization charts, and iterative functional mining of host-pathogen transporter proteins.

Conclusions

The PPTdb collected putative protist transporter proteins and offers a user-friendly data retrieving interface. Moreover, a pairwise functional comparison ability can provide useful information for identifying functional uniqueness of each protist. Finally, the host and non-host protein similarity search can fulfill the needs of comprehensive studies of protists and their hosts. The PPTdb is freely accessible at http://pptdb.cgu.edu.tw .",2019-07-24 +33797913,CHARMM-GUI Polymer Builder for Modeling and Simulation of Synthetic Polymers.,"Molecular modeling and simulations are invaluable tools for polymer science and engineering, which predict physicochemical properties of polymers and provide molecular-level insight into the underlying mechanisms. However, building realistic polymer systems is challenging and requires considerable experience because of great variations in structures as well as length and time scales. This work describes Polymer Builder in CHARMM-GUI (http://www.charmm-gui.org/input/polymer), a web-based infrastructure that provides a generalized and automated process to build a relaxed polymer system. Polymer Builder not only provides versatile modeling methods to build complex polymer structures, but also generates realistic polymer melt and solution systems through the built-in coarse-grained model and all-atom replacement. The coarse-grained model parametrization is generalized and extensively validated with various experimental data and all-atom simulations. In addition, the capability of Polymer Builder for generating relaxed polymer systems is demonstrated by density calculations of 34 homopolymer melt systems, characteristic ratio calculations of 170 homopolymer melt systems, a morphology diagram of poly(styrene-b-methyl methacrylate) block copolymers, and self-assembly behavior of amphiphilic poly(ethylene oxide-b-ethylethane) block copolymers in water. 
We hope that Polymer Builder is useful to carry out innovative and novel polymer modeling and simulation research to acquire insight into structures, dynamics, and underlying mechanisms of complex polymer-containing systems.",2021-04-02 +,Promotion of technological development and determination of biotechnology trends in five selected Latin American countries: An analysis based on PCT patent applications,"Science and technology are two desirable elements for the economic and social development of a country. Biotechnology has a particularly important potential for economic development. Nevertheless, patent production in Latin America remains underdeveloped, which creates the need to analyze its trend and the efforts made to promote patent production. Therefore, the purpose of this study was, on the one hand, to determine trends in biotechnology-related PCT (Patent Cooperation Treaty) applications in Chile, Mexico, Argentina, Brazil, and Cuba from 1999 to 2015, and, on the other hand, to determine whether there is a relationship between the gross domestic expenditure on research and experimental development as a percentage of gross domestic product (GERD/GDP) and PCT applications for biotechnological inventions from 2007 to 2015 (in this case, the period under study was limited from 2007 to 2015, due to data availability for GERD/GDP in the five selected countries).The first part of this study shows that the growth in biotechnology PCT applications has been moderate and gradual and the trend was fitted to a linear model. 
The second set of results shows that GERD/GDP is associated with biotechnology-related PCT applications issued during the study period with a significance level of α = 0.01. Even though results indicate a gradual and modest progress, it is necessary that these five representative Latin American nations continue acting toward the protection of intellectual property in the area of biotechnology, especially by configuring strategies for further progress based on investments on research and development. How to cite: Barragán-Ocaña A, Gómez-Viquez H, Merritt H, et al. Promotion of technological development and determination of biotechnology trends in five selected Latin American countries: An analysis based on PCT patent applications. Electron J Biotechnol 2019;37. https://doi.org/10.1016/j.ejbt.2018.10.004.",2019-01-01 +30459848,Distributed retrieval engine for the development of cloud-deployed biological databases.,"The integration of cloud resources with federated data retrieval has the potential of improving the maintenance, accessibility and performance of specialized databases in the biomedical field. However, such an integrative approach requires technical expertise in cloud computing, usage of a data retrieval engine and development of a unified data-model, which can encapsulate the heterogeneity of biological data. Here, a framework for the development of cloud-based biological specialized databases is proposed. It is powered by a distributed biodata retrieval system, able to interface with different data formats, as well as provides an integrated way for data exploration. The proposed framework was implemented using Java as the development environment, and MongoDB as the database manager. Syntactic analysis was based on BSON, jsoup, Apache Commons and w3c.dom open libraries. 
Framework is available in: http://nbel-lab.com and is distributed under the creative common agreement.",2018-11-12 +33796544,Innovations Developed by Patients and Informal Caregivers for Needs Associated to Rheumatic Diseases.,"Until recently, innovation in healthcare was mainly achieved through the development of new drugs, therapies, and medical devices by big pharma and medtech companies; however, the innovative potential for this field is much broader. The patients and caregivers' role in healthcare is often associated with disease management, demand for their own illness data, and its exchange with other patients. However, the patients and caregivers' capacity to innovate to cope with limitations associated with their health condition is a growing phenomenon and starting to be supported by healthcare stakeholders to achieve a truly patient-centric system. Our previous research has shown that these uncommon innovators can develop a wide range of solutions, from simple adaptations and products to highly technological biomedical devices. In this paper, we present novel solutions developed by rheumatic patients, their caregivers, and collaborators, published on the ""Patient Innovation"" platform (https://patient-innovation.com/), with a focus on the innovator profile, the need that triggers the innovative process, the type of motivation behind the product, and the products developed. The most significant needs that motivate innovation are the will to increase the level of independence (71%) and to be able to perform daily routine activities (65%). In over 80% of cases, the fact that the market does not fully fulfill the needs felt during daily activities is the main motivation to innovate. 
It is thus concluded that there is room for innovation in rheumatic diseases with solutions developed by patients and informal caregivers that intend to solve needs that the healthcare market is not covering.",2021-03-16 +33860704,Medical help-seeking intentions for cognitive impairment by the patient.,"Objectives: Older adults represent one of the fastest growing population groups. As the aged population increases, incidence of Alzheimer's disease (AD) and other dementias will also increase. Professionals agree that early intervention is essential for therapeutic and quality of life purposes; however, many older adults wait several months or years to seek medical help after first noticing signs of cognitive impairment. The present study sought to identify the predictors of help-seeking for cognitive impairment by an individual for him/herself after the first detection of symptoms.Method: An online survey was administered to adults (N = 250) 50 years old and older. Individuals responded about their help-seeking intentions in response to a hypothetical vignette depicting symptoms of cognitive decline derived from a similar study with caregivers conducted by Qualls and colleagues. Additional standardized measures measuring constructs such as knowledge of Alzheimer's disease were completed.Results: The present study reveals that cognitive (i.e. symptom identification and disease attribution) and affective (i.e. symptom impact and threat appraisal) factors, as well as an interaction between the two, are predictive of help-seeking intentions with excellent model fit.Conclusion: Help-seeking intentions by individuals with possible cognitive impairment are comparable to those of potential caregivers. Contrary to hypotheses, high threat appraisal positively predicted help-seeking intentions despite the expectation that threat-induced fear would lead to avoidance. 
Recommendations are made for future research to further investigate both patients' help-seeking intentions and actions in response to signs of cognitive impairment. Supplemental data for this article is available online at https://doi.org/10.1080/13607863.2021.1910791 .",2021-04-16 +26989155,dbWGFP: a database and web server of human whole-genome single nucleotide variants and their functional predictions. ,"The recent advancement of the next generation sequencing technology has enabled the fast and low-cost detection of all genetic variants spreading across the entire human genome, making the application of whole-genome sequencing a tendency in the study of disease-causing genetic variants. Nevertheless, there still lacks a repository that collects predictions of functionally damaging effects of human genetic variants, though it has been well recognized that such predictions play a central role in the analysis of whole-genome sequencing data. To fill this gap, we developed a database named dbWGFP (a database and web server of human whole-genome single nucleotide variants and their functional predictions) that contains functional predictions and annotations of nearly 8.58 billion possible human whole-genome single nucleotide variants. Specifically, this database integrates 48 functional predictions calculated by 17 popular computational methods and 44 valuable annotations obtained from various data sources. Standalone software, user-friendly query services and free downloads of this database are available at http://bioinfo.au.tsinghua.edu.cn/dbwgfp. 
dbWGFP provides a valuable resource for the analysis of whole-genome sequencing, exome sequencing and SNP array data, thereby complementing existing data sources and computational resources in deciphering genetic bases of human inherited diseases.",2016-03-17 +32944598,Dataset on amelogenesis-related genes variants (ENAM and ENAM interacting genes) and on human leukocyte antigen alleles (DQ2 and DQ8) distribution in children with and without molar-incisor hypomineralisation (MIH).,"All children, who were born in 2004 and had undergone surgical treatment for recurrent acute tonsillitis and/or acute otitis media at the ear, nose and throat clinic (ENT) between 2004 and 2010, were called on dental examination and blood sampling. Out of 441 invitees, 113 children and their parents/legal guardians agreed to participate. The following data from this group of subjects are presented: the presence of clinical signs of molar-incisor hypomineralisation (MIH), the distribution of human leukocyte antigen (HLA) alleles DQ2 and DQ8 and eight single nucleotide polymorphisms (SNPs) located in amelogenesis-related genes (rs3796704 in the ENAM gene, rs546778141 in the AMBN gene, rs2106416 in the AMELX gene, rs7660807 and rs35286445 in the AMTN gene, rs4870723 in the COL14A1 gene, rs2245803 in the MMP20 gene, and rs3828054 in the TUFT1 gene). Data on clinical signs of MIH were collected in accordance with the recommendation and on the proposed MIH clinical data recording sheet [1], and with appropriate preliminary training and calibration. Data on HLA DQ2 and DQ8 haplotypes and on SNPs of amelogenesis-related genes were obtained using DNA isolated from blood samples taken from subjects. The HLA DQ2 and DQ8 alleles were determined using the EliGene® Coeliac RT Kits (90,048-RT; Elisabeth Pharmacon spol. s.r.o., Brno-Židenice, Czech Republic) on a 7500 Fast RT-PCR System (Applied Biosystems, Waltham, MA, USA). 
The distributions of SNPs in the amelogenesis-related genes were determined using high resolution melting (HRM) using the Type-IT HRM Master Mix (Qiagen), TaqMan genotyping assays (ID: C__25766207_10; Thermo Fisher Scientific, Waltham, MA, USA) with the TaqMan Universal Master Mix II, or Sanger sequencing using sequencing master mix BigDye® Terminator v3.1 (Applied Biosystems) and ABI 3500 Genetic Analyser (Applied Biosystems). L. Hočevar, J. Kovač, K. Trebušak Podkrajšek, S. Battelino, A. Pavlič, 2020. The possible influence of genetic aetiological factors on molar-incisor hypomineralisation, Arch. Oral. Biol. 118, 104848. https://doi.org/10.1016/j.archoralbio.2020.104848.",2020-08-25 +30835202,An atlas of polygenic risk score associations to highlight putative causal relationships across the human phenome.,"The age of large-scale genome-wide association studies (GWAS) has provided us with an unprecedented opportunity to evaluate the genetic liability of complex disease using polygenic risk scores (PRS). In this study, we have analysed 162 PRS (p<5×10^-05) derived from GWAS and 551 heritable traits from the UK Biobank study (N = 334,398). Findings can be investigated using a web application (http://mrcieu.mrsoftware.org/PRS_atlas/), which we envisage will help uncover both known and novel mechanisms which contribute towards disease susceptibility. To demonstrate this, we have investigated the results from a phenome-wide evaluation of schizophrenia genetic liability. Amongst findings were inverse associations with measures of cognitive function, for which extensive follow-up analyses using Mendelian randomization (MR) provided evidence of a causal relationship. We have also investigated the effect of multiple risk factors on disease using mediation and multivariable MR frameworks. 
Our atlas provides a resource for future endeavours seeking to unravel the causal determinants of complex disease.",2019-03-05 +32766811,iCarPS: a computational tool for identifying protein carbonylation sites by novel encoded features.,"

Motivation

Protein carbonylation is one of the most important oxidative stress-induced post-translational modifications, which is generally characterized by stability, irreversibility and relatively early formation. It plays a significant role in orchestrating various biological processes and has already been demonstrated to be related to many diseases. However, the experimental technologies for carbonylation sites identification are not only costly and time consuming, but also incapable of processing a large number of proteins at a time. Thus, rapidly and effectively identifying carbonylation sites by computational methods will provide key clues for the analysis of occurrence and development of diseases.

Results

In this study, we developed a predictor called iCarPS to identify carbonylation sites based on sequence information. A novel feature encoding scheme called residues conical coordinates combined with their physicochemical properties was proposed to formulate carbonylated protein and non-carbonylated protein samples. To remove potential redundant features and improve the prediction performance, a feature selection technique was used. The accuracy and robustness of iCarPS were proved by experiments on training and independent datasets. Comparison with other published methods demonstrated that the proposed method is powerful and could provide powerful performance for carbonylation sites identification.

Availability and implementation

Based on the proposed model, a user-friendly webserver and a software package were constructed, which can be freely accessed at http://lin-group.cn/server/iCarPS.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-04-01 +26876983,Regulators of Androgen Action Resource: a one-stop shop for the comprehensive study of androgen receptor action. ,"Androgen receptor (AR) is a ligand-activated transcription factor that is the main target for treatment of non-organ-confined prostate cancer (CaP). Failure of life-prolonging AR-targeting androgen deprivation therapy is due to flexibility in steroidogenic pathways that control intracrine androgen levels and variability in the AR transcriptional output. Androgen biosynthesis enzymes, androgen transporters and AR-associated coregulators are attractive novel CaP treatment targets. These proteins, however, are characterized by multiple transcript variants and isoforms, are subject to genomic alterations, and are differentially expressed among CaPs. Determining their therapeutic potential requires evaluation of extensive, diverse datasets that are dispersed over multiple databases, websites and literature reports. Mining and integrating these datasets are cumbersome, time-consuming tasks and provide only snapshots of relevant information. To overcome this impediment to effective, efficient study of AR and potential drug targets, we developed the Regulators of Androgen Action Resource (RAAR), a non-redundant, curated and user-friendly searchable web interface. RAAR centralizes information on gene function, clinical relevance, and resources for 55 genes that encode proteins involved in biosynthesis, metabolism and transport of androgens and for 274 AR-associated coregulator genes. Data in RAAR are organized in two levels: (i) Information pertaining to production of androgens is contained in a 'pre-receptor level' database, and coregulator gene information is provided in a 'post-receptor level' database, and (ii) an 'other resources' database contains links to additional databases that are complementary to and useful to pursue further the information provided in RAAR. 
For each of its 329 entries, RAAR provides access to more than 20 well-curated publicly available databases, and thus, access to thousands of data points. Hyperlinks provide direct access to gene-specific entries in the respective database(s). RAAR is a novel, freely available resource that provides fast, reliable and easy access to integrated information that is needed to develop alternative CaP therapies. Database URL: http://www.lerner.ccf.org/cancerbio/heemers/RAAR/search/.",2016-02-13 +30897000,"Austerity Policies and Mortality Rates in European Countries, 2011-2015.","

Objectives

To assess time trends in mortality rates in European countries for the period 2011 to 2015 by level of austerity measures imposed by governments in response to the economic and financial crisis.

Methods

We analyzed standardized mortality rates (SMRs) for 2011 through 2015 in 15 European countries based on Eurostat data ( http://ec.europa.eu/eurostat/data/database ). We used the Cyclically Adjusted Primary Balance (CAPB) in terciles as an independent variable to represent the level of austerity adopted in each country. We conducted a longitudinal analysis of panel data using generalized estimating equation models of SMR. We included interaction terms to assess the influence of time period and level of austerity.

Results

SMRs generally declined in the study period, except in the last year of the study. In 2015, compared with countries in the low-austerity group, countries with intermediate austerity had excess mortality of 40.2 per 100 000 per year and those with high austerity had excess mortality of 31.22 per 100 000 per year.

Conclusions

The results suggest a negative effect on mortality in those countries that apply a higher level of austerity.",2019-03-21 +32256876,Longer diagnosis-to-ablation time is associated with recurrence of atrial fibrillation after catheter ablation-Systematic review and meta-analysis.,"

Background

Diagnosis-to-ablation time (DTAT) has been postulated to be one of the predictors of atrial fibrillation (AF) recurrence, and it is a ""modifiable"" risk factor unlike that of many electrocardiographic or echocardiographic parameters. This development may change our consideration for ablation. In this systematic review and meta-analysis, we aim to analyze the latest evidence on the importance of DTAT and whether they predict the AF recurrence after catheter ablation.

Methods

We performed a comprehensive search on topics that assess diagnosis-to-ablation time (DTAT) and AF recurrence from inception up until August 2019 through PubMed, EuropePMC, Cochrane Central Database, and http://ClinicalTrials.gov.

Results

There was a total of 3548 patients from six studies. Longer DTAT was associated with increased risk for AF recurrence in all studies included. Meta-analysis of these studies showed that DTAT had a hazard ratio (HR) of 1.19 [1.02, 1.39], P = .03; I²: 92% for AF recurrence. Upon sensitivity analysis by removing a study, HR became 1.24 [1.16, 1.32], P < .001; I²: 29%. Meta-analysis on DTAT time >3 years had HR 1.73 [1.54, 1.93], P < .001; I²: 45% for the recurrence of AF. Upon subgroup analysis of data that compared >6 years to <1 year, the HR was 1.93 [1.62, 2.29], P < .001; I²: 0%.

Conclusion

Longer DTAT time is associated with an increased risk of AF recurrence. Hence, determining management at the earliest possible moment to avoid delay is of utmost importance.",2019-12-27 +33332039,[Guideline 'Ingrown toenails']. ,"Ingrown toenails (also called unguis incarnatus) are a common problem in the general population. In early 2020, the medical specialists' guideline ""Ingrown toenail"" was published in which the various treatment options are compared. Conservative treatment can be considered for stage I ingrown toenails. In stage II-III ingrown toenails and failing conservative treatment, operative treatment is recommended consisting of partial nail extraction from the ingrown nail edge in combination with destruction of the corresponding part of the matrix. There doesn't seem to be any reason to deviate from the advice in the case of a recurring ingrown toenail or an ingrown toenail in a patient with expected wound healing problems. A detailed elaboration of the guideline, which also contains a step-by-step operative approach, can be found on the Guidelines database (https://richtlijnendatabase.nl/).",2020-11-18 +27799466,IMG/VR: a database of cultured and uncultured DNA Viruses and retroviruses.,"Viruses represent the most abundant life forms on the planet. Recent experimental and computational improvements have led to a dramatic increase in the number of viral genome sequences identified primarily from metagenomic samples. As a result of the expanding catalog of metagenomic viral sequences, there exists a need for a comprehensive computational platform integrating all these sequences with associated metadata and analytical tools. Here we present IMG/VR (https://img.jgi.doe.gov/vr/), the largest publicly available database of 3908 isolate reference DNA viruses with 264 413 computationally identified viral contigs from >6000 ecologically diverse metagenomic samples. 
Approximately half of the viral contigs are grouped into genetically distinct quasi-species clusters. Microbial hosts are predicted for 20 000 viral sequences, revealing nine microbial phyla previously unreported to be infected by viruses. Viral sequences can be queried using a variety of associated metadata, including habitat type and geographic location of the samples, or taxonomic classification according to hallmark viral genes. IMG/VR has a user-friendly interface that allows users to interrogate all integrated data and interact by comparing with external sequences, thus serving as an essential resource in the viral genomics community.",2016-10-30 +31372542,FeptideDB: A web application for new bioactive peptides from food protein.,"

Background

Bioactive peptides derived from food are important sources for alternative medicine and possess therapeutic activity. Several biochemical methods have been achieved to isolate bioactive peptides from food, which are tedious and time consuming. In silico methods are an alternative process to reduce cost and time with respect to bioactive peptide production. In this paper, FeptideDB was used to collect bioactive peptide (BP) data from both published research articles and available bioactive peptide databases. FeptideDB was developed to assist in forecasting bioactive peptides from food by combining peptide cleavage tools and database matching. Furthermore, this application was able to predict the potential of cleaved peptides from 'enzyme digestion module' to identify new ACE (angiotensin converting enzyme) inhibitors using an automatic molecular docking approach.

Results

The FeptideDB web application contains tools for generating all possible peptides cleaved from input protein by various available enzymes. This database was also used for analysis and visualization to assist in bioactive peptide discovery. One module of FeptideDB has the ability to create 3-dimensional peptide structures to further predict inhibitors for the target protein, ACE (angiotensin converting enzyme).

Conclusions

FeptideDB is freely available to researchers who are interested in exploring bioactive peptides. The FeptideDB interface is easy to use, allowing users to rapidly retrieve data based on desired search criteria. FeptideDB is freely available at http://www4g.biotec.or.th/FeptideDB/. Ultimately, FeptideDB is a computational aid for assessing peptide bioactivities.",2019-07-20 +30335166,RPFdb v2.0: an updated database for genome-wide information of translated mRNA generated from ribosome profiling.,"RPFdb (http://www.rpfdb.org or http://sysbio.sysu.edu.cn/rpfdb) is a public database for hosting, analyzing and visualizing ribosome profiling (ribo-seq) data. Since its initial release in 2015, the amount of new ribo-seq data has been considerably enlarged with the increasing popularity of ribo-seq technique. Here, we describe an updated version, RPFdb v2.0, which brings significant data expansion, feature improvements, and functionality optimization: (i) RPFdb v2.0 currently hosts 2884 ribo-seq datasets from 293 studies, covering 29 different species, in comparison with 777 datasets from 82 studies and 8 species in the previous version; (ii) A refined analysis pipeline with multi-step quality controls has been applied to improve the pre-processing and alignment of ribo-seq data; (iii) New functional modules have been added to provide actively translated open reading frames (ORFs) information for each ribo-seq data; (iv) More features have been made available to increase database usability. With these additions and enhancements, RPFdb v2.0 will represent a more valuable and comprehensive database for the gene regulation community.",2019-01-01 +34014987,Automatic semantic segmentation of breast tumors in ultrasound images based on combining fuzzy logic and deep learning-A feasibility study.,"Computer aided diagnosis (CAD) of biomedical images assists physicians for a fast facilitated tissue characterization. 
A scheme based on combining fuzzy logic (FL) and deep learning (DL) for automatic semantic segmentation (SS) of tumors in breast ultrasound (BUS) images is proposed. The proposed scheme consists of two steps: the first is a FL based preprocessing, and the second is a Convolutional neural network (CNN) based SS. Eight well-known CNN based SS models have been utilized in the study. Studying the scheme was by a dataset of 400 cancerous BUS images and their corresponding 400 ground truth images. SS process has been applied in two modes: batch and one by one image processing. Three quantitative performance evaluation metrics have been utilized: global accuracy (GA), mean Jaccard Index (mean intersection over union (IoU)), and mean BF (Boundary F1) Score. In the batch processing mode: quantitative metrics' average results over the eight utilized CNNs based SS models over the 400 cancerous BUS images were: 95.45% GA instead of 86.08% without applying fuzzy preprocessing step, 78.70% mean IoU instead of 49.61%, and 68.08% mean BF score instead of 42.63%. Moreover, the resulted segmented images could show tumors' regions more accurate than with only CNN based SS. While, in one by one image processing mode: there has been no enhancement neither qualitatively nor quantitatively. So, only when a batch processing is needed, utilizing the proposed scheme may be helpful in enhancing automatic ss of tumors in BUS images. Otherwise applying the proposed approach on a one-by-one image mode will disrupt segmentation's efficiency. The proposed batch processing scheme may be generalized for an enhanced CNN based SS of a targeted region of interest (ROI) in any batch of digital images. 
A modified small dataset is available: https://www.kaggle.com/mohammedtgadallah/mt-small-dataset (S1 Data).",2021-05-20 +31116477,MetaDome: Pathogenicity analysis of genetic variants through aggregation of homologous human protein domains.,"The growing availability of human genetic variation has given rise to novel methods of measuring genetic tolerance that better interpret variants of unknown significance. We recently developed a concept based on protein domain homology in the human genome to improve variant interpretation. For this purpose, we mapped population variation from the Exome Aggregation Consortium (ExAC) and pathogenic mutations from the Human Gene Mutation Database (HGMD) onto Pfam protein domains. The aggregation of these variation data across homologous domains into meta-domains allowed us to generate amino acid resolution of genetic intolerance profiles for human protein domains. Here, we developed MetaDome, a fast and easy-to-use web server that visualizes meta-domain information and gene-wide profiles of genetic tolerance. We updated the underlying data of MetaDome to contain information from 56,319 human transcripts, 71,419 protein domains, 12,164,292 genetic variants from gnomAD, and 34,076 pathogenic mutations from ClinVar. MetaDome allows researchers to easily investigate their variants of interest for the presence or absence of variation at corresponding positions within homologous domains. We illustrate the added value of MetaDome by an example that highlights how it may help in the interpretation of variants of unknown significance. The MetaDome web server is freely accessible at https://stuart.radboudumc.nl/metadome.",2019-06-18 +,First Report of Fusarium petroliphilum Causing Fruit Rot of Butternut Squash in Spain,"Severe fruit rot of butternut squash (Cucurbita moschata Duchesne), a fruit widely consumed and cultivated in Spain, was observed in 2016 in cultivated fields and at storage conditions in Valencia Province (southeast Spain). 
Fruits exhibited sunken lesions that eventually extended into the flesh and led to 10 to 20% postharvest losses. Small (3 to 4 mm) pieces of rotted tissues were surface disinfected for 1 min in 1.5% NaOCl, washed twice with sterilized distilled water, and plated onto potato dextrose agar (PDA) amended with streptomycin sulfate (0.5 g liter–1). Plates were incubated at 25°C in the dark for 3 to 5 days. Fusarium colonies were consistently isolated, transferred to PDA and Spezieller Nährstoffarmer Agar (SNA) culture media for morphological identification, and identified as belonging to the Fusarium solani species complex (FSSC) based on morphology on SNA. Sporodochial macroconidia were 3 to 4 septate in average, slender and slightly curved, 45 (34 to 51) × 4.2 (3.5 to 5) μm; aerial microconidia were abundant, borne on short monophialides, ovoid to reniform, and 8.6 (4.9 to 16.4) × 3.4 (2.7 to 4) μm. The internal transcribed spacer region and elongation factor-1α gene of isolate GIHF-146 were sequenced (using ITS1/ITS4 and EF1-728F/EF1-986R primer pairs) and deposited in GenBank with accession numbers MF535516 (ITS) and MF580776 (EF-1α), respectively. BLASTn comparison of the two sequences showed a 99% homology with those of F. petroliphilum (Q.T. Chen & X.H. Fu) D. Geiser, O’Donnell, Short & Zhang: e.g., LC184243 (ITS) and LC177308 (EF-1α). Comparison of these sequences in the Fusarium ID database (http://www.westerdijkinstitute.nl/fusarium/) exhibited similar levels of similarity. For pathogenicity tests, isolate GIHF-146 was transferred to flasks containing potato sucrose (PS) medium and maintained in agitation (130 rpm) for 2 to 3 days at 25°C in the dark. Twelve 13-day-old squash seedlings (Cucurbita ficifolia Bouché cv. 
Cabello de Ángel) grown in trays containing sterilized substrate (Projar, Spain) were removed and dipped into a suspension of conidia (3 × 106 conidia/ml) for 2 min, and transferred to plastic pots (Teku-tainer, Pöppelmann) with sterilized substrate. Three noninoculated plants dipped in sterile water were disposed as controls. Simultaneously, five C. moschata fruits were inoculated by direct injection of 1 ml of the same conidial suspension, and one control fruit was injected with 1 ml of sterilized distilled water. Plants and fruits were placed in a growth chamber at 25°C in 16/8 h photoperiod for 20 days. Large sunken areas densely covered by a grayish mycelium surrounding the point of injection appeared 6 days post inoculation on inoculated fruits. Inoculated seedlings and control fruits remained asymptomatic. The fungus was reisolated, fulfilling Koch’s postulates. Short et al. (2013) erected F. petroliphilum to accommodate certain Fusaria from both plumbing drain biofilms and human mycotic keratitis, and is also the accepted epithet to name a group of pathogenic fungi on cucurbits (formerly named as F. solani f. sp. cucurbitae race 2) defined by its tissue specificity, causing only fruit rot on cucurbits (Tousson and Snyder 1961). To date, only Fsc race 1 had been reported from different cucurbits in Spain (Armengol et al. 2000; Gómez et al. 2008, 2014) and, to our knowledge, this is the first report of F. petroliphilum causing fruit rot of butternut squash in Spain, a species widely cultivated and employed as watermelon rootstock.",2018-08-01 +,First Report of Papaya leaf curl China virus infecting Acalypha australis in China,"Papaya leaf curl China virus (PaLCCNV), a monopartite begomovirus (genus Begomovirus, family Geminiviridae), was originally reported on Carica papaya in Guangxi province of China (Wang et al. 2004). The virus has been reported to infect C. 
papaya, Solanum lycopersicum, Nicotiana tabacum, Corchoropsis tomentosa, Siegesbeckia orientalis, and Ageratum conyzoides. In January 2015, plants of Acalypha australis, a common and widespread weed species, were found exhibiting yellow vein symptoms in fields of Nanning city, Guangxi province. Five symptomatic leaf samples were collected from each of five diseased plants. Total DNA was extracted from the five samples with the EasyPure Plant DNA kit (TransGen Biotech, Beijing, China) and was used as a template for polymerase chain reaction with the degenerate begomovirus primers AV494 (5′-gccyatrtayagraagccmag-3′) and CoPR (5′-gangsatghgtrcadgccatata-3′). The expected approximately 570-bp fragment was detected from all five samples, indicating that A. australis plants showing yellow vein symptoms were infected by a begomovirus. Begomoviral genomes were amplified from the total DNA using rolling circle amplification (RCA) (TempliPhi kit; GE Healthcare, Buckinghamshire, UK), followed by digestion with endonucleases BamH I, EcoR I, Hind III or Pst I, respectively. Amplicons corresponding to full-length begomoviral genomic components, obtained from RCA products digested with Hind III, were gel purified, ligated into the plasmid pGEM-3Z (previously digested with the corresponding restriction enzyme mentioned above), and transformed into Escherichia coli DH5α. Three clones from each sample were selected for sequencing (Invitrogen, Shanghai, China). Sequencing results were assembled and analyzed using DNAStar software version 5.0 (DNAStar, Madison, WI). A similarity search for each sequence was carried out using BLASTn to identify related sequences in the GenBank database (https://www.ncbi.nlm.nih.gov/). Pairwise nucleotide (nt) sequence identities were determined using the Species Demarcation Tool (SDT1.0). Full-length begomoviral genomic sequences were 2,735 nt long and encoded six putative open reading frames. 
The cloned genomic sequences shared 99.5 to 100% nt identities with each other. One representative sequence was deposited in GenBank as the accession number KX273343, which shared 91.8 to 99.6% nt identities with isolates of PaLCCNV in the database, with the highest nt identities to PaLCCNV-G111 [CN:Gx:Age:14] (HG003651). In accordance with the threshold of 91% for begomovirus species demarcation (Brown et al. 2015), the virus associated with A. australis yellow vein disease is an isolate of PaLCCNV. According to the obtained sequences, the specific primer pairs W-F2 (5′-GTATACACGCCACTCTCGCATTG-3′) and W-R2 (5′-CTGGACAATCAAAAATCCCCTAT-3′) were designed to amplify the full-length genome and detect the presence of PaLCCNV in A. australis samples. The expected approximately 2.7-kb full-length genomic fragment was amplified from all five samples, indicating that A. australis plants showing yellow vein symptoms were infected by PaLCCNV. To our knowledge, this is the first report of natural occurrence of PaLCCNV on A. australis in China. A. australis may play an important role as a reservoir of PaLCCNV, especially during the non-crop-planting period, but additional studies analyzing a large number of plant samples from different geographical locations within China are necessary to establish the role of this host as a virus reservoir.",2018-08-01 +28974472,Stress2TF: a manually curated database of TF regulation in plant response to stress.,"Considerable studies demonstrate that plant transcription factors (TFs) play key regulatory roles in abiotic/biotic stress conditions, such as drought and pathogen attack. However, there is no effort dedicated to curate experimentally validated stress-TF regulatory relationships from these individual reports into a central database, which put an obstacle in the exploration of stress-TF regulations in plants. 
To address this issue, we presented a literature-curated database 'Stress2TF' that currently documented 1533 regulatory relationships between 71 abiotic/biotic stresses and 558 TFs in 47 plant species. Each entry in Stress2TF contains detailed information about a stress-TF relationship such as plant name, stress name, TF and brief description of stress-TF relationship. Stress2TF provided a user-friendly interface for entry browse, search and download. In addition, a submission page and several useful tools (e.g., BLAST, network visualization) were integrated. Stress2TF may be a valuable resource for the research of stress-TF regulatory mechanisms in plants. Stress2TF is available at http://csgenomics.ahau.edu.cn/Stress2TF.",2017-09-30 +33326073,AnnoLnc: A One-Stop Portal to Systematically Annotate Novel Human Long Noncoding RNAs.,"While more than a hundred thousand long noncoding RNAs (lncRNAs) have been identified in human genome, their biological functions and regulation are largely elusive. Here we present AnnoLnc, a one-stop online annotation portal for human lncRNAs ( http://annolnc1.gao-lab.org/ ). As the first (and the most comprehensive) Web server to provide on-the-fly annotation for novel human lncRNAs, AnnoLnc exploits more than 700 data sources to annotate inputted lncRNA systematically, spanning genomic location, secondary structure, expression patterns, coexpression-based functional annotation, transcriptional regulation, miRNA interaction, protein interaction, genetic association, and evolution. 
Moreover, in addition to a user-friendly Web interface, AnnoLnc can also be integrated into existing pipelines by either a set of JSON-based web service APIs or a stand-alone version for Linux server.",2021-01-01 +33600746,"[Correction to the article ""Case Series of 103 Children with SARS-CoV-2 Infection in Portugal"", published on Acta Med Port 2020 Dec;33(12):795-802].","On page 801, fifth, where it reads: ""No início da pandemia, teorizou-se que a vacina BCG pudesse ter um efeito protetor relativamente à COVID-19,27,28 mas não se encontrou até à data evidência para tal, não estando atualmente recomendada a vacinação BCG na prevenção da COVID-19.28,29 No nosso estudo, a maioria dos doentes (76%) tinha sido vacinada. Analisámos separadamente o subgrupo de crianças nascidas após janeiro de 2016, altura em que passaram a ser vacinadas apenas as crianças pertencentes a grupos de risco.30 A taxa de vacinação neste grupo foi de 51%, sendo superior à taxa de 30,1% estimada para crianças nascidas em Portugal com um ano de idade referido a 2019.31"" It should read: ""No início da pandemia, teorizou-se que a vacina BCG pudesse ter um efeito protetor relativamente à COVID-19,27,28 mas não se encontrou até à data evidência para tal, não estando atualmente recomendada a vacinação BCG na prevenção da COVID-19.28 No nosso estudo, a maioria dos doentes (76%) tinha sido vacinada. 
Analisámos separadamente o subgrupo de crianças nascidas após janeiro de 2016, altura em que passaram a ser vacinadas apenas as crianças pertencentes a grupos de risco.29 A taxa de vacinação neste grupo foi de 51%, sendo superior à taxa de 30,1% estimada para crianças nascidas em Portugal com um ano de idade referido a 2019.30"" Paper published with errors: https://www.actamedicaportuguesa.com/revista/index.php/amp/article/view/14537.",2020-12-02 +33345257,Two Stream Active Query Suggestion for Active Learning in Connectomics.,"For large-scale vision tasks in biomedical images, the labeled data is often limited to train effective deep models. Active learning is a common solution, where a query suggestion method selects representative unlabeled samples for annotation, and the new labels are used to improve the base model. However, most query suggestion models optimize their learnable parameters only on the limited labeled data and consequently become less effective for the more challenging unlabeled data. To tackle this, we propose a two-stream active query suggestion approach. In addition to the supervised feature extractor, we introduce an unsupervised one optimized on all raw images to capture diverse image features, which can later be improved by fine-tuning on new labels. As a use case, we build an end-to-end active learning framework with our query suggestion method for 3D synapse detection and mitochondria segmentation in connectomics. With the framework, we curate, to our best knowledge, the largest connectomics dataset with dense synapses and mitochondria annotation. On this new dataset, our method outperforms previous state-of-the-art methods by 3.1% for synapse and 3.8% for mitochondria in terms of region-of-interest proposal accuracy. We also apply our method to image classification, where it outperforms previous approaches on CIFAR-10 under the same limited annotation budget. 
The project page is https://zudi-lin.github.io/projects/#two_stream_active.",2020-08-01 +33311142,"Hydroids (Cnidaria, Hydrozoa) from Mauritanian Coral Mounds.","Agassiz, L. (1862) Contributions to the natural history of the United States of America. Vol. 4. Little Brown, Boston, 380 pp.Aguirrezabalaga, F., Altuna, A., Borja, A., Feliu, J., GarcíaCarrascosa, A.M., Romero, A., San Vicente, C., Torres-Gómez-de-Cádiz, J.A., Uriz, M.J. Ibánez, M. (1984) Contribución al conocimiento de la fauna marina de la costa Vasca. Π. Lurralde, Investigación y espacio, 1984, pp. 83-133.Alder, J. (1856) A notice of some new genera and species of British hydroid zoophytes. Annals and Magazine of Natural History, Series 2, 18, 353-362.        https://doi.org/10.1080/00222935608697652Allman, G.J. (1873) Interim report on the hydroids collected by L.F. de Pourtalès during the Gulf Stream exploration of United States coast survey. Bulletin of the Museum of Comparative Zoology at Harvard College, 3 (7), 185-186.Allman, G.J. (1874) Report on the Hydroida collected during the expeditions of H.M.S. 'Porcupine.' Transactions of the Zoological Society of London, 8, 469-481.        https://doi.org/10.1111/j.1096-3642.1874.tb00566.xAllman, G.J. (1877) Report on the Hydroida collected during the exploration of the Gulf Stream by L.F. de Pourtalès, assistant United States Coast Survey. Memoirs of the Museum of Comparative Zoology at Harvard College, 5 (2), 1-66.        https://doi.org/10.5962/bhl.

Title

15852Allman, G.J. (1883) Report on the Hydroida dredged by H.M.S. Challenger during the years 1873-76. I. Plumularidae. Report of the Scientific Results of the Voyage of H.M.S. Challenger 1873-1876, 1-55, pls. 1-20.        https://doi.org/10.5962/bhl.

Title

11299Altuna, A. (2012) New records of bathyal Leptolida (Cnidaria: Hydrozoa: Leptothecata) from the Bay of Biscay and the northwestern Iberian Peninsula (northeastern Atlantic). Zootaxa, 3565 (1), 1-17.        https://doi.org/10.11646/zootaxa.3565.1.1Altuna Prados, A. Álvarez Claudio, C. (1994) El género Zygophylax Quelch, 1885 (Cnidaria, Hydrozoa) en el Golfo de Vizcaya. Miscel.lània Zoològica, 17, 1-16.Alvarez Claudio, M.C. (1993) Hidrozoos bentonicos y Catálogo de Antozoos de la Plataforma y Talud Continentales de la costa central de Asturias. Tesis de Licenciatura, Universidad de Oviedo, Oviedo, Asturias, 458 pp.Alvarez Claudio, M.C. (1995) Some records of the superfamily Plumularioidea L. Agassiz, 1862 (Cnidaria, Hydrozoa) from the Bay of Biscay. Miscel·lània Zoològica, 18, 9-20.Ansín Agís, J. (1992) Hidrozoos de la Ría de Vigo. Tesis de Licenciatura, Universidad de Vigo, Ourense, Galicia, 282 pp.Ansín Agís, J. (1998) Plumularioidea (Cnidaria, Hydrozoa) recolectados por las expediciones holandesas ""CANCAP"" en el Atlántico nor-oriental. Tesis Doctoral, Universidade de Vigo, Vigo, Galicia, 675 pp. Ansín Agís, J., Ramil, F. Vervoort, W. (2001) Atlantic Leptolida (Hydrozoa, Cnidaria) of the families Aglaopheniidae, Halopterididae, Kirchenpaueriidae and Plumulariidae collected during the CANCAP and Mauritania-II expeditions of the National Museum of Natural History, Leiden, The Netherlands. Zoologische verhandelingen, Leiden, 333, 1-268.Ansín Agís, J., Vervoort, W. Ramil, F. (2014) Hydroids of the families Kirchenpaueriidae Stechow, 1921 and Plumulariidae McCrady, 1859 (Cnidaria, Hydrozoa) collected in the Western Pacific Ocean by various French Expeditions. Zoosystema, 36, 789-840.        https://doi.org/10.5252/z2014n4a6Arévalo y Carretero, C. (1906) Contribución al estudio de los Hidrozoarios españoles existentes en la Estación de Biología marítima de Santander. Memorias de la Real Sociedad Española de Historia Natural, 4 (3), 79-109.Avila, S.P. 
Malaquias, M.A.E. (2003) Biogeographical relationships of the molluscan fauna of the Ormonde Seamount (Gorringe Bank, Northeast Atlantic Ocean). Journal of Molluscan Studies, 69, 145-150.        https://doi.org/10.1093/mollus/69.2.145Bale, W.M. (1915) Report on the Hydroida collected in the Great Australian Bight and other localities. Part III. Fish. Zool. (biol) Biological Results of the Fishing Experiments Carried on by the F.I.S. ""Endeavour"", 1909-14, 3 (5), 304-307.Bedot, M. (1916) Sur le genre Kirchenpaueria. Revue Suisse de Zoologie, 24 (11), 637-648.Bedot, M. (1917) Le genre Nemertesia. Mémoire de la Société de Physique et d'Histoire Naturelle de Genève, 39 (1), 15-52.Bedot, M. (1921a) Notes systématiques sur les plumularides. 2me partie. Revue Suisse de Zoologie, 29 (1), 1-40.        https://doi.org/10.5962/bhl.part.84689Bedot, M. (1921b) Hydroïdes provenant des campagnes des yachts Hirondelle et Princesse-Alice (1887-1912). I. Plumulariidae. Résultats des Campagnes scientifiques accomplies sur son yacht par le Prince Albert Ier de Monaco, 60, 1-73.Bedot, M. (1923) Notes systématiques sur les plumularides. 3me partie. Revue Suisse de Zoologie, 30 (7), 213-243.Billard, A. (1901) Note sur l'Antennularia antennina Lin. et sur l'A. perrieri n. sp. Bulletin du Museìum national d'histoire naturelle, Paris, 7, 68-75.Billard, A. (1904) Contribution à l'étude des Hydroïdes (multiplication, regeneration, greffes, variations). Annales des sciences naturelles, Zoologie, Series 8, 20, 1-251.Billard, A (1906a) Hydroïdes. In: Mission des pêcheries de la côte occidental d'Afrique, III. Actes de la Société Linnéenne de Bordeaux, 61, 69-76.Billard, A. (1906b) Hydroïdes. In: Expéditions scientifiques du ""Travailleur"" et du ""Talisman"" pendant les années 1880, 1881, 1882, 1883, etc. Masson Cie., Paris, pp. 153-243.Billard, A. (1906c) Note sur les Hydroïdes du Travailleur et du Talisman. Bulletin du Museìum national d'histoire naturelle, 12 (5), 329-334.Billard, A. 
(1931) Hydroïdes de Mauritanie. Bulletin du Muséum national d'histoire naturelle, Series 2, 3 (7), 673-678.Billard, A. (1934) Note sur quelques hydroïdes du Maroc. Bulletin de la Société Zoologique de France, 59, 227-231.        Boero, F. Bouillon, J. (1993) Zoogeography and life cycle patterns of Mediterranean hydromedusae (Cnidaria). Biological Journal of the Linnean Society, 48, 239-266.        https://doi.org/10.1111/j.1095-8312.1993.tb00890.xBoero, F. Fresi, E. (1986) Zonation and evolution of a rocky bottom hydroid community. Marine Ecology, 7 (2), 123-150.        https://doi.org/10.1111/j.1439-0485.1986.tb00152.xBouillon, J. (1984) Révision de la famille des Phialuciidae (Kramp, 1955) (Leptomedusae, Hydrozoa, Cnidaria), avec un essai de classification des Thecatae-Leptomedusae. Indo-Malayan Zoology, 1, 1-24.Bouillon, J., Massin, C. Kresevic, R. (1995) Hydroidomedusae de l'Institut royal des Sciences naturelles de Belgique. Documents de travail de l'Institut royal des Sciences naturelles de Belgique, 78, 3-106.Bouillon, J., Medel, M.D., Pagès, F., Gili, J.M., Boero, F. Gravili, C. (2004) Fauna of the Mediterranean Hydrozoa. Scientia Marina, 68 (2), 5-438.        https://doi.org/10.3989/scimar.2004.68s25Bouillon, J., Gravili, C., Pagès, F., Gili, J.M. Boero, F. (2006) An introduction to Hydrozoa. Editions du Muséum, Paris, 591 pp.Broch, H. (1913) Hydroida from the ""Michael Sars"" North Atlantic Deep-Sea Expedition 1910. In: Report on the scientific results of the ""Michael Sars"" north Atlantic deep-sea expedition 1910 3(1) Zoology, 1-18.Broch, H. (1914) Hydrozoa benthonica. In: Michaelsen, W. (Ed.), Beiträge zur Kenntnis der Meeresfauna Westafrikas. Vol. 1. Friederichsen, Hamburg, pp. 19-50.Browne, E.T. (1907) The Hydroida collected by the ""Huxley"" from the North Side of the Bay of Biscay in August, 1906. Journal of the Marine Biological Association of the United Kingdom, 8, 15-37.        https://doi.org/10.1017/S002531540004371XBuchanan, J.B. 
(1957) The hydroid fauna of the Gold Coast. Revue de Zoologie et de Botanique Africaines, 56 (3-4), 349-372.Burdon-Jones, C. Tambs-Lyche, H. (1960) Observations on the fauna of the North Brattholmen stone-coral reef near Bergen. Årbok for Universitetet i Bergen, Matematisk-naturvitenskapling, 1960 (4), 1-24.Cairns, S.D. Chapman, R.E. (2001) Biogeographic affinities of the North Atlantic deep-water Scleractinia. In: Willison, J.H.M., Hall, J., Gass, S.E., Kenchington, E.L.R., Butler, M. Doherty, P. (Eds.), Proceedings of the First International Sympo-sium on Deep-Sea Corals. Ecology Action Centre, Halifax, pp. 30-57Calder, D.R. (1988) Shallow-water hydroids of Bermuda: the Athecatae. Royal Ontario Museum, Life Sciences Contributions, 148, 1-107.        https://doi.org/10.5962/bhl.

Title

52225Calder, D.R. (1991) Shallow-water hydroids of Bermuda. The Thecatae, exclusive of Plumularioidea. Royal Ontario Museum, Life Sciences Contributions, 154, 1-140.Calder, D.R. (1997) Shallow-water hydroids of Bermuda: superfamily Plumularioidea. Royal Ontario Museum, Life Sciences Contributions, 161, 1-85.Calder, D.R. (1998) Hydroid diversity and species composition along a gradient from shallow waters to deep sea around Bermuda. Deep-Sea Research I, 45, 1843-1860.        https://doi.org/10.1016/S0967-0637(98)00044-2Calder, D.R. (2012) On a collection of hydroids (Cnidaria, Hydrozoa, Hydroidolina) from the west coast of Sweden, with a checklist of species from the region. Zootaxa, 3171 (1), 1-77.        https://doi.org/10.11646/zootaxa.3171.1.1Calder, D.R. Vervoort, W. (1998) Some hydroids (Cnidaria: Hydrozoa) from the Mid-Atlantic Ridge, in the North Atlantic Ocean. Zoologische verhandelingen, Leiden, 319, 1-65.Calero, B., Ramil, F. Ramos, A. (2017) Echinoderms of the Mauritanian Deep-Sea Waters. In: Ramos, A., Ramil, F. Sanz, J.L. (Eds.), Deep sea ecosystems off Mauritania: Researching marine biodiversity and habitats in West African Deep-waters. Springer, Dordrecht, pp. 445-480.        https://doi.org/10.1007/978-94-024-1023-5_12Castillo, S. (2017) Marine Mollusc (Gastropoda and Bivalvia) from Northwest Africa. Undergraduate dissertation, Universidad de Vigo, Pontevedra, Ourense, Vigo, Galicia, 422 pp.Christiansen, B.O. (1972). The hydroid fauna of the Oslo Fjord in Norway. Norwegian Journal of Zoology, 20, 279-310.Clarke, S.F. (1879) Report on the Hydroida collected during the Exploration of the Gulf Stream of Mexico by Alexander Agassiz, 1877-78. Bulletin of the Museum of Comparative Zoology, 5, 239-252, pls. 1-5.Collins, J.S.H., Ross, A.J., Genzano, G. Mianzan, H. (2006) Earleria gen. nov. Gabriella gen. 
nov., replacement names for Foersteria Arai Brinckmann-Voss, 1980 (Cnidaria, Hydrozoa, Mitrocomidae) and Foersteria Wehner, 1988 (Crustacea, Decapoda, Prosopidae), junior homonyms of Foersteria Szepligeti, 1896 (Insecta, Hymenoptera, Braconidae). Bulletin of the Mizunami Fossil Museum, 33, 125-126.Cornelius, P.F.S. (1975) A revision of the species of Lafoeidae and Haleciidae (Coelenterata: Hydroida) recorded from Britain and nearby seas. Bulletin of the British Museum (Natural History), Zoology, 28, 375-426.Cornelius, P.F.S. (1979) A revision of species Sertulariidae (Coelenterata: Hydroida) recorded from Britain and nearby seas. Bulletin of the British Museum (Natural History), Zoology Series, 34 (6), 243-321.Cornelius, P.F.S. (1995a) North-West European Thecate Hydroids and their Medusae. Part 1. Part 1. Introduction, Laodiceidae to Haleciidae. Synopses of the British Fauna, New Series, 50, 1-347.Cornelius, P.F.S. (1995b) North-West European Thecate Hydroids and their Medusae. Part 2. Sertulariidae to Campanulariidae. Synopses of the British Fauna, New Series, 50, 1-386.De Matos Pita, S. (2015) Crustáceos Decápodos de aguas profundas de Mauritania (África Noroccidental). Undergraduate dissertation, Universidad de Vigo, Pontevedra, Ourense, Vigo, Galicia, 308 pp.Di Camillo, C.G., Boero, F., Gravili, C., Previati, M., Torsani, F. Cerrano, C. (2013) Distribution, ecology and morphology of Lytocarpia myriophyllum (Cnidaria: Hydrozoa), a Mediterranean Sea habitat former to protect. Biodiversity and Conservation, 22,773-789.        https://doi.org/10.1007/s10531-013-0449-9Dons, C. (1944) Norges korallrev. Det Kongelige Norske Videnskabers Selskabs Forhandlinger, 16, 37-82.Ehrenberg, C.G. (1834) Beiträge zur physiologischen Kenntniss der Corallenthiere im allgemeinen, und besonders des rothen Meeres, nebst einem Versuche zur physiologischen Systematik derselben. Abhandlung der Königlichen Akademie der Wissenschaften, Berlin, 1, 225-380.Eschscholtz, F. 
(1829) System der Acalephen. Eine ausführliche Beschreibung aller medusenartigen Strahltiere. Ferdinand Dümmler, Berlin, 190 pp.        https://doi.org/10.5962/bhl.

Title

10139Fey, A. (1969) Peuplements sessiles de l'archipel de Glénan. 1.-Inventaire: hydraires. Vie Milieu, 20 (2), 387-414.Fraser, C.M. (1912) Some hydroids of Beaufort, North Carolina. Bulletin of the United States Bureau of Fisheries, 30, 337-387.Fraser, C.M. (1944) Hydroids of the Atlantic coast of North America. The University of Toronto Press, Toronto, 451 pp.Freiwald, A., Fosså, J.H., Grehan, A., Koslow, T. Roberts, J.M. (2004) Cold-water coral reefs. UNEP-WCMC, Cambridge, Biodiversity Series, 22, 1-84.Freiwald, A., Beuck, L., Lundälv, T. Wienberg, C. (2012) General morphology and preliminary biological inventory of the Mauritanian cold-water coral habitats. In: Westphal, H., Beuck, L., Braun, S., Freiwald, A., Hanebuth, T., Hetzinger, S., Klicpera, A., Kudrass, H., Lantzsch, H., Lundälv, T., Mateu Vicens, G., Preto, N., Reumont, J.V., Schilling, S., Taviani, M. Wienberg, C. (Eds.), Report of Cruise Maria S. Merian 16/3-Phaeton-Paleoceanographic and paleo-climatic record on the Mauritanian shelf. 13 October. 13-20 November 2010. Bremerhaven (Allemagne)-Mindelo (Cap Verde). Maria S. Merian-Berichte, Leibniz-ZMT, Bremen, pp. 28-40.Galea, H.R. (2007) Hydroids and hydromedusae (Cnidaria: Hydrozoa) from the fjords region of southern Chile. Zootaxa, 1597, 1-116.Genzano, G.N. Zamponi, M.O. (1999) Natural history of Bimeria vestita Wright, 1859 (Hydrozoa, Bougainvilliidae) in the rocky intertal of Mar del Plata (Argentina). Ciencias Marinas, 25, 63-74.        https://doi.org/10.7773/cm.v25i1.652Gil, M. (2017) Hydroids (Cnidaria, Hydrozoa) from Northwest Africa. Undergraduate dissertation, Universidad de Vigo, Pontevedra, Ourense, Vigo, Galicia, 458 pp.Gil, M. Ramil, F. (2017a) Hydrozoans from Mauritanian Deep-Waters. In: Ramos, A., Ramil, F. Sanz, J.L. (Eds.), Deep sea ecosystems off Mauritania: Researching marine biodiversity and habitats in West African Deep-waters. Springer, Dordrecht, pp. 419-444.        https://doi.org/10.1007/978-94-024-1023-5_11Gil, M. Ramil, F. 
(2017b) The genus Diphasia L. Agassiz, 1862 (Cnidaria, Hydrozoa) in Northwest Africa. Zootaxa, 4363 (3), 301-349.        https://doi.org/10.11646/zootaxa.4363.3.1Gili, J.M., Vervoort, W. Pagès, F. (1989) Hydroids from the West African coast: Guinea Bissau, Namibia and South Africa. Scientia Marina, 53 (1), 67-112.Gmelin, J.F. (1791) Caroli a Linné, systema naturae per regna tria naturae, secundum classes, ordines, genera, species, cum characteribus, differentiis, synonymis, locis. Editio decima tertia, aucta reformata. Tomus I. Pars 6. G. E. Beer, Lipsiae, 1100 pp. [pp. 3021-4120]Gravili, C., Di Camillo, C.G., Piraino, S. Boero, F. (2013) Hydrozoan species richness in the Mediterranean Sea: past and present. Marine Ecology, 34, 41-62.        https://doi.org/10.1111/maec.12023Gravili, C., De Vito, D., Di Camillo, C.G., Martell, L., Piraino, S. Boero, F. (2015) The non-Siphonophoran Hydrozoa (Cnidaria) of Salento, Italy with notes on their life-cycles: an illustrated guide. Zootaxa, 3908 (1), 1-187.        https://doi.org/10.11646/zootaxa.3908.1.1Gray, J.E. (1848) List of the specimens of British animals in the collection of the British Museum. Part 1. Centroniae or radiated animals. British Museum, London, 173 pp.Hassall, A.H. (1841) Supplement to a catalogue of Irish Zoophytes. Annals of natural history, Series 1, 7 (44), 276-287.        https://doi.org/10.1080/03745484109442700Henry, L.A. (2001) Hydroids associated with deep-sea corals in the boreal north-west Atlantic. Journal of the Marine Biological Association of the United Kingdom, 81, 163-164.        https://doi.org/10.1017/S0025315401003502Henry, L.A. Roberts, J.M. (2007) Biodiversity and ecological composition of macrobenthos on cold-water coral mounds and adjacent off-mound habitat in the bathyal Porcupine Seabight, NE Atlantic. Deep Sea Research Part I: Oceanographic Research Papers, 54, 654-672.        https://doi.org/10.1016/j.dsr.2007.01.005Henry, L.A. Roberts, J.M. 
(2016) Global Biodiversity in Cold-Water Coral Reef Ecosystems. In: Rossi, S., Bramanti, L., Gori, A. Orejas Saco del Valle, C. (Eds.), Marine Animal Forests. Springer, Cham, pp. 235-256.        https://doi.org/10.1007/978-3-319-21012-4_6Henry, L.A., Nizinski, M.S. Ross, S.W. (2008) Occurrence and biogeography of hydroids (Cnidaria: Hydrozoa) from deep-water coral habitats off the southeastern United States. Deep Sea Research Part I: Oceanographic Research Papers, 55, 788-800.        https://doi.org/10.1016/j.dsr.2008.03.002Henry, L.A., Frank, N., Hebbeln, D., Wienberg, C., Robinson, L., Van de Flierdt, T., Dahl, M., Douarin, M., Morrison, C.L., López Correa, M., Rogers, A.D., Ruckelshausen, M. Roberts, J.M. (2014) Global ocean conveyor lowers extinction risk in the deep sea. Deep Sea Research Part I: Oceanographic Research Papers, 88, 8-16.        https://doi.org/10.1016/j.dsr.2014.03.004Hincks, T. (1861) A catalogue of the zoophytes of south Devon and South Cornwall. Annals and Magazine of Natural History, Series 3, 8, 251-262.        https://doi.org/10.1080/00222936108697413Hincks, T. (1868) A history of the British hydroid zoophytes. John van Voorst, London, 338 pp.        https://doi.org/10.5962/bhl.

Title

1322Hirohito, E.S. (1995) The hydroids of Sagami Bay II. Thecata. Biological Laboratory Imperial Household, Tokyo, pp. 1-355.Izquierdo, M.S., García-Corrales, P., Bacallado, J.J. (1986b) Contribución al conocimiento de los hidrozoos caliptoblástidos del Archipiélago Canario. Parte II: Plumulariidae. Boletín del Instituto Español de Oceanografía, 3 (2), 49-66.Jäderholm, Ε. (1909) Northern and Arctic invertebrates in the collection of the Swedish State Museum (Riksmuseum). IV. Hydroiden. Kungliga Svenska vetenskapsakademiens handlingar, 45 (1), 1-124.Jäderholm, E. (1919) Zur Kenntnis der Hydroidenfauna Japans. Arkiv för Zoologi, 12 (9), 1-34.            Jensen, A. Frederiksen, R. (1992) The fauna associated with the bank-forming deepwater coral Lophelia pertusa (Scleractinaria) on the Faroe shelf. Sarsia, 77 (1), 53-69.        https://doi.org/10.1080/00364827.1992.10413492Jickeli, C.F. (1883) Der Bau der Hydroidpolypen. Morphologisches Jahrbuch, 8, 580-680.Johnston, G. (1833) Illustrations in British zoology. Magazine of natural history and journal of zoology, botany, mineralogy, geology and meteorology, 6, 320-324, 497-499.Johnston, G. (1837) A catalogue of the zoophytes of Berwickshire. Berwickshire Naturalist's Club Proceedings, 1, 107-108.Johnston, G. (1838) A history of the British zoophytes. W.H. Lizars, Edinburgh, 341 pp.        https://doi.org/10.5962/bhl.

Title

110844Kirchenpauer, G.H. (1872) Ueber die Hydroidenfamilie Plumularidae, einzelne Gruppen derselben und ihre Fruchtbehälter. I. Aglaophenia Lx. Abhandlungen aus dem Gebiete der Naturwissenschaften herausgegeben von dem Naturwissenschaftlichen Verein in Hamburg, 5 (3), 1-52.Kramp, P.L. (1947) Hydroids collected by the ""Skagerak"" expedition in the eastern Atlantic 1946. Göteborgs. Kungliga Vetenskaps och Vitterhets Samhälles Handlingar, (B6), 5 (8), 1-16.Kramp, P.L. (1959). Some new and little known Indo-Pacific medusae. Videnskabelige Meddelelser fra Dansk naturhistorisk Forening i København, 121, 223-259.Lamarck, J.B. de (1816) Histoire naturelle des animaux sans vertèbres. Vol. 2. Verdière, Paris, 568 pp.Lamouroux, J.V.F. (1812) Extrait d'un mémoire sur la classification des polypes coralligènes non entièrement pierreux. Nouveau Bulletin des Sciences de la Société Philomatique de Paris, 3, 181-188.Lamouroux, J.V.F. (1821) Exposition méthodique des genres de l'ordre des polypiers, avec leur description et celle des principales espèces, figurées dans 84 planches; les 63 premières appartenant à l'histoire naturelle des zoophytes d'Ellis et Solander. Agasse, Paris, 115 pp.        https://doi.org/10.5962/bhl.

Title

11328Le Danois, E. (1948) Les Profondeurs de la Mer. Trente ans de recherches sur la faune sous-marine au large des côtes de France. Payot, Paris, 303 pp.Leloup, E. (1937) Hydroidea, Siphonophora, Ceriantharia. I. Hydropolypes. In: Résultats scientifiques des croisières du navire-école belge ""Mercator"". Mémoires du Musée royal d'histoire naturelle de Belgique, Series 2, 9, pp. 91-121.Leloup, E. (1940) Hydropolypes provenant des croisières du Prince Albert Ier de Monaco. Résultats des Campagnes scientifiques accomplies sur son yacht par le Prince Albert Ier de Monaco, 104, 1-38.Leloup, E. (1974) Hydropolypes calyptoblastiques du Chili. Report no. 48 of the Lund University Chile Expedition 1948-1949. Sarsia, 55, 1-62.        https://doi.org/10.1080/00364827.1974.10411252Levinsen, G.M.R. (1893) Meduser, Ctenophorer og Hydroider fra Grønlands Vestkyst, tilligemed Bemaerkninger om Hydroidernes Systematik. Videnskabelige Meddelelser fra Dansk naturhistorisk Forening i København, 1892, 143-212 + 215-220, appendix, pls. 5-8.Linnaeus, C. (1758) Systema naturae per regna tria naturae, secundum classes, ordines, genera, species cum characteribus, differentiis, synonymis, locis. Editio decima, reformata. Laurentii Salvii, Holmiae, 823 pp.        https://doi.org/10.5962/bhl.

Title

542Lütken, C., (1850) Nogle Bemaerkninger om Medusernes systematiske Inddeling, navnlig med Hensyn til Forbes's History of British Naked-eyed Medusae. Videnskabelige meddelelser fra den Naturhistoriske forening i Kjöbenhavn, 1850, 15-35.Marktanner-Turneretscher, G. (1890) Die Hydroiden des k.k. naturhistorischen Hofmuseums. Annalen des Naturhistorischen Hofmuseums, 5, 195-286.Maronna, M.M., Miranda, T.P., Peña Cantero, A.L., Barbeitos, M.S. Marques, A.C. (2016) Towards a phylogenetic classification of Leptothecata (Cnidaria, Hydrozoa). Scientific Reports, 6, 18075.        https://doi.org/10.1038/srep18075Marques, A.C., Mergner, H., Höinghaus, R. Vervoort, W. (2000) Bimeria vestita (Hydrozoa: Anthomedusae: Bougainvilliidae) senior synonym of Eudendrium vestitum (Hydrozoa: Anthomedusae: Eudendriidae). Zoologische Mededelingen, 73, 321-325.Marques, A.C., Peña Cantero, A.L., Miranda, T.P. Migotto, A.E. (2011) Revision of the genus Filellum Hincks, 1868 (Lafoeidae, Leptothecata, Hydrozoa). Zootaxa, 3129 (1), 1-28.        https://doi.org/10.11646/zootaxa.3129.1.1Martell, L., Piraino, S., Gravili, C. Boero, F. (2016) Life cycle, morphology and medusa ontogenesis of Turritopsis dohrnii (Cnidaria: Hydrozoa). Italian Journal of Zoology, 83 (3), 390-399.        https://doi.org/10.1080/11250003.2016.1203034McCrady, J., (1857) Description of Oceania (Turritopsis) nutricula nov. spec. and the embryological history of a singular medusan larva, found in the cavity of its bell. Proceedings of the Elliott Society of natutal Histtory, 1, 55-90.Medel, M.D. Vervoort, W. (1995) Plumularian hydroids (Cnidaria: Hydrozoa) from the strait of Gibraltar and nearby areas. Zoologische Verhandelingen, Leiden, 300, 1-72.Medel, M.D. Vervoort, W. (1998) Atlantic Thyroscyphidae and Sertulariidae (Hydrozoa, Cnidaria) collected during the CANCAP and Mauritania II expeditions of the National Museum of Natural History, Leiden, The Netherlands. Zoologische Verhandelingen, Leiden, 320, 1-83.Medel, M.D. 
Vervoort, W. (2000) Atlantic Haleciidae and Campanulariidae (Hydrozoa, Cnidaria) collected during the CANCAP and Mauritania-II expeditions of the National Museum of Natural History, Leiden, The Netherlands. Zoologische Verhandelingen, Leiden, 330, 1-66.Medel, M.D., García, F.G. Vervoort, W. (1998) The family Haleciidae (Cnidaria: Hydrozoa) from the Strait of Gibraltar and nearby areas. Zoologische Verhandelingen, Leiden, 72 (3), 29-50.Miglietta, M.P., Piraino, S., Kubota, S. Schuchert, P. (2007) Species in the genus Turritopsis (Cnidaria, Hydrozoa): a molecular evaluation. Journal of Zoological Systematics and Evolutionary Research, 45 (1), 11-19.        https://doi.org/10.1111/j.1439-0469.2006.00379.xMigotto, A.E. Cabral, A.S. (2005) Lafoeina amirantensis (Cnidaria: Hydrozoa, Campanulinoidea), the hydroid stage of the medusa Cirrholovenia tetranema (Cnidaria: Hydrozoa, Lovenelloidea). Zootaxa, 919 (1), 1-16.        https://doi.org/10.11646/zootaxa.919.1.1Millard, N.A.H. (1962) The Hydrozoa of the south and west coasts of South Africa. Part I. The Plumulariidae. Annals of the South African Museum, 46, 261-319.Millard, N.A.H. (1975) Monograph on the Hydroida of southern Africa. Annals of the South African Museum. Annale van die Suid-Afrikaanse Museum, 68, 1-513.Millard, N.A.H. (1978) The geographical distribution of southern African hydroids. Annals of the South African Museum. Annale van die Suid-Afrikaanse Museum, 74 (6), 159-200.Millard, N.A.H. Bouillon, J. (1973) Hydroids from the Seychelles (Coelenterata). Annals Musee Royal de L´Afrique Centrale, Sciences Zoologiques, 206 (8), 1-106.Mortensen, P. al B. Fossa, J.H. (2006) Species diversity and spatial distribution of invertebrates on deep-water Lophelia reefs in Norway. Proceedings of 10th International Coral Reef Symposium, Okinawa, Japan, 2006, pp. 1849-1860.Moura, C.J., Harris, D.J., Cunha, M.R. Rogers, A.D. 
(2008) DNA barcoding reveals cryptic diversity in marine hydroids (Cnidaria, Hydrozoa) from coastal and deep-sea environments. Zoologica Scripta, 37, 93-108.        https://doi.org/10.1111/j.1463-6409.2007.00312.xMoura, C.J., Cunha, M.R., Porteiro, F.M. Rogers, A.D. (2011) The use of the DNA barcode gene 16S mRNA for the clarification of taxonomic problems within the family Sertulariidae (Cnidaria, Hydrozoa): Systematics of sertulariid hydroids. Zoologica Scripta, 40, 520-537.        https://doi.org/10.1111/j.1463-6409.2011.00489.xMoura, C.J., Cunha, M.R., Porteiro, F.M., Yesson, C. Rogers, A.D. (2012) Evolution of Nemertesia hydroids (Cnidaria: Hydrozoa, Plumulariidae) from the shallow and deep waters of the NE Atlantic and western Mediterranean. Zoologica Scripta, 41, 79-96.        https://doi.org/10.1111/j.1463-6409.2011.00503.xNorman, A.M. (1875) Submarine-cable fauna. Part II. Crustacea, etc. Annals and Magazine of Natural History, Series 4, 15, 169-176, pl. 12.        https://doi.org/10.1080/00222937508681053Oken, L. (1815) Okens Lehrbuch der Naturgeschichte. III. Theil. Zoologie. Vol. 1. Oken, Jena, 842 pp.Patriti, G. (1970) Catalogue des cnidaires et cténaires des côtes Atlantiques marocaines. Travaux de l'Institut Scientifique Chérifien, Série Zoologique, 35, 1-149.Pelegrí, J.L. Peña-Izquierdo, J. (2015) Eastern Boundary currents off North-West Africa. In: Valdés, L. Déniz-González, I. (Eds.), Oceanographic and biological features in the Canary Current Large Marine Ecosystem. IOC Technical Series. No. 115. IOC-UNESCO, Paris. pp. 81-92.Pelegrí, J.L., Peña-Izquierdo, J., Machín, F.J., Meiners, C. Presas-Navarro, C. (2017) Oceanography of the Cape Verde Basin and Mauritanian Slope Waters. In: Ramos, A., Ramil, F. Sanz, J.L. (Eds.), Deep sea ecosystems off Mauritania: Researching marine biodiversity and habitats in West African Deep-waters. Springer, Dordrecht, pp. 119-153.        https://doi.org/10.1007/978-94-024-1023-5_3Peña Cantero, A.L. 
(2004) How rich is the deep-sea Antarctic benthic hydroid fauna? Polar Biology, 27, 767-774.        https://doi.org/10.1007/s00300-004-0654-9Peña Cantero, A.L. García Carrascosa, A.M. (2002) The benthic hydroid fauna of the Chafarinas Islands (Alborán Sea, western Mediterranean). Zoologische Verhandelingen, Leiden, 337, 1-180.Peña Cantero, A.L. Horton, T. (2017) Benthic hydroids (Cnidaria, Hydrozoa) from bathyal and abyssal depths of the Northeast Atlantic held in the modern Discovery Collections. Zootaxa, 4347 (1), 1-30.        https://doi.org/10.11646/zootaxa.4347.1.1Peña Cantero, A.L., García Carrascosa, A.M. Vervoort, W. (1998) On the species of Filellum Hincks, 1868 (Cnidaria: Hydrozoa) with the description of a new species. Journal of Natural History, 32, 297-315.        https://doi.org/10.1080/00222939800770151Picard, J. (1951) Hydraires littoraux du Sénégal récoltés par H. Sourie aux environs de Dakar. Bulletin de l'Institut français d'Afrique noir, 13 (1), 109-115.Pictet, C. Bedot, M. (1900) Hydraires provenant des campagnes de l' «Hirondelle» (1886-1888). Résultats des campagnes scientifiques du prince de Monaco, 18, 1-59, pls. 1-10.        https://doi.org/10.5962/bhl.

Title

11294Quelch, J.J. (1885) On some deep-sea and shallow water Hydrozoa. Annals and Magazine of Natural History, Series 5, 16 (91), 1-20.        https://doi.org/10.1080/00222938509487499Ramil, F. (1988) Hidrozoos de Galicia. Tesis doctoral, Universidad de Santiago de Compostela, Santiago de Compostela, Galicia, 525 pp.Ramil, F. Iglesias, A. (1988) Sobre la presencia de Opercularella panicula (Sars, 1873) (Cnidaria, Hydrozoa) en las costas de la Península Ibérica. Thalassas, 6, 79-82.Ramil, F. Vervoort, W. (1992) Report on the Hydroida collected by the ""BALGIM"" expedition in and around the Strait of Gibraltar. Zoologische Verhandelingen, Leiden, 277, 1-262.Ramil, F. Vervoort, W. (2008) Note on Streptocaulus multiseptatus (Bale, 1915) (Cnidaria: Leptolida: Aglaopheniidae), with the description of its gonosome. Zoologische Medelingen Leiden, 82, 417-422.Ramil, F., Parapar, J. Vervoort, W. (1992) The genus Sertularella Gray, 1848 (Cnidaria: Hydrozoa) along the coasts of Galicia (Spain). Zoologische Verhandelingen, Leiden, 66, 493-524.Ramil, F., Vervoort, W. Ansin, J.A. (1998) Report on the Haleciidae and Plumularioidea (Cnidaria, Hydrozoa) collected by the French SEAMOUNT 1 expedition. Zoologische Verhandelingen, Leiden, 322, 1-42.Ramos, A., Ramil, F. Sanz, J.L. (2017) Deep-Sea Ecosystems off Mauritania: An Introduction. In: Ramos, A., Ramil, F. Sanz, J.L. (Eds.), Deep sea ecosystems off Mauritania: Researching marine biodiversity and habitats in West African Deep-waters. Springer, Dordrecht, pp. 1-51.        https://doi.org/10.1007/978-94-024-1023-5_1Ramos, A., Sanz, J.L., Ramil, F., Agudo, L.M. Presas-Navarro, C. (2017) The Giant Cold-Water Coral Mounds Barrier off Mauritania. In: Ramos, A., Ramil, F. Sanz, J.L. (Eds.), Deep sea ecosystems off Mauritania: Researching marine biodiversity and habitats in West African Deep-waters. Springer, Dordrecht, pp. 481-525.        https://doi.org/10.1007/978-94-024-1023-5_13Redier, L. 
(1965) Hydraires et Bryozoaires du Golfe de Guinêe. Bulletin du Muséum National D'Histoire Naturelle, 37, 367-394.Rees, W.J., Thursfield, S. (1965) The hydroid collections of James Ritchie. Proceedings of the Royal Society of Edinburgh, (B), 69 (1-2), (2), 34-220.        https://doi.org/10.1017/S0080455X00010122Rees, W.J. Vervoort, W. (1987) Hydroids from the John Murray Expedition to the Indian Ocean, with revisory notes on Hydrodendron, Abietinella, Cryptolaria and Zygophylax (Cnidaria, Hydrozoa). Zoologische Verhandelingen, 237, 1-209.Rees, W.J. White, E. (1966) New records and fauna list of hydroids from the Azores. Annals and Magazine of Natural History, Series 13, 9, 271-284.        https://doi.org/10.1080/00222936608656051Ritchie, J. (1907) On collections of the Cape Verde Islands marine fauna, made by Cyril Crossland, M.A. (Cantab.), B.Sc. (Lond.), F.Z.S., of St. Andrews University, July to September, 1904. The Hydroids. Proceedings of the Zoological Society of London, 77, 488-514.        https://doi.org/10.1111/j.1469-7998.1907.tb06944.xRoberts, J.M. Cairns, S.D. (2014) Cold-water corals in a changing ocean. Current Opinion in Environmental Sustainability, 7, 118-126.        https://doi.org/10.1016/j.cosust.2014.01.004Roberts, J.M., Wheeler, A.J. Freiwald, A. (2006) Reefs of the Deep: The Biology and Geology of Cold-Water Coral Ecosystems. Science, 312, 543-547.        https://doi.org/10.1126/science.1119861Ronowicz, M. (2007) Benthic hydroids (Cnidaria: Hydrozoa) from Svalbard waters-biodiversity and distribution. Journal of the Marine Biological Association of the United Kingdom, 87, 1089-1094.        https://doi.org/10.1017/S0025315407055142Ronowicz, M., Włodarska-Kowalczuk, M. Kuklinski, P. (2013) Depth- and substrate-related patterns of species richness and distribution of hydroids (Cnidaria, Hydrozoa) in Arctic coastal waters (Svalbard). Marine Ecology, 34 (Supplement 1), 165-176.        https://doi.org/10.1111/maec.12034Russell, F.S. 
(1940) On the nematocysts of hydromedusae III. Journal of the Marine Biological Association of the United Kingdom, 24, 515-523.        https://doi.org/10.1017/S0025315400045422Russell, F.S. (1953) The medusa of the British Isles. Anthomedusae, Leptomedusae, Limnomedusae, Trachymedusae and Narcomedusae. Cambridge University Press, Cambridge, 538 pp.Saemundsson, B. (1911) Bidrag til Kundskaben om de islandske Hydroider. II. Videnskabelige Meddelelser fra Den Danske Naturhistoriske Forening, 63, 67-107.Sars, M. (1863) Bemaerkninger over fire norske Hydroider. Forhandlinger i Videnskabsselscabet i Kristiania, 1862, 25-39.Sars, G.O. (1874) Bidrag til Kundskaben om Norges Hydroider. Forhandlinger i Videnskabs-Selskabet i Kristiana, 1873, 91-150.Schuchert, P. (1996) The marine fauna of New Zealand: athecate hydroids and their medusae (Cnidaria: Hydrozoa). Elsevier Science, Wellington, 159 pp.Schuchert, P. (1997) Review of the family Halopterididae (Hydrozoa, Cnidaria). Zoologische Verhandelingen, Leiden, 309, 1-161.Schuchert, P. (2000) Hydrozoa (Cnidaria) of Iceland collected by the BIOICE programme. Sarsia, 85, 411-438.        https://doi.org/10.1080/00364827.2000.10414592Schuchert, P. (2001) Hydroids of Greenland and Iceland (Cnidaria, Hydrozoa). Bioscience, 53, 1-185.Schuchert, P. (2004) Revision of the European athecate hydroids and their medusae (Hydrozoa, Cnidaria): families Oceanidae and Pachycordylidae. Revue suisse de Zoologie, 111, 315-369.        https://doi.org/10.5962/bhl.part.80242Schuchert, P. (2005) Taxonomic revision and systematic notes on some Halecium species (Cnidaria, Hydrozoa). Journal of Natural History, 39, 607-639.        https://doi.org/10.1080/00222930400001319Schuchert, P. (2007) The European athecate hydroids and their medusa (Hydrozoa, Cnidaria): Filifera Part 2. Revue suisse de Zoologie, 114, 195-396.        https://doi.org/10.5962/bhl.part.80395Schuchert, P. (2012) North-West European Athecate Hydroids and their Medusae. 
Synopses of the British Fauna, New Series, 59, 1-364.Schuchert, P. (2014) High genetic diversity in the hydroid Plumularia setacea: A multitude of cryptic species or extensive population subdivision? Molecular Phylogenetics and Evolution, 76, 1-9.        https://doi.org/10.1016/j.ympev.2014.02.020Schuchert, P. (2018) World Hydrozoa Database. Earleria panicula (G.O. Sars, 1874). Available from: http://www.marinespecies.org/hydrozoa/aphia.php?p=taxdetailsid=1255613 (accessed 3 September 2020)Schuchert, P., Hosia, A. Leclère, L. (2017) Identification of the polyp stage of three leptomedusa species using DNA barcoding. Revue suisse de Zoologie, 124 (1), 167-182.Stechow, E. (1907) Neue japanische Athecata und Plumularidae aus der Sammlung Dr. Doflein. Zoologischer Anzeiger, 32 (7), 192-200.Stechow, E. (1909) Hydroidpolypen der japanischen Ostküste. 1. Theil. Athecata und Plumularidae. In: Doflein, F. (Ed.), Beiträge zur Naturgeschichte Ostasiens. Abhandlungen der Königlich Bayerischen Akademie der Wissenschaften, 1 (6), 1-111.Stechow, E. (1913a) Neue Genera thecater Hydroiden aus der Familie der Lafoeiden und neue Species von Thecaten aus Japan. Zoologischer Anzeiger, 43 (3), 137-144.Stechow, E. (1913b). Hydroidpolypen der japanischen Ostküste. II. Teil: Campanularidae, Halecidae, Lafoeidae, Campanulinidae und Sertularidae, nebst Ergänzungen zu den Athecata und Plumularidae. In: Doflein, F., Beiträge zur Naturgeschichte Ostasiens.-Abh. math.phys. Historische Klasse der Königlichen bayerischen Akademie der Wissenschaften, 3 (2), pp. 1-162.        https://doi.org/10.5962/bhl.

Title

11621Stechow, E. (1921) Neue Genera und Species von Hydrozoen und anderen Evertebraten. Archiv für Naturgeschichte, Abteilung A, 3, Heft, 87, 248-265.Stramma, L. Schott, F. (1999) The mean flow field of the tropical Atlantic Ocean. Deep-Sea Research Part II: Topical Studies in Oceanography, 46 (1), 279-303.        https://doi.org/10.1016/S0967-0645(98)00109-XSvoboda, Α. (1979) Beitrag zur Ökologie, Biometrie und Systematik der Mediterranen Aglaophenia Arten (Hydroidea). Zoologische Verhandelingen, Leiden, 167, 1-114.Svoboda, A. Cornelius, P.F.S. (1991) The European and Mediterranean species of Aglaophenia (Cnidaria : Hydrozoa). Zoologische Verhandelingen, Leiden, 274, 1-70.Teissier, G. (1965) Inventaire de la faune marine de Roscoff. Cnidaires-Cténaires. Travaux de la Station Biologique de Roscoff, 16, 1-53.Van Soest, R.W.M. (1993) Distribution of sponges on the Mauritanian continental shelf. Hydrobiologia, 258, 95-106.        https://doi.org/10.1007/BF00006189Van Soest, R.W.M. and de Voogd, N.J. (2015) Sponge species composition of north-east Atlantic cold-water coral reefs compared in a bathyal to inshore gradient. Journal of the Marine Biological Association of the United Kingdom, 95 (7), 1461-1474.        https://doi.org/10.1017/S0025315413001410Vanhöffen, E. (1910) Die Hydroiden der Deutschen Südpolar-Expedition 1901-1903. Deutsche Südpolar-Expedition 1901-1903, 11, Zoologie 3, 269-340.Vervoort, W. (1946) Exotic hydroids in the collections of the Rijksmuseum van Natuurlijke Historie and the Zoological Museum at Amsterdam. Zoologische Mededelingen, Leiden, 26 (1-4), 287-351.Vervoort, W. (1946) Exotic hydroids in the collections of the Rijksmuseum van Natuurlijke Historie and the Zoological Museum at Amsterdam. Zoologische Mededelingen, Leiden, 26 (1-4), 287-351.Vervoort, W. (1972) Hydroids from the Theta, Vema and Yelcho cruises of the Lamont-Doherty geological observatory. Rijksmuseum van Natuurlijke Historie, Leiden, Netherlands, 120, 1-247.Vervoort, W. 
(2006) Leptolida (Cnidaria: Hydrozoa) collected during the CANCAP and Mauritania-II expeditions of the National Hystory, Leiden, the Netherlands [Anthoathecata, various families of Leptothecata and addenda]. Zoologische Mededelingen, Leiden, 80, 181-318.Vervoort, W. Faasse, M. (2009) Overzicht van de Nederlandse Leptolida (= Hydroida) (Cnidaria: Hydrozoa). Nederlandse Faunistische Mededelingen, 32, 1-207.Vervoort, W. Watson, J.E. (2003) The marine fauna of New Zealand. Leptothecata (Cnidaria: Hydrozoa) (Thecate Hydroids). NIWA Biodiversity Memoir, 119, 1-540.Westphal, H., Beuck, L., Braun, S., Freiwald, A., Hanebuth, T., Hetzinger, S., Klicpera, A., Kudrass, H., Lantzsch, H., Lundälv, T., Mateu Vicens, G., Preto, N., Reumont, J.V., Schilling, S., Taviani, M. Wienberg, C. (2012) Report of Cruise Maria S. Merian 16/3-Phaeton-Paleoceanographic and paleo-climatic record on the Mauritanian shelf. Institut für Meereskunde der Universität Hamburg, 136 pp.Wienberg, C., Titschack, J., Freiwald, A., Frank, N., TomasLundälv, T., Taviani, M., Beuck, L., Schröder-Ritzrau, A., Krengel, T. and Hebbeln, D. (2018) The giant Mauritanian cold-water coral mound province: Oxygen control on coral mound formation. Quaternary Science Reviews, 185, 135-152.        https://doi.org/10.1016/j.quascirev.2018.02.012Wright, T.S. (1859) Observations on British zoophytes. Edinburgh new Philosophical Journal, 10, 105-114.",2020-11-16 +31088897,'Active & Safe Central': development of an online resource for the prevention of injury in sport and recreational activity.,"

Background

Sport and recreation related injuries exert a significant cost on the healthcare system. As prevention researchers and practitioners, we have a responsibility to provide guidance towards prevention to those who participate in sport and recreation, and those that coach, treat and parent children that participate. The objective of this project was to use an integrated knowledge translation approach to develop an end user-driven digital platform that provides injury prevention information and resources across 51 sport and recreational activities.

Design

We used an integrated knowledge translation approach to scope and develop an online sport and recreational injury prevention resource. A project team was formed that included end users-coaches, parents and athletes, injury researchers and practitioners, as well as members of a digital design team. All members of the project team informed the development process, including a review of literature and existing resources, the translation of evidence and development of the platform. At all stages of development, members of the project team cocreated knowledge for the tool, including forming the research questions, the approach, feasibility and development of outcomes.

Conclusion

The 'Active & Safe Central' (https://activesafe.ca/) platform provides web-based sport injury and prevention information. This user-friendly, web and mobile accessible platform can increase the reach, awareness and implementation of prevention programming in sport and recreational activity.",2019-05-14 +32677705,Data-driven inference of the reproduction number for COVID-19 before and after interventions for 51 European countries.,"The reproduction number is broadly considered as a key indicator for the spreading of the COVID-19 pandemic. Its estimated value is a measure of the necessity and, eventually, effectiveness of interventions imposed in various countries. Here we present an online tool for the data-driven inference and quantification of uncertainties for the reproduction number, as well as the time points of interventions for 51 European countries. The study relied on the Bayesian calibration of the SIR model with data from reported daily infections from these countries. The model fitted the data, for most countries, without individual tuning of parameters. We also compared the results of SIR and SEIR models, which give different estimates of the reproduction number, and provided an analytical relationship between the respective numbers. We deployed a Bayesian inference framework with efficient sampling algorithms, to present a publicly available graphical user interface (https://cse-lab.ethz.ch/coronavirus) that allows the user to assess and compare predictions for pairs of European countries. The results quantified the rate of the disease’s spread before and after interventions, and provided a metric for the effectiveness of non-pharmaceutical interventions in different countries. They also indicated how geographic proximity and the times of interventions affected the progression of the epidemic.",2020-07-10 +28472360,PopFly: the Drosophila population genomics browser.,"

Summary

The recent compilation of over 1100 worldwide wild-derived Drosophila melanogaster genome sequences reassembled using a standardized pipeline provides a unique resource for population genomic studies (Drosophila Genome Nexus, DGN). A visual display of the estimated metrics describing genome-wide variation and selection patterns would allow gaining a global view and understanding of the evolutionary forces shaping genome variation.

Availability and implementation

Here, we present PopFly, a population genomics-oriented genome browser, based on JBrowse software, that contains a complete inventory of population genomic parameters estimated from DGN data. This browser is designed for the automatic analysis and display of genetic variation data within and between populations along the D. melanogaster genome. PopFly allows the visualization and retrieval of functional annotations, estimates of nucleotide diversity metrics, linkage disequilibrium statistics, recombination rates, a battery of neutrality tests, and population differentiation parameters at different window sizes through the euchromatic chromosomes. PopFly is open and freely available at site http://popfly.uab.cat .

Contact

sergi.hervas@uab.cat or antonio.barbadilla@uab.cat.",2017-09-01 +34603551,The braingraph.org database with more than 1000 robust human connectomes in five resolutions.,"The human brain is the most complex object of study we encounter today. Mapping the neuronal-level connections between the more than 80 billion neurons in the brain is a hopeless task for science. By the recent advancement of magnetic resonance imaging (MRI), we are able to map the macroscopic connections between about 1000 brain areas. The MRI data acquisition and the subsequent algorithmic workflow contain several complex steps, where errors can occur. In the present contribution we describe and publish 1064 human connectomes, computed from the public release of the Human Connectome Project. Each connectome is available in 5 resolutions, with 83, 129, 234, 463 and 1015 anatomically labeled nodes. For error correction we follow an averaging and extreme value deleting strategy for each edge and for each connectome. The resulting 5320 braingraphs can be downloaded from the https://braingraph.org site. This dataset makes possible access to these graphs for scientists unfamiliar with neuroimaging- and connectome-related tools: mathematicians, physicists and engineers can use their expertise and ideas in the analysis of the connections of the human brain. Brain scientists and computational neuroscientists also have a robust and large, multi-resolution set for connectomical studies.

Supplementary information

The online version contains supplementary material available at 10.1007/s11571-021-09670-5.",2021-03-12 +34025448,Wrist Band Photoplethysmography Autocorrelation Analysis Enables Detection of Atrial Fibrillation Without Pulse Detection.,"Atrial fibrillation is often asymptomatic and intermittent making its detection challenging. A photoplethysmography (PPG) provides a promising option for atrial fibrillation detection. However, the shapes of pulse waves vary in atrial fibrillation decreasing pulse and atrial fibrillation detection accuracy. This study evaluated ten robust photoplethysmography features for detection of atrial fibrillation. The study was a national multi-center clinical study in Finland and the data were combined from two broader research projects (NCT03721601, URL: https://clinicaltrials.gov/ct2/show/NCT03721601 and NCT03753139, URL: https://clinicaltrials.gov/ct2/show/NCT03753139). A photoplethysmography signal was recorded with a wrist band. Five pulse interval variability, four amplitude features and a novel autocorrelation-based morphology feature were calculated and evaluated independently as predictors of atrial fibrillation. A multivariate predictor model including only the most significant features was established. The models were 10-fold cross-validated. 359 patients were included in the study (atrial fibrillation n = 169, sinus rhythm n = 190). The autocorrelation univariate predictor model detected atrial fibrillation with the highest area under receiver operating characteristic curve (AUC) value of 0.982 (sensitivity 95.1%, specificity 93.7%). Autocorrelation was also the most significant individual feature (p < 0.00001) in the multivariate predictor model, detecting atrial fibrillation with AUC of 0.993 (sensitivity 96.4%, specificity 96.3%). Our results demonstrated that the autocorrelation independently detects atrial fibrillation reliably without the need of pulse detection. 
Combining pulse wave morphology-based features such as autocorrelation with information from pulse-interval variability it is possible to detect atrial fibrillation with high accuracy with a commercial wrist band. Photoplethysmography wrist bands accompanied with atrial fibrillation detection algorithms utilizing autocorrelation could provide a computationally very effective and reliable wearable monitoring method in screening of atrial fibrillation.",2021-05-07 +32240586,Simulation Foundry: Automated and F.A.I.R. Molecular Modeling.,"The Simulation Foundry (SF) is a modular workflow for the automated creation of molecular modeling (MM) data. MM allows for the reliable prediction of the microscopic and macroscopic properties of multicomponent systems from first principles. The SF makes MM repeatable, replicable, and findable, accessible, interoperable, and reusable (F.A.I.R.). The SF uses a standardized data structure and file naming convention, allowing for replication on different supercomputers and re-entrancy. We focus on keeping the SF simple by basing it on scripting languages that are widely used by the MM community (bash, Python) and making it reusable and re-editable. The SF was developed to assist expert users in performing parameter studies of multicomponent systems by high throughput molecular dynamics simulations. The usability of the SF is demonstrated by simulations of thermophysical properties of binary mixtures. A standardized data exchange format enables the integration of simulated data with data from experiments. The SF also provides a complete documentation of how the results were obtained, thus assigning provenance. Increasing computational power facilitates the intensification of the simulation process and requires automation and modularity. 
The SF provides a community platform on which to integrate new methods and create data that is reproducible and transparent (https://fairdomhub.org/studies/639/snapshots/1, https://fairdomhub.org/studies/639/snapshots/2).",2020-04-14 +33381842,Adversarial deconfounding autoencoder for learning robust gene expression embeddings.,"

Motivation

Increasing number of gene expression profiles has enabled the use of complex models, such as deep unsupervised neural networks, to extract a latent space from these profiles. However, expression profiles, especially when collected in large numbers, inherently contain variations introduced by technical artifacts (e.g. batch effects) and uninteresting biological variables (e.g. age) in addition to the true signals of interest. These sources of variations, called confounders, produce embeddings that fail to transfer to different domains, i.e. an embedding learned from one dataset with a specific confounder distribution does not generalize to different distributions. To remedy this problem, we attempt to disentangle confounders from true signals to generate biologically informative embeddings.

Results

In this article, we introduce the Adversarial Deconfounding AutoEncoder (AD-AE) approach to deconfounding gene expression latent spaces. The AD-AE model consists of two neural networks: (i) an autoencoder to generate an embedding that can reconstruct original measurements, and (ii) an adversary trained to predict the confounder from that embedding. We jointly train the networks to generate embeddings that can encode as much information as possible without encoding any confounding signal. By applying AD-AE to two distinct gene expression datasets, we show that our model can (i) generate embeddings that do not encode confounder information, (ii) conserve the biological signals present in the original space and (iii) generalize successfully across different confounder domains. We demonstrate that AD-AE outperforms standard autoencoder and other deconfounding approaches.

Availability and implementation

Our code and data are available at https://gitlab.cs.washington.edu/abdincer/ad-ae.

Contact

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +32492134,Unipept CLI 2.0: adding support for visualizations and functional annotations.,"

Summary

Unipept is an ecosystem of tools developed for fast metaproteomics data-analysis consisting of a web application, a set of web services (application programming interface, API) and a command-line interface (CLI). After the successful introduction of version 4 of the Unipept web application, we here introduce version 2.0 of the API and CLI. Next to the existing taxonomic analysis, version 2.0 of the API and CLI provides access to Unipept's powerful functional analysis for metaproteomics samples. The functional analysis pipeline supports retrieval of Enzyme Commission numbers, Gene Ontology terms and InterPro entries for the individual peptides in a metaproteomics sample. This paves the way for other applications and developers to integrate these new information sources into their data processing pipelines, which greatly increases insight into the functions performed by the organisms in a specific environment. Both the API and CLI have also been expanded with the ability to render interactive visualizations from a list of taxon ids. These visualizations are automatically made available on a dedicated website and can easily be shared by users.

Availability and implementation

The API is available at http://api.unipept.ugent.be. Information regarding the CLI can be found at https://unipept.ugent.be/clidocs. Both interfaces are freely available and open-source under the MIT license.

Contact

pieter.verschaffelt@ugent.be.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +30027916,"Pathogenesis, diagnostic challenges and treatment of zika virus disease in resource-limited settings.","The association of Zika virus (ZIKV) infection with congenital malformation and neurological sequelae has brought significant global concern. Consequently, the World Health Organization (WHO) declared it ""a public health emergency of International concern"" on 1 February, 2016. A critical review of its pathogenesis would lead to a better understanding of the clinical features and the neurological complications. This review is based on literature search in PubMed/Medline, Google Scholar and the WHO, http://www.who.int. This includes all relevant articles written in English published through June 2018, with subject heading and keywords such as Zika, ZIKV, Zika pathogenesis, diagnosis of Zika, Zika Nigeria, Zika Africa and Zika resource-limited settings. Following ZIKV infection, viraemia ensues targeting primarily the monocytes for both the Asian and African strains. ZIKV infection by an African strain appears to be more pathogenic, in early pregnancy tends to result in spontaneous abortion. Whereas an Asian strain tends to be less pathogenic and more chronic, this allows the pregnancy to continue, ultimately resulting in congenital malformations. There is no routine laboratory diagnosis of ZIKV infection in resource-constrained countries. Serologic tests should be interpreted with caution since there can be cross-reactivity with other flaviviruses, especially in Africa where the burden of infection with flaviviruses is comparatively high. There is a paucity of well-equipped laboratories for comprehensive ZIKV diagnosis. 
It is imperative to strengthen the health systems, improve health workforce and diagnostic capacity of such settings.",2018-04-01 +31737596,Fast Screening of Inhibitor Binding/Unbinding Using Novel Software Tool CaverDock.,"Protein tunnels and channels are attractive targets for drug design. Drug molecules that block the access of substrates or release of products can be efficient modulators of biological activity. Here, we demonstrate the applicability of a newly developed software tool CaverDock for screening databases of drugs against pharmacologically relevant targets. First, we evaluated the effect of rigid and flexible side chains on sets of substrates and inhibitors of seven different proteins. In order to assess the accuracy of our software, we compared the results obtained from CaverDock calculation with experimental data previously collected with heat shock protein 90α. Finally, we tested the virtual screening capabilities of CaverDock with a set of oncological and anti-inflammatory FDA-approved drugs with two molecular targets-cytochrome P450 17A1 and leukotriene A4 hydrolase/aminopeptidase. Calculation of rigid trajectories using four processors took on average 53 min per molecule with 90% successfully calculated cases. The screening identified functional tunnels based on the profile of potential energies of binding and unbinding trajectories. We concluded that CaverDock is a sufficiently fast, robust, and accurate tool for screening binding/unbinding processes of pharmacologically important targets with buried functional sites. 
The standalone version of CaverDock is available freely at https://loschmidt.chemi.muni.cz/caverdock/ and the web version at https://loschmidt.chemi.muni.cz/caverweb/.",2019-10-29 +30541929,A Statistical Procedure for Genome-Wide Detection of QTL Hotspots Using Public Databases with Application to Rice.,"Genome-wide detection of quantitative trait loci (QTL) hotspots underlying variation in many molecular and phenotypic traits has been a key step in various biological studies since the QTL hotspots are highly informative and can be linked to the genes for the quantitative traits. Several statistical methods have been proposed to detect QTL hotspots. These hotspot detection methods rely heavily on permutation tests performed on summarized QTL data or individual-level data (with genotypes and phenotypes) from the genetical genomics experiments. In this article, we propose a statistical procedure for QTL hotspot detection by using the summarized QTL (interval) data collected in public web-accessible databases. First, a simple statistical method based on the uniform distribution is derived to convert the QTL interval data into the expected QTL frequency (EQF) matrix. And then, to account for the correlation structure among traits, the QTL for correlated traits are grouped together into the same categories to form a reduced EQF matrix. Furthermore, a permutation algorithm on the EQF elements or on the QTL intervals is developed to compute a sliding scale of EQF thresholds, ranging from strict to liberal, for assessing the significance of QTL hotspots. With grouping, much stricter thresholds can be obtained to avoid the detection of spurious hotspots. Real example analysis and simulation study are carried out to illustrate our procedure, evaluate the performances and compare with other methods. 
It shows that our procedure can control the genome-wide error rates at the target levels, provide appropriate thresholds for correlated data and is comparable to the methods using individual-level data in hotspot detection. Depending on the thresholds used, more than 100 hotspots are detected in GRAMENE rice database. We also perform a genome-wide comparative analysis of the detected hotspots and the known genes collected in the Rice Q-TARO database. The comparative analysis reveals that the hotspots and genes are conformable in the sense that they co-localize closely and are functionally related to relevant traits. Our statistical procedure can provide a framework for exploring the networks among QTL hotspots, genes and quantitative traits in biological studies. The R codes that produce both numerical and graphical outputs of QTL hotspot detection in the genome are available on the worldwide web http://www.stat.sinica.edu.tw/chkao/.",2019-02-07 +32837980,Data-driven modeling of COVID-19-Lessons learned.,"Understanding the outbreak dynamics of COVID-19 through the lens of mathematical models is an elusive but significant goal. Within only half a year, the COVID-19 pandemic has resulted in more than 19 million reported cases across 188 countries with more than 700,000 deaths worldwide. Unlike any other disease in history, COVID-19 has generated an unprecedented volume of data, well documented, continuously updated, and broadly available to the general public. Yet, the precise role of mathematical modeling in providing quantitative insight into the COVID-19 pandemic remains a topic of ongoing debate. Here we discuss the lessons learned from six months of modeling COVID-19. We highlight the early success of classical models for infectious diseases and show why these models fail to predict the current outbreak dynamics of COVID-19. 
We illustrate how data-driven modeling can integrate classical epidemiology modeling and machine learning to infer critical disease parameters-in real time-from reported case data to make informed predictions and guide political decision making. We critically discuss questions that these models can and cannot answer and showcase controversial decisions around the early outbreak dynamics, outbreak control, and exit strategies. We anticipate that this summary will stimulate discussion within the modeling community and help provide guidelines for robust mathematical models to understand and manage the COVID-19 pandemic. EML webinar speakers, videos, and overviews are updated at https://imechanica.org/node/24098.",2020-08-14 +33064600,Causal Pathways for Specific Language Impairment: Lessons From Studies of Twins.,"Purpose This review article summarizes a program of longitudinal investigation of twins' language acquisition with a focus on causal pathways for specific language impairment (SLI) and nonspecific language impairment in children at 4 and 6 years with known history at 2 years. Method The context of the overview is established by legacy scientific papers in genetics, language, and SLI. Five recent studies of twins are summarized, from 2 to 16 years of age, with a longitudinal perspective of heritability over multiple speech, language, and cognitive phenotypes. Results Replicated moderate-to-high heritability is reported across ages, phenotypes, full population estimates, and estimates for clinical groups. Key outcomes are documentation of a twinning effect of risk for late language acquisition in twins that persists through 6 years of age, greater for monozygotic than dizygotic twins (although zygosity effects disappear at 6 years); heritability is greater for grammar and morphosyntax than other linguistic dimensions, from age 2 years through age 16 years, replicated within twin samples at subsequent age levels and across twin samples at age 16 years. 
Conclusion There is consistent support for legacy models of genetic influences on language acquisition, updated with a more precise growth signaling disruption model supported by twin data, as well as singleton data of children with SLI and nonspecific language impairment. Presentation Video https://doi.org/10.23641/asha.13063727.",2020-10-16 +30020414,"SKEMPI 2.0: an updated benchmark of changes in protein-protein binding energy, kinetics and thermodynamics upon mutation.","

Motivation

Understanding the relationship between the sequence, structure, binding energy, binding kinetics and binding thermodynamics of protein-protein interactions is crucial to understanding cellular signaling, the assembly and regulation of molecular complexes, the mechanisms through which mutations lead to disease, and protein engineering.

Results

We present SKEMPI 2.0, a major update to our database of binding free energy changes upon mutation for structurally resolved protein-protein interactions. This version now contains manually curated binding data for 7085 mutations, an increase of 133%, including changes in kinetics for 1844 mutations, enthalpy and entropy changes for 443 mutations, and 440 mutations, which abolish detectable binding.

Availability and implementation

The database is available as supplementary data and at https://life.bsc.es/pid/skempi2/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +30918038,Relationship between sociodemographic factors and specialty destination of UK trainee doctors: a national cohort study.,"OBJECTIVES:Many countries are driving forward policies to widen the socioeconomic profile of medical students and to train more medical students for certain specialties. However, little is known about how socioeconomic origin relates to specialty choice. Nor is there a good understanding of the relationship between academic performance and specialty choice. To address these gaps, our aim was to identify the relationship between socioeconomic background, academic performance and accepted offers into specialty training. DESIGN:Longitudinal, cohort study using data from the UK Medical Education Database (https://www.ukmed.ac.uk/). PARTICIPANTS:6065 (60% females) UK doctors who accepted offers to a specialty training (residency) post after completing the 2-year generic foundation programme (UK Foundation Programme) between 2012 and 2014. MAIN OUTCOME MEASURES:Χ2 tests were used to examine the relationships between sociodemographic characteristics, academic ability and the dependent variable, specialty choice. Multiple data imputation was used to address the issue of missing data. Multinomial regression was employed to test the independent variables in predicting the likelihood of choosing a given specialty. RESULTS:Participants pursuing careers in more competitive specialties had significantly higher academic scores than colleagues pursuing less competitive ones. After controlling for the presence of multiple factors, trainees who came from families where no parent was educated to a degree level had statistically significant lower odds of choosing careers in medical specialties relative to general practice (OR=0.78, 95% CI, 0.67 to 0.92). 
Students who entered medical school as school leavers, compared with mature students, had odds 1.2 times higher (95% CI, 1.04 to 1.56) of choosing surgical specialties than general practice. CONCLUSIONS:The data indicate a direct association between trainees' sociodemographic characteristics, academic ability and career choices. The findings can be used by medical school, training boards and workforce planners to inform recruitment and retention strategies.",2019-03-27 +34000200,Development and Initial Validation of the Lifetime Exposure to Noise and Solvents Questionnaire in U.S. Service Members and Veterans.,"Purpose A need exists to investigate the short- and long-term impact of noise exposures during and following military service on auditory health. Currently available questionnaires are limited in their ability to meet this need because of (a) inability to evaluate noise exposures beyond a limited time frame, (b) lack of consensus on scoring, (c) inability to assess impulse exposures (e.g., firearm use), (d) lack of a single questionnaire that assesses both military and nonmilitary exposures, and (e) lack of validity and reliability data. To address these limitations, the Lifetime Exposure to Noise and Solvents Questionnaire (LENS-Q) was developed. The purpose of this report is to describe the development and initial validation of the LENS-Q as a measure of self-reported noise exposure. Method Six hundred ninety participants, consisting of current Service members and recently military-separated (within about 2.5 years) Veterans, completed the LENS-Q, additional study questionnaires, and comprehensive audiometric testing. Noise exposure scores were computed from LENS-Q responses using a simple scoring algorithm that distinguishes between different cumulative levels of exposure and allows for the inclusion of both continuous and impulse noise exposures. 
Results The LENS-Q demonstrates good construct validity as evidenced by measures of hearing loss, tinnitus, and subjective hearing difficulties all increasing with an increase in noise exposure scores. A logistic regression, adjusting for age and sex, revealed that participants in the highest exposure group were 2.4-3.9 times more likely to experience hearing loss, 2.7-2.8 times more likely to experience tinnitus, and 3.0-3.7 times more likely to report hearing difficulties compared with individuals in the lowest exposure group. Conclusions The LENS-Q captures noise exposure over an individual's lifetime and provides an alternative scoring metric capable of representing exposure to both continuous and impulse noise. Findings suggest that the LENS-Q is a valuable tool for capturing and measuring both military and nonmilitary noise exposure. Supplemental Material https://doi.org/10.23641/asha.14582937.",2021-05-17 +33186464,YQFC: a web tool to compare quantitative biological features between two yeast gene lists. ,"Nowadays high-throughput omics technologies are routinely used in biological research. From the omics data, researchers can easily get two gene lists (e.g. stress-induced genes vs. stress-repressed genes) related to their biological question. The next step would be to apply enrichment analysis tools to identify distinct functional/regulatory features between these two gene lists for further investigation. Although various enrichment analysis tools are already available, two challenges remain to be addressed. First, most existing tools are designed to analyze only one gene list, so they cannot directly compare two gene lists. Second, almost all existing tools focus on identifying the enriched qualitative features (e.g. gene ontology [GO] terms, pathways, domains, etc.). Many quantitative features (e.g. number of mRNA isoforms of a gene, mRNA half-life, protein half-life, transcriptional plasticity, translational efficiency, etc.) 
are available in the yeast, but no existing tools provide analyses on these quantitative features. To address these two challenges, here we present Yeast Quantitative Features Comparator (YQFC) that can directly compare various quantitative features between two yeast gene lists. In YQFC, we comprehensively collected and processed 85 quantitative features from the yeast literature and yeast databases. For each quantitative feature, YQFC provides three statistical tests (t-test, U test and KS test) to test whether this quantitative feature is statistically different between the two input yeast gene lists. The distinct quantitative features identified by YQFC may help researchers to study the underlying molecular mechanisms that differentiate the two input yeast gene lists. We believe that YQFC is a useful tool to expedite the biological research that uses high-throughput omics technologies. http://cosbi2.ee.ncku.edu.tw/YQFC/.",2020-01-01 +33188218,TOXiTAXi: a web resource for toxicity of Bacillus thuringiensis protein compositions towards species of various taxonomic groups.,"Bioinsecticides consisting of different sets of Bacillus thuringiensis (Bt) Cry, Cyt and Vip toxins are broadly used in pest control. Possible interactions (synergistic, additive or antagonistic) between these proteins can not only influence the overall efficacy of certain Bt-based bioinsecticide, but also raise questions regarding environmental safety. Here, we assemble, summarize and analyze the outcomes of experiments published over 30 years, investigating combinatorial effects among Bt Cry, Cyt and Vip toxins. We collected the results on 118 various two-to-five-component combinations that have been bioassayed against 38 invertebrate species. Synergism, additive effect and antagonism was indicated in 54%, 32% and 14% of experiments, respectively. Synergism was noted most frequently for Cry/Cyt combinations, followed by Cyt/Vip and Cry/Cry. 
In Cry/Vip combinations, antagonism is more frequent and higher in magnitude compared to other categories. Despite a significant number of tested Bt toxin combinations, most of them have been bioassayed only against one pest species. To aid the research on Bt pesticidal protein activity, we present TOXiTAXi ( http://www.combio.pl/toxitaxi/ ), a universal database and a dedicated web tool to conveniently gather and analyze the existing and future bioassay results on biocidal activity of toxins against various taxonomic groups.",2020-11-13 +31537810,Optimized CRISPR guide RNA design for two high-fidelity Cas9 variants by deep learning.,"Highly specific Cas9 nucleases derived from SpCas9 are valuable tools for genome editing, but their wide applications are hampered by a lack of knowledge governing guide RNA (gRNA) activity. Here, we perform a genome-scale screen to measure gRNA activity for two highly specific SpCas9 variants (eSpCas9(1.1) and SpCas9-HF1) and wild-type SpCas9 (WT-SpCas9) in human cells, and obtain indel rates of over 50,000 gRNAs for each nuclease, covering ~20,000 genes. We evaluate the contribution of 1,031 features to gRNA activity and develope models for activity prediction. Our data reveals that a combination of RNN with important biological features outperforms other models for activity prediction. We further demonstrate that our model outperforms other popular gRNA design tools. Finally, we develop an online design tool DeepHF for the three Cas9 nucleases. The database, as well as the designer tool, is freely accessible via a web server, http://www.DeepHF.com/ .",2019-09-19 +31241221,PAMAM: Power analysis in multiancestry admixture mapping.,"Admixed populations arise when two or more previously isolated populations interbreed. Admixture mapping (AM) methods are used for tracing the ancestral origin of disease-susceptibility genetic loci in the admixed population such as African American and Latinos. 
AM is different from genome-wide association studies in that ancestry rather than genotypes are tracked in the association process. The power and sample size of AM primarily depend on proportion of admixture and differences in the risk allele frequencies among the ancestral populations. Ensuring sufficient power to detect the effect of ancestry on disease susceptibility is critical for interpretability and reliability of studies using AM approach. However, there is no power and sample size analysis tool existing for AM studies in admixed population. In this study, we developed power analysis of multiancestry AM (PAMAM) to estimate power and sample size for two-way and three-way population admixtures. PAMAM is the first web-based bioinformatics tool developed to calculate power and sample size in admixed population under a variety of genetic and disease phenotype models. It is a valuable resource for investigators to design a cost-efficient study and develop grant application to pursue AM studies. PAMAM is built on JavaScript back-end with HTML front-end. It is accessible through any modern web browser such as Firefox, Internet Explorer, and Google Chrome regardless of operating system. It is a user-friendly tool containing links for support information including user manual and examples, and freely available at https://research.cchmc.org/mershalab/PAMAM/login.html.",2019-06-26 +,AGE-FRIENDLY NEW HAMPSHIRE,"Abstract The NH Alliance for Healthy Aging (AHA) is a statewide collective impact initiative of 265+ stakeholders, representing 170+ organizations (http://nhaha.info/). AHA’s approach is: a) change the conversation about aging; b) change public policy to promote a strong, stable infrastructure for aging; and c) change practices across public and private sectors to improve care and support for older adults, their families, and communities. 
The work is guided by a shared vision for communities to advance culture, policies, and services which support older adults and their families. The Endowment for Health supported a process led by the University of NH to establish strategic priority areas: develop advocacy infrastructure; enhance family caregiver support; improve availability of quality workforce; increase transportation options; and advance zoning changes for better housing options. The NH Healthy Aging Data Report will support this state-level approach and local communities, such as NH Southern Planning Commission’s Age-Friendly Initiative.",2018-11-01 +32984478,Microbial and functional diversity of Cyclopia intermedia rhizosphere microbiome revealed by analysis of shotgun metagenomics sequence data.,"Cyclopia spp., commonly referred to as honeybush due to the honey scented flowers, are indigenous legumes mainly growing in the Cape Floristic Region of the Western Cape, South Africa. Dozens of species, including Cyclopia intermedia, C. subternata, C. plicata, C. genistoides are used to make the well-known, popular and widely enjoyed beverage called 'honeybush tea'. In the past, most rhizosphere microbial studies associated with Cyclopia spp. focused mainly on the taxonomy and diversity of the root nodule associated symbiotic nitrogen fixing rhizobia. The work presented here is the first report on the microbial and functional diversity of rhizosphere microbiome associated with Cyclopia intermedia. Metagenomic shotgun sequencing was performed on the rhizosphere soil sample collected from this Cyclopia sp. using illumina Hiseq 2500 platform which resulted in an α- diversity of 312 species. Analysis of the metagenome sequence using the Metagenomic analysis server (MG-RAST) indicated that bacteria constitute the dominant domain followed by Eukaryota, Archaea and other sequences derived from fungi and viruses. 
Functional diversity of the metagenome based on analysis using the Cluster Orthologous Group (COG) method showed metabolism as the most important function in the community. The raw sequence data is uploaded in FASTQ format on MG-RAST server with ID mgm4855911.3 which can be accessed at http://www.mg-rast.org/linkin.cgi?project=mgp90368. The data on the microbial and functional diversity of the rhizosphere community of Cyclopia intermedia generates a baseline information about the microbial ecology of this indigenous legume. The microbial profile data can also be used as indicators of soil health characteristic of the rhizosphere of this important legume.",2020-09-07 +33345792,Pushing the bounds of second stage in term nulliparas with a predictive model.,"

Background

Management of the second stage of labor continues to be a clinical challenge with unclear indications for abandoning attempts at spontaneous vaginal delivery. The conflict between diminishing chances of spontaneous vaginal delivery and increasing maternal and neonatal morbidity is difficult to quantify, leading to significant variation in management between providers, and variation in rates of operative vaginal delivery and cesarean birth.

Objective

The objective of the study was to develop an hourly prediction model for spontaneous vaginal delivery during the second stage of labor in nulliparous women with epidural anesthesia.

Study design

This was a secondary analysis of the Consortium for Safe Labor database. The Consortium for Safe Labor collected data from 228,652 patients at 19 hospitals in the United States from 2002 through 2008. Primary outcome was delivery type per hour of second stage: spontaneous vaginal delivery vs operative delivery (operative vaginal and cesarean delivery). Inclusion criteria were term nulliparas with singleton gestations, vertex presentation, and attainment of 10 cm cervical dilation with epidural anesthesia. Exclusion criteria were intrauterine fetal demise, planned cesarean delivery, and major congenital anomalies. An optimal decision tree was used to create a prediction model. A test set was withheld from the data set to perform validation. A risk calculator tool was developed for prediction of spontaneous vaginal birth as well as adverse perinatal outcomes per hour. Adverse maternal outcomes were a composite of postpartum hemorrhage, transfusion, endometritis and third-/fourth-degree laceration. Adverse neonatal outcomes were a composite of neonatal intensive care unit admission, hypoxic ischemic encephalopathy, respiratory distress, seizures, apnea, asphyxia, and shoulder dystocia.

Results

The study population included 228,438 deliveries; 26,796 patients met inclusion and exclusion criteria. After removing cases with incomplete data, the study population consisted of 22,299 women, of which 16,593 women had a spontaneous vaginal delivery (74.4%). The number of deliveries at a given hospital per year, fetal position, cervical dilation on admission, chorioamnionitis, augmentation of labor, maternal age, and length of second stage were associated with the odds of spontaneous vaginal delivery. Using the predictors identified, a risk predictor calculator was created, taking into consideration the length of time in the second stage. A receiver-operator characteristic curve was developed to assess the calculator; area under the curve was 0.73. This calculator is available at https://www.pushprescriber.com/.

Conclusion

Spontaneous vaginal delivery for women with term, cephalic, singleton gestations with epidural anesthesia was associated with several variables. This calculator tool helps facilitate provider decision making and patient counseling about the value of continuing the second stage of labor based on changing rates of success and risks of maternal and neonatal morbidity with time.",2019-07-20 +,PhytoREF: a reference database of the plastidial 16S rRNA gene of photosynthetic eukaryotes with curated taxonomy,"Photosynthetic eukaryotes have a critical role as the main producers in most ecosystems of the biosphere. The ongoing environmental metabarcoding revolution opens the perspective for holistic ecosystems biological studies of these organisms, in particular the unicellular microalgae that often lack distinctive morphological characters and have complex life cycles. To interpret environmental sequences, metabarcoding necessarily relies on taxonomically curated databases containing reference sequences of the targeted gene (or barcode) from identified organisms. To date, no such reference framework exists for photosynthetic eukaryotes. In this study, we built the PhytoREF database that contains 6490 plastidial 16S rDNA reference sequences that originate from a large diversity of eukaryotes representing all known major photosynthetic lineages. We compiled 3333 amplicon sequences available from public databases and 879 sequences extracted from plastidial genomes, and generated 411 novel sequences from cultured marine microalgal strains belonging to different eukaryotic lineages. A total of 1867 environmental Sanger 16S rDNA sequences were also included in the database. Stringent quality filtering and a phylogeny‐based taxonomic classification were applied for each 16S rDNA sequence. 
The database mainly focuses on marine microalgae, but sequences from land plants (representing half of the PhytoREF sequences) and freshwater taxa were also included to broaden the applicability of PhytoREF to different aquatic and terrestrial habitats. PhytoREF, accessible via a web interface (http://phytoref.fr), is a new resource in molecular ecology to foster the discovery, assessment and monitoring of the diversity of photosynthetic eukaryotes using high‐throughput sequencing.",2015-11-01 +,FINANCIAL EXPLOITATION: THE DARK SIDE OF FINANCIAL INCAPACITY?,"Abstract Our program of research has been examining the intersection of financial exploitation, financial decision making and cognitive decline. With the creation of new person centered assessment tools for real world financial decision making, we have collected data on 475 older adults, and participated in 30 probate and circuit court cases involving questions of financial capacity. Focused on a conceptual model that examines not only the intellectual factors of financial decision making, but also the financial awareness and psychosocial vulnerability factors, we will discuss our validation and cross-validation studies of our 10-item screening scale being used by Adult Protective Services workers, and our Rating scale (65 and 35 item versions) and our studies on its validity and reliability. In addition to sharing results from the research we will introduce our etraining and certification process through our website https://olderadultnestegg.com.",2018-11-01 +32839519,Serum albumin as a predictor of neurological recovery after spinal cord injury: a replication study.,"

Study design

This was a secondary analysis on an observational cohort study.

Objective

To determine if serum albumin significantly associates with long-term neurological outcome (i.e., 1-year post-injury) in a contemporary cohort of individuals with spinal cord injury.

Setting

Six rehabilitation centers across the United States.

Methods

A secondary analysis of neurological outcomes and serum albumin concentrations was performed on data from the Spinal Cord Injury Rehabilitation study. Data was accessed from the Archive of Data on Disability to Enable Policy and research (ADDEP). The primary analysis applied unbiased recursive partitioning to examine the relationship between serum albumin, injury severity, and long-term outcomes. The analysis is accessible via https://rpubs.com/AnhKhoaVo/586028 .

Results

Serum albumin concentration was significantly associated with lower extremity motor scores (LEMS) and American Spinal Injury Association Impairment Scale (AIS) grade at admission to rehabilitation. Serum albumin concentrations alone were also significantly associated with change of LEMS and marked recovery (improvement of at least 2 AIS grades and/or recovery to walking) at 1-year post injury. However, after adjusting for admission to rehabilitation LEMS and AIS grade, serum albumin was not significant.

Conclusion

The current study partially confirms our previous observations that serum albumin concentrations are associated with neurological outcome after spinal cord injury. As a crude prognostic biomarker, serum albumin concentration could be useful in cases where injury severity cannot be accurately assessed.",2020-08-24 +33180536,Education and training guidelines for psychological assessment in health service psychology.,"While recent survey findings suggest graduate programs in health service psychology (HSP) are allocating the same or increased time to education and training in psychological assessment over the last two decades, there is a lack of clear guidance for programs to implement practices associated with quality education and training. These Guidelines (found in full at https://www.apa.org/about/policy/guidelines-assessment-health-service.pdf) were developed to address this critical need. Developed by a task force of the American Psychological Association Board of Educational Affairs in 2018 and 2019, the Guidelines serve to inform faculty/supervisors, students, and the public as to quality practices associated with graduate education and training in psychological assessment. They are organized around seven domains: theory; psychological assessment process; psychometrics; tests and methods; ethics, legal issues, and professionalism; diversity; and supervision. These domains are drawn from a review of the scholarly literature on psychological assessment, as well as graduate psychology education and training. The domains and their associated Guidelines are interdependent, and, while some overlap exists among them, they should be considered in their entirety. While a summary of each section is provided in the present article, the full explanation of each domain is presented in the actual Guidelines document. 
(PsycInfo Database Record (c) 2021 APA, all rights reserved).",2020-11-12 +31151998,GRAF-pop: A Fast Distance-Based Method To Infer Subject Ancestry from Multiple Genotype Datasets Without Principal Components Analysis.,"Inferring subject ancestry using genetic data is an important step in genetic association studies, required for dealing with population stratification. It has become more challenging to infer subject ancestry quickly and accurately since large amounts of genotype data, collected from millions of subjects by thousands of studies using different methods, are accessible to researchers from repositories such as the database of Genotypes and Phenotypes (dbGaP) at the National Center for Biotechnology Information (NCBI). Study-reported populations submitted to dbGaP are often not harmonized across studies or may be missing. Widely-used methods for ancestry prediction assume that most markers are genotyped in all subjects, but this assumption is unrealistic if one wants to combine studies that used different genotyping platforms. To provide ancestry inference and visualization across studies, we developed a new method, GRAF-pop, of ancestry prediction that is robust to missing genotypes and allows researchers to visualize predicted population structure in color and in three dimensions. When genotypes are dense, GRAF-pop is comparable in quality and running time to existing ancestry inference methods EIGENSTRAT, FastPCA, and FlashPCA2, all of which rely on principal components analysis (PCA). When genotypes are not dense, GRAF-pop gives much better ancestry predictions than the PCA-based methods. GRAF-pop employs basic geometric and probabilistic methods; the visualized ancestry predictions have a natural geometric interpretation, which is lacking in PCA-based methods. 
Since February 2018, GRAF-pop has been successfully incorporated into the dbGaP quality control process to identify inconsistencies between study-reported and computationally predicted populations and to provide harmonized population values in all new dbGaP submissions amenable to population prediction, based on marker genotypes. Plots, produced by GRAF-pop, of summary population predictions are available on dbGaP study pages, and the software, is available at https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/Software.cgi.",2019-08-08 +27153608,ProbOnto: ontology and knowledge base of probability distributions.,"

Motivation

Probability distributions play a central role in mathematical and statistical modelling. The encoding, annotation and exchange of such models could be greatly simplified by a resource providing a common reference for the definition of probability distributions. Although some resources exist, no suitably detailed and complex ontology exists, nor does any database allow programmatic access.

Results

ProbOnto is an ontology-based knowledge base of probability distributions, featuring more than 80 uni- and multivariate distributions with their defining functions, characteristics, relationships and re-parameterization formulas. It can be used for model annotation and facilitates the encoding of distribution-based models, related functions and quantities.

Availability and implementation

http://probonto.org

Contact

mjswat@ebi.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-03 +33552531,The Dutch Auditory & Image Vocabulary Test (DAIVT): A New Dutch Receptive Vocabulary Test for Students.,"We introduce a new Dutch receptive vocabulary test, the Dutch auditory & image vocabulary test (DAIVT). The test is multiple choice and assesses vocabulary knowledge for spoken words. The measure has an online (available at https://tpsurvey.ugent.be/limesurvey315/index.php/923234?lang=nl) format, has free access, and allows easy data collection. The test was developed with the intent to enable testing for research purposes with university students. This paper describes the test construction. We cover three phases: 1) collecting stimulus materials and developing the test's first version, 2) an exploratory item-analysis on the first draft (n = 93), and 3) validating the test (both the second and the final version) by comparing it to two existing tests (n = 270, n = 157). The results indicate that the test is reliable and correlates well with existing Dutch receptive vocabulary tests (convergent validity). The final version of the DAIVT comprises 90 test items and 1 practice item. It can be used freely for research purposes.",2021-01-19 +29204945,"D3R Grand Challenge 2: blind prediction of protein-ligand poses, affinity rankings, and relative binding free energies.","The Drug Design Data Resource (D3R) ran Grand Challenge 2 (GC2) from September 2016 through February 2017. This challenge was based on a dataset of structures and affinities for the nuclear receptor farnesoid X receptor (FXR), contributed by F. Hoffmann-La Roche. The dataset contained 102 IC50 values, spanning six orders of magnitude, and 36 high-resolution co-crystal structures with representatives of four major ligand classes. Strong global participation was evident, with 49 participants submitting 262 prediction submission packages in total. 
Procedurally, GC2 mimicked Grand Challenge 2015 (GC2015), with a Stage 1 subchallenge testing ligand pose prediction methods and ranking and scoring methods, and a Stage 2 subchallenge testing only ligand ranking and scoring methods after the release of all blinded co-crystal structures. Two smaller curated sets of 18 and 15 ligands were developed to test alchemical free energy methods. This overview summarizes all aspects of GC2, including the dataset details, challenge procedures, and participant results. We also consider implications for progress in the field, while highlighting methodological areas that merit continued development. Similar to GC2015, the outcome of GC2 underscores the pressing need for methods development in pose prediction, particularly for ligand scaffolds not currently represented in the Protein Data Bank ( http://www.pdb.org ), and in affinity ranking and scoring of bound ligands.",2017-12-04 +26527721,PGSB PlantsDB: updates to the database framework for comparative plant genome research.,"PGSB (Plant Genome and Systems Biology: formerly MIPS) PlantsDB (http://pgsb.helmholtz-muenchen.de/plant/index.jsp) is a database framework for the comparative analysis and visualization of plant genome data. The resource has been updated with new data sets and types as well as specialized tools and interfaces to address user demands for intuitive access to complex plant genome data. In its latest incarnation, we have re-worked both the layout and navigation structure and implemented new keyword search options and a new BLAST sequence search functionality. Actively involved in corresponding sequencing consortia, PlantsDB has dedicated special efforts to the integration and visualization of complex triticeae genome data, especially for barley, wheat and rye. 
We enhanced CrowsNest, a tool to visualize syntenic relationships between genomes, with data from the wheat sub-genome progenitor Aegilops tauschii and added functionality to the PGSB RNASeqExpressionBrowser. GenomeZipper results were integrated for the genomes of barley, rye, wheat and perennial ryegrass and interactive access is granted through PlantsDB interfaces. Data exchange and cross-linking between PlantsDB and other plant genome databases is stimulated by the transPLANT project (http://transplantdb.eu/).",2015-11-02 +29863501,Provider Perceptions of Bubble Continuous Positive Airway Pressure and Barriers to Implementation in a Level III Neonatal Unit in South India.,"

Background

Bubble continuous positive airway pressure (bCPAP) is a simple, safe, and cost-effective strategy to provide respiratory support to newborns with respiratory distress syndrome in resource-limited settings.

Purpose

To understand whether implementation of bCPAP, relative to other modes of respiratory support in the care of newborns with respiratory distress syndrome, increases positive attitudes about its potential for consistent and widespread use among providers in neonatal intensive care units (NICUs) of lower middle-income countries.

Methods

Semistructured qualitative interviews with 14 healthcare providers, including 5 neonatal nurses, 2 respiratory therapists, 5 postgraduate trainees in pediatrics, and 2 attending physicians, were conducted at a level III NICU in south India where bCPAP had been in consistent use for 6 years. Interviews were transcribed and then coded and categorized using NVivo 10 Software (QSR International, Victoria, Australia).

Findings

Categories that emerged from our data include (1) perceived indications, (2) learning curve, (3) perceived costs, (4) perceived shortages, and (5) barriers to use. Providers believed that bCPAP was easy to learn and that it helped empower neonatal nurses in the decision-making process. Participants provided a nuanced perspective on the costs and benefits associated with bCPAP and felt that it helped make optimal use of limited resources. Participants identified several barriers to the implementation of bCPAP.

Implications for practice

Providers of a level III NICU in a lower- to middle-income country viewed the use of bCPAP favorably. Addressing context-specific barriers will be important for the successful widespread implementation of bCPAP.

Implications for research

Further research will need to focus on whether bCPAP can be safely implemented at level II NICUs.Video Abstract Available at https://journals.lww.com/advancesinneonatalcare/Pages/videogallery.aspx.",2018-12-01 +26582920,iGNM 2.0: the Gaussian network model database for biomolecular structural dynamics.,"Gaussian network model (GNM) is a simple yet powerful model for investigating the dynamics of proteins and their complexes. GNM analysis became a broadly used method for assessing the conformational dynamics of biomolecular structures with the development of a user-friendly interface and database, iGNM, in 2005. We present here an updated version, iGNM 2.0 http://gnmdb.csb.pitt.edu/, which covers more than 95% of the structures currently available in the Protein Data Bank (PDB). Advanced search and visualization capabilities, both 2D and 3D, permit users to retrieve information on inter-residue and inter-domain cross-correlations, cooperative modes of motion, the location of hinge sites and energy localization spots. The ability of iGNM 2.0 to provide structural dynamics data on the large majority of PDB structures and, in particular, on their biological assemblies makes it a useful resource for establishing the bridge between structure, dynamics and function.",2015-11-17 +28710041,"Create, run, share, publish, and reference your LC-MS, FIA-MS, GC-MS, and NMR data analysis workflows with the Workflow4Metabolomics 3.0 Galaxy online infrastructure for metabolomics.","Metabolomics is a key approach in modern functional genomics and systems biology. Due to the complexity of metabolomics data, the variety of experimental designs, and the multiplicity of bioinformatics tools, providing experimenters with a simple and efficient resource to conduct comprehensive and rigorous analysis of their data is of utmost importance. 
In 2014, we launched the Workflow4Metabolomics (W4M; http://workflow4metabolomics.org) online infrastructure for metabolomics built on the Galaxy environment, which offers user-friendly features to build and run data analysis workflows including preprocessing, statistical analysis, and annotation steps. Here we present the new W4M 3.0 release, which contains twice as many tools as the first version, and provides two features which are, to our knowledge, unique among online resources. First, data from the four major metabolomics technologies (i.e., LC-MS, FIA-MS, GC-MS, and NMR) can be analyzed on a single platform. By using three studies in human physiology, alga evolution, and animal toxicology, we demonstrate how the 40 available tools can be easily combined to address biological issues. Second, the full analysis (including the workflow, the parameter values, the input data and output results) can be referenced with a permanent digital object identifier (DOI). Publication of data analyses is of major importance for robust and reproducible science. Furthermore, the publicly shared workflows are of high-value for e-learning and training. The Workflow4Metabolomics 3.0 e-infrastructure thus not only offers a unique online environment for analysis of data from the main metabolomics technologies, but it is also the first reference repository for metabolomics workflows.",2017-07-12 +33175354,Overview of Erasmus+ NETCHEM project: ICT networking for overcoming technical and social barriers in instrumental analytical chemistry education.,"The paper briefly presents goals, activities, challenges, and outcomes of the NETCHEM project ( http://www.netchem.ac.rs/ ) that was co-funded by the Erasmus+ Program of European Union (573885-EPP-1-2016-1-RS-EPPKA2- CBHE-JP). The project has been started in October 2016 and with extension lasted until April 2020. 
Western Balkan region has been targeted by upgrading capacities for education and research in environmental and food analysis in cooperation with partners from France, the UK, and Czech Republic. NETCHEM platform providing Web Accessed Remote Instrumental Analytical Laboratories (WARIAL) network, Database service and Open education system was created in order to improve the cooperation, educational, and research capacities of Higher Education Institutions involved, but also targeting whether audience not only from academic domain but from industry as well. The NETCHEM platform is free for access to public; thus, the external users to NETCHEM consortium can not only see its content but also actively participate, enter Database and WARIAL network, and upload their own educational/research material.",2020-11-11 +33177514,"SAVI, in silico generation of billions of easily synthesizable compounds through expert-system type rules.","We have made available a database of over 1 billion compounds predicted to be easily synthesizable, called Synthetically Accessible Virtual Inventory (SAVI). They have been created by a set of transforms based on an adaptation and extension of the CHMTRN/PATRAN programming languages describing chemical synthesis expert knowledge, which originally stem from the LHASA project. The chemoinformatics toolkit CACTVS was used to apply a total of 53 transforms to about 150,000 readily available building blocks (enamine.net). Only single-step, two-reactant syntheses were calculated for this database even though the technology can execute multi-step reactions. The possibility to incorporate scoring systems in CHMTRN allowed us to subdivide the database of 1.75 billion compounds in sets according to their predicted synthesizability, with the most-synthesizable class comprising 1.09 billion synthetic products. Properties calculated for all SAVI products show that the database should be well-suited for drug discovery. 
It is being made publicly available for free download from https://doi.org/10.35115/37n9-5738.",2020-11-11 +32757305,Absorption and Fluorescence Spectral Database of Chlorophylls and Analogues.,"Absorption spectra and fluorescence spectra are essential for use across the photosciences, yet such spectra along with the all-important values for molar absorption coefficient (ε) and fluorescence quantum yield (Φf ) often are found with great difficulty. Here, a literature survey concerning the vital class of chlorophyll compounds has led to identification of spectra for 150 members. Spectra in print form have been digitized (with baseline corrections) and assembled into a database along with literature references, solvent identity and values for ε and Φf (where available). The database encompasses photosynthetic tetrapyrroles wherein the chromophore is a porphyrin (e.g. chlorophyll c1 , protochlorophyll a), chlorin (e.g. chlorophyll a, bacteriochlorophyll c) or bacteriochlorin (e.g. bacteriochlorophyll a). Altogether, the database contains 305 absorption spectra (from 19 porphyrins, 109 chlorins and 22 bacteriochlorins) and 72 fluorescence spectra (from 10 porphyrins, 30 chlorins and 4 bacteriochlorins). The spectral database should facilitate comparisons and quantitative calculations. All spectra are available in print form in the Supporting Information. The entire database in digital form is available with the PhotochemCAD program for free downloading and further use at http://www.photochemcad.com.",2020-11-11 +27374120,The harmonizome: a collection of processed datasets gathered to serve and mine knowledge about genes and proteins. ,"Genomics, epigenomics, transcriptomics, proteomics and metabolomics efforts rapidly generate a plethora of data on the activity and levels of biomolecules within mammalian cells. At the same time, curation projects that organize knowledge from the biomedical literature into online databases are expanding. 
Hence, there is a wealth of information about genes, proteins and their associations, with an urgent need for data integration to achieve better knowledge extraction and data reuse. For this purpose, we developed the Harmonizome: a collection of processed datasets gathered to serve and mine knowledge about genes and proteins from over 70 major online resources. We extracted, abstracted and organized data into ∼72 million functional associations between genes/proteins and their attributes. Such attributes could be physical relationships with other biomolecules, expression in cell lines and tissues, genetic associations with knockout mouse or human phenotypes, or changes in expression after drug treatment. We stored these associations in a relational database along with rich metadata for the genes/proteins, their attributes and the original resources. The freely available Harmonizome web portal provides a graphical user interface, a web service and a mobile app for querying, browsing and downloading all of the collected data. To demonstrate the utility of the Harmonizome, we computed and visualized gene-gene and attribute-attribute similarity networks, and through unsupervised clustering, identified many unexpected relationships by combining pairs of datasets such as the association between kinase perturbations and disease signatures. We also applied supervised machine learning methods to predict novel substrates for kinases, endogenous ligands for G-protein coupled receptors, mouse phenotypes for knockout genes, and classified unannotated transmembrane proteins for likelihood of being ion channels. 
The Harmonizome is a comprehensive resource of knowledge about genes and proteins, and as such, it enables researchers to discover novel relationships between biological entities, as well as form novel data-driven hypotheses for experimental validation.Database URL: http://amp.pharm.mssm.edu/Harmonizome.",2016-07-03 +27128319,The Ontology for Biomedical Investigations.,"The Ontology for Biomedical Investigations (OBI) is an ontology that provides terms with precisely defined meanings to describe all aspects of how investigations in the biological and medical domains are conducted. OBI re-uses ontologies that provide a representation of biomedical knowledge from the Open Biological and Biomedical Ontologies (OBO) project and adds the ability to describe how this knowledge was derived. We here describe the state of OBI and several applications that are using it, such as adding semantic expressivity to existing databases, building data entry forms, and enabling interoperability between knowledge resources. OBI covers all phases of the investigation process, such as planning, execution and reporting. It represents information and material entities that participate in these processes, as well as roles and functions. Prior to OBI, it was not possible to use a single internally consistent resource that could be applied to multiple types of experiments for these applications. OBI has made this possible by creating terms for entities involved in biological and medical investigations and by importing parts of other biomedical ontologies such as GO, Chemical Entities of Biological Interest (ChEBI) and Phenotype Attribute and Trait Ontology (PATO) without altering their meaning. OBI is being used in a wide range of projects covering genomics, multi-omics, immunology, and catalogs of services. OBI has also spawned other ontologies (Information Artifact Ontology) and methods for importing parts of ontologies (Minimum information to reference an external ontology term (MIREOT)). 
The OBI project is an open cross-disciplinary collaborative effort, encompassing multiple research communities from around the globe. To date, OBI has created 2366 classes and 40 relations along with textual and formal definitions. The OBI Consortium maintains a web resource (http://obi-ontology.org) providing details on the people, policies, and issues being addressed in association with OBI. The current release of OBI is available at http://purl.obolibrary.org/obo/obi.owl.",2016-04-29 +24910945,MOPED 2.5--an integrated multi-omics resource: multi-omics profiling expression database now includes transcriptomics data.,"Multi-omics data-driven scientific discovery crucially rests on high-throughput technologies and data sharing. Currently, data are scattered across single omics repositories, stored in varying raw and processed formats, and are often accompanied by limited or no metadata. The Multi-Omics Profiling Expression Database (MOPED, http://moped.proteinspire.org ) version 2.5 is a freely accessible multi-omics expression database. Continual improvement and expansion of MOPED is driven by feedback from the Life Sciences Community. In order to meet the emergent need for an integrated multi-omics data resource, MOPED 2.5 now includes gene relative expression data in addition to protein absolute and relative expression data from over 250 large-scale experiments. To facilitate accurate integration of experiments and increase reproducibility, MOPED provides extensive metadata through the Data-Enabled Life Sciences Alliance (DELSA Global, http://delsaglobal.org ) metadata checklist. MOPED 2.5 has greatly increased the number of proteomics absolute and relative expression records to over 500,000, in addition to adding more than four million transcriptomics relative expression records. MOPED has an intuitive user interface with tabs for querying different types of omics expression data and new tools for data visualization. 
Summary information including expression data, pathway mappings, and direct connection between proteins and genes can be viewed on Protein and Gene Details pages. These connections in MOPED provide a context for multi-omics expression data exploration. Researchers are encouraged to submit omics data which will be consistently processed into expression summaries. MOPED as a multi-omics data resource is a pivotal public database, interdisciplinary knowledge resource, and platform for multi-omics understanding.",2014-06-01 +34756215,GANs for medical image analysis.,"Generative adversarial networks (GANs) and their extensions have carved open many exciting ways to tackle well known and challenging medical image analysis problems such as medical image de-noising, reconstruction, segmentation, data simulation, detection or classification. Furthermore, their ability to synthesize images at unprecedented levels of realism also gives hope that the chronic scarcity of labeled data in the medical field can be resolved with the help of these generative models. In this review paper, a broad overview of recent literature on GANs for medical applications is given, the shortcomings and opportunities of the proposed methods are thoroughly discussed, and potential future work is elaborated. We review the most relevant papers published until the submission date. For quick access, essential details such as the underlying method, datasets, and performance are tabulated. An interactive visualization that categorizes all papers to keep the review alive is available at http://livingreview.in.tum.de/GANs_for_Medical_Applications/.",2020-08-09 +32383755,ARIAweb: a server for automated NMR structure calculation.,"Nuclear magnetic resonance (NMR) spectroscopy is a method of choice to study the dynamics and determine the atomic structure of macromolecules in solution. 
The standalone program ARIA (Ambiguous Restraints for Iterative Assignment) for automated assignment of nuclear Overhauser enhancement (NOE) data and structure calculation is well established in the NMR community. To ultimately provide a perfectly transparent and easy to use service, we designed an online user interface to ARIA with additional functionalities. Data conversion, structure calculation setup and execution, followed by interactive visualization of the generated 3D structures are all integrated in ARIAweb and freely accessible at https://ariaweb.pasteur.fr.",2020-07-01 +33638635,BERT4Bitter: a bidirectional encoder representations from transformers (BERT)-based model for improving the prediction of bitter peptides. ,"The identification of bitter peptides through experimental approaches is an expensive and time-consuming endeavor. Due to the huge number of newly available peptide sequences in the post-genomic era, the development of automated computational models for the identification of novel bitter peptides is highly desira-ble. In this work, we present BERT4Bitter, a bidirectional encoder representation from transformers (BERT)-based model for predicting bitter peptides directly from their amino acid sequence without using any structural information. To the best of our knowledge, this is the first time a BERT-based model has been employed to identify bitter peptides. Compared to widely used machine learning models, BERT4Bitter achieved the best performance with accuracy of 0.861 and 0.922 for cross-validation and independent tests, respectively. Furthermore, extensive empirical benchmarking experiments on the independent dataset demonstrated that BERT4Bitter clearly outperformed the existing method with improvements of > 8% accuracy and >16% Matthews coefficient correlation, highlighting the effectiveness and robustness of BERT4Bitter. 
We believe that the BERT4Bitter method proposed herein will be a useful tool for rapidly screening and identifying novel bitter peptides for drug development and nutritional research. The user-friendly web server of the proposed BERT4Bitter is freely accessible at: http://pmlab.pythonanywhere.com/BERT4Bitter. Supplementary data are available at Bioinformatics online.",2021-02-26 +33186582,Cppsite 2.0: An Available Database of Experimentally Validated Cell-Penetrating Peptides Predicting their Secondary and Tertiary Structures.,"One of the biggest barriers in drug and vaccine development is to find an effective delivery system. Cell-penetrating peptides (CPPs) play a crucial role for delivery of biological cargoes and pass them through the membranes. Several databases have been developed for therapeutic peptides as potential drug candidates and delivery vehicles. A rapid growth has occurred in many patents and research articles on CPPs as therapeutic peptides. To save time and cost in laboratories, prediction and design of CPPs before in vitro/in vivo experiments using computational methods and online web servers are rational. Various online web servers which provide prediction of CPPs including CellPPD, CPPpred, CPPred-RF and MLCPP, and also different curated databases that present validated information of CPPs such as CPPsite 2.0 have been developed up to now. Two methods including CellPPD and CPPpred were applied to predict and design potent CPPs. CPPsite 2.0 is a user-friendly updated database that provides various information about CPPs and contains 1855 entries. This database provides comprehensive information on experimentally tested CPPs and prediction of their secondary and tertiary structures to realize their structure-function relationship. 
Furthermore, each entry presents information of a CPP including chirality, origin, nature of peptide, sub-cellular localization, uptake mechanism and efficiency, amino acid composition, hydrophobicity, and physicochemical properties. One of main goals of CPPsite 2.0 database is to provide the latest datasets of CPPs for analysis and development of CPP prediction methods. CPPsite 2.0 is freely available at https://webs.iiitd.edu.in/raghava/cppsite.",2020-11-10 +33170902,Large scale genomic analysis of 3067 SARS-CoV-2 genomes reveals a clonal geo-distribution and a rich genetic variations of hotspots mutations.,"In late December 2019, an emerging viral infection COVID-19 was identified in Wuhan, China, and became a global pandemic. Characterization of the genetic variants of SARS-CoV-2 is crucial in following and evaluating it spread across countries. In this study, we collected and analyzed 3,067 SARS-CoV-2 genomes isolated from 55 countries during the first three months after the onset of this virus. Using comparative genomics analysis, we traced the profiles of the whole-genome mutations and compared the frequency of each mutation in the studied population. The accumulation of mutations during the epidemic period with their geographic locations was also monitored. The results showed 782 variants sites, of which 512 (65.47%) had a non-synonymous effect. Frequencies of mutated alleles revealed the presence of 68 recurrent mutations, including ten hotspot non-synonymous mutations with a prevalence higher than 0.10 in this population and distributed in six SARS-CoV-2 genes. The distribution of these recurrent mutations on the world map revealed that certain genotypes are specific to geographic locations. We also identified co-occurring mutations resulting in the presence of several haplotypes. Moreover, evolution over time has shown a mechanism of mutation co-accumulation which might affect the severity and spread of the SARS-CoV-2. 
The phylogentic analysis identified two major Clades C1 and C2 harboring mutations L3606F and G614D, respectively and both emerging for the first time in China. On the other hand, analysis of the selective pressure revealed the presence of negatively selected residues that could be taken into considerations as therapeutic targets. We have also created an inclusive unified database (http://covid-19.medbiotech.ma) that lists all of the genetic variants of the SARS-CoV-2 genomes found in this study with phylogeographic analysis around the world.",2020-11-10 +34036443,"BDD Knowledge, Attitude and Practice Among Aesthetic Plastic Surgeons Worldwide.","

Background

Body dysmorphic disorder (BDD) is a controversial topic in the field of plastic surgery.

Objective

Our aim was to determine whether BDD knowledge, attitude and practice (KAP) are affected by the experience of the surgeon in the field, sex of the surgeon, country of practice, and the number of patients the surgeon sees annually. We were particularly interested in uncovering any significant relations in KAP of BDD between plastic surgeons practicing in developed versus developing countries.

Methods

We created a two-page survey of 24 questions about the KAP of BDD. The survey was sent to aesthetic plastic surgeons worldwide via ISAPS global email list. The data were collected over a period of 20 days at the end of 2020.

Results

A total of 464 plastic surgeons completed the survey. The only factor that determines the awareness of BDD is the experience of the surgeon. The more experienced the surgeon is, the more likely he/she is to be familiar with the clinical picture of BDD. Although aware, the more experienced surgeons tend to dismiss the importance of referring BDD patients to psychiatrists/psychologists. Male surgeons tend to diagnose more patients with BDD than female surgeons. Surgeons who estimated the correct prevalence of BDD among patients seeking surgery acquired knowledge of BDD from scientific journals. The KAP is relatively similar between surgeons practicing in developed and developing countries, and the main statistically significant difference was in the questions used during the course of the interviews to diagnose BDD.

Conclusion

We can deduce from the results that most aesthetic surgeons worldwide have got knowledge of the presentation of BDD and are keen to diagnose the disorder in their practice. It is worth noting that surgeons usually have their unique approach in the management of BDD. Our study highlights the importance of not only raising awareness of the best management of BDD, but also of establishing a consensus that BDD is a contraindication to aesthetic treatment. The best methods to raise awareness are through journals and plastic surgery residency.

Level of evidence v

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors https://www.springer.com/00266 .",2021-05-25 +32258287,Multivariate sensor signals collected by aquatic drones involved in water monitoring: A complete dataset.,"Sensor data generated by intelligent systems, such as autonomous robots, smart buildings and other systems based on artificial intelligence, represent valuable sources of knowledge in today's data-driven society, since they contain information about the situations these systems face during their operation. These data are usually multivariate time series since modern technologies enable the simultaneous acquisition of multiple signals during long periods of time. In this paper we present a dataset containing sensor traces of six data acquisition campaigns performed by autonomous aquatic drones involved in water monitoring. A total of 5.6 h of navigation are available, with data coming from both lakes and rivers, and from different locations in Italy and Spain. The monitored variables concern both the internal state of the drone (e.g., battery voltage, GPS position and signals to propellers) and the state of the water (e.g., temperature, dissolved oxygen and electrical conductivity). Data were collected in the context of the EU-funded Horizon 2020 project INTCATCH (http://www.intcatch.eu) which aims to develop a new paradigm for monitoring water quality of catchments. The aquatic drones used for data acquisition are Platypus Lutra boats. Both autonomous and manual drive is used in different parts of the navigation. 
The dataset is analyzed in the paper ""Time series segmentation for state-model generation of autonomous aquatic drones: A systematic framework"" [1] by means of recent time series clustering/segmentation techniques to extract data-driven models of the situations faced by the drones in the data acquisition campaigns. These data have strong potential for reuse in other kinds of data analysis and evaluation of machine learning methods on real-world datasets [2]. Moreover, we consider this dataset valuable also for the variety of situations faced by the drone, from which machine learning techniques can learn behavioral patterns or detect anomalous activities. We also provide manual labeling for some known states of the drones, such as, drone inside/outside the water, upstream/downstream navigation, manual/autonomous drive, and drone turning, that represent a ground truth for validation purposes. Finally, the real-world nature of the dataset makes it more challenging for machine learning methods because it contains noisy samples collected while the drone was exposed to atmospheric agents and uncertain water flow conditions.",2020-03-19 +32938971,IMPROvER: the Integral Membrane Protein Stability Selector.,"Identifying stabilising variants of membrane protein targets is often required for structure determination. Our new computational pipeline, the Integral Membrane Protein Stability Selector (IMPROvER) provides a rational approach to variant selection by employing three independent approaches: deep-sequence, model-based and data-driven. In silico tests using known stability data, and in vitro tests using three membrane protein targets with 7, 11 and 16 transmembrane helices provided measures of success. In vitro, individual approaches alone all identified stabilising variants at a rate better than expected by random selection. 
Low numbers of overlapping predictions between approaches meant a greater success rate was achieved (fourfold better than random) when approaches were combined and selections restricted to the highest ranked sites. The mix of information IMPROvER uses can be extracted for any helical membrane protein. We have developed the first general-purpose tool for selecting stabilising variants of [Formula: see text]-helical membrane proteins, increasing efficiency and reducing workload. IMPROvER can be accessed at http://improver.ddns.net/IMPROvER/ .",2020-09-16 +30357379,EndoDB: a database of endothelial cell transcriptomics data.,"Endothelial cells (ECs) line blood vessels, regulate homeostatic processes (blood flow, immune cell trafficking), but are also involved in many prevalent diseases. The increasing use of high-throughput technologies such as gene expression microarrays and (single cell) RNA sequencing generated a wealth of data on the molecular basis of EC (dys-)function. Extracting biological insight from these datasets is challenging for scientists who are not proficient in bioinformatics. To facilitate the re-use of publicly available EC transcriptomics data, we developed the endothelial database EndoDB, a web-accessible collection of expert curated, quality assured and pre-analyzed data collected from 360 datasets comprising a total of 4741 bulk and 5847 single cell endothelial transcriptomes from six different organisms. Unlike other added-value databases, EndoDB allows to easily retrieve and explore data of specific studies, determine under which conditions genes and pathways of interest are deregulated and assess reprogramming of metabolism via principal component analysis, differential gene expression analysis, gene set enrichment analysis, heatmaps and metabolic and transcription factor analysis, while single cell data are visualized as gene expression color-coded t-SNE plots. Plots and tables in EndoDB are customizable, downloadable and interactive. 
EndoDB is freely available at https://vibcancer.be/software-tools/endodb, and will be updated to include new studies.",2019-01-01 +32258262,"Annual PM2.5 and cardiovascular mortality rate data: Trends modified by county socioeconomic status in 2,132 US counties.","This article contains data on county-level socioeconomic status for 2132 US counties and each county's average annual cardiovascular mortality rate (CMR) and fine particulate matter (PM2.5) concentration for 21 years (1990-2010). County CMR, PM2.5, and socioeconomic data were obtained from the US National Center for Health Statistics, US Environmental Protection Agency's Community Multiscale Air Quality modeling system, and the US Census, respectively. Annual socioeconomic indices were created using seven county-level measures from the 1990, 2000, and 2010 US Census using factor analysis. Quintiles of this index were used to generate categories of county socioeconomic status. This national data set contains data for annual PM2.5 and CMR changes over a time-period when there was a significant reduction in US air pollutants (following the enactment of the 1970 Clean Air Act). These data are associated with the article ""The contribution of improved air quality to reduced cardiovascular mortality: Declines in socioeconomic differences over time"" [1]. Data are stored in a comma separated value format and can be downloaded from the USEPA ScienceHub data repository (https://doi.org/10.23719/1506014).",2020-03-16 +26602695,Cancer RNA-Seq Nexus: a database of phenotype-specific transcriptome profiling in cancer cells.,"The genome-wide transcriptome profiling of cancerous and normal tissue samples can provide insights into the molecular mechanisms of cancer initiation and progression. RNA Sequencing (RNA-Seq) is a revolutionary tool that has been used extensively in cancer research. 
However, no existing RNA-Seq database provides all of the following features: (i) large-scale and comprehensive data archives and analyses, including coding-transcript profiling, long non-coding RNA (lncRNA) profiling and coexpression networks; (ii) phenotype-oriented data organization and searching and (iii) the visualization of expression profiles, differential expression and regulatory networks. We have constructed the first public database that meets these criteria, the Cancer RNA-Seq Nexus (CRN, http://syslab4.nchu.edu.tw/CRN). CRN has a user-friendly web interface designed to facilitate cancer research and personalized medicine. It is an open resource for intuitive data exploration, providing coding-transcript/lncRNA expression profiles to support researchers generating new hypotheses in cancer research and personalized medicine.",2015-11-23 +35153713,Brain Morphometry and Cognitive Performance in Normal Brain Aging: Age- and Sex-Related Structural and Functional Changes.,"

Background

The human brain structure undergoes considerable changes throughout life. Cognitive function can be affected either negatively or positively. It is challenging to segregate normal brain aging from the accelerated one.

Objective

To work out a descriptive model of brain structural and functional changes in normal aging.

Materials and methods

By using voxel-based morphometry and lesion segmentation along with linear statistics and machine learning (ML), we analyzed the structural changes in the major brain compartments and modeled the dynamics of neurofunctional performance throughout life. We studied sex differences in lifelong dynamics of brain volumetric data with Mann-Whitney U-test. We tested the hypothesis that performance in some cognitive domains might decline as a linear function of age while other domains might have a non-linear dependence on it. We compared the volumetric changes in the major brain compartments with the dynamics of psychophysiological performance in 4 age groups. Then, we tested linear models of structural and functional decline for significant differences between the slopes in age groups with the T-test.

Results

White matter hyperintensities (WMH) are not the major structural determinant of the brain normal aging. They should be viewed as signs of a disease. There is a sex difference in the speed and/or in the onset of the gray matter atrophy. It either starts earlier or goes faster in males. Marked sex difference in the proportion of total cerebrospinal fluid (CSF) and intraventricular CSF (iCSF) justifies that elderly men are more prone to age-related brain atrophy than women of the same age.

Conclusion

The article gives an overview and description of the conceptual structural changes in the brain compartments. The obtained data justify distinct patterns of age-related changes in the cognitive functions. Cross-life slowing of decision-making may follow the linear tendency of enlargement of the interhemispheric fissure because the center of task switching and inhibitory control is allocated within the medial wall of the frontal cortex, and its atrophy accounts for the expansion of the fissure. Free online tool at https://med-predict.com illustrates the tests and study results.",2021-01-01 +34031183,Predictive Approaches for Acute Dialysis Requirement and Death in COVID-19.,"

Background and objectives

AKI treated with dialysis initiation is a common complication of coronavirus disease 2019 (COVID-19) among hospitalized patients. However, dialysis supplies and personnel are often limited.

Design, setting, participants, & measurements

Using data from adult patients hospitalized with COVID-19 from five hospitals from the Mount Sinai Health System who were admitted between March 10 and December 26, 2020, we developed and validated several models (logistic regression, Least Absolute Shrinkage and Selection Operator (LASSO), random forest, and eXtreme GradientBoosting [XGBoost; with and without imputation]) for predicting treatment with dialysis or death at various time horizons (1, 3, 5, and 7 days) after hospital admission. Patients admitted to the Mount Sinai Hospital were used for internal validation, whereas the other hospitals formed part of the external validation cohort. Features included demographics, comorbidities, and laboratory and vital signs within 12 hours of hospital admission.

Results

A total of 6093 patients (2442 in training and 3651 in external validation) were included in the final cohort. Of the different modeling approaches used, XGBoost without imputation had the highest area under the receiver operating characteristic (AUROC) curve on internal validation (range of 0.93-0.98) and area under the precision-recall curve (AUPRC; range of 0.78-0.82) for all time points. XGBoost without imputation also had the highest test parameters on external validation (AUROC range of 0.85-0.87, and AUPRC range of 0.27-0.54) across all time windows. XGBoost without imputation outperformed all models with higher precision and recall (mean difference in AUROC of 0.04; mean difference in AUPRC of 0.15). Features of creatinine, BUN, and red cell distribution width were major drivers of the model's prediction.

Conclusions

An XGBoost model without imputation for prediction of a composite outcome of either death or dialysis in patients positive for COVID-19 had the best performance, as compared with standard and other machine learning models.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2021_07_09_CJN17311120.mp3.",2021-05-24 +33170156,Genetics of pain: From rare Mendelian disorders to genetic predisposition to pain.,"

Background and aim of the work

Pain is defined by the International Association for the Study of Pain as ""an unpleasant sensory and emotional experience associated with actual or potential tissue damage, or described in terms of such damage"". In this mini-review, we focused on the Mendelian disorders with chronic pain as the main characteristic or where pain perception is disrupted, and on the polymorphisms that can impart susceptibility to chronic pain.

Methods

We searched PubMed and Online Mendelian Inheritance in Man (OMIM) databases and selected only syndromes in which pain or insensitivity to pain were among the main characteristics. Polymorphisms were selected from the database GWAS catalog (https://www.ebi.ac.uk/gwas/home).

Results

We retrieved a total of 28 genes associated with Mendelian inheritance in which pain or insensitivity to pain were the main characteristics and 70 polymorphisms associated with modulation of pain perception.

Conclusions

This mini-review highlights the importance of genetics in phenotypes characterized by chronic pain or pain insensitivity. We think that an effective genetic test should analyze all genes associated with Mendelian pain disorders and all SNPs that can increase the risk of pain.",2020-11-09 +33872308,MRLocus: Identifying causal genes mediating a trait through Bayesian estimation of allelic heterogeneity.,"Expression quantitative trait loci (eQTL) studies are used to understand the regulatory function of non-coding genome-wide association study (GWAS) risk loci, but colocalization alone does not demonstrate a causal relationship of gene expression affecting a trait. Evidence for mediation, that perturbation of gene expression in a given tissue or developmental context will induce a change in the downstream GWAS trait, can be provided by two-sample Mendelian Randomization (MR). Here, we introduce a new statistical method, MRLocus, for Bayesian estimation of the gene-to-trait effect from eQTL and GWAS summary data for loci with evidence of allelic heterogeneity, that is, containing multiple causal variants. MRLocus makes use of a colocalization step applied to each nearly-LD-independent eQTL, followed by an MR analysis step across eQTLs. Additionally, our method involves estimation of the extent of allelic heterogeneity through a dispersion parameter, indicating variable mediation effects from each individual eQTL on the downstream trait. Our method is evaluated against other state-of-the-art methods for estimation of the gene-to-trait mediation effect, using an existing simulation framework. In simulation, MRLocus often has the highest accuracy among competing methods, and in each case provides more accurate estimation of uncertainty as assessed through interval coverage. MRLocus is then applied to five candidate causal genes for mediation of particular GWAS traits, where gene-to-trait effects are concordant with those previously reported. 
We find that MRLocus's estimation of the causal effect across eQTLs within a locus provides useful information for determining how perturbation of gene expression or individual regulatory elements will affect downstream traits. The MRLocus method is implemented as an R package available at https://mikelove.github.io/mrlocus.",2021-04-19 +31822087,Atlas of putative minima and low-lying energy networks of water clusters n = 3-25.,"We report a database consisting of the putative minima and ∼3.2 × 106 local minima lying within 5 kcal/mol from the putative minima for water clusters of sizes n = 3-25 using an improved version of the Monte Carlo temperature basin paving (MCTBP) global optimization procedure in conjunction with the ab initio based, flexible, polarizable Thole-Type Model (TTM2.1-F, version 2.1) interaction potential for water. Several of the low-lying structures, as well as low-lying penta-coordinated water networks obtained with the TTM2.1-F potential, were further refined at the Møller-Plesset second order perturbation (MP2)/aug-cc-pVTZ level of theory. In total, we have identified 3 138 303 networks corresponding to local minima of the clusters n = 3-25, whose Cartesian coordinates and relative energies can be obtained from the webpage https://sites.uw.edu/wdbase/. Networks containing penta-coordinated water molecules start to appear at n = 11 and, quite surprisingly, are energetically close (within 1-3 kcal/mol) to the putative minima, a fact that has been confirmed from the MP2 calculations. This large database of water cluster minima spanning quite dissimilar hydrogen bonding networks is expected to influence the development and assessment of the accuracy of interaction potentials for water as well as lower scaling electronic structure methods (such as different density functionals). 
Furthermore, it can also be used in conjunction with data science approaches (including but not limited to neural networks and machine and deep learning) to understand the properties of water, nature's most important substance.",2019-12-01 +33870774,The association between relationship strain and emotional well-being among older adult couples: the moderating role of social connectedness.,"

Objectives

The present study examines the moderating role of social connectedness (i.e. closeness, talk frequency, social network size, and neighborhood social ties) in the association between one's own and spouse's relationship strain and emotional well-being (i.e. depressive symptoms, happiness, and loneliness).

Method

Married couples (N = 865) were drawn from the second wave of the National Social, Health, and Aging Project. One Actor Partner Interdependence Model (APIM) and one Actor Partner Interdependence Model with Moderation (APIMoM) were conducted.

Results

In terms of actor effects, relationship strain was associated with all emotional well-being outcomes. Wives' and husbands' greater relationship strain was associated with spouses' loneliness. Only wives' greater relationship strain was associated with her husbands' higher level of depressive symptoms and no partner effects were found for happiness. In six instances, social connectedness factors helped to ameliorate the association between self/spouse relationship strain, depressive symptoms, and happiness. However, wives' greater neighborhood social ties amplified the association between wives' greater relationship strain and husbands' greater depressive symptoms. We did not find that social connectedness factors moderated the associations between self/spouse relationship strain and loneliness.

Conclusion

Even in late life marriages, marital strain is associated with less happiness and greater depressive symptoms and loneliness. Practitioners addressing emotional well-being may need to pay attention to spousal perceptions of relationship strain and social relationships external to the marital relationship when working with heterosexual couples. Efforts throughout the life course should be made to ensure connections with diverse types of social networks. Supplemental data for this article is available online at https://doi.org/10.1080/13607863.2021.1910786.",2021-04-19 +31930389,Nearest-neighbor Projected-Distance Regression (NPDR) for detecting network interactions with adjustments for multiple tests and confounding.,"

Summary

Machine learning feature selection methods are needed to detect complex interaction-network effects in complicated modeling scenarios in high-dimensional data, such as GWAS, gene expression, eQTL and structural/functional neuroimage studies for case-control or continuous outcomes. In addition, many machine learning methods have limited ability to address the issues of controlling false discoveries and adjusting for covariates. To address these challenges, we develop a new feature selection technique called Nearest-neighbor Projected-Distance Regression (NPDR) that calculates the importance of each predictor using generalized linear model regression of distances between nearest-neighbor pairs projected onto the predictor dimension. NPDR captures the underlying interaction structure of data using nearest-neighbors in high dimensions, handles both dichotomous and continuous outcomes and predictor data types, statistically corrects for covariates, and permits statistical inference and penalized regression. We use realistic simulations with interactions and other effects to show that NPDR has better precision-recall than standard Relief-based feature selection and random forest importance, with the additional benefit of covariate adjustment and multiple testing correction. Using RNA-Seq data from a study of major depressive disorder (MDD), we show that NPDR with covariate adjustment removes spurious associations due to confounding. We apply NPDR to eQTL data to identify potentially interacting variants that regulate transcripts associated with MDD and demonstrate NPDR's utility for GWAS and continuous outcomes.

Availability and implementation

Available at: https://insilico.github.io/npdr/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-05-01 +32696501,When is refraction stable following routine cataract surgery? A systematic review and meta-analysis.,"

Purpose

We systematically reviewed the literature to investigate when refraction is stable following routine cataract surgery implanting monofocal intraocular lenses. Current advice recommends obtaining new spectacles 4-6 weeks following surgery. Due to advancements in surgical techniques, we hypothesised that refractive stability would be achieved earlier, which could have major short-term improvements in quality of life for patients.

Methods

Medline, CINAHL, AMED, Embase, Web of Science and the Cochrane Library were searched with key words chosen to find articles, which assessed refraction following uncomplicated cataract surgery. Citation chains and the reference lists of all included papers were searched. Unpublished literature was identified using OpenGrey (www.opengrey.eu). The review considered studies that measured refraction at regular intervals following surgery until stability was achieved.

Results

The search identified 6,680 papers. Two reviewers independently screened the abstracts and nine papers were found to fit the criteria, of which five were included in the meta-analysis. The quality of the papers was evaluated using the Methodological Index for Non-Randomised Studies (MINORS) instrument. Meta-analysis of 301 patients' data of spherical, cylindrical and spherical equivalent correction were performed using Review Manager 5 (RevMan 5.3) (https://revman.cochrane.org/). Refraction at 1-week versus the gold standard of 4-weeks showed no significant difference for sphere data (effect size and 95% confidence interval of; ES = 0.00, 95% CI: -0.17, 0.17; p = 1.00), cylindrical data (ES = +0.06; 95% CI: -0.05, 0.17; p = 0.31), and spherical equivalent (ES = -0.01; 95% CI: -0.12, 0.10; p = 0.90). Heterogeneity was non-significant (I2  < 25%) for all refractive elements. Data were similar for 2- versus 4-weeks post-surgery. Acquired data from one study highlighted a small number of patients with very unstable cylindrical corrections at 1-week post-operatively.

Conclusions

No statistical difference was found when comparing sphere, cylindrical and spherical equivalent values at 1- and 4-weeks post cataract surgery. This suggests that new glasses could be provided 1-week after surgery. However, from a clinical perspective, a small number of patients (~7%) from an acquired dataset (N = 72) showed very unstable cylindrical corrections at 1-week. Further work is needed to determine why this is the case and how these patients can be detected.",2020-07-22 +32339163,Scedar: A scalable Python package for single-cell RNA-seq exploratory data analysis.,"In single-cell RNA-seq (scRNA-seq) experiments, the number of individual cells has increased exponentially, and the sequencing depth of each cell has decreased significantly. As a result, analyzing scRNA-seq data requires extensive considerations of program efficiency and method selection. In order to reduce the complexity of scRNA-seq data analysis, we present scedar, a scalable Python package for scRNA-seq exploratory data analysis. The package provides a convenient and reliable interface for performing visualization, imputation of gene dropouts, detection of rare transcriptomic profiles, and clustering on large-scale scRNA-seq datasets. The analytical methods are efficient, and they also do not assume that the data follow certain statistical distributions. The package is extensible and modular, which would facilitate the further development of functionalities for future requirements with the open-source development community. The scedar package is distributed under the terms of the MIT license at https://pypi.org/project/scedar.",2020-04-27 +32701041,Cardiovascular Effects of Polychlorinated Biphenyls and Their Major Metabolites.,"

Background

Xenobiotic metabolism is complex, and accounting for bioactivation and detoxification processes of chemicals remains among the most challenging aspects for decision making with in vitro new approach methods data.

Objectives

Considering the physiological relevance of human organotypic culture models and their utility for high-throughput screening, we hypothesized that multidimensional chemical-biological profiling of chemicals and their major metabolites is a sensible alternative for the toxicological characterization of parent molecules vs. metabolites in vitro.

Methods

In this study, we tested 25 polychlorinated biphenyls (PCBs) [PCB 3, 11, 52, 126, 136, and 153 and their relevant metabolites (hydroxylated, methoxylated, sulfated, and quinone)] in concentration-response (10 nM-100μM) for effects in human induced pluripotent stem cell (iPSC)-derived cardiomyocytes (CMs) and endothelial cells (ECs) (iPSC-derived and HUVECs). Functional phenotypic end points included effects on beating parameters and intracellular Ca2+ flux in CMs and inhibition of tubulogenesis in ECs. High-content imaging was used to evaluate cytotoxicity, mitochondrial integrity, and oxidative stress.

Results

Data integration of a total of 19 physicochemical descriptors and 36 in vitro phenotypes revealed that chlorination status and metabolite class are strong predictors of the in vitro cardiovascular effects of PCBs. Oxidation of PCBs, especially to di-hydroxylated and quinone metabolites, was associated with the most pronounced effects, whereas sulfation and methoxylation of PCBs resulted in diminished bioactivity.

Discussion

Risk characterization analysis showed that although in vitro derived effective concentrations exceeded the levels measured in the general population, risks cannot be ruled out due to the potential for population variability in susceptibility and the need to fill data gaps using read-across approaches. This study demonstrated a strategy for how in vitro data can be used to characterize human health risks from PCBs and their metabolites. https://doi.org/10.1289/EHP7030.",2020-07-23 +32805035,NetSets.js: a JavaScript framework for compositional assessment and comparison of biological networks through Venn-integrated network diagrams.,"

Motivation

Venn diagrams are frequently used to compare composition of datasets (e.g. datasets containing list of proteins and genes). Network diagram constructed using such datasets are usually generated using 'list of edges', popularly known as edge-lists. An edge-list and the corresponding generated network are, however, composed of two elements, namely, edges (e.g. protein-protein interactions) and nodes (e.g. proteins). Researchers often use individual lists of edges and nodes to compare composition of biological networks using existing Venn diagram tools. However, specialized analysis workflows are required for comparison of nodes as well as edges. Apart from this, different tools or graph libraries are needed for visualizing any specific edges of interest (e.g. protein-protein interactions which are present across all networks or are shared between subset of networks or are exclusively present in a selected network). Further, these results are required to be exported in the form of publication worthy network diagram(s), particularly for small networks.

Results

We introduce a (server independent) JavaScript framework (called NetSets.js) that integrates popular Venn and network diagrams in a single application. A free to use intuitive web application (utilizing NetSets.js), specifically designed to perform both compositional comparisons (e.g. for identifying common/exclusive edges or nodes) and interactive user defined visualizations of network (for the identified common/exclusive interactions across multiple networks) using simple edge-lists is also presented. The tool also enables connection to Cytoscape desktop application using the Netsets-Cyapp. We demonstrate the utility of our tool using real world biological networks (microbiome, gene interaction, multiplex and protein-protein interaction networks).

Availability and implementation

http://web.rniapps.net/netsets (freely available for academic use).

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-05-01 +30629147,TopoLink: evaluation of structural models using chemical crosslinking distance constraints.,"

Summary

A software was developed to evaluate structural models using chemical crosslinking experiments. The user provides the types of linkers used and their reactivity, and the observed crosslinks and dead-ends. The software computes the minimum length of a physically inspired linker that connects the reactive atoms of interest, and reports the consistency of each distance with the experimental observation. Statistics on model consistency with the links are provided. Tools to evaluate the correlation of crosslinks in ensembles of models were developed. TopoLink was used to evaluate the potential crosslinks of all structures of the CATH database. The number of crosslinks expected as a function of protein size and linker length can be used as guide for experimental design.

Availability and implementation

TopoLink is available as free software at http://m3g.iqm.unicamp.br/topolink, and distributed as source code with a user-friendly graphical interface for Windows. A web server is also provided.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +26452372,WaspAtlas: a Nasonia vitripennis gene database and analysis platform. ,"Nasonia vitripennis is a parasitoid wasp which is becoming an important model organism for parasitism, epigenetics, evolutionary and developmental genetics. WaspAtlas is a new gene database in which we have compiled annotation data from all available N. vitripennis releases along with a wealth of transcriptomic data, methylation data and original analyses and annotations to form a comprehensive resource to aid the study of Nasonia. WaspAtlas allows users to explore gene structure and function, to compare expression data across sexes, tissues, developmental stages and conditions, and to explore published data relating to gene(s) of interest. WaspAtlas is easy to navigate and the database is easily searchable through the web interface. Detailed illustrations are provided for splice variants, protein domain predictions and the results of analyses. The website also functions as an analysis platform analysis for Nasonia, providing a set of tools designed to perform common analyses including GO term overrepresentation and RNAi off-target prediction. WaspAtlas will act as a hub for published data relating to Nasonia genes, and will be continually updated with new data to reflect the state of Nasonia-omics research. Database URL: http://waspatlas.com.",2015-10-09 +30846808,"TACCO, a Database Connecting Transcriptome Alterations, Pathway Alterations and Clinical Outcomes in Cancers.","Because of innumerable cancer sequencing projects, abundant transcriptome expression profiles together with survival data are available from the same patients. Although some expression signatures for prognosis or pathologic staging have been identified from these data, systematically discovering such kind of expression signatures remains a challenge. 
To address this, we developed TACCO (Transcriptome Alterations in CanCer Omnibus), a database for identifying differentially expressed genes and altered pathways in cancer. TACCO also reveals miRNA cooperative regulations and supports construction of models for prognosis. The resulting signatures have great potential for patient stratification and treatment decision-making in future clinical applications. TACCO is freely available at http://tacco.life.nctu.edu.tw/ .",2019-03-07 +33979227,Coping With Tinnitus During the COVID-19 Pandemic.,"Purpose The COVID-19 pandemic disrupted normal operations of health care services, broad sectors of the economy, and the ability to socialize freely. For those with tinnitus, such changes can be factors in exacerbating tinnitus. The purpose of this study was to determine tinnitus help-seeking behavior, which resources individuals utilized to cope during the pandemic, and what additional support is desired. Method An exploratory cross-sectional study design including 1,522 adults with tinnitus living in North America (Canada and the United States) was used. Data were collected through an online survey distributed by the American Tinnitus Association via e-mail. Free text from open-ended questions was analyzed using the automated content analysis. The responses to the structured questionnaire were analyzed using descriptive and nonparametric statistics. Results Significantly less tinnitus support was sought during the pandemic, and very few respondents utilized tinnitus support networks during the pandemic at the time the survey was conducted. Nonetheless, seeking support during the pandemic was significantly associated with significantly less tinnitus distress. The most frequently utilized resources for coping during the pandemic were contacting family and friends, spending time outdoors or in nature, relaxation, and exercise. Such tools for coping were associated with significantly less tinnitus distress. 
The support requested and advice provided by participants to health care services had overlap. The main support needs related to managing tinnitus included addressing hearing loss, providing peer support, finding cures, and accessing trained and understanding health care providers to help. The advice for professionals related to tinnitus management included the need for cures, personalized support, addressing hearing loss, targeting the tinnitus percept, and providing more information about the condition. Conclusions These findings provide suggestions on how to better support those with tinnitus at a time when health care is undergoing rapid changes. Findings can be used by stakeholders, clinical practitioners, and tinnitus support services to devise ways to work more effectively together to improve access to patient-driven, suitable, accessible, and evidence-based support. Supplemental Material https://doi.org/10.23641/asha.14558514.",2021-05-12 +32491285,Taxamat: Automated biodiversity data management tool - Implications for microbiome studies.,"Working with biodiversity data is a computationally intensive process. Numerous applications and services provide options to deal with sequencing and taxonomy data. Professional statistics software are also available to analyze these type of data. However, in-between the two processes there is a huge need to curate biodiversity sample files. Curation involves creating summed abundance values for chosen taxonomy ranks, excluding certain taxa from analysis, and finally merging and downsampling data files. Very few tools, if any, offer a solution to this problem, thus we present Taxamat, a simple data management application that allows for curation of biodiversity data files before they can be imported to other statistics software. Taxamat is a downloadable application for automated curation of biodiversity data featuring taxonomic classification, taxon filtering, sample merging, and downsampling. 
Input and output files are compatible with most widely used programs. Taxamat is available on the web at http://www.taxamat.com either as a single executable or as an installable package for Microsoft Windows platforms.",2020-03-01 +32324854,Can ODE gene regulatory models neglect time lag or measurement scaling?,"

Motivation

Many ordinary differential equation (ODE) models have been introduced to replace linear regression models for inferring gene regulatory relationships from time-course gene expression data. But, since the observed data are usually not direct measurements of the gene products or there is an unknown time lag in gene regulation, it is problematic to directly apply traditional ODE models or linear regression models.

Results

We introduce a lagged ODE model to infer lagged gene regulatory relationships from time-course measurements, which are modeled as linear transformation of the gene products. A time-course microarray dataset from a yeast cell-cycle study is used for simulation assessment of the methods and real data analysis. The results show that our method, by considering both time lag and measurement scaling, performs much better than other linear and ODE models. It indicates the necessity of explicitly modeling the time lag and measurement scaling in ODE gene regulatory models.

Availability and implementation

R code is available at https://www.sta.cuhk.edu.hk/xfan/share/lagODE.zip.",2020-07-01 +32649399,The Impact of Coronavirus Disease 2019 Pandemic on U.S. and Canadian PICUs.,"

Objectives

There are limited reports of the impact of the coronavirus disease 2019 pandemic focused on U.S. and Canadian PICUs. This hypothesis-generating report aims to identify the United States and Canadian trends of coronavirus disease 2019 in PICUs.

Design and setting

To better understand how the coronavirus disease 2019 pandemic was affecting U.S. and Canadian PICUs, an open voluntary daily data collection process of Canadian and U.S. PICUs was initiated by Virtual Pediatric Systems, LLC (Los Angeles, CA; http://www.myvps.org) in mid-March 2020. Information was made available online to all PICUs wishing to participate. A secondary data collection was performed to follow-up on patients discharged from those PICUs reporting coronavirus disease 2019 positive patients.

Measurements and main results

To date, over 180 PICUs have responded detailing 530 PICU admissions requiring over 3,467 days of PICU care with 30 deaths. The preponderance of cases was in the eastern regions. Twenty-four percent of the patients admitted to the PICUs were over 18 years old. Fourteen percent of admissions were under 2 years old. Nearly 60% of children had comorbidities at admission with the average length of stay increasing by age and by severity of comorbidity. Advanced respiratory support was necessary during 67% of the current days of care, with 69% being conventional mechanical ventilation.

Conclusions

PICUs have been significantly impacted by the pandemic. They have provided care not only for children but also adults. Patients with coronavirus disease 2019 have a high frequency of comorbidities, require longer stays, more ventilatory support than usual PICU admissions. These data suggest several avenues for further exploration.",2020-09-01 +25242825,What-Where-When Memory in the Rodent Odor Span Task.,"While the Odor Span Task (OST) was developed to assess working memory in rodents, it appears that odor (""What"") and time since an odor was last reinforced (""When"") jointly control responding in the OST. The OST uses an incrementing non-match to sample procedure such that the number of stimuli to remember increases during the session; the rodent is trained to remember stimuli within a session but not between sessions. We used a variation of the OST to add a ""Where"" dimension to the task to examine whether rodents could learn to respond to scents based on contextual cues as well. In Experiment 1, 6 rats well-trained on the OST procedure were exposed to four target scents in a holding cage before the OST session began [What-Where-When (WWW) condition]. When these target scents appeared in the OST, rats treated them as novel scents despite their being previously encountered that day; WWW responding was comparable to baseline (BL) responding. Controls were implemented to account for relative familiarity: frequency of target presentation and time since the target odor was presented. On both types of control probes, rats typically responded to target scents less than during WWW or BL conditions, took longer to make a response, and visited more comparison stimuli. In Experiment 2, the study was replicated adding reinforcement delivery for responding to pre-session presentation of target stimuli. Subjects were the same 6 rats plus 2 additional rats also well-trained on the OST. Results were similar to those from Experiment 1. 
These data indicate that the variables controlling performance on the OST task include What stimulus is presented, Where (i.e., in which location) it was presented, and When it was presented. Thus, the OST-probe methodology may provide a useful vehicle for the study of episodic-like memory processes in non-humans.",2014-08-01 +25378328,MyMpn: a database for the systems biology model organism Mycoplasma pneumoniae.,"MyMpn (http://mympn.crg.eu) is an online resource devoted to studying the human pathogen Mycoplasma pneumoniae, a minimal bacterium causing lower respiratory tract infections. Due to its small size, its ability to grow in vitro, and the amount of data produced over the past decades, M. pneumoniae is an interesting model organisms for the development of systems biology approaches for unicellular organisms. Our database hosts a wealth of omics-scale datasets generated by hundreds of experimental and computational analyses. These include data obtained from gene expression profiling experiments, gene essentiality studies, protein abundance profiling, protein complex analysis, metabolic reactions and network modeling, cell growth experiments, comparative genomics and 3D tomography. In addition, the intuitive web interface provides access to several visualization and analysis tools as well as to different data search options. The availability and--even more relevant--the accessibility of properly structured and organized data are of up-most importance when aiming to understand the biology of an organism on a global scale. Therefore, MyMpn constitutes a unique and valuable new resource for the large systems biology and microbiology community.",2014-11-06 +33275427,FragRep: A Web Server for Structure-Based Drug Design by Fragment Replacement.,"The design of efficient computational tools for structure-guided ligand design is essential for the drug discovery process. 
We hereby present FragRep, a new web server for structure-based ligand design by fragment replacement. The input is a protein and a ligand structure, either from protein data bank or from molecular docking. Users can choose specific substructures they want to modify. The server tries to find suitable fragments that not only meet the geometric requirements of the remaining part of the ligand but also fit well with local protein environments. FragRep is a powerful computational tool for the rapid generation of ligand design ideas; either in scaffold hopping or bioisosteric replacing. The FragRep Server is freely available to researchers and can be accessed at http://xundrug.cn/fragrep.",2020-12-04 +33441023,Sorcery and well-being: bodily transformation at Beckeranta.,"This paper examines bodily transformation and well-being within the context of a millenarian movement that emerged during the 1840s in the area surrounding Mount Roraima at the periphery of Brazil, Guyana (British Guiana at the time), and Venezuela. The site of this movement was Beckeranta - meaning 'Land of the Whites' - where up to 400 Amerindians were reportedly killed in a quest that is described in its sole historical account as centred around a goal of bodily transformation into white people. In examining this movement, the paper engages with longstanding debates in medical anthropology concerning the body, as well as conversations among Amazonianists concerning the social formation of bodies, and examines sorcery and shamanism as practices that go 'beyond the body'. Notions of bodily transformation in Amazonia, which are often activated by strong emotions, facilitate conceptual expansions of the body in medical anthropology. 
The paper suggests that bodily transformations tied to sorcery and shamanism are in some contexts, such as at Beckeranta, associated with desires for well-being.Supplemental data for this article is available online at https://doi.org/10.1080/13648470.2020.1807726.",2021-01-14 +32804489,Pathway-Guided Deep Neural Network toward Interpretable and Predictive Modeling of Drug Sensitivity.,"To efficiently save cost and reduce risk in drug research and development, there is a pressing demand to develop in silico methods to predict drug sensitivity to cancer cells. With the exponentially increasing number of multi-omics data derived from high-throughput techniques, machine learning-based methods have been applied to the prediction of drug sensitivities. However, these methods have drawbacks either in the interpretability of the mechanism of drug action or limited performance in modeling drug sensitivity. In this paper, we presented a pathway-guided deep neural network (DNN) model to predict the drug sensitivity in cancer cells. Biological pathways describe a group of molecules in a cell that collaborates to control various biological functions like cell proliferation and death, thereby abnormal function of pathways can result in disease. To take advantage of the excellent predictive ability of DNN and the biological knowledge of pathways, we reshaped the canonical DNN structure by incorporating a layer of pathway nodes and their connections to input gene nodes, which makes the DNN model more interpretable and predictive compared to canonical DNN. We have conducted extensive performance evaluations on multiple independent drug sensitivity data sets and demonstrated that our model significantly outperformed the canonical DNN model and eight other classical regression models. 
Most importantly, we observed a remarkable activity decrease in disease-related pathway nodes during forward propagation upon inputs of drug targets, which implicitly corresponds to the inhibition effect of disease-related pathways induced by drug treatment on cancer cells. Our empirical experiments showed that our method achieves pharmacological interpretability and predictive ability in modeling drug sensitivity in cancer cells. The web server, the processed data sets, and source codes for reproducing our work are available at http://pathdnn.denglab.org.",2020-08-31 +30128888,"Norms of conceptual familiarity for 3,596 French nouns and their contribution in lexical decision.","In the last decade, research has shown that word processing is influenced by the lexical and semantic features of words. However, norms for a crucial semantic variable-that is, conceptual familiarity-have not been available for a sizeable French database. We thus developed French Canadian conceptual familiarity norms for 3,596 nouns. This enriches Desrochers and Thompson's (2009) database, in which subjective frequency and imageability values are already available for the same words. We collected online data from 313 Canadian French speakers. The full database of conceptual familiarity ratings is freely available at http://lingualab.ca/fr/projets/normes-de-familiarite-conceptuelle . We then demonstrated the utility of these new conceptual familiarity norms by assessing their contribution to lexical decision times. We conducted a stepwise regression model with conceptual familiarity in the last step. This allowed us to assess the independent contribution of conceptual familiarity beyond the contributions of other well-known psycholinguistic variables, such as frequency, imageability, and age of acquisition. The results showed that conceptual familiarity facilitated lexical decision latencies. 
In sum, these ratings will help researchers select French stimuli for experiments in which conceptual familiarity must be taken into account.",2019-10-01 +32634117,"Healthspan pathway maps in C. elegans and humans highlight transcription, proliferation/biosynthesis and lipids.","The molecular basis of aging and of aging-associated diseases is being unraveled at an increasing pace. An extended healthspan, and not merely an extension of lifespan, has become the aim of medical practice. Here, we define health based on the absence of diseases and dysfunctions. Based on an extensive review of the literature, in particular for humans and C. elegans, we compile a list of features of health and of the genes associated with them. These genes may or may not be associated with survival/lifespan. In turn, survival/lifespan genes that are not known to be directly associated with health are not considered. Clusters of these genes based on molecular interaction data give rise to maps of healthspan pathways for humans and for C. elegans. Overlaying healthspan-related gene expression data onto the healthspan pathway maps, we observe the downregulation of (pro-inflammatory) Notch signaling in humans and of proliferation in C. elegans. We identify transcription, proliferation/biosynthesis and lipids as a common theme on the annotation level, and proliferation-related kinases on the gene/protein level. Our literature-based data corpus, including visualization, should be seen as a pilot investigation of the molecular underpinnings of health in two different species. Web address: http://pathways.h2020awe.eu.",2020-07-07 +33998607,The Flexor Pollicis Longus Reflex: Interrater and Intrarater Reliability in Comparison With Established Muscle Stretch Reflexes.,"

Objective

The aim of this study was to investigate the interrater reliability and intrarater reliability of the flexor pollicis longus muscle stretch reflex (FPLR) and compare it with clinically established reflexes.

Design

A total of 71 healthy volunteers participated. The FPLR, biceps reflex, brachioradialis reflex, and patellar tendon reflex of each participant were tested bilaterally and rated by eight examiners (four experienced, four inexperienced). For intrarater reliability evaluation, five examiners rated the reflexes of four volunteers at four different points in time.

Results

Analysis of the interrater reliability with Gwet's AC1 demonstrated almost perfect agreement for FPLR (Gwet's AC1 = 0.90), biceps reflex (Gwet's AC1 = 0.90), and patellar tendon reflex (Gwet's AC1 = 0.95) when using binary data (reflex present vs. absent). Only fair agreement was found for the brachioradialis reflex (Gwet's AC1 = 0.56). Experienced raters had a higher agreement than inexperienced raters did when rating the biceps reflex and the patellar tendon reflex. The intrarater reliability was almost perfect for the patellar tendon reflex (Gwet's AC1 = 0.94), followed by the FPLR (Gwet's AC1 = 0.83) with substantial agreement and the biceps reflex (Gwet's AC1 = 0.57) with moderate agreement.

Conclusion

The FPLR is a reliable diagnostic neuromuscular test and may therefore be useful in the clinical examination for C8/T1 nerve root lesions or pathologies of the interosseous anterior nerve.

To claim CME credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME.

CME objectives

Upon completion of this article, the reader should be able to: (1) Elicit the flexor pollicis longus muscle stretch reflex; (2) Discuss the disadvantage of kappa statistics in assessing the interrater reliability when the prevalence of the studied trait is very high or very low; and (3) Name the spinal nerves involved in the innervation of the flexor pollicis longus muscle.

Level

Advanced.

Accreditation

The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians. The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2021-06-01 +33204283,Efficacy and Safety of Acupuncture at Tianshu (ST25) for Functional Constipation: Evidence from 10 Randomized Controlled Trials.,"

Objective

To evaluate the evidence for the efficacy and safety of acupuncture at Tianshu (ST25) for functional constipation (FC).

Methods

We systematically searched seven databases to identify randomized controlled trials of acupuncture at ST25 alone or in combination with conventional therapy in the treatment of FC. Risk ratios (RRs) and mean differences (MDs) were calculated using RevMan 5.3 with 95% confidence interval (CI).

Results

The study included ten trials with 1568 participants. Meta-analysis showed that the Cleveland Constipation Score (CCS) for deep needling was significantly lower than that for lactulose (deep needling with low-frequency dilatational wave: MD -0.58, 95% CI -0.94 to -0.22; deep needling with sparse wave: MD -3.67, 95% CI -6.40 to -0.94; deep needling with high-frequency dilatational wave: MD -3.42, 95% CI -5.03 to -1.81). Furthermore, CCS for shallow needling with high-frequency dilatational wave was lower than that for lactulose (MD -1.77, 95% CI -3.40 to -0.14). In addition, when deep needling was combined with high-frequency dilatational wave, the weekly frequency of spontaneous defecation (FSD) was significantly higher than that for lactulose (MD 1.57, 95% CI 0.93 to 2.21). Colonic Transit Time (CTT) scores were significantly higher when deep needling was combined with sparse wave (MD -14.36, 95% CI -18.31 to -10.41) or high-frequency dilatational wave (MD -11.53, 95% CI -19.25 to -3.81). The time of first defecation after treatment (TFD) of the shallow needling therapy was significantly longer than that of the lactulose (MD 13.67, 95% CI 5.66 to 21.67). The CCS 6 months after treatment (CCS6m) for deep needling was significantly lower than that for lactulose (MD -4.90, 95% CI -5.97 to -3.84). Moreover, the FSD 6 months after treatment (FSD6m) for shallow needling was significantly higher than that for lactulose (MD 0.49, 95% CI 0.02 to 0.97). The adverse event (AE) rate for lactulose was significantly higher than that achieved with the needling treatments, and this held true for both deep needling therapy (RR 0.41, 95% CI 0.23 to 0.72) and shallow needling therapy (RR 0.33, 95% CI 0.15 to 0.77).

Conclusions

The meta-analysis demonstrates that acupuncture at ST25 appears to be more effective than lactulose in the treatment of functional constipation. This was found to be especially true for deep needling with high-frequency dilatational wave, which had a greater impact on improving CCS, FSD, CTT, and CCS6m. Additionally, acupuncture at ST25 was shown to be safer than conventional treatment, with the rate of AE being significantly lower for both deep needling and shallow needling. The trial is registered with https://www.crd.york.ac.uk/prospero/(CRD42019141017)).",2020-11-06 +32768452,interferENZY: A Web-Based Tool for Enzymatic Assay Validation and Standardized Kinetic Analysis.,"Enzymatic assays are widely employed to characterize important allosteric and enzyme modulation effects. The high sensitivity of these assays can represent a serious problem if the occurrence of experimental errors surreptitiously affects the reliability of enzyme kinetics results. We have addressed this problem and found that hidden assay interferences can be unveiled by the graphical representation of progress curves in modified reaction coordinates. To render this analysis accessible to users across all levels of expertise, we have developed a webserver, interferENZY, that allows (i) an unprecedented tight quality control of experimental data, (ii) the automated identification of small and major assay interferences, and (iii) the estimation of bias-free kinetic parameters. By eliminating the subjectivity factor in kinetic data reporting, interferENZY will contribute to solving the ""reproducibility crisis"" that currently challenges experimental molecular biology. The interferENZY webserver is freely available (no login required) at https://interferenzy.i3s.up.pt.",2020-08-05 +33811460,"Development, validation, and comparison of a nomogram based on radiologic findings for predicting malignancy in intraductal papillary mucinous neoplasms of the pancreas: An international multicenter study. 
","Although we previously proposed a nomogram to predict malignancy in intraductal papillary mucinous neoplasms (IPMN) and validated it in an external cohort, its application is challenging without data on tumor markers. Moreover, existing nomograms have not been compared. This study aimed to develop a nomogram based on radiologic findings and to compare its performance with previously proposed American and Korean/Japanese nomograms. We recruited 3708 patients who underwent surgical resection at 31 tertiary institutions in eight countries, and patients with main pancreatic duct >10 mm were excluded. To construct the nomogram, 2606 patients were randomly allocated 1:1 into training and internal validation sets, and area under the receiver operating characteristics curve (AUC) was calculated using 10-fold cross validation by exhaustive search. This nomogram was then validated and compared to the American and Korean/Japanese nomograms using 1102 patients. Among the 2606 patients, 90 had main-duct type, 900 had branch-duct type, and 1616 had mixed-type IPMN. Pathologic results revealed 1628 low-grade dysplasia, 476 high-grade dysplasia, and 502 invasive carcinoma. Location, cyst size, duct dilatation, and mural nodule were selected to construct the nomogram. AUC of this nomogram was higher than the American nomogram (0.691 vs 0.664, P = .014) and comparable with the Korean/Japanese nomogram (0.659 vs 0.653, P = .255). A novel nomogram based on radiologic findings of IPMN is competitive for predicting risk of malignancy. This nomogram would be clinically helpful in circumstances where tumor markers are not available. The nomogram is freely available at http://statgen.snu.ac.kr/software/nomogramIPMN.",2021-04-02 +30458204,Second update of the International Registry of HLA Epitopes. I. 
The HLA-ABC Epitope Database.,"The International Registry of HLA Epitopes (http://www.epregistry.com.br) is a website-based resource for HLA epitopes important in transplant rejection and platelet transfusion refractoriness. Its primary goal is to document epitopes that are verified experimentally with specific antibodies. Such epitopes can be defined by single eplets and by eplets paired with certain polymorphic residues within a 15-Å radius, the dimension of the corresponding structural epitope. This report is an update of the HLA-ABC repertoire including descriptions of 72 antibody-verifications of epitopes defined by eplets and/or eplet pairs. The newly updated version 2.0 EpRegistry shows also the polymorphic residue compositions of structural epitopes corresponding to eplets shared between groups of alleles. At present, 151 eplets have not been antibody-verified, and we ranked them with a so-called ElliPro score as a potential predictor of immunogenicity. Sixty eplets with low ElliPro scores might be considered non-epitopes incapable of inducing specific antibodies.",2018-11-17 +27987167,The Bio-Analytic Resource for Plant Biology.,"Bioinformatic tools have become part of the way plant researchers undertake investigations. Large data sets encompassing genomes, transcriptomes, proteomes, epigenomes, and other ""-omes"" that have been generated in the past decade may be easily accessed with such tools, such that hypotheses may be generated at the click of a mouse. In this chapter, we'll cover the use of bioinformatic tools available at the Bio-Analytic Resource for Plant Biology at http://bar.utoronto.ca for exploring gene expression and coexpression patterns, undertaking promoter analyses, performing functional classification enrichment analyses for sets of genes, and examining protein-protein interactions. 
We also touch on some newer bioinformatic tools that allow integration of data from several sources for improved hypothesis generation, both for Arabidopsis and translationally. Most of the data sets come from Arabidopsis, but useful BAR tools for other species will be mentioned where appropriate.",2017-01-01 +33737684,"Proteomic blood profiling in mild, severe and critical COVID-19 patients.","The recent SARS-CoV-2 pandemic manifests itself as a mild respiratory tract infection in most individuals, leading to COVID-19 disease. However, in some infected individuals, this can progress to severe pneumonia and acute respiratory distress syndrome (ARDS), leading to multi-organ failure and death. This study explores the proteomic differences between mild, severe, and critical COVID-19 positive patients to further understand the disease progression, identify proteins associated with disease severity, and identify potential therapeutic targets. Blood protein profiling was performed on 59 COVID-19 mild (n = 26), severe (n = 9) or critical (n = 24) cases and 28 controls using the OLINK inflammation, autoimmune, cardiovascular and neurology panels. Differential expression analysis was performed within and between disease groups to generate nine different analyses. From the 368 proteins measured per individual, more than 75% were observed to be significantly perturbed in COVID-19 cases. Six proteins (IL6, CKAP4, Gal-9, IL-1ra, LILRB4 and PD-L1) were identified to be associated with disease severity. The results have been made readily available through an interactive web-based application for instant data exploration and visualization, and can be accessed at https://phidatalab-shiny.rosalind.kcl.ac.uk/COVID19/ . 
Our results demonstrate that dynamic changes in blood proteins associated with disease severity can potentially be used as early biomarkers to monitor disease severity in COVID-19 and serve as potential therapeutic targets.",2021-03-18 +33180722,"A Benchmark for Studying Diabetic Retinopathy: Segmentation, Grading, and Transferability.","People with diabetes are at risk of developing an eye disease called diabetic retinopathy (DR). This disease occurs when high blood glucose levels cause damage to blood vessels in the retina. Computer-aided DR diagnosis has become a promising tool for the early detection and severity grading of DR, due to the great success of deep learning. However, most current DR diagnosis systems do not achieve satisfactory performance or interpretability for ophthalmologists, due to the lack of training data with consistent and fine-grained annotations. To address this problem, we construct a large fine-grained annotated DR dataset containing 2,842 images (FGADR). Specifically, this dataset has 1,842 images with pixel-level DR-related lesion annotations, and 1,000 images with image-level labels graded by six board-certified ophthalmologists with intra-rater consistency. The proposed dataset will enable extensive studies on DR diagnosis. Further, we establish three benchmark tasks for evaluation: 1. DR lesion segmentation; 2. DR grading by joint classification and segmentation; 3. Transfer learning for ocular multi-disease identification. Moreover, a novel inductive transfer learning method is introduced for the third task. Extensive experiments using different state-of-the-art methods are conducted on our FGADR dataset, which can serve as baselines for future research. 
Our dataset will be released in https://csyizhou.github.io/FGADR/.",2021-03-02 +32043137,EP3: an ensemble predictor that accurately identifies type III secreted effectors.,"Type III secretion systems (T3SS) can be found in many pathogenic bacteria, such as Dysentery bacillus, Salmonella typhimurium, Vibrio cholera and pathogenic Escherichia coli. The routes of infection of these bacteria include the T3SS transferring a large number of type III secreted effectors (T3SE) into host cells, thereby blocking or adjusting the communication channels of the host cells. Therefore, the accurate identification of T3SEs is the precondition for the further study of pathogenic bacteria. In this article, a new T3SEs ensemble predictor was developed, which can accurately distinguish T3SEs from any unknown protein. In the course of the experiment, methods and models are strictly trained and tested. Compared with other methods, EP3 demonstrates better performance, including the absence of overfitting, strong robustness and powerful predictive ability. EP3 (an ensemble predictor that accurately identifies T3SEs) is designed to simplify the user's (especially nonprofessional users) access to T3SEs for further investigation, which will have a significant impact on understanding the progression of pathogenic bacterial infections. Based on the integrated model that we proposed, a web server had been established to distinguish T3SEs from non-T3SEs, where have EP3_1 and EP3_2. The users can choose the model according to the species of the samples to be tested. Our related tools and data can be accessed through the link http://lab.malab.cn/∼lijing/EP3.html.",2021-03-01 +24939129,"Using PeptideAtlas, SRMAtlas, and PASSEL: Comprehensive Resources for Discovery and Targeted Proteomics.","PeptideAtlas, SRMAtlas, and PASSEL are Web-accessible resources to support discovery and targeted proteomics research. 
PeptideAtlas is a multi-species compendium of shotgun proteomic data provided by the scientific community; SRMAtlas is a resource of high-quality, complete proteome SRM assays generated in a consistent manner for the targeted identification and quantification of proteins; and PASSEL is a repository that compiles and represents selected reaction monitoring data, all in an easy-to-use interface. The databases are generated from native mass spectrometry data files that are analyzed in a standardized manner including statistical validation of the results. Each resource offers search functionalities and can be queried by user-defined constraints; the query results are provided in tables or are graphically displayed. PeptideAtlas, SRMAtlas, and PASSEL are publicly available freely via the Web site http://www.peptideatlas.org. In this protocol, we describe the use of these resources, we highlight how to submit, search, collate and download data.",2014-06-17 +34047249,"Synthesis, characterization, and biological applications of pyrazole moiety bearing osmium(IV) complexes.","Osmium (IV) complexes with pyrazole nucleus containing ligands were synthesized. Os(IV) compounds were characterized using ESI-MS, ICP-OES, IR spectroscopy, electronic spectroscopy, conductance, and magnetic measurements. Whereas, ligands were characterized by heteronuclear spectroscopy, (1H and 13C), IR spectroscopy, and elemental analysis. All the compounds were tested for their potential to interact with HS-DNA by absorption titration, fluorescence spectroscopy, viscosity measurement, and docking study. The quenching constant and Stern Volmer constant values were calculated using fluorescence study. The synthesized compounds were studied for in-vitro bacteriostatic and cytotoxic activities. 
The cancer cell line studies of all the synthesized complexes were carried out on human lung cancer cells (A549).Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1921795 .",2021-01-01 +33961227,"Simple, Reliable, and Time-Efficient Manual Annotation of Bacterial Genomes with MAISEN.","Over the last 15 years, the costs of DNA sequencing have sharply fallen, effectively shifting the costs of DNA analysis from sequencing to bioinformatic curation and storage. A huge number of available DNA sequences (including genomes and metagenomes) resulted in the development of various tools for sequence annotation. While much effort has been invested into the development of automatic annotation pipelines, manual curation of their results is still necessary in order to obtain a reliable and strictly validated data. Unfortunately, due to its time-consuming nature, manual annotation is now rarely used.In this chapter, a protocol for efficient manual annotation of prokaryotic DNA sequences using a novel bioinformatic tool-MAISEN ( http://maisen.ddlemb.com ), is presented. MAISEN is a free, web-based tool designed to accelerate manual annotation, by providing the user with simple interface and precomputed alignments for each predicted feature. It was designed to be available for every scientist, regardless of their bioinformatic proficiency.",2021-01-01 +32094335,"Fox Insight collects online, longitudinal patient-reported outcomes and genetic data on Parkinson's disease.","Fox Insight is an online, longitudinal health study of people with and without Parkinson's disease with targeted enrollment set to at least 125,000 individuals. Fox Insight data is a rich data set facilitating discovery, validation, and reproducibility in Parkinson's disease research. 
The dataset is generated through routine longitudinal assessments (health and medical questionnaires evaluated at regular cycles), one-time questionnaires about environmental exposure and healthcare preferences, and genetic data collection. Qualified Researchers can explore, analyze, and download patient-reported outcomes (PROs) data and Parkinson's disease- related genetic variants at https://foxden.michaeljfox.org. The full Fox Insight genetic data set, including approximately 600,000 single nucleotide polymorphisms (SNPs), can be requested separately with institutional review and are described outside of this data descriptor.",2020-02-24 +32932246,Tracking atomic structure evolution during directed electron beam induced Si-atom motion in graphene via deep machine learning.,"Using electron beam manipulation, we enable deterministic motion of individual Si atoms in graphene along predefined trajectories. Structural evolution during the dopant motion was explored, providing information on changes of the Si atom neighborhood during atomic motion and providing statistical information of possible defect configurations. The combination of a Gaussian mixture model and principal component analysis applied to the deep learning-processed experimental data allowed disentangling of the atomic distortions for two different graphene sublattices. This approach demonstrates the potential of e-beam manipulation to create defect libraries of multiple realizations of the same defect and explore the potential of symmetry breaking physics. The rapid image analytics enabled via a deep learning network further empowers instrumentation for e-beam controlled atom-by-atom fabrication. 
The analysis described in the paper can be reproduced via an interactive Jupyter notebook at https://git.io/JJ3Bx.",2021-01-01 +27742820,PIECE 2.0: an update for the plant gene structure comparison and evolution database.,"PIECE (Plant Intron Exon Comparison and Evolution) is a web-accessible database that houses intron and exon information of plant genes. PIECE serves as a resource for biologists interested in comparing intron-exon organization and provides valuable insights into the evolution of gene structure in plant genomes. Recently, we updated PIECE to a new version, PIECE 2.0 (http://probes.pw.usda.gov/piece or http://aegilops.wheat.ucdavis.edu/piece). PIECE 2.0 contains annotated genes from 49 sequenced plant species as compared to 25 species in the previous version. In the current version, we also added several new features: (i) a new viewer was developed to show phylogenetic trees displayed along with the structure of individual genes; (ii) genes in the phylogenetic tree can now be also grouped according to KOG (The annotation of Eukaryotic Orthologous Groups) and KO (KEGG Orthology) in addition to Pfam domains; (iii) information on intronless genes are now included in the database; (iv) a statistical summary of global gene structure information for each species and its comparison with other species was added; and (v) an improved GSDraw tool was implemented in the web server to enhance the analysis and display of gene structure. The updated PIECE 2.0 database will be a valuable resource for the plant research community for the study of gene structure and evolution.",2016-10-13 +31139565,HNCDB: An Integrated Gene and Drug Database for Head and Neck Cancer.,"Head and neck cancer (HNC) is the sixth most common cancer worldwide. Over the last decade, an enormous amount of well-annotated gene and drug data has accumulated for HNC. However, a comprehensive repository is not yet available. 
Here, we constructed the Head and Neck Cancer Database (HNCDB: http://hncdb.cancerbio.info) using text mining followed by manual curation of the literature to collect reliable information on the HNC-related genes and drugs. The high-throughput gene expression data for HNC were also integrated into HNCDB. HNCDB includes the following three separate but closely related components: ""HNC GENE,"" ""Connectivity Map,"" and ""ANALYSIS."" The ""HNC GENE"" component contains comprehensive information for the 1,173 HNC-related genes manually curated from 2,564 publications. The ""Connectivity Map"" includes information on the potential connections between the 176 drugs manually curated from 2,032 publications and the 1,173 HNC-related genes. The ""ANALYSIS"" component allows users to conduct correlation, differential expression, and survival analyses in the 2,403 samples from 78 HNC gene expression datasets. Taken together, we believe that HNCDB will be of significant benefit for the HNC community and promote further advances for precision medicine research on HNC.",2019-05-14 +33354878,Industry payments to hospitalist physicians: a 5-year analysis of the Open Payments programme from 2014 to 2018.,We analysed Open Payments programme data (https://openpaymentsdata.cms.gov) on industry-to-physician payments to hospitalists for the years 2014 to 2018. Payments to hospitalists increased by 106.5% from 2014 to 2018 with food and beverage (38.5%) and compensation for services other than consulting (24.3%) being the highest-paid categories. Industry payment to hospitalists was highly skewed with top 10 hospitalists receiving more than 30% of the total payments during the study period. The most common drugs associated with payments were anticoagulant medications (apixaban and rivaroxaban). Industry seems to be spending a significant amount of money to increase awareness of medications among hospitalists. 
Identification of these trends and potential motives of industry spending is critical to address any potential physician bias.,2020-12-01 +25428371,The InterPro protein families database: the classification resource after 15 years.,"The InterPro database (http://www.ebi.ac.uk/interpro/) is a freely available resource that can be used to classify sequences into protein families and to predict the presence of important domains and sites. Central to the InterPro database are predictive models, known as signatures, from a range of different protein family databases that have different biological focuses and use different methodological approaches to classify protein families and domains. InterPro integrates these signatures, capitalizing on the respective strengths of the individual databases, to produce a powerful protein classification resource. Here, we report on the status of InterPro as it enters its 15th year of operation, and give an overview of new developments with the database and its associated Web interfaces and software. In particular, the new domain architecture search tool is described and the process of mapping of Gene Ontology terms to InterPro is outlined. We also discuss the challenges faced by the resource given the explosive growth in sequence data in recent years. InterPro (version 48.0) contains 36,766 member database signatures integrated into 26,238 InterPro entries, an increase of over 3993 entries (5081 signatures), since 2012.",2014-11-26 +31758726,"GBX2, as a tumor promoter in lung adenocarcinoma, enhances cells viability, invasion and migration by regulating the AKT/ERK signaling pathway.","

Background

Increasing evidence shows that gastrulation brain homeobox 2 (GBX2) is involved in multiple cancers. However, whether GBX2 has an effect on the lung adenocarcinoma remains unclear. In the present study, we investigated the functions of GBX2 on lung adenocarcinoma and explored the underlying mechanism.

Methods

Public data were obtained from the TCGA (https://cancergenome.nih.gov) and Oncomine (http://www.oncomine.org) databases. GBX2 expression and its prognostic value were analyzed by bioinformatics methods. Relative mRNA and protein expression levels of GBX2 in lung adenocarcinoma cell lines were evaluated via a quantitative reverse transcriptase polymerase chain reaction and western blotting. Lung adenocarcinoma cell lines LTEP-a-2 and A549 were selected for GBX2 gain-of-function and loss-of-function assays, respectively. Cell viability was detected by CCK8 and clone formation experiments. Cell invasion and migration were assessed by Transwell assays. The effect of GBX2 on the AKT/extracellular signal regulated kinase (ERK) pathway was tested by western blotting.

Results

Compared to adjacent tissues, GBX2 expression was up-regulated in lung adenocarcinoma tissues. High expression of GBX2 led to a poor survival and could be seen as an independent predictor for lung adenocarcinoma patients. Furthermore, down-regulation of GBX2 notably restrained the viability, invasion and migration abilities of A549 cells, whereas up-regulation of GBX2 in LTEP-a-2 cells presented the opposite outcomes. Furthermore, western blot indicated that down-regulation of GBX2 decreases the protein levels of phosphorylated (p)-AKT and p-ERK in A549 cells, whereas up-regulation of GBX2 shows the opposite effects in LTEP-a-2 cells.

Conclusions

The results of the present study indicate that GBX2 plays a cancer-promoting role, accelerating cell proliferation, invasion and migration partly by modulation of the AKT/ERK pathway in lung adenocarcinoma.",2019-12-20 +33444218,Predicting dementia diagnosis from cognitive footprints in electronic health records: a case-control study protocol.,"

Introduction

Dementia is a group of disabling disorders that can be devastating for persons living with it and for their families. Data-informed decision-making strategies to identify individuals at high risk of dementia are essential to facilitate large-scale prevention and early intervention. This population-based case-control study aims to develop and validate a clinical algorithm for predicting dementia diagnosis, based on the cognitive footprint in personal and medical history.

Methods and analysis

We will use territory-wide electronic health records from the Clinical Data Analysis and Reporting System (CDARS) in Hong Kong between 1 January 2001 and 31 December 2018. All individuals who were at least 65 years old by the end of 2018 will be identified from CDARS. A random sample of control individuals who did not receive any diagnosis of dementia will be matched with those who did receive such a diagnosis by age, gender and index date with 1:1 ratio. Exposure to potential protective/risk factors will be included in both conventional logistic regression and machine-learning models. Established risk factors of interest will include diabetes mellitus, midlife hypertension, midlife obesity, depression, head injuries and low education. Exploratory risk factors will include vascular disease, infectious disease and medication. The prediction accuracy of several state-of-the-art machine-learning algorithms will be compared.

Ethics and dissemination

This study was approved by Institutional Review Board of The University of Hong Kong/Hospital Authority Hong Kong West Cluster (UW 18-225). Patients' records are anonymised to protect privacy. Study results will be disseminated through peer-reviewed publications. Codes of the resulted dementia risk prediction algorithm will be made publicly available at the website of the Tools to Inform Policy: Chinese Communities' Action in Response to Dementia project (https://www.tip-card.hku.hk/).",2020-11-19 +33564397,The EU one-stop-shop collection of publicly available information on COVID-19 in vitro diagnostic medical devices.,"The JRC COVID-19 In Vitro Diagnostic Devices and Test Methods Database, aimed to collect in a single place all publicly available information on performance of CE-marked in vitro diagnostic medical devices (IVDs) as well as in house laboratory-developed devices and related test methods for COVID-19, is here presented. The database, manually curated and regularly updated, has been developed as a follow-up to the Communication from the European Commission ""Guidelines on in vitro diagnostic tests and their performance"" of 15 April 2020 and is freely accessible at https://covid-19-diagnostics.jrc.ec.europa.eu/.",2020-11-03 +33142001,Machine learning models to predict length of stay and discharge destination in complex head and neck surgery.,"

Background

This study develops machine learning (ML) algorithms that use preoperative-only features to predict discharge-to-nonhome-facility (DNHF) and length-of-stay (LOS) following complex head and neck surgeries.

Methods

Patients undergoing laryngectomy or composite tissue excision followed by free tissue transfer were extracted from the 2005 to 2017 NSQIP database.

Results

Among the 2786 included patients, DNHF and mean LOS were 421 (15.1%) and 11.7 ± 8.8 days. Four classification models for predicting DNHF with high specificities (range, 0.80-0.84) were developed. The generalized linear and gradient boosting machine models performed best with receiver operating characteristic (ROC), accuracy, and negative predictive value (NPV) of 0.72-0.73, 0.75-0.76, and 0.88-0.89. Four regression models for predicting LOS in days were developed, where all performed similarly with mean absolute error and root mean-squared errors of 3.95-3.98 and 5.14-5.16. Both models were developed into an encrypted web-based interface: https://uci-ent.shinyapps.io/head-neck/.

Conclusion

Novel and proof-of-concept ML models to predict DNHF and LOS were developed and published as web-based interfaces.",2020-11-03 +33368787,GeneBreaker: Variant simulation to improve the diagnosis of Mendelian rare genetic diseases.,"Mendelian rare genetic diseases affect 5%-10% of the population, and with over 5300 genes responsible for ∼7000 different diseases, they are challenging to diagnose. The use of whole-genome sequencing (WGS) has bolstered the diagnosis rate significantly. The effective use of WGS relies on the ability to identify the disrupted gene responsible for disease phenotypes. This process involves genomic variant calling and prioritization, and is the beneficiary of improvements to sequencing technology, variant calling approaches, and increased capacity to prioritize genomic variants with potential pathogenicity. As analysis pipelines continue to improve, careful testing of their efficacy is paramount. However, real-life cases typically emerge anecdotally, and utilization of clinically sensitive and identifiable data for testing pipeline improvements is regulated and limiting. We identified the need for a gene-based variant simulation framework that can create mock rare disease scenarios, utilizing known pathogenic variants or through the creation of novel gene-disrupting variants. To fill this need, we present GeneBreaker, a tool that creates synthetic rare disease cases with utility for benchmarking variant calling approaches, testing the efficacy of variant prioritization, and as an educational mechanism for training diagnostic practitioners in the expanding field of genomic medicine. GeneBreaker is freely available at http://GeneBreaker.cmmt.ubc.ca.",2021-02-10 +32247535,Nonexudative Macular Neovascularization Supporting Outer Retina in Age-Related Macular Degeneration: A Clinicopathologic Correlation.,"

Purpose

Type 1 macular neovascularization (MNV) secondary to age-related macular degeneration (AMD) may sustain hypoxic and micronutrient-insufficient outer retinal cells compensatorily. We explored this hypothesis via histologic analysis of an eye with a shallow irregular retinal pigment epithelial elevation (SIRE) on OCT and good vision.

Design

Case study and clinicopathologic correlation.

Participant

A white woman with untreated nonexudative neovascular AMD and 20/30 visual acuity (left eye) and neovascular AMD (right eye), with 9 years' multimodal imaging before dying at 90 years of age.

Methods

The left eye was preserved 6.25 hours after death and prepared for submicrometer epoxy resin sections and transmission electron microscopy aligned to clinical OCT B-scans. Inside and outside the MNV area, layer thicknesses, phenotypes, and vascular density of native choriocapillaris and neovessels were measured. Lengths of choriocapillaries and intervening gaps in the index eye and in early AMD eyes and healthy eyes with similar age (n = 19 each) from the Project MACULA (Maculopathy Unveiled by Laminar Analysis) online histopathologic resource (http://projectmacula.cis.uab.edu/) were measured with custom software (Caps and Gaps).

Main outcome measures

Descriptive features, vascular density, histologic and OCT layer thicknesses, and distribution of choriocapillaries and intervening gaps.

Results

The SIRE correlated to a type 1 MNV that expanded slowly without evidence of exudation and with numerous choroidal vessels traversing Bruch's membrane defects, some visible on OCT. Tissue layers in and adjacent to the MNV area showed continuous RPE and characteristic AMD deposits. Capillary-like neovessels with fenestrations and caveolae resembling native choriocapillaris lined the retinal pigment epithelium (RPE) with a vascular density comparable with surrounding non-MNV areas. Relative to early AMD and healthy aged eyes, the index eye showed similar capillary lengths but larger gaps between vessels, indicating dropout. Outer nuclear layer thickness was preserved and showed less photoreceptor degeneration over areas of relative choriocapillaris health, including the type 1 MNV.

Conclusions

Eyes with nonexudative type 1 MNV in AMD may progress to exudation, yet this stable MNV complex supported outer retinal structure for 9 years. Distinguishing features were numerous connecting vessels, high density of neovessels, continuous RPE, and slow growth. Maintaining beneficial type 1 MNV may be a therapeutic strategy.",2020-01-29 +29059383,FlavorDB: a database of flavor molecules.,"Flavor is an expression of olfactory and gustatory sensations experienced through a multitude of chemical processes triggered by molecules. Beyond their key role in defining taste and smell, flavor molecules also regulate metabolic processes with consequences to health. Such molecules present in natural sources have been an integral part of human history with limited success in attempts to create synthetic alternatives. Given their utility in various spheres of life such as food and fragrances, it is valuable to have a repository of flavor molecules, their natural sources, physicochemical properties, and sensory responses. FlavorDB (http://cosylab.iiitd.edu.in/flavordb) comprises of 25,595 flavor molecules representing an array of tastes and odors. Among these 2254 molecules are associated with 936 natural ingredients belonging to 34 categories. The dynamic, user-friendly interface of the resource facilitates exploration of flavor molecules for divergent applications: finding molecules matching a desired flavor or structure; exploring molecules of an ingredient; discovering novel food pairings; finding the molecular essence of food ingredients; associating chemical features with a flavor and more. Data-driven studies based on FlavorDB can pave the way for an improved understanding of flavor mechanisms.",2018-01-01 +31611909,SCDevDB: A Database for Insights Into Single-Cell Gene Expression Profiles During Human Developmental Processes.,"Single-cell RNA-seq studies profile thousands of cells in developmental processes. 
Current databases for human single-cell expression atlas only provide search and visualize functions for a selected gene in specific cell types or subpopulations. These databases are limited to technical properties or visualization of single-cell RNA-seq data without considering the biological relations of their collected cell groups. Here, we developed a database to investigate single-cell gene expression profiling during different developmental pathways (SCDevDB). In this database, we collected 10 human single-cell RNA-seq datasets, split these datasets into 176 developmental cell groups, and constructed 24 different developmental pathways. SCDevDB allows users to search the expression profiles of the interested genes across different developmental pathways. It also provides lists of differentially expressed genes during each developmental pathway, T-distributed stochastic neighbor embedding maps showing the relationships between developmental stages based on these differentially expressed genes, Gene Ontology, and Kyoto Encyclopedia of Genes and Genomes analysis results of these differentially expressed genes. This database is freely available at https://scdevdb.deepomics.org.",2019-09-26 +32050905,GsmPlot: a web server to visualize epigenome data in NCBI.,"BACKGROUND:Epigenetic regulation is essential in regulating gene expression across a variety of biological processes. Many high-throughput sequencing technologies have been widely used to generate epigenetic data, such as histone modification, transcription factor binding sites, DNA modifications, chromatin accessibility, and etc. A large scale of epigenetic data is stored in NCBI Gene Expression Omnibus (GEO). However, it is a great challenge to reanalyze these large scale and complex data, especially for researchers who do not specialize in bioinformatics skills or do not have access to expensive computational infrastructure. 
RESULTS:GsmPlot can simply accept GSM IDs to automatically download NCBI data or can accept user's private bigwig files as input to plot the concerned data on promoters, exons or any other user-defined genome locations and generate UCSC visualization tracks. By linking public data repository and private data, GsmPlot can spark data-driven ideas and hence promote the epigenetic research. CONCLUSIONS:GsmPlot web server allows convenient visualization and efficient exploration of any NCBI epigenetic data in any genomic region without need of any bioinformatics skills or special computing resources. GsmPlot is freely available at https://gsmplot.deqiangsun.org/.",2020-02-12 +32846784,"The impact of intense nursing care in improving anxiety, depression, and quality of life in patients with liver cancer: A systematic review and meta-analysis.","

Background

Liver resection is a major, serious, and very delicate operation that should be done only by specialized, well-skilled, and experienced surgeons. However, the role of nurses, which has often been under-estimated, is also crucial for the success of the intervention or surgery. Intensive nursing care involves high quality nursing modes to achieve the expected goals of treatment smoothly and with less complications. In this analysis, we aimed to show the impact of intense nursing care in improving anxiety, depression, and quality of life in patients with intervention for liver cancers.

Methods

Data sources included EMBASE, MEDLINE, Web of Science, the Cochrane central, Google scholar, and http://www.ClinicalTrials.gov. Three authors independently extracted data from the selected original studies. The statistical analysis was carried out by the Cochrane based RevMan software. For dichotomous data, the number of events and the total number of participants were required and for the continuous data, mean, standard deviation as well as the total number of participants were required in the input for analysis. Odds ratios (OR) with 95% confidence intervals (CI) were used to represent the data following assessment.

Results

A total of 1205 participants with liver cancer enrolled between the years 2010 to 2018 were included in this analysis whereby 667 participants were assigned to an intensive nursing care. Our current analysis showed that most of the patients who were assigned to an intense nursing intervention were significantly very satisfied with their quality of life (OR: 4.07, 95% CI: 1.45 - 11.45; P = .008). However, a minor number of patients with liver cancer who were not assigned to intense nursing care were significantly dissatisfied with their quality of life with OR: 0.18, 95% CI: 0.04 - 0.77; P = .02. This analysis also showed that self-rating anxiety score (SAS) and self-rating depression score (SDS) were significantly in favor of the participants with intense nursing care with OR: - 7.66, 95% CI: [(-9.66) - (-5.66)]; P = .00001 and OR: -7.87, 95% CI: [(-8.43) - (-7.26)]; P = .00001 respectively. In addition, physical function (OR: 13.56, 95% CI: 12.39 - 14.74; P = .00001), and total activity score (OR: 16.58, 95% CI: 13.51 - 19.65; P = .00001) were also significantly in favor of an intense nursing care.

Conclusions

Our current analysis showed that intense nursing care significantly improved anxiety, depression, and quality of life following interventions in patients with liver cancers. Most of the patients with liver cancers who were assigned to an intense nursing care were very satisfied with their quality of life. However, this hypothesis should further be confirmed in larger nursing related studies based on patients with liver cancers.",2020-08-01 +31886250,Identification of Potential Biomarkers and Biological Pathways in Juvenile Dermatomyositis Based on miRNA-mRNA Network.,"

Objective

The aim of this study is to explore the potential pathogenesis of juvenile dermatomyositis by bioinformatics analysis of gene chips, which would screen the hub genes, identify potential biomarkers, and reveal the development mechanism of juvenile dermatomyositis.

Material and methods

We retrieved juvenile dermatomyositis's original expression microarray data of message RNAs (mRNAs) and microRNAs (miRNAs) from NCBI's Gene Expression Omnibus database (GEO, http://www.ncbi.nlm.nih.gov/geo/); through the R package of limma in Bioconductor, we can screen the differentially expressed miRNAs and mRNAs, and then we further analyzed the predicted target genes by the methods such as Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway enrichment analysis and miRNA-mRNA regulatory network construction and protein-protein interaction (PPI) network using Cytoscape 3.6.1.

Results

Compared with normal juvenile skin tissues, 6 upregulated microRNAs and 5 downregulated microRNAs were identified from 166 downregulated microRNAs and 58 upregulated microRNAs in juvenile dermatomyositis tissues. The enrichment pathways of differentially expressed microRNAs include cell adhesion molecules (CAMs), autoimmune thyroid disease, Type I diabetes mellitus, antigen processing and presentation, viral myocarditis, graft-versus-host disease, and Kaposi sarcoma-associated herpes virus infection. By screening of microRNA-messenger RNA regulatory network and construction of PPI network map, three target miRNAs were identified, namely, miR-193b, miR-199b-5p, and miR-665.

Conclusion

We identified mir-193b, mir-199b-5p, and mir-665 as three target miRNAs by exploring the miRNA-mRNA regulation network mechanism related to the pathogenesis of juvenile dermatomyositis, which will be of great significance for further study on the pathogenesis and targeted therapy of juvenile dermatomyositis.",2019-12-07 +34175254,Robot-assisted Versus Open Radical Cystectomy in Bladder Cancer: An Economic Evaluation Alongside a Multicentre Comparative Effectiveness Study.,"

Background

Open radical cystectomy (ORC) is regarded as the standard treatment for muscle-invasive bladder cancer, but robot-assisted radical cystectomy (RARC) is increasingly used in practice. A recent study showed that RARC resulted in slightly fewer minor but slightly more major complications, although the difference was not statistically significant. Some differences were found in secondary outcomes favouring either RARC or ORC. RARC use is expected to increase in coming years, which fuels the debate about whether RARC provides value for money.

Objective

To assess the cost-effectiveness of RARC compared to ORC in bladder cancer.

Design, setting, and participants

This economic evaluation was performed alongside a prospective multicentre comparative effectiveness study. We included 348 bladder cancer patients (ORC, n = 168; RARC, n = 180) from 19 Dutch hospitals.

Outcome measurements and statistical analysis

Over 1 yr, we assessed the incremental cost per quality-adjusted life year (QALY) gained from both healthcare and societal perspectives. We used single imputation nested in the bootstrap percentile method to assess missing data and uncertainty, and inverse probability of treatment weighting to control for potential bias. Deterministic sensitivity analyses were performed to explore the impact of various parameters on the cost difference.

Results and limitations

The mean healthcare cost per patient was €17 141 (95% confidence interval [CI] €15 791-€18 720) for ORC and €21 266 (95% CI €19 163-€23 650) for RARC. The mean societal cost per patient was €18 926 (95% CI €17 431-€22 642) for ORC and €24 896 (95% CI €21 925-€31 888) for RARC. On average, RARC patients gained 0.79 QALYs (95% CI 0.74-0.85) compared to 0.81 QALYs (95% CI 0.77-0.85) for ORC patients, resulting in a mean QALY difference of -0.02 (95% CI -0.05 to 0.02). Using a cost-effectiveness threshold of €80 000, RARC was cost-effective in 0.6% and 0.2% of the replications for the healthcare and societal perspectives, respectively.

Conclusions

RARC shows no difference in terms of QALYs, but is more expensive than ORC. Hence, RARC does not seem to provide value for money in comparison to ORC.

Patient summary

This study assessed the relation between costs and effects of robot-assisted surgery compared to open surgery for removal of the bladder in 348 Dutch patients with bladder cancer. We found that after 1 year, the two approaches were similarly effective according to a measure called quality-adjusted life years, but robot-assisted surgery was much more expensive. This trial was prospectively registered in the Netherlands Trial Register as NTR5362 (https://www.trialregister.nl/trial/5214).",2021-06-24 +32386292,GDASC: a GPU parallel-based web server for detecting hidden batch factors.,"

Summary

We developed GDASC, a web version of our former DASC algorithm implemented with GPU. It provides a user-friendly web interface for detecting batch factors. Based on the good performance of DASC algorithm, it is able to give the most accurate results. For two steps of DASC, data-adaptive shrinkage and semi-non-negative matrix factorization, we designed parallelization strategies facing convex clustering solution and decomposition process. It runs more than 50 times faster than the original version on the representative RNA sequencing quality control dataset. With its accuracy and high speed, this server will be a useful tool for batch effects analysis.

Availability and implementation

http://bioinfo.nankai.edu.cn/gdasc.php.

Contact

zhanghan@nankai.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +33737205,"French FastContext: A publicly accessible system for detecting negation, temporality and experiencer in French clinical notes.","The context of medical conditions is an important feature to consider when processing clinical narratives. NegEx and its extension ConText became the most well-known rule-based systems that allow determining whether a medical condition is negated, historical or experienced by someone other than the patient in English clinical text. In this paper, we present a French adaptation and enrichment of FastContext which is the most recent, n-trie engine-based implementation of the ConText algorithm. We compiled an extensive list of French lexical cues by automatic and manual translation and enrichment. To evaluate French FastContext, we manually annotated the context of medical conditions present in two types of clinical narratives: (i)death certificates and (ii)electronic health records. Results show good performance across different context values on both types of clinical notes (on average 0.93 and 0.86 F1, respectively). Furthermore, French FastContext outperforms previously reported French systems for negation detection when compared on the same datasets and it is the first implementation of contextual temporality and experiencer identification reported for French. Finally, French FastContext has been implemented within the SIFR Annotator: a publicly accessible Web service to annotate French biomedical text data (http://bioportal.lirmm.fr/annotator). 
To our knowledge, this is the first implementation of a Web-based ConText-like system in a publicly accessible platform allowing non-natural-language-processing experts to both annotate and contextualize medical conditions in clinical notes.",2021-03-15 +29043042,feedr and animalnexus.ca: A paired R package and user-friendly Web application for transforming and visualizing animal movement data from static stations.,"Radio frequency identification (RFID) provides a simple and inexpensive approach for examining the movements of tagged animals, which can provide information on species behavior and ecology, such as habitat/resource use and social interactions. In addition, tracking animal movements is appealing to naturalists, citizen scientists, and the general public and thus represents a tool for public engagement in science and science education. Although a useful tool, the large amount of data collected using RFID may quickly become overwhelming. Here, we present an R package (feedr) we have developed for loading, transforming, and visualizing time-stamped, georeferenced data, such as RFID data collected from static logger stations. Using our package, data can be transformed from raw RFID data to visits, presence (regular detections by a logger over time), movements between loggers, displacements, and activity patterns. In addition, we provide several conversion functions to allow users to format data for use in functions from other complementary R packages. Data can also be visualized through static or interactive maps or as animations over time. To increase accessibility, data can be transformed and visualized either through R directly, or through the companion site: http://animalnexus.ca, an online, user-friendly, R-based Shiny Web application. This system can be used by professional and citizen scientists alike to view and study animal movements. 
We have designed this package to be flexible and to be able to handle data collected from other stationary sources (e.g., hair traps, static very high frequency (VHF) telemetry loggers, observations of marked individuals in colonies or staging sites), and we hope this framework will become a meeting point for science, education, and community awareness of the movements of animals. We aim to inspire citizen engagement while simultaneously enabling robust scientific analysis.",2017-08-30 +30407529,BioSamples database: an updated sample metadata hub.,"The BioSamples database at EMBL-EBI provides a central hub for sample metadata storage and linkage to other EMBL-EBI resources. BioSamples has recently undergone major changes, both in terms of data content and supporting infrastructure. The data content has more than doubled from around 2 million samples in 2014 to just over 5 million samples in 2018. Fast, reciprocal data exchange was fully established between sister Biosample databases and other INSDC partners, enabling a worldwide common representation and centralization of sample metadata. The BioSamples platform has been upgraded to accommodate anticipated increases in the number of submissions via GA4GH driver projects such as the Human Cell Atlas and the EGA, as well as from mirroring of NCBI dbGaP data. The BioSamples database is now the authoritative repository for all INSDC sample metadata, an ELIXIR Deposition Database for Biomolecular Data and the EMBL-EBI sample metadata hub. To support faster turnaround for sample submission, and to increase scalability and resilience, we have upgraded the BioSamples database backend storage, APIs and user interface. Finally, the website has been redesigned to allow search and retrieval of records based on specific filters, such as 'disease' or 'organism'. These changes are targeted at answering current use cases as well as providing functionalities for future emerging and anticipated developments. 
Availability: The BioSamples database is freely available at http://www.ebi.ac.uk/biosamples. Content is distributed under the EMBL-EBI Terms of Use available at https://www.ebi.ac.uk/about/terms-of-use.",2019-01-01 +32952940,UMI-Gen: A UMI-based read simulator for variant calling evaluation in paired-end sequencing NGS libraries.,"

Motivation

With Next Generation Sequencing becoming more affordable every year, NGS technologies asserted themselves as the fastest and most reliable way to detect Single Nucleotide Variants (SNV) and Copy Number Variations (CNV) in cancer patients. These technologies can be used to sequence DNA at very high depths, thus allowing the detection of abnormalities in tumor cells with very low frequencies. Multiple variant callers are publicly available and are usually efficient at calling out variants. However, when frequencies begin to drop under 1%, the specificity of these tools suffers greatly as true variants at very low frequencies can be easily confused with sequencing or PCR artifacts. The recent use of Unique Molecular Identifiers (UMI) in NGS experiments has offered a way to accurately separate true variants from artifacts. UMI-based variant callers are slowly replacing raw-read based variant callers as the standard method for an accurate detection of variants at very low frequencies. However, the benchmarks reported in the tools' publications are usually performed on real biological data in which the true variants are not known, making it difficult to assess their accuracy.

Results

We present UMI-Gen, a UMI-based read simulator for targeted sequencing paired-end data. UMI-Gen generates reference reads covering the targeted regions at a user customizable depth. After that, using a number of control files, it estimates the background error rate at each position and then modifies the generated reads to mimic real biological data. Finally, it will insert real variants in the reads from a list provided by the user.

Availability

The entire pipeline is available at https://gitlab.com/vincent-sater/umigen under MIT license.",2020-08-27 +33135992,First Report of Alfalfa mosaic virus in Chayote in Italy. ,"Chayote (Sechium edule (Jacq.) Sw.) is a vigorous perennial and climbing cucurbits, native to Mesoamerica, and cultivated for alimentary purposes in the American continent, Australia, New Zealand, South Europe, Asia and Africa. During spring 2019, some chayote plants showing bright yellow vein banding rings and lines were observed in a private garden in South Italy (Campania region). Symptoms coalesced in some leaves, covering almost the whole foliar area. Double-stranded RNAs were extracted from symptomatic leaves of a single chayote plant and reverse-transcribed, randomly amplified, and submitted to Illumina sequencing (Marais et al., 2018). Reads were assembled using CLC Genomics Workbench 11.1 (http://www.clcbio.com). Contigs were then annotated by Blastn and Blastx comparison with the Genbank database, which allowed the identification of eight contigs of between 380 and 980 nucleotides sharing significant identity with alfalfa mosaic virus (AMV) genomic RNAs. No other viral contigs were identified. Mapping of reads on AMV genomic RNAs identified 4,209 AMV reads (1.26% of total reads) and allowed the scaffolding of the contigs into three scaffolds corresponding to the three AMV genomic RNAs. To complete the sequence of the AMV chayote isolate genome (named See-1), primers were designed from the contig sequences and used to amplify RACE PCR products spanning the 5' and 3' terminal regions of the three genomic RNAs using the SMARTer™ RACE cDNA Amplification Kit (Clontech, China). All amplicons were cloned into the pGEM-T vector (Promega, USA) and sequenced (three clones for each amplicon) by Microsynth Seqlab (Microsynth AG, Switzerland). Finally, the complete genomic sequences of the three RNAs were assembled by MacVector 17.5 (MacVector Inc., USA). 
The RNA1, RNA2 and RNA3 of See-1 are 3,643, 2,593 and 2,037 nt respectively (GenBank accession Nos. MT093209 to MT093211), and share the highest nt sequence identity with the RNA1 and RNA3 of AMV isolate (HZ) from tobacco (99.5% for RNA1, HQ316635; 98.7% for RNA3, HQ316637) and with the RNA2 of isolate AMV-Gym from Gynostemma pentaphyllum (98.1%, MH332898), both from China. AMV isolate See-1 was classified as belonging to subgroup I based on the presence of a BamH I and two AvaII sites in the CP ORF (Parrella et al., 2000). Reverse transcription polymerase chain reaction, using primers targeting the CP gene (Parrella et al., 2000), confirmed AMV infection in three symptomatic chayote plants including that used for Illumina sequencing, with 100% of nt sequence identity of amplicons. Three plants each of Chenopodium amaranticolor, Nicotiana benthamiana and Solanum lycopersicon were mechanically inoculated with sap from isolate See-1 infected plant, leading to the appearance of typical AMV symptoms in all three hosts ten days post-inoculation (Jaspars & Bos, 1980). This note describes the first detection of AMV in chayote in Italy and, to the best of our knowledge, in the world. In some areas of Southern Italy, climatic conditions are favorable enough to allow chayote development in the wild. Further studies would be desirable to determine the distribution and incidence of AMV in chayote and to understand the possibility that this species may play a role in AMV epidemiology, representing a threat to other susceptible crops.",2020-11-02 +32026396,The Biological Structure Model Archive (BSM-Arc): an archive for in silico models and simulations.,"We present the Biological Structure Model Archive (BSM-Arc, https://bsma.pdbj.org), which aims to collect raw data obtained via in silico methods related to structural biology, such as computationally modeled 3D structures and molecular dynamics trajectories. 
Since BSM-Arc does not enforce a specific data format for the raw data, depositors are free to upload their data without any prior conversion. Besides uploading raw data, BSM-Arc enables depositors to annotate their data with additional explanations and figures. Furthermore, via our WebGL-based molecular viewer Molmil, it is possible to recreate 3D scenes as shown in the corresponding scientific article in an interactive manner. To submit a new entry, depositors require an ORCID ID to login, and to finally publish the data, an accompanying peer-reviewed paper describing the work must be associated with the entry. Submitting their data enables researchers to not only have an external backup but also provide an opportunity to promote their work via an interactive platform and to provide third-party researchers access to their raw data.",2020-02-05 +30091980,Characterizing genetic and environmental influences on variable DNA methylation using monozygotic and dizygotic twins.,"Variation in DNA methylation is being increasingly associated with health and disease outcomes. Although DNA methylation is hypothesized to be a mechanism by which both genetic and non-genetic factors can influence the regulation of gene expression, little is known about the extent to which DNA methylation at specific sites is influenced by heritable as well as environmental factors. We quantified DNA methylation in whole blood at age 18 in a birth cohort of 1,464 individuals comprising 426 monozygotic (MZ) and 306 same-sex dizygotic (DZ) twin pairs. Site-specific levels of DNA methylation were more strongly correlated across the genome between MZ than DZ twins. Structural equation models revealed that although the average contribution of additive genetic influences on DNA methylation across the genome was relatively low, it was notably elevated at the highly variable sites characterized by intermediate levels of DNAm that are most relevant for epigenetic epidemiology. 
Sites at which variable DNA methylation was most influenced by genetic factors were significantly enriched for DNA methylation quantitative trait loci (mQTL) effects, and overlapped with sites where inter-individual variation correlates across tissues. Finally, we show that DNA methylation at sites robustly associated with environmental exposures such as tobacco smoking and obesity is also influenced by additive genetic effects, highlighting the need to control for genetic background in analyses of exposure-associated DNA methylation differences. Estimates of the contribution of genetic and environmental influences to DNA methylation at all sites profiled in this study are available as a resource for the research community (http://www.epigenomicslab.com/online-data-resources).",2018-08-09 +31702846,ConSurf-DB: An accessible repository for the evolutionary conservation patterns of the majority of PDB proteins.,"Patterns observed by examining the evolutionary relationships among proteins of common origin can reveal the structural and functional importance of specific residue positions. In particular, amino acids that are highly conserved (i.e., their positions evolve at a slower rate than other positions) are particularly likely to be of biological importance, for example, for ligand binding. ConSurf is a bioinformatics tool for accurately estimating the evolutionary rate of each position in a protein family. Here we introduce a new release of ConSurf-DB, a database of precalculated ConSurf evolutionary conservation profiles for proteins of known structure. ConSurf-DB provides high-accuracy estimates of the evolutionary rates of the amino acids in each protein. A reliable estimate of a query protein's evolutionary rates depends on having a sufficiently large number of effective homologues (i.e., nonredundant yet sufficiently similar). With current sequence data, ConSurf-DB covers 82% of the PDB proteins. 
It will be updated on a regular basis to ensure that coverage remains high-and that it might even increase. Much effort was dedicated to improving the user experience. The repository is available at https://consurfdb.tau.ac.il/. BROADER AUDIENCE: By comparing a protein to other proteins of similar origin, it is possible to determine the extent to which each amino acid position in the protein evolved slowly or rapidly. A protein's evolutionary profile can provide valuable insights: For example, amino acid positions that are highly conserved (i.e., evolved slowly) are particularly likely to be of structural and/or functional importance, for example, for ligand binding and catalysis. We introduce here a new and improved version of ConSurf-DB, a continually updated database that provides precalculated evolutionary profiles of proteins with known structure.",2019-11-22 +33161739,How the zoonotic origins of SARS-CoV-2 ensure its survival as a human disease.,"In December 2019, a new species of coronavirus (SARS-CoV-2) was identified in a number of patients presenting with pneumonias of unknown aetiology in WuHan Province, China. Early epidemiological indications were of a zoonotic origin: many of the initial patients confirmed contact with a local wet market and the genomic sequencing showed similar characteristics with coronaviruses known to be carried by bats. The theory of subsequent human to human transmission became evident once global epidemiological reporting of COVID infection was established. 
Confirmation of the origins of infections caused by SARS-CoV-2 was enabled by the early sharing of the initial genomic sequence by China in January 2020 and since developed collaboratively on a globally accessible database, supported by the World Health Organization (https://tinyurl.com/rj32fp3).",2020-11-01 +33280581,A Quick Guide to Small-Molecule Inhibitors of Eukaryotic Protein Synthesis.,"Eukaryotic ribosome and cap-dependent translation are attractive targets in the antitumor, antiviral, anti-inflammatory, and antiparasitic therapies. Currently, a broad array of small-molecule drugs is known that specifically inhibit protein synthesis in eukaryotic cells. Many of them are well-studied ribosome-targeting antibiotics that block translocation, the peptidyl transferase center or the polypeptide exit tunnel, modulate the binding of translation machinery components to the ribosome, and induce miscoding, premature termination or stop codon readthrough. Such inhibitors are widely used as anticancer, anthelmintic and antifungal agents in medicine, as well as fungicides in agriculture. Chemicals that affect the accuracy of stop codon recognition are promising drugs for the nonsense suppression therapy of hereditary diseases and restoration of tumor suppressor function in cancer cells. Other compounds inhibit aminoacyl-tRNA synthetases, translation factors, and components of translation-associated signaling pathways, including mTOR kinase. Some of them have antidepressant, immunosuppressive and geroprotective properties. Translation inhibitors are also used in research for gene expression analysis by ribosome profiling, as well as in cell culture techniques. In this article, we review well-studied and less known inhibitors of eukaryotic protein synthesis (with the exception of mitochondrial and plastid translation) classified by their targets and briefly describe the action mechanisms of these compounds. 
We also present a continuously updated database (http://eupsic.belozersky.msu.ru) that currently contains information on 370 inhibitors of eukaryotic protein synthesis.",2020-11-01 +33247932,BarleyVarDB: a database of barley genomic variation. ,"Barley (Hordeum vulgare L.) is one of the first domesticated grain crops and represents the fourth most important cereal source for human and animal consumption. BarleyVarDB is a database of barley genomic variation. It can be publicly accessible through the website at http://146.118.64.11/BarleyVar. This database mainly provides three sets of information. First, there are 57 754 224 single nucleotide polymorphisms (SNPs) and 3 600 663 insertions or deletions (InDels) included in BarleyVarDB, which were identified from high-coverage whole genome sequencing of 21 barley germplasm, including 8 wild barley accessions from 3 barley evolutionary original centers and 13 barley landraces from different continents. Second, it uses the latest barley genome reference and its annotation information publicly accessible, which has been achieved by the International Barley Genome Sequencing Consortium (IBSC). Third, 522 212 whole genome-wide microsatellites/simple sequence repeats (SSRs) were also included in this database, which were identified in the reference barley pseudo-molecular genome sequence. Additionally, several useful web-based applications are provided including JBrowse, BLAST and Primer3. Users can design PCR primers to assess polymorphic variants deposited in this database and use a user-friendly interface for accessing the barley reference genome. We envisage that the BarleyVarDB will benefit the barley genetic research community by providing access to all publicly available barley genomic variation information and barley reference genome as well as providing them with an ultra-high density of SNP and InDel markers for molecular breeding and identification of functional genes with important agronomic traits in barley. 
Database URL: http://146.118.64.11/BarleyVar.",2020-11-01 +30272193,BACTOME-a reference database to explore the sequence- and gene expression-variation landscape of Pseudomonas aeruginosa clinical isolates.,"Extensive use of next-generation sequencing (NGS) for pathogen profiling has the potential to transform our understanding of how genomic plasticity contributes to phenotypic versatility. However, the storage of large amounts of NGS data and visualization tools need to evolve to offer the scientific community fast and convenient access to these data. We introduce BACTOME as a database system that links aligned DNA- and RNA-sequencing reads of clinical Pseudomonas aeruginosa isolates with clinically relevant pathogen phenotypes. The database allows data extraction for any single isolate, gene or phenotype as well as data filtering and phenotypic grouping for specific research questions. With the integration of statistical tools we illustrate the usefulness of a relational database structure for the identification of phenotype-genotype correlations as an essential part of the discovery pipeline in genomic research. Furthermore, the database provides a compilation of DNA sequences and gene expression values of a plethora of clinical isolates to give a consensus DNA sequence and consensus gene expression signature. Deviations from the consensus thereby describe the genomic landscape and the transcriptional plasticity of the species P. aeruginosa. The database is available at https://bactome.helmholtz-hzi.de.",2019-01-01 +34160246,"In Utero Exposure to Heavy Metals and Trace Elements and Childhood Blood Pressure in a U.S. Urban, Low-Income, Minority Birth Cohort.","

Background

In utero exposure to heavy metals lead (Pb), mercury (Hg), and cadmium (Cd) may be associated with higher childhood blood pressure (BP), whereas trace elements selenium (Se) and manganese (Mn) may have protective antioxidant effects that modify metal-BP associations.

Objectives

We examined the individual and joint effects of in utero exposure to Pb, Hg, Cd, Se, and Mn on childhood BP.

Methods

We used data from the Boston Birth Cohort (enrolled 2002-2013). We measured heavy metals and trace elements in maternal red blood cells collected 24-72 h after delivery. We calculated child BP percentile per the 2017 American Academy of Pediatrics Clinical Practice Guideline. We used linear regression models to estimate the association of each metal, and Bayesian kernel machine regression (BKMR) to examine metal coexposures, with child BP between 3 to 15 years of age.

Results

Our analytic sample comprised 1,194 mother-infant pairs (61% non-Hispanic Black, 20% Hispanic). Hg and Pb were not associated with child systolic BP (SBP). Se and Mn were inversely associated with child SBP percentiles, which, on average, were 6.23 points lower with a doubling of Se (95% CI: -11.51, -0.96) and 2.62 points lower with a doubling of Mn (95% CI: -5.20, -0.04). BKMR models showed similar results. Although Cd was not associated with child SBP overall, the inverse association between Mn and child SBP was stronger at higher levels of Cd (p-interaction=0.04). Consistent with this finding, in utero exposure to cigarette smoke modified the Mn-child SBP association. Among children whose mothers smoked during pregnancy, a doubling of Mn was associated with a 10.09-point reduction in SBP percentile (95% CI: -18.03, -2.15), compared with a 1.49-point reduction (95% CI: -4.21, 1.24) in children whose mothers did not smoke during pregnancy (p-interaction=0.08).

Conclusion

Se and Mn concentrations in maternal red blood cells collected 24-72 h after delivery were associated with lower child SBP at 3 to 15 years of age. There was an interaction between Mn and Cd on child SBP, whereby the protective association of Mn on child SBP was stronger among mothers who had higher Cd. The association of Mn and child SBP was also modified by maternal cigarette smoking-a source of Cd-during pregnancy. Optimizing in utero Se levels, as well as Mn levels in women who had high Cd or smoked during pregnancy, may protect offspring from developing high BP during childhood. https://doi.org/10.1289/EHP8325.",2021-06-23 +33846532,"COVID-19 information retrieval with deep-learning based semantic search, question answering, and abstractive summarization.","The COVID-19 global pandemic has resulted in international efforts to understand, track, and mitigate the disease, yielding a significant corpus of COVID-19 and SARS-CoV-2-related publications across scientific disciplines. Throughout 2020, over 400,000 coronavirus-related publications have been collected through the COVID-19 Open Research Dataset. Here, we present CO-Search, a semantic, multi-stage, search engine designed to handle complex queries over the COVID-19 literature, potentially aiding overburdened health workers in finding scientific answers and avoiding misinformation during a time of crisis. CO-Search is built from two sequential parts: a hybrid semantic-keyword retriever, which takes an input query and returns a sorted list of the 1000 most relevant documents, and a re-ranker, which further orders them by relevance. The retriever is composed of a deep learning model (Siamese-BERT) that encodes query-level meaning, along with two keyword-based models (BM25, TF-IDF) that emphasize the most important words of a query. 
The re-ranker assigns a relevance score to each document, computed from the outputs of (1) a question-answering module which gauges how much each document answers the query, and (2) an abstractive summarization module which determines how well a query matches a generated summary of the document. To account for the relatively limited dataset, we develop a text augmentation technique which splits the documents into pairs of paragraphs and the citations contained in them, creating millions of (citation title, paragraph) tuples for training the retriever. We evaluate our system ( http://einstein.ai/covid ) on the data of the TREC-COVID information retrieval challenge, obtaining strong performance across multiple key information retrieval metrics.",2021-04-12 +30357349,DDBJ update: the Genomic Expression Archive (GEA) for functional genomics data.,"The Genomic Expression Archive (GEA) for functional genomics data from microarray and high-throughput sequencing experiments has been established at the DNA Data Bank of Japan (DDBJ) Center (https://www.ddbj.nig.ac.jp), which is a member of the International Nucleotide Sequence Database Collaboration (INSDC) with the US National Center for Biotechnology Information and the European Bioinformatics Institute. The DDBJ Center collects nucleotide sequence data and associated biological information from researchers and also services the Japanese Genotype-phenotype Archive (JGA) with the National Bioscience Database Center for collecting human data. To automate the submission process, we have implemented the DDBJ BioSample validator which checks submitted records, auto-corrects their format, and issues error messages and warnings if necessary. The DDBJ Center also operates the NIG supercomputer, prepared for analyzing large-scale genome sequences. We now offer a secure platform specifically to handle personal human genomes. 
This report describes database activities for INSDC and JGA over the past year, the newly launched GEA, submission, retrieval, and analysis services available in our supercomputer system and their recent developments.",2019-01-01 +32442297,3D-GNOME 2.0: a three-dimensional genome modeling engine for predicting structural variation-driven alterations of chromatin spatial structure in the human genome.,"Structural variants (SVs) that alter DNA sequence emerge as a driving force involved in the reorganisation of DNA spatial folding, thus affecting gene transcription. In this work, we describe an improved version of our integrated web service for structural modeling of three-dimensional genome (3D-GNOME), which now incorporates all types of SVs to model changes to the reference 3D conformation of chromatin. In 3D-GNOME 2.0, the default reference 3D genome structure is generated using ChIA-PET data from the GM12878 cell line and SVs data are sourced from the population-scale catalogue of SVs identified by the 1000 Genomes Consortium. However, users may also submit their own structural data to set a customized reference genome structure, and/or a custom input list of SVs. 3D-GNOME 2.0 provides novel tools to inspect, visualize and compare 3D models for regions that differ in terms of their linear genomic sequence. Contact diagrams are displayed to compare the reference 3D structure with the one altered by SVs. In our opinion, 3D-GNOME 2.0 is a unique online tool for modeling and analyzing conformational changes to the human genome induced by SVs across populations. It can be freely accessed at https://3dgnome.cent.uw.edu.pl/.",2020-07-01 +26503249,BDB: biopanning data bank.,"The BDB database (http://immunet.cn/bdb) is an update of the MimoDB database, which was previously described in the 2012 Nucleic Acids Research Database issue. 
The rebranded name BDB is short for Biopanning Data Bank, which aims to be a portal for biopanning results of the combinatorial peptide library. Last updated in July 2015, BDB contains 2904 sets of biopanning data collected from 1322 peer-reviewed papers. It contains 25,786 peptide sequences, 1704 targets, 492 known templates, 447 peptide libraries and 310 crystal structures of target-template or target-peptide complexes. All data stored in BDB were revisited, and information on peptide affinity, measurement method and procedures was added for 2298 peptides from 411 sets of biopanning data from 246 published papers. In addition, a more professional and user-friendly web interface was implemented, a more detailed help system was designed, and a new on-the-fly data visualization tool and a series of tools for data analysis were integrated. With these new data and tools made available, we expect that the BDB database would become a major resource for scholars using phage display, with improved utility for biopanning and related scientific communities.",2015-10-25 +31694550,WheatCRISPR: a web-based guide RNA design tool for CRISPR/Cas9-mediated genome editing in wheat.,"BACKGROUND:CRISPR/Cas9 gene editing has become a revolutionary technique for crop improvement as it can facilitate fast and efficient genetic changes without the retention of transgene components in the final plant line. Lack of robust bioinformatics tools to facilitate the design of highly specific functional guide RNAs (gRNAs) and prediction of off-target sites in wheat is currently an obstacle to effective application of CRISPR technology to wheat improvement. DESCRIPTION:We have developed a web-based bioinformatics tool to design specific gRNAs for genome editing and transcriptional regulation of gene expression in wheat. 
A collaborative study between the Broad Institute and Microsoft Research used large-scale empirical evidence to devise algorithms (Doench et al., 2016, Nature Biotechnology 34, 184-191) for predicting the on-target activity and off-target potential of CRISPR/SpCas9 (Streptococcus pyogenes Cas9). We applied these prediction models to determine on-target specificity and potential off-target activity for individual gRNAs targeting specific loci in the wheat genome. The genome-wide gRNA mappings and the corresponding Doench scores predictive of the on-target and off-target activities were used to create a gRNA database which was used as a data source for the web application termed WheatCRISPR. CONCLUSION:The WheatCRISPR tool allows researchers to browse all possible gRNAs targeting a gene or sequence of interest and select effective gRNAs based on their predicted high on-target and low off-target activity scores, as well as other characteristics such as position within the targeted gene. It is publicly available at https://crispr.bioinfo.nrc.ca/WheatCrispr/ .",2019-11-06 +33328504,Autonomous molecule generation using reinforcement learning and docking to develop potential novel inhibitors.,"We developed a computational method named Molecule Optimization by Reinforcement Learning and Docking (MORLD) that automatically generates and optimizes lead compounds by combining reinforcement learning and docking to develop predicted novel inhibitors. This model requires only a target protein structure and directly modifies ligand structures to obtain higher predicted binding affinity for the target protein without any other training data. Using MORLD, we were able to generate potential novel inhibitors against discoidin domain receptor 1 kinase (DDR1) in less than 2 days on a moderate computer. We also demonstrated MORLD's ability to generate predicted novel agonists for the D4 dopamine receptor (D4DR) from scratch without virtual screening on an ultra large compound library. 
The free web server is available at http://morld.kaist.ac.kr .",2020-12-16 +33161371,"Discovery of novel modulators targeting human TRPC5: Docking-based virtual screening, molecular dynamics simulation and binding affinity predication.","Canonical transient receptor potential channel 5 (TRPC5) plays a key role in the regulation of central nervous system, cardiovascular system, kidney disease, cancer, and could be also involved in liver function, arthritis, diabetes-associated complications and so on. However, evidence of TRPC5 function on cellular or organismic levels is sparse. There is still a need for identifying novel and efficient TRPC5 channel modulators to study TRPC5 function. In this study, based on the hTRPC5 structure obtained by homology modeling and the predicted binding site, we have performed virtual screening of 212,736 compounds from the specs database(http://www.specs.net) to find potential hTRPC5 modulators. Lipinski and Veber rules, ADMET (Absorption, Distribution, Metabolism, Excretion, Toxicity) and PAINS (Pan Assay Interference structures) filters were used to screen the large database. Further, multi-software combination docking, cluster analysis and interaction analysis were used to select 20 potential active candidates with novel skeleton. 4 Hits, bearing appreciable binding affinity with hTRPC5 were selected for 40ns all-atom molecular dynamics (MD) simulations under explicit water conditions. The MD simulation results suggested that the 4 Hits binding induces a slight structural change and stabilizes the hTRPC5 structure. In addition, decomposition free energy demonstrated that residues TRP434, LEU437, MET438, ALA441, ILE484, ILE487, LEU488, LEU491, LEU515, ILE517, LEU518, LEU521, PHE531, THR607, VAL610, ILE611, VAL615 played the critical role on system stability. 
4 Hits, as potential modulators of hTRPC5, may be potential leads to develop effective therapeutics hTRPC5-associated diseases.",2020-10-31 +30354114,Nuclear Receptors Database Including Negative Data (NR-DBIND): A Database Dedicated to Nuclear Receptors Binding Data Including Negative Data and Pharmacological Profile.,"Nuclear receptors (NRs) are transcription factors that regulate gene expression in various physiological processes through their interactions with small hydrophobic molecules. They constitute an important class of targets for drugs and endocrine disruptors and are widely studied for both health and environment concerns. Since the integration of negative data can be critical for accurate modeling of ligand activity profiles, we manually collected and annotated NRs interaction data (positive and negative) through a sharp review of the corresponding literature. 15 116 positive and negative interactions data are provided for 28 NRs together with 593 PDB structures in the freely available Nuclear Receptors Database Including Negative Data ( http://nr-dbind.drugdesign.fr ). The NR-DBIND contains the most extensive information about interaction data on NRs, which should bring valuable information to chemists, biologists, pharmacologists and toxicologists.",2018-11-06 +32881514,Robust Accurate Identification and Biomass Estimates of Microorganisms via Tandem Mass Spectrometry.,"Rapid and accurate identification of microorganisms and estimation of their biomasses are of extreme importance to public health. Mass spectrometry has become an important technique for these purposes. Previously we published a workflow named Microorganism Classification and Identification (MiCId v.12.26.2017) that was shown to perform no worse than other workflows. 
This manuscript presents MiCId v.12.13.2018 that, in comparison with the earlier version v.12.26.2017, allows for biomass estimates, provides more accurate microorganism identifications (better controls the number of false positives), and is robust against database size increase. This significant advance is made possible by several new ingredients introduced: first, we apply a modified expectation-maximization method to compute for each taxon considered a prior probability, which can be used for biomass estimate; second, we introduce a new concept called ownership, through which the participation ratio is computed and use it as the number of taxa to be kept within a cluster of closely related taxa; third, based on confidently identified peptides, we calculate for each taxon its degree of independence from the rest of taxa considered to determine whether or not to split this taxon off the cluster. Using 270 data files, each containing a large number of MS/MS spectra, we show that, in comparison with v.12.26.2017, version v.12.13.2018 yields superior retrieval results. We also show that MiCId v.12.13.2018 can estimate species biomass reasonably well. The new MiCId v.12.13.2018, designed to run in Linux environment, is freely available for download at https://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads.html.",2019-11-20 +33416863,Recognition of small molecule-RNA binding sites using RNA sequence and structure. ,"RNA molecules become attractive small-molecule drug targets to treat disease in recent years. Computer-aided drug design can be facilitated by detecting the RNA sites that bind small molecules. However, very limited progress has been reported for the prediction of small molecule-RNA binding sites. We developed a novel method RNAsite to predict small molecule-RNA binding sites using sequence profile- and structure-based descriptors. 
RNAsite was shown to be competitive with the state-of-the-art methods on the experimental structures of two independent test sets. When predicted structure models were used, RNAsite outperforms other methods by a large margin. The possibility of improving RNAsite by geometry-based binding pocket detection was investigated. The influence of RNA structure's flexibility and the conformational changes caused by ligand binding on RNAsite were also discussed. RNAsite is anticipated to be a useful tool for the design of RNA-targeting small molecule drugs. http://yanglab.nankai.edu.cn/RNAsite. Supplementary data are available at Bioinformatics online.",2021-01-08 +32496546,SNPnexus: a web server for functional annotation of human genome sequence variation (2020 update).,"SNPnexus is a web-based annotation tool for the analysis and interpretation of both known and novel sequencing variations. Since its last release, SNPnexus has received continual updates to expand the range and depth of annotations provided. SNPnexus has undergone a complete overhaul of the underlying infrastructure to accommodate faster computational times. The scope for data annotation has been substantially expanded to enhance biological interpretations of queried variants. This includes the addition of pathway analysis for the identification of enriched biological pathways and molecular processes. We have further expanded the range of user directed annotation fields available for the study of cancer sequencing data. These new additions facilitate investigations into cancer driver variants and targetable molecular alterations within input datasets. New user directed filtering options have been coupled with the addition of interactive graphical and visualization tools. These improvements streamline the analysis of variants derived from large sequencing datasets for the identification of biologically and clinically significant subsets in the data. 
SNPnexus is the most comprehensible web-based application currently available and these new set of updates ensures that it remains a state-of-the-art tool for researchers. SNPnexus is freely available at https://www.snp-nexus.org.",2020-07-01 +,Software application profile: mrrobust—a tool for performing two-sample summary Mendelian randomization analyses,"Abstract

Motivation

In recent years, Mendelian randomization analysis using summary data from genome-wide association studies has become a popular approach for investigating causal relationships in epidemiology. The mrrobust Stata package implements several of the recently developed methods.

Implementation

mrrobust is freely available as a Stata package.

General features

The package includes inverse variance weighted estimation, as well as a range of median, modal and MR-Egger estimation methods. Using mrrobust, plots can be constructed visualizing each estimate either individually or simultaneously. The package also provides statistics such as

Availability

The software is freely available from GitHub [https://raw.github.com/remlapmot/mrrobust/master/].",2018-09-12 +32310311,Microbiome Structure and Function: A New Framework for Interpreting Data.,"A distinction between different notions of ""structure"" and ""function"" is suggested for interpreting the overwhelming amount of data on microbiome structure and function. Sequence data, biochemical agents, interaction networks, taxonomic communities, and their dynamics can be linked to potential or actual biochemical activities, causal roles, and selected effects, respectively. This conceptual clarification has important methodological consequences for how to interpret existing data and approach open questions in contemporary microbiome research practice. In particular, the field will have to start thinking about notions of function more directly. Also see the video abstract here https://youtu.be/j5pq5uGld1k.",2020-04-20 +33124919,Occurrence and Health Risks of Organic Micro-Pollutants and Metals in Groundwater of Chinese Rural Areas.,"

Background

Groundwater is a main drinking-water source for Chinese rural residents. The overall pollution status of organic micropollutants (OMPs) and metals in the groundwater and corresponding health risks are unknown.

Objectives

Our objective was to comprehensively screen for and assess the health risks of OMPs and metals in groundwater of rural areas in China where groundwater is used for drinking so as to provide a benchmark for monitoring and improving groundwater quality in future developments.

Methods

One hundred sixty-six groundwater samples were collected in the rural areas of China, and 1,300 OMPs and 25 metals were screened by GC-MS, LC-QTOF/MS, and ICP-MS analysis. To assess the noncarcinogenic and carcinogenic risks of the detected pollutants, missing toxicity threshold values were extrapolated from existing databases or predicted by quantitative structure-activity relationship (QSAR) models. Monte Carlo simulation was performed to account for uncertainties in the exposure parameters and toxicity thresholds.

Results

Two hundred thirty-three OMPs and 25 metals were detected from the 166 samples. The concentration summation for the detected OMPs ranged from 2.9 to 1.7×10^5 ng/L among the different sampling sites. Cumulative noncarcinogenic risks for the OMPs were estimated to be negligible. However, high metal risks were calculated in 23% of the sites. Forty-two carcinogens (including 38 OMPs) were identified and the cumulative carcinogenic risks in 34% of the sites were calculated to be >10^-4 (i.e., one excess cancer case in a population of 10 thousand people). The carcinogenic risks were estimated to be mainly associated with exposures to the metals, which were calculated to contribute 79% (0-100%) of the cumulative carcinogenic risks.

Discussion

The overall status of OMPs and metals pollution in the groundwater and the corresponding health risks were determined preliminarily, which may provide a benchmark for future efforts in China to ensure the safety of drinking water for the local residents in rural areas. The joint application of QSARs and Monte Carlo simulation provided a feasible way to comprehensively assess the health risks of the large and ever-increasing number of pollutants detected in the aquatic environment. https://doi.org/10.1289/EHP6483.",2020-10-30 +33126898,Development of prognosis model for colon cancer based on autophagy-related genes.,"

Background

Autophagy is an orderly catabolic process for degrading and removing unnecessary or dysfunctional cellular components such as proteins and organelles. Although autophagy is known to play an important role in various types of cancer, the effects of autophagy-related genes (ARGs) on colon cancer have not been well studied.

Methods

Expression profiles from ARGs in 457 colon cancer patients were retrieved from the TCGA database ( https://portal.gdc.cancer.gov ). Differentially expressed ARGs and ARGs related to overall patient survival were identified. Cox proportional-hazard models were used to investigate the association between ARG expression profiles and patient prognosis.

Results

Twenty ARGs were significantly associated with the overall survival of colon cancer patients. Five of these ARGs had a mutation rate ≥ 3%. Patients were divided into high-risk and low-risk groups based on Cox regression analysis of 8 ARGs. Low-risk patients had a significantly longer survival time than high-risk patients (p < 0.001). Univariate and multivariate Cox regression analysis showed that the resulting risk score, which was associated with infiltration depth and metastasis, could be an independent predictor of patient survival. A nomogram was established to predict 1-, 3-, and 5-year survival of colon cancer patients based on 5 independent prognosis factors, including the risk score. The prognostic nomogram with online webserver was more effective and convenient to provide information for researchers and clinicians.

Conclusion

The 8 ARGs can be used to predict the prognosis of patients and provide information for their individualized treatment.",2020-10-30 +26519399,"UniProtKB/Swiss-Prot, the Manually Annotated Section of the UniProt KnowledgeBase: How to Use the Entry View.","The Universal Protein Resource (UniProt, http://www.uniprot.org ) consortium is an initiative of the SIB Swiss Institute of Bioinformatics (SIB), the European Bioinformatics Institute (EBI) and the Protein Information Resource (PIR) to provide the scientific community with a central resource for protein sequences and functional information. The UniProt consortium maintains the UniProt KnowledgeBase (UniProtKB), updated every 4 weeks, and several supplementary databases including the UniProt Reference Clusters (UniRef) and the UniProt Archive (UniParc).The Swiss-Prot section of the UniProt KnowledgeBase (UniProtKB/Swiss-Prot) contains publicly available expertly manually annotated protein sequences obtained from a broad spectrum of organisms. Plant protein entries are produced in the frame of the Plant Proteome Annotation Program (PPAP), with an emphasis on characterized proteins of Arabidopsis thaliana and Oryza sativa. High level annotations provided by UniProtKB/Swiss-Prot are widely used to predict annotation of newly available proteins through automatic pipelines.The purpose of this chapter is to present a guided tour of a UniProtKB/Swiss-Prot entry. We will also present some of the tools and databases that are linked to each entry.",2016-01-01 +32123714,Estimation of upper and lower bounds of Gini coefficient by fuzzy data.,"The data presented in this paper are used to examine the uncertainty in macroeconomic variables and their impact on the Gini coefficient. Annual data for the period 2017 - 1996 are taken from the Bank of Iran website https://www.cbi.ir. We used fuzzy regression with symmetric coefficients to calculate upper and lower bound data of Gini coefficient. 
Estimated data at this stage can be a very useful guide for policymakers; it is also a benchmark for evaluating the effectiveness of government policies. The reason for using fuzzy regression to estimate data on Gini coefficients is the extra flexibility of this model.",2020-02-14 +32750265,Daratumumab subcutaneous formulation for the treatment of multiple myeloma.,"

Introduction

Intravenous daratumumab has shown unprecedented anti-myeloma activity when used as a single agent or in combination with other myeloma therapies. Recently, a subcutaneous formulation of daratumumab was approved for use in both the United States and European Union based on data which showed shorter infusion times and decreased rate of infusion reactions while maintaining non-inferior efficacy.

Areas covered

We cover the physiology behind subcutaneous daratumumab and summarize the relevant clinical data with a particular focus on the pharmacokinetics, pharmacodynamics, safety, and clinical efficacy. Articles used to generate this review were obtained by searching PubMed (https://pubmed.ncbi.nlm.nih.gov/) with the search terms 'subcutaneous daratumumab' and 'daratumumab hyaluronidase'.

Expert opinion

Subcutaneous daratumumab is associated with lower risk of infusion reactions and decreased administration time while maintaining non-inferior efficacy. We support the use of subcutaneous daratumumab for all approved indications and for investigational use moving forward.",2020-08-16 +27899595,"PANTHER version 11: expanded annotation data from Gene Ontology and Reactome pathways, and data analysis tool enhancements.","The PANTHER database (Protein ANalysis THrough Evolutionary Relationships, http://pantherdb.org) contains comprehensive information on the evolution and function of protein-coding genes from 104 completely sequenced genomes. PANTHER software tools allow users to classify new protein sequences, and to analyze gene lists obtained from large-scale genomics experiments. In the past year, major improvements include a large expansion of classification information available in PANTHER, as well as significant enhancements to the analysis tools. Protein subfamily functional classifications have more than doubled due to progress of the Gene Ontology Phylogenetic Annotation Project. For human genes (as well as a few other organisms), PANTHER now also supports enrichment analysis using pathway classifications from the Reactome resource. The gene list enrichment tools include a new 'hierarchical view' of results, enabling users to leverage the structure of the classifications/ontologies; the tools also allow users to upload genetic variant data directly, rather than requiring prior conversion to a gene list. The updated coding single-nucleotide polymorphisms (SNP) scoring tool uses an improved algorithm. The hidden Markov model (HMM) search tools now use HMMER3, dramatically reducing search times and improving accuracy of E-value statistics. Finally, the PANTHER Tree-Attribute Viewer has been implemented in JavaScript, with new views for exploring protein sequence evolution.",2016-11-29 +26183768,A resource for teaching emergency care communication.,"

Background

Communication in emergency departments (EDs), often between several health professionals and patients and relatives, is a major cause of patient complaint and error; however, communication-skills teaching for medical students largely focuses on individual clinician-patient interactions.

Context

We developed and implemented an evidence-informed online resource, Communication for Health in Emergency Contexts (CHEC; http://www.chec.meu.medicine.unimelb.edu.au/resources) to raise medical students' awareness of the challenges of communication in the ED, and to provide students with communication strategies for addressing these challenges. The foundation of the CHEC resource was the findings and data from a large research project conducted at five emergency departments in Australia over the period 2006-2009. From this, we developed ED scenarios and teaching vignettes using authentic communication data. The project included a nationwide medical curriculum scoping phase, involving interviews with medical students and educators, on ED communication curriculum needs in order to inform the educational activities.

Innovation

The CHEC resource provides students with the opportunity to follow real-life scenarios through all stages of the ED journey, whereas insights from ED medical and nursing staff provide learning opportunities about interprofessional communication for medical students. Evaluation suggests that students find the resource useful, and that the resource has been successfully embedded in medical and junior doctor training on communication and quality and safety.

Implications

The CHEC resource enhances the capacity of busy clinical educators to raise students' awareness of the communication needs of emergency health care by focusing on communication in high-stress, time-pressured settings using a web format. The CHEC resource provides students with the opportunity to follow real-life scenarios through all stages of the ED journey.",2015-07-16 +31750992,Structural and biochemical analysis of Bacillus anthracis prephenate dehydrogenase reveals an unusual mode of inhibition by tyrosine via the ACT domain.,"Tyrosine biosynthesis via the shikimate pathway is absent in humans and other animals, making it an attractive target for next-generation antibiotics, which is increasingly important due to the looming proliferation of multidrug-resistant pathogens. Tyrosine biosynthesis is also of commercial importance for the environmentally friendly production of numerous compounds, such as pharmaceuticals, opioids, aromatic polymers, and petrochemical aromatics. Prephenate dehydrogenase (PDH) catalyzes the penultimate step of tyrosine biosynthesis in bacteria: the oxidative decarboxylation of prephenate to 4-hydroxyphenylpyruvate. The majority of PDHs are competitively inhibited by tyrosine and consist of a nucleotide-binding domain and a dimerization domain. Certain PDHs, including several from pathogens on the World Health Organization priority list of antibiotic-resistant bacteria, possess an additional ACT domain. However, biochemical and structural knowledge was lacking for these enzymes. In this study, we successfully established a recombinant protein expression system for PDH from Bacillus anthracis (BaPDH), the causative agent of anthrax, and determined the structure of a BaPDH ternary complex with NAD+ and tyrosine, a binary complex with tyrosine, and a structure of an isolated ACT domain dimer. We also conducted detailed kinetic and biophysical analyses of the enzyme. 
We show that BaPDH is allosterically regulated by tyrosine binding to the ACT domains, resulting in an asymmetric conformation of the BaPDH dimer that sterically prevents prephenate binding to either active site. The presented mode of allosteric inhibition is unique compared to both the competitive inhibition established for other PDHs and to the allosteric mechanisms for other ACT-containing enzymes. This study provides new structural and mechanistic insights that advance our understanding of tyrosine biosynthesis in bacteria. ENZYMES: Prephenate dehydrogenase from Bacillus anthracis (PDH): EC database ID: 1.3.1.12. DATABASES: Coordinates and structure factors have been deposited in the Protein Data Bank (PDB) with accession numbers PDB ID: 6U60 (BaPDH complex with NAD+ and tyrosine), PDB ID: 5UYY (BaPDH complex with tyrosine), and PDB ID: 5V0S (BaPDH isolated ACT domain dimer). The diffraction images are available at http://proteindiffraction.org with DOIs: https://doi.org/10.18430/M35USC, https://doi.org/10.18430/M35UYY, and https://doi.org/10.18430/M35V0S.",2019-12-26 +30586417,ROCA - An ArcGIS toolbox for road alignment identification and horizontal curve radii computation.,"We present the ROCA (ROad Curvature Analyst) software, in the form of an ESRI ArcGIS Toolbox, intended for vector line data processing. The software segments road network data into tangents and horizontal curves. Horizontal curve radii and azimuth of tangents are then automatically computed. Simultaneously, additional frequently used road section characteristics are calculated, such as the sinuosity of a road section (detour ratio), the number of turns along an individual road section and the average cumulative angle for a road section. The identification of curves is based on the naïve Bayes classifier and users are allowed to prepare their own training data files. We applied ROCA software to secondary roads within the Czech road network (9,980 km). 
The data processing took less than ten minutes. Approximately 43% of the road network in question consists of 42,752 horizontal curves. The ROCA software outperforms other existing automatic methods by 26% with respect to the percentage of correctly identified curves. The segmented secondary roads within the Czech road network can be viewed on the roca.cdvgis.cz/czechia web-map application. We combined data on road geometry with road crashes database to develop the crash modification factors for horizontal curves with various radii. We determined that horizontal curves with radii of 50 m are approximately 3.7 times more hazardous than horizontal curves with radii accounting for 1000 m. ROCA software can be freely downloaded for noncommercial use from https://roca.cdvinfo.cz/ website.",2018-12-26 +27530928,NLDB: a database for 3D protein-ligand interactions in enzymatic reactions.,"NLDB (Natural Ligand DataBase; URL: http://nldb.hgc.jp ) is a database of automatically collected and predicted 3D protein-ligand interactions for the enzymatic reactions of metabolic pathways registered in KEGG. Structural information about these reactions is important for studying the molecular functions of enzymes, however a large number of the 3D interactions are still unknown. Therefore, in order to complement such missing information, we predicted protein-ligand complex structures, and constructed a database of the 3D interactions in reactions. NLDB provides three different types of data resources; the natural complexes are experimentally determined protein-ligand complex structures in PDB, the analog complexes are predicted based on known protein structures in a complex with a similar ligand, and the ab initio complexes are predicted by docking simulations. In addition, NLDB shows the known polymorphisms found in human genome on protein structures. 
The database has a flexible search function based on various types of keywords, and an enrichment analysis function based on a set of KEGG compound IDs. NLDB will be a valuable resource for experimental biologists studying protein-ligand interactions in specific reactions, and for theoretical researchers wishing to undertake more precise simulations of interactions.",2016-08-16 +33589518,MEK Inhibition Remodels the Immune Landscape of Mutant KRAS Tumors to Overcome Resistance to PARP and Immune Checkpoint Inhibitors.,"Mutant KRAS tumors are associated with poor outcomes, at least in part, due to decreased therapeutic sensitivity. Here, we show that KRAS mutations are associated with resistance to monotherapy and combination therapy with PARP inhibitors (PARPi) and immune checkpoint blockade with anti-PD-L1 antibodies. In mutant KRAS tumors, inhibition of KRAS signaling with MEK inhibitors (MEKi) triggered and amplified PARPi-induced DNA damage, cytosolic double-stranded DNA accumulation, STING pathway activation, and CD8+ T-cell recruitment. Moreover, MEKi decreased myeloid-derived suppressor cell infiltration, in part, by inhibiting IL6 and GMCSF production. Importantly, addition of MEKi to PARPi and anti-PD-L1 resulted in marked tumor inhibition in immunocompetent mutant KRAS tumor models. This study provides the underlying mechanistic data to support evaluation of PARPi, MEKi, and anti-PD-L1 combination in clinical trials of mutant KRAS tumors. SIGNIFICANCE: This study provides key insights into the potential for using MEKi combined with PARPi and anti-PD-L1 for the treatment of all mutant KRAS tumors. 
GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/10/2714/F1.large.jpg.",2021-02-15 +34017848,Effects of Dynamic Hyperinflation on Left Ventricular Diastolic Function in Healthy Subjects - A Randomized Controlled Crossover Trial.,"Objective: Diastolic dysfunction of the left ventricle is common in patients with chronic obstructive pulmonary disease (COPD). Dynamic hyperinflation has been suggested as a key determinant of reduced diastolic function in COPD. We aimed to investigate the effects of induced dynamic hyperinflation on left ventricular diastolic function in healthy subjects to exclude other confounding mechanisms associated with COPD. Design: In this randomized controlled crossover trial (NCT03500822, https://www.clinicaltrials.gov/), we induced dynamic hyperinflation using the validated method of expiratory resistance breathing (ERB), which combines tachypnea with expiratory resistance, and compared the results to those of tachypnea alone. Healthy male subjects (n = 14) were randomly assigned to the ERB or control group with subsequent crossover. Mild, moderate, and severe hyperinflation (i.e., ERB1, ERB2, ERB3) were confirmed by intrinsic positive end-expiratory pressure (PEEPi) using an esophageal balloon catheter. The effects on diastolic function of the left ventricle were measured by transthoracic echocardiographic assessment of the heart rate-adjusted transmitral E/A-ratio and E/e'-ratio. Results: We randomly assigned seven participants to the ERB group and seven to the control group (age 26 [24-26] vs. 24 [24-34], p = 0.81). Severe hyperinflation decreased the E/A-ratio compared to the control condition (1.63 [1.49-1.77] vs. 1.85 [0.95-2.75], p = 0.039), and moderate and severe ERB significantly increased the septal E/e'-ratio. No changes in diastolic function were found during mild hyperinflation. PEEPi levels during ERB were inversely correlated with the E/A ratio (regression coefficient = -0.007, p = 0.001). 
Conclusions: Our data indicate dynamic hyperinflation as a determinant of left ventricular diastolic dysfunction in healthy subjects. Therapeutic reduction of hyperinflation might be a treatable trait to improve diastolic function in patients with COPD.",2021-05-04 +33311384,A newly available database of an important family of phytophagous mites: Tenuipalpidae Database.,"This paper announces a database on the taxonomy, distribution and host plants of mites of the family Tenuipalpidae Berlese (Acari: Tetranychoidea), available online at https://www.tenuipalpidae.ibilce.unesp.br/. In the Tenuipalpidae Database the recorded world distribution and range of host plants are provided for each tenuipalpid species, including synonyms, with a list of all relevant publications.",2020-10-29 +28509912,A resource on latitudinal and altitudinal clines of ecologically relevant phenotypes of the Indian Drosophila.,"The unique geography of the Indian subcontinent has provided diverse natural environments for a variety of organisms. In this region, many ecological indices such as temperature and humidity vary predictably as a function of both latitude and altitude; these environmental parameters significantly affect fundamental dynamics of natural populations. Indian drosophilids are diverse in their geographic distribution and climate tolerance, possibly as a result of climatic adaptation. These associations with environmental parameters are further reflected in a large number of clines that have been reported for various fitness traits along these geographical ranges. This unique amalgamation of environmental variability and genetic diversity make the subcontinent an ecological laboratory for studying evolution in action. We assembled data collected over the last 20 years on the geographical clines for various phenotypic traits in several species of drosophilids and present a web-resource on Indian-Drosophila ( http://www.indian-drosophila.org/). 
The clinal data on ecologically relevant phenotypes of Indian drosophilids will be useful in addressing questions related to future challenges in biodiversity and ecosystems in this region.",2017-05-16 +29092931,Cistrome Cancer: A Web Resource for Integrative Gene Regulation Modeling in Cancer.,"Cancer results from a breakdown of normal gene expression control, so the study of gene regulation is critical to cancer research. To gain insight into the transcriptional and epigenetic factors regulating abnormal gene expression patterns in cancers, we developed the Cistrome Cancer web resource (http://cistrome.org/CistromeCancer/). We conducted the systematic integration and modeling of over 10,000 tumor molecular profiles from The Cancer Genome Atlas (TCGA) with over 23,000 ChIP-seq and chromatin accessibility profiles from our Cistrome collection. The results include reconstruction of functional enhancer profiles, ""super-enhancer"" target genes, as well as predictions of active transcription factors and their target genes for each TCGA cancer type. Cistrome Cancer reveals novel insights from integrative analyses combining chromatin profiles with tumor molecular profiles and will be a useful resource to the cancer gene regulation community. Cancer Res; 77(21); e19-22. ©2017 AACR.",2017-11-01 +33295759,Skin Doctor CP: Conformal Prediction of the Skin Sensitization Potential of Small Organic Molecules.,"Skin sensitization potential or potency is an important end point in the safety assessment of new chemicals and new chemical mixtures. Formerly, animal experiments such as the local lymph node assay (LLNA) were the main form of assessment. Today, however, the focus lies on the development of nonanimal testing approaches (i.e., in vitro and in chemico assays) and computational models. 
In this work, we investigate, based on publicly available LLNA data, the ability of aggregated, Mondrian conformal prediction classifiers to differentiate between non- sensitizing and sensitizing compounds as well as between two levels of skin sensitization potential (weak to moderate sensitizers, and strong to extreme sensitizers). The advantage of the conformal prediction framework over other modeling approaches is that it assigns compounds to activity classes only if a defined minimum level of confidence is reached for the individual predictions. This eliminates the need for applicability domain criteria that often are arbitrary in their nature and less flexible. Our new binary classifier, named Skin Doctor CP, differentiates nonsensitizers from sensitizers with a higher reliability-to-efficiency ratio than the corresponding nonconformal prediction workflow that we presented earlier. When tested on a set of 257 compounds at the significance levels of 0.10 and 0.30, the model reached an efficiency of 0.49 and 0.92, and an accuracy of 0.83 and 0.75, respectively. In addition, we developed a ternary classification workflow to differentiate nonsensitizers, weak to moderate sensitizers, and strong to extreme sensitizers. Although this model achieved satisfactory overall performance (accuracies of 0.90 and 0.73, and efficiencies of 0.42 and 0.90, at significance levels 0.10 and 0.30, respectively), it did not obtain satisfying class-wise results (at a significance level of 0.30, the validities obtained for nonsensitizers, weak to moderate sensitizers, and strong to extreme sensitizers were 0.70, 0.58, and 0.63, respectively). We argue that the model is, in consequence, unable to reliably identify strong to extreme sensitizers and suggest that other ternary models derived from the currently accessible LLNA data might suffer from the same problem. 
Skin Doctor CP is available via a public web service at https://nerdd.zbh.uni-hamburg.de/skinDoctorII/.",2020-12-09 +32930613,Machine Learning-Based DNA Methylation Score for Fetal Exposure to Maternal Smoking: Development and Validation in Samples Collected from Adolescents and Adults.,"

Background

Fetal exposure to maternal smoking during pregnancy is associated with the development of noncommunicable diseases in the offspring. Maternal smoking may induce such long-term effects through persistent changes in the DNA methylome, which therefore hold the potential to be used as a biomarker of this early life exposure. With declining costs for measuring DNA methylation, we aimed to develop a DNA methylation score that can be used on adolescent DNA methylation data and thereby generate a score for in utero cigarette smoke exposure.

Methods

We used machine learning methods to create a score reflecting exposure to maternal smoking during pregnancy. This score is based on peripheral blood measurements of DNA methylation (Illumina's Infinium HumanMethylation450K BeadChip). The score was developed and tested in the Raine Study with data from 995 white 17-y-old participants using 10-fold cross-validation. The score was further tested and validated in independent data from the Northern Finland Birth Cohort 1986 (NFBC1986) (16-y-olds) and 1966 (NFBC1966) (31-y-olds). Further, three previously proposed DNA methylation scores were applied for comparison. The final score was developed with 204 CpGs using elastic net regression.

Results

Sensitivity and specificity values for the best performing previously developed classifier (""Reese Score"") were 88% and 72% for Raine, 87% and 61% for NFBC1986 and 72% and 70% for NFBC1966, respectively; corresponding figures using the elastic net regression approach were 91% and 76% (Raine), 87% and 75% (NFBC1986), and 72% and 78% for NFBC1966.

Conclusion

We have developed a DNA methylation score for exposure to maternal smoking during pregnancy, outperforming the three previously developed scores. One possible application of the current score could be for model adjustment purposes or to assess its association with distal health outcomes where part of the effect can be attributed to maternal smoking. Further, it may provide a biomarker for fetal exposure to maternal smoking. https://doi.org/10.1289/EHP6076.",2020-09-15 +31988005,"Interactome networks between the human respiratory syncytial virus (HRSV), the human metapneumovirus (ΗMPV), and their host: In silico investigation and comparative functional enrichment analysis.","

Background and objectives

Human respiratory syncytial virus (HRSV) and human metapneumovirus (HMPV) are leading causes of upper and lower respiratory tract infections in non-immunocompetent subjects, yet the mechanisms by which they induce their pathogenicity differ significantly and remain elusive. In this study we aimed at identifying the gene interaction networks between the HRSV, HMPV respiratory pathogens and their host along with the different cell-signaling pathways associated with the above interactomes.

Materials and methods

The Viruses STRING database (http://viruses.string-db.org/) was used for the identification of the host-viruses interaction networks. The two lists of the predicted functional partners were entered in the FunRich tool (http://www.funrich.org) for the construction of the Venn diagram and the comparative Functional Enrichment Analysis (FEA) with respect to biological pathways. The sets of the common and unique human genes identified in the two networks were also analyzed. The computational predictions regarding the shared human genes in the host-HRSV and the host-HMPV interactomes were further evaluated via the analysis of the GSE111732 dataset. miRNA transcriptomics data were mapped to gene targets using the miRNomics pipeline of the GeneTrail2 database (https://genetrail2.bioinf.uni-sb.de/).

Results

Eleven out of twenty predicted human genes were common in the two interactomes (TLR4, SOCS3, SFXN1, AKT1, SFXN3, LY96, SFXN2, SOCS7, CISH, SOCS6, SOCS1). FEA of these common genes identified the kit receptor and the GH receptor signaling pathways as the most significantly enriched annotations. The remaining nine genes of the host-HRSV and the host-HMPV interaction networks were the IFIH1, DDX58, NCL, IRF3, STAT2, HSPA4, CD209, KLF6, CHKA and the MYD88, SOCS4, SOCS2, SOCS5, AKT2, AKT3, SFXN4, SFXN5 and TLR3, respectively. Distinct cell-signaling pathways were enriched per interactome. The comparative FEA highlighted the association of the host-HRSV functional partners with the negative regulation of RIG-I/MDA5 signaling. The analysis with respect to miRNAs mapping to gene targets of the GSE111732 dataset indicated that nine out of the eleven common host genes are either enriched or depleted in the sample sets (HRSV or HMPV infected) as compared with the reference set (non-infected), although with no significant scores.

Conclusions

We have identified both shared and unique host genes as members of the HRSV and HMPV interaction networks. The disparate human genes likely contribute to distinct responses in airway epithelial cells.",2020-01-25 +,"First Report of ‘Candidatus Phytoplasma trifolii’-Related Strain of 16SrVI-A Phytoplasma Subgroup, Associated with Elm Yellows Disease in American Elm (Ulmus americana L.) in Ohio, U.S.A","During the investigation of the sudden and early onset of yellowing, followed by mortality of American elm (Ulmus americana L.) trees at the USDA Forest Service Northern Research Station in Delaware, Ohio, a phytoplasma of the clover proliferation group (16SrVI) was detected as the putative causal agent of the disease outbreak. Onset of symptoms was rapid and widespread, occurring in late July 2016 and affecting ∼60 trees across two elm research plantations. Symptoms included a general yellowing of individual tree canopies, epinasty of foliage throughout the canopy, phloem discoloration, and on a subset of trees, a strong odor of methyl salicylate (observed in phloem tissue extracted from the lower stem). Similar symptoms in elms have been attributed to the classic elm yellows ‘Candidatus Phytoplasma ulmi’ (16SrV-A) (Lee et al. 2004) and the Illinois elm yellows phytoplasma (16SrVI-C) (Jacobs et al. 2003). In July 2016, samples were collected from 12 symptomatic and 8 asymptomatic American elm trees. DNA from the leaf midrib and branch phloem was isolated and analyzed for phytoplasma via seminested polymerase chain reactions (PCR). PCRs were first primed by phytoplasma universal primer pair P5/P7 (Jomantiene et al. 1998), followed by P7 and the reverse complement of the universal phytoplasma primer R16R2 for amplification of the phytoplasma 16S-23 ribosomal (r) DNA (16S-23 rRNA gene) sequences as per Gundersen and Lee (1996). The predicted band size of the second PCR product is 487 base pair (bp). 
The product bands were isolated, purified, and sequenced using primer P7. Sequencing results of the PCR products indicated that nine of the symptomatic and one of the asymptomatic American elm trees tested were infected by a phytoplasma. A BLAST search of the DNA sequences indicated high similarities to members of the ‘Ca. P. trifolii’ group 16SrVI-A. The sequences of all 10 phytoplasma-infected trees were identical to each other. To further confirm that a strain of ‘Ca. P. trifolii’ was infecting the elms, a PCR product was cloned and sequenced. The 1,557 bp band was the product of primers P1a/P7, followed by PCR primers designed from the sequence of ‘Ca. P. trifolii’. This band was cloned into the pMiniT 2.0 vector. Plasmid sequencing used standard sequencing primers (SP6 and T7 promoters), the primers included with the vector, and finally, custom designed phytoplasma primers that eliminate other bacterial DNA from getting amplified. The entire plasmid clone was sequenced in both directions with four to six times of coverage per base. Determination of the phytoplasma classification group was based on the nucleotide sequence within the phytoplasma universal primers F2n/R2 PCR fragment within the 16Sr gene. Using iPhyClassifier, the online tool for phytoplasma classification and taxonomic assignment (https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi), the sequence similarity between the Delaware elm phytoplasma and ‘Ca. P. trifolii’ (GenBank accession no. AB279597.1) is 99.9% in the 16Sr region, which places the Delaware elm phytoplasma in the 16SrVI-A group. The sequence of the plasmid was deposited in GenBank under accession number MF385584. Elm decline and yellows diseases in North America have been associated with the Illinois elm yellows phytoplasma (16SrVI-C, GenBank accession no. AF268893.1) (Jacobs et al. 2003), ‘Ca. Phytoplasma ulmi’ (16SrV-A) (Lee et al. 2004), and phytoplasma in the aster yellows group (16SrI-C) (Lee et al. 
1995).",2018-02-01 +31835998,Telehealth in palliative care is being described but not evaluated: a systematic review.,"BACKGROUND:Telehealth is growing and its application in palliative care is seen as a solution to pressures on palliative care services. A 2010 UK review reported growing awareness of telehealth in palliative care but a lack of evidence-based research to support its use. The primary aim of this review was to describe the current use of telehealth in palliative care in the UK and evaluate telehealth initiatives against a digital service standard. The secondary aim was to explore whether telehealth results in a reduction in emergency care access. METHODS:Systematic review of the literature with thematic synthesis. Records were screened and data extracted by two reviewers. EMBASE, MEDLINE, CINAHL, Psychinfo and Cochrane central register for controlled trials were searched using pre-defined terms. Hand searching of conference literature, thesis databases and citation tracking was also conducted. The protocol for this systematic review was registered with PROSPERO and can be found at http://www.crd.york.ac.uk/PROSPERO/display_record.php?ID=CRD42017080038. RESULTS:The search identified 3807 titles and 30 studies were included in the review. Telehealth was used to support patients and carers, electronic record keeping and professional education. Notably, the number of home telemonitoring initiatives for patients had increased from the 2010 review. Despite this variety, many studies were small scale, descriptive and provided little evidence of evaluation of the service. Ten papers were sufficiently detailed to allow appraisal against the digital service standard and only one of these met all of the criteria to some extent. Seven studies made reference to emergency care access. 
CONCLUSIONS:Although there is growth of telehealth services, there remains a lack of evaluation and robust study design meaning conclusions regarding the clinical application of telehealth in palliative care cannot be drawn. There is insufficient evidence to appreciate any benefit of telehealth on access to emergency care. Future work is needed to evaluate the use of telehealth in palliative care and improve telehealth design in line with digital service standards.",2019-12-13 +33989040,Umbilical Cord Blood Metal Mixtures and Birth Size in Bangladeshi Children.,"

Background

Studies have evaluated environmental exposure to toxic metals such as arsenic (As), cadmium (Cd), manganese (Mn), or lead (Pb) on birth size; however, information on potential effects of exposures to metal mixtures is limited.

Objectives

We assessed the association between metal mixtures (As, Cd, Mn, Pb) in umbilical cord blood and neonate size in Bangladeshi children.

Methods

In this birth cohort study, pregnant women who were ≥18 years of age with an ultrasound-confirmed singleton pregnancy of ≤16wk gestation were recruited from two Bangladesh clinics between 2008 and 2011. Neonate size metrics were measured at the time of delivery. Metals in cord blood were measured using inductively coupled plasma mass spectrometry. We employed multivariable linear regression and Bayesian kernel machine regression (BKMR) to estimate associations of individual metals and metal mixtures with birth size parameters.

Results

Data from 1,088 participants were assessed. We found a significant negative association between metal mixture and birth length and head circumference when all metal concentrations were above the 60th and 55th percentiles, respectively, compared with the median. An interquartile range (IQR) increase in log Cd concentration {log[Cd (in micrograms per deciliter)] IQR=2.51} was associated with a 0.13-standard deviation (SD) decrease in mean birth length (95% CI: -0.25, -0.02) and a 0.17-SD decrease in mean head circumference (95% CI: -0.28, -0.05), based on linear regression models adjusted for covariates and the other metals. An IQR increase in log Mn concentration {log[Mn (in micrograms per deciliter)] IQR=0.69} was associated with a 0.07-SD decrease in mean birth weight (95% CI: -0.15, 0.002).

Discussion

Metal mixtures in cord blood were associated with reduced birth size in Bangladeshi children. Results from linear regression models adjusted and the BKMR mixtures analyses suggest adverse effects of Cd and Mn, as individual metal exposures, on birth size outcomes. https://doi.org/10.1289/EHP7502.",2021-05-14 +31589649,Cell fishing: A similarity based approach and machine learning strategy for multiple cell lines-compound sensitivity prediction.,"The prediction of cell-lines sensitivity to a given set of compounds is a very important factor in the optimization of in-vitro assays. To date, the most common prediction strategies are based upon machine learning or other quantitative structure-activity relationships (QSAR) based approaches. In the present research, we propose and discuss a straightforward strategy not based on any learning modelling but exclusively relying upon the chemical similarity of a query compound to reference compounds with annotated activity against cell lines. We also compare the performance of the proposed method to machine learning predictions on the same problem. A curated database of compounds-cell lines associations derived from ChemBL version 22 was created for algorithm construction and cross-validation. Validation was done using 10-fold cross-validation and testing the models on new data obtained from ChemBL version 25. In terms of accuracy, both methods perform similarly with values around 0.65 across 750 cell lines in 10-fold cross-validation experiments. By combining both methods it is possible to achieve 66% of correct classification rate in more than 26000 newly reported interactions comprising 11000 new compounds. 
A Web Service implementing the described approaches (both similarity and machine learning based models) is freely available at: http://bioquimio.udla.edu.ec/cellfishing.",2019-10-07 +30394935,Systematic analysis of breast atypical hyperplasia-associated hub genes and pathways based on text mining.,"The purpose of this study was to describe breast atypical hyperplasia (BAH)-related gene expression and to systematically analyze the functions, pathways, and networks of BAH-related hub genes. On the basis of natural language processing, gene data for BAH were extracted from the PubMed database using text mining. The enriched Gene Ontology terms and Kyoto Encyclopedia of Genes and Genomes pathways were obtained using DAVID (http://david.abcc.ncifcrf.gov/). A protein-protein interaction network was constructed using the STRING database. Hub genes were identified as genes that interact with at least 10 other genes within the BAH-related gene network. In total, 138 BAH-associated genes were identified as significant (P < 0.05), and 133 pathways were identified as significant (P < 0.05, false discovery rate < 0.05). A BAH-related protein network that included 81 interactions was constructed. Twenty genes were determined to interact with at least 10 others (P < 0.05, false discovery rate < 0.05) and were identified as the BAH-related hub genes of this protein-protein interaction network. These 20 genes are TP53, PIK3CA, JUN, MYC, EGFR, CCND1, AKT1, ERBB2, CTNN1B, ESR1, IGF-1, VEGFA, HRAS, CDKN1B, CDKN1A, PCNA, HGF, HIF1A, RB1, and STAT5A. This study may help to disclose the molecular mechanisms of BAH development and provide implications for BAH-targeted therapy or even breast cancer prevention. 
Nevertheless, connections between certain genes and BAH require further exploration.",2019-11-01 +31061857,RNA-sequencing data highlighting the time-of-day-dependent transcriptome of the central circadian pacemaker in Sox2-deficient mice.,"SOX2 is a stem cell-associated pluripotency transcription factor whose role in neuronal populations is undefined. Here we present the RNA-sequencing based transcriptome profiles of control (Sox2 fl/fl ) and SOX2 conditional knock-out (Vgat-cre;Sox2 fl/fl ) mice at four time points in one 24-h circadian cycle. The raw sequencing data were deposited to ArrayExpress database at EMBL-EBI (https://www.ebi.ac.uk/arrayexpress) under the accession number E-MTAB-7496. Results of rhythmicity analysis, differential expression analysis, network prediction, and potential target identification stemming from the RNA-sequencing dataset are also given in this article. The interpretation and discussion of these data can be found in the related research article entitled ""SOX2-dependent transcription in clock neurons promotes the robustness of the central circadian pacemaker."" Cheng et al. 2019.",2019-04-08 +29134430,Modern drug design: the implication of using artificial neuronal networks and multiple molecular dynamic simulations.,"We report the implementation of molecular modeling approaches developed as a part of the 2016 Grand Challenge 2, the blinded competition of computer aided drug design technologies held by the D3R Drug Design Data Resource ( https://drugdesigndata.org/ ). The challenge was focused on the ligands of the farnesoid X receptor (FXR), a highly flexible nuclear receptor of the cholesterol derivative chenodeoxycholic acid. 
FXR is considered an important therapeutic target for metabolic, inflammatory, bowel and obesity related diseases (Expert Opin Drug Metab Toxicol 4:523-532, 2015), but in the context of this competition it is also interesting due to the significant ligand-induced conformational changes displayed by the protein. To deal with these conformational changes we employed multiple simulations of molecular dynamics (MD). Our MD-based protocols were top-ranked in estimating the free energy of binding of the ligands and FXR protein. Our approach was ranked second in the prediction of the binding poses where we also combined MD with molecular docking and artificial neural networks. Our approach showed mediocre results for high-throughput scoring of interactions.",2017-11-13 +30735484,Meta-analytic framework for modeling genetic coexpression dynamics. ,"Methods for exploring genetic interactions have been developed in an attempt to move beyond single gene analyses. Because biological molecules frequently participate in different processes under various cellular conditions, investigating the changes in gene coexpression patterns under various biological conditions could reveal important regulatory mechanisms. One of the methods for capturing gene coexpression dynamics, named liquid association (LA), quantifies the relationship where the coexpression between two genes is modulated by a third ""coordinator"" gene. This LA measure offers a natural framework for studying gene coexpression changes and has been applied increasingly to study regulatory networks among genes. With a wealth of publicly available gene expression data, there is a need to develop a meta-analytic framework for LA analysis. In this paper, we incorporated mixed effects when modeling correlation to account for between-studies heterogeneity. For statistical inference about LA, we developed a Markov chain Monte Carlo (MCMC) estimation procedure through a Bayesian hierarchical framework. 
We evaluated the proposed methods in a set of simulations and illustrated their use in two collections of experimental data sets. The first data set combined 10 pancreatic ductal adenocarcinoma gene expression studies to determine the role of possible coordinator gene USP9X in the Hippo pathway. The second experimental data set consisted of 907 gene expression microarray Escherichia coli experiments from multiple studies publicly available through the Many Microbe Microarray Database website (http://m3d.bu.edu/) and examined genes that coexpress with serA in the presence of coordinator gene Lrp.",2019-02-09 +33532528,Data on SiC-based bundle lifetime variability: The insufficiency of external phenomena affecting the flaw size.,"A broad variability characterizes the lifetime of SiC-based bundles under static fatigue conditions at intermediate temperature and ambient air, challenging the accuracy of its prediction. The same is true, to a lesser extent, with tensile properties, in apparent discrepancy with the bundle theory based on weakest link theory. The data presented here focus on lifetime scattering, evaluated on different fiber types (6 in total, Nicalon® or Tyranno®). It is hosted at http://dx.doi.org/10.17632/96xg3wmppf.1 and related to the research article ""Static fatigue of SiC-based multifilament tows at intermediate temperature: the time to failure variability"" (Mazerat et al., 2020) [1]. The insufficiency of classically invoked external and discrete bias (fiber sticking phenomenon for instance) was compared to a devoted Monte Carlo algorithm, attributing to each filament a strength (random) and a stress (homogeneous). Introduction of a stress inconsistency from tow to tow, experimentally observed through section variability, was revealed to overpass such biasing approach.
This article can be referred to for the interpretation or prediction of CMC lifetime to guarantee long-term performance over the broad offered application field.",2021-01-15 +31103066,The Placental Atlas Tool (PAT): A collaborative research and discovery platform for the placental research community.,"

Introduction

The placenta is one of the least understood, yet arguably one of the most important organs for human health and development. While there have been numerous research efforts dedicated to understanding the placenta's critical role, these studies and the data they produced remain separated and largely disparate. In order to facilitate placental research, the Eunice Kennedy Shriver National Institute of Child and Human Development (NICHD) released in October 2018 the Placental Atlas Tool (PAT) (https://pat.nichd.nih.gov/), an internet-based platform offering users a centralized placental database of molecular datasets, analytic tools, and images.

Methods

PAT is a cloud-based system developed according to the business requirements defined by NICHD leadership and extramural placental researchers. PAT employs a metadata-driven web interface to provide curated placental datasets and images, enriched with structured, descriptive metadata to enhance data discoverability. PAT also incorporates open source molecular data analytical tools to provide a flexible analytics workflow for placental researchers.

Results

PAT launched with 426 analyzable molecular placental datasets consisting of over 12,500 samples from 10 distinct species, all systematically annotated and processed for enhanced research utility. 828 placental images, consisting of 7 imaging modalities across 47 species, and nearly 300 annotated linked publications supplement the datasets to facilitate knowledge integration and hypothesis generation across disparate molecular studies.

Discussion

PAT will maximize the NICHD's investment in placental research by reinforcing open scientific inquiry, facilitating reuse of datasets, promoting novel research and testing of new hypotheses and analytic methods, and facilitating education of new researchers.",2019-04-01 +32613037,Detail data of reactive extraction of caproic acid using tri-Butyl phosphate and Sunflower and Soybean oils as diluents.,"Caproic acid can be produced by fermentation technology. Reactive extraction method is a promising technology for separating the acid from the carboxylic mixture in the fermenter [1], [2], [3], [4]. To achieve it, tri‑butyl phosphate (TBP) is used as the reactive extractant and sunflower and soybean oils are used as the diluents. The performance of both the physical and reactive extraction processes was analysed by different parameters like distribution coefficient, loading ratio, and extraction efficiency. To meet the purpose, concentration of caproic acid in aqueous phase was measured by doing acid-base titration by caustic solution Further, reaction equilibrium constant, stoichiometry and distribution of complex, free acid and dimer concentrations in the organic phase were analysed. The data are related to the published (https://doi.org/10.1016/j.cep.2020.107926) paper in Chemical Engineering and Processing: Process Intensification [5]. The data shown in the current article are not provided in the mentioned published paper. Moreover, data are useful for understanding the physical and chemical behavior of the caproic acid extraction process and also can be used to design the process in industrial scale.",2020-06-11 +31608375,Progress in the study of genome size evolution in Asteraceae: analysis of the last update. ,"The Genome Size in Asteraceae Database (GSAD, http://www.asteraceaegenomesize.com) has been recently updated, with data from papers published or in press until July 2018. 
This constitutes the third release of GSAD, currently containing 4350 data entries for 1496 species, which represent a growth of 22.52% in the number of species with available genome size data compared with the previous release, and a growth of 57.72% in terms of entries. Approximately 6% of Asteraceae species are covered in terms of known genome sizes. The number of source papers included in this release (198) means a 48.87% increase with respect to release 2.0. The significant data increase was exploited to study the genome size evolution in the family from a phylogenetic perspective. Our results suggest that the role of chromosome number in genome size diversity within Asteraceae is basically associated to polyploidy, while dysploidy would only cause minor variation in the DNA amount along the family. Among diploid taxa, we found that the evolution of genome size shows a strong phylogenetic signal. However, this trait does not seem to evolve evenly across the phylogeny, but there could be significant scale and clade-dependent patterns. Our analyses indicate that the phylogenetic signal is stronger at low taxonomic levels, with certain tribes standing out as hotspots of autocorrelation between genome size and phylogeny. Finally, we also observe meaningful associations among nuclear DNA content on Asteraceae species and other phenotypical and ecological traits (i.e. plant habit and invasion ability). Overall, this study emphasizes the need to continue generating and analysing genome size data in order to puzzle out the evolution of this parameter and its many biological correlates.",2019-01-01 +33105510,"An Open Source Solution for ""Hands-on"" teaching of PET/CT to Medical Students under the COVID-19 Pandemic.","

Aims

 Since 2017, medical students at the University of Bergen were taught PET/CT ""hands-on"" by viewing PET/CT cases in native format on diagnostic workstations in the hospital. Due to the COVID-19 pandemic, students were barred access. This prompted us to launch and evaluate a new freeware PET/CT viewing system hosted in the university network.

Methods

 We asked our students to install the multiplatform Fiji viewer with Beth Israel PET/CT plugin (http://petctviewer.org) on their personal computers and connect to a central image database in the university network based on the public domain orthanc server (https://orthanc-server.com). At the end of course, we conducted an anonymous student survey.

Results

 The new system was online within eight days, including regulatory approval. All 76 students (100 %) in the fifth year completed their course work, reading five anonymized PET/CT cases as planned. 41 (53 %) students answered the survey. Fiji was challenging to install with a mean score of 1.8 on a 5-point Likert scale (5 = easy, 1 = difficult). Fiji was more difficult to use (score 3.0) than the previously used diagnostic workstations in the hospital (score 4.1; p < 0.001, paired t-test). Despite the technical challenge, 47 % of students reported having learnt much (scores 4 and 5); only 11 % were negative (scores 1 and 2). 51 % found the PET/CT tasks engaging (scores 4 and 5) while 20 % and 5 % returned scores 2 and 1, respectively.

Conclusion

 Despite the initial technical challenge, ""hands-on"" learning of PET/CT based on the freeware Fiji/orthanc PET/CT-viewer was associated with a high degree of student satisfaction. We plan to continue running the system to give students permanent access to PET/CT cases in native format regardless of time or location.",2020-10-26 +33063413,HerbiPAD: a free web platform to comprehensively analyze constitutive property and herbicide-likeness to estimate chemical bioavailability.,"

Background

Herbicides, as efficient weed control measures, play a crucial role in ensuring food security. The emergence of herbicide-resistant weeds has negatively affected food security and promoted the demand for new and improved herbicides. The balance between bioavailability and the potency of a compound is one of the most pressing challenges in the development of novel ideal herbicides. Herbicide-likeness analysis is crucial for the evaluation of this balance and thus may help to address this issue. Many herbicide-likeness analysis methods have been developed to screen potential novel lead compounds. However, there remains a lack of user-friendly and integrated tools to comprehensively evaluate herbicide-likeness.

Results

Herbicide-likeness of compounds was assessed through integrated analysis incorporating the physicochemical properties of commercial herbicides, a qualitative rule, and three quantitative scoring functions developed for evaluating herbicide-likeness. HerbiPAD (http://agroda.gzu.edu.cn:9999/ccb/database/HerbiPAD/) is a free web platform integrated with the collected database and scoring model. This platform contains 542 approved herbicides and > 29 000 physicochemical descriptors. The accuracy of HerbiPAD in distinguishing known herbicides from nonherbicides was 84.2%. In the case study, HerbiPAD evaluated 60 new compounds from seven different herbicide targets, and the accuracy of predicting better bioavailability was 83.3%.

Conclusions

HerbiPAD was designed to quickly and efficiently evaluate herbicide-likeness by integrating qualitative and quantitative analyses. The simple and effective interpretation of the analysis interface may help noncomputational experts understand herbicide-likeness. © 2020 Society of Chemical Industry.",2020-10-26 +31922426,Privacy Risks of Sharing Data from Environmental Health Studies.,"BACKGROUND:Sharing research data uses resources effectively; enables large, diverse data sets; and supports rigor and reproducibility. However, sharing such data increases privacy risks for participants who may be re-identified by linking study data to outside data sets. These risks have been investigated for genetic and medical records but rarely for environmental data. OBJECTIVES:We evaluated how data in environmental health (EH) studies may be vulnerable to linkage and we investigated, in a case study, whether environmental measurements could contribute to inferring latent categories (e.g., geographic location), which increases privacy risks. METHODS:We identified 12 prominent EH studies, reviewed the data types collected, and evaluated the availability of outside data sets that overlap with study data. With data from the Household Exposure Study in California and Massachusetts and the Green Housing Study in Boston, Massachusetts, and Cincinnati, Ohio, we used k-means clustering and principal component analysis to investigate whether participants' region of residence could be inferred from measurements of chemicals in household air and dust. RESULTS:All 12 studies included at least two of five data types that overlap with outside data sets: geographic location (9 studies), medical data (9 studies), occupation (10 studies), housing characteristics (10 studies), and genetic data (7 studies). In our cluster analysis, participants' region of residence could be inferred with 80%-98% accuracy using environmental measurements with original laboratory reporting limits. 
DISCUSSION:EH studies frequently include data that are vulnerable to linkage with voter lists, tax and real estate data, professional licensing lists, and ancestry websites, and exposure measurements may be used to identify subgroup membership, increasing likelihood of linkage. Thus, unsupervised sharing of EH research data potentially raises substantial privacy risks. Empirical research can help characterize risks and evaluate technical solutions. Our findings reinforce the need for legal and policy protections to shield participants from potential harms of re-identification from data sharing. https://doi.org/10.1289/EHP4817.",2020-01-10 +31124259,The impact of social media on citation rates in coloproctology.,"

Aim

This study aimed to investigate the association between Twitter exposure and the number of citations for coloproctology articles.

Method

Original articles from journals using Twitter between June 2015 and May 2016 were evaluated for the following characteristics: publishing journal; article subject; study design; nationality, speciality and affiliation of the author(s); and reference on Twitter. Citation data for these articles were retrieved from Google Scholar (https://scholar.google.com) in January 2018. We performed a univariate analysis using these data followed by a multivariate, logistic regression analysis to search for factors associated with a high citation level, which was defined as accrual of more than five citations.

Results

Out of six coloproctology journals listed on the InCites JCR database, three (Diseases of the Colon & Rectum, Colorectal Disease and Techniques in Coloproctology) used Twitter, where 200 (49.5%) out of a total of 404 articles had been featured. Citation rates of articles that featured on Twitter were significantly higher than those that did not (11.4 ± 9.2 vs 4.1 ± 3.1, P < 0.001). In multivariate analysis, Twitter exposure (OR 8.6, P = 0.001), European Union nationality (OR 2.4, P = 0.004), Colorectal Disease journal (OR 3.3, P = 0.005) and systematic review articles (OR 3.4, P = 0.009) were associated with higher citation levels.

Conclusion

Article exposure on Twitter was strongly associated with a high citation level. Medical communities should encourage journals as well as physicians to actively utilize social media to expedite the spread of new ideas and ultimately benefit medical society as a whole.",2019-06-19 +33876588,A Clinical Risk Score to Predict In-hospital Mortality from COVID-19 in South Korea.,"

Background

Early identification of patients with coronavirus disease 2019 (COVID-19) who are at high risk of mortality is of vital importance for appropriate clinical decision making and delivering optimal treatment. We aimed to develop and validate a clinical risk score for predicting mortality at the time of admission of patients hospitalized with COVID-19.

Methods

Collaborating with the Korea Centers for Disease Control and Prevention (KCDC), we established a prospective consecutive cohort of 5,628 patients with confirmed COVID-19 infection who were admitted to 120 hospitals in Korea between January 20, 2020, and April 30, 2020. The cohort was randomly divided using a 7:3 ratio into a development (n = 3,940) and validation (n = 1,688) set. Clinical information and complete blood count (CBC) detected at admission were investigated using Least Absolute Shrinkage and Selection Operator (LASSO) and logistic regression to construct a predictive risk score (COVID-Mortality Score). The discriminative power of the risk model was assessed by calculating the area under the curve (AUC) of the receiver operating characteristic curves.

Results

The incidence of mortality was 4.3% in both the development and validation set. A COVID-Mortality Score consisting of age, sex, body mass index, combined comorbidity, clinical symptoms, and CBC was developed. AUCs of the scoring system were 0.96 (95% confidence interval [CI], 0.85-0.91) and 0.97 (95% CI, 0.84-0.93) in the development and validation set, respectively. If the model was optimized for > 90% sensitivity, accuracies were 81.0% and 80.2% with sensitivities of 91.7% and 86.1% in the development and validation set, respectively. The optimized scoring system has been applied to the public online risk calculator (https://www.diseaseriskscore.com).

Conclusion

This clinically developed and validated COVID-Mortality Score, using clinical data available at the time of admission, will aid clinicians in predicting in-hospital mortality.",2021-04-19 +32379325,GeneTrail 3: advanced high-throughput enrichment analysis.,"We present GeneTrail 3, a major extension of our web service GeneTrail that offers rich functionality for the identification, analysis, and visualization of deregulated biological processes. Our web service provides a comprehensive collection of biological processes and signaling pathways for 12 model organisms that can be analyzed with a powerful framework for enrichment and network analysis of transcriptomic, miRNomic, proteomic, and genomic data sets. Moreover, GeneTrail offers novel workflows for the analysis of epigenetic marks, time series experiments, and single cell data. We demonstrate the capabilities of our web service in two case-studies, which highlight that GeneTrail is well equipped for uncovering complex molecular mechanisms. GeneTrail is freely accessible at: http://genetrail.bioinf.uni-sb.de.",2020-07-01 +27631061,"Introduction to the National Mental Health Services Survey, 2010","Background: The National Mental Health Services Survey (N-MHSS), conducted by the Substance Abuse and Mental Health Services Administration (SAMHSA), is an annual survey of all known public and private mental health treatment facilities in the United States. The survey is the only source of national and state-level data on the mental health services reported by both publicly and privately operated specialty mental health care facilities. Method: The National Mental Health Services Survey (N-MHSS) is a biennial survey designed to collect information from all facilities in the United States, both public and private, that provide mental health treatment. N-MHSS provides the mechanism for quantifying the dynamic character and composition of the United States mental health treatment delivery system. 
Results: The 2010 N-MHSS instrument collected information on specific facility characteristics and the number of clients in treatment on the survey reference date. It included questions on topics such as facility type, operation, primary treatment focus, management characteristics, client demographics, supportive services, mental health treatment approaches, special programs or groups, types of payment/funding accepted, languages in which treatment is provided, and a one-day client/patient census by type of service setting. Conclusion: The N-MHSS data can be used to describe the nature and scope of mental health services provided in state-funded, state-operated, and other (e.g., private for-profit, and nonprofit) mental health treatment facilities. The N-MHSS data can also be used to conduct comparative analyses and forecast future resource needs. Additionally, the N-MHSS data are used to update SAMHSA's Inventory of Behavioral Health Services, an inventory of all known mental health and substance abuse treatment facilities in the United States, and to update the information in the mental health component of SAMHSA's online Behavioral Health Treatment Services Locator (http://findtreatment.samhsa.gov/), a searchable database of licensed and accredited public and private mental health treatment facilities.",2016-09-16 +26722116,ChromothripsisDB: a curated database of chromothripsis.,"

Unlabelled

Chromothripsis is a single catastrophic event that can lead to massive genomic rearrangements confined to one or a few chromosomes. It provides an alternative paradigm in cancer development and changes the conventional view that cancer develops in a stepwise progression. The mechanisms underlying chromothripsis and their specific impact on tumorigenesis are still poorly understood, and further examination of a large number of identified chromothripsis samples is needed. Unfortunately, these data are difficult to access, as they are scattered across multiple publications, come in different formats and descriptions, or are hidden in figures and supplementary materials. To improve access to these data and promote meta-analysis, we developed ChromothripsisDB, a manually curated database containing a unified description of all published chromothripsis cases and relevant genomic aberrations. Currently, 423 chromothripsis samples representing 107 research articles are included in our database. ChromothripsisDB represents an extraordinary resource for mining the existing knowledge of chromothripsis, and will facilitate the identification of mechanisms involved in this phenomenon.

Availability and implementation

ChromothripsisDB is freely available at http://cgma.scu.edu.cn/ChromothripsisDB. Contact: haoyang.cai@scu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-31 +33604919,Variant-set association test for generalized linear mixed model.,"Advances in high-throughput biotechnologies have culminated in a wide range of omics (such as genomics, epigenomics, transcriptomics, metabolomics, and metagenomics) studies, and increasing evidence in these studies indicates that the biological architecture of complex traits involves a large number of omics variants each with minor effects but collectively accounting for the full phenotypic variability. Thus, a major challenge in many ""ome-wide"" association analyses is to achieve adequate statistical power to identify multiple variants of small effect sizes, which is notoriously difficult for studies with relatively small-sample sizes. A small-sample adjustment incorporated in the kernel machine regression framework was proposed to solve this for association studies under various settings. However, such an adjustment in the generalized linear mixed model (GLMM) framework, which accounts for both sample relatedness and non-Gaussian outcomes, has not yet been attempted. In this study, we fill this gap by extending small-sample adjustment in kernel machine association test to GLMM. We propose a new Variant-Set Association Test (VSAT), a powerful and efficient analysis tool in GLMM, to examine the association between a set of omics variants and correlated phenotypes. The usefulness of VSAT is demonstrated using both numerical simulation studies and applications to data collected from multiple association studies. 
The software for implementing the proposed method in R is available at https://www.github.com/jchen1981/SSKAT.",2021-02-19 +32637482,Treatment effects on neurometabolite levels in schizophrenia: A meta-analysis dataset of proton magnetic resonance spectroscopy.,"This article describes a dataset for a meta-analysis that aimed to investigate the effects of treatment on the neurometabolite status in patients with schizophrenia (DOI of original article: https://doi.org/10.1016/j.schres.2020.03.069[1]). The data search was performed with MEDLINE, Embase, and PsycINFO. The neurometabolites investigated include glutamate, glutamine, glutamate + glutamine, gamma-aminobutyric acid, N-acetylaspartate, and myo-inositol, and the regions of interest (ROIs) include the frontal cortex, temporal cortex, parieto-occipital cortex, thalamus, basal ganglia, and hippocampus. The meta-analysis was conducted with a random-effects model, and the use of the standardized mean difference method between pre- and post-treatment of subjects for neurometabolites in each ROI of three patient groups or more. The dataset covers raw data of 39 patient groups (773 patients with schizophrenia at follow-up) with neurometabolite levels measured by magnetic resonance spectroscopy both before and after treatment. Furthermore, it contains details of clinical characteristics and treatment types for each group. Therefore, the data would be useful for a reinvestigation of treatment effects on the neurometabolite status from diverse points of view, as well as for the development of future treatment strategies for psychiatric diseases.",2020-06-16 +33972023,ModFlex: Towards Function Focused Protein Modeling.,"There is a wide, and continuously widening, gap between the number of proteins known only by their amino acid sequence versus those structurally characterized by direct experiment. 
To close this gap, we mostly rely on homology-based inference and modeling to reason about the structures of the uncharacterized proteins by using structures of homologous proteins as templates. With the rapidly growing size of the Protein Data Bank, there are often multiple choices of templates, including multiple sets of coordinates from the same protein. The substantial conformational differences observed between different experimental structures of the same protein often reflect function related structural flexibility. Thus, depending on the questions being asked, using distant homologs, or coordinate sets with lower resolution but solved in the appropriate functional form, as templates may be more informative. The ModFlex server (https://modflex.org/) addresses this seldom mentioned gap in the standard homology modeling approach by providing the user with an interface with multiple options and tools to select the most relevant template and explore the range of structural diversity in the available templates. ModFlex is closely integrated with a range of other programs and servers developed in our group for the analysis and visualization of protein structural flexibility and divergence.",2021-01-23 +31510687,pNovo 3: precise de novo peptide sequencing using a learning-to-rank framework.,"

Motivation

De novo peptide sequencing based on tandem mass spectrometry data is the key technology of shotgun proteomics for identifying peptides without any database and assembling unknown proteins. However, owing to the low ion coverage in tandem mass spectra, the order of certain consecutive amino acids cannot be determined if all of their supporting fragment ions are missing, which results in the low precision of de novo sequencing.

Results

In order to solve this problem, we developed pNovo 3, which used a learning-to-rank framework to distinguish similar peptide candidates for each spectrum. Three metrics for measuring the similarity between each experimental spectrum and its corresponding theoretical spectrum were used as important features, in which the theoretical spectra can be precisely predicted by the pDeep algorithm using deep learning. On seven benchmark datasets from six diverse species, pNovo 3 recalled 29-102% more correct spectra, and the precision was 11-89% higher than three other state-of-the-art de novo sequencing algorithms. Furthermore, compared with the newly developed DeepNovo, which also used the deep learning approach, pNovo 3 still identified 21-50% more spectra on the nine datasets used in the study of DeepNovo. In summary, the deep learning and learning-to-rank techniques implemented in pNovo 3 significantly improve the precision of de novo sequencing, and such machine learning framework is worth extending to other related research fields to distinguish the similar sequences.

Availability and implementation

pNovo 3 can be freely downloaded from http://pfind.ict.ac.cn/software/pNovo/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +31159881,"NG-meta-profiler: fast processing of metagenomes using NGLess, a domain-specific language.","

Background

Shotgun metagenomes contain a sample of all the genomic material in an environment, allowing for the characterization of a microbial community. In order to understand these communities, bioinformatics methods are crucial. A common first step in processing metagenomes is to compute abundance estimates of different taxonomic or functional groups from the raw sequencing data. Given the breadth of the field, computational solutions need to be flexible and extensible, enabling the combination of different tools into a larger pipeline.

Results

We present NGLess and NG-meta-profiler. NGLess is a domain specific language for describing next-generation sequence processing pipelines. It was developed with the goal of enabling user-friendly computational reproducibility. It provides built-in support for many common operations on sequencing data and is extensible with external tools with configuration files. Using this framework, we developed NG-meta-profiler, a fast profiler for metagenomes which performs sequence preprocessing, mapping to bundled databases, filtering of the mapping results, and profiling (taxonomic and functional). It is significantly faster than either MOCAT2 or htseq-count and (as it builds on NGLess) its results are perfectly reproducible.

Conclusions

NG-meta-profiler is a high-performance solution for metagenomics processing built on NGLess. It can be used as-is to execute standard analyses or serve as the starting point for customization in a perfectly reproducible fashion. NGLess and NG-meta-profiler are open source software (under the liberal MIT license) and can be downloaded from https://ngless.embl.de or installed through bioconda.",2019-06-03 +30624314,Can Machine Learning Methods Produce Accurate and Easy-to-use Prediction Models of 30-day Complications and Mortality After Knee or Hip Arthroplasty?,"

Background

Existing universal and procedure-specific surgical risk prediction models of death and major complications after elective total joint arthroplasty (TJA) have limitations including poor transparency, poor to modest accuracy, and insufficient validation to establish performance across diverse settings. Thus, the need remains for accurate and validated prediction models for use in preoperative management, informed consent, shared decision-making, and risk adjustment for reimbursement.

Questions/purposes

The purpose of this study was to use machine learning methods and large national databases to develop and validate (both internally and externally) parsimonious risk-prediction models for mortality and complications after TJA.

Methods

Preoperative demographic and clinical variables from all 107,792 nonemergent primary THAs and TKAs in the 2013 to 2014 American College of Surgeons-National Surgical Quality Improvement Program (ACS-NSQIP) were evaluated as predictors of 30-day death and major complications. The NSQIP database was chosen for its high-quality data on important outcomes and rich characterization of preoperative demographic and clinical predictors for demographically and geographically diverse patients. Least absolute shrinkage and selection operator (LASSO) regression, a type of machine learning that optimizes accuracy and parsimony, was used for model development. Tenfold validation was used to produce C-statistics, a measure of how well models discriminate patients who experience an outcome from those who do not. External validation, which evaluates the generalizability of the models to new data sources and patient groups, was accomplished using data from the Veterans Affairs Surgical Quality Improvement Program (VASQIP). Models previously developed from VASQIP data were also externally validated using NSQIP data to examine the generalizability of their performance with a different group of patients outside the VASQIP context.

Results

The models, developed using LASSO regression with diverse clinical (for example, American Society of Anesthesiologists classification, comorbidities) and demographic (for example, age, gender) inputs, had good accuracy in terms of discriminating the likelihood a patient would experience, within 30 days of arthroplasty, a renal complication (C-statistic, 0.78; 95% confidence interval [CI], 0.76-0.80), death (0.73; 95% CI, 0.70-0.76), or a cardiac complication (0.73; 95% CI, 0.71-0.75) from one who would not. By contrast, the models demonstrated poor accuracy for venous thromboembolism (C-statistic, 0.61; 95% CI, 0.60-0.62) and any complication (C-statistic, 0.64; 95% CI, 0.63-0.65). External validation of the NSQIP- derived models using VASQIP data found them to be robust in terms of predictions about mortality and cardiac complications, but not for predicting renal complications. Models previously developed with VASQIP data had poor accuracy when externally validated with NSQIP data, suggesting they should not be used outside the context of the Veterans Health Administration.

Conclusions

Moderately accurate predictive models of 30-day mortality and cardiac complications after elective primary TJA were developed as well as internally and externally validated. To our knowledge, these are the most accurate and rigorously validated TJA-specific prediction models currently available (http://med.stanford.edu/s-spire/Resources/clinical-tools-.html). Methods to improve these models, including the addition of nonstandard inputs such as natural language processing of preoperative clinical progress notes or radiographs, should be pursued as should the development and validation of models to predict longer term improvements in pain and function.

Level of evidence

Level III, diagnostic study.",2019-02-01 +33766669,Automatic external defibrillator provided by unmanned aerial vehicle (drone) in Greater Paris: A real world-based simulation.,"

Aim

To reduce the delay in defibrillation of out-of-hospital cardiac arrest (OHCA) patients, recent publications have shown that drones equipped with an automatic external defibrillator (AED) appear to be effective in sparsely populated areas. To study the effectiveness of AED-drones in high-density urban areas, we developed an algorithm based on emergency dispatch parameters for the rate and detection speed of cardiac arrests and technical and meteorological parameters.

Methods

We ran a numerical simulation to compare the actual time required by the Basic Life Support team (BLSt) for OHCA patients in Greater Paris in 2017 to the time required by an AED-drone. Endpoints were the proportion of patients with ""AED-drone first"" and the defibrillation time gained. We built an open-source website (https://airborne-aed.org/) to allow modelling by modifying one or more parameters and to help other teams model their own OHCA data.

Results

Of 3014 OHCA patients, 72.2 ± 0.7% were in the ""no drone flight"" group, 25.8 ± 0.2% in the ""AED-drone first"" group, and 2.1 ± 0.2% in the ""BLSt-drone first"" group. When a drone flight was authorized, it arrived an average 190 s before BLSt in 93% of cases. The possibility of flying the drone during the aeronautical night improved the results of the ""AED-drone first"" group the most (+60%).

Conclusions

In our very high-density urban model, at most 26% of OHCA patients received an AED from an AED-drone before BLSt. The flexible parameters of our website model allows evaluation of the impact of each choice and concrete implementation of the AED-drone.",2021-03-22 +26578589,ORegAnno 3.0: a community-driven resource for curated regulatory annotation.,"The Open Regulatory Annotation database (ORegAnno) is a resource for curated regulatory annotation. It contains information about regulatory regions, transcription factor binding sites, RNA binding sites, regulatory variants, haplotypes, and other regulatory elements. ORegAnno differentiates itself from other regulatory resources by facilitating crowd-sourced interpretation and annotation of regulatory observations from the literature and highly curated resources. It contains a comprehensive annotation scheme that aims to describe both the elements and outcomes of regulatory events. Moreover, ORegAnno assembles these disparate data sources and annotations into a single, high quality catalogue of curated regulatory information. The current release is an update of the database previously featured in the NAR Database Issue, and now contains 1 948 307 records, across 18 species, with a combined coverage of 334 215 080 bp. Complete records, annotation, and other associated data are available for browsing and download at http://www.oreganno.org/.",2015-11-17 +,First Report of Aphelenchoides besseyi Infecting the Aerial Part of Cotton Plants in Brazil,"Cotton plants (Gossypium hirsutum L.) exhibiting stunting, loss of floral buds, foliage distortion, and thickened nodes were observed in May 2017 in the county of Sapezal, Mato Grosso State, Brazil (13°32′33″S; 58°48′51″W). Samples of leaves and stems in 10-g aliquots were processed using the blender-centrifugal flotation method. Nematodes of the genus Aphelenchoides were extracted at fresh tissue densities of 41 specimens/g. 
Specimens were collected individually, narcotized in distilled water with light heat, and mounted in temporary slides. Males and females were examined and measured using a compound microscope (Olympus BX 53) coupled with a video image system. Morphological analysis of females indicated a slender body, a labial region slightly wider than the first body annulus, a hexa-radiated labial structure, a stylet 11 µm long, an oval metacorpus with a distinct valve, the excretory pore located at the anterior edge of the nerve ring, the lateral field marked by four incisures, an oval and long spermatheca filled with spermatozoa, a narrow postuterine sac without spermatozoa and shorter than 1/3 of the vulva-anus distance, and a conoid tail with a terminus bearing a mucro with four pointed processes. These morphological characters and those of the males matched those reported in the original description of Aphelenchoides besseyi by Christie (1942) and a revision by Fortuner (1970) and Franklin and Siddiqi (1972). Additional individual A. besseyi specimens from cotton were used for phylogenetic analysis to verify genetic congruence between the sequences of this population of A. besseyi from cotton and those published for other populations of this nematode in Brazil. The genomic DNA extraction and the sequences were obtained for the near full length 225 of the ribosomal small subunit (SSU) and the expansion segment 226 of the large subunit (D2-D3 LSU) according to the protocol of de Jesus et al. (2016). The newly obtained sequences (MH187564 and MH187565) were assembled using BioEdit version 7.2.5 (http://www.mbio.ncsu.edu/bioedit/page2.html). Guide sequences to phylogenetic analysis were from Meyer et al. (2017). Sequence data were aligned using Mega 7.0 (ClustalW algorithm). The ambiguously aligned regions from SSU and LSU alignments were removed to optimize the phylogenetic analysis. 
For phylogenetic reconstruction, the best fitting model of sequence evolution was determined for each data set separately using the Akaike information criterion in Mega 7.0. The model GTR+G+I (general time reversible + gamma distributed with invariant sites) was selected for both SSU and LSU. Therefore, phylogenetic trees were constructed applying the maximum likelihood method with 100 bootstrap replications. Phylogenetic analysis indicated that the sequence of the cotton population of A. besseyi grouped with those of a population infesting soybean (Favoreto and Meyer 2017; Meyer et al. 2017) and other populations included in the group rice by de Jesus et al. (2016). The ability of A. besseyi to infect cotton was confirmed in two greenhouse experiments. For these experiments, specimens of A. besseyi obtained from field-infected cotton plants were reared on fungal cultures of Fusarium sp. growing in potato dextrose agar in Petri dishes. The reared nematodes were injected at the stem base of individual healthy cotton cultivar TMG 47B2RF seedlings growing in pots at a density of 6,000 specimens per seedling. The same field symptoms were observed in the inoculated seedlings, and a high number of nematodes were recovered. These results fulfill the modified Koch’s postulates. The final nematode densities in the two experiments were 970 and 2,244 specimens per gram of fresh tissue. Field observations in Brazil also indicated that soybeans grown after or before cotton in the same nematode-infested area were damaged by the nematode as well (similar symptoms on soybean aerial tissues with population levels of 70 specimens/g). A. besseyi therefore poses a considerable threat to cotton-soybean cropping systems in Mato Grosso State in Brazil.",2018-12-01 +32421805,IRIS3: integrated cell-type-specific regulon inference server from single-cell RNA-Seq.,"A group of genes controlled as a unit, usually by the same repressor or activator gene, is known as a regulon. 
The ability to identify active regulons within a specific cell type, i.e., cell-type-specific regulons (CTSR), provides an extraordinary opportunity to pinpoint crucial regulators and target genes responsible for complex diseases. However, the identification of CTSRs from single-cell RNA-Seq (scRNA-Seq) data is computationally challenging. We introduce IRIS3, the first-of-its-kind web server for CTSR inference from scRNA-Seq data for human and mouse. IRIS3 is an easy-to-use server empowered by over 20 functionalities to support comprehensive interpretations and graphical visualizations of identified CTSRs. CTSR data can be used to reliably characterize and distinguish the corresponding cell type from others and can be combined with other computational or experimental analyses for biomedical studies. CTSRs can, therefore, aid in the discovery of major regulatory mechanisms and allow reliable constructions of global transcriptional regulation networks encoded in a specific cell type. The broader impact of IRIS3 includes, but is not limited to, investigation of complex diseases hierarchies and heterogeneity, causal gene regulatory network construction, and drug development. IRIS3 is freely accessible from https://bmbl.bmi.osumc.edu/iris3/ with no login requirement.",2020-07-01 +32286627,rMAPS2: an update of the RNA map analysis and plotting server for alternative splicing regulation.,"The rMAPS2 (RNA Map Analysis and Plotting Server 2) web server, freely available at http://rmaps.cecsresearch.org/, has provided the high-throughput sequencing data research community with curated tools for the identification of RNA binding protein sites. rMAPS2 analyzes differential alternative splicing or CLIP peak data obtained from high-throughput sequencing data analysis tools like MISO, rMATS, Piranha, PIPE-CLIP and PARalyzer, and then, graphically displays enriched RNA-binding protein target sites. 
The initial release of rMAPS focused only on the most common alternative splicing event, skipped exon or exon skipping. However, there was a high demand for the analysis of other major types of alternative splicing events, especially for retained intron events since this is the most common type of alternative splicing in plants, such as Arabidopsis thaliana. Here, we expanded the implementation of rMAPS2 to facilitate analyses for all five major types of alternative splicing events: skipped exon, mutually exclusive exons, alternative 5' splice site, alternative 3' splice site and retained intron. In addition, by employing multi-threading, rMAPS2 has vastly improved the user experience with significant reductions in running time, ∼3.5 min for the analysis of all five major alternative splicing types at once.",2020-07-01 +30231853,AgriSeqDB: an online RNA-Seq database for functional studies of agriculturally relevant plant species.,"

Background

The genome-wide expression profile of genes in different tissues/cell types and developmental stages is a vital component of many functional genomic studies. Transcriptome data obtained by RNA-sequencing (RNA-Seq) is often deposited in public databases that are made available via data portals. Data visualization is one of the first steps in assessment and hypothesis generation. However, these databases do not typically include visualization tools and establishing one is not trivial for users who are not computational experts. This, as well as the various formats in which data is commonly deposited, makes the processes of data access, sharing and utility more difficult. Our goal was to provide a simple and user-friendly repository that meets these needs for data-sets from major agricultural crops.

Description

AgriSeqDB ( https://expression.latrobe.edu.au/agriseqdb ) is a database for viewing, analysing and interpreting developmental and tissue/cell-specific transcriptome data from several species, including major agricultural crops such as wheat, rice, maize, barley and tomato. The disparate manner in which public transcriptome data is often warehoused and the challenge of visualizing raw data are both major hurdles to data reuse. The popular eFP browser does an excellent job of presenting transcriptome data in an easily interpretable view, but previous implementation has been mostly on a case-by-case basis. Here we present an integrated visualisation database of transcriptome data-sets from six species that did not previously have public-facing visualisations. We combine the eFP browser, for gene-by-gene investigation, with the Degust browser, which enables visualisation of all transcripts across multiple samples. The two visualisation interfaces launch from the same point, enabling users to easily switch between analysis modes. The tools allow users, even those without bioinformatics expertise, to mine into data-sets and understand the behaviour of transcripts of interest across samples and time. We have also incorporated an additional graphic download option to simplify incorporation into presentations or publications.

Conclusion

Powered by eFP and Degust browsers, AgriSeqDB is a quick and easy-to-use platform for data analysis and visualization in five crops and Arabidopsis. Furthermore, it provides a tool that makes it easy for researchers to share their data-sets, promoting research collaborations and data-set reuse.",2018-09-19 +29075431,The GapMap project: a mobile surveillance system to map diagnosed autism cases and gaps in autism services globally.,"Although the number of autism diagnoses is on the rise, we have no evidence-based tracking of size and severity of gaps in access to autism-related resources, nor do we have methods to geographically triangulate the locations of the widest gaps in either the US or elsewhere across the globe. To combat these related issues of (1) mapping diagnosed cases of autism and (2) quantifying gaps in access to key intervention services, we have constructed a crowd-based mobile platform called ""GapMap"" (http://gapmap.stanford.edu) for real-time tracking of autism prevalence and autism-related resources that can be accessed from any mobile device with cellular or wireless connectivity. Now in beta, our aim is for this Android/iOS compatible mobile tool to simultaneously crowd-enroll the massive and growing community of families with autism to capture geographic, diagnostic, and resource usage information while automatically computing prevalence at granular geographical scales to yield a more complete and dynamic understanding of autism resource epidemiology.",2017-10-23 +35186019,Full Chromosomal Relationships Between Populations and the Origin of Humans.,"A comprehensive description of human genomes is essential for understanding human evolution and relationships between modern populations. However, most published literature focuses on local alignment comparison of several genes rather than the complete evolutionary record of individual genomes. 
Combining with data from the 1,000 Genomes Project, we successfully reconstructed 2,504 individual genomes and propose Divided Natural Vector method to analyze the distribution of nucleotides in the genomes. Comparisons based on autosomes, sex chromosomes and mitochondrial genomes reveal the genetic relationships between populations, and different inheritance pattern leads to different phylogenetic results. Results based on mitochondrial genomes confirm the ""out-of-Africa"" hypothesis and assert that humans, at least females, most likely originated in eastern Africa. The reconstructed genomes are stored on our server and can be further used for any genome-scale analysis of humans (http://yaulab.math.tsinghua.edu.cn/2022_1000genomesprojectdata/). This project provides the complete genomes of thousands of individuals and lays the groundwork for genome-level analyses of the genetic relationships between populations and the origin of humans.",2021-01-01 +33436519,Caught between Two Genes: Accounting for Operonic Gene Structure Improves Prokaryotic RNA Sequencing Quantification. ,"RNA sequencing (RNA-seq) has matured into a reliable and low-cost assay for transcriptome profiling and has been deployed across a range of systems. The computational tool space for the analysis of RNA-seq data has kept pace with advances in sequencing. Yet tool development has largely centered around the human transcriptome. While eukaryotic and prokaryotic transcriptomes are similar, key differences in transcribed units limit the transfer of wet-lab and computational tools between the two domains. The article by M. Chung, R. S. Adkins, J. S. A. Mattick, K. R. Bradwell, et al. (mSystems 6:e00917-20, 2021, https://doi.org/10.1128/mSystems.00917-20), demonstrates that integrating prokaryote-specific strategies into existing RNA-seq analyses improves read quantification. 
Unlike in eukaryotes, polycistronic transcripts derived from operons lead to sequencing reads that span multiple neighboring genes. Chung et al. introduce FADU, a software tool that performs a correction for such reads and thereby improves read quantification and biological interpretation of prokaryotic RNA sequencing.",2021-01-12 +33393468,Carbapenem Antibiotics for the Empiric Treatment of Nosocomial Pneumonia: A Systematic Review and Meta-analysis.,"

Background

Previous meta-analyses suggested that treating hospital-acquired pneumonia (HAP), including ventilator-associated pneumonia (VAP), with empiric carbapenems was associated with lower mortality rates but higher rates of clinical failure for pseudomonal pneumonia. This study was an updated meta-analysis with sensitivity analyses and meta-regression to better understand the impact of carbapenem use in HAP/VAP.

Research question

What is the efficacy of carbapenems for empiric treatment of nosocomial pneumonia?

Study design and methods

Databases were searched for randomized controlled studies evaluating empiric treatment for HAP and/or VAP, and studies were included comparing carbapenem- vs non-carbapenem-containing regimens. The primary outcome was all-cause mortality. Secondary outcomes included subgroup stratification and resistance development.

Results

Of 9,140 references, 20 trials enrolling 5,489 patients met inclusion criteria. For mortality, carbapenem use had a risk ratio (RR) of 0.84 (95% CI, 0.74-0.96; P = .01). Stratified according to VAP proportion (< 33%, 33%-66%, and > 66%), RRs were 0.95 (95% CI, 0.77-1.17; P = .66), 0.78 (95% CI, 0.57-1.07; P = .13), and 0.81 (95% CI, 0.65-0.99; P = .04), respectively. Stratified according to severity, only groups with Acute Physiology and Chronic Health Evaluation II scores < 14 and between 14 and 17 showed mortality benefit (RRs of 0.64 [95% CI, 0.45-0.92; P = .01] and 0.77 [95% CI, 0.61-0.97; P = .03]). Meta-regression did not show an association between Pseudomonas prevalence and mortality (P = .44). Carbapenem use showed a trend toward developing resistance (RR, 1.40; 95% CI, 0.95-2.06; P = .09) and a 96% probability of resistance emergence.

Interpretation

Carbapenem-based empiric regimens were associated with lower mortality rates compared with non-carbapenems, largely driven by trials of VAP. The mortality effect was not observed in trials with high disease severity and was not associated with Pseudomonas. The mortality difference was observed mainly in studies that used ceftazidime as control. There was a trend toward increasing resistance associated with carbapenems.

Trial registry

International Prospective Register of Systematic Reviews; No. CRD42018093602; URL: https://www.crd.york.ac.uk/prospero/.",2020-10-23 +33097024,LUMINOUS database: lumbar multifidus muscle segmentation from ultrasound images.,"

Background

Among the paraspinal muscles, the structure and function of the lumbar multifidus (LM) has become of great interest to researchers and clinicians involved in lower back pain and muscle rehabilitation. Ultrasound (US) imaging of the LM muscle is a useful clinical tool which can be used in the assessment of muscle morphology and function. US is widely used due to its portability, cost-effectiveness, and ease-of-use. In order to assess muscle function, quantitative information of the LM must be extracted from the US image by means of manual segmentation. However, manual segmentation requires a higher level of training and experience and is characterized by a level of difficulty and subjectivity associated with image interpretation. Thus, the development of automated segmentation methods is warranted and would strongly benefit clinicians and researchers. The aim of this study is to provide a database which will contribute to the development of automated segmentation algorithms of the LM.

Construction and content

This database provides the US ground truth of the left and right LM muscles at the L5 level (in prone and standing positions) of 109 young athletic adults involved in Concordia University's varsity teams. The LUMINOUS database contains the US images with their corresponding manually segmented binary masks, serving as the ground truth. The purpose of the database is to enable development and validation of deep learning algorithms used for automatic segmentation tasks related to the assessment of the LM cross-sectional area (CSA) and echo intensity (EI). The LUMINOUS database is publicly available at http://data.sonography.ai .

Conclusion

The development of automated segmentation algorithms based on this database will promote the standardization of LM measurements and facilitate comparison among studies. Moreover, it can accelerate the clinical implementation of quantitative muscle assessment in clinical and research settings.",2020-10-23 +31034195,Machine Learning the Voltage of Electrode Materials in Metal-Ion Batteries.,"Machine-learning (ML) techniques have rapidly found applications in many domains of materials chemistry and physics where large data sets are available. Aiming to accelerate the discovery of materials for battery applications, in this work, we develop a tool ( http://se.cmich.edu/batteries ) based on ML models to predict voltages of electrode materials for metal-ion batteries. To this end, we use deep neural network, support vector machine, and kernel ridge regression as ML algorithms in combination with data taken from the Materials Project database, as well as feature vectors from properties of chemical compounds and elemental properties of their constituents. We show that our ML models have predictive capabilities for different reference test sets and, as an example, we utilize them to generate a voltage profile diagram and compare it to density functional theory calculations. In addition, using our models, we propose nearly 5000 candidate electrode materials for Na- and K-ion batteries. We also make available a web-accessible tool that, within a minute, can be used to estimate the voltage of any bulk electrode material for a number of metal ions. 
These results show that ML is a promising alternative for computationally demanding calculations as a first screening tool of novel materials for battery applications.",2019-05-07 +31111484,The International Federation of Gynecology and Obstetrics (FIGO) initiative on pre-eclampsia: A pragmatic guide for first-trimester screening and prevention.,"Pre‐eclampsia (PE) is a multisystem disorder that typically affects 2%–5% of pregnant women and is one of the leading causes of maternal and perinatal morbidity and mortality, especially when the condition is of early onset. Globally, 76 000 women and 500 000 babies die each year from this disorder. Furthermore, women in low‐resource countries are at a higher risk of developing PE compared with those in high‐resource countries. + +Although a complete understanding of the pathogenesis of PE remains unclear, the current theory suggests a two‐stage process. The first stage is caused by shallow invasion of the trophoblast, resulting in inadequate remodeling of the spiral arteries. This is presumed to lead to the second stage, which involves the maternal response to endothelial dysfunction and imbalance between angiogenic and antiangiogenic factors, resulting in the clinical features of the disorder. + +Accurate prediction and uniform prevention continue to elude us. The quest to effectively predict PE in the first trimester of pregnancy is fueled by the desire to identify women who are at high risk of developing PE, so that necessary measures can be initiated early enough to improve placentation and thus prevent or at least reduce the frequency of its occurrence. Furthermore, identification of an “at risk” group will allow tailored prenatal surveillance to anticipate and recognize the onset of the clinical syndrome and manage it promptly. + +PE has been previously defined as the onset of hypertension accompanied by significant proteinuria after 20 weeks of gestation. Recently, the definition of PE has been broadened. 
Now the internationally agreed definition of PE is the one proposed by the International Society for the Study of Hypertension in Pregnancy (ISSHP). + +According to the ISSHP, PE is defined as systolic blood pressure at ≥140 mm Hg and/or diastolic blood pressure at ≥90 mm Hg on at least two occasions measured 4 hours apart in previously normotensive women and is accompanied by one or more of the following new‐onset conditions at or after 20 weeks of gestation: +1.Proteinuria (i.e. ≥30 mg/mol protein:creatinine ratio; ≥300 mg/24 hour; or ≥2 + dipstick); +2.Evidence of other maternal organ dysfunction, including: acute kidney injury (creatinine ≥90 μmol/L; 1 mg/dL); liver involvement (elevated transaminases, e.g. alanine aminotransferase or aspartate aminotransferase >40 IU/L) with or without right upper quadrant or epigastric abdominal pain; neurological complications (e.g. eclampsia, altered mental status, blindness, stroke, clonus, severe headaches, and persistent visual scotomata); or hematological complications (thrombocytopenia–platelet count <150 000/μL, disseminated intravascular coagulation, hemolysis); or +3.Uteroplacental dysfunction (such as fetal growth restriction, abnormal umbilical artery Doppler waveform analysis, or stillbirth). + + +It is well established that a number of maternal risk factors are associated with the development of PE: advanced maternal age; nulliparity; previous history of PE; short and long interpregnancy interval; use of assisted reproductive technologies; family history of PE; obesity; Afro‐Caribbean and South Asian racial origin; co‐morbid medical conditions including hyperglycemia in pregnancy; pre‐existing chronic hypertension; renal disease; and autoimmune diseases, such as systemic lupus erythematosus and antiphospholipid syndrome. 
These risk factors have been described by various professional organizations for the identification of women at risk of PE; however, this approach to screening is inadequate for effective prediction of PE. + +PE can be subclassified into: +1.Early‐onset PE (with delivery at <34+0 weeks of gestation); +2.Preterm PE (with delivery at <37+0 weeks of gestation); +3.Late‐onset PE (with delivery at ≥34+0 weeks of gestation); +4.Term PE (with delivery at ≥37+0 weeks of gestation). + + +These subclassifications are not mutually exclusive. Early‐onset PE is associated with a much higher risk of short‐ and long‐term maternal and perinatal morbidity and mortality. + +Obstetricians managing women with preterm PE are faced with the challenge of balancing the need to achieve fetal maturation in utero with the risks to the mother and fetus of continuing the pregnancy longer. These risks include progression to eclampsia, development of placental abruption and HELLP (hemolysis, elevated liver enzyme, low platelet) syndrome. On the other hand, preterm delivery is associated with higher infant mortality rates and increased morbidity resulting from small for gestational age (SGA), thrombocytopenia, bronchopulmonary dysplasia, cerebral palsy, and an increased risk of various chronic diseases in adult life, particularly type 2 diabetes, cardiovascular disease, and obesity. Women who have experienced PE may also face additional health problems in later life, as the condition is associated with an increased risk of death from future cardiovascular disease, hypertension, stroke, renal impairment, metabolic syndrome, and diabetes. The life expectancy of women who developed preterm PE is reduced on average by 10 years. There is also significant impact on the infants in the long term, such as increased risks of insulin resistance, diabetes mellitus, coronary artery disease, and hypertension in infants born to pre‐eclamptic women. 
+ +The International Federation of Gynecology and Obstetrics (FIGO) brought together international experts to discuss and evaluate current knowledge on PE and develop a document to frame the issues and suggest key actions to address the health burden posed by PE. + +FIGO's objectives, as outlined in this document, are: (1) To raise awareness of the links between PE and poor maternal and perinatal outcomes, as well as to the future health risks to mother and offspring, and demand a clearly defined global health agenda to tackle this issue; and (2) To create a consensus document that provides guidance for the first‐trimester screening and prevention of preterm PE, and to disseminate and encourage its use. + +Based on high‐quality evidence, the document outlines current global standards for the first‐trimester screening and prevention of preterm PE, which is in line with FIGO good clinical practice advice on first trimester screening and prevention of pre‐eclampsia in singleton pregnancy.1 + +It provides both the best and the most pragmatic recommendations according to the level of acceptability, feasibility, and ease of implementation that have the potential to produce the most significant impact in different resource settings. Suggestions are provided for a variety of different regional and resource settings based on their financial, human, and infrastructure resources, as well as for research priorities to bridge the current knowledge and evidence gap. + +To deal with the issue of PE, FIGO recommends the following: + +Public health focus: There should be greater international attention given to PE and to the links between maternal health and noncommunicable diseases (NCDs) on the Sustainable Developmental Goals agenda. Public health measures to increase awareness, access, affordability, and acceptance of preconception counselling, and prenatal and postnatal services for women of reproductive age should be prioritized. 
Greater efforts are required to raise awareness of the benefits of early prenatal visits targeted at reproductive‐aged women, particularly in low‐resource countries. + +Universal screening: All pregnant women should be screened for preterm PE during early pregnancy by the first‐trimester combined test with maternal risk factors and biomarkers as a one‐step procedure. The risk calculator is available free of charge at https://fetalmedicine.org/research/assess/preeclampsia. FIGO encourages all countries and its member associations to adopt and promote strategies to ensure this. The best combined test is one that includes maternal risk factors, measurements of mean arterial pressure (MAP), serum placental growth factor (PLGF), and uterine artery pulsatility index (UTPI). Where it is not possible to measure PLGF and/or UTPI, the baseline screening test should be a combination of maternal risk factors with MAP, and not maternal risk factors alone. If maternal serum pregnancy‐associated plasma protein A (PAPP‐A) is measured for routine first‐trimester screening for fetal aneuploidies, the result can be included for PE risk assessment. Variations to the full combined test would lead to a reduction in the performance screening. A woman is considered high risk when the risk is 1 in 100 or more based on the first‐trimester combined test with maternal risk factors, MAP, PLGF, and UTPI. + +Contingent screening: Where resources are limited, routine screening for preterm PE by maternal factors and MAP in all pregnancies and reserving measurements of PLGF and UTPI for a subgroup of the population (selected on the basis of the risk derived from screening by maternal factors and MAP) can be considered. 
+ +Prophylactic measures: Following first‐trimester screening for preterm PE, women identified at high risk should receive aspirin prophylaxis commencing at 11–14+6 weeks of gestation at a dose of ~150 mg to be taken every night until 36 weeks of gestation, when delivery occurs, or when PE is diagnosed. Low‐dose aspirin should not be prescribed to all pregnant women. In women with low calcium intake (<800 mg/d), either calcium replacement (≤1 g elemental calcium/d) or calcium supplementation (1.5–2 g elemental calcium/d) may reduce the burden of both early‐ and late‐onset PE.",2019-05-01 +31802127,Reactome and ORCID-fine-grained credit attribution for community curation. ,"Reactome is a manually curated, open-source, open-data knowledge base of biomolecular pathways. Reactome has always provided clear credit attribution for authors, curators and reviewers through fine-grained annotation of all three roles at the reaction and pathway level. These data are visible in the web interface and provided through the various data download formats. To enhance visibility and credit attribution for the work of authors, curators and reviewers, and to provide additional opportunities for Reactome community engagement, we have implemented key changes to Reactome: contributor names are now fully searchable in the web interface, and contributors can 'claim' their contributions to their ORCID profile with a few clicks. In addition, we are reaching out to domain experts to request their help in reviewing and editing Reactome pathways through a new 'Contribution' section, highlighting pathways which are awaiting community review. Database URL: https://reactome.org.",2019-01-01 +33186790,PCLiON: An Ontology for Data Standardization and Sharing of Prostate Cancer Associated Lifestyles.,"

Background

Research on lifestyle medicine (LM) has emerged in recent years and garnered wide attention. Prostate cancer (PCa) could be prevented and treated by positive lifestyles, but the association between lifestyles and PCa is always personalized.

Objectives

To address the heterogeneity and diversity of the different data types related to PCa, we aimed to establish a standardized lifestyle ontology, promote the exchange and sharing of disease lifestyle knowledge, and support text mining and knowledge discovery.

Methods

The overall construction of PCLiON was created in accordance with the principles and methodology of ontology construction. Following the principles of evidence-based medicine, we screened and integrated the lifestyles and their related attributes. Protégé was used to construct and validate the semantic framework. All annotations in PCLiON were based on SNOMED CT, NCI Thesaurus, the Cochrane Library and FooDB, etc. HTML5 and ASP.NET were used to develop the independent Web page platform and corresponding intelligent terminal application. PCLiON was also uploaded to the National Center for Biomedical Ontology BioPortal.

Results

PCLiON integrates 397 lifestyles and lifestyle-related factors associated with PCa, and is the first of its kind for a specific disease. It contains 320 attribute annotations and 11 object attributes. The logical relationship and completeness meet the ontology requirements. Qualitative analysis was carried out for 329 terms in PCLiON, including factors which are protective, risk or associated but functionally unclear, etc. PCLiON is publicly available both at http://pcaontology.net/PCaLifeStyleDefault.aspx and https://bioportal.bioontology.org/ontologies/PCALION.

Conclusions

Through the bilingual online platforms, complex lifestyle research data can be transformed into standardized, reliable and responsive knowledge, which can promote the shared-decision making (SDM) on lifestyle intervention and assist patients in lifestyle self-management toward the goal of PCa targeted prevention.",2020-11-07 +28975713,PepSweetener: A Web-Based Tool to Support Manual Annotation of Intact Glycopeptide MS Spectra.,"

Purpose

PepSweetener is a web-based visualization tool designed to facilitate the manual annotation of intact glycopeptides from MS data regardless of the instrument that produced these data.

Experimental design

This exploratory tool uses a theoretical glycopeptide dataset to visualize all peptide-glycan combinations that fall within the error range of the query precursor ion. PepSweetener simplifies the determination of the correct peptide and glycan composition of a glycopeptide based on its precursor mass. The theoretical glycopeptide search space can be customized in an advanced query mode that specifies potential proteins/peptides, glycan compositions, and several experimental parameters.

Results

PepSweetener displays the results on an interactive heat-map chart where theoretical glycopeptide tile colors correspond to ppm deviations from the query precursor mass. Additionally, a visualization chart incorporates glycan composition filtering, sorting by mass and tolerance, and an in silico peptide fragmentation diagram is provided to further support the correct glycopeptide identification.

Conclusions and clinical relevance

PepSweetener efficiently allows the selection of the most probable intact glycopeptide mass matches and speeds up the verification process. It is validated on serum protein samples and immunoglobulins. The tool is publicly hosted on ExPASy, the SIB Swiss Institute of Bioinformatics resource portal (http://glycoproteome.expasy.org/pepsweetener/app/).",2017-10-30 +31524988,Community Curation and Expert Curation of Human Long Noncoding RNAs with LncRNAWiki and LncBook.,"In recent years, the number of human long noncoding RNAs (lncRNAs) that have been identified has increased exponentially. However, these lncRNAs are poorly annotated compared to protein-coding genes, posing great challenges for a better understanding of their functional significance and elucidating their complex functioning molecular mechanisms. Here we employ both community and expert curation to yield a comprehensive collection of human lncRNAs and their annotations. Specifically, LncRNAWiki (http://lncrna.big.ac.cn/index.php/Main_Page) uses a wiki-based community curation model, thus showing great promise in dealing with the flood of biological knowledge, while LncBook (http://bigd.big.ac.cn/lncbook) is an expert curation-based database that provides a complement to LncRNAWiki. LncBook features a comprehensive collection of human lncRNAs and a systematic curation of lncRNAs by multi-omics data integration, functional annotation, and disease association. These protocols provide step-by-step instructions on how to browse and search a specific lncRNA and how to obtain a range of related information including expression, methylation, variation, function, and disease association. © 2019 by John Wiley & Sons, Inc.",2019-09-01 +31755900,NeoFuse: predicting fusion neoantigens from RNA sequencing data.,"

Summary

Gene fusions can generate immunogenic neoantigens that mediate anticancer immune responses. However, their computational prediction from RNA sequencing (RNA-seq) data requires deep bioinformatics expertise to assemble a computational workflow covering the prediction of: fusion transcripts, their translated proteins and peptides, Human Leukocyte Antigen (HLA) types, and peptide-HLA binding affinity. Here, we present NeoFuse, a computational pipeline for the prediction of fusion neoantigens from tumor RNA-seq data. NeoFuse can be applied to cancer patients' RNA-seq data to identify fusion neoantigens that might expand the repertoire of suitable targets for immunotherapy.

Availability and implementation

NeoFuse source code and documentation are available under GPLv3 license at https://icbi.i-med.ac.at/NeoFuse/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +30289528,IMG/M v.5.0: an integrated data management and comparative analysis system for microbial genomes and microbiomes.,"The Integrated Microbial Genomes & Microbiomes system v.5.0 (IMG/M: https://img.jgi.doe.gov/m/) contains annotated datasets categorized into: archaea, bacteria, eukarya, plasmids, viruses, genome fragments, metagenomes, cell enrichments, single particle sorts, and metatranscriptomes. Source datasets include those generated by the DOE's Joint Genome Institute (JGI), submitted by external scientists, or collected from public sequence data archives such as NCBI. All submissions are typically processed through the IMG annotation pipeline and then loaded into the IMG data warehouse. IMG's web user interface provides a variety of analytical and visualization tools for comparative analysis of isolate genomes and metagenomes in IMG. IMG/M allows open access to all public genomes in the IMG data warehouse, while its expert review (ER) system (IMG/MER: https://img.jgi.doe.gov/mer/) allows registered users to access their private genomes and to store their private datasets in workspace for sharing and for further analysis. IMG/M data content has grown by 60% since the last report published in the 2017 NAR Database Issue. IMG/M v.5.0 has a new and more powerful genome search feature, new statistical tools, and supports metagenome binning.",2019-01-01 +34412170,Pharmacogenetics of common SNP affecting drug metabolizing enzymes: comparison of allele frequencies between European and Malaysian/Singaporean.,"Compared to Europe, data on genetic variation in genes transcribing drug metabolizing enzymes among Asian is limited due to ethnic diversity. Here we compare frequencies for clinically relevant single nucleotide polymorphism (SNP) commonly observed in drug metabolizing enzymes between European and Malaysian/Singaporean. 
Minor allele frequencies (MAF) for the indicated SNPs for European, South Asian and East Asian populations were obtained from the NCBI website (https://www.ncbi.nlm.nih.gov/snp). The SNP prevalence among Malaysian/Singaporean was characterized from gene association studies. Generally, some SNPs in CYP2D6 and CYP2C19 do not show good agreement between the two populations as to the MAF value obtained. CYP2D6*4 tends to be more common among European, whereas CYP2D6*10 is more common in Malays and Chinese among Singaporean. Regardless of different phenotype, MAF of CYP2D6*4 for Indians is similar to that seen by the European. Singaporeans show smaller MAF for CYP2C19*17 but higher CYP2C19*2 frequencies as opposed to European ones. Following growing attention to the contribution of CYP3A4/5, N-acetyltransferases (NAT2), thiopurine methyltransferase (TPMT) and uridine diphosphate glucuronosyltransferases (UGT)2B7 in predicting drug response across Europe, there are limited pharmacogenetics (PGx) studies examining the gene-drug interaction among Malaysian/Singaporean. To better understand the heterogeneity of the drug response, PGx studies for the abovementioned enzymes between ethnics in Malaysian/Singaporean should be identified.",2021-03-19 +33031371,Real-time tracking of Tomato brown rugose fruit virus (ToBRFV) outbreaks in the Netherlands using Nextstrain.,"Tomato brown rugose fruit virus (ToBRFV) is a Tobamovirus that was first observed in 2014 and 2015 on tomato plants in Israel and Jordan respectively. Since the first description, the virus has been reported from all continents except Oceania and Antarctica, and has been found infecting both tomato and pepper crops. In October 2019, the Dutch National Plant Protection Organization received a ToBRFV infected tomato sample as part of a generic survey targeting tomato pests. Presence of the virus was verified using Illumina sequencing. 
A follow-up survey was initiated to determine the extent of ToBRFV presence in the Dutch tomato horticulture and identify possible linkages between ToBRFV genotypes, companies and epidemiological traits. Nextstrain was used to visualize these potential connections. By November 2019, 68 companies had been visited of which 17 companies were found to be infected. The 50 ToBRFV genomes from these outbreak locations group in three main clusters, which are hypothesized to represent three original sources. No correlation was found between genotypes, companies and epidemiological traits, and the source(s) of the Dutch ToBRFV outbreak remain unknown. This paper describes a Nextstrain build containing ToBRFV genomes up to and including November 2019. Sharing data with this interactive online tool will enable the plant virology field to better understand and communicate the diversity and spread of this new virus. Organizations are invited to share data or materials for inclusion in the Nextstrain build, which can be accessed at https://nextstrain.nrcnvwa.nl/ToBRFV/20191231.",2020-10-08 +33087719,Density functional theory-based electric field gradient database.,"The deviation of the electron density around the nuclei from spherical symmetry determines the electric field gradient (EFG), which can be measured by various types of spectroscopy. Nuclear Quadrupole Resonance (NQR) is particularly sensitive to the EFG. The EFGs, and by implication NQR frequencies, vary dramatically across materials. Consequently, searching for NQR spectral lines in previously uninvestigated materials represents a major challenge. Calculated EFGs can significantly aid at the search's inception. To facilitate this task, we have applied high-throughput density functional theory calculations to predict EFGs for 15187 materials in the JARVIS-DFT database. This database, which will include EFG as a standard entry, is continuously increasing. 
Given the large scope of the database, it is impractical to verify each calculation. However, we assess accuracy by singling out cases for which reliable experimental information is readily available and compare them to the calculations. We further present a statistical analysis of the results. The database and tools associated with our work are made publicly available by JARVIS-DFT ( https://www.ctcms.nist.gov/~knc6/JVASP.html ) and NIST-JARVIS API ( http://jarvis.nist.gov/ ).",2020-10-21 +31455604,"Single cell transcriptomic landscapes of pattern formation, proliferation and growth in Drosophila wing imaginal discs. ","Organ formation relies on the orchestration of pattern formation, proliferation and growth during development. How these processes are integrated at the individual cell level remains unclear. In the past decades, studies using Drosophila wing imaginal discs as a model system have provided valuable insights into pattern formation, growth control and regeneration. Here, we provide single cell transcriptomic landscapes of pattern formation, proliferation and growth of wing imaginal discs. We found that patterning information is robustly maintained in the single cell transcriptomic data and can provide reference matrices for computationally mapping single cells into discrete spatial domains. Assignment of wing disc single cells to spatial subregions facilitates examination of patterning refinement processes. We also clustered single cells into different proliferation and growth states and evaluated the correlation between cell proliferation/growth states and spatial patterning. Furthermore, single cell transcriptomic analyses allowed us to quantitatively examine disturbances of differentiation, proliferation and growth in a well-established tumor model. 
We provide a database to explore these datasets at http://drosophilayanlab-virtual-wingdisc.ust.hk:3838/v2/ This article has an associated 'The people behind the papers' interview.",2019-09-20 +33810805,Fast lightweight accurate xenograft sorting.,"

Motivation

With an increasing number of patient-derived xenograft (PDX) models being created and subsequently sequenced to study tumor heterogeneity and to guide therapy decisions, there is a similarly increasing need for methods to separate reads originating from the graft (human) tumor and reads originating from the host species' (mouse) surrounding tissue. Two kinds of methods are in use: On the one hand, alignment-based tools require that reads are mapped and aligned (by an external mapper/aligner) to the host and graft genomes separately first; the tool itself then processes the resulting alignments and quality metrics (typically BAM files) to assign each read or read pair. On the other hand, alignment-free tools work directly on the raw read data (typically FASTQ files). Recent studies compare different approaches and tools, with varying results.

Results

We show that alignment-free methods for xenograft sorting are superior concerning CPU time usage and equivalent in accuracy. We improve upon the state of the art sorting by presenting a fast lightweight approach based on three-way bucketed quotiented Cuckoo hashing. Our hash table requires memory comparable to an FM index typically used for read alignment and less than other alignment-free approaches. It allows extremely fast lookups and uses less CPU time than other alignment-free methods and alignment-based methods at similar accuracy. Several engineering steps (e.g., shortcuts for unsuccessful lookups, software prefetching) improve the performance even further.

Availability

Our software xengsort is available under the MIT license at http://gitlab.com/genomeinformatics/xengsort . It is written in numba-compiled Python and comes with sample Snakemake workflows for hash table construction and dataset processing.",2021-04-02 +28732212,UALCAN: A Portal for Facilitating Tumor Subgroup Gene Expression and Survival Analyses.,"Genomics data from The Cancer Genome Atlas (TCGA) project has led to the comprehensive molecular characterization of multiple cancer types. The large sample numbers in TCGA offer an excellent opportunity to address questions associated with tumor heterogeneity. Exploration of the data by cancer researchers and clinicians is imperative to unearth novel therapeutic/diagnostic biomarkers. Various computational tools have been developed to aid researchers in carrying out specific TCGA data analyses; however there is a need for resources to facilitate the study of gene expression variations and survival associations across tumors. Here, we report UALCAN, an easy to use, interactive web-portal to perform in-depth analyses of TCGA gene expression data. UALCAN uses TCGA level 3 RNA-seq and clinical data from 31 cancer types. The portal's user-friendly features allow users to: 1) analyze relative expression of a query gene(s) across tumor and normal samples, as well as in various tumor sub-groups based on individual cancer stages, tumor grade, race, body weight or other clinicopathologic features, 2) estimate the effect of gene expression level and clinicopathologic features on patient survival; and 3) identify the top over- and under-expressed (up and down-regulated) genes in individual cancer types. This resource serves as a platform for in silico validation of target genes and for identifying tumor sub-group specific candidate biomarkers. Thus, UALCAN web-portal could be extremely helpful in accelerating cancer research. 
UALCAN is publicly available at http://ualcan.path.uab.edu.",2017-07-18 +27076334,The Web-Based DNA Vaccine Database DNAVaxDB and Its Usage for Rational DNA Vaccine Design.,"A DNA vaccine is a vaccine that uses a mammalian expression vector to express one or more protein antigens and is administered in vivo to induce an adaptive immune response. Since the 1990s, a significant amount of research has been performed on DNA vaccines and the mechanisms behind them. To meet the needs of the DNA vaccine research community, we created DNAVaxDB ( http://www.violinet.org/dnavaxdb ), the first Web-based database and analysis resource of experimentally verified DNA vaccines. All the data in DNAVaxDB, which includes plasmids, antigens, vaccines, and sources, is manually curated and experimentally verified. This chapter goes over the detail of DNAVaxDB system and shows how the DNA vaccine database, combined with the Vaxign vaccine design tool, can be used for rational design of a DNA vaccine against a pathogen, such as Mycobacterium bovis.",2016-01-01 +33372171,SARS-Cov-2 infection in transplant-related biology: Where do we stand?,"Since December 2019, the novel coronavirus (SARS-CoV-2) emerged in Wuhan and rapidly spread throughout the world. There are nearly 3 951 905 confirmed cases of novel coronary pneumonia and more than 275 067 deaths worldwide, [JHU data-09/05/2020, https://www.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6]. A great number of patients contracted SARS-Cov-2 pneumonia (COVID-19). SARS-CoV-2 invades human target cells through receptor angiotensin-converting enzyme II (ACE2), which are expressed in the lung, kidney, and ileum and mediate inflammatory responses and immune activities. High plasma levels of proinflammatory cytokines were detected in the infected patients. These factors may predispose transplant patients to high risk of poor outcomes. 
Therefore, transplant patients might be affected by this coronavirus infection and protection of allografts should receive special attention during this outbreak. In the present study we attempt to delineate the transplant-related biology of SARS-CoV-2 infection.",2020-12-29 +27987168,The Evolution of Soybean Knowledge Base (SoyKB).,"Soybean Knowledge Base (SoyKB) is a comprehensive all-inclusive web resource for bridging the gap between soybean translational genomics and molecular breeding. It provides information for six entities including genes/proteins, microRNAs (miRNAs)/small interfering RNAs (sRNA), metabolites, single nucleotide polymorphisms (SNPs), and plant introduction lines and traits. It has a user-friendly web interface publicly available at http://soykb.org , which integrates and presents data in an intuitive manner to the soybean researchers, breeders, and consumers. It incorporates several informatics and analytical tools for integrating and merging various multi-omics datasets.",2017-01-01 +32061017,genodive version 3.0: Easy-to-use software for the analysis of genetic data of diploids and polyploids.,"genodive version 3.0 is a user-friendly program for the analysis of population genetic data. This version presents a major update from the previous version and now offers a wide spectrum of different types of analyses. genodive has an intuitive graphical user interface that allows direct manipulation of the data through transformation, imputation of missing data, and exclusion and inclusion of individuals, population and/or loci. Furthermore, genodive seamlessly supports 15 different file formats for importing or exporting data from or to other programs. One major feature of genodive is that it supports both diploid and polyploid data, up to octaploidy (2n = 8x) for some analyses, but up to hexadecaploidy (2n = 16x) for other analyses. 
The different types of analyses offered by genodive include multiple statistics for estimating population differentiation (φST , FST , F'ST , GST , G'ST , G''ST , Dest , RST , ρ), analysis of molecular variance-based K-means clustering, Hardy-Weinberg equilibrium, hybrid index, population assignment, clone assignment, Mantel test, Spatial Autocorrelation, 23 ways of calculating genetic distances, and both principal components and principal coordinates analyses. A unique feature of genodive is that it can also open data sets with nongenetic variables, for example environmental data or geographical coordinates that can be included in the analysis. In addition, genodive makes it possible to run several external programs (lfmm, structure, instruct and vegan) directly from its own user interface, avoiding the need for data reformatting and use of the command line. genodive is available for computers running Mac OS X 10.7 or higher and can be downloaded freely from: http://www.patrickmeirmans.com/software.",2020-03-11 +33982937,Development and Validation of a Risk Prediction Model for Esophageal Squamous Cell Carcinoma Using Cohort Studies.,"

Introduction

Esophageal squamous cell carcinoma (ESCC) carries a poor prognosis, but earlier tumor detection would improve survival. We aimed to develop and externally validate a risk prediction model based on exposure to readily available risk factors to identify high-risk individuals of ESCC.

Methods

Competing risk regression modeling was used to develop a risk prediction model. Individuals' absolute risk of ESCC during follow-up was computed with the cumulative incidence function. We used prospectively collected data from the Nord-Trøndelag Health Study (HUNT) for model derivation and the UK Biobank cohort for validation. Candidate predictors were age, sex, tobacco smoking, alcohol consumption, body mass index (BMI), education, cohabitation, physical exercise, and employment. Model performance was validated internally and externally by evaluating model discrimination using the area under the receiver-operating characteristic curve (AUC) and model calibration.

Results

The developed risk prediction model included age, sex, smoking, alcohol, and BMI. The AUC for 5-year risk of ESCC was 0.76 (95% confidence interval [CI], 0.58-0.93) in the derivation cohort and 0.70 (95% CI, 0.64-0.75) in the validation cohort. The calibration showed close agreement between the predicted cumulative risk and observed probabilities of developing ESCC. Higher net benefit was observed when applying the risk prediction model than considering all participants as being at high risk, indicating good clinical usefulness. A web tool for risk calculation was developed: https://sites.google.com/view/escc-ugis-ki.

Discussion

This ESCC risk prediction model showed good discrimination and calibration and validated well in an independent cohort. This readily available model can help select high-risk individuals for preventive interventions.",2021-04-01 +31025863,Extended Multitarget Pharmacology of Anticancer Drugs.,"Multitarget pharmacology of small-molecule cancer drugs significantly contributes to their mechanism of action, side effects, and emergence of drug resistance and opens ways to repurpose, combine, or customize drug therapy. In most cases, the set of targets affected at therapeutic concentrations is not fully characterized and/or the interaction efficacy values are not accurately quantified. We collected information about multiple targets for each cancer drug along with their experimental effective concentrations or binding activities from multiple sources. All multitarget activity values for each drug then were used to build two proximity network pharmacology maps of anticancer drugs and targets of those drugs, respectively. Together with the network map, we showed that the majority of the cancer drugs had substantial multitarget pharmacology based on our current knowledge. In addition, most of the cancer drugs simultaneously affect macromolecular targets from different classes and types. The target subset can further be accentuated and personalized by patient sample-specific expression data. The network maps of cancer drugs and targets as well as all quantified activity data were integrated into a freely available database, CancerDrugMap (http://ruben.ucsd.edu/dnet/maps/drugnet.html). The identified multitarget pharmacology of cancer drugs is essential for improving the efficacy of individually prescribed drugs and drug combinations and minimization of adverse effects.",2019-05-03 +31514139,Why Visualize? 
Untangling a Large Network of Arguments.,"Visualization has been deemed a useful technique by researchers and practitioners, alike, leaving a trail of arguments behind that reason why visualization works. In addition, examples of misleading usages of visualizations in information communication have occasionally been pointed out. Thus, to contribute to the fundamental understanding of our discipline, we require a comprehensive collection of arguments on ""why visualize?"" (or ""why not?""), untangling the rationale behind positive and negative viewpoints. In this paper, we report a theoretical study to understand the underlying reasons of various arguments; their relationships (e.g., built-on, and conflict); and their respective dependencies on tasks, users, and data. We curated an argumentative network based on a collection of arguments from various fields, including information visualization, cognitive science, psychology, statistics, philosophy, and others. Our work proposes several categorizations for the arguments, and makes their relations explicit. We contribute the first comprehensive and systematic theoretical study of the arguments on visualization. Thereby, we provide a roadmap towards building a foundation for visualization theory and empirical research as well as for practical application in the critique and design of visualizations. In addition, we provide our argumentation network and argument collection online at https://whyvis.dbvis.de, supported by an interactive visualization.",2021-01-28 +34003774,Porphyromonas gingivalis Promotes Colorectal Carcinoma by Activating the Hematopoietic NLRP3 Inflammasome.,"Porphyromonas gingivalis (P. gingivalis) is a keystone periodontal pathogen associated with various digestive cancers. However, whether P. gingivalis can promote colorectal cancer and the underlying mechanism associated with such promotion remains unclear. In this study, we found that P. 
gingivalis was enriched in human feces and tissue samples from patients with colorectal cancer compared with those from patients with colorectal adenoma or healthy subjects. Cohort studies demonstrated that P. gingivalis infection was associated with poor prognosis in colorectal cancer. P. gingivalis increased tumor counts and tumor volume in the ApcMin/+ mouse model and increased tumor growth in orthotopic rectal and subcutaneous carcinoma models. Furthermore, orthotopic tumors from mice exposed to P. gingivalis exhibited tumor-infiltrating myeloid cell recruitment and a proinflammatory signature. P. gingivalis promoted colorectal cancer via NLRP3 inflammasome activation in vitro and in vivo. NLRP3 chimeric mice harboring orthotopic tumors showed that the effect of NLRP3 on P. gingivalis pathogenesis was mediated by hematopoietic sources. Collectively, these data suggest that P. gingivalis contributes to colorectal cancer neoplasia progression by activating the hematopoietic NLRP3 inflammasome. SIGNIFICANCE: This study demonstrates that the periodontal pathogen P. gingivalis can promote colorectal tumorigenesis by recruiting myeloid cells and creating a proinflammatory tumor microenvironment. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/10/2745/F1.large.jpg.",2021-03-18 +35935891,Genome-scale phylogenies reveal relationships among Parastagonospora species infecting domesticated and wild grasses.,"Several plant pathogenic Parastagonospora species have been identified infecting wheat and other cereals over the past 50 years. As new lineages were discovered, naming conventions grew unwieldy and the relationships with previously recognized species remained unclear. We used genome sequencing to clarify relationships among these species and provided new names for most of these species. Six of the nine described Parastagonospora species were recovered from wheat, with five of these species coming from Iran. 
Genome sequences revealed that three strains thought to be hybrids between P. nodorum and P. pseudonodorum were not actually hybrids, but rather represented rare gene introgressions between those species. Our data are consistent with the hypothesis that P. nodorum originated as a pathogen of wild grasses in the Fertile Crescent, then emerged as a wheat pathogen via host-tracking during the domestication of wheat in the same region. The discovery of a diverse array of Parastagonospora species infecting wheat in Iran suggests that new wheat pathogens could emerge from this region in the future. Citation: Croll D, Crous PW, Pereira D, et al. 2021. Genome-scale phylogenies reveal relationships among Parastagonospora species infecting domesticated and wild grasses. Persoonia 46: 116-128. https://doi.org/10.3767/persoonia.2021.46.04.",2021-02-14 +36353314,Multi-locus identification of Psilocybe cubensis by high-resolution melting (HRM).,"Hallucinogenic mushroom is a kind of toxic strain containing psychoactive tryptamine substances such as psilocybin, psilocin and ibotenic acid, etc. The mushrooms containing hallucinogenic components are various, widely distributed and lack of standard to define, which made a great challenge to identification. Traditional identification methods, such as morphology and toxicology analysis, showed shortcomings in old or processed samples, while the DNA-based identification of hallucinogenic mushrooms would allow to identify these samples due to the stability of DNA. In this paper, four primer sets are designed to target Psilocybe cubensis DNA for increasing resolution of present identification method, and the target markers include largest subunit of RNA polymerase II (marked as PC-R1), psilocybin-related phosphotransferase gene (marked as PC-PT), glyceraldehyde 3-phosphate dehydrogenase (marked as PC-3) and translation EF1α (marked as PC-EF). 
Real-time PCR with high-resolution melting (HRM) assay were used for the differentiation of the fragments amplified by these primer sets, which were tested for specificity, reproducibility, sensitivity, mixture analysis and multiplex PCR. It was shown that the melting temperatures of PC-R1, PC-PT, PC-3 and PC-EF of P. cubensis were (87.93 ± 0.12) °C, (82.21 ± 0.14) °C, (79.72 ± 0.12) °C and (80.11 ± 0.19) °C in our kinds of independent experiments. Significant HRM characteristic can be shown with a low concentration of 62.5 pg/µL DNA sample, and P. cubensis could be detected in mixtures with Homo sapiens or Cannabis sativa. In summary, the method of HRM analysis can quickly and specifically distinguish P. cubensis from other species, which could be utilized for forensic science, medical diagnosis and drug trafficking cases. Supplemental data for this article are available online at https://doi.org/10.1080/20961790.2021.1875580.",2021-04-13 +33956508,Effects of Low-Dose Gestational TCDD Exposure on Behavior and on Hippocampal Neuron Morphology and Gene Expression in Mice.,"

Background

2,3,7,8-tetrachlorodibenzo-p-dioxin (TCDD) is a persistent and toxic environmental pollutant. Gestational exposure to TCDD has been linked to cognitive and motor deficits, and increased incidence of autism spectrum disorder (ASD) traits in children. Most animal studies of these neurodevelopmental effects involve acute TCDD exposure, which does not model typical exposure in humans.

Objectives

The aim of the study was to establish a dietary low-dose gestational TCDD exposure protocol and perform an initial characterization of the effects on offspring behavior, neurodevelopmental phenotypes, and gene expression.

Methods

Throughout gestation, pregnant C57BL/6J mice were fed a diet containing a low dose of TCDD (9 ng TCDD/kg body weight per day) or a control diet. The offspring were tested in a battery of behavioral tests, and structural brain alterations were investigated by magnetic resonance imaging. The dendritic morphology of pyramidal neurons in the hippocampal Cornu Ammonis (CA)1 area was analyzed. RNA sequencing was performed on hippocampi of postnatal day 14 TCDD-exposed and control offspring.

Results

TCDD-exposed females displayed subtle deficits in motor coordination and reversal learning. Volumetric differences between diet groups were observed in regions of the hippocampal formation, mammillary bodies, and cerebellum, alongside higher dendritic arborization of pyramidal neurons in the hippocampal CA1 region of TCDD-exposed females. RNA-seq analysis identified 405 differentially expressed genes in the hippocampus, enriched for genes with functions in regulation of microtubules, axon guidance, extracellular matrix, and genes regulated by SMAD3.

Discussion

Exposure to 9 ng TCDD/kg body weight per day throughout gestation was sufficient to cause specific behavioral and structural brain phenotypes in offspring. Our data suggest that alterations in SMAD3-regulated microtubule polymerization in the developing postnatal hippocampus may lead to an abnormal morphology of neuronal dendrites that persists into adulthood. These findings show that environmental low-dose gestational exposure to TCDD can have significant, long-term impacts on brain development and function. https://doi.org/10.1289/EHP7352.",2021-05-06 +27139435,An integrated signal transduction network of macrophage migration inhibitory factor.,"Macrophage migration inhibitory factor (MIF) is a glycosylated multi-functional protein that acts as an enzyme as well as a cytokine. MIF mediates its actions through a cell surface class II major histocompatibility chaperone, CD74 and co-receptors such as CD44, CXCR2, CXCR4 or CXCR7. MIF has been implicated in the pathogenesis of several acute and chronic inflammatory diseases. Although MIF is a molecule of biomedical importance, a public resource of MIF signaling pathway is currently lacking. In view of this, we carried out detailed data mining and documentation of the signaling events pertaining to MIF from published literature and developed an integrated reaction map of MIF signaling. This resulted in the cataloguing of 68 molecules belonging to MIF signaling pathway, which includes 24 protein-protein interactions, 44 post-translational modifications, 11 protein translocation events and 8 activation/inhibition events. In addition, 65 gene regulation events at the mRNA levels induced by MIF signaling have also been catalogued. This signaling pathway has been integrated into NetPath ( http://www.netpath.org ), a freely available human signaling pathway resource developed previously by our group. The MIF pathway data is freely available online in various community standard data exchange formats. 
We expect that data on signaling events and a detailed signaling map of MIF will provide the scientific community with an improved platform to facilitate further molecular as well as biomedical investigations on MIF.",2016-05-03 +32673298,"Characteristics of Persons Who Died with COVID-19 - United States, February 12-May 18, 2020.","During January 1, 2020-May 18, 2020, approximately 1.3 million cases of coronavirus disease 2019 (COVID-19) and 83,000 COVID-19-associated deaths were reported in the United States (1). Understanding the demographic and clinical characteristics of decedents could inform medical and public health interventions focused on preventing COVID-19-associated mortality. This report describes decedents with laboratory-confirmed infection with SARS-CoV-2, the virus that causes COVID-19, using data from 1) the standardized CDC case-report form (case-based surveillance) (https://www.cdc.gov/coronavirus/2019-ncov/php/reporting-pui.html) and 2) supplementary data (supplemental surveillance), such as underlying medical conditions and location of death, obtained through collaboration between CDC and 16 public health jurisdictions (15 states and New York City).",2020-07-17 +26503248,dbMAE: the database of autosomal monoallelic expression.,"Recently, data on 'random' autosomal monoallelic expression has become available for the entire genome in multiple human and mouse tissues and cell types, creating a need for better access and dissemination. The database of autosomal monoallelic expression (dbMAE; https://mae.hms.harvard.edu) incorporates data from multiple recent reports of genome-wide analyses. These include transcriptome-wide analyses of allelic imbalance in clonal cell populations based on sequence polymorphisms, as well as indirect identification, based on a specific chromatin signature present in MAE gene bodies. 
Currently, dbMAE contains transcriptome-wide chromatin identification calls for 8 human and 21 mouse tissues, and describes over 16 000 murine and ∼ 700 human cases of directly measured biased expression, compiled from allele-specific RNA-seq and genotyping array data. All data are manually curated. To ensure cross-publication uniformity, we performed re-analysis of transcriptome-wide RNA-seq data using the same pipeline. Data are accessed through an interface that allows for basic and advanced searches; all source references, including raw data, are clearly described and hyperlinked. This ensures the utility of the resource as an initial screening tool for those interested in investigating the role of monoallelic expression in their specific genes and tissues of interest.",2015-10-25 +32096818,Proline: an efficient and user-friendly software suite for large-scale proteomics.,"MOTIVATION:The proteomics field requires the production and publication of reliable mass spectrometry-based identification and quantification results. Although many tools or algorithms exist, very few consider the importance of combining, in a unique software environment, efficient processing algorithms and a data management system to process and curate hundreds of datasets associated with a single proteomics study. RESULTS:Here, we present Proline, a robust software suite for analysis of MS-based proteomics data, which collects, processes and allows visualization and publication of proteomics datasets. We illustrate its ease of use for various steps in the validation and quantification workflow, its data curation capabilities and its computational efficiency. The DDA label-free quantification workflow efficiency was assessed by comparing results obtained with Proline to those obtained with a widely used software using a spiked-in sample. This assessment demonstrated Proline's ability to provide high quantification accuracy in a user-friendly interface for datasets of any size. 
AVAILABILITY AND IMPLEMENTATION:Proline is available for Windows and Linux under CECILL open-source license. It can be deployed in client-server mode or in standalone mode at http://proline.profiproteomics.fr/#downloads. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +32324219,NOREVA: enhanced normalization and evaluation of time-course and multi-class metabolomic data.,"Biological processes (like microbial growth & physiological response) are usually dynamic and require the monitoring of metabolic variation at different time-points. Moreover, there is clear shift from case-control (N=2) study to multi-class (N>2) problem in current metabolomics, which is crucial for revealing the mechanisms underlying certain physiological process, disease metastasis, etc. These time-course and multi-class metabolomics have attracted great attention, and data normalization is essential for removing unwanted biological/experimental variations in these studies. However, no tool (including NOREVA 1.0 focusing only on case-control studies) is available for effectively assessing the performance of normalization method on time-course/multi-class metabolomic data. Thus, NOREVA was updated to version 2.0 by (i) realizing normalization and evaluation of both time-course and multi-class metabolomic data, (ii) integrating 144 normalization methods of a recently proposed combination strategy and (iii) identifying the well-performing methods by comprehensively assessing the largest set of normalizations (168 in total, significantly larger than those 24 in NOREVA 1.0). The significance of this update was extensively validated by case studies on benchmark datasets. All in all, NOREVA 2.0 is distinguished for its capability in identifying well-performing normalization method(s) for time-course and multi-class metabolomics, which makes it an indispensable complement to other available tools. 
NOREVA can be accessed at https://idrblab.org/noreva/.",2020-07-01 +25404132,DoGSD: the dog and wolf genome SNP database.,"The rapid advancement of next-generation sequencing technology has generated a deluge of genomic data from domesticated dogs and their wild ancestor, grey wolves, which have simultaneously broadened our understanding of domestication and diseases that are shared by humans and dogs. To address the scarcity of single nucleotide polymorphism (SNP) data provided by authorized databases and to make SNP data more easily/friendly usable and available, we propose DoGSD (http://dogsd.big.ac.cn), the first canidae-specific database which focuses on whole genome SNP data from domesticated dogs and grey wolves. The DoGSD is a web-based, open-access resource comprising ∼ 19 million high-quality whole-genome SNPs. In addition to the dbSNP data set (build 139), DoGSD incorporates a comprehensive collection of SNPs from two newly sequenced samples (1 wolf and 1 dog) and collected SNPs from three latest dog/wolf genetic studies (7 wolves and 68 dogs), which were taken together for analysis with the population genetic statistics, Fst. In addition, DoGSD integrates some closely related information including SNP annotation, summary lists of SNPs located in genes, synonymous and non-synonymous SNPs, sampling location and breed information. All these features make DoGSD a useful resource for in-depth analysis in dog-/wolf-related studies.",2014-11-17 +33784201,Bayesian Generalized Linear Mixed-Model Analysis of Language Samples: Detecting Patterns in Expository and Narrative Discourse of Adolescents With Traumatic Brain Injury.,"Purpose Generalized linear mixed-model (GLMM) and Bayesian methods together provide a framework capable of handling a wide variety of complex data commonly encountered across the communication sciences. 
Using language sample analysis, we demonstrate the utility of these methods in answering specific questions regarding the differences between discourse patterns of children who have experienced a traumatic brain injury (TBI), as compared to those with typical development. Method Language samples were collected from 55 adolescents ages 13-18 years, five of whom had experienced a TBI. We describe parameters relating to the productivity, syntactic complexity, and lexical diversity of language samples. A Bayesian GLMM is developed for each parameter of interest, relating these parameters to age, sex, prior history (TBI or typical development), and socioeconomic status, as well as the type of discourse sample (compare-contrast, cause-effect, or narrative). Statistical models are thoroughly described. Results Comparing the discourse of adolescents with TBI to those with typical development, substantial differences are detected in productivity and lexical diversity, while differences in syntactic complexity are more moderate. Female adolescents exhibited greater syntactic complexity, while male adolescents exhibited greater productivity and lexical diversity. Generally, our models suggest more advanced discourse among adolescents who are older or who have indicators of higher socioeconomic status. Differences relating to lecture type were also detected. Conclusions Bayesian and GLMM methods yield more informative and intuitive results than traditional statistical analyses, with a greater degree of confidence in model assumptions. We recommend that these methods be used more widely in language sample analysis. Supplemental Material https://doi.org/10.23641/asha.14226959.",2021-03-30 +31232449,Comparative assessment of long-read error correction software applied to Nanopore RNA-sequencing data.,"

Motivation

Nanopore long-read sequencing technology offers promising alternatives to high-throughput short read sequencing, especially in the context of RNA-sequencing. However this technology is currently hindered by high error rates in the output data that affect analyses such as the identification of isoforms, exon boundaries, open reading frames and creation of gene catalogues. Due to the novelty of such data, computational methods are still actively being developed and options for the error correction of Nanopore RNA-sequencing long reads remain limited.

Results

In this article, we evaluate the extent to which existing long-read DNA error correction methods are capable of correcting cDNA Nanopore reads. We provide an automatic and extensive benchmark tool that not only reports classical error correction metrics but also the effect of correction on gene families, isoform diversity, bias toward the major isoform and splice site detection. We find that long read error correction tools that were originally developed for DNA are also suitable for the correction of Nanopore RNA-sequencing data, especially in terms of increasing base pair accuracy. Yet investigators should be warned that the correction process perturbs gene family sizes and isoform diversity. This work provides guidelines on which (or whether) error correction tools should be used, depending on the application type.

Benchmarking software

https://gitlab.com/leoisl/LR_EC_analyser.",2020-07-01 +28353654,The 'Molecule of the Month' Website-An Extraordinary Chemistry Educational Resource Online for over 20 Years. ,"The Molecule of the Month website (http://www.chm.bris.ac.uk/motm/motm.htm) is an educational resource that is celebrating its 20th anniversary. Here we reflect on its pioneering role in promoting new technology for visualizing and presenting chemical information on the web, as well as its achievements, as a free educational resource, both as a teaching aid and as a multi-user, multi-author learning platform. We discuss the legal aspects of such sites, as well as issues around how to make the content permanent. Finally, we look forward to how such sites may evolve in the future.",2017-03-29 +33342522,"Measuring the Effects of a Demonstration to Reduce Childhood Food Insecurity: A Randomized Controlled Trial of the Nevada Healthy, Hunger Free Kids Project.","

Background

To reduce childhood hunger, the US Department of Agriculture funded a set of demonstration projects, including the Nevada Healthy, Hunger-Free Kids (HHFK) project.

Objective

The study objective was to test whether the Nevada HHFK project reduced child food insecurity (FI-C) among low-income households with young children.

Design

Households were randomly assigned to treatment and control groups, with outcomes measured using household surveys and administrative data. Survey data were collected at baseline (n=3,088) and follow-up (n=2,074) 8 to 12 months into the project.

Participants/setting

Eligible households in Las Vegas, NV, had children under age 5 years, received Supplemental Nutrition Assistance Program (SNAP) benefits, and had incomes below 75% of the federal poverty level.

Intervention

Between June 2016 and May 2017, treatment households on SNAP received an additional $40 in monthly SNAP benefits per child under age 5 years.

Main outcome measures

Key outcomes included FI-C (primary), food security among adults and households, and food expenditures (secondary).

Statistical analyses performed

Differences between the treatment and control groups were estimated by a logistic regression model controlling for baseline characteristics. Analyses were also performed on socioeconomic subgroups.

Results

The Nevada HHFK project did not reduce FI-C (treatment=31.2%, control=30.6%; P=0.620), very low food security among children (P=0.915), or food insecurity among adults (P=0.925). The project increased households' monthly food expenditures (including SNAP and out-of-pocket food purchases) by $23 (P<0.001).

Conclusions

A demonstration project to reduce FI-C by increasing SNAP benefits to Las Vegas households with young children and very low income did not reduce FI-C or other food-insecurity measures. This finding runs counter to prior research showing that SNAP and similar forms of food assistance have reduced food insecurity. This project was implemented during a period of substantial economic growth in Las Vegas. Future research should explore the role of the economic context, children's ages, and household income in determining how increases in SNAP benefits affect food insecurity.

ClinicalTrials.gov identifier

NCT04253743 (http://www.clinicaltrials.gov) FUNDING/SUPPORT: This article is published as part of a supplement supported by the US Department of Agriculture, Food and Nutrition Service.",2021-01-01 +33737256,Facial Nerve Length Influence on Vestibular Schwannoma Microsurgery Outcomes.,"

Objective

Facial nerve (FN) function preservation is the primary goal during vestibular schwannoma (VS) resection. Many factors are linked to postoperative FN outcomes. In the present study, we evaluated the association between FN length and VS surgical outcomes.

Methods

We included 70 consecutive patients who had undergone VS microsurgery between October 2019 and November 2020. The clinical data were prospectively obtained from the patients. The relative FN (rFN) length was obtained by subtracting the contralateral FN length from the ipsilateral FN length as measured using DSI Studio software (available at: http://dsi-studio.labsolver.org/).

Results

The postoperative FN function was House-Brackmann grade I in 47 of the 70 patients (67.1%), grade II in 10 (14.3%), and grade III in 13 (18.6%). Gross total resection (GTR) was performed in 61 patients (87.1%). A residual tumor was retained to preserve FN function in 9 of the 70 patients (12.9%), and rFN length was measured (mean diameter, 20.8 mm; range, 2.5-51.5). On multivariate analysis, the rFN length was significantly associated with the extent of tumor resection. The receiver operating characteristic curve indicated that the cutoff value for rFN length to predict for intraoperative near total resection versus GTR was 36.6 mm, with a specificity and sensitivity of 93.4% and 88.9%, respectively.

Conclusions

The rFN length is important for predicting surgical outcomes. An rFN length >36.6 mm might indicate difficulty in achieving GTR with preservation of FN function. Therefore, the rFN length could become an objective indicator for neurosurgeons to predict the difficulty of GTR to preserve FN function.",2021-03-16 +33648553,Fe(2)OG: an integrated HMM profile-based web server to predict and analyze putative non-haem iron(II)- and 2-oxoglutarate-dependent dioxygenase function in protein sequences.,"

Objective

Non-haem iron(II)- and 2-oxoglutarate-dependent dioxygenases (i2OGdd), are a taxonomically and functionally diverse group of enzymes. The active site comprises ferrous iron in a hexa-coordinated distorted octahedron with the apoenzyme, 2-oxoglutarate and a displaceable water molecule. Current information on novel i2OGdd members is sparse and relies on computationally-derived annotation schema. The dissimilar amino acid composition and variable active site geometry thereof, results in differing reaction chemistries amongst i2OGdd members. An additional need of researchers is a curated list of sequences with putative i2OGdd function which can be probed further for empirical data.

Results

This work reports the implementation of [Formula: see text], a web server with dual functionality and an extension of previous work on i2OGdd enzymes [Formula: see text]. [Formula: see text], in this form is completely revised, updated (URL, scripts, repository) and will strengthen the knowledge base of investigators on i2OGdd biochemistry and function. [Formula: see text], utilizes the superior predictive propensity of HMM-profiles of laboratory validated i2OGdd members to predict probable active site geometries in user-defined protein sequences. [Formula: see text], also provides researchers with a pre-compiled list of analyzed and searchable i2OGdd-like sequences, many of which may be clinically relevant. [Formula: see text], is freely available ( http://204.152.217.16/Fe2OG.html ) and supersedes all previous versions, i.e., H2OGpred, DB2OG.",2021-03-01 +32484539,miRNet 2.0: network-based visual analytics for miRNA functional analysis and systems biology.,"miRNet is an easy-to-use, web-based platform designed to help elucidate microRNA (miRNA) functions by integrating users' data with existing knowledge via network-based visual analytics. Since its first release in 2016, miRNet has been accessed by >20 000 researchers worldwide, with ∼100 users on a daily basis. While version 1.0 was focused primarily on miRNA-target gene interactions, it has become clear that in order to obtain a global view of miRNA functions, it is necessary to bring other important players into the context during analysis. 
Driven by this concept, in miRNet version 2.0, we have (i) added support for transcription factors (TFs) and single nucleotide polymorphisms (SNPs) that affect miRNAs, miRNA-binding sites or target genes, whilst also greatly increased (>5-fold) the underlying knowledgebases of miRNAs, ncRNAs and disease associations; (ii) implemented new functions to allow creation and visual exploration of multipartite networks, with enhanced support for in situ functional analysis and (iii) revamped the web interface, optimized the workflow, and introduced microservices and web application programming interface (API) to sustain high-performance, real-time data analysis. The underlying R package is also released in tandem with version 2.0 to allow more flexible data analysis for R programmers. The miRNet 2.0 website is freely available at https://www.mirnet.ca.",2020-07-01 +32356893,miRSwitch: detecting microRNA arm shift and switch events.,"Arm selection, the preferential expression of a 3' or 5' mature microRNA (miRNA), is a highly dynamic and tissue-specific process. Time-dependent expression shifts or switches between the arms are also relevant for human diseases. We present miRSwitch, a web server to facilitate the analysis and interpretation of arm selection events. Our species-independent tool evaluates pre-processed small non-coding RNA sequencing (sncRNA-seq) data, i.e. expression matrices or output files from miRNA quantification tools (miRDeep2, miRMaster, sRNAbench). miRSwitch highlights potential changes in the distribution of mature miRNAs from the same precursor. Group comparisons from one or several user-provided annotations (e.g. disease states) are possible. Results can be dynamically adjusted by choosing from a continuous range of highly specific to very sensitive parameters. Users can compare potential arm shifts in the provided data to a human reference map of pre-computed arm shift frequencies. We created this map from 46 tissues and 30 521 samples. 
As case studies we present novel arm shift information in a Alzheimer's disease biomarker data set and from a comparison of tissues in Homo sapiens and Mus musculus. In summary, miRSwitch offers a broad range of customized arm switch analyses along with comprehensive visualizations, and is freely available at: https://www.ccb.uni-saarland.de/mirswitch/.",2020-07-01 +32559338,Autopsy registry can facilitate COVID-19 research.,"The WHO declared the global outbreak of SARS-CoV-2 a pandemic on March 11, 2020, and ""call(ed) on all countries to exchange country experiences and practices in a transparent and timely way"" (http://www.euro.who.int/en/health-topics/health-emergencies/pages/news/news/2020/03/who-announces-covid-19-outbreak-a-pandemic). To date, many medical societies have announced their intention to collect and analyze data from COVID-19 patients and some large-scale prospective data collections are already running, such as the LEOSS registry (Lean European Open Survey on SARS-CoV-2 Infected Patients) or the CAPACITYCOVID registry (registry of patients with COVID-19 including cardiovascular risk and complications). The necessity to mobilize and harmonize basic and applied research worldwide is of utmost importance (Sansonetti, 2020).",2020-07-03 +33219073,Medicare Access and CHIP Reauthorization Act in Small to Medium-Sized Primary Care Practices.,"

Background

Despite major efforts to transition to a new physician payment system under the Medicare Access and CHIP Reauthorization Act (MACRA), little is known about how well practices are prepared. This study aimed to understand how small and medium-sized primary care practices in the Heart of Virginia Healthcare (https://www.vahealthinnovation.org/hvh/) perceive their quality incentives under MACRA.

Methods

This study analyzed data from 16 focus-groups (70 participants), which yielded a range of physician, advanced practice clinician, office manager, and staff perspectives. Focus-groups were audio-recorded and transcribed, then imported into NVivo for coding and analysis of themes. A multidisciplinary research team reviewed the transcripts to maximize coding insights and to improve validity.

Results

The main findings from the focus-groups are: 1) MACRA awareness is relatively higher in independent practices, 2) steps taken toward MACRA differ by practice ownership, and 3) practices have mixed perceptions about the expected impact of MACRA. Two additional themes emerged from data: 1) practices that joined accountable care organizations are taking proactive approaches to MACRA, and 2) independent practices face ongoing challenges.

Conclusions

This study highlights a dilemma in which independent practices are proactively attempting to prepare for MACRA's requirements, yet they continue to have major challenges. Practices are under extreme pressure to comply with reimbursement regulations, which may force some practices joining a health system or merging with another practice or completely closing the practices. Policy makers should assess the unintended consequences of payment reform policies on independent practices and provide support in transitioning to a new payment system.",2020-11-01 +30566623,"Interactive visual analysis of drug-target interaction networks using Drug Target Profiler, with applications to precision medicine and drug repurposing. ","Knowledge of the full target space of drugs (or drug-like compounds) provides important insights into the potential therapeutic use of the agents to modulate or avoid their various on- and off-targets in drug discovery and precision medicine. However, there is a lack of consolidated databases and associated data exploration tools that allow for systematic profiling of drug target-binding potencies of both approved and investigational agents using a network-centric approach. We recently initiated a community-driven platform, Drug Target Commons (DTC), which is an open-data crowdsourcing platform designed to improve the management, reproducibility and extended use of compound-target bioactivity data for drug discovery and repurposing, as well as target identification applications. In this work, we demonstrate an integrated use of the rich bioactivity data from DTC and related drug databases using Drug Target Profiler (DTP), an open-source software and web tool for interactive exploration of drug-target interaction networks. DTP was designed for network-centric modeling of mode-of-action of multi-targeting anticancer compounds, especially for precision oncology applications. 
DTP enables users to construct an interaction network based on integrated bioactivity data across selected chemical compounds and their protein targets, further customizable using various visualization and filtering options, as well as cross-links to several drug and protein databases to provide comprehensive information of the network nodes and interactions. We demonstrate here the operation of the DTP tool and its unique features by several use cases related to both drug discovery and drug repurposing applications, using examples of anticancer drugs with shared target profiles. DTP is freely accessible at http://drugtargetprofiler.fimm.fi/.",2018-12-18 +30371820,The UNITE database for molecular identification of fungi: handling dark taxa and parallel taxonomic classifications.,"UNITE (https://unite.ut.ee/) is a web-based database and sequence management environment for the molecular identification of fungi. It targets the formal fungal barcode-the nuclear ribosomal internal transcribed spacer (ITS) region-and offers all ∼1 000 000 public fungal ITS sequences for reference. These are clustered into ∼459 000 species hypotheses and assigned digital object identifiers (DOIs) to promote unambiguous reference across studies. In-house and web-based third-party sequence curation and annotation have resulted in more than 275 000 improvements to the data over the past 15 years. UNITE serves as a data provider for a range of metabarcoding software pipelines and regularly exchanges data with all major fungal sequence databases and other community resources. 
Recent improvements include redesigned handling of unclassifiable species hypotheses, integration with the taxonomic backbone of the Global Biodiversity Information Facility, and support for an unlimited number of parallel taxonomic classification systems.",2019-01-01 +32025087,Reassessing Southern Ocean Air-Sea CO2 Flux Estimates With the Addition of Biogeochemical Float Observations.,"New estimates of pCO2 from profiling floats deployed by the Southern Ocean Carbon and Climate Observations and Modeling (SOCCOM) project have demonstrated the importance of wintertime outgassing south of the Polar Front, challenging the accepted magnitude of Southern Ocean carbon uptake (Gray et al., 2018, https://doi:10.1029/2018GL078013). Here, we put 3.5 years of SOCCOM observations into broader context with the global surface carbon dioxide database (Surface Ocean CO2 Atlas, SOCAT) by using the two interpolation methods currently used to assess the ocean models in the Global Carbon Budget (Le Quéré et al., 2018, https://doi:10.5194/essd-10-2141-2018) to create a ship-only, a float-weighted, and a combined estimate of Southern Ocean carbon fluxes (<35°S). In our ship-only estimate, we calculate a mean uptake of -1.14 ± 0.19 Pg C/yr for 2015-2017, consistent with prior studies. The float-weighted estimate yields a significantly lower Southern Ocean uptake of -0.35 ± 0.19 Pg C/yr. Subsampling of high-resolution ocean biogeochemical process models indicates that some of the differences between float and ship-only estimates of the Southern Ocean carbon flux can be explained by spatial and temporal sampling differences. The combined ship and float estimate minimizes the root-mean-square pCO2 difference between the mapped product and both data sets, giving a new Southern Ocean uptake of -0.75 ± 0.22 Pg C/yr, though with uncertainties that overlap the ship-only estimate. 
An atmospheric inversion reveals that a shift of this magnitude in the contemporary Southern Ocean carbon flux must be compensated for by ocean or land sinks within the Southern Hemisphere.",2019-11-16 +30786057,mHDFS-HoF: A generalized multilevel homodesmotic fragment-separation reaction based program for heat-of-formation calculation for acyclic hydrocarbons.,"Based on our modified classification of elemental species, a framework for automatic generation of multilevel Homodesmotic fragment-separation (mHDFS) reactions for chemical species was proposed. Combined the mHDFS framework with a database of heat of formation (HoF) and the calculated electronic structure data for the elemental mHD species, the mHDFS-HoF program was constructed in C/C++ language to calculate heat of formation for a species of interest on-the-fly. Using the electronic structure data calculated at CBS-QB3 level of theory for the elemental mHD species, applications and robustness of the code were discussed with several acyclic hydrocarbon systems including neutral and radical species. On-going work and extension to other systems were also discussed. The program and the supporting files can be freely downloaded at https://sites.google.com/view/mhdfs/. © 2019 Wiley Periodicals, Inc.",2019-02-20 +30778259,Automated evaluation of consistency within the PubChem Compound database.,"Identification of discrepant data in aggregated databases is a key step in data curation and remediation. We have applied the ALATIS approach, which is based on the international chemical shift identifier (InChI) model, to the full PubChem Compound database to generate unique and reproducible compound and atom identifiers for all entries for which three-dimensional structures were available. This exercise also served to identify entries with discrepancies between structures and chemical formulas or InChI strings. 
The use of unique compound identifiers and atom nomenclature should support more rigorous links between small-molecule databases including those containing atom-specific information of the type available from crystallography and spectroscopy. The comprehensive results from this analysis are publicly available through our webserver [http://alatis.nmrfam.wisc.edu/].",2019-02-19 +32892627,Isobaric Matching between Runs and Novel PSM-Level Normalization in MaxQuant Strongly Improve Reporter Ion-Based Quantification.,"Isobaric labeling has the promise of combining high sample multiplexing with precise quantification. However, normalization issues and the missing value problem of complete n-plexes hamper quantification across more than one n-plex. Here, we introduce two novel algorithms implemented in MaxQuant that substantially improve the data analysis with multiple n-plexes. First, isobaric matching between runs makes use of the three-dimensional MS1 features to transfer identifications from identified to unidentified MS/MS spectra between liquid chromatography-mass spectrometry runs in order to utilize reporter ion intensities in unidentified spectra for quantification. On typical datasets, we observe a significant gain in MS/MS spectra that can be used for quantification. Second, we introduce a novel PSM-level normalization, applicable to data with and without the common reference channel. It is a weighted median-based method, in which the weights reflect the number of ions that were used for fragmentation. On a typical dataset, we observe complete removal of batch effects and dominance of the biological sample grouping after normalization. Furthermore, we provide many novel processing and normalization options in Perseus, the companion software for the downstream analysis of quantitative proteomics results. 
All novel tools and algorithms are available with the regular MaxQuant and Perseus releases, which are downloadable at http://maxquant.org.",2020-09-16 +34377575,An examination of handwritten signatures forged using photosensitive signature stamp.,"Signature examination is the most common examination performed by any document examiner. Determination of the authenticity of a handwritten signature on a questioned document is an important task for forensic document examiners in the forensic science field. As a result of continuous developments in technology, a signature stamp can now be created using a photosensitive seal to enable the reproduction of a handwritten signature. These stamps are commonly used in China and several other countries. In this study, 10 types of black photosensitive stamp-pad ink, 10 brands of fountain pen ink, 15 types of black gel ink and six types of black erasable gel ink found on the Chinese domestic market were collected and 10 photosensitive signature stamps were created using the signatures of 10 people. Microscopic analysis, infrared (IR) and fluorescence analyses and microspectrophotometry (MSP) techniques were used to examine the resulting photosensitive signature stamp impressions when applied to printing papers, writing papers and invoice papers. By comparing the printing and spectral characteristics of the photosensitive signature stamp impressions with those of the signatures executed using the fountain pens, gel pens and erasable gel pens, it was possible to determine whether each signature was written or stamped using a photosensitive signature stamp. 
To validate these results, a 96.7% absolute accuracy and a 99.3% detection rate were achieved over a total of 150 blind tests conducted by five forensic document examiners, thus demonstrating that a combination of the four analysis methods used in this work can provide a more scientific approach and improve the accuracy and the detection rate of the examination process.KEY POINTSA signature stamp is a photosensitive seal made in the style of a handwritten signature.Although microscopic analysis can usually provide better examination results, a comprehensive examination method that includes microscopic analysis and ink composition analysis is required to improve the accuracy and the detection rate of the examination process.This study collected and tested photosensitive stamp-pad inks, fountain pen inks, gel inks and erasable inks.Infrared and fluorescence analyses and microspectrophotometry were able to distinguish the photosensitive ink from both erasable ink and fountain pen ink. Supplemental data for this article are available online at https://doi.org/10.1080/20961790.2021.1898755.",2021-05-03 +33064603,Exploring Sentence Diversity at the Boundary of Typical and Impaired Language Abilities.,"Purpose This review article summarizes programmatic research on sentence diversity in toddlers developing language typically and explores developmental patterns of sentence diversity in toddlers at risk for specific language impairment. Method The first half of this review article presents a sentence-focused approach to language assessment and intervention and reviews findings from empirical studies of sentence diversity. In the second half, subject and verb diversity in three simple sentence types are explored in an archival database of toddlers with varying levels of grammatical outcomes at 36 months of age: low average, mild/moderate delay, and severe delay. Results Descriptive findings from the archival database replicated previous developmental patterns. 
All toddlers with low-average language abilities produced diverse simple sentences by 30 months of age and exhibited greater sentence diversity with first-person I-subjects before third-person subjects. Third-person subject diversity emerged in a developmental sequence, increasing in one-argument copula contexts and one-argument subject-verb sentences before two-argument subject-verb-object sentences. This developmental pattern held across all three outcome groups. Third-person subjects were least diverse for children with severe grammatical delays and were absent in all sentence contexts for two children with severe delays at 36 months. Conclusions Sentence diversity increases gradually and expands in predictable patterns. Understanding these developmental patterns may help identify and treat children who display unexpected difficulty combining different subjects and verbs in flexible ways. Supplemental Material and Presentation Video https://doi.org/10.23641/asha.12915320.",2020-10-16 +30986089,"Climate Change, Human Health, and Social Stability: Addressing Interlinkages.","

Background

Abundant historical evidence demonstrates how environmental changes can affect social stability and, in turn, human health. A rapidly growing body of literature, largely from political science and economics, is examining the potential for and consequences associated with social instability related to current climate change. However, comparatively little of this research incorporates the effects on human health or the role of health systems in influencing the magnitude and types of instability that could occur.

Objective

The objective of this commentary is to articulate a conceptual framework incorporating health outcomes and health systems into theorized and observed linkages between climate change and social instability, illustrating in particular the health effects of natural resource shortages, infectious disease outbreaks, and migration.

Discussion

Although increasing evidence exists that climate change, health, and social instability are related, key questions remain about the pathways linking these factors, as well as the magnitude, causality, and directionality of relationships across spatial and temporal scales. Models seeking to explain and predict climate-related social unrest should incorporate the many linkages between climate change, human health, and social instability. Members of the environmental health research community should work closely with those in the political science and economics communities to help deepen understandings of climate-related stressors and shocks that affect instability and worsen health outcomes. https://doi.org/10.1289/EHP4534.",2019-04-01 +30845883,12 Components of a Strong Vision Health System of Care: Components 1 and 2-Family Education and Comprehensive Communication/Approval Process.,"The National Center for Children's Vision and Eye Health (NCCVEH) at Prevent Blindness partnered with the National Association of School Nurses (NASN) to provide guidance for school nurses responsible for screening the vision of preschool and K-12 students. Goals of this national partnership are to (1) standardize approaches to vision health, (2) facilitate follow up to eye care for students who do not pass vision screening, (3) provide family/caregiver friendly educational information, and (4) consult with leading pediatric eye care experts to promote evidence-based best practices. The NCCVEH/NASN partnership created a Vision and Eye Health page on the NASN website ( https://www.nasn.org/nasn-resources/practice-topics/vision-health ). This resource is organized according to the 12 Components of a Strong Vision Health System of Care. The 12 components emerged as the NCCVEH considered vision screening from a systems perspective. 
This systems perspective addresses key activities along the entire spectrum of care that supports a child's vision health-beginning with parent/caregiver education and ending with an annual evaluation of the school's vision health system. Each of these 12 components will be described in 4 installments of NASN School Nurse in 2019. This installment describes the first two components: Family Education and a Comprehensive Communication/Approval Process.",2019-03-08 +29268738,Models and analyses to understand threats to polio eradication.,"To achieve complete polio eradication, the live oral poliovirus vaccine (OPV) currently used must be phased out after the end of wild poliovirus transmission. However, poorly understood threats may arise when OPV use is stopped. To counter these threats, better models than those currently available are needed. Two articles recently published in BMC Medicine address these issues. Mercer et al. (BMC Med 15:180, 2017) developed a statistical model analysis of polio case data and characteristics of cases occurring in several districts in Pakistan to inform resource allocation decisions. Nevertheless, despite having the potential to accelerate the elimination of polio cases, their analyses are unlikely to advance our understanding OPV cessation threats. McCarthy et al. (BMC Med 15:175, 2017) explored one such threat, namely the emergence and transmission of serotype 2 circulating vaccine derived poliovirus (cVDPV2) after OPV2 cessation, and found that the risk of persistent spread of cVDPV2 to new areas increases rapidly 1-5 years after OPV2 cessation. Thus, recently developed models and analysis methods have the potential to guide the required steps to surpass these threats. 
'Big data' scientists could help with this; however, datasets covering all eradication efforts should be made readily available.Please see related articles: https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-017-0937-y and https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-017-0941-2 .",2017-12-22 +33392948,IRC-Fuse: improved and robust prediction of redox-sensitive cysteine by fusing of multiple feature representations.,"Redox-sensitive cysteine (RSC) thiol contributes to many biological processes. The identification of RSC plays an important role in clarifying some mechanisms of redox-sensitive factors; nonetheless, experimental investigation of RSCs is expensive and time-consuming. The computational approaches that quickly and accurately identify candidate RSCs using the sequence information are urgently needed. Herein, an improved and robust computational predictor named IRC-Fuse was developed to identify the RSC by fusing of multiple feature representations. To enhance the performance of our model, we integrated the probability scores evaluated by the random forest models implementing different encoding schemes. Cross-validation results exhibited that the IRC-Fuse achieved accuracy and AUC of 0.741 and 0.807, respectively. The IRC-Fuse outperformed exiting methods with improvement of 10% and 13% on accuracy and MCC, respectively, over independent test data. Comparative analysis suggested that the IRC-Fuse was more effective and promising than the existing predictors. 
For the convenience of experimental scientists, the IRC-Fuse online web server was implemented and publicly accessible at http://kurata14.bio.kyutech.ac.jp/IRC-Fuse/ .",2021-01-04 +31803027,Differential Striatal Axonal Arborizations of the Intratelencephalic and Pyramidal-Tract Neurons: Analysis of the Data in the MouseLight Database.,"There exist two major types of striatum-targeting neocortical neurons, specifically, intratelencephalic (IT) neurons and pyramidal-tract (PT) neurons. Regarding their striatal projections, it was once suggested that IT axons are extended whereas PT axons are primarily focal. However, subsequent study with an increased number of well-stained extended axons concluded that such an apparent distinction was spurious due to limited sample size. Recent work using genetically labeled neurons reintroduced the differential spatial extent of the striatal projections of IT and PT neurons through population-level analyses, complemented by observations of single axons. However, quantitative IT vs. PT comparison of a large number of axons remained to be conducted. We analyzed the data of axonal end-points of 161 IT neurons and 33 PT neurons in the MouseLight database (http://ml-neuronbrowser.janelia.org/). The number of axonal end-points in the ipsilateral striatum exhibits roughly monotonically decreasing distributions in both neuron types. Excluding neurons with no ipsilateral end-point, the distributions of the logarithm of the number of ipsilateral end-points are considerably overlapped between IT and PT neurons, although the proportion of neurons having more than 50 ipsilateral end-points is somewhat larger in IT neurons than in PT neurons. Looking at more details, among IT subpopulations in the secondary motor area (MOs), layer 5 neurons and bilateral striatum-targeting layer 2/3 neurons, but not contralateral striatum-non-targeting layer 2/3 neurons, have a larger number of ipsilateral end-points than MOs PT neurons. 
We also found that IT ipsilateral striatal axonal end-points are on average more widely distributed than PT end-points, especially in the medial-lateral direction. These results indicate that IT and PT striatal axons differ in the frequencies and spatial extent of end-points while there are wide varieties within each neuron type.",2019-11-15 +33540081,"m6AmPred: Identifying RNA N6, 2'-O-dimethyladenosine (m6Am) sites based on sequence-derived information.","N6,2'-O-dimethyladenosine (m6Am) is a reversible modification widely occurred on varied RNA molecules. The biological function of m6Am is yet to be known though recent studies have revealed its influences in cellular mRNA fate. Precise identification of m6Am sites on RNA is vital for the understanding of its biological functions. We present here m6AmPred, the first web server for in silico identification of m6Am sites from the primary sequences of RNA. Built upon the eXtreme Gradient Boosting with Dart algorithm (XgbDart) and EIIP-PseEIIP encoding scheme, m6AmPred achieved promising prediction performance with the AUCs greater than 0.954 when tested by 10-fold cross-validation and independent testing datasets. To critically test and validate the performance of m6AmPred, the experimentally verified m6Am sites from two data sources were cross-validated. The m6AmPred web server is freely accessible at: https://www.xjtlu.edu.cn/biologicalsciences/m6am, and it should make a useful tool for the researchers who are interested in N6,2'-O-dimethyladenosine RNA modification.",2021-02-02 +33886573,The impact of COVID-19 vaccination campaigns accounting for antibody-dependent enhancement.,"

Background

COVID-19 vaccines are approved, vaccination campaigns are launched, and worldwide return to normality seems within close reach. Nevertheless, concerns about the safety of COVID-19 vaccines arose, due to their fast emergency approval. In fact, the problem of antibody-dependent enhancement was raised in the context of COVID-19 vaccines.

Methods and findings

We introduce a complex extension of the model underlying the pandemic preparedness tool CovidSim 1.1 (http://covidsim.eu/) to optimize vaccination strategies with regard to the onset of campaigns, vaccination coverage, vaccination schedules, vaccination rates, and efficiency of vaccines. Vaccines are not assumed to immunize perfectly. Some individuals fail to immunize, some reach only partial immunity, and-importantly-some develop antibody-dependent enhancement, which increases the likelihood of developing symptomatic and severe episodes (associated with higher case fatality) upon infection. Only a fraction of the population will be vaccinated, reflecting vaccination hesitancy or contraindications. The model is intended to facilitate decision making by exploring ranges of parameters rather than to be fitted by empirical data. We parameterized the model to reflect the situation in Germany and predict increasing incidence (and prevalence) in early 2021 followed by a decline by summer. Assuming contact reductions (curfews, social distancing, etc.) to be lifted in summer, disease incidence will peak again. Fast vaccine deployment contributes to reduce disease incidence in the first quarter of 2021, and delay the epidemic outbreak after the summer season. Higher vaccination coverage results in a delayed and reduced epidemic peak. A coverage of 75%-80% is necessary to prevent an epidemic peak without further drastic contact reductions.

Conclusions

With the vaccine becoming available, compliance with contact reductions is likely to fade. To prevent further economic damage from COVID-19, high levels of immunization need to be reached before next year's flu season, and vaccination strategies and disease management need to be flexibly adjusted. The predictive model can serve as a refined decision support tool for COVID-19 management.",2021-04-22 +29157087,THANATOS: an integrative data resource of proteins and post-translational modifications in the regulation of autophagy.,"Macroautophagy/autophagy is a highly conserved process for degrading cytoplasmic contents, determines cell survival or death, and regulates the cellular homeostasis. Besides ATG proteins, numerous regulators together with various post-translational modifications (PTMs) are also involved in autophagy. In this work, we collected 4,237 experimentally identified proteins regulated in autophagy and cell death pathways from the literature. Then we computationally identified potential orthologs of known proteins, and developed a comprehensive database of The Autophagy, Necrosis, ApopTosis OrchestratorS (THANATOS, http://thanatos.biocuckoo.org ), containing 191,543 proteins potentially associated with autophagy and cell death pathways in 164 eukaryotes. We performed an evolutionary analysis of ATG genes, and observed that ATGs required for the autophagosome formation are highly conserved across eukaryotes. Further analyses revealed that known cancer genes and drug targets were overrepresented in human autophagy proteins, which were significantly associated in a number of signaling pathways and human diseases. By reconstructing a human kinase-substrate phosphorylation network for ATG proteins, our results confirmed that phosphorylation play a critical role in regulating autophagy. In total, we mapped 65,015 known sites of 11 types of PTMs to collected proteins, and revealed that all types of PTM substrates were enriched in human autophagy. 
In addition, we observed multiple types of PTM regulators such as protein kinases and ubiquitin E3 ligases or adaptors were significantly associated with human autophagy, and again the results emphasized the importance of PTM regulations in autophagy. We anticipated THANATOS can be a useful resource for further studies.",2018-01-01 +30052776,WebMetabase: cleavage sites analysis tool for natural and unnatural substrates from diverse data source.,"

Summary

More than 150 peptide therapeutics are globally in clinical development. Many enzymatic barriers should be crossed by a successful drug to be prosperous in such a process. Therefore, the new peptide drugs must be designed preventing the potential protease cleavage to make the compound less susceptible to protease reaction. We present a new data analysis tool developed in WebMetabase, an approach that stores the information from liquid chromatography mass spectrometry-based experimental data or from external sources such as the MEROPS database. The tool is a chemically aware system where each peptide substrate is presented as a sequence of structural blocks (SBs) connected by amide bonds and not being limited to the natural amino acids. Each SB is characterized by its pharmacophoric and physicochemical properties including a similarity score that describes likelihood between a SB and each one of the other SBs in the database. This methodology can be used to perform a frequency analysis to discover the most frequent cleavage sites for similar amide bonds, defined based on the similarity of the SB that participate in such a bond within the experimentally derived and/or public database.

Availability and implementation

http://webmetabase.com:8182/WebMetabaseBioinformatics/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +31929523,PlotTwist: A web app for plotting and annotating continuous data.,"Experimental data can broadly be divided in discrete or continuous data. Continuous data are obtained from measurements that are performed as a function of another quantitative variable, e.g., time, length, concentration, or wavelength. The results from these types of experiments are often used to generate plots that visualize the measured variable on a continuous, quantitative scale. To simplify state-of-the-art data visualization and annotation of data from such experiments, an open-source tool was created with R/shiny that does not require coding skills to operate it. The freely available web app accepts wide (spreadsheet) and tidy data and offers a range of options to normalize the data. The data from individual objects can be shown in 3 different ways: (1) lines with unique colors, (2) small multiples, and (3) heatmap-style display. Next to this, the mean can be displayed with a 95% confidence interval for the visual comparison of different conditions. Several color-blind-friendly palettes are available to label the data and/or statistics. The plots can be annotated with graphical features and/or text to indicate any perturbations that are relevant. All user-defined settings can be stored for reproducibility of the data visualization. The app is dubbed PlotTwist and runs locally or online: https://huygens.science.uva.nl/PlotTwist.",2020-01-13 +31368353,OSkirc: a web tool for identifying prognostic biomarkers in kidney renal clear cell carcinoma.,"Aim: To develop a free and quick analysis online tool that allows users to easily investigate the prognostic potencies of interesting genes in kidney renal clear cell carcinoma (KIRC). 
Patients & methods: A total of 629 KIRC cases with gene expression profiling data and clinical follow-up information are collected from public Gene Expression Omnibus and The Cancer Genome Atlas databases. Results: One web application called Online consensus Survival analysis for KIRC (OSkirc) that can be used for exploring the prognostic implications of interesting genes in KIRC was constructed. By OSkirc, users could simply input the gene symbol to receive the Kaplan-Meier survival plot with hazard ratio and log-rank p-value. Conclusion: OSkirc is extremely valuable for basic and translational researchers to screen and validate the prognostic potencies of genes for KIRC, publicly accessible at http://bioinfo.henu.edu.cn/KIRC/KIRCList.jsp.",2019-08-01 +29569340,Seventy Years of RN Effectiveness: A Database Development Project to Inform Best Practice.,"

Background

The appropriate nursing staff mix is imperative to the provision of quality care. Nurse staffing levels and staff mix vary from country to country, as well as between care settings. Understanding how staffing skill mix impacts patient, organizational, and financial outcomes is critical in order to allow policymakers and clinicians to make evidence-informed staffing decisions.

Aims

This paper reports on the methodology for creation of an electronic database of studies exploring the effectiveness of Registered Nurses (RNs) on clinical and patient outcomes, organizational and nurse outcomes, and financial outcomes.

Methods

Comprehensive literature searches were conducted in four electronic databases. Inclusion criteria for the database included studies published from 1946 to 2016, peer-reviewed international literature, and studies focused on RNs in all health-care disciplines, settings, and sectors. Masters-prepared nurse researchers conducted title and abstract screening and relevance review to determine eligibility of studies for the database. High-level analysis was conducted to determine key outcomes and the frequency at which they appeared within the database.

Results

Of the initial 90,352 records, a total of 626 abstracts were included within the database. Studies were organized into three groups corresponding to clinical and patient outcomes, organizational and nurse-related outcomes, and financial outcomes. Organizational and nurse-related outcomes represented the largest category in the database with 282 studies, followed by clinical and patient outcomes with 244 studies, and lastly financial outcomes, which included 124 studies.

Linking evidence to action

The comprehensive database of evidence for RN effectiveness is freely available at https://rnao.ca/bpg/initiatives/RNEffectiveness. The database will serve as a resource for the Registered Nurses' Association of Ontario, as well as a tool for researchers, clinicians, and policymakers for making evidence-informed staffing decisions.",2018-03-23 +31725858,PlantCircNet: a database for plant circRNA-miRNA-mRNA regulatory networks. ,"Circular RNA (circRNA) is a novel type of endogenous noncoding RNA with covalently closed loop structures, which are widely expressed in various tissues and have functional implications in cellular processes. Acting as competing endogenous RNAs (ceRNAs), circRNAs are important regulators of miRNA activities. The identification of these circRNAs underlines the increasing complexity of ncRNA-mediated regulatory networks. However, more biological evidence is required to infer direct circRNA-miRNA associations while little attention has been paid to circRNAs in plants as compared to the abundant research in mammals. PlantCircNet is presented as an integrated database that provides visualized plant circRNA-miRNA-mRNA regulatory networks containing identified circRNAs in eight model plants. The bioinformatics integration of data from multiple sources reveals circRNA-miRNA-mRNA regulatory networks and helps identify mechanisms underlying metabolic effects of circRNAs. An enrichment analysis tool was implemented to detect significantly overrepresented Gene Ontology categories of miRNA targets. The genomic annotations, sequences and isoforms of circRNAs were also investigated. PlantCircNet provides a user-friendly interface for querying detailed information of specific plant circRNAs. The database may serve as a resource to facilitate plant circRNA research. 
Several circRNAs were identified to play potential regulatory roles in flower development and response to environmental stress from regulatory networks related with miR156a and AT5G59720, respectively. This present research indicated that circRNAs could be involved in diverse biological processes. Database URL: http://bis.zju.edu.cn/plantcircnet/index.php.",2017-01-01 +32392251,"A systematic machine learning and data type comparison yields metagenomic predictors of infant age, sex, breastfeeding, antibiotic usage, country of origin, and delivery type.","The microbiome is a new frontier for building predictors of human phenotypes. However, machine learning in the microbiome is fraught with issues of reproducibility, driven in large part by the wide range of analytic models and metagenomic data types available. We aimed to build robust metagenomic predictors of host phenotype by comparing prediction performances and biological interpretation across 8 machine learning methods and 4 different types of metagenomic data. Using 1,570 samples from 300 infants, we fit 7,865 models for 6 host phenotypes. We demonstrate the dependence of accuracy on algorithm choice and feature definition in microbiome data and propose a framework for building microbiome-derived indicators of host phenotype. We additionally identify biological features predictive of age, sex, breastfeeding status, historical antibiotic usage, country of origin, and delivery type. Our complete results can be viewed at http://apps.chiragjpgroup.org/ubiome_predictions/.",2020-05-11 +33054771,smORFunction: a tool for predicting functions of small open reading frames and microproteins.,"

Background

A small open reading frame (smORF) is an open reading frame with a length of less than 100 codons. Microproteins, translated from smORFs, have been found to participate in a variety of biological processes such as muscle formation and contraction, cell proliferation, and immune activation. Although previous studies have collected and annotated a large abundance of smORFs, functions of the vast majority of smORFs are still unknown. It is thus increasingly important to develop computational methods to annotate the functions of these smORFs.

Results

In this study, we collected 617,462 unique smORFs from three studies. The expression of smORF RNAs was estimated by reannotated microarray probes. Using a speed-optimized correlation algorithm, the functions of smORFs were predicted by their correlated genes with known functional annotations. After applying our method to 5 known microproteins from the literature, our method successfully predicted their functions. Further validation from the UniProt database showed that at least one function of 202 out of 270 microproteins was predicted.

Conclusions

We developed a method, smORFunction, to provide function predictions of smORFs/microproteins in at most 265 models generated from 173 datasets, including 48 tissues/cells, 82 diseases (and normal). The tool is available at https://www.cuilab.cn/smorfunction .",2020-10-14 +29701758,BNPMDA: Bipartite Network Projection for MiRNA-Disease Association prediction.,"

Motivation

A large number of resources have been devoted to exploring the associations between microRNAs (miRNAs) and diseases in the recent years. However, the experimental methods are expensive and time-consuming. Therefore, the computational methods to predict potential miRNA-disease associations have been paid increasing attention.

Results

In this paper, we proposed a novel computational model of Bipartite Network Projection for MiRNA-Disease Association prediction (BNPMDA) based on the known miRNA-disease associations, integrated miRNA similarity and integrated disease similarity. We firstly described the preference degree of a miRNA for its related disease and the preference degree of a disease for its related miRNA with the bias ratings. We constructed bias ratings for miRNAs and diseases by using agglomerative hierarchical clustering according to the three types of networks. Then, we implemented the bipartite network recommendation algorithm to predict the potential miRNA-disease associations by assigning transfer weights to resource allocation links between miRNAs and diseases based on the bias ratings. BNPMDA had been shown to improve the prediction accuracy in comparison with previous models according to the area under the receiver operating characteristics (ROC) curve (AUC) results of three typical cross validations. As a result, the AUCs of Global LOOCV, Local LOOCV and 5-fold cross validation obtained by implementing BNPMDA were 0.9028, 0.8380 and 0.8980 ± 0.0013, respectively. We further implemented two types of case studies on several important human complex diseases to confirm the effectiveness of BNPMDA. In conclusion, BNPMDA could effectively predict the potential miRNA-disease associations at a high accuracy level.

Availability and implementation

BNPMDA is available via http://www.escience.cn/system/file?fileId=99559.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-09-01 +33978452,"Contamination of Retail Meat Samples with Multidrug-Resistant Organisms in Relation to Organic and Conventional Production and Processing: A Cross-Sectional Analysis of Data from the United States National Antimicrobial Resistance Monitoring System, 2012-2017.","

Background

During food animal production, animals are exposed to, colonized by, and sometimes infected with bacteria that may contaminate animal products with susceptible and multidrug-resistant organisms (MDRO). The United States' Organic Foods Production Act resulted in decreased antibiotic use in some animal production operations. Some studies have reported that decreased antibiotic use is associated with reduced MDRO on meat.

Objectives

The aim of this study was to investigate associations of meat production and processing methods with MDRO and overall bacterial contamination of retail meats.

Methods

Bacterial contamination data from 2012 to 2017 for chicken breast, ground beef, ground turkey, and pork chops were downloaded from the National Antimicrobial Resistance Monitoring System. Poisson regression models with robust variance were used to estimate associations with MDRO contamination and any contamination (adjusted for year and meat type) overall, and according to bacteria genus (Salmonella, Campylobacter, Enterococcus, Escherichia coli) and meat type.

Results

A total of 39,349 retail meat samples were linked to 216 conventional, 123 split (conventional and organic), and three organic processing facilities. MDRO contamination was similar in conventionally produced meats processed at split vs. conventional facilities but was significantly lower in organically produced meats processed at split facilities [adjusted prevalence ratio (aPR)=0.43; 95% CI: 0.30, 0.63]. Meat processed by split vs. conventional processors had higher or similar MDRO contamination for all tested bacterial genera except Campylobacter (aPR=0.29; 95% CI: 0.13, 0.64). The prevalence of any contamination was lower in samples processed at split vs. conventional facilities for aggregated samples (aPR=0.70; 95% CI: 0.68, 0.73) and all meat types and bacterial genera.

Discussion

Organically produced and processed retail meat samples had a significantly lower prevalence of MDRO than conventionally produced and processed samples had, whereas meat from split processors had a lower prevalence of any contamination than samples from conventional processors had. Additional studies are needed to confirm findings and clarify specific production and processing practices that might explain them. https://doi.org/10.1289/EHP7327.",2021-05-12 +33355192,Zolpidem Versus Trazodone Initiation and the Risk of Fall-Related Fractures among Individuals Receiving Maintenance Hemodialysis.,"

Background and objectives

Zolpidem, a nonbenzodiazepine hypnotic, and trazodone, a sedating antidepressant, are the most common medications used to treat insomnia in the United States. Both drugs have side effect profiles (e.g., drowsiness, dizziness, and cognitive and motor impairment) that can heighten the risk of falls and fractures. Despite widespread zolpidem and trazodone use, little is known about the comparative safety of these medications in patients receiving hemodialysis, a vulnerable population with an exceedingly high fracture rate.

Design, setting, participants, & measurements

Using data from the United States Renal Data System registry (2013-2016), we conducted a retrospective cohort study to investigate the association between the initiation of zolpidem versus trazodone therapy and the 30-day risk of hospitalized fall-related fractures among Medicare-enrolled patients receiving maintenance hemodialysis. We used an active comparator new-user design and estimated 30-day inverse probability of treatment-weighted hazard ratios and risk differences. We treated death as a competing event.

Results

A total of 31,055 patients were included: 18,941 zolpidem initiators (61%) and 12,114 trazodone initiators (39%). During the 30-day follow-up period, 101 fall-related fractures occurred. Zolpidem versus trazodone initiation was associated with a higher risk of hospitalized fall-related fracture (weighted hazard ratio, 1.71; 95% confidence interval, 1.11 to 2.63; weighted risk difference, 0.17%; 95% confidence interval, 0.07% to 0.29%). This association was more pronounced among individuals prescribed higher zolpidem doses (hazard ratio, 1.85; 95% confidence interval, 1.10 to 3.01; and risk difference, 0.20%; 95% confidence interval, 0.04% to 0.38% for higher-dose zolpidem versus trazodone; and hazard ratio, 1.60; 95% confidence interval, 1.01 to 2.55 and risk difference, 0.14%; 95% confidence interval, 0.03% to 0.27% for lower-dose zolpidem versus trazodone). Sensitivity analyses using longer follow-up durations yielded similar results.

Conclusions

Among individuals receiving maintenance hemodialysis, zolpidem initiators had a higher risk of hospitalized fall-related fracture compared with trazodone initiators.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2020_12_18_CJN10070620_final.mp3.",2020-12-18 +33829950,"Exercise, Decision-Making, and Cannabis-Related Outcomes among Adolescents.","

Objective

Poor decision-making may represent a risk factor for adverse cannabis-related outcomes, whereas exercise has been linked to better executive functioning and substance use outcomes. This study examines the associations between self-reported exercise and cannabis use (CU) outcomes over 6 months among adolescents, and whether these are mediated by exercise-related effects on decision-making. Method: Participants were 387 adolescents aged 15-18 who completed two assessments 6 months apart. Self-reported past 6-month hours/week of exercise were assessed at baseline. At the 6-month follow-up, participants completed measures assessing past 6-month CU frequency, presence of CU disorder (CUD), and CU-related problems, as well as risky decision-making tasks (Iowa Gambling Task, Game of Dice Task, Cups Task), which were used to derive a latent construct of decision-making. We used prospective mediation to examine the role of decision-making in the relationship between exercise and CU outcomes. Results: More self-reported exercise at baseline predicted greater CU frequency at the 6-month follow-up, but did not predict the presence of a CUD, or cannabis-related problems. After controlling for confounds, baseline exercise did not predict better decision-making at follow-up. Decision-making did not predict CU outcomes, and indirect effects of decision-making were not significant. Conclusions: Contrary to hypotheses, adolescents reporting more exercise at baseline also reported higher CU frequency in our sample. This association may be explained by factors like sample characteristics or sports types, but more research is needed to explore this. 
Results did not support a mediating role for decision-making in the associations between exercise and CU outcomes.Supplemental data for this article can be accessed online at https://doi.org/10.1080/10826084.2021.1906279.",2021-04-08 +33433736,Modeling LSD1-Mediated Tumor Stagnation.,"LSD1 (KDMA1) has gained attention in the last decade as a cancer biomarker and drug target. In particular, recent work suggests that LSD1 inhibition alone reduces tumor growth, increases T cell tumor infiltration, and complements PD1/PDL1 checkpoint inhibitor therapy. In order to elucidate the immunogenic effects of LSD1 inhibition, we develop a mathematical model of tumor growth under the influence of the adaptive immune response. In particular, we investigate the anti-tumor cytotoxicity of LSD1-mediated T cell dynamics, in order to better understand the synergistic potential of LSD1 inhibition in combination immunotherapies, including checkpoint inhibitors. To that end, we formulate a non-spatial delay differential equation model and fit to the B16 mouse model data from Sheng et al. (Cell 174(3):549-563, 2018. https://doi.org/10.1016/j.cell.2018.05.052 ). Our results suggest that the immunogenic effect of LSD1 inhibition accelerates anti-tumor cytotoxicity. However, cytotoxicity does not seem to account for the slower growth observed in LSD1-inhibited tumors, despite evidence suggesting immune-mediation of this effect.",2021-01-12 +26989151,KinetochoreDB: a comprehensive online resource for the kinetochore and its related proteins. ,"KinetochoreDB is an online resource for the kinetochore and its related proteins. 
It provides comprehensive annotations on 1554 related protein entries in terms of their amino acid sequence, protein domain context, protein 3D structure, predicted intrinsically disordered region, protein-protein interaction, post-translational modification site, functional domain and key metabolic/signaling pathways, integrating several public databases, computational annotations and experimental results. KinetochoreDB provides interactive and customizable search and data display functions that allow users to interrogate the database in an efficient and user-friendly manner. It uses PSI-BLAST searches to retrieve the homologs of all entries and generate multiple sequence alignments that contain important evolutionary information. This knowledgebase also provides annotations of single point mutations for entries with respect to their pathogenicity, which may be useful for generation of new hypotheses on their functions, as well as follow-up studies of human diseases. Database URL: http://lightning.med.monash.edu/kinetochoreDB2/.",2016-03-17 +33632201,Diabetes mellitus hospitalization and mortality rate according to a national database in Brazil: a longitudinal study.,"

Background

Diabetes mellitus (DM) is an important public health problem worldwide. In addition to the impairment in functionality, the large number of complications which lead to hospitalizations results in high treatment costs. The aim of this study was to analyze the incidence of hospitalizations, mortality rate and hospital costs, as well as to observe the temporal trend of hospitalizations and length of hospital stay due to DM between 2008 and 2019 in Brazil.

Methods

This is a longitudinal descriptive study in which all data regarding hospital admissions registered in the Brazilian system of Hospital Information of ""Sistema Único de Saúde"" (SIH/SUS; http://datasus.saude.gov.br ) due to DM (ICD-10) were included. Comparisons among the groups were performed by an unpaired Student's t-test, two-way ANOVA with a Tukey post hoc test (p < 0.05).

Results

An increased hospitalization of 1.83% due to DM was observed between 2008 and 2019 in Brazil. The Southeastern region had the highest incidence (34.6%) and mortality rate when compared to the other regions (p < 0.05). We also found that females were more likely to be hospitalized in comparison to males, without a statistically significant difference. Finally, a progressive increase of hospitalizations and mortality rate were observed according to age groups, as well as increased spending due to DM hospitalizations over the years.

Conclusion

Hospitalizations due to DM in Brazil showed an expressive increase over the last 12 years, and there is a need for primary healthcare interventions to help reduce this situation.",2021-02-25 +33974460,How a Medication for Opioid Use Disorder Curriculum Translates into Experiences and Internal Medicine Residents' Understanding of Patients with Opioid Use Disorder.,"

Problem

The number of people with an Opioid Use Disorder (OUD) continues to outpace access to associated medication. Ninety-six percent of states report higher rates of OUD than access to medications, and, despite being the standard of care, only 3% of physicians currently prescribe medication for opioid use disorder (MOUD). Prior studies have shown that decreasing barriers, such as a lack of knowledge about MOUD, increased physicians' willingness to prescribe. However, most internal medicine residency programs do not have a required addiction curriculum. As a result, we created a curriculum and conducted qualitative interviews with residents to better understand experiences with the curriculum.

Intervention

In an effort to overcome physician-centered barriers associated with prescribing MOUD, we developed and implemented a week-long curriculum, Addiction Week, for second and third year Internal Medicine Residents at Indiana University School of Medicine in a safety-net clinic. The curriculum included the following: didactics on substance use disorder (SUD), including OUD and alcohol use disorder, and MOUD (mostly buprenorphine), and mostly web-based, peer-reviewed and guideline based readings about addiction, direct observation of addiction counselors, direct discussion with people receiving MOUD, observation of a group therapy session, informal discussion with providers who prescribe MOUD, and, for some residents, observation of a physician prescribing MOUD. After completing the curriculum, the residents participated in an hour long audio-recorded interview to better understand their experiences with the curriculum.

Context

This study was completed at a residency program where residents were not previously exposed to outpatient MOUD prescribing. Due to limited availability of faculty treating patients with MOUD, residents spent the majority of their time shadowing a social worker.

Impact

Residents described gaining a deeper understanding of OUD by having the opportunity to interact with patients in a stable outpatient setting, which for many led to increased confidence and willingness to prescribe MOUD for people with OUD.

Lessons learned

The greater understanding of addiction and willingness to prescribe MOUD described by residents in this study indicate that this type of curriculum may be a promising way to increase MOUD prescribing. Further studies are needed to evaluate whether this intervention can change prescribing behaviors.Supplemental data for this article is available online at https://doi.org/10.1080/10401334.2021.1897597.",2021-05-11 +26697388,Genome-wide expression analysis comparing hypertrophic changes in normal and dysferlinopathy mice.,"Because myostatin normally limits skeletal muscle growth, there are extensive efforts to develop myostatin inhibitors for clinical use. One potential concern is that in muscle degenerative diseases, inducing hypertrophy may increase stress on dystrophic fibers. Our study shows that blocking this pathway in dysferlin deficient mice results in early improvement in histopathology but ultimately accelerates muscle degeneration. Hence, benefits of this approach should be weighed against these potential detrimental effects. Here, we present detailed experimental methods and analysis for the gene expression profiling described in our recently published study in Human Molecular Genetics (Lee et al., 2015). Our data sets have been deposited in the Gene Expression Omnibus (GEO) database (GSE62945) and are available at http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE62945. Our data provide a resource for exploring molecular mechanisms that are related to hypertrophy-induced, accelerated muscular degeneration in dysferlinopathy.",2015-10-24 +33769846,Discovery of New Protein Targets of BPA Analogs and Derivatives Associated with Noncommunicable Diseases: A Virtual High-Throughput Screening.,"

Background

Bisphenol A analogs and derivatives (BPs) have emerged as new contaminants with little or no information about their toxicity. These have been found in numerous everyday products, from thermal paper receipts to plastic containers, and measured in human samples.

Objectives

The objectives of this research were to identify in silico new protein targets of BPs associated with seven noncommunicable diseases (NCDs), and to study their protein-ligand interactions using computer-aided tools.

Methods

Fifty BPs were identified by a literature search and submitted to a virtual high-throughput screening (vHTS) with 328 proteins associated with NCDs. Protein-protein interactions between predicted targets were examined using STRING, and the protocol was validated in terms of binding site recognition and correlation between in silico affinities and in vitro data.

Results

According to the vHTS, several BPs may target proteins associated with NCDs, some of them with stronger affinities than bisphenol A (BPA). The best affinity score (the highest in silico affinity absolute value) was obtained after docking 4,4'-bis(N-carbamoyl-4-methylbenzensulfonamide)diphenylmethane (BTUM) on estradiol 17-beta-dehydrogenase 1 (-13.7 kcal/mol). However, other molecules, such as bisphenol A bis(diphenyl phosphate) (BDP), bisphenol PH (BPPH), and Pergafast 201 also exhibited great affinities (top 10 affinity scores for each disease) with proteins related to NCDs.

Discussion

Molecules such as BTUM, BDP, BPPH, and Pergafast 201 could be targeting key signaling pathways related to NCDs. These BPs should be prioritized for in vitro and in vivo toxicity testing and to further assess their possible role in the development of these diseases. https://doi.org/10.1289/EHP7466.",2021-03-26 +27789703,Prokaryotic Virus Orthologous Groups (pVOGs): a resource for comparative genomics and protein family annotation.,"Viruses are the most abundant and diverse biological entities on earth, and while most of this diversity remains completely unexplored, advances in genome sequencing have provided unprecedented glimpses into the virosphere. The Prokaryotic Virus Orthologous Groups (pVOGs, formerly called Phage Orthologous Groups, POGs) resource has aided in this task over the past decade by using automated methods to keep pace with the rapid increase in genomic data. The uses of pVOGs include functional annotation of viral proteins, identification of genes and viruses in uncharacterized DNA samples, phylogenetic analysis, large-scale comparative genomics projects, and more. The pVOGs database represents a comprehensive set of orthologous gene families shared across multiple complete genomes of viruses that infect bacterial or archaeal hosts (viruses of eukaryotes will be added at a future date). The pVOGs are constructed within the Clusters of Orthologous Groups (COGs) framework that is widely used for orthology identification in prokaryotes. Since the previous release of the POGs, the size has tripled to nearly 3000 genomes and 300 000 proteins, and the number of conserved orthologous groups doubled to 9518. User-friendly webpages are available, including multiple sequence alignments and HMM profiles for each VOG. These changes provide major improvements to the pVOGs database, at a time of rapid advances in virus genomics. 
The pVOGs database is hosted jointly at the University of Iowa at http://dmk-brain.ecn.uiowa.edu/pVOGs and the NCBI at ftp://ftp.ncbi.nlm.nih.gov/pub/kristensen/pVOGs/home.html.",2016-10-26 +32246829,T-Gene: improved target gene prediction.,"

Motivation

Identifying the genes regulated by a given transcription factor (TF) (its 'target genes') is a key step in developing a comprehensive understanding of gene regulation. Previously, we developed a method (CisMapper) for predicting the target genes of a TF based solely on the correlation between a histone modification at the TF's binding site and the expression of the gene across a set of tissues or cell lines. That approach is limited to organisms for which extensive histone and expression data are available, and does not explicitly incorporate the genomic distance between the TF and the gene.

Results

We present the T-Gene algorithm, which overcomes these limitations. It can be used to predict which genes are most likely to be regulated by a TF, and which of the TF's binding sites are most likely involved in regulating particular genes. T-Gene calculates a novel score that combines distance and histone/expression correlation, and we show that this score accurately predicts when a regulatory element bound by a TF is in contact with a gene's promoter, achieving median precision above 60%. T-Gene is easy to use via its web server or as a command-line tool, and can also make accurate predictions (median precision above 40%) based on distance alone when extensive histone/expression data is not available for the organism. T-Gene provides an estimate of the statistical significance of each of its predictions.

Availability and implementation

The T-Gene web server, source code, histone/expression data and genome annotation files are provided at http://meme-suite.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +32647128,"IDEAL, the Infectious Diseases of East African Livestock project open access database and biobank.","The Infectious Diseases of East African Livestock (IDEAL) project was a longitudinal cohort study of calf health which was conducted in Western Kenya between 2007-2010. A total of 548 East African shorthorn zebu calves were recruited at birth and followed at least every 5 weeks during the first year of life. Comprehensive clinical and epidemiological data, blood and tissue samples were collected at every visit. These samples were screened for over 100 different pathogens or infectious exposures, using a range of diagnostic methods. This manuscript describes this comprehensive dataset and bio-repository, and how to access it through a single online site ( http://data.ctlgh.org/ideal/ ). This provides extensive filtering and searching capabilities. These data are useful to illustrate outcomes of multiple infections on health, investigate patterns of morbidity and mortality due to parasite infections, and to study genotypic determinants of immunity and disease.",2020-07-09 +31647523,SINC: a scale-invariant deep-neural-network classifier for bulk and single-cell RNA-seq data.,"

Motivation

Scaling by sequencing depth is usually the first step of analysis of bulk or single-cell RNA-seq data, but estimating sequencing depth accurately can be difficult, especially for single-cell data, risking the validity of downstream analysis. It is thus of interest to eliminate the use of sequencing depth and analyze the original count data directly.

Results

We call an analysis method 'scale-invariant' (SI) if it gives the same result under different estimates of sequencing depth and hence can use the original count data without scaling. For the problem of classifying samples into pre-specified classes, such as normal versus cancerous, we develop a deep-neural-network based SI classifier named scale-invariant deep neural-network classifier (SINC). On nine bulk and single-cell datasets, the classification accuracy of SINC is better than or competitive to the best of eight other classifiers. SINC is easier to use and more reliable on data where proper sequencing depth is hard to determine.

Availability and implementation

The source code of SINC is available at https://www.nd.edu/~jli9/SINC.zip.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +32532357,SURF: integrative analysis of a compendium of RNA-seq and CLIP-seq datasets highlights complex governing of alternative transcriptional regulation by RNA-binding proteins.,"Advances in high-throughput profiling of RNA-binding proteins (RBPs) have resulted in CLIP-seq datasets coupled with transcriptome profiling by RNA-seq. However, analysis methods that integrate both types of data are lacking. We describe SURF, Statistical Utility for RBP Functions, for integrative analysis of large collections of CLIP-seq and RNA-seq data. We demonstrate SURF's ability to accurately detect differential alternative transcriptional regulation events and associate them to local protein-RNA interactions. We apply SURF to ENCODE RBP compendium and carry out downstream analysis with additional reference datasets. The results of this application are browsable at http://www.statlab.wisc.edu/shiny/surf/.",2020-06-12 +32832266,Improved normalization of species count data in ecology by scaling with ranked subsampling (SRS): application to microbial communities.,"

Background

Analysis of species count data in ecology often requires normalization to an identical sample size. Rarefying (random subsampling without replacement), which is the current standard method for normalization, has been widely criticized for its poor reproducibility and potential distortion of the community structure. In the context of microbiome count data, researchers explicitly advised against the use of rarefying. Here we introduce a normalization method for species count data called scaling with ranked subsampling (SRS) and demonstrate its suitability for the analysis of microbial communities.

Methods

SRS consists of two steps. In the scaling step, the counts for all species or operational taxonomic units (OTUs) are divided by a scaling factor chosen in such a way that the sum of scaled counts equals the selected total number of counts Cmin. The relative frequencies of all OTUs remain unchanged. In the subsequent ranked subsampling step, non-integer count values are converted into integers by an algorithm that minimizes subsampling error with regard to the population structure (relative frequencies of species or OTUs) while keeping the total number of counts equal Cmin. SRS and rarefying were compared by normalizing a test library representing a soil bacterial community. Common parameters of biodiversity and population structure (Shannon index H', species richness, species composition, and relative abundances of OTUs) were determined for libraries normalized to different size by rarefying as well as SRS with 10,000 replications each. An implementation of SRS in R is available for download (https://doi.org/10.20387/BONARES-2657-1NP3).

Results

SRS showed greater reproducibility and preserved OTU frequencies and alpha diversity better than rarefying. The variance in Shannon diversity increased with the reduction of the library size after rarefying but remained zero for SRS. Relative abundances of OTUs strongly varied among libraries generated by rarefying, whereas libraries normalized by SRS showed only negligible variation. Bray-Curtis index of dissimilarity among replicates of the same library normalized by rarefying revealed a large variation in species composition, which reached complete dissimilarity (not a single OTU shared) among some libraries rarefied to a small size. The dissimilarity among replicated libraries normalized by SRS remained negligibly low at each library size. The variance in dissimilarity increased with the decreasing library size after rarefying, whereas it remained either zero or negligibly low after SRS.

Conclusions

Normalization of OTU or species counts by scaling with ranked subsampling preserves the original community structure by minimizing subsampling errors. We therefore propose SRS for the normalization of biological count data.",2020-08-03 +33182081,"Binary and multi-class classification for androgen receptor agonists, antagonists and binders.","Androgens and androgen receptor regulate a variety of biological effects in the human body. The impaired functioning of androgen receptor may have different adverse health effects from cancer to infertility. Therefore, it is important to determine whether new chemicals have any binding activity and act as androgen agonists or antagonists before commercial use. Due to the large number of chemicals that require experimental testing, the computational methods are a viable alternative. Therefore, the aim of the present study was to develop predictive QSAR models for classifying compounds according to their activity at the androgen receptor. A large data set of chemicals from the CoMPARA project was used for this purpose and random forest classification models have been developed for androgen binding, agonistic, and antagonistic activity. In addition, a unique effort has been made for multi-class approach that discriminates between inactive compounds, agonists and antagonists simultaneously. For the evaluation set, the classification models predicted agonists with 80% of accuracy and for the antagonists' and binders' the respective metrics were 72% and 78%. Combining agonists, antagonists and inactive compounds into a multi-class approach added complexity to the modelling task and resulted to 64% prediction accuracy for the evaluation set. Considering the size of the training data sets and their imbalance, the achieved evaluation accuracy is very good. 
The final classification models are available for exploring and predicting at QsarDB repository (https://doi.org/10.15152/QDB.236).",2020-09-11 +32641370,A Complex Systems Model of Breast Cancer Etiology: The Paradigm II Conceptual Model.,"

Background

The etiology of breast cancer is a complex system of interacting factors from multiple domains. New knowledge about breast cancer etiology continues to be produced by the research community, and the communication of this knowledge to other researchers, practitioners, decision makers, and the public is a challenge.

Methods

We updated the previously published Paradigm model (PMID: 25017248) to create a framework that describes breast cancer etiology in four overlapping domains of biologic, behavioral, environmental, and social determinants. This new Paradigm II conceptual model was part of a larger modeling effort that included input from multiple experts in fields from genetics to sociology, taking a team and transdisciplinary approach to the common problem of describing breast cancer etiology for the population of California women in 2010. Recent literature was reviewed with an emphasis on systematic reviews when available and larger epidemiologic studies when they were not. Environmental chemicals with strong animal data on etiology were also included.

Results

The resulting model illustrates factors with their strength of association and the quality of the available data. The published evidence supporting each relationship is made available herein, and also in an online dynamic model that allows for manipulation of individual factors leading to breast cancer (https://cbcrp.org/causes/).

Conclusions

The Paradigm II model illustrates known etiologic factors in breast cancer, as well as gaps in knowledge and areas where better quality data are needed.

Impact

The Paradigm II model can be a stimulus for further research and for better understanding of breast cancer etiology.",2020-07-08 +32900399,"A Theoretical Model to Investigate the Influence of Temperature, Reactions of the Population and the Government on the COVID-19 Outbreak in Turkey.","

Objectives

The ongoing coronavirus disease 2019 (COVID-19) pandemic, which was initially identified in December 2019 in the city of Wuhan in China, poses a major threat to worldwide health care. By August 04, 2020, there were globally 695,848 deaths (Johns Hopkins University, https://coronavirus.jhu.edu/map.html). A total of 5765 of them come from Turkey (Johns Hopkins University, https://coronavirus.jhu.edu/map.html). As a result, various governments and their respective populations have taken strong measures to control the spread of the pandemic. In this study, a model that is by construction able to describe both government actions and individual reactions in addition to the well-known exponential spread is presented. Moreover, the influence of the weather is included. This approach demonstrates a quantitative method to track these dynamic influences. This makes it possible to numerically estimate the influence that various private or state measures that were put into effect to contain the pandemic had at time t. This might serve governments across the world by allowing them to plan their actions based on quantitative data to minimize the social and economic consequences of their containment strategies.

Methods

A compartmental model based on SEIR that includes the risk perception of the population by an additional differential equation and uses an implicit time-dependent transmission rate is constructed. Within this model, the transmission rate depends on temperature, population, and government actions, which in turn depend on time. The model was tested using different scenarios, with the different dynamic influences being mathematically switched on and off. In addition, the real data of infected coronavirus cases in Turkey were compared with the results of the model.

Results

The mathematical study of the influence of the different parameters is presented through different scenarios. Remarkably, the last scenario is also an example of a theoretical mitigation strategy that shows its maximum in August 2020. In addition, the results of the model are compared with the real data from Turkey using conventional fitting that shows good agreement.

Conclusions

Although most countries activated their pandemic plans, significant disruptions in health-care systems occurred. The framework of this model seems to be valid for a numerical analysis of dynamic processes that occur during the COVID-19 outbreak due to weather and human reactions. As a result, the effects of the measures introduced could be better planned in advance by use of this model.",2020-09-09 +32083158,A data set for electric power consumption forecasting based on socio-demographic features: Data from an area of southern Colombia.,"In this article, we introduce a data set concerning electric-power consumption-related features registered in seven main municipalities of Nariño, Colombia, from December 2010 to May 2016. The data set consists of 4427 socio-demographic characteristics, and 7 power-consumption-referred measured values. Data were fully collected by the company Centrales Eléctricas de Nariño (CEDENAR) according to the client consumption records. Power consumption data collection was carried following a manual procedure wherein company workers are in charge of manually registering the readings (measured in kWh) reported by the electric energy meters installed at each housing/building. Released data set is aimed at providing researchers a suitable input for designing and assessing the performance of forecasting, modelling, simulation and optimization approaches applied to electric power consumption prediction and characterization problems. The data set, so-named in shorthand PCSTCOL, is freely and publicly available at https://doi.org/10.17632/xbt7scz5ny.3.",2020-02-06 +33118182,Reproducibility analysis of multi-institutional paired expert annotations and radiomic features of the Ivy Glioblastoma Atlas Project (Ivy GAP) dataset.,"

Purpose

The availability of radiographic magnetic resonance imaging (MRI) scans for the Ivy Glioblastoma Atlas Project (Ivy GAP) has opened up opportunities for development of radiomic markers for prognostic/predictive applications in glioblastoma (GBM). In this work, we address two critical challenges with regard to developing robust radiomic approaches: (a) the lack of availability of reliable segmentation labels for glioblastoma tumor sub-compartments (i.e., enhancing tumor, non-enhancing tumor core, peritumoral edematous/infiltrated tissue) and (b) identifying ""reproducible"" radiomic features that are robust to segmentation variability across readers/sites.

Acquisition and validation methods

From TCIA's Ivy GAP cohort, we obtained a paired set (n = 31) of expert annotations approved by two board-certified neuroradiologists at the Hospital of the University of Pennsylvania (UPenn) and at Case Western Reserve University (CWRU). For these studies, we performed a reproducibility study that assessed the variability in (a) segmentation labels and (b) radiomic features, between these paired annotations. The radiomic variability was assessed on a comprehensive panel of 11 700 radiomic features including intensity, volumetric, morphologic, histogram-based, and textural parameters, extracted for each of the paired sets of annotations. Our results demonstrated (a) a high level of inter-rater agreement (median value of DICE ≥0.8 for all sub-compartments), and (b) ≈24% of the extracted radiomic features being highly correlated (based on Spearman's rank correlation coefficient) to annotation variations. These robust features largely belonged to morphology (describing shape characteristics), intensity (capturing intensity profile statistics), and COLLAGE (capturing heterogeneity in gradient orientations) feature families.

Data format and usage notes

We make publicly available on TCIA's Analysis Results Directory (https://doi.org/10.7937/9j41-7d44), the complete set of (a) multi-institutional expert annotations for the tumor sub-compartments, (b) 11 700 radiomic features, and (c) the associated reproducibility meta-analysis.

Potential applications

The annotations and the associated meta-data for Ivy GAP are released with the purpose of enabling researchers toward developing image-based biomarkers for prognostic/predictive applications in GBM.",2020-12-04 +33007469,A clinical calculator for predicting intraoperative blood loss and transfusion risk in spine tumor patients.,"

Background context

Surgery for vertebral column tumors is commonly associated with intraoperative blood loss (IOBL) exceeding 2 liters and the need for transfusion of allogeneic blood products. Transfusion of allogeneic blood, while necessary, is not benign, and has been associated with increased rates of wound complication, venous thromboembolism, delirium, and death.

Purpose

To develop a prediction tool capable of predicting IOBL and risk of requiring allogeneic transfusion in patients undergoing surgery for vertebral column tumors.

Study design/setting

Retrospective, single-center study.

Patient sample

Consecutive series of 274 patients undergoing 350 unique operations for primary or metastatic spinal column tumors over a 46-month period at a comprehensive cancer center.

Outcome measures

IOBL (in mL), use of intraoperative blood products, and intraoperative blood products transfused.

Methods

We identified IOBL and transfusions, along with demographic data, preoperative laboratory data, and surgical procedures performed. Independent predictors of IOBL and transfusion risk were identified using multivariable regression.

Results

Mean age at surgery was 57.0±13.6 years, 53.1% were male, and 67.1% were treated for metastatic lesions. Independent predictors of IOBL included en bloc resection (p<.001), surgical invasiveness (β=25.43 per point; p<0.001), and preoperative albumin (β=-244.86 per g/dL; p=0.011). Predictors of transfusion risk included preoperative hematocrit (odds ratio [OR]=0.88 per %; 95% confidence interval [CI, 0.84, 0.93]; p<0.001), preoperative MCHgb (OR=0.88 per pg; 95% CI [0.78, 1.00]; p=0.048), preoperative red cell distribution width (OR=1.32 per %; 95% CI [1.13, 1.55]; p<0.001), en bloc resection (OR=3.17; 95%CI [1.33, 7.54]; p=0.009), and surgical invasiveness (OR=1.08 per point; [1.06; 1.11]; p<0.001). The transfusion model showed a good fit of the data with an optimism-corrected area under the curve of 0.819. A freely available, web-based calculator was developed for the transfusion risk model (https://jhuspine3.shinyapps.io/TRUST/).

Conclusions

Here we present the first clinical calculator for intraoperative blood loss and transfusion risk in patients being treated for primary or metastatic vertebral column tumors. Surgical invasiveness and preoperative microcytic anemia most strongly predict transfusion risk. The resultant calculators may prove clinically useful for surgeons counseling patients about their individual risk of requiring allogeneic transfusion.",2020-09-30 +31633786,Treeio: An R Package for Phylogenetic Tree Input and Output with Richly Annotated and Associated Data.,"Phylogenetic trees and data are often stored in incompatible and inconsistent formats. The outputs of software tools that contain trees with analysis findings are often not compatible with each other, making it hard to integrate the results of different analyses in a comparative study. The treeio package is designed to connect phylogenetic tree input and output. It supports extracting phylogenetic trees as well as the outputs of commonly used analytical software. It can link external data to phylogenies and merge tree data obtained from different sources, enabling analyses of phylogeny-associated data from different disciplines in an evolutionary context. Treeio also supports export of a phylogenetic tree with heterogeneous-associated data to a single tree file, including BEAST compatible NEXUS and jtree formats; these facilitate data sharing as well as file format conversion for downstream analysis. The treeio package is designed to work with the tidytree and ggtree packages. Tree data can be processed using the tidy interface with tidytree and visualized by ggtree. The treeio package is released within the Bioconductor and rOpenSci projects. 
It is available at https://www.bioconductor.org/packages/treeio/.",2020-02-01 +33644291,"Effectiveness of interventions as part of the One Health approach to control coronavirus disease 2019 and stratified case features in Anhui Province, China: A real-world population-based cohort study.","

Background

Coronavirus-Disease-2019 (COVID-19) caused by Severe-Acute-Respiratory-Syndrome-Coronavirus-2 (SARS-CoV-2) is rapidly spreading worldwide causing a pandemic. To control the pandemic, the One Health approach (https://www.who.int/news-room/q-a-detail/one-health) is very important. We herein provide a real-world example of efficient COVID-19 control in Anhui Province, China with an outbreak originating from imported cases through implementation of a series of measures as part of the One Health approach and describe the stratified case features.

Methods

Since the identification of the first imported COVID-19 case on Jan 22, 2020, Anhui immediately initiated a sequence of systematic and forceful interventions. We detailed the control measures and analyzed the effects as demonstrated by the corresponding temporal changes of overall epidemiology data on confirmed, cured, and hospitalized cases and contacts. An accumulated number of 991 cases were confirmed, with a total number of 29,399 contacts traced. We further retrieved individual-level data of confirmed cases and compared them across stratifications by sex, age group, linkage to Wuhan, and period of diagnosis.

Results

With a series of interventions including active field investigation, case tracing, quarantine, centralization, education, closed management, and boundary control implemented, number of hospitalized COVID-19 cases peaked, new case disappeared, and all cases were discharged 21, 36, and 46 days after the identification of the initial case, respectively. Male patients were younger, more often had linkage to Wuhan, and received timelier care, but less often had infected cohabitants. Patients aged 25-44 years most often had linkage to Wuhan, while such frequency was lowest in those ≥65 years. Cases <25 years most often had a known contact with COVID-19 patients and any infected family member and cohabitant and were beforehand quarantined, and received fastest management. Patients with linkage to Wuhan were younger, less often had infected family member, had longer incubation period, and received earlier quarantine and timelier care. With more recent periods, the proportion of cases with linkage to Wuhan markedly decreased while the proportion of cases with known contact with COVID-19 cases dramatically increased; the proportions of patients with any infected family member or cohabitant, those beforehand quarantined, and those taking drugs before admission increased; incubation period lengthened, and patients received timelier professional care. Nonspecific systemic symptoms were most common, whose proportion decreased in more recent periods.

Conclusions

Timely and powerful measures as part of the One Health approach (https://www.who.int/news-room/q-a-detail/one-health) effectively and efficiently controlled the COVID-19 outbreak in Anhui, which can be a good real-world example strongly demonstrating the usefulness of such measures in places with outbreaks originating from imported cases. Precise and dynamic prevention and control measures should be implemented and based on features including sex, age group, exposure history, and phase of outbreak.",2021-02-02 +,"Phylogeny of Evanioidea (Hymenoptera, Apocrita), with descriptions of new Mesozoic species from China and Myanmar","The phylogeny of the superfamily Evanioidea is presented using morphology and DNA sequence data of selected extant and fossil genera by employing two phylogenetic methods, maximum parsimony and Bayesian inference. Based on our new results, the monophyly of Evanioidea is corroborated. Evanioidea, Anomopterellidae, Othniodellithidae, Andreneliidae and Evaniidae are monophyletic families, while Praeaulacidae, Aulacidae, Baissidae and Gasteruptiidae are paraphyletic families. In addition, four new genera (Sinuevania gen.n., Curtevania gen.n., Exilaulacus gen.n., Heterobaissa gen.n.) with five new species (Sinuevania mira sp.n., Curtevania enervia sp.n., Exilaulacus loculatus sp.n., Exilaulacus latus sp.n., Heterobaissa apetiola sp.n.), and five additionally new species (Newjersevania longa sp.n., Newjersevania brevis sp.n., Cretevania tenuis sp.n., Cretevania venae sp.n., Praeaulacus rectus sp.n.) and one new combination [Cretevania mitis (Li, Shih & Ren, 2014a) comb.n.] are described based on well‐preserved fossils from the Middle Jurassic Jiulongshan Formation in Inner Mongolia, China, the Early Cretaceous Yixian Formation in Liaoning, and mid‐Cretaceous amber from Myanmar. 
This study documents the diversification of one major lineage of the mid‐Mesozoic parasitoid revolution that dramatically changed food‐web relationships in terrestrial ecosystems. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:CBC04ADA‐0176‐402D‐9B43‐E1B3CDA080E1.",2018-10-01 +33037906,Efficient protein extraction for shotgun proteomics from hydrated and desiccated leaves of resurrection Ramonda serbica plants.,"Resurrection plant Ramonda serbica is a suitable model to investigate vegetative desiccation tolerance. However, the detailed study of these mechanisms at the protein level is hampered by the severe tissue water loss, high amount of phenolics and polysaccharide, and possible protein modifications and aggregations during the extraction and purification steps. When applied to R. serbica leaves, widely used protein extraction protocols containing polyvinylpolypyrrolidone and ascorbate, as well as the phenol/SDS/buffer-based protocol recommended for recalcitrant plant tissues failed to eliminate persistent contamination and ensure high protein quality. Here we compared three protein extraction approaches aiming to establish the optimal one for both hydrated and desiccated R. serbica leaves. To evaluate the efficacy of these protocols by shotgun proteomics, we also created the first R. serbica annotated transcriptome database, available at http://www.biomed.unipd.it/filearrigoni/Trinity_Sample_RT2.fasta . The detergent-free phenol-based extraction combined with dodecyl-β-D-maltoside-assisted extraction enabled high-yield and high-purity protein extracts. The phenol-based protocol improved the protein-band resolution, band number, and intensity upon electrophoresis, and increased the protein yield and the number of identified peptides and protein groups by LC-MS/MS. Additionally, dodecyl-β-D-maltoside enabled solubilisation and identification of more membrane-associated proteins. 
The presented study paves the way for investigating the desiccation tolerance in R. serbica, and we recommend this protocol for similar recalcitrant plant material.",2020-10-10 +33653218,Sex differences in the relationship of sleep-disordered breathing and asthma control among children with severe asthma.,"

Objective

Children with severe asthma are underrepresented in studies of the relationship of sleep-disordered breathing (SDB) and asthma and little is known about sex differences of these relationships. We sought to determine the relationship of SDB with asthma control and lung function among boys and girls within a pediatric severe asthma cohort.

Methods

Patients attending clinic visits at the Boston Children's Hospital Pediatric Severe Asthma Program completed the Pediatric Sleep Questionnaire (PSQ), Asthma Control Test (ACT) and Spirometry. The prevalence of SDB was defined as a PSQ score >0.33. We analyzed the association between PSQ score and both ACT score and spirometry values in mixed effect models, testing interactions for age and sex.

Results

Among 37 subjects, mean age was 11.8 years (4.4) and 23 (62.2%) were male, the prevalence of SDB was 43.2% (16/37). Including all 80 observations, there was a moderate negative correlation between PSQ and ACT scores (r=-0.46, p < 0.001). Multivariable linear regression models revealed a significant sex interaction with PSQ on asthma control (p = 0.003), such that for each 0.10 point increase in PSQ there was a 1.88 point decrease in ACT score for females but only 0.21 point decrease in ACT score for males. A positive PSQ screen was associated with a 9.44 point (CI 5.54, 13.34, p < 0.001) lower ACT score for females and a 3.22 point (CI 0.56, 5.88, p = 0.02) lower score for males.

Conclusions

SDB is common among children with severe asthma. Among children with severe asthma, SDB in girls portends significantly worse asthma control than in boys. Supplemental data for this article is available online at https://doi.org/10.1080/02770903.2021.1897838.",2021-03-23 +33858735,Publication Trends and Hot Spots in Femoroacetabular Impingement Research: A 20-Year Bibliometric Analysis.,"

Background

Femoroacetabular impingement (FAI) has attracted increasing attention over the past few decades. We aim to evaluate FAI research and predict research hot spots quantitatively and qualitatively.

Methods

The publications in FAI research between 2000 and 2019 were assimilated from the Web of Science Core Collection of Clarivate Analytics. The retrieved data were evaluated by the bibliometric method. Software CiteSpace 5.7.R1, VOSviewer 1.6.15, and the Online Analysis Platform of Literature Metrology (http://bibliometric.com/) were used to analyze and identify the hot spots and trends in this field.

Results

A total of 2471 original articles that fulfilled the study requirements were obtained. The number of manuscripts on FAI has experienced rapid growth, especially after 2009. The United States of America was the leading country in publications and in the collaboration network. FAI, osteoarthritis, hip arthroscopy, labral reconstruction, pathomorphology, outcome, rehabilitation, and joint cartilage are some of the high-frequency keywords in co-occurrence cluster analysis and cocited reference cluster analysis. Burst detection analysis of top keywords revealed that outcomes, instability, labral reconstruction, adolescent, and risk factor were newly emerged research hot spots.

Conclusion

The understanding of FAI has been improved significantly during the past two decades. Present studies focused on identifying the optimal method to treat labral pathology, outcome assessment of either surgeries or conservative managements, and predicting midterm and long-term outcomes. Together these studies exert critical implications for decision-making and management for FAI.",2021-03-09 +27924044,PlaMoM: a comprehensive database compiles plant mobile macromolecules.,"In plants, various phloem-mobile macromolecules including noncoding RNAs, mRNAs and proteins are suggested to act as important long-distance signals in regulating crucial physiological and morphological transition processes such as flowering, plant growth and stress responses. Given recent advances in high-throughput sequencing technologies, numerous mobile macromolecules have been identified in diverse plant species from different plant families. However, most of the identified mobile macromolecules are not annotated in current versions of species-specific databases and are only available as non-searchable datasheets. To facilitate study of the mobile signaling macromolecules, we compiled the PlaMoM (Plant Mobile Macromolecules) database, a resource that provides convenient and interactive search tools allowing users to retrieve, to analyze and also to predict mobile RNAs/proteins. Each entry in the PlaMoM contains detailed information such as nucleotide/amino acid sequences, ortholog partners, related experiments, gene functions and literature. For the model plant Arabidopsis thaliana, protein-protein interactions of mobile transcripts are presented as interactive molecular networks. Furthermore, PlaMoM provides a built-in tool to identify potential RNA mobility signals such as tRNA-like structures. The current version of PlaMoM compiles a total of 17 991 mobile macromolecules from 14 plant species/ecotypes from published data and literature. 
PlaMoM is available at http://www.systembioinfo.org/plamom/.",2016-10-24 +30535305,PopViz: a webserver for visualizing minor allele frequencies and damage prediction scores of human genetic variations.,"

Summary

Next-generation sequencing (NGS) generates large amounts of genomic data and reveals about 20 000 genetic coding variants per individual studied. Several mutation damage prediction scores are available to prioritize variants, but there is currently no application to help investigators to determine the relevance of the candidate genes and variants quickly and visually from population genetics data and deleteriousness scores. Here, we present PopViz, a user-friendly, rapid, interactive, mobile-compatible webserver providing a gene-centric visualization of the variants of any human gene, with (i) population-specific minor allele frequencies from the gnomAD population genetic database; (ii) mutation damage prediction scores from CADD, EIGEN and LINSIGHT and (iii) amino-acid positions and protein domains. This application will be particularly useful in investigations of NGS data for new disease-causing genes and variants, by reinforcing or rejecting the plausibility of the candidate genes, and by selecting and prioritizing, the candidate variants for experimental testing.

Availability and implementation

PopViz webserver is freely accessible from http://shiva.rockefeller.edu/PopViz/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +31689369,Development of a Free Online Interactive Naming Therapy for Bilingual Aphasia.,"Purpose The purpose of this ongoing project was to provide speech-language pathologists who serve culturally and linguistically diverse populations with a freely available online tool for naming therapy in a variety of languages. The purpose of this clinical focus article was to report on this resource in an effort to make known its existence, its instructions for use, and the evidence-based practices from which it was developed. Method The website, http://bilingualnamingtherapy.psu.edu/, was created by the research team in collaboration with a web programmer using Amazon Web Services. The treatment protocol for the website was adapted from an evidence-based naming intervention in which clients select and verify appropriate semantic features for the target words. This protocol comes from the work of Kiran and colleagues (Edmonds & Kiran, 2006; Kiran & Iakupova, 2011; Kiran & Lo, 2013; Kiran & Roberts, 2010; Kiran, Sandberg, Gray, Ascenso, & Kester, 2013; Krishnan, Tiwari, Kiran, & Chengappa, 2014), who showed positive benefits of this therapy within and across languages in bilingual persons with aphasia. The stimuli for the online therapy were developed in a variety of languages. First, words and semantic features were translated from English to 10 different languages. Next, surveys were created using Qualtrics software and posted on Amazon Mechanical Turk to verify picture labels and semantic features for each word in each language. The results of these surveys guided the stimuli used for each language on the website. An interactive website was developed to allow clinicians to select a set of words and progress through a series of steps. A step-by-step tutorial on how to use this website is also included in this article. 
Conclusions The interactive online naming therapy described in this article is currently available in English and Spanish, with Chinese under construction. Several more languages are in various stages of preparation for use on the website, and suggestions for additional languages are being actively sought. http://bilingualnamingtherapy.psu.edu/ promises to be a useful tool for speech-language pathologists who work with culturally and linguistically diverse clients. This website provides naming therapy materials, adapted from an evidence-based protocol, in a variety of languages, that have been developed based on feedback from speakers of each language to maximize cultural and linguistic appropriateness.",2019-11-05 +33684301,A Neural Marker of Speech Intention: Evidence From Contingent Negative Variation.,"Purpose This study investigated whether changes in brain activity preceding spoken words can be used as a neural marker of speech intention. Specifically, changes in the contingent negative variation (CNV) were examined prior to speech production in three different study designs to determine a method that maximizes signal detection in a speaking task. Method Electroencephalography data were collected in three different protocols to elicit the CNV in a spoken word task that varied the timing and type of linguistic information. The first protocol provided participants with the word to be spoken before the instruction of whether or not to speak, the second provided both the word and the instruction to speak, and the third provided the instruction to speak before the word. Participants (N = 18) were split into three groups (one for each protocol) and were instructed to either speak (Go) or refrain from speaking (NoGo) each word according to task instructions. The CNV was measured by analyzing the difference in slope between Go and NoGo trials. 
Results Statistically significant effects of hemispheric laterality on the CNV slope confirm the third protocol where the participants know they will speak in advance of the word, as the paradigm that reliably elicits a CNV response related to speech intention. Conclusions The maximal CNV response when the instruction is known before the word indicates the neural processing measured in this protocol may reflect a generalized speech intention process in which the speech-language systems become prepared to speak and then execute production once the word information is provided. Further analysis of the optimal protocol identified in this study requires additional experimental investigation to confirm its role in eliciting an objective marker of speech intention. Supplemental Material https://doi.org/10.23641/asha.14111468.",2021-03-08 +33828582,De novo Prediction of Moonlighting Proteins Using Multimodal Deep Ensemble Learning.,"Moonlighting proteins (MPs) are a special type of protein with multiple independent functions. MPs play vital roles in cellular regulation, diseases, and biological pathways. At present, very few MPs have been discovered by biological experiments. Due to the lack of data sample, computation-based methods to identify MPs are limited. Currently, there is no de-novo prediction method for MPs. Therefore, systematic research and identification of MPs are urgently required. In this paper, we propose a multimodal deep ensemble learning architecture, named MEL-MP, which is the first de novo computation model for predicting MPs. First, we extract four sequence-based features: primary protein sequence information, evolutionary information, physical and chemical properties, and secondary protein structure information. Second, we select specific classifiers for each kind of feature. Finally, we apply the stacked ensemble to integrate the output of each classifier. 
Through comprehensive model selection and cross-validation experiments, it is shown that specific classifiers for specific feature types can achieve superior performance. For validating the effectiveness of the fusion-based stacked ensemble, different feature fusion strategies including direct combination and a multimodal deep auto-encoder are used for comparative purposes. MEL-MP is shown to exhibit superior prediction performance (F-score = 0.891), surpassing the existing machine learning model, MPFit (F-score = 0.784). In addition, MEL-MP is leveraged to predict the potential MPs among all human proteins. Furthermore, the distribution of predicted MPs on different chromosomes, the evolution of MPs, the association of MPs with diseases, and the functional enrichment of MPs are also explored. Finally, for maximum convenience, a user-friendly web server is available at: http://ml.csbg-jlu.site/mel-mp/.",2021-03-22 +33679288,Beating Rate Variability of Isolated Mammal Sinoatrial Node Tissue: Insight Into Its Contribution to Heart Rate Variability.,"

Background

Because of the complexity of the interaction between the internal pacemaker mechanisms, cell interconnected signals, and interaction with other body systems, study of the role of individual systems must be performed under in vivo and in situ conditions. The in situ approach is valuable when exploring the mechanisms that govern the beating rate and rhythm of the sinoatrial node (SAN), the heart's primary pacemaker. SAN beating rate changes on a beat-to-beat basis. However, to date, there are no standard methods and tools for beating rate variability (BRV) analysis from electrograms (EGMs) collected from different mammals, and there is no centralized public database with such recordings.

Methods

We used EGM recordings obtained from control SAN tissues of rabbits (n = 9) and mice (n = 30) and from mouse SAN tissues (n = 6) that were exposed to drug intervention. The data were harnessed to develop a beat detector to derive the beat-to-beat interval time series from EGM recordings. We adapted BRV measures from heart rate variability and reported their range for rabbit and mouse.

Results

The beat detector algorithm performed with 99% accuracy, sensitivity, and positive predictive value on the test (mouse) and validation (rabbit and mouse) sets. Differences in the frequency band cutoff were found between BRV of SAN tissue vs. heart rate variability (HRV) of in vivo recordings. A significant reduction in power spectrum density existed in the high frequency band, and a relative increase was seen in the low and very low frequency bands. In isolated SAN, the larger animal had a slower beating rate but with lower BRV, which contrasted the phenomena reported for in vivo analysis. Thus, the non-linear inverse relationship between the average HR and HRV is not maintained under in situ conditions. The beat detector, BRV measures, and databases were contributed to the open-source PhysioZoo software (available at: https://physiozoo.com/).

Conclusion

Our approach will enable standardization and reproducibility of BRV analysis in mammals. Different trends were found between beating rate and BRV or HRV in isolated SAN tissue vs. recordings collected under in vivo conditions, respectively, implying a complex interaction between the SAN and the autonomic nervous system in determining HRV in vivo.",2020-01-01 +,Pseudomonas syringae Pathogen Causes Foliar Disease of Upland Cotton in Texas,"Cotton bacterial blight (CBB), caused by Xanthomonas citri pv. malvacearum (Xcm), can cause significant yield losses on susceptible varieties of upland cotton (Gossypium hirsutum L.) (Verma 1986). CBB has reemerged in the United States since 2011 (Phillips et al. 2017). In 2015, cotton fields near Plains, TX, exhibited symptoms of an unknown foliar disease on cotton cultivars that were reported to be resistant to CBB. In June 2016, bacteria were isolated from Fibermax 2007GLT (CBB Resistant) leaves exhibiting CBB-like symptoms. Culture on nutrient-rich NYGA medium with 50 μM cycloheximide revealed two predominant colony morphologies, yellow and white. The yellow colonies were confirmed to be Xcm. Koch’s postulates were used to determine that the white bacteria caused necrotic foliar lesions when infiltrated into cotton leaves alone or in combination with Xcm. Sequencing of the 16s rRNA gene identified this bacterium as Pseudomonas sp. with 99% sequence identity to the Pseudomonas syringae pv. syringae B728a (Gbk 230265-9). Further multilocus sequence analysis using concatenated regions of the gyrB, rpoD, gap1, and gltA on the PAMDB database grouped this pathogen with P. syringae pv. atrofaciens, P. syringae pv. apata, P. syringae pv. pisi, P. syringae pv. syringae, and P. syringae pv. aceris (http://genome.ppws.vt.edu/cgi-bin/MLST/home.pl). Thus, the isolated cotton pathogen is a member of P. syringae. Identification of a pseudomonad cotton pathogen has occurred at least once before in cotton seedlings near Lubbock, TX. 
This was recorded in the 1994 Cotton Beltwide Conferences Proceedings (ATCC 51506). No other reference to a pseudomonad pathogen of cotton has been found. However, evidence of a Pseudomonas-Xanthomonas disease complex has been found at least once in leafy crucifers (Zhao et al. 2000). Inoculations with a needleless syringe resulted in symptoms that initially appeared as a cell death phenotype but quickly progressed to a spreading necrotic lesion. Foliar disease symptoms were observed after inoculation with a bacterial suspension at OD₆₀₀ of 0.0001 to 0.01. Symptoms appeared as early as 1 day after inoculation. To date, this pathogen has been isolated from 11 different fields in six counties and always in association with Xcm. Additional isolates were identified from symptomatic leaf tissue of at least eight different cultivars at various locations. Four isolates from Donley County, TX, and four cotton cultivars (DP 1454NRB2RF, FM 1320GL, FM 1830GLT, and FM 2484B2F) were screened using the scratch method to identify whether variability existed in the disease response of cultivars. The cultivars were arranged in a split-plot design, with isolate as the main factor and cultivar as the subfactor. Both the main factor and the subfactors were randomized. There were four replications for each isolate/cultivar combination. The test was repeated once. Disease symptoms were less severe on FM 2484B2F (P < 0.05) than the other three cultivars, suggesting that variation exists among cotton germplasm for tolerance to this pathogen. 
Future phylogenetic analysis will focus on the origin of virulence of this pathogen and its distribution within the cotton production regions in the United States.",2018-06-01 +25841437,dbPSP: a curated database for protein phosphorylation sites in prokaryotes.,"As one of the most important post-translational modifications, phosphorylation is highly involved in almost all biological processes through temporally and spatially modifying substrate proteins. Recently, phosphorylation in prokaryotes attracted much attention for its critical roles in various cellular processes such as signal transduction. Thus, an integrative data resource of the prokaryotic phosphorylation will be useful for further analysis. In this study, we presented a curated database of phosphorylation sites in prokaryotes (dbPSP, Database URL: http://dbpsp.biocuckoo.org) for 96 prokaryotic organisms, which belong to 11 phyla in two domains including bacteria and archaea. From the scientific literature, we manually collected experimentally identified phosphorylation sites on seven types of residues, including serine, threonine, tyrosine, aspartic acid, histidine, cysteine and arginine. In total, the dbPSP database contains 7391 phosphorylation sites in 3750 prokaryotic proteins. With the dataset, the sequence preferences of the phosphorylation sites and functional annotations of the phosphoproteins were analyzed, while the results show that there were obvious differences among the phosphorylation in bacteria, archaea and eukaryotes. All the phosphorylation sites were annotated with original references and other descriptions in the database, which could be easily accessed through user-friendly website interface including various search and browse options. Taken together, the dbPSP database provides a comprehensive data resource for further studies of protein phosphorylation in prokaryotes. 
Database URL: http://dbpsp.biocuckoo.org",2015-04-04 +25542617,"Mitochondrial Disease Sequence Data Resource (MSeqDR): a global grass-roots consortium to facilitate deposition, curation, annotation, and integrated analysis of genomic data for the mitochondrial disease clinical and research communities.","Success rates for genomic analyses of highly heterogeneous disorders can be greatly improved if a large cohort of patient data is assembled to enhance collective capabilities for accurate sequence variant annotation, analysis, and interpretation. Indeed, molecular diagnostics requires the establishment of robust data resources to enable data sharing that informs accurate understanding of genes, variants, and phenotypes. The ""Mitochondrial Disease Sequence Data Resource (MSeqDR) Consortium"" is a grass-roots effort facilitated by the United Mitochondrial Disease Foundation to identify and prioritize specific genomic data analysis needs of the global mitochondrial disease clinical and research community. A central Web portal (https://mseqdr.org) facilitates the coherent compilation, organization, annotation, and analysis of sequence data from both nuclear and mitochondrial genomes of individuals and families with suspected mitochondrial disease. This Web portal provides users with a flexible and expandable suite of resources to enable variant-, gene-, and exome-level sequence analysis in a secure, Web-based, and user-friendly fashion. Users can also elect to share data with other MSeqDR Consortium members, or even the general public, either by custom annotation tracks or through the use of a convenient distributed annotation system (DAS) mechanism. A range of data visualization and analysis tools are provided to facilitate user interrogation and understanding of genomic, and ultimately phenotypic, data of relevance to mitochondrial biology and disease. 
Currently available tools for nuclear and mitochondrial gene analyses include an MSeqDR GBrowse instance that hosts optimized mitochondrial disease and mitochondrial DNA (mtDNA) specific annotation tracks, as well as an MSeqDR locus-specific database (LSDB) that curates variant data on more than 1300 genes that have been implicated in mitochondrial disease and/or encode mitochondria-localized proteins. MSeqDR is integrated with a diverse array of mtDNA data analysis tools that are both freestanding and incorporated into an online exome-level dataset curation and analysis resource (GEM.app) that is being optimized to support needs of the MSeqDR community. In addition, MSeqDR supports mitochondrial disease phenotyping and ontology tools, and provides variant pathogenicity assessment features that enable community review, feedback, and integration with the public ClinVar variant annotation resource. A centralized Web-based informed consent process is being developed, with implementation of a Global Unique Identifier (GUID) system to integrate data deposited on a given individual from different sources. Community-based data deposition into MSeqDR has already begun. Future efforts will enhance capabilities to incorporate phenotypic data that enhance genomic data analyses. MSeqDR will fill the existing void in bioinformatics tools and centralized knowledge that are necessary to enable efficient nuclear and mtDNA genomic data interpretation by a range of shareholders across both clinical diagnostic and research settings. Ultimately, MSeqDR is focused on empowering the global mitochondrial disease community to better define and explore mitochondrial diseases.",2014-12-04 +33534641,Predicting cell health phenotypes using image-based morphology profiling.,"Genetic and chemical perturbations impact diverse cellular phenotypes, including multiple indicators of cell health. 
These readouts reveal toxicity and antitumorigenic effects relevant to drug discovery and personalized medicine. We developed two customized microscopy assays, one using four targeted reagents and the other three targeted reagents, to collectively measure 70 specific cell health phenotypes including proliferation, apoptosis, reactive oxygen species, DNA damage, and cell cycle stage. We then tested an approach to predict multiple cell health phenotypes using Cell Painting, an inexpensive and scalable image-based morphology assay. In matched CRISPR perturbations of three cancer cell lines, we collected both Cell Painting and cell health data. We found that simple machine learning algorithms can predict many cell health readouts directly from Cell Painting images, at less than half the cost. We hypothesized that these models can be applied to accurately predict cell health assay outcomes for any future or existing Cell Painting dataset. For Cell Painting images from a set of 1500+ compound perturbations across multiple doses, we validated predictions by orthogonal assay readouts. We provide a web app to browse predictions: http://broad.io/cell-health-app. Our approach can be used to add cell health annotations to Cell Painting datasets.",2021-02-03 +30894701,ClinGen expert clinical validity curation of 164 hearing loss gene-disease pairs.,"

Purpose

Proper interpretation of genomic variants is critical to successful medical decision making based on genetic testing results. A fundamental prerequisite to accurate variant interpretation is the clear understanding of the clinical validity of gene-disease relationships. The Clinical Genome Resource (ClinGen) has developed a semiquantitative framework to assign clinical validity to gene-disease relationships.

Methods

The ClinGen Hearing Loss Gene Curation Expert Panel (HL GCEP) uses this framework to perform evidence-based curations of genes present on testing panels from 17 clinical laboratories in the Genetic Testing Registry. The HL GCEP curated and reviewed 142 genes and 164 gene-disease pairs, including 105 nonsyndromic and 59 syndromic forms of hearing loss.

Results

The final outcome included 82 Definitive (50%), 12 Strong (7%), 25 Moderate (15%), 32 Limited (20%), 10 Disputed (6%), and 3 Refuted (2%) classifications. The summary of each curation is date stamped with the HL GCEP approval, is live, and will be kept up-to-date on the ClinGen website ( https://search.clinicalgenome.org/kb/gene-validity ).

Conclusion

This gene curation approach serves to optimize the clinical sensitivity of genetic testing while reducing the rate of uncertain or ambiguous test results caused by the interrogation of genes with insufficient evidence of a disease link.",2019-03-21 +31596400,Facebook recruitment of smokers: comparing gain- and loss-framed ads for the purposes of an Internet-based smoking cessation intervention.,"Gain- and loss-framed messages about smoking behavior have commonly been used to promote cessation. However, there are still no clear conclusions as to what kind of message is more effective for motivating smokers to quit. This study compared the effectiveness of loss- and gain-framed messages in the online recruitment of smokers via Facebook Advertising. Loss- and gain-framed messages about smoking were created and released as Facebook ads. Users who clicked on the ads were automatically redirected to the ""Live Without Tobacco"" intervention (http://www.vivasemtabaco.com.br). The amount spent on the ads was BRL 647.64. Data were collected from the Facebook Ads platform and from a relational database. Analyses were performed on the 6,350 users who clicked on one of the ads and 1,731 who were successfully redirected to the intervention. Gain-framed ads reached 174,029 people and loss-framed ads reached 180,527. The former received 2,688 clicks, while the latter received 3,662. The cost of the click was BRL 0.12 per gain-framed ad and BRL 0.09 per loss-framed ad. Loss-framed ads reached more users, got more clicks (and website accesses), and led to more accounts and quit plans being created. Loss-framed messages about smoking appear to be more cost-effective for both initial recruitment and intervention engagement. 
Facebook has proven to be a good outreach and recruitment tool and can be a solution for the difficulty in reaching smokers for cessation interventions.",2019-10-07 +26973684,FragariaCyc: A Metabolic Pathway Database for Woodland Strawberry Fragaria vesca.,"FragariaCyc is a strawberry-specific cellular metabolic network based on the annotated genome sequence of Fragaria vesca L. ssp. vesca, accession Hawaii 4. It was built on the Pathway-Tools platform using MetaCyc as the reference. The experimental evidences from published literature were used for supporting/editing existing entities and for the addition of new pathways, enzymes, reactions, compounds, and small molecules in the database. To date, FragariaCyc comprises 66 super-pathways, 488 unique pathways, 2348 metabolic reactions, 3507 enzymes, and 2134 compounds. In addition to searching and browsing FragariaCyc, researchers can compare pathways across various plant metabolic networks and analyze their data using Omics Viewer tool. We view FragariaCyc as a resource for the community of researchers working with strawberry and related fruit crops. It can help understanding the regulation of overall metabolism of strawberry plant during development and in response to diseases and abiotic stresses. FragariaCyc is available online at http://pathways.cgrb.oregonstate.edu.",2016-03-04 +33206002,Depot Medroxyprogesterone Acetate Use and Blood Lead Levels in a Cohort of Young Women.,"

Background

Injectable contraceptive use is common, with 74 million users worldwide. Use of the injectable contraceptive depot medroxyprogesterone acetate (DMPA) is associated with bone mineral density loss. We hypothesize that increased bone resorption with DMPA use allows for mobilization of the toxic metal lead stored in bone to blood, presenting users with increased systemic exposure to lead.

Objective

The objective of our study was to investigate the association between current DMPA use and blood lead concentrations.

Methods

We conducted a cross-sectional analysis using enrollment data from the Study of Environment, Lifestyle & Fibroids (SELF), a cohort of 1,693 African-American women who were 23-35 years of age. Data on DMPA use were collected by computer-assisted telephone interview. Blood lead concentrations were measured in whole blood samples among 1,548 participants (91% of cohort). We estimated the adjusted percent difference in blood lead concentrations and 95% confidence intervals (CI) between current DMPA users and nonusers using multivariable linear regression.

Results

Geometric mean blood lead concentration was 0.69μg/dL (95% CI: 0.67, 0.71). After adjustment, current DMPA users (7% of cohort) had blood lead concentrations that were 18% higher than those of nonusers (95% CI: 8%, 29%). Similar associations were observed with additional analyses to assess for potential bias from smoking, DMPA-induced amenorrhea, use of estrogen-containing contraceptives, having given birth in the prior year, and history of medical conditions or current medication use associated with bone loss.

Discussion

Our results indicate that current DMPA use is associated with increased blood lead concentrations. Further research, particularly in populations highly exposed to lead, is warranted to consider tradeoffs between the adverse effects of lead on human health and the importance of DMPA as a contraceptive option to prevent unintended pregnancy. https://doi.org/10.1289/EHP7017.",2020-11-18 +33851871,Assessing Indoor Dust Interference with Human Nuclear Hormone Receptors in Cell-Based Luciferase Reporter Assays.,"

Background

Per- and polyfluoroalkyl substances (PFAS), organophosphate esters (OPEs), and polybrominated diphenyl ethers (PBDEs) are hormone-disrupting chemicals that migrate from building materials into air and dust.

Objectives

We aimed to quantify the hormonal activities of 46 dust samples and identify chemicals driving the observed activities.

Methods

We evaluated associations between hormonal activities of extracted dust in five cell-based luciferase reporter assays and dust concentrations of 42 measured PFAS, OPEs, and PBDEs, transformed as either raw or potency-weighted concentrations based on Tox21 high-throughput screening data.

Results

All dust samples were hormonally active, showing antagonistic activity toward peroxisome proliferator-activated receptor (PPARγ2) (100%; 46 of 46 samples), thyroid hormone receptor (TRβ) (89%; 41 samples), and androgen receptor (AR) (87%; 40 samples); agonist activity on estrogen receptor (ERα) (96%; 44 samples); and binding competition with thyroxine (T4) on serum transporter transthyretin (TTR) (98%; 45 samples). Effects were observed with as little as 4μg of extracted dust. In regression models for each chemical class, interquartile range increases in potency-weighted or unknown-potency chemical concentrations were associated with higher hormonal activities of dust extracts (potency-weighted: ΣPFAS-TRβ, ↑28%, p<0.05; ΣOPEs-TRβ, ↑27%, p=0.08; ΣPBDEs-TRβ, ↑20%, p<0.05; ΣPBDEs-ERα, ↑7.7%, p=0.08; unknown-potency: ΣOPEs-TTR, ↑34%, p<0.05; ΣOPEs-AR, ↑13%, p=0.06), adjusted for chemicals with active, inactive, and unknown Tox21 designations.

Discussion

All indoor dust samples exhibited hormonal activities, which were associated with PFAS, PBDE, and OPE levels. Reporter gene cell-based assays are relatively inexpensive, health-relevant evaluations of toxic loads of chemical mixtures that building occupants are exposed to. https://doi.org/10.1289/EHP8054.",2021-04-14 +34076892,The PREPARE for Your Care program increases advance care planning engagement among diverse older adults with cancer.,"

Background

Advance care planning (ACP) is low among older adults with cancer. In a secondary analysis of randomized trial data, the authors compared the efficacy of the PREPARE for Your Care (PREPARE) website plus an easy-to-read advance directive (AD) with an AD only among older adults with and without cancer.

Methods

Safety net, primary care patients in San Francisco were included if they were 55 years old or older, were English- or Spanish-speaking, and had 2 or more chronic conditions. The authors determined cancer diagnoses by using International Classification of Diseases, Ninth Revision/Tenth Revision codes. The primary outcome was new ACP documentation in the medical record at 15 months; the secondary outcomes were self-reported ACP engagement, ease of use, satisfaction, and depression/anxiety. The authors used mixed effects logistic and linear regression adjusted for prior ACP, health literacy, and clinician, including a cancer interaction term.

Results

Of 986 participants, 220 (22%) had cancer. The mean age was 63 years (SD, 6 years), 61% were women, 81% were of a minority race/ethnicity, 45% were Spanish-speaking, 39% had limited health literacy, and 27% had prior ACP. New ACP documentation was higher in the PREPARE arm versus the AD-only arm among participants with cancer (62% vs 43%; P = .01) and without cancer (38% vs 28%; P = .01), as was ACP engagement in both arms (P < .001), with no interactions by cancer. Ease of use and satisfaction were high, and depression/anxiety was low, with no differences by study arm or by cancer/no cancer.

Conclusions

PREPARE plus an easy-to-read AD increased ACP documentation and engagement among diverse older adults with cancer more than an AD alone, with no increase in depression or anxiety between study arms or by cancer. PREPARE may help to decrease ACP disparities among patients with cancer.

Lay summary

Advance care planning (ACP) is the process of sharing values, goals, and preferences for medical care, but engagement in ACP is low among older adults with cancer. Among 986 English- and Spanish-speaking older adults from a safety net hospital, an interactive, multimedia, web-based ACP program (PREPARE for Your Care at https://prepareforyourcare.org/) plus an easy-to-read advance directive increased ACP documentation and engagement more than an advance directive alone. There were no differences in this increase in ACP between older adults with cancer and older adults without cancer. Also, engaging in ACP did not result in increased depression or anxiety.",2021-06-02 +33964537,Children's understanding of epilepsy: A qualitative study.,"

Purpose

To use a qualitative research approach to determine children's understandings of epilepsy and their epilepsy treatment.

Methods

Children aged 7-16 years with physician-confirmed active epilepsy (i.e., having had an epileptic seizure in the past year and/or currently taking antiepileptic drugs [AEDs]), and not known to have an intellectual disability, were invited to participate. Children had semi-structured interviews separately on two occasions. Between the first and second interviews, an observation of a routine epilepsy clinic appointment of individual children was conducted, and was then discussed during the second interview. Participatory research tools were used in both child interviews to facilitate discussions. Interviews were audio recorded and transcribed, pseudonymized and entered into NVivo (version 12, QSR International). Data were analyzed using a thematic approach.

Results

Twenty-three children of mean age 10.1 years (range 8-14), mean duration of epilepsy of 4.6 years (range 2-10) were enrolled. Twelve were female; 7 had focal, 14 had generalized, and 2 had combined epilepsy; 20 were on monotherapy; and 16 had tried previous AEDs. All had an initial (first) interview; 20 were observed during a clinic appointment and had a second interview. Five broad themes emerged: understanding of epilepsy; understanding of seizures; understanding of medication; understanding of children's role in clinical appointments; influences on children's understanding. Children spoke about what epilepsy meant by describing the physical sensations of having a seizure or through the act of taking medication. Children described the role they had, or felt they should have, but reported challenges in being meaningfully involved in clinical appointments. While healthcare professionals were initial information nodes, epilepsy information from parents appeared to be more significant for children.

Conclusions

The perspectives of children with epilepsy are valuable for clinicians to understand; assumptions should not be made that children's views can be accessed via parents. Clinicians need to be constantly aware of children's views and ways of understanding and communicating about their epilepsy. To support this, the research - drawing on children's words, meanings, and stories - was used to inform an easily accessible, gender-neutral, animation about epilepsy that provides information about the condition, seizures, and medication (https://youtu.be/MO7xXL2ZXP8).",2021-05-05 +33949893,Nitrate in Drinking Water during Pregnancy and Spontaneous Preterm Birth: A Retrospective Within-Mother Analysis in California.,"

Background

Nitrate is a widespread groundwater contaminant and a leading cause of drinking water quality violations in California. Associations between nitrate exposure and select adverse birth outcomes have been suggested, but few studies have examined gestational exposures to nitrate and risk of preterm birth (before 37 wk gestation).

Objective

We investigated the association between elevated nitrate in drinking water and spontaneous preterm birth through a within-mother retrospective cohort study of births in California.

Methods

We acquired over 6 million birth certificate records linked with Office of Statewide Health Planning and Development hospital discharge data for California births from 2000-2011. We used public water system monitoring records to estimate nitrate concentrations in drinking water for each woman's residence during gestation. After exclusions, we constructed a sample of 1,443,318 consecutive sibling births in order to conduct a within-mother analysis. We used separate conditional logistic regression models to estimate the odds of preterm birth at 20-31 and 32-36 wk, respectively, among women whose nitrate exposure changed between consecutive pregnancies.

Results

Spontaneous preterm birth at 20-31 wk was increased in association with tap water nitrate concentrations during pregnancy of 5 to <10mg/L [odds ratio (OR)=1.47; 95% confidence interval (CI): 1.29, 1.67] and ≥10mg/L (OR=2.52; 95% CI: 1.49, 4.26) compared with <5mg/L (as nitrogen). Corresponding estimates for spontaneous preterm birth at 32-36 wk were positive but close to the null for 5 to <10mg/L nitrate (OR=1.08; 95% CI: 1.02, 1.15) and for ≥10mg/L nitrate (OR=1.05; 95% CI: 0.85, 1.31) vs. <5mg/L nitrate. Our findings were similar in several secondary and sensitivity analyses, including in a conventional individual-level design.

Discussion

The results suggest that nitrate in drinking water is associated with increased odds of spontaneous preterm birth. Notably, we estimated modestly increased odds associated with tap water nitrate concentrations of 5 to <10mg/L (below the federal drinking water standard of 10mg/L) relative to <5mg/L. https://doi.org/10.1289/EHP8205.",2021-05-05 +33021923,First report of Fusarium proliferatum causing Sheath Rot Disease of Rice in Eastern India. ,"Sheath rot is one of the most devastating diseases of rice because of its ability to reduce the yield significantly in all rice cultivating areas of the world (Bigirimana et al., 2015). Sheath rot disease is associated with various pathogens such as Sarocladium oryza, Fusarium fujikuroi complex and Pseudomonas fuscovaginae (Bigirimana et al., 2015). Hence, this disease has become more complex in nature and added more seriousness. From September to December 2018, plants were observed with typical sheath rot symptoms in research farm of ICAR-National Rice Research Institute and ten farmer's fields of Cuttack district, Odisha, Eastern India. About 25 to 37% of sheath rot disease severity was recorded in the infected field. Diseased plants were observed with symptoms such as brownish or reddish brown irregular lesions, which were later, got enlarged with grayish centers. Further, rotting of the topmost leaf sheaths that surround the young panicle was observed. At the severe stages, the young panicle was partially emerged from sheath or completely rotted within the sheath. The white to pinkish powdery growth observed inside the infected sheath leading to chaffy and discolored grains. The sheath rot symptomatic plants were collected from the infected fields. To isolate the causal pathogen, infected sheath tissues were surface sterilized in 1% sodium hypochlorite for 2 min, rinsed three times in sterile distilled water, and placed on potato dextrose agar medium (PDA) (HiMedia). Plates were incubated at 27 ± 1° C for 3 d. 
Further, fungal pathogen colonies were sub-cultured and purified to perform the pathogenicity test. On PDA, the colonies produced abundant white aerial mycelium with violet to pink pigmentation and hyphae were hyaline with septation. Abundant single celled, oval shaped microconidia (5.5-9 × 1.5-2 μm) were produced, whereas macroconidia were not produced and the fungal pathogen was tentatively identified as Fusarium sp. In order to characterize the pathogen at molecular level, ITS, alpha elongation factor gene (EF1-α), RNA polymerase II largest-subunit gene (RPB2), calmodulin gene (cld) were amplified using the primer pair of ITS1/ITS4, EF1/EF2, 5F/7CR and CLPRO1/CLPRO2 respectively and PCR amplicons were subjected to sequencing (White et al. 1990; O'Donnell et al. 1998; Chang et al. 2015). Furthermore, a species-specific primer Fp3-F/Fp4-R was used to identify the pathogen (Jurado et al., 2006). The resulting sequences were confirmed by BLAST analysis and the FUSARIUM-ID database (http://isolate.fusariumdb.org). BLASTn search showed 100% similarity between the query sequence and ITS, EF1-α, RPB2, Calmodulin gene sequences of F. proliferatum available in the GenBank. The following GenBank accession numbers were obtained; MT394055 for ITS; MT439867 for EF1-α; MT790774 for calmodulin; MT940224 for RPB2 and MT801050 for species-specific to F. proliferatum. To confirm the pathogenicity under glass house conditions, fungus grown on sterilized chaffy grains were placed in between boot leaf sheath and panicle and covered with moist cotton (Saravanakumar et al., 2009). After 15 days post inoculation (dpi), rotting symptoms were observed and these were similar to that of field symptoms. Pathogen was constantly re-isolated from symptomatic tissue, satisfying Koch's postulates. Disease symptoms were not observed on un-inoculated plants. Morphological characters, pathogenicity test and molecular characterization have identified the pathogen as F. proliferatum. 
To the best of our knowledge, this is the first confirmed report of F. proliferatum causing sheath rot disease on rice from Eastern India.",2020-10-06 +31334319,Electronic nose dataset for detection of wine spoilage thresholds.,"In this data article, we provide a time series dataset obtained for an application of wine quality detection focused on spoilage thresholds. The database contains 235 recorded measurements of wines divided into three groups and labeled as high quality (HQ), average quality (AQ) and low quality (LQ), in addition to 65 ethanol measurements. This dataset was collected using an electronic nose system (E-Nose) based on Metal Oxide Semiconductor (MOS) gas sensors, self-developed at the Universidade Federal Rural de Pernambuco (Brazil). The dataset is related to the research article entitled ""Wine quality rapid detection using a compact electronic nose system: application focused on spoilage thresholds by acetic acid"" by Rodriguez Gamboa et al., 2019. The dataset can be accessed publicly at the repository: https://data.mendeley.com/datasets/vpc887d53s/.",2019-06-27 +32791019,Automated Phenotyping Tool for Identifying Developmental Language Disorder Cases in Health Systems Data (APT-DLD): A New Research Algorithm for Deployment in Large-Scale Electronic Health Record Systems.,"Purpose Data mining algorithms using electronic health records (EHRs) are useful in large-scale population-wide studies to classify etiology and comorbidities (Casey et al., 2016). Here, we apply this approach to developmental language disorder (DLD), a prevalent communication disorder whose risk factors and epidemiology remain largely undiscovered. Method We first created a reliable system for manually identifying DLD in EHRs based on speech-language pathologist (SLP) diagnostic expertise. 
We then developed and validated an automated algorithmic procedure, called, Automated Phenotyping Tool for identifying DLD cases in health systems data (APT-DLD), that classifies a DLD status for patients within EHRs on the basis of ICD (International Statistical Classification of Diseases and Related Health Problems) codes. APT-DLD was validated in a discovery sample (N = 973) using expert SLP manual phenotype coding as a gold-standard comparison and then applied and further validated in a replication sample of N = 13,652 EHRs. Results In the discovery sample, the APT-DLD algorithm correctly classified 98% (concordance) of DLD cases in concordance with manually coded records in the training set, indicating that APT-DLD successfully mimics a comprehensive chart review. The output of APT-DLD was also validated in relation to independently conducted SLP clinician coding in a subset of records, with a positive predictive value of 95% of cases correctly classified as DLD. We also applied APT-DLD to the replication sample, where it achieved a positive predictive value of 90% in relation to SLP clinician classification of DLD. Conclusions APT-DLD is a reliable, valid, and scalable tool for identifying DLD cohorts in EHRs. This new method has promising public health implications for future large-scale epidemiological investigations of DLD and may inform EHR data mining algorithms for other communication disorders. Supplemental Material https://doi.org/10.23641/asha.12753578.",2020-08-11 +32467974,Audit logs to enforce document integrity in Skyline and Panorama.,"

Summary

Skyline is a Windows application for targeted mass spectrometry method creation and quantitative data analysis. Like most graphical user interface (GUI) tools, it has a complex user interface with many ways for users to edit their files which makes the task of logging user actions challenging and is the reason why audit logging of every change is not common in GUI tools. We present an object comparison-based approach to audit logging for Skyline that is extensible to other GUI tools. The new audit logging system keeps track of all document modifications made through the GUI or the command line and displays them in an interactive grid. The audit log can also be uploaded and viewed in Panorama, a web repository for Skyline documents that can be configured to only accept documents with a valid audit log, based on embedded hashes to protect log integrity. This makes workflows involving Skyline and Panorama more reproducible.

Availability and implementation

Skyline is freely available at https://skyline.ms.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +27814681,WUFlux: an open-source platform for 13C metabolic flux analysis of bacterial metabolism.,"

Background

Flux analyses, including flux balance analysis (FBA) and 13C-metabolic flux analysis (13C-MFA), offer direct insights into cell metabolism, and have been widely used to characterize model and non-model microbial species. Nonetheless, constructing the 13C-MFA model and performing flux calculation are demanding for new learners, because they require knowledge of metabolic networks, carbon transitions, and computer programming. To facilitate and standardize the 13C-MFA modeling work, we set out to publish a user-friendly and programming-free platform (WUFlux) for flux calculations in MATLAB®.

Results

We constructed an open-source platform for steady-state 13C-MFA. Using GUIDE (graphical user interface design environment) in MATLAB, we built a user interface that allows users to modify models based on their own experimental conditions. WUFlux is capable of directly correcting mass spectrum data of TBDMS (N-tert-butyldimethylsilyl-N-methyltrifluoroacetamide)-derivatized proteinogenic amino acids by removing background noise. To simplify 13C-MFA of different prokaryotic species, the software provides several metabolic network templates, including those for chemoheterotrophic bacteria and mixotrophic cyanobacteria. Users can modify the network and constraints, and then analyze the microbial carbon and energy metabolisms of various carbon substrates (e.g., glucose, pyruvate/lactate, acetate, xylose, and glycerol). WUFlux also offers several ways of visualizing the flux results with respect to the constructed network. To validate our model's applicability, we have compared and discussed the flux results obtained from WUFlux and other MFA software. We have also illustrated how model constraints of cofactor and ATP balances influence fluxome results.

Conclusion

Open-source software for 13C-MFA, WUFlux, with a user-friendly interface and easy-to-modify templates, is now available at http://www.13cmfa.org/ (or http://tang.eece.wustl.edu/ToolDevelopment.htm). We will continue documenting curated models of non-model microbial species and improving WUFlux performance.",2016-11-04 +32785422,Prioritisation of potential drug targets against Bartonella bacilliformis by an integrative in-silico approach.,"BACKGROUND Carrion's disease (CD) is a neglected biphasic illness caused by Bartonella bacilliformis, a Gram-negative bacterium found in the Andean valleys. The spread of resistant strains underlines the need for novel antimicrobials against B. bacilliformis and related bacterial pathogens. OBJECTIVE The main aim of this study was to integrate genomic-scale data to shortlist a set of proteins that could serve as attractive targets for new antimicrobial discovery to combat B. bacilliformis. METHODS We performed a multidimensional genomic scale analysis of potential and relevant targets which includes structural druggability, metabolic analysis and essentiality criteria to select proteins with attractive features for drug discovery. FINDINGS We shortlisted seventeen relevant proteins to develop new drugs against the causative agent of Carrion's disease. Particularly, the protein products of fabI, folA, aroA, trmFO, uppP and murE genes meet an important number of desirable features that make them attractive targets for new drug development. This data compendium is freely available as a web server (http://target.sbg.qb.fcen.uba.ar/). MAIN CONCLUSION This work represents an effort to reduce the costs in the first phases of B. bacilliformis drug discovery.",2020-08-10 +33740030,Development and validation of a predictive model for critical illness in adult patients requiring hospitalization for COVID-19.,"

Background

Identifying factors that can predict severe disease in patients needing hospitalization for COVID-19 is crucial for early recognition of patients at greatest risk.

Objective

(1) Identify factors predicting intensive care unit (ICU) transfer and (2) develop a simple calculator for clinicians managing patients hospitalized with COVID-19.

Methods

A total of 2,685 patients with laboratory-confirmed COVID-19 admitted to a large metropolitan health system in Georgia, USA between March and July 2020 were included in the study. Seventy-five percent of patients were included in the training dataset (admitted March 1 to July 10). Through multivariable logistic regression, we developed a prediction model (probability score) for ICU transfer. Then, we validated the model by estimating its performance accuracy (area under the curve [AUC]) using data from the remaining 25% of patients (admitted July 11 to July 31).

Results

We included 2,014 and 671 patients in the training and validation datasets, respectively. Diabetes mellitus, coronary artery disease, chronic kidney disease, serum C-reactive protein, and serum lactate dehydrogenase were identified as significant risk factors for ICU transfer, and a prediction model was developed. The AUC was 0.752 for the training dataset and 0.769 for the validation dataset. We developed a free, web-based calculator to facilitate use of the prediction model (https://icucovid19.shinyapps.io/ICUCOVID19/).

Conclusion

Our validated, simple, and accessible prediction model and web-based calculator for ICU transfer may be useful in assisting healthcare providers in identifying hospitalized patients with COVID-19 who are at high risk for clinical deterioration. Triage of such patients for early aggressive treatment can impact clinical outcomes for this potentially deadly disease.",2021-03-19 +30101321,GlycanAnalyzer: software for automated interpretation of N-glycan profiles after exoglycosidase digestions.,"

Summary

Many eukaryotic proteins are modified by N-glycans. Liquid chromatography (ultra-performance -UPLC and high-performance-HPLC) coupled with mass spectrometry (MS) is conventionally used to characterize N-glycan structures. Software can automatically assign glycan structures by matching their observed retention times and masses with standardized values in reference databases. However, more precise confirmation of N-glycan structures can be derived using exoglycosidases, enzymes that remove specific monosaccharides from glycans. Exoglycosidase removal of monosaccharides results in signature peak shifts, in both UPLC and MS1, yielding an effective way to verify N-glycan structure with high detail (down to the position and isomeric linkage of each monosaccharide). Because manual interpretation of exoglycosidase data is complex and time consuming, we developed GlycanAnalyzer, a web application that pattern matches N-glycan peak shifts following exoglycosidase digestion and automates structure assignments. GlycanAnalyzer significantly improves assignment accuracy over other auto-assignment methods on tests with a monoclonal antibody and four glycan standards (100% versus 82% for the next best software). By automating data interpretation, GlycanAnalyzer enables the easier use of exoglycosidases to precisely define N-glycan structure.

Availability and implementation

http://glycananalyzer.neb.com. Datasets available online.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +30395310,"Vesiclepedia 2019: a compendium of RNA, proteins, lipids and metabolites in extracellular vesicles.","Extracellular vesicles (EVs) are membranous vesicles that are released by both prokaryotic and eukaryotic cells into the extracellular microenvironment. EVs can be categorised as exosomes, ectosomes or shedding microvesicles and apoptotic bodies based on the mode of biogenesis. EVs contain biologically active cargo of nucleic acids, proteins, lipids and metabolites that can be altered based on the precise state of the cell. Vesiclepedia (http://www.microvesicles.org) is a web-based compendium of RNA, proteins, lipids and metabolites that are identified in EVs from both published and unpublished studies. Currently, Vesiclepedia contains data obtained from 1254 EV studies, 38 146 RNA entries, 349 988 protein entries and 639 lipid/metabolite entries. Vesiclepedia is publicly available and allows users to query and download EV cargo based on different search criteria. The mode of EV isolation and characterization, the biophysical and molecular properties and EV-METRIC are listed in the database aiding biomedical scientists in assessing the quality of the EV preparation and the corresponding data obtained. In addition, FunRich-based Vesiclepedia plugin is incorporated aiding users in data analysis.",2019-01-01 +33295914,iDHS-DASTS: identifying DNase I hypersensitive sites based on LASSO and stacking learning.,"The DNase I hypersensitivity site is an important marker of the DNA regulatory region, and its identification in the DNA sequence is of great significance for biomedical research. However, traditional identification methods are extremely time-consuming and can not obtain an accurate result. In this paper, we proposed a predictor called iDHS-DASTS to identify the DHS based on benchmark datasets. 
First, we adopt a feature extraction method called PseDNC which can incorporate the original DNA properties and spatial information of the DNA sequence. Then we use a method called LASSO to reduce the dimensions of the original data. Finally, we utilize stacking learning as a classifier, which includes Adaboost, random forest, gradient boosting, extra trees and SVM. Before we train the classifier, we use SMOTE-Tomek to overcome the imbalance of the datasets. In the experiment, our iDHS-DASTS achieves remarkable performance on three benchmark datasets. We achieve state-of-the-art results with over 92.06%, 91.06% and 90.72% accuracy for datasets [Doublestruck S]1, [Doublestruck S]2 and [Doublestruck S]3, respectively. To verify the validation and transferability of our model, we establish another independent dataset [Doublestruck S]4, for which the accuracy can reach 90.31%. Furthermore, we used the proposed model to construct a user friendly web server called iDHS-DASTS, which is available at http://www.xdu-duan.cn/.",2021-02-01 +30239692,A pipeline to translate glycosaminoglycan sequences into 3D models. Application to the exploration of glycosaminoglycan conformational space.,"Mammalian glycosaminoglycans are linear complex polysaccharides comprising heparan sulfate, heparin, dermatan sulfate, chondroitin sulfate, keratan sulfate and hyaluronic acid. They bind to numerous proteins and these interactions mediate their biological activities. GAG-protein interaction data reported in the literature are curated mostly in MatrixDB database (http://matrixdb.univ-lyon1.fr/). However, a standard nomenclature and a machine-readable format of GAGs together with bioinformatics tools for mining these interaction data are lacking. 
We report here the building of an automated pipeline to (i) standardize the format of GAG sequences interacting with proteins manually curated from the literature, (ii) translate them into the machine-readable GlycoCT format and into SNFG (Symbol Nomenclature For Glycan) images and (iii) convert their sequences into a format processed by a builder generating three-dimensional structures of polysaccharides based on a repertoire of conformations experimentally validated by data extracted from crystallized GAG-protein complexes. We have developed for this purpose a converter (the CT23D converter) to automatically translate the GlycoCT code of a GAG sequence into the input file required to construct a three-dimensional model.",2019-01-01 +26209309,Human metabolic atlas: an online resource for human metabolism.,"Human tissue-specific genome-scale metabolic models (GEMs) provide comprehensive understanding of human metabolism, which is of great value to the biomedical research community. To make this kind of data easily accessible to the public, we have designed and deployed the human metabolic atlas (HMA) website (http://www.metabolicatlas.org). This online resource provides comprehensive information about human metabolism, including the results of metabolic network analyses. We hope that it can also serve as an information exchange interface for human metabolism knowledge within the research community. The HMA consists of three major components: Repository, Hreed (Human REaction Entities Database) and Atlas. Repository is a collection of GEMs for specific human cell types and human-related microorganisms in SBML (System Biology Markup Language) format. The current release consists of several types of GEMs: a generic human GEM, 82 GEMs for normal cell types, 16 GEMs for different cancer cell types, 2 curated GEMs and 5 GEMs for human gut bacteria. Hreed contains detailed information about biochemical reactions. 
A web interface for Hreed facilitates access to the Hreed reaction data, which can be easily retrieved by using specific keywords or names of related genes, proteins, compounds and cross-references. Atlas web interface can be used for visualization of the GEMs collection overlaid on KEGG metabolic pathway maps with a zoom/pan user interface. The HMA is a unique tool for studying human metabolism, ranging in scope from an individual cell, to a specific organ, to the overall human body. This resource is freely available under a Creative Commons Attribution-NonCommercial 4.0 International License.",2015-07-24 +29016166,"""Replicability and other features of a high-quality science: Toward a balanced and empirical approach"": Correction to Finkel et al. (2017).","Reports an error in ""Replicability and other features of a high-quality science: Toward a balanced and empirical approach"" by Eli J. Finkel, Paul W. Eastwick and Harry T. Reis (Journal of Personality and Social Psychology, 2017[Aug], Vol 113[2], 244-253). In the commentary, there was an error in the References list. The publishing year for the 18th article was cited incorrectly as 2016. The in-text acronym associated with this citation should read instead as LCL2017. The correct References list citation should read as follows: LeBel, E. P., Campbell, L., & Loving, T. J. (2017). Benefits of open and high-powered research outweigh costs. Journal of Personality and Social Psychology, 113, 230-243. http://dx.doi.org/10.1037/pspi0000049. The online version of this article has been corrected. (The following abstract of the original article appeared in record 2017-30567-002.) Finkel, Eastwick, and Reis (2015; FER2015) argued that psychological science is better served by responding to apprehensions about replicability rates with contextualized solutions than with one-size-fits-all solutions. 
Here, we extend FER2015's analysis to suggest that much of the discussion of best research practices since 2011 has focused on a single feature of high-quality science-replicability-with insufficient sensitivity to the implications of recommended practices for other features, like discovery, internal validity, external validity, construct validity, consequentiality, and cumulativeness. Thus, although recommendations for bolstering replicability have been innovative, compelling, and abundant, it is difficult to evaluate their impact on our science as a whole, especially because many research practices that are beneficial for some features of scientific quality are harmful for others. For example, FER2015 argued that bigger samples are generally better, but also noted that very large samples (""those larger than required for effect sizes to stabilize""; p. 291) could have the downside of commandeering resources that would have been better invested in other studies. In their critique of FER2015, LeBel, Campbell, and Loving (2016) concluded, based on simulated data, that ever-larger samples are better for the efficiency of scientific discovery (i.e., that there are no tradeoffs). As demonstrated here, however, this conclusion holds only when the replicator's resources are considered in isolation. If we widen the assumptions to include the original researcher's resources as well, which is necessary if the goal is to consider resource investment for the field as a whole, the conclusion changes radically-and strongly supports a tradeoff-based analysis. In general, as psychologists seek to strengthen our science, we must complement our much-needed work on increasing replicability with careful attention to the other features of a high-quality science. (PsycINFO Database Record",2017-11-01 +33844597,Profiling the Tox21 Chemical Collection for Acetylcholinesterase Inhibition.,"

Background

Inhibition of acetylcholinesterase (AChE), a biomarker of organophosphorous and carbamate exposure in environmental and occupational human health, has been commonly used to identify potential safety liabilities. So far, many environmental chemicals, including drug candidates, food additives, and industrial chemicals, have not been thoroughly evaluated for their inhibitory effects on AChE activity. AChE inhibitors can have therapeutic applications (e.g., tacrine and donepezil) or neurotoxic consequences (e.g., insecticides and nerve agents).

Objectives

The objective of the current study was to identify environmental chemicals that inhibit AChE activity using in vitro and in silico models.

Methods

To identify AChE inhibitors rapidly and efficiently, we have screened the Toxicology in the 21st Century (Tox21) 10K compound library in a quantitative high-throughput screening (qHTS) platform by using the homogenous cell-based AChE inhibition assay and enzyme-based AChE inhibition assays (with or without microsomes). AChE inhibitors identified from the primary screening were further tested in monolayer or spheroid formed by SH-SY5Y and neural stem cell models. The inhibition and binding modes of these identified compounds were studied with time-dependent enzyme-based AChE inhibition assay and molecular docking, respectively.

Results

A group of known AChE inhibitors, such as donepezil, ambenonium dichloride, and tacrine hydrochloride, as well as many previously unreported AChE inhibitors, such as chelerythrine chloride and cilostazol, were identified in this study. Many of these compounds, such as pyrazophos, phosalone, and triazophos, needed metabolic activation. This study identified both reversible (e.g., donepezil and tacrine) and irreversible inhibitors (e.g., chlorpyrifos and bromophos-ethyl). Molecular docking analyses were performed to explain the relative inhibitory potency of selected compounds.

Conclusions

Our tiered qHTS approach allowed us to generate a robust and reliable data set to evaluate large sets of environmental compounds for their AChE inhibitory activity. https://doi.org/10.1289/EHP6993.",2021-04-12 +28806134,iLIR@viral: A web resource for LIR motif-containing proteins in viruses.,"Macroautophagy/autophagy has been shown to mediate the selective lysosomal degradation of pathogenic bacteria and viruses (xenophagy), and to contribute to the activation of innate and adaptative immune responses. Autophagy can serve as an antiviral defense mechanism but also as a proviral process during infection. Atg8-family proteins play a central role in the autophagy process due to their ability to interact with components of the autophagy machinery as well as selective autophagy receptors and adaptor proteins. Such interactions are usually mediated through LC3-interacting region (LIR) motifs. So far, only one viral protein has been experimentally shown to have a functional LIR motif, leaving open a vast field for investigation. Here, we have developed the iLIR@viral database ( http://ilir.uk/virus/ ) as a freely accessible web resource listing all the putative canonical LIR motifs identified in viral proteins. Additionally, we used a curated text-mining analysis of the literature to identify novel putative LIR motif-containing proteins (LIRCPs) in viruses. We anticipate that iLIR@viral will assist with elucidating the full complement of LIRCPs in viruses.",2017-08-14 +31164042,EmExplorer: a database for exploring time activation of gene expression in mammalian embryos.,"Understanding early development offers a striking opportunity to investigate genetic disease, stem cell and assisted reproductive technology. Recent advances in high-throughput sequencing technology have led to the rising influx of omics data, which have rapidly boosted our understanding of mammalian developmental mechanisms. 
Here, we review the database EmExplorer (a database for exploring time activation of gene expression in mammalian embryos), which systematically organizes the genes from development-related pathways, and which we have already established and continue to update it. The current version of EmExplorer incorporates over 26 000 genes obtained from 306 functional pathways in five species. The function annotations of development-related genes were also integrated into EmExplorer. To facilitate data extraction, the database also contains the following information. (i) The dynamic expression values for each development stage are matched to the corresponding genes. (ii) A two-layer search tool which supports multi-option searching, such as by official symbol, pathway name and function annotation. The returned entries can directly link to the analysis results for the corresponding gene or pathway in the analysis module. (iii) The analysis module provides different gene comparisons at the multi-species level and functional pathway level, which shows the species specificity and stage specificity at the gene or pathway level. (iv) The analysis based on the hypergeometric distribution test reveals the enrichment of gene functions at a particular stage of one organism's pathway. (v) The browser is designed for users with ambiguous searching goals and greatly helps new users to get a general idea of the contents of the database. (vi) The experimentally validated pathways are manually curated and shown on the home page. EmExplorer will be helpful for elucidating early developmental mechanisms and exploring time activation genes. EmExplorer is freely available at http://bioinfor.imu.edu.cn/emexplorer .",2019-06-05 +33784599,SoftSeg: Advantages of soft versus binary training for image segmentation.,"Most image segmentation algorithms are trained on binary masks formulated as a classification task per pixel. 
However, in applications such as medical imaging, this ""black-and-white"" approach is too constraining because the contrast between two tissues is often ill-defined, i.e., the voxels located on objects' edges contain a mixture of tissues (a partial volume effect). Consequently, assigning a single ""hard"" label can result in a detrimental approximation. Instead, a soft prediction containing non-binary values would overcome that limitation. In this study, we introduce SoftSeg, a deep learning training approach that takes advantage of soft ground truth labels, and is not bound to binary predictions. SoftSeg aims at solving a regression instead of a classification problem. This is achieved by using (i) no binarization after preprocessing and data augmentation, (ii) a normalized ReLU final activation layer (instead of sigmoid), and (iii) a regression loss function (instead of the traditional Dice loss). We assess the impact of these three features on three open-source MRI segmentation datasets from the spinal cord gray matter, the multiple sclerosis brain lesion, and the multimodal brain tumor segmentation challenges. Across multiple random dataset splittings, SoftSeg outperformed the conventional approach, leading to an increase in Dice score of 2.0% on the gray matter dataset (p=0.001), 3.3% for the brain lesions, and 6.5% for the brain tumors. SoftSeg produces consistent soft predictions at tissues' interfaces and shows an increased sensitivity for small objects (e.g., multiple sclerosis lesions). The richness of soft labels could represent the inter-expert variability, the partial volume effect, and complement the model uncertainty estimation, which is typically unclear with binary predictions. The developed training pipeline can easily be incorporated into most of the existing deep learning architectures. 
SoftSeg is implemented in the freely-available deep learning toolbox ivadomed (https://ivadomed.org).",2021-03-18 +32822736,Commentary - A comprehensive safety understanding of granulocyte-colony stimulating factor biosimilars and Intended Copy Biologics in treating chemotherapy associated febrile neutropenia.,"Filgrastim, human white cell growth factor, Granulocyte colony-stimulating factor (G-CSF), is a core medicine in the WHO list of Essential Medicines. For this reason, recent reporting of statistically significant safety and efficacy differences between reference and Biosimilar brands of filgrastim by Rastogi and the Indian Pharmacopoeia Commission in Toxicology and Applied Pharmacology in 2020 is of great concern [Shruti Rastogi et al. Towards a comprehensive safety understanding of granulocyte-colony stimulating factor biosimilars in treating chemotherapy associated febrile neutropenia: Trends from decades of data. Toxicology and Applied Pharmacology Volume 395, 15 May 2020, 114,976. https://doi.org/10.1016/j.taap.2020.114976]. This commentary shows that the alarming report is a result of incorrect statistical tests misapplied to inappropriate data sets compounded by a further problem relating to the strict regulatory definition of a Biosimilar Medicine as opposed that of an Intended Copy Biologic. In contrast, the body of evidence from more than seven and a half thousand participants in Confirmatory Clinical Studies and Post Approval Clinical Studies as well as the Periodic Safety Update Reports confirms that European approved filgrastim Biosimilars show no meaningful difference in quality, safety or efficacy compared to the reference brand.",2020-08-18 +30239819,POSTAR2: deciphering the post-transcriptional regulatory logics.,"Post-transcriptional regulation of RNAs is critical to the diverse range of cellular processes. The volume of functional genomic data focusing on post-transcriptional regulation logics continues to grow in recent years. 
In the current database version, POSTAR2 (http://lulab.life.tsinghua.edu.cn/postar), we included the following new features and data: updated ∼500 CLIP-seq datasets (∼1200 CLIP-seq datasets in total) from six species, including human, mouse, fly, worm, Arabidopsis and yeast; added a new module 'Translatome', which is derived from Ribo-seq datasets and contains ∼36 million open reading frames (ORFs) in the genomes from the six species; updated and unified post-transcriptional regulation and variation data. Finally, we improved web interfaces for searching and visualizing protein-RNA interactions with multi-layer information. Meanwhile, we also merged our CLIPdb database into POSTAR2. POSTAR2 will help researchers investigate the post-transcriptional regulatory logics coordinated by RNA-binding proteins and translational landscape of cellular RNAs.",2019-01-01 +32686665,Proteome activity landscapes of tumor cell lines determine drug responses.,"Integrated analysis of genomes, transcriptomes, proteomes and drug responses of cancer cell lines (CCLs) is an emerging approach to uncover molecular mechanisms of drug action. We extend this paradigm to measuring proteome activity landscapes by acquiring and integrating quantitative data for 10,000 proteins and 55,000 phosphorylation sites (p-sites) from 125 CCLs. These data are used to contextualize proteins and p-sites and predict drug sensitivity. For example, we find that Progesterone Receptor (PGR) phosphorylation is associated with sensitivity to drugs modulating estrogen signaling such as Raloxifene. We also demonstrate that Adenylate kinase isoenzyme 1 (AK1) inactivates antimetabolites like Cytarabine. Consequently, high AK1 levels correlate with poor survival of Cytarabine-treated acute myeloid leukemia patients, qualifying AK1 as a patient stratification marker and possibly as a drug target. 
We provide an interactive web application termed ATLANTiC (http://atlantic.proteomics.wzw.tum.de), which enables the community to explore the thousands of novel functional associations generated by this work.",2020-07-20 +33483373,The Irradiated Brain Microenvironment Supports Glioma Stemness and Survival via Astrocyte-Derived Transglutaminase 2.,"The tumor microenvironment plays an essential role in supporting glioma stemness and radioresistance. Following radiotherapy, recurrent gliomas form in an irradiated microenvironment. Here we report that astrocytes, when pre-irradiated, increase stemness and survival of cocultured glioma cells. Tumor-naïve brains increased reactive astrocytes in response to radiation, and mice subjected to radiation prior to implantation of glioma cells developed more aggressive tumors. Extracellular matrix derived from irradiated astrocytes were found to be a major driver of this phenotype and astrocyte-derived transglutaminase 2 (TGM2) was identified as a promoter of glioma stemness and radioresistance. TGM2 levels increased after radiation in vivo and in recurrent human glioma, and TGM2 inhibitors abrogated glioma stemness and survival. These data suggest that irradiation of the brain results in the formation of a tumor-supportive microenvironment. Therapeutic targeting of radiation-induced, astrocyte-derived extracellular matrix proteins may enhance the efficacy of standard-of-care radiotherapy by reducing stemness in glioma. SIGNIFICANCE: These findings presented here indicate that radiotherapy can result in a tumor-supportive microenvironment, the targeting of which may be necessary to overcome tumor cell therapeutic resistance and recurrence. 
GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/8/2101/F1.large.jpg.",2021-01-22 +26347762,Development of genome-wide informative simple sequence repeat markers for large-scale genotyping applications in chickpea and development of web resource.,"Development of informative polymorphic simple sequence repeat (SSR) markers at a genome-wide scale is essential for efficient large-scale genotyping applications. We identified genome-wide 1835 SSRs showing polymorphism between desi and kabuli chickpea. A total of 1470 polymorphic SSR markers from diverse coding and non-coding regions of the chickpea genome were developed. These physically mapped SSR markers exhibited robust amplification efficiency (73.9%) and high intra- and inter-specific polymorphic potential (63.5%), thereby suggesting their immense use in various genomics-assisted breeding applications. The SSR markers particularly derived from intergenic and intronic sequences revealed high polymorphic potential. Using the mapped SSR markers, a wider functional molecular diversity (16-94%, mean: 68%), and parentage- and cultivar-specific admixed domestication pattern and phylogenetic relationships in a structured population of desi and kabuli chickpea genotypes was evident. The intra-specific polymorphism (47.6%) and functional molecular diversity (65%) potential of polymorphic SSR markers developed in our study is much higher than that of previous documentations. Finally, we have developed a user-friendly web resource, Chickpea Microsatellite Database (CMsDB; http://www.nipgr.res.in/CMsDB.html), which provides public access to the data and results reported in this study. The developed informative SSR markers can serve as a resource for various genotyping applications, including genetic enhancement studies in chickpea.",2015-08-21 +32324845,alona: a web server for single-cell RNA-seq analysis.,"

Summary

Single-cell RNA sequencing (scRNA-seq) is a technology to measure gene expression in single cells. It has enabled discovery of new cell types and established cell type atlases of tissues and organs. The widespread adoption of scRNA-seq has created a need for user-friendly software for data analysis. We have developed a web server, alona that incorporates several of the most popular single-cell analysis algorithms into a flexible pipeline. alona can perform quality filtering, normalization, batch correction, clustering, cell type annotation and differential gene expression analysis. Data are visualized in the web browser using an interface based on JavaScript, allowing the user to query genes of interest and visualize the cluster structure. alona accepts a compressed gene expression matrix and identifies cell clusters with a graph-based clustering strategy. Cell types are identified from a comprehensive collection of marker genes or by specifying a custom set of marker genes.

Availability and implementation

The service runs at https://alona.panglaodb.se and the Python package can be downloaded from https://oscar-franzen.github.io/adobo/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +32162851,Using ggtree to Visualize Data on Tree-Like Structures.,"Ggtree is an R/Bioconductor package for visualizing tree-like structures and associated data. After 5 years of continual development, ggtree has been evolved as a package suite that contains treeio for tree data input and output, tidytree for tree data manipulation, and ggtree for tree data visualization. Ggtree was originally designed to work with phylogenetic trees, and has been expanded to support other tree-like structures, which extends the application of ggtree to present tree data in other disciplines. This article contains five basic protocols describing how to visualize trees using the grammar of graphics syntax, how to visualize hierarchical clustering results with associated data, how to estimate bootstrap values and visualize the values on the tree, how to estimate continuous and discrete ancestral traits and visualize ancestral states on the tree, and how to visualize a multiple sequence alignment with a phylogenetic tree. The ggtree package is freely available at https://www.bioconductor.org/packages/ggtree. © 2020 by John Wiley & Sons, Inc. Basic Protocol 1: Using grammar of graphics for visualizing trees Basic Protocol 2: Visualizing hierarchical clustering using ggtree Basic Protocol 3: Visualizing bootstrap values as symbolic points Basic Protocol 4: Visualizing ancestral status Basic Protocol 5: Visualizing a multiple sequence alignment with a phylogenetic tree.",2020-03-01 +30497363,SEDE-GPS: socio-economic data enrichment based on GPS information.,"

Background

Microbes are essential components of all ecosystems because they drive many biochemical processes and act as primary producers. In freshwater ecosystems, the biodiversity in and the composition of microbial communities can be used as indicators for environmental quality. Recently, some environmental features have been identified that influence microbial ecosystems. However, the impact of human action on lake microbiomes is not well understood. This is, in part, due to the fact that environmental data is, albeit theoretically accessible, not easily available.

Results

In this work, we present SEDE-GPS, a tool that gathers data that are relevant to the environment of a user-provided GPS coordinate. To this end, it accesses a list of public and corporate databases and aggregates the information in a single file, which can be used for further analysis. To showcase the use of SEDE-GPS, we enriched a lake microbial ecology sequencing dataset with around 18,000 socio-economic, climate, and geographic features. The sources of SEDE-GPS are public databases such as Eurostat, the Climate Data Center, and OpenStreetMap, as well as corporate sources such as Twitter. Using machine learning and feature selection methods, we were able to identify features in the data provided by SEDE-GPS that can be used to predict lake microbiome alpha diversity.

Conclusion

The results presented in this study show that SEDE-GPS is a handy and easy-to-use tool for comprehensive data enrichment for studies of ecology and other processes that are affected by environmental features. Furthermore, we present lists of environmental, socio-economic, and climate features that are predictive for microbial biodiversity in lake ecosystems. These lists indicate that human action has a major impact on lake microbiomes. SEDE-GPS and its source code is available for download at http://SEDE-GPS.heiderlab.de.",2018-11-30 +23529715,Resource-use measurement based on patient recall: issues and challenges for economic evaluation.,"Accurate resource-use measurement is challenging within an economic evaluation, but is a fundamental requirement for estimating efficiency. Considerable research effort has been concentrated on the appropriate measurement of outcomes and the policy implications of economic evaluation, while methods for resource-use measurement have been relatively neglected. Recently, the Database of Instruments for Resource Use Measurement (DIRUM) was set up at http://www.dirum.org to provide a repository where researchers can share resource-use measures and methods. A workshop to discuss the issues was held at the University of Birmingham in October 2011. Based on material presented at the workshop, this article highlights the state of the art of UK instruments for resource-use data collection based on patient recall. We consider methodological issues in the design and analysis of resource-use instruments, and the challenges associated with designing new questionnaires. We suggest a method of developing a good practice guideline, and identify some areas for future research. Consensus amongst health economists has yet to be reached on many aspects of resource-use measurement. 
We argue that researchers should now afford costing methodologies the same attention as outcome measurement, and we hope that this Current Opinion article will stimulate a debate on methods of resource-use data collection and establish a research agenda to improve the precision and accuracy of resource-use estimates.",2013-06-01 +32514737,Real-World Evidence Utilization in Clinical Development Reflected by US Product Labeling: Statistical Review.,"The US Food and Drug Administration (FDA) has shown scientific discretion in interpreting the substantial evidence requirement for the approval of new drugs with its considerations on the use of single controlled or uncontrolled trials (Federal Food, Drug, and Cosmetic Act § 505(d), 21 USC 355(d), 1962). With the passage of the 21st Centuries Cures Act (21st Century Cures-patients. House, Energy and Commerce Committee, Washington, DC, 2019 available at: https://energycommerce.house.gov/sites/republicans.energycommerce.house.gov/files/analysis/21stCenturyCures/20140516PatientsWhitePaper.pdf ), the FDA is mandated to expand the role of real-world evidence (RWE) in support of drug approval. This mandate further broadens the scope of scientific discretion to include data collected outside clinical trials. We summarize the agency's past acceptance of real-world data (RWD) sources for supporting drug approval in new indications which have been reflected in US labels. In our summary, we focus on the type of RWD and statistical methodologies presented in these labels. Furthermore, two labels were selected for in-depth assessment of the RWE presented in these labels. Through these examples, we demonstrate the issues that can be raised in data collection that could affect interpretation. 
In addition, a brief discussion of statistical methods that can be used to incorporate RWE to clinical development is presented.",2020-06-08 +33816228,A Voxel-Based Radiographic Analysis Reveals the Biological Character of Proneural-Mesenchymal Transition in Glioblastoma.,"Introduction: Proneural and mesenchymal subtypes are the most distinct demarcated categories in classification scheme, and there is often a shift from proneural type to mesenchymal subtype in the progression of glioblastoma (GBM). The molecular characters are determined by specific genomic methods, however, the application of radiography in clinical practice remains to be further studied. Here, we studied the topography features of GBM in proneural subtype, and further demonstrated the survival characteristics and proneural-mesenchymal transition (PMT) progression of samples by combining with the imaging variables. Methods: Data were acquired from The Cancer Imaging Archive (TCIA, http://cancerimagingarchive.net). The radiography image, clinical variables and transcriptome subtype from 223 samples were used in this study. Proneural and mesenchymal subtype on GBM topography based on overlay and Voxel-based lesion-symptom mapping (VLSM) analysis were revealed. Besides, we carried out the comparison of survival analysis and PMT progression in and outside the VLSM-determined area. Results: The overlay of total GBM and separated image of proneural and mesenchymal subtype revealed a correlation of the two subtypes. By VLSM analysis, proneural subtype was confirmed to be related to left inferior temporal medulla, and no significant voxel was found for mesenchymal subtype. The subsequent comparison between samples in and outside the VLSM-determined area showed difference in overall survival (OS) time, tumor purity, epithelial-mesenchymal transition (EMT) score and clinical variables. Conclusions: PMT progression was determined by radiography approach. 
GBM samples in the VLSM-determined area tended to harbor the signature of proneural subtype. This study provides a valuable VLSM-determined area related to the predilection site, prognosis and PMT progression by the association between GBM topography and molecular characters.",2021-03-17 +30793173,CausalTAB: the PSI-MITAB 2.8 updated format for signalling data representation and dissemination.,"

Motivation

Combining multiple layers of information underlying biological complexity into a structured framework represents a challenge in systems biology. A key task is the formalization of such information in models describing how biological entities interact to mediate the response to external and internal signals. Several databases with signalling information focus on capturing, organizing and displaying signalling interactions by representing them as binary, causal relationships between biological entities. The curation efforts that build these individual databases demand a concerted effort to ensure interoperability among resources.

Results

Aware of the enormous benefits of standardization efforts in the molecular interaction research field, representatives of the signalling network community agreed to extend the PSI-MI controlled vocabulary to include additional terms representing aspects of causal interactions. Here, we present a common standard for the representation and dissemination of signalling information: the PSI Causal Interaction tabular format (CausalTAB) which is an extension of the existing PSI-MI tab-delimited format, now designated PSI-MITAB 2.8. We define the new term 'causal interaction', and related child terms, which are children of the PSI-MI 'molecular interaction' term. The new vocabulary terms in this extended PSI-MI format will enable systems biologists to model large-scale signalling networks more precisely and with higher coverage than before.

Availability and implementation

PSI-MITAB 2.8 format and the new reference implementation of PSICQUIC are available online (https://psicquic.github.io/ and https://psicquic.github.io/MITAB28Format.html).

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +30450911,Cytoscape StringApp: Network Analysis and Visualization of Proteomics Data.,"Protein networks have become a popular tool for analyzing and visualizing the often long lists of proteins or genes obtained from proteomics and other high-throughput technologies. One of the most popular sources of such networks is the STRING database, which provides protein networks for more than 2000 organisms, including both physical interactions from experimental data and functional associations from curated pathways, automatic text mining, and prediction methods. However, its web interface is mainly intended for inspection of small networks and their underlying evidence. The Cytoscape software, on the other hand, is much better suited for working with large networks and offers greater flexibility in terms of network analysis, import, and visualization of additional data. To include both resources in the same workflow, we created stringApp, a Cytoscape app that makes it easy to import STRING networks into Cytoscape, retains the appearance and many of the features of STRING, and integrates data from associated databases. Here, we introduce many of the stringApp features and show how they can be used to carry out complex network analysis and visualization tasks on a typical proteomics data set, all through the Cytoscape user interface. stringApp is freely available from the Cytoscape app store: http://apps.cytoscape.org/apps/stringapp .",2018-12-05 +27779618,"A studyforrest extension, retinotopic mapping and localization of higher visual areas.","The studyforrest (http://studyforrest.org) dataset is likely the largest neuroimaging dataset on natural language and story processing publicly available today. In this article, along with a companion publication, we present an update of this dataset that extends its scope to vision and multi-sensory research. 
15 participants of the original cohort volunteered for a series of additional studies: a clinical examination of visual function, a standard retinotopic mapping procedure, and a localization of higher visual areas-such as the fusiform face area. The combination of this update, the previous data releases for the dataset, and the companion publication, which includes neuroimaging and eye tracking data from natural stimulation with a motion picture, form an extremely versatile and comprehensive resource for brain imaging research-with almost six hours of functional neuroimaging data across five different stimulation paradigms for each participant. Furthermore, we describe employed paradigms and present results that document the quality of the data for the purpose of characterising major properties of participants' visual processing stream.",2016-10-25 +30125363,Better Hearing is Better Seeing: Molecular Photoacoustic Contrast Agents.,"The photoacoustic effect-the generation of a sound wave upon absorption of light by a sample-was developed over the past decades into photoacoustic imaging (PAI), and related technologies. These imaging modalities combine the advantages of optical imaging techniques (high resolution) with those of ultrasonic imaging (deep imaging depth). The light absorption by a tissue sample can be because of endogenous absorbers (such as hemoglobin or melanins). Alternatively-and most advantageously-is the use of exogenous dyes as contrast agents. Borg and Rochford present in this Journal (Photochem. Photobiol., 2018, https://doi.org/10.1111/php.12967) a comprehensive review of molecular dyes reported as contrast agents for PAI, referred to as molecular photoacoustic contrast agents (MPACs). 
Highlighted here is PAI as a most promising biomedical imaging modality and the importance of the rational development of novel, high-efficiency MPACs, an endeavor for which Borg and Rochford provided an excellent resource for novices and experts in the field, and anyone else interested in bioimaging or the interaction of light with chromophores.",2018-09-27 +33514929,Neighbor GWAS: incorporating neighbor genotypic identity into genome-wide association studies of field herbivory.,"An increasing number of field studies have shown that the phenotype of an individual plant depends not only on its genotype but also on those of neighboring plants; however, this fact is not taken into consideration in genome-wide association studies (GWAS). Based on the Ising model of ferromagnetism, we incorporated neighbor genotypic identity into a regression model, named ""Neighbor GWAS"". Our simulations showed that the effective range of neighbor effects could be estimated using an observed phenotype when the proportion of phenotypic variation explained (PVE) by neighbor effects peaked. The spatial scale of the first nearest neighbors gave the maximum power to detect the causal variants responsible for neighbor effects, unless their effective range was too broad. However, if the effective range of the neighbor effects was broad and minor allele frequencies were low, there was collinearity between the self and neighbor effects. To suppress the false positive detection of neighbor effects, the fixed effect and variance components involved in the neighbor effects should be tested in comparison with a standard GWAS model. We applied neighbor GWAS to field herbivory data from 199 accessions of Arabidopsis thaliana and found that neighbor effects explained 8% more of the PVE of the observed damage than standard GWAS. 
The neighbor GWAS method provides a novel tool that could facilitate the analysis of complex traits in spatially structured environments and is available as an R package at CRAN ( https://cran.rproject.org/package=rNeighborGWAS ).",2021-01-29 +33123459,Predictive Risk Factors and Online Nomograms for Synchronous Colon Cancer With Liver Metastasis.,"

Objectives

To develop and validate predictive nomograms of cancer specific survival (CSS) and overall survival (OS) for synchronous colon cancer with liver metastasis (SCLM) patients.

Methods

Patients with pathologically diagnosed colon cancer with liver metastasis were retrieved from the SEER database between 2010 and 2015. Only SCLM patients were included. Univariate and multivariate cox regression analyses were conducted to identify the potential predictors of patients' survival outcomes. The selected variables were integrated to create predictive nomograms via R tools. Furthermore, the concordance index Harrell's C statistic (C-index) was calculated to describe the discrimination of nomograms. Calibration (1000 bootstrap resamples) curves were plotted to compare the predictions of nomograms with the observed outcomes. Decision curve analysis (DCA) and clinical impact curves were performed to evaluate the clinical effects of nomograms.

Results

A total of 22,378 SCLM patients were included. The median time of OS and CSS was 13 and 17 months, respectively. The 1-, 2-, and 3-year rate of OS was 50.6, 28.1, and 14.8%, respectively. While the 1-, 2-, and 3-year rate of CSS was 58.7, 36.8, and 22.5%, respectively. SCLM patients with increased age, left primary tumor location, AJCC IVb stage, and no chemotherapy were associated with an obviously reduced OS and CSS. Variables including age, histological grade, T/N/M stage, tumor size, bone/lung metastasis, CEA, surgery of primary site, and chemotherapy were closely related to the prognoses of SCLM patients. Nomograms of OS and CSS were built and displayed online for convenient utilization. The C-indices of the OS and CSS nomograms were 0.74 and 0.73, respectively, indicating relatively good discrimination of the nomograms. The calibration curves suggested a good agreement between the actual observation and the nomogram prediction. DCAs and clinical impact curves reflected favorable potential clinical effects of predictive nomograms.

Conclusion

Chemotherapy, surgery of primary site, and age were important independent risk factors for the CSS and OS of SCLM patients. We built and validated two reliable nomograms of OS and CSS to predict the prognoses of SCLM patients, which can be accessed online at (https://predictive-tool.shinyapps.io/CSS-DynNomapp/; https://predictive-tool.shinyapps.io/OS-DynNomapp/).",2020-10-02 +27114493,From data repositories to submission portals: rethinking the role of domain-specific databases in CollecTF. ,"Domain-specific databases are essential resources for the biomedical community, leveraging expert knowledge to curate published literature and provide access to referenced data and knowledge. The limited scope of these databases, however, poses important challenges on their infrastructure, visibility, funding and usefulness to the broader scientific community. CollecTF is a community-oriented database documenting experimentally validated transcription factor (TF)-binding sites in the Bacteria domain. In its quest to become a community resource for the annotation of transcriptional regulatory elements in bacterial genomes, CollecTF aims to move away from the conventional data-repository paradigm of domain-specific databases. Through the adoption of well-established ontologies, identifiers and collaborations, CollecTF has progressively become also a portal for the annotation and submission of information on transcriptional regulatory elements to major biological sequence resources (RefSeq, UniProtKB and the Gene Ontology Consortium). This fundamental change in database conception capitalizes on the domain-specific knowledge of contributing communities to provide high-quality annotations, while leveraging the availability of stable information hubs to promote long-term access and provide high-visibility to the data. 
As a submission portal, CollecTF generates TF-binding site information through direct annotation of RefSeq genome records, definition of TF-based regulatory networks in UniProtKB entries and submission of functional annotations to the Gene Ontology. As a database, CollecTF provides enhanced search and browsing, targeted data exports, binding motif analysis tools and integration with motif discovery and search platforms. This innovative approach will allow CollecTF to focus its limited resources on the generation of high-quality information and the provision of specialized access to the data.Database URL: http://www.collectf.org/.",2016-04-25 +33614872,A grapevine leaves dataset for early detection and classification of esca disease in vineyards through machine learning.,"Esca is one of the most common disease that can severely damage grapevine. This disease, if not properly treated in time, is the cause of vegetative stress or death of the attacked plant, with the consequence of losses in production as well as a rising risk of propagation to the closer grapevines. Nowadays, the detection of Esca is carried out manually through visual surveys usually done by agronomists, requiring enormous amount of time. Recently, image processing, computer vision and machine learning methods have been widely adopted for plant diseases classification. These methods can minimize the time spent for anomaly detection ensuring an early detection of Esca disease in grapevine plants that helps in preventing it to spread in the vineyards and in minimizing the financial loss to the wine producers. In this article, an image dataset of grapevine leaves is presented. The dataset holds grapevine leaves images belonging to two classes: unhealthy leaves acquired from plants affected by Esca disease and healthy leaves. 
The data presented has been collected to be used in a research project jointly developed by the Department of Information Engineering, Polytechnic University of Marche, Ancona, Italy and the STMicroelectronics, Italy, under the cooperation of the Umani Ronchi SPA winery, Osimo, Ancona, Marche, Italy. The dataset could be helpful to researchers who use machine learning and computer vision algorithms to develop applications that help agronomists in early detection of grapevine plant diseases. The dataset is freely available at http://dx.doi.org/10.17632/89cnxc58kj.1.",2021-01-29 +,"Volumetric Nanoscale Imaging: Hard X‐Ray Nanoholotomography: Large‐Scale, Label‐Free, 3D Neuroimaging beyond Optical Limit (Adv. Sci. 6/2018)","In article number https://doi.org/10.1002/advs.201700694, Bert Müller and co‐workers launch nano‐holotomography for the isotropic 3D imaging of paraffin‐embedded human brain tissues. Nano‐holotomography bridges the spatial resolution gap between optical and electron microscopy, while giving access to 3D data for large tissue volumes with a spatial resolution well below 100 nm. The hierarchical approach will mediate prospering nano‐anatomy.",2018-06-01 +32639954,Real time structural search of the Protein Data Bank.,"Detection of protein structure similarity is a central challenge in structural bioinformatics. Comparisons are usually performed at the polypeptide chain level, however the functional form of a protein within the cell is often an oligomer. This fact, together with recent growth of oligomeric structures in the Protein Data Bank (PDB), demands more efficient approaches to oligomeric assembly alignment/retrieval. Traditional methods use atom level information, which can be complicated by the presence of topological permutations within a polypeptide chain and/or subunit rearrangements. These challenges can be overcome by comparing electron density volumes directly. But, brute force alignment of 3D data is a compute intensive search problem. 
We developed a 3D Zernike moment normalization procedure to orient electron density volumes and assess similarity with unprecedented speed. Similarity searching with this approach enables real-time retrieval of proteins/protein assemblies resembling a target, from PDB or user input, together with resulting alignments (http://shape.rcsb.org).",2020-07-08 +31713622,New developments on the Encyclopedia of DNA Elements (ENCODE) data portal.,"The Encyclopedia of DNA Elements (ENCODE) is an ongoing collaborative research project aimed at identifying all the functional elements in the human and mouse genomes. Data generated by the ENCODE consortium are freely accessible at the ENCODE portal (https://www.encodeproject.org/), which is developed and maintained by the ENCODE Data Coordinating Center (DCC). Since the initial portal release in 2013, the ENCODE DCC has updated the portal to make ENCODE data more findable, accessible, interoperable and reusable. Here, we report on recent updates, including new ENCODE data and assays, ENCODE uniform data processing pipelines, new visualization tools, a dataset cart feature, unrestricted public access to ENCODE data on the cloud (Amazon Web Services open data registry, https://registry.opendata.aws/encode-project/) and more comprehensive tutorials and documentation.",2020-01-01 +32830797,Targeted Therapy- and Chemotherapy-Associated Skin Toxicities: Systematic Review and Meta-Analysis.,"

Problem identification

Preventing and managing skin toxicities can minimize treatment disruptions and improve well-being. This systematic review aimed to evaluate the effectiveness of interventions for the prevention and management of cancer treatment-related skin toxicities.

Literature search

The authors systematically searched for comparative studies published before April 1, 2019. Study selection and appraisal were conducted by pairs of independent reviewers.

Data evaluation

The random-effects model was used to conduct meta-analysis when appropriate.

Synthesis

39 studies (6,006 patients) were included; 16 of those provided data for meta-analysis. Prophylactic minocycline reduced the development of all-grade and grade 1 acneform rash in patients who received erlotinib. Prophylaxis with pyridoxine 400 mg in capecitabine-treated patients lowered the risk of grade 2 or 3 hand-foot syndrome. Several treatments for hand-foot skin reaction suggested benefit in heterogeneous studies. Scalp cooling significantly reduced the risk for severe hair loss or total alopecia associated with chemotherapy.

Implications for research

Certainty in the available evidence was limited for several interventions, suggesting the need for future research.

Supplemental material can be found at https

//onf.ons.org/supplementary-material-targeted-therapy-and-chemotherapy-associated-skin-toxicity-systematic-review.",2020-09-01 +32986461,Correction to Bagby et al. (2020).,"Reports an error in ""Examining the ""traditional background hypothesis"" for the MMPI-2-RF L-r scores in a Muslim faith-based sample"" by R. Michael Bagby, Karin A. Onno, Ardeshir Mortezaei and Martin Sellbom (Psychological Assessment, Advanced Online Publication, Jul 27, 2020, np). In the article ""Examining the 'Traditional Background Hypothesis' for the MMPI-2-RF L-r Scores in a Muslim Faith-Based Sample,"" by R. Michael Bagby, Karin A. Onno, Ardeshir Mortezaei, and Martin Sellbom (Psychological Assessment, 2020, Vol. 32, No. 10, pp. 991-995, http://dx.doi.org/ 10.1037/pas0000941), the word ""not"" was missing in the abstract from the text ""(b) direct assessments of strength of faith or positive impression management were included or measured independently."" The correct sentence should have read as follows: ""(b) direct assessments of strength of faith or positive impression management were not included or measured independently."" All versions of this article have been corrected. (The following abstract of the original article appeared in record 2020-54974-001.) The traditional background hypothesis (TBH) is a long-standing belief associated with the Minnesota Multiphasic Personality Inventory (MMPI) L scale; a validity scale, which appears on every version of the family of MMPI instruments including the soon-to-be released MMPI-3. The L scale was originally designed to assess whether test respondents presented themselves in an unrealistically favorable light. Both researchers and clinicians noted, however, that those from traditional Christian faith-based groups produced elevated L-scale scores. 
A recent meta-analysis supported this observation, reporting an average L-scale elevation 0.50 SD greater than the MMPI-2 normative sample compared to samples of those with presumptively strong Christian-Judeo faith. Some limitations of this meta-analysis are that (a) the samples used in it included those undergoing an evaluative assessment, which could elevate L-scale scores independent of strength of faith belief, and (b) direct assessments of strength of faith or positive impression management were included or measured independently. Our primary goal in this study was to examine the TBH addressing these limitations with a sample of those who self-identified as believers in the Muslim faith (N = 267), the examination of which expands the scope beyond those of the Christian-Judeo faith. Consistent with previous results, the mean L-r (MMPI/MMPI-2 L scale counterpart on the MMPI-2-Restructured Form) was 56.41 T. Higher L-r scale scores were associated with increasing strength in the Muslim faith, and although increasing L-r scores were primarily associated with impression management, increasing Muslim-based faith values had a nontrivial influence on L-r scores and especially in the moderate score range of this scale. (PsycInfo Database Record (c) 2020 APA, all rights reserved).",2020-10-01 +33090868,"""The distance threshold of reliable eyewitness identification"": Correction to Nyman et al. (2019).","Reports an error in ""The distance threshold of reliable eyewitness identification"" by Thomas J. Nyman, James Michael Lampinen, Jan Antfolk, Julia Korkman and Pekka Santtila (Law and Human Behavior, 2019[Dec], Vol 43[6], 527-541). In the article (http://dx.doi.org/10.1037/lhb0000342), the authors incorrectly referred to ""simple main"" effects as ""main effects"" in four places on pp. 532-533. The authors have created a document reporting the main and simple main effects based on the original multilevel logistic regressions. 
These analyses support the authors' original interpretations and conclusions and can be found in the online supplemental materials. The online version of this article and online supplementary material have been corrected. (The following abstract of the original article appeared in record 2019-38765-001.) Increased distance between an eyewitness and a culprit decreases the accuracy of eyewitness identifications, but the maximum distance at which reliable observations can still be made is unknown. Our aim was to identify this threshold. We hypothesized that increased distance would decrease identification, rejection accuracy, confidence and would increase response time. We expected an interaction effect, where increased distance would more negatively affect younger and older participants (vs. young adults), resulting in age-group specific distance thresholds where diagnosticity would be 1. We presented participants with 4 live targets at distances between 5 m and 110 m using an 8-person computerized line-up task. We used simultaneous and sequential target-absent or target-present line-ups and presented these to 1,588 participants (age range = 6-77; 61% female; 95% Finns), resulting in 6,233 responses. We found that at 40 m diagnosticity was 50% lower than at 5 m and with increased distance diagnosticity tapered off until it was 1 (±0.5) at 100 m for all age groups and line-up types. However, young children (age range = 6-11) and older adults (age range = 45-77) reached a diagnosticity of 1 at shorter distances compared with older children (age range = 12-17) and young adults (age range = 18-44). We found that confidence dropped with increased distance, response time remained stable, and high confidence and shorter response times were associated with identification accuracy up to 40 m. 
We conclude that age and line-up type moderate the effect distance has on eyewitness accuracy and that there are perceptual distance thresholds at which an eyewitness can no longer reliably encode and later identify a culprit. (PsycInfo Database Record (c) 2020 APA, all rights reserved).",2020-10-01 +33120767,Danhong injection for the treatment of early diabetic nephropathy: A protocol of systematic review and meta-analysis.,"

Background

Diabetic nephropathy (DN) is one of the most common complications of diabetes mellitus (DM). Diabetic patients will experience a high mortality rate when DN progresses to end-stage. So, it is extremely important to treat DN early. Although several interventions have been used to treat DN, a conclusive finding has not yet been reached. As one of the most common Chinese medicines, danhong injection (DHI), which has been shown to have various functions, has also been prescribed as an alternative treatment option. However, no systematic review and meta-analysis has been conducted to objectively and comprehensively investigate its effectiveness and safety. Thus, we designed the current systematic review and meta-analysis to answer whether DHI can be preferably used to timely treat DN.

Methods

We will perform a systematic search to capture any potentially eligible studies in several electronic databases including PubMed, Cochrane library, Embase, China National Knowledgement Infrastructure (CNKI), Wanfang database, and Chinese sci-tech periodical full-text database (VIP) from their inception to August 31, 2020. We will assign 2 independent reviewers to select eligible studies, and assess the quality of included studies with Cochrane risk of bias assessment tool. We will perform all statistical analyses using RevMan 5.3 software.

Ethics and dissemination

We will submit our findings to be taken into consideration for publication in a peer-reviewed academic journal. Meanwhile, we will also communicate our findings in important conferences.

Protocol registry

The protocol of this systematic review and meta-analysis has been registered at the International Plateform of Registered Systematic Review and Meta-Analysis Protocols (INPLASY) platform (https://inplasy.com/inplasy-2020-9-0005/, registry number: INPLASY202090005) and this protocol was funded through a protocol registry.",2020-10-01 +32096105,"Spanish affective normative data for 1,406 words rated by children and adolescents (SANDchild).","Most research on the relationship between emotion and language in children relies on the use of words whose affective properties have been assessed by adults. To overcome this limitation, in the current study we introduce SANDchild, the Spanish affective database for children. This dataset reports ratings in the valence and the arousal dimensions for a large corpus of 1406 Spanish words rated by a large sample of 1276 children and adolescents from four different age groups (7, 9, 11 and 13 years old). We observed high inter-rater reliabilities for both valence and arousal in the four age groups. However, some age differences were found. In this sense, ratings for both valence and arousal decreased with age. Furthermore, the youngest children consider more words to be positive than adolescents. We also found sex differences in valence scores since boys gave higher valence ratings than girls, while girls considered more words to be negative than boys. The norms provided in this database will allow us to further extend our knowledge on the acquisition, development and processing of emotional language from childhood to adolescence. The complete database can be downloaded from https://psico.fcep.urv.cat/exp/files/SANDchild.xlsx .",2020-10-01 +26130573,Inference of Markovian properties of molecular sequences from NGS data and applications to comparative genomics.,"

Motivation

Next-generation sequencing (NGS) technologies generate large amounts of short read data for many different organisms. The fact that NGS reads are generally short makes it challenging to assemble the reads and reconstruct the original genome sequence. For clustering genomes using such NGS data, word-count based alignment-free sequence comparison is a promising approach, but for this approach, the underlying expected word counts are essential. A plausible model for this underlying distribution of word counts is given through modeling the DNA sequence as a Markov chain (MC). For single long sequences, efficient statistics are available to estimate the order of MCs and the transition probability matrix for the sequences. As NGS data do not provide a single long sequence, inference methods on Markovian properties of sequences based on single long sequences cannot be directly used for NGS short read data.

Results

Here we derive a normal approximation for such word counts. We also show that the traditional Chi-square statistic has an approximate gamma distribution, using the Lander-Waterman model for physical mapping. We propose several methods to estimate the order of the MC based on NGS reads and evaluate those using simulations. We illustrate the applications of our results by clustering genomic sequences of several vertebrate and tree species based on NGS reads using alignment-free sequence dissimilarity measures. We find that the estimated order of the MC has a considerable effect on the clustering results, and that the clustering results that use an MC of the estimated order give a plausible clustering of the species.

Availability and implementation

Our implementation of the statistics developed here is available as R package 'NGS.MC' at http://www-rcf.usc.edu/∼fsun/Programs/NGS-MC/NGS-MC.html

Contact

fsun@usc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-30 +31030203,From Multiple Organ Support Therapy to Extracorporeal Organ Support in Critically Ill Patients.,"Critically ill patients developing severe forms multiple organ dysfunction syndrome (MODS) may not be adequately supported by pharmacologic management. In these complex cases, a single form of extracorporeal organ support (ECOS) may be required, but multiple organ support therapy (MOST) is currently seen as a feasible approach. Severe renal dysfunction is a typical syndrome requiring renal replacement therapy (RRT) in the context of MODS. After more than a decade of RRT application in various intensive care settings, ECOS are not anymore seen as extraordinary or particularly aggressive techniques in MODS patients. Nowadays, a significant increase in the use of extracorporeal membrane oxygenation and extracorporeal carbon dioxide removal is occurring. When renal and cardio-pulmonary ECOS are used together, a multidisciplinary approach is necessary to minimize negative interactions and unwanted adverse effects. In this editorial, we focus on the organ crosstalk between the native and artificial organs, including the advantages and disadvantages of organ support on multiorgan function. Much of current experience on MOST has been gained upon RRT connected to other organ support therapies. Overall, available literature has not definitely established the ideal timing of these interventions, and whether early implementation impacts organ recovery and optimizes resource utilization is still a matter of open debate: it is possible that future research will be devoted to identify patient groups that may benefit from short- and long-term multiple organ support. Video Journal Club ""Cappuccino with Claudio Ronco"" at  https://www.karger.com/Journal/ArticleNews/490694?sponsor=52.",2019-04-26 +30526489,GOnet: a tool for interactive Gene Ontology analysis.,"

Background

Biological interpretation of gene/protein lists resulting from -omics experiments can be a complex task. A common approach consists of reviewing Gene Ontology (GO) annotations for entries in such lists and searching for enrichment patterns. Unfortunately, there is a gap between machine-readable output of GO software and its human-interpretable form. This gap can be bridged by allowing users to simultaneously visualize and interact with term-term and gene-term relationships.

Results

We created the open-source GOnet web-application (available at http://tools.dice-database.org/GOnet/ ), which takes a list of gene or protein entries from human or mouse data and performs GO term annotation analysis (mapping of provided entries to GO subsets) or GO term enrichment analysis (scanning for GO categories overrepresented in the input list). The application is capable of producing parsable data formats and importantly, interactive visualizations of the GO analysis results. The interactive results allow exploration of genes and GO terms as a graph that depicts the natural hierarchy of the terms and retains relationships between terms and genes/proteins. As a result, GOnet provides insight into the functional interconnection of the submitted entries.

Conclusions

The application can be used for GO analysis of any biological data sources resulting in gene/protein lists. It can be helpful for experimentalists as well as computational biologists working on biological interpretation of -omics data resulting in such lists.",2018-12-07 +25883136,ChEMBL web services: streamlining access to drug discovery data and utilities.,"ChEMBL is now a well-established resource in the fields of drug discovery and medicinal chemistry research. The ChEMBL database curates and stores standardized bioactivity, molecule, target and drug data extracted from multiple sources, including the primary medicinal chemistry literature. Programmatic access to ChEMBL data has been improved by a recent update to the ChEMBL web services (version 2.0.x, https://www.ebi.ac.uk/chembl/api/data/docs), which exposes significantly more data from the underlying database and introduces new functionality. To complement the data-focused services, a utility service (version 1.0.x, https://www.ebi.ac.uk/chembl/api/utils/docs), which provides RESTful access to commonly used cheminformatics methods, has also been concurrently developed. The ChEMBL web services can be used together or independently to build applications and data processing workflows relevant to drug discovery and chemical biology.",2015-04-16 +32946226,IAMPE: NMR-Assisted Computational Prediction of Antimicrobial Peptides.,"Antimicrobial peptides (AMPs) are at the focus of attention due to their therapeutic importance and developing computational tools for the identification of efficient antibiotics from the primary structure. Here, we utilized the 13CNMR spectral of amino acids and clustered them into various groups. These clusters were used to build feature vectors for the AMP sequences based on the composition, transition, and distribution of cluster members. 
These features, along with the physicochemical properties of AMPs were exploited to learn computational models to predict active AMPs solely from their sequences. Naïve Bayes (NB), k-nearest neighbors (KNN), support-vector machine (SVM), random forest (RF), and eXtreme Gradient Boosting (XGBoost) were employed to build the classification system using the collected AMP datasets from the CAMP, LAMP, ADAM, and AntiBP databases. Our results were validated and compared with the CAMP and ADAM prediction systems and indicated that the synergistic combination of the 13CNMR features with the physicochemical descriptors enables the proposed ensemble mechanism to improve the prediction performance of active AMP sequences. Our web-based AMP prediction platform, IAMPE, is available at http://cbb1.ut.ac.ir/.",2020-09-30 +31397839,"pWGBSSimla: a profile-based whole-genome bisulfite sequencing data simulator incorporating methylation QTLs, allele-specific methylations and differentially methylated regions.","

Motivation

DNA methylation plays an important role in regulating gene expression. DNA methylation is commonly analyzed using bisulfite sequencing (BS-seq)-based designs, such as whole-genome bisulfite sequencing (WGBS), reduced representation bisulfite sequencing (RRBS) and oxidative bisulfite sequencing (oxBS-seq). Furthermore, there has been growing interest in investigating the roles that genetic variants play in changing the methylation levels (i.e. methylation quantitative trait loci or meQTLs), how methylation regulates the imprinting of gene expression (i.e. allele-specific methylation or ASM) and the differentially methylated regions (DMRs) among different cell types. However, none of the current simulation tools can generate different BS-seq data types (e.g. WGBS, RRBS and oxBS-seq) while modeling meQTLs, ASM and DMRs.

Results

We developed profile-based whole-genome bisulfite sequencing data simulator (pWGBSSimla), a profile-based bisulfite sequencing data simulator, which simulates WGBS, RRBS and oxBS-seq data for different cell types based on real data. meQTLs and ASM are modeled based on the block structures of the methylation status at CpGs, whereas the simulation of DMRs is based on observations of methylation rates in real data. We demonstrated that pWGBSSimla adequately simulates data and allows performance comparisons among different methylation analysis methods.

Availability and implementation

pWGBSSimla is available at https://omicssimla.sourceforge.io.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +33785406,Electroconvulsive therapy with a memory reactivation intervention for post-traumatic stress disorder: A randomized controlled trial.,"

Background

Post-traumatic Stress Disorder (PTSD) often does not respond to available treatments. Memories are vulnerable to disruption during reconsolidation, and electroconvulsive therapy (ECT) has amnestic effects. OBJECTIVE/HYPOTHESIS: To test the use of ECT to disrupt the reconsolidation of traumatic memories as a potential treatment for PTSD. METHODS: Participants were adults from the civilian population and were referred for ECT treatment for severe depression with comorbid PTSD symptoms. Twenty-eight participants were randomly assigned to reactivation of a traumatic or non-traumatic memory using audio script driven imagery prior to each ECT treatment. Primary outcomes were change in scores on the Modified PTSD Symptom Scale - Self Report (MPSS-SR) and the Clinician-Administered PTSD Scale for DSM-5 (CAPS-5). Secondary outcomes included a comparison of the change in heart rate while listening to the script. RESULTS: Twenty-five female patients who completed a post-ECT assessment were included in the analysis. No significant group differences were found in the MPSS-SR or CAPS-5 scores from pre-ECT to post-ECT or 3-month follow-ups. However, both groups improved at post-ECT and 3-month follow up. Partial eta squared estimates of effect size showed large effect sizes for all outcomes (η2 > 0.13). Changes in heart rate were not significantly different between groups or over time. CONCLUSIONS: ECT paired with pre-treatment traumatic memory reactivation was not more effective for treating PTSD symptoms than ECT with non-traumatic memory reactivation. While our primary hypothesis was not supported, our data provides further support for the efficacy of ECT for improving symptoms of PTSD with comorbid depression. ClinicalTrials.gov. https://clinicaltrials.gov/ct2/show/NCT04027452.

Identifier

NCT04027452.",2021-03-27 +25352728,Seed Pro-Nutra Care: A tool for characterization of seed storage proteins and database of bioactive peptides having potential health benefits.,"

Unlabelled

Seed storage proteins, the major food proteins, possess unique physicochemical characteristics which determine their nutritional importance and influence their utilization by humans. Here, we describe a database driven tool named Seed Pro-Nutra Care which comprises a systematic compendium of seed storage proteins and their bioactive peptides influencing several vital organ systems for maintenance of health. Seed Pro-Nutra Care is an integrated resource on seed storage protein. This resource helps in the (I) Characterization of proteins whether they belong to seed storage protein group or not. (II) Identification of the bioactive peptides with their sequences using peptide name. (III) Determination of physico chemical properties of seed storage proteins. (IV) Epitope identification and mapping. (V) Allergenicity prediction and characterization. Seed Pro-Nutra Care is a compilation of data on bioactive peptides present in seed storage proteins from our own collections and other published and unpublished sources. The database provides an information resource of a variety of seed related biological information and its use for nutritional and biomedical application.

Availability

http://www.gbpuat-cbsh.ac.in/departments/bi/database/seed_pro_nutra_care/",2014-09-30 +33506361,Hyperhomocysteinemia is a risk factor for postoperative ischemia in adult patients with moyamoya disease.,"Growing evidence has suggested that hyperhomocysteinemia (HHcy) is a risk factor for cerebral infarction. However, the effect of HHcy on postoperative cerebral ischemia is still unclear. We aim to investigate the relationship between HHcy and postoperative ischemia of adult patients with moyamoya disease (MMD). A total of 138 adult patients with MMD were prospectively recruited from July 1 to December 31, 2019. After excluding 14 patients accepting conservative therapy, all 124 patients who underwent surgical treatment were enrolled. Patients were grouped according to postoperative ischemia and HHcy presentation, respectively. Clinical data and laboratory examinations were compared by statistical analyses. Potential risk factors were evaluated by univariate and multivariate logistic regression analysis. Comparing to the normal, patients with postoperative ischemia were higher in serum homocysteine (Hcy) level (P = 0.039) and HHcy ratio (P = 0.035). Furthermore, HHcy was more common in males (P = 0.007) than females. Logistic analysis results showed that HHcy (OR 5.234, 95% CI 1.127-24.315; P = 0.035) was an independent risk factor. HHcy was significantly associated with postoperative ischemia in MMD patients. Our study found that HHcy was correlated to the risk of postoperative ischemia. HHcy can be used as an indicator and a potential therapeutic target for postoperative ischemia in adult patients with MMD. URL: http://www.chictr.org . Unique identifier: ChiCTR2000031412.",2021-01-27 +31240104,3.5KJPNv2: an allele frequency panel of 3552 Japanese individuals including the X chromosome.,"The first step towards realizing personalized healthcare is to catalog the genetic variations in a population. 
Since the dissemination of individual-level genomic information is strictly controlled, it will be useful to construct population-level allele frequency panels with easy-to-use interfaces. In the Tohoku Medical Megabank Project, we sequenced nearly 4000 individuals from a Japanese population and constructed an allele frequency panel of 3552 individuals after removing related samples. The panel is called the 3.5KJPNv2. It was constructed by using a standard pipeline including the 1KGP and gnomAD algorithms to reduce technical biases and to allow comparisons to other populations. Our database is the first large-scale panel providing the frequencies of variants present on the X chromosome and on the mitochondria in the Japanese population. All the data are available on our original database at https://jmorp.megabank.tohoku.ac.jp.",2019-06-18 +32156760,"Cohort profile: social well-being and determinants of health study (SWADES), Kerala, India.","

Purpose

In response to the need for more advanced and longitudinal data concerning chronic diseases, behavioural risk factors and social support systems in India, the SWADES (Social Well-being and Determinants of Health Study) was established.

Participants

At baseline, 997 adults aged 30 years and over, living in the semi-urban area were interviewed in their home.

Findings to date

Data collected included self-reports of demographic details, health, depression, morbid conditions and healthcare utilisation, risk factors (physical, behavioural and social) of chronic diseases, common mental disorders, out-of-pocket expenditure, social support network, social cohesion, disability, education and wealth. Objective data for hypertension, diabetes and cognitive function were also collected.

Future plans

The first annual follow-up interviews were completed in 2019; the subsequent annual follow-up will be conducted until 2030. The SWADES data are held at the International Centre for Consortium Research in Social Care (ICRS), Rajagiri College of Social Science, Kerala, India. Procedures for data access, information on collaborations, publications and other details can be found at (http://icrs.in).",2020-03-09 +27899580,"OrthoDB v9.1: cataloging evolutionary and functional annotations for animal, fungal, plant, archaeal, bacterial and viral orthologs.","OrthoDB is a comprehensive catalog of orthologs, genes inherited by extant species from a single gene in their last common ancestor. In 2016 OrthoDB reached its 9th release, growing to over 22 million genes from over 5000 species, now adding plants, archaea and viruses. In this update we focused on usability of this fast-growing wealth of data: updating the user and programmatic interfaces to browse and query the data, and further enhancing the already extensive integration of available gene functional annotations. Collating functional annotations from over 100 resources, and enabled us to propose descriptive titles for 87% of ortholog groups. Additionally, OrthoDB continues to provide computed evolutionary annotations and to allow user queries by sequence homology. The OrthoDB resource now enables users to generate publication-quality comparative genomics charts, as well as to upload, analyze and interactively explore their own private data. OrthoDB is available from http://orthodb.org.",2016-11-28 +33574090,YAP and β-Catenin Cooperate to Drive Oncogenesis in Basal Breast Cancer.,"Targeting cancer stem cells (CSC) can serve as an effective approach toward limiting resistance to therapies. While basal-like (triple-negative) breast cancers encompass cells with CSC features, rational therapies remain poorly established. 
We show here that the receptor tyrosine kinase Met promotes YAP activity in basal-like breast cancer and find enhanced YAP activity within the CSC population. Interfering with YAP activity delayed basal-like cancer formation, prevented luminal to basal transdifferentiation, and reduced CSC. YAP knockout mammary glands revealed a decrease in β-catenin target genes, suggesting that YAP is required for nuclear β-catenin activity. Mechanistically, nuclear YAP interacted with β-catenin and TEAD4 at gene regulatory elements. Proteomic patient data revealed an upregulation of the YAP signature in basal-like breast cancers. Our findings demonstrate that in basal-like breast cancers, β-catenin activity is dependent on YAP signaling and controls the CSC program. These findings suggest that targeting the YAP/TEAD4/β-catenin complex offers a potential therapeutic strategy for eradicating CSCs in basal-like breast cancers. SIGNIFICANCE: These findings show that YAP cooperates with β-catenin in basal-like breast cancer to regulate CSCs and that targeting this interaction may be a novel CSC therapy for patients with basal-like breast cancer. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/8/2116/F1.large.jpg.",2021-02-11 +32816427,Air Quality and Its Association with Cardiovascular and Respiratory Hospital Admissions in Ireland.,"Aim Cardiovascular (CVD) and respiratory (RSD) diseases are leading causes of morbidity and mortality in Ireland. Clear links have been demonstrated in the literature between poor air quality and these diseases. This study aimed to use routinely available data to examine the relationship between air quality index for health (AQIH) (Available URL: http://www.epa.ie/air/quality/index/) and hospital admissions due to CVD and RSD in Dublin City and County between 2014 and 2018. Methods Admission data were collected from the HSE Hospital In-Patient Enquiry (HIPE). 
Daily count of hospital admissions with Dublin city and county address with a primary diagnosis of CVS and RSD was performed. The daily AQIH were obtained from the EPA for Dublin. Results Overall, AQIH distribution was: Good: 96% (1,575/1,642); Fair: 3% (52/1,642); and Poor: 1% (11/1,642). There were significant rises in admissions with change in AQIH (i.e. from good to very poor) for asthma, chronic obstructive airways disease and heart failure. There were also varying significant changes in short-term admission rates (i.e. up to 72 hours) following change in AQIH. Conclusions This study, using routinely gathered data, suggests that in Dublin city, where the AQ is predominantly good, that change in ambient AQ appears to impact admissions with CVD and RSD.",2020-06-11 +33254015,ENNAACT is a novel tool which employs neural networks for anticancer activity classification for therapeutic peptides.,"The prevalence of cancer as a threat to human life, responsible for 9.6 million deaths worldwide in 2018, motivates the search for new anticancer agents. While many options are currently available for treatment, these are often expensive and impact the human body unfavourably. Anticancer peptides represent a promising emerging field of anticancer therapeutics, which are characterized by favourable toxicity profile. The development of accurate in silico methods for anticancer peptide prediction is of paramount importance, as the amount of available sequence data is growing each year. This study leverages advances in machine learning research to produce a novel sequence-based deep neural network classifier for anticancer peptide activity. The classifier achieves performance comparable to the best-in-class, with a cross-validated accuracy of 98.3%, Matthews correlation coefficient of 0.91 and an Area Under the Curve of 0.95. 
This innovative classifier is available as a web server at https://research.timmons.eu/ennaact, facilitating in silico screening and design of new anticancer peptide chemotherapeutics by the research community.",2020-11-27 +32902022,miR-381-3p inhibits high glucose-induced vascular smooth muscle cell proliferation and migration by targeting HMGB1.,"

Background

Hyperglycemia increases the risk of many cardiovascular diseases (CVD), and the dysregulation of proliferation and migration in vascular smooth muscle cells (VSMCs) also participates in the pathogenesis of CVD. miR-381-3p is known to suppress the proliferation and migration of multiple human cell types. Nevertheless, the function of miR-381-3p in VSMCs remains largely indistinct.

Methods

A quantitative real-time polymerase chain reaction (qRT-PCR) was employed to investigate miR-381-3p expression in high-glucose-induced VSMCs. Inflammatory cytokines tumor necrosis factor-α, interleukin-1β and interleukin-6, as well as oxidative stress markers SOD and MDA, were determined by an enzyme-linked immunosorbent assay. Reactive oxygen species generation was examined using a 2,7'-dichlorofluorescein kit. The proliferation, migration and apoptosis of VSMCs were monitored by 3-(4,5-dimethylthiazl2-yl)-2,5-diphenyltetazolium bromide (MTT), transwell and terminal deoxynucleotidyl transferase-mediated dUTP nick-end labeling (TUNEL) assays. The TargetScan database (http://www.targetscan.org) was employed to seek the potential target gene of miR-381-3p. Interaction between miR-381-3p and HMGB1 was determined by a qRT-PCR, western blotting and a luciferase reporter assay.

Results

miR-381-3p expression was significantly reduced in a VSMCs dysfunction model induced by high-glucose in a dose- and time-dependent manner. Transfection of miR-381-3p mimics suppressed the inflammation, oxidative stress, proliferation and migration of VSMCs, whereas apoptosis of VSMCs was promoted, and the transfection of miR-381-3p inhibitors had the opposite effect. Mechanistically, HMGB1, an important factor in inflammation response, was confirmed as a target gene of miR-381-3p.

Conclusions

miR-381-3p targets HMGB1 to suppress the inflammation, oxidative stress, proliferation and migration of high-glucose-induced VSMCs by targeting HMGB1.",2020-09-28 +26909679,DsTRD: Danshen Transcriptional Resource Database.,"Salvia miltiorrhiza has been comprehensively studied as a medicinal model plant. However, research progress on this species is significantly hindered by its unavailable genome sequences and limited number of expressed sequence tags in the National Center for Biotechnology Information database. Thus, a transcript database must be developed to assist researchers to browse, search, and align sequences for gene cloning and functional analysis in S. miltiorrhiza. In this study, the Danshen Transcriptional Resource Database (DsTRD) was built using 76,531 transcribed sequences assembled from 12 RNA-Seq transcriptomes. Among these 12 RNA-seq data, ten were downloaded from NCBI database. The remaining two were enced on the Hiseq2000 platform using the stem and hairy-root of S. miltiorrhiza. The transcripts were annotated as protein-coding RNAs, long non-coding RNAs, microRNA precursors, and phased secondary small-interfering RNA genes through several bioinformatics methods. The tissue expression levels for each transcript were also calculated and presented in terms of RNA-Seq data. Overall, DsTRD facilitates browsing and searching for sequences and functional annotations of S. miltiorrhiza. DsTRD is freely available at http://bi.sky.zstu.edu.cn/DsTRD/home.php.",2016-02-24 +31647529,PlanExp: intuitive integration of complex RNA-seq datasets with planarian omics resources.,"MOTIVATION:There is an increasing amount of transcriptomic and genomic data available for planarians with the advent of both traditional and single-cell RNA sequencing technologies. Therefore, exploring, visualizing and making sense of all these data in order to understand planarian regeneration and development can be challenging. 
RESULTS:In this work, we present PlanExp, a web-application to explore and visualize gene expression data from different RNA-seq experiments (both traditional and single-cell RNA-seq) for the planaria Schmidtea mediterranea. PlanExp provides tools for creating different interactive plots, such as heatmaps, scatterplots, etc. and links them with the current sequence annotations both at the genome and the transcript level thanks to its integration with the PlanNET web application. PlanExp also provides a full gene/protein network editor, a prediction of genetic interactions from single-cell RNA-seq data, and a network expression mapper that will help researchers to close the gap between systems biology and planarian regeneration. AVAILABILITY AND IMPLEMENTATION:PlanExp is freely available at https://compgen.bio.ub.edu/PlanNET/planexp. The source code is available at https://compgen.bio.ub.edu/PlanNET/downloads. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-03-01 +33823805,New Guinean orogenic dynamics and biota evolution revealed using a custom geospatial analysis pipeline.,"

Background

The New Guinean archipelago has been shaped by millions of years of plate tectonic activity combined with long-term fluctuations in climate and sea level. These processes combined with New Guinea's location at the tectonic junction between the Australian and Pacific plates are inherently linked to the evolution of its rich endemic biota. With the advent of molecular phylogenetics and an increasing amount of geological data, the field of New Guinean biogeography begins to be reinvigorated.

Results

We inferred a comprehensive dated molecular phylogeny of endemic diving beetles to test historical hypotheses pertaining to the evolution of the New Guinean biota. We used geospatial analysis techniques to compare our phylogenetic results with a newly developed geological terrane map of New Guinea as well as the altitudinal and geographic range of species ( https://arcg.is/189zmz ). Our divergence time estimations indicate a crown age (early diversification) for New Guinea Exocelina beetles in the mid-Miocene ca. 17 Ma, when the New Guinean orogeny was at an early stage. Geographic and geological ancestral state reconstructions suggest an origin of Exocelina ancestors on the eastern part of the New Guinean central range on basement rocks (with a shared affinity with the Australian Plate). Our results do not support the hypothesis of ancestors migrating to the northern margin of the Australian Plate from Pacific terranes that incrementally accreted to New Guinea over time. However, our analyses support to some extent a scenario in which Exocelina ancestors would have been able to colonize back and forth between the amalgamated Australian and Pacific terranes from the Miocene onwards. Our reconstructions also do not support an origin on ultramafic or ophiolite rocks that have been colonized much later in the evolution of the radiation. Macroevolutionary analyses do not support the hypothesis of heterogeneous diversification rates throughout the evolution of this radiation, suggesting instead a continuous slowdown in speciation.

Conclusions

Overall, our geospatial analysis approach to investigate the links between the location and evolution of New Guinea's biota with the underlying geology sheds a new light on the patterns and processes of lineage diversification in this exceedingly diverse region of the planet.",2021-04-06 +30913342,Tea Plant Information Archive: a comprehensive genomics and bioinformatics platform for tea plant.,"Tea is the world's widely consumed nonalcohol beverage with essential economic and health benefits. Confronted with the increasing large-scale omics-data set particularly the genome sequence released in tea plant, the construction of a comprehensive knowledgebase is urgently needed to facilitate the utilization of these data sets towards molecular breeding. We hereby present the first integrative and specially designed web-accessible database, Tea Plant Information Archive (TPIA; http://tpia.teaplant.org). The current release of TPIA employs the comprehensively annotated tea plant genome as framework and incorporates with abundant well-organized transcriptomes, gene expressions (across species, tissues and stresses), orthologs and characteristic metabolites determining tea quality. It also hosts massive transcription factors, polymorphic simple sequence repeats, single nucleotide polymorphisms, correlations, manually curated functional genes and globally collected germplasm information. A variety of versatile analytic tools (e.g. JBrowse, blast, enrichment analysis, etc.) are established helping users to perform further comparative, evolutionary and functional analysis. We show a case application of TPIA that provides novel and interesting insights into the phytochemical content variation of section Thea of genus Camellia under a well-resolved phylogenetic framework. 
The constructed knowledgebase of tea plant will serve as a central gateway for global tea community to better understand the tea plant biology that largely benefits the whole tea industry.",2019-04-11 +32837757,Using the internet search data to investigate symptom characteristics of COVID-19: A big data study.,"

Objective

Analyzing the symptom characteristics of Coronavirus Disease 2019(COVID-19) to improve control and prevention.

Methods

Using the Baidu Index Platform (http://index.baidu.com) and the website of Chinese Center for Disease Control and Prevention as data resources to obtain the search volume (SV) of keywords for symptoms associated with COVID-19 from January 1 to February 20 in each year from 2017 to 2020 and the epidemic data in Hubei province and the other top 9 impacted provinces in China. Data of 2020 were compared with those of the previous three years. Data of Hubei province were compared with those of the other 9 provinces. The differences and characteristics of the SV of COVID-19-related symptoms, and the correlations between the SV of COVID-19 and the number of newly confirmed/suspected cases were analyzed. The lag effects were discussed.

Results

Comparing the SV from January 1, 2020 to February 20, 2020 with those for the same period of the previous three years, Hubei's SV for cough, fever, diarrhea, chest tightness, dyspnea, and other symptoms were significantly increased. The total SV of lower respiratory symptoms was significantly higher than that of upper respiratory symptoms (P<0.001). The SV of COVID-19 in Hubei province was significantly correlated with the number of newly confirmed/suspected cases (r confirmed = 0.723, r suspected = 0.863, both p < 0.001). The results of the distributed lag model suggested that the patients who searched relevant symptoms on the Internet may begin to see doctors in 2-3 days later and be confirmed in 3-4 days later.

Conclusion

The total SV of lower respiratory symptoms was higher than that of upper respiratory symptoms, and the SV of diarrhea also increased significantly. It warned us to pay attention to not only the symptoms of the lower respiratory tract but also the gastrointestinal symptoms, especially diarrhea in patients with COVID-19. Internet search behavior had a positive correlation with the number of newly confirmed/suspected cases, suggesting that big data has an important role in the early warning of infectious diseases.",2020-05-19 +25332396,BCCTBbp: the Breast Cancer Campaign Tissue Bank bioinformatics portal.,"BCCTBbp (http://bioinformatics.breastcancertissue bank.org) was initially developed as the data-mining portal of the Breast Cancer Campaign Tissue Bank (BCCTB), a vital resource of breast cancer tissue for researchers to support and promote cutting-edge research. BCCTBbp is dedicated to maximising research on patient tissues by initially storing genomics, methylomics, transcriptomics, proteomics and microRNA data that has been mined from the literature and linking to pathways and mechanisms involved in breast cancer. Currently, the portal holds 146 datasets comprising over 227,795 expression/genomic measurements from various breast tissues (e.g. normal, malignant or benign lesions), cell lines and body fluids. BCCTBbp can be used to build on breast cancer knowledge and maximise the value of existing research. By recording a large number of annotations on samples and studies, and linking to other databases, such as NCBI, Ensembl and Reactome, a wide variety of different investigations can be carried out. Additionally, BCCTBbp has a dedicated analytical layer allowing researchers to further analyse stored datasets. 
A future important role for BCCTBbp is to make available all data generated on BCCTB tissues thus building a valuable resource of information on the tissues in BCCTB that will save repetition of experiments and expand scientific knowledge.",2014-10-20 +31750888,All-FIT: allele-frequency-based imputation of tumor purity from high-depth sequencing data.,"

Summary

Clinical sequencing aims to identify somatic mutations in cancer cells for accurate diagnosis and treatment. However, most widely used clinical assays lack patient-matched control DNA and additional analysis is needed to distinguish somatic and unfiltered germline variants. Such computational analyses require accurate assessment of tumor cell content in individual specimens. Histological estimates often do not corroborate with results from computational methods that are primarily designed for normal-tumor matched data and can be confounded by genomic heterogeneity and presence of sub-clonal mutations. Allele-frequency-based imputation of tumor (All-FIT) is an iterative weighted least square method to estimate specimen tumor purity based on the allele frequencies of variants detected in high-depth, targeted, clinical sequencing data. Using simulated and clinical data, we demonstrate All-FIT's accuracy and improved performance against leading computational approaches, highlighting the importance of interpreting purity estimates based on expected biology of tumors.

Availability and implementation

Freely available at http://software.khiabanian-lab.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +31642507,Deep annotation of untargeted LC-MS metabolomics data with Binner.,"

Motivation

When metabolites are analyzed by electrospray ionization (ESI)-mass spectrometry, they are usually detected as multiple ion species due to the presence of isotopes, adducts and in-source fragments. The signals generated by these degenerate features (along with contaminants and other chemical noise) obscure meaningful patterns in MS data, complicating both compound identification and downstream statistical analysis. To address this problem, we developed Binner, a new tool for the discovery and elimination of many degenerate feature signals typically present in untargeted ESI-LC-MS metabolomics data.

Results

Binner generates feature annotations and provides tools to help users visualize informative feature relationships that can further elucidate the underlying structure of the data. To demonstrate the utility of Binner and to evaluate its performance, we analyzed data from reversed phase LC-MS and hydrophilic interaction chromatography (HILIC) platforms and demonstrated the accuracy of selected annotations using MS/MS. When we compared Binner annotations of 75 compounds previously identified in human plasma samples with annotations generated by three similar tools, we found that Binner achieves superior performance in the number and accuracy of annotations while simultaneously minimizing the number of incorrectly annotated principal ions. Data reduction and pattern exploration with Binner have allowed us to catalog a number of previously unrecognized complex adducts and neutral losses generated during the ionization of molecules in LC-MS. In summary, Binner allows users to explore patterns in their data and to efficiently and accurately eliminate a significant number of the degenerate features typically found in various LC-MS modalities.

Availability and implementation

Binner is written in Java and is freely available from http://binner.med.umich.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +33761814,Are Exclusive e-Cigarette Users Unique? Comparing Predictors of Exclusive e-Cigarette Use with Traditional Tobacco Use and Dual Use among U.S. Adolescents.,"Background: As e-cigarette use rises among U.S. adolescents, the need to understand its risk factors becomes increasingly urgent. If the risk profile of adolescents who exclusively use e-cigarettes differs from those who use traditional tobacco products and dual users, prevention and intervention efforts would need to target such differences. Methods: In a sample of 708 adolescents, this study compared individual, peer, and family risk factors that are classically associated with greater substance use between exclusive e-cigarette users and traditional tobacco product users. Results: Exclusive e-cigarette users and traditional tobacco product users share many risk factors when compared to non-users. Additional analyses compared exclusive e-cigarette users to exclusive traditional tobacco users and dual users, with some differences emerging. Lower friend (OR = 0.28, 99% CI [0.12, 0.67]) and peer e-cigarette use (OR = 0.26, 99% CI [0.13, 0.52]), and greater friend cigarette smoking (OR = 2.17, 99% CI [1.23, 3.83]) predicted higher odds of being an exclusive traditional tobacco user compared to an exclusive e-cigarette user. Lower SES (OR = 0.67, 99% CI [0.51, 0.90]), and greater friend (OR = 2.68, 99% CI [1.56, 4.59]) and peer cigarette smoking (OR = 1.91, 99% CI [1.17, 3.13]) predicted greater odds of being a dual user compared to an exclusive e-cigarette user. Conclusion: Although some differences exist between exclusive e-cigarette users and traditional tobacco users, their risk profiles are generally the same. Prevention and intervention efforts that target traditional tobacco product could guide efforts to target e-cigarette use and dual use. 
Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1899236.",2021-03-24 +29092947,"An Image Analysis Resource for Cancer Research: PIIP-Pathology Image Informatics Platform for Visualization, Analysis, and Management.","Pathology Image Informatics Platform (PIIP) is an NCI/NIH sponsored project intended for managing, annotating, sharing, and quantitatively analyzing digital pathology imaging data. It expands on an existing, freely available pathology image viewer, Sedeen. The goal of this project is to develop and embed some commonly used image analysis applications into the Sedeen viewer to create a freely available resource for the digital pathology and cancer research communities. Thus far, new plugins have been developed and incorporated into the platform for out of focus detection, region of interest transformation, and IHC slide analysis. Our biomarker quantification and nuclear segmentation algorithms, written in MATLAB, have also been integrated into the viewer. This article describes the viewing software and the mechanism to extend functionality by plugins, brief descriptions of which are provided as examples, to guide users who want to use this platform. PIIP project materials, including a video describing its usage and applications, and links for the Sedeen Viewer, plug-ins, and user manuals are freely available through the project web page: http://pathiip.org Cancer Res; 77(21); e83-86. ©2017 AACR.",2017-11-01 +32222838,Drug vector representation: a tool for drug similarity analysis.,"DrugMatrix is a valuable toxicogenomic dataset, which provides in vivo transcriptome data corresponding to hundreds of chemical drugs. However, the relationships between drugs and how those drugs affect the biological process are still unknown. The high dimensionality of the microarray data hinders its application. 
The aims of this study are to (1) represent the transcriptome data by lower-dimensional vectors, (2) compare drug similarity, (3) represent drug combinations by adding vectors and (4) infer drug mechanism of action (MoA) and genotoxicity features. We borrowed the latent semantic analysis (LSA) technique from natural language processing to represent treatments (drugs with multiple concentrations and time points) by dense vectors, each dimension of which is an orthogonal biological feature. The gProfiler enrichment tool was used for the 100-dimensional vector feature annotation. The similarity between treatments vectors was calculated by the cosine function. Adding vectors may represent drug combinations, treatment times or treatment doses that are not presented in the original data. Drug-drug interaction pairs had a higher similarity than random drug pairs in the hepatocyte data. The vector features helped to reveal the MoA. Differential feature expression was also implicated for genotoxic and non-genotoxic carcinogens. An easy-to-use Web tool was developed by Shiny Web application framework for the exploration of treatment similarities and drug combinations (https://bioinformatics.fafu.edu.cn/drugmatrix/). We represented treatments by vectors and provided a tool that is useful for hypothesis generation in toxicogenomic, such as drug similarity, drug repurposing, combination therapy and MoA.",2020-03-28 +,S68. UNALTERED FRONTAL AND PREFRONTAL BRAIN RESPONSE DURING WORK MEMORY TASKS IN PATIENTS WITH A FIRST EPISODE PSYCHOSIS META-ANALYSIS STUDY,"Abstract

Background

There is extensive evidence that frontal and prefrontal cortex have abnormal functioning in patients with schizophrenia (Weinberger et al., 2001). For example, with functional magnetic resonance imaging (fMRI), multiple studies have shown altered activation during working memory tasks in these patients compared with controls (Adamczyk et al., 2017; Li et al., 2017). While most of the studies have been conducted in patients with chronic illness, whether these findings translate to individuals at the time of presenting with a First Episode Psychosis (FEP) is less well understood (Soldevila-Matias et al., 2018). The main objective of this study was to meta-analyze fMRI studies that have investigated the brain response to working memory tasks in patients with FEP. These data may be helpful to understand neurobiology of the longitudinal course of working memory dysfunction in patients with schizophrenia.

Methods

We included 19 studies in patients with FEP. We conducted the analysis using anisotropic effect-size seed-based d mapping (AES-SDM, https://www.sdmproject.com/) (Radua et al., 2014). Firstly, AES-SDM used the coordinates and t-values of the peaks of maximum statistical significance reported in the studies to impute a three-dimensional image of the effect-size of the differences in activation between patients and controls, separately for each study. Specifically, it assigned each voxel an effect size that depended on the spatial covariance with the close peaks, whose effect size is known. Secondly, we created a three-dimensional image of the variance of the effect size, again separately for each study. This step is straightforward because the variance of an effect size only depends on the effect size and the samples sizes. Thirdly, we fitted a standard random-effects meta-analysis separately for each voxel.

Results

Patients with FEP showed hypoactivation of median cingulate cortex mainly Brodmann area 32, (peak 4, 26, 40), left precuneus mainly Brodmann area 7 (peak -12,-64, 58), and left anterior insula mainly Brodmann area 47 (peak -36, 18,-12).

Discussion

Our results might point that FEP exhibit altered brain response in some relevant cortical regions. Nevertheless, we didn’t find any significant results in prefrontal and frontal areas in FEP, implying that may be abnormalities in patients, which are a product of degenerative process of the disease. Moreover, the small size of sample may be another possible explanation, which implies more heterogeneous results suggesting that there are no significant results in the frontal and prefrontal lobes. References: • Adamczyk, P., et al., (2017) ‘Neural circuit of verbal humor comprehension in schizophrenia - an fMRI study’, NeuroImage: Clinical. Vol 15: 525–540. • Li, T., et al., (2017) ‘Brain-Wide Analysis of Functional Connectivity in First-Episode and Chronic Stages of Schizophrenia’, Schizophrenia bulletin. Vol 43: Pages 436–448 • Radua, J., et al., (2014) Anisotropic kernels for coordinate-based meta-analyses of neuroimaging studies. Frontiers in Psychiatry. 5: p. 13. • Soldevila Matias, P. et al., (2018) Where is the abnormal brain activity in First Episode Psychosis. Schizophrenia Bulletin. Vol 44; S384. • Weinberger, D. R., Egan, M. F., Bertolino, A., Callicott, J. H., Mattay, V. S., Lipska, B. K., Berman, K. F. and Goldberg, t. e. (2001) ‘Neurobiology of schizophrenia and the role of atypical antipsychotics prefrontal neurons and the genetics of schizophrenia’, Biological Psychiatry, Vol 50, pp. 825–844.",2019-04-01 +27152146,The Non-Coding RNA Ontology (NCRO): a comprehensive resource for the unification of non-coding RNA biology.,"In recent years, sequencing technologies have enabled the identification of a wide range of non-coding RNAs (ncRNAs). Unfortunately, annotation and integration of ncRNA data has lagged behind their identification. Given the large quantity of information being obtained in this area, there emerges an urgent need to integrate what is being discovered by a broad range of relevant communities. 
To this end, the Non-Coding RNA Ontology (NCRO) is being developed to provide a systematically structured and precisely defined controlled vocabulary for the domain of ncRNAs, thereby facilitating the discovery, curation, analysis, exchange, and reasoning of data about structures of ncRNAs, their molecular and cellular functions, and their impacts upon phenotypes. The goal of NCRO is to serve as a common resource for annotations of diverse research in a way that will significantly enhance integrative and comparative analysis of the myriad resources currently housed in disparate sources. It is our belief that the NCRO ontology can perform an important role in the comprehensive unification of ncRNA biology and, indeed, fill a critical gap in both the Open Biological and Biomedical Ontologies (OBO) Library and the National Center for Biomedical Ontology (NCBO) BioPortal. Our initial focus is on the ontological representation of small regulatory ncRNAs, which we see as the first step in providing a resource for the annotation of data about all forms of ncRNAs. The NCRO ontology is free and open to all users, accessible at: http://purl.obolibrary.org/obo/ncro.owl.",2016-05-04 +33691595,Punch-Drunk or Drunken Boxing? The Etiology of Alcohol-Related Physical Violence through Adolescence and Young Adulthood.,"

Background

Alcohol-related physical violence (ARPV) can be a causal consequence of alcohol consumption, but only for specific individuals (e.g., those predisposed to violence). Studies have not accounted for the shared etiology explaining comorbidity between alcohol use and violent behavior as a potential third-variable explanation of ARPV. The current study examined genetically-informed associations between ARPV, heavy alcohol use (HAU) and overall physical violence (OPV) in adolescence and young adulthood, by testing two proposed theories of ARPV processes (HAU causes ARPV, causal relationships depend upon OPV) and how overarching shared covariance may account for these associations.

Methods

Using the twin and sibling subsample from the National Longitudinal Study of Adolescent to Adult Health (Add Health), a series of biometric models tested hypotheses individually in adolescence and young adulthood. This included estimating bivariate Cholesky and direction-of-causality models, and trivariate Cholesky, independent pathway, and common pathway models.

Results

HAU had a causal effect on ARPV in adolescence and young adulthood. This effect was not moderated by OPV at either developmental stage. A shared etiology or common latent factor did not explain associations between ARPV, OPV, and HAU, even though ARPV strongly covaried independently with HAU and with OPV. Finally, OPV also had a causal effect on ARPV in adolescence, and in young adulthood for adolescent-onset drinkers.

Conclusions

Causal theories of ARPV still hold when accounting for shared genetic and environmental variance. Further research on the exact role of violence (predispositions, environmental contexts) is required, as both phenotypes substantially (and separately) explain influences driving ARPV.Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1887244.",2021-03-10 +31025917,Inclusive Teaching.,"Over the past two decades, science, technology, engineering, and mathematics (STEM) faculty have been striving to make their teaching practices more inclusive and welcoming to the variety of students who enter college. However, many STEM faculty, even those at teaching-focused institutions, have been educated in a traditional environment that emphasizes research and may not include classroom teaching. This can produce a deficit in training that leaves many STEM faculty feeling uncertain about inclusive teaching practices and their essential undergirding principles. This essay describes an online, evidence-based teaching guide ( https://lse.ascb.org/evidence-based-teaching-guides/inclusive-teaching ) intended to help fill this gap, serving as a resource for science faculty as they work to become more inclusive, particular with regard to differences in race, ethnicity, and gender. The guide describes the importance of developing self-awareness and empathy for students as a precursor to considering classroom practices. It also explores the role of classroom climate before turning to pedagogical choices that can support students' sense of belonging, competence, and interest in the course. Finally, the guide suggests that true inclusivity is a community effort and that instructors should leverage local and national networks to maximize student learning and inclusion. Each of these essential points is supported by summaries of and links to articles that can inform these choices. 
The guide also includes an instructor checklist that offers a concise summary of key points with actionable steps that can guide instructors as they work toward a more inclusive practice. We hope that the guide will provide value for both faculty who are just beginning to consider how to change their teaching practices and faculty seeking to enrich their current efforts.",2019-06-01 +26887375,PlanTE-MIR DB: a database for transposable element-related microRNAs in plant genomes.,"Transposable elements (TEs) comprise a major fraction of many plant genomes and are known to drive their organization and evolution. Several studies show that these repetitive elements have a prominent role in shaping noncoding regions of the genome such as microRNA (miRNA) loci, which are components of post-transcriptional regulation mechanisms. Although some studies have reported initial formation of miRNA loci from TE sequences, especially in model plants, the approaches that were used did not employ systems that would allow results to be delivered by a user-friendly database. In this study, we identified 152 precursor miRNAs overlapping TEs in 10 plant species. PlanTE-MIR DB was designed to assemble this data and deliver it to the scientific community interested in miRNA origin, evolution, and regulation pathways. Users can browse the database through a web interface and search for entries using various parameters. This resource is cross-referenced with repetitive element (Repbase Update) and miRNA (miRBase) repositories, where sequences can be checked for further analysis. All data in PlanTE-MIR DB are publicly available for download in several file formats to facilitate their understanding and use. 
The database is hosted at http://bioinfo-tool.cp.utfpr.edu.br/plantemirdb/ .",2016-02-18 +30517703,A map of direct TF-DNA interactions in the human genome.,"Chromatin immunoprecipitation followed by sequencing (ChIP-seq) is the most popular assay to identify genomic regions, called ChIP-seq peaks, that are bound in vivo by transcription factors (TFs). These regions are derived from direct TF-DNA interactions, indirect binding of the TF to the DNA (through a co-binding partner), nonspecific binding to the DNA, and noise/bias/artifacts. Delineating the bona fide direct TF-DNA interactions within the ChIP-seq peaks remains challenging. We developed a dedicated software, ChIP-eat, that combines computational TF binding models and ChIP-seq peaks to automatically predict direct TF-DNA interactions. Our work culminated with predicted interactions covering >4% of the human genome, obtained by uniformly processing 1983 ChIP-seq peak data sets from the ReMap database for 232 unique TFs. The predictions were a posteriori assessed using protein binding microarray and ChIP-exo data, and were predominantly found in high quality ChIP-seq peaks. The set of predicted direct TF-DNA interactions suggested that high-occupancy target regions are likely not derived from direct binding of the TFs to the DNA. Our predictions derived co-binding TFs supported by protein-protein interaction data and defined cis-regulatory modules enriched for disease- and trait-associated SNPs. We provide this collection of direct TF-DNA interactions and cis-regulatory modules through the UniBind web-interface (http://unibind.uio.no).",2019-02-01 +32376697,An Extensive Meta-Metagenomic Search Identifies SARS-CoV-2-Homologous Sequences in Pangolin Lung Viromes. ,"In numerous instances, tracking the biological significance of a nucleic acid sequence can be augmented through the identification of environmental niches in which the sequence of interest is present. 
Many metagenomic data sets are now available, with deep sequencing of samples from diverse biological niches. While any individual metagenomic data set can be readily queried using web-based tools, meta-searches through all such data sets are less accessible. In this brief communication, we demonstrate such a meta-metagenomic approach, examining close matches to the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) in all high-throughput sequencing data sets in the NCBI Sequence Read Archive accessible with the ""virome"" keyword. In addition to the homology to bat coronaviruses observed in descriptions of the SARS-CoV-2 sequence (F. Wu, S. Zhao, B. Yu, Y. M. Chen, et al., Nature 579:265-269, 2020, https://doi.org/10.1038/s41586-020-2008-3; P. Zhou, X. L. Yang, X. G. Wang, B. Hu, et al., Nature 579:270-273, 2020, https://doi.org/10.1038/s41586-020-2012-7), we note a strong homology to numerous sequence reads in metavirome data sets generated from the lungs of deceased pangolins reported by Liu et al. (P. Liu, W. Chen, and J. P. Chen, Viruses 11:979, 2019, https://doi.org/10.3390/v11110979). While analysis of these reads indicates the presence of a similar viral sequence in pangolin lung, the similarity is not sufficient to either confirm or rule out a role for pangolins as an intermediate host in the recent emergence of SARS-CoV-2. 
In addition to the implications for SARS-CoV-2 emergence, this study illustrates the utility and limitations of meta-metagenomic search tools in effective and rapid characterization of potentially significant nucleic acid sequences. IMPORTANCE: Meta-metagenomic searches allow for high-speed, low-cost identification of potentially significant biological niches for sequences of interest.",2020-05-06 +30985983,Genome analyses of the new model protist Euplotes vannus focusing on genome rearrangement and resistance to environmental stressors.,"As a model organism for studies of cell and environmental biology, the free-living and cosmopolitan ciliate Euplotes vannus shows intriguing features like dual genome architecture (i.e., separate germline and somatic nuclei in each cell/organism), ""gene-sized"" chromosomes, stop codon reassignment, programmed ribosomal frameshifting (PRF) and strong resistance to environmental stressors. However, the molecular mechanisms that account for these remarkable traits remain largely unknown. Here we report a combined analysis of de novo assembled high-quality macronuclear (MAC; i.e., somatic) and partial micronuclear (MIC; i.e., germline) genome sequences for E. vannus, and transcriptome profiling data under varying conditions. 
The results demonstrate that: (a) the MAC genome contains more than 25,000 complete ""gene-sized"" nanochromosomes (~85 Mb haploid genome size) with the N50 ~2.7 kb; (b) although there is a high frequency of frameshifting at stop codons UAA and UAG, we did not observe impaired transcript abundance as a result of PRF in this species as has been reported for other euplotids; (c) the sequence motif 5'-TA-3' is conserved at nearly all internally-eliminated sequence (IES) boundaries in the MIC genome, and chromosome breakage sites (CBSs) are duplicated and retained in the MAC genome; (d) by profiling the weighted correlation network of genes in the MAC under different environmental stressors, including nutrient scarcity, extreme temperature, salinity and the presence of ammonia, we identified gene clusters that respond to these external physical or chemical stimulations, and (e) we observed a dramatic increase in HSP70 gene transcription under salinity and chemical stresses but surprisingly, not under temperature changes; we link this temperature-resistance to the evolved loss of temperature stress-sensitive elements in regulatory regions. Together with the genome resources generated in this study, which are available online at Euplotes vannus Genome Database (http://evan.ciliate.org), these data provide molecular evidence for understanding the unique biology of highly adaptable microorganisms.",2019-06-06 +31096089,"DrugR+: A comprehensive relational database for drug repurposing, combination therapy, and replacement therapy.","Drug repurposing or repositioning, which introduces new applications of the existing drugs, is an emerging field in drug discovery scope. To enhance the success rate of the research and development (R&D) process in a cost- and time-effective manner, a number of pharmaceutical companies worldwide have made tremendous investments. Besides, many researchers have proposed various methods and databases for the repurposing of various drugs. 
However, there is not a proper and well-organized database available. To this end, for the first time, we developed a new database based on DrugBank and KEGG data, which is named ""DrugR+"". Our developed database provides some advantages relative to the DrugBank, and its interface supplies new capabilities for both single and synthetic repositioning of drugs. Moreover, it includes four new datasets which can be used for predicting drug-target interactions using supervised machine learning methods. As a case study, we introduced novel applications of some drugs and discussed the obtained results. A comparison of several machine learning methods on the generated datasets has also been reported in the Supplementary File. Having included several normalized tables, DrugR + has been organized to provide key information on data structures for the repurposing and combining applications of drugs. It provides the SQL query capability for professional users and an appropriate method with different options for unprofessional users. Additionally, DrugR + consists of repurposing service that accepts a drug and proposes a list of potential drugs for some usages. Taken all, DrugR+ is a free web-based database and accessible using (http://www.drugr.ir), which can be updated through a map-reduce parallel processing method to provide the most relevant information.",2019-05-08 +32491175,Tox21BodyMap: a webtool to map chemical effects on the human body.,"To support rapid chemical toxicity assessment and mechanistic hypothesis generation, here we present an intuitive webtool allowing a user to identify target organs in the human body where a substance is estimated to be more likely to produce effects. This tool, called Tox21BodyMap, incorporates results of 9,270 chemicals tested in the United States federal Tox21 research consortium in 971 high-throughput screening (HTS) assays whose targets were mapped onto human organs using organ-specific gene expression data. 
Via Tox21BodyMap's interactive tools, users can visualize chemical target specificity by organ system, and implement different filtering criteria by changing gene expression thresholds and activity concentration parameters. Dynamic network representations, data tables, and plots with comprehensive activity summaries across all Tox21 HTS assay targets provide an overall picture of chemical bioactivity. Tox21BodyMap webserver is available at https://sandbox.ntp.niehs.nih.gov/bodymap/.",2020-07-01 +30418645,The MemProtMD database: a resource for membrane-embedded protein structures and their lipid interactions.,"Integral membrane proteins fulfil important roles in many crucial biological processes, including cell signalling, molecular transport and bioenergetic processes. Advancements in experimental techniques are revealing high resolution structures for an increasing number of membrane proteins. Yet, these structures are rarely resolved in complex with membrane lipids. In 2015, the MemProtMD pipeline was developed to allow the automated lipid bilayer assembly around new membrane protein structures, released from the Protein Data Bank (PDB). To make these data available to the scientific community, a web database (http://memprotmd.bioch.ox.ac.uk) has been developed. Simulations and the results of subsequent analysis can be viewed using a web browser, including interactive 3D visualizations of the assembled bilayer and 2D visualizations of lipid contact data and membrane protein topology. In addition, ensemble analyses are performed to detail conserved lipid interaction information across proteins, families and for the entire database of 3506 PDB entries. Proteins may be searched using keywords, PDB or Uniprot identifier, or browsed using classification systems, such as Pfam, Gene Ontology annotation, mpstruc or the Transporter Classification Database. 
All files required to run further molecular simulations of proteins in the database are provided.",2019-01-01 +33942461,The hop downy mildew pathogen Pseudoperonospora humuli.,"Pseudoperonospora humuli is an obligate biotrophic oomycete that causes downy mildew, one of the most devastating diseases of cultivated hop, Humulus lupulus. Downy mildew occurs in all production areas of the crop in the Northern Hemisphere and Argentina. The pathogen overwinters in hop crowns and roots, and causes considerable crop loss. Downy mildew is managed by sanitation practices, planting of resistant cultivars, and fungicide applications. However, the scarcity of sources of host resistance and fungicide resistance in pathogen populations complicates disease management. This review summarizes the current knowledge on the symptoms of the disease, life cycle, virulence factors, and management of hop downy mildew, including various forecasting systems available in the world. Additionally, recent developments in genomics and effector discovery, and the future prospects of using such resources in successful disease management are also discussed.

Taxonomy

Class: Oomycota; Order: Peronosporales; Family: Peronosporaceae; Genus: Pseudoperonospora; Species: Pseudoperonospora humuli.

Disease symptoms

The disease is characterized by systemically infected chlorotic shoots called ""spikes"". Leaf symptoms and signs include angular chlorotic lesions and profuse sporulation on the abaxial side of the leaf. Under severe disease pressure, dark brown discolouration or lesions are observed on cones. Infected crowns have brown to black streaks when cut open. Cultivars highly susceptible to crown rot may die at this phase of the disease cycle without producing shoots. However, foliar symptoms may not be present on plants with systemically infected root systems.

Infection process

Pathogen mycelium overwinters in buds and crowns, and emerges on infected shoots in spring. Profuse sporulation occurs on infected tissues and sporangia are released and dispersed by air currents. Under favourable conditions, sporangia germinate and produce biflagellate zoospores that infect healthy tissue, thus perpetuating the infection cycle. Though oospores are produced in infected tissues, their role in the infection cycle is not defined.

Control

Downy mildew on hop is managed by a combination of sanitation practices and timely fungicide applications. Forecasting systems are used to time fungicide applications for successful management of the disease. USEFUL WEBSITES: https://content.ces.ncsu.edu/hop-downy-mildew (North Carolina State University disease factsheet), https://www.canr.msu.edu/resources/michigan-hop-management-guide (Michigan Hop Management Guide), http://uspest.org/risk/models (Oregon State University Integrated Plant Protection Center degree-day model for hop downy mildew), https://www.usahops.org/cabinet/data/Field-Guide.pdf (Field Guide for Integrated Pest Management in Hops).",2021-05-04 +30542370,Co-expression Gene Network Analysis and Functional Module Identification in Bamboo Growth and Development.,"Bamboo is one of the fastest-growing non-timber forest plants. Moso bamboo (Phyllostachys edulis) is the most economically valuable bamboo in Asia, especially in China. With the release of the whole-genome sequence of moso bamboo, there are increasing demands for refined annotation of bamboo genes. Recently, large amounts of bamboo transcriptome data have become available, including data on the multiple growth stages of tissues. It is now feasible for us to construct co-expression networks to improve bamboo gene annotation and reveal the relationships between gene expression and growth traits. We integrated the genome sequence of moso bamboo and 78 transcriptome data sets to build genome-wide global and conditional co-expression networks. We overlaid the gene expression results onto the network with multiple dimensions (different development stages). Through combining the co-expression network, module classification and function enrichment tools, we identified 1,896 functional modules related to bamboo development, which covered functions such as photosynthesis, hormone biosynthesis, signal transduction, and secondary cell wall biosynthesis. 
Furthermore, an online database (http://bioinformatics.cau.edu.cn/bamboo) was built for searching the moso bamboo co-expression network and module enrichment analysis. Our database also includes cis-element analysis, gene set enrichment analysis, and other tools. In summary, we integrated public and in-house bamboo transcriptome data sets and carried out co-expression network analysis and functional module identification. Through data mining, we have yielded some novel insights into the regulation of growth and development. Our established online database might be convenient for the bamboo research community to identify functional genes or modules with important traits.",2018-11-27 +30561546,Seave: a comprehensive web platform for storing and interrogating human genomic variation.,"Motivation:Genome sequencing has had a remarkable impact on our ability to study the effects of human genetic variation, however, variant interpretation remains the major bottleneck. Understanding the potential impact of variants, including structural variants, requires extensive annotation from disparate sources of knowledge, and in silico prediction algorithms. Results:We introduce Seave, an intuitive web platform that enables all types of variants to be securely stored, annotated and filtered. Variants are annotated with allele frequencies and pathogenicity assessments from many popular databases and in silico pathogenicity prediction scores. Seave enables filtering of variants with specific inheritance patterns, including somatic variants, by quality, allele frequencies and gene lists which can be curated and saved. Seave was made for whole genome data and is capable of storing and querying copy number and structural variants. Availability and implementation:To demo Seave with public data, see https://www.seave.bio. Source code is available at http://code.seave.bio and extensive documentation is available at http://documentation.seave.bio. 
Seave can be locally installed on an Apache server with PHP and MySQL, or we provide an Amazon Machine Image for quick deployment. For commercial and clinical diagnostic licensing, contact the corresponding author. Supplementary information:Supplementary data are available at Bioinformatics online.",2019-01-01 +31156655,Fine-Tuning the Expression of Duplicate Genes by Translational Regulation in Arabidopsis and Maize.,"Plant genomes are extensively shaped by various types of gene duplication. However, in this active area of investigation, the vast majority of studies focus on the sequence and transcription of duplicate genes, leaving open the question of how translational regulation impacts the expression and evolution of duplicate genes. We explored this issue by analyzing the ribo- and mRNA-seq data sets across six tissue types and stress conditions in Arabidopsis thaliana and maize (Zea mays). We dissected the relative contributions of transcriptional and translational regulation to the divergence in the abundance of ribosome footprint (RF) for different types of duplicate genes. We found that the divergence in RF abundance was largely programmed at the transcription level and that translational regulation plays more of a modulatory role. Intriguingly, translational regulation is characterized by its strong directionality, with the divergence in translational efficiency (TE) globally counteracting the divergence in mRNA abundance, indicating partial buffering of the transcriptional divergence between paralogs by translational regulation. Divergence in TE was associated with several sequence features. The faster-evolving copy in a duplicate pair was more likely to show lower RF abundance, which possibly results from relaxed purifying selection compared with its paralog. 
A considerable proportion of duplicates displayed differential TE across tissue types and stress conditions, most of which were enriched in photosynthesis, energy production, and translation-related processes. Additionally, we constructed a database TDPDG-DB (http://www.plantdupribo.tk), providing an online platform for data exploration. Overall, our study illustrates the roles of translational regulation in fine-tuning duplicate gene expression in plants.",2019-05-08 +31451757,Construction of complete Tupaia belangeri transcriptome database by whole-genome and comprehensive RNA sequencing.,"The northern tree shrew (Tupaia belangeri) possesses high potential as an animal model of human diseases and biology, given its genetic similarity to primates. Although genetic information on the tree shrew has already been published, some of the entire coding sequences (CDSs) of tree shrew genes remained incomplete, and the reliability of these CDSs remained difficult to determine. To improve the determination of tree shrew CDSs, we performed sequencing of the whole-genome, mRNA, and total RNA and integrated the resulting data. Additionally, we established criteria for the selection of reliable CDSs and annotated these sequences by comparison to the human transcriptome, resulting in the identification of complete CDSs for 12,612 tree shrew genes and yielding a more accurate tree shrew genome database (TupaiaBase: http://tupaiabase.org ). Transcriptome profiles in hepatitis B virus infected tree shrew livers were analyzed for validation. Gene ontology analysis showed enriched transcriptional regulation at 1 day post-infection, namely in the ""type I interferon signaling pathway"". Moreover, a negative regulator of type I interferon, SOCS3, was induced. 
This work, which provides a tree shrew CDS database based on genomic DNA and RNA sequencing, is expected to serve as a powerful tool for further development of the tree shrew model.",2019-08-26 +33604027,Dataset: percent of population covered by local government mask orders in the US.,"We present a dataset covering the extent of local mask orders between April and August 2020, in states which did not have statewide orders (and hence 100% coverage). We obtained data from national and regional newspaper and broadcaster web-based articles, and city and county web pages. The information that we abstracted included: city or county of ordinance, date that the ordinance took effect, and the population of the city or county. In 14 states, city or county governments issued mask-wearing orders, and from our dataset it can be seen that the median population covered in the states was 37.5%; the coverage ranged from 1.6% (New Hampshire) to 77.1% (Arizona). The dataset can be accessed from: https://doi.org/10.7939/DVN/A9C1UU.",2020-10-22 +,Landsat-based snow persistence map for northwest Alaska,"Landsat imagery for northwest Alaska from 1 February to 31 August, 1985–2011 was used to map snow persistence at high spatial resolution. We analyzed 11,645 scenes covering 505,800 km², including five Arctic National Park units and the range of the Western Arctic caribou herd (85 Landsat path/rows). A cloud mask was created using the Landsat Ecosystem Disturbance Adaptive Processing System (LEDAPS). Terrain shadows were calculated from ASTER G-DEM2 and solar incidence angle. The presence of snow cover was determined using separate Snowmap algorithms for non-shadowed and shadowed pixels. Resulting snow cover data were reformatted into 562 30×30 km tiles, with an average sample size per pixel of 216 cloud-free observations. 
A binary classification tree was used to successfully determine the day of the year that best marked the change from snow to snow-free conditions for 99.8% of the study area. An internal consistency check evaluating the occurrence of snow-free data earlier than that day or snow data later than that day, showed that 98.7% of the land pixels were consistently classified ≥90% of the time. Comparison with MODIS end of snow season data showed an average difference of 4.2 days. The snow persistence map was strongly correlated with the few SNOTEL stations in the study area (r² = 0.856). Broadly, most snowmelt over the study area occurs from late April through early June, with timing delayed farther north and at higher elevations. Many local-scale snow patterns are evident in the detailed, 30-m product. The snow persistence map was co-registered to Landsat land cover mapping, creating a powerful, publicly available resource for ecosystem and land use analyses (https://irma.nps.gov/App/Reference/Profile/2203863).",2015-06-01 +32490081,"Dataset on the production of predominantly male tilapia progeny using two malawian tilapias, Oreochromis karongae and Oreochromis shiranus.","A dataset is presented of an experiment that was conducted to compare the proportions of males obtained from hormonal sex reversed pure strain of Oreochromis shiranus and from interspecific hybridization of O. karongae male x O. shiranus female. Part of the data in the dataset were published in a journal article https://doi.org/10.1016/j.aqrep.2020.100274. The data were generated from four SETs of treatments of an experiment that was conducted at the National Aquaculture Center, in Malawi. The first SET of treatment comprised hybrids from interspecific crossing of O. karongae male and O. shiranus female in a pond based breeding hapa 1. SETs 2 and 4 were for fry from hapa spawned pure cross of O. shiranus males and females in a pond based breeding hapa 2. SET 3 comprised fry from pure cross of O. 
shiranus males and females under controlled temperature (27°C) in an indoor re-circulatory hatchery (Tables 1 to 4). During the first part of the experiment, the fry was raised for 28 days in tanks at three replicates and was fed a fry formulated feed containing 38% crude protein, three times a day. However, for SETs 2 and 3, the feed contained 17α-methyl testosterone at 60mg/kg of feed. The second part of the experiment involved growth performance testing of fry in each SET. The growth experiment was conducted in rearing hapas that were inserted in a common pond for a period of 70 days. The body weight data were collected every 14 days (Tables 1 to 4). On the 70th day, the proportions of males and females in the four SETs was determined using Aceto-carmine staining method (Table 5). The pictures of the stained male and female gonads under compound microscope are presented in the published journal article at https://doi.org/10.1016/j.aqrep.2020.100274. Dataset presented in this paper is for body weights of fish from day 0 to day 70 and proportion of males and females in each SET on the 70th day of the experiment (Tables 1, 2, 3, 4 and 5). The data were analyzed using SPSS version 20.0. Chi-square goodness of fit test was used to investigate if the observed sex ratios significantly deviated from the expected sex ratios at 5% level of significance. The differences among fish body weights were determined using Analysis of Variance and significantly different means were separated using Duncan's multiple range test at the 5% test level of significance. These data are useful for various stakeholders that are interested in sexing juvenile tilapia and for scientists that conduct tilapia sex reversal experiments. This data can also guide hatchery managers during commercial production of all male tilapias which grow faster than those in the mixed sex tilapia culture.",2020-05-18 +32666327,Formation of Vortices in Idealised Branching Vessels: A CFD Benchmark Study.,"

Purpose

Atherosclerosis preferentially occurs near the junction of branching vessels, where blood recirculation tends to occur (Malek et al. in J Am Med Assoc 282(21):2035-2042, 1999, https://doi.org/10.1001/jama.282.21.2035 ). For decades, CFD has been used to predict flow patterns such as separation and recirculation zones in hemodynamic models, but those predictions have rarely been validated with experimental data. In the context of verification and validation (V&V), we first conduct a CFD benchmark calculation that reproduces the vortex detection experiments of Karino and Goldsmith (1980) with idealised branching blood vessels (Karino and Goldsmith in Trans. Am. Soc. Artif. Internal Organs 26:500-506, 1980). The critical conditions for the formation of recirculation vortices, the so-called critical Reynolds numbers, are the main parameters for comparison with the experimental data to demonstrate the credibility of the CFD workflow. We then characterise the wall shear stresses and develop a surrogate model for the size of formed vortices.

Methods

An automated parametric study generating more than 12,000 CFD simulations was performed, sweeping the geometries and flow conditions found in the experiments by Karino and Goldsmith. The flow conditions were restricted to steady-state laminar flow, with a range of inflow Reynolds numbers up to 350, with various flow ratios between the main branch outlet and side branch outlet. The side branch diameter was scaled relative to the main branch diameter, ranging from 1.05/3 to 3/3; and the branching angles ranged in size from [Formula: see text] to [Formula: see text]. Recirculation vortices were detected by the inversion of the velocity vector at certain locations, as well as by the inversion of the wall shear stress (WSS) vector.

Results

The CFD simulations demonstrated good agreement with the experimental data on the critical Reynolds numbers. The spatial distributions of WSS on each branch were analysed to identify potential regions of disease. Once a vortex is formed, the size of the vortex increases by the square root of the Reynolds number. The CFD data was fitted to a surrogate model that accurately predicts the vortex size without the need to run computationally more expensive CFD simulations.

Conclusions

This benchmark study validates the CFD simulation of vortex detection in idealised branching vessels under comprehensive flow conditions. This work also proposes a surrogate model for the size of the vortex, which could reduce the computational requirements in the studies related to branching vessels and complex vascular systems.",2020-07-14 +33471089,SWOTein: a structure-based approach to predict stability Strengths and Weaknesses of prOTEINs. ,"Although structured proteins adopt their lowest free energy conformation in physiological conditions, the individual residues are generally not in their lowest free energy conformation. Residues that are stability weaknesses are often involved in functional regions, whereas stability strengths ensure local structural stability. The detection of strengths and weaknesses provides key information to guide protein engineering experiments aiming to modulate folding and various functional processes. We developed the SWOTein predictor which identifies strong and weak residues in proteins on the basis of three types of statistical energy functions describing local interactions along the chain, hydrophobic forces and tertiary interactions. The large-scale analysis of the different types of strengths and weaknesses demonstrated their complementarity and the enhancement of the information they provide. Moreover, a good average correlation was observed between predicted and experimental strengths and weaknesses obtained from native hydrogen exchange data. SWOTein application to three test cases further showed its suitability to predict and interpret strong and weak residues in the context of folding, conformational changes and protein-protein binding. In summary, SWOTein is both fast and accurate and can be applied at small and large scale to analyze and modulate folding and molecular recognition processes. 
The SWOTein webserver provides the list of predicted strengths and weaknesses and a protein structure visualization tool that facilitates the interpretation of the predictions. It is freely available for academic use at http://babylone.ulb.ac.be/SWOTein/.",2021-01-20 +31418036,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline on the Management of Patients With Myelomeningocele: Whether Persistent Ventriculomegaly Adversely Impacts Neurocognitive Development.,"

Background

Myelomeningocele (MM) is the most common congenital anomaly to affect the nervous system and affects 1500-2000 newborn infants per year in the United States. It is accompanied by symptomatic hydrocephalus in approximately 70%-80% of patients. Different treatment strategies for hydrocephalus characteristically result in different effects on the size of the ventricles.

Objective

The objective of this systematic review was to determine whether persistent ventricular enlargement adversely impacts neurocognitive development in patients with MM.

Methods

The PubMed National Library of Medicine Medline database and Embase were queried using MeSH headings and keywords relevant to neurocognitive or intellectual development and ventricular size or morphology. Abstracts were reviewed by the authors to identify which studies met strict inclusion criteria. An evidence table was constructed that summarized the included studies and reflected the quality of evidence (Classes I-III) that each represented. A recommendation was made that is based on the quality of the evidence.

Results

An initial abstract review utilizing strict inclusion/exclusion criteria yielded 48 studies, 9 of which underwent full-text review. There is limited and conflicting Class III evidence from 2 studies.

Conclusion

Currently, there is insufficient data to conclude that ventricular size and morphology impact neurocognitive development.The full guideline can be found at https://www.cns.org/guidelines/guidelines-spina-bifida-chapter-5.",2019-09-01 +30984872,Understanding Human-Virus Protein-Protein Interactions Using a Human Protein Complex-Based Analysis Framework. ,"Computational analysis of human-virus protein-protein interaction (PPI) data is an effective way toward systems understanding the molecular mechanism of viral infection. Previous work has mainly focused on characterizing the global properties of viral targets within the entire human PPI network. In comparison, how viruses manipulate host local networks (e.g., human protein complexes) has been rarely addressed from a computational perspective. By mainly integrating information about human-virus PPIs, human protein complexes, and gene expression profiles, we performed a large-scale analysis of virally targeted complexes (VTCs) related to five common human-pathogenic viruses, including influenza A virus subtype H1N1, human immunodeficiency virus type 1, Epstein-Barr virus, human papillomavirus, and hepatitis C virus. We found that viral targets are enriched within human protein complexes. We observed in the context of VTCs that viral targets tended to have a high within-complex degree and to be scaffold and housekeeping proteins. Complexes that are essential for viral propagation were simultaneously targeted by multiple viruses. We characterized the periodic expression patterns of VTCs and provided the corresponding candidates that may be involved in the manipulation of the host cell cycle. As a potential application of the current analysis, we proposed a VTC-based antiviral drug target discovery strategy. Finally, we developed an online VTC-related platform known as VTcomplex (http://zzdlab.com/vtcomplex/index.php or http://systbio.cau.edu.cn/vtcomplex/index.php). 
We hope that the current analysis can provide new insights into the global landscape of human-virus PPIs at the VTC level and that the developed VTcomplex will become a vital resource for the community. IMPORTANCE Although human protein complexes have been reported to be directly related to viral infection, previous studies have not systematically investigated human-virus PPIs from the perspective of human protein complexes. To the best of our knowledge, we have presented here the most comprehensive and in-depth analysis of human-virus PPIs in the context of VTCs. Our findings confirm that human protein complexes are heavily involved in viral infection. The observed preferences of virally targeted subunits within complexes reflect the mechanisms used by viruses to manipulate host protein complexes. The identified periodic expression patterns of the VTCs and the corresponding candidates could increase our understanding of how viruses manipulate the host cell cycle. Finally, our proposed conceptual application framework of VTCs and the developed VTcomplex could provide new hints to develop antiviral drugs for the clinical treatment of viral infections.",2019-03-01 +32168452,lipidr: A Software Tool for Data Mining and Analysis of Lipidomics Datasets.,"The rapid evolution of mass spectrometry (MS)-based lipidomics has enabled the simultaneous measurement of numerous lipid classes. With lipidomics datasets becoming increasingly available, lipidomic-focused software tools are required to facilitate data analysis as well as mining of public datasets, integrating lipidomics-unique molecular information such as lipid class, chain length, and unsaturation. To address this need, we developed lipidr, an open-source R/Bioconductor package for data mining and analysis of lipidomics datasets. lipidr implements a comprehensive lipidomic-focused analysis workflow for targeted and untargeted lipidomics. 
lipidr imports numerical matrices, Skyline exports, and Metabolomics Workbench files directly into R, automatically inferring lipid class and chain information from lipid names. Through integration with the Metabolomics Workbench API, users can search, download, and reanalyze public lipidomics datasets seamlessly. lipidr allows thorough data inspection, normalization, and uni- and multivariate analyses, displaying results as interactive visualizations. To enable interpretation of lipid class, chain length, and total unsaturation data, we also developed and implemented a novel lipid set enrichment analysis. A companion online guide with two live example datasets is presented at https://www.lipidr.org/. We expect that the ease of use and innovative features of lipidr will allow the lipidomics research community to gain novel detailed insights from lipidomics data.",2020-03-23 +30629125,Statistical force-field for structural modeling using chemical cross-linking/mass spectrometry distance constraints.,"

Motivation

Chemical cross-linking/mass spectrometry (XLMS) is an experimental method to obtain distance constraints between amino acid residues which can be applied to structural modeling of tertiary and quaternary biomolecular structures. These constraints provide, in principle, only upper limits to the distance between amino acid residues along the surface of the biomolecule. In practice, attempts to use XLMS constraints for tertiary protein structure determination have not been widely successful. This indicates the need for specifically designed strategies for the representation of these constraints within modeling algorithms.

Results

A force-field designed to represent XLMS-derived constraints is proposed. The potential energy functions are obtained by computing, in the database of known protein structures, the probability of satisfaction of a topological cross-linking distance as a function of the Euclidean distance between amino acid residues. First, the strategy suggests that XL constraints should be set to shorter distances than usually assumed. Second, the complete statistical force-field improves the models obtained and can be easily incorporated into current modeling methods and software. The force-field was implemented and is distributed to be used within the Rosetta ab initio relax protocol.

Availability and implementation

Force-field parameters and usage instructions are freely available online (http://m3g.iqm.unicamp.br/topolink/xlff).

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +33678117,"""Now We Are Seeing the Tides Wash In"": Trauma and the Opioid Epidemic in Rural Appalachian Ohio.","Background: Ohio's opioid epidemic continues to progress, severely affecting its rural Appalachian counties-areas marked by high mortality rates, widespread economic challenges, and a history of extreme opioid overprescribing. Substance use may be particularly prevalent in the region due to interactions between community and interpersonal trauma. Purpose/Objectives: We conducted qualitative interviews to explore the local context of the epidemic and the contributing role of trauma. Methods: Two interviewers conducted in-depth interviews (n= 34) with stakeholders in three rural Appalachian counties, including healthcare and substance use treatment professionals, law enforcement officials, and judicial officials. Semi-structured interview guides focused on the social, economic, and historical context of the opioid epidemic, perceived causes and effects of the epidemic, and ideas for addressing the challenge. Results: Stakeholders revealed three pervasive forms of trauma related to the epidemic in their communities: environmental/community trauma (including economic and historical distress), physical/sexual trauma, and emotional trauma. Traumas interact with one another and with substance use in a self-perpetuating cycle. Although stakeholders in all groups discussed trauma from all three categories, their interpretation and proposed solutions differed, leading to a fragmented epidemic response. Participants also discussed the potential of finding hope and community through efforts to address trauma and substance use. Conclusions: Findings lend support to the cyclical relationship between trauma and substance use, as well as the importance of environmental and community trauma as drivers of the opioid epidemic. 
Community-level and trauma-informed interventions are needed to increase stakeholder consensus around treatment and prevention strategies, as well as to strengthen community organization networks and support community resilience. Supplemental data for this article is available online at https://doi.org/10.1080/10826084.2021.1887248.",2021-03-07 +32282202,LIT-PCBA: An Unbiased Data Set for Machine Learning and Virtual Screening.,"Comparative evaluation of virtual screening methods requires a rigorous benchmarking procedure on diverse, realistic, and unbiased data sets. Recent investigations from numerous research groups unambiguously demonstrate that artificially constructed ligand sets classically used by the community (e.g., DUD, DUD-E, MUV) are unfortunately biased by both obvious and hidden chemical biases, therefore overestimating the true accuracy of virtual screening methods. We herewith present a novel data set (LIT-PCBA) specifically designed for virtual screening and machine learning. LIT-PCBA relies on 149 dose-response PubChem bioassays that were additionally processed to remove false positives and assay artifacts and keep active and inactive compounds within similar molecular property ranges. To ascertain that the data set is suited to both ligand-based and structure-based virtual screening, target sets were restricted to single protein targets for which at least one X-ray structure is available in complex with ligands of the same phenotype (e.g., inhibitor, inverse agonist) as that of the PubChem active compounds. Preliminary virtual screening on the 21 remaining target sets with state-of-the-art orthogonal methods (2D fingerprint similarity, 3D shape similarity, molecular docking) enabled us to select 15 target sets for which at least one of the three screening methods is able to enrich the top 1%-ranked compounds in true actives by at least a factor of 2. 
The corresponding ligand sets (training, validation) were finally unbiased by the recently described asymmetric validation embedding (AVE) procedure to afford the LIT-PCBA data set, consisting of 15 targets and 7844 confirmed active and 407,381 confirmed inactive compounds. The data set mimics experimental screening decks in terms of hit rate (ratio of active to inactive compounds) and potency distribution. It is available online at http://drugdesign.unistra.fr/LIT-PCBA for download and for benchmarking novel virtual screening methods, notably those relying on machine learning.",2020-04-23 +32117417,NG-Tax 2.0: A Semantic Framework for High-Throughput Amplicon Analysis.,"NG-Tax 2.0 is a semantic framework for FAIR high-throughput analysis and classification of marker gene amplicon sequences including bacterial and archaeal 16S ribosomal RNA (rRNA), eukaryotic 18S rRNA and ribosomal intergenic transcribed spacer sequences. It can directly use single or merged reads, paired-end reads and unmerged paired-end reads from long range fragments as input to generate de novo amplicon sequence variants (ASV). Using the RDF data model, ASV's can be automatically stored in a graph database as objects that link ASV sequences with the full data-wise and element-wise provenance, thereby achieving the level of interoperability required to utilize such data to its full potential. The graph database can be directly queried, allowing for comparative analyses of over thousands of samples and is connected with an interactive Rshiny toolbox for analysis and visualization of (meta) data. Additionally, NG-Tax 2.0 exports an extended BIOM 1.0 (JSON) file as starting point for further analyses by other means. The extended BIOM file contains new attribute types to include information about the command arguments used, the sequences of the ASVs formed, classification confidence scores and is backwards compatible. 
The performance of NG-Tax 2.0 was compared with DADA2, using the plugin in the QIIME 2 analysis pipeline. Fourteen 16S rRNA gene amplicon mock community samples were obtained from the literature and evaluated. Precision of NG-Tax 2.0 was significantly higher with an average of 0.95 vs 0.58 for QIIME2-DADA2 while recall was comparable with an average of 0.85 and 0.77, respectively. NG-Tax 2.0 is written in Java. The code, the ontology, a Galaxy platform implementation, the analysis toolbox, tutorials and example SPARQL queries are freely available at http://wurssb.gitlab.io/ngtax under the MIT License.",2019-01-01 +28486658,A high-coverage draft genome of the mycalesine butterfly Bicyclus anynana.,"The mycalesine butterfly Bicyclus anynana, the ""Squinting bush brown,"" is a model organism in the study of lepidopteran ecology, development, and evolution. Here, we present a draft genome sequence for B. anynana to serve as a genomics resource for current and future studies of this important model species. Seven libraries with insert sizes ranging from 350 bp to 20 kb were constructed using DNA from an inbred female and sequenced using both Illumina and PacBio technology; 128 Gb of raw Illumina data was filtered to 124 Gb and assembled to a final size of 475 Mb (∼×260 assembly coverage). Contigs were scaffolded using mate-pair, transcriptome, and PacBio data into 10 800 sequences with an N50 of 638 kb (longest scaffold 5 Mb). The genome is comprised of 26% repetitive elements and encodes a total of 22 642 predicted protein-coding genes. Recovery of a BUSCO set of core metazoan genes was almost complete (98%). Overall, these metrics compare well with other recently published lepidopteran genomes. We report a high-quality draft genome sequence for Bicyclus anynana. 
The genome assembly and annotated gene models are available at LepBase (http://ensembl.lepbase.org/index.html).",2017-07-01 +30567491,iDEP: an integrated web application for differential expression and pathway analysis of RNA-Seq data.,"BACKGROUND:RNA-seq is widely used for transcriptomic profiling, but the bioinformatics analysis of resultant data can be time-consuming and challenging, especially for biologists. We aim to streamline the bioinformatic analyses of gene-level data by developing a user-friendly, interactive web application for exploratory data analysis, differential expression, and pathway analysis. RESULTS:iDEP (integrated Differential Expression and Pathway analysis) seamlessly connects 63 R/Bioconductor packages, 2 web services, and comprehensive annotation and pathway databases for 220 plant and animal species. The workflow can be reproduced by downloading customized R code and related pathway files. As an example, we analyzed an RNA-Seq dataset of lung fibroblasts with Hoxa1 knockdown and revealed the possible roles of SP1 and E2F1 and their target genes, including microRNAs, in blocking G1/S transition. In another example, our analysis shows that in mouse B cells without functional p53, ionizing radiation activates the MYC pathway and its downstream genes involved in cell proliferation, ribosome biogenesis, and non-coding RNA metabolism. In wildtype B cells, radiation induces p53-mediated apoptosis and DNA repair while suppressing the target genes of MYC and E2F1, and leads to growth and cell cycle arrest. iDEP helps unveil the multifaceted functions of p53 and the possible involvement of several microRNAs such as miR-92a, miR-504, and miR-30a. In both examples, we validated known molecular pathways and generated novel, testable hypotheses. 
CONCLUSIONS:Combining comprehensive analytic functionalities with massive annotation databases, iDEP ( http://ge-lab.org/idep/ ) enables biologists to easily translate transcriptomic and proteomic data into actionable insights.",2018-12-19 +32692801,"COVID-19 Docking Server: a meta server for docking small molecules, peptides and antibodies against potential targets of COVID-19.","

Motivation

The coronavirus disease 2019 (COVID-19) caused by a new type of coronavirus has been emerging from China and has led to thousands of deaths globally since December 2019. Although many groups have engaged in studying the newly emerged virus and searching for the treatment of COVID-19, the understanding of the COVID-19 target-ligand interactions represents a key challenge. Herein, we introduce COVID-19 Docking Server, a web server that predicts the binding modes between COVID-19 targets and the ligands including small molecules, peptides and antibodies.

Results

Structures of proteins involved in the virus life cycle were collected or constructed based on the homologs of coronavirus, and prepared ready for docking. The meta-platform provides a free and interactive tool for the prediction of COVID-19 target-ligand interactions and following drug discovery for COVID-19.

Availability and implementation

http://ncov.schanglab.org.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +28172495,LDAP: a web server for lncRNA-disease association prediction.,"

Motivation

Increasing evidence has demonstrated that long noncoding RNAs (lncRNAs) play important roles in many human diseases. Therefore, predicting novel lncRNA-disease associations would contribute to dissecting the complex mechanisms of disease pathogenesis. Some computational methods have been developed to infer lncRNA-disease associations. However, most of these methods infer lncRNA-disease associations based only on a single data resource.

Results

In this paper, we propose a new computational method to predict lncRNA-disease associations by integrating multiple biological data resources. Then, we implement this method as a web server for lncRNA-disease association prediction (LDAP). The input of the LDAP server is the lncRNA sequence. The LDAP predicts potential lncRNA-disease associations by using a bagging SVM classifier based on lncRNA similarity and disease similarity.

Availability and implementation

The web server is available at http://bioinformatics.csu.edu.cn/ldap

Contact

jxwang@mail.csu.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +33789437,Spoken Vocabulary Outcomes of Toddlers With Developmental Delay After Parent-Implemented Augmented Language Intervention.,"Purpose Early intervention using augmentative and alternative communication (AAC) supports both receptive and expressive language skills. However, many parents and clinicians still worry that augmented language intervention might delay or impair speech development. This study aimed to (a) characterize and analyze the speech sound development of toddlers with developmental delay who participated in a parent-implemented language intervention; (b) examine the accuracy of speech sounds among toddlers who participated in an augmented language intervention using speech-generating devices and toddlers who participated in a traditional, spoken language intervention; and (c) examine the relationship between baseline factors (i.e., receptive and expressive language skills, vocal imitation, and number of unintelligible utterances) and the number of spoken target vocabulary words after intervention. Method This study used extant data from two randomized control trials of parent-implemented language interventions using AAC or spoken language. Out of 109 children who completed the intervention, 45 children produced spoken target vocabulary words at the end of the intervention. We identified and phonetically transcribed spoken target vocabulary words for each child and then classified them based on Shriberg and Kwiatkowski's (1982) developmental sound classes. Results Children's speech sound accuracy was not significantly different across intervention groups. Overall, children who produced more words had more speech sound errors and higher baseline language scores. Intervention group and baseline receptive and expressive language skills significantly predicted the number of spoken target vocabulary words produced at the end of intervention. 
Conclusions Participation in AAC intervention resulted in significantly more spoken target vocabulary words and no statistically significant differences in speech sound errors when compared to children who received spoken language intervention without AAC. Results support using AAC interventions for very young children without the fear that it will delay speech or spoken language development. Supplemental Material https://doi.org/10.23641/asha.14265365.",2021-03-31 +33111403,PDB-tools web: A user-friendly interface for the manipulation of PDB files.,"The Protein Data Bank (PDB) file format remains a popular format used and supported by many software to represent coordinates of macromolecular structures. It however suffers from drawbacks such as error-prone manual editing. Because of that, various software toolkits have been developed to facilitate its editing and manipulation, but, to date, there is no online tool available for this purpose. Here we present PDB-Tools Web, a flexible online service for manipulating PDB files. It offers a rich and user-friendly graphical user interface that allows users to mix-and-match more than 40 individual tools from the pdb-tools suite. Those can be combined in a few clicks to perform complex pipelines, which can be saved and uploaded. The resulting processed PDB files can be visualized online and downloaded. The web server is freely available at https://wenmr.science.uu.nl/pdbtools.",2020-11-07 +33741694,Genome-Wide DNA Methylation Profiling of Esophageal Squamous Cell Carcinoma from Global High-Incidence Regions Identifies Crucial Genes and Potential Cancer Markers.,"Epigenetic mechanisms such as aberrant DNA methylation (DNAme) are known to drive esophageal squamous cell carcinoma (ESCC), yet they remain poorly understood. Here, we studied tumor-specific DNAme in ESCC cases from nine high-incidence countries of Africa, Asia, and South America. 
Infinium MethylationEPIC array was performed on 108 tumors and 51 normal tissues adjacent to the tumors (NAT) in the discovery phase, and targeted pyrosequencing was performed on 132 tumors and 36 NAT in the replication phase. Top genes for replication were prioritized by weighting methylation results using RNA-sequencing data from The Cancer Genome Atlas and GTEx and validated by qPCR. Methylome analysis comparing tumor and NAT identified 6,796 differentially methylated positions (DMP) and 866 differential methylated regions (DMR), with a 30% methylation (Δβ) difference. The majority of identified DMPs and DMRs were hypermethylated in tumors, particularly in promoters and gene-body regions of genes involved in transcription activation. The top three prioritized genes for replication, PAX9, SIM2, and THSD4, had similar methylation differences in the discovery and replication sets. These genes were exclusively expressed in normal esophageal tissues in GTEx and downregulated in tumors. The specificity and sensitivity of these DNAme events in discriminating tumors from NAT were assessed. Our study identified novel, robust, and crucial tumor-specific DNAme events in ESCC tumors across several high-incidence populations of the world. Methylome changes identified in this study may serve as potential targets for biomarker discovery and warrant further functional characterization. SIGNIFICANCE: This largest genome-wide DNA methylation study on ESCC from high-incidence populations of the world identifies functionally relevant and robust DNAme events that could serve as potential tumor-specific markers. 
GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/10/2612/F1.large.jpg.",2021-03-19 +34598409,"Comments on ""Genetic characterization and phylogenetic analysis of Fasciola species based on ITS2 gene sequence, with first molecular evidence of intermediate Fasciola from water buffaloes in Aswan, Egypt"".","Dear Editor-in-Chief, In Annals of Parasitology 2021, 67(1), 55-65, a paper entitled ""Genetic characterization and phylogenetic analysis of Fasciola species based on ITS2 gene sequence, with first molecular evidence of intermediate Fasciola from water buffaloes in Aswan, Egypt"" was published with great interest [1]. After reading the article carefully and critically, we think some points should be noted. Fasciola species are meiotically functional diploid, can produce sperm and temporarily and store in the seminal vesicles. This type is named spermic fluke [2]. On the other hand, intermediate Fasciola with morphological characteristics intermediates between F. hepatica and F. gigantica with no sperm or aspermic and no sperm in seminal vesicles. However, this is also seen in older flukes [3-5]. It seems that morphological studies based on spermatogenesis ability were necessary for this study. Also, this parasite's anthelmintic resistance is due to aspects of biology, and population structure depends on genetic diversity [6]. We question whether there are any documents about and sequences of mitochondrial markers as COX (Cytochrome Oxidase) and NAD (Nicotinamide Adenine Dinucleotide) to analyze intraspecific phylogenetic relationship in addition to nuclear gene? In Table 3, the pairwise distances between three groups of Fasciola spp. from different livestock animals were low, ranging from 0.004 to 0.01 with an overall mean of 0.008. Genetic diversity is described as a tendency of genetic characteristics to vary and serves as a way for the population to adapt to changing hosts and environments [7]. 
The nature of the nuclear gene (ITS) is instability. It is better to use mitochondrial sequence data to compare diversity. Also, genetic discrimination grade from infra population to meta population is annotated by Fst value ranging; 0 to 1. Fst values between 0-0.05 indicated a low genetic differentiation population [8]. It seems that by calculating Fst and showing the gene migration based on mitochondrial sequences data of specimens, this study's species population will be obtained. Also, Tajima's D and Fu's F in all loci populations based on GenBank data may show the Fasciola haplotypes' population proximity. Here we recommend, that Omar et al. [1] studies that molecular phylogeny with mitochondrial DNA efectively used for appropriate diferentiation of haplotypes and spermatogenic ability by carmen allium staining helps them find the physiological aspects. Of course, more prominent populations are needed to find intermediate types. [1] Omar M.A, Elmajdoub L.O., Ali A.O., Ibrahim D.A., Sorour S.S., Al-Wabel M.A., Suresh M., Metwally A.M. 2021. Genetic characterization and phylogenetic analysis of Fasciola species based on ITS2 gene sequence, with first molecular evidence of intermediate Fasciola from water buffaloes in Aswan, Egypt. Annals of Parasitology 67: 55-65. doi:10.17420/ap6701.312 [2] Sanderson A. 1953. Maturation and probable gynogenesis in the liver fluke, Fasciola hepatica L. Nature 172: 110-112. doi:10.1038/172110a0 [3] Hayashi K., Ichikawa-Seki M., Mohanta U.K., Singh T.S., Shoriki T., Sugiyama H., Itagaki T. 2015. Molecular phylogenetic analysis of Fasciola flukes from eastern India. Parasitology International 64: 334-338. https://doi.org/10.1016/j.parint.2015.04.004 [4] Ichikawa-Seki M., Tokashiki M., Opara M.N., Iroh G., Hayashi K., Kumar U.M., Itagaki T. 2017. Molecular characterization and phylogenetic analysis of Fasciola gigantica from Nigeria. Parasitology International 66: 893-897. 
doi:10.1016/j.parint.2016.10.010 [5] Rouhani S., Raeghi S., Mirahmadi H., Fasihi Harandi M., Haghighi A., Spotin A. 2017. Identification of Fasciola spp. in the east of Iran, based on the spermatogenesis and nuclear ribosomal DNA (ITS1) and mitochondrial (ND1) genes. Archives of Clinical Infectious Diseases 12:e57283. doi:10.5812/archcid.57283 [6] Hodgkinson J., Cwiklinski K., Beesley N., Paterson S., Williams D., Devaney E. 2013. Identification of putative markers of triclabendazole resistance by a genome-wide analysis of genetically recombinant Fasciola hepatica. Parasitology 140: 1523. doi:10.1017/S0031182013000528 [7] Bozorgomid A., Rouhani S., Harandi M.F., Ichikawa- Seki M., Raeghi S. 2020. Genetic diversity and distribution of Fasciola hepatica haplotypes in Iran: molecular and phylogenetic studies. Veterinary Parasitology: Regional Studies and Reports 19: 00359. [8] Rouhani S., Raeghi S., Spotin A. 2017. Spermatogenic and phylo-molecular characterizations of isolated Fasciola spp. from cattle, North West Iran. Pakistan Journal of Biological Sciences 20: 204-209.",2021-01-01 +29201145,OCDD: an obesity and co-morbid disease database.,"

Background

Obesity is a medical condition that is known for increased body mass index (BMI). It is also associated with chronic low-level inflammation. Obesity disrupts the immune-metabolic homeostasis by changing the secretion of adipocytes. This affects the end-organs, and gives rise to several diseases including type 2 diabetes, asthma, non-alcoholic fatty liver diseases and cancers. These diseases are known as co-morbid diseases. Several studies have explored the underlying molecular mechanisms of developing obesity-associated co-morbid diseases. To understand the development and progression of diseases associated with obesity, we need a detailed scenario of gene interactions and the distribution of the responsible genes in the human system.

Results

Obesity and Co-morbid Disease Database (OCDD) is designed for relating obesity and its co-morbid diseases using literature mining, and computational and systems biology approaches. OCDD is aimed to investigate the genes associated with comorbidity. Several existing databases have been used to extract molecular interactions and functional annotations of each gene. The degree of co-morbid associations has been measured and made available to the users. The database is available at http://www.isical.ac.in/~systemsbiology/OCDD/home.php.

Conclusions

The main objective of the database is to derive the relations among the genes that are involved in both obesity and its co-morbid diseases. Functional annotation of common genes, gene interaction networks and key driver analyses have made the database a valuable and comprehensive resource for investigating the causal links between obesity and co-morbid diseases.",2017-11-21 +32647037,"An Analysis of Variability in ""CatWalk"" Locomotor Measurements to Aid Experimental Design and Interpretation.","Preclinical studies in models of neurologic injury and disease rely on behavioral outcomes to measure intervention efficacy. For spinal cord injury, the CatWalk system provides unbiased quantitative assessment of subtle aspects of locomotor function in rodents and so can powerfully detect significant differences between experimental and control groups. Although clearly of key importance, summary group-level data can obscure the variability within and between individual subjects and therefore make it difficult to understand the magnitude of effect in individual animals and the proportion of a group that may show benefit. Here, we calculate reference change intervals (RCIs) that define boundaries of normal variability for measures of rat locomotion on the CatWalk. Our results indicate that many commonly-used outcome measures are highly variable, such that differences of up to 70% from baseline value must be considered normal variation. Many CatWalk outcome variables are also highly correlated and dependent on run speed. Application of calculated RCIs to open access data (https://scicrunch.org/odc-sci) on hindlimb stride length in spinal cord-injured rats illustrates the complementarity between group-level (16 mm change; p = 0.0009) and individual-level (5/32 animals show change outside RCI boundaries) analysis between week 3 and week 6 after injury. 
We also conclude that interdependence among CatWalk variables implies that test ""batteries"" require careful composition to ensure that different aspects of defective gait are analyzed. Calculation of RCIs aids in experimental design by quantifying variability and enriches overall data analysis by providing details of change at an individual level that complement group-level analysis.",2020-07-01 +32786678,Joint Synovial Fluid Metabolomics Method to Decipher the Metabolic Mechanisms of Adjuvant Arthritis and Geniposide Intervention.,"Rheumatoid arthritis (RA), a chronic systemic autoimmune disease, is mainly characterized by joint lesions and permanent loss of joint function. To discover the metabolic characteristics of RA and the underlying mechanisms in treatment with geniposide (GE), untargeted metabolomic analysis based on hydrophilic interaction liquid chromatography coupled to high-resolution mass spectrometry (HILIC-HRMS) was performed using the joint synovial fluid samples from adjuvant arthritis (AA) rats. Microdialysis (MD) was utilized to collect the dialysate samples precisely from the articular cavity of AA rats. Multivariate statistical analysis was then conducted to discover the metabolite changes induced by AA and to differentiate GE-related biomarkers. The mass spectrometry data are available on the Chorus website (https://chorusproject.org/pages/index.html) with the data set identifier 1680. The results showed that 20 metabolites differed significantly between AA rats and normal rats. GE treatment recovered the altered levels of the 13 metabolites mentioned above, such as palmitoylethanolamide (PEA), Cer (d18:0/22:0), and PC (18:1(11Z)/16:1(9Z)), and normalized glycerophospholipid metabolism. As evidenced by western blotting, the changes in PEA levels adjusted by GE were associated with the down-regulated expression of N-acylethanolamine-hydrolyzing acid amidase (NAAA) in synovial tissues. 
Taken together, the elucidation of metabolic changes of joint synovial fluid and how this is influenced by GE will promote future therapeutic interventions of RA.",2020-08-21 +26212453,An Interactive Database for the Assessment of Histone Antibody Specificity.,"Access to high-quality antibodies is a necessity for the study of histones and their posttranslational modifications (PTMs). Here we debut the Histone Antibody Specificity Database (http://www.histoneantibodies.com), an online and expanding resource cataloging the behavior of widely used, commercially available histone antibodies by peptide microarray. This interactive web portal provides a critical resource to the biological research community that routinely uses these antibodies as detection reagents for a wide range of applications.",2015-07-23 +30759212,Expanding CSDB_GT glycosyltransferase database with Escherichia coli.,"In 2017, we reported a new database on glycosyltransferase (GT) activities, CSDB_GT (http://csdb.glycoscience.ru/gt.html), which was built at the platform of the Carbohydrate Structure Database (CSDB, http://csdb.glycoscience.ru/database/index.html) and contained data on experimentally confirmed GT activities from Arabidopsis thaliana. All entries in CSDB_GT are curated manually upon the analysis of scientific publications, and the key features of the database are accurate structural, genetic, protein and bibliographic references and close-to-complete coverage on experimentally proven GT activities in selected species. In 2018, CSDB_GT was supplemented with data on Escherichia coli GT activities. Now it contains ca. 800 entries on E. coli GTs, including ca. 550 entries with functions predicted in silico. This information was extracted from research papers published up to the year 2018 or was obtained by the authors' efforts on GT annotation. 
Thus, CSDB_GT was extended to provide not only experimentally confirmed GT activities, but also those predicted on the basis of gene or protein sequence homology that could carry valuable information. Accordingly, a new confirmation status-predicted in silico-was introduced. In addition, the coverage on A. thaliana was extended up to ca. 900 entries, all of which had experimental confirmation. Currently, CSDB_GT provides close-to-complete coverage on experimentally confirmed GT activities from A. thaliana and E. coli presented up to the year 2018.",2019-04-01 +31465497,Population size estimation for quality control of ChIP-Seq datasets.,"Chromatin immunoprecipitation followed by sequencing, i.e. ChIP-Seq, is a widely used experimental technology for the identification of functional protein-DNA interactions. Nowadays, such databases as ENCODE, GTRD, ChIP-Atlas and ReMap systematically collect and annotate a large number of ChIP-Seq datasets. Comprehensive control of dataset quality is currently indispensable to select the most reliable data for further analysis. In addition to existing quality control metrics, we have developed two novel metrics that allow to control false positives and false negatives in ChIP-Seq datasets. For this purpose, we have adapted well-known population size estimate for determination of unknown number of genuine transcription factor binding regions. Determination of the proposed metrics was based on overlapping distinct binding sites derived from processing one ChIP-Seq experiment by different peak callers. Moreover, the metrics also can be useful for assessing quality of datasets obtained from processing distinct ChIP-Seq experiments by a given peak caller. We also have shown that these metrics appear to be useful not only for dataset selection but also for comparison of peak callers and identification of site motifs based on ChIP-Seq datasets. 
The developed algorithm for determination of the false positive control metric and false negative control metric for ChIP-Seq datasets was implemented as a plugin for a BioUML platform: https://ict.biouml.org/bioumlweb/chipseq_analysis.html.",2019-08-29 +30165582,Traitpedia: a collaborative effort to gather species traits.,

Summary

Traitpedia is a collaborative database aimed to collect binary traits in a tabular form for a growing number of species.

Availability and implementation

Traitpedia can be accessed from http://cbdm-01.zdv.uni-mainz.de/~munoz/traitpedia.

Supplementary information

Supplementary data are available at Bioinformatics online.,2019-03-01 +28150246,iPTMnet: Integrative Bioinformatics for Studying PTM Networks.,"Protein post-translational modification (PTM) is an essential cellular regulatory mechanism, and disruptions in PTM have been implicated in disease. PTMs are an active area of study in many fields, leading to a wealth of PTM information in the scientific literature. There is a need for user-friendly bioinformatics resources that capture PTM information from the literature and support analyses of PTMs and their functional consequences. This chapter describes the use of iPTMnet ( http://proteininformationresource.org/iPTMnet/ ), a resource that integrates PTM information from text mining, curated databases, and ontologies and provides visualization tools for exploring PTM networks, PTM crosstalk, and PTM conservation across species. We present several PTM-related queries and demonstrate how they can be addressed using iPTMnet.",2017-01-01 +33634169,Development and Validation of a Predictive Model for Coronary Artery Disease Using Machine Learning.,"Early identification of coronary artery disease (CAD) can prevent the progress of CAD and effectually lower the mortality rate, so we intended to construct and validate a machine learning model to predict the risk of CAD based on conventional risk factors and lab test data. There were 3,112 CAD patients and 3,182 controls enrolled from three centers in China. We compared the baseline and clinical characteristics between two groups. Then, Random Forest algorithm was used to construct a model to predict CAD and the model was assessed by receiver operating characteristic (ROC) curve. In the development cohort, the Random Forest model showed a good AUC 0.948 (95%CI: 0.941-0.954) to identify CAD patients from controls, with a sensitivity of 90%, a specificity of 85.4%, a positive predictive value of 0.863 and a negative predictive value of 0.894. 
Validation of the model also yielded a favorable discriminatory ability with the AUC, sensitivity, specificity, positive predictive value, and negative predictive value of 0.944 (95%CI: 0.934-0.955), 89.5%, 85.8%, 0.868, and 0.886 in the validation cohort 1, respectively, and 0.940 (95%CI: 0.922-0.960), 79.5%, 94.3%, 0.932, and 0.823 in the validation cohort 2, respectively. An easy-to-use tool that combined 15 indexes to assess the CAD risk was constructed and validated using Random Forest algorithm, which showed favorable predictive capability (http://45.32.120.149:3000/randomforest). Our model is extremely valuable for clinical practice, which will be helpful for the management and primary prevention of CAD patients.",2021-02-02 +28365719,Outreach and online training services at the Saccharomyces Genome Database. ,"The Saccharomyces Genome Database (SGD; www.yeastgenome.org ), the primary genetics and genomics resource for the budding yeast S. cerevisiae , provides free public access to expertly curated information about the yeast genome and its gene products. As the central hub for the yeast research community, SGD engages in a variety of social outreach efforts to inform our users about new developments, promote collaboration, increase public awareness of the importance of yeast to biomedical research, and facilitate scientific discovery. Here we describe these various outreach methods, from networking at scientific conferences to the use of online media such as blog posts and webinars, and include our perspectives on the benefits provided by outreach activities for model organism databases. http://www.yeastgenome.org.",2017-01-01 +30649350,Xolik: finding cross-linked peptides with maximum paired scores in linear time.,"

Motivation

Cross-linking technique coupled with mass spectrometry (MS) is widely used in the analysis of protein structures and protein-protein interactions. In order to identify cross-linked peptides from MS data, we need to consider all pairwise combinations of peptides, which is computationally prohibitive when the sequence database is large. To alleviate this problem, some heuristic screening strategies are used to reduce the number of peptide pairs during the identification. However, heuristic screening strategies may miss some true cross-linked peptides.

Results

We directly tackle the combination challenge without using any screening strategies. With the data structure of double-ended queue, the proposed algorithm reduces the quadratic time complexity of exhaustive searching down to the linear time complexity. We implement the algorithm in a tool named Xolik. The running time of Xolik is validated using databases with different numbers of proteins. Experiments using synthetic and empirical datasets show that Xolik outperforms existing tools in terms of running time and statistical power.

Availability and implementation

Source code and binaries of Xolik are freely available at http://bioinformatics.ust.hk/Xolik.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +33780297,A combined health action process approach and mHealth intervention to reduce sedentary behaviour in university students - a randomized controlled trial.,"Objective: This investigation evaluated the effectiveness of a Health Action Process Approach (HAPA) based planning intervention augmented with text messages to reduce student-related sitting time (primary outcome) and increase specific non-sedentary behaviours. Relationships between the HAPA volitional constructs and sedentary and non-sedentary behaviours were also explored. Design: University students (Mage = 21.13 y; SD = 4.81) were randomized into either a HAPA intervention (n = 28) or control (n = 33) condition. Main Outcome Measures: School-related sitting time, time spent in specific non-sedentary behaviours and HAPA volitional constructs were assessed at baseline, weeks 2, 4, 6 (post-intervention) and 8 (follow-up). Results: Significant group by time interaction effects favouring the intervention group were found for sitting time (p = 0.004, ηp² = 0.10), walking time (p = 0.021, ηp² = 0.06) and stretching time (p = 0.023, ηp² = 0.08), as well as for action planning (p < 0.001, ηp² = 0.17), coping planning (p < 0.001, ηp² = 0.20) and action control (p < 0.001, ηp² = 0.20). Significant correlations (p < 0.05) were also found between the HAPA constructs and sitting-related outcomes. 
Conclusions: Combining a HAPA-based planning intervention with text messages can reduce student-related sitting time in university students. Supplemental data for this article is available online at https://doi.org/10.1080/08870446.2021.1900574 .",2021-03-29 +32435399,ProBiS H2O MD Approach for Identification of Conserved Water Sites in Protein Structures for Drug Design.,"The ProBiS H2O MD approach for identification of conserved waters and water sites of interest in macromolecular systems, which is becoming a typical step in a structure-based drug design or macromolecular study in general, is described. This work explores an extension of the ProBiS H2O approach introduced by Jukič et al. Indeed, water molecules are key players in the interaction mechanisms of macromolecules and small molecules and play structural roles. Our earlier developed approach, ProBiS H2O, is a simple and transparent workflow for conserved water detection. Here we have considered generalizing the idea by supplementing the experimental data with data derived from molecular dynamics to facilitate work on less known systems. Newly developed ProBiS H2O MD workflow uses trajectory data, extracts and identifies interesting water sites, and visualizes the results. ProBiS H2O MD can thus robustly process molecular dynamic trajectory snapshots, perform local superpositions, collect water location data, and perform density-based clustering to identify discrete sites with high conservation of water molecules. This is a new approach that uses experimental data in silico to identify interesting water sites. Methodology is fast and water-model or molecular dynamics software independent. Trends in the conservation of water molecules can be followed over a variety of trajectories, and our approach has been successfully validated using reported protein systems with experimentally observed conserved water molecules. 
ProBiS H2O MD is freely available as PyMOL plugin at http://insilab.org.",2020-03-19 +32008461,Does Every Student Count! in Your State? Update on the National School Nurse Dataset.,"The NASN launched a new data initiative in 2018 called: The National School Health Data Set: Every Student Counts! The initiative includes three distinct foci or prongs. This article reports on the progress of states participating in Every Student Counts! For more information on NASN's initiative and to learn how school nurses can join the data revolution, go to http://nasn.org/everystudentcounts .",2020-02-01 +32838035,Rapid translation of clinical guidelines into executable knowledge: A case study of COVID-19 and online demonstration.,"

Introduction

We report a pathfinder study of AI/knowledge engineering methods to rapidly formalise COVID-19 guidelines into an executable model of decision making and care pathways. The knowledge source for the study was material published by BMJ Best Practice in March 2020.

Methods

The PROforma guideline modelling language and OpenClinical.net authoring and publishing platform were used to create a data model for care of COVID-19 patients together with executable models of rules, decisions and plans that interpret patient data and give personalised care advice.

Results

PROforma and OpenClinical.net proved to be an effective combination for rapidly creating the COVID-19 model; the Pathfinder 1 demonstrator is available for assessment at https://www.openclinical.net/index.php?id=746.

Conclusions

This is believed to be the first use of AI/knowledge engineering methods for disseminating best-practice in COVID-19 care. It demonstrates a novel and promising approach to the rapid translation of clinical guidelines into point of care services, and a foundation for rapid learning systems in many areas of healthcare.",2020-07-14 +33505515,iBLP: An XGBoost-Based Predictor for Identifying Bioluminescent Proteins.,"Bioluminescent proteins (BLPs) are a class of proteins that widely distributed in many living organisms with various mechanisms of light emission including bioluminescence and chemiluminescence from luminous organisms. Bioluminescence has been commonly used in various analytical research methods of cellular processes, such as gene expression analysis, drug discovery, cellular imaging, and toxicity determination. However, the identification of bioluminescent proteins is challenging as they share poor sequence similarities among them. In this paper, we briefly reviewed the development of the computational identification of BLPs and subsequently proposed a novel predicting framework for identifying BLPs based on eXtreme gradient boosting algorithm (XGBoost) and using sequence-derived features. To train the models, we collected BLP data from bacteria, eukaryote, and archaea. Then, for getting more effective prediction models, we examined the performances of different feature extraction methods and their combinations as well as classification algorithms. Finally, based on the optimal model, a novel predictor named iBLP was constructed to identify BLPs. The robustness of iBLP has been proved by experiments on training and independent datasets. Comparison with other published method further demonstrated that the proposed method is powerful and could provide good performance for BLP identification. 
The webserver and software package for BLP identification are freely available at http://lin-group.cn/server/iBLP.",2021-01-07 +27387304,Elucidating and mining the Tulipa and Lilium transcriptomes.,"Genome sequencing remains a challenge for species with large and complex genomes containing extensive repetitive sequences, of which the bulbous and monocotyledonous plants tulip and lily are examples. In such a case, sequencing of only the active part of the genome, represented by the transcriptome, is a good alternative to obtain information about gene content. In this study we aimed to generate a high quality transcriptome of tulip and lily and to make this data available as an open-access resource via a user-friendly web-based interface. The Illumina HiSeq 2000 platform was applied and the transcribed RNA was sequenced from a collection of different lily and tulip tissues, respectively. In order to obtain good transcriptome coverage and to facilitate effective data mining, assembly was done using different filtering parameters for clearing out contamination and noise of the RNAseq datasets. This analysis revealed limitations of commonly applied methods and parameter settings used in de novo transcriptome assembly. The final created transcriptomes are publicly available via a user friendly Transcriptome browser ( http://www.bioinformatics.nl/bulbs/db/species/index ). The usefulness of this resource has been exemplified by a search for all potential transcription factors in lily and tulip, with special focus on the TCP transcription factor family. This analysis and other quality parameters point out the quality of the transcriptomes, which can serve as a basis for further genomics studies in lily, tulip, and bulbous plants in general.",2016-07-07 +33906377,Salt Transiently Inhibits Mitochondrial Energetics in Mononuclear Phagocytes.,"

Background

Dietary high salt (HS) is a leading risk factor for mortality and morbidity. Serum sodium transiently increases postprandially but can also accumulate at sites of inflammation affecting differentiation and function of innate and adaptive immune cells. Here, we focus on how changes in extracellular sodium, mimicking alterations in the circulation and tissues, affect the early metabolic, transcriptional, and functional adaption of human and murine mononuclear phagocytes.

Methods

Using Seahorse technology, pulsed stable isotope-resolved metabolomics, and enzyme activity assays, we characterize the central carbon metabolism and mitochondrial function of human and murine mononuclear phagocytes under HS in vitro. HS as well as pharmacological uncoupling of the electron transport chain under normal salt is used to analyze mitochondrial function on immune cell activation and function (as determined by Escherichia coli killing and CD4+ T cell migration capacity). In 2 independent clinical studies, we analyze the effect of a HS diet during 2 weeks (URL: http://www.clinicaltrials.gov. Unique identifier: NCT02509962) and short-term salt challenge by a single meal (URL: http://www.clinicaltrials.gov. Unique identifier: NCT04175249) on mitochondrial function of human monocytes in vivo.

Results

Extracellular sodium was taken up into the intracellular compartment, followed by the inhibition of mitochondrial respiration in murine and human macrophages. Mechanistically, HS reduces mitochondrial membrane potential, electron transport chain complex II activity, oxygen consumption, and ATP production independently of the polarization status of macrophages. Subsequently, cell activation is altered with improved bactericidal function in HS-treated M1-like macrophages and diminished CD4+ T cell migration in HS-treated M2-like macrophages. Pharmacological uncoupling of the electron transport chain under normal salt phenocopies HS-induced transcriptional changes and bactericidal function of human and murine mononuclear phagocytes. Clinically, also in vivo, rise in plasma sodium concentration within the physiological range reversibly reduces mitochondrial function in human monocytes. In both a 14-day and single meal HS challenge, healthy volunteers displayed a plasma sodium increase of [Formula: see text] and [Formula: see text] respectively, that correlated with decreased monocytic mitochondrial oxygen consumption.

Conclusions

Our data identify the disturbance of mitochondrial respiration as the initial step by which HS mechanistically influences immune cell function. Although these functional changes might help to resolve bacterial infections, a shift toward proinflammation could accelerate inflammatory cardiovascular disease.",2021-04-28 +31921279,A Vision for Development and Utilization of High-Throughput Phenotyping and Big Data Analytics in Livestock.,"Automated high-throughput phenotyping with sensors, imaging, and other on-farm technologies has resulted in a flood of data that are largely under-utilized. Drastic cost reductions in sequencing and other omics technology have also facilitated the ability for deep phenotyping of livestock at the molecular level. These advances have brought the animal sciences to a cross-roads in data science where increased training is needed to manage, record, and analyze data to generate knowledge and advances in Agriscience related disciplines. This paper describes the opportunities and challenges in using high-throughput phenotyping, ""big data,"" analytics, and related technologies in the livestock industry based on discussions at the Livestock High-Throughput Phenotyping and Big Data Analytics meeting, held in November 2017 (see: https://www.animalgenome.org/bioinfo/community/workshops/2017/). Critical needs for investments in infrastructure for people (e.g., ""big data"" training), data (e.g., data transfer, management, and analytics), and technology (e.g., development of low cost sensors) were defined by this group. Though some subgroups of animal science have extensive experience in predictive modeling, cross-training in computer science, statistics, and related disciplines are needed to use big data for diverse applications in the field. 
Extensive opportunities exist for public and private entities to harness big data to develop valuable research knowledge and products to the benefit of society under the increased demands for food in a rapidly growing population.",2019-12-17 +31378201,"Understanding the interactions between iron supplementation, infectious disease and adverse birth outcomes is essential to guide public health recommendations.","Pregnant women are highly susceptible to anaemia and iron deficiency due to the increased demands of pregnancy as well as other factors. Iron supplementation is recommended in pregnancy, yet the benefits on newborn outcomes are variable between populations, most likely due to the heterogeneity in the prevalence of iron deficiency, detrimental birth outcomes and infectious diseases. Furthermore, there are concerns regarding iron supplementation in malaria-endemic areas due to reports of increased risk of malaria in those receiving iron. This is compounded by limited knowledge of how iron deficiency, anaemia, malaria, and other infections may interact to influence birth outcomes. In a recent cohort study in Papua New Guinea, where there is a high burden of infections and iron deficiency, we found that iron deficiency in pregnancy was associated with a reduced risk of adverse birth outcomes. However, this effect could not be wholly explained by interactions between iron deficiency and malaria. We proposed that iron deficiency may confer a degree of protection against other infectious pathogens, which in turn caused improvements in birthweight. We argue that further studies in multiple populations are crucial to elucidate interactions between iron status, iron supplementation and birthweight as well as to understand the context-specific benefits of iron supplementation in pregnancy and inform public policy. 
Focus should be given to haematological studies on anaemia, haemodilution and iron absorption, as well as investigating infectious diseases and other nutritional deficiencies. This is a particular priority in resource-constrained settings where the prevalence of iron deficiency, poor nutrition, infections and poor birth outcomes are high. While current recommendations of iron supplementation and malaria prophylaxis to reduce the burden of poor pregnancy outcomes should be supported, the strength of evidence underpinning these must be improved and new insights should be garnered in order to maximise improvements in maternal and child health. Please see related article: https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-018-1146-z . Please see related article: https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-019-1375-9 .",2019-08-05 +33252892,The decision-making process in recommending electronic communication aids for children and young people who are non-speaking: the I-ASC mixed-methods study,"

Background

This project [Identifying Appropriate Symbol Communication (I-ASC)] explored UK decision-making practices related to communication aid recommendations for children and young people who are non-speaking. Research evidence related to communication aid decision-making is limited. The research aims were to increase understanding of influencers on the decision-making process in recommending electronic communication aids, and to develop guidance tools to support decision-making. An additional, post hoc aim was to evaluate the public involvement contribution to the I-ASC project. The research focused on the identification of attributes and characteristics that professionals, family members and those who use communication aids considered important in the recommendation process. Findings informed the development of guidance resources. The evaluation of public involvement focused on what could be learned from a nationally funded project with involvement from public contributors typically regarded as hard to include.

Methodology

For the clinical decision-making component, the methodological investigation adopted a three-tier approach with three systematic reviews, a qualitative exploration of stakeholder perspectives through focus groups and interviews, and a quantitative investigation surveying professionals’ perspectives. The public involvement evaluation adopted a mixed-methods approach. A total of 354 participants contributed to the decision-making data set, including professionals, family members, and children, young people and adults who use communication aids; 22 participants contributed to the public involvement evaluation. The literature review process followed the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) guidelines. Thematic analysis and framework approach supported the analysis of qualitative data. Two stated preference surveys, a best–worst scaling and a discrete choice experiment, allowed the relative importance of factors in decision-making to be determined. Analysis was grounded in random utility theory.

Public involvement

Two public involvement co-researchers, an adult using a symbol communication aid and a parent of a communication aid user, were core members of the research team. The I-ASC public involvement resulted in an additional award to evaluate the impact of public involvement across the project.

Results

Factors influencing decision-making are not always under the control of the decision-makers, for example professional knowledge, referral criteria and service structure. Findings suggest that real clinical decisions contrast with hypothetical decisions. Survey responses indicated that children’s physical characteristics are less important than their language, communication and learning abilities; however, during real-time decision-making, the opposite appeared to be true, with access needs featuring most prominently. In contrast to professionals’ decisions, users and family members prioritise differing aesthetic attributes of communication aids. Time allocated to system learning remains underspecified. The research informed the development of decision-making guidance tools (https://iasc.mmu.ac.uk/; accessed 8 June 2020). A public involvement evaluation suggests that successful public involvement of individuals with disabilities requires significant resources that include staff time, training and personal support (https://iasc.mmu.ac.uk/publicinvolvement; accessed 8 June 2020).

Future work

Further research is needed in the areas of language assessment, communication aid attributes, types of decision-making episodes and service user perspectives. These data highlight the need for mechanisms that enable public involvement co-researchers to be paid for their contributions to research bid preparation.

Limitations

Individuals who benefit from communication aids are a heterogeneous group. We cannot guarantee that this study has captured all relevant components of decision-making.

Funding

This project was funded by the National Institute for Health Research (NIHR) Health Services and Delivery Research programme and will be published in full in Health Services and Delivery Research; Vol. 8, No. 45. See the NIHR Journals Library website for further project information.",2020-12-01 +34040281,Political Ideology Modifies the Effect of Glass Cliff Candidacies on Election Outcomes for Women in American State Legislative Races (2011-2016).,"Research on glass cliff political candidacies shows that compared to men, women are more likely to run for office in districts where they are likely to lose. We examined if party differences in whether female candidates face these worse conditions in the United States could account for persistent and growing party and state variation in women's representation. Using election data from 2011 to 2016, we compared Republican versus Democratic candidacies at the state legislative level. We found that women in both parties faced glass cliffs in House races, but not in the Senate. For Republican women, glass cliff conditions accounted for worse election outcomes, but Democratic women were more likely to win when these conditions were considered. Variation in party by state measures of glass cliff effects were also found to explain state variation in women's office holding. We found that for Democrats, more women win when more women run, but for Republicans, more women win only when the seats they face are more winnable. These results point to the role of polarized traditional versus progressive political ideologies in structuring the motives which underlie glass cliff conditions for women in politics, suggesting that practical solutions be tailored to party. To overcome the growing gap in women's representation, current efforts to increase the quantity of women running would be complemented by a focus on improving the quality of contests they face, with Republican women most likely to benefit. 
Further research attending to the multiple sources of variation which impact gendered election outcomes can inform more targeted solutions for advancing equality. Online slides for instructors who want to use this article for teaching are available on PWQ's website at http://journals.sagepub.com/doi/suppl/10.1177/0361684321992046.",2021-03-02 +30334203,Finding Potential Multitarget Ligands Using PubChem.,"PubChem ( https://pubchem.ncbi.nlm.nih.gov ) is a key chemical information resource, developed and maintained by the US National Institutes of Health. The present chapter describes how to find potential multitarget ligands from PubChem that would be tested in further experiments. While the protocol presented here uses PubChem's Web-based interfaces to allow users to follow it interactively, it can also be implemented in computer software by using programmatic access interfaces to PubChem (such as PUG-REST or E-Utilities).",2018-01-01 +32583172,An Evaluation of Florida's Zika Response Using the WHO Health Systems Framework: Can We Apply These Lessons to COVID-19?,"

Objectives

From 2016 to 2018 Florida documented 1471 cases of Zika virus, 299 of which were pregnant women (Florida Department of Health, https://www.floridahealth.gov/diseases-and-conditions/mosquito-bornediseases/surveillance.html , 2019a). Florida's response required unprecedented rapid and continuous cross-sector communication, adaptation, and coordination. Zika tested public health systems in new ways, particularly for maternal child health populations. The systems are now being challenged again, as the Coronavirus COVID-19 pandemic spreads throughout Florida. This qualitative journey mapping evaluation of Florida's response focused on care for pregnant women and families with infants exposed to Zika virus.

Methods

Fifteen focus groups and interviews were conducted with 33 public health and healthcare workers who managed outbreak response, case investigations, and patient care in south Florida. Data were thematically analyzed, and the results were framed by the World Health Organization's (WHO) Healthcare Systems Framework of six building blocks: health service delivery, health workforce, health information systems, access to essential medicines, financing, and leadership and governance (World Health Organization, https://www.who.int/healthsystems/strategy/everybodys_business.pdf , 2007, https://www.who.int/healthinfo/systems/monitoring/en/ , 2010).

Results

Results highlighted coordination of resources, essential services and treatment, data collection, communication among public health and healthcare systems, and dissemination of information. Community education, testing accuracy and turnaround time, financing, and continuity of health services were areas of need, and there was room for improvement in all indicator areas.

Conclusions

The WHO Framework encapsulated important infrastructure and process factors relevant to the Florida Zika response as well as future epidemics. In this context, similarities, differences, and implications for the Coronavirus COVID-19 pandemic response are discussed.",2020-10-01 +32777102,The Musculoskeletal Knowledge Portal: Making Omics Data Useful to the Broader Scientific Community.,"The development of high-throughput genotyping technologies and large biobank collections, complemented with rapid methodological advances in statistical genetics, has enabled hypothesis-free genome-wide association studies (GWAS), which have identified hundreds of genetic variants across many loci associated with musculoskeletal conditions. Similarly, basic scientists have valuable molecular cellular and animal data based on musculoskeletal disease that would be enhanced by being able to determine the human translation of their findings. By integrating these large-scale human genomic musculoskeletal datasets with complementary evidence from model organisms, new and existing genetic loci can be statistically fine-mapped to plausibly causal variants, candidate genes, and biological pathways. Genes and pathways identified using this approach can be further prioritized as drug targets, including side-effect profiling and the potential for new indications. To bring together these big data, and to realize the vision of creating a knowledge portal, the International Federation of Musculoskeletal Research Societies (IFMRS) established a working group to collaborate with scientists from the Broad Institute to create the Musculoskeletal Knowledge Portal (MSK-KP)(http://mskkp.org/). The MSK consolidates omics datasets from humans, cellular experiments, and model organisms into a central repository that can be accessed by researchers. 
The vision of the MSK-KP is to enable better understanding of the biological mechanisms underlying musculoskeletal disease and apply this knowledge to identify and develop new disease interventions. © 2020 American Society for Bone and Mineral Research (ASBMR).",2020-09-01 +31970275,"Data on the effects of filters, storage conditions, and chlorination in fluorescence and absorbance wastewater measurements.","Data presented in this data article show artifacts (bias and error) that influence fluorescence measurement of dissolved organic matter (DOM) due to samples handling and storage. Data show interferences in fluorescence measurements related to filtration of water by different filter materials, including 0.7 μm glass microfiber filter, 0.45 μm polyvinylidene fluoride (PVDF) membrane, 0.45 μm cellulose nitrate membrane, and 0.45 μm polyethersulfone (PES) syringe filter. Data show also changes of several fluorescence indexes and UV absorbance measurements of wastewater organic matter respect to time under different storage conditions. Particularly, spectroscopic data were acquired using 0.7 μm filtered and unfiltered wastewater samples stored at different temperatures (i.e, room temperature, 4 °C, -20 °C) over a testing period of 21 days. Finally, data show the effect of chlorine disinfection (doses of 0.5-8 mg/L) in fluorescence measurements accomplished in samples from two secondary wastewater effluents. Data of this article are related to the publication ""M. Sgroi, E. Gagliano, F.G.A. Vagliasindi, P. Roccaro, Absorbance and EEM fluorescence of wastewater: effects of filters, storage conditions, and chlorination, Chemosphere, 243, 2020, 125292 [1]"". 
Raw data are available in a public repository (https://doi.org/10.17632/pf86xs7ybk.1).",2020-01-07 +25723102,Gateways to the FANTOM5 promoter level mammalian expression atlas.,"The FANTOM5 project investigates transcription initiation activities in more than 1,000 human and mouse primary cells, cell lines and tissues using CAGE. Based on manual curation of sample information and development of an ontology for sample classification, we assemble the resulting data into a centralized data resource (http://fantom.gsc.riken.jp/5/). This resource contains web-based tools and data-access points for the research community to search and extract data related to samples, genes, promoter activities, transcription factors and enhancers across the FANTOM5 atlas.",2015-01-05 +33402389,Hypoxia-Induced Suppression of Alternative Splicing of MBD2 Promotes Breast Cancer Metastasis via Activation of FZD1.,"Metastasis is responsible for the majority of breast cancer-related deaths, however, the mechanisms underlying metastasis in this disease remain largely elusive. Here we report that under hypoxic conditions, alternative splicing of MBD2 is suppressed, favoring the production of MBD2a, which facilitates breast cancer metastasis. Specifically, MBD2a promoted, whereas its lesser known short form MBD2c suppressed metastasis. Activation of HIF1 under hypoxia facilitated MBD2a production via repression of SRSF2-mediated alternative splicing. As a result, elevated MBD2a outcompeted MBD2c for binding to promoter CpG islands to activate expression of FZD1, thereby promoting epithelial-to-mesenchymal transition and metastasis. Strikingly, clinical data reveal significantly correlated expression of MBD2a and MBD2c with the invasiveness of malignancy, indicating opposing roles for MBD2 splicing variants in regulating human breast cancer metastasis. 
Collectively, our findings establish a novel link between MBD2 switching and tumor metastasis and provide a promising therapeutic strategy and predictive biomarkers for hypoxia-driven breast cancer metastasis. SIGNIFICANCE: This study defines the opposing roles and clinical relevance of MBD2a and MBD2c, two MBD2 alternative splicing products, in hypoxia-driven breast cancer metastasis. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/5/1265/F1.large.jpg.",2021-01-05 +33263569,"The X-ray crystal structure of the N-terminal domain of Ssr4, a Schizosaccharomyces pombe chromatin-remodelling protein.","Ssr4 is a yeast protein from Schizosaccharomyces pombe and is an essential part of the chromatin-remodelling [SWI/SNF and RSC (remodelling the structure of chromatin)] complexes found in S. pombe. These complexes (or their homologues) regulate gene expression in eukaryotic organisms, affecting a large number of genes both positively and negatively. The downstream effects are seen in development, and in humans have implications for disease such as cancer. The chromatin structure is altered by modifying the DNA-histone contacts, thus opening up or closing down sections of DNA to specific transcription factors that regulate the transcription of genes. The Ssr4 sequence has little homology to other sequences in the Protein Data Bank, so the structure was solved using an iodine derivative with SAD phasing. The structure of the N-terminal domain is an antiparallel β-sheet of seven strands with α-helices on one side and random coil on the other. 
The structure is significantly different to deposited structures and was used as a target in the most recent Critical Assessment of Techniques for Protein Structure Prediction (CASP; https://predictioncenter.org/) competition.",2020-11-25 +33479725,"An integrated approach to determine the abundance, mutation rate and phylogeny of the SARS-CoV-2 genome.","The analysis of the SARS-CoV-2 genome datasets has significantly advanced our understanding of the biology and genomic adaptability of the virus. However, the plurality of advanced sequencing datasets-such as short and long reads-presents a formidable computational challenge to uniformly perform quantitative, variant or phylogenetic analysis, thus limiting its application in public health laboratories engaged in studying epidemic outbreaks. We present a computational tool, Infectious Pathogen Detector (IPD), to perform integrated analysis of diverse genomic datasets, with a customized analytical module for the SARS-CoV-2 virus. The IPD pipeline quantitates individual occurrences of 1060 pathogens and performs mutation and phylogenetic analysis from heterogeneous sequencing datasets. Using IPD, we demonstrate a varying burden (5.055-999655.7 fragments per million) of SARS-CoV-2 transcripts across 1500 short- and long-read sequencing SARS-CoV-2 datasets and identify 4634 SARS-CoV-2 variants (~3.05 variants per sample), including 449 novel variants, across the genome with distinct hotspot mutations in the ORF1ab and S genes along with their phylogenetic relationships establishing the utility of IPD in tracing the genome isolates from the genomic data (as accessed on 11 June 2020). The IPD predicts the occurrence and dynamics of variability among infectious pathogens-with a potential for direct utility in the COVID-19 pandemic and beyond to help automate the sequencing-based pathogen analysis and in responding to public health threats, efficaciously. 
A graphical user interface (GUI)-enabled desktop application is freely available for download for the academic users at http://www.actrec.gov.in/pi-webpages/AmitDutt/IPD/IPD.html and for web-based processing at http://ipd.actrec.gov.in/ipdweb/ to generate an automated report without any prior computational know-how.",2021-03-01 +32657917,Predicting the Outcome of Limb Revascularization in Patients With Lower-extremity Arterial Trauma: Development and External Validation of a Supervised Machine-learning Algorithm to Support Surgical Decisions.,"

Objectives

Estimating the likely success of limb revascularization in patients with lower-extremity arterial trauma is central to decisions between attempting limb salvage and amputation. However, the projected outcome is often unclear at the time these decisions need to be made, making them difficult and threatening sound judgement. The objective of this study was to develop and validate a prediction model that can quantify an individual patient's risk of failed revascularization.

Methods

A BN prognostic model was developed using domain knowledge and data from the US joint trauma system. Performance (discrimination, calibration, and accuracy) was tested using ten-fold cross validation and externally validated on data from the UK Joint Theatre Trauma Registry. BN performance was compared to the mangled extremity severity score.

Results

Rates of amputation performed because of nonviable limb tissue were 12.2% and 19.6% in the US joint trauma system (n = 508) and UK Joint Theatre Trauma Registry (n = 51) populations respectively. A 10-predictor BN accurately predicted failed revascularization: area under the receiver operating characteristic curve (AUROC) 0.95, calibration slope 1.96, Brier score (BS) 0.05, and Brier skill score 0.50. The model maintained excellent performance in an external validation population: AUROC 0.97, calibration slope 1.72, Brier score 0.08, Brier skill score 0.58, and had significantly better performance than mangled extremity severity score at predicting the need for amputation [AUROC 0.95 (0.92-0.98) vs 0.74 (0.67-0.80); P < 0.0001].

Conclusions

A BN (https://www.traumamodels.com) can accurately predict the outcome of limb revascularization at the time of initial wound evaluation. This information may complement clinical judgement, support rational and shared treatment decisions, and establish sensible treatment expectations.",2020-10-01 +32931381,ZenoFishDb v1.1: A Database for Xenotransplantation Studies in Zebrafish.,"Rapidly accumulating literature has proven feasibility of the zebrafish xenograft models in cancer research. Nevertheless, online databases for searching the current zebrafish xenograft literature are in great demand. Herein, we have developed a manually curated database, called ZenoFishDb v1.1 (https://konulab.shinyapps.io/zenofishdb), based on R Shiny platform aiming to provide searchable information on ever increasing collection of zebrafish studies for cancer cell line transplantation and patient-derived xenografts (PDXs). ZenoFishDb v1.1 user interface contains four modules: DataTable, Visualization, PDX Details, and PDX Charts. The DataTable and Visualization pages represent xenograft study details, including injected cell lines, PDX injections, molecular modifications of cell lines, zebrafish strains, as well as technical aspects of the xenotransplantation procedures in table, bar, and/or pie chart formats. The PDX Details module provides comprehensive information on the patient details in table format and can be searched and visualized. Overall, ZenoFishDb v1.1 enables researchers to effectively search, list, and visualize different technical and biological attributes of zebrafish xenotransplantation studies particularly focusing on the new trends that make use of reporters, RNA interference, overexpression, or mutant gene constructs of transplanted cancer cells, stem cells, and PDXs, as well as distinguished host modifications.",2020-09-15 +33112659,Drivers of and Obstacles to the Adoption of Toxicogenomics for Chemical Risk Assessment: Insights from Social Science Perspectives.,"

Background

Some 20 y ago, scientific and regulatory communities identified the potential of omics sciences (genomics, transcriptomics, proteomics, metabolomics) to improve chemical risk assessment through development of toxicogenomics. Recognizing that regulators adopt new scientific methods cautiously given accountability to diverse stakeholders, the scope and pace of adoption of toxicogenomics tools and data have nonetheless not met the ambitious, early expectations of omics proponents.

Objective

Our objective was, therefore, to inventory, investigate, and derive insights into drivers of and obstacles to adoption of toxicogenomics in chemical risk assessment. By invoking established social science frameworks conceptualizing innovation adoption, we also aimed to develop recommendations for proponents of toxicogenomics and other new approach methodologies (NAMs).

Methods

We report findings from an analysis of 56 scientific and regulatory publications from 1998 through 2017 that address the adoption of toxicogenomics for chemical risk assessment. From this purposeful sample of toxicogenomics discourse, we identified major categories of drivers of and obstacles to adoption of toxicogenomics tools and data sets. We then mapped these categories onto social science frameworks for conceptualizing innovation adoption to generate actionable insights for proponents of toxicogenomics.

Discussion

We identify the most salient drivers and obstacles. From 1998 through 2017, adoption of toxicogenomics was understood to be helped by drivers such as those we labeled Superior scientific understanding, New applications, and Reduced cost & increased efficiency but hindered by obstacles such as those we labeled Insufficient validation, Complexity of interpretation, and Lack of standardization. Leveraging social science frameworks, we find that arguments for adoption that draw on the most salient drivers, which emphasize superior and novel functionality of omics as rationales, overlook potential adopters' key concerns: simplicity of use and compatibility with existing practices. We also identify two perspectives-innovation-centric and adopter-centric-on omics adoption and explain how overreliance on the former may be undermining efforts to promote toxicogenomics. https://doi.org/10.1289/EHP6500.",2020-10-28 +24301061,GWAS Central: a comprehensive resource for the comparison and interrogation of genome-wide association studies.,"To facilitate broad and convenient integrative visualization of and access to GWAS data, we have created the GWAS Central resource (http://www.gwascentral.org). This database seeks to provide a comprehensive collection of summary-level genetic association data, structured both for maximal utility and for safe open access (i.e., non-directional signals to fully preclude research subject identification). The resource emphasizes on advanced tools that allow comparison and discovery of relevant data sets from the perspective of genes, genome regions, phenotypes or traits. Tested markers and relevant genomic features can be visually interrogated across up to 16 multiple association data sets in a single view, starting at a chromosome-wide view and increasing in resolution down to individual bases. In addition, users can privately upload and view their own data as temporary files. 
Search and display utility is further enhanced by exploiting phenotype ontology annotations to allow genetic variants associated with phenotypes and traits of interest to be precisely identified, across all studies. Data submissions are accepted from individual researchers, groups and consortia, whereas we also actively gather data sets from various public sources. As a result, the resource now provides over 67 million P-values for over 1600 studies, making it the world's largest openly accessible online collection of summary-level GWAS association information.",2013-12-04 +30540962,Single-Cell Transcriptome Profiling of Mouse and hESC-Derived Pancreatic Progenitors.,"Human embryonic stem cells (hESCs) are a potential unlimited source of insulin-producing β cells for diabetes treatment. A greater understanding of how β cells form during embryonic development will improve current hESC differentiation protocols. All pancreatic endocrine cells, including β cells, are derived from Neurog3-expressing endocrine progenitors. This study characterizes the single-cell transcriptomes of 6,905 mouse embryonic day (E) 15.5 and 6,626 E18.5 pancreatic cells isolated from Neurog3-Cre; Rosa26mT/mG embryos, allowing for enrichment of endocrine progenitors (yellow; tdTomato + EGFP) and endocrine cells (green; EGFP). Using a NEUROG3-2A-eGFP CyT49 hESC reporter line (N5-5), 4,462 hESC-derived GFP+ cells were sequenced. Differential expression analysis revealed enrichment of markers that are consistent with progenitor, endocrine, or previously undescribed cell-state populations. 
This study characterizes the single-cell transcriptomes of mouse and hESC-derived endocrine progenitors and serves as a resource (https://lynnlab.shinyapps.io/embryonic_pancreas) for improving the formation of functional β-like cells from hESCs.",2018-12-01 +33343765,Evaluating the impact of heat stress as measured by temperature-humidity index (THI) on test-day milk yield of small holder dairy cattle in a sub-Sahara African climate.,"This study evaluates the effect of heat stress on milk production and describes the pattern of response of milk yield to increasing heat load, using temperature-humidity index (THI) on test-day milk records of small holder dairy cattle herds in the sub-Saharan African climate of Tanzania. Climate data obtained from aWhere, an agricultural weather data platform (http://www.awhere.com) was analysed with 14,367 first lactation test day milk records of 3511 dairy cows collected between 2016 and 2019. THI was calculated from daily maximal temperatures and daily minimum humidity. Three sets of analysis were performed. In the first and second analysis, two mixed effect repeatability models were fitted with THI treated as a categorical variable grouped into 5 classes (THI1= [61 - 66], THI2= [67 - 71], THI3= [72 - 78], THI4=[79 - 81], THI5=[82 - 86]), to obtain least squares estimates of THI effect on milk production, and as a continuous variable within THI classes to identify THI thresholds at which milk yield started to decline. In the third analyses, one quadratic polynomial regression (POL) and three regression spline functions namely piecewise linear spline function (PLF), natural splines function (NSF) and cubic splines function (CSF) were fitted to determine the average effect of THI on milk yield in the population and describe the pattern of response of milk yield to increasing heat load. 
The results show that heat stress reduced milk yield by 4.16% to 14.42% across THI groups, with daily milk yield being the highest in THI1 (7.40±0.39 litres) and the lowest in THI4 (6.33±0.32). Regression coefficients within groups showed significant daily milk yield decrease in THI2 (-0.09) and THI3 (-0.06), but not for other THI classes, indicating that cows experienced heat stress between THI values of 67 and 76 and milk loss plateaued afterwards, suggesting that the animals acclimatized to heat stress conditions beyond THI value of 76. At the population level, THI and its squared term were significantly negatively and positively (-0.61, 0.004) associated with milk production, indicating a non-linear relationship between milk yield and THI. The CSF model showed better goodness of fit and predictive ability than other models for predicting future population response of milk yield to heat stress in small holder dairy farms in Tanzania. Herd management strategies and animal husbandry measures are needed in small holder dairy farms in Tanzania to minimize the impact of heat stress on milk yield and income of the farmers.",2020-12-01 +33011028,Is vulnerability associated with substance use? A study among youth in Switzerland.,"Adolescence is a period of exploration and experimentation that includes risk behaviors such as substance use. Adolescents living in a situation of vulnerability could be more prone to using substances. In this cross-sectional study, we aimed to evaluate the association between the level of vulnerability and substance use considering explanatory factors. Data were obtained from the first wave of the GenerationFRee project (http://www.generationfree.ch), a longitudinal study based on data collected yearly between 2014 and 2019 on youth aged 15-24 years in high schools and professional schools. The sample included 5179 participants. 
We designated four risk behaviors: current tobacco smoking, alcohol misuse, cannabis use, and other illegal drug use. We defined vulnerability based on three criteria: the relationship with parents, school performance, and the family socioeconomic status (SES). According to this definition, participants were divided into three groups: no vulnerability, moderate vulnerability, and high vulnerability. Each substance was compared by vulnerability level and controlled with explanatory factors such as age, gender, perceived health, emotional well-being, academic track, nationality, living with parents, residence, family structure, money earned, and perceived personal financial situation. The results show that all substances except alcohol misuse are associated with vulnerability at the bivariate level. All the explanatory factors were also significant with the exception of academic track and amount of money earned per month. In the multinomial regression, for the moderate- and high-vulnerability groups, cannabis use was the only behavior that remained significant. In conclusion, this study shows the association between level of vulnerability and substance use, especially cannabis use. The results also demonstrate the complexity around vulnerability and how the interaction with social aspects influences vulnerability. Youths presenting familial, educational, or financial problems need to be especially screened for substance use by healthcare providers.",2020-10-01 +26822098,MTD: a mammalian transcriptomic database to explore gene expression and regulation.,"A systematic transcriptome survey is essential for the characterization and comprehension of the molecular basis underlying phenotypic variations. Recently developed RNA-seq methodology has facilitated efficient data acquisition and information mining of transcriptomes in multiple tissues/cell lines. 
Current mammalian transcriptomic databases are either tissue-specific or species-specific, and they lack in-depth comparative features across tissues and species. Here, we present a mammalian transcriptomic database (MTD) that is focused on mammalian transcriptomes, and the current version contains data from humans, mice, rats and pigs. Regarding the core features, the MTD browses genes based on their neighboring genomic coordinates or joint KEGG pathway and provides expression information on exons, transcripts and genes by integrating them into a genome browser. We developed a novel nomenclature for each transcript that considers its genomic position and transcriptional features. The MTD allows a flexible search of genes or isoforms with user-defined transcriptional characteristics and provides both table-based descriptions and associated visualizations. To elucidate the dynamics of gene expression regulation, the MTD also enables comparative transcriptomic analysis in both intraspecies and interspecies manner. The MTD thus constitutes a valuable resource for transcriptomic and evolutionary studies. The MTD is freely accessible at http://mtd.cbi.ac.cn.",2016-01-27 +33823115,A Nationwide Study Examining Deafness Among Hospitalized Adults.,"Background It is unknown whether hospital outcomes differ among nonspeaking deaf patients compared to those without this disability. Objective This article aims to compare clinical outcomes and utilization data among patients with and without deafness. Design This study used a retrospective cohort study. Setting and Participants The participants included Nationwide Inpatient Sample, year 2017, hospitalized adults with and without diagnostic codes related to deafness and inability to speak. Method Multiple logistic and linear regression were used to compare in-hospital outcomes. 
Results Thirty million four hundred one thousand one hundred seventeen adults were hospitalized, and 7,180 had deafness and inability to speak related coding. Patients with deafness were older (mean age ± SEM: 59.2 ± 0.51 vs. 57.9 ± 0.09 years, p = .01), and less likely female (47.0% vs. 57.7%, p < .01) compared to controls. Those with deafness had more comorbidities compared to the controls (Charlson comorbidity score ≥ 3: 31.2% vs. 27.8%, p < .01). Mortality was higher among deaf versus controls (3.6% vs. 2.2%; p < .01); this translated into higher adjusted odds of mortality (adjusted odds ratio = 1.7 [confidence interval (CI) 1.3-2.4]; p = .01). Deaf patients had lower odds of being discharged home compared to controls (aOR = 0.6 [CI 0.55-0.73]; p < .01). Length of stay was longer (adjusted mean difference = 1.5 days CI [0.7-2.3]; p < .01) and hospital charges were higher, but not significantly so (adjusted mean difference = $4,193 CI [-$1,935-$10,322]; p = .18) in patients with deafness. Conclusions Hospitalized nonspeaking deaf patients had higher mortality and longer hospital stays compared to those without this condition. These results suggest that specialized attention may be warranted when deaf patients are admitted to our hospitals in hopes of reducing disparities in outcomes. Supplemental Material https://doi.org/10.23641/asha.14336663.",2021-04-06 +33514264,"Associations Between Speech Perception, Vocabulary, and Phonological Awareness Skill in School-Aged Children With Speech Sound Disorders.","Purpose Prior studies report conflicting descriptions of the relationships between phonological awareness (PA), vocabulary, and speech perception in preschoolers with speech disorders. This study sought to determine the nature of these relationships in a sample of school-aged children with residual speech sound errors affecting /ɹ/. Method Participants included 110 children aged 7;0-17;4 (years;months) with residual errors impacting /ɹ/. 
Data on perceptual acuity and perceptual bias in an /ɹ/ identification task, receptive vocabulary, and PA were obtained. A theoretically and empirically motivated path model was constructed with vocabulary mediating the relationship between two measures of speech perception and PA. Model parameters were determined through maximum likelihood estimation with standard errors that were robust to nonnormality. Monte Carlo simulation was used to examine achieved power at the current sample size. Results The saturated path model explained 19% of the variance in PA. The direct path between age-adjusted perceptual acuity and PA was significant, as was the direct path between vocabulary and PA. Contrary to our hypothesis, there was no evidence in the current sample that vocabulary skill mediated the relationship between speech perception and PA. Each individual path was adequately powered at the current sample size. Conclusions The overall model provided evidence for a continued relationship between speech perception, measured by perceptual acuity of the sound in error, and PA in school-aged children with residual speech errors. Thus, measures of speech perception remain relevant to the assessment of school-aged children and adolescents in this population. Supplemental Material https://doi.org/10.23641/asha.13641275.",2021-01-29 +28377194,Stakeholders apply the GRADE evidence-to-decision framework to facilitate coverage decisions.,"

Objectives

Coverage decisions are complex and require the consideration of many factors. A well-defined, transparent process could improve decision-making and facilitate decision-maker accountability.

Study design and setting

We surveyed key US-based stakeholders regarding their current approaches for coverage decisions. Then, we held a workshop to test an evidence-to-decision (EtD) framework for coverage based on the Grading of Recommendations Assessment, Development, and Evaluation (GRADE) criteria.

Results

A total of 42 individuals (including 19 US stakeholders as well as international health policymakers and GRADE working group members) attended the workshop. Of the 19 stakeholders, 14 (74%) completed the survey before the workshop. Almost all of their organizations (13 of 14; 93%) used systematic reviews for coverage decision-making; few (2 of 14; 14%) developed their own evidence synthesis; a majority (9 of 14; 64%) rated the certainty of evidence (using various systems); almost all (13 of 14; 93%) denied formal consideration of resource use; and half (7 of 14; 50%) reported explicit criteria for decision-making. At the workshop, stakeholders successfully applied the EtD framework to four case studies and provided narrative feedback, which centered on contextual factors affecting coverage decisions in the United States, the need for reliable data on subgroups of patients, and the challenge of decision-making without formal consideration of resource use.

Conclusion

Stakeholders successfully applied the EtD framework to four case studies and highlighted contextual factors affecting coverage decisions and affirmed its value. Their input informed the further development of a revised EtD framework, now publicly available (http://gradepro.org/).",2017-04-01 +32874821,Magnetotail Reconnection at Jupiter: A Survey of Juno Magnetic Field Observations. ,"At Jupiter, tail reconnection is thought to be driven by an internal mass loading and release process called the Vasyliunas cycle. Galileo data have shown hundreds of reconnection events occurring in Jupiter's magnetotail. Here we present a survey of reconnection events observed by Juno during its first 16 orbits of Jupiter (July 2016-October 2018). The events are identified using Juno magnetic field data, which facilitates comparison to the Vogt et al. (2010, https://doi.org/10.1029/2009JA015098) survey of reconnection events from Galileo magnetometer data, but we present data from Juno's other particle and fields instruments for context. We searched for field dipolarizations or reversals and found 232 reconnection events in the Juno data, most of which featured an increase in |Bθ |, the magnetic field meridional component, by a factor of 3 over background values. We found that most properties of the Juno reconnection events, like their spatial distribution and duration, are comparable to Galileo, including the presence of a ~3-day quasi-periodicity in the recurrence of Juno tail reconnection events and in Juno JEDI, JADE, and Waves data. However, unlike with Galileo we were unable to clearly define a statistical x-line separating planetward and tailward Juno events. A preliminary analysis of plasma velocities during five magnetic field reconnection events showed that the events were accompanied by fast radial flows, confirming our interpretation of these magnetic signatures as reconnection events. 
We anticipate that a future survey covering other Juno datasets will provide additional insight into the nature of tail reconnection at Jupiter.",2020-02-27 +31868908,Deep Residual Neural Networks Resolve Quartet Molecular Phylogenies.,"Phylogenetic inference is of fundamental importance to evolutionary as well as other fields of biology, and molecular sequences have emerged as the primary data for this task. Although many phylogenetic methods have been developed to explicitly take into account substitution models of sequence evolution, such methods could fail due to model misspecification or insufficiency, especially in the face of heterogeneities in substitution processes across sites and among lineages. In this study, we propose to infer topologies of four-taxon trees using deep residual neural networks, a machine learning approach needing no explicit modeling of the subject system and having a record of success in solving complex nonlinear inference problems. We train residual networks on simulated protein sequence data with extensive amino acid substitution heterogeneities. We show that the well-trained residual network predictors can outperform existing state-of-the-art inference methods such as the maximum likelihood method on diverse simulated test data, especially under extensive substitution heterogeneities. Reassuringly, residual network predictors generally agree with existing methods in the trees inferred from real phylogenetic data with known or widely believed topologies. Furthermore, when combined with the quartet puzzling algorithm, residual network predictors can be used to reconstruct trees with more than four taxa. We conclude that deep learning represents a powerful new approach to phylogenetic reconstruction, especially when sequences evolve via heterogeneous substitution processes. 
We present our best trained predictor in a freely available program named Phylogenetics by Deep Learning (PhyDL, https://gitlab.com/ztzou/phydl; last accessed January 3, 2020).",2020-05-01 +25853886,ALDB: a domestic-animal long noncoding RNA database.,"

Background

Long noncoding RNAs (lncRNAs) have attracted significant attention in recent years due to their important roles in many biological processes. Domestic animals constitute a unique resource for understanding the genetic basis of phenotypic variation and are ideal models relevant to diverse areas of biomedical research. With improving sequencing technologies, numerous domestic-animal lncRNAs are now available. Thus, there is an immediate need for a database resource that can assist researchers to store, organize, analyze and visualize domestic-animal lncRNAs.

Results

The domestic-animal lncRNA database, named ALDB, is the first comprehensive database with a focus on the domestic-animal lncRNAs. It currently archives 12,103 pig intergenic lncRNAs (lincRNAs), 8,923 chicken lincRNAs and 8,250 cow lincRNAs. In addition to the annotations of lincRNAs, it offers related data that is not available yet in existing lncRNA databases (lncRNAdb and NONCODE), such as genome-wide expression profiles and animal quantitative trait loci (QTLs) of domestic animals. Moreover, a collection of interfaces and applications, such as the Basic Local Alignment Search Tool (BLAST), the Generic Genome Browser (GBrowse) and flexible search functionalities, are available to help users effectively explore, analyze and download data related to domestic-animal lncRNAs.

Conclusions

ALDB enables the exploration and comparative analysis of lncRNAs in domestic animals. A user-friendly web interface, integrated information and tools make it valuable to researchers in their studies. ALDB is freely available from http://res.xaut.edu.cn/aldb/index.jsp.",2015-04-08 +31248455,"The Holocaust, medicine and becoming a physician: the crucial role of education.","Learning about the abandonment of moral principles of healthcare professionals and scientists, their societies and academic institutions, to a murderous ideology yields fundamental concerns and global implications for present and future healthcare professionals' education and practice. Medicine's worst-case scenario raises deeply disturbing yet essential questions in the here and now: Could the Holocaust, one of the greatest evils ever perpetrated on humankind, have occurred without the complicity of physicians, their societies, and the scientific profession community? How did healers become killers? Can it happen again? We reflect here on those queries through the lens of the Second International Scholars Workshop on Medicine during the Holocaust and Beyond held in the Galilee, Israel on May 7-11, 2017 and derive contemporary global lessons for the healthcare professions. Following a brief historical background, implications of the history of medicine in the Holocaust are drawn including 1) awareness that the combination of hierarchy, obedience, and power constitutes a risk factor for abuse of power in medicine and 2) learning and teaching about medicine in the Holocaust and beyond is a powerful platform for supporting professional identity formation. As such, this history ideally can help ""equip"" learners with a moral compass for navigating the future of medical practice and inherent ethical challenges such as prejudice, assisted reproduction, resource allocation, obtaining valid informed consent, end of life care, and challenges of genomics and technology expansion. 
Curriculum modules are available and studies on impact on students' attitudes and behavior are emerging. The conference culminated with the launch of the Galilee Declaration, composed and signed by an international, inter-professional community of historians, healthcare professions educators, and ethicists. The Declaration included herein ( http://english.wgalil.ac.il/category/Declaration ) calls for curricula on history of healthcare professions in the Holocaust and its implications to be included in all healthcare professions education.",2019-06-27 +30371822,MatrixDB: integration of new data with a focus on glycosaminoglycan interactions.,"MatrixDB (http://matrixdb.univ-lyon1.fr/) is an interaction database focused on biomolecular interactions established by extracellular matrix (ECM) proteins and glycosaminoglycans (GAGs). It is an active member of the International Molecular Exchange (IMEx) consortium (https://www.imexconsortium.org/). It has adopted the HUPO Proteomics Standards Initiative standards for annotating and exchanging interaction data, either at the MIMIx (The Minimum Information about a Molecular Interaction eXperiment) or IMEx level. The following items related to GAGs have been added in the updated version of MatrixDB: (i) cross-references of GAG sequences to the GlyTouCan database, (ii) representation of GAG sequences in different formats (IUPAC and GlycoCT) and as SNFG (Symbol Nomenclature For Glycans) images and (iii) the GAG Builder online tool to build 3D models of GAG sequences from GlycoCT codes. The database schema has been improved to represent n-ary experiments. Gene expression data, imported from Expression Atlas (https://www.ebi.ac.uk/gxa/home), quantitative ECM proteomic datasets (http://matrisomeproject.mit.edu/ecm-atlas), and a new visualization tool of the 3D structures of biomolecules, based on the PDB Component Library and LiteMol, have also been added. 
A new advanced query interface now allows users to mine MatrixDB data using combinations of criteria, in order to build specific interaction networks related to diseases, biological processes, molecular functions or publications.",2019-01-01 +30453895,RATEmiRs: the rat atlas of tissue-specific and enriched miRNAs database.,"

Background

MicroRNAs (miRNAs) regulate gene expression and have been targeted as indicators of environmental/toxicologic stressors. Using the data from our deep sequencing of miRNAs in an extensive sampling of rat tissues, we developed a database called RATEmiRs for the Rat Atlas of Tissue-specific and Enriched miRNAs to allow users to dynamically determine mature-, iso- and pre-miR expression abundance, enrichment and specificity in rat tissues and organs.

Results

Illumina sequencing count data from mapped reads and meta data from the miRNA body atlas consisting of 21 and 23 tissues (14 organs) of toxicologic interest from 12 to 13 week old male and female Sprague Dawley rats respectively, were managed in a relational database with a user-friendly query interface. Data-driven pipelines are available to tailor the identification of tissue-enriched (TE) and tissue-specific (TS) miRNAs. Data-driven organ-specific (OS) pipelines reveal miRNAs that are expressed predominately in a given organ. A user-driven approach is also available to assess the tissue expression of user-specified miRNAs. Using one tissue vs other tissues and tissue(s) of an organ vs other organs, we illustrate the utility of RATEmiRs to facilitate the identification of candidate miRNAs. As a use case example, RATEmiRs revealed two TS miRNAs in the liver: rno-miR-122-3p and rno-miR-122-5p. When liver is compared to just the brain tissues for example, rno-miR-192-5p, rno-miR-193-3p, rno-miR-203b-3p, rno-miR-3559-5p, rno-miR-802-3p and rno-miR-802-5p are also detected as abundantly expressed in liver. As another example, 55 miRNAs from the RATEmiRs query of ileum vs brain tissues overlapped with miRNAs identified from the same comparison of tissues in an independent, publicly available dataset of 10 week old male rat microarray data suggesting that these miRNAs are likely not age-specific, platform-specific nor pipeline-dependent. Lastly, we identified 10 miRNAs that have conserved tissue/organ-specific expression between the rat and human species.

Conclusions

RATEmiRs provides a new platform for identification of TE, TS and OS miRNAs in a broad array of rat tissues. RATEmiRs is available at: https://www.niehs.nih.gov/ratemirs.",2018-11-19 +26454874,BaMBa: towards the integrated management of Brazilian marine environmental data. ,"A new open access database, Brazilian Marine Biodiversity (BaMBa) (https://marinebiodiversity.lncc.br), was developed in order to maintain large datasets from the Brazilian marine environment. Essentially, any environmental information can be added to BaMBa. Certified datasets obtained from integrated holistic studies, comprising physical-chemical parameters, -omics, microbiology, benthic and fish surveys can be deposited in the new database, enabling scientific, industrial and governmental policies and actions to be undertaken on marine resources. There is a significant number of databases, however BaMBa is the only integrated database resource both supported by a government initiative and exclusive for marine data. BaMBa is linked to the Information System on Brazilian Biodiversity (SiBBr, http://www.sibbr.gov.br/) and will offer opportunities for improved governance of marine resources and scientists' integration. Database URL: http://marinebiodiversity.lncc.br.",2015-10-10 +26387108,The Protein Ensemble Database.,"The scientific community's major conceptual notion of structural biology has recently shifted in emphasis from the classical structure-function paradigm due to the emergence of intrinsically disordered proteins (IDPs). As opposed to their folded cousins, these proteins are defined by the lack of a stable 3D fold and a high degree of inherent structural heterogeneity that is closely tied to their function. Due to their flexible nature, solution techniques such as small-angle X-ray scattering (SAXS), nuclear magnetic resonance (NMR) spectroscopy and fluorescence resonance energy transfer (FRET) are particularly well-suited for characterizing their biophysical properties. 
Computationally derived structural ensembles based on such experimental measurements provide models of the conformational sampling displayed by these proteins, and they may offer valuable insights into the functional consequences of inherent flexibility. The Protein Ensemble Database (http://pedb.vib.be) is the first openly accessible, manually curated online resource storing the ensemble models, protocols used during the calculation procedure, and underlying primary experimental data derived from SAXS and/or NMR measurements. By making this previously inaccessible data freely available to researchers, this novel resource is expected to promote the development of more advanced modelling methodologies, facilitate the design of standardized calculation protocols, and consequently lead to a better understanding of how function arises from the disordered state.",2015-01-01 +33539296,Decoupled Two-Stage Crowd Counting and Beyond.,"One of appealing approaches to counting dense objects, such as crowd, is density map estimation. Density maps, however, present ambiguous appearance cues in congested scenes, rendering infeasibility in identifying individuals and difficulties in diagnosing errors. Inspired by an observation that counting can be interpreted as a two-stage process, i.e., identifying possible object regions and counting exact object numbers, we introduce a probabilistic intermediate representation termed the probability map that depicts the probability of each pixel being an object. This representation allows us to decouple counting into probability map regression (PMR) and count map regression (CMR). We therefore propose a novel decoupled two-stage counting (D2C) framework that sequentially regresses the probability map and learns a counter conditioned on the probability map. Given the probability map and the count map, a peak point detection algorithm is derived to localize each object with a point under the guidance of local counts. 
An advantage of D2C is that the counter can be learned reliably with additional synthesized probability maps. This addresses important data deficiency and sample imbalanced problems in counting. Our framework also enables easy diagnoses and analyses of error patterns. For instance, we find that, the counter per se is sufficiently accurate, while the bottleneck appears to be PMR. We further instantiate a network D2CNet in our framework and report state-of-the-art counting and localization performance across 6 crowd counting benchmarks. Since the probability map is a representation independent of visual appearance, D2CNet also exhibits remarkable cross-dataset transferability. Code and pretrained models are made available at: https://git.io/d2cnet.",2021-02-12 +26980518,Fish Karyome version 2.1: a chromosome database of fishes and other aquatic organisms. ,"A voluminous information is available on karyological studies of fishes; however, limited efforts were made for compilation and curation of the available karyological data in a digital form. 'Fish Karyome' database was the preliminary attempt to compile and digitize the available karyological information on finfishes belonging to the Indian subcontinent. But the database had limitations since it covered data only on Indian finfishes with limited search options. Perceiving the feedbacks from the users and its utility in fish cytogenetic studies, the Fish Karyome database was upgraded by applying Linux, Apache, MySQL and PHP (pre hypertext processor) (LAMP) technologies. In the present version, the scope of the system was increased by compiling and curating the available chromosomal information over the globe on fishes and other aquatic organisms, such as echinoderms, molluscs and arthropods, especially of aquaculture importance. Thus, Fish Karyome version 2.1 presently covers 866 chromosomal records for 726 species supported with 253 published articles and the information is being updated regularly. 
The database provides information on chromosome number and morphology, sex chromosomes, chromosome banding, molecular cytogenetic markers, etc. supported by fish and karyotype images through interactive tools. It also enables the users to browse and view chromosomal information based on habitat, family, conservation status and chromosome number. The system also displays chromosome number in model organisms, protocol for chromosome preparation and allied techniques and glossary of cytogenetic terms. A data submission facility has also been provided through data submission panel. The database can serve as a unique and useful resource for cytogenetic characterization, sex determination, chromosomal mapping, cytotaxonomy, karyo-evolution and systematics of fishes. Database URL: http://mail.nbfgr.res.in/Fish_Karyome.",2016-03-15 +24225323,"PATRIC, the bacterial bioinformatics database and analysis resource.","The Pathosystems Resource Integration Center (PATRIC) is the all-bacterial Bioinformatics Resource Center (BRC) (http://www.patricbrc.org). A joint effort by two of the original National Institute of Allergy and Infectious Diseases-funded BRCs, PATRIC provides researchers with an online resource that stores and integrates a variety of data types [e.g. genomics, transcriptomics, protein-protein interactions (PPIs), three-dimensional protein structures and sequence typing data] and associated metadata. Datatypes are summarized for individual genomes and across taxonomic levels. All genomes in PATRIC, currently more than 10,000, are consistently annotated using RAST, the Rapid Annotations using Subsystems Technology. Summaries of different data types are also provided for individual genes, where comparisons of different annotations are available, and also include available transcriptomic data. 
PATRIC provides a variety of ways for researchers to find data of interest and a private workspace where they can store both genomic and gene associations, and their own private data. Both private and public data can be analyzed together using a suite of tools to perform comparative genomic or transcriptomic analysis. PATRIC also includes integrated information related to disease and PPIs. All the data and integrated analysis and visualization tools are freely available. This manuscript describes updates to the PATRIC since its initial report in the 2007 NAR Database Issue.",2013-11-12 +31510666,NPS: scoring and evaluating the statistical significance of peptidic natural product-spectrum matches.,"

Motivation

Peptidic natural products (PNPs) are considered a promising compound class that has many applications in medicine. Recently developed mass spectrometry-based pipelines are transforming PNP discovery into a high-throughput technology. However, the current computational methods for PNP identification via database search of mass spectra are still in their infancy and could be substantially improved.

Results

Here we present NPS, a statistical learning-based approach for scoring PNP-spectrum matches. We incorporated NPS into two leading PNP discovery tools and benchmarked them on millions of natural product mass spectra. The results demonstrate more than 45% increase in the number of identified spectra and 20% more found PNPs at a false discovery rate of 1%.

Availability and implementation

NPS is available as a command line tool and as a web application at http://cab.spbu.ru/software/NPS.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +31541133,"Evaluation of the functional effects of genetic variants‒missense and nonsense SNPs, indels and copy number variations‒in the gene encoding human deoxyribonuclease I potentially implicated in autoimmunity.","Genetic variants, such as single nucleotide polymorphisms (SNPs), in the deoxyribonuclease I (DNase I) gene which remarkably reduce or abolish the activity are assumed to be substantially responsible for the genetic backgrounds determining susceptibility to autoimmune dysfunction. Here, we evaluated many genetic variants, including missense and nonsense SNPs, and indel (inframe) variants in the gene, potentially implicated in autoimmune diseases as functional variants resulting in altered activity levels. Eighteen missense and 7 nonsense SNPs, and 9 indel (inframe) variants were found to result in loss of function and disappearance of DNase I activity. Furthermore, considering the positions in the DNase I protein corresponding to the various nonsense SNPs, all of the other nonsense SNPs and frameshift variants registered in the Ensembl database ( https://asia.ensembl.org ) appear likely to exert a pathogenetic effect through loss of the activity. Accordingly, a total of 60 genetic variants in the DNase 1 gene (DNASE1) inducing abolishment or marked reduction of the DNase I activity could be identified as genetic risk factors for autoimmunity, irrespective of how sparsely they were distributed in the population. It was noteworthy that SNP p.Gln244Arg, reportedly associated with autoimmunity and reducing the activity to about half of that of the wild type, and SNP p.Arg107Gly, abolishing the activity completely, were distributed worldwide and in African populations at the polymorphic level, respectively. 
On the other hand, with regard to copy number variations in DNASE1 where loss of copy leads to a reduction of the in vivo enzyme activity, only 2 diploid copy numbers were distributed in Japanese and German populations, demonstrating no loss of copy. These exhaustive data for genetic variants in DNASE1 resulting in loss or marked reduction of the DNase I activity are highly informative when considering genetic predisposition leading to autoimmune dysfunction.",2019-09-20 +,A multi-scale high-resolution analysis of global sea surface temperature,"The Multi-scale Ultra-high Resolution (MUR) sea surface temperature (SST) analysis presents daily SST estimates on a global 0.01°×0.01° grid. The current version (Version 4.1, http://dx.doi.org/10.5067/GHGMR-4FJ04) features the 1-km resolution MODIS retrievals, which are fused with AVHRR GAC, microwave, and in-situ SST data by applying internal correction for relative biases among the data sets. Only the night-time (dusk to dawn locally) satellite SST retrievals are used to estimate the foundation SST. The MUR SST values agree with the GHRSST Multi-Product Ensemble (GMPE) SST field to 0.36°C on average, except in summer-time Arctic region where the existing SST analysis products are known to disagree with each other. The feature resolution of the MUR SST analysis is an order of magnitude higher than most existing analysis products. The Multi-Resolution Variational Analysis (MRVA) method allows the MUR analysis to use multiple synoptic time scales, including a 5-day data window used for reconstruction of mesoscale features and data windows of only few hours for the smaller scale features. Reconstruction of fast evolving small scale features and interpolation over persistent large data voids can be achieved simultaneously by the use of multiple synoptic windows in the multi-scale setting. 
The MRVA method is also a “mesh-less” interpolation procedure that avoids truncation of the geolocation data during gridding and binning of satellite samples. Future improvements of the MUR SST analysis will include ingestion of day-time MODIS retrievals as well as more recent high-resolution SST retrievals from VIIRS.",2017-10-01 +32692947,The Dimensionality of Oral Language Ability: Evidence From Young Greek Children.,"Purpose This study investigated component skills in oral language development utilizing and validating a new assessment battery in a large (N = 800) and representative sample of Greek students 4-7 years of age. Method All participants enrolled in public schools from four geographical regions (Attica, Thessaly, Macedonia, and Crete) that varied demographically (urban, semiurban, and rural). For the individualized language assessments, we utilized mobile devices (tablet PC) to ensure children's interest and joyful participation as well as reliable administration procedures across sites. Results by confirmatory factor analyses specified and validated five different models in each grade to identify the best conceptualization of language dimensionality in the respective age groups. Results Four-dimensional model provided a slightly better discriminant validity in language data of the preschool group. However, in kindergarten and first grades, the five-dimensional model had the best fit to the data to the four-dimensional. Conclusion These findings support the multidimensionality of oral language ability at this phase of development and increase of factor distinctiveness as children grow. 
Supplemental Material https://doi.org/10.23641/asha.12649214.",2020-07-16 +35198624,Increasing Diversity in Developmental Biology.,"The demographic profile of the scientific and biomedical workforce in the United States does not reflect the population at large (https://ncses.nsf.gov/pubs/nsf21321/data-tables; www.census.gov), raising concerns that there will be too few trained researchers in the future, the scope of research interests will not be broad enough, gaps in equity and social justice will continue to increase, and the safeguards to the integrity of the scientific enterprise could be jeopardized. To diversify the pool of scientists, the Society for Developmental Biology (SDB) developed the Choose Development! Program-a two-summer immersion for undergraduate students belonging to underrepresented (UR) populations in STEM to join the research laboratory of an established SDB member. This research-intensive experience was augmented by a multi-tier mentoring plan for each student, society-wide recognition, professional development activities and networking at national meetings. The strengths of the Choose Development! Program were leveraged to expand inclusion and outreach at the Society's leadership level, the Board of Directors (BOD), which then led to significant changes that impacted the SDB community. The cumulative outcomes of the Choose Development! Program provides evidence that community-based, long-term advocacy, and mentoring of young UR scientists is successful in retaining UR students in scientific career paths and making a scientific society more inclusive.",2021-01-01 +,MBRS-34. MOLECULAR CHARACTERIZATION OF RECURRENT MEDULLOBLASTOMA REVEALS AN UNEXPECTEDLY HIGH INCIDENCE OF SECONDARY MALIGNANCY,"Abstract

INTRODUCTION

Despite an overall 5-year progression free survival of approximately 80% for primary medulloblastomas (MBs), recurrent disease confers an abysmal prognosis with less than 10% 5-year survival. Subgroup affiliation at recurrence has been reported as stable though divergent clonal selection may explain treatment failure upon relapse. The confirmation of histopathologic diagnosis at recurrence thus represents a key task to facilitate selection of efficacious therapy and to gain deeper understanding of the biology driving recurrent disease.

METHODS

DNA methylation-based classification of pediatric brain tumors has emerged as a robust methodology for refining histopathologic diagnosis and defining disease-specific subgroups. We leveraged Illumina Infinium BeadChip arrays to characterize a series of >50 patient-matched, histopathologically diagnosed, primary/relapse MB pairs. Entity and subgroups were assigned by comparison with a database consisting of >2800 reference methylation profiles (https://www.molecularneuropathology.org/mnp). Genome-wide copy-number aberrations were inferred from the BeadChip arrays, and mutational analysis (whole exome sequencing) was performed on the majority of samples.

RESULTS/CONCLUSION

Molecular classification was performed on n=59 primary/relapse MB pairs, inferring the following subgroup representation amongst our cohort: WNT (n=1, 2%), SHH (n=17, 30%), Group 3 (n=15, 25%), Group 4 (n=25, 42%). MB subgroup status was highly conserved across the series (90%), consistent with prior reports. Notably, we determined that 6/59 (10%) of histopathologically diagnosed MB ‘relapses’ molecularly classified as non-MB, including multiple glioblastomas (3/6). Pairwise copy-number and mutational analyses are ongoing to determine the degree of conservation and/or divergence amongst somatic alterations between primary/relapse MB pairs and to identify molecular patterns associated with secondary malignancy.",2018-06-01 +33177916,Comparative Genomics Platform and Phylogenetic Analysis of Fungal Laccases and Multi-Copper Oxidases.,"Laccases (EC 1.10.3.2), a group of multi-copper oxidases (MCOs), play multiple biological functions and widely exist in many species. Fungal laccases have been extensively studied for their industrial applications, however, there was no database specially focused on fungal laccases. To provide a comparative genomics platform for fungal laccases, we have developed a comparative genomics platform for laccases and MCOs (http://laccase.riceblast.snu.ac.kr/). Based on protein domain profiles of characterized sequences, 3,571 laccases were predicted from 690 genomes including 253 fungi. The number of putative laccases and their properties exhibited dynamic distribution across the taxonomy. A total of 505 laccases from 68 genomes were selected and subjected to phylogenetic analysis. As a result, four clades comprised of nine subclades were phylogenetically grouped by their putative functions and analyzed at the sequence level. 
Our work would provide a workbench for putative laccases mainly focused on the fungal kingdom as well as a new perspective in the identification and classification of putative laccases and MCOs.",2020-09-11 +30445601,RNAct: Protein-RNA interaction predictions for model organisms with supporting experimental data.,"Protein-RNA interactions are implicated in a number of physiological roles as well as diseases, with molecular mechanisms ranging from defects in RNA splicing, localization and translation to the formation of aggregates. Currently, ∼1400 human proteins have experimental evidence of RNA-binding activity. However, only ∼250 of these proteins currently have experimental data on their target RNAs from various sequencing-based methods such as eCLIP. To bridge this gap, we used an established, computationally expensive protein-RNA interaction prediction method, catRAPID, to populate a large database, RNAct. RNAct allows easy lookup of known and predicted interactions and enables global views of the human, mouse and yeast protein-RNA interactomes, expanding them in a genome-wide manner far beyond experimental data (http://rnact.crg.eu).",2019-01-01 +26586806,BreCAN-DB: a repository cum browser of personalized DNA breakpoint profiles of cancer genomes.,"BreCAN-DB (http://brecandb.igib.res.in) is a repository cum browser of whole genome somatic DNA breakpoint profiles of cancer genomes, mapped at single nucleotide resolution using deep sequencing data. These breakpoints are associated with deletions, insertions, inversions, tandem duplications, translocations and a combination of these structural genomic alterations. The current release of BreCAN-DB features breakpoint profiles from 99 cancer-normal pairs, comprising five cancer types. We identified DNA breakpoints across genomes using high-coverage next-generation sequencing data obtained from TCGA and dbGaP. 
Further, in these cancer genomes, we methodically identified breakpoint hotspots which were significantly enriched with somatic structural alterations. To visualize the breakpoint profiles, a next-generation genome browser was integrated with BreCAN-DB. Moreover, we also included previously reported breakpoint profiles from 138 cancer-normal pairs, spanning 10 cancer types into the browser. Additionally, BreCAN-DB allows one to identify breakpoint hotspots in user uploaded data set. We have also included a functionality to query overlap of any breakpoint profile with regions of user's interest. Users can download breakpoint profiles from the database or may submit their data to be integrated in BreCAN-DB. We believe that BreCAN-DB will be a useful resource for the genomics scientific community and is a step towards personalized cancer genomics.",2015-11-19 +30052780,LipidIMMS Analyzer: integrating multi-dimensional information to support lipid identification in ion mobility-mass spectrometry based lipidomics.,"

Summary

Ion mobility-mass spectrometry (IM-MS) has shown great application potential for lipidomics. However, IM-MS based lipidomics is significantly restricted by the available software for lipid structural identification. Here, we developed a software tool, namely, LipidIMMS Analyzer, to support the accurate identification of lipids in IM-MS. For the first time, the software incorporates a large-scale database covering over 260 000 lipids and four-dimensional structural information for each lipid [i.e. m/z, retention time (RT), collision cross-section (CCS) and MS/MS spectra]. Therefore, multi-dimensional information can be readily integrated to support lipid identifications, and significantly improve the coverage and confidence of identification. Currently, the software supports different IM-MS instruments and data acquisition approaches.

Availability and implementation

The software is freely available at: http://imms.zhulab.cn/LipidIMMS/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +33689157,Prostate Cancer Screening and Young Black Men: Can Early Communication Avoid Later Health Disparities?,"This study aims to determine if younger men, across racial and ethnic groups, discussed the benefits/risks/harms of PSA screening with health care professionals. Publicly available data were obtained from the Health Information National Trends Survey https://hints.cancer.gov/ in March 2019. Cross-sectional analysis of 518 men between the ages of 18 and 49 years from men who completed the survey between October 2011 and February 2012 (HINTS cycle 4) was performed. We used logistic regression to evaluate the association between race/ethnicity and discussions around PSA. Less than 10% of the participants reported a prior PSA; Black and Hispanic men were more likely compared with White men. Compared with White men, Black and other race men reported receiving less communications from some doctors recommending PSA screening (ORblack: 0.16, 95% CIblack: 0.07-0.38; ORother: 0.10, 95% CIother: 0.04-0.25), and that no one is sure PSA testing saves lives (ORblack: 0.49, 95% CIblack: 0.04-6.91; ORother: 0.17, 95% CIother: 0.06-0.48). Minority men, while more likely to have had a PSA, were less likely to be told of the harms and benefits of PSA testing, compared with White men. Increasing communication surrounding screening advantages and disadvantages between providers and patients can increase awareness and knowledge among younger men. In a post-COVID-19 environment, communication regarding the return to preventative screenings within vulnerable populations is an important message to convey. 
Research shows preventive screenings have dropped across all population groups due to the pandemic yet the decline disproportionately affects Black and other minority men.",2021-03-10 +32092702,Model Reconstruction from Small-Angle X-Ray Scattering Data Using Deep Learning Methods.,"Small-angle X-ray scattering (SAXS) method is widely used in investigating protein structures in solution, but high-quality 3D model reconstructions are challenging. We present a new algorithm based on a deep learning method for model reconstruction from SAXS data. An auto-encoder for protein 3D models was trained to compress 3D shape information into vectors of a 200-dimensional latent space, and the vectors are optimized using genetic algorithms to build 3D models that are consistent with the scattering data. The program has been tested with experimental SAXS data, demonstrating the capacity and robustness of accurate model reconstruction. Furthermore, the model size information can be optimized using this algorithm, enhancing the automation in model reconstruction directly from SAXS data. The program was implemented using Python with the TensorFlow framework, with source code and webserver available from http://liulab.csrc.ac.cn/decodeSAXS.",2020-02-13 +30053438,Snake Venomics Display: An online toolbox for visualization of snake venomics data.,"With the introduction of powerful mass spectrometry equipment into the field of snake venom proteomics, a large body of venomics data is accumulating. To allow for better comparison between venom compositions from different snake species and to provide an online database containing this data, we devised the Snake Venomics Display toolbox for visualization of snake venomics data on linear scales. 
This toolbox is freely available to be used online at https://tropicalpharmacology.com/tools/snake-venomics-display/ and allows researchers to visualize venomics data in a Relative Abundance (%) visualization mode and in an Absolute Abundance (mg) visualization mode, the latter taking venom yields into account. The curated venomics data for all snake species included in this database is also made available in a downloadable Excel file format. The Snake Venomics Display toolbox represents a simple way of handling snake venomics data, which is better suited for large data sets of venom compositions from multiple snake species.",2018-07-25 +30101336,"LipidFinder on LIPID MAPS: peak filtering, MS searching and statistical analysis for lipidomics.","

Summary

We present LipidFinder online, hosted on the LIPID MAPS website, as a liquid chromatography/mass spectrometry (LC/MS) workflow comprising peak filtering, MS searching and statistical analysis components, highly customized for interrogating lipidomic data. The online interface of LipidFinder includes several innovations such as comprehensive parameter tuning, a MS search engine employing in-house customized, curated and computationally generated databases and multiple reporting/display options. A set of integrated statistical analysis tools which enable users to identify those features which are significantly-altered under the selected experimental conditions, thereby greatly reducing the complexity of the peaklist prior to MS searching is included. LipidFinder is presented as a highly flexible, extensible user-friendly online workflow which leverages the lipidomics knowledge base and resources of the LIPID MAPS website, long recognized as a leading global lipidomics portal.

Availability and implementation

LipidFinder on LIPID MAPS is available at: http://www.lipidmaps.org/data/LF.",2019-02-01 +33718222,"Development, Validation, and Visualization of A Web-Based Nomogram for Predicting the Recurrence-Free Survival Rate of Patients With Desmoid Tumors.","

Background

Surgery is an important treatment option for desmoid tumor (DT) patients, but how to decrease and predict the high recurrence rate remains a major challenge.

Methods

Desmoid tumor patients diagnosed and treated at Tianjin Cancer Institute & Hospital were included, and a web-based nomogram was constructed by screening the recurrence-related risk factors using Cox regression analysis. External validation was conducted with data from the Fudan University Shanghai Cancer Center.

Results

A total of 385 patients were identified. Finally, after excluding patients without surgery, patients who were lost to follow-up, and patients without complete resection, a total of 267 patients were included in the nomogram construction. Among these patients, 53 experienced recurrence, with a recurrence rate of 20.15%. The 3-year and 5-year recurrence-free survival (RFS) rates were 82.5% and 78%, respectively. Age, tumor diameter, admission status, location, and tumor number were correlated with recurrence in univariate Cox analysis. In multivariate Cox analysis, only age, tumor diameter and tumor number were independent risk factors for recurrence and were then used to construct a web-based nomogram to predict recurrence. The concordance index (C-index) of the nomogram was 0.718, and the areas under the curves (AUCs) of the 3-year and 5-year receiver operating characteristic (ROC) curves were 0.751 and 0.761, respectively. In the external validation set, the C-index was 0.706, and the AUCs of the 3-year and 5-year ROC curves were 0.788 and 0.794, respectively.

Conclusions

Age, tumor diameter, and tumor number were independent predictors of recurrence for DTs, and a web-based nomogram containing these three predictors could accurately predict RFS (https://stepforward.shinyapps.io/Desmoidtumor/).",2021-02-25 +26578592,"PANTHER version 10: expanded protein families and functions, and analysis tools.","PANTHER (Protein Analysis THrough Evolutionary Relationships, http://pantherdb.org) is a widely used online resource for comprehensive protein evolutionary and functional classification, and includes tools for large-scale biological data analysis. Recent development has been focused in three main areas: genome coverage, functional information ('annotation') coverage and accuracy, and improved genomic data analysis tools. The latest version of PANTHER, 10.0, includes almost 5000 new protein families (for a total of over 12 000 families), each with a reference phylogenetic tree including protein-coding genes from 104 fully sequenced genomes spanning all kingdoms of life. Phylogenetic trees now include inference of horizontal transfer events in addition to speciation and gene duplication events. Functional annotations are regularly updated using the models generated by the Gene Ontology Phylogenetic Annotation Project. For the data analysis tools, PANTHER has expanded the number of different 'functional annotation sets' available for functional enrichment testing, allowing analyses to access all Gene Ontology annotations--updated monthly from the Gene Ontology database--in addition to the annotations that have been inferred through evolutionary relationships. 
The Prowler (data browser) has been updated to enable users to more efficiently browse the entire database, and to create custom gene lists using the multiple axes of classification in PANTHER.",2015-11-17 +30357361,Glycosciences.DB: an annotated data collection linking glycomics and proteomics data (2018 update).,"Glycosciences.DB, the glycan structure database of the Glycosciences.de portal, collects various kinds of data on glycan structures, including carbohydrate moieties from worldwide Protein Data Bank (wwPDB) structures. This way it forms a bridge between glycomics and proteomics resources. A major update of this database combines a redesigned web interface with a series of new functions. These include separate entry pages not only for glycan structures but also for literature references and wwPDB entries, improved substructure search options, a newly available keyword search covering all types of entries in one query, and new types of information that is added to glycan structures. These new features are described in detail in this article, and options how users can provide information to the database are discussed as well. Glycosciences.DB is available at http://www.glycosciences.de/database/ and can be freely accessed.",2019-01-01 +30527357,Correlation between EZH2 and CEP55 and lung adenocarcinoma prognosis.,"

Objective

Recently, accumulated evidence indicates that the enhancer of zeste homologue 2 (EZH2) is highly expressed in a wide range of cancer types, including NSCLC. The downstream genes regulated by EZH2 were screened using bioinformatics analysis. This study aimed to analyse the correlation between the downstream genes of EZH2 and the prognosis of lung adenocarcinoma.

Methods

Expression and methylation data of lung adenocarcinoma were downloaded from The Cancer Genome Atlas (TCGA) (https://cancergenome.nih.gov/) database, and data were categorized into EZH2 overexpression and EZH2 downregulation groups according to EZH2 expression. The genes that showed opposite trends of methylation and expression changes were screened, and the association of gene expression was calculated. Based on the String database, a protein association analysis was conducted to identify genes related to EZH2, which are referred to as EZH2 regulation candidate genes. According to gene expression (GSE27262) and methylation (GSE66836) chip data in the Gene Expression Omnibus (GEO) (https://www.ncbi.nlm.nih.gov/geo/) database, the genes with differential expression and methylation in lung adenocarcinoma tissues were analysed, and the trends of EZH2 regulation candidate gene expression and methylation were verified to identify the EZH2 regulation candidate genes. Subsequently, MethHC (http://methhc.mbc.nctu.edu.tw/php/index.php) and UALCAN (http://ualcan.path.uab.edu/index.html) were employed to verify changes in the expression and methylation of EZH2 downstream regulation candidate genes and to analyse the correlation between these genes and the prognosis of lung adenocarcinoma.

Results

Expression and methylation data of lung adenocarcinoma were downloaded from TCGA database and categorized into EZH2 overexpression and EZH2 downregulation groups according to EZH2 expression. A total of 337 genes that showed opposite trends of methylation and expression changes were obtained. The protein association analysis using the String (https://string-db.org/) database showed that 61 genes interact with EZH2 and 61 genes represent EZH2 downstream regulation candidate genes. Moreover, 222 genes obtained from GSE27262 and GSE66836 chip data were negatively correlated with methylation and expression changes, and centrosomal protein 55 (CEP55) was identified as the EZH2 downstream regulation candidate gene. CEP55 was upregulated in lung adenocarcinoma tissues and showed low methylation. According to gene expression data from TCGA database, CEP55 and EZH2 exhibit higher levels in lung adenocarcinoma tissue than in adjacent normal tissue. Finally, the survival analysis revealed that EZH2 is not associated with the prognosis of lung adenocarcinoma, while CEP55 is related to lung adenocarcinoma prognosis.

Conclusion

Taken together, these results indicate that changes in EZH2 expression lead to changes in CEP55 expression in lung adenocarcinoma, and these changes are associated with its prognosis.",2018-11-24 +30245835,PGD: Pineapple Genomics Database.,"Pineapple occupies an important phylogenetic position as its reference genome is a model for studying the evolution of the Bromeliaceae family and the crassulacean acid metabolism (CAM) photosynthesis. Here, we developed a pineapple genomics database (PGD, http://pineapple.angiosperms.org/pineapple/html/index.html) as a central online platform for storing and integrating genomic, transcriptomic, function annotation and genetic marker data for pineapple (Ananas comosus (L.) Merr.). The PGD currently hosts significant search tools and available datasets for researchers to study comparative genomics, gene expression, gene co-expression molecular marker, and gene annotation of A. comosus (L). PGD also performed a series of additional pages for a genomic browser that visualizes genomic data interactively, bulk data download, a detailed user manual, and data integration information. PGD was developed with the capacity to integrate future data resources, and will be used as a long-term and open access database to facilitate the study of the biology, distribution, and the evolution of pineapple and the relative plant species. An email-based helpdesk is also available to offer support with the website and requests of specific datasets from the research community.",2018-09-17 +33686535,Development of a model to predict the probability of incurring a complication during spine surgery.,"

Purpose

Predictive models in spine surgery are of use in shared decision-making. This study sought to develop multivariable models to predict the probability of general and surgical perioperative complications of spinal surgery for lumbar degenerative diseases.

Methods

Data came from EUROSPINE's Spine Tango Registry (1.2012-12.2017). Separate prediction models were built for surgical and general complications. Potential predictors included age, gender, previous spine surgery, additional pathology, BMI, smoking status, morbidity, prophylaxis, technology used, and the modified Mirza invasiveness index score. Complete case multiple logistic regression was used. Discrimination was assessed using area under the receiver operating characteristic curve (AUC) with 95% confidence intervals (CI). Plots were used to assess the calibration of the models.

Results

Overall, 23'714/68'111 patients (54.6%) were available for complete case analysis: 763 (3.2%) had a general complication, with ASA score being strongly predictive (ASA-2 OR 1.6, 95% CI 1.20-2.12; ASA-3 OR 2.98, 95% CI 2.19-4.07; ASA-4 OR 5.62, 95% CI 3.04-10.41), while 2534 (10.7%) had a surgical complication, with previous surgery at the same level being an important predictor (OR 1.9, 95%CI 1.71-2.12). Respectively, model AUCs were 0.74 (95% CI, 0.72-0.76) and 0.64 (95% CI, 0.62-0.65), and calibration was good up to predicted probabilities of 0.30 and 0.25, respectively.

Conclusion

We developed two models to predict complications associated with spinal surgery. Surgical complications were predicted with less discriminative ability than general complications. Reoperation at the same level was strongly predictive of surgical complications and a higher ASA score, of general complications. A web-based prediction tool was developed at https://sst.webauthor.com/go/fx/run.cfm?fx=SSTCalculator .",2021-03-09 +25388105,The Eukaryotic Pathogen Databases: a functional genomic resource integrating data from human and veterinary parasites.,"Over the past 20 years, advances in high-throughput biological techniques and the availability of computational resources including fast Internet access have resulted in an explosion of large genome-scale data sets ""big data."" While such data are readily available for download and personal use and analysis from a variety of repositories, often such analysis requires access to seldom-available computational skills. As a result a number of databases have emerged to provide scientists with online tools enabling the interrogation of data without the need for sophisticated computational skills beyond basic knowledge of Internet browser utility. This chapter focuses on the Eukaryotic Pathogen Databases (EuPathDB: http://eupathdb.org) Bioinformatic Resource Center (BRC) and illustrates some of the available tools and methods.",2015-01-01 +33236957,Modelling of antiproliferative activity measured in HeLa cervical cancer cells in a series of xanthene derivatives.,"Cancer remains one of the leading causes of death in humans, and new drug substances are therefore being developed. Thus, the anti-cancer activity of xanthene derivatives has become an important topic in the development of new and potent anti-cancer drug substances. Previously published novel series of xanthen-3-one and xanthen-1,8-dione derivatives have been synthesized in one of our laboratories and showed anti-proliferative activity in HeLa cancer cell lines. 
This series serves as a good basis to develop quantitative structure-activity relationship (QSAR), to study the relations between anti-proliferative activity and chemical structures. A QSAR model has been derived that relies only on two-dimensional molecular descriptors, providing mechanistic insight into the anti-proliferative activity of xanthene derivatives. The model is validated internally and externally and additionally with the set of inactive compounds of the original data, confirming model applicability for the design and discovery of novel xanthene derivatives. The QSAR model is available at the QsarDB repository (http://dx.doi.org/10.15152/QDB.237).",2020-12-01 +31725860,HisgAtlas 1.0: a human immunosuppression gene database. ,"Immunosuppression is body's state in which the activation or efficacy of immune system is weakened. It is associated with a wide spectrum of human diseases. In the last two decades, tremendous efforts have been made to elucidate the mechanism of hundreds of immunosuppression genes. Immunosuppression genes could be valuable drug targets or biomarkers for the immunotherapeutic treatment of different diseases. However, the information of all previously identified immunosuppression genes is dispersed in thousands of publications. Here, we provide the HisgAtlas database that collects 995 previously identified human immunosuppression genes using text mining and manual curation. We believe HisgAtlas will be a valuable resource to search human immunosuppression genes as well as to investigate their functions in further research. Database URL: http://biokb.ncpsb.org/HisgAtlas/.",2017-01-01 +33882842,Using CPAP in COVID-19 patients outside of the intensive care setting: a comparison of survival and outcomes between dialysis and non-dialysis dependent patients.,"

Background

SARS-CoV-2 (COVID-19) is a novel coronavirus associated with high mortality rates. The use of Continuous Positive Airway Pressure (CPAP) has been recognised as a management option for severe COVID-19 (NHS, Specialty guides for patient management during the coronavirus pandemic Guidance for the role and use of non-invasive respiratory support in adult patients with coronavirus (confirmed or suspected), https://www.nice.org.uk/guidance/ng159 ). We offered ward-based CPAP to COVID-19, dialysis patients not suitable for escalation to ICU. The aim of the study was to evaluate the use of CPAP for COVID-19 dialysis patients compared to non-dialysis COVID-19 patients outside of the intensive care setting. We further aimed to investigate factors associated with improved outcomes.

Methods

Data was collected from a single centre (Royal Preston Hospital, UK), from March to June 2020. Treatment outcomes were compared for dialysis and non-dialysis dependent patients who received CPAP with limitations on their escalation and resuscitation status. Kaplan-Meier survival curves and Cox regression models were used to compare outcomes. The primary study outcome was 30 day mortality. Confounders including length of admission, systemic anticoagulation and ultrafiltration volumes on dialysis were also analysed.

Results

Over the study period, 40 dialysis patients tested positive for COVID-19, with 30 requiring hospital admission. 93% (n = 28) required supplementary oxygen and 12% (n = 9) required CPAP on the ward. These patients were compared to a serial selection of 14 non-dialysis patients treated with CPAP during the same period. Results showed a significant difference in 30 day survival rates between the two groups: 88.9% in the dialysis group vs. 21.4% in the non-dialysis group. Statistical modelling showed that anticoagulation was also an important factor and correlated with better outcomes.

Conclusion

This is, to the best of our knowledge, the largest series of COVID-19 dialysis patients treated with CPAP in a ward-based setting. In general, dialysis dependent patients have multiple co-morbidities including cardiovascular disease and diabetes mellitus making them vulnerable to COVID-19 and not always suitable for treatment in ICU. We showed a significantly lower 30 day mortality rate with the use of CPAP in the dialysis group (11.1%) compared to the non-dialysis group (78.6%). Despite a small sample size, we believe this study provides impetus for further work clarifying the role of CPAP in treating COVID-19 dialysis dependent patients.",2021-04-21 +33563213,lncEvo: automated identification and conservation study of long noncoding RNAs.,"

Background

Long noncoding RNAs represent a large class of transcripts with two common features: they exceed an arbitrary length threshold of 200 nt and are assumed to not encode proteins. Although a growing body of evidence indicates that the vast majority of lncRNAs are potentially nonfunctional, hundreds of them have already been revealed to perform essential gene regulatory functions or to be linked to a number of cellular processes, including those associated with the etiology of human diseases. To better understand the biology of lncRNAs, it is essential to perform a more in-depth study of their evolution. In contrast to protein-encoding transcripts, however, they do not show the strong sequence conservation that usually results from purifying selection; therefore, software that is typically used to resolve the evolutionary relationships of protein-encoding genes and transcripts is not applicable to the study of lncRNAs.

Results

To tackle this issue, we developed lncEvo, a computational pipeline that consists of three modules: (1) transcriptome assembly from RNA-Seq data, (2) prediction of lncRNAs, and (3) conservation study-a genome-wide comparison of lncRNA transcriptomes between two species of interest, including search for orthologs. Importantly, one can choose to apply lncEvo solely for transcriptome assembly or lncRNA prediction, without calling the conservation-related part.

Conclusions

lncEvo is an all-in-one tool built with the Nextflow framework, utilizing state-of-the-art software and algorithms with customizable trade-offs between speed and sensitivity, ease of use and built-in reporting functionalities. The source code of the pipeline is freely available for academic and nonacademic use under the MIT license at https://gitlab.com/spirit678/lncrna_conservation_nf .",2021-02-09 +26589293,"TCLP: an online cancer cell line catalogue integrating HLA type, predicted neo-epitopes, virus and gene expression.","Human cancer cell lines are an important resource for research and drug development. However, the available annotations of cell lines are sparse, incomplete, and distributed in multiple repositories. Re-analyzing publicly available raw RNA-Seq data, we determined the human leukocyte antigen (HLA) type and abundance, identified expressed viruses and calculated gene expression of 1,082 cancer cell lines. Using the determined HLA types, public databases of cell line mutations, and existing HLA binding prediction algorithms, we predicted antigenic mutations in each cell line. We integrated the results into a comprehensive knowledgebase. Using the Django web framework, we provide an interactive user interface with advanced search capabilities to find and explore cell lines and an application programming interface to extract cell line information. The portal is available at http://celllines.tron-mainz.de.",2015-11-20 +32784131,CF Distance: A New Domain Discrepancy Metric and Application to Explicit Domain Adaptation for Cross-Modality Cardiac Image Segmentation.,"Domain adaptation has great values in unpaired cross-modality image segmentation, where the training images with gold standard segmentation are not available from the target image domain. The aim is to reduce the distribution discrepancy between the source and target domains. Hence, an effective measurement for this discrepancy is critical. 
In this work, we propose a new metric based on characteristic functions of distributions. This metric, referred to as CF distance, enables explicit domain adaptation, in contrast to the implicit manners minimizing domain discrepancy via adversarial training. Based on this CF distance, we propose an unsupervised domain adaptation framework for cross-modality cardiac segmentation, which consists of image reconstruction and prior distribution matching. We validated the method on two tasks, i.e., the CT-MR cross-modality segmentation and the multi-sequence cardiac MR segmentation. Results showed that the proposed explicit metric was effective in domain adaptation, and the segmentation method delivered promising and superior performance, compared to other state-of-the-art techniques. The data and source code of this work has been released via https://zmiclab.github.io/projects.html.",2020-11-30 +32563466,SemanticGO: a tool for gene functional similarity analysis in Arabidopsis thaliana and rice.,"Gene or pathway functional similarities are important information for researchers. However, these similarities are often described sparsely and qualitatively. The latent semantic analysis of Arabidopsis thaliana (Arabidopsis) Gene Ontology (GO) data produced a set of 200-dimension feature vectors for each gene. Pathways were represented by summing the vectors of the pathway member genes. Thus, the similarities between genes and pathways were assessed. Additionally, the gene feature vectors were correlated with external gene data, including gene expression and gene network connectivity, to elucidate the associated functions. The gene feature vectors were decoded, and their applications were demonstrated. 
A simple online tool, SemanticGO (http://bioinformatics.fafu.edu.cn/semanticGO/), is herein provided to enable researchers to explore the similarities between genes and pathways in both Arabidopsis and rice.",2020-05-23 +31160594,"PathoPhenoDB, linking human pathogens to their phenotypes in support of infectious disease research.","Understanding the relationship between the pathophysiology of infectious disease, the biology of the causative agent and the development of therapeutic and diagnostic approaches is dependent on the synthesis of a wide range of types of information. Provision of a comprehensive and integrated disease phenotype knowledgebase has the potential to provide novel and orthogonal sources of information for the understanding of infectious agent pathogenesis, and support for research on disease mechanisms. We have developed PathoPhenoDB, a database containing pathogen-to-phenotype associations. PathoPhenoDB relies on manual curation of pathogen-disease relations, on ontology-based text mining as well as manual curation to associate host disease phenotypes with infectious agents. Using Semantic Web technologies, PathoPhenoDB also links to knowledge about drug resistance mechanisms and drugs used in the treatment of infectious diseases. PathoPhenoDB is accessible at http://patho.phenomebrowser.net/ , and the data are freely available through a public SPARQL endpoint.",2019-06-03 +33458857,Using machine learning techniques predicts prognosis of patients with Ewing sarcoma.,"Ewing sarcoma is one of the most common types of malignant bone tumor in children and adolescents. However, to our limited knowledge, no study exists that uses machine learning to create algorithms for the prediction of survivorship for Ewing sarcoma. About 2332 patients with Ewing sarcoma between 1975 and 2016 in the United States were identified from Surveillance, Epidemiology, and End Results (SEER) program. 
All patients in the data set were randomly assigned into the training set and the testing set, at a 2:8 ratio. In the training set, boosted decision tree, support vector machine, nonparametric random forest method, and neural network models were developed to predict the 5-year survivorship. The overall survival rate in 5-year follow-up of this patient cohort is 60.72%. With respect to the algorithms for both cancer specific survival and overall survival, there was slight superiority in our performance metrics favoring the random forest method over the other models for survival prediction, with 77/83% sensitivity and 91/94% specificity, respectively. The random forest method was incorporated into a freely available web-based application. This application can be accessed through https://zryan.shinyapps.io/EwingSarcoma/. Clinical Significance: To the best of our knowledge, this is the first available predictive model for predicting survival in Ewing sarcoma based on machine-learning algorithms. This study may provide orthopedic surgeons with an easily accessible prediction tool when dealing with patients suffering from Ewing sarcoma.",2021-01-24 +31461491,A corpus of plant-disease relations in the biomedical domain.,"

Background

Many new medicines have been derived from natural sources such as plants, which have a long history of being used for disease treatment. Thus, their benefits and side effects have been studied, and plant-related information including plant and disease relations have been accumulated in Medline articles. Because numerous articles are available in Medline and are written in natural language, text-mining is important. However, a corpus of plant and disease relations is not available yet. Thus, we aimed to construct such a corpus.

Methods and results

In this study, we designed and annotated a plant-disease relations corpus, and proposed a computational model to predict plant-disease relations using the corpus. We categorized plant and disease relations into four types: treatments of diseases, causes of diseases, associations, and negative relations. To construct a corpus of plant-disease relations, we first created its annotation guidelines and randomly selected 200 Medline abstracts. From these abstracts, we identified 1,405 and 1,755 plant and disease mentions, annotated to 105 and 237 unique plant and disease identifiers, respectively. When we selected sentences containing at least one plant and one disease mention, we extracted 878 plant and 1,077 disease entities, which finally generated a corpus of plant-disease relations including 1,309 relations from 199 abstracts. To verify the effectiveness of the corpus, we proposed a convolutional neural network model with the shortest dependency path (SDP-CNN) and applied it to the constructed corpus. The micro F-score with ten-fold cross-validation was found to be 0.764. We also applied the proposed SDP-CNN model to all Medline abstracts. When we measured its performance for 483 randomly selected plant-disease co-occurring sentences, the model showed a precision of 0.707.

Conclusion

The plant-disease relations corpus is unique and represents an important resource for biomedical text-mining. The corpus of plant and disease relations is available at http://gcancer.org/pdr/.",2019-08-28 +33788613,Effect of Epidermal Growth Factor Treatment and Polychlorinated Biphenyl Exposure in a Dietary-Exposure Mouse Model of Steatohepatitis.,"

Background

Polychlorinated biphenyls (PCBs) are signaling disrupting chemicals that exacerbate nonalcoholic steatohepatitis (NASH) in mice. They are epidermal growth factor receptor (EGFR) inhibitors that enhance hepatic inflammation and fibrosis in mice.

Objectives

This study tested the hypothesis that epidermal growth factor (EGF) administration can attenuate PCB-related NASH by increasing hepatic EGFR signaling in a mouse model.

Methods

C57BL/6 male mice were fed a 42% milk fat diet and exposed to Aroclor 1260 (20mg/kg) or vehicle for 12 wk. EGF (0.2μg/g) or vehicle were administered daily for 10 d starting at study week 10. Liver and metabolic phenotyping were performed. The EGF dose was selected based on results of an acute dose-finding study (30 min treatment of EGF at 0.2, 0.02, 0.002μg/g via intraperitoneal injection). Hepatic phosphoproteomic analysis was performed using liver tissue from this acute study to understand EGFR's role in liver physiology.

Results

Markers of EGFR signaling were higher in EGF-treated mice. EGF+PCB-exposed mice had lower hepatic free fatty acids, inflammation, and fibrosis relative to PCB-only exposed mice. EGF-treated mice had higher plasma lipids, with no improvement in hepatic steatosis, and an association with higher LXR target gene expression and de novo lipogenesis. EGF-treated mice showed more severe hyperglycemia associated with lower adiponectin levels and insulin sensitivity. EGF-treated mice had higher hepatic HNF4α, NRF2, and AhR target gene expression but lower constitutive androstane receptor and farnesoid X receptor target gene expression. The hepatic EGF-sensitive phosphoproteome demonstrated a role for EGFR signaling in liver homeostasis.

Discussion

These results validated EGFR inhibition as a causal mode of action for PCB-related hepatic inflammation and fibrosis in a mouse model of NASH. However, observed adverse effects may limit the clinical translation of EGF therapy. More data are required to better understand EGFR's underinvestigated roles in liver and environmental health. https://doi.org/10.1289/EHP8222.",2021-03-31 +33058887,How the CORAL software can be used to select compounds for efficient treatment of neurodegenerative diseases?,"Recommendations on the efficient application of CORAL software (http://www.insilico.eu/coral) to establish quantitative structure-property/activity relationships (QSPRs/QSARs) are provided. The predictive potential of the approach has been demonstrated for QSAR models developed for inhibitor concentrations (negative decimal logarithm of IC50) of derivatives of N-methyl-d-aspartate (NMDA) receptor, leucine-rich repeat kinase 2 (LRRK2), and tropomyosin receptor kinase A (TrkA). The above three protein targets are related to various neurodegenerative diseases such as Alzheimer's and Parkinson's. Each model was checked using several splits of the data for the training and the validation sets. The index of ideality of correlation (IIC) represents a tool to improve the predictive potential for an arbitrary model. However, the use of the IIC should be carried out according to rules, described in this work.",2020-10-13 +34109184,Potential Prognostic Immune Biomarkers of Overall Survival in Ovarian Cancer Through Comprehensive Bioinformatics Analysis: A Novel Artificial Intelligence Survival Prediction System.,"Background: The tumour immune microenvironment plays an important role in the biological mechanisms of tumorigenesis and progression. Artificial intelligence medicine studies based on big data and advanced algorithms are helpful for improving the accuracy of prediction models of tumour prognosis. 
The current research aims to explore potential prognostic immune biomarkers and develop a predictive model for the overall survival of ovarian cancer (OC) based on artificial intelligence algorithms. Methods: Differential expression analyses were performed between normal tissues and tumour tissues. Potential prognostic biomarkers were identified using univariate Cox regression. An immune regulatory network was constructed of prognostic immune genes and their highly related transcription factors. Multivariate Cox regression was used to identify potential independent prognostic immune factors and develop a prognostic model for ovarian cancer patients. Three artificial intelligence algorithms, random survival forest, multitask logistic regression, and Cox survival regression, were used to develop a novel artificial intelligence survival prediction system. Results: The current study identified 1,307 differentially expressed genes and 337 differentially expressed immune genes between tumour samples and normal samples. Further univariate Cox regression identified 84 prognostic immune gene biomarkers for ovarian cancer patients in the model dataset (GSE32062 dataset and GSE53963 dataset). An immune regulatory network was constructed involving 63 immune genes and 5 transcription factors. Fourteen immune genes (PSMB9, FOXJ1, IFT57, MAL, ANXA4, CTSH, SCRN1, MIF, LTBR, CTSD, KIFAP3, PSMB8, HSPA5, and LTN1) were recognised as independent risk factors by multivariate Cox analyses. Kaplan-Meier survival curves showed that these 14 prognostic immune genes were closely related to the prognosis of ovarian cancer patients. A prognostic nomogram was developed by using these 14 prognostic immune genes. The concordance indexes were 0.760, 0.733, and 0.765 for 1-, 3-, and 5-year overall survival, respectively. This prognostic model could differentiate high-risk patients with poor overall survival from low-risk patients. 
According to three artificial intelligence algorithms, the current study developed an artificial intelligence survival predictive system that could provide three individual mortality risk curves for ovarian cancer. Conclusion: In conclusion, the current study identified 1,307 differentially expressed genes and 337 differentially expressed immune genes in ovarian cancer patients. Multivariate Cox analyses identified fourteen prognostic immune biomarkers for ovarian cancer. The current study constructed an immune regulatory network involving 63 immune genes and 5 transcription factors, revealing potential regulatory associations among immune genes and transcription factors. The current study developed a prognostic model to predict the prognosis of ovarian cancer patients. The current study further developed two artificial intelligence predictive tools for ovarian cancer, which are available at https://zhangzhiqiao8.shinyapps.io/Smart_Cancer_Survival_Predictive_System_17_OC_F1001/ and https://zhangzhiqiao8.shinyapps.io/Gene_Survival_Subgroup_Analysis_17_OC_F1001/. An artificial intelligence survival predictive system could help improve individualised treatment decision-making.",2021-05-24 +33555860,"Extension of the CL&Pol Polarizable Force Field to Electrolytes, Protic Ionic Liquids, and Deep Eutectic Solvents.","The polarizable CL&Pol force field presented in our previous study, Transferable, Polarizable Force Field for Ionic Liquids (J. Chem. Theory Comput. 2019, 15, 5858, DOI: http://doi.org/10.1021/acs.jctc.9b00689), is extended to electrolytes, protic ionic liquids (PIL), deep eutectic solvents (DES), and glycols. These systems are problematic in polarizable simulations because they contain either small, highly charged ions or strong hydrogen bonds, which cause trajectory instabilities due to the pull exerted on the induced dipoles. 
We use a Tang-Toennies (TT) function to dampen, or smear, the interactions between charges and induced dipole at a short range involving small, highly charged atoms (such as hydrogen or lithium), thus preventing the ""polarization catastrophe"". The new force field gives stable trajectories and is validated through comparison with experimental data on density, viscosity, and ion diffusion coefficients of liquid systems of the above-mentioned classes. The results also shed light on the hydrogen-bonding pattern in ethylammonium nitrate, a PIL, for which the literature contains conflicting views. We describe the implementation of the TT damping function, of the temperature-grouped Nosé-Hoover thermostat for polarizable molecular dynamics (MD) and of the periodic perturbation method for viscosity evaluation from non-equilibrium trajectories in the LAMMPS MD code. The main result of this work is the wider applicability of the CL&Pol polarizable force field to new, important classes of fluids, achieving robust trajectories and a good description of equilibrium and transport properties in challenging systems. The fragment-based approach of CL&Pol will allow ready extension to a wide variety of PILs, DES, and electrolytes.",2021-02-08 +27153728,IsomiR Bank: a research resource for tracking IsomiRs.,"

Unlabelled

: Next-Generation Sequencing (NGS) technology has revealed that microRNAs (miRNAs) are capable of exhibiting frequent differences from their corresponding mature reference sequences, generating multiple variants: the isoforms of miRNAs (isomiRs). These isomiRs mainly originate via the imprecise and alternative cleavage during the pre-miRNA processing and post-transcriptional modifications that influence miRNA stability, their sub-cellular localization and target selection. Although several tools for the identification of isomiR have been reported, no bioinformatics resource dedicated to gather isomiRs from public NGS data and to provide functional analysis of these isomiRs is available to date. Thus, a free online database, IsomiR Bank has been created to integrate isomiRs detected by our previously published algorithm CPSS. In total, 2727 samples (Small RNA NGS data downloaded from ArrayExpress) from eight species (Arabidopsis thaliana, Drosophila melanogaster, Danio rerio, Homo sapiens, Mus musculus, Oryza sativa, Solanum lycopersicum and Zea mays) are analyzed. At present, 308 919 isomiRs from 4706 mature miRNAs are collected into IsomiR Bank. In addition, IsomiR Bank provides target prediction and enrichment analysis to evaluate the effects of isomiRs on target selection.

Availability and implementation

IsomiR Bank is implemented in PHP/PERL + MySQL + R format and can be freely accessed at http://mcg.ustc.edu.cn/bsc/isomir/

Contacts

: aoli@ustc.edu.cn or qshi@ustc.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-02 +29527288,"A collection of annotated and harmonized human breast cancer transcriptome datasets, including immunologic classification.","The increased application of high-throughput approaches in translational research has expanded the number of publicly available data repositories. Gathering additional valuable information contained in the datasets represents a crucial opportunity in the biomedical field. To facilitate and stimulate utilization of these datasets, we have recently developed an interactive data browsing and visualization web application, the Gene Expression Browser (GXB). In this note, we describe a curated compendium of 13 public datasets on human breast cancer, representing a total of 2142 transcriptome profiles. We classified the samples according to different immune based classification systems and integrated this information into the datasets. Annotated and harmonized datasets were uploaded to GXB. Study samples were categorized in different groups based on their immunologic tumor response profiles, intrinsic molecular subtypes and multiple clinical parameters. Ranked gene lists were generated based on relevant group comparisons. In this data note, we demonstrate the utility of GXB to evaluate the expression of a gene of interest, find differential gene expression between groups and investigate potential associations between variables with a specific focus on immunologic classification in breast cancer. This interactive resource is publicly available online at: http://breastcancer.gxbsidra.org/dm3/geneBrowser/list.",2017-03-20 +33407076,MiBiOmics: an interactive web application for multi-omics data exploration and integration.,"

Background

Multi-omics experimental approaches are becoming common practice in biological and medical sciences underlining the need to design new integrative techniques and applications to enable the multi-scale characterization of biological systems. The integrative analysis of heterogeneous datasets generally allows to acquire additional insights and generate novel hypotheses about a given biological system. However, it can become challenging given the often-large size of omics datasets and the diversity of existing techniques. Moreover, visualization tools for interpretation are usually non-accessible to biologists without programming skills.

Results

Here, we present MiBiOmics, a web-based and standalone application that facilitates multi-omics data visualization, exploration, integration, and analysis by providing easy access to dedicated and interactive protocols. It implements classical ordination techniques and the inference of omics-based (multilayer) networks to mine complex biological systems, and identify robust biomarkers linked to specific contextual parameters or biological states.

Conclusions

MiBiOmics provides easy-access to exploratory ordination techniques and to a network-based approach for integrative multi-omics analyses through an intuitive and interactive interface. MiBiOmics is currently available as a Shiny app at https://shiny-bird.univ-nantes.fr/app/Mibiomics and as a standalone application at https://gitlab.univ-nantes.fr/combi-ls2n/mibiomics .",2021-01-06 +30185512,TaxAss: Leveraging a Custom Freshwater Database Achieves Fine-Scale Taxonomic Resolution. ,"Taxonomy assignment of freshwater microbial communities is limited by the minimally curated phylogenies used for large taxonomy databases. Here we introduce TaxAss, a taxonomy assignment workflow that classifies 16S rRNA gene amplicon data using two taxonomy reference databases: a large comprehensive database and a small ecosystem-specific database rigorously curated by scientists within a field. We applied TaxAss to five different freshwater data sets using the comprehensive SILVA database and the freshwater-specific FreshTrain database. TaxAss increased the percentage of the data set classified compared to using only SILVA, especially at fine-resolution family to species taxon levels, while across the freshwater test data sets classifications increased by as much as 11 to 40% of total reads. A similar increase in classifications was not observed in a control mouse gut data set, which was not expected to contain freshwater bacteria. TaxAss also maintained taxonomic richness compared to using only the FreshTrain across all taxon levels from phylum to species. Without TaxAss, most organisms not represented in the FreshTrain were unclassified, but at fine taxon levels, incorrect classifications became significant. We validated TaxAss using simulated amplicon data derived from full-length clone libraries and found that 96 to 99% of test sequences were correctly classified at fine resolution. 
TaxAss splits a data set's sequences into two groups based on their percent identity to reference sequences in the ecosystem-specific database. Sequences with high similarity to sequences in the ecosystem-specific database are classified using that database, and the others are classified using the comprehensive database. TaxAss is free and open source and is available at https://www.github.com/McMahonLab/TaxAss. IMPORTANCE: Microbial communities drive ecosystem processes, but microbial community composition analyses using 16S rRNA gene amplicon data sets are limited by the lack of fine-resolution taxonomy classifications. Coarse taxonomic groupings at the phylum, class, and order levels lump ecologically distinct organisms together. To avoid this, many researchers define operational taxonomic units (OTUs) based on clustered sequences, sequence variants, or unique sequences. These fine-resolution groupings are more ecologically relevant, but OTU definitions are data set dependent and cannot be compared between data sets. Microbial ecologists studying freshwater have curated a small, ecosystem-specific taxonomy database to provide consistent and up-to-date terminology. We created TaxAss, a workflow that leverages this database to assign taxonomy. We found that TaxAss improves fine-resolution taxonomic classifications (family, genus, and species). Fine taxonomic groupings are more ecologically relevant, so they provide an alternative to OTU-based analyses that is consistent and comparable between data sets.",2018-09-05 +25708775,EXPath: a database of comparative expression analysis inferring metabolic pathways for plants.,"

Background

In general, the expression of gene alters conditionally to catalyze a specific metabolic pathway. Microarray-based datasets have been massively produced to monitor gene expression levels in parallel with numerous experimental treatments. Although several studies facilitated the linkage of gene expression data and metabolic pathways, none of them are amassed for plants. Moreover, advanced analysis such as pathways enrichment or how genes express under different conditions is not rendered.

Description

Therefore, EXPath was developed to not only comprehensively congregate the public microarray expression data from over 1000 samples in biotic stress, abiotic stress, and hormone secretion but also allow the usage of this abundant resource for coexpression analysis and differentially expression genes (DEGs) identification, finally inferring the enriched KEGG pathways and gene ontology (GO) terms of three model plants: Arabidopsis thaliana, Oryza sativa, and Zea mays. Users can access the gene expression patterns of interest under various conditions via five main functions (Gene Search, Pathway Search, DEGs Search, Pathways/GO Enrichment, and Coexpression analysis) in EXPath, which are presented by a user-friendly interface and valuable for further research.

Conclusions

In conclusion, EXPath, freely available at http://expath.itps.ncku.edu.tw, is a database resource that collects and utilizes gene expression profiles derived from microarray platforms under various conditions to infer metabolic pathways for plants.",2015-01-21 +30423087,piMGM: incorporating multi-source priors in mixed graphical models for learning disease networks.,"

Motivation

Learning probabilistic graphs over mixed data is an important way to combine gene expression and clinical disease data. Leveraging the existing, yet imperfect, information in pathway databases for mixed graphical model (MGM) learning is an understudied problem with tremendous potential applications in systems medicine, the problems of which often involve high-dimensional data.

Results

We present a new method, piMGM, which can learn with accuracy the structure of probabilistic graphs over mixed data by appropriately incorporating priors from multiple experts with different degrees of reliability. We show that piMGM accurately scores the reliability of prior information from a given expert even at low sample sizes. The reliability scores can be used to determine active pathways in healthy and disease samples. We tested piMGM on both simulated and real data from TCGA, and we found that its performance is not affected by unreliable priors. We demonstrate the applicability of piMGM by successfully using prior information to identify pathway components that are important in breast cancer and improve cancer subtype classification.

Availability and implementation

http://www.benoslab.pitt.edu/manatakisECCB2018.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-09-01 +32770717,"Cutaneous, skin histopathological manifestations and relationship to COVID-19 infection patients.","COVID-19 diseases have been a nationwide pandemic condition. However, cutaneous, skin histopathological manifestations of COVID-19 infection are not well described. Our study aims are to present heterogeneous cutaneous, histopathological manifestations in COVID-19 patients, to investigate the possible relationship between cutaneous manifestations and histopathological features in COVID-19 infection. We performed a systemic review in PubMed database and Chinese medical journal search engines which were wangfang.data (http://www.wanfangdata.com.cn/), Science China (http://www.cnki.net/) until June 17th, 2020. Search terms ""COVID-19,"" ""SARS-Coronavirus-2"" and ""Coronavirus"" were used in combination with ""cutaneous,"" ""rash,"" ""skin,"" ""dermatology."" Seventy-five papers were included with confirmed COVID-19 infection. The most frequent cutaneous manifestation of COVID-19 present was erythema, nearly 38.4%. Trunk was the most affected location, presenting in 51.4% patients. Rash occurred before onset of other symptoms was in 5.3% patients. Seventy-seven patients were received treatments. Rash was dismissed in 49% patients, improved in 21.2% patients ranged from 0 to 17 days. The histopathological examination present in 39 patients. Skin is one of target organs affected by COVID-19 infection. Cutaneous manifestations should be paid more attention. It can help doctors diagnose COVID-19 infection in prodromal stage, understand progression, and determine prognosis of COVID-19 infection.",2020-09-07 +28454513,MYCbase: a database of functional sites and biochemical properties of Myc in both normal and cancer cells.,"

Background

Myc is an essential gene having multiple functions such as in cell growth, differentiation, apoptosis, genomic stability, angiogenesis, and disease biology. A large number of researchers dedicated to Myc biology are generating a substantial amount of data in normal and cancer cells/tissues including Burkitt's lymphoma and ovarian cancer.

Results

MYCbase ( http://bicresources.jcbose.ac.in/ssaha4/mycbase ) is a collection of experimentally supported functional sites in Myc that can influence the biological cellular processes. The functional sites were compiled according to their role which includes mutation, methylation pattern, post-translational modifications, protein-protein interactions (PPIs), and DNA interactions. In addition, biochemical properties of Myc are also compiled, which includes metabolism/pathway, protein abundance, and modulators of protein-protein interactions. The OMICS data related to Myc- like gene expression, proteomics expression using mass-spectrometry and miRNAs targeting Myc were also compiled in MYCbase. The mutation and pathway data from the MYCbase were analyzed to look at the patterns and distributions across different diseases. There were few proteins/genes found common in Myc-protein interactions and Myc-DNA binding, and these can play a significant role in transcriptional feedback loops.

Conclusion

In this report, we present a comprehensive integration of relevant information regarding Myc in the form of MYCbase. The data compiled in MYCbase provides a reliable data resource for functional sites at the residue level and biochemical properties of Myc in various cancers.",2017-04-28 +28821760,A database of breast oncogenic specific siRNAs.,"Breast cancer is a serious problem causing the death of women across the world. At present, one of the major challenges is to design drugs to target breast cancer specific gene(s). RNA interference (RNAi) is an important technique for targeted gene silencing that may lead to promising novel therapeutic strategies for breast cancer. Therefore, identification of such molecules having high oncogene specificity is the need of the hour. Here, we have developed a database named as Breast Oncogenic Specific siRNAs (BOSS, http://bioinformatics.cimap.res.in/sharma/boss/ ) on the basis of the current research status on siRNA-mediated repression of oncogenes in different breast cancer cell lines. BOSS is a resource of experimentally validated breast oncogenic siRNAs, collected from research articles and patents published to date. The present database contains information on 865 breast oncogenic siRNA entries. Each entry provides comprehensive information of an siRNA that includes its name, sequence, target gene, type of cells, and inhibition value, etc. Additionally, some useful tools like siRNAMAP and BOSS BLAST were also developed and linked with the database. siRNAMAP can be used for the selection of best siRNA against a target gene while BOSS BLAST tool helps to locate the siRNA sequences in different oncogenes.",2017-08-18 +25398898,CMPD: cancer mutant proteome database.,"Whole-exome sequencing, which centres on the protein coding regions of disease/cancer associated genes, represents the most cost-effective method to-date for deciphering the association between genetic alterations and diseases. 
Large-scale whole exome/genome sequencing projects have been launched by various institutions, such as NCI, Broad Institute and TCGA, to provide a comprehensive catalogue of coding variants in diverse tissue samples and cell lines. Further functional and clinical interrogation of these sequence variations must rely on extensive cross-platform integration of sequencing information and a proteome database that explicitly and comprehensively archives the corresponding mutated peptide sequences. While such a data resource is critical for the mass spectrometry-based proteomic analysis of exomic variants, no database is currently available for the collection of mutant protein sequences that correspond to recent large-scale genomic data. To address this issue and serve as a bridge to integrate genomic and proteomics datasets, CMPD (http://cgbc.cgu.edu.tw/cmpd) collected over 2 million genetic alterations, which not only facilitates the confirmation and examination of potential cancer biomarkers but also provides an invaluable resource for translational medicine research and opportunities to identify mutated proteins encoded by mutated genes.",2014-11-14 +33040355,"Superposition of COVID-19 waves, anticipating a sustained wave, and lessons for the future.","The 2019 coronavirus (COVID-19), also known as SARS-CoV-2, is highly pathogenic and virulent, and it spreads very quickly through human-to-human contact. In response to the growing number of cases, governments across the spectrum of affected countries have adopted different strategies in implementing control measures, in a hope to reduce the number of new cases. However, 5 months after the first confirmed case, countries like the United States of America (US) seem to be heading towards a trajectory that indicates a health care crisis. This is in stark contrast to the downward trajectory in Europe, China, and elsewhere in Asia, where the number of new cases has seen a decline ahead of an anticipated second wave. 
A data-driven approach reveals three key strategies in tackling COVID-19. Our work here has definitively evaluated these strategies and serves as a warning to the US, and more importantly, a guide for tackling future pandemics. Also see the video abstract here https://youtu.be/gPkCi2_7tWo.",2020-11-16 +32658296,The Gut Microbiome and Xenobiotics: Identifying Knowledge Gaps.,"There is an increasing awareness that the gut microbiome plays a critical role in human health and disease, but mechanistic insights are often lacking. In June 2018, the Health and Environmental Sciences Institute (HESI) held a workshop, ""The Gut Microbiome: Markers of Human Health, Drug Efficacy and Xenobiotic Toxicity"" (https://hesiglobal.org/event/the-gut-microbiome-workshop) to identify data gaps in determining how gut microbiome alterations may affect human health. Speakers and stakeholders from academia, government, and industry addressed multiple topics including the current science on the gut microbiome, endogenous and exogenous metabolites, biomarkers, and model systems. The workshop presentations and breakout group discussions formed the basis for identifying data gaps and research needs. Two critical issues that emerged were defining the microbial composition and function related to health and developing standards for models, methods and analysis in order to increase the ability to compare and replicate studies. 
A series of key recommendations were formulated to focus efforts to further understand host-microbiome interactions and the consequences of exposure to xenobiotics as well as identifying biomarkers of microbiome-associated disease and toxicity.",2020-07-01 +30707359,EvoPPI 1.0: a Web Platform for Within- and Between-Species Multiple Interactome Comparisons and Application to Nine PolyQ Proteins Determining Neurodegenerative Diseases.,"Protein-protein interaction (PPI) data is essential to elucidate the complex molecular relationships in living systems, and thus understand the biological functions at cellular and systems levels. The complete map of PPIs that can occur in a living organism is called the interactome. For animals, PPI data is stored in multiple databases (e.g., BioGRID, CCSB, DroID, FlyBase, HIPPIE, HitPredict, HomoMINT, INstruct, Interactome3D, mentha, MINT, and PINA2) with different formats. This makes PPI comparisons difficult to perform, especially between species, since orthologous proteins may have different names. Moreover, there is only a partial overlap between databases, even when considering a single species. The EvoPPI ( http://evoppi.i3s.up.pt ) web application presented in this paper allows comparison of data from the different databases at the species level, or between species using a BLAST approach. We show its usefulness by performing a comparative study of the interactome of the nine polyglutamine (polyQ) disease proteins, namely androgen receptor (AR), atrophin-1 (ATN1), ataxin 1 (ATXN1), ataxin 2 (ATXN2), ataxin 3 (ATXN3), ataxin 7 (ATXN7), calcium voltage-gated channel subunit alpha1 A (CACNA1A), Huntingtin (HTT), and TATA-binding protein (TBP). Here we show that none of the human interactors of these proteins is common to all nine interactomes. Only 15 proteins are common to at least 4 of these polyQ disease proteins, and 40% of these are involved in ubiquitin protein ligase-binding function. 
The results obtained in this study suggest that polyQ disease proteins are involved in different functional networks. Comparisons with Mus musculus PPIs are also made for AR and TBP, using EvoPPI BLAST search approach (a unique feature of EvoPPI), with the goal of understanding why there is a significant excess of common interactors for these proteins in humans.",2019-02-01 +32282889,Pooled variable scaling for cluster analysis.,"

Motivation

Many popular clustering methods are not scale-invariant because they are based on Euclidean distances. Even methods using scale-invariant distances, such as the Mahalanobis distance, lose their scale invariance when combined with regularization and/or variable selection. Therefore, the results from these methods are very sensitive to the measurement units of the clustering variables. A simple way to achieve scale invariance is to scale the variables before clustering. However, scaling variables is a very delicate issue in cluster analysis: A bad choice of scaling can adversely affect the clustering results. On the other hand, reporting clustering results that depend on measurement units is not satisfactory. Hence, a safe and efficient scaling procedure is needed for applications in bioinformatics and medical sciences research.

Results

We propose a new approach for scaling prior to cluster analysis based on the concept of pooled variance. Unlike available scaling procedures, such as the SD and the range, our proposed scale avoids dampening the beneficial effect of informative clustering variables. We confirm through an extensive simulation study and applications to well-known real-data examples that the proposed scaling method is safe and generally useful. Finally, we use our approach to cluster a high-dimensional genomic dataset consisting of gene expression data for several specimens of breast cancer cells tissue obtained from human patients.

Availability and implementation

An R-implementation of the algorithms presented is available at https://wis.kuleuven.be/statdatascience/robust/software.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +33133992,The Vulcan Version 3.0 High-Resolution Fossil Fuel CO2 Emissions for the United States.,"Estimates of high-resolution greenhouse gas (GHG) emissions have become a critical component of climate change research and an aid to decision makers considering GHG mitigation opportunities. The ""Vulcan Project"" is an effort to estimate bottom-up carbon dioxide emissions from fossil fuel combustion and cement production (FFCO2) for the U.S. landscape at space and time scales that satisfy both scientific and policy needs. Here, we report on the Vulcan version 3.0 which quantifies emissions at a resolution of 1 km2/hr for the 2010-2015 time period. We estimate 2011 FFCO2 emissions of 1,589.9 TgC with a 95% confidence interval of 1,367/1,853 TgC (-14.0%/+16.6%), implying a one-sigma uncertainty of ~ ±8%. Per capita emissions are larger in states dominated by electricity production and industrial activity and smaller where onroad and building emissions dominate. The U.S. FFCO2 emissions center of mass (CoM) is located in the state of Missouri with mean seasonality that moves on a near-elliptical NE/SW path. Comparison to ODIAC, a global gridded FFCO2 emissions estimate, shows large total emissions differences (100.4 TgC for year 2011), a spatial correlation of 0.68 (R2), and a mean absolute relative difference at the 1 km2 scale of 104.3%. The Vulcan data product offers a high-resolution estimate of FFCO2 emissions in every U.S. city, obviating costly development of self-reported urban inventories. 
The Vulcan v3.0 annual gridded emissions data product can be downloaded from the Oak Ridge National Laboratory Distributed Active Archive Center (Gurney, Liang, et al., 2019, https://doi.org/10.3334/ORNLDAAC/1741).",2020-10-05 +29321535,Multi-label Deep Learning for Gene Function Annotation in Cancer Pathways.,"The war on cancer is progressing globally but slowly as researchers around the world continue to seek and discover more innovative and effective ways of curing this catastrophic disease. Organizing biological information, representing it, and making it accessible, or biocuration, is an important aspect of biomedical research and discovery. However, because maintaining sophisticated biocuration is highly resource dependent, it continues to lag behind the continually being generated biomedical data. Another critical aspect of cancer research, pathway analysis, has proven to be an efficient method for gaining insight into the underlying biology associated with cancer. We propose a deep-learning-based model, Stacked Denoising Autoencoder Multi-Label Learning (SdaMLL), for facilitating gene multi-function discovery and pathway completion. SdaMLL can capture intermediate representations robust to partial corruption of the input pattern and generate low-dimensional codes superior to conditional dimension reduction tools. Experimental results indicate that SdaMLL outperforms existing classical multi-label algorithms. Moreover, we found some gene functions, such as Fused in Sarcoma (FUS, which may be part of transcriptional misregulation in cancer) and p27 (which we expect will become a member viral carcinogenesis), that can be used to complete the related pathways. We provide a visual tool ( https://www.keaml.cn/gpvisual ) to view the new gene functions in cancer pathways.",2018-01-10 +32892270,Defining and Predicting Early Recurrence after Resection for Gallbladder Cancer.,"

Background

The optimal time interval to define early recurrence (ER) among patients who underwent resection of gallbladder cancer (GBC) is not well defined. We sought to develop and validate a novel GBC recurrence risk (GBRR) score to predict ER among patients undergoing resection for GBC.

Patients and methods

Patients who underwent curative-intent resection for GBC between 2000 and 2018 were identified from the US Extrahepatic Biliary Malignancy Consortium database. A minimum p value approach in the log-rank test was used to define the optimal cutoff for ER. A risk stratification model was developed to predict ER based on relevant clinicopathological factors and was externally validated.

Results

Among 309 patients, 103 patients (33.3%) had a recurrence at a median follow-up period of 15.1 months. The optimal cutoff for ER was defined at 12 months (p = 3.04 × 10-18). On multivariable analysis, T3/T4 disease (HR: 2.80; 95% CI 1.58-5.11) and poor tumor differentiation (HR: 1.91; 95% CI 1.11-3.25) were associated with greater hazards of ER. The GBRR score was developed using β-coefficients of variables in the final model, and patients were classified into three distinct groups relative to the risk for ER (12-month RFS; low risk: 88.4%, intermediate risk: 77.9%, high risk: 37.0%, p < 0.001). The external validation demonstrated good model generalizability with good calibration (n = 102: 12-month RFS; low risk: 94.2%, intermediate risk: 59.8%, high risk: 42.0%, p < 0.001). The GBRR score is available online at https://ktsahara.shinyapps.io/GBC_earlyrec/ .

Conclusions

A novel online calculator was developed to help clinicians predict the probability of ER after curative-intent resection for GBC. The proposed web-based tool may help in the optimization of surveillance intervals and the counselling of patients about their prognosis.",2020-09-05 +34457037,PATH: An interactive web platform for analysis of time-course high-dimensional genomic data. ,"Discovering patterns in time-course genomic data can provide insights on the dynamics of biological systems in health and disease. Here, we present a Platform for Analysis of Time-course High-dimensional data (PATH) with applications in genomics research. This web application provides a user-friendly interface with interactive data visualisation, dimension reduction, pattern discovery, and feature selection based on the principal trend analysis (PTA). Furthermore, the web application enables interactive and integrative analysis of time-course high-dimensional data based on the Joint PTA. The utilities of PATH are demonstrated through simulated and real examples, and the comparison with classical time-course data analysis methods such as the functional principal component analysis. PATH is freely accessible at https://ouyanglab.shinyapps.io/PATH/.",2020-01-01 +30721533,"SiMPLOD, a Structure-Integrated Database of Collagen Lysyl Hydroxylase (LH/PLOD) Enzyme Variants.","PLOD genes encode for procollagen lysyl hydroxylase enzymes (LH/PLOD), a family of proteins essential for collagen biosynthesis. Several mutations affect these genes, causing severe disorders, such as Ehlers-Danlos and Bruck syndrome, as well a connective tissue disease with phenotype resembling osteogenesis imperfecta caused by lack of LH3 functions. 
The recently determined three-dimensional (3D) structures of the full-length human LH3/PLOD3 isoform, together with the structure of a fragment of a viral LH/PLOD homolog, are now allowing molecular mapping of the numerous disease-causing mutations, providing insights often suitable for the interpretation of the resulting disease phenotypes. However, the added value of molecular structure interpretation is affected by the limited accessibility of complex molecular data to scientific communities lacking direct expertise in structural biology. In this work, we present a Structurally-integrated database for Mutations of PLOD genes (SiMPLOD), a publicly-available manually-curated online database with an embedded molecular viewer interface for the visualization and interpretation of LH/PLOD mutations on available molecular models. Each SiMPLOD entry is accompanied by manual annotations extrapolated from literature references and comments about the localization of the amino acid variants on the molecular structure. Additional links to the appropriate online resources for clinically-relevant as well as biochemical data are also provided in a standardized format. The web application is available at http://fornerislab.unipv.it/SiMPLOD. © 2019 American Society for Bone and Mineral Research.",2019-03-12 +33367514,BiCoN: Network-constrained biclustering of patients and omics data. ,"Unsupervised learning approaches are frequently employed to stratify patients into clinically relevant subgroups and to identify biomarkers such as disease-associated genes. However, clustering and biclustering techniques are oblivious to the functional relationship of genes and are thus not ideally suited to pinpoint molecular mechanisms along with patient subgroups. 
We developed the network-constrained biclustering approach BiCoN (Biclustering Constrained by Networks) which (i) restricts biclusters to functionally related genes connected in molecular interaction networks and (ii) maximizes the difference in gene expression between two subgroups of patients. This allows BiCoN to simultaneously pinpoint molecular mechanisms responsible for the patient grouping. Network-constrained clustering of genes makes BiCoN more robust to noise and batch effects than typical clustering and biclustering methods. BiCoN can faithfully reproduce known disease subtypes as well as novel, clinically relevant patient subgroups, as we could demonstrate using breast and lung cancer datasets. In summary, BiCoN is a novel systems medicine tool that combines several heuristic optimization strategies for robust disease mechanism extraction. BiCoN is well-documented and freely available as a python package or a web interface. PyPI package: https://pypi.org/project/bicon. https://exbio.wzw.tum.de/bicon. Supplementary data are available at Bioinformatics online.",2020-12-26 +26537179,BRANE Cut: biologically-related a priori network enhancement with graph cuts for gene regulatory network inference.,"

Background

Inferring gene networks from high-throughput data constitutes an important step in the discovery of relevant regulatory relationships in organism cells. Despite the large number of available Gene Regulatory Network inference methods, the problem remains challenging: the underdetermination in the space of possible solutions requires additional constraints that incorporate a priori information on gene interactions.

Methods

Weighting all possible pairwise gene relationships by a probability of edge presence, we formulate the regulatory network inference as a discrete variational problem on graphs. We enforce biologically plausible coupling between groups and types of genes by minimizing an edge labeling functional coding for a priori structures. The optimization is carried out with Graph cuts, an approach popular in image processing and computer vision. We compare the inferred regulatory networks to results achieved by the mutual-information-based Context Likelihood of Relatedness (CLR) method and by the state-of-the-art GENIE3, winner of the DREAM4 multifactorial challenge.

Results

Our BRANE Cut approach infers more accurately the five DREAM4 in silico networks (with improvements from 6% to 11%). On a real Escherichia coli compendium, an improvement of 11.8% compared to CLR and 3% compared to GENIE3 is obtained in terms of Area Under Precision-Recall curve. Up to 48 additional verified interactions are obtained over GENIE3 for a given precision. On this dataset involving 4345 genes, our method achieves a performance similar to that of GENIE3, while being more than seven times faster. The BRANE Cut code is available at: http://www-syscom.univ-mlv.fr/~pirayre/Codes-GRN-BRANE-cut.html.

Conclusions

BRANE Cut is a weighted graph thresholding method. Using biologically sound penalties and data-driven parameters, it improves three state-of-the art GRN inference methods. It is applicable as a generic network inference post-processing, due to its computational efficiency.",2015-11-04 +30465539,[Database resources of the reference genome and genetic variation maps for the Chinese population].,"With the implementation of the international human genome project and 1000 genome project, hundreds of Chinese individual genome sequences have been published. Establishing a high-precision Chinese population reference genome and identifying the unique genome variations are fundamental for future precision medicine research in China. To further meet the needs of scientific management and deep mining on the rapidly growing Chinese genomic data, Beijing Institute of Genomics, Chinese Academy of Sciences, has developed a Virtual Chinese Genome Database (VCGDB, http://bigd.big.ac.cn/vcg/) and Genome Variation Map (GVM, http://bigd.big.ac.cn/gvm/) based on the public whole genome sequencing data, which provides the worldwide services of data retrieval, sharing, downloading and online analysis. This paper presents the brief introduction of characteristics and functions of the two databases, as well as their future development and application prospects, aiming to provide useful information for the promotion and development of the reference genome and genome variation map database in China.",2018-11-01 +32127576,HDMAC: A Web-Based Interactive Program for High-Dimensional Analysis of Molecular Alterations in Cancer.,"Recent advances in high-throughput genomic technologies have nurtured a growing demand for statistical tools to facilitate identification of molecular changes as potential prognostic biomarkers or drugable targets for personalized precision medicine. 
In this study, we developed a web-based interactive and user-friendly platform for high-dimensional analysis of molecular alterations in cancer (HDMAC) (https://ripsung26.shinyapps.io/rshiny/). On HDMAC, several penalized regression models that are suitable for high-dimensional data analysis, Ridge, Lasso and adaptive Lasso, are offered, with Cox regression for survival and logistic regression for binary outcomes. Choice of a first-step screening is provided to address the multiple-comparison issue that often arises with large-volume genomic data. Hazard ratio or estimated coefficient is provided with each selected gene so that a multivariate regression model may be built based on the genes selected. Cross validation is provided as the method to estimate the prediction power of each regression model. In addition, R codes are also provided to facilitate download of whole sets of molecular variables from TCGA. In this study, illustration of the use of HDMAC was made through a set of data on gene mutations and a set on mRNA expression from ovarian cancer patients and a set on mRNA expression from bladder cancer patient. From the analysis of each set of data, a list of candidate genes was obtained that might be associated with mutations or abnormal expression of genes in ovarian and bladder cancers. HDMAC offers a solution for rigorous and validation analysis of high-dimensional genomic data.",2020-03-03 +24214965,The ChEMBL bioactivity database: an update.,"ChEMBL is an open large-scale bioactivity database (https://www.ebi.ac.uk/chembl), previously described in the 2012 Nucleic Acids Research Database Issue. Since then, a variety of new data sources and improvements in functionality have contributed to the growth and utility of the resource. 
In particular, more comprehensive tracking of compounds from research stages through clinical development to market is provided through the inclusion of data from United States Adopted Name applications; a new richer data model for representing drug targets has been developed; and a number of methods have been put in place to allow users to more easily identify reliable data. Finally, access to ChEMBL is now available via a new Resource Description Framework format, in addition to the web-based interface, data downloads and web services.",2013-11-07 +25392409,"GEM2Net: from gene expression modeling to -omics networks, a new CATdb module to investigate Arabidopsis thaliana genes involved in stress response.","CATdb (http://urgv.evry.inra.fr/CATdb) is a database providing a public access to a large collection of transcriptomic data, mainly for Arabidopsis but also for other plants. This resource has the rare advantage to contain several thousands of microarray experiments obtained with the same technical protocol and analyzed by the same statistical pipelines. In this paper, we present GEM2Net, a new module of CATdb that takes advantage of this homogeneous dataset to mine co-expression units and decipher Arabidopsis gene functions. GEM2Net explores 387 stress conditions organized into 18 biotic and abiotic stress categories. For each one, a model-based clustering is applied on expression differences to identify clusters of co-expressed genes. To characterize functions associated with these clusters, various resources are analyzed and integrated: Gene Ontology, subcellular localization of proteins, Hormone Families, Transcription Factor Families and a refined stress-related gene list associated to publications. Exploiting protein-protein interactions and transcription factors-targets interactions enables to display gene networks. 
GEM2Net presents the analysis of the 18 stress categories, in which 17,264 genes are involved and organized within 681 co-expression clusters. The meta-data analyses were stored and organized to compose a dynamic Web resource.",2014-11-11 +30500875,PPaxe: easy extraction of protein occurrence and interactions from the scientific literature.,"

Motivation

Protein-protein interactions (PPIs) are very important to build models for understanding many biological processes. Although several databases hold many of these interactions, exploring them, selecting those relevant for a given subject and contextualizing them can be a difficult task for researchers. Extracting PPIs directly from the scientific literature can be very helpful for providing such context, as the sentences describing these interactions may give insights to researchers in helpful ways.

Results

We have developed PPaxe, a python module and a web application that allows users to extract PPIs and protein occurrence from a given set of PubMed and PubMedCentral articles. It presents the results of the analysis in different ways to help researchers export, filter and analyze the results easily.

Availability and implementation

PPaxe web demo is freely available at https://compgen.bio.ub.edu/PPaxe. All the software can be downloaded from https://compgen.bio.ub.edu/PPaxe/download, including a command-line version and docker containers for an easy installation.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +33023848,Preoperative hepatic artery embolization before distal pancreatectomy plus celiac axis resection does not improve surgical results: A Spanish multicentre study.,"

Background

Distal pancreatectomy with celiac axis resection (DP-CAR) is a surgical procedure with high morbidity and mortality performed in patients with locally advanced pancreatic cancer. Preoperative embolization of hepatic artery (PHAE) has been postulated as a technical option to increase resection rate.

Objective

Comparison of morbidity and mortality at 90 days, operative time, hospital stay and survival between patients who underwent DP-CAR with and without PHAE.

Methods

Observational retrospective multicentre study.

Inclusion criteria

Patients operated on in Spanish centers with DP-CAR for pancreatic cancer from April 2004 until 23 June 2018. Preoperative (PHAE, neoadjuvant treatment), intraoperative (operative time and blood loss) and postoperative data (morbidity, hospital stay, R0 and survival) were studied. Complications were measured with Clavien classification at 90 days. Specific pancreatic complications were measured using ISGPS classifications. Data were analyzed using R version 3.1.3 (http://www.r-project.org). Level of significance was set at 0.05.

Results

41 patients were studied. 26 patients were not embolized (NO-PHAE group) and 15 patients received PHAE. Preoperative BMI and percentage of neoadjuvant chemotherapy were the only preoperative variables different between both groups. The operative time in the PHAE group was shorter (343 min) than in the non-PHAE group (411 min) (p < 0.06). Major morbidity (Clavien > IIIa) and mortality at 90 days were higher in the PHAE group than in the non-PHAE group (60% vs 23% and 26.6% vs 11.6% respectively) (p < 0.004). No statistical difference in overall survival was observed between both groups (p = 0.14).

Conclusion

In our study PHAE is not related with less postoperative morbidity. Even more, major morbidity (Clavien III-IV) and mortality was higher in PHAE group.",2020-10-03 +30852829,Designing an In Silico Strategy to Select Tissue-Leakage Biomarkers Using the Galaxy Framework.,"Knowledge-based approaches using large-scale biological (""omics"") data are a powerful way to identify mechanistic biomarkers, provided that scientists have access to computational solutions even when they have little programming experience or bioinformatics support. To achieve this goal, we designed a set of tools under the Galaxy framework to allow biologists to define their own strategy for reproducible biomarker selection. These tools rely on retrieving experimental data from public databases, and applying successive filters derived from information relating to disease pathophysiology. A step-by-step protocol linking these tools was implemented to select tissue-leakage biomarker candidates of myocardial infarction. A list of 24 candidates suitable for experimental assessment by MS-based proteomics is proposed. These tools have been made publicly available at http://www.proteore.org , allowing researchers to reuse them in their quest for biomarker discovery.",2019-01-01 +30371818,piRBase: a comprehensive database of piRNA sequences.,"PIWI-interacting RNAs are a class of small RNAs that is most abundantly expressed in animal germline. Substantial research is going on to reveal the functions of piRNAs in the epigenetic and post-transcriptional regulation of transposons and genes. To collect and annotate these data, we developed piRBase, a database assisting piRNA functional study. Since its launch in 2014, piRBase has integrated 264 data sets from 21 organisms, and the number of collected piRNAs has reached 173 million. The latest piRBase release (v2.0, 2018) was more focused on the comprehensive annotation of piRNA sequences, as well as the increasing number of piRNAs. 
In addition, piRBase release v2.0 also contained the potential information of piRNA targets and disease related piRNA. All datasets in piRBase are free to access and available for browsing, searching and bulk download at http://www.regulatoryrna.org/database/piRNA/.",2019-01-01 +30455323,Medicare's New Prospective Payment System on Facility Provision of Peritoneal Dialysis.,"

Background and objectives

Peritoneal dialysis is a self-administered, home-based treatment for ESKD associated with equivalent mortality, higher quality of life, and lower costs compared with hemodialysis. In 2011, Medicare implemented a comprehensive prospective payment system that makes a single payment for all dialysis, medication, and ancillary services. We examined whether the prospective payment system increased dialysis facility provision of peritoneal dialysis services and whether changes in peritoneal dialysis provision were more common among dialysis facilities that are chain affiliated, located in nonurban areas, and in regions with high dialysis market competition.

Design, setting, participants, & measurements

We conducted a longitudinal retrospective cohort study of n=6433 United States nonfederal dialysis facilities before (2006-2010) and after (2011-2013) the prospective payment system using data from the US Renal Data System, Medicare, and Area Health Resource Files. The outcomes of interest were a dichotomous indicator of peritoneal dialysis service availability and a discrete count variable of dialysis facility peritoneal dialysis program size defined as the annual number of patients on peritoneal dialysis in a facility. We used general estimating equation models to examine changes in peritoneal dialysis service offerings and peritoneal dialysis program size by a pre- versus post-prospective payment system effect and whether changes differed by chain affiliation, urban location, facility size, or market competition, adjusting for 1-year lagged facility-, patient with ESKD-, and region-level demographic characteristics.

Results

We found a modest increase in observed facility provision of peritoneal dialysis and peritoneal dialysis program size after the prospective payment system (36% and 5.7 patients in 2006 to 42% and 6.9 patients in 2013, respectively). There was a positive association of the prospective payment system with peritoneal dialysis provision (odds ratio, 1.20; 95% confidence interval, 1.13 to 1.18) and PD program size (incidence rate ratio, 1.27; 95% confidence interval, 1.22 to 1.33). Post-prospective payment system change in peritoneal dialysis provision was greater among nonurban (P<0.001), chain-affiliated (P=0.002), and larger-sized facilities (P<0.001), and there were higher rates of peritoneal dialysis program size growth in nonurban facilities (P<0.001).

Conclusions

Medicare's 2011 prospective payment system was associated with more facilities' availability of peritoneal dialysis and modest growth in facility peritoneal dialysis program size.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2018_11_19_CJASNPodcast_18_12_.mp3.",2018-11-19 +33064576,"Triclocarban, Triclosan, Bromochlorophene, Chlorophene, and Climbazole Effects on Nuclear Receptors: An in Silico and in Vitro Study.","

Background

Endocrine-disrupting chemicals can interfere with hormonal homeostasis and have adverse effects for both humans and the environment. Their identification is increasingly difficult due to lack of adequate toxicological tests. This difficulty is particularly problematic for cosmetic ingredients, because in vivo testing is now banned completely in the European Union.

Objectives

The aim was to identify candidate preservatives as endocrine disruptors by in silico methods and to confirm endocrine receptors' activities through nuclear receptors in vitro.

Methods

We screened preservatives listed in Annex V in the European Union Regulation on cosmetic products to predict their binding to nuclear receptors using the Endocrine Disruptome and VirtualToxLab™ version 5.8 in silico tools. Five candidate preservatives were further evaluated for androgen receptor (AR), estrogen receptor (ERα), glucocorticoid receptor (GR), and thyroid receptor (TR) agonist and antagonist activities in cell-based luciferase reporter assays in vitro in AR-EcoScreen, hERα-HeLa-9903, MDA-kb2, and GH3.TRE-Luc cell lines. Additionally, assays to test for false positives were used (nonspecific luciferase gene induction and luciferase inhibition).

Results

Triclocarban had agonist activity on AR and ERα at 1μM and antagonist activity on GR at 5μM and TR at 1μM. Triclosan showed antagonist effects on AR, ERα, and GR at 10μM and TR at 5μM, and bromochlorophene at 1μM (AR and TR) and at 10μM (ERα and GR). AR antagonist activity of chlorophene was observed [inhibitory concentration at 50% (IC50)=2.4μM], as were its substantial ERα agonist activity at >5μM and TR antagonist activity at 10μM. Climbazole showed AR antagonist (IC50=13.6μM), ERα agonist at >10μM, and TR antagonist activity at 10μM.

Discussion

These data support the concerns of regulatory authorities about the endocrine-disrupting potential of preservatives. These data also define the need to further determine their effects on the endocrine system and the need to reassess the risks they pose to human health and the environment. https://doi.org/10.1289/EHP6596.",2020-10-16 +30833424,External Evaluation of Population Pharmacokinetic Models of Vancomycin in Large Cohorts of Intensive Care Unit Patients. ,"Dosing of vancomycin is often guided by therapeutic drug monitoring and population pharmacokinetic models in the intensive care unit (ICU). The validity of these models is crucial, as ICU patients have marked pharmacokinetic variability. Therefore, we set out to evaluate the predictive performance of published population pharmacokinetic models of vancomycin in ICU patients. The PubMed database was used to search for population pharmacokinetic models of vancomycin in adult ICU patients. The identified models were evaluated in two independent data sets which were collected from two large hospitals in the Netherlands (Amsterdam UMC, Location VUmc, and OLVG Oost). We also tested a one-compartment model with fixed values for clearance and volume of distribution, in which a clinical standard dosage regimen (SDR) was mimicked to assess its predictive performance. Prediction error was calculated to assess the predictive performance of the models. Six models plus the SDR model were evaluated. The model of Roberts et al. (J. A. Roberts, F. S. Taccone, A. A. Udy, J.-L. Vincent, F. Jacobs, and J. Lipman, Antimicrob Agents Chemother 55:2704-2709, 2011, https://doi.org/10.1128/AAC.01708-10) performed satisfactorily, with mean and median values of prediction error of 5.1% and -7.5%, respectively, for Amsterdam UMC, Location VUmc, patients, and -12.6% and -17.2% respectively, for OLVG Oost patients. 
The other models, including the SDR model, yielded high mean values (-49.7% to 87.7%) and median values (-56.1% to 66.1%) for both populations. In conclusion, only the model of Roberts et al. was able to validly predict the concentrations of vancomycin for our data, whereas other models and standard dosing were largely inadequate. Extensive evaluation should precede the adoption of any model in clinical practice for ICU patients.",2019-04-25 +,Maize-nutrient response information applied across Sub-Saharan Africa,"The profit potential for a given investment in fertilizer use can be estimated using representative crop nutrient response functions. Where response data is scarce, determination of representative response functions can be strengthened by using results from homologous crop growing conditions. Maize (Zea mays L.) nutrient response functions were selected from the Optimization of Fertilizer Recommendations in Africa (OFRA) database of 5500 georeferenced response functions determined from field research conducted in Sub-Saharan Africa. Three methods for defining inference domains for selection of response functions were compared. Use of the OFRA Inference Tool (OFRA-IT; http://agronomy.unl.edu/OFRA) resulted in greater specificity of maize N, P, and K response functions with higher R² values indicating superiority compared with using the Harvest Choice Agroecological Zones (HC-AEZ) and the recommendation domains of the Global Yield Gap Atlas project (GYGA-RD). The OFRA-IT queries three soil properties in addition to climate-related properties while the latter two options use climate properties only. 
The OFRA-IT was generally insensitive to changes in criteria ranges of 20–25% used in queries suggesting value in using wider criteria ranges compared with the default for information scarce crop nutrient response functions.",2017-03-01 +33822648,"Fluoride in Drinking Water, Diet, and Urine in Relation to Bone Mineral Density and Fracture Incidence in Postmenopausal Women.","

Background

Although randomized controlled trials (RCTs) have demonstrated that high fluoride increases bone mineral density (BMD) and skeletal fragility, observational studies of low-dose chronic exposure through drinking water (<1.5mg/L, the maximum recommended by the World Health Organization) have been inconclusive.

Objective

We assessed associations of fluoride in urine, and intake via diet and drinking water, with BMD and fracture incidence in postmenopausal women exposed to drinking water fluoride ≤1mg/L.

Methods

Data were from participants in the Swedish Mammography Cohort-Clinical, a population-based prospective cohort study. At baseline (2004-2009), fluoride exposure was assessed based on urine concentrations (n=4,306) and estimated dietary intake (including drinking water) (n=4,072), and BMD was measured using dual energy X-ray absorptiometry. Incident fractures were ascertained via register-linkage through 2017. Residential history was collected to identify women with long-term consistent drinking water exposures prior to baseline.

Results

At baseline, mean urine fluoride was 1.2mg/g creatinine (±1.9) and mean dietary intake was 2.2mg/d (±0.9), respectively. During follow-up, 850, 529, and 187 cases of any fractures, osteoporotic fractures, and hip fractures, respectively, were ascertained. Baseline BMD was slightly higher among women in the highest vs. lowest tertiles of exposure. Fluoride exposures were positively associated with incident hip fractures, with multivariable-adjusted hazard ratios of 1.50 (95% CI: 1.04, 2.17) and 1.59 (95% CI: 1.10, 2.30), for the highest vs. lowest tertiles of urine fluoride and dietary fluoride, respectively. Associations with other fractures were less pronounced for urine fluoride, and null for dietary fluoride. Restricting the analyses to women with consistent long-term drinking water exposures prior to baseline strengthened associations between fractures and urinary fluoride.

Discussion

In this cohort of postmenopausal women, the risk of fractures was increased in association with two separate indicators of fluoride exposure. Our findings are consistent with RCTs and suggest that high consumption of drinking water with a fluoride concentration of ∼1mg/L may increase both BMD and skeletal fragility in older women. https://doi.org/10.1289/EHP7404.",2021-04-06 +32834641,Automatic distinction between COVID-19 and common pneumonia using multi-scale convolutional neural network on chest CT scans.,"The COVID-19 pneumonia is a global threat since it emerged in early December 2019. Driven by the desire to develop a computer-aided system for the rapid diagnosis of COVID-19 to assist radiologists and clinicians to combat with this pandemic, we retrospectively collected 206 patients with positive reverse-transcription polymerase chain reaction (RT-PCR) for COVID-19 and their 416 chest computed tomography (CT) scans with abnormal findings from two hospitals, 412 non-COVID-19 pneumonia and their 412 chest CT scans with clear sign of pneumonia are also retrospectively selected from participating hospitals. Based on these CT scans, we design an artificial intelligence (AI) system that uses a multi-scale convolutional neural network (MSCNN) and evaluate its performance at both slice level and scan level. Experimental results show that the proposed AI has promising diagnostic performance in the detection of COVID-19 and differentiating it from other common pneumonia under limited number of training data, which has great potential to assist radiologists and physicians in performing a quick diagnosis and mitigate the heavy workload of them especially when the health system is overloaded. 
The data is publicly available for further research at https://data.mendeley.com/datasets/3y55vgckg6/1https://data.mendeley.com/datasets/3y55vgckg6/1.",2020-07-25 +31364711,GHOST: Recovering Historical Signal from Heterotachously Evolved Sequence Alignments.,"Molecular sequence data that have evolved under the influence of heterotachous evolutionary processes are known to mislead phylogenetic inference. We introduce the General Heterogeneous evolution On a Single Topology (GHOST) model of sequence evolution, implemented under a maximum-likelihood framework in the phylogenetic program IQ-TREE (http://www.iqtree.org). Simulations show that using the GHOST model, IQ-TREE can accurately recover the tree topology, branch lengths, and substitution model parameters from heterotachously evolved sequences. We investigate the performance of the GHOST model on empirical data by sampling phylogenomic alignments of varying lengths from a plastome alignment. We then carry out inference under the GHOST model on a phylogenomic data set composed of 248 genes from 16 taxa, where we find the GHOST model concurs with the currently accepted view, placing turtles as a sister lineage of archosaurs, in contrast to results obtained using traditional variable rates-across-sites models. Finally, we apply the model to a data set composed of a sodium channel gene of 11 fish taxa, finding that the GHOST model is able to elucidate a subtle component of the historical signal, linked to the previously established convergent evolution of the electric organ in two geographically distinct lineages of electric fish. 
We compare inference under the GHOST model to partitioning by codon position and show that, owing to the minimization of model constraints, the GHOST model offers unique biological insights when applied to empirical data.",2020-03-01 +32875540,Identification of miRNA-mRNA Network in Autism Spectrum Disorder Using a Bioinformatics Method.,"Autism spectrum disorder (ASD) includes a heterogeneous group of disorders with different contributing genetics and epigenetics factors. Aberrant expression of miRNAs has been detected in ASD children compared with normally developed children. Due to the heterogeneity of this disorder, there is no consensus on ASD-associated miRNAs; thus, it is necessary to develop a model for comprehensive assessment of the role of miRNAs in ASD. We interrogated the PubMed, Google Scholar, and Web of Science databases until the end of 2019 to identify ASD-associated miRNAs. In addition, mRNA-coding genes that contribute to the pathogenesis of ASD were downloaded from the SFARI GENE ( https://gene.sfari.org/ ). The obtained 201 miRNAs and 478 target mRNAs were imported into the Cytoscape software suite to construct a miRNA-mRNA network. A protein-protein interaction network was constructed for target mRNAs using the CluPedia program in Cytoscape. Using this approach, we detected five modules that were associated with neurexins and neuroligins, glutamatergic synapse, cell adhesion molecules, NOTCH, MECP2 and circadian clock pathways, L1CAM interactions, and neurotransmitter release cycle. Taken together, functional analysis of these genes led to determination of critical pathways related to CNS disorders. 
Thus, the suggested approach in the current study resulted in the identification of the most relevant pathways in the pathogenesis of ASD that can be used as biomarkers or therapeutic targets.",2020-09-02 +33426244,Experimental data on the adsorption of water by branches and leaves as affected by different the morphological characteristics of plants.,"We determined 116 globally important woody tree species, classified them based on the differences between plant life-forms, leaf textures and trichomes on leaves and measured the indices of some plant morphological traits in the Guizhou karstic regions of China. The water adsorbed on the upper surfaces of branches and leaves and the water adsorbed on the upper and lower surfaces of branches and leaves (WWu and WWul) of these species was measured. The ratios of the weight of adsorbed water on the upper surfaces of branches and leaves to the weight of branches and leaves (RWWu) and the ratios of the weight of adsorbed water on the upper and lower surfaces of branches and leaves to the weight of branches and leaves (RWWul) were calculated. The adsorption of water and morphological trait indices follow the approximately normal distributions. The weight of branches and leaves (weight), total leaf area (TLA) and mean leaf area (MLA) significantly impacted the adsorption of water by branches and leaves. The different rates of the adsorption of water for 116 tree species can explain the interspecific variation in rainfall interception. Interpretation of these data is provided in Effects of the morphological characteristics of plants on rainfall interception and kinetic energy[J]. Journal of Hydrology, 2020: 125807. https://doi.org/10.1016/j.jhydrol.2020.125807.",2020-12-23 +29912385,Metaxa2 Database Builder: enabling taxonomic identification from metagenomic or metabarcoding data using any genetic marker.,"

Motivation

Correct taxonomic identification of DNA sequences is central to studies of biodiversity using both shotgun metagenomic and metabarcoding approaches. However, no genetic marker gives sufficient performance across all the biological kingdoms, hampering studies of taxonomic diversity in many groups of organisms. This has led to the adoption of a range of genetic markers for DNA metabarcoding. While many taxonomic classification software tools can be re-trained on these genetic markers, they are often designed with assumptions that impair their utility on genes other than the SSU and LSU rRNA. Here, we present an update to Metaxa2 that enables the use of any genetic marker for taxonomic classification of metagenome and amplicon sequence data.

Results

We evaluated the Metaxa2 Database Builder on 11 commonly used barcoding regions and found that while there are wide differences in performance between different genetic markers, our software performs satisfactorily provided that the input taxonomy and sequence data are of high quality.

Availability and implementation

Freely available on the web as part of the Metaxa2 package at http://microbiology.se/software/metaxa2/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +31390943,D-lnc: a comprehensive database and analytical platform to dissect the modification of drugs on lncRNA expression.,"Long non-coding RNAs (lncRNAs) have been proven to be implicated in the pathogenesis of various diseases. Multiple studies have demonstrated that small molecule drugs can modify lncRNA expression, which suggests a promising therapy for human diseases. Here, we constructed a comprehensive query and analytical platform D-lnc to dissect the influence of drugs on lncRNA expression. Firstly, we manually curated the experimentally validated regulations of drugs on lncRNA expression and recorded 7,825 entries between 59 drugs and 7,538 lncRNAs across five species from nearly 1,000 published papers. Secondly, we comprehensively screened the Connectivity Map (cMap) and the Gene Expression Omnibus (GEO) databases to obtain the drug-perturbed gene expression profiles. Through probe re-annotation of microarray data, we identified 19,946 putative associations between 1,279 drugs and 129 lncRNAs in cMap and 36,210 entries between 115 drugs and 2,360 lncRNAs in GEO. Finally, we developed an online analytical platform to predict the potential acting drugs or modified lncRNAs based on user input lncRNA sequence or drug structure through computing the similarities of lncRNA sequences or drug structures. In a word, D-lnc provides a comprehensive platform to detect the modification of drugs on lncRNA expression, which would facilitate the development of lncRNA-targeted therapeutics. 
D-lnc is freely available at http://www.jianglab.cn/D-lnc/ .",2019-08-07 +34047248,RNA interference activity of single-stranded oligonucleotides linked between the passenger strand and the guide strand with an aryl phosphate linker.,"Recently, we demonstrated that asymmetrical 18 base-paired double-strand oligonucleotides comprised of alternately combined 2'-O-methyl RNA and DNA, termed MED-siRNAs, show high RNase resistance, efficient cleavage of target mRNA, and the subsequent reduction of target protein expression. The 5'-terminal phosphate group and the 3'-overhang of the guide strand were required to fully activate the RNAi activity of MED-siRNAs. Here, we evaluated MED-siRNAs modified with aryl phosphate groups at the 5'-end of the guide strand. The 5'-aryl phosphorylated MED-siRNAs showed highly efficient reduction of target protein expression comparable to 5'-phosphorylated MED-siRNAs. Moreover, 5'-aryl phosphorylated MED-siRNAs linked between the aryl phosphate group at the 5'-end of the guide strand and the hydroxyl group at the 3'-end of the passenger strand with alkyl amide linkers or peptides (e.g., DL-Ser-L-Ala-L-Tyr), resulted in single-stranded MED-siRNAs with a highly efficient cleavage activity of target mRNA with binding to Argonaute 2 via an RNA interference mechanism. These linker techniques could also be used to create siRNAs composed of naturally-occurring molecules such as amino acids. These findings suggest the possibility of using these single-stranded MED-siRNAs as siRNA reagents.Supplemental data for this article is available online at https://doi.org/10.1080/15257770.2021.1927077 .",2021-01-01 +33996146,"LoReTTA, a user-friendly tool for assembling viral genomes from PacBio sequence data.","Long-read, single-molecule DNA sequencing technologies have triggered a revolution in genomics by enabling the determination of large, reference-quality genomes in ways that overcome some of the limitations of short-read sequencing. 
However, the greater length and higher error rate of the reads generated on long-read platforms make the tools used for assembling short reads unsuitable for use in data assembly and motivate the development of new approaches. We present LoReTTA (Long Read Template-Targeted Assembler), a tool designed for performing de novo assembly of long reads generated from viral genomes on the PacBio platform. LoReTTA exploits a reference genome to guide the assembly process, an approach that has been successful with short reads. The tool was designed to deal with reads originating from viral genomes, which feature high genetic variability, possible multiple isoforms, and the dominant presence of additional organisms in clinical or environmental samples. LoReTTA was tested on a range of simulated and experimental datasets and outperformed established long-read assemblers in terms of assembly contiguity and accuracy. The software runs under the Linux operating system, is designed for easy adaptation to alternative systems, and features an automatic installation pipeline that takes care of the required dependencies. A command-line version and a user-friendly graphical interface version are available under a GPLv3 license at https://bioinformatics.cvr.ac.uk/software/ with the manual and a test dataset.",2021-01-01 +33528617,Application of a Proposed Multi-Positional Circumferential Arm Liposuction Method and Quantification of its Clinical Efficacy Evaluation.,"

Background

Upper arm liposuction mainly focuses on the posterolateral region, which may lead to a lack of harmony between the aspirated and unaspirated areas. In addition, the treatment effect of arm liposuction is often evaluated only by preoperative and postoperative photograph comparison and simple measurement; quantitative research on this topic is still lacking.

Methods

The multi-positional circumferential arm liposuction (MCAL) technique was proposed and applied to a total of 34 females in our hospital from 2017 to 2019. Three-dimensional data of 12 patients before the operation and after 2-3 months were collected and processed by 3D imaging, and the volume reduction rate was evaluated quantitatively.

Results

The MCAL method was successfully applied in the clinic, and its surgical effect was quantitatively studied. The mean follow-up time of 12 patients was (75.2 ±13.1) days, and the postoperative volume was significantly reduced. The postoperative volume of patients with type I, type II and type III decreased by (10.79 ±2.55)%, (17.25 ±3.02)% and (22.76 ±3.51)%, respectively.

Conclusion

Our new MCAL technique was successful, maximizing the esthetic results in upper limb contour refinements in the superficial fascial layer. The clinical efficacy of this proposed MCAL method was evaluated by CT and 3D digital technology, which provided further accuracy in demonstrating its effect on the shape of the arm.

Level of evidence iv

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors https://www.springer.com .",2021-02-02 +31624831,Community curation of bioinformatics software and data resources.,"The corpus of bioinformatics resources is huge and expanding rapidly, presenting life scientists with a growing challenge in selecting tools that fit the desired purpose. To address this, the European Infrastructure for Biological Information is supporting a systematic approach towards a comprehensive registry of tools and databases for all domains of bioinformatics, provided under a single portal (https://bio.tools). We describe here the practical means by which scientific communities, including individual developers and projects, through major service providers and research infrastructures, can describe their own bioinformatics resources and share these via bio.tools.",2020-09-01 +33559678,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines for Deep Brain Stimulations for Obsessive-Compulsive Disorder: Update of the 2014 Guidelines.,"

Background

In 2020, the Guidelines Task Force conducted another systematic review of the relevant literature on deep brain stimulation (DBS) for obsessive-compulsive disorder (OCD) to update the original 2014 guidelines to ensure timeliness and accuracy for clinical practice.

Objective

To conduct a systematic review of the literature and update the evidence-based guidelines on DBS for OCD.

Methods

The Guidelines Task Force conducted another systematic review of the relevant literature, using the same search terms and strategies as used to search PubMed and Embase for relevant literature. The updated search included studies published between 1966 and December 2019. The same inclusion/exclusion criteria as the original guideline were also applied. Abstracts were reviewed and relevant full-text articles were retrieved and graded. Of 864 articles, 10 were retrieved for full-text review and analysis. Recommendations were updated according to new evidence yielded by this update.

Results

Seven studies were included in the original guideline, reporting the use of bilateral DBS as more effective in improving OCD symptoms than sham treatment. An additional 10 studies were included in this update: 1 class II and 9 class III.

Conclusion

Based on the data published in the literature, the following recommendations can be made: (1) It is recommended that clinicians utilize bilateral subthalamic nucleus DBS over best medical management for the treatment of patients with medically refractory OCD (level I). (2) Clinicians may use bilateral nucleus accumbens or bed nucleus of stria terminalis DBS for the treatment of patients with medically refractory OCD (level II). There is insufficient evidence to make a recommendation for the identification of the most effective target.The full guidelines can be accessed at https://www.cns.org/guidelines/browse-guidelines-detail/deep-brain-stimulation-obsessive-compulsive-disord.",2021-03-01 +30582634,Crystal structures and biochemical analyses of intermediate cleavage peptidase: role of dynamics in enzymatic function.,"Intermediate cleavage peptidase (Icp55) processes a subset of mitochondrial matrix proteins by removing a bulky residue at their N termini, leaving behind smaller N-terminal residues (icp activity). This contributes towards the stability of the mitochondrial proteome. We report crystal structures of yeast Icp55 including one bound to the apstatin inhibitor. Apart from icp activity, the enzyme was found to exhibit Xaa-Pro aminopeptidase activity in vitro. Structural and biochemical data suggest that the enzyme exists in a rapid equilibrium between monomer and dimer. Furthermore, the dimer, and not the monomer, was found to be the active species with loop dynamics at the dimer interface playing an important role in activity. Based on the new evidence, we propose a model for binding and processing of cellular targets by Icp55. 
DATABASE: The atomic coordinates and structure factors for the structures of Icp55 (code 6A9T, 6A9U, 6A9V) have been deposited in the Protein Data Bank (PDB) (http://www.pdb.org/).",2019-01-09 +27284084,ProSAT+: visualizing sequence annotations on 3D structure.,"PRO: tein S: tructure A: nnotation T: ool-plus (ProSAT(+)) is a new web server for mapping protein sequence annotations onto a protein structure and visualizing them simultaneously with the structure. ProSAT(+) incorporates many of the features of the preceding ProSAT and ProSAT2 tools but also provides new options for the visualization and sharing of protein annotations. Data are extracted from the UniProt KnowledgeBase, the RCSB PDB and the PDBe SIFTS resource, and visualization is performed using JSmol. User-defined sequence annotations can be added directly to the URL, thus enabling visualization and easy data sharing. ProSAT(+) is available at http://prosat.h-its.org.",2016-06-09 +32103253,"Cooperative driver pathway discovery via fusion of multi-relational data of genes, miRNAs and pathways.","Discovering driver pathways is an essential step to uncover the molecular mechanism underlying cancer and to explore precise treatments for cancer patients. However, due to the difficulties of mapping genes to pathways and the limited knowledge about pathway interactions, most previous work focus on identifying individual pathways. In practice, two (or even more) pathways interplay and often cooperatively trigger cancer. In this study, we proposed a new approach called CDPathway to discover cooperative driver pathways. First, CDPathway introduces a driver impact quantification function to quantify the driver weight of each gene. CDPathway assumes that genes with larger weights contribute more to the occurrence of the target disease and identifies them as candidate driver genes. 
Next, it constructs a heterogeneous network composed of genes, miRNAs and pathways nodes based on the known intra(inter)-relations between them and assigns the quantified driver weights to gene-pathway and gene-miRNA relational edges. To transfer driver impacts of genes to pathway interaction pairs, CDPathway collaboratively factorizes the weighted adjacency matrices of the heterogeneous network to explore the latent relations between genes, miRNAs and pathways. After this, it reconstructs the pathway interaction network and identifies the pathway pairs with maximal interactive and driver weights as cooperative driver pathways. Experimental results on the breast, uterine corpus endometrial carcinoma and ovarian cancer data from The Cancer Genome Atlas show that CDPathway can effectively identify candidate driver genes [area under the receiver operating characteristic curve (AUROC) of $\geq $0.9] and reconstruct the pathway interaction network (AUROC of>0.9), and it uncovers much more known (potential) driver genes than other competitive methods. In addition, CDPathway identifies 150% more driver pathways and 60% more potential cooperative driver pathways than the competing methods. The code of CDPathway is available at http://mlda.swu.edu.cn/codes.php?name=CDPathway.",2021-03-01 +30119164,Methodology of a new inflammatory arthritis registry: TReasure,"Background/aim:The TReasure registry, created in 2017, is an observational multicenter cohort that includes inflammatory arthritis +patients. This article reviews the methodology and objectives of the TReasure registry established to collect data from rheumatoid +arthritis (RA) and spondyloarthritis (SpA) patients. Methodology:Fifteen rheumatology centers in Turkey will contribute data to the TReasure database. The actual proprietor of the +database is the Hacettepe Rheumatology Association (HRD) and Hacettepe Financial Enterprises. 
Pharmaceutical companies that +operate in Turkey (in alphabetical or er), Abbvie, Amgen, BMS, Celltrion Healthcare, Novartis, Pfizer, Roche, and UCB, support the +TReasure registry. TReasure is a web-based database to which users connect through a URL (https://www.trials-network.org/treasure) +with their unique identifier and passwords provided for data entry and access. TReasure records demographic and clinical features, +comorbidities, radiology and laboratory results, measures of disease activity, and treatment data. Discussion:TReasure will provide us with various types of data, such as a cross-sectional view of the current nationwide status of the +patients currently receiving these treatments, and retrospective data as much as allowed by the participating centers’ records. Finally, a +high-quality prospective dataset will be built over the ensuing years from patients with a new diagnosis of RA or SpA.",2018-08-16 +33665269,Dataset for measured viscosity of Polyalpha-Olefin- boron nitride nanofluids.,"Datasets of measured viscosity of Polyalpha-Olefin- boron nitride (PAO/hBN) nanofluids are reported. An AR-G2 rheometer (from TA Instruments) experimental setup is used for measuring the rheological property of PAO/hBN nanofluids, which is a combined motor and transducer (CMT) instrument. The test fluid sample size is approximately 1.5 ml and the tests were conducted over a temperature range of the tested fluids from - 20 °C to 70 °C by a water circulator chamber. The dataset includes measured viscosities as a function of the BN volumetric concentration (ϕ) of 0, 0.6 and 1%. Two sets of viscosity measurements are conducted insuring the thermal equilibrium conditions are reached for all experiments. In set (1), the viscosity is measured at intervals of 10 °C by fixing the temperature at each interval (at -20, -10, 0, 10, 20, 30, 40, 50, 60 and 70 °C), while the shear stress and shear rate are varied. 
In set (2), the temperature is varied from -20 °C to 70 °C at intervals of 0.5 °C, while the shear stress is fixed and the shear rate is varied accordingly. Set (1) is designed to verify whether the fluids are Newtonian or not and set (2) is designed to derive correlations for the viscosity as a function of temperature. Several characteristics data are recorded including rotational speed of the spindle (RPM), torque, viscosity (Pa- s), shear stress (Pa), shear strain rate (1/s) and temperature (°C). The reuse potential of the dataset includes calculating Reynolds number for further flow studies; heat transfer performance studies of nanofluids; lubrication and lubricants' development studies and characteristics of Newtonian and non-Newtonian fluids. The dataset reported here were used (but not published) in the article published by the author in [1] (https://doi.org/10.1016/j.csite.2020.100776).",2021-02-15 +32347764,Air Pollutant Exposure and Stove Use Assessment Methods for the Household Air Pollution Intervention Network (HAPIN) Trial.,"

Background

High quality personal exposure data is fundamental to understanding the health implications of household energy interventions, interpreting analyses across assigned study arms, and characterizing exposure-response relationships for household air pollution. This paper describes the exposure data collection for the Household Air Pollution Intervention Network (HAPIN), a multicountry randomized controlled trial of liquefied petroleum gas stoves and fuel among 3,200 households in India, Rwanda, Guatemala, and Peru.

Objectives

The primary objectives of the exposure assessment are to estimate the exposure contrast achieved following a clean fuel intervention and to provide data for analyses of exposure-response relationships across a range of personal exposures.

Methods

Exposure measurements are being conducted over the 3-y time frame of the field study. We are measuring fine particulate matter [PM < 2.5μm in aerodynamic diameter (PM2.5)] with the Enhanced Children's MicroPEM™ (RTI International), carbon monoxide (CO) with the USB-EL-CO (Lascar Electronics), and black carbon with the OT21 transmissometer (Magee Scientific) in pregnant women, adult women, and children <1 year of age, primarily via multiple 24-h personal assessments (three, six, and three measurements, respectively) over the course of the 18-month follow-up period using lightweight monitors. For children we are using an indirect measurement approach, combining data from area monitors and locator devices worn by the child. For a subsample (up to 10%) of the study population, we are doubling the frequency of measurements in order to estimate the accuracy of subject-specific typical exposure estimates. In addition, we are conducting ambient air monitoring to help characterize potential contributions of PM2.5 exposure from background concentration. Stove use monitors (Geocene) are being used to assess compliance with the intervention, given that stove stacking (use of traditional stoves in addition to the intervention gas stove) may occur.

Conclusions

The tools and approaches being used for HAPIN to estimate personal exposures build on previous efforts and take advantage of new technologies. In addition to providing key personal exposure data for this study, we hope the application and learnings from our exposure assessment will help inform future efforts to characterize exposure to household air pollution and for other contexts. https://doi.org/10.1289/EHP6422.",2020-04-29 +32978618,Deep learning based prediction of reversible HAT/HDAC-specific lysine acetylation.,"Protein lysine acetylation regulation is an important molecular mechanism for regulating cellular processes and plays critical physiological and pathological roles in cancers and diseases. Although massive acetylation sites have been identified through experimental identification and high-throughput proteomics techniques, their enzyme-specific regulation remains largely unknown. Here, we developed the deep learning-based protein lysine acetylation modification prediction (Deep-PLA) software for histone acetyltransferase (HAT)/histone deacetylase (HDAC)-specific acetylation prediction based on deep learning. Experimentally identified substrates and sites of several HATs and HDACs were curated from the literature to generate enzyme-specific data sets. We integrated various protein sequence features with deep neural network and optimized the hyperparameters with particle swarm optimization, which achieved satisfactory performance. Through comparisons based on cross-validations and testing data sets, the model outperformed previous studies. Meanwhile, we found that protein-protein interactions could enrich enzyme-specific acetylation regulatory relations and visualized this information in the Deep-PLA web server. Furthermore, a cross-cancer analysis of acetylation-associated mutations revealed that acetylation regulation was intensively disrupted by mutations in cancers and heavily implicated in the regulation of cancer signaling. 
These prediction and analysis results might provide helpful information to reveal the regulatory mechanism of protein acetylation in various biological processes to promote the research on prognosis and treatment of cancers. Therefore, the Deep-PLA predictor and protein acetylation interaction networks could provide helpful information for studying the regulation of protein acetylation. The web server of Deep-PLA could be accessed at http://deeppla.cancerbio.info.",2020-09-01 +31400611,Genetic Interaction-Based Biomarkers Identification for Drug Resistance and Sensitivity in Cancer Cells.,"Cancer cells generally harbor hundreds of alterations in the cancer genomes and act as crucial factors in the development and progression of cancer. Gene alterations in the cancer genome form genetic interactions, which affect the response of patients to drugs. We developed an algorithm that mines copy number alteration and whole-exome mutation profiles from The Cancer Genome Atlas (TCGA), as well as functional screen data generated to identify potential genetic interactions for specific cancer types. As a result, 4,529 synthetic viability (SV) interactions and 10,637 synthetic lethality (SL) interactions were detected. The pharmacogenomic datasets revealed that SV interactions induced drug resistance in cancer cells and that SL interactions mediated drug sensitivity in cancer cells. Deletions of HDAC1 and DVL1, both of which participate in the Notch signaling pathway, had an SV effect in cancer cells, and deletion of DVL1 induced resistance to HDAC1 inhibitors in cancer cells. In addition, patients with low expression of both HDAC1 and DVL1 had poor prognosis. 
Finally, by integrating current reported genetic interactions from other studies, the Cancer Genetic Interaction database (CGIdb) (http://www.medsysbio.org/CGIdb) was constructed, providing a convenient retrieval for genetic interactions in cancer.",2019-07-17 +32582664,"Automated Hypothesis Generation to Identify Signals Relevant in the Development of Mammalian Cell and Tissue Bioprocesses, With Validation in a Retinal Culture System.","We have developed an accessible software tool (receptoR) to predict potentially active signaling pathways in one or more cell type(s) of interest from publicly available transcriptome data. As proof-of-concept, we applied it to mouse photoreceptors, yielding the previously untested hypothesis that activin signaling pathways are active in these cells. Expression of the type 2 activin receptor (Acvr2a) was experimentally confirmed by both RT-qPCR and immunochemistry, and activation of this signaling pathway with recombinant activin A significantly enhanced the survival of magnetically sorted photoreceptors in culture. Taken together, we demonstrate that our approach can be easily used to mine publicly available transcriptome data and generate hypotheses around receptor expression that can be used to identify novel signaling pathways in specific cell types of interest. We anticipate that receptoR (available at https://www.ucalgary.ca/ungrinlab/receptoR) will enable more efficient use of limited research resources.",2020-06-04 +33761274,An Examination of National Cancer Risk Based on Monitored Hazardous Air Pollutants.,"

Background

Hazardous air pollutants, or air toxics, are pollutants known to cause cancer or other serious health effects. Nationwide cancer risk from these pollutants is estimated by the U.S. EPA National Air Toxics Assessment. However, these model estimates are limited to the totality of the emissions inventory used as inputs, and further, they cannot be used to examine spatial and temporal trends in cancer risk from hazardous air pollutants.

Objectives

To complement model estimates of nationwide cancer risk, we examined trends in cancer risk using monitoring data from 2013 to 2017 across the 27 U.S. National Air Toxics Trends Stations.

Methods

For each monitoring site, we estimated cancer risk by multiplying the annual concentration for each monitored pollutant by its corresponding unit risk estimate. We examined the 5-y average (2013-2017) cancer risk across sites and the population levels and demographics within 1-mi of the monitors, as well as changes in estimated cancer risk over time. Finally, we examined changes in individual pollutant concentrations and their patterns of covariance.

Results

We found that the total estimated cancer risk is higher for urban vs. rural sites, with the risk at seven urban sites (of 21) above 75 in 1 million. Furthermore, while most pollutant concentrations have not changed over the time period explored, we found 38 site-pollutant combinations that significantly declined and 12 that significantly increased between 2013 and 2017. We also identified a positive correlation between estimated cancer risk and percent of the population within 1-mi of a monitor that is low income.

Discussion

Long-term trends show that annual mean concentrations of most measured air toxics have declined. Our evaluation of a more recent snapshot in time finds that most pollutant concentrations have not changed from 2013 to 2017. This analysis of cancer risk based on monitored values provides an important complement to modeled nationwide cancer risk estimates and can further inform future approaches to mitigate risk from exposure to hazardous air pollutants. https://doi.org/10.1289/EHP8044.",2021-03-24 +33525036,[Liposuctions in the ambulatory setting].,"

Background

 Liposuctions are among the most frequently performed operations in plastic surgery worldwide. They are offered as inpatient as well as outpatient procedures. In the outpatient setting, tumescent anaesthesia is used in various forms. There is ambiguity about the amount of lipoaspirate that can be removed safely in an outpatient setting, and also about the monitoring of parameters and the duration of postoperative care.

Material and methods

 A systematic literature review was conducted with the help of the MEDLINE data base of the U. S. National Library of Medicine (NLM) and the bibliographic search engine Google Scholar (https://scholar.google.com) of Google LLC. The key words ""Liposuction Anesthesia"" and ""Liposuction Guidelines"" were used. All items resulting from the search were checked for thematic concordance and further analysed by their level of evidence, significance and availability.

Results

 After the literature review, a total of 197 items were identified for further analysis. The analysis of the international and German literature yielded a systematic overview of recommendations.

Conclusions

 Tumescence anaesthesia in an outpatient setting has various advantages, e. g. cost reduction for provider and patient as well as avoidance of the risk profile of general anaesthesia. Also patients can change their position autonomously, which can be beneficial for surgery. However, there are limitations in terms of the lipoaspirate volume that can be removed safely. With increasing lipoaspirate volumes, more local anaesthetic is needed, which also increases the postoperative monitoring time. In the authors' view, tumescent anaesthesia should only be used for small-volume and localised liposuctions. Liposuction in general anaesthesia offers more advantages, especially with increasing lipoaspirate volumes.",2021-02-01 +,Genetic diversity of endangered orchid Phaius australis across a fragmented Australian landscape,"Historical events such as colonisation, spatial distribution across different habitats, and contemporary processes, such as human-mediated habitat fragmentation can leave lasting imprints on the population genetics of a species. Orchids currently comprise 17% of threatened flora species in Australia (Environment Protection and Biodiversity Conservation Act 1999) due to the combination of fragmentation and illegal harvesting (Benwell in Recovery plan, swamp orchids Phaius australis, Phaius tancarvilliae, NSW National Parks and Wildlife Service, Sydney, 1994; Jones in A complete guide to native orchids of Australia including the island territories, 2nd edn, Reed Natural History, Sydney, 2006; DE in Phaius australis in species profile and threats database, Department of the Environment. http://www.environment.gov.au/sprat , 2015). The federally endangered Swamp Orchid Phaius australis has a disjunct distribution across an almost 2000 km latitudinal range along Australia’s east coast but it was estimated that 95% of the populations have been lost since European settlement (Benwell 1994). 
Phaius australis is endangered due to illegal collection and habitat loss that has resulted in limited connectivity between populations, in ecosystems that are vulnerable to climate change. Thus the genetic impacts of its history combined with more recent fragmentation may have impacts on its future viability especially in light of changing environmental conditions. Thirty-four populations were sampled from tropical north Queensland to the southern edge of the subtropics in New South Wales. Population genetics analysis was conducted using 13 polymorphic microsatellite markers developed for the species using NextGen sequencing. Spatial genetic patterns indicate post-colonisation divergence from the tropics southwards to its current climate niche limits. Genetic diversity is low across all populations (A = 1.5, H ₑ = 0.171), and there is little evidence of genetic differentiation between regions. Consistent with population genetic theory, the historic loss of populations has resulted in significantly lower genetic diversity in small populations compared to large (P, A, He; p < 0.05). The viability and persistence of P. australis populations now and in a changing climate are discussed in the context of conservation priorities.",2018-04-01 +,Genotyping-by-sequencing based single nucleotide polymorphisms enabled Kompetitive Allele Specific PCR marker development in mutant Rubus genotypes,"Rubus is an economically important fruit crop across the globe. Recently, several Rubus mutant genotypes with improved agronomic traits have been developed using gamma ray irradiation. This study investigated genetic diversity and variations in Rubus mutant genotypes using single nucleotide polymorphism (SNP) markers generated from genotyping-by-sequencing (GBS) analysis. A GBS library of 14 Rubus genotypes, consisting of seven boysenberry mutant lines, four blackberry mutant lines, and three original varieties, were sequenced on the Illumina Hiseq2000 platform. 
A set of SNPs were analyzed by Kompetitive Allele Specific PCR (KASP) assay in order to discriminate the Rubus genotypes. A total of 50,831,040 (86.4%) reads of clean data were generated, and the trimmed length ranged from 116,380,840 to 509,806,521 bp, with an average of 228,087,333 bp per line. A total of 19,634 high-quality SNPs were detected, which contained 11,328 homozygous SNPs and 8306 heterozygous SNPs. A set of 1504 SNPs was used to perform a phylogenetic analysis, which showed that there were clear differences among the Rubus genotypes based on their origin. A total of 25 SNPs were used for the KASP assays, of which six KASP primer sets were successfully distinguished among the Rubus genotypes. This study demonstrated that the SNP and KASP method is an economically efficient tool for mutant screening in Rubus breeding programs. How to cite: Ryu J, Kim WJ, Im J, et al. Genotyping-by-sequencing based single nucleotide polymorphisms enabled kompetitive allele specific PCR marker development in mutant Rubus genotypes. Electron J Biotechnol 2018;35. https://doi.org/10.1016/j.ejbt.2018.08.001.",2018-09-01 +32084131,DeepHiC: A generative adversarial network for enhancing Hi-C data resolution.,"Hi-C is commonly used to study three-dimensional genome organization. However, due to the high sequencing cost and technical constraints, the resolution of most Hi-C datasets is coarse, resulting in a loss of information and biological interpretability. Here we develop DeepHiC, a generative adversarial network, to predict high-resolution Hi-C contact maps from low-coverage sequencing data. We demonstrated that DeepHiC is capable of reproducing high-resolution Hi-C data from as few as 1% downsampled reads. Empowered by adversarial training, our method can restore fine-grained details similar to those in high-resolution Hi-C matrices, boosting accuracy in chromatin loops identification and TADs detection, and outperforms the state-of-the-art methods in accuracy of prediction. 
Finally, application of DeepHiC to Hi-C data on mouse embryonic development can facilitate chromatin loop detection. We develop a web-based tool (DeepHiC, http://sysomics.com/deephic) that allows researchers to enhance their own Hi-C data with just a few clicks.",2020-02-21 +33229046,Antimicrobial effect of nisin in processed cheese - Quantification of residual nisin by LC-MS/MS and development of new growth and growth boundary model for Listeria monocytogenes.,"This study tested the hypothesis that growth of Listeria monocytogenes in processed cheese with added nisin can be predicted from residual nisin A concentrations in the final product after processing. A LC-MS/MS method and a bioassay were studied to quantify residual nisin A concentrations and a growth and growth boundary model was developed to predict the antilisterial effect in processed cheese. 278 growth rates were determined in broth for 11 L. monocytogenes isolates and used to determine 13 minimum inhibitory concentration (MIC) values for nisin between pH 5.5 and 6.5. To supplement these data, 67 MIC-values at different pH-values were collected from the scientific literature. A MIC-term was developed to describe the effect of pH on nisin MIC-values. An available growth and growth boundary model (doi: https://doi.org/10.1016/j.fm.2019.103255) was expanded with the new MIC-term for nisin to predict growth in processed cheese. To generate data for model evaluation and further model development, challenge tests with a total of 45 growth curves, were performed using processed cheese. Cheeses were formulated with 11.2 or 12.0 ppm of nisin A and heat treated to obtain residual nisin A concentrations ranging from 0.56 to 5.28 ppm. Below 15 °C, nisin resulted in extended lag times. A global regression approach was used to fit all growth curves determined in challenge tests. 
This was obtained by combining the secondary growth and growth boundary model including the new term for the inhibiting effect of nisin on μmax with the primary logistic growth model with delay. This model appropriately described the growth inhibiting effect of residual nisin A and showed that relative lag times depended on storage temperatures. With residual nisin A concentrations, other product characteristics and storage temperature as input the new model correctly predicted all observed growth and no-growth responses for L. monocytogenes. This model can support development of nisin A containing recipes for processed cheese that prevent growth of L. monocytogenes. Residual nisin A concentrations in processed cheese were accurately quantified by the developed LC-MS/MS method with recoveries of 83 to 110% and limits of detection and quantification being 0.04 and 0.13 ppm, respectively. The tested bioassay was less precise and nisin A recoveries varied from 53% to 94%.",2020-11-04 +33797937,Associations of Pre- and Postnatal Air Pollution Exposures with Child Blood Pressure and Modification by Maternal Nutrition: A Prospective Study in the CANDLE Cohort.,"

Background

Limited data suggest air pollution exposures may contribute to pediatric high blood pressure (HBP), a known predictor of adult cardiovascular diseases.

Methods

We investigated this association in the Conditions Affecting Neurocognitive Development and Learning in Early Childhood (CANDLE) study, a sociodemographically diverse pregnancy cohort in the southern United States with participants enrolled from 2006 to 2011. We included 822 mother-child dyads with available address histories and a valid child blood pressure measurement at 4-6 y. Systolic (SBP) and diastolic blood pressures (DBP) were converted to age-, sex-, and height-specific percentiles for normal-weight U.S. children. HBP was classified based on SBP or DBP ≥90th percentile. Nitrogen dioxide (NO2) and particulate matter ≤2.5μm in aerodynamic diameter (PM2.5) estimates in both pre- and postnatal windows were obtained from annual national models and spatiotemporal models, respectively. We fit multivariate Linear and Poisson regressions and explored multiplicative joint effects with maternal nutrition, child sex, and maternal race using interaction terms.

Results

Mean PM2.5 and NO2 in the prenatal period were 10.8 [standard deviation (SD): 0.9] μg/m3 and 10.0 (SD: 2.4) ppb, respectively, and 9.9 (SD: 0.6) μg/m3 and 8.8 (SD: 1.9) ppb from birth to the 4-y-old birthday. On average, SBP percentile increased by 14.6 (95% CI: 4.6, 24.6), and DBP percentile increased by 8.7 (95% CI: 1.4, 15.9) with each 2-μg/m3 increase in second-trimester PM2.5. PM2.5 averaged over the prenatal period was only significantly associated with higher DBP percentiles [β= 11.6 (95% CI: 2.9, 20.2)]. Positive associations of second-trimester PM2.5 with SBP and DBP percentiles were stronger in children with maternal folate concentrations in the lowest quartile (pinteraction= 0.05 and 0.07, respectively) and associations with DBP percentiles were stronger in female children (pinteraction= 0.05). We did not detect significant association of NO2, road proximity, and postnatal PM2.5 with any outcomes.

Conclusions

The findings suggest that higher prenatal PM2.5 exposure, particularly in the second trimester, is associated with elevated early childhood blood pressure. This adverse association could be modified by pregnancy folate concentrations. https://doi.org/10.1289/EHP7486.",2021-04-02 +33707307,Meta-Analysis and Systematic Review of the Genomics of Mucosal Melanoma.,"Mucosal melanoma is a rare subtype of melanoma. To date, there has been no comprehensive systematic collation and statistical analysis of the aberrations and aggregated frequency of driver events across multiple studies. Published studies using whole genome, whole exome, targeted gene panel, or individual gene sequencing were identified. Datasets from these studies were collated to summarize mutations, structural variants, and regions of copy-number alteration. Studies using next-generation sequencing were divided into the ""main"" cohort (n = 173; fresh-frozen samples), ""validation"" cohort (n = 48; formalin-fixed, paraffin-embedded samples) and a second ""validation"" cohort comprised 104 tumors sequenced using a targeted panel. Studies assessing mutations in BRAF, KIT, and NRAS were summarized to assess hotspot mutations. Statistical analysis of the main cohort variant data revealed KIT, NF1, BRAF, NRAS, SF3B1, and SPRED1 as significantly mutated genes. ATRX and SF3B1 mutations occurred more commonly in lower anatomy melanomas and CTNNB1 in the upper anatomy. NF1, PTEN, CDKN2A, SPRED1, ATM, CHEK2, and ARID1B were commonly affected by chromosomal copy loss, while TERT, KIT, BRAF, YAP1, CDK4, CCND1, GAB2, MDM2, SKP2, and MITF were commonly amplified. Further notable genomic alterations occurring at lower frequencies indicated commonality of signaling networks in tumorigenesis, including MAPK, PI3K, Notch, Wnt/β-catenin, cell cycle, DNA repair, and telomere maintenance pathways. This analysis identified genomic aberrations that provide some insight to the way in which specific pathways may be disrupted. 
IMPLICATIONS: Our analysis has shown that mucosal melanomas have a diverse range of genomic alterations in several biological pathways. VISUAL OVERVIEW: http://mcr.aacrjournals.org/content/molcanres/19/6/991/F1.large.jpg.",2021-03-11 +32719914,Hybrid PET/MRI in non-small cell lung cancer (NSCLC) and lung nodules-a literature review.,"

Background

The use of hybrid PET/MRI for clinical staging is growing in several cancer forms and, consequently, PET/MRI has also gained interest in the assessment of non-small cell lung cancer (NSCLC) and lung lesions. However, lung evaluation with PET/MRI is associated with challenges related to technical issues and diagnostic image quality. We, therefore, investigated the published literature on PET/MRI for clinical staging in NSCLC or lung nodule detection specifically addressing diagnostic accuracy and technical issues.

Methods

The data originates from a systematic search performed in PubMed/MEDLINE, Embase, and Cochrane Library on hybrid PET/MRI in patients with cancer for a scoping review published earlier ( https://doi.org/10.1007/s00259-019-04402-8 ). Studies in English and German evaluating the diagnostic performance of hybrid PET/MRI for NSCLC or lung nodule detection in cancer patients were selected. Data reported in peer-reviewed journals without restrictions to year of publication were included.

Results

A total of 3138 publications were identified from which 116 published 2012-2018 were included. Of these, nine studies addressed PET/MRI in NSCLC (4) or lung nodule detection (5). Overall, PET/MRI did not provide advantages in preoperative T- and N-staging in NSCLC compared to PET/CT. The data on M-staging were too few for conclusions to be drawn. The lung nodule detection rate of PET/MRI was comparable to that of PET/CT for FDG-avid nodules larger than 10 mm, but the sensitivity of PET/MRI for detection of non-FDG-avid nodules smaller than 5 mm was low.

Conclusion

PET/MRI did not provide advantages in T- and N-staging of NSCLC compared to PET/CT. PET/MRI had a comparable sensitivity for detection of FDG-avid lung nodules and nodules over 10 mm, but PET/CT yielded a higher detection rate in non FDG-avid lung nodules under 5 mm. With PET/MRI, the overall detection rate for lung nodules in various cancer types remains inferior to that of PET/CT due to the lower diagnostic performance of MRI than CT in the lungs.",2020-07-27 +34455803,"First report of Klebsiella aerogenes Inciting Stem Rot of Pearl Millet in Haryana, India. ","Pearl millet [Pennisetum glaucum (L.) R. Br. Syn. Pennisetum americanum (L.) Leeke] is the oldest and widely cultivated millet in Asian and African countries, mostly grown over low fertile soils in more than 40 countries covering an area of 312.00 lakh hectares (FAOSTAT 2017). In Haryana, crop was grown over an area of 4.30 lakh hectares during Kharif 2019. Pearl millet is prone to many fungal and bacterial diseases. During 2018 to 2020, a new devastating disease exhibiting stem rot like symptoms was observed in pearl millet growing regions in Indian state of Haryana. The isolated disease causing agent was a bacterium, where 16S rDNA-based nucleotide sequence deposited in NCBI GenBank (Accession nos. MZ433194.1) conferred its nearness to Klebsiella aerogenes (Hormaeche and Edwards 1960) Tindall et al. 2017. Further, DNA gyrase genomic sequence (NCBI Accession nos. MZ707528.1) also stayed its high homology to K. aerogenes. Klebsiella is usually known to cause diseases in humans and animals, and also has been found inciting different kind of rots in different plantations viz. top rot in maize (Huang Min et al. 2016). Pearl millet is susceptible to minor bacterial diseases viz. bacterial leaf streak (Xanthomonas campestris), bacterial leaf spot (Pseudomonas syringae) and leaf stripe (P. avenae). 
Earlier, among the plant pathogenic bacterial entirety, only Erwinia chrysanthemi is known to cause stem rot diseases in sorghum (Saxena et al. 1991) amongst different types of millet. Extensive disease survey of pearl millet growing regions (Hisar, Bhiwani, Rewari, Mohindergarh and Bawal districts of Haryana having an altitude of 215, 225, 245, 262 and 266 m, respectively) in rainy seasons of 2019 and 2020 revealed the prevalence of typical stem rot disease, representing up to 70% disease incidence in the infected fields. The pieces of symptomatic stem of different plants were collected from two locations (Hisar and Bhiwani) and associated organism was isolated following the techniques of Janse (2005). The resulting growth of bacterial cultures were further purified on nutrient agar (NA) media using streak plate technique where colony growth of both the isolates were observed as morphotypes. The resulting bacteria were gram-negative and rod-shaped. Colonies were round and creamish white on NA. Isolated morphotypes were positive for indole production, methyl red, Voges Proskauer's test, citrate utilization, arabinose, mannitol, rhamnose and sucrose, whereas negative for glucose, adonitol, lactose and sorbitol tests. Biochemical tests were performed following standard methods (Holt et al. 1994). Molecular analysis of both isolates was performed using two sets of primers (universal 16S rRNA gene and genus-specific gyrA gene). The gyrA fragment (F: 5'-CGCGTACTATACGCCATGAACGTA-3'; R: 5'-ACCGTTGATCACTTCGGTCAGG-3') has been adopted as Klebsiella genus-specific gene (Brisse and Verhoef 2001). The quality and quantity of the isolated genomic DNA were analyzed using NanoDrop-2000 (Thermo Fisher Scientific, USA) and resolved in 1% (w/v) agarose gel. Thereafter, visualized in gel documentation to confirm a single band of high-molecular-weight DNA. 
The fragment 16S rDNA was amplified using 27F and 1492R primers, where a single discrete PCR amplicon of 1500 bp was observed in 1% (w/v) agarose gel. Similarly, the gyrA gene was amplified using 09510F and 09510R primers that conferred a single discrete band of 400 bp. The forward and reverse DNA sequencing reaction of purified PCR amplicons (16S rDNA and gyrA) was carried out using BDT v3.1 Cycle sequencing kit on a genetic analyzer to generate gene sequences. The consensus sequences of both gene were generated from forward and reverse sequences data using aligner software. The obtained sequences of both genes were compared with the available nucleotide sequences in the NCBI using the blast 2.2.9 system (https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch). The sequenced PCR amplicons showed up to 100% similarity with Klebsiella aerogenes 16s RNA nucleotide sequences (Accession nos. NR102493.2, MT373521.1; MF682950.1; MF462979.1 etc.). The bacterium also showed high nucleotide homology to K. aerogenes gyrA gene sequences (Accession nos. LR607333.1; CP035466.1; CP049600.1 etc.). The molecular phylogenetic analysis was done by the maximum likelihood method based on the Tamura-Nei model, and 1000 replicates for bootstrap testing in MEGA 7.0 software. The analysis involved 16 nucleotide sequences and evolutionary distances were computed. The 16s RNA based phylogenetic tree raised using MEGA7 (Kumar et al. 2016) elucidates that Klebsiella aerogenes Hisar formed a cluster with three K. aerogenes strains (Accession nos. MZ577128.1, MT373521.1 and MT 373520.1), whereas K. aerogenes Bhiwani displayed higher homology to NCBI sequences viz. MF682950.1, MT355368.1, MW331687.1and LC515412.1. Bacterial suspension was prepared by suspending bacterial cells into sterile water and cell density was adjusted to 1×107 colony forming unit/ml. 
For pathogenicity, leaf whorl inoculation (10 ml suspension/ whorl) was done on 15 days old seedlings of pearl millet genotype 7042S raised under controlled conditions (Temperature 35±2°C and more than 80% Relative Humidity). The pathogenicity was proved under field conditions as well. Initial symptoms were observed 4-5 days after inoculation as long streaks on leaves. Soon a spike in number of these leaf streaks was observed. Thereafter, water-soaked lesions appeared on the stem at 20-25 days after inoculation which later on turned brown to black. Severely diseased plants were dead, exhibiting hollowing of the stem and drying of leaves. The infected stem pith disintegrated and showed slimy rot symptoms and the pearl millet clumps toppled down. The rotten stems of both inoculations were again cut in to small pieces and the reisolated bacterium showed exactly the same morphological, biochemical and molecular characteristics. To our knowledge, this is the first report of stem rot of pearl millet incited by K. aerogenes in south-western regions of Haryana, India. Because the stem rot caused by K. aerogenes poses a significant threat to pearl millet cultivation, further research on biology, epidemiology and management choices is needed.",2021-08-29 +25484339,DISEASES: text mining and data integration of disease-gene associations.,"Text mining is a flexible technology that can be applied to numerous different tasks in biology and medicine. We present a system for extracting disease-gene associations from biomedical abstracts. The system consists of a highly efficient dictionary-based tagger for named entity recognition of human genes and diseases, which we combine with a scoring scheme that takes into account co-occurrences both within and between sentences. We show that this approach is able to extract half of all manually curated associations with a false positive rate of only 0.16%. 
Nonetheless, text mining should not stand alone, but be combined with other types of evidence. For this reason, we have developed the DISEASES resource, which integrates the results from text mining with manually curated disease-gene associations, cancer mutation data, and genome-wide association studies from existing databases. The DISEASES resource is accessible through a web interface at http://diseases.jensenlab.org/, where the text-mining software and all associations are also freely available for download.",2014-12-05 +31797634,The power of dynamic social networks to predict individuals' mental health.,"Precision medicine has received attention both in and outside the clinic. We focus on the latter, by exploiting the relationship between individuals' social interactions and their mental health to predict one's likelihood of being depressed or anxious from rich dynamic social network data. Existing studies differ from our work in at least one aspect: they do not model social interaction data as a network; they do so but analyze static network data; they examine ""correlation"" between social networks and health but without making any predictions; or they study other individual traits but not mental health. In a comprehensive evaluation, we show that our predictive model that uses dynamic social network data is superior to its static network as well as non-network equivalents when run on the same data. Supplementary material for this work is available at https://nd.edu/~cone/NetHealth/PSB_SM.pdf.",2020-01-01 +27153700,dbDSM: a manually curated database for deleterious synonymous mutations.,"

Motivation

Synonymous mutations (SMs), which changed the sequence of a gene without directly altering the amino acid sequence of the encoded protein, were thought to have no functional consequences for a long time. They are often assumed to be neutral in models of mutation and selection and were completely ignored in many studies. However, accumulating experimental evidence has demonstrated that these mutations exert their impact on gene functions via splicing accuracy, mRNA stability, translation fidelity, protein folding and expression, and some of these mutations are implicated in human diseases. To the best of our knowledge, there is still no database specially focusing on disease-related SMs.

Results

We have developed a new database called dbDSM (database of Deleterious Synonymous Mutation), a continually updated database that collects, curates and manages available human disease-related SM data obtained from published literature. In the current release, dbDSM collects 1936 SM-disease association entries, including 1289 SMs and 443 human diseases from ClinVar, GRASP, GWAS Catalog, GWASdb, PolymiRTS database, PubMed database and Web of Knowledge. Additionally, we provided users a link to download all the data in the dbDSM and a link to submit novel data into the database. We hope dbDSM will be a useful resource for investigating the roles of SMs in human disease.

Availability and implementation

dbDSM is freely available online at http://bioinfo.ahu.edu.cn:8080/dbDSM/index.jsp with all major browser supported.

Contact

jfxia@ahu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-15 +33844598,Long-Term Exposure to Fine Particle Elemental Components and Natural and Cause-Specific Mortality-a Pooled Analysis of Eight European Cohorts within the ELAPSE Project.,"

Background

Inconsistent associations between long-term exposure to particles with an aerodynamic diameter ≤2.5 μm [fine particulate matter (PM2.5)] components and mortality have been reported, partly related to challenges in exposure assessment.

Objectives

We investigated the associations between long-term exposure to PM2.5 elemental components and mortality in a large pooled European cohort; to compare health effects of PM2.5 components estimated with two exposure modeling approaches, namely, supervised linear regression (SLR) and random forest (RF) algorithms.

Methods

We pooled data from eight European cohorts with 323,782 participants, average age 49 y at baseline (1985-2005). Residential exposure to 2010 annual average concentration of eight PM2.5 components [copper (Cu), iron (Fe), potassium (K), nickel (Ni), sulfur (S), silicon (Si), vanadium (V), and zinc (Zn)] was estimated with Europe-wide SLR and RF models at a 100×100 m scale. We applied Cox proportional hazards models to investigate the associations between components and natural and cause-specific mortality. In addition, two-pollutant analyses were conducted by adjusting each component for PM2.5 mass and nitrogen dioxide (NO2) separately.

Results

We observed 46,640 natural-cause deaths with 6,317,235 person-years and an average follow-up of 19.5 y. All SLR-modeled components were statistically significantly associated with natural-cause mortality in single-pollutant models with hazard ratios (HRs) from 1.05 to 1.27. Similar HRs were observed for RF-modeled Cu, Fe, K, S, V, and Zn with wider confidence intervals (CIs). HRs for SLR-modeled Ni, S, Si, V, and Zn remained above unity and (almost) significant after adjustment for both PM2.5 and NO2. HRs only remained (almost) significant for RF-modeled K and V in two-pollutant models. The HRs for V were 1.03 (95% CI: 1.02, 1.05) and 1.06 (95% CI: 1.02, 1.10) for SLR- and RF-modeled exposures, respectively, per 2 ng/m3, adjusting for PM2.5 mass. Associations with cause-specific mortality were less consistent in two-pollutant models.

Conclusion

Long-term exposure to V in PM2.5 was most consistently associated with increased mortality. Associations for the other components were weaker for exposure modeled with RF than SLR in two-pollutant models. https://doi.org/10.1289/EHP8368.",2021-04-12 +,"Airborne spectral BRDF of various surface types (ocean, vegetation, snow, desert, wetlands, cloud decks, smoke layers) for remote sensing applications","In this paper we describe measurements of the bidirectional reflectance-distribution function (BRDF) acquired over a 30-year period (1984–2014) by the National Aeronautics and Space Administration's (NASA's) Cloud Absorption Radiometer (CAR). Our BRDF database encompasses various natural surfaces that are representative of many land cover or ecosystem types found throughout the world. CAR's unique measurement geometry allows a comparison of measurements acquired from different satellite instruments with various geometrical configurations, none of which are capable of obtaining such a complete and nearly instantaneous BRDF. This database is therefore of great value in validating many satellite sensors and assessing corrections of reflectances for angular effects. These data can also be used to evaluate the ability of analytical models to reproduce the observed directional signatures, to develop BRDF models that are suitable for sub-kilometer-scale satellite observations over both homogeneous and heterogeneous landscape types, and to test future spaceborne sensors. All of these BRDF data are publicly available and accessible in hierarchical data format (http://car.gsfc.nasa.gov/).",2016-06-01 +33630824,"Changes in Suicide Rates - United States, 2018-2019.","Suicide is the 10th leading cause of death in the United States overall, and the second and fourth leading cause among persons aged 10-34 and 35-44 years, respectively (1). 
In just over 2 decades (1999-2019), approximately 800,000 deaths were attributed to suicide, with a 33% increase in the suicide rate over the period (1). In 2019, a total of 12 million adults reported serious thoughts of suicide during the past year, 3.5 million planned a suicide, and 1.4 million attempted suicide (2). Suicides and suicide attempts in 2019 led to a lifetime combined medical and work-loss cost (i.e., the costs that accrue from the time of the injury through the course of a person's expected lifetime) of approximately $70 billion (https://wisqars.cdc.gov:8443/costT/). From 2018 to 2019, the overall suicide rate declined for the first time in over a decade (1). To understand how the decline varied among different subpopulations by demographic and other characteristics, CDC analyzed changes in counts and age-adjusted suicide rates from 2018 to 2019 by demographic characteristics, county urbanicity, mechanism of injury, and state. Z-tests and 95% confidence intervals were used to assess statistical significance. Suicide rates declined by 2.1% overall, by 3.2% among females, and by 1.8% among males. Significant declines occurred, overall, in five states. Other significant declines were noted among subgroups defined by race/ethnicity, age, urbanicity, and suicide mechanism. These declines, although encouraging, were not uniform, and several states experienced significant rate increases. 
A comprehensive approach to prevention that uses data to drive decision-making, implements prevention strategies from CDC's Preventing Suicide: A Technical Package of Policy, Programs, and Practices with the best available evidence, and targets the multiple risk factors associated with suicide, especially in populations disproportionately affected, is needed to build on initial progress from 2018 to 2019 (3).",2021-02-26 +,"NMπ—improved re‐implementation of NM+, a software for estimating gene dispersal and mating patterns","This study introduces the NMπ computer program designed for estimation of plant mating system and seed and pollen dispersal kernels. NMπ is a re‐implementation of the NM+ program and provides new features such as support for multicore processors, explicit treatment of dioecy, the possibility of incorporating uniparentally cytoplasmic markers, the possibility of assessing assortative mating due to phenotypic similarity and inference about offspring genealogies. The probability model of parentage (the neighbourhood model) accounts for missing data and genotyping errors, which can be estimated along with regular parameters of the mating system. The program has virtually no restrictions with respect to a number of individuals, markers or phenotypic characters. A console version of NMπ can be run under a wide variety of operating systems, including Windows, Linux or Mac OS. For Windows users, a graphical user interface is provided to facilitate operating the software. The program, user manual and example data are available on http://www.ukw.edu.pl/pracownicy/plik/igor_chybicki/3694/.",2018-01-01 +32614833,Data-driven network alignment.,"In this study, we deal with the problem of biological network alignment (NA), which aims to find a node mapping between species' molecular networks that uncovers similar network regions, thus allowing for the transfer of functional knowledge between the aligned nodes. 
We provide evidence that current NA methods, which assume that topologically similar nodes (i.e., nodes whose network neighborhoods are isomorphic-like) have high functional relatedness, do not actually end up aligning functionally related nodes. That is, we show that the current topological similarity assumption does not hold well. Consequently, we argue that a paradigm shift is needed with how the NA problem is approached. So, we redefine NA as a data-driven framework, called TARA (data-driven NA), which attempts to learn the relationship between topological relatedness and functional relatedness without assuming that topological relatedness corresponds to topological similarity. TARA makes no assumptions about what nodes should be aligned, distinguishing it from existing NA methods. Specifically, TARA trains a classifier to predict whether two nodes from different networks are functionally related based on their network topological patterns (features). We find that TARA is able to make accurate predictions. TARA then takes each pair of nodes that are predicted as related to be part of an alignment. Like traditional NA methods, TARA uses this alignment for the across-species transfer of functional knowledge. TARA as currently implemented uses topological but not protein sequence information for functional knowledge transfer. In this context, we find that TARA outperforms existing state-of-the-art NA methods that also use topological information, WAVE and SANA, and even outperforms or complements a state-of-the-art NA method that uses both topological and sequence information, PrimAlign. Hence, adding sequence information to TARA, which is our future work, is likely to further improve its performance. The software and data are available at http://www.nd.edu/~cone/TARA/.",2020-07-02 +30999846,VIGLA-M: visual gene expression data analytics.,"

Background

The analysis of gene expression levels is used in many clinical studies to know how patients evolve or to find new genetic biomarkers that could help in clinical decision making. However, the techniques and software available for these analyses are not intended for physicians, but for geneticists. Enabling physicians to make initial discoveries on these data would benefit clinical assay development.

Results

Melanoma is a highly immunogenic tumor. Therefore, in recent years physicians have incorporated immune system altering drugs into their therapeutic arsenal against this disease, revolutionizing the treatment of patients with an advanced stage of the cancer. This has led us to explore and deepen our knowledge of the immunology surrounding melanoma, in order to optimize the approach. Within this project we have developed a database for collecting relevant clinical information for melanoma patients, including the storage of patient gene expression levels obtained from the NanoString platform (several samples are taken from each patient). The Immune Profiling Panel is used in this case. This database is being exploited through the analysis of the different expression profiles of the patients. This analysis is being done with Python, and a parallel version of the algorithms is available with Apache Spark to provide scalability as needed.

Conclusions

VIGLA-M, the visual analysis tool for gene expression levels in melanoma patients is available at http://khaos.uma.es/melanoma/ . The platform with real clinical data can be accessed with a demo user account, physician, using password physician_test_7634 (if you encounter any problems, contact us at this email address: mailto: khaos@lcc.uma.es). The initial results of the analysis of gene expression levels using these tools are providing first insights into the patients' evolution. These results are promising, but larger scale tests must be developed once new patients have been sequenced, to discover new genetic biomarkers.",2019-04-18 +32108855,Bio2Rxn: sequence-based enzymatic reaction predictions by a consensus strategy.,"SUMMARY:The development of sequencing technologies has generated large amounts of protein sequence data. The automated prediction of the enzymatic reactions of uncharacterized proteins is a major challenge in the field of bioinformatics. Here, we present Bio2Rxn as a web-based tool to provide putative enzymatic reaction predictions for uncharacterized protein sequences. Bio2Rxn adopts a consensus strategy by incorporating six types of enzyme prediction tools. It allows for the efficient integration of these computational resources to maximize the accuracy and comprehensiveness of enzymatic reaction predictions, which facilitates the characterization of the functional roles of target proteins in metabolism. Bio2Rxn further links the enzyme function prediction with more than 300 000 enzymatic reactions, which were manually curated by more than 100 people over the past 9 years from more than 580 000 publications. AVAILABILITY AND IMPLEMENTATION:Bio2Rxn is available at: http://design.rxnfinder.org/bio2rxn/. CONTACT:qnhu@sibs.ac.cn. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-06-01 +32857617,"Comparing and Predicting Public Attitudes Toward Stuttering, Obesity, and Mental Illness.","Purpose Extensive research on public attitudes has documented stigma toward stuttering, obesity, and mental illness; however, most studies have focused on only one of these conditions. This study sought to compare public attitudes toward stuttering, obesity, and mental illness as well as to identify the predictive potential of four ratings relating to these and other neutral or desirable conditions. Method Five hundred respondents who were selected from each of three international databases filled out the Public Opinion Survey of Human Attributes (POSHA) for stuttering, obesity, or mental illness. The POSHA surveys were as similar as possible, and all contained four general items asking respondents' ""impression"" of the attribute, extent to which he or she ""wants to be/have"" that attribute, ""amount known"" about the attribute, and ""persons known"" who manifest the attribute, for stuttering, obesity, and mental illness plus two others, namely, left-handedness and intelligence. The POSHA surveys also had the same summary scores, Beliefs, Self-Reactions, and an Overall Score. Results Summary scores for the three POSHA surveys in the 500-respondent samples revealed negative attitudes toward all three conditions, the most positive being toward obesity, followed by stuttering and then by mental illness. Stepwise regression analysis indicated that various general items had significant prediction potential not only of attitudes for the same condition but also of attitudes for other conditions. The greatest other condition predictions were between stuttering and mental illness. Conclusions Stuttering is regarded as less stigmatizing than mental illness but more stigmatizing than obesity. 
Additionally, positivity toward one condition results in limited positivity toward the others. Impressions and knowledge of-as well as experience with-stigmatized conditions can inform public awareness campaigns and individual clinical programs dealing with stigma. Supplemental Material https://doi.org/10.23641/asha.12860939.",2020-08-28 +32860317,"Outcome misclassification: Impact, usual practice in pharmacoepidemiology database studies and an online aid to correct biased estimates of risk ratio or cumulative incidence.","

Purpose

It is well documented that outcome misclassification can bias a point estimate. We aimed to understand current practice in addressing this bias in pharmacoepidemiology database studies and to develop an open source application (app) from existing methodology to demonstrate the impact and mechanism of this bias on results.

Methods

Studies of an exposure and a clinical outcome were selected from all Pharmacoepidemiology and Drug Safety publications during 2017 and any reference to outcome misclassification described. An app to correct risk ratio (RR) and cumulative incidence for outcome misclassification was developed from a published methodology and used to demonstrate the impact of correction on point estimates.

Results

Eight (19%) of 43 papers selected reported estimates of outcome ascertainment accuracy with positive predictive value (PPV) the most commonly reported measure (7 of 8 studies). Three studies (7%) corrected for the bias, 1 by exposure strata, and 5 (12%) restricted analyses to confirmed cases. The app (http://apps.p-95.com/ISPE/) uses values of PPV and sensitivity (or a range of possible values) in each exposure stratum and returns corrected point estimates and confidence intervals. The app demonstrates that small differences between comparison groups in PPV or sensitivity can introduce bias even when accuracy estimates are high.

Conclusions

Outcome misclassification is not usually corrected in pharmacoepidemiology database studies although correction methods using routinely measured indices are available. Error indices are needed for each comparison group to correct RR estimates for these errors. The app should encourage understanding of this bias and increase adjustment.",2020-08-28 +33443729,SimplePhy: An open-source tool for quick online perception experiments.,"Because of the COVID-19 pandemic, researchers are facing unprecedented challenges that affect our ability to run in-person experiments. With mandated social distancing in a controlled laboratory environment, many researchers are searching for alternative options to conduct research, such as online experimentation. However, online experimentation comes at a cost; learning online tools for building and publishing psychophysics experiments can be complicated and time-consuming. This learning cost is unfortunate because researchers typically only need to use a small percentage of these tools' capabilities, but they still have to deal with these systems' complexities (e.g., complex graphical user interfaces or difficult programming languages). Furthermore, after the experiment is built, researchers often have to find an online platform compatible with the tool they used to program the experiment. To simplify and streamline the online process of programming and hosting an experiment, I have created SimplePhy. SimplePhy can save researchers' time and energy by allowing them to create a study in just a few clicks. All researchers have to do is select among a few experiment settings and upload the stimuli. SimplePhy is able to run most psychophysical perception experiments that require mouse clicks and button presses. 
In addition to collecting online behavioral data, SimplePhy can also collect information regarding the estimated viewing distance between the participant and the monitor, the screen size, and the experimental trial's timing-features not always offered in other online platforms. Overall, SimplePhy is a simple, free, open-source tool (code can be found here: https://gitlab.com/malago/simplephy ) aimed to help labs conduct their experiments online.",2021-01-14 +33619466,An interactive tool to forecast US hospital needs in the coronavirus 2019 pandemic.,"

Objective

We developed an application (https://rush-covid19.herokuapp.com/) to aid US hospitals in planning their response to the ongoing Coronavirus Disease 2019 (COVID-19) pandemic.

Materials and methods

Our application forecasts hospital visits, admits, discharges, and needs for hospital beds, ventilators, and personal protective equipment by coupling COVID-19 predictions to models of time lags, patient carry-over, and length-of-stay. Users can choose from 7 COVID-19 models, customize 23 parameters, examine trends in testing and hospitalization, and download forecast data.

Results

Our application accurately predicts the spread of COVID-19 across states and territories. Its hospital-level forecasts are in continuous use by our home institution and others.

Discussion

Our application is versatile, easy-to-use, and can help hospitals plan their response to the changing dynamics of COVID-19, while providing a platform for deeper study.

Conclusion

Empowering healthcare responses to COVID-19 is as crucial as understanding the epidemiology of the disease. Our application will continue to evolve to meet this need.",2020-11-30 +33195776,Poribohon-BD: Bangladeshi local vehicle image dataset with annotation for classification.,"Vehicle Classification has become tremendously important due to various applications such as traffic video surveillance, accident avoidance, traffic congestion prevention, bringing intelligent transportation systems. This article presents 'Poribohon-BD' dataset for vehicle classification purposes in Bangladesh. The vehicle images are collected from two sources: i) smartphone camera, ii) social media. The dataset contains 9058 labeled and annotated images of 15 native Bangladeshi vehicles such as bus, motorbike, three-wheeler rickshaw, truck, wheelbarrow. Data augmentation techniques have been applied to keep the number of images comparable to each type of vehicle. For labeling the images, LabelImg tool by Tzuta Lin has been used. Human faces have also been blurred to maintain privacy and confidentiality. The dataset is compatible with various CNN architectures such as YOLO, VGG-16, R-CNN, DPM. It is available for research purposes at https://data.mendeley.com/datasets/pwyyg8zmk5/2.",2020-10-27 +30487811,SeqVItA: Sequence Variant Identification and Annotation Platform for Next Generation Sequencing Data.,"The current trend in clinical data analysis is to understand how individuals respond to therapies and drug interactions based on their genetic makeup. This has led to a paradigm shift in healthcare; caring for patients is now 99% information and 1% intervention. Reducing costs of next generation sequencing (NGS) technologies has made it possible to take genetic profiling to the clinical setting. 
This requires not just fast and accurate algorithms for variant detection, but also a knowledge-base for variant annotation and prioritization to facilitate tailored therapeutics based on an individual's genetic profile. Here we show that it is possible to provide a fast and easy access to all possible information about a variant and its impact on the gene, its protein product, associated pathways and drug-variant interactions by integrating previously reported knowledge from various databases. With this objective, we have developed a pipeline, Sequence Variants Identification and Annotation (SeqVItA) that provides end-to-end solution for small sequence variants detection, annotation and prioritization on a single platform. Parallelization of the variant detection step and with numerous resources incorporated to infer functional impact, clinical relevance and drug-variant associations, SeqVItA will benefit the clinical and research communities alike. Its open-source platform and modular framework allows for easy customization of the workflow depending on the data type (single, paired, or pooled samples), variant type (germline and somatic), and variant annotation and prioritization. Performance comparison of SeqVItA on simulated data and detection, interpretation and analysis of somatic variants on real data (24 liver cancer patients) is carried out. We demonstrate the efficacy of annotation module in facilitating personalized medicine based on patient's mutational landscape. SeqVItA is freely available at https://bioinf.iiit.ac.in/seqvita.",2018-11-14 +30534948,atSNP Search: a web resource for statistically evaluating influence of human genetic variation on transcription factor binding.,"

Summary

Understanding the regulatory roles of non-coding genetic variants has become a central goal for interpreting results of genome-wide association studies. The regulatory significance of the variants may be interrogated by assessing their influence on transcription factor binding. We have developed atSNP Search, a comprehensive web database for evaluating motif matches to the human genome with both reference and variant alleles and assessing the overall significance of the variant alterations on the motif matches. Convenient search features, comprehensive search outputs and a useful help menu are key components of atSNP Search. atSNP Search enables convenient interpretation of regulatory variants by statistical significance testing and composite logo plots, which are graphical representations of motif matches with the reference and variant alleles. Existing motif-based regulatory variant discovery tools only consider a limited pool of variants due to storage or other limitations. In contrast, atSNP Search users can test more than 37 billion variant-motif pairs with marginal significance in motif matches or match alteration. Computational evidence from atSNP Search, when combined with experimental validation, may help with the discovery of underlying disease mechanisms.

Availability and implementation

atSNP Search is freely available at http://atsnp.biostat.wisc.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +32611394,RainDrop: Rapid activation matrix computation for droplet-based single-cell RNA-seq reads.,"BACKGROUND:Obtaining data from single-cell transcriptomic sequencing allows for the investigation of cell-specific gene expression patterns, which could not be addressed a few years ago. With the advancement of droplet-based protocols the number of studied cells continues to increase rapidly. This establishes the need for software tools for efficient processing of the produced large-scale datasets. We address this need by presenting RainDrop for fast gene-cell count matrix computation from single-cell RNA-seq data produced by 10x Genomics Chromium technology. RESULTS:RainDrop can process single-cell transcriptomic datasets consisting of 784 million reads sequenced from around 8.000 cells in less than 40 minutes on a standard workstation. It significantly outperforms the established Cell Ranger pipeline and the recently introduced Alevin tool in terms of runtime by a maximal (average) speedup of 30.4 (22.6) and 3.5 (2.4), respectively, while keeping high agreements of the generated results. CONCLUSIONS:RainDrop is a software tool for highly efficient processing of large-scale droplet-based single-cell RNA-seq datasets on standard workstations written in C++. It is available at https://gitlab.rlp.net/stnieble/raindrop .",2020-07-01 +32246720,SynergyFinder 2.0: visual analytics of multi-drug combination synergies.,"SynergyFinder (https://synergyfinder.fimm.fi) is a stand-alone web-application for interactive analysis and visualization of drug combination screening data. Since its first release in 2017, SynergyFinder has become a widely used web-tool both for the discovery of novel synergistic drug combinations in pre-clinical model systems (e.g. cell lines or primary patient-derived cells), and for better understanding of mechanisms of combination treatment efficacy or resistance. 
Here, we describe the latest version of SynergyFinder (release 2.0), which has extensively been upgraded through the addition of novel features supporting especially higher-order combination data analytics and exploratory visualization of multi-drug synergy patterns, along with automated outlier detection procedure, extended curve-fitting functionality and statistical analysis of replicate measurements. A number of additional improvements were also implemented based on the user requests, including new visualization and export options, updated user interface, as well as enhanced stability and performance of the web-tool. With these improvements, SynergyFinder 2.0 is expected to greatly extend its potential applications in various areas of multi-drug combinatorial screening and precision medicine.",2020-07-01 +29069344,DEEPre: sequence-based enzyme EC number prediction by deep learning.,"

Motivation

Annotation of enzyme function has a broad range of applications, such as metagenomics, industrial biotechnology, and diagnosis of enzyme deficiency-caused diseases. However, the time and resource required make it prohibitively expensive to experimentally determine the function of every enzyme. Therefore, computational enzyme function prediction has become increasingly important. In this paper, we develop such an approach, determining the enzyme function by predicting the Enzyme Commission number.

Results

We propose an end-to-end feature selection and classification model training approach, as well as an automatic and robust feature dimensionality uniformization method, DEEPre, in the field of enzyme function prediction. Instead of extracting manually crafted features from enzyme sequences, our model takes the raw sequence encoding as inputs, extracting convolutional and sequential features from the raw encoding based on the classification result to directly improve the prediction performance. The thorough cross-fold validation experiments conducted on two large-scale datasets show that DEEPre improves the prediction performance over the previous state-of-the-art methods. In addition, our server outperforms five other servers in determining the main class of enzymes on a separate low-homology dataset. Two case studies demonstrate DEEPre's ability to capture the functional difference of enzyme isoforms.

Availability and implementation

The server could be accessed freely at http://www.cbrc.kaust.edu.sa/DEEPre.

Contact

xin.gao@kaust.edu.sa.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-03-01 +32854096,SmoCuDa: A Validated Smoking Cue Database to Reliably Induce Craving in Tobacco Use Disorder.,"

Background

Cue-reactivity paradigms provide valuable insights into the underlying mechanisms of nicotine craving in nicotine-dependent subjects. In order to study cue-driven nicotine craving, robust and validated stimulus datasets are essential.

Objectives

The aim of this study was to generate and validate a large set of individually rated smoking-related cues that allow for assessment of different stimulus intensities along the dimensions craving, valence, and arousal.

Methods

The image database consisted of 330 visual cues. Two hundred fifty smoking-associated pictures (Creative Commons license) were chosen from online databases and showed a widespread variety of smoking-associated content. Eighty pictures from previously published databases were included for cross-validation. Forty volunteers with tobacco use disorder rated ""urge-to-smoke,"" ""valence,"" and ""arousal"" for all images on a 100-point visual analogue scale. Pictures were also labelled according to 18 categories such as lit/unlit cigarettes in mouth, cigarette end, and cigarette in ashtray.

Results

Ratings (mean ± SD) were as follows: urge to smoke, 44.9 ± 13.2; valence, 51.2 ± 7.6; and arousal, 54.6 ± 7.1. All ratings, particularly ""urge to smoke,"" were widely distributed along the whole scale spectrum.

Conclusions

We present a novel image library of well-described smoking-related cues, which were rated on a continuous scale along the dimensions craving, valence, and arousal that accounts for inter-individual differences. The rating software, image database, and their ratings are publicly available at https://smocuda.github.io.",2020-08-27 +,Medical Implants: Enabling Angioplasty‐Ready “Smart” Stents to Detect In‐Stent Restenosis and Occlusion (Adv. Sci. 5/2018),"Stents are implanted in occluded arteries to restore blood flow thus saving millions of lives world‐wide, although the most common complication known as restenosis, re‐narrowing of arteries, remains as a critical risk to stented patients. In article number https://doi.org/10.1002/advs.201700560, Kenichi Takahata and co‐workers demonstrate a smart stent that monitors hemodynamic change and wirelessly transmits the data for early detection of restenosis. This breakthrough device, with robust electromechanical design, proves compatibility with the present interventional procedure, advancing smart stent technology towards a clinical reality.",2018-05-01 +27472917,CicerTransDB 1.0: a resource for expression and functional study of chickpea transcription factors.,"

Background

Transcription factor (TF) databases are a major resource for systematic studies of TFs in specific species as well as related family members. Even though there are several publicly available multi-species databases, the information on the amount and diversity of TFs within individual species is fragmented, especially for newly sequenced genomes of non-model species of agricultural significance.

Description

We constructed CicerTransDB (Cicer Transcription Factor Database), the first database of its kind, which would provide a centralized putatively complete list of TFs in a food legume, chickpea. CicerTransDB, available at www.cicertransdb.esy.es , is based on chickpea (Cicer arietinum L.) annotation v 1.0. The database is an outcome of genome-wide domain study and manual classification of TF families. This database not only provides information of the gene, but also gene ontology, domain and motif architecture.

Conclusion

CicerTransDB v 1.0 comprises information of 1124 genes of chickpea and enables the user to not only search, browse and download sequences but also retrieve sequence features. CicerTransDB also provides several single click interfaces, transconnecting to various other databases to ease further analysis. Several webAPI(s) integrated in the database allow end-users direct access of data. A critical comparison of CicerTransDB with PlantTFDB (Plant Transcription Factor Database) revealed 68 novel TFs in the chickpea genome, hitherto unexplored. Database URL: http://www.cicertransdb.esy.es.",2016-07-29 +33186370,Individual and community level factors associated with anemia among children 6-59 months of age in Ethiopia: A further analysis of 2016 Ethiopia demographic and health survey.,"

Background

Anemia is a global public health problem; but its burden is disproportionately borne among children in the African Regions. The 2016 Ethiopia Demographic and Health Survey report showed that the prevalence of anemia among children 6-59 months of age was 57%; far exceeding the national target of 25% set for 2015. Although studies have been conducted in Ethiopia, multilevel analysis has rarely been used to identify factors associated with anemia among children. Therefore, this study aimed to identify individual and community-level factors associated with anemia among children 6-59 months of age by fitting a multilevel logistic regression model.

Methods

The data was obtained from the 2016 Ethiopia Demographic and Health Survey, conducted from January to June 2016, and downloaded from the website http://www.DHSprogram.com. The sample was taken using two-stage stratified sampling. In stage one, 645 Enumeration Areas and in stage two 28 households per Enumeration Area were selected. A sample of 7790 children 6-59 months of age was included. Data were analyzed using STATA version 14. A multilevel logistic regression model was fitted and an adjusted odds ratio with a 95% confidence interval was obtained.

Result

From the individual-level factors, anemia was associated most strongly with child age, wealth index, maternal anemia and child stunting followed by child underweight, child fever and birth order whereas from the community-level, the strongest odds of anemia occurred among children from Somali, Harari, Dire Dawa and Afar region followed by Oromia and Addis Ababa. Low community-poverty is a protective factor for anemia. The odds of anemia were 0.81 (95% CI: 0.66, 0.99) times lower for children who were living in communities of lower poverty status than children who were living in communities of higher poverty status. Children from Somali and Dire Dawa had 3.38 (95% CI: 3.25, 5.07) and 2.22 (95% CI: 1.42, 3.48) times higher odds of anemia, respectively than children from the Tigray region.

Conclusions

This study shows that anemia among children 6-59 months of age is affected both by the individual and community level factors. It is better to strengthen the strategies of early detection and management of stunted and underweight children. At the same time, interventions should be strengthened to address maternal anemia, child fever and poverty, specifically targeting regions identified to have a high risk of anemia.",2020-11-13 +30668638,CMEP: a database for circulating microRNA expression profiling.,"

Motivation

In recent years, several experimental studies have revealed that the microRNAs (miRNAs) in serum, plasma, exosome and whole blood are dysregulated in various types of diseases, indicating that the circulating miRNAs may serve as potential noninvasive biomarkers for disease diagnosis and prognosis. However, no database has been constructed to integrate the large-scale circulating miRNA profiles, explore the functional pathways involved and predict the potential biomarkers using feature selection between the disease conditions. Although there have been several studies attempting to generate a circulating miRNA database, they have not yet integrated the large-scale circulating miRNA profiles or provided the biomarker-selection function using machine learning methods.

Results

To fill this gap, we constructed the Circulating MicroRNA Expression Profiling (CMEP) database for integrating, analyzing and visualizing the large-scale expression profiles of phenotype-specific circulating miRNAs. The CMEP database contains massive datasets that were manually curated from NCBI GEO and the exRNA Atlas, including 66 datasets, 228 subsets and 10 419 samples. The CMEP provides the differential expression circulating miRNAs analysis and the KEGG functional pathway enrichment analysis. Furthermore, to provide the function of noninvasive biomarker discovery, we implemented several feature-selection methods, including ridge regression, lasso regression, support vector machine and random forests. Finally, we implemented a user-friendly web interface to improve the user experience and to visualize the data and results of CMEP.

Availability and implementation

CMEP is accessible at http://syslab5.nchu.edu.tw/CMEP.",2019-09-01 +31062021,MAFFT-DASH: integrated protein sequence and structural alignment.,"Here, we describe a web server that integrates structural alignments with the MAFFT multiple sequence alignment (MSA) tool. For this purpose, we have prepared a web-based Database of Aligned Structural Homologs (DASH), which provides structural alignments at the domain and chain levels for all proteins in the Protein Data Bank (PDB), and can be queried interactively or by a simple REST-like API. MAFFT-DASH integration can be invoked with a single flag on either the web (https://mafft.cbrc.jp/alignment/server/) or command-line versions of MAFFT. In our benchmarks using 878 cases from the BAliBase, HomFam, OXFam, Mattbench and SISYPHUS datasets, MAFFT-DASH showed 10-20% improvement over standard MAFFT for MSA problems with weak similarity, in terms of Sum-of-Pairs (SP), a measure of how well a program succeeds at aligning input sequences in comparison to a reference alignment. When MAFFT alignments were supplemented with homologous sequences, further improvement was observed. Potential applications of DASH beyond MSA enrichment include functional annotation through detection of remote homology and assembly of template libraries for homology modeling.",2019-07-01 +30407545,"The Zebrafish Information Network: new support for non-coding genes, richer Gene Ontology annotations and the Alliance of Genome Resources.","The Zebrafish Information Network (ZFIN) (https://zfin.org/) is the database for the model organism, zebrafish (Danio rerio). ZFIN expertly curates, organizes and provides a wide array of zebrafish genetic and genomic data, including genes, alleles, transgenic lines, gene expression, gene function, mutant phenotypes, orthology, human disease models, nomenclature and reagents. 
New features at ZFIN include increased support for genomic regions and for non-coding genes, and support for more expressive Gene Ontology annotations. ZFIN has recently taken over maintenance of the zebrafish reference genome sequence as part of the Genome Reference Consortium. ZFIN is also a founding member of the Alliance of Genome Resources, a collaboration of six model organism databases (MODs) and the Gene Ontology Consortium (GO). The recently launched Alliance portal (https://alliancegenome.org) provides a unified, comparative view of MOD, GO, and human data, and facilitates foundational and translational biomedical research.",2019-01-01 +31296229,GENT2: an updated gene expression database for normal and tumor tissues.,"

Background

Gene Expression database of Normal and Tumor tissues 2 (GENT2) is an updated version of GENT, which has provided a user-friendly search platform for gene expression patterns across different normal and tumor tissues compiled from public gene expression data sets.

Results

We refactored GENT2 with recent technologies such as Apache Lucene indexing for fast search and Google Web Toolkit (GWT) framework for a user-friendly web interface. Now, GENT2 contains more than 68,000 samples and has several new useful functions. First, GENT2 now provides gene expression across 72 different tissues compared to 57 in GENT. Second, with increasing importance of tumor subtypes, GENT2 provides an option to study the differential expression and its prognostic significance based on tumor subtypes. Third, whenever available, GENT2 provides prognostic information of a gene of interest. Fourth, GENT2 provides a meta-analysis of survival information to provide users more reliable prognostic value of a gene of interest.

Conclusions

In conclusion, with these significant improvements, GENT2 will continue to be a useful tool to a wide range of researchers. GENT2 is freely available at http://gent2.appex.kr .",2019-07-11 +32982995,Virulence Pattern and Genomic Diversity of Vibrio cholerae O1 and O139 Strains Isolated From Clinical and Environmental Sources in India.,"Vibrio cholerae is an autochthonous inhabitant of the aquatic environment. Several molecular methods have been used for typing V. cholerae strains, but there is no proper database for such scheme, including multilocus sequence typing (MLST) for V. cholerae O1 and O139 strains. We used 54 V. cholerae O1 and three O139 strains isolated from clinical and environmental sources and regions of India during the time period of 1975-2015 to determine the presence of virulence genes and production of biofilm. We devised a MLST scheme and developed a database for typing V. cholerae strains. Also, we performed pulsed-field gel electrophoresis to see the genomic diversity among them and compared it with MLST. We used the MEGA 7.0 software for the alignment and comparison of different nucleotide sequences. The advanced cluster analysis was performed to define complexes. All strains of V. cholerae, except five strains, showed variation in phenotypic characteristics but carried virulence-associated genes indicating they belonged to the El Tor/hybrid/O139 variants. MLST analysis showed 455 sequences types among V. cholerae strains, irrespective of sources and places of isolation. With these findings, we set up an MLST database on PubMLST.org using the BIGSdb software for V. cholerae O1 and O139 strains, which is available at https://pubmlst.org/vcholerae/ under the O1/O139 scheme. 
The pulsed-field gel electrophoresis (PFGE) fingerprint showed six fingerprint patterns namely E, F, G, H, I, and J clusters among 33 strains including strain N16961 carrying El Tor ctxB of which cluster J representing O139 strain was entirely different from other El Tor strains. Twenty strains carrying Haitian ctxB showed a fingerprint pattern classified as cluster A. Of the five strains, four carrying classical ctxB comprising two each of El Tor and O139 strains and one El Tor strain carrying Haitian ctxB clustered together under cluster B along with V. cholerae 569B showing pattern D. This study thus indicates that V. cholerae strains are undergoing continuous genetic changes leading to the emergence of new strains. The MLST scheme was found more appropriate compared to PFGE that can be used to determine the genomic diversity and population structure of V. cholerae.",2020-08-26 +35372867,Histopathologic and Clinical Features in Patients with Diabetes and Kidney Disease.,"

Background

The discovery of nondiabetic kidney disease (NDKD) in an individual patient with diabetes may have significant treatment implications. Extensive histopathologic data in this population are lacking, but they may provide insights into the complex pathogenesis of diabetic nephropathy (DN) and reveal specific phenotypes for the development of targeted therapies. This study seeks to elucidate the clinical and laboratory parameters associated with the spectrum of kidney histopathologic features in patients with diabetes.

Methods

This study is a retrospective analysis of 399 kidney biopsies assessed from 2014 to 2016 at the University of Washington among patients with diabetes. More comprehensive clinical data were evaluated in a subset of 79 participants.

Results

Of the 399 biopsies reviewed, 192 (48%) had a primary diagnosis of DN (including 26 with an additional diagnosis), and 207 (52%) had a primary diagnosis of NDKD (including 67 who also had DN). Retinopathy (sensitivity: 0.86; specificity: 0.81; OR, 27.1; 95% CI, 6.8 to 107.7) and higher levels of proteinuria (7.6 versus 4.1 g/d; P=0.004) were associated with DN, whereas a physician description of AKI was associated with a lower risk of DN (OR, 0.13; 95% CI, 0.04 to 0.38). The four most prevalent diagnoses in participants with NDKD were FSGS in 39, nephrosclerosis in 29, IgA nephropathy in 27, and acute tubular injury in 21.

Conclusions

Among patients with diabetes who undergo kidney biopsy in the Pacific Northwest, approximately half have DN, and half have NDKD. Retinopathy and more severe proteinuria were associated with DN, and AKI was a more common descriptor in NDKD.Podcast: This article contains a podcast at https://www.asnonline.org/media/podcast/K360/2020_11_25_KID0003962020.mp3.",2020-09-11 +30564269,RDAD: A Machine Learning System to Support Phenotype-Based Rare Disease Diagnosis.,"DNA sequencing has allowed for the discovery of the genetic cause for a considerable number of diseases, paving the way for new disease diagnostics. However, due to the lack of clinical samples and records, the molecular cause for rare diseases is always hard to identify, significantly limiting the number of rare Mendelian diseases diagnosed through sequencing technologies. Clinical phenotype information therefore becomes a major resource to diagnose rare diseases. In this article, we adopted both a phenotypic similarity method and a machine learning method to build four diagnostic models to support rare disease diagnosis. All the diagnostic models were validated using the real medical records from RAMEDIS. Each model provides a list of the top 10 candidate diseases as the prediction outcome and the results showed that all models had a high diagnostic precision (≥98%) with the highest recall reaching up to 95% while the models with machine learning methods showed the best performance. To promote effective diagnosis for rare disease in clinical application, we developed the phenotype-based Rare Disease Auxiliary Diagnosis system (RDAD) to assist clinicians in diagnosing rare diseases with the above four diagnostic models. 
The system is freely accessible through http://www.unimd.org/RDAD/.",2018-12-04 +32406521,Approaches to training multiclass semantic image segmentation of damage in concrete.,"This paper addresses the problem of creating a large quantity of high-quality training segmentation masks from scanning electron microscopy (SEM) images. The images are acquired from concrete samples that exhibit progressive amounts of degradation resulting from alkali-silica reaction (ASR), a leading cause of deterioration, cracking and loss of capacity in much of the nation's infrastructure. The target damage classes in concrete SEM images are defined as paste damage, aggregate damage, air voids and no damage. We approached the SEM segmentation problem by applying convolutional neural network (CNN)-based methods to predict the damage classes due to ASR for each image pixel. The challenges in using the CNN-based methods lie in preparing large numbers of high-quality training labelled images while having limited human resources. To address these challenges, we designed damage- and context-assisted approaches to lower the requirements on human resources. We then evaluated the accuracy of CNN-based segmentation methods using the datasets prepared with these two approaches. LAY DESCRIPTION: This work is about automated segmentation of Scanning Electron Microscopy (SEM) images taken from core and prism samples of concrete. The segmentation must detect several damage classes in each image in order to understand properties of concrete-made structures over time. The segmentation problem is approached with an artificial network (AI) based model. The training data for the AI model are created using damage- and context-assisted approaches to lower the requirements on human resources. 
The access to all training data and to a web-based validation system for scoring segmented images is available at https://isg.nist.gov/deepzoomweb/data/concreteScoring.",2020-06-02 +31633779,3D flow field estimation and assessment for live cell fluorescence microscopy.,"

Motivation

The revolution in light sheet microscopy enables the concurrent observation of thousands of dynamic processes, from single molecules to cellular organelles, with high spatiotemporal resolution. However, challenges in the interpretation of multidimensional data requires the fully automatic measurement of those motions to link local processes to cellular functions. This includes the design and the implementation of image processing pipelines able to deal with diverse motion types, and 3D visualization tools adapted to the human visual system.

Results

Here, we describe a new method for 3D motion estimation that addresses the aforementioned issues. We integrate 3D matching and variational approach to handle a diverse range of motion without any prior on the shape of moving objects. We compare different similarity measures to cope with intensity ambiguities and demonstrate the effectiveness of the Census signature for both stages. Additionally, we present two intuitive visualization approaches to adapt complex 3D measures into an interpretable 2D view, and a novel way to assess the quality of flow estimates in absence of ground truth.

Availability and implementation

https://team.inria.fr/serpico/data/3d-optical-flow-data/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +30698638,Cancer Target Gene Screening: a web application for breast cancer target gene screening using multi-omics data analysis.,"Breast cancer comprises several molecular subtypes with distinct clinical features and treatment responses, and a substantial portion of each subtype remains incurable. A comprehensive analysis of multi-omics data and clinical profiles is required in order to better understand the biological complexity of this cancer type and to identify new prognostic and therapeutic markers. Thus, there arises a need for useful analytical tools to assist in the investigation and clinical management of the disease. We developed Cancer Target Gene Screening (CTGS), a web application that provides rapid and user-friendly analysis of multi-omics data sets from a large number of primary breast tumors. It allows the investigation of genomic and epigenomic aberrations, evaluation of transcriptomic profiles and performance of survival analyses and of bivariate correlations between layers of omics data. Notably, the genome-wide screening function of CTGS prioritizes candidate genes of clinical and biological significance among genes with copy number alteration, DNA methylation and dysregulated expression by the integrative analysis of different types of omics data in customized subgroups of breast cancer patients. These features may help in the identification of druggable cancer driver genes in a specific subtype or the clinical condition of human breast cancer. CTGS is available at http://ctgs.biohackers.net.",2020-03-01 +32195761,External Validation of PATHFx Version 3.0 in Patients Treated Surgically and Nonsurgically for Symptomatic Skeletal Metastases.,"

Background

PATHFx is a clinical decision-support tool based on machine learning capable of estimating the likelihood of survival after surgery for patients with skeletal metastases. The applicability of any machine-learning tool depends not only on successful external validation in unique patient populations but also on remaining relevant as more effective systemic treatments are introduced. With advancements in the treatment of metastatic disease, it is our responsibility to patients to ensure clinical support tools remain contemporary and accurate.

Question/purposes

Therefore, we sought to (1) generate updated PATHFx models using recent data from patients treated at one large, urban tertiary referral center and (2) externally validate the models using two contemporary patient populations treated either surgically or nonsurgically with external-beam radiotherapy alone for symptomatic skeletal metastases for symptomatic lesions.

Methods

After obtaining institutional review board approval, we collected data on 208 patients undergoing surgical treatment for pathologic fractures at Memorial Sloan Kettering Cancer Center between 2015 and 2018. These data were combined with the original PATHFx training set (n = 189) to create the final training set (n = 397). We then created six Bayesian belief networks designed to estimate the likelihood of 1-month, 3-month, 6-month, 12-month, 18-month, and 24-month survival after treatment. Bayesian belief analysis is a statistical method that allows data-driven learning to arise from conditional probabilities by exploring relationships between variables to estimate the likelihood of an outcome using observed data. For external validation, we extracted the records of patients treated between 2016 and 2018 from the International Bone Metastasis Registry and records of patients treated nonoperatively with external-beam radiation therapy for symptomatic skeletal metastases from 2012 to 2016 using the Military Health System Data Repository (radiotherapy-only group). From each record, we collected the date of treatment, laboratory values at the time of treatment initiation, demographic data, details of diagnosis, and the date of death. All records reported sufficient follow-up to establish survival (yes/no) at 24-months after treatment. For external validation, we applied the data from each record to the new PATHFx models. We assessed calibration (calibration plots), accuracy (Brier score), discriminatory ability (area under the receiver operating characteristic curve [AUC]).

Results

The updated PATHFx version 3.0 models successfully classified survival at each time interval in both external validation sets and demonstrated appropriate discriminatory ability and model calibration. The Bayesian models were reasonably calibrated to the Memorial Sloan Kettering Cancer Center training set. External validation with 197 records from the International Bone Metastasis Registry and 192 records from the Military Health System Data Repository for analysis found Brier scores that were all less than 0.20, with upper bounds of the 95% confidence intervals all less than 0.25, both for the radiotherapy-only and International Bone Metastasis Registry groups. Additionally, AUC estimates were all greater than 0.70, with lower bounds of the 95% CI all greater than 0.68, except for the 1-month radiotherapy-only group. To complete external validation, decision curve analysis demonstrated clinical utility. This means it was better to use the PATHFx models when compared to the default assumption that all or no patients would survive at all time periods except for the 1-month models. We believe the favorable Brier scores (< 0.20) as well as DCA indicate these models are suitable for clinical use.

Conclusions

We successfully updated PATHFx using contemporary data from patients undergoing either surgical or nonsurgical treatment for symptomatic skeletal metastases. These models have been incorporated for clinical use on PATHFx version 3.0 (https://www.pathfx.org). Clinically, external validation suggests it is better to use PATHFx version 3.0 for all time periods except when deciding whether to give radiotherapy to patients with a life expectancy of less than 1 month. This is partly because most patients survived 1 month after treatment. With the advancement of medical technology in treatment and diagnosis for patients with metastatic bone disease, part of our fiduciary responsibility is to maintain current clinical support tools.

Level of evidence

Level III, therapeutic study.",2020-04-01 +32470119,Coronavirus3D: 3D structural visualization of COVID-19 genomic divergence.,"

Motivation

As the COVID-19 pandemic is spreading around the world, the SARS-CoV-2 virus is evolving with mutations that potentially change and fine-tune functions of the proteins coded in its genome.

Results

Coronavirus3D website integrates data on the SARS-CoV-2 virus mutations with information about 3D structures of its proteins, allowing users to visually analyze the mutations in their 3D context.

Availability and implementation

Coronavirus3D server is freely available at https://coronavirus3d.org.",2020-08-01 +27575498,Impact of pathology theses supported at the medical university of Tunis (2000-2010).,"

Background

The thesis is a research work which must meet rigorous scientific criteria. However, this research effort remains inaccessible to international scientific communities. The aims of this study were to determine the publication rate in indexed journals and the factors affecting publication.

Methods

This was a retrospective descriptive study of pathology theses listed in the theses catalog of the library of the medical university of Tunis and defended between 2000 and 2010. Publications were searched for in the ""Pub Med"" database. The number of citations received by each published thesis was recorded from www.Scopus.com.

Results

Our study concerned 189 theses. Thirty-five original articles were derived from 33 theses (17.5%). The articles were published in eleven indexed medical journals, dominated by a generalist journal (La Tunisie Médicale: 68.6%) and by specialist journals (Annales de Pathologies, Pathology, Ultrastructural Pathology: 11.4%). The articles received an average of 1 citation. Theses with an informative title were published more often (p=0.005). Theses with a structured introduction were published more often (p=0.002).

Conclusion

Publication rate of pathology theses in indexed journals are relatively low. This publication rate could be improved by the organization of seminars and workshops on writing articles from theses or by the improvement of these articles in national competitions.",2016-03-01 +28416714,JingleBells: A Repository of Immune-Related Single-Cell RNA-Sequencing Datasets.,"Recent advances in single-cell RNA-sequencing (scRNA-seq) technology increase the understanding of immune differentiation and activation processes, as well as the heterogeneity of immune cell types. Although the number of available immune-related scRNA-seq datasets increases rapidly, their large size and various formats render them hard for the wider immunology community to use, and read-level data are practically inaccessible to the non-computational immunologist. To facilitate datasets reuse, we created the JingleBells repository for immune-related scRNA-seq datasets ready for analysis and visualization of reads at the single-cell level (http://jinglebells.bgu.ac.il/). To this end, we collected the raw data of publicly available immune-related scRNA-seq datasets, aligned the reads to the relevant genome, and saved aligned reads in a uniform format, annotated for cell of origin. We also added scripts and a step-by-step tutorial for visualizing each dataset at the single-cell level, through the commonly used Integrated Genome Viewer (www.broadinstitute.org/igv/). The uniform scRNA-seq format used in JingleBells can facilitate reuse of scRNA-seq data by computational biologists. It also enables immunologists who are interested in a specific gene to visualize the reads aligned to this gene to estimate cell-specific preferences for splicing, mutation load, or alleles. 
Thus JingleBells is a resource that will extend the usefulness of scRNA-seq datasets outside the programming aficionado realm.",2017-05-01 +32835615,A database of integrated molecular and phytochemical interactions of the foxm1 pathway for lung cancer.,"The FoxM1 pathway is an oncogenic signaling pathway involved in essential mechanisms including control cell-cycle progression, apoptosis and cell growth which are the common hallmarks of various cancers. Although its biological functions in the tumor development and progression are known, the mechanism by which it participates in those processes is not understood. The present work reveals images of the oncogenic FoxM1 pathway controlling the cell cycle process with alternative treatment options via phytochemical substances in the lung cancer study. The downstream significant protein modules of the FoxM1 pathway were extracted by the Molecular Complex Detection (MCODE) and the maximal clique (Mclique) algorithms. Furthermore, the effects of post-transcriptional modification by microRNA, transcription factor binding and the phytochemical compounds are observed through their interactions with the lung cancer protein modules. We provided two case studies to demonstrate the usefulness of our database. Our results suggested that the combination of various phytochemicals is effective in the treatment of lung cancer. The ultimate goal of the present work is to partly support the discovery of plant-derived compounds in combination treatment of classical chemotherapeutic agents to increase the efficacy of lung cancer method probably with minor side effects. Furthermore, a web-based system displaying results of the present work is set up for investigators posing queries at http://sit.mfu.ac.th/lcgdb/index_FoxM1.php.Communicated by Ramaswamy H. 
Sarma.",2020-08-24 +26631132,The Saccharomyces Genome Database: A Tool for Discovery.,"The Saccharomyces Genome Database (SGD) is the main community repository of information for the budding yeast, Saccharomyces cerevisiae. The SGD has collected published results on chromosomal features, including genes and their products, and has become an encyclopedia of information on the biology of the yeast cell. This information includes gene and gene product function, phenotype, interactions, regulation, complexes, and pathways. All information has been integrated into a unique web resource, accessible via http://yeastgenome.org. The website also provides custom tools to allow useful searches and visualization of data. The experimentally defined functions of genes, mutant phenotypes, and sequence homologies archived in the SGD provide a platform for understanding many fields of biological research. The mission of SGD is to provide public access to all published experimental results on yeast to aid life science students, educators, and researchers. As such, the SGD has become an essential tool for the design of experiments and for the analysis of experimental results.",2015-12-02 +32373688,Data on protein changes of chick vitreous during normal eye growth using data-independent acquisition (SWATH-MS).,"Myopia is the most common refractive error which is estimated to affect half the population of the world by 2050. It has been suggested that it could be determined by multiple factors such as environmental and genetic, but the mechanism behind the cause of myopia is still yet to be identified. Vitreous humor (VH) is a transparent gelatin-like substance that takes up to 80% of the volume of the eye, making it the largest component of the eye. 
Although VH is the main contributor to axial elongation of the eye including normal eye growth (emmetropization) and myopia, the diluted nature of VH (made up of 99% of water) made it difficult for less abundant molecules to be identified and therefore often overlooked. Using the more sensitive label-free mass spectrometry approach with data-independent acquisition (SWATH-MS), we established a comprehensive VH proteome library in chick animal model and quantified possible protein biomarkers that are responsible for the axial elongation during emmetropization (7, 14, 21, 28 days after hatching, n = 48 eyes). Raw data files for both information-dependent acquisition (IDA) and data-independent acquisition (SWATH-MS) were uploaded on PeptideAtlas for public access (http://www.peptideatlas.org/PASS/PASS01258).",2020-04-18 +33126018,"The Draft Genome of Coelastrum proboscideum (Sphaeropleales, Chlorophyta).","Coelastrum proboscideum Bohlin, 1896 (Sphaeropleales, Scenedesmaceae, Chlorophyta) is a coenobial species with cosmopolitan distribution in diverse freshwater habitats. Coelastrum spp. are widely tested for biotechnological applications such as carotenoid and lipid production, and in bioremediation of wastewater. Here, we report the draft genome of C. proboscideum var. dilatatum strain SAG 217-2. The final assembly comprised 125,935,854 bp with over 8357 scaffolds. The whole-genome data is publicly available in the Nucleotide Sequence Archive (CNSA) of China National GeneBank (CNGB) (https://db.cngb.org/cnsa/) under the accession number CNA0014153.",2020-08-17 +33565027,Classification of COVID-19 by Compressed Chest CT Image through Deep Learning on a Large Patients Cohort.,"Corona Virus Disease (COVID-19) has spread globally quickly, and has resulted in a large number of causalities and medical resources insufficiency in many countries. Reverse-transcriptase polymerase chain reaction (RT-PCR) testing is adopted as biopsy tool for confirmation of virus infection. 
However, its accuracy is as low as 60-70%, which is inefficient to uncover the infected. In comparison, the chest CT has been considered as the prior choice in diagnosis and monitoring progress of COVID-19 infection. Although the COVID-19 diagnostic systems based on artificial intelligence have been developed for assisting doctors in diagnosis, the small sample size and the excessive time consumption limit their applications. To this end, this paper proposed a diagnosis prototype system for COVID-19 infection testing. The proposed deep learning model is trained and is tested on 2267 CT sequences from 1357 patients clinically confirmed with COVID-19 and 1235 CT sequences from non-infected people. The main highlights of the prototype system are: (1) no data augmentation is needed to accurately discriminate the COVID-19 from normal controls with the specificity of 0.92 and sensitivity of 0.93; (2) the raw DICOM image is not necessary in testing. Highly compressed image like Jpeg can be used to allow a quick diagnosis; and (3) it discriminates the virus infection within 6 seconds and thus allows an online test with light cost. We also applied our model on 48 asymptomatic patients diagnosed with COVID-19. We found that: (1) the positive rate of RT-PCR assay is 63.5% (687/1082). (2) 45.8% (22/48) of the RT-PCR assay is negative for asymptomatic patients, yet the accuracy of CT scans is 95.8%. The online detection system is available: http://212.64.70.65/covid .",2021-02-09 +29912372,Chemical shift-based identification of monosaccharide spin-systems with NMR spectroscopy to complement untargeted glycomics.,"

Motivation

A better understanding of oligosaccharides and their wide-ranging functions in almost every aspect of biology and medicine promises to uncover hidden layers of biology and will support the development of better therapies. Elucidating the chemical structure of an unknown oligosaccharide remains a challenge. Efficient tools are required for non-targeted glycomics. Chemical shifts are a rich source of information about the topology and configuration of biomolecules, whose potential is however not fully explored for oligosaccharides. We hypothesize that the chemical shifts of each monosaccharide are unique for each saccharide type with a certain linkage pattern, so that correlated data measured by NMR spectroscopy can be used to identify the chemical nature of a carbohydrate.

Results

We present here an efficient search algorithm, GlycoNMRSearch, which matches either a subset or the entire set of chemical shifts of an unidentified monosaccharide spin system to all spin systems in an NMR database. The search output is much more precise than earlier search functions and highly similar matches suggest the chemical structure of the spin system within the oligosaccharide. Thus, searching for connected chemical shift correlations within all electronically available NMR data of oligosaccharides is a very efficient way of identifying the chemical structure of unknown oligosaccharides. With an improved database in the future, GlycoNMRSearch will be even more efficient deducing chemical structures of oligosaccharides and there is a high chance that it becomes an indispensable technique for glycomics.

Availability and implementation

The search algorithm presented here, together with a graphical user interface, is available at http://glyconmrsearch.nmrhub.eu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +31603461,Deep-learning approach to identifying cancer subtypes using high-dimensional genomic data.,"

Motivation

Cancer subtype classification has the potential to significantly improve disease prognosis and develop individualized patient management. Existing methods are limited by their ability to handle extremely high-dimensional data and by the influence of misleading, irrelevant factors, resulting in ambiguous and overlapping subtypes.

Results

To address the above issues, we proposed a novel approach to disentangling and eliminating irrelevant factors by leveraging the power of deep learning. Specifically, we designed a deep-learning framework, referred to as DeepType, that performs joint supervised classification, unsupervised clustering and dimensionality reduction to learn cancer-relevant data representation with cluster structure. We applied DeepType to the METABRIC breast cancer dataset and compared its performance to state-of-the-art methods. DeepType significantly outperformed the existing methods, identifying more robust subtypes while using fewer genes. The new approach provides a framework for the derivation of more accurate and robust molecular cancer subtypes by using increasingly complex, multi-source data.

Availability and implementation

An open-source software package for the proposed method is freely available at http://www.acsu.buffalo.edu/~yijunsun/lab/DeepType.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +33356525,Knowledge Organization Systems for Systematic Chemical Assessments.,"

Background

Although the implementation of systematic review and evidence mapping methods stands to improve the transparency and accuracy of chemical assessments, they also accentuate the challenges that assessors face in ensuring they have located and included all the evidence that is relevant to evaluating the potential health effects an exposure might be causing. This challenge of information retrieval can be characterized in terms of ""semantic"" and ""conceptual"" factors that render chemical assessments vulnerable to the streetlight effect.

Objectives

This commentary presents how controlled vocabularies, thesauruses, and ontologies contribute to overcoming the streetlight effect in information retrieval, making up the key components of Knowledge Organization Systems (KOSs) that enable more systematic access to assessment-relevant information than is currently achievable. The concept of Adverse Outcome Pathways is used to illustrate what a general KOS for use in chemical assessment could look like.

Discussion

Ontologies are an underexploited element of effective knowledge organization in the environmental health sciences. Agreeing on and implementing ontologies in chemical assessment is a complex but tractable process with four fundamental steps. Successful implementation of ontologies would not only make currently fragmented information about health risks from chemical exposures vastly more accessible, it could ultimately enable computational methods for chemical assessment that can take advantage of the full richness of data described in natural language in primary studies. https://doi.org/10.1289/EHP6994.",2020-12-24 +31680153,Plant Reactome: a knowledgebase and resource for comparative pathway analysis.,"Plant Reactome (https://plantreactome.gramene.org) is an open-source, comparative plant pathway knowledgebase of the Gramene project. It uses Oryza sativa (rice) as a reference species for manual curation of pathways and extends pathway knowledge to another 82 plant species via gene-orthology projection using the Reactome data model and framework. It currently hosts 298 reference pathways, including metabolic and transport pathways, transcriptional networks, hormone signaling pathways, and plant developmental processes. In addition to browsing plant pathways, users can upload and analyze their omics data, such as the gene-expression data, and overlay curated or experimental gene-gene interaction data to extend pathway knowledge. The curation team actively engages researchers and students on gene and pathway curation by offering workshops and online tutorials. The Plant Reactome supports, implements and collaborates with the wider community to make data and tools related to genes, genomes, and pathways Findable, Accessible, Interoperable and Re-usable (FAIR).",2020-01-01 +31254167,Interoperable chemical structure search service.,"

Motivation

The existing connections between large databases of chemicals, proteins, metabolites and assays offer valuable resources for research in fields ranging from drug design to metabolomics. Transparent search across multiple databases provides a way to efficiently utilize these resources. To simplify such searches, many databases have adopted semantic technologies that allow interoperable querying of the datasets using SPARQL query language. However, the interoperable interfaces of the chemical databases still lack the functionality of structure-driven chemical search, which is a fundamental method of data discovery in the chemical search space.

Results

We present a SPARQL service that augments existing semantic services by making interoperable substructure and similarity searches in small-molecule databases possible. The service thus offers new possibilities for querying interoperable databases, and simplifies writing of heterogeneous queries that include chemical-structure search terms.

Availability

The service is freely available and accessible using a standard SPARQL endpoint interface. The service documentation and user-oriented demonstration interfaces that allow quick explorative querying of datasets are available at https://idsm.elixir-czech.cz .",2019-06-28 +31888469,normGAM: an R package to remove systematic biases in genome architecture mapping data.,"BACKGROUND:The genome architecture mapping (GAM) technique can capture genome-wide chromatin interactions. However, besides the known systematic biases in the raw GAM data, we have found a new type of systematic bias. It is necessary to develop and evaluate effective normalization methods to remove all systematic biases in the raw GAM data. RESULTS:We have detected a new type of systematic bias, the fragment length bias, in the genome architecture mapping (GAM) data, which is significantly different from the bias of window detection frequency previously mentioned in the paper introducing the GAM method but is similar to the bias of distances between restriction sites existing in raw Hi-C data. We have found that the normalization method (a normalized variant of the linkage disequilibrium) used in the GAM paper is not able to effectively eliminate the new fragment length bias at 1 Mb resolution (slightly better at 30 kb resolution). We have developed an R package named normGAM for eliminating the new fragment length bias together with the other three biases existing in raw GAM data, which are the biases related to window detection frequency, mappability, and GC content. Five normalization methods have been implemented and included in the R package including Knight-Ruiz 2-norm (KR2, newly designed by us), normalized linkage disequilibrium (NLD), vanilla coverage (VC), sequential component normalization (SCN), and iterative correction and eigenvector decomposition (ICE). 
CONCLUSIONS:Based on our evaluations, the five normalization methods can eliminate the four biases existing in raw GAM data, with VC and KR2 performing better than the others. We have observed that the KR2-normalized GAM data have a higher correlation with the KR-normalized Hi-C data on the same cell samples indicating that the KR-related methods are better than the others for keeping the consistency between the GAM and Hi-C experiments. Compared with the raw GAM data, the normalized GAM data are more consistent with the normalized distances from the fluorescence in situ hybridization (FISH) experiments. The source code of normGAM can be freely downloaded from http://dna.cs.miami.edu/normGAM/.",2019-12-30 +24997141,ChiloDB: a genomic and transcriptome database for an important rice insect pest Chilo suppressalis. ,"ChiloDB is an integrated resource that will be of use to the rice stem borer research community. The rice striped stem borer (SSB), Chilo suppressalis Walker, is a major rice pest that causes severe yield losses in most rice-producing countries. A draft genome of this insect is available. The aims of ChiloDB are (i) to store recently acquired genomic sequence and transcriptome data and integrate them with protein-coding genes, microRNAs, piwi-interacting RNAs (piRNAs) and RNA sequencing (RNA-Seq) data and (ii) to provide comprehensive search tools and downloadable data sets for comparative genomics and gene annotation of this important rice pest. ChiloDB contains the first version of the official SSB gene set, comprising 80,479 scaffolds and 10 221 annotated protein-coding genes. Additionally, 262 SSB microRNA genes predicted from a small RNA library, 82 639 piRNAs identified using the piRNApredictor software, 37,040 transcripts from a midgut transcriptome and 69 977 transcripts from a mixed sample have all been integrated into ChiloDB. 
ChiloDB was constructed using a data structure that is compatible with data resources, which will be incorporated into the database in the future. This resource will serve as a long-term and open-access database for research on the biology, evolution and pest control of SSB. To the best of our knowledge, ChiloDB is one of the first genomic and transcriptome database for rice insect pests. Database URL: http://ento.njau.edu.cn/ChiloDB.",2014-07-04 +25414355,rrnDB: improved tools for interpreting rRNA gene abundance in bacteria and archaea and a new foundation for future development.,"Microbiologists utilize ribosomal RNA genes as molecular markers of taxonomy in surveys of microbial communities. rRNA genes are often co-located as part of an rrn operon, and multiple copies of this operon are present in genomes across the microbial tree of life. rrn copy number variability provides valuable insight into microbial life history, but introduces systematic bias when measuring community composition in molecular surveys. Here we present an update to the ribosomal RNA operon copy number database (rrnDB), a publicly available, curated resource for copy number information for bacteria and archaea. The redesigned rrnDB (http://rrndb.umms.med.umich.edu/) brings a substantial increase in the number of genomes described, improved curation, mapping of genomes to both NCBI and RDP taxonomies, and refined tools for querying and analyzing these data. With these changes, the rrnDB is better positioned to remain a comprehensive resource under the torrent of microbial genome sequencing. 
The enhanced rrnDB will contribute to the analysis of molecular surveys and to research linking genomic characteristics to life history.",2014-11-20 +24234439,The IUPHAR/BPS Guide to PHARMACOLOGY: an expert-driven knowledgebase of drug targets and their ligands.,"The International Union of Basic and Clinical Pharmacology/British Pharmacological Society (IUPHAR/BPS) Guide to PHARMACOLOGY (http://www.guidetopharmacology.org) is a new open access resource providing pharmacological, chemical, genetic, functional and pathophysiological data on the targets of approved and experimental drugs. Created under the auspices of the IUPHAR and the BPS, the portal provides concise, peer-reviewed overviews of the key properties of a wide range of established and potential drug targets, with in-depth information for a subset of important targets. The resource is the result of curation and integration of data from the IUPHAR Database (IUPHAR-DB) and the published BPS 'Guide to Receptors and Channels' (GRAC) compendium. The data are derived from a global network of expert contributors, and the information is extensively linked to relevant databases, including ChEMBL, DrugBank, Ensembl, PubChem, UniProt and PubMed. Each of the ∼6000 small molecule and peptide ligands is annotated with manually curated 2D chemical structures or amino acid sequences, nomenclature and database links. Future expansion of the resource will complete the coverage of all the targets of currently approved drugs and future candidate targets, alongside educational resources to guide scientists and students in pharmacological principles and techniques.",2013-11-14 +33069256,Multivariate Bayesian meta-analysis: joint modelling of multiple cancer types using summary statistics.,"

Background

Cancer atlases often provide estimates of cancer incidence, mortality or survival across small areas of a region or country. A recent example of a cancer atlas is the Australian cancer atlas (ACA), that provides interactive maps to visualise spatially smoothed estimates of cancer incidence and survival for 20 different cancer types over 2148 small areas across Australia.

Methods

The present study proposes a multivariate Bayesian meta-analysis model, which can model multiple cancers jointly using summary measures without requiring access to the unit record data. This new approach is illustrated by modelling the publicly available spatially smoothed standardised incidence ratios for multiple cancers in the ACA divided into three groups: common, rare/less common and smoking-related. The multivariate Bayesian meta-analysis models are fitted to each group in order to explore any possible association between the cancers in three remoteness regions: major cities, regional and remote areas across Australia. The correlation between the pairs of cancers included in each multivariate model for a group was examined by computing the posterior correlation matrix for each cancer group in each region. The posterior correlation matrices in different remoteness regions were compared using Jennrich's test of equality of correlation matrices (Jennrich in J Am Stat Assoc. 1970;65(330):904-12. https://doi.org/10.1080/01621459.1970.10481133 ).

Results

Substantive correlation was observed among some cancer types. There was evidence that the magnitude of this correlation varied according to remoteness of a region. For example, there has been significant negative correlation between prostate and lung cancer in major cities, but zero correlation found in regional and remote areas for the same pair of cancer types. High risk areas for specific combinations of cancer types were identified and visualised from the proposed model.

Conclusions

Publicly available spatially smoothed disease estimates can be used to explore additional research questions by modelling multiple cancer types jointly. These proposed multivariate meta-analysis models could be useful when unit record data are unavailable because of privacy and confidentiality requirements.",2020-10-17 +31960371,User-Friendly and Interactive Analysis of ChIP-Seq Data Using EaSeq.,"ChIP-seq is a central method to gain understanding of the regulatory networks in the genome of stem cells and during differentiation. Exploration and analysis of such genome-wide data often leads to unexpected discoveries and new hypotheses. It therefore accelerates and improves the discovery phase, when scientists with biological understanding are enabled to analyze and visualize data. EaSeq ( http://easeq.net ) offers integrated exploration of genome-wide data in a visual, versatile, user-friendly, and interactive manner that connects abstract interpretations to the signal distribution at the underlying loci. Here we introduce the interface, data types, and acquisition, and guide the reader through two example workflows. These workflows will enable the reader to perform genome-wide analysis and visualization of transcription factor binding sites and histone marks. This includes making basic plots; finding, annotating, sorting, and filtering of peaks; using EaSeq as a genome browser; measuring ChIP-seq signal and calculating ratios; as well as data import and export.",2020-01-01 +33390759,"Importance of building a digital species index (spindex) for entomology collections: A case study, results and recommendations.","The Entomology Collection at the Academy of Natural Sciences of Drexel University (ANSP) contains approximately four million insect specimens including some of the oldest in the Western Hemisphere. 
Like most large entomology collections, no complete inventory of the species represented in the collection was available and even a physical search for a species could not ensure that all available specimens would be recovered for study. Between 2010 and 2014, we created a species-level index (called here spindex) of all species and their specimen counts at ANSP, along with each species' location in the collection. Additional data captured during the project included the higher level classification of each species and type of specimen preparation. The spindex is searchable online: http://symbiont.ansp.org/entomology/. The spindex project documented 96,126 species in the ANSP Entomology Collection, representing about 10% of the described insect fauna. Additionally, over 900 putative primary types were discovered outside the Primary Type Collection. The completion of this project has improved access to the collection by enabling scientists and other users worldwide to search these collection holdings remotely and has facilitated staff in curation, research, collection management and funding proposals. A spindex is an important tool that is overlooked for planning and carrying out specimen level digitisation. This project is a case study for building a species-level index. A detailed protocol is provided, along with recommendations for other collections, including cost estimates and strategies for tracking progress and avoiding common obstacles.",2020-12-23 +30794542,Repository of Enriched Structures of Proteins Involved in the Red Blood Cell Environment (RESPIRE).,"The Red Blood Cell (RBC) is a metabolically-driven cell vital for processes such a gas transport and homeostasis. RBC possesses at its surface exposing antigens proteins that are critical in blood transfusion. 
Due to their importance, numerous studies address the cell function as a whole but more and more details of RBC structure and protein content are now studied using massive state-of-the art characterisation techniques. Yet, the resulting information is frequently scattered in many scientific articles, in many databases and specialized web servers. To provide a more compendious view of erythrocytes and of their protein content, we developed a dedicated database called RESPIRE that aims at gathering a comprehensive and coherent ensemble of information and data about proteins in RBC. This cell-driven database lists proteins found in erythrocytes. For a given protein entry, initial data are processed from external portals and enriched by using state-of-the-art bioinformatics methods. As structural information is extremely useful to understand protein function and predict the impact of mutations, a strong effort has been put on the prediction of protein structures with a special treatment for membrane proteins. Browsing the database is available through text search for reference gene names or protein identifiers, through pre-defined queries or via hyperlinks. The RESPIRE database provides valuable information and unique annotations that should be useful to a wide audience of biologists, clinicians and structural biologists. Database URL: http://www.dsimb.inserm.fr/respire.",2019-02-22 +32639173,A Probabilistic Approach to Evaluate the Risk of Decreased Total Triiodothyronine Hormone Levels following Chronic Exposure to PFOS and PFHxS via Contaminated Drinking Water.,"

Background

Extensive exposure to per- and polyfluoroalkyl substances (PFAS) have been observed in many countries. Current deterministic frameworks for risk assessment lack the ability to predict the likelihood of effects and to assess uncertainty. When exposure exceeds tolerable intake levels, these shortcomings hamper risk management and communication.

Objective

The integrated probabilistic risk assessment (IPRA) combines dose-response and exposure data to estimate the likelihood of adverse effects. We evaluated the usefulness of the IPRA for risk characterization related to decreased levels of total triiodothyronine (T3) in humans following a real case of high exposure to PFAS via drinking water.

Methods

PFAS exposure was defined as serum levels from residents of a contaminated area in Ronneby, Sweden. Median levels were 270 ng/mL [perfluorooctane sulfonic acid (PFOS)] and 229 ng/mL [perfluorohexane sulfonic acid (PFHxS)] for individuals who resided in Ronneby 1 y before the exposure termination. This data was integrated with data from a subchronic toxicity study in monkeys exposed daily to PFOS. Benchmark dose modeling was employed to describe separate dose-effect relationship for males and females, and extrapolation factor distributions were used to estimate the corresponding human benchmark dose. The critical effect level was defined as a 10% decrease in total T3.

Results

The median probability of critical exposure, following a combined exposure to PFOS and PFHxS, was estimated to be [2.1% (90% CI: 0.4%-13.1%)]. Gender-based analysis showed that this risk was almost entirely distributed among women, namely [3.9% (90% CI: 0.8%-21.6%)].

Discussion

The IPRA was compared with the traditional deterministic Margin of Exposure (MoE) approach. We conclude that probabilistic risk characterization represents an important step forward in the ability to adequately analyze group-specific health risks. Moreover, quantifying the sources of uncertainty is desirable, as it improves the awareness among stakeholders and will guide future efforts to improve accuracy. https://doi.org/10.1289/EHP6654.",2020-07-08 +32235926,Tutorial: guidance for quantitative confocal microscopy.,"When used appropriately, a confocal fluorescence microscope is an excellent tool for making quantitative measurements in cells and tissues. The confocal microscope's ability to block out-of-focus light and thereby perform optical sectioning through a specimen allows the researcher to quantify fluorescence with very high spatial precision. However, generating meaningful data using confocal microscopy requires careful planning and a thorough understanding of the technique. In this tutorial, the researcher is guided through all aspects of acquiring quantitative confocal microscopy images, including optimizing sample preparation for fixed and live cells, choosing the most suitable microscope for a given application and configuring the microscope parameters. Suggestions are offered for planning unbiased and rigorous confocal microscope experiments. Common pitfalls such as photobleaching and cross-talk are addressed, as well as several troubling instrumentation problems that may prevent the acquisition of quantitative data. Finally, guidelines for analyzing and presenting confocal images in a way that maintains the quantitative nature of the data are presented, and statistical analysis is discussed. A visual summary of this tutorial is available as a poster (https://doi.org/10.1038/s41596-020-0307-7).",2020-03-31 +33567369,Predicting High-Value Care Outcomes After Surgery for Skull Base Meningiomas.,"

Background

Although various predictors of adverse postoperative outcomes among patients with meningioma have been established, research has yet to develop a method for consolidating these findings to allow for predictions of adverse health care outcomes for patients diagnosed with skull base meningiomas. The objective of the present study was to develop 3 predictive algorithms that can be used to estimate an individual patient's probability of extended length of stay (LOS) in hospital, experiencing a nonroutine discharge disposition, or incurring high hospital charges after surgical resection of a skull base meningioma.

Methods

The present study used data from patients who underwent surgical resection for skull base meningiomas at a single academic institution between 2017 and 2019. Multivariate logistic regression analysis was used to predict extended LOS, nonroutine discharge, and high hospital charges, and 2000 bootstrapped samples were used to calculate an optimism-corrected C-statistic. The Hosmer-Lemeshow test was used to assess model calibration, and P < 0.05 was considered statistically significant.

Results

A total of 245 patients were included in our analysis. Our cohort was mostly female (77.6%) and white (62.4%). Our models predicting extended LOS, nonroutine discharge, and high hospital charges had optimism-corrected C-statistics of 0.768, 0.784, and 0.783, respectively. All models showed adequate calibration (P>0.05), and were deployed via an open-access, online calculator: https://neurooncsurgery3.shinyapps.io/high_value_skull_base_calc/.

Conclusions

After external validation, our predictive models have the potential to aid clinicians in providing patients with individualized risk estimation for health care outcomes after meningioma surgery.",2021-02-07 +33486653,Narrativity and Referential Activity Predict Episodic Memory Strength in Autobiographical Memories.,"Narrativity has been proposed as an indicator of episodic memory strength when people discuss their past (Nelson and Horowitz in Discourse Processes 31:307-324, 2001. https://doi.org/10.1207/S15326950dp31-3_5 ). Referential Activity, the extent to which words convey a speaker's experience of being present in the event being described, has been independently hypothesized to indicate episodic memory strength (Maskit in J Psycholinguist Res, 2021. https://doi.org/10.1007/s10936-021-09761-8 ). These hypotheses are tested using a linguistic measure of narrativity and a computerized measure of referential activity to predict previous independent ratings of episodic memory strength that used the Levine et al. (Psychol Aging 17(4):677-689, 2002. https://doi.org/10.1037//0882-7974.17.4.677 ) measure of internal details in retold personal memories provided by Schacter (Addis et al. in Psychol Sci 19(1):33-41, 2008. https://doi.org/10.1111/j.1467-9280.2008.02043.x ). Raters scored narrativity on four brief near and far past memories elicited from 32 subjects, using Nelson's narrative temporal sequence method based on Labov's (J Narrat Life Hist 7(1-4):395-415, 1997. https://doi.org/10.1075/jnlh.7.49som ) analysis of spoken narratives of personal experience; computerized weighted scores of referential activity (WRAD) were obtained on these same 128 memories. Data analysis showed that narrative temporal sequences predict internal details and WRAD predict internal details. 
Adding WRAD to narrative temporal sequences improved the prediction of internal details.",2021-01-24 +32393166,MEPHAS: an interactive graphical user interface for medical and pharmaceutical statistical analysis with R and Shiny.,"BACKGROUND:Even though R is one of the most commonly used statistical computing environments, it lacks a graphical user interface (GUI) that appeals to students, researchers, lecturers, and practitioners in medicine and pharmacy for conducting standard data analytics. Current GUIs built on top of R, such as EZR and R-Commander, aim to facilitate R coding and visualization, but most of the functionalities are still accessed through a command-line interface (CLI). To assist practitioners of medicine and pharmacy and researchers to run most routines in fundamental statistical analysis, we developed an interactive GUI; i.e., MEPHAS, to support various web-based systems that are accessible from laptops, workstations, or tablets, under Windows, macOS (and IOS), or Linux. In addition to fundamental statistical analysis, advanced statistics such as the extended Cox regression and dimensional analyses including partial least squares regression (PLS-R) and sparse partial least squares regression (SPLS-R), are also available in MEPHAS. RESULTS:MEPHAS is a web-based GUI (https://alain003.phs.osaka-u.ac.jp/mephas/) that is based on a shiny framework. We also created the corresponding R package mephas (https://mephas.github.io/). Thus far, MEPHAS has supported four categories of statistics, including probability, hypothesis testing, regression models, and dimensional analyses. Instructions and help menus were accessible during the entire analytical process via the web-based GUI, particularly advanced dimensional data analysis that required much explanation. 
The GUI was designed to be intuitive for non-technical users to perform various statistical functions, e.g., managing data, customizing plots, setting parameters, and monitoring real-time results, without any R coding from users. All generated graphs can be saved to local machines, and tables can be downloaded as CSV files. CONCLUSION:MEPHAS is a free and open-source web-interactive GUI that was designed to support statistical data analyses and prediction for medical and pharmaceutical practitioners and researchers. It enables various medical and pharmaceutical statistical analyses through interactive parameter settings and dynamic visualization of the results.",2020-05-11 +31823712,Vertical and horizontal integration of multi-omics data with miodin.,"BACKGROUND:Studies on multiple modalities of omics data such as transcriptomics, genomics and proteomics are growing in popularity, since they allow us to investigate complex mechanisms across molecular layers. It is widely recognized that integrative omics analysis holds the promise to unlock novel and actionable biological insights into health and disease. Integration of multi-omics data remains challenging, however, and requires combination of several software tools and extensive technical expertise to account for the properties of heterogeneous data. RESULTS:This paper presents the miodin R package, which provides a streamlined workflow-based syntax for multi-omics data analysis. The package allows users to perform analysis of omics data either across experiments on the same samples (vertical integration), or across studies on the same variables (horizontal integration). Workflows have been designed to promote transparent data analysis and reduce the technical expertise required to perform low-level data import and processing. CONCLUSIONS:The miodin package is implemented in R and is freely available for use and extension under the GPL-3 license. 
Package source, reference documentation and user manual are available at https://gitlab.com/algoromics/miodin.",2019-12-10 +30154505,Variation among intact tissue samples reveals the core transcriptional features of human CNS cell classes.,"It is widely assumed that cells must be physically isolated to study their molecular profiles. However, intact tissue samples naturally exhibit variation in cellular composition, which drives covariation of cell-class-specific molecular features. By analyzing transcriptional covariation in 7,221 intact CNS samples from 840 neurotypical individuals, representing billions of cells, we reveal the core transcriptional identities of major CNS cell classes in humans. By modeling intact CNS transcriptomes as a function of variation in cellular composition, we identify cell-class-specific transcriptional differences in Alzheimer's disease, among brain regions, and between species. Among these, we show that PMP2 is expressed by human but not mouse astrocytes and significantly increases mouse astrocyte size upon ectopic expression in vivo, causing them to more closely resemble their human counterparts. Our work is available as an online resource ( http://oldhamlab.ctec.ucsf.edu/ ) and provides a generalizable strategy for determining the core molecular features of cellular identity in intact biological systems.",2018-08-28 +31240103,MDR: an integrative DNA N6-methyladenine and N4-methylcytosine modification database for Rosaceae.,"Eukaryotic DNA methylation has been receiving increasing attention for its crucial epigenetic regulatory function. The recently developed single-molecule real-time (SMRT) sequencing technology provides an efficient way to detect DNA N6-methyladenine (6mA) and N4-methylcytosine (4mC) modifications at a single-nucleotide resolution. The family Rosaceae contains horticultural plants with a wide range of economic importance. 
However, little is currently known regarding the genome-wide distribution patterns and functions of 6mA and 4mC modifications in the Rosaceae. In this study, we present an integrated DNA 6mA and 4mC modification database for the Rosaceae (MDR, http://mdr.xieslab.org). MDR, the first repository for displaying and storing DNA 6mA and 4mC methylomes from SMRT sequencing data sets for Rosaceae, includes meta and statistical information, methylation densities, Gene Ontology enrichment analyses, and genome search and browse for methylated sites in NCBI. MDR provides important information regarding DNA 6mA and 4mC methylation and may help users better understand epigenetic modifications in the family Rosaceae.",2019-06-15 +27242503,Building the Ferretome.,"Databases of structural connections of the mammalian brain, such as CoCoMac (cocomac.g-node.org) or BAMS (https://bams1.org), are valuable resources for the analysis of brain connectivity and the modeling of brain dynamics in species such as the non-human primate or the rodent, and have also contributed to the computational modeling of the human brain. Another animal model that is widely used in electrophysiological or developmental studies is the ferret; however, no systematic compilation of brain connectivity is currently available for this species. Thus, we have started developing a database of anatomical connections and architectonic features of the ferret brain, the Ferret(connect)ome, www.Ferretome.org. The Ferretome database has adapted essential features of the CoCoMac methodology and legacy, such as the CoCoMac data model. This data model was simplified and extended in order to accommodate new data modalities that were not represented previously, such as the cytoarchitecture of brain areas. The Ferretome uses a semantic parcellation of brain regions as well as a logical brain map transformation algorithm (objective relational transformation, ORT). 
The ORT algorithm was also adopted for the transformation of architecture data. The database is being developed in MySQL and has been populated with literature reports on tract-tracing observations in the ferret brain using a custom-designed web interface that allows efficient and validated simultaneous input and proofreading by multiple curators. The database is equipped with a non-specialist web interface. This interface can be extended to produce connectivity matrices in several formats, including a graphical representation superimposed on established ferret brain maps. An important feature of the Ferretome database is the possibility to trace back entries in connectivity matrices to the original studies archived in the system. Currently, the Ferretome contains 50 reports on connections comprising 20 injection reports with more than 150 labeled source and target areas, the majority reflecting connectivity of subcortical nuclei and 15 descriptions of regional brain architecture. We hope that the Ferretome database will become a useful resource for neuroinformatics and neural modeling, and will support studies of the ferret brain as well as facilitate advances in comparative studies of mesoscopic brain connectivity.",2016-05-10 +28759895,Assessing acceptability and feasibility of provider-initiated HIV testing and counseling in Ghana.,"In Ghana, HIV voluntary counseling and testing remains poorly utilized. The World Health Organization (WHO) has recommended opt-out, provider-initiated testing and counseling (PITC) in order to increase utilization and earlier intervention. Yet implementation challenges remain in resource-scarce settings. This study sought to better understand the dynamics of providing PITC at Apam Catholic Hospital, a district referral hospital in Ghana. Semi-structured interviews were conducted with healthcare providers and patients exploring attitudes regarding PITC, community stigma, and HIV knowledge. 
Results showed healthcare providers believed PITC would lead to earlier diagnosis and intervention, but concerns persisted over increased costs. Patients welcomed PITC, but expressed discomfort in opting-out. Patients demonstrated incomplete HIV knowledge and widely believed spiritual healers and prayer can cure the infection. Acceptance of PITC by both healthcare providers and patients remains high, but concerns over resource costs and HIV knowledge persist as challenges. [Full article available at http://rimed.org/rimedicaljournal-2017-08.asp].",2017-08-01 +30052511,Mining human cancer datasets for kallikrein expression in cancer: the 'KLK-CANMAP' Shiny web tool.,"The dysregulation of the serine-protease family kallikreins (KLKs), comprising 15 genes, has been reportedly associated with cancer. Their expression in several tissues and physiological fluids makes them potential candidates as biomarkers and therapeutic targets. There are several databases available to mine gene expression in cancer, which often include clinical and pathological data. However, these platforms present some limitations when comparing a specific set of genes and can generate considerable unwanted data. Here, several datasets that showed significant differential expression (p<0.01) in cancer vs. normal (n=118), metastasis vs. primary (n=15) and association with cancer survival (n=21) have been compiled in a user-friendly format from two open and/or publicly available databases Oncomine and OncoLnc for the 15 KLKs. The data have been included in a free web application tool: the KLK-CANMAP https://cancerbioinformatics.shinyapps.io/klk-canmap/. This tool integrates, analyses and visualises data and it was developed with the R Shiny framework. Using KLK-CANMAP box-plots, heatmaps and Kaplan-Meier graphs can be generated for the KLKs of interest. 
We believe this new cancer KLK focused web tool will benefit the KLK community by narrowing the data visualisation to only the genes of interest.",2018-09-01 +27551218,Epiphytic bryozoans on Neptune grass - a sample-based data set.,"

Background

The seagrass Posidonia oceanica L. Delile, commonly known as Neptune grass, is an endemic species of the Mediterranean Sea. It hosts a distinctive and diverse epiphytic community, dominated by various macroalgal and animal organisms. Mediterranean bryozoans have been extensively studied but quantitative data assessing temporal and spatial variability have rarely been documented. In Lepoint et al. (2014a, b) occurrence and abundance data of epiphytic bryozoan communities on leaves of Posidonia oceanica inhabiting Revellata Bay (Corsica, Mediterranean Sea) were reported and trophic ecology of Electra posidoniae Gautier assessed.

New information

Here, metadata information is provided on the data set discussed in Lepoint et al. (2014a) and published on the GBIF portal as a sampling-event data set: http://ipt.biodiversity.be/resource?r=ulg_bryozoa&v=1.0). The data set is enriched by data concerning species settled on Posidonia scales (dead petiole of Posidonia leaves, remaining after limb abscission).",2016-07-21 +32877839,PadChest: A large chest x-ray image dataset with multi-label annotated reports.,"We present a labeled large-scale, high resolution chest x-ray dataset for the automated exploration of medical images along with their associated reports. This dataset includes more than 160,000 images obtained from 67,000 patients that were interpreted and reported by radiologists at San Juan Hospital (Spain) from 2009 to 2017, covering six different position views and additional information on image acquisition and patient demography. The reports were labeled with 174 different radiographic findings, 19 differential diagnoses and 104 anatomic locations organized as a hierarchical taxonomy and mapped onto standard Unified Medical Language System (UMLS) terminology. Of these reports, 27% were manually annotated by trained physicians and the remaining set was labeled using a supervised method based on a recurrent neural network with attention mechanisms. The labels generated were then validated in an independent test set achieving a 0.93 Micro-F1 score. To the best of our knowledge, this is one of the largest public chest x-ray databases suitable for training supervised models concerning radiographs, and the first to contain radiographic reports in Spanish. The PadChest dataset can be downloaded from http://bimcv.cipf.es/bimcv-projects/padchest/.",2020-08-20 +,The Systems Biology Markup Language (SBML): Language Specification for Level 3 Version 1 Core,"Computational models can help researchers to interpret data, understand biological functions, and make quantitative predictions. 
The Systems Biology Markup Language (SBML) is a file format for representing computational models in a declarative form that different software systems can exchange. SBML is oriented towards describing biological processes of the sort common in research on a number of topics, including metabolic pathways, cell signaling pathways, and many others. By supporting SBML as an input/output format, different tools can all operate on an identical representation of a model, removing opportunities for translation errors and assuring a common starting point for analyses and simulations. This document provides the specification for Release 2 of Version 1 of SBML Level 3 Core. The specification defines the data structures prescribed by SBML, their encoding in XML (the eXtensible Markup Language), validation rules that determine the validity of an SBML document, and examples of models in SBML form. No design changes have been made to the description of models between Release 1 and Release 2; changes are restricted to the format of annotations, the correction of errata and the addition of clarifications. Other materials and software are available from the SBML project website at http://sbml.org/.",2018-03-01 +30358807,Flexible design of multiple metagenomics classification pipelines with UGENE.,"

Summary

UGENE is a free, open-source, cross-platform bioinformatics software. UGENE deploys pre-defined pipelines and a flexible instrument to design new workflows and visually build multi-step analytics pipelines. The new UGENE v.1.31 release offers graphical, user-friendly wrapping of a number of popular command-line metagenomics classification programs (Kraken, CLARK, DIAMOND), combinable serially and in parallel through the workflow designer, with multiple, customizable reference databases. Ensemble classification voting is available through the WEVOTE algorithm, with augmented output in the form of detailed table reports. Pre-built workflows (which include all steps from data cleaning to summaries) are included with the installation and a tutorial is available on the UGENE website. Further expansion with multiple visualization tools for reports is planned.

Availability and implementation

UGENE is available at http://ugene.net/, implemented in C++ and Qt, and released under GNU General Public License (GPL) version 2.",2019-06-01 +33407073,SPServer: split-statistical potentials for the analysis of protein structures and protein-protein interactions.,"

Background

Statistical potentials, also named knowledge-based potentials, are scoring functions derived from empirical data that can be used to evaluate the quality of protein folds and protein-protein interaction (PPI) structures. In previous works we decomposed the statistical potentials in different terms, named Split-Statistical Potentials, accounting for the type of amino acid pairs, their hydrophobicity, solvent accessibility and type of secondary structure. These potentials have been successfully used to identify near-native structures in protein structure prediction, rank protein docking poses, and predict PPI binding affinities.

Results

Here, we present the SPServer, a web server that applies the Split-Statistical Potentials to analyze protein folds and protein interfaces. SPServer provides global scores as well as residue/residue-pair profiles presented as score plots and maps. This level of detail allows users to: (1) identify potentially problematic regions on protein structures; (2) identify disrupting amino acid pairs in protein interfaces; and (3) compare and analyze the quality of tertiary and quaternary structural models.

Conclusions

While there are many web servers that provide scoring functions to assess the quality of either protein folds or PPI structures, SPServer integrates both aspects in a unique easy-to-use web server. Moreover, the server permits to locally assess the quality of the structures and interfaces at a residue level and provides tools to compare the local assessment between structures. SERVER ADDRESS: https://sbi.upf.edu/spserver/ .",2021-01-06 +31150060,Augmented Interval List: a novel data structure for efficient genomic interval search.,"

Motivation

Genomic data is frequently stored as segments or intervals. Because this data type is so common, interval-based comparisons are fundamental to genomic analysis. As the volume of available genomic data grows, developing efficient and scalable methods for searching interval data is necessary.

Results

We present a new data structure, the Augmented Interval List (AIList), to enumerate intersections between a query interval q and an interval set R. An AIList is constructed by first sorting R as a list by the interval start coordinate, then decomposing it into a few approximately flattened components (sublists), and then augmenting each sublist with the running maximum interval end. The query time for AIList is O(log2N+n+m), where n is the number of overlaps between R and q, N is the number of intervals in the set R and m is the average number of extra comparisons required to find the n overlaps. Tested on real genomic interval datasets, AIList code runs 5-18 times faster than standard high-performance code based on augmented interval-trees, nested containment lists or R-trees (BEDTools). For large datasets, the memory-usage for AIList is 4-60% of other methods. The AIList data structure, therefore, provides a significantly improved fundamental operation for highly scalable genomic data analysis.

Availability and implementation

An implementation of the AIList data structure with both construction and search algorithms is available at http://ailist.databio.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +32817221,Broad Anti-coronavirus Activity of Food and Drug Administration-Approved Drugs against SARS-CoV-2 In Vitro and SARS-CoV In Vivo. ,"Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) emerged in China at the end of 2019 and has rapidly caused a pandemic, with over 20 million recorded COVID-19 cases in August 2020 (https://covid19.who.int/). There are no FDA-approved antivirals or vaccines for any coronavirus, including SARS-CoV-2. Current treatments for COVID-19 are limited to supportive therapies and off-label use of FDA-approved drugs. Rapid development and human testing of potential antivirals is urgently needed. Numerous drugs are already approved for human use, and subsequently, there is a good understanding of their safety profiles and potential side effects, making them easier to fast-track to clinical studies in COVID-19 patients. Here, we present data on the antiviral activity of 20 FDA-approved drugs against SARS-CoV-2 that also inhibit SARS-CoV and Middle East respiratory syndrome coronavirus (MERS-CoV). We found that 17 of these inhibit SARS-CoV-2 at non-cytotoxic concentrations. We directly followed up seven of these to demonstrate that all are capable of inhibiting infectious SARS-CoV-2 production. Moreover, we evaluated two of these, chloroquine and chlorpromazine, in vivo using a mouse-adapted SARS-CoV model and found that both drugs protect mice from clinical disease. IMPORTANCE There are no FDA-approved antivirals for any coronavirus, including SARS-CoV-2. Numerous drugs are already approved for human use that may have antiviral activity and therefore could potentially be rapidly repurposed as antivirals. Here, we present data assessing the antiviral activity of 20 FDA-approved drugs against SARS-CoV-2 that also inhibit SARS-CoV and MERS-CoV in vitro. We found that 17 of these inhibit SARS-CoV-2, suggesting that they may have pan-anti-coronaviral activity. 
We directly followed up seven of these and found that they all inhibit infectious-SARS-CoV-2 production. Moreover, we evaluated chloroquine and chlorpromazine in vivo using mouse-adapted SARS-CoV. We found that neither drug inhibited viral replication in the lungs, but both protected against clinical disease.",2020-10-14 +26590405,TSGene 2.0: an updated literature-based knowledgebase for tumor suppressor genes.,"Tumor suppressor genes (TSGs) are a major type of gatekeeper genes in the cell growth. A knowledgebase with the systematic collection and curation of TSGs in multiple cancer types is critically important for further studying their biological functions as well as for developing therapeutic strategies. Since its development in 2012, the Tumor Suppressor Gene database (TSGene), has become a popular resource in the cancer research community. Here, we reported the TSGene version 2.0, which has substantial updates of contents (e.g. up-to-date literature and pan-cancer genomic data collection and curation), data types (noncoding RNAs and protein-coding genes) and content accessibility. Specifically, the current TSGene 2.0 contains 1217 human TSGs (1018 protein-coding and 199 non-coding genes) curated from over 9000 articles. Additionally, TSGene 2.0 provides thousands of expression and mutation patterns derived from pan-cancer data of The Cancer Genome Atlas. A new web interface is available at http://bioinfo.mc.vanderbilt.edu/TSGene/. Systematic analyses of 199 non-coding TSGs provide numerous cancer-specific non-coding mutational events for further screening and clinical use. Intriguingly, we identified 49 protein-coding TSGs that were consistently down-regulated in 11 cancer types. 
In summary, TSGene 2.0, which is the only available database for TSGs, provides the most updated TSGs and their features in pan-cancer.",2015-11-20 +27890865,CSF-PR 2.0: An Interactive Literature Guide to Quantitative Cerebrospinal Fluid Mass Spectrometry Data from Neurodegenerative Disorders.,"The rapidly growing number of biomedical studies supported by mass spectrometry based quantitative proteomics data has made it increasingly difficult to obtain an overview of the current status of the research field. A better way of organizing the biomedical proteomics information from these studies and making it available to the research community is therefore called for. In the presented work, we have investigated scientific publications describing the analysis of the cerebrospinal fluid proteome in relation to multiple sclerosis, Parkinson's disease and Alzheimer's disease. Based on a detailed set of filtering criteria we extracted 85 data sets containing quantitative information for close to 2000 proteins. This information was made available in CSF-PR 2.0 (http://probe.uib.no/csf-pr-2.0), which includes novel approaches for filtering, visualizing and comparing quantitative proteomics information in an interactive and user-friendly environment. CSF-PR 2.0 will be an invaluable resource for anyone interested in quantitative proteomics on cerebrospinal fluid.",2016-11-27 +30999860,HumCFS: a database of fragile sites in human chromosomes.,"

Background

Fragile sites are the chromosomal regions that are susceptible to breakage, and their frequency varies among the human population. Based on the frequency of fragile site induction, they are categorized as common and rare fragile sites. Common fragile sites are sensitive to replication stress and often rearranged in cancer. Rare fragile sites are the archetypal trinucleotide repeats. Fragile sites are known to be involved in chromosomal rearrangements in tumors. Human miRNA genes are also present at fragile sites. A better understanding of genes and miRNAs lying in the fragile site regions and their association with disease progression is required.

Result

HumCFS is a manually curated database of human chromosomal fragile sites. HumCFS provides useful information on fragile sites such as coordinates on the chromosome, cytoband, their chemical inducers and frequency of fragile site (rare or common), genes and miRNAs lying in fragile sites. Protein coding genes in the fragile sites were identified by mapping the coordinates of fragile sites with human genome Ensembl (GRCh38/hg38). Genes present in fragile sites were further mapped to DisGenNET database, to understand their possible link with human diseases. Human miRNAs from miRBase were also mapped on fragile site coordinates. In brief, HumCFS provides useful information about 125 human chromosomal fragile sites and their association with 4921 human protein-coding genes and 917 human miRNAs.

Conclusion

User-friendly web-interface of HumCFS and hyper-linking with other resources will help researchers to search for genes, miRNAs efficiently and to intersect the relationship among them. For easy data retrieval and analysis, we have integrated standard web-based tools, such as JBrowse, BLAST etc. Also, the user can download the data in various file formats such as text files, gff3 files and Bed-format files which can be used on UCSC browser. Database URL: http://webs.iiitd.edu.in/raghava/humcfs/.",2019-04-18 +31028388,SEanalysis: a web tool for super-enhancer associated regulatory analysis.,"Super-enhancers (SEs) have prominent roles in biological and pathological processes through their unique transcriptional regulatory capability. To date, several SE databases have been developed by us and others. However, these existing databases do not provide downstream or upstream regulatory analyses of SEs. Pathways, transcription factors (TFs), SEs, and SE-associated genes form complex regulatory networks. Therefore, we designed a novel web server, SEanalysis, which provides comprehensive SE-associated regulatory network analyses. SEanalysis characterizes SE-associated genes, TFs binding to target SEs, and their upstream pathways. The current version of SEanalysis contains more than 330 000 SEs from more than 540 types of cells/tissues, 5042 TF ChIP-seq data generated from these cells/tissues, DNA-binding sequence motifs for ∼700 human TFs and 2880 pathways from 10 databases. SEanalysis supports searching by either SEs, samples, TFs, pathways or genes. The complex regulatory networks formed by these factors can be interactively visualized. In addition, we developed a customizable genome browser containing >6000 customizable tracks for visualization. 
The server is freely available at http://licpathway.net/SEanalysis.",2019-07-01 +32297936,ProteinsPlus: interactive analysis of protein-ligand binding interfaces.,"Due to the increasing amount of publicly available protein structures searching, enriching and investigating these data still poses a challenging task. The ProteinsPlus web service (https://proteins.plus) offers a broad range of tools addressing these challenges. The web interface to the tool collection focusing on protein-ligand interactions has been geared towards easy and intuitive access to a large variety of functionality for life scientists. Since our last publication, the ProteinsPlus web service has been extended by additional services as well as it has undergone substantial infrastructural improvements. A keyword search functionality was added on the start page of ProteinsPlus enabling users to work on structures without knowing their PDB code. The tool collection has been augmented by three tools: StructureProfiler validates ligands and active sites using selection criteria of well-established protein-ligand benchmark data sets, WarPP places water molecules in the ligand binding sites of a protein, and METALizer calculates, predicts and scores coordination geometries of metal ions based on surrounding complex atoms. Additionally, all tools provided by ProteinsPlus are available through a REST service enabling the automated integration in structure processing and modeling pipelines.",2020-07-01 +32816860,Intratumoral Copper Modulates PD-L1 Expression and Influences Tumor Immune Evasion.,"Therapeutic checkpoint antibodies blocking programmed death receptor 1/programmed death ligand 1 (PD-L1) signaling have radically improved clinical outcomes in cancer. However, the regulation of PD-L1 expression on tumor cells is still poorly understood. Here we show that intratumoral copper levels influence PD-L1 expression in cancer cells. 
Deep analysis of The Cancer Genome Atlas database and tissue microarrays showed strong correlation between the major copper influx transporter copper transporter 1 (CTR-1) and PD-L1 expression across many cancers but not in corresponding normal tissues. Copper supplementation enhanced PD-L1 expression at mRNA and protein levels in cancer cells and RNA sequencing revealed that copper regulates key signaling pathways mediating PD-L1-driven cancer immune evasion. Conversely, copper chelators inhibited phosphorylation of STAT3 and EGFR and promoted ubiquitin-mediated degradation of PD-L1. Copper-chelating drugs also significantly increased the number of tumor-infiltrating CD8+ T and natural killer cells, slowed tumor growth, and improved mouse survival. Overall, this study reveals an important role for copper in regulating PD-L1 and suggests that anticancer immunotherapy might be enhanced by pharmacologically reducing intratumor copper levels. SIGNIFICANCE: These findings characterize the role of copper in modulating PD-L1 expression and contributing to cancer immune evasion, highlighting the potential for repurposing copper chelators as enhancers of antitumor immunity. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/19/4129/F1.large.jpg.",2020-08-18 +26602692,AREsite2: an enhanced database for the comprehensive investigation of AU/GU/U-rich elements.,"AREsite2 represents an update for AREsite, an on-line resource for the investigation of AU-rich elements (ARE) in human and mouse mRNA 3'UTR sequences. The new updated and enhanced version allows detailed investigation of AU, GU and U-rich elements (ARE, GRE, URE) in the transcriptome of Homo sapiens, Mus musculus, Danio rerio, Caenorhabditis elegans and Drosophila melanogaster. It contains information on genomic location, genic context, RNA secondary structure context and conservation of annotated motifs. 
Improvements include annotation of motifs not only in 3'UTRs but in the whole gene body including introns, additional genomes, and locally stable secondary structures from genome wide scans. Furthermore, we include data from CLIP-Seq experiments in order to highlight motifs with validated protein interaction. Additionally, we provide a REST interface for experienced users to interact with the database in a semi-automated manner. The database is publicly available at: http://rna.tbi.univie.ac.at/AREsite.",2015-11-23 +26432830,"MitoMiner v3.1, an update on the mitochondrial proteomics database.","Mitochondrial proteins remain the subject of intense research interest due to their implication in an increasing number of different conditions including mitochondrial and metabolic disease, cancer, and neuromuscular degenerative and age-related disorders. However, the mitochondrial proteome has yet to be accurately and comprehensively defined, despite many studies. To support mitochondrial research, we developed MitoMiner (http://mitominer.mrc-mbu.cam.ac.uk), a freely accessible mitochondrial proteomics database. MitoMiner integrates different types of subcellular localisation evidence with protein information from public resources, and so provides a comprehensive central resource for data on mitochondrial protein localisation. Here we report important updates to the database including the addition of subcellular immunofluorescent staining results from the Human Protein Atlas, computational predictions of mitochondrial targeting sequences, and additional large-scale mass-spectrometry and GFP tagging data sets. This evidence is shared across the 12 species in MitoMiner (now including Schizosaccharomyces pombe) by homology mapping. MitoMiner provides multiple ways of querying the data including simple text searches, predefined queries and custom queries created using the interactive QueryBuilder. 
For remote programmatic access, API's are available for several programming languages. This combination of data and flexible querying makes MitoMiner a unique platform to investigate mitochondrial proteins, with application in mitochondrial research and prioritising candidate mitochondrial disease genes.",2015-10-01 +32978629,Diagnostic Delay Is Associated With Complicated Disease and Growth Impairment in Paediatric Crohn's Disease.,"

Background

Paediatric data on the association between diagnostic delay and inflammatory bowel disease [IBD] complications are lacking. We aimed to determine the effect of diagnostic delay on stricturing/fistulising complications, surgery, and growth impairment in a large paediatric cohort, and to identify predictors of diagnostic delay.

Methods

We conducted a national, prospective, multicentre IBD inception cohort study including 1399 children. Diagnostic delay was defined as time from symptom onset to diagnosis >75th percentile. Multivariable proportional hazards [PH] regression was used to examine the association between diagnostic delay and stricturing/fistulising complications and surgery, and multivariable linear regression to examine the association between diagnostic delay and growth. Predictors of diagnostic delay were identified using Cox PH regression.

Results

Overall (64% Crohn's disease [CD]; 36% ulcerative colitis/IBD unclassified [UC/IBD-U]; 57% male), median time to diagnosis was 4.2 (interquartile range [IQR] 2.0-9.2) months. For the overall cohort, diagnostic delay was >9.2 months; in CD, >10.8 months and in UC/IBD-U, >6.6 months. In CD, diagnostic delay was associated with a 2.5-fold higher rate of strictures/internal fistulae (hazard ratio [HR] 2.53, 95% confidence interval [CI] 1.41-4.56). Every additional month of diagnostic delay was associated with a decrease in height-for-age z-score of 0.013 standard deviations [95% CI 0.005-0.021]. Associations persisted after adjusting for disease location and therapy. No independent association was observed between diagnostic delay and surgery in CD or UC/IBD-U. Diagnostic delay was more common in CD, particularly small bowel CD. Abdominal pain, including isolated abdominal pain in CD, was associated with diagnostic delay.

Conclusions

Diagnostic delay represents a risk factor for stricturing/internal fistulising complications and growth impairment in paediatric CD.

Podcast

This article has an associated podcast which can be accessed at https://academic.oup.com/ecco-jcc/pages/podcast.",2021-03-01 +33725111,Whole genome analysis of more than 10 000 SARS-CoV-2 virus unveils global genetic diversity and target region of NSP6.,"Whole genome analysis of SARS-CoV-2 is important to identify its genetic diversity. Moreover, accurate detection of SARS-CoV-2 is required for its correct diagnosis. To address these, first we have analysed publicly available 10 664 complete or near-complete SARS-CoV-2 genomes of 73 countries globally to find mutation points in the coding regions as substitution, deletion, insertion and single nucleotide polymorphism (SNP) globally and country wise. In this regard, multiple sequence alignment is performed in the presence of reference sequence from NCBI. Once the alignment is done, a consensus sequence is build to analyse each genomic sequence to identify the unique mutation points as substitutions, deletions, insertions and SNPs globally, thereby resulting in 7209, 11700, 119 and 53 such mutation points respectively. Second, in such categories, unique mutations for individual countries are determined with respect to other 72 countries. In case of India, unique 385, 867, 1 and 11 substitutions, deletions, insertions and SNPs are present in 566 SARS-CoV-2 genomes while 458, 1343, 8 and 52 mutation points in such categories are common with other countries. In majority (above 10%) of virus population, the most frequent and common mutation points between global excluding India and India are L37F, P323L, F506L, S507G, D614G and Q57H in NSP6, RdRp, Exon, Spike and ORF3a respectively. While for India, the other most frequent mutation points are T1198K, A97V, T315N and P13L in NSP3, RdRp, Spike and ORF8 respectively. These mutations are further visualised in protein structures and phylogenetic analysis has been done to show the diversity in virus genomes. 
Third, a web application is provided for searching mutation points globally and country wise. Finally, we have identified the potential conserved region as target that belongs to the coding region of ORF1ab, specifically to the NSP6 gene. Subsequently, we have provided the primers and probes using that conserved region so that it can be used for detecting SARS-CoV-2. Contact: indrajit@nitttrkol.ac.in. Supplementary information: Supplementary data are available at http://www.nitttrkol.ac.in/indrajit/projects/COVID-Mutation-10K.",2021-03-01 +32734663,The Bio3D packages for structural bioinformatics.,"Bio3D is a family of R packages for the analysis of biomolecular sequence, structure, and dynamics. Major functionality includes biomolecular database searching and retrieval, sequence and structure conservation analysis, ensemble normal mode analysis, protein structure and correlation network analysis, principal component, and related multivariate analysis methods. Here, we review recent package developments, including a new underlying segregation into separate packages for distinct analysis, and introduce a new method for structure analysis named ensemble difference distance matrix analysis (eDDM). The eDDM approach calculates and compares atomic distance matrices across large sets of homologous atomic structures to help identify the residue wise determinants underlying specific functional processes. An eDDM workflow is detailed along with an example application to a large protein family. As a new member of the Bio3D family, the Bio3D-eddm package supports both experimental and theoretical simulation-generated structures, is integrated with other methods for dissecting sequence-structure-function relationships, and can be used in a highly automated and reproducible manner. 
Bio3D is distributed as an integrated set of platform independent open source R packages available from: http://thegrantlab.org/bio3d/.",2020-08-17 +26995712,Rare disease relations through common genes and protein interactions.,"ODCs (Orphan Disease Connections), available at http://csbg.cnb.csic.es/odcs, is a novel resource to explore potential molecular relations between rare diseases. These molecular relations have been established through the integration of disease susceptibility genes and human protein-protein interactions. The database currently contains 54,941 relations between 3032 diseases.",2016-03-16 +33395407,Alignment-free method for functional annotation of amino acid substitutions: Application on epigenetic factors involved in hematologic malignancies.,"For the last couple of decades, there has been a significant growth in sequencing data, leading to an extraordinary increase in the number of gene variants. This places a challenge on the bioinformatics research community to develop and improve computational tools for functional annotation of new variants. Genes coding for epigenetic regulators have important roles in cancer pathogenesis and mutations in these genes show great potential as clinical biomarkers, especially in hematologic malignancies. Therefore, we developed a model that specifically focuses on these genes, with an assumption that it would outperform general models in predicting the functional effects of amino acid substitutions. EpiMut is a standalone software that implements a sequence based alignment-free method. We applied a two-step approach for generating sequence based features, relying on the biophysical and biochemical indices of amino acids and the Fourier Transform as a sequence transformation method. For each gene in the dataset, the machine learning algorithm-Naïve Bayes was used for building a model for prediction of the neutral or disease-related status of variants. 
EpiMut outperformed state-of-the-art tools used for comparison, PolyPhen-2, SIFT and SNAP2. Additionally, EpiMut showed the highest performance on the subset of variants positioned outside conserved functional domains of analysed proteins, which represents an important group of cancer-related variants. These results imply that EpiMut can be applied as a first choice tool in research of the impact of gene variants in epigenetic regulators, especially in the light of the biomarker role in hematologic malignancies. EpiMut is freely available at https://www.vin.bg.ac.rs/180/tools/epimut.php.",2021-01-04 +32810414,Language and Speech Markers of Primary Progressive Aphasia: A Systematic Review.,"Purpose This systematic review aimed to establish language and speech markers to support the clinical diagnosis of primary progressive aphasia (PPA) and its clinical phenotypes. Our first objective was to identify behavioral language and speech markers of early-stage PPA. Our second objective was to identify the electrophysiological correlates of the language and speech characteristics in PPA. Method The databases MEDLINE, Web of Science, and Embase were searched for relevant articles. To identify behavioral markers, the initial subjective complaints and the language and speech deficits detected during the initial diagnostic evaluation were summarized for PPA in general and each clinical variant according to the 2011 consensus diagnostic criteria (nonfluent variant [NFV], semantic variant, and logopenic variant [LV]). To identify electrophysiological markers, the studies in which event-related potentials (ERPs) were elicited by a language or speech paradigm in patients with PPA were included. Results In total, 114 relevant studies were identified, including 110 behavioral studies and only four electrophysiological studies. 
This review suggests that patients with the semantic variant could be accurately differentiated from the NFV and LV in the initial stages based on the consensus criteria. Nonetheless, the early differentiation between the NFV and LV is not straightforward. In the four electrophysiological studies, differences in the latency, amplitude, and topographical distribution of the semantic N400 component were found between patients with PPA and healthy controls. Conclusions To accurately differentiate the NFV from the LV, it could be important to assess the language and speech degeneration by more specific assessments and by more objective diagnostic methods that offer insights into the language-related processes. Electrophysiological markers of PPA were not identified in this review due to the low number of studies that investigated language-related ERPs. More controlled ERP studies in larger patient cohorts are needed to investigate the diagnostic applicability of language-related ERPs in PPA. Supplemental Material https://doi.org/10.23641/asha.12798080.",2020-08-17 +27924013,The ProteomeXchange consortium in 2017: supporting the cultural change in proteomics public data deposition.,"The ProteomeXchange (PX) Consortium of proteomics resources (http://www.proteomexchange.org) was formally started in 2011 to standardize data submission and dissemination of mass spectrometry proteomics data worldwide. We give an overview of the current consortium activities and describe the advances of the past few years. Augmenting the PX founding members (PRIDE and PeptideAtlas, including the PASSEL resource), two new members have joined the consortium: MassIVE and jPOST. ProteomeCentral remains as the common data access portal, providing the ability to search for data sets in all participating PX resources, now with enhanced data visualization components. We describe the updated submission guidelines, now expanded to include four members instead of two. 
As demonstrated by data submission statistics, PX is supporting a change in culture of the proteomics field: public data sharing is now an accepted standard, supported by requirements for journal submissions resulting in public data release becoming the norm. More than 4500 data sets have been submitted to the various PX resources since 2012. Human is the most represented species with approximately half of the data sets, followed by some of the main model organisms and a growing list of more than 900 diverse species. Data reprocessing activities are becoming more prominent, with both MassIVE and PeptideAtlas releasing the results of reprocessed data sets. Finally, we outline the upcoming advances for ProteomeXchange.",2016-10-18 +22267504,Mining and integration of pathway diagrams from imaging data.,"

Motivation

Pathway diagrams from PubMed and World Wide Web (WWW) contain valuable highly curated information difficult to reach without tools specifically designed and customized for the biological semantics and high-content density of the images. There is currently no search engine or tool that can analyze pathway images, extract their pathway components (molecules, genes, proteins, organelles, cells, organs, etc.) and indicate their relationships.

Results

Here, we describe a resource of pathway diagrams retrieved from article and web-page images through optical character recognition, in conjunction with data mining and data integration methods. The recognized pathways are integrated into the BiologicalNetworks research environment linking them to a wealth of data available in the BiologicalNetworks' knowledgebase, which integrates data from >100 public data sources and the biomedical literature. Multiple search and analytical tools are available that allow the recognized cellular pathways, molecular networks and cell/tissue/organ diagrams to be studied in the context of integrated knowledge, experimental data and the literature.

Availability

BiologicalNetworks software and the pathway repository are freely available at www.biologicalnetworks.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-20 +33301927,Phylogenomic study and classification of mitochondrial DNA through virtual genomic fingerprints.,"In the present study, we evaluated the ability of the Virtual Analysis Method for Phylogenomic fingerprint Estimation (VAMPhyRE) toolkit to classify human mitochondrial DNA (mtDNA) haplogroups. In total, 357 random mtDNA sequences were obtained from different haplogroups, based on the classification of PhyloTree. Additionally, we included a control group of five sequences (Pan paniscus, Pan troglodytes, Homo sapiens neanderthalensis, Yoruba15, and the revised Cambridge reference sequence). VAMPhyRE employs a virtual hybridization technique, using probes that specifically bind to their complementary sequences in the genome. We used 65,536 probes of 8 nucleotides to identify potential sites where hybridization occurs between the mtDNA and the specific probe, forming different heteroduplexes and thus, creating a unique and specific genomic fingerprint for each sequence. Genomic fingerprints were compared, and a table of distances was calculated to obtain a mitochondrial phylogenomic tree with the macrohaplogroups, L, N, M, and R, and their corresponding haplogroups, according to universal nomenclature. The results obtained suggest an accuracy of 97.25% for the distribution of the 357 mtDNA sequences in the four macrohaplogroups and their corresponding haplogroups when compared with other mtDNA classification tools that require reference sequences and do not offer an analysis based on an evolutionary approach. These data are available online at http://biomedbiotec.encb.ipn.mx/VAMPhyRE/.",2020-12-08 +26481352,DIDA: A curated and annotated digenic diseases database.,"DIDA (DIgenic diseases DAtabase) is a novel database that provides for the first time detailed information on genes and associated genetic variants involved in digenic diseases, the simplest form of oligogenic inheritance. 
The database is accessible via http://dida.ibsquare.be and currently includes 213 digenic combinations involved in 44 different digenic diseases. These combinations are composed of 364 distinct variants, which are distributed over 136 distinct genes. The web interface provides browsing and search functionalities, as well as documentation and help pages, general database statistics and references to the original publications from which the data have been collected. The possibility to submit novel digenic data to DIDA is also provided. Creating this new repository was essential as current databases do not allow one to retrieve detailed records regarding digenic combinations. Genes, variants, diseases and digenic combinations in DIDA are annotated with manually curated information and information mined from other online resources. Next to providing a unique resource for the development of new analysis methods, DIDA gives clinical and molecular geneticists a tool to find the most comprehensive information on the digenic nature of their diseases of interest.",2015-10-19 +33533177,Neuroblastoma: The basis for cure in limited-resource settings.,"

Background

Neuroblastoma (NB) contributes the most to the mortality of childhood malignancies worldwide. The disease spectrum is heterogenous and the management complex and costly, especially in advanced disease or disease with adverse biology. In low- and middle-income countries (LMICs) the majority of NB presents in advanced stages. Therefore, with limited resources and poor prognosis the treatment of NB is often not a priority. The aim of the study was to evaluate the research activities and perceptions of the management of NB that determine the research and treatment approaches in LMICs.

Methods

Data were sourced from https://www.clinicaltrials.gov/ identifying NB trials open to LMIC. Abstracts on NB research presented at the International Society for Paediatric Oncology (SIOP) Congresses between 2014 and 2020 were evaluated according to income status. An online survey evaluating medical views on NB in LMICs and the effect on the management was conducted. Descriptive analysis was done. Where appropriate, categorical association between covariates was assessed using the Pearson chi-square (χ²) test or Fisher's exact test.

Results

There were 15/562 (2.7%) trials open to LMIC. Only six of 138 (4.3%) LMIC participated in NB trials. Of the 688 abstracts presented between 2014 and 2020 at the SIOP International Congress on NB as primary subject, 297 (42.7%) were from LMICs. Only two were from low-income countries (LICs). Sixty-one countries responded to the NB survey. Positive views towards NB management were present when treatment was based on a national protocol, the availability of trimodal or advanced treatment options were present, and when a balance of metastatic or local disease were treated.

Conclusion

Management of NB in LMICs should include increased advocacy and research as well as implementation of national management strategies.",2021-02-02 +30298955,CardioVAI: An automatic implementation of ACMG-AMP variant interpretation guidelines in the diagnosis of cardiovascular diseases.,"Variant interpretation for the diagnosis of genetic diseases is a complex process. The American College of Medical Genetics and Genomics, with the Association for Molecular Pathology, have proposed a set of evidence-based guidelines to support variant pathogenicity assessment and reporting in Mendelian diseases. Cardiovascular disorders are a field of application of these guidelines, but practical implementation is challenging due to the genetic disease heterogeneity and the complexity of information sources that need to be integrated. Decision support systems able to automate variant interpretation in the light of specific disease domains are demanded. We implemented CardioVAI (Cardio Variant Interpreter), an automated system for guidelines based variant classification in cardiovascular-related genes. Different omics-resources were integrated to assess pathogenicity of every genomic variant in 72 cardiovascular diseases related genes. We validated our method on benchmark datasets of high-confident assessed variants, reaching pathogenicity and benignity concordance up to 83 and 97.08%, respectively. We compared CardioVAI to similar methods and analyzed the main differences in terms of guidelines implementation. 
We finally made available CardioVAI as a web resource (http://cardiovai.engenome.com/) that allows users to further specialize guidelines recommendations.",2018-10-19 +32140515,Dataset of multiple methodology characterization of an illite-kaolinite clay mineral for the purpose of using it as ceramic membrane supports.,"This article describes the data generated from multiple approach methodology physico-chemical characterization of a clay mineral from the West-Central region of Morocco, Safi province (https://doi.org/10.1016/j.heliyon.2019.e02281) [1]. Data were generated from classical chemical analytical techniques namely; organic matter content, linear firing and shrinkage analysis, weight loss on ignition, porosity and methylene blue stain tests according to the French Association of Normalization (AFNOR) and American Society for Testing and Materials (ASTM). In addition to data generated using instrumental analytical techniques namely; Infrared spectroscopy (FTIR), thermal gravimetric analysis (TGA) and differential thermal analysis (DTA), X-ray diffraction (XRD), scanning electron microscopy (SEM) and elemental energy-dispersive spectroscopy (EDX).",2020-02-18 +32526479,RawVegetable - A data assessment tool for proteomics and cross-linking mass spectrometry experiments.,"We present RawVegetable, a software for mass spectrometry data assessment and quality control tailored toward shotgun proteomics and cross-linking experiments. RawVegetable provides four main modules with distinct features: (A) The charge state chromatogram that independently displays the ion current for each charge state; useful for optimizing the chromatography for highly charged ions and with lower XIC values such as those typically found in cross-linking experiments. (B) The XL-Artefact determination, which flags possible noncovalently associated peptides. 
(C) The TopN density estimation, for detecting retention time intervals of under or over-sampling, and (D) The chromatography reproducibility module, which provides pairwise comparisons between multiple experiments. RawVegetable, a tutorial, and the example data are freely available for academic use at: http://patternlabforproteomics.org/rawvegetable. SIGNIFICANCE: Chromatography optimization is a critical step for any shotgun proteomic or cross-linking mass spectrometry experiment. Here, we present a nifty solution with several key features, such as displaying individual charge state chromatograms, highlighting chromatographic regions of under- or over-sampling and checking for reproducibility.",2020-06-09 +31853469,"Data on the inner filter effect, suspended solids and nitrate interferences in fluorescence measurements of wastewater organic matter.","Data presented in this article show the extent of the inner filter effect (IFE) in fluorescence measurements of wastewater and wastewater-impacted surface water samples. Particularly, data show the effectiveness of a commonly used method for IFE correction based on UV absorbance measurement to reinstate the linearity of the relationship between fluorescence intensities and absorbance values. Data report also the effect of nitrates in fluorescence measurements of wastewater samples. Finally, data presented in this work show the effect of total suspended solids (TSS) in the UV absorbance and fluorescence measurements of different waters. Particularly, data describe the TSS effect in fluorescence intensities acquired at different pairs of excitation-emission wavelengths, and in waters with different TSS concentration. Data of this article are related to the publication ""M. Sgroi, E. Gagliano, F.G.A. Vagliasindi, P. Roccaro, Inner filter effect, suspended solids and nitrite/nitrate interferences in fluorescence measurements of wastewater organic matter, Sci. Total Environ., In press"" [1]. 
Raw data are available in a public repository (https://doi.org/10.17632/4zss49jycj.1).",2019-11-26 +31539030,TASUKE+: a web-based platform for exploring genome-wide association studies results and large-scale resequencing data.,"Recent revolutionary advancements in sequencing technologies have made it possible to obtain mass quantities of genome-scale sequence data in a cost-effective manner and have drastically altered molecular biological studies. To utilize these sequence data, genome-wide association studies (GWASs) have become increasingly important. Hence, there is an urgent need to develop a visualization tool that enables efficient data retrieval, integration of GWAS results with diverse information and rapid public release of such large-scale genotypic and phenotypic data. We developed a web-based genome browser TASUKE+ (https://tasuke.dna.affrc.go.jp/), which is equipped with the following functions: (i) interactive GWAS results visualization with genome resequencing data and annotation information, (ii) PCR primer design, (iii) phylogenetic tree reconstruction and (iv) data sharing via the web. GWAS results can be displayed in parallel with polymorphism data, read depths and annotation information in an interactive and scalable manner. Users can design PCR primers for polymorphic sites of interest. In addition, a molecular phylogenetic tree of any region can be reconstructed so that the overall relationship among the examined genomes can be understood intuitively at a glance. 
All functions are implemented through user-friendly web-based interfaces so that researchers can easily share data with collaborators in remote places without extensive bioinformatics knowledge.",2019-12-01 +33527307,Improved Segmentation of the Intracranial and Ventricular Volumes in Populations with Cerebrovascular Lesions and Atrophy Using 3D CNNs.,"Successful segmentation of the total intracranial vault (ICV) and ventricles is of critical importance when studying neurodegeneration through neuroimaging. We present iCVMapper and VentMapper, robust algorithms that use a convolutional neural network (CNN) to segment the ICV and ventricles from both single and multi-contrast MRI data. Our models were trained on a large dataset from two multi-site studies (N = 528 subjects for ICV, N = 501 for ventricular segmentation) consisting of older adults with varying degrees of cerebrovascular lesions and atrophy, which pose significant challenges for most segmentation approaches. The models were tested on 238 participants, including subjects with vascular cognitive impairment and high white matter hyperintensity burden. Two of the three test sets came from studies not used in the training dataset. We assessed our algorithms relative to four state-of-the-art ICV extraction methods (MONSTR, BET, Deep Extraction, FreeSurfer, DeepMedic), as well as two ventricular segmentation tools (FreeSurfer, DeepMedic). Our multi-contrast models outperformed other methods across many of the evaluation metrics, with average Dice coefficients of 0.98 and 0.96 for ICV and ventricular segmentation respectively. Both models were also the most time efficient, segmenting the structures in orders of magnitude faster than some of the other available methods. Our networks showed an increased accuracy with the use of a conditional random field (CRF) as a post-processing step. 
We further validated both segmentation models, highlighting their robustness to images with lower resolution and signal-to-noise ratio, compared to tested techniques. The pipeline and models are available at: https://icvmapp3r.readthedocs.io and https://ventmapp3r.readthedocs.io to enable further investigation of the roles of ICV and ventricles in relation to normal aging and neurodegeneration in large multi-site studies.",2021-02-01 +32665017,CORAZON: a web server for data normalization and unsupervised clustering based on expression profiles.,"

Objective

Data normalization and clustering are mandatory steps in gene expression and downstream analyses, respectively. However, user-friendly implementations of these methodologies are available exclusively under expensive licensing agreements or as stand-alone scripts, which poses a great obstacle for users with fewer computational skills.

Results

We developed an online tool called CORAZON (Correlations Analyses Zipper Online), which implements three unsupervised learning methods to cluster gene expression datasets in a friendly environment. It allows the usage of eight gene expression normalization/transformation methodologies and the attribute's influence. The normalizations requiring the gene length only could be performed to RNA-seq, meanwhile the others can be used with microarray and/or NanoString data. Clustering methodologies performances were evaluated through five models with accuracies between 92 and 100%. We applied our tool to obtain functional insights of non-coding RNAs (ncRNAs) based on Gene Ontology enrichment of clusters in a dataset generated by the ENCODE project. The clusters where the majority of transcripts are coding genes were enriched in Cellular, Metabolic, Transports, and Systems Development categories. Meanwhile, the ncRNAs were enriched in the Detection of Stimulus, Sensory Perception, Immunological System, and Digestion categories. CORAZON source-code is freely available at https://gitlab.com/integrativebioinformatics/corazon and the web-server can be accessed at http://corazon.integrativebioinformatics.me .",2020-07-14 +32410570,ICEKAT: an interactive online tool for calculating initial rates from continuous enzyme kinetic traces.,"

Background

Continuous enzyme kinetic assays are often used in high-throughput applications, as they allow rapid acquisition of large amounts of kinetic data and increased confidence compared to discontinuous assays. However, data analysis is often rate-limiting in high-throughput enzyme assays, as manual inspection and selection of a linear range from individual kinetic traces is cumbersome and prone to user error and bias. Currently available software programs are specialized and designed for the analysis of complex enzymatic models. Despite the widespread use of initial rate determination for processing kinetic data sets, no simple and automated program existed for rapid analysis of initial rates from continuous enzyme kinetic traces.

Results

An Interactive Continuous Enzyme Kinetics Analysis Tool (ICEKAT) was developed for semi-automated calculation of initial rates from continuous enzyme kinetic traces with particular application to the evaluation of Michaelis-Menten and EC50/IC50 kinetic parameters, as well as the results of high-throughput screening assays. ICEKAT allows users to interactively fit kinetic traces using convenient browser-based selection tools, ameliorating tedious steps involved in defining ranges to fit in general purpose programs like Microsoft Excel and Graphpad Prism, while still maintaining simplicity in determining initial rates. As a test case, we quickly analyzed over 500 continuous enzyme kinetic traces resulting from experimental data on the response of the protein lysine deacetylase SIRT1 to small-molecule activators.

Conclusions

ICEKAT allows simultaneous visualization of individual initial rate fits and the resulting Michaelis-Menten or EC50/IC50 kinetic model fits, as well as hits from high-throughput screening assays. In addition to serving as a convenient program for practicing enzymologists, ICEKAT is also a useful teaching aid to visually demonstrate in real-time how incorrect initial rate fits can affect calculated Michaelis-Menten or EC50/IC50 kinetic parameters. For the convenience of the research community, we have made ICEKAT freely available online at https://icekat.herokuapp.com/icekat.",2020-05-14 +33222540,The management of phenylketonuria in adult patients in Italy: a survey of six specialist metabolic centers.,"

Introduction

Phenylketonuria (PKU) is a rare autosomal recessive disorder caused by a deficiency of phenylalanine hydroxylase (PAH). Its prevalence is estimated to be 1:10,000 in Europe. PKU is the commonest congenital inborn error of metabolism. The aim of our study was to investigate the characteristics of clinical practice in relation to PKU in Italy, in order to raise awareness about the current management and therapeutic approaches adopted.

Methods

Six Italian experts conducted a systematic literature review as well as an internal survey to investigate the relevant clinical aspects. Collectively, the expert panel managed a total of 678 PKU patients treated in the early stages of the condition over a 16-year period across six centers.

Results

The management of PKU varied markedly between centers, with differences in the composition of the multidisciplinary team, dietary treatments, compliance and adherence to management, tetrahydrobiopterin use, and patient follow-up. Patients were mostly managed by a pediatric reference center from the initial PKU diagnosis during newborn screening until adulthood, without transition to a specialized adult clinician. Fogginess, concentration reduction, low attention, anxiety, irritability, memory deficit, headache, and unstable mood were common features in patients with uncontrolled blood phenylalanine levels (generally above 600 µmol/L).

Conclusion

A homogeneous and shared approach to the management of PKU patients is important. Our survey demonstrates the current management of PKU in Italy, with the aim of promoting the implementation of therapeutic strategies and follow-up, increased patient compliance and adherence, and the achievement of the phenylalanine level targets recommended by European Union guidelines. Emerging therapies are likely to become a standard treatment for patients unable to comply with diet therapy and maintain their phenylalanine levels below the threshold values. Supplemental data for this article is available online at https://doi.org/10.1080/03007995.2020.1847717.",2021-02-01 +32729447,The development of a nomogram to determine the frequency of elevated risk for non-medical opioid use in cancer patients.,"

Objective

Non-medical opioid use (NMOU) is a growing crisis. Cancer patients at elevated risk of NMOU (+risk) are frequently underdiagnosed. The aim of this paper was to develop a nomogram to predict the probability of +risk among cancer patients receiving outpatient supportive care consultation at a comprehensive cancer center.

Method

3,588 consecutive patients referred to a supportive care clinic were reviewed. All patients had a diagnosis of cancer and were on opioids for pain. All patients were assessed using the Edmonton Symptom Assessment Scale (ESAS), Screener and Opioid Assessment for Patients with Pain (SOAPP-14), and CAGE-AID (Cut Down-Annoyed-Guilty-Eye Opener) questionnaires. ""+risk"" was defined as an SOAPP-14 score of ≥7. A nomogram was devised based on the risk factors determined by the multivariate logistic regression model to estimate the probability of +risk.

Results

731/3,588 consults were +risk. +risk was significantly associated with gender, race, marital status, smoking status, depression, anxiety, financial distress, MEDD (morphine equivalent daily dose), and CAGE-AID score. The C-index was 0.8. A nomogram was developed and can be accessed at https://is.gd/soappnomogram. For example, for a male Hispanic patient, married, never smoked, with ESAS scores for depression = 3, anxiety = 3, financial distress = 7, a CAGE score of 0, and an MEDD score of 20, the total score is 9 + 9+0 + 0+6 + 10 + 23 + 0+1 = 58. A nomogram score of 58 indicates the probability of +risk of 0.1.

Significance of results

We established a practical nomogram to assess the +risk. The application of a nomogram based on routinely collected clinical data can help clinicians establish patients with +risk and positively impact care planning.",2021-02-01 +31885040,Small noncoding RNA discovery and profiling with sRNAtools based on high-throughput sequencing.,"Small noncoding RNAs (sRNA/sncRNAs) are generated from different genomic loci and play important roles in biological processes, such as cell proliferation and the regulation of gene expression. Next-generation sequencing (NGS) has provided an unprecedented opportunity to discover and quantify diverse kinds of sncRNA, such as tRFs (tRNA-derived small RNA fragments), phasiRNAs (phased, secondary, small-interfering RNAs), Piwi-interacting RNA (piRNAs) and plant-specific 24-nt short interfering RNAs (siRNAs). However, currently available web-based tools do not provide approaches to comprehensively analyze all of these diverse sncRNAs. This study presents a novel integrated platform, sRNAtools (https://bioinformatics.caf.ac.cn/sRNAtools), that can be used in conjunction with high-throughput sequencing to identify and functionally annotate sncRNAs, including profiling microRNAss, piRNAs, tRNAs, small nuclear RNAs, small nucleolar RNAs and rRNAs and discovering isomiRs, tRFs, phasiRNAs and plant-specific 24-nt siRNAs for up to 21 model organisms. Different modules, including single case, batch case, group case and target case, are developed to provide users with flexible ways of studying sncRNA. In addition, sRNAtools supports different ways of uploading small RNA sequencing data in a very interactive queue system, while local versions based on the program package/Docker/virtureBox are also available. 
We believe that sRNAtools will greatly benefit the scientific community as an integrated tool for studying sncRNAs.",2021-01-01 +31796964,Plant Regulomics Portal (PRP): a comprehensive integrated regulatory information and analysis portal for plant genomes. ,"Gene regulation is a highly complex and networked phenomenon where multiple tiers of control determine the cell state in a spatio-temporal manner. Among these, the transcription factors, DNA and histone modifications, and post-transcriptional control by small RNAs like miRNAs serve as major regulators. An understanding of the integrative and spatio-temporal impact of these regulatory factors can provide better insights into the state of a 'cell system'. Yet, there are limited resources available to this effect. Therefore, we hereby report an integrative information portal (Plant Regulomics Portal; PRP) for plants for the first time. The portal has been developed by integrating a huge amount of curated data from published sources, RNA-, methylome- and sRNA/miRNA sequencing, histone modifications and repeats, gene ontology, digital gene expression and characterized pathways. The key features of the portal include a regulatory search engine for fetching numerous analytical outputs and tracks of the abovementioned regulators and also a genome browser for integrated visualization of the search results. It also has numerous analytical features for analyses of transcription factors (TFs) and sRNA/miRNA, spot-specific methylation, gene expression and interactions and details of pathways for any given genomic element. It can also provide information on potential RdDM regulation, while facilitating enrichment analysis, generation of visually rich plots and downloading of data in a selective manner. Visualization of intricate biological networks is an important feature which utilizes the Neo4j Graph database making analysis of relationships and long-range system viewing possible. 
Till date, PRP hosts 571-GB processed data for four plant species namely Arabidopsis thaliana, Oryza sativa subsp. japonica, Zea mays and Glycine max. Database URL: https://scbb.ihbt.res.in/PRP.",2019-01-01 +33685590,AIRBP: Accurate identification of RNA-binding proteins using machine learning techniques.,"Identification of RNA-binding proteins (RBPs) that bind to ribonucleic acid molecules is an important problem in Computational Biology and Bioinformatics. It becomes indispensable to identify RBPs as they play crucial roles in post-transcriptional control of RNAs and RNA metabolism as well as have diverse roles in various biological processes such as splicing, mRNA stabilization, mRNA localization, and translation, RNA synthesis, folding-unfolding, modification, processing, and degradation. The existing experimental techniques for identifying RBPs are time-consuming and expensive. Therefore, identifying RBPs directly from the sequence using computational methods can be useful to annotate RBPs and assist the experimental design efficiently. In this work, we present a method called AIRBP, which is designed using an advanced machine learning technique, called stacking, to effectively predict RBPs by utilizing features extracted from evolutionary information, physiochemical properties, and disordered properties. Moreover, our method, AIRBP, use the majority vote from RBPPred, DeepRBPPred, and the stacking model for the prediction for RBPs. The results show that AIRBP attains Accuracy (ACC), Balanced Accuracy (BACC), F1-score, and Mathews Correlation Coefficient (MCC) of 95.84 %, 94.71 %, 0.928, and 0.899, respectively, based on the training dataset, using 10-fold cross-validation (CV). Further evaluation of AIRBP on independent test set reveals that it achieves ACC, BACC, F1-score, and MCC of 94.36 %, 94.28 %, 0.897, and 0.860, for Human test set; 91.25 %, 93.00 %, 0.896, and 0.835 for S. cerevisiae test set; and 90.60 %, 90.41 %, 0.934, and 0.775 for A. 
thaliana test set, respectively. These results indicate that the AIRBP outperforms the existing Deep- and TriPepSVM methods. Therefore, the proposed better-performing AIRBP can be useful for accurate identification and annotation of RBPs directly from the sequence and help gain valuable insight to treat critical diseases. Availability: Code-data is available here: http://cs.uno.edu/∼tamjid/Software/AIRBP/code_data.zip.",2021-02-13 +32445142,Determinants of renewable and non-renewable energy consumption in hydroelectric countries.,"In the past decades, renewable energy consumption has grown considerably because of environmental degradation caused by non-renewable energy consumption. This research aims to find the causal link between renewable and non-renewable energy consumption, human capital, and non-renewable energy price for the 53 most renewable energy-consuming countries worldwide (hydroelectric) during the period 1990-2017. We use data collected from the World Bank ( http://data.worldbank.org/data-catalog/world-development-indicators , 2018) and Statistical Review of World Energy ( https://www.bp.com/ , 2018). We test simultaneously two types of regressions in order to measure the degree of elasticity of the two types of energy by using econometric techniques for panel data. The results of the GLS models indicate that human capital has a stronger significant effect on renewable energy consumption at the global level, in the middle high-income countries and low-middle income countries, compared with non-renewable energy consumption. Besides, at the global level, there is a positive and statistically significant relationship between the non-renewable energy price and the two types of energy consumption. There is a long-run consumption of both types of energy. On the other hand, the one-way relationship between human capital and non-renewable energy price and renewable energy consumption is stronger than the relationship with non-renewable energy consumption. 
The policy implications derived from this study should be designed to promote human capital development in order to promote renewable energy consumption and increase the investment in renewable energy sources to guarantee their access to lower prices that reduce non-renewable energy consumption.",2020-05-22 +,T253. THE CORRELATION ANALYSIS BETWEEN RENAMING SCHIZOPHRENIA AND VISITING FREQUENCY OF MENTAL HEALTH SERVICES BY BIG DATA ANALYSIS (INTERNET SEARCHES AND NEWSPAPER ARTICLES) IN SOUTH KOREA,"Abstract

Background

Korean Neuropsychiatric Association changed the Korean term for schizophrenia from ‘split-mind disorder’ to ‘attunement disorder’ in 2012, to dispel the stigma associated with name, and to promote early detection and treatment. Information on the internet affects the public awareness and attitude toward schizophrenia. The main purpose of this study was to investigate the correlation between renaming schizophrenia and the pattern of mental health services utilization by big data analysis of internet (newspaper articles and internet searches) in Korea.

Methods

From January 2016 to September 2017, newspaper articles on “attunement disorder” and “split-mind disorder” available on the internet were classified as related with negative images like crime and helpful or positive in dispelling the stigma. The relationship between the number of anti-stigma newspaper articles and newspaper articles of schizophrenia containing both positive and negative images was examined. In addition, using Naver, a major internet search engine in Korea, we investigated the total number of internet searches of both old and new name of schizophrenia by gender differences. Finally, the frequency of the visits of mental health services of patients with schizophrenia was measured using the Korean Healthcare Bigdata Hub (http://opendata.hira.or.kr/home.do#none) for 14 months and the correlation between the frequency of the visits and the above big data was examined. The data were analyzed using the SPSS/WIN 24.0. Pearson correlation coefficients were used to analyze correlations.

Results

The amounts of newspaper articles containing anti-stigma of schizophrenia were correlated with the amounts of newspaper articles containing negative images like crime of the new name (attunement disorder) of schizophrenia (r=0.528, p<0.01), which was greater than the amounts of newspaper articles containing the old name (split-mind disorder) of schizophrenia (r=0.300, p<0.01). We also found that a strong positive correlation between the number of articles about “attunement disorder” and search frequency about the term on the internet. In addition, the search frequency was more highly related to the number of articles containing negative images of the illness (e.g., related crimes, r = 0.910, p<0.01) than that of articles providing positive aspects of the illness (e.g., dispelling stigma, r = 0.423, p<0.01). There was no significant correlation between the number of schizophrenia-related newspaper articles in previous month and the visits of mental health services of patients with schizophrenia in next month. There were no gender differences in internet searches. The correlation between the internet search frequency for “attunement disorder” in the previous month and the visits of the mental health services of patients with schizophrenia (r = 0.185, p>0.05) in next month was larger than the correlation of “split-mind disorder” searches with mental health services utilization (r = 0.082, p>0.05).

Discussion

“Attunement disorder” rather than “split-mind disorder” was appeared more frequently in newspaper articles of the anti-stigma characteristics. “Attunement disorder” seems to be more useful for anti-stigma campaign. Renaming schizophrenia didn’t seem to affect the visiting frequency of mental health services. There was statistical limitation which was originated from the lack of numbers of patient’s information. It was because Korean Bigdata Hub provided patients information just for 14 months as monthly data. Also, it should be considered that the time period, the kinds of mental disorders and the search engine we investigated were limited. Future research needs to overcome these limitations.",2018-04-01 +31757204,metaSPARSim: a 16S rRNA gene sequencing count data simulator.,"

Background

In the last few years, 16S rRNA gene sequencing (16S rDNA-seq) has seen a surprisingly rapid increase in election rate as a methodology to perform microbial community studies. Despite the considerable popularity of this technique, an exiguous number of specific tools are currently available for proper 16S rDNA-seq count data preprocessing and simulation. Indeed, the great majority of tools have been developed adapting methodologies previously used for bulk RNA-seq data, with poor assessment of their applicability in the metagenomics field. For such tools and the few ones specifically developed for 16S rDNA-seq data, performance assessment is challenging, mainly due to the complex nature of the data and the lack of realistic simulation models. In fact, to the best of our knowledge, no software thought for data simulation are available to directly obtain synthetic 16S rDNA-seq count tables that properly model heavy sparsity and compositionality typical of these data.

Results

In this paper we present metaSPARSim, a sparse count matrix simulator intended for usage in development of 16S rDNA-seq metagenomic data processing pipelines. metaSPARSim implements a new generative process that models the sequencing process with a Multivariate Hypergeometric distribution in order to realistically simulate 16S rDNA-seq count table, resembling real experimental data compositionality and sparsity. It provides ready-to-use count matrices and comes with the possibility to reproduce different pre-coded scenarios and to estimate simulation parameters from real experimental data. The tool is made available at http://sysbiobig.dei.unipd.it/?q=Software#metaSPARSim and https://gitlab.com/sysbiobig/metasparsim.

Conclusion

metaSPARSim is able to generate count matrices resembling real 16S rDNA-seq data. The availability of count data simulators is extremely valuable both for methods developers, for which a ground truth for tools validation is needed, and for users who want to assess state of the art analysis tools for choosing the most accurate one. Thus, we believe that metaSPARSim is a valuable tool for researchers involved in developing, testing and using robust and reliable data analysis methods in the context of 16S rRNA gene sequencing.",2019-11-22 +32155318,"A preliminary computational outputs versus experimental results: Application of sTRAP, a biophysical tool for the analysis of SNPs of transcription factor-binding sites.","

Background

In the human genome, the transcription factors (TFs) and transcription factor-binding sites (TFBSs) network has a great regulatory function in the biological pathways. Such crosstalk might be affected by the single-nucleotide polymorphisms (SNPs), which could create or disrupt a TFBS, leading to either a disease or a phenotypic defect. Many computational resources have been introduced to predict the TFs binding variations due to SNPs inside TFBSs, sTRAP being one of them.

Methods

A literature review was performed and the experimental data for 18 TFBSs located in 12 genes was provided. The sequences of TFBS motifs were extracted using two different strategies; in the size similar with synthetic target sites used in the experimental techniques, and with 60 bp upstream and downstream of the SNPs. The sTRAP (http://trap.molgen.mpg.de/cgi-bin/trap_two_seq_form.cgi) was applied to compute the binding affinity scores of their cognate TFs in the context of reference and mutant sequences of TFBSs. The alternative bioinformatics model used in this study was regulatory analysis of variation in enhancers (RAVEN; http://www.cisreg.ca/cgi-bin/RAVEN/a). The bioinformatics outputs of our study were compared with experimental data, electrophoretic mobility shift assay (EMSA).

Results

In 6 out of 18 TFBSs in the following genes COL1A1, Hb ḉᴪ, TF, FIX, MBL2, NOS2A, the outputs of sTRAP were inconsistent with the results of EMSA. Furthermore, no p value of the difference between the two scores of binding affinity under the wild and mutant conditions of TFBSs was presented. Nor, were any criteria for preference or selection of any of the measurements of different matrices used for the same analysis.

Conclusion

Our preliminary study indicated some paradoxical results between sTRAP and experimental data. However, to link the data of sTRAP to the biological functions, its optimization via experimental procedures with the integration of expanded data and applying several other bioinformatics tools might be required.",2020-03-10 +27605101,LncVar: a database of genetic variation associated with long non-coding genes.,"

Motivation

Long non-coding RNAs (lncRNAs) are essential in many molecular pathways, and are frequently associated with disease but the mechanisms of most lncRNAs have not yet been characterized. Genetic variations, including single nucleotide polymorphisms (SNPs) and structural variations, are widely distributed in the genome, including lncRNA gene regions. As the number of studies on lncRNAs grows rapidly, it is necessary to evaluate the effects of genetic variations on lncRNAs.

Results

Here, we present LncVar, a database of genetic variation associated with long non-coding genes in six species. We collected lncRNAs from the NONCODE database, and evaluated their conservation. We systematically integrated transcription factor binding sites and m6A modification sites of lncRNAs and provided comprehensive effects of SNPs on transcription and modification of lncRNAs. We collected putatively translated open reading frames (ORFs) in lncRNAs, and identified both synonymous and non-synonymous SNPs in ORFs. We also collected expression quantitative trait loci of lncRNAs from the literature. Furthermore, we identified lncRNAs in CNV regions as prognostic biomarker candidates of cancers and predicted lncRNA gene fusion events from RNA-seq data from cell lines. The LncVar database can be used as a resource to evaluate the effects of the variations on the biological function of lncRNAs.

Availability and implementation

LncVar is available at http://bioinfo.ibp.ac.cn/LncVar CONTACT: rschen@ibp.ac.cn Supplementary information: Supplementary materials are available at Bioinformatics online.",2016-09-06 +33365372,The effect of substrate temperature and oxygen partial pressure on the properties of nanocrystalline copper oxide thin films grown by pulsed laser deposition.,"The data presented in this paper are related to the research article entitled ""Pulsed laser deposition of single phase n- and p-type Cu2O thin films with low resistivity"" (S.F.U. Farhad et al., 2020) [1]. The detailed processing conditions of copper oxide thin films and a variety of characterization techniques used are described in the same ref. [1] https://doi.org/10.1016/j.matdes.2020.108848. Thin films need to grow on different substrates to elucidate various properties of the individual layer for attaining optimum processing conditions required for devising efficient optoelectronic junctions as well as thin film stacks for different sensing applications. This article describes the effect of substrate temperature and oxygen partial pressure on the structural, morphological, optical, and electrical properties of pulsed laser deposited (PLD) nanocrystalline copper oxide thin films on quartz glass, ITO, NaCl(100), Si(100), ZnO coated FTO substrates. The low temperature grown copper oxide and zinc oxide thin films by PLD were used for devising solid n-ZnO/p-Cu2O junction and investigated their photovoltaic and interface properties using dynamic photo-transient current measurement at zero bias voltage and TEM/EDX respectively. These datasets are made publicly available for enabling extended analyses and as a guide for further research.",2020-12-13 +32783534,"Exposure to Road Traffic Noise and Incidence of Acute Myocardial Infarction and Congestive Heart Failure: A Population-Based Cohort Study in Toronto, Canada.","

Background

Epidemiological evidence for the association between traffic-related noise and the incidence of major cardiovascular events such as acute myocardial infarction (AMI) and congestive heart failure (CHF) is inconclusive, especially in North America.

Objectives

We evaluated the associations between long-term exposure to road traffic noise and the incidence of AMI and CHF.

Methods

Our study population comprised ∼1 million people 30-100 years of age who lived in Toronto, Canada, from 2001 to 2015 and were free of AMI (referred to as the AMI cohort) or CHF (the CHF cohort) at baseline. Outcomes were ascertained from health administrative databases using validated algorithms. Annual average noise levels were estimated as the A-weighted equivalent sound pressure level over the 24-h period (LAeq24) and during nighttime (LAeqNight), respectively, using propagation modeling, and assigned to participants' annual six-digit postal code addresses during follow-up. We calculated hazard ratios (HRs) and 95% confidence intervals (CIs) for incident AMI and CHF in relation to LAeq24 and LAeqNight using random-effects Cox proportional hazards models adjusting for individual- and census tract-level covariates, including traffic-related air pollutants [e.g., ultrafine particles (UFPs) and nitrogen dioxide].

Results

During follow-up, there were 37,441 AMI incident cases and 95,138 CHF incident cases. Each interquartile range change in LAeq24 was associated with an increased risk of incident AMI (HR=1.07; 95% CI: 1.06, 1.08) and CHF (HR=1.07; 95% CI: 1.06, 1.09). Similarly, LAeqNight was associated with incident AMI (HR=1.07; 95% CI: 1.05, 1.08) and CHF (HR=1.06; 95% CI: 1.05, 1.07). These results were robust to various sensitivity analyses and remained elevated after controlling for long-term exposure to UFPs and nitrogen dioxide. We found near-linear relationships between noise and the incidence of AMI and CHF with no evidence of threshold values.

Conclusion

In this large cohort study in Toronto, Canada, chronic exposure to road traffic noise was associated with elevated risks for AMI and CHF incidence. https://doi.org/10.1289/EHP5809.",2020-08-12 +28296894,Impact of genetic variation on three dimensional structure and function of proteins.,"The Protein Data Bank (PDB; http://wwpdb.org) was established in 1971 as the first open access digital data resource in biology with seven protein structures as its initial holdings. The global PDB archive now contains more than 126,000 experimentally determined atomic level three-dimensional (3D) structures of biological macromolecules (proteins, DNA, RNA), all of which are freely accessible via the Internet. Knowledge of the 3D structure of the gene product can help in understanding its function and role in disease. Of particular interest in the PDB archive are proteins for which 3D structures of genetic variant proteins have been determined, thus revealing atomic-level structural differences caused by the variation at the DNA level. Herein, we present a systematic and qualitative analysis of such cases. We observe a wide range of structural and functional changes caused by single amino acid differences, including changes in enzyme activity, aggregation propensity, structural stability, binding, and dissociation, some in the context of large assemblies. Structural comparison of wild type and mutated proteins, when both are available, provide insights into atomic-level structural differences caused by the genetic variation.",2017-03-15 +33576883,Identification of new proteins related with cisplatin resistance in Saccharomyces cerevisiae.,"The aim of this study is to select a cisplatin-resistant Saccharomyces cerevisiae strain to look for new molecular markers of resistance and the identification of mechanisms/interactions involved. A resistant strain was obtained after 80 days of cisplatin exposure. 
Then, total protein extraction, purification, and identification were carried out, in wild-type (wt) and resistant strains, by tandem mass spectrometry using a ""nano HPLC-ESI-MS/MS"" ion trap system. The increase in the exponentially modified protein abundance index (emPAI) (resistant vs wt strains) was calculated to study the increase in protein expression. ""Genemania"" software ( http://www.Genemania.org/ ) was used to compare the effects, functions, and protein interactions. KEGG tool was used for metabolic pathway analysis. Data are available via ProteomeXchange with identifier PXD020665. The cisplatin-resistant strain showed 2.5 times more resistance than the wt strain for the inhibitory dose 50% (ID50) value (224 μg/ml vs 89.68 μg/ml) and 2.78 times more resistant for the inhibitory dose 90% (ID90) value (735.2 μg/ml vs 264.04 μg/ml). Multiple deregulated proteins were found in the glutathione and carbon metabolism, oxidative phosphorylation, proteasome, glycolysis and gluconeogenesis, glyoxylate metabolism, fatty acid degradation pathway, citric acid cycle, and ribosome. The most overexpressed proteins in the cisplatin-resistant strain were related to growth and metabolism (QCR2, QCR1, ALDH4, ATPB, ATPA, ATPG, and PCKA), cell structure (SCW10), and thermal shock (HSP26). The results suggest that these proteins could be involved in cisplatin resistance. The resistance acquisition process is complex and involves the activation of multiple mechanisms that interact together. 
KEY POINTS: • Identification of new proteins/genes related to cisplatin resistance • Increased expression of QCR2/QCR1/ALDH4/ATPB/ATPA/SCW10/HSP26/ATPG and PCKA proteins • Multiple molecular mechanisms that interact together are involved in resistance.",2021-02-12 +32984461,Dataset of organic sample near infrared spectra acquired on different spectrometers.,"This dataset presents 127 raw near infrared spectra of different organic samples acquired on three different spectrometers in three different labs. An example of data processing is shown to create six spectra transfer models between the three spectrometers (two by two). In order to build and validate these transfer models, the dataset was split into two sets of spectra: a first set was used to compute six spectra transfer models thanks to the Piecewise Direct standardisation function (PDS). A second set of spectra, independent of the first one was used to validate transfer models. Spectrum treatments and models were created on ChemFlow (https://vm-chemflow-francegrille.eu/), a free online chemometric software that includes all the necessary functions.",2020-09-02 +32723816,COVID-19 lockdowns cause global air pollution declines.,"The lockdown response to coronavirus disease 2019 (COVID-19) has caused an unprecedented reduction in global economic and transport activity. We test the hypothesis that this has reduced tropospheric and ground-level air pollution concentrations, using satellite data and a network of >10,000 air quality stations. After accounting for the effects of meteorological variability, we find declines in the population-weighted concentration of ground-level nitrogen dioxide (NO2: 60% with 95% CI 48 to 72%), and fine particulate matter (PM2.5: 31%; 95% CI: 17 to 45%), with marginal increases in ozone (O3: 4%; 95% CI: -2 to 10%) in 34 countries during lockdown dates up until 15 May. 
Except for ozone, satellite measurements of the troposphere indicate much smaller reductions, highlighting the spatial variability of pollutant anomalies attributable to complex NOx chemistry and long-distance transport of fine particulate matter with a diameter less than 2.5 µm (PM2.5). By leveraging Google and Apple mobility data, we find empirical evidence for a link between global vehicle transportation declines and the reduction of ambient NO2 exposure. While the state of global lockdown is not sustainable, these findings allude to the potential for mitigating public health risk by reducing ""business as usual"" air pollutant emissions from economic activities. Explore trends here: https://nina.earthengine.app/view/lockdown-pollution.",2020-07-28 +29878047,ImmunomeBrowser: a tool to aggregate and visualize complex and heterogeneous epitopes in reference proteins.,"

Motivation

Datasets that are derived from different studies (e.g. MHC ligand elution, MHC binding, B/T cell epitope screening etc.) often vary in terms of experimental approaches, sizes of peptides tested, including partial and or nested overlapping peptides and in the number of donors tested.

Results

We present a customized application of the Immune Epitope Database's ImmunomeBrowser tool, which can be used to effectively aggregate and visualize heterogeneous immunological data. User-provided peptide sets and associated response data are mapped to a user-provided protein reference sequence. The output consists of tables and figures representing the aggregated data represented by a Response Frequency score and associated estimated confidence interval. This allows the user to visualize regions associated with dominant responses and their boundaries. The results are presented both as a user interactive javascript based web interface and a tabular format in a selected reference sequence.

Availability and implementation

The 'ImmunomeBrowser' has been a longstanding feature of the IEDB (http://www.iedb.org). The present application extends the use of this tool to work with user-provided datasets, rather than the output of IEDB queries. This new server version of the ImmunomeBrowser is freely accessible at http://tools.iedb.org/immunomebrowser/.",2018-11-01 +32749075,PleThora: Pleural effusion and thoracic cavity segmentations in diseased lungs for benchmarking chest CT processing pipelines.,"This manuscript describes a dataset of thoracic cavity segmentations and discrete pleural effusion segmentations we have annotated on 402 computed tomography (CT) scans acquired from patients with non-small cell lung cancer. The segmentation of these anatomic regions precedes fundamental tasks in image analysis pipelines such as lung structure segmentation, lesion detection, and radiomics feature extraction. Bilateral thoracic cavity volumes and pleural effusion volumes were manually segmented on CT scans acquired from The Cancer Imaging Archive ""NSCLC Radiomics"" data collection. Four hundred and two thoracic segmentations were first generated automatically by a U-Net based algorithm trained on chest CTs without cancer, manually corrected by a medical student to include the complete thoracic cavity (normal, pathologic, and atelectatic lung parenchyma, lung hilum, pleural effusion, fibrosis, nodules, tumor, and other anatomic anomalies), and revised by a radiation oncologist or a radiologist. Seventy-eight pleural effusions were manually segmented by a medical student and revised by a radiologist or radiation oncologist. Interobserver agreement between the radiation oncologist and radiologist corrections was acceptable. All expert-vetted segmentations are publicly available in NIfTI format through The Cancer Imaging Archive at https://doi.org/10.7937/tcia.2020.6c7y-gq39. Tabular data detailing clinical and technical metadata linked to segmentation cases are also available. 
Thoracic cavity segmentations will be valuable for developing image analysis pipelines on pathologic lungs - where current automated algorithms struggle most. In conjunction with gross tumor volume segmentations already available from ""NSCLC Radiomics,"" pleural effusion segmentations may be valuable for investigating radiomics profile differences between effusion and primary tumor or training algorithms to discriminate between them.",2020-08-28 +32616742,Web portal for analytical validation of MRM-MS assay abided with integrative multinational guidelines.,"Multiple reaction monitoring-mass spectrometry became a mainstream method for quantitative proteomics, which made the validation of a method and the analyzed data important. In this portal for validation of the MRM-MS assay, we developed a website that automatically evaluates uploaded MRM-MS data, based on biomarker assay guidelines from the European Medicines Agency, the US Food & Drug Administration, and the Korea Food & Drug Administration. The portal reads a Skyline output file and produces the following results-calibration curve, specificity, sensitivity, carryover, precision, recovery, matrix effect, recovery, dilution integrity, stability, and QC-according to the standards of each independent agency. The final tables and figures that pertain to the 11 evaluation categories are displayed in an individual page. Spring boot was used as a framework for development of the webpage, which follows MVC Pattern. JSP, HTML, XML, and Java Script were used to develop the webpage. A server was composed of Apache Tomcat, MySQL. Input files were skyline-derived output files (csv file), and each files were organized by specific columns in order. SQL, JAVA were interworked to evaluate all the categories and show the results. 
Method Validation Portal can be accessed via any kind of explorer from https://pnbvalid.snu.ac.kr.",2020-07-02 +30542988,Identification of Cancer Driver Genes from a Custom Set of Next Generation Sequencing Data.,"Next generation sequencing (NGS) has become the norm of cancer genomic researches. Large-scale cancer sequencing projects seek to comprehensively uncover mutated genes that confer a selective advantage for cancer cells. Numerous computational algorithms have been developed to find genes that drive cancer based on their patterns of mutation in a patient cohort. It has been noted that the distinct features of driver gene alterations in different subgroups are based on clinical characteristics. Previously, we have developed a database, DriverDB, to integrate all public cancer sequencing data and to identify cancer driver genes according to bioinformatics tools. In this chapter, we describe the use of the function ""Meta-Analysis"" in DriverDB that offers a list of clinical characteristics to define samples and provides a high degree of freedom for researchers to utilize the huge amounts of sequencing data. Moreover, researchers can use the ""Gene"" section to explore a single driver gene in all cancers by different kinds of aspects after identifying the specific driver genes by ""Meta-Analysis."" DriverDB is available at http://ngs.ym.edu.tw/driverdb/ .",2019-01-01 +29985970,BrainEXP: a database featuring with spatiotemporal expression variations and co-expression organizations in human brains.,"

Summary

Gene expression changes over the lifespan and varies among different tissues or cell types. Gene co-expression also changes by sex, age, different tissues or cell types. However, gene expression under the normal state and gene co-expression in the human brain has not been fully defined and quantified. Here we present a database named Brain EXPression Database (BrainEXP) which provides spatiotemporal expression of individual genes and co-expression in normal human brains. BrainEXP consists of 4567 samples from 2863 healthy individuals gathered from existing public databases and our own data, in either microarray or RNA-Seq library types. We mainly provide two analysis results based on the large dataset: (i) basic gene expression across specific brain regions, age ranges and sexes; (ii) co-expression analysis from different platforms.

Availability and implementation

http://www.brainexp.org/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +32987570,PMO: A knowledge representation model towards precision medicine.,"With the rapid development of biomedical technology, amounts of data in the field of precision medicine (PM) are growing exponentially. Valuable knowledge is included in scattered data in which meaningful biomedical entities and their semantic relationships are buried. Therefore, it is necessary to develop a knowledge representation model like ontology to formally represent the relationships among diseases, phenotypes, genes, mutations, drugs, etc. and achieve effective integration of heterogeneous data. On basis of existing work, our study focus on solving the following issues: (i) Selecting the primary entities in PM domain; (ii) collecting and integrating biomedical vocabularies related to the above entities; (iii) defining and normalizing semantic relationships among these entities. We proposed a semi-automated method which improved the original Ontology Development 101 method to build the Precision Medicine Ontology (PMO), including defining the scope of the PMO according to the definition of PM, collecting terms from different biomedical resources, integrating and normalizing the terms by a combination of machine and manual work, defining the annotation properties, reusing existing ontologies and taxonomies, defining semantic relationships, evaluating PMO and creating the PMO website. Finally, the Precision Medicine Vocabulary (PMV) contains 4.53 million terms collected from 62 biomedical vocabularies, and the PMO includes eleven branches of PM concepts such as disease, chemical and drug, phenotype, gene, mutation, gene product and cell, described by 93 semantic relationships among them. PMO is an open, extensible ontology of PM, all of the terms and relationships in which could be obtained from the PMO website (http://www.phoc.org.cn/pmo/). 
Compared to existing project, our work has brought a broader and deeper coverage of mutation, gene and gene product, which enriches the semantic type and vocabulary in PM domain and benefits all users in terms of medical literature annotation, text mining and knowledge base construction.",2020-06-01 +32788173,YTHDF1 Promotes Gastric Carcinogenesis by Controlling Translation of FZD7.,"N6-methyladenosine (m6A) is the most prevalent internal RNA modification in mammals that regulates homeostasis and function of modified RNA transcripts. Here, we aimed to investigate the role of YTH m6A RNA-binding protein 1 (YTHDF1), a key regulator of m6A methylation in gastric cancer tumorigenesis. Multiple bioinformatic analyses of different human cancer databases identified key m6A-associated genetic mutations that regulated gastric tumorigenesis. YTHDF1 was mutated in about 7% of patients with gastric cancer, and high expression of YTHDF1 was associated with more aggressive tumor progression and poor overall survival. Inhibition of YTHDF1 attenuated gastric cancer cell proliferation and tumorigenesis in vitro and in vivo. Mechanistically, YTHDF1 promoted the translation of a key Wnt receptor frizzled7 (FZD7) in an m6A-dependent manner, and mutated YTHDF1 enhanced expression of FZD7, leading to hyperactivation of the Wnt/β-catenin pathway and promotion of gastric carcinogenesis. Our results demonstrate the oncogenic role of YTHDF1 and its m6A-mediated regulation of Wnt/β-catenin signaling in gastric cancer, providing a novel approach of targeting such epigenetic regulators in this disease. SIGNIFICANCE: This study provides a rationale for controlling translation of key oncogenic drivers in cancer by manipulating epigenetic regulators, representing a novel and efficient strategy for anticancer treatment. 
GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/10/2651/F1.large.jpg.",2020-08-11 +32780597,Factors Influencing Cognate Performance for Young Multilingual Children's Vocabulary: A Research Synthesis.,"Purpose The purpose of this systematic review was to determine evidence of a cognate effect for young multilingual children (ages 3;0-8;11 [years;months], preschool to second grade) in terms of task-level and child-level factors that may influence cognate performance. Cognates are pairs of vocabulary words that share meaning with similar phonology and/or orthography in more than one language, such as rose-rosa (English-Spanish) or carrot-carotte (English-French). Despite the cognate advantage noted with older bilingual children and bilingual adults, there has been no systematic examination of the cognate research in young multilingual children. Method We conducted searches of multiple electronic databases and hand-searched article bibliographies for studies that examined young multilingual children's performance with cognates based on study inclusion criteria aligned to the research questions. Results The review yielded 16 articles. The majority of the studies (12/16, 75%) demonstrated a positive cognate effect for young multilingual children (measured in higher accuracy, faster reaction times, and doublet translation equivalents on cognates as compared to noncognates). However, not all bilingual children demonstrated a cognate effect. Both task-level factors (cognate definition, type of cognate task, word characteristics) and child-level factors (level of bilingualism, age) appear to influence young bilingual children's performance on cognates. Conclusions Contrary to early 1990s research, current researchers suggest that even young multilingual children may demonstrate sensitivity to cognate vocabulary words. 
Given the limits in study quality, more high-quality research is needed, particularly to address test validity in cognate assessments, to develop appropriate cognate definitions for children, and to refine word-level features. Only one study included a brief instruction prior to assessment, warranting cognate treatment studies as an area of future need. Supplemental Material https://doi.org/10.23641/asha.12753179.",2020-08-11 +31430151,Structural Analysis and Identification of Colloidal Aggregators in Drug Discovery.,"Aggregation has been posing a great challenge in drug discovery. Current computational approaches aiming to filter out aggregated molecules based on their similarity to known aggregators, such as Aggregator Advisor, have low prediction accuracy, and therefore development of reliable in silico models to detect aggregators is highly desirable. In this study, we built a data set consisting of 12 119 aggregators and 24 172 drugs or drug candidates and then developed a group of classification models based on the combination of two ensemble learning approaches and five types of molecular representations. The best model yielded an accuracy of 0.950 and an area under the curve (AUC) value of 0.987 for the training set, and an accuracy of 0.937 and an AUC of 0.976 for the test set. The best model also gave reliable predictions to the external validation set with 5681 aggregators since 80% of molecules were predicted to be aggregators with a prediction probability higher than 0.9. More importantly, we explored the relationship between colloidal aggregation and molecular features, and generalized a set of simple rules to detect aggregators. Molecular features, such as log D, the number of hydroxyl groups, the number of aromatic carbons attached to a hydrogen atom, and the number of sulfur atoms in aromatic heterocycles, would be helpful to distinguish aggregators from nonaggregators. 
A comparison with numerous existing druglikeness and aggregation filtering rules and models used in virtual screening verified the high reliability of the model and rules proposed in this study. We also used the model to screen several curated chemical databases, and almost 20% of molecules in the evaluated databases were predicted as aggregators, highlighting the potential high risk of aggregation in screening. Finally, we developed an online Web server of ChemAGG ( http://admet.scbdd.com/ChemAGG/index ), which offers a freely available tool to detect aggregators.",2019-08-27 +30365033,AlloMAPS: allosteric mutation analysis and polymorphism of signaling database.,"AlloMAPS database provides data on the causality and energetics of allosteric communication obtained with the structure-based statistical mechanical model of allostery (SBSMMA). The database contains data on allosteric signaling in three sets of proteins and protein chains: (i) 46 proteins with comprehensively annotated functional and allosteric sites; (ii) 1908 protein chains from PDBselect set of chains with low (<25%) sequence identity; (iii) 33 proteins with more than 50 known pathological SNPs in each molecule. In addition to energetics of allosteric signaling between known functional and regulatory sites, allosteric modulation caused by the binding to these sites, by SNPs, and by mutations designated by the user can be explored. Allosteric Signaling Maps (ASMs), which are produced via the exhaustive computational scanning for stabilizing and destabilizing mutations and for the modulation range caused by the sequence position are available for each protein/protein chain in the database. We propose to use this database for evaluating the effects of allosteric signaling in the search for latent regulatory sites and in the design of allosteric sites and effectors. 
The database is freely available at: http://allomaps.bii.a-star.edu.sg.",2019-01-01 +30298402,"Exploring Enzyme Evolution from Changes in Sequence, Structure, and Function.","The goal of our research is to increase our understanding of how biology works at the molecular level, with a particular focus on how enzymes evolve their functions through adaptations to generate new specificities and mechanisms. FunTree (Sillitoe and Furnham, Nucleic Acids Res 44:D317-D323, 2016) is a resource that brings together sequence, structure, phylogenetic, and chemical and mechanistic information for 2340 CATH superfamilies (Sillitoe et al., Nucleic Acids Res 43:D376-D381, 2015) (which all contain at least one enzyme) to allow evolution to be investigated within a structurally defined superfamily. We will give an overview of FunTree's use of sequence and structural alignments to cluster proteins within a superfamily into structurally similar groups (SSGs) and generate phylogenetic trees augmented by ancestral character estimations (ACE). This core information is supplemented with new measures of functional similarity (Rahman et al., Nat Methods 11:171-174, 2014) to compare enzyme reactions based on overall bond changes, reaction centers (the local environment atoms involved in the reaction), and the structural similarities of the metabolites involved in the reaction. These trees are also decorated with taxonomic and Enzyme Commission (EC) code and GO annotations, forming the basis of a comprehensive web interface that can be found at http://www.funtree.info . In this chapter, we will discuss the various analyses and supporting computational tools in more detail, describing the steps required to extract information.",2019-01-01 +33730866,"Blood Lead Levels in U.S. Children Ages 1-11 Years, 1976-2016.","

Background

Lead can adversely affect child health across a wide range of exposure levels. We describe the distribution of blood lead levels (BLLs) in U.S. children ages 1-11 y by selected sociodemographic and housing characteristics over a 40-y period.

Methods

Data from the National Health and Nutrition Examination Survey (NHANES) II (1976-1980), NHANES III (Phase 1: 1988-1991 and Phase II: 1991-1994), and Continuous NHANES (1999-2016) were used to describe the distribution of BLLs (in micrograms per deciliter; 1μg/dL=0.0483μmol/L) in U.S. children ages 1-11 y from 1976 to 2016. For all children with valid BLLs (n=27,122), geometric mean (GM) BLLs [95% confidence intervals (CI)] and estimated prevalence ≥5μg/dL (95% CI) were calculated overall and by selected characteristics, stratified by age group (1-5 y and 6-11 y).

Results

The GM BLL in U.S. children ages 1-5 y declined from 15.2μg/dL (95% CI: 14.3, 16.1) in 1976-1980 to 0.83μg/dL (95% CI: 0.78, 0.88) in 2011-2016, representing a 94.5% decrease over time. For children ages 6-11 y, GM BLL declined from 12.7μg/dL (95% CI: 11.9, 13.4) in 1976-1980 to 0.60μg/dL (95% CI: 0.58, 0.63) in 2011-2016, representing a 95.3% decrease over time. Even so, for the most recent period (2011-2016), estimates indicate that approximately 385,775 children ages 1-11 y had BLLs greater than or equal to the CDC blood lead reference value of 5μg/dL. Higher GM BLLs were associated with non-Hispanic Black race/ethnicity, lower family income-to-poverty-ratio, and older housing age.

Discussion

Overall, BLLs in U.S. children ages 1-11 y have decreased substantially over the past 40 y. Despite these notable declines in population exposures to lead over time, higher GM BLLs are consistently associated with risk factors such as race/ethnicity, poverty, and housing age that can be used to target blood lead screening efforts. https://doi.org/10.1289/EHP7932.",2021-03-17 +33621129,Associations between Blood Lead Levels and Coronary Artery Stenosis Measured Using Coronary Computed Tomography Angiography.,"

Background

Lead exposure is a risk factor for increased blood pressure and cardiovascular disease, even when blood lead levels (BLLs) are within the normal range.

Objective

This study aimed to investigate the association between BLL and coronary artery stenosis (CAS) in asymptomatic adults using 128-slice dual-source coronary computed tomography (CT) angiography.

Methods

We analyzed medical records data from 2,193 adults (1,461 men and 732 women) who elected to complete a screening health examination, coronary CT angiography, and BLL measurement during 2011-2018 and had no history of CAS symptoms, cardiovascular disease, or occupational exposure to lead. Logistic regression models were used to estimate associations between moderate-to-severe CAS (≥25% stenosis) and a 1-μg/dL increase in blood lead, with and without adjustment for age, sex, hypertension, diabetes mellitus, dyslipidemia, body mass index, regular exercise, smoking status, and alcohol drinking.

Results

BLLs ranged from 0.12 to 10.14μg/dL, with an arithmetic mean of 2.71±1.26μg/dL. The arithmetic mean was higher for men than for women (2.98±1.26μg/dL vs. 2.18±1.08μg/dL, p<0.001) and higher in the moderate-to-severe CAS group than in the no-CAS or <25% stenosis group (3.02±1.44μg/dL vs. 2.67±1.23μg/dL, p<0.001). Moderate-to-severe CAS was significantly associated with BLL before and after adjustment, with an adjusted odds ratio for a 1-μg/dL increase in BLL of 1.14 (95% CI: 1.02, 1.26), p=0.017.

Conclusions

BLL was positively associated with the prevalence of moderate-to-severe CAS in Korean adults who completed an elective screening examination for early cardiovascular disease, 94% of whom had a BLL of <5μg/dL. More efforts and a strict health policy are needed to further reduce BLLs in the general population. https://doi.org/10.1289/EHP7351.",2021-02-23 +26055100,dbHiMo: a web-based epigenomics platform for histone-modifying enzymes.,"Over the past two decades, epigenetics has evolved into a key concept for understanding regulation of gene expression. Among many epigenetic mechanisms, covalent modifications such as acetylation and methylation of lysine residues on core histones emerged as a major mechanism in epigenetic regulation. Here, we present the database for histone-modifying enzymes (dbHiMo; http://hme.riceblast.snu.ac.kr/) aimed at facilitating functional and comparative analysis of histone-modifying enzymes (HMEs). HMEs were identified by applying a search pipeline built upon profile hidden Markov model (HMM) to proteomes. The database incorporates 11,576 HMEs identified from 603 proteomes including 483 fungal, 32 plants and 51 metazoan species. The dbHiMo provides users with web-based personalized data browsing and analysis tools, supporting comparative and evolutionary genomics. With comprehensive data entries and associated web-based tools, our database will be a valuable resource for future epigenetics/epigenomics studies.",2015-06-08 +26433228,RPFdb: a database for genome wide information of translated mRNA generated from ribosome profiling.,"Translational control is crucial in the regulation of gene expression and deregulation of translation is associated with a wide range of cancers and human diseases. Ribosome profiling is a technique that provides genome wide information of mRNA in translation based on deep sequencing of ribosome protected mRNA fragments (RPF). 
RPFdb is a comprehensive resource for hosting, analyzing and visualizing RPF data, available at www.rpfdb.org or http://sysbio.sysu.edu.cn/rpfdb/index.html. The current version of database contains 777 samples from 82 studies in 8 species, processed and reanalyzed by a unified pipeline. There are two ways to query the database: by keywords of studies or by genes. The outputs are presented in three levels. (i) Study level: including meta information of studies and reprocessed data for gene expression of translated mRNAs; (ii) Sample level: including global perspective of translated mRNA and a list of the most translated mRNA of each sample from a study; (iii) Gene level: including normalized sequence counts of translated mRNA on different genomic location of a gene from multiple samples and studies. To explore rich information provided by RPF, RPFdb also provides a genome browser to query and visualize context-specific translated mRNA. Overall our database provides a simple way to search, analyze, compare, visualize and download RPF data sets.",2015-10-03 +33019311,Inferring the Spatial Distribution of Physical Activity in Children Population from Characteristics of the Environment.,"Obesity affects a rising percentage of the children and adolescent population, contributing to decreased quality of life and increased risk for comorbidities. Although the major causes of obesity are known, the obesogenic behaviors manifest as a result of complex interactions of the individual with the living environment. For this reason, addressing childhood obesity remains a challenging problem for public health authorities. The BigO project (https://bigoprogram.eu) relies on large-scale behavioral and environmental data collection to create tools that support policy making and intervention design. In this work, we propose a novel analysis approach for modeling the expected population behavior as a function of the local environment. 
We experimentally evaluate this approach in predicting the expected physical activity level in small geographic regions using urban environment characteristics. Experiments on data collected from 156 children and adolescents verify the potential of the proposed approach. Specifically, we train models that predict the physical activity level in a region, achieving 81% leave-one-out accuracy. In addition, we exploit the model predictions to automatically visualize heatmaps of the expected population behavior in areas of interest, from which we draw useful insights. Overall, the predictive models and the automatic heatmaps are promising tools in gaining direct perception for the spatial distribution of the population's behavior, with potential uses by public health authorities.",2020-07-01 +,Surface albedo and toc-r 300 m products from PROBA-V instrument in the framework of Copernicus Global Land Service,"PROBA-V instrument launched in 2013 is offering a global daily coverage at pixel resolutions of 333 m and 1 km in three spectral bands (BLUE, RED, NIR) and 600 m for shortwave infrared (SWIR). The PROBA-V mission is the follow-on of the VEGETATION program started in 2000, which allowed generating long-term series at 1 km pixel resolution. The PROBA-V products belong to the Copernicus Global Land Service portfolio (http://land.copernicus.eu/global/). The sensor design of PROBA-V with oriented cameras offers a wide field of view (FOV) for sampling the BRDF (Bidirectional Reflectance Distribution Function). This paper details the methodology implemented at the premises of VITO (Flemish Institute for Technological Research) with the aim to disseminate routinely from PROBA-V daily observations for both surface albedo (SA) and top-of-canopy corrected reflectance (TOC-R) products. The method classically operates a selection of cloudless scenes, performs atmospheric corrections, and finally applies a correction of directional effects on a pixel per pixel basis. 
The synthesis period is the decade and the composite period is 20 days. Such choice is a pointwise sampling as being a trade-off between the availability of clear scenes and the timescale for phenology. Regarding the albedo catalogue, a narrow-band to broadband conversion is stipulated. A recurrent technique serves for gap-filling based on the spread of weighed a priori data. Additional information concerns the quality flag and the age of the product. Preliminary accuracy assessment is performed through a comparison with the Moderate Imaging Spectroradiometer (MODIS) Collection 6. Dependable spatial consistency is reached except for wintertime with deviations in terms of rmse (root mean square errors) about 0.03 for visible and shortwave domains, and 0.04 for near infrared. Besides, both PROBA-V and MODIS C6 exhibit close time profiles, marked by smoothness or rapid transitions. Results over 10 confidence sites reveals rmse values of 0.032 and bias of 0.01 over the 2014 full annual cycle.",2018-09-01 +32772525,"From alpha to omega and beyond! A look at the past, present, and (possible) future of psychometric soundness in the Journal of Applied Psychology.","The psychometric soundness of measures has been a central concern of articles published in the Journal of Applied Psychology (JAP) since the inception of the journal. At the same time, it isn't clear that investigators and reviewers prioritize psychometric soundness to a degree that would allow one to have sufficient confidence in conclusions regarding constructs. The purposes of the present article are to (a) examine current scale development and evaluation practices in JAP; (b) compare these practices to recommended practices, previous practices, and practices in other journals; and (c) use these comparisons to make recommendations for reviewers, editors, and investigators regarding the creation and evaluation of measures including Excel-based calculators for various indices. 
Finally, given that model complexity appears to have increased the need for short scales, we offer a user-friendly R Shiny app (https://orgscience.uncc.edu/about-us/resources) that identifies the subset of items that maximize a variety of psychometric criteria rather than merely maximizing alpha. (PsycInfo Database Record (c) 2020 APA, all rights reserved).",2020-08-10 +32644982,"Trends in Nonfatal Falls and Fall-Related Injuries Among Adults Aged ≥65 Years - United States, 2012-2018.","Falls are the leading cause of injury among adults aged ≥65 years (older adults) in the United States. In 2018, an estimated 3 million emergency department visits, more than 950,000 hospitalizations or transfers to another facility (e.g., trauma center), and approximately 32,000 deaths resulted from fall-related injuries among older adults.* Deaths from falls are increasing, with the largest increases occurring among persons aged ≥85 years (1). To describe the percentages and rates of nonfatal falls by age group and demographic characteristics and trends in falls and fall-related injuries over time, data were analyzed from the 2018 Behavioral Risk Factor Surveillance System (BRFSS) and were compared with data from 2012, 2014, and 2016. In 2018, 27.5% of older adults reported falling at least once in the past year, and 10.2% reported an injury from a fall in the past year. The percentages of older adults reporting a fall increased between 2012 and 2016 and decreased slightly between 2016 and 2018. Falls are preventable, and health care providers can help their older patients reduce their risk for falls. 
Screening older patients for fall risk, assessing modifiable risk factors (e.g., use of psychoactive medications or poor gait and balance), and recommending interventions to reduce this risk (e.g., medication management or referral to physical therapy) can prevent older adult falls (https://www.cdc.gov/steadi).",2020-07-10 +32785571,SELAdb: A database of exonic variants in a Brazilian population referred to a quaternary medical center in São Paulo.,"

Objectives

High-throughput sequencing of genomes, exomes, and disease-focused gene panels is becoming increasingly common for molecular diagnostics. However, identifying a single clinically relevant pathogenic variant among thousands of genetic polymorphisms is a challenging task. Publicly available genomic databases are useful resources to filter out common genetic variants present in the population and enable the identification of each disease-causing variant. Based on our experience applying these technologies at Hospital das Clínicas da Faculdade de Medicina da Universidade de São Paulo (HCFMUSP), São Paulo, Brazil, we recognized that the Brazilian population is not adequately represented in widely available genomic databases.

Methods

Here, we took advantage of our 5-year experience as a high-throughput sequencing core facility focused on individuals with putative genetic disorders to build a genomic database that may serve as a more accurate reference for our patient population: SELAdb.

Results/conclusions

Currently, our database comprises a final cohort of 523 unrelated individuals, including patients or family members managed by different clinics of HCFMUSP. We compared SELAdb with other publicly available genomic databases and demonstrated that this population is very heterogeneous, largely resembling Latin American individuals of mixed origin, rather than individuals of pure European ancestry. Interestingly, exclusively through SELAdb, we identified a spectrum of known and potentially novel pathogenic variants in genes associated with highly penetrant Mendelian disorders, illustrating that pathogenic variants circulating in the Brazilian population that is treated in our clinics are underrepresented in other population databases. SELAdb is freely available for public consultation at: http://intranet.fm.usp.br/sela.",2020-08-10 +30312302,GPSuc: Global Prediction of Generic and Species-specific Succinylation Sites by aggregating multiple sequence features.,"Lysine succinylation is one of the dominant post-translational modification of the protein that contributes to many biological processes including cell cycle, growth and signal transduction pathways. Identification of succinylation sites is an important step for understanding the function of proteins. The complicated sequence patterns of protein succinylation revealed by proteomic studies highlight the necessity of developing effective species-specific in silico strategies for global prediction succinylation sites. Here we have developed the generic and nine species-specific succinylation site classifiers through aggregating multiple complementary features. We optimized the consecutive features using the Wilcoxon-rank feature selection scheme. The final feature vectors were trained by a random forest (RF) classifier. 
With an integration of RF scores via logistic regression, the resulting predictor termed GPSuc achieved better performance than other existing generic and species-specific succinylation site predictors. To reveal the mechanism of succinylation and assist hypothesis-driven experimental design, our predictor serves as a valuable resource. To provide a promising performance in large-scale datasets, a web application was developed at http://kurata14.bio.kyutech.ac.jp/GPSuc/.",2018-10-12 +33370386,sRNATargetDigger: A bioinformatics software for bidirectional identification of sRNA-target pairs with co-regulatory sRNAs information.,"Identification of the target genes of microRNAs (miRNAs), trans-acting small interfering RNAs (ta-siRNAs), and small interfering RNAs (siRNAs) is an important step for understanding their regulatory roles in plants. In recent years, many bioinformatics software packages based on small RNA (sRNA) high-throughput sequencing (HTS) and degradome sequencing data analysis have provided strong technical support for large-scale mining of sRNA-target pairs. However, sRNA-target regulation is achieved using a complex network of interactions since one transcript might be co-regulated by multiple sRNAs and one sRNA may also affect multiple targets. Currently used mining software can realize the mining of multiple unknown targets using known sRNA, but it cannot rule out the possibility of co-regulation of the same target by other unknown sRNAs. Hence, the obtained regulatory network may be incomplete. We have developed a new mining software, sRNATargetDigger, that includes two function modules, ""Forward Digger"" and ""Reverse Digger"", which can identify regulatory sRNA-target pairs bidirectionally. Moreover, it has the ability to identify unknown sRNAs co-regulating the same target, in order to obtain a more authentic and reliable sRNA-target regulatory network. 
Upon re-examination of the published sRNA-target pairs in Arabidopsis thaliana, sRNATargetDigger found 170 novel co-regulatory sRNA-target pairs. This software can be downloaded from http://www.bioinfolab.cn/sRNATD.html.",2020-12-28 +33616469,Association between TNF-α and IFN-γ levels and severity of acute viral bronchiolitis.,"Acute bronchiolitis caused by the respiratory syncytial virus triggers an inflammatory response with the production and release of several pro-inflammatory cytokines. Evidence suggests that their levels are associated with the severity of the infection. This systematic review and meta-analysis aim to assess whether the levels of TNF-α and IFN-γ are associated with the severity of acute viral bronchiolitis. We searched MEDLINE libraries (via PUBMED), EMBASE, Cochrane Central Register of Controlled Trials (CENTRAL), Scientific Electronic Library Online (SciELO), Latin American Caribbean Health Sciences Literature (LILACS), Cumulative Index to Nursing and Allied Health Literature (CINAHL), Web of Science, and the gray literature through April 2020. Random effect models were used for general and subgroup analysis. In total, six studies were included with a total of 744 participants. The mean TNF-α levels between the severe group did not differ from the control group 0.14 (95% CI: -0.53 to 0.82, I2 = 91%, p < 0.01); the heterogeneity was high. The results remained insignificant when the analyses were performed including only studies with high quality 0.25 (95% CI: -0.46 to 0.96, I2 = 92%, p < 0.01) I2 = 95%, p = 0.815), when TNF-α was nasal 0.60 (95% CI: -0.49 to 1.69), I2 = 94%, p < 0.01), or serum -0.08 (95% CI: -0.48 to 0.31), I2 = 29%, p = 0.24). In the analysis of studies measuring IFN-γ, there was also no significance of -0.67 (95% CI: -1.56 to 0.22, I2 = 76%, p = 0.04). 
In conclusion, this meta-analysis suggests that the most severe patients do not have different mean TNF-α and IFN-γ values ​than patients with mild disease, but the heterogeneity of the studies was high. Supplemental data for this article is available online at https://doi.org/10.1080/08830185.2021.1889534.",2021-02-22 +34012710,A Comparison of Methods for Studying the Tumor Microenvironment's Spatial Heterogeneity in Digital Pathology Specimens.,"

Background

The tumor microenvironment is highly heterogeneous, and it is understood to affect tumor progression and patient outcome. A number of studies have reported the prognostic significance of tumor-infiltrating lymphocytes and tumor budding in colorectal cancer (CRC). However, the significance of the intratumoral heterogeneity present in the spatial distribution of these features within the tumor immune microenvironment (TIME) has not been previously reported. Evaluating this intratumoral heterogeneity may aid the understanding of the TIME's effect on patient prognosis as well as identify novel aggressive phenotypes which can be further investigated as potential targets for new treatment.

Methods

In this study, we propose and apply two spatial statistical methodologies for the evaluation of the intratumor heterogeneity present in the distribution of CD3 + and CD8 + lymphocytes and tumor buds (TB) in 232 Stage II CRC cases. Getis-Ord hotspot analysis was applied to quantify the cold and hotspots, defined as regions with a significantly low or high number of each feature of interest, respectively. A novel spatial heatmap methodology for the quantification of the cold and hotspots of each feature of interest, which took into account both the interpatient heterogeneity and the intratumor heterogeneity, was further developed.

Results

Resultant data from each analysis, characterizing the spatial intratumor heterogeneity of lymphocytes and TBs were used for the development of two new highly prognostic risk models.

Conclusions

Our results highlight the value of applying spatial statistics for the assessment of the intratumor heterogeneity. Both Getis-Ord hotspot and our proposed spatial heatmap analysis are broadly applicable across other tissue types as well as other features of interest.

Availability

The code underpinning this publication can be accessed at https://doi.org/10.17630/c2306fe9-66e2-4442-ad89-f986220053e2.",2021-01-28 +28655750,AspWood: High-Spatial-Resolution Transcriptome Profiles Reveal Uncharacterized Modularity of Wood Formation in Populus tremula.,"Trees represent the largest terrestrial carbon sink and a renewable source of ligno-cellulose. There is significant scope for yield and quality improvement in these largely undomesticated species, and efforts to engineer elite varieties will benefit from improved understanding of the transcriptional network underlying cambial growth and wood formation. We generated high-spatial-resolution RNA sequencing data spanning the secondary phloem, vascular cambium, and wood-forming tissues of Populus tremula The transcriptome comprised 28,294 expressed, annotated genes, 78 novel protein-coding genes, and 567 putative long intergenic noncoding RNAs. Most paralogs originating from the Salicaceae whole-genome duplication had diverged expression, with the exception of those highly expressed during secondary cell wall deposition. Coexpression network analyses revealed that regulation of the transcriptome underlying cambial growth and wood formation comprises numerous modules forming a continuum of active processes across the tissues. A comparative analysis revealed that a majority of these modules are conserved in Picea abies The high spatial resolution of our data enabled identification of novel roles for characterized genes involved in xylan and cellulose biosynthesis, regulators of xylem vessel and fiber differentiation and lignification. 
An associated web resource (AspWood, http://aspwood.popgenie.org) provides interactive tools for exploring the expression profiles and coexpression network.",2017-06-27 +32459325,PaCRISPR: a server for predicting and visualizing anti-CRISPR proteins.,"Anti-CRISPRs are widespread amongst bacteriophage and promote bacteriophage infection by inactivating the bacterial host's CRISPR-Cas defence system. Identifying and characterizing anti-CRISPR proteins opens an avenue to explore and control CRISPR-Cas machineries for the development of new CRISPR-Cas based biotechnological and therapeutic tools. Past studies have identified anti-CRISPRs in several model phage genomes, but a challenge exists to comprehensively screen for anti-CRISPRs accurately and efficiently from genome and metagenome sequence data. Here, we have developed an ensemble learning based predictor, PaCRISPR, to accurately identify anti-CRISPRs from protein datasets derived from genome and metagenome sequencing projects. PaCRISPR employs different types of feature recognition united within an ensemble framework. Extensive cross-validation and independent tests show that PaCRISPR achieves a significantly more accurate performance compared with homology-based baseline predictors and an existing toolkit. The performance of PaCRISPR was further validated in discovering anti-CRISPRs that were not part of the training for PaCRISPR, but which were recently demonstrated to function as anti-CRISPRs for phage infections. 
Data visualization on anti-CRISPR relationships, highlighting sequence similarity and phylogenetic considerations, is part of the output from the PaCRISPR toolkit, which is freely available at http://pacrispr.erc.monash.edu/.",2020-07-01 +32319523,miRViz: a novel webserver application to visualize and interpret microRNA datasets.,"MicroRNAs (miRNAs) are small non-coding RNAs that are involved in the regulation of major pathways in eukaryotic cells through their binding to and repression of multiple mRNAs. With high-throughput methodologies, various outcomes can be measured that produce long lists of miRNAs that are often difficult to interpret. A common question is: after differential expression or phenotypic screening of miRNA mimics, which miRNA should be chosen for further investigation? Here, we present miRViz (http://mirviz.prabi.fr/), a webserver application designed to visualize and interpret large miRNA datasets, with no need for programming skills. MiRViz has two main goals: (i) to help biologists to raise data-driven hypotheses and (ii) to share miRNA datasets in a straightforward way through publishable quality data representation, with emphasis on relevant groups of miRNAs. MiRViz can currently handle datasets from 11 eukaryotic species. We present real-case applications of miRViz, and provide both datasets and procedures to reproduce the corresponding figures. MiRViz offers rapid identification of miRNA families, as demonstrated here for the miRNA-320 family, which is significantly exported in exosomes of colon cancer cells. We also visually highlight a group of miRNAs associated with pluripotency that is particularly active in control of a breast cancer stem-cell population in culture.",2020-07-01 +32614400,iPromoter-BnCNN: a novel branched CNN-based predictor for identifying and classifying sigma promoters.,"

Motivation

Promoter is a short region of DNA which is responsible for initiating transcription of specific genes. Development of computational tools for automatic identification of promoters is in high demand. According to the difference of functions, promoters can be of different types. Promoters may have both intra- and interclass variation and similarity in terms of consensus sequences. Accurate classification of various types of sigma promoters still remains a challenge.

Results

We present iPromoter-BnCNN for identification and accurate classification of six types of promoters-σ24,σ28,σ32,σ38,σ54,σ70. It is a CNN-based classifier which combines local features related to monomer nucleotide sequence, trimer nucleotide sequence, dimer structural properties and trimer structural properties through the use of parallel branching. We conducted experiments on a benchmark dataset and compared with six state-of-the-art tools to show our supremacy on 5-fold cross-validation. Moreover, we tested our classifier on an independent test dataset.

Availability and implementation

Our proposed tool iPromoter-BnCNN web server is freely available at http://103.109.52.8/iPromoter-BnCNN. The runnable source code can be found https://colab.research.google.com/drive/1yWWh7BXhsm8U4PODgPqlQRy23QGjF2DZ.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +30959445,"Not-so-supervised: A survey of semi-supervised, multi-instance, and transfer learning in medical image analysis.","Machine learning (ML) algorithms have made a tremendous impact in the field of medical imaging. While medical imaging datasets have been growing in size, a challenge for supervised ML algorithms that is frequently mentioned is the lack of annotated data. As a result, various methods that can learn with less/other types of supervision, have been proposed. We give an overview of semi-supervised, multiple instance, and transfer learning in medical imaging, both in diagnosis or segmentation tasks. We also discuss connections between these learning scenarios, and opportunities for future research. A dataset with the details of the surveyed papers is available via https://figshare.com/articles/Database_of_surveyed_literature_in_Not-so-supervised_a_survey_of_semi-supervised_multi-instance_and_transfer_learning_in_medical_image_analysis_/7479416.",2019-03-29 +25228099,DruGeVar: an online resource triangulating drugs with genes and genomic biomarkers for clinical pharmacogenomics.,"

Background/aims

Pharmacogenomics aims to rationalize drug use by minimizing drug toxicity and/or by increasing drug efficacy. A large number of genomic markers have been correlated with variable drug responses and severity of adverse drug reactions. Although a number of these drugs bear pharmacogenomic information in their labels--approved by regulatory agencies--and comprehensive drug/gene lists exist online, information related to the respective pharmacogenomic biomarkers is currently missing from such lists.

Methods

We extracted information from the published literature and online resources and developed DruGeVar (http://drugevar.genomicmedicinealliance.org), an online resource triangulating drugs with genes and pharmacogenomic biomarkers in an effort to build a comprehensive database that could serve clinical pharmacogenomics.

Results and conclusions

A user-friendly data querying and visualization interface allows users to formulate simple and complex queries. Such a database would be readily applicable as a stand-alone resource or a plug-in module for other databases.",2014-09-09 +26519468,ccmGDB: a database for cancer cell metabolism genes.,"Accumulating evidence has demonstrated that rewiring of metabolism in cells is an important hallmark of cancer. The percentage of patients killed by metabolic disorder has been estimated to be 30% of the advanced-stage cancer patients. Thus, a systematic annotation of cancer cell metabolism genes is imperative. Here, we present ccmGDB (Cancer Cell Metabolism Gene DataBase), a comprehensive annotation database for cell metabolism genes in cancer, available at http://bioinfo.mc.vanderbilt.edu/ccmGDB. We assembled, curated, and integrated genetic, genomic, transcriptomic, proteomic, biological network and functional information for over 2000 cell metabolism genes in more than 30 cancer types. In total, we integrated over 260 000 somatic alterations including non-synonymous mutations, copy number variants and structural variants. We also integrated RNA-Seq data in various primary tumors, gene expression microarray data in over 1000 cancer cell lines and protein expression data. Furthermore, we constructed cancer or tissue type-specific, gene co-expression based protein interaction networks and drug-target interaction networks. Using these systematic annotations, the ccmGDB portal site provides 6 categories: gene summary, phenotypic information, somatic mutations, gene and protein expression, gene co-expression network and drug pharmacological information with a user-friendly interface for browsing and searching. ccmGDB is developed and maintained as a useful resource for the cancer research community.",2015-10-30 +26450948,CRCDA--Comprehensive resources for cancer NGS data analysis. 
,"Next generation sequencing (NGS) innovations put a compelling landmark in life science and changed the direction of research in clinical oncology with its productivity to diagnose and treat cancer. The aim of our portal comprehensive resources for cancer NGS data analysis (CRCDA) is to provide a collection of different NGS tools and pipelines under diverse classes with cancer pathways and databases and furthermore, literature information from PubMed. The literature data was constrained to 18 most common cancer types such as breast cancer, colon cancer and other cancers that exhibit in worldwide population. NGS-cancer tools for the convenience have been categorized into cancer genomics, cancer transcriptomics, cancer epigenomics, quality control and visualization. Pipelines for variant detection, quality control and data analysis were listed to provide out-of-the box solution for NGS data analysis, which may help researchers to overcome challenges in selecting and configuring individual tools for analysing exome, whole genome and transcriptome data. An extensive search page was developed that can be queried by using (i) type of data [literature, gene data and sequence read archive (SRA) data] and (ii) type of cancer (selected based on global incidence and accessibility of data). For each category of analysis, variety of tools are available and the biggest challenge is in searching and using the right tool for the right application. The objective of the work is collecting tools in each category available at various places and arranging the tools and other data in a simple and user-friendly manner for biologists and oncologists to find information easier. To the best of our knowledge, we have collected and presented a comprehensive package of most of the resources available in cancer for NGS data analysis. Given these factors, we believe that this website will be an useful resource to the NGS research community working on cancer. 
Database URL: http://bioinfo.au-kbc.org.in/ngs/ngshome.html.",2015-10-08 +31950976,Curation and annotation of planarian gene expression patterns with segmented reference morphologies.,"MOTIVATION:Morphological and genetic spatial data from functional experiments based on genetic, surgical and pharmacological perturbations are being produced at an extraordinary pace in developmental and regenerative biology. However, our ability to extract knowledge from these large datasets are hindered due to the lack of formalization methods and tools able to unambiguously describe, centralize and interpret them. Formalizing spatial phenotypes and gene expression patterns is especially challenging in organisms with highly variable morphologies such as planarian worms, which due to their extraordinary regenerative capability can experimentally result in phenotypes with almost any combination of body regions or parts. RESULTS:Here, we present a computational methodology and mathematical formalism to encode and curate the morphological outcomes and gene expression patterns in planaria. Worm morphologies are encoded with mathematical graphs based on anatomical ontology terms to automatically generate reference morphologies. Gene expression patterns are registered to these standard reference morphologies, which can then be annotated automatically with anatomical ontology terms by analyzing the spatial expression patterns and their textual descriptions. This methodology enables the curation and annotation of complex experimental morphologies together with their gene expression patterns in a centralized standardized dataset, paving the way for the extraction of knowledge and reverse-engineering of the much sought-after mechanistic models in planaria and other regenerative organisms. 
AVAILABILITY AND IMPLEMENTATION:We implemented this methodology in a user-friendly graphical software tool, PlanGexQ, freely available together with the data in the manuscript at https://lobolab.umbc.edu/plangexq. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +30265627,VIPERdb: A Tool for Virus Research.,"The VIrus Particle ExploreR database (VIPERdb) ( http://viperdb.scripps.edu ) is a database and web portal for primarily icosahedral virus capsid structures that integrates structure-derived information with visualization and analysis tools accessed through a set of web interfaces. Our aim in developing VIPERdb is to provide comprehensive structure-derived information on viruses comprising simple to detailed attributes such as size (diameter), architecture ( T number), genome type, taxonomy, intersubunit association energies, and surface-accessible residues. In addition, a number of web-based tools are provided to enable users to interact with the structures and compare and contrast structure-derived properties between different viruses. Recently, we have constructed a series of data visualizations using modern JavaScript charting libraries such as Google Charts that allow users to explore trends and gain insights based on the various data available in the database. Furthermore, we now include helical viruses and nonicosahedral capsids by implementing modified procedures for data curation and analysis. This article provides an up-to-date overview of VIPERdb, describing various data and tools that are currently available and how to use them to facilitate structure-based bioinformatics analysis of virus capsids.",2018-09-01 +33174233,The CAR group of Ig cell adhesion proteins-Regulators of gap junctions?,"Members of the CAR group of Ig-like type I transmembrane proteins mediate homotypic cell adhesion, share a common overall extracellular domain structure and are closely related at the amino acid sequence level. 
CAR proteins are often found at tight junctions and interact with intracellular scaffolding proteins, suggesting that they might modulate tight junction assembly or function. However, impairment of tight junction integrity has not been reported in mouse knockout models or zebrafish mutants of CAR members. In contrast, in the same knockout models deficits in gap junction communication were detected in several organ systems, including the atrioventricular node of the heart, smooth muscle cells of the intestine and the ureter and in Sertoli cells of the testes. Possible interactions between BT-IgSF and connexin41.8 on the disturbed pattern of pigment stripes found in zebrafish mutants and between ESAM and connexin43 during hematopoiesis in the mouse are also discussed. On the basis of the combined data and phenotypic similarities between CAR member mutants and connexin mutants I hypothesize that they primarily play a role in the organization of gap junction communication. Also see the video abstract here: https://youtu.be/i0yq2KhuDAE.",2020-11-10 +32026395,Patients with Asthma Prescribed Once-Daily Fluticasone Furoate/Vilanterol or Twice-Daily Fluticasone Propionate/Salmeterol as Maintenance Treatment: Analysis from a Claims Database.,"INTRODUCTION:There is a paucity of data describing prescribing patterns and adherence to therapy of inhaled corticosteroids (ICS) in combination with long-acting β2-agonists (LABA) in the Japanese population in clinical practice. METHODS:This was a non-interventional, retrospective, cohort study of patients who were prescribed medication for asthma, using data from the Japan Medical Data Center Claims Database. Data from patients aged ≥ 15 years with a prescription of asthma drugs between December 2014 and October 2015 (Day 0, the index date when asthma medication was initiated) were analysed in 12-month pre-index and post-index periods. 
Part 1 focused on baseline characteristics and epidemiological outcomes in the pre- and post-index period in the overall asthma population, whereas comparing medication adherence [number of prescribed days per year and proportion of days covered (PDC)] between ICS/LABA-naïve patients treated with once-daily fluticasone furoate/vilanterol (FF/VI) and twice-daily fluticasone propionate/salmeterol (FP/SAL) was the primary endpoint in Part 2. RESULTS:Of the available patient data (N = 2,953,652), 28,699 patients were identified as having asthma. ICS/LABA was the main asthma treatment prescribed; 11,167 (38.9%) patients were continuous ICS/LABA users. In ICS/LABA-naïve asthma patients, treatment with once-daily FF/VI was associated with higher medication adherence compared with twice-daily FP/SAL; mean [standard deviation (SD)] number of prescribed days per year was 97.8 (115.9) for FF/VI versus 80.5 (92.7) for FP/SAL (p = 0.04), mean (SD) PDC was 26.7% (31.5) for FF/VI versus 21.9% (24.8) for FP/SAL (p = 0.04). FF/VI was also associated with a lower rate of treatment discontinuation and no difference in use of short-acting beta2-agonists or oral corticosteroids compared with FP/SAL. CONCLUSIONS:ICS/LABA was the major prescribed asthma treatment in Japan. Medication adherence was greater with FF/VI, which may indicate that patients are more likely to adhere to once-daily FF/VI versus twice-daily FP/SAL. FUNDING:This study was funded by GSK (study sponsor). STUDY REGISTRATION:GSK Study No. 207264, GSK Study Register site: https://www.gsk-clinicalstudyregister.com/search/?search_terms=207264 .",2018-10-30 +33253170,Deconvolution of heterogeneous tumor samples using partial reference signals.,"Deconvolution of heterogeneous bulk tumor samples into distinct cellular populations is an important yet challenging problem, particularly when only partial references are available. 
A common approach to dealing with this problem is to deconvolve the mixed signals using available references and leverage the remaining signal as a new cell component. However, as indicated in our simulation, such an approach tends to over-estimate the proportions of known cell types and fails to detect novel cell types. Here, we propose PREDE, a partial reference-based deconvolution method using an iterative non-negative matrix factorization algorithm. Our method is verified to be effective in estimating cell proportions and expression profiles of unknown cell types based on simulated datasets at a variety of parameter settings. Applying our method to TCGA tumor samples, we found that proportions of pure cancer cells better indicate different subtypes of tumor samples. We also detected several cell types for each cancer type whose proportions successfully predicted patient survival. Our method makes a significant contribution to deconvolution of heterogeneous tumor samples and could be widely applied to varieties of high throughput bulk data. PREDE is implemented in R and is freely available from GitHub (https://xiaoqizheng.github.io/PREDE).",2020-11-30 +,AB032. Opportunities for real-life respiratory research in Korea: the HIRA database and beyond,"

Background

To date, most real-life respiratory research has been conducted using databases from Europe and North America. However, over half of the World population suffering from asthma or chronic obstructive pulmonary disease (COPD) are living outside these areas, limiting generalizability of previous study results. Moreover, differences in population characteristics, health care systems and treatment availability may highly impact treatment effectiveness and drug safety. In Asia, the Korean Health Insurance Review & Assessment (HIRA) database may offer interesting opportunities. To provide an overview of the opportunities for real-life respiratory research in Korea, with a focus on the HIRA database.

Methods

First, the contents and procedures of the HIRA database (http://www.hira.or.kr) were assessed and characterized. Subsequently, its strengths and limitations were assessed. The database was compared with some other leading databases in Korea, Asia and beyond. Lastly, some examples of its application in respiratory research projects were explored, as well as the identification of potential future research opportunities.

Results

The HIRA database covers the complete medical healthcare utilization data of the entire population of South Korea, that is, 50 million people, over the period 2008–2015. It provides a unique and unbiased overview of healthcare utilization and cost data (including almost all primary care, pharmacy, hospital data) on a national level. The HIRA database has been extensively described (Kim, Kim, Kim Epi Health 2014) and has been used in several previous studies including specific studies in the respiratory field, discussed elsewhere. It uses ICD-10 coding, but has no patient-reported health outcomes (such as CAT, CCQ, mMRC), lung function data or smoking status. So far, multiple reports using HIRA data have been published in high-impact respiratory journals on asthma, COPD and asthma-COPD overlap syndrome (ACOS), and more study protocols are currently in development. Notably, REG/OPRI is planning a HIRA study that will look at overall asthma treatment patterns and asthma control in real-life. Another Korean database of interest included the database of the Ajou Medical Center, a smaller, but more clinically orientated database.

Conclusions

Korean databases, such as HIRA, offer interesting opportunities worth exploring for future real-life respiratory research in Asia.",2016-07-01 +32347714,MAPPS: A Web-Based Tool for Metabolic Pathway Prediction and Network Analysis in the Postgenomic Era.,"Comparative and evolutionary analyses of metabolic networks have a wide range of applications, ranging from research into metabolic evolution through to practical applications in drug development, synthetic biology, and biodegradation. We present MAPPS: Metabolic network Analysis and Pathway Prediction Server (https://mapps.lums.edu.pk), a web-based tool to study functions and evolution of metabolic networks using traditional and 'omics data sets. MAPPS provides diverse functionalities including an interactive interface, graphical visualization of results, pathway prediction and network comparison, identification of potential drug targets, in silico metabolic engineering, host-microbe interactions, and ancestral network building. Importantly, MAPPS also allows users to upload custom data, thus enabling metabolic analyses on draft and custom genomes, and has an 'omics pipeline to filter pathway results, making it relevant in today's postgenomic era.",2020-04-29 +30266409,"PlaD: A Transcriptomics Database for Plant Defense Responses to Pathogens, Providing New Insights into Plant Immune System.","High-throughput transcriptomics technologies have been widely used to study plant transcriptional reprogramming during the process of plant defense responses, and a large quantity of gene expression data have been accumulated in public repositories. However, utilization of these data is often hampered by the lack of standard metadata annotation. In this study, we curated 2444 public pathogenesis-related gene expression samples from the model plant Arabidopsis and three major crops (maize, rice, and wheat). We organized the data into a user-friendly database termed as PlaD. Currently, PlaD contains three key features. 
First, it provides large-scale curated data related to plant defense responses, including gene expression and gene functional annotation data. Second, it provides the visualization of condition-specific expression profiles. Third, it allows users to search co-regulated genes under the infections of various pathogens. Using PlaD, we conducted a large-scale transcriptome analysis to explore the global landscape of gene expression in the curated data. We found that only a small fraction of genes were differentially expressed under multiple conditions, which might be explained by their tendency of having more network connections and shorter network distances in gene networks. Collectively, we hope that PlaD can serve as an important and comprehensive knowledgebase to the community of plant sciences, providing insightful clues to better understand the molecular mechanisms underlying plant immune responses. PlaD is freely available at http://systbio.cau.edu.cn/plad/index.php or http://zzdlab.com/plad/index.php.",2018-08-01 +33330622,ANCA: A Web Server for Amino Acid Networks Construction and Analysis.,"Amino acid network (AAN) models empower us to gain insights into protein structures and functions by describing a protein 3D structure as a graph, where nodes represent residues and edges as amino acid interactions. Here, we present the ANCA, an interactive Web server for Amino Acids Network Construction and Analysis based on a single structure or a set of structures from the Protein Data Bank. The main purpose of ANCA is to provide a portal for three types of an environment-dependent residue contact energy (ERCE)-based network model, including amino acid contact energy network (AACEN), node-weighted amino acid contact energy network (NACEN), and edge-weighted amino acid contact energy network (EACEN). For comparison, the C-alpha distance-based network model is also included, which can be extended to protein-DNA/RNA complexes. 
Then, the analyses of different types of AANs were performed and compared from node, edge, and network levels. The network and corresponding structure can be visualized directly in the browser. The ANCA enables researchers to investigate diverse concerns in the framework of AAN, such as the interpretation of allosteric regulation and functional residues. The ANCA portal, together with an extensive help, is available at http://sysbio.suda.edu.cn/anca/.",2020-11-19 +28943872,In Silico Screening of the Human Gut Metaproteome Identifies Th17-Promoting Peptides Encrypted in Proteins of Commensal Bacteria.,"Scientific studies focused on the role of the human microbiome over human health have generated billions of gigabits of genetic information during the last decade. Nowadays integration of all this information in public databases and development of pipelines allowing us to biotechnologically exploit this information are urgently needed. Prediction of the potential bioactivity of the products encoded by the human gut microbiome, or metaproteome, is the first step for identifying proteins responsible for the molecular interaction between microorganisms and the immune system. We have recently published the Mechanism of Action of the Human Microbiome (MAHMI) database (http://www.mahmi.org), conceived as a resource compiling peptide sequences with a potential immunomodulatory activity. Fifteen out of the 300 hundred million peptides contained in the MAHMI database were synthesized. These peptides were identified as being encrypted in proteins produced by gut microbiota members, they do not contain cleavage points for the major intestinal endoproteases and displayed high probability to have immunomodulatory bioactivity. The bacterial peptides FR-16 and LR-17 encrypted in proteins from Bifidobacterium longum DJ010A and Bifidobacterium fragilis YCH46 respectively, showed the higher immune modulation capability over human peripheral blood mononuclear cells. 
Both peptides modulated the immune response toward increases in the Th17 and decreases in the Th1 cell response, together with an induction of IL-22 production. These results strongly suggest the combined use of bioinformatics and in vitro tools as a first stage in the screening of bioactive peptides encrypted in the human gut metaproteome.",2017-09-08 +32761559,The top 100 most-cited articles citing human brain banking from 1970 to 2020: a bibliometric analysis.,"Many articles involving human brain banks have been published. Bibliometric analysis can determine the history of the development of research and future research trends in a specific field. Three independent researchers retrieved and reviewed articles from the Web of Science database using the following strategy: ""TS = (((brain OR cerebral) AND (bank* OR biobank*)) OR brainbank*)."" The top 100 most-cited articles were identified and listed in descending order by total citations. Web of Science was used to identify ten recent articles describing bank construction. GeenMedical ( https://www.geenmedical.com/ ) was used to identify ten recent articles from journals with an impact factor (IF) > 20. The top 100 most-cited articles citing human brain banks were published between 1991 and 2017. Fifty-two percent of the articles focused on a specific type of neurodegenerative disease, and 16% discussed the construction and development of human brain banks. Articles using brain tissue had more total and annual citations than those on bank construction. Ten articles with high IFs were published from 2017 to 2019, and they were primarily studies using novel research techniques such RNA sequencing and genome-wide association studies. Most studies were published in journals specializing in neurology or neuroscience such as Movement Disorders (10%), and had been conducted in the United States (52%) by neurologists (62%). 
The top 100 most-cited articles and recent publications citing human brain banks and their bibliometric characteristics were identified and analyzed, which may serve as a useful reference and pave the way for further research.",2020-08-06 +32762322,First Report of canker and branch dieback of sweet cherry trees caused by Calosphaeria pulchella in Chile. ,"In Chile, the 2019-2020 sweet cherry season yielded 228,548 t, produced on 38,392 hectares and an average annual crop value about US$1.6 billion (http://www.iqonsulting.com/yb/). Between autumn 2019 and summer of 2020, branch and limbs dieback symptoms were observed in two 12-year-old sweet cherry (Prunus avium L.) orchards located in the O'Higgins region (Chile Central Valley). Furthermore, other symptoms such as wilting leaves, cankers, bark cracking, emission of gum exudates and internal wood necrosis were detected on trees of ""Bing"", ""Santina"" and ""Sweetheart"" cultivars (Cainelli et al. 2017). Wood fragments from symptomatic branches were surface sterilized with 95% ethanol, flaming and placed onto potato dextrose agar (PDA) amended with 0.5 g liter-1 of streptomycin sulfate (Berbegal et al. 2014). After 7 days of incubation at 25°C, pink to red colonies with white margins were isolated. Each isolate was characterized by having hyaline and oblong-ellipsoidal conidia of 5.76 ± 0.88 × 1.76 ± 0.36 μm (n=100) (Trouillas et al. 2012). According to these morphological features, the fungus was identified as Calosphaeria pulchella (Pers.: Fr.) J. Schröt (anamorph Calosphaeriosphora pulchella Réblová,L. Mostert, W. Gams & Crous) (Réblová et al. 2004). ITS (Internal Transcribed Spacer region of the rDNA) sequence comparison using BLAST analysis revealed a 99.48% identity and 100% query coverage between C. pulchella sequence HM237297 and the Chilean isolates. Moreover, the Chilean isolates were confirmed by means of phylogenetic analysis using ITS sequences of C. pulchella available in GenBank database. 
The maximum-parsimony phylogenetic tree supported the cluster analysis of the Chilean C. pulchella isolates with those obtained in other regions of the world with a bootstrap value of 95% (Berbegal et al. 2014; Trouillas et al. 2012). The Chilean ITS sequences were deposited into GenBank (MT378444 to MT378447). Two-year-old sweet cherry trees cv. Bing were inoculated with the Chilean isolates. Six trees were used as replicates. To accomplish this goal, two punctures of 5mm diameter were made in two branches per tree with a cork borer and a plug of mycelium from 7-day-old colonies was laid on the wound mycelium side down. Six trees were inoculated with sterile agar plugs. Every puncture was sealed with petroleum jelly and wrapped with parafilm. Four months after inoculation, the vascular streaking developing from the inoculated wounds was measured. The average lesion lengths on inoculated and non-inoculated shoots were 43.79 and 21.79 mm, respectively, which were significantly different according LSD Fisher test (p<0.05). C. pulchella was recovered from all the inoculated branches. No fungus was isolated from the controls, confirming Koch's postulates (Trouillas et al. 2012). To our knowledge this is the first report of C. pulchella causing canker and branch dieback in sweet cherry trees in Chile. This new disease represents a serious threat to the Chilean cherry industry, and further research on disease control is needed.",2020-08-06 +32348158,"""Tell Me About Your Child"": A Grounded Theory Study of Mothers' Understanding of Language Disorder.","Purpose The purpose of this study was to generate a theory grounded in data explaining caregivers' understanding of their child's language disorder and the perceived role of speech-language pathologists in facilitating this knowledge. Method This study employed grounded theory as a conceptual framework. 
Qualitative data were generated based on semistructured interviews conducted with 12 mothers of children who had received speech-language pathology services. Results The following themes emerged from the data analysis: (a) Many mothers reported receiving confusing or irrelevant diagnostic terms for language disorder, (b) mothers of children with language disorders were distressed about their children's language problems, (c) mothers did not always trust or understand their children's speech-language pathologist, and (d) mothers were satisfied with the interventions their child had been receiving. Mothers described their children's language disorder using a total of 23 labels, most of which were not useful for accessing meaningful information about the nature of their child's communication problem. Generally, mothers reported they did not receive language-related diagnostic labels from speech-language pathologists for their child's language disorder. Conclusions Two theories were generated from the results: (a) Lack of information provided to mothers about their child's language disorder causes mothers psychological harm that appears to be long lasting. (b) Difficulties in successfully relaying information about language disorders to parents result in negative perceptions of speech-language pathology. Implications and future directions are discussed. Supplemental Material https://doi.org/10.23641/asha.12177390.",2020-04-29 +26708988,HitPredict version 4: comprehensive reliability scoring of physical protein-protein interactions from more than 100 species. ,"HitPredict is a consolidated resource of experimentally identified, physical protein-protein interactions with confidence scores to indicate their reliability. The study of genes and their inter-relationships using methods such as network and pathway analysis requires high quality protein-protein interaction information. 
Extracting reliable interactions from most of the existing databases is challenging because they either contain only a subset of the available interactions, or a mixture of physical, genetic and predicted interactions. Automated integration of interactions is further complicated by varying levels of accuracy of database content and lack of adherence to standard formats. To address these issues, the latest version of HitPredict provides a manually curated dataset of 398 696 physical associations between 70 808 proteins from 105 species. Manual confirmation was used to resolve all issues encountered during data integration. For improved reliability assessment, this version combines a new score derived from the experimental information of the interactions with the original score based on the features of the interacting proteins. The combined interaction score performs better than either of the individual scores in HitPredict as well as the reliability score of another similar database. HitPredict provides a web interface to search proteins and visualize their interactions, and the data can be downloaded for offline analysis. Data usability has been enhanced by mapping protein identifiers across multiple reference databases. Thus, the latest version of HitPredict provides a significantly larger, more reliable and usable dataset of protein-protein interactions from several species for the study of gene groups. Database URL: http://hintdb.hgc.jp/htp.",2015-12-26 +25161253,Extracting patterns of database and software usage from the bioinformatics literature.,"

Motivation

As a natural consequence of being a computer-based discipline, bioinformatics has a strong focus on database and software development, but the volume and variety of resources are growing at unprecedented rates. An audit of database and software usage patterns could help provide an overview of developments in bioinformatics and community common practice, and comparing the links between resources through time could demonstrate both the persistence of existing software and the emergence of new tools.

Results

We study the connections between bioinformatics resources and construct networks of database and software usage patterns, based on resource co-occurrence, that correspond to snapshots of common practice in the bioinformatics community. We apply our approach to pairings of phylogenetics software reported in the literature and argue that these could provide a stepping stone into the identification of scientific best practice.

Availability and implementation

The extracted resource data, the scripts used for network generation and the resulting networks are available at http://bionerds.sourceforge.net/networks/.",2014-09-01 +26523488,An action to an object does not improve its episodic encoding but removes distraction.,"There is some debate as to whether responding to objects in our environment improves episodic memory or does not impact it. Some authors claim that actively encoding objects improves their representation in episodic memory. Conversely, episodic memory has also been shown to improve in passive conditions, suggesting that the action itself could interfere with the encoding process. This study looks at the impact of attention and action on episodic memory using a novel what-where-when (WWW) task that includes information about object identity (what) and spatial (where) and temporal (when) properties. With this approach, we studied the episodic memory of 2 types of objects: a target, where attention or an action is defined, and a distractor, an object to be ignored, following 2 selective states: active versus passive selection. When targets were actively selected, we found no evidence of episodic memory enhancement compared to passive selection; instead, memory from irrelevant sources was suppressed. The pattern was replicated across a 2-D static display and a more realistic 3-D virtual environment. This selective attention effect on episodic memory was not observed on nonepisodic measures, demonstrating a link between attention and the encoding of episodic experiences. (PsycINFO Database Record",2015-11-02 +30994381,Linking Bisphenol S to Adverse Outcome Pathways Using a Combined Text Mining and Systems Biology Approach.,"

Background

Available toxicity data can be optimally interpreted if they are integrated using computational approaches such as systems biology modeling. Such approaches are particularly warranted in cases where regulatory decisions have to be made rapidly.

Objectives

The study aims at developing and applying a new integrative computational strategy to identify associations between bisphenol S (BPS), a substitute for bisphenol A (BPA), and components of adverse outcome pathways (AOPs).

Methods

The proposed approach combines a text mining (TM) procedure and integrative systems biology to comprehensively analyze the scientific literature to enrich AOPs related to environmental stressors. First, to identify relevant associations between BPS and different AOP components, a list of abstracts was screened using the developed text-mining tool AOP-helpFinder, which calculates scores based on the graph theory to prioritize the findings. Then, to fill gaps between BPS, biological events, and adverse outcomes (AOs), a systems biology approach was used to integrate information from the AOP-Wiki and ToxCast databases, followed by manual curation of the relevant publications.

Results

Links between BPS and 48 AOP key events (KEs) were identified and scored via 31 references. The main outcomes were related to reproductive health, endocrine disruption, impairments of metabolism, and obesity. We then explicitly analyzed co-mention of the terms BPS and obesity by data integration and manual curation of the full text of the publications. Several molecular and cellular pathways were identified, which allowed the proposal of a biological explanation for the association between BPS and obesity.

Conclusions

By analyzing dispersed information from the literature and databases, our novel approach can identify links between stressors and AOP KEs. The findings associating BPS and obesity illustrate the use of computational tools in predictive toxicology and highlight the relevance of the approach to decision makers assessing substituents to toxic chemicals. https://doi.org/10.1289/EHP4200.",2019-04-01 +33544274,The Characterization of Sex Differences in Hypoglycemia-Induced Activation of HPA Axis on the Transcriptomic Level.,"Activation of the hypothalamic-pituitary-adrenal (HPA) axis using an insulin tolerance test (ITT) is a medical diagnostic procedure that is frequently used in humans to assess the HPA and growth-hormone (GH) axes. Whether sex differences exist in the response to ITT stress is unknown. Thus, investigations into the analysis of transcripts during activation of the HPA axis in response to hypoglycemia have revealed the underlying influences of sex in signaling pathways that stimulate the HPA axis. We assessed four time points of ITT application in Balb/c mice. After insulin injection, expression levels of 192 microRNAs and 41 mRNAs associated with the HPA, GH and hypothalamic-pituitary-gonadal (HPG) axes were determined by real-time RT-PCR in the hypothalamus, pituitary and adrenal tissues, as well as blood samples (Raw data accession: https://drive.google.com/drive/folders/10qI00NAtjxOepcNKxSJnQbJeBFa6zgHK?usp=sharing ). Although the ITT is commonly used as a gold standard for evaluating the HPA axis, we found completely different responses between males and females with respect to activation of the HPA axis. While activation of several transcripts in the hypothalamus and pituitary was observed after performing the ITT in males within 10 min, females responded via the pituitary and adrenal immediately and durably over 40 min. Additionally, we found that microRNA alterations precede mRNA responses in the HPA axis. 
Furthermore, robust changes in the levels of several transcripts including Avpr1b and Avpr2 observed at all time points strongly suggest that transcriptional control of these genes occurs mostly via differential signaling in pituitary and blood between males and females. Male and female HPA axis responses to ITT involve a number of sophisticated regulatory signaling pathways of miRNAs and mRNAs. Our results highlight the first robust markers in several layers of HPA, HPG and GH axis involved in ITT/hypoglycemia stress-induced dynamics.",2021-02-05 +32691263,"Letter to the Editor on ""Prediction of Knee Kinematics at Time of Noncontact Anterior Cruciate Ligament Injuries Based on Bone Bruises"".","The aim of the present Letter was to comment on the paper ""Prediction of Knee Kinematics at Time of Noncontact Anterior Cruciate Ligament Injuries Based on Bone Bruises"" from Shi et al. (Ann Biomed Eng, 2020, https://doi.org/10.1007/s10439-020-02523-y ). Though the authors provided an extremely interesting paper on a debated topic in Sport Medicine, with a strong methodology and consistent results, caution should be used when drawing conclusions on Anterior Cruciate Ligament injury mechanism through the interpretation of such data.",2020-07-20 +31832668,AcetoBase: a functional gene repository and database for formyltetrahydrofolate synthetase sequences. ,"Acetogenic bacteria are imperative to environmental carbon cycling and diverse biotechnological applications, but their extensive physiological and taxonomical diversity is an impediment to systematic taxonomic studies. Acetogens are chemolithoautotrophic bacteria that perform reductive carbon fixation under anaerobic conditions through the Wood-Ljungdahl pathway (WLP)/acetyl-coenzyme A pathway. The gene-encoding formyltetrahydrofolate synthetase (FTHFS), a key enzyme of this pathway, is highly conserved and can be used as a molecular marker to probe acetogenic communities. 
However, there is a lack of systematic collection of FTHFS sequence data at nucleotide and protein levels. In an attempt to streamline investigations on acetogens, we developed AcetoBase - a repository and database for systematically collecting and organizing information related to FTHFS sequences. AcetoBase also provides an opportunity to submit data and obtain accession numbers, perform homology searches for sequence identification and access a customized blast database of submitted sequences. AcetoBase provides the prospect to identify potential acetogenic bacteria, based on metadata information related to genome content and the WLP, supplemented with FTHFS sequence accessions, and can be an important tool in the study of acetogenic communities. AcetoBase can be publicly accessed at https://acetobase.molbio.slu.se.",2019-01-01 +32685640,Lipid profile dataset of optogenetics induced optic nerve regeneration.,"The optic nerve transfers visual information from the retina to the brain through the axons of retinal ganglion cells (RGCs). In adult mammals, optic nerve injuries and progressive degenerative diseases lead to the irreversible loss of RGCs, resulting in vision loss and blindness. Optogenetic models have proved useful in manipulating the growth of RGCs through expression and stimulation of channelrhodopsins (Chr2) in RGCs using the RGC-specific thy-1 promoter. Using transgenic Chr2 mouse (Thy1-ChR2-EYFP) as a model of regeneration, we profile the lipid changes which occur after traumatic optic nerve crush, light stimulation and forced RGC axonal growth. Thy1-ChR2-EYFP and control (C57BL/6) mice were divided in four groups each - 1) no crush and no stimulation, 2) no crush with stimulation, 3) crush and without stimulation, and 4) crush with stimulation. After euthanasia, the optic nerves were collected for lipidomic analysis. 
The Bligh and Dyer method was used for lipid extraction, followed by mass spectrometry lipid profiling with a Q-Exactive Orbitrap Liquid Chromatography-Mass Spectrometer (LC MS-MS). The raw scans were analysed with LipidSearch 4.1.3 and the statistical analysis was conducted through Metaboanalyst 4.0. This data is available at Metabolomics Workbench, study ID ST001381: [https://www.metabolomicsworkbench.org/data/DRCCMetadata.php?Mode=Study&StudyID=ST001381&StudyType=MS&ResultType=5].",2020-07-05 +26656885,Kiwifruit Information Resource (KIR): a comparative platform for kiwifruit genomics. ,"The Kiwifruit Information Resource (KIR) is dedicated to maintain and integrate comprehensive datasets on genomics, functional genomics and transcriptomics of kiwifruit (Actinidiaceae). KIR serves as a central access point for existing/new genomic and genetic data. KIR also provides researchers with a variety of visualization and analysis tools. Current developments include the updated genome structure of Actinidia chinensis cv. Hongyang and its newest genome annotation, putative transcripts, gene expression, physical markers of genetic traits as well as relevant publications based on the latest genome assembly. Nine thousand five hundred and forty-seven new transcripts are detected and 21 132 old transcripts are changed. At the present release, the next-generation transcriptome sequencing data has been incorporated into gene models and splice variants. Protein-protein interactions are also identified based on experimentally determined orthologous interactions. Furthermore, the experimental results reported in peer-reviewed literature are manually extracted and integrated within a well-developed query page. In total, 122 identifications are currently associated, including commonly used gene names and symbols. All KIR datasets are helpful to facilitate a broad range of kiwifruit research topics and freely available to the research community. 
Database URL: http://bdg.hfut.edu.cn/kir/index.html.",2015-12-09 +32181288,Data of pre-IPO or listing firm characteristics and post-listing stock returns of Chinese listed firms.,"The data set contains the data of variables, such as indicators of forms or host markets of the Chinese listed firms, pre-IPO or listing firm characteristics and post-IPO or listing stock returns. These variables can be used to estimate the probabilities of overseas listing in specific forms or locations through binary probit or multinomial logistic regression, and to evaluate the consequences of overseas listing in some specific forms or locations via the two-factor asset pricing model. Furthermore, these data can be used to estimate the models simultaneously within the potential outcome framework as done in the paper, entitled 'Direct overseas listing versus cross-listing: A multivalued treatment effects analysis of Chinese listed firms'. https://doi.org/10.1016/j.irfa.2019.101391.",2020-01-31 +30407596,Cancer3D 2.0: interactive analysis of 3D patterns of cancer mutations in cancer subsets.,"Our knowledge of cancer genomics exploded in last several years, providing us with detailed knowledge of genetic alterations in almost all cancer types. Analysis of this data gave us new insights into molecular aspects of cancer, most important being the amazing diversity of molecular abnormalities in individual cancers. The most important question in cancer research today is how to classify this diversity to identify subtypes that are most relevant for treatment and outcome prediction for individual patients. The Cancer3D database at http://www.cancer3d.org gives an open and user-friendly way to analyze cancer missense mutations in the context of structures of proteins they are found in and in relation to patients' clinical data. 
This approach allows users to find novel candidate driver regions for specific subgroups, that often cannot be found when similar analyses are done on the whole gene level and for large, diverse cohorts. Interactive interface allows user to visualize the distribution of mutations in subgroups defined by cancer type and stage, gender and age brackets, patient's ethnicity or vice versa find dominant cancer type, gender or age groups for specific three-dimensional mutation patterns.",2019-01-01 +32750074,RNAAgeCalc: A multi-tissue transcriptional age calculator.,"Biological aging reflects decline in physiological functions and is an effective indicator of morbidity and mortality. Numerous epigenetic age calculators are available, however biological aging calculators based on transcription remain scarce. Here, we introduce RNAAgeCalc, a versatile across-tissue and tissue-specific transcriptional age calculator. By performing a meta-analysis of transcriptional age signature across multi-tissues using the GTEx database, we identify 1,616 common age-related genes, as well as tissue-specific age-related genes. Based on these genes, we develop new across-tissue and tissue-specific age predictors. We show that our transcriptional age calculator outperforms other prior age related gene signatures as indicated by the higher correlation with chronological age as well as lower median and median error. Our results also indicate that both racial and tissue differences are associated with transcriptional age. Furthermore, we demonstrate that the transcriptional age acceleration computed from our within-tissue predictor is significantly correlated with mutation burden, mortality risk and cancer stage in several types of cancer from the TCGA database, and offers complementary information to DNA methylation age. 
RNAAgeCalc is available at http://www.ams.sunysb.edu/~pfkuan/softwares.html#RNAAgeCalc, both as Bioconductor and Python packages, accompanied by a user-friendly interactive Shiny app.",2020-08-04 +33100475,Panel forecasts of country-level Covid-19 infections.,"We use a dynamic panel data model to generate density forecasts for daily active Covid-19 infections for a panel of countries/regions. Our specification that assumes the growth rate of active infections can be represented by autoregressive fluctuations around a downward sloping deterministic trend function with a break. Our fully Bayesian approach allows us to flexibly estimate the cross-sectional distribution of slopes and then implicitly use this distribution as prior to construct Bayes forecasts for the individual time series. We find some evidence that information from locations with an early outbreak can sharpen forecast accuracy for late locations. There is generally a lot of uncertainty about the evolution of active infection, due to parameter and shock uncertainty, in particular before and around the peak of the infection path. Over a one-week horizon, the empirical coverage frequency of our interval forecasts is close to the nominal credible level. Weekly forecasts from our model are published at https://laurayuliu.com/covid19-panel-forecast/.",2020-10-16 +32131605,Difference in HIV prevalence by testing venue: results from population level survey in Uganda.,"Growing demand for use of Health Facility (HF) HIV testing data, in addition to other testing data to obtain district level HIV prevalence requires understanding the comparability of these various sources. We analysed the 2011 Uganda AIDS indicator survey data to assess: the proportion of people tested for HIV across Uganda by venue of testing; HIV prevalence ratio for those tested in a HF compared to those tested in community setting; [Katz, D., Baptista, J., Azen, S. P., & Pike, M. C. (1978). 
Obtaining confidence intervals for the risk ratio in cohort studies. International Biometric Society, 34(3), 469-474. https://doi.org/10.2307/2530610] and factors associated with HIV positivity in each subgroup. Of the 11,685 individuals, 8978 (77.1%) had ever tested for HIV in a HF. Fifty nine per cent tested in a HF in the 12 months preceding the survey (female: 5507, 72.7% versus male: 1413, 34.9%). HIV prevalence ratio was 1.8 times among those tested in a HF compared to those tested at community setting (10.9% [95% CI: 10.0-11.7] versus 6.2% [95% CI: 5.4-7.0]). Among HF testers, older age group, previously married and having no sexual partner was associated with significantly higher HIV prevalence. Using facility testing data for planning and decisions should take into consideration the elevated and varying HIV prevalence among individuals accessing HIV testing services at HFs as well as differences in their social-demographic characteristics.",2020-03-04 +32320624,A Survey of Counseling Curricula Among Accredited Communication Sciences and Disorders Graduate Student Programs.,"Purpose The purpose of this article is to examine the current state of counseling curriculum within the discipline. The last systematic survey of counseling curriculum within the disciplines of communication sciences and disorders was completed with data from 1983 (McCarthy et al., 1986). The Council on Academic Accreditation in Audiology and Speech-Language Pathology (2017) states that counseling should be included in accredited programs but does not specify to what extent. Currently, there are no standards to specify number of credits, need for a stand-alone course, or guidance regarding content delivered. Method The present investigation collected data on the status of counseling curricula in accredited communication sciences and disorders graduate programs. A Qualtrics survey was distributed to identify counseling curriculum practices across accredited programs. 
Quantitative data such as percentages and frequency counts were compiled to summarize program offerings. Qualitative analyses were used to characterize written responses. Survey responses were also cross-validated with a review of offerings listed on program websites. Results Of programs currently accredited by the Council on Academic Accreditation in Audiology and Speech-Language Pathology, 42.4% responded to the current survey. Fifty-nine percent of programs offer a stand-alone course. Review of curricula from program websites indicated that only 40% of accredited programs offer a stand-alone counseling course. Quantitative details about requirements, number of credits, and embedding counseling within other courses were compared to data from the 1983 survey. Qualitative analyses identified common learner outcomes and the nature of course or curricular content. Conclusions Investigators found a lack of consistency in incorporating counseling across programs and discussed implications of this in speech-language pathology practice. A decrease in the number of programs that offer a stand-alone counseling course was identified as compared to offerings in 1983, as well as a disparity regarding how programs provide training in counseling. Furthermore, survey responses differed from curriculum listings on program websites. Information derived from this study may serve as a starting point for the development of flexible standards that provide direction for achieving consistent preparation of counseling skills. Supplemental Material https://doi.org/10.23641/asha.12149703.",2020-04-21 +26980520,"PolyQ 2.0: an improved version of PolyQ, a database of human polyglutamine proteins. ","Proteins with expanded polyglutamine (polyQ) repeats are involved in human neurodegenerative diseases, via a gain-of-function mechanism of neuronal toxicity involving protein conformational changes that result in the formation and deposition of β-sheet-rich aggregates. 
Aggregation is dependent on the context and properties of the host protein, such as domain context and location of the repeat tract. In order to explore this relationship in greater detail, here we describe PolyQ 2.0, an updated database that provides a comprehensive knowledgebase for human polyQ proteins. Compared with the previous PolyQ database, our new database provides a variety of substantial updates including detailed biological annotations and search options. Biological annotations in terms of domain context information, protein structural and functional annotation, single point mutations, predicted disordered regions, protein-protein interaction partners, metabolic/signaling pathways, post-translational modification sites and evolutionary information are made available. Several new database functionalities have also been provided, including search using multiple/combinatory keywords, and submission of new data entries. Also, several third-party plug-ins are employed to enhance data visualization in PolyQ 2.0. In PolyQ 2.0 the proteins are reclassified into 3 new categories and contain 9 reviewed disease-associated polyQ proteins, 105 reviewed non-disease polyQ proteins and 146 un-reviewed polyQ proteins (reviewed by UniProt curators). We envisage that this updated database will be a useful resource for functional and structural investigation of human polyQ proteins. Database URL: http://lightning.med.monash.edu/polyq2/.",2016-03-15 +29927537,Review of current practice and outcomes following ileoanal pouch surgery: lessons learned from the Ileoanal Pouch Registry and the 2017 Ileoanal Pouch Report.,"

Aim

The second Association of Coloproctology of Great Britain and Ireland (ACPGBI) Ileoanal Pouch Registry (IPR) report was released in July 2017 following a first report in 2012. This article provides a summary of data derived from the most recent IPR report (2017 Ileoanal Pouch Report. https://www.acpgbi.org.uk/content/uploads/2016/07/Ileoanal-Pouch-Report-2017-FINAL.compressed.pdf).

Method

The IPR is an electronic database of voluntarily submitted data including patient demographics, disease, intra-operative and postoperative factors submitted by consultant surgeons or delegates. Data up to 31 March 2017 have been analysed for this report.

Results

A total of 5352 pouch operations were carried out at 76 UK and four European centres by 154 surgeons over four decades. Recorded procedures have increased over time but data submission is voluntary and underestimates actual volume. Significant variation exists in institutional volume; 73 centres entered data on patients undergoing pouch surgery during the past 5 years. Of these, 44 centres have submitted ≤ 10 cases, with 10 centres submitting one patient and nine centres two cases. Since 2013, minimal access surgery has been employed in 54% of cases. Rectal dissection was undertaken in the total mesorectal excision plane in 69%. J-pouch configuration was used in 99% of cases and 90% of pouch-anal anastomoses were performed using a stapled technique. Including all years, the IPR rate of pelvic sepsis was 9.4% and the rate of pouch failure was 4.7%.

Conclusion

The IPR holds the largest voluntary repository of data on ileoanal pouch surgery. The second report from the IPR records marked refinements in surgical technique over time but also highlights wide variation in institutional caseload and outcome across the UK.",2018-08-27 +27789701,Updates in Rhea - an expert curated resource of biochemical reactions.,"Rhea (http://www.rhea-db.org) is a comprehensive and non-redundant resource of expert-curated biochemical reactions designed for the functional annotation of enzymes and the description of metabolic networks. Rhea describes enzyme-catalyzed reactions covering the IUBMB Enzyme Nomenclature list as well as additional reactions, including spontaneously occurring reactions, using entities from the ChEBI (Chemical Entities of Biological Interest) ontology of small molecules. Here we describe developments in Rhea since our last report in the database issue of Nucleic Acids Research. These include the first implementation of a simple hierarchical classification of reactions, improved coverage of the IUBMB Enzyme Nomenclature list and additional reactions through continuing expert curation, and the development of a new website to serve this improved dataset.",2016-10-26 +30908818,RAId: Knowledge-Integrated Proteomics Web Service with Accurate Statistical Significance Assignment.,"Mass spectrometry-based proteomics starts with identifications of peptides and proteins, which provide the bases for forming the next-level hypotheses whose ""validations"" are often employed for forming even higher level hypotheses and so forth. Scientifically meaningful conclusions are thus attainable only if the number of falsely identified peptides/proteins is accurately controlled. For this reason, RAId continued to be developed in the past decade. 
RAId employs rigorous statistics for peptides/proteins identification, hence assigning accurate P-values/E-values that can be used confidently to control the number of falsely identified peptides and proteins. The RAId web service is a versatile tool built to identify peptides and proteins from tandem mass spectrometry data. Not only recognizing various spectra file formats, the web service also allows four peptide scoring functions and choice of three statistical methods for assigning P-values/E-values to identified peptides. Users may upload their own protein database or use one of the available knowledge integrated organismal databases that contain annotated information such as single amino acid polymorphisms, post-translational modifications, and their disease associations. The web service also provides a friendly interface to display, sort using different criteria, and download the identified peptides and proteins. RAId web service is freely available at https://www.ncbi.nlm.nih.gov/CBBresearch/Yu/raid.",2019-07-01 +30080852,A systematic review of hepatitis B virus (HBV) drug and vaccine escape mutations in Africa: A call for urgent action.,"International sustainable development goals for the elimination of viral hepatitis as a public health problem by 2030 highlight the pressing need to optimize strategies for prevention, diagnosis and treatment. Selected or transmitted resistance associated mutations (RAMs) and vaccine escape mutations (VEMs) in hepatitis B virus (HBV) may reduce the success of existing treatment and prevention strategies. These issues are particularly pertinent for many settings in Africa where there is high HBV prevalence and co-endemic HIV infection, but lack of robust epidemiological data and limited education, diagnostics and clinical care. The prevalence, distribution and impact of RAMs and VEMs in these populations are neglected in the current literature. 
We therefore set out to assimilate data for sub-Saharan Africa through a systematic literature review and analysis of published sequence data, and present these in an on-line database (https://livedataoxford.shinyapps.io/1510659619-3Xkoe2NKkKJ7Drg/). The majority of the data were from HIV/HBV coinfected cohorts. The commonest RAM was rtM204I/V, either alone or in combination with associated mutations, and identified in both reportedly treatment-naïve and treatment-experienced adults. We also identified the suite of mutations rtM204V/I + rtL180M + rtV173L, that has been associated with vaccine escape, in over 1/3 of cohorts. Although tenofovir has a high genetic barrier to resistance, it is of concern that emerging data suggest polymorphisms that may be associated with resistance, although the precise clinical impact of these is unknown. Overall, there is an urgent need for improved diagnostic screening, enhanced laboratory assessment of HBV before and during therapy, and sustained roll out of tenofovir in preference to lamivudine alone. Further data are needed in order to inform population and individual approaches to HBV diagnosis, monitoring and therapy in these highly vulnerable settings.",2018-08-06 +31329546,On Learning 3D Face Morphable Model from In-the-Wild Images.,"As a classic statistical model of 3D facial shape and albedo, 3D Morphable Model (3DMM) is widely used in facial analysis, e.g., model fitting, image synthesis. Conventional 3DMM is learned from a set of 3D face scans with associated well-controlled 2D face images, and represented by two sets of PCA basis functions. Due to the type and amount of training data, as well as, the linear bases, the representation power of 3DMM can be limited. To address these problems, this paper proposes an innovative framework to learn a nonlinear 3DMM model from a large set of in-the-wild face images, without collecting 3D face scans. 
Specifically, given a face image as input, a network encoder estimates the projection, lighting, shape and albedo parameters. Two decoders serve as the nonlinear 3DMM to map from the shape and albedo parameters to the 3D shape and albedo, respectively. With the projection parameter, lighting, 3D shape, and albedo, a novel analytically-differentiable rendering layer is designed to reconstruct the original input face. The entire network is end-to-end trainable with only weak supervision. We demonstrate the superior representation power of our nonlinear 3DMM over its linear counterpart, and its contribution to face alignment, 3D reconstruction, and face editing. Source code and additional results can be found at our project page: http://cvlab.cse.msu.edu/project-nonlinear-3dmm.html.",2020-12-04 +33533981,Association of systemic lupus erythematosus with hearing loss: a systemic review and meta-analysis.,"Systemic lupus erythematosus (SLE) is a systemic autoimmune disease that can affect virtually any organ, including middle and/or inner ear. The objective of the current systematic review and meta-analysis was to investigate the association of SLE with the different subtypes of hearing loss. This systematic review and meta-analysis was conducted in agreement with the PRISMA guidelines. The review protocol was registered in the PROSPERO international prospective register of systematic reviews ( https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=216353 ). A random effects model meta-analysis was carried out while heterogeneity was appraised by I2. Subgroup analysis and sensitivity analysis were also performed. Nine studies comprising 7,654 SLE patients and 37,244 controls were included in this systematic review. Four of them were rated to a moderate rate of bias, while five of them were rated to a low rate of bias. SLE patients had significantly increased odds of sensorineural hearing loss (SNHL) compared with controls (OR 2.31; 95%CI 1.48-3.60; I2 = 0). 
SLE patients did not have significantly increased odds of Conductive Hearing Loss (CHL) (OR 1.30; 95% CI 0.23-7.45; I2 = 0). Only one study reported on the outcome of Mixed Hearing Loss (MHL) (3 events in SLE group vs. 0 events in control group). Subgroup analysis, based on study design and detection method of hearing loss also showed significantly increased odds of SNHL in SLE patients. The significantly increased odds of SNHL in SLE persisted even after sensitivity analysis. In conclusion, SLE is significantly associated with SNHL; SLE is not associated with CHL, while, due to lack of data, we could not reach a conclusion regarding the odds of MHL in SLE patients. Pure tone audiometry as a screening test and follow-up test in SLE patients could be of essence. Management and prognosis of hearing loss in SLE patients should be discussed.",2021-02-03 +32425651,Current scenario of COVID-19 in pediatric age group and physiology of immune and thymus response.,"COVID-19 pandemic caused by SARS-CoV-2, continues to manifest with severe acute respiratory syndrome among the adults, however, it offers a convincing indication of less severity and fatality in pediatric age group (0-18 years). The current trend suggests that children may get infected but are less symptomatic with less fatality, which is concordant to earlier epidemic outbreaks of SARS-CoV and MERS-CoV, in 2002 and 2012, respectively. According to the available data, children appear to be at lower risk for COVID-19, as adults constitute for maximum number of the confirmed cases (308,592) and deaths (13,069) as on 22nd March (https://www.worldometers.info/coronavirus). However, rapid publications and information of the adult patients with COVID-19 is in progress and published, on the contrary, almost no comprehensive data or discussion about the COVID-19 in children is available. 
Therefore, in this review, we outline the epidemiology, clinical symptoms, diagnosis, treatment, prevention, possible immune response and role of thymus in children to combat the COVID-19 outbreak.",2020-05-15 +32406916,NetMHCpan-4.1 and NetMHCIIpan-4.0: improved predictions of MHC antigen presentation by concurrent motif deconvolution and integration of MS MHC eluted ligand data.,"Major histocompatibility complex (MHC) molecules are expressed on the cell surface, where they present peptides to T cells, which gives them a key role in the development of T-cell immune responses. MHC molecules come in two main variants: MHC Class I (MHC-I) and MHC Class II (MHC-II). MHC-I predominantly present peptides derived from intracellular proteins, whereas MHC-II predominantly presents peptides from extracellular proteins. In both cases, the binding between MHC and antigenic peptides is the most selective step in the antigen presentation pathway. Therefore, the prediction of peptide binding to MHC is a powerful utility to predict the possible specificity of a T-cell immune response. Commonly MHC binding prediction tools are trained on binding affinity or mass spectrometry-eluted ligands. Recent studies have however demonstrated how the integration of both data types can boost predictive performances. Inspired by this, we here present NetMHCpan-4.1 and NetMHCIIpan-4.0, two web servers created to predict binding between peptides and MHC-I and MHC-II, respectively. Both methods exploit tailored machine learning strategies to integrate different training data types, resulting in state-of-the-art performance and outperforming their competitors. 
The servers are available at http://www.cbs.dtu.dk/services/NetMHCpan-4.1/ and http://www.cbs.dtu.dk/services/NetMHCIIpan-4.0/.",2020-07-01 +32449934,"ASAP 2020 update: an open, scalable and interactive web-based portal for (single-cell) omics analyses.","Single-cell omics enables researchers to dissect biological systems at a resolution that was unthinkable just 10 years ago. However, this analytical revolution also triggered new demands in 'big data' management, forcing researchers to stay up to speed with increasingly complex analytical processes and rapidly evolving methods. To render these processes and approaches more accessible, we developed the web-based, collaborative portal ASAP (Automated Single-cell Analysis Portal). Our primary goal is thereby to democratize single-cell omics data analyses (scRNA-seq and more recently scATAC-seq). By taking advantage of a Docker system to enhance reproducibility, and novel bioinformatics approaches that were recently developed for improving scalability, ASAP meets challenging requirements set by recent cell atlasing efforts such as the Human (HCA) and Fly (FCA) Cell Atlas Projects. Specifically, ASAP can now handle datasets containing millions of cells, integrating intuitive tools that allow researchers to collaborate on the same project synchronously. ASAP tools are versioned, and researchers can create unique access IDs for storing complete analyses that can be reproduced or completed by others. Finally, ASAP does not require any installation and provides a full and modular single-cell RNA-seq analysis pipeline. ASAP is freely available at https://asap.epfl.ch.",2020-07-01 +31899488,Differential Expression Gene Explorer (DrEdGE): a tool for generating interactive online visualizations of gene expression datasets.,"

Summary

Differential Expression Gene Explorer (DrEdGE) is a web-based tool that guides genomicists through easily creating interactive online data visualizations, which colleagues can query according to their own conditions to discover genes, samples or patterns of interest. We demonstrate DrEdGE's features with three example websites generated from publicly available datasets—human neuronal tissue, mouse embryonic tissue and Caenorhabditis elegans whole embryos. DrEdGE increases the utility of large genomics datasets by removing technical obstacles to independent exploration.

Availability and implementation

Freely available at http://dredge.bio.unc.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +33058415,SMN1 copy-number and sequence variant analysis from next-generation sequencing data.,"Spinal muscular atrophy (SMA) is a severe neuromuscular autosomal recessive disorder affecting 1/10,000 live births. Most SMA patients present homozygous deletion of SMN1, while the vast majority of SMA carriers present only a single SMN1 copy. The sequence similarity between SMN1 and SMN2, and the complexity of the SMN locus makes the estimation of the SMN1 copy-number by next-generation sequencing (NGS) very difficult. Here, we present SMAca, the first python tool to detect SMA carriers and estimate the absolute SMN1 copy-number using NGS data. Moreover, SMAca takes advantage of the knowledge of certain variants specific to SMN1 duplication to also identify silent carriers. This tool has been validated with a cohort of 326 samples from the Navarra 1000 Genomes Project (NAGEN1000). SMAca was developed with a focus on execution speed and easy installation. This combination makes it especially suitable to be integrated into production NGS pipelines. Source code and documentation are available at https://www.github.com/babelomics/SMAca.",2020-10-14 +31854200,New Perspectives for Cancer Hazard Evaluation by the Report on Carcinogens: A Case Study Using Read-Across Methods in the Evaluation of Haloacetic Acids Found as Water Disinfection By-Products.,"

Background

Due to the large number of chemicals not yet tested for carcinogenicity but to which people are exposed, the limited number of human and animal cancer studies conducted each year, and the frequent need for a timely response, mechanistic data are playing an increasingly important role in carcinogen hazard identification.

Objectives

To provide a targeted approach to identify relevant mechanistic data in our cancer evaluation of haloacetic acids (HAAs), we used several approaches including systematic review, the 10 key characteristics of carcinogens (KCs), and read-across methods. Our objective in this commentary is to discuss the strengths, limitations, and challenges of these approaches in a cancer hazard assessment.

Methods

A cancer hazard assessment for 13 HAAs found as water disinfection by-products was conducted. Literature searches for mechanistic studies focused on the KCs and individual HAAs. Studies were screened for relevance and categorized by KCs and other relevant data, including chemical properties, toxicokinetics, and biological effects other than KCs. Mechanistic data were organized using the KCs, and strength of evidence was evaluated; this information informed potential modes of action (MOAs) and read-across-like approaches. Three read-across options were considered: evaluating HAAs as a class, as subclass(es), or as individual HAAs (analog approach).

Discussion

Because of data limitations and uncertainties, listing as a class or subclass(es) was ruled out, and an analog approach was used. Two brominated HAAs were identified as target (untested) chemicals based on their metabolism and similarity to source (tested) chemicals. In addition, four HAAs with animal cancer data had sufficient evidence for potential listing in the Report on Carcinogens (RoC). This is the first time that the KCs and other relevant data, in combination with read-across principles, were used to support a recommendation to list chemicals in the RoC that did not have animal cancer data. https://doi.org/10.1289/EHP5672.",2019-12-19 +30138632,BlaPred: Predicting and classifying β-lactamase using a 3-tier prediction system via Chou's general PseAAC.,"Antibiotics of β-lactam class account for nearly half of the global antibiotic use. The β-lactamase enzyme is a major element of the bacterial arsenals to escape the lethal effect of β-lactam antibiotics. Different variants of β-lactamases have evolved to counter the different types of β-lactam antibiotics. Extensive research has been done to isolate and characterize different variants of β-lactamases. Unfortunately, identification and classification of the β-lactamase enzyme are purely based on experiments, which is both time- and resource-consuming. Thus, there is a need for fast and accurate computational methods to identify and classify new β-lactamase enzymes from the avalanche of sequence data generated in the post-genomic era. Based on these considerations, we have developed a support vector machine based three-tier prediction system, BlaPred, to predict and classify (as per Ambler classification) β-lactamases solely from their protein sequences. The input features used were amino acid composition, classic and amphiphilic pseudo amino acid compositions. The results show that the classic pseudo amino acid composition-based models performed better than the other models. 
Following a leave-one-out cross-validation procedure, the accuracy to discriminate β-lactamases from non-β-lactamases was 93.57% (tier-I); accuracies for prediction of class A β-lactamases was 93.27%, 95.52% for class B, 96.86% for class C and 97.31% for class D (tier-II); and at tier-III the accuracies for prediction were 84.78%, 95.65% and 89.13% for subclasses B1, B2 and B3, respectively. The comparative results on an independent dataset suggests that our method works efficiently to distinguish β-lactamases from non-β-lactamases, with an overall accuracy of 93.09%, and is further able to classify β-lactamase sequences into their respective Ambler classes and subclasses with accuracy higher than 92% and 87%, respectively. Comparative performance of BlaPred on an independent benchmark dataset also shows a significant improvement over other existing methods. Finally, BlaPred is available as a webserver, as well as standalone software, which can be accessed at http://proteininformatics.org/mkumar/blapred.",2018-08-20 +32657372,Benchmarking gene ontology function predictions using negative annotations.,"

Motivation

With the ever-increasing number and diversity of sequenced species, the challenge to characterize genes with functional information is even more important. In most species, this characterization almost entirely relies on automated electronic methods. As such, it is critical to benchmark the various methods. The Critical Assessment of protein Function Annotation algorithms (CAFA) series of community experiments provide the most comprehensive benchmark, with a time-delayed analysis leveraging newly curated experimentally supported annotations. However, the definition of a false positive in CAFA has not fully accounted for the open world assumption (OWA), leading to a systematic underestimation of precision. The main reason for this limitation is the relative paucity of negative experimental annotations.

Results

This article introduces a new, OWA-compliant, benchmark based on a balanced test set of positive and negative annotations. The negative annotations are derived from expert-curated annotations of protein families on phylogenetic trees. This approach results in a large increase in the average information content of negative annotations. The benchmark has been tested using the naïve and BLAST baseline methods, as well as two orthology-based methods. This new benchmark could complement existing ones in future CAFA experiments.

Availability and implementation

All data, as well as code used for analysis, is available from https://lab.dessimoz.org/20_not.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-07-01 +32280325,Simulation of small-angle X-ray scattering data of biological macromolecules in solution.,"This article presents IMSIM, an application to simulate two-dimensional small-angle X-ray scattering patterns and, further, one-dimensional profiles from biological macromolecules in solution. IMSIM implements a statistical approach yielding two-dimensional images in TIFF, CBF or EDF format, which may be readily processed by existing data-analysis pipelines. Intensities and error estimates of one-dimensional patterns obtained from the radial average of the two-dimensional images exhibit the same statistical properties as observed with actual experimental data. With initial input on an absolute scale, [cm-1]/c[mg ml-1], the simulated data frames may also be scaled to absolute scale such that the forward scattering after subtraction of the background is proportional to the molecular weight of the solute. The effects of changes of concentration, exposure time, flux, wavelength, sample-detector distance, detector dimensions, pixel size, and the mask as well as incident beam position can be considered for the simulation. The simulated data may be used in method development, for educational purposes, and also to determine the most suitable beamline setup for a project prior to the application and use of the actual beamtime. IMSIM is available as part of the ATSAS software package (3.0.0) and is freely available for academic use (http://www.embl-hamburg.de/biosaxs/download.html).",2020-02-18 +33528756,Simultaneously measuring pulse-amplitude-modulated (PAM) chlorophyll fluorescence of leaves at wavelengths shorter and longer than 700 nm.,"PAM fluorescence of leaves of cherry laurel (Prunus laurocerasus L.) was measured simultaneously in the spectral range below 700 nm (sw) and above 700 nm (lw). A high-sensitivity photodiode was employed to measure the low intensities of sw fluorescence. 
Photosystem II (PSII) performance was analyzed by the saturation pulse method during a light response curve with subsequent dark phase. The sw fluorescence was more variable, resulting in higher PSII photochemical yields compared to lw fluorescence. The variations between sw and lw data were explained by different levels of photosystem I (PSI) fluorescence: the contribution of PSI fluorescence to minimum fluorescence (F0) was calculated to be 14% at sw wavelengths and 45% at lw wavelengths. With the results obtained, the validity of an earlier method for the quantification of PSI fluorescence (Genty et al. in Photosynth Res 26:133-139, 1990, https://doi.org/10.1007/BF00047085 ) was reconsidered. After subtracting PSI fluorescence from all fluorescence levels, the maximum PSII photochemical yield (FV/FM) in the sw range was 0.862 and it was 0.883 in the lw range. The lower FV/FM at sw wavelengths was suggested to arise from inactive PSII reaction centers in the outermost leaf layers. Polyphasic fluorescence transients (OJIP or OI1I2P kinetics) were recorded simultaneously at sw and lw wavelengths: the slowest phase of the kinetics (IP or I2P) corresponded to 11% and 13% of total variable sw and lw fluorescence, respectively. The idea that this difference is due to variable PSI fluorescence is critically discussed. Potential future applications of simultaneously recording fluorescence in two spectral windows include studies of PSI non-photochemical quenching and state I-state II transitions, as well as measuring the fluorescence from pH-sensitive dyes simultaneously with chlorophyll fluorescence.",2021-02-02 +32167955,New Guidelines for Electrical Stimulation Parameters in Adult Patients With Knee Osteoarthritis Based on a Systematic Review of the Current Literature.,"

Objective

The goal of this systematic review was to provide guidelines for treatment parameters regarding electrical stimulation by investigating its efficacy in improving muscle strength and decreasing pain in patients with knee osteoarthritis.

Design

Following the Preferred Reporting Items for Systematic Reviews and Meta-Analyses standard, three electronic databases (CINAHL, PubMed, and PEDro) and gray literature were used. Randomized control trials comparing electrical stimulation and conservative physical therapy were critically appraised using the 2005 University of Oxford standard.

Results

Nine randomized control trials were included in our review. First, our review confirmed that neuromuscular electrical stimulation is the most effective electrical stimulation treatment in the management of knee OA, and its efficiency is higher when combined with a strengthening program. Second, frequency of at least 50 Hz and no more than 75 Hz with a pulse duration between 200 and 400 μs and a treatment duration of 20 mins is necessary for successful treatment.

Conclusions

For the first time, our review provides standardized clinical treatment parameters for neuromuscular electrical stimulation to be included in a strengthening program for the adult patient with knee OA.

To claim CME credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME. CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) Recall the impact of quadriceps femoris weakness on joint stability; (2) Summarize the mechanism of action of neuromuscular electrical stimulation (NMES) on reducing pain and increasing muscle strength; and (3) Plan the clinical treatment parameters of NMES to be included in a strengthening program for an adult patient with knee osteoarthritis.

Level

Advanced.

Accreditation

The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians. The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2020-08-01 +,D1-3: Marshfield Dictionary of Clinical and Translational Science (MD-CTS): An Online Reference for Clinical and Translational Science Terminology,"

Background/Aims

New terms are rapidly appearing in the literature and practice of clinical medicine and translational research. To catalog real-world usage of medical terms, we report the first construction of an online dictionary of clinical and translational medicinal terms, which are computationally generated in near real-time using a big data approach. This project is NIH CTSA-funded and developed by the Marshfield Clinic Research Foundation in conjunction with University of Wisconsin - Madison. Currently titled Marshfield Dictionary of Clinical and Translational Science (MD-CTS), this application is a Google-like word search tool. By entering a term into the search bar, MD-CTS will display that term’s definition, usage examples, contextual terms, related images, and ontological information. A prototype is available for public viewing at http://spellchecker.mfldclin.edu/.

Methods

We programmatically derived the lexicon for MD-CTS from scholarly communications by parsing through 15,156,745 MEDLINE abstracts and extracting all of the unique words found therein. We then ran this list through several filters in order to remove words that were not relevant for searching, such as common English words and numeric expressions. We then loaded the resulting 1,795,769 terms into SQL tables. Each term is cross-referenced with every occurrence in all abstracts in which it was found. Additional information is aggregated from Wiktionary, Bioportal, and Wikipedia in real-time and displayed on-screen. From this lexicon we created a supplemental dictionary resource (updated quarterly) to be used in Microsoft Office® products.

Results

We evaluated the utility of MD-CTS by creating a list of 100 words derived from recent clinical and translational medicine publications in the week of July 22, 2013. We then performed comparative searches for each term with Taber’s Cyclopedic Medical Dictionary, Stedman’s Medical Dictionary, Dorland’s Illustrated Medical Dictionary, Medical Subject Headings (MeSH), and MD-CTS. We compared our supplemental dictionary resource to OpenMedSpell for effectiveness in accuracy of term recognition.

Conclusions

In summary, we developed an online mobile and desktop reference, which comprehensively integrates Wiktionary (term information), Bioportal (ontological information), Wikipedia (related images), and Medline abstract information (term usage) for scientists and clinicians to browse in real-time. We also created a supplemental dictionary resource to be used in Microsoft Office® products.",2014-09-01 +32040147,Prediction of all-cause mortality in haemodialysis patients using a Bayesian network.,"

Background

All-cause mortality in haemodialysis (HD) is high, reaching 15.6% in the first year according to the European Renal Association.

Methods

A new clinical tool to predict all-cause mortality in HD patients is proposed. It uses a post hoc analysis of data from the prospective cohort study Photo-Graph V3. A total of 35 variables related to patient characteristics, laboratory values and treatments were used as predictors of all-cause mortality. The first step was to compare the results obtained using a logistic regression to those obtained by a Bayesian network. The second step aimed to increase the performance of the best prediction model using synthetic data. Finally, a compromise between performance and ergonomics was proposed by reducing the number of variables to be entered in the prediction tool.

Results

Among the 9010 HD patients included in the Photo-Graph V3 study, 4915 incident patients with known medical status at 2 years were analysed. All-cause mortality at 2 years was 34.1%. The Bayesian network provided the most reliable prediction. The final optimized models that used 14 variables had areas under the receiver operating characteristic curves of 0.78 ± 0.01, sensitivity of 72 ± 2%, specificity of 69 ± 2%, predictive positive value of 70 ± 1% and negative predictive value of 71 ± 2% for the prediction of all-cause mortality.

Conclusions

Using artificial intelligence methods, a new clinical tool to predict all-cause mortality in incident HD patients is proposed. The latter can be used for research purposes before its external validation at: https://www.hed.cc/?a=twoyearsallcausemortalityhemod&n=2-years%20All-cause%20Mortality%20Hemodialysis.neta.",2020-08-01 +34003914,Genome-Wide Association Studies-Based Machine Learning for Prediction of Age-Related Macular Degeneration Risk.,"

Purpose

Because age-related macular degeneration (AMD) is a progressive disorder and advanced AMD is currently hard to cure, an accurate and informative prediction of a person's AMD risk using genetic information is desirable for early diagnosis and potential individualized clinical management. The objective of this study was to develop and validate novel prediction models for AMD risk using large genome-wide association studies datasets with different machine learning approaches.

Methods

Genotype data from 32,215 Caucasian individuals with age of ≥50 years from the International AMD Genomics Consortium in dbGaP were used to establish and test prediction models for AMD risk. Four different machine learning approaches-neural network, lasso regression, support vector machine, and random forest-were implemented. A standard logistic regression model using a genetic risk score was also considered.

Results

All machine learning-based methods achieved satisfactory performance for predicting advanced AMD cases (vs. normal controls) (area under the curve = 0.81-0.82, Brier score = 0.17-0.18 in a separate test dataset) and any stage AMD (vs. normal controls) (area under the curve = 0.78-0.79, Brier score = 0.18-0.20 in a separate test dataset). The prediction performance was further validated in an independent dataset of 783 subjects from UK Biobank (area under the curve = 0.67).

Conclusions

By applying multiple state-of-art machine learning approaches on large AMD genome-wide association studies datasets, the predictive models we established can provide an accurate estimation of an individual's AMD risk profile based on genetic information along with age. The online prediction interface is available at: https://yanq.shinyapps.io/no_vs_amd_NN/.

Translational relevance

The accurate and individualized risk prediction model interface will greatly improve early diagnosis and enhance tailored clinical management of AMD.",2021-02-01 +33655204,Estimating risk of mechanical ventilation and in-hospital mortality among adult COVID-19 patients admitted to Mass General Brigham: The VICE and DICE scores.,"

Background

Risk stratification of COVID-19 patients upon hospital admission is key for their successful treatment and efficient utilization of hospital resources. We sought to evaluate the risk factors on admission (including comorbidities, vital signs, and initial laboratory assessment) associated with ventilation need and in-hospital mortality in COVID-19.

Methods

We established a retrospective cohort of COVID-19 patients from Mass General Brigham hospitals. Demographic, clinical, and admission laboratory data were obtained from electronic medical records of patients admitted to the hospital with laboratory-confirmed COVID-19 before May 19, 2020. Multivariable logistic regression analyses were used to construct and validate the Ventilation in COVID Estimator (VICE) and Death in COVID Estimator (DICE) risk scores.

Findings

The entire cohort included 1042 patients (median age, 64 years; 56.8% male). The derivation and validation cohorts for the risk scores included 578 and 464 patients, respectively. We found four factors to be independently predictive for mechanical ventilation requirement (diabetes mellitus, SpO2:FiO2 ratio, C-reactive protein, and lactate dehydrogenase), and 10 factors to be predictors of in-hospital mortality (age, male sex, coronary artery disease, diabetes mellitus, chronic statin use, SpO2:FiO2 ratio, body mass index, neutrophil to lymphocyte ratio, platelet count, and procalcitonin). Using these factors, we constructed the VICE and DICE risk scores, which performed with C-statistics of 0.84 and 0.91, respectively. Importantly, the chronic use of a statin was associated with protection against death due to COVID-19. The VICE and DICE score calculators have been placed on an interactive website freely available to healthcare providers and researchers (https://covid-calculator.com/).

Interpretation

The risk scores developed in this study may help clinicians more appropriately determine which COVID-19 patients will need to be managed with greater intensity.

Funding

COVID-19 Fast Grant (fastgrants.org).",2021-02-25 +32486891,Using Reactome to build an autophagy mechanism knowledgebase.,"The 21st century has revealed much about the fundamental cellular process of autophagy. Autophagy controls the catabolism and recycling of various cellular components both as a constitutive process and as a response to stress and foreign material invasion. There is considerable knowledge of the molecular mechanisms of autophagy, and this is still growing as new modalities emerge. There is a need to investigate autophagy mechanisms reliably, comprehensively and conveniently. Reactome is a freely available knowledgebase that consists of manually curated molecular events (reactions) organized into cellular pathways (https://reactome.org/). Pathways/reactions in Reactome are hierarchically structured, graphically presented and extensively annotated. Data analysis tools, such as pathway enrichment, expression data overlay and species comparison, are also available. For customized analysis, information can also be programmatically queried. Here, we discuss the curation and annotation of the molecular mechanisms of autophagy in Reactome. We also demonstrate the value that Reactome adds to research by reanalyzing a previously published work on genome-wide CRISPR screening of autophagy components.Abbreviations: CMA: chaperone-mediated autophagy; GO: Gene Ontology; MA: macroautophagy; MI: microautophagy; MTOR: mechanistic target of rapamycin kinase; SQSTM1: sequestosome 1.",2020-06-02 +32038050,Analysis of Small-scale Magnetic Flux Ropes Covering the Whole Ulysses Mission. ,"Small-scale magnetic flux ropes in the solar wind have been studied for decades via both simulation and observation. Statistical analysis utilizing various in situ spacecraft measurements is the main observational approach, which helps investigate the generation and evolution of these small-scale structures. 
In this study, we extend the automated detection of small-scale flux ropes based on the Grad-Shafranov reconstruction to the complete data set of in situ measurements of the Ulysses spacecraft. We first discuss the temporal variation of the bulk properties of 22,719 flux ropes found through our approach, namely, the average magnetic field and plasma parameters, etc., as functions of the heliographic latitudes and heliocentric radial distances. We then categorize all identified events into three groups based on event distributions in different latitudes separated by 30°, at different radial distances, and under different solar activities. With the detailed statistical analysis, we conclude the following: (1) the properties of flux ropes, such as the duration, scale size, etc., follow power-law distributions, but with different slope indices, especially for distributions at different radial distances. (2) They are also affected by the solar wind speed, which has different distributions under different solar activities, manifested as a latitudinal effect. (3) The main difference in flux rope properties between the low and high latitudes is attributed to possible Alfvénic structures or waves and to flux ropes with relatively high Alfvénicity. (4) Flux ropes with longer durations and larger scale sizes occur more often at larger radial distances. (5) With a stricter Walén slope threshold, more events are excluded at higher latitudes, which further reduces the latitudinal effects on flux rope properties. The entire database is published online at http://www.fluxrope.info.",2019-08-13 +27509041,MMpI: A WideRange of Available Compounds of Matrix Metalloproteinase Inhibitors.,"Matrix metalloproteinases (MMPs) are a family of zinc-dependent proteinases involved in the regulation of the extracellular signaling and structural matrix environment of cells and tissues. MMPs are considered as promising targets for the treatment of many diseases. 
Therefore, creation of database on the inhibitors of MMP would definitely accelerate the research activities in this area due to its implication in above-mentioned diseases and associated limitations in the first and second generation inhibitors. In this communication, we report the development of a new MMpI database which provides resourceful information for all researchers working in this field. It is a web-accessible, unique resource that contains detailed information on the inhibitors of MMP including small molecules, peptides and MMP Drug Leads. The database contains entries of ~3000 inhibitors including ~72 MMP Drug Leads and ~73 peptide based inhibitors. This database provides the detailed molecular and structural details which are necessary for the drug discovery and development. The MMpI database contains physical properties, 2D and 3D structures (mol2 and pdb format files) of inhibitors of MMP. Other data fields are hyperlinked to PubChem, ChEMBL, BindingDB, DrugBank, PDB, MEROPS and PubMed. The database has extensive searching facility with MMpI ID, IUPAC name, chemical structure and with the title of research article. The MMP inhibitors provided in MMpI database are optimized using Python-based Hierarchical Environment for Integrated Xtallography (Phenix) software. MMpI Database is unique and it is the only public database that contains and provides the complete information on the inhibitors of MMP. Database URL: http://clri.res.in/subramanian/databases/mmpi/index.php.",2016-08-10 +33464979,African American English and Early Literacy: A Comparison of Approaches to Quantifying Nonmainstream Dialect Use.,"Purpose Many studies have found a correlation between overall usage rates of nonmainstream forms and reading scores, but less is known about which dialect differences are most predictive. 
Here, we consider different methods of characterizing African American English use from existing assessments and examine which methods best predict literacy achievement. Method Kindergarten and first-grade students who speak African American English received two assessments of dialect use and two assessments of decoding at the beginning and end of the school year. Item-level analyses of the dialect-use assessments were used to compute measures of dialect usage: (a) an overall feature rate measure based on the Diagnostic Evaluation of Language Variation-Screening Test, (b) a subscore analysis of the Diagnostic Evaluation of Language Variation-Screening Test based on items that pattern together, (c) an alternative assessment where children repeat and translate sentences, and (d) ""repertoire"" measures based on a categorical distinction of whether a child used a particular feature of mainstream American English. Results Models using feature rate measures provided better data-model fit than those with repertoire measures, and baseline performance on a sentence repetition task was a positive predictor of reading score at the end of the school year. For phonological subscores, change from the beginning to end of the school year predicted reading at the end of the school year, whereas baseline scores were most predictive for grammatical subscores. Conclusions The addition of a sentence imitation task is useful for understanding a child's dialect and anticipating potential areas for support in early literacy. We observed some support for the idea that morphological dialect differences (i.e., irregular verb morphology) have a particularly close tie to later literacy, but future work will be necessary to confirm this finding. Supplemental Material https://doi.org/10.23641/asha.13425968.",2021-01-18 +32211334,OSeac: An Online Survival Analysis Tool for Esophageal Adenocarcinoma.,"Esophageal Adenocarcinoma (EAC) is one of the most common gastrointestinal tumors in the world. 
However, molecular prognostic systems are still lacking for EAC. Hence, we developed an Online consensus Survival analysis web server for Esophageal Adenocarcinoma (OSeac), to centralize published gene expression data and clinical follow up data of EAC patients from The Cancer Genome Atlas (TCGA) and Gene Expression Omnibus (GEO). OSeac includes 198 EAC cases with gene expression profiling and relevant clinical long-term follow-up data, and employs the Kaplan Meier (KM) survival plot with hazard ratio (HR) and log rank test to estimate the prognostic potency of genes of interests for EAC patients. Moreover, we have determined the reliability of OSeac by using previously reported prognostic biomarkers such as DKK3, CTO1, and TXNIP. OSeac is free and publicly accessible at http://bioinfo.henu.edu.cn/EAC/EACList.jsp.",2020-03-06 +31236093,Raman Open Database: first interconnected Raman-X-ray diffraction open-access resource for material identification.,"Detailed crystallographic information provided by X-ray diffraction (XRD) is complementary to molecular information provided by Raman spectroscopy. Accordingly, the combined use of these techniques allows the identification of an unknown compound without ambiguity. However, a full combination of Raman and XRD results requires an appropriate and reliable reference database with complete information. This is already available for XRD. The main objective of this paper is to introduce and describe the recently developed Raman Open Database (ROD, http://solsa.crystallography.net/rod). It comprises a collection of high-quality uncorrected Raman spectra. The novelty of this database is its interconnectedness with other open databases like the Crystallography Open Database (http://www.crystallography.net/cod and Theoretical Crystallography Open Database (http://www.crystallography.net/tcod/). 
The syntax adopted to format entries in the ROD is based on the worldwide recognized and used CIF format, which offers a simple way for data exchange, writing and description. ROD also uses JCAMP-DX files as an alternative format for submitted spectra. JCAMP-DX files are compatible to varying degrees with most commercial Raman software and can be read and edited using standard text editors.",2019-05-28 +26157684,Development of new on-line statistical program for the Korean Society for Radiation Oncology.,"

Purpose

To develop a new on-line statistical program for the Korean Society for Radiation Oncology (KOSRO) to collect and extract medical data in radiation oncology more efficiently.

Materials and methods

The statistical program is a web-based program. The directory was placed in a sub-folder of the homepage of KOSRO and its web address is http://www.kosro.or.kr/asda. The operating systems server is Linux and the webserver is the Apache HTTP server. For database (DB) server, MySQL is adopted and dedicated scripting language is the PHP. Each ID and password are controlled independently and all screen pages for data input or analysis are made to be friendly to users. Scroll-down menu is actively used for the convenience of user and the consistence of data analysis.

Results

Year of data is one of top categories and main topics include human resource, equipment, clinical statistics, specialized treatment and research achievement. Each topic or category has several subcategorized topics. Real-time on-line report of analysis is produced immediately after entering each data and the administrator is able to monitor status of data input of each hospital. Backup of data as spread sheets can be accessed by the administrator and be used for academic works by any members of the KOSRO.

Conclusion

The new on-line statistical program was developed to collect data from nationwide departments of radiation oncology. Intuitive screen and consistent input structure are expected to promote entering data of member hospitals and annual statistics should be a cornerstone of advance in radiation oncology.",2015-06-30 +33815050,Robust Cortical Thickness Morphometry of Neonatal Brain and Systematic Evaluation Using Multi-Site MRI Datasets.,"The human brain grows the most dramatically during the perinatal and early post-natal periods, during which pre-term birth or perinatal injury that may alter brain structure and lead to developmental anomalies. Thus, characterizing cortical thickness of developing brains remains an important goal. However, this task is often complicated by inaccurate cortical surface extraction due to small-size brains. Here, we propose a novel complex framework for the reconstruction of neonatal WM and pial surfaces, accounting for large partial volumes due to small-size brains. The proposed approach relies only on T1-weighted images unlike previous T2-weighted image-based approaches while only T1-weighted images are sometimes available under the different clinical/research setting. Deep neural networks are first introduced to the neonatal magnetic resonance imaging (MRI) pipeline to address the mis-segmentation of brain tissues. Furthermore, this pipeline enhances cortical boundary delineation using combined models of the cerebrospinal fluid (CSF)/GM boundary detection with edge gradient information and a new skeletonization of sulcal folding where no CSF voxels are seen due to the limited resolution. We also proposed a systematic evaluation using three independent datasets comprising 736 pre-term and 97 term neonates. Qualitative assessment for reconstructed cortical surfaces shows that 86.9% are rated as accurate across the three site datasets. 
In addition, our landmark-based evaluation shows that the mean displacement of the cortical surfaces from the true boundaries was less than a voxel size (0.532 ± 0.035 mm). Evaluating the proposed pipeline (namely NEOCIVET 2.0) shows the robustness and reproducibility across different sites and different age-groups. The mean cortical thickness measured positively correlated with post-menstrual age (PMA) at scan (p < 0.0001); Cingulate cortical areas grew the most rapidly whereas the inferior temporal cortex grew the least rapidly. The range of the cortical thickness measured was biologically congruent (1.3 mm at 28 weeks of PMA to 1.8 mm at term equivalent). Cortical thickness measured on T1 MRI using NEOCIVET 2.0 was compared with that on T2 using the established dHCP pipeline. It was difficult to conclude that either T1 or T2 imaging is more ideal to construct cortical surfaces. NEOCIVET 2.0 has been open to the public through CBRAIN (https://mcin-cnim.ca/technology/cbrain/), a web-based platform for processing brain imaging data.",2021-03-17 +33097925,Global analysis of repetitive DNA from unassembled sequence reads using RepeatExplorer2.,"RepeatExplorer2 is a novel version of a computational pipeline that uses graph-based clustering of next-generation sequencing reads for characterization of repetitive DNA in eukaryotes. The clustering algorithm facilitates repeat identification in any genome by using relatively small quantities of short sequence reads, and additional tools within the pipeline perform automatic annotation and quantification of the identified repeats. The pipeline is integrated into the Galaxy platform, which provides a user-friendly web interface for script execution and documentation of the results. Compared to the original version of the pipeline, RepeatExplorer2 provides automated annotation of transposable elements, identification of tandem repeats and enhanced visualization of analysis results. 
Here, we present an overview of the RepeatExplorer2 workflow and provide procedures for its application to (i) de novo repeat identification in a single species, (ii) comparative repeat analysis in a set of species, (iii) development of satellite DNA probes for cytogenetic experiments and (iv) identification of centromeric repeats based on ChIP-seq data. Each procedure takes approximately 2 d to complete. RepeatExplorer2 is available at https://repeatexplorer-elixir.cerit-sc.cz .",2020-10-23 +32239126,EVICAN-a balanced dataset for algorithm development in cell and nucleus segmentation.,"

Motivation

Deep learning use for quantitative image analysis is exponentially increasing. However, training accurate, widely deployable deep learning algorithms requires a plethora of annotated (ground truth) data. Image collections must contain not only thousands of images to provide sufficient example objects (i.e. cells), but also contain an adequate degree of image heterogeneity.

Results

We present a new dataset, EVICAN-Expert visual cell annotation, comprising partially annotated grayscale images of 30 different cell lines from multiple microscopes, contrast mechanisms and magnifications that is readily usable as training data for computer vision applications. With 4600 images and ∼26 000 segmented cells, our collection offers an unparalleled heterogeneous training dataset for cell biology deep learning application development.

Availability and implementation

The dataset is freely available (https://edmond.mpdl.mpg.de/imeji/collection/l45s16atmi6Aa4sI?q=). Using a Mask R-CNN implementation, we demonstrate automated segmentation of cells and nuclei from brightfield images with a mean average precision of 61.6 % at a Jaccard Index above 0.5.",2020-06-01 +32879816,Trends of Industry Payments in Neurology Subspecialties.,"Background Open Payments is a national disclosure program to promote transparency by the public disclosure of financial relationships between the pharmaceutical and medical device industries and physicians. Objective To explore payments from the industry to physicians in various neurology subspecialties. Methods Open Payments Program (OPP) data (https://openpaymentsdata.cms.gov) on industry-to-physician payments for the years 2014-2018 were extracted for general neurology, neuromuscular, neurophysiology, and vascular neurology. The data were then analyzed to explore trends in payments for various subspecialties and to identify the possible factors underlying these trends. Results Overall, industry-to-physician payments for neurology subspecialties increased by 16% from 2014 to 2018. The introduction of newer drugs in a subspecialty was likely the driving factor for higher industry payments. Nearly half of the total industry-to-physician payments were for the subspecialty of multiple sclerosis (MS)/Neuroimmunology; this coincided with Aubagio and Copaxone being the top two medications associated with the highest industry payments in 2014, Aubagio, and Lemtrada in 2018. A significant increase in spending percentages for headache, neuromuscular disorders, and movement disorders was observed while a relative decrease in the payments for MS/neuroimmunology and epilepsy was identified; these trends coincide with the introduction of new drugs such as Aimovig, Neuplazid, Nusinersen, and Austedo for headache, neuromuscular and movement disorders. 
Conclusions From 2014 to 2018, the total industry-to-physician payments for neurology subspecialties increased while the distribution of industry-to-physician payments for various neurology subspecialties showed notable changes. The introduction of newer medications in a subspecialty coincided with higher industry payments. Identification of these trends and potential motives of the industry spending is critical to address any potential physician bias in prescribing medications.",2020-07-31 +29659702,Ribopeaks: a web tool for bacterial classification through m/z data from ribosomal proteins.,"Summary:MALDI-TOF MS is a rapid, sensitive and economic tool for bacterial identification. Highly abundant bacterial proteins are detected by this technique, including ribosomal proteins (r-protein), and the generated mass spectra are compared with a MALDI-TOF MS spectra database. Currently, it allows mainly the classification of clinical bacteria due to the limited number of environmental bacteria included in the spectra database. We present a wide-ranging bacterium classifier tool, called Ribopeaks, which was created based on r-protein data from the Genbank. The Ribopeaks database has more than 28 500 bacterial taxonomic records. It compares the incoming m/z data from MALDI-TOF MS analysis with models stored in the Ribopeaks database created by machine learning and then taxonomically classifies the bacteria. Availability and implementation:The software is available at http://www.ribopeaks.com. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-09-01 +32592463,Coevolution-based prediction of protein-protein interactions in polyketide biosynthetic assembly lines.,"

Motivation

Polyketide synthases (PKSs) are enzymes that generate diverse molecules of great pharmaceutical importance, including a range of clinically used antimicrobials and antitumor agents. Many polyketides are synthesized by cis-AT modular PKSs, which are organized in assembly lines, in which multiple enzymes line up in a specific order. This order is defined by specific protein-protein interactions (PPIs). The unique modular structure and catalyzing mechanism of these assembly lines makes their products predictable and also spurred combinatorial biosynthesis studies to produce novel polyketides using synthetic biology. However, predicting the interactions of PKSs, and thereby inferring the order of their assembly line, is still challenging, especially for cases in which this order is not reflected by the ordering of the PKS-encoding genes in the genome.

Results

Here, we introduce PKSpop, which uses a coevolution-based PPI algorithm to infer protein order in PKS assembly lines. Our method accurately predicts protein orders (93% accuracy). Additionally, we identify new residue pairs that are key in determining interaction specificity, and show that coevolution of N- and C-terminal docking domains of PKSs is significantly more predictive for PPIs than coevolution between ketosynthase and acyl carrier protein domains.

Availability and implementation

The code is available on http://www.bif.wur.nl/ (under 'Software').

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +32726309,An immunohistochemical prostate cell identification key indicates that aging shifts procollagen 1A1 production from myofibroblasts to fibroblasts in dogs prone to prostate-related urinary dysfunction.,"

Background

The identity and spatial distribution of prostatic cell types has been determined in humans but not in dogs, even though aging- and prostate-related voiding disorders are common in both species and mechanistic factors, such as prostatic collagen accumulation, appear to be shared between species. In this publication we characterize the regional distribution of prostatic cell types in the young intact dog to enable comparisons with human and mice and we examine how the cellular source of procollagen 1A1 changes with age in intact male dogs.

Methods

A multichotomous decision tree involving sequential immunohistochemical stains was validated for use in dog and used to identify specific prostatic cell types and determine their distribution in the capsule, peripheral, periurethral and urethral regions of the young intact canine prostate. Prostatic cells identified using this technique include perivascular smooth muscle cells, pericytes, endothelial cells, luminal, intermediate, and basal epithelial cells, neuroendocrine cells, myofibroblasts, fibroblasts, fibrocytes, and other hematolymphoid cells. To enhance rigor and transparency, all high resolution images (representative images shown in the figures and biological replicates) are available through the GUDMAP database at https://doi.org/10.25548/16-WMM4.

Results

The prostatic peripheral region harbors the largest proportion of epithelial cells. Aging does not change the density of hematolymphoid cells, fibroblasts, and myofibroblasts in the peripheral region or in the fibromuscular capsule, regions where we previously observed aging- and androgen-mediated increases in prostatic collagen abundance. Instead, we observed aging-related changes in the procollagen 1A1 positive prostatic cell identity from a myofibroblast to a fibroblast.

Conclusions

Hematolymphoid cells and myofibroblasts are often identified as sources of collagen in tissues prone to aging-related fibrosis. We show that these are not the likely sources of pathological collagen synthesis in older intact male dogs. Instead, we identify an aging-related shift in the prostatic cell type producing procollagen 1A1 that will help direct development of cell type and prostate appropriate therapeutics for collagen accumulation.",2020-07-29 +32250657,Backbone Free Energy Estimator Applied to Viral Glycoproteins.,"Earlier analysis of the Protein Data Bank derived the distribution of rotations from the plane of a protein hydrogen bond donor peptide group to the plane of its acceptor peptide group. The quasi Boltzmann formalism of Pohl-Finkelstein is employed to estimate free energies of protein elements with these hydrogen bonds, pinpointing residues with a high propensity for conformational change. This is applied to viral glycoproteins as well as capsids, where the 90th+ percentiles of free energies determine residues that correlate well with viral fusion peptides and other functional domains in known cases and thus provide a novel method for predicting these sites of importance as antiviral drug or vaccine targets in general. The method is implemented at https://bion-server.au.dk/hbonds/ from an uploaded Protein Data Bank file.",2020-04-03 +32790095,The COVID-19 pandemic: considerations for resuming normal colorectal services.,"This European Society of Coloproctology guidance focuses on a proposed conceptual framework to resume standard service in colorectal surgery. 
The proposed conceptual framework is a schematic and stepwise approach including: in-depth assessment of damage to non-COVID-19-related colorectal service; the return of service (integration with the COVID-19-specific service and the existing operational continuity planning); safety arrangements in parallel with minimizing downtime; the required support for staff and patients; the aftermath of the pandemic and continued strategic planning. This will be dynamic guidance with ongoing updates using critical appraisal of emerging evidence. We will welcome input from all stakeholders (statutory organizations, healthcare professionals, public and patients). Any new questions, new data and discussion are welcome via https://www.escp.eu.com/guidelines.",2020-08-28 +26110276,BGD: a database of bat genomes.,"Bats account for ~20% of mammalian species, and are the only mammals with true powered flight. For the sake of their specialized phenotypic traits, many researches have been devoted to examine the evolution of bats. Until now, some whole genome sequences of bats have been assembled and annotated, however, a uniform resource for the annotated bat genomes is still unavailable. To make the extensive data associated with the bat genomes accessible to the general biological communities, we established a Bat Genome Database (BGD). BGD is an open-access, web-available portal that integrates available data of bat genomes and genes. It hosts data from six bat species, including two megabats and four microbats. Users can query the gene annotations using efficient searching engine, and it offers browsable tracks of bat genomes. Furthermore, an easy-to-use phylogenetic analysis tool was also provided to facilitate online phylogeny study of genes. To the best of our knowledge, BGD is the first database of bat genomes. It will extend our understanding of the bat evolution and be advantageous to the bat sequences analysis. 
BGD is freely available at: http://donglab.ecnu.edu.cn/databases/BatGenome/.",2015-06-25 +30445434,"The NHGRI-EBI GWAS Catalog of published genome-wide association studies, targeted arrays and summary statistics 2019.","The GWAS Catalog delivers a high-quality curated collection of all published genome-wide association studies enabling investigations to identify causal variants, understand disease mechanisms, and establish targets for novel therapies. The scope of the Catalog has also expanded to targeted and exome arrays with 1000 new associations added for these technologies. As of September 2018, the Catalog contains 5687 GWAS comprising 71673 variant-trait associations from 3567 publications. New content includes 284 full P-value summary statistics datasets for genome-wide and new targeted array studies, representing 6 × 10^9 individual variant-trait statistics. In the last 12 months, the Catalog's user interface was accessed by ∼90000 unique users who viewed >1 million pages. We have improved data access with the release of a new RESTful API to support high-throughput programmatic access, an improved web interface and a new summary statistics database. Summary statistics provision is supported by a new format proposed as a community standard for summary statistics data representation. This format was derived from our experience in standardizing heterogeneous submissions, mapping formats and in harmonizing content. Availability: https://www.ebi.ac.uk/gwas/.",2019-01-01 +31718539,PyBDA: a command line tool for automated analysis of big biological data sets.,"BACKGROUND:Analysing large and high-dimensional biological data sets poses significant computational difficulties for bioinformaticians due to lack of accessible tools that scale to hundreds of millions of data points. RESULTS:We developed a novel machine learning command line tool called PyBDA for automated, distributed analysis of big biological data sets. 
By using Apache Spark in the backend, PyBDA scales to data sets beyond the size of current applications. It uses Snakemake in order to automatically schedule jobs to a high-performance computing cluster. We demonstrate the utility of the software by analyzing image-based RNA interference data of 150 million single cells. CONCLUSION:PyBDA allows automated, easy-to-use data analysis using common statistical methods and machine learning algorithms. It can be used with simple command line calls entirely making it accessible to a broad user base. PyBDA is available at https://pybda.rtfd.io.",2019-11-12 +28582416,JNSViewer-A JavaScript-based Nucleotide Sequence Viewer for DNA/RNA secondary structures.,"Many tools are available for visualizing RNA or DNA secondary structures, but there is scarce implementation in JavaScript that provides seamless integration with the increasingly popular web computational platforms. We have developed JNSViewer, a highly interactive web service, which is bundled with several popular tools for DNA/RNA secondary structure prediction and can provide precise and interactive correspondence among nucleotides, dot-bracket data, secondary structure graphs, and genic annotations. In JNSViewer, users can perform RNA secondary structure predictions with different programs and settings, add customized genic annotations in GFF format to structure graphs, search for specific linear motifs, and extract relevant structure graphs of sub-sequences. JNSViewer also allows users to choose a transcript or specific segment of Arabidopsis thaliana genome sequences and predict the corresponding secondary structure. Popular genome browsers (i.e., JBrowse and BrowserGenome) were integrated into JNSViewer to provide powerful visualizations of chromosomal locations, genic annotations, and secondary structures. 
In addition, we used StructureFold with default settings to predict some RNA structures for Arabidopsis by incorporating in vivo high-throughput RNA structure profiling data and stored the results in our web server, which might be a useful resource for RNA secondary structure studies in plants. JNSViewer is available at http://bioinfolab.miamioh.edu/jnsviewer/index.html.",2017-06-05 +,"First Report of Natural Infection of Zucchini Green Mottle Mosaic Virus on Bottle Gourd in Guangxi, China","During a survey of virus diseases of cucurbits in Guangxi, China, in June 2016, virus-like symptoms, including mottle and mosaic on leaves in bottle gourd, were observed in a greenhouse in Nanning, and eight symptomatic leaf samples were collected. To identify the causal virus, all samples were tested for 10 known cucurbit-infecting viruses, including cucumber green mottle mosaic virus, tobacco mosaic virus (TMV), zucchini green mottle mosaic virus (ZGMMV), kyuri green mottle mosaic virus, cucumber mosaic virus, zucchini yellow mosaic virus, papaya ringspot virus, watermelon mosaic virus, squash mosaic virus, and watermelon silver mottle virus using double antibody sandwich ELISA (Neogen Europe, U.K.; Agdia, U.S.A.). Five out of eight samples reacted positively against the antisera of ZGMMV and TMV but negatively against antisera of the other viruses. To further confirm the viruses in these samples, total RNA was extracted using Trizol reagent (Tiangen Biotech, China) following the manufacturer’s instructions. The RNA extracts were screened for virus by one-step reverse transcription polymerase chain reaction (RT-PCR) using the PrimeScript One Step RT-PCR Kit (Takara Biotech, China) according to the manufacturer’s instructions, using degenerate primers to ZGMMV (ZG-F, CCGAGCAGATGCGTGTGGTGAC; ZG-R, CCGATCTGCTCGCACGGAATG), which were designed by us according to the partial replicase protein sequence of ZGMMV (GenBank no. NC_003878), and specific primers to TMV (Li et al. 2000). 
A PCR product of approximately 619 bp was amplified from five diseased plants using ZG-F/ZG-R. No amplification was obtained using the TMV-specific primers with the exception of the positive control. One of the products (Guangxi bottle gourd-1: GXBG1) was cloned into the pMD 19-T Vector (Takara, China) and sequenced in both directions. Sequences were searched in GenBank using the BLASTn tool. All sequences showed 100% identity with ZGMMV-K (GenBank no. AJ295949) and 99% identity with ZGMMV-ZT (GenBank no. AJ252189). The results indicated that GXBG1 was an isolate of ZGMMV, so the isolate was named ZGMMV-GXBG1. To obtain the full length of ZGMMV-GXBG1, five pairs of specific primers were designed and the corresponding fragments amplified, cloned, and sequenced in both directions. The nearly complete genome sequence of ZGMMV-GXBG1 was found to consist of 6,517 nucleotides. A phylogenetic tree was also generated by using MEGA 6.0 software (https://www.megasoftware.net/) with 1,000 bootstrap replicates. The results showed that ZGMMV-GXBG1 clustered together with ZGMMV-ZT and ZGMMV-K with strong bootstrap support (100%). ZGMMV is a distinct species in the genus of Tobamovirus, first reported in Korea (Ryu et al. 2000) and subsequently in Saudi Arabia (Al-Dosary et al. 2012) but not in China. In this research all the data indicate that ZGMMV-GXBG1 is a strain of ZGMMV. To our knowledge, this is the first report of ZGMMV infecting a cucurbitaceous crop in China, and bottle gourd is a new natural host.",2018-11-01 +28968841,AtCircDB: a tissue-specific database for Arabidopsis circular RNAs.,"Circular RNAs are widely existing in eukaryotes. However, there is as yet no tissue-specific Arabidopsis circular RNA database, which hinders the study of circular RNA in plants. Here, we used 622 Arabidopsis RNA sequencing data sets from 87 independent studies hosted at NCBI SRA and developed AtCircDB to systematically identify, store and retrieve circular RNAs. 
By analyzing back-splicing sites, we characterized 84 685 circular RNAs, 30 648 tissue-specific circular RNAs and 3486 microRNA-circular RNA interactions. In addition, we used a metric (detection score) to measure the detection ability of the circular RNAs using a big-data approach. By experimental validation, we demonstrate that this metric improves the accuracy of the detection algorithm. We also defined the regions hosting enriched circular RNAs as super circular RNA regions. The results suggest that these regions are highly related to alternative splicing and chloroplast. Finally, we developed a comprehensive tissue-specific database (AtCircDB) to help the community store, retrieve, visualize and download Arabidopsis circular RNAs. This database will greatly expand our understanding of circular RNAs and their related regulatory networks. AtCircDB is freely available at http://genome.sdau.edu.cn/circRNA.",2019-01-01 +32621625,LINC00319 promotes osteosarcoma progression by regulating the miR-455-3p/NFIB axis.,"

Background

Numerous studies have shown that aberrant expression of long non-coding RNAs (lncRNAs) is associated with the development and metastasis of osteosarcoma (OS). However, the role and function of LINC00319 with respect to regulating OS progression is unknown. The present study aimed to reveal the function and related mechanism of LINC00319 in OS.

Methods

The expression of LINC00319, miR-455-3p and nuclear factor IB (NFIB) in OS cells and tissues was determined using a reverse transcriptase-polymerase chain reaction (PCR). The sublocalization of LINC00319 was predicted by the lncATLAS database (http://lncatlas.crg.eu) and RNA fluorescence in situ hybridization (FISH) was further performed to detect the subcellular localization of LINC00319. LINC00319, miR-455-3p and NFIB target sites were predicted by StarBase (http://starbase.sysu.edu.cn/index.php) and validated using a dual luciferase reporter gene assay. We subsequently performed LINC00319 gain- and loss-of-function studies to define the role of LINC00319 in OS cell migration.

Results

PCR results showed that lncRNA LINC00319 exhibited high expression in tumor cells and tissue. Moreover, LINC00319 was positioned in the cytoplasm, which was identified by FISH. Knockdown of lncRNA LINC00319/NFIB or overexpression of miR-455-3p blocked the migration of OS cells. In addition, the inhibitory effect of migration with the knockdown of lncRNA LINC00319 was partially blocked by administration of miR-455-3p inhibitor.

Conclusions

lncRNA LINC00319 may promote OS progression by regulating the miR-455-3p/NFIB axis, which probably serves as an innovative potential indicator of prognosis and a target of therapy for OS.",2020-07-28 +33335023,CanRisk Tool-A Web Interface for the Prediction of Breast and Ovarian Cancer Risk and the Likelihood of Carrying Genetic Pathogenic Variants.,"

Background

The CanRisk Tool (https://canrisk.org) is the next-generation web interface for the latest version of the BOADICEA (Breast and Ovarian Analysis of Disease Incidence and Carrier Estimation Algorithm) state-of-the-art risk model and a forthcoming ovarian cancer risk model.

Methods

The tool captures information on family history, rare pathogenic variants in cancer susceptibility genes, polygenic risk scores, lifestyle/hormonal/clinical features, and imaging risk factors to predict breast and ovarian cancer risks and estimate the probabilities of carrying pathogenic variants in certain genes. It was implemented using modern web frameworks, technologies, and web services to make it extensible and increase accessibility to researchers and third-party applications. The design of the graphical user interface was informed by feedback from health care professionals and a formal evaluation.

Results

This freely accessible tool was designed to be user friendly for clinicians and to boost acceptability in clinical settings. The tool incorporates a novel graphical pedigree builder to facilitate collection of the family history data required by risk calculations.

Conclusions

The CanRisk Tool provides health care professionals and researchers with a user-friendly interface to carry out multifactorial breast and ovarian cancer risk predictions. It is the first freely accessible cancer risk prediction program to carry the CE marking.

Impact

There have been over 3,100 account registrations, and 98,000 breast and ovarian cancer risk calculations have been run within the first 9 months of the CanRisk Tool launch.",2020-12-17 +30871473,TADKB: Family classification and a knowledge base of topologically associating domains.,"

Background

Topologically associating domains (TADs) are considered the structural and functional units of the genome. However, there is a lack of an integrated resource for TADs in the literature where researchers can obtain family classifications and detailed information about TADs.

Results

We built an online knowledge base TADKB integrating knowledge for TADs in eleven cell types of human and mouse. For each TAD, TADKB provides the predicted three-dimensional (3D) structures of chromosomes and TADs, and detailed annotations about the protein-coding genes and long non-coding RNAs (lncRNAs) existent in each TAD. Besides the 3D chromosomal structures inferred by population Hi-C, the single-cell haplotype-resolved chromosomal 3D structures of 17 GM12878 cells are also integrated in TADKB. A user can submit query gene/lncRNA ID/sequence to search for the TAD(s) that contain(s) the query gene or lncRNA. We also classified TADs into families. To achieve that, we used the TM-scores between reconstructed 3D structures of TADs as structural similarities and the Pearson's correlation coefficients between the fold enrichment of chromatin states as functional similarities. All of the TADs in one cell type were clustered based on structural and functional similarities respectively using the spectral clustering algorithm with various predefined numbers of clusters. We have compared the overlapping TADs from structural and functional clusters and found that most of the TADs in the functional clusters with depleted chromatin states are clustered into one or two structural clusters. This novel finding indicates a connection between the 3D structures of TADs and their DNA functions in terms of chromatin states.

Conclusion

TADKB is available at http://dna.cs.miami.edu/TADKB/ .",2019-03-14 +33237903,Assessment of deep neural networks for the diagnosis of benign and malignant skin neoplasms in comparison with dermatologists: A retrospective validation study.,"

Background

The diagnostic performance of convolutional neural networks (CNNs) for diagnosing several types of skin neoplasms has been demonstrated as comparable with that of dermatologists using clinical photography. However, the generalizability should be demonstrated using a large-scale external dataset that includes most types of skin neoplasms. In this study, the performance of a neural network algorithm was compared with that of dermatologists in both real-world practice and experimental settings.

Methods and findings

To demonstrate generalizability, the skin cancer detection algorithm (https://rcnn.modelderm.com) developed in our previous study was used without modification. We conducted a retrospective study with all single lesion biopsied cases (43 disorders; 40,331 clinical images from 10,426 cases: 1,222 malignant cases and 9,204 benign cases); mean age (standard deviation [SD], 52.1 [18.3]; 4,701 men [45.1%]) were obtained from the Department of Dermatology, Severance Hospital in Seoul, Korea between January 1, 2008 and March 31, 2019. Using the external validation dataset, the predictions of the algorithm were compared with the clinical diagnoses of 65 attending physicians who had recorded the clinical diagnoses with thorough examinations in real-world practice. In addition, the results obtained by the algorithm for the data of randomly selected batches of 30 patients were compared with those obtained by 44 dermatologists in experimental settings; the dermatologists were only provided with multiple images of each lesion, without clinical information. With regard to the determination of malignancy, the area under the curve (AUC) achieved by the algorithm was 0.863 (95% confidence interval [CI] 0.852-0.875), when unprocessed clinical photographs were used. The sensitivity and specificity of the algorithm at the predefined high-specificity threshold were 62.7% (95% CI 59.9-65.1) and 90.0% (95% CI 89.4-90.6), respectively. Furthermore, the sensitivity and specificity of the first clinical impression of 65 attending physicians were 70.2% and 95.6%, respectively, which were superior to those of the algorithm (McNemar test; p < 0.0001). The positive and negative predictive values of the algorithm were 45.4% (CI 43.7-47.3) and 94.8% (CI 94.4-95.2), respectively, whereas those of the first clinical impression were 68.1% and 96.0%, respectively. 
In the reader test conducted using images corresponding to batches of 30 patients, the sensitivity and specificity of the algorithm at the predefined threshold were 66.9% (95% CI 57.7-76.0) and 87.4% (95% CI 82.5-92.2), respectively. Furthermore, the sensitivity and specificity derived from the first impression of 44 of the participants were 65.8% (95% CI 55.7-75.9) and 85.7% (95% CI 82.4-88.9), respectively, which are values comparable with those of the algorithm (Wilcoxon signed-rank test; p = 0.607 and 0.097). Limitations of this study include the exclusive use of high-quality clinical photographs taken in hospitals and the lack of ethnic diversity in the study population.

Conclusions

Our algorithm could diagnose skin tumors with nearly the same accuracy as a dermatologist when the diagnosis was performed solely with photographs. However, as a result of limited data relevancy, the performance was inferior to that of actual medical examination. To achieve more accurate predictive diagnoses, clinical information should be integrated with imaging information.",2020-11-25 +32010933,GCdiscrimination: identification of gastric cancer based on a milliliter of blood.,"Gastric cancer (GC) continues to be one of the major causes of cancer deaths worldwide. Meanwhile, liquid biopsies have received extensive attention in the screening and detection of cancer along with better understanding and clinical practice of biomarkers. In this work, 58 routine blood biochemical indices were tentatively used as integrated markers, which further expanded the scope of liquid biopsies and a discrimination system for GC consisting of 17 top-ranked indices, elaborated by random forest method was constructed to assist in preliminary assessment prior to histological and gastroscopic diagnosis based on the test data of a total of 2951 samples. The selected indices are composed of eight routine blood indices (MO%, IG#, IG%, EO%, P-LCR, RDW-SD, HCT and RDW-CV) and nine blood biochemical indices (TP, AMY, GLO, CK, CHO, CK-MB, TG, ALB and γ-GGT). The system presented a robust classification performance, which can quickly distinguish GC from other stomach diseases, different cancers and healthy people with sensitivity, specificity, total accuracy and area under the curve of 0.9067, 0.9216, 0.9138 and 0.9720 for the cross-validation set, respectively. 
Besides, this system can not only provide an innovative strategy to facilitate rapid and real-time GC identification, but also reveal the remote correlation between GC and these routine blood biochemical parameters, which helped to unravel the hidden association of these parameters with GC and serve as the basis for subsequent studies of the clinical value in prevention program and surveillance management for GC. The identification system, called GC discrimination, is now available online at http://lishuyan.lzu.edu.cn/GC/.",2021-01-01 +30092360,Molecular property diagnostic suite for diabetes mellitus (MPDSDM): An integrated web portal for drug discovery and drug repurposing.,"Molecular Property Diagnostic Suite - Diabetes Mellitus (MPDSDM) is a Galaxy-based, open source disease-specific web portal for diabetes. It consists of three modules namely (i) data library (ii) data processing and (iii) data analysis tools. The data library (target library and literature) module provide extensive and curated information about the genes involved in type 1 and type 2 diabetes onset and progression stage (available at http://www.mpds-diabetes.in). The database also contains information on drug targets, biomarkers, therapeutics and associated genes specific to type 1, and type 2 diabetes. A unique MPDS identification number has been assigned for each gene involved in diabetes mellitus and the corresponding card contains chromosomal data, gene information, protein UniProt ID, functional domains, druggability and related pathway information. One of the objectives of the web portal is to have an open source data repository that contains all information on diabetes and use this information for developing therapeutics to cure diabetes. We also make an attempt for computational drug repurposing for the validated diabetes targets. 
We performed virtual screening of 1455 FDA approved drugs on selected 20 type 1 and type 2 diabetes proteins using docking protocol and their biological activity was predicted using ""PASS Online"" server (http://www.way2drug.com/passonline) towards anti-diabetic activity, resulted in the identification of 41 drug molecules. Five drug molecules (which are earlier known for anti-malarial/microbial, anti-viral, anti-cancer, anti-pulmonary activities) were proposed to have a better repurposing potential for type 2 anti-diabetic activity and good binding affinity towards type 2 diabetes target proteins.",2018-08-06 +32718162,Development and initial validation of the Eating Pathology Symptoms Inventory-Clinician-Rated Version (EPSI-CRV).,"Proper assessment and diagnosis of eating disorders (EDs) are critical to determine to whom prevention and treatment efforts should be targeted, the extent to which treatment is working, and when an individual has recovered. Although existing ED diagnostic interviews have numerous strengths, they also have certain limitations, including poor internal consistency, low discriminant validity, and poor factor-structure replicability. The purpose of the current study was to address problems of past ED diagnostic interviews through the creation of a new clinician-rated interview-the Eating Pathology Symptoms Inventory-Clinician-Rated Version (EPSI-CRV). The EPSI-CRV was designed to measure dimensional constructs assessed in the self-report version of the EPSI and generate current Diagnostic and Statistical Manual of Mental Disorders (5th ed.; DSM-5; American Psychiatric Association, 2013) diagnoses. Participants were community-recruited adults with a DSM-5 ED (N = 257). Participants completed self-report and interview-based measures of eating, mood, and anxiety disorders and self-report measures of psychiatric impairment. 
The EPSI-CRV demonstrated evidence for interrater reliability, convergent and discriminant validity, and a good-fitting factor structure. EPSI-CRV dimensions showed concurrent validity for distinguishing among ED diagnoses. Baseline EPSI-CRV dimensions significantly predicted psychiatric impairment at baseline but not at 1-year follow-up. Although some scales had lower internal consistency than ideal, internal consistency values were similar to those of other established diagnostic measures. The EPSI-CRV appears to represent a promising new interview that can be used across a variety of clinical and research settings. Interested readers can access the EPSI-CRV and relevant training materials here: https://kuscholarworks.ku.edu/handle/1808/29616. (PsycInfo Database Record (c) 2020 APA, all rights reserved).",2020-07-27 +32719554,A pan-cancer analysis reveals nonstop extension mutations causing SMAD4 tumour suppressor degradation.,"Nonstop or stop-loss mutations convert a stop into a sense codon, resulting in translation into the 3' untranslated region as a nonstop extension mutation to the next in-frame stop codon or as a readthrough mutation into the poly-A tail. Nonstop mutations have been characterized in hereditary diseases, but not in cancer genetics. In a pan-cancer analysis, we curated and analysed 3,412 nonstop mutations from 62 tumour entities, generating a comprehensive database at http://NonStopDB.dkfz.de. Six different nonstop extension mutations affected the tumour suppressor SMAD4, extending its carboxy terminus by 40 amino acids. These caused rapid degradation of the SMAD4 mutants via the ubiquitin-proteasome system. A hydrophobic degron signal sequence of ten amino acids within the carboxy-terminal extension was required to induce complete loss of the SMAD4 protein. 
Thus, we discovered that nonstop mutations can be functionally important in cancer and characterize their loss-of-function impact on the tumour suppressor SMAD4.",2020-07-27 +33983910,"Surveillance of Vaccination Coverage Among Adult Populations -United States, 2018.","

Problem/condition

Adults are at risk for illness, hospitalization, disability and, in some cases, death from vaccine-preventable diseases, particularly influenza and pneumococcal disease. CDC recommends vaccinations for adults on the basis of age, health conditions, prior vaccinations, and other considerations. Updated vaccination recommendations from CDC are published annually in the U.S. Adult Immunization Schedule. Despite longstanding recommendations for use of many vaccines, vaccination coverage among U.S. adults remains low.

Reporting period

August 2017-June 2018 (for influenza vaccination) and January-December 2018 (for pneumococcal, herpes zoster, tetanus and diphtheria [Td]/tetanus toxoid, reduced diphtheria toxoid, and acellular pertussis [Tdap], hepatitis A, hepatitis B, and human papillomavirus [HPV] vaccination).

Description of system

The National Health Interview Survey (NHIS) is a continuous, cross-sectional national household survey of the noninstitutionalized U.S. civilian population. In-person interviews are conducted throughout the year in a probability sample of households, and NHIS data are compiled and released annually. NHIS's objective is to monitor the health of the U.S. population and provide estimates of health indicators, health care use and access, and health-related behaviors. Adult receipt of influenza, pneumococcal, herpes zoster, Td/Tdap, hepatitis A, hepatitis B, and at least 1 dose of HPV vaccines was assessed. Estimates were derived for a new composite adult vaccination quality measure and by selected demographic and access-to-care characteristics (e.g., age, race/ethnicity, indication for vaccination, travel history [travel to countries where hepatitis infections are endemic], health insurance status, contacts with physicians, nativity, and citizenship). Trends in adult vaccination were assessed during 2010-2018.

Results

Coverage for the adult age-appropriate composite measure was low in all age groups. Racial and ethnic differences in coverage persisted for all vaccinations, with lower coverage for most vaccinations among non-White compared with non-Hispanic White adults. Linear trend tests indicated coverage increased from 2010 to 2018 for most vaccines in this report. Few adults aged ≥19 years had received all age-appropriate vaccines, including influenza vaccination, regardless of whether inclusion of Tdap (13.5%) or inclusion of any tetanus toxoid-containing vaccine (20.2%) receipt was measured. Coverage among adults for influenza vaccination during the 2017-18 season (46.1%) was similar to the estimate for the 2016-17 season (45.4%), and coverage for pneumococcal (adults aged ≥65 years [69.0%]), herpes zoster (adults aged ≥50 years and aged ≥60 years [24.1% and 34.5%, respectively]), tetanus (adults aged ≥19 years [62.9%]), Tdap (adults aged ≥19 years [31.2%]), hepatitis A (adults aged ≥19 years [11.9%]), and HPV (females aged 19-26 years [52.8%]) vaccination in 2018 were similar to the estimates for 2017. Hepatitis B vaccination coverage among adults aged ≥19 years and health care personnel (HCP) aged ≥19 years increased 4.2 and 6.7 percentage points to 30.0% and 67.2%, respectively, from 2017. HPV vaccination coverage among males aged 19-26 years increased 5.2 percentage points to 26.3% from the 2017 estimate. Overall, HPV vaccination coverage among females aged 19-26 years did not increase, but coverage among Hispanic females aged 19-26 years increased 10.8 percentage points to 49.6% from the 2017 estimate. 
Coverage for the following vaccines was lower among adults without health insurance compared with those with health insurance: influenza vaccine (among adults aged ≥19 years, 19-49 years, and 50-64 years), pneumococcal vaccine (among adults aged 19-64 years at increased risk), Td vaccine (among all age groups), Tdap vaccine (among adults aged ≥19 years and 19-64 years), hepatitis A vaccine (among adults aged ≥19 years overall and among travelers aged ≥19 years), hepatitis B vaccine (among adults aged ≥19 years and 19-49 years and among travelers aged ≥19 years), herpes zoster vaccine (among adults aged ≥60 years), and HPV vaccine (among males and females aged 19-26 years). Adults who reported having a usual place for health care generally reported receipt of recommended vaccinations more often than those who did not have such a place, regardless of whether they had health insurance. Vaccination coverage was higher among adults reporting ≥1 physician contact during the preceding year compared with those who had not visited a physician during the preceding year, regardless of whether they had health insurance. Even among adults who had health insurance and ≥10 physician contacts during the preceding year, depending on the vaccine, 20.1%-87.5% reported not having received vaccinations that were recommended either for all persons or for those with specific indications. Overall, vaccination coverage among U.S.-born adults was significantly higher than that of foreign-born adults, including influenza vaccination (aged ≥19 years), pneumococcal vaccination (all ages), tetanus vaccination (all ages), Tdap vaccination (all ages), hepatitis B vaccination (aged ≥19 years and 19-49 years and travelers aged ≥19 years), herpes zoster vaccination (all ages), and HPV vaccination among females aged 19-26 years. Vaccination coverage also varied by citizenship status and years living in the United States.

Interpretation

NHIS data indicate that many adults remain unprotected against vaccine-preventable diseases. Coverage for the adult age-appropriate composite measures was low in all age groups. Individual adult vaccination coverage remained low as well, but modest gains occurred in vaccination coverage for hepatitis B (among adults aged ≥19 years and HCP aged ≥19 years), and HPV (among males aged 19-26 years and Hispanic females aged 19-26 years). Coverage for other vaccines and groups with Advisory Committee on Immunization Practices vaccination indications did not improve from 2017. Although HPV vaccination coverage among males aged 19-26 years and Hispanic females aged 19-26 years increased, approximately 50% of females aged 19-26 years and 70% of males aged 19-26 years remained unvaccinated. Racial/ethnic vaccination differences persisted for routinely recommended adult vaccines. Having health insurance coverage, having a usual place for health care, and having ≥1 physician contacts during the preceding 12 months were associated with higher vaccination coverage; however, these factors alone were not associated with optimal adult vaccination coverage, and findings indicate missed opportunities to vaccinate remained.

Public health actions

Substantial improvement in adult vaccination uptake is needed to reduce the burden of vaccine-preventable diseases. Following the Standards for Adult Immunization Practice (https://www.cdc.gov/vaccines/hcp/adults/for-practice/standards/index.html), all providers should routinely assess adults' vaccination status at every clinical encounter, strongly recommend appropriate vaccines, either offer needed vaccines or refer their patients to another provider who can administer the needed vaccines, and document vaccinations received by their patients in an immunization information system.",2021-05-14 +33509941,Targeting IGF Perturbs Global Replication through Ribonucleotide Reductase Dysfunction.,"Inhibition of IGF receptor (IGF1R) delays repair of radiation-induced DNA double-strand breaks (DSB), prompting us to investigate whether IGF1R influences endogenous DNA damage. Here we demonstrate that IGF1R inhibition generates endogenous DNA lesions protected by 53BP1 bodies, indicating under-replicated DNA. In cancer cells, inhibition or depletion of IGF1R delayed replication fork progression accompanied by activation of ATR-CHK1 signaling and the intra-S-phase checkpoint. This phenotype reflected unanticipated regulation of global replication by IGF1 mediated via AKT, MEK/ERK, and JUN to influence expression of ribonucleotide reductase (RNR) subunit RRM2. Consequently, inhibition or depletion of IGF1R downregulated RRM2, compromising RNR function and perturbing dNTP supply. The resulting delay in fork progression and hallmarks of replication stress were rescued by RRM2 overexpression, confirming RRM2 as the critical factor through which IGF1 regulates replication. Suspecting existence of a backup pathway protecting from toxic sequelae of replication stress, targeted compound screens in breast cancer cells identified synergy between IGF inhibition and ATM loss. Reciprocal screens of ATM-proficient/deficient fibroblasts identified an IGF1R inhibitor as the top hit. 
IGF inhibition selectively compromised growth of ATM-null cells and spheroids and caused regression of ATM-null xenografts. This synthetic-lethal effect reflected conversion of single-stranded lesions in IGF-inhibited cells into toxic DSBs upon ATM inhibition. Overall, these data implicate IGF1R in alleviating replication stress, and the reciprocal IGF:ATM codependence we identify provides an approach to exploit this effect in ATM-deficient cancers. SIGNIFICANCE: This study identifies regulation of ribonucleotide reductase function and dNTP supply by IGFs and demonstrates that IGF axis blockade induces replication stress and reciprocal codependence on ATM. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/8/2128/F1.large.jpg.",2021-01-28 +33792437,Defragmenting the Day: The Effect of Full-Day Continuity Clinics on Continuity of Care and Perceptions of Clinic.,"

Problem

Traditional half-day continuity clinics within primary care residency programs require residents to split time between their assigned clinical rotation and continuity clinic, which can have detrimental effects on resident experiences and patient care within continuity clinics. Most previous efforts to separate inpatient and outpatient obligations have employed block scheduling models, which entail significant rearrangements to clinical rotations, team structures, and didactic education and have yielded mixed effects on continuity of care. A full-day continuity clinic schedule within a traditional, non-block rotation framework holds potential to de-conflict resident schedules without the logistical rearrangements required to adopt block scheduling models, but no literature has described the effect of such full-day continuity clinics on continuity of care or resident experiences within continuity clinic.

Intervention

A pediatric residency program implemented full-day continuity clinics within a traditional rotation framework. We examined the change in continuity for physician (PHY) measure in the six months prior to versus the six months following the switch, as well as changes in how often residents saw clinic patients in follow-up and personally followed up clinic laboratory and radiology results, which we term episodic follow-up. Resident and attending perceptions of full-day continuity clinics were measured using a survey administered 5-7 months after the switch.

Context

The switch to full-day continuity clinics occurred in January 2018 within the Wright State University/Wright-Patterson Medical Center Pediatric Residency Program. The program has 46 residents who are assigned to one of two continuity clinic sites, each of which implemented the full-day continuity clinics simultaneously.

Outcome

The PHY for residents at one clinic decreased slightly from 18.0% to 13.6% (p<.001) with full-day continuity clinics but was unchanged at another clinic [60.6% vs 59.5%, p=.86]. Measures of episodic follow-up were unchanged. Residents (32/46 = 77% responding) and attendings (6/8 = 75% responding) indicated full-day continuity clinics improved residents' balance of inpatient and outpatient obligations, preparation for clinic, continuity relationships with patients, and clinic satisfaction.

Lessons learned

Full-day continuity clinics within a traditional rotation framework had mixed effects on continuity of care but improved residents' experiences within clinic. This model offers a viable alternative to block scheduling models for primary care residency programs wishing to defragment resident schedules. Supplemental data for this article is available online at https://doi.org/10.1080/10401334.2021.1879652.",2021-04-01 +32365179,New Methods to Calculate Concordance Factors for Phylogenomic Datasets.,"We implement two measures for quantifying genealogical concordance in phylogenomic data sets: the gene concordance factor (gCF) and the novel site concordance factor (sCF). For every branch of a reference tree, gCF is defined as the percentage of ""decisive"" gene trees containing that branch. This measure is already in wide usage, but here we introduce a package that calculates it while accounting for variable taxon coverage among gene trees. sCF is a new measure defined as the percentage of decisive sites supporting a branch in the reference tree. gCF and sCF complement classical measures of branch support in phylogenetics by providing a full description of underlying disagreement among loci and sites. An easy to use implementation and tutorial is freely available in the IQ-TREE software package (http://www.iqtree.org/doc/Concordance-Factor, last accessed May 13, 2020).",2020-09-01 +36419961,A global compilation of in situ aquatic high spectral resolution inherent and apparent optical property data for remote sensing applications.,"Light emerging from natural water bodies and measured by radiometers contains information about the local type and concentrations of phytoplankton, non-algal particles and colored dissolved organic matter in the underlying waters. An increase in spectral resolution in forthcoming satellite and airborne remote sensing missions is expected to lead to new or improved capabilities for characterizing aquatic ecosystems. 
Such upcoming missions include NASA's Plankton, Aerosol, Cloud, ocean Ecosystem (PACE) mission; the NASA Surface Biology and Geology designated observable mission; and NASA Airborne Visible/Infrared Imaging Spectrometer - Next Generation (AVIRIS-NG) airborne missions. In anticipation of these missions, we present an organized dataset of geographically diverse, quality-controlled, high spectral resolution inherent and apparent optical property (IOP-AOP) aquatic data. The data are intended to be of use to increase our understanding of aquatic optical properties, to develop aquatic remote sensing data product algorithms, and to perform calibration and validation activities for forthcoming aquatic-focused imaging spectrometry missions. The dataset is comprised of contributions from several investigators and investigating teams collected over a range of geographic areas and water types, including inland waters, estuaries, and oceans. Specific in situ measurements include remote-sensing reflectance, irradiance reflectance, and coefficients describing particulate absorption, particulate attenuation, non-algal particulate absorption, colored dissolved organic matter absorption, phytoplankton absorption, total absorption, total attenuation, particulate backscattering, and total backscattering. The dataset can be downloaded from https://doi.org/10.1594/PANGAEA.902230 (Casey et al., 2019).",2020-05-01 +25861964,Cellular phenotype database: a repository for systems microscopy data.,"

Motivation

The Cellular Phenotype Database (CPD) is a repository for data derived from high-throughput systems microscopy studies. The aims of this resource are: (i) to provide easy access to cellular phenotype and molecular localization data for the broader research community; (ii) to facilitate integration of independent phenotypic studies by means of data aggregation techniques, including use of an ontology and (iii) to facilitate development of analytical methods in this field.

Results

In this article we present CPD, its data structure and user interface, propose a minimal set of information describing RNA interference experiments, and suggest a generic schema for management and aggregation of outputs from phenotypic or molecular localization experiments. The database has a flexible structure for management of data from heterogeneous sources of systems microscopy experimental outputs generated by a variety of protocols and technologies and can be queried by gene, reagent, gene attribute, study keywords, phenotype or ontology terms.

Availability and implementation

CPD is developed as part of the Systems Microscopy Network of Excellence and is accessible at http://www.ebi.ac.uk/fg/sym.

Contact

jes@ebi.ac.uk or ugis@ebi.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-09 +29949583,A Web Geographic Information System to share data and explorative analysis tools: The application to West Nile disease in the Mediterranean basin.,"

Background

In the last decades an increasing number of West Nile Disease cases was observed in equines and humans in the Mediterranean basin and surveillance systems are set up in numerous countries to manage and control the disease. The collection, storage and distribution of information on the spread of the disease becomes important for a shared intervention and control strategy. To this end, a Web Geographic Information System has been developed and disease data, climatic and environmental remote sensed data, full genome sequences of selected isolated strains are made available. This paper describes the Disease Monitoring Dashboard (DMD) web system application, the tools available for the preliminary analysis on climatic and environmental factors and the other interactive tools for epidemiological analysis.

Methods

WNV occurrence data are collected from multiple official and unofficial sources. Whole genome sequences and metadata of WNV strains are retrieved from public databases or generated in the framework of the Italian surveillance activities. Climatic and environmental data are provided by NASA website. The Geographical Information System is composed of Oracle 10g Database and ESRI ArcGIS Server 10.03; the web mapping client application is developed with the ArcGIS API for JavaScript and Phylocanvas library to facilitate and optimize the mash-up approach. ESRI ArcSDE 10.1 has been used to store spatial data.

Results

The DMD application is accessible through a generic web browser at https://netmed.izs.it/networkMediterraneo/. The system collects data through on-line forms and automated procedures and visualizes data as interactive graphs, maps and tables. The spatial and temporal dynamic visualization of disease events is managed by a time slider that returns results on both map and epidemiological curve. Climatic and environmental data can be associated to cases through python procedures and downloaded as Excel files.

Conclusions

The system compiles multiple datasets through user-friendly web tools; it integrates entomological, veterinary and human surveillance, molecular information on pathogens and environmental and climatic data. The principal result of the DMD development is the transfer and dissemination of knowledge and technologies to develop strategies for integrated prevention and control measures of animal and human diseases.",2018-06-27 +32849823,Genome-Scale Metabolic Model of Xanthomonas phaseoli pv. manihotis: An Approach to Elucidate Pathogenicity at the Metabolic Level.,"Xanthomonas phaseoli pv. manihotis (Xpm) is the causal agent of cassava bacterial blight, the most important bacterial disease in this crop. There is a paucity of knowledge about the metabolism of Xanthomonas and its relevance in the pathogenic process, with the exception of the elucidation of the xanthan biosynthesis route. Here we report the reconstruction of the genome-scale model of Xpm metabolism and the insights it provides into plant-pathogen interactions. The model, iXpm1556, displayed 1,556 reactions, 1,527 compounds, and 890 genes. Metabolic maps of central amino acid and carbohydrate metabolism, as well as xanthan biosynthesis of Xpm, were reconstructed using Escher (https://escher.github.io/) to guide the curation process and for further analyses. The model was constrained using the RNA-seq data of a mutant of Xpm for quorum sensing (QS), and these data were used to construct context-specific models (CSMs) of the metabolism of the two strains (wild type and QS mutant). The CSMs and flux balance analysis were used to get insights into pathogenicity, xanthan biosynthesis, and QS mechanisms. Between the CSMs, 653 reactions were shared; unique reactions belong to purine, pyrimidine, and amino acid metabolism. Alternative objective functions were used to demonstrate a trade-off between xanthan biosynthesis and growth and the re-allocation of resources in the process of biosynthesis. 
Important features altered by QS included carbohydrate metabolism, NAD(P)+ balance, and fatty acid elongation. In this work, we modeled the xanthan biosynthesis and the QS process and their impact on the metabolism of the bacterium. This model will be useful for researchers studying host-pathogen interactions and will provide insights into the mechanisms of infection used by this and other Xanthomonas species.",2020-08-11 +25964630,PsyGeNET: a knowledge platform on psychiatric disorders and their genes.,"

Unlabelled

PsyGeNET (Psychiatric disorders and Genes association NETwork) is a knowledge platform for the exploratory analysis of psychiatric diseases and their associated genes. PsyGeNET is composed of a database and a web interface supporting data search, visualization, filtering and sharing. PsyGeNET integrates information from DisGeNET and data extracted from the literature by text mining, which has been curated by domain experts. It currently contains 2642 associations between 1271 genes and 37 psychiatric disease concepts. In its first release, PsyGeNET is focused on three psychiatric disorders: major depression, alcohol and cocaine use disorders. PsyGeNET represents a comprehensive, open access resource for the analysis of the molecular mechanisms underpinning psychiatric disorders and their comorbidities.

Availability and implementation

The PsyGeNET platform is freely available at http://www.psygenet.org/. The PsyGeNET database is made available under the Open Database License (http://opendatacommons.org/licenses/odbl/1.0/).

Contact

lfurlong@imim.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-11 +32048279,Interpreting functional analysis outcomes using automated nonparametric statistical analysis.,"Current methods employed to interpret functional analysis data include visual analysis and post-hoc visual inspection (PHVI). However, these methods may be biased by dataset complexity, hand calculations, and rater experience. We examined whether an automated approach using nonparametric rank-based statistics could increase the accuracy and efficiency of functional analysis data interpretation. We applied Automated Nonparametric Statistical Analysis (ANSA) to a sample of 65 published functional analyses for which additional experimental evidence was available to verify behavior function. Results showed that exact behavior function agreement between ANSA and the publications authors was 83.1%, exact agreement between ANSA and PHVI was 75.4%, and exact agreement across all 3 methods was 64.6%. These preliminary findings suggest that ANSA has the potential to support the data interpretation process. A web application that incorporates the calculations and rules utilized by ANSA is accessible at https://ansa.shinyapps.io/ansa/.",2020-02-12 +32715338,"Estimation of E-waste Generation, Residential Behavior, and Disposal Practices from Major Governorates in Jordan.","Estimating the generation of e-waste in governorates is critically needed for sustainable and environmentally sound e-waste management in Jordan. The main objectives of the present study are to quantify and evaluate the annual e-waste generation in all governorates in Jordan and disposal practices. The present study comprises the information of e-waste as classified by the European Union Directive including six main categories (16 United Nations University key items). 
The survey targeted 15,883 households (12.52% females and 87.48% males), where primary data on e-waste generation and disposal methods were gathered, assessed, and quantified. Subsequently, the survey-based data collected from the study sample have been extrapolated to quantify an e-waste generation inventory for Jordan and the disposal methods using ArcGIS mapping. The study-extrapolated findings reveal that ~8,735,187 e-waste items (13 ktons) had been turned into e-waste and discarded by all households in 2018 in the 12 governorates in Jordan. Moreover, dumping of e-waste is still the dominant disposal method practiced by 58.4% of households in Jordan. The other disposal practices showed that granting of the waste EEE to others has the share of 16.6%; selling (10.7%); delivering the waste EEE for environmentally sound recycling (6.8%); and others practices represented 7.4%. Furthermore, the present study has played a vital role in e-waste awareness dissemination since the findings of the present study have been modeled and shown online by the Department of Statistics, Jordan through the link ( https://arcg.is/1KzvjO ). Finally, the challenges, barriers, and prospects of e-waste management in Jordan have been explored in the present study.",2020-07-26 +26464443,RMBase: a resource for decoding the landscape of RNA modifications from high-throughput sequencing data.,"Although more than 100 different types of RNA modifications have been characterized across all living organisms, surprisingly little is known about the modified positions and their functions. Recently, various high-throughput modification sequencing methods have been developed to identify diverse post-transcriptional modifications of RNA molecules. In this study, we developed a novel resource, RMBase (RNA Modification Base, http://mirlab.sysu.edu.cn/rmbase/), to decode the genome-wide landscape of RNA modifications identified from high-throughput modification data generated by 18 independent studies. 
The current release of RMBase includes ∼ 9500 pseudouridine (Ψ) modifications generated from Pseudo-seq and CeU-seq sequencing data, ∼ 1000 5-methylcytosines (m(5)C) predicted from Aza-IP data, ∼ 124 200 N6-Methyladenosine (m(6)A) modifications discovered from m(6)A-seq and ∼ 1210 2'-O-methylations (2'-O-Me) identified from RiboMeth-seq data and public resources. Moreover, RMBase provides a comprehensive listing of other experimentally supported types of RNA modifications by integrating various resources. It provides web interfaces to show thousands of relationships between RNA modification sites and microRNA target sites. It can also be used to illustrate the disease-related SNPs residing in the modification sites/regions. RMBase provides a genome browser and a web-based modTool to query, annotate and visualize various RNA modifications. This database will help expand our understanding of potential functions of RNA modifications.",2015-10-12 +31114314,Increased expression of TMED2 is an unfavorable prognostic factor in patients with breast cancer.,"Background: We obtained 2 types of clones which were termed SC (sphere-shaped clone) and NSC (non-sphere-shaped clone) from 4T1 cells by monoclonal culture. SC and NSC were distinct in morphology, surface marker, metabolism and proliferation rate. With the transcriptome sequencing data analysis, we found TMED2 expressed higher in SCs. TMED2 was a member of the transmembrane emp24 domain and might play roles in cancer cell proliferation. However, its prognostic roles in breast cancer remained unknown. We aimed to investigate the prognostic values of TMED2 in patients with breast cancer. Methods: We used UALCAN (http://ualcan.path.uab.edu) and the Human Protein Atlas (www.proteinatlas.org) to explore the TMED2 expression level and DNA methylation data between breast cancer and normal breast tissue. With Oncomine (www.oncomine.org), we investigated the copy number of TMED2 in breast cancer sample and normal breast tissue. 
We used the Kaplan-Meier Plotter database (http://kmplot.com/analysis) to analyze prognostic values of TMED2 mRNA expression in all breast cancers and in different intrinsic subtypes. Moreover, protein expression levels of TMED2 were confirmed by Western blot in breast cancer tissues and normal mammary tissue as well as SCs and NSCs. Results: TMED2 was significantly upregulated in breast cancer patients compared to normal mammary samples. Meanwhile, the increased expression of TMED2 mRNA was closely associated with reduced overall survival (OS) in all breast cancers, and with reduced OS in patients with ER-positive, Luminal A or Luminal B breast cancer subtypes. Moreover, Western blot confirmed that increased TMED2 expression was correlated with reduced OS at the protein level. Conclusion: Increased expression of TMED2 was significantly related to unfavorable outcomes in patients with breast cancer. Thus, we supposed TMED2 is oncogenic and a potential target for breast cancer therapy and these preliminary findings require further study to determine whether TMED2-targeting reagents might be developed for clinical application in breast cancer.",2019-03-18 +30439869,"Suicide Rates by Major Occupational Group - 17 States, 2012 and 2015.","During 2000-2016, the suicide rate among the U.S. working age population (persons aged 16-64 years) increased 34%, from 12.9 per 100,000 population to 17.3 (https://www.cdc.gov/injury/wisqars). To better understand suicide among different occupational groups and inform suicide prevention efforts, CDC analyzed suicide deaths by Standard Occupational Classification (SOC) major groups for decedents aged 16-64 years from the 17 states participating in both the 2012 and 2015 National Violent Death Reporting System (NVDRS) (https://www.cdc.gov/violenceprevention/nvdrs). 
The occupational group with the highest male suicide rate in 2012 and 2015 was Construction and Extraction (43.6 and 53.2 per 100,000 civilian noninstitutionalized working persons, respectively), whereas the group with the highest female suicide rate was Arts, Design, Entertainment, Sports, and Media (11.7 [2012] and 15.6 [2015]). The largest suicide rate increase among males from 2012 to 2015 (47%) occurred in the Arts, Design, Entertainment, Sports, and Media occupational group (26.9 to 39.7) and among females, in the Food Preparation and Serving Related group, from 6.1 to 9.4 (54%). CDC's technical package of strategies to prevent suicide is a resource for communities, including workplace settings (1).",2018-11-16 +33787320,"Assessing the Distribution of Air Pollution Health Risks within Cities: A Neighborhood-Scale Analysis Leveraging High-Resolution Data Sets in the Bay Area, California.","

Background

Air pollution-attributable disease burdens reported at global, country, state, or county levels mask potential smaller-scale geographic heterogeneity driven by variation in pollution levels and disease rates. Capturing within-city variation in air pollution health impacts is now possible with high-resolution pollutant concentrations.

Objectives

We quantified neighborhood-level variation in air pollution health risks, comparing results from highly spatially resolved pollutant and disease rate data sets available for the Bay Area, California.

Methods

We estimated mortality and morbidity attributable to nitrogen dioxide (NO2), black carbon (BC), and fine particulate matter [PM ≤2.5μm in aerodynamic diameter (PM2.5)] using epidemiologically derived health impact functions. We compared geographic distributions of pollution-attributable risk estimates using concentrations from a) mobile monitoring of NO2 and BC; and b) models predicting annual NO2, BC and PM2.5 concentrations from land-use variables and satellite observations. We also compared results using county vs. census block group (CBG) disease rates.

Results

Estimated pollution-attributable deaths per 100,000 people at the 100-m grid-cell level ranged across the Bay Area by a factor of 38, 4, and 5 for NO2 [mean=30 (95% CI: 9, 50)], BC [mean=2 (95% CI: 1, 2)], and PM2.5, [mean=49 (95% CI: 33, 64)]. Applying concentrations from mobile monitoring and land-use regression (LUR) models in Oakland neighborhoods yielded similar spatial patterns of estimated grid-cell-level NO2-attributable mortality rates. Mobile monitoring concentrations captured more heterogeneity [mobile monitoring mean=64 (95% CI: 19, 107) deaths per 100,000 people; LUR mean=101 (95% CI: 30, 167)]. Using CBG-level disease rates instead of county-level disease rates resulted in 15% larger attributable mortality rates for both NO2 and PM2.5, with more spatial heterogeneity at the grid-cell-level [NO2 CBG mean=41 deaths per 100,000 people (95% CI: 12, 68); NO2 county mean=38 (95% CI: 11, 64); PM2.5 CBG mean=59 (95% CI: 40, 77); and PM2.5 county mean=55 (95% CI: 37, 71)].

Discussion

Air pollutant-attributable health burdens varied substantially between neighborhoods, driven by spatial variation in pollutant concentrations and disease rates. https://doi.org/10.1289/EHP7679.",2021-03-31 +31704288,The incidence and relative risk of PD-1/PD-L1 inhibitors-related colitis in non-small cell lung cancer: A meta-analysis of randomized controlled trials.,"BACKGROUND:The programmed cell death-1 (PD-1)/programmed cell death ligand-1 (PD-L1) inhibitors have shown encouraging merits in non-small cell lung cancer (NSCLC) patients, however, they are often related to potentially fatal immune-related adverse events (irAEs) including colitis. Considering the incidence and characteristics of immune-related colitis may have significant implications for the appropriate utilization of PD-1/PD-L1 inhibitors in clinical practice, we conduct this meta to systematically analyze the correlation between PD-1/PD-L1 inhibitors for the treatment of NSCLC and the incidence of immune-associated colitis. METHODS:Electronic databases including PubMed, Embase, Cochrane Library and ClinicalTrials.gov (http://clinicaltrials.gov/) were searched up to May 2019, clinical trials reporting all grade (1-5), higher grade (3-5) colitis and grade 3-5 diarrhea were included, data were expressed as relative risk (RR), incidence, corresponding p value and 95% confidence intervals (CIs). RESULTS:9 randomized controlled trials (RCTs) were identified (7 with PD-1 inhibitors [n = 4526]) and 2 with PD-L1 inhibitors [n = 1464]). The overall incidence of PD-1/PD-L1 target agents was 1.40% for all grade colitis, 0.89% for severe colitis, 11.62% for all grade diarrhea and 1.36% for severe diarrhea. Compared with chemotherapy group, the PD-1/PD-L1 inhibitors had a significantly higher risk of all grade (RR: 3.68, p < 0.001) and high-grade (RR: 2.97, p = 0.01) colitis. 
Additional analysis of relative risk of diarrhea revealed that PD-1/PD-L1 treatment moderately reduce the risk of all grade diarrhea (RR: 0.64, p = 0.03), while the difference was not statistically significant in the risk of grade 3-5 diarrhea (RR: 0.83, p = 0.64). Subgroup analyses showed that the RR of all grade and higher grade colitis in PD-1 inhibitors was more significant (RR: 3.56, p = 0.001 vs RR: 2.98, p = 0.02 respectively). However, there was no appreciable difference in PD-L1 inhibitors (RR: 4.75, p = 0.15 vs RR: 2.85, p = 0.52 respectively). When compared with first-line therapy, second-line therapy associated with a higher risk of all grade colitis than first-line therapy (RR: 3.29, p = 0.006; RR: 4.69, p = 0.026). CONCLUSION:Our meta-analysis indicates when compared with control group, the PD-1/PD-L1 inhibitors may lead to a higher risk of all grade and high grade immune-mediated colitis, but may result in a reduction in all grade diarrhea. PD-1 inhibitors in NSCLC patients, but not PD-L1 inhibitors, increase the risk of all- and high grade colitis. These results suggest that clinicians shall pay more attention to this rare but life-threatening toxic effect.",2019-11-06 +31056636,HiCNN: a very deep convolutional neural network to better enhance the resolution of Hi-C data.,"

Motivation

High-resolution Hi-C data are indispensable for the studies of three-dimensional (3D) genome organization at kilobase level. However, generating high-resolution Hi-C data (e.g. 5 kb) by conducting Hi-C experiments needs millions of mammalian cells, which may eventually generate billions of paired-end reads with a high sequencing cost. Therefore, it will be important and helpful if we can enhance the resolutions of Hi-C data by computational methods.

Results

We developed a new computational method named HiCNN that used a 54-layer very deep convolutional neural network to enhance the resolutions of Hi-C data. The network contains both global and local residual learning with multiple speedup techniques included resulting in fast convergence. We used mean squared errors and Pearson's correlation coefficients between real high-resolution and computationally predicted high-resolution Hi-C data to evaluate the method. The evaluation results show that HiCNN consistently outperforms HiCPlus, the only existing tool in the literature, when training and testing data are extracted from the same cell type (i.e. GM12878) and from two different cell types in the same or different species (i.e. GM12878 as training with K562 as testing, and GM12878 as training with CH12-LX as testing). We further found that the HiCNN-enhanced high-resolution Hi-C data are more consistent with real experimental high-resolution Hi-C data than HiCPlus-enhanced data in terms of indicating statistically significant interactions. Moreover, HiCNN can efficiently enhance low-resolution Hi-C data, which eventually helps recover two chromatin loops that were confirmed by 3D-FISH.

Availability and implementation

HiCNN is freely available at http://dna.cs.miami.edu/HiCNN/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +28505334,DIRECTION: a machine learning framework for predicting and characterizing DNA methylation and hydroxymethylation in mammalian genomes.,"

Motivation

5-Methylcytosine and 5-Hydroxymethylcytosine in DNA are major epigenetic modifications known to significantly alter mammalian gene expression. High-throughput assays to detect these modifications are expensive, labor-intensive, unfeasible in some contexts and leave a portion of the genome unqueried. Hence, we devised a novel, supervised, integrative learning framework to perform whole-genome methylation and hydroxymethylation predictions in CpG dinucleotides. Our framework can also perform imputation of missing or low quality data in existing sequencing datasets. Additionally, we developed infrastructure to perform in silico, high-throughput hypotheses testing on such predicted methylation or hydroxymethylation maps.

Results

We test our approach on H1 human embryonic stem cells and H1-derived neural progenitor cells. Our predictive model is comparable in accuracy to other state-of-the-art DNA methylation prediction algorithms. We are the first to predict hydroxymethylation in silico with high whole-genome accuracy, paving the way for large-scale reconstruction of hydroxymethylation maps in mammalian model systems. We designed a novel, beam-search driven feature selection algorithm to identify the most discriminative predictor variables, and developed a platform for performing integrative analysis and reconstruction of the epigenome. Our toolkit DIRECTION provides predictions at single nucleotide resolution and identifies relevant features based on resource availability. This offers enhanced biological interpretability of results potentially leading to a better understanding of epigenetic gene regulation.

Availability and implementation

http://www.pradiptaray.com/direction, under CC-by-SA license.

Contacts

pradiptaray@gmail.com or mchen@utdallas.edu or michael.zhang@utdallas.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +32090162,"Seismic data of a rockslide: Evaluation of noise levels, site effects, frequency content and identification of seismic phases.","Seismic data can provide information to deduce the occurrence of mass movement events, their release time, event location and dynamics characterization [1]. Nevertheless, the effect of local site amplifications, the level of seismic noise and the frequency content of the signals are important constraints to correctly identify and describe these types of events. In this article we provide data on: site effects, power spectral densities, polarization particle motion and spectrograms generated by a rockslide (∼450 m3) (hereinafter NR) recorded in two permanent seismic stations (EPOB and POBL) located ∼10 km from the source. Original data are available through the International Federation of Digital Seismograph Networks (FDSN, http://www.fdsn.org) for POBL and on request from Instituto Geográfico Nacional (IGN, http://www.ign.es) for EPOB. POBL and EPOB site effects analysis by means of Horizontal-to-Vertical spectral ratio (H/V) technique shows important signatures in POBL signal between 1 and 10 Hz, indicating strong amplification effects at these frequencies, not present in EPOB. For frequencies >1 Hz, Power Spectral Densities (PSD) are higher in POBL than in EPOB, indicating that POBL is noisier than EPOB. Based on the H/V and PSD analyzes, the EPOB station data was deemed preferable over the POBL, to conduct the research presented in the related article [1]. Particle polarization motion data enabled the identification of the arrivals of P, S, and superficial waves, confirming that Pg waves were correctly identified, providing necessary information for the event location in the research article [1]. 
Moreover, EPOB and POBL spectrograms together with the Fourier transform are included to analyze their content in the frequency domain showing that the expected high frequency phenomenon of the rockslide recorded at 10 km is attenuated and only the low frequency content between 1 and 15 Hz is recorded.",2020-02-07 +35784417,Age estimation of individuals aged 5-23 years based on dental development of the Indonesian population.,"Dental development can be used to estimate age for forensic purposes. However, most of the currently available methods are less reliable for the Indonesian population due to population variability. This study presents a new method and evaluates other methods that utilize dental development to estimate the age of Indonesian people. Panoramic radiographs of 304 young Indonesian people aged 5-23 years old were analysed for deciduous tooth root resorption, permanent tooth calcification, and eruption. The extent of tooth root resorption was determined based on AlQahtani's modified Moorrees et al. method. Tooth calcification was classified based on a modified Demirjian et al. method. Tooth eruption was evaluated based on AlQahtani's modified Bengston system. The sequence of tooth root resorption, and permanent tooth calcification and eruption were grouped into 19 age categories (from 5-23 years old) in an atlas. The differences between males and females, between maxillary and mandibular teeth, and between right and left teeth were also analysed. There were minimal significant differences of tooth development between males and females, and between the right and left teeth (P > 0.05), while the maxillary and mandibular dental development was significantly different (P < 0.05). The newly developed atlas showed the development of the right side of maxillary and mandibular tooth of combined sex of Indonesian population. 
Another 34 panoramic radiographs of known-age and sex individuals from Indonesia were assessed using the newly developed Atlas of Dental Development in the Indonesian Population, Ubelaker's Dental Development Chart, The London Atlas of Human Tooth Development and Eruption by AlQahtani, and the Age Estimation Guide-Modern Australia population by Blenkin-Taylor. Accuracy was assessed by comparing estimated age to actual chronological age using the Bland-Altmand test. Results show that the smallest range of error was found in the Atlas of Dental Development in the Indonesian Population (-0.969 to 1.210 years), followed by The London Atlas of Human Tooth Development and Eruption by AlQahtani (-2.013 to 1.990 years), the Age Estimation Guide-Modern Australia population by Blenkin-Taylor (-2.495 to 2.598 years), and the Dental Development Chart by Ubelaker (-2.960 to 3.289 years). These findings show that the Atlas of Dental Development constructed in this study performs better than the other three methods and presents greater accuracy of age estimation in the Indonesian population.Key pointsDental development such as deciduous tooth root resorption, permanent tooth calcification, and tooth eruption can be used to estimate age for forensic purposes.The development of the teeth are influenced by genetic, ethnicity, and sex, therefore an age estimation method must be constructed based on the same population.There were minimal significant differences in tooth development between male and female, and between right and left teeth, but there was significant difference between maxillary and mandibular teeth.The Atlas of Dental Development in the Indonesian Population constructed in this study allowed more accurate age estimation of the Indonesian sample than the other methods tested. 
Supplemental data for this article are available online at https://doi.org/10.1080/20961790.2021.1886648.",2021-04-15 +31557285,scDAPA: detection and visualization of dynamic alternative polyadenylation from single cell RNA-seq data.,"

Motivation

Alternative polyadenylation (APA) plays a key post-transcriptional regulatory role in mRNA stability and functions in eukaryotes. Single cell RNA-seq (scRNA-seq) is a powerful tool to discover cellular heterogeneity at gene expression level. Given 3' enriched strategy in library construction, the most commonly used scRNA-seq protocol-10× Genomics enables us to improve the study resolution of APA to the single cell level. However, currently there is no computational tool available for investigating APA profiles from scRNA-seq data.

Results

Here, we present a package scDAPA for detecting and visualizing dynamic APA from scRNA-seq data. Taking bam/sam files and cell cluster labels as inputs, scDAPA detects APA dynamics using a histogram-based method and the Wilcoxon rank-sum test, and visualizes candidate genes with dynamic APA. Benchmarking results demonstrated that scDAPA can effectively identify genes with dynamic APA among different cell groups from scRNA-seq data.

Availability and implementation

The scDAPA package is implemented in Shell and R, and is freely available at https://scdapa.sourceforge.io.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +,High performance computation of landscape genomic models including local indicators of spatial association,"With the increasing availability of both molecular and topo‐climatic data, the main challenges facing landscape genomics – that is the combination of landscape ecology with population genomics – include processing large numbers of models and distinguishing between selection and demographic processes (e.g. population structure). Several methods address the latter, either by estimating a null model of population history or by simultaneously inferring environmental and demographic effects. Here we present samβada, an approach designed to study signatures of local adaptation, with special emphasis on high performance computing of large‐scale genetic and environmental data sets. samβada identifies candidate loci using genotype–environment associations while also incorporating multivariate analyses to assess the effect of many environmental predictor variables. This enables the inclusion of explanatory variables representing population structure into the models to lower the occurrences of spurious genotype–environment associations. In addition, samβada calculates local indicators of spatial association for candidate loci to provide information on whether similar genotypes tend to cluster in space, which constitutes a useful indication of the possible kinship between individuals. To test the usefulness of this approach, we carried out a simulation study and analysed a data set from Ugandan cattle to detect signatures of local adaptation with samβada, bayenv, lfmm and an FST outlier method (FDIST approach in arlequin) and compare their results. 
samβada – an open source software for Windows, Linux and Mac OS X available at http://lasig.epfl.ch/sambada – outperforms other approaches and better suits whole‐genome sequence data processing.",2017-09-01 +28961249,SWI/SNF Infobase-An exclusive information portal for SWI/SNF remodeling complex subunits.,"Chromatin remodeling complexes facilitate the access of condensed genomic DNA during transcription, replication, and repair, by altering the histone-DNA contacts in the nucleosome structures. SWI/SNF (SWItch/Sucrose Non-Fermentable) family of ATP dependent chromatin remodeling complexes have been documented for their tumour suppressor function. Recent studies have reported the high frequency of cancer causing mutations in this protein family. There exist multiple subunits for this complex and can form context-dependent sub-complexes. The cataloguing of individual subunits of this complex is essential for understanding their specific functions and their mechanism of action during chromatin remodeling. This would also facilitate further studies to characterize cancer causing mutations in SWI/SNF subunits. In the current study, a database containing information on the subunits of SWI/SNF-α (BRG1/BRM-Associated Factors (BAF)) and SWI/SNF-β (Polybromo-Associated BAF (PBAF)) sub classes of SWI/SNF family has been curated and catalogued. The database hosts information on 27 distinct SWI/SNF subunits from 20 organisms spanning a wide evolutionary range of eukaryotes. A non-redundant set of 522 genes coding for SWI/SNF subunits have been documented in the database. A detailed annotation on each subunit, including basic protein/gene information, protein sequence, functional domains, homologs and missense mutations of human proteins have been provided with a user-friendly graphical interface. 
The SWI/SNF Infobase presented here, would be a first of its kind exclusive information portal on SWI/SNF complex subunits and would be a valuable resource for the research community working on chromatin remodeling. The database is available at http://scbt.sastra.edu/swisnfdb/index.php.",2017-09-29 +32698664,A meta-analysis of executive functions among survivors of subarachnoid haemorrhage.,"Subarachnoid Haemorrhage (SAH) is a type of stroke which is suggested to result in Executive Functioning (EF) deficits. Within the SAH research, EF is typically assessed as a unitary cognitive construct. Therefore, the nature and extent to which the different components of EF are impacted post SAH remain unclear. In this meta-analysis, 10 studies met selection criteria including 248 SAH participants, treated by endovascular coiling. Participants were assessed by EF measures and compared with 230 controls. Searches were conducted in November 2018 including Medline, PsychINFO, Web of Science, Scopus and CINAHL databases. EF measures were assigned to categories including Cognitive Flexibility, Working Memory, Inhibitory Control and Planning/Problem Solving [Diamond, 2013. Executive functions. Annual Review of Psychology, 64(1), 135-168. https://doi.org/10.1146/annurev-psych-113011-143750]. A statistically significant effect was found for overall EF. Cognitive Flexibility (G = -0.76) and Inhibitory Control (G = -0.51) generated moderate effect sizes, while Working Memory and Planning/Problem Solving found a small effect size (G = -0.45 and G = -0.49, respectively). The I2 statistic suggested small to moderate heterogeneity between studies, hypothesized to relate to different cognitive tools. Underlying components of EF appear to be differentially impacted post SAH, with Cognitive flexibility demonstrating the largest degree of deficit. 
Recommendations for a standardized and uniform assessment of EF post SAH are outlined.",2020-07-23 +33290637,Identification of determinants of pollen donor fecundity using the hierarchical neighborhood model.,"Individual differences in male reproductive success drive genetic drift and natural selection, altering genetic variation and phenotypic trait distributions in future generations. Therefore, identifying the determinants of reproductive success is important for understanding the ecology and evolution of plants. Here, based on the spatially explicit mating model (the neighborhood model), we develop a hierarchical probability model that links co-dominant genotypes of offspring and candidate parents with phenotypic determinants of male reproductive success. The model accounts for pollen dispersal, genotyping errors as well as individual variation in selfing, pollen immigration, and differentiation of immigrant pollen pools. Unlike the classic neighborhood model approach, our approach is specially designed to account for excessive variation (overdispersion) in male fecundity. We implemented a Bayesian estimation method (the Windows computer program available at: https://www.ukw.edu.pl/pracownicy/plik/igor_chybicki/1806/) that, among others, allows for selecting phenotypic variables important for male fecundity and assessing the fraction of variance in fecundity (R2 ) explained by selected variables. Simulations showed that our method outperforms both the classic neighborhood model and the two-step approach, where fecundities and the effects of phenotypic variables are estimated separately. The analysis of two data examples showed that in wind-pollinated trees, male fecundity depends on both the amount of produced pollen and the ability to pollen spread. However, despite that the tree size was positively correlated with male fecundity, it explained only a fraction of the total variance in fecundity, indicating the presence of additional factors. 
Finally, case studies highlighted the importance of accounting for pollen dispersal in the estimation of fecundity determinants.",2020-12-28 +32641783,Population-based surveys of antibodies against SARS-CoV-2 in Southern Brazil.,"Population-based data on COVID-19 are urgently needed. We report on three rounds of probability sample household surveys in the state of Rio Grande do Sul (Brazil), carried out in nine large municipalities using the Wondfo lateral flow point-of-care test for immunoglobulin M and G antibodies against SARS-CoV-2 (https://en.wondfo.com.cn/product/wondfo-sars-cov-2-antibody-test-lateral-flow-method-2/). Before survey use, the assay underwent four validation studies with pooled estimates of sensitivity (84.8%; 95% confidence interval (CI) = 81.4-87.8%) and specificity (99.0%; 95% CI = 97.8-99.7%). We calculated that the seroprevalence was 0.048% (2/4,151; 95% CI = 0.006-0.174) on 11-13 April (round 1), 0.135% (6/4,460; 95% CI = 0.049-0.293%) on 25-27 April (round 2) and 0.222% (10/4,500; 95% CI = 0.107-0.408) on 9-11 May (round 3), with a significant upward trend over the course of the surveys. Of 37 family members of positive individuals, 17 (35%) were also positive. The epidemic is at an early stage in the state, and there is high compliance with social distancing, unlike in other parts of Brazil. Periodic survey rounds will continue to monitor trends until at least the end of September, and our population-based data will inform decisions on preventive policies and health system preparedness at the state level.",2020-07-08 +27924032,"The antiSMASH database, a comprehensive database of microbial secondary metabolite biosynthetic gene clusters.","Secondary metabolites produced by microorganisms are the main source of bioactive compounds that are in use as antimicrobial and anticancer drugs, fungicides, herbicides and pesticides. 
In the last decade, the increasing availability of microbial genomes has established genome mining as a very important method for the identification of their biosynthetic gene clusters (BGCs). One of the most popular tools for this task is antiSMASH. However, so far, antiSMASH is limited to de novo computing results for user-submitted genomes and only partially connects these with BGCs from other organisms. Therefore, we developed the antiSMASH database, a simple but highly useful new resource to browse antiSMASH-annotated BGCs in the currently 3907 bacterial genomes in the database and perform advanced search queries combining multiple search criteria. antiSMASH-DB is available at http://antismash-db.secondarymetabolites.org/.",2016-10-24 +33483306,Locus of Heat Resistance (LHR) in Meat-Borne Escherichia coli: Screening and Genetic Characterization. ,"Microbial resistance to processing treatments poses a food safety concern, as treatment tolerant pathogens can emerge. Occasional foodborne outbreaks caused by pathogenic Escherichia coli have led to human and economic losses. Therefore, this study screened for the extreme heat resistance (XHR) phenotype as well as one known genetic marker, the locus of heat resistance (LHR), in 4,123 E. coli isolates from diverse meat animals at different processing stages. The prevalences of XHR and LHR among the meat-borne E. coli were found to be 10.3% and 11.4%, respectively, with 19% agreement between the two. Finished meat products showed the highest LHR prevalence (24.3%) compared to other processing stages (0 to 0.6%). None of the LHR+E. coli in this study would be considered pathogens based on screening for virulence genes. Four high-quality genomes were generated by whole-genome sequencing of representative LHR+ isolates. Nine horizontally acquired LHRs were identified and characterized, four plasmid-borne and five chromosomal. 
Nine newly identified LHRs belong to ClpK1 LHR or ClpK2 LHR variants sharing 61 to 68% nucleotide sequence identity, while one LHR appears to be a hybrid. Our observations suggest positive correlation between the number of LHR regions present in isolates and the extent of heat resistance. The isolate exhibiting the highest degree of heat resistance possessed four LHRs belonging to three different variant groups. Maintenance of as many as four LHRs in a single genome emphasizes the benefits of the LHR in bacterial physiology and stress response.IMPORTANCE Currently, a ""multiple-hurdle"" approach based on a combination of different antimicrobial interventions, including heat, is being utilized during meat processing to control the burden of spoilage and pathogenic bacteria. Our recent study (M. Guragain, G. E. Smith, D. A. King, and J. M. Bosilevac, J Food Prot 83:1438-1443, 2020, https://doi.org/10.4315/JFP-20-103) suggests that U.S. beef cattle harbor Escherichia coli that possess the locus of heat resistance (LHR). LHR seemingly contributes to the global stress tolerance in bacteria and hence poses a food safety concern. Therefore, it is important to understand the distribution of the LHRs among meat-borne bacteria identified at different stages of different meat processing systems. Complete genome sequencing and comparative analysis of selected heat-resistant bacteria provide a clearer understanding of stress and heat resistance mechanisms. Further, sequencing data may offer a platform to gain further insights into the genetic background that provides optimal bacterial tolerance against heat and other processing treatments.",2021-03-11 +25922515,The Fossil Calibration Database-A New Resource for Divergence Dating.,"Fossils provide the principal basis for temporal calibrations, which are critical to the accuracy of divergence dating analyses. 
Translating fossil data into minimum and maximum bounds for calibrations is the most important-often least appreciated-step of divergence dating. Properly justified calibrations require the synthesis of phylogenetic, paleontological, and geological evidence and can be difficult for nonspecialists to formulate. The dynamic nature of the fossil record (e.g., new discoveries, taxonomic revisions, updates of global or local stratigraphy) requires that calibration data be updated continually lest they become obsolete. Here, we announce the Fossil Calibration Database (http://fossilcalibrations.org), a new open-access resource providing vetted fossil calibrations to the scientific community. Calibrations accessioned into this database are based on individual fossil specimens and follow best practices for phylogenetic justification and geochronological constraint. The associated Fossil Calibration Series, a calibration-themed publication series at Palaeontologia Electronica, will serve as a key pipeline for peer-reviewed calibrations to enter the database.",2015-04-27 +27487989,A large set of 26 new reference transcriptomes dedicated to comparative population genomics in crops and wild relatives.,"We produced a unique large data set of reference transcriptomes to obtain new knowledge about the evolution of plant genomes and crop domestication. For this purpose, we validated a RNA-Seq data assembly protocol to perform comparative population genomics. For the validation, we assessed and compared the quality of de novo Illumina short-read assemblies using data from two crops for which an annotated reference genome was available, namely grapevine and sorghum. We used the same protocol for the release of 26 new transcriptomes of crop plants and wild relatives, including still understudied crops such as yam, pearl millet and fonio. The species list has a wide taxonomic representation with the inclusion of 15 monocots and 11 eudicots. 
All contigs were annotated using BLAST, prot4EST and Blast2GO. A strong originality of the data set is that each crop is associated with close relative species, which will permit whole-genome comparative evolutionary studies between crops and their wild-related species. This large resource will thus serve research communities working on both crops and model organisms. All the data are available at http://arcad-bioinformatics.southgreen.fr/.",2016-08-29 +32347765,Design and Rationale of the Biomarker Center of the Household Air Pollution Intervention Network (HAPIN) Trial.,"

Background

Biomarkers of exposure, susceptibility, and effect are fundamental for understanding environmental exposures, mechanistic pathways of effect, and monitoring early adverse outcomes. To date, no study has comprehensively evaluated a large suite and variety of biomarkers in household air pollution (HAP) studies in concert with exposure and outcome data. The Household Air Pollution Intervention Network (HAPIN) trial is a liquified petroleum gas (LPG) fuel/stove randomized intervention trial enrolling 800 pregnant women in each of four countries (i.e., Peru, Guatemala, Rwanda, and India). Their offspring will be followed from birth through 12 months of age to evaluate the role of pre- and postnatal exposure to HAP from biomass burning cookstoves in the control arm and LPG stoves in the intervention arm on growth and respiratory outcomes. In addition, up to 200 older adult women per site are being recruited in the same households to evaluate indicators of cardiopulmonary, metabolic, and cancer outcomes.

Objectives

Here we describe the rationale and ultimate design of a comprehensive biomarker plan to enable us to explore more fully how exposure is related to disease outcome.

Methods

HAPIN enrollment and data collection began in May 2018 and will continue through August 2021. As a part of data collection, dried blood spot (DBS) and urine samples are being collected three times during pregnancy in pregnant women and older adult women. DBS are collected at birth for the child. DBS and urine samples are being collected from the older adult women and children three times throughout the child's first year of life. Exposure biomarkers that will be longitudinally measured in all participants include urinary hydroxy-polycyclic aromatic hydrocarbons, volatile organic chemical metabolites, metals/metalloids, levoglucosan, and cotinine. Biomarkers of effect, including inflammation, endothelial and oxidative stress biomarkers, lung cancer markers, and other clinically relevant measures will be analyzed in urine, DBS, or blood products from the older adult women. Similarly, genomic/epigenetic markers, microbiome, and metabolomics will be measured in older adult women samples.

Discussion

Our study design will yield a wealth of biomarker data to evaluate, in great detail, the link between exposures and health outcomes. In addition, our design is comprehensive and innovative by including cutting-edge measures such as metabolomics and epigenetics. https://doi.org/10.1289/EHP5751.",2020-04-29 +32697009,Good practices in health promotion for older people - Significance for evidence in health policy.,"This article is devoted to convincing policy makers to use good practices in encouraging older people to pursue adequate and effective health policies. Long-term scientific research focused on the effects of health promotion programmes is rarely undertaken, although its scope is still expanding. At the same time, it is strongly desirable to form health policy based on scientific evidence. In this situation, an indication of good practices characterised by precisely defined features and their systematic evaluation could be an alternative to an insufficient number of empirical studies. The first step of the methodology was a literature review on health promotion for older people, aimed at defining good practices and criteria used for their selection. The authors searched the following databases: PubMED, Embase and Cochrane Library, as well as international databases dedicated to health promotion programmes for older people (e.g. Age-friendly World (https://extranet.who.int/agefriendlyworld/age-friendly-practice-database-launched); HealthProElderly (www.healthproelderly.com/database/index.php?id=16); JA-CHRODIS (www.chrodis.eu); EuroHealthNet (www.eurohealthnet.eu) and ProFouND; (www.profound.eu.com). As relevant health policy information is usually available in national languages, the authors then approached national experts in 10 European countries, who filled in a dedicated survey on health promotion programmes for older people and indicated examples of good practices from their countries. 
Practical evidence, based on real implemented programmes, is valuable as inspiration for health promotion programmes, their planning and management. Selecting good practices from among implemented and evaluated actions makes it possible to establish their value. The significance of good practices in health promotion is to deliver real benefits and health effects for a target group, which, in the case of evident benefits, renders the practices credible and worthy of further dissemination. The EU already successfully shares good practices in migrant health and environmental protection. Creating databases on good practices helps policy makers promote the sustainability of already implemented activities and enhances their applicability by other organisations and in different settings.",2020-07-22 +30380112,Translocatome: a novel resource for the analysis of protein translocation between cellular organelles.,"Here we present Translocatome, the first dedicated database of human translocating proteins (URL: http://translocatome.linkgroup.hu). The core of the Translocatome database is the manually curated data set of 213 human translocating proteins listing the source of their experimental validation, several details of their translocation mechanism, their local compartmentalized interactome, as well as their involvement in signalling pathways and disease development. In addition, using the well-established and widely used gradient boosting machine learning tool, XGBoost, Translocatome provides translocation probability values for 13 066 human proteins identifying 1133 and 3268 high- and low-confidence translocating proteins, respectively. The database has user-friendly search options with a UniProt autocomplete quick search and advanced search for proteins filtered by their localization, UniProt identifiers, translocation likelihood or data complexity. 
Download options of search results, manually curated and predicted translocating protein sets are available on its website. The update of the database is helped by its manual curation framework and connection to the previously published ComPPI compartmentalized protein-protein interaction database (http://comppi.linkgroup.hu). As shown by the application examples of merlin (NF2) and tumor protein 63 (TP63) Translocatome allows a better comprehension of protein translocation as a systems biology phenomenon and can be used as a discovery-tool in the protein translocation field.",2019-01-01 +26220709,Facilitating collaboration in rare genetic disorders through effective matchmaking in DECIPHER.,"DECIPHER (https://decipher.sanger.ac.uk) is a web-based platform for secure deposition, analysis, and sharing of plausibly pathogenic genomic variants from well-phenotyped patients suffering from genetic disorders. DECIPHER aids clinical interpretation of these rare sequence and copy-number variants by providing tools for variant analysis and identification of other patients exhibiting similar genotype-phenotype characteristics. DECIPHER also provides mechanisms to encourage collaboration among a global community of clinical centers and researchers, as well as exchange of information between clinicians and researchers within a consortium, to accelerate discovery and diagnosis. DECIPHER has contributed to matchmaking efforts by enabling the global clinical genetics community to identify many previously undiagnosed syndromes and new disease genes, and has facilitated the publication of over 700 peer-reviewed scientific publications since 2004. At the time of writing, DECIPHER contains anonymized data from ∼250 registered centers on more than 51,500 patients (∼18000 patients with consent for data sharing and ∼25000 anonymized records shared privately). 
In this paper, we describe salient features of the platform, with special emphasis on the tools and processes that aid interpretation, sharing, and effective matchmaking with other data held in the database and that make DECIPHER an invaluable clinical and research resource.",2015-08-20 +31211398,SpinachBase: a central portal for spinach genomics. ,"Spinach (Spinacia oleracea L.) is a nutritious vegetable enriched with many essential minerals and vitamins. A reference spinach genome has been recently released, and additional spinach genomic resources are being rapidly developed. Therefore, there is an urgent need of a central database to store, query, analyze and integrate various resources of spinach genomic data. To this end, we developed SpinachBase (http://spinachbase.org), which provides centralized public accesses to genomic data as well as analytical tools to assist research and breeding in spinach. The database currently stores the spinach reference genome sequence, and sequences and comprehensive functional annotations of protein-coding genes predicted from the genome. The database also contains gene expression profiles derived from RNA-Seq experiments as well as highly co-expressed genes and genetic variants called from transcriptome sequences of 120 cultivated and wild Spinacia accessions. Biochemical pathways have been predicted from spinach protein-coding genes and are available through a pathway database (SpinachCyc) within SpinachBase. SpinachBase provides a suite of analysis and visualization tools including a genome browser, sequence similarity searches with BLAST, functional enrichment and functional classification analyses and functions to query and retrieve gene sequences and annotations.",2019-01-01 +31263896,CytoGPS: a web-enabled karyotype analysis tool for cytogenetics.,"

Summary

Karyotype data are the most common form of genetic data that is regularly used clinically. They are collected as part of the standard of care in many diseases, particularly in pediatric and cancer medicine contexts. Karyotypes are represented in a unique text-based format, with a syntax defined by the International System for human Cytogenetic Nomenclature (ISCN). While human-readable, ISCN is not intrinsically machine-readable. This limitation has prevented the full use of complex karyotype data in discovery science use cases. To enhance the utility and value of karyotype data, we developed a tool named CytoGPS. CytoGPS first parses ISCN karyotypes into a machine-readable format. It then converts the ISCN karyotype into a binary Loss-Gain-Fusion (LGF) model, which represents all cytogenetic abnormalities as combinations of loss, gain, or fusion events, in a format that is analyzable using modern computational methods. Such data is then made available for comprehensive 'downstream' analyses that previously were not feasible.

Availability and implementation

Freely available at http://cytogps.org.",2019-12-01 +33596105,Air Pollution and Polyclonal Elevation of Serum Free Light Chains: An Assessment of Adaptive Immune Responses in the Prospective Heinz Nixdorf Recall Study.,"

Background

Residential exposure to air pollution (AP) has been shown to activate the immune system (IS). Although innate immune responses to AP have been studied extensively, investigations on the adaptive IS are scarce.

Objectives

The aim of this study was to investigate the association between short- to long-term AP exposure and polyclonal free light chains (FLC) produced by plasma cells.

Methods

We used repeated data from three examinations (t0: 2000-2003; t1: 2006-2008; and t2: 2011-2015) of the population-based German Heinz Nixdorf Recall cohort of initially 4,814 participants (45-75 y old). Residential exposure to total and source-specific particulate matter (PM) with an aerodynamic diameter of 10 or 2.5μm (PM10 and PM2.5 respectively), nitrogen dioxide (NO2), and particle number concentrations (accumulation mode; PNAM) was estimated using a chemistry transport model with different time windows (1- to 365-d mean ± standard deviation) before blood draw. We applied linear mixed models with a random participant intercept to estimate associations between total, traffic- and industry-related AP exposures and log-transformed FLC, controlling for examination time, sociodemographic and lifestyle variables, estimated glomerular filtration rate and season.

Results

Analyzing 9,933 observations from 4,455 participants, we observed generally positive associations between AP exposures and FLC. We observed strongest associations with middle-term exposures, e.g., 3.0% increase in FLC (95% confidence interval: 1.8%, 4.3%) per interquartile range increase in 91-d mean of NO2 (14.1μg/m³). Across the different pollutants, NO2 showed strongest associations with FLC, followed by PM10 and PNAM. Effect estimates for traffic-related exposures were mostly higher compared with total exposures. Although NO2 and PNAM estimates remained stable upon adjustment for PM, PM estimates decreased considerably upon adjustment for NO2 and PNAM.

Discussion

Our results suggest that middle-term AP exposures in particular might be positively associated with activation of the adaptive IS. Traffic-related PM, PNAM, and NO2 showed strongest associations. https://doi.org/10.1289/EHP7164.",2021-02-17 +30321383,Cucurbit Genomics Database (CuGenDB): a central portal for comparative and functional genomics of cucurbit crops.,"The Cucurbitaceae family (cucurbit) includes several economically important crops, such as melon, cucumber, watermelon, pumpkin, squash and gourds. During the past several years, genomic and genetic data have been rapidly accumulated for cucurbits. To store, mine, analyze, integrate and disseminate these large-scale datasets and to provide a central portal for the cucurbit research and breeding community, we have developed the Cucurbit Genomics Database (CuGenDB; http://cucurbitgenomics.org) using the Tripal toolkit. The database currently contains all available genome and expressed sequence tag (EST) sequences, genetic maps, and transcriptome profiles for cucurbit species, as well as sequence annotations, biochemical pathways and comparative genomic analysis results such as synteny blocks and homologous gene pairs between different cucurbit species. A set of analysis and visualization tools and user-friendly query interfaces have been implemented in the database to facilitate the usage of these large-scale data by the community. In particular, two new tools have been developed in the database, a 'SyntenyViewer' to view genome synteny between different cucurbit species and an 'RNA-Seq' module to analyze and visualize gene expression profiles. 
Both tools have been packed as Tripal extension modules that can be adopted in other genomics databases developed using the Tripal system.",2019-01-01 +32820542,"What Are the Health Risks of Eating Red Meat, and How Should We Assess Them?","This paper discusses possible mechanisms that might lead to misinterpretations of collected data and makes new evidence-based medicine (EBM) recommendations to oppose the previously accepted preventive measures, or treatment options. It is focused on the danger of the ""red meat"" consumption, and the question whether eating pungent food is good or bad for our health and finally whether the ""bad luck"" concept of getting several cancer types is valid or not. These three topics got and still have significant media attention. Several mechanisms are proposed as possible causes of these apparent conflicts. Some of them have already been recognized but sadly remained less known to medical readers and also to the general population. Also see the video abstract here https://youtu.be/owjoRXrNShA.",2020-08-20 +33132094,Mid-term gender-specific differences in periprosthetic bone remodelling after implantation of a curved bone-preserving hip stem.,"

Background

The implant-specific periprosthetic bone remodelling in the proximal femur is considered to be an important factor influencing the long-term survival of cementless hip stems. In particular, data on gender-specific differences regarding bone-preserving stems are very rare in the literature and mainly limited to short-term investigations. Therefore, we investigated at mid-term one arm of a prospective randomised study to evaluate if there is an influence of gender on implant-specific stress shielding after implantation of a curved bone-preserving hip stem (Fitmore) 5 years postoperatively.

Hypothesis

We hypothesised there will be no gender-specific differences in periprosthetic bone remodelling.

Patients and methods

A total of 20 female and 37 male patients underwent total hip arthroplasty using the Fitmore stem. Clinical, radiological as well as osteodensitometric examinations were performed preoperatively, 7 days and 3, 12 and 60 months postoperatively. Clinical data collection included the Western Ontario and McMaster Universities Arthritis Index (WOMAC) and the Harris Hip Score (HHS). Periprosthetic bone mineral density (BMD) was measured using Dual Energy X-ray Absorptiometry (DXA) and the periprosthetic bone was divided into 7 regions of interest (ROI) for analysis. The results at 3, 12 and 60 months were compared with the first postoperative measurement after 7 days to obtain a percentage change.

Results

Periprosthetic BMD showed a decrease in all 7 ROIs for both groups 5 years postoperatively referred to the baseline value, except ROI 3 (0.8%, p=0.761), representing the distal lateral part of the stem, and ROI 5 (0.3%, p=0.688), representing the distal medial part of the stem in the male cohort. Significant gender differences were found in ROI 1 (-16.0% vs. -3.5%, p=0.016) and ROI 6 (-9.9% vs. -2.1%, p=0.04) in favour of the male patients. Clinical results showed no significant gender differences 5 years postoperatively with regard to WOMAC (mean 0.4 (±0.8, 0-3.3) in women vs. 0.3 (±0.8, 0-4.2) in men, p=0.76) and HHS (mean 93.0 (±9.7, 66.0-100.0) in women vs. 93.9 (±11.5, 53.0-100.0) in men, p=0.36).

Conclusion

Proximal stress shielding was observed independent of gender 5 years postoperatively. However, there was a significantly lower bone loss proximal lateral and medial below the calcar in male patients, indicating a more physiological load transfer. [ClinicalTrials.gov identifier: NCT03147131 (Study ID D.3067-244/10). Registered 10 May 2017 - retrospectively registered, https://clinicaltrials.gov/ct2/show/NCT03147131?term=Bieger&draw=2&rank=1] LEVEL OF EVIDENCE: IV; prospective study without control group.",2020-10-31 +33547491,Radiofrequency ablation for Barrett's oesophagus related neoplasia with the 360 Express catheter: initial experience from the United Kingdom and Ireland-preliminary results.,"

Background

Radio-frequency ablation (RFA) for Barrett's oesophagus (BE)-related neoplasia is currently used after endoscopic resection of visible neoplasia. The HALO 360 balloon has been used to ablate long segment BE. The Barrx™ 360 Express RFA self-sizing catheter ('RFA Express') may potentially allow quicker ablation times and improved treatment outcomes. The aim of this paper is to present real world data on the use of the 360 Express Device.

Methods

Centres in the UK and Ireland submitted cases where the RFA Express was used. The primary outcome was regression of BE at 3 months. Secondary outcomes were the rate of symptomatic stricture formation and resolution of intestinal metaplasia (CR-IM) and dysplasia (CR-D) at End of Treatment (EoT).

Results

11 centres submitted 123 consecutive patients. 112 had a follow up endoscopy. The median age was 67 years (IQR 62-75). 3 dosimetries were used. The mean reduction in Circumferential (C) length was 78% ± 36 and mean reduction in Maximal length (M) was 55% ± 36. 17 patients (15%) developed strictures requiring dilation. There was a higher rate of stricture formation when the 12 J energy was used (p < 0.05). 47 patients had EoT biopsies, 40 (85%) had CR-D and 34(76%) had CR-IM.

Conclusions

The RFA 360 Express catheter shows reduction in length of baseline BE at 3 months after index treatment, and eradication of intestinal metaplasia and dysplasia at 12 months similar to other studies with earlier devices. It appears that the symptomatic stricture rate is slightly higher than previous series with the HALO 360 catheter. This study was performed as part of the HALO registry and has been approved by the Research Ethics Committee - MREC Number 08/H0714/27 Local project reference 08/0104 Project ID 15,033 IRAS Number 54678 EudraCT 2009-015980-1. Registered on ISRCTN as below: ISRCTN93069556. https://doi.org/10.1186/ISRCTN93069556.",2021-02-05 +26310816,Deep proteomic profiling of vasopressin-sensitive collecting duct cells. I. Virtual Western blots and molecular weight distributions.,"The mouse mpkCCD cell line is a continuous cultured epithelial cell line with characteristics of renal collecting duct principal cells. This line is widely used to study epithelial transport and its regulation. To provide a data resource useful for experimental design and interpretation in studies using mpkCCD cells, we have carried out ""deep"" proteomic profiling of these cells using three levels of fractionation (differential centrifugation, SDS-PAGE, and HPLC) followed by tandem mass spectrometry to identify and quantify proteins. The analysis of all resulting samples generated 34.6 gigabytes of spectral data. As a result, we identified 6,766 proteins in mpkCCD cells at a high level of stringency. These proteins are expressed over eight orders of magnitude of protein abundance. The data are provided to users as a public data base (https://helixweb.nih.gov/ESBL/Database/mpkFractions/). The mass spectrometry data were mapped back to their gel slices to generate ""virtual Western blots"" for each protein. For most of the 6,766 proteins, the apparent molecular weight from SDS-PAGE agreed closely with the calculated molecular weight. 
However, a substantial fraction (>15%) of proteins was found to run aberrantly, with much higher or much lower mobilities than predicted. These proteins were analyzed to identify mechanisms responsible for altered mobility on SDS-PAGE, including high or low isoelectric point, high or low hydrophobicity, physiological cleavage, residence in the lysosome, posttranslational modifications, and expression of alternative isoforms due to alternative exon usage. Additionally, this analysis identified a previously unrecognized isoform of aquaporin-2 with apparent molecular mass <20 kDa.",2015-08-26 +32831287,ChemEnv: a fast and robust coordination environment identification tool.,"Coordination or local environments have been used to describe, analyze and understand crystal structures for more than a century. Here, a new tool called ChemEnv, which can identify coordination environments in a fast and robust manner, is presented. In contrast to previous tools, the assessment of the coordination environments is not biased by small distortions of the crystal structure. Its robust and fast implementation enables the analysis of large databases of structures. The code is available open source within the pymatgen package and the software can also be used through a web app available on http://crystaltoolkit.org through the Materials Project.",2020-07-21 +30783007,"An Integrative Database of β-Lactamase Enzymes: Sequences, Structures, Functions, and Phylogenetic Trees. ","β-Lactamase enzymes have attracted substantial medical attention from researchers and clinicians because of their clinical, ecological, and evolutionary interest. Here, we present a comprehensive online database of β-lactamase enzymes. 
The current database is manually curated and incorporates the primary amino acid sequences, closest structural information in an external structure database (the Protein Data Bank [PDB]) and the functional profiles and phylogenetic trees of the four molecular classes (A, B, C, and D) of β-lactamases. The functional profiles are presented according to the MICs and kinetic parameters that make them more useful for the investigators. Here, a total of 1,147 β-lactam resistance genes are analyzed and described in the database. The database is implemented in MySQL and the related website is developed with Zend Framework 2 on an Apache server, supporting all major web browsers. Users can easily retrieve and visualize biologically important information using a set of efficient queries from a graphical interface. This database is freely accessible at http://ifr48.timone.univ-mrs.fr/beta-lactamase/public/.",2019-04-25 +29218900,Single subject transcriptome analysis to identify functionally signed gene set or pathway activity.,"Analysis of single-subject transcriptome response data is an unmet need of precision medicine, made challenging by the high dimension, dynamic nature and difficulty in extracting meaningful signals from biological or stochastic noise. We have proposed a method for single subject analysis that uses a mixture model for transcript fold-change clustering from isogenically paired samples, followed by integration of these distributions with Gene Ontology Biological Processes (GO-BP) to reduce dimension and identify functional attributes. We then extended these methods to develop functional signing metrics for gene set process regulation by incorporating biological repressor relationships encoded in GO-BP as negatively_regulates edges. Results revealed reproducible and biologically meaningful signals from analysis of a single subject's response, opening the door to future transcriptomic studies where subject and resource availability are currently limiting. 
We used inbred mouse strains fed different diets to provide isogenic biological replicates, permitting rigorous validation of our method. We compared significant genotype-specific GO-BP term results for overlap and rank order across three replicate pairs per genotype, and cross-methods to reference standards (limma+FET, SAM+FET, and GSEA). All single-subject analytics findings were robust and highly reproducible (median area under the ROC curve=0.96, n=24 genotypes × 3 replicates), providing confidence and validation of this approach for analyses in single subjects. R code is available online at http://www.lussiergroup.org/publications/PathwayActivity.",2018-01-01 +30601935,AnnoFly: annotating Drosophila embryonic images based on an attention-enhanced RNN model.,"

Motivation

In the post-genomic era, image-based transcriptomics have received huge attention, because the visualization of gene expression distribution is able to reveal spatial and temporal expression pattern, which is significantly important for understanding biological mechanisms. The Berkeley Drosophila Genome Project has collected a large-scale spatial gene expression database for studying Drosophila embryogenesis. Given the expression images, how to annotate them for the study of Drosophila embryonic development is the next urgent task. In order to speed up the labor-intensive labeling work, automatic tools are highly desired. However, conventional image annotation tools are not applicable here, because the labeling is at the gene-level rather than the image-level, where each gene is represented by a bag of multiple related images, showing a multi-instance phenomenon, and the image quality varies by image orientations and experiment batches. Moreover, different local regions of an image correspond to different CV annotation terms, i.e. an image has multiple labels. Designing an accurate annotation tool in such a multi-instance multi-label scenario is a very challenging task.

Results

To address these challenges, we develop a new annotator for the fruit fly embryonic images, called AnnoFly. Driven by an attention-enhanced RNN model, it can weight images of different qualities, so as to focus on the most informative image patterns. We assess the new model on three standard datasets. The experimental results reveal that the attention-based model provides a transparent approach for identifying the important images for labeling, and it substantially enhances the accuracy compared with the existing annotation methods, including both single-instance and multi-instance learning methods.

Availability and implementation

http://www.csbio.sjtu.edu.cn/bioinf/annofly/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +30166985,The CAIRR Pipeline for Submitting Standards-Compliant B and T Cell Receptor Repertoire Sequencing Studies to the National Center for Biotechnology Information Repositories.,"The adaptation of high-throughput sequencing to the B cell receptor and T cell receptor has made it possible to characterize the adaptive immune receptor repertoire (AIRR) at unprecedented depth. These AIRR sequencing (AIRR-seq) studies offer tremendous potential to increase the understanding of adaptive immune responses in vaccinology, infectious disease, autoimmunity, and cancer. The increasingly wide application of AIRR-seq is leading to a critical mass of studies being deposited in the public domain, offering the possibility of novel scientific insights through secondary analyses and meta-analyses. However, effective sharing of these large-scale data remains a challenge. The AIRR community has proposed minimal information about adaptive immune receptor repertoire (MiAIRR), a standard for reporting AIRR-seq studies. The MiAIRR standard has been operationalized using the National Center for Biotechnology Information (NCBI) repositories. Submissions of AIRR-seq data to the NCBI repositories typically use a combination of web-based and flat-file templates and include only a minimal amount of terminology validation. As a result, AIRR-seq studies at the NCBI are often described using inconsistent terminologies, limiting scientists' ability to access, find, interoperate, and reuse the data sets. In order to improve metadata quality and ease submission of AIRR-seq studies to the NCBI, we have leveraged the software framework developed by the Center for Expanded Data Annotation and Retrieval (CEDAR), which develops technologies involving the use of data standards and ontologies to improve metadata quality. 
The resulting CEDAR-AIRR (CAIRR) pipeline enables data submitters to: (i) create web-based templates whose entries are controlled by ontology terms, (ii) generate and validate metadata, and (iii) submit the ontology-linked metadata and sequence files (FASTQ) to the NCBI BioProject, BioSample, and Sequence Read Archive databases. Overall, CAIRR provides a web-based metadata submission interface that supports compliance with the MiAIRR standard. This pipeline is available at http://cairr.miairr.org, and will facilitate the NCBI submission process and improve the metadata quality of AIRR-seq studies.",2018-08-16 +31871973,Data and method for assessing the sustainability of electricity generation sectors in the south Asia growth quadrangle.,"The research article ""Khan I, Sustainability challenges for the south Asia growth quadrangle: A regional electricity generation sustainability assessment, Journal of Cleaner Production. 243 (2020), 118639, 1-13. DOI: https://doi.org/10.1016/j.jclepro.2019.118639"" [1] is linked to this data article. The electricity generation related data were collected from the electricity authorities of Bangladesh, Bhutan, India, and Nepal annual reports, which were publicly available through their websites. Two methods of sustainability assessment, the 'global' and 'multi-criteria decision analysis (MCDA)' were employed. These two methods were adopted from recent literature. Related data were thus also collected from previous studies in the literature. These two models were explicitly used through a step-by-step calculation using the collected data. These data and methods will allow the researchers to replicate the methods readily. The use of this data and method will also enhance applying a similar approach to other related datasets. 
Overall, this dataset and method of calculation allow the researcher or analyst to avoid a number of issues: (i) it eliminates considering a large volume of electricity generation data from a myriad of sources for the four countries; (ii) this dataset is ready to be used for any further related sustainability assessment, thus reducing the steps by breaking large datasets down in a way that makes the analysis much easier, and (iii) the calculation steps are ready to be used for any other similar dataset.",2019-11-16 +32691361,Genome Sequencing of Leishmania infantum Causing Cutaneous Leishmaniosis from a Turkish Isolate with Next-Generation Sequencing Technology.,"

Purpose

Leishmania subgenus Leishmania causes leishmaniosis, which is a chronic systemic disease in humans and animals, in which the skin and visceral organs can be affected. The disease generally consists of three different clinical types in humans: visceral (kala-azar, VL), cutaneous (CL) and mucocutaneous leishmaniosis (MCL). According to the World Health Organization (WHO), leishmaniosis is still one of the world's most neglected diseases. It has been nearly 13-14 years since the completion of the first complete genome sequence of a Leishmania parasite. However, much information about these parasites remains to be elucidated, such as the causes of differences in tissue tropism. The aim of this study is to perform the whole-genome sequencing of Leishmania infantum causing cutaneous leishmaniosis from a Turkish isolate with next-generation sequencing technology.

Methods

Genomic sequencing was performed on the Illumina HiSeq 2500 platform. The TruSeq Nano DNA Low Throughput Library Prep Kit, compatible with the Illumina HiSeq 2500 platform, was used to generate the library. Sequencing by synthesis (SBS) was performed with a HiSeq Rapid SBS Kit v2 to generate single-fragment reads (2 × 150 bp; PE) with two fragment end-to-end assemblies. Bioinformatics analyses were performed on the Geneious 11.0.5 ( www.genius.com ) platform.

Results

In our study, a high-quality whole-genome sequence (WGS) of L. infantum was successfully generated, and a total of 32,009,137 base pairs of genomic DNA from 36 chromosomes were obtained. The resulting genomic DNA sequence was submitted to the US National Center for Biotechnology Information (NCBI) GenBank ( www.ncbi.nlm.nih.gov ) database and registered under the name Leishmania infantum_TR01 (Lin_TR01). The following accession numbers were assigned by NCBI to the 36 chromosomes of the Lin_TR01 genome: CP027807, CP027810, CP027808, CP027811, CP027809, CP027812, CP027813, CP027814, CP027817, CP027818, CP027819, CP027815, CP027821, CP027816, CP027823, CP027820, CP027822, CP027824, CP027825, CP027826, CP027827, CP027828, CP027829, CP027830, CP027831, CP027832, CP027833, CP027834, CP027835, CP027836, CP027837, CP027838, CP027839, CP027840, CP027841, CP027842. As a result of the annotation of the Lin_TR01 genome, 3153 polymorphisms, 8324 genes, 8199 CDSs, 8109 mRNAs, 67 tRNAs, 11 rRNAs and 58 ncRNA were identified. Among the 8199 CDS obtained, 5278 encode hypothetical proteins.

Conclusion

In this study, a high-quality WGS of Leishmania infantum was successfully obtained for the first time in Turkey. According to a review of WGS studies on this subject, the Lin_TR01 strain is the first strain to be isolated from cutaneous leishmaniosis. The reference genome of L. infantum JPCM5 (Peacock et al., 2007) was obtained from a visceral leishmaniosis case, in accordance with the classical tissue and organ tropism of the species. Lin_TR01 is the second whole-genome-sequenced strain in the world after the JPCM5 strain. The Lin_TR01 genome is the only L. infantum whole-genome sequence that is completed assembly level from 36 chromosomes among the genomes obtained thus far ( https://www.ncbi.nlm.nih.gov/genome/genomes/249 ).",2020-07-20 +32631905,Aquaporin-7 Regulates the Response to Cellular Stress in Breast Cancer.,"The complex yet interrelated connections between cancer metabolism, gene expression, and oncogenic driver genes have the potential to identify novel biomarkers and drug targets with prognostic and therapeutic value. Here we effectively integrated metabolomics and gene expression data from breast cancer mouse models through a novel unbiased correlation-based network analysis. This approach identified 35 metabolite and 34 gene hubs with the most network correlations. These hubs have prognostic value and are likely integral to tumor metabolism and breast cancer. The gene hub Aquaporin-7 (Aqp7), a water and glycerol channel, was identified as a novel regulator of breast cancer. AQP7 was prognostic of overall survival in patients with breast cancer. In mouse breast cancer models, reduced expression of Aqp7 caused reduced primary tumor burden and lung metastasis. Metabolomics and complex lipid profiling of cells and tumors with reduced Aqp7 revealed significantly altered lipid metabolism, glutathione metabolism, and urea/arginine metabolism compared with controls. 
These data identify AQP7 as a critical regulator of metabolic and signaling responses to environmental cellular stresses in breast cancer, highlighting AQP7 as a potential cancer-specific therapeutic vulnerability. SIGNIFICANCE: Aquaporin-7 is identified as a critical regulator of nutrient availability and signaling that responds to cellular stresses, making it an attractive therapeutic target in breast cancer. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/19/4071/F1.large.jpg.",2020-07-06 +32692772,Prediction of five-year mortality after COPD diagnosis using primary care records.,"Accurate prognosis information after a diagnosis of chronic obstructive pulmonary disease (COPD) would facilitate earlier and better informed decisions about the use of prevention strategies and advanced care plans. We therefore aimed to develop and validate an accurate prognosis model for incident COPD cases using only information present in general practitioner (GP) records at the point of diagnosis. Incident COPD patients between 2004-2012 over the age of 35 were studied using records from 396 general practices in England. We developed a model to predict all-cause five-year mortality at the point of COPD diagnosis, using 47,964 English patients. Our model uses age, gender, smoking status, body mass index, forced expiratory volume in 1-second (FEV1) % predicted and 16 co-morbidities (the same number as the Charlson Co-morbidity Index). The performance of our chosen model was validated in all countries of the UK (N = 48,304). Our model performed well, and performed consistently in validation data. The validation area under the curves in each country varied between 0.783-0.809 and the calibration slopes between 0.911-1.04. Our model performed better in this context than models based on the Charlson Co-morbidity Index or Cambridge Multimorbidity Score. 
We have developed and validated a model that outperforms general multimorbidity scores at predicting five-year mortality after COPD diagnosis. Our model includes only data routinely collected before COPD diagnosis, allowing it to be readily translated into clinical practice, and has been made available through an online risk calculator (https://skiddle.shinyapps.io/incidentcopdsurvival/).",2020-07-21 +28968739,SNPDelScore: combining multiple methods to score deleterious effects of noncoding mutations in the human genome.,"

Summary

Addressing deleterious effects of noncoding mutations is an essential step towards the identification of disease-causal mutations of gene regulatory elements. Several methods for quantifying the deleteriousness of noncoding mutations using artificial intelligence, deep learning and other approaches have been recently proposed. Although the majority of the proposed methods have demonstrated excellent accuracy on different test sets, there is rarely a consensus. In addition, advanced statistical and artificial learning approaches used by these methods make it difficult porting these methods outside of the labs that have developed them. To address these challenges and to transform the methodological advances in predicting deleterious noncoding mutations into a practical resource available for the broader functional genomics and population genetics communities, we developed SNPDelScore, which uses a panel of proposed methods for quantifying deleterious effects of noncoding mutations to precompute and compare the deleteriousness scores of all common SNPs in the human genome in 44 cell lines. The panel of deleteriousness scores of a SNP computed using different methods is supplemented by functional information from the GWAS Catalog, libraries of transcription factor-binding sites, and genic characteristics of mutations. SNPDelScore comes with a genome browser capable of displaying and comparing large sets of SNPs in a genomic locus and rapidly identifying consensus SNPs with the highest deleteriousness scores making those prime candidates for phenotype-causal polymorphisms.

Availability and implementation

https://www.ncbi.nlm.nih.gov/research/snpdelscore/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +32082410,GrpClassifierEC: a novel classification approach based on the ensemble clustering space.,"

Background

Advances in molecular biology have resulted in big and complicated data sets; therefore, a clustering approach that is able to capture the actual structure and the hidden patterns of the data is required. Moreover, the geometric space may not reflect the actual similarity between the different objects. As a result, in this research we use a clustering-based space that converts the geometric space of the molecular data to a categorical space based on clustering results. Then we use this space for developing a new classification algorithm.

Results

In this study, we propose a new classification method named GrpClassifierEC that replaces the given data space with a categorical space based on ensemble clustering (EC). The EC space is defined by tracking the membership of the points over multiple runs of clustering algorithms. Different points that were included in the same clusters will be represented as a single point. Our algorithm classifies all these points as a single class. The similarity between two objects is defined as the number of times that these objects did not belong to the same cluster. In order to evaluate our suggested method, we compare its results to the k nearest neighbors, Decision tree and Random forest classification algorithms on several benchmark datasets. The results confirm that the suggested new algorithm GrpClassifierEC outperforms the other algorithms.

Conclusions

Our algorithm can be integrated with many other algorithms. In this research, we use only the k-means clustering algorithm with different k values. In future research, we propose several directions: (1) checking the effect of the clustering algorithm to build an ensemble clustering space. (2) Finding poor clustering results based on the training data, (3) reducing the volume of the data by combining similar points based on the EC.

Availability and implementation

The KNIME workflow, implementing GrpClassifierEC, is available at https://malikyousef.com.",2020-02-13 +32947252,Political partisanship and mobility restriction during the COVID-19 pandemic.,"

Objectives

Non-pharmaceutical interventions (NPIs) are effective in curbing the spread of severe acute respiratory syndrome coronavirus 2. All US states have adopted NPI policies, but compliance with these measures and the influence of sociopolitical factors on NPI adherence are unknown. NPI adherence may be approximated by personal mobility in a population that is tracked by anonymous mobile phone data.

Study design

This is a cross-sectional study of state-level mobility changes across the US.

Methods

State-level mobility was based on anonymous mobile phone data from multiple participating carriers collected by the University of Washington's Institute for Health Metrics and Evaluation (http://www.healthdata.org). Pearson's correlation coefficient was used to examine the strength and direction of the relationship between political affiliations and mobility restriction across states. Multivariable linear regression analyses were used to assess other factors that may impact personal travel.

Results

All states experienced a decline in personal mobility but had varying nadirs ranging from a 34% to a 69% reduction in mobility, which was not temporally related to the timing of state-level NPI measures. There was a statistically significant linear and negative correlation (r = -0.79) between the proportion of Republicans/leaning Republicans and NPI adherence across US states. The negative association between Republicans and NPI adherence was significant even when adjusting for urbanization, proportion of essential workers, population, Gini index, and poverty rates.

Conclusions

Political orientation affects risk perception, which may contribute to the unwillingness of some individuals to perceive the coronavirus disease 2019 pandemic as a risk and to comply with NPIs. Our results highlight the importance of sociopolitical factors in disease control and emphasize the importance of bipartisan efforts in fighting the pandemic. These results may have implications for the development, dissemination, and communication of public health policies.",2020-08-19 +33405254,"Determination of acidity constants, ionic mobilities, and hydrodynamic radii of carborane-based inhibitors of carbonic anhydrases by capillary electrophoresis.","Capillary electrophoresis (CE) has been applied for determination of the thermodynamic acidity constants (pKa ) of the sulfamidoalkyl and sulfonamidoalkyl groups, the actual and limiting ionic mobilities and hydrodynamic radii of important compounds, eight carborane-based inhibitors of carbonic anhydrases, which are potential new anticancer drugs. Two types of carboranes were investigated, (i) icosahedral cobalt bis(dicarbollide)(1-) ion with sulfamidoalkyl moieties, and (ii) 7,8-nido-dicarbaundecaborate with sulfonamidoalkyl side chains. First, the mixed acidity constants, pKa mix , of the sulfamidoalkyl and sulfonamidoalkyl groups of the above carboranes and their actual ionic mobilities were determined by nonlinear regression analysis of the pH dependences of their effective electrophoretic mobility measured by capillary electrophoresis in the pH range 8.00-12.25, at constant ionic strength (25 mM), and constant temperature (25°C). Second, the pKa mix were recalculated to the thermodynamic pKa s using the Debye-Hückel theory. The sulfamidoalkyl and sulfonamidoalkyl groups were found to be very weakly acidic with the pKa s in the range 10.78-11.45 depending on the type of carborane cluster and on the position and length of the alkyl chain on the carborane scaffold. 
These pKa s were in a good agreement with the pKa s (10.67-11.27) obtained by new program AnglerFish (freeware at https://echmet.natur.cuni.cz), which provides thermodynamic pKa s and limiting ionic mobilities directly from the raw CE data. The absolute values of the limiting ionic mobilities of univalent and divalent carborane anions were in the range 18.3-27.8 TU (Tiselius unit, 1 × 10-9 m2 /Vs), and 36.4-45.9 TU, respectively. The Stokes hydrodynamic radii of univalent and divalent carborane anions varied in the range 0.34-0.52 and 0.42-0.52 nm, respectively.",2021-01-21 +27794522,"Data Resource Profile: Cross-national and cross-study sociodemographic and health-related harmonized domains from SAGE plus ELSA, HRS and SHARE (SAGE+, Wave 1).","Four longitudinal studies were included in this rigorous harmonization process: the Study on global AGEing and adult health (SAGE); English Longitudinal Study on Ageing (ELSA); US Health and Retirement Study (HRS); and Survey of Health, Ageing and Retirement in Europe (SHARE). An ex-post harmonized process was applied to nine health-related thematic domains (socio-demographic and economic, health states, overall self-report of health and mental state, health examinations, physical and mental performance tests, risk factors, chronic conditions, social network and subjective well-being) for data from the 2004 wave of each study. Large samples of adults aged 50 years and older were available from each study: SAGE, n = 18 886; ELSA, n = 9181; HRS, n = 19 303; and SHARE, n = 29 917. The microdata, along with further details about the harmonization process and all metadata, are available through the World Health Organization (WHO) data archive at [http://apps.who.int/healthinfo/systems/surveydata/index.php/catalog]. Further information and enquiries can be made to [sagesurvey@who.int] or the corresponding author. 
The data resource will continue to be updated with data across additional waves of these surveys and new waves.",2016-10-29 +32694069,Is There an Association Between Contraception and Sexual Dysfunction in Women? A Systematic Review and Meta-analysis Based on Female Sexual Function Index.,"

Background

A growing body of research investigates the sexual functioning status in women with contraceptives use; however, the evidence is still inconclusive.

Aim

To examine whether contraceptives use is associated with a higher risk of female sexual dysfunction (FSD).

Methods

The electronic databases MEDLINE, Embase, Cochrane Library databases, and PsychINFO were systematically screened for eligible studies before December 2019. We only included those studies assessing women's sexual functioning by the Female Sexual Function Index (FSFI). This study was registered on the PROSPERO (ID: CRD42020167723, http://www.crd.york.ac.uk/PROSPERO).

Outcomes

The strength of the association between contraceptives use and risk of FSD was presented by calculating the standard mean differences (SMDs) and the relative risk (RR) with a 95% confidence interval (CI). The pooled results were calculated using a random-effects model.

Results

A total of 12 studies (7 cross-sectional studies, 3 cohorts, and 1 case-control study) involving 9,427 participants were included. The mean age in the contraceptive users ranged from 22.5 ± 2.4 years to 38.2 ± 4.6 years, while the mean age in the nonusers was 22.5 ± 2.4 years to 36.0 ± 1.0 years. Pooled results showed that no significant difference in the total FSFI scores was observed between contraceptives use and noncontraception (SMD = -1.03, 95% CI: -2.08 to 0.01, P = .053; heterogeneity: I2 = 98.2%, P < .001). In line with this finding, the pooled RR also yielded no association between contraception use and the risk of FSD (RR = 1.29, 95% CI: 0.72-2.28, P = .392; heterogeneity: I2 = 76.0%, P = .0015). However, the subscale sexual desire showed a significant reduction in women who received contraceptives than those did not use contraception (SMD = -1.17, 95% CI: -2.09 to -0.24, P = .014; heterogeneity: I2 = 97.7%, P < .001), while no significant differences were found in sexual arousal, lubrication, orgasm, satisfaction, and pain domain.

Clinical implications

Though evidence from this meta-analysis did not support an association between contraceptives use and the risk of FSD, the sexual desire could be significantly impaired by contraceptives use.

Strengths & limitations

This is the first meta-analysis quantifying the relationship between contraceptives use and the risks of FSD. However, substantial heterogeneities were presented across the included studies.

Conclusion

No direct association between contraceptives use and the risk of FSD was found. Nevertheless, declining sexual desire was significantly associated with contraceptives use. Additional double-blind, randomized, placebo-controlled trials are still warranted. Huang M, Li G, Liu J, et al. Is There an Association Between Contraception and Sexual Dysfunction in Women? A Systematic Review and Meta-analysis Based on Female Sexual Function Index. J Sex Med 2020;17:1942-1955.",2020-07-18 +30439517,Rationale and design of the Hepatocellular carcinoma Early Detection Strategy study: A multi-center longitudinal initiative of the National Cancer Institute's Early Detection Research Network.,"

Background

Hepatocellular carcinoma (HCC) is a common malignancy with a steadily rising incidence and associated morbidity and mortality. Cirrhosis of the liver is presently the leading risk factor for developing HCC. Abdominal imaging, with or without alpha-fetoprotein (AFP) testing, every 6 months is the current surveillance strategy for patients at risk. The available biomarkers for detecting this cancer at an early stage have inadequate sensitivity and specificity.

Methods

The Hepatocellular carcinoma Early Detection Strategy (HEDS) study, a multi-center initiative of the National Cancer Institutes' (NCI) Early Detection Research Network (EDRN), launched an effort to establish what has become the nation's largest comprehensive biorepository and database on patients at high risk of developing HCC. The cohort has been developed in seven clinical centers across the USA. Subjects are enrolled for a five-year period involving data and specimen collection every six months in accordance with standard surveillance for HCC. Extensive clinical data are collected and specimens are stored at a central repository.

Results

The database and biorepository contain longitudinally collected clinical data and serum and plasma samples from 1482 participants with cirrhosis and without evidence of HCC at baseline. Fifty-six percent are male, 85% Caucasian, 30% have a history of chronic HCV and 71% have compensated cirrhosis.

Conclusions

The HEDS cohort provides opportunities for the continued study of the incidence and course of HCC in a comprehensively followed population of patients at high risk for this malignancy. Further, the EDRN biorepository provides a distinct opportunity for the development of novel biomarkers. Trial registry URL: https://edrn.nci.nih.gov/protocols/316-hepatocellular-carcinoma-early-detection-strategy.",2018-11-12 +30229074,"A geospatial database of drought occurrence in inland valleys in Mali, Burkina Faso and Nigeria.","The data described in this article are related to drought occurrence in inland valleys and farmers adaptation strategies. The data were collected in 300 inland valleys distributed in 14 regions of West Africa. The data were collected in two phases. In the first phase, 300 inland valleys were identified in 14 regions and their locations were determined with handheld GPS devices. Questionnaires and informal interviews were administered to inland valleys users to collect data on physical and socio-economic characteristics, hydrology, farmers experience with drought affecting rice production in inland valleys and adaptation strategies. In the second phase, the locations of the inland valleys were imported in a GIS environment and were used to extract additional parameters on soil characteristics and water demand from the Shuttle Radar Topography Mission (SRTM), Africa Soil Information Service (africasoils.net) and POWER database (http://power.larc.nasa.gov). 
In total, the dataset contains 41 variables divided into seven themes: farmers' experience with drought, adaptive management of rice farmers to drought, physical characteristics, hydrology, management practices, socio-economic characteristics and weather data of inland valleys.",2018-06-30 +28399157,A multi-pattern hash-binary hybrid algorithm for URL matching in the HTTP protocol.,"In this paper, based on our previous multi-pattern uniform resource locator (URL) binary-matching algorithm called HEM, we propose an improved multi-pattern matching algorithm called MH that is based on hash tables and binary tables. The MH algorithm can be applied to the fields of network security, data analysis, load balancing, cloud robotic communications, and so on-all of which require string matching from a fixed starting position. Our approach effectively solves the performance problems of the classical multi-pattern matching algorithms. This paper explores ways to improve string matching performance under the HTTP protocol by using a hash method combined with a binary method that transforms the symbol-space matching problem into a digital-space numerical-size comparison and hashing problem. The MH approach has a fast matching speed, requires little memory, performs better than both the classical algorithms and HEM for matching fields in an HTTP stream, and it has great promise for use in real-world applications.",2017-04-11 +,"Museum specimens provide phylogenomic data to resolve relationships of sack‐bearer moths (Lepidoptera, Mimallonoidea, Mimallonidae)","Mimallonidae, the sack‐bearer moths, are a family of predominantly Neotropical moths containing nearly 300 described species. Mimallonidae feed on over 40 host plant families and are found in a variety of environments, but phylogenetic relationships of species within the family have never been investigated. 
We sequenced 515 loci using anchored hybrid enrichment target capture on ethanol‐preserved and dried museum specimens, with dates of collection ranging from 1985 to 2017. We sampled 47 species, representing 32 of the 36 described mimallonid genera. By incorporating 19 dry museum specimens, and recovering an average of over 400 loci for each, we illustrate the utility of natural history collections in anchored hybrid enrichment‐based phylogenomics. Maximum likelihood and multi‐species coalescent analyses provide robust support for the recognition of six higher‐level groups within Mimallonidae, which we designate as subfamilies: Zaphantinae St Laurent & Kawahara subfam.n., Aurorianinae St Laurent & Kawahara subfam.n., Mimalloninae Burmeister, Lacosominae Dyar, Druenticinae St Laurent & Kawahara subfam.n. and Cicinninae Schaus stat.n. Our phylogenetic results also robustly support eight new tribes: Lacosominae: Trogopterini St Laurent & Kawahara tribe n., Lacosomini Dyar stat.n., Alheitini St Laurent & Kawahara tribe n.; Druenticinae: Luramini St Laurent & Kawahara tribe n., Druenticini St Laurent & Kawahara tribe n.; Cicinninae: Bedosiini St Laurent & Kawahara tribe n., Psychocampini St Laurent & Kawahara tribe n., Cicinnini Schaus stat.n. Three new genera are also described based on our phylogenetic results: Herbinalla St Laurent & Kawahara, gen.n., Ulaluma St Laurent & Kawahara, gen.n., Bedosiallo St Laurent & Kawahara, gen.n. Naniteta Franclemont, syn.n. is a synonym of Lacosoma Grote. 
Six genera are paraphyletic, and in total 19 new combinations are proposed: Macessoga laxa comb.n., Lacosoma elassa comb.n., Thaelia anysia comb.n., Thaelia subrubiginosa comb.n., Herbinalla caudina comb.n., Druentica brosica comb.n., Ulaluma valva comb.n., Cicinnus eminens comb.n., Roelmana pluridiscata comb.n., Roelmana laguerrei comb.n., Psychocampa joanna comb.n., Psychocampa unalca comb.n., Psychocampa hamata comb.n., Psychocampa marona comb.n., Bedosiallo eugenia comb.n., Bedosiallo forbesi comb.n., Bedosiallo moengus comb.n., Bedosiallo styx comb.n. and Bedosiallo sylvia comb.n. This study is the first to implement the LEP1 probe set on a comprehensive taxonomic dataset that includes many museum specimens, and our results demonstrate that museum specimens can be used in anchored hybrid enrichment studies. Importantly, these data produce a robust phylogeny that will serve as a foundation for future studies on mimallonid evolution, such as host plant relationships and biogeography. This published work has been registered in ZooBank: http://zoobank.org/urn:lsid:zoobank.org:pub:60890688‐2E77‐4B98‐B247‐B7A5F7E4DFD9.",2018-10-01 +32324992,Comprehensive Evaluation of Fourteen Docking Programs on Protein-Peptide Complexes.,"A large number of protein-protein interactions (PPIs) are mediated by the interactions between proteins and peptide segments binding partners, and therefore determination of protein-peptide interactions (PpIs) is quite crucial to elucidate important biological processes and design peptides or peptidomimetic drugs that can modulate PPIs. Nowadays, as a powerful computation tool, molecular docking has been widely utilized to predict the binding structures of protein-peptide complexes. However, although a number of docking programs have been available, the systematic study on the assessment of their performance for PpIs has never been reported. 
In this study, a benchmark data set called PepSet consisting of 185 protein-peptide complexes with peptide length ranging from 5 to 20 residues was employed to evaluate the performance of 14 docking programs, including three protein-protein docking programs (ZDOCK, FRODOCK, and HawkDock), three small molecule docking programs (GOLD, Surflex-Dock, and AutoDock Vina), and eight protein-peptide docking programs (GalaxyPepDock, MDockPeP, HPEPDOCK, CABS-dock, pepATTRACT, DINC, AutoDock CrankPep (ADCP), and HADDOCK peptide docking). A new evaluation parameter, named IL_RMSD, was proposed to measure the docking accuracy with fnat (the fraction of native contacts). In global docking, HPEPDOCK performs the best for the entire data set and yields the success rates of 4.3%, 24.3%, and 55.7% at the top 1, 10, and 100 levels, respectively. In local docking, overall, ADCP achieves the best predictions and reaches the success rates of 11.9%, 37.3%, and 70.3% at the top 1, 10, and 100 levels, respectively. It is expected that our work can provide some helpful insights into the selection and development of improved docking programs for PpIs. The benchmark data set is freely available at http://cadd.zju.edu.cn/pepset/.",2020-05-06 +32990142,Height and health in late eighteenth-century England.,"Adult stature has become a widely used indicator of childhood nutritional status in historical populations and may provide insights into health inequalities that are not discernible in mortality rates. However, most pre-twentieth-century British data on heights suffer from selection biases. Here we present unique evidence on heights of adult males by occupation from an unbiased sample of adult males in Dorset in 1798-99. The mean height of fully grown (married) men was very similar to that of older military recruits, and our sample therefore confirms the taller stature of English males relative to males of other European countries in the same period. 
In contrast to previous evidence of negligible or U-shaped socio-economic gradients in mortality in this period, we found a fairly linear gradient in height by socio-economic status, that is similar in magnitude to class differences in adult height among English males born in the mid-twentieth century.Supplementary material for this article is available at: https://doi.org/10.1080/00324728.2020.1823011.",2020-09-29 +32401510,Prediction of the Favorable Hydration Sites in a Protein Binding Pocket and Its Application to Scoring Function Formulation.,"The important role of water molecules in protein-ligand binding energetics has attracted wide attention in recent years. A range of computational methods has been developed to predict the favorable locations of water molecules in a protein binding pocket. Most of the current methods are based on extensive molecular dynamics or Monte Carlo simulations. They are time-consuming and thus cannot be applied to high-throughput tasks. To overcome this difficulty, we have developed an empirical method, called HydraMap, to predict the favorable hydration sites in the binding pocket of a protein molecule. This method uses statistical potentials to quantify the interactions between protein atoms and water molecules. Such statistical potentials were derived from 10,987 crystal structures selected from the Protein Data Bank. The probability of placing a water probe at each spot in the binding pocket was evaluated to derive a density map. The density map was then deduced into explicit hydration sites through a clustering process. HydraMap was validated on two external test sets, where it produced comparable results as 3D-RISM and WATsite but was 30-1000 times faster. In addition, we have attempted to estimate the desolvation energy associated with water molecule replacement upon ligand binding based on the outcomes of HydraMap. 
This desolvation term, called DEWED, was incorporated into the framework of four scoring functions, i.e., ASP, ChemPLP, GoldScore, and X-Score. The derivative scoring functions were tested in terms of scoring power, docking power, and screening power on a range of data sets. It was observed that X-Score exhibited the most obvious improvement in accuracy after adding the DEWED terms. Moreover, all scoring functions augmented with the DEWED terms exhibited improved or comparable performance on most data sets as the corresponding ones augmented with the GB/SA terms. Our study has demonstrated the potential application of HydraMap and DEWED to the formulation of new scoring functions. A beta-version of the HydraMap software is freely available from our Web site (http://www.sioc-ccbg.ac.cn/software/hydramap/) for testing.",2020-06-01 +29599790,An Updated Functional Annotation of Protein-Coding Genes in the Cucumber Genome.,"Background: Although the cucumber reference genome and its annotation were published several years ago, the functional annotation of predicted genes, particularly protein-coding genes, still requires further improvement. In general, accurately determining orthologous relationships between genes allows for better and more robust functional assignments of predicted genes. As one of the most reliable strategies, the determination of collinearity information may facilitate reliable orthology inferences among genes from multiple related genomes. Currently, the identification of collinear segments has mainly been based on conservation of gene order and orientation. Over the course of plant genome evolution, various evolutionary events have disrupted or distorted the order of genes along chromosomes, making it difficult to use those genes as genome-wide markers for plant genome comparisons. 
Results: Using the localized LASTZ/MULTIZ analysis pipeline, we aligned 15 genomes, including cucumber and other related angiosperm plants, and identified a set of genomic segments that are short in length, stable in structure, uniform in distribution and highly conserved across all 15 plants. Compared with protein-coding genes, these conserved segments were more suitable for use as genomic markers for detecting collinear segments among distantly divergent plants. Guided by this set of identified collinear genomic segments, we inferred 94,486 orthologous protein-coding gene pairs (OPPs) between cucumber and 14 other angiosperm species, which were used as proxies for transferring functional terms to cucumber genes from the annotations of the other 14 genomes. In total, 10,885 protein-coding genes were assigned Gene Ontology (GO) terms which was nearly 1,300 more than results collected in Uniprot-proteomic database. Our results showed that annotation accuracy would been improved compared with other existing approaches. Conclusions: In this study, we provided an alternative resource for the functional annotation of predicted cucumber protein-coding genes, which we expect will be beneficial for the cucumber's biological study, accessible from http://cmb.bnu.edu.cn/functional_annotation. Meanwhile, using the cucumber reference genome as a case study, we presented an efficient strategy for transferring gene functional information from previously well-characterized protein-coding genes in model species to newly sequenced or ""non-model"" plant species.",2018-03-15 +33356304,"Proteomic Data Analysis for Differential Profiling of the Autoimmune Diseases SLE, RA, SS, and ANCA-Associated Vasculitis.","Early and correct diagnosis of inflammatory rheumatic diseases (IRD) poses a clinical challenge due to the multifaceted nature of symptoms, which also may change over time. 
The aim of this study was to perform protein expression profiling of four systemic IRDs, systemic lupus erythematosus (SLE), ANCA-associated systemic vasculitis (SV), rheumatoid arthritis (RA), and Sjögren's syndrome (SS), and healthy controls to identify candidate biomarker signatures for differential classification. A total of 316 serum samples collected from patients with SLE, RA, SS, or SV and from healthy controls were analyzed using 394-plex recombinant antibody microarrays. Differential protein expression profiling was examined using Wilcoxon signed rank test, and condensed biomarker panels were identified using advanced bioinformatics and state-of-the art classification algorithms to pinpoint signatures reflecting each disease (raw data set available at https://figshare.com/s/3bd3848a28ef6e7ae9a9.). In this study, we were able to classify the included individual IRDs with high accuracy, as demonstrated by the ROC area under the curve (ROC AUC) values ranging between 0.96 and 0.80. In addition, the groups of IRDs could be separated from healthy controls at an ROC AUC value of 0.94. Disease-specific candidate biomarker signatures and general autoimmune signature were identified, including several deregulated analytes. This study supports the rationale of using multiplexed affinity-based technologies to reflect the biological complexity of autoimmune diseases. A multiplexed approach for decoding multifactorial complex diseases, such as autoimmune diseases, will play a significant role for future diagnostic purposes, essential to prevent severe organ- and tissue-related damage.",2020-12-23 +32764988,Co-Expression Network Analysis Identified LTF in Association with Metastasis Risk and Prognosis in Clear Cell Renal Cell Carcinoma.,"

Objective

Clear cell renal cell carcinoma (ccRCC) is the most common renal cancer in adults. The 5-year survival rate of patients with advanced ccRCC is less than 30%. Lack of potential biomarkers for treatment and prognosis is a limitation for early diagnosis and treatment of ccRCC.

Methods

We collected microarray profiles of 39 ccRCC and matched normal samples to identify differentially expressed genes (DEGs). Then, a weighted gene co-expression network analysis (WGCNA) was constructed to identify gene modules associated with the metastasis in ccRCC. The Cancer Genome Atlas (TCGA) database and the Human Protein Atlas (HPA, https://www.proteinatlas.org/) database were used as the verification set. Finally, we used biological experiments to preliminarily investigate the impact of LTF on the tumor biological behavior of ccRCC, including proliferation, migration, invasion, and apoptosis.

Results

A total of 15 genetic modules were identified, and the light-green module is considered the most relevant to tumor metastasis (P = 0.02, R2 = -0.4). A protein-protein interaction (PPI) network analysis was performed to identify the hub nodes in the light-green module. Finally, combining the results of PPI, WGCNA and DEGs, the lactotransferrin (LTF) gene was regarded as the ""real"" hub gene for cancer metastasis risk. LTF was subsequently validated using the TCGA database. Immunohistochemistry confirmed that the expression of LTF in ccRCC tumor tissue was significantly lower than that in normal tissue based on the HPA database. Intriguingly, patients with low expression of LTF had lower survival rates (HR = 0.66, 95% CI: 0.49-0.89, P = 0.0067), the expression level of the sample was negatively correlated with tumor stage (P = 0.0385), and patients with low expression of LTF gene were more likely to have distant metastasis (P = 0.038). Overexpression of LTF inhibited the proliferation, migration, invasion and promoted apoptosis of human ccRCC cells in vitro.

Conclusion

LTF might be a novel prognostic biomarker for ccRCC.",2020-07-17 +31868683,The Quebec Parkinson Network: A Researcher-Patient Matching Platform and Multimodal Biorepository.,"

Background

Genetic, biologic and clinical data suggest that Parkinson's disease (PD) is an umbrella for multiple disorders with clinical and pathological overlap, yet with different underlying mechanisms. To better understand these and to move towards neuroprotective treatment, we have established the Quebec Parkinson Network (QPN), an open-access patient registry, and data and bio-samples repository.

Objective

To present the QPN and to perform preliminary analysis of the QPN data.

Methods

A total of 1,070 consecutively recruited PD patients were included in the analysis. Demographic and clinical data were analyzed, including comparisons between males and females, PD patients with and without RBD, and stratified analyses comparing early and late-onset PD and different age groups.

Results

QPN patients exhibit a male:female ratio of 1.8:1, an average age-at-onset of 58.6 years, an age-at-diagnosis of 60.4 years, and average disease duration of 8.9 years. REM-sleep behavior disorder (RBD) was more common among men, and RBD was associated with other motor and non-motor symptoms including dyskinesia, fluctuations, postural hypotension and hallucinations. Older patients had significantly higher rates of constipation and cognitive impairment, and longer disease duration was associated with higher rates of dyskinesia, fluctuations, freezing of gait, falls, hallucinations and cognitive impairment. Since QPN's creation, over 60 studies and 30 publications have included patients and data from the QPN.

Conclusions

The QPN cohort displays typical PD demographics and clinical features. These data are open-access upon application (http://rpq-qpn.ca/en/), and will soon include genetic, imaging and bio-samples. We encourage clinicians and researchers to perform studies using these resources.",2020-01-01 +33685705,Genomic evaluation of Brown Swiss dairy cattle with limited national genotype data and integrated external information.,"This study demonstrated the feasibility of a genomic evaluation for the dairy cattle population for which the small national training population can be complemented with foreign information from international evaluations. National test-day milk yield data records for the Slovenian Brown Swiss cattle population were analyzed. Genomic evaluation was carried out using the single-step genomic best linear unbiased prediction method (ssGBLUP), resulting in genomic estimated breeding values (GEBV). The predominantly female group of genotyped animals, representing the national training population in the single-step genomic evaluation, was further augmented with 7,024 genotypes of foreign progeny-tested sires from an international Brown Swiss InterGenomics genomic evaluation (https://interbull.org/ib/whole_cop). Additionally, the estimated breeding values for the altogether 7,246 genotyped domestic and foreign sires from the 2019 sire multiple across-country evaluation (MACE), were added to the ssGBLUP as external pseudophenotypic information. The ssGBLUP method, with integration of MACE information by avoiding double counting, was then performed, resulting in MACE-enhanced GEBV (GEBVM). The methods were empirically validated with forward prediction. The validation group consisted of 315 domestic males and 1,041 domestic females born after 2012. Increase, inflation, and bias of the GEBV(M) reliability (REL) were assessed for the validation group with a focus on females. 
All individuals in the validation benefited from genomic evaluations using both methods, but the GEBV(M) REL increased most for the youngest selection candidates. Up to 35 points of GEBV REL could be assigned to national genomic information, and up to 17 points of GEBVM REL could additionally be attributed to the integration of foreign sire genomic and MACE information. Results indicated that the combined foreign progeny-tested sire genomic and external MACE information can be used in the single-step genomic evaluation as an equivalent replacement for domestic phenotypic information. Thus, an equal or slightly higher genomic breeding value REL was obtained sooner than the pedigree-based breeding value REL for the female selection candidates. When the abundant foreign progeny-tested sire genomic and MACE information was used to complement available national genomic and phenotypic information in single-step genomic evaluation, the genomic breeding value REL for young-female selection candidates increased approximately 10 points. Use of international information provides the possibility to upgrade small national training populations and obtain satisfying reliability of genomic breeding values even for the youngest female selection candidates, which will help to increase selection efficiency in the future.",2021-03-06 +27142340,Genome-Wide Functional Annotation of Human Protein-Coding Splice Variants Using Multiple Instance Learning.,"The vast majority of human multiexon genes undergo alternative splicing and produce a variety of splice variant transcripts and proteins, which can perform different functions. These protein-coding splice variants (PCSVs) greatly increase the functional diversity of proteins. Most functional annotation algorithms have been developed at the gene level; the lack of isoform-level gold standards is an important intellectual limitation for currently available machine learning algorithms. 
The accumulation of a large amount of RNA-seq data in the public domain greatly increases our ability to examine the functional annotation of genes at isoform level. In the present study, we used a multiple instance learning (MIL)-based approach for predicting the function of PCSVs. We used transcript-level expression values and gene-level functional associations from the Gene Ontology database. A support vector machine (SVM)-based 5-fold cross-validation technique was applied. Comparatively, genes with multiple PCSVs performed better than single PCSV genes, and performance also improved when more examples were available to train the models. We demonstrated our predictions using literature evidence of ADAM15, LMNA/C, and DMXL2 genes. All predictions have been implemented in a web resource called ""IsoFunc"", which is freely available for the global scientific community through http://guanlab.ccmb.med.umich.edu/isofunc .",2016-05-09 +33088727,Estimating the volume of dirty money in Iran.,"In this study, the volume of dirty money in Iran was estimated. The data belonged to the period of 1997-2019, and was taken from the Central Bank of Iran (website: https://www.cbi.ir). Fuzzy logic was used to estimate the underground economy. Fuzzy theory can mathematically formulate many variables that are imprecise and ambiguous concepts. This theory is appropriate for reasoning, inference, control, and decision-making under uncertainty. This approach works in conditions of uncertainty. In cases in which the variables are inaccurate, this method is used. Fuzzy set theory is a generalization of the set theory. The underground economy is important in estimating the amount of dirty money and has a positive effect on this amount. 
The effect of the underground economy was investigated using the vector autoregressive (VAR) and vector error correction (VECM) models. • This article applied fuzzy logic to estimate the underground economy. • The method presented in this article can be useful for researchers and managers in the monetary trend of economics. • The fuzzy method is the best way to estimate the size of the underground economy because it is a measure of uncertainty.",2020-09-28 +31008002,Correlations Between the Thermosphere's Semiannual Density Variations and Infrared Emissions Measured With the SABER Instrument.,"This paper presents measurements of the amplitudes and timings of the combined, annual, and semiannual variations of thermospheric neutral density, and a comparison of these density variations with measurements of the infrared emissions from carbon dioxide and nitric oxide in the thermosphere. The density values were obtained from measurements of the atmospheric drag experienced by the Challenging Minisatellite Payload, Gravity Recovery and Climate Experiment A, Gravity field and Ocean Circulation Explorer, and three Swarm satellites, while the optical emissions were measured with the Sounding of the Atmosphere using Broadband Emission Radiometry (SABER) instrument on the Thermosphere Ionosphere Mesosphere Energetics and Dynamics satellite. These data span a time period of 16 years. A database containing global average densities that were derived from the orbits of about 5,000 objects (Emmert, 2009, https://doi.org/10.1029/2009JA014102, 2015b, https://doi.org/10.1002/2015JA021047) was employed for calibrating these density data. A comparison with the NRLMSISE-00 model was used to derive measurements of how much the density changes over time due to these seasonal variations. It is found that the seasonal density oscillations have significant variations in amplitude and timing. 
In order to test the practicality of using optical emissions as a monitoring tool, the SABER data were fit to the measured variations. Even the most simple fit that used only filtered carbon dioxide emissions had good correlations with the measured oscillations. However, the density oscillations were also well predicted by a simple Fourier series, contrary to original expectations. Nevertheless, measurements of the optical emissions from the thermosphere are expected to have a role in future understanding and prediction of the semiannual variations.",2018-10-27 +32991866,The Neurosphere Simulator: An educational online tool for modeling neural stem cell behavior and tissue growth.,"Until very recently, distance education, including digital science labs, served a rather small portion of postsecondary students in the United States and many other countries. This situation has, however, dramatically changed in 2020 in the wake of the COVID-19 pandemic, which forced colleges to rapidly transit from face-to-face instructions to online classes. Here, we report the development of an interactive simulator that is freely available on the web (http://neurosphere.cos.northeastern.edu/) for teaching lab classes in developmental biology. This simulator is based on cellular automata models of neural-stem-cell-driven tissue growth in the neurosphere assay. By modifying model parameters, users can explore the role in tissue growth of several developmental mechanisms, such as regulation of mitosis or apoptotic cell death by contact inhibition. Besides providing an instantaneous animation of the simulated development of neurospheres, the Neurosphere Simulator tool offers also the possibility to download data for detailed analysis. The simulator function is complemented by a tutorial that introduces students to computational modeling of developmental processes.",2020-09-28 +31648100,RR-APET - Heart rate variability analysis software.,"

Background and objectives

Heart rate variability (HRV) has increasingly been linked to medical phenomena and several HRV metrics have been found to be good indicators of patient health. This has enabled generalised treatment plans to be developed in order to respond to subtle personal differences that are reflected in HRV metrics. There are several established HRV analysis platforms and methods available within the literature, some of which provide command line operation across databases but do not offer extensive graphical user interface (GUI) and editing functionality, while others offer extensive ECG editing but are not feasible over large datasets without considerable manual effort. The aim of this work is to provide a comprehensive open-source package, in a well-known and multi-platform language, that offers considerable graphical signal editing features, flexibility within the algorithms used for R-peak detection and HRV quantification, and includes graphical functionality for batch processing, thereby providing a platform suited to either physician or researcher.

Methods

RR-APET's software was developed in the Python language and is modular in format, providing a range of different modules for established R-peak detection algorithms, as well as an embedded template for alternate algorithms. These modules also include several easily adjustable features, allowing the user to optimise any of the algorithms for different ECG signals or databases. Additionally, the software's user-friendly GUI platform can be operated by both researchers and medical professionals to accomplish different tasks, such as the in-depth visual analysis of a single ECG or the analysis of multiple signals in a single iteration using batch processing. RR-APET also supports several popular data formats, including text, HDF5, Matlab, and Waveform Database (WFDB) files.

Results

The RR-APET platform presents multiple metrics that quantify the heart rate variability features of an R-to-R interval series, including time-domain, frequency-domain, and nonlinear metrics. When known R-peak annotations are available, positive predictability, sensitivity, detection error rate, and accuracy measures are also provided to assess the validity of the implemented R-peak detection algorithm. RR-APET scored an overall usability rating of 4.16 out of a possible 5, when released on a trial basis for user evaluation.

Conclusions

With its unique ability to both create and operate on large databases, this software provides a strong platform from which to conduct further research in the field of HRV analytics and its correlation to patient healthcare outcomes. This software is available free of charge at https://gitlab.com/MegMcC/rr-apet-hrv-analysis-software and can be operated as an executable file within Windows, Mac and Linux systems.",2019-10-12 +33072812,Income Inequality Is Associated With Low Cumulative Antiretroviral Adherence in Persons With Human Immunodeficiency Virus.,"

Background

The adherence biomarker tenofovir diphosphate (TFV-DP) in dried blood spots (DBS) is associated with viral suppression and predicts future viremia. However, its association with social determinants of health (SDoH) in people with human immunodeficiency virus (PWH) remains unknown.

Methods

Dried blood spots for TFV-DP were longitudinally collected from a clinical cohort of PWH receiving tenofovir disoproxil fumarate-based therapy (up to 3 visits over 48 weeks) residing in 5 Colorado counties. To assign SDoH, zip codes at enrollment were matched with SDoH data from AIDSVu (https://aidsvu.org/). The SDoH included household income, percentage living in poverty, education level, and income inequality (quantified using Gini coefficient, where 0 and 1 represent perfect income equality and inequality, respectively). Log-transformed TFV-DP concentrations were analyzed using a mixed-effects model to estimate percentage change (95% confidence interval) in TFV-DP for every significant change in the SDoH and adjusted for relevant covariates including age, gender, race, estimated glomerular filtration rate, body mass index, hematocrit, CD4+ T-cell count, antiretroviral drug class, and 3-month self-reported adherence.

Results

Data from 430 PWH totaling 950 person-visits were analyzed. In an adjusted analysis, income inequality was inversely associated with TFV-DP in DBS. For every 0.1 increase in the Gini coefficient, TFV-DP concentrations decreased by 9.2% (-0.5 to -17.1; P = .039). This remained significant after adjusting for human immunodeficiency virus viral suppression, where a 0.1 increase in Gini was associated with a decrease of 8.7% (-0.3 to -17.9; P = .042) in TFV-DP.

Conclusions

Higher income inequality was associated with lower cumulative antiretroviral adherence. These findings support the need for further research on how SDoH impact adherence and clinical care.",2020-08-29 +33659830,Genomic regions of Solanum tuberosum L. associated with the tuber eye depth.,"Potato (Solanum tuberosum L.) is one of the most important food crops in the world. The genome of this potato species is autotetraploid and has a high level of heterozygosity, also this potato species is a cross-pollinated plant. These characteristics complicate the genetic analysis and breeding process. The tuber's eye depth is an important trait that affects the suitability of potato varieties for processing. Potato breeding for this trait is based on phenotypic assessment. Identification of the loci that control tuber eye depth would allow diagnostic markers for the marker-assisted selection to be created. The aim of this study is to search for loci associated with the eye depth by analyzing Solanum tuberosum varieties from the GenAgro collection of the Institute of Cytology and Genetics of the Siberian Branch of the Russian Academy of Sciences, genotyped using the Illumina 22K SNP potato array DNA chip. The 24 significant markers associated with the ""eye depth"" trait were identified using 15,214 SNP markers genotyped with the Illumina 22K SNP potato array chip and the general linear model (GLM) taking into account the population structure. Data obtained showed the presence of SNPs in four genomic regions: on chromosome 4 (1 marker in the 3.92 Mb area), 5 (1 marker in the 4.67 Mb area) and 10 (1 marker in the 4.87 Mb area and 21 markers in the region between 48.1-48.9 Mb). The results of localization in the region 48.1-48.9 Mb of chromosome 10 correspond to previously published studies, the remaining three regions were detected for the first time. 
DNA sections containing SNPs linked to the tuber's eye depth were studied in the SolTub_3.0 potato genome assembly (https://plants.ensembl.org/). KASP markers were developed based on the data obtained. It will be possible to screen the breeding material and to breed the varieties more effectively using current markers associated with a shallow tuber's eye depth.",2020-08-01 +32379868,HPOLabeler: improving prediction of human protein-phenotype associations by learning to rank.,"

Motivation

Annotating human proteins by abnormal phenotypes has become an important topic. Human Phenotype Ontology (HPO) is a standardized vocabulary of phenotypic abnormalities encountered in human diseases. As of November 2019, only <4000 proteins have been annotated with HPO. Thus, a computational approach for accurately predicting protein-HPO associations would be important, whereas no methods have outperformed a simple Naive approach in the second Critical Assessment of Functional Annotation, 2013-2014 (CAFA2).

Results

We present HPOLabeler, which is able to use a wide variety of evidence, such as protein-protein interaction (PPI) networks, Gene Ontology, InterPro, trigram frequency and HPO term frequency, in the framework of learning to rank (LTR). LTR has been proved to be powerful for solving large-scale, multi-label ranking problems in bioinformatics. Given an input protein, LTR outputs the ranked list of HPO terms from a series of input scores given to the candidate HPO terms by component learning models (logistic regression, nearest neighbor and a Naive method), which are trained from given multiple evidence. We empirically evaluate HPOLabeler extensively through mainly two experiments of cross validation and temporal validation, for which HPOLabeler significantly outperformed all component models and competing methods including the current state-of-the-art method. We further found that (i) PPI is most informative for prediction among diverse data sources and (ii) low prediction performance of temporal validation might be caused by incomplete annotation of new proteins.

Availability and implementation

http://issubmission.sjtu.edu.cn/hpolabeler/.

Contact

zhusf@fudan.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +31398302,Evidence-Based Interventions for Learners Who Are Deaf and/or Multilingual: A Systematic Quality Review.,"Purpose Many educators and speech-language pathologists have difficulty providing effective interventions to the growing population of d/Deaf and hard-of-hearing (DHH) learners who use more than 1 language. The purpose of this review article was to identify evidence-based interventions for speech, language, and literacy used with DHH multilingual learners (DMLs), monolingual DHH learners, and hearing bilingual learners without hearing loss. Interventions used with these groups can inform the practice of professionals providing services to DMLs. Method This review article considered speech, language, and literacy interventions used with DHH and hearing bilingual learners from birth to 21 years of age. The following electronic databases were searched: Academic Search Complete/EBSCO (CINAHL, Education, ERIC), Linguistics & Language Behavior Abstracts, PsycINFO, and PubMed. Data describing article, participant, methodological, and intervention variables were extracted from studies. The methodological quality of studies was examined using the Council for Exceptional Children's (2014) standards for evidence-based practice in special education. Results A total of 144 studies were reviewed, describing over 9,370 learners aged 1.8-22.0 years. Two studies investigated DMLs, 76 investigated DHH learners, and 67 investigated hearing bilingual learners. A total of 146 different interventions were examined. Most studies reported positive effects. Only 17 studies met all quality indicators specified by the Council for Exceptional Children (2014): 7 examined DHH learners, and 10 examined hearing bilingual learners. There was insufficient evidence for any intervention to be considered an evidence-based intervention, although 6 could potentially contribute to evidence-based practice. 
Conclusions No evidence-based interventions for DMLs were identified. A small number of interventions examined in high-quality studies of DHH and hearing bilingual learners were identified, which may be appropriate for use with DMLs following further investigation. Supplemental Material https://doi.org/10.23641/asha.9108386.",2019-08-09 +30714452,Enhanced expression of son of sevenless homolog 1 is predictive of poor prognosis in uveal malignant melanoma patients.,"PURPOSE:The work outlined herein investigated the prognosis value and the potential role son of sevenless homolog 1 (SOS1) played in uveal melanoma (UM). METHODS:We analyzed the mRNA expression level of SOS1 in primary UM cells based on the GSE44295 dataset obtained from the Gene Expression Omnibus (GEO, http://www.ncbi.nlm.nih.gov/geo/ ) database. The correlation between SOS1 expression and clinical characteristics were analyzed by Chi-squared (χ2) test. Then we used SOS1 siRNA to downregulate SOS1 expression in M23 cells. The effect of knockdown SOS1 on cell proliferation was studied using the Cell-Counting Kit-8 and colony formation assays. The influence of silencing SOS1 on cell motility was explored using wound-healing assays and transwell assays. In addition, the relationship between SOS1 and the MAPK signaling pathway was analyzed by western blot. RESULTS:Our results demonstrated that the mRNA expression level of SOS1 was markedly upregulated in UM cells (p < 0.001) and correlated with poor prognosis in UM patients (p = 0.015). Moreover, SOS1 mRNA expression level was found to be positively associated with histological-type (p = 0.043) and death (p = 0.012). Knockdown of SOS1 caused an inhibition on M23 cell proliferation, migration, and invasion. Moreover, the phosphorylation levels of MEK and ERK were reduced in UM cells after downregulating SOS1 expression (p < 0.010). 
CONCLUSION:Our data demonstrated that SOS1 might play a facilitating role in M23 cell growth and motility by regulating the MAPK signaling pathway. Furthermore, the data suggested that SOS1 may serve as an UM predictor of prognosis as well as a therapeutic target.",2019-02-04 +31238884,Integration of CLIP experiments of RNA-binding proteins: a novel approach to predict context-dependent splicing factors from transcriptomic data.,"

Background

Splicing is a genetic process that has important implications in several diseases including cancer. Deciphering the complex rules of splicing regulation is crucial to understand and treat splicing-related diseases. Splicing factors and other RNA-binding proteins (RBPs) play a key role in the regulation of splicing. The specific binding sites of an RBP can be measured using CLIP experiments. However, to unveil which RBPs regulate a condition, it is necessary to have a priori hypotheses, as a single CLIP experiment targets a single protein.

Results

In this work, we present a novel methodology to predict context-specific splicing factors from transcriptomic data. For this, we systematically collect, integrate and analyze more than 900 CLIP experiments stored in four CLIP databases: POSTAR2, CLIPdb, DoRiNA and StarBase. The analysis of these experiments shows the strong coherence between the binding sites of RBPs of similar families. Augmenting this information with expression changes, we are able to correctly predict the splicing factors that regulate splicing in two gold-standard experiments in which specific splicing factors are knocked-down.

Conclusions

The methodology presented in this study allows the prediction of active splicing factors in either cancer or any other condition by only using the information of transcript expression. This approach opens a wide range of possible studies to understand the splicing regulation of different conditions. A tutorial with the source code and databases is available at https://gitlab.com/fcarazo.m/sfprediction .",2019-06-25 +30863673,Improved taxonomic assignment of rumen bacterial 16S rRNA sequences using a revised SILVA taxonomic framework.,"The taxonomy and associated nomenclature of many taxa of rumen bacteria are poorly defined within databases of 16S rRNA genes. This lack of resolution results in inadequate definition of microbial community structures, with large parts of the community designated as incertae sedis, unclassified, or uncultured within families, orders, or even classes. We have begun resolving these poorly-defined groups of rumen bacteria, based on our desire to name these for use in microbial community profiling. We used the previously-reported global rumen census (GRC) dataset consisting of >4.5 million partial bacterial 16S rRNA gene sequences amplified from 684 rumen samples and representing a wide range of animal hosts and diets. Representative sequences from the 8,985 largest operational units (groups of sequence sharing >97% sequence similarity, and covering 97.8% of all sequences in the GRC dataset) were used to identify 241 pre-defined clusters (mainly at genus or family level) of abundant rumen bacteria in the ARB SILVA 119 framework. A total of 99 of these clusters (containing 63.8% of all GRC sequences) had no unique or had inadequate taxonomic identifiers, and each was given a unique nomenclature. We assessed this improved framework by comparing taxonomic assignments of bacterial 16S rRNA gene sequence data in the GRC dataset with those made using the original SILVA 119 framework, and three other frameworks. 
The two SILVA frameworks performed best at assigning sequences to genus-level taxa. The SILVA 119 framework allowed 55.4% of the sequence data to be assigned to 751 uniquely identifiable genus-level groups. The improved framework increased this to 87.1% of all sequences being assigned to one of 871 uniquely identifiable genus-level groups. The new designations were included in the SILVA 123 release (https://www.arb-silva.de/documentation/release-123/) and will be perpetuated in future releases.",2019-03-05 +31243429,Comprehensive study of the exposome and omic data using rexposome Bioconductor Packages.,"SUMMARY:Genomics has dramatically improved our understanding of the molecular origins of certain human diseases. Nonetheless, our health is also influenced by the cumulative impact of exposures experienced across the life course (termed 'exposome'). The study of the high-dimensional exposome offers a new paradigm for investigating environmental contributions to disease etiology. However, there is a lack of bioinformatics tools for managing, visualizing and analyzing the exposome. The analysis data should include both association with health outcomes and integration with omic layers. We provide a generic framework called rexposome project, developed in the R/Bioconductor architecture that includes object-oriented classes and methods to leverage high-dimensional exposome data in disease association studies including its integration with a variety of high-throughput data types. The usefulness of the package is illustrated by analyzing a real dataset including exposome data, three health outcomes related to respiratory diseases and its integration with the transcriptome and methylome. AVAILABILITY AND IMPLEMENTATION:rexposome project is available at https://isglobal-brge.github.io/rexposome/. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-12-01 +30229048,Influence of support media supplementation to reduce the inhibition of anaerobic digestion by phenol and ammonia: Effect on degradation performances and microbial dynamics.,"Data in this article provide detailed information on the microbial dynamics within digesters supplemented with different support media (two types of zeolites, two types of activated carbons, one type of chitosan, one control) in presence of different inhibitory conditions (control without inhibitor, 1.3 g/L of phenol and 19 g/L of total ammonia nitrogen). Data include the operational conditions and degradation performance measurements, as well as microbial community analysis, by 16S rRNA gene sequencing, at different time points for the different conditions (samples). Sequencing data were generated by using IonTorrent PGM sequencer. This data is associated with the research articles ""Improving anaerobic digestion with support media: Mitigation of ammonia inhibition and effect on microbial communities?"" (Poirier et al., 2017) [1] and ""Support media can steer methanogenesis in presence of phenol through biotic and abiotic effects"" (Poirier et al., 2018) [2]. The sequencing data have been deposited with links to BioProject accession number PRJNA450513, in the NCBI BioProject database (https://www.ncbi.nlm.nih.gov/sra/?term=PRJNA450513). Samples accession numbers go from SAMN08940368 to SAMN08940426.",2018-06-26 +31825479,Arteria: An automation system for a sequencing core facility. ,"In recent years, nucleotide sequencing has become increasingly instrumental in both research and clinical settings. This has led to an explosive growth in sequencing data produced worldwide. As the amount of data increases, so does the need for automated solutions for data processing and analysis. 
The concept of workflows has gained favour in the bioinformatics community, but there is little in the scientific literature describing end-to-end automation systems. Arteria is an automation system that aims at providing a solution to the data-related operational challenges that face sequencing core facilities. Arteria is built on existing open source technologies, with a modular design allowing for a community-driven effort to create plug-and-play micro-services. In this article we describe the system, elaborate on the underlying conceptual framework, and present an example implementation. Arteria can be reduced to 3 conceptual levels: orchestration (using an event-based model of automation), process (the steps involved in processing sequencing data, modelled as workflows), and execution (using a series of RESTful micro-services). This creates a system that is both flexible and scalable. Arteria-based systems have been successfully deployed at 3 sequencing core facilities. The Arteria Project code, written largely in Python, is available as open source software, and more information can be found at https://arteria-project.github.io/ . 
We describe the Arteria system and the underlying conceptual framework, demonstrating how this model can be used to automate data handling and analysis in the context of a sequencing core facility.",2019-12-01 +30445619,GTRD: a database on gene transcription regulation-2019 update.,"The current version of the Gene Transcription Regulation Database (GTRD; http://gtrd.biouml.org) contains information about: (i) transcription factor binding sites (TFBSs) and transcription coactivators identified by ChIP-seq experiments for Homo sapiens, Mus musculus, Rattus norvegicus, Danio rerio, Caenorhabditis elegans, Drosophila melanogaster, Saccharomyces cerevisiae, Schizosaccharomyces pombe and Arabidopsis thaliana; (ii) regions of open chromatin and TFBSs (DNase footprints) identified by DNase-seq; (iii) unmappable regions where TFBSs cannot be identified due to repeats; (iv) potential TFBSs for both human and mouse using position weight matrices from the HOCOMOCO database. Raw ChIP-seq and DNase-seq data were obtained from ENCODE and SRA, and uniformly processed. ChIP-seq peaks were called using four different methods: MACS, SISSRs, GEM and PICS. Moreover, peaks for the same factor and peak calling method, albeit using different experiment conditions (cell line, treatment, etc.), were merged into clusters. To reduce noise, such clusters for different peak calling methods were merged into meta-clusters; these were considered to be non-redundant TFBS sets. Moreover, extended quality control was applied to all ChIP-seq data. Web interface to access GTRD was developed using the BioUML platform. It provides browsing and displaying information, advanced search possibilities and an integrated genome browser.",2019-01-01 +30742459,"""Development of a self-care assessment for psychologists"": Correction to Dorociak (2017).","Reports an error in ""Development of the Professional Self-Care Scale"" by Katherine E. Dorociak, Patricia A. Rupert, Fred B. 
Bryant and Evan Zahniser (Journal of Counseling Psychology, 2017[Apr], Vol 64[3], 325-334). In the article ""Development of a Self-Care Assessment for Psychologists"" by Katherine E. Dorociak, Patricia A. Rupert, Fred B. Bryant, and Evan Zahniser (Journal of Counseling Psychology, 2017, Vol. 64, No. 3, 325-334. http://dx.doi.org/10.1037/cou0000206), the author note has been updated with the following acknowledgment: ""Some of the general ideas presented in this article were also explored in the primary author's undergraduate level senior thesis project, advised by Drs. Greg Neimeyer and Jennifer Taylor, at the University of Florida."" Also, Loyola University Chicago has been added to note the location of the primary author's Master's thesis project on which certain ideas and data presented in this article were based. As part of this correction, the name of the scale described in the article was changed from Professional Self-Care Scale (PSCS) to Self-Care Assessment for Psychologists (SCAP). To reflect the new scale name, the article title was also changed from ""Development of the Professional Self-Care Scale,"" to ""Development of a Self-Care Assessment for Psychologists."" The online version of this article has been corrected. (The following abstract of the original article appeared in record 2017-10882-001.) In recent years, there has been an increased emphasis on the importance of self-care for psychologists and other mental health professionals. With the growth of positive psychology and preventive medicine, self-care is an emerging topic, promulgated as a means of avoiding the adverse effects of stress and promoting professional functioning and well-being. However, the research on self-care is limited because of the lack of an empirically based, psychometrically sound measure of this construct. Thus, the purpose of this project was to develop a measure of professional self-care. 
Professional psychologists were the focus of study, with the goal being to develop a measure that can be used in this population and similar groups of professionals. Based on expert feedback and a preliminary study of 422 licensed psychologists in Illinois, a 5-factor, 21-item scale was created. Factor analysis identified the following self-care factors: Professional Support, Professional Development, Life Balance, Cognitive Awareness, and Daily Balance. Preliminary analyses provided initial support for the validity of the 5 factors. A follow-up study was conducted with a second sample of clinical psychologists. The 5-factor structure provided a good fit to the data with the second sample. Thus, based on factor analysis and validity data, a 5-factor, 21-item Professional Self-Care Scale was established for further study and use in future research. (PsycINFO Database Record (c) 2019 APA, all rights reserved).",2019-02-11 +31580400,VikNGS: a C++ variant integration kit for next generation sequencing association analysis.,"Summary: Integration of next generation sequencing data (NGS) across different research studies can improve the power of genetic association testing by increasing sample size and can obviate the need for sequencing controls. If differential genotype uncertainty across studies is not accounted for, combining datasets can produce spurious association results. We developed the Variant Integration Kit for NGS (VikNGS), a fast cross-platform software package, to enable aggregation of several datasets for rare and common variant genetic association analysis of quantitative and binary traits with covariate adjustment. VikNGS also includes a graphical user interface, power simulation functionality and data visualization tools.

Availability and implementation: The VikNGS package can be downloaded at http://www.tcag.ca/tools/index.html.

Supplementary information: Supplementary data are available at Bioinformatics online.",2020-02-01 +25534749,EssOilDB: a database of essential oils reflecting terpene composition and variability in the plant kingdom.,"Plant essential oils are complex mixtures of volatile organic compounds, which play indispensable roles in the environment, for the plant itself, as well as for humans. The potential biological information stored in essential oil composition data can provide an insight into the silent language of plants, and the roles of these chemical emissions in defense, communication and pollinator attraction. In order to decipher volatile profile patterns from a global perspective, we have developed the ESSential OIL DataBase (EssOilDB), a continually updated, freely available electronic database designed to provide knowledge resource for plant essential oils, that enables one to address a multitude of queries on volatile profiles of native, invasive, normal or stressed plants, across taxonomic clades, geographical locations and several other biotic and abiotic influences. To our knowledge, EssOilDB is the only database in the public domain providing an opportunity for context based scientific research on volatile patterns in plants. EssOilDB presently contains 123 041 essential oil records spanning a century of published reports on volatile profiles, with data from 92 plant taxonomic families, spread across diverse geographical locations all over the globe. We hope that this huge repository of VOCs will facilitate unraveling of the true significance of volatiles in plants, along with creating potential avenues for industrial applications of essential oils. We also illustrate the use of this database in terpene biology and show how EssOilDB can be used to complement data from computational genomics to gain insights into the diversity and variability of terpenoids in the plant kingdom. 
EssOilDB would serve as a valuable information resource, for students and researchers in plant biology, in the design and discovery of new odor profiles, as well as for entrepreneurs--the potential for generating consumer specific scents being one of the most attractive and interesting topics in the cosmetic industry. Database URL: http://nipgr.res.in/Essoildb/",2014-12-22 +31353404,Mammalian Annotation Database for improved annotation and functional classification of Omics datasets from less well-annotated organisms. ,"Next-generation sequencing technologies and the availability of an increasing number of mammalian and other genomes allow gene expression studies, particularly RNA sequencing, in many non-model organisms. However, incomplete genome annotation and assignments of genes to functional annotation databases can lead to a substantial loss of information in downstream data analysis. To overcome this, we developed Mammalian Annotation Database tool (MAdb, https://madb.ethz.ch) to conveniently provide homologous gene information for selected mammalian species. The assignment between species is performed in three steps: (i) matching official gene symbols, (ii) using ortholog information contained in Ensembl Compara and (iii) pairwise BLAST comparisons of all transcripts. In addition, we developed a new tool (AnnOverlappeR) for the reliable assignment of the National Center for Biotechnology Information (NCBI) and Ensembl gene IDs. The gene lists translated to gene IDs of well-annotated species such as a human can be used for improved functional annotation with relevant tools based on Gene Ontology and molecular pathway information. We tested the MAdb on a published RNA-seq data set for the pig and showed clearly improved overrepresentation analysis results based on the assigned human homologous gene identifiers. 
Using the MAdb revealed a similar list of human homologous genes and functional annotation results regardless of whether starting with gene IDs from NCBI or Ensembl. The MAdb database is accessible via a web interface and a Galaxy application.",2019-01-01 +31533900,Drug response prediction by ensemble learning and drug-induced gene expression signatures.,"Chemotherapeutic response of cancer cells to a given compound is one of the most fundamental information one requires to design anti-cancer drugs. Recently, considerable amount of drug-induced gene expression data has become publicly available, in addition to cytotoxicity databases. These large sets of data provided an opportunity to apply machine learning methods to predict drug activity. However, due to the complexity of cancer drug mechanisms, none of the existing methods is perfect. In this paper, we propose a novel ensemble learning method to predict drug response. In addition, we attempt to use the drug screen data together with two novel signatures produced from the drug-induced gene expression profiles of cancer cell lines. Finally, we evaluate predictions by in vitro experiments in addition to the tests on data sets. The predictions of the methods, the signatures and the software are available from http://mtan.etu.edu.tr/drug-response-prediction/.",2018-07-06 +31016217,A dataset of meta-analyses on crop diversification at the global scale.,"Numerous meta-analyses have been conducted in the last three decades to assess the productive and environmental benefits resulting from a diversification of cropping systems. These meta-analyses assessed one or several diversification strategies (e.g., rotations, cover crops, agroforestry) according to various outcomes (e.g., productivity, profitability, biodiversity). To date, no dataset has provided a comprehensive synthesis of existing experimental data on crop diversification. 
We present here a dataset containing 2382 effect sizes published in 99 meta-analyses covering 3736 experimental studies worldwide (https://figshare.com/s/c15a93e96c95f89ddd89). We also provide an extensive appraisal of the quality of each meta-analysis and a quantification of the redundancy of primary studies between meta-analyses. Our database hence provides (i) a quantification of the impacts of a variety of diversification strategies on crop production, the environment and economic profitability at the global scale and, (ii) a quality and redundancy assessment that may be used as a reference for future studies.",2019-04-04 +31348732,Clinical Outcomes Following Language-Specific Attention Treatment Versus Direct Attention Training for Aphasia: A Comparative Effectiveness Study.,"Purpose This study was conducted to examine the comparative effectiveness of 2 different approaches, 1 domain-specific and the other domain-general, to language and attention rehabilitation in participants with stroke-induced aphasia. The domain-specific treatment consisted of language-specific attention treatment (L-SAT), and the domain-general treatment consisted of direct attention training (DAT) using the computerized exercises included in Attention Process Training-3 (Sohlberg & Mateer, 2010). Method Four individuals with mild-moderate aphasia participated in this study. A randomized controlled cross-over single-subject design was used to assess the effectiveness of the 2 treatments administered in this study. Treatment outcomes were evaluated in terms of participants' task performance for each program, standardized language and attention measures, tests of functional abilities, and patient-reported outcomes. Results Visual comparisons demonstrated linear improvements following L-SAT and variable patterns following DAT. Omnibus effect sizes were statistically significant for 9 of the 13 L-SAT tasks. 
The weighted standardized effect sizes for posttreatment changes following L-SAT ranged from small to large, with the exception of 1 task. The average group gain following DAT was 5%. The Western Aphasia Battery-Revised Aphasia Quotients (Kertesz, 2007) demonstrated reliable improvements for 3 of the 4 participants following L-SAT, whereas only 1 of the participants improved reliably following DAT. The margins of improvements in functional language were substantially larger following L-SAT than DAT. Performance on the Test of Everyday Attention improved significantly for 2 participants following L-SAT and for 1 participant following DAT on selected Test of Everyday Attention (Robertson, Ward, Ridgeway, & Nimmo-Smith, 1994) subtests. Patient-reported outcomes for communication and attention following treatment favored L-SAT compared to DAT. Conclusions The results support the view that attention is allocated in ways that are particular to specific tasks rather than as a general resource that is allocated equivalently to all processing tasks. Domain-specific treatment for language deficits due to attentional impairment appears to be a suitable, if not preferable, approach for aphasia rehabilitation. Supplemental Material https://doi.org/10.23641/asha.8986427.",2019-07-25 +27037912,GAMDB: a web resource to connect microRNAs with autophagy in gerontology.,"

Objectives

MicroRNAs (miRNAs) are endogenous ~23 nucleotides (nt) RNAs, regulating gene expression by pairing to the mRNAs of protein-coding genes to direct their post-transcriptional repression. Both in normal and aberrant activities, miRNAs contribute to a recurring paradigm of cellular behaviors in pathological settings, especially in gerontology. Autophagy, a multi-step lysosomal degradation process with function to degrade long-lived proteins and damaged organelles, has significant impact on gerontology. Thus, elucidating how miRNAs participate in autophagy may enlarge the scope of miRNA in autophagy and facilitate researches in gerontology.

Materials and methods

Herein, based upon the published studies, predicted targets and gerontology-related diseases, we constructed a web resource named Gerontology-Autophagic-MicroRNA Database (GAMDB) (http://gamdb.liu-lab.com/index.php), which contained 836 autophagy-related miRNAs, 197 targeted genes/proteins and 56 aging-related diseases such as Parkinson' disease, Alzheimer's disease and Huntington's disease.

Results and conclusion

We made use of large amounts of data to elucidate the intricate relationships between microRNA-regulated autophagic mechanisms and gerontology. This database will facilitate better understanding of autophagy regulation network in gerontology and thus promoting gerontology-related therapy in the future.",2016-03-31 +,S171. ALTERED WHITE MATTER CONNECTIVITY IN PATIENTS WITH SCHIZOPHRENIA USING PUBLIC NEUROIMAGING DATA FROM SCHIZCONNECT,"Abstract

Background

Several studies have produced a large body of evidence for white matter abnormalities related to schizophrenia. The literature has yet to achieve a state of consistency and reproducibility, and reported low integrity of white matter tracts vary between studies. Whole brain image study with large sample size is needed to address this issue. We investigated white matter integrity in connections between regions of interests (ROI) in the same hemisphere in patients with schizophrenia and healthy controls with public neuroimaging data from SchizConnect (http://schizconnect.org).

Methods

A final data set was consisted of 129 healthy controls and 122 schizophrenia patients. For each diffusion weighted image (DWI), a two-tensor full-brain tractography was performed, and DWI images were parcellated by processing and registering the T1 images with FreeSurfer and the Advanced Normalization Tools. We extracted a total of 36 tracts in the both hemisphere connecting ROIs in the same hemisphere with white matter query language. We compared means of diffusion measures between patients and controls, and evaluated correlations with Letter-number sequencing (LNS) test, Vocabulary test, letter fluency test, category fluency test, and trails A of the Trail Making Test (TMT). The Benjamini-Hochberg procedure with false discovery rate (FDR) of 0.05 was used to correct for multiple comparisons.

Results

We found a significant RD and TR increase of the left thalamo-occipital tracts and the right uncinate fascicle (UF), and a significant RD increase of the right middle longitudinal fascicle (MDLF), and the right superior longitudinal fascicle (SLF) ii in schizophrenia. There were correlations between the TR in the left thalamo-occipital tracts and letter fluency test, and the RD in the right SLF ii and LNS test, which did not survive after correction for multiple comparisons.

Discussion

These results indicate widespread abnormalities of white matter fiber tracts in schizophrenia, contributing to the pathophysiology of schizophrenia.",2018-04-01 +32610158,PrGeFNE: Predicting disease-related genes by fast network embedding.,"Identifying disease-related genes is of importance for understanding of molecule mechanisms of diseases, as well as diagnosis and treatment of diseases. Many computational methods have been proposed to predict disease-related genes, but how to make full use of multi-source biological data to enhance the ability of disease-gene prediction is still challenging. In this paper, we proposed a novel method for predicting disease-related genes by using fast network embedding (PrGeFNE), which can integrate multiple types of associations related to diseases and genes. Specifically, we first constructed a heterogeneous network by using phenotype-disease, disease-gene, protein-protein and gene-GO associations; and low-dimensional representation of nodes is extracted from the network by using a fast network embedding algorithm. Then, a dual-layer heterogeneous network was reconstructed by using the low-dimensional representation, and a network propagation was applied to the dual-layer heterogeneous network to predict disease-related genes. Through cross-validation and newly added-association validation, we displayed the important roles of different types of association data in enhancing the ability of disease-gene prediction, and confirmed the excellent performance of PrGeFNE by comparing to state-of-the-art algorithms. Furthermore, we developed a web tool that can facilitate researchers to search for candidate genes of different diseases predicted by PrGeFNE, along with the enrichment analysis of GO and pathway on candidate gene set. This may be useful for investigation of diseases' molecular mechanisms as well as their experimental validations. 
The web tool is available at http://bioinformatics.csu.edu.cn/prgefne/.",2020-06-28 +32871558,Considerations for Use of Hematopoietic Growth Factors in Patients With Cancer Related to the COVID-19 Pandemic.,"Hematopoietic growth factors, including erythrocyte stimulating agents (ESAs), granulocyte colony-stimulating factors, and thrombopoietin mimetics, can mitigate anemia, neutropenia, and thrombocytopenia resulting from chemotherapy for the treatment of cancer. In the context of pandemic SARS-CoV-2 infection, patients with cancer have been identified as a group at high risk of morbidity and mortality from this infection. Our subcommittee of the NCCN Hematopoietic Growth Factors Panel convened a voluntary group to review the potential value of expanded use of such growth factors in the current high-risk environment. Although recommendations are available on the NCCN website in the COVID-19 Resources Section (https://www.nccn.org/covid-19/), these suggestions are provided without substantial context or reference. Herein we review the rationale and data underlying the suggested alterations to the use of hematopoietic growth factors for patients with cancer in the COVID-19 era.",2020-09-01 +30137231,Increased sensitivity with automated validation of XL-MS cleavable peptide crosslinks.,"MOTIVATION:Peptides crosslinked with cleavable chemical crosslinkers are identified with mass spectrometry by independent database search of spectra associated with the two linked peptides. A major challenge is to combine together the evidence of the two peptides into an overall assessment of the two-peptide crosslink. RESULTS:Here, we describe software that models crosslink specific information to automatically validate XL-MS cleavable peptide crosslinks. 
Using a dataset of crosslinked protein mixtures, we demonstrate that it computes accurate and highly discriminating probabilities, enabling as many as 75% more identifications than was previously possible using only search scores and a predictable false discovery rate. AVAILABILITY AND IMPLEMENTATION:XLinkProphet software is freely available on the web at http://brucelab.gs.washington.edu. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-03-01 +29878154,TAM 2.0: tool for MicroRNA set analysis.,"With the rapid accumulation of high-throughput microRNA (miRNA) expression profile, the up-to-date resource for analyzing the functional and disease associations of miRNAs is increasingly demanded. We here describe the updated server TAM 2.0 for miRNA set enrichment analysis. Through manual curation of over 9000 papers, a more than two-fold growth of reference miRNA sets has been achieved in comparison with previous TAM, which covers 9945 and 1584 newly collected miRNA-disease and miRNA-function associations, respectively. Moreover, TAM 2.0 allows users not only to test the functional and disease annotations of miRNAs by overrepresentation analysis, but also to compare the input de-regulated miRNAs with those de-regulated in other disease conditions via correlation analysis. Finally, the functions for miRNA set query and result visualization are also enabled in the TAM 2.0 server to facilitate the community. The TAM 2.0 web server is freely accessible at http://www.scse.hebut.edu.cn/tam/ or http://www.lirmed.com/tam2/.",2018-07-01 +26851225,GED: a manually curated comprehensive resource for epigenetic modification of gametogenesis.,"Reproductive infertility affects seventh of couples, which is most attributed to the obstacle of gametogenesis. Characterizing the epigenetic modification factors involved in gametogenesis is fundamental to understand the molecular mechanisms and to develop treatments for human infertility. 
Although the genetic factors have been implicated in gametogenesis, no dedicated bioinformatics resource for gametogenesis is available. To elucidate the relationship of epigenetic modification and mammalian gametogenesis, we developed a new database, gametogenesis epigenetic modification database (GED), a manually curated database, which aims at providing a comprehensive resource of epigenetic modification of gametogenesis. The database integrates three kinds information of epigenetic modifications during gametogenesis (DNA methylation, histone modification and RNA regulation), and the gametogenesis has been detailed as 16 stages in seven mammal species (Homo sapiens, Mus musculus, Rattus norvegicus, Sus scrofa, Bos taurus, Capra hircus and Ovis aries). Besides, we have predicted the linear pathways of epigenetic modification which were composed of 211 genes/proteins and microRNAs that were involved in gametogenesis. GED is a user-friendly Web site, through which users can obtain the comprehensive epigenetic factor information and molecular pathways by visiting our database freely. GED is free available at http://gametsepi.nwsuaflmz.com.",2016-02-05 +27079421,A Network of Splice Isoforms for the Mouse.,"The laboratory mouse is the primary mammalian species used for studying alternative splicing events. Recent studies have generated computational models to predict functions for splice isoforms in the mouse. However, the functional relationship network, describing the probability of splice isoforms participating in the same biological process or pathway, has not yet been studied in the mouse. Here we describe a rich genome-wide resource of mouse networks at the isoform level, which was generated using a unique framework that was originally developed to infer isoform functions. This network was built through integrating heterogeneous genomic and protein data, including RNA-seq, exon array, protein docking and pseudo-amino acid composition. 
Through simulation and cross-validation studies, we demonstrated the accuracy of the algorithm in predicting isoform-level functional relationships. We showed that this network enables the users to reveal functional differences of the isoforms of the same gene, as illustrated by literature evidence with Anxa6 (annexin a6) as an example. We expect this work will become a useful resource for the mouse genetics community to understand gene functions. The network is publicly available at: http://guanlab.ccmb.med.umich.edu/isoformnetwork.",2016-04-15 +32323374,Energetic contributions of amino acid residues and its cross-talk to delineate ligand-binding mechanism.,"Receptor-based QSAR approaches can enumerate the energetic contributions of amino acid residues toward ligand binding only when experimental binding affinity is associated. The structural data of protein-ligand complexes are witnessing a tremendous growth in the Protein Data Bank deposited with a few entries on binding affinity. We present here a new approach to compute the Energetic CONTributions of Amino acid residues and its possible Cross-Talk (ECONTACT) to study ligand binding using per-residue energy decomposition, molecular dynamics simulations and rescoring method without the need for experimental binding affinity. This approach recognizes potential cross-talks among amino acid residues imparting a nonadditive effect to the binding affinity with evidence of correlative motions in the dynamics simulations. The protein-ligand interaction energies deduced from multiple structures are decomposed into per-residue energy terms, which are employed as variables to principal component analysis and generated cross-terms. Out of 16 cross-talks derived from eight datasets of protein-ligand systems, the ECONTACT approach is able to associate 10 potential cross-talks with site-directed mutagenesis, free energy, and dynamics simulations data strongly. 
We modeled these key determinants of ligand binding using joint probability density function (jPDF) to identify cross-talks in protein structures. The top two cross-talks identified by ECONTACT approach corroborated with the experimental findings. Furthermore, virtual screening exercise using ECONTACT models better discriminated known inhibitors from decoy molecules. This approach proposes the jPDF metric to estimate the probability of observing cross-talks in any protein-ligand complex. The source code and related resources to perform ECONTACT modeling is available freely at https://www.gujaratuniversity.ac.in/econtact/.",2020-05-01 +32760584,"Stroke care and outcomes in the Department of Neurology in Parakou, Benin: Retrospective cohort study.","

Introduction

Stroke is one of the most common causes of high mortality rates in Africa with many unknown aspects around its prognosis. In this study we aim to describe stroke characteristics and in-hospital mortality of stroke in Parakou.

Methods

This is a retrospective cohort study including all stroke patients admitted to the Department of Neurology at Parakou Teaching Hospital from January 1, 2013 through to December 31, 2019. Clinical data, vascular risk factors, stroke subtype and outcome data were recorded. The in-hospital case-fatality and its associated factors were determined. The study was approved by the Local Ethics Committee of Biomedical research and has been registered under the unique indentifying number researchregistry5687 and is available at https://www.researchregistry.com/browse-the-registry#home/.

Results

Stroke cases represented 51.5% of all patients. There were 372 patients included in the study with a mean age of 58.2 ± 14.2 years. The sex ratio was 1:3. Ischemic stroke accounted for 40.3%, intracerebral hemorrhage 30.4%, and unknown 29.3%. The main vascular risk factors were hypertension (69.1%), alcoholism (23.9%) and diabetes mellitus (16.9%). The mean NIHSS at admission was 9.4 ± 5.7 and the length of hospital stay was 9.0 ± 7.3. The most common complications recorded during the acute phase were swallowing disorders (10.2%), pneumonia (9.1%) and urinary tract infections (8.3%). The in-hospital case fatality was 6.2% and was associated with loss of consciousness (p = 0.0001), high NIHSS on admission (p = 0.001), fever (p = 0.0001), swallowing disorders (p = 0.001) and leukocytosis (p = 0.021). On discharge, 27.6% were independent and 97.8% were on antihypertensive drugs.

Conclusion

The in-hospital stroke mortality was close to that reported by other studies in Africa.",2020-07-28 +32108861,PSORTm: a bacterial and archaeal protein subcellular localization prediction tool for metagenomics data.,"MOTIVATION:Many methods for microbial protein subcellular localization (SCL) prediction exist; however, none is readily available for analysis of metagenomic sequence data, despite growing interest from researchers studying microbial communities in humans, agri-food relevant organisms and in other environments (e.g. for identification of cell-surface biomarkers for rapid protein-based diagnostic tests). We wished to also identify new markers of water quality from freshwater samples collected from pristine versus pollution-impacted watersheds. RESULTS:We report PSORTm, the first bioinformatics tool designed for prediction of diverse bacterial and archaeal protein SCL from metagenomics data. PSORTm incorporates components of PSORTb, one of the most precise and widely used protein SCL predictors, with an automated classification by cell envelope. An evaluation using 5-fold cross-validation with in silico-fragmented sequences with known localization showed that PSORTm maintains PSORTb's high precision, while sensitivity increases proportionately with metagenomic sequence fragment length. PSORTm's read-based analysis was similar to PSORTb-based analysis of metagenome-assembled genomes (MAGs); however, the latter requires non-trivial manual classification of each MAG by cell envelope, and cannot make use of unassembled sequences. Analysis of the watershed samples revealed the importance of normalization and identified potential biomarkers of water quality. This method should be useful for examining a wide range of microbial communities, including human microbiomes, and other microbiomes of medical, environmental or industrial importance. 
AVAILABILITY AND IMPLEMENTATION:Documentation, source code and docker containers are available for running PSORTm locally at https://www.psort.org/psortm/ (freely available, open-source software under GNU General Public License Version 3). SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +32702119,IDP-Seq2Seq: identification of intrinsically disordered regions based on sequence to sequence learning.,"

Motivation

Related to many important biological functions, intrinsically disordered regions (IDRs) are widely distributed in proteins. Accurate prediction of IDRs is critical for the protein structure and function analysis. However, the existing computational methods construct the predictive models solely in the sequence space, failing to convert the sequence space into the 'semantic space' to reflect the structure characteristics of proteins. Furthermore, although the length-dependent predictors showed promising results, new fusion strategies should be explored to improve their predictive performance and the generalization.

Results

In this study, we applied the Sequence to Sequence Learning (Seq2Seq) derived from natural language processing (NLP) to map protein sequences to 'semantic space' to reflect the structure patterns with the help of predicted residue-residue contacts (CCMs) and other sequence-based features. Furthermore, the Attention mechanism was used to capture the global associations between all residue pairs in the proteins. Three length-dependent predictors were constructed: IDP-Seq2Seq-L for long disordered region prediction, IDP-Seq2Seq-S for short disordered region prediction and IDP-Seq2Seq-G for both long and short disordered region predictions. Finally, these three predictors were fused into one predictor called IDP-Seq2Seq to improve the discriminative power and generalization. Experimental results on four independent test datasets and the CASP test dataset showed that IDP-Seq2Seq is insensitive to the ratios of long and short disordered regions and outperforms other competing methods.

Availability and implementation

For the convenience of most experimental scientists, a user-friendly and publicly accessible web-server for the powerful new predictor has been established at http://bliulab.net/IDP-Seq2Seq/. It is anticipated that IDP-Seq2Seq will become a very useful tool for identification of IDRs.

Supplementary information

Supplementary data are available at Bioinformatics online.",2021-01-01 +30196719,[The role of endoscopy registries in quality health care. The first data from the Hungarian Endoscopic Retrograde Cholangiopancreatography (ERCP) Registry].,"

Introduction

The continuous monitoring of quality indicators in gastrointestinal endoscopy has become an essential requirement nowadays. Most of these data cannot be extracted from the currently used free text reports, therefore a structured web-based data-collecting system was developed to record the indicators of pancreatobiliary endoscopy.

Aim

A structured data-collecting system, the ERCP Registry, was initiated to monitor endoscopic retrograde cholangiopancreatography (ERCP) examinations prospectively, and to verify its usability.

Method

From January 2017, all ERCPs performed at the First Department of Medicine, University of Pécs, have been registered in the database. In the first year, the detailed data of 595 examinations were entered into the registry. After processing these data, the testing period of the registry is now finished.

Results

On 447 patients, 595 ERCPs were performed. The success rate of cannulation is 93.8% if all cases are considered. Difficult biliary access was noted in 32.1% of patients with native papilla, and successful cannulation was achieved in 81.0% of these cases during the first procedure. Post-ERCP pancreatitis was observed in 13 cases (2.2%), clinically significant post-papillotomy bleeding was registered in 2 cases (0.3%), while 27 patients (4.5%) developed temporary hypoxia during the procedure. 30-day follow-up was successful in 75.5% of the cases to detect late complications. All of the quality indicators determined by the American Society of Gastrointestinal Endoscopy (ASGE) were possible to monitor with the help of the registry. Our center already complies with most of these criteria.

Conclusions

Continuous monitoring of the quality indicators of endoscopic interventions are not supported by the current hospital information system but it became possible with our registry. The ERCP Registry is a suitable tool to detect the quality of patient care and also useful for clinical research. Several endoscopy units have joined already this initiative and it is open for further centres through our web page ( https://tm-centre.org/hu/regiszterek/ercp-regiszter/ ). Orv Hetil. 2018; 159(37): 1506-1515.",2018-09-01 +33335901,DeepCSO: A Deep-Learning Network Approach to Predicting Cysteine S-Sulphenylation Sites.,"Cysteine S-sulphenylation (CSO), as a novel post-translational modification (PTM), has emerged as a potential mechanism to regulate protein functions and affect signal networks. Because of its functional significance, several prediction approaches have been developed. Nevertheless, they are based on a limited dataset from Homo sapiens and there is a lack of prediction tools for the CSO sites of other species. Recently, this modification has been investigated at the proteomics scale for a few species and the number of identified CSO sites has significantly increased. Thus, it is essential to explore the characteristics of this modification across different species and construct prediction models with better performances based on the enlarged dataset. In this study, we constructed several classifiers and found that the long short-term memory model with the word-embedding encoding approach, dubbed LSTM WE , performs favorably to the traditional machine-learning models and other deep-learning models across different species, in terms of cross-validation and independent test. The area under the receiver operating characteristic (ROC) curve for LSTM WE ranged from 0.82 to 0.85 for different organisms, which was superior to the reported CSO predictors. 
Moreover, we developed the general model based on the integrated data from different species and it showed great universality and effectiveness. We provided the on-line prediction service called DeepCSO that included both species-specific and general models, which is accessible through http://www.bioinfogo.org/DeepCSO.",2020-12-01 +31792500,Identification and comprehensive characterization of lncRNAs with copy number variations and their driving transcriptional perturbed subpathways reveal functional significance for cancer.,"Numerous studies have shown that copy number variation (CNV) in lncRNA regions play critical roles in the initiation and progression of cancer. However, our knowledge about their functionalities is still limited. Here, we firstly provided a computational method to identify lncRNAs with copy number variation (lncRNAs-CNV) and their driving transcriptional perturbed subpathways by integrating multidimensional omics data of cancer. The high reliability and accuracy of our method have been demonstrated. Then, the method was applied to 14 cancer types, and a comprehensive characterization and analysis was performed. LncRNAs-CNV had high specificity in cancers, and those with high CNV level may perturb broad biological functions. Some core subpathways and cancer hallmarks widely perturbed by lncRNAs-CNV were revealed. Moreover, subpathways highlighted the functional diversity of lncRNAs-CNV in various cancers. Survival analysis indicated that functional lncRNAs-CNV could be candidate prognostic biomarkers for clinical applications, such as ST7-AS1, CDKN2B-AS1 and EGFR-AS1. In addition, cascade responses and a functional crosstalk model among lncRNAs-CNV, impacted genes, driving subpathways and cancer hallmarks were proposed for understanding the driving mechanism of lncRNAs-CNV. 
Finally, we developed a user-friendly web interface-LncCASE (http://bio-bigdata.hrbmu.edu.cn/LncCASE/) for exploring lncRNAs-CNV and their driving subpathways in various cancer types. Our study identified and systematically characterized lncRNAs-CNV and their driving subpathways and presented valuable resources for investigating the functionalities of non-coding variations and the mechanisms of tumorigenesis.",2020-12-01 +31769676,ROBOKOP KG and KGB: Integrated Knowledge Graphs from Federated Sources.,"A proliferation of data sources has led to the notional existence of an implicit Knowledge Graph (KG) that contains vast amounts of biological knowledge contributed by distributed Application Programming Interfaces (APIs). However, challenges arise when integrating data across multiple APIs due to incompatible semantic types, identifier schemes, and data formats. We present ROBOKOP KG ( http://robokopkg.renci.org ), which is a KG that was initially built to support the open biomedical question-answering application, ROBOKOP (Reasoning Over Biomedical Objects linked in Knowledge-Oriented Pathways) ( http://robokop.renci.org ). Additionally, we present the ROBOKOP Knowledge Graph Builder (KGB), which constructs the KG and provides an extensible framework to handle graph query over and integration of federated data sources.",2019-12-12 +29788498,CellAtlasSearch: a scalable search engine for single cells.,"Owing to the advent of high throughput single cell transcriptomics, past few years have seen exponential growth in production of gene expression data. Recently efforts have been made by various research groups to homogenize and store single cell expression from a large number of studies. The true value of this ever increasing data deluge can be unlocked by making it searchable. To this end, we propose CellAtlasSearch, a novel search architecture for high dimensional expression data, which is massively parallel as well as light-weight, thus infinitely scalable. 
In CellAtlasSearch, we use a Graphical Processing Unit (GPU) friendly version of Locality Sensitive Hashing (LSH) for unmatched speedup in data processing and query. Currently, CellAtlasSearch features over 300 000 reference expression profiles including both bulk and single-cell data. It enables the user query individual single cell transcriptomes and finds matching samples from the database along with necessary meta information. CellAtlasSearch aims to assist researchers and clinicians in characterizing unannotated single cells. It also facilitates noise free, low dimensional representation of single-cell expression profiles by projecting them on a wide variety of reference samples. The web-server is accessible at: http://www.cellatlassearch.com.",2018-07-01 +32071981,"Data for teenagers' stressor, mental health, coping style, social support, parenting style and self-efficacy in South China.","Data provided in this article were collected from 3784 high school students in South China, which measured teenagers' stressor (Stressors Scale for Middle School Students, SSMSS), mental health (Symptom Check-List 90, SCL90), coping style (Simplified Coping Style Questionnaire, SCSQ), social support (Social Support Scale, SSS), parenting style (Egna Minnen av Barndoms Uppforstran-own memories of parental rearing practice in childhood, EMBU) and self-efficacy (General Self-Efficacy Scale, GSES). All the instruments for data collection were in the Chinese version. Participants were 3784 students recruited from 15 high schools in Shenzhen, Guangdong Province of South China with random cluster sampling method. Among them, there were 1987 boys and 1797 girls, with an average age of 14.6 and a standard deviation of 1.82. In addition, a.csv file consists of all the variables and questionnaires we used (both in Chinese and in English) are included as a supplementary material. 
For a discussion of the major finding based on the data please see the article which used a part of questionnaires and participants we supplied in the data set: The relationship between high school students' social support and coping styles: The mediating role of self-efficacy (https://doi.org/10.3969/j.issn.1007-3728.2014.10.016) [1].",2020-01-25 +32309524,Dataset for the combined transcriptome assembly of M. oleifera and functional annotation.,"In this paper, we present the data acquired during transcriptome analysis of the plant Moringa oleifera [1] from five different tissues (root, stem, leaf, flower and seed) by RNA sequencing. A total of 271 million reads were assembled with an N50 of 2094 bp. The combined transcriptome was assessed for transcript abundance across five tissues. The protein coding genes identified from the transcripts were annotated and used for orthology analysis. Further, enzymes involved in the biosynthesis of select medicinally important secondary metabolites, vitamins and ion transporters were identified and their expression levels across tissues were examined. The data generated by RNA sequencing has been deposited to NCBI public repository under the accession number PRJNA394193 (https://www.ncbi.nlm.nih.gov/bioproject/PRJNA394193).",2020-03-20 +29518231,Understanding the glycome: an interactive view of glycosylation from glycocompositions to glycoepitopes.,"Nowadays, due to the advance of experimental techniques in glycomics, large collections of glycan profiles are regularly published. The rapid growth of available glycan data accentuates the lack of innovative tools for visualizing and exploring large amount of information. Scientists resort to using general-purpose spreadsheet applications to create ad hoc data visualization. Thus, results end up being encoded in publication images and text, while valuable curated data is stored in files as supplementary information. 
To tackle this problem, we have built an interactive pipeline composed with three tools: Glynsight, EpitopeXtractor and Glydin'. Glycan profile data can be imported in Glynsight, which generates a custom interactive glycan profile. Several profiles can be compared and glycan composition is integrated with structural data stored in databases. Glycan structures of interest can then be sent to EpitopeXtractor to perform a glycoepitope extraction. EpitopeXtractor results can be superimposed on the Glydin' glycoepitope network. The network visualization allows fast detection of clusters of glycoepitopes and discovery of potential new targets. Each of these tools is standalone or can be used in conjunction with the others, depending on the data and the specific interest of the user. All the tools composing this pipeline are part of the Glycomics@ExPASy initiative and are available at https://www.expasy.org/glycomics.",2018-06-01 +31236209,The research crisis in American institutions of complementary and integrative health: one proposed solution for chiropractic profession.,"A crisis confronts the Complementary and Integrative Health (CIH) teaching institutions in the US. Research infrastructure is needed to build and sustain productive research programs and retain their own research faculty. In most health professions, this infrastructure is largely built through research grants. In CIH, most educational institutions are funded through student tuition, which has historically also had to be the source for building their research programs. Only a limited number of these institutions have emerged as National Institute of Health (NIH) grant-funded programs. As a result, the American chiropractic institutions have seen a retrenchment in the number of active research programs. 
In addition, although research training programs e.g., NIH's K awards are available for CIH researchers, these programs generally result in these researchers leaving their institutions and depriving future CIH practitioners of the benefit of being trained in a culture of research. One proposed solution is to leverage the substantial research infrastructure and long history of collaboration available at the RAND Corporation (https://www.rand.org) This article presents the proposed five components of the RAND Center for Collaborative CIH Research and the steps required to bring it to being: 1) the CIH Research Network - an online resource and collaborative site for CIH researchers; 2) the CIH Research Advisory Board - the governing body for the Center selected by its members; 3) the RAND CIH Interest Group - a group of RAND researchers with an interest in and who could provide support to CIH research; 4) CIH Researcher Training - access to existing RAND research training as well as the potential for the Center to provide a research training home for those with training grants; and 5) CIH RAND Partnership for Research - a mentorship program to support successful CIH research. By necessity the first step in the Center's creation would be a meeting between the heads of interested CIH institutions to work out the details and to obtain buy-in. The future success of CIH-directed research on CIH will require a pooling of talent and resources across institutions; something that the American chiropractic institutions have not yet been able to achieve. This article discusses one possible solution.",2019-06-17 +31714792,ImageDataExtractor: A Tool To Extract and Quantify Data from Microscopy Images.,"The rise of data science is leading to new paradigms in data-driven materials discovery. 
This carries an essential notion that large data sources containing chemical structure and property information can be mined in a fashion that detects and exploits structure-property relationships, such that chemicals can be predicted to suit a given material application. The success of material predictions is predicated on these large data sources of chemical structure and property information being suited to a target application. Microscopy is commonly used to characterize chemical structure, especially in fields such as nanotechnology where material properties are highly dependent on the size and shape of nanoparticles. Large data sources of nanoparticle information stemming from microscopy images would thus be highly beneficial. Millions of microscopy images exist, but they lie fragmented across the literature, typically presented individually within a paper article and usually in a qualitative fashion therein, even though they harbor a wealth of numeric information. We present the ImageDataExtractor toolkit that autoidentifies and autoextracts microscopy images from scientific documents, whereupon it autonomously analyzes each image to produce quantitative particle size and shape information about its subject material. Each image is quantified by decoding its scale bar information using optical character recognition, with help from super-resolution convolutional neural networks where required. Individual particles are detected and profiled using various thresholding, segmentation, polygon fitting, and edge correction routines. The high-throughput operational capability of ImageDataExtractor means that it can be used to generate large-data sources of particle information for data-driven materials discovery. Evaluation metrics, precision and recall, are greater than 80% for the majority of the image processing steps, and precision is above 80% for all critical steps. 
The ImageDataExtractor tool is released under the MIT license and is available to download from http://www.imagedataextractor.org.",2019-12-03 +32649057,A transcriptomic study for identifying cardia- and non-cardia-specific gastric cancer prognostic factors using genetic algorithm-based methods.,"Gastric cancer (GC) is a heterogeneous tumour with numerous differences of epidemiologic and clinicopathologic features between cardia cancer and non-cardia cancer. However, few studies were performed to construct site-specific GC prognostic models. In this study, we identified site-specific GC transcriptomic prognostic biomarkers using genetic algorithm (GA)-based support vector machine (GA-SVM) and GA-based Cox regression method (GA-Cox) in the Cancer Genome Atlas (TCGA) database. The area under time-dependent receive operating characteristic (ROC) curve (AUC) regarding 5-year survival and concordance index (C-index) was used to evaluate the predictive ability of Cox regression models. Finally, we identified 10 and 13 prognostic biomarkers for cardia cancer and non-cardia cancer, respectively. Compared to traditional models, the addition of these site-specific biomarkers could notably improve the model preference (cardia: AUCtraditional vs AUCcombined  = 0.720 vs 0.899, P = 8.75E-08; non-cardia: AUCtraditional vs AUCcombined  = 0.798 vs 0.994, P = 7.11E-16). The combined nomograms exhibited superior performance in cardia and non-cardia GC survival prediction (C-indexcardia  = 0.816; C-indexnoncardia  = 0.812). We also constructed a user-friendly GC site-specific molecular system (GC-SMS, https://njmu-zhanglab.shinyapps.io/gc_sms/), which is freely available for users. 
In conclusion, we developed site-specific GC prognostic models for predicting cardia cancer and non-cardia cancer survival, providing more support for the individualized therapy of GC patients.",2020-07-10 +29762782,MetaboAnalyst 4.0: towards more transparent and integrative metabolomics analysis.,"We present a new update to MetaboAnalyst (version 4.0) for comprehensive metabolomic data analysis, interpretation, and integration with other omics data. Since the last major update in 2015, MetaboAnalyst has continued to evolve based on user feedback and technological advancements in the field. For this year's update, four new key features have been added to MetaboAnalyst 4.0, including: (1) real-time R command tracking and display coupled with the release of a companion MetaboAnalystR package; (2) a MS Peaks to Pathways module for prediction of pathway activity from untargeted mass spectral data using the mummichog algorithm; (3) a Biomarker Meta-analysis module for robust biomarker identification through the combination of multiple metabolomic datasets and (4) a Network Explorer module for integrative analysis of metabolomics, metagenomics, and/or transcriptomics data. The user interface of MetaboAnalyst 4.0 has been reengineered to provide a more modern look and feel, as well as to give more space and flexibility to introduce new functions. The underlying knowledgebases (compound libraries, metabolite sets, and metabolic pathways) have also been updated based on the latest data from the Human Metabolome Database (HMDB). A Docker image of MetaboAnalyst is also available to facilitate download and local installation of MetaboAnalyst. MetaboAnalyst 4.0 is freely available at http://metaboanalyst.ca.",2018-07-01 +32058565,GeneSwitches: ordering gene expression and functional events in single-cell experiments.,"SUMMARY:Emerging single-cell RNA-sequencing data technologies has made it possible to capture and assess the gene expression of individual cells. 
Based on the similarity of gene expression profiles, many tools have been developed to generate an in silico ordering of cells in the form of pseudo-time trajectories. However, these tools do not provide a means to find the ordering of critical gene expression changes over pseudo-time. We present GeneSwitches, a tool that takes any single-cell pseudo-time trajectory and determines the precise order of gene expression and functional-event changes over time. GeneSwitches uses a statistical framework based on logistic regression to identify the order in which genes are either switched on or off along pseudo-time. With this information, users can identify the order in which surface markers appear, investigate how functional ontologies are gained or lost over time and compare the ordering of switching genes from two related pseudo-temporal processes. AVAILABILITY:GeneSwitches is available at https://geneswitches.ddnetbio.com. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +30416498,PredT4SE-Stack: Prediction of Bacterial Type IV Secreted Effectors From Protein Sequences Using a Stacked Ensemble Method.,"Gram-negative bacteria use various secretion systems to deliver their secreted effectors. Among them, type IV secretion system exists widely in a variety of bacterial species, and secretes type IV secreted effectors (T4SEs), which play vital roles in host-pathogen interactions. However, experimental approaches to identify T4SEs are time- and resource-consuming. In the present study, we aim to develop an in silico stacked ensemble method to predict whether a protein is an effector of type IV secretion system or not based on its sequence information. The protein sequences were encoded by the feature of position specific scoring matrix (PSSM)-composition by summing rows that correspond to the same amino acid residues in PSSM profiles. 
Based on the PSSM-composition features, we develop a stacked ensemble model PredT4SE-Stack to predict T4SEs, which utilized an ensemble of base-classifiers implemented by various machine learning algorithms, such as support vector machine, gradient boosting machine, and extremely randomized trees, to generate outputs for the meta-classifier in the classification system. Our results demonstrated that the framework of PredT4SE-Stack was a feasible and effective way to accurately identify T4SEs based on protein sequence information. The datasets and source code of PredT4SE-Stack are freely available at http://xbioinfo.sjtu.edu.cn/PredT4SE_Stack/index.php.",2018-10-26 +32399551,EasyVS: a user-friendly web-based tool for molecule library selection and structure-based virtual screening.,"

Summary

EasyVS is a web-based platform built to simplify molecule library selection and virtual screening. With an intuitive interface, the tool allows users to go from selecting a protein target with a known structure and tailoring a purchasable molecule library to performing and visualizing docking in a few clicks. Our system also allows users to filter screening libraries based on molecule properties, cluster molecules by similarity and personalize docking parameters.

Availability and implementation

EasyVS is freely available as an easy-to-use web interface at http://biosig.unimelb.edu.au/easyvs.

Contact

douglas.pires@unimelb.edu.au or david.ascher@unimelb.edu.au.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +30615123,The DNA walk and its demonstration of deterministic chaos-relevance to genomic alterations in lung cancer.,"

Motivation

Advancements in cancer genetics have facilitated the development of therapies with actionable mutations. Although mutated genes have been studied extensively, their chaotic behavior has not been appreciated. Thus, in contrast to naïve DNA, mutated DNA sequences can display characteristics of unpredictability and sensitivity to the initial conditions that may be dictated by the environment, expression patterns and presence of other genomic alterations. Employing a DNA walk as a form of 2D analysis of the nucleotide sequence, we demonstrate that chaotic behavior in the sequence of a mutated gene can be predicted.

Results

Using fractal analysis for these DNA walks, we have determined the complexity and nucleotide variance of commonly observed mutated genes in non-small cell lung cancer, and their wild-type counterparts. DNA walks for wild-type genes demonstrate varying levels of chaos, with BRAF, NTRK1 and MET exhibiting greater levels of chaos than KRAS, paxillin and EGFR. Analyzing changes in chaotic properties, such as changes in periodicity and linearity, reveals that while deletion mutations indicate a notable disruption in fractal 'self-similarity', fusion mutations demonstrate bifurcations between the two genes. Our results suggest that the fractals generated by DNA walks can yield important insights into potential consequences of these mutated genes.

Availability and implementation

Introduction to Turtle graphics in Python is an open source article on learning to develop a script for Turtle graphics in Python, freely available on the web at https://docs.python.org/2/library/turtle.html. cDNA sequences were obtained through NCBI RefSeq database, an open source database that contains information on a large array of genes, such as their nucleotide and amino acid sequences, freely available at https://www.ncbi.nlm.nih.gov/refseq/. FracLac plugin for Fractal analysis in ImageJ is an open source plugin for the ImageJ program to perform fractal analysis, free to download at https://imagej.nih.gov/ij/plugins/fraclac/FLHelp/Introduction.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +24334350,ATTED-II in 2014: evaluation of gene coexpression in agriculturally important plants.,"ATTED-II (http://atted.jp) is a database of coexpressed genes that was originally developed to identify functionally related genes in Arabidopsis and rice. Herein, we describe an updated version of ATTED-II, which expands this resource to include additional agriculturally important plants. To improve the quality of the coexpression data for Arabidopsis and rice, we included more gene expression data from microarray and RNA sequencing studies. The RNA sequencing-based coexpression data now cover 94% of the Arabidopsis protein-encoding genes, representing a substantial increase from previously available microarray-based coexpression data (76% coverage). We also generated coexpression data for four dicots (soybean, poplar, grape and alfalfa) and one monocot (maize). As both the quantity and quality of expression data for the non-model species are generally poorer than for the model species, we verified coexpression data associated with these new species using multiple methods. First, the overall performance of the coexpression data was evaluated using gene ontology annotations and the coincidence of a genomic feature. Secondly, the reliability of each guide gene was determined by comparing coexpressed gene lists between platforms. With the expanded and newly evaluated coexpression data, ATTED-II represents an important resource for identifying functionally related genes in agriculturally important plants.",2013-12-10 +30946876,Shady: A software engine for real-time visual stimulus manipulation.,"

Background

Precise definition, rendering and manipulation of visual stimuli are essential in neuroscience. Rather than implementing these tasks from scratch, scientists benefit greatly from using reusable software routines from freely available toolboxes. Existing toolboxes work well when the operating system and hardware are painstakingly optimized, but may be less suited to applications that require multi-tasking (for example, closed-loop systems that involve real-time acquisition and processing of signals).

New method

We introduce a new cross-platform visual stimulus toolbox called Shady (https://pypi.org/project/Shady)—so called because of its heavy reliance on a shader program to perform parallel pixel processing on a computer's graphics processor. It was designed with an emphasis on performance robustness in multi-tasking applications under unforgiving conditions. For optimal timing performance, the CPU drawing management commands are carried out by a compiled binary engine. For configuring stimuli and controlling their changes over time, Shady provides a programmer's interface in Python, a powerful, accessible and widely-used high-level programming language.

Results

Our timing benchmark results illustrate that Shady's hybrid compiled/interpreted architecture requires less time to complete drawing operations, exhibits smaller variability in frame-to-frame timing, and hence drops fewer frames, than pure-Python solutions under matched conditions of resource contention. This performance gain comes despite an expansion of functionality (e.g. ""noisy-bit"" dithering as standard on all pixels and all frames, to enhance effective dynamic range) relative to previous offerings.

Conclusions

Shady simultaneously advances the functionality and performance available to scientists for rendering visual stimuli and manipulating them in real time.",2019-04-01 +32002517,JASS: command line and web interface for the joint analysis of GWAS results.,"Genome-wide association study (GWAS) has been the driving force for identifying association between genetic variants and human phenotypes. Thousands of GWAS summary statistics covering a broad range of human traits and diseases are now publicly available. These GWAS have proven their utility for a range of secondary analyses, including in particular the joint analysis of multiple phenotypes to identify new associated genetic variants. However, although several methods have been proposed, there are very few large-scale applications published so far because of challenges in implementing these methods on real data. Here, we present JASS (Joint Analysis of Summary Statistics), a polyvalent Python package that addresses this need. Our package incorporates recently developed joint tests such as the omnibus approach and various weighted sum of Z-score tests while solving all practical and computational barriers for large-scale multivariate analysis of GWAS summary statistics. This includes data cleaning and harmonization tools, an efficient algorithm for fast derivation of joint statistics, an optimized data management process and a web interface for exploration purposes. Both benchmark analyses and real data applications demonstrated the robustness and strong potential of JASS for the detection of new associated genetic variants. Our package is freely available at https://gitlab.pasteur.fr/statistical-genetics/jass.",2020-01-24 +26476456,"BiGG Models: A platform for integrating, standardizing and sharing genome-scale models.","Genome-scale metabolic models are mathematically-structured knowledge bases that can be used to predict metabolic pathway usage and growth phenotypes. 
Furthermore, they can generate and test hypotheses when integrated with experimental data. To maximize the value of these models, centralized repositories of high-quality models must be established, models must adhere to established standards and model components must be linked to relevant databases. Tools for model visualization further enhance their utility. To meet these needs, we present BiGG Models (http://bigg.ucsd.edu), a completely redesigned Biochemical, Genetic and Genomic knowledge base. BiGG Models contains more than 75 high-quality, manually-curated genome-scale metabolic models. On the website, users can browse, search and visualize models. BiGG Models connects genome-scale models to genome annotations and external databases. Reaction and metabolite identifiers have been standardized across models to conform to community standards and enable rapid comparison across models. Furthermore, BiGG Models provides a comprehensive application programming interface for accessing BiGG Models with modeling and analysis tools. As a resource for highly curated, standardized and accessible models of metabolism, BiGG Models will facilitate diverse systems biology studies and support knowledge-based analysis of diverse experimental data.",2015-10-17 +31199072,Structural and functional investigation of the Small Ribosomal Subunit Biogenesis GTPase A (RsgA) from Pseudomonas aeruginosa.,"The Small Ribosomal Subunit Biogenesis GTPase A (RsgA) is a bacterial assembly factor involved in the late stages of the 30S subunit maturation. It is a multidomain GTPase in which the central circularly permutated GTPase domain is flanked by an OB domain and a Zn-binding domain. All three domains participate in the interaction with the 30S particle thus ensuring an efficient coupling between catalytic activity and biological function. In vivo studies suggested the relevance of rsgA in bacterial growth and cellular viability, but other pleiotropic roles of RsgA are also emerging. 
Here, we report the 3D structure of RsgA from Pseudomonas aeruginosa (PaRsgA) in the GDP-bound form. We also report a biophysical and biochemical characterization of the protein in both the GDP-bound and its nucleotide-free form. In particular, we report a kinetic analysis of the RsgA binding to GTP and GDP. We found that PaRsgA is able to bind both nucleotides with submicromolar affinity. The higher affinity towards GDP (KD  = 0.011 μm) with respect to GTP (KD  = 0.16 μm) is mainly ascribed to a smaller GDP dissociation rate. Our results confirm that PaRsgA, like most other GTPases, has a weak intrinsic enzymatic activity (kCAT  = 0.058 min-1 ). Finally, the biological role of RsgA in P. aeruginosa was investigated, allowing us to conclude that rsgA is dispensable for P. aeruginosa growth but important for drug resistance and virulence in an animal infection model. DATABASES: Coordinates and structure factors for the protein structure described in this manuscript have been deposited in the Protein Data Bank (https://www.rcsb.org) with the accession code 6H4D.",2019-07-02 +32115724,Intelligent 4D CT sequence scanning (i4DCT): First scanner prototype implementation and phantom measurements of automated breathing signal-guided 4D CT.,"

Purpose

Four-dimensional (4D) computed tomography (CT) imaging is an essential part of current 4D radiotherapy treatment planning workflows, but clinical 4D CT images are often affected by artifacts. The artifacts are mainly caused by breathing irregularity during data acquisition, which leads to projection data coverage issues for currently available commercial 4D CT protocols. It was proposed to improve projection data coverage by online respiratory signal analysis and signal-guided CT tube control, but related work was always theoretical and presented as pure in silico studies. The present work demonstrates a first CT prototype implementation along with respective phantom measurements for the recently introduced intelligent 4D CT (i4DCT) sequence scanning concept (https://doi.org/10.1002/mp.13632).

Methods

Intelligent 4D CT was implemented on the Siemens SOMATOM go platform. Four-dimensional CT measurements were performed using the CIRS motion phantom. Motion curves were programmed to systematically vary from regular to very irregular, covering typical irregular patterns that are known to result in image artifacts using standard 4D CT imaging protocols. Corresponding measurements were performed using i4DCT and routine spiral 4D CT with similar imaging parameters (e.g., mAs setting and gantry rotation time, retrospective ten-phase reconstruction) to allow for a direct comparison of the image data.

Results

Following technological implementation of i4DCT on the clinical CT scanner platform, 4D CT motion artifacts were significantly reduced for all investigated levels of breathing irregularity when compared to routine spiral 4D CT scanning.

Conclusions

The present study confirms feasibility of fully automated respiratory signal-guided 4D CT scanning by means of a first implementation of i4DCT on a CT scanner. The measurements thereby support the conclusions of respective in silico studies and demonstrate that respiratory signal-guided 4D CT (here: i4DCT) is ready for integration into clinical CT scanners.",2020-03-24 +32006274,Visualization and Analysis of Protein Structures with LiteMol Suite.,"LiteMol suite is an innovative solution that enables near-instant delivery of model and experimental biomacromolecular structural data, providing users with an interactive and responsive experience in all modern web browsers and mobile devices. LiteMol suite is a combination of data delivery services (CoordinateServer and DensityServer), compression format (BinaryCIF), and a molecular viewer (LiteMol Viewer). The LiteMol suite is integrated into Protein Data Bank in Europe (PDBe) and other life science web applications (e.g., UniProt, Ensemble, SIB, and CNRS services), it is freely available at https://litemol.org , and its source code is available via GitHub. LiteMol suite provides advanced functionality (annotations and their visualization, powerful selection features), and this chapter will describe their use for visual inspection of protein structures.",2020-01-01 +31142855,Multi-omics of the gut microbial ecosystem in inflammatory bowel diseases.,"Inflammatory bowel diseases, which include Crohn's disease and ulcerative colitis, affect several million individuals worldwide. Crohn's disease and ulcerative colitis are complex diseases that are heterogeneous at the clinical, immunological, molecular, genetic, and microbial levels. Individual contributing factors have been the focus of extensive research. 
As part of the Integrative Human Microbiome Project (HMP2 or iHMP), we followed 132 subjects for one year each to generate integrated longitudinal molecular profiles of host and microbial activity during disease (up to 24 time points each; in total 2,965 stool, biopsy, and blood specimens). Here we present the results, which provide a comprehensive view of functional dysbiosis in the gut microbiome during inflammatory bowel disease activity. We demonstrate a characteristic increase in facultative anaerobes at the expense of obligate anaerobes, as well as molecular disruptions in microbial transcription (for example, among clostridia), metabolite pools (acylcarnitines, bile acids, and short-chain fatty acids), and levels of antibodies in host serum. Periods of disease activity were also marked by increases in temporal variability, with characteristic taxonomic, functional, and biochemical shifts. Finally, integrative analysis identified microbial, biochemical, and host factors central to this dysregulation. The study's infrastructure resources, results, and data, which are available through the Inflammatory Bowel Disease Multi'omics Database ( http://ibdmdb.org ), provide the most comprehensive description to date of host and microbial activities in inflammatory bowel diseases.",2019-05-29 +30338276,RNA-seq data of the Jatropha curcas L. shoot system.,"Jatropha curcas L. or the physic nut is a monoecious shrub belonging to the Euphorbiaceae family. The plant is an ideal feedstock for biodiesel production; oil-rich seed (37-42%), has a broad range of growth habitat such as arid, semi-arid and tropical and a relatively feasible process for conversion of crude oil into biodiesel. The major constraint affecting the success of large-scale J. curcas plantation is seed yield inconsistency. Numerous research projects conducted on J. 
curcas with integrated genetic, genomic and transcriptomic approaches have been applied on the leaf, apical meristem, flower, root and fruit tissues. However, to date, no genomics data of J. curcas shoot system are publicly available, despite its importance in understanding flowering, fruiting and seed set qualities targeted for yield improvement. Here, we present eighteen sets of shoot and inflorescence transcriptomes generated from J. curcas plants with contrasting yields. Raw reads of the RNA-seq data are found in NCBI׳s Sequence Read Archive (SRA) database with the accession number SRP090662 (https://www.ncbi.nlm.nih.gov/sra/?term=SRP090662). This transcriptomic data could be integrated with the present genomic resources for in depth understanding of J. curcas reproductive system.",2018-09-29 +30407009,VIETHERB: A Database for Vietnamese Herbal Species.,"Vietnam carries a highly diverse practice of traditional medicine in which various combinations of herbs have been widely used as remedies for many types of diseases. Poor hand-written records and current text-based databases, however, perplex the process of conventionalizing and evaluating canonical therapeutic effects. In efforts to reorganize the valuable information, we provide the VIETHERB database ( http://vietherb.com.vn/ ) for herbs documented in Vietnamese traditional medicines. This database is constructed with confidence to provide users with information on herbs and other side information including metabolites, diseases, morphologies, and geographical locations for each individual species. Our data in this release consist of 2,881 species, 10,887 metabolites, 458 geographical locations, and 8,046 therapeutic effects. The numbers of species-metabolite, species-therapeutic effect, species-morphology, and species-distribution binary relationships are 17,602, 2,718, 11,943, and 16,089, respectively. 
The information on Vietnamese herbal species can be easily accessed or queried using their scientific names. Searching for species sharing side information can be simply done by clicking on the data. The database primarily serves as an open source facilitating users in studies of modernizing traditional medicine, computer-aided drug design, conservation of endangered plants, and other relevant experimental sciences.",2018-12-03 +33726787,A versatile web app for identifying the drivers of COVID-19 epidemics.,"

Background

No versatile web app exists that allows epidemiologists and managers around the world to comprehensively analyze the impacts of COVID-19 mitigation. The http://covid-webapp.numerusinc.com/ web app presented here fills this gap.

Methods

Our web app uses a model that explicitly identifies susceptible, contact, latent, asymptomatic, symptomatic and recovered classes of individuals, and a parallel set of response classes, subject to lower pathogen-contact rates. The user inputs a CSV file of incidence and, if of interest, mortality rate data. A default set of parameters is available that can be overwritten through input or online entry, and a user-selected subset of these can be fitted to the model using maximum-likelihood estimation (MLE). Model fitting and forecasting intervals are specifiable and changes to parameters allow counterfactual and forecasting scenarios. Confidence or credible intervals can be generated using stochastic simulations, based on MLE values, or on an inputted CSV file containing Markov chain Monte Carlo (MCMC) estimates of one or more parameters.

Results

We illustrate the use of our web app in extracting social distancing, social relaxation, surveillance or virulence switching functions (i.e., time varying drivers) from the incidence and mortality rates of COVID-19 epidemics in Israel, South Africa, and England. The Israeli outbreak exhibits four distinct phases: initial outbreak, social distancing, social relaxation, and a second wave mitigation phase. An MCMC projection of this latter phase suggests the Israeli epidemic will continue to produce into late November an average of around 1500 new case per day, unless the population practices social-relaxation measures at least 5-fold below the level in August, which itself is 4-fold below the level at the start of July. Our analysis of the relatively late South African outbreak that became the world's fifth largest COVID-19 epidemic in July revealed that the decline through late July and early August was characterised by a social distancing driver operating at more than twice the per-capita applicable-disease-class (pc-adc) rate of the social relaxation driver. Our analysis of the relatively early English outbreak, identified a more than 2-fold improvement in surveillance over the course of the epidemic. It also identified a pc-adc social distancing rate in early August that, though nearly four times the pc-adc social relaxation rate, appeared to barely contain a second wave that would break out if social distancing was further relaxed.

Conclusion

Our web app provides policy makers and health officers who have no epidemiological modelling or computer coding expertise with an invaluable tool for assessing the impacts of different outbreak mitigation policies and measures. This includes an ability to generate an epidemic-suppression or curve-flattening index that measures the intensity with which behavioural responses suppress or flatten the epidemic curve in the region under consideration.",2021-03-16 +30543627,G-quadruplex forming sequences in the genome of all known human viruses: A comprehensive guide.,"G-quadruplexes are non-canonical nucleic-acid structures that control transcription, replication, and recombination in organisms. G-quadruplexes are present in eukaryotes, prokaryotes, and viruses. In the latter, mounting evidence indicates their key biological activity. Since data on viruses are scattered, we here present a comprehensive analysis of potential quadruplex-forming sequences (PQS) in the genome of all known viruses that can infect humans. We show that occurrence and location of PQSs are features characteristic of each virus class and family. Our statistical analysis proves that their presence within the viral genome is orderly arranged, as indicated by the possibility to correctly assign up to two-thirds of viruses to their exact class based on the PQS classification. For each virus we provide: i) the list of all PQS present in the genome (positive and negative strands), ii) their position in the viral genome, iii) the degree of conservation among strains of each PQS in its genome context, iv) the statistical significance of PQS abundance. This information is accessible from a database to allow the easy navigation of the results: http://www.medcomp.medicina.unipd.it/main_site/doku.php?id=g4virus. 
The availability of these data will greatly expedite research on G-quadruplex in viruses, with the possibility to accelerate finding therapeutic opportunities to numerous and some fearsome human diseases.",2018-12-13 +26852673,Multi-tissue transcriptomics for construction of a comprehensive gene resource for the terrestrial snail Theba pisana.,"The land snail Theba pisana is native to the Mediterranean region but has become one of the most abundant invasive species worldwide. Here, we present three transcriptomes of this agriculture pest derived from three tissues: the central nervous system, hepatopancreas (digestive gland), and foot muscle. Sequencing of the three tissues produced 339,479,092 high quality reads and a global de novo assembly generated a total of 250,848 unique transcripts (unigenes). BLAST analysis mapped 52,590 unigenes to NCBI non-redundant protein databases and further functional analysis annotated 21,849 unigenes with gene ontology. We report that T. pisana transcripts have representatives in all functional classes and a comparison of differentially expressed transcripts amongst all three tissues demonstrates enormous differences in their potential metabolic activities. The genes differentially expressed include those with sequence similarity to those genes associated with multiple bacterial diseases and neurological diseases. To provide a valuable resource that will assist functional genomics study, we have implemented a user-friendly web interface, ThebaDB (http://thebadb.bioinfo-minzhao.org/). 
This online database allows for complex text queries, sequence searches, and data browsing by enriched functional terms and KEGG mapping.",2016-02-08 +32274408,"Dataset of lipids, antioxidative status and color attributes in cows meat from slaughter to storage: Impacts of diet supplementations and pre-slaughter stress.","This data article presents a dataset with 34 values of the fatty acids composition and of indicators of lipid oxidation determined in the Longissimus dorsi and Semitendinosus from 71 Normand cull-cows at slaughter, after muscle aging and after meat storage periods under different packaging conditions. Cows were subjected to 3 feeding diets and 2 slaughter protocols relative to pre-slaughter stress. The indicators of lipids, FA composition, antioxidative enzymes activities, antioxidative status and global lipid oxidation of the muscles, and meat at different time points and under different aging and storage conditions, may be used to increase our understanding of the evolution of oxidation and consequences on color development. The last research article published on part of these data [1] is available for some interpretive insights: https://doi.org/10.1016/j.foodchem.2019.125668.",2020-03-16 +33173130,Enhancing protein backbone angle prediction by using simpler models of deep neural networks.,"Protein structure prediction is a grand challenge. Prediction of protein structures via the representations using backbone dihedral angles has recently achieved significant progress along with the on-going surge of deep neural network (DNN) research in general. However, we observe that in the protein backbone angle prediction research, there is an overall trend to employ more and more complex neural networks and then to throw more and more features to the neural networks. 
While more features might add more predictive power to the neural network, we argue that redundant features could rather clutter the scenario and more complex neural networks then just could counterbalance the noise. From artificial intelligence and machine learning perspectives, problem representations and solution approaches do mutually interact and thus affect performance. We also argue that comparatively simpler predictors can more easily be reconstructed than the more complex ones. With these arguments in mind, we present a deep learning method named Simpler Angle Predictor (SAP) to train simpler DNN models that enhance protein backbone angle prediction. We then empirically show that SAP can significantly outperform existing state-of-the-art methods on well-known benchmark datasets: for some types of angles, the differences are 6-8 in terms of mean absolute error (MAE). The SAP program along with its data is available from the website https://gitlab.com/mahnewton/sap .",2020-11-10 +32861138,Radioactivity of honey in central and southern Poland.,"In 2017, the Polish public consumed on average 0.61 kg of honey, while the European average consumption was 0.7 kg (Data on honey consumption in Poland, 2014) [http://www.portalspozywczy.pl]. The main point of this study was to investigate the 210Po activity concentrations in different types of floral and non-floral honey, type of clad honey is made of and honey yield in honey available on the Polish market. Activity of 210 Po in honey ranged from 0.006 ± 0.001 to 0.384 ± 0.004 Bq kg-1 with effective dose 0.005 ± 0.001 to 0.281 ± 0.003 μSv/year. The activity in honey was measured by alpha-spectrometry. The concentration of radionuclide depends on the raw material used by bees and plant type. 
The highest concentration of 210Po was observed in the honeydew honey and herbal honey.",2020-08-26 +26464438,The IUPHAR/BPS Guide to PHARMACOLOGY in 2016: towards curated quantitative interactions between 1300 protein targets and 6000 ligands.,"The IUPHAR/BPS Guide to PHARMACOLOGY (GtoPdb, http://www.guidetopharmacology.org) provides expert-curated molecular interactions between successful and potential drugs and their targets in the human genome. Developed by the International Union of Basic and Clinical Pharmacology (IUPHAR) and the British Pharmacological Society (BPS), this resource, and its earlier incarnation as IUPHAR-DB, is described in our 2014 publication. This update incorporates changes over the intervening seven database releases. The unique model of content capture is based on established and new target class subcommittees collaborating with in-house curators. Most information comes from journal articles, but we now also index kinase cross-screening panels. Targets are specified by UniProtKB IDs. Small molecules are defined by PubChem Compound Identifiers (CIDs); ligand capture also includes peptides and clinical antibodies. We have extended the capture of ligands and targets linked via published quantitative binding data (e.g. Ki, IC50 or Kd). The resulting pharmacological relationship network now defines a data-supported druggable genome encompassing 7% of human proteins. The database also provides an expanded substrate for the biennially published compendium, the Concise Guide to PHARMACOLOGY. This article covers content increase, entity analysis, revised curation strategies, new website features and expanded download options.",2015-10-12 +32083980,How Does Our Voice Change as We Age? A Systematic Review and Meta-Analysis of Acoustic and Perceptual Voice Data From Healthy Adults Over 50 Years of Age.,"Purpose Approximately 30% of adults over the age of 50 years present with altered vocal function. 
Our understanding of how these changes manifest acoustically and perceptually is derived from relatively modest-sized studies using a diversity of tools. Voice changes can arise from the onset of disease or disorder, but also age-related physiological changes, which may not reflect pathology as such. Here, we bring together data on acoustic, perceptual, and instrumental assessments (electroglottography), with the aim of gaining a better understanding of the changes occurring across these measurement domains. We consider these changes in the context of different acoustic features, software programs, and perceptual protocols. Method Studies of voice function in healthy older adults over the age of 50 years were sought. Literature was systematically searched with 746 abstracts reviewed. Forty-seven studies were included in the review. A meta-analysis of included studies compared voice acoustic parameters between sex and age. Sixteen acoustic parameters collected from 1,475 participants were analyzed in the meta-analysis. These included some previously unpublished analyses using data provided by authors of included studies. Results Data from the systematic review suggest that older individuals are perceived to present with higher overall scores of dysphonia and roughness, breathiness, strain, and instability. Acoustically, males have significantly higher scores on measures of perturbation, including noise-to-harmonic ratio and absolute jitter. The meta-analysis outcomes suggest that participants aged 80-89 years produce significantly higher fundamental frequency, jitter percent, shimmer percent, and shimmer in decibels compared to participants aged 60-69 years and a significant increase in relative average perturbation, jitter percent, and shimmer in decibels compared to participants aged 70-79 years. Limited data were available comparing acoustic measures using the same acoustic software. 
Conclusions Variations in fundamental frequency and frequency and amplitude perturbation increase as healthy adults age. It was difficult to draw definitive conclusions based on existing literature due to variability in hardware used, limited descriptions of study cohorts, or missing data from statistical analysis. Supplemental Material https://doi.org/10.23641/asha.11868663.",2020-02-20 +29590301,"GENEASE: real time bioinformatics tool for multi-omics and disease ontology exploration, analysis and visualization.","Motivation:Advances in high-throughput sequencing technologies have made it possible to generate multiple omics data at an unprecedented rate and scale. The accumulation of these omics data far outpaces the rate at which biologists can mine and generate new hypothesis to test experimentally. There is an urgent need to develop a myriad of powerful tools to efficiently and effectively search and filter these resources to address specific post-GWAS functional genomics questions. However, to date, these resources are scattered across several databases and often lack a unified portal for data annotation and analytics. In addition, existing tools to analyze and visualize these databases are highly fragmented, resulting researchers to access multiple applications and manual interventions for each gene or variant in an ad hoc fashion until all the questions are answered. Results:In this study, we present GENEASE, a web-based one-stop bioinformatics tool designed to not only query and explore multi-omics and phenotype databases (e.g. GTEx, ClinVar, dbGaP, GWAS Catalog, ENCODE, Roadmap Epigenomics, KEGG, Reactome, Gene and Phenotype Ontology) in a single web interface but also to perform seamless post genome-wide association downstream functional and overlap analysis for non-coding regulatory variants. 
GENEASE accesses over 50 different databases in public domain including model organism-specific databases to facilitate gene/variant and disease exploration, enrichment and overlap analysis in real time. It is a user-friendly tool with point-and-click interface containing links for support information including user manual and examples. Availability and implementation:GENEASE can be accessed freely at http://research.cchmc.org/mershalab/GENEASE/login.html. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-09-01 +,European Vegetation Archive (EVA): an integrated database of European vegetation plots,"The European Vegetation Archive (EVA) is a centralized database of European vegetation plots developed by the IAVS Working Group European Vegetation Survey. It has been in development since 2012 and first made available for use in research projects in 2014. It stores copies of national and regional vegetation‐ plot databases on a single software platform. Data storage in EVA does not affect on‐going independent development of the contributing databases, which remain the property of the data contributors. EVA uses a prototype of the database management software TURBOVEG 3 developed for joint management of multiple databases that use different species lists. This is facilitated by the SynBioSys Taxon Database, a system of taxon names and concepts used in the individual European databases and their corresponding names on a unified list of European flora. TURBOVEG 3 also includes procedures for handling data requests, selections and provisions according to the approved EVA Data Property and Governance Rules. By 30 June 2015, 61 databases from all European regions have joined EVA, contributing in total 1 027 376 vegetation plots, 82% of them with geographic coordinates, from 57 countries. 
EVA provides a unique data source for large‐scale analyses of European vegetation diversity both for fundamental research and nature conservation applications. Updated information on EVA is available online at http://euroveg.org/eva-database.",2016-01-01 +33483370,Gut Microbiota Condition the Therapeutic Efficacy of Trastuzumab in HER2-Positive Breast Cancer.,"Emerging evidence indicates that gut microbiota affect the response to anticancer therapies by modulating the host immune system. In this study, we investigated the impact of gut microbiota on immune-mediated trastuzumab antitumor efficacy in preclinical models of HER2-positive breast cancer and in 24 patients with primary HER2-positive breast cancer undergoing trastuzumab-containing neoadjuvant treatment. In mice, the antitumor activity of trastuzumab was impaired by antibiotic administration or fecal microbiota transplantation from antibiotic-treated donors. Modulation of the intestinal microbiota was reflected in tumors by impaired recruitment of CD4+ T cells and granzyme B-positive cells after trastuzumab treatment. Antibiotics caused reductions in dendritic cell (DC) activation and the release of IL12p70 upon trastuzumab treatment, a mechanism that was necessary for trastuzumab effectiveness in our model. In patients, lower α-diversity and lower abundance of Lachnospiraceae, Turicibacteraceae, Bifidobacteriaceae, and Prevotellaceae characterized nonresponsive patients (NR) compared with those who achieved pathologic complete response (R), similar to antibiotic-treated mice. The transfer of fecal microbiota from R and NR into mice bearing HER2-positive breast cancer recapitulated the response to trastuzumab observed in patients. Fecal microbiota β-diversity segregated patients according to response and positively correlated with immune signature related to interferon (IFN) and NO2-IL12 as well as activated CD4+ T cells and activated DCs in tumors. 
Overall, our data reveal the direct involvement of the gut microbiota in trastuzumab efficacy, suggesting that manipulation of the gut microbiota is an optimal future strategy to achieve a therapeutic effect or to exploit its potential as a biomarker for treatment response. SIGNIFICANCE: Evidence of gut microbiota involvement in trastuzumab efficacy represents the foundation for new therapeutic strategies aimed at manipulating commensal bacteria to improve response in trastuzumab-resistant patients.See related commentary by Sharma, p. 1937 GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/8/2195/F1.large.jpg.",2021-01-22 +27924018,DisGeNET: a comprehensive platform integrating information on human disease-associated genes and variants.,"The information about the genetic basis of human diseases lies at the heart of precision medicine and drug discovery. However, to realize its full potential to support these goals, several problems, such as fragmentation, heterogeneity, availability and different conceptualization of the data must be overcome. To provide the community with a resource free of these hurdles, we have developed DisGeNET (http://www.disgenet.org), one of the largest available collections of genes and variants involved in human diseases. DisGeNET integrates data from expert curated repositories, GWAS catalogues, animal models and the scientific literature. DisGeNET data are homogeneously annotated with controlled vocabularies and community-driven ontologies. Additionally, several original metrics are provided to assist the prioritization of genotype-phenotype relationships. The information is accessible through a web interface, a Cytoscape App, an RDF SPARQL endpoint, scripts in several programming languages and an R package. 
DisGeNET is a versatile platform that can be used for different research purposes including the investigation of the molecular underpinnings of specific human diseases and their comorbidities, the analysis of the properties of disease genes, the generation of hypothesis on drug therapeutic action and drug adverse effects, the validation of computationally predicted disease genes and the evaluation of text-mining methods performance.",2016-10-19 +26130332,'LungGENS': a web-based tool for mapping single-cell gene expression in the developing lung.,"We developed LungGENS (Lung Gene Expression iN Single-cell), a web-based bioinformatics resource for querying single-cell gene expression databases by entering a gene symbol or a list of genes or selecting a cell type of their interest. Gene query provides quantitative RNA expression of the gene of interest in each lung cell type. Cell type query returns associated selective gene signatures and genes encoding cell surface markers and transcription factors in interactive heatmap and tables. LungGENS will be broadly applicable in respiratory research, providing a cell-specific RNA expression resource at single-cell resolution. LungGENS is freely available for non-commercial use at https://research.cchmc.org/pbge/lunggens/default.html.",2015-06-30 +30936866,"Inferred Allelic Variants of Immunoglobulin Receptor Genes: A System for Their Evaluation, Documentation, and Naming.","Immunoglobulins or antibodies are the main effector molecules of the B-cell lineage and are encoded by hundreds of variable (V), diversity (D), and joining (J) germline genes, which recombine to generate enormous IG diversity. Recently, high-throughput adaptive immune receptor repertoire sequencing (AIRR-seq) of recombined V-(D)-J genes has offered unprecedented insights into the dynamics of IG repertoires in health and disease. 
Faithful biological interpretation of AIRR-seq studies depends upon the annotation of raw AIRR-seq data, using reference germline gene databases to identify the germline genes within each rearrangement. Existing reference databases are incomplete, as shown by recent AIRR-seq studies that have inferred the existence of many previously unreported polymorphisms. Completing the documentation of genetic variation in germline gene databases is therefore of crucial importance. Lymphocyte receptor genes and alleles are currently assigned by the Immunoglobulins, T cell Receptors and Major Histocompatibility Nomenclature Subcommittee of the International Union of Immunological Societies (IUIS) and managed in IMGT®, the international ImMunoGeneTics information system® (IMGT). In 2017, the IMGT Group reached agreement with a group of AIRR-seq researchers on the principles of a streamlined process for identifying and naming inferred allelic sequences, for their incorporation into IMGT®. These researchers represented the AIRR Community, a network of over 300 researchers whose objective is to promote all aspects of immunoglobulin and T-cell receptor repertoire studies, including the standardization of experimental and computational aspects of AIRR-seq data generation and analysis. The Inferred Allele Review Committee (IARC) was established by the AIRR Community to devise policies, criteria, and procedures to perform this function. Formalized evaluations of novel inferred sequences have now begun and submissions are invited via a new dedicated portal (https://ogrdb.airr-community.org). Here, we summarize recommendations developed by the IARC-focusing, to begin with, on human IGHV genes-with the goal of facilitating the acceptance of inferred allelic variants of germline IGHV genes. 
We believe that this initiative will improve the quality of AIRR-seq studies by facilitating the description of human IG germline gene variation, and that in time, it will expand to the documentation of TR and IG genes in many vertebrate species.",2019-03-18 +31428882,Predictive validity of a novel non-invasive estimation of effective shunt fraction in critically ill patients.,"

Background

Accurate measurement of pulmonary oxygenation is important for classification of disease severity and quantification of outcomes in clinical studies. Currently, tension-based methods such as P/F ratio are in widespread use, but are known to be less accurate than content-based methods. However, content-based methods require invasive measurements or sophisticated equipment that are rarely used in clinical practice. We devised two new methods to infer shunt fraction from a single arterial blood gas sample: (1) a non-invasive effective shunt (ES) fraction calculated using a rearrangement of the indirect Fick equation, standard constants, and a procedural inversion of the relationship between content and tension and (2) inferred values from a database of outputs from an integrated mathematical model of gas exchange (DB). We compared the predictive validity-the accuracy of predictions of PaO2 following changes in FIO2-of each measure in a retrospective database of 78,159 arterial blood gas (ABG) results from critically ill patients.

Results

In a formal test set comprising 9,635 pairs of ABGs, the median absolute error (MAE) values for the four measures were as follows: alveolar-arterial difference, 7.30 kPa; PaO2/FIO2 ratio, 2.41 kPa; DB, 2.13 kPa; and ES, 1.88 kPa. ES performed significantly better than other measures (p < 10-10 in all comparisons). Further exploration of the DB method demonstrated that obtaining two blood gas measurements at different FIO2 provides a more precise description of pulmonary oxygenation.

Conclusions

Effective shunt can be calculated using a computationally efficient procedure using routinely collected arterial blood gas data and has better predictive validity than other analytic methods. For practical assessment of oxygenation in clinical research, ES should be used in preference to other indices. ES can be calculated at http://baillielab.net/es .",2019-08-20 +26586809,probeBase--an online resource for rRNA-targeted oligonucleotide probes and primers: new features 2016.,"probeBase http://www.probebase.net is a manually maintained and curated database of rRNA-targeted oligonucleotide probes and primers. Contextual information and multiple options for evaluating in silico hybridization performance against the most recent rRNA sequence databases are provided for each oligonucleotide entry, which makes probeBase an important and frequently used resource for microbiology research and diagnostics. Here we present a major update of probeBase, which was last featured in the NAR Database Issue 2007. This update describes a complete remodeling of the database architecture and environment to accommodate computationally efficient access. Improved search functions, sequence match tools and data output now extend the opportunities for finding suitable hierarchical probe sets that target an organism or taxon at different taxonomic levels. 
To facilitate the identification of complementary probe sets for organisms represented by short rRNA sequence reads generated by amplicon sequencing or metagenomic analysis with next generation sequencing technologies such as Illumina and IonTorrent, we introduce a novel tool that recovers surrogate near full-length rRNA sequences for short query sequences and finds matching oligonucleotides in probeBase.",2015-11-19 +30530337,Intra-Cluster Distance Minimization in DNA Methylation Analysis Using an Advanced Tabu-Based Iterative k-Medoids Clustering Algorithm (T-CLUST).,"Recent advances in DNA methylation profiling have paved the way for understanding the underlying epigenetic mechanisms of various diseases such as cancer. While conventional distance-based clustering algorithms (e.g., hierarchical and k-means clustering) have been heavily used in such profiling owing to their speed in conduct of high-throughput analysis, these methods commonly converge to suboptimal solutions and/or trivial clusters due to their greedy search nature. Hence, methodologies are needed to improve the quality of clusters formed by these algorithms without sacrificing from their speed. In this study, we introduce three related algorithms for a complete high-throughput methylation analysis: a variance-based dimension reduction algorithm to handle high-dimensionality in data, an outlier detection algorithm to identify the outliers of data, and an advanced Tabu-based iterative k-medoids clustering algorithm (T-CLUST) to reduce the impact of initial solutions on the performance of conventional k-medoids algorithm. The performance of the proposed algorithms is demonstrated on nine different real DNA methylation datasets obtained from the Gene Expression Omnibus DataSets database. The accuracy of the cluster identification obtained by our proposed algorithms is higher than those of hierarchical and k-means clustering, as well as the conventional methods. 
The algorithms are implemented in MATLAB, and available at: http://www.coe.miami.edu/simlab/tclust.html.",2018-12-10 +32564149,Breast milk n-3 long-chain polyunsaturated fatty acids and blood pressure: an individual participant meta-analysis.,"

Purpose

It is controversial whether a higher intake of n-3 long-chain polyunsaturated fatty acids (n-3 LC PUFA) through breastfeeding is associated or not to a lower blood pressure (BP) during childhood. We aimed to clarify this point by undertaking a meta-analysis involving the data from seven European birth cohorts.

Methods

We searched https://www.birthcohort.net for studies that had collected breast milk samples, and had at least one BP measurement in childhood. Principal investigators were contacted, and all agreed to share data. One additional study was identified by contacts with the principal investigators. For each cohort, we analyzed the association of breast milk n-3 LC PUFAs with systolic and diastolic BP with linear mixed effects models or linear regression, and pooled the estimates with a random effects model. We also investigated age-specific and sex-specific associations.

Results

A total of 2188 participants from 7 cohorts were included. Overall, no associations between breast milk n-3 LC PUFAs and BP were observed. In the pooled analysis, each 0.1 wt% increment in breast milk docosahexaenoic acid (DHA) was associated with a 1.19 (95% CI - 3.31, 0.94) mmHg lower systolic BP. Associations were similar for boys and girls and at different ages.

Conclusion

In this individual participant meta-analysis, we found no evidence for an association between breast milk n-3 LC PUFAs and BP.",2020-06-20 +31524268,Casiopeina II‑gly acts on lncRNA MALAT1 by miR‑17‑5p to inhibit FZD2 expression via the Wnt signaling pathway during the treatment of cervical carcinoma.,"The present study investigated the underlying regulatory network involved in the differential expression of metastasis associated lung adenocarcinoma transcript 1 (MALAT1) long non‑coding (lnc)RNA, microRNA‑17‑5p (miR‑17‑5p) and frizzled class receptor 2 (FZD2) mRNA under the influence of Casiopeina II‑gly (Cas‑II‑gly) via the Wnt signaling pathway in cervical carcinoma (CC). The gene expression data were obtained from the Gene Expression Omnibus database (https://www.ncbi.nlm.nih.gov/geo/), and the differentially expressed genes were determined using R software. The R ClusterProfiler and enrichplot packages were applied for gene‑set enrichment analysis based on the Gene Ontology biological process and Kyoto Encyclopedia of Genes and Genomes databases. TargetScan and the starBase database were used to predict the targeting associations between the miRNAs and lncRNAs/mRNAs. The MALAT1/miR‑17‑5p/mRNA FZD2 expression levels were measured via reverse transcription‑quantitative polymerase chain reaction. The protein expression was monitored by western blot analysis. The target association among the lncRNA MALAT1, miR‑17‑5p and FZD2 was validated via a dual luciferase reporter assay. Cell viability and apoptosis were determined via MTT assays, EdU staining and flow cytometry. The results indicated that the expression levels of lncRNA MALAT1 and FZD2 mRNA were downregulated, while miR‑17‑5p expression was upregulated in HeLa and CaSki cells treated with increasing Cas‑II‑gly concentrations. The cell viability was decreased, and the apoptosis rate was increased in HeLa and CaSki cells following Cas‑II‑gly treatment. 
Furthermore, western blot analysis results demonstrated that Cas‑II‑gly and the MALAT1/miR‑17‑5p/FZD2 axis could affect the expression of proteins associated with the Wnt signaling pathway, including disheveled segment polarity protein, glycogen synthase kinase‑3β and β‑catenin, and via the MALAT1/miR‑17‑5p/FZD2/Wnt signaling pathway axis.",2019-08-08 +33071296,The six scenario archetypes framework: A systematic investigation of science fiction films set in the future.,"We propose a new scenario archetypes method generated by extracting a set of archetypal images of the future from a sample of 140 science fiction films set in the future using a grounded theory analytical procedure. Six archetypes emerged from the data, and were named Growth & Decay, Threats & New Hopes, Wasteworlds, The Powers that Be, Disarray, and Inversion. The archetypes in part overlap with and confirm previous research, and in part are novel. They all involve stress-point critical conditions in the external environment. We explain why the six archetypes, as a foresight framework, is more transformational and nuanced than previously developed scenario archetypes frameworks, making it particularly suited to the current necessity to think the unthinkable more systematically. We explain how the six archetypes framework can be used as predetermined images of the future to create domain specific scenarios, making organizations more resilient to critical, disruptive futures. We finally present and discuss a case study of the application of the method to create scenarios of post-Covid-19 futures of work. (https://www.youtube.com/watch?v=q82_X7fN_XA).",2020-10-09 +26433225,SubtiWiki 2.0--an integrated database for the model organism Bacillus subtilis.,"To understand living cells, we need knowledge of each of their parts as well as about the interactions of these parts. To gain rapid and comprehensive access to this information, annotation databases are required. 
Here, we present SubtiWiki 2.0, the integrated database for the model bacterium Bacillus subtilis (http://subtiwiki.uni-goettingen.de/). SubtiWiki provides text-based access to published information about the genes and proteins of B. subtilis as well as presentations of metabolic and regulatory pathways. Moreover, manually curated protein-protein interactions diagrams are linked to the protein pages. Finally, expression data are shown with respect to gene expression under 104 different conditions as well as absolute protein quantification for cytoplasmic proteins. To facilitate the mobile use of SubtiWiki, we have now expanded it by Apps that are available for iOS and Android devices. Importantly, the App allows to link private notes and pictures to the gene/protein pages. Today, SubtiWiki has become one of the most complete collections of knowledge on a living organism in one single resource.",2015-10-03 +25378322,"UniPROBE, update 2015: new tools and content for the online database of protein-binding microarray data on protein-DNA interactions.","The Universal PBM Resource for Oligonucleotide Binding Evaluation (UniPROBE) serves as a convenient source of information on published data generated using universal protein-binding microarray (PBM) technology, which provides in vitro data about the relative DNA-binding preferences of transcription factors for all possible sequence variants of a length k ('k-mers'). The database displays important information about the proteins and displays their DNA-binding specificity data in terms of k-mers, position weight matrices and graphical sequence logos. This update to the database documents the growth of UniPROBE since the last update 4 years ago, and introduces a variety of new features and tools, including a new streamlined pipeline that facilitates data deposition by universal PBM data generators in the research community, a tool that generates putative nonbinding (i.e. 
negative control) DNA sequences for one or more proteins and novel motifs obtained by analyzing the PBM data using the BEEML-PBM algorithm for motif inference. The UniPROBE database is available at http://uniprobe.org.",2014-11-05 +33306439,The Effects of Dysphonic Voice on Speech Intelligibility in Cantonese-Speaking Adults.,"Purpose This study aims to investigate the effects of dysphonic voice on speech intelligibility in Cantonese-speaking adults. Method Speech recordings from three speakers with dysphonia secondary to phonotrauma and three speakers with healthy voices were presented to 30 healthy listeners (15 men and 15 women; M age = 22.7 years) under six noise conditions (signal-to-noise ratio [SNR] -10, SNR -5, SNR 0, SNR +5, SNR +10) and quiet conditions. The speech recordings were composed of sentences with five different lengths: five syllables, eight syllables, 10 syllables, 12 syllables, and 15 syllables. The effects of speaker's voice quality, background noise condition, and sentence length on speech intelligibility were examined. Speech intelligibility scores were calculated based on the listener's correct judgment of the number of syllables heard as a percentage of the total syllables in each stimulus. Results Dysphonic voices, as compared to healthy voices, were significantly more affected by background noise. Speech presented with dysphonic voices was significantly less intelligible than speech presented with healthy voices under unfavorable SNR conditions (SNR -10, SNR -5, and SNR 0 conditions). However, there was no sufficient evidence to suggest effects of sentence length on intelligibility, regardless of the speaker's voice quality or the level of background noise. Conclusions This study provides empirical data on the impacts of dysphonic voice on speech intelligibility in Cantonese speakers. 
The findings highlight the importance of educating the public about the impacts of voice quality and background noise on speech intelligibility and the potential of compensatory strategies that specifically address these barriers. Supplemental Material https://doi.org/10.23641/asha.13335926.",2020-12-11 +31470809,Minor intron splicing revisited: identification of new minor intron-containing genes and tissue-dependent retention and alternative splicing of minor introns.,"

Background

Mutations in minor spliceosome components such as U12 snRNA (cerebellar ataxia) and U4atac snRNA (microcephalic osteodysplastic primordial dwarfism type 1 (MOPD1)) result in tissue-specific symptoms. Given that the minor spliceosome is ubiquitously expressed, we hypothesized that these restricted phenotypes might be caused by the tissue-specific regulation of the minor spliceosome targets, i.e. minor intron-containing genes (MIGs). The current model of inefficient splicing is thought to apply to the regulation of the ~ 500 MIGs identified in the U12DB. However this database was created more than 10 years ago. Therefore, we first wanted to revisit the classification of minor introns in light of the most recent reference genome. We then sought to address specificity of MIG expression, minor intron retention, and alternative splicing (AS) across mouse and human tissues.

Results

We employed position-weight matrices to obtain a comprehensive updated list of minor introns, consisting of 722 mouse and 770 human minor introns. These can be found in the Minor Intron DataBase (MIDB). Besides identification of 99% of the minor introns found in the U12DB, we also discovered ~ 150 new MIGs. We then analyzed the RNAseq data from eleven different mouse tissues, which revealed tissue-specific MIG expression and minor intron retention. Additionally, many minor introns were efficiently spliced compared to their flanking major introns. Finally, we identified several novel AS events across minor introns in both mouse and human, which were also tissue-dependent. Bioinformatics analysis revealed that several of the AS events could result in the production of novel tissue-specific proteins. Moreover, like the major introns, we found that these AS events were more prevalent in long minor introns, while retention was favoured in shorter introns.

Conclusion

Here we show that minor intron splicing and AS across minor introns is a highly organised process that might be regulated in coordination with the major spliceosome in a tissue-specific manner. We have provided a framework to further study the impact of the minor spliceosome and the regulation of MIG expression. These findings may shed light on the mechanism underlying tissue-specific phenotypes in diseases associated with minor spliceosome inactivation. MIDB can be accessed at https://midb.pnb.uconn.edu .",2019-08-30 +25010047,Visualizing molecular profiles of glioblastoma with GBM-BioDP.,"Validation of clinical biomarkers and response to therapy is a challenging topic in cancer research. An important source of information for virtual validation is the datasets generated from multi-center cancer research projects such as The Cancer Genome Atlas project (TCGA). These data enable investigation of genetic and epigenetic changes responsible for cancer onset and progression, response to cancer therapies, and discovery of the molecular profiles of various cancers. However, these analyses often require bulk download of data and substantial bioinformatics expertise, which can be intimidating for investigators. Here, we report on the development of a new resource available to scientists: a data base called Glioblastoma Bio Discovery Portal (GBM-BioDP). GBM-BioDP is a free web-accessible resource that hosts a subset of the glioblastoma TCGA data and enables an intuitive query and interactive display of the resultant data. This resource provides visualization tools for the exploration of gene, miRNA, and protein expression, differential expression within the subtypes of GBM, and potential associations with clinical outcome, which are useful for virtual biological validation. The tool may also enable generation of hypotheses on how therapies impact GBM molecular profiles, which can help in personalization of treatment for optimal outcome. 
The resource can be accessed freely at http://gbm-biodp.nci.nih.gov (a tutorial is included).",2014-07-10 +24150938,GEISHA: an evolving gene expression resource for the chicken embryo.,"GEISHA (Gallus Expression In Situ Hybridization Analysis; http://geisha.arizona.edu) is an in situ hybridization gene expression and genomic resource for the chicken embryo. This update describes modifications that enhance its utility to users. During the past 5 years, GEISHA has undertaken a significant restructuring to more closely conform to the data organization and formatting of Model Organism Databases in other species. This has involved migrating from an entry-centric format to one that is gene-centered. Database restructuring has enabled the inclusion of data pertaining to chicken genes and proteins and their orthologs in other species. This new information is presented through an updated user interface. In situ hybridization data in mouse, frog, zebrafish and fruitfly are integrated with chicken genomic and expression information. A resource has also been developed that integrates the GEISHA interface information with the Online Mendelian Inheritance in Man human disease gene database. Finally, the Chicken Gene Nomenclature Committee database and the GEISHA database have been integrated so that they draw from the same data resources.",2013-10-22 +33531087,Psychometric qualities of the English Coping Scales of the Stress and Coping Inventory in a representative UK sample.,"

Background

The Coping Scales of the Stress and Coping Inventory (SCI; Satow in Stress- und Coping-Inventar (SCI): Test- und Skalendokumentation. Stress and coping inventory. http://www.drsatow.de , 2012) are well-established German self-report scales measuring five coping styles: Positive Thinking, Active Coping, Social Support, Support in Faith, and Alcohol and Cigarette Consumption. The purpose of this study was to translate the scales into English and to psychometrically evaluate this English version of the SCI coping scales with a representative sample of the UK population.

Methods

The coping scales of the SCI were forward-backward translated into English and administered to a representative sample according to age, gender, education, and region for the UK (N = 1006). Internal consistencies, factorial validity, and construct validity were assessed for both the original factor structure of the SCI, as well as a newly identified factor structure.

Results

The results for the original factor structure indicated good internal consistency and construct validity. The adaptive coping styles of this version were positively correlated with resilience and negatively with perceived stress. The maladaptive coping strategy, alcohol and cigarette consumption, showed the opposite correlations. The exploratory factor analysis (EFA) of the English version resulted in a five-factor structure, but some items loaded on different factors than in the German version. These new factors were Religious Coping, Social Support, Various Coping, Alcohol and Cigarette Consumption, and Reflective Coping. The novel factors showed similar correlations to resilience and perceived stress as the original factor structure. Only religious coping did not significantly correlate to perceived stress. Confirmatory factor analysis with the original factor structure of the German SCI coping scales revealed poor model fit for the English SCI coping scales.

Conclusion

The English SCI coping scales consistently and accurately measure five different coping styles. Nevertheless, the original factor structure of the SCI coping scales, when applied to an English-speaking sample, did not fit the data well. The new factor structure established by EFA is only preliminary and needs further validation in future large samples using the English version of the SCI coping scales.",2021-02-02 +29026435,HAlign-II: efficient ultra-large multiple sequence alignment and phylogenetic tree reconstruction with distributed and parallel computing.,"

Background

Multiple sequence alignment (MSA) plays a key role in biological sequence analyses, especially in phylogenetic tree construction. Extreme increase in next-generation sequencing results in shortage of efficient ultra-large biological sequence alignment approaches for coping with different sequence types.

Methods

Distributed and parallel computing represents a crucial technique for accelerating ultra-large (e.g. files more than 1 GB) sequence analyses. Based on HAlign and Spark distributed computing system, we implement a highly cost-efficient and time-efficient HAlign-II tool to address ultra-large multiple biological sequence alignment and phylogenetic tree construction.

Results

The experiments in the DNA and protein large scale data sets, which are more than 1GB files, showed that HAlign II could save time and space. It outperformed the current software tools. HAlign-II can efficiently carry out MSA and construct phylogenetic trees with ultra-large numbers of biological sequences. HAlign-II shows extremely high memory efficiency and scales well with increases in computing resource.

Conclusions

THAlign-II provides a user-friendly web server based on our distributed computing infrastructure. HAlign-II with open-source codes and datasets was established at http://lab.malab.cn/soft/halign.",2017-09-29 +28158179,TrypsNetDB: An integrated framework for the functional characterization of trypanosomatid proteins.,"Trypanosomatid parasites cause serious infections in humans and production losses in livestock. Due to the high divergence from other eukaryotes, such as humans and model organisms, the functional roles of many trypanosomatid proteins cannot be predicted by homology-based methods, rendering a significant portion of their proteins as uncharacterized. Recent technological advances have led to the availability of multiple systematic and genome-wide datasets on trypanosomatid parasites that are informative regarding the biological role(s) of their proteins. Here, we report TrypsNetDB (http://trypsNetDB.org), a web-based resource for the functional annotation of 16 different species/strains of trypanosomatid parasites. The database not only visualizes the network context of the queried protein(s) in an intuitive way but also examines the response of the represented network in more than 50 different biological contexts and its enrichment for various biological terms and pathways, protein sequence signatures, and potential RNA regulatory elements. The interactome core of the database, as of Jan 23, 2017, contains 101,187 interactions among 13,395 trypanosomatid proteins inferred from 97 genome-wide and focused studies on the interactome of these organisms.",2017-02-03 +32181856,sRIS: A Small RNA Illustration System for Plant Next-Generation Sequencing Data Analysis.,"Small RNA (sRNA), such as microRNA (miRNA) and short interfering RNA, are well-known to control gene expression based on degradation of target mRNA in plants. A considerable amount of research has applied next-generation sequencing (NGS) to reveal the regulatory pathways of plant sRNAs. 
Consequently, numerous bioinformatics tools have been developed for the purpose of analyzing sRNA NGS data. However, most methods focus on the study of sRNA expression profiles or novel miRNAs predictions. The analysis of sRNA target genes is usually not integrated into their pipelines. As a result, there is still no means available for identifying the interaction mechanisms between host and virus or the synergistic effects between two viruses. For the present study, a comprehensive system, called the Small RNA Illustration System (sRIS), has been developed. This system contains two main components. The first is for sRNA overview analysis and can be used not only to identify miRNA but also to investigate virus-derived small interfering RNA. The second component is for sRNA target prediction, and it employs both bioinformatics calculations and degradome sequencing data to enhance the accuracy of target prediction. In addition, this system has been designed so that figures and tables for the outputs of each analysis can be easily retrieved and accessed, making it easier for users to quickly identify and quantify their results. sRIS is available at http://sris.itps.ncku.edu.tw/.",2020-06-01 +33282488,Hybrid mesh and voxel based Monte Carlo algorithm for accurate and efficient photon transport modeling in complex bio-tissues.,"Over the past decade, an increasing body of evidence has suggested that three-dimensional (3-D) Monte Carlo (MC) light transport simulations are affected by the inherent limitations and errors of voxel-based domain boundaries. In this work, we specifically address this challenge using a hybrid MC algorithm, namely split-voxel MC or SVMC, that combines both mesh and voxel domain information to greatly improve MC simulation accuracy while remaining highly flexible and efficient in parallel hardware, such as graphics processing units (GPU). 
We achieve this by applying a marching-cubes algorithm to a pre-segmented domain to extract and encode sub-voxel information of curved surfaces, which is then used to inform ray-tracing computation within boundary voxels. This preservation of curved boundaries in a voxel data structure demonstrates significantly improved accuracy in several benchmarks, including a human brain atlas. The accuracy of the SVMC algorithm is comparable to that of mesh-based MC (MMC), but runs 2x-6x faster and requires only a lightweight preprocessing step. The proposed algorithm has been implemented in our open-source software and is freely available at http://mcx.space.",2020-10-08 +,First Identification of Taylorella equigenitalis From Genital Tracts of Thoroughbred Horses From the Inland Area of South Korea by Multilocus Sequence Typing,"The bacterium, Taylorella equigenitalis, is responsible for the disease contagious equine metritis (CEM), a highly contagious venereal disease of horses. There have been substantial economic losses reported in various equine industries across the world as a result of CEM. So far, there had been no reported cases of T. equigenitalis in the inland area of South Korea. This study was performed to determine the prevalence and the genotype of T. equigenitalis in the inland area of South Korea. In this study, 1 of 38 Thoroughbred horses was found positive for T. equigenitalis using bacterial culture. Multilocus sequence typing (MLST) and construction of a neighbor-joining tree based on the Taylorella spp. MLST database (http://pubmlst.org/taylorella) indicated that the inland South Korean T. equigenitalis strain in this study showed a distinct genotype and no epidemiologic relationship with other regional strains suggesting that the inland South Korean T. equigenitalis strain is unique. 
In order to prevent serious repercussions to the South Korean equine industry, a full epidemiologic investigation and comprehensive treatment regimen are needed.",2018-01-01 +32620074,TeaCoN: a database of gene co-expression network for tea plant (Camellia sinensis).,"

Background

Tea plant (Camellia sinensis) is one of the world's most important beverage crops due to its numerous secondary metabolites conferring tea quality and health effects. However, only a small fraction of tea genes (especially for those metabolite-related genes) have been functionally characterized to date. A cohesive bioinformatics platform is thus urgently needed to aid in the functional determination of the remaining genes.

Description

TeaCoN, a database of gene co-expression network for tea plant, was established to provide genome-wide associations in gene co-expression to survey gene modules (i.e., co-expressed gene sets) for a function of interest. TeaCoN featured a comprehensive collection of 261 high-quality RNA-Seq experiments that covered a wide range of tea tissues as well as various treatments for tea plant. In the current version of TeaCoN, 31,968 (94% coverage of the genome) tea gene models were documented. Users can retrieve detailed co-expression information for gene(s) of interest in four aspects: 1) co-expressed genes with the corresponding Pearson correlation coefficients (PCC-values) and statistical P-values, 2) gene information (gene ID, description, symbol, alias, chromosomal location, GO and KEGG annotation), 3) expression profile heatmap of co-expressed genes across seven main tea tissues (e.g., leaf, bud, stem, root), and 4) network visualization of co-expressed genes. We also implemented a gene co-expression analysis, BLAST search function, GO and KEGG enrichment analysis, and genome browser to facilitate use of the database.

Conclusion

The TeaCoN project can serve as a beneficial platform for candidate gene screening and functional exploration of important agronomical traits in tea plant. TeaCoN is freely available at http://teacon.wchoda.com .",2020-07-03 +33526470,Gain of HIF1 Activity and Loss of miRNA let-7d Promote Breast Cancer Metastasis to the Brain via the PDGF/PDGFR Axis.,"Early detection and adjuvant therapies have significantly improved survival of patients with breast cancer over the past three decades. In contrast, management of metastatic disease remains unresolved. Brain metastasis is a late complication frequently observed among patients with metastatic breast cancer, whose poor prognosis calls for novel and more effective therapies. Here, we report that active hypoxia inducible factor-1 (HIF1) signaling and loss of the miRNA let-7d concur to promote brain metastasis in a recently established model of spontaneous breast cancer metastasis from the primary site to the brain (4T1-BM2), and additionally in murine and human experimental models of breast cancer brain metastasis (D2A1-BM2 and MDA231-BrM2). Active HIF1 and let-7d loss upregulated expression of platelet-derived growth factor (PDGF) B/A in murine and human brain metastatic cells, respectively, while either individual silencing of HIF1α and PDGF-A/B or let-7d overexpression suppressed brain metastasis formation in the tested models. Let-7d silencing upregulated HIF1α expression and HIF1 activity, indicating a regulatory hierarchy of the system. The clinical relevance of the identified targets was supported by human gene expression data analyses. Treatment of mice with nilotinib, a kinase inhibitor impinging on PDGF receptor (PDGFR) signaling, prevented formation of spontaneous brain metastases in the 4T1-BM2 model and reduced growth of established brain metastases in mouse and human models. 
These results identify active HIF1 signaling and let-7d loss as coordinated events promoting breast cancer brain metastasis through increased expression of PDGF-A/B. Moreover, they identify PDGFR inhibition as a potentially actionable therapeutic strategy for patients with brain metastasis. SIGNIFICANCE: These findings show that loss of miRNA let-7d and active HIF1 signaling promotes breast cancer brain metastasis via PDGF and that pharmacologic inhibition of PDGFR suppresses brain metastasis, suggesting novel therapeutic opportunities. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/3/594/F1.large.jpg. See related article by Thies et al., p. 606.",2021-02-01 +30052770,MR4Cancer: a web server prioritizing master regulators for cancer.,"

Motivation

During cancer stage transition, a master regulator (MR) refers to the key gene controlling cancer initiation and progression by orchestrating the associated target genes (termed as its regulon). Due to their inherent importance, MRs can serve as critical biomarkers for cancer diagnosis and prognosis, and therapeutic targets. However, it is challenging to infer key MRs that might explain gene expression profile changes between two groups due to lack of context-specific regulons, whose expression level can collectively reflect the activity of likely MRs. There is also a need to design an easy-to-use tool for MR identification for the research community.

Results

First, we generated cancer-specific regulons for 26 cancer types by analyzing high-throughput omics data from TCGA, and extracted noncancer-specific regulons from public databases. We subsequently developed a web server MR4Cancer, integrating the regulons with statistical inference to identify and prioritize MRs driving a phenotypic divergence of interest. Based on the input gene list (e.g. differentially expressed genes) or expression profile with two groups, MR4Cancer outputs ranked MRs by enrichment testing against the predefined regulons. Gene Ontology and canonical pathway analyses are also conducted to elucidate the function of likely MRs. Moreover, MR4Cancer provides dynamic network visualization for MR-target relations, and users can interactively interrogate the network to produce new hypotheses and high-quality figures for publication. Finally, the presented case studies highlighted the performance of MR4Cancer. We expect this user-friendly and powerful web tool will provide researchers novel insights into tumorigenesis and therapeutic intervention.

Availability and implementation

http://cis.hku.hk/MR4Cancer.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +25348407,Genome3D: exploiting structure to help users understand their sequences.,"Genome3D (http://www.genome3d.eu) is a collaborative resource that provides predicted domain annotations and structural models for key sequences. Since introducing Genome3D in a previous NAR paper, we have substantially extended and improved the resource. We have annotated representatives from Pfam families to improve coverage of diverse sequences and added a fast sequence search to the website to allow users to find Genome3D-annotated sequences similar to their own. We have improved and extended the Genome3D data, enlarging the source data set from three model organisms to 10, and adding VIVACE, a resource new to Genome3D. We have analysed and updated Genome3D's SCOP/CATH mapping. Finally, we have improved the superposition tools, which now give users a more powerful interface for investigating similarities and differences between structural models.",2014-10-27 +30185806,Machine learning identified an Alzheimer's disease-related FDG-PET pattern which is also expressed in Lewy body dementia and Parkinson's disease dementia.,"Utilizing the publicly available neuroimaging database enabled by Alzheimer's disease Neuroimaging Initiative (ADNI; http://adni.loni.usc.edu/ ), we have compared the performance of automated classification algorithms that differentiate AD vs. normal subjects using Positron Emission Tomography (PET) with fluorodeoxyglucose (FDG). General linear model, scaled subprofile modeling and support vector machines were examined. Among the tested classification methods, support vector machine with Iterative Single Data Algorithm produced the best performance, i.e., sensitivity (0.84) × specificity (0.95), by 10-fold cross-validation. We have applied the same classification algorithm to four different datasets from ADNI, Health Science Centre (Winnipeg, Canada), Dong-A University Hospital (Busan, S. 
Korea) and Asan Medical Centre (Seoul, S. Korea). Our data analyses confirmed that the support vector machine with Iterative Single Data Algorithm showed the best performance in prediction of future development of AD from the prodromal stage (mild cognitive impairment), and that it was also sensitive to other types of dementia such as Parkinson's Disease Dementia and Dementia with Lewy Bodies, and that perfusion imaging using single photon emission computed tomography may achieve a similar accuracy to that of FDG-PET.",2018-09-05 +33585390,A Synergetic R-Shiny Portal for Modeling and Tracking of COVID-19 Data.,"The purpose of this paper is to introduce a useful online interactive dashboard (https://mahdisalehi.shinyapps.io/Covid19Dashboard/) that visualize and follow confirmed cases of COVID-19 in real-time. The dashboard was made publicly available on 6 April 2020 to illustrate the counts of confirmed cases, deaths, and recoveries of COVID-19 at the level of country or continent. This dashboard is intended as a user-friendly dashboard for researchers as well as the general public to track the COVID-19 pandemic, and is generated from trusted data sources and built in open-source R software (Shiny in particular); ensuring a high sense of transparency and reproducibility. The R Shiny framework serves as a platform for visualization and analysis of the data, as well as an advance to capitalize on existing data curation to support and enable open science. Coded analysis here includes logistic and Gompertz growth models, as two mathematical tools for predicting the future of the COVID-19 pandemic, as well as the Moran's index metric, which gives a spatial perspective via heat maps that may assist in the identification of latent responses and behavioral patterns. 
This analysis provides real-time statistical application aiming to make sense to academic- and public consumers of the large amount of data that is being accumulated due to the COVID-19 pandemic.",2020-01-01 +30629780,Indigenous Peoples and genomics: Starting a conversation.,"Compared to European ancestral groups, Indigenous Canadians are more likely to have uninterpretable genome-wide sequencing results due to non-representation in reference databases. We began a conversation with Indigenous Canadians to raise awareness and give voice to this issue. We co-created a video explaining genomic non-representation that included diverse Indigenous view-points. We audio-recorded the focus groups including 30 First Nations, Métis, and Inuit individuals living in Greater Vancouver. After watching an introductory video explaining genomic testing, participants discussed issues surrounding collecting Indigenous genomic data, its control, and usage. Transcripts were analyzed, and participants' quotes representing main themes were incorporated into the introductory video. Indigenous participants discussed data interpretation and gave approval for quote usage. The 20 participants who provided feedback concurred with the thematic interpretation: Systemic racism interlaced most conversations, particularly within the theme of trust. Themes of governance emphasized privacy and fear of discrimination. Some participants thought a separate, Indigenous-controlled database was essential; others recognized advantages of international databases. The theme of implementation included creative ideas to collect Indigenous genomes, but prior approval from Indigenous leaders was emphasized. 
The final video (https://youtu.be/-wivIBDjoi8) was shared with participants to use as they wish to promote awareness and ongoing discussion of genomic diagnostic inequity.",2018-12-14 +33244798,Comparing the motivational value of rewards and losses in an EEG-pupillometry study.,"We found earlier that performance-contingent rewards lead to faster performance than equivalent losses [Carsten, Hoofs, Boehler, & Krebs, 2019. Motivation Science, 5(3). http://dx.doi.org/10.1037/mot0000117]. Here, we further tested the hypothesis that motivation to gain rewards is higher than to avoid losses, even when incentive values are matched. As implicit markers of motivation, we assessed electroencephalography (EEG) focusing on the P3 after target and feedback onset, and the Feedback-Related Negativity (FRN), as well as simultaneously recorded pupil size. Comparing only reward and loss prospect trials in Experiment 1, we found no consistent differences in behavior and electrophysiological markers of motivation, although pupil data suggested higher arousal after feedback in potential-loss trials. Including additional no-incentive trials in Experiment 2, we found consistent evidence that motivation to gain rewards was higher than to avoid losses: In line with behavior, the target-P3 was most pronounced for reward-related stimuli, followed by loss and no-incentive ones. This same ranking was found in the P3 and the FRN after positive outcomes (i.e., reward, avoided loss, and correct feedback in no-incentive trials). Negative outcomes featured a different pattern in line with the pupil response, which suggests that losses are emotionally salient events, without invigorating behavior proportionally. In sum, these findings suggest that the motivation to gain rewards is more pronounced than motivation to avoid equivalent losses, at least in tasks promoting transient increases in attention triggered by incentive prospect. 
These motivational differences may arise as avoided losses are not profitable in the long term, in contrast to gained rewards.",2020-12-09 +32265289,Using Routinely Gathered Clinical Data to Develop a Prognostic Online Tool for Decannulation in Subjects With Acquired Brain Injury.,"

Background

Clinicians are often required to provide a qualified guess on the probability of decannulation in estimating patients' rehabilitation potential and relaying information about prognosis to patients and next of kin. The objective of this study was to use routinely gathered clinical data to develop a prognostic model of time to decannulation in subjects with acquired brain injury, for direct implementation in clinical practice.

Methods

Data from a large cohort including 574 tracheostomized subjects admitted for neurorehabilitation were analyzed using discrete time-to-event analysis with logit-link. Within this model, a reference hazard function was modeled using restricted cubic splines, and estimates were presented using odds ratios (95% CIs).

Results

A total of 411 subjects (72%) were decannulated within a median of 27 d (interquartile range 16-49) at the rehabilitation hospital. The prognostic model for decannulation included age, diagnosis, days from injury until admission for rehabilitation, swallowing, and overall functional level measured with the Early Functional Abilities score. Among these, the strongest predictors for decannulation were age and a combination of overall functional abilities combined with swallowing ability.

Conclusions

A prognostic model for decannulation was developed using routinely gathered clinical data. Based on the model, an online graphical user interface was applied, in which the probability of decannulation within x days is calculated along with the statistical uncertainty of the probability. Furthermore, a layman's interpretation is provided. The online tool was directly implemented in clinical practice at the rehabilitation hospital, and is available through this link: (http://www.hospitalsenhedmidt.dk/regionshospitalet-hammel/research-unit/Prognosissoftware/).",2020-04-07 +33408118,A Spatial and Functional Interaction of a Heterotetramer Survivin-DNA-PKcs Complex in DNA Damage Response.,"Substantial evidence has shown that overexpression of the inhibitor of apoptosis protein (IAP) survivin in human tumors correlates significantly with treatment resistance and poor patient prognosis. Survivin serves as a radiation resistance factor that impacts the DNA damage response by interacting with DNA-dependent protein kinase (DNA-PKcs). However, the complexity, molecular determinants, and functional consequences of this interrelationship remain largely unknown. By applying coimmunoprecipitation and flow cytometry-based Förster resonance energy transfer assays, we demonstrated a direct involvement of the survivin baculovirus IAP repeat domain in the regulation of radiation survival and DNA repair. This survivin-mediated activity required an interaction of residues S20 and W67 with the phosphoinositide 3-kinase (PI3K) domain of DNA-PKcs. In silico molecular docking and dynamics simulation analyses, in vitro kinase assays, and large-scale mass spectrometry suggested a heterotetrameric survivin-DNA-PKcs complex that results in a conformational change within the DNA-PKcs PI3K domain. Overexpression of survivin resulted in enhanced PI3K enzymatic activity and detection of differentially abundant phosphopeptides and proteins implicated in the DNA damage response. 
The survivin-DNA-PKcs interaction altered the S/T-hydrophobic motif substrate specificity of DNA-PKcs with a predominant usage of S/T-P phosphorylation sites and an increase of DNA-PKcs substrates including Foxo3. These data demonstrate that survivin differentially regulates DNA-PKcs-dependent radiation survival and DNA double-strand break repair via formation of a survivin-DNA-PKcs heterotetrameric complex. SIGNIFICANCE: These findings provide insight into survivin-mediated regulation of DNA-PKcs kinase and broaden our knowledge of the impact of survivin in modulating the cellular radiation response.See related commentary by Iliakis, p. 2270 GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/9/2304/F1.large.jpg.",2021-01-06 +30578913,TICA: Transcriptional Interaction and Coregulation Analyzer.,"Transcriptional regulation is critical to cellular processes of all organisms. Regulatory mechanisms often involve more than one transcription factor (TF) from different families, binding together and attaching to the DNA as a single complex. However, only a fraction of the regulatory partners of each TF is currently known. In this paper, we present the Transcriptional Interaction and Coregulation Analyzer (TICA), a novel methodology for predicting heterotypic physical interaction of TFs. TICA employs a data-driven approach to infer interaction phenomena from chromatin immunoprecipitation and sequencing (ChIP-seq) data. Its prediction rules are based on the distribution of minimal distance couples of paired binding sites belonging to different TFs which are located closest to each other in promoter regions. Notably, TICA uses only binding site information from input ChIP-seq experiments, bypassing the need to do motif calling on sequencing data. We present our method and test it on ENCODE ChIP-seq datasets, using three cell lines as reference including HepG2, GM12878, and K562. 
TICA positive predictions on ENCODE ChIP-seq data are strongly enriched when compared to protein complex (CORUM) and functional interaction (BioGRID) databases. We also compare TICA against both motif/ChIP-seq based methods for physical TF-TF interaction prediction and published literature. Based on our results, TICA offers significant specificity (average 0.902) while maintaining a good recall (average 0.284) with respect to CORUM, providing a novel technique for fast analysis of regulatory effect in cell lines. Furthermore, predictions by TICA are complementary to other methods for TF-TF interaction prediction (in particular, TACO and CENTDIST). Thus, combined application of these prediction tools results in much improved sensitivity in detecting TF-TF interactions compared to TICA alone (sensitivity of 0.526 when combining TICA with TACO and 0.585 when combining with CENTDIST) with little compromise in specificity (specificity 0.760 when combining with TACO and 0.643 with CENTDIST). TICA is publicly available at http://geco.deib.polimi.it/tica/.",2018-10-01 +32702842,Treatment outcomes in children with Acute lymphoblastic leukemia with versus without coexisting Down's syndrome: A systematic review and meta-analysis.,"

Background

Down syndrome (DS), also known as Trisomy 21, is a chromosomal disorder affecting approximately 1 in 732 newborns annually in the United States. Children with DS are more likely to develop acute lymphoblastic leukemia (ALL). For the management of pediatric ALL, different treatment protocols have been in use for years. However, ALL children with coexisting DS have been shown to have increased therapy-related toxicities compared to those without DS. Therefore, in this study, we aimed to systematically analyze the treatment outcomes in acute ALL children with versus without coexisting DS.

Methods

Electronic databases including the Web of Science, EMBASE, Cochrane Central, MEDLINE, http://www.ClinicalTrials.gov, and Google scholar were searched for publications reporting treatment related outcomes in ALL children with versus without co-existing DS. Several treatment protocols were used accordingly. This study had a long-term follow-up time period ranging from 5 to 10 years. The RevMan 5.3 software was used to carry out this analysis. Odds ratios (OR) with 95% confidence intervals (CI) were used to represent the results post analysis.

Results

A total number of 31,476 children with ALL enrolled between the years 1981 and 2011 were included. Among the total number of children with ALL, 1303 had coexisting DS. Our results showed that event-free survival was similar in ALL children with versus without DS (odds ratio [OR]: 1.34, 95% confidence interval [CI]: 0.51-3.50; P = .55). Overall mortality (OR: 1.63, 95% CI: 0.86-3.10; P = .13) and participants who achieved clinical remission (OR: 1.04, 95% CI: 0.12-9.29; P = .97) were also similarly manifested. However, treatment-related mortality (OR: 4.29, 95% CI: 2.90-6.36; P = .00001) and induction failure (OR: 2.77, 95% CI: 1.08-7.07; P = .03) were significantly higher in the DS group. Also, total (OR: 1.38, 95% CI: 1.02-1.88; P = .04) and bone marrow relapses (OR: 1.29, 95% CI: 1.00-1.67; P = .05) were significantly higher in ALL children with DS. Nevertheless, central nervous system relapse (OR: 1.15, 95% CI: 0.60-2.20; P = .67), testicular relapse (OR: 0.84, 95% CI: 0.38-1.85; P = .87), and other relapses (OR: 1.12, 95% CI: 0.27-4.62; P = .88) were not significantly different when these outcomes were separately analyzed.

Conclusion

Based on this analysis of the treatment outcomes in ALL children with versus without DS, event-free survival, overall mortality, and patients who achieved clinical remission were similar during this long-term follow-up time period. However, due to the significantly higher treatment-related mortality, induction failure, and certain relapses in ALL children with DS, new guidelines might have to focus on reconsidering or modifying treatment regimens for ALL children with DS.",2020-07-01 +32374845,The Quest for Orthologs benchmark service and consensus calls in 2020.,"The identification of orthologs-genes in different species which descended from the same gene in their last common ancestor-is a prerequisite for many analyses in comparative genomics and molecular evolution. Numerous algorithms and resources have been conceived to address this problem, but benchmarking and interpreting them is fraught with difficulties (need to compare them on a common input dataset, absence of ground truth, computational cost of calling orthologs). To address this, the Quest for Orthologs consortium maintains a reference set of proteomes and provides a web server for continuous orthology benchmarking (http://orthology.benchmarkservice.org). Furthermore, consensus ortholog calls derived from public benchmark submissions are provided on the Alliance of Genome Resources website, the joint portal of NIH-funded model organism databases.",2020-07-01 +32657384,Boosting the accuracy of protein secondary structure prediction through nearest neighbor search and method hybridization.,"

Motivation

Protein secondary structure prediction is a fundamental precursor to many bioinformatics tasks. Nearly all state-of-the-art tools when computing their secondary structure prediction do not explicitly leverage the vast number of proteins whose structure is known. Leveraging this additional information in a so-called template-based method has the potential to significantly boost prediction accuracy.

Method

We present a new hybrid approach to secondary structure prediction that gains the advantages of both template- and non-template-based methods. Our core template-based method is an algorithmic approach that uses metric-space nearest neighbor search over a template database of fixed-length amino acid words to determine estimated class-membership probabilities for each residue in the protein. These probabilities are then input to a dynamic programming algorithm that finds a physically valid maximum-likelihood prediction for the entire protein. Our hybrid approach exploits a novel accuracy estimator for our core method, which estimates the unknown true accuracy of its prediction, to discern when to switch between template- and non-template-based methods.

Results

On challenging CASP benchmarks, the resulting hybrid approach boosts the state-of-the-art Q8 accuracy by more than 2-10%, and Q3 accuracy by more than 1-3%, yielding the most accurate method currently available for both 3- and 8-state secondary structure prediction.

Availability and implementation

A preliminary implementation in a new tool we call Nnessy is available free for non-commercial use at http://nnessy.cs.arizona.edu.",2020-07-01 +32611314,Glutantβase: a database for improving the rational design of glucose-tolerant β-glucosidases.,"Β-glucosidases are key enzymes used in second-generation biofuel production. They act in the last step of the lignocellulose saccharification, converting cellobiose in glucose. However, most of the β-glucosidases are inhibited by high glucose concentrations, which turns it a limiting step for industrial production. Thus, β-glucosidases have been targeted by several studies aiming to understand the mechanism of glucose tolerance, pH and thermal resistance for constructing more efficient enzymes. In this paper, we present a database of β-glucosidase structures, called Glutantβase. Our database includes 3842 GH1 β-glucosidase sequences collected from UniProt. We modeled the sequences by comparison and predicted important features in the 3D-structure of each enzyme. Glutantβase provides information about catalytic and conserved amino acids, residues of the coevolution network, protein secondary structure, and residues located in the channel that guides to the active site. We also analyzed the impact of beneficial mutations reported in the literature, predicted in analogous positions, for similar enzymes. We suggested these mutations based on six previously described mutants that showed high catalytic activity, glucose tolerance, or thermostability (A404V, E96K, H184F, H228T, L441F, and V174C). Then, we used molecular docking to verify the impact of the suggested mutations in the affinity of protein and ligands (substrate and product). 
Our results suggest that only mutations based on the H228T mutant can reduce the affinity for glucose (product) and increase affinity for cellobiose (substrate), which indicates an increment in the resistance to product inhibition and agrees with computational and experimental results previously reported in the literature. More resistant β-glucosidases are essential to saccharification in industrial applications. However, thermostable and glucose-tolerant β-glucosidases are rare, and their glucose tolerance mechanisms appear to be related to multiple and complex factors. We gather here, a set of information, and made predictions aiming to provide a tool for supporting the rational design of more efficient β-glucosidases. We hope that Glutantβase can help improve second-generation biofuel production. Glutantβase is available at http://bioinfo.dcc.ufmg.br/glutantbase .",2020-07-01 +32427333,webPSN v2.0: a webserver to infer fingerprints of structural communication in biomacromolecules.,"A mixed Protein Structure Network (PSN) and Elastic Network Model-Normal Mode Analysis (ENM-NMA)-based strategy (i.e. PSN-ENM) was developed to investigate structural communication in bio-macromolecules. Protein Structure Graphs (PSGs) are computed on a single structure, whereas information on system dynamics is supplied by ENM-NMA. The approach was implemented in a webserver (webPSN), which was significantly updated herein. The webserver now handles both proteins and nucleic acids and relies on an internal upgradable database of network parameters for ions and small molecules in all PDB structures. Apart from the radical restyle of the server and some changes in the calculation setup, other major novelties concern the possibility to: a) compute the differences in nodes, links, and communication pathways between two structures (i.e. network difference) and b) infer links, hubs, communities, and metapaths from consensus networks computed on a number of structures. 
These new features are useful to identify commonalties and differences between two different functional states of the same system or structural-communication signatures in homologous or analogous systems. The output analysis relies on 3D-representations, interactive tables and graphs, also available for download. Speed and accuracy make this server suitable to comparatively investigate structural communication in large sets of bio-macromolecular systems. URL: http://webpsn.hpc.unimore.it.",2020-07-01 +32313959,Zebra2: advanced and easy-to-use web-server for bioinformatic analysis of subfamily-specific and conserved positions in diverse protein superfamilies.,"Zebra2 is a highly automated web-tool to search for subfamily-specific and conserved positions (i.e. the determinants of functional diversity as well as the key catalytic and structural residues) in protein superfamilies. The bioinformatic analysis is facilitated by Mustguseal-a companion web-server to automatically collect and superimpose a large representative set of functionally diverse homologs with high structure similarity but low sequence identity to the selected query protein. The results are automatically prioritized and provided at four information levels to facilitate the knowledge-driven expert selection of the most promising positions on-line: as a sequence similarity network; interfaces to sequence-based and 3D-structure-based analysis of conservation and variability; and accompanied by the detailed annotation of proteins accumulated from the integrated databases with links to the external resources. 
The integration of Zebra2 and Mustguseal web-tools provides the first of its kind out-of-the-box open-access solution to conduct a systematic analysis of evolutionarily related proteins implementing different functions within a shared 3D-structure of the superfamily, determine common and specific patterns of function-associated local structural elements, assist to select hot-spots for rational design and to prepare focused libraries for directed evolution. The web-servers are free and open to all users at https://biokinet.belozersky.msu.ru/zebra2, no login required.",2020-07-01 +32221613,A phylogenetic C interpreter for TNT.,"

Motivation

TNT (a widely used program for phylogenetic analysis) includes an interpreter for a scripting language, but that implementation is nonstandard and uses several conventions of its own. This article describes the implementation and basic usage of a C interpreter (with all the ISO essentials) now included in TNT. A phylogenetic library includes functions that can be used for manipulating trees and data, as well as other phylogeny-specific tasks. This greatly extends the capabilities of TNT.

Availability and implementation

Versions of TNT including the C interpreter for scripts can be downloaded from http://www.lillo.org.ar/phylogeny/tnt/.",2020-07-01 +32469061,FATCAT 2.0: towards a better understanding of the structural diversity of proteins.,"FATCAT 2.0 server (http://fatcat.godziklab.org/), provides access to a flexible protein structure alignment algorithm developed in our group. In such an alignment, rotations and translations between elements in the structure are allowed to minimize the overall root mean square deviation (RMSD) between the compared structures. This allows to effectively compare protein structures even if they underwent structural rearrangements in different functional forms, different crystallization conditions or as a result of mutations. The major update for the server introduces a new graphical interface, much faster database searches and several new options for visualization of the structural differences between proteins.",2020-07-01 +32324215,TFmotifView: a webserver for the visualization of transcription factor motifs in genomic regions.,"Transcription factors (TFs) regulate the expression of gene expression. The binding specificities of many TFs have been deciphered and summarized as position-weight matrices, also called TF motifs. Despite the availability of hundreds of known TF motifs in databases, it remains non-trivial to quickly query and visualize the enrichment of known TF motifs in genomic regions of interest. Towards this goal, we developed TFmotifView, a web server that allows to study the distribution of known TF motifs in genomic regions. Based on input genomic regions and selected TF motifs, TFmotifView performs an overlap of the genomic regions with TF motif occurrences identified using a dynamic P-value threshold. 
TFmotifView generates three different outputs: (i) an enrichment table and scatterplot calculating the significance of TF motif occurrences in genomic regions compared to control regions, (ii) a genomic view of the organisation of TF motifs in each genomic region and (iii) a metaplot summarizing the position of TF motifs relative to the center of the regions. TFmotifView will contribute to the integration of TF motif information with a wide range of genomic datasets towards the goal to better understand the regulation of gene expression by transcription factors. TFmotifView is freely available at http://bardet.u-strasbg.fr/tfmotifview/.",2020-07-01 +32421835,InterPred: a webtool to predict chemical autofluorescence and luminescence interference.,"High-throughput screening (HTS) research programs for drug development or chemical hazard assessment are designed to screen thousands of molecules across hundreds of biological targets or pathways. Most HTS platforms use fluorescence and luminescence technologies, representing more than 70% of the assays in the US Tox21 research consortium. These technologies are subject to interferent signals largely explained by chemicals interacting with light spectrum. This phenomenon results in up to 5-10% of false positive results, depending on the chemical library used. Here, we present the InterPred webserver (version 1.0), a platform to predict such interference chemicals based on the first large-scale chemical screening effort to directly characterize chemical-assay interference, using assays in the Tox21 portfolio specifically designed to measure autofluorescence and luciferase inhibition. InterPred combines 17 quantitative structure activity relationship (QSAR) models built using optimized machine learning techniques and allows users to predict the probability that a new chemical will interfere with different combinations of cellular and technology conditions. 
InterPred models have been applied to the entire Distributed Structure-Searchable Toxicity (DSSTox) Database (∼800,000 chemicals). The InterPred webserver is available at https://sandbox.ntp.niehs.nih.gov/interferences/.",2020-07-01 +31778088,Utilizing wavelet deep learning network to classify different states of task-fMRI for verifying activation regions.,"Purpose: We propose a convolutional neural network (CNN) based on wavelet for verifying the activation regions decided with statistical analysis. Because the functional magnetic resonance imaging (fMRI) data contains lots of noises, it is difficult to get the data of blood-oxygen-level dependent (BOLD) signal directly for intervention testing like animal studies. So it is difficult to effectively verify these activation regions. Based on the rapid development of deep learning technology. Materials and methods: We select the task fMRI data of presenting food and nonfood pictures to volunteer subjects from open public data, whose website is https://www.openfmri.org/dataset/ds000157/. Firstly, the brain activation regions are obtained by utilizing the method of statistical analysis. Then the spatial coordinates are acquired from the activation regions by checking the atlas table. The P-value of the activation regions are less 0.05. The activation regions are the most responsive to perceive the differences of BOLD in the brain between the two states, presenting food and nonfood pictures. We select the part task fMRI data of from the activation regions, for preparing the training and validation samples. Then we design a deep leaning network based on wavelet to classify the task fMRI data between food and nonfood.Results and conclusions: The classification accuracy is 80.23%. However, when we select the spatial coordinates of other inactivation regions, the classification accuracy is only 60%. 
The differences of classification accuracy between the activation regions and the inactivation regions prove that the activation regions selected with statistical analysis method are accurate and effective. The two methods of deep learning and statistical analysis can be cross-validated for the study of human being brain.",2019-12-04 +25388145,GlycoRDF: an ontology to standardize glycomics data in RDF.,"

Motivation

Over the last decades several glycomics-based bioinformatics resources and databases have been created and released to the public. Unfortunately, there is no common standard in the representation of the stored information or a common machine-readable interface allowing bioinformatics groups to easily extract and cross-reference the stored information.

Results

An international group of bioinformatics experts in the field of glycomics have worked together to create a standard Resource Description Framework (RDF) representation for glycomics data, focused on glycan sequences and related biological source, publications and experimental data. This RDF standard is defined by the GlycoRDF ontology and will be used by database providers to generate common machine-readable exports of the data stored in their databases.

Availability and implementation

The ontology, supporting documentation and source code used by database providers to generate standardized RDF are available online (http://www.glycoinfo.org/GlycoRDF/).",2014-11-11 +34223056,Antibio'Malin: an e-health resource to raise awareness of antibiotic stewardship and resistance in France.,"

Objectives

To develop a nationwide French website with reliable, practical and public-oriented information on antibiotic stewardship and resistance.

Methods

The design and evaluation were based on the following process: (i) development of a pilot website by a multidisciplinary group; (ii) evaluation phase, using mixed methods and involving health professionals (GPs and community pharmacists) and the general population; and (iii) launch of a final version of the website with 6 month follow-up usage statistics.

Results

The Antibio'Malin website (https://sante.fr/antibiomalin), supported by the French Ministry of Health, contains practical information for the general population on antibiotics marketed in the outpatient setting and on the most common infections, with an antibiotic stewardship perspective. A 'For further information' section provides details on various concepts, such as antibiotic resistance. As part of the evaluation, 8 general practitioners and 5 community pharmacists were individually interviewed, 46 health system users replied to an online questionnaire and 5 focus groups were conducted (17 participants). In addition, more than 100 people (professionals and general population) provided feedback directly on the website. The website was well received by health professionals, particularly general practitioners, and described as a reference site for patients and a communication tool. The general population also found the site useful. Several comments helped improve the website before the launch of the final version on 18 November 2019. At 6 month follow-up, more than 25 000 persons had visited the website.

Conclusions

The Antibio'Malin information website was developed and tested. Post-launch data suggest a useful addition to the multifaceted French national antibiotic stewardship strategy.",2020-12-08 +28827280,Systematic and Quantitative Assessment of Hydrogen Peroxide Reactivity With Cysteines Across Human Proteomes.,"Protein cysteinyl residues are the mediators of hydrogen peroxide (H2O2)-dependent redox signaling. However, site-specific mapping of the selectivity and dynamics of these redox reactions in cells poses a major analytical challenge. Here we describe a chemoproteomic platform to systematically and quantitatively analyze the reactivity of thousands of cysteines toward H2O2 in human cells. We identified >900 H2O2-sensitive cysteines, which are defined as the H2O2-dependent redoxome. Although redox sites associated with antioxidative and metabolic functions are consistent, most of the H2O2-dependent redoxome varies dramatically between different cells. Structural analyses reveal that H2O2-sensitive cysteines are less conserved than their redox-insensitive counterparts and display distinct sequence motifs, structural features, and potential for crosstalk with lysine modifications. Notably, our chemoproteomic platform also provides an opportunity to predict oxidation-triggered protein conformational changes. The data are freely accessible as a resource at http://redox.ncpsb.org/OXID/.",2017-08-21 +31168616,A dynamic programing approach to integrate gene expression data and network information for pathway model generation.,"

Motivation

As large amounts of biological data continue to be rapidly generated, a major focus of bioinformatics research has been aimed toward integrating these data to identify active pathways or modules under certain experimental conditions or phenotypes. Although biologically significant modules can often be detected globally by many existing methods, it is often hard to interpret or make use of the results toward pathway model generation and testing.

Results

To address this gap, we have developed the IMPRes algorithm, a new step-wise active pathway detection method using a dynamic programing approach. IMPRes takes advantage of the existing pathway interaction knowledge in Kyoto Encyclopedia of Genes and Genomes. Omics data are then used to assign penalties to genes, interactions and pathways. Finally, starting from one or multiple seed genes, a shortest path algorithm is applied to detect downstream pathways that best explain the gene expression data. Since dynamic programing enables the detection one step at a time, it is easy for researchers to trace the pathways, which may lead to more accurate drug design and more effective treatment strategies. The evaluation experiments conducted on three yeast datasets have shown that IMPRes can achieve competitive or better performance than other state-of-the-art methods. Furthermore, a case study on human lung cancer dataset was performed and we provided several insights on genes and mechanisms involved in lung cancer, which had not been discovered before.

Availability and implementation

IMPRes visualization tool is available via web server at http://digbio.missouri.edu/impres.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +27252399,Integration of new alternative reference strain genome sequences into the Saccharomyces genome database. ,"The Saccharomyces Genome Database (SGD; http://www.yeastgenome.org/) is the authoritative community resource for the Saccharomyces cerevisiae reference genome sequence and its annotation. To provide a wider scope of genetic and phenotypic variation in yeast, the genome sequences and their corresponding annotations from 11 alternative S. cerevisiae reference strains have been integrated into SGD. Genomic and protein sequence information for genes from these strains are now available on the Sequence and Protein tab of the corresponding Locus Summary pages. We illustrate how these genome sequences can be utilized to aid our understanding of strain-specific functional and phenotypic differences.Database URL: www.yeastgenome.org.",2016-06-01 +32433953,Single-Cell RNA Sequencing Reveals a Dynamic Stromal Niche That Supports Tumor Growth.,"Here, using single-cell RNA sequencing, we examine the stromal compartment in murine melanoma and draining lymph nodes (LNs) at points across tumor development, providing data at http://www.teichlab.org/data/. Naive lymphocytes from LNs undergo activation and clonal expansion within the tumor, before PD1 and Lag3 expression, while tumor-associated myeloid cells promote the formation of a suppressive niche. We identify three temporally distinct stromal populations displaying unique functional signatures, conserved across mouse and human tumors. Whereas ""immune"" stromal cells are observed in early tumors, ""contractile"" cells become more prevalent at later time points. Complement component C3 is specifically expressed in the immune population. Its cleavage product C3a supports the recruitment of C3aR+ macrophages, and perturbation of C3a and C3aR disrupts immune infiltration, slowing tumor growth. 
Our results highlight the power of scRNA-seq to identify complex interplays and increase stromal diversity as a tumor develops, revealing that stromal cells acquire the capacity to modulate immune landscapes from early disease.",2020-05-01 +,Climate warming effects on grape and grapevine moth (Lobesia botrana) in the Palearctic region,"The grapevine moth Lobesia botrana (Den. & Schiff.) (Lepidoptera: Tortricidae) is the principal native pest of grape in the Palearctic region. In the present study, we assessed prospectively the relative abundance of the moth in Europe and the Mediterranean Basin using linked physiologically‐based demographic models for grape and L. botrana. The model includes the effects of temperature, day‐length and fruit stage on moth development rates, survival and fecundity. Daily weather data for 1980–2010 were used to simulate the dynamics of grapevine and L. botrana in 4506 lattice cells across the region. Average grape yield and pupae per vine were used as metrics of favourability. The results were mapped using the grass Geographic Information System (http://grass.osgeo.org). The model predicts a wide distribution for L. botrana with highest populations in warmer regions in a wide band along latitude 40°N. The effects of climate warming on grapevine and L. botrana were explored using regional climate model projections based on the A1B scenario of an average +1.8 °C warming during the period 2040–2050 compared with the base period (1960–1970). Under climate change, grape yields increase northwards and with a higher elevation but decrease in hotter areas. Similarly, L. botrana levels increase in northern areas but decrease in the hot areas where summer temperatures approach its upper thermal limit.",2018-05-01 +31792435,Orchestrating single-cell analysis with Bioconductor.,"Recent technological advancements have enabled the profiling of a large number of genome-wide features in individual cells. 
However, single-cell data present unique challenges that require the development of specialized methods and software infrastructure to successfully derive biological insights. The Bioconductor project has rapidly grown to meet these demands, hosting community-developed open-source software distributed as R packages. Featuring state-of-the-art computational methods, standardized data infrastructure and interactive data visualization tools, we present an overview and online book (https://osca.bioconductor.org) of single-cell methods for prospective users.",2019-12-02 +24304891,Virus Variation Resource--recent updates and future directions.,"Virus Variation (http://www.ncbi.nlm.nih.gov/genomes/VirusVariation/) is a comprehensive, web-based resource designed to support the retrieval and display of large virus sequence datasets. The resource includes a value added database, a specialized search interface and a suite of sequence data displays. Virus-specific sequence annotation and database loading pipelines produce consistent protein and gene annotation and capture sequence descriptors from sequence records then map these metadata to a controlled vocabulary. The database supports a metadata driven, web-based search interface where sequences can be selected using a variety of biological and clinical criteria. Retrieved sequences can then be downloaded in a variety of formats or analyzed using a suite of tools and displays. Over the past 2 years, the pre-existing influenza and Dengue virus resources have been combined into a single construct and West Nile virus added to the resultant resource. A number of improvements were incorporated into the sequence annotation and database loading pipelines, and the virus-specific search interfaces were updated to support more advanced functions. Several new features have also been added to the sequence download options, and a new multiple sequence alignment viewer has been incorporated into the resource tool set. 
Together these enhancements should support enhanced usability and the inclusion of new viruses in the future.",2013-12-04 +26425990,"Mitochondrial capture enriches mito-DNA 100 fold, enabling PCR-free mitogenomics biodiversity analysis.","Biodiversity analyses based on next-generation sequencing (NGS) platforms have developed by leaps and bounds in recent years. A PCR-free strategy, which can alleviate taxonomic bias, was considered as a promising approach to delivering reliable species compositions of targeted environments. The major impediment of such a method is the lack of appropriate mitochondrial DNA enrichment ways. Because mitochondrial genomes (mitogenomes) make up only a small proportion of total DNA, PCR-free methods will inevitably result in a huge excess of data (>99%). Furthermore, the massive volume of sequence data is highly demanding on computing resources. Here, we present a mitogenome enrichment pipeline via a gene capture chip that was designed by virtue of the mitogenome sequences of the 1000 Insect Transcriptome Evolution project (1KITE, www.1kite.org). A mock sample containing 49 species was used to evaluate the efficiency of the mitogenome capture method. We demonstrate that the proportion of mitochondrial DNA can be increased by approximately 100-fold (from the original 0.47% to 42.52%). Variation in phylogenetic distances of target taxa to the probe set could in principle result in bias in abundance. However, the frequencies of input taxa were largely maintained after capture (R(2) = 0.81). We suggest that our mitogenome capture approach coupled with PCR-free shotgun sequencing could provide ecological researchers an efficient NGS method to deliver reliable biodiversity assessment.",2015-10-21 +29868863,ProAcePred: prokaryote lysine acetylation sites prediction based on elastic net feature optimization.,"

Motivation

Lysine acetylation exists extensively in prokaryotes, and plays a vital role in function adjustment. Recent progresses in the identification of prokaryote acetylation substrates and sites provide a great opportunity to explore the difference of substrate site specificity between prokaryotic and eukaryotic acetylation. Motif analysis suggests that prokaryotic and eukaryotic acetylation sites have distinct location-specific difference, and it is necessary to develop a prokaryote-specific acetylation sites prediction tool.

Results

Therefore, we collected nine species of prokaryote lysine acetylation data from various databases and literature, and developed a novel online tool named ProAcePred for predicting prokaryote lysine acetylation sites. Optimization of feature vectors via elastic net could considerably improve the prediction performance. Feature analyses demonstrated that evolutionary information played significant roles in prediction model for prokaryote acetylation. Comparison between our method and other tools suggested that our species-specific prediction outperformed other existing works. We expect that the ProAcePred could provide more instructive help for further experimental investigation of prokaryotes acetylation.

Availability and implementation

http://computbiol.ncu.edu.cn/ProAcePred.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +33224132,Integrated Co-functional Network Analysis on the Resistance and Virulence Features in Acinetobacter baumannii.,"Acinetobacter baumannii is one of the most troublesome bacterial pathogens that pose major public health threats due to its rapidly increasing drug resistance property. It is not only derived from clinic setting but also emerges from aquaculture as a fish pathogen, which could pass the resistant genes in the food chain. Understanding the mechanism of antibiotic resistance development and pathogenesis will aid our battle with the infections caused by A. baumannii. In this study, we constructed a co-functional network by integrating multiple sources of data from A. baumannii and then used the k-shell decomposition to analyze the co-functional network. We found that genes involving in basic cellular physiological function, including genes for antibiotic resistance, tended to have high k-shell values and locate in the internal layer of our network. In contrast, the non-essential genes, such as genes associated with virulence, tended to have lower k-shell values and locate in the external layer. This finding allows us to fish out the potential antibiotic resistance factors and virulence factors. In addition, we constructed an online platform ABviresDB (https://acba.shinyapps.io/ABviresDB/) for visualization of the network and features of each gene in A. baumannii. The network analysis in this study will not only aid the study on A. baumannii but also could be referenced for the research of antibiotic resistance and pathogenesis in other bacteria.",2020-11-02 +33580705,COVID-19 Inmate Risk Appraisal (CIRA): development and validation of a screening tool to assess COVID-19 vulnerability in prisons.,"

Objectives

To develop and validate a screening tool designed to identify detained people at increased risk for COVID-19 mortality, the COVID-19 Inmate Risk Appraisal (CIRA).

Design

Cross-sectional study with a representative sample (development) and a case-control sample (validation).

Setting

The two largest Swiss prisons.

Participants

(1) Development sample: all male persons detained in Pöschwies, Zurich (n = 365); (2) Validation sample: case-control sample of male persons detained in Champ-Dollon, Geneva (n = 192, matching 1:3 for participants at risk for severe course of COVID-19 and participants without risk factors).

Main outcome measures

The CIRA combined seven risk factors identified by the World Health Organization and the Swiss Federal Office of Public Health as predictive of severe COVID-19 to derive an absolute risk increase in mortality rate: Age ≥60 years, cardiovascular disease, diabetes, hypertension, chronic respiratory disease, immunodeficiency and cancer.

Results

Based on the development sample, we proposed a three-level classification: average (<3.7), elevated (3.7-5.7) and high (>5.7) risk. In the validation sample, the CIRA identified all individuals identified as vulnerable by national recommendations (having at least one risk factor). The category “elevated risk” maximised sensitivity (1) and specificity (0.97). The CIRA had even higher capacity in discriminating individuals vulnerable according to clinical evaluation (a four-level risk categorisation based on a consensus of medical staff). The category “elevated risk” maximised sensitivity and specificity (both 1). When considering the individuals classified as extremely high risk by medical staff, the category “high risk” had a high discriminatory capacity (sensitivity =0.89, specificity =0.97).

Conclusions

The CIRA scores have a high discriminative ability and will be important in custodial settings to support decisions and prioritise actions using a standardised valid assessment method. However, as knowledge on risk factors for COVID-19 mortality is still limited, the CIRA may be considered preliminary. Underlying data will be updated regularly on the website (http://www.prison-research.com), where the CIRA algorithm is freely available.",2021-02-08 +33555200,Fine Particle Exposure and Clinical Aggravation in Neurodegenerative Diseases in New York State.,"

Background

Adult-onset neurodegenerative diseases affect millions and negatively impact health care systems worldwide. Evidence suggests that air pollution may contribute to aggravation of neurodegeneration, but studies have been limited.

Objective

We examined the potential association between long-term exposure to particulate matter ≤2.5μm in aerodynamic diameter [fine particulate matter (PM2.5)] and disease aggravation in Alzheimer's (AD) and Parkinson's (PD) diseases and amyotrophic lateral sclerosis (ALS), using first hospitalization as a surrogate of clinical aggravation.

Methods

We used data from the New York Department of Health Statewide Planning and Research Cooperative System (SPARCS 2000-2014) to construct annual county counts of first hospitalizations with a diagnosis of AD, PD, or ALS (total, urbanicity-, sex-, and age-stratified). We used annual PM2.5 concentrations estimated by a prediction model at a 1-km2 resolution, which we aggregated to population-weighted county averages to assign exposure to cases based on county of residence. We used outcome-specific mixed quasi-Poisson models with county-specific random intercepts to estimate rate ratios (RRs) for a 1-y PM2.5 exposure. We allowed for nonlinear exposure-outcome relationships using penalized splines and accounted for potential confounders.

Results

We found a positive nonlinear PM2.5-PD association that plateaued above 11 μg/m3 (RR=1.09, 95% CI: 1.04, 1.14 for a PM2.5 increase from 8.1 to 10.4 μg/m3). We also found a linear PM2.5-ALS positive association (RR=1.05, 95% CI: 1.01, 1.09 per 1-μg/m3 PM2.5 increase), and suggestive evidence of an association with AD. We found effect modification by age for PD and ALS with a stronger positive association in patients <70 years of age but found insufficient evidence of effect modification by sex or urbanization level for any of the outcomes.

Conclusion

Our findings suggest that annual increase in county-level PM2.5 concentrations may contribute to clinical aggravation of PD and ALS. Importantly, the average annual PM2.5 concentration in our study was 8.1 μg/m3, below the current American national standards, suggesting the standards may not adequately protect the aging population. https://doi.org/10.1289/EHP7425.",2021-02-08 +,Identification and biosynthesis of acylphloroglucinols in Hypericum gentianoides,"Species of the genus Hypericum contain a rich array of unusual polyketides, however, only a small proportion of the over 450 Hypericum species, other than the popular medicinal supplement St. John's Wort (Hypericum perforatum), have even been chemically characterized. Hypericum gentianoides, a small annual used medicinally by Cherokee Americans, contains bioactive acylphloroglucinols. Here, we identify acylphloroglucinol constituents of H. gentianoides and determine a potential pathway to their synthesis. Liquid chromatography/electrospray ionization‐mass spectrometry (LC/ESI‐MS) and HPLC‐UV indicate that the level of accumulation and profile of acylphloroglucinols in H. gentianoides vary little seasonally when grown in a greenhouse, but do vary with development and are highly dependent on the accession, highlighting the importance of the selection of plant material for study. We identify the chemical structures of the nine prevalent polyketides, based on LC/ESI‐MS and hybrid quadrupole orthogonal time‐of‐flight (Q‐TOF) mass spectrometry; these metabolites include one monomeric phlorisobutyrophenone (PIB) derivative and eight dimeric acylphloroglucinols. Q‐TOF spectrometry was used to identify eight additional PIB derivatives that were not detected by LC/ESI‐MS. 
These data lead us to propose that diacylphloroglucinols are synthesized via modification of PIB to yield diverse phloroglucinol and filicinic acids moieties, followed by dimerization of a phloroglucinol and a filicinic acid monomer to yield the observed complement of diacylphloroglucinols. The metabolomics data from H. gentianoides are accessible in plant metabolomics resource (PMR) (http://www.metnetdb.org/pmr), a public metabolomics database with analysis software for plants and microbial organisms.",2013-07-01 +32280732,IRC data for a mechanistic route starting with H2O adsorption and finishing with H2 desorption from graphene.,"Intrinsic reaction coordinate (IRC) data regarding the interactions of water with a carbene-like active site located at the edge of a polyaromatic hydrocarbon [1-3] has been obtained using density functional theory (DFT) and the 6-31g(d) basis set as implemented in the Gaussian 16 software [4]. The data is presented as two videos (frontal and lateral mechanism views) combining four consecutive IRC calculations corresponding to the four different transition states presented on ""https://doi.org/10.1016/j.carbon.2020.01.011"" [3] (Figure 6, side approach). These videos provide powerful insights on two key aspects: a) the rotational process that occurs during water adsorption and b) the hydrogen gas desorption process during water gasification of carbons.",2020-02-29 +31623550,SDA: a semi-parametric differential abundance analysis method for metabolomics and proteomics data.,"BACKGROUND:Identifying differentially abundant features between different experimental groups is a common goal for many metabolomics and proteomics studies. However, analyzing data from mass spectrometry (MS) is difficult because the data may not be normally distributed and there is often a large fraction of zero values. Although several statistical methods have been proposed, they either require the data normality assumption or are inefficient. 
RESULTS:We propose a new semi-parametric differential abundance analysis (SDA) method for metabolomics and proteomics data from MS. The method considers a two-part model, a logistic regression for the zero proportion and a semi-parametric log-linear model for the possibly non-normally distributed non-zero values, to characterize data from each feature. A kernel-smoothed likelihood method is developed to estimate model coefficients and a likelihood ratio test is constructed for differential abundant analysis. The method has been implemented into an R package, SDAMS, which is available at https://www.bioconductor.org/packages/release/bioc/html/SDAMS.html . CONCLUSION:By introducing the two-part semi-parametric model, SDA is able to handle both non-normally distributed data and large fraction of zero values in a MS dataset. It also allows for adjustment of covariates. Simulations and real data analyses demonstrate that SDA outperforms existing methods.",2019-10-17 +25953081,"MIsoMine: a genome-scale high-resolution data portal of expression, function and networks at the splice isoform level in the mouse.","Products of multiexon genes, especially in higher organisms, are a mixture of isoforms with different or even opposing functions, and therefore need to be treated separately. However, most studies and available resources such as Gene Ontology provide only gene-level function annotations, and therefore lose the differential information at the isoform level. Here we report MIsoMine, a high-resolution portal to multiple levels of functional information of alternatively spliced isoforms in the mouse. This data portal provides tissue-specific expression patterns and co-expression networks, along with such previously published functional genomic data as protein domains, predicted isoform-level functions and functional relationships. 
The core utility of MIsoMine is allowing users to explore a preprocessed, quality-controlled set of RNA-seq data encompassing diverse tissues and cell lineages. Tissue-specific co-expression networks were established, allowing a 2D ranking of isoforms and tissues by co-expression patterns. The results of the multiple isoforms of the same gene are presented in parallel to facilitate direct comparison, with cross-talking to prioritized functions at the isoform level. MIsoMine provides the first isoform-level resolution effort at genome-scale. We envision that this data portal will be a valuable resource for exploring functional genomic data, and will complement the existing functionalities of the mouse genome informatics database and the gene expression database for the laboratory mouse. Database URL: http://guanlab.ccmb.med.umich.edu/misomine/",2015-05-07 +26200753,Pre_GI: a global map of ontological links between horizontally transferred genomic islands in bacterial and archaeal genomes.,"The Predicted Genomic Islands database (Pre_GI) is a comprehensive repository of prokaryotic genomic islands (islands, GIs) freely accessible at http://pregi.bi.up.ac.za/index.php. Pre_GI, Version 2015, catalogues 26 744 islands identified in 2407 bacterial/archaeal chromosomes and plasmids. It provides an easy-to-use interface which allows users the ability to query against the database with a variety of fields, parameters and associations. Pre_GI is constructed to be a web-resource for the analysis of ontological roads between islands and cartographic analysis of the global fluxes of mobile genetic elements through bacterial and archaeal taxonomic borders. Comparison of newly identified islands against Pre_GI presents an alternative avenue to identify their ontology, origin and relative time of acquisition. 
Pre_GI aims to aid research on horizontal transfer events and materials through providing data and tools for holistic investigation of migration of genes through ecological niches and taxonomic boundaries.",2015-06-17 +31737752,"Survey data on home gardeners and urban gardening practice in Pune, India.","The role of home gardens as productive green spaces in emerging megacities has so far been neglected when addressing pathways for sustainable urban development. This data article provides quantitative data from a questionnaire survey on gardening practice among home gardeners in Pune, one of the fastest growing cities in India and the Asian-Pacific region. Data include growing decisions and food production, fertilization, treatment of pests or irrigation as well as the cultural and recreational use of the garden. The survey also covered sociodemographic background information on the respondents and their gardening motivations. The data were used for a research article to build indicators for economic, environmental and socio-cultural sustainability dimensions of urban agriculture and analyze gardeners' characteristics that lead to increased sustainability outcomes entitled ""Home Gardening Practice in Pune (India), the Role of Communities, Urban Environment and the Contribution to Urban Sustainability""[1]. The data and questionnaire are provided at the Open Research Data repository at the Leibniz-Centre for Agricultural Landscape Research (ZALF), Germany (https://doi.org/10.4228/ZALF.DK.109). 
The data offer an empirical baseline to conduct studies on the interrelationships of gardeners' motivations, gardening practice and the value and outcomes of home gardening in an accelerated urbanization process.",2019-10-11 +26087378,Northwestern University schizophrenia data sharing for SchizConnect: A longitudinal dataset for large-scale integration.,"In this paper, we describe an instance of the Northwestern University Schizophrenia Data and Software Tool (NUSDAST), a schizophrenia-related dataset hosted at XNAT Central, and the SchizConnect data portal used for accessing and sharing the dataset. NUSDAST was built and extended upon existing, standard schemas available for data sharing on XNAT Central (http://central.xnat.org/). With the creation of SchizConnect, we were able to link NUSDAST to other neuroimaging data sources and create a powerful, federated neuroimaging resource.",2015-06-16 +32671073,USP15 Enhances Re-epithelialization Through Deubiquitinating EIF4A1 During Cutaneous Wound Repair.,"Re-epithelialization is a fundamental process in wound healing that involves various cytokines and cells during cutaneous barrier reconstruction. Ubiquitin-specific peptidase 15 (USP15), an important member of the deubiquitinating enzymes (DUBs), removes ubiquitin chains from target proteins and maintains protein stability. However, the dynamic role of USP15 in epithelialization remains unclear. We aimed to investigate the regulatory function of USP15 in re-epithelialization. An excisional wound splinting model was established to evaluate the re-epithelialization rate in Usp15 knockout (KO) mice. Coimmunoprecipitation (Co-IP) and mass spectrum analyses were performed to identify USP15-interacting proteins. RNA-sequencing was performed for transcriptome analysis in keratinocytes and uploaded into NODE database (http://www.biosino.org/node, accession numbers: OEP000770 and OEP000763). First, a significant delay in epithelialization was observed in the Usp15 KO mice. 
Moreover, inhibition of cell migration and proliferation was observed in the USP15-silenced keratinocytes (HaCaTs). Moreover, we revealed for the first time that USP15 could interact with eukaryotic initiation factor 4A-1 (EIF4A1), thereby promoting translational efficacy in keratinocytes, which is essential for keratinocyte proliferation and migration. Conclusively, the USP15-EIF4A1 complex significantly accelerated re-epithelialization in wound healing. These observations helped elucidate the function and mechanisms of USP15 in modulating re-epithelialization in wound healing, providing a promising target for refractory wound treatment.",2020-06-26 +32658789,Species-specific genomic sequences for classification of bacteria.,"Modern bacterial classification relies on genomic relatedness. Genetic variation in bacterial populations present a big challenge for taxonomic classification and recently several bacterial species have been reclassified based on the intra-species genome comparison. These were facilitated by next generation sequencing technologies and advances in genome comparison approaches which led to the rearrangement of diverse bacterial species and revolution in the microbial classification system. One of the outcome of these studies is the development of suitable DNA barcodes as reliable and cost-effective method for identifying various bacterial genera. Towards refining this further, we have applied a genome comparison approach in 1104 bacterial genome assemblies (excluding plasmids) to identify unique genomic segments among intra-species genome assemblies. Using extensive bioinformatics analysis, we have identified species-specific genomic regions and designed unique primers for 100 different species (belonging to 62 genera) which includes 62 pathogenic and 13 opportunistic pathogenic bacterial species and built a database (http://slsdb.manipal.edu/bact/). 
These species-specific genomic regions will have a major impact on in silico and molecular methods aimed at bacterial classification and identification. These may also serve as better DNA barcodes than the markers currently used for delineation of bacteria and may also find application in various translational research programs.",2020-06-26 +32240182,BWGS: A R package for genomic selection and its application to a wheat breeding programme.,"We developed an integrated R library called BWGS to enable easy computation of Genomic Estimates of Breeding values (GEBV) for genomic selection. BWGS, for BreedWheat Genomic selection, was developed in the framework of a cooperative private-public partnership project called Breedwheat (https://breedwheat.fr) and relies on existing R-libraries, all freely available from CRAN servers. The two main functions enable to run 1) replicated random cross validations within a training set of genotyped and phenotyped lines and 2) GEBV prediction, for a set of genotyped-only lines. Options are available for 1) missing data imputation, 2) markers and training set selection and 3) genomic prediction with 15 different methods, either parametric or semi-parametric. The usefulness and efficiency of BWGS are illustrated using a population of wheat lines from a real breeding programme. Adjusted yield data from historical trials (highly unbalanced design) were used for testing the options of BWGS. On the whole, 760 candidate lines with adjusted phenotypes and genotypes for 47 839 robust SNP were used. With a simple desktop computer, we obtained results which compared with previously published results on wheat genomic selection. As predicted by the theory, factors that are most influencing predictive ability, for a given trait of moderate heritability, are the size of the training population and a minimum number of markers for capturing every QTL information. 
Missing data up to 40%, if randomly distributed, do not degrade predictive ability once imputed, and up to 80% randomly distributed missing data are still acceptable once imputed with Expectation-Maximization method of package rrBLUP. It is worth noticing that selecting markers that are most associated to the trait do improve predictive ability, compared with the whole set of markers, but only when marker selection is made on the whole population. When marker selection is made only on the sampled training set, this advantage nearly disappeared, since it was clearly due to overfitting. Few differences are observed between the 15 prediction models with this dataset. Although non-parametric methods that are supposed to capture non-additive effects have slightly better predictive accuracy, differences remain small. Finally, the GEBV from the 15 prediction models are all highly correlated to each other. These results are encouraging for an efficient use of genomic selection in applied breeding programmes and BWGS is a simple and powerful toolbox to apply in breeding programmes or training activities.",2020-04-02 +,F171. ALTERED DIFFUSIVITY IN THE BRAIN OF PATIENTS WITH SCHIZOPHRENIA: A DIFFUSION WEIGHTED MAGNETIC RESONANCE IMAGING STUDIES WITH PUBLIC NEUROIMAGING DATA,"Abstract

Background

In recent decades, numerous in vivo brain imaging studies utilizing diffusion weighted MRI (dMRI) technique have focused on altered diffusivity in brains of patients with schizophrenia. However, the literature has not reached at consistent consensus despite a few interesting and promising results. In this study, we investigated whether or not various measures of dMRI (FA, AD, RD, and TR) are altered in patients with schizophrenia by comparing them in both patients and healthy controls with public neuroimaging data from SchizConnect (http://schizconnect.org).

Methods

The final data set was consisted of 121 schizophrenia patients and 119 healthy controls. After verifying 161 anatomical regions of interest (ROIs), we estimated the mean value and standard deviation of fractional anisotropy (FA), axial diffusivity (AD), radial diffusivity (RD), and trace (TR) in each ROI among the healthy controls. After that, we calculated the Z-score of each single ROI in every individual brain of both patients and healthy controls. The Z-score information of each person is then integrated into two location-independent measures. One is the total number of “abnormal” lesions, in which the absolute Z-score is above the cut-off value estimated by the Bonferroni correction, and the other is the largest absolute Z-score. After all, by using Welch two-sample t-test, we compared these two measures between the groups of patients and healthy controls.

Results

The number of abnormal lesions was notably increased in patients group, in terms of RD (p=0.01063) and TR (p=0.009329). Meanwhile, no statistically significant differences related to FA and AD were observed. On the other hand, it was found that the largest absolute Z-score was elevated in patients group, in terms of AD (p=0.03371), RD (p=0.0001762), and TR (p<0.00001). Otherwise, no significant differences related to FA were observed.

Discussion

In this study, we found a few remarkable differences of familiar measures, especially TR, between brains of patients with schizophrenia and healthy controls. This suggests that there should be some subtle changes in the brains of patients with schizophrenia, including microstructural destruction.",2018-04-01 +28530006,Kindergarten/Elementary School Teachers and Web-based Oral Health-Related Resources: An Exploration.,"

Purpose

The percentage of U.S. children with poor oral health continues to be high. Kindergarten/elementary school educators could play an important role in teaching students about oral health promotion. The objectives were to assess which oral health-related web-based resources teachers consider most helpful and how teachers' attitudes, knowledge, and behavioural intentions concerning oral health-related teaching change between before and after having access to a resource website.

Materials and methods

Web-based survey data were collected from 95 kindergarten/elementary school educators before and after they accessed a website with oral health-related information for teachers (web-link: http://media.dent.umich.edu/teachoralhealth/index.html).

Results

Most teachers accessed lesson plans about 'Teeth and smiling' (90%) and 'Taking care of your teeth' (88%) and the fewest accessed information about 'Nutrition and health' (42%) and 'Information for parents' (39%). On average, all materials were perceived as useful (5-point scale with 5 = 'very useful', range = 3.80 to 4.04). Responses to the question on how important dental health is for a child's ability to learn improved significantly from before to after the educational intervention (5-point scale with 5 = 'very important', 3.78 vs 4.44). Knowledge increased and behavioural intentions improved as well. The percentage of teachers who reported that they had included oral health-related material in the past was 47% and the percentage who intended to include it in the future was 65% (p < 0.001).

Conclusions

Providing kindergarten/elementary school educators with web-based resource materials improves their attitudes, increases their knowledge and leads to positive behavioural intentions concerning educating their students about oral health.",2017-01-01 +28499008,RiPPMiner: a bioinformatics resource for deciphering chemical structures of RiPPs based on prediction of cleavage and cross-links.,"Ribosomally synthesized and post-translationally modified peptides (RiPPs) constitute a rapidly growing class of natural products with diverse structures and bioactivities. We have developed RiPPMiner, a novel bioinformatics resource for deciphering chemical structures of RiPPs by genome mining. RiPPMiner derives its predictive power from machine learning based classifiers, trained using a well curated database of more than 500 experimentally characterized RiPPs. RiPPMiner uses Support Vector Machine to distinguish RiPP precursors from other small proteins and classify the precursors into 12 sub-classes of RiPPs. For classes like lanthipeptide, cyanobactin, lasso peptide and thiopeptide, RiPPMiner can predict leader cleavage site and complex cross-links between post-translationally modified residues starting from genome sequences. RiPPMiner can identify correct cross-link pattern in a core peptide from among a very large number of combinatorial possibilities. Benchmarking of prediction accuracy of RiPPMiner on a large lanthipeptide dataset indicated high sensitivity, specificity, accuracy and precision. RiPPMiner also provides interfaces for visualization of the chemical structure, downloading of simplified molecular-input line-entry system and searching for RiPPs having similar sequences or chemical structures. The backend database of RiPPMiner provides information about modification system, precursor sequence, leader and core sequence, modified residues, cross-links and gene cluster for more than 500 experimentally characterized RiPPs. 
RiPPMiner is available at http://www.nii.ac.in/rippminer.html.",2017-07-01 +26780094,Integrative genomic analysis by interoperation of bioinformatics tools in GenomeSpace.,"Complex biomedical analyses require the use of multiple software tools in concert and remain challenging for much of the biomedical research community. We introduce GenomeSpace (http://www.genomespace.org), a cloud-based, cooperative community resource that currently supports the streamlined interaction of 20 bioinformatics tools and data resources. To facilitate integrative analysis by non-programmers, it offers a growing set of 'recipes', short workflows to guide investigators through high-utility analysis tasks.",2016-01-18 +31701147,LINCS Data Portal 2.0: next generation access point for perturbation-response signatures.,"The Library of Integrated Network-Based Cellular Signatures (LINCS) is an NIH Common Fund program with the goal of generating a large-scale and comprehensive catalogue of perturbation-response signatures by utilizing a diverse collection of perturbations across many model systems and assay types. The LINCS Data Portal (LDP) has been the primary access point for the compendium of LINCS data and has been widely utilized. Here, we report the first major update of LDP (http://lincsportal.ccs.miami.edu/signatures) with substantial changes in the data architecture and APIs, a completely redesigned user interface, and enhanced curated metadata annotations to support more advanced, intuitive and deeper querying, exploration and analysis capabilities. The cornerstone of this update has been the decision to reprocess all high-level LINCS datasets and make them accessible at the data point level enabling users to directly access and download any subset of signatures across the entire library independent from the originating source, project or assay. 
Access to the individual signatures also enables the newly implemented signature search functionality, which utilizes the iLINCS platform to identify conditions that mimic or reverse gene set queries. A newly designed query interface enables global metadata search with autosuggest across all annotations associated with perturbations, model systems, and signatures.",2020-01-01 +29186333,"FUn: a framework for interactive visualizations of large, high-dimensional datasets on the web.","Motivation:During the past decade, big data have become a major tool in scientific endeavors. Although statistical methods and algorithms are well-suited for analyzing and summarizing enormous amounts of data, the results do not allow for a visual inspection of the entire data. Current scientific software, including R packages and Python libraries such as ggplot2, matplotlib and plot.ly, do not support interactive visualizations of datasets exceeding 100 000 data points on the web. Other solutions enable the web-based visualization of big data only through data reduction or statistical representations. However, recent hardware developments, especially advancements in graphical processing units, allow for the rendering of millions of data points on a wide range of consumer hardware such as laptops, tablets and mobile phones. Similar to the challenges and opportunities brought to virtually every scientific field by big data, both the visualization of and interaction with copious amounts of data are both demanding and hold great promise. Results:Here we present FUn, a framework consisting of a client (Faerun) and server (Underdark) module, facilitating the creation of web-based, interactive 3D visualizations of large datasets, enabling record level visual inspection. We also introduce a reference implementation providing access to SureChEMBL, a database containing patent information on more than 17 million chemical compounds. 
Availability and implementation:The source code and the most recent builds of Faerun and Underdark, Lore.js and the data preprocessing toolchain used in the reference implementation, are available on the project website (http://doc.gdb.tools/fun/). Contact:daniel.probst@dcb.unibe.ch or jean-louis.reymond@dcb.unibe.ch.",2018-04-01 +31343654,Eye in a Disk: eyeIntegration Human Pan-Eye and Body Transcriptome Database Version 1.0.,"

Purpose

We develop an accessible and reliable RNA sequencing (RNA-seq) transcriptome database of healthy human eye tissues and a matching reactive web application to query gene expression in eye and body tissues.

Methods

We downloaded the raw sequence data for 1375 RNA-seq samples across 54 tissues in the Genotype-Tissue Expression (GTEx) project as a noneye reference set. We then queried several public repositories to find all healthy, nonperturbed, human eye-related tissue RNA-seq samples. The 916 eye and 1375 GTEx samples were sent into a Snakemake-based reproducible pipeline we wrote to quantify all known transcripts and genes, removes samples with poor sequence quality and mislabels, normalizes expression values across each tissue, perform 882 differential expression tests, calculate GO term enrichment, and output all as a single SQLite database file: the Eye in a Disk (EiaD) dataset. Furthermore, we rewrote the web application eyeIntegration (available in the public domain at https://eyeIntegration.nei.nih.gov) to display EiaD.

Results

The new eyeIntegration portal provides quick visualization of human eye-related transcriptomes published to date by database version, gene/transcript, 19 eye tissues, and 54 body tissues. As a test of the value of this unified pan-eye dataset, we showed that fetal and organoid retina are highly similar at a pan-transcriptome level, but display distinct differences in certain pathways and gene families, such as protocadherin and HOXB family members.

Conclusions

The eyeIntegration v1.0 web app serves the pan-human eye and body transcriptome dataset, EiaD. This offers the eye community a powerful and quick means to test hypotheses on human gene and transcript expression across 54 body and 19 eye tissues.",2019-07-01 +32584882,ACD: Antimicrobial chemotherapeutics database.,"Antimicrobial resistance is becoming a growing health problem, which has become a challenge for the physicians to control infection and also an economic burden on the healthcare. This increase in resistance to the present antimicrobial agents led the researchers to find some alternative and more efficient drugs which can fight with the resistant microorganisms more effectively. Hence, in silico approach is used to design some novel drugs against various targets of microorganisms. For effective virtual screening of the drugs, there is a need to know about the chemical structure and properties of the antimicrobial agents. Therefore, we have prepared a comprehensive database as a platform for the researcher to search for possible lead molecules. Antimicrobial chemotherapeutics database (ACD) is comprised of ~4100 synthetic antimicrobial compounds as well as ~1030 active antimicrobial peptides. The Antimicrobial peptides are mainly from biological sources but some of them are synthetic in nature. Only those compounds, which are found to be active against either bacteria (both Gram-positive and negative) or fungus, are selected for this database.The ACD database is freely available at URL: http://amdr.amu.ac.in/acd, and it is compatible with desktops, smartphones, and tablets.",2020-06-25 +26729863,The spreading of misinformation online.,"The wide availability of user-provided content in online social media facilitates the aggregation of people around common interests, worldviews, and narratives. 
However, the World Wide Web (WWW) also allows for the rapid dissemination of unsubstantiated rumors and conspiracy theories that often elicit rapid, large, but naive social responses such as the recent case of Jade Helm 15--where a simple military exercise turned out to be perceived as the beginning of a new civil war in the United States. In this work, we address the determinants governing misinformation spreading through a thorough quantitative analysis. In particular, we focus on how Facebook users consume information related to two distinct narratives: scientific and conspiracy news. We find that, although consumers of scientific and conspiracy stories present similar consumption patterns with respect to content, cascade dynamics differ. Selective exposure to content is the primary driver of content diffusion and generates the formation of homogeneous clusters, i.e., ""echo chambers."" Indeed, homogeneity appears to be the primary driver for the diffusion of contents and each echo chamber has its own cascade dynamics. Finally, we introduce a data-driven percolation model mimicking rumor spreading and we show that homogeneity and polarization are the main determinants for predicting cascades' size.",2016-01-04 +32589697,TiFoSi: an efficient tool for mechanobiology simulations of epithelia.,"

Motivation

Emerging phenomena in developmental biology and tissue engineering are the result of feedbacks between gene expression and cell biomechanics. In that context, in silico experiments are a powerful tool to understand fundamental mechanisms and to formulate and test hypotheses.

Results

Here, we present TiFoSi, a computational tool to simulate the cellular dynamics of planar epithelia. TiFoSi allows to model feedbacks between cellular mechanics and gene expression (either in a deterministic or a stochastic way), the interaction between different cell populations, the custom design of the cell cycle and cleavage properties, the protein number partitioning upon cell division, and the modeling of cell communication (juxtacrine and paracrine signaling).

Availability and implementation

http://tifosi.thesimbiosys.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +,A nationwide forest attribute map of Sweden predicted using airborne laser scanning data and field data from the National Forest Inventory,"The National Mapping Agency in Sweden has conducted an airborne laser scanning (ALS) campaign covering almost the entire country for the purpose of creating a new national Digital Elevation Model (DEM). The ALS data were collected between 2009 and 2015 using Leica, Optech, Riegl, and Trimble scanners and have a point density of 0.5–1.0pulses/m2. A high resolution national raster database (12.5m×12.5m cell size) with forest variables was produced by combining the ALS data with field data from the Swedish National Forest Inventory (NFI). Approximately 11500 NFI plots (10meter radius) located on productive forest land, inventoried between 2009 and 2013, were used to create linear regression models relating selected forest variables, or transformations of the variables, to metrics derived from the ALS data. The resulting stand level relative RMSEs for predictions of stem volume, basal area, basal-area weighted mean tree height, and basal-area weighted mean stem diameter were in the ranges of 17.2–22.0%, 13.9–18.2%, 5.4–9.5%, and 8.7–13.1%, respectively. It was concluded that the predictions had an accuracy that were at least as good as data typically used in forest management planning. Above ground tree biomass was also included in the national raster database but not validated on a stand-level.An important part of the project was to make the raster database available to private forest owners, forest associations, forest companies, authorities, researchers, and the general public. 
Thus, all predicted forest variables can be viewed and downloaded free of charge at the Swedish Forest Agency's homepage (http://www.skogsstyrelsen.se/skogligagrunddata).",2016-01-01 +32802923,"Whole genome sequence data of Lactobacillus fermentum HFD1, the producer of antibacterial peptides.","Here we report the whole genome sequence of Lactobacillus fermentum HFD1 strain, the producer of antibacterial peptides. The genome consists of one circular chromosome with 2101878 bp in length and GC-content of 51.8%, and includes linear DNA with 5386 bp in length with 100% identity to bacteriophage phiX174. The analysis of the genome has revealed 2049 genes encoding for proteins including 867 proteins without known function and 70 genes encoding for RNAs (10 rRNAs, 59 tRNAs and 1 tmRNA). Putative genes responsible for the biosynthesis of 4 antimicrobial peptides were identified. The NCBI Bioproject has been deposited at NCBI under the accession number PRJNA615901 (https://www.ncbi.nlm.nih.gov/bioproject/PRJNA615901/) and consist of full annotated genome and raw sequence data.",2020-08-01 +33170544,A Guide to Using ClinTAD for Interpretation of DNA Copy Number Variants in the Context of Topologically Associated Domains.,"DNA copy number variants (CNVs) are routinely evaluated as part of clinical diagnosis in both the prenatal and postnatal genetic settings. Current guidelines for interpreting the potential clinical significance of these CNVs, typically identified by chromosomal microarray, focus entirely on genes localized within the CNV region. However, recent work has suggested that some CNVs can actually produce clinical impacts by influencing transcription of genes outside the CNV region. These alterations of transcription appear to occur by disrupting the composition of DNA topologically associated domains (TADs), which strongly influence contacts between gene promoters and their associated enhancers. 
Here we present a set of detailed protocols for the use of the free software tool ClinTAD (https://www.clintad.com). This decision-support software allows for prediction as to whether a given CNV may potentially disrupt a TAD boundary, and offers phenotype matching to genes near, but not within the CNV region, whose expression could be influenced by altered TAD architecture and that have phenotypic impacts related to that reported in a given patient. Our protocols here provide specific examples of how to implement these tools. In addition, the software has the capability to impact genomic research by evaluating multiple cases in parallel. We propose that this decision-support tool can benefit and improve genetic diagnosis. © 2020 Wiley Periodicals LLC. Basic Protocol 1: Evaluating a single case using ClinTAD Basic Protocol 2: Evaluating a single case with multiple variants using ClinTAD Basic Protocol 3: Evaluating multiple cases using ClinTAD Basic Protocol 4: Creating tracks with custom data.",2020-12-01 +32021890,Lipidomics dataset of sonication-induced traumatic optic neuropathy in mice.,"Traumatic optic neuropathy (TON) is the loss of vision secondary to trauma. Approximately two weeks after traumatic damage, diffuse retinal ganglion cell loss and axon degeneration of the optic nerve are exhibited [1]. Here we present the changes that occur in the optic nerve lipidome of two-month-old C57BL/6J mice following sonication-induced TON (SI-TON), which closely models the indirect clinical mechanism in TON. Optic nerves were harvested at three time points following injury: 1-day, 7-days, and 14-days for comparison with the control group (uninjured optic nerves from 2-month-old mice). The optic nerves were subjected to mass spectrometry and bioinformatic analysis using LipidSearch 4.1.3 and Metaboanalyst 4.0. This data pertains to the lipidome at each time point following indirect trauma to the optic nerve. 
The data presented here will augment investigation into the neurodegenerative process. The data is available at Metabolomics Workbench [http://www.metabolomicsworkbench.org (Project ID: PR000859)].",2020-01-16 +31861975,"MIRIA: a webserver for statistical, visual and meta-analysis of RNA editing data in mammals.","

Background

Adenosine-to-inosine RNA editing can markedly diversify the transcriptome, leading to a variety of critical molecular and biological processes in mammals. Over the past several years, researchers have developed several new pipelines and software packages to identify RNA editing sites with a focus on downstream statistical analysis and functional interpretation.

Results

Here, we developed a user-friendly public webserver named MIRIA that integrates statistics and visualization techniques to facilitate the comprehensive analysis of RNA editing sites data identified by the pipelines and software packages. MIRIA is unique in that it provides several analytical functions, including RNA editing type statistics, genomic feature annotations, editing level statistics, genome-wide distribution of RNA editing sites, tissue-specific analysis and conservation analysis. We collected high-throughput RNA sequencing (RNA-seq) data from eight tissues across seven species as the experimental data for MIRIA and constructed an example result page.

Conclusion

MIRIA provides both visualization and analysis of mammal RNA editing data for experimental biologists who are interested in revealing the functions of RNA editing sites. MIRIA is freely available at https://mammal.deepomics.org.",2019-12-22 +31400221,Alternating EM algorithm for a bilinear model in isoform quantification from RNA-seq data.,"

Motivation

Estimation of isoform-level gene expression from RNA-seq data depends on simplifying assumptions, such as uniform read distribution, that are easily violated in real data. Such violations typically lead to biased estimates. Most existing methods provide bias correction step(s), which is based on biological considerations-such as GC content-and applied in single samples separately. The main problem is that not all biases are known.

Results

We have developed a novel method called XAEM based on a more flexible and robust statistical model. Existing methods are essentially based on a linear model Xβ, where the design matrix X is known and is computed based on the simplifying assumptions. In contrast XAEM considers Xβ as a bilinear model with both X and β unknown. Joint estimation of X and β is made possible by a simultaneous analysis of multi-sample RNA-seq data. Compared to existing methods, XAEM automatically performs empirical correction of potentially unknown biases. We use an alternating expectation-maximization (AEM) algorithm, alternating between estimation of X and β. For speed XAEM utilizes quasi-mapping for read alignment, thus leading to a fast algorithm. Overall XAEM performs favorably compared to recent advanced methods. For simulated datasets, XAEM obtains higher accuracy for multiple-isoform genes. In a differential-expression analysis of a real single-cell RNA-seq dataset, XAEM achieves substantially better rediscovery rates in independent validation sets.

Availability and implementation

The method and pipeline are implemented as a tool and freely available for use at http://fafner.meb.ki.se/biostatwiki/xaem/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +32576054,DEEPSMP: A deep learning model for predicting the ectodomain shedding events of membrane proteins.,"Membrane proteins play essential roles in modern medicine. In recent studies, some membrane proteins involved in ectodomain shedding events have been reported as the potential drug targets and biomarkers of some serious diseases. However, there are few effective tools for identifying the shedding event of membrane proteins. So, it is necessary to design an effective tool for predicting shedding event of membrane proteins. In this study, we design an end-to-end prediction model using deep neural networks with long short-term memory (LSTM) units and attention mechanism, to predict the ectodomain shedding events of membrane proteins only by sequence information. Firstly, the evolutional profiles are encoded from original sequences of these proteins by Position-Specific Iterated BLAST (PSI-BLAST) on Uniref50 database. Then, the LSTM units which contain memory cells are used to hold information from past inputs to the network and the attention mechanism is applied to detect sorting signals in proteins regardless of their position in the sequence. Finally, a fully connected dense layer and a softmax layer are used to obtain the final prediction results. Additionally, we also try to reduce overfitting of the model by using dropout, L2 regularization, and bagging ensemble learning in the model training process. In order to ensure the fairness of performance comparison, firstly we use cross validation process on training dataset obtained from an existing paper. The average accuracy and area under a receiver operating characteristic curve (AUC) of five-fold cross-validation are 81.19% and 0.835 using our proposed model, compared to 75% and 0.78 by a previously published tool, respectively. 
To better validate the performance of the proposed model, we also evaluate the performance of the proposed model on independent test dataset. The accuracy, sensitivity, and specificity are 83.14%, 84.08%, and 81.63% using our proposed model, compared to 70.20%, 71.97%, and 67.35% by the existing model. The experimental results validate that the proposed model can be regarded as a general tool for predicting ectodomain shedding events of membrane proteins. The pipeline of the model and prediction results can be accessed at the following URL: http://www.csbg-jlu.info/DeepSMP/.",2020-06-23 +30295749,A new method bridging graph theory and residue co-evolutionary networks for specificity determinant positions detection.,"

Motivation

Computational studies of molecular evolution are usually performed from a multiple alignment of homologous sequences, on which sequences resulting from a common ancestor are aligned so that equivalent residues are placed in the same position. Residues frequency patterns of a full alignment or from a subset of its sequences can be highly useful for suggesting positions under selection. Most methods mapping co-evolving or specificity determinant sites are focused on positions, however, they do not consider the case for residues that are specificity determinants in one subclass, but variable in others. In addition, many methods are impractical for very large alignments, such as those obtained from Pfam, or require a priori information of the subclasses to be analyzed.

Results

In this paper we apply the complex networks theory, widely used to analyze co-affiliation systems in the social and ecological contexts, to map groups of functional related residues. This methodology was initially evaluated in simulated environments and then applied to four different protein families datasets, in which several specificity determinant sets and functional motifs were successfully detected.

Availability and implementation

The algorithms and datasets used in the development of this project are available on http://www.biocomp.icb.ufmg.br/biocomp/software-and-databases/networkstats/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +31449466,Ambient Air Pollution and the Risk of Atrial Fibrillation and Stroke: A Population-Based Cohort Study.,"

Background

Although growing evidence links air pollution to stroke incidence, less is known about the effect of air pollution on atrial fibrillation (AF), an important risk factor for stroke.

Objectives

We assessed the associations between air pollution and incidence of AF and stroke. We also sought to characterize the shape of pollutant-disease relationships.

Methods

The population-based cohort comprised 5,071,956 Ontario residents, age 35–85 y and without the diagnoses of both outcomes on 1 April 2001 and was followed up until 31 March 2015. AF and stroke cases were ascertained using health administrative databases with validated algorithms. Based on annual residential postal codes, we assigned 5-y running average concentrations of fine particulate matter ($\mathrm{PM}_{2.5}$), nitrogen dioxide ($\mathrm{NO}_2$), and ozone ($\mathrm{O}_3$) from satellite-derived data, a land-use regression model, and a fusion-based method, respectively, as well as redox-weighted averages of $\mathrm{NO}_2$ and $\mathrm{O}_3$ ($\mathrm{O}_x$) for each year. Using Cox proportional hazards models, we estimated the hazard ratios (HRs) and 95% confidence intervals (95% CIs) of AF and stroke with each of these pollutants, adjusting for individual- and neighborhood-level variables. We used newly developed nonlinear risk models to characterize the shape of pollutant–disease relationships.

Results

Between 2001 and 2015, we identified 313,157 incident cases of AF and 122,545 cases of stroke. Interquartile range increments of [Formula: see text], [Formula: see text], [Formula: see text], and [Formula: see text] were associated with increases in the incidence of AF [HRs (95% CIs): 1.03 (1.01, 1.04), 1.02 (1.01, 1.03), 1.01 (1.00, 1.02), and 1.01 (1.01, 1.02), respectively] and the incidence of stroke [HRs (95% CIs): 1.05 (1.03, 1.07), 1.04 (1.01, 1.06), 1.05 (1.03, 1.06), and 1.05 (1.04, 1.06), respectively]. Associations of similar magnitude were found in various sensitivity analyses. Furthermore, we found a near-linear association for stroke with [Formula: see text], whereas [Formula: see text], [Formula: see text]-, and [Formula: see text] relationships exhibited sublinear shapes.

Conclusions

Air pollution was associated with stroke and AF onset, even at very low concentrations. https://doi.org/10.1289/EHP4883.",2019-08-26 +30715201,"GLAD: GLycan Array Dashboard, a visual analytics tool for glycan microarrays.","MOTIVATION:Traditional glycan microarray data is typically presented as excel files with limited visualization and interactivity. Thus, comparisons and analysis of glycan array data have been difficult, and there is need for a tool to facilitate data mining of glycan array data. RESULTS:GLAD (GLycan Array Dashboard) is a web-based tool to visualize, analyze, present and mine glycan microarray data. GLAD allows users to input multiple data files to create comparisons. GLAD extends the capability of the microarray data to produce more comparative visualizations in the form of grouped bar charts, heatmaps, calendar heatmaps, force graphs and correlation maps in order to analyze broad sets of samples. Additionally, it allows users to filter, sort and normalize the data and view glycan structures in an interactive manner, to facilitate faster visual data mining. AVAILABILITY AND IMPLEMENTATION:GLAD is freely available for use on the Web at https://glycotoolkit.com/Tools/GLAD/ with all major modern browsers (Edge, Firefox, Chrome, Safari). SUPPLEMENTARY INFORMATION:Full documentation and video tutorials for GLAD can be found on https://glycotoolkit.com/GLAD.",2019-09-01 +25477388,BioAssay Research Database (BARD): chemical biology and probe-development enabled by structured metadata and result types.,"BARD, the BioAssay Research Database (https://bard.nih.gov/) is a public database and suite of tools developed to provide access to bioassay data produced by the NIH Molecular Libraries Program (MLP). Data from 631 MLP projects were migrated to a new structured vocabulary designed to capture bioassay data in a formalized manner, with particular emphasis placed on the description of assay protocols. 
New data can be submitted to BARD with a user-friendly set of tools that assist in the creation of appropriately formatted datasets and assay definitions. Data published through the BARD application program interface (API) can be accessed by researchers using web-based query tools or a desktop client. Third-party developers wishing to create new tools can use the API to produce stand-alone tools or new plug-ins that can be integrated into BARD. The entire BARD suite of tools therefore supports three classes of researcher: those who wish to publish data, those who wish to mine data for testable hypotheses, and those in the developer community who wish to build tools that leverage this carefully curated chemical biology resource.",2014-12-04 +30261835,Sequence homology in eukaryotes (SHOE): interactive visual tool for promoter analysis.,"

Background

Microarray and DNA-sequencing based technologies continue to produce enormous amounts of data on gene expression. This data has great potential to illuminate our understanding of biology and medicine, but the data alone is of limited value without computational tools to allow human investigators to visualize and interpret it in the context of their problem of interest.

Results

We created a web server called SHOE that provides an interactive, visual presentation of the available evidence of transcriptional regulation and gene co-expression to facilitate its exploration and interpretation. SHOE predicts the likely transcription factor binding sites in orthologous promoters of humans, mice, and rats using the combined information of 1) transcription factor binding preferences (position-specific scoring matrix (PSSM) libraries such as Transfac32, Jaspar, HOCOMOCO, ChIP-seq, SELEX, PBM, and iPS-reprogramming factor), 2) evolutionary conservation of putative binding sites in orthologous promoters, and 3) co-expression tendencies of gene pairs based on 1,714 normal human cells selected from the Gene Expression Omnibus Database.

Conclusion

SHOE enables users to explore potential interactions between transcription factors and target genes via multiple data views, discover transcription factor binding motifs on top of gene co-expression, and visualize genes as a network of gene and transcription factors on its native gadget GeneViz, the CellDesigner pathway analyzer, and the Reactome database to search the pathways involved. As we demonstrate here when using the CREB1 and Nf-κB datasets, SHOE can reliably identify experimentally verified interactions and predict plausible novel ones, yielding new biological insights into the gene regulatory mechanisms involved. SHOE comes with a manual describing how to run it on a local PC or via the Garuda platform ( www.garuda-alliance.org ), where it joins other popular gadgets such as the CellDesigner pathway analyzer and the Reactome database, as part of analysis workflows to meet the growing needs of molecular biologists and medical researchers. SHOE is available from the following URL http://ec2-54-150-223-65.ap-northeast-1.compute.amazonaws.com A video demonstration of SHOE can be found here: https://www.youtube.com/watch?v=qARinNb9NtE.",2018-09-27 +30026404,The American Academy of Psychiatry and the Law Practice Resource for Prescribing in Corrections.,"The practice of prescribing in jails and prisons is often different from that in the community. Serious mental illness is common among inmates, and so are co-morbidities such as substance use, impulse-control, attention-deficit/hyperactivity, and personality disorders. Operational requirements, staffing, and the physical plant of the institution may complicate the provision of treatment according to community standards. Problems related to medication nonadherence, as well as the pursuit of medications for nonmedical reasons, are often seen in these settings and may be managed differently than they are elsewhere. Existing practice resources rarely account for these challenges. 
Pursuant to a recommendation by the Correctional Committee of the American Academy of Psychiatry and the Law (AAPL), the AAPL Council in May 2015 approved the creation of a task force charged with producing a document on prescribing in correctional facilities. Full Document: Tamburello A, Metzner J, Ferguson E, et al: AAPL practice resource for prescribing in corrections. Journal of the American Academy of Psychiatry and the Law Online Supplement 2018, 46 (2). Available at: http://www.jaapl.org/content/46/2_Supplement.",2018-06-01 +32755498,Resting-State Functional Magnetic Resonance Imaging Connectivity Between Semantic and Phonological Regions of Interest May Inform Language Targets in Aphasia.,"Purpose Brain imaging has provided puzzle pieces in the understanding of language. In neurologically healthy populations, the structure of certain brain regions is associated with particular language functions (e.g., semantics, phonology). In studies on focal brain damage, certain brain regions or connections are considered sufficient or necessary for a given language function. However, few of these account for the effects of lesioned tissue on the ""functional"" dynamics of the brain for language processing. Here, functional connectivity (FC) among semantic-phonological regions of interest (ROIs) is assessed to fill a gap in our understanding about the neural substrates of impaired language and whether connectivity strength can predict language performance on a clinical tool in individuals with aphasia. Method Clinical assessment of language, using the Western Aphasia Battery-Revised, and resting-state functional magnetic resonance imaging data were obtained for 30 individuals with chronic aphasia secondary to left-hemisphere stroke and 18 age-matched healthy controls. FC between bilateral ROIs was contrasted by group and used to predict Western Aphasia Battery-Revised scores. Results Network coherence was observed in healthy controls and participants with stroke. 
The left-right premotor cortex connection was stronger in healthy controls, as reported by New et al. (2015) in the same data set. FC of (a) connections between temporal regions, in the left hemisphere and bilaterally, predicted lexical-semantic processing for auditory comprehension and (b) ipsilateral connections between temporal and frontal regions in both hemispheres predicted access to semantic-phonological representations and processing for verbal production. Conclusions Network connectivity of brain regions associated with semantic-phonological processing is predictive of language performance in poststroke aphasia. The most predictive connections involved right-hemisphere ROIs-particularly those for which structural adaptions are known to associate with recovered word retrieval performance. Predictions may be made, based on these findings, about which connections have potential as targets for neuroplastic functional changes with intervention in aphasia. Supplemental Material https://doi.org/10.23641/asha.12735785.",2020-08-05 +25143288,GlycoPattern: a web platform for glycan array mining.,"

Unlabelled

GlycoPattern is a Web-based bioinformatics resource to support the analysis of glycan array data for the Consortium for Functional Glycomics. This resource includes algorithms and tools to discover structural motifs, a heatmap visualization to compare multiple experiments, hierarchical clustering of Glycan Binding Proteins with respect to their binding motifs and a structural search feature on the experimental data.

Availability and implementation

GlycoPattern is freely available on the Web at http://glycopattern.emory.edu with all major browsers supported.",2014-08-20 +25432973,"A maize database resource that captures tissue-specific and subcellular-localized gene expression, via fluorescent tags and confocal imaging (Maize Cell Genomics Database).","Maize is a global crop and a powerful system among grain crops for genetic and genomic studies. However, the development of novel biological tools and resources to aid in the functional identification of gene sequences is greatly needed. Towards this goal, we have developed a collection of maize marker lines for studying native gene expression in specific cell types and subcellular compartments using fluorescent proteins (FPs). To catalog FP expression, we have developed a public repository, the Maize Cell Genomics (MCG) Database, (http://maize.jcvi.org/cellgenomics), to organize a large data set of confocal images generated from the maize marker lines. To date, the collection represents major subcellular structures and also developmentally important progenitor cell populations. The resource is available to the research community, for example to study protein localization or interactions under various experimental conditions or mutant backgrounds. A subset of the marker lines can also be used to induce misexpression of target genes through a transactivation system. For future directions, the image repository can be expanded to accept new image submissions from the research community, and to perform customized large-scale computational image analysis. 
This community resource will provide a suite of new tools for gaining biological insights by following the dynamics of protein expression at the subcellular, cellular and tissue levels.",2014-11-27 +30705097,Asparagine levels in the cerebrospinal fluid of children with acute lymphoblastic leukemia treated with pegylated-asparaginase in the induction phase of the AIEOP-BFM ALL 2009 study.,"Asparagine levels in cerebrospinal fluid and serum asparaginase activity were monitored in children with acute lymphoblastic leukemia treated with pegylated-asparaginase. The drug was given intravenously at a dose of 2,500 IU/m2 on days 12 and 26. Serum and cerebrospinal fluid samples obtained on days 33 and 45 were analyzed centrally. Since physiological levels of asparagine in the cerebrospinal fluid of children and adolescents are 4-10 μmol/L, in this study asparagine depletion was considered complete when the concentration of asparagine was ≤0.2 μmol/L, i.e. below the lower limit of quantification of the assay used. Over 24 months 736 patients (AIEOP n=245, BFM n=491) and 903 cerebrospinal fluid samples (n=686 on day 33 and n=217 on day 45) were available for analysis. Data were analyzed separately for the AIEOP and BFM cohorts and yielded superimposable results. Independently of serum asparaginase activity levels, cerebrospinal fluid asparagine levels were significantly reduced during the investigated study phase but only 28% of analyzed samples showed complete asparagine depletion while relevant levels, ≥1 μmol/L, were still detectable in around 23% of them. Complete cerebrospinal fluid asparagine depletion was found in around 5-6% and 33-37% of samples at serum asparaginase activity levels <100 and ≥ 1,500 IU/L, respectively. In this study cerebrospinal fluid asparagine levels were reduced during pegylated-asparaginase treatment, but complete depletion was only observed in a minority of patients. 
No clear threshold of serum pegylated-asparaginase activity level resulting in complete cerebrospinal fluid asparagine depletion was identified. The consistency of the results found in the two independent data sets strengthens the observations of this study. Details of the treatment are available in the European Clinical Trials Database at https://www.clinicaltrialsregister.eu/ctr-search/trial/2007-004270-43/IT.",2019-01-31 +28608363,"MiSynPat: An integrated knowledge base linking clinical, genetic, and structural data for disease-causing mutations in human mitochondrial aminoacyl-tRNA synthetases.","Numerous mutations in each of the mitochondrial aminoacyl-tRNA synthetases (aaRSs) have been implicated in human diseases. The mutations are autosomal and recessive and lead mainly to neurological disorders, although with pleiotropic effects. The processes and interactions that drive the etiology of the disorders associated with mitochondrial aaRSs (mt-aaRSs) are far from understood. The complexity of the clinical, genetic, and structural data requires concerted, interdisciplinary efforts to understand the molecular biology of these disorders. Toward this goal, we designed MiSynPat, a comprehensive knowledge base together with an ergonomic Web server designed to organize and access all pertinent information (sequences, multiple sequence alignments, structures, disease descriptions, mutation characteristics, original literature) on the disease-linked human mt-aaRSs. With MiSynPat, a user can also evaluate the impact of a possible mutation on sequence-conservation-structure in order to foster the links between basic and clinical researchers and to facilitate future diagnosis. The proposed integrated view, coupled with research on disease-related mt-aaRSs, will help to reveal new functions for these enzymes and to open new vistas in the molecular biology of the cell. 
The purpose of MiSynPat, freely available at http://misynpat.org, is to constitute a reference and a converging resource for scientists and clinicians.",2017-06-27 +32816833,Intrauterine Growth Restriction and Risk of Diverse Forms of Kidney Disease during the First 50 Years of Life.,"

Background and objectives

Previous studies have shown that individuals with low birth weight (LBW) or small for gestational age (SGA) have higher risk of kidney failure. This study investigates birth-related exposures and risk of CKD and other kidney diagnoses.

Design, setting, participant, & measurements

The Medical Birth Registry of Norway has registered extensive medical data on all births in Norway since 1967. The Norwegian Patient Registry has registered diagnostic codes for all admissions and outpatient visits to Norwegian hospitals since 2008. Data from these registries were linked, and risk of CKD and other groups of kidney disease were analyzed using logistic regression statistics. LBW (below the tenth percentile), SGA (birth weight below the tenth percentile for gestational age), and preterm birth (<37 weeks) were analyzed as exposures.

Results

A total of 2,663,010 individuals were included. After a mean follow-up of 26 years (maximum 50 years), 4495 had been diagnosed with CKD and 12,818 had been diagnosed with other groups of kidney disease. LBW was associated with an odds ratio (OR) for CKD of 1.72 (95% confidence interval [95% CI], 1.60 to 1.90), SGA with an OR of 1.79 (95% CI, 1.65 to 1.94), and preterm birth with an OR of 1.48 (95% CI, 1.33 to 1.66). Analyses using diagnosis of CKD at stages 3-5 as end point showed similar results. Results were similar for men and women. We analyzed adjusted ORs for other groups of kidney disease and found that LBW was associated with an adjusted OR of 1.44 (95% CI, 1.33 to 1.56) for acute kidney disease, 1.24 (95% CI, 1.14 to 1.36) for GN, 1.35 (95% CI, 1.17 to 1.56) for cystic kidney disease, and 1.15 (95% CI, 1.06 to 1.25) for kidney disease resulting from kidney or urinary tract malformations.

Conclusions

LBW, SGA, and preterm birth are associated with higher risk of CKD in the first 50 years of life. Risk of other groups of kidney disease was less pronounced.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2020_08_17_CJN04080320.mp3.",2020-08-17 +33575975,Identification and expression of adenosine deaminases acting on tRNA (ADAT) during early tail regeneration of the earthworm.,"

Background

RNA editing is a widespread phenomenon in all metazoans. One of the common RNA editing events is the chemical conversion of adenosine to inosine (A-to-I) catalyzed by adenosine deaminases acting on tRNA (ADAT). During D. melanogaster development, the ADAT1 transcript was found to localize mainly to the central nervous system including brain and ventral nerve cord during brain development. Although an earthworm adenosine deaminase acting on mRNA (ADAR) has been identified and its possible implication in earthworm regeneration has been investigated, there is little accumulated information on ADAT and tRNA editing in the annelid including terrestrial earthworms.

Objective

This study aimed to investigate the molecular characteristics and the expression pattern of earthworm ADAT during tail regeneration to understand its physiological significance.

Methods

Nucleotide sequence of Ean-ADAT was retrieved from the genome assembly of Eisenia andrei via Basic Local Alignment Search Tool (BLAST). The genome assembly of Eisenia andrei was downloaded from National Genomics Data Center ( http://bigd.big.ac.cn/gwh/ ). The alignment and phylogenetic relationship of the core deaminase domains of ADATs and ADARs were analyzed. Its temporal expression during early tail regeneration was measured using real-time PCR.

Results

The open reading frame of Ean-ADAT consists of 1719 nucleotides encoding 573 amino acids. Domain analysis indicates that Ean-ADAT has a deaminase domain composed of 498 amino acids and a predicted nuclear localization signal at the N-terminal. Its subcellular localization was predicted to be nuclear. The core deaminase region of Ean-ADAT encompasses the three active-site motifs, including zinc-chelating residues and a glutamate residue for catalytic activity. In addition, Ean-ADAT shares highly conserved RNA recognition region flanking the third cysteine of the deaminase motif with other ADAT1s even from the yeast. Multiple sequence alignment and phylogenetic analysis indicate that Ean-ADAT shows greater similarity to vertebrate ADARs than to yeast Tad1p. Ean-ADAT mRNA expression began to remarkably decrease before 12 h post-amputation, showing a tendency to gradual decrease until 7 dpa and then it slightly rebounded at 10 dpa.

Conclusions

Our results demonstrate that Ean-ADAT belongs to a class of ADAT1s and support the hypothesis of a common evolutionary origin for ADARs and ADATs. The temporal expression of Ean-ADAT could suggest that its activity is unrelated to the molecular mechanisms of dedifferentiation.",2021-02-11 +30382842,ISU FLUture: a veterinary diagnostic laboratory web-based platform to monitor the temporal genetic patterns of Influenza A virus in swine.,"

Background

Influenza A Virus (IAV) causes respiratory disease in swine and is a zoonotic pathogen. Uncontrolled IAV in swine herds not only affects animal health, it also impacts production through increased costs associated with treatment and prevention efforts. The Iowa State University Veterinary Diagnostic Laboratory (ISU VDL) diagnoses influenza respiratory disease in swine and provides epidemiological analyses on samples submitted by veterinarians.

Description

To assess the incidence of IAV in swine and inform stakeholders, the ISU FLUture website was developed as an interactive visualization tool that allows the exploration of the ISU VDL swine IAV aggregate data in the clinical diagnostic database. The information associated with diagnostic cases has varying levels of completeness and is anonymous, but minimally contains: sample collection date, specimen type, and IAV subtype. Many IAV positive samples are sequenced, and in these cases, the hemagglutinin (HA) sequence and genetic classification are completed. These data are collected and presented on ISU FLUture in near real-time, and more than 6,000 IAV positive diagnostic cases and their epidemiological and evolutionary information since 2003 are presented to date. The database and web interface provides rapid and unique insight into the trends of IAV derived from both large- and small-scale swine farms across the United States of America.

Conclusion

ISU FLUture provides a suite of web-based tools to allow stakeholders to search for trends and correlations in IAV case metadata in swine from the ISU VDL. Since the database infrastructure is updated in near real-time and is integrated within a high-volume veterinary diagnostic laboratory, earlier detection is now possible for emerging IAV in swine that subsequently cause vaccination and control challenges. The access to real-time swine IAV data provides a link with the national USDA swine IAV surveillance system and allows veterinarians to make objective decisions regarding the management and control of IAV in swine. The website is publicly accessible at http://influenza.cvm.iastate.edu .",2018-11-01 +31860075,iMIRAGE: an R package to impute microRNA expression using protein-coding genes.,"

Summary

MicroRNAs (miRNAs) are critical post-transcriptional regulators of gene expression. Due to challenges in accurate profiling of small RNAs, a vast majority of public transcriptome datasets lack reliable miRNA profiles. However, the biological consequence of miRNA activity in the form of altered protein-coding gene (PCG) expression can be captured using machine-learning algorithms. Here, we present iMIRAGE (imputed miRNA activity from gene expression), a convenient tool to predict miRNA expression using PCG expression of the test datasets. The iMIRAGE package provides an integrated workflow for normalization and transformation of miRNA and PCG expression data, along with the option to utilize predicted miRNA targets to impute miRNA activity from independent test PCG datasets.

Availability and implementation

The iMIRAGE package for R, along with package documentation and vignette, is available at https://aritronath.github.io/iMIRAGE/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +33229886,Biopsychosocial Factors Associated With Attention Problems in Children After Traumatic Brain Injury: A Systematic Review.,"

Objective

The aim of this review was to examine biopsychosocial factors associated with an increased risk of attention problems after a traumatic brain injury in children.

Design

A systematic review of the literature was conducted using data sources of MEDLINE, PsycINFO, and CINAHL up to August 30, 2020. Literature primarily examined pediatric patients with traumatic brain injury and attention problems. Risk factors for attention problems posttraumatic brain injury examined in all articles were identified and grouped into broad categories of biological, psychological, and social factors. Methodological quality of each study was assessed using the modified Downs and Black checklist. Preferred Reporting Items for Systematic Reviews and Meta-analyses (PRISMA) guidelines from 2009 were used in completing this review.

Results

Forty articles met inclusion criteria for this study. Overall findings were mixed but suggested that younger age at injury, presence of preinjury attention-deficit/hyperactivity disorder, poorer preinjury adaptive functioning, lower socioeconomic status, and poorer family functioning were associated with increased risk of developing attention problems posttraumatic brain injury.

Conclusions

Development of attention problems after pediatric traumatic brain injury is complex and influenced by an array of biologic, environmental/social, injury-related, and host factors. Evidence is mixed, and further study is needed to better understand the relationships between these factors and how they influence attention after traumatic brain injury. Nonetheless, screening for attention problems in children with risk factors may allow for earlier identification and intervention, minimizing negative impacts of attention problems after traumatic brain injury in children. Limitations of this study included heterogeneity of studies and overall low to moderate methodological quality of studies included as measured by the modified Downs and Black checklist.

To claim CME credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME.

CME objectives

Upon completion of this article, the reader should be able to: (1) Describe the importance of recognizing and identifying attention problems after traumatic brain injury in children; (2) Identify risk factors for development of attention problems after pediatric traumatic brain injury; and (3) Recognize gaps in existing literature regarding predictors of attention problems after pediatric traumatic brain injury.

Level

Advanced.

Accreditation

The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2021-03-01 +31494994,Plant Regulomics: a data-driven interface for retrieving upstream regulators from plant multi-omics data.,"High-throughput technology has become a powerful approach for routine plant research. Interpreting the biological significance of high-throughput data has largely focused on the functional characterization of a large gene list or genomic loci that involves the following two aspects: the functions of the genes or loci and how they are regulated as a whole, i.e. searching for the upstream regulators. Traditional platforms for functional annotation largely help resolving the first issue. Addressing the second issue is essential for a global understanding of the regulatory mechanism, but is more challenging, and requires additional high-throughput experimental evidence and a unified statistical framework for data-mining. The rapid accumulation of 'omics data provides a large amount of experimental data. We here present Plant Regulomics, an interface that integrates 19 925 transcriptomic and epigenomic data sets and diverse sources of functional evidence (58 112 terms and 695 414 protein-protein interactions) from six plant species along with the orthologous genes from 56 whole-genome sequenced plant species. All pair-wise transcriptomic comparisons with biological significance within the same study were performed, and all epigenomic data were processed to genomic loci targeted by various factors. These data were well organized to gene modules and loci lists, which were further implemented into the same statistical framework. 
For any input gene list or genomic loci, Plant Regulomics retrieves the upstream factors, treatments, and experimental/environmental conditions regulating the input from the integrated 'omics data. Additionally, multiple tools and an interactive visualization are available through a user-friendly web interface. Plant Regulomics is available at http://bioinfo.sibs.ac.cn/plant-regulomics.",2019-10-14 +30418626,dbPTM in 2019: exploring disease association and cross-talk of post-translational modifications.,"The dbPTM (http://dbPTM.mbc.nctu.edu.tw/) has been maintained for over 10 years with the aim to provide functional and structural analyses for post-translational modifications (PTMs). In this update, dbPTM not only integrates more experimentally validated PTMs from available databases and through manual curation of literature but also provides PTM-disease associations based on non-synonymous single nucleotide polymorphisms (nsSNPs). The high-throughput deep sequencing technology has led to a surge in the data generated through analysis of association between SNPs and diseases, both in terms of growth amount and scope. This update thus integrated disease-associated nsSNPs from dbSNP based on genome-wide association studies. The PTM substrate sites located at a specified distance in terms of the amino acids encoded from nsSNPs were deemed to have an association with the involved diseases. In recent years, increasing evidence for crosstalk between PTMs has been reported. Although mass spectrometry-based proteomics has substantially improved our knowledge about substrate site specificity of single PTMs, the fact that the crosstalk of combinatorial PTMs may act in concert with the regulation of protein function and activity is neglected. 
Because of the relatively limited information about concurrent frequency and functional relevance of PTM crosstalk, in this update, the PTM sites neighboring other PTM sites in a specified window length were subjected to motif discovery and functional enrichment analysis. This update highlights the current challenges in PTM crosstalk investigation and breaks the bottleneck of how proteomics may contribute to understanding PTM codes, revealing the next level of data complexity and proteomic limitation in prospective PTM research.",2019-01-01 +24531082,SMAL: A Resource of Spontaneous Mutation Accumulation Lines.,"Mutation is the ultimate source of genetic variation and evolution. Mutation accumulation (MA) experiments are an alternative approach to study de novo mutation events directly. We have constructed a resource of Spontaneous Mutation Accumulation Lines (SMAL; http://cefg.uestc.edu.cn/smal), which contains all the current publicly available MA lines identified by high-throughput sequencing. We have relocated and mapped the mutations based on the most recent genome annotations. A total of 5,608 single base mutations and 540 other mutations were obtained and are recorded in the current version of the SMAL database. The integrated data in SMAL provide detailed information that can be used in new theoretical analyses. We believe that the SMAL resource will help researchers better understand the processes of genetic variation and the incidence of disease.",2014-02-14 +33019422,Procedural and post-operative complications associated with laparoscopic versus open abdominal surgery for right-sided colonic cancer resection: A systematic review and meta-analysis.,"

Background

In this analysis, we aimed to systematically compare the procedural and post-operative complications (POC) associated with laparoscopic versus open abdominal surgery for right-sided colonic cancer resection.

Methods

We searched MEDLINE, http://www.ClinicalTrials.gov, EMBASE, Web of Science, Cochrane Central, and Google scholar for English studies comparing the POC in patients who underwent laparoscopic versus open surgery (OS) for right colonic cancer. Data were assessed by the Cochrane-based RevMan 5.4 software (The Cochrane Community, London, UK). Mean difference (MD) with 95% confidence intervals (CIs) were used to represent the results for continuous variables, whereas risk ratios (RR) with 95% CIs were used for dichotomous data.

Results

Twenty-six studies involving a total number of 3410 participants with right colonic carcinoma were included in this analysis. One thousand five hundred and fifteen participants were assigned to undergo invasive laparoscopic surgery whereas 1895 participants were assigned to the open abdominal surgery. Our results showed that the open resection was associated with a shorter length of surgery (MD: 48.63, 95% CI: 30.15-67.12; P = .00001) whereas laparoscopic intervention was associated with a shorter hospital stay [MD (-3.09), 95% CI [-5.82 to (-0.37)]; P = .03]. In addition, POC such as anastomotic leak (RR: 0.96, 95% CI: 0.60-1.55; P = .88), abdominal abscess (RR: 1.13, 95% CI: 0.52-2.49; P = .75), pulmonary embolism (RR: 0.40, 95% CI: 0.09-1.69; P = .21) and deep vein thrombosis (RR: 0.94, 95% CI: 0.39-2.28; P = .89) were not significantly different. Paralytic ileus (RR: 0.87, 95% CI: 0.67-1.11; P = .26), intra-abdominal infection (RR: 0.82, 95% CI: 0.15-4.48; P = .82), pulmonary complications (RR: 0.83, 95% CI: 0.57-1.20; P = .32), cardiac complications (RR: 0.73, 95% CI: 0.42-1.27; P = .27) and urological complications (RR: 0.83, 95% CI: 0.52-1.33; P = .44) were also similarly manifested. Our analysis also showed 30-day re-admission and re-operation, and mortality to be similar between laparoscopic versus OS for right colonic carcinoma resection. However, surgical wound infection (RR: 0.65, 95% CI: 0.50-0.86; P = .002) was significantly higher with the OS.

Conclusions

In conclusion, laparoscopic surgery was almost comparable to OS in terms of post-operative outcomes for right-sided colonic cancer resection and was not associated with higher unwanted outcomes. Therefore, laparoscopic intervention should be considered as safe as the open abdominal surgery for right-sided colonic cancer resection, with a decreased hospital stay.",2020-10-01 +28584021,The Landscape of Isoform Switches in Human Cancers.,"Alternative usage of transcript isoforms from the same gene has been hypothesized as an important feature in cancers. However, differential usage of gene transcripts between conditions (isoform switching) has not been comprehensively characterized in and across cancer types. To this end, we developed methods for identification and visualization of isoform switches with predicted functional consequences. Using these methods, we characterized isoform switching in RNA-seq data from >5,500 cancer patients covering 12 solid cancer types. Isoform switches with potential functional consequences were common, affecting approximately 19% of multiple transcript genes. Among these, isoform switches leading to loss of DNA sequence encoding protein domains were more frequent than expected, particularly in pancancer switches. We identified several isoform switches as powerful biomarkers: 31 switches were highly predictive of patient survival independent of cancer types. Our data constitute an important resource for cancer researchers, available through interactive web tools. Moreover, our methods, available as an R package, enable systematic analysis of isoform switches from other RNA-seq datasets.Implications: This study indicates that isoform switches with predicted functional consequences are common and important in dysfunctional cells, which in turn means that gene expression should be analyzed at the isoform level. Visual Overview: http://mcr.aacrjournals.org/content/molcanres/15/9/1206/F1.large.jpg.Mol Cancer Res; 15(9); 1206-20. 
©2017 AACR.",2017-06-05 +,"Opening Pandora's box of Pristocerinae: molecular and morphological phylogenies of Apenesia (Hymenoptera, Bethylidae) reveal several hidden genera","The flat wasp Apenesia Westwood is one of the largest genera in Pristocerinae with 190 species worldwide. The lack of a single diagnostic morphological character has resulted in many distinct ground plans within Apenesia. Some authors have suggested Apenesia as polyphyletic. Here we aimed to investigate and delimit the genus based on morphological and molecular data. We analysed 163 morphological characters on tnt using ‘traditional search’, whereas Bayesian (BI) and maximum likelihood (ML) inferences were performed on 1553 concatenated nucleotide base pairs from COI and 28S sequences. Apenesia was recovered as polyphyletic in all analyses with up to 10 different lineages spread throughout the tree. We mapped female morphological characters onto the molecular tree to enlighten morphological evolution in the apterous females. Based on our morphological and molecular analyses, and on the taxonomic revision of holotypes and specimens, we propose six new genera in the Pristocerinae and revalidate the generic status of two: Acrenesia gen.n., Austranesia gen.n., Cleistepyris stat. rev., Dracunesia gen.n., Eleganesia gen.n., Epynesia gen.n., Pristonesia gen.n., and Propristocera stat. rev. We also propose four new generic synonymies: Neoapenesia syn.n. under Apenesia, Dipristocera syn.n. under Cleistepyris, Afrocera syn.n. and Neopristocera syn.n. under Propristocera. At the species level, we indicate Apenesia minor syn.n. under Propristocera tagala. In all, 162 new combinations, eight reinstated combinations and one new name to avoid homonymy are proposed. Apenesia is now defined as flat wasps having males with the mesoscutum gibbous, the genitalia with the paramere narrow and densely hairy, and the aedeagus with the ventral apical lobe elliptical and covered with lumps. 
Females of Apenesia can be distinguished from other Pristocerinae by having the head wider than the mesosoma, the antennae short, the mandible long, and the clypeus surpassing the toruli in the frons. This study reinforces the difficulties in defining Apenesia and other Pristocerinae genera. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:65FB3087‐0F30‐4851‐B1BA‐ED6E4518B958.",2018-07-01 +31799603,"mitoXplorer, a visual data mining platform to systematically analyze and visualize mitochondrial expression dynamics and mutations.","Mitochondria participate in metabolism and signaling. They adapt to the requirements of various cell types. Publicly available expression data permit to study expression dynamics of genes with mitochondrial function (mito-genes) in various cell types, conditions and organisms. Yet, we lack an easy way of extracting these data for mito-genes. Here, we introduce the visual data mining platform mitoXplorer, which integrates expression and mutation data of mito-genes with a manually curated mitochondrial interactome containing ∼1200 genes grouped in 38 mitochondrial processes. User-friendly analysis and visualization tools allow to mine mitochondrial expression dynamics and mutations across various datasets from four model species including human. To test the predictive power of mitoXplorer, we quantify mito-gene expression dynamics in trisomy 21 cells, as mitochondrial defects are frequent in trisomy 21. We uncover remarkable differences in the regulation of the mitochondrial transcriptome and proteome in one of the trisomy 21 cell lines, caused by dysregulation of the mitochondrial ribosome and resulting in severe defects in oxidative phosphorylation. With the newly developed Fiji plugin mitoMorph, we identify mild changes in mitochondrial morphology in trisomy 21. 
Taken together, mitoXplorer (http://mitoxplorer.ibdm.univ-mrs.fr) is a user-friendly, web-based and freely accessible software, aiding experimental scientists to quantify mitochondrial expression dynamics.",2020-01-01 +28763057,BECon: a tool for interpreting DNA methylation findings from blood in the context of brain.,"Tissue differences are one of the largest contributors to variability in the human DNA methylome. Despite the tissue-specific nature of DNA methylation, the inaccessibility of human brain samples necessitates the frequent use of surrogate tissues such as blood, in studies of associations between DNA methylation and brain function and health. Results from studies of surrogate tissues in humans are difficult to interpret in this context, as the connection between blood-brain DNA methylation is tenuous and not well-documented. Here, we aimed to provide a resource to the community to aid interpretation of blood-based DNA methylation results in the context of brain tissue. We used paired samples from 16 individuals from three brain regions and whole blood, run on the Illumina 450 K Human Methylation Array to quantify the concordance of DNA methylation between tissues. From these data, we have made available metrics on: the variability of cytosine-phosphate-guanine dinucleotides (CpGs) in our blood and brain samples, the concordance of CpGs between blood and brain, and estimations of how strongly a CpG is affected by cell composition in both blood and brain through the web application BECon (Blood-Brain Epigenetic Concordance; https://redgar598.shinyapps.io/BECon/). We anticipate that BECon will enable biological interpretation of blood-based human DNA methylation results, in the context of brain.",2017-08-01 +35116854,Construction and analysis of lncRNA-associated ceRNA network identified potential prognostic biomarker in gastric cancer.,"

Background

Long non-coding RNAs (lncRNAs) are defined as non-coding RNA (ncRNA) with transcripts longer than 200 nucleotides with tissue specificity. Recently, they have been found to participate in cancer tumorigenesis and progression via transcriptional regulation, post-transcriptional regulation and epigenetic gene regulation. The competitive endogenous RNA (ceRNA) hypothesis assumes that lncRNAs compete for the target RNA by sponging the common miRNA response elements (MREs) to complete the post-transcriptional regulation. To explore the function and mechanisms of lncRNAs as ceRNAs in gastric cancer (GC), this study performed a genome-wide analysis.

Methods

The lncRNA, mRNA and microRNA (miRNA) profiles of 375 GC samples and 32 normal samples were obtained from The Cancer Genome Atlas (TCGA) Stomach Adenocarcinoma (STAD) datasets. The data were standardized with a cross match in the miRBase (a database at http://www.mirbase.org/), which left 365 samples as the analysis objects. We identified differentially expressed RNAs (DERNAs), including differentially expressed mRNAs (DEmRNAs), differentially expressed miRNAs (DEmiRNAs) and differentially expressed lncRNAs (DElncRNAs) by applying the edgeR package with thresholds of |log2FC| >2 and false discovery rate (FDR) <0.01. The potential RNAs for the gastric ceRNA network were screened out from the DERNAs based on the ""ceRNA hypothesis"". The further construction of the network and analysis of its topological properties were performed with Cytoscape. Gene Ontology (GO) function enrichment was analyzed with the BiNGO plugin of Cytoscape. Survival analysis was estimated according to Kaplan-Meier curve analysis.

Results

The constructed gastric ceRNA network involved 61 mRNAs, 44 lncRNAs and 22 miRNAs. Five lncRNAs out of the DElncRNAs, namely MIR100HG, MAGI2-AS3, AC080038.1, AC010478.1 and MEF2C-AS1, were found to be the most involved in the network. The lncRNA AL139147 was found to be negatively correlated with overall survival (log-rank, P<0.05).

Conclusions

In conclusion, our study identified promising lncRNAs, which might be potential diagnostic biomarker and therapeutic targets and contribute to further understanding of the ceRNA pathogenesis in GC and guide for further investigation.",2019-08-01 +33792448,How Do Clinical Electives during the Clerkship Year Influence Career Exploration? A Qualitative Study.,"Problem: Although many students begin medical school with some idea of their specialty interest, up to 80% of these students choose a different specialty by their final year. This pivot tends to happen in the clerkship year, when students are immersed in the clinical environment, gaining a practical understanding of the day-to-day work in different fields. Yet, in this year students have limited experiences with specialties. Clinical electives during the clerkship year may aid students in career development. The authors examined student career exploration through the lens of social cognitive career theory (SCCT). SCCT posits three variables that influence career development: personal goals, self-efficacy, and understanding outcome expectations. With this framework, the authors sought to understand how a program of clinical electives during the clerkship year influences students' perceptions of their career exploration. We aimed to: (1) describe an innovative clerkship elective program designed for career exploration, and (2) explore how this influenced students' career exploration using qualitative analysis. Intervention: Beginning in 2018, students at our institution were required to participate in three 2-week clinical electives during their clerkship year, called Clinical Immersive Experiences (CIExes). CIExes were categorized into four different types: apprenticeship, clinical skills building, integrative (multi-disciplinary), or subspecialty. 
Authors invited fourth year students to participate in interviews (January to March 2019) about how they selected electives and how these electives contributed to their career exploration. Interviews continued until reaching information sufficiency. Authors coded and analyzed transcripts using template analysis. Context: This curricular intervention took place in the context of large-scale curricular redesign. Students began clerkships partway into their second year of medical school. The family and community medicine clerkship, which was previously a 6-week core clerkship, was changed to a longitudinal format, thus freeing up 6 weeks for electives. Other core clerkships included anesthesia (2 weeks), medicine (8 weeks), neurology (4 weeks), obstetrics and gynecology (6 weeks), pediatrics (6 weeks), psychiatry (4 weeks), and surgery (8 weeks). Impact: From 15 student interviews, we identified three major themes. First, CIExes facilitated personalized career exploration. All students felt that at least one elective helped them solidify their decision about a specialty choice. Second, CIExes promoted focused learning and skills development that complemented core rotations. They noted the benefit of positive relationships with supervisors, particularly attendings, during these electives. Third, students highlighted how these electives fostered a positive learning environment and enhanced wellbeing. SCCT clarified how the CIEx program helped students advance their personal goals, self-efficacy, and outcome expectations during a pivotal time in medical school. Lessons Learned: We learned that from the student perspective, the inclusion of clinical electives in the clerkship year benefited students' career exploration by helping them develop and refine their career goals, increase self-efficacy, and test outcome expectations in a meaningful way as anticipated from SCCT. 
In addition, we found that CIExes created a positive learning environment that allowed deep relationships to develop in fields of interest and that supported a strong sense of wellbeing. Supplemental data for this article is available online at https://doi.org/10.1080/10401334.2021.1891545.",2021-04-01 +32026623,Congenital heart disease in low-and-middle-income countries: Focus on sub-Saharan Africa.,"The etiology of congenital heart disease (CHD) is multifactorial. The birth prevalence of CHD is shaped by a wide variety of maternal, fetal, and neonatal risk factors, along with the rates of prenatal diagnosis and terminations of pregnancy, all of which have geographic variability Epidemiology data availability from low-and-middle-income countries (LMIC) on CHD prevalence, morbidity, and mortality are far more limited than from high income countries. Data on specific genetic, environmental, and prenatal risk associated with CHD are almost nonexistent. In this article, we will focus on defining what data are available, genetic risk factors, birth and overall prevalence, morbidity, and the impact of limited access to interventions, both surgery and cardiac catheterizations. We will highlight CHD in sub-Saharan Africa to detail epidemiology studies in the poorest regions of the world. Existing literature as well as estimates from the Global Burden of Disease Study (http://ghdx.healthdata.org) form the basis for this review. The intersection of poverty, high fertility rates, and limited access to care results in a unique profile of CHD in LMIC. CHD is not a preventable disease (by most standards), so early detection and access are our key interventions to improve the dire outcomes for children in low-resources settings of the world.",2020-02-06 +32250452,Electronic personal assessment questionnaire for vascular conditions (ePAQ-VAS): development and validity.,"

Background

This paper describes the development and validation of an electronic personal assessment questionnaire for vascular conditions (ePAQ-VAS) that captures the symptomatology, quality of life and clinically relevant data of patients presenting to vascular services.

Methods

A two-stage survey was conducted in patients attending a tertiary vascular department. Patients completed the ePAQ-VAS remotely online, or on site using an electronic tablet. In the first stage of the survey, the responses were used to perform confirmatory factor analysis to assess the construct validity and remove redundant items. The internal reliability of disease-specific scales was investigated. In the second stage of the survey, the acceptability, known-group validity, test-retest reliability, and responsiveness of ePAQ-VAS was assessed.

Results

In total, 721 patients completed ePAQ-VAS. Their mean(s.d.) age was 63·5(15·7) years and 468 (64·9 per cent) were men. Some 553 patients (76·7 per cent) completed the questionnaire in clinic and the remainder completed the questionnaire online. The results of the confirmatory factor analysis confirmed the conceptual model for ePAQ-VAS structure and eliminated six items. Internal reliability was acceptable for all the scales (Cronbach's α greater than 0·7). The test-retest reliability measured by the intraclass correlation coefficient ranged from 0·65 to 0·99. The results showed that the instrument was responsive over time with the standardized response mean ranging from 0·69 to 1·60.

Conclusion

ePAQ-VAS is a holistic data-collection process that is relevant to vascular service users and has potential to contribute to patient-focused care and the collection of aggregate data for service evaluation. A demonstration version of the final version of ePAQ can be viewed at http://demo-questionnaire.epaq.co.uk/home/project?id=VASC_1.7&page=1.",2020-04-06 +33245232,Automated Identification of Molecular Crystals' Packing Motifs.,"Packing motifs-patterns in how molecules orient relative to one another in a crystal structure-are an important concept in many subdisciplines of materials science because of correlations observed between specific packing motifs and properties of interest. That said, packing motif data sets have remained small and noisy due to intensive manual labeling processes and insufficient labeling schemes. The most prominent labeling algorithms calculate relative interplanar angles of nearest neighbor molecules to determine the packing motif of a molecular crystal, but this simple approach can fail when neighbors are naively sampled isotropically around the crystal structure. To remedy this issue, we propose an optimization algorithm, which rotates the molecular crystal structure to find representative molecules that inform the packing motif. We package this algorithm into an automated framework-Autopack-which both optimally rotates the crystal structure and labels the packing motif based on the appropriate neighboring molecules. In this work, we detail the Autopack framework and its performance, which shows improvements compared to previous state-of-the-art labeling methods, providing the first quantitative point of comparison for packing motif labeling algorithms. 
Furthermore, using Autopack (available at https://ipo.llnl.gov/technologies/software/autopack), we perform the first large-scale study of potential relationships between chemicals' compositions and packing motifs, which shows that these relationships are more complex than previously hypothesized from studies that used only tens of polycyclic aromatic hydrocarbon molecules. Autopack's capabilities help pose next steps for crystal engineering research focusing not only on a molecule's adoption of a specific packing motif but also on new structure-property relationships.",2020-11-27 +27297607,Developing educational resources for population genetics in R: an open and collaborative approach.,"The r computing and statistical language community has developed a myriad of resources for conducting population genetic analyses. However, resources for learning how to carry out population genetic analyses in r are scattered and often incomplete, which can make acquiring this skill unnecessarily difficult and time consuming. To address this gap, we developed an online community resource with guidance and working demonstrations for conducting population genetic analyses in r. The resource is freely available at http://popgen.nescent.org and includes material for both novices and advanced users of r for population genetics. To facilitate continued maintenance and growth of this resource, we developed a toolchain, process and conventions designed to (i) minimize financial and labour costs of upkeep; (ii) to provide a low barrier to contribution; and (iii) to ensure strong quality assurance. The toolchain includes automatic integration testing of every change and rebuilding of the website when new vignettes or edits are accepted. The process and conventions largely follow a common, distributed version control-based contribution workflow, which is used to provide and manage open peer review by designated website editors. 
The online resources include detailed documentation of this process, including video tutorials. We invite the community of population geneticists working in r to contribute to this resource, whether for a new use case of their own, or as one of the vignettes from the 'wish list' we maintain, or by improving existing vignettes.",2016-07-12 +33294132,autoRPA: A web server for constructing cancer staging models by recursive partitioning analysis.,"Cancer staging provides a common language that is used to describe the severity of an individual's cancer, which plays a critical role in optimizing cancer treatment. Recursive partitioning analysis (RPA) is the most widely accepted method for cancer staging. Despite its widespread use, to date, only limited tools have been developed to implement the RPA algorithm for cancer staging. Moreover, most of the available tools can be accessed only from command lines and also lack visualization, making them difficult for clinical investigators without programing skills to use. Therefore, we developed a web server called autoRPA that is dedicated to supporting the construction of prognostic staging models and performance comparisons among different staging models. Based on the RPA algorithm and log-rank test statistics, autoRPA can establish a decision-making tree from survival data and provide clinicians an intuitive method to further prune the decision tree. Moreover, autoRPA can evaluate the contribution of each submitted covariate that is involved in the grouping process and help identify factors that significantly contribute to cancer staging. Four indicators, including hazard consistency, hazard discrimination, percentage of variation explained, and sample size balance, are introduced to validate the performance of the designed staging models. In addition, autoRPA can also be used to compare the performance of different prognostic staging models using a standard bootstrap evaluation method. 
The web server of autoRPA is freely available at http://rpa.renlab.org.",2020-11-10 +32348325,QUATgo: Protein quaternary structural attributes predicted by two-stage machine learning approaches with heterogeneous feature encoding.,"Many proteins exist in natures as oligomers with various quaternary structural attributes rather than as single chains. Predicting these attributes is an essential task in computational biology for the advancement of proteomics. However, the existing methods do not consider the integration of heterogeneous coding and the accuracy of subunit categories with limited data. To this end, we proposed a tool that can predict more than 12 subunit protein oligomers, QUATgo. Meanwhile, three kinds of sequence coding were used, including dipeptide composition, which was used for the first time to predict protein quaternary structural attributes, and protein half-life characteristics, and we modified the coding method of the functional domain composition proposed by predecessors to solve the problem of large feature vectors. QUATgo solves the problem of insufficient data for a single subunit using a two-stage architecture and uses 10-fold cross-validation to test the predictive accuracy of the classifier. QUATgo has 49.0% cross-validation accuracy and 31.1% independent test accuracy. In the case study, the accuracy of QUATgo can reach 61.5% for predicting the quaternary structure of influenza virus hemagglutinin proteins. Finally, QUATgo is freely accessible to the public as a web server via the site http://predictor.nchu.edu.tw/QUATgo.",2020-04-29 +31621885,An independently validated survival nomogram for lower-grade glioma.,"

Background

Gliomas are the most common primary malignant brain tumor. Diffuse low-grade and intermediate-grade gliomas, which together compose the lower-grade gliomas (LGGs; World Health Organization [WHO] grades II and III), present a therapeutic challenge to physicians due to the heterogeneity of their clinical behavior. Nomograms are useful tools for individualized estimation of survival. This study aimed to develop and independently validate a survival nomogram for patients with newly diagnosed LGG.

Methods

Data were obtained for newly diagnosed LGG patients from The Cancer Genome Atlas (TCGA) and the Ohio Brain Tumor Study (OBTS) with the following variables: tumor grade (II or III), age at diagnosis, sex, Karnofsky performance status (KPS), and molecular subtype (IDH mutant with 1p/19q codeletion [IDHmut-codel], IDH mutant without 1p/19q codeletion, and IDH wild-type). Survival was assessed using Cox proportional hazards regression, random survival forests, and recursive partitioning analysis, with adjustment for known prognostic factors. The models were developed using TCGA data and independently validated using the OBTS data. Models were internally validated using 10-fold cross-validation and externally validated with calibration curves.

Results

A final nomogram was validated for newly diagnosed LGG. Factors that increased the probability of survival included grade II tumor, younger age at diagnosis, having a high KPS, and the IDHmut-codel molecular subtype.

Conclusions

A nomogram that calculates individualized survival probabilities for patients with newly diagnosed LGG could be useful to health care providers for counseling patients regarding treatment decisions and optimizing therapeutic approaches. Free online software for implementing this nomogram is provided: https://hgittleman.shinyapps.io/LGG_Nomogram_H_Gittleman/.

Key points

1. A survival nomogram for lower-grade glioma patients has been developed and externally validated. 2. Free online software for implementing this nomogram is provided allowing for ease of use by practicing health care providers.",2020-05-01 +29954441,"INSaFLU: an automated open web-based bioinformatics suite ""from-reads"" for influenza whole-genome-sequencing-based surveillance.","

Background

A new era of flu surveillance has already started based on the genetic characterization and exploration of influenza virus evolution at whole-genome scale. Although this has been prioritized by national and international health authorities, the demanded technological transition to whole-genome sequencing (WGS)-based flu surveillance has been particularly delayed by the lack of bioinformatics infrastructures and/or expertise to deal with primary next-generation sequencing (NGS) data.

Results

We developed and implemented INSaFLU (""INSide the FLU""), which is the first influenza-oriented bioinformatics free web-based suite that deals with primary NGS data (reads) towards the automatic generation of the output data that are actually the core first-line ""genetic requests"" for effective and timely influenza laboratory surveillance (e.g., type and sub-type, gene and whole-genome consensus sequences, variants' annotation, alignments and phylogenetic trees). By handling NGS data collected from any amplicon-based schema, the implemented pipeline enables any laboratory to perform multi-step software intensive analyses in a user-friendly manner without previous advanced training in bioinformatics. INSaFLU gives access to user-restricted sample databases and projects management, being a transparent and flexible tool specifically designed to automatically update project outputs as more samples are uploaded. Data integration is thus cumulative and scalable, fitting the need for a continuous epidemiological surveillance during the flu epidemics. Multiple outputs are provided in nomenclature-stable and standardized formats that can be explored in situ or through multiple compatible downstream applications for fine-tuned data analysis. This platform additionally flags samples as ""putative mixed infections"" if the population admixture enrolls influenza viruses with clearly distinct genetic backgrounds, and enriches the traditional ""consensus-based"" influenza genetic characterization with relevant data on influenza sub-population diversification through a depth analysis of intra-patient minor variants. This dual approach is expected to strengthen our ability not only to detect the emergence of antigenic and drug resistance variants but also to decode alternative pathways of influenza evolution and to unveil intricate routes of transmission.

Conclusions

In summary, INSaFLU supplies public health laboratories and influenza researchers with an open ""one size fits all"" framework, potentiating the operationalization of a harmonized multi-country WGS-based surveillance for influenza virus. INSaFLU can be accessed through https://insaflu.insa.pt .",2018-06-29 +33461600,Prediction and mechanistic analysis of drug-induced liver injury (DILI) based on chemical structure.,"

Background

Drug-induced liver injury (DILI) is a major safety concern characterized by a complex and diverse pathogenesis. In order to identify DILI early in drug development, a better understanding of the injury and models with better predictivity are urgently needed. One approach in this regard are in silico models which aim at predicting the risk of DILI based on the compound structure. However, these models do not yet show sufficient predictive performance or interpretability to be useful for decision making by themselves, the former partially stemming from the underlying problem of labeling the in vivo DILI risk of compounds in a meaningful way for generating machine learning models.

Results

As part of the Critical Assessment of Massive Data Analysis (CAMDA) ""CMap Drug Safety Challenge"" 2019 ( http://camda2019.bioinf.jku.at ), chemical structure-based models were generated using the binarized DILIrank annotations. Support Vector Machine (SVM) and Random Forest (RF) classifiers showed comparable performance to previously published models with a mean balanced accuracy over models generated using 5-fold LOCO-CV inside a 10-fold training scheme of 0.759 ± 0.027 when predicting an external test set. In the models which used predicted protein targets as compound descriptors, we identified the most information-rich proteins which agreed with the mechanisms of action and toxicity of nonsteroidal anti-inflammatory drugs (NSAIDs), one of the most important drug classes causing DILI, stress response via TP53 and biotransformation. In addition, we identified multiple proteins involved in xenobiotic metabolism which could be novel DILI-related off-targets, such as CLK1 and DYRK2. Moreover, we derived potential structural alerts for DILI with high precision, including furan and hydrazine derivatives; however, all derived alerts were present in approved drugs and were over specific indicating the need to consider quantitative variables such as dose.

Conclusion

Using chemical structure-based descriptors such as structural fingerprints and predicted protein targets, DILI prediction models were built with a predictive performance comparable to previous literature. In addition, we derived insights on proteins and pathways statistically (and potentially causally) linked to DILI from these models and inferred new structural alerts related to this adverse endpoint.",2021-01-18 +31054477,"ARGA, a pipeline for primer evaluation on antibiotic resistance genes.","Molecular biology techniques have assisted in the investigation of antibiotic resistance genes (ARGs) from various environments. However, their accuracy relies on primer quality and data interpretation, both of which require a full-coverage sequence database for ARGs. Here, based upon the abandoned Antibiotic Resistance Genes Database (ARDB), we created an updated sequence database of antibiotic resistance genes (SDARG). A total of 1,260,069 protein sequences and 1,164,479 nucleotide sequences, 56 times more sequences than ARDB, from 448 types of ARGs (enabling resistance to 18 categories of antibiotics) were collected and integrated with different hierarchical credibility and full-scale taxonomic information. Based on this comprehensive sequence database, an online pipeline - ARG analyzer (ARGA, http://mem.rcees.ac.cn:8083/) was developed to assess current ARGs primers, as well as annotate ARGs from environmental metagenomes. Thereafter, a list of 658 published primer pairs, targeting 173 ARGs, was evaluated using ARGA and integrated in ARGA as ARGs primer database. The results showed that 65.05% primers are of high specificity (≥90%), while only 29.79% primers cover >50% of targeted sequences, indicating a divergence in the quality of current ARG primers. Hence, primer assessment or redesign is highly recommended to improve the accuracy of ARGs studies. 
ARGs primer database was attached in ARGA to provide researchers alternatives to better survey ARGs in the environment.",2019-05-03 +33507892,"Allergic Reactions Including Anaphylaxis After Receipt of the First Dose of Moderna COVID-19 Vaccine - United States, December 21, 2020-January 10, 2021.","As of January 20, 2021, a total of 24,135,690 cases of coronavirus disease 2019 (COVID-19) and 400,306 associated deaths had been reported in the United States (https://covid.cdc.gov/covid-data-tracker/#cases_casesper100klast7days). On December 18, 2020, the Food and Drug Administration (FDA) issued an Emergency Use Authorization (EUA) for Moderna COVID-19 vaccine administered as 2 doses, 1 month apart to prevent COVID-19. On December 19, 2020, the Advisory Committee on Immunization Practices (ACIP) issued an interim recommendation for use of Moderna COVID-19 vaccine (1). As of January 10, 2021, a reported 4,041,396 first doses of Moderna COVID-19 vaccine had been administered in the United States, and reports of 1,266 (0.03%) adverse events after receipt of Moderna COVID-19 vaccine were submitted to the Vaccine Adverse Event Reporting System (VAERS). Among these, 108 case reports were identified for further review as possible cases of severe allergic reaction, including anaphylaxis. Anaphylaxis is a life-threatening allergic reaction that occurs rarely after vaccination, with onset typically within minutes to hours (2). Among these case reports, 10 cases were determined to be anaphylaxis (a rate of 2.5 anaphylaxis cases per million Moderna COVID-19 vaccine doses administered), including nine in persons with a documented history of allergies or allergic reactions, five of whom had a previous history of anaphylaxis. The median interval from vaccine receipt to symptom onset was 7.5 minutes (range = 1-45 minutes). Among eight persons with follow-up information available, all had recovered or been discharged home. 
Among the remaining case reports that were determined not to be anaphylaxis, 47 were assessed to be nonanaphylaxis allergic reactions, and 47 were considered nonallergic adverse events. For four case reports, investigators have been unable to obtain sufficient information to assess the likelihood of anaphylaxis. This report summarizes the clinical and epidemiologic characteristics of case reports of allergic reactions, including anaphylaxis and nonanaphylaxis allergic reactions, after receipt of the first dose of Moderna COVID-19 vaccine during December 21, 2020-January 10, 2021, in the United States. CDC has issued updated interim clinical considerations for use of mRNA COVID-19 vaccines currently authorized in the United States (3) and interim considerations for preparing for the potential management of anaphylaxis (4).",2021-01-29 +30894839,Validation of a Bioinformatics Workflow for Routine Analysis of Whole-Genome Sequencing Data and Related Challenges for Pathogen Typing in a European National Reference Center: Neisseria meningitidis as a Proof-of-Concept.,"Despite being a well-established research method, the use of whole-genome sequencing (WGS) for routine molecular typing and pathogen characterization remains a substantial challenge due to the required bioinformatics resources and/or expertise. Moreover, many national reference laboratories and centers, as well as other laboratories working under a quality system, require extensive validation to demonstrate that employed methods are ""fit-for-purpose"" and provide high-quality results. A harmonized framework with guidelines for the validation of WGS workflows does currently, however, not exist yet, despite several recent case studies highlighting the urgent need thereof. 
We present a validation strategy focusing specifically on the exhaustive characterization of the bioinformatics analysis of a WGS workflow designed to replace conventionally employed molecular typing methods for microbial isolates in a representative small-scale laboratory, using the pathogen Neisseria meningitidis as a proof-of-concept. We adapted several classically employed performance metrics specifically toward three different bioinformatics assays: resistance gene characterization (based on the ARG-ANNOT, ResFinder, CARD, and NDARO databases), several commonly employed typing schemas (including, among others, core genome multilocus sequence typing), and serogroup determination. We analyzed a core validation dataset of 67 well-characterized samples typed by means of classical genotypic and/or phenotypic methods that were sequenced in-house, allowing to evaluate repeatability, reproducibility, accuracy, precision, sensitivity, and specificity of the different bioinformatics assays. We also analyzed an extended validation dataset composed of publicly available WGS data for 64 samples by comparing results of the different bioinformatics assays against results obtained from commonly used bioinformatics tools. We demonstrate high performance, with values for all performance metrics >87%, >97%, and >90% for the resistance gene characterization, sequence typing, and serogroup determination assays, respectively, for both validation datasets. Our WGS workflow has been made publicly available as a ""push-button"" pipeline for Illumina data at https://galaxy.sciensano.be to showcase its implementation for non-profit and/or academic usage. 
Our validation strategy can be adapted to other WGS workflows for other pathogens of interest and demonstrates the added value and feasibility of employing WGS with the aim of being integrated into routine use in an applied public health setting.",2019-03-06 +26929789,Phonological Awareness at 5 years of age in Children who use Hearing Aids or Cochlear Implants.,"Children with hearing loss typically underachieve in reading, possibly as a result of their underdeveloped phonological skills. This study addressed the questions of whether the development of phonological awareness (PA) is influenced by 1) the degree of hearing loss; and 2) whether performance of children with severe-profound hearing loss differed according to the hearing devices used. Drawing on data collected as part of the Longitudinal Outcomes of Children with Hearing Impairment (LOCHI, www.outcomes.nal.gov.au) study, the authors found that sound-matching scores of children with hearing loss ranging from mild to profound degrees were, on average, within the normal range. The degree of hearing loss did not have a significant impact on scores, but there was a non-significant tendency for the proportion of children who achieved zero scores to increase with increase in hearing loss. For children with severe hearing loss, there was no significant group difference in scores among children who used bilateral hearing aids, bimodal fitting (a cochlear implant and a hearing aid in contralateral ears), and bilateral cochlear implants. Although there is a need for further prospective research, professionals have an important role in targeting PA skills for rehabilitation of young children with hearing loss.",2015-09-01 +30020956,ProfPPIdb: Pairs of physical protein-protein interactions predicted for entire proteomes.,"

Motivation

Protein-protein interactions (PPIs) play a key role in many cellular processes. Most annotations of PPIs mix experimental and computational data. The mix optimizes coverage, but obfuscates the annotation origin. Some resources excel at focusing on reliable experimental data. Here, we focused on new pairs of interacting proteins for several model organisms based solely on sequence-based prediction methods.

Results

We extracted reliable experimental data about which proteins interact (binary) for eight diverse model organisms from public databases, namely from Escherichia coli, Schizosaccharomyces pombe, Plasmodium falciparum, Drosophila melanogaster, Caenorhabditis elegans, Mus musculus, Rattus norvegicus, Arabidopsis thaliana, and for the previously used Homo sapiens and Saccharomyces cerevisiae. Those data were the base to develop a PPI prediction method for each model organism. The method used evolutionary information through a profile-kernel Support Vector Machine (SVM). With the resulting eight models, we predicted all possible protein pairs in each organism and made the top predictions available through a web application. Almost all of the PPIs made available were predicted between proteins that have not been observed in any interaction, in particular for less well-studied organisms. Thus, our work complements existing resources and is particularly helpful for designing experiments because of its uniqueness. Experimental annotations and computational predictions are strongly influenced by the fact that some proteins have many partners and others few. To optimize machine learning, recent methods explicitly ignored such a network-structure and rely either on domain knowledge or sequence-only methods. Our approach is independent of domain-knowledge and leverages evolutionary information. The database interface representing our results is accessible from https://rostlab.org/services/ppipair/. The data can also be downloaded from https://figshare.com/collections/ProfPPI-DB/4141784.",2018-07-18 +30574164,VarQ: A Tool for the Structural and Functional Analysis of Human Protein Variants.,"Understanding the functional effect of Single Amino acid Substitutions (SAS), derived from the occurrence of single nucleotide variants (SNVs), and their relation to disease development is a major issue in clinical genomics. 
Despite the existence of several bioinformatic algorithms and servers that predict if a SAS is pathogenic or not, they give little or no information at all on the reasons for pathogenicity prediction and on the actual predicted effect of the SAS on the protein function. Moreover, few actual methods take into account structural information when available for automated analysis. Moreover, many of these algorithms are able to predict an effect that no necessarily translates directly into pathogenicity. VarQ is a bioinformatic pipeline that incorporates structural information for the detailed analysis and prediction of SAS effect on protein function. It is an online tool which uses UniProt id and automatically analyzes known and user provided SAS for their effect on protein activity, folding, aggregation and protein interactions, among others. We show that structural information, when available, can improve the SAS pathogenicity diagnosis and more important explain its causes. We show that VarQ is able to correctly reproduce previous analysis of RASopathies related mutations, saving extensive and time consuming manual curation. VarQ assessment was performed over a set of previously manually curated RASopathies (diseases that affects the RAS/MAPK signaling pathway) related variants, showing its ability to correctly predict the phenotypic outcome and its underlying cause. This resource is available online at http://varq.qb.fcen.uba.ar/. Supporting Information & Tutorials may be found in the webpage of the tool.",2018-12-06 +25510499,VectorBase: an updated bioinformatics resource for invertebrate vectors and other organisms related with human diseases.,"VectorBase is a National Institute of Allergy and Infectious Diseases supported Bioinformatics Resource Center (BRC) for invertebrate vectors of human pathogens. Now in its 11th year, VectorBase currently hosts the genomes of 35 organisms including a number of non-vectors for comparative analysis. 
Hosted data range from genome assemblies with annotated gene features, transcript and protein expression data to population genetics including variation and insecticide-resistance phenotypes. Here we describe improvements to our resource and the set of tools available for interrogating and accessing BRC data including the integration of Web Apollo to facilitate community annotation and providing Galaxy to support user-based workflows. VectorBase also actively supports our community through hands-on workshops and online tutorials. All information and data are freely available from our website at https://www.vectorbase.org/.",2014-12-15 +,mvmapper: Interactive spatial mapping of genetic structures,"Characterizing genetic structure across geographic space is a fundamental challenge in population genetics. Multivariate statistical analyses are powerful tools for summarizing genetic variability, but geographic information and accompanying metadata are not always easily integrated into these methods in a user‐friendly fashion. Here, we present a deployable Python‐based web‐tool, mvmapper, for visualizing and exploring results of multivariate analyses in geographic space. This tool can be used to map results of virtually any multivariate analysis of georeferenced data, and routines for exporting results from a number of standard methods have been integrated in the R package adegenet, including principal components analysis (PCA), spatial PCA, discriminant analysis of principal components, principal coordinates analysis, nonmetric dimensional scaling and correspondence analysis. mvmapper's greatest strength is facilitating dynamic and interactive exploration of the statistical and geographic frameworks side by side, a task that is difficult and time‐consuming with currently available tools. 
Source code and deployment instructions, as well as a link to a hosted instance of mvmapper, can be found at https://popphylotools.github.io/mvMapper/.",2018-03-01 +32913487,An Online Interactive Video Vignette that Helps Students Learn Key Concepts of Fermentation and Respiration. ,"Topics related to energy transformation and metabolism are important parts of an undergraduate biology curriculum, but these are also topics that students traditionally struggle with. To address this, we have created a short online Interactive Video Vignette (IVV) called To Ferment or Not to Ferment: That is the Question. This IVV is designed to help students learn important ideas related to cellular respiration and metabolism. Students in various courses across four institutions were assigned the IVV as an out-of-class preinstruction homework assignment. To test the effectiveness of this IVV on student learning, we collected and analyzed data from questions embedded in the IVV, open response reflection questions, and pre- and postassessments from IVV watchers and nonwatchers. Our analysis revealed that students who completed the IVV activity interacted productively with this online tool and made significant learning gains on important topics related to cellular respiration and metabolism. This IVV is freely available via https://www.rit.edu/cos/interactive/MINT for instructors to adopt for class use.",2020-08-31 +30101161,Sustainability indicators for salmon aquaculture.,"In this paper, we present and describe data comprising indicators of sustainability, collected from eight of the major certification schemes for salmon aquaculture and categorized according to the topics covered by each. These indicators cover most aspects of aquaculture production, including biotic and abiotic effects, feed, emission and waste, fish health and welfare, social assurance, and respect for native culture. 
In addition to being published in its entirety as supplementary material alongside this article, the data is available through a searchable database on the SustainFish project site: https://sustainfish.wixsite.com/sustainfishproject/search-indicator-database.",2018-07-27 +32108862,Genome Detective Coronavirus Typing Tool for rapid identification and characterization of novel coronavirus genomes.,"

Summary

Genome detective is a web-based, user-friendly software application to quickly and accurately assemble all known virus genomes from next-generation sequencing datasets. This application allows the identification of phylogenetic clusters and genotypes from assembled genomes in FASTA format. Since its release in 2019, we have produced a number of typing tools for emergent viruses that have caused large outbreaks, such as Zika and Yellow Fever Virus in Brazil. Here, we present the Genome Detective Coronavirus Typing Tool that can accurately identify the novel severe acute respiratory syndrome (SARS)-related coronavirus (SARS-CoV-2) sequences isolated in China and around the world. The tool can accept up to 2000 sequences per submission and the analysis of a new whole-genome sequence will take approximately 1 min. The tool has been tested and validated with hundreds of whole genomes from 10 coronavirus species, and correctly classified all of the SARS-related coronavirus (SARSr-CoV) and all of the available public data for SARS-CoV-2. The tool also allows tracking of new viral mutations as the outbreak expands globally, which may help to accelerate the development of novel diagnostics, drugs and vaccines to stop the COVID-19 disease.

Availability and implementation

https://www.genomedetective.com/app/typingtool/cov.

Contact

koen@emweb.be or deoliveira@ukzn.ac.za.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +24253304,CR Cistrome: a ChIP-Seq database for chromatin regulators and histone modification linkages in human and mouse.,"Diversified histone modifications (HMs) are essential epigenetic features. They play important roles in fundamental biological processes including transcription, DNA repair and DNA replication. Chromatin regulators (CRs), which are indispensable in epigenetics, can mediate HMs to adjust chromatin structures and functions. With the development of ChIP-Seq technology, there is an opportunity to study CR and HM profiles at the whole-genome scale. However, no specific resource for the integration of CR ChIP-Seq data or CR-HM ChIP-Seq linkage pairs is currently available. Therefore, we constructed the CR Cistrome database, available online at http://compbio.tongji.edu.cn/cr and http://cistrome.org/cr/, to further elucidate CR functions and CR-HM linkages. Within this database, we collected all publicly available ChIP-Seq data on CRs in human and mouse and categorized the data into four cohorts: the reader, writer, eraser and remodeler cohorts, together with curated introductions and ChIP-Seq data analysis results. For the HM readers, writers and erasers, we provided further ChIP-Seq analysis data for the targeted HMs and schematized the relationships between them. We believe CR Cistrome is a valuable resource for the epigenetics community.",2013-11-18 +30865261,SCL: a lattice-based approach to infer 3D chromosome structures from single-cell Hi-C data.,"

Motivation

In contrast to population-based Hi-C data, single-cell Hi-C data are zero-inflated and do not indicate the frequency of proximate DNA segments. There are a limited number of computational tools that can model the 3D structures of chromosomes based on single-cell Hi-C data.

Results

We developed single-cell lattice (SCL), a computational method to reconstruct 3D structures of chromosomes based on single-cell Hi-C data. We designed a loss function and a 2D Gaussian function specifically for the characteristics of single-cell Hi-C data. A chromosome is represented as beads-on-a-string and stored in a 3D cubic lattice. Metropolis-Hastings simulation and simulated annealing are used to simulate the structure and minimize the loss function. We evaluated the SCL-inferred 3D structures (at both 500 and 50 kb resolutions) using multiple criteria and compared them with the ones generated by another modeling software program. The results indicate that the 3D structures generated by SCL closely fit single-cell Hi-C data. We also found similar patterns of trans-chromosomal contact beads, Lamin-B1 enriched topologically associating domains (TADs), and H3K4me3 enriched TADs by mapping data from previous studies onto the SCL-inferred 3D structures.

Availability and implementation

The C++ source code of SCL is freely available at http://dna.cs.miami.edu/SCL/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +30395294,The antiSMASH database version 2: a comprehensive resource on secondary metabolite biosynthetic gene clusters.,"Natural products originating from microorganisms are frequently used in antimicrobial and anticancer drugs, pesticides, herbicides or fungicides. In the last years, the increasing availability of microbial genome data has made it possible to access the wealth of biosynthetic clusters responsible for the production of these compounds by genome mining. antiSMASH is one of the most popular tools in this field. The antiSMASH database provides pre-computed antiSMASH results for many publicly available microbial genomes and allows for advanced cross-genome searches. The current version 2 of the antiSMASH database contains annotations for 6200 full bacterial genomes and 18,576 bacterial draft genomes and is available at https://antismash-db.secondarymetabolites.org/.",2019-01-01 +30243763,A Resource for Inactivation of MicroRNAs Using Short Tandem Target Mimic Technology in Model and Crop Plants.,"microRNAs (miRNAs) are endogenous small non-coding RNAs that bind to mRNAs and target them for cleavage and/or translational repression, leading to gene silencing. We previously developed short tandem target mimic (STTM) technology to deactivate endogenous miRNAs in Arabidopsis. Here, we created hundreds of STTMs that target both conserved and species-specific miRNAs in Arabidopsis, tomato, rice, and maize, providing a resource for the functional interrogation of miRNAs. We not only revealed the functions of several miRNAs in plant development, but also demonstrated that tissue-specific inactivation of a few miRNAs in rice leads to an increase in grain size without adversely affecting overall plant growth and development. 
RNA-seq and small RNA-seq analyses of STTM156/157 and STTM165/166 transgenic plants revealed the roles of these miRNAs in plant hormone biosynthesis and activation, secondary metabolism, and ion-channel activity-associated electrophysiology, demonstrating that STTM technology is an effective approach for studying miRNA functions. To facilitate the study and application of STTM transgenic plants and to provide a useful platform for storing and sharing of information about miRNA-regulated gene networks, we have established an online Genome Browser (https://blossom.ffr.mtu.edu/designindex2.php) to display the transcriptomic and miRNAomic changes in STTM-induced miRNA knockdown plants.",2018-09-19 +26590256,STITCH 5: augmenting protein-chemical interaction networks with tissue and affinity data.,"Interactions between proteins and small molecules are an integral part of biological processes in living organisms. Information on these interactions is dispersed over many databases, texts and prediction methods, which makes it difficult to get a comprehensive overview of the available evidence. To address this, we have developed STITCH ('Search Tool for Interacting Chemicals') that integrates these disparate data sources for 430 000 chemicals into a single, easy-to-use resource. In addition to the increased scope of the database, we have implemented a new network view that gives the user the ability to view binding affinities of chemicals in the interaction network. This enables the user to get a quick overview of the potential effects of the chemical on its interaction partners. For each organism, STITCH provides a global network; however, not all proteins have the same pattern of spatial expression. Therefore, only a certain subset of interactions can occur simultaneously. In the new, fifth release of STITCH, we have implemented functionality to filter out the proteins and chemicals not associated with a given tissue. 
The STITCH database can be downloaded in full, accessed programmatically via an extensive API, or searched via a redesigned web interface at http://stitch.embl.de.",2015-11-20 +34283458,Veteran and Military Mental Health Issues,"As the United States faces two decades of continuous war, media and individuals with personal military connections have elevated public and professional concerns for the mental health of veterans and service members. The most publicized mental health challenges facing veterans and service members are PTSD and depression. Some research has suggested that approximately 14% to 16% of U.S. service members deployed to Afghanistan and Iraq have PTSD or depression. Although these mental health concerns are highlighted, other issues like suicide, traumatic brain injury (TBI), substance abuse, and interpersonal violence can be equally harmful in this population. The effects of these issues can be wide-reaching and substantially impact service members and their families. While combat and deployments are linked to increased risks for these mental health conditions, general military service can also lead to difficulties. There is no specified timeline for the presentation of these mental health concerns. Still, there are particularly stressful times for individuals and families, such as in close proximity to combat or when separating from active military service. Current U.S. Census reports estimate roughly 18 million veterans and 2.1 million active-duty and reserve service members (https://www.census.gov/newsroom/press-releases/2020/veterans-report.html). Since September 11, 2001, there have been 2.8 million active-duty American military personnel deployed to Iraq, Afghanistan, and beyond, leading to increasing numbers of combat veterans amongst the population. More than 6% of the U.S. population have served or are serving in the military. However, this statistic fails to capture the even greater number of family members affected by military service. 
Understanding military service and its relation to a patient’s physical and mental health can help providers improve their quality of care and potentially help save a patient’s life. Post-Traumatic Stress Disorder (PTSD) Post-traumatic stress disorder (PTSD) was first codified in the Diagnostic and Statistical Manual of Mental Disorders (DSM) 3 in 1980, driven in part by sociopolitical aftereffects of the Vietnam War. It has been alluded to in different forms throughout history, from “soldier’s heart” at the time of the Civil War, “shell shock” in the First World War, or “combat fatigue” around the Vietnam War. DSM criteria remained largely unchanged until the most recent update in 2013, although its classification continues to be debated. It is a complex and evolving biological, psychological, and social entity, making it challenging to study and diagnose. PTSD is often researched in war and disaster survivors but can affect anybody, including children. It is usually seen in survivors of violent events such as assault, disasters, terror attacks, and war, although it is also possible to experience PTSD from secondhand exposure, such as learning that a close friend or family member experienced a violent threat or accident. Many individuals exposed to trauma have transient numbness or heightened emotions, nightmares, anxiety, and hypervigilance but usually overcome symptoms within one month. In roughly 10 to 20% of cases, symptoms become persistent and debilitating. PTSD features intrusive thoughts, flashbacks, and nightmares regarding the past trauma, causing avoidance of reminders, hypervigilance, and sleep difficulties. Often, reliving the event can feel as threatening as inciting trauma. Symptoms can interfere with interpersonal and occupational function and manifest in psychological, emotional, physical, behavioral, and cognitive manners. Military personnel can be exposed to an array of potentially traumatizing experiences. 
Wartime deployments can result in witnessing severe injuries or violent death, sometimes occurring suddenly and not always on expected targets. Apart from the austere environment of deployment, active duty military members are at risk of experiencing non-military-related traumas such as interpersonal violence, physical or sexual abuse.  Symptoms related to these traumas can sometimes be exacerbated in the deployed environment. Depression After two decades of continuous war in Afghanistan, a growing population of veterans with combat and deployment experience is presenting for mental health care. Providers must take into account not only the physical wounds these veterans may have sustained but also the less visible ones such as PTSD, acute stress disorder, and depression. Although the condition does not garner the same attention as PTSD, depression remains one of the leading mental health conditions in the military. In fact, studies show that up to 9% of all appointments in the ambulatory military health network are related to depression. The military environment can act as a catalyst for the development and progression of depression. For example, separation from loved ones and support systems, stressors of combat, and seeing oneself and others in harm’s way are all elements that increase the risk of depression in active duty and veteran populations.  Military medical facilities saw an increase from a baseline of 11.4% of members diagnosed with depression to a rate of 15% after deployments to Iraq or Afghanistan. With such a high prevalence, providers must be responsible for identifying active duty and veteran patients who may be suffering from depression.  Major depression manifests through many symptoms, including depressed mood, loss of interest in activities, insomnia, weight loss or gain, psychomotor retardation, fatigue, decreased ability to concentrate, thoughts of worthlessness, and thoughts of suicide. 
These symptoms coalesce to significantly impact patients’ abilities to function fully. While the complement of symptoms is readily apparent on paper, a patient’s actual presentation can often be ambiguous. One out of every two depressed patients is not appropriately diagnosed by their general practitioner. Therefore, it is paramount to correctly screen for, identify, and follow through with appropriate treatments, especially in the active duty and veteran military population. Suicide Veteran suicide rates are at the highest level in recorded history, with annual deaths by suicide at over 6,000 veterans per year. Overall suicide rates within the United States have increased by 30% between 1999 and 2016. A study involving 27 states estimated 17.8% of these recorded suicides were by veterans. The U.S. Department of Veterans Affairs (VA) published data in 2016 that indicated veteran suicide rates were 1.5 times greater than non-veterans. Research has shown that veterans are at significantly increased risk of suicide during their first year outside of the military. In 2018, a Presidential Executive Order was signed to improve suicide prevention services for veterans during their transition to civilian life. Additionally, the Department of Defense (DoD) and VA have made suicide prevention a major priority because of observed increases in fatal and non-fatal suicide attempts throughout the wars in Iraq and Afghanistan. Within the U.S. Armed Forces, suicide rates doubled between 2000 and 2012, but since 2012 there have been no appreciable changes in the annual rate, with approximately 19.74 deaths per 100,000 service members. Substance Use Disorders Despite public attention over recent decades, SUDs, including alcohol use, remain a problem among veterans and military members. In these populations, alcohol use is common and is often used for stress relief and socializing. 
SUDs are associated with significant adverse medical, psychiatric, interpersonal, and occupational outcomes. One study on military personnel found that approximately 30% of completed suicides and around 20% of deaths due to high-risk behavior were attributable to alcohol or drug use. In the general U.S. population, alcohol is the fourth leading cause of preventable death, and 31% of driving-related fatalities involve alcohol intoxication. The DSM-5 defines SUD as a cluster of behaviors surrounding compulsive drug-seeking. This includes impaired control of, dysfunctional social functioning due to, and physiologic changes caused by drug use. Addiction is the most severe stage, characterized by loss of self-control leading to compulsive drug-seeking despite a desire to quit. Substances include legal drugs such as caffeine, nicotine, and alcohol; prescription medications such as opioids, sedative/hypnotics, and stimulants; and illicit drugs such as marijuana, cocaine, methamphetamines, heroin, hallucinogens, and inhalants.",2021-07-21 +25534750,dbPPT: a comprehensive database of protein phosphorylation in plants.,"As one of the most important protein post-translational modifications, the reversible phosphorylation is critical for plants in regulating a variety of biological processes such as cellular metabolism, signal transduction and responses to environmental stress. Numerous efforts especially large-scale phosphoproteome profiling studies have been contributed to dissect the phosphorylation signaling in various plants, while a large number of phosphorylation events were identified. To provide an integrated data resource for further investigations, here we present a comprehensive database of dbPPT (database of Phosphorylation site in PlanTs, at http://dbppt.biocuckoo.org), which contains experimentally identified phosphorylation sites in proteins from plants. 
The phosphorylation sites in dbPPT were manually curated from the literatures, whereas datasets in other public databases were also integrated. In total, there were 82,175 phosphorylation sites in 31,012 proteins from 20 plant organisms in dbPPT, presenting a larger quantity of phosphorylation sites and a higher coverage of plant species in comparison with other databases. The proportions of residue types including serine, threonine and tyrosine were 77.99, 17.81 and 4.20%, respectively. All the phosphoproteins and phosphorylation sites in the database were critically annotated. Since the phosphorylation signaling in plants attracted great attention recently, such a comprehensive resource of plant protein phosphorylation can be useful for the research community. Database URL: http://dbppt.biocuckoo.org",2014-12-22 +30971747,Genome sequence analysis of the fairy ring-forming fungus Lepista sordida and gene candidates for interaction with plants.,"Circular patterns called ""fairy rings"" in fields are a natural phenomenon that arises through the interaction between basidiomycete fungi and plants. Acceleration or inhibition of plant vegetative growth and the formation of mushroom fruiting bodies are both commonly observed when fairy rings form. The gene of an enzyme involved in the biosynthesis of these regulators was recently isolated in the fairy ring-forming fungus, Lepista sordida. To identify other genes involved in L. sordida fairy ring formation, we used previously generated sequence data to produce a more complete draft genome sequence for this species. Finally, we predicted the metabolic pathways of the plant growth regulators and 29 candidate enzyme-coding genes involved in fairy-ring formation based on gene annotations. Comparisons of protein coding genes among basidiomycete fungi revealed two nitric oxide synthase gene candidates that were uniquely encoded in genomes of fairy ring-forming fungi. 
These results provide a basis for the discovery of genes involved in fairy ring formation and for understanding the mechanisms involved in the interaction between fungi and plants. We also constructed a new web database F-RINGS ( http://bioinf.mind.meiji.ac.jp/f-rings/ ) to provide the comprehensive genomic information for L. sordida.",2019-04-10 +32553894,"Rapid detection of OXA-23-like, OXA-24-like, and OXA-58-like carbapenemases from Acinetobacter species by real-time PCR.","

Background

Carbapenemase-producing Acinetobacter species, especially A. baumannii, are frequently associated with treatment failures and hospital outbreaks; thus, rapid and reliable detection of specific resistance markers is paramount. The most common carbapenemases found in A. baumannii, namely OXA-23-like, OXA-24-like, and OXA-58-like, belong to the oxacillinase group (class D β-lactamases) which is notoriously difficult to identify phenotypically due to the lack of specific inhibitors.

Aim

To design and validate a multiplex real-time polymerase chain reaction (PCR) assay to detect and differentiate the above three oxacillinases.

Methods

All available variants of the above three oxacillinase subfamilies were downloaded (as of November 2019) from the Beta-Lactamase DataBase (http://bldb.eu/), aligned with Clustal Omega, and oligonucleotides were designed using Primer-BLAST. A multiplex real-time PCR assay that included an internal control to discount inhibition was optimized on the Rotor-Gene Q (Qiagen) using the Rotor-Gene Multiplex PCR Kit (Qiagen) and validated using a panel of 122 previously characterized strains carrying a wide range of β-lactamases, often in combination.

Findings

The in-silico approach enabled the design of oligonucleotides in conserved regions of the OXA-24-like and OXA-58-like alignments. Among the 42 described OXA-23-like variants, a single nucleotide polymorphism (SNP) was present in one of the oligonucleotide binding sites of OXA-27, OXA-166, OXA-811, OXA-812, and OXA-816. The assay was 100% sensitive and highly specific. Inhibition was not observed.

Conclusion

The assay is easy to perform with results available in about 70 min. It enables unequivocal detection and differentiation of OXA-23-like, OXA-24-like, and OXA-58-like carbapenemases even when more than one is simultaneously present.",2020-06-14 +31159731,Trait ontology analysis based on association mapping studies bridges the gap between crop genomics and Phenomics.,"

Background

Trait ontology (TO) analysis is a powerful system for functional annotation and enrichment analysis of genes. However, given the complexity of the molecular mechanisms underlying phenomes, only a few hundred gene-to-TO relationships in plants have been elucidated to date, limiting the pace of research in this ""big data"" era.

Results

Here, we curated all the available trait associated sites (TAS) information from 79 association mapping studies of maize (Zea mays L.) and rice (Oryza sativa L.) lines with diverse genetic backgrounds and built a large-scale TAS-derived TO system for functional annotation of genes in various crops. Our TO system contains information for up to 18,042 genes (6345 in maize at the 25 k level and 11,697 in rice at the 50 k level), including gene-to-TO relationships, which covers over one fifth of the annotated gene sets for maize and rice. A comparison of Gene Ontology (GO) vs. TO analysis demonstrated that the TAS-derived TO system is an efficient alternative tool for gene functional annotation and enrichment analysis. We therefore combined information from the TO, GO, metabolic pathway, and co-expression network databases and constructed the TAS system, which is publicly available at http://tas.hzau.edu.cn . TAS provides a user-friendly interface for functional annotation of genes, enrichment analysis, genome-wide extraction of trait-associated genes, and crosschecking of different functional annotation databases.

Conclusions

TAS bridges the gap between genomic and phenomic information in crops. This easy-to-use tool will be useful for geneticists, biologists, and breeders in the agricultural community, as it facilitates the dissection of molecular mechanisms conferring agronomic traits in an easy, genome-wide manner.",2019-06-03 +28615994,Characterisation of the circulating acellular proteome of healthy sheep using LC-MS/MS-based proteomics analysis of serum.,"

Background

Unlike humans, there is currently no publicly available reference mass spectrometry-based circulating acellular proteome data for sheep, limiting the analysis and interpretation of a range of physiological changes and disease states. The objective of this study was to develop a robust and comprehensive method to characterise the circulating acellular proteome in ovine serum.

Methods

Serum samples from healthy sheep were subjected to shotgun proteomic analysis using nano liquid chromatography nano electrospray ionisation tandem mass spectrometry (nanoLC-nanoESI-MS/MS) on a quadrupole time-of-flight instrument (TripleTOF® 5600+, SCIEX). Proteins were identified using ProteinPilot™ (SCIEX) and Mascot (Matrix Science) software based on a minimum of two unmodified highly scoring unique peptides per protein at a false discovery rate (FDR) of 1% software by searching a subset of the Universal Protein Resource Knowledgebase (UniProtKB) database (http://www.uniprot.org). PeptideShaker (CompOmics, VIB-UGent) searches were used to validate protein identifications from ProteinPilot™ and Mascot.

Results

ProteinPilot™ and Mascot identified 245 and 379 protein groups (IDs), respectively, and PeptideShaker validated 133 protein IDs from the entire dataset. Since Mascot software is considered the industry standard and identified the most proteins, these were analysed using the Protein ANalysis THrough Evolutionary Relationships (PANTHER) classification tool revealing the association of 349 genes with 127 protein pathway hits. These data are available via ProteomeXchange with identifier PXD004989.

Conclusions

These results demonstrated for the first time the feasibility of characterising the ovine circulating acellular proteome using nanoLC-nanoESI-MS/MS. This peptide spectral data contributes to a protein library that can be used to identify a wide range of proteins in ovine serum.",2016-01-01 +30689489,Short-Term Climate Variation Drives Baseline Innate Immune Function and Stress in a Tropical Bird: A Reactive Scope Perspective.,"Investment in immune function can be costly, and life-history theory predicts trade-offs between immune function and other physiological demands. Environmental heterogeneity may constrain or change the optimal strategy and thereby alter baseline immune function (possibly mediated by stress responses). We tested several hypotheses relating variation in climatic, ecological, and social environments to chronic stress and levels of baseline innate immunity in a wild, cooperatively breeding bird, the purple-crowned fairy-wren (Malurus coronatus coronatus). From samples collected biannually over 5 yr, we quantified three indexes of constitutive innate immune function (haptoglobin/PIT54, natural antibodies, complement activity) and one index of chronic stress (heterophil-lymphocyte ratio; n=513-647 ). Using an information-theoretic and multimodel inference statistical approach, we found that habitat quality and social group size did not affect any immune index, despite hypothesized links to resource abundance and parasite pressure. Rather, short-term variation in temperature and rainfall was related to immune function, while overall differences between seasons were small or absent, despite substantial seasonal variation in climate. Contrary to our expectation, we found no evidence that physiological stress mediated any effects of short-term climatic variables on immune indexes, and alternative mechanisms may be involved. 
Our results may be interpreted from the perspective of reactive scope models, whereby predictive homeostasis maintains standing immune function relative to long-term demands, while short-term environmental change, being less predictable, has a greater influence on baseline immune function.",2019-03-01 +25536338,Neuroinformatics of the Allen Mouse Brain Connectivity Atlas.,"The Allen Mouse Brain Connectivity Atlas is a mesoscale whole brain axonal projection atlas of the C57Bl/6J mouse brain. Anatomical trajectories throughout the brain were mapped into a common 3D space using a standardized platform to generate a comprehensive and quantitative database of inter-areal and cell-type-specific projections. This connectivity atlas has several desirable features, including brain-wide coverage, validated and versatile experimental techniques, a single standardized data format, a quantifiable and integrated neuroinformatics resource, and an open-access public online database (http://connectivity.brain-map.org/). Meaningful informatics data quantification and comparison is key to effective use and interpretation of connectome data. This relies on successful definition of a high fidelity atlas template and framework, mapping precision of raw data sets into the 3D reference framework, accurate signal detection and quantitative connection strength algorithms, and effective presentation in an integrated online application. Here we describe key informatics pipeline steps in the creation of the Allen Mouse Brain Connectivity Atlas and include basic application use cases.",2014-12-20 +32612817,The role of children in transmission of SARS-CoV-2: A rapid review.,"

Background

Understanding the role of children in the transmission of SARS-CoV-2 is urgently required given its policy implications in relation to the reopening of schools and intergenerational contacts.

Methods

We conducted a rapid review of studies that investigated the role of children in the transmission of SARS-CoV-2. We synthesized evidence for four categories: 1) studies reporting documented cases of SARS-CoV-2 transmission by infected children; 2) studies presenting indirect evidence on the potential of SARS-CoV-2 transmission by (both symptomatic and asymptomatic) children; 3) studies reporting cluster outbreaks of COVID-19 in schools; 4) studies estimating the proportions of children infected by SARS-CoV-2, and reported results narratively.

Results

A total of 16 unique studies were included for narrative synthesis. There is limited evidence detailing transmission of SARS-CoV-2 from infected children. We found two studies that reported a 3-month-old whose parents developed symptomatic COVID-19 seven days after caring for the infant and two children who may have contracted COVID-19 from the initial cases at a school in New South Wales. In addition, we identified six studies presenting indirect evidence on the potential for SARS-CoV-2 transmission by children, three of which found prolonged virus shedding in stools. There is little data on the transmission of SARS-CoV-2 in schools. We identified only two studies reporting outbreaks of COVID-19 in school settings and one case report of a child attending classes but not infecting any other pupils or staff. Lastly, we identified six studies estimating the proportion of children infected; data from population-based studies in Iceland, Italy, South Korea, Netherlands, California and a hospital-based study in the UK suggest children may be less likely to be infected.

Conclusions

Preliminary results from population-based and school-based studies suggest that children may be less frequently infected or infect others; however, current evidence is limited. Prolonged faecal shedding observed in studies highlights the potentially increased risk of faeco-oral transmission in children. Further seroprevalence studies (powered adequately for the paediatric population) are urgently required to establish whether children are in fact less likely to be infected compared to adults.

Note

We plan to update this rapid review as new data becomes available. These updates are available at https://www.ed.ac.uk/usher/uncover/completed-uncover-reviews.",2020-06-01 +32855417,Single cell transcriptomics identifies a signaling network coordinating endoderm and mesoderm diversification during foregut organogenesis.,"Visceral organs, such as the lungs, stomach and liver, are derived from the fetal foregut through a series of inductive interactions between the definitive endoderm (DE) and the surrounding splanchnic mesoderm (SM). While DE patterning is fairly well studied, the paracrine signaling controlling SM regionalization and how this is coordinated with epithelial identity is obscure. Here, we use single cell transcriptomics to generate a high-resolution cell state map of the embryonic mouse foregut. This identifies a diversity of SM cell types that develop in close register with the organ-specific epithelium. We infer a spatiotemporal signaling network of endoderm-mesoderm interactions that orchestrate foregut organogenesis. We validate key predictions with mouse genetics, showing the importance of endoderm-derived signals in mesoderm patterning. Finally, leveraging these signaling interactions, we generate different SM subtypes from human pluripotent stem cells (hPSCs), which previously have been elusive. The single cell data can be explored at: https://research.cchmc.org/ZornLab-singlecell .",2020-08-27 +31393554,RICOPILI: Rapid Imputation for COnsortias PIpeLIne.,"

Summary

Genome-wide association study (GWAS) analyses, at sufficient sample sizes and power, have successfully revealed biological insights for several complex traits. RICOPILI, an open-sourced Perl-based pipeline was developed to address the challenges of rapidly processing large-scale multi-cohort GWAS studies including quality control (QC), imputation and downstream analyses. The pipeline is computationally efficient with portability to a wide range of high-performance computing environments. RICOPILI was created as the Psychiatric Genomics Consortium pipeline for GWAS and adopted by other users. The pipeline features (i) technical and genomic QC in case-control and trio cohorts, (ii) genome-wide phasing and imputation, (iv) association analysis, (v) meta-analysis, (vi) polygenic risk scoring and (vii) replication analysis. Notably, a major differentiator from other GWAS pipelines, RICOPILI leverages on automated parallelization and cluster job management approaches for rapid production of imputed genome-wide data. A comprehensive meta-analysis of simulated GWAS data has been incorporated demonstrating each step of the pipeline. This includes all the associated visualization plots, to allow ease of data interpretation and manuscript preparation. Simulated GWAS datasets are also packaged with the pipeline for user training tutorials and developer work.

Availability and implementation

RICOPILI has a flexible architecture to allow for ongoing development and incorporation of newer available algorithms and is adaptable to various HPC environments (QSUB, BSUB, SLURM and others). Specific links for genomic resources are either directly provided in this paper or via tutorials and external links. The central location hosting scripts and tutorials is found at this URL: https://sites.google.com/a/broadinstitute.org/RICOPILI/home.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +32368601,Liquid based-cytology Pap smear dataset for automated multi-class diagnosis of pre-cancerous and cervical cancer lesions.,"While a publicly available benchmark dataset provides a base for the development of new algorithms and comparison of results, hospital-based data collected from the real-world clinical setup is also very important in AI-based medical research for automated disease diagnosis, prediction or classifications as per standard protocol. Primary data must be constantly updated so that the developed algorithms achieve as much accuracy as possible in the regional context. This dataset would support research work related to image segmentation and final classification for a complete decision support system (https://doi.org/10.1016/j.tice.2020.101347) [1]. Liquid-based cytology (LBC) is one of the cervical screening tests. The repository consists of a total of 963 LBC images sub-divided into four sets representing the four classes: NILM, LSIL, HSIL, and SCC. It comprises pre-cancerous and cancerous lesions related to cervical cancer as per standards under The Bethesda System (TBS). The images were captured in 40x magnification using Leica ICC50 HD microscope collected with due consent from 460 patients visiting the O&G department of the public hospital with various gynaecological problems. The images were then viewed and categorized by experts of the pathology department.",2020-04-22 +32449979,Evaluation of DNA methylation status of toll-like receptors 2 and 4 promoters in Behcet's disease.,"

Background

Altered innate immune function plays an important role in the initiation of inflammatory response in Behcet's disease (BD). Toll-like receptors (TLRs) are the master regulators of the innate immune system. Because the role of TLRs remains unknown in the pathogenesis of BD, the present study aimed to evaluate the expression levels and methylation status of the TLR2 and TLR4 promoters in patients with BD.

Methods

In the present study, Iranian Azeri BD patients (n = 47) with an active (n = 22) and inactive (n = 25) period, and healthy controls (n = 61), were matched according to age, sex and ethnicity. TLR2 and TLR4 genes promoter CpG islands were predicted with the Eukaryotic Promoter Database (https://epd.vital-it.ch). Methylated DNA immunoprecipitation (MeDIP) was conducted.

Results

The results showed that mRNA of TLR4 was significantly increased in the peripheral blood mononuclear cells (PBMCs) of BD patients with an active phase compared to the control group. Differences in mRNA of TLR4 between the inactive BD and control groups were not significant. Differences in TLR2 mRNA levels in the PBMCs of the active and inactive phase BD and control groups were not significant. The methylation rate of TLR4 gene promoter was significantly lower in the active and inactive BD groups compared to the control group. The difference between the active and inactive BD groups was not significant. There was no significant difference in the methylation rates of the TLR2 gene between studied groups.

Conclusions

Our preliminary findings suggest that the hypomethylation of TLR4 gene may be involved in the pathogenesis of BD via increasing TLR4 expression.",2020-06-12 +32447824,"Multicopper oxidases: modular structure, sequence space, and evolutionary relationships.","Multicopper oxidases (MCOs) use copper ions as cofactors to oxidize a variety of substrates while reducing oxygen to water. MCOs have been identified in various taxa, with notable occurrences in fungi. The role of these fungal MCOs in lignin degradation sparked an interest due to their potential for application in biofuel production and various other industries. MCOs consist of different protein domains, which led to their classification into two-, three-, and six-domain MCOs. The previously established Laccase and Multicopper Oxidase Engineering Database (https://lcced.biocatnet.de) was updated and now includes 51 058 sequences and 229 structures of MCOs. Sequences and structures of all MCOs were systematically compared. All MCOs consist of cupredoxin-like domains. Two-domain MCOs are formed by the N- and C-terminal domain (domain N and C), while three-domain MCOs have an additional domain (M) in between, homologous to domain C. The six-domain MCOs consist of alternating domains N and C, each three times. Two standard numbering schemes were developed for the copper-binding domains N and C, which facilitated the identification of conserved positions and a comparison to previously reported results from mutagenesis studies. Two sequence motifs for the copper binding sites were identified per domain. Their modularity, depending on the placement of the T1-copper binding site, was demonstrated. 
Protein sequence networks showed relationships between two- and three-domain MCOs, allowing for family-specific annotation and inference of evolutionary relationships.",2020-06-12 +30196115,HeteroMeth: A Database of Cell-to-cell Heterogeneity in DNA Methylation.,"DNA methylation is an important epigenetic mark that plays a vital role in gene expression and cell differentiation. The average DNA methylation level among a group of cells has been extensively documented. However, the cell-to-cell heterogeneity in DNA methylation, which reflects the differentiation of epigenetic status among cells, remains less investigated. Here we established a gold standard of the cell-to-cell heterogeneity in DNA methylation based on single-cell bisulfite sequencing (BS-seq) data. With that, we optimized a computational pipeline for estimating the heterogeneity in DNA methylation from bulk BS-seq data. We further built HeteroMeth, a database for searching, browsing, visualizing, and downloading the data for heterogeneity in DNA methylation for a total of 141 samples in humans, mice, Arabidopsis, and rice. Three genes are used as examples to illustrate the power of HeteroMeth in the identification of unique features in DNA methylation. The optimization of the computational strategy and the construction of the database in this study complement the recent experimental attempts on single-cell DNA methylomes and will facilitate the understanding of epigenetic mechanisms underlying cell differentiation and embryonic development. HeteroMeth is publicly available at http://qianlab.genetics.ac.cn/HeteroMeth.",2018-08-01 +32544603,The Concerns About Pain (CAP) Scale: A Patient-Reported Outcome Measure of Pain Catastrophizing.,"Pain catastrophizing has been recognized as an important and consistent psychosocial predictor of nearly every key pain-related outcome. The purpose of this study was to develop a new measure of pain catastrophizing using modern psychometric methodology. 
People with chronic pain (N = 795) responded to thirty items. Data were analyzed using item response theory, including assessment of differential item functioning and reliability. Sensitivity to change and validity were examined using data collected from patients undergoing spinal fusion surgery (n = 184) and participating in an ongoing longitudinal aging with a disability survey study (n = 1,388). The final 24-item bank had no items with significant local dependence, misfit, or differential item functioning. Results provided strong evidence of reliability and validity. Six- and 2-item short forms were developed for use when computer adaptive testing is not feasible or desirable. The item bank was named the University of Washington Concerns About Pain scale because the term ""catastrophizing"" was considered stigmatizing by people with chronic pain. Guidance for score interpretation was developed with extensive feedback from individuals with chronic pain. The Concerns About Pain item bank, short forms, and user manuals are free and publicly available to all users and can be accessed online at https://uwcorr.washington.edu/measures/. PERSPECTIVE: This article presents the development of the University of Washington Concerns About Pain scale, the first item response theory-based item bank of pain catastrophizing. The measure is intended for clinicians interested in improving outcomes of patients with chronic pain and for researchers who study impact of and treatment interventions aimed at reducing pain catastrophizing.",2020-06-13 +25792605,TeloPIN: a database of telomeric proteins interaction network in mammalian cells. ,"Interaction network surrounding telomeres has been intensively studied during the past two decades. However, no specific resource by integrating telomere interaction information data is currently available. 
To facilitate the understanding of the molecular interaction network by which telomeres are associated with biological process and diseases, we have developed TeloPIN (Telomeric Proteins Interaction Network) database (http://songyanglab.sysu.edu.cn/telopin/), a novel database that aims to provide comprehensive information on protein-protein, protein-DNA and protein-RNA interaction of telomeres. TeloPIN database contains four types of interaction data, including (i) protein-protein interaction (PPI) data, (ii) telomeric proteins ChIP-seq data, (iii) telomere-associated proteins data and (iv) telomeric repeat-containing RNAs (TERRA)-interacting proteins data. By analyzing these four types of interaction data, we found that 358 and 199 proteins have more than one type of interaction information in human and mouse cells, respectively. We also developed table browser and TeloChIP genome browser to help researchers with better integrated visualization of interaction data from different studies. The current release of TeloPIN database includes 1111 PPI, eight telomeric protein ChIP-seq data sets, 1391 telomere-associated proteins and 183 TERRA-interacting proteins from 92 independent studies in mammalian cells. The interaction information provided by TeloPIN database will greatly expand our knowledge of telomeric proteins interaction network.",2015-03-18 +32817598,"National, Regional, State, and Selected Local Area Vaccination Coverage Among Adolescents Aged 13-17 Years - United States, 2019.","Three vaccines are recommended by the Advisory Committee on Immunization Practices (ACIP) for routine vaccination of adolescents aged 11-12 years to protect against 1) pertussis; 2) meningococcal disease caused by types A, C, W, and Y; and 3) human papillomavirus (HPV)-associated cancers (1). At age 16 years, a booster dose of quadrivalent meningococcal conjugate vaccine (MenACWY) is recommended. 
Persons aged 16-23 years can receive serogroup B meningococcal vaccine (MenB), if determined to be appropriate through shared clinical decision-making. CDC analyzed data from the 2019 National Immunization Survey-Teen (NIS-Teen) to estimate vaccination coverage among adolescents aged 13-17 years in the United States.* Coverage with ≥1 dose of HPV vaccine increased from 68.1% in 2018 to 71.5% in 2019, and the percentage of adolescents who were up to date with the HPV vaccination series (HPV UTD) increased from 51.1% in 2018 to 54.2% in 2019. Both HPV vaccination coverage measures improved among females and males. An increase in adolescent coverage with ≥1 dose of MenACWY (from 86.6% in 2018 to 88.9% in 2019) also was observed. Among adolescents aged 17 years, 53.7% received the booster dose of MenACWY in 2019, not statistically different from 50.8% in 2018; 21.8% received ≥1 dose of MenB, a 4.6 percentage point increase from 17.2% in 2018. Among adolescents living at or above the poverty level,§ those living outside a metropolitan statistical area (MSA) had lower coverage with ≥1 dose of MenACWY and with ≥1 HPV vaccine dose, and a lower percentage were HPV UTD, compared with those living in MSA principal cities. In early 2020, the coronavirus disease 2019 (COVID-19) pandemic changed the way health care providers operate and provide routine and essential services. An examination of Vaccines for Children (VFC) provider ordering data showed that vaccine orders for HPV vaccine; tetanus toxoid, reduced diphtheria toxoid, and acellular pertussis vaccine (Tdap); and MenACWY decreased in mid-March when COVID-19 was declared a national emergency (Supplementary Figure 1, https://stacks.cdc.gov/view/cdc/91795). 
Ensuring that routine immunization services for adolescents are maintained or reinitiated is essential to continuing progress in protecting persons and communities from vaccine-preventable diseases and outbreaks.",2020-08-21 +27453469,Human SRMAtlas: A Resource of Targeted Assays to Quantify the Complete Human Proteome.,"The ability to reliably and reproducibly measure any protein of the human proteome in any tissue or cell type would be transformative for understanding systems-level properties as well as specific pathways in physiology and disease. Here, we describe the generation and verification of a compendium of highly specific assays that enable quantification of 99.7% of the 20,277 annotated human proteins by the widely accessible, sensitive, and robust targeted mass spectrometric method selected reaction monitoring, SRM. This human SRMAtlas provides definitive coordinates that conclusively identify the respective peptide in biological samples. We report data on 166,174 proteotypic peptides providing multiple, independent assays to quantify any human protein and numerous spliced variants, non-synonymous mutations, and post-translational modifications. The data are freely accessible as a resource at http://www.srmatlas.org/, and we demonstrate its utility by examining the network response to inhibition of cholesterol synthesis in liver cells and to docetaxel in prostate cancer lines.",2016-07-21 +30137247,Impact of similarity metrics on single-cell RNA-seq data clustering.,"Advances in high-throughput sequencing on single-cell gene expressions [single-cell RNA sequencing (scRNA-seq)] have enabled transcriptome profiling on individual cells from complex samples. A common goal in scRNA-seq data analysis is to discover and characterise cell types, typically through clustering methods. The quality of the clustering therefore plays a critical role in biological discovery. 
While numerous clustering algorithms have been proposed for scRNA-seq data, fundamentally they all rely on a similarity metric for categorising individual cells. Although several studies have compared the performance of various clustering algorithms for scRNA-seq data, currently there is no benchmark of different similarity metrics and their influence on scRNA-seq data clustering. Here, we compared a panel of similarity metrics on clustering a collection of annotated scRNA-seq datasets. Within each dataset, a stratified subsampling procedure was applied and an array of evaluation measures was employed to assess the similarity metrics. This produced a highly reliable and reproducible consensus on their performance assessment. Overall, we found that correlation-based metrics (e.g. Pearson's correlation) outperformed distance-based metrics (e.g. Euclidean distance). To test if the use of correlation-based metrics can benefit the recently published clustering techniques for scRNA-seq data, we modified a state-of-the-art kernel-based clustering algorithm (SIMLR) using Pearson's correlation as a similarity measure and found significant performance improvement over Euclidean distance on scRNA-seq data clustering. These findings demonstrate the importance of similarity metrics in clustering scRNA-seq data and highlight Pearson's correlation as a favourable choice. Further comparison on different scRNA-seq library preparation protocols suggests that they may also affect clustering performance. Finally, the benchmarking framework is available at http://www.maths.usyd.edu.au/u/SMS/bioinformatics/software.html.",2019-11-01 +25667314,FlowerNet: a gene expression correlation network for anther and pollen development.,"Floral formation, in particular anther and pollen development, is a complex biological process with critical importance for seed set and for targeted plant breeding. 
Many key transcription factors regulating this process have been identified; however, their direct role remains largely unknown. Using publicly available gene expression data from Arabidopsis (Arabidopsis thaliana), focusing on those studies that analyze stamen-, pollen-, or flower-specific expression, we generated a network model of the global transcriptional interactions (FlowerNet). FlowerNet highlights clusters of genes that are transcriptionally coregulated and therefore likely to have interacting roles. Focusing on four clusters, and using a number of data sets not included in the generation of FlowerNet, we show that there is a close correlation in how the genes are expressed across a variety of conditions, including male-sterile mutants. This highlights the important role that FlowerNet can play in identifying new players in anther and pollen development. However, due to the use of general floral expression data in FlowerNet, it also has broad application in the characterization of genes associated with all aspects of floral development and reproduction. To aid the dissection of genes of interest, we have made FlowerNet available as a community resource (http://www.cpib.ac.uk/anther). For this resource, we also have generated plots showing anther/flower expression from a variety of experiments: These are normalized together where possible to allow further dissection of the resource.",2015-02-09 +32516383,ccNetViz: a WebGL-based JavaScript library for visualization of large networks.,"

Summary

Visualizing a network provides a concise and practical understanding of the information it represents. Open-source web-based libraries help accelerate the creation of biologically based networks and their use. ccNetViz is an open-source, high speed and lightweight JavaScript library for visualization of large and complex networks. It implements customization and analytical features for easy network interpretation. These features include edge and node animations, which illustrate the flow of information through a network as well as node statistics. Properties can be defined a priori or dynamically imported from models and simulations. ccNetViz is thus a network visualization library particularly suited for systems biology.

Availability and implementation

The ccNetViz library, demos and documentation are freely available at http://helikarlab.github.io/ccNetViz/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +32426428,Mitochondrial lipid profiling data of a traumatic optic neuropathy model.,"Traumatic optic neuropathy (TON) is a degenerative process that occurs in a subset of patients following blunt force trauma to the head. This condition is characterized by retinal ganglion cell (RGC) death and axon degeneration within the optic nerve [1]. At the cellular level, mitochondrial changes are associated with many optic neuropathies [2, 3]. Here, we provide a dataset demonstrating changes in the optic nerve mitochondrial lipid profile of a sonication-induced traumatic optic neuropathy (SI-TON) mouse model at 1, 7, and 14 days after injury. 32 C57BL/6J mice were separated into 4 groups (control, 1, 7, and 14 days) of 8, with 4 males and 4 females in each. Mice were exposed to sonication-induced trauma as described previously (by Tao et al) and optic nerves were harvested at 1, 7, or 14 days following injury [4]. Mitochondria were isolated from homogenized optic nerves and lipids were extracted. Extracted mitochondrial lipids were analysed with a Q-Exactive Orbitrap Liquid Chromatography-Mass Spectrometer (LC MS-MS). Further analysis of raw data was conducted with LipidSearch 4.1.3 and Metaboanalyst 4.0. This data is publicly available at the Metabolomics Workbench, http://www.metabolomicsworkbench.org (Project ID: PR000905).",2020-04-30 +28338945,Genetic variation in traits for nitrogen use efficiency in wheat.,"Crop nutrient and especially nitrogen use efficiency (NUE) is both an economically and an environmentally highly desirable trait. It has been estimated that only a third of nitrogen inputs to cereal crop worldwide are recovered in grain for consumption, resulting in a huge waste of resource with major negative impacts on the environment. 
Most measures of NUE in wheat and other cereals are based on field assessments of crop yields at given N inputs, performance responses to added N fertilizer, or by quantifying N fertilizer recovery rates. However, NUE is a complex trait comprising two key major components, N uptake and N utilization efficiency, both also complex traits in themselves, each involving many physiological processes and biochemical pathways. A deeper understanding of the processes involved in NUE has been a target of the UK Wheat Genetic Improvement Network project (http://www.wgin.org.uk/). This has enabled the breakdown of characteristics contributing to NUE and an assessment of the variation present in those characteristics, predominantly in modern cultivars; a total of 13 years of data have been obtained to date. Significant but limited variation suggests a requirement for broader germplasm screening such as older varieties, landraces, and wild relatives.",2017-05-01 +33765181,The hepatokine fetuin-A disrupts functional maturation of pancreatic beta cells.,"

Aims/hypothesis

Neonatal beta cells carry out a programme of postnatal functional maturation to achieve full glucose responsiveness. A partial loss of the mature phenotype of adult beta cells may contribute to a reduction of functional beta cell mass and accelerate the onset of type 2 diabetes. We previously found that fetuin-A, a hepatokine increasingly secreted by the fatty liver and a determinant of type 2 diabetes, inhibits glucose-stimulated insulin secretion (GSIS) of human islets. Since fetuin-A is a ubiquitous fetal glycoprotein that declines peripartum, we examined here whether fetuin-A interferes with the functional maturity of beta cells.

Methods

The effects of fetuin-A were assessed during in vitro maturation of porcine neonatal islet cell clusters (NICCs) and in adult human islets. Expression alterations were examined via microarray, RNA sequencing and reverse transcription quantitative real-time PCR (qRT-PCR), proteins were analysed by western blotting and immunostaining, and insulin secretion was quantified in static incubations.

Results

NICC maturation was accompanied by the gain of glucose-responsive insulin secretion (twofold stimulation), backed up by mRNA upregulation of genes governing beta cell identity and function, such as NEUROD1, UCN3, ABCC8 and CASR (Log2 fold change [Log2FC] > 1.6). An active TGFβ receptor (TGFBR)-SMAD2/3 pathway facilitates NICC maturation, since the TGFBR inhibitor SB431542 counteracted the upregulation of aforementioned genes and de-repressed ALDOB, a gene disallowed in mature beta cells. In fetuin-A-treated NICCs, upregulation of beta cell markers and the onset of glucose responsiveness were suppressed. Concomitantly, SMAD2/3 phosphorylation was inhibited. Transcriptome analysis confirmed inhibitory effects of fetuin-A and SB431542 on TGFβ-1- and SMAD2/3-regulated transcription. However, contrary to SB431542 and regardless of cMYC upregulation, fetuin-A inhibited beta cell proliferation (0.27 ± 0.08% vs 1.0  ± 0.1% Ki67-positive cells in control NICCs). This effect was sustained by reduced expression (Log2FC ≤ -2.4) of FOXM1, CENPA, CDK1 or TOP2A. In agreement, the number of insulin-positive cells was lower in fetuin-A-treated NICCs than in control NICCs (14.4 ± 1.2% and 22.3 ± 1.1%, respectively). In adult human islets fetuin-A abolished glucose responsiveness, i.e. 1.7- and 1.1-fold change over 2.8 mmol/l glucose in control- and fetuin-A-cultured islets, respectively. In addition, fetuin-A reduced SMAD2/3 phosphorylation and suppressed expression of proliferative genes. Of note, in non-diabetic humans, plasma fetuin-A was negatively correlated (p = 0.013) with islet beta cell area.

Conclusions/interpretation

Our results suggest that the perinatal decline of fetuin-A relieves TGFBR signalling in islets, a process that facilitates functional maturation of neonatal beta cells. Functional maturity remains revocable in later life, and the occurrence of a metabolically unhealthy milieu, such as liver steatosis and elevated plasma fetuin-A, can impair both function and adaptive proliferation of beta cells.

Data availability

The RNAseq datasets and computer code produced in this study are available in the Gene Expression Omnibus (GEO): GSE144950; https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE144950.",2021-03-25 +31975601,EPIFANY: A Method for Efficient High-Confidence Protein Inference.,"Accurate protein inference in the presence of shared peptides is still one of the key problems in bottom-up proteomics. Most protein inference tools employing simple heuristic inference strategies are efficient but exhibit reduced accuracy. More advanced probabilistic methods often exhibit better inference quality but tend to be too slow for large data sets. Here, we present a novel protein inference method, EPIFANY, combining a loopy belief propagation algorithm with convolution trees for efficient processing of Bayesian networks. We demonstrate that EPIFANY combines the reliable protein inference of Bayesian methods with significantly shorter runtimes. On the 2016 iPRG protein inference benchmark data, EPIFANY is the only tested method that finds all true-positive proteins at a 5% protein false discovery rate (FDR) without strict prefiltering on the peptide-spectrum match (PSM) level, yielding an increase in identification performance (+10% in the number of true positives and +14% in partial AUC) compared to previous approaches. Even very large data sets with hundreds of thousands of spectra (which are intractable with other Bayesian and some non-Bayesian tools) can be processed with EPIFANY within minutes. The increased inference quality including shared peptides results in better protein inference results and thus increased robustness of the biological hypotheses generated. EPIFANY is available as open-source software for all major platforms at https://OpenMS.de/epifany.",2020-02-13 +29947803,Quokka: a comprehensive tool for rapid and accurate prediction of kinase family-specific phosphorylation sites in the human proteome.,"

Motivation

Kinase-regulated phosphorylation is a ubiquitous type of post-translational modification (PTM) in both eukaryotic and prokaryotic cells. Phosphorylation plays fundamental roles in many signalling pathways and biological processes, such as protein degradation and protein-protein interactions. Experimental studies have revealed that signalling defects caused by aberrant phosphorylation are highly associated with a variety of human diseases, especially cancers. In light of this, a number of computational methods aiming to accurately predict protein kinase family-specific or kinase-specific phosphorylation sites have been established, thereby facilitating phosphoproteomic data analysis.

Results

In this work, we present Quokka, a novel bioinformatics tool that allows users to rapidly and accurately identify human kinase family-regulated phosphorylation sites. Quokka was developed by using a variety of sequence scoring functions combined with an optimized logistic regression algorithm. We evaluated Quokka based on well-prepared up-to-date benchmark and independent test datasets, curated from the Phospho.ELM and UniProt databases, respectively. The independent test demonstrates that Quokka improves the prediction performance compared with state-of-the-art computational tools for phosphorylation prediction. In summary, our tool provides users with high-quality predicted human phosphorylation sites for hypothesis generation and biological validation.

Availability and implementation

The Quokka webserver and datasets are freely available at http://quokka.erc.monash.edu/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +33139507,Progesterone Receptor Is a Haploinsufficient Tumor-Suppressor Gene in Cervical Cancer.,"Tumor-suppressor genes (TSG) are often deleted or transcriptionally suppressed in cancer. PGR codes for progesterone receptor (PR), a transcription factor whose function depends on its ligand. Although PR expression is often undetectable in cervical cancer, its relevance to the endocrine-related etiology of this prevalent gynecological disease remains unclear. In this study, we show that the deletion of one Pgr allele in cervical epithelium promoted spontaneous cervical cancer in human papilloma viral oncogene-expressing transgenic mice as efficiently as the ablation of both Pgr alleles. We also show that tumors arising in the transgenic mice with one or both Pgr alleles did not express PR or expressed at the reduced levels compared with the normal epithelium. PR status correlated with estrogen receptor α (ERα) status in the mouse model and the Cancer Genome Atlas (TCGA) dataset. TCGA data analyses revealed that PGR expression significantly decreased in cervical cancer and that the biallelic deletion of PGR was rare. Furthermore, low PGR expression was associated with poor prognosis in young patients with cervical cancer. These discoveries point to PGR as a haploinsufficient TSG in the uterine cervix. They also raise the possibility that the restoration of PGR expression may improve the survival rate. IMPLICATIONS: The decreased expression of PR may increase the risk of cervical cancer in human papillomavirus-infected women. VISUAL OVERVIEW: http://mcr.aacrjournals.org/content/molcanres/19/1/42/F1.large.jpg.",2020-11-02 +32043185,Metabolic alterations in immune cells associate with progression to type 1 diabetes.,"

Aims/hypothesis

Previous metabolomics studies suggest that type 1 diabetes is preceded by specific metabolic disturbances. The aim of this study was to investigate whether distinct metabolic patterns occur in peripheral blood mononuclear cells (PBMCs) of children who later develop pancreatic beta cell autoimmunity or overt type 1 diabetes.

Methods

In a longitudinal cohort setting, PBMC metabolomic analysis was applied in children who (1) progressed to type 1 diabetes (PT1D, n = 34), (2) seroconverted to ≥1 islet autoantibody without progressing to type 1 diabetes (P1Ab, n = 27) or (3) remained autoantibody negative during follow-up (CTRL, n = 10).

Results

During the first year of life, levels of most lipids and polar metabolites were lower in the PT1D and P1Ab groups compared with the CTRL group. Pathway over-representation analysis suggested alanine, aspartate, glutamate, glycerophospholipid and sphingolipid metabolism were over-represented in PT1D. Genome-scale metabolic models of PBMCs during type 1 diabetes progression were developed by using publicly available transcriptomics data and constrained with metabolomics data from our study. Metabolic modelling confirmed altered ceramide pathways, known to play an important role in immune regulation, as specifically associated with type 1 diabetes progression.

Conclusions/interpretation

Our data suggest that systemic dysregulation of lipid metabolism, as observed in plasma, may impact the metabolism and function of immune cells during progression to overt type 1 diabetes.

Data availability

The GEMs for PBMCs have been submitted to BioModels (www.ebi.ac.uk/biomodels/), under accession number MODEL1905270001. The metabolomics datasets and the clinical metadata generated in this study were submitted to MetaboLights (https://www.ebi.ac.uk/metabolights/), under accession number MTBLS1015.",2020-02-11 +32410261,MicroRNA-23 suppresses osteogenic differentiation of human bone marrow mesenchymal stem cells by targeting the MEF2C-mediated MAPK signaling pathway.,"

Background

The present study aimed to determine the role and mechanism of miR-23 with respect to regulating the osteogenic differentiation of human bone marrow mesenchymal stem cells (hBMSCs).

Materials

The expression of miR-23 and MEF2C was measured in osteoporosis (OP) patients and healthy controls by a quantitative reverse transcriptase-polymerase chain reaction (qRT-PCR). The correlation between miR-23 and MEF2C was determined by the Pearson correlation coefficient. Moreover, bioinformatic analysis was performed using public databases. Target gene function and potential pathways were further examined. Then, we used a miR-23 mimic or inhibitor to further explore the potential mechanism of miR-23.

Results

miR-23 is found to be up-regulated and MEF2C is down-regulated in OP patients compared to healthy controls. miR-23 had a negative correlation with MEF2C (r = -0.937, p = 0.001). Bioinformatic analysis revealed that a total of 664 overlapping target genes were found in the TargetScan (http://www.targetscan.org), miRDB (http://mirdb.org) and miRanda (http://www.microrna.org/microrna/home.do) databases. Moreover, Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway analysis indicated that miR-23 may regulate the mitogen-activated protein kinase (MAPK) signaling pathway. miR-23 is down-regulated and MEF2C is significantly up-regulated in the osteogenic differentiation of hBMSCs. MEF2C was significantly up-regulated in the osteogenic differentiation of hBMSCs. Overexpression of miR-23 significantly down-regulated alkaline phosphatase (ALP) activity and calcium deposition, whereas the miR-23 inhibitor had the opposite effects. Moreover, overexpression of miR-23 significantly decreased osteoblast-related markers (Runx2, Osx, ALP and OCN). Further experiments confirmed that MEF2C is a direct target of miR-23. Moreover, the miR-23 mimic enhanced the expression of p-p38 but had no effect on p-JNK.

Conclusions

miR-23 decreases the osteogenic differentiation of hBMSCs through the MEF2C/MAPK signaling pathway.",2020-06-09 +29737975,Computational screening of high-performance optoelectronic materials using OptB88vdW and TB-mBJ formalisms.,"We perform high-throughput density functional theory (DFT) calculations for optoelectronic properties (electronic bandgap and frequency dependent dielectric function) using the OptB88vdW functional (OPT) and the Tran-Blaha modified Becke Johnson potential (MBJ). This data is distributed publicly through JARVIS-DFT database. We used this data to evaluate the differences between these two formalisms and quantify their accuracy, comparing to experimental data whenever applicable. At present, we have 17,805 OPT and 7,358 MBJ bandgaps and dielectric functions. MBJ is found to predict better bandgaps and dielectric functions than OPT, so it can be used to improve the well-known bandgap problem of DFT in a relatively inexpensive way. The peak positions in dielectric functions obtained with OPT and MBJ are in comparable agreement with experiments. The data is available on our websites http://www.ctcms.nist.gov/~knc6/JVASP.html and https://jarvis.nist.gov.",2018-05-08 +33377431,Countertransference types and their relation to rupture and repair in the alliance.,"Background Ruptures in the alliance are co-constructed by clients and therapists, reflecting an interaction between their respective personality configurations [Safran, J. D., & Muran, J. C. (2000). Negotiating the therapeutic alliance: A relational treatment guide. Guilford Press]. In order to work effectively with ruptures, therapists should be aware of their own feeling states, acknowledging the subjectivity of their perceptions [Safran, J. D. (2002). Brief relational psychoanalytic treatment. Psychoanalytic Dialogues, 12(2), 171-195. https://doi.org/10.1080/10481881209348661]. 
Lack of such awareness may be a product of countertransference (CT), which has been shown to be inversely related to outcome. However, when effectively managed, CT contributes to positive outcome [Hayes, J. A., Gelso, C. J., Goldberg, S., & Kivlighan, D. M. (2018). Countertransference management and effective psychotherapy: Meta-analytic findings. Psychotherapy, 55(4), 496-507. https://doi.org/10.1037/pst0000189]. Objectives: The present study examined the associations between types of CT and therapists' reports of ruptures and resolutions. Method: Data were collected from 27 therapists, who treated 67 clients in yearlong psychodynamic psychotherapy. CT patterns were assessed based on therapists' Core Conflictual Relationship Themes with their parents, which were repeated in narratives about their clients [Tishby, O., & Wiseman, H. (2014). Types of countertransference dynamics and their impact on the client-therapist relationship. Psychotherapy Research, 24(3), 360-375. https://doi.org/10.1080/10503307.2014.893068]. Results: Negative CT patterns were associated with more ruptures and less resolution. Positive patterns predicted resolution when the therapists repeated positive patterns with parents, but predicted ruptures when they tried to ""repair"" negative patterns with the parents. These results point to the importance of therapists' awareness of their CT in order to deal effectively with ruptures and facilitate resolution.",2020-12-30 +34368423,Named Data Networking for Content Delivery Network Workflows. ,"In this work we investigate Named Data Networking's (NDN's) architectural properties and features, such as content caching and intelligent packet forwarding, in the context of Content Delivery Network (CDN) workflows. More specifically, we evaluate NDN's properties for PoP (Point of Presence) to PoP and PoP to device connectivity. 
We use the Apache Traffic Server (ATS) platform to create a CDN-like caching hierarchy in order to compare NDN with HTTP-based content delivery. Overall, our work demonstrates that several properties inherent to NDN can benefit content providers and users alike through in-network caching of content, fast retransmission, and stateful hop-by-hop packet forwarding. Our experimental results demonstrate that HTTP delivers content faster under stable conditions due to a mature software stack. However, NDN performs better in the presence of packet loss, even for a loss rate as low as 0.1%, due to packet-level caching in the network and fast retransmissions from close upstreams. We further show that the Time To First Byte (TTFB) in NDN is consistently lower than HTTP (~ 100ms in HTTP vs. ~ 50ms in NDN), a vital requirement for CDNs. Unlike HTTP, NDN also supports transparent failover to another upstream when a failure occurs in the network. Finally, we present implementation-agnostic (implementation choices can be Software Defined Networking, Information Centric Networking, or something else) network properties that can benefit CDN workflows.",2020-11-01 +30380085,dbAMP: an integrated resource for exploring antimicrobial peptides with functional activities and physicochemical properties on transcriptome and proteome data.,"Antimicrobial peptides (AMPs), naturally encoded from genes and generally contained 10-100 amino acids, are crucial components of the innate immune system and can protect the host from various pathogenic bacteria, as well as viruses. In recent years, the widespread use of antibiotics has inspired the rapid growth of antibiotic-resistant microorganisms that usually induce critical infection and pathogenesis. An increasing interest therefore was motivated to explore natural AMPs that enable the development of new antibiotics. 
With the potential of AMPs being as new drugs for multidrug-resistant pathogens, we were thus motivated to develop a database (dbAMP, http://csb.cse.yzu.edu.tw/dbAMP/) by accumulating comprehensive AMPs from public domain and manually curating literature. Currently in dbAMP there are 12 389 unique entries, including 4271 experimentally verified AMPs and 8118 putative AMPs along with their functional activities, supported by 1924 research articles. The advent of high-throughput biotechnologies, such as mass spectrometry and next-generation sequencing, has led us to further expand dbAMP as a database-assisted platform for providing comprehensively functional and physicochemical analyses for AMPs based on the large-scale transcriptome and proteome data. Significant improvements available in dbAMP include the information of AMP-protein interactions, antimicrobial potency analysis for 'cryptic' region detection, annotations of AMP target species, as well as AMP detection on transcriptome and proteome datasets. Additionally, a Docker container has been developed as a downloadable package for discovering known and novel AMPs on high-throughput omics data. The user-friendly visualization interfaces have been created to facilitate peptide searching, browsing, and sequence alignment against dbAMP entries. All the facilities integrated into dbAMP can promote the functional analyses of AMPs and the discovery of new antimicrobial drugs.",2019-01-01 +33065580,Exploring Cancer Treatment Experiences for Patients With Preexisting Mobility Disability.,"

Objective

We explored the process of cancer care for patients with preexisting mobility disability, focusing on treatment decisions and experiences.

Design

We recruited 20 participants with preexisting mobility disability, requiring use of an assistive device or assistance with activities of daily living, subsequently diagnosed with cancer (excluding skin cancers). We conducted open-ended individual interviews, which reached data saturation and were transcribed verbatim for conventional content analysis.

Results

Concerns coalesced around 4 themes: disability-related healthcare experiences affect cancer treatment decisions; concerns about cancer treatment worsening functional impairments; access barriers; and limited provider awareness and biases about treating people with disability. Residual fear from previous medical interventions and concerns about exacerbating functional impairments influenced cancer treatment preferences. Participants also raised concerns that their underlying disability may be used to justify less aggressive treatment. Nevertheless, cancer treatment did exacerbate mobility difficulties for some participants. Inaccessible hospital rooms, lack of accessible medical equipment, and attitudinal barriers complicated treatments.

Conclusions

People with preexisting mobility disability experience barriers to cancer treatment, compromising quality of care and potentially outcomes. Further training and proactive planning for accommodating disability during cancer treatment and rehabilitation are warranted.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME.

Cme objectives

Upon completion of the article, the reader should be able to: (1) Recognize inadequate accommodations that compromise the diagnosis and treatment of a new cancer in patients with preexisting disability; (2) Recommend involving rehabilitation specialists in the process of care and clinical decision making from the time of cancer diagnosis for patients with preexisting disability newly diagnosed with malignancy; and (3) In the setting of accessibility barriers, facilitate efforts to accommodate patients with preexisting disability to improve quality of care in diagnosing and treating cancer.

Level

Advanced.

Accreditation

The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians. The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2021-02-01 +33332184,Urinary Arsenic and Cadmium Associations with Findings from Cranial MRI in American Indians: Data from the Strong Heart Study.,"

Background

Arsenic and cadmium are known cardiovascular toxicants that pose disproportionate risk to rural communities where environmental exposures are high. American Indians have high vascular risk, which may be attributable in part to these exposures.

Objective

We examined urine metal concentrations in association with magnetic resonance imaging findings of vascular brain injury or cerebral atrophy in adult American Indians.

Methods

We measured arsenic and cadmium in American Indian participants from the Strong Heart Study (1989-1991) and evaluated these associations with later (2010-2013) measures of infarct, hemorrhage, white matter hyperintensity (WMH) grade, brain and hippocampal volume, and sulcal and ventricle atrophy using nested multivariate regression analyses.

Results

Among participants with available data (N=687), the median urine arsenic:creatinine ratio was 7.54μg/g [interquartile range (IQR): 4.90-11.93] and the cadmium:creatinine ratio was 0.96μg/g (IQR: 0.61-1.51). Median time between metal measurement and brain imaging was 21 y (range: 18-25 y). Statistical models detected significant associations between arsenic and higher burden of WMH [grade increase=0.014 (95% CI: 0.000, 0.028) per 10% increase in arsenic]; and between cadmium and presence of lacunar infarcts [relative risk (RR)=1.024 (95% CI: 1.004, 1.045) per 10% increase in cadmium].

Discussion

This population-based cohort of American Indian elders had measured values of urine arsenic and cadmium several times higher than previous population- and clinic-based studies in the United States and Mexico, and comparable values with European industrial workers. Our findings of associations for arsenic and cadmium exposures with vascular brain injury are consistent with established literature. Environmental toxicant accumulation is modifiable; public health policy may benefit from focusing on reductions in environmental metals. https://doi.org/10.1289/EHP6930.",2020-12-17 +32449536,HCVpred: A web server for predicting the bioactivity of hepatitis C virus NS5B inhibitors.,"Hepatitis C virus (HCV) is one of the major causes of liver disease affecting an estimated 170 million people culminating in 300,000 deaths from cirrhosis or liver cancer. NS5B is one of three potential therapeutic targets against HCV (i.e., the other two being NS3/4A and NS5A) that is central to viral replication. In this study, we developed a classification structure-activity relationship (CSAR) model for identifying substructures giving rise to anti-HCV activities among a set of 578 non-redundant compounds. NS5B inhibitors were described by a set of 12 fingerprint descriptors and predictive models were constructed from 100 independent data splits using the random forest algorithm. The modelability (MODI index) of the data set was determined to be robust with a value of 0.88 exceeding established threshold of 0.65. The predictive performance was deduced by the accuracy, sensitivity, specificity, and Matthews correlation coefficient, which was found to be statistically robust (i.e., the former three parameters afforded values in excess of 0.8 while the latter statistical parameter provided a value >0.7). An in-depth analysis of the top 20 important descriptors revealed that aromatic ring and alkyl side chains are important for NS5B inhibition. 
Finally, the predictive model is deployed as a publicly accessible HCVpred web server (available at http://codes.bio/hcvpred/) that would allow users to predict the biological activity as being active or inactive against HCV NS5B. Thus, the knowledge and web server presented herein can be used in the design of more potent and specific drugs against the HCV NS5B.",2020-05-25 +33326193,Large expert-curated database for benchmarking document similarity detection in biomedical literature search. ,"Document recommendation systems for locating relevant literature have mostly relied on methods developed a decade ago. This is largely due to the lack of a large offline gold-standard benchmark of relevant documents that cover a variety of research fields such that newly developed literature search techniques can be compared, improved and translated into practice. To overcome this bottleneck, we have established the RElevant LIterature SearcH consortium consisting of more than 1500 scientists from 84 countries, who have collectively annotated the relevance of over 180 000 PubMed-listed articles with regard to their respective seed (input) article/s. The majority of annotations were contributed by highly experienced, original authors of the seed articles. The collected data cover 76% of all unique PubMed Medical Subject Headings descriptors. No systematic biases were observed across different experience levels, research fields or time spent on annotations. More importantly, annotations of the same document pairs contributed by different scientists were highly concordant. We further show that the three representative baseline methods used to generate recommended articles for evaluation (Okapi Best Matching 25, Term Frequency-Inverse Document Frequency and PubMed Related Articles) had similar overall performances. 
Additionally, we found that these methods each tend to produce distinct collections of recommended articles, suggesting that a hybrid method may be required to completely capture all relevant articles. The established database server located at https://relishdb.ict.griffith.edu.au is freely available for the downloading of annotation data and the blind testing of new methods. We expect that this benchmark will be useful for stimulating the development of new powerful techniques for title and title/abstract-based search engines for relevant articles in biomedical research.",2019-01-01 +25885062,CmMDb: a versatile database for Cucumis melo microsatellite markers and other horticulture crop research.,"Cucumis melo L. that belongs to Cucurbitaceae family ranks among one of the highest valued horticulture crops being cultivated across the globe. Besides its economical and medicinal importance, Cucumis melo L. is a valuable resource and model system for the evolutionary studies of cucurbit family. However, very limited numbers of molecular markers were reported for Cucumis melo L. so far that limits the pace of functional genomic research in melon and other similar horticulture crops. We developed the first whole genome based microsatellite DNA marker database of Cucumis melo L. and comprehensive web resource that aids in variety identification and physical mapping of Cucurbitaceae family. The Cucumis melo L. microsatellite database (CmMDb: http://65.181.125.102/cmmdb2/index.html) encompasses 39,072 SSR markers along with its motif repeat, motif length, motif sequence, marker ID, motif type and chromosomal locations. The database is featured with novel automated primer designing facility to meet the needs of wet lab researchers. 
CmMDb is a freely available web resource that facilitates the researchers to select the most appropriate markers for marker-assisted selection in melons and to improve breeding strategies.",2015-04-17 +26048622,The diffusion tensor imaging (DTI) component of the NIH MRI study of normal brain development (PedsDTI).,"The NIH MRI Study of normal brain development sought to characterize typical brain development in a population of infants, toddlers, children and adolescents/young adults, covering the socio-economic and ethnic diversity of the population of the United States. The study began in 1999 with data collection commencing in 2001 and concluding in 2007. The study was designed with the final goal of providing a controlled-access database; open to qualified researchers and clinicians, which could serve as a powerful tool for elucidating typical brain development and identifying deviations associated with brain-based disorders and diseases, and as a resource for developing computational methods and image processing tools. This paper focuses on the DTI component of the NIH MRI study of normal brain development. In this work, we describe the DTI data acquisition protocols, data processing steps, quality assessment procedures, and data included in the database, along with database access requirements. For more details, visit http://www.pediatricmri.nih.gov. This longitudinal DTI dataset includes raw and processed diffusion data from 498 low resolution (3 mm) DTI datasets from 274 unique subjects, and 193 high resolution (2.5 mm) DTI datasets from 152 unique subjects. Subjects range in age from 10 days (from date of birth) through 22 years. Additionally, a set of age-specific DTI templates are included. 
This forms one component of the larger NIH MRI study of normal brain development which also includes T1-, T2-, proton density-weighted, and proton magnetic resonance spectroscopy (MRS) imaging data, and demographic, clinical and behavioral data.",2015-06-03 +32587519,OSlihc: An Online Prognostic Biomarker Analysis Tool for Hepatocellular Carcinoma.,"Liver hepatocellular carcinoma (LIHC) is one of the most common malignant tumors in the world with an increasing number of fatalities. Identification of novel prognosis biomarker for LIHC may improve treatment and therefore patient outcomes. The availability of public gene expression profiling data offers the opportunity to discover prognosis biomarkers for LIHC. We developed an online consensus survival analysis tool named OSlihc using gene expression profiling and long-term follow-up data to identify new prognosis biomarkers. OSlihc consists of 637 cases from four independent cohorts. As a risk assessment tool, OSlihc generates the Kaplan-Meier survival plot with hazard ratio (HR) and p value to evaluate the prognostic value of a gene of interest. To test the reliability of OSlihc, we analyzed 65 previous reported prognostic biomarkers in OSlihc and showed that all of which have significant prognostic values. Furthermore, we identified four novel potential prognostic biomarkers (ATG9A, WIPI1, CXCL1, and CSNK2A2) for LIHC, the elevated expression of which predict the unfavorable survival outcomes. These genes (ATG9A, WIPI1, CXCL1, and CSNK2A2) may be potentially new biomarkers to identify at-risk LIHC patients when further validated. By OSlihc, users can evaluate the prognostic abilities of genes of their interest, which provides a platform for researchers to identify prognostic biomarkers to further develop targeted therapy strategies for LIHC patients. 
OSlihc is public and free to the users at http://bioinfo.henu.edu.cn/LIHC/LIHCList.jsp.",2020-06-10 +30102334,"Cell membrane proteins with high N-glycosylation, high expression and multiple interaction partners are preferred by mammalian viruses as receptors.","

Motivation

Receptor mediated entry is the first step for viral infection. However, the question of how viruses select receptors remains unanswered.

Results

Here, by manually curating a high-quality database of 268 pairs of mammalian virus-host receptor interaction, which included 128 unique viral species or sub-species and 119 virus receptors, we found the viral receptors are structurally and functionally diverse, yet they had several common features when compared to other cell membrane proteins: more protein domains, higher level of N-glycosylation, higher ratio of self-interaction and more interaction partners, and higher expression in most tissues of the host. This study could deepen our understanding of virus-receptor interaction.

Availability and implementation

The database of mammalian virus-host receptor interaction is available at http://www.computationalbiology.cn:5000/viralReceptor.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +29082924,Norine: A powerful resource for novel nonribosomal peptide discovery.,"Since its first release in 2008, Norine remains the unique resource completely devoted to nonribosomal peptides (NRPs). They are very attractive microbial secondary metabolites, displaying a remarkable diversity of structure and functions. Norine (http://bioinfo.lifl.fr/NRP) includes a database now containing more than 1160 annotated peptides and user-friendly interfaces enabling the querying of the database, through the annotations or the structure of the peptides. Dedicated tools are associated for structural comparison of the compounds and prediction of their biological activities. In this paper, we start by describing the knowledgebase and the dedicated tools. We then present some user cases to show how useful Norine is for the discovery of novel nonribosomal peptides.",2016-06-01 +32057520,A blueprint for electronic utilization of ambiguous molecular HLA typing data in organ allocation systems and virtual crossmatch.,"Virtual crossmatch (VXM) compares a transplant candidate's unacceptable antigens to the HLA typing of the donor before an organ offer is accepted and, in selected cases, supplant a prospective physical crossmatch. However, deceased donor typing can be ambiguous, leading to uncertainty in compatibility prediction. We have developed a prototype web application that utilizes ambiguous HLA molecular typing data to predict which unacceptable antigens are present in the donor HLA genotype as donor-specific antibodies (DSA). The application compares a candidate's listed unacceptable antigens to computed probabilities of all possible two-field donor HLA alleles and UNOS antigens. The VIrtual CrossmaTch for mOleculaR HLA typing (VICTOR) tool can be accessed at http://www.transplanttoolbox.org/victor. 
We reanalyzed historical VXM cases where a transplant center's manual interpretation of molecular typing results influenced offer evaluation. We found that interpretation of ambiguous donor molecular typing data using imputation could one day influence VXM decisions if the DSA predictions were rigorously validated. Standardized interpretation of molecular typing data, if applied to the match run, could also change which offers are made. HLA typing ambiguity has been an underappreciated source of immunological risk in organ transplantation. The VICTOR tool can serve as a testbed for development of allocation policies with the aim of decreasing offers refused due to HLA incompatibility.",2020-02-10 +25348409,Disease Ontology 2015 update: an expanded and updated database of human diseases for linking biomedical knowledge through disease data.,"The current version of the Human Disease Ontology (DO) (http://www.disease-ontology.org) database expands the utility of the ontology for the examination and comparison of genetic variation, phenotype, protein, drug and epitope data through the lens of human disease. DO is a biomedical resource of standardized common and rare disease concepts with stable identifiers organized by disease etiology. The content of DO has had 192 revisions since 2012, including the addition of 760 terms. Thirty-two percent of all terms now include definitions. DO has expanded the number and diversity of research communities and community members by 50+ during the past two years. These community members actively submit term requests, coordinate biomedical resource disease representation and provide expert curation guidance. Since the DO 2012 NAR paper, there have been hundreds of term requests and a steady increase in the number of DO listserv members, twitter followers and DO website usage. DO is moving to a multi-editor model utilizing Protégé to curate DO in web ontology language. 
This will enable closer collaboration with the Human Phenotype Ontology, EBI's Ontology Working Group, Mouse Genome Informatics and the Monarch Initiative among others, and enhance DO's current asserted view and multiple inferred views through reasoning.",2014-10-27 +32430697,Development of a Practice Tool for Primary Care Providers: Medication Management of Posttraumatic Stress Disorder in Veterans with Mild Traumatic Brain Injury.,"Posttraumatic stress disorder (PTSD) and comorbid mild traumatic brain injury (mTBI) are highly prevalent in veterans who served in Iraq [Operation Iraqi Freedom/Operation New Dawn] and Afghanistan [Operation Enduring Freedom]. Complicated psychotropic medications are used for treatment of PTSD and comorbid mTBI symptoms lead to polypharmacy related complications. Primary care providers (PCPs) working in Community Based Outpatient Clinics (CBOCs) are usually burdened with the responsibility of managing this complicated medication regimen or relevant side effects. The PCPs do not feel equipped to provide this complicated psychopharmacological management. Thus, there is a need for a comprehensive yet concise tool for the medication management of PTSD in veterans with comorbid mTBI. (1) To conduct focus groups of interdisciplinary team of experts and other stake holders to assess need, (2) To carefully review current VA/Department of Defense practice guideline to identify content, (3) To develop an evidence based, user friendly, and concise pocket guide for the PCP's. Content was identified by review of current guidelines and available literature and was finalized after input from stakeholders, multidisciplinary team of experts, and review of qualitative data from focus groups/interviews of clinicians working in remote CBOCs. The pocket tool was formatted and designed by multimedia service. A pocket guide in the form of a bi-fold, 4″ × 5.5″ laminated card was developed. 
One thousand hard copies were distributed in the local VA medical center. This product is available online for download at the South-Central Mental Illness Research, Education, and Clinical Center website ( https://www.mirecc.va.gov/VISN16/ptsd-and-mtbi-pocket-card.asp ). This pocket card provides PCPs an easy to carry and user-friendly clinical decision-making tool to effectively treat veterans with PTSD and comorbid mTBI.",2020-12-01 +25428358,NCBI viral genomes resource.,"Recent technological innovations have ignited an explosion in virus genome sequencing that promises to fundamentally alter our understanding of viral biology and profoundly impact public health policy. Yet, any potential benefits from the billowing cloud of next generation sequence data hinge upon well implemented reference resources that facilitate the identification of sequences, aid in the assembly of sequence reads and provide reference annotation sources. The NCBI Viral Genomes Resource is a reference resource designed to bring order to this sequence shockwave and improve usability of viral sequence data. The resource can be accessed at http://www.ncbi.nlm.nih.gov/genome/viruses/ and catalogs all publicly available virus genome sequences and curates reference genome sequences. As the number of genome sequences has grown, so too have the difficulties in annotating and maintaining reference sequences. The rapid expansion of the viral sequence universe has forced a recalibration of the data model to better provide extant sequence representation and enhanced reference sequence products to serve the needs of the various viral communities. 
This, in turn, has placed increased emphasis on leveraging the knowledge of individual scientific communities to identify important viral sequences and develop well annotated reference virus genome sets.",2014-11-26 +,Development of Landsat-based annual US forest disturbance history maps (1986–2010) in support of the North American Carbon Program (NACP),"In Phase III of the North American Forest Dynamics (NAFD) study an automatic workflow has been developed for evaluating forest disturbance history using Landsat observations. It has four major components: an automated approach for image selection and preprocessing, the vegetation change tracker (VCT) forest disturbance analysis, postprocessing, and validation. This approach has been applied to the conterminous US (CONUS) to produce a comprehensive analysis of US forest disturbance history using the NASA Earth Exchange (NEX) cloud computing system. The resultant NAFD-NEX product includes 25 annual forest disturbance maps for 1986–2010 and two time-integrated maps to provide spatial-temporal synoptic view of disturbances over this time period. These maps were derived based on 24,000+ scenes selected from 350,000+ available Landsat images at 30-m resolution, and were validated using a visual assessment of Landsat time-series images in combination with high-resolution and other ancillary data sources over samples selected using a probability based sampling method. The validation revealed no major biases in the NAFD-NEX maps for disturbance events that resulted in at least 20% canopy cover loss. The average user's and producer's accuracies for the disturbance class were 53.6% and 53.3%, respectively, with the individual year's user's accuracy varying from 42.8% to 73.6% and producer's accuracy from 39.0% to 84.8% over the 25-year period. 
The NAFD-NEX disturbance maps are available from a web portal of the Oak Ridge National Laboratory Distributed Active Archive Center (ORNL-DAAC) at https://doi.org/10.3334/ORNLDAAC/1290.",2018-05-01 +,TGB: the tobacco genetics and breeding database,"The germplasm of the genus Nicotiana contains more than 5,000 accessions and plays an important role in modern biological research. Tobacco can be used as a model system to develop methodologies for plant transformation and for investigating gene function. In order to develop the study of Nicotiana, a large quantity of data on germplasm, sequences, molecular markers and genetically modified tobacco was required for in-depth and systematic collation and research. It became necessary to establish a special database for tobacco genetics and breeding. The tobacco genetics and breeding (TGB, http://yancao.sdau.edu.cn/tgb) database was developed with the aim of bringing together tobacco genetics and breeding. The database has three main features: (1) a materials database with information on 1,472 Nicotiana germplasm accessions, as well as updated genomic and expressed sequence tag (EST) data available from the public database; (2) a molecular markers database containing a total of 12,388 potential intron polymorphisms, 10,551 EST-simple sequence repeat (EST-SSR) and 66,297 genomic-SSR markers; and (3) an applications database with genetic maps and some genetically modified studies in tobacco. The TGB database also makes Basic Local Alignment Search Tool and primer designing tools publicly available. As far as can be ascertained, the TGB database is the first tobacco genetics and breeding database to be created, and all this comprehensive information will aid basic research into Nicotiana and other related plants. 
It will serve as an excellent resource for the online tobacco research community.",2013-03-01 +32513899,Metastasis-Specific Gene Expression in Autochthonous and Allograft Mouse Mammary Tumor Models: Stratification and Identification of Targetable Signatures.,"Breast cancer metastasis is a leading cause of cancer-related death of women in the United States. A hurdle in advancing metastasis-targeted intervention is the phenotypic heterogeneity between primary and secondary lesions. To identify metastasis-specific gene expression profiles we performed RNA-sequencing of breast cancer mouse models; analyzing metastases from models of various drivers and routes. We contrasted the models and identified common, targetable signatures. Allograft models exhibited more mesenchymal-like gene expression than genetically engineered mouse models (GEMM), and primary culturing of GEMM-derived metastatic tissue induced mesenchymal-like gene expression. In addition, metastasis-specific transcriptomes differed between tail vein and orthotopic injection of the same cell line. Gene expression common to models of spontaneous metastasis included sildenafil response and nicotine degradation pathways. Strikingly, in vivo sildenafil treatment significantly reduced metastasis by 54%, while nicotine significantly increased metastasis by 46%. These data suggest that (i) actionable metastasis-specific pathways can be readily identified, (ii) already available drugs may have great potential to alleviate metastatic incidence, and (iii) metastasis may be influenced greatly by lifestyle choices such as the choice to consume nicotine products. In summary, while mouse models of breast cancer metastasis vary in ways that must not be ignored, there are shared features that can be identified and potentially targeted therapeutically. 
IMPLICATIONS: The data we present here exposes critical variances between preclinical models of metastatic breast cancer and identifies targetable pathways integral to metastatic spread. VISUAL OVERVIEW: http://mcr.aacrjournals.org/content/molcanres/18/9/1278/F1.large.jpg.",2020-06-08 +33381815,PROBselect: accurate prediction of protein-binding residues from proteins sequences via dynamic predictor selection.,"

Motivation

Knowledge of protein-binding residues (PBRs) improves our understanding of protein-protein interactions, contributes to the prediction of protein functions and facilitates protein-protein docking calculations. While many sequence-based predictors of PBRs were published, they offer modest levels of predictive performance and most of them cross-predict residues that interact with other partners. One unexplored option to improve the predictive quality is to design consensus predictors that combine results produced by multiple methods.

Results

We empirically investigate predictive performance of a representative set of nine predictors of PBRs. We report substantial differences in predictive quality when these methods are used to predict individual proteins, which contrast with the dataset-level benchmarks that are currently used to assess and compare these methods. Our analysis provides new insights for the cross-prediction concern, dissects complementarity between predictors and demonstrates that predictive performance of the top methods depends on unique characteristics of the input protein sequence. Using these insights, we developed PROBselect, first-of-its-kind consensus predictor of PBRs. Our design is based on the dynamic predictor selection at the protein level, where the selection relies on regression-based models that accurately estimate predictive performance of selected predictors directly from the sequence. Empirical assessment using a low-similarity test dataset shows that PROBselect provides significantly improved predictive quality when compared with the current predictors and conventional consensuses that combine residue-level predictions. Moreover, PROBselect informs the users about the expected predictive quality for the prediction generated from a given input protein.

Availability and implementation

PROBselect is available at http://bioinformatics.csu.edu.cn/PROBselect/home/index.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-12-01 +33379594,Multi-Scattering software: part I: online accelerated Monte Carlo simulation of light transport through scattering media.,"In this article we present and describe an online freely accessible software called Multi-Scattering for the modeling of light propagation in scattering and absorbing media. Part II of this article series focuses on the validation of the model by rigorously comparing the simulated results with experimental data. The model is based on the use of the Monte Carlo method, where billions of photon packets are being tracked through simulated cubic volumes. Simulations are accelerated by the use of general-purpose computing on graphics processing units, reducing the computation time by a factor up to 200x in comparison with a single central processing unit thread. By using four graphic cards on a single computer, the simulation speed increases by a factor of 800x. For an anisotropy factor g = 0.86, this enables the transport path of one billion photons to be computed in 10 seconds for optical depth OD = 10 and in 20 minutes for OD = 500. Another feature of Multi-Scattering is the integration and implementation of the Lorenz-Mie theory in the software to generate the scattering phase functions from spherical particles. The simulations are run from a computer server at Lund University, allowing researchers to log in and use it freely without any prior need for programming skills or specific software/hardware installations. There are countless types of scattering media in which this model can be used to predict light transport, including medical tissues, blood samples, clouds, smoke, fog, turbid liquids, spray systems, etc. An example of simulation results is given here for photon propagation through a piece of human head. 
The software also includes features for modeling image formation by inserting a virtual collecting lens and a detection matrix which simulate a camera objective and a sensor array respectively. The user interface for setting-up simulations and for displaying the corresponding results is found at: https://multi-scattering.com/.",2020-12-01 +33053149,"Selenium, antioxidants, cardiovascular disease, and all-cause mortality: a systematic review and meta-analysis of randomized controlled trials.","

Background

Antioxidants have been promoted for cardiovascular disease (CVD) risk reduction and for the prevention of cancer. Our preliminary analysis suggested that only when selenium was present were antioxidant mixtures associated with reduced all-cause mortality.

Objective

We conducted a systematic review and meta-analysis of randomized controlled trials (RCTs) to determine the effect of selenium supplementation alone and of antioxidant mixtures with or without selenium on the risk of CVD, cancer, and mortality.

Methods

We identified studies using the Cochrane Library, Medline, and Embase for potential CVD outcomes, cancer, and all-cause mortality following selenium supplementation alone or after antioxidant supplement mixtures with and without selenium up to June 5, 2020. RCTs of ≥24 wk were included and data were analyzed using random-effects models and classified by the Grading of Recommendations, Assessment, Development, and Evaluation approach.

Results

The meta-analysis identified 9423 studies, of which 43 were used in the final analysis. Overall, no association of selenium alone or antioxidants was seen with CVD and all-cause mortality. However, a decreased risk with antioxidant mixtures was seen for CVD mortality when selenium was part of the mix (RR: 0.77; 95% CI: 0.62, 0.97; P = 0.02), with no association when selenium was absent. Similarly, when selenium was part of the antioxidant mixture, a decreased risk was seen for all-cause mortality (RR: 0.90; 95% CI: 0.82, 0.98; P = 0.02) as opposed to an increased risk when selenium was absent (RR: 1.09; 95% CI: 1.04, 1.13; P = 0.0002).

Conclusion

The addition of selenium should be considered for supplements containing antioxidant mixtures if they are to be associated with CVD and all-cause mortality risk reduction. This trial was registered at https://www.crd.york.ac.uk/PROSPERO/ as CRD42019138268.",2020-12-01 +27175227,Filtering large-scale event collections using a combination of supervised and unsupervised learning for event trigger classification.,"

Background

Biomedical event extraction is one of the key tasks in biomedical text mining, supporting various applications such as database curation and hypothesis generation. Several systems, some of which have been applied at a large scale, have been introduced to solve this task. Past studies have shown that the identification of the phrases describing biological processes, also known as trigger detection, is a crucial part of event extraction, and notable overall performance gains can be obtained by solely focusing on this sub-task. In this paper we propose a novel approach for filtering falsely identified triggers from large-scale event databases, thus improving the quality of knowledge extraction.

Methods

Our method relies on state-of-the-art word embeddings, event statistics gathered from the whole biomedical literature, and both supervised and unsupervised machine learning techniques. We focus on EVEX, an event database covering the whole PubMed and PubMed Central Open Access literature containing more than 40 million extracted events. The top most frequent EVEX trigger words are hierarchically clustered, and the resulting cluster tree is pruned to identify words that can never act as triggers regardless of their context. For rarely occurring trigger words we introduce a supervised approach trained on the combination of trigger word classification produced by the unsupervised clustering method and manual annotation.

Results

The method is evaluated on the official test set of BioNLP Shared Task on Event Extraction. The evaluation shows that the method can be used to improve the performance of the state-of-the-art event extraction systems. This successful effort also translates into removing 1,338,075 of potentially incorrect events from EVEX, thus greatly improving the quality of the data. The method is not solely bound to the EVEX resource and can be thus used to improve the quality of any event extraction system or database.

Availability

The data and source code for this work are available at: http://bionlp-www.utu.fi/trigger-clustering/.",2016-05-11 +31516949,Data on correction of pelvic organ prolapse by laparoscopic lateral suspension with mesh: A clinical series.,This DIB article provides additional data on laparoscopic lateral suspension with mesh for correcting pelvic organ prolapse. Data come from a multicentric sample of Italian women (https://doi.org/10.1016/j.ejogrb.2019.07.025). Data are collected retrospectively. Descriptive and raw data on surgery and descriptive and raw data on symptoms of pelvic organ prolapse pre-surgery and post-surgery are provided. Kaplan-Meier curves and scores of 7-items King's Health Questionnaire for quality of life assessment are also reported.,2019-08-23 +,Draft genome and reference transcriptomic resources for the urticating pine defoliator Thaumetopoea pityocampa (Lepidoptera: Notodontidae),"The pine processionary moth Thaumetopoea pityocampa (Lepidoptera: Notodontidae) is the main pine defoliator in the Mediterranean region. Its urticating larvae cause severe human and animal health concerns in the invaded areas. This species shows a high phenotypic variability for various traits, such as phenology, fecundity and tolerance to extreme temperatures. This study presents the construction and analysis of extensive genomic and transcriptomic resources, which are an obligate prerequisite to understand their underlying genetic architecture. Using a well‐studied population from Portugal with peculiar phenological characteristics, the karyotype was first determined and a first draft genome of 537 Mb total length was assembled into 68,292 scaffolds (N50 = 164 kb). From this genome assembly, 29,415 coding genes were predicted. To circumvent some limitations for fine‐scale physical mapping of genomic regions of interest, a 3X coverage BAC library was also developed. In particular, 11 BACs from this library were individually sequenced to assess the assembly quality. 
Additionally, de novo transcriptomic resources were generated from various developmental stages sequenced with HiSeq and MiSeq Illumina technologies. The reads were de novo assembled into 62,376 and 63,175 transcripts, respectively. Then, a robust subset of the genome‐predicted coding genes, the de novo transcriptome assemblies and previously published 454/Sanger data were clustered to obtain a high‐quality and comprehensive reference transcriptome consisting of 29,701 bona fide unigenes. These sequences covered 99% of the cegma and 88% of the busco highly conserved eukaryotic genes and 84% of the busco arthropod gene set. Moreover, 90% of these transcripts could be localized on the draft genome. The described information is available via a genome annotation portal (http://bipaa.genouest.org/sp/thaumetopoea_pityocampa/).",2018-05-01 +32156793,MaxQuant Software for Ion Mobility Enhanced Shotgun Proteomics.,"Ion mobility can add a dimension to LC-MS based shotgun proteomics which has the potential to boost proteome coverage, quantification accuracy and dynamic range. Required for this is suitable software that extracts the information contained in the four-dimensional (4D) data space spanned by m/z, retention time, ion mobility and signal intensity. Here we describe the ion mobility enhanced MaxQuant software, which utilizes the added data dimension. It offers an end to end computational workflow for the identification and quantification of peptides and proteins in LC-IMS-MS/MS shotgun proteomics data. We apply it to trapped ion mobility spectrometry (TIMS) coupled to a quadrupole time-of-flight (QTOF) analyzer. A highly parallelizable 4D feature detection algorithm extracts peaks which are assembled to isotope patterns. Masses are recalibrated with a non-linear m/z, retention time, ion mobility and signal intensity dependent model, based on peptides from the sample. 
A new matching between runs (MBR) algorithm that utilizes collisional cross section (CCS) values of MS1 features in the matching process significantly gains specificity from the extra dimension. Prerequisite for using CCS values in MBR is a relative alignment of the ion mobility values between the runs. The missing value problem in protein quantification over many samples is greatly reduced by CCS aware MBR.MS1 level label-free quantification is also implemented which proves to be highly precise and accurate on a benchmark dataset with known ground truth. MaxQuant for LC-IMS-MS/MS is part of the basic MaxQuant release and can be downloaded from http://maxquant.org.",2020-03-10 +32153658,Population-based cancer registries: a gateway to improved surveillance of non-communicable diseases.,"Timely and accurate data on health enable policymakers to make informed decisions that can reduce the burden and suffering from disease. Yet many LMICs are not able to adequately collect the health indicators necessary to track progress in the Sustainable Development Goals (SDG) at present, and a major investment in primary data collection is needed. We argue that cancer surveillance, with an established history of international standards and best practices, represents a feasible entry point in the development of surveillance programmes for NCDs. The International Agency for Research on Cancer (IARC) has served to support population-based cancer registries (PBCR) since its inception over 50 years ago. Based on this longstanding experience and collaboration with PBCR worldwide, IARC and other key partners implemented the Global Initiative for Cancer Registry Development (GICR, http://gicr.iarc.fr/) as a new way to deliver capacity-building in cancer surveillance. We describe some of the critical aspects of the GICR and the prospects of a step-change in the quality and use of cancer data over the next years. Ultimately, the decision on how to proceed resides with countries. 
The cancer and NCD burden will not be tackled without committed and sustainable action by governments.",2020-01-16 +33612400,Utility estimation for neurogenic bowel dysfunction in the general population.,"

Background

Neurogenic bowel dysfunction (NBD) affects over 80% of individuals with spina bifida causing bowel incontinence and/or constipation. NBD is also associated with decreased quality of life, depression, anxiety, and decreased employment/educational attainment. Because NBD is a life-altering condition without a cure, understanding the utility of different health states related to NBD would aid clinicians as they try to counsel families regarding management options and to better understand the quality of life associated with disease management.

Objective

To elicit utility scores for NBD using an online community sample.

Study design

A cross-sectional anonymous survey was completed by 1534 voluntary participants via an online platform (Amazon Mechanical Turk (MTurk, http://www.mturk.com/)), representing an 87% response rate. The survey presented hypothetical scenarios that asked respondents to imagine themselves as an individual living with NBD or as the caretaker of a child with NBD. The time trade-off (TTO) method was used to estimate a utility score, and outcomes for each scenario were calculated using median and IQR. Univariate comparisons of distributions of TTO for demographic data were made using Kruskal-Wallis tests.

Results

The median utility score for NBD was 0.84 [0.70-0.92]. Participants reported that they would give up a median of 5 years of their own life, to prevent NBD in themselves or their child. Utility values for child scenarios were significantly different when stratified by age, gender, race, parental status, marital status, and income. Stratification by current health status did not yield significantly different utility values.

Discussion

Study findings are comparable with other TTO-determined utility values of moderately severe disease states, including severe persistent asthma (0.83), moderate seizure disorder (0.84) and mild mental retardation (0.84). The significant variations in utility values based on age, gender, race, parent status, partner/marital status and income variables existed in our study, which is similar to findings in other health fields. Study limitations include lack of unanimous agreement about TTO's validity in measuring utility values, and MTurk participant reports can be generalized to greater population.

Conclusion

NBD is perceived by the community as having a substantial impact on the lives of children with spina bifida, representing a 16% reduction from perfect health. In general, health state utilities have been increasingly used in healthcare systems to understand how burdensome a population perceives a disease is and to evaluate whether interventions improve quality of life years.",2021-01-30 +32375005,Quantitative Proteomic Analysis of Chikungunya Virus-Infected Aedes aegypti Reveals Proteome Modulations Indicative of Persistent Infection.,"The mosquito-borne chikungunya virus (CHIKV) poses a threat to human health in tropical countries throughout the world. The molecular interactions of CHIKV with its mosquito vector Aedes aegypti are not fully understood. Following oral acquisition of CHIKV via salinemeals, we analyzed changes in the proteome of Ae. aegypti in 12 h intervals by label-free quantification using a timsTOF Pro mass spectrometer. For each of the seven time points, between 2647 and 3167 proteins were identified among CHIKV-infected and noninfected mosquito samples, and fewer than 6% of those identified proteins were affected by the virus. Functional enrichment analysis revealed that the three pathways, Endocytosis, Oxidative phosphorylation, and Ribosome biogenesis, were enriched during CHIKV infection. On the other hand, three pathways of the cellular RNA machinery and five metabolism related pathways were significantly attenuated in the CHIKV-infected samples. Furthermore, proteins associated with cytoskeleton and vesicular transport, as well as various serine-type endopeptidases and metallo-proteinases, were modulated in the presence of CHIKV. Our study reveals biological pathways and novel proteins interacting with CHIKV in the mosquito. Overall, CHIKV infection caused minor changes to the mosquito proteome demonstrating a high level of adaption between the vector and the virus, essentially coexisting in a nonpathogenic relationship. 
The mass spectrometry data have been deposited to the MassIVE repository (https://massive.ucsd.edu/ProteoSAFe/dataset.jsp?task=abfd14f7015243c69854731998d55df1) with the data set identifier MSV000085115.",2020-05-21 +33316147,Older Adults' Engagement in Technology-Mediated Self-Monitoring of Diet: A Mixed-Method Study.,"

Purpose

This feasibility study explored older adults' use of a nutrition app called Appetitus (https://apps.apple.com/us/app/appetitt/id1001936854?ign-mpt=uo%3D2; https://play.google.com/store/apps/details?id=no.nr.appetitt&hl=e) and addressed their engagement in technology-mediated self-monitoring of diet. Undernutrition is a significant challenge among older adults and is associated with poorer health experiences. Digital health for self-monitoring of diet has the potential to increase awareness of personal nutrition, and the scarcity of research reporting older adults' ability and willingness to engage in technology-mediated dietary self-monitoring warranted this study.

Design and methods

An explorative mixed-methods design combining descriptive analysis of log data with qualitative analysis of interviews with Appetitus users was implemented.

Findings

Twenty-five older adults self-monitored their diet using Appetitus over an 8-week trial period. Eighty percent of the participants used the app regularly in the trial period. The most engaged users recorded their food consumption daily for 8 weeks. Personal interest in nutrition and commitment to the project facilitated regular use of Appetitus. Poor health and the perception that using a nutrition app lacked personal relevance contributed to irregular self-monitoring. For inexperienced technology users, participation in this project became a springboard to using tablet technology and the Internet beyond the Appetitus app.

Conclusions

The majority of the participants regularly used Appetitus for self-monitoring of diet; they found the tablet technology and Appetitus app easy to use.

Clinical relevance

Older adults are able and willing to use self-monitoring tools. Nutrition apps can empower older adults to make better informed decisions about their diet. Patients' self-monitoring can provide valuable and detailed health-related information to healthcare professionals and mediate patient-centered care practices.",2020-12-14 +30357906,Evaluation of U. S. National Toxicology Program (NTP) mouse lymphoma assay data using International Workshop on Genotoxicity Tests (IWGT) and the Organization for Economic Co-Operation and Development (OECD) criteria.,"The forward gene mutation mouse lymphoma assay (MLA) is widely used, as part of a regulatory test battery, to identify the genotoxic potential of chemicals. It identifies mutagens capable of inducing a variety of genetic events. During the 1980s and early 1990s, the U.S. National Toxicology Program (NTP) developed a publicly available database (https://tools.niehs.nih.gov/cebs3/ui/) of MLA results. This database is used to define the mutagenic potential of chemicals, to develop structure-activity relationships (SAR), and to draw correlations to animal carcinogenicity findings. New criteria for MLA conduct and data interpretation were subsequently developed by the International Workshop for Genotoxicity Testing (IWGT) and the Organization of Economic Cooperation and Development (OECD). These recommendations are included in a new OECD Test Guideline (TG490). It is essential that early experimental data be re-examined and classified according to the current criteria to build a curated database to better inform chemical-specific evaluations and SAR models. We re-evaluated more than 1900 experiments representing 342 chemicals against the newly defined acceptance criteria for background mutant frequency (MF), cloning efficiency (CE), positive control values (modified for this evaluation due to lack of colony sizing), appropriate dose selection, and data consistency. 
Only 17% of the evaluated experiments met all acceptance criteria used in this re-evaluation. Results from 211 chemicals were determined to be uninterpretable, 92 were positive, and 39 equivocal. The authors could not classify any responses as negative because colony sizing was not performed for any of these experiments and it is clear, based on many experiments with unacceptably low background and positive control MFs, that mutant colony recovery was often suboptimal. This re-evaluation provides a curated database for the MLA. A similar curation should be done for other widely used genetic toxicology assays, but will be more difficult for certain assays (e.g., in vitro chromosomal aberrations) because important parameters such as level of cytotoxicity were often not evaluated/reported. Environ. Mol. Mutagen. 59:829-841, 2018. © 2018 Wiley Periodicals, Inc.",2018-10-25 +,4CPS-093 Long-term efficacy of second-generation direct-acting antiviral agents (daas-2) for hcv treatment: a meta-analysis,"

Background

Efficacy of second-generation direct-acting antiviral agents (DAAs-2) in terms of sustained viral response (SVR) 12 weeks after the end of treatment (EOT) has widely been proven.1–5 However, long-term efficacy is still controversial due to the low number of available studies with a small number of patients.

Purpose

The objective of the study is to conduct a systematic review and, if possible, a meta-analysis of existing clinical evidence in terms of long-term efficacy (SVR longer than 12 weeks after EOT) of DAAs-2 for HCV treatment.

Material and methods

A systematic review was performed with the use of CENTRAL, MEDLINE, Embase, Pubmed and SBBL-CILEA/METACRAWLER databases. Trials were initially screened by the title. Second, full papers and abstracts were analysed. The meta-analysis included randomised controlled trials (RCTs) with adult patients affected by HCV, treated with DAAs-2 and assessed for longer than 12 weeks after EOT. Study quality assessment was undertaken using the Jadad scale. Heterogeneity analysis of the studies was conducted with Chi-square and I2: the statistical analysis of the efficacy rate was performed using the meta package with the R software 6. The effect estimate was expressed in risk ratio (RR) with 95% confidence interval (CI 95%) and pooled using a random effects model.

Results

Of the 106 identified studies, 11 high-quality RCTs were included for meta-analysis (25 were duplicate publications, 70 did not meet the inclusion criteria). Considered genotypes were 1 (nine), 2 (one) and 3 (one). Meta-analysis included 3720 patients (2698 treated with DAAs-2; 1022 treated with placebo or a first-generation DAA±ribavirin± PEG-interferon). Heterogeneity between studies was high (p<0.001; I2=90.2%), however it was absorbed by the model (t2=0.08). Long-term efficacy was expressed as SVR 24 weeks after EOT, since longer timescales were not available. According to the pooled RR, the incidence of efficacy was 1.5 (95% CI: 1.24 to 1.83, p<0.001).

Conclusion

The meta-analysis demonstrated that DAAs-2 for HCV treatment have long-term efficacy at SVR 24 weeks after the EOT. However, the number of studies is mostly based on genotype 1. More RCTs are required to confirm long-term efficacy at more than 6 months after EOT for all treated genotypes.

References and/or Acknowledgements

1. https://www.epatitec.info/terapie/terapia-ledipasvir-sofosbuvir/efficacia-terapeutica 2. https://www.epatitec.info/terapie/terapia-ombitasvir-paritaprevir-dasabuvir/efficacia-terapeutica 3. https://www.epatitec.info/terapie/terapia-daclatasvir/efficacia-terapeutica 4. https://www.epatitec.info/terapie/terapia-simeprevir/efficacia-terapeutica 5. https://www.epatitec.info/terapie/terapia-sofosbuvir/efficacia-terapeutica R Foundation for Statistical Computing (Version 3.3.3). No conflict of interest",2018-01-01 +33004350,Epigenetic Control of Cdkn2a.Arf Protects Tumor-Infiltrating Lymphocytes from Metabolic Exhaustion.,"T-cell exhaustion in cancer is linked to poor clinical outcomes, where evidence suggests T-cell metabolic changes precede functional exhaustion. Direct competition between tumor-infiltrating lymphocytes (TIL) and cancer cells for metabolic resources often renders T cells dysfunctional. Environmental stress produces epigenome remodeling events within TIL resulting from loss of the histone methyltransferase EZH2. Here, we report an epigenetic mechanism contributing to the development of metabolic exhaustion in TIL. A multiomics approach revealed a Cdkn2a.Arf-mediated, p53-independent mechanism by which EZH2 inhibition leads to mitochondrial dysfunction and the resultant exhaustion. Reprogramming T cells to express a gain-of-function EZH2 mutant resulted in an enhanced ability of T cells to inhibit tumor growth in vitro and in vivo. Our data suggest that manipulation of T-cell EZH2 within the context of cellular therapies may yield lymphocytes that are able to withstand harsh tumor metabolic environments and collateral pharmacologic insults. SIGNIFICANCE: These findings demonstrate that manipulation of T-cell EZH2 in cellular therapies may yield cellular products able to withstand solid tumor metabolic-deficient environments. 
GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/21/4707/F1.large.jpg.",2020-10-01 +26980516,DemaDb: an integrated dematiaceous fungal genomes database. ,"Many species of dematiaceous fungi are associated with allergic reactions and potentially fatal diseases in human, especially in tropical climates. Over the past 10 years, we have isolated more than 400 dematiaceous fungi from various clinical samples. In this study, DemaDb, an integrated database was designed to support the integration and analysis of dematiaceous fungal genomes. A total of 92 072 putative genes and 6527 pathways that identified in eight dematiaceous fungi (Bipolaris papendorfii UM 226, Daldinia eschscholtzii UM 1400, D. eschscholtzii UM 1020, Pyrenochaeta unguis-hominis UM 256, Ochroconis mirabilis UM 578, Cladosporium sphaerospermum UM 843, Herpotrichiellaceae sp. UM 238 and Pleosporales sp. UM 1110) were deposited in DemaDb. DemaDb includes functional annotations for all predicted gene models in all genomes, such as Gene Ontology, EuKaryotic Orthologous Groups, Kyoto Encyclopedia of Genes and Genomes (KEGG), Pfam and InterProScan. All predicted protein models were further functionally annotated to Carbohydrate-Active enzymes, peptidases, secondary metabolites and virulence factors. DemaDb Genome Browser enables users to browse and visualize entire genomes with annotation data including gene prediction, structure, orientation and custom feature tracks. The Pathway Browser based on the KEGG pathway database allows users to look into molecular interaction and reaction networks for all KEGG annotated genes. The availability of downloadable files containing assembly, nucleic acid, as well as protein data allows the direct retrieval for further downstream works. DemaDb is a useful resource for fungal research community especially those involved in genome-scale analysis, functional genomics, genetics and disease studies of dematiaceous fungi. 
Database URL: http://fungaldb.um.edu.my.",2016-03-15 +26315263,Development of a D genome specific marker resource for diploid and hexaploid wheat.,"

Background

Mapping and map-based cloning of genes that control agriculturally and economically important traits remain great challenges for plants with complex highly repetitive genomes such as those within the grass tribe, Triticeae. Mapping limitations in the Triticeae are primarily due to low frequencies of polymorphic gene markers and poor genetic recombination in certain genetic regions. Although the abundance of repetitive sequence may pose common problems in genome analysis and sequence assembly of large and complex genomes, they provide repeat junction markers with random and unbiased distribution throughout chromosomes. Hence, development of a high-throughput mapping technology that combines both gene-based and repeat junction-based markers is needed to generate maps that have better coverage of the entire genome.

Results

In this study, the available genomics resources of the diploid Aegilops tauschii, the D genome donor of bread wheat, were used to develop genome specific markers that can be applied for mapping in modern hexaploid wheat. A NimbleGen array containing both gene-based and repeat junction probe sequences derived from Ae. tauschii was developed and used to map the Chinese Spring nullisomic-tetrasomic lines and deletion bin lines of the D genome chromosomes. Based on these mapping data, we have now anchored 5,171 repeat junction probes and 10,892 gene probes, corresponding to 5,070 gene markers, to the delineated deletion bins of the D genome. The order of the gene-based markers within the deletion bins of the Chinese Spring can be inferred based on their positions on the Ae. tauschii genetic map. Analysis of the probe sequences against the Chinese Spring chromosome sequence assembly database facilitated mapping of the NimbleGen probes to the sequence contigs and allowed assignment or ordering of these sequence contigs within the deletion bins. The accumulated length of anchored sequence contigs is about 155 Mb, representing ~ 3.2 % of the D genome. A specific database was developed to allow users to search or BLAST against the probe sequence information and to directly download PCR primers for mapping specific genetic loci.

Conclusions

In bread wheat, aneuploid stocks have been extensively used to assign markers linked with genes/traits to chromosomes, chromosome arms, and their specific bins. Through this study, we added thousands of markers to the existing wheat chromosome bin map, representing a significant step forward in providing a resource to navigate the wheat genome. The database website ( http://probes.pw.usda.gov/ATRJM/ ) provides easy access and efficient utilization of the data. The resources developed herein can aid map-based cloning of traits of interest and the sequencing of the D genome of hexaploid wheat.",2015-08-28 +32500372,"Retrospective analysis of HIV-1 drug resistance mutations in Suzhou, China from 2009 to 2014.","In this study, we investigated drug resistance levels in human immunodeficiency virus (HIV)-1-infected patients in Suzhou by retrospectively analyzing this property and the characteristics of circulating HIV-1 strains collected from 2009 to 2014. A total of 261 HIV-1-positive plasma samples, confirmed by the Suzhou CDC, were collected and evaluated to detect HIV-1 drug resistance genotypes using an in-house method. The pol gene fragment was amplified, and its nucleic acid sequence was determined by Sanger sequencing. Drug resistance mutations were then analyzed using the Stanford University HIV resistance database ( https://hivdb.stanford.edu ). A total of 216 pol gene fragments were amplified and sequenced with 16.7% (36/216) of sequences revealing these mutations. The drug resistance rates of protease, nucleoside reverse transcriptase, and non-nucleoside reverse transcriptase inhibitors (NNRTIs) were 4/36 (11.1%), 2/36 (5.6%), and 30/36 (83.3%), respectively. Five surveillance drug resistance mutations were found in 36 sequences, of which, three were found among specimens of men who have sex with men. Potential low-level resistance accounted for 33% of amino acid mutations associated with NNRTIs. 
Two of the mutations, M230L and L100I, which confer a high level of resistance to efavirenz (EFV) and nevirapine (NVP) used as NNRTIs for first-line antiretroviral therapy (ART), were detected in this study. Therefore, when HIV-1 patients in Suzhou are administered first-line ART, much attention should be paid to the status of these mutations that cause resistance to EVP, EFV, and NVP.",2020-06-04 +32500221,Validation of an online tool for early prediction of the failure-risk in gestational trophoblastic neoplasia patients treated with methotrexate.,"

Purpose

In a low-risk gestational trophoblastic neoplasia (GTN) treated with methotrexate (MTX), the modeled hCG (human chorionic gonadotropin) residual concentration (hCGres), calculated with NONMEM program® (NM) during the first 50 treatment days, is a predictor of MTX-resistance risk. This model was implemented with another algorithm on https://www.biomarker-kinetics.org/hCG . The objective was to confirm the validity of the website estimations with respect to NM.

Methods

The consistencies of modeled hCGres estimated by NM and by the website were assessed in a dataset of 60 fictive patients with simulated hCG profiles, as well as in an independent database of 531 actual patients. Moreover, the hCGres predictive values regarding MTX failure-risk were assessed.

Results

The values of hCGres obtained with both methods were highly consistent in the fictive patient and in the actual patient datasets: median relative prediction errors (RPE) were -0.059 and 9.9 × 10^-7, respectively. The ROC AUCs for predictions of MTX failure-risk were 0.90 (95% CI 0.87,0.93) with both NM and the website. The gradual association between increasing hCGres and the 2-year MTX failure-free survival was confirmed.

Conclusion

There is a high consistency of hCGres estimates obtained with the two methods. The website is meant to help clinicians in the interpretation of hCG decline curves of MTX-treated GTN patients. hCGres is now validated for more than 1690 patients in four independent datasets, and its recognition as an early predictor of MTX resistance for treatment adjustment and for the future studies should be considered.",2020-06-04 +29809048,"""Dynamical correlation: A new method for quantifying synchrony with multivariate intensive longitudinal data"": Correction to Liu et al. (2016).","Reports an error in ""Dynamical correlation: A new method for quantifying synchrony with multivariate intensive longitudinal data"" by Siwei Liu, Yang Zhou, Richard Palumbo and Jane-Ling Wang (Psychological Methods, 2016[Sep], Vol 21[3], 291-308). In the article, there were errors in the R script of Appendix B which could lead to incorrect significance testing results for dynamical correlation. We created an updated R script with corrections. In the updated R script, argument ""na"" from function ""ind_DC"" and argument ""ms"" from function ""boot_test_DC"" were removed. Codes to check if there is any missing value in the data, and to compute proportion of missing values in the data were added. A warning was added when too many missing values are present. In addition, argument 't' is now correctly labeled ""a vector of time points where x,y are observed"". The updated R script with corrections can be downloaded from the first author's personal website: https://siweiliu.weebly.com/publications.html. (The following abstract of the original article appeared in record 2016-07276-001.) In this article, we introduce dynamical correlation, a new method for quantifying synchrony between 2 variables with intensive longitudinal data. Dynamical correlation is a functional data analysis technique developed to measure the similarity of 2 curves. 
It has advantages over existing methods for studying synchrony, such as multilevel modeling. In particular, it is a nonparametric approach that does not require a prespecified functional form, and it places no assumption on homogeneity of the sample. Dynamical correlation can be easily estimated with irregularly spaced observations and tested to draw population-level inferences. We illustrate this flexible statistical technique with a simulation example and empirical data from an experiment examining interpersonal physiological synchrony between romantic partners. We discuss the advantages and limitations of the method, and how it can be extended and applied in psychological research. We also provide a set of R code for other researchers to estimate and test for dynamical correlation. (PsycINFO Database Record",2018-06-01 +32827670,CWLy-pred: A novel cell wall lytic enzyme identifier based on an improved MRMD feature selection method.,"Cell wall lytic enzymes play key roles in biochemical, morphological, genetic research and industry fields. To save time and labor costs, bioinformatic methods are usually adopted to narrow the scope of in vitro experimentation. In this paper, we established a novel machine learning (support vector machine) based identifier called CWLy-pred to identify cell wall lytic enzymes. An improved MRMD feature selection method is also proposed to select the optimal training set to avoid data redundancy. CWLy-pred obtains an accuracy of 93.067%, a sensitivity of 85.3%, a specificity of 94.8%, an MCC of 0.775 and an AUC of 0.900. It outperforms the state-of-the-art identifier in terms of accuracy, sensitivity, specificity and MCC. Our proposed model is based on a feature set of only 6 dimensions; therefore, it not only can overcome overfitting problems but can also supervise biological experiments effectively. 
CWLy-pred is embedded in a web application at http://server.malab.cn/CWLy-pred/index.jsp, which is accessible for free.",2020-08-19 +27656741,"Highlights of the National Mental Health Services Survey, 2010","Background: The National Mental Health Services Survey (N-MHSS) is an annual survey conducted by the Substance Abuse and Mental Health Services Administration (SAMHSA) of all known public and private mental health treatment facilities in the United States, the District of Columbia, and other U.S. jurisdictions. Methods: In the 2010 N-MHSS, data were collected from: psychiatric hospitals; nonfederal general hospitals with a separate psychiatric unit; U.S. Department of Veterans Affairs (VA) medical centers; outpatient or day treatment or partial hospitalization mental health facilities; residential treatment centers (RTCs) for children; RTCs for adults; and multisetting (nonhospital) mental health facilities. Excluded were Department of Defense military treatment facilities; facilities administered by the Indian Health Service; tribally-operated facilities; private practitioners and small group practices not licensed as mental health clinics or centers; and jails or prisons. Results: In 2010, 10,374 eligible facilities responded to the N-MHSS and reported a 1-day census of 3,186,636 clients enrolled in mental health treatment on April 30, 2010. 67% of facilities surveyed were operated by private nonprofit entities, about 10 percent were operated by private for-profit entities, and the remainder were operated by state, local, or regional/district governments or authorities or the Department of Veterans Affairs (VA). 61% of facilities were outpatient or day treatment or partial hospitalization facilities, 18% were psychiatric hospitals or psychiatric units within general hospitals, 16% were residential treatment centers, 3 percent were multisetting nonhospital facilities, and slightly more than 2% were facilities operated by the VA. 
More than 75% accepted Medicaid, client/patient fees, or private insurance. About 61% of facilities offered a sliding-fee scale or treatment at no charge. About 63% and 58% of facilities offered special treatment programs for adults with serious mental illness or individuals with co-occurring mental health and substance use disorders, respectively. Conclusion: These N-MHSS data can be used to conduct comparative analyses and forecast future resource needs and are used to update the information in the mental health component of SAMHSA's online Behavioral Health Treatment Services Locator (http://findtreatment.samhsa.gov/), a searchable database of licensed and accredited public and private mental health treatment facilities.",2016-09-23 +27799277,The Spike-and-Slab Lasso Generalized Linear Models for Prediction and Associated Genes Detection.,"Large-scale ""omics"" data have been increasingly used as an important resource for prognostic prediction of diseases and detection of associated genes. However, there are considerable challenges in analyzing high-dimensional molecular data, including the large number of potential molecular predictors, limited number of samples, and small effect of each predictor. We propose new Bayesian hierarchical generalized linear models, called spike-and-slab lasso GLMs, for prognostic prediction and detection of associated genes using large-scale molecular data. The proposed model employs a spike-and-slab mixture double-exponential prior for coefficients that can induce weak shrinkage on large coefficients, and strong shrinkage on irrelevant coefficients. We have developed a fast and stable algorithm to fit large-scale hierarchal GLMs by incorporating expectation-maximization (EM) steps into the fast cyclic coordinate descent algorithm. The proposed approach integrates nice features of two popular methods, i.e., penalized lasso and Bayesian spike-and-slab variable selection. 
The performance of the proposed method is assessed via extensive simulation studies. The results show that the proposed approach can provide not only more accurate estimates of the parameters, but also better prediction. We demonstrate the proposed procedure on two cancer data sets: a well-known breast cancer data set consisting of 295 tumors, and expression data of 4919 genes; and the ovarian cancer data set from TCGA with 362 tumors, and expression data of 5336 genes. Our analyses show that the proposed procedure can generate powerful models for predicting outcomes and detecting associated genes. The methods have been implemented in a freely available R package BhGLM (http://www.ssg.uab.edu/bhglm/).",2016-10-31 +32840472,Diagnosis of Invasive Lung Adenocarcinoma Based on Chest CT Radiomic Features of Part-Solid Pulmonary Nodules: A Multicenter Study.,"Background Solid components of part-solid nodules (PSNs) at CT are reflective of invasive adenocarcinoma, but studies describing radiomic features of PSNs and the perinodular region are lacking. Purpose To develop and to validate radiomic signatures diagnosing invasive lung adenocarcinoma in PSNs compared with the Brock, clinical-semantic features, and volumetric models. Materials and Methods This retrospective multicenter study (https://ClinicalTrials.gov, NCT03872362) included 291 patients (median age, 60 years; interquartile range, 55-65 years; 191 women) from January 2013 to October 2017 with 297 PSN lung adenocarcinomas split into training (n = 229) and test (n = 68) data sets. Radiomic features were extracted from the different regions (gross tumor volume [GTV], solid, ground-glass, and perinodular). Random-forest models were trained using clinical-semantic, volumetric, and radiomic features, and an online nodule calculator was used to compute the Brock model. Performances of models were evaluated using standard metrics such as area under the curve (AUC), accuracy, and calibration. 
The integrated discrimination improvement was applied to assess model performance changes after the addition of perinodular features. Results The radiomics model based on ground-glass and solid features yielded an AUC of 0.98 (95% confidence interval [CI]: 0.96, 1.00) on the test data set, which was significantly higher than the Brock (AUC, 0.83 [95% CI: 0.72, 0.94]; P = .007), clinical-semantic (AUC, 0.90 [95% CI: 0.83, 0.98]; P = .03), volumetric GTV (AUC, 0.87 [95% CI: 0.78, 0.96]; P = .008), and radiomics GTV (AUC, 0.88 [95% CI: 0.80, 0.96]; P = .01) models. It also achieved the best accuracy (93% [95% CI: 84%, 98%]). Both this model and the model with added perinodular features showed good calibration, whereas adding perinodular features did not improve the performance (integrated discrimination improvement, -0.02; P = .56). Conclusion Separating ground-glass and solid CT radiomic features of part-solid nodules was useful in diagnosing the invasiveness of lung adenocarcinoma, yielding a better predictive performance than the Brock, clinical-semantic, volumetric, and radiomics gross tumor volume models. Online supplemental material is available for this article. See also the editorial by Nishino in this issue. Published under a CC BY 4.0 license.",2020-08-25 +30405787,Barrier-to-autointegration factor 1: A novel biomarker for gastric cancer.,"China is a country with a high incidence of gastric cancer (GC), where the GC incidence and the resultant mortality rates account for 50% of those worldwide. Surgical resection remains the primary treatment for GC. However, postoperative patients have a poor prognosis as the majority of patients present with metastases at the time of diagnosis. Therefore, the identification of novel treatment targets is required. 
The present study aimed to determine the effects of barrier-to-autointegration factor 1 (BANF1) on the clinical features and prognosis of GC, which may aid in discovering a novel tumor diagnostic biomarker and treatment target. The BANF1 gene expression profiles for normal and gastric tumor tissues were downloaded from the Gene Expression Omnibus GSE54129 data set to analyse the expression of BANF1 at the mRNA levels. Then, online survival analysis was performed using the GC database with the Kaplan-Meier Plotter (http://kmplot.com/analysis/) data. To examine the association between BANF1 and clinical features and prognosis, 132 postoperative GC pathological specimens were collected for immunohistochemical analyses. In the GSE54129 data sets, BANF1 expression at the mRNA level was significantly higher in the tumor tissue compared with that in the normal tissue. The same result was obtained in following the immunohistochemical analyses. In addition, BANF1 expression was associated with the patient age, tumor differentiation and infiltration depth. The survival time of BANF1 high-expression patients was shorter compared with that of the low-expression patients, and tumor differentiation status and tumor node metastasis stage were independent prognostic factors of the overall survival of patients with GC. The results of the present study suggest that BANF1 is associated with the clinical features and prognosis of GC. It may be a novel indicator of tumor prognosis and a potential therapeutic target for GC.",2018-09-11 +29401218,MutHTP: mutations in human transmembrane proteins.,"Motivation:Existing sources of experimental mutation data do not consider the structural environment of amino acid substitutions and distinguish between soluble and membrane proteins. They also suffer from a number of further limitations, including data redundancy, lack of disease classification, incompatible information content, and ambiguous annotations (e.g. 
the same mutation being annotated as disease and benign). Results:We have developed a novel database, MutHTP, which contains information on 183 395 disease-associated and 17 827 neutral mutations in human transmembrane proteins. For each mutation site MutHTP provides a description of its location with respect to the membrane protein topology, structural environment (if available) and functional features. Comprehensive visualization, search, display and download options are available. Availability and implementation:The database is publicly available at http://www.iitm.ac.in/bioinfo/MutHTP/. The website is implemented using HTML, PHP and javascript and supports recent versions of all major browsers, such as Firefox, Chrome and Opera. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +30081065,"Allele and haplotype frequencies of HLA-A, -B, -C, -DRB1, -DQB1 in Northern Ossetians from Vladikavkaz, Russia.","This report shows the HLA-A, -B, -C, -DRB1 and -DQB1 allele and haplotype frequencies in a population of 127 healthy Ossetian donors of blood marrow from Vladikavkaz, Russia. First- and second-field (for HLA-C locus) HLA genotyping was performed by polymerase chain reaction sequence-specific priming and/or oligonucleotide probes. Statistical analysis were performed using gene counting and Arlequin software packages. There was no deviation from Hardy-Weinberg equilibrium for all tested loci. The HLA genotypic and haplotypic data of the Ossetians reported here are available in free access at the Allele Frequencies Net Database (http://www.allelefrequencies.net). This data can serve as a reference database for further HLA-based studies in population genetics.",2018-08-03 +31999330,Tempel: time-series mutation prediction of influenza A viruses via attention-based recurrent neural networks.,"MOTIVATION:Influenza viruses are persistently threatening public health, causing annual epidemics and sporadic pandemics. 
The evolution of influenza viruses remains to be the main obstacle in the effectiveness of antiviral treatments due to rapid mutations. The goal of this work is to predict whether mutations are likely to occur in the next flu season using historical glycoprotein hemagglutinin sequence data. One of the major challenges is to model the temporality and dimensionality of sequential influenza strains and to interpret the prediction results. RESULTS:In this article, we propose an efficient and robust time-series mutation prediction model (Tempel) for the mutation prediction of influenza A viruses. We first construct the sequential training samples with splittings and embeddings. By employing recurrent neural networks with attention mechanisms, Tempel is capable of considering the historical residue information. Attention mechanisms are being increasingly used to improve the performance of mutation prediction by selectively focusing on the parts of the residues. A framework is established based on Tempel that enables us to predict the mutations at any specific residue site. Experimental results on three influenza datasets show that Tempel can significantly enhance the predictive performance compared with widely used approaches and provide novel insights into the dynamics of viral mutation and evolution. AVAILABILITY AND IMPLEMENTATION:The datasets, source code and supplementary documents are available at: https://drive.google.com/drive/folders/15WULR5__6k47iRotRPl3H7ghi3RpeNXH. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +26193342,The prognostic landscape of genes and infiltrating immune cells across human cancers.,"Molecular profiles of tumors and tumor-associated cells hold great promise as biomarkers of clinical outcomes. However, existing data sets are fragmented and difficult to analyze systematically. 
Here we present a pan-cancer resource and meta-analysis of expression signatures from ∼18,000 human tumors with overall survival outcomes across 39 malignancies. By using this resource, we identified a forkhead box MI (FOXM1) regulatory network as a major predictor of adverse outcomes, and we found that expression of favorably prognostic genes, including KLRB1 (encoding CD161), largely reflect tumor-associated leukocytes. By applying CIBERSORT, a computational approach for inferring leukocyte representation in bulk tumor transcriptomes, we identified complex associations between 22 distinct leukocyte subsets and cancer survival. For example, tumor-associated neutrophil and plasma cell signatures emerged as significant but opposite predictors of survival for diverse solid tumors, including breast and lung adenocarcinomas. This resource and associated analytical tools (http://precog.stanford.edu) may help delineate prognostic genes and leukocyte subsets within and across cancers, shed light on the impact of tumor heterogeneity on cancer outcomes, and facilitate the discovery of biomarkers and therapeutic targets.",2015-07-20 +33502909,NO2 and PM2.5 Exposures and Lung Function in Swiss Adults: Estimated Effects of Short-Term Exposures and Long-Term Exposures with and without Adjustment for Short-Term Deviations.,"

Background

The impact of nitrogen dioxide (NO2) and particulate matter with an aerodynamic diameter of less than or equal to 2.5 microns (PM2.5) exposures on lung function has been investigated mainly in children and less in adults. Furthermore, it is unclear whether short-term deviations of air pollutant concentration need to be considered in long-term exposure models.

Objectives

The aims of this study were to investigate the association between short-term air pollution exposure and lung function and to assess whether short-term deviations of air pollutant concentration should be integrated into long-term exposure models.

Methods

Short-term (daily averages 0-7 d prior) and long-term (1- and 4-y means) NO2 and PM2.5 concentrations were modeled using satellite, land use, and meteorological data calibrated on ground measurements. Forced expiratory volume within the first second (FEV1) of forced exhalation and forced vital capacity (FVC) were measured during a LuftiBus assessment (2003-2012) and linked to exposure information from the Swiss National Cohort for 36,085 adults (ages 18-95 y). We used multiple linear regression to estimate adjusted associations, and additionally adjusted models of long-term exposures for short-term deviations in air pollutant concentrations.

Results

A 10μg/m3 increase in NO2 and PM2.5 on the day of the pulmonary function test was associated with lower FEV1 and FVC (NO2: FEV1 -8.0 ml [95% confidence interval: -13.4, -2.7], FVC -16.7 ml [-23.4, -10.0]; PM2.5: FEV1 -15.3 ml [-21.9, -8.7], FVC -18.5 ml [-26.5, -10.5]). A 10μg/m3 increase in 1-y mean NO2 was also associated with lower FEV1 (-7.7 ml; -15.9, 0.5) and FVC (-21.6 ml; -31.9, -11.4), as was a 10μg/m3 increase in 1-y mean PM2.5 (FEV1: -42.2 ml; -56.9, -27.5; FVC: -82.0 ml; -100.1, -63.9). These associations were robust to adjustment for short-term deviations in the concentration of each air pollutant.

Conclusions

Short- and long-term air pollution exposures were negatively associated with lung function, in particular long-term PM2.5 exposure with FVC. Our findings contribute substantially to the evidence of adverse associations between air pollution and lung function in adults. https://doi.org/10.1289/EHP7529.",2021-01-27 +33583272,Domain-driven models yield better predictions at lower cost than reservoir computers in Lorenz systems.,"Recent advances in computing algorithms and hardware have rekindled interest in developing high-accuracy, low-cost surrogate models for simulating physical systems. The idea is to replace expensive numerical integration of complex coupled partial differential equations at fine time scales performed on supercomputers, with machine-learned surrogates that efficiently and accurately forecast future system states using data sampled from the underlying system. One particularly popular technique being explored within the weather and climate modelling community is the echo state network (ESN), an attractive alternative to other well-known deep learning architectures. Using the classical Lorenz 63 system, and the three tier multi-scale Lorenz 96 system (Thornes T, Duben P, Palmer T. 2017 Q. J. R. Meteorol. Soc. 143, 897-908. (doi:10.1002/qj.2974)) as benchmarks, we realize that previously studied state-of-the-art ESNs operate in two distinct regimes, corresponding to low and high spectral radius (LSR/HSR) for the sparse, randomly generated, reservoir recurrence matrix. Using knowledge of the mathematical structure of the Lorenz systems along with systematic ablation and hyperparameter sensitivity analyses, we show that state-of-the-art LSR-ESNs reduce to a polynomial regression model which we call Domain-Driven Regularized Regression (D2R2). Interestingly, D2R2 is a generalization of the well-known SINDy algorithm (Brunton SL, Proctor JL, Kutz JN. 2016 Proc. Natl Acad. Sci. USA 113, 3932-3937. (doi:10.1073/pnas.1517384113)). 
We also show experimentally that LSR-ESNs (Chattopadhyay A, Hassanzadeh P, Subramanian D. 2019 (http://arxiv.org/abs/1906.08829)) outperform HSR ESNs (Pathak J, Hunt B, Girvan M, Lu Z, Ott E. 2018 Phys. Rev. Lett. 120, 024102. (doi:10.1103/PhysRevLett.120.024102)) while D2R2 dominates both approaches. A significant goal in constructing surrogates is to cope with barriers to scaling in weather prediction and simulation of dynamical systems that are imposed by time and energy consumption in supercomputers. Inexact computing has emerged as a novel approach to helping with scaling. In this paper, we evaluate the performance of three models (LSR-ESN, HSR-ESN and D2R2) by varying the precision or word size of the computation as our inexactness-controlling parameter. For precisions of 64, 32 and 16 bits, we show that, surprisingly, the least expensive D2R2 method yields the most robust results and the greatest savings compared to ESNs. Specifically, D2R2 achieves 68 × in computational savings, with an additional 2 × if precision reductions are also employed, outperforming ESN variants by a large margin. This article is part of the theme issue 'Machine learning for weather and climate modelling'.",2021-02-15 +32119070,A path recorder algorithm for Multiple Longest Common Subsequences (MLCS) problems.,"MOTIVATION:Searching the Longest Common Subsequences of many sequences is called a Multiple Longest Common Subsequence (MLCS) problem which is a very fundamental and challenging problem in many fields of data mining. The existing algorithms cannot be applicable to problems with long and large-scale sequences due to their huge time and space consumption. To efficiently handle large-scale MLCS problems, a Path Recorder Directed Acyclic Graph (PRDAG) model and a novel Path Recorder Algorithm (PRA) are proposed. 
RESULTS:In PRDAG, we transform the MLCS problem into searching the longest path from the Directed Acyclic Graph (DAG), where each longest path in DAG corresponds to an MLCS. To tackle the problem efficiently, we eliminate all redundant and repeated nodes during the construction of DAG, and for each node, we only maintain the longest paths from the source node to it but ignore all non-longest paths. As a result, the size of the DAG becomes very small, and the memory space and search time will be greatly saved. Empirical experiments have been performed on a standard benchmark set of both DNA sequences and protein sequences. The experimental results demonstrate that our model and algorithm outperform the related leading algorithms, especially for large-scale MLCS problems. AVAILABILITY AND IMPLEMENTATION:This program code is written by the first author and can be available at https://www.ncbi.nlm.nih.gov/nuccore and https://blog.csdn.net/wswguilin. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +32103259,Tissue specific regulation of transcription in endometrium and association with disease.,"

Study question

Are genetic effects on endometrial gene expression tissue specific and/or associated with reproductive traits and diseases?

Summary answer

Analyses of RNA-sequence data and individual genotype data from the endometrium identified novel and disease associated, genetic mechanisms regulating gene expression in the endometrium and showed evidence that these mechanisms are shared across biologically similar tissues.

What is known already

The endometrium is a complex tissue vital for female reproduction and is a hypothesized source of cells initiating endometriosis. Understanding genetic regulation specific to, and shared between, tissue types can aid the identification of genes involved in complex genetic diseases.

Study design, size, duration

RNA-sequence and genotype data from 206 individuals was analysed and results were compared with large publicly available datasets.

Participants/materials, setting, methods

RNA-sequencing and genotype data from 206 endometrial samples was used to identify the influence of genetic variants on gene expression, via expression quantitative trait loci (eQTL) analysis and to compare these endometrial eQTLs with those in other tissues. To investigate the association between endometrial gene expression regulation and reproductive traits and diseases, we conducted a tissue enrichment analysis, transcriptome-wide association study (TWAS) and summary data-based Mendelian randomisation (SMR) analyses. Transcriptomic data was used to test differential gene expression between women with and without endometriosis.

Main results and the role of chance

A tissue enrichment analysis with endometriosis genome-wide association study summary statistics showed that genes surrounding endometriosis risk loci were significantly enriched in reproductive tissues. A total of 444 sentinel cis-eQTLs (P < 2.57 × 10^-9) and 30 trans-eQTLs (P < 4.65 × 10^-13) were detected, including 327 novel cis-eQTLs in endometrium. A large proportion (85%) of endometrial eQTLs are present in other tissues. Genetic effects on endometrial gene expression were highly correlated with the genetic effects on reproductive (e.g. uterus, ovary) and digestive tissues (e.g. salivary gland, stomach), supporting a shared genetic regulation of gene expression in biologically similar tissues. The TWAS analysis indicated that gene expression at 39 loci is associated with endometriosis, including five known endometriosis risk loci. SMR analyses identified potential target genes pleiotropically or causally associated with reproductive traits and diseases including endometriosis. However, without taking account of genetic variants, a direct comparison between women with and without endometriosis showed no significant difference in endometrial gene expression.

Large scale data

The eQTL dataset generated in this study is available at http://reproductivegenomics.com.au/shiny/endo_eqtl_rna/. Additional datasets supporting the conclusions of this article are included within the article and the supplementary information files, or are available on reasonable request.

Limitations, reasons for caution

Data are derived from fresh tissue samples and expression levels are an average of expression from different cell types within the endometrium. Subtle cell-specific expression changes may not be detected and differences in cell composition between samples and across the menstrual cycle will contribute to sample variability. Power to detect tissue specific eQTLs and differences between women with and without endometriosis was limited by the sample size in this study. The statistical approaches used in this study identify the likely gene targets for specific genetic risk factors, but not the functional mechanism by which changes in gene expression may influence disease risk.

Wider implications of the findings

Our results identify novel genetic variants that regulate gene expression in endometrium and the majority of these are shared across tissues. This allows analysis with large publicly available datasets to identify targets for female reproductive traits and diseases. Much larger studies will be required to identify genetic regulation of gene expression that will be specific to endometrium.

Study funding/competing interest(s)

This work was supported by the National Health and Medical Research Council (NHMRC) under project grants GNT1026033, GNT1049472, GNT1046880, GNT1050208, GNT1105321, GNT1083405 and GNT1107258. G.W.M is supported by a NHMRC Fellowship (GNT1078399). J.Y is supported by an ARC Fellowship (FT180100186). There are no competing interests.",2020-02-01 +31209793,The potential markers of endocrine resistance among HR+ /HER2+ breast cancer patients.,"

Purpose

Breast cancer with positive hormone receptor (HR) and human epidermal growth factor receptor-2 (HER2) is a special subgroup with different clinical features and survival, especially the endocrine therapy resistance. The main purpose of the study is to find the potential markers to predict the survival and endocrine therapy resistance of patients with HR+ /HER2+ breast cancer.

Methods

Surveillance, Epidemiology, and End Results (SEER) database was used to collect patients' clinical information and tumor features including age, tumor size, grade, stage and long-term survival; the BioPortal for Cancer Genomics (https://cbioportal.org) was used to download the gene data for specific patient group; cluster analyses of gene expression were conducted through the DAVID Bioinformatics Resources 6.8 software.

Results

All of the included patients were diagnosed as HR positive breast cancer, but the PR positive rates were more common in HER2- group and also the ER+ /PR+ disease. Patients in HR+ /HER2+ group were more likely to present as stage III-IV and grade III disease. Among HR+ /HER2+ patients, 68.6% received chemotherapy, while only 28.9% in HR+ /HER2- group received chemotherapy (P < 0.0001). The survival of HR+ /HER2+ group was poorer. From TCGA database, series genes which were differed between HR+ /HER2+ and HR+ /HER2- were screened out that related to ERBB2 closely: IKZF3, LASP1, CDK12, MLLT6, and RARA. The first three candidate genes were associated with patients' survival, especially in patients who received hormone therapies.

Conclusion

This study analyzed the clinical characteristics and survival of patients with HR+/HER2+ breast cancer as a special subgroup. ERBB2, IKZF3, LASP1, and CDK12 were the potential markers of the resistance of endocrine therapy, and they will provide new strategies for clinicians.",2019-06-17 +32421310,Updated ATLAS of Biochemistry with New Metabolites and Improved Enzyme Prediction Power.,"The ATLAS of Biochemistry is a repository of both known and novel predicted biochemical reactions between biological compounds listed in the Kyoto Encyclopedia of Genes and Genomes (KEGG). ATLAS was originally compiled based on KEGG 2015, though the number of KEGG reactions has increased by almost 20 percent since then. Here, we present an updated version of ATLAS created from KEGG 2018 using an increased set of generalized reaction rules. Furthermore, we improved the accuracy of the enzymes that are predicted for catalyzing novel reactions. ATLAS now contains ∼150 000 reactions, out of which 96% are novel. In this report, we present detailed statistics on the updated ATLAS and highlight the improvements with regard to the previous version. Most importantly, 107 reactions predicted in the original ATLAS are now known to KEGG, which validates the predictive power of our approach. The updated ATLAS is available at https://lcsb-databases.epfl.ch/atlas.",2020-06-02 +26552604,Micro-proteomics with iterative data analysis: Proteome analysis in C. elegans at the single worm level.,"Proteomics studies typically analyze proteins at a population level, using extracts prepared from tens of thousands to millions of cells. The resulting measurements correspond to average values across the cell population and can mask considerable variation in protein expression and function between individual cells or organisms. 
Here, we report the development of micro-proteomics for the analysis of Caenorhabditis elegans, a eukaryote composed of 959 somatic cells and ∼1500 germ cells, measuring the worm proteome at a single organism level to a depth of ∼3000 proteins. This includes detection of proteins across a wide dynamic range of expression levels (>6 orders of magnitude), including many chromatin-associated factors involved in chromosome structure and gene regulation. We apply the micro-proteomics workflow to measure the global proteome response to heat-shock in individual nematodes. This shows variation between individual animals in the magnitude of proteome response following heat-shock, including variable induction of heat-shock proteins. The micro-proteomics pipeline thus facilitates the investigation of stochastic variation in protein expression between individuals within an isogenic population of C. elegans. All data described in this study are available online via the Encyclopedia of Proteome Dynamics (http://www.peptracker.com/epd), an open access, searchable database resource.",2016-01-07 +32022843,FTIP: an accurate and efficient method for global protein surface comparison.,"

Motivation

Global protein surface comparison (GPSC) studies have been limited compared to other research works on protein structure alignment/comparison due to lack of real applications associated with GPSC. However, the technology advances in cryo-electron tomography (CET) have made methods to identify proteins from their surface shapes extremely useful.

Results

In this study, we developed a new method called Farthest point sampling (FPS)-enhanced Triangulation-based Iterative-closest-Point (ICP) (FTIP) for GPSC. We applied it to protein classification using only surface shape information. Our method first extracts a set of feature points from protein surfaces using FPS and then uses a triangulation-based efficient ICP algorithm to align the feature points of the two proteins to be compared. Tested on a benchmark dataset with 2329 proteins using nearest-neighbor classification, FTIP outperformed the state-of-the-art method for GPSC based on 3D Zernike descriptors. Using real and simulated cryo-EM data, we show that FTIP could be applied in the future to address problems in protein identification in CET experiments.

Availability and implementation

Programs/scripts we developed/used in the study are available at http://ani.stat.fsu.edu/~yuan/index.fld/FTIP.tar.bz2.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-05-01 +31777943,ClinVar: improvements to accessing data.,"ClinVar is a freely available, public archive of human genetic variants and interpretations of their relationships to diseases and other conditions, maintained at the National Institutes of Health (NIH). Submitted interpretations of variants are aggregated and made available on the ClinVar website (https://www.ncbi.nlm.nih.gov/clinvar/), and as downloadable files via FTP and through programmatic tools such as NCBI's E-utilities. The default view on the ClinVar website, the Variation page, was recently redesigned. The new layout includes several new sections that make it easier to find submitted data as well as summary data such as all diseases and citations reported for the variant. The new design also better represents more complex data such as haplotypes and genotypes, as well as variants that are in ClinVar as part of a haplotype or genotype but have no interpretation for the single variant. ClinVar's variant-centric XML had its production release in April 2019. The ClinVar website and E-utilities both have been updated to support the VCV (variation in ClinVar) accession numbers found in the variant-centric XML file. 
ClinVar's search engine has been fine-tuned for improved retrieval of search results.",2020-01-01 +33457124,GAPDH and PUM1: Optimal Housekeeping Genes for Quantitative Polymerase Chain Reaction-Based Analysis of Cancer Stem Cells and Epithelial-Mesenchymal Transition Gene Expression in Rectal Tumors.,"Background The overwhelming majority of published articles have taken colon and rectal cancer as a single group, i.e., colorectal cancer, when normalizing gene expression data with housekeeping genes (HKG) in quantitative polymerase chain reaction (qPCR) experiments though there are published reports that suggest the differential expression pattern of genes between the colon and rectal cancer groups and hence the current experiment was attempted to find out the optimal set of housekeeping genes from the list of common HKG for rectal tumor gene expression analysis. Methods The expression of five potential housekeeping genes GAPDH, RPNI, PUM1, B2M, and PMM1 was analyzed through qPCR and Bestkeeper software (http://www.wzw.tum.de/gene-quantification/bestkeeper.html) in 20 stage II-IV rectal cancer samples to check for uniformity in their expression pattern. Cancer stem cell (CSC) marker ALDH1 and epithelial-mesenchymal transition marker (EMT) markers E cadherin, vimentin, Twist, and SNAI2 expression were evaluated in conjunction with the two optimal reference genes in 10 rectal cancers as part of validation. Results The standard deviation of the cycle threshold value of GAPDH was found the lowest at 0.65 followed by RPN1 at 0.88, PUM1 at 0.94, PMM1 at 0.94, and B2M at 1.21 when analyzed with BestKeeper software. Using GAPDH and PUM1 as the reference gene for the validation phase, rectal cancer patients with stage III/IV showed a 4.79-fold change (P=0.006) in ALDH1 expression, and an 11.76-fold change in Twist expression (P=0.003) with respect to stage II rectal tumor when normalized with GAPDH and PUM1. 
Conclusion GAPDH and PUM1 can be used as an optimal set of housekeeping genes for gene expression-related experiments in rectal tumors. ALDH1 and Twist were found significantly overexpressed in stage III/IV rectal tumors in comparison to stage II rectal cancer. Genes associated with cancer stem cells and EMT markers could be optimally analyzed by normalizing them with GAPDH and PUM1 as housekeeping genes.",2020-12-10 +31504168,CROSSalive: a web server for predicting the in vivo structure of RNA molecules.,"MOTIVATION:RNA structure is difficult to predict in vivo due to interactions with enzymes and other molecules. Here we introduce CROSSalive, an algorithm to predict the single- and double-stranded regions of RNAs in vivo using predictions of protein interactions. RESULTS:Trained on icSHAPE data in presence (m6a+) and absence of N6 methyladenosine modification (m6a-), CROSSalive achieves cross-validation accuracies between 0.70 and 0.88 in identifying high-confidence single- and double-stranded regions. The algorithm was applied to the long non-coding RNA Xist (17 900 nt, not present in the training) and shows an Area under the ROC curve of 0.83 in predicting structured regions. AVAILABILITY AND IMPLEMENTATION:CROSSalive webserver is freely accessible at http://service.tartaglialab.com/new_submission/crossalive. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-02-01 +32802900,Computational approach towards identification of pathogenic missense mutations in AMELX gene and their possible association with amelogenesis imperfecta.,"Amelogenin gene (AMEL-X) encodes an enamel protein called amelogenin, which plays a vital role in tooth development. Any mutations in this gene or the associated pathway lead to developmental abnormalities of the tooth. The present study aims to analyze functional missense mutations in AMEL-X genes and derive an association with amelogenesis imperfecta. 
The information on missense mutations of human AMEL-X gene was collected from Ensembl database (https://asia.ensembl.org). Three different computational tools viz., SIFT, PolyPhen and PROVEAN were used to identify the deleterious or pathogenic forms of mutations in the gene studied. I-Mutant Suit was used to identify the stability of the proteins identified as deleterious by the three tools. Further, MutPred analysis revealed the pathogenicity of these mutations. Among 96 missense variants reported in AMEL-X gene, 18 were found to be deleterious using the three prediction tools (SIFT, PolyPhen and PROVEAN). When these variants were subjected to protein stability analysis, about 14 missense variants showed decreased stability whereas the other 8 variants showed increased stability. Further, these variants were analyzed using MutPred which identified 9 variants to be highly pathogenic. ExAC database revealed that all the pathogenic mutations had a minor allele frequency less than 0.01. The in silico analysis revealed highly pathogenic mutations in amelogenin gene which could have a putative association with amelogenesis imperfecta. These mutations should be screened in patients for early diagnosis of susceptibility to AI.",2020-06-01 +32349035,Genomic analysis and comparative multiple sequences of SARS-CoV2.,"

Background

China announced an outbreak of new coronavirus in the city of Wuhan on December 31, 2019; lash to now, the virus transmission has become pandemic worldwide. Severe cases from the Huanan Seafood Wholesale market in Wuhan were confirmed pneumonia with a novel coronavirus (2019-nCoV). Understanding the molecular mechanisms of genome selection and packaging is critical for developing antiviral strategies. Thus, we defined the correlation in 10 severe acute respiratory syndrome coronavirus (SARS-CoV2) sequences from different countries to analyze the genomic patterns of disease origin and evolution aiming for developing new control pandemic processes.

Methods

We apply genomic analysis to observe SARS-CoV2 sequences from GenBank (http://www.ncbi.nim.nih.gov/genebank/): MN 908947 (China, C1), MN985325 (USA: WA, UW), MN996527 (China, C2), MT007544 (Australia: Victoria, A1), MT027064 (USA: CA, UC), MT039890 (South Korea, K1), MT066175 (Taiwan, T1), MT066176 (Taiwan, T2), LC528232 (Japan, J1), and LC528233 (Japan, J2) for genomic sequence alignment analysis. Multiple Sequence Alignment by Clustalw (https://www.genome.jp/tools-bin/clustalw) web service is applied as our alignment tool.

Results

We analyzed 10 sequences from the National Center for Biotechnology Information (NCBI) database by genome alignment and found no difference in amino acid sequences within M and N proteins. There are two amino acid variances in the spike (S) protein region. One mutation found from the South Korea sequence is verified. Two possible ""L"" and ""S"" SNPs found in ORF1ab and ORF8 regions are detected.

Conclusion

We performed genomic analysis and comparative multiple sequences of SARS-CoV2. Studies about the biological symptoms of SARS-CoV2 in clinic animals and humans will manipulate an understanding on the origin of pandemic crisis.",2020-06-01 +32386256,miR-874-3p inhibits cell migration through targeting RGS4 in osteosarcoma.,"

Background

The present study explored the role and mechanism of microRNA-874-3p (miR-874-3p) in the migration of the osteosarcoma cell line, U-2 OS.

Methods

The expression profile of osteosarcoma (OS) microRNA (GSE65071) datasets was downloaded from the Gene Expression Omnibus (GEO) database (https://www.ncbi.nlm.nih.gov/geo) to identify differentially expressed miRNAs in OS and its biological functions. A quantitative reverse transcription-polymerase chain reaction was performed to detect the expression of miR-874-3p and its target gene regulator of G protein 4 (RGS4) in human osteosarcoma cells U-2 OS and normal osteoblast hFOB1.19. Plasmid overexpression miR-874-3p and pcDNA-RGS4 were transfected into U-2 OS using Lipofectamine 2000 (Thermo Fisher, Waltham, MA, USA). Cell migration was measured using Transwell migration assays. Bioinformatic analysis and luciferase reporter assay were conducted to search for the target gene of miR-874-3p.

Results

In total, 167 differentially expressed miRNAs were detected after the analysis of GSE65071; of which 78 were up-regulated genes and 89 were down-regulated. miR-874-3p was down-regulated and selected for further analysis. The expression level of miR-874-3p in U-2 OS cells was significantly decreased compared to the hFOB1.19 cell line (p < 0.05). Overexpression of miR-874-3p significantly inhibited the proliferation and migration of U-2 OS cells and overexpression of RGS4 reversed the inhibitory effect of miR-874-3p on U-2 OS cells. Through luciferase report analyses and bioinformatic analysis, RGS4 may be the candidate target gene of miR-874-3p.

Conclusions

In conclusion, overexpression of miR-874-3p suppressed OS cell proliferation and migration. Thus, miR-874-3p might present a therapeutic agent for the treatment of OS.",2020-06-01 +32540816,[Update on the diagnosis of parasitic and fungal infections].,"The diagnosis of parasitic and fungal infections, historically based on the detection of these pathogens using direct diagnosis (macro/microscopic examination, culture) or serological methods, has considerably evolved in the last decades, especially with the development of molecular approaches and mass spectrometry. These techniques, as well as most analyses of parasitic and fungal serology, are mostly the preserve of Hospital University Centers Parasitology-Mycology laboratories. In 2016, the French association of medical parasitology and mycology teachers and hospital practitioners (Anofel) has provided a Catalogue of rare analyses, regularly updated and freely accessible on the Anofel website (https://anofel.net/). This tool, which hinges on 4 parts (parasitology, parasitic serology, mycology, and fungal serology), aims to provide information on all available analyses, and a list of hospital laboratories able to undertake them. It is complementary to the other reference works that were developed by our association, including the Guide of analyses and methods in parasitology and mycology, published in 2018, and the eANOFEL pictures and videos database, freely accessible online (http://www.eanofel.fr). In this article, we draw-up a state-of-the-art of the most specialized techniques available in the parasitology-mycology laboratories and presented in the Catalogue of rare analyses of the Anofel collegium, and their interest for the diagnosis of these infections.",2020-06-01 +32315327,"Dataflow programming for the analysis of molecular dynamics with AViS, an analysis and visualization software application.","The study of molecular dynamics simulations is largely facilitated by analysis and visualization toolsets. 
However, these toolsets are often designed for specific use cases and those only, while scripting extensions to such toolsets is often exceedingly complicated. To overcome this problem, we designed a software application called AViS which focuses on the extensibility of analysis. By utilizing the dataflow programming (DFP) paradigm, algorithms can be defined by execution graphs, and arbitrary data can be transferred between nodes using visual connectors. Extension nodes can be implemented in either Python, C++, and Fortran, and combined in the same algorithm. AViS offers a comprehensive collection of nodes for sophisticated visualization state modifications, thus greatly simplifying the rules for writing extensions. Input files can also be read from the server automatically, and data is fetched automatically to improve memory usage. In addition, the visualization system of AViS uses physically-based rendering techniques, improving the 3D perception of molecular structures for interactive visualization. By performing two case studies on complex molecular systems, we show that the DFP workflow offers a much higher level of flexibility and extensibility when compared to legacy workflows. The software source code and binaries for Windows, MacOS, and Linux are freely available at https://avis-md.github.io/.",2020-04-21 +30100904,iBCE-EL: A New Ensemble Learning Framework for Improved Linear B-Cell Epitope Prediction.,"Identification of B-cell epitopes (BCEs) is a fundamental step for epitope-based vaccine development, antibody production, and disease prevention and diagnosis. Due to the avalanche of protein sequence data discovered in postgenomic age, it is essential to develop an automated computational method to enable fast and accurate identification of novel BCEs within vast number of candidate proteins and peptides. Although several computational methods have been developed, their accuracy is unreliable. 
Thus, developing a reliable model with significant prediction improvements is highly desirable. In this study, we first constructed a non-redundant data set of 5,550 experimentally validated BCEs and 6,893 non-BCEs from the Immune Epitope Database. We then developed a novel ensemble learning framework for improved linear BCE predictor called iBCE-EL, a fusion of two independent predictors, namely, extremely randomized tree (ERT) and gradient boosting (GB) classifiers, which, respectively, uses a combination of physicochemical properties (PCP) and amino acid composition and a combination of dipeptide and PCP as input features. Cross-validation analysis on a benchmarking data set showed that iBCE-EL performed better than individual classifiers (ERT and GB), with a Matthews correlation coefficient (MCC) of 0.454. Furthermore, we evaluated the performance of iBCE-EL on the independent data set. Results show that iBCE-EL significantly outperformed the state-of-the-art method with an MCC of 0.463. To the best of our knowledge, iBCE-EL is the first ensemble method for linear BCEs prediction. iBCE-EL was implemented in a web-based platform, which is available at http://thegleelab.org/iBCE-EL. iBCE-EL contains two prediction modes. The first one identifying peptide sequences as BCEs or non-BCEs, while later one is aimed at providing users with the option of mining potential BCEs from protein sequences.",2018-07-27 +31443960,European Association of Urology Guidelines on Non-muscle-invasive Bladder Cancer (TaT1 and Carcinoma In Situ) - 2019 Update.,"

Context

This overview presents the updated European Association of Urology (EAU) guidelines for non-muscle-invasive bladder cancer (NMIBC), TaT1, and carcinoma in situ (CIS).

Objective

To provide practical recommendations on the clinical management of NMIBC with a focus on clinical presentation and recommendations.

Evidence acquisition

A broad and comprehensive scoping exercise covering all areas of the NMIBC guidelines has been performed annually since the last published version in 2017. Databases covered by the search included Medline, EMBASE, and the Cochrane Libraries. Previous guidelines were updated, and the level of evidence and grade of recommendation were assigned.

Evidence synthesis

Tumours staged as Ta, T1, and/or CIS are grouped under the heading of NMIBC. Diagnosis depends on cystoscopy and histological evaluation of the tissue obtained by transurethral resection (TURB) in papillary tumours or by multiple bladder biopsies in CIS. In papillary lesions, a complete TURB is essential for the patient's prognosis and correct diagnosis. Where the initial resection is incomplete, where there is no muscle in the specimen, or where a T1 tumour is detected, a second TURB should be performed within 2-6 wk. The risks of both recurrence and progression may be estimated for individual patients using the European Organisation for Research and Treatment of Cancer (EORTC) scoring system. Stratification of patients into low-, intermediate-, and high-risk groups is pivotal to the recommendation of adjuvant treatment. In patients with tumours presumed to be at a low risk and in those presumed to be at an intermediate risk with a low previous recurrence rate and an expected EORTC recurrence score of <5, one immediate chemotherapy instillation is recommended. Patients with intermediate-risk tumours should receive 1 yr of full-dose bacillus Calmette-Guérin (BCG) intravesical immunotherapy or instillations of chemotherapy for a maximum of 1 yr. In patients with high-risk tumours, full-dose intravesical BCG for 1-3 yr is indicated. In patients at the highest risk of tumour progression, immediate radical cystectomy should be considered. Cystectomy is recommended in BCG-unresponsive tumours. The extended version of the guidelines is available at the EAU website: https://uroweb.org/guideline/non-muscle-invasive-bladder-cancer/.

Conclusions

These abridged EAU guidelines present updated information on the diagnosis and treatment of NMIBC for incorporation into clinical practice.

Patient summary

The European Association of Urology Non-muscle-invasive Bladder Cancer (NMIBC) Panel has released an updated version of their guidelines, which contains information on classification, risk factors, diagnosis, prognostic factors, and treatment of NMIBC. The recommendations are based on the current literature (until the end of 2018), with emphasis on high-level data from randomised clinical trials and meta-analyses. Stratification of patients into low-, intermediate-, and high-risk groups is essential for deciding appropriate use of adjuvant intravesical chemotherapy or bacillus Calmette-Guérin (BCG) instillations. Surgical removal of the bladder should be considered in case of BCG-unresponsive tumours or in NMIBCs with the highest risk of progression.",2019-08-20 +29733404,SPAR: small RNA-seq portal for analysis of sequencing experiments.,"The introduction of new high-throughput small RNA sequencing protocols that generate large-scale genomics datasets along with increasing evidence of the significant regulatory roles of small non-coding RNAs (sncRNAs) have highlighted the urgent need for tools to analyze and interpret large amounts of small RNA sequencing data. However, it remains challenging to systematically and comprehensively discover and characterize sncRNA genes and specifically-processed sncRNA products from these datasets. To fill this gap, we present Small RNA-seq Portal for Analysis of sequencing expeRiments (SPAR), a user-friendly web server for interactive processing, analysis, annotation and visualization of small RNA sequencing data. SPAR supports sequencing data generated from various experimental protocols, including smRNA-seq, short total RNA sequencing, microRNA-seq, and single-cell small RNA-seq. 
Additionally, SPAR includes publicly available reference sncRNA datasets from our DASHR database and from ENCODE across 185 human tissues and cell types to produce highly informative small RNA annotations across all major small RNA types and other features such as co-localization with various genomic features, precursor transcript cleavage patterns, and conservation. SPAR allows the user to compare the input experiment against reference ENCODE/DASHR datasets. SPAR currently supports analyses of human (hg19, hg38) and mouse (mm10) sequencing data. SPAR is freely available at https://www.lisanwanglab.org/SPAR.",2018-07-01 +32430931,Development and validation of a Web-based malignancy risk-stratification system of thyroid nodules.,"

Objectives

Previous publications on risk-stratification systems for malignant thyroid nodules were based on conventional ultrasound only. We aimed to develop a practical and simplified prediction model for categorizing the malignancy risk of thyroid nodules based on clinical data, biochemical data, conventional ultrasound and real-time elastography.

Design

Retrospective cohort study.

Patients

A total of 2818 patients (1890 female, mean age, 45.5 ± 13.2 years) with 2850 thyroid nodules were retrospectively evaluated between April 2011 and October 2016. 26.8% nodules were malignant.

Measurements

We used a randomly divided sample of 80% of the nodules to perform a multivariate logistic regression analysis. Cut-points were determined to create a risk-stratification scoring system. Patients were classified as having low, moderate and high probability of malignancy according to their scores. We validated the models to the remaining 20% of the nodules. The area under the curve (AUC) was used to evaluate the discrimination ability of the systems.

Results

Ten variables were selected as predictors of malignancy. The point-based scoring systems with and without elasticity score achieved similar AUCs of 0.916 (95% confidence interval [CI]: 0.885-0.948) and 0.906 (95% CI: 0.872-0.941) when validated. Malignancy risk was segmented from 0% to 100.0% and was positively associated with an increase in risk scores. We then developed a Web-based risk-stratification system of thyroid nodules (http://thynodscore.com).

Conclusion

A simple and reliable Web-based risk-stratification system could be practically used in stratifying the risk of malignancy in thyroid nodules.",2020-06-02 +30793016,Data on the genome analysis of the probiotic strain Bacillus subtilis GM5.,"In the present study, we report data on the draft genome sequence of a lipopeptide producing rhizospheric Bacillus subtilis GM5 isolate. The genome consists of 4,271,280 bp with a GC-pair content of 43.3%. A total of 4518 genes including 75 tRNA genes, 3 operons coding for rRNA genes and 56 pseudogenes were annotated. Gene clusters responsible for the biosynthesis of secondary metabolites were validated. Six of the thirty-three clusters identified in the genome code for antimicrobial non-ribosomal peptides synthesis. The Whole Genome Shotgun project of B. subtilis GM5 has been deposited in the NCBI database under the accession number NZ_NKJH00000000 (https://www.ncbi.nlm.nih.gov/nuccore/NZ_NKJH00000000.1).",2018-12-28 +26631432,Human Disease Insight: An integrated knowledge-based platform for disease-gene-drug information.,"The scope of the Human Disease Insight (HDI) database is not limited to researchers or physicians as it also provides basic information to non-professionals and creates disease awareness, thereby reducing the chances of patient suffering due to ignorance. HDI is a knowledge-based resource providing information on human diseases to both scientists and the general public. Here, our mission is to provide a comprehensive human disease database containing most of the available useful information, with extensive cross-referencing. HDI is a knowledge management system that acts as a central hub to access information about human diseases and associated drugs and genes. In addition, HDI contains well-classified bioinformatics tools with helpful descriptions. 
These integrated bioinformatics tools enable researchers to annotate disease-specific genes and perform protein analysis, search for biomarkers and identify potential vaccine candidates. Eventually, these tools will facilitate the analysis of disease-associated data. The HDI provides two types of search capabilities and includes provisions for downloading, uploading and searching disease/gene/drug-related information. The logistical design of the HDI allows for regular updating. The database is designed to work best with Mozilla Firefox and Google Chrome and is freely accessible at http://humandiseaseinsight.com.",2015-11-27 +31308250,Integration and Analysis of CPTAC Proteomics Data in the Context of Cancer Genomics in the cBioPortal.,"The Clinical Proteomic Tumor Analysis Consortium (CPTAC) has produced extensive mass spectrometry-based proteomics data for selected breast, colon, and ovarian tumors from The Cancer Genome Atlas (TCGA). We have incorporated the CPTAC proteomics data into the cBioPortal to support easy exploration and integrative analysis of these proteomic datasets in the context of the clinical and genomics data from the same tumors. cBioPortal is an open source platform for exploring, visualizing, and analyzing multidimensional cancer genomics and clinical data. The public instance of the cBioPortal (http://cbioportal.org/) hosts more than 200 cancer genomics studies, including all of the data from TCGA. Its biologist-friendly interface provides many rich analysis features, including a graphical summary of gene-level data across multiple platforms, correlation analysis between genes or other data types, survival analysis, and per-patient data visualization. Here, we present the integration of the CPTAC mass spectrometry-based proteomics data into the cBioPortal, consisting of 77 breast, 95 colorectal, and 174 ovarian tumors that already have been profiled by TCGA for mutations, copy number alterations, gene expression, and DNA methylation. 
As a result, the CPTAC data can now be easily explored and analyzed in the cBioPortal in the context of clinical and genomics data. By integrating CPTAC data into cBioPortal, limitations of TCGA proteomics array data can be overcome while also providing a user-friendly web interface, a web API, and an R client to query the mass spectrometry data together with genomic, epigenomic, and clinical data.",2019-07-15 +32659717,Differences in DYF387S1 copy number distribution among haplogroups caused by haplogroup-specific ancestral Y-chromosome mutations.,"DYF387S1 is a major Y-chromosome short tandem repeat (Y-STR) used in forensic genetics that is included in the Y-chromosomal haplotype reference database (YHRD, https://yhrd.org) and it is known as a rapidly mutating Y-STR. DYF387S1 is a multi-locus marker and the two paralogs are within a palindromic sequence which is a region prone to structural chromosome mutation. In this study, we investigated DYF387S1 copy number distribution and separately typed the two DYF387S1 paralogs in a Japanese population. We found different DYF387S1 copy numbers among haplogroups indicating that the differences had been caused by haplogroup-specific ancestral Y-chromosomal mutations, such as deletion, duplication and non-allelic gene conversion. In haplogroup C, it is likely that gene conversion between two DYF387S1 paralogs had occurred in the common ancestral Y-chromosome for paragroup C-M130* and duplication of DYF387S1 had occurred in the common ancestral Y-chromosome for haplogroup C-M131. Meanwhile, in haplogroup D, deletion of the upstream DYF387S1 paralog is likely to have occurred in the common ancestral Y-chromosome for paragroup D-M57* and duplication of the remaining DYF387S1 paralog is indicated in the common ancestral Y-chromosome for haplogroup D-M125. In haplogroup O, structural mutations changing the DYF387S1 copy number had probably not occurred in the common ancestral Y-chromosome. 
We also suggest that deletion of one DYF387S1 paralog occurred in haplogroup N and that deletion of one DYF387S1 paralog or DYF387S1 gene conversion occurred in haplogroup Q. This is the first study that has separately typed the two DYF387S1 paralogs in a large population dataset. As haplogroups C, D, N, O and Q are also observed in other populations, the ancestral mutation events indicated by this study may have affected DYF387S1 polymorphism in other areas of the world.",2020-05-31 +24350770,MOPED enables discoveries through consistently processed proteomics data.,"The Model Organism Protein Expression Database (MOPED, http://moped.proteinspire.org) is an expanding proteomics resource to enable biological and biomedical discoveries. MOPED aggregates simple, standardized and consistently processed summaries of protein expression and metadata from proteomics (mass spectrometry) experiments from human and model organisms (mouse, worm, and yeast). The latest version of MOPED adds new estimates of protein abundance and concentration as well as relative (differential) expression data. MOPED provides a new updated query interface that allows users to explore information by organism, tissue, localization, condition, experiment, or keyword. MOPED supports the Human Proteome Project's efforts to generate chromosome- and diseases-specific proteomes by providing links from proteins to chromosome and disease information as well as many complementary resources. MOPED supports a new omics metadata checklist to harmonize data integration, analysis, and use. MOPED's development is driven by the user community, which spans 90 countries and guides future development that will transform MOPED into a multiomics resource. MOPED encourages users to submit data in a simple format. They can use the metadata checklist to generate a data publication for this submission. 
As a result, MOPED will provide even greater insights into complex biological processes and systems and enable deeper and more comprehensive biological and biomedical discoveries.",2013-12-18 +26322998,CTDB: An Integrated Chickpea Transcriptome Database for Functional and Applied Genomics.,"Chickpea is an important grain legume used as a rich source of protein in human diet. The narrow genetic diversity and limited availability of genomic resources are the major constraints in implementing breeding strategies and biotechnological interventions for genetic enhancement of chickpea. We developed an integrated Chickpea Transcriptome Database (CTDB), which provides the comprehensive web interface for visualization and easy retrieval of transcriptome data in chickpea. The database features many tools for similarity search, functional annotation (putative function, PFAM domain and gene ontology) search and comparative gene expression analysis. The current release of CTDB (v2.0) hosts transcriptome datasets with high quality functional annotation from cultivated (desi and kabuli types) and wild chickpea. A catalog of transcription factor families and their expression profiles in chickpea are available in the database. The gene expression data have been integrated to study the expression profiles of chickpea transcripts in major tissues/organs and various stages of flower development. The utilities, such as similarity search, ortholog identification and comparative gene expression have also been implemented in the database to facilitate comparative genomic studies among different legumes and Arabidopsis. Furthermore, the CTDB represents a resource for the discovery of functional molecular markers (microsatellites and single nucleotide polymorphisms) between different chickpea types. We anticipate that integrated information content of this database will accelerate the functional and applied genomic research for improvement of chickpea. 
The CTDB web service is freely available at http://nipgr.res.in/ctdb.html.",2015-08-31 +31641140,Tracking vegetation phenology across diverse biomes using Version 2.0 of the PhenoCam Dataset.,"Monitoring vegetation phenology is critical for quantifying climate change impacts on ecosystems. We present an extensive dataset of 1783 site-years of phenological data derived from PhenoCam network imagery from 393 digital cameras, situated from tropics to tundra across a wide range of plant functional types, biomes, and climates. Most cameras are located in North America. Every half hour, cameras upload images to the PhenoCam server. Images are displayed in near-real time and provisional data products, including timeseries of the Green Chromatic Coordinate (Gcc), are made publicly available through the project web page ( https://phenocam.sr.unh.edu/webcam/gallery/ ). Processing is conducted separately for each plant functional type in the camera field of view. The PhenoCam Dataset v2.0, described here, has been fully processed and curated, including outlier detection and expert inspection, to ensure high quality data. This dataset can be used to validate satellite data products, to evaluate predictions of land surface models, to interpret the seasonality of ecosystem-scale CO2 and H2O flux data, and to study climate change impacts on the terrestrial biosphere.",2019-10-22 +24783344,[Satisfaction with the quality of care in nursing homes--the nurses' perspective].,"

Background

In Germany, the number of residents in Nursing Homes (NH) has increased in recent years, residents become older, increasingly multimorbid and suffer more from dementia. In parallel demands concerning the quality of care in NH have increased. The vivid political and public debate about quality of care, however, widely disregards the perception of nurses. The aim of this study is to investigate the nurses' satisfaction with the quality of care in their NH and potential psychological consequences.

Methods

Secondary questionnaire data from 1489 nurses in 88 NH of the German 3Q-study (www.3q-studie.de) were used from the 2011 investigation. Questions regarding satisfaction enquire satisfaction in five nursing work domains. Descriptive analyses as well as Chi2-tests were performed.

Results

The majority of nurses were satisfied in the subdomain ""overall quality of care"" and ""physical care"" (80% each). 67% were satisfied with ""the quality of care for residents with dementia"" and 64% with ""end-of-life care"". Only 56% of the nurses were satisfied with ""mental care"". If nurses were unsatisfied with the quality of care, this was mostly perceived as a psychological stressor. Subgroup analysis showed a pattern for four of the five domains: dissatisfied nurses were older, better qualified, worked more than 25 hours per week and worked in larger NH. No such pattern was found for ""quality of care for residents with dementia"".

Conclusions

Nurses' satisfaction with the quality of care has shown to be a relevant work factor and potential stressor deserving more scientific and clinical attention. For NH it could constitute a core indicator for internal quality management as well as for human resource management. Research in work, health and economy in NH should also consider this factor.",2014-03-01 +32207533,Resolving single-cell heterogeneity from hundreds of thousands of cells through sequential hybrid clustering and NMF.,"

Motivation

The rapid proliferation of single-cell RNA-sequencing (scRNA-Seq) technologies has spurred the development of diverse computational approaches to detect transcriptionally coherent populations. While the complexity of the algorithms for detecting heterogeneity has increased, most require significant user-tuning, are heavily reliant on dimension reduction techniques and are not scalable to ultra-large datasets. We previously described a multi-step algorithm, Iterative Clustering and Guide-gene Selection (ICGS), which applies intra-gene correlation and hybrid clustering to uniquely resolve novel transcriptionally coherent cell populations from an intuitive graphical user interface.

Results

We describe a new iteration of ICGS that outperforms state-of-the-art scRNA-Seq detection workflows when applied to well-established benchmarks. This approach combines multiple complementary subtype detection methods (HOPACH, sparse non-negative matrix factorization, cluster 'fitness', support vector machine) to resolve rare and common cell-states, while minimizing differences due to donor or batch effects. Using data from multiple cell atlases, we show that the PageRank algorithm effectively downsamples ultra-large scRNA-Seq datasets, without losing extremely rare or transcriptionally similar yet distinct cell types and while recovering novel transcriptionally distinct cell populations. We believe this new approach holds tremendous promise in reproducibly resolving hidden cell populations in complex datasets.

Availability and implementation

ICGS2 is implemented in Python. The source code and documentation are available at http://altanalyze.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +33177204,Deep Gene Sequence Cluster Analyses of Multi-Virus-Infected Mucosal Tissue Reveal Enhanced Transmission of Acute HIV-1. ,"Exposure of the genital mucosa to a genetically diverse viral swarm from the donor HIV-1 can result in breakthrough and systemic infection by a single transmitted/founder (TF) virus in the recipient. The highly diverse HIV-1 envelope (Env) in this inoculating viral swarm may have a critical role in transmission and subsequent immune response. Thus, chronic (Envchronic) and acute (Envacute) Env chimeric HIV-1 were tested using multivirus competition assays in human mucosal penile and cervical tissues. Viral competition analysis revealed that Envchronic viruses resided and replicated mainly in the tissue, while Envacute viruses penetrated the human tissue and established infection of CD4+ T cells more efficiently. Analysis of the replication fitness, as tested in peripheral blood mononuclear cells (PBMCs), showed similar replication fitness of Envacute and Envchronic viruses, which did not correlate with transmission fitness in penile tissue. Further, we observed that chimeric Env viruses with higher replication in genital mucosal tissue (chronic Env viruses) had higher binding affinity to C-type lectins. Data presented herein suggest that the inoculating HIV-1 may be sequestered in the genital mucosal tissue (represented by chronic Env HIV-1) but that a single HIV-1 clone (e.g., acute Env HIV-1) can escape this trapped replication for systemic infection.IMPORTANCE During heterosexual HIV-1 transmission, a genetic bottleneck occurs in the newly infected individual as the virus passes from the mucosa, leading to systemic infection with a single transmitted HIV-1 clone in the recipient. This bottleneck in the recipient has just been described (K. 
Klein et al., PLoS Pathog 14:e1006754, https://doi.org/10.1371/journal.ppat.1006754), and the mechanisms involved in this selection process have not been elucidated. However, understanding mucosal restriction is of the utmost importance for understanding dynamics of infections and for designing focused vaccines. Using our human penile and cervical mucosal tissue models for mixed HIV infections, we provide evidence that HIV-1 from acute/early infection, compared to that from chronic infection, can more efficiently traverse the mucosal epithelium and be transmitted to T cells, suggesting higher transmission fitness. This study focused on the role of the HIV-1 envelope in transmission and provides strong evidence that HIV transmission may involve breaking the mucosal lectin trap.",2021-01-13 +28635135,The gene expression landscape of pine seedling tissues.,"Conifers dominate vast regions of the Northern hemisphere. They are the main source of raw materials for timber industry as well as a wide range of biomaterials. Despite their inherent difficulties as experimental models for classical plant biology research, the technological advances in genomics research are enabling fundamental studies on these plants. The use of laser capture microdissection followed by transcriptomic analysis is a powerful tool for unravelling the molecular and functional organization of conifer tissues and specialized cells. In the present work, 14 different tissues from 1-month-old maritime pine (Pinus pinaster) seedlings have been isolated and their transcriptomes analysed. The results increased the sequence information and number of full-length transcripts from a previous reference transcriptome and added 39 841 new transcripts. In total, 2376 transcripts were ubiquitously expressed in all of the examined tissues. These transcripts could be considered the core 'housekeeping genes' in pine. The genes have been clustered in function to their expression profiles. 
This analysis reduced the number of profiles to 38, most of these defined by their expression in a unique tissue that is much higher than in the other tissues. The expression and localization data are accessible at ConGenIE.org (http://v22.popgenie.org/microdisection/). This study presents an overview of the gene expression distribution in different pine tissues, specifically highlighting the relationships between tissue gene expression and function. This transcriptome atlas is a valuable resource for functional genomics research in conifers.",2017-08-04 +28460065,SBSPKSv2: structure-based sequence analysis of polyketide synthases and non-ribosomal peptide synthetases.,"Genome guided discovery of novel natural products has been a promising approach for identification of new bioactive compounds. SBSPKS web-server has been a valuable resource for analysis of polyketide synthase (PKS) and non-ribosomal peptide synthetase (NRPS) gene clusters. We have developed an updated version - SBSPKSv2 which is based on comprehensive analysis of sequence, structure and secondary metabolite chemical structure data from 311 experimentally characterized PKS/NRPS gene clusters with known biosynthetic products. A completely new feature of SBSPKSv2 is the inclusion of features for search in chemical space. It allows the user to compare the chemical structure of a given secondary metabolite to the chemical structures of biosynthetic intermediates and final products. For identification of catalytic domains, SBSPKS now uses profile based searches, which are computationally faster and have high sensitivity. HMM profiles have also been added for a number of new domains and motif information has been used for distinguishing condensation (C), epimerization (E) and cyclization (Cy) domains of NRPS. In summary, the new and updated SBSPKSv2 is a versatile tool for genome mining and analysis of polyketide and non-ribosomal peptide biosynthetic pathways in chemical space. 
The server is available at: http://www.nii.ac.in/sbspks2.html.",2017-07-01 +32414754,State-Transition Analysis of Time-Sequential Gene Expression Identifies Critical Points That Predict Development of Acute Myeloid Leukemia.,"Temporal dynamics of gene expression inform cellular and molecular perturbations associated with disease development and evolution. Given the complexity of high-dimensional temporal genomic data, an analytic framework guided by a robust theory is needed to interpret time-sequential changes and to predict system dynamics. Here we model temporal dynamics of the transcriptome of peripheral blood mononuclear cells in a two-dimensional state-space representing states of health and leukemia using time-sequential bulk RNA-seq data from a murine model of acute myeloid leukemia (AML). The state-transition model identified critical points that accurately predict AML development and identifies stepwise transcriptomic perturbations that drive leukemia progression. The geometry of the transcriptome state-space provided a biological interpretation of gene dynamics, aligned gene signals that are not synchronized in time across mice, and allowed quantification of gene and pathway contributions to leukemia development. Our state-transition model synthesizes information from multiple cell types in the peripheral blood and identifies critical points in the transition from health to leukemia to guide interpretation of changes in the transcriptome as a whole to predict disease progression. SIGNIFICANCE: These findings apply the theory of state transitions to model the initiation and development of acute myeloid leukemia, identifying transcriptomic perturbations that accurately predict time to disease development.See related commentary by Kuijjer, p. 
3072 GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/15/3157/F1.large.jpg.",2020-05-15 +25361974,ArrayExpress update--simplifying data submissions.,"The ArrayExpress Archive of Functional Genomics Data (http://www.ebi.ac.uk/arrayexpress) is an international functional genomics database at the European Bioinformatics Institute (EMBL-EBI) recommended by most journals as a repository for data supporting peer-reviewed publications. It contains data from over 7000 public sequencing and 42,000 array-based studies comprising over 1.5 million assays in total. The proportion of sequencing-based submissions has grown significantly over the last few years and has doubled in the last 18 months, whilst the rate of microarray submissions is growing slightly. All data in ArrayExpress are available in the MAGE-TAB format, which allows robust linking to data analysis and visualization tools and standardized analysis. The main development over the last two years has been the release of a new data submission tool Annotare, which has reduced the average submission time almost 3-fold. In the near future, Annotare will become the only submission route into ArrayExpress, alongside MAGE-TAB format-based pipelines. ArrayExpress is a stable and highly accessed resource. Our future tasks include automation of data flows and further integration with other EMBL-EBI resources for the representation of multi-omics data.",2014-10-31 +25324312,EpilepsyGene: a genetic resource for genes and mutations related to epilepsy.,"Epilepsy is one of the most prevalent chronic neurological disorders, afflicting about 3.5-6.5 per 1000 children and 10.8 per 1000 elderly people. With intensive effort made during the last two decades, numerous genes and mutations have been published to be associated with the disease. An organized resource integrating and annotating the ever-increasing genetic data will be imperative to acquire a global view of the cutting-edge in epilepsy research. 
Herein, we developed EpilepsyGene (http://61.152.91.49/EpilepsyGene). It contains cumulative to date 499 genes and 3931 variants associated with 331 clinical phenotypes collected from 818 publications. Furthermore, in-depth data mining was performed to gain insights into the understanding of the data, including functional annotation, gene prioritization, functional analysis of prioritized genes and overlap analysis focusing on the comorbidity. An intuitive web interface to search and browse the diversified genetic data was also developed to facilitate access to the data of interest. In general, EpilepsyGene is designed to be a central genetic database to provide the research community substantial convenience to uncover the genetic basis of epilepsy.",2014-10-16 +27892827,"Rationale, Procedures, and Response Rates for the 2015 Administration of NCI's Health Information National Trends Survey: HINTS-FDA 2015.","The National Cancer Institute (NCI) developed the Health Information National Trends Survey (HINTS) to monitor population trends in cancer communication practices, information preferences, health risk behaviors, attitudes, and cancer knowledge. The U.S. Food and Drug Administration (FDA) recognized HINTS as a unique data resource for informing its health communication endeavors and partnered with NCI to field HINTS-FDA 2015. HINTS-FDA 2015 was a self-administered paper instrument sent by mail May 29 to September 8, 2015, using a random probability-based sample of U.S. postal addresses stratified by county-level smoking rates, with an oversampling of high and medium-high smoking strata to increase the yield of current smokers responding to the survey. The response rate for HINTS-FDA 2015 was 33% (N = 3,738). The yield of current smokers (n = 495) was lower than expected, but the sampling strategy achieved the goal of obtaining more former smokers (n = 1,132). 
Public-use HINTS-FDA 2015 data and supporting documentation have been available for download and secondary data analyses since June 2016 at http://hints.cancer.gov . NCI and FDA encourage the use of HINTS-FDA for health communication research and practice related to tobacco-related communications, public knowledge, and behaviors as well as beliefs and actions related to medical products and dietary supplements.",2016-11-28 +30398656,"InterPro in 2019: improving coverage, classification and access to protein sequence annotations.","The InterPro database (http://www.ebi.ac.uk/interpro/) classifies protein sequences into families and predicts the presence of functionally important domains and sites. Here, we report recent developments with InterPro (version 70.0) and its associated software, including an 18% growth in the size of the database in terms on new InterPro entries, updates to content, the inclusion of an additional entry type, refined modelling of discontinuous domains, and the development of a new programmatic interface and website. These developments extend and enrich the information provided by InterPro, and provide greater flexibility in terms of data access. We also show that InterPro's sequence coverage has kept pace with the growth of UniProtKB, and discuss how our evaluation of residue coverage may help guide future curation activities.",2019-01-01 +33094610,iUmami-SCM: A Novel Sequence-Based Predictor for Prediction and Analysis of Umami Peptides Using a Scoring Card Method with Propensity Scores of Dipeptides.,"Umami or the taste of monosodium glutamate represents one of the major attractive taste modalities in humans. Therefore, knowledge about biophysical and biochemical properties of the umami taste is important for both scientific research and the food industry. Experimental approaches for predicting umami peptides are labor intensive, time consuming, and expensive. 
To date, computational models for the prediction and analysis of umami peptides as a function of sequence information have not been developed yet. In this study, we have proposed the first sequence-based predictor named iUmami-SCM using primary sequence information for the identification and characterization of umami peptides. iUmami-SCM utilized a newly developed scoring card method (SCM) in conjunction with the propensity scores of amino acids and dipeptide. Our predictor demonstrated excellent prediction performance ability for predicting umami peptides as well as outperforming other commonly used machine learning classifiers. Particularly, iUmami-SCM afforded the highest accuracy and Matthews correlation coefficient of 0.865 and 0.679, respectively, on an independent data set. Furthermore, the analysis of SCM-derived propensity scores was performed so as to provide a more in-depth understanding and knowledge of biophysical and biochemical properties of umami intensities of peptides. To develop a convenient bioinformatics tool, the best model is deployed as a web server that is made publicly available at http://camt.pythonanywhere.com/iUmami-SCM. The iUmami-SCM, as presented herein, serves as a powerful computational technique for large-scale umami peptide identification as well as facilitating the interpretation of umami peptides.",2020-10-23 +30649171,ANPELA: analysis and performance assessment of the label-free quantification workflow for metaproteomic studies.,"Label-free quantification (LFQ) with a specific and sequentially integrated workflow of acquisition technique, quantification tool and processing method has emerged as the popular technique employed in metaproteomic research to provide a comprehensive landscape of the adaptive response of microbes to external stimuli and their interactions with other organisms or host cells. The performance of a specific LFQ workflow is highly dependent on the studied data. 
Hence, it is essential to discover the most appropriate one for a specific data set. However, it is challenging to perform such discovery due to the large number of possible workflows and the multifaceted nature of the evaluation criteria. Herein, a web server ANPELA (https://idrblab.org/anpela/) was developed and validated as the first tool enabling performance assessment of whole LFQ workflow (collective assessment by five well-established criteria with distinct underlying theories), and it enabled the identification of the optimal LFQ workflow(s) by a comprehensive performance ranking. ANPELA not only automatically detects the diverse formats of data generated by all quantification tools but also provides the most complete set of processing methods among the available web servers and stand-alone tools. Systematic validation using metaproteomic benchmarks revealed ANPELA's capabilities in 1 discovering well-performing workflow(s), (2) enabling assessment from multiple perspectives and (3) validating LFQ accuracy using spiked proteins. ANPELA has a unique ability to evaluate the performance of whole LFQ workflow and enables the discovery of the optimal LFQs by the comprehensive performance ranking of all 560 workflows. Therefore, it has great potential for applications in metaproteomic and other studies requiring LFQ techniques, as many features are shared among proteomic studies.",2020-03-01 +31830239,Characterizing and automatically detecting smooth pursuit in a large-scale ground-truth data set of dynamic natural scenes.,"Eye movements are fundamental to our visual experience of the real world, and tracking smooth pursuit eye movements play an important role because of the dynamic nature of our environment. Static images, however, do not induce this class of eye movements, and commonly used synthetic moving stimuli lack ecological validity because of their low scene complexity compared to the real world. 
Traditionally, ground truth data for pursuit analyses with naturalistic stimuli are obtained via laborious hand-labelling. Therefore, previous studies typically remained small in scale. We here present the first large-scale quantitative characterization of human smooth pursuit. In order to achieve this, we first provide a methodological framework for such analyses by collecting a large set of manual annotations for eye movements in dynamic scenes and by examining the bias and variance of human annotators. To enable further research on even larger future data sets, we also describe, improve, and thoroughly analyze a novel algorithm to automatically classify eye movements. Our approach incorporates unsupervised learning techniques and thus demonstrates improved performance with the addition of unlabelled data. The code and data related to our manual and automated eye movement annotation are publicly available via https://web.gin.g-node.org/ioannis.agtzidis/gazecom_annotations/.",2019-12-01 +31985781,Impact of weight on the efficacy and safety of direct-acting oral anticoagulants in patients with non-valvular atrial fibrillation: a meta-analysis.,"

Aims

This study sought to determine the impact of weight and body mass index (BMI) on the safety and efficacy of direct-acting oral anticoagulants (DOACs) compared with warfarin in patients with non-valvular atrial fibrillation.

Methods and results

A systematic literature search was employed in PubMed, Embase, and Cochrane clinical trials with no language or date restrictions. Randomized trials or their substudies were assessed for relevant outcome data for efficacy that included stroke or systemic embolization (SSE), and safety including major bleeding and all-cause mortality. Binary outcome data and odds ratios from the relevant articles were used to calculate the pooled relative risk. For SSE, the data from the four Phase III trials showed that DOACs are better or similarly effective with low BMI 0.73 (0.56-0.97), normal BMI 0.72 (0.58-0.91), overweight 0.87 (0.76-0.99), and obese 0.87 (0.76-1.00). The risk of major bleeding was also better or similar with DOACs in all BMI subgroups with low BMI 0.62 (0.37-1.05), normal BMI 0.72 (0.58-0.90), overweight 0.83 (0.71-0.96), and obese 0.91 (0.81-1.03). There was no impact on mortality in all the subgroups. In a meta-regression analysis, the effect size advantage of DOACs compared with warfarin in terms of safety and efficacy gradually attenuated with increasing weight.

Conclusion

Our findings suggest that a weight-based dosage adjustment may be necessary to achieve optimal benefits of DOACs for thromboembolic prevention in these patients with non-valvular atrial fibrillation. Further dedicated trials are needed to confirm these findings. PROSPERO 2019 CRD42019140693. Available from: https://www.crd.york.ac.uk/prospero/display_record.php?ID=CRD42019140693.",2020-03-01 +30407086,A State-of-the-Science Review of Mercury Biomarkers in Human Populations Worldwide between 2000 and 2018.,"

Background

The Minamata Convention on Mercury provided a mandate for action against global mercury pollution. However, our knowledge of mercury exposures is limited because there are many regions and subpopulations with little or no data.

Objective

We aimed to increase worldwide understanding of human exposures to mercury by collecting, collating, and analyzing mercury concentrations in biomarker samples reported in the published scientific literature.

Method

A systematic search of the peer-reviewed scientific literature was performed using three databases. A priori search strategy, eligibility criteria, and data extraction steps were used to identify relevant studies.

Results

We collected 424,858 mercury biomarker measurements from 335,991 individuals represented in 312 articles from 75 countries. General background populations with insignificant exposures have blood, hair, and urine mercury levels that generally fall under [Formula: see text], [Formula: see text], and [Formula: see text], respectively. We identified four populations of concern: a) Arctic populations who consume fish and marine mammals; b) tropical riverine communities (especially Amazonian) who consume fish and in some cases may be exposed to mining; c) coastal and/or small-island communities who substantially depend on seafood; and d) individuals who either work or reside among artisanal and small-scale gold mining sites.

Conclusions

This review suggests that all populations worldwide are exposed to some amount of mercury and that there is great variability in exposures within and across countries and regions. There remain many geographic regions and subpopulations with limited data, thus hindering evidence-based decision making. This type of information is critical in helping understand exposures, particularly in light of certain stipulations in the Minamata Convention on Mercury. https://doi.org/10.1289/EHP3904.",2018-10-01 +31210272,GrainGenes: centralized small grain resources and digital platform for geneticists and breeders. ,"GrainGenes (https://wheat.pw.usda.gov or https://graingenes.org) is an international centralized repository for curated, peer-reviewed datasets useful to researchers working on wheat, barley, rye and oat. GrainGenes manages genomic, genetic, germplasm and phenotypic datasets through a dynamically generated web interface for facilitated data discovery. Since 1992, GrainGenes has served geneticists and breeders in both the public and private sectors on six continents. Recently, several new datasets were curated into the database along with new tools for analysis. The GrainGenes homepage was enhanced by making it more visually intuitive and by adding links to commonly used pages. Several genome assemblies and genomic tracks are displayed through the genome browsers at GrainGenes, including the Triticum aestivum (bread wheat) cv. 'Chinese Spring' IWGSC RefSeq v1.0 genome assembly, the Aegilops tauschii (D genome progenitor) Aet v4.0 genome assembly, the Triticum turgidum ssp. dicoccoides (wild emmer wheat) cv. 'Zavitan' WEWSeq v.1.0 genome assembly, a T. aestivum (bread wheat) pangenome, the Hordeum vulgare (barley) cv. 'Morex' IBSC genome assembly, the Secale cereale (rye) select 'Lo7' assembly, a partial hexaploid Avena sativa (oat) assembly and the Triticum durum cv. 'Svevo' (durum wheat) RefSeq Release 1.0 assembly. 
New genetic maps and markers were added and can be displayed through CMAP. Quantitative trait loci, genetic maps and genes from the Wheat Gene Catalogue are indexed and linked through the Wheat Information System (WheatIS) portal. Training videos were created to help users query and reach the data they need. GSP (Genome Specific Primers) and PIECE2 (Plant Intron Exon Comparison and Evolution) tools were implemented and are available to use. As more small grains reference sequences become available, GrainGenes will play an increasingly vital role in helping researchers improve crops.",2019-01-01 +27938331,Robust multi-group gene set analysis with few replicates.,"

Background

Competitive gene set analysis is a standard exploratory tool for gene expression data. Permutation-based competitive gene set analysis methods are preferable to parametric ones because the latter make strong statistical assumptions which are not always met. For permutation-based methods, we permute samples, as opposed to genes, as doing so preserves the inter-gene correlation structure. Unfortunately, up until now, sample permutation-based methods have required a minimum of six replicates per sample group.

Results

We propose a new permutation-based competitive gene set analysis method for multi-group gene expression data with as few as three replicates per group. The method is based on an advanced sample permutation technique that utilizes all groups within a data set for pairwise comparisons. We present a comprehensive evaluation of different permutation techniques, using multiple data sets and contrast the performance of our method, mGSZm, with other state of the art methods. We show that mGSZm is robust, and that, despite using fewer than six replicates, we are able to consistently identify a high proportion of the top ranked gene sets from the analysis of a substantially larger data set. Further, we highlight other methods where performance is highly variable and appears dependent on the underlying data set being analyzed.

Conclusions

Our results demonstrate that robust gene set analysis of multi-group gene expression data is permissible with as few as three replicates. In doing so, we have extended the applicability of such approaches to resource constrained experiments where additional data generation is prohibitively difficult or expensive. An R package implementing the proposed method and supplementary materials are available from the website http://ekhidna.biocenter.helsinki.fi/downloads/pashupati/mGSZm.html .",2016-12-09 +,"Glyco3D: A Suite of Interlinked Databases of 3D Structures of Complex Carbohydrates, Lectins, Antibodies, and Glycosyltransferases","Glyco3D is a portal for structural glycobiology of several interlinked databases that is covering the three-dimensional features of monosaccharides, disaccharides, oligosaccharides, polysaccharides, glycosyltransferases, lectins, monoclonal antibodies, and glycosaminoglycan-binding proteins. Collection of annotated NMR data of bioactive oligosaccharides is also available. A common nomenclature has been adopted for the structural encoding of the carbohydrates. Each individual database stands by itself as it covers a particular family of either complex carbohydrates or carbohydrate-binding proteins. A unique search engine is available that scans the full content of all the databases for queries related to sequential information of the carbohydrates. The interconnection of these databases provides a unique opportunity to characterize the three-dimensional features that a given oligosaccharide molecule can take in different environments, i.e., vacuum, crystalline state, or interacting with different proteins having different biological function. The databases, which have been manually curated, were developed with nonproprietary software. 
They are web-based platform and are freely available to the scientific community at http://glyco3d.cermav.cnrs.fr.",2016-09-08 +26590404,FunTree: advances in a resource for exploring and contextualising protein function evolution.,"FunTree is a resource that brings together protein sequence, structure and functional information, including overall chemical reaction and mechanistic data, for structurally defined domain superfamilies. Developed in tandem with the CATH database, the original FunTree contained just 276 superfamilies focused on enzymes. Here, we present an update of FunTree that has expanded to include 2340 superfamilies including both enzymes and proteins with non-enzymatic functions annotated by Gene Ontology (GO) terms. This allows the investigation of how novel functions have evolved within a structurally defined superfamily and provides a means to analyse trends across many superfamilies. This is done not only within the context of a protein's sequence and structure but also the relationships of their functions. New measures of functional similarity have been integrated, including for enzymes comparisons of overall reactions based on overall bond changes, reaction centres (the local environment atoms involved in the reaction) and the sub-structure similarities of the metabolites involved in the reaction and for non-enzymes semantic similarities based on the GO. To identify and highlight changes in function through evolution, ancestral character estimations are made and presented. All this is accessible through a new re-designed web interface that can be found at http://www.funtree.info.",2015-11-20 +31770393,"Parasitic infections and medical expenses according to Health Insurance Review Assessment claims data in South Korea, 2011-2018.","INTRODUCTION:In South Korea, Health Insurance Review and Assessment claims data contain comprehensive information on healthcare services for almost the entire population. 
The present study used claims data on parasitic diseases from 2011 to 2018, and associated medical expenses to investigate infection trends associated with endemic parasitic diseases in South Korea, including those not monitored by Korea Centers for Disease Control and Prevention. METHODS:Basic data regarding each parasitic disease were curated from the Healthcare Bigdata Hub (http://opendata.hira.or.kr). Ten endemic parasitic diseases, three pandemic protozoan diseases, and three ectoparasitic diseases were evaluated between 2011 and 2018. Data on each parasitic disease included the number of patients of each sex, age range within 5 years, province, and total medical expenses. Heatmap and principal component analysis were performed to visualize the incidence pattern of parasitic diseases by provinces. RESULTS:Clonorchiasis and pinworm infections decreased remarkably from 6,097 and 4,018 infections in 2011 to 3,008 and 1,988 infections in 2018, respectively. Other endemic parasitic diseases mostly declined or remained steady over the 8-year period, except for anisakiasis, which doubled from 409 in 2011 to 818 in 2018. Provinces close to North Korea had a higher frequency of claims for Plasmodium vivax infection. The highest rate of clonorchiasis was in Gyeongsangnam-do, while that of anisakiasis was in southern Korea. Jeju province had the highest number of claims for cysticercosis, anisakiasis, pinworm infection, and soil-transmitted helminth infections. The total medical expense for anisakiasis was 65 million Korean won (57,000 US$) in 2011, rising to 237 million Korean won (206,000 US$) in 2018. The medical expense for trichomoniasis was 6,063 million won and for scabies was 1,669 million won in 2018. Since the claims data include only data reported by healthcare providers, some discrepancies might have occurred. 
CONCLUSION:Our findings provide the basis for a health policy to reduce further infections and medical expense.",2019-11-26 +32271844,Blind estimation and correction of microarray batch effect.,"Microarray batch effect (BE) has been the primary bottleneck for large-scale integration of data from multiple experiments. Current BE correction methods either need known batch identities (ComBat) or have the potential to overcorrect, by removing true but unknown biological differences (Surrogate Variable Analysis SVA). It is well known that experimental conditions such as array or reagent batches, PCR amplification or ozone levels can affect the measured expression levels; often the direction of perturbation of the measured expression is the same in different datasets. However, there are no BE correction algorithms that attempt to estimate the individual effects of technical differences and use them to correct expression data. In this manuscript, we show that a set of signatures, each of which is a vector the length of the number of probes, calculated on a reference set of microarray samples can predict much of the batch effect in other validation sets. We present a rationale of selecting a reference set of samples designed to estimate technical differences without removing biological differences. Putting both together, we introduce the Batch Effect Signature Correction (BESC) algorithm that uses the BES calculated on the reference set to efficiently predict and remove BE. Using two independent validation sets, we show that BESC is capable of removing batch effect without removing unknown but true biological differences. Much of the variations due to batch effect is shared between different microarray datasets. That shared information can be used to predict signatures (i.e. directions of perturbation) due to batch effect in new datasets. 
The correction can be precomputed without using the samples to be corrected (blind), done on each sample individually (single sample) and corrects only known technical effects without removing known or unknown biological differences (conservative). Those three characteristics make it ideal for high-throughput correction of samples for a microarray data repository. We also compare the performance of BESC to three other batch correction methods: SVA, Removing Unwanted Variation (RUV) and Hidden Covariates with Prior (HCP). An R Package besc implementing the algorithm is available from http://explainbio.com.",2020-04-09 +25030426,HelicoBase: a Helicobacter genomic resource and analysis platform.,"

Background

Helicobacter is a genus of Gram-negative bacteria, possessing a characteristic helical shape that has been associated with a wide spectrum of human diseases. Although much research has been done on Helicobacter and many genomes have been sequenced, currently there is no specialized Helicobacter genomic resource and analysis platform to facilitate analysis of these genomes. With the increasing number of Helicobacter genomes being sequenced, comparative genomic analysis on members of this species will provide further insights on their taxonomy, phylogeny, pathogenicity and other information that may contribute to better management of diseases caused by Helicobacter pathogens.

Description

To facilitate the ongoing research on Helicobacter, a specialized central repository and analysis platform for the Helicobacter research community is needed to host the fast-growing amount of genomic data and facilitate the analysis of these data, particularly comparative analysis. Here we present HelicoBase, a user-friendly Helicobacter resource platform with diverse functionality for the analysis of Helicobacter genomic data for the Helicobacter research communities. HelicoBase hosts a total of 13 species and 166 genome sequences of Helicobacter spp. Genome annotations such as gene/protein sequences, protein function and sub-cellular localisation are also included. Our web implementation supports diverse query types and seamless searching of annotations using an AJAX-based real-time searching system. JBrowse is also incorporated to allow rapid and seamless browsing of Helicobacter genomes and annotations. Advanced bioinformatics analysis tools consisting of standard BLAST for similarity search, VFDB BLAST for sequence similarity search against the Virulence Factor Database (VFDB), Pairwise Genome Comparison (PGC) tool for comparative genomic analysis, and a newly designed Pathogenomics Profiling Tool (PathoProT) for comparative pathogenomic analysis are also included to facilitate the analysis of Helicobacter genomic data.

Conclusions

HelicoBase offers access to a range of genomic resources as well as tools for the analysis of Helicobacter genome data. HelicoBase can be accessed at http://helicobacter.um.edu.my.",2014-07-16 +33543123,InMeRF: prediction of pathogenicity of missense variants by individual modeling for each amino acid substitution.,"In predicting the pathogenicity of a nonsynonymous single-nucleotide variant (nsSNV), a radical change in amino acid properties is prone to be classified as being pathogenic. However, not all such nsSNVs are associated with human diseases. We generated random forest (RF) models individually for each amino acid substitution to differentiate pathogenic nsSNVs in the Human Gene Mutation Database and common nsSNVs in dbSNP. We named a set of our models 'Individual Meta RF' (InMeRF). Ten-fold cross-validation of InMeRF showed that the areas under the curves (AUCs) of receiver operating characteristic (ROC) and precision-recall curves were on average 0.941 and 0.957, respectively. To compare InMeRF with seven other tools, the eight tools were generated using the same training dataset, and were compared using the same three testing datasets. ROC-AUCs of InMeRF were ranked first in the eight tools. We applied InMeRF to 155 pathogenic and 125 common nsSNVs in seven major genes causing congenital myasthenic syndromes, as well as in VANGL1 causing spina bifida, and found that the sensitivity and specificity of InMeRF were 0.942 and 0.848, respectively. We made the InMeRF web service, and also made genome-wide InMeRF scores available online (https://www.med.nagoya-u.ac.jp/neurogenetics/InMeRF/).",2020-05-26 +,P01.152 Evaluation of Factor V Leiden variant as risk a factor for venous thromboembolism in glioblastoma patients,"Abstract

Introduction

Venous thromboembolic events (VTE) are common complications in patients with glioblastoma (GBM). Factor V Leiden (FVL) polymorphism (rs6025, c.1601 G>A) is a known risk factor for VTE, evaluated in cancer associated thrombosis (CAT) in different tumor types. Little is known about the role of this variant in development of CAT in patients with GBM.

Material and Methods

A cohort of 116 GBM patients (73 males and 43 females) all treated with concomitant temozolomide and radiotherapy, were genotyped for FVL using PCR-pyrosequencing. Of the cohort, 40 patients were diagnosed with and 76 without VTE. Allele frequencies of the respective variant were also compared with data from the SweGen Variant Frequency Browser (https://swegen-exac.nbis.se/). Statistical analyses in regard to VTE and its association with FVL and prognostic factors were performed.

Results

The variant A/G of FVL was carried by 17 (15%) of the patients and 99 (85%) were wild type, G/G. Chi2 test including the prognostic factors age, type of surgery, gender and blood group showed that these were evenly distributed between those with G/G and A/G, except for blood group in relation to FVL variant, which showed a borderline significance (p=0.07). Among the patients having blood group 0, 32% (n= 32) were G/G and 59% (n= 10) were A/G. Statistical tests did not reveal any correlation between blood group and FVL variant. In the logistic regression analysis apart from FVL, age, gender and blood group were included. This showed a significant difference for blood group 0 versus non-0 (A, B or AB) for decreased risk of VTE (P=0.014). There was no significant difference between heterozygous FVL (A/G) versus the G/G genotype in VTE risk (p=0.09). There were no differences in survival in relation to VTE or not or variant of FVL. The FVL AG variant is slightly overrepresented among GBM compared to the normal Swedish population, but does not reach statistical significance, OR 1.63 (0.93–2.84).

Conclusions

We examined the influence of FVL variants together with clinical factors in a homogenously treated cohort of GBM patients for the risk to develop a VTE. We confirmed blood group 0 versus non-0 as reducing the VTE risk. For FVL c.1601 G>A variant, the differences between AG vs GG did not reach statistical significance. In our cohort we found a trend towards increased risk of developing GBM for the A/G variant. We plan to further study other factors involved in coagulation for their potential role in GBM patients.",2018-09-01 +32830803,Conservative Intervention Strategies for Adult Cancer-Related Lymphedema: A Systematic Review and Network Meta-Analysis.,"

Problem identification

The comparative effectiveness of available management options for cancer-related secondary lymphedema is unknown.

Literature search

CINAHL®, Embase®, and MEDLINE® were searched for randomized trials comparing conservative treatment strategies.

Data evaluation

A network meta-analysis was conducted for lymphedema volume, along with pairwise meta-analyses for remaining outcomes. Evidence certainty was assessed using the GRADE (Grading of Recommendations, Assessment, Development, and Evaluation) approach.

Synthesis

Overall, 36 studies with a total of 1,651 participants were included. Compared to standard care, conservative treatments did not significantly reduce lymphedema volume. There was low to very low certainty evidence of benefit for several treatments on secondary outcomes.

Implications for practice

There is insufficient evidence to suggest important differences between standard care and conservative treatment strategies for reducing lymphedema volume and improving lymphedema-related symptoms.

Supplemental material can be found at https://onf.ons.org/supplementary-material-conservative-intervention-strategies-adult-cancer-related-lymphedema.",2020-09-01 +30611878,FisOmics: A portal of fish genomic resources.,"An online portal, accessible at URL: http://mail.nbfgr.res.in/FisOmics/, was developed that features different genomic databases and tools. The portal, named as FisOmics, acts as a platform for sharing fish genomic sequences and related information in addition to facilitating the access of high-performance computational resources for genome and proteome data analyses. It provides the ability for querying, analysing and visualizing genomic sequences and related information. The featured databases in FisOmics are in the World Wide Web domain already. The aim to develop portal was to provide a nodal point to access the featured databases and work conveniently. Presently, FisOmics includes databases on barcode sequences, microsatellite markers, mitogenome sequences, hypoxia-responsive genes and karyology of fishes. Besides, it has a link to other molecular resources and reports on the on-going activities and research achievements.",2019-01-03 +33068113,Interpreting k-mer-based signatures for antibiotic resistance prediction. ,"Recent years have witnessed the development of several k-mer-based approaches aiming to predict phenotypic traits of bacteria on the basis of their whole-genome sequences. While often convincing in terms of predictive performance, the underlying models are in general not straightforward to interpret, the interplay between the actual genetic determinant and its translation as k-mers being generally hard to decipher. We propose a simple and computationally efficient strategy allowing one to cope with the high correlation inherent to k-mer-based representations in supervised machine learning models, leading to concise and easily interpretable signatures. 
We demonstrate the benefit of this approach on the task of predicting the antibiotic resistance profile of a Klebsiella pneumoniae strain from its genome, where our method leads to signatures defined as weighted linear combinations of genetic elements that can easily be identified as genuine antibiotic resistance determinants, with state-of-the-art predictive performance. By enhancing the interpretability of genomic k-mer-based antibiotic resistance prediction models, our approach improves their clinical utility and hence will facilitate their adoption in routine diagnostics by clinicians and microbiologists. While antibiotic resistance was the motivating application, the method is generic and can be transposed to any other bacterial trait. An R package implementing our method is available at https://gitlab.com/biomerieux-data-science/clustlasso.",2020-10-01 +32749457,A comprehensive rat transcriptome built from large scale RNA-seq-based annotation.,"The rat is an important model organism in biomedical research for studying human disease mechanisms and treatments, but its annotated transcriptome is far from complete. We constructed a Rat Transcriptome Re-annotation named RTR using RNA-seq data from 320 samples in 11 different organs generated by the SEQC consortium. Totally, there are 52 807 genes and 114 152 transcripts in RTR. Transcribed regions and exons in RTR account for ∼42% and ∼6.5% of the genome, respectively. Of all 73 074 newly annotated transcripts in RTR, 34 213 were annotated as high confident coding transcripts and 24 728 as high confident long noncoding transcripts. Different tissues rather than different stages have a significant influence on the expression patterns of transcripts. We also found that 11 715 genes and 15 852 transcripts were expressed in all 11 tissues and that 849 house-keeping genes expressed different isoforms among tissues. This comprehensive transcriptome is freely available at http://www.unimd.org/rtr/. 
Our new rat transcriptome provides essential reference for genetics and gene expression studies in rat disease and toxicity models.",2020-09-01 +31451738,A library of human electrocorticographic data and analyses.,"Electrophysiological data from implanted electrodes in the human brain are rare, and therefore scientific access to such data has remained somewhat exclusive. Here we present a freely available curated library of implanted electrocorticographic data and analyses for 16 behavioural experiments, with 204 individual datasets from 34 patients recorded with the same amplifiers and at the same settings. For each dataset, electrode positions were carefully registered to brain anatomy. A large set of fully annotated analysis scripts with which to interpret these data is embedded in the library alongside them. All data, anatomical locations and analysis files (MATLAB code) are provided in a shared file structure at https://searchworks.stanford.edu/view/zk881ps0522.",2019-08-26 +32449065,Genetic localization of the SPC gene controlling pod coiling direction in Medicago truncatula.,"

Background

Handedness in plants introduced by helical growth of organs is frequently observed, and it has fascinated plant scientists for decades. However, the genetic control of natural handedness has not been revealed. In the model legume Medicago truncatula, pods can be coiled in a clockwise or anti-clockwise manner, providing a model for genetic analysis of plant handedness.

Objective

We aimed to localize the Sense of Pod Coiling (SPC) gene controlling pod coiling direction in M. truncatula.

Methods

Linkage analysis was used with a biparental population for fine mapping of the SPC gene. The genome sequence of M. truncatula Mt4.0 was used for marker identification and physical mapping. Single nucleotide polymorphisms (SNPs) between the parental lines were converted to CAPS (cleaved amplified polymorphic sequences) markers. A genetic map was constructed using the software JoinMap version 3.0. Gene prediction and annotation provided by the M. truncatula genome database (http://www.medicagogenome.org) was confirmed with the programs of FGENESH and Pfam 32.0, respectively. Quantitative reverse transcription PCR (qRT-PCR) was used to analyze the relative expression levels of candidate genes.

Results

The genetic analysis indicated that the anti-clockwise coiling is dominant to clockwise and is controlled by the single gene, SPC. The SPC gene was delimited to a 250 kb-region on Chromosome 7. Total of 15 protein-coding genes were identified in the SPC locus through gene annotation and sequence analysis. Of those, two genes, potentially encoding a receptor-like kinase and a vacuolar cation/proton exchanger respectively, were selected as candidates for the SPC gene.

Conclusions

The results presented here lay a foundation for gene cloning of SPC, which will help us to understand the molecular mechanisms underlying helical growth in plant organs.",2020-05-24 +33325755,Metabolomic and Transcriptomic Analysis of MCF-7 Cells Exposed to 23 Chemicals at Human-Relevant Levels: Estimation of Individual Chemical Contribution to Effects.,"

Background

Humans are constantly being exposed to various xenobiotics at relatively low concentrations. To date, limited evidence is available to ascertain whether a complex xenobiotic mixture at human-relevant levels causes any health effect. Moreover, there is no effective method to pinpoint the contribution of each chemical toward such an effect.

Objectives

This study aims to understand the responses of cells to a mixture containing 23 xenobiotics at human-relevant levels and develop a feasible method to decipher the chemical(s) that contribute significantly to the observed effect.

Methods

We characterized the metabolome and transcriptome of breast cancer cells (MCF-7) before and after exposure to the mixture at human-relevant levels; preexposure levels were derived from existing large-scale biomonitoring data. A high-throughput metabolomics-based ""leave-one-out"" method was proposed to understand the relative contribution of each component by comparing the metabolome with and without the particular chemical in the mixture.

Results

The metabolomic analysis suggested that the mixture altered metabolites associated with cell proliferation and oxidative stress. For the transcriptomes, gene ontology terms and pathways including ""cell cycle,"" ""cell proliferation,"" and ""cell division"" were significantly altered after mixture exposure. The mixture altered genes associated with pathways such as ""genotoxicity"" and ""nuclear factor erythroid 2-related factor 2 (Nrf2)."" Through joint pathways analysis, metabolites and genes were observed to be well-aligned in pyrimidine and purine metabolisms. The leave-one-out results showed that many chemicals made their contributions to specific metabolic pathways. The overall metabolome pattern of the absence of 2,4-dihydroxybenzophenone (DHB) or bisphenol A (BPA) showed great resemblance to controls, suggesting their higher relative contribution to the observed effect.

Discussion

The omics results showed that exposure to the mixture at human-relevant levels can induce significant in vitro cellular changes. Also, the leave one out method offers an effective approach for deconvoluting the effects of the mixture. https://doi.org/10.1289/EHP6641.",2020-12-16 +32656192,μBialSim: Constraint-Based Dynamic Simulation of Complex Microbiomes.,"Microbial communities are pervasive in the natural environment, associated with many hosts, and of increasing importance in biotechnological applications. The complexity of these microbial systems makes the underlying mechanisms driving their dynamics difficult to identify. While experimental meta-OMICS techniques are routinely applied to record the inventory and activity of microbiomes over time, it remains difficult to obtain quantitative predictions based on such data. Mechanistic, quantitative mathematical modeling approaches hold the promise to both provide predictive power and shed light on cause-effect relationships driving these dynamic systems. We introduce μbialSim (pronounced ""microbial sim""), a dynamic Flux-Balance-Analysis-based (dFBA) numerical simulator which is able to predict the time course in terms of composition and activity of microbiomes containing 100s of species in batch or chemostat mode. Activity of individual species is simulated by using separate FBA models which have access to a common pool of compounds, allowing for metabolite exchange. A novel augmented forward Euler method ensures numerical accuracy by temporarily reducing the time step size when compound concentrations decrease rapidly due to high compound affinities and/or the presence of many consuming species. We present three exemplary applications of μbialSim: a batch culture of a hydrogenotrophic archaeon, a syntrophic methanogenic biculture, and a 773-species human gut microbiome which exhibits a complex and dynamic pattern of metabolite exchange. 
Focusing on metabolite exchange as the main interaction type, μbialSim allows for the mechanistic simulation of microbiomes at their natural complexity. Simulated trajectories can be used to contextualize experimental meta-OMICS data and to derive hypotheses on cause-effect relationships driving community dynamics based on scenario simulations. μbialSim is implemented in Matlab and relies on the COBRA Toolbox or CellNetAnalyzer for FBA calculations. The source code is available under the GNU General Public License v3.0 at https://git.ufz.de/UMBSysBio/microbialsim.",2020-06-10 +30053266,TC3A: The Cancer 3' UTR Atlas.,"Widespread alternative polyadenylation (APA) occurs during enhanced cellular proliferation and transformation. Recently, we demonstrated that CFIm25-mediated 3' UTR shortening through APA promotes glioblastoma tumor growth in vitro and in vivo, further underscoring its significance to tumorigenesis. Here, we report The Cancer 3' UTR Atlas (TC3A), a comprehensive resource of APA usage for 10,537 tumors across 32 cancer types. These APA events represent potentially novel prognostic biomarkers and may uncover novel mechanisms for the regulation of cancer driver genes. TC3A is built on top of the now de facto standard cBioPortal. Therefore, the large community of existing cBioPortal users and clinical researchers will find TC3A familiar and immediately usable. TC3A is currently fully functional and freely available at http://tc3a.org.",2018-01-01 +30095168,Home parenteral nutrition for people with inoperable malignant bowel obstruction.,"

Background

People with advanced ovarian or gastrointestinal cancer may develop malignant bowel obstruction (MBO). They are able to tolerate limited, if any, oral or enteral (via a tube directly into the gut) nutrition. Parenteral nutrition (PN) is the provision of macronutrients, micronutrients, electrolytes and fluid infused as an intravenous solution and provides a method for these people to receive nutrients. There are clinical and ethical arguments for and against the administration of PN to people receiving palliative care.

Objectives

To assess the effectiveness of home parenteral nutrition (HPN) in improving survival and quality of life in people with inoperable MBO.

Search methods

We searched the following electronic databases: Cochrane Central Register of Controlled Trials (CENTRAL; 2018, Issue 1), MEDLINE (Ovid), Embase (Ovid), BNI, CINAHL, Web of Science and NHS Economic Evaluation and Health Technology Assessment up to January 2018, ClinicalTrials.gov (http://clinicaltrials.gov/) and in the World Health Organization (WHO) International Clinical Trials Registry Platform (ICTRP) search portal (http://apps.who.int/trialsearch/). In addition, we handsearched included studies and used the 'Similar articles' feature on PubMed for included articles.

Selection criteria

We included any studies with more than five participants investigating HPN in people over 16 years of age with inoperable MBO.

Data collection and analysis

We extracted the data and assessed risk of bias for each study. We entered data into Review Manager 5 and used GRADEpro to assess the quality of the evidence.

Main results

We included 13 studies with a total of 721 participants in the review. The studies were observational, 12 studies had only one relevant treatment arm and no control and for the one study with a control arm, very few details were given. The risk of bias was high and the certainty of evidence was graded as very low for all outcomes. Due to heterogeneity of data, meta-analysis was not performed and therefore the data were synthesised via a narrative summary. The evidence for benefit derived from PN was very low for survival and quality of life. All the studies measured overall survival and 636 (88%) of participants were deceased at the end of the study. However there were varying definitions of overall survival that yielded median survival intervals between 15 to 155 days (range three to 1278 days). Three studies used validated measures of quality of life. The results from assessment of quality of life were equivocal; one study reported improvements up until three months and two studies reported approximately similar numbers of participants with improvements and deterioration. Different quality of life scales were used in each of the studies and quality of life was measured at different time points. Due to the very low certainty of the evidence, we are very uncertain about the adverse events related to PN use. Adverse events were measured by nine studies and data for individual participants could be extracted from eight studies. This revealed that 32 of 260 (12%) patients developed a central venous catheter infection or were hospitalised because of complications related to PN.

Authors' conclusions

We are very uncertain whether HPN improves survival or quality of life in people with MBO as the certainty of evidence was very low for both outcomes. As the evidence base is limited and at high risk of bias, further higher-quality prospective studies are required.",2018-08-10 +33268423,Predicting severe pneumonia in the emergency department: a global study of the Pediatric Emergency Research Networks (PERN)-study protocol.,"

Introduction

Pneumonia is a frequent and costly cause of emergency department (ED) visits and hospitalisations in children. There are no evidence-based, validated tools to assist physicians in management and disposition decisions for children presenting to the ED with community-acquired pneumonia (CAP). The objective of this study is to develop a clinical prediction model to accurately stratify children with CAP who are at risk for low, moderate and severe disease across a global network of EDs.

Methods and analysis

This study is a prospective cohort study enrolling up to 4700 children with CAP at EDs at ~80 member sites of the Pediatric Emergency Research Networks (PERN; https://pern-global.com/). We will include children aged 3 months to <14 years with a clinical diagnosis of CAP. We will exclude children with hospital admissions within 7 days prior to the study visit, hospital-acquired pneumonias or chronic complex conditions. Clinical, laboratory and imaging data from the ED visit and hospitalisations within 7 days will be collected. A follow-up telephone or text survey will be completed 7-14 days after the visit. The primary outcome is a three-tier composite of disease severity. Ordinal logistic regression, assuming a partial proportional odds specification, and recursive partitioning will be used to develop the risk stratification models.

Ethics and dissemination

This study will result in a clinical prediction model to accurately identify risk of severe disease on presentation to the ED. Ethics approval was obtained for all sites included in the study. Cincinnati Children's Hospital Institutional Review Board (IRB) serves as the central IRB for most US sites. Informed consent will be obtained from all participants. Results will be disseminated through international conferences and peer-reviewed publications. This study overcomes limitations of prior pneumonia severity scores by allowing for broad generalisability of findings, which can be actively implemented after model development and validation.",2020-12-02 +,Best Paper Selection,"Chen J, Podchiyska T, Altman R. OrderRex: clinical order decision support and outcome predictions by data-mining electronic medical records. J Am Med Inform Assoc 2016;23:339-48 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5009921/ Miotto R, Li L, Kidd BA, Dudley JT. Deep Patient: An Unsupervised Representation to Predict the Future of Patients from the Electronic Health Records. Sci Rep 2016;6:26094 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4869115/ Prasser F, Kohlmayer F, Kuhn KA. The Importance of Context: Risk-based De-identification of Biomedical Data. Methods Inf Med 2016;55:347-55 +https://methods.schattauer.de/en/contents/archivestandard/issue/2382/manuscript/25994.ht Saez C, Zurriaga O, Perez-Panades J, Melchor I, Robles M, Garcia-Gomez JM. Applying probabilistic temporal and multisite data quality control methods to a public health mortality registry in Spain: a systematic approach to quality control of repositories. 
J Am Med Inform Assoc 2016;23:1085-95 +https://academic.oup.com/jamia/article-lookup/doi/10.1093/jamia/ocw010",2017-08-01 +32912053,Proliferative and Nonproliferative Lesions of the Rat and Mouse Central and Peripheral Nervous Systems: New and Revised INHAND Terms.,"Harmonization of diagnostic terminology used during the histopathologic analysis of rodent tissue sections from nonclinical toxicity studies will improve the consistency of data sets produced by laboratories located around the world. The INHAND Project (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) is a cooperative enterprise of 4 major societies of toxicologic pathology to develop a globally accepted standard vocabulary for proliferative and nonproliferative lesions in rodents. A prior manuscript (Toxicol Pathol 2012;40[4 Suppl]:87S-157S) defined multiple diagnostic terms for toxicant-induced lesions, common spontaneous and age-related changes, and principal confounding artifacts in the rat and mouse central nervous system (CNS) and peripheral nervous system (PNS). The current article defines 9 new diagnostic terms and updates 2 previous terms for findings in the rodent CNS and PNS, the need for which has become evident in the years since the publication of the initial INHAND nomenclature for findings in rodent neural tissues. The nomenclature presented in this document is also available electronically on the Internet at the goRENI website (http://www.goreni.org/).",2020-09-10 +29985979,RNAvista: a webserver to assess RNA secondary structures with non-canonical base pairs.,"

Motivation

In the study of 3D RNA structure, information about non-canonical interactions between nucleobases is increasingly important. Specialized databases support investigation of this issue based on experimental data, and several programs can annotate non-canonical base pairs in the RNA 3D structure. However, predicting the extended RNA secondary structure which describes both canonical and non-canonical interactions remains difficult.

Results

Here, we present RNAvista that allows predicting an extended RNA secondary structure from sequence or from the list enumerating canonical base pairs only. RNAvista is implemented as a publicly available webserver with user-friendly interface. It runs on all major web browsers.

Availability and implementation

http://rnavista.cs.put.poznan.pl.",2019-01-01 +29982280,PepBDB: a comprehensive structural database of biological peptide-protein interactions.,"

Summary

A structural database of peptide-protein interactions is important for drug discovery targeting peptide-mediated interactions. Although some peptide databases, especially for special types of peptides, have been developed, a comprehensive database of cleaned peptide-protein complex structures is still not available. Such cleaned structures are valuable for docking and scoring studies in structure-based drug design. Here, we have developed PepBDB-a curated Peptide Binding DataBase of biological complex structures from the Protein Data Bank (PDB). PepBDB presents not only cleaned structures but also extensive information about biological peptide-protein interactions, and allows users to search the database with a variety of options and interactively visualize the search results.

Availability and implementation

PepBDB is available at http://huanglab.phys.hust.edu.cn/pepbdb/.",2019-01-01 +30714210,Functional Evolution of Proteins.,"The functional evolution of proteins advances through gene duplication followed by functional drift, whereas molecular evolution occurs through random mutational events. Over time, protein active-site structures or functional epitopes remain highly conserved, which enables relationships to be inferred between distant orthologs or paralogs. In this study, we present the first functional clustering and evolutionary analysis of the RCSB Protein Data Bank (RCSB PDB) based on similarities between active-site structures. All of the ligand-bound proteins within the RCSB PDB were scored using our Comparison of Protein Active-site Structures (CPASS) software and database (http://cpass.unl.edu/). Principal component analysis was then used to identify 4431 representative structures to construct a phylogenetic tree based on the CPASS comparative scores (http://itol.embl.de/shared/jcatazaro). The resulting phylogenetic tree identified a sequential, step-wise evolution of protein active-sites and provides novel insights into the emergence of protein function or changes in substrate specificity based on subtle changes in geometry and amino acid composition.",2019-02-19 +31790141,CATHER: a novel threading algorithm with predicted contacts.,"

Motivation

Threading is one of the most effective methods for protein structure prediction. In recent years, the increasing accuracy in protein contact map prediction opens a new avenue to improve the performance of threading algorithms. Several preliminary studies suggest that with predicted contacts, the performance of threading algorithms can be improved greatly. There is still much room to explore to make better use of predicted contacts.

Results

We have developed a new contact-assisted threading algorithm named CATHER using both conventional sequential profiles and contact map predicted by a deep learning-based algorithm. Benchmark tests on an independent test set and the CASP12 targets demonstrated that CATHER made significant improvement over other methods which only use either sequential profile or predicted contact map. Our method was ranked at the Top 10 among all 39 participated server groups on the 32 free modeling targets in the blind tests of the CASP13 experiment. These data suggest that it is promising to push forward the threading algorithms by using predicted contacts.

Availability and implementation

http://yanglab.nankai.edu.cn/CATHER/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +29892645,Bacillus subtilis promoter sequences data set for promoter prediction in Gram-positive bacteria.,"This paper presents a prediction of Bacillus subtilis promoters using a Support Vector Machine system. In the literature, there is a lack of information on Gram-positive bacterial promoter sequences compared to Gram-negative bacteria. Promoter sequence identification is essential for studying gene expression. Initially, we collected the B. subtilis genome sequence from the NCBI database, and promoters were identified by their sigma factors in the DBTBS database. We then grouped the promoters according to 15 factors in 2 domains, corresponding to sigma 54 and sigma 70 of Gram-negative bacteria. Based on these data we developed a script in Python to search for promoters in the B. subtilis genome. After processing the data, we obtained 767 promoter sequences for B. subtilis, most of which were recognized by sigma SigA. To validate the data we found, we developed a software package called BacSVM+, which receives promoters as input and returns the best combination of parameters in a LibSVM library to predict promoter regions in the bacteria used in the simulation. All data gathered as well as the BacSVM+ software is available for download at http://bacpp.bioinfoucs.com/rafael/Sigmas.zip.",2018-05-13 +32043035,A Functional Landscape of CKD Entities From Public Transcriptomic Data.,"

Introduction

To develop effective therapies and identify novel early biomarkers for chronic kidney disease, an understanding of the molecular mechanisms orchestrating it is essential. We here set out to understand how differences in chronic kidney disease (CKD) origin are reflected in gene expression. To this end, we integrated publicly available human glomerular microarray gene expression data for 9 kidney disease entities that account for most of CKD worldwide. Our primary goal was to demonstrate the possibilities and potential on data analysis and integration to the nephrology community.

Methods

We integrated data from 5 publicly available studies and compared glomerular gene expression profiles of disease with that of controls from nontumor parts of kidney cancer nephrectomy tissues. A major challenge was the integration of the data from different sources, platforms, and conditions that we mitigated with a bespoke stringent procedure.

Results

We performed a global transcriptome-based delineation of different kidney disease entities, obtaining a transcriptomic diffusion map of their similarities and differences based on the genes that acquire a consistent differential expression between each kidney disease entity and nephrectomy tissue. We derived functional insights by inferring the activity of signaling pathways and transcription factors from the collected gene expression data and identified potential drug candidates based on expression signature matching. We validated representative findings by immunostaining in human kidney biopsies indicating, for example, that the transcription factor FOXM1 is significantly and specifically expressed in parietal epithelial cells in rapidly progressive glomerulonephritis (RPGN) whereas not expressed in control kidney tissue. Furthermore, we found drug candidates by matching the signature on expression of drugs to that of the CKD entities, in particular, the Food and Drug Administration-approved drug nilotinib.

Conclusion

These results provide a foundation to comprehend the specific molecular mechanisms underlying different kidney disease entities that can pave the way to identify biomarkers and potential therapeutic targets. To facilitate further use, we provide our results as a free interactive Web application: https://saezlab.shinyapps.io/ckd_landscape/. However, because of the limitations of the data and the difficulties in its integration, any specific result should be considered with caution. Indeed, we consider this study rather an illustration of the value of functional genomics and integration of existing data.",2019-11-13 +30677302,FungiPAD: A Free Web Tool for Compound Property Evaluation and Fungicide-Likeness Analysis.,"The increasing prevalence of fungal diseases, continual development of resistance, and stringent environmental regulations have revealed an urgent need to develop more selective, safer, resistance-breaking, and cost-effective fungicides. However, most new fungicidal lead compounds fail in their late stages of development as a result of poor solubility or permeability, meaning that they have suboptimal physicochemical properties. Hence, the exploration of advanced technologies for compound ""fungicide-likeness"" assessment might overcome these obstacles and bring more chemical entities to market. FungiPAD ( http://chemyang.ccnu.edu.cn/ccb/database/FungiPAD/ ) is a free platform employed to predict physicochemical properties, bioavailability, and fungicide-likeness swiftly and powerfully using comprehensive approaches, such as physicochemical radars and qualitative and quantitative analyses. This platform contains data for over 16 000 physicochemical descriptors and the results of 2200 qualitative and 1100 quantitative analyses of marketed fungicides and provides comprehensive fungicide-likeness analysis for different compounds. 
The user-friendly interface facilitates interpretation and manipulation by non-computational scientists in support of fungicide discovery.",2019-02-07 +31554814,Temporary dense seismic network during the 2016 Central Italy seismic emergency for microzonation studies.,"In August 2016, a magnitude 6.0 earthquake struck Central Italy, starting a devastating seismic sequence, aggravated by other two events of magnitude 5.9 and 6.5, respectively. After the first mainshock, four Italian institutions installed a dense temporary network of 50 seismic stations in an area of 260 km2. The network was registered in the International Federation of Digital Seismograph Networks with the code 3A and quoted with a Digital Object Identifier ( https://doi.org/10.13127/SD/ku7Xm12Yy9 ). Raw data were converted into the standard binary miniSEED format, and organized in a structured archive. Then, data quality and completeness were checked, and all the relevant information was used for creating the metadata volumes. Finally, the 99 Gb of continuous seismic data and metadata were uploaded into the INGV node of the European Integrated Data Archive repository. Their use was regulated by a Memorandum of Understanding between the institutions. After an embargo period, the data are now available for many different seismological studies.",2019-09-25 +30967897,croFGD: Catharanthus roseus Functional Genomics Database.,"Catharanthus roseus is a medicinal plant, which can produce monoterpene indole alkaloid (MIA) metabolites with biological activity and is rich in vinblastine and vincristine. With release of the scaffolded genome sequence of C. roseus, it is necessary to annotate gene functions on the whole-genome level. Recently, 53 RNA-seq datasets are available in public with different tissues (flower, root, leaf, seedling, and shoot) and different treatments (MeJA, PnWB infection and yeast elicitor). 
We used in-house data process pipeline with the combination of PCC and MR algorithms to construct a co-expression network exploring multi-dimensional gene expression (global, tissue preferential, and treat response) through multi-layered approaches. In the meanwhile, we added miRNA-target pairs, predicted PPI pairs into the network and provided several tools such as gene set enrichment analysis, functional module enrichment analysis, and motif analysis for functional prediction of the co-expression genes. Finally, we have constructed an online croFGD database (http://bioinformatics.cau.edu.cn/croFGD/). We hope croFGD can help the communities to study the C. roseus functional genomics and make novel discoveries about key genes involved in some important biological processes.",2019-03-22 +32401021,GalaxySagittarius: Structure- and Similarity-Based Prediction of Protein Targets for Druglike Compounds.,"Computational techniques for predicting interactions of proteins and druglike molecules have often been used to search for compounds that bind a given protein with high affinity. More recently, such tools have also been applied to the reverse procedure of searching protein targets for a given compound. Among methods for predicting protein-ligand interactions, ligand-based methods relying on similarity to ligands of known interactions are effective only when similar protein-ligand interactions are known. Receptor-based methods predicting protein-ligand interactions by molecular docking are effective only when high-accuracy receptor structures and binding sites are available. Moreover, the computational cost of molecular docking tends to be too high to be applied to the entire protein structure database. In this paper, an effective target prediction method, which combines ligand similarity-based and receptor structure-based approaches, is introduced. In this method, protein-ligand docking is performed after efficient structure- and similarity-based screening. 
The enriched protein target database by predicted binding ligands and sites allows detection of protein targets with previously unknown ligand interactions. The method, called GalaxySagittarius, is freely available as a web server at http://galaxy.seoklab.org/sagittarius.",2020-05-22 +27127885,GCGene: a gene resource for gastric cancer with literature evidence.,"Gastric cancer (GC) is the fifth most common cancer and third leading cause of cancer-related deaths worldwide. Its lethality primarily stems from a lack of detection strategies for early stages of GC and a lack of noninvasive detection strategies for advanced stages. The development of early diagnostic biomarkers largely depends on understanding the biological pathways and regulatory mechanisms associated with putative GC genes. Unfortunately, the GC-implicated genes that have been identified thus far are scattered among thousands of published studies, and no systematic summary is available, which hinders the development of a large-scale genetic screen. To provide a publically accessible resource tool to meet this need, we constructed a literature-based database GCGene (Gastric Cancer Gene database) with comprehensive annotations supported by a user-friendly website. In the current release, we have collected 1,815 unique human genes including 1,678 protein-coding and 137 non-coding genes curated from extensive examination of 3,142 PubMed abstracts. The resulting database has a convenient web-based interface to facilitate both textual and sequence-based searches. All curated genes in GCGene are downloadable for advanced bioinformatics data mining. Gene prioritization was performed to rank the relative relevance of these genes in GC development. The 100 top-ranked genes are highly mutated according to the cohort of published studies we reviewed. 
By conducting a network analysis of these top-ranked GC-associated genes in the human interactome, we were able to identify strong links between 8 highly connected genes with low expression and patient survival time. GCGene is freely available to academic users at http://gcgene.bioinfo-minzhao.org/.",2016-06-01 +32150354,Visualizing Human Protein-Protein Interactions and Subcellular Localizations on Cell Images Through CellMap.,"Visualizing protein data remains a challenging and stimulating task. Useful and intuitive visualization tools may help advance biomolecular and medical research; unintuitive tools may bar important breakthroughs. This protocol describes two use cases for the CellMap (http://cellmap.protein.properties) web tool. The tool allows researchers to visualize human protein-protein interaction data constrained by protein subcellular localizations. In the simplest form, proteins are visualized on cell images that also show protein-protein interactions (PPIs) through lines (edges) connecting the proteins across the compartments. At a glance, this simultaneously highlights spatial constraints that proteins are subject to in their physical environment and visualizes PPIs against these localizations. Visualizing two realities helps in decluttering the protein interaction visualization from ""hairball"" phenomena that arise when single proteins or groups thereof interact with hundreds of partners. © 2019 The Authors. Basic Protocol 1: Visualizing proteins and their interactions on cell images Basic Protocol 2: Displaying all interaction partners for a protein.",2020-03-01 +30993345,WHISTLE: a high-accuracy map of the human N6-methyladenosine (m6A) epitranscriptome predicted using a machine learning approach.,"N6-methyladenosine (m6A) is the most prevalent post-transcriptional modification in eukaryotes, and plays a pivotal role in various biological processes, such as splicing, RNA degradation and RNA-protein interaction. 
We report here a prediction framework WHISTLE for transcriptome-wide m6A RNA-methylation site prediction. When tested on six independent datasets, our approach, which integrated 35 additional genomic features besides the conventional sequence features, achieved a major improvement in the accuracy of m6A site prediction (average AUC: 0.948 and 0.880 under the full transcript or mature messenger RNA models, respectively) compared to the state-of-the-art computational approaches MethyRNA (AUC: 0.790 and 0.732) and SRAMP (AUC: 0.761 and 0.706). It also out-performed the existing epitranscriptome databases MeT-DB (AUC: 0.798 and 0.744) and RMBase (AUC: 0.786 and 0.736), which were built upon hundreds of epitranscriptome high-throughput sequencing samples. To probe the putative biological processes impacted by changes in an individual m6A site, a network-based approach was implemented according to the 'guilt-by-association' principle by integrating RNA methylation profiles, gene expression profiles and protein-protein interaction data. Finally, the WHISTLE web server was built to facilitate the query of our high-accuracy map of the human m6A epitranscriptome, and the server is freely available at: www.xjtlu.edu.cn/biologicalsciences/whistle and http://whistle-epitranscriptome.com.",2019-04-01 +30215668,bcGST-an interactive bias-correction method to identify over-represented gene-sets in boutique arrays.,"

Motivation

Gene annotation and pathway databases such as Gene Ontology and Kyoto Encyclopaedia of Genes and Genomes are important tools in Gene-Set Test (GST) that describe gene biological functions and associated pathways. GST aims to establish an association relationship between a gene-set of interest and an annotation. Importantly, GST tests for over-representation of genes in an annotation term. One implicit assumption of GST is that the gene expression platform captures the complete or a very large proportion of the genome. However, this assumption is neither satisfied for the increasingly popular boutique array nor the custom designed gene expression profiling platform. Specifically, conventional GST is no longer appropriate due to the gene-set selection bias induced during the construction of these platforms.

Results

We propose bcGST, a bias-corrected GST by introducing bias-correction terms in the contingency table needed for calculating the Fisher's Exact Test. The adjustment method works by estimating the proportion of genes captured on the array with respect to the genome in order to assist filtration of annotation terms that would otherwise be falsely included or excluded. We illustrate the practicality of bcGST and its stability through multiple differential gene expression analyses in melanoma and the Cancer Genome Atlas cancer studies.

Availability and implementation

The bcGST method is made available as a Shiny web application at http://shiny.maths.usyd.edu.au/bcGST/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +30097532,Glycomics@ExPASy: Bridging the Gap.,"Glycomics@ExPASy (https://www.expasy.org/glycomics) is the glycomics tab of ExPASy, the server of SIB Swiss Institute of Bioinformatics. It was created in 2016 to centralize web-based glycoinformatics resources developed within an international network of glycoscientists. The hosted collection currently includes mainly databases and tools created and maintained at SIB but also links to a range of reference resources popular in the glycomics community. The philosophy of our toolbox is that it should be {glycoscientist AND protein scientist}-friendly with the aim of (1) popularizing the use of bioinformatics in glycobiology and (2) emphasizing the relationship between glycobiology and protein-oriented bioinformatics resources. The scarcity of data bridging these two disciplines led us to design tools as interactive as possible based on database connectivity to facilitate data exploration and support hypothesis building. Glycomics@ExPASy was designed, and is developed, with a long-term vision in close collaboration with glycoscientists to meet as closely as possible the growing needs of the community for glycoinformatics.",2018-08-10 +29788290,BAGEL4: a user-friendly web server to thoroughly mine RiPPs and bacteriocins.,"Interest in secondary metabolites such as RiPPs (ribosomally synthesized and posttranslationally modified peptides) is increasing worldwide. To facilitate the research in this field we have updated our mining web server. BAGEL4 is faster than its predecessor and is now fully independent from ORF-calling. Gene clusters of interest are discovered using the core-peptide database and/or through HMM motifs that are present in associated context genes. The databases used for mining have been updated and extended with literature references and links to UniProt and NCBI. 
Additionally, we have included automated promoter and terminator prediction and the option to upload RNA expression data, which can be displayed along with the identified clusters. Further improvements include the annotation of the context genes, which is now based on a fast blast against the prokaryote part of the UniRef90 database, and the improved web-BLAST feature that dynamically loads structural data such as internal cross-linking from UniProt. Overall BAGEL4 provides the user with more information through a user-friendly web-interface which simplifies data evaluation. BAGEL4 is freely accessible at http://bagel4.molgenrug.nl.",2018-07-01 +33426066,Endoscopic Treatment of Symptomatic Foot and Ankle Bone Cyst with 3D Printing Application.,"

Objective

To study the efficacy of arthroscopy for treating symptomatic bone cysts of the foot and ankle through the follow-up of patients and to further explore the application value of 3D printing technology in this treatment.

Methods

Twenty-one patients with symptomatic bone cysts in the foot and ankle who underwent arthroscopic surgery in our Center from March 2010 to December 2018 were enrolled, including 11 in the experimental group and 10 in the control group. For the control group, C-arm fluoroscopy was used intraoperatively to confirm the positioning of the cysts; for the experimental group, a 3D model of the lesion tissue and the 3D-printed individualized guides were prepared to assist the positioning of the cysts. Debridement of the lesion tissues was conducted under an arthroscope. Regular follow-ups were conducted. The time of establishing arthroscopic approaches and the times of intraoperative fluoroscopy between the two groups were compared. Significance was determined as P < 0.05.

Results

The postoperative pathology of the patients confirmed the diagnosis. No significant perioperative complications were observed in either group, and no recurrence of bone cysts was seen at the last follow-up. The VAS scores and AOFAS scores of the two groups at the last follow-up were significantly improved compared with the preoperative data, but there was no statistical difference between the two groups. All surgeries were performed by the same senior surgeon. The time taken to establish the arthroscopic approaches between the two groups was statistically significant (P < 0.001), and the times of intraoperative fluoroscopy required to establish the approach were also statistically significant (P < 0.001). The intraoperative bleeding between the two groups was statistically significant (P < 0.01). There was 1 case in each group whose postoperative CT showed insufficient bone grafting, but no increase in cavity volume was observed during the follow-up.

Conclusion

With the assistance of the 3D printing technology for treating symptomatic bone cysts of the ankle and foot, the surgeon can design the operation preoperatively and perform the rehearsal, which would make it easier to establish the arthroscopic approach, better understand the anatomy, and make the operation smoother. This trial is registered with http://www.clinicaltrials.govNCT03152916.",2020-12-26 +32297095,Using Complier Average Causal Effect Estimation to Examine Student Outcomes of the PAX Good Behavior Game When Integrated with the PATHS Curriculum.,"A growing body of research has documented a link between variation in implementation dosage and outcomes associated with preventive interventions. Complier Average Causal Effect (CACE; Jo in J Educ Behav Stat 27:385-409, 2002) analysis allows for estimating program impacts in light of variation in implementation. This study reports intent-to-treat (ITT) and CACE findings from a randomized controlled trial (RCT) testing the impacts of the universal PAX Good Behavior Game (PAX GBG) integrated with Promoting Alternative Thinking Strategies (i.e., PATHS to PAX) and PAX GBG only compared to a control. This study used ratings by 318 K-5 teachers of 1526 at-risk children who, at baseline, were rated as displaying the top 33rd percentile of aggressive-disruptive behavior. Leveraging a prior study on these data (Berg et al. in Admin Policy Ment Health Ment Health Serv Res 44:558-571, https://doi.org/10.1007/s10488-016-0738-1 , 2017), CACE was defined as the effect of intervention assignment for compliers, using two compliance cut points (50th and 75th percentile), on posttest ratings of student academic engagement, social competence, peer relations, emotion regulation, hyperactivity, and aggressive-disruptive behavior. The ITT analyses indicated improvements for students in the integrated condition on ratings of social competence compared to the control condition. 
The CACE analyses also indicated significant effects of the integrated intervention on social competence, as well as academic engagement and emotion regulation for students in high compliance classrooms. These findings illustrate the importance of considering variation in implementation within the context of RCTs.",2020-11-01 +32439998,TraPS-VarI: Identifying genetic variants altering phosphotyrosine based signalling motifs.,"Patient stratification and individualized therapeutic strategies rely on the established knowledge of genotype-specific molecular and cellular alterations of biological and therapeutic significance. Whilst almost all approved drugs have been developed based on the Reference Sequence protein database (RefSeq), the latest genome sequencing studies establish the substantial prevalence of non-synonymous genetic mutations in the general population, including stop-insertion and frame shift mutations within the coding regions of membrane proteins. While the availability of individual genotypes are becoming increasingly common, the biological and clinical interpretations of mutations among individual genomes is largely lagging behind. Lately, transmembrane proteins of haematopoietic (myeloid and lymphoid) derived immune cells have attracted much attention as important targets for cancer immunotherapies. As such, the signalling properties of haematological transmembrane receptors rely on the membrane-proximal phosphotyrosine based sequence motifs (TBSMs) such as ITAM (immunoreceptor tyrosine-based activation motif), ITIM (immunoreceptor tyrosine-based inhibition motif) and signal transducer and activator of transcription 3 (STAT3)-recruiting YxxQ motifs. However, mutations that alter the coding regions of transmembrane proteins, resulting in either insertion or deletion of crucial signal modulating TBSMs, remains unknown. 
To conveniently identify individual cell line-specific or patient-specific membrane protein altering mutations, we present the Transmembrane Protein Sequence Variant Identifier (TraPS-VarI). TraPS-VarI is an annotation tool for accurate mapping of the effect of an individual's mutation in the transmembrane protein sequence, and to identify the prevalence of TBSMs. TraPS-VarI is a biologist and clinician-friendly algorithm with a web interface and an associated database browser (https://www.traps-vari.org/).",2020-05-21 +33232472,Renal outcome in patients with newly diagnosed multiple myeloma: results from the UK NCRI Myeloma XI trial.,"Renal injury is a common complication of multiple myeloma (MM) and is associated with adverse outcome. Despite this, the natural history of renal injury in patients with MM remains uncertain especially in the context of intensive therapy and novel therapies. To address the lack of data, we evaluated the renal function of 2334 patients from the UK National Cancer Research Institute Myeloma XI trial at baseline and at 12 months to assess renal function over time and the factors associated with change. Patients who had severe acute kidney injury or a requirement for dialysis were excluded. At 12 months of the 1450 evaluable patients planned for autologous transplantation; 204 (14%) patients had a decline in estimated glomerular filtration rate (eGFR) ≥25% from baseline, 341 (23.5%) had an improvement and 905 (62%) had no significant change in eGFR. Renal outcome at 12 months for the 884 evaluable patients who were not planned for transplant was similar. Improved renal function was more likely if patients were <70 years old, male, had an average eGFR <60 mL per minute per 1.73 m2 and a higher baseline free light chain level >1000 mg/L, and/or a free light chain response of >90%. It did not correlate with monoclonal-protein response, transplantation, or use of a bortezomib-based regimen. 
We show that with current therapies the proportion of patients who have a significant decline in renal function in the first 12 months is small. The greatest relative improvement in eGFR is seen in patients with high free light chain at baseline and a high light chain response. This trial was registered at http://www.isrctn.com as #49407852.",2020-11-01 +31240306,Hydra image processor: 5-D GPU image analysis library with MATLAB and python wrappers.,"

Summary

Light microscopes can now capture data in five dimensions at very high frame rates producing terabytes of data per experiment. Five-dimensional data has three spatial dimensions (x, y, z), multiple channels (λ) and time (t). Current tools are prohibitively time consuming and do not efficiently utilize available hardware. The hydra image processor (HIP) is a new library providing hardware-accelerated image processing accessible from interpreted languages including MATLAB and Python. HIP automatically distributes data/computation across system and video RAM allowing hardware-accelerated processing of arbitrarily large images. HIP also partitions compute tasks optimally across multiple GPUs. HIP includes a new kernel renormalization reducing boundary effects associated with widely used padding approaches.

Availability and implementation

HIP is free and open source software released under the BSD 3-Clause License. Source code and compiled binary files will be maintained on http://www.hydraimageprocessor.com. A comprehensive description of all MATLAB and Python interfaces and user documents are provided. HIP includes GPU-accelerated support for most common image processing operations in 2-D and 3-D and is easily extensible. HIP uses the NVIDIA CUDA interface to access the GPU. CUDA is well supported on Windows and Linux with macOS support in the future.",2019-12-01 +23768135,bioNerDS: exploring bioinformatics' database and software use through literature mining.,"

Background

Biology-focused databases and software define bioinformatics and their use is central to computational biology. In such a complex and dynamic field, it is of interest to understand what resources are available, which are used, how much they are used, and for what they are used. While scholarly literature surveys can provide some insights, large-scale computer-based approaches to identify mentions of bioinformatics databases and software from primary literature would automate systematic cataloguing, facilitate the monitoring of usage, and provide the foundations for the recovery of computational methods for analysing biological data, with the long-term aim of identifying best/common practice in different areas of biology.

Results

We have developed bioNerDS, a named entity recogniser for the recovery of bioinformatics databases and software from primary literature. We identify such entities with an F-measure ranging from 63% to 91% at the mention level and 63-78% at the document level, depending on corpus. Not attaining a higher F-measure is mostly due to high ambiguity in resource naming, which is compounded by the on-going introduction of new resources. To demonstrate the software, we applied bioNerDS to full-text articles from BMC Bioinformatics and Genome Biology. General mention patterns reflect the remit of these journals, highlighting BMC Bioinformatics's emphasis on new tools and Genome Biology's greater emphasis on data analysis. The data also illustrates some shifts in resource usage: for example, the past decade has seen R and the Gene Ontology join BLAST and GenBank as the main components in bioinformatics processing.

Abstract

Conclusions We demonstrate the feasibility of automatically identifying resource names on a large-scale from the scientific literature and show that the generated data can be used for exploration of bioinformatics database and software usage. For example, our results help to investigate the rate of change in resource usage and corroborate the suspicion that a vast majority of resources are created, but rarely (if ever) used thereafter. bioNerDS is available at http://bionerds.sourceforge.net/.",2013-06-15 +32117916,Inferring lncRNA Functional Similarity Based on Integrating Heterogeneous Network Data.,"Although lncRNAs lack the potential to be translated into proteins directly, their complicated and diversiform functions make them as a window into decoding the mechanisms of human physiological activities. Accumulating experiment studies have identified associations between lncRNA dysfunction and many important complex diseases. However, known experimentally confirmed lncRNA functions are still very limited. It is urgent to build effective computational models for rapid predicting of unknown lncRNA functions on a large scale. To this end, valid similarity measure between known and unknown lncRNAs plays a vital role. In this paper, an original model was developed to calculate functional similarities between lncRNAs by integrating heterogeneous network data. In this model, a novel integrated network was constructed based on the data of four single lncRNA functional similarity networks (miRNA-based similarity network, disease-based similarity network, GTEx expression-based network and NONCODE expression-based network). Using the lncRNA pairs that share the target mRNAs as the benchmark, the results show that this integrated network is more effective than any single networks with an AUC of 0.736 in the cross validation, while the AUC of four single networks were 0.703, 0.733, 0.611, and 0.602. 
To implement our model, a web server named IHNLncSim was constructed for inferring lncRNA functional similarity based on integrating heterogeneous network data. Moreover, the modules of network visualization and disease-based lncRNA function enrichment analysis were added into IHNLncSim. It is anticipated that IHNLncSim could be an effective bioinformatics tool for the researches of lncRNA regulation function studies. IHNLncSim is freely available at http://www.lirmed.com/ihnlncsim.",2020-02-06 +33455774,Relationships between metabolic profiles and gene expression in liver and leukocytes of dairy cows in early lactation.,"Homeorhetic mechanisms assist dairy cows in the transition from pregnancy to lactation. Less successful cows develop severe negative energy balance (NEB), placing them at risk of metabolic and infectious diseases and reduced fertility. We have previously placed multiparous Holstein Friesian cows from 4 herds into metabolic clusters, using as biomarkers measurements of plasma nonesterified fatty acids, β-hydroxybutyrate, glucose and IGF-1 collected at 14 and 35 d in milk (DIM). This study characterized the global transcriptomic profiles of liver and circulating leukocytes from the same animals to determine underlying mechanisms associated with their metabolic and immune function. Liver biopsy and whole-blood samples were collected around 14 DIM for RNA sequencing. All cows with available RNA sequencing data were placed into balanced (BAL, n = 44), intermediate (n = 44), or imbalanced (IMBAL, n = 19) metabolic cluster groups. Differential gene expression was compared between the 3 groups using ANOVA, but only the comparison between BAL and IMBAL cows is reported. Pathway analysis was undertaken using DAVID Bioinformatic Resources (https://david.ncifcrf.gov/). 
Milk yields did not differ between BAL and IMBAL cows but dry matter intake was less in IMBAL cows and they were in greater energy deficit at 14 DIM (-4.48 v -11.70 MJ/d for BAL and IMBAL cows). Significantly differentially expressed pathways in hepatic tissue included AMPK signaling, glucagon signaling, adipocytokine signaling, and insulin resistance. Genes involved in lipid metabolism and cholesterol transport were more highly expressed in IMBAL cows but IGF1 and IGFALS were downregulated. Leukocytes from BAL cows had greater expression of histones and genes involved in nucleosomes and cell division. Leukocyte expression of heat shock proteins increased in IMBAL cows, suggesting an unfolded protein response, and several key genes involved in immune responses to pathogens were upregulated (e.g., DEFB13, HP, OAS1Z, PTX3, and TLR4). Differentially expressed genes upregulated in IMBAL cows in both tissues included CD36, CPT1, KFL11, and PDK4, all central regulators of energy metabolism. The IMBAL cows therefore had greater difficulty maintaining glucose homeostasis and had dysregulated hepatic lipid metabolism. Their energy deficit was associated with a reduced capacity for cell division and greater evidence of stress responses in the leukocyte population, likely contributing to an increased risk of infectious disease.",2021-01-15 +28915793,The cacao Criollo genome v2.0: an improved version of the genome for genetic and functional genomic studies.,"

Background

Theobroma cacao L., native to the Amazonian basin of South America, is an economically important fruit tree crop for tropical countries as a source of chocolate. The first draft genome of the species, from a Criollo cultivar, was published in 2011. Although a useful resource, some improvements are possible, including identifying misassemblies, reducing the number of scaffolds and gaps, and anchoring un-anchored sequences to the 10 chromosomes.

Methods

We used a NGS-based approach to significantly improve the assembly of the Belizian Criollo B97-61/B2 genome. We combined four Illumina large insert size mate paired libraries with 52x of Pacific Biosciences long reads to correct misassembled regions and reduced the number of scaffolds. We then used genotyping by sequencing (GBS) methods to increase the proportion of the assembly anchored to chromosomes.

Results

The scaffold number decreased from 4,792 in assembly V1 to 554 in V2 while the scaffold N50 size has increased from 0.47 Mb in V1 to 6.5 Mb in V2. A total of 96.7% of the assembly was anchored to the 10 chromosomes compared to 66.8% in the previous version. Unknown sites (Ns) were reduced from 10.8% to 5.7%. In addition, we updated the functional annotations and performed a new RefSeq structural annotation based on RNAseq evidence.

Conclusion

Theobroma cacao Criollo genome version 2 will be a valuable resource for the investigation of complex traits at the genomic level and for future comparative genomics and genetics studies in cacao tree. New functional tools and annotations are available on the Cocoa Genome Hub ( http://cocoa-genome-hub.southgreen.fr ).",2017-09-15 +29325066,Toppar: an interactive browser for viewing association study results.,"Summary:Data integration and visualization help geneticists make sense of large amounts of data. To help facilitate interpretation of genetic association data we developed Toppar, a customizable visualization tool that stores results from association studies and enables browsing over multiple results, by combining features from existing tools and linking to appropriate external databases. Availability and implementation:Detailed information on Toppar's features and functionality are on our website http://mccarthy.well.ox.ac.uk/toppar/docs along with instructions on how to download, install and run Toppar. Our online version of Toppar is accessible from the website and can be test-driven using Firefox, Safari or Chrome on sub-sets of publicly available genome-wide association study anthropometric waist and body mass index data (Locke et al., 2015; Shungin et al., 2015) from the Genetic Investigation of ANthropometric Traits consortium. Contact:totajuliusd@gmail.com.",2018-06-01 +30014462,Development of a novel clustering tool for linear peptide sequences.,"Epitopes identified in large-scale screens of overlapping peptides often share significant levels of sequence identity, complicating the analysis of epitope-related data. Clustering algorithms are often used to facilitate these analyses, but available methods are generally insufficient in their capacity to define biologically meaningful epitope clusters in the context of the immune response. 
To fulfil this need we developed an algorithm that generates epitope clusters based on representative or consensus sequences. This tool allows the user to cluster peptide sequences on the basis of a specified level of identity by selecting among three different method options. These include the 'clique method', in which all members of the cluster must share the same minimal level of identity with each other, and the 'connected graph method', in which all members of a cluster must share a defined level of identity with at least one other member of the cluster. In cases where it is not possible to define a clear consensus sequence with the connected graph method, a third option provides a novel 'cluster-breaking algorithm' for consensus sequence driven sub-clustering. Herein we demonstrate the tool's clustering performance and applicability using (i) a selection of dengue virus epitopes for the 'clique method', (ii) sets of allergen-derived peptides from related species for the 'connected graph method' and (iii) large data sets of eluted ligand, major histocompatibility complex binding and T-cell recognition data captured within the Immune Epitope Database (IEDB) with the newly developed 'cluster-breaking algorithm'. This novel clustering tool is accessible at http://tools.iedb.org/cluster2/.",2018-08-06 +32490069,"Annotation dataset of the cardiotocographic recordings constituting the ""CTU-CHB intra-partum CTG database"".","The proposed dataset provides annotations for the 552 cardiotocographic (CTG) recordings included in the publicly available ""CTU-CHB intra-partum CTG database"" from Physionet (https://physionet.org/content/ctu-uhb-ctgdb/1.0.0/). Each CTG recording is composed by two simultaneously acquired signals: i) the fetal heart rate (FHR) and ii) the maternal tocogram (representing uterine activity). Annotations consist in the detection of starting and ending points of specific CTG events on both FHR signal and maternal tocogram. 
Annotated events for the FHR signal are the bradycardia, tachycardia, acceleration and deceleration episodes. Annotated events for the maternal tocogram are the uterine contractions. The dataset also reports classification of each deceleration as early, late, variable or prolonged, in relation to the presence of a uterine contraction. Annotations were obtained by an expert gynecologist with the support of CTG Analyzer, a dedicated software application for automatic analysis of digital CTG recordings. These annotations can be useful in the development, testing and comparison of algorithms for the automatic analysis of digital CTG recordings, which can make CTG interpretation more objective and independent from clinician's experience.",2020-05-19 +31714956,Comprehensive review and assessment of computational methods for predicting RNA post-transcriptional modification sites from RNA sequences.,"RNA post-transcriptional modifications play a crucial role in a myriad of biological processes and cellular functions. To date, more than 160 RNA modifications have been discovered; therefore, accurate identification of RNA-modification sites is fundamental for a better understanding of RNA-mediated biological functions and mechanisms. However, due to limitations in experimental methods, systematic identification of different types of RNA-modification sites remains a major challenge. Recently, more than 20 computational methods have been developed to identify RNA-modification sites in tandem with high-throughput experimental methods, with most of these capable of predicting only single types of RNA-modification sites. These methods show high diversity in their dataset size, data quality, core algorithms, features extracted and feature selection techniques and evaluation strategies. 
Therefore, there is an urgent need to revisit these methods and summarize their methodologies, in order to improve and further develop computational techniques to identify and characterize RNA-modification sites from the large amounts of sequence data. With this goal in mind, first, we provide a comprehensive survey on a large collection of 27 state-of-the-art approaches for predicting N1-methyladenosine and N6-methyladenosine sites. We cover a variety of important aspects that are crucial for the development of successful predictors, including the dataset quality, operating algorithms, sequence and genomic features, feature selection, model performance evaluation and software utility. In addition, we also provide our thoughts on potential strategies to improve the model performance. Second, we propose a computational approach called DeepPromise based on deep learning techniques for simultaneous prediction of N1-methyladenosine and N6-methyladenosine. To extract the sequence context surrounding the modification sites, three feature encodings, including enhanced nucleic acid composition, one-hot encoding, and RNA embedding, were used as the input to seven consecutive layers of convolutional neural networks (CNNs), respectively. Moreover, DeepPromise further combined the prediction score of the CNN-based models and achieved around 43% higher area under receiver-operating curve (AUROC) for m1A site prediction and 2-6% higher AUROC for m6A site prediction, respectively, when compared with several existing state-of-the-art approaches on the independent test. In-depth analyses of characteristic sequence motifs identified from the convolution-layer filters indicated that nucleotide presentation at proximal positions surrounding the modification sites contributed most to the classification, whereas those at distal positions also affected classification but to different extents. 
To maximize user convenience, a web server was developed as an implementation of DeepPromise and made publicly available at http://DeepPromise.erc.monash.edu/, with the server accepting both RNA sequences and genomic sequences to allow prediction of two types of putative RNA-modification sites.",2020-09-01 +33361083,Development and external validation of a COVID-19 mortality risk prediction algorithm: a multicentre retrospective cohort study.,"

Objective

This study aimed to develop and externally validate a COVID-19 mortality risk prediction algorithm.

Design

Retrospective cohort study.

Setting

Five designated tertiary hospitals for COVID-19 in Hubei province, China.

Participants

We routinely collected medical data of 1364 confirmed adult patients with COVID-19 between 8 January and 19 March 2020. Among them, 1088 patients from two designated hospitals in Wuhan were used to develop the prognostic model, and 276 patients from three hospitals outside Wuhan were used for external validation. All patients were followed up for a maximal of 60 days after the diagnosis of COVID-19.

Methods

The model discrimination was assessed by the area under the receiver operating characteristic curve (AUC) and Somers' D test, and calibration was examined by the calibration plot. Decision curve analysis was conducted.

Main outcome measures

The primary outcome was all-cause mortality within 60 days after the diagnosis of COVID-19.

Results

The full model included seven predictors of age, respiratory failure, white cell count, lymphocytes, platelets, D-dimer and lactate dehydrogenase. The simple model contained five indicators of age, respiratory failure, coronary heart disease, renal failure and heart failure. After cross-validation, the AUC statistics based on derivation cohort were 0.96 (95% CI, 0.96 to 0.97) for the full model and 0.92 (95% CI, 0.89 to 0.95) for the simple model. The AUC statistics based on the external validation cohort were 0.97 (95% CI, 0.96 to 0.98) for the full model and 0.88 (95% CI, 0.80 to 0.96) for the simple model. Good calibration accuracy of these two models was found in the derivation and validation cohort.

Conclusion

The prediction models showed good model performance in identifying patients with COVID-19 with a high risk of death in 60 days. It may be useful for acute risk classification.

Web calculator

We provided a freely accessible web calculator (https://www.whuyijia.com/).",2020-12-24 +25877638,"An update of miRNASNP database for better SNP selection by GWAS data, miRNA expression and online tools.","MicroRNAs (miRNAs) are key regulators of gene expression involved in a broad range of biological processes. MiRNASNP aims to provide single nucleotide polymorphisms (SNPs) in miRNAs and genes that may impact miRNA biogenesis and/or miRNA target binding. Advanced miRNA research provided abundant data about miRNA expression, validated targets and related phenotypic variants. In miRNASNP v2.0, we have updated our previous database with several new data and features, including: (i) expression level and expression correlation of miRNAs and target genes in different tissues, (ii) linking SNPs to the results of genome-wide association studies, (iii) integrating experimentally validated miRNA:mRNA interactions, (iv) adding multiple filters to prioritize functional SNPs. In addition, as a supplement of the database, we have set up three flexible online tools to analyse the influence of novel variants on miRNA:mRNA binding. A new nice web interface was designed for miRNASNP v2.0 allowing users to browse, search and download. We aim to maintain the miRNASNP as a solid resource for function, genetics and disease studies of miRNA-related SNPs. Database URL: http://bioinfo.life. hust.edu.cn/miRNASNP2/",2015-04-15 +31095917,Dying for a Meal: An Integrative Review of Characteristics of Choking Incidents and Recommendations to Prevent Fatal and Nonfatal Choking Across Populations.,"Purpose The purpose of this study was to conduct an integrative review of original research, across adult populations relating to fatal or nonfatal choking on food, to understand ways to respond to and prevent choking incidents. Method Four scientific databases (CINAHL, Medline, Web of Science, and EMBASE) were searched for original peer-reviewed research relating to fatal or nonfatal choking on foods. 
Data were extracted on study characteristics; factors leading up to, events at the time of, and actions taken after the choking incident; and impacts of choking incidents. An integrative review of the findings across studies identified several risk factors and recommendations to reduce the risk of choking. Results In total, 52 studies met the criteria for inclusion in this review, of which 31 were quantitative, 17 were qualitative, and 4 were of a mixed methods design. Studies reported the observations and narratives of bystanders or researchers, or else were large-scale autopsy studies, and included both the general public and people at risk of dysphagia. A range of food types were involved, and several actions were reported in response to food choking. Strategies to reduce the risk of choking were identified in the studies and are presented in 5 main categories. Conclusions Factors leading up to choking incidents extend well beyond the individual to the environment for mealtimes; the provision of appropriate mealtime assistance and oral care; and regular monitoring of general health, oral health, and medications. Bystanders' increased awareness and knowledge of how to respond to choking are vital. The results of this review could be used to inform service policy and training, for individuals at risk of choking, the people who support them, and the general public. Further research is needed to explore choking prevention and airway protection in individuals with dysphagia. Supplemental Material https://doi.org/10.23641/asha.8121131.",2019-05-16 +31656833,Dataset of multi-harmonic measurements for the experimental CEA-beam benchmark structure.,"This data article comprises post-processed data to investigate the non-linear dynamic behavior of the CEA-beam benchmark structure that is a clamped-clamped steel beam with non-ideal boundary conditions. Experiments have been performed on the CEA-CESTA laboratory. 
The data provided include output measurements for the nonlinear dynamic behavior of the CEA-beam (i.e. the displacement amplitudes for each harmonic component at the middle of the beam), as well as the complete input acceleration signal harmonics amplitude. All the results from this data will help researchers and engineers in proper analysis of the nonlinearities of the clamped-clamped beam and the effect of the non-ideal input signal and advanced understanding of links between different excitation signal and the multi-harmonic responses of the CEA-beam. One of the main original contribution is to share the data sets to give the opportunity to researchers for testing and validating analytical or numerical models of a nonlinear beam with non-ideal boundary conditions and subjected to low and high levels of excitation signal. This Data in Brief article is an additional item directly alongside the following paper published in the Elsevier journal Communications in Nonlinear Science and Numerical Simulation: M. Claeys, J-J. Sinou, J-P. Lambelin and B. Alcoverro, Multi-harmonic measurements and numerical simulations of nonlinear vibrations of a beam with non-ideal boundary conditions, Communications in Nonlinear Science and Numerical Simulation, 19(12), 4196-4212, 2014. https://doi.org/10.1016/j.cnsns.2014.04.008.",2019-09-26 +32186956,Machine-Scored Syntax: Comparison of the CLAN Automatic Scoring Program to Manual Scoring.,"Purpose The results of automatic machine scoring of the Index of Productive Syntax from the Computerized Language ANalysis (CLAN) tools of the Child Language Data Exchange System of TalkBank (MacWhinney, 2000) were compared to manual scoring to determine the accuracy of the machine-scored method. Method Twenty transcripts of 10 children from archival data of the Weismer Corpus from the Child Language Data Exchange System at 30 and 42 months were examined. 
Measures of absolute point difference and point-to-point accuracy were compared, as well as points erroneously given and missed. Two new measures for evaluating automatic scoring of the Index of Productive Syntax were introduced: Machine Item Accuracy (MIA) and Cascade Failure Rate- these measures further analyze points erroneously given and missed. Differences in total scores, subscale scores, and individual structures were also reported. Results Mean absolute point difference between machine and hand scoring was 3.65, point-to-point agreement was 72.6%, and MIA was 74.9%. There were large differences in subscales, with Noun Phrase and Verb Phrase subscales generally providing greater accuracy and agreement than Question/Negation and Sentence Structures subscales. There were significantly more erroneous than missed items in machine scoring, attributed to problems of mistagging of elements, imprecise search patterns, and other errors. Cascade failure resulted in an average of 4.65 points lost per transcript. Conclusions The CLAN program showed relatively inaccurate outcomes in comparison to manual scoring on both traditional and new measures of accuracy. Recommendations for improvement of the program include accounting for second exemplar violations and applying cascaded credit, among other suggestions. It was proposed that research on machine-scored syntax routinely report accuracy measures detailing erroneous and missed scores, including MIA, so that researchers and clinicians are aware of the limitations of a machine-scoring program. Supplemental Material https://doi.org/10.23641/asha.11984364.",2020-03-18 +33187004,Insights from the Hereditary Thrombotic Thrombocytopenic Purpura Registry: Discussion of Key Findings Based on Individual Cases from Switzerland.,"The Hereditary TTP Registry is an international cohort study for patients with a confirmed or suspected diagnosis of hereditary thrombotic thrombocytopenic purpura (hTTP) and their family members. 
Hereditary TTP is an ultra-rare blood disorder (prevalence of ∼1-2 cases per million), the result of autosomal-recessively inherited congenital ADAMTS13 (a disintegrin and metalloproteinase with a thrombospondin type 1 motif, member 13) deficiency (ADAMTS13 activity <10% of the normal), and associated with yet many unanswered questions. Until December 2017, the Hereditary TTP Registry had enrolled 123 confirmed hTTP patients. Their median age at disease onset was 4.5 years (range: 0-70) and at clinical diagnosis 16.7 years (range: 0-69), a difference that highlights the existing awareness gap in recognizing hTTP. The systematic collection of clinical data of individual patients revealed their substantial baseline comorbidities, as a consequence of recurring TTP episodes in the past. Most notable was the high proportion of patients having suffered from premature arterial thrombotic events, mainly transient ischemic attacks, ischemic strokes, and to a lesser extent myocardial infarctions. At 40 to 50 years of age and above, more than 50% of patients had suffered from at least one such event, and many had experienced arterial thrombotic events despite regular plasma infusions every 2 to 3 weeks that supplement the missing plasma ADAMTS13. The article by van Dorland et al. (Haematologica 2019;104(10):2107-2115) and the ongoing Hereditary TTP Registry cohort study were recognized with the Günter Landbeck Excellence Award at the 50th Hemophilia Symposium in Hamburg in November 2019, the reason to present the Hereditary TTP Registry in more detail here.",2020-11-13 +32011700,IQ-TREE 2: New Models and Efficient Methods for Phylogenetic Inference in the Genomic Era.,"IQ-TREE (http://www.iqtree.org, last accessed February 6, 2020) is a user-friendly and widely used software package for phylogenetic inference using maximum likelihood. 
Since the release of version 1 in 2014, we have continuously expanded IQ-TREE to integrate a plethora of new models of sequence evolution and efficient computational approaches of phylogenetic inference to deal with genomic data. Here, we describe notable features of IQ-TREE version 2 and highlight the key advantages over other software.",2020-05-01 +34538929,Intergenerational Ambivalence and Loneliness in Later Life.,"

Objective

This brief report examined the relationship between intergenerational ambivalence and loneliness in later life among a group of older adults with at least one child.

Background

Previous work has explored the links between intergenerational ambivalence and other indicators of well-being but has not examined loneliness. Although studies show an association between positive and negative relationship quality with children and loneliness, there are conflicting findings, and there is also insufficient exploration of the role of gender.

Method

Utilizing pooled data from the 2012 and 2014 waves of the Health and Retirement Study (HRS) (n = 10,967) (https://hrs.isr.umich.edu/documentation), structural equation models were used to examine the hypothesized relationships, and multiple group analysis was utilized to assess potential gender differences.

Results

The results indicated that greater intergenerational ambivalence was associated with increased loneliness in later life. However, there were no significant gender or marital status differences in the relationships.

Conclusion

This study adds to the existing literature on ambivalence and well-being by showing that ambivalent relationships are related to loneliness. Results underscore the emotional complexity of parent-child relationships and suggest the need for investigating the consequences of holding contradictory feelings.",2020-09-05 +32214380,Stochastic simulation and statistical inference platform for visualization and estimation of transcriptional kinetics.,"Recent advances in single-molecule fluorescent imaging have enabled quantitative measurements of transcription at a single gene copy, yet an accurate understanding of transcriptional kinetics is still lacking due to the difficulty of solving detailed biophysical models. Here we introduce a stochastic simulation and statistical inference platform for modeling detailed transcriptional kinetics in prokaryotic systems, which has not been solved analytically. The model includes stochastic two-state gene activation, mRNA synthesis initiation and stepwise elongation, release to the cytoplasm, and stepwise co-transcriptional degradation. Using the Gillespie algorithm, the platform simulates nascent and mature mRNA kinetics of a single gene copy and predicts fluorescent signals measurable by time-lapse single-cell mRNA imaging, for different experimental conditions. To approach the inverse problem of estimating the kinetic parameters of the model from experimental data, we develop a heuristic optimization method based on the genetic algorithm and the empirical distribution of mRNA generated by simulation. As a demonstration, we show that the optimization algorithm can successfully recover the transcriptional kinetics of simulated and experimental gene expression data. 
The platform is available as a MATLAB software package at https://data.caltech.edu/records/1287.",2020-03-26 +32926804,The Access to Literacy Assessment System for Phonological Awareness: An Adaptive Measure of Phonological Awareness Appropriate for Children With Speech and/or Language Impairment.,"Purpose The Access to Literacy Assessment System-Phonological Awareness (ATLAS-PA) was developed for use with children with speech and/or language impairment. The subtests (Rhyming, Blending, and Segmenting) are appropriate for children who are 3-7 years of age. ATLAS-PA is composed entirely of receptive items, incorporates individualized levels of instruction, and is adaptive in nature. Method To establish the construct validity of ATLAS-PA, we collected data from children with typical development (n = 938) and those who have speech and/or language impairment (n = 227). Results Rasch analyses indicated that items fit well together and formed a unidimensional construct of phonological awareness. Differential item functioning was minimal between the two groups of children, and scores on ATLAS-PA were moderately to strongly related to other measures of phonological awareness. Information about item functioning was used to create an adaptive version of ATLAS-PA. Conclusions Findings suggest that ATLAS-PA is a valid measure of phonological awareness that can be used with children with typical development and with speech and/or language impairment. Its adaptive format minimizes testing time and provides opportunities for monitoring progress in preschool and early elementary classrooms. Supplemental Material https://doi.org/10.23641/asha.12931691.",2020-09-14 +31675914,"Annot: a Django-based sample, reagent, and experiment metadata tracking system.","

Background

In biological experiments, comprehensive experimental metadata tracking - which comprises experiment, reagent, and protocol annotation with controlled vocabulary from established ontologies - remains a challenge, especially when the experiment involves multiple laboratory scientists who execute different steps of the protocol. Here we describe Annot, a novel web application designed to provide a flexible solution for this task.

Results

Annot enforces the use of controlled vocabulary for sample and reagent annotation while enabling robust investigation, study, and protocol tracking. The cornerstone of Annot's implementation is a json syntax-compatible file format, which can capture detailed metadata for all aspects of complex biological experiments. Data stored in this json file format can easily be ported into spreadsheet or data frame files that can be loaded into R ( https://www.r-project.org/ ) or Pandas, Python's data analysis library ( https://pandas.pydata.org/ ). Annot is implemented in Python3 and utilizes the Django web framework, Postgresql, Nginx, and Debian. It is deployed via Docker and supports all major browsers.

Conclusions

Annot offers a robust solution to annotate samples, reagents, and experimental protocols for established assays where multiple laboratory scientists are involved. Further, it provides a framework to store and retrieve metadata for data analysis and integration, and therefore ensures that data generated in different experiments can be integrated and jointly analyzed. This type of solution to metadata tracking can enhance the utility of large-scale datasets, which we demonstrate here with a large-scale microenvironment microarray study.",2019-11-01 +25378340,Europe PMC: a full-text literature database for the life sciences and platform for innovation.,"This article describes recent developments of Europe PMC (http://europepmc.org), the leading database for life science literature. Formerly known as UKPMC, the service was rebranded in November 2012 as Europe PMC to reflect the scope of the funding agencies that support it. Several new developments have enriched Europe PMC considerably since then. Europe PMC now offers RESTful web services to access both articles and grants, powerful search tools such as citation-count sort order and data citation features, a service to add publications to your ORCID, a variety of export formats, and an External Links service that enables any related resource to be linked from Europe PMC content.",2014-11-06 +31258099,Pubertal development mediates the association between family environment and brain structure and function in childhood.,"Psychosocial acceleration theory suggests that pubertal maturation is accelerated in response to adversity. In addition, suboptimal caregiving accelerates development of the amygdala-medial prefrontal cortex circuit. These findings may be related. 
Here, we assess whether associations between family environment and measures of the amygdala-medial prefrontal cortex circuit are mediated by pubertal development in more than 2000 9- and 10-year-old children from the Adolescent Brain Cognitive Development Study (http://dx.doi.org/10.15154/1412097). Using structural equation modeling, demographic, child-reported, and parent-reported data on family dynamics were compiled into a higher level family environment latent variable. Magnetic resonance imaging preprocessing and compilations were performed by the Adolescent Brain Cognitive Development Study's data analysis core. Anterior cingulate cortex (ACC) thickness, area, white matter fractional anisotropy, amygdala volume, and cingulo-opercular network-amygdala resting-state functional connectivity were assessed. For ACC cortical thickness and ACC fractional anisotropy, significant indirect effects indicated that a stressful family environment relates to more advanced pubertal stage and more mature brain structure. For cingulo-opercular network-amygdala functional connectivity, results indicated a trend in the expected direction. For ACC area, evidence for quadratic mediation by pubertal stage was found. Sex-stratified analyses suggest stronger results for girls. Despite small effect sizes, structural measures of circuits important for emotional behavior are associated with family environment and show initial evidence of accelerated pubertal development.",2020-05-01 +32559277,Webina: an open-source library and web app that runs AutoDock Vina entirely in the web browser.,"

Motivation

Molecular docking is a computational technique for predicting how a small molecule might bind a macromolecular target. Among docking programs, AutoDock Vina is particularly popular. Like many docking programs, Vina requires users to download/install an executable file and to run that file from a command-line interface. Choosing proper configuration parameters and analyzing Vina output is also sometimes challenging. These issues are particularly problematic for students and novice researchers.

Results

We created Webina, a new version of Vina, to address these challenges. Webina runs Vina entirely in a web browser, so users need only visit a Webina-enabled webpage. The docking calculations take place on the user's own computer rather than a remote server.

Availability and implementation

A working version of the open-source Webina app can be accessed free of charge from http://durrantlab.com/webina.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +32542109,"Abasy Atlas v2.2: The most comprehensive and up-to-date inventory of meta-curated, historical, bacterial regulatory networks, their completeness and system-level characterization.","Some organism-specific databases about regulation in bacteria have become larger, accelerated by high-throughput methodologies, while others are no longer updated or accessible. Each database homogenize its datasets, giving rise to heterogeneity across databases. Such heterogeneity mainly encompasses different names for a gene and different network representations, generating duplicated interactions that could bias network analyses. Abasy (Across-bacteria systems) Atlas consolidates information from different sources into meta-curated regulatory networks in bacteria. The high-quality networks in Abasy Atlas enable cross-organisms analyses, such as benchmarking studies where gold standards are required. Nevertheless, network incompleteness still casts doubts on the conclusions of network analyses, and available sampling methods cannot reflect the curation process. To tackle this problem, the updated version of Abasy Atlas presented in this work provides historical snapshots of regulatory networks. Thus, network analyses can be performed at different completeness levels, making possible to identify potential bias and to predict future results. We leverage the recently found constraint in the complexity of regulatory networks to develop a novel model to quantify the total number of regulatory interactions as a function of the genome size. This completeness estimation is a valuable insight that may aid in the daunting task of network curation, prediction, and validation. 
The new version of Abasy Atlas provides 76 networks (204,282 regulatory interactions) covering 42 bacteria (64% Gram-positive and 36% Gram-negative) distributed in 9 species (Mycobacterium tuberculosis, Bacillus subtilis, Escherichia coli, Corynebacterium glutamicum, Staphylococcus aureus, Pseudomonas aeruginosa, Streptococcus pyogenes, Streptococcus pneumoniae, and Streptomyces coelicolor), containing 8459 regulons and 4335 modules. Database URL: https://abasy.ccg.unam.mx/.",2020-05-16 +31081021,Another look at matrix correlations.,"

Motivation

High throughput technologies are widely employed in modern biomedical research. They yield measurements of a large number of biomolecules in a single experiment. The number of experiments usually is much smaller than the number of measurements in each experiment. The simultaneous measurements of biomolecules provide a basis for a comprehensive, systems view for describing relevant biological processes. Often it is necessary to determine correlations between the data matrices under different conditions or pathways. However, the techniques for analyzing the data with a low number of samples for possible correlations within or between conditions are still in development. Earlier developed correlative measures, such as the RV coefficient, use the trace of the product of data matrices as the most relevant characteristic. However, a recent study has shown that the RV coefficient consistently overestimates the correlations in the case of low sample numbers. To correct for this bias, it was suggested to discard the diagonal elements of the outer products of each data matrix. In this work, a principled approach based on the matrix decomposition generates three trace-independent parts for every matrix. These components are unique, and they are used to determine different aspects of correlations between the original datasets.

Results

Simulations show that the decomposition results in the removal of high correlation bias and the dependence on the sample number intrinsic to the RV coefficient. We then use the correlations to analyze a real proteomics dataset.

Availability and implementation

The python code can be downloaded from http://dynamic-proteome.utmb.edu/MatrixCorrelations.aspx.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +33937698,Are we missing the mark? Relationships of psychosocial issues to outcomes after injury: A review of OTA annual meeting presentations.,"

Objectives

To observe the availability of information about social, emotional, and psychological factors in abstracts presented at the Orthopaedic Trauma Association (OTA) annual meeting.

Data source

OTA website (https://ota.org/education/meetings-and-courses/meeting-archive/).

Study selection

All abstracts selected for paper or poster presentation at the 2016 through 2018 OTA annual meetings, as published in the final program. Studies were included if they sought to measure mental illness, substance use or abuse, pain, or other psychosocial issues. If studies utilized 1 or more patient-reported outcome measures (PROMs), they were also included.

Data extraction

For each abstract meeting inclusion criterion, studies were assessed for interventions intended to improve outcomes in any of the listed psychosocial domains.

Data synthesis/results

Nine hundred forty-two abstracts were evaluated over a 3-year period. Of these, 294 (31.2%) met inclusion criteria. Twenty-five abstracts (8.5% of 294) reported mental illness, with depression (n = 14), anxiety (n = 9), and posttraumatic stress disorder (n = 5) being the most common. Eighty-eight abstracts (29.9% of 294) reported substance-use of tobacco, alcohol, narcotics, and/or recreational drugs. Tobacco-use was most prevalent (n = 59), followed by opioid-use (n = 31). Ten abstracts reported substance abuse. Pain was measured in 95 abstracts, and 203 abstracts utilized PROMs. Thirty-five abstracts found that these psychosocial elements significantly impacted outcomes or complications. Many abstracts did not assess the influence of these factors on clinical outcomes (n = 99). Sixteen studies described an intervention aimed at mitigating these features.

Conclusions

This study illustrates limited attention to the impact of psychological, social, and environmental factors on outcomes after orthopaedic trauma. Substance-abuse problems and mental health concerns are not only predictors of poor clinical and PROMs of pain and quality of life after injury, but have also been implicated in subsequent recidivism. Only 3% of 942 abstracts observed mental health and 1% reported substance-abuse. Moving forward, greater understanding of psychosocial issues may enhance interventions to impact long-term outcomes.",2020-04-23 +31691815,The reactome pathway knowledgebase.,"The Reactome Knowledgebase (https://reactome.org) provides molecular details of signal transduction, transport, DNA replication, metabolism and other cellular processes as an ordered network of molecular transformations in a single consistent data model, an extended version of a classic metabolic map. Reactome functions both as an archive of biological processes and as a tool for discovering functional relationships in data such as gene expression profiles or somatic mutation catalogs from tumor cells. To extend our ability to annotate human disease processes, we have implemented a new drug class and have used it initially to annotate drugs relevant to cardiovascular disease. Our annotation model depends on external domain experts to identify new areas for annotation and to review new content. New web pages facilitate recruitment of community experts and allow those who have contributed to Reactome to identify their contributions and link them to their ORCID records. 
To improve visualization of our content, we have implemented a new tool to automatically lay out the components of individual reactions with multiple options for downloading the reaction diagrams and associated data, and a new display of our event hierarchy that will facilitate visual interpretation of pathway analysis results.",2020-01-01 +29617954,AGORA: organellar genome annotation from the amino acid and nucleotide references.,"

Summary

Next-generation sequencing (NGS) technologies have led to the accumulation of high-throughput sequence data from various organisms in biology. To apply gene annotation of organellar genomes for various organisms, more optimized tools for functional gene annotation are required. Almost all gene annotation tools are mainly focused on the chloroplast genome of land plants or the mitochondrial genome of animals. We have developed a web application AGORA for the fast, user-friendly and improved annotations of organellar genomes. Annotator for Genes of Organelle from the Reference sequence Analysis (AGORA) annotates genes based on a basic local alignment search tool (BLAST)-based homology search and clustering with selected reference sequences from the NCBI database or user-defined uploaded data. AGORA can annotate the functional genes in almost all mitochondrion and plastid genomes of eukaryotes. The gene annotation of a genome with an exon-intron structure within a gene or inverted repeat region is also available. It provides information of start and end positions of each gene, BLAST results compared with the reference sequence and visualization of gene map by OGDRAW.

Availability and implementation

Users can freely use the software, and the accessible URL is https://bigdata.dongguk.edu/gene_project/AGORA/. The main module of the tool is implemented by the python and php, and the web page is built by the HTML and CSS to support all browsers.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-08-01 +26516187,SynLethDB: synthetic lethality database toward discovery of selective and sensitive anticancer drug targets.,"Synthetic lethality (SL) is a type of genetic interaction between two genes such that simultaneous perturbations of the two genes result in cell death or a dramatic decrease of cell viability, while a perturbation of either gene alone is not lethal. SL reflects the biologically endogenous difference between cancer cells and normal cells, and thus the inhibition of SL partners of genes with cancer-specific mutations could selectively kill cancer cells but spare normal cells. Therefore, SL is emerging as a promising anticancer strategy that could potentially overcome the drawbacks of traditional chemotherapies by reducing severe side effects. Researchers have developed experimental technologies and computational prediction methods to identify SL gene pairs on human and a few model species. However, there has not been a comprehensive database dedicated to collecting SL pairs and related knowledge. In this paper, we propose a comprehensive database, SynLethDB (http://histone.sce.ntu.edu.sg/SynLethDB/), which contains SL pairs collected from biochemical assays, other related databases, computational predictions and text mining results on human and four model species, i.e. mouse, fruit fly, worm and yeast. For each SL pair, a confidence score was calculated by integrating individual scores derived from different evidence sources. We also developed a statistical analysis module to estimate the druggability and sensitivity of cancer cells upon drug treatments targeting human SL partners, based on large-scale genomic data, gene expression profiles and drug sensitivity profiles on more than 1000 cancer cell lines. 
To help users access and mine the wealth of the data, we developed other practical functionalities, such as search and filtering, orthology search, gene set enrichment analysis. Furthermore, a user-friendly web interface has been implemented to facilitate data analysis and interpretation. With the integrated data sets and analytics functionalities, SynLethDB would be a useful resource for biomedical research community and pharmaceutical industry.",2015-10-29 +33656359,First Report of the Rust Disease Caused by Puccinia crepidis-japonicae on New Host Sonchus arvensis from Pakistan. ,"Sonchus arvensis (Asteraceae) is a traditional medicinal herb. The aerial parts are a rich source of vitamins, essential amino acids, and minerals, which may help in treatment of fever, inflammation, detoxication, and blood circulation (Li et al. 2018). In October 2018, typical rust symptoms were observed on S. arvensis leaves and stems in Buner district, Malakand division, Khyber PakhtunKhwa Province, Pakistan (34.39°N; 72.61°E). Almost 40% of leaves and stems of five S. arvensis plants displayed severe rust. The specimens were examined microscopically and compared with type specimen description in published literature (Dietel 1908; Hiratsuka et al. 1992). The fungus was identified as Puccinia crepidis-japonicae based on the characteristic of spore morphology and phylogenetic study based on the internal transcribed spacer (ITS) and large subunit (LSU) sequence data. Uredinia were amphigenous, rounded, or somewhat elliptical, naked, small patches, equally spread, brown. Urediniospores (n=30) were globose to ellipsoid, brownish yellow and measured 22.4-24.7 × 20.2-22.1 µm. Urediniospore walls were brownish orange and finely echinulate and 1.7 to 2.1 µm thick with 2 to 3 germ pores. Telia were amphigenous, rounded or elliptic, scattered, dark brown to blackish. 
Teliospores (n=30) were ellipsoid, subglobose or long ellipsoid, rounded at both ends, not thickened at apex, warted, reddish brown, and measured 31.3-39 × 24.6-26.8 µm. Teliospore walls were reddish-black and about 1.5-2.5 µm thick, and the pedicles were short, hyaline, fragile, become tapered toward apex, and measured 14.4-18.7 × 4.7-9 µm. DNA was extracted from urediniospores, and the combined region of ITS and LSU (28S) were amplified using Rust2Inv (forward primer) and LR6 (Reverse primer) according to the protocol outlined by Aime (2006). A BLASTn search (http://www.ncbi.nlm.nih.gov) showed that the combined ITS and LSU region shared 99% identity (792/804 bp) to the P. crepidis-japonicae accessions (KY798395 from Hawaii, USA) with 100% query cover. The resulting sequence was deposited in GenBank (Accession No. MN093335). Both morphological and molecular characteristics indicate that this species was P. crepidis-japonicae. To test pathogenicity and fulfill the Koch's postulates, a urediniospore suspension (1 × 10⁴ spores/ml) was sprayed on three 6-week-old plants of S. arvensis, and one as negative control, incubated at 22-24°C. Uredinia were observed on the leaves after 10 days of inoculation, whereas the control plants remained symptomless. Microscopic examination confirmed that the symptoms on plants obtained from the field and greenhouse inoculations were morphologically identical. This fungus has been observed previously on Crepis japonica in China, Hong Kong, Japan, Korea, and Taiwan, on Prenanthes spp. in China and on Youngia tenuifolia and Y. fusca in China (Farr and Rossman 2021). Pereira et al. (2002) suggested that P. crepidis-japonicae may play a significant role as a biocontrol agent against its weed host. To the best of our knowledge, there are no other reports of this fungus on any other hosts in Pakistan. The specimen has been vouchered in LAH Herbarium, Department of Botany, University of the Punjab, Lahore, Pakistan (LAH36343). 
This is the first report of P. crepidis-japonicae on S. arvensis as a new host from Pakistan.",2021-03-03 +29501069,Creation of a national emergency medicine enhanced competency directory for residency training.,"Canadian emergency medicine Royal College residency training allows for pursuing extra training in enhanced competency areas. A wealth of enhanced competency training opportunities exist nationally. However, the search for the right fit is a challenging one because there is no centralized resource that catalogues all of these opportunities. A working group of the Canadian Association of Emergency Physicians (CAEP) Resident Section was assembled in 2016 to create a freely accessible and comprehensive directory of Canadian enhanced competency areas. The working group used stakeholder surveys (of residents, recent graduates, and faculty members), social media engagement, and program website searches. Information was collated into the first edition of a national enhanced competency directory, which is available at no cost at http://caep.ca/sites/caep.ca/files/enhancedcompdoc.pdf. Limitations include the scope defined by the working group and survey responses. A biannual update is also incorporated into the CAEP Resident Section portfolio to ensure it remains up-to-date.",2018-03-04 +26989154,"CSCdb: a cancer stem cells portal for markers, related genes and functional information. ","Cancer stem cells (CSCs), which have the ability to self-renew and differentiate into various tumor cell types, are a special class of tumor cells. Characterizing the genes involved in CSCs regulation is fundamental to understand the mechanisms underlying the biological process and develop treatment methods for tumor therapy. Recently, much effort has been expended in the study of CSCs and a large amount of data has been generated. However, to the best of our knowledge, database dedicated to CSCs is not available until now. 
We have thus developed a CSCs database (CSCdb), which includes marker genes, CSCs-related genes/microRNAs and functional annotations. The information in the CSCdb was manually collected from about 13 000 articles. The CSCdb provides detailed information of 1769 genes that have been reported to participate in the functional regulation of CSCs and 74 marker genes that can be used for identification or isolation of CSCs. The CSCdb also provides 9475 annotations about 13 CSCs-related functions, such as oncogenesis, radio resistance, tumorigenesis, differentiation, etc. Annotations of the identified genes, which include protein function description, post-transcription modification information, related literature, Gene Ontology (GO), protein-protein interaction (PPI) information and regulatory relationships, are integrated into the CSCdb to help users get information more easily. CSCdb provides a comprehensive resource for CSCs research work, which would assist in finding new CSCs-related genes and would be a useful tool for biologists. Database URL: http://bioinformatics.ustc.edu.cn/cscdb.",2016-03-17 +30601938,MetFlow: an interactive and integrated workflow for metabolomics data cleaning and differential metabolite discovery.,"

Summary

Mass spectrometry-based metabolomics aims to profile the metabolic changes in biological systems and identify differential metabolites related to physiological phenotypes and aberrant activities. However, many confounding factors during data acquisition complicate metabolomics data, which is characterized by high dimensionality, uncertain degrees of missing and zero values, nonlinearity, unwanted variations and non-normality. Therefore, prior to differential metabolite discovery analysis, various types of data cleaning such as batch alignment, missing value imputation, data normalization and scaling are essentially required for data post-processing. Here, we developed an interactive web server, namely, MetFlow, to provide an integrated and comprehensive workflow for metabolomics data cleaning and differential metabolite discovery.

Availability and implementation

The MetFlow is freely available on http://metflow.zhulab.cn/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +32901488,Lipidomic Biomarkers of Extracellular Vesicles for the Prediction of Preterm Birth in the Early Second Trimester.,"Preterm birth is the leading cause of infant death worldwide and results in a high societal economic burden associated with newborn care. Recent studies have shown that extracellular vesicles (EVs) play an important role in fetal development during pregnancy. Lipids in EVs related to preterm birth remain undefined. Here, we fully investigated differences in lipids in plasma, microvesicles (MVs), and exosomes (Exos) between 27 preterm and 66 full-term pregnant women in the early second trimester (12-24 weeks) using an untargeted lipidomics approach. Independent of other characteristics of samples, we detected 97, 58, and 10 differential features (retention time (RT) and m/z) with identification in plasma, MVs, and Exos, respectively. A panel of five lipids from MVs has an area under the receiver operating characteristic curve (AUC) of 0.87 for the prediction of preterm birth. One lipid of the panel (PS (34:0)) was validated in an additional 83 plasma samples (41 preterm and 42 full-term deliveries) by the pseudotargeted lipidomics method (AUC = 0.71). Our results provide useful information about the early prediction of preterm birth, as well as a better understanding of the underlying mechanisms and intervention of preterm birth. The MS data have been deposited in the CNSA (https://db.cngb.org/cnsa/) of CNGBdb with accession code CNP0001076.",2020-09-22 +30873526,onlineFDR: an R package to control the false discovery rate for growing data repositories.,"

Summary

In many areas of biological research, hypotheses are tested in a sequential manner, without having access to future P-values or even the number of hypotheses to be tested. A key setting where this online hypothesis testing occurs is in the context of publicly available data repositories, where the family of hypotheses to be tested is continually growing as new data is accumulated over time. Recently, Javanmard and Montanari proposed the first procedures that control the FDR for online hypothesis testing. We present an R package, onlineFDR, which implements these procedures and provides wrapper functions to apply them to a historic dataset or a growing data repository.

Availability and implementation

The R package is freely available through Bioconductor (http://www.bioconductor.org/packages/onlineFDR).

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +32413019,"Evaluation of the National Sexually Transmitted Disease Curriculum: Reach, Utilization, and Engagement.","

Background

With increasing rates of sexually transmitted infections in the United States, there is a critical need to educate health professionals on the prevention, diagnosis, and treatment of sexually transmitted infections. The National Sexually Transmitted Disease Curriculum (NSTDC, https://www.std.uw.edu) is a free, online curriculum, funded by the Centers for Disease Control and Prevention. The purpose of this article is to evaluate the reach, utilization, and engagement of users with the curriculum.

Methods

Data on NSTDC utilization was collected for 24 months after the February 1, 2017 launch. For all users, Google Analytics was used to determine total number of users, geographic location, age and sex, and average session duration. For registered users, additional data analysis included work-role, demographics, and completion of self-study modules, check-on-learning questions, and question banks. User satisfaction was measured on a 5-point Likert scale.

Results

During the evaluation period, 136,270 individual users accessed the NSTDC, including 24,652 registered users. Among all registered users, 10,660 (43.2%) were registered nurses, 2810 (11.4%) physicians, 4942 (20.1%) Advanced Practice Nurses and Physician Assistants, and 6213 (25.2%) nonclinicians. Among registered users, 18,533 (75.2%) completed at least 1 module, 7898 (32.0%) completed all 7 modules, and 19,804 (80.4%) answered optional check-on-learning questions. Median satisfaction with the content was (5) very satisfied (interquartile range, 4-5).

Conclusions

The NSTDC is a free, guideline-based, online curriculum with novel dual functionality that has achieved extensive reach with a broad array of health professionals who engage deeply with the material. The wide usage of NSTDC demonstrates the need for high-quality, unbiased, free content in user-focused formats.",2020-06-01 +32422927,HDVdb: A Comprehensive Hepatitis D Virus Database. ,"Hepatitis D virus (HDV) causes the most severe form of viral hepatitis, which may rapidly progress to liver cirrhosis and hepatocellular carcinoma (HCC). It has been estimated that 15-20 million people worldwide are suffering from the chronic HDV infection. Currently, no effective therapies are available to treat acute or chronic HDV infection. The remarkable sequence variability of the HDV genome, particularly within the hypervariable region has resulted in the provisional classification of eight major genotypes and various subtypes. We have developed a specialized database, HDVdb (http://hdvdb.bio.wzw.tum.de/), which contains a collection of partial and complete HDV genomic sequences obtained from the GenBank and from our own patient cohort. HDVdb enables the researchers to investigate the genetic variability of all available HDV sequences, correlation of genotypes to epidemiology and pathogenesis. Additionally, it will contribute in understanding the drug resistant mutations and develop effective vaccines against HDV infection. The database can be accessed through a web interface that allows for static and dynamic queries and offers integrated generic and specialized sequence analysis tools, such as annotation, genotyping, primer prediction, and phylogenetic analyses.",2020-05-14 +32270255,Major depressive disorder and cardiometabolic diseases: a bidirectional Mendelian randomisation study.,"

Aims/hypothesis

Observational studies have shown a bidirectional association between major depressive disorder (MDD) and cardiometabolic diseases. We conducted a two-sample bidirectional Mendelian randomisation (MR) study to assess the causal associations of MDD with type 2 diabetes, coronary artery disease (CAD) and heart failure and vice versa.

Methods

We extracted summary-level data for MDD, type 2 diabetes, CAD and heart failure from corresponding published large genome-wide association studies of individuals mainly of European-descent. In total, 96 SNPs for MDD, 202 SNPs for type 2 diabetes, 44 SNPs for CAD and 12 SNPs for heart failure were proposed as instrumental variables at the genome-wide significance level (p < 5 × 10-8). The random-effects inverse-variance weighted method was used for the main analyses.

Results

Genetic liability to MDD was significantly associated with type 2 diabetes and CAD at the Bonferroni-corrected significance level. The ORs of type 2 diabetes and CAD were respectively 1.26 (95% CI 1.10, 1.43; p = 6 × 10-4) and 1.16 (95% CI 1.05, 1.29; p = 0.0047) per one-unit increase in loge odds of MDD. There was a suggestive association between MDD and heart failure (OR 1.11 [95% CI 1.01, 1.21]; p = 0.033). We found limited evidence supporting causal effects of cardiometabolic diseases on MDD risk in the reverse MR analyses.

Conclusions/interpretation

The present study strengthened the evidence that MDD is a potential risk factor for type 2 diabetes and CAD. Whether MDD is causally related to heart failure needs further study.

Data availability

All data included in this study were uploaded as supplements and are also publicly available through published GWASs and open GWAS datasets (UK Biobank, 23andMe and Psychiatric Genomics: https://datashare.is.ed.ac.uk/handle/10283/3203; DIAGRAM: http://diagram-consortium.org/downloads.html; CARDIoGRAMplusCD4: www.cardiogramplusc4d.org/; HERMES: http://www.kp4cd.org/datasets/mi). Graphical abstract.",2020-04-08 +31304958,Identifying emerging phenomenon in long temporal phenotyping experiments.,"

Motivation

The rapid improvement of phenotyping capability, accuracy and throughput have greatly increased the volume and diversity of phenomics data. A remaining challenge is an efficient way to identify phenotypic patterns to improve our understanding of the quantitative variation of complex phenotypes, and to attribute gene functions. To address this challenge, we developed a new algorithm to identify emerging phenomena from large-scale temporal plant phenotyping experiments. An emerging phenomenon is defined as a group of genotypes who exhibit a coherent phenotype pattern during a relatively short time. Emerging phenomena are highly transient and diverse, and are dependent in complex ways on both environmental conditions and development. Identifying emerging phenomena may help biologists to examine potential relationships among phenotypes and genotypes in a genetically diverse population and to associate such relationships with the change of environments or development.

Results

We present an emerging phenomenon identification tool called Temporal Emerging Phenomenon Finder (TEP-Finder). Using large-scale longitudinal phenomics data as input, TEP-Finder first encodes the complicated phenotypic patterns into a dynamic phenotype network. Then, emerging phenomena in different temporal scales are identified from dynamic phenotype network using a maximal clique based approach. Meanwhile, a directed acyclic network of emerging phenomena is composed to model the relationships among the emerging phenomena. The experiment that compares TEP-Finder with two state-of-art algorithms shows that the emerging phenomena identified by TEP-Finder are more functionally specific, robust and biologically significant.

Availability and implementation

The source code, manual and sample data of TEP-Finder are all available at: http://phenomics.uky.edu/TEP-Finder/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +32920048,iDRBP_MMC: Identifying DNA-Binding Proteins and RNA-Binding Proteins Based on Multi-Label Learning Model and Motif-Based Convolutional Neural Network.,"DNA-binding protein (DBP) and RNA-binding protein (RBP) are playing crucial roles in gene expression. Accurate identification of them is of great significance, and accurately computational predictors are highly required. In previous studies, DBP recognition and RBP recognition were treated as two separate tasks. Because the functional and structural similarities between DBPs and RBPs are high, the DBP predictors tend to predict RBPs as DBPs, while the RBP predictors tend to predict the DBPs as the RBPs, leading to high cross-prediction rate and low prediction precision. Here we introduced a multi-label learning model based on the motif-based convolutional neural network, and a sequence-based computational method called iDRBP_MMC was proposed to solve the cross-prediction problem so as to improve the predictive performance of DBPs and RBPs. The results on four test datasets showed that it outperformed other state-of-the-art DBP predictors and RBP predictors. When applied to analyze the tomato genome, the results reveal the ability of iDRBP_MMC for large-scale data analysis. Moreover, iDRBP_MMC can identify the proteins binding to both DNA and RNA, which is beyond the scope of existing DBP predictors or RBP predictors. The web-server of iDRBP_MMC is freely available at http://bliulab.net/iDRBP_MMC.",2020-09-11 +30805645,PKAD: a database of experimentally measured pKa values of ionizable groups in proteins. ,"Ionizable residues play key roles in many biological phenomena including protein folding, enzyme catalysis and binding. We present PKAD, a database of experimentally measured pKas of protein residues reported in the literature or taken from existing databases. 
The database contains pKa data for 1350 residues in 157 wild-type proteins and for 232 residues in 45 mutant proteins. Most of these values are for Asp, Glu, His and Lys amino acids. The database is available as downloadable file as well as a web server (http://compbio.clemson.edu/pkad). The PKAD database can be used as a benchmarking source for development and improvement of pKa's prediction methods. The web server provides additional information taken from the corresponding structures and amino acid sequences, which allows for easy search and grouping of the experimental pKas according to various biophysical characteristics, amino acid type and others.",2019-01-01 +29385404,TOXsIgN: a cross-species repository for toxicogenomic signatures.,"Motivation:At the same time that toxicologists express increasing concern about reproducibility in this field, the development of dedicated databases has already smoothed the path toward improving the storage and exchange of raw toxicogenomic data. Nevertheless, none provides access to analyzed and interpreted data as originally reported in scientific publications. Given the increasing demand for access to this information, we developed TOXsIgN, a repository for TOXicogenomic sIgNatures. Results:The TOXsIgN repository provides a flexible environment that facilitates online submission, storage and retrieval of toxicogenomic signatures by the scientific community. It currently hosts 754 projects that describe more than 450 distinct chemicals and their 8491 associated signatures. It also provides users with a working environment containing a powerful search engine as well as bioinformatics/biostatistics modules that enable signature comparisons or enrichment analyses. Availability and implementation:The TOXsIgN repository is freely accessible at http://toxsign.genouest.org. Website implemented in Python, JavaScript and MongoDB, with all major browsers supported. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-06-01 +31612738,A new polyketide from the bark of Hypericum roeperianum Schimp. (Hypericaceae).,"The isolation and spectroscopic data of a hitherto undescribed polyketide (1) from Hypericum roeperianum Schimp. (Hypericaceae) together with six known compounds (2-7) is herein reported. The structure elucidation is based on extensive 1D- and 2D-NMR, infrared, UV and MS experiments. The structures of the known compounds were confirmed by comparison of their spectroscopic data with those of similar reported compounds in the literature. Some of the isolated compounds had a significant activity against a panel of multidrug-resistant bacterial strains.Supplemental data for this article can be accessed at https://doi.org/10.1080/14786419.2019.1677655.",2019-10-15 +30297783,Personalized Integrated Network Modeling of the Cancer Proteome Atlas.,"Personalized (patient-specific) approaches have recently emerged with a precision medicine paradigm that acknowledges the fact that molecular pathway structures and activity might be considerably different within and across tumors. The functional cancer genome and proteome provide rich sources of information to identify patient-specific variations in signaling pathways and activities within and across tumors; however, current analytic methods lack the ability to exploit the diverse and multi-layered architecture of these complex biological networks. We assessed pan-cancer pathway activities for >7700 patients across 32 tumor types from The Cancer Proteome Atlas by developing a personalized cancer-specific integrated network estimation (PRECISE) model. PRECISE is a general Bayesian framework for integrating existing interaction databases, data-driven de novo causal structures, and upstream molecular profiling data to estimate cancer-specific integrated networks, infer patient-specific networks and elicit interpretable pathway-level signatures. 
PRECISE-based pathway signatures, can delineate pan-cancer commonalities and differences in proteomic network biology within and across tumors, demonstrates robust tumor stratification that is both biologically and clinically informative and superior prognostic power compared to existing approaches. Towards establishing the translational relevance of the functional proteome in research and clinical settings, we provide an online, publicly available, comprehensive database and visualization repository of our findings ( https://mjha.shinyapps.io/PRECISE/ ).",2018-10-08 +27577567,DDRprot: a database of DNA damage response-related proteins. ,"The DNA Damage Response (DDR) signalling network is an essential system that protects the genome's integrity. The DDRprot database presented here is a resource that integrates manually curated information on the human DDR network and its sub-pathways. For each particular DDR protein, we present detailed information about its function. If involved in post-translational modifications (PTMs) with each other, we depict the position of the modified residue/s in the three-dimensional structures, when resolved structures are available for the proteins. All this information is linked to the original publication from where it was obtained. Phylogenetic information is also shown, including time of emergence and conservation across 47 selected species, family trees and sequence alignments of homologues. The DDRprot database can be queried by different criteria: pathways, species, evolutionary age or involvement in (PTM). 
Sequence searches using hidden Markov models can be also used.Database URL: http://ddr.cbbio.es.",2016-08-29 +31657565,LFQ-Analyst: An Easy-To-Use Interactive Web Platform To Analyze and Visualize Label-Free Proteomics Data Preprocessed with MaxQuant.,"Relative label-free quantification (LFQ) of shotgun proteomics data using precursor (MS1) signal intensities is one of the most commonly used applications to comprehensively and globally quantify proteins across biological samples and conditions. Due to the popularity of this technique, several software packages, such as the popular software suite MaxQuant, have been developed to extract, analyze, and compare spectral features and to report quantitative information of peptides, proteins, and even post-translationally modified sites. However, there is still a lack of accessible tools for the interpretation and downstream statistical analysis of these complex data sets, in particular for researchers and biologists with no or only limited experience in proteomics, bioinformatics, and statistics. We have therefore created LFQ-Analyst, which is an easy-to-use, interactive web application developed to perform differential expression analysis with ""one click"" and to visualize label-free quantitative proteomic data sets preprocessed with MaxQuant. LFQ-Analyst provides a wealth of user-analytic features and offers numerous publication-quality result graphics to facilitate statistical and exploratory analysis of label-free quantitative data sets. LFQ-Analyst, including an in-depth user manual, is freely available at https://bioinformatics.erc.monash.edu/apps/LFQ-Analyst .",2019-11-08 +25934803,PUG-SOAP and PUG-REST: web services for programmatic access to chemical information in PubChem.,"PubChem (http://pubchem.ncbi.nlm.nih.gov) is a public repository for information on chemical substances and their biological activities, developed and maintained by the US National Institutes of Health (NIH). 
PubChem contains more than 180 million depositor-provided chemical substance descriptions, 60 million unique chemical structures and 225 million bioactivity assay results, covering more than 9000 unique protein target sequences. As an information resource for the chemical biology research community, it routinely receives more than 1 million requests per day from an estimated more than 1 million unique users per month. Programmatic access to this vast amount of data is provided by several different systems, including the US National Center for Biotechnology Information (NCBI)'s Entrez Utilities (E-Utilities or E-Utils) and the PubChem Power User Gateway (PUG)-a common gateway interface (CGI) that exchanges data through eXtended Markup Language (XML). Further simplifying programmatic access, PubChem provides two additional general purpose web services: PUG-SOAP, which uses the simple object access protocol (SOAP) and PUG-REST, which is a Representational State Transfer (REST)-style interface. These interfaces can be harnessed in combination to access the data contained in PubChem, which is integrated with the more than thirty databases available within the NCBI Entrez system.",2015-04-30 +31591416,Democratized image analytics by visual programming through integration of deep models and small-scale machine learning.,"Analysis of biomedical images requires computational expertize that are uncommon among biomedical scientists. Deep learning approaches for image analysis provide an opportunity to develop user-friendly tools for exploratory data analysis. Here, we use the visual programming toolbox Orange ( http://orange.biolab.si ) to simplify image analysis by integrating deep-learning embedding, machine learning procedures, and data visualization. Orange supports the construction of data analysis workflows by assembling components for data preprocessing, visualization, and modeling. 
We equipped Orange with components that use pre-trained deep convolutional networks to profile images with vectors of features. These vectors are used in image clustering and classification in a framework that enables mining of image sets for both novel and experienced users. We demonstrate the utility of the tool in image analysis of progenitor cells in mouse bone healing, identification of developmental competence in mouse oocytes, subcellular protein localization in yeast, and developmental morphology of social amoebae.",2019-10-07 +32910485,Histological observation of the reproductive system in a viviparous teleost Xenotoca eiseni Rutter 1896 (Cyprinodontiformes: Goodeidae).,"Xenotoca eiseni is a viviparous teleost belonging to the family Goodeidae. Here, we report histological observations of the reproductive organs in an adult male, an adult female, a pregnant female with intraovarian embryo and an extracted embryo of X. eiseni. High-resolution images of haematoxylin-eosin-stained sagittal sections revealed the detailed structure of gonads, gametes and reproductive components of the mother-embryo relationship. In the male, mature spermatozoa in the epididymis formed sperm packages. In the female, oogenesis proceeded asynchronously in the ovarian wall, and various stages of oocytes were observed in single ovary. In both sexes, genital openings were located between the anus and anal fin. Developing embryos were observed in an ovary of the pregnant female. Fine structures of components of the mother-to-embryo nutrient supply, ovarian septum and trophotaenia were observed in the pregnant ovary. An immature gonad prior to gamete formation was identified in the extracted embryo. With the aim of supporting the development and extension of studies on this viviparous teleost, we have shared our histological images as raw data in an open online archive, the 'NAGOYA repository (http://hdl.handle.net/2237/00032456)'. 
Our goal is a comprehensive understanding of the viviparous system in fish using both histological observation and molecular biology methods including genomics and proteomics.",2020-09-10 +24563838,Research and teaching with the AFTOL SBD: an informatics resource for fungal subcellular and biochemical data.,"The Structural and Biochemical Database (SBD), developed as part of the US NSF-funded Assembling the Fungal Tree of Life (AFTOL), is a multi-investigator project. It is a major resource to present and manage morphological and biochemical information on Fungi and serves as a phyloinformatics tool for the scientific community. It also is an important resource for teaching mycology. The database, available at http://aftol.umn.edu, includes new and previously published subcellular data on Fungi, supplemented with images and literature links. Datasets automatically combined in NEXUS format from the site permit independent and combined (with molecular data) phylogenetic analyses. Character lists, a major feature of the site, serve as primary reference documents of subcellular and biochemical characters that distinguish taxa across the major fungal lineages. The character lists illustrated with images and drawings are informative for evolutionary and developmental biologists as well as educators, students and the public. Fungal Subcellular Ontology (FSO), developed as part of this effort is a primary initiative to provide a controlled vocabulary describing subcellular structures unique to Fungi. FSO establishes a full complement of terms that provide an operating ontological framework for the database. Examples are provided for using the database for teaching.",2013-11-29 +33228632,A random forest based biomarker discovery and power analysis framework for diagnostics research.,"

Background

Biomarker identification is one of the major and important goal of functional genomics and translational medicine studies. Large scale -omics data are increasingly being accumulated and can provide vital means for the identification of biomarkers for the early diagnosis of complex disease and/or for advanced patient/diseases stratification. These tasks are clearly interlinked, and it is essential that an unbiased and stable methodology is applied in order to address them. Although, recently, many, primarily machine learning based, biomarker identification approaches have been developed, the exploration of potential associations between biomarker identification and the design of future experiments remains a challenge.

Methods

In this study, using both simulated and published experimentally derived datasets, we assessed the performance of several state-of-the-art Random Forest (RF) based decision approaches, namely the Boruta method, the permutation based feature selection without correction method, the permutation based feature selection with correction method, and the backward elimination based feature selection method. Moreover, we conducted a power analysis to estimate the number of samples required for potential future studies.

Results

We present a number of different RF based stable feature selection methods and compare their performances using simulated, as well as published, experimentally derived, datasets. Across all of the scenarios considered, we found the Boruta method to be the most stable methodology, whilst the Permutation (Raw) approach offered the largest number of relevant features, when allowed to stabilise over a number of iterations. Finally, we developed and made available a web interface ( https://joelarkman.shinyapps.io/PowerTools/ ) to streamline power calculations thereby aiding the design of potential future studies within a translational medicine context.

Conclusions

We developed a RF-based biomarker discovery framework and provide a web interface for our framework, termed PowerTools, that caters the design of appropriate and cost-effective subsequent future omics study.",2020-11-23 +32338561,RadAtlas 1.0: a knowledgebase focusing on radiation-associated genes.,"Purpose: Ionizing radiation has very complex biological effects, such as inducing damage to DNA and proteins, ionizing water molecules to produce toxic free radicals, and triggering genetic and somatic effects. Understanding the biomolecular response mechanism of radiation is very important for the prevention and treatment of radiation diseases. However, function information of these radiation-associated genes is hidden in numbers of scientific papers and databases, making it difficult to understand the response mechanism of ionizing radiation.Materials and methods: We collected radiation-associated genes by literature and database mining. Literature and database mining was performed on the basis of biomedical literature from PubMed and gene expression datasets from GEO respectively.Results: We built an ionizing radiation related knowledgebase RadAtlas 1.0 (http://biokb.ncpsb.org/radatlas), which contains 598 radiation-associated genes compiled from literature mining, and 611 potential radiation-associated genes collected from gene expression datasets by differential gene expression analysis. We also provide a user-friendly web interface that offers multiple search methods.Conclusions: RadAtlas collected a large amount of information about genes, biological processes, and pathways related to ionizing radiation. It is the first attempt to provide a comprehensive catalog of radiation-associated genes with literature evidence and potential radiation-associated genes with differential expression evidence. 
We believe that RadAtlas would be a helpful tool to understand the response mechanism to ionizing radiation.",2020-05-12 +32410107,An Immune-Related lncRNA Signature to Predict Survival In Glioma Patients.,"Glioma is the most common and fatal primary brain tumor in human. Long non-coding RNA (lncRNA), which are characterized by regulation of gene expression and chromatin recombination play an important role in glioma, and immunotherapy is a promising cancer treatment. Therefore, it is necessary to identify Immune-related lncRNAs in glioma. In this study,we collected and evaluated the RNA-seq data of The Cancer Genome Atlas (TCGA, https://www.ncbi.nlm.nih.gov/ ) and Chinese Glioma Genome Atlas (CGGA, https://www.cgga.org.cn/ ) glioma patients and immune-related lncRNAs were screened. Cox regression and LASSO analysis were performed to construct a risk score formula to explor the different overall survival between high- and low-risk groups in TCGA and verified with CGGA. Gene ontology (GO) and pathway-enrichment analysis (KEGG) were performed to identify the function of screened genes. Co-expression network were performed of these genes for further analysis. Eleven immune-related lncRNAs were concerned to be involved in survival and adopted to construct the risk score formula. Patients with high-risk score held poor survival both in TCGA and CGGA. Compared with current clinical data, the Area Under Curve (AUC) of different years and Principal components analysis (PCA) suggested that the formula had better predictive power. Functional Annotation of immune-related lncRNAs showed that the differences overall survival of high and low RS group might be caused by the cell differentiation, microtubule polymerization, etc. 
We successfully constructed an immune-related lncRNAs formula with powerful predictive function, which provides certain guidance value to the analysis of glioma pathogenesis and clinical treatment, and potential therapeutic targets for glioma treatment.",2020-05-14 +31867415,Cytochrome c aggregation: A dataset at and far from the isoelectric point.,"We present SEM, ThT fluorescence and circular dichroism (CD) data of amyloidogenic aggregates of cytochrome c (cyt c).This protein is of outmost relevance in many biochemical processes, such as respiratory chain in mitochondria and cells apoptosis. The present data focus on polymorphism of the protein aggregates obtained at the isoelectric point (IP) and by changing the environmental pH above and below the IP, the protein concentration and the base. The SEM images provide evidence for a large variety of structures, depending on the pH and on protein concentration: mature amyloid fibrils and overstructured platelets are distinguishable in the aggregates below IP, and relatively high cyt c concentration, whereas inhomogeneous amyloid formations are observed above it. At pH 10, i.e. close to IP, only characteristic protein particulates at the micrometric scale are observed. SEM and Fluorescence data have been acquired in dried drops of protein solution, prepared in different bases: TRIS-HCl, at the different pH values, or NaOH (pH 13). Along with this, at relatively low cyt c concentration compact layered structures are visible below the IP, though still made of a thin fibrils reticulate, whereas above the IP, also at low cyt c concentration, granulates structures are present, merging into compact layer, alongside with platelets and mature fibers. These areas are characterized by diffuse ThT-fluorescence and typical fibrils. The loss of the predominant alpha helix secondary structure was verified by CD spectra. 
Besides the intrinsic scientific relevance, this data collection provides a set of images useful for spectroscopists to discriminate among different morphologic protein formations and suggests pathways for the achievement of different kinds of cytochrome c aggregates. These data are add-ons of the paper published in the International Journal of Biomacromolecules, 138 (2019) 106-115, https://doi.org/10.1016/j.ijbiomac.2019.07.060.",2019-11-18 +25414335,ProteomeScout: a repository and analysis resource for post-translational modifications and proteins.,"ProteomeScout (https://proteomescout.wustl.edu) is a resource for the study of proteins and their post-translational modifications (PTMs) consisting of a database of PTMs, a repository for experimental data, an analysis suite for PTM experiments, and a tool for visualizing the relationships between complex protein annotations. The PTM database is a compendium of public PTM data, coupled with user-uploaded experimental data. ProteomeScout provides analysis tools for experimental datasets, including summary views and subset selection, which can identify relationships within subsets of data by testing for statistically significant enrichment of protein annotations. Protein annotations are incorporated in the ProteomeScout database from external resources and include terms such as Gene Ontology annotations, domains, secondary structure and non-synonymous polymorphisms. These annotations are available in the database download, in the analysis tools and in the protein viewer. The protein viewer allows for the simultaneous visualization of annotations in an interactive web graphic, which can be exported in Scalable Vector Graphics (SVG) format. Finally, quantitative data measurements associated with public experiments are also easily viewable within protein records, allowing researchers to see how PTMs change across different contexts. 
ProteomeScout should prove useful for protein researchers and should benefit the proteomics community by providing a stable repository for PTM experiments.",2014-11-20 +31654438,NAGbinder: An approach for identifying N-acetylglucosamine interacting residues of a protein from its primary sequence.,"N-acetylglucosamine (NAG) belongs to the eight essential saccharides that are required to maintain the optimal health and precise functioning of systems ranging from bacteria to human. In the present study, we have developed a method, NAGbinder, which predicts the NAG-interacting residues in a protein from its primary sequence information. We extracted 231 NAG-interacting nonredundant protein chains from Protein Data Bank, where no two sequences share more than 40% sequence identity. All prediction models were trained, validated, and evaluated on these 231 protein chains. At first, prediction models were developed on balanced data consisting of 1,335 NAG-interacting and noninteracting residues, using various window size. The model developed by implementing Random Forest using binary profiles as the main principle for identifying NAG-interacting residue with window size 9, performed best among other models. It achieved highest Matthews Correlation Coefficient (MCC) of 0.31 and 0.25, and Area Under Receiver Operating Curve (AUROC) of 0.73 and 0.70 on training and validation data set, respectively. We also developed prediction models on realistic data set (1,335 NAG-interacting and 47,198 noninteracting residues) using the same principle, where the model achieved MCC of 0.26 and 0.27, and AUROC of 0.70 and 0.71, on training and validation data set, respectively. The success of our method can be appraised by the fact that, if a sequence of 1,000 amino acids is analyzed with our approach, 10 residues will be predicted as NAG-interacting, out of which five are correct. 
Best models were incorporated in the standalone version and in the webserver available at https://webs.iiitd.edu.in/raghava/nagbinder/.",2019-11-07 +32234489,Defining and Targeting Adaptations to Oncogenic KRASG12C Inhibition Using Quantitative Temporal Proteomics.,"Covalent inhibitors of the KRASG12C oncoprotein have recently been developed and are being evaluated in clinical trials. Resistance to targeted therapies is common and may limit long-term efficacy of KRAS inhibitors (KRASi). To identify pathways of adaptation to KRASi and predict drug combinations that circumvent resistance, we use mass-spectrometry-based quantitative temporal proteomics to profile the proteomic response to KRASi in pancreatic and lung cancer 2D and 3D cellular models. We quantify 10,805 proteins, representing the most comprehensive KRASi proteome (https://manciaslab.shinyapps.io/KRASi/). Our data reveal common mechanisms of acute and long-term response between KRASG12C-driven tumors. Based on these proteomic data, we identify potent combinations of KRASi with phosphatidylinositol 3-kinase (PI3K), HSP90, CDK4/6, and SHP2 inhibitors, in some instances converting a cytostatic response to KRASi monotherapy to a cytotoxic response to combination treatment. Overall, using quantitative temporal proteomics, we comprehensively characterize adaptations to KRASi and identify combinatorial regimens with potential therapeutic utility.",2020-03-01 +26211629,PLNlncRbase: A resource for experimentally identified lncRNAs in plants.,"Accumulating published reports have confirmed the critical biological role (e.g., cell differentiation, gene regulation, stress response) for plant long non-coding RNAs (lncRNAs). However, a literature-derived database with the aim of lncRNA curation, data deposit and further distribution remains still absent for this particular lncRNA clade. PLNlncRbase has been designed as an easy-to-use resource to provide detailed information for experimentally identified plant lncRNAs. 
In the current version, PLNlncRbase has manually collected data from nearly 200 published literature, covering a total of 1187 plant lncRNAs in 43 plant species. The user can retrieve plant lncRNA entries from a well-organized interface through a keyword search by using the name of plant species or a lncRNA identifier. Each entry upon a query will be returned with detailed information for a specific plant lncRNA, including the species name, a lncRNA identifier, a brief description of the potential biological role, the lncRNA sequence, the lncRNA classification, an expression pattern of the lncRNA, the tissue/developmental stage/condition for lncRNA expression, the detection method for lncRNA expression, a reference literature, and the potential target gene(s) of the lncRNA extracted from the original reference. This database will be regularly updated to greatly facilitate future investigations of plant lncRNAs pertaining to their biological significance. The PLNlncRbase database is now freely available at http://bioinformatics.ahau.edu.cn/PLNlncRbase.",2015-07-23 +25378336,The GOA database: gene Ontology annotation updates for 2015.,"The Gene Ontology Annotation (GOA) resource (http://www.ebi.ac.uk/GOA) provides evidence-based Gene Ontology (GO) annotations to proteins in the UniProt Knowledgebase (UniProtKB). Manual annotations provided by UniProt curators are supplemented by manual and automatic annotations from model organism databases and specialist annotation groups. GOA currently supplies 368 million GO annotations to almost 54 million proteins in more than 480,000 taxonomic groups. The resource now provides annotations to five times the number of proteins it did 4 years ago. As a member of the GO Consortium, we adhere to the most up-to-date Consortium-agreed annotation guidelines via the use of quality control checks that ensures that the GOA resource supplies high-quality functional information to proteins from a wide range of species. 
Annotations from GOA are freely available and are accessible through a powerful web browser as well as a variety of annotation file formats.",2014-11-06 +32838167,"COVID-19, Brachytherapy, and Gynecologic Cancers: a Moroccan Experience.","The treatment of gynecological cancers is the main activity of brachytherapy units. However, during COVID-19 pandemic, precautions should be done in order to reduce the spread of the virus while maintaining all chances to recovery for all patients (Radiother Oncol 148, 227-228, 2020). Despite the extent of the pandemic in our country, limited data are available to establish recommendations with a sufficient level of evidence (Radiother Oncol 148, 227-228, 2020). More recently, the American Brachytherapy Society published some clarifications in this regard and international expert consensus recommendations of radiation therapy for gynecologic malignancies during the COVID-19 pandemic were published (https://www.americanbrachytherapy.org/about-abs/abs-news/abs-statement-on-coronavirus/, Gynecol Oncol 15, 2020). In this commentary, we sought to share the procedures adopted for the management of gynecological cancer patients during COVID-19 pandemic in our brachytherapy unit.",2020-07-15 +25493946,Computational Identification and Systematic Classification of Novel Cytochrome P450 Genes in Salvia miltiorrhiza.,"Salvia miltiorrhiza is one of the most economically important medicinal plants. Cytochrome P450 (CYP450) genes have been implicated in the biosynthesis of its active components. However, only a dozen full-length CYP450 genes have been described, and there is no systematic classification of CYP450 genes in S. miltiorrhiza. We obtained 77,549 unigenes from three tissue types of S. miltiorrhiza using RNA-Seq technology. Combining our data with previously identified CYP450 sequences and scanning with the CYP450 model from Pfam resulted in the identification of 116 full-length and 135 partial-length CYP450 genes. 
The 116 genes were classified into 9 clans and 38 families using standard criteria. The RNA-Seq results showed that 35 CYP450 genes were co-expressed with CYP76AH1, a marker gene for tanshinone biosynthesis, using r≥0.9 as a cutoff. The expression profiles for 16 of 19 randomly selected CYP450 obtained from RNA-Seq were validated by qRT-PCR. Comparing against the KEGG database, 10 CYP450 genes were found to be associated with diterpenoid biosynthesis. Considering all the evidence, 3 CYP450 genes were identified to be potentially involved in terpenoid biosynthesis. Moreover, we found that 15 CYP450 genes were possibly regulated by antisense transcripts (r≥0.9 or r≤-0.9). Lastly, a web resource (SMCYP450, http://www.herbalgenomics.org/samicyp450) was set up, which allows users to browse, search, retrieve and compare CYP450 genes and can serve as a centralized resource.",2014-12-10 +,Characterization of transcriptomes from sexual and asexual lineages of a New Zealand snail (Potamopyrgus antipodarum),"Understanding the evolution and maintenance of sexual reproduction is one of the central challenges of evolutionary biology, yet we know very little about how sex influences molecular evolution. The New Zealand freshwater snail Potamopyrgus antipodarum is ideally suited to address this knowledge gap because obligately sexual individuals often coexist with multiple independently derived obligately asexual lineages. This unusual situation allows direct comparisons both between sexual and asexual P. antipodarum and across populations that differ in the relative frequency of sexual individuals. As such, P. antipodarum has received a great deal of attention as a model system for the maintenance of sex in nature and is also used as a model for environmental toxicology and biological invasions. Molecular genetic resources for P. antipodarum will thus be useful to investigators in a variety of biological fields. 
We used 454 sequencing of cDNA libraries to generate transcriptomes from two sexual and two asexual P. antipodarum lineages. A de novo assembly of 116.7 Mb of sequence reads produced 41 396 contigs, and sequence similarity‐based Gene Ontology annotations were obtained for 3740 contigs. We detected 408 315 SNP loci and 7315 microsatellite loci, which together represent the first genome‐scale resource available for P. antipodarum. Raw 454 read sequences, contig sequences, annotation data and polymorphism data are publicly available in a searchable online database and for download at http://www.biology.uiowa.edu/neiman/transcriptome.php.",2013-03-01 +,F154. ABERRANT SALIENCE NETWORK FUNCTIONAL CONNECTIVITY IN AUDITORY VERBAL HALLUCINATIONS: A FIRST EPISODE PSYCHOSIS SAMPLE,"Abstract

Background

Auditory verbal hallucinations (AVH) often lead to distress and functional disability, and are frequently associated with psychotic illness. Theories of abnormal integration have been proposed to explain symptoms of schizophrenia, including delusions and hallucinations, with a central abnormality being aberrant activity in intrinsic brain networks such as the default mode network (DMN) or the salience network (SN). Previous investigations of patients with schizophrenia assessing functional connectivity (FC) have used a seed-based functional connectivity approach (sb-FC), with seed placement in brain areas responsible for auditory processing, language, and memory; the striatum, and in areas of DMN. These have generated some conflicting results, possibly because of the varying seed placement. The aim of the current study was to address these confounding factors by investigating the intrinsic FC in first episode psychosis (FEP) patients with AVH using within-sample AVH symptom capture seeds. It was hypothesised that patients would show aberrant resting state FC between areas of the DMN and SN and these areas.

Methods

Eighteen FEP individuals and 20 healthy controls were recruited. All the participants underwent resting-state functional Magnetic Resonance Imaging (rs-fMRI). The Data Processing Assistant for Resting-State fMRI Advanced Edition (DPARSFA) V3.1 (http://rfmri.org/DPARSF) (Yan & Zang, 2010) and the statistical parametric mapping software 8 (SPM8) (SPM, Friston, The Wellcome Department of Cognitive Neurology, London, Uk; http://www.fil.ion.ucl.ac.uk/spm) were used to preprocess and analyze the data.

Results

Patients showed increased FC between left insula and bilateral cerebellum, and angular gyrus; and increased FC between left claustrum and left cerebellum and postcentral gyrus. There was reduced FC in FEP patients with AVH between left claustrum and left insula compared to HC. The FC between left insula and left claustrum seeds for patients and HC is shown separately in supplementary information. There were no significant correlations between DUP, dose of antipsychotic medications, and severity of hallucinations and the mean coefficients of clusters that were significantly different between FEP patients and HC.

Discussion

FEP patients showed increased functional connectivity between left insula and bilateral cerebellum and angular gyrus; and increased functional connectivity between left claustrum and left cerebellum and postcentral gyrus. We also found reduced functional connectivity between left claustrum and left insula in FEP patients compared to HC. It is possible the pathology of AVH is primarily located in the insula and angular gyrus. However, given our results of both the left insula seed in patients and HC shows connectivity with right insula and anterior cingulate cortex (key regions of SN) and literature from patients with chronic AVH, the suggestion may be that resting state dysconnectivity within the DMN and SN are implicated in the generation of AVH, which during the experience itself will further involve temporal and auditory networks. Furthermore, decreased intrinsic functional connectivity between the claustrum and the insula may lead to compensatory over activity in parts of the auditory network including areas involved in DMN, auditory processing, language and memory, leading to the complex and individual content of AVH when they occur.",2018-04-01 +29354869,Organ donor pancreases for the study of human islet cell histology and pathophysiology: a precious and valuable resource.,"Direct in vivo assessment of pancreatic islet-cells for the study of the pathophysiology of diabetes in humans is hampered by anatomical and technological hurdles. To date, most of the information that has been generated is derived from histological studies performed on pancreatic tissue from autopsy, surgery, in vivo biopsy or organ donation. Each approach has its advantages and disadvantages (as summarised in this commentary); however, in this edition of Diabetologia, Kusmartseva et al ( https://doi.org/10.1007/s00125-017-4494-x ) provide further evidence to support the use of organ donor pancreases for the study of human diabetes. 
They show that length of terminal hospitalisation of organ donors prior to death does not seem to influence the frequency of inflammatory cells infiltrating the pancreas and the replication of beta cells. These findings are reassuring, demonstrating the reliability of this precious and valuable resource for human islet cells research.",2018-01-21 +27115628,Applications of Protein Thermodynamic Database for Understanding Protein Mutant Stability and Designing Stable Mutants.,"Protein stability is the free energy difference between unfolded and folded states of a protein, which lies in the range of 5-25 kcal/mol. Experimentally, protein stability is measured with circular dichroism, differential scanning calorimetry, and fluorescence spectroscopy using thermal and denaturant denaturation methods. These experimental data have been accumulated in the form of a database, ProTherm, thermodynamic database for proteins and mutants. It also contains sequence and structure information of a protein, experimental methods and conditions, and literature information. Different features such as search, display, and sorting options and visualization tools have been incorporated in the database. ProTherm is a valuable resource for understanding/predicting the stability of proteins and it can be accessed at http://www.abren.net/protherm/ . ProTherm has been effectively used to examine the relationship among thermodynamics, structure, and function of proteins. We describe the recent progress on the development of methods for understanding/predicting protein stability, such as (1) general trends on mutational effects on stability, (2) relationship between the stability of protein mutants and amino acid properties, (3) applications of protein three-dimensional structures for predicting their stability upon point mutations, (4) prediction of protein stability upon single mutations from amino acid sequence, and (5) prediction methods for addressing double mutants. 
A list of online resources for predicting has also been provided.",2016-01-01 +28675924,A statistical framework for biomedical literature mining.,"In systems biology, it is of great interest to identify new genes that were not previously reported to be associated with biological pathways related to various functions and diseases. Identification of these new pathway-modulating genes does not only promote understanding of pathway regulation mechanisms but also allow identification of novel targets for therapeutics. Recently, biomedical literature has been considered as a valuable resource to investigate pathway-modulating genes. While the majority of currently available approaches are based on the co-occurrence of genes within an abstract, it has been reported that these approaches show only sub-optimal performances because 70% of abstracts contain information only for a single gene. To overcome such limitation, we propose a novel statistical framework based on the concept of ontology fingerprint that uses gene ontology to extract information from large biomedical literature data. The proposed framework simultaneously identifies pathway-modulating genes and facilitates interpreting functions of these new genes. We also propose a computationally efficient posterior inference procedure based on Metropolis-Hastings within Gibbs sampler for parameter updates and the poor man's reversible jump Markov chain Monte Carlo approach for model selection. We evaluate the proposed statistical framework with simulation studies, experimental validation, and an application to studies of pathway-modulating genes in yeast. The R implementation of the proposed model is currently available at https://dongjunchung.github.io/bayesGO/. Copyright © 2017 John Wiley & Sons, Ltd.",2017-07-04 +31693067,WPMIAS: Whole-degradome-based Plant MicroRNA-Target Interaction Analysis Server. 
,"A critical aspect for exploring the biological function of a microRNA (miRNA) lies on exact detection and validation of its target mRNAs. However, no convenient and efficient web-based server is available for plant biologists to identify the experimentally verified target mRNAs of miRNAs. In this work, we built a comprehensive web-based platform for miRNA-target analysis, named as Whole-degradome-based Plant MiRNA-target Interaction Analysis Server (WPMIAS), for validation of predicted interactions of miRNAs and their target mRNAs (MTIs) by user-submitted data or all available pre-loaded degradome data. Besides, the server can construct degradome-based miRNA regulatory networks (MRNs) based on the validated MTIs to help study the functions and relations among miRNAs and target mRNAs. WPMIAS is also suitable for other small RNAs (sRNAs), such as 21-nt phased siRNAs (phasiRNAs) and natural antisense siRNAs (nat-siRNAs), which direct cleavage of target mRNAs. Currently, WPMIAS supports 64 plant species with ∼200 cDNA libraries and 274 pre-loaded plant degradome datasets. The user can identify all validated MTIs by analyzing all degradome data at a time and understand when and where MTIs take place and their cleavage levels. With the data obtained from WPMIAS, the user can build a plant miRNA-target map, where it is convenient to find interesting research ideas on miRNAs. In summary, WPMIAS is able to support a comprehensive web-based plant miRNA-target analysis and expected to greatly promote future research on plant miRNAs. It can be freely accessed at https://cbi.njau.edu.cn/WPMIAS/. Supplementary data are available at Bioinformatics online.",2019-11-06 +31788193,"Current ecology, not ancestral dispersal patterns, influences menopause symptom severity.","All human females who reach midlife experience menopause, however, it is currently unclear why women experience this period of infertility, and why it is accompanied by many unpleasant symptoms. 
Using primary data from four ethnic groups in China, we test an existing theory that age of menopause and its symptoms are the result of intragenomic conflict between maternally and paternally inherited genes, with the outcome of such conflict predicted to be contingent on the ancestral postmarital residence pattern of the female (Úbeda, Ohtsuki, & Gardner, Ecology Letters, 17, 2014, 165). The model predicts that being ancestrally patrilocal results in less intragenomic conflict, causing a shorter, less symptomatic perimenopause that terminates in a later menopause. Our findings show no support for this hypothesis and suggest current, rather than ancestral, residence patterns better predict aspects of the menopausal transition. Furthermore, current patrilocality when compared to duolocality is associated with more severe menopause symptoms, which may be due to sexual, rather than intragenomic, conflict.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.27s8k0p.",2019-11-05 +32022239,NATIONAL OBSERVATORY OF ELECTROMAGNETIC FIELDS: NATIONAL TELEMETRIC NETWORK FOR THE MEASUREMENT OF HIGH-FREQUENCY ELECTROMAGNETIC FIELDS IN GREECE.,"At the end of 2015, the operation of the National Observatory of Electromagnetic Fields (established in 2012) has started. The National Observatory of Electromagnetic Fields is a network of 500 fixed (480 broadband and 20 frequency selective) and 13 mobile (vehicle mounted frequency selective) measurement stations throughout Greece that continuously monitor the electromagnetic field levels from all kinds of antenna stations in the frequency range 100kHz-7GHz. The results of this national monitoring network, which is operated and controlled by the Greek Atomic Energy Commission (EEAE), are presented through an interactive web portal (https://paratiritirioemf.eeae.gr), in which data are constantly updated with the latest station measurements. This paper presents the operational aspects and the measurement results of the first 3 years of operation (2016-2018). The measurement results revealed that all values were well below the reference levels for general public exposure, as defined by the Greek legislation.",2020-07-01 +32152446,Edgetic perturbation signatures represent known and novel cancer biomarkers.,"Isoform switching is a recently characterized hallmark of cancer, and often translates to the loss or gain of domains mediating protein interactions and thus, the re-wiring of the interactome. Recent computational tools leverage domain-domain interaction data to resolve the condition-specific interaction networks from RNA-Seq data accounting for the domain content of the primary transcripts expressed. 
Here, we used The Cancer Genome Atlas RNA-Seq datasets to generate 642 patient-specific pairs of interactomes corresponding to both the tumor and the healthy tissues across 13 cancer types. The comparison of these interactomes provided a list of patient-specific edgetic perturbations of the interactomes associated with the cancerous state. We found that among the identified perturbations, select sets are robustly shared between patients at the multi-cancer, cancer-specific and cancer sub-type specific levels. Interestingly, the majority of the alterations do not directly involve significantly mutated genes, nevertheless, they strongly correlate with patient survival. The findings (available at EdgeExplorer: ""http://webclu.bio.wzw.tum.de/EdgeExplorer"") are a new source of potential biomarkers for classifying cancer types and the proteins we identified are potential anti-cancer therapy targets.",2020-03-09 +28280852,TANTIGEN: a comprehensive database of tumor T cell antigens.,"Tumor T cell antigens are both diagnostically and therapeutically valuable molecules. A large number of new peptides are examined as potential tumor epitopes each year, yet there is no infrastructure for storing and accessing the results of these experiments. We have retroactively cataloged more than 1000 tumor peptides from 368 different proteins, and implemented a web-accessible infrastructure for storing and accessing these experimental results. All peptides in TANTIGEN are labeled as one of the four categories: (1) peptides measured in vitro to bind the HLA, but not reported to elicit either in vivo or in vitro T cell response, (2) peptides found to bind the HLA and to elicit an in vitro T cell response, (3) peptides shown to elicit in vivo tumor rejection, and (4) peptides processed and naturally presented as defined by physical detection. 
In addition to T cell response, we also annotate peptides that are naturally processed HLA binders, e.g., peptides eluted from HLA in mass spectrometry studies. TANTIGEN provides a rich data resource for tumor-associated epitope and neoepitope discovery studies and is freely available at http://cvc.dfci.harvard.edu/tantigen/ or http://projects.met-hilab.org/tadb (mirror).",2017-03-09 +30062648,Antibiotic usage in Chinese children: a point prevalence survey.,"

Background

Children as a population have high antimicrobial prescribing rates which may lead to high resistance of bacteria according to data from some single-center surveys of antibiotic prescribing rates in China. The acquirement of baseline data of antibiotic prescribing is the basis of developing intervention strategies on inappropriate antimicrobial prescriptions. Few studies show clearly the pattern and detailed information on classes of antibiotics and distribution of indications of antibiotic prescriptions in children in China. This study aims to assess the antibiotic prescribing patterns among children and neonates hospitalized in 18 hospitals in China.

Methods

A 24-hour point prevalence survey on antimicrobial prescribing was conducted in hospitalized neonates and children in China from December 1st, 2016 to February 28th, 2017. Information on the antibiotic use of patients under 18 years of age who were administered one or more on-going antibiotics in the selected wards over a 24-hour period was collected. These data were submitted to the GARPEC (Global Antimicrobial Resistance, Prescribing and Efficacy in Children and Neonates) web-based application ( https://pidrg-database.sgul.ac.uk/redcap/ ). For statistical analysis, Microsoft Excel 2007 and SPSS 22.0 were used.

Results

The antibiotic data were collected in 35 wards in 18 hospitals from 9 provinces. In total, 67.76% (975/1439) of the patients (n = 1439) were given at least one antibiotic, including 58.1% (173/298) of neonates (n = 298) and 70.3% (802/1141) of children (n = 1141). In neonates, the three most frequently prescribed antibiotics were third-generation cephalosporins (41.7%), penicillins plus enzyme inhibitor (23.8%), and carbapenems (11.2%). In children, the three most frequently prescribed antibiotics were third-generation cephalosporins (35.5%), macrolides (23.2%), and penicillins plus enzyme inhibitors (15.9%). The most common indication for antibiotics was proven or probable bacterial lower respiratory tract infection (30.9% in neonates and 66.6% in children).

Conclusions

Antibiotics are commonly prescribed in the Chinese children population. It is likely that the third-generation cephalosporins and macrolides are currently overused in Chinese children. Efforts must be made to ensure safe and appropriate antibiotic prescribing to reduce and prevent the future development of antibiotic resistance.",2018-07-30 +31721338,Cross-docking benchmark for automated pose and ranking prediction of ligand binding.,"Significant efforts have been devoted in the last decade to improving molecular docking techniques to predict both accurate binding poses and ranking affinities. Some shortcomings in the field are the limited number of standard methods for measuring docking success and the availability of widely accepted standard data sets for use as benchmarks in comparing different docking algorithms throughout the field. In order to address these issues, we have created a Cross-Docking Benchmark server. The server is a versatile cross-docking data set containing 4,399 protein-ligand complexes across 95 protein targets intended to serve as benchmark set and gold standard for state-of-the-art pose and ranking prediction in easy, medium, hard, or very hard docking targets. The benchmark along with a customizable cross-docking data set generation tool is available at http://disco.csb.pitt.edu. We further demonstrate the potential uses of the server in questions outside of basic benchmarking such as the selection of the ideal docking reference structure.",2019-11-28 +29887853,CowPI: A Rumen Microbiome Focussed Version of the PICRUSt Functional Inference Software.,"Metataxonomic 16S rDNA based studies are a commonplace and useful tool in the research of the microbiome, but they do not provide the full investigative power of metagenomics and metatranscriptomics for revealing the functional potential of microbial communities. 
However, the use of metagenomic and metatranscriptomic technologies is hindered by high costs and skills barrier necessary to generate and interpret the data. To address this, a tool for Phylogenetic Investigation of Communities by Reconstruction of Unobserved States (PICRUSt) was developed for inferring the functional potential of an observed microbiome profile, based on 16S data. This allows functional inferences to be made from metataxonomic 16S rDNA studies with little extra work or cost, but its accuracy relies on the availability of completely sequenced genomes of representative organisms from the community being investigated. The rumen microbiome is an example of a community traditionally underrepresented in genome and sequence databases, but recent efforts by projects such as the Global Rumen Census and Hungate 1000 have resulted in a wide sampling of 16S rDNA profiles and almost 500 fully sequenced microbial genomes from this environment. Using this information, we have developed ""CowPI,"" a focused version of the PICRUSt tool provided for use by the wider scientific community in the study of the rumen microbiome. We evaluated the accuracy of CowPI and PICRUSt using two 16S datasets from the rumen microbiome: one generated from rDNA and the other from rRNA where corresponding metagenomic and metatranscriptomic data was also available. We show that the functional profiles predicted by CowPI better match estimates for both the meta-genomic and transcriptomic datasets than PICRUSt, and capture the higher degree of genetic variation and larger pangenomes of rumen organisms. Nonetheless, whilst being closer in terms of predictive power for the rumen microbiome, there were differences when compared to both the metagenomic and metatranscriptome data and so we recommend, where possible, functional inferences from 16S data should not replace metagenomic and metatranscriptomic approaches. 
The tool can be accessed at http://www.cowpi.org and is provided to the wider scientific community for use in the study of the rumen microbiome.",2018-05-25 +33585225,Liq_ccRCC: Identification of Clear Cell Renal Cell Carcinoma Based on the Integration of Clinical Liquid Indices.,"Currently, preoperative diagnosis and differentiation of renal clear cell carcinoma and other subtypes remain a serious challenge for doctors. The liquid biopsy technique and artificial intelligence have inspired the pursuit of distinguishing clear cell renal cell carcinoma using clinically available test data. In this work, a method called liq_ccRCC based on the integration of clinical blood and urine indices through machine learning approaches was successfully designed to achieve this goal. Clinically available biochemical blood data and urine indices were collected from 306 patients with renal cell carcinoma. Finally, the integration of 18 top-ranked clinical liquid indices (13 blood samples and 5 urine samples) was proven to be able to distinguish renal clear cell carcinoma from other subtypes of renal carcinoma by cross-valuation with an AUC of 0.9372. The successful introduction of this identification method suggests that subtype differentiation of renal cell carcinoma can be accomplished based on clinical liquid test data, which is noninvasive and easy to perform. It has huge potential to be developed as a promising innovation strategy for preoperative subtype differentiation of renal cell carcinoma with the advantages of convenience and real-time testing. liq_ccRCC is available online for the free test of readers at http://lishuyan.lzu.edu.cn/liq_ccRCC.",2020-01-01 +30572878,"Geographical mobility of UK trainee doctors, from family home to first job: a national cohort study.","

Background

The UK faces geographical variation in the recruitment of doctors. Understanding where medical graduates choose to go for training is important because doctors are more likely to consider practicing in areas where they completed postgraduate training. The wider literature also suggests that there is a relationship between origin and background, and where doctors wish to train/work. Thus, the purpose of this paper is to investigate the geographical mobility of UK medical graduates from different socio-economic groups in terms of where they wish to spend their first years of postgraduate training.

Methods

This was an observational study of Foundation Programme (FP) doctors who graduated from 33 UK medical schools between 2012 and 2014. Data were accessed via the UK medical education database (UKMED: https://www.ukmed.ac.uk/ ). Chi-square tests were used to examine the relationships between doctors' sociodemographic characteristics and the dependent variable, average driving time from parental home to foundation school/region. Generalised Linear Mixed Models (GLMM) were used to estimate the effects of those factors in combination against the outcome measure.

Results

The majority of doctors prefer to train at foundation schools that are reasonably close to the family home. Those who attended state-funded schools, from non-white ethnic groups and/or from lower socio-economic groups were significantly more likely to choose foundation schools nearer their parental home. Doctors from disadvantaged backgrounds (as determined by entitlement to free school meals, OR = 1.29, p = 0.003 and no parental degree, OR = 1.34, p < 0.001) were associated with higher odds of selecting foundation schools that were closer to the parental home.

Conclusion

The data suggests that recruiting medical students from lower socioeconomic groups and those who originate from under-recruiting areas may be at least part of the solution to filling training posts in these areas. This has obvious implications for the widening access agenda, and equitable distribution of health services.",2018-12-20 +31586405,MatrisomeDB: the ECM-protein knowledge database.,"The extracellular matrix (ECM) is a complex and dynamic meshwork of cross-linked proteins that supports cell polarization and functions and tissue organization and homeostasis. Over the past few decades, mass-spectrometry-based proteomics has emerged as the method of choice to characterize the composition of the ECM of normal and diseased tissues. Here, we present a new release of MatrisomeDB, a searchable collection of curated proteomic data from 17 studies on the ECM of 15 different normal tissue types, six cancer types (different grades of breast cancers, colorectal cancer, melanoma, and insulinoma) and other diseases including vascular defects and lung and liver fibroses. MatrisomeDB (http://www.pepchem.org/matrisomedb) was built by retrieving raw mass spectrometry data files and reprocessing them using the same search parameters and criteria to allow for a more direct comparison between the different studies. The present release of MatrisomeDB includes 847 human and 791 mouse ECM proteoforms and over 350 000 human and 600 000 mouse ECM-derived peptide-to-spectrum matches. For each query, a hierarchically-clustered tissue distribution map, a peptide coverage map, and a list of post-translational modifications identified, are generated. MatrisomeDB is the most complete collection of ECM proteomic data to date and allows the building of a comprehensive ECM atlas.",2020-01-01 +26607947,SymbioGenomesDB: a database for the integration and access to knowledge on host-symbiont relationships. 
,"Symbiotic relationships occur naturally throughout the tree of life, either in a commensal, mutualistic or pathogenic manner. The genomes of multiple organisms involved in symbiosis are rapidly being sequenced and becoming available, especially those from the microbial world. Currently, there are numerous databases that offer information on specific organisms or models, but none offer a global understanding on relationships between organisms, their interactions and capabilities within their niche, as well as their role as part of a system, in this case, their role in symbiosis. We have developed the SymbioGenomesDB as a community database resource for laboratories which intend to investigate and use information on the genetics and the genomics of organisms involved in these relationships. The ultimate goal of SymbioGenomesDB is to host and support the growing and vast symbiotic-host relationship information, to uncover the genetic basis of such associations. SymbioGenomesDB maintains a comprehensive organization of information on genomes of symbionts from diverse hosts throughout the Tree of Life, including their sequences, their metadata and their genomic features. This catalog of relationships was generated using computational tools, custom R scripts and manual integration of data available in public literature. As a highly curated and comprehensive systems database, SymbioGenomesDB provides web access to all the information of symbiotic organisms, their features and links to the central database NCBI. Three different tools can be found within the database to explore symbiosis-related organisms, their genes and their genomes. Also, we offer an orthology search for one or multiple genes in one or multiple organisms within symbiotic relationships, and every table, graph and output file is downloadable and easy to parse for further analysis. 
The robust SymbioGenomesDB will be constantly updated to cope with all the data being generated and included in major databases, in order to serve as an important, useful and timesaving tool. Database URL: http://symbiogenomesdb.uv.es.",2015-11-25 +28095775,ARA-PEPs: a repository of putative sORF-encoded peptides in Arabidopsis thaliana.,"

Background

Many eukaryotic RNAs have been considered non-coding as they only contain short open reading frames (sORFs). However, there is increasing evidence for the translation of these sORFs into bioactive peptides with potent signaling, antimicrobial, developmental, antioxidant roles etc. Yet only a few peptides encoded by sORFs are annotated in the model organism Arabidopsis thaliana.

Results

To aid the functional annotation of these peptides, we have developed ARA-PEPs (available at http://www.biw.kuleuven.be/CSB/ARA-PEPs ), a repository of putative peptides encoded by sORFs in the A. thaliana genome starting from in-house Tiling arrays, RNA-seq data and other publicly available datasets. ARA-PEPs currently lists 13,748 sORF-encoded peptides with transcriptional evidence. In addition to existing data, we have identified 100 novel transcriptionally active regions (TARs) that might encode 341 novel stress-induced peptides (SIPs). To aid in identification of bioactivity, we add functional annotation and sequence conservation to predicted peptides.

Conclusion

To our knowledge, this is the largest repository of plant peptides encoded by sORFs with transcript evidence, publicly available and this resource will help scientists to effortlessly navigate the list of experimentally studied peptides, the experimental and computational evidence supporting the activity of these peptides and gain new perspectives for peptide discovery.",2017-01-17 +32442849,FADB-China: A molecular-level food adulteration database in China based on molecular fingerprints and similarity algorithms prediction expansion.,"Food adulteration is a growing concern worldwide. The collation and analysis of food adulteration cases is of immense significance for food safety regulation and research. We collected 961 cases of food adulteration between 1998 and 2019 from the literature reports and announcements released by the Chinese government. Critical molecules were manually annotated in food adulteration substances as determined by food chemists, to build the first food adulteration database in China (http://www.rxnfinder.org/FADB-China/). This database is also the first molecular-level food adulteration database worldwide. Additionally, we herein propose an in silico method for predicting potentially illegal food additives on the basis of molecular fingerprints and similarity algorithms. Using this algorithm, we predict 1919 chemicals that may be illegally added to food; these predictions can effectively assist in the discovery and prevention of emerging food adulteration.",2020-05-08 +32938640,Integrating Mathematical Modeling with High-Throughput Imaging Explains How Polyploid Populations Behave in Nutrient-Sparse Environments.,"Breast cancer progresses in a multistep process from primary tumor growth and stroma invasion to metastasis. Nutrient-limiting environments promote chemotaxis with aggressive morphologies characteristic of invasion. 
It is unknown how coexisting cells differ in their response to nutrient limitations and how this impacts invasion of the metapopulation as a whole. In this study, we integrate mathematical modeling with microenvironmental perturbation data to investigate invasion in nutrient-limiting environments inhabited by one or two cancer cell subpopulations. Subpopulations were defined by their energy efficiency and chemotactic ability. Invasion distance traveled by a homogeneous population was estimated. For heterogeneous populations, results suggest that an imbalance between nutrient efficacy and chemotactic superiority accelerates invasion. Such imbalance will spatially segregate the two populations and only one type will dominate at the invasion front. Only if these two phenotypes are balanced, the two subpopulations compete for the same space, which decelerates invasion. We investigate ploidy as a candidate biomarker of this phenotypic heterogeneity and discuss its potential to inform the dose of mTOR inhibitors (mTOR-I) that can inhibit chemotaxis just enough to facilitate such competition. SIGNIFICANCE: This study identifies the double-edged sword of high ploidy as a prerequisite to personalize combination therapies with cytotoxic drugs and inhibitors of signal transduction pathways such as mTOR-Is. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/22/5109/F1.large.jpg.",2020-09-16 +31641134,Distributed radiomics as a signature validation study using the Personal Health Train infrastructure.,"Prediction modelling with radiomics is a rapidly developing research topic that requires access to vast amounts of imaging data. Methods that work on decentralized data are urgently needed, because of concerns about patient privacy. Previously published computed tomography medical image sets with gross tumour volume (GTV) outlines for non-small cell lung cancer have been updated with extended follow-up. 
In a previous study, these were referred to as Lung1 (n = 421) and Lung2 (n = 221). The Lung1 dataset is made publicly accessible via The Cancer Imaging Archive (TCIA; https://www.cancerimagingarchive.net ). We performed a decentralized multi-centre study to develop a radiomic signature (hereafter ""ZS2019"") in one institution and validated the performance in an independent institution, without the need for data exchange and compared this to an analysis where all data was centralized. The performance of ZS2019 for 2-year overall survival validated in distributed radiomics was not statistically different from the centralized validation (AUC 0.61 vs 0.61; p = 0.52). Although slightly different in terms of data and methods, no statistically significant difference in performance was observed between the new signature and previous work (c-index 0.58 vs 0.65; p = 0.37). Our objective was not the development of a new signature with the best performance, but to suggest an approach for distributed radiomics. Therefore, we used a similar method as an earlier study. We foresee that the Lung1 dataset can be further re-used for testing radiomic models and investigating feature reproducibility.",2019-10-22 +32853264,Shiny-SoSV: A web-based performance calculator for somatic structural variant detection.,"Somatic structural variants are an important contributor to cancer development and evolution. Accurate detection of these complex variants from whole genome sequencing data is influenced by a multitude of parameters. However, there are currently no tools for guiding study design nor are there applications that could predict the performance of somatic structural variant detection. 
To address this gap, we developed Shiny-SoSV, a user-friendly web-based calculator for determining the impact of common variables on the sensitivity, precision and F1 score of somatic structural variant detection, including choice of variant detection tool, sequencing depth of coverage, variant allele fraction, and variant breakpoint resolution. Using simulation studies, we determined singular and combinatoric effects of these variables, modelled the results using a generalised additive model, allowing structural variant detection performance to be predicted for any combination of predictors. Shiny-SoSV provides an interactive and visual platform for users to easily compare individual and combined impact of different parameters. It predicts the performance of a proposed study design, on somatic structural variant detection, prior to the commencement of benchwork. Shiny-SoSV is freely available at https://hcpcg.shinyapps.io/Shiny-SoSV with accompanying user's guide and example use-cases.",2020-08-27 +,Best Paper Selection,"Agarwal V, Podchiyska T, Banda JM, Goel V, Leung TI, Minty EP, Sweeney TE, Gyang E, Shah NH. Learning statistical models of phenotypes using noisy labeled training data. J Am Med Inform Assoc 2016;23(6):1166-73 +https://academic.oup.com/jamia/article-lookup/doi/10.1093/jamia/ocw028 Harmanci A, Gerstein M. Quantification of private information leakage from phenotype-genotype data: linking attacks. Nat Methods 2016;13(3):251-6 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4834871/ Pfiffner PB, Pinyol I, Natter MD, Mandl KD. C3-PRO: Connecting ResearchKit to the Health System Using i2b2 and FHIR. 
PloS One 2016;11(3):e0152722 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4816293/ Wilkinson MD, Dumontier M, Aalbersberg IJJ, Appleton G, Axton M, Baak A, Blomberg N, Boiten JW, da Silva Santos LB, Bourne PE, Bouwman J, Brookes AJ, Clark T, Crosas M, Dillo I, Dumon O, Edmunds S, Evelo CT, Finkers R, Gonzalez-Beltran A, Gray AJ, Groth P, Goble C, Grethe JS, Heringa J, ‘t Hoen PA, Hooft R, Kuhn T, Kok R, Kok J, Lusher SJ, Martone ME, Mons A, Packer AL, Persson B, Rocca-Serra P, Roos M, van Schaik R, Sansone SA, Schultes E, Sengstag T, Slater T, Strawn G, Swertz MA, Thompson M, van der Lei J, van Mulligen E, Velterop J, Waagmeester A, Wittenburg P, Wolstencroft K, Zhao J, Mons B. The FAIR Guiding Principles for scientific data management and stewardship. Sci Data 2016;3:160018 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4792175/ Springer DB, Tarassenko L, Clifford GD. Logistic regression-HSMM-based heart sound segmentation. IEEE Trans Biomed Eng 2016 Apr;63(4):822-32",2017-08-01 +29659714,Computation and application of tissue-specific gene set weights.,"Motivation:Gene set testing, or pathway analysis, has become a critical tool for the analysis of high-dimensional genomic data. Although the function and activity of many genes and higher-level processes is tissue-specific, gene set testing is typically performed in a tissue agnostic fashion, which impacts statistical power and the interpretation and replication of results. Results:To address this challenge, we have developed a bioinformatics approach to compute tissue-specific weights for individual gene sets using information on tissue-specific gene activity from the Human Protein Atlas (HPA). We used this approach to create a public repository of tissue-specific gene set weights for 37 different human tissue types from the HPA and all collections in the Molecular Signatures Database. 
To demonstrate the validity and utility of these weights, we explored three different applications: the functional characterization of human tissues, multi-tissue analysis for systemic diseases and tissue-specific gene set testing. Availability and implementation:All data used in the reported analyses is publicly available. An R implementation of the method and tissue-specific weights for MSigDB gene set collections can be downloaded at http://www.dartmouth.edu/∼hrfrost/TissueSpecificGeneSets.",2018-09-01 +33205902,A nomogram of clinical and biologic factors to predict survival in children newly diagnosed with high-risk neuroblastoma: An International Neuroblastoma Risk Group project.,"

Background

Long-term outcome remains poor for children with high-risk neuroblastoma (five-year overall survival [OS] ∼50%). Our objectives were to (a) identify prognostic biomarkers and apply them in a nomogram to identify the subgroup of ultra-high-risk patients at highest risk of disease progression/death, for whom novel frontline therapy is urgently needed; and (b) validate the nomogram in an independent cohort.

Methods

A total of 1820 high-risk patients (≥18 months old with metastatic neuroblastoma), diagnosed 1998-2015, from the International Neuroblastoma Risk Groups (INRG) Data Commons were analyzed in a retrospective cohort study. Using multivariable Cox regression of OS from diagnosis, a nomogram was created from prognostic biomarkers to predict three-year OS. External validation was performed using the SIOPEN HR-NBL1 trial cohort (n = 521), evidenced by receiver operating characteristic curves.

Results

The nomogram, including MYCN status (P < 0.0001), lactate dehydrogenase (LDH) (P = 0.0007), and presence of bone marrow metastases (P = 0.004), had robust performance and was validated. Applying the nomogram at diagnosis (a) gives prognosis of an individual patient and (b) identifies patients predicted to have poor outcome (three-year OS was 30% ± 5% for patients with a nomogram score of > 82 points; 58% ± 1% for those ≤82 points). Median follow-up time was 5.5 years (range, 0-14.1).

Conclusions

In high-risk neuroblastoma, a novel, publicly available nomogram using prognostic biomarkers (MYCN status, LDH, presence of bone marrow metastases; https://neuroblastoma.shinyapps.io/High-Risk-Neuroblastoma-Nomogram/) has the flexibility to apply a clinically suitable and context-specific cutoff to identify patients at highest risk of death. This will facilitate testing urgently needed new frontline treatment options to improve outcome for these children.",2020-11-18 +25848172,ISOB: A Database of Indigenous Snake Species of Bangladesh with respective known venom composition.,"

Unlabelled

At present there is no well-structured database available for the venomous snakes and venom composition of snakes in the world although venom has immense importance in biomedical research. Searching for a specific venom component from NCBI, PDB or public databases is troublesome, because they contain a huge number of data entries. Therefore, we created a database named ""ISOB"" which is a web accessible unique secondary database that represents the first online available bioinformatics resource showing venom composition of snakes. This database provides a comprehensive overview of seventy-eight indigenous snake species covering description of snakes supplemented with structural information of the relevant individual available venom proteins. We strongly believe that this database will contribute significantly to the fields of bioinformatics, environmental research, proteomics, drug development and rational drug design.

Availability

The database is freely available at http://www.snakebd.com/.",2015-02-28 +24923821,IFIM: a database of integrated fitness information for microbial genes. ,"Knowledge of an organism's fitness for survival is important for a complete understanding of microbial genetics and effective drug design. Current essential gene databases provide only binary essentiality data from genome-wide experiments. We therefore developed a new database that Integrates quantitative Fitness Information for Microbial genes (IFIM). The IFIM database currently contains data from 16 experiments and 2186 theoretical predictions. The highly significant correlation between the experiment-derived fitness data and our computational simulations demonstrated that the computer-generated predictions were often as reliable as the experimental data. The data in IFIM can be accessed easily, and the interface allows users to browse through the gene fitness information that it contains. IFIM is the first resource that allows easy access to fitness data of microbial genes. We believe this database will contribute to a better understanding of microbial genetics and will be useful in designing drugs to resist microbial pathogens, especially when experimental data are unavailable. Database URL: http://cefg.uestc.edu.cn/ifim/ or http://cefg.cn/ifim/",2014-06-11 +32368572,Data for semi-permanent cationic coating for protein separations.,"Protein separations and analyses are fundamental to fields of study that include biochemistry, biology, physiology, drug discovery, pharmaceuticals, as well as agricultural and food based industries. Here, we provide the data from a novel phospholipid-cetyltrimethylammonium bromide coating capable of separating cationic and anionic proteins with high efficiency. Capillary electrophoresis separations of protein standards were utilized to characterize the performance of the novel coating. 
Using capillary electrophoresis with UV absorbance detection a working pH range of 4-9 was identified, with reproducibility in time ≤1% relative standard deviation, and plate counts for proteins as high as 480,000 plates (lysozyme, pH 7). Further details and results from these data are available in the work reported by Crihfield et al. and can be accessed at https://doi.org/10.1016/j.chroma.2019.460397 [1].",2020-01-11 +29940847,ToTem: a tool for variant calling pipeline optimization.,"BACKGROUND:High-throughput bioinformatics analyses of next generation sequencing (NGS) data often require challenging pipeline optimization. The key problem is choosing appropriate tools and selecting the best parameters for optimal precision and recall. RESULTS:Here we introduce ToTem, a tool for automated pipeline optimization. ToTem is a stand-alone web application with a comprehensive graphical user interface (GUI). ToTem is written in Java and PHP with an underlying connection to a MySQL database. Its primary role is to automatically generate, execute and benchmark different variant calling pipeline settings. Our tool allows an analysis to be started from any level of the process and with the possibility of plugging almost any tool or code. To prevent an over-fitting of pipeline parameters, ToTem ensures the reproducibility of these by using cross validation techniques that penalize the final precision, recall and F-measure. The results are interpreted as interactive graphs and tables allowing an optimal pipeline to be selected, based on the user's priorities. Using ToTem, we were able to optimize somatic variant calling from ultra-deep targeted gene sequencing (TGS) data and germline variant detection in whole genome sequencing (WGS) data. 
CONCLUSIONS:ToTem is a tool for automated pipeline optimization which is freely available as a web application at  https://totem.software .",2018-06-26 +30689715,Inferring clonal heterogeneity in cancer using SNP arrays and whole genome sequencing.,"MOTIVATION:Clonal heterogeneity is common in many types of cancer, including chronic lymphocytic leukemia (CLL). Previous research suggests that the presence of multiple distinct cancer clones is associated with clinical outcome. Detection of clonal heterogeneity from high throughput data, such as sequencing or single nucleotide polymorphism (SNP) array data, is important for gaining a better understanding of cancer and may improve prediction of clinical outcome or response to treatment. Here, we present a new method, CloneSeeker, for inferring clinical heterogeneity from sequencing data, SNP array data, or both. RESULTS:We generated simulated SNP array and sequencing data and applied CloneSeeker along with two other methods. We demonstrate that CloneSeeker is more accurate than existing algorithms at determining the number of clones, distribution of cancer cells among clones, and mutation and/or copy numbers belonging to each clone. Next, we applied CloneSeeker to SNP array data from samples of 258 previously untreated CLL patients to gain a better understanding of the characteristics of CLL tumors and to elucidate the relationship between clonal heterogeneity and clinical outcome. We found that a significant majority of CLL patients appear to have multiple clones distinguished by copy number alterations alone. We also found that the presence of multiple clones corresponded with significantly worse survival among CLL patients. These findings may prove useful for improving the accuracy of prognosis and design of treatment strategies. AVAILABILITY AND IMPLEMENTATION:Code available on R-Forge: https://r-forge.r-project.org/projects/CloneSeeker/. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-09-01 +30239679,AgBioData consortium recommendations for sustainable genomics and genetics databases for agriculture. ,"The future of agricultural research depends on data. The sheer volume of agricultural biological data being produced today makes excellent data management essential. Governmental agencies, publishers and science funders require data management plans for publicly funded research. Furthermore, the value of data increases exponentially when they are properly stored, described, integrated and shared, so that they can be easily utilized in future analyses. AgBioData (https://www.agbiodata.org) is a consortium of people working at agricultural biological databases, data archives and knowledgebases who strive to identify common issues in database development, curation and management, with the goal of creating database products that are more Findable, Accessible, Interoperable and Reusable. We strive to promote authentic, detailed, accurate and explicit communication between all parties involved in scientific data. As a step toward this goal, we present the current state of biocuration, ontologies, metadata and persistence, database platforms, programmatic (machine) access to data, communication and sustainability with regard to data curation. Each section describes challenges and opportunities for these topics, along with recommendations and best practices.",2018-01-01 +28791657,MEGALEX: A megastudy of visual and auditory word recognition.,"Using the megastudy approach, we report a new database (MEGALEX) of visual and auditory lexical decision times and accuracy rates for tens of thousands of words. We collected visual lexical decision data for 28,466 French words and the same number of pseudowords, and auditory lexical decision data for 17,876 French words and the same number of pseudowords (synthesized tokens were used for the auditory modality). 
This constitutes the first large-scale database for auditory lexical decision, and the first database to enable a direct comparison of word recognition in different modalities. Different regression analyses were conducted to illustrate potential ways to exploit this megastudy database. First, we compared the proportions of variance accounted for by five word frequency measures. Second, we conducted item-level regression analyses to examine the relative importance of the lexical variables influencing performance in the different modalities (visual and auditory). Finally, we compared the similarities and differences between the two modalities. All data are freely available on our website ( https://sedufau.shinyapps.io/megalex/ ) and are searchable at www.lexique.org , inside the Open Lexique search engine.",2018-06-01 +26673001,G-Links: a gene-centric link acquisition service.,"With the availability of numerous curated databases, researchers are now able to efficiently use the multitude of biological data by integrating these resources via hyperlinks and cross-references. A large proportion of bioinformatics research tasks, however, may include labor-intensive tasks such as fetching, parsing, and merging datasets and functional annotations from distributed multi-domain databases. This data integration issue is one of the key challenges in bioinformatics. We aim to provide an identifier conversion and data aggregation system as a part of solution to solve this problem with a service named G-Links, 1) by gathering resource URI information from 130 databases and 30 web services in a gene-centric manner so that users can retrieve all available links about a given gene, 2) by providing RESTful API for easy retrieval of links including facet searching based on keywords and/or predicate types, and 3) by producing a variety of outputs as visual HTML page, tab-delimited text, and in Semantic Web formats such as Notation3 and RDF. 
G-Links as well as other relevant documentation are available at http://link.g-language.org/.",2014-11-19 +30304355,LION LBD: a literature-based discovery system for cancer biology.,"

Motivation

The overwhelming size and rapid growth of the biomedical literature make it impossible for scientists to read all studies related to their work, potentially leading to missed connections and wasted time and resources. Literature-based discovery (LBD) aims to alleviate these issues by identifying implicit links between disjoint parts of the literature. While LBD has been studied in depth since its introduction three decades ago, there has been limited work making use of recent advances in biomedical text processing methods in LBD.

Results

We present LION LBD, a literature-based discovery system that enables researchers to navigate published information and supports hypothesis generation and testing. The system is built with a particular focus on the molecular biology of cancer using state-of-the-art machine learning and natural language processing methods, including named entity recognition and grounding to domain ontologies covering a wide range of entity types and a novel approach to detecting references to the hallmarks of cancer in text. LION LBD implements a broad selection of co-occurrence based metrics for analyzing the strength of entity associations, and its design allows real-time search to discover indirect associations between entities in a database of tens of millions of publications while preserving the ability of users to explore each mention in its original context in the literature. Evaluations of the system demonstrate its ability to identify undiscovered links and rank relevant concepts highly among potential connections.

Availability and implementation

The LION LBD system is available via a web-based user interface and a programmable API, and all components of the system are made available under open licenses from the project home page http://lbd.lionproject.net.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +26787663,lnCaNet: pan-cancer co-expression network for human lncRNA and cancer genes.,"

Unlabelled

Thousands of human long non-coding RNAs (lncRNAs) have been identified in cancers and played important roles in a wide range of tumorigenesis. However, the functions of the vast majority of human lncRNAs are still elusive. Emerging studies revealed that the expression level of the majority of lncRNAs shows discordant expression pattern with their protein-coding gene neighbors in various model organisms. Therefore, it may be useful to infer lncRNAs' potential biological function in cancer development by more comprehensive functional views of co-expressed cancer genes beyond mere physical proximity of genes. To this aim, we performed thorough searches and analyses of the interactions between lncRNA and non-neighboring cancer genes and provide a comprehensive co-expression data resource, LnCaNet. In the current version, LnCaNet contains the pre-computed 8 494 907 significant co-expression pairs of 9641 lncRNAs and 2544 well-classified cancer genes in 2922 matched TCGA samples. In detail, we integrated 10 cancer gene lists from public databases and calculated the co-expression with all the lncRNAs in 11 TCGA cancer types separately. Based on the resulting 110 co-expression networks, we identified 17 common regulatory pairs related to extracellular space shared in 11 cancers. We expect LnCaNet will enable researchers to explore lncRNA expression pattern, their affected cancer genes and pathways, biological significance in the context of specific cancer types and other useful annotation related to particular kind of lncRNA-cancer gene interaction.

Availability and implementation

http://lncanet.bioinfo-minzhao.org/

Contact

m.zhao@uq.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-18 +32266012,The UCSC repeat browser allows discovery and visualization of evolutionary conflict across repeat families.,"

Background

Nearly half the human genome consists of repeat elements, most of which are retrotransposons, and many of which play important biological roles. However repeat elements pose several unique challenges to current bioinformatic analyses and visualization tools, as short repeat sequences can map to multiple genomic loci resulting in their misclassification and misinterpretation. In fact, sequence data mapping to repeat elements are often discarded from analysis pipelines. Therefore, there is a continued need for standardized tools and techniques to interpret genomic data of repeats.

Results

We present the UCSC Repeat Browser, which consists of a complete set of human repeat reference sequences derived from annotations made by the commonly used program RepeatMasker. The UCSC Repeat Browser also provides an alignment from the human genome to these references, uses it to map the standard human genome annotation tracks, and presents all of them as a comprehensive interface to facilitate work with repetitive elements. It also provides processed tracks of multiple publicly available datasets of particular interest to the repeat community, including ChIP-seq datasets for KRAB Zinc Finger Proteins (KZNFs) - a family of proteins known to bind and repress certain classes of repeats. We used the UCSC Repeat Browser in combination with these datasets, as well as RepeatMasker annotations in several non-human primates, to trace the independent trajectories of species-specific evolutionary battles between LINE 1 retroelements and their repressors. Furthermore, we document at https://repeatbrowser.ucsc.edu how researchers can map their own human genome annotations to these reference repeat sequences.

Conclusions

The UCSC Repeat Browser allows easy and intuitive visualization of genomic data on consensus repeat elements, circumventing the problem of multi-mapping, in which sequencing reads of repeat elements map to multiple locations on the human genome. By developing a reference consensus, multiple datasets and annotation tracks can easily be overlaid to reveal complex evolutionary histories of repeats in a single interactive window. Specifically, we use this approach to retrace the history of several primate specific LINE-1 families across apes, and discover several species-specific routes of evolution that correlate with the emergence and binding of KZNFs.",2020-03-31 +31913465,Gapsplit: efficient random sampling for non-convex constraint-based models.,"

Summary

Gapsplit generates random samples from convex and non-convex constraint-based models by targeting under-sampled regions of the solution space. Gapsplit provides uniform coverage of linear, mixed-integer and general non-linear models.

Availability and implementation

Python and Matlab source code are freely available at http://jensenlab.net/tools.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +31072179,A Large-Scale Comparison of Main Concept Production Between Persons With Aphasia and Persons Without Brain Injury.,"Purpose The purposes of this study are to provide clinicians and researchers with introductory psychometric data for the main concept analysis (MCA), a measure of discourse informativeness, and specifically, to provide descriptive and comparative statistical information about the performance of a large sample of persons not brain injured (PNBIs) and persons with aphasia (PWAs) on AphasiaBank discourse tasks. Method Transcripts of 5 semi-spontaneous discourse tasks were retrieved from the AphasiaBank database and scored according to detailed checklists and scoring procedures. Transcripts from 145 PNBIs and 238 PWAs were scored; descriptive statistics, median tests, and effect sizes are reported. Results PWAs demonstrated overall lower informativeness scores and more frequent production of statements that were inaccurate and/or incomplete. Differences between PNBIs and PWAs were observed for all main concept measures and stories. Comparisons of PNBIs and aphasia subtypes revealed significant differences for all groups, although the pattern of differences and strength of effect sizes varied by group and discourse task. Conclusions These results may improve the investigative and clinical utility of the MCA by providing descriptive and comparative information for PNBIs and PWAs for standardized discourse tasks that can be reliably scored. The results indicate that the MCA is sensitive to differences in discourse as a result of aphasia. Supplemental Material https://doi.org/10.23641/asha.7485647.",2019-03-01 +32083704,Identifying cancer driver genes from functional genomics screens.,"With the emerging advances made in genomics and functional genomics approaches, there is a critical and growing unmet need to integrate plural datasets in order to identify driver genes in cancer. 
An integrative approach, with the convergence of multiple types of genetic evidence, can limit false positives through a posterior filtering strategy and reduce the need for multiple hypothesis testing to identify true cancer vulnerabilities. We performed a pooled shRNA screen against 906 human genes in the oral cancer cell line AW13516 in triplicate. The genes that were depleted in the screen were integrated with copy number alteration and gene expression data and ranked based on ROAST analysis, using an integrative scoring system, DepRanker, to compute a Rank Impact Score (RIS) for each gene. The RIS-based ranking of candidate driver genes was used to identify the putative oncogenes AURKB and TK1 as essential for oral cancer cell proliferation. We validated the findings, showing that shRNA mediated genetic knockdown of TK1 or pharmacological inhibition of AURKB by AZD-1152 HQPA in AW13516 cells could significantly impede their proliferation. Next we analysed alterations in AURKB and TK1 genes in head and neck cancer and their association with prognosis using data on 528 patients obtained from TCGA. Patients harbouring alterations in AURKB and TK1 genes were associated with poor survival. To summarise, we present DepRanker as a simple yet robust package with no third-party dependencies for the identification of potential driver genes from a pooled shRNA functional genomic screen by integrating results from RNAi screens with gene expression and copy number data. Using DepRanker, we identify AURKB and TK1 as potential therapeutic targets in oral cancer. DepRanker is in the public domain and available for download at http://www.actrec.gov.in/pi-webpages/AmitDutt/DepRanker/DepRanker.html.",2020-02-21 +30737407,Functional genomics reveal gene regulatory mechanisms underlying schizophrenia risk.,"Genome-wide association studies (GWASs) have identified over 180 independent schizophrenia risk loci. 
Nevertheless, how the risk variants in the reported loci confer schizophrenia susceptibility remains largely unknown. Here we systematically investigate the gene regulatory mechanisms underpinning schizophrenia risk through integrating data from functional genomics (including 30 ChIP-Seq experiments) and position weight matrix (PWM). We identify 132 risk single nucleotide polymorphisms (SNPs) that disrupt transcription factor binding and we find that 97 of the 132 TF binding-disrupting SNPs are associated with gene expression in human brain tissues. We validate the regulatory effect of some TF binding-disrupting SNPs with reporter gene assays (9 SNPs) and allele-specific expression analysis (10 SNPs). Our study reveals gene regulatory mechanisms affected by schizophrenia risk SNPs (including widespread disruption of POLR2A and CTCF binding) and identifies target genes for mechanistic studies and drug development. Our results can be accessed and visualized at SZDB database ( http://www.szdb.org/ ).",2019-02-08 +33735166,"Malaria Surveillance - United States, 2017.","

Problem/condition

Malaria in humans is caused by intraerythrocytic protozoa of the genus Plasmodium. These parasites are transmitted by the bite of an infective female Anopheles species mosquito. The majority of malaria infections in the United States occur among persons who have traveled to regions with ongoing malaria transmission. However, malaria is occasionally acquired by persons who have not traveled out of the country through exposure to infected blood products, congenital transmission, nosocomial exposure, or local mosquitoborne transmission. Malaria surveillance in the United States is conducted to provide information on its occurrence (e.g., temporal, geographic, and demographic), guide prevention and treatment recommendations for travelers and patients, and facilitate rapid transmission control measures if locally acquired cases are identified.

Period covered

This report summarizes confirmed malaria cases in persons with onset of illness in 2017 and trends in previous years.

Description of system

Malaria cases diagnosed by blood film microscopy, polymerase chain reaction, or rapid diagnostic tests are reported to local and state health departments through electronic laboratory reports or by health care providers or laboratory staff members. Case investigations are conducted by local and state health departments, and reports are transmitted to CDC through the National Malaria Surveillance System (NMSS), the National Notifiable Diseases Surveillance System (NNDSS), or direct CDC consultations. CDC reference laboratories provide diagnostic assistance and conduct antimalarial drug resistance marker testing on blood samples submitted by health care providers or local or state health departments. This report summarizes data from the integration of all cases from NMSS and NNDSS, CDC reference laboratory reports, and CDC clinical consultations.

Results

CDC received reports of 2,161 confirmed malaria cases with onset of symptoms in 2017, including two congenital cases, three cryptic cases, and two cases acquired through blood transfusion. The number of malaria cases diagnosed in the United States has been increasing since the mid-1970s; in 2017, the number of cases reported was the highest in 45 years, surpassing the previous peak of 2,078 confirmed cases reported in 2016. Of the cases in 2017, a total of 1,819 (86.1%) were imported cases that originated from Africa; 1,216 (66.9%) of these came from West Africa. The overall proportion of imported cases originating from West Africa was greater in 2017 (57.6%) than in 2016 (51.6%). Among all cases, P. falciparum accounted for the majority of infections (1,523 [70.5%]), followed by P. vivax (216 [10.0%]), P. ovale (119 [5.5%]), and P. malariae (55 [2.6%]). Infections by two or more species accounted for 22 cases (1.0%). The infecting species was not reported or was undetermined in 226 cases (10.5%). CDC provided diagnostic assistance for 9.5% of confirmed cases and tested 8.0% of specimens with P. falciparum infections for antimalarial resistance markers. Most patients (94.8%) had symptom onset <90 days after returning to the United States from a country with malaria transmission. Of the U.S. civilian patients who reported reason for travel, 73.1% were visiting friends and relatives. The proportion of U.S. residents with malaria who reported taking any chemoprophylaxis in 2017 (28.4%) was similar to that in 2016 (26.4%), and adherence was poor among those who took chemoprophylaxis. Among the 996 U.S. residents with malaria for whom information on chemoprophylaxis use and travel region were known, 93.3% did not adhere to or did not take a CDC-recommended chemoprophylaxis regimen. Among 805 women with malaria, 27 reported being pregnant. Of these, 10 pregnant women were U.S. residents, and none reported taking chemoprophylaxis to prevent malaria. 
A total of 26 (1.2%) malaria cases occurred among U.S. military personnel in 2017, fewer than in 2016 (41 [2.0%]). Among all reported cases in 2017, a total of 312 (14.4%) were classified as severe malaria illnesses, and seven persons died. In 2017, CDC analyzed 117 P. falciparum-positive and six P. falciparum mixed-species samples for antimalarial resistance markers (although certain loci were untestable in some samples); identification of genetic polymorphisms associated with resistance to pyrimethamine were found in 108 (97.3%), to sulfadoxine in 77 (69.4%), to chloroquine in 38 (33.3%), to mefloquine in three (2.7%), and to atovaquone in three (2.7%); no specimens tested contained a marker for artemisinin resistance. The data completeness of key variables (species, country of acquisition, and resident status) was lower in 2017 (74.4%) than in 2016 (79.4%).

Interpretation

The number of reported malaria cases in 2017 continued a decades-long increasing trend, and for the second year in a row the highest number of cases since 1971 have been reported. Despite progress in malaria control in recent years, the disease remains endemic in many areas globally. The importation of malaria reflects the overall increase in global travel to and from these areas. Fifty-six percent of all cases were among persons who had traveled from West Africa, and among U.S. civilians, visiting friends and relatives was the most common reason for travel (73.1%). Frequent international travel combined with the inadequate use of prevention measures by travelers resulted in the highest number of imported malaria cases detected in the United States in 4 decades.

Public health actions

The best way to prevent malaria is to take chemoprophylaxis medication during travel to a country where malaria is endemic. Adherence to recommended malaria prevention strategies among U.S. travelers would reduce the numbers of imported cases; reasons for nonadherence include prematurely stopping after leaving the area where malaria was endemic, forgetting to take the medication, and experiencing a side effect. Travelers might not understand the risk that malaria poses to them; thus, health care providers should incorporate risk education to motivate travelers to be adherent to chemoprophylaxis. Malaria infections can be fatal if not diagnosed and treated promptly with antimalarial medications appropriate for the patient's age, medical history, the likely country of malaria acquisition, and previous use of antimalarial chemoprophylaxis. Antimalarial use for chemoprophylaxis and treatment should be informed by the most recent guidelines, which are frequently updated. In 2018, two formulations of tafenoquine (i.e., Arakoda and Krintafel) were approved by the Food and Drug Administration (FDA) for use in the United States. Arakoda was approved for use by adults for chemoprophylaxis; the regimen requires a predeparture loading dose, taking the medication weekly during travel, and a short course posttravel. The Arakoda chemoprophylaxis regimen is shorter than alternative regimens, which could possibly improve adherence. This medication also might prevent relapses. Krintafel was approved for radical cure of P. vivax infections in those aged >16 years and should be co-administered with chloroquine (https://www.cdc.gov/malaria/new_info/2020/tafenoquine_2020.html). In April 2019, intravenous artesunate became the first-line medication for treatment of severe malaria in the United States. Artesunate was recently FDA approved but is not yet commercially available. The drug can be obtained from CDC under an investigational new drug protocol. 
Detailed recommendations for preventing malaria are available to the general public at the CDC website (https://www.cdc.gov/malaria/travelers/drugs.html). Health care providers should consult the CDC Guidelines for Treatment of Malaria in the United States and contact the CDC's Malaria Hotline for case management advice when needed. Malaria treatment recommendations are available online (https://www.cdc.gov/malaria/diagnosis_treatment) and from the Malaria Hotline (770-488-7788 or toll-free 855-856-4713). Persons submitting malaria case reports (care providers, laboratories, and state and local public health officials) should provide complete information because incomplete reporting compromises case investigations and efforts to prevent infections and examine trends in malaria cases. Molecular surveillance of antimalarial drug resistance markers (https://www.cdc.gov/malaria/features/ars.html) enables CDC to track, guide treatment, and manage drug resistance in malaria parasites both domestically and internationally. More samples are needed to improve the completeness of antimalarial drug resistance analysis; therefore, CDC requests that blood specimens be submitted for any case of malaria diagnosed in the United States.",2021-03-19 +32379489,Challenges Raised by Mediation Analysis in a High-Dimension Setting.,"

Background

Mediation analysis is used in epidemiology to identify pathways through which exposures influence health. The advent of high-throughput (omics) technologies gives opportunities to perform mediation analysis with a high-dimension pool of covariates.

Objective

We aimed to highlight some biostatistical issues of this expanding field of high-dimension mediation.

Discussion

The mediation techniques used for a single mediator cannot be generalized in a straightforward manner to high-dimension mediation. Causal knowledge on the relation between covariates is required for mediation analysis, and it is expected to be more limited as dimension and system complexity increase. The methods developed in high dimension can be distinguished according to whether mediators are considered separately or as a whole. Methods considering each potential mediator separately do not allow efficient identification of the indirect effects when mutual influences exist among the mediators, which is expected for many biological (e.g., epigenetic) parameters. In this context, methods considering all potential mediators simultaneously, based, for example, on data reduction techniques, are more adapted to the causal inference framework. Their cost is a possible lack of ability to single out the causal mediators. Moreover, the ability of the mediators to predict the outcome can be overestimated, in particular because many machine-learning algorithms are optimized to increase predictive ability rather than their aptitude to make causal inference. Given the lack of overarching validated framework and the generally complex causal structure of high-dimension data, analysis of high-dimension mediation currently requires great caution and effort to incorporate a priori biological knowledge. https://doi.org/10.1289/EHP6240.",2020-05-06 +32133329,Palliative Care in SMA Type 1: A Prospective Multicenter French Study Based on Parents' Reports.,"Spinal muscular atrophy type 1 (SMA-1) is a severe neurodegenerative disorder, which in the absence of curative treatment, leads to death before 1 year of age in most cases. Caring for these short-lived and severely impaired infants requires palliative management. New drugs (nusinersen) have recently been developed that may modify SMA-1 natural history and thus raise ethical concerns about the appropriate level of care for patients. 
The national Hospital Clinical Research Program (PHRC) called ""Assessment of clinical practices of palliative care in children with Spinal Muscular Atrophy Type 1 (SMA-1)"" was a multicenter prospective study conducted in France between 2012 and 2016 to report palliative practices in SMA-1 in real life through prospective caregivers' reports about their infants' management. Thirty-nine patients were included in the prospective PHRC (17 centers). We also studied retrospective data regarding management of 43 other SMA-1 patients (18 centers) over the same period, including seven treated with nusinersen, in comparison with historical data from 222 patients previously published over two periods of 10 years (1989-2009). In the latest period studied, median age at diagnosis was 3 months [0.6-10.4]. Seventy-seven patients died at a median 6 months of age[1-27]: 32% at home and 8% in an intensive care unit. Eighty-five percent of patients received enteral nutrition, some through a gastrostomy (6%). Sixteen percent had a non-invasive ventilation (NIV). Seventy-seven percent received sedative treatment at the time of death. Over time, palliative management occurred more frequently at home with increased levels of technical supportive care (enteral nutrition, oxygenotherapy, and analgesic and sedative treatments). No statistical difference was found between the prospective and retrospective patients for the last period. However, significant differences were found between patients treated with nusinersen vs. those untreated. Our data confirm that palliative care is essential in management of SMA-1 patients and that parents are extensively involved in everyday patient care. Our data suggest that nusinersen treatment was accompanied by significantly more invasive supportive care, indicating that a re-examination of standard clinical practices should explicitly consider what treatment pathways are in infants' and caregivers' best interest. 
This study was registered on clinicaltrials.gov under the reference NCT01862042 (https://clinicaltrials.gov/ct2/show/study/NCT01862042?cond=SMA1&rank=8).",2020-02-18 +24628857,AlgaePath: comprehensive analysis of metabolic pathways using transcript abundance data from next-generation sequencing in green algae.,"

Background

Algae are important non-vascular plants that have many research applications, including high species diversity, biofuel sources, and adsorption of heavy metals and, following processing, are used as ingredients in health supplements. The increasing availability of next-generation sequencing (NGS) data for algae genomes and transcriptomes has made the development of an integrated resource for retrieving gene expression data and metabolic pathway essential for functional analysis and systems biology. In a currently available resource, gene expression profiles and biological pathways are displayed separately, making it impossible to easily search current databases to identify the cellular response mechanisms. Therefore, in this work the novel AlgaePath database was developed to retrieve transcript abundance profiles efficiently under various conditions in numerous metabolic pathways.

Description

AlgaePath is a web-based database that integrates gene information, biological pathways, and NGS datasets for the green algae Chlamydomonas reinhardtii and Neodesmus sp. UTEX 2219-4. Users can search this database to identify transcript abundance profiles and pathway information using five query pages (Gene Search, Pathway Search, Differentially Expressed Genes (DEGs) Search, Gene Group Analysis, and Co-expression Analysis). The transcript abundance data of 45 and four samples from C. reinhardtii and Neodesmus sp. UTEX 2219-4, respectively, can be obtained directly on pathway maps. Genes that are differentially expressed between two conditions can be identified using Folds Search. The Gene Group Analysis page includes a pathway enrichment analysis, and can be used to easily compare the transcript abundance profiles of functionally related genes on a map. Finally, the Co-expression Analysis page can be used to search for co-expressed transcripts of a target gene. The results of the searches will provide a valuable reference for designing further experiments and for elucidating critical mechanisms from high-throughput data.

Conclusions

AlgaePath is an effective interface that can be used to clarify the transcript response mechanisms in different metabolic pathways under various conditions. Importantly, AlgaePath can be mined to identify critical mechanisms based on high-throughput sequencing. To our knowledge, AlgaePath is the most comprehensive resource for integrating numerous databases and analysis tools in algae. The system can be accessed freely online at http://algaepath.itps.ncku.edu.tw.",2014-03-14 +32556075,Capybara: equivalence ClAss enumeration of coPhylogenY event-BAsed ReconciliAtions.,"

Motivation

Phylogenetic tree reconciliation is the method of choice in analyzing host-symbiont systems. Despite the many reconciliation tools that have been proposed in the literature, two main issues remain unresolved: (i) listing suboptimal solutions (i.e. whose score is 'close' to the optimal ones) and (ii) listing only solutions that are biologically different 'enough'. The first issue arises because the optimal solutions are not always the ones biologically most significant; providing many suboptimal solutions as alternatives for the optimal ones is thus very useful. The second one is related to the difficulty to analyze an often huge number of optimal solutions. In this article, we propose Capybara that addresses both of these problems in an efficient way. Furthermore, it includes a tool for visualizing the solutions that significantly helps the user in the process of analyzing the results.

Availability and implementation

The source code, documentation and binaries for all platforms are freely available at https://capybara-doc.readthedocs.io/.

Contact

yishu.wang@univ-lyon1.fr or blerina.sinaimeri@inria.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +27943495,The mitochondrial complexome of Arabidopsis thaliana.,"Mitochondria are central to cellular metabolism and energy conversion. In plants they also enable photosynthesis through additional components and functional flexibility. A majority of those processes relies on the assembly of individual proteins to larger protein complexes, some of which operate as large molecular machines. There has been a strong interest in the makeup and function of mitochondrial protein complexes and protein-protein interactions in plants, but the experimental approaches used typically suffer from selectivity or bias. Here, we present a complexome profiling analysis for leaf mitochondria of the model plant Arabidopsis thaliana for the systematic characterization of protein assemblies. Purified organelle extracts were separated by 1D Blue native (BN) PAGE, a resulting gel lane was dissected into 70 slices (complexome fractions) and proteins in each slice were identified by label free quantitative shot-gun proteomics. Overall, 1359 unique proteins were identified, which were, on average, present in 17 complexome fractions each. Quantitative profiles of proteins along the BN gel lane were aligned by similarity, allowing us to visualize protein assemblies. The data allow re-annotating the subunit compositions of OXPHOS complexes, identifying assembly intermediates of OXPHOS complexes and assemblies of alternative respiratory oxidoreductases. Several protein complexes were discovered that have not yet been reported in plants, such as a 530 kDa Tat complex, 460 and 1000 kDa SAM complexes, a calcium ion uniporter complex (150 kDa) and several PPR protein complexes. 
We have set up a tailored online resource (https://complexomemap.de/at_mito_leaves) to deposit the data and to allow straightforward access and custom data analyses.",2017-02-20 +25740498,"Data mining in newt-omics, the repository for omics data from the newt.","Salamanders are an excellent model organism to study regenerative processes due to their unique ability to regenerate lost appendages or organs. Straightforward bioinformatics tools to analyze and take advantage of the growing number of ""omics"" studies performed in salamanders were lacking so far. To overcome this limitation, we have generated a comprehensive data repository for the red-spotted newt Notophthalmus viridescens, named newt-omics, merging omics style datasets on the transcriptome and proteome level including expression values and annotations. The resource is freely available via a user-friendly Web-based graphical user interface ( http://newt-omics.mpi-bn.mpg.de) that allows access and queries to the database without prior bioinformatical expertise. The repository is updated regularly, incorporating new published datasets from omics technologies.",2015-01-01 +31990536,SPREAD: A Fully Automated Toolkit for Single-Particle Cryogenic Electron Microscopy Data 3D Reconstruction with Image-Network-Aided Orientation Assignment.,"For the past decade, cryogenic electron microscopy (cryo-EM) has become an important technology to determine three-dimensional (3D) structures of biomacromolecules. Many software tools have been developed for cryo-EM image processing and 3D reconstruction, covering various computational tasks in cryo-EM data analysis. Despite the recent progress, most of these software tools focus on a single task, such as automatic particle picking or image clustering, whereas software packages covering the whole pipeline of cryo-EM data processing are still few. 
In this study, we developed a fully automatic single-particle reconstruction and analysis toolkit for cryo-EM data, named SPREAD, which integrates 2D image classification, 3D initial model generation, model selection, and 3D refinement. In SPREAD, we adopt our previously proposed network-based clustering algorithm for 2D image classification, NCEM, and the reference-free resolution measurement method SRes to realize the automatic model ranking and selection procedure. Projection orientation assignment is one of the key steps in initial model generation and 3D refinement. In SPREAD, we use the network-based image similarity metric and introduce a new probabilistic-based orientation searching method, named peak finding, to enhance assignment of the projection orientations. For dealing with both the particle images and projection images in the 3D refinement using SPREAD, we build a mixture image network containing both of these types of images on the basis of the peak-finding results, and then similarities for node pairs are recomputed by a superposed random walk on the network. SPREAD achieves a fully automatic workflow in which nearly no expert domain knowledge and interactive manual operation are involved. Our software can be accessed for free at http://www.csbio.sjtu.edu.cn/bioinf/SPREAD/ for academic use.",2020-02-19 +29628801,Mapping watershed integrity for the conterminous United States.,"Watershed integrity is the capacity of a watershed to support and maintain the full range of ecological processes and functions essential to sustainability. Using information from EPA's StreamCat dataset, we calculated and mapped an Index of Watershed Integrity (IWI) for 2.6 million watersheds in the conterminous US with first-order approximations of relationships between stressors and six watershed functions: hydrologic regulation, regulation of water chemistry, sediment regulation, hydrologic connectivity, temperature regulation, and habitat provision. 
Results show high integrity in the western US, intermediate integrity in the southern and eastern US, and the lowest integrity in the temperate plains and lower Mississippi Valley. Correlation between the six functional components was high (r = 0.85-0.98). A related Index of Catchment Integrity (ICI) was developed using local drainages of individual stream segments (i.e., excluding upstream information). We evaluated the ability of the IWI and ICI to predict six continuous site-level indicators with regression analyses - three biological indicators and principal components derived from water quality, habitat, and combined water quality and habitat variables - using data from EPA's National Rivers and Streams Assessment. Relationships were highly significant, but the IWI only accounted for 1-12% of the variation in the four biological and habitat variables. The IWI accounted for over 25% of the variation in the water quality and combined principal components nationally, and 32-39% in the Northern and Southern Appalachians. We also used multinomial logistic regression to compare the IWI with the categorical forms of the three biological indicators. Results were consistent: we found positive associations but modest results. We compared how the IWI and ICI predicted the water quality PC relative to agricultural and urban land use. The IWI or ICI are the best predictors of the water quality PC for the CONUS and six of the nine ecoregions, but they only perform marginally better than agriculture in most instances. However, results suggest that agriculture would not be appropriate in all parts of the country, and the index is meant to be responsive to all stressors. The IWI in its present form (available through the StreamCat website; https://www.epa.gov/national-aquatic-resource-surveys/streamcat) could be useful for management efforts at multiple scales, especially when combined with information on site condition. 
The IWI could be improved by incorporating empirical or literature-derived relationships between functional components and stressors. However, limitations concerning the absence of data for certain stressors should be considered.",2018-02-01 +35126084,The Effect of Training Sample Size on the Prediction of White Matter Hyperintensity Volume in a Healthy Population Using BIANCA.,"Introduction: White matter hyperintensities of presumed vascular origin (WMH) are an important magnetic resonance imaging marker of cerebral small vessel disease and are associated with cognitive decline, stroke, and mortality. Their relevance in healthy individuals, however, is less clear. This is partly due to the methodological challenge of accurately measuring rare and small WMH with automated segmentation programs. In this study, we tested whether WMH volumetry with FMRIB software library v6.0 (FSL; https://fsl.fmrib.ox.ac.uk/fsl/fslwiki) Brain Intensity AbNormality Classification Algorithm (BIANCA), a customizable and trainable algorithm that quantifies WMH volume based on individual data training sets, can be optimized for a normal aging population. Methods: We evaluated the effect of varying training sample sizes on the accuracy and the robustness of the predicted white matter hyperintensity volume in a population (n = 201) with a low prevalence of confluent WMH and a substantial proportion of participants without WMH. BIANCA was trained with seven different sample sizes between 10 and 40 with increments of 5. For each sample size, 100 random samples of T1w and FLAIR images were drawn and trained with manually delineated masks. For validation, we defined an internal and external validation set and compared the mean absolute error, resulting from the difference between manually delineated and predicted WMH volumes for each set. For spatial overlap, we calculated the Dice similarity index (SI) for the external validation cohort. 
Results: The study population had a median WMH volume of 0.34 ml (IQR of 1.6 ml) and included n = 28 (18%) participants without any WMH. The mean absolute error of the difference between BIANCA prediction and manually delineated masks was minimized and became more robust with an increasing number of training participants. The lowest mean absolute error of 0.05 ml (SD of 0.24 ml) was identified in the external validation set with a training sample size of 35. Compared to the volumetric overlap, the spatial overlap was poor with an average Dice similarity index of 0.14 (SD 0.16) in the external cohort, driven by subjects with very low lesion volumes. Discussion: We found that the performance of BIANCA, particularly the robustness of predictions, could be optimized for use in populations with a low WMH load by enlargement of the training sample size. Further work is needed to evaluate and potentially improve the prediction accuracy for low lesion volumes. These findings are important for current and future population-based studies with the majority of participants being normal aging people.",2021-01-01 +33342524,A Cluster Randomized Controlled Trial of a Home-Delivered Food Box on Food Security in Chickasaw Nation.,"

Background

The 2010 Child Nutrition reauthorization called for the independent evaluation of innovative strategies to reduce the risk of childhood hunger or improve the food security status of households with children.

Objective

The research question was whether the Packed Promise intervention reduces child food insecurity (FI-C) among low-income households with children.

Design

This study was a cluster randomized controlled trial of 40 school districts and 4,750 eligible, consented households within treatment and control schools.

Participants/setting

Data were collected at baseline (n = 2,859) and 2 follow-ups (n = 2,852; n = 2,790) from households with children eligible for free school meals in participating schools in 12 rural counties within the Chickasaw Nation territory in south central Oklahoma in 2016 to 2018.

Intervention

Each month of the 25-month intervention, for each eligible child, enrolled households could choose from 5 types of food boxes that contained shelf-stable, nutritious foods ($38 food value) and a $15 check for purchasing fruits and vegetables.

Main outcome measures

The primary outcome was FI-C. Other outcomes included household and adult food security, very low food security among children, and food expenditures.

Statistical analyses performed

Differences between the treatment and control groups were estimated by a regression model controlling for baseline characteristics.

Results

The Packed Promise project did not significantly reduce FI-C at 12 months (29.3% prevalence in the treatment group compared with 30.1% in the control group; P = 0.123) or at 18 months (28.2% vs 28.7%; P = 0.276), but reduced food insecurity for adults by 3 percentage points at 12 months (P = 0.002) but not at 18 months (P = 0.354). The intervention led to a $27 and a $16 decline in median household monthly out-of-pocket food expenditures at 12 and 18 months, respectively.

Conclusions

An innovative intervention successfully delivered nutritious food boxes to low-income households with children in rural Oklahoma, but did not significantly reduce FI-C. Improving economic conditions in the demonstration area and participation in other nutrition assistance programs among treatment and control groups might explain the lack of impact.ClinicalTrials.gov ID: NCT04316819 (http://www.clinicaltrials.gov).

Funding/support

This article is published as part of a supplement supported by the US Department of Agriculture, Food and Nutrition Service.",2021-01-01 +31707700,Progress in Allosteric Database.,"An allosteric mechanism refers to the biological regulation process wherein macromolecules propagate the effect of ligand binding at one site to a spatially distant orthosteric locus, thus affecting activity. The theory has remained a trending topic in biology research for over 50 years, since the understanding of allostery is fundamental for gleaning numerous biological processes and developing new drug therapies. In the past two decades, the allosteric paradigm has evolved into more descriptive models, with ever-expanding amounts of experimental data pertaining to newly identified allosteric molecules. The AlloSteric Database (ASD, accessible at http://mdl.shsmu.edu.cn/ASD ), which is a comprehensive knowledge repository, has provided the public with integrated information encompassing allosteric proteins, modulators, sites, pathways, and networks to investigate allostery since 2009. In this chapter, we introduce the history and usage of the ASD and give attention to specific applications that have benefited from the ASD.",2019-01-01 +30295851,The jPOST environment: an integrated proteomics data repository and database.,"Rapid progress is being made in mass spectrometry (MS)-based proteomics, yielding an increasing number of larger datasets with higher quality and higher throughput. To integrate proteomics datasets generated from various projects and institutions, we launched a project named jPOST (Japan ProteOme STandard Repository/Database, https://jpostdb.org/) in 2015. Its proteomics data repository, jPOSTrepo, began operations in 2016 and has accepted more than 10 TB of MS-based proteomics datasets in the past two years. 
In addition, we have developed a new proteomics database named jPOSTdb in which the published raw datasets in jPOSTrepo are reanalyzed using standardized protocol. jPOSTdb provides viewers showing the frequency of detected post-translational modifications, the co-occurrence of phosphorylation sites on a peptide and peptide sharing among proteoforms. jPOSTdb also provides basic statistical analysis tools to compare proteomics datasets.",2019-01-01 +30446142,The exopolysaccharide properties and structures database: EPS-DB. Application to bacterial exopolysaccharides.,"The EPS Database (EPS-DB) is a web-based, platform-independent database of bacterial exopolysaccharides (EPSs) providing access to detailed structural, taxonomic, growth conditions, functional properties, genetic, and bibliographic information for EPSs. It is freely available on the Internet as a website at http://www.epsdatabase.com. Several structural data representation schemes are used following the most commonly accepted formats. This guarantees full interoperability with other structural, experimental, and functional databases in the area of glycoscience. The scientific usage of EPS-DB throughout a user-friendly interface is presented with a subsection of the database exemplified by EPSs from lactic acid bacteria.",2018-10-28 +32974099,A survey of RNA secondary structural propensity encoded within human herpesvirus genomes: global comparisons and local motifs.,"There are nine herpesviruses known to infect humans, of which Epstein-Barr virus (EBV) is the most widely distributed (>90% of adults infected). This ubiquitous virus is implicated in a variety of cancers and autoimmune diseases. Previous analyses of the EBV genome revealed numerous regions with evidence of generating unusually stable and conserved RNA secondary structures and led to the discovery of a novel class of EBV non-coding (nc)RNAs: the stable intronic sequence (sis)RNAs. 
To gain a better understanding of the roles of RNA structure in EBV biology and pathogenicity, we revisit EBV using recently developed tools for genome-wide motif discovery and RNA structural characterization. This corroborated previous results and revealed novel motifs with potential functionality; one of which has been experimentally validated. Additionally, since many herpesviruses increasingly rival the seroprevalence of EBV (VZV, HHV-6 and HHV-7 being the most notable), analyses were expanded to include all sequenced human Herpesvirus RefSeq genomes, allowing for genomic comparisons. In total 10 genomes were analyzed, for EBV (types 1 and 2), HCMV, HHV-6A, HHV-6B, HHV-7, HSV-1, HSV-2, KSHV, and VZV. All resulting data were archived in the RNAStructuromeDB (https://structurome.bb.iastate.edu/herpesvirus) to make them available to a wide array of researchers.",2020-09-10 +29912383,"GeneSpy, a user-friendly and flexible genomic context visualizer.","

Summary

The exploration and comparison of genome organization is routinely used in the frame of genomic and phylogenomic analyses. As a consequence, in the past few years, various tools allowing visualizing genomic contexts have been developed. However, their use is often hampered by a lack of flexibility, particularly concerning associated databases input formats and figure customization. Here we present GeneSpy, a graphical user interface that allows the visualization and dynamic exploration of eukaryotic and prokaryotic annotated genomes. GeneSpy relies on user-friendly manageable local databases and allows the easy customization and production of figures in a multitude of formats.

Availability and implementation

GeneSpy is freely available at https://lbbe.univ-lyon1.fr/GeneSpy/ for Linux, Mac OS and Windows under CeCILL license (http://www.cecill.info/licences/). It is written in Python 2.7 and depends on Matplotlib, Tkinter and Sqlite libraries.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +33062451,Predicting CoVID-19 community mortality risk using machine learning and development of an online prognostic tool.,"

Background

The recent pandemic of CoVID-19 has emerged as a threat to global health security. There are very few prognostic models on CoVID-19 using machine learning.

Objectives

To predict mortality among confirmed CoVID-19 patients in South Korea using machine learning and deploy the best performing algorithm as an open-source online prediction tool for decision-making.

Materials and methods

Mortality for confirmed CoVID-19 patients (n = 3,524) between January 20, 2020 and May 30, 2020 was predicted using five machine learning algorithms (logistic regression, support vector machine, K nearest neighbor, random forest and gradient boosting). The performance of the algorithms was compared, and the best performing algorithm was deployed as an online prediction tool.

Results

The logistic regression algorithm was the best performer in terms of discrimination (area under ROC curve = 0.830), calibration (Matthews Correlation Coefficient = 0.433; Brier Score = 0.036) and. The best performing algorithm (logistic regression) was deployed as the online CoVID-19 Community Mortality Risk Prediction tool named CoCoMoRP (https://ashis-das.shinyapps.io/CoCoMoRP/).

Conclusions

We describe the development and deployment of an open-source machine learning tool to predict mortality risk among CoVID-19 confirmed patients using publicly available surveillance data. This tool can be utilized by potential stakeholders such as health providers and policymakers to triage patients at the community level in addition to other approaches.",2020-09-28 +33296240,Low-Dose Bisphenol A in a Rat Model of Endometrial Cancer: A CLARITY-BPA Study.,"

Background

Bisphenol A (BPA) is known to be biologically active in experimental models even at low levels of exposure. However, its impact on endometrial cancer remains unclear.

Objectives

This study aimed to investigate whether lifelong exposure to different doses of BPA induced uterine abnormalities and molecular changes in a rat model.

Methods

Sprague-Dawley rats were exposed to 5 doses of BPA [0, 25, 250, 2,500, or 25,000μg/kg body weight (BW)/d] or 2 doses of 17α-ethynylestradiol (EE2) (0.05 and 0.5μg/kg BW/d) starting from gestational day 6 up to 1 y old according to the CLARITY-BPA consortium protocol. The BW, uterus weight, and histopathology end points of the uteri were analyzed at postnatal (PND) day 21, 90, and 365. Estrous cycling status was evaluated in PND90 and PND365 rats. Transcriptomic analyses of estrus stage uteri were conducted on PND365 rats.

Results

Based on the analysis of the combined effects of all testing outcomes (including immunohistological, morphological, and estrous cycle data) in a semiblinded fashion, using statistical models, 25μg/kg BW/d BPA [BPA(25)], or 250μg/kg BW/d BPA [BPA(250)] exerted effects similar to that of EE2 at 0.5μg/kg BW/d in 1-y-old rats. Transcriptome analyses of estrus stage uteri revealed a set of 710 genes shared only between the BPA(25) and BPA(250) groups, with 115 of them predicted to be regulated by estradiol and 57 associated with female cancers. An interesting finding is that the expression of 476 human orthologous genes in this rat BPA signature robustly predicted the overall survival (p=1.68×10-5, hazard ratio=2.62) of endometrial cancer patients.

Discussion

Lifelong exposure of rats to low-dose BPA at 25 and 250μg/kg BW/d altered the estrous cycle and uterine pathology with similarity to EE2. The exposure also disrupted a unique low-dose BPA-gene signature with predictive value for survival outcomes in patients with endometrial cancer. https://doi.org/10.1289/EHP6875.",2020-12-09 +32989724,Measuring attention and vigilance in the laboratory vs. online: The split-half reliability of the ANTI-Vea.,"Over the past few years, there has been growing interest in using online methods for collecting data from large samples. However, only a few studies have administered online behavioral tasks to assess attention outside the lab. In the present study, we assessed the classic attentional functions and two vigilance components using two versions of the Attentional Networks Test for Interactions and Vigilance-executive and arousal vigilance components (ANTI-Vea): (1) a standard version, performed under typical experimental conditions (n = 314), and (2) an online version, completed outside the lab (n = 303). Both versions were equally effective in assessing (1) the main effects and interactions of phasic alertness, orienting, and executive control, and (2) the executive (i.e., a decline in the ability to detect infrequent critical signals) and the arousal (i.e., a progressive slowness and variability in responses to stimuli from the environment) vigilance decrement across time on task. Responses were generally slower in the online than in the standard version. Importantly, the split-half reliability observed for both tasks was (1) higher for executive control (~.67) than for phasic alertness and orienting (< .40), as observed in previous versions of the task, and (2) between .71 and .99 for the executive and arousal vigilance measures. 
We expect the present study will be of interest to researchers aiming to assess attentional functions with a valid and reliable method that, importantly, is publicly available on an open website ( https://www.ugr.es/~neurocog/ANTI/ ) and is easy to use in applied contexts.",2020-09-28 +32868298,Tumor-Resident Stromal Cells Promote Breast Cancer Invasion through Regulation of the Basal Phenotype.,"Collective invasion can be led by breast cancer cells expressing basal epithelial markers, typified by keratin-14 (KRT14). We analyzed gene expression data from The Cancer Genome Atlas and demonstrated a significant correlation between a KRT14+ invasion signature and a stromal-mediated extracellular matrix (ECM) organization module. We then developed a novel coculture model of tumor organoids with autologous stromal cells. Coculture significantly increased KRT14 expression and invasion of organoids from both luminal and basal murine breast cancer models. However, stromal cell conditioned medium induced invasion but not KRT14 expression. Cancer cells released TGFβ and that signaling pathway was required for stromal cell-induced invasion and KRT14 expression. Mechanistically, TGFβ induced NOX4 expression in stromal cells and NOX4 inhibition reduced invasion and KRT14 expression. In summary, we developed a novel coculture model and revealed dynamic molecular interactions between stromal cells and cancer cells that regulate both basal gene expression and invasive behavior. IMPLICATIONS: Fibroblasts within mammary tumors can regulate the molecular phenotype and invasive behavior of breast cancer cells. VISUAL OVERVIEW: http://mcr.aacrjournals.org/content/molcanres/18/11/1615/F1.large.jpg.",2020-08-31 +28736776,EmojiNet: Building a Machine Readable Sense Inventory for Emoji.,"Emoji are a contemporary and extremely popular way to enhance electronic communication. Without rigid semantics attached to them, emoji symbols take on different meanings based on the context of a message. 
Thus, like the word sense disambiguation task in natural language processing, machines also need to disambiguate the meaning or 'sense' of an emoji. In a first step toward achieving this goal, this paper presents EmojiNet, the first machine readable sense inventory for emoji. EmojiNet is a resource enabling systems to link emoji with their context-specific meaning. It is automatically constructed by integrating multiple emoji resources with BabelNet, which is the most comprehensive multilingual sense inventory available to date. The paper discusses its construction, evaluates the automatic resource creation process, and presents a use case where EmojiNet disambiguates emoji usage in tweets. EmojiNet is available online for use at http://emojinet.knoesis.org.",2016-10-23 +27976751,Hepitopes: A live interactive database of HLA class I epitopes in hepatitis B virus.,"Increased clinical and scientific scrutiny is being applied to hepatitis B virus (HBV), with focus on the development of new therapeutic approaches, ultimately aiming for cure. Defining the optimum natural CD8+ T cell immune responses that arise in HBV, mediated by HLA class I epitope presentation, may help to inform novel immunotherapeutic strategies. Therefore, we have set out to develop a comprehensive database of these epitopes in HBV, coined 'Hepitopes'. This undertaking has its foundations in a systematic literature review to identify the sites and sequences of all published class I epitopes in HBV. We also collected information regarding the methods used to define each epitope, and any reported associations between an immune response to this epitope and disease outcome. The results of this search have been collated into a new open-access interactive database that is available at http://www.expmedndm.ox.ac.uk/hepitopes. Over time, we will continue to refine and update this resource, as well as inviting contributions from others in the field to support its development. 
This unique new database is an important foundation for ongoing investigations into the nature and impact of the CD8+ T cell response to HBV.",2016-11-15 +26791506,"MG-RAST, a Metagenomics Service for Analysis of Microbial Community Structure and Function.","Approaches in molecular biology, particularly those that deal with high-throughput sequencing of entire microbial communities (the field of metagenomics), are rapidly advancing our understanding of the composition and functional content of microbial communities involved in climate change, environmental pollution, human health, biotechnology, etc. Metagenomics provides researchers with the most complete picture of the taxonomic (i.e., what organisms are there) and functional (i.e., what are those organisms doing) composition of natively sampled microbial communities, making it possible to perform investigations that include organisms that were previously intractable to laboratory-controlled culturing; currently, these constitute the vast majority of all microbes on the planet. All organisms contained in environmental samples are sequenced in a culture-independent manner, most often with 16S ribosomal amplicon methods to investigate the taxonomic or whole-genome shotgun-based methods to investigate the functional content of sampled communities. Metagenomics allows researchers to characterize the community composition and functional content of microbial communities, but it cannot show which functional processes are active; however, near parallel developments in transcriptomics promise a dramatic increase in our knowledge in this area as well. Since 2008, MG-RAST (Meyer et al., BMC Bioinformatics 9:386, 2008) has served as a public resource for annotation and analysis of metagenomic sequence data, providing a repository that currently houses more than 150,000 data sets (containing 60+ tera-base-pairs) with more than 23,000 publically available. 
MG-RAST, or the metagenomics RAST (rapid annotation using subsystems technology) server makes it possible for users to upload raw metagenomic sequence data in (preferably) fastq or fasta format. Assessments of sequence quality, annotation with respect to multiple reference databases, are performed automatically with minimal input from the user (see Subheading 4 at the end of this chapter for more details). Post-annotation analysis and visualization are also possible, directly through the web interface, or with tools like matR (metagenomic analysis tools for R, covered later in this chapter) that utilize the MG-RAST API ( http://api.metagenomics.anl.gov/api.html ) to easily download data from any stage in the MG-RAST processing pipeline. Over the years, MG-RAST has undergone substantial revisions to keep pace with the dramatic growth in the number, size, and types of sequence data that accompany constantly evolving developments in metagenomics and related -omic sciences (e.g., metatranscriptomics).",2016-01-01 +23990418,Multiple alignment-free sequence comparison.,"

Motivation

Recently, a range of new statistics have become available for the alignment-free comparison of two sequences based on k-tuple word content. Here, we extend these statistics to the simultaneous comparison of more than two sequences. Our suite of statistics contains, first, C(*)1 and C(S)1, extensions of statistics for pairwise comparison of the joint k-tuple content of all the sequences, and second, C(*)2, C(S)2 and C(geo)2, averages of sums of pairwise comparison statistics. The two tasks we consider are, first, to identify sequences that are similar to a set of target sequences, and, second, to measure the similarity within a set of sequences.

Results

Our investigation uses both simulated data as well as cis-regulatory module data where the task is to identify cis-regulatory modules with similar transcription factor binding sites. We find that although for real data, all of our statistics show a similar performance, on simulated data the Shepp-type statistics are in some instances outperformed by star-type statistics. The multiple alignment-free statistics are more sensitive to contamination in the data than the pairwise average statistics.

Availability

Our implementation of the five statistics is available as R package named 'multiAlignFree' at be http://www-rcf.usc.edu/∼fsun/Programs/multiAlignFree/multiAlignFreemain.html.

Contact

reinert@stats.ox.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-08-29 +33746558,PiSCES: Pi(scine) stream community estimation system. ,"The Piscine Stream Community Estimation System (PiSCES) provides users with a hypothesized fish community for any stream reach in the conterminous United States using information obtained from Nature Serve, the US Geological Survey (USGS), StreamCat, and the Peterson Field Guide to Freshwater Fishes of North America for over 1000 native and non-native freshwater fish species. PiSCES can filter HUC8-based fish assemblages based on species-specific occurrence models; create a community abundance/biomass distribution by relating relative abundance to mean body weight of each species; and allow users to query its database to see ancillary characteristics of each species (e.g., habitat preferences and maximum size). Future efforts will aim to improve the accuracy of the species distribution database and refine/augment increase the occurrence models. The PiSCES tool is accessible at the EPA's Quantitative Environmental Domain (QED) website at https://qed.epacdx.net/pisces/.",2020-05-01 +32672702,[INVESTIGATION OF THEMUDS (PELOID) BY LIGHT MICROSCOPY FOR THE DEVELOPMENT OF THE IDENTIFICATION METHOD].,"This study purpose was to research the possibility of microscopic analysis using for therapeutic mud (peloids) identification. The samples were studied: products containing native mud for use as cosmetics; sulfide-silt mud of the BolshoyTambukanlake (Stavropol region) and the Saki lake (Crimea). The microscopic analysis of raw materials was carried out in accordance with OFS 1.5.3.0003.15. For identification of the algae the database of the website https://www.algaebase.org/content/ was used. The difference between microscopic features of different genesis muds deposits: sulfide-silt, sapropel and peat was determined. 
Sulfide-silt muds were characterized by the presence of a large number of mineral particles, including various shapes salt crystals, blue-black hydrotroilite particles, dark brown humus particles, and rare algae inclusions. Sapropel mud was characterized by the presence of a significant number of semi-decomposed plant residues fragments and fragments of plant tissues, algae, pollen of higher coastal plants, and a small number of mineral particles. Peat mud contained numerous fragments of half-decomposed plant residues (conductive and mechanical tissues), fragments of mosses, the absence of algae and mineral particles is noted. Thus, the microscopic method of analysis can be used to assess the authenticity and quality of therapeutic mud of various origins along with the macroscopic method. Further research is promising to clarify the microscopic characteristics of raw materials for the development of regulatory documentation for therapeutic mud intended for use in cosmetics.",2020-05-01 +31161210,eFORGE v2.0: updated analysis of cell type-specific signal in epigenomic data.,"

Summary

The Illumina Infinium EPIC BeadChip is a new high-throughput array for DNA methylation analysis, extending the earlier 450k array by over 400 000 new sites. Previously, a method named eFORGE was developed to provide insights into cell type-specific and cell-composition effects for 450k data. Here, we present a significantly updated and improved version of eFORGE that can analyze both EPIC and 450k array data. New features include analysis of chromatin states, transcription factor motifs and DNase I footprints, providing tools for epigenome-wide association study interpretation and epigenome editing.

Availability and implementation

eFORGE v2.0 is implemented as a web tool available from https://eforge.altiusinstitute.org and https://eforge-tf.altiusinstitute.org/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +32243271,Identification of key genes and pathways associated with topotecan treatment using multiple bioinformatics tools.,"

Background

The goal of this study is to determine critical genes and pathways associated with topotecan using publicly accessible bioinformatics tools.

Methods

Topotecan signatures were downloaded from the Library of Integrated Network-Based Cellular Signatures (LINCS) database (http://www.ilincs.org/ilincs/). Differentially expressed genes (DEGs) were defined as genes that appeared at least three times with p values <0.05 and a fold change of ≥50% (|log2FC| ≥ 0.58). Hub genes were identified by evaluating the following parameters using a protein-protein interaction network: node degrees, betweenness, and eigenfactor scores. Hub genes and the top-40 DEGs by |log2FC| were used to generate a Venn diagram, and key genes were identified. Functional and pathway enrichment analysis was performed using the Kyoto Encyclopedia of Genes and Genomes (KEGG) databases. Information on ovarian cancer patients derived from The Cancer Genome Atlas (TCGA) database was analyzed, and the effect of topotecan on the protein expression was examined by Western blotting.

Results

Eleven topotecan signatures were downloaded, and 65 upregulated and 87 downregulated DEGs were identified. Twenty-one hub genes were identified. We identified eight key genes as upregulated genes, including NFKBIA, IKBKB, GADD45A, CDKN1A, and HIST2H2BE, while EZH2, CDC20, and CDK7 were identified as downregulated genes, which play critical roles in the cell cycle and carcinogenesis in KEGG analysis. In the TCGA analysis, the CDKN1A+/EZH2- group had the longest median survival, while the CDKN1A-/EZH2+ group had the shortest median survival. Topotecan-treated murine ovarian (MOSEC), colorectal (CT26), and lung (LLC) cancer cell lines displayed upregulated CDKN1A encoding p21 and downregulated Ezh2.

Conclusion

Using publicly accessible bioinformatics tools, we evaluated key genes and pathways related to topotecan and examined the key genes using the TCGA database and in vitro studies.",2020-05-01 +31504174,A powerful and flexible weighted distance-based method incorporating interactions between DNA methylation and environmental factors on health outcomes.,"

Motivation

Deoxyribonucleic acid (DNA) methylation plays a crucial role in human health. Studies have demonstrated associations between DNA methylation and environmental factors with evidence also supporting the idea that DNA methylation may modify the risk of environmental factors on health outcomes. However, due to high dimensionality and low study power, current studies usually focus on finding differential methylation on health outcomes at CpG level or gene level combining multiple CpGs and/or finding environmental effects on health outcomes but ignoring their interactions on health outcomes. Here we introduce the idea of a pseudo-data matrix constructed with cross-product terms between CpGs and environmental factors that are able to capture their interactions. We then develop a powerful and flexible weighted distance-based method with the pseudo-data matrix where association strength was used as weights on CpGs, environmental factors and their interactions to up-weight signals and down-weight noises in distance calculations.

Results

We compared the power of this novel approach and several comparison methods in simulated datasets and the Mothers and Newborns birth cohort of the Columbia Center for Children's Environmental Health to determine whether prenatal polycyclic aromatic hydrocarbons interacts with DNA methylation in association with Attention Deficit Hyperactivity Disorder and Mental Development Index at age 3.

Availability and implementation

An R code for the proposed method Dw-M-E-int together with a tutorial and a sample dataset is available for downloading from http://www.columbia.edu/~sw2206/softwares.htm.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +26322066,ReprOlive: a database with linked data for the olive tree (Olea europaea L.) reproductive transcriptome.,"Plant reproductive transcriptomes have been analyzed in different species due to the agronomical and biotechnological importance of plant reproduction. Here we presented an olive tree reproductive transcriptome database with samples from pollen and pistil at different developmental stages, and leaf and root as control vegetative tissues (http://reprolive.eez.csic.es). It was developed from 2,077,309 raw reads to 1,549 Sanger sequences. Using a pre-defined workflow based on open-source tools, sequences were pre-processed, assembled, mapped, and annotated with expression data, descriptions, GO terms, InterPro signatures, EC numbers, KEGG pathways, ORFs, and SSRs. Tentative transcripts (TTs) were also annotated with the corresponding orthologs in Arabidopsis thaliana from TAIR and RefSeq databases to enable Linked Data integration. It results in a reproductive transcriptome comprising 72,846 contigs with average length of 686 bp, of which 63,965 (87.8%) included at least one functional annotation, and 55,356 (75.9%) had an ortholog. A minimum of 23,568 different TTs was identified and 5,835 of them contain a complete ORF. The representative reproductive transcriptome can be reduced to 28,972 TTs for further gene expression studies. Partial transcriptomes from pollen, pistil, and vegetative tissues as control were also constructed. ReprOlive provides free access and download capability to these results. Retrieval mechanisms for sequences and transcript annotations are provided. Graphical localization of annotated enzymes into KEGG pathways is also possible. 
Finally, ReprOlive has included a semantic conceptualisation by means of a Resource Description Framework (RDF) allowing a Linked Data search for extracting the most updated information related to enzymes, interactions, allergens, structures, and reactive oxygen species.",2015-08-11 +29590633,Disease Ontology: improving and unifying disease annotations across species. ,"Model organisms are vital to uncovering the mechanisms of human disease and developing new therapeutic tools. Researchers collecting and integrating relevant model organism and/or human data often apply disparate terminologies (vocabularies and ontologies), making comparisons and inferences difficult. A unified disease ontology is required that connects data annotated using diverse disease terminologies, and in which the terminology relationships are continuously maintained. The Mouse Genome Database (MGD, http://www.informatics.jax.org), Rat Genome Database (RGD, http://rgd.mcw.edu) and Disease Ontology (DO, http://www.disease-ontology.org) projects are collaborating to augment DO, aligning and incorporating disease terms used by MGD and RGD, and improving DO as a tool for unifying disease annotations across species. Coordinated assessment of MGD's and RGD's disease term annotations identified new terms that enhance DO's representation of human diseases. Expansion of DO term content and cross-references to clinical vocabularies (e.g. OMIM, ORDO, MeSH) has enriched the DO's domain coverage and utility for annotating many types of data generated from experimental and clinical investigations. The extension of anatomy-based DO classification structure of disease improves accessibility of terms and facilitates application of DO for computational research. A consistent representation of disease associations across data types from cellular to whole organism, generated from clinical and model organism studies, will promote the integration, mining and comparative analysis of these data. 
The coordinated enrichment of the DO and adoption of DO by MGD and RGD demonstrates DO's usability across human data, MGD, RGD and the rest of the model organism database community.",2018-03-12 +32221611,PolishEM: image enhancement in FIB-SEM.,"

Summary

We have developed a software tool to improve the image quality in focused ion beam-scanning electron microscopy (FIB-SEM) stacks: PolishEM. Based on a Gaussian blur model, it automatically estimates and compensates for the blur affecting each individual image. It also includes correction for artifacts commonly arising in FIB-SEM (e.g. curtaining). PolishEM has been optimized for an efficient processing of huge FIB-SEM stacks on standard computers.

Availability and implementation

PolishEM has been developed in C. GPL source code and binaries for Linux, OSX and Windows are available at http://www.cnb.csic.es/%7ejjfernandez/polishem.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +30843052,PhenoPro: a novel toolkit for assisting in the diagnosis of Mendelian disease.,"MOTIVATION:Whole-exome sequencing (WES) is now being used in clinical practice for the diagnosis of the causal genes of Mendelian diseases. In order to make the diagnosis, however, the clinical phenotypes [e.g. Human Phenotype Ontology (HPO) terms] of a patient are needed for prioritizing the variants called from the WES data of the patient. Computational tools are therefore needed to standardize and accelerate this process. RESULTS:Here, we introduce a tool named PhenoPro for prioritizing the causal gene of Mendelian disease given both the HPO terms assigned to and the variants called from the WES data of a patient. PhenoPro has been benchmarked using both simulated patients and 287 real diagnosed patients of Chinese ancestry, and shows significant improvements over five previous tools. Moreover, the addition of an internal variant data of Chinese ancestry and the variant data from the patients' parents can further improve PhenoPro's performance. To make PhenoPro a fully automated tool, we also include a natural language processing component for automated HPO term assignment from clinical reports, and demonstrate that the natural language processing is as effective as manual HPO assignment using real clinical reports. In conclusion, PhenoPro can be used as a pre-screening tool to assist in the diagnosis of Mendelian disease genes. AVAILABILITY AND IMPLEMENTATION:The web server of PhenoPro is freely available at http://app.tianlab.cn. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-10-01 +30837538,Widely targeted metabolome and transcriptome landscapes of Allium fistulosum-A. 
cepa chromosome addition lines revealed a flavonoid hot spot on chromosome 5A.,"Here, we report a comprehensive analysis of the widely targeted metabolome and transcriptome profiles of Allium fistulosum L. (FF) with the single extra chromosome of shallot [A. cepa L. Aggregatum group (AA)] to clarify the novel gene functions in flavonoid biosynthesis. An exhaustive metabolome analysis was performed using the selected reaction monitoring mode of liquid chromatography-tandem quadrupole mass spectrometry, revealing a specific accumulation of quercetin, anthocyanin and flavone glucosides in AA and FF5A. The addition of chromosome 5A from the shallot to A. fistulosum induced flavonoid accumulation in the recipient species, which was associated with the upregulation of several genes including the dihydroflavonol 4-reductase, chalcone synthase, flavanone 3-hydroxylase, UDP-glucose flavonoid-3-O-glucosyltransferase, anthocyanin 5-aromatic acyltransferase-like, pleiotropic drug resistance-like ATP binding cassette transporter, and MYB14 transcriptional factor. Additionally, an open access Allium Transcript Database (Allium TDB, http://alliumtdb.kazusa.or.jp ) was generated by using RNA-Seq data from different genetic stocks including the A. fistulosum-A. cepa monosomic addition lines. The functional genomic approach presented here provides an innovative means of targeting the gene responsible for flavonoid biosynthesis in A. cepa. The understanding of flavonoid compounds and biosynthesis-related genes would facilitate the development of noble Allium varieties with unique chemical constituents and, subsequently, improved plant stress tolerance and human health benefits.",2019-03-05 +32360523,HIV-1 drug resistance mutations detection and HIV-1 subtype G report by using next-generation sequencing platform.,"

Background

Based on world health organization (WHO) recommend, drug resistance assay should be performed in initial of treatment and after treatment for administering and monitoring of anti-retroviral regime in HIV-1 infected patients.

Material and method

NGS analyses were performed on forty-one plasma samples from HIV-1 affected patients using the Sentosa SQ HIV genotyping assay (Vela-Diagnostics, Germany). This system comprises a semi-automated Ion torrent based platform and the sequencing results were analyzed based on ANRS, REGA and Stanford drug resistance algorithms. Phylogenetic analysis was analyzed based on https://comet.lih.lu database as well as MEGA5 Software.

Results

Drug resistances were identified in thirty-three samples (80%) out of forty-one samples. The Phylogenetic analysis results showed that CRF-35AD (94%) and subtypes B (2.4%) and G (2.4%) were dominant subtypes in this study. NRTI and NNRTI associated dominant mutations were M184I/V and K103N. High-level resistance to lamivudine (3TC) and Emtricitabine (FTC) were detected in 34.3% of patients while 53.1% were resistant to Efavirenz (EFV) and Nevirapine (NVP). The Protease inhibitor (PI) minor and major mutations were not reported but more than 95% of samples had polymorphisms mutation in K20R, M36I, H69K, L89M positions. These mutations are subtype dependent and completely are absent in subtype B virus. The secondary mutations were reported in positions of E157Q, S230N, and T97A of integrase gene and four samples represent low-level resistance to integrase strand transfer inhibitor (INSTI).

Conclusions

This is the first preliminary evaluation of HIV-1 drug resistance mutation (DRM) by using the Sentosa SQ HIV Genotyping Assay in Iran. The NGS represent a promising tool for the accurate detection of DRMs of CRF-35AD that is dominant subtype in Iranian HIV-1 infected population and for the first time revealed HIV-1 subtype G in Iranian population. In the present study polymorphic mutation in the position of K20R, M36I, H69K, L89M were properly reported in CRF35AD that is dominant in Iranian HIV patients.",2020-04-30 +31814546,Recent Patents and Discovery of Anti-inflammatory Agents from Marine Source.,"BACKGROUND:Inflammation has become pathology in the majority of the prevalent diseases such as diabetes, atherosclerosis, epilepsy and neurodegenerative disorders. Anti-inflammatory drugs work wonder in all these conditions, where the patient has become refractory to standard treatment. However, available anti-inflammatory agents have side effects associated with chronic use, thus if we could develop safe and efficacious molecules, quality of health care provided will improve. Since plant sources have been extensively explored, the focus needs to be shifted on the alternative natural sources of anti-inflammatory agents. Water bodies especially the sea and ocean are under investigation to find agents which can tackle inflammation. OBJECTIVE:This article reviews anti-inflammatory agents obtained from five types of marine organisms namely microalgae, sea cucumber, mussels, sponges and corals. METHODS:A literature search was conducted using PubMed/Science Direct with keywords marine organisms, inflammation, marine sponges, sea cucumber, mussels, corals and microalgae. Patents were searched using the key terms inflammation, marine agents from www.google.com/patents, www.uspto.gov, http://espacenet.com, www.freepatentsonline.com, www.wipo.int/pctdb/en/searchsimp.jsp and www.freshpatents.com. 
RESULTS:Literature and current patents have revealed applications of anti-inflammatory agents from marine organisms in pharmaceuticals and cosmeceuticals. These agents are used to treat inflammatory disorders ranging from minor allergy to chronic conditions like rheumatoid arthritis. Marine waste is also a valuable resource for nutraceuticals and anti-inflammatory agents. CONCLUSION:The findings reveal that marine organisms could be a promising source of novel antiinflammatory agents. However, further investigations are suggested for the isolation and identification of bioactive, exploring the mechanism of action and evaluating the efficacy in various inflammatory conditions.",2019-01-01 +32186709,Position-wise binding preference is important for miRNA target site prediction.,"

Motivation

It is a fundamental task to identify microRNAs (miRNAs) targets and accurately locate their target sites. Genome-scale experiments for miRNA target site detection are still costly. The prediction accuracies of existing computational algorithms and tools are often not up to the expectation due to a large number of false positives. One major obstacle to achieve a higher accuracy is the lack of knowledge of the target binding features of miRNAs. The published high-throughput experimental data provide an opportunity to analyze position-wise preference of miRNAs in terms of target binding, which can be an important feature in miRNA target prediction algorithms.

Results

We developed a Markov model to characterize position-wise pairing patterns of miRNA-target interactions. We further integrated this model as a scoring method and developed a dynamic programming (DP) algorithm, MDPS (Markov model-scored Dynamic Programming algorithm for miRNA target site Selection) that can screen putative target sites of miRNA-target binding. The MDPS algorithm thus can take into account both the dependency of neighboring pairing positions and the global pairing information. Based on the trained Markov models from both miRNA-specific and general datasets, we discovered that the position-wise binding information specific to a given miRNA would benefit its target prediction. We also found that miRNAs maintain region-wise similarity in their target binding patterns. Combining MDPS with existing methods significantly improves their precision while only slightly reduces their recall. Therefore, position-wise pairing patterns have the promise to improve target prediction if incorporated into existing software tools.

Availability and implementation

The source code and tool to calculate MDPS score is available at http://hulab.ucf.edu/research/projects/MDPS/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +32360928,Novel Risk Calculator for Suboccipital Decompression for Adult Chiari Malformation.,"BACKGROUND:Patient counseling and selection for surgical therapy in adult Chiari malformation type I (CM-1) remain debatable. We aimed to develop a clinical calculator predicting the risk of nonhome discharge and reoperation using the American College of Surgeons-National Surgical Quality Improvement Program database. METHODS:The database from years 2011 through 2017 was queried to identify the subset of CM-1 patients undergoing suboccipital decompression. Univariable analysis was conducted to identify baseline factors associated with nonhome discharge and 30-day reoperation following the initial decompression procedure. Logistic regression and the Akaike Information Criterion were used to identify the optimal models predictive of both outcomes. Performance was assessed using receiver operating curves and validated with bootstrapping. RESULTS:In 706 CM-1 patients, the rate of nonhome discharge was 5.2% and the reoperation rate was 6.6% with most reoperations consisting of cerebrospinal fluid flow diversion and cerebrospinal fluid leak repair. The optimal model predictive of nonhome discharge consisted of age (odds ratio [OR] = 1.05, P = 0.001), diabetes (OR = 2.44, P = 0.080), and American Society of Anesthesiologists class (OR = 1.94, P = 0.082) with an area under the curve of 0.720. The optimal model predictive of reoperation consisted of female sex (OR = 0.48, P = 0.031), body mass index (OR = 1.05, P = 0.002), and ASA class (OR = 3.44, P = 0.001) with an area under the curve of 0.726. A calculator for both outcomes was deployed under the following URL: https://jhuspine3.shinyapps.io/Discharge_Reop_Calculator/. CONCLUSIONS:We have used a large international database to develop a simple risk calculator based on readily available preoperative variables. 
Following subsequent validation, this tool can help optimize patient counseling and decision making in adult CM-1.",2020-04-29 +32051640,[The Pacemaker and Implantable Cardioverter-Defibrillator Registry of the Italian Association of Arrhythmology and Cardiac Pacing - Annual report 2018].,"BACKGROUND:The pacemaker (PM) and implantable cardioverter-defibrillator (ICD) Registry of the Italian Association of Arrhythmology and Cardiac Pacing (AIAC) monitors the main epidemiological data in real-world practice. The survey for the 2018 activity collects information about demographics, clinical characteristics, main indications for PM/ICD therapy and device types from the Italian collaborating centers. METHODS:The Registry collects prospectively national PM and ICD implantation activity on the basis of European cards. RESULTS:PM Registry: data about 23 912 PM implantations were collected (20 084 first implants and 3828 replacements). The number of collaborating centers was 180. Median age of treated patients was 81 years (75 quartile I; 86 quartile III). ECG indications included atrioventricular conduction disorders in 34.5% of first PM implants, sick sinus syndrome in 18.3%, atrial fibrillation plus bradycardia in 13.0%, other in 34.2%. Among atrioventricular conduction defects, third-degree atrioventricular block was the most common type (19.2% of first implants). Use of single-chamber PMs was reported in 24.9% of first implants, of dual-chamber PMs in 67.6%, of PMs with cardiac resynchronization therapy (CRT) in 1.6%, and of single lead atrial-synchronized ventricular stimulation (VDD/R PMs) in 5.9%. ICD Registry: data about 18 353 ICD implantations were collected (13 944 first implants and 4359 replacements). The number of collaborating centers was 433. Median age of treated patients was 71 years (63 quartile I; 78 quartile III). Primary prevention indication was reported in 84.3% of first implants, secondary prevention in 15.7% (cardiac arrest in 5.3%). 
A single-chamber ICD was used in 27.9% of first implants, dual-chamber ICD in 31.9% and biventricular ICD in 40.2%. CONCLUSIONS:The PM and ICD Registry appears fundamental for monitoring PM and ICD utilization on a large national scale with rigorous examination of demographics and clinical indications. The PM Registry showed stable electrocardiographic and symptom indications, with an important prevalence of dual-chamber pacing. The use of CRT-PM regards a very limited number of patients. The ICD Registry documented a large use of prophylactic and biventricular ICD, reflecting a favorable adherence to trials and guidelines in clinical practice. In order to increase and optimize the cooperation of Italian implanting centers, online data entry (http://www.aiac.it/riprid) should be adopted at large scale.",2020-02-01 +32345360,CircAtlas: an integrated resource of one million highly accurate circular RNAs from 1070 vertebrate transcriptomes.,"Existing circular RNA (circRNA) databases have become essential for transcriptomics. However, most are unsuitable for mining in-depth information for candidate circRNA prioritization. To address this, we integrate circular transcript collections to develop the circAtlas database based on 1070 RNA-seq samples collected from 19 normal tissues across six vertebrate species. This database contains 1,007,087 highly reliable circRNAs, of which over 81.3% have been assembled into full-length sequences. We profile their expression pattern, conservation, and functional annotation. We describe a novel multiple conservation score, co-expression, and regulatory networks for circRNA annotation and prioritization. CircAtlas can be accessed at http://circatlas.biols.ac.cn/.",2020-04-28 +31856831,GTX.Digest.VCF: an online NGS data interpretation system based on intelligent gene ranking and large-scale text mining.,"

Background

An important task in the interpretation of sequencing data is to highlight pathogenic genes (or detrimental variants) in the field of Mendelian diseases. It is still challenging despite the recent rapid development of genomics and bioinformatics. A typical interpretation workflow includes annotation, filtration, manual inspection and literature review. Those steps are time-consuming and error-prone in the absence of systematic support. Therefore, we developed GTX.Digest.VCF, an online DNA sequencing interpretation system, which prioritizes genes and variants for novel disease-gene relation discovery and integrates text mining results to provide literature evidence for the discovery. Its phenotype-driven ranking and biological data mining approach significantly speed up the whole interpretation process.

Results

The GTX.Digest.VCF system is freely available as a web portal at http://vcf.gtxlab.com for academic research. Evaluation on the DDD project dataset demonstrates an accuracy of 77% (235 out of 305 cases) for top-50 genes and an accuracy of 41.6% (127 out of 305 cases) for top-5 genes.

Conclusions

GTX.Digest.VCF provides an intelligent web portal for genomics data interpretation via the integration of bioinformatics tools, distributed parallel computing, biomedical text mining. It can facilitate the application of genomic analytics in clinical research and practices.",2019-12-20 +24906803,"hsphase: an R package for pedigree reconstruction, detection of recombination events, phasing and imputation of half-sib family groups.","

Background

Identification of recombination events and which chromosomal segments contributed to an individual is useful for a number of applications in genomic analyses including haplotyping, imputation, signatures of selection, and improved estimates of relationship and probability of identity by descent. Genotypic data on half-sib family groups are widely available in livestock genomics. This structure makes it possible to identify recombination events accurately even with only a few individuals and it lends itself well to a range of applications such as parentage assignment and pedigree verification.

Results

Here we present hsphase, an R package that exploits the genetic structure found in half-sib livestock data to identify and count recombination events, impute and phase un-genotyped sires and phase its offspring. The package also allows reconstruction of family groups (pedigree inference), identification of pedigree errors and parentage assignment. Additional functions in the package allow identification of genomic mapping errors, imputation of paternal high density genotypes from low density genotypes, evaluation of phasing results either from hsphase or from other phasing programs. Various diagnostic plotting functions permit rapid visual inspection of results and evaluation of datasets.

Conclusion

The hsphase package provides a suite of functions for analysis and visualization of genomic structures in half-sib family groups implemented in the widely used R programming environment. Low level functions were implemented in C++ and parallelized to improve performance. hsphase was primarily designed for use with high density SNP array data but it is fast enough to run directly on sequence data once they become more widely available. The package is available (GPL 3) from the Comprehensive R Archive Network (CRAN) or from http://www-personal.une.edu.au/~cgondro2/hsphase.htm.",2014-06-07 +29106550,The OMA orthology database in 2018: retrieving evolutionary relationships among all domains of life through richer web and programmatic interfaces.,"The Orthologous Matrix (OMA) is a leading resource to relate genes across many species from all of life. In this update paper, we review the recent algorithmic improvements in the OMA pipeline, describe increases in species coverage (particularly in plants and early-branching eukaryotes) and introduce several new features in the OMA web browser. Notable improvements include: (i) a scalable, interactive viewer for hierarchical orthologous groups; (ii) protein domain annotations and domain-based links between orthologous groups; (iii) functionality to retrieve phylogenetic marker genes for a subset of species of interest; (iv) a new synteny dot plot viewer; and (v) an overhaul of the programmatic access (REST API and semantic web), which will facilitate incorporation of OMA analyses in computational pipelines and integration with other bioinformatic resources. OMA can be freely accessed at https://omabrowser.org.",2018-01-01 +31048982,A database of threat statuses and life-history traits of Red List species in Flanders (northern Belgium).,"

Background

Red Lists estimate the extinction risk of species at global or regional levels and are important instruments in conservation policies. Global Red List assessments are readily available via the IUCN website (https://www.iucnredlist.org) and are regularly updated by (taxonomic) experts. Regional Red Lists, however, are not always easy to find and often use local criteria to assess the local extinction risk of species.

New information

Here, we publish a database with the outcome of 38 Red List assessments in Flanders (northern Belgium) between 1994 and 2018. In total, the database contains 6,224 records of 5,039 unique taxa pertaining to 24 different taxonomic groups. Using a quality control procedure, we evaluated the criteria used, the number of records, the temporal and spatial distribution of the data and the up-to-dateness of the Red Lists. This way, nineteen Red Lists were approved as being of sufficient high quality (i.e. validated) and nineteen others were not. Once validated, Red Lists are approved by the regional Minister of Environment and published in the Belgian Official Gazette acquiring legal status. For the validated Red Lists, we additionally compiled (life-history) traits that are applicable to a wide variety of species groups (taxonomic kingdom, environment, biotope, nutrient level, dispersal capacity, lifespan and cuddliness). The publication of this dataset allows comparison of Red List statuses with other European regions and countries and permits analyses about how certain (life-history) traits can explain the Red List status of species. The dataset will be regularly updated by adding new Red List (re)assessments and/or additional (life-history) traits.",2019-04-05 +33104339,PubChemQC PM6: Data Sets of 221 Million Molecules with Optimized Molecular Geometries and Electronic Properties.,"We report on optimized molecular geometries and electronic properties calculated by the PM6 method for 94.0% of the 91.6 million molecules cataloged in PubChem Compounds retrieved on August 29, 2016. In addition to neutral states, we also calculated those for cationic, anionic, and spin flipped electronic states of 56.2%, 49.7%, and 41.3% of the molecules, respectively. Thus, the grand total of the PM6 calculations amounted to 221 million. We compared the resulting molecular geometries with B3LYP/6-31G* optimized geometries for 2.6 million molecules. 
The root-mean-square deviations in bond length and bond angle were approximately 0.016 Å and 1.7°, respectively. Then, using linear regression to examine the HOMO energy levels E(HOMO) in the B3LYP and PM6 calculations, we found that EB3LYP(HOMO) = 0.876EPM6(HOMO) + 1.975 (eV) and calculated the coefficient of determination to be 0.803. Likewise, we examined the LUMO energy levels and found EB3LYP(LUMO) = 1.069EPM6(LUMO) - 0.420 (eV); the coefficient of determination was 0.842. We also generated four subdata sets, each of which was composed of molecules with molecular weights less than 500. Subdata set i contained C, H, O and N, ii contained C, H, N, O, P, and S, iii contained C, H, N, O, P, S, F, and Cl, and iv contained C, H, N, O, P, S, F, Cl, Na, K, Mg, and Ca. The data sets are available at http://pubchemqc.riken.jp/pm6_datasets.html under a Creative Commons Attribution 4.0 International license.",2020-10-26 +30118150,Mucopolysaccharidosis type VI (MPS VI) and molecular analysis: Review and classification of published variants in the ARSB gene.,"Maroteaux-Lamy syndrome (MPS VI) is an autosomal recessive lysosomal storage disorder caused by pathogenic ARSB gene variants, commonly diagnosed through clinical findings and deficiency of the arylsulfatase B (ASB) enzyme. Detection of ARSB pathogenic variants can independently confirm diagnosis and render genetic counseling possible. In this review, we collect and summarize 908 alleles (201 distinct variants, including 3 polymorphisms previously considered as disease-causing variants) from 478 individuals diagnosed with MPS VI, identified from literature and public databases. Each variant is further analyzed for clinical classification according to American College of Medical Genetics and Genomics (ACMG) guidelines. Results highlight the heterogeneity of ARSB alleles, with most unique variants (59.5%) identified as missense and 31.7% of unique alleles appearing once. 
Only 18% of distinct variants were previously recorded in public databases with supporting evidence and clinical significance. ACMG recommends publishing clinical and biochemical data that accurately characterize pathogenicity of new variants in association with reporting specific alleles. Variants analyzed were sent to ClinVar (https://www.ncbi.nlm.nih.gov/clinvar/), and MPS VI locus-specific database (http://mps6-database.org) where they will be available. High clinical suspicion coupled with diagnostic testing for deficient ASB activity and timely submission and classification of ARSB variants with biochemical and clinical data in public databases is essential for timely diagnosis of MPS VI.",2018-09-17 +32388191,Using narratives in differential diagnosis of neurodegenerative syndromes.,"

Purpose

Language decline has been associated with healthy aging and with various neurodegenerative conditions, making it challenging to differentiate among these conditions. This study examined the utility of linguistic measures derived from a short narrative language sample for 1) identifying language characteristics and cut-off scores to differentiate between healthy aging, Primary Progressive Aphasia (PPA), Mild Cognitive Impairment (MCI), and Alzheimer's dementia (AD); and 2) differentiating among PPA variants in which language is the primary impairment.

Method

Participants were 25 neurologically healthy English speakers, 20 individuals with MCI, 20 with AD, and 26 with PPA (non-fluent/agrammatic N = 10, logopenic N = 9, semantic N = 7). Narrative language samples of the Cookie Theft Picture of persons with healthy aging, MCI and AD were retrospectively obtained from the DementiaBank database (https://talkbank.org/DementiaBank/) and PPA samples were obtained from an ongoing research study. The language samples were analyzed for fluency, word retrieval success, grammatical accuracy, and errors using automated and manual analysis methods. The sensitivity and specificity of various language measures was computed.

Results

Participants with PPA scored lower than neurologically healthy and MCI groups on fluency (words per minute and disfluencies), word retrieval (Correct Information Units and number of errors), and sentence grammaticality. PPA and AD groups did not differ on language measures. Agrammatic PPA participants scored lower than logopenic and semantic PPA groups on several measures, while logopenic and semantic PPA did not differ on any measures.

Conclusion

Measures derived from brief language samples and analyzed using mostly automated methods are clinically useful in differentiating PPA from healthy aging and MCI, and agrammatic PPA from other variants. The sensitivity and specificity of these measures is modest and can be improved when coupled with clinical presentation.",2020-04-27 +32352516,LIST-S2: taxonomy based sorting of deleterious missense mutations across species.,"The separation of deleterious from benign mutations remains a key challenge in the interpretation of genomic data. Computational methods used to sort mutations based on their potential deleteriousness rely largely on conservation measures derived from sequence alignments. Here, we introduce LIST-S2, a successor to our previously developed approach LIST, which aims to exploit local sequence identity and taxonomy distances in quantifying the conservation of human protein sequences. Unlike its predecessor, LIST-S2 is not limited to human sequences but can assess conservation and make predictions for sequences from any organism. Moreover, we provide a web-tool and downloadable software to compute and visualize the deleteriousness of mutations in user-provided sequences. This web-tool contains an HTML interface and a RESTful API to submit and manage sequences as well as a browsable set of precomputed predictions for a large number of UniProtKB protein sequences of common taxa. LIST-S2 is available at: https://list-s2.msl.ubc.ca/.",2020-07-01 +33591827,First report of Tobacco ringspot virus in highbush blueberry in Washington State. ,"Since 2015, several blueberry plants (Vaccinium corymbosum) of cvs. Draper and Top Shelf in an organic farm in eastern Washington State showed reduced growth with deformed leaves displaying chlorotic spots, rings, and red blotches and producing small and poorly ripened berries. The symptomatic plants showed gradual decline within 2 to 3 years post-planting. 
In ELISA using antibodies (Agdia, Inc., USA) to Blueberry leaf mottle virus, Cherry leaf roll virus, Peach rosette mosaic virus, Strawberry latent ringspot virus, Tomato black ring virus, Tomato ringspot virus, and Tobacco ringspot virus [TRSV]), leaf samples from six symptomatic plants tested positive only to TRSV (Secoviridae: Nepovirus). Subsequently, total RNA was isolated from leaves of a symptomatic plant using the Spectrum™ Plant Total RNA Kit (Sigma-Aldrich, USA). High quality RNA was subjected to high-throughput sequencing (HTS) on the Illumina© NovaSeq™ platform (Huntsman Cancer Institute, UT, USA). An average of ~28 million 150-base pair (bp) paired-end reads obtained were subjected to quality filtering followed by de novo assembly using CLC Genomics Workbench (v12.0) and BLASTn analysis (http://www.ncbi.nlm.nih.gov/blast). Two contigs of 2,778 bp (average coverage: 11,031.7) and 3,589 bp (average coverage: 11,882) showed, respectively, a maximum of 97.3 and 97.6% nucleotide (nt) identity with TRSV RNA1 of a South Korean isolate (KJ556849). Another contig of 3,615 bp (average coverage: 7072.1) showed a maximum of 92.8% nt identity with TRSV RNA2 of an isolate from Iowa (MT563079). The HTS data revealed no other viral sequences reported from blueberry plants (Martin and Tzanetakis 2018). To further confirm the presence of TRSV, extracts of leaf samples from seven symptomatic and ten asymptomatic plants collected randomly from cvs. Draper and Top Shelf were tested by RT-PCR using primers specific to a region of the helicase gene of TRSV RNA1 (Forward: GACTACTGAGCAACATTGCAACTTCC, Reverse: GTCCCCTAACAGCATTGACTACC) and the coat protein gene of TRSV RNA2 (Forward: GCTGATTGGCAGTGTATTGTTAC, Reverse: GTGTTCGCATCTGGTTTCAAATTGG). An approximately 360 bp fragment specific to RNA1 and ~640 bp fragment specific to RNA2 were amplified only from symptomatic samples. 
Sanger sequence analysis of amplicons specific to RNA1 and RNA2 showed 98.1% and 96.8% nt identity with corresponding sequences of TRSV isolates from South Korea (KJ556849) and Iowa (MT563079), respectively. These results confirmed the presence of TRSV in symptomatic blueberry plants. The complete sequence of RNA1 (7,512 nt, MW495243) and RNA2 (3,925 nt, MW495244) genome segments of the blueberry isolate determined in this study showed 95.9 and 93.2% nt sequence identity, respectively, with corresponding TRSV sequences from South Korea (KJ556849) and Iowa (MT563079). Based on previous reports (Converse and Ramsdell 1982, Martin et al. 2012, Martin and Tzanetakis, 2018), this study represents the first report of TRSV infecting highbush blueberry in Washington State. Since the State has emerged as the national leader in blueberry production, the results will strengthen plant health certification standards to provide virus-tested propagative materials for domestic growers and export to the European Union.",2021-02-16 +32282909,"TREND: a platform for exploring protein function in prokaryotes based on phylogenetic, domain architecture and gene neighborhood analyses.","Key steps in a computational study of protein function involve analysis of (i) relationships between homologous proteins, (ii) protein domain architecture and (iii) gene neighborhoods the corresponding proteins are encoded in. Each of these steps requires a separate computational task and sets of tools. Currently in order to relate protein features and gene neighborhoods information to phylogeny, researchers need to prepare all the necessary data and combine them by hand, which is time-consuming and error-prone. Here, we present a new platform, TREND (tree-based exploration of neighborhoods and domains), which can perform all the necessary steps in automated fashion and put the derived information into phylogenomic context, thus making evolutionary based protein function analysis more efficient. 
A rich set of adjustable components allows a user to run the computational steps specific to his task. TREND is freely available at http://trend.zhulinlab.org.",2020-07-01 +,4CPS-014 Follow-up to recommendations about renal function monitoring in elderly patients treated with sodium-glucose co-transporter 2 inhibitors,"

Background

The efficacy of sodium-glucose co-transporter 2 inhibitors (iSGLT) decreases with decreasing glomerular filtration rate. The summary of products’ characteristics recommends restriction in the use of iSGLT to patients with creatinine clearance (CrCl) >60 ml/min/1.73 m2 and treatment should be suspended if CrCl <45 ml/min/1.73 m2.

Purpose

To describe the adherence to guidelines’ recommendations about renal function monitoring in patients aged over 75 years treated with iSGLT.

Material and methods

Transversal, descriptive study in patients aged over 75 years from six primary healthcare centres of the same referral hospital, under treatment with iSGLT-2 as of 30 September 2017. Data were obtained from electronic health records of primary care and referral hospitals.

Results

Fifty-nine patients were included: 55.17% male, mean age 79 (SD 2.6) years and mean CrCl (CKD-EPI) 66.1±13.3 mL/min/1.73 m2. Seventeen patients were lost to follow-up, 12 finished treatment before 30 September 2017 and five due to lack of analytical data. All patients were diagnosed with type-2 diabetes mellitus, 59.32% obese (BMI >30). At the beginning of treatment 62.7% (95% CI: 49.1% to 75%) had CrCl >60 mL/min/1.73 m2. 89.5% (95% CI: 79.2% to 96.2%) of patients followed renal function monitoring recommendations. Six patients did not have correct monitoring: three patients did not have any follow-up and 3 patients had CrCl <45 mL/min/1.73 m2 and continued treatment.

Conclusion

Patients treated with iSGLT-2 have good control of renal function. Most of them followed renal function monitoring recommendations. There were patients whose renal function did not align with the recommendations at the beginning of treatment.

References and/or Acknowledgements

1. Summary of products. http://www.ema.europa.eu/ema/ 2. Update of hyperglycaemia algorithm. http://www.redgdps.org/redgdpsresponde/madrid.php No conflict of interest",2018-01-01 +29740723,International chemical identifier for reactions (RInChI).,"The Reaction InChI (RInChI) extends the idea of the InChI, which provides a unique descriptor of molecular structures, towards reactions. Prototype versions of the RInChI have been available since 2011. The first official release (RInChI-V1.00), funded by the InChI Trust, is now available for download ( http://www.inchi-trust.org/downloads/ ). This release defines the format and generates hashed representations (RInChIKeys) suitable for database and web operations. The RInChI provides a concise description of the key data in chemical processes, and facilitates the manipulation and analysis of reaction data.",2018-05-09 +26199991,Ulcerative Colitis Database: An Integrated Database and Toolkit for Gene Function and Medication Involved in Ulcerative Colitis.,"

Background

Over the last decade, a massive amount of well-annotated genomic data has been accumulated on the pathogenesis and therapies for ulcerative colitis (UC). However, a comprehensive repository is not available yet.

Methods

Ulcerative Colitis Database (UCDB) was constructed using text mining followed by manual curation of the literature to collect reliable information on UC-related genes, drugs, and susceptibility loci. UC DNA microarray data were collected. R packages were used to implement gene expression analysis toolkit.

Results

UCDB includes 4 separate but closely related components: ""UC GENE,"" ""UC DRUG,"" ""UC LOCUS,"" and ""UC ANALYSIS."" The UC GENE contains comprehensive information for 1151 UC-related genes manually curated from 2919 publications. The UC DRUG includes information for 248 drugs manually curated from 2344 publications. ""UC LOCUS"" includes 110 UC susceptibility SNP loci, which were collected from 12 Genome-Wide Association Studies. A comprehensive expression quantitative trait loci browser was also implemented. The UC ANALYSIS is an expression analysis toolkit for 37 UC expression array data sets, which contains 1098 samples. The toolkit can be used to do gene expression correlation, clustering, differential expression, and Gene Set Enrichment Analysis (GSEA).

Conclusions

UCDB provides a comprehensive collection of well-curated UC-related genes and drugs, and straightforward interfaces for gene expression analyses. UCDB is a useful leading resource for both basic and clinical research and will benefit UC community worldwide. UCDB is freely accessible at http://seiwertlab.uchicago.edu/UCDB.",2015-08-01 +33037085,PCBP2 Posttranscriptional Modifications Induce Breast Cancer Progression via Upregulation of UFD1 and NT5E.,"It is commonly accepted that cellular protein levels are primarily determined by mRNA levels. However, discordance between protein and mRNA expression has been implicated in many pathologic conditions including oncogenesis. The mechanisms involved in this discordance are complicated and far from understood. In this study, it was observed that the expression levels of poly(C) binding protein 2 (PCBP2) mRNA and protein were diametric in breast normal and cancer cell lines, paraffin-embedded and fresh tissue specimens, consistent with data from The Cancer Genome Atlas and the Clinical Proteomic Tumor Analysis Consortium. Moreover, PCBP2 protein expression was significantly associated with disease progression and poor outcome in patients with breast cancer. Depletion of PCBP2 protein inhibited cell proliferation, colony formation, migration, invasion, and in vivo tumor growth and metastasis. Forced expression of PCBP2 exhibited the opposite effect. Mechanistically, it was demonstrated that PCBP2 3' untranslated region (3'UTR) was subject to alternative splicing and polyadenylation (APA) in breast cancer tissues and cell lines. Non-full-length 3'UTR PCBP2 transcripts yielded more protein than the full-length 3'UTR transcripts and enhanced the oncogenic and metastatic capacities of human breast cancer cells. Furthermore, UFD1 and NT5E were identified as genes downstream of PCBP2. PCBP2 promoted oncogenicity of breast cancer cells via upregulation of the expression of UFD1 and NT5E by direct binding to their 3'UTR-B portions. 
IMPLICATIONS: Findings demonstrate that APA of PCBP2 3'UTR contributes to its increased expression with subsequent promotion of breast cancer progression by regulating UFD1 and NT5E. VISUAL OVERVIEW: http://mcr.aacrjournals.org/content/molcanres/19/1/86/F1.large.jpg.",2020-10-09 +32344139,Development and Validation of Machine Learning Algorithms for Predicting Adverse Events After Surgery for Lumbar Degenerative Spondylolisthesis.,"

Background

Preoperative prognostication of adverse events (AEs) for patients undergoing surgery for lumbar degenerative spondylolisthesis (LDS) can improve risk stratification and help guide the surgical decision-making process. The aim of this study was to develop and validate a set of predictive variables for 30-day AEs after surgery for LDS.

Methods

The American College of Surgeons National Surgical Quality Improvement Program was used for this study (2005-2016). Logistic regression (enter, stepwise, and forward) and LASSO (least absolute shrinkage and selection operator) methods were performed to identify and select variables for analyses, which resulted in 26 potential models. The final model was selected based on clinical criteria and numeric results.

Results

The overall 30-day rate of AEs for 80,610 patients who underwent surgery for LDS in this database was 4.9% (n = 3965). The median age of the cohort was 58.0 years (range, 18-89 years). The model with the following 10-predictive factors (age, gender, American Society of Anesthesiologists grade, autogenous iliac bone graft, instrumented fusion, levels of surgery, surgical approach, functional status, preoperative serum albumin [g/dL] and serum alkaline phosphatase [IU/L]) performed well on the discrimination, calibration, Brier score, and decision analyses to develop machine learning algorithms. Logistic regression showed higher areas under the curve than did LASSO methods across the different models. The predictive probability derived from the best model is uploaded on an open-access Web application, which can be found at: https://spine.massgeneral.org/drupal/Lumbar-Degenerative-AdverseEvents.

Conclusions

It is feasible to develop machine learning algorithms from large datasets to provide useful tools for patient counseling and surgical risk assessment.",2020-04-25 +32423416,tappAS: a comprehensive computational framework for the analysis of the functional impact of differential splicing.,"Recent advances in long-read sequencing solve inaccuracies in alternative transcript identification of full-length transcripts in short-read RNA-Seq data, which encourages the development of methods for isoform-centered functional analysis. Here, we present tappAS, the first framework to enable a comprehensive Functional Iso-Transcriptomics (FIT) analysis, which is effective at revealing the functional impact of context-specific post-transcriptional regulation. tappAS uses isoform-resolved annotation of coding and non-coding functional domains, motifs, and sites, in combination with novel analysis methods to interrogate different aspects of the functional readout of transcript variants and isoform regulation. tappAS software and documentation are available at https://app.tappas.org.",2020-05-18 +27987180,Construction of the Leaf Senescence Database and Functional Assessment of Senescence-Associated Genes.,"Leaf senescence is the last phase of plant development and a highly coordinated process regulated by a large number of senescence-associated genes (SAGs). By broad literature survey, we constructed a leaf senescence database (LSD) in 2011 and updated it to Version 2.0 in 2014 ( http://www.eplantsenescence.org/ and http://psd.cbi.pku.edu.cn/ ) which contains a total of 5357 genes and 324 mutants from 44 species. These SAGs were retrieved based on genetic, genomic, proteomic, physiological, or other experimental evidence and were classified into different categories according to their functions in leaf senescence or morphological phenotype of mutants. To provide comprehensive information for SAGs, we made extensive annotation by both manual and computational approaches. 
In addition, we predicted putative orthologues of the SAGs in other species. LSD has a user-friendly interface to allow users to make text queries or BLAST searches and to download SAGs sequences for local analysis. Functional analyses of putative SAGs reveal that WRKY75, AZF2, NAC16, and WRKY26 are positive regulators of leaf senescence, while MKP2 and CTR1 perform negative regulation to leaf senescence. This database has been served as a valuable resource for basic research on the function of SAGs and evolution of plant leaf senescence, as well as for the exploration of genetic traits in agronomically important plants.",2017-01-01 +31848453,PRIMEval: Optimization and screening of multiplex oligonucleotide assays.,"The development of multiplex polymerase chain reaction and microarray assays is challenging due to primer dimer formation, unspecific hybridization events, the generation of unspecific by-products, primer depletion, and thus lower amplification efficiencies. We have developed a software workflow with three underlying algorithms that differ in their use case and specificity, allowing the complete in silico evaluation of such assays on user-derived data sets. We experimentally evaluated the method for the prediction of oligonucleotide hybridization events including resulting products and probes, self-dimers, cross-dimers and hairpins at different experimental conditions. The developed method allows explaining the observed artefacts through in silico WGS data and thermodynamic predictions. PRIMEval is available publicly at https://primeval.ait.ac.at.",2019-12-17 +32337325,Crystallographic orientation and grain size data obtained by Electron Back Scatter Diffraction (EBSD) on quartz analysed in mylonitic quartzite from the Island of Elba (Italy).,"Raw Electron Back Scatter Diffraction (EBSD) data on deformed quartz from a mylonitic quartzite sample of the Calamita Schists (Island of Elba, Italy) is available at https://doi.org/10.17632/8c937t6zs4.32. 
The investigated sample (IESP3SP78) was collected in quartz-rich outcrops exposed at the Praticciolo Cape and was used to realize an oriented thin section (cut parallel to lineation and perpendicular to foliation). Preliminary investigations were carried out by transmitted-light and scanning electron microscopy (SEM), in order to select key areas for EBSD analysis. EBSD mapping was performed on selected areas of deformed quartz, which was the only phase indexed and were processed to derive orientation maps, pole figures, inverse pole figures, misorientation axis distribution in sample and crystal coordinates. While the processed data is available on the original research article (""Fluid-assisted Strain Localization in Quartz at the Brittle/Ductile Transition""; https://doi.org/10.1029/2019GC008270), this contribution is devoted to supply the unprocessed EBSD data, together with a methodological description, aimed to allow the reproduction of the processed dataset. A brief statistical description of the investigated EBSD maps is also available. This data is valuable because it offers grain size and orientation analysis of deformed quartz investigated in a natural study case and the present publication makes it accessible to those working on naturally and experimentally deformed quartz.",2019-11-02 +,Best Paper Selection,"Daniulaityte R, Chen L, Lamy FR, Carlson RG, Thirunarayan K, Sheth A. “When ‘Bad’ is ‘Good’”: Identifying Personal Communication and Sentiment in Drug-Related Tweets. JMIR Public Health Surveill 2016 Oct 24;2(2):e162 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5099500/ Freedman RA, Viswanath K, Vaz-Luis I, Keating NL. Learning from social media: utilizing advanced data extraction techniques to understand barriers to breast cancer treatment. 
Breast Cancer Res Treat 2016 Jul;158(2):395-405 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5537590/ Hawkins JB, Brownstein JS, Tuli G, Runels T, Broecker K, Nsoesie EO, McIver DJ, Rozenblum R, Wright A, Bourgeois FT, Greaves F. Measuring patient-perceived quality of care in US hospitals using Twitter. BMJ Qual Saf 2016 Jun;25(6):404-13 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4878682/ Kondylakis H, Koumakis L, Hänold S, Nwankwo I, Forgó N, Marias K, Tsiknakis M, Graf N. Donor’s support tool: Enabling informed secondary use of patient’s biomaterial and personal data. Int J Med Inform 2017 Jan;97:282-92 +https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(16)30234-9 Massey PM, Leader A, Yom-Tov E, Budenz A, Fisher K, Klassen AC. Applying Multiple Data Collection Tools to Quantify Human Papillomavirus Vaccine Communication on Twitter. J Med Internet Res 2016 Dec 5;18(12):e318 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5168526/",2017-08-01 +32339626,libsbmljs-Enabling web-based SBML tools.,"The SBML standard is used in a number of online repositories for storing systems biology models, yet there is currently no Web-capable JavaScript library that can read and write the SBML format. This is a severe limitation since the Web has become a universal means of software distribution, and the graphical capabilities of modern web browsers offer a powerful means for building rich, interactive applications. Also, there is a growing developer population specialized in web technologies that is poised to take advantage of the universality of the web to build the next generation of tools in systems biology and other fields. However, current solutions require server-side processing in order to support existing standards in modeling. We present libsbmljs, a JavaScript/WebAssembly library for Node.js and the Web with full support for all SBML extensions. 
Our library is an enabling technology for online SBML editors, model-building tools, and web-based simulators, and runs entirely in the browser without the need for any dedicated server resources. We provide NPM packages, an extensive set of examples, JavaScript API documentation, and an online demo that allows users to read and validate the SBML content of any model in the BioModels and BiGG databases. We also provide instructions and scripts to allow users to build a copy of libsbmljs against any libSBML version. Although our library supports all existing SBML extensions, we cover how to add additional extensions to the wrapper, should any arise in the future. To demonstrate the utility of this implementation, we also provide a demo at https://libsbmljsdemo.github.io/ with a proof-of-concept SBML simulator that supports ODE and stochastic simulations for SBML core models. Our project is hosted at https://libsbmljs.github.io/, which contains links to examples, API documentation, and all source code files and build scripts used to create libsbmljs. Our source code is licensed under the Apache 2.0 open source license.",2020-04-24 +31946058,OpenArm 2.0: Automated Segmentation of 3D Tissue Structures for Multi-Subject Study of Muscle Deformation Dynamics.,"We present a novel neural-network-based pipeline for segmentation of 3D muscle and bone structures from localized 2D ultrasound data of the human arm. Building from the U-Net [1] neural network framework, we examine various data augmentation techniques and training data sets to both optimize the network's performance on our data set and hypothesize strategies to better select training data, minimizing manual annotation time while maximizing performance. We then employ this pipeline to generate the OpenArm 2.0 data set, the first factorial set of multi-subject, multi-angle, multi-force scans of the arm with full volumetric annotation of the biceps and humerus. 
This data set has been made available on SimTK (https://simtk.org/projects/openarm) to enable future exploration of muscle force modeling, improved musculoskeletal graphics, and assistive device control.",2019-07-01 +,A multi-resolution approach to national-scale cultivated area estimation of soybean,"Satellite remote sensing data can provide timely, accurate, and objective information on cultivated area by crop type and, in turn, facilitate accurate estimates of crop production. Here, we present a generic multi-resolution approach to sample-based crop type area estimation at the national level using soybean as an example crop type. Historical MODIS (MODerate resolution Imaging Spectroradiometer) data were used to stratify growing regions into subsets of low, medium and high soybean cover. A stratified random sample of 20km×20km sample blocks was selected and Landsat data for these sample blocks classified into soybean cover. The Landsat-derived soybean area was used to produce national estimates of soybean area. Current year MODIS-indicated soybean cover served as an auxiliary variable in a stratified regression estimator procedure. To evaluate the approach, we prototyped the method in the USA, where the 2013 USDA Cropland Data Layer (CDL) was used as a reference training data set for mapping soybean cover within each sample block. Three individual Landsat images were sufficient to accurately map soybean cover for all blocks, revealing that a rather sparse sample of phenological variation is needed to separate soybean from other cover types. In addition to stacks of images, we also evaluated standard radiometrically normalized Landsat inputs for mapping blocks individually (local-scale) and all at once (national-scale). All tested inputs resulted in area estimates comparable to the official USDA estimate of 30.86Mha, with lower accuracy and higher standard error for national-scale mapping implementations. 
The stratified regression estimator incorporating current year MODIS-indicated soy reduced the standard error of the estimated soybean area by over 25% relative to the standard error of the stratified estimator. Finally, the method was ported to Argentina. A stratified random sample of blocks was characterized for soybean cultivated area using stacks of individual Landsat images for the 2013–2014 southern hemisphere growing season. A sub-sample of these blocks was visited on the ground to assess the accuracy of the Landsat-derived soy classification. The stratified regression estimator procedure performed similarly to the US application as it resulted in a reduction in standard error of about 25% relative to the stratified estimator not incorporating current year MODIS-indicated soybean. Our final estimated soybean area was 28% lower than that reported by the USDA, corresponding to a 20% field-based omission error related to underdeveloped fields. Lessons learned from this study can be ported to other regions of comparable field size and management intensity to assess soybean cultivated area. Results for the USA and Argentina may be viewed and downloaded at http://glad.geog.umd.edu/us-analysis and http://glad.geog.umd.edu/argentina-analysis, respectively.",2017-06-01 +26335248,"A Web Resource for Standardized Benchmark Datasets, Metrics, and Rosetta Protocols for Macromolecular Modeling and Design.","The development and validation of computational macromolecular modeling and design methods depend on suitable benchmark datasets and informative metrics for comparing protocols. In addition, if a method is intended to be adopted broadly in diverse biological applications, there needs to be information on appropriate parameters for each protocol, as well as metrics describing the expected accuracy compared to experimental data. 
In certain disciplines, there exist established benchmarks and public resources where experts in a particular methodology are encouraged to supply their most efficient implementation of each particular benchmark. We aim to provide such a resource for protocols in macromolecular modeling and design. We present a freely accessible web resource (https://kortemmelab.ucsf.edu/benchmarks) to guide the development of protocols for protein modeling and design. The site provides benchmark datasets and metrics to compare the performance of a variety of modeling protocols using different computational sampling methods and energy functions, providing a ""best practice"" set of parameters for each method. Each benchmark has an associated downloadable benchmark capture archive containing the input files, analysis scripts, and tutorials for running the benchmark. The captures may be run with any suitable modeling method; we supply command lines for running the benchmarks using the Rosetta software suite. We have compiled initial benchmarks for the resource spanning three key areas: prediction of energetic effects of mutations, protein design, and protein structure prediction, each with associated state-of-the-art modeling protocols. With the help of the wider macromolecular modeling community, we hope to expand the variety of benchmarks included on the website and continue to evaluate new iterations of current methods as they become available.",2015-09-03 +30395267,RNAcentral: a hub of information for non-coding RNA sequences.,"RNAcentral is a comprehensive database of non-coding RNA (ncRNA) sequences, collating information on ncRNA sequences of all types from a broad range of organisms. We have recently added a new genome mapping pipeline that identifies genomic locations for ncRNA sequences in 296 species. We have also added several new types of functional annotations, such as tRNA secondary structures, Gene Ontology annotations, and miRNA-target interactions. 
A new quality control mechanism based on Rfam family assignments identifies potential contamination, incomplete sequences, and more. The RNAcentral database has become a vital component of many workflows in the RNA community, serving as both the primary source of sequence data for academic and commercial groups, as well as a source of stable accessions for the annotation of genomic and functional features. These examples are facilitated by an improved RNAcentral web interface, which features an updated genome browser, a new sequence feature viewer, and improved text search functionality. RNAcentral is freely available at https://rnacentral.org.",2019-01-01 +30239928,"UniLectin3D, a database of carbohydrate binding proteins with curated information on 3D structures and interacting ligands.","Lectins, and related receptors such as adhesins and toxins, are glycan-binding proteins from all origins that decipher the glycocode, i.e. the structural information encoded in the conformation of complex carbohydrates present on the surface of all cells. Lectins are still poorly classified and annotated, but since their functions are based on ligand recognition, their 3D-structures provide a solid foundation for characterization. UniLectin3D is a curated database that classifies lectins on origin and fold, with cross-links to literature, other databases in glycosciences and functional data such as known specificity. The database provides detailed information on lectins, their bound glycan ligands, and features their interactions using the Protein-Ligand Interaction Profiler (PLIP) server. Special care was devoted to the description of the bound glycan ligands with the use of simple graphical representation and numerical format for cross-linking to other databases in glycoscience. We conceived the design of the database architecture and the navigation tools to account for all organisms, as well as to search for oligosaccharide epitopes complexed within specified binding sites. 
UniLectin3D is accessible at https://www.unilectin.eu/unilectin3D.",2019-01-01 +29455297,Pan European Phenological database (PEP725): a single point of access for European data.,"The Pan European Phenology (PEP) project is a European infrastructure to promote and facilitate phenological research, education, and environmental monitoring. The main objective is to maintain and develop a Pan European Phenological database (PEP725) with an open, unrestricted data access for science and education. PEP725 is the successor of the database developed through the COST action 725 ""Establishing a European phenological data platform for climatological applications"" working as a single access point for European-wide plant phenological data. So far, 32 European meteorological services and project partners from across Europe have joined and supplied data collected by volunteers from 1868 to the present for the PEP725 database. Most of the partners actively provide data on a regular basis. The database presently holds almost 12 million records, about 46 growing stages and 265 plant species (including cultivars), and can be accessed via http://www.pep725.eu/ . Users of the PEP725 database have studied a diversity of topics ranging from climate change impact, plant physiological question, phenological modeling, and remote sensing of vegetation to ecosystem productivity.",2018-02-18 +31167633,GRASP2: fast and memory-efficient gene-centric assembly and homolog search for metagenomic sequencing data.,"

Background

A crucial task in metagenomic analysis is to annotate the function and taxonomy of the sequencing reads generated from a microbiome sample. In general, the reads can either be assembled into contigs and searched against reference databases, or individually searched without assembly. The first approach may suffer from fragmentary and incomplete assembly, while the second is hampered by the reduced functional signal contained in the short reads. To tackle these issues, we have previously developed GRASP (Guided Reference-based Assembly of Short Peptides), which accepts a reference protein sequence as input and aims to assemble its homologs from a database containing fragmentary protein sequences. In addition to a gene-centric assembly tool, GRASP also serves as a homolog search tool when using the assembled protein sequences as templates to recruit reads. GRASP has significantly improved recall rate (60-80% vs. 30-40%) compared to other homolog search tools such as BLAST. However, GRASP is both time- and space-consuming. Subsequently, we developed GRASPx, which is 30X faster than GRASP. Here, we present a completely redesigned algorithm, GRASP2, for this computational problem.

Results

GRASP2 utilizes Burrows-Wheeler Transformation (BWT) and FM-index to perform assembly graph generation, and reduces the search space by employing a fast ungapped alignment strategy as a filter. GRASP2 also explicitly generates candidate paths prior to alignment, which effectively uncouples the iterative access of the assembly graph and alignment matrix. This strategy makes the execution of the program more efficient under current computer architecture, and contributes to GRASP2's speedup. GRASP2 is 8-fold faster than GRASPx (and 250-fold faster than GRASP) and uses 8-fold less memory while maintaining the original high recall rate of GRASP. GRASP2 reaches ~ 80% recall rate compared to that of ~ 40% generated by BLAST, both at a high precision level (> 95%). With such a high performance, GRASP2 is only ~3X slower than BLASTP.

Conclusion

GRASP2 is a high-performance gene-centric and homolog search tool with significant speedup compared to its predecessors, which makes GRASP2 a useful tool for metagenomics data analysis. GRASP2 is implemented in C++ and is freely available from http://www.sourceforge.net/projects/grasp2 .",2019-06-06 +32032351,SmartPhase: Accurate and fast phasing of heterozygous variant pairs for genetic diagnosis of rare diseases.,"There is an increasing need to use genome and transcriptome sequencing to genetically diagnose patients suffering from suspected monogenic rare diseases. The proper detection of compound heterozygous variant combinations as disease-causing candidates is a challenge in diagnostic workflows as haplotype information is lost by currently used next-generation sequencing technologies. Consequently, computational tools are required to phase, or resolve the haplotype of, the high number of heterozygous variants in the exome or genome of each patient. Here we present SmartPhase, a phasing tool designed to efficiently reduce the set of potential compound heterozygous variant pairs in genetic diagnoses pipelines. The phasing algorithm of SmartPhase creates haplotypes using both parental genotype information and reads generated by DNA or RNA sequencing and is thus well suited to resolve the phase of rare variants. To inform the user about the reliability of a phasing prediction, it computes a confidence score which is essential to select error-free predictions. It incorporates existing haplotype information and applies logical rules to determine variants that can be excluded as causing a recessive, monogenic disease. SmartPhase can phase either all possible variant pairs in predefined genetic loci or preselected variant pairs of interest, thus keeping the focus on clinically relevant results. We compared SmartPhase to WhatsHap, one of the leading comparable phasing tools, using simulated data and a real clinical cohort of 921 patients. 
On both data sets, SmartPhase generated error-free predictions using our derived confidence score threshold. It outperformed WhatsHap with regard to the percentage of resolved pairs when parental genotype information is available. On the cohort data, SmartPhase enabled on average the exclusion of approximately 22% of the input variant pairs in each singleton patient and 44% in each trio patient. SmartPhase is implemented as an open-source Java tool and freely available at http://ibis.helmholtz-muenchen.de/smartphase/.",2020-02-07 +26508704,Proteomic profiling of nuclear fractions from native renal inner medullary collecting duct cells.,"The control of renal water excretion occurs in part by regulation of transcription in response to vasopressin in cells of the collecting duct. A systems biology-based approach to understanding transcriptional control in renal collecting duct cells depends on knowledge of what transcription factors and other regulatory proteins are present in the cells' nuclei. The goal of this article is to report comprehensive proteomic profiling of cellular fractions enriched in nuclear proteins from native inner medullary collecting duct (IMCD) cells of the rat. Multidimensional separation procedures and state-of-the art protein mass spectrometry produced 18 GB of spectral data that allowed the high-stringency identification of 5,048 proteins in nuclear pellet (NP) and nuclear extract (NE) fractions of biochemically isolated rat IMCD cells (URL: https://helixweb.nih.gov/ESBL/Database/IMCD_Nucleus/). The analysis identified 369 transcription factor proteins out of the 1,371 transcription factors coded by the rat genome. The analysis added 1,511 proteins to the recognized proteome of rat IMCD cells, now amounting to 8,290 unique proteins. 
Analysis of samples treated with the vasopressin analog dDAVP (1 nM for 30 min) or its vehicle revealed 99 proteins in the NP fraction and 88 proteins in the NE fraction with significant changes in spectral counts (Fisher exact test, P < 0.005). Among those altered by vasopressin were seven distinct histone proteins, all of which showed decreased abundance in the NP fraction, consistent with a possible effect of vasopressin to induce chromatin remodeling. The results provide a data resource for future studies of vasopressin-mediated transcriptional regulation in the renal collecting duct.",2015-10-27 +30329093,REDfly: the transcriptional regulatory element database for Drosophila.,"The REDfly database provides a comprehensive curation of experimentally-validated Drosophila transcriptional cis-regulatory elements and includes information on DNA sequence, experimental evidence, patterns of regulated gene expression, and more. Now in its thirteenth year, REDfly has grown to over 23 000 records of tested reporter gene constructs and 2200 tested transcription factor binding sites. Recent developments include the start of curation of predicted cis-regulatory modules in addition to experimentally-verified ones, improved search and filtering, and increased interaction with the authors of curated papers. An expanded data model that will capture information on temporal aspects of gene regulation, regulation in response to environmental and other non-developmental cues, sexually dimorphic gene regulation, and non-endogenous (ectopic) aspects of reporter gene expression is under development and expected to be in place within the coming year. 
REDfly is freely accessible at http://redfly.ccr.buffalo.edu, and news about database updates and new features can be followed on Twitter at @REDfly_database.",2019-01-01 +28439836,Prediction of miRNA-mRNA Interactions Using miRGate.,"miRGate (http://mirgate.bioinfo.cnio.es/) is a freely available database that contains predicted and experimentally validated microRNA-messenger RNA (miRNA-mRNA) target pairs. This resource includes novel predictions from five well-established algorithms, but recalculated from a common and comprehensive sequence dataset. It includes all 3'-UTR sequences of all known genes of the three most widely employed genomes (human, mouse, and rat), and all annotated miRNA sequences from those genomes. Besides, it also contains predictions for all genes in human targeted by miRNA viruses such as Epstein-Barr and Kaposi sarcoma-associated herpes virus. The approach intends to circumvent one of the main drawbacks in this area, as diverse sequences and gene database versions cause poor overlap among different target prediction methods even with experimentally confirmed targets. As a result, miRGate predictions have been successfully validated using functional assays in several laboratories. This chapter describes how a user can access target information via miRGate's web interface. It also shows how to automatically access the database through the programmatic interface based on representational state transfer services (REST), using the application programming interface (API) available at http://mirgate.bioinfo.cnio.es/API .",2017-01-01 +33195336,Centrality of G6PD in COVID-19: The Biochemical Rationale and Clinical Implications.,"Introduction: COVID-19 is a novel and devastating disease. Its manifestations vary from asymptomatic to lethal. Moreover, mortality rates differ based on underlying health conditions and ethnicity. 
We investigated the biochemical rationale behind these observations using machine reasoning by the sci.AI system (https://sci.ai/). Facts were extracted and linked from publications available in nlm.nih.gov and Europe PMC to form the dataset which was validated by medical experts. Results: Based on the analysis of experimental and clinical data, we synthesized detailed biochemical pathways of COVID-19 pathogenesis which were used to explain epidemiological and clinical observations. Clinical manifestations and biomarkers are highlighted to monitor the course of COVID-19 and navigate treatment. As depicted in the Graphical Abstract, SARS-CoV-2 triggers a pro-oxidant (PO) response leading to the production of reactive oxygen species (ROS) as a normal innate defense. However, SARS-CoV-2's unique interference with the antioxidant (AO) system, through suppression of nitric oxide (NO) production in the renin-angiotensin-aldosterone system (RAAS), leads to an excessive inflammatory PO response. The excessive PO response becomes critical in cohorts with a compromised AO system such as patients with glucose-6-phosphate dehydrogenase deficiency (G6PDd) where NO and glutathione (GSH) mechanisms are impaired. G6PDd develops in patients with metabolic syndrome. It is mediated by aldosterone (Ald) which also increases specifically in COVID-19. Conclusion: G6PD is essential for an adequate immune response. Both G6PDd and SARS-CoV-2 compromise the AO system through the same pathways rendering G6PDd the Achilles' heel for COVID-19. Thus, the evolutionary antimalarial advantage of the G6PDd cohort can be a disadvantage against SARS-CoV-2.",2020-10-22 +25404128,"Beyond protein expression, MOPED goes multi-omics.","MOPED (Multi-Omics Profiling Expression Database; http://moped.proteinspire.org) has transitioned from solely a protein expression database to a multi-omics resource for human and model organisms. 
Through a web-based interface, MOPED presents consistently processed data for gene, protein and pathway expression. To improve data quality, consistency and use, MOPED includes metadata detailing experimental design and analysis methods. The multi-omics data are integrated through direct links between genes and proteins and further connected to pathways and experiments. MOPED now contains over 5 million records, information for approximately 75,000 genes and 50,000 proteins from four organisms (human, mouse, worm, yeast). These records correspond to 670 unique combinations of experiment, condition, localization and tissue. MOPED includes the following new features: pathway expression, Pathway Details pages, experimental metadata checklists, experiment summary statistics and more advanced searching tools. Advanced searching enables querying for genes, proteins, experiments, pathways and keywords of interest. The system is enhanced with visualizations for comparing across different data types. In the future MOPED will expand the number of organisms, increase integration with pathways and provide connections to disease.",2014-11-17 +32105299,ConfID: an analytical method for conformational characterization of small molecules using molecular dynamics trajectories.,"MOTIVATION:The conformational space of small molecules can be vast and difficult to assess. Molecular dynamics (MD) simulations of free ligands in solution have been applied to predict conformational populations, but their characterization is often based on clustering algorithms or manual efforts. RESULTS:Here, we introduce ConfID, an analytical tool for conformational characterization of small molecules using MD trajectories. The evolution of conformational sampling and population frequencies throughout trajectories is calculated to check for sampling convergence while allowing to map relevant conformational transitions. 
The tool is designed to track conformational transition events and calculate time-dependent properties for each conformational population detected. AVAILABILITY AND IMPLEMENTATION:Toolkit and documentation are freely available at http://sbcb.inf.ufrgs.br/confid. CONTACT:marcelo.poleto@ufv.br or bigrisci@inf.ufrgs.br. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-06-01 +31623721,Metandem: An online software tool for mass spectrometry-based isobaric labeling metabolomics.,"Mass spectrometry-based stable isotope labeling provides the advantages of multiplexing capability and accurate quantification but requires tailored bioinformatics tools for data analysis. Despite the rapid advancements in analytical methodology, it is often challenging to analyze stable isotope labeling-based metabolomics data, particularly for isobaric labeling using MS/MS reporter ions for quantification. We report Metandem, a novel online software tool for isobaric labeling-based metabolomics, freely available at http://metandem.com/web/. Metandem provides a comprehensive data analysis pipeline integrating feature extraction, metabolite quantification, metabolite identification, batch processing of multiple data files, online parameter optimization for custom datasets, data normalization, and statistical analysis. Systematic evaluation of the Metandem tool was demonstrated on UPLC-MS/MS, nanoLC-MS/MS, CE-MS/MS and MALDI-MS platforms, via duplex, 4-plex, 10-plex, and 12-plex isobaric labeling experiments and the application to various biological samples.",2019-08-21 +25428374,The UCSC Genome Browser database: 2015 update.,"Launched in 2001 to showcase the draft human genome assembly, the UCSC Genome Browser database (http://genome.ucsc.edu) and associated tools continue to grow, providing a comprehensive resource of genome assemblies and annotations to scientists and students worldwide. 
Highlights of the past year include the release of a browser for the first new human genome reference assembly in 4 years in December 2013 (GRCh38, UCSC hg38), a watershed comparative genomics annotation (100-species multiple alignment and conservation) and a novel distribution mechanism for the browser (GBiB: Genome Browser in a Box). We created browsers for new species (Chinese hamster, elephant shark, minke whale), 'mined the web' for DNA sequences and expanded the browser display with stacked color graphs and region highlighting. As our user community increasingly adopts the UCSC track hub and assembly hub representations for sharing large-scale genomic annotation data sets and genome sequencing projects, our menu of public data hubs has tripled.",2014-11-26 +,StructureSelector: A web‐based software to select and visualize the optimal number of clusters using multiple methods,"Inferences of population genetic structure are of great importance to the fields of ecology and evolutionary biology. The program structure has been widely used to infer population genetic structure. However, previous studies demonstrated that uneven sampling often leads to wrong inferences on hierarchical structure. The most widely used ΔK method tends to identify the uppermost hierarchy of population structure. Recently, four alternative statistics (medmedk, medmeak, maxmedk and maxmeak) were proposed, which appear to be more accurate than the previously used methods for both even and uneven sampling data. However, the lack of easy‐to‐use software limits the use of these appealing new estimators. Here, we developed a web‐based user‐friendly software structureselector to calculate the four appealing alternative statistics together with the commonly used Ln Pr(X|K) and ΔK statistics. structureselector accepts the result files of structure, admixture or faststructure as input files. It reports the “best” K for each estimator, and the results are available as HTML or tab separated tables. 
The program can also generate graphical representations for specific K, which can be easily downloaded from the server. The software is freely available at http://lmme.qdio.ac.cn/StructureSelector/.",2018-01-01 +33448375,Revealing community assembly through barcoding: Mediterranean butterflies and dispersal variation.,"In Focus: Scalercio, S., Cini, A., Menchetti, M., Vodă, R., Bonelli, S., Bordoni, A., … Dapporto, L. (2020). How long is 3 km for a butterfly? Ecological constraints and functional traits explain high mitochondrial genetic diversity between Sicily and the Italian Peninsula. Journal of Animal Ecology. https://doi.org/10.1111/1365-2656.13196. Biotic and abiotic factors can shape geographical patterns of genetic variation within species, but few studies have addressed how this might generate common patterns at the level of communities of species. Scalercio et al. (2020) have combined mtDNA sequence data and life-history traits, to reveal a repeated pattern of genetic structure between Sicilian and southern Italian butterfly populations, which are separated by only 3 km of ocean. They reveal how intrinsic species traits and extrinsic environmental constraints explain this pattern, demonstrating an important role for wind. Moreover, the inclusion of almost 8,000 georeferenced sequences reveals that, in spite of also being present in southern Italy, almost half of Sicilian butterfly species are more closely related to populations from other parts of Europe, Asia or North Africa. 
We provide further discussion on the biogeographic barrier they identify, and the potential of community-level DNA barcoding to identify processes that structure genetic variation across communities.",2020-09-01 +32761068,Guidelines for the Management of Severe Traumatic Brain Injury: 2020 Update of the Decompressive Craniectomy Recommendations.,"When the fourth edition of the Brain Trauma Foundation's Guidelines for the Management of Severe Traumatic Brain Injury were finalized in late 2016, it was known that the results of the RESCUEicp (Trial of Decompressive Craniectomy for Traumatic Intracranial Hypertension) randomized controlled trial of decompressive craniectomy would be public after the guidelines were released. The guideline authors decided to proceed with publication but to update the decompressive craniectomy recommendations later in the spirit of ""living guidelines,"" whereby topics are updated more frequently, and between new editions, when important new evidence is published. The update to the decompressive craniectomy chapter presented here integrates the findings of the RESCUEicp study as well as the recently published 12-mo outcome data from the DECRA (Decompressive Craniectomy in Patients With Severe Traumatic Brain Injury) trial. Incorporation of these publications into the body of evidence led to the generation of 3 new level-IIA recommendations; a fourth previously presented level-IIA recommendation remains valid and has been restated. To increase the utility of the recommendations, we added a new section entitled Incorporating the Evidence into Practice. This summary of expert opinion provides important context and addresses key issues for practitioners, which are intended to help the clinician utilize the available evidence and these recommendations. 
The full guideline can be found at: https://braintrauma.org/guidelines/guidelines-for-the-management-of-severe-tbi-4th-ed#/.",2020-09-01 +28149059,Genes2GO: A web application for querying gene sets for specific GO terms.,"Gene ontology annotations have become an essential resource for biological interpretations of experimental findings. The process of gathering basic annotation information in tables that link gene sets with specific gene ontology terms can be cumbersome, in particular if it requires above average computer skills or bioinformatics expertise. We have therefore developed Genes2GO, an intuitive R-based web application. Genes2GO uses the biomaRt package of Bioconductor in order to retrieve custom sets of gene ontology annotations for any list of genes from organisms covered by the Ensembl database. Genes2GO produces a binary matrix file, indicating for each gene the presence or absence of specific annotations for a gene. It should be noted that other GO tools do not offer this user-friendly access to annotations.

Availability

Genes2GO is freely available and listed under http://www.semantic-systems-biology.org/tools/externaltools/.",2016-06-15 +33835829,First Report of Fusarium proliferatum Causing Garlic clove Rot in Russian Federation. ,"Garlic (Allium sativum L.) is a widely consumed bulbous crop both worldwide and in Russia. About 200,000 tons of garlic is produced in Russia annually (https://rosstat.gov.ru/). Significant pre- and post-harvest losses of garlic regularly occur due to Fusarium sp. (Taylor et al., 2013). Since September 2018, rotting has been observed in Russia during garlic bulb storage (data of the Federal Scientific Vegetable Center, FSVC, Moscow Region). The outer bulb surface looked healthy, but underneath the integumentary scales, the cloves had light brown and brown spots. When grown, diseased plants were characterized by root and bulb disruption and leaf drying; for some cultivars, up to 100% of plants died. In January 2020, cv. Strelets and Dubkovsky bulbs, collected in July 2019, with rot symptoms, were taken from the FSVC storage. Necrotic clove tissue fragments (0.2-0.5 cm) were cut, sanitized with 70% ethanol for 3 min, rinsed with sterile water, and incubated on potato dextrose agar (PDA) with 1 mg/ml ampicillin at 22°C in the dark. Four single-spore cultures were obtained from four diseased bulbs. After 6 days of incubation, the isolates produced abundant aerial white mycelia and acquired a purple pigmentation. The hyphae were hyaline with septation. All isolates (Dubkovsky, Dubkovsky 2, Strelets, and Strelets 2) produced numerous oval unicellular microconidia without septa, 4.1 to 11.6 × 1.3 to 3.4 µm (n = 50) and very few macroconidia with 3-4 septa (21 to 26 × 3 to 4 µm (n = 30)), narrowed at both ends. The cultural and conidial characteristics of the isolates corresponded to Fusarium species (Leslie and Summerell 2006). 
To determine the species, DNA was extracted from four isolates, and the internal transcribed spacer (ITS), and genes of translation elongation factor 1α (EF1α) and subunits 1 and 2 of DNA-directed RNA polymerase II (RPB1 and RPB2) were amplified and sequenced with primers ITS1/ITS4 (White et al. 1990), EF1/EF2 (O'Donnell et al. 1998a), RPB1-F5/RPB1-R8 (O'Donnell et al. 2010) and fRPB2-5F/fRPB2-7cR (Liu et al. 1999). The obtained sequences were identical for all four isolates. The isolate Strelets sequences were deposited in NCBI GenBank (MW149129 (ITS), MW161161 (EF1α), MW413302 (RPB1) and MW413303 (RPB2)); their analysis in MLST (http://fusarium.mycobank.org) showed 98.8-99.8% similarity to F. proliferatum (NRRL 13582, 13598 and others), which is part of the F. fujikuroi complex (O'Donnell et al. 1998b). The test on pathogenicity was performed two times according to (Leyronas et al. 2018). For this, three replicates of 10 cloves (cv. Strelets) were soaked in a conidial suspension (~10^6 conidia/ml; Strelets isolate) for 24 h. Ten control cloves were soaked in sterile water. The cloves were incubated on Petri dishes (5 cloves on a dish; on filter paper moistened with sterile water) in the dark at 23°C. After 5 days, brown lesions and white mycelium developed on the surface of the treated cloves. The taxonomic status of the fungus isolated from necrotic tissue was determined as F. proliferatum according to the ITS, EF1α, RPB1 and RPB2 analysis. Garlic basal and bulb rot is known to be caused by F. oxysporum f. sp. cepae and F. proliferatum (Snowdon 1990). This study is the first report of F. proliferatum causing rot of garlic bulbs during storage in Russia. F. proliferatum produces a variety of mycotoxins during bulb infestation, and our findings are important for diagnosing a Fusarium disease and the use of garlic crop in culinary and medicine. Funding The reported study was funded by Russian Foundation for Basic Research, project number 20-316-70009. 
References: Leslie, J. F., and Summerell, B. A. 2006. Page 224 in: The Fusarium Laboratory Manual. Blackwell, Oxford, UK. https://doi.org/10.1002/9780470278376 Leyronas, C., et al. 2018. Plant Dis. 102:2658 https://doi.org/10.1094/PDIS-06-18-0962-PDN Liu, Y.J. et al. 1999. Mol. Biol. Evol. 16: 1799 https://doi.org/10.1093/oxfordjournals.molbev.a026092 O'Donnell, K, et al. 1998a. Proc Natl Acad Sci USA. 95(5):2044. https://doi.org/10.1073/pnas.95.5.2044. O'Donnell, et al. 1998b. Mycologia 90:465 O'Donnell, K., et al. 2010. J. Clin. Microbiol., 48: 3708 https://doi.org/10.1128/JCM.00989-10 Snowdon, A. L. Pages 250-252 in: A Color Atlas of Post-Harvest Diseases and Disorders of Fruits and Vegetables. Vol. 1. 1990. Wolfe Scientific, London. Taylor, A, et al. 2013. Plant Pathol. 62:103. https://doi.org/10.1111/j.1365-3059.2012.02624.x White, T. J., et al. 1990. Page 315 in: PCR Protocols: A Guide to Methods and Applications. Academic Press, San Diego, CA.",2021-04-09 +33203735,Urinary Soluble CD163 and Disease Activity in Biopsy-Proven ANCA-Associated Glomerulonephritis.,"

Background and objectives

ANCA-associated GN is a common cause of rapidly progressive GN, with high relapse rates. The early recognition of an ANCA-associated GN relapse is of importance to prevent loss of kidney function. Urinary soluble CD163 has been identified as a promising marker of active ANCA-associated GN. Previous studies, however, are limited by the lack of histologic data.

Design, setting, participants, & measurements

We analyzed urinary soluble CD163 in 95 patients with ANCA-associated vasculitis who underwent a kidney biopsy. In total, 125 kidney tissue sections (first kidney biopsy, n=67; repeated biopsy, n=58) with concurrent 24-hour urine samples were studied. Correlation analyses comparing urinary soluble CD163 levels and morphologic features of ANCA-associated GN were performed using Spearman rank correlation analysis. The diagnostic performance of biomarkers to detect relapsing ANCA-associated GN was evaluated using receiver operating characteristics curve analysis.

Results

High levels of urinary soluble CD163 were found in 96 (87%) of 110 biopsies with active ANCA-associated GN compared with one (7%) of 15 biopsies without active ANCA-associated GN and one (6%) of 17 healthy controls. Urinary soluble CD163 correlated with fibrinoid necrosis (Rho=0.48, P<0.001) and cellular crescents (Rho=0.70, P<0.001) on kidney biopsy. In repeated biopsies, urinary soluble CD163's sensitivity of 0.94 and specificity of 0.91 for the recognition of relapsing ANCA-associated GN appeared better than routine clinical measures. The presence of CD163+ cells in affected glomeruli confirmed urinary soluble CD163's origin.

Conclusions

Urinary soluble CD163 is associated with active ANCA-associated GN and correlates with histologic features as seen in ANCA-associated GN.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2020_11_17_CJN07210520_final.mp3.",2020-11-17 +,"2360 Engaging, capturing, and integrating the voice of the customer and collaborator in a clinical and translational science program","OBJECTIVES/SPECIFIC AIMS: This presentation will highlight the +framework, domains, and approaches of the “Engaging the Voice of the +CTS Customer and Collaborator System” created at the University of +Minnesota Clinical and Translational Science Institute (CTSI) in response to the +need to improve the stakeholder engagement, quality, efficiency, consistency, +and transparency of the clinical and translational work. This system addresses 3 +important results-based accountability measures/questions: +“What should we do?”, “How well did we do +it?”, and “Is anyone better off?”. According to +Woolf (2008), “translational research means different things to +different people.” Social networks and systems that support +translational processes and outcomes are complex, nonlinear, and +multidisciplinary (Smith et al., 2017). In this highly +uncertain and fluid context, the input of program stakeholders is paramount to +move translation forward. NCATS Strategic Plan (2016) directs the grantees to +engage patients, community members and nonprofit organizations meaningfully in +translational science and all aspects of translational research. Engagement of +stakeholders throughout the lifecycle of a translational research project +ensures the project processes and outcomes are relevant to and directly address +their needs and will be more readily adopted by the community. +“Customer” (among other terms are Beneficiary, +Collaborator, Client, Community, Consumer, Service User, etc.) is a person, +organization, or entity who directly benefits from service delivery or program +(Friedman, 2005). Customers can be: direct and indirect, primary and secondary, +internal and external. 
Our analysis of CTS stakeholders (“Who are our +customers/collaborators?”) produced the following list of +customers and collaborators: researchers, University departments, translational +science workforce, patients, community members and entities, nonprofit +organizations, industry collaborators, NCATS/NIH, CTSA hub partners, +and CTSI staff. The “Voice of the Customer” (VOC) is the +term used to describe the stated and unstated needs or requirements of the +program’s customer. The “voice of the customer” +is a process used to capture the feedback from the customer (internal or +external) to provide the customers with the best quality of service, support, +and/or product. This process is about being proactive and constantly +innovative to capture the changing needs of the customers with time. Related to +the VOC is the concept of user innovation that refers to innovations developed +by consumers and end users. Experience shows that sometimes the best product or +a process concept idea comes from a customer (Yang, 2007: p. 20). Capturing and +utilizing such ideas are also relevant to VOC and can be operationalized and +implemented as a valuable strategy. The University of Minnesota +CTSI’s key objectives, goals, and uses of engaging the VOC and +collaborator are as follows: (1) Engage CTSA customers (“relevant +stakeholders”) in multiple aspects of translational science and look +for opportunities to include their perspective (per NCATS strategic principles). +(2) Inform continuous improvement, strategic management, and M&E +efforts, the identification of customer needs and wants, comprehensive problem +definition and ideation, new concept development and optimization. (3) Synergize +NCATS and partner expectations and campus/hub needs. (4) Translate +VOC into functional and measurable service requirements. +METHODS/STUDY POPULATION: A case study of the programmatic and +methodological approach/technique development. 
The VOC at the UMN +CTSI has been captured in a variety of ways: regular and ad hoc surveys, +interviews, focus groups, Engagement Studios, formal call for +patient/community ideas and proposals, informal conversations, +customer/community membership and participation in the Advisory +Boards and Executive Leadership Team meetings, and observations. Our VOC +variables and metrics assess customer needs, wants, knowledge, and skills; +customer satisfaction with processes and outcomes; and customer ideas for +improvement and innovation. The ensuing customer feedback and other data have +been used to identify and incorporate the important attributes needed in the +CTSI processes, products, and dissemination. UMN CTSI partners in engaging and +capturing the VOC include our past, current, and potential customers and +collaborators, communities, program staff and service providers, program +administration, communication staff, M&E team, internal and external +data collectors. RESULTS/ANTICIPATED RESULTS: The proposed +comprehensive approach shows sound promise to enhance customer and collaborator +engagement, critical thinking, learning, strategic management, evaluation +capacity and improvement within clinical and translational science +organizations. DISCUSSION/SIGNIFICANCE OF IMPACT: This structured +approach’s impact is significant in that it fills the current gap in +the practice, literature, and methodology and offers a practical example of a +“practice that works” for CTR (and other) organizations +and programs striving to improve their stakeholder engagement and program +impact. Leveraging and synergizing the VOC and community engagement approaches +can help CTS organizations advance beyond capturing individual +project/service experiences to drawing a holistic portrait of an +institution-level (and, potentially, a nation-level) translational science +program. References Friedman M. 
Trying Hard Is Not Good Enough: How to Produce +Measurable Improvements for Customers and Communities. Trafford, +2005. National Center for Advancing Translational Sciences. NCATS +Strategic Plan [Internet], 2016. NIH +(https://ncats.nih.gov/strategicplan) Smith C,et al. Toward a science of translational science. Journal of +Clinical and Translational Science 2017; 1: +253–255. Woolf SH. The meaning of translational research and why it matters. +JAMA 2008; 29: 211–213. Yang, K. Voice of the Customer Capture and +Analysis. US: McGraw-Hill Professional, 2007.",2018-06-01 +32946248,The Content and Quality of Information About Hyperacusis Presented Online.,"Purpose Hyperacusis is a disorder characterized by reduced sound tolerance leading to ear pain, emotional distress, and reduced quality of life. Many people with hyperacusis turn to the Internet for information and support from online communities to discuss their condition. The purpose of this study was to assess the content and quality of hyperacusis information presented online. Method The three most used Internet search engines were used to identify relevant websites using the single search term hyperacusis. Fifteen websites were selected for analysis. Details of the purpose, audience, and content of each website were extracted using a bespoke data extraction form. The quality of the information on each website was rated using the validated DISCERN questionnaire. Results There was a wide disparity in the quality and content of hyperacusis information across websites. The website Hyperacusis Focus achieved the highest overall DISCERN score. Hyperacusis Focus and U.K. National Health Service websites were the most comprehensive online resources for health care professionals and patients, respectively. Wikipedia was judged useful for both health care professionals and patients. In general, hyperacusis-related information was accurate. 
However, no single website provided a complete account of hyperacusis, and some were judged to be selective in the information they provided. Conclusions The Internet provides an important source of information for those who have hyperacusis and those who care for them. Revisions to the websites reviewed here are needed for each to provide a complete account of hyperacusis. Supplemental Material https://doi.org/10.23641/asha.12869717.",2020-09-18 +33079580,The GoAudio Quantitative Mobile Audiology Test Enhances Access to Clinical Hearing Assessments.,"Purpose Hearing loss is a common impairment of the human senses with an estimated 48 million American adults reporting some trouble hearing; however, access to hearing health care is limited. Detection of hearing loss through a mobile, handheld tool can provide an important access point and potentially expedited access to the continuum of hearing health care. Here, we determined that GoAudio, a portable, automated hearing assessment tool, can be used to identify individuals who require additional hearing evaluation in a clinical workflow. Method This initial study included 24 adults, ages 18-65 years (M = 50, SD = 12), tested with GoAudio versus ""gold-standard"" clinical audiometry for eight frequencies to evaluate ""real-world"" applications. Participants utilized noise-canceling headphones combined with a tablet-based application for the GoAudio assessment. Results The primary study outcome measurements were the comparison of hearing thresholds (dB HL) from clinical audiometry and GoAudio. Results suggest that GoAudio is comparable to clinical audiometry for the identification of hearing loss at most frequencies (except 1 kHz for both ears and 2 kHz in the right ear). Upon stratifying data based on age, we identified that GoAudio is capable of identifying suspected age-related hearing loss or hearing thresholds greater than 30 dB HL at higher frequencies in both ears. 
Conclusion The study results support that GoAudio can be used effectively in clinical practice workflows as a reliable hearing assessment tool for the identification of hearing loss at the majority of frequencies outside a sound-treated booth and can detect characteristics of age-related hearing loss. Supplemental Material https://doi.org/10.23641/asha.13087682.",2020-10-20 +33217420,Incidence of VTE and Bleeding Among Hospitalized Patients With Coronavirus Disease 2019: A Systematic Review and Meta-analysis.,"

Background

Individual studies have reported widely variable rates for VTE and bleeding among hospitalized patients with coronavirus disease 2019 (COVID-19).

Research question

What is the incidence of VTE and bleeding among hospitalized patients with COVID-19?

Methods

In this systematic review and meta-analysis, 15 standard sources and COVID-19-specific sources were searched between January 1, 2020, and July 31, 2020, with no restriction according to language. Incidence estimates were pooled by using random effects meta-analyses. Heterogeneity was evaluated by using the I2 statistic, and publication bias was assessed by using the Begg and Egger tests.

Results

The pooled incidence was 17.0% (95% CI, 13.4-20.9) for VTE, 12.1% (95% CI, 8.4-16.4) for DVT, 7.1% (95% CI, 5.3-9.1) for pulmonary embolism (PE), 7.8% (95% CI, 2.6-15.3) for bleeding, and 3.9% (95% CI, 1.2-7.9) for major bleeding. In subgroup meta-analyses, the incidence of VTE was higher when assessed according to screening (33.1% vs 9.8% by clinical diagnosis), among patients in the ICU (27.9% vs 7.1% in the ward), in prospective studies (25.5% vs 12.4% in retrospective studies), and with the inclusion of catheter-associated thrombosis/isolated distal DVTs and isolated subsegmental PEs. The highest pooled incidence estimate of bleeding was reported for patients receiving intermediate- or full-dose anticoagulation (21.4%) and the lowest in the only prospective study that assessed bleeding events (2.7%).

Interpretation

Among hospitalized patients with COVID-19, the overall estimated pooled incidence of VTE was 17.0%, with higher rates with routine screening, inclusion of distal DVT, and subsegmental PE, in critically ill patients and in prospective studies. Bleeding events were observed in 7.8% of patients and were sensitive to use of escalated doses of anticoagulants and nature of data collection. Additional studies are required to ascertain the significance of various thrombotic events and to identify strategies to improve patient outcomes.

Trial registry

PROSPERO; No.: CRD42020198864; URL: https://www.crd.york.ac.uk/prospero/.",2020-11-17 +32311701,Elevation and latitude drives structure and tree species composition in Andean forests: Results from a large-scale plot network.,"Our knowledge about the structure and function of Andean forests at regional scales remains limited. Current initiatives to study forests over continental or global scales still have important geographical gaps, particularly in regions such as the tropical and subtropical Andes. In this study, we assessed patterns of structure and tree species diversity along ~ 4000 km of latitude and ~ 4000 m of elevation range in Andean forests. We used the Andean Forest Network (Red de Bosques Andinos, https://redbosques.condesan.org/) database which, at present, includes 491 forest plots (totaling 156.3 ha, ranging from 0.01 to 6 ha) representing a total of 86,964 identified tree stems ≥ 10 cm diameter at breast height belonging to 2341 identified species, 584 genera and 133 botanical families. Tree stem density and basal area increases with elevation while species richness decreases. Stem density and species richness both decrease with latitude. Subtropical forests have distinct tree species composition compared to those in the tropical region. In addition, floristic similarity of subtropical plots is between 13 to 16% while similarity between tropical forest plots is between 3% to 9%. Overall, plots ~ 0.5-ha or larger may be preferred for describing patterns at regional scales in order to avoid plot size effects. We highlight the need to promote collaboration and capacity building among researchers in the Andean region (i.e., South-South cooperation) in order to generate and synthesize information at regional scale.",2020-04-20 +31066443,DrugComb: an integrative cancer drug combination data portal.,"Drug combination therapy has the potential to enhance efficacy, reduce dose-dependent toxicity and prevent the emergence of drug resistance. 
However, discovery of synergistic and effective drug combinations has been a laborious and often serendipitous process. In recent years, identification of combination therapies has been accelerated due to the advances in high-throughput drug screening, but informatics approaches for systems-level data management and analysis are needed. To contribute toward this goal, we created an open-access data portal called DrugComb (https://drugcomb.fimm.fi) where the results of drug combination screening studies are accumulated, standardized and harmonized. Through the data portal, we provided a web server to analyze and visualize users' own drug combination screening data. The users can also effectively participate a crowdsourcing data curation effect by depositing their data at DrugComb. To initiate the data repository, we collected 437 932 drug combinations tested on a variety of cancer cell lines. We showed that linear regression approaches, when considering chemical fingerprints as predictors, have the potential to achieve high accuracy of predicting the sensitivity of drug combinations. All the data and informatics tools are freely available in DrugComb to enable a more efficient utilization of data resources for future drug combination discovery.",2019-07-01 +29934697,Chinese lexical database (CLD) : A large-scale lexical database for simplified Mandarin Chinese.,"We present the Chinese Lexical Database (CLD): a large-scale lexical database for simplified Chinese. The CLD provides a wealth of lexical information for 3913 one-character words, 34,233 two-character words, 7143 three-character words, and 3355 four-character words, and is publicly available through http://www.chineselexicaldatabase.com . 
For each of the 48,644 words in the CLD, we provide a wide range of categorical predictors, as well as an extensive set of frequency measures, complexity measures, neighborhood density measures, orthography-phonology consistency measures, and information-theoretic measures. We evaluate the explanatory power of the lexical variables in the CLD in the context of experimental data through analyses of lexical decision latencies for one-character, two-character, three-character and four-character words, as well as word naming latencies for one-character and two-character words. The results of these analyses are discussed.",2018-12-01 +33295795,"Inequalities in Public Water Arsenic Concentrations in Counties and Community Water Systems across the United States, 2006-2011.","

Background

In the United States, nationwide estimates of public drinking water arsenic exposure are not readily available. We used the U.S. Environmental Protection Agency's (EPA) Six-Year Review contaminant occurrence data set to estimate public water arsenic exposure. We compared community water system (CWS) arsenic concentrations during 2006-2008 vs. after 2009-2011, the initial monitoring period for compliance with the U.S. EPA's 10 μg/L arsenic maximum contaminant level (MCL).

Objective

Our objective was to characterize potential inequalities in CWS arsenic exposure over time and across sociodemographic subgroups.

Methods

We estimated 3-y average arsenic concentrations for 36,406 CWSs (98%) and 2,740 counties (87%) and compared differences in means and quantiles of water arsenic (via quantile regression) between both 3-y periods for U.S. regions and sociodemographic subgroups. We assigned CWSs and counties MCL compliance categories (High if above the MCL; Low if below) for each 3-y period.

Results

From 2006-2008 to 2009-2011, mean and 95th percentile CWS arsenic (in micrograms per liter) declined by 10.3% (95% CI: 6.5%, 14.1%) and 11.5% (8.3%, 14.8%) nationwide, by 11.4% (4.7%, 18.1%) and 16.3% (8.1%, 24.5%) for the Southwest, and by 36.8% (7.4%, 66.1%) and 26.5% (12.1%, 40.8%) for New England, respectively. CWSs in the High/High compliance category (not MCL compliant) were more likely in the Southwest (61.1%), served by groundwater (94.7%), serving smaller populations (mean 1,102 persons), and serving Hispanic communities (38.3%).

Discussion

Larger absolute declines in CWS arsenic concentrations at higher water arsenic quantiles indicate declines are related to MCL implementation. CWSs reliant on groundwater, serving smaller populations, located in the Southwest, and serving Hispanic communities were more likely to continue exceeding the arsenic MCL, raising environmental justice concerns. These estimates of public drinking water arsenic exposure can enable further surveillance and epidemiologic research, including assessing whether differential declines in water arsenic exposure resulted in differential declines in arsenic-associated disease. https://doi.org/10.1289/EHP7313.",2020-12-09 +31428111,KnowPulse: A Web-Resource Focused on Diversity Data for Pulse Crop Improvement.,"KnowPulse (https://knowpulse.usask.ca) is a breeder-focused web portal for pulse breeders and geneticists. With a focus on diversity data, KnowPulse provides information on genetic markers, sequence variants, phenotypic traits and germplasm for chickpea, common bean, field pea, faba bean, and lentil. Genotypic data is accessible through the genotype matrix tool, displayed as a marker-by-germplasm table of genotype calls specific to germplasm chosen by the researcher. It is also summarized on genetic marker and sequence variant pages. Phenotypic data is visualized in trait distribution plots: violin plots for quantitative data and histograms for qualitative data. These plots are accessible through trait, germplasm, and experiment pages, as well as through a single page search tool. KnowPulse is built using the open-source Tripal toolkit and utilizes open-source tools including, but not limited to, species-specific JBrowse instances, a BLAST interface, and whole-genome CViTjs visualizations. KnowPulse is constantly evolving with data and tools added as they become available. 
Full integration of genetic maps and quantitative trait loci is imminent, and development of tools exploring structural variation is being explored.",2019-07-31 +30977782,Modelling G×E with historical weather information improves genomic prediction in new environments.,"

Motivation

Interaction between the genotype and the environment (G×E) has a strong impact on the yield of major crop plants. Although influential, taking G×E explicitly into account in plant breeding has remained difficult. Recently G×E has been predicted from environmental and genomic covariates, but existing works have not shown that generalization to new environments and years without access to in-season data is possible and practical applicability remains unclear. Using data from a Barley breeding programme in Finland, we construct an in silico experiment to study the viability of G×E prediction under practical constraints.

Results

We show that the response to the environment of a new generation of untested Barley cultivars can be predicted in new locations and years using genomic data, machine learning and historical weather observations for the new locations. Our results highlight the need for models of G×E: non-linear effects clearly dominate linear ones, and the interaction between the soil type and daily rain is identified as the main driver for G×E for Barley in Finland. Our study implies that genomic selection can be used to capture the yield potential in G×E effects for future growth seasons, providing a possible means to achieve yield improvements, needed for feeding the growing population.

Availability and implementation

The data accompanied by the method code (http://research.cs.aalto.fi/pml/software/gxe/bioinformatics_codes.zip) is available in the form of kernels to allow reproducing the results.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +31609612,ATBdiscrimination: An in Silico Tool for Identification of Active Tuberculosis Disease Based on Routine Blood Test and T-SPOT.TB Detection Results.,"Tuberculosis remains one of the deadliest infectious diseases worldwide. Only 5-15% of people infected with Mycobacterium tuberculosis develop active TB disease (ATB), while others remain latently infected (LTBI) during their lifetime, which has a completely different clinical treatment schedule. However, most current clinical diagnostic methods are based on the immune response of M. tuberculosis infections and cannot distinguish ATB from LTBIs. Thus, the rapid diagnosis of active or latent tuberculosis infections remains a serious challenge for clinicians. In this work, based on the test data of a total of 478 patients, 36 blood biochemical data were specially included with T-SPOT.TB detection results which are all from routine clinical practice as commercially available. Then a discrimination method to detect ATB infections was successfully developed based on these data by the random forest algorithm. This method presents a robust classification performance with AUC as 0.9256 and 0.8731 for the cross-validation set and the external validation set, respectively. This work suggests an innovative strategy for identification of ATB disease from a single drop of blood with advantages of being timely, efficient, and economical. It also provides valuable information for the comprehensive understanding of TB with deep associations between TB infection and routine blood test data. 
The web server of this identification method, called ATBdiscrimination, is now available online at http://lishuyan.lzu.edu.cn/ATB/ATBdiscrimination.html .",2019-10-25 +26519403,"Ensembl Plants: Integrating Tools for Visualizing, Mining, and Analyzing Plant Genomics Data.","Ensembl Plants ( http://plants.ensembl.org ) is an integrative resource presenting genome-scale information for a growing number of sequenced plant species (currently 33). Data provided includes genome sequence, gene models, functional annotation, and polymorphic loci. Various additional information are provided for variation data, including population structure, individual genotypes, linkage, and phenotype data. In each release, comparative analyses are performed on whole genome and protein sequences, and genome alignments and gene trees are made available that show the implied evolutionary history of each gene family. Access to the data is provided through a genome browser incorporating many specialist interfaces for different data types, and through a variety of additional methods for programmatic access and data mining. These access routes are consistent with those offered through the Ensembl interface for the genomes of non-plant species, including those of plant pathogens, pests, and pollinators.Ensembl Plants is updated 4-5 times a year and is developed in collaboration with our international partners in the Gramene ( http://www.gramene.org ) and transPLANT projects ( http://www.transplantdb.org ).",2016-01-01 +25404137,SuperFly: a comparative database for quantified spatio-temporal gene expression patterns in early dipteran embryos.,"We present SuperFly (http://superfly.crg.eu), a relational database for quantified spatio-temporal expression data of segmentation genes during early development in different species of dipteran insects (flies, midges and mosquitoes). SuperFly has a special focus on emerging non-drosophilid model systems. 
The database currently includes data of high spatio-temporal resolution for three species: the vinegar fly Drosophila melanogaster, the scuttle fly Megaselia abdita and the moth midge Clogmia albipunctata. At this point, SuperFly covers up to 9 genes and 16 time points per species, with a total of 1823 individual embryos. It provides an intuitive web interface, enabling the user to query and access original embryo images, quantified expression profiles, extracted positions of expression boundaries and integrated datasets, plus metadata and intermediate processing steps. SuperFly is a valuable new resource for the quantitative comparative study of gene expression patterns across dipteran species. Moreover, it provides an interesting test set for systems biologists interested in fitting mathematical gene network models to data. Both of these aspects are essential ingredients for progress toward a more quantitative and mechanistic understanding of developmental evolution.",2014-11-17 +33002370,Language Development From Early Childhood to Adolescence in Youths With Fragile X Syndrome.,"Purpose The aim of this study was to investigate language growth in individuals with fragile X syndrome (FXS) from early childhood to adolescence and the influence of maternal responsivity on language growth. Method We conducted a longitudinal analysis of language development in 55 youths (44 males, 11 females) with FXS. Data collection spanned the ages of 11-216 months. We measured expressive and receptive vocabulary with standardized tests. The number of different words and mean length of utterance were obtained from language sample analyses of mother-child interactions. We also measured maternal comments (responsivity indicator) produced during the language samples and child nonverbal IQ. Results Growth models indicated that rates of number of different words and receptive vocabulary were related to maternal commenting. Mean length of utterance did not change significantly over time. 
Expressive vocabulary measured with a standardized test grew, but the growth was not related to maternal commenting. Nonverbal IQ was related to all language outcomes at age of 10 years and to changes over time in vocabulary. Visual analysis indicated that the highest scores on standardized tests were produced by girls; however, measures derived from language sample analyses appeared similar for boys and girls. Language models for boys only were similar to the total sample models with lower scores at age of 10 years for some outcomes. Conclusion Results of persistent language impairments for most youths with FXS suggest the need for continued, focused interventions aimed at improved language productions in addition to a responsive environment. Supplemental Material https://doi.org/10.23641/asha.13022825.",2020-10-01 +29652620,NASA GeneLab Project: Bridging Space Radiation Omics with Ground Studies.,"Accurate assessment of risks of long-term space missions is critical for human space exploration. It is essential to have a detailed understanding of the biological effects on humans living and working in deep space. Ionizing radiation from galactic cosmic rays (GCR) is a major health risk factor for astronauts on extended missions outside the protective effects of the Earth's magnetic field. Currently, there are gaps in our knowledge of the health risks associated with chronic low-dose, low-dose-rate ionizing radiation, specifically ions associated with high (H) atomic number (Z) and energy (E). The NASA GeneLab project ( https://genelab.nasa.gov/ ) aims to provide a detailed library of omics datasets associated with biological samples exposed to HZE. The GeneLab Data System (GLDS) includes datasets from both spaceflight and ground-based studies, a majority of which involve exposure to ionizing radiation. 
In addition to detailed information on radiation exposure for ground-based studies, GeneLab is adding detailed, curated dosimetry information for spaceflight experiments. GeneLab is the first comprehensive omics database for space-related research from which an investigator can generate hypotheses to direct future experiments, utilizing both ground and space biological radiation data. The GLDS is continually expanding as omics-related data are generated by the space life sciences community. Here we provide a brief summary of the space radiation-related data available at GeneLab.",2018-04-13 +31764940,SCGid: a consensus approach to contig filtering and genome prediction from single-cell sequencing libraries of uncultured eukaryotes.,"

Motivation

Whole-genome sequencing of uncultured eukaryotic genomes is complicated by difficulties in acquiring sufficient amounts of tissue. Single-cell genomics (SCG) by multiple displacement amplification provides a technical workaround, yielding whole-genome libraries which can be assembled de novo. Downsides of multiple displacement amplification include coverage biases and exacerbation of contamination. These factors affect assembly continuity and fidelity, complicating discrimination of genomes from contamination and noise by available tools. Uncultured eukaryotes and their relatives are often underrepresented in large sequence data repositories, further impairing identification and separation.

Results

We compare the ability of filtering approaches to remove contamination and resolve eukaryotic draft genomes from SCG metagenomes, finding significant variation in outcomes. To address these inconsistencies, we introduce a consensus approach that is codified in the SCGid software package. SCGid parallelly filters assemblies using different approaches, yielding three intermediate drafts from which consensus is drawn. Using genuine and mock SCG metagenomes, we show that our approach corrects for variation among draft genomes predicted by individual approaches and outperforms them in recapitulating published drafts in a fast and repeatable way, providing a useful alternative to available methods and manual curation.

Availability and implementation

The SCGid package is implemented in python and R. Source code is available at http://www.github.com/amsesk/SCGid under the GNU GPL 3.0 license.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +23293959,Three-dimensional structure database of natural metabolites (3DMET): a novel database of curated 3D structures.,"A database of 3D structures of natural metabolites has been developed called 3DMET. During the process of structure conversion from 2D to 3D, we found many structures were misconverted at chiral atoms and bonds. Several popular converters were tested in regard to their conversion accuracy. For verification, three canonical strings were also tested. No procedure could satisfactorily cover all the structures of the natural products. The misconverted structures had to be corrected manually. However, a nonnegligible number of mistakes were also observed even after manual curation, so a self-checking system was developed and introduced to our work flow. Thus, the 3D structures in our 3DMET database were evaluated in two steps: automatically and manually. The current version includes most of the natural products of the KEGG COMPOUND collection [ http://www.genome.jp/kegg/compound/ ] and is searchable by string, value range, and substructure. 3DMET can be accessed via http://www.3dmet.dna.affrc.go.jp/ , which also has detailed manuals.",2013-03-07 +32780144,The F2RaD Score: A Novel Prediction Score and Calculator Tool to Identify Patients at Risk of Postoperative C5 Palsy.,"

Background

Postoperative C5 palsy is a debilitating complication following posterior cervical decompression.

Objective

To create a simple clinical risk score predicting the occurrence of C5 palsy.

Methods

We retrospectively reviewed all patients who underwent posterior cervical decompressions between 2007 and 2017. Data was randomly split into training and validation datasets. Multivariable analysis was performed to construct the model from the training dataset. A scoring system was developed based on the model coefficients and a web-based calculator was deployed.

Results

The cohort consisted of 415 patients, of which 65 (16%) developed C5 palsy. The optimal model consisted of: mean C4/5 foraminal diameter (odds ratio [OR] = 9.1 for lowest quartile compared to highest quartile), preoperative C5 radiculopathy (OR = 3.5), and dexterity loss (OR = 2.9). The receiver operating characteristic yielded an area under the curve of 0.757 and 0.706 in the training and validation datasets, respectively. Every characteristic was worth 1 point except the lowest quartile of mean C4/5 foraminal diameter, which was worth 2 points, and the factors were summarized by the acronym F2RaD. The median predicted probability of C5 palsy increased from 2% in patients with a score of 0 to 70% in patients with a score of 4. The calculator can be accessed on https://jhuspine2.shinyapps.io/FRADscore/.

Conclusion

This study yielded a simplified scoring system and clinical calculator that predicts the occurrence of C5 palsy. Individualized risk prediction for patients may facilitate better understanding of the risks and benefits for an operation, and better prepare them for this possible adverse outcome. Furthermore, modifying the surgical plan in high-risk patients may possibly improve outcomes.",2020-10-01 +32307173,Molecular characterization and biofilm formation potential of Bacillus subtilis and Bacillus velezensis in extended shelf-life milk processing line.,"This study aims to characterize Bacillus subtilis complex group from raw, pasteurized, and packaged extended shelf-life (ESL) milk samples, to determine their biofilm potential and source-track the microbial contaminants to control their presence during processing. Isolates were characterized using multi-locus sequence typing (MLST) with 7 housekeeping genes. The primers used were designed from the coding regions with the highest number of polymorphic sites. The heat resistance profile indicated that all 12 isolates are psychrotolerant as well as thermophilic, with temperature ranges of 6°C to 55°C (B43, B44, B52, B54, B55, B56, B57), 6°C to 60°C (B46, B47, B48), and 15°C to 60°C (B49, B50). A general linear model 2-way repeated-measure ANOVA of the biofilm-forming potential of the isolates shows a statistically significant difference across the time of incubation (6, 12, 18, and 24 h). All isolates except 2 formed moderate to strong biofilms, with B44 having the most robust biofilm formation (3.14 ± 0.60). Scanning electron and confocal microscopy images reveal the strain specificity of the biofilm structure. The MLST analysis identified all isolates as belonging to either B. subtilis or Bacillus velezensis. All the isolates were novel sequence types (ST) when compared with the PubMLST database (https://pubmlst.org/) but showed relatedness to isolates in the raw milk that was processed. 
The closest ST are 96 for B. velezensis and 128 for B. subtilis, mostly isolated from soil. This study presents the significance of biofilms of thermophilic B. subtilis and B. velezensis and their possible perpetuation in the dairy processing plant. The information provided is a call for an innovative food contact surface or any other intervention that can minimize or prevent microbial adhesion in the processing plant, to prevent negative effects in ESL milk.",2020-04-16 +25474259,DBBP: database of binding pairs in protein-nucleic acid interactions.,"

Background

Interaction of proteins with other molecules plays an important role in many biological activities. As many structures of protein-DNA complexes and protein-RNA complexes have been determined in the past years, several databases have been constructed to provide structure data of the complexes. However, the information on the binding sites between proteins and nucleic acids is not readily available from the structure data since the data consists mostly of the three-dimensional coordinates of the atoms in the complexes.

Results

We analyzed the huge amount of structure data for the hydrogen bonding interactions between proteins and nucleic acids and developed a database called DBBP (DataBase of Binding Pairs in protein-nucleic acid interactions, http://bclab.inha.ac.kr/dbbp). DBBP contains 44,955 hydrogen bonds (H-bonds) of protein-DNA interactions and 77,947 H-bonds of protein-RNA interactions.

Conclusions

Analysis of the huge amount of structure data of protein-nucleic acid complexes is labor-intensive, yet provides useful information for studying protein-nucleic acid interactions. DBBP provides the detailed information of hydrogen-bonding interactions between proteins and nucleic acids at various levels from the atomic level to the residue level. The binding information can be used as a valuable resource for developing a computational method aiming at predicting new binding sites in proteins or nucleic acids.",2014-12-03 +31431510,Making Workshops Work: Insights from EDAMAME. ,"Microbiology, like many areas of life science research, is increasingly data-intensive. As such, bioinformatics and data science skills have become essential to leverage microbiome sequencing data for discovery. Short intensive courses have sprung up as formal computational training opportunities at individual institutions fail to meet demands. In this issue, Shade et al. (A. Shade, T. K. Dunivin, J. Choi, T. K. Teal, et al., mSystems 4:e00297-19, 2019, https://doi.org/10.1128/mSystems.00297-19) share their experience and approach in executing the annual, weeklong Explorations in Data Analysis for Metagenomic Advances in Microbial Ecology (EDAMAME) workshop from 2014 to 2018. EDAMAME introduced learners to general scientific computing concepts and domain-specific data analysis approaches. Workshop learners self-reported appreciable gains in understanding and ability. This report on the EDAMAME workshop strategy and lessons learned will help others in the life sciences to plan, execute, and assess short hands-on computing-intensive courses that support research in a particular domain.",2019-08-20 +,Phylogenetic position of a remarkable new fideliine bee from northern Chile (Hymenoptera: Megachilidae),"Fideliine bees are an archaic group with a disjunct distribution mostly restricted to deserts of South America and South Africa. 
This group was previously thought to be more diverse in Africa than in South America, where only one genus (Neofidelia) comprising five species is known. Here we describe a species belonging to a second South American genus: Xenofidelia colorada Packer gen. et sp.n., from northern Chile. The species is illustrated and its phylogenetic position within Megachilidae is assessed using morphological, molecular and combined data. The 214 character morphological matrix includes 55 new characters with an additional 16 hitherto unexplored for megachilid phylogeny. The molecular dataset is based upon seven nuclear gene sequences, totalling 6439 bp, many of which are published for the first time for particular megachilid taxa. In all analyses, Xenofidelia was found as sister to Neofidelia (endemic to Chile and Peru). It differs from that genus most notably in its short mouthparts, absence of a glossal rod, unmodified female metabasitarsus and an elongate and horizontal dorsal surface of the metapostnotum. Morphological and combined data support a monophyletic Fideliinae (excluding Pararhophites), while molecular data alone failed to recover fideliine monophyly. Dating analyses suggest that Xenofidelia and Neofidelia diverged 34.3–40.6 Ma, indicating that New World fideliines were probably present in arid habitats of South America during the Eocene. This divergence time predates both the main orogenic events that resulted in the formation of the Andean mountains and the origin of hyperarid conditions in the Atacama Desert; it also corresponds to a period prior to the origin of the summer rainfall area in the far north of Chile where the new genus is found. These results support the view that arid habitats have been present continuously in South America since the Eocene. 
This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:EA69BB4A‐6F59‐4A15‐AB44‐2A8949E3CF8F.",2017-07-01 +28099711,Introducing the World Health Organization Postpartum Family Planning Compendium.,"The postpartum period offers multiple opportunities for healthcare providers to assist with family planning decision making. However, there are also many changing factors during the first year after delivery that can affect family planning choices. Given that several different documents have addressed WHO guidance on postpartum family planning, the electronic WHO Postpartum Family Planning Compendium (http://srhr.org/postpartumfp) has been introduced. This resource integrates essential guidance on postpartum family planning for clinicians, program managers, and policy makers. The development of the Compendium included consultations with family planning experts, key international stakeholders, and web developers. Once the website had been created, user testing by family planning experts allowed for improvements to be made before the official launch. Future directions are adaptation of the website into a mobile application that can be more easily integrated to low-resource settings, and translation of the content into French and Spanish.",2016-11-03 +31624582,Linking pollen foraging of megachilid bees to their nest bacterial microbiota.,"Solitary bees build their nests by modifying the interior of natural cavities, and they provision them with food by importing collected pollen. As a result, the microbiota of the solitary bee nests may be highly dependent on introduced materials. In order to investigate how the collected pollen is associated with the nest microbiota, we used metabarcoding of the ITS2 rDNA and the 16S rDNA to simultaneously characterize the pollen composition and the bacterial communities of 100 solitary bee nest chambers belonging to seven megachilid species. 
We found a weak correlation between bacterial and pollen alpha diversity and significant associations between the composition of pollen and that of the nest microbiota, contributing to the understanding of the link between foraging and bacteria acquisition for solitary bees. Since solitary bees cannot establish bacterial transmission routes through eusociality, this link could be essential for obtaining bacterial symbionts for this group of valuable pollinators.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://www.ebi.ac.uk/ena/data/view/PRJEB27223, https://www.ebi.ac.uk/ena/data/view/PRJEB31610, and https://doi.org/10.5061/dryad.qk36k8q.",2019-09-02 +30684219,"NeuroMuscleDB: a Database of Genes Associated with Muscle Development, Neuromuscular Diseases, Ageing, and Neurodegeneration.","Skeletal muscle is a highly complex, heterogeneous tissue that serves a multitude of biological functions in living organisms. With the advent of methods, such as microarrays, transcriptome analysis, and proteomics, studies have been performed at the genome level to gain insight of changes in the expression profiles of genes during different stages of muscle development and of associated diseases. In the present study, a database was conceived for the straightforward retrieval of information on genes involved in skeletal muscle formation, neuromuscular diseases (NMDs), ageing, and neurodegenerative disorders (NDs). The resulting database named NeuroMuscleDB ( http://yu-mbl-muscledb.com/NeuroMuscleDB ) is the result of a wide literature survey, database searches, and data curation. NeuroMuscleDB contains information of genes in Homo sapiens, Mus musculus, and Bos Taurus, and their promoter sequences and specified roles at different stages of muscle development and in associated myopathies. The database contains information on ~ 1102 genes, 6030 mRNAs, and 5687 proteins, and embedded analytical tools that can be used to perform tasks related to gene sequence usage. The authors believe NeuroMuscleDB provides a platform for obtaining desired information on genes related to myogenesis and their associations with various diseases (NMDs, ageing, and NDs). 
NeuroMuscleDB is freely available on the web at http://yu-mbl-muscledb.com/NeuroMuscleDB and supports all major browsers.",2019-01-25 +31589052,MHCquant: Automated and Reproducible Data Analysis for Immunopeptidomics.,"Personalized multipeptide vaccines are currently being discussed intensively for tumor immunotherapy. In order to identify epitopes-short, immunogenic peptides-suitable for eliciting a tumor-specific immune response, human leukocyte antigen-presented peptides are isolated by immunoaffinity purification from cancer tissue samples and analyzed by liquid chromatography-coupled tandem mass spectrometry (LC-MS/MS). Here, we present MHCquant, a fully automated, portable computational pipeline able to process LC-MS/MS data automatically and generate annotated, false discovery rate-controlled lists of (neo-)epitopes with associated relative quantification information. We could show that MHCquant achieves higher sensitivity than established methods. While obtaining the highest number of unique peptides, the rate of predicted MHC binders remains still comparable to other tools. Reprocessing of the data from a previously published study resulted in the identification of several neoepitopes not detected by previously applied methods. MHCquant integrates tailor-made pipeline components with existing open-source software into a coherent processing workflow. Container-based virtualization permits execution of this workflow without complex software installation, execution on cluster/cloud infrastructures, and full reproducibility of the results. Integration with the data analysis workbench KNIME enables easy mining of large-scale immunopeptidomics data sets. 
MHCquant is available as open-source software along with accompanying documentation on our website at https://www.openms.de/mhcquant/ .",2019-10-22 +32006283,Structural Characterization of Protein-Protein Interactions with pyDockSAXS.,"Structural characterization of protein-protein interactions can provide essential details to understand biological functions at the molecular level and to facilitate their manipulation for biotechnological and biomedical purposes. Unfortunately, the 3D structure is available for only a small fraction of all possible protein-protein interactions, due to the technical limitations of high-resolution structural determination methods. In this context, low-resolution structural techniques, such as small-angle X-ray scattering (SAXS), can be combined with computational docking to provide structural models of protein-protein interactions at large scale. In this chapter, we describe the pyDockSAXS web server ( https://life.bsc.es/pid/pydocksaxs ), which uses pyDock docking and scoring to provide structural models that optimally satisfy the input SAXS data. This server, which is freely available to the scientific community, provides an automatic pipeline to model the structure of a protein-protein complex from SAXS data.",2020-01-01 +32396163,Development and Validation of a Clinical Risk Score to Predict the Occurrence of Critical Illness in Hospitalized Patients With COVID-19.,"

Importance

Early identification of patients with novel coronavirus disease 2019 (COVID-19) who may develop critical illness is of great importance and may aid in delivering proper treatment and optimizing use of resources.

Objective

To develop and validate a clinical score at hospital admission for predicting which patients with COVID-19 will develop critical illness based on a nationwide cohort in China.

Design, setting, and participants

Collaborating with the National Health Commission of China, we established a retrospective cohort of patients with COVID-19 from 575 hospitals in 31 provincial administrative regions as of January 31, 2020. Epidemiological, clinical, laboratory, and imaging variables ascertained at hospital admission were screened using Least Absolute Shrinkage and Selection Operator (LASSO) and logistic regression to construct a predictive risk score (COVID-GRAM). The score provides an estimate of the risk that a hospitalized patient with COVID-19 will develop critical illness. Accuracy of the score was measured by the area under the receiver operating characteristic curve (AUC). Data from 4 additional cohorts in China hospitalized with COVID-19 were used to validate the score. Data were analyzed between February 20, 2020 and March 17, 2020.

Main outcomes and measures

Among patients with COVID-19 admitted to the hospital, critical illness was defined as the composite measure of admission to the intensive care unit, invasive ventilation, or death.

Results

The development cohort included 1590 patients. The mean (SD) age of patients in the cohort was 48.9 (15.7) years; 904 (57.3%) were men. The validation cohort included 710 patients with a mean (SD) age of 48.2 (15.2) years, and 382 (53.8%) were men and 172 (24.2%). From 72 potential predictors, 10 variables were independent predictive factors and were included in the risk score: chest radiographic abnormality (OR, 3.39; 95% CI, 2.14-5.38), age (OR, 1.03; 95% CI, 1.01-1.05), hemoptysis (OR, 4.53; 95% CI, 1.36-15.15), dyspnea (OR, 1.88; 95% CI, 1.18-3.01), unconsciousness (OR, 4.71; 95% CI, 1.39-15.98), number of comorbidities (OR, 1.60; 95% CI, 1.27-2.00), cancer history (OR, 4.07; 95% CI, 1.23-13.43), neutrophil-to-lymphocyte ratio (OR, 1.06; 95% CI, 1.02-1.10), lactate dehydrogenase (OR, 1.002; 95% CI, 1.001-1.004) and direct bilirubin (OR, 1.15; 95% CI, 1.06-1.24). The mean AUC in the development cohort was 0.88 (95% CI, 0.85-0.91) and the AUC in the validation cohort was 0.88 (95% CI, 0.84-0.93). The score has been translated into an online risk calculator that is freely available to the public (http://118.126.104.170/).

Conclusions and relevance

In this study, a risk score based on characteristics of COVID-19 patients at the time of admission to the hospital was developed that may help predict a patient's risk of developing critical illness.",2020-08-01 +31653057,Analytic Correlation Filtration: A New Tool to Reduce Analytical Complexity of Metabolomic Datasets. ,"Metabolomics generates massive and complex data. Redundant different analytical species and the high degree of correlation in datasets is a constraint for the use of data mining/statistical methods and interpretation. In this context, we developed a new tool to detect analytical correlation into datasets without confounding them with biological correlations. Based on several parameters, such as a similarity measure, retention time, and mass information from known isotopes, adducts, or fragments, the algorithm principle is used to group features coming from the same analyte, and to propose one single representative per group. To illustrate the functionalities and added-value of this tool, it was applied to published datasets and compared to one of the most commonly used free packages proposing a grouping method for metabolomics data: 'CAMERA'. This tool was developed to be included in Galaxy and will be available in Workflow4Metabolomics (http://workflow4metabolomics.org). Source code is freely available for download under CeCILL 2.1 license at https://services.pfem.clermont.inra.fr/gitlab/grandpa /tool-acf and implement in Perl.",2019-10-24 +32844286,How accurate is circular dichroism-based model validation?,"Circular dichroism (CD) spectroscopy is highly sensitive to the secondary structure (SS) composition of proteins. Several methods exist to either estimate the SS composition of a protein or to validate existing structural models using its CD spectrum. The accuracy and precision of these methods depend on the quality of both the measured CD spectrum and the used reference structure. 
Using a large reference protein set with high-quality CD spectra and synthetic data derived from this set, we quantified deviations from both ideal spectra and reference structures due to experimental limitations. We also determined the impact of these deviations on SS estimation, CD prediction, and SS validation methods of the SESCA analysis package. With regard to the CD spectra, our results suggest intensity scaling errors and non-SS contributions as the main causes of inaccuracies. These factors also can lead to overestimated model errors during validation. The errors of the used reference structures combine non-additively with errors caused by the CD spectrum, which increases the uncertainty of model validation. We have further shown that the effects of scaling errors in the CD spectrum can be nearly eliminated by appropriate re-scaling, and that the accuracy of model validation methods can be improved by accounting for typical non-SS contributions. These improvements have now been implemented within the SESCA package and are available at: https://www.mpibpc.mpg.de/sesca .",2020-08-26 +32243204,Land-Use Change and Cardiometabolic Risk Factors in an Urbanizing Area of South India: A Population-Based Cohort Study.,"

Background

Land-use changes in city fringes due to urbanization can lead to a reduction of greenspace that may reduce its associated health benefits.

Objectives

We evaluated the association between changes in residential surrounding built-up land use and cardiometabolic risk factors in an urbanizing peri-urban area of south India and explored the mediating roles of air pollution, physical activity, and stress in these associations.

Methods

We analyzed data on 6,039 adults from the third follow-up of the Andhra Pradesh Children and Parent Study (APCAPS) cohort (2010-2012). We generated trajectories of change in residential surrounding built-up land use (buffer areas) from 1995-2009 (stable, slow increase, fast increase) using remote sensing data and image classification methods. We estimated associations between built-up land use trajectories and natural log-transformed blood pressure, waist circumference, triglycerides, fasting glucose, and non-high-density lipoprotein (non-HDL) cholesterol using linear mixed models. We accounted for multiple mediators and the multilevel structure of the data in mediation analyses.

Results

We observed positive associations between a fast increase in built-up land use within 300m of the home and all cardiometabolic risk factors. Compared with participants with stable trajectories, those with the largest increase in built-up land use had 1.5% (95% CI: 0.1, 2.9) higher systolic blood pressure, 2.4% (95% CI: 0.6, 4.3) higher diastolic blood pressure, 2.1% (95% CI: 0.5, 3.8) higher waist circumference, and 1.6% (95% CI: -0.6, 3.8) higher fasting glucose in fully adjusted models. Associations were positive, but not statistically significant, for triglycerides, fasting glucose, and non-HDL cholesterol. Physical activity and ambient particulate matter ≤2.5μm in aerodynamic diameter (PM2.5) partially mediated the estimated associations. Associations between fast build-up and all cardiometabolic risk factors except non-HDL cholesterol were stronger in women than men.

Discussion

Increases in built-up land use surrounding residences were consistently associated with higher levels of cardiometabolic risk factors. Our findings support the need for better integration of health considerations in urban planning in rapidly urbanizing settings. https://doi.org/10.1289/EHP5445.",2020-04-03 +32233038,The default cyanobacterial linked genome: an interactive platform based on cyanobacterial linkage networks to assist functional genomics.,"A database of cyanobacterial linked genomes that can be accessed through an interactive platform (https://dfgm.ua.es/genetica/investigacion/cyanobacterial_genetics/Resources.html) was generated on the bases of conservation of gene neighborhood across 124 cyanobacterial species. It allows flexible generation of gene networks at different threshold values. The default cyanobacterial linked genome, whose global properties are analyzed here, connects most of the cyanobacterial core genes. The potential of the web tool is discussed in relation to other bioinformatics approaches based on guilty-by-association principles, with selected examples of networks illustrating its usefulness for genes found exclusively in cyanobacteria or in cyanobacteria and chloroplasts. We believe that this tool will provide useful predictions that are readily testable in Synechococcus elongatus PCC7942 and other model organisms performing oxygenic photosynthesis.",2020-04-15 +31721688,NeuroCS: A Tool to Predict Cleavage Sites of Neuropeptide Precursors.,"BACKGROUND:Neuropeptides are a class of bioactive peptides produced from neuropeptide precursors through a series of extremely complex processes, mediating neuronal regulations in many aspects. Accurate identification of cleavage sites of neuropeptide precursors is of great significance for the development of neuroscience and brain science. 
OBJECTIVE:With the explosive growth of neuropeptide precursor data, it is pretty much needed to develop bioinformatics methods for predicting neuropeptide precursors' cleavage sites quickly and efficiently. METHODS:We started with processing the neuropeptide precursor data from SwissProt and NeuroPedia into two sets of data, training dataset and testing dataset. Subsequently, six feature extraction schemes were applied to generate different feature sets and then feature selection methods were used to find the optimal feature subset of each. Thereafter the support vector machine was utilized to build models for different feature types. Finally, the performance of models was evaluated with the independent testing dataset. RESULTS:Six models are built through support vector machine. Among them the enhanced amino acid composition-based model reaches the highest accuracy of 91.60% in the 5-fold cross validation. When evaluated with independent testing dataset, it also showed an excellent performance with a high accuracy of 90.37% and Area under Receiver Operating Characteristic curve up to 0.9576. CONCLUSION:The performance of the developed model was decent. Moreover, for users' convenience, an online web server called NeuroCS is built, which is freely available at http://i.uestc.edu.cn/NeuroCS/dist/index.html#/. NeuroCS can be used to predict neuropeptide precursors' cleavage sites effectively.",2020-01-01 +30959307,"Characterization of wastewater effluents in the Danube River Basin with chemical screening, in vitro bioassays and antibiotic resistant genes analysis.","Averaged 7-day composite effluent wastewater samples from twelve wastewater treatment plants (WWTPs) in nine countries (Romania, Serbia, Hungary, Slovenia, Croatia, Slovakia, Czechia, Austria, Germany) in the Danube River Basin were collected. 
WWTPs' selection was based on countries' dominant technology and a number of served population with the aim to get a representative holistic view of the pollution status. Samples were analyzed for 2248 chemicals of emerging concern (CECs) by wide-scope target screening employing LC-ESI-QTOF-MS. 280 compounds were detected at least in one sample and quantified. Spatial differences in the concentrations and distribution of the compounds classes were discussed. Additionally, samples were analyzed for the possible agonistic/antagonistic potencies using a panel of in vitro transactivation reporter gene CALUX® bioassays including ERα (estrogenics), anti-AR (anti-androgens), GR (glucocorticoids), anti-PR (anti-progestins), PPARα and PPARγ (peroxisome proliferators) and PAH assays. The potency of the wastewater samples to cause oxidative stress and induce xenobiotic metabolism was determined using the Nrf2 and PXR CALUX® bioassays, respectively. The signals from each of the bioassays were compared with the recently developed effect-based trigger values (EBTs) and thus allowed for allocating the wastewater effluents into four categories based on their measured toxicity, proposing a putative action plan for wastewater operators. Moreover, samples were analyzed for antibiotics and 13 antibiotic-resistant genes (ARGs) and one mobile genetic element (intl1) with the aim to assess the potential for antibiotic resistance. All data collected from these various types of analysis were stored in an on-line database and can be viewed via interactive map at https://norman-data.eu/EWW_DANUBE.",2019-04-05 +25348401,The Mouse Genome Database (MGD): facilitating mouse as a model for human biology and disease.,"The Mouse Genome Database (MGD, http://www.informatics.jax.org) serves the international biomedical research community as the central resource for integrated genomic, genetic and biological data on the laboratory mouse. 
To facilitate use of mouse as a model in translational studies, MGD maintains a core of high-quality curated data and integrates experimentally and computationally generated data sets. MGD maintains a unified catalog of genes and genome features, including functional RNAs, QTL and phenotypic loci. MGD curates and provides functional and phenotype annotations for mouse genes using the Gene Ontology and Mammalian Phenotype Ontology. MGD integrates phenotype data and associates mouse genotypes to human diseases, providing critical mouse-human relationships and access to repositories holding mouse models. MGD is the authoritative source of nomenclature for genes, genome features, alleles and strains following guidelines of the International Committee on Standardized Genetic Nomenclature for Mice. A new addition to MGD, the Human-Mouse: Disease Connection, allows users to explore gene-phenotype-disease relationships between human and mouse. MGD has also updated search paradigms for phenotypic allele attributes, incorporated incidental mutation data, added a module for display and exploration of genes and microRNA interactions and adopted the JBrowse genome browser. MGD resources are freely available to the scientific community.",2014-10-27 +30753280,Biological sequence modeling with convolutional kernel networks.,"

Motivation

The growing number of annotated biological sequences available makes it possible to learn genotype-phenotype relationships from data with increasingly high accuracy. When large quantities of labeled samples are available for training a model, convolutional neural networks can be used to predict the phenotype of unannotated sequences with good accuracy. Unfortunately, their performance with medium- or small-scale datasets is mitigated, which requires inventing new data-efficient approaches.

Results

We introduce a hybrid approach between convolutional neural networks and kernel methods to model biological sequences. Our method enjoys the ability of convolutional neural networks to learn data representations that are adapted to a specific task, while the kernel point of view yields algorithms that perform significantly better when the amount of training data is small. We illustrate these advantages for transcription factor binding prediction and protein homology detection, and we demonstrate that our model is also simple to interpret, which is crucial for discovering predictive motifs in sequences.

Availability and implementation

Source code is freely available at https://gitlab.inria.fr/dchen/CKN-seq.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +32324724,"Preexposure Prophylaxis for Prevention of HIV Acquisition Among Adolescents: Clinical Considerations, 2020.","Preexposure prophylaxis (PrEP) with antiretroviral medication has been proven effective in reducing the risk for acquiring human immunodeficiency virus (HIV). The fixed-dose combination tablet of tenofovir disoproxil fumarate (TDF)/emtricitabine (FTC) was approved by the U.S. Food and Drug Administration (FDA) for use as PrEP for adults in 2012. Since then, recognition has been increasing that adolescents at risk for acquiring HIV can benefit from PrEP. In 2018, FDA approved revised labeling for TDF/FTC that expanded the indication for PrEP to include adolescents weighing at least 77 lb (35 kg) who are at risk for acquiring HIV. In 2019, FDA approved the combination product tenofovir alafenamide (TAF)/FTC as PrEP for adolescents and adults weighing at least 77 lb (35 kg), excluding those at risk for acquiring HIV through receptive vaginal sex. This exclusion is due to the lack of clinical data regarding the efficacy of TAF/FTC in cisgender women. Clinical providers who evaluate adolescents for PrEP use must consider certain topics that are unique to the adolescent population. Important considerations related to adolescents include PrEP safety data, legal issues about consent for clinical care and confidentiality, the therapeutic partnership with adolescents and their parents or guardians, the approach to the adolescent patient's clinical visit, and medication initiation, adherence, and persistence during adolescence. Overall, data support the safety of PrEP for adolescents. PrEP providers should be familiar with the statutes and regulations about the provision of health care to minors in their states. 
Providers should partner with the adolescent patient for PrEP decisions, recognizing the adolescent's autonomy to the extent allowable by law and including parents in the conversation about PrEP when it is safe and reasonable to do so. A comprehensive approach to adolescent health is recommended, including considering PrEP as one possible component of providing medical care to adolescents who inject drugs or engage in sexual behaviors that place them at risk for acquiring HIV. PrEP adherence declined over time in the studies evaluating PrEP among adolescents, a trend that also has been observed among adult patients. Clinicians should implement strategies to address medication adherence as a routine part of prescribing PrEP; more frequent clinical follow-up is one possible approach.PrEP is an effective HIV prevention tool for protecting adolescents at risk for HIV acquisition. For providers, unique considerations that are part of providing PrEP to adolescents include the possible need for more frequent, supportive interactions to promote medication adherence. Recommendations for PrEP medical management and additional resources for providers are available in the U.S. Public Health Service clinical practice guideline Preexposure Prophylaxis for the Prevention of HIV Infection in the United States - 2017 Update and the clinical providers' supplement Preexposure Prophylaxis for the Prevention of HIV Infection in the United States - 2017 Update: Clinical Providers' Supplement (https://www.cdc.gov/hiv/clinicians/prevention/prep.html).",2020-04-24 +32694909,Family-Specific Gains and Losses of Protein Domains in the Legume and Grass Plant Families.,"Protein domains can be regarded as sections of protein sequences capable of folding independently and performing specific functions. In addition to amino-acid level changes, protein sequences can also evolve through domain shuffling events such as domain insertion, deletion, or duplication. 
The evolution of protein domains can be studied by tracking domain changes in a selected set of species with known phylogenetic relationships. Here, we conduct such an analysis by defining domains as ""features"" or ""descriptors,"" and considering the species (target + outgroup) as instances or data-points in a data matrix. We then look for features (domains) that are significantly different between the target species and the outgroup species. We study the domain changes in 2 large, distinct groups of plant species: legumes (Fabaceae) and grasses (Poaceae), with respect to selected outgroup species. We evaluate 4 types of domain feature matrices: domain content, domain duplication, domain abundance, and domain versatility. The 4 types of domain feature matrices attempt to capture different aspects of domain changes through which the protein sequences may evolve-that is, via gain or loss of domains, increase or decrease in the copy number of domains along the sequences, expansion or contraction of domains, or through changes in the number of adjacent domain partners. All the feature matrices were analyzed using feature selection techniques and statistical tests to select protein domains that have significant different feature values in legumes and grasses. We report the biological functions of the top selected domains from the analysis of all the feature matrices. In addition, we also perform domain-centric gene ontology (dcGO) enrichment analysis on all selected domains from all 4 feature matrices to study the gene ontology terms associated with the significantly evolving domains in legumes and grasses. Domain content analysis revealed a striking loss of protein domains from the Fanconi anemia (FA) pathway, the pathway responsible for the repair of interstrand DNA crosslinks. 
The abundance analysis of domains found in legumes revealed an increase in glutathione synthase enzyme, an antioxidant required from nitrogen fixation, and a decrease in xanthine oxidizing enzymes, a phenomenon confirmed by previous studies. In grasses, the abundance analysis showed increases in domains related to gene silencing which could be due to polyploidy or due to enhanced response to viral infection. We provide a docker container that can be used to perform this analysis workflow on any user-defined sets of species, available at https://cloud.docker.com/u/akshayayadav/repository/docker/akshayayadav/protein-domain-evolution-project.",2020-07-09 +29696732,"Seshat: A Web service for accurate annotation, validation, and analysis of TP53 variants generated by conventional and next-generation sequencing.","Accurate annotation of genomic variants in human diseases is essential to allow personalized medicine. Assessment of somatic and germline TP53 alterations has now reached the clinic and is required in several circumstances such as the identification of the most effective cancer therapy for patients with chronic lymphocytic leukemia (CLL). Here, we present Seshat, a Web service for annotating TP53 information derived from sequencing data. A flexible framework allows the use of standard file formats such as Mutation Annotation Format (MAF) or Variant Call Format (VCF), as well as common TXT files. Seshat performs accurate variant annotations using the Human Genome Variation Society (HGVS) nomenclature and the stable TP53 genomic reference provided by the Locus Reference Genomic (LRG). In addition, using the 2017 release of the UMD_TP53 database, Seshat provides multiple statistical information for each TP53 variant including database frequency, functional activity, or pathogenicity. The information is delivered in standardized output tables that minimize errors and facilitate comparison of mutational data across studies. 
Seshat is a beneficial tool to interpret the ever-growing TP53 sequencing data generated by multiple sequencing platforms and it is freely available via the TP53 Website, http://p53.fr or directly at http://vps338341.ovh.net/.",2018-05-17 +28077567,FARME DB: a functional antibiotic resistance element database. ,"Antibiotic resistance (AR) is a major global public health threat but few resources exist that catalog AR genes outside of a clinical context. Current AR sequence databases are assembled almost exclusively from genomic sequences derived from clinical bacterial isolates and thus do not include many microbial sequences derived from environmental samples that confer resistance in functional metagenomic studies. These environmental metagenomic sequences often show little or no similarity to AR sequences from clinical isolates using standard classification criteria. In addition, existing AR databases provide no information about flanking sequences containing regulatory or mobile genetic elements. To help address this issue, we created an annotated database of DNA and protein sequences derived exclusively from environmental metagenomic sequences showing AR in laboratory experiments. Our Functional Antibiotic Resistant Metagenomic Element (FARME) database is a compilation of publically available DNA sequences and predicted protein sequences conferring AR as well as regulatory elements, mobile genetic elements and predicted proteins flanking antibiotic resistant genes. 
FARME is the first database to focus on functional metagenomic AR gene elements and provides a resource to better understand AR in the 99% of bacteria which cannot be cultured and the relationship between environmental AR sequences and antibiotic resistant genes derived from cultured isolates. Database URL: http://staff.washington.edu/jwallace/farme.",2017-01-10 +27515366,The Lancet Infectious Diseases HIV Prevention Resource Center.,"In collaboration with the US Centers for Disease Control and Prevention (CDC), The Lancet Infectious Diseases has launched a free HIV prevention resource centre (http://hivprevent.thelancet.com).",2016-08-08 +32049321,Occupancy spectrum distribution: application for coalescence simulation with generic mergers.,"MOTIVATION:As the density of sampled population increases, especially as studies incorporate aspects of the spatial landscape to study evolutionary processes, efficient simulation of genetic data under the coalescent becomes a primary challenge. Beyond the computational demands, coalescence-based simulation strategies have to be reconsidered because traditional assumptions about the dynamics of coalescing lineages within local populations may be violated (e.g. more than two daughter lineages may coalesce to a parent at low population densities). Specifically, to efficiently assign n lineages to m parents, the order relation between n and m strongly affects the relevant algorithm for the coalescent simulator (e.g. only when n<2m, it is reasonable to assume that two lineages, at most, can be assigned to the same parent). Controlling the details of the simulation model as a function of n and m is then crucial to represent accurately and efficiently the assignment process, but current implementations make it difficult to switch between different types of lineage mergers at run-time or even compile-time. 
RESULTS:With the described occupancy spectrum and algorithm that generates the support of the joint probability distribution of the occupancy spectrum; computation is much faster than realizing the whole assignment process under the coalescent. Using general definitions of lineage merges, which also makes the codebase reusable, we implement several variants of coalescent mergers, including an approximation where low probability spectrums are discarded. Comparison of runtimes and performance of the different C++ highly reusable coalescence mergers (binary, multiple, hybrids) are given, and we illustrate their potential utility with example applications. AVAILABILITY AND IMPLEMENTATION:All components are integrated into Quetzal, an open-source C++ library for coalescence available at https://becheler.github.io/pages/quetzal.html. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +31985795,UMI-VarCal: a new UMI-based variant caller that efficiently improves low-frequency variant detection in paired-end sequencing NGS libraries.,"MOTIVATION:Next-generation sequencing has become the go-to standard method for the detection of single-nucleotide variants in tumor cells. The use of such technologies requires a PCR amplification step and a sequencing step, steps in which artifacts are introduced at very low frequencies. These artifacts are often confused with true low-frequency variants that can be found in tumor cells and cell-free DNA. The recent use of unique molecular identifiers (UMI) in targeted sequencing protocols has offered a trustworthy approach to filter out artefactual variants and accurately call low-frequency variants. However, the integration of UMI analysis in the variant calling process led to developing tools that are significantly slower and more memory consuming than raw-reads-based variant callers. 
RESULTS:We present UMI-VarCal, a UMI-based variant caller for targeted sequencing data with better sensitivity compared to other variant callers. Being developed with performance in mind, UMI-VarCal stands out from the crowd by being one of the few variant callers that do not rely on SAMtools to do their pileup. Instead, at its core runs an innovative homemade pileup algorithm specifically designed to treat the UMI tags in the reads. After the pileup, a Poisson statistical test is applied at every position to determine if the frequency of the variant is significantly higher than the background error noise. Finally, an analysis of UMI tags is performed, a strand bias and a homopolymer length filter are applied to achieve better accuracy. We illustrate the results obtained using UMI-VarCal through the sequencing of tumor samples and we show how UMI-VarCal is both faster and more sensitive than other publicly available solutions. AVAILABILITY AND IMPLEMENTATION:The entire pipeline is available at https://gitlab.com/vincent-sater/umi-varcal-master under MIT license. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +33323109,"CERC: an interactive content extraction, recognition, and construction tool for clinical and biomedical text.","

Background

Automated summarization of scientific literature and patient records is essential for enhancing clinical decision-making and facilitating precision medicine. Most existing summarization methods are based on single indicators of relevance, offer limited capabilities for information visualization, and do not account for user specific interests. In this work, we develop an interactive content extraction, recognition, and construction system (CERC) that combines machine learning and visualization techniques with domain knowledge for highlighting and extracting salient information from clinical and biomedical text.

Methods

A novel sentence-ranking framework multi indicator text summarization, MINTS, is developed for extractive summarization. MINTS uses random forests and multiple indicators of importance for relevance evaluation and ranking of sentences. Indicative summarization is performed using weighted term frequency-inverse document frequency scores of over-represented domain-specific terms. A controlled vocabulary dictionary generated using MeSH, SNOMED-CT, and PubTator is used for determining relevant terms. 35 full-text CRAFT articles were used as the training set. The performance of the MINTS algorithm is evaluated on a test set consisting of the remaining 32 full-text CRAFT articles and 30 clinical case reports using the ROUGE toolkit.

Results

The random forests model classified sentences as ""good"" or ""bad"" with 87.5% accuracy on the test set. Summarization results from the MINTS algorithm achieved higher ROUGE-1, ROUGE-2, and ROUGE-SU4 scores when compared to methods based on single indicators such as term frequency distribution, position, eigenvector centrality (LexRank), and random selection, p < 0.01. The automatic language translator and the customizable information extraction and pre-processing pipeline for EHR demonstrate that CERC can readily be incorporated within clinical decision support systems to improve quality of care and assist in data-driven and evidence-based informed decision making for direct patient care.

Conclusions

We have developed a web-based summarization and visualization tool, CERC ( https://newton.isye.gatech.edu/CERC1/ ), for extracting salient information from clinical and biomedical text. The system ranks sentences by relevance and includes features that can facilitate early detection of medical risks in a clinical setting. The interactive interface allows users to filter content and edit/save summaries. The evaluation results on two test corpuses show that the newly developed MINTS algorithm outperforms methods based on single characteristics of importance.",2020-12-15 +32824044,FLAVi: An Enhanced Annotator for Viral Genomes of Flaviviridae. ,"Responding to the ongoing and severe public health threat of viruses of the family Flaviviridae, including dengue, hepatitis C, West Nile, yellow fever, and Zika, demands a greater understanding of how these viruses emerge and spread. Updated phylogenies are central to this understanding. Most cladograms of Flaviviridae focus on specific lineages and ignore outgroups, hampering the efficacy of the analysis to test ingroup monophyly and relationships. This is due to the lack of annotated Flaviviridae genomes, which has gene content variation among genera. This variation makes analysis without partitioning difficult. Therefore, we developed an annotation pipeline for the genera of Flaviviridae (Flavivirus, Hepacivirus, Pegivirus, and Pestivirus), named ""Fast Loci Annotation of Viruses"" (FLAVi; http://flavi-web.com/), that combines ab initio and homology-based strategies. FLAVi recovered 100% of the genes in Flavivirus and Hepacivirus genomes. In Pegivirus and Pestivirus, annotation efficiency was 100% except for one partition each. There were no false positives. The combined phylogenetic analysis of multiple genes made possible by annotation has clear impacts over the tree topology compared to phylogenies that we inferred without outgroups or data partitioning. 
The final tree is largely congruent with previous hypotheses and adds evidence supporting the close phylogenetic relationship between dengue and Zika.",2020-08-14 +23550061,curatedOvarianData: clinically annotated data for the ovarian cancer transcriptome.,"This article introduces a manually curated data collection for gene expression meta-analysis of patients with ovarian cancer and software for reproducible preparation of similar databases. This resource provides uniformly prepared microarray data for 2970 patients from 23 studies with curated and documented clinical metadata. It allows users to efficiently identify studies and patient subgroups of interest for analysis and to perform meta-analysis immediately without the challenges posed by harmonizing heterogeneous microarray technologies, study designs, expression data processing methods and clinical data formats. We confirm that the recently proposed biomarker CXCL12 is associated with patient survival, independently of stage and optimal surgical debulking, which was possible only through meta-analysis owing to insufficient sample sizes of the individual studies. The database is implemented as the curatedOvarianData Bioconductor package for the R statistical computing language, providing a comprehensive and flexible resource for clinically oriented investigation of the ovarian cancer transcriptome. The package and pipeline for producing it are available from http://bcb.dfci.harvard.edu/ovariancancer.",2013-04-02 +33100922,"Oh, Behave!: PRESIDENTIAL ADDRESS, XXth International Conference on Infant Studies New Orleans, LA, US May 2016.","Behavior is essential for understanding infant learning and development. Although behavior is transient and ephemeral, we have the technology to make it tangible and enduring. Video uniquely captures and preserves the details of behavior and the surrounding context. 
By sharing videos for documentation and data reuse, we can exploit the tremendous opportunities provided by infancy research and overcome the important challenges in studying behavior. The Datavyu video coding software and Databrary digital video library provide tools and infrastructure for mining and sharing the richness of video. This article is based on my Presidential Address to the International Congress on Infant Studies in New Orleans, May 22, 2016 (Video 1 at https://www.databrary.org/volume/955/slot/39352/-?asset=190106). Given that the article describes the power of video for understanding behavior, I use video clips rather than static images to illustrate most of my points, and the videos are shared on the Databrary library.",2020-06-18 +32297047,The completed macronuclear genome of a model ciliate Tetrahymena thermophila and its application in genome scrambling and copy number analyses.,"The ciliate Tetrahymena thermophila has been a powerful model system for molecular and cellular biology. However, some investigations have been limited due to the incomplete closure and sequencing of the macronuclear genome assembly, which for many years has been stalled at 1,158 scaffolds, with large sections of unknown sequences (available in Tetrahymena Genome Database, TGD, http://ciliate.org/ ). Here we completed the first chromosome-level Tetrahymena macronuclear genome assembly, with approximately 300× long Single Molecule, Real-Time reads of the wild-type SB210 cells-the reference strain for the initial macronuclear genome sequencing project. All 181 chromosomes were capped with two telomeres and gaps were entirely closed. The completed genome shows significant improvements over the current assembly (TGD 2014) in both chromosome structure and sequence integrity. The majority of previously identified gene models shown in TGD were retained, with the addition of 36 new genes and 883 genes with modified gene models. 
The new genome and annotation were incorporated into TGD. This new genome allows for pursuit in some underexplored areas that were far more challenging previously; two of them, genome scrambling and chromosomal copy number, were investigated in this study. We expect that the completed macronuclear genome will facilitate many studies in Tetrahymena biology, as well as multiple lines of research in other eukaryotes.",2020-04-13 +31730197,RaNA-Seq: Interactive RNA-Seq analysis from FASTQ files to functional analysis. ,"RaNA-Seq is a cloud platform for the rapid analysis and visualization of RNA-Seq data. It performs a full analysis in minutes by quantifying FASTQ files, calculating quality control metrics, running differential expression analyses and enabling the explanation of results with functional analyses. Our analysis pipeline applies generally accepted and reproducible protocols that can be applied with two simple steps in its web interface. Analysis results are presented as interactive graphics and reports, ready for their interpretation and publication. RaNA-Seq web service is freely available online at https://ranaseq.eu. Supplementary data are available at Bioinformatics online.",2019-11-15 +31599925,AggreRATE-Pred: a mathematical model for the prediction of change in aggregation rate upon point mutation.,"

Motivation

Protein aggregation is a major unsolved problem in biochemistry with implications for several human diseases, biotechnology and biomaterial sciences. A majority of sequence-structural properties known for their mechanistic roles in protein aggregation do not correlate well with the aggregation kinetics. This limits the practical utility of predictive algorithms.

Results

We analyzed experimental data on 183 unique single point mutations that lead to change in aggregation rates for 23 polypeptides and proteins. Our initial mathematical model obtained a correlation coefficient of 0.43 between predicted and experimental change in aggregation rate upon mutation (P-value <0.0001). However, when the dataset was classified based on protein length and conformation at the mutation sites, the average correlation coefficient almost doubled to 0.82 (range: 0.74-0.87; P-value <0.0001). We observed that distinct sequence and structure-based properties determine protein aggregation kinetics in each class. In conclusion, the protein aggregation kinetics are impacted by local factors and not by global ones, such as overall three-dimensional protein fold, or mechanistic factors such as the presence of aggregation-prone regions.

Availability and implementation

The web server is available at http://www.iitm.ac.in/bioinfo/aggrerate-pred/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +24391364,"MAPS Database: Medicinal plant Activities, Phytochemical and Structural Database.","

Unlabelled

Drug development from natural sources is an important and fast developing area. Natural sources (plants) have been used to cure a range of diseases for thousands of years. Different online medicinal plant databases provide information about classifications, activities, phytochemicals and structure of phytochemicals in different formats. These databases do not cover all aspects of medicinal plants. MAPS (Medicinal plant Activities, Phytochemicals & structural database) has been constructed with uniqueness that it combines all information in one web resource and additionally provides test targets on which particular plant found to be effective with reference to the original paper as well. MAPS database is user friendly information resource, including the data of > 500 medicinal plants. This database includes phytochemical constituents, their structure in mol format, different activities possessed by the medicinal plant with the targets reported in literature.

Availability

http://www.mapsdatabase.com.",2013-12-06 +32692251,Quantitative Structure-Activity Relationship Models for Predicting Inflammatory Potential of Metal Oxide Nanoparticles.,"

Background

Although substantial concerns about the inflammatory effects of engineered nanomaterial (ENM) have been raised, experimentally assessing toxicity of various ENMs is challenging and time-consuming. Alternatively, quantitative structure-activity relationship (QSAR) models have been employed to assess nanosafety. However, no previous attempt has been made to predict the inflammatory potential of ENMs.

Objectives

By employing metal oxide nanoparticles (MeONPs) as a model ENM, we aimed to develop QSAR models for prediction of the inflammatory potential by their physicochemical properties.

Methods

We built a comprehensive data set of 30 MeONPs to screen a proinflammatory cytokine interleukin (IL)-1 beta (IL-1β) release in THP-1 cell line. The in vitro hazard ranking was validated in mouse lungs by oropharyngeal instillation of six randomly selected MeONPs. We established QSAR models for prediction of MeONP-induced inflammatory potential via machine learning. The models were further validated against seven new MeONPs. Density functional theory (DFT) computations were exploited to decipher the key mechanisms driving inflammatory responses of MeONPs.

Results

Seventeen out of 30 MeONPs induced excess IL-1β production in THP-1 cells. In vivo disease outcomes were highly relevant to the in vitro data. QSAR models were developed for inflammatory potential, with predictive accuracy (ACC) exceeding 90%. The models were further validated experimentally against seven independent MeONPs (ACC=86%). DFT computations and experimental results further revealed the underlying mechanisms: MeONPs with metal electronegativity lower than 1.55 and positive ζ-potential were more likely to cause lysosomal damage and inflammation.

Conclusions

IL-1β released in THP-1 cells can be an index to rank the inflammatory potential of MeONPs. QSAR models based on IL-1β were able to predict the inflammatory potential of MeONPs. Our approach overcame the challenge of time- and labor-consuming biological experiments and allowed for computational assessment of MeONP inflammatory potential by characterization of their physicochemical properties. https://doi.org/10.1289/EHP6508.",2020-06-12 +30197923,"New WGS data and annotation of the heterosomal vs. autosomal localization of Ostrinia scapulalis (Lepidoptera, Crambidae) nuclear genomic scaffolds.","Here, we introduce new whole-genome shotgun sequencing and annotation data describing the autosomal vs. Z-heterosomal localization of nuclear genomic scaffolds of the moth species Ostrinia scapulalis. Four WGS libraries (corresponding to 2 males and 2 females) were sequenced with an Illumina HiSeq2500 sequencing technology, and the so-called 'AD-ratio' method was applied to distinguish between autosomal and Z-heterosomal scaffolds based on sequencing depth comparisons between homogametic (male) and heterogametic (female) libraries. A total of 25,760 scaffolds (corresponding to 341.69 Mb) were labelled as autosomal and 1273 scaffolds (15.29 Mb) were labelled as Z-heterosomal, totaling about 357 Mb. Besides, 4874 scaffolds (29.07 Mb) remain ambiguous because of a lack of AD-ratio reproducibility between the two replicates. The annotation method was evaluated a posteriori, by comparing depth-based annotation with the exact localization of known genes. Raw genomic data have been deposited and made accessible via the EMBL ENA BioProject id PRJEB26557. Comprehensive annotation is made accessible via the LepidoDB database (http://bipaa.genouest.org/sp/ostrinia_scapulalis/download/genome/v1.2/).",2018-08-09 +25652745,CLIPdb: a CLIP-seq database for protein-RNA interactions.,"

Background

RNA-binding proteins (RBPs) play essential roles in gene expression regulation through their interactions with RNA transcripts, including coding, canonical non-coding and long non-coding RNAs. Large amounts of crosslinking immunoprecipitation (CLIP)-seq data (including HITS-CLIP, PAR-CLIP, and iCLIP) have been recently produced to reveal transcriptome-wide binding sites of RBPs at the single-nucleotide level.

Description

Here, we constructed a database, CLIPdb, to describe RBP-RNA interactions based on 395 publicly available CLIP-seq data sets for 111 RBPs from four organisms: human, mouse, worm and yeast. We consistently annotated the CLIP-seq data sets and RBPs, and developed a user-friendly interface for rapid navigation of the CLIP-seq data. We applied a unified computational method to identify transcriptome-wide binding sites, making the binding sites directly comparable and the data available for integration across different CLIP-seq studies. The high-resolution binding sites of the RBPs can be visualized on the whole-genome scale using a browser. In addition, users can browse and download the identified binding sites of all profiled RBPs by querying genes of interest, including both protein coding genes and non-coding RNAs.

Conclusion

Manually curated metadata and uniformly identified binding sites of publicly available CLIP-seq data sets will be a foundation for further integrative and comparative analyses. With maintained up-to-date data sets and improved functionality, CLIPdb ( http://clipdb.ncrnalab.org ) will be a valuable resource for improving the understanding of post-transcriptional regulatory networks.",2015-02-05 +30482792,Phosphofructokinase controls the acetaldehyde-induced phase shift in isolated yeast glycolytic oscillators.,"The response of oscillatory systems to external perturbations is crucial for emergent properties such as synchronisation and phase locking and can be quantified in a phase response curve (PRC). In individual, oscillating yeast cells, we characterised experimentally the phase response of glycolytic oscillations for external acetaldehyde pulses and followed the transduction of the perturbation through the system. Subsequently, we analysed the control of the relevant system components in a detailed mechanistic model. The observed responses are interpreted in terms of the functional coupling and regulation in the reaction network. We find that our model quantitatively predicts the phase-dependent phase shift observed in the experimental data. The phase shift is in agreement with an adaptation leading to synchronisation with an external signal. Our model analysis establishes that phosphofructokinase plays a key role in the phase shift dynamics as shown in the PRC and adaptation time to external perturbations. Specific mechanism-based interventions, made possible through such analyses of detailed models, can improve upon standard trial and error methods, e.g. melatonin supplementation to overcome jet-lag, which are error-prone, specifically, since the effects are phase dependent and dose dependent. 
The models by Gustavsson and Goldbeter discussed in the text can be obtained from the JWS Online simulation database: (https://jjj.bio.vu.nl/models/gustavsson5 and https://jjj.bio.vu.nl/models/goldbeter1).",2019-01-31 +30259659,Role of TGF-β1/miR-382-5p/SOD2 axis in the induction of oxidative stress in CD34+ cells from primary myelofibrosis.,"Primary myelofibrosis (PMF) is a myeloproliferative neoplasm characterized by an excessive production of pro-inflammatory cytokines resulting in chronic inflammation and genomic instability. Besides the driver mutations in JAK2, MPL, and CALR genes, the deregulation of miRNA expression may also contribute to the pathogenesis of PMF. To this end, we recently reported the upregulation of miR-382-5p in PMF CD34+ cells. In order to unveil the mechanistic details of the role of miR-382-5p in pathogenesis of PMF, we performed gene expression profiling of CD34+ cells overexpressing miR-382-5p. Among the downregulated genes, we identified superoxide dismutase 2 (SOD2), which is a predicted target of miR-382-5p. Subsequently, we confirmed miR-382-5p/SOD2 interaction by luciferase assay and we showed that miR-382-5p overexpression in CD34+ cells causes the decrease in SOD2 activity leading to reactive oxygen species (ROS) accumulation and oxidative DNA damage. In addition, our data indicate that inhibition of miR-382-5p in PMF CD34+ cells restores SOD2 function, induces ROS disposal, and reduces DNA oxidation. Since the pro-inflammatory cytokine transforming growth factor-β1 (TGF-β1) is a key player in PMF pathogenesis, we further investigated the effect of TGF-β1 on ROS and miR-382-5p levels. Our data showed that TGF-β1 treatment enhances miR-382-5p expression and reduces SOD2 activity leading to ROS accumulation. Finally, inhibition of TGF-β1 signaling in PMF CD34+ cells by galunisertib significantly reduced miR-382-5p expression and ROS accumulation and restored SOD2 activity. 
As a whole, this study reports that TGF-β1/miR-382-5p/SOD2 axis deregulation in PMF cells is linked to ROS overproduction that may contribute to enhanced oxidative stress and inflammation. Our results suggest that galunisertib may represent an effective drug reducing abnormal oxidative stress induced by TGF-β1 in PMF patients. DATABASE LINKING: GEO: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE103464.",2018-11-16 +32603655,Selective Neuronal Vulnerability in Alzheimer's Disease: A Network-Based Analysis.,"A major obstacle to treating Alzheimer's disease (AD) is our lack of understanding of the molecular mechanisms underlying selective neuronal vulnerability, a key characteristic of the disease. Here, we present a framework integrating high-quality neuron-type-specific molecular profiles across the lifetime of the healthy mouse, which we generated using bacTRAP, with postmortem human functional genomics and quantitative genetics data. We demonstrate human-mouse conservation of cellular taxonomy at the molecular level for neurons vulnerable and resistant in AD, identify specific genes and pathways associated with AD neuropathology, and pinpoint a specific functional gene module underlying selective vulnerability, enriched in processes associated with axonal remodeling, and affected by amyloid accumulation and aging. We have made all cell-type-specific profiles and functional networks available at http://alz.princeton.edu. Overall, our study provides a molecular framework for understanding the complex interplay between Aβ, aging, and neurodegeneration within the most vulnerable neurons in AD.",2020-06-29 +31673577,Public good experiment data of a water game framed to Rajasthan/India.,"This dataset belongs to a framed economic field experiment conducted in 2016 in Bhilwara district in Rajasthan state in India. A public good game was framed as dam management challenge. We made incentivized payments based on the game earnings. 
The data are organized as a panel defined by players and experiment rounds. The dataset contains the experiment decisions in different phases of the experiment as well as socio-economic variables of the anonymized players. The data can be accessed through the Dataverse of the International Crops Research Institute for the Semi-Arid Tropics (ICRISAT) under the following link: https://doi.org/10.21421/D2/MFT8ZD. The data article is related to the research article Falk et al. [2] on ""Experimental games for developing institutional capacity to manage common water infrastructure in India"".",2019-09-24 +32563838,The use of Bayesian Networks and simulation methods to identify the variables impacting the value of evidence assessed under activity level propositions in stabbing cases.,"This paper presents a methodology allowing identification of the variables associated with transfer, persistence or recovery of DNA traces that have the most significant impact on the result of an evaluation measured through a likelihood ratio (LR). It builds on a case scenario involving trace DNA recovered from knife handles where the prosecution alleges that the person of interest (POI) stabbed a victim whereas the defence claims that the POI has nothing to do with the incident and the victim was stabbed by an alternative offender (AO). The defence proposition will also be refined to account for the possibility of secondary transfer. The variables having a significant impact on the LR are identified taking advantage of a graphical probabilistic environment (using Bayesian Networks, BN), coupled with simulation techniques. The paper presents (a) a BN, based on previous work Taylor et al. [5]; (b) its parametrization based on the current literature that represents the current state of knowledge used to inform the conditional probability tables of the BN and; (c) the implementation of the simulation methods. 
Results show that, regardless of the DNA outcome obtained, the most impacting variable is the ""DNA match probability"" when the defence alleged that the POI has nothing to do with the incident. It means that, given the current state of knowledge, such cases can easily be interpreted considering activity level propositions as they would not require any further data acquisition. When secondary transfer is alleged under the defence's perspective, the LRs are generally much lower than for the previous case. The DNA match probability has less impact and variables associated with the donor will take the lead on the ranges observed on the LRs. Overall, once extraction and sampling efficiency have been set, the remaining variables that have an impact on the value of the evidence are the DNA quantity on hands and the background. With the most impacting variables so identified, it becomes manageable to direct further data acquisition if so required. Generally, the background that could be present on the knife handle, the environmental conditions are not critical due to their limited impact on the LR value. We note, however, that this identification of the significant variables depends on the obtained DNA results and this selection may be refined on a case by case basis. To allow one to explore all possibilities a dedicated Shiny application has been designed (https://lydie-samie.shinyapps.io/DNA_Activity/).",2020-06-11 +35073701,"Genotyping of noroviruses from patients of the Pilsen University Hospital in the Czech Republic, 2017-2020.","

Objective

Noroviruses are members of the Caliciviridae family and are currently segregated into at least 10 genogroups. The distribution of these viruses in the Czech Republic has not yet been investigated in detail. A pilot study was performed to contribute to the overall knowledge and understanding of norovirus circulation in the population. Clinical specimens from patients diagnosed with norovirus infection during their hospitalization at the Pilsen University Hospital were genotyped.

Material and methods

A total of 118 patients were diagnosed with norovirus infection between July 2017 and March 2020. Stool samples from patients presenting with gastroenteritis were routinely screened by the RIDA®QUICK Norovirus Test (R-Biopharm AG), a rapid chromatographic immunoassay for the qualitative detection of Norovirus genogroups I and II, according to the manufacturer's instructions. Norovirus positive samples were subsequently analysed by molecular biological methods. Stool suspensions (10%) were prepared with phosphate-buffered saline, and nucleic acid was extracted using the QIAamp Viral RNA kit (Qiagen) according to the manufacturer's instructions. To investigate the genotype distribution, RT-PCR targeting specific sections of the norovirus genome (region C) was employed, followed by purification of PCR products using the QIAquick PCR Purification Kit (Qiagen) and sequencing (Eurofins Genomics). The sequences obtained were analysed by the MEGA X software, and the results of phylogenetic analyses were confirmed by Norovirus Typing Tool Version 2.0 (https://www.rivm.nl/mpf/typingtool/norovirus/).

Results

During the study period, 14 norovirus genotypes or genogroup variants NoV GI, NoV GII and NoV GIX (previously NoV GII.15) were identified. The major genotype NoV GII.4 Sydney-2012 accounted for a total of 70.3% of norovirus gastroenteritis cases. This norovirus variant was detected in 24 months out of 34 months of the study period. In general, the number of norovirus infections increased during autumn and winter months (October to March) when 68 (57.6%) cases were diagnosed. Although norovirus infection was confirmed in all age categories (age range 0-96 years, median 8, mean 27.9), the statistical analysis revealed a significant difference in the incidence of NoV GII.4 infection between the age group 0-4 years and older patients (χ2 = 3.95, P = 0.047). Many patients (51) were residents of the Pilsen-city district. The case history data showed that 35 of them (29.7%) had another family member who developed symptoms of gastroenteritis at the time of the onset of their infection.

Conclusion

The pilot study is the first attempt to map the molecular epidemiology of noroviruses, not only in the Pilsen Region but also in the whole Czech Republic. Despite the relatively low number of officially reported cases, noroviruses are undoubtedly one of the most important causes of gastroenteritis in this country. Further studies are therefore necessary to expand the body of knowledge of their ecology and circulation.",2021-01-01 +31878868,PROMO: an interactive tool for analyzing clinically-labeled multi-omic cancer datasets.,"BACKGROUND:Analysis of large genomic datasets along with their accompanying clinical information has shown great promise in cancer research over the last decade. Such datasets typically include thousands of samples, each measured by one or several high-throughput technologies ('omics') and annotated with extensive clinical information. While instrumental for fulfilling the promise of personalized medicine, the analysis and visualization of such large datasets is challenging and necessitates programming skills and familiarity with a large array of software tools to be used for the various steps of the analysis. RESULTS:We developed PROMO (Profiler of Multi-Omic data), a friendly, fully interactive stand-alone software for analyzing large genomic cancer datasets together with their associated clinical information. The tool provides an array of built-in methods and algorithms for importing, preprocessing, visualizing, clustering, clinical label enrichment testing, and survival analysis that can be performed on a single or multi-omic dataset. The tool can be used for quick exploration and stratification of tumor samples taken from patients into clinically significant molecular subtypes. Identification of prognostic biomarkers and generation of simple subtype classifiers are additional important features. We review PROMO's main features and demonstrate its analysis capabilities on a breast cancer cohort from TCGA. 
CONCLUSIONS:PROMO provides a single integrated solution for swiftly performing a complete analysis of cancer genomic data for subtype discovery and biomarker identification without writing a single line of code, and can, therefore, make the analysis of these data much easier for cancer biologists and biomedical researchers. PROMO is freely available for download at http://acgt.cs.tau.ac.il/promo/.",2019-12-26 +33040189,An online tool for predicting the prognosis of cancer patients with SARS-CoV-2 infection: a multi-center study.,"

Purpose

During the 2019 coronavirus disease (COVID-19) pandemic, oncologists face new challenges, and they need to adjust their cancer management strategies as soon as possible to reduce the risk of SARS-CoV-2 infection and tumor recurrence. However, data on cancer patients with SARS-CoV-2 infection remains scarce.

Methods

We conducted a retrospective study on 223 cancer patients with SARS-CoV-2 from 26 hospitals in Hubei, China. An individualized nomogram was constructed based on multivariate Cox analysis. Considering the convenience of the nomogram application, an online tool was also created. The predictive performance and clinical application of nomogram were verified by C-index, calibration curve and decision curve analysis (DCA).

Results

Among cancer patients with SARS-CoV-2, there were significant differences in clinical characteristics between survivors and non-survivors, and compared with patients with solid tumors including lung cancer, patients with hematological malignancies had a worse prognosis. Male, dyspnea, elevated PCT, increased heart rate, elevated D-dimers, and decreased platelets were risk factors for these patients. Furthermore, a good prediction performance of the online tool (dynamic nomogram: https://covid-19-prediction-tool.shinyapps.io/DynNomapp/ ) was also fully demonstrated with the C-indexes of 0.841 (95% CI 0.782-0.900) in the development cohort and 0.780 (95% CI 0.678-0.882) in the validation cohort.

Conclusion

Overall, cancer patients with SARS-CoV-2 had unique clinical features, and the established online tool could guide clinicians to predict the prognosis of patients during the COVID-19 epidemic and to develop more rational treatment strategies for cancer patients.",2020-10-11 +32484384,Disability among children of immigrants from India and China: Is there excess disability among girls?,"We investigate whether there is excess morbidity among daughters of Indian or Chinese immigrants in the US by studying the prevalence of disability among children. We use data from the 2012-14 American Community Surveys on approximately 20,000 US-born children of Indian and Chinese immigrants. Children of US natives are used as a comparison group to account for innate differences in disability between the sexes. Results indicate that there is excess disability among daughters compared with sons among children of Chinese immigrants and children of immigrants from northern or western Indian states; this excess disability declines with younger age at arrival or longer exposure to the host country. Analysis using children of Filipino immigrants as an alternative comparison group yields similar excess disability rates for females. Supplementary material is available for this article at: https://doi.org/10.1080/00324728.2020.1762911.",2020-06-02 +,6ER-017 Assessment of drug prescription using the world health organisation (who) indicators at a public hospital,"

Background

Drug use is one part of the drug management cycle which covers selection, procurement, distribution and use. The World Health Organisation has provided the WHO core drug use indicators to promote rational drug use in developing countries.

Purpose

This study aimed at assessing drug prescription patterns using WHO prescribing indicators at a public hospital in Indonesia, as a pilot study for further larger studies.

Material and methods

This was a cross-sectional study conducted in one district hospital in Central Java Province, Indonesia. Data were collected retrospectively from prescriptions of outpatients visiting the district hospital in a period of two years from 1 January 2015 to 31 December 2016 to examine the time-trend performance. In total, 1218 prescriptions consisting of 609 prescriptions for each year were included in the analysis. Data were analysed in accordance with WHO prescribing indicators 1993 modified in 2004.

Results

The average number of drugs prescribed per encounter in 2015 and 2016 were 3 and 3.1, respectively (WHO standard: 1.6 to 1.8). The percentage of drugs prescribed by generic name in 2015 and 2016 were 63.9% and 68.2%, respectively (WHO standard: 100%). The percentage of encounters in which an antibiotic was prescribed in 2015 and 2016 were 37.8% and 34.3%, respectively (WHO standard: <30%). The percentage of encounters in which an injection was prescribed in 2015 and 2016 were 1.1% and 3.1%, respectively (WHO standard: 13.4% to 24.1%). The percentage of drugs prescribed from the hospital formulary in 2015 and 2016 were 96.9% and 98.2%, respectively (WHO standard: 100%).

Conclusion

The prescribing practices tended to show better patterns by time, indicated by lower deviation from the standard. The most significant problem in prescribing practices was the high average number of drugs prescribed per encounter which leads to polypharmacy, followed by a low percentage of drugs prescribed by generic name and a high percentage of encounters with antibiotics, which tends to increase treatment cost.

References and/or Acknowledgements

1. World Health Organisation. How to investigate drug use in health facilities: Selected drug use indicators. 1993. 2. Isah AO, Ross-Degnan D, Quick J, Laing R, Mabadeje AFB. The development of standard values for the WHO drug use prescribing indicators 2004. Nigeria: ICUM/EDM/WHO. http://archives.who.int/prduc2004/rducd/ICIUM_Posters/1a2_txt.html No conflict of interest",2018-01-01 +33523611,Developing an evidence-based online method of linking behaviour change techniques and theoretical mechanisms of action: a multiple methods study,"

Background

Many global health challenges may be targeted by changing people’s behaviour. Behaviours including cigarette smoking, physical inactivity and alcohol misuse, as well as certain dietary behaviours, contribute to deaths and disability by increasing the risk of cancers, cardiovascular diseases and diabetes. Interventions have been designed to change these health behaviours with a view to reducing these health risks. However, the effectiveness of these interventions has been quite variable and further information is needed to enhance their success. More information is needed about the specific processes that underlie the effectiveness of intervention strategies.

Aim

Researchers have developed a taxonomy of 93 behaviour change techniques (i.e. the active components of an intervention that bring about behavioural change), but little is known regarding their potential mechanisms of action (i.e. the processes through which a behaviour change technique affects behaviour). We therefore aimed to examine links between behaviour change techniques and mechanisms of action.

Method

First, we conducted a literature synthesis study of 277 behaviour change intervention studies, from which we extracted information on links, described by authors, between behaviour change techniques and mechanisms of action, and identified an average of 10 links per intervention report. Second, behaviour change experts (n = 105) were engaged in a three-round consensus study in which they discussed and rated their confidence in the presence/absence of ‘links’ and ‘non-links’ between commonly used behaviour change techniques (n = 61) and a set of mechanisms of action (n = 26). Ninety links and 460 ‘non-links’ reached the pre-set threshold of 80% agreement. To enhance the validity of these results, a third study was conducted that triangulated the findings of the first two studies. Discrepancies and uncertainties between the studies were included in a reconciliation consensus study with a new group of experts (n = 25). The final results identified 92 definite behaviour change technique–mechanism of action links and 465 definite non-links. In a fourth study, we examined whether or not groups of behaviour change techniques used together frequently across interventions revealed shared theoretical underpinnings. We found that experts agreed on the underlying theory for three groups of behaviour change techniques.

Results

Our results are potentially useful to policy-makers and practitioners in selecting behaviour change techniques to include in behaviour change interventions. However, our data do not demonstrate that the behaviour change techniques are effective in targeting the mechanism of action; rather, the links identified may be the ‘best bets’ for interventions that are effective in changing mechanisms of action, and the non-links are unlikely to be effective. Researchers examining effectiveness of interventions in either primary studies or evidence syntheses may consider these links for further investigation.

Conclusion

To make our results usable by researchers, practitioners and policy-makers, they are available in an online interactive tool, which enables discussion and collaboration (https://theoryandtechniquetool.humanbehaviourchange.org/); accessed 1 March 2020. This work, building on previous work to develop the behaviour change technique taxonomy, is part of an ongoing programme of work: the Human Behaviour Change Project (www.humanbehaviourchange.org/; accessed 1 March 2020).

Funding

This project was funded by the Medical Research Council via its Methodology Panel: ‘Developing methodology for designing and evaluating theory-based complex interventions: an ontology for linking behaviour change techniques to theory’ (reference MR/L011115/1).",2021-02-02 +33362391,Prognostic value of the preoperative fibrinogen-to-albumin ratio in pancreatic ductal adenocarcinoma patients undergoing R0 resection.,"

Background

Inflammation plays an important role in tumor progression, and growing evidence has confirmed that the fibrinogen-to-albumin ratio (FAR) is an important prognostic factor for overall survival in malignant tumors.

Aim

To investigate the prognostic significance of FAR in patients undergoing radical R0 resection of pancreatic ductal adenocarcinoma (PDAC).

Methods

We retrospectively analyzed the data of 282 patients with PDAC who underwent radical R0 resection at The Cancer Hospital of the Chinese Academy of Medical Sciences from January 2010 to December 2019. The surv_cutpoint function of the R package survminer via RStudio software (version 1.3.1073, http://www.rstudio.org) was used to determine the optimal cut-off values of biological markers, such as preoperative FAR. The Kaplan-Meier method and log-rank tests were used for univariate survival analysis, and a Cox regression model was used for multivariate survival analysis for PDAC patients who underwent radical R0 resection.

Results

The optimal cut-off value of FAR was 0.08 by the surv_cutpoint function. Higher preoperative FAR was significantly correlated with clinical symptoms (P = 0.001), tumor location (P < 0.001), surgical approaches (P < 0.001), preoperative plasma fibrinogen concentration (P < 0.001), and preoperative plasma albumin level (P < 0.001). Multivariate analysis showed that degree of tumor differentiation (P < 0.001), number of metastatic lymph nodes [hazard ratio (HR): 0.678, 95% confidence interval (CI): 0.509-0.904, P = 0.008], adjuvant therapy (HR: 1.604, 95%CI: 1.214-2.118, P = 0.001), preoperative cancer antigen 19-9 level (HR: 1.740, 95%CI: 1.288-2.352, P < 0.001), and preoperative FAR (HR: 2.258, 95%CI: 1.720-2.963, P < 0.001) were independent risk factors for poor prognosis in patients with PDAC who underwent radical R0 resection.

Conclusion

The increase in preoperative FAR was significantly related to poor prognosis in patients undergoing radical R0 resection for PDAC. Preoperative FAR can be used clinically to predict the prognosis of PDAC patients undergoing radical R0 resection.",2020-12-01 +33431029,"The ChemicalToolbox: reproducible, user-friendly cheminformatics analysis on the Galaxy platform.","Here, we introduce the ChemicalToolbox, a publicly available web server for performing cheminformatics analysis. The ChemicalToolbox provides an intuitive, graphical interface for common tools for downloading, filtering, visualizing and simulating small molecules and proteins. The ChemicalToolbox is based on Galaxy, an open-source web-based platform which enables accessible and reproducible data analysis. There is already an active Galaxy cheminformatics community using and developing tools. Based on their work, we provide four example workflows which illustrate the capabilities of the ChemicalToolbox, covering assembly of a compound library, hole filling, protein-ligand docking, and construction of a quantitative structure-activity relationship (QSAR) model. These workflows may be modified and combined flexibly, together with the many other tools available, to fit the needs of a particular project. The ChemicalToolbox is hosted on the European Galaxy server and may be accessed via https://cheminformatics.usegalaxy.eu .",2020-06-01 +,6ER-011 Drug utilisation study of bevacizumab in a teaching referral paediatric hospital,"

Background

Bevacizumab is a humanised monoclonal antibody against vascular endothelial growth factor authorised for adult cancer treatments. There are several case series and clinical trials on the use of bevacizumab in paediatric tumours at dose range of 5 to 15 mg/kg every 2 to 4 weeks.

Purpose

To describe the use of bevacizumab in oncologic patients of a paediatric referral hospital.

Material and methods

Data from patients treated with bevacizumab were obtained based on off-label use and medical history records from January to September 2017. We focused on indication, treatment duration, dose regimen and, if any, reason for discontinuation. Each case was previously authorised by our Medical Director and a signed informed consent obtained. Indication was classified as per tumour type and location: central nervous system (CNS) tumours, neurofibromatosis (NF) -related tumours, extra CNS malignant and benign tumours: the fifth group was treatment of brain radionecrosis. We also divided the reason for discontinuation into three groups: end of treatment, disease progression and intolerable side-effects.

Results

After analysing data from 62 patients, 71% of tumours were CNS-located, 14.5% of which were NF-related, followed by 12.9% of radionecrosis treatment and extra CNS malignant (11.3%) and benign tumours (4.8%). The median duration of treatment was 5.5 months (IQR 13.75) and the most common dose regimen was 10 mg/kg (83.9%) every 2 weeks (79%). Only 22.6% of treatments remained active at the end of the study. Discontinuation reason was mostly disease progression (43.5%) followed by end of treatment (27.4%). Side-effects were similar to those reported in the literature, causing treatment discontinuation in 6.5% of patients.

Conclusion

Bevacizumab was mainly used to treat CNS tumours at a dose of 10 mg/kg every 2 weeks. After a median duration of 5.5 months, the drug appeared to be safe since only 6.5% of the treatments were discontinued due to side-effects. Our results are consistent with the literature except for radionecrosis. More studies are needed to assess its efficacy and long term adverse events.

Reference and/or Acknowledgements

1. Benesch M, et al. Compassionate use of bevacizumab (Avastin®) in children and young adults with refractory or recurrent solid tumours. Annals of Oncology2008April 1;19(4):807–813. https://doi.org/10.1093/annonc/mdm510 No conflict of interest",2018-01-01 +29594154,A Systems Approach to Evaluate One Health Initiatives.,"Challenges calling for integrated approaches to health, such as the One Health (OH) approach, typically arise from the intertwined spheres of humans, animals, and ecosystems constituting their environment. Initiatives addressing such wicked problems commonly consist of complex structures and dynamics. As a result of the EU COST Action (TD 1404) ""Network for Evaluation of One Health"" (NEOH), we propose an evaluation framework anchored in systems theory to address the intrinsic complexity of OH initiatives and regard them as subsystems of the context within which they operate. Typically, they intend to influence a system with a view to improve human, animal, and environmental health. The NEOH evaluation framework consists of four overarching elements, namely: (1) the definition of the initiative and its context, (2) the description of the theory of change with an assessment of expected and unexpected outcomes, (3) the process evaluation of operational and supporting infrastructures (the ""OH-ness""), and (4) an assessment of the association(s) between the process evaluation and the outcomes produced. It relies on a mixed methods approach by combining a descriptive and qualitative assessment with a semi-quantitative scoring for the evaluation of the degree and structural balance of ""OH-ness"" (summarised in an OH-index and OH-ratio, respectively) and conventional metrics for different outcomes in a multi-criteria-decision-analysis. Here, we focus on the methodology for Elements (1) and (3) including ready-to-use Microsoft Excel spreadsheets for the assessment of the ""OH-ness"". 
We also provide an overview of Element (2), and refer to the NEOH handbook for further details, also regarding Element (4) (http://neoh.onehealthglobal.net). The presented approach helps researchers, practitioners, and evaluators to conceptualise and conduct evaluations of integrated approaches to health and facilitates comparison and learning across different OH activities thereby facilitating decisions on resource allocation. The application of the framework has been described in eight case studies in the same Frontiers research topic and provides first data on OH-index and OH-ratio, which is an important step towards their validation and the creation of a dataset for future benchmarking, and to demonstrate under which circumstances OH initiatives provide added value compared to disciplinary or conventional health initiatives.",2018-03-09 +32154834,Leitmotif: protein motif scanning 2.0.,"MOTIVATION:Motif-HMM (mHMM) scanning has been shown to possess unique advantages over standardly used sequence-profile search methods (e.g. HMMER, PSI-BLAST) since it is particularly well-suited to discriminate proteins with variations inside conserved motifs (e.g. family subtypes) or motifs lacking essential residues (false positives, e.g. pseudoenzymes). RESULTS:In order to make mHMM widely accessible to a broader scientific community, we developed Leitmotif, an mHMM web application with many parametrization options easily accessible through intuitive interface. Substantial improvement of performance (ROC scores) was obtained by using two novel parameters. To the best of our knowledge, Leitmotif is the only available mHMM application. AVAILABILITY AND IMPLEMENTATION:Leitmotif is freely available at https://leitmotif.irb.hr. CONTACT:sinisa@heuristika.hr or ivan.vujaklija@fer.hr. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-06-01 +,4CPS-051 Monitoring of antibiotics: degree of compliance of the pharmacokinetic settings,"

Background

Increasingly frequent and inappropriate prescription of broad-spectrum antibiotics justifies the use in first-line treatment of effective antibiotics such as glycopeptides and aminoglycosides, whose use was abandoned because of their associated adverse effects.

Purpose

To analyse the degree of implementation of the recommendations of dose setting, by monitoring pharmacokinetics in antibiotic treatments in follow-up by the Spanish PROA Group (Optimisation Antibiotics PRogram).

Material and methods

Observational and retrospective study in the Unit of Clinical Pharmacokinetics (UFCC) of a university hospital during a period of 6 months (December 2016 to May 2017). The monitored drugs were the glycopeptide antibiotic vancomycin and the aminoglycoside antibiotics gentamicin, tobramycin and amikacin. For the processing of the information, standard sheets of application of plasma levels and reports made by the UFCC in the corporate application Diraya® (Digital single story) were reviewed. Both paediatric and adult populations were considered and the collected parameters were: dosage (mg/hour), weight (kg), size (cm), the infusion duration (min), age (years/days), days of treatment, the time of extraction, Cmin (trough level) and Cmax (peak level) (mcg/ml).

Results

The data of 123 adults were collected (63.4% male), with an average age of 46 years and range (16–91). The paediatric population consisted of 21 patients (12 females) with ages ranging from 2 days to 1.5 months. The average duration of treatment for adults was 17 days and 5 days for infants. A subset of 13 patients in haemodialysis (HD) (61.5% female) was also analysed. Seven hundred and twenty-two determinations of plasma levels, putting on average three to five monitors per adult patient in the paediatric information were sought. Seventy-eight per cent (563) of dosing adjustments were for vancomycin and the remaining 22% (159) were for aminoglycosides, gentamicin being the most frequently requested. Requests for levels were distributed by service as follows: infectious diseases (48%), ICU (22%), internal medicine (17%) and paediatrics (13%). Of the total of monitors, 2.9% (21) could not be performed due to lack of information or incorrect data in the application.

Conclusion

Of 217 recommended individualised dosing adjustments, 209 were accepted (96.3%), which allowed the use of these antibiotics in the first instance, preserving ecological niches and reducing the economic impact.

References and/or Acknowledgements

Thanks to the PROA working group and the rest of the clinicians who have made this work possible http://activos-salud.com/prioam/ http://www.hpm.sas.junta-andalucia.es/servicioandaluzdesalud/hpm2/puertadelmar/GuiaAntimicrobiano/perfusion-extendida-betalactamicos.html http://www.chj.sas.junta-andalucia.es/index.php?id=2881&nv=62&nv2=2661 No conflict of interest",2018-01-01 +27987164,Plant Genome DataBase Japan (PGDBj).,"A portal website that integrates a variety of information related to genomes of model and crop plants from databases (DBs) and the literature was generated. This website, named the Plant Genome DataBase Japan (PGDBj, http://pgdbj.jp/en/), is comprised of three component DBs and a cross-search engine which provides a seamless search over their contents. One of the three component DBs is the Ortholog DB, which provides gene cluster information based on the amino acid sequence similarity. Over 1,000,000 amino acid sequences of 40 Viridiplantae species were collected from the public DNA DBs, and plant genome DBs such as TAIR and RAP-DB were subjected to reciprocal BLAST searches for clustering. Another component DB is the Plant Resource DB for genomic- and bio-resources. This DB also integrates the SABRE DB, which provides cDNA and genome sequence resources maintained in the RIKEN BioResource Center and National BioResource Projects Japan. The third component DB of PGDBj is the DNA Marker DB, which manually or automatically collects curated information on DNA markers, quantitative trait loci (QTL), and related genetic linkage maps, from the literature and external DBs. 
By combining these component DBs and a cross-search engine, PGDBj serves as a useful platform to study genetic systems for both fundamental and applied researches for a wide range of plant species.",2017-01-01 +32271048,"Emodiversity, health, and well-being in the Midlife in the United States (MIDUS) daily diary study.","Emodiversity, or the variety and relative abundance of emotions experienced, provides a metric that can be used to understand emotional experience and its relation to well-being above and beyond average levels of positive and negative affect. Past research has found that more diverse emotional experiences, both positive and negative, are related to better mental and physical health outcomes. The present research aimed to test the relationship between positive and negative emodiversity across the span of 8 days with measures of health and well-being using 2 samples of the Midlife in the United States study (http://midus.wisc.edu/). Participants (N = 2,788) reported emotional states (14 negative, 13 positive) once each day for 8 days. Emodiversity scores were computed for each day using an adaptation of Shannon's biodiversity index and averaged across the days. All models included average affect and demographic covariates. Greater positive emodiversity was associated with fewer symptoms of depression and anxiety and fewer physical health symptoms but was not related to eudaimonic well-being nor cognitive functioning. In contrast to previous research, greater negative emodiversity was related to more symptoms of depression and anxiety and more physical health symptoms. Greater negative emodiversity was only associated with one positive outcome: better executive functioning. These findings illustrate inconsistencies across studies in whether negative emodiversity is associated with better or worse outcomes and raise further questions about how the construct of emodiversity can be better refined. 
(PsycInfo Database Record (c) 2022 APA, all rights reserved).",2020-04-09 +32009147,IDR2D identifies reproducible genomic interactions.,"Chromatin interaction data from protocols such as ChIA-PET, HiChIP and Hi-C provide valuable insights into genome organization and gene regulation, but can include spurious interactions that do not reflect underlying genome biology. We introduce an extension of the Irreproducible Discovery Rate (IDR) method called IDR2D that identifies replicable interactions shared by chromatin interaction experiments. IDR2D provides a principled set of interactions and eliminates artifacts from single experiments. The method is available as a Bioconductor package for the R community, as well as an online service at https://idr2d.mit.edu.",2020-04-01 +,"Pythiopina, an enigmatic subtribe of darkling beetles (Coleoptera: Tenebrionidae: Pedinini): taxonomic revision, microtomography, ecological niche models and phylogenetic position","Morphological, anatomical, and distributional data concerning the South African endemic beetle subtribe Pythiopina (Tenebrionidae: Pedinini) are revised. Five species, representing two genera, are recognized. Included in this total is one new species (Meglyphus mariae Kamiński sp.n.). The following species are placed in synonymy: Meglyphus ciliatipes [=Meglyphus calitzensis syn.n.]; Meglyphus laenoides [=Meglyphus andreaei syn.n.; =Meglyphus namaqua syn.n.]. Microtomographic models for all valid Pythiopina species, including the holotype of the newly described species, are presented and analysed. Endoskeleton morphology (specifically characters of the tentorium and metendosternite) proved to be informative at the specific and generic levels. An identification key is provided to all known species of the subtribe. Environmental niche models are presented for the majority of species. 
A molecular phylogeny of Pedinini based on six genetic loci (28S: D1–D3 region; 28S: D4–D5 region, COII, ArgK, CAD2, wg) was also produced to explore the phylogenetic position of Pythiopina. This analysis is the first to include representatives of all seven subtribes of Pedinini, and supports a sister relationship between Pythiopina and the Palaearctic subtribe Dendarina. Results also suggest the existence of a second pair of sister taxa within Pedinini (in addition to Melambiina) with an amphitropical African distribution. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:285AD87A‐46B1‐4FE9‐BC57‐949EA1F70D49.",2018-01-01 +27624619,An overview of the oxytocin-oxytocin receptor signaling network.,"Oxytocin, a nine amino acid long neuropeptide hormone, is synthesized in the hypothalamus and stored and released from the neural lobe of the pituitary gland. Although commonly known for its central role in the regulation of parturition and lactation, oxytocin signaling also plays a key role in modulating social behavior, evoking contentment, initiating maternal behavior, inducing trust, generosity and bonding in humans and animals. Oxytocin signaling can prove to be of great importance in therapeutics and drug targeting because of its diverse range of actions. However, a well annotated map of oxytocin signaling pathway is currently lacking in the publicly available pathway resources. Therefore, we systematically curated the available signaling information of oxytocin from published literature and collated the data to develop a more complete map. We cataloged 66 molecules belonging to oxytocin signaling pathway, which included 9 protein-protein interactions, 39 post-translational modifications, 14 protein translocation events and 22 activation/inhibition events. 
Further, Oxytocin signaling network data is made freely available to academic fraternity by integrating this into NetPath ( http://www.netpath.org /), a freely available human signaling pathway resource developed previously by our group.",2016-09-14 +29329372,Enrichment analysis with EpiAnnotator.,"Motivation:Deciphering relevant biological insights from epigenomic data can be a challenging task. One commonly used approach is to perform enrichment analysis. However, finding, downloading and using the publicly available functional annotations require time, programming skills and IT infrastructure. Here we describe the online tool EpiAnnotator for performing enrichment analyses on epigenomic data in a fast and user-friendly way. Results:EpiAnnotator is an R Package accompanied by a web interface. It contains regularly updated annotations from 4 public databases: Blueprint, RoadMap, GENCODE and the UCSC Genome Browser. Annotations are hosted locally or in a server environment and automatically updated by scripts of our own design. Thousands of tracks are available, reflecting data on a variety of tissues, cell types and cell lines from the human and mouse genomes. Users need to upload sets of selected and background regions. Results are displayed in customizable and easily interpretable figures. Availability and implementation:The R package and Shiny app are open source and available under the GPL v3 license. EpiAnnotator's web interface is accessible at http://computational-epigenomics.com/en/epiannotator. Contact:epiannotator@computational-epigenomics.com.",2018-05-01 +29126249,The Encyclopedia of DNA elements (ENCODE): data portal update.,"The Encyclopedia of DNA Elements (ENCODE) Data Coordinating Center has developed the ENCODE Portal database and website as the source for the data and metadata generated by the ENCODE Consortium. Two principles have motivated the design. 
First, experimental protocols, analytical procedures and the data themselves should be made publicly accessible through a coherent, web-based search and download interface. Second, the same interface should serve carefully curated metadata that record the provenance of the data and justify its interpretation in biological terms. Since its initial release in 2013 and in response to recommendations from consortium members and the wider community of scientists who use the Portal to access ENCODE data, the Portal has been regularly updated to better reflect these design principles. Here we report on these updates, including results from new experiments, uniformly-processed data from other projects, new visualization tools and more comprehensive metadata to describe experiments and analyses. Additionally, the Portal is now home to meta(data) from related projects including Genomics of Gene Regulation, Roadmap Epigenome Project, Model organism ENCODE (modENCODE) and modERN. The Portal now makes available over 13000 datasets and their accompanying metadata and can be accessed at: https://www.encodeproject.org/.",2018-01-01 +30596128,Dataset of the first de novo transcriptome assembly of the arillode of Baccaurea motleyana.,"Baccaurea motleyana Müll. Arg. (rambai) is one of the underutilized fruit natives to Indonesia, Thailand, and Malaya Peninsula and it is mostly cultivated in Java island (Lim, 2012) [1]. The edible part of fruits is white and reddish arillodes in which having sweet to acid-sweet tastes. However, nucleotide as well as transcriptome information of this species is still scarce, no information has been deposited in GenBank. In this data article, we performed for the first time of de novo assembly of transcriptome using paired-end Illumina technology. The assembled contigs were constructed using Trinity and after filtering and clustering, produced 37,077 contigs. The contig ranged 201-4972 bp and N50 has 696 bp. 
The contig was annotated with several database such as SwissProt, TrEMBL, nr and nt NCBI databases. The raw reads were deposited in DDBJ with DRA numbers, DRA007358. The assembled contigs of transcriptome are deposited in the DDBJ TSA with accession number, IADP01000001-IADP01037077 and also can be accessed at http://rujakbase.id.",2018-12-13 +29092055,SABIO-RK: an updated resource for manually curated biochemical reaction kinetics.,"SABIO-RK (http://sabiork.h-its.org/) is a manually curated database containing data about biochemical reactions and their reaction kinetics. The data are primarily extracted from scientific literature and stored in a relational database. The content comprises both naturally occurring and alternatively measured biochemical reactions and is not restricted to any organism class. The data are made available to the public by a web-based search interface and by web services for programmatic access. In this update we describe major improvements and extensions of SABIO-RK since our last publication in the database issue of Nucleic Acid Research (2012). (i) The website has been completely revised and (ii) allows now also free text search for kinetics data. (iii) Additional interlinkages with other databases in our field have been established; this enables users to gain directly comprehensive knowledge about the properties of enzymes and kinetics beyond SABIO-RK. (iv) Vice versa, direct access to SABIO-RK data has been implemented in several systems biology tools and workflows. (v) On request of our experimental users, the data can be exported now additionally in spreadsheet formats. (vi) The newly established SABIO-RK Curation Service allows to respond to specific data requirements.",2018-01-01 +30975085,A splice donor variant in CCDC189 is associated with asthenospermia in Nordic Red dairy cattle.,"

Background

Cattle populations are highly amenable to the genetic mapping of male reproductive traits because longitudinal data on ejaculate quality and dense microarray-derived genotypes are available for thousands of artificial insemination bulls. Two young Nordic Red bulls delivered sperm with low progressive motility (i.e., asthenospermia) during a semen collection period of more than four months. The bulls were related through a common ancestor on both their paternal and maternal ancestry. Thus, a recessive mode of inheritance of asthenospermia was suspected.

Results

Both bulls were genotyped at 54,001 SNPs using the Illumina BovineSNP50 Bead chip. A scan for autozygosity revealed that they were identical by descent for a 2.98 Mb segment located on bovine chromosome 25. This haplotype was not found in the homozygous state in 8557 fertile bulls although five homozygous haplotype carriers were expected (P = 0.018). Whole genome-sequencing uncovered that both asthenospermic bulls were homozygous for a mutation that disrupts a canonical 5' splice donor site of CCDC189 encoding the coiled-coil domain containing protein 189. Transcription analysis showed that the derived allele activates a cryptic splice site resulting in a frameshift and premature termination of translation. The mutated CCDC189 protein is truncated by more than 40%, thus lacking the flagellar C1a complex subunit C1a-32 that is supposed to modulate the physiological movement of the sperm flagella. The mutant allele occurs at a frequency of 2.5% in Nordic Red cattle.

Conclusions

Our study in cattle uncovered that CCDC189 is required for physiological movement of sperm flagella thus enabling active progression of spermatozoa and fertilization. A direct gene test may be implemented to monitor the asthenospermia-associated allele and prevent the birth of homozygous bulls that are infertile. Our results have been integrated in the Online Mendelian Inheritance in Animals (OMIA) database ( https://omia.org/OMIA002167/9913/ ).",2019-04-11 +31770636,Automated EEG mega-analysis I: Spectral and amplitude characteristics across studies.,"Significant achievements have been made in the fMRI field by pooling statistical results from multiple studies (meta-analysis). More recently, fMRI standardization efforts have focused on enabling the joint analysis of raw fMRI data across studies (mega-analysis), with the hope of achieving more detailed insights. However, it has not been clear if such analyses in the EEG field are possible or equally fruitful. Here we present the results of a large-scale EEG mega-analysis using 18 studies from six sites representing several different experimental paradigms. We demonstrate that when meta-data are consistent across studies, both channel-level and source-level EEG mega-analysis are possible and can provide insights unavailable in single studies. The analysis uses a fully-automated processing pipeline to reduce line noise, interpolate noisy channels, perform robust referencing, remove eye-activity, and further identify outlier signals. We define several robust measures based on channel amplitude and dispersion to assess the comparability of data across studies and observe the effect of various processing steps on these measures. Using ICA-based dipolar sources, we also observe consistent differences in overall frequency baseline amplitudes across brain areas. For example, we observe higher alpha in posterior vs anterior regions and higher beta in temporal regions. 
We also detect consistent differences in the slope of the aperiodic portion of the EEG spectrum across brain areas. In a companion paper, we apply mega-analysis to assess commonalities in event-related EEG features across studies. The continuous raw and preprocessed data used in this analysis are available through the DataCatalog at https://cancta.net.",2019-11-23 +32266595,Dynamic prediction of cancer-specific survival for primary hypopharyngeal squamous cell carcinoma.,"OBJECTIVES:This study investigated a large cohort of patients to construct a predictive nomogram and a web-based survival rate calculator for dynamically predicting the cancer-specific survival of patients with primary hypopharyngeal squamous cell carcinoma (HSCC). METHODS:Patients (n = 2007) initially diagnosed with primary HSCC from 2004 to 2015 were extracted from the Surveillance, Epidemiology, and End Results (SEER) database. All patients were randomly divided into the training and validation cohorts (1:1). The Lasso Cox regression model was applied to identify independent risk factors of cancer-specific survival for a predictive nomogram and a web-based calculator. The model was evaluated by concordance index, calibration, and decision curve analysis. RESULTS:Cancer-specific survival rates decreased with time, while 3-year conditional survival increased. Cancer-specific deaths evolved from relatively high within the first 3 years to low thereafter. Age, race, T stage, N stage, M stage, surgery, radiotherapy, chemotherapy, and marital status were identified as independent risk factors. We constructed a predictive nomogram for survival and a web-based calculator ( https://linzhongyang.shinyapps.io/Hypopharyngeal/ ). Additionally, a prognostic risk stratification was developed according to nomogram total points. 
CONCLUSIONS:Patients with primary HSCC were found at a high risk of cancer-specific death during the first 3 years, indicating that additional effective follow-up strategies should be implemented over the period. This is the first study to construct a predictive nomogram and a web-based calculator for all patients with HSCC.",2020-04-07 +25971743,GLASS: a comprehensive database for experimentally validated GPCR-ligand associations.,"

Motivation

G protein-coupled receptors (GPCRs) are probably the most attractive drug target membrane proteins, which constitute nearly half of drug targets in the contemporary drug discovery industry. While the majority of drug discovery studies employ existing GPCR and ligand interactions to identify new compounds, there remains a shortage of specific databases with precisely annotated GPCR-ligand associations.

Results

We have developed a new database, GLASS, which aims to provide a comprehensive, manually curated resource for experimentally validated GPCR-ligand associations. A new text-mining algorithm was proposed to collect GPCR-ligand interactions from the biomedical literature, which are then crosschecked with five primary pharmacological datasets, to enhance the coverage and accuracy of GPCR-ligand association data identifications. A special architecture has been designed to allow users to perform homologous ligand searches with flexible bioactivity parameters. The current database contains ∼500 000 unique entries, of which the vast majority stems from ligand associations with rhodopsin- and secretin-like receptors. The GLASS database should find its most useful application in various in silico GPCR screening and functional annotation studies.

Availability and implementation

The website of GLASS database is freely available at http://zhanglab.ccmb.med.umich.edu/GLASS/.

Contact

zhng@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-13 +25705262,When drug discovery meets web search: Learning to Rank for ligand-based virtual screening.,"BACKGROUND:The rapid increase in the emergence of novel chemical substances presents a substantial demands for more sophisticated computational methodologies for drug discovery. In this study, the idea of Learning to Rank in web search was presented in drug virtual screening, which has the following unique capabilities of 1). Applicable of identifying compounds on novel targets when there is not enough training data available for these targets, and 2). Integration of heterogeneous data when compound affinities are measured in different platforms. RESULTS:A standard pipeline was designed to carry out Learning to Rank in virtual screening. Six Learning to Rank algorithms were investigated based on two public datasets collected from Binding Database and the newly-published Community Structure-Activity Resource benchmark dataset. The results have demonstrated that Learning to rank is an efficient computational strategy for drug virtual screening, particularly due to its novel use in cross-target virtual screening and heterogeneous data integration. CONCLUSIONS:To the best of our knowledge, we have introduced here the first application of Learning to Rank in virtual screening. The experiment workflow and algorithm assessment designed in this study will provide a standard protocol for other similar studies. All the datasets as well as the implementations of Learning to Rank algorithms are available at http://www.tongji.edu.cn/~qiliu/lor_vs.html. 
Graphical AbstractThe analogy between web search and ligand-based drug discovery.",2015-02-13 +24270791,CentrosomeDB: a new generation of the centrosomal proteins database for Human and Drosophila melanogaster.,"We present the second generation of centrosomeDB, available online at http://centrosome.cnb.csic.es, with a significant expansion of 1357 human and drosophila centrosomal genes and their corresponding information. The centrosome of animal cells takes part in important biological processes such as the organization of the interphase microtubule cytoskeleton and the assembly of the mitotic spindle. The active research done during the past decades has produced lots of data related to centrosomal proteins. Unfortunately, the accumulated data are dispersed among diverse and heterogeneous sources of information. We believe that the availability of a repository collecting curated evidences of centrosomal proteins would constitute a key resource for the scientific community. This was our first motivation to introduce CentrosomeDB in NAR database issue in 2009, collecting a set of human centrosomal proteins that were reported in the literature and other sources. The intensive use of this resource during these years has encouraged us to present this new expanded version. Using our database, the researcher is offered the possibility to study the evolution, function and structure of the centrosome. We have compiled information from many sources, including Gene Ontology, disease-association, single nucleotide polymorphisms and associated gene expression experiments. Special interest has been paid to protein-protein interaction.",2013-11-21 +31988726,Mitochondrial genomes and genetic structure of the Kemp's ridley sea turtle (Lepidochelys kempii).,"The Kemp's ridley (Lepidochelys kempii) is the world's most endangered sea turtle species and is primarily distributed in the Gulf of Mexico. 
In the United States, South Padre Island, Texas serves as a key nesting ground for the species. Genetic studies of the Kemp's ridley have been used to aid in conservation and management practices, with the mitochondrial control region as the most commonly used marker due to its perceived hypervariability and ease of sequencing. However, with the advent of next generation sequencing technology, targeting complete mitochondrial genomes is now feasible. Here, we describe a more complete mitochondrial genome for the Kemp's ridley than has been previously published in literature and demonstrate a cost-effective and efficient method for obtaining complete mitochondrial genomes from sea turtles. We compare the genetic diversity and taxonomic resolution obtained from whole mitochondrial genomes to that obtained from the mitochondrial control region alone. We compare current genetic diversity with previous records. Furthermore, we evaluate the genetic structure between the breeding stock in South Padre Island and that of deceased Kemp's ridleys recovered on the Northern coast of the Gulf of Mexico after the 2010 BP Deepwater Horizon oil spill, and of Kemp's ridleys stranded on the East Coast of the United States. Our results show that complete mitochondrial genomes provide greater resolution than the control region alone. They also show that the genetic diversity of the Kemp's ridley has remained stable, despite large population declines, and that the genetic makeup of deceased turtles stranded after the Deepwater Horizon oil spill is indistinguishable from the breeding stock in South Padre Island, Texas.

Open data badge

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://www.ncbi.nlm.nih.gov/genbank/.",2019-12-05 +30416387,TFmapper: A Tool for Searching Putative Factors Regulating Gene Expression Using ChIP-seq Data.,"Background: Next-generation sequencing coupled to chromatin immunoprecipitation (ChIP-seq), DNase I hypersensitivity (DNase-seq) and the transposase-accessible chromatin assay (ATAC-seq) has generated enormous amounts of data, markedly improved our understanding of the transcriptional and epigenetic control of gene expression. To take advantage of the availability of such datasets and provide clues on what factors, including transcription factors, epigenetic regulators and histone modifications, potentially regulates the expression of a gene of interest, a tool for simultaneous queries of multiple datasets using symbols or genomic coordinates as search terms is needed. Results: In this study, we annotated the peaks of thousands of ChIP-seq datasets generated by ENCODE project, or ChIP-seq/DNase-seq/ATAC-seq datasets deposited in Gene Expression Omnibus (GEO) and curated by Cistrome project; We built a MySQL database called TFmapper containing the annotations and associated metadata, allowing users without bioinformatics expertise to search across thousands of datasets to identify factors targeting a genomic region/gene of interest in a specified sample through a web interface. Users can also visualize multiple peaks in genome browsers and download the corresponding sequences. Conclusion: TFmapper will help users explore the vast amount of publicly available ChIP-seq/DNase-seq/ATAC-seq data and perform integrative analyses to understand the regulation of a gene of interest. 
The web server is freely accessible at http://www.tfmapper.org/.",2018-09-07 +32227657,dbMTS: A comprehensive database of putative human microRNA target site SNVs and their functional predictions.,"MicroRNAs (miRNA) are short noncoding RNAs that can repress the expression of protein-coding messenger RNAs (mRNAs) by binding to the 3'-untranslated region (UTR) of the target. Genetic mutations such as single nucleotide variants (SNVs) in the 3'-UTR of the mRNAs can disrupt miRNA regulation. In this study, we presented dbMTS, a database for miRNA target site (MTS) SNVs and their functional annotations. This database can help studies easily identify putative SNVs that affect miRNA targeting and facilitate the prioritization of their functional importance. dbMTS is freely available for academic use at http://database.liulab.science/dbMTS as a web service or a downloadable attached database of dbNSFP.",2020-04-06 +31525920,Sharing Data from Molecular Simulations.,"Given the need for modern researchers to produce open, reproducible scientific output, the lack of standards and best practices for sharing data and workflows used to produce and analyze molecular dynamics (MD) simulations has become an important issue in the field. There are now multiple well-established packages to perform molecular dynamics simulations, often highly tuned for exploiting specific classes of hardware, each with strong communities surrounding them, but with very limited interoperability/transferability options. Thus, the choice of the software package often dictates the workflow for both simulation production and analysis. The level of detail in documenting the workflows and analysis code varies greatly in published work, hindering reproducibility of the reported results and the ability for other researchers to build on these studies. An increasing number of researchers are motivated to make their data available, but many challenges remain in order to effectively share and reuse simulation data. 
To discuss these and other issues related to best practices in the field in general, we organized a workshop in November 2018 ( https://bioexcel.eu/events/workshop-on-sharing-data-from-molecular-simulations/ ). Here, we present a brief overview of this workshop and topics discussed. We hope this effort will spark further conversation in the MD community to pave the way toward more open, interoperable, and reproducible outputs coming from research studies using MD simulations.",2019-10-11 +31909129,Data of detection and characterization of nitrated conjugated-linoleic acid (NO2-cLA) in LDL.,"Under physiological and pathophysiological conditions, lipid nitration occurs generating nitro-fatty acids (NFA) with pleiotropic activities as modulation of inflammatory cell responses. Foam cell formation and atherosclerotic lesion development have been extensively related to low-density lipoprotein (LDL) oxidation. Considering our manuscript ""Fatty acid nitration in human low-density lipoprotein"" (https://doi.org/10.1016/j.abb.2019.108190), herein we report the oxidation versus nitration of human LDL protein and lipid fractions. Data is shown on LDL fatty acid nitration, in particular, formation and quantitation of nitro-conjugated linoleic acid (NO2-cLA) under mild nitration conditions. In parallel to NO2-cLA formation, depletion of endogenous antioxidants, protein tyrosine nitration, and carbonyl formation is observed. Overall, our data propose the formation of a potential anti-atherogenic form of LDL carrying NFA.",2019-12-20 +33197361,Factor Analysis of Spontaneous Speech in Aphasia.,"Purpose Spontaneous speech tasks are critically important for characterizing spoken language production deficits in aphasia and for assessing the impact of therapy. The utility of such tasks arises from the complex interaction of linguistic demands (word retrieval, sentence formulation, articulation). 
However, this complexity also makes spontaneous speech hugely variable and difficult to assess. The current study aimed to simplify the problem by identifying latent factors underlying performance in spontaneous speech in aphasia. The ecological validity of the factors was examined by examining how well the factor structures corresponded to traditionally defined aphasia subtypes. Method A factor analysis was conducted on 17 microlinguistic measures of narratives from 274 individuals with aphasia in AphasiaBank. The resulting factor scores were compared across aphasia subtypes. Supervised (linear discriminant analysis) and unsupervised (latent profile analysis) classification techniques were then conducted on the factor scores and the solutions compared to traditional aphasia subtypes. Results Six factors were identified. Two reflected aspects of fluency, one at the phrase level (Phrase Building) and one at the narrative level (Narrative Productivity). Two other factors reflected the accuracy of productions, one at the word level (Semantic Anomaly) and one at the utterance level (Grammatical Error). The other two factors reflected the complexity of sentence structures (Grammatical Complexity) and the use of repair behaviors (Repair), respectively. Linear discriminant analyses showed that only about two thirds of speakers were classified correctly and that misclassifications were similar to disagreements between clinical diagnoses. The most accurately diagnosed syndromes were the largest groups-Broca's and anomic aphasia. The latent profile analysis also generated profiles similar to Broca's and anomic aphasia but separated some subtypes according to severity. Conclusions The factor solution and the classification analyses reflected broad patterns of spontaneous speech performance in a large and representative sample of individuals with aphasia. 
However, such data-driven approaches present a simplified picture of aphasia patterns, much as traditional syndrome categories do. To ensure ecological validity, a hybrid approach is recommended, balancing population-level analyses with examination of performance at the level of theoretically specified subgroups or individuals. Supplemental Material https://doi.org/10.23641/asha.13232354.",2020-11-16 +30229072,An open-access dataset of crop production by farm size from agricultural censuses and surveys.,"This dataset is a cross-country convenience sample of primary data measuring crop production and/or area by farm size for 55 countries that underlies the article entitled ""How much of the world's food do smallholders produce?"" (DOI: https://doi.org/10.1016/j.gfs.2018.05.002). The harmonized dataset is nationally representative with subnational resolution, sourced from agricultural censuses and household surveys. The dataset covers 154 crop species and 11 farm size classes, and is ontologically interoperable with other global agricultural datasets, such as the Food and Agricultural Organization's statistical database (FAOSTAT), and the World Census of Agriculture (WCA). The dataset includes estimates of the quantity of food, feed, processed agricultural commodities, seed, waste (post-harvest loss), or other uses; and potential human nutrition (i.e., kilocalories, fats, and proteins) generated by each farm size class. We explain the details of the dataset, the inclusion criteria used to assess each data source, the data harmonization procedures, and the spatial coverage. We detail assumptions underlying the construction of this dataset, including the use of aggregate field size as a proxy for farm size in some cases, and crop species omission biases resulting from converting local species names to harmonized names. 
We also provide bias estimates for commonly used methods for estimating food production by farm size: use of constant yields across farm size classes when crop production is not available, and relying on nationally representative household sample surveys that omitted non-family farms. Together this dataset represents the most complete empirically grounded estimate of how much food and nutrition smallholder farmers produce from crops.",2018-06-23 +31792574,Association of moderate and vigorous physical activity with incidence of type 2 diabetes and subsequent mortality: 27 year follow-up of the Whitehall II study.,"

Aims/hypothesis

This work examined the role of physical activity in the course of diabetes using data spanning nearly three decades. Our first aim was to examine the long-term association of moderate and vigorous physical activity with incidence of type 2 diabetes. Our second aim was to investigate the association of moderate-to-vigorous physical activity post-diabetes diagnosis with subsequent risk of all-cause and cardiovascular disease mortality.

Methods

A total of 9987 participants from the Whitehall II cohort study free of type 2 diabetes at baseline (1985-1988) were followed for incidence of type 2 diabetes, based on clinical assessments between 1985 and 2016 and linkage to electronic health records up to 31 March 2017. We first examined the association of moderate and vigorous physical activity measured by questionnaire in 1985-1988 (mean age 44.9 [SD 6.0] years; women, 32.7%) with incident type 2 diabetes, using the interval-censored, illness-death model, a competing risk analysis that takes into account both competing risk of death and intermittent ascertainment of diabetes due to reliance on data collection cycles (interval-censored). The second analysis was based on individuals with type 2 diabetes over the follow-up period where we used Cox regression with inverse probability weighting to examine the association of moderate-to-vigorous physical activity after diagnosis of type 2 diabetes with risk of all-cause and cardiovascular disease mortality.

Results

Of the 9987 participants, 1553 developed type 2 diabetes during a mean follow-up of 27.1 (SD 6.3) years. Compared with participants who were inactive in 1985-1988, those who undertook any duration of moderate-to-vigorous physical activity had a lower risk of type 2 diabetes (HR 0.85 [95% CI 0.75, 0.97], p = 0.02; analysis adjusted for sociodemographic, behavioural and health-related factors). In 1026 participants with a diagnosis of type 2 diabetes over the follow-up period, data on moderate-to-vigorous physical activity after diabetes diagnosis were available; 165 all-cause deaths and 55 cardiovascular disease-related deaths were recorded during a mean follow-up of 8.8 (SD 6.1) years. In these participants with diabetes, any duration of moderate-to-vigorous physical activity was associated with lower all-cause mortality (HR 0.61 [95% CI 0.41, 0.93], p = 0.02) while the association with cardiovascular mortality was evident only for physical activity undertaken at or above recommendations (≥2.5 h per week of moderate-to-vigorous physical activity or ≥1.25 h per week of vigorous physical activity; HR 0.40 [95% CI 0.16, 0.96], p = 0.04) in fully adjusted models.

Conclusions/interpretation

Moderate-to-vigorous physical activity plays an important role in diabetes, influencing both its incidence and prognosis. A protective effect on incidence was seen for durations of activity below recommendations and a marginal additional benefit was observed at higher durations. Among individuals with type 2 diabetes, any duration of moderate-to-vigorous physical activity was associated with reduced all-cause mortality while recommended durations of physical activity were required for protection against cardiovascular disease-related mortality.

Data availability

Whitehall II data, protocols and other metadata are available to the scientific community. Please refer to the Whitehall II data sharing policy at https://www.ucl.ac.uk/epidemiology-health-care/research/epidemiology-and-public-health/research/whitehall-ii/data-sharing.",2019-12-02 +30481257,SkeletalVis: an exploration and meta-analysis data portal of cross-species skeletal transcriptomics data.,"

Motivation

Skeletal diseases are prevalent in society, but improved molecular understanding is required to formulate new therapeutic strategies. Large and increasing quantities of available skeletal transcriptomics experiments give the potential for mechanistic insight of both fundamental skeletal biology and skeletal disease. However, no current repository provides access to processed, readily interpretable analysis of this data. To address this, we have developed SkeletalVis, an exploration portal for skeletal gene expression experiments.

Results

The SkeletalVis data portal provides an exploration and comparison platform for analysed skeletal transcriptomics data. It currently hosts 287 analysed experiments with 739 perturbation responses with comprehensive downstream analysis. We demonstrate its utility in identifying both known and novel relationships between skeletal expression signatures. SkeletalVis provides users with a platform to explore the wealth of available expression data, develop consensus signatures and the ability to compare gene signatures from new experiments to the analysed data to facilitate meta-analysis.

Availability and implementation

The SkeletalVis data portal is freely accessible at http://phenome.manchester.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +31020553,Structural and Functional Annotation of Eukaryotic Genomes with GenSAS.,"The Genome Sequence Annotation Server (GenSAS, https://www.gensas.org ) is a secure, web-based genome annotation platform for structural and functional annotation, as well as manual curation. Requiring no installation by users, GenSAS integrates popular command line-based, annotation tools under a single, easy-to-use, online interface. GenSAS integrates JBrowse and Apollo, so users can view annotation data and manually curate gene models. Users are guided step by step through the annotation process by embedded instructions and a more in-depth GenSAS User's Guide. In addition to a genome assembly file, users can also upload organism-specific transcript, protein, and RNA-seq read evidence for use in the annotation process. The latest versions of the NCBI RefSeq transcript and protein databases and the SwissProt and TrEMBL protein databases are provided for all users. GenSAS projects can be shared with other GenSAS users enabling collaborative annotation. Once annotation is complete, GenSAS generates the final files of the annotated gene models in common file formats for use with other annotation tools, submission to a repository, and use in publications.",2019-01-01 +30395323,Plasmid ATLAS: plasmid visual analytics and identification in high-throughput sequencing data.,"Plasmid ATLAS (pATLAS, http://www.patlas.site) provides an easy-to-use web accessible database with visual analytics tools to explore the relationships of plasmids available in NCBI's RefSeq database. pATLAS has two main goals: (i) to provide an easy way to search for plasmids deposited in NCBI RefSeq and their associated metadata; (ii) to visualize the relationships of plasmids in a graph, allowing the exploration of plasmid evolution. 
pATLAS allows searching by plasmid name, bacterial host taxa, antibiotic resistance and virulence genes, plasmid families, and by sequence length and similarity. pATLAS is also able to represent in the plasmid network, plasmid sets identified by external pipelines using mapping, mash screen or assembly from high-throughput sequencing data. By representing the identified hits within the network of relationships between plasmids, allowing the possibility of removing redundant results, and by taking advantage of the browsing capabilities of pATLAS, users can more easily interpret the pipelines' results. All these analyses can be saved to a JSON file for sharing and future re-evaluation. Furthermore, by offering a REST-API, the pATLAS database and network display are easily accessible by other interfaces or pipelines.",2019-01-01 +31510695,scOrange-a tool for hands-on training of concepts from single-cell data analytics.,"

Motivation

Single-cell RNA sequencing allows us to simultaneously profile the transcriptomes of thousands of cells and to indulge in exploring cell diversity, development and discovery of new molecular mechanisms. Analysis of scRNA data involves a combination of non-trivial steps from statistics, data visualization, bioinformatics and machine learning. Training molecular biologists in single-cell data analysis and empowering them to review and analyze their data can be challenging, both because of the complexity of the methods and the steep learning curve.

Results

We propose a workshop-style training in single-cell data analytics that relies on an explorative data analysis toolbox and a hands-on teaching style. The training relies on scOrange, a newly developed extension of a data mining framework that features workflow design through visual programming and interactive visualizations. Workshops with scOrange can proceed much faster than similar training methods that rely on computer programming and analysis through scripting in R or Python, allowing the trainer to cover more ground in the same time-frame. We here review the design principles of the scOrange toolbox that support such workshops and propose a syllabus for the course. We also provide examples of data analysis workflows that instructors can use during the training.

Availability and implementation

scOrange is an open-source software. The software, documentation and an emerging set of educational videos are available at http://singlecell.biolab.si.",2019-07-01 +,Govocitos: A software tool for estimating fish fecundity based on digital analysis of histological images,"To estimate productivity of a fish stock, the precise determination of fish fecundity is essential. The stereological method accurately estimates fecundity from histological images of a fish gonad. For that purpose, a hexagonal grid is overlaid on the histological image and the number of grid points associated to each oocyte (reproductive cells) category and the number of oocytes in each category is counted. This process is done manually often using off-the-shelf software, but it is very time-consuming, requires specialized technicians, and does not allow to review the calculations. In this paper, we describe and evaluate the software Govocitos, which offers an easy and automatic way to estimate fecundity using the stereological method. Govocitos contains a module to automatically detects the matured oocytes in the slice (nearly 80% of oocytes are correctly detected) and a module to automatically classify the oocytes according to the presence/absence of nucleus (with 84% of accuracy) and to three development stages (with 87% of accuracy). It also provides a user friendly GUI that allows the experts to modify the outlines and classifications of oocytes, to calculate diameters, areas and roundness, to build diameter frequency histograms, to count the points and objects inside the grid, to estimate partial and potential fecundity and to export the data to files and into a database. In addition, Govocitos provides the possibility of varying grid characteristics, it can be trained to work with different species and it allows to check and supervise the calculations whenever needed including in a later point in time. 
Govocitos is a free software that can be downloaded from http://lia.ei.uvigo.es/daeira/software/govocitos or http://citius.usc.es/w/govocitos.",2016-07-01 +30285246,CRISPRlnc: a manually curated database of validated sgRNAs for lncRNAs.,"The CRISPR/Cas9 system, as a revolutionary genome editing tool for all areas of molecular biology, provides new opportunities for research on lncRNA's function. However, designing a CRISPR/Cas9 single guide RNA (sgRNA) for lncRNA is not easy with an unwarrantable effectiveness. Thus, it is worthy of collecting validated sgRNAs, to assist in efficiently choosing sgRNA with an expected activity. CRISPRlnc (http://www.crisprlnc.org or http://crisprlnc.xtbg.ac.cn) is a manually curated database of validated CRISPR/Cas9 sgRNAs for lncRNAs from all species. After manually reviewing more than 200 published literature, the current version of CRISPRlnc contains 305 lncRNAs and 2102 validated sgRNAs across eight species, including mammalian, insect and plant. We handled the ID, position in the genome, sequence and functional description of these lncRNAs, as well as the sequence, protoacceptor-motif (PAM), CRISPR type and validity of their paired sgRNAs. In CRISPRlnc, we provided the tools for browsing, searching and downloading data, as well as online BLAST service and genome browse server. As the first database against the validated sgRNAs of lncRNAs, CRISPRlnc will provide a new and powerful platform to promote CRISPR/Cas9 applications for future functional studies of lncRNAs.",2019-01-01 +25294826,MethBank: a database integrating next-generation sequencing single-base-resolution DNA methylation programming data.,"DNA methylation plays crucial roles during embryonic development. Here we present MethBank (http://dnamethylome.org), a DNA methylome programming database that integrates the genome-wide single-base nucleotide methylomes of gametes and early embryos in different model organisms. 
Unlike extant relevant databases, MethBank incorporates the whole-genome single-base-resolution methylomes of gametes and early embryos at multiple different developmental stages in zebrafish and mouse. MethBank allows users to retrieve methylation levels, differentially methylated regions, CpG islands, gene expression profiles and genetic polymorphisms for a specific gene or genomic region. Moreover, it offers a methylome browser that is capable of visualizing high-resolution DNA methylation profiles as well as other related data in an interactive manner and thus is of great help to users investigating methylation patterns and changes of gametes and early embryos at different developmental stages. Ongoing efforts are focused on incorporation of methylomes and related data from other organisms. Together, MethBank features integration and visualization of high-resolution DNA methylation data as well as other related data, enabling identification of potential DNA methylation signatures in different developmental stages and accordingly providing an important resource for the epigenetic and developmental studies.",2014-10-07 +30012015,Accurate prediction of human miRNA targets via graph modeling of the miRNA-target duplex.,"miRNAs are involved in many critical cellular activities through binding to their mRNA targets, e.g. in cell proliferation, differentiation, death, growth control, and developmental timing. Accurate prediction of miRNA targets can assist efficient experimental investigations on the functional roles of miRNAs. Their prediction, however, remains a challenging task due to the lack of experimental data about the tertiary structure of miRNA-target binding duplexes. In particular, correlations of nucleotides in the binding duplexes may not be limited to the canonical Watson Crick base pairs (BPs) as they have been perceived; methods based on secondary structure prediction (typically minimum free energy (MFE)) have only had mixed success. 
In this work, we characterized miRNA binding duplexes with a graph model to capture the correlations between pairs of nucleotides of an miRNA and its target sequences. We developed machine learning algorithms to train the graph model to predict the target sites of miRNAs. In particular, because imbalance between positive and negative samples can significantly deteriorate the performance of machine learning methods, we designed a novel method to re-sample available dataset to produce more informative data learning process. We evaluated our model and miRNA target prediction method on human miRNAs and target data obtained from mirTarBase, a database of experimentally verified miRNA-target interactions. The performance of our method in target prediction achieved a sensitivity of 86% with a false positive rate below 13%. In comparison with the state-of-the-art methods miRanda and RNAhybrid on the test data, our method outperforms both of them by a significant margin. The source codes, test sets and model files all are available at http://rna-informatics.uga.edu/?f=software&p=GraB-miTarget .",2018-05-07 +33023947,Hypoxia Alters the Response to Anti-EGFR Therapy by Regulating EGFR Expression and Downstream Signaling in a DNA Methylation-Specific and HIF-Dependent Manner.,"Intratumoral hypoxia occurs in 90% of solid tumors and is associated with a poor prognosis for patients. Cancer cells respond to hypoxic microenvironments by activating the transcription factors, hypoxia-inducible factor 1 (HIF1) and HIF2. Here, we studied the unique gene expression patterns of 31 different breast cancer cell lines exposed to hypoxic conditions. The EGFR, a member of the ErbB (avian erythroblastosis oncogene B) family of receptors that play a role in cell proliferation, invasion, metastasis, and apoptosis, was induced in seven of the 31 breast cancer cell lines by hypoxia. 
A functional hypoxia response element (HRE) was identified, which is activated upon HIF1 binding to intron 18 of the EGFR gene in cell lines in which EGFR was induced by hypoxia. CpG methylation of the EGFR HRE prevented induction under hypoxic conditions. The HRE of EGFR was methylated in normal breast tissue and some breast cancer cell lines, and could be reversed by treatment with DNA methyltransferase inhibitors. Induction of EGFR under hypoxia led to an increase in AKT, ERK, and Rb phosphorylation as well as increased levels of cyclin D1, A, B1, and E2F, and repression of p21 in an HIF1α-dependent manner, leading to cell proliferation and migration. Also, increased EGFR expression sensitized cells to EGFR inhibitors. Collectively, our data suggest that patients with hypoxic breast tumors and hypomethylated EGFR status may benefit from EGFR inhibitors currently used in the clinic. SIGNIFICANCE: Hypoxia sensitizes breast cancer cells to EGFR inhibitors in an HIF1α- and a methylation-specific manner, suggesting patients with hypoxic tumors may benefit from EGFR inhibitors already available in the clinic. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/22/4998/F1.large.jpg.",2020-10-06 +32444398,Apixaban versus No Anticoagulation in Patients Undergoing Long-Term Dialysis with Incident Atrial Fibrillation.,"

Background and objectives

The relative efficacy and safety of apixaban compared with no anticoagulation have not been studied in patients on maintenance dialysis with atrial fibrillation. We aimed to determine whether apixaban is associated with better clinical outcomes compared with no anticoagulation in this population.

Design, setting, participants, & measurements

This retrospective cohort study used 2012-2015 US Renal Data System data. Patients on maintenance dialysis with incident, nonvalvular atrial fibrillation treated with apixaban (521 patients) were matched for relevant baseline characteristics with patients not treated with any anticoagulant agent (1561 patients) using a propensity score. The primary outcome was hospital admission for a new stroke (ischemic or hemorrhagic), transient ischemic attack, or systemic thromboembolism. The secondary outcome was fatal or intracranial bleeding. Competing risk survival models were used.

Results

Compared with no anticoagulation, apixaban was not associated with lower incidence of the primary outcome: hazard ratio, 1.24; 95% confidence interval, 0.69 to 2.23; P=0.47. A significantly higher incidence of fatal or intracranial bleeding was observed with apixaban compared with no treatment: hazard ratio, 2.74; 95% confidence interval, 1.37 to 5.47; P=0.004. A trend toward fewer ischemic but more hemorrhagic strokes was seen with apixaban compared with no treatment. No significant difference in the composite outcome of myocardial infarction or ischemic stroke was seen with apixaban compared with no treatment. Compared with no anticoagulation, a significantly higher rate of the primary outcome and a significantly higher incidence of fatal or intracranial bleeding and of hemorrhagic stroke were seen in the subgroup of patients treated with the standard apixaban dose (5 mg twice daily) but not in patients who received the reduced apixaban dose (2.5 mg twice daily).

Conclusions

In patients with kidney failure and nonvalvular atrial fibrillation, treatment with apixaban was not associated with a lower incidence of new stroke, transient ischemic attack, or systemic thromboembolism but was associated with a higher incidence of fatal or intracranial bleeding.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2020_05_29_CJN11650919.mp3.",2020-05-22 +30597002,Libra: scalable k-mer-based tool for massive all-vs-all metagenome comparisons. ,"Shotgun metagenomics provides powerful insights into microbial community biodiversity and function. Yet, inferences from metagenomic studies are often limited by dataset size and complexity and are restricted by the availability and completeness of existing databases. De novo comparative metagenomics enables the comparison of metagenomes based on their total genetic content. We developed a tool called Libra that performs an all-vs-all comparison of metagenomes for precise clustering based on their k-mer content. Libra uses a scalable Hadoop framework for massive metagenome comparisons, Cosine Similarity for calculating the distance using sequence composition and abundance while normalizing for sequencing depth, and a web-based implementation in iMicrobe (http://imicrobe.us) that uses the CyVerse advanced cyberinfrastructure to promote broad use of the tool by the scientific community. A comparison of Libra to equivalent tools using both simulated and real metagenomic datasets, ranging from 80 million to 4.2 billion reads, reveals that methods commonly implemented to reduce compute time for large datasets, such as data reduction, read count normalization, and presence/absence distance metrics, greatly diminish the resolution of large-scale comparative analyses. In contrast, Libra uses all of the reads to calculate k-mer abundance in a Hadoop architecture that can scale to any size dataset to enable global-scale analyses and link microbial signatures to biological processes.",2019-02-01 +25078076,A community-based resource for automatic exome variant-calling and annotation in Mendelian disorders.,"

Background

Mendelian disorders are mostly caused by single mutations in the DNA sequence of a gene, leading to a phenotype with pathologic consequences. Whole Exome Sequencing of patients can be a cost-effective alternative to standard genetic screenings to find causative mutations of genetic diseases, especially when the number of cases is limited. Analyzing exome sequencing data requires specific expertise, high computational resources and a reference variant database to identify pathogenic variants.

Results

We developed a database of variations collected from patients with Mendelian disorders, which is automatically populated thanks to an associated exome-sequencing pipeline. The pipeline is able to automatically identify, annotate and store insertions, deletions and mutations in the database. The resource is freely available online at http://exome.tigem.it. The exome sequencing pipeline automates the analysis workflow (quality control and read trimming, mapping on reference genome, post-alignment processing, variation calling and annotation) using state-of-the-art software tools. The exome-sequencing pipeline has been designed to run on a computing cluster in order to analyse several samples simultaneously. The detected variants are annotated by the pipeline not only with the standard variant annotations (e.g. allele frequency in the general population, the predicted effect on gene product activity, etc.) but, more importantly, with allele frequencies across samples progressively collected in the database itself, stratified by Mendelian disorder.

Conclusions

We aim at providing a resource for the genetic disease community to automatically analyse whole exome-sequencing samples with a standard and uniform analysis pipeline, thus collecting variant allele frequencies by disorder. This resource may become a valuable tool to help dissecting the genotype underlying the disease phenotype through an improved selection of putative patient-specific causative or phenotype-associated variations.",2014-05-06 +29216398,ATTED-II in 2018: A Plant Coexpression Database Based on Investigation of the Statistical Property of the Mutual Rank Index.,"ATTED-II (http://atted.jp) is a coexpression database for plant species to aid in the discovery of relationships of unknown genes within a species. As an advanced coexpression analysis method, multispecies comparisons have the potential to detect alterations in gene relationships within an evolutionary context. However, determining the validity of comparative coexpression studies is difficult without quantitative assessments of the quality of coexpression data. ATTED-II (version 9) provides 16 coexpression platforms for nine plant species, including seven species supported by both microarray- and RNA sequencing (RNAseq)-based coexpression data. Two independent sources of coexpression data enable the assessment of the reproducibility of coexpression. The latest coexpression data for Arabidopsis (Ath-m.c7-1 and Ath-r.c3-0) showed the highest reproducibility (Jaccard coefficient = 0.13) among previous coexpression data in ATTED-II. We also investigated the statistical basis of the mutual rank (MR) index as a coexpression measure by bootstrap sampling of experimental units. We found that the error distribution of the logit-transformed MR index showed normality with equal variances for each coexpression platform. 
Because the MR error was strongly correlated with the number of samples for the coexpression data, typical confidence intervals for the MR index can be estimated for any coexpression platform. These new, high-quality coexpression data can be analyzed with any tool in ATTED-II and combined with external resources to obtain insight into plant biology.",2018-01-01 +31836897,Mesophotic.org: a repository for scientific information on mesophotic ecosystems. ,"Mesophotic coral ecosystems (MCEs) and temperate mesophotic ecosystems (TMEs) occur at depths of roughly 30-150 m depth and are characterized by the presence of photosynthetic organisms despite reduced light availability. Exploration of these ecosystems dates back several decades, but our knowledge remained extremely limited until about a decade ago, when a renewed interest resulted in the establishment of a rapidly growing research community. Here, we present the 'mesophotic.org' database, a comprehensive and curated repository of scientific literature on mesophotic ecosystems. Through both manually curated and automatically extracted metadata, the repository facilitates rapid retrieval of available information about particular topics (e.g. taxa or geographic regions), exploration of spatial/temporal trends in research and identification of knowledge gaps. The repository can be queried to comprehensively obtain available data to address large-scale questions and guide future research directions. Overall, the 'mesophotic.org' repository provides an independent and open-source platform for the ever-growing research community working on MCEs and TMEs to collate and expedite our understanding of the occurrence, composition and functioning of these ecosystems. 
Database URL: http://mesophotic.org/.",2019-01-01 +30418610,"eggNOG 5.0: a hierarchical, functionally and phylogenetically annotated orthology resource based on 5090 organisms and 2502 viruses.","eggNOG is a public database of orthology relationships, gene evolutionary histories and functional annotations. Here, we present version 5.0, featuring a major update of the underlying genome sets, which have been expanded to 4445 representative bacteria and 168 archaea derived from 25 038 genomes, as well as 477 eukaryotic organisms and 2502 viral proteomes that were selected for diversity and filtered by genome quality. In total, 4.4M orthologous groups (OGs) distributed across 379 taxonomic levels were computed together with their associated sequence alignments, phylogenies, HMM models and functional descriptors. Precomputed evolutionary analysis provides fine-grained resolution of duplication/speciation events within each OG. Our benchmarks show that, despite doubling the amount of genomes, the quality of orthology assignments and functional annotations (80% coverage) has persisted without significant changes across this update. Finally, we improved eggNOG online services for fast functional annotation and orthology prediction of custom genomics or metagenomics datasets. All precomputed data are publicly available for downloading or via API queries at http://eggnog.embl.de.",2019-01-01 +30329086,CAGm: a repository of germline microsatellite variations in the 1000 genomes project.,"The human genome harbors an abundance of repetitive DNA; however, its function continues to be debated. Microsatellites-a class of short tandem repeat-are established as an important source of genetic variation. Array length variants are common among microsatellites and affect gene expression; but, efforts to understand the role and diversity of microsatellite variation has been hampered by several challenges. 
Without adequate depth, both long-read and short-read sequencing may not detect the variants present in a sample; additionally, large sample sizes are needed to reveal the degree of population-level polymorphism. To address these challenges we present the Comparative Analysis of Germline Microsatellites (CAGm): a database of germline microsatellites from 2529 individuals in the 1000 genomes project. A key novelty of CAGm is the ability to aggregate microsatellite variation by population, ethnicity (super population) and gender. The database provides advanced searching for microsatellites embedded in genes and functional elements. All data can be downloaded as Microsoft Excel spreadsheets. Two use-case scenarios are presented to demonstrate its utility: a mononucleotide (A) microsatellite at the BAT-26 locus and a dinucleotide (CA) microsatellite in the coding region of FGFRL1. CAGm is freely available at http://www.cagmdb.org/.",2019-01-01 +30008130,Life Expectancy of Olympic Wrestling Champions in Comparison to the General Population.,"Although it was presumed that moderate exercise is a healthy practice but long term high intensity exercise is not, studies observed a life expectancy benefit for both high-intensity endurance and fast power sports athletes, but the data for contact sports are conflicting. Therefore, the author aimed to investigate the life expectancy of Olympic wrestling champions in comparison to the general population. Characteristics, vital status and life-span of the male Olympic wrestling champions were collected (1896-2016). The life expectancy of Olympic champions was compared with matched individuals of the general population (by country, age, and year of birth) obtained from the human mortality database ( http://www.mortality.org ). Overall, 341 male Olympic wrestling champions with median age of 25 (IQR 24-28) years at their Olympic victory were included in this analysis. In total, 142 (41.6%) came from rich countries. 
The survival was not affected by weight class and country of origin. A significant life expectancy benefit for Olympic champions in comparison to the general population was observed. Male Olympic wrestling champions lived in mean 19.1 ± 19.1 years longer than the matched individuals of the general population (respectively of their country of origin). A substantially lower mortality in male Olympic wrestling champions, compared with the general male population was observed. However, the results do not allow us to draw conclusions about the causes of this survival benefit.",2019-02-01 +28751473,Identification of the macrophage-specific promoter signature in FANTOM5 mouse embryo developmental time course data.,"The FANTOM5 consortium used cap analysis of gene expression (CAGE) to analyze the time course of gene expression over development from 11 days postcoitum (dpc) to adult in 16 developing organs and the whole body of the mouse. Every tissue in the body contains a large number of resident macrophages that initially infiltrate the embryo from the yolk sac. These cells contribute to organogenesis, and their functions diversify during development as they acquire tissue-specific adaptations. In each of the FANTOM5 time courses, the expression of known macrophage-specific genes, including CSF1 receptor (Csf1r), epidermal growth factor-like module-containing mucin-like hormone receptor-like 1 (Emr1), and mer receptor tyrosine kinase (Mertk), was readily detectable and increased with time. We reasoned that genes expressed by macrophages would be strongly correlated in their expression with these known markers and might vary between tissues. We used the network analysis tool, Miru, to extract the sets of coexpressed genes from the time course and identified a core set of coexpressed genes attributable to embryonic macrophages, including some, such as dehydrogenase/reductase 3 (Dhrs3), that may have unique functions in development. 
The FANTOM5 data also detected the appearance of tissue-specific macrophage-expressed genes, such as T cell Ig and mucin domain-containing 4 (Timd4) and V-set and Ig domain-containing 4 (Vsig4) in liver and sialic acid-binding Ig-like lectin 5 (Siglec5) in lung, and confirmed that macrophage content increases with time in each organ as the proliferative phases end, and tissue-specific gene-expression increases. The FANTOM5 data are available on a comprehensive browser (http://fantom.gsc.riken.jp/zenbu/), which provides a resource for the study of macrophage transcriptional regulation and roles in mouse development.",2017-07-27 +33243244,"Interrelation between facial soft tissue lessions, underlying fracture patterns and treatment of zygomatic bone trauma: a 10 year retrospective study.","

Background

The pattern of zygomatic bone fractures varies in the literature, their features being frequently masked by the presence of associated soft tissue lesions. In this context the clinical diagnosis and the therapeutic indications can be difficult. The aim of this study was to evaluate the clinical features of zygomatic bone fractures and their interrelation with concomitant overlying soft tissue injuries, as well as to assess the type of treatment methods applied depending on the fracture pattern and the results achieved depending on the incidence rate of postoperative complications. We will use these results in order to improve the diagnosis and the establishment of correct treatment of this pathology.

Methods

A 10-year retrospective evaluation of midface fractures was performed in patients diagnosed and treated in a tertiary Clinic of Oral and Maxillofacial Surgery. Statistical analysis was performed with the MedCalc Statistical Software version 19.2 (MedCalc Software bvba, Ostend, Belgium; 53 https://www.medcalc.org ; 2020). Nominal data were expressed as frequency and percentage. The comparisons of the frequencies of a nominal variable among the categories of another nominal variable were made using the chi-square test. Multivariate logistic regressions were used in order to establish the independent association between variables and lacerations/excoriations. After using the Bonferroni correction for multiple comparisons, a value of p < 0.025 was considered statistically significant.

Results

The study included 242 patients with zygomatic bone fractures. The majority of the fractures were displaced n = 179 (73.9%), closed n = 179 (73.9%) and complete n = 219 (90.5%). Hematoma was the most frequent associated soft tissue lesion n = 102 (42.1%) regardless of the fracture pattern (p = 1.000). Complete zygomatic fracture (OR - 2.68; p = 0.035) and fractures with displacement (OR - 3.66; p = 0.012) were independently associated with the presence of laceration. Fractures with displacement (OR - 7.1; p = 0.003) were independently associated with the presence of excoriation. The most frequent type of treatment applied was Gillies reduction (61.9%), followed by ORIF (30.9%). The most frequent postoperative complication was malunion secondary to Gillies treatment (4,6%).

Conclusions

Patients presenting lacerations and excoriations on clinical soft tissue examination will most frequently have an underlying complete, displaced or comminuted zygomatic fracture. In the case of displaced, open or comminuted fractures we achieved the best results secondary to ORIF treatment method, while in the case of non-displaced and closed fractures, the best results achieved were secondary to conservative treatment.",2020-11-26 +31329239,UK phenomics platform for developing and validating electronic health record phenotypes: CALIBER.,"

Objective

Electronic health records (EHRs) are a rich source of information on human diseases, but the information is variably structured, fragmented, curated using different coding systems, and collected for purposes other than medical research. We describe an approach for developing, validating, and sharing reproducible phenotypes from national structured EHR in the United Kingdom with applications for translational research.

Materials and methods

We implemented a rule-based phenotyping framework, with up to 6 approaches of validation. We applied our framework to a sample of 15 million individuals in a national EHR data source (population-based primary care, all ages) linked to hospitalization and death records in England. Data comprised continuous measurements (for example, blood pressure; medication information; coded diagnoses, symptoms, procedures, and referrals), recorded using 5 controlled clinical terminologies: (1) read (primary care, subset of SNOMED-CT [Systematized Nomenclature of Medicine Clinical Terms]), (2) International Classification of Diseases-Ninth Revision and Tenth Revision (secondary care diagnoses and cause of mortality), (3) Office of Population Censuses and Surveys Classification of Surgical Operations and Procedures, Fourth Revision (hospital surgical procedures), and (4) DM+D prescription codes.

Results

Using the CALIBER phenotyping framework, we created algorithms for 51 diseases, syndromes, biomarkers, and lifestyle risk factors and provide up to 6 validation approaches. The EHR phenotypes are curated in the open-access CALIBER Portal (https://www.caliberresearch.org/portal) and have been used by 40 national and international research groups in 60 peer-reviewed publications.

Conclusions

We describe a UK EHR phenomics approach within the CALIBER EHR data platform with initial evidence of validity and use, as an important step toward international use of UK EHR data for health research.",2019-12-01 +31697312,QMEANDisCo-distance constraints applied on model quality estimation.,"MOTIVATION:Methods that estimate the quality of a 3D protein structure model in absence of an experimental reference structure are crucial to determine a model's utility and potential applications. Single model methods assess individual models whereas consensus methods require an ensemble of models as input. In this work, we extend the single model composite score QMEAN that employs statistical potentials of mean force and agreement terms by introducing a consensus-based distance constraint (DisCo) score. RESULTS:DisCo exploits distance distributions from experimentally determined protein structures that are homologous to the model being assessed. Feed-forward neural networks are trained to adaptively weigh contributions by the multi-template DisCo score and classical single model QMEAN parameters. The result is the composite score QMEANDisCo, which combines the accuracy of consensus methods with the broad applicability of single model approaches. We also demonstrate that, despite being the de-facto standard for structure prediction benchmarking, CASP models are not the ideal data source to train predictive methods for model quality estimation. For performance assessment, QMEANDisCo is continuously benchmarked within the CAMEO project and participated in CASP13. For both, it ranks among the top performers and excels with low response times. AVAILABILITY AND IMPLEMENTATION:QMEANDisCo is available as web-server at https://swissmodel.expasy.org/qmean. The source code can be downloaded from https://git.scicore.unibas.ch/schwede/QMEAN. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-03-01 +26435838,Improving preparedness of medical students and junior doctors to manage patients with diabetes.,"

Objective

New medical graduates are the front-line staff in many hospital settings and manage patients with diabetes frequently. Prescribing is an area of concern for junior doctors, however, with insulin prescribing reported as a particular weakness. This study aimed to produce an educational intervention which aimed to improve preparedness to manage patients with diabetes and evaluate it using a mixed methods approach.

Research design and methods

An e-resource (http://www.diabetesscenariosforjuniordoctors.co.uk) was created to contain commonplace and authentic diabetes decision-making scenarios. -32 junior doctors (n=20) and year 5 students (n=12) in South West England worked through the scenarios while 'thinking aloud' and then undertook a semistructured interview. Qualitative data were transcribed verbatim and analyzed thematically. Participant confidence to manage patients with diabetes before, immediately after, and 6 weeks after the educational intervention was also measured using a self-rating scale.

Results

Participants reported that patients with diabetes were daunting to manage because of the wide array of insulin products, their lack of confidence with chronic disease management and the difficulty of applying theory to practice. The e-resource was described as authentic, practical, and appropriate for the target audience. Junior doctors' self-rated confidence to manage patients with diabetes increased from 4.7 (of 10) before using the e-resource, to 6.4 immediately afterwards, and 6.8 6 weeks later. Medical students' confidence increased from 5.1 before, to 6.4 immediately afterwards, and 6.4 6 weeks later.

Conclusions

Providing opportunities to work with authentic scenarios in a safe environment can help to ameliorate junior doctors' lack of confidence to manage patients with diabetes.",2015-09-23 +26578581,DBAASP v.2: an enhanced database of structure and antimicrobial/cytotoxic activity of natural and synthetic peptides.,"Antimicrobial peptides (AMPs) are anti-infectives that may represent a novel and untapped class of biotherapeutics. Increasing interest in AMPs means that new peptides (natural and synthetic) are discovered faster than ever before. We describe herein a new version of the Database of Antimicrobial Activity and Structure of Peptides (DBAASPv.2, which is freely accessible at http://dbaasp.org). This iteration of the database reports chemical structures and empirically-determined activities (MICs, IC50, etc.) against more than 4200 specific target microbes for more than 2000 ribosomal, 80 non-ribosomal and 5700 synthetic peptides. Of these, the vast majority are monomeric, but nearly 200 of these peptides are found as homo- or heterodimers. More than 6100 of the peptides are linear, but about 515 are cyclic and more than 1300 have other intra-chain covalent bonds. More than half of the entries in the database were added after the resource was initially described, which reflects the recent sharp uptick of interest in AMPs. New features of DBAASPv.2 include: (i) user-friendly utilities and reporting functions, (ii) a 'Ranking Search' function to query the database by target species and return a ranked list of peptides with activity against that target and (iii) structural descriptions of the peptides derived from empirical data or calculated by molecular dynamics (MD) simulations. The three-dimensional structural data are critical components for understanding structure-activity relationships and for design of new antimicrobial drugs. We created more than 300 high-throughput MD simulations specifically for inclusion in DBAASP. 
The resulting structures are described in the database by novel trajectory analysis plots and movies. Another 200+ DBAASP entries have links to the Protein DataBank. All of the structures are easily visualized directly in the web browser.",2015-11-17 +31697324,SigHotSpotter: scRNA-seq-based computational tool to control cell subpopulation phenotypes for cellular rejuvenation strategies. ,"Single-cell RNA-sequencing is increasingly employed to characterize disease or ageing cell subpopulation phenotypes. Despite exponential increase in data generation, systematic identification of key regulatory factors for controlling cellular phenotype to enable cell rejuvenation in disease or ageing remains a challenge. Here, we present SigHotSpotter, a computational tool to predict hotspots of signaling pathways responsible for the stable maintenance of cell subpopulation phenotypes, by integrating signaling and transcriptional networks. Targeted perturbation of these signaling hotspots can enable precise control of cell subpopulation phenotypes. SigHotSpotter correctly predicts the signaling hotspots with known experimental validations in different cellular systems. The tool is simple, user-friendly and is available as web-server or as stand-alone software. We believe SigHotSpotter will serve as a general purpose tool for the systematic prediction of signaling hotspots based on single-cell RNA-seq data, and potentiate novel cell rejuvenation strategies in the context of disease and ageing. SigHotSpotter is at https://SigHotSpotter.lcsb.uni.lu as a web tool. Source code, example datasets and other information are available at https://gitlab.com/srikanth.ravichandran/sighotspotter. 
Supplementary data are available at Bioinformatics online.",2019-11-07 +32315250,"A Cross-Lagged Analysis of Emotion Regulation, Peer Problems, and Emotional Problems in Children With and Without Early Language Difficulties: Evidence From the Millennium Cohort Study.","Purpose Adolescents with a history of language difficulties are at risk for increased social and emotional difficulties; however, the pathways involved are unclear. We examine the contribution of poor emotion regulation by comparing longitudinal data from children at risk of developmental language disorder (rDLD) and the general population. Method Data from the Millennium Cohort Study were analyzed at ages 3, 5, 7, 11, and 14 years. The rDLD group (children with parent-reported difficulties and/or a score of -1.5 SDs on the Naming Vocabulary subtest at age 5 years) was compared to a general population group on parent reports of emotion regulation, peer problems, and emotional problems. Results In line with the established literature, increased socioemotional problems in individuals with language difficulties were reported. Poor emotion regulation consistently predicted subsequent peer and emotional problems throughout development in both groups. Stronger cross-lag effects were found in the rDLD group for poor emotion regulation at age 3 years predicting age 5 years emotional problems and age 5 years emotional problems predicting age 7 years emotion regulation difficulties. Stronger reciprocal cross-lag effects were also observed in the rDLD group between peer and emotional problems at ages 3 and 5 years. No significant group differences were found in adolescence. Conclusions Poor emotion regulation makes a small but significant contribution to later peer and emotional difficulties, and this relationship is stronger in children at rDLD. Early reciprocal peer and emotional difficulties are also stronger in the rDLD group, but these effects dissipate in midchildhood. 
Nevertheless, the consistent relationship between early emotion regulation difficulties and socioemotional problems throughout development warrants further investigation in individuals with lower language skills. Supplemental Material https://doi.org/10.23641/asha.12142059.",2020-04-21 +30594059,Relation between Epidural Analgesia and severe perineal laceration in childbearing women in Catalonia.,"

Objective

Our objectives were to study the association between epidural analgesia and risk of severe perineal laceration (SPL), and identify additional risk factors for SPL. This multicentre study consisted of an analysis of data from the MidconBirth Phase I Database, on the use of EA and perineal results during childbirth. (World Health Organization, International Clinical Trials Registry Platform, 2016: http://apps.who.int/trialsearch/Trial2.aspx?TrialID=ISRCTN17833269).

Methods

We conducted a prospective study of pregnant women at term between July 2016 and July 2017 in 30 public maternity hospitals in Catalonia, Spain. Inclusion criteria were an uncomplicated singleton pregnancy, in cephalic presentation and vaginal birth. Data was analysed separately for instrumental births and spontaneous vaginal births, as the former is more frequently associated with episiotomy and more perineal lacerations. Risk factors as well as protective factors in each cohort of women (instrumental and spontaneous vaginal birth), were identified. Multivariate logistic regression model was performed to study the association between epidural analgesia and SPL to identify potential confounders. Odds ratios (OR), using 95% confidence intervals (CI) were constructed.

Findings

During the study period, 5497 eligible women gave birth, 77.46% of them received epidural analgesia. SPL occurred in 1.63% of births. The univariate analysis showed births with epidural analgesia had significantly higher rates of inductions, augmentation of labour, lithotomy position for birth and episiotomy. However, this association disappeared when the variable ""type of vaginal birth"" was introduced. In multivariate logistic regression, nulliparity was the major predictor for SPL (OR: 0.17; CI 95%: 0.08-0.34, p: 0.000).

Key conclusions

Epidural analgesia was not associated with SPL once confounding factors were included. Other interesting factors associated with SPL were identified.

Implications for practice

This paper identifies important practice areas which contribute to SPL and which have the potential to be rectified. It offers evidence on the role that EA plays on pelvic floor injuries and it adds to existing evidence about the disadvantages of using the lithotomy position for birth, especially in relation to SPL. It highlights the need for practice change in Catalonia from what can be considered a medical model of care to one more aligned with the midwifery philosophy of care through the development of clinical guidelines. It also signals the need to provide women with evidence base upon which to make informed choices on the use of EA, specifically in relation to SPL.",2018-12-13 +30715209,Batch-normalization of cerebellar and medulloblastoma gene expression datasets utilizing empirically defined negative control genes.,"

Motivation

Medulloblastoma (MB) is a brain cancer predominantly arising in children. Roughly 70% of patients are cured today, but survivors often suffer from severe sequelae. MB has been extensively studied by molecular profiling, but often in small and scattered cohorts. To improve cure rates and reduce treatment side effects, accurate integration of such data to increase analytical power will be important, if not essential.

Results

We have integrated 23 transcription datasets, spanning 1350 MB and 291 normal brain samples. To remove batch effects, we combined the Removal of Unwanted Variation (RUV) method with a novel pipeline for determining empirical negative control genes and a panel of metrics to evaluate normalization performance. The documented approach enabled the removal of a majority of batch effects, producing a large-scale, integrative dataset of MB and cerebellar expression data. The proposed strategy will be broadly applicable for accurate integration of data and incorporation of normal reference samples for studies of various diseases. We hope that the integrated dataset will improve current research in the field of MB by allowing more large-scale gene expression analyses.

Availability and implementation

The RUV-normalized expression data is available through the Gene Expression Omnibus (GEO; https://www.ncbi.nlm.nih.gov/geo/) and can be accessed via the GSE series number GSE124814.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +27899604,IPD-MHC 2.0: an improved inter-species database for the study of the major histocompatibility complex.,"The IPD-MHC Database project (http://www.ebi.ac.uk/ipd/mhc/) collects and expertly curates sequences of the major histocompatibility complex from non-human species and provides the infrastructure and tools to enable accurate analysis. Since the first release of the database in 2003, IPD-MHC has grown and currently hosts a number of specific sections, with more than 7000 alleles from 70 species, including non-human primates, canines, felines, equids, ovids, suids, bovins, salmonids and murids. These sequences are expertly curated and made publicly available through an open access website. The IPD-MHC Database is a key resource in its field, and this has led to an average of 1500 unique visitors and more than 5000 viewed pages per month. As the database has grown in size and complexity, it has created a number of challenges in maintaining and organizing information, particularly the need to standardize nomenclature and taxonomic classification, while incorporating new allele submissions. Here, we describe the latest database release, the IPD-MHC 2.0 and discuss planned developments. This release incorporates sequence updates and new tools that enhance database queries and improve the submission procedure by utilizing common tools that are able to handle the varied requirements of each MHC-group.",2016-11-28 +25157689,STATdb: a specialised resource for the STATome.,"Signal transducers and activators of transcription (STAT) proteins are key signalling molecules in metazoans, implicated in various cellular processes. Increased research in the field has resulted in the accumulation of STAT sequence and structure data, which are scattered across various public databases, missing extensive functional annotations, and prone to effort redundancy because of the dearth of community sharing. 
Therefore, there is a need to integrate the existing sequence, structure and functional data into a central repository, one that is enriched with annotations and provides a platform for community contributions. Herein, we present STATdb (publicly available at http://statdb.bic.nus.edu.sg/), the first integrated resource for STAT sequences comprising 1540 records representing the known STATome, enriched with existing structural and functional information from various databases and literature and including manual annotations. STATdb provides advanced features for data visualization, analysis and prediction, and community contributions. A key feature is a meta-predictor to characterise STAT sequences based on a novel classification that integrates STAT domain architecture, lineage and function. A curation policy workflow has been devised for regulated and structured community contributions, with an update policy for the seamless integration of new data and annotations.",2014-08-26 +32246067,µgreen-db: a reference database for the 23S rRNA gene of eukaryotic plastids and cyanobacteria.,"Studying the ecology of photosynthetic microeukaryotes and prokaryotic cyanobacterial communities requires molecular tools to complement morphological observations. These tools rely on specific genetic markers and require the development of specialised databases to achieve taxonomic assignment. We set up a reference database, called µgreen-db, for the 23S rRNA gene. The sequences were retrieved from generalist (NCBI, SILVA) or Comparative RNA Web (CRW) databases, in addition to a more original approach involving recursive BLAST searches to obtain the best possible sequence recovery. At present, µgreen-db includes 2,326 23S rRNA sequences belonging to both eukaryotes and prokaryotes encompassing 442 unique genera and 736 species of photosynthetic microeukaryotes, cyanobacteria and non-vascular land plants based on the NCBI and AlgaeBase taxonomy. 
When PR2/SILVA taxonomy is used instead, µgreen-db contains 2,217 sequences (399 unique genera and 696 unique species). Using µgreen-db, we were able to assign 96% of the sequences of the V domain of the 23S rRNA gene obtained by metabarcoding after amplification from soil DNA at the genus level, highlighting good coverage of the database. µgreen-db is accessible at http://microgreen-23sdatabase.ea.inra.fr.",2020-04-03 +26490957,SigMol: repertoire of quorum sensing signaling molecules in prokaryotes.,"Quorum sensing is a widespread phenomenon in prokaryotes that helps them to communicate among themselves and with eukaryotes. It is driven through quorum sensing signaling molecules (QSSMs) in a density dependent manner that assists in numerous biological functions like biofilm formation, virulence factors secretion, swarming motility, bioluminescence, etc. Despite immense implications, dedicated resources of QSSMs are lacking. Therefore, we have developed SigMol (http://bioinfo.imtech.res.in/manojk/sigmol), a specialized repository of these molecules in prokaryotes. SigMol harbors information on QSSMs pertaining to different quorum sensing signaling systems namely acylated homoserine lactones (AHLs), diketopiperazines (DKPs), 4-hydroxy-2-alkylquinolines (HAQs), diffusible signal factors (DSFs), autoinducer-2 (AI-2) and others. Database contains 1382: entries of 182: unique signaling molecules from 215: organisms. It encompasses biological as well as chemical aspects of signaling molecules. Biological information includes genes, preliminary bioassays, identification assays and applications, while chemical detail comprises of IUPAC name, SMILES and structure. We have provided user-friendly browsing and searching facilities for easy data retrieval and comparison. We have gleaned information of diverse QSSMs reported in literature at a single platform 'SigMol'. 
This comprehensive resource will assist the scientific community in understanding intraspecies, interspecies or interkingdom networking and further help to unfold different facets of quorum sensing and related therapeutics.",2015-10-20 +35369023,A Comparison Study of Coronavirus Disease 2019 Outcomes in Hospitalized Kidney Transplant Recipients.,"

Background

Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) can infect any human host, but kidney transplant recipients (KTR) are considered more susceptible on the basis of previous experience with other viral infections. We evaluated rates of hospital complications between SARS-CoV-2-positive KTR and comparator groups.

Methods

We extracted data from the electronic health record on patients who were hospitalized with SARS-CoV-2, testing at six hospitals from March 4 through September 9, 2020. We compared outcomes between SARS-CoV-2-positive KTR and controls: SARS-CoV-2-positive non-KTR, SARS-CoV-2-negative KTR, and SARS-CoV-2-negative non-KTR.

Results

Of 31,540 inpatients, 3213 tested positive for SARS-CoV-2. There were 32 SARS-CoV-2-positive and 224 SARS-CoV-2-negative KTR. SARS-CoV-2-positive KTR had higher ferritin levels (1412; interquartile range, 748-1749 versus 553; interquartile range, 256-1035; P<0.01) compared with SARS-CoV-2-positive non-KTR. SARS-CoV-2-positive KTR had higher rates of ventilation (34% versus 14%, P<0.01; versus 9%, P<0.01; versus 5%, P<0.01), vasopressor use (41% versus 16%, P<0.01; versus 17%, P<0.01; versus 12%, P<0.01), and AKI (47% versus 15%, P<0.01; versus 23%, P<0.01; versus 10%, P<0.01) compared with SARS-CoV-2-positive non-KTR, SARS-CoV-2-negative KTR, and SARS-CoV-2-negative non-KTR, respectively. SARS-CoV-2-positive KTR continued to have increased odds of ventilation, vasopressor use, and AKI compared with SARS-CoV-2-positive non-KTR independent of Elixhauser score, Black race, and baseline eGFR. Mortality was not significantly different between SARS-CoV-2-positive KTR and non-KTR, but there was a notable trend toward higher mortality in SARS-CoV-2-positive KTR (25% versus 16%, P=0.15, respectively).

Conclusions

Hospitalized SARS-CoV-2-positive KTR had a high rate of mortality and hospital complications, such as requiring ventilation, vasopressor use, and AKI. Additionally, they had higher odds of hospital complications compared with SARS-CoV-2-positive non-KTR after adjusting for Elixhauser score, Black race, and baseline eGFR. Future studies with larger sample size of KTR are needed to validate our findings.

Podcast

This article contains a podcast at https://dts.podtrac.com/redirect.mp3/www.asn-online.org/media/podcast/K360/2021_03_25_KID0005652020.mp3.",2021-01-12 +31608946,Exploiting transfer learning for the reconstruction of the human gene regulatory network.,"

Motivation

The reconstruction of gene regulatory networks (GRNs) from gene expression data has received increasing attention in recent years, due to its usefulness in the understanding of regulatory mechanisms involved in human diseases. Most of the existing methods reconstruct the network through machine learning approaches, by analyzing known examples of interactions. However, (i) they often produce poor results when the amount of labeled examples is limited, or when no negative example is available and (ii) they are not able to exploit information extracted from GRNs of other (better studied) related organisms, when this information is available.

Results

In this paper, we propose a novel machine learning method that overcomes these limitations, by exploiting the knowledge about the GRN of a source organism for the reconstruction of the GRN of the target organism, by means of a novel transfer learning technique. Moreover, the proposed method is natively able to work in the positive-unlabeled setting, where no negative example is available, by fruitfully exploiting a (possibly large) set of unlabeled examples. In our experiments, we reconstructed the human GRN, by exploiting the knowledge of the GRN of Mus musculus. Results showed that the proposed method outperforms state-of-the-art approaches and identifies previously unknown functional relationships among the analyzed genes.

Availability and implementation

http://www.di.uniba.it/∼mignone/systems/biosfer/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +31792281,The integrative knowledge base for miRNA-mRNA expression in colorectal cancer.,"""miRNA colorectal cancer"" (https://mirna-coadread.omics.si/) is a freely available web application for studying microRNA and mRNA expression and their correlation in colorectal cancer. To the best of our knowledge, ""miRNA colorectal cancer"" has the largest knowledge base of miRNA-target gene expressions and correlations in colorectal cancer, based on the largest available sample size from the same source of data. Data from high-throughput molecular profiling of 295 colon and rectum adenocarcinoma samples from The Cancer Genome Atlas was analyzed and integrated into our knowledge base. The objective of developing this web application was to help researchers to discover the behavior and role of miRNA-target gene interactions in colorectal cancer. For this purpose, results of differential expression and correlation analyses of miRNA and mRNA data collected in our knowledge base are available through web forms. To validate our knowledge base experimentally, we selected genes FN1, TGFB2, RND3, ZEB1 and ZEB2 and miRNAs hsa-miR-200a/b/c-3p, hsa-miR-141-3p and hsa-miR-429. Both approaches revealed a negative correlation between miRNA hsa-miR-200b/c-3p and its target gene FN1 and between hsa-miR-200a-3p and its target TGFB2, thus supporting the usefulness of the developed knowledge base.",2019-12-02 +30053264,"Updates to the RNA mapping database (RMDB), version 2.","Chemical mapping is a broadly utilized technique for probing the structure and function of RNAs. The volume of chemical mapping data continues to grow as more researchers routinely employ this information and as experimental methods increase in throughput and information content. To create a central location for these data, we established an RNA mapping database (RMDB) 5 years ago. 
The RMDB, which is available at http://rmdb.stanford.edu, now contains chemical mapping data for over 800 entries, involving 134 000 natural and engineered RNAs, in vitro and in cellulo. The entries include large data sets from multidimensional techniques that focus on RNA tertiary structure and co-transcriptional folding, resulting in over 15 million residues probed. The database interface has been redesigned and now offers interactive graphical browsing of structural, thermodynamic and kinetic data at single-nucleotide resolution. The front-end interface now uses the force-directed RNA applet for secondary structure visualization and other JavaScript-based views of bar graphs and annotations. A new interface also streamlines the process for depositing new chemical mapping data to the RMDB.",2018-01-01 +32956891,A Novel Prediction Tool for Overall Survival of Patients Living with Spinal Metastatic Disease.,"

Objective

To identify the significant prognostic factors for overall survival in patients with spinal metastases and to establish an online widget for predicting survival with an interactive visual approach.

Methods

Patients operated for spinal metastases between 2010 and 2018 were retrospectively enrolled and were randomly divided into training and validation samples with a ratio of 7:3. Patients' characteristics were analyzed with univariate and multivariate Cox analyses to identify independent prognostic factors basing on the training sample. A shiny web tool was developed by transforming the fitted multivariable Cox model into a visual interface. Time-dependent area under the curve plot and calibration curve were generated to assess the discrimination ability and consistency of the novel model, both for the training and validation samples.

Results

A total of 265 consecutive patients were finally included, with 185 in the training sample and 80 in the validation sample. The primary tumor types, lesion site of metastasis, visceral metastasis, Frankel grade, operation category, number of surgical segments, and the preoperative percentage of lymphocyte were demonstrated to be significantly associated with overall survival. A novel shiny model (https://yang1209xg.shinyapps.io/predictspinalmetastasis/) that could provide predicted survival curve and median survival time was established, with favorable discrimination ability and consistency between predicted and actual survival both in internal and external data, according to time-dependent area under the curve plots and calibration curves.

Conclusions

A user-friendly shiny app with favorable discrimination ability and consistency was released online for predicting the survival of patients with spinal metastases. A continuous survival curve and the predicted median survival time are available to guide the treatment planning.",2020-09-18 +32300580,A Geospatial Bibliometric Review of the HIV/AIDS Epidemic in the Russian Federation.,"Background: Increasing rates of HIV/AIDS in Eastern Europe and Central Asia contrast global trends, but the scope of HIV/AIDS research originating from Russian Federation and countries of the former Soviet Union has not been quantified. Methods: We searched six major scientific databases in Russian and English languages with medical subject heading terms ""HIV"" or ""AIDS"" and ""Russia"" or ""Soviet Union"" from 1991 to 2016. Each abstract indexed was reviewed and tagged for 25 HIV/AIDS research themes, location of research focus and first author. Results and Discussion: A total of 2,868 articles were included; 2,156 (75.1%) and 712 (24.8%) described research in the Russian Federation and countries of the former Soviet Union, respectively. There were 15 publications per million population in Russian Federation. Federal districts of the Russian Federation with the highest rates of HIV had the most limited publications. An interactive web-map with time-lapse features and links to primary literature was created using ArcGIS® technology [http://arcg.is/2FUIJ5v]. Conclusion: We found a lower than expected publication rate in the Russian Federation relative to rising HIV prevalence. The greatest deficits were in the most HIV burdened regions in the Russian Federation. Our findings highlight opportunities for new research strategies and public health efforts among key populations and subnational regions.",2020-04-02 +31277321,"GEDS: A Gene Expression Display Server for mRNAs, miRNAs and Proteins. 
","High-throughput technologies generate a tremendous amount of expression data on mRNA, miRNA and protein levels. Mining and visualizing the large amount of expression data requires sophisticated computational skills. An easy to use and user-friendly web-server for the visualization of gene expression profiles could greatly facilitate data exploration and hypothesis generation for biologists. Here, we curated and normalized the gene expression data on mRNA, miRNA and protein levels in 23315, 9009 and 9244 samples, respectively, from 40 tissues (The Cancer Genome Atlas (TCGA) and Genotype-Tissue Expression (GETx)) and 1594 cell lines (Cancer Cell Line Encyclopedia (CCLE) and MD Anderson Cell Lines Project (MCLP)). Then, we constructed the Gene Expression Display Server (GEDS), a web-based tool for quantification, comparison and visualization of gene expression data. GEDS integrates multiscale expression data and provides multiple types of figures and tables to satisfy several kinds of user requirements. The comprehensive expression profiles plotted in the one-stop GEDS platform greatly facilitate experimental biologists utilizing big data for better experimental design and analysis. GEDS is freely available on http://bioinfo.life.hust.edu.cn/web/GEDS/.",2019-07-03 +29987232,NTyroSite: Computational Identification of Protein Nitrotyrosine Sites Using Sequence Evolutionary Features. ,"Nitrotyrosine is a product of tyrosine nitration mediated by reactive nitrogen species. As an indicator of cell damage and inflammation, protein nitrotyrosine serves to reveal biological change associated with various diseases or oxidative stress. Accurate identification of nitrotyrosine site provides the important foundation for further elucidating the mechanism of protein nitrotyrosination. However, experimental identification of nitrotyrosine sites through traditional methods are laborious and expensive. 
In silico prediction of nitrotyrosine sites based on protein sequence information are thus highly desired. Here, we report a novel predictor, NTyroSite, for accurate prediction of nitrotyrosine sites using sequence evolutionary information. The generated features were optimized using a Wilcoxon-rank sum test. A random forest classifier was then trained using these features to build the predictor. The final NTyroSite predictor achieved an area under a receiver operating characteristics curve (AUC) score of 0.904 in a 10-fold cross-validation test. It also significantly outperformed other existing implementations in an independent test. Meanwhile, for a better understanding of our prediction model, the predominant rules and informative features were extracted from the NTyroSite model to explain the prediction results. We expect that the NTyroSite predictor may serve as a useful computational resource for high-throughput nitrotyrosine site prediction. The online interface of the software is publicly available at https://biocomputer.bio.cuhk.edu.hk/NTyroSite/.",2018-07-09 +31944874,Development of Phonetic Contrasts in Cantonese Tone Acquisition.,"Purpose Previous studies showed both early and late acquisition of Cantonese tones based on transcription data using different criteria, but very little acoustic data were reported. Our study examined Cantonese tone acquisition using both transcription and acoustic data, illustrating the early and protracted aspects of Cantonese tone acquisition. Method One hundred fifty-nine Cantonese-speaking children aged between 2;1 and 6;0 (years;months) and 10 reference speakers participated in a tone production experiment based on picture naming. Natural production materials with 30 monosyllabic words were transcribed by two native judges. 
Acoustic measurements included overall tonal dispersion and specific contrasts between similar tone pairs: ratios of average fundamental frequency height for the level tones (T1, T3, T6), magnitude of rise and inflection point for the rising tones (T2, T5), magnitude of fall, H1*-H2*, and harmonic-to-noise ratio for the low tones (T4, T6). Auditory assessment of creakiness for T4 was also included. Results Children in the eldest group (aged 5;7-6;0) were still not completely adultlike in production accuracy, although two thirds of them had production accuracy over 90%. Children in all age groups had production accuracy significantly higher than chance level, and they could produce the major acoustic contrasts between specific tone pairs similarly as reference speakers. Fine phonetic detail of the inflection point and creakiness was more challenging for children. Conclusion Our findings illustrated the multifaceted aspects (both early and late) of Cantonese tone acquisition and called for a wider perspective on how to define successful phonological acquisition. Supplemental Material https://doi.org/10.23641/asha.11594853.",2020-01-16 +29648583,LipidPedia: a comprehensive lipid knowledgebase.,"

Motivation

Lipids are divided into fatty acyls, glycerolipids, glycerophospholipids, sphingolipids, saccharolipids, sterols, prenol lipids and polyketides. Fatty acyls and glycerolipids are commonly used as energy storage, whereas glycerophospholipids, sphingolipids, sterols and saccharolipids are commonly used as components of cell membranes. Lipids in fatty acyls, glycerophospholipids, sphingolipids and sterols classes play important roles in signaling. Although more than 36 million lipids can be identified or computationally generated, no single lipid database provides comprehensive information on lipids. Furthermore, the complex systematic or common names of lipids make the discovery of related information challenging.

Results

Here, we present LipidPedia, a comprehensive lipid knowledgebase. The content of this database is derived from integrating annotation data with full-text mining of 3923 lipids and more than 400 000 annotations of associated diseases, pathways, functions and locations that are essential for interpreting lipid functions and mechanisms from over 1 400 000 scientific publications. Each lipid in LipidPedia also has its own entry containing a text summary curated from the most frequently cited diseases, pathways, genes, locations, functions, lipids and experimental models in the biomedical literature. LipidPedia aims to provide an overall synopsis of lipids to summarize lipid annotations and provide a detailed listing of references for understanding complex lipid functions and mechanisms.

Availability and implementation

LipidPedia is available at http://lipidpedia.cmdm.tw.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-09-01 +32412612,Nature of the sedimentary rock record and its implications for Earth system evolution.,"The sedimentary rock reservoir both records and influences changes in Earth's surface environment. Geoscientists extract data from the rock record to constrain long-term environmental, climatic and biological evolution, with the understanding that geological processes of erosion and rock destruction may have overprinted some aspects of their results. It has also long been recognized that changes in the mass and chemical composition of buried sediments, operating in conjunction with biologically catalyzed reactions, exert a first-order control on Earth surface conditions on geologic timescales. Thus, the construction and destruction of the rock record has the potential to influence both how Earth and life history are sampled, and drive long-term trends in surface conditions that otherwise are difficult to affect. However, directly testing what the dominant process signal in the sedimentary record is - rock construction or destruction - has rarely been undertaken, primarily due to the difficulty of assembling data on the mass and age of rocks in Earth's crust. Here, we present results on the chronological age and general properties of rocks and sediments in the Macrostrat geospatial database (https://macrostrat.org). Empirical patterns in surviving rock quantity as a function of age are indicative of both continual cycling (gross sedimentation) and long-term sediment accumulation (net sedimentation). Temporal variation in the net sedimentary reservoir was driven by major changes in the ability of continental crust to accommodate sediments. 
The implied history of episodic growth of sediment mass on continental crust has many attendant implications for the drivers of long-term biogeochemical evolution of Earth and life.",2018-09-01 +32876224,Agentes potencialmente terapéuticos contra el SARS-CoV-2: revisión rápida de la evidencia.,"The Instituto de Evaluación de Tecnologías en Salud e Investigación (IETSI) of the Seguro Social de Salud (EsSalud) has completed seven brief reports by means of rapid reviews of evidence regarding the potentially effective therapies against SARS-CoV-2 in order to provide current and relevant information for decision makers, clinicians, researchers and the academic community in Peru. The therapeutic agents evaluated were chloroquine/hydroxychloroquine, lopinavir/ritonavir, tocilizumab, oseltamivir, interferon, atazanavir and anti SARS-CoV-2 serum. Evidence identification included the review of PubMed and Cochrane Library electronic databases. Additionally, manual search was carried out on websites from groups dedicated to research and education on health, as well as in the main specialized societies or institutions, such as, the World Health Organization (WHO) and Centers for Disease Control and Prevention (CDC). Furthermore, in order to reduce publication bias, the websites: www.clinicaltrials.gov and http://apps.who.int/trialsearch were searched to identify in-progress or unpublished clinical trials. Finally, a ""snowball"" strategy was performed by reviewing the reference lists of the systematic reviews, primary studies and selected narrative reviews to identify relevant information. The latest review (March 27, 2020) showed that there is no evidence to recommend any medication for patients´ treatment with COVID-19. 
More evidence, preferably high-quality randomized clinical trials, is needed for decision-making against SARS-CoV-2.",2020-04-01 +32723288,[Candida hellenica Candidemia Occurred After Esophagus Surgery].,"In this study, a case of candidemia caused by Candida hellenica as the first report in our country was presented. Fluconazole and liposomal amphotericin B treatment was initiated in a 20-year-old male patient in October 2018 due to the diagnosis of candidemia following esophageal surgery. The patient had a history of multiple esophageal operations. The patient was discharged during the last 24 hours due to the lack of fever, improvement in general condition and lack of growth in blood cultures. Germination tube test of the Candida isolate grown in blood culture was negative and the colony morphology in corn meal tween 80 agar was not defining. It was identified as C.hellenica according to the profile obtained from the ID32C® (bioMérieux, France) method based on carbohydrate assimilation. The target ITS regions of the rRNA genes were amplified by polymerase chain reaction and sequenced using suitable primers for the confirmation of the identification on species level. The DNA sequences obtained were searched by using the ""National Center for Biotechnology Information (BLAST)"" (http://www.ncbi.nlm.nih.gov/ BLAST/) database and the isolate was identified as C.hellenica with a 99% homology with GenBank sequences. MALDI-TOF (Vitek MS, bioMerieux) could not identify the yeast isolate. The reference microdilution method was performed according to the recommendations of the Clinical and Laboratory Standards Institute in order to test the antifungal susceptibility. The minimal inhibitory concentrations for the isolate, determined after 24-hour incubation were 0.25 µg/ml for amphotericin B, 8 µg/ml for fluconazole, 0.25 µg/ml for voriconazole, and 0.25 µg/ml for itraconazole. 
As our case had a previous history of gastrointestinal tract surgery it was thought that gastrointestinal tract was the endogenous source of candidemia by leading to mucosal disruption and this mucosal disruption might facilitate the translocation of Candida. The carbohydrate assimilation test ID32C®, was able to identify the causative agent of candidaemia at the species level in this case. However, uncommon or previously unrecognized organisms may be misidentified by commercial systems. While the phenotypic definition is sufficient in routine laboratories, it is mandatory to confirm the microorganism species definition by DNA sequence analysis, as done in this case. We have presented a correctly identified and successfully treated candidemia case. Although the candidemia was not mortal in our patient, the mortality rate of candidemia which is 50%, should be remembered. A total of two C.hellenica infections have been reported in the literature, including one candidaemia and one respiratory tract colonization. Our successfully treated case was presented to draw attention to this rare infectious agent.",2020-04-01 +32243925,"Karyopherin α 2 promotes proliferation, migration and invasion through activating NF-κB/p65 signaling pathways in melanoma cells.","AIMS:Melanoma is a fatal malignancy. Karyopherin α 2 (KPNA2) plays an important role in many carcinogenesis. This study was aimed to study the role of KPNA2 in cellular functions and molecular mechanisms of melanoma. MAIN METHODS:We investigated the expression and prognosis of KPNA2 in melanoma using the GEPIA database (http://gepia.cancer-pku.cn/). The effect of KPNA2 on melanoma cells was determined using real-time PCR, western blot, immunofluorescence assay, CCK-8, colony formation, wound healing assay, transwell assay, EMSA, and immunohistochemistry. The influence of KPNA2 on the tumorigenicity of melanoma cells was evaluated in a nude mice model in vivo. 
KEY FINDINGS:Our results showed that KPNA2 expression is relatively high in melanoma tissues and cells, and melanoma patients with higher expression of KPNA2 had lower overall survival rate and disease free survival rate. KPNA2 promoted proliferation ability and increased the expression of PCNA, Ki67, and C-MYC in melanoma cells. Further, KPNA2 could promote migration and invasion and increase the expression of MMP2 and MMP9. Mechanism studies showed that KPNA2 activated NF-κB/p65 signaling pathways, as evidenced by the nuclear translocation of p65 and increased the expression of COX-2, ICAM-1, iNOS, and MCP1 in melanoma cells. NF-κB inhibitor JSH-23 could reverse the pro-tumor effects of KPNA2 on melanoma cells. Moreover, upregulation of KPNA2 facilitated the tumorigenicity of melanoma cells. SIGNIFICANCE:KPNA2 promotes proliferation, migration and invasion through enhancing NF-κB/p65 signaling pathways in melanoma cells. Our study suggests KPNA2 as a potential therapeutic target for the treatment of melanoma.",2020-04-01 +31797265,Diverse Community Structures in the Neuronal-Level Connectome of the Drosophila Brain.,"Drosophila melanogaster is one of the most important model animals in neurobiology owing to its manageable brain size, complex behaviour, and extensive genetic tools. However, without a comprehensive map of the brain-wide neural network, our ability to investigate brain functions at the systems level is seriously limited. In this study, we constructed a neuron-to-neuron network of the Drosophila brain based on the 28,573 fluorescence images of single neurons in the newly released FlyCircuit v1.2 (http://www.flycircuit.tw) database. By performing modularity and centrality analyses, we identified eight communities (right olfaction, left olfaction, olfactory core, auditory, motor, pre-motor, left vision, and right vision) in the brain-wide network. 
Further investigation on information exchange and structural stability revealed that the communities of different functions dominated different types of centralities, suggesting a correlation between functions and network structures. Except for the two olfaction and the motor communities, the network is characterized by overall small-worldness. A rich club (RC) structure was also found in this network, and most of the innermost RC members innervated the central complex, indicating its role in information integration. We further identified numerous loops with length smaller than seven neurons. The observation suggested unique characteristics in the information processing inside the fruit fly brain.",2020-04-01 +32558887,DeepCLIP: predicting the effect of mutations on protein-RNA binding with deep learning.,"Nucleotide variants can cause functional changes by altering protein-RNA binding in various ways that are not easy to predict. This can affect processes such as splicing, nuclear shuttling, and stability of the transcript. Therefore, correct modeling of protein-RNA binding is critical when predicting the effects of sequence variations. Many RNA-binding proteins recognize a diverse set of motifs and binding is typically also dependent on the genomic context, making this task particularly challenging. Here, we present DeepCLIP, the first method for context-aware modeling and predicting protein binding to RNA nucleic acids using exclusively sequence data as input. We show that DeepCLIP outperforms existing methods for modeling RNA-protein binding. Importantly, we demonstrate that DeepCLIP predictions correlate with the functional outcomes of nucleotide variants in independent wet lab experiments. Furthermore, we show how DeepCLIP binding profiles can be used in the design of therapeutically relevant antisense oligonucleotides, and to uncover possible position-dependent regulation in a tissue-specific manner. 
DeepCLIP is freely available as a stand-alone application and as a webtool at http://deepclip.compbio.sdu.dk.",2020-07-01 +33275452,Boiled or Bottled: Regional and Seasonal Exposures to Drinking Water Contamination and Household Air Pollution in Rural China.,"

Background

Inadequate access to safe drinking water remains a global health problem, particularly in rural areas. Boiling is the most commonly used form of point-of-use household water treatment (HWT) globally, although the use of bottled water in low- and middle-income countries (LMICs) is increasing rapidly.

Objectives

We assessed the regional and seasonal prevalence of HWT practices (including bottled water use) in low-income rural areas in two Chinese provinces, evaluated the microbiological safety of drinking water and associated health outcomes, and estimated the air pollution burden associated with the use of solid fuels for boiling.

Methods

We conducted cross-sectional surveys and collected drinking water samples from 1,033 rural households in Guangxi and Henan provinces. Temperature sensors affixed to pots and electric kettles were used to corroborate self-reported boiling frequencies and durations, which were used to model household air pollution (HAP) in terms of estimated particulate matter ≤2.5μm in aerodynamic diameter (PM2.5) concentrations.

Results

Based on summer data collection in both provinces, after controlling for covariates, boiling with electric kettles was associated with the largest log reduction in thermotolerant coliforms (TTCs) (-0.66 log10 TTC most probable number/100mL), followed by boiling with pots (-0.58), and bottled water use (-0.39); all were statistically significant (p<0.001). Boiling with electric kettles was associated with a reduced risk of TTC contamination [risk ratio (RR)=0.25, p<0.001] and reported diarrhea (RR=0.80, p=0.672). TTCs were detected in 51% (n=136) of bottled water samples. For households boiling with biomass, modeled PM2.5 concentrations averaged 79 μg/m3 (standard deviation=21).

Discussion

Our findings suggest that where boiling is already common and electricity access is widespread, the promotion of electricity-based boiling may represent a pragmatic stop-gap means of expanding safe water access until centralized, or decentralized, treated drinking water is available; displacing biomass use for water boiling could also reduce HAP concentrations and exposures. Our results also highlight the risks of increasing bottled water use in rural areas, and its potential to displace other sources of safe drinking water, which could in turn hamper efforts in China and other LMICs toward universal and affordable safe water access. https://doi.org/10.1289/EHP7124.",2020-12-04 +25414358,DB-AT: a 2015 update to the Full-parasites database brings a multitude of new transcriptomic data for apicomplexan parasites.,"The previous release of our Full-parasites database (http://fullmal.hgc.jp/) brought enhanced functionality, an expanded full-length cDNA content, and new RNA-Seq datasets from several important apicomplexan parasites. The 2015 update witnesses the major shift in the databases content with focus on diverse transcriptomes of the apicomplexan parasites. The content of the database was substantially enriched with transcriptome information for new apicomplexan parasites. The latest version covers a total of 17 species, with addition of our newly generated RNA-Seq data of a total of 909,150,388 tags. Moreover, we have generated and included two novel and unique datasets, which represent diverse nature of transcriptomes in individual parasites in vivo and in vitro. One is the data collected from 116 Indonesian patients infected with Plasmodium falciparum. The other is a series of transcriptome data collected from a total of 38 single cells of P. falciparum cultured in vitro. We believe that with the recent advances our database becomes an even better resource and a unique platform in the analysis of apicomplexan parasites and their interaction with their hosts. 
To adequately reflect the recent modifications and the current content we have changed the database name to DB-AT--DataBase of Apicomplexa Transcriptomes.",2014-11-20 +32783783,Longitudinal Growth in Intelligibility of Connected Speech From 2 to 8 Years in Children With Cerebral Palsy: A Novel Bayesian Approach.,"Aim The aim of the study was to examine longitudinal growth in intelligibility in connected speech from 2 to 8 years of age in children with cerebral palsy. Method Sixty-five children with cerebral palsy participated in the longitudinal study. Children were classified into speech-language profile groups using age-4 data: no speech motor impairment (SMI), SMI with typical language comprehension, and SMI with impaired language comprehension. We fit a Bayesian nonlinear mixed-effects model of intelligibility growth at the child and group levels. We compared groups by age of steepest growth, maximum growth rate, and predicted intelligibility at 8 years of age. Results The no SMI group showed earlier and steeper intelligibility growth and higher average outcomes compared to the SMI groups. The SMI groups had more variable growth trajectories, but the SMI with typical language comprehension group had higher age-8 outcomes and steeper rates of maximum growth than the SMI with impaired language comprehension group. Language comprehension impairment at age of 4 years predicted lower intelligibility outcomes at age of 8 years, compared to typical language at age of 4 years. Interpretation Children with SMI at age of 4 years show highly variable intelligibility growth trajectories, and comorbid language comprehension impairment predicts lower intelligibility outcomes. 
Supplemental Material https://doi.org/10.23641/asha.12777659.",2020-08-12 +,Molecular phylogeny and higher systematics of the metalmark butterflies (Lepidoptera: Riodinidae),"Riodinidae is a highly diverse butterfly family with the majority of its genera restricted to the Neotropics and, despite previous efforts, its higher systematics remains unresolved. Here, we propose a novel phylogenetic hypothesis, based on a comprehensive sample of riodinids, primarily from the Neotropics, covering 67% of all genera and all of the major lineages. We sequenced nine molecular markers and estimated resulting phylogenies with maximum likelihood and Bayesian approaches, using both timed trees and time‐independent trees. We based calibration on three fossil Riodinidae, and reassessed the position of the oldest fossil. We also incorporated 52 samples from a previous study providing a comprehensive maximum likelihood tree for 304 species comprising 80% of all genera. We propose a new higher classification of the Riodinidae with two subfamilies: the Nemeobiinae, including the Old World riodinids and their Neotropical sister Euselasia Hübner; and the Riodininae, comprising all remaining genera. We divided Riodininae into nine tribes (including four new tribes: Calydnini Seraphim, Freitas & Kaminski trib.n.; Sertaniini Seraphim, Freitas & Kaminski trib.n.; Dianesiini Seraphim, Freitas & Kaminski trib.n.; and Emesidini Seraphim, Freitas & Kaminski trib.n.), with Mesosemiini and Nymphidiini further subdivided into two and seven subtribes (including three new subtribes for Nymphidiini: Zabuellina Seraphim, Freitas & Kaminski subtrib.n.; Pachythonina Seraphim, Freitas & Kaminski subtrib.n.; and Pandemina Seraphim, Freitas & Kaminski subtrib.n.). Although our phylogenetic hypotheses are generally congruent with the analyses by Espeland et al. (2015), the comprehensive taxon sampling employed here constitutes a large step towards a stable tribal‐level classification. 
All taxonomic changes are summarized in a checklist. Despite most genera being restricted to tropical South and Central America, the oldest known fossil of Riodininae belongs to the Green River formation (42.6–50.2 Ma) in North America. Accordingly, we reassess the family's crown age at 56 Ma (52.4–60.7 Ma), which is at variance with previous dating using secondary calibrations and a different subset of genes. Nonmonophyletic riodinid genera are ubiquitous, and several groups need further revision, including groups revised recently. Our results point to the need for integrative taxonomy, as adult morphology seems to be have been exhausted as a single data source in this family. This published work has been registered on Zoobank, http://zoobank.org/urn:lsid:zoobank.org:pub:BCA2DDC5‐753B‐4178‐8E7B‐B7ED1E995CE8.",2018-04-01 +33007163,Norm-Referenced Language Test Selection Practices for Elementary School Children With Suspected Developmental Language Disorder.,"Purpose Standardized norm-referenced tests are an important aspect of language assessment for school-age children. This study explored the language test selection practices of school-based speech-language pathologists (SLPs) working with elementary school children suspected of having developmental language disorder. Specifically, we investigated which tests were most commonly selected as clinicians' first-choice and follow-up tests, which factors impacted their test selection decisions, and what sources of information they used to determine the psychometric quality of tests. Method School-based SLPs completed a web-based questionnaire regarding their use of norm-referenced language tests. A total of 370 elementary school SLPs completed the questionnaire. Results The vast majority of participants indicated that omnibus language tests are their first choice of test. 
For follow-up tests, participants selected semantics tests, especially single-word vocabulary tests, significantly more often than tests of pragmatics, processing skills, and morphology/syntax. Participants identified multiple factors as affecting test selection, including availability, familiarity, psychometric features, and others. Although more SLPs reported using data-based than subjective sources of information to judge the psychometric quality of tests, a substantial proportion reported that they relied on subjective sources. Conclusions Clinicians have a strong preference for using omnibus language tests. Follow-up test selection does not appear to align with the language difficulties most associated with developmental language disorder. The substantial use of subjective information about psychometric qualities of tests suggests that many SLPs may not attend to the technical meanings of terms such as validity, reliability, and diagnostic accuracy. These results indicate a need for improvement in evidence-based language assessment practices. Supplemental Material https://doi.org/10.23641/asha.13022471.",2020-10-02 +32862486,Factors Related to the Clinical Competence of Registered Nurses: Systematic Review and Meta-Analysis.,"

Purpose

To determine the factors associated with the clinical competence of registered nurses.

Methods

Systematic review and meta-analysis was used. The search strategy was limited to 10 years, ranging from January 2009 to December 2019, in Science Direct, the Cumulative Index to Nursing and Allied Health Literature (CINAHL), PubMed, ProQuest, and Google Scholar. A meta-analysis was performed using R Studio with the metafor package (Boston, MA, USA; https://rstudio.com/products/rstudio/older-versions/).

Results

A total of 22 studies were included, representing 33,961 nurses. There were 28 factors associated with clinical competence. Of those, 13 factors were significantly supported and included for meta-analysis, grouped into (a) individual-related factors, (b) job satisfaction, (c) bullying, (d) burnout, and (e) specific knowledge. The effect size of those factors ranged from -0.14 to 0.50.

Conclusions

Among individual-related factors, salary has the largest effect size on competence and is considered important. Clinical competence is positively affected by job satisfaction, but negatively influenced by bullying and burnout. Although specific knowledge has a large effect size, it does not significantly affect the clinical competence of registered nurses.

Clinical relevance

It is critical to understand factors related to the clinical competence of registered nurses to maintain quality care and patient outcomes in clinical settings. The findings serve as data to help nurse managers find effective ways to improve the knowledge, skill, attitudes, and performance of registered nurses.",2020-08-30 +31930403,The Protein Imager: a full-featured online molecular viewer interface with server-side HQ-rendering capabilities.,"SUMMARY:Molecular viewers' long learning curve is hindering researchers in approaching the field of structural biology for the first time. Herein, we present 'The Protein Imager', a lightweight, powerful and easy-to-use interface as a next-gen online molecular viewer. Furthermore, the interface is linked to an automated server-side rendering system able to generate publication-quality molecular illustrations. The Protein Imager interface has been designed for easy usage for beginners and experts in the field alike. The interface allows the preparation of very complex molecular views maintaining a high level of responsiveness even on mobile devices. AVAILABILITY AND IMPLEMENTATION:The Protein Imager interface is freely available online at https://3dproteinimaging.com/protein-imager. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +29040613,DNA Data Bank of Japan: 30th anniversary.,"The DNA Data Bank of Japan (DDBJ) Center (http://www.ddbj.nig.ac.jp) has been providing public data services for 30 years since 1987. We are collecting nucleotide sequence data and associated biological information from researchers as a member of the International Nucleotide Sequence Database Collaboration (INSDC), in collaboration with the US National Center for Biotechnology Information and the European Bioinformatics Institute. 
The DDBJ Center also services the Japanese Genotype-phenotype Archive (JGA) with the National Bioscience Database Center to collect genotype and phenotype data of human individuals. Here, we outline our database activities for INSDC and JGA over the past year, and introduce submission, retrieval and analysis services running on our supercomputer system and their recent developments. Furthermore, we highlight our responses to the amended Japanese rules for the protection of personal information and the launch of the DDBJ Group Cloud service for sharing pre-publication data among research groups.",2018-01-01 +31904818,LiPLike: towards gene regulatory network predictions of high certainty.,"MOTIVATION:High correlation in expression between regulatory elements is a persistent obstacle for the reverse-engineering of gene regulatory networks. If two potential regulators have matching expression patterns, it becomes challenging to differentiate between them, thus increasing the risk of false positive identifications. RESULTS:To allow for gene regulation predictions of high confidence, we propose a novel method, the Linear Profile Likelihood (LiPLike), that assumes a regression model and iteratively searches for interactions that cannot be replaced by a linear combination of other predictors. To compare the performance of LiPLike with other available inference methods, we benchmarked LiPLike using three independent datasets from the Dialogue on Reverse Engineering Assessment and Methods 5 (DREAM5) network inference challenge. We found that LiPLike could be used to stratify predictions of other inference tools, and when applied to the predictions of DREAM5 participants, we observed an average improvement in accuracy of >140% compared to individual methods. Furthermore, LiPLike was able to independently predict networks better than all DREAM5 participants when applied to biological data. 
When predicting the Escherichia coli network, LiPLike had an accuracy of 0.38 for the top-ranked 100 interactions, whereas the corresponding DREAM5 consensus model yielded an accuracy of 0.11. AVAILABILITY AND IMPLEMENTATION:We made LiPLike available to the community as a Python toolbox, available at https://gitlab.com/Gustafsson-lab/liplike. We believe that LiPLike will be used for high confidence predictions in studies where individual model interactions are of high importance, and to remove false positive predictions made by other state-of-the-art gene-gene regulation prediction tools. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-04-01 +33076619,[Hereditary protein S deficiency: survey results from a Chinese pedigree].,"Objective: To investigate the clinical characteristics and gene mutation, and analyze the association between genotype and phenotype of hereditary protein S deficiency in a Chinese pedigree. Methods: Hereditary protein S deficiency was diagnosed in January 2016 in our hospital. A total of 26 family members were surveyed in this study. Blood samples and clinical data were collected from them, and mutations were identified by Sanger sequencing. Pathogenicity of gene mutations was predicted by protein function prediction software including SIFT, PolyPhen_2, nsSNPAnalyzer and MutPred2. Swiss Model (https://swissmodel.expasy.org/) was used to perform homology modeling of the tertiary structure of the protein S wild-type and mutant-type, and observe the impact of gene mutation on the tertiary structure of the protein. Results: Four out of 26 family members of 4 generations were clinically diagnosed with hereditary protein S deficiency. The proband presented with recurrent pulmonary embolism and venous thromboembolism of the lower extremities, and her uncle and mother had a history of venous thromboembolism. 
Sequencing revealed a c.200A>C mutation in the second exon of the PROS1 gene of the proband and some of her family members (Ⅱ2, Ⅱ6, Ⅲ4, Ⅳ2). The prediction results of this gene mutation performed by SIFT, PolyPhen_2, nsSNPAnalyzer, MutPred2 were all harmful. The results of Swiss-Model homology modeling showed that the 67th amino acid was mutated from glutamic acid to alanine because of this gene mutation. Conclusion: A gene mutation (c.200A>C) is identified in a Chinese pedigree with hereditary protein S deficiency. This gene mutation may reduce protein S activity, which may cause recurrent pulmonary embolism and venous thromboembolism of the patients.",2020-10-01 +31926012,LigRMSD: a web server for automatic structure matching and RMSD calculations among identical and similar compounds in protein-ligand docking.,"MOTIVATION:Root mean square deviation (RMSD) is one of the most useful and straightforward features for structural comparison between different conformations of the same molecule. Commonly, protein-ligand docking programs have included some utilities that allow the calculation of this value; however, they only work efficiently when exists a complete atom label equivalence between the evaluated conformations. RESULTS:We present LigRMSD, a free web-server for the automatic matching and RMSD calculations among identical or similar chemical compounds. This server allows the user to submit only a pair of identical or similar molecules or dataset of similar compounds to compare their three-dimensional conformations. AVAILABILITY AND IMPLEMENTATION:LigRMSD can be freely accessed at https://ligrmsd.appsbio.utalca.cl. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +27336457,Using the Semantic Web for Rapid Integration of WikiPathways with Other Biological Online Data Resources.,"The diversity of online resources storing biological data in different formats provides a challenge for bioinformaticians to integrate and analyse their biological data. The semantic web provides a standard to facilitate knowledge integration using statements built as triples describing a relation between two objects. WikiPathways, an online collaborative pathway resource, is now available in the semantic web through a SPARQL endpoint at http://sparql.wikipathways.org. Having biological pathways in the semantic web allows rapid integration with data from other resources that contain information about elements present in pathways using SPARQL queries. In order to convert WikiPathways content into meaningful triples we developed two new vocabularies that capture the graphical representation and the pathway logic, respectively. Each gene, protein, and metabolite in a given pathway is defined with a standard set of identifiers to support linking to several other biological resources in the semantic web. WikiPathways triples were loaded into the Open PHACTS discovery platform and are available through its Web API (https://dev.openphacts.org/docs) to be used in various tools for drug development. We combined various semantic web resources with the newly converted WikiPathways content using a variety of SPARQL query types and third-party resources, such as the Open PHACTS API. The ability to use pathway information to form new links across diverse biological data highlights the utility of integrating WikiPathways in the semantic web.",2016-06-23 +29917040,MPD: a pathogen genome and metagenome database. 
,"Advances in high-throughput sequencing have led to unprecedented growth in the amount of available genome sequencing data, especially for bacterial genomes, which has been accompanied by a challenge for the storage and management of such huge datasets. To facilitate bacterial research and related studies, we have developed the Mypathogen database (MPD), which provides access to users for searching, downloading, storing and sharing bacterial genomics data. The MPD represents the first pathogenic database for microbial genomes and metagenomes, and currently covers pathogenic microbial genomes (6604 genera, 11 071 species, 41 906 strains) and metagenomic data from host, air, water and other sources (28 816 samples). The MPD also functions as a management system for statistical and storage data that can be used by different organizations, thereby facilitating data sharing among different organizations and research groups. A user-friendly local client tool is provided to maintain the steady transmission of big sequencing data. The MPD is a useful tool for analysis and management in genomic research, especially for clinical Centers for Disease Control and epidemiological studies, and is expected to contribute to advancing knowledge on pathogenic bacteria genomes and metagenomes.Database URL: http://data.mypathogen.org.",2018-01-01 +33057382,"Before and after case reporting: A comparison of the knowledge, attitude and practices of the Jordanian population towards COVID-19.","Coronavirus disease- 2019 (COVID-19) is an emerging contagious infectious disease. It is pandemic and has affected more than 21 million people and resulted in more than 750,000 deaths worldwide (https://www.worldometers.info/coronavirus/#countries; 14/08/20). Our research group initiated a study to ascertain the knowledge, attitude and practices (KAP) of Jordanians toward COVID-19 prior to any initial case report in Jordan. This project was underway when the first Jordanian case was reported. 
We extended our study to identify how case reporting would alter public KAP towards COVID-19. This cross-sectional study randomly selected and recruited 2104 Jordanian adults. A four-section questionnaire was devised to address the sociodemographic characteristics of the subjects and their KAP toward COVID-19. The mean knowledge score for the study population was 15.9 ± 2.2 (out of the 20 knowledge questions), with 60.9% of the participants having good knowledge about COVID-19. Participants' practices to prevent transmission of COVID-19 were adequate in more than 60% of participants. Most participants had positive attitudes regarding their role in preventing COVID-19 and many of the participants' attitudes and practices changed to more appropriate ones after reporting the first case of COVID-19 in Jordan. The percentage of participants who trust the government in confronting COVID-19 increased significantly (p value < 0.001). However, one alarming and unexpected finding was that the prevention practice score of participants working in the medical field was similar to those from the general population. This may necessitate stricter training and guidelines for this group who will be in the frontline in combating the disease. Impact of this study: The data generated from this study shows that when cases of disease were reported, the public's attitudes and practices improved in many aspects, and that confidence in the government to contain the disease was boosted. We believe that this study is important in allowing other, international governments to develop an understanding of public KAP during pandemic disease outbreaks.",2020-10-15 +32299465,Glioblastoma hijacks microglial gene expression to support tumor growth.,"

Background

Glioblastomas are the most common and lethal primary brain tumors. Microglia, the resident immune cells of the brain, survey their environment and respond to pathogens, toxins, and tumors. Glioblastoma cells communicate with microglia, in part by releasing extracellular vesicles (EVs). Despite the presence of large numbers of microglia in glioblastoma, the tumors continue to grow, and these neuroimmune cells appear incapable of keeping the tumor in check. To understand this process, we analyzed gene expression in microglia interacting with glioblastoma cells.

Methods

We used RNASeq of isolated microglia to analyze the expression patterns of genes involved in key microglial functions in mice with glioblastoma. We focused on microglia that had taken up tumor-derived EVs and therefore were within and immediately adjacent to the tumor.

Results

We show that these microglia have downregulated expression of genes involved in sensing tumor cells and tumor-derived danger signals, as well as genes used for tumor killing. In contrast, expression of genes involved in facilitating tumor spread was upregulated. These changes appear to be in part EV-mediated, since intracranial injection of EVs in normal mice led to similar transcriptional changes in microglia. We observed a similar microglial transcriptomic signature when we analyzed datasets from human patients with glioblastoma.

Conclusion

Our data define a microglia^Glioblastoma specific phenotype, whereby glioblastomas have hijacked gene expression in the neuroimmune system to favor avoiding tumor sensing, suppressing the immune response, clearing a path for invasion, and enhancing tumor propagation. For further exploration, we developed an interactive online tool at http://www.glioma-microglia.com with all expression data and additional functional and pathway information for each gene.",2020-04-16 +29155944,Molecular Interaction Search Tool (MIST): an integrated resource for mining gene and protein interaction data.,"Model organism and human databases are rich with information about genetic and physical interactions. These data can be used to interpret and guide the analysis of results from new studies and develop new hypotheses. Here, we report the development of the Molecular Interaction Search Tool (MIST; http://fgrtools.hms.harvard.edu/MIST/). The MIST database integrates biological interaction data from yeast, nematode, fly, zebrafish, frog, rat and mouse model systems, as well as human. For individual or short gene lists, the MIST user interface can be used to identify interacting partners based on protein-protein and genetic interaction (GI) data from the species of interest as well as inferred interactions, known as interologs, and to view a corresponding network. The data, interologs and search tools at MIST are also useful for analyzing 'omics datasets. In addition to describing the integrated database, we also demonstrate how MIST can be used to identify an appropriate cut-off value that balances false positive and negative discovery, and present use-cases for additional types of analysis. Altogether, the MIST database and search tools support visualization and navigation of existing protein and GI data, as well as comparison of new and existing data.",2018-01-01 +33054421,Comparison of the accuracy of 11 intraocular lens power calculation formulas.,"

Purpose

To compare the accuracy of 11 intraocular lens (IOL) power calculation formulas (SRK-T, Hoffer Q, Holladay I, Haigis, Holladay II, Olsen, Barrett Universal II, Hill-RBF, Ladas Super formula, EVO and Kane).

Setting

Private university hospital (QuironSalud, Madrid, Spain).

Design

Retrospective case series.

Methods

Data were compiled from 481 eyes of 481 patients who had undergone uneventful cataract surgery with IOL insertion. Preoperative biometric measurements were made using an IOL Master® 700. Respective ULIB IOL constants (http://ocusoft.de/ulib/c1.htm) for each of 4 IOL models implanted were used to calculate the predictive refractive outcome for each formula. This was compared with the actual refractive outcome determined 3 months postoperatively. The primary outcome was mean absolute prediction error (MAE). The study sample was divided according to axial length (AL) into three groups of eyes: short (⩽22.00 mm), normal (22.00-25.00 mm) and long (⩾25.00 mm).

Results

The Barrett Universal II and Haigis formulas yielded the lowest MAEs over the entire AL range (p < .01, except EVO) as well as in the long (p < .01, all formulas) and normal (p < .01, except Haigis, Holladay II, Olsen and LSF) eyes. In the short eyes, the lower MAEs were provided by Haigis and EVO (p < .01 except Hoffer Q, SRK/T and Holladay I).

Conclusions

Barrett Universal II was the most accurate for IOL power calculation in the normal and long eyes. For short eyes, the formulas Haigis and EVO seem best at predicting refractive outcomes.",2020-10-15 +27158452,A curated compendium of monocyte transcriptome datasets of relevance to human monocyte immunobiology research.,"Systems-scale profiling approaches have become widely used in translational research settings. The resulting accumulation of large-scale datasets in public repositories represents a critical opportunity to promote insight and foster knowledge discovery. However, resources that can serve as an interface between biomedical researchers and such vast and heterogeneous dataset collections are needed in order to fulfill this potential. Recently, we have developed an interactive data browsing and visualization web application, the Gene Expression Browser (GXB). This tool can be used to overlay deep molecular phenotyping data with rich contextual information about analytes, samples and studies along with ancillary clinical or immunological profiling data. In this note, we describe a curated compendium of 93 public datasets generated in the context of human monocyte immunological studies, representing a total of 4,516 transcriptome profiles. Datasets were uploaded to an instance of GXB along with study description and sample annotations. Study samples were arranged in different groups. Ranked gene lists were generated based on relevant group comparisons. This resource is publicly available online at http://monocyte.gxbsidra.org/dm3/landing.gsp.",2016-04-25 +32773770,Considerations in assessing germline variant pathogenicity using cosegregation analysis.,"

Purpose

The American College of Medical Genetics and Genomics (ACMG) and the Association for Molecular Pathology (AMP) have developed guidelines for classifying germline variants as pathogenic or benign to interpret genetic testing results. Cosegregation analysis is an important component of the guidelines. There are two main approaches for cosegregation analysis: meiosis counting and Bayes factor-based quantitative methods. Of these, the ACMG/AMP guidelines employ only meiosis counting. The accuracy of either approach has not been sufficiently addressed in previous works.

Methods

We analyzed hypothetical, simulated, and real-life data to evaluate the accuracy of each approach for cancer-associated genes.

Results

We demonstrate that meiosis counting can provide incorrect classifications when the underlying genetic basis of the disease departs from simple Mendelian situations. Some Bayes factor approaches are currently implemented with inappropriate penetrance. We propose an improved penetrance model and describe several critical considerations, including the accuracy of cosegregation for moderate-risk genes and the impact of pleiotropy, population, and birth year. We highlight a webserver, COOL (Co-segregation Online, http://BJFengLab.org/ ), that implements an accurate Bayes factor cosegregation analysis.

Conclusion

An appropriate penetrance model improves the accuracy of Bayes factor cosegregation analysis for high-penetrant variants, and is a better choice than meiosis counting whenever feasible.",2020-08-10 +30341246,Prognostic values of F-box members in breast cancer: an online database analysis and literature review. ,"Introduction: F-box proteins are the substrate-recognizing subunits of SKP1 (S-phase kinase-associated protein 1)-cullin1-F-box protein (SCF) E3 ligase complexes that play pivotal roles in multiple cellular processes, including cell proliferation, apoptosis, angiogenesis, invasion, and metastasis. Dysregulation of F-box proteins may lead to an unbalanced proteolysis of numerous protein substrates, contributing to progression of human malignancies. However, the prognostic values of F-box members, especially at mRNA levels, in breast cancer (BC) are elusive. Methods: An online database, which is constructed based on the gene expression data and survival information downloaded from GEO (http://www.ncbi.nlm.nih.gov/geo/), was used to investigate the prognostic values of 15 members of F-box mRNA expression in BC. Results: We found that higher mRNA expression levels of FBXO1, FBXO31, SKP2, and FBXO5 were significantly associated with worse prognosis for BC patients. While FBXO4 and β-TrCP1 were found to be correlated to better overall survival (OS). Conclusion: The associated results provide new insights into F-box members in the development and progression of BC. Further researches to explore the F-box protein-targetting reagents for treating BC are needed.",2019-01-03 +32290787,Transcriptional response to a Mediterranean diet intervention exerts a modulatory effect on neuroinflammation signaling pathway.,"Background: The Traditional Mediterranean Diet (TMD) is known to have beneficial effects on several chronic diseases. 
However, data concerning the whole transcriptome modulation of the TMD are scarce.Objective: We aimed to explore the effects of the TMD on the whole transcriptome of individuals at high cardiovascular risk.Methods: Thirty-four participants at high cardiovascular risk were randomly assigned to a TMD enriched with extra-virgin olive oil (TMD + VOO), mixed nuts (TMD + Nuts), or a control diet based on low-fat diet recommendations. A microarray analysis in circulating peripheral blood mononuclear cells of the participants was conducted before and after 3 months of the intervention. The association of changes in gene expression was modeled into canonical pathways by conducting an untargeted functional analysis with the Ingenuity Pathway Analysis® (IPA). Effects were considered significant when the absolute z-score values were ≥2.0 and the logarithm P (adjusted by the Benjamini-Hochberg procedure [BH]) values were ≥1.30.Results: According to IPA, interventions with TMD + Nuts, TMD + VOO, and control diet downregulated neuroinflammation, triggering receptor expressed on myeloid cells 1 , and cholecystokinin/gastrin-mediated signaling pathways, respectively. The gene expression among these pathways included cytokines, T-cell activation receptors, nuclear factor kappa β/inflammasome components, pro-inflammatory enzymes and cell cycle regulators.Conclusion: The current findings suggest that the TMD enriched with mixed nuts or VOO downregulate transcriptomic pathways, including those related to neuroinflammation, which could influence development of neurodegenerative diseases. Our data should be corroborated in other tissue cells, such as neurons and glial cells. 
The PREDIMED trial was registered at https://www.controlled-trials.com (ISRCTN35739639).",2020-04-15 +32610179,Early behavioral markers for neurodevelopmental disorders in the first 3 years of life: An overview of systematic reviews.,"Being able to recognize red flags for neurodevelopmental disorders (NDD) is crucial to provide timely intervention programs. This work aims to support - within a scientific framework - the construction of an instrument capable to early detect all spectrum of NDD and explore all areas of development, detect failures in typical developmental pathways and point out atypical signs at all ages. This overview of reviews provides evidence for differences in children later diagnosed with NDD compared to typically developing peers such as delays in motor, language development and temperament in the first three years of age, repetitive/stereotyped behaviors, atypicalities/delays in play, object use, attention, visual, sensory processing and social engagement in the first and second year, and difficulties in feeding and sleeping in the first year. These behaviors must be carefully observed as potential red flags for NDD. However, data of the systematic reviews are not yet useful to develop an evidence-based clinical screening. It urges to increase efforts in producing systematic reviews on early behavioral markers for each NDD. Trial registration:CRD42019137731. (https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=137731).",2020-06-28 +29619235,"iMETHYL: an integrative database of human DNA methylation, gene expression, and genomic variation.","We launched an integrative multi-omics database, iMETHYL (http://imethyl.iwate-megabank.org). iMETHYL provides whole-DNA methylation (~24 million autosomal CpG sites), whole-genome (~9 million single-nucleotide variants), and whole-transcriptome (>14 000 genes) data for CD4+ T-lymphocytes, monocytes, and neutrophils collected from approximately 100 subjects. 
These data were obtained from whole-genome bisulfite sequencing, whole-genome sequencing, and whole-transcriptome sequencing, making iMETHYL a comprehensive database.",2018-03-29 +26734381,Preconception counselling resource for women with diabetes. ,"Women with diabetes need to plan for pregnancy if they are to reduce their risk of poor pregnancy outcome. While care providers have focused on setting up specialist pre-pregnancy planning clinics to help women prepare for pregnancy, the majority of women do not attend, entering pregnancy unprepared. A major barrier to accessing this care, and a consequence of poor preconception counselling, is a lack of knowledge as to the need to plan and the reasons why. This project addressed an urgent need to raise awareness of the importance of planning for pregnancy among women with diabetes and among the healthcare professionals (HCPs) caring for them. Focus groups with the target groups informed the development of a preconception counselling resource for women with diabetes. Originally produced as a DVD (Diabetes UK funding), this resource has been embedded in routine care in Northern Ireland (NI) since 2010. A subsequent service evaluation of pregnancy planning indicators undertaken across all five antenatal-metabolic clinics in NI indicated that women who viewed the resource were better prepared for pregnancy. In order to increase the positive impact of the resource and to ensure longer term sustainability the DVD was converted to a website, http://www.womenwithdiabetes.net (Public Health Agency NI funding). The evaluation also highlighted that women with type 2 diabetes were a hard to reach group. As these women are often cared for outside of specialist clinics, it is pertinent that all HCPs caring for women with diabetes are aware of the importance of preconception counselling. Funding also supported the development of an e-learning continuing professional development (CPD) resource within the website. 
The e-learning resource has since been embedded into existing CPD programmes and is an important tool to ensure that all HCPs caring for women with diabetes are empowered to provide preconception counselling at every opportunity.",2015-10-12 +,A molecular systematic analysis of the Neotropical banner winged damselflies (Polythoridae: Odonata),"The Neotropics are a hotspot of global diversity for many groups of organisms, including the dragonflies and damselflies (Insecta: Odonata). While the number of biodiversity surveys and new species descriptions for Neotropical odonates is increasing, diversity in this region is still under‐explored, and very few studies have looked at the genetic and morphological diversity among (and within) species. Here, we present an overview of the evolutionary history of the Neotropical damselfly family Polythoridae. The family comprises 57 species across seven genera: Chalcopteryx Selys, Chalcothore De Marmels, Cora Selys, Euthore Selys, Miocora Calvert, Polythore Calvert and Stenocora Kennedy. Using a multi‐locus approach, mitochondrial (COI, ND1, 16S) and nuclear (18S, 28S, EF1‐alpha) genes were concatenated to estimate phylogenetic relationships. Our results support five monophyletic clades, which were not always congruent with the genera previously considered to be monophyletic. Only Polythore was recovered as monophyletic, and within it there was geographical structure. We propose the following new genus‐level classification: Chalcothore, Chalcopteryx, Cora s.s., Cora s.l., Miocora, Euthore s.l and Polythore. In addition, we proposed the following new combinations: Miocora aurea comb.n., Miocora chirripa comb.n., Euthore confusa comb.n., Euthore klenei comb.n., and Euthore terminalis comb.n., based on our phylogenetic analyses, our evaluation of morphological characters and their geographical distribution: these data each support the monophyletic entities we recover here. 
This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:E9E10CD8‐6A04‐4F2E‐A632‐1B998BAFB193.",2018-01-01 +30407550,"Human Disease Ontology 2018 update: classification, content and workflow expansion.","The Human Disease Ontology (DO) (http://www.disease-ontology.org), database has undergone significant expansion in the past three years. The DO disease classification includes specific formal semantic rules to express meaningful disease models and has expanded from a single asserted classification to include multiple-inferred mechanistic disease classifications, thus providing novel perspectives on related diseases. Expansion of disease terms, alternative anatomy, cell type and genetic disease classifications and workflow automation highlight the updates for the DO since 2015. The enhanced breadth and depth of the DO's knowledgebase has expanded the DO's utility for exploring the multi-etiology of human disease, thus improving the capture and communication of health-related data across biomedical databases, bioinformatics tools, genomic and cancer resources and demonstrated by a 6.6× growth in DO's user community since 2015. The DO's continual integration of human disease knowledge, evidenced by the more than 200 SVN/GitHub releases/revisions, since previously reported in our DO 2015 NAR paper, includes the addition of 2650 new disease terms, a 30% increase of textual definitions, and an expanding suite of disease classification hierarchies constructed through defined logical axioms.",2019-01-01 +30357367,CORUM: the comprehensive resource of mammalian protein complexes-2019.,"CORUM is a database that provides a manually curated repository of experimentally characterized protein complexes from mammalian organisms, mainly human (67%), mouse (15%) and rat (10%). 
Given the vital functions of these macromolecular machines, their identification and functional characterization is foundational to our understanding of normal and disease biology. The new CORUM 3.0 release encompasses 4274 protein complexes offering the largest and most comprehensive publicly available dataset of mammalian protein complexes. The CORUM dataset is built from 4473 different genes, representing 22% of the protein coding genes in humans. Protein complexes are described by a protein complex name, subunit composition, cellular functions as well as the literature references. Information about stoichiometry of subunits depends on availability of experimental data. Recent developments include a graphical tool displaying known interactions between subunits. This allows the prediction of structural interconnections within protein complexes of unknown structure. In addition, we present a set of 58 protein complexes with alternatively spliced subunits. Those were found to affect cellular functions such as regulation of apoptotic activity, protein complex assembly or define cellular localization. CORUM is freely accessible at http://mips.helmholtz-muenchen.de/corum/.",2019-01-01 +30335161,EVmiRNA: a database of miRNA profiling in extracellular vesicles.,"Extracellular vesicles (EVs), such as exosomes and microvesicles, acted as cell-to-cell communication vectors and potential biomarkers for diseases. microRNAs (miRNAs) are the most well studied molecules in EVs, thus a comprehensive investigation of miRNA expression profiles in EVs will be helpful to explore their functions and biomarkers. We curated 462 small RNA sequencing samples of EVs from 17 sources/diseases and constructed the EVmiRNA database (http://bioinfo.life.hust.edu.cn/EVmiRNA) to show the miRNA expression profiles. We found >1000 miRNAs expressed in these EVs and detected specific miRNAs for EVs of each source/disease. 
EVmiRNA provides three functional modules: (i) the miRNA expression profiles and the sample information of EVs from different sources (such as blood, breast milk etc.); (ii) the specifically expressed miRNAs in different EVs that would be helpful for biomarker identification; (iii) the miRNA annotations including the miRNA expression in EVs and TCGA cancer types, miRNA pathway regulations as well as miRNA function and publications. EVmiRNA has a user-friendly web interface with powerful browse and search functions, as well as data downloading. It is the first database focusing on miRNA expression profiles in EVs and will be useful for the research and application community of EV biomarker, miRNA function and liquid biopsy.",2019-01-01 +30304474,Genenames.org: the HGNC and VGNC resources in 2019.,"The HUGO Gene Nomenclature Committee (HGNC) based at EMBL's European Bioinformatics Institute (EMBL-EBI) assigns unique symbols and names to human genes. There are over 40 000 approved gene symbols in our current database of which over 19 000 are for protein-coding genes. The Vertebrate Gene Nomenclature Committee (VGNC) was established in 2016 to assign standardized nomenclature in line with human for vertebrate species that lack their own nomenclature committees. The VGNC initially assigned nomenclature for over 15000 protein-coding genes in chimpanzee. We have extended this process to other vertebrate species, naming over 14000 protein-coding genes in cow and dog and over 13 000 in horse to date. Our HGNC website https://www.genenames.org has undergone a major design update, simplifying the homepage to provide easy access to our search tools and making the site more mobile friendly. Our gene families pages are now known as 'gene groups' and have increased in number to over 1200, with nearly half of all named genes currently assigned to at least one gene group. 
This article provides an overview of our online data and resources, focusing on our work over the last two years.",2019-01-01 +28578176,Research standardization tools: pregnancy measures in the PhenX Toolkit.,"Only through concerted and well-executed research endeavors can we gain the requisite knowledge to advance pregnancy care and have a positive impact on maternal and newborn health. Yet the heterogeneity inherent in individual studies limits our ability to compare and synthesize study results, thus impeding the capacity to draw meaningful conclusions that can be trusted to inform clinical care. The PhenX Toolkit (http://www.phenxtoolkit.org), supported since 2007 by the National Institutes of Health, is a web-based catalog of standardized protocols for measuring phenotypes and exposures relevant for clinical research. In 2016, a working group of pregnancy experts recommended 15 measures for the PhenX Toolkit that are highly relevant to pregnancy research. The working group followed the established PhenX consensus process to recommend protocols that are broadly validated, well established, nonproprietary, and have a relatively low burden for investigators and participants. The working group considered input from the pregnancy experts and the broader research community and included measures addressing the mode of conception, gestational age, fetal growth assessment, prenatal care, the mode of delivery, gestational diabetes, behavioral and mental health, and environmental exposure biomarkers. These pregnancy measures complement the existing measures for other established domains in the PhenX Toolkit, including reproductive health, anthropometrics, demographic characteristics, and alcohol, tobacco, and other substances. The preceding domains influence a woman's health during pregnancy. For each measure, the PhenX Toolkit includes data dictionaries and data collection worksheets that facilitate incorporation of the protocol into new or existing studies. 
The measures within the pregnancy domain offer a valuable resource to investigators and clinicians and are well poised to facilitate collaborative pregnancy research with the goal to improve patient care. To achieve this aim, investigators whose work includes the perinatal population are encouraged to utilize the PhenX Toolkit in the design and implementation of their studies, thus potentially reducing heterogeneity in data measures across studies. Such an effort will enhance the overall impact of individual studies, increasing the ability to draw more meaningful conclusions that can then be translated into clinical practice.",2017-05-31 +32540931,A Vibrio cholerae Core Genome Multilocus Sequence Typing Scheme To Facilitate the Epidemiological Study of Cholera. ,"Core genome multilocus sequence typing (cgMLST) has gained popularity in recent years in epidemiological research and subspecies-level classification. cgMLST retains the intuitive nature of traditional MLST but offers much greater resolution by utilizing significantly larger portions of the genome. Here, we introduce a cgMLST scheme for Vibrio cholerae, a bacterium abundant in marine and freshwater environments and the etiologic agent of cholera. A set of 2,443 core genes ubiquitous in V. cholerae were used to analyze a comprehensive data set of 1,262 clinical and environmental strains collected from 52 countries, including 65 newly sequenced genomes in this study. We established a sublineage threshold based on 133 allelic differences that creates clusters nearly identical to traditional MLST types, providing backwards compatibility to new cgMLST classifications. We also defined an outbreak threshold based on seven allelic differences that is capable of identifying strains from the same outbreak and closely related isolates that could give clues on outbreak origin. 
Using cgMLST, we confirmed the South Asian origin of modern epidemics and identified clustering affinity among sublineages of environmental isolates from the same geographic origin. Advantages of this method are highlighted by direct comparison with existing classification methods, such as MLST and single-nucleotide polymorphism-based methods. cgMLST outperforms all existing methods in terms of resolution, standardization, and ease of use. We anticipate this scheme will serve as a basis for a universally applicable and standardized classification system for V. cholerae research and epidemiological surveillance in the future. This cgMLST scheme is publicly available on PubMLST (https://pubmlst.org/vcholerae/).IMPORTANCE Toxigenic Vibrio cholerae isolates of the O1 and O139 serogroups are the causative agents of cholera, an acute diarrheal disease that plagued the world for centuries, if not millennia. Here, we introduce a core genome multilocus sequence typing scheme for V. cholerae Using this scheme, we have standardized the definition for subspecies-level classification, facilitating global collaboration in the surveillance of V. cholerae In addition, this typing scheme allows for quick identification of outbreak-related isolates that can guide subsequent analyses, serving as an important first step in epidemiological research. This scheme is also easily scalable to analyze thousands of isolates at various levels of resolution, making it an invaluable tool for large-scale ecological and evolutionary analyses.",2020-11-19 +30212133,Identifying an Essential Package for Adolescent Health: Economic Analysis,"Adolescents form a large proportion of the population in many low- and middle-income countries (LMICs)—more than 20 percent in the countries with the fastest-growing populations (WHO 2014). 
The adolescent period, defined as ages 10 through 19 years, is key to future health because it is during these years that health decisions and habits are formed that have long-term impacts. Adolescents who are enabled to make healthy eating and exercise choices, to adopt healthy sexual behaviors, and to avoid addictive substances and excessive risks have the best opportunities for health in later life. Equally important, some mental health issues are manifested in late adolescence, and early detection is important. Despite the pivotal nature of this age, adolescents until recently have been relatively neglected in international donor strategies for maternal, newborn, and child health. Specific areas where funding is lacking include preventing unsafe abortion and coerced sex, and providing antenatal, childbirth, and postnatal care (iERG 2013). Many adolescents are entitled to appropriate health care under the Convention on the Rights of the Child, but those ages 18 and 19 years are not specifically included. Recent reports and studies seek to bring greater attention to adolescent health needs (Gorna and others 2015; Laski and others 2015; Patton and others 2016; UNICEF 2011, 2012; WHO 2014). Groups such as the International Health Partnership (http://www.internationalhealthpartnership.net) have begun to modify the well-known term RMNCH (Reproductive, Maternal, Newborn, and Child Health) to RMNCAH to include adolescents. The Every Woman Every Child (2015) strategy is titled “The Global Strategy for Women’s, Children’s and Adolescents’ Health 2016–2030” and signals a positive change. It highlights research indicating that the health of women, children, and adolescents is central to the Sustainable Development Goals for 2030. The term youth is mentioned 10 times in the Outcome Declaration of the Sustainable Development Agenda (UN 2015), and the term adolescent is mentioned once in reference to adolescent girls. 
This chapter provides an overview of methods and examines the economic case for investment in adolescent health by surveying what is known on cost, cost-effectiveness, and cost-benefit ratios of interventions. We then use these economic data to examine the cost of an essential package of health and behavioral interventions that all countries need to provide. The essential package draws on packages developed elsewhere (Every Woman Every Child 2015; Patton and others 2016; WHO 2013). Useful information also comes from costing studies of related packages (Deogan, Ferguson, and Stenberg 2012; Temin and Levine 2009). Countries can modify this package depending on their specific needs and resource availability. Finally, we estimate what such a package might cost in 2012 U.S. dollars and provide brief conclusions. Definitions of age groupings and age-specific terminology used in this volume can be found in chapter 1 (Bundy, de Silva, and others 2017).",2018-09-14 +31545812,Variant analysis pipeline for accurate detection of genomic variants from transcriptome sequencing data.,"The wealth of information deliverable from transcriptome sequencing (RNA-seq) is significant, however current applications for variant detection still remain a challenge due to the complexity of the transcriptome. Given the ability of RNA-seq to reveal active regions of the genome, detection of RNA-seq SNPs can prove valuable in understanding the phenotypic diversity between populations. Thus, we present a novel computational workflow named VAP (Variant Analysis Pipeline) that takes advantage of multiple RNA-seq splice aware aligners to call SNPs in non-human models using RNA-seq data only. We applied VAP to RNA-seq from a highly inbred chicken line and achieved high accuracy when compared with the matching whole genome sequencing (WGS) data. Over 65% of WGS coding variants were identified from RNA-seq. 
Further, our results discovered SNPs resulting from post transcriptional modifications, such as RNA editing, which may reveal potentially functional variation that would have otherwise been missed in genomic data. Even with the limitation in detecting variants in expressed regions only, our method proves to be a reliable alternative for SNP identification using RNA-seq data. The source code and user manuals are available at https://modupeore.github.io/VAP/.",2019-09-23 +29804395,[Algorithmic analysis of potential drug-drug interactions using direct-acting antiviral agents and concomitant medications in chronic hepatitis C].,Direct acting antiviral agents (DAAs) metabolism and pharmacokinetics of concomitant medications data were extracted and analyzed from the database of Chinese Health Insurance between 2013 and 2015. A potential drug-drug interactions (DDI) were calculated by integration of extracted data and confirmed by using Liverpool website (https: //www.hep-druginteractions.org/). A new algorithm is suggested for management of DDI between DAAs and concomitant medications.,2018-03-01 +26051695,ComiRNet: a web-based system for the analysis of miRNA-gene regulatory networks.,"

Background

The understanding of mechanisms and functions of microRNAs (miRNAs) is fundamental for the study of many biological processes and for the elucidation of the pathogenesis of many human diseases. Technological advances represented by high-throughput technologies, such as microarray and next-generation sequencing, have significantly aided miRNA research in the last decade. Nevertheless, the identification of true miRNA targets and the complete elucidation of the rules governing their functional targeting remain nebulous. Computational tools have been proven to be fundamental for guiding experimental validations for the discovery of new miRNAs, for the identification of their targets and for the elucidation of their regulatory mechanisms.

Description

ComiRNet (Co-clustered miRNA Regulatory Networks) is a web-based database specifically designed to provide biologists and clinicians with user-friendly and effective tools for the study of miRNA-gene target interaction data and for the discovery of miRNA functions and mechanisms. Data in ComiRNet are produced by a combined computational approach based on: 1) a semi-supervised ensemble-based classifier, which learns to combine miRNA-gene target interactions (MTIs) from several prediction algorithms, and 2) the biclustering algorithm HOCCLUS2, which exploits the large set of produced predictions, with the associated probabilities, to identify overlapping and hierarchically organized biclusters that represent miRNA-gene regulatory networks (MGRNs).

Conclusions

ComiRNet represents a valuable resource for elucidating the miRNAs' role in complex biological processes by exploiting data on their putative function in the context of MGRNs. ComiRnet currently stores about 5 million predicted MTIs between 934 human miRNAs and 30,875 mRNAs, as well as 15 bicluster hierarchies, each of which represents MGRNs at different levels of granularity. The database can be freely accessed at: http://comirnet.di.uniba.it.",2015-06-01 +32587353,Prediction of disulfide bond engineering sites using a machine learning method.,"Disulfide bonds are covalently bonded sulfur atoms from cysteine pairs in protein structures. Due to the importance of disulfide bonds in protein folding and structural stability, artificial disulfide bonds are often engineered by cysteine mutation to enhance protein structural stability. To facilitate the experimental design, we implemented a method based on neural networks to predict amino acid pairs for cysteine mutations to form engineered disulfide bonds. The designed neural network was trained with high-resolution structures curated from the Protein Data Bank. The testing results reveal that the proposed method recognizes 99% of natural disulfide bonds. In the test with engineered disulfide bonds, the algorithm achieves similar accuracy levels with other state-of-the-art algorithms in published dataset and better performance for two comprehensively studied proteins with 70% accuracy, demonstrating potential applications in protein engineering. The neural network framework allows exploiting the full features in distance space, and therefore improves accuracy of the disulfide bond engineering site prediction. 
The source code and a web server are available at http://liulab.csrc.ac.cn/ssbondpre.",2020-06-25 +29578387,Identifying natural compounds as multi-target-directed ligands against Alzheimer's disease: an in silico approach.,"Alzheimer's disease (AD) is a multi-factorial disease, which can be simply outlined as an irreversible and progressive neurodegenerative disorder with an unclear root cause. It is a major cause of dementia in old aged people. In the present study, utilizing the structural and biological activity information of ligands for five important and mostly studied vital targets (i.e. cyclin-dependant kinase 5, β-secretase, monoamine oxidase B, glycogen synthase kinase 3β, acetylcholinesterase) that are believed to be effective against AD, we have developed five classification models using linear discriminant analysis (LDA) technique. Considering the importance of data curation, we have given more attention towards the chemical and biological data curation, which is a difficult task especially in case of big data-sets. Thus, to ease the curation process we have designed Konstanz Information Miner (KNIME) workflows, which are made available at http://teqip.jdvu.ac.in/QSAR_Tools/ . The developed models were appropriately validated based on the predictions for experiment derived data from test sets, as well as true external set compounds including known multi-target compounds. The domain of applicability for each classification model was checked based on a confidence estimation approach. Further, these validated models were employed for screening of natural compounds collected from the InterBioScreen natural database ( https://www.ibscreen.com/natural-compounds ). 
Further, the natural compounds that were categorized as 'actives' in at least two classification models out of five developed models were considered as multi-target leads, and these compounds were further screened using the drug-like filter, molecular docking technique and then thoroughly analyzed using molecular dynamics studies. Finally, the most potential multi-target natural compounds against AD are suggested.",2018-04-23 +28453653,The Bologna Annotation Resource (BAR 3.0): improving protein functional annotation.,"BAR 3.0 updates our server BAR (Bologna Annotation Resource) for predicting protein structural and functional features from sequence. We increase data volume, query capabilities and information conveyed to the user. The core of BAR 3.0 is a graph-based clustering procedure of UniProtKB sequences, following strict pairwise similarity criteria (sequence identity ≥40% with alignment coverage ≥90%). Each cluster contains the available annotation downloaded from UniProtKB, GO, PFAM and PDB. After statistical validation, GO terms and PFAM domains are cluster-specific and annotate new sequences entering the cluster after satisfying similarity constraints. BAR 3.0 includes 28 869 663 sequences in 1 361 773 clusters, of which 22.2% (22 241 661 sequences) and 47.4% (24 555 055 sequences) have at least one validated GO term and one PFAM domain, respectively. 1.4% of the clusters (36% of all sequences) include PDB structures and the cluster is associated to a hidden Markov model that allows building template-target alignment suitable for structural modeling. Some other 3 399 026 sequences are singletons. BAR 3.0 offers an improved search interface, allowing queries by UniProtKB-accession, Fasta sequence, GO-term, PFAM-domain, organism, PDB and ligand/s. When evaluated on the CAFA2 targets, BAR 3.0 largely outperforms our previous version and scores among state-of-the-art methods. 
BAR 3.0 is publicly available and accessible at http://bar.biocomp.unibo.it/bar3.",2017-07-01 +27129717,CABRA: Cluster and Annotate Blast Results Algorithm.,"

Background

Basic local alignment search tool (BLAST) searches are frequently used to look for homologous sequences and to annotate a query protein, but the increasing size of protein databases makes it difficult to review all results from a similarity search.

Findings

We developed a web tool called Cluster and Annotate Blast Results Algorithm (CABRA), which enables a rapid BLAST search in a variety of updated reference proteomes, and provides a new way to functionally evaluate the results by the subsequent clustering of the hits and annotation of the clusters. The tool can be accessed from the following web-resource: http://cbdm-01.zdv.uni-mainz.de/~munoz/CABRA .

Conclusions

Cluster and Annotate Blast Results Algorithm simplifies the analysis of the results of a BLAST search by providing an overview of the result's annotations organized in clusters that can be iteratively modified by the user.",2016-04-30 +,A new genus and species of hypodermatine bot flies (Diptera: Oestridae),"The bot fly Gruninomyia mira Szpila & Pape, gen.n., sp.n. is described from Iran, North Khorasan, based on a single adult male and with no larval or host data. The monotypic genus shows a mixture of features otherwise found in either the rodent/lagomorph‐parasitizing oestromyine clade (Oestroderma + Oestromyia) or the artiodactyl‐parasitizing hypodermatine clade (Hypoderma + Pallasiomyia + Pavlovskiata + Przhevalskiana + Strobiloestrus) of subfamily Hypodermatinae. A morphology‐based phylogenetic analysis is marginally in favour of a position of Gruninomyia Szpila & Pape, gen.n. as sister taxon of (Oestroderma + Oestromyia). The COI barcode sequence is provided for the new species, and a phylogenetic analysis based on this marker for Oestridae retrieved from GenBank is in agreement with the conclusions based on morphological data. This published work has been registered in ZooBank: http://zoobank.org/urn:lsid:zoobank.org:pub:8F0CBE07‐4E74‐4186‐B690‐2C97D7ED7DA7.",2017-04-01 +24573881,"AnaLysis of Expression on human chromosome 21, ALE-HSA21: a pilot integrated web resource.","Transcriptome studies have shown the pervasive nature of transcription, demonstrating almost all the genes undergo alternative splicing. Accurately annotating all transcripts of a gene is crucial. It is needed to understand the impact of mutations on phenotypes, to shed light on genetic and epigenetic regulation of mRNAs and more generally to widen our knowledge about cell functionality and tissue diversity. 
RNA-sequencing (RNA-Seq), and the other applications of the next-generation sequencing, provides precious data to improve annotations' accuracy, simultaneously creating issues related to the variety, complexity and the size of produced data. In this 'scenario', the lack of user-friendly resources, easily accessible to researchers with low skills in bioinformatics, makes difficult to retrieve complete information about one or few genes without browsing a jungle of databases. Concordantly, the increasing amount of data from 'omics' technologies imposes to develop integrated databases merging different data formats coming from distinct but complementary sources. In light of these considerations, and given the wide interest in studying Down syndrome-a genetic condition due to the trisomy of human chromosome 21 (HSA21)-we developed an integrated relational database and a web interface, named ALE-HSA21 (AnaLysis of Expression on HSA21), accessible at http://bioinfo.na.iac.cnr.it/ALE-HSA21. This comprehensive and user-friendly web resource integrates-for all coding and noncoding transcripts of chromosome 21-existing gene annotations and transcripts identified de novo through RNA-Seq analysis with predictive computational analysis of regulatory sequences. Given the role of noncoding RNAs and untranslated regions of coding genes in key regulatory mechanisms, ALE-HSA21 is also an interesting web-based platform to investigate such processes. The 'transcript-centric' and easily-accessible nature of ALE-HSA21 makes this resource a valuable tool to rapidly retrieve data at the isoform level, rather than at gene level, useful to investigate any disease, molecular pathway or cell process involving chromosome 21 genes. 
Database URL: http://bioinfo.na.iac.cnr.it/ALE-HSA21/.",2014-02-25 +,Estimating Thermodynamic Properties of Pure Triglyceride Systems Using the Triglyceride Property Calculator,"To date, the most comprehensive model for predicting thermodynamic properties of pure triglycerides was presented by Wesdorp in “Liquid-multiple solid phase equilibria in fats: theory and experiments” (1990). In this paper, we present (1) corrections to the published model, as well as (2) a software implementation of the model for numerical assessment. The software tool, Triglyceride Property Calculator (TPC), uses a semi-empirical model to estimate the enthalpy of fusion and melting temperature for a given triglyceride based on its molecular composition and polymorphic form. These estimates are compared to experimentally collected data when available. The web application is available at http://www.crcfoodandhealth.com (under research tools) and through the AOCS Lipid Library. The quality of estimates is characterized according to defined counting metrics and presented for TAG subcategories. Additionally, the extrapolative value of the TPC is assessed by checking for consistency with underlying thermodynamic constraints. The current TPC implementation is effective in describing experimentally collected melting point data, with greater than 91% of the fitted values falling within 10% of the actual data. The TPC is also very good at describing collected enthalpy data. The underlying semi-empirical model and parameter set perform well in ensuring enthalpy predictions are thermodynamically consistent, however, extrapolated melting temperatures appear unreliable. 
Developing models and parameter sets that ensure thermodynamic consistency is a priority with future TPC iterations.",2017-02-01 +30619469,PhageWeb - Web Interface for Rapid Identification and Characterization of Prophages in Bacterial Genomes.,"This study developed a computational tool with a graphical interface and a web-service that allows the identification of phage regions through homology search and gene clustering. It uses G+C content variation evaluation and tRNA prediction sites as evidence to reinforce the presence of prophages in indeterminate regions. Also, it performs the functional characterization of the prophages regions through data integration of biological databases. The performance of PhageWeb was compared to other available tools (PHASTER, Prophinder, and PhiSpy) using Sensitivity (Sn) and Positive Predictive Value (PPV) tests. As a reference for the tests, more than 80 manually annotated genomes were used. In the PhageWeb analysis, the Sn index was 86.1% and the PPV was approximately 87%, while the second best tool presented Sn and PPV values of 83.3 and 86.5%, respectively. These numbers allowed us to observe a greater precision in the regions identified by PhageWeb while compared to other prediction tools submitted to the same tests. Additionally, PhageWeb was much faster than the other computational alternatives, decreasing the processing time to approximately one-ninth of the time required by the second best software. PhageWeb is freely available at http://computationalbiology.ufpa.br/phageweb.",2018-12-18 +24060133,Microbial community analysis using MEGAN.,"Metagenomics, the study of microbes in the environment using DNA sequencing, depends upon dedicated software tools for processing and analyzing very large sequencing datasets. One such tool is MEGAN (MEtaGenome ANalyzer), which can be used to interactively analyze and compare metagenomic and metatranscriptomic data, both taxonomically and functionally. 
To perform a taxonomic analysis, the program places the reads onto the NCBI taxonomy, while functional analysis is performed by mapping reads to the SEED, COG, and KEGG classifications. Samples can be compared taxonomically and functionally, using a wide range of different charting and visualization techniques. PCoA analysis and clustering methods allow high-level comparison of large numbers of samples. Different attributes of the samples can be captured and used within analysis. The program supports various input formats for loading data and can export analysis results in different text-based and graphical formats. The program is designed to work with very large samples containing many millions of reads. It is written in Java and installers for the three major computer operating systems are available from http://www-ab.informatik.uni-tuebingen.de.",2013-01-01 +32269511,Volatilomes of Bacterial Infections in Humans.,"Sense of smell in humans has the capacity to detect certain volatiles from bacterial infections. Our olfactory senses were used in ancient medicine to diagnose diseases in patients. As humans are considered holobionts, each person's unique odor consists of volatile organic compounds (VOCs, volatilome) produced not only by the humans themselves but also by their beneficial and pathogenic micro-habitants. In the past decade it has been well documented that microorganisms (fungi and bacteria) are able to emit a broad range of olfactory active VOCs [summarized in the mVOC database (http://bioinformatics.charite.de/mvoc/)]. During microbial infection, the equilibrium between the human and its microbiome is altered, followed by a change in the volatilome. For several decades, physicians have been trying to utilize these changes in smell composition to develop fast and efficient diagnostic tools, particularly because volatiles detection is non-invasive and non-destructive, which would be a breakthrough in many therapies. 
Within this review, we discuss bacterial infections including gastrointestinal, respiratory or lung, and blood infections, focusing on the pathogens and their known corresponding volatile biomarkers. Furthermore, we cover the potential role of the human microbiota and their volatilome in certain diseases such as neurodegenerative diseases. We also report on discrete mVOCs that affect humans.",2020-03-25 +29136207,LinkedOmics: analyzing multi-omics data within and across 32 cancer types.,"The LinkedOmics database contains multi-omics data and clinical data for 32 cancer types and a total of 11 158 patients from The Cancer Genome Atlas (TCGA) project. It is also the first multi-omics database that integrates mass spectrometry (MS)-based global proteomics data generated by the Clinical Proteomic Tumor Analysis Consortium (CPTAC) on selected TCGA tumor samples. In total, LinkedOmics has more than a billion data points. To allow comprehensive analysis of these data, we developed three analysis modules in the LinkedOmics web application. The LinkFinder module allows flexible exploration of associations between a molecular or clinical attribute of interest and all other attributes, providing the opportunity to analyze and visualize associations between billions of attribute pairs for each cancer cohort. The LinkCompare module enables easy comparison of the associations identified by LinkFinder, which is particularly useful in multi-omics and pan-cancer analyses. The LinkInterpreter module transforms identified associations into biological understanding through pathway and network analysis. Using five case studies, we demonstrate that LinkedOmics provides a unique platform for biologists and clinicians to access, analyze and compare cancer multi-omics data within and across tumor types. LinkedOmics is freely available at http://www.linkedomics.org.",2018-01-01 +28592293,Plasmobase: a comparative database of predicted domain architectures for Plasmodium genomes.,"

Background

With the availability of complete genome sequences of both human and non-human Plasmodium parasites, it is now possible to use comparative genomics to look for orthology across Plasmodium species and for species-specific genes. These comparative analyses could provide important clues for the development of new strategies to prevent and treat malaria in humans; however, the number of functionally annotated proteins is still low for all Plasmodium species. In the context of genomes that are hard to annotate because of sequence divergence, such as Plasmodium, domain co-occurrence becomes particularly important to trust predictions. In particular, domain architecture prediction can be used to improve the performance of existing annotation methods since homologous proteins might share their architectural context.

Results

Plasmobase is a unique database designed for the comparative study of Plasmodium genomes. Domain architecture reconstruction in Plasmobase relies on DAMA, the state-of-the-art method in architecture prediction, while domain annotation is realised with CLADE, a novel annotation tool based on a multi-source strategy. Plasmobase significantly increases the Pfam domain coverage of all Plasmodium genomes and proposes new domain architectures as well as new domain families that have never been reported before for these genomes. It proposes a visualization of domain architectures and allows for an easy comparison among architectures within Plasmodium species and with other species, described in UniProt.

Conclusions

Plasmobase is a valuable new resource for domain annotation in Plasmodium genomes. Its graphical presentation of protein sequences, based on domain architectures, will hopefully be of interest for comparative genomic studies. It should help to discover species-specific genes, possibly underlying important phenotypic differences between parasites, and orthologous gene families for deciphering the biology of these complex and important Apicomplexan organisms. In conclusion, Plasmobase is a flexible and rich site where any biologist can find something of his/her own interest.

Availability

Plasmobase is accessible at http://genome.lcqb.upmc.fr/plasmobase/ .",2017-06-07 +33163255,ImageBox 2 - Efficient and Rapid Access of Image Tiles from Whole-Slide Images Using Serverless HTTP Range Requests.,"

Background

Whole-slide images (WSI) are produced by a high-resolution scanning of pathology glass slides. There are a large number of whole-slide imaging scanners, and the resulting images are frequently larger than 100,000 × 100,000 pixels which typically image 100,000 to one million cells, ranging from several hundred megabytes to many gigabytes in size.

Aims and objectives

Provide HTTP access over the web to Whole Slide Image tiles that do not have localized tiling servers but only basic HTTP access. Move all image decode and tiling functions to calling agent (ImageBox).

Methods

Current software systems require tiling image servers to be installed on systems providing local disk access to these images. ImageBox2 breaks this requirement by accessing tiles from remote HTTP source via byte-level HTTP range requests. This method does not require changing the client software as the operation is relegated to the ImageBox2 server which is local (or remote) to the client and can access tiles from remote images that have no server of their own such as Amazon S3 hosted images. That is, it provides a data service [on a server that does not need to be managed], the definition of serverless execution model increasingly favored by cloud computing infrastructure.

Conclusions

The specific methodology described and assessed in this report preserves normal client connection semantics by enabling cloud-friendly tiling, promoting a web of http connected whole-slide images from a wide-ranging number of sources, and providing tiling where local tiling servers would have been otherwise unavailable.",2020-09-10 +32021823,A single-step protocol for closing experimental atom balances.,"Molar balances are considered to be closed if they are within 95-105%. It was shown in the companion paper ""https://doi.org/10.1016/j.cej.2018.12.113; Chem. Eng. J., 361, 805-811 (2019)"" that even this condition can give rise to pronounced deviations in conversion or selectivity data (Heynderickx, 2019). This manuscript offers a very simple a posteriori calculation procedure to address these deviations via simple linear algebra. The specific details of this procedure, called 'CLOBAL', after 'closing the balances', are shared (1) by showing the mathematics behind-the-scene and (2) by showing the specific programming code with an itemized guideline through the code. Key benefits of proposed procedure CLOBAL script are: •Physical quantities such as molar flow rates, concentrations or absolute number of moles are updated via a one-step linear procedure to close the corresponding atom balances;•The presented CLOBAL procedure, is executed in Excel®, which is accessible and practical for every user - no need for special license and the code is provided; and•Parameter estimation, using treated data, results in smaller confidence intervals and lower residual sum of squares (RSSQ).",2020-01-10 +30105017,ASAP - A Webserver for Immunoglobulin-Sequencing Analysis Pipeline.,"Reproducible and robust data on antibody repertoires are invaluable for basic and applied immunology. Next-generation sequencing (NGS) of antibody variable regions has emerged as a powerful tool in systems immunology, providing quantitative molecular information on antibody polyclonal composition. 
However, major computational challenges exist when analyzing antibody sequences, from error handling to hypermutation profiles and clonal expansion analyses. In this work, we developed the ASAP (A webserver for Immunoglobulin-Seq Analysis Pipeline) webserver (https://asap.tau.ac.il). The input to ASAP is a paired-end sequence dataset from one or more replicates, with or without unique molecular identifiers. These datasets can be derived from NGS of human or murine antibody variable regions. ASAP first filters and annotates the sequence reads using public or user-provided germline sequence information. The ASAP webserver next performs various calculations, including somatic hypermutation level, CDR3 lengths, V(D)J family assignments, and V(D)J combination distribution. These analyses are repeated for each replicate. ASAP provides additional information by analyzing the commonalities and differences between the repeats (""joint"" analysis). For example, ASAP examines the shared variable regions and their frequency in each replicate to determine which sequences are less likely to be a result of a sample preparation derived and/or sequencing errors. Moreover, ASAP clusters the data to clones and reports the identity and prevalence of top ranking clones (clonal expansion analysis). ASAP further provides the distribution of synonymous and non-synonymous mutations within the V genes somatic hypermutations. Finally, ASAP provides means to process the data for proteomic analysis of serum/secreted antibodies by generating a variable region database for liquid chromatography high resolution tandem mass spectrometry (LC-MS/MS) interpretation. ASAP is user-friendly, free, and open to all users, with no login requirement. ASAP is applicable for researchers interested in basic questions related to B cell development and differentiation, as well as applied researchers who are interested in vaccine development and monoclonal antibody engineering. 
By virtue of its user-friendliness, ASAP opens the antibody analysis field to non-expert users who seek to boost their research with immune repertoire analysis.",2018-07-30 +33183883,Gut microbiota associations with diet in irritable bowel syndrome and the effect of low FODMAP diet and probiotics.,"

Background and aims

Diet is both a modulator of the gastrointestinal microbiota and an important therapy in irritable bowel syndrome (IBS). We aimed to comprehensively (i) identify diet-microbiota associations in adults with IBS consuming habitual diet; (ii) assess the impact of two nutritional interventions on the microbiota; and (iii) determine whether baseline microbiota can predict clinical response to diet or probiotic intervention.

Methods

Data were analyzed from 95 individuals with IBS participating in a previously published 4-week 2x2 factorial design randomized controlled trial investigating the impact of the low FODMAP diet (LFD) and co-administration of a probiotic. Diet was assessed at four hierarchical levels and partial 16S rRNA gene sequencing was used to profile the microbiota.

Results

There were numerous diet-microbiota associations especially at the nutrient level, including a negative association between protein and Bifidobacterium abundance (rs = -0.358, p < 0.001). After correction for multiple testing, the significance for this association (q = 0.237) and all others was lost. Low FODMAP diet led to changes in abundance of major saccharolytic genera compared with sham diet, including higher Bacteroides (LFD 34.1% (15.7%) vs sham 23.3% (15.2%), q = 0.01) and lower Bifidobacterium (0.9% (1.0%) vs 2.1% (2.5%), q = 0.029). Compared with placebo, probiotic supplementation led to higher Lactobacillus (probiotic 0.08% (0.1%) vs placebo 0.03% (0.2%), q < 0.001), and Streptococcus abundance (2.0% (2.2%) vs 0.6% (1.2%), q = 0.001). The probiotic treatment buffered the impact of the low FODMAP diet on Bifidobacterium. Baseline microbiota did not predict clinical response to either intervention.

Conclusions

Although diet modifies the gut microbiota, bivariate correlation analysis may only provide a limited explanation of the complex diet interactions with individual gut bacteria in IBS. Some diet interventions modify the microbiota in IBS.

Trial registry

ISRCTN (http://www.isrctn.com) Registered under ISRCTN registry identifier no.ISRCTN02275221.",2020-10-23 +31440553,Experimental data of the distillation of bio-oil from thermal cracking of methyl ester in castor oil.,This article presents the experimental data on distillation of bio-oil obtained from thermal cracking of a mixture of castor oil and its methyl esters. The interpretation of the data can be found in Menshhein et al. (2019) available on https://doi.org/10.1016/j.renene.2019.04.136. Experiments were carried out using a simple distillation apparatus and the products were quantified and qualified from Gas Chromatography - Flame Ionization Detector (GC-FID) with standards compounds. Data were presented in terms of distillation equipment and distillation curve values of volume and temperature of the crude bio-oil sample. Information about GC-FID methods and chromatograms of from standard heptaldehyde and methyl undecenoate and their analytical curve. Carbon number data of crude bio-oil sample was also showed.,2019-07-27 +33015075,DeepKhib: A Deep-Learning Framework for Lysine 2-Hydroxyisobutyrylation Sites Prediction.,"As a novel type of post-translational modification, lysine 2-Hydroxyisobutyrylation (K hib ) plays an important role in gene transcription and signal transduction. In order to understand its regulatory mechanism, the essential step is the recognition of K hib sites. Thousands of K hib sites have been experimentally verified across five different species. However, there are only a couple traditional machine-learning algorithms developed to predict K hib sites for limited species, lacking a general prediction algorithm. We constructed a deep-learning algorithm based on convolutional neural network with the one-hot encoding approach, dubbed CNN OH . It performs favorably to the traditional machine-learning models and other deep-learning models across different species, in terms of cross-validation and independent test. 
The area under the ROC curve (AUC) values for CNN OH ranged from 0.82 to 0.87 for different organisms, which is superior to the currently available K hib predictors. Moreover, we developed the general model based on the integrated data from multiple species and it showed great universality and effectiveness with the AUC values in the range of 0.79-0.87. Accordingly, we constructed the on-line prediction tool dubbed DeepKhib for easily identifying K hib sites, which includes both species-specific and general models. DeepKhib is available at http://www.bioinfogo.org/DeepKhib.",2020-09-09 +,Comprehensive and reliable: a new online portal of critical plant taxa in Germany,"Morphological identification of apomictic micro-species is difficult and requires detailed comparisons with referenced herbarium material. Access to such is limited because herbaria are scattered, and even in public collections misidentifications are not uncommon in these critical taxa. In close collaboration with taxonomic experts of the apomictic polyploid genera Hieracium, Pilosella, Taraxacum sect. Palustria (Asteraceae) and Alchemilla, Crataegus, Rosa, Rubus (Rosaceae), we established an online portal (http://webapp.senckenberg.de/bestikri/) displaying georeferenced and validated herbarium specimens for each respective micro-species of the German Flora. Our focus was on taxonomic reliability rather than on sheer data quantity and thus identifications were validated. We additionally offer macro-photographs as well as descriptions of taxonomically important morphological characters of the respective genera. 
Comprising currently >400 taxa, we trust the online portal will greatly facilitate research into difficult polyploid, apomictic taxa of the Central European flora.",2017-10-01 +29664725,Expectations for Tinnitus Treatment and Outcomes: A Survey Study of Audiologists and Patients.,"BACKGROUND:Roughly 10-15% of the general population is affected by tinnitus and this percentage is estimated to rise in future. Because there is currently no cure for tinnitus, treatment is limited and is primarily achieved through management of symptoms and counseling. PURPOSE:This study compared audiologists' and patients' responses to related survey questions about their expectations regarding tinnitus treatment. Two separate surveys were created, one for patients with tinnitus, and one for practicing audiologists who may treat such patients. The surveys included several related questions, such that comparison of the two could reveal where patients' and audiologists' expectations for tinnitus care were in agreement and areas in which they differed. RESEARCH DESIGN:The surveys for audiologists and adults with tinnitus were 31- and 38-item questionnaires, respectively. Both surveys comprised demographic questions followed by several tinnitus-related questions in either multiple-choice or Likert-scale format. STUDY SAMPLE:We received 230 completed Patient Surveys and 68 completed Audiologist Surveys. DATA COLLECTION AND ANALYSIS:All survey recruitment was completed online. Responses were collected via the Survey Monkey web tool (http://www.surveymonkey.com/). Responses were analyzed within and between surveys and grouped into topical categories (assessment, counseling, current available tinnitus information, satisfaction and expectations, improving tinnitus management). For data within each survey, descriptive statistics and correlation analyses were used. For selected comparisons between surveys, cross-tabulations were used. 
Hierarchical regression modeling was conducted to further explore (1) the perceived effectiveness of treatment received, and (2) how each group defined treatment success. RESULTS:Differences were noted between the two groups' responses to the question on the definition of treatment success; audiologists reported decreased awareness (77%), stress/anxiety relief (63%), and increased knowledge of tinnitus (63%) most commonly, whereas patients reported reduction of tinnitus loudness (63%) and complete elimination of tinnitus (57%) most often. The topic of greatest agreement was the desire for more information on tinnitus; 62% of patients felt more information from their healthcare provider would be the most important factor for improved tinnitus management, and 67% of audiologists reported currently having ""some access"" or less to appropriate resources for tinnitus treatment. Modeling results for effective tinnitus management and definitions of treatment success highlighted the importance of resource access and information sharing for both audiologists and patients. CONCLUSIONS:Patients and audiologists differed in terms of their expectations for successful treatment, with the patients focusing on perceptual factors and the audiologists on the reaction to the sound. Patient satisfaction with tinnitus treatment may be improved through access to more information, specifically, more information about current tinnitus treatment options and how these focus on the patient's reaction to the tinnitus rather than the percept itself. Providing credible tinnitus information resources to audiologists, and focusing resources on training a small number of tinnitus specialist audiologists could greatly improve patient satisfaction with the current state of tinnitus palliative care.",2018-04-01 +32200737,The ecology of heterogeneity: soil bacterial communities and C dynamics.,"Heterogeneity is a fundamental property of soil that is often overlooked in microbial ecology. 
Although it is generally accepted that the heterogeneity of soil underpins the emergence and maintenance of microbial diversity, the profound and far-reaching consequences that heterogeneity can have on many aspects of microbial ecology and activity have yet to be fully apprehended and have not been fully integrated into our understanding of microbial functioning. In this contribution we first discuss how the heterogeneity of the soil microbial environment, and the consequent uncertainty associated with acquiring resources, may have affected how microbial metabolism, motility and interactions evolved and, ultimately, the overall microbial activity that is represented in ecosystem models, such as heterotrophic decomposition or respiration. We then present an analysis of predicted metabolic pathways for soil bacteria, obtained from the MetaCyc pathway/genome database collection (https://metacyc.org/). The analysis suggests that while there is a relationship between phylogenic affiliation and the catabolic range of soil bacterial taxa, there does not appear to be a trade-off between the 16S rRNA gene copy number, taken as a proxy of potential growth rate, of bacterial strains and the range of substrates that can be used. Finally, we present a simple, spatially explicit model that can be used to understand how the interactions between decomposers and environmental heterogeneity affect the bacterial decomposition of organic matter, suggesting that environmental heterogeneity might have important consequences on the variability of this process. This article is part of the theme issue 'Conceptual challenges in microbial community ecology'.",2020-03-23 +29028885,HoTResDB: host transcriptional response database for viral hemorrhagic fevers.,"SUMMARY:High-throughput screening of the host transcriptional response to various viral infections provides a wealth of data, but utilization of microarray and next generation sequencing (NGS) data for analysis can be difficult. 
The Host Transcriptional Response DataBase (HoTResDB), allows visitors to access already processed microarray and NGS data from non-human primate models of viral hemorrhagic fever to better understand the host transcriptional response. AVAILABILITY:HoTResDB is freely available at http://hotresdb.bu.edu.",2018-01-01 +32609005,Household Fuel Use and the Risk of Gastrointestinal Cancers: The Golestan Cohort Study.,"

Background

Three billion people burn nonclean fuels for household purposes. Limited evidence suggests a link between household fuel use and gastrointestinal (GI) cancers.

Objectives

We investigated the relationship between indoor burning of biomass, kerosene, and natural gas with the subsequent risk of GI cancers.

Methods

During the period 2004-2008, a total of 50,045 Iranian individuals 40-75 years of age were recruited to this prospective population-based cohort. Upon enrollment, validated data were collected on demographics, lifestyle, and exposures, including detailed data on lifetime household use of different fuels and stoves. The participants were followed through August 2018 with <1% loss.

Results

During the follow-up, 962 participants developed GI cancers. In comparison with using predominantly gas in the recent 20-y period, using predominantly biomass was associated with higher risks of esophageal [hazard ratio (HR): 1.89; 95% confidence interval (CI): 1.02, 3.50], and gastric (HR: 1.83; 95% CI: 1.01, 3.31) cancers, whereas using predominantly kerosene was associated with higher risk of esophageal cancer (HR: 1.84; 95% CI: 1.10, 3.10). Lifetime duration of biomass burning for both cooking and house heating (exclusive biomass usage) using heating-stoves without chimney was associated with higher risk of GI cancers combined (10-y HR: 1.14; 95% CI: 1.07, 1.21), esophageal (10-y HR: 1.19; 95% CI: 1.08, 1.30), gastric (10-y HR: 1.11; 95% CI: 1.00, 1.23), and colon (10-y HR: 1.26; 95% CI: 1.03, 1.54) cancers. The risks of GI cancers combined, esophageal cancer, and gastric cancer were lower when biomass was burned using chimney-equipped heating-stoves (strata difference p-values=0.001, 0.003, and 0.094, respectively). Duration of exclusive kerosene burning using heating-stoves without chimney was associated with higher risk of GI cancers combined (10-y HR: 1.05; 95% CI: 1.00, 1.11), and esophageal cancer (10-y HR: 1.14; 95% CI: 1.04, 1.26).

Discussion

Household burning of biomass or kerosene, especially without a chimney, was associated with higher risk of some digestive cancers. Using chimney-equipped stoves and replacing these fuels with natural gas may be useful interventions to reduce the burden of GI cancers worldwide. https://doi.org/10.1289/EHP5907.",2020-06-17 +,"LANDFIRE – A national vegetation/fuels data base for use in fuels treatment, restoration, and suppression planning","LANDFIRE is the working name given to the Landscape Fire and Resource Management Planning Tools Project (http://www.landfire.gov). The project was initiated in response to mega-fires and the need for managers to have consistent, wall-to-wall (i.e., all wildlands regardless of agency/ownership), geospatial data, on vegetation, fuels, and terrain to support use of fire behavior and effects prediction systems in guiding policy and management decisions. Base layers were created in a 5-year program of research and development ending in 2009, with processes in place to periodically update fuel and vegetation layers in response to anthropogenic and natural disturbances. LANDFIRE has been institutionalized as the primary data source for modeling activities aimed at meeting the goals of the United States’ National Cohesive Wildland Fire Management Strategy, and the data are available on-line to any user for conducting landscape analyses. Data access and use are high and expected to grow with the increasing scope and complexity of wildland fire management, thus requiring continued LANDFIRE improvements and updates.",2013-04-01 +32973085,Prognostic Significance of Ambulatory BP Monitoring in CKD: A Report from the Chronic Renal Insufficiency Cohort (CRIC) Study.,"

Background

Whether ambulatory BP monitoring is of value in evaluating risk for outcomes in patients with CKD is not clear.

Methods

We followed 1502 participants of the Chronic Renal Insufficiency Cohort (CRIC) Study for a mean of 6.72 years. We evaluated, as exposures, ambulatory BP monitoring profiles (masked uncontrolled hypertension, white-coat effect, sustained hypertension, and controlled BP), mean ambulatory BP monitoring and clinic BPs, and diurnal variation in BP-reverse dipper (higher at nighttime), nondipper, and dipper (lower at nighttime). Outcomes included cardiovascular disease (a composite of myocardial infarction, cerebrovascular accident, heart failure, and peripheral arterial disease), kidney disease (a composite of ESKD or halving of the eGFR), and mortality.

Results

Compared with having controlled BP, the presence of masked uncontrolled hypertension independently associated with higher risk of the cardiovascular outcome and the kidney outcome, but not with all-cause mortality. Higher mean 24-hour systolic BP associated with higher risk of cardiovascular outcome, kidney outcome, and mortality, independent of clinic BP. Participants with the reverse-dipper profile of diurnal BP variation were at higher risk of the kidney outcome.

Conclusions

In this cohort of participants with CKD, BP metrics derived from ambulatory BP monitoring are associated with cardiovascular outcomes, kidney outcomes, and mortality, independent of clinic BP. Masked uncontrolled hypertension and mean 24-hour BP associated with high risk of cardiovascular disease and progression of kidney disease. Alterations of diurnal variation in BP are associated with high risk of progression of kidney disease, stroke, and peripheral arterial disease. These data support the wider use of ambulatory BP monitoring in the evaluation of hypertension in patients with CKD.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/JASN/2020_09_24_JASN2020030236.mp3.",2020-09-24 +29281002,An automated benchmarking platform for MHC class II binding prediction methods.,"Motivation:Computational methods for the prediction of peptide-MHC binding have become an integral and essential component for candidate selection in experimental T cell epitope discovery studies. The sheer amount of published prediction methods-and often discordant reports on their performance-poses a considerable quandary to the experimentalist who needs to choose the best tool for their research. Results:With the goal to provide an unbiased, transparent evaluation of the state-of-the-art in the field, we created an automated platform to benchmark peptide-MHC class II binding prediction tools. The platform evaluates the absolute and relative predictive performance of all participating tools on data newly entered into the Immune Epitope Database (IEDB) before they are made public, thereby providing a frequent, unbiased assessment of available prediction tools. The benchmark runs on a weekly basis, is fully automated, and displays up-to-date results on a publicly accessible website. The initial benchmark described here included six commonly used prediction servers, but other tools are encouraged to join with a simple sign-up procedure. Performance evaluation on 59 data sets composed of over 10 000 binding affinity measurements suggested that NetMHCIIpan is currently the most accurate tool, followed by NN-align and the IEDB consensus method. Availability and implementation:Weekly reports on the participating methods can be found online at: http://tools.iedb.org/auto_bench/mhcii/weekly/. Contact:mniel@bioinformatics.dtu.dk. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +32965128,Developing a Health Impact Model for Adult Lead Exposure and Cardiovascular Disease Mortality.,"

Background

Lead (Pb) is a highly toxic pollutant. Evidence suggests it is associated with cardiovascular disease (CVD)-related mortality.

Objectives

We present a rigorous approach for identifying concentration-response functions that relate adult Pb exposures to CVD mortality to inform a health impact model (HIM). We then use the model in a proof-of-concept example.

Methods

Building on previously conducted government literature reviews and a de novo supplemental literature review, we compiled and evaluated the available data on Pb and CVD mortality in humans. We applied a set of predefined selection criteria to identify studies that would be most useful in understanding the impact of Pb exposure on CVD mortality risk in adults. Once we identified the studies, we derived a HIM and used each study's concentration-response function in a proof-of-concept example.

Results

Our literature search identified 15 studies for full-text review. Of those 15 studies, 4 fit our criteria for use in the HIM. Using population and CVD mortality rates for 40- to 80-y-olds in 2014, we estimated that 34,000-99,000 deaths have been avoided due to the lowering of blood Pb levels from 1999 to 2014. Based on these values we estimated that approximately 16%-46% of the decreased CVD-related death rate from 1999 to 2014 may be attributable to decreased blood Pb levels.

Conclusion

Our results demonstrate that decreases in Pb exposure can result in large benefits for the adult population. We have provided a HIM that can be used in a variety of applications from burden-of-disease estimates to regulatory impact assessments and have demonstrated its sensitivity to the choice of concentration-response function. https://doi.org/10.1289/EHP6552.",2020-09-23 +31872007,Validation data for the use of bradykinin and substance P protease activity assays with capillary blood and blood cards.,"In the associated main paper (""Labeled substance P as a neuropeptide reporter substance for enzyme activity"" (http://doi.org/10.1016/j.jpba.2019.112953)), substance P was shown to be a valuable neuropeptide reporter substance to monitor the protease activity of serum. The assay was developed based on the predecessor assay using bradykinin (""A vote for robustness: Monitoring serum enzyme activity by thin-layer chromatography of dabsylated bradykinin products"", http://doi.org/10.1016/j.jpba.2017.06.007). Both neuropeptides are of interest in inflammation and pain research and were thus explored for use with capillary blood and blood cards (see associated MethodX paper ""Neuropeptide reporter assay for serum, capillary blood and blood cards""). Here, we present validation data for the assay when sampling with blood cards as well as data on the use of fresh capillary blood.",2019-11-23 +30976793,The EMBL-EBI search and sequence analysis tools APIs in 2019.,"The EMBL-EBI provides free access to popular bioinformatics sequence analysis applications as well as to a full-featured text search engine with powerful cross-referencing and data retrieval capabilities. Access to these services is provided via user-friendly web interfaces and via established RESTful and SOAP Web Services APIs (https://www.ebi.ac.uk/seqdb/confluence/display/JDSAT/EMBL-EBI+Web+Services+APIs+-+Data+Retrieval). 
Both systems have been developed with the same core principles that allow them to integrate an ever-increasing volume of biological data, making them an integral part of many popular data resources provided at the EMBL-EBI. Here, we describe the latest improvements made to the frameworks which enhance the interconnectivity between public EMBL-EBI resources and ultimately enhance biological data discoverability, accessibility, interoperability and reusability.",2019-07-01 +33089558,"'I have been all in, I have been all out and I have been everything in-between': A 2-year longitudinal qualitative study of weight loss maintenance.","

Background

Qualitative studies investigating weight management experiences are usually cross-sectional or of short duration, which limits understanding of the long-term challenges.

Methods

Eleven women [mean (SD) age 44.9 (9.8) years; body mass index 40.3 (4.0) kg m⁻²] participated in this longitudinal qualitative study, which included up to 20 weeks of total diet replacement (825-853 kcal day⁻¹) and ongoing support for weight loss maintenance (WLM), to 2 years. Semi-structured interviews were conducted at baseline and programme end, as well as at key intervals during the intervention. Questions examined five theoretical themes: motivation, self-regulation, habits, psychological resources and social/environmental influences. Data were coded and analysed in nvivo (https://qsrinternational.com/nvivo) using the framework method.

Results

In total, 64 interviews were completed (median, n = 6 per participant). Mean (SD) weight loss was 15.7 (9.6) kg (14.6% body weight) at 6 months and 9.6 (9.9) kg (8.8% body weight) at 2 years. The prespecified theoretical model offered a useful framework to capture the variability of experiences. Negative aspects of obesity were strong motivations for weight loss and maintenance. Perceiving new routines as sustainable and developing a 'maintenance mindset' was characteristic of 'Maintainers', whereas meeting emotional needs at the expense of WLM goals during periods of stress and negative mood states was reported more often by 'Regainers'. Optimistic beliefs about maintaining weight losses appeared to interfere with barrier identification and coping planning for most participants.

Conclusions

People tended to be very optimistic about WLM without acknowledging barriers and this may undermine longer-term outcomes. The potential for regain remained over time, mainly as a result of emotion-triggered eating to alleviate stress and negative feelings. More active self-regulation during these circumstances may improve WLM, and these situations represent important targets for intervention.",2020-10-21 +29165655,Expression Atlas: gene and protein expression across multiple studies and organisms.,"Expression Atlas (http://www.ebi.ac.uk/gxa) is an added value database that provides information about gene and protein expression in different species and contexts, such as tissue, developmental stage, disease or cell type. The available public and controlled access data sets from different sources are curated and re-analysed using standardized, open source pipelines and made available for queries, download and visualization. As of August 2017, Expression Atlas holds data from 3,126 studies across 33 different species, including 731 from plants. Data from large-scale RNA sequencing studies including Blueprint, PCAWG, ENCODE, GTEx and HipSci can be visualized next to each other. In Expression Atlas, users can query genes or gene-sets of interest and explore their expression across or within species, tissues, developmental stages in a constitutive or differential context, representing the effects of diseases, conditions or experimental interventions. All processed data matrices are available for direct download in tab-delimited format or as R-data. In addition to the web interface, data sets can now be searched and downloaded through the Expression Atlas R package. 
Novel features and visualizations include the on-the-fly analysis of gene set overlaps and the option to view gene co-expression in experiments investigating constitutive gene expression across tissues or other conditions.",2018-01-01 +32582278,Using Cellular Automata to Simulate Domain Evolution in Proteins.,"Proteins play primary roles in important biological processes such as catalysis, physiological functions, and immune system functions. Thus, the research on how proteins evolved has been a nuclear question in the field of evolutionary biology. General models of protein evolution help to determine the baseline expectations for evolution of sequences, and these models have been extensively useful in sequence analysis as well as for the computer simulation of artificial sequence data sets. We have developed a new method of simulating multi-domain protein evolution, including fusions of domains, insertion, and deletion. It has been observed via the simulation test that the success rates achieved by the proposed predictor are remarkably high. For the convenience of the most experimental scientists, a user-friendly web server has been established at http://jci-bioinfo.cn/domainevo, by which users can easily get their desired results without having to go through the detailed mathematics. Through the simulation results of this website, users can predict the evolution trend of the protein domain architecture.",2020-06-09 +29514934,Potential Point-of-Care Testing for Dengue Virus in the Field. ,"The four serotypes of dengue virus (DENV) cause one of the most important and rapidly emerging mosquito-borne viral diseases in humans. Of the currently available diagnostic tests for dengue, the reverse transcription-PCR (RT-PCR) assay is the most sensitive and specific, and so it is commonly used as the gold standard. 
However, the requirement of a sophisticated and expensive thermal cycler makes it very difficult to use as a point-of-care diagnostic test in resource-limited regions where dengue is endemic. Tsai et al. (J Clin Microbiol 56:e01865-17, 2018, https://doi.org/10.1128/JCM.01865-17) report the analytical and clinical performances of a reverse transcription-insulated isothermal PCR (RT-iiPCR) assay with a portable nucleic acid analyzer for rapid detection of the four DENV serotypes; its reproducibility and complete agreement on clinical samples with the multiplex RT-PCR assay developed by the Centers for Disease Control and Prevention suggest that the dengue RT-iiPCR is a potential point-of-care test. Compared with other DENV RNA detection methods, the unique isothermal PCR design of RT-iiPCR, together with further improvements, would represent a promising new type of field-deployable diagnostic test for dengue.",2018-04-25 +29608773,Mining for recurrent long-range interactions in RNA structures reveals embedded hierarchies in network families.,"The wealth of the combinatorics of nucleotide base pairs enables RNA molecules to assemble into sophisticated interaction networks, which are used to create complex 3D substructures. These interaction networks are essential to shape the 3D architecture of the molecule, and also to provide the key elements to carry molecular functions such as protein or ligand binding. They are made of organised sets of long-range tertiary interactions which connect distinct secondary structure elements in 3D structures. Here, we present a de novo data-driven approach to extract automatically from large data sets of full RNA 3D structures the recurrent interaction networks (RINs). Our methodology enables us for the first time to detect the interaction networks connecting distinct components of the RNA structure, highlighting their diversity and conservation through non-related functional RNAs. 
We use a graphical model to perform pairwise comparisons of all RNA structures available and to extract RINs and modules. Our analysis yields a complete catalog of RNA 3D structures available in the Protein Data Bank and reveals the intricate hierarchical organization of the RNA interaction networks and modules. We assembled our results in an online database (http://carnaval.lri.fr) which will be regularly updated. Within the site, a tool allows users with a novel RNA structure to detect automatically whether the novel structure contains previously observed RINs.",2018-05-01 +30465702,"A Prospective Study of Environmental Exposures and Early Biomarkers in Autism Spectrum Disorder: Design, Protocols, and Preliminary Data from the MARBLES Study.","

Background

Until recently, environmental factors in autism spectrum disorder (ASD) were largely ignored. Over the last decade, altered risks from lifestyle, medical, chemical, and other factors have emerged through various study designs: whole population cohorts linked to diagnostic and/or exposure-related databases, large case-control studies, and smaller cohorts of children at elevated risk for ASD.

Objectives

This study aimed to introduce the MARBLES (Markers of Autism Risk in Babies-Learning Early Signs) prospective study and its goals, motivate the enhanced-risk cohort design, describe protocols and main exposures of interest, and present initial descriptive results for the study population.

Methods

Families having one or more previous child with ASD were contacted before or during a pregnancy, and once the woman became pregnant, were invited to enroll. Data and biological samples were collected throughout pregnancy, at birth, and until the child's third birthday. Neurodevelopment was assessed longitudinally. The study began enrolling in 2006 and is ongoing.

Results

As of 30 June 2018, 463 pregnant mothers have enrolled. Most mothers ([Formula: see text]) were thirty years of age or over, including 7.9% who are forty years of age or over. The sample includes 22% Hispanic and another 25% nonHispanic Black, Asian, or multiracial participants; 24% were born outside the United States. Retention is high: 84% of participants whose pregnancies did not end in miscarriage completed the study or are still currently active. Among children evaluated at 36 months of age, 24% met criteria for ASD, and another 25% were assessed as nonASD nontypical development.

Conclusion

Few environmental studies of ASD prospectively obtain early-life exposure measurements. The MARBLES study fills this gap with extensive data and specimen collection beginning in pregnancy and has achieved excellent retention in an ethnically diverse study population. The 24% familial recurrence risk is consistent with recent reported risks observed in large samples of siblings of children diagnosed with ASD. https://doi.org/10.1289/EHP535.",2018-11-01 +31743833,Ligand based virtual screening using SVM on GPU.,"In silico methods play an essential role in modern drug discovery methods. Virtual screening, an in silico method, is used to filter out the chemical space on which actual wet lab experiments are need to be conducted. Ligand based virtual screening is a computational strategy using which one can build a model of the target protein based on the knowledge of the ligands that bind successfully to the target. This model is then used to predict if the new molecule is likely to bind to the target. Support vector machine, a supervised learning algorithm used for classification, can be utilized for virtual screening the ligand data. When used for virtual screening purpose, SVM could produce interesting results. But since we have a huge ligand data, the time taken for training the SVM model is quite high compared to other learning algorithms. By parallelizing these algorithms on multi-core processors, one can easily expedite these discoveries. In this paper, a GPU based ligand based virtual screening tool (GpuSVMScreen) which uses SVM have been proposed and bench-marked. This data parallel virtual screening tool provides high throughput by running in short time. The proposed GpuSVMScreen can successfully screen large number of molecules (billions) also. 
The source code of this tool is available at http://ccc.nitc.ac.in/project/GPUSVMSCREEN.",2019-11-10 +32071970,Data on assessment of safety and tear proteome change in response to orthokeratology lens - Insight from integrating clinical data and next generation proteomics.,"Breath-O™ Correct Ortho-K lenses are newly designed ortho-K lenses which are made from a silicon and fluoride containing methacrylate compound. This compound is said to be more flexible, durable and less likely to break compared to traditional Ortho-K lenses. The special design of this Ortho-K lens can reshape the corneal profile to induce temporary myopic reduction while producing beneficial peripheral hyperopic defocus for myopia control. To evaluate the safety and ocular surface responses of overnight Ortho-K wear over 1 and 3 months using this new type of material, we evaluated the clinical parameters (corneal integrity, corneal biomechanics, corneal endothelial health, non-invasive keratographical break-up time) and profiled the change of global tear proteome on healthy young subjects using next generation proteomics (SWATH-MS). The acquired mass spectrometric data were processed and analyzed using a cloud based Oneomics™ bioinformatic platform. All raw data generated from Information-dependent acquisition (IDA) and SWATH acquisitions were accepted and published in the Peptide Atlas public repository for general release (http://www.peptideatlas.org/PASS/PASS01367).",2020-01-28 +29804401,[Analysis of significant microRNA associated with chronic thromboembolic pulmonary hypertension].,"Objective: To find key microRNA (miR) associated with chronic thromboembolic pulmonary hypertension (CTEPH). Methods: Affymetrix miR microarray data and GSE56914 data downloaded from GEO database (http: //www.ncbi.nlm.nih.gov/geo/) were obtained and integrated. The microarray data were obtained from peripheral blood samples of CTEPH patients and the matched control. Differentially expressed miRs were screened. 
Target genes of these miRs were searched. Then, functional enrichment analyses for these miRs were performed. After that, disease network including miRs, target genes and pathways was constructed. Results: Five important miRs including hsa-miR-885-5p, hsa-miR-501-5p, hsa-miR-615-3p, hsa-miR-610, and hsa-miR-346 were identified. Furthermore, hsa-miR-885-5p and hsa-miR-501-5p were significantly enriched in cell cycle pathway. Hsa-miR-615-3p was involved in cytokine-cytokine receptor interaction, axon guidance, focal adhesion and cell cycle pathway. Hsa-miR-610 was significantly enriched in focal adhesion pathway, and hsa-miR-346 was involved in cytokine-cytokine receptor interaction, axon guidance, and focal adhesion pathway. Conclusions: Hsa-miR-885-5p, hsa-miR-501-5p, hsa-miR-615-3p, hsa-miR-610 and hsa-miR-346 are important miRs for the development of CTEPH.",2018-05-01 +33039206,EAU-EANM-ESTRO-ESUR-SIOG Guidelines on Prostate Cancer. Part II-2020 Update: Treatment of Relapsing and Metastatic Prostate Cancer.,"

Objective

To present a summary of the 2020 version of the European Association of Urology (EAU)-European Association of Nuclear Medicine (EANM)-European Society for Radiotherapy & Oncology (ESTRO)-European Society of Urogenital Radiology (ESUR)-International Society of Geriatric Oncology (SIOG) guidelines on the treatment of relapsing, metastatic, and castration-resistant prostate cancer (CRPC).

Evidence acquisition

The working panel performed a literature review of the new data (2016-2019). The guidelines were updated, and the levels of evidence and/or grades of recommendation were added based on a systematic review of the literature.

Evidence synthesis

Prostate-specific membrane antigen positron emission tomography computed tomography scanning has developed an increasingly important role in men with biochemical recurrence after local therapy. Early salvage radiotherapy after radical prostatectomy appears as effective as adjuvant radiotherapy and, in a subset of patients, should be combined with androgen deprivation. New treatments have become available for men with metastatic hormone-sensitive prostate cancer (PCa), nonmetastatic CRPC, and metastatic CRPC, along with a role for local radiotherapy in men with low-volume metastatic hormone-sensitive PCa. Also included is information on quality of life outcomes in men with PCa.

Conclusions

The knowledge in the field of advanced and metastatic PCa and CRPC is changing rapidly. The 2020 EAU-EANM-ESTRO-ESUR-SIOG guidelines on PCa summarise the most recent findings and advice for use in clinical practice. These PCa guidelines are first endorsed by the EANM and reflect the multidisciplinary nature of PCa management. A full version is available from the EAU office or online (http://uroweb.org/guideline/prostate-cancer/).

Patient summary

This article summarises the guidelines for the treatment of relapsing, metastatic, and castration-resistant prostate cancer. These guidelines are evidence based and guide the clinician in the discussion with the patient on the treatment decisions to be taken. These guidelines are updated every year; this summary spans the 2017-2020 period of new evidence.",2020-10-07 +31099384,NGSEP3: accurate variant calling across species and sequencing protocols.,"

Motivation

Accurate detection, genotyping and downstream analysis of genomic variants from high-throughput sequencing data are fundamental features in modern production pipelines for genetic-based diagnosis in medicine or genomic selection in plant and animal breeding. Our research group maintains the Next-Generation Sequencing Experience Platform (NGSEP) as a precise, efficient and easy-to-use software solution for these features.

Results

Understanding that incorrect alignments around short tandem repeats are an important source of genotyping errors, we implemented in NGSEP new algorithms for realignment and haplotype clustering of reads spanning indels and short tandem repeats. We performed extensive benchmark experiments comparing NGSEP to state-of-the-art software using real data from three sequencing protocols and four species with different distributions of repetitive elements. NGSEP consistently shows comparative accuracy and better efficiency compared to the existing solutions. We expect that this work will contribute to the continuous improvement of quality in variant calling needed for modern applications in medicine and agriculture.

Availability and implementation

NGSEP is available as open source software at http://ngsep.sf.net.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +26304062,Comparative Effectiveness of Biomarkers to Target Cancer Treatment: Modeling Implications for Survival and Costs.,"

Background

Biomarkers used at the time of diagnosis to tailor treatment decisions may diffuse into clinical practice before data become available on whether biomarker testing reduces cancer mortality. In the interim, quantitative estimates of the mortality impact of testing are needed to assess the value of these diagnostic biomarkers. These estimates are typically generated by customized models that are resource intensive to build and apply.

Methods

We developed a user-friendly system of models for Cancer Translation of Comparative Effectiveness Research (CANTRANce) to model the mortality impact of cancer interventions. The Diagnostic Biomarker module of this system projects the mortality impact of testing for a diagnostic biomarker, given data on how testing affects treatment recommendations. Costs and quality-of-life outcomes may also be modeled. We applied the Diagnostic Biomarker module to 2 case studies to demonstrate its capabilities.

Results

The user interface (http://www.fhcrc.org/cantrance) allows comparative effectiveness researchers to use the Diagnostic Biomarker module of CANTRANce. Our case studies indicate that the model produces estimates on par with those generated by customized models and is a strong tool for quickly generating novel projections.

Limitations

The simple structure that makes CANTRANce user-friendly also constrains the complexity with which cancer progression can be modeled. The quality of the results rests on the quality of the input data, which may pertain to small or dissimilar populations or suffer from informative censoring.

Conclusions

The Diagnostic Biomarker module of CANTRANce is a novel public resource that can provide timely insights into the expected mortality impact of testing for diagnostic biomarkers. The model projections should be useful for understanding the long-term potential of emerging diagnostic biomarkers.",2015-08-24 +32205149,Expansion of the Major Facilitator Superfamily (MFS) to include novel transporters as well as transmembrane-acting enzymes.,"The Major Facilitator Superfamily (MFS) is currently the largest characterized superfamily of transmembrane secondary transport proteins. Its diverse members are found in essentially all organisms in the biosphere and function by uniport, symport, and/or antiport mechanisms. In 1993 we first named and described the MFS which then consisted of 5 previously known families that had not been known to be related, and by 2012 we had identified a total of 74 families, classified phylogenetically within the MFS, all of which included only transport proteins. This superfamily has since expanded to 89 families, all included under TC# 2.A.1, and a few transporter families outside of TC# 2.A.1 were identified as members of the MFS. In this study, we assign nine previously unclassified protein families in the Transporter Classification Database (TCDB; http://www.tcdb.org) to the MFS based on multiple criteria and bioinformatic methodologies. In addition, we find integral membrane domains distantly related to partial or full-length MFS permeases in Lysyl tRNA Synthases (TC# 9.B.111), Lysylphosphatidyl Glycerol Synthases (TC# 4.H.1), and cytochrome b561 transmembrane electron carriers (TC# 5.B.2). Sequence alignments, overlap of hydropathy plots, compatibility of repeat units, similarity of complexity profiles of transmembrane segments, shared protein domains and 3D structural similarities between transport proteins were analyzed to assist in inferring homology. 
The MFS now includes 105 families.",2020-03-20 +32258898,Molecular Architect: A User-Friendly Workflow for Virtual Screening.,"Computer-assisted drug design (CADD) methods have greatly contributed to the development of new drugs. Among CADD methodologies, virtual screening (VS) can enrich the compound collection with molecules that have the desired physicochemical and pharmacophoric characteristics that are needed to become drugs. Many free tools are available for this purpose, but they are difficult to use and do not have a graphical user interface. Furthermore, several free tools must be used to carry out the entire VS process, requiring the user to process the results of one software program so that they can be used in another program, adding a potential source of human error. Moreover, some software programs require knowledge of advanced computational skills, such as programming languages. This context has motivated us to develop Molecular Architect (MolAr). MolAr is a workflow with a simple and intuitive interface that acts in an integrated and automated form to perform the entire VS process, from protein preparation (homology modeling and protonation state) to virtual screening. MolAr carries out VS through AutoDock Vina, DOCK 6, or a consensus of the two. Two case studies were conducted to demonstrate the performance of MolAr. In the first study, the feasibility of using MolAr for DNA-ligand systems was assessed. Both AutoDock Vina and DOCK 6 showed good results in performing VS in DNA-ligand systems. However, the use of consensus virtual screening was able to enrich the results. According to the area under the ROC curve and the enrichment factors, consensus VS was better able to predict the positions of the active ligands. The second case study was performed on 8 targets from the DUD-E database and 10 active ligands for each target. 
The results demonstrated that using the final ligand conformation provided by AutoDock Vina as an input for DOCK 6 improved the DOCK 6 ROC curves by up to 42% in VS. These case studies demonstrated that MolAr is capable conducting the VS process and is an easy-to-use and effective tool. MolAr is available for download free of charge at http: //www.drugdiscovery.com.br/software/.",2020-03-20 +31656848,Absorption coefficients data of lead iodine perovskites using 14 different organic cations.,"This Data article presents the absorption coefficients of Lead Iodine perovskites using 14 different organic cations. In addition, the absorption coefficients have been split into inter-atomic species components in order to quantify all of the contributions. For more details on the methodology, interpretation and discussion, refer to the full length article entitled ""Effect Of the organic cation on the optical properties of lead iodine perovskites"". https://doi.org/10.1016/j.solmat.2019.110022 Data may be useful for future research, and to identify the contribution of different species to the absorption.",2019-10-07 +32573785,Multi-omics analysis reveals the functional transcription and potential translation of enhancers.,"Enhancer can transcribe RNAs, however, most of them were neglected in traditional RNA-seq analysis workflow. Here, we developed a Pipeline for Enhancer Transcription (PET, http://fun-science.club/PET) for quantifying enhancer RNAs (eRNAs) from RNA-seq. By applying this pipeline on lung cancer samples and cell lines, we showed that the transcribed enhancers are enriched with histone marks and transcription factor motifs (JUNB, Hand1-Tcf3 and GATA4). By training a machine learning model, we demonstrate that enhancers can predict prognosis better than their nearby genes. 
Integrating the Hi-C, ChIP-seq and RNA-seq data, we observe that transcribed enhancers associate with cancer hallmarks or oncogenes, among which LcsMYC-1 (Lung cancer-specific MYC eRNA-1) potentially supports MYC expression. Surprisingly, a significant proportion of transcribed enhancers contain small protein-coding open reading frames (sORFs) and can be translated into microproteins. Our study provides a computational method for eRNA quantification and deepens our understandings of the DNA, RNA and protein nature of enhancers.",2020-07-01 +27899578,COSMIC: somatic cancer genetics at high-resolution.,"COSMIC, the Catalogue of Somatic Mutations in Cancer (http://cancer.sanger.ac.uk) is a high-resolution resource for exploring targets and trends in the genetics of human cancer. Currently the broadest database of mutations in cancer, the information in COSMIC is curated by expert scientists, primarily by scrutinizing large numbers of scientific publications. Over 4 million coding mutations are described in v78 (September 2016), combining genome-wide sequencing results from 28 366 tumours with complete manual curation of 23 489 individual publications focused on 186 key genes and 286 key fusion pairs across all cancers. Molecular profiling of large tumour numbers has also allowed the annotation of more than 13 million non-coding mutations, 18 029 gene fusions, 187 429 genome rearrangements, 1 271 436 abnormal copy number segments, 9 175 462 abnormal expression variants and 7 879 142 differentially methylated CpG dinucleotides. COSMIC now details the genetics of drug resistance, novel somatic gene mutations which allow a tumour to evade therapeutic cancer drugs. Focusing initially on highly characterized drugs and genes, COSMIC v78 contains wide resistance mutation profiles across 20 drugs, detailing the recurrence of 301 unique resistance alleles across 1934 drug-resistant tumours. 
All information from the COSMIC database is available freely on the COSMIC website.",2016-11-28 +32415960,PLIDflow: an open-source workflow for the online analysis of protein-ligand docking using galaxy.,"

Motivation

Molecular docking is aimed at predicting the conformation of small molecules (ligands) within an identified binding site (BS) in a target protein (receptor). Protein-ligand docking plays an important role in modern drug discovery and biochemistry for protein engineering. However, efficient docking analysis of proteins requires prior knowledge of the BS, which is not always known. The process which covers BS identification and protein-ligand docking usually requires the combination of different programs, which require several input parameters. This is further aggravated when factoring in computational demands, such as CPU-time. Therefore, these types of simulation experiments can become a complex process for researchers without a background in computer sciences.

Results

To overcome these problems, we have designed an automatic computational workflow (WF) to process protein-ligand complexes, which runs from the identification of the possible BSs positions to the prediction of the experimental binding modes and affinities of the ligand. This open-access WF runs under the Galaxy platform that integrates public domain software. The results of the proposed method are in close agreement with state-of-the-art docking software.

Availability and implementation

Software is available at: https://pistacho.ac.uma.es/galaxy-bitlab.

Contact

euv@uma.es.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +32164662,Methodological procedures followed in a school-and community-based intervention to prevent type 2 diabetes in vulnerable families across Europe: the Feel4Diabetes-study.,"Feel4Diabetes (standing for: Families across Europe following a hEalthy Lifestyle for Diabetes prevention, http://feel4diabetes-study.eu/) is a school and community based intervention program, aiming to prevent type 2 diabetes (T2D) among families from vulnerable population groups, in six European countries, by promoting healthy lifestyle. In the current issue of BMC Endocrine Disorders, three reviews and three papers providing a detailed description of the methodology used to obtain measurements related to the trial conduction, as well as two papers using original data collected in the Feel4Diabetes-study are presented.",2020-03-12 +30395255,VFDB 2019: a comparative pathogenomic platform with an interactive web interface.,"The virulence factor database (VFDB, http://www.mgc.ac.cn/VFs/) is devoted to providing the scientific community with a comprehensive warehouse and online platform for deciphering bacterial pathogenesis. The various combinations, organizations and expressions of virulence factors (VFs) are responsible for the diverse clinical symptoms of pathogen infections. Currently, whole-genome sequencing is widely used to decode potential novel or variant pathogens both in emergent outbreaks and in routine clinical practice. However, the efficient characterization of pathogenomic compositions remains a challenge for microbiologists or physicians with limited bioinformatics skills. Therefore, we introduced to VFDB an integrated and automatic pipeline, VFanalyzer, to systematically identify known/potential VFs in complete/draft bacterial genomes. VFanalyzer first constructs orthologous groups within the query genome and preanalyzed reference genomes from VFDB to avoid potential false positives due to paralogs. 
Then, it conducts iterative and exhaustive sequence similarity searches among the hierarchical prebuilt datasets of VFDB to accurately identify potential untypical/strain-specific VFs. Finally, via a context-based data refinement process for VFs encoded by gene clusters, VFanalyzer can achieve relatively high specificity and sensitivity without manual curation. In addition, a thoroughly optimized interactive web interface is introduced to present VFanalyzer reports in comparative pathogenomic style for easy online analysis.",2019-01-01 +32421816,FGviewer: an online visualization tool for functional features of human fusion genes.,"Among the diverse location of the breakpoints (BPs) of structural variants (SVs), the breakpoints of fusion genes (FGs) are located in the gene bodies. This broken gene context provided the aberrant functional clues to study disease genesis. Many tumorigenic fusion genes have retained or lost functional or regulatory domains and these features impacted tumorigenesis. Full annotation of fusion genes aided by the visualization tool based on two gene bodies will be helpful to study the functional aspect of fusion genes. To date, a specialized tool with effective visualization of the functional features of fusion genes is not available. In this study, we built FGviewer, a tool for visualizing functional features of human fusion genes, which is available at https://ccsmweb.uth.edu/FGviewer. FGviewer gets the input of fusion gene symbols, breakpoint information, or structural variants from whole-genome sequence (WGS) data. For any combination of gene pairs/breakpoints to be involved in fusion genes, the users can search the functional/regulatory aspect of the fusion gene in the three bio-molecular levels (DNA-, RNA-, and protein-levels) and one clinical level (pathogenic-level). 
FGviewer will be a unique online tool in disease research communities.",2020-07-01 +30295701,Multi-insight visualization of multi-omics data via ensemble dimension reduction and tensor factorization.,"

Motivation

Visualization of high-dimensional data is an important step in exploratory data analysis and knowledge discovery. However, it is challenging, because the interpretation is highly subjective. If we see dimensionality reduction (DR) techniques as the main tool for data visualization, they are like multiple cameras that look into the data from different perspectives or angles. We can hardly prescribe one single perspective for all datasets and problems. One snapshot of data cannot reveal all the relevant aspects of the data in higher dimensions. The reason is that each of these methods has its own specific strategy, normally based on well-established mathematical theories to obtain a low-dimensional projection of the data, which sometimes is totally different from the others. Therefore, relying only on one single projection can be risky, because it can close our eyes to important parts of the full knowledge space.

Results

We propose the first framework for multi-insight data visualization of multi-omics data. This approach, contrary to single-insight approaches, is able to uncover the majority of data features through multiple insights. The main idea behind the methodology is to combine several DR methods via tensor factorization and group the solutions into an optimal number of clusters (or insights). The experimental evaluation with low-dimensional synthetic data, simulated multi-omics data related to ovarian cancer, as well as real multi-omics data related to breast cancer show the competitive advantage over state-of-the-art methods.

Availability and implementation

https://folk.uio.no/hadift/MIV/ [user/pass via hadift@medisin.uio.no].

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +29335563,Visualization and analysis of non-covalent contacts using the Protein Contacts Atlas.,"Visualizations of biomolecular structures empower us to gain insights into biological functions, generate testable hypotheses, and communicate biological concepts. Typical visualizations (such as ball and stick) primarily depict covalent bonds. In contrast, non-covalent contacts between atoms, which govern normal physiology, pathogenesis, and drug action, are seldom visualized. We present the Protein Contacts Atlas, an interactive resource of non-covalent contacts from over 100,000 PDB crystal structures. We developed multiple representations for visualization and analysis of non-covalent contacts at different scales of organization: atoms, residues, secondary structure, subunits, and entire complexes. The Protein Contacts Atlas enables researchers from different disciplines to investigate diverse questions in the framework of non-covalent contacts, including the interpretation of allostery, disease mutations and polymorphisms, by exploring individual subunits, interfaces, and protein-ligand contacts and by mapping external information. The Protein Contacts Atlas is available at http://www.mrc-lmb.cam.ac.uk/pca/ and also through PDBe.",2018-01-15 +32193422,A database of freshwater fish species of the Amazon Basin.,"The Amazon Basin is an unquestionable biodiversity hotspot, containing the highest freshwater biodiversity on earth and facing off a recent increase in anthropogenic threats. The current knowledge on the spatial distribution of the freshwater fish species is greatly deficient in this basin, preventing a comprehensive understanding of this hyper-diverse ecosystem as a whole. Filling this gap was the priority of a transnational collaborative project, i.e. the AmazonFish project - https://www.amazon-fish.com/. 
Relying on the outputs of this project, we provide the most complete fish species distribution records covering the whole Amazon drainage. The database, including 2,406 validated freshwater native fish species, 232,936 georeferenced records, results from an extensive survey of species distribution including 590 different sources (e.g. published articles, grey literature, online biodiversity databases and scientific collections from museums and universities worldwide) and field expeditions conducted during the project. This database, delivered at both georeferenced localities (21,500 localities) and sub-drainages grains (144 units), represents a highly valuable source of information for further studies on freshwater fish biodiversity, biogeography and conservation.",2020-03-19 +30364951,DoriC 10.0: an updated database of replication origins in prokaryotic genomes including chromosomes and plasmids.,"DoriC, a database of replication origins, was initially created to present the bacterial oriCs predicted by Ori-Finder or determined by experiments in 2007. DoriC 5.0, an updated database of oriC regions in both bacterial and archaeal genomes, was published in the 2013 Nucleic Acids Research database issue. Now, the latest release DoriC 10, a large-scale update of replication origins in prokaryotic genomes including chromosomes and plasmids, has been presented with a completely redesigned user interface, which is freely available at http://tubic.org/doric/ and http://tubic.tju.edu.cn/doric/. 
In the current release, the database of DoriC has made significant improvements compared with version 5.0 as follows: (i) inclusion of oriCs on more bacterial chromosomes increased from 1633 to 7580; (ii) inclusion of oriCs on more archaeal chromosomes increased from 86 to 226; (iii) inclusion of 1209 plasmid replication origins retrieved from NCBI annotations or predicted by in silico analysis; (iv) inclusion of more replication origin elements on bacterial chromosomes including DnaA-trio motifs. Now, DoriC becomes the most complete and scalable database of replication origins in prokaryotic genomes, and facilitates the studies in large-scale oriC data mining, strand-biased analyses and replication origin predictions.",2019-01-01 +30321428,New approach for understanding genome variations in KEGG.,"KEGG (Kyoto Encyclopedia of Genes and Genomes; https://www.kegg.jp/ or https://www.genome.jp/kegg/) is a reference knowledge base for biological interpretation of genome sequences and other high-throughput data. It is an integrated database consisting of three generic categories of systems information, genomic information and chemical information, and an additional human-specific category of health information. KEGG pathway maps, BRITE hierarchies and KEGG modules have been developed as generic molecular networks with KEGG Orthology nodes of functional orthologs so that KEGG pathway mapping and other procedures can be applied to any cellular organism. Unfortunately, however, this generic approach was inadequate for knowledge representation in the health information category, where variations of human genomes, especially disease-related variations, had to be considered. Thus, we have introduced a new approach where human gene variants are explicitly incorporated into what we call 'network variants' in the recently released KEGG NETWORK database. 
This allows accumulation of knowledge about disease-related perturbed molecular networks caused not only by gene variants, but also by viruses and other pathogens, environmental factors and drugs. We expect that KEGG NETWORK will become another reference knowledge base for the basic understanding of disease mechanisms and practical use in clinical sequencing and drug development.",2019-01-01 +33017176,"Air Pollution and Progression of Atherosclerosis in Different Vessel Beds-Results from a Prospective Cohort Study in the Ruhr Area, Germany.","

Objectives

Due to inconsistent epidemiological evidence on health effects of air pollution on progression of atherosclerosis, we investigated several air pollutants and their effects on progression of atherosclerosis, using carotid intima media thickness (cIMT), coronary calcification (CAC), and thoracic aortic calcification (TAC).

Methods

We used baseline (2000-2003) and 5-y follow-up (2006-2008) data from the German Heinz Nixdorf Recall cohort study, including 4,814 middle-aged adults. Residence-based long-term air pollution exposure, including particulate matter (PM) with aerodynamic diameter ≤2.5μm (PM2.5), (PM10), and nitrogen dioxide (NO2) was assessed using chemistry transport and land use regression (LUR) models. cIMT was quantified as side-specific median IMT assessed from standardized ultrasound images. CAC and TAC were quantified by computed tomography using the Agatston score. Development (yes/no) and progression of atherosclerosis (change in cIMT and annual growth rate for CAC/TAC) were analyzed with logistic and linear regression models, adjusting for age, sex, lifestyle variables, socioeconomic status, and traffic noise.

Results

While no clear associations were observed in the full study sample (mean age 59.1 (±7.6) y; 53% female), most air pollutants were marginally associated with progression of atherosclerosis in participants with no or low baseline atherosclerotic burden. Most consistently for CAC, e.g., a 1.5 μg/m3 higher exposure to PM2.5 (LUR) yielded an estimated odds ratio of 1.19 [95% confidence interval (CI): 1.03, 1.39] for progression of CAC and an increased annual growth rate of 2% (95% CI: 1%, 4%).

Conclusion

Our study suggests that development and progression of subclinical atherosclerosis is associated with long-term air pollution in middle-aged participants with no or minor atherosclerotic burden at baseline, while overall no consistent associations are observed. https://doi.org/10.1289/EHP7077.",2020-10-05 +31559014,A curated transcriptome dataset collection to investigate inborn errors of immunity.,"Primary immunodeficiencies (PIDs) are a heterogeneous group of inherited disorders, frequently caused by loss-of-function and less commonly by gain-of-function mutations, which can result in susceptibility to a broad or a very narrow range of infections but also in inflammatory, allergic or malignant diseases. Owing to the wide range in clinical manifestations and variability in penetrance and expressivity, there is an urgent need to better understand the underlying molecular, cellular and immunological phenotypes in PID patients in order to improve clinical diagnosis and management. Here we have compiled a manually curated collection of public transcriptome datasets mainly obtained from human whole blood, peripheral blood mononuclear cells (PBMCs) or fibroblasts of patients with PIDs and of control subjects for subsequent meta-analysis, query and interpretation. A total of eighteen (18) datasets derived from studies of PID patients were identified and retrieved from the NCBI Gene Expression Omnibus (GEO) database and loaded in GXB, a custom web application designed for interactive query and visualization of integrated large-scale data. The dataset collection includes samples from well characterized PID patients that were stimulated ex vivo under a variety of conditions to assess the molecular consequences of the underlying, naturally occurring gene defects on a genome-wide scale. Multiple sample groupings and rank lists were generated to facilitate comparisons of the transcriptional responses between different PID patients and control subjects. 
The GXB tool enables browsing of a single transcript across studies, thereby providing new perspectives on the role of a given molecule across biological systems and PID patients. This dataset collection is available at http://pid.gxbsidra.org/dm3/geneBrowser/list.",2019-02-15 +32560894,"Natural and anthropogenic processes affecting radon releases during mining and early stage reclamation activities, Pinenut uranium mine, Arizona, USA.","Radon (Rnair) was monitored in open air in publicly accessible areas surrounding the Pinenut uranium (U) mine during mining and reclamation activities in 2015-16 to address concerns about mining related effects to areas surrounding Grand Canyon National Park (GCNP) in Arizona, USA. During July 2015, Rnair concentrations associated with the ore storage pile monitoring site were larger than those at the mine vent monitoring site and likely resulted from the relatively large amount of ore stored on site during this period. Higher wind velocities at the ore pile monitoring site generally resulted in lower Rnair concentrations; however, wind velocity did not appear to be an important factor in controlling Rnair concentrations at the mine vent monitoring site. Physical disturbances of the ore pile by heavy equipment did not coincide with elevated Rnair concentrations at the ore storage pile or mine vent monitoring sites. The relative size of the ore storage pile showed a positive trend with the daily mean Rnair concentration measured at the ore pile monitoring site. Principal component analysis (PCA) was applied to the ore pile and mine vent multivariate data sets for simultaneous comparison of all measured variables during 230 days of the study period. A significant positive coefficient for Rnair was associated with a significant negative coefficient for wind speed for principal component (PC) 2ore pile. 
Significant, positive PC2mine vent coefficients included Rnair, wind direction, and relative ore pile size indicating that Rnair variations at the mine vent monitoring site may be affected by Rn sourced from the ore pile. The ore pile is located about 200 m south of the mine vent Rn monitor with the prevalent wind direction coming from the south. All data generated during the field study and laboratory verification tests were published by Naftz et al. (2018) and are available online at: https://doi.org/10.5066/F79Z946T.",2020-05-18 +25762455,"amamutdb.no: A relational database for MAN2B1 allelic variants that compiles genotypes, clinical phenotypes, and biochemical and structural data of mutant MAN2B1 in α-mannosidosis.","α-Mannosidosis is an autosomal recessive lysosomal storage disorder caused by mutations in the MAN2B1 gene, encoding lysosomal α-mannosidase. The disorder is characterized by a range of clinical phenotypes of which the major manifestations are mental impairment, hearing impairment, skeletal changes, and immunodeficiency. Here, we report an α-mannosidosis mutation database, amamutdb.no, which has been constructed as a publicly accessible online resource for recording and analyzing MAN2B1 variants (http://amamutdb.no). Our aim has been to offer structured and relational information on MAN2B1 mutations and genotypes along with associated clinical phenotypes. Classifying missense mutations, as pathogenic or benign, is a challenge. Therefore, they have been given special attention as we have compiled all available data that relate to their biochemical, functional, and structural properties. The α-mannosidosis mutation database is comprehensive and relational in the sense that information can be retrieved and compiled across datasets; hence, it will facilitate diagnostics and increase our understanding of the clinical and molecular aspects of α-mannosidosis. 
We believe that the amamutdb.no structure and architecture will be applicable for the development of databases for any monogenic disorder.",2015-04-09 +24578355,StaphyloBase: a specialized genomic resource for the staphylococcal research community.,"With the advent of high-throughput sequencing technologies, many staphylococcal genomes have been sequenced. Comparative analysis of these strains will provide better understanding of their biology, phylogeny, virulence and taxonomy, which may contribute to better management of diseases caused by staphylococcal pathogens. We developed StaphyloBase with the goal of having a one-stop genomic resource platform for the scientific community to access, retrieve, download, browse, search, visualize and analyse the staphylococcal genomic data and annotations. We anticipate this resource platform will facilitate the analysis of staphylococcal genomic data, particularly in comparative analyses. StaphyloBase currently has a collection of 754 032 protein-coding sequences (CDSs), 19 258 rRNAs and 15 965 tRNAs from 292 genomes of different staphylococcal species. Information about these features is also included, such as putative functions, subcellular localizations and gene/protein sequences. Our web implementation supports diverse query types and the exploration of CDS- and RNA-type information in detail using an AJAX-based real-time search system. JBrowse has also been incorporated to allow rapid and seamless browsing of staphylococcal genomes. The Pairwise Genome Comparison tool is designed for comparative genomic analysis, for example, to reveal the relationships between two user-defined staphylococcal genomes. A newly designed Pathogenomics Profiling Tool (PathoProT) is also included in this platform to facilitate comparative pathogenomics analysis of staphylococcal strains. In conclusion, StaphyloBase offers access to a range of staphylococcal genomic resources as well as analysis tools for comparative analyses. 
Database URL: http://staphylococcus.um.edu.my/.",2014-02-26 +30590428,CSHAP: efficient haplotype frequency estimation based on sparse representation.,"

Motivation

Estimating haplotype frequencies from genotype data plays an important role in genetic analysis. In silico methods are usually computationally involved since phase information is not available. Due to tight linkage disequilibrium and low recombination rates, the number of haplotypes observed in human populations is far less than all the possibilities. This motivates us to solve the estimation problem by maximizing the sparsity of existing haplotypes. Here, we propose a new algorithm by applying the compressive sensing (CS) theory in the field of signal processing, compressive sensing haplotype inference (CSHAP), to solve the sparse representation of haplotype frequencies based on allele frequencies and between-allele co-variances.

Results

Our proposed approach can handle both individual genotype data and pooled DNA data with hundreds of loci. The CSHAP exhibits the same accuracy compared with the state-of-the-art methods, but runs several orders of magnitude faster. CSHAP can also handle missing genotype data imputation efficiently.

Availability and implementation

The CSHAP is implemented in R; the source code and the testing datasets are available at http://home.ustc.edu.cn/~zhouys/CSHAP/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +25252779,PIP-DB: the Protein Isoelectric Point database.,"

Unlabelled

A protein's isoelectric point or pI corresponds to the solution pH at which its net surface charge is zero. Since the early days of solution biochemistry, the pI has been recorded and reported, and thus literature reports of pI abound. The Protein Isoelectric Point database (PIP-DB) has collected and collated these data to provide an increasingly comprehensive database for comparison and benchmarking purposes. A web application has been developed to warehouse this database and provide public access to this unique resource. PIP-DB is a web-enabled SQL database with an HTML GUI front-end. PIP-DB is fully searchable across a range of properties.

Availability and implementation

The PIP-DB database and documentation are available at http://www.pip-db.org.",2014-09-23 +30367574,CamurWeb: a classification software and a large knowledge base for gene expression data of cancer.,"

Background

The high growth of Next Generation Sequencing data currently demands new knowledge extraction methods. In particular, the RNA sequencing gene expression experimental technique stands out for case-control studies on cancer, which can be addressed with supervised machine learning techniques able to extract human interpretable models composed of genes, and their relation to the investigated disease. State of the art rule-based classifiers are designed to extract a single classification model, possibly composed of few relevant genes. Conversely, we aim to create a large knowledge base composed of many rule-based models, and thus determine which genes could be potentially involved in the analyzed tumor. This comprehensive and open access knowledge base is required to disseminate novel insights about cancer.

Results

We propose CamurWeb, a new method and web-based software that is able to extract multiple and equivalent classification models in the form of logic formulas (""if then"" rules) and to create a knowledge base of these rules that can be queried and analyzed. The method is based on an iterative classification procedure and an adaptive feature elimination technique that enables the computation of many rule-based models related to the cancer under study. Additionally, CamurWeb includes a user-friendly interface for running the software, querying the results, and managing the performed experiments. The user can create her profile, upload her gene expression data, run the classification analyses, and interpret the results with predefined queries. In order to validate the software we apply it to all publicly available RNA sequencing datasets from The Cancer Genome Atlas database obtaining a large open access knowledge base about cancer. CamurWeb is available at http://bioinformatics.iasi.cnr.it/camurweb .

Conclusions

The experiments prove the validity of CamurWeb, obtaining many classification models and thus several genes that are associated to 21 different cancer types. Finally, the comprehensive knowledge base about cancer and the software tool are released online; interested researchers have free access to them for further studies and to design biological experiments in cancer research.",2018-10-15 +32183712,CuAS: a database of annotated transcripts generated by alternative splicing in cucumbers.,"BACKGROUND:Alternative splicing (AS) plays a critical regulatory role in modulating transcriptome and proteome diversity. In particular, it increases the functional diversity of proteins. Recent genome-wide analysis of AS using RNA-Seq has revealed that AS is highly pervasive in plants. Furthermore, it has been suggested that most AS events are subject to tissue-specific regulation. DESCRIPTION:To reveal the functional characteristics induced by AS and tissue-specific splicing events, a database for exploring these characteristics is needed, especially in plants. To address these goals, we constructed a database of annotated transcripts generated by alternative splicing in cucumbers (CuAS: http://cmb.bnu.edu.cn/alt_iso/index.php) that integrates genomic annotations, isoform-level functions, isoform-level features, and tissue-specific AS events among multiple tissues. CuAS supports a retrieval system that identifies unique IDs (gene ID, isoform ID, UniProt ID, and gene name), chromosomal positions, and gene families, and a browser for visualization of each gene. CONCLUSION:We believe that CuAS could be helpful for revealing the novel functional characteristics induced by AS and tissue-specific AS events in cucumbers. CuAS is freely available at http://cmb.bnu.edu.cn/alt_iso/index.php.",2020-03-18 +27587585,REDIportal: a comprehensive database of A-to-I RNA editing events in humans.,"RNA editing by A-to-I deamination is the prominent co-/post-transcriptional modification in humans. 
It is carried out by ADAR enzymes and contributes to both transcriptomic and proteomic expansion. RNA editing has pivotal cellular effects and its deregulation has been linked to a variety of human disorders including neurological and neurodegenerative diseases and cancer. Despite its biological relevance, many physiological and functional aspects of RNA editing are yet elusive. Here, we present REDIportal, available online at http://srv00.recas.ba.infn.it/atlas/, the largest and comprehensive collection of RNA editing in humans including more than 4.5 million A-to-I events detected in 55 body sites from thousands of RNAseq experiments. REDIportal embeds RADAR database and represents the first editing resource designed to answer functional questions, enabling the inspection and browsing of editing levels in a variety of human samples, tissues and body sites. In contrast with previous RNA editing databases, REDIportal comprises its own browser (JBrowse) that allows users to explore A-to-I changes in their genomic context, emphasizing repetitive elements in which RNA editing is prominent.",2016-09-01 +28197060,A searchable database for the genome of Phomopsis longicolla (isolate MSPL 10-6).,"Phomopsis longicolla (syn. Diaporthe longicolla) is an important seed-borne fungal pathogen that primarily causes Phomopsis seed decay (PSD) in most soybean production areas worldwide. This disease severely decreases soybean seed quality by reducing seed viability and oil quality, altering seed composition, and increasing frequencies of moldy and/or split beans. To facilitate investigation of the genetic base of fungal virulence factors and understand the mechanism of disease development, we designed and developed a database for P. longicolla isolate MSPL 10-6 that contains information about the genome assemblies (contigs), gene models, gene descriptions and GO functional ontologies. 
A web-based front end to the database was built using ASP.NET, which allows researchers to search and mine the genome of this important fungus. This database represents the first reported genome database for a seed borne fungal pathogen in the Diaporthe- Phomopsis complex. The database will also be a valuable resource for research and agricultural communities. It will aid in the development of new control strategies for this pathogen.

Availability

http://bioinformatics.towson.edu/Phomopsis_longicolla/HomePage.aspx.",2016-07-26 +31228193,Genetic cooperativity in multi-layer networks implicates cell survival and senescence in the striatum of Huntington's disease mice synchronous to symptoms.,"

Motivation

Huntington's disease (HD) may evolve through gene deregulation. However, the impact of gene deregulation on the dynamics of genetic cooperativity in HD remains poorly understood. Here, we built a multi-layer network model of temporal dynamics of genetic cooperativity in the brain of HD knock-in mice (allelic series of Hdh mice). To enhance biological precision and gene prioritization, we integrated three complementary families of source networks, all inferred from the same RNA-seq time series data in Hdh mice, into weighted-edge networks where an edge recapitulates path-length variation across source-networks and age-points.

Results

Weighted edge networks identify two consecutive waves of tight genetic cooperativity enriched in deregulated genes (critical phases), pre-symptomatically in the cortex, implicating neurotransmission, and symptomatically in the striatum, implicating cell survival (e.g. Hipk4) intertwined with cell proliferation (e.g. Scn4b) and cellular senescence (e.g. Cdkn2a products) responses. Top striatal weighted edges are enriched in modulators of defective behavior in invertebrate models of HD pathogenesis, validating their relevance to neuronal dysfunction in vivo. Collectively, these findings reveal highly dynamic temporal features of genetic cooperativity in the brain of Hdh mice where a 2-step logic highlights the importance of cellular maintenance and senescence in the striatum of symptomatic mice, providing highly prioritized targets.

Availability and implementation

Weighted edge network analysis (WENA) data and source codes for performing spectral decomposition of the signal (SDS) and WENA analysis, both written using Python, are available at http://www.broca.inserm.fr/HD-WENA/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +31738408,Discovery of disease- and drug-specific pathways through community structures of a literature network.,"

Motivation

In light of the massive growth of the scientific literature, text mining is increasingly used to extract biological pathways. Though multiple tools explore individual connections between genes, diseases and drugs, few extensively synthesize pathways for specific diseases and drugs.

Results

Through community detection of a literature network, we extracted 3444 functional gene groups that represented biological pathways for specific diseases and drugs. The network linked Medical Subject Headings (MeSH) terms of genes, diseases and drugs that co-occurred in publications. The resulting communities detected highly associated genes, diseases and drugs. These significantly matched current knowledge of biological pathways and predicted future ones in time-stamped experiments. Likewise, disease- and drug-specific communities also recapitulated known pathways for those given diseases and drugs. Moreover, diseases sharing communities had high comorbidity with each other and drugs sharing communities had many common side effects, consistent with related mechanisms. Indeed, the communities robustly recovered mutual targets for drugs [area under Receiver Operating Characteristic curve (AUROC)=0.75] and shared pathogenic genes for diseases (AUROC=0.82). These data show that literature communities inform not only just known biological processes but also suggest novel disease- and drug-specific mechanisms that may guide disease gene discovery and drug repurposing.

Availability and implementation

Application tools are available at http://meteor.lichtargelab.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +30482172,dbMPIKT: a database of kinetic and thermodynamic mutant protein interactions.,"

Background

Protein-protein interactions (PPIs) play important roles in biological functions. Studies of the effects of mutants on protein interactions can provide further understanding of PPIs. Currently, many databases collect experimental mutants to assess protein interactions, but most of these databases are old and have not been updated for several years.

Results

To address this issue, we manually curated a kinetic and thermodynamic database of mutant protein interactions (dbMPIKT) that is freely accessible at our website. This database contains 5291 mutants in protein interactions collected from previous databases and the literature published within the last three years. Furthermore, some data analysis, such as mutation number, mutation type, protein pair source and network map construction, can be performed online.

Conclusion

Our work can promote the study on PPIs, and novel information can be mined from the new database. Our database is available in http://DeepLearner.ahu.edu.cn/web/dbMPIKT/ for use by all, including both academics and non-academics.",2018-11-27 +31459550,InterSpin: Integrated Supportive Webtools for Low- and High-Field NMR Analyses Toward Molecular Complexity.,"InterSpin (http://dmar.riken.jp/interspin/) comprises integrated, supportive, and freely accessible preprocessing webtools and a database to advance signal assignment in low- and high-field NMR analyses of molecular complexities ranging from small molecules to macromolecules for food, material, and environmental applications. To support handling of the broad spectra obtained from solid-state NMR or low-field benchtop NMR, we have developed and evaluated two preprocessing tools: sensitivity improvement with spectral integration, which enhances the signal-to-noise ratio by spectral integration, and peaks separation, which separates overlapping peaks by several algorithms, such as non-negative sparse coding. In addition, the InterSpin Laboratory Information Management System (SpinLIMS) database stores numerous standard spectra ranging from small molecules to macromolecules in solid and solution states (dissolved in polar/nonpolar solvents), and can be searched under various conditions using the following molecular assignment tools. SpinMacro supports easy assignment of macromolecules in natural mixtures via solid-state 13C peaks and dimethyl sulfoxide-dissolved 1H-13C correlation peaks. InterAnalysis improves the accuracy of molecular assignment by integrated analysis of 1H-13C correlation peaks and 1H-J correlation peaks of small molecules dissolved in D2O or deuterated methanol, which supports easy narrowing down of metabolite candidates. 
Finally, by enabling database interoperability, SpinLIMS's client software will ultimately support scientific discovery by facilitating sharing and reusing of NMR data.",2019-02-14 +30760842,Development and validation of whole genome-wide and genic microsatellite markers in oil palm (Elaeis guineensis Jacq.): First microsatellite database (OpSatdb).,"The availability of large expressed sequence tag (EST) and whole genome databases of oil palm enabled the development of a data base of microsatellite markers. For this purpose, an EST database consisting of 40,979 EST sequences spanning 27 Mb and a chromosome-wise whole genome databases were downloaded. A total of 3,950 primer pairs were identified and developed from EST sequences. The tri and tetra nucleotide repeat motifs were most prevalent (each 24.75%) followed by di-nucleotide repeat motifs. Whole genome-wide analysis found a total of 245,654 SSR repeats across the 16 chromosomes of oil palm, of which 38,717 were compound microsatellite repeats. A web application, OpSatdb, the first microsatellite database of oil palm, was developed using the PHP and MySQL database ( https://ssr.icar.gov.in/index.php ). It is a simple and systematic web-based search engine for searching SSRs based on repeat motif type, repeat type, and primer details. High synteny was observed between oil palm and rice genomes. The mapping of ESTs having SSRs by Blast2GO resulted in the identification of 19.2% sequences with gene ontology (GO) annotations. Randomly, a set of ten genic SSRs and five genomic SSRs were used for validation and genetic diversity on 100 genotypes belonging to the world oil palm genetic resources. The grouping pattern was observed to be broadly in accordance with the geographical origin of the genotypes. 
The identified genic and genome-wide SSRs can be effectively useful for various genomic applications of oil palm, such as genetic diversity, linkage map construction, mapping of QTLs, marker-assisted selection, and comparative population studies.",2019-02-13 +36337286,Effect of source and level of forage in the diet on in vitro ammonia emission from manure of Holstein and Jersey dairy cows.,"Reducing overall reactive N losses from dairy production systems depends substantially on reducing the atmospheric emission of manure ammonia (NH3). The objective of this study was to determine potential NH3-N emission of reconstituted manure using an in vitro protocol. Feces and urine were collected from a companion study designed as a Latin square in which 4 Holstein and 4 Jersey cows were fed diets containing 2 levels of forage neutral detergent fiber (NDF) [low-forage NDF (19%) vs. high-forage NDF (24%; dry matter basis)] from either alfalfa silage or corn silage (70:30 vs. 30:70 ratio of alfalfa silage NDF:corn silage NDF) arranged as a 2 × 2 factorial. All diets contained similar levels of crude protein (17%) and starch (23%), and had forage-to-concentrate ratios of 55:45 and 68:32 for low- and high-forage NDF diets, respectively. Measurements of NH3-N emission were conducted in a laboratory-scale chamber with 16 g of reconstituted manure (urine plus feces) incubated for 48 h at 15°C with sampling at 1, 3, 6, 12, 24, 36, and 48 h. Hourly NH3-N emissions data were analyzed using a repeated-measures mixed model in R (https://www.r-project.org/). The fixed effects were breed, forage NDF level, forage NDF source, time of sampling, and all possible interactions; cow was included as a random term. The cumulative 48-h NH3-N emissions and the scaled-up emissions accounting for daily output of manure from each cow were analyzed using the same model but without time of sampling. 
Level and source of forage in the diet tended to influence the pattern in hourly rate and 48-h cumulative emission, respectively. Accounting for daily manure volume differences, low-forage NDF diets led to lower estimates of daily NH3-N emissions than high-forage NDF diets (20% on a cow basis, 15% on a raw manure basis, and 18% on a manure-N basis). Compared with Holsteins, Jerseys emitted 17% lower estimated NH3-N on a cow basis, mainly due to lower manure excretion but tended to emit 15% more NH3-N expressed on a manure-N basis. Findings of this study suggested that cow breed and dietary forage NDF level should be considered in the prediction of NH3-N emission from the dairy industry.",2020-12-11 +25273106,"Curation, integration and visualization of bacterial virulence factors in PATRIC.","

Motivation

We've developed a highly curated bacterial virulence factor (VF) library in PATRIC (Pathosystems Resource Integration Center, www.patricbrc.org) to support infectious disease research. Although several VF databases are available, there is still a need to incorporate new knowledge found in published experimental evidence and integrate these data with other information known for these specific VF genes, including genomic and other omics data. This integration supports the identification of VFs, comparative studies and hypothesis generation, which facilitates the understanding of virulence and pathogenicity.

Results

We have manually curated VFs from six prioritized NIAID (National Institute of Allergy and Infectious Diseases) category A-C bacterial pathogen genera, Mycobacterium, Salmonella, Escherichia, Shigella, Listeria and Bartonella, using published literature. This curated information on virulence has been integrated with data from genomic functional annotations, transcriptomic experiments, protein-protein interactions and disease information already present in PATRIC. Such integration gives researchers access to a broad array of information about these individual genes, and also to a suite of tools to perform comparative genomic and transcriptomics analysis that are available at PATRIC.

Availability and implementation

All tools and data are freely available at PATRIC (http://patricbrc.org).

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-30 +31350858,Lipid remodeling regulator 1 (LRL1) is differently involved in the phosphorus-depletion response from PSR1 in Chlamydomonas reinhardtii.,"The elucidation of lipid metabolism in microalgae has attracted broad interest, as their storage lipid, triacylglycerol (TAG), can be readily converted into biofuel via transesterification. TAG accumulates in the form of oil droplets, especially when cells undergo nutrient deprivation, such as for nitrogen (N), phosphorus (P), or sulfur (S). TAG biosynthesis under N-deprivation has been comprehensively studied in the model microalga Chlamydomonas reinhardtii, during which TAG accumulates dramatically. However, the resulting rapid breakdown of chlorophyll restricts overall oil yield productivity and causes cessation of cell growth. In contrast, P-deprivation results in oil accumulation without disrupting chloroplast integrity. We used a reverse genetics approach based on co-expression analysis to identify a transcription factor (TF) that is upregulated under P-depleted conditions. Transcriptomic analysis revealed that the mutants showed repression of genes typically associated with lipid remodeling under P-depleted conditions, such as sulfoquinovosyl diacylglycerol 2 (SQD2), diacylglycerol acyltransferase (DGTT1), and major lipid droplet protein (MLDP). As accumulation of sulfoquinovosyl diacylglycerol and TAG were suppressed in P-depleted mutants, we designated the protein as lipid remodeling regulator 1 (LRL1). LRL1 mutants showed slower growth under P-depletion. Moreover, cell size in the mutant was significantly reduced, and TAG and starch accumulation per cell were decreased. Transcriptomic analysis also suggested the repression of several genes typically upregulated in adaptation to P-depletion that are associated with the cell cycle and P and lipid metabolism. 
Thus, our analysis of LRL1 provides insights into P-allocation and lipid remodeling under P-depleted conditions in C. reinhardtii. OPEN RESEARCH BADGES: This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The sequencing data were made publicly available under the BioProject Accession number PRJDB6733 and an accession number LC488724 at the DNA Data Bank of Japan (DDBJ). The data is available at https://trace.ddbj.nig.ac.jp/BPSearch/bioproject?acc=PRJDB6733; http://getentry.ddbj.nig.ac.jp/getentry/na/LC488724. The metabolome data were made publicly available and can be accessed at http://metabolonote.kazusa.or.jp/SE195:/; http://webs2.kazusa.or.jp/data/nur/.",2019-08-23 +29377907,ProtDataTherm: A database for thermostability analysis and engineering of proteins.,"Protein thermostability engineering is a powerful tool to improve resistance of proteins against high temperatures and thereafter broaden their applications. For efficient protein thermostability engineering, different thermostability-classified data sources including sequences and 3D structures are needed for different protein families. However, no data source is available providing such data easily. It is the first release of ProtDataTherm database for analysis and engineering of protein thermostability which contains more than 14 million protein sequences categorized based on their thermal stability and protein family. This database contains data needed for better understanding protein thermostability and stability engineering. Providing categorized protein sequences and structures as psychrophilic, mesophilic and thermophilic makes this database useful for the development of new tools in protein stability prediction. This database is available at http://profiles.bs.ipm.ir/softwares/protdatatherm. 
As a proof of concept, the thermostability that improves mutations were suggested for one sample protein belonging to one of protein families with more than 20 mesophilic and thermophilic sequences and with known experimentally measured ΔT of mutations available within ProTherm database.",2018-01-29 +32667808,Computational Strategies and Challenges for Using Native Ion Mobility Mass Spectrometry in Biophysics and Structural Biology.,"Native mass spectrometry (MS) allows the interrogation of structural aspects of macromolecules in the gas phase, under the premise of having initially maintained their solution-phase noncovalent interactions intact. In the more than 25 years since the first reports, the utility of native MS has become well established in the structural biology community. The experimental and technological advances during this time have been rapid, resulting in dramatic increases in sensitivity, mass range, resolution, and complexity of possible experiments. As experimental methods have improved, there have been accompanying developments in computational approaches for analyzing and exploiting the profusion of MS data in a structural and biophysical context. In this perspective, we consider the computational strategies currently being employed by the community, aspects of best practice, and the challenges that remain to be addressed. Our perspective is based on discussions within the European Cooperation in Science and Technology Action on Native Mass Spectrometry and Related Methods for Structural Biology (EU COST Action BM1403), which involved participants from across Europe and North America. It is intended not as an in-depth review but instead to provide an accessible introduction to and overview of the topic-to inform newcomers to the field and stimulate discussions in the community about addressing existing challenges. 
Our complementary perspective (http://dx.doi.org/10.1021/acs.analchem.9b05792) focuses on software tools available to help researchers tackle some of the challenges enumerated here.",2020-07-29 +25378335,MeT-DB: a database of transcriptome methylation in mammalian cells.,"Methyltranscriptome is an exciting new area that studies the mechanisms and functions of methylation in transcripts. The MethylTranscriptome DataBase (MeT-DB, http://compgenomics.utsa.edu/methylation/) is the first comprehensive resource for N6-methyladenosine (m(6)A) in mammalian transcriptome. It includes a database that records publicaly available data sets from methylated RNA immunoprecipitation sequencing (MeRIP-Seq), a recently developed technology for interrogating m(6)A methyltranscriptome. MeT-DB includes ∼ 300 k m(6)A methylation sites in 74 MeRIP-Seq samples from 22 different experimental conditions predicted by exomePeak and MACS2 algorithms. To explore this rich information, MeT-DB also provides a genome browser to query and visualize context-specific m(6)A methylation under different conditions. MeT-DB also includes the binding site data of microRNA, splicing factor and RNA binding proteins in the browser window for comparison with m(6)A sites and for exploring the potential functions of m(6)A. Analysis of differential m(6)A methylation and the related differential gene expression under two conditions is also available in the browser. A global perspective of the genome-wide distribution of m(6)A methylation in all the data is provided in circular ideograms, which also act as a navigation portal. The query results and the entire data set can be exported to assist publication and additional analysis.",2014-11-06 +30627605,Data for evolutive analysis of insulin related peptides in bilaterian species.,"In bilaterian species, the amino acid sequence conservation between Insulin related peptides is relatively low except for the cysteine residues involved in the disulphide bonds. 
In the A chain, the conserved cystein residues are included in a signature motif. Investigating the variations in this motif would give insight into the phylogenetic history of the family. The table presented in this paper contains a large set of insulin-related peptides in bilateral phylogenetic groups (deuterostomian, ecdysozoan, lophotrochozoan). NCBI databases in silico wide screening combined with bibliographic researches provided a framework for identifying and categorising the structural characteristics of these insulin related peptides. The dataset includes NCBI IDs of each sequence with hyperlinks to FASTA format. Moreover, the structural type (α, β or γ), the A chain motif, the total number of cysteins, the C peptide cleavage mode and the potential additional domains (D or E) are specified for each sequence. The data are associated with the research article ""Molecular evolution and functional characterisation of insulin-related peptides in molluscs: contributions of Crassostrea gigas genomic and transcriptomic-wide screening"" [1]. The table presented here can be found at http://dx.doi.org/10.17632/w4gr8zcpk5.4#file-21c0f6a5-a3e3-4a15-86e0-e5a696458866.",2018-12-18 +29461516,A database of chlorophyll a in Australian waters.,"Chlorophyll a is the most commonly used indicator of phytoplankton biomass in the marine environment. It is relatively simple and cost effective to measure when compared to phytoplankton abundance and is thus routinely included in many surveys. Here we collate 173, 333 records of chlorophyll a collected since 1965 from Australian waters gathered from researchers on regular coastal monitoring surveys and ocean voyages into a single repository. This dataset includes the chlorophyll a values as measured from samples analysed using spectrophotometry, fluorometry and high performance liquid chromatography (HPLC). 
The Australian Chlorophyll a database is freely available through the Australian Ocean Data Network portal (https://portal.aodn.org.au/). These data can be used in isolation as an index of phytoplankton biomass or in combination with other data to provide insight into water quality, ecosystem state, and relationships with other trophic levels such as zooplankton or fish.",2018-02-20 +33220709,Microbial function and genital inflammation in young South African women at high risk of HIV infection.,"

Background

Female genital tract (FGT) inflammation is an important risk factor for HIV acquisition. The FGT microbiome is closely associated with inflammatory profile; however, the relative importance of microbial activities has not been established. Since proteins are key elements representing actual microbial functions, this study utilized metaproteomics to evaluate the relationship between FGT microbial function and inflammation in 113 young and adolescent South African women at high risk of HIV infection. Women were grouped as having low, medium, or high FGT inflammation by K-means clustering according to pro-inflammatory cytokine concentrations.

Results

A total of 3186 microbial and human proteins were identified in lateral vaginal wall swabs using liquid chromatography-tandem mass spectrometry, while 94 microbial taxa were included in the taxonomic analysis. Both metaproteomics and 16S rRNA gene sequencing analyses showed increased non-optimal bacteria and decreased lactobacilli in women with FGT inflammatory profiles. However, differences in the predicted relative abundance of most bacteria were observed between 16S rRNA gene sequencing and metaproteomics analyses. Bacterial protein functional annotations (gene ontology) predicted inflammatory cytokine profiles more accurately than bacterial relative abundance determined by 16S rRNA gene sequence analysis, as well as functional predictions based on 16S rRNA gene sequence data (p < 0.0001). The majority of microbial biological processes were underrepresented in women with high inflammation compared to those with low inflammation, including a Lactobacillus-associated signature of reduced cell wall organization and peptidoglycan biosynthesis. This signature remained associated with high FGT inflammation in a subset of 74 women 9 weeks later, was upheld after adjusting for Lactobacillus relative abundance, and was associated with in vitro inflammatory cytokine responses to Lactobacillus isolates from the same women. Reduced cell wall organization and peptidoglycan biosynthesis were also associated with high FGT inflammation in an independent sample of ten women.

Conclusions

Both the presence of specific microbial taxa in the FGT and their properties and activities are critical determinants of FGT inflammation. Our findings support those of previous studies suggesting that peptidoglycan is directly immunosuppressive, and identify a possible avenue for biotherapeutic development to reduce inflammation in the FGT. To facilitate further investigations of microbial activities, we have developed the FGT-DB application that is available at http://fgtdb.org/ . Video Abstract.",2020-11-21 +26343929,A guide to genome-wide association analysis and post-analytic interrogation.,"This tutorial is a learning resource that outlines the basic process and provides specific software tools for implementing a complete genome-wide association analysis. Approaches to post-analytic visualization and interrogation of potentially novel findings are also presented. Applications are illustrated using the free and open-source R statistical computing and graphics software environment, Bioconductor software for bioinformatics and the UCSC Genome Browser. Complete genome-wide association data on 1401 individuals across 861,473 typed single nucleotide polymorphisms from the PennCATH study of coronary artery disease are used for illustration. All data and code, as well as additional instructional resources, are publicly available through the Open Resources in Statistical Genomics project: http://www.stat-gen.org.",2015-09-06 +32467650,Interpretable multimodal deep learning for real-time pan-tissue pan-disease pathology search on social media.,"Pathologists are responsible for rapidly providing a diagnosis on critical health issues. Challenging cases benefit from additional opinions of pathologist colleagues. In addition to on-site colleagues, there is an active worldwide community of pathologists on social media for complementary opinions. 
Such access to pathologists worldwide has the capacity to improve diagnostic accuracy and generate broader consensus on next steps in patient care. From Twitter we curate 13,626 images from 6,351 tweets from 25 pathologists from 13 countries. We supplement the Twitter data with 113,161 images from 1,074,484 PubMed articles. We develop machine learning and deep learning models to (i) accurately identify histopathology stains, (ii) discriminate between tissues, and (iii) differentiate disease states. Area Under Receiver Operating Characteristic (AUROC) is 0.805-0.996 for these tasks. We repurpose the disease classifier to search for similar disease states given an image and clinical covariates. We report precision@k = 1 = 0.7618 ± 0.0018 (chance 0.397 ± 0.004, mean ±stdev ). The classifiers find that texture and tissue are important clinico-visual features of disease. Deep features trained only on natural images (e.g., cats and dogs) substantially improved search performance, while pathology-specific deep features and cell nuclei features further improved search to a lesser extent. We implement a social media bot (@pathobot on Twitter) to use the trained classifiers to aid pathologists in obtaining real-time feedback on challenging cases. If a social media post containing pathology text and images mentions the bot, the bot generates quantitative predictions of disease state (normal/artifact/infection/injury/nontumor, preneoplastic/benign/low-grade-malignant-potential, or malignant) and lists similar cases across social media and PubMed. Our project has become a globally distributed expert system that facilitates pathological diagnosis and brings expertise to underserved regions or hospitals with less expertise in a particular disease. This is the first pan-tissue pan-disease (i.e., from infection to malignancy) method for prediction and search on social media, and the first pathology study prospectively tested in public on social media. 
We will share data through http://pathobotology.org . We expect our project to cultivate a more connected world of physicians and improve patient care worldwide.",2020-05-28 +33254138,Predicting nonroutine discharge in patients undergoing surgery for vertebral column tumors.,"

Objective

More than 8000 patients are treated annually for vertebral column tumors, of whom roughly two-thirds will be discharged to an inpatient facility (nonroutine discharge). Nonroutine discharge is associated with increased care costs as well as delays in discharge and poorer patient outcomes. In this study, the authors sought to develop a prediction model of nonroutine discharge in the population of vertebral column tumor patients.

Methods

Patients treated for primary or metastatic vertebral column tumors at a single comprehensive cancer center were identified for inclusion. Data were gathered regarding surgical procedure, patient demographics, insurance status, and medical comorbidities. Frailty was assessed using the modified 5-item Frailty Index (mFI-5) and medical complexity was assessed using the modified Charlson Comorbidity Index (mCCI). Multivariable logistic regression was used to identify independent predictors of nonroutine discharge, and multivariable linear regression was used to identify predictors of prolonged length of stay (LOS). The discharge model was internally validated using 1000 bootstrapped samples.

Results

The authors identified 350 patients (mean age 57.0 ± 13.6 years, 53.1% male, and 67.1% treated for metastatic vs primary disease). Significant predictors of prolonged LOS included higher mCCI score (β = 0.74; p = 0.026), higher serum absolute neutrophil count (β = 0.35; p = 0.001), lower hematocrit (β = -0.34; p = 0.001), use of a staged operation (β = 4.99; p < 0.001), occurrence of postoperative pulmonary embolism (β = 3.93; p = 0.004), and surgical site infection (β = 9.93; p < 0.001). Significant predictors of nonroutine discharge included emergency admission (OR 3.09; p = 0.001), higher mFI-5 score (OR 1.90; p = 0.001), lower serum albumin level (OR 0.43 per g/dL; p < 0.001), and operations with multiple stages (OR 4.10; p < 0.001). The resulting statistical model was deployed as a web-based calculator (https://jhuspine4.shinyapps.io/Nonroutine_Discharge_Tumor/).

Conclusions

The authors found that nonroutine discharge of patients with surgically treated vertebral column tumors was predicted by emergency admission, increased frailty, lower serum albumin level, and staged surgical procedures. The resulting web-based calculator tool may be useful clinically to aid in discharge planning for spinal oncology patients by preoperatively identifying patients likely to require placement in an inpatient facility postoperatively.",2020-11-20 +29416546,"GSHR, a Web-Based Platform Provides Gene Set-Level Analyses of Hormone Responses in Arabidopsis.","Phytohormones regulate diverse aspects of plant growth and environmental responses. Recent high-throughput technologies have promoted a more comprehensive profiling of genes regulated by different hormones. However, these omics data generally result in large gene lists that make it challenging to interpret the data and extract insights into biological significance. With the rapid accumulation of theses large-scale experiments, especially the transcriptomic data available in public databases, a means of using this information to explore the transcriptional networks is needed. Different platforms have different architectures and designs, and even similar studies using the same platform may obtain data with large variances because of the highly dynamic and flexible effects of plant hormones; this makes it difficult to make comparisons across different studies and platforms. Here, we present a web server providing gene set-level analyses of Arabidopsis thaliana hormone responses. GSHR collected 333 RNA-seq and 1,205 microarray datasets from the Gene Expression Omnibus, characterizing transcriptomic changes in Arabidopsis in response to phytohormones including abscisic acid, auxin, brassinosteroids, cytokinins, ethylene, gibberellins, jasmonic acid, salicylic acid, and strigolactones. 
These data were further processed and organized into 1,368 gene sets regulated by different hormones or hormone-related factors. By comparing input gene lists to these gene sets, GSHR helped to identify gene sets from the input gene list regulated by different phytohormones or related factors. Together, GSHR links prior information regarding transcriptomic changes induced by hormones and related factors to newly generated data and facilities cross-study and cross-platform comparisons; this helps facilitate the mining of biologically significant information from large-scale datasets. The GSHR is freely available at http://bioinfo.sibs.ac.cn/GSHR/.",2018-01-24 +29624889,PhyMet2 : a database and toolkit for phylogenetic and metabolic analyses of methanogens.,"The vast biodiversity of the microbial world and how little is known about it, has already been revealed by extensive metagenomics analyses. Our rudimentary knowledge of microbes stems from difficulties concerning their isolation and culture in laboratory conditions, which is necessary for describing their phenotype, among other things, for biotechnological purposes. An important component of the understudied ecosystems is methanogens, archaea producing a potent greenhouse-effect gas methane. Therefore, we created PhyMet2 , the first database that combines descriptions of methanogens and their culturing conditions with genetic information. The database contains a set of utilities that facilitate interactive data browsing, data comparison, phylogeny exploration and searching for sequence homologues. The most unique feature of the database is the web server MethanoGram, which can be used to significantly reduce the time and cost of searching for the optimal culturing conditions of methanogens by predicting them based on 16S RNA sequences. The database will aid many researchers in exploring the world of methanogens and their applications in biotechnological processes. 
PhyMet2 with the MethanoGram predictor is available at http://metanogen.biotech.uni.wroc.pl.",2018-06-01 +30793160,Anduril 2: upgraded large-scale data integration framework.,"

Summary

Anduril is an analysis and integration framework that facilitates the design, use, parallelization and reproducibility of bioinformatics workflows. Anduril has been upgraded to use Scala for pipeline construction, which simplifies software maintenance, and facilitates design of complex pipelines. Additionally, Anduril's bioinformatics repository has been expanded with multiple components, and tutorial pipelines, for next-generation sequencing data analysis.

Availability and implementation

Freely available at http://anduril.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +32582286,LITHOPHONE: Improving lncRNA Methylation Site Prediction Using an Ensemble Predictor.,"N 6-methyladenosine (m6A) is one of the most widely studied epigenetic modifications, which plays an important role in many biological processes, such as splicing, RNA localization, and degradation. Studies have shown that m6A on lncRNA has important functions, including regulating the expression and functions of lncRNA, regulating the synthesis of pre-mRNA, promoting the proliferation of cancer cells, and affecting cell differentiation and many others. Although a number of methods have been proposed to predict m6A RNA methylation sites, most of these methods aimed at general m6A sites prediction without noticing the uniqueness of the lncRNA methylation prediction problem. Since many lncRNAs do not have a polyA tail and cannot be captured in the polyA selection step of the most widely adopted RNA-seq library preparation protocol, lncRNA methylation sites cannot be effectively captured and are thus likely to be significantly underrepresented in existing experimental data affecting the accuracy of existing predictors. In this paper, we propose a new computational framework, LITHOPHONE, which stands for long noncoding RNA methylation sites prediction from sequence characteristics and genomic information with an ensemble predictor. We show that the methylation sites of lncRNA and mRNA have different patterns exhibited in the extracted features and should be differently handled when making predictions. Due to the used experiment protocols, the number of known lncRNA m6A sites is limited, and insufficient to train a reliable predictor; thus, the performance can be improved by combining both lncRNA and mRNA data using an ensemble predictor. 
We show that the newly developed LITHOPHONE approach achieved a reasonably good performance when tested on independent datasets (AUC: 0.966 and 0.835 under full transcript and mature mRNA modes, respectively), marking a substantial improvement compared with existing methods. Additionally, LITHOPHONE was applied to scan the entire human lncRNAome for all possible lncRNA m6A sites, and the results are freely accessible at: http://180.208.58.19/lith/.",2020-06-09 +32760766,Data on proteome of Mycoplasma hominis cultivated with arginine or thymidine as a carbon source.,"Mycoplasma hominis is an opportunistic bacterium that can cause acute and chronic infections of the urogenital tract. This bacterium, like all other Mycoplasma species, is characterized by the reduced genome size, and, consequently, reduction of the main metabolic pathways. M. hominis cells cannot effectively use glucose as a carbon and energy source. Therefore, the main pathway of energy metabolism is the arginine dihydrolase pathway. However, several bacteria can use nucleosides as the sole energy source. Biochemical studies using Salmonella typhimurium have shown that three enzymes (thymidine phosphorylase, phosphopentose mutase and deoxyribose-phosphate aldolase) are involved in the thymidine catabolic pathway. All these enzymes are present in M. hominis. For understanding changes in the energy metabolism of M. hominis we performed shotgun proteome analysis of M. hominis cells in liquid medium with arginine or thymidine as a carbon source. LC-MS analysis was performed with an Ultimate 3000 Nano LC System (Thermo Fisher Scientific) coupled to a Q Exactive HF benchtop Orbitrap mass spectrometer (Thermo Fisher Scientific) via a nanoelectrospray source (Thermo Fisher Scientific). 
Data are available via ProteomeXchange with identifier PXD018714 (https://www.ebi.ac.uk/pride/archive/projects/PXD018714).",2020-07-17 +33296068,Therapeutic effects of Chlorella vulgaris on carbon tetrachloride induced liver fibrosis by targeting Hippo signaling pathway and AMPK/FOXO1 axis.,"This study was conducted to present the mechanism of the therapeutic effects of Chlorella vulgaris extract (CV) on the carbon tetrachloride (CCl4) induced liver fibrosis model. Primarily, the mechanism of antioxidant effects of CV were investigated via measuring the expression of forkhead box protein O1 (FOXO1) and phosphorylated 5' adenosine monophosphate-activated protein kinase (p-AMPK) as upstream regulators of superoxide dismutase (SOD) and catalase (CAT). Subsequently, we investigated the regulatory effect of CV treatment on the yes-associated protein (YAP) and transcriptional coactivators with a PDZ-binding motif (TAZ) as fibrogenic factors. Male Wistar rats received CCl4 and olive oil solution 1 ml/kg intraperitoneally for 12 weeks, twice weekly. CV 50 and 100 mg/kg were administered on a daily basis by gavage in the last 4 weeks. Ultimately, liver marker enzymes and hepatic hydroxyproline content were measured. The activity of SOD and CAT and the expression of YAP, TAZ, FOXO1, SOD, and CAT were analyzed. Finally, the protein levels of YAP, TAZ, and p-AMPK were detected. CV administration decreased liver marker enzymes and hydroxyproline content significantly. The expression and protein levels of YAP and TAZ reduced by CV treatment. Furthermore, the augmentation of expression and function of CAT and SOD by CV treatment was followed by an increase in the expression of FOXO1 and protein level of p-AMPK. Our data revealed that the stimulation of expression and function of SOD and CAT by CV treatment could be mediated by FOXO1/p-AMPK axis. Moreover, anti-fibrotic effect of CV might be associated with its inhibitory effect on the hepatic expression of YAP and TAZ. 
Chlorella vulgaris treatment ameliorates liver fibrosis via two cellular mechanisms. A) Likely, Chlorella vulgaris treatment increases gene expression of enzymatic antioxidants superoxide dismutase (SOD) and catalase (CAT) via upregulating its upstream regulatory elements i.e. phosphorylated 5' adenosine monophosphate-activated protein kinase (p-AMPK) and forkhead box protein O1 (FOXO1). These possible regulatory effects maybe lead to reduce reactive oxygen species level (ROS). B) Chlorella vulgaris treatment decreases hepatic protein level and gene expression of key elements of Hippo signaling pathway i.e. Yes-associated protein (YAP) and Transcriptional coactivators with a PDZ-binding motif (TAZ). Figure created with BioRender ( https://biorender.com ). ROS: Reactive oxygen species, YAP: Yes-associated protein, TAZ: Transcriptional coactivators with a PDZ-binding motif, FOXO1: Fork head Box O1, AMPK: 5' adenosine monophosphate activated protein kinase, SOD: Superoxide dismutase, CAT: Catalase, P: Phosphate group.",2020-12-09 +27174935,tRNAscan-SE On-line: integrating search and context for analysis of transfer RNA genes.,"High-throughput genome sequencing continues to grow the need for rapid, accurate genome annotation and tRNA genes constitute the largest family of essential, ever-present non-coding RNA genes. Newly developed tRNAscan-SE 2.0 has advanced the state-of-the-art methodology in tRNA gene detection and functional prediction, captured by rich new content of the companion Genomic tRNA Database. Previously, web-server tRNA detection was isolated from knowledge of existing tRNAs and their annotation. In this update of the tRNAscan-SE On-line resource, we tie together improvements in tRNA classification with greatly enhanced biological context via dynamically generated links between web server search results, the most relevant genes in the GtRNAdb and interactive, rich genome context provided by UCSC genome browsers. 
The tRNAscan-SE On-line web server can be accessed at http://trna.ucsc.edu/tRNAscan-SE/.",2016-05-12 +24829452,TogoTable: cross-database annotation system using the Resource Description Framework (RDF) data model.,"TogoTable (http://togotable.dbcls.jp/) is a web tool that adds user-specified annotations to a table that a user uploads. Annotations are drawn from several biological databases that use the Resource Description Framework (RDF) data model. TogoTable uses database identifiers (IDs) in the table as a query key for searching. RDF data, which form a network called Linked Open Data (LOD), can be searched from SPARQL endpoints using a SPARQL query language. Because TogoTable uses RDF, it can integrate annotations from not only the reference database to which the IDs originally belong, but also externally linked databases via the LOD network. For example, annotations in the Protein Data Bank can be retrieved using GeneID through links provided by the UniProt RDF. Because RDF has been standardized by the World Wide Web Consortium, any database with annotations based on the RDF data model can be easily incorporated into this tool. We believe that TogoTable is a valuable Web tool, particularly for experimental biologists who need to process huge amounts of data such as high-throughput experimental output.",2014-05-14 +31750874,Easy-HLA: a validated web application suite to reveal the full details of HLA typing.,"

Motivation

The HLA system plays a pivotal role in both clinical applications and immunology research. Typing HLA genes in patient and donor is indeed required in hematopoietic stem cell and solid-organ transplantation, and the histocompatibility complex region exhibits countless genetic associations with immune-related pathologies. Since the discovery of HLA antigens, the HLA system nomenclature and typing methods have constantly evolved, which leads to difficulties in using data generated with older methodologies.

Results

Here, we present Easy-HLA, a web-based software suite designed to facilitate analysis and gain knowledge from HLA typing, regardless of nomenclature or typing method. Easy-HLA implements a computational and statistical method of HLA haplotypes inference based on published reference populations containing over 600 000 haplotypes to upgrade missing or partial HLA information: 'HLA-Upgrade' tool infers high-resolution HLA typing and 'HLA-2-Haplo' imputes haplotype pairs and provides additional functional annotations (e.g. amino acids and KIR ligands). We validated both tools using two independent cohorts (total n = 2500). For HLA-Upgrade, we reached a prediction accuracy of 92% from low- to high-resolution of European genotypes. We observed a 96% call rate and 76% accuracy with HLA-2-Haplo European haplotype pairs prediction. In conclusion, Easy-HLA tools facilitate large-scale immunogenetic analysis and promotes the multi-faceted HLA expertise beyond allelic associations by providing new functional immunogenomics parameters.

Availability and implementation

Easy-HLA is a web application freely available (free account) at: https://hla.univ-nantes.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +32936717,Electrophysiological Evidence of Early Cortical Sensitivity to Human Conspecific Mimic Voice as a Distinct Category of Natural Sound.,"Purpose From an anthropological perspective of hominin communication, the human auditory system likely evolved to enable special sensitivity to sounds produced by the vocal tracts of human conspecifics whether attended or passively heard. While numerous electrophysiological studies have used stereotypical human-produced verbal (speech voice and singing voice) and nonverbal vocalizations to identify human voice-sensitive responses, controversy remains as to when (and where) processing of acoustic signal attributes characteristic of ""human voiceness"" per se initiate in the brain. Method To explore this, we used animal vocalizations and human-mimicked versions of those calls (""mimic voice"") to examine late auditory evoked potential responses in humans. Results Here, we revealed an N1b component (96-120 ms poststimulus) during a nonattending listening condition showing significantly greater magnitude in response to mimics, beginning as early as primary auditory cortices, preceding the time window reported in previous studies that revealed species-specific vocalization processing initiating in the range of 147-219 ms. During a sound discrimination task, a P600 (500-700 ms poststimulus) component showed specificity for accurate discrimination of human mimic voice. Distinct acoustic signal attributes and features of the stimuli were used in a classifier model, which could distinguish most human from animal voice comparably to behavioral data-though none of these single features could adequately distinguish human voiceness. 
Conclusions These results provide novel ideas for algorithms used in neuromimetic hearing aids, as well as direct electrophysiological support for a neurocognitive model of natural sound processing that informs both neurodevelopmental and anthropological models regarding the establishment of auditory communication systems in humans. Supplemental Material https://doi.org/10.23641/asha.12903839.",2020-09-16 +31860715,Convolutional neural network-based annotation of bacterial type IV secretion system effectors with enhanced accuracy and reduced false discovery.,"The type IV bacterial secretion system (SS) is reported to be one of the most ubiquitous SSs in nature and can induce serious conditions by secreting type IV SS effectors (T4SEs) into the host cells. Recent studies mainly focus on annotating new T4SE from the huge amount of sequencing data, and various computational tools are therefore developed to accelerate T4SE annotation. However, these tools are reported as heavily dependent on the selected methods and their annotation performance need to be further enhanced. Herein, a convolution neural network (CNN) technique was used to annotate T4SEs by integrating multiple protein encoding strategies. First, the annotation accuracies of nine encoding strategies integrated with CNN were assessed and compared with that of the popular T4SE annotation tools based on independent benchmark. Second, false discovery rates of various models were systematically evaluated by (1) scanning the genome of Legionella pneumophila subsp. ATCC 33152 and (2) predicting the real-world non-T4SEs validated using published experiments. Based on the above analyses, the encoding strategies, (a) position-specific scoring matrix (PSSM), (b) protein secondary structure & solvent accessibility (PSSSA) and (c) one-hot encoding scheme (Onehot), were identified as well-performing when integrated with CNN. 
Finally, a novel strategy that collectively considers the three well-performing models (CNN-PSSM, CNN-PSSSA and CNN-Onehot) was proposed, and a new tool (CNN-T4SE, https://idrblab.org/cnnt4se/) was constructed to facilitate T4SE annotation. All in all, this study conducted a comprehensive analysis on the performance of a collection of encoding strategies when integrated with CNN, which could facilitate the suppression of T4SS in infection and limit the spread of antimicrobial resistance.",2020-09-01 +32492120,Prediction of mortality rate in acute type A dissection: the German Registry for Acute Type A Aortic Dissection score.,"

Objectives

The goal was to develop a scoring system to predict the 30-day mortality rate for patients undergoing surgery for acute type A aortic dissection on the basis of the German Registry for Acute Type A Aortic Dissection (GERAADA) data set and to provide a Web-based application for standard use.

Methods

A total of 2537 patients enrolled in GERAADA who underwent surgery between 2006 and 2015 were analysed. Variable selection was performed using the R-package FAMoS. The robustness of the results was confirmed via the bootstrap procedure. The coefficients of the final model were used to calculate the risk score in a Web-based application.

Results

Age [odds ratio (OR) 1.018, 95% confidence interval (CI) 1.009-1.026; P < 0.001; 5-year OR: 1.093], need for catecholamines at referral (OR 1.732, 95% CI 1.340-2.232; P < 0.001), preoperative resuscitation (OR 3.051, 95% CI 2.099-4.441; P < 0.001), need for intubation before surgery (OR 1.949, 95% CI 1.465-2.585; P < 0.001), preoperative hemiparesis (OR 1.442, 95% CI 0.996-2.065; P = 0.049), coronary malperfusion (OR 1.870, 95% CI 1.386-2.509; P < 0.001), visceral malperfusion (OR 1.748, 95% CI 1.198-2.530; P = 0.003), dissection extension to the descending aorta (OR 1.443, 95% CI 1.120-1.864; P = 0.005) and previous cardiac surgery (OR 1.772, 95% CI 1.048-2.903; P = 0.027) were independent predictors of the 30-day mortality rate. The Web application based on the final model can be found at https://www.dgthg.de/de/GERAADA_Score.

Conclusions

The GERAADA score is a simple, effective tool to predict the 30-day mortality rate for patients undergoing surgery for acute type A aortic dissection. We recommend the widespread use of this Web-based application for standard use.",2020-10-01 +30657866,DIABLO: an integrative approach for identifying key molecular drivers from multi-omics assays.,"

Motivation

In the continuously expanding omics era, novel computational and statistical strategies are needed for data integration and identification of biomarkers and molecular signatures. We present Data Integration Analysis for Biomarker discovery using Latent cOmponents (DIABLO), a multi-omics integrative method that seeks for common information across different data types through the selection of a subset of molecular features, while discriminating between multiple phenotypic groups.

Results

Using simulations and benchmark multi-omics studies, we show that DIABLO identifies features with superior biological relevance compared with existing unsupervised integrative methods, while achieving predictive performance comparable to state-of-the-art supervised approaches. DIABLO is versatile, allowing for modular-based analyses and cross-over study designs. In two case studies, DIABLO identified both known and novel multi-omics biomarkers consisting of mRNAs, miRNAs, CpGs, proteins and metabolites.

Availability and implementation

DIABLO is implemented in the mixOmics R Bioconductor package with functions for parameters' choice and visualization to assist in the interpretation of the integrative analyses, along with tutorials on http://mixomics.org and in our Bioconductor vignette.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +30066633,NetControl4BioMed: a pipeline for biomedical data acquisition and analysis of network controllability.,"

Background

Network controllability focuses on discovering combinations of external interventions that can drive a biological system to a desired configuration. In practice, this approach translates into finding a combined multi-drug therapy in order to induce a desired response from a cell; this can lead to developments of novel therapeutic approaches for systemic diseases like cancer.

Result

We develop a novel bioinformatics data analysis pipeline called NetControl4BioMed based on the concept of target structural control of linear networks. Our pipeline generates novel molecular interaction networks by combining pathway data from various public databases starting from the user's query. The pipeline then identifies a set of nodes that is enough to control a given, user-defined set of disease-specific essential proteins in the network, i.e., it is able to induce a change in their configuration from any initial state to any final state. We provide both the source code of the pipeline as well as an online web-service based on this pipeline http://combio.abo.fi/nc/net_control/remote_call.php .

Conclusion

The pipeline can be used by researchers for controlling and better understanding of molecular interaction networks through combinatorial multi-drug therapies, for more efficient therapeutic approaches and personalised medicine.",2018-07-09 +30865265,Multiresolution correction of GC bias and application to identification of copy number alterations.,"

Motivation

Whole-genome sequencing (WGS) data are affected by various sequencing biases such as GC bias and mappability bias. These biases degrade performance on detection of genetic variations such as copy number alterations. The existing methods use a relation between the GC proportion and depth of coverage (DOC) of markers by means of regression models. Nonetheless, severity of the GC bias varies from sample to sample. We developed a new method for correction of GC bias on the basis of multiresolution analysis. We used a translation-invariant wavelet transform to decompose biased raw signals into high- and low-frequency coefficients. Then, we modeled the relation between GC proportion and DOC of the genomic regions and constructed new control DOC signals that reflect the GC bias. The control DOC signals are used for normalizing genomic sequences by correcting the GC bias.

Results

When we applied our method to simulated sequencing data with various degrees of GC bias, our method showed more robust performance on correcting the GC bias than the other methods did. We also applied our method to real-world cancer sequencing datasets and successfully identified cancer-related focal alterations even when cancer genomes were not normalized to normal control samples. In conclusion, our method can be employed for WGS data with different degrees of GC bias.

Availability and implementation

The code is available at http://gcancer.org/wabico.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +32001758,miRgo: integrating various off-the-shelf tools for identification of microRNA-target interactions by heterogeneous features and a novel evaluation indicator.,"MicroRNAs (miRNAs) are short non-coding RNAs that regulate gene expression and biological processes through binding to messenger RNAs. Predicting the relationship between miRNAs and their targets is crucial for research and clinical applications. Many tools have been developed to predict miRNA-target interactions, but variable results among the different prediction tools have caused confusion for users. To solve this problem, we developed miRgo, an application that integrates many of these tools. To train the prediction model, extreme values and median values from four different data combinations, which were obtained via an energy distribution function, were used to find the most representative dataset. Support vector machines were used to integrate 11 prediction tools, and numerous feature types used in these tools were classified into six categories-binding energy, scoring function, evolution evidence, binding type, sequence property, and structure-to simplify feature selection. In addition, a novel evaluation indicator, the Chu-Hsieh-Liang (CHL) index, was developed to improve the prediction power in positive data for feature selection. miRgo achieved better results than all other prediction tools in evaluation by an independent testing set and by its subset of functionally important genes. The tool is available at http://predictor.nchu.edu.tw/miRgo.",2020-01-30 +32559081,Developing a Fully Glycosylated Full-Length SARS-CoV-2 Spike Protein Model in a Viral Membrane.,"This technical study describes all-atom modeling and simulation of a fully glycosylated full-length SARS-CoV-2 spike (S) protein in a viral membrane. 
First, starting from PDB: 6VSB and 6VXX, full-length S protein structures were modeled using template-based modeling, de-novo protein structure prediction, and loop modeling techniques in GALAXY modeling suite. Then, using the recently determined most occupied glycoforms, 22 N-glycans and 1 O-glycan of each monomer were modeled using Glycan Reader & Modeler in CHARMM-GUI. These fully glycosylated full-length S protein model structures were assessed and further refined against the low-resolution data in their respective experimental maps using ISOLDE. We then used CHARMM-GUI Membrane Builder to place the S proteins in a viral membrane and performed all-atom molecular dynamics simulations. All structures are available in CHARMM-GUI COVID-19 Archive (http://www.charmm-gui.org/docs/archive/covid19) so that researchers can use these models to carry out innovative and novel modeling and simulation research for the prevention and treatment of COVID-19.",2020-07-06 +30511116,FreeSurfer 5.3 versus 6.0: are volumes comparable? A Chronic Effects of Neurotrauma Consortium study.,"Automated neuroimaging methods like FreeSurfer ( https://surfer.nmr.mgh.harvard.edu/ ) have revolutionized quantitative neuroimaging analyses. Such analyses provide a variety of metrics used for image quantification, including magnetic resonance imaging (MRI) volumetrics. With the release of FreeSurfer version 6.0, it is important to assess its comparability to the widely-used previous version 5.3. The current study used data from the initial 249 participants in the ongoing Chronic Effects of Neurotrauma Consortium (CENC) multicenter observational study to compare the volumetric output of versions 5.3 and 6.0 across various regions of interest (ROI). 
In the current investigation, the following ROIs were examined: total intracranial volume, total white matter volume, total ventricular volume, total gray matter volume, and right and left volumes for the thalamus, pallidum, putamen, caudate, amygdala and hippocampus. Absolute ROI volumes derived from FreeSurfer 6.0 differed significantly from those obtained using version 5.3. We also employed a clinically-based evaluation strategy to compare both versions in their prediction of age-mediated volume reductions (or ventricular increase) in the aforementioned structures. Statistical comparison involved both general linear modeling (GLM) and random forest (RF) methods, where cross-validation error was significantly higher using segmentations from FreeSurfer version 5.3 versus version 6.0 (GLM: t = 4.97, df = 99, p value = 2.706e-06; RF: t = 4.85, df = 99, p value = 4.424e-06). Additionally, the relative importance of ROIs used to predict age using RFs differed between FreeSurfer versions, indicating substantial differences in the two versions. However, from the perspective of correlational analyses, fitted regression lines and their slopes were similar between the two versions, regardless of version used. While absolute volumes are not interchangeable between version 5.3 and 6.0, ROI correlational analyses appear to yield similar results, suggesting the interchangeability of ROI volume for correlational studies.",2020-10-01 +31406900,Annotation data about multi criteria assessment methods used in the agri-food research: The french national institute for agricultural research (INRA) experience.,"This data article contains annotation data characterizing Multi Criteria Assessment (MCA) Methods proposed in the agri-food sector by researchers from INRA, Europe's largest agricultural research institute (INRA, http://institut.inra.fr/en). 
MCA can be used to assess and compare agricultural and food systems, and support multi-actor decision making and design of innovative systems for crop production, animal production and processing of agricultural products. These data are stored in a public repository managed by INRA (https://data.inra.fr/; https://doi.org/10.15454/WB51LL).",2019-07-22 +26481731,De novo transcriptome analysis of Medicago falcata reveals novel insights about the mechanisms underlying abiotic stress-responsive pathway.,"

Background

The entire world is facing a deteriorating environment. Understanding the mechanisms underlying plant responses to external abiotic stresses is important for breeding stress-tolerant crops and herbages. Phytohormones play critical regulatory roles in plants in the response to external and internal cues to regulate growth and development. Medicago falcata is one of the stress-tolerant candidate leguminous species and is able to fix atmospheric nitrogen. This ability allows leguminous plants to grow in nitrogen deficient soils.

Methods

We performed Illumina sequencing of cDNA prepared from abiotic stress treated M. falcata. Sequenced reads were assembled to provide a transcriptome resource. Transcripts were annotated using BLAST searches against the NCBI non-redundant database and gene ontology definitions were assigned. A comparison among the three abiotic stress treated samples was carried out. The expression of transcripts was confirmed with qRT-PCR.

Results

We present an abiotic stress-responsive M. falcata transcriptome using next-generation sequencing data from samples grown under standard, dehydration, high salinity, and cold conditions. We combined reads from all samples and de novo assembled 98,515 transcripts to build the M. falcata gene index. A comprehensive analysis of the transcriptome revealed abiotic stress-responsive mechanisms underlying the metabolism and core signalling components of major phytohormones. We identified nod factor signalling pathways during early symbiotic nodulation that are modified by abiotic stresses. Additionally, a global comparison of homology between the M. falcata and M. truncatula transcriptomes, along with five other leguminous species, revealed a high level of global sequence conservation within the family.

Conclusions

M. falcata is shown to be a model candidate for studying abiotic stress-responsive mechanisms in legumes. This global gene expression analysis provides new insights into the biochemical and molecular mechanisms involved in the acclimation to abiotic stresses. Our data provides many gene candidates that might be used for herbage and crop breeding. Additionally, FalcataBase ( http://bioinformatics.cau.edu.cn/falcata/ ) was built for storing these data.",2015-10-19 +27924021,AAgAtlas 1.0: a human autoantigen database.,"Autoantibodies refer to antibodies that target self-antigens, which can play pivotal roles in maintaining homeostasis, distinguishing normal from tumor tissue and trigger autoimmune diseases. In the last three decades, tremendous efforts have been devoted to elucidate the generation, evolution and functions of autoantibodies, as well as their target autoantigens. However, reports of these countless previously identified autoantigens are randomly dispersed in the literature. Here, we constructed an AAgAtlas database 1.0 using text-mining and manual curation. We extracted 45 830 autoantigen-related abstracts and 94 313 sentences from PubMed using the keywords of either 'autoantigen' or 'autoantibody' or their lexical variants, which were further refined to 25 520 abstracts, 43 253 sentences and 3984 candidates by our bio-entity recognizer based on the Protein Ontology. Finally, we identified 1126 genes as human autoantigens and 1071 related human diseases, with which we constructed a human autoantigen database (AAgAtlas database 1.0). The database provides a user-friendly interface to conveniently browse, retrieve and download human autoantigens as well as their associated diseases. 
The database is freely accessible at http://biokb.ncpsb.org/aagatlas/ We believe this database will be a valuable resource to track and understand human autoantigens as well as to investigate their functions in basic and translational research.",2016-10-19 +30456272,A proteomic dataset of secreted proteins by three Staphylococcus saprophyticus strains.,"This article presents a proteomic dataset generated from a comparative analysis of the exoproteome of Staphylococcus saprophyticus, ATCC 15305, 7108 and 9325 strains. The extract of secreted proteins were obtained after incubation of stationary phase cells in BHI medium. All samples were submitted to nano-ESI-UPLC-MSE, and the spectrum obtained was processed and analyzed by ProteinLynx Global Server (PLGS), Uniprot and Pedant databases, for identification, annotation and functional classification of proteins. Fold changes and protein relative abundances were properly reported. This report is related to the research article entitled ""The exoproteome profiles of three Staphylococcus saprophyticus strains reveal diversity in protein secretion contents"" (Oliveira et al., 2018). The proteomic data generated have been deposited to the ProteomeXchange Consortium, via the PRIDE partner repository, with a project number PXD008643, https://www.ebi.ac.uk/pride/archive/projects/PXD008643.",2018-10-27 +27213017,"SePIA: RNA and small RNA sequence processing, integration, and analysis.","

Background

Large-scale sequencing experiments are complex and require a wide spectrum of computational tools to extract and interpret relevant biological information. This is especially true in projects where individual processing and integrated analysis of both small RNA and complementary RNA data is needed. Such studies would benefit from a computational workflow that is easy to implement and standardizes the processing and analysis of both sequenced data types.

Results

We developed SePIA (Sequence Processing, Integration, and Analysis), a comprehensive small RNA and RNA workflow. It provides ready execution for over 20 commonly known RNA-seq tools on top of an established workflow engine and provides dynamic pipeline architecture to manage, individually analyze, and integrate both small RNA and RNA data. Implementation with Docker makes SePIA portable and easy to run. We demonstrate the workflow's extensive utility with two case studies involving three breast cancer datasets. SePIA is straightforward to configure and organizes results into a perusable HTML report. Furthermore, the underlying pipeline engine supports computational resource management for optimal performance.

Conclusion

SePIA is an open-source workflow introducing standardized processing and analysis of RNA and small RNA data. SePIA's modular design enables robust customization to a given experiment while maintaining overall workflow structure. It is available at http://anduril.org/sepia.",2016-05-20 +31173064,RAISS: robust and accurate imputation from summary statistics.,"

Motivation

Multi-trait analyses using public summary statistics from genome-wide association studies (GWASs) are becoming increasingly popular. A constraint of multi-trait methods is that they require complete summary data for all traits. Although methods for the imputation of summary statistics exist, they lack precision for genetic variants with small effect size. This is benign for univariate analyses where only variants with large effect size are selected a posteriori. However, it can lead to strong p-value inflation in multi-trait testing. Here we present a new approach that improves the existing imputation methods and reaches a precision suitable for multi-trait analyses.

Results

We fine-tuned parameters to obtain a very high accuracy imputation from summary statistics. We demonstrate this accuracy for variants of all effect sizes on real data of 28 GWAS. We implemented the resulting methodology in a python package specially designed to efficiently impute multiple GWAS in parallel.

Availability and implementation

The python package is available at: https://gitlab.pasteur.fr/statistical-genetics/raiss, its accompanying documentation is accessible here http://statistical-genetics.pages.pasteur.fr/raiss/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +31372377,Datasets of microarray analysis to identify Gpr137b-dependent interleukin-4-responsive genes in the mouse macrophage cell line RAW264.,"Macrophages are classified mainly into two subtypes, M1 and M2, which exhibit distinct phenotypes, based on their microenvironment. We have recently demonstrated that Gpr137b is abundantly expressed in RAW264 macrophages, ""Gpr137b is an orphan G-protein-coupled receptor associated with M2 macrophage polarization"" (Islam et al., in press) [1]. Although recent studies have suggested that G-protein-coupled receptors (GPCRs) are associated with M1/M2 macrophage polarization (""G-protein-coupled bile acid receptor 1 (GPBAR1, TGR5) agonists reduce the production of proinflammatory cytokines and stabilize the alternative macrophage phenotype"" (Hogenauer et al., 2014) [2], ""Leukotriene B4 promotes neovascularization and macrophage recruitment in murine wet-type AMD models"" (Sasaki et al., 2018) [3]), available information about GPCR-mediated macrophage polarization is still limited. This prompted us to generate Gpr137b-knockout (KO) RAW264 clones using the CRISPR/Cas9 genome editing system to elucidate the function of Gpr137b in interleukin (IL)-4-induced M2 macrophage polarization (Islam et al., in press) [1]. Here we present the datasets of a microarray analysis to identify Gpr137b-dependent IL-4-responsive genes in RAW264 cells. The raw microarray data are available in the Gene Expression Omnibus database (https://www.ncbi.nlm.nih.gov/geo/) under the accession number GSE117578, https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE117578.",2019-01-21 +27720627,[Challenges in renal transplantation].,"

Objectives

To describe kidney transplantation surgical techniques and to propose strategies in high-risk recipients.

Material and methods

Relevant publications were identified through Medline (http://www.ncbi.nlm.nih.gov/) and Embase (http://www.embase.com/) database using the following keywords, alone or in association, ""renal transplantation; peripheral arterial disease; obesity; third and fourth transplantation; robotic-assisted kidney transplant; anticoagulant therapy; dual kidney transplant"". Articles were selected according to methods, language of publication and relevance. The reference lists were used to identify additional historical studies of interest. Both prospective and retrospective series, in French and English, as well as review articles and case-reports were selected. A total of 1949 articles were analyzed for arterial disease and anticoagulant therapy, 1083 for obesity, 663 for dual kidney transplants, 458 for third and subsequent procedures and 84 for robotic-assisted kidney transplantation. After careful selection, 304 publications were eligible for our review.

Results

Surgical assessment of future recipients is a pivotal step to anticipate technical difficulties, to interrupt clopidogrel or direct oral anticoagulants and to propose a revascularization procedure when necessary. Lack of data regarding obese recipients does not allow us to conclude about best surgical care or optimal timing but suggests that an early global management of obesity in chronic kidney disease patients is mandatory to improve access to a successful transplantation. In neurologic bladder and congenital anomalies, urodynamics and bladder function must be assessed prior to the onset of oliguria to allow early treatment. Urinary diversion may be performed prior to or after transplantation with similar survival outcome and comparable rates of infections. Because of a rigorous selection of donors, the French dual kidney transplant program provides satisfactory outcomes, but fails in convincing surgical teams nationwide. Third and subsequent transplant procedures remain a surgical and immunological challenge, with an increased morbidity and a moderate decline in transplant survival only when donors are extended criteria with extensive duration of waiting time between procedures. Robotic-assisted kidney transplantation is a recent technique requiring methodical evaluation.

Conclusion

Kidney transplantation in challenging recipients implies a global understanding of patients' prognosis and benefits versus dialysis, in the context of the attribution of a valuable resource awaited by other patients on waiting list.",2016-10-06 +30057343,MCENet: A database for maize conditional co-expression network and network characterization collaborated with multi-dimensional omics levels.,"Maize (Zea mays) is the most widely grown grain crop in the world, playing important roles in agriculture and industry. However, the functions of maize genes remain largely unknown. High-quality genome-wide transcriptome datasets provide important biological knowledge which has been widely and successfully used in plants not only by measuring gene expression levels but also by enabling co-expression analysis for predicting gene functions and modules related to agronomic traits. Recently, thousands of maize transcriptomic data are available across different inbred lines, development stages, tissues, and treatments, or even across different tissue sections and cell lines. Here, we integrated 701 transcriptomic and 108 epigenomic data and studied the different conditional networks with multi-dimensional omics levels. We constructed a searchable, integrative, one-stop online platform, the maize conditional co-expression network (MCENet) platform. MCENet provides 10 global/conditional co-expression networks, 5 network accessional analysis toolkits (i.e., Network Search, Network Remodel, Module Finder, Network Comparison, and Dynamic Expression View) and multiple network functional support toolkits (e.g., motif and module enrichment analysis). We hope that our database might help plant research communities to identify maize functional genes or modules that regulate important agronomic traits. 
MCENet is publicly accessible at http://bioinformatics.cau.edu.cn/MCENet/.",2018-07-18 +30603810,"FLA, which encodes a homolog of UBP, is required for chlorophyll accumulation and development of lemma and palea in rice.","

Key message

FLA, the homolog of ubiquitin-specific protease does not have deubiquitination activity, but it is essential for flower and chloroplast development in rice. Ubiquitin-specific proteases (UBPs) are widely distributed and highly conserved proteins and are also members of the most important family of deubiquitination enzymes. Although the functions and phylogenies of UBPs from yeast, mammals and Arabidopsis have been widely reported, the functions and evolutionary relationships of UBPs in rice remain unclear. In this study, we characterized the rice flower and leaf color aberrant mutant (fla), which exhibited a variety of developmental defects, including abnormal floral organs and pollen development, and leaf bleaching. We isolated FLA by positional cloning and found that it encodes a homolog of ubiquitin-specific protease. FLA is a ubiquitously expressed gene with the highest expression in floral organs. Subcellular localization analysis indicated that FLA is a cell membrane protein. Through searches of the rice genome database ( http://rice.plantbiology.msu.edu ), we identified 35 UBP family members in the rice genome. These proteins were grouped into 16 subfamilies based on phylogenetic analysis, and FLA was found to belong to the G8 subfamily. In vitro activity assays revealed that FLA does not have deubiquitination activity. Our data suggest that FLA plays an important role in the development of floral organs and chloroplast in rice, but that this role probably does not involve deubiquitination activity, because FLA does not have an active site and deubiquitination activity.",2019-01-02 +32161720,CaMeRe: A Novel Tool for Inference of Cancer Metabolic Reprogramming.,"Metabolic reprogramming is prevalent in cancer, largely due to its altered chemical environments such as the distinct intracellular concentrations of O2, H2O2 and H+, compared to those in normal tissue cells. 
The reprogrammed metabolisms are believed to play essential roles in cancer formation and progression. However, it is highly challenging to elucidate how individual normal metabolisms are altered in a cancer-promoting environment; hence for many metabolisms, our knowledge about how they are changed is limited. We present a novel method, CaMeRe (CAncer MEtabolic REprogramming), for identifying metabolic pathways in cancer tissues. Based on the specified starting and ending compounds, along with gene expression data of given cancer tissue samples, CaMeRe identifies metabolic pathways connecting the two compounds via collection of compatible enzymes, which are most consistent with the provided gene-expression data. In addition, cancer-specific knowledge, such as the expression level of bottleneck enzymes in the pathways, is incorporated into the search process, to enable accurate inference of cancer-specific metabolic pathways. We have applied this tool to predict the altered sugar-energy metabolism in cancer, referred to as the Warburg effect, and found the prediction result is highly accurate by checking the appearance and ranking of those key pathways in the results of CaMeRe. Computational evaluation indicates that the tool is fast and capable of handling large metabolic network inference in cancer tissues. Hence, we believe that CaMeRe offers a powerful tool to cancer researchers for their discovery of reprogrammed metabolisms in cancer. The URL of CaMeRe is http://csbl.bmb.uga.edu/CaMeRe/.",2020-02-25 +30398659,"MitoMiner v4.0: an updated database of mitochondrial localization evidence, phenotypes and diseases.","Increasing numbers of diseases are associated with mitochondrial dysfunction. This is unsurprising given mitochondria have major roles in bioenergy generation, signalling, detoxification, apoptosis and biosynthesis. 
However, fundamental questions of mitochondrial biology remain, including: which nuclear genes encode mitochondrial proteins; how their expression varies with tissue; and which are associated with disease. But experiments to catalogue the mitochondrial proteome are incomplete and sometimes contradictory. This arises because the mitochondrial proteome has tissue- and stage-specific variability, plus differences among experimental techniques and localization evidence types used. This leads to limitations in each technique's coverage and inevitably conflicting results. To support identification of mitochondrial proteins, we developed MitoMiner (http://mitominer.mrc-mbu.cam.ac.uk/), a database combining evidence of mitochondrial localization with information from public resources. Here we report upgrades to MitoMiner, including its re-engineering to be gene-centric to enable easier sharing of evidence among orthologues and support next generation sequencing, plus new data sources, including expression in different tissues, information on phenotypes and diseases of genetic mutations and a new mitochondrial proteome catalogue. MitoMiner is a powerful platform to investigate mitochondrial localization by providing a unique combination of experimental sub-cellular localization datasets, tissue expression, predictions of mitochondrial targeting sequences, gene annotation and links to phenotype and disease.",2019-01-01 +32924579,Risk of Bias Assessments and Evidence Syntheses for Observational Epidemiologic Studies of Environmental and Occupational Exposures: Strengths and Limitations.,"

Background

Increasingly, risk of bias tools are used to evaluate epidemiologic studies as part of evidence synthesis (evidence integration), often involving meta-analyses. Some of these tools consider hypothetical randomized controlled trials (RCTs) as gold standards.

Methods

We review the strengths and limitations of risk of bias assessments, in particular, for reviews of observational studies of environmental exposures, and we also comment more generally on methods of evidence synthesis.

Results

Although RCTs may provide a useful starting point to think about bias, they do not provide a gold standard for environmental studies. Observational studies should not be considered inherently biased vs. a hypothetical RCT. Rather than a checklist approach when evaluating individual studies using risk of bias tools, we call for identifying and quantifying possible biases, their direction, and their impacts on parameter estimates. As is recognized in many guidelines, evidence synthesis requires a broader approach than simply evaluating risk of bias in individual studies followed by synthesis of studies judged unbiased, or with studies given more weight if judged less biased. It should include the use of classical considerations for judging causality in human studies, as well as triangulation and integration of animal and mechanistic data.

Conclusions

Bias assessments are important in evidence synthesis, but we argue they can and should be improved to address the concerns we raise here. Simplistic, mechanical approaches to risk of bias assessments, which may particularly occur when these tools are used by nonexperts, can result in erroneous conclusions and sometimes may be used to dismiss important evidence. Evidence synthesis requires a broad approach that goes beyond assessing bias in individual human studies and then including a narrow range of human studies judged to be unbiased in evidence synthesis. https://doi.org/10.1289/EHP6980.",2020-09-14 +32155110,Question Use in Adults With Right-Hemisphere Brain Damage.,"Purpose Right-hemisphere brain damage (RHD) can affect pragmatic aspects of communication that may contribute to an impaired ability to gather information. Questions are an explicit means of gathering information. Question types vary in terms of the demands they place on cognitive resources. The purpose of this exploratory descriptive study is to test the hypothesis that adults with RHD differ from neurologically healthy adults in the types of questions asked during a structured task. Method Adults who sustained a single right-hemisphere stroke and neurologically healthy controls from the RHDBank Database completed the Unfamiliar Object Task of the RHDBank Discourse Protocol (Minga et al., 2016). Each task was video-recorded. Questions were transcribed using the Codes for the Human Analysis of Transcripts format. Coding and analysis of each response were conducted using Computerized Language Analysis (MacWhinney, 2000) programs. Results The types of questions used differed significantly across groups, with the RHD group using significantly more content questions and significantly fewer polar questions than the neurologically healthy control group. In their content question use, adults with RHD used significantly more ""what"" questions than other question subtypes. 
Conclusion Question-asking is an important aspect of pragmatic communication. Differences in the relative usage of question types, such as the reduced use of polar questions or increased use of content questions, may reflect cognitive limitations arising from RHD. Further investigations examining question use in this population are encouraged to replicate the current findings and to expand on the study tasks and measures. Supplemental Material https://doi.org/10.23641/asha.11936295.",2020-03-11 +30624648,AtFusionDB: a database of fusion transcripts in Arabidopsis thaliana. ,"Fusion transcripts are chimeric RNAs generated as a result of fusion either at DNA or RNA level. These novel transcripts have been extensively studied in the case of human cancers but still remain underexamined in plants. In this study, we introduce the first plant-specific database of fusion transcripts named AtFusionDB (http://www.nipgr.res.in/AtFusionDB). This is a comprehensive database that contains the detailed information about fusion transcripts identified in model plant Arabidopsis thaliana. A total of 82 969 fusion transcript entries generated from 17 181 different genes of A. thaliana are available in this database. Apart from the basic information consisting of the Ensembl gene names, official gene name, tissue type, EricScore, fusion type, AtFusionDB ID and sample ID (e.g. Sequence Read Archive ID), additional information like UniProt, gene coordinates (together with the function of parental genes), junction sequence, expression level of both parent genes and fusion transcript may be of high utility to the user. Two different types of search modules viz. 'Simple Search' and 'Advanced Search' in addition to the 'Browse' option with data download facility are provided in this database. Three different modules for mapping and alignment of the query sequences viz. BLASTN, SW Align and Mapping are incorporated in AtFusionDB. 
This database is a head start for exploring the complex and unexplored domain of gene/transcript fusion in plants.",2019-01-01 +30371817,SEdb: a comprehensive human super-enhancer database.,"Super-enhancers are important for controlling and defining the expression of cell-specific genes. With research on human disease and biological processes, human H3K27ac ChIP-seq datasets are accumulating rapidly, creating the urgent need to collect and process these data comprehensively and efficiently. More importantly, many studies showed that super-enhancer-associated single nucleotide polymorphisms (SNPs) and transcription factors (TFs) strongly influence human disease and biological processes. Here, we developed a comprehensive human super-enhancer database (SEdb, http://www.licpathway.net/sedb) that aimed to provide a large number of available resources on human super-enhancers. The database was annotated with potential functions of super-enhancers in the gene regulation. The current version of SEdb documented a total of 331 601 super-enhancers from 542 samples. Especially, unlike existing super-enhancer databases, we manually curated and classified 410 available H3K27ac samples from >2000 ChIP-seq samples from NCBI GEO/SRA. Furthermore, SEdb provides detailed genetic and epigenetic annotation information on super-enhancers. Information includes common SNPs, motif changes, expression quantitative trait locus (eQTL), risk SNPs, transcription factor binding sites (TFBSs), CRISPR/Cas9 target sites and Dnase I hypersensitivity sites (DHSs) for in-depth analyses of super-enhancers. 
SEdb will help elucidate super-enhancer-related functions and find potential biological effects.",2019-01-01 +32218746,"""Placebo by Proxy"" and ""Nocebo by Proxy"" in Children: A Review of Parents' Role in Treatment Outcomes.","The ""placebo (effect) by proxy"" (PbP) concept, introduced by Grelotti and Kaptchuk (1), describes a positive effect of a patient's treatment on persons in their surrounding such as family members or healthcare providers, who feel better because the patient is being treated. The PbP effect is a complex dynamic phenomenon which attempts to explain a change in treatment outcome arising from an interaction between a patient and an effect from proxies such as parents, caregivers, physicians or even the media. By extension the effect of the proxy can also have a negative or adverse effect whereby a proxy feels worse when a patient is treated, giving rise to the possibility of a ""nocebo (effect) by proxy"" (NbP), and by extension can influence a patient's treatment response. While this has yet to be systematically investigated, such an effect could occur when a proxy observes that a treatment is ineffective or is perceived as causing adverse effects leading the patient to experience side effects. In this narrative review, we take these definitions one step further to include the impact of PbP/NbP as they transform to affect the treatment outcome for the patient or child being treated, not just the people surrounding the individual being treated. Following a systematic search of literature on the subject using the Journal of Interdisciplinary Placebo Studies (JIPS) database (https://jips.online) and PubMed (NCBI) resulted in very few relevant studies, especially in children. The effect of PbP per se has been studied in parents and their children for temper tantrums, acupuncture for postoperative symptoms, as well as for neuroprotection in very preterm-born infants. 
This paper will review the PbP/NbP concepts, show evidence for its presence in children's treatment outcome and introduce clinical implications. We will also offer suggestions for future research to further our understanding of the role of the proxy in promoting or distracting from treatment benefit in children. Increasing an appreciation of the PbP and NbP phenomena and the role of the proxy in children's treatment should improve research study design and ultimately harness them to improve clinical child healthcare.",2020-03-11 +24253303,Activities at the Universal Protein Resource (UniProt).,"The mission of the Universal Protein Resource (UniProt) (http://www.uniprot.org) is to provide the scientific community with a comprehensive, high-quality and freely accessible resource of protein sequences and functional annotation. It integrates, interprets and standardizes data from literature and numerous resources to achieve the most comprehensive catalog possible of protein information. The central activities are the biocuration of the UniProt Knowledgebase and the dissemination of these data through our Web site and web services. UniProt is produced by the UniProt Consortium, which consists of groups from the European Bioinformatics Institute (EBI), the SIB Swiss Institute of Bioinformatics (SIB) and the Protein Information Resource (PIR). UniProt is updated and distributed every 4 weeks and can be accessed online for searches or downloads.",2013-11-18 +30535146,Comparative Analysis of Oomycete Genome Evolution Using the Oomycete Gene Order Browser (OGOB).,"The oomycetes are a class of microscopic, filamentous eukaryotes within the stramenopiles-alveolates-rhizaria eukaryotic supergroup. They include some of the most destructive pathogens of animals and plants, such as Phytophthora infestans, the causative agent of late potato blight. 
Despite the threat they pose to worldwide food security and natural ecosystems, there is a lack of tools and databases available to study oomycete genetics and evolution. To this end, we have developed the Oomycete Gene Order Browser (OGOB), a curated database that facilitates comparative genomic and syntenic analyses of oomycete species. OGOB incorporates genomic data for 20 oomycete species including functional annotations and a number of bioinformatics tools. OGOB hosts a robust set of orthologous oomycete genes for evolutionary analyses. Here, we present the structure and function of OGOB as well as a number of comparative genomic analyses we have performed to better understand oomycete genome evolution. We analyze the extent of oomycete gene duplication and identify tandem gene duplication as a driving force of the expansion of secreted oomycete genes. We identify core genes that are present and microsyntenically conserved (termed syntenologs) in oomycete lineages and identify the degree of microsynteny between each pair of the 20 species housed in OGOB. Consistent with previous comparative synteny analyses between a small number of oomycete species, our results reveal an extensive degree of microsyntenic conservation amongst genes with housekeeping functions within the oomycetes. OGOB is available at https://ogob.ie.",2019-01-01 +33537459,Healthcare Workers Bioresource: Study outline and baseline characteristics of a prospective healthcare worker cohort to study immune protection and pathogenesis in COVID-19.,"Background: Most biomedical research has focused on sampling COVID-19 patients presenting to hospital with advanced disease, with less focus on the asymptomatic or paucisymptomatic. We established a bioresource with serial sampling of health care workers (HCWs) designed to obtain samples before and during mainly mild disease, with follow-up sampling to evaluate the quality and duration of immune memory. 
Methods: We conducted a prospective study on HCWs from three hospital sites in London, initially at a single centre (recruited just prior to first peak community transmission in London), but then extended to multiple sites 3 weeks later (recruitment still ongoing, target n=1,000). Asymptomatic participants attending work complete a health questionnaire, and provide a nasal swab (for SARS-CoV-2 RNA by RT-PCR tests) and blood samples (mononuclear cells, serum, plasma, RNA and DNA are biobanked) at 16 weekly study visits, and at 6 and 12 months. Results: Preliminary baseline results for the first 731 HCWs (400 single-centre, 331 multicentre extension) are presented. Mean age was 38±11 years; 67% are female, 31% nurses, 20% doctors, and 19% work in intensive care units. COVID-19-associated risk factors were: 37% black, Asian or minority ethnicities; 18% smokers; 13% obesity; 11% asthma; 7% hypertension and 2% diabetes mellitus. At baseline, 41% reported symptoms in the preceding 2 weeks. Preliminary test results from the initial cohort (n=400) are available: PCR at baseline for SARS-CoV-2 was positive in 28 of 396 (7.1%, 95% CI 4.9-10.0%) and 15 of 385 (3.9%, 2.4-6.3%) had circulating IgG antibodies. Conclusions: This COVID-19 bioresource established just before the peak of infections in the UK will provide longitudinal assessments of incident infection and immune responses in HCWs through the natural time course of disease and convalescence. The samples and data from this bioresource are available to academic collaborators by application  https://covid-consortium.com/application-for-samples/.",2020-10-12 +,INT-010 The impact of the introduction of health information technology on medication errors in a paediatric intensive care unit,"

Background

Increased use of health information technology (HIT) has been advocated as a medication error reduction strategy. Evidence of its impact in the paediatric setting remains limited. In 2012, the paediatric intensive care unit (PICU) of an Irish tertiary children’s hospital implemented electronic-prescribing and a smart-pump library of standard concentration infusions (SCIs).

Purpose

To assess the impact of the newly implemented technology on medication errors in the PICU.

Material and methods

A retrospective, observational study of medication errors as identified by clinical pharmacist review was conducted. An interrupted time series design with four time periods was employed: pre-implementation; post-implementation of SCIs; immediate post-implementation of electronic-prescribing; and 1 year post-implementation. Pre-determined error definitions and validated grading tools were used in conjunction with a multi-disciplinary consensus process.1–3 Data were analysed in Stata Version 13.1 using ANOVA and Chi-squared tests.

Results

3356 medication orders from 288 random patients were included. Identified errors were almost exclusively prescribing, with a similar prevalence pre- and post-implementation (10.2% v 9.8%; p=0.66). Incomplete and wrong unit errors were eradicated, however duplicate orders increased. Dose prescribing errors remained the most common. Seventy seven per cent of pre-implementation and 24% of post-implementation prescribing errors were categorised as paper-based and technology-generated, respectively. The implementation of SCIs pre-electronic-prescribing significantly reduced infusion-related prescribing errors (29% to 14.6%; p<0.01). A further reduction to 8.4% (p>0.05) was reported after implementation of electronically-generated infusion orders. A significant reduction in the severity of infusion errors was found, with no differences in non-infusion errors. Almost all errors were minor, causing no patient harm.

Conclusion

The overall prevalence of errors in the PICU was unchanged. Altered error distribution was evident with many paper-based errors disappearing but new technology-generated errors emerging. In the complex PICU environment, prescribing errors remain common. The benefit of SCIs in improving the safety of prescribing paediatric infusions was a significant finding, with electronically-generated orders likely to further enhance safety. Our results show that the benefits of HIT in the paediatric setting cannot be assumed and highlight the need for further studies, given the increasing use of HIT in paediatric settings.

Acknowledgements

We would like to acknowledge the National Children’s Research Centre for funding this research and for providing biostatistical support. We would also like to thank Erika Brereton and Ian Dawkins, PICU Data Managers for their assistance.

References

Ghaleb MA, Barber N, Dean Franklin B, et al. What constitutes a prescribing error in paediatrics? BMJ Qual Saf 2005;14(5):352–7. Dean BS, Barber ND. A validated, reliable method of scoring the severity of medication errors. Am J Health Syst Pharm 1999;56(1):57–62. National Coordinating Council for Medication Error Reporting and Prevention. Taxonomy of medication errors 1998. http://www.nccmerp.org/about-medication-errors",2018-01-01 +30371824,ViBrism DB: an interactive search and viewer platform for 2D/3D anatomical images of gene expression and co-expression networks.,"Understanding anatomical structures and biological functions based on gene expression is critical in a systemic approach to address the complexity of the mammalian brain, where >25 000 genes are expressed in a precise manner. Co-expressed genes are thought to regulate cell type- or region-specific brain functions. Thus, well-designed data acquisition and visualization systems for profiling combinatorial gene expression in relation to anatomical structures are crucial. To this purpose, using our techniques of microtomy-based gene expression measurements and WebGL-based visualization programs, we mapped spatial expression densities of genome-wide transcripts to the 3D coordinates of mouse brains at four post-natal stages, and built a database, ViBrism DB (http://vibrism.neuroinf.jp/). With the DB platform, users can access a total of 172 022 expression maps of transcripts, including coding, non-coding and lncRNAs in the whole context of 3D magnetic resonance (MR) images. Co-expression of transcripts is represented in the image space and in topological network graphs. In situ hybridization images and anatomical area maps are browsable in the same space of 3D expression maps using a new browser-based 2D/3D viewer, BAH viewer. Created images are shareable using URLs, including scene-setting parameters. 
The DB has multiple links and is expandable by community activity.",2019-01-01 +30574788,CEU Mass Mediator 3.0: A Metabolite Annotation Tool.,"CEU Mass Mediator (CMM, http://ceumass.eps.uspceu.es ) is an online tool that has evolved from a simple interface to query different metabolomic databases (CMM 1.0) to a tool that unifies the compounds from these databases and, using an expert system with knowledge about the experimental setup and the compounds' properties, filters and scores the query results (CMM 2.0). Since this last major revision, CMM has continued to grow, expanding the knowledge base of its expert system and including new services to support researchers in the metabolite annotation and identification process. The information from external databases has been refreshed, and an in-house library with oxidized lipids not present in other sources has been added. This has increased the number of experimental metabolites to 332,665 and the number of predicted metabolites to 681,198. Furthermore, new taxonomy and ontology metadata have been included. CMM has expanded its functionalities with a service for the annotation of oxidized glycerophosphocholines, a service for spectral comparison from MS2 data, and a spectral quality-assessment service to determine the reliability of a spectrum for compound identification purposes. To facilitate the collaboration and integration of CMM with external tools and metabolomic platforms, a RESTful API has been created, and it has already been integrated into the HMDB (Human Metabolome Database). This paper will present the novel functionalities incorporated into version 3.0 of CMM.",2018-12-31 +31812694,Mango: Exploratory Data Analysis for Large-Scale Sequencing Datasets.,"The decreasing cost of DNA sequencing over the past decade has led to an explosion of sequencing datasets, leaving us with petabytes of data to analyze. 
However, current sequencing visualization tools are designed to run on single machines, which limits their scalability and interactivity on modern genomic datasets. Here, we leverage the scalability of Apache Spark to provide Mango, consisting of a Jupyter notebook and genome browser, which removes scalability and interactivity constraints by leveraging multi-node compute clusters to allow interactive analysis over terabytes of sequencing data. We demonstrate scalability of the Mango tools by performing quality control analyses on 10 terabytes of 100 high-coverage sequencing samples from the Simons Genome Diversity Project, enabling capability for interactive genomic exploration of multi-sample datasets that surpass the computational limitations of single-node visualization tools. Mango is freely available for download with full documentation at https://bdg-mango.readthedocs.io/en/latest/.",2019-12-04 +31667224,Dataset on discarded cigarette packs in Mongolia.,"This dataset documents the variety of discarded cigarette packs available in Mongolia, specifically in the capital city (Ulaanbaatar) and 2 provinces (Dornod and Bayan Ulgii). Both of these provinces border China and the Russian Federation. Discarded cigarette packs were collected from the ground or from the top of waste bins. Packs were collected over three rounds of data collection (round 1: April 2017, round 2: August/September 2017 and round 3: May/June 2018). 7494 packs were collected in round 1, 5852 packs in round 2 and 6258 packs in round 3. The dataset consists of 25 variables which describe each pack in detail, including information on excise tax stamps, health warnings, tar and nicotine levels, brand name, name of manufacturer, and importer, among others. This data is freely available on the DataFirst data repository (https://www.datafirst.uct.ac.za/dataportal/index.php/catalog/772) after creating a user profile. 
This data was used for a research article titled ""The impact of tax increases on illicit cigarette trade in Mongolia"" which was published by Tobacco Control in 2019 (https://tobaccocontrol.bmj.com/content/early/2019/06/18/tobaccocontrol-2018-054904). The paper is co-authored by Ross H, Vellios N, Batmunkh T, Enkhtsogt M and Rossouw L.",2019-09-11 +32027495,Tautomer Database: A Comprehensive Resource for Tautomerism Analyses.,"We report a database of tautomeric structures that contains 2819 tautomeric tuples extracted from 171 publications. Each tautomeric entry has been annotated with experimental conditions reported in the respective publication, plus bibliographic details, structural identifiers (e.g., NCI/CADD identifiers FICTS, FICuS, uuuuu, and Standard InChI), and chemical information (e.g., SMILES, molecular weight). The majority of tautomeric tuples found were pairs; the remaining 10% were triples, quadruples, or quintuples, amounting to a total number of structures of 5977. The types of tautomerism were mainly prototropic tautomerism (79%), followed by ring-chain (13%) and valence tautomerism (8%). The experimental conditions reported in the publications included about 50 pure solvents and 9 solvent mixtures with 26 unique spectroscopic or nonspectroscopic methods. 1H and 13C NMR were the most frequently used methods. A total of 77 different tautomeric transform rules (SMIRKS) are covered by at least one example tuple in the database. This database is freely available as a spreadsheet at https://cactus.nci.nih.gov/download/tautomer/.",2020-03-10 +32043883,Toward a Comprehensive Treatment of Tautomerism in Chemoinformatics Including in InChI V2.,"We have collected 86 different transforms of tautomeric interconversions. Out of those, 54 are for prototropic (non-ring-chain) tautomerism, 21 for ring-chain tautomerism, and 11 for valence tautomerism. The majority of these rules have been extracted from experimental literature. 
Twenty rules, covering the most well-known types of tautomerism such as keto-enol tautomerism, were taken from the default handling of tautomerism by the chemoinformatics toolkit CACTVS. The rules were analyzed against nine differerent databases totaling over 400 million (non-unique) structures as to their occurrence rates, mutual overlap in coverage, and recapitulation of the rules' enumerated tautomer sets by InChI V.1.05, both in InChI's Standard and a Nonstandard version with the increased tautomer-handling options 15T and KET turned on. These results and the background of this study are discussed in the context of the IUPAC InChI Project tasked with the redesign of handling of tautomerism for an InChI version 2. Applying the rules presented in this paper would approximately triple the number of compounds in typical small-molecule databases that would be affected by tautomeric interconversion by InChI V2. A web tool has been created to test these rules at https://cactus.nci.nih.gov/tautomerizer.",2020-03-10 +31903802,Leukocyte Traits and Exposure to Ambient Particulate Matter Air Pollution in the Women's Health Initiative and Atherosclerosis Risk in Communities Study.,"

Background

Inflammatory effects of ambient particulate matter (PM) air pollution exposures may underlie PM-related increases in cardiovascular disease risk and mortality, although evidence of PM-associated leukocytosis is inconsistent and largely based on small, cross-sectional, and/or unrepresentative study populations.

Objectives

Our objective was to estimate PM-leukocyte associations among U.S. women and men in the Women's Health Initiative and Atherosclerosis Risk in Communities study (n=165,675).

Methods

We based the PM-leukocyte estimations on up to four study visits per participant, at which peripheral blood leukocytes and geocoded address-specific concentrations of PM≤10, ≤2.5, and 2.5-10μm in diameter (PM10, PM2.5, and PM2.5-10, respectively) were available. We multiply imputed missing data using chained equations and estimated PM-leukocyte count associations over daily to yearly PM exposure averaging periods using center-specific, linear, mixed, longitudinal models weighted for attrition and adjusted for sociodemographic, behavioral, meteorological, and geographic covariates. In a subset of participants with available data (n=8,457), we also estimated PM-leukocyte proportion associations in compositional data analyses.

Results

We found a 12 cells/μL (95% confidence interval: -9, 33) higher leukocyte count, a 1.2% (0.6%, 1.8%) higher granulocyte proportion, and a -1.1% (-1.9%, -0.3%) lower CD8+ T-cell proportion per 10-μg/m3 increase in 1-month mean PM2.5. However, shorter-duration PM10 exposures were inversely and only modestly associated with leukocyte count.

Discussion

The PM2.5-leukocyte estimates, albeit imprecise, suggest that among racially, ethnically, and environmentally diverse U.S. populations, sustained, ambient exposure to fine PM may induce subclinical, but epidemiologically important, inflammatory effects. https://doi.org/10.1289/EHP5360.",2020-01-06 +,SU70. Multimodal Fusion of 7 T Imaging Data Using mCCA + jICA Model in First-Episode Schizophrenia,"Abstract Background: The acquisition of multiple types of brain imaging data for the same subject has become more common; consequently, methods for fusing these multimodal data have emerged. Previously, multimodal data have been analyzed separately and individual modality results subsequently correlated; however, these analysis techniques lack the ability to examine true relationships between modalities. In contrast, the utilization of a common multiset canonical correlation analysis and joint independent component analysis (mCCA + jICA) model to fuse the data allows joint information (ie, shared or distinct abnormalities) between modalities to be examined. Methods: In this study, medicated first-episode schizophrenia patients (nSZ = 19) and matched (age, gender, smoking status, and socioeconomic status) controls (nHC = 21) completed a 6-minute resting-state functional magnetic resonance imaging (fMRI) scan at 7 tesla. Structural scans for each subject were segmented into unmodulated normalized grey matter (GM), white matter (WM), and cerebrospinal fluid (CSF) maps. Mean corrected amplitude of low-frequency fluctuation (ALFF) maps was extracted from standard preprocessed fMRI data. ALFF was calculated as the averaged square root of the power spectrum within 0.01–0.08Hz. Utilizing the Fusion ICA Toolbox (FIT; http://mialab.mrn.org/software/fit), GM, WM, CSF, and ALFF maps were used as features in a mCCA + jICA model. 
Data were decomposed into 10 components for each feature and 2-sample t tests were performed to indicate joint and modality-unique group-discriminating independent components. Results: Of the 10 components extracted from the model, results indicated one joint group-discriminating independent component for all modalities and 2 modality-unique group-discriminating components (ALFF; CSF). In the joint component, patients demonstrated higher mixing coefficients in GM (P = .00009) and WM (P = .0004) compared to controls; however, controls demonstrated higher mixing coefficients in CSF (P = .0066) and ALFF (P = .0006) in the same component. Additionally, the joint component demonstrated abnormalities in regions such as the middle frontal gyrus, inferior parietal lobule, precuneus, and caudate. These results indicate a relationship between abnormalities found in all 4 modalities. Conclusion: We believe that identification of an imaging biomarker will be enhanced with this ability to fuse multiple imaging modalities and ultimately use the joint information to differentiate schizophrenia.",2017-03-01 +32188748,mSphere of Influence: Predicting Immune Responses and Susceptibility to Influenza Virus-May the Data Be with You. ,"Irene Ramos works in the field of immunology to viral infections. In this mSphere of Influence article, she reflects on how ""Global analyses of human immune variation reveal baseline predictors of postvaccination responses"" by Tsang et al. (Cell 157:499-513, 2014, https://doi.org/10.1016/j.cell.2014.03.031) and ""A crowdsourced analysis to identify ab initio molecular signatures predictive of susceptibility to viral infection"" by Fourati et al. 
(Nat Commun 9:4418, 2018, https://doi.org/10.1038/s41467-018-06735-8) made an impact on her by highlighting the importance of data science methods to understand virus-host interactions.",2020-03-18 +29091996,"Visualization portal for genetic variation (VizGVar): a tool for interactive visualization of SNPs and somatic mutations in exons, genes and protein domains.","Motivation:VizGVar was designed to meet the growing need of the research community for improved genomic and proteomic data viewers that benefit from better information visualization. Results:We implemented a new information architecture and applied user centered design principles to provide a new improved way of visualizing genetic information and protein data related to human disease. VizGVar connects the entire database of Ensembl protein motifs, domains, genes and exons with annotated SNPs and somatic variations from PharmGKB and COSMIC. VizGVar precisely represents genetic variations and their respective location by colored curves to designate different types of variations. The structured hierarchy of biological data is reflected in aggregated patterns through different levels, integrating several layers of information at once. VizGVar provides a new interactive, web-based JavaScript visualization of somatic mutations and protein variation, enabling fast and easy discovery of clinically relevant variation patterns. Availability and implementation:VizGVar is accessible at http://vizport.io/vizgvar; http://vizport.io/vizgvar/doc/. Contact:asolano@broadinstitute.org or allan.orozcosolano@ucr.ac.cr.",2018-03-01 +25392406,VirHostNet 2.0: surfing on the web of virus/host molecular interactions data.,"VirHostNet release 2.0 (http://virhostnet.prabi.fr) is a knowledgebase dedicated to the network-based exploration of virus-host protein-protein interactions. 
Since the previous VirhostNet release (2009), a second run of manual curation was performed to annotate the new torrent of high-throughput protein-protein interactions data from the literature. This resource is shared publicly, in PSI-MI TAB 2.5 format, using a PSICQUIC web service. The new interface of VirHostNet 2.0 is based on Cytoscape web library and provides a user-friendly access to the most complete and accurate resource of virus-virus and virus-host protein-protein interactions as well as their projection onto their corresponding host cell protein interaction networks. We hope that the VirHostNet 2.0 system will facilitate systems biology and gene-centered analysis of infectious diseases and will help to identify new molecular targets for antiviral drugs design. This resource will also continue to help worldwide scientists to improve our knowledge on molecular mechanisms involved in the antiviral response mediated by the cell and in the viral strategies selected by viruses to hijack the host immune system.",2014-11-11 +32774102,"Description of two new species of Paraonidae (Annelida) from the Gulf of Thailand, Western Pacific.","Two new species of Aricidea Webster, 1879 (Paraonidae), Aricidea (Acmira) anusakdiisp. nov. and Aricidea (Aricidea) thammapinanaesp. nov. were collected from 10-26.5 m depth, in soft bottoms with mud mixed with sand and shells at Songkhla Sea, the Gulf of Thailand between 2011-2018. Aricidea (Acmira) anusakdiisp. nov. is clearly distinguished from other species of the subgenus Acmira by having a rounded bilobed prostomium divided by a slight notch on the anterior margin; red pigments on the subdistal to the tip of each branchia (new character); two prebranchial chaetigers; 48-68 pairs of branchiae; and modified neurochaetae as strong curved spines with blunt shafts surrounded by pubescence from chaetigers 19-44. On the other hand, Aricidea (Aricidea) thammapinanaesp. nov. 
can be separated from other members of the subgenus Aricidea by the presence of a biarticulated median antenna; distinctive notopodial lobes as broad triangular with short distal protuberances on chaetiger 3, 4-8 pairs of branchiae; and modified neurochaetae as bidentate neurochaetae with a long pubescent subterminal arista on the concave side. All data have been archived and are freely available from the Dryad Digital Repository (https://doi.org/10.5061/dryad.hqbzkh1cn).",2020-07-22 +27943584,Cytogenetic features of rRNA genes across land plants: analysis of the Plant rDNA database.,"The online resource http://www.plantrdnadatabase.com/ stores information on the number, chromosomal locations and structure of the 5S and 18S-5.8S-26S (35S) ribosomal DNAs (rDNA) in plants. This resource was exploited to study relationships between rDNA locus number, distribution, the occurrence of linked (L-type) and separated (S-type) 5S and 35S rDNA units, chromosome number, genome size and ploidy level. The analyses presented summarise current knowledge on rDNA locus numbers and distribution in plants. We analysed 2949 karyotypes, from 1791 species and 86 plant families, and performed ancestral character state reconstructions. The ancestral karyotype (2n = 16) has two terminal 35S sites and two interstitial 5S sites, while the median (2n = 24) presents four terminal 35S sites and three interstitial 5S sites. Whilst 86.57% of karyotypes show S-type organisation (ancestral condition), the L-type arrangement has arisen independently several times during plant evolution. A non-terminal position of 35S rDNA was found in about 25% of single-locus karyotypes, suggesting that terminal locations are not essential for functionality and expression. Single-locus karyotypes are very common, even in polyploids. In this regard, polyploidy is followed by subsequent locus loss. 
This results in a decrease in locus number per monoploid genome, forming part of the diploidisation process returning polyploids to a diploid-like state over time.",2017-02-14 +32670506,Modeling and analysis of site-specific mutations in cancer identifies known plus putative novel hotspots and bias due to contextual sequences.,"In cancer, recurrently mutated sites in DNA and proteins, called hotspots, are thought to be raised by positive selection and therefore important due to its potential functional impact. Although recent evidence for APOBEC enzymatic activity have shown that specific types of sequences are likely to be false, the identification of putative hotspots is important to confirm either its functional role or its mechanistic bias. In this work, an algorithm and a statistical model is presented to detect hotspots. The model consists of a beta-binomial component plus fixed effects that efficiently fits the distribution of mutated sites. The algorithm employs an optimal stepwise approach to find the model parameters. Simulations show that the proposed algorithmic model is highly accurate for common hotspots. The approach has been applied to TCGA mutational data from 33 cancer types. The results show that well-known cancer hotspots are easily detected. Besides, novel hotspots are also detected. An analysis of the sequence context of detected hotspots show a preference for TCG sites that may be related to APOBEC or other unknown mechanistic biases. The detected hotspots are available online in http://bioinformatica.mty.itesm.mx/HotSpotsAnnotations.",2020-06-20 +32479607,"The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2020 update.","Galaxy (https://galaxyproject.org) is a web-based computational workbench used by tens of thousands of scientists across the world to analyze large biomedical datasets. 
Since 2005, the Galaxy project has fostered a global community focused on achieving accessible, reproducible, and collaborative research. Together, this community develops the Galaxy software framework, integrates analysis tools and visualizations into the framework, runs public servers that make Galaxy available via a web browser, performs and publishes analyses using Galaxy, leads bioinformatics workshops that introduce and use Galaxy, and develops interactive training materials for Galaxy. Over the last two years, all aspects of the Galaxy project have grown: code contributions, tools integrated, users, and training materials. Key advances in Galaxy's user interface include enhancements for analyzing large dataset collections as well as interactive tools for exploratory data analysis. Extensions to Galaxy's framework include support for federated identity and access management and increased ability to distribute analysis jobs to remote resources. New community resources include large public servers in Europe and Australia, an increasing number of regional and local Galaxy communities, and substantial growth in the Galaxy Training Network.",2020-07-01 +31391866,"ClickGene: an open cloud-based platform for big pan-cancer data genome-wide association study, visualization and exploration.","Tremendous amount of whole-genome sequencing data have been provided by large consortium projects such as TCGA (The Cancer Genome Atlas), COSMIC and so on, which creates incredible opportunities for functional gene research and cancer associated mechanism uncovering. While the existing web servers are valuable and widely used, many whole genome analysis functions urgently needed by experimental biologists are still not adequately addressed. A cloud-based platform, named CG (ClickGene), therefore, was developed for DIY analyzing of user's private in-house data or public genome data without any requirement of software installation or system configuration. 
CG platform provides key interactive and customized functions including Bee-swarm plot, linear regression analyses, Mountain plot, Directional Manhattan plot, Deflection plot and Volcano plot. Using these tools, global profiling or individual gene distributions for expression and copy number variation (CNV) analyses can be generated by only mouse button clicking. The easy accessibility of such comprehensive pan-cancer genome analysis greatly facilitates data mining in wide research areas, such as therapeutic discovery process. Therefore, it fills in the gaps between big cancer genomics data and the delivery of integrated knowledge to end-users, thus helping unleash the value of the current data resources. More importantly, unlike other R-based web platforms, Dubbo, a cloud distributed service governance framework for 'big data' stream global transferring, was used to develop CG platform. After being developed, CG is run on an independent cloud-server, which ensures its steady global accessibility. More than 2 years running history of CG proved that advanced plots for hundreds of whole-genome data can be created through it within seconds by end-users anytime and anywhere. CG is available at http://www.clickgenome.org/.",2019-06-26 +28191780,Cancer Stem Cells Therapeutic Target Database: The First Comprehensive Database for Therapeutic Targets of Cancer Stem Cells.,"Cancer stem cells (CSCs) are a subpopulation of tumor cells that have strong self-renewal capabilities and may contribute to the failure of conventional cancer therapies. Hence, therapeutics homing in on CSCs represent a novel and promising approach that may eradicate malignant tumors. However, the lack of information on validated targets of CSCs has greatly hindered the development of CSC-directed therapeutics. 
Herein, we describe the Cancer Stem Cells Therapeutic Target Database (CSCTT), the first online database to provide a rich bioinformatics resource for the display, search, and analysis of structure, function, and related annotation for therapeutic targets of cancer stem cells. CSCTT contains 135 proteins that are potential targets of CSCs, with validated experimental evidence manually curated from existing literatures. Proteins are carefully annotated with a detailed description of protein families, biological process, related diseases, and experimental evidences. In addition, CSCTT has compiled 213 documented therapeutic methods for cancer stem cells, including 118 small molecules and 20 biotherapy methods. The CSCTT may serve as a useful platform for the development of CSC-directed therapeutics against various malignant tumors. The CSCTT database is freely available to the public at http://www.csctt.org/. Stem Cells Translational Medicine 2017;6:331-334.",2016-10-11 +30171661,Destructive twisting of neutral metalloproteases: the catalysis mechanism of the Dispase autolysis-inducing protein from Streptomyces mobaraensis DSM 40487.,"The Dispase autolysis-inducing protein (DAIP) is produced by Streptomyces mobaraensis to disarm neutral metalloproteases by decomposition. The absence of a catalytic protease domain led to the assumption that the seven-bladed β-propeller protein DAIP causes structural modifications, thereby triggering autolysis. Determination of protein complexes consisting of DAIP and thermolysin or DAIP and a nonfunctional E138A bacillolysin variant supported this postulation. Protein twisting was indicated by DAIP-mediated inhibition of thermolysin while bacillolysin underwent immediate autolysis under the same conditions. Interestingly, an increase in SYPRO orange fluorescence allowed tracking of the fast degradation process. 
Similarly rapid autolysis of thermolysin mediated by DAIP was only observed upon the addition of amphiphilic compounds, which probably amplify the induced structural changes. DAIP further caused degradation of FITC-labeled E138A bacillolysin by trypsin, as monitored by a linear decrease in fluorescence polarization. The kinetic model, calculated from the obtained data, suggested a three-step mechanism defined by (a) fast DAIP-metalloprotease complex formation, (b) slower DAIP-mediated protein twisting, and (c) fragmentation. These results were substantiated by crystallized DAIP attached to a C-terminal helix fragment of thermolysin. Structural superposition of the complex with thermolysin is indicative of a conformational change upon binding to DAIP. Importantly, the majority of metalloproteases, also including homologs from various pathogens, are highly conserved at the autolysis-prone peptide bonds, suggesting their susceptibility to DAIP-mediated decomposition, which may offer opportunities for pharmaceutical applications. DATABASES: The atomic coordinates and structure factors (PDB ID: 6FHP) have been deposited in the Protein Data Bank (http://www.pdb.org/). ENZYMES: Aureolysin, EC 3.4.24.29; bacillolysin (Dispase, Gentlyase), EC 3.4.24.28; lasB (elastase), EC 3.4.24.4; subtilisin, EC 3.4.21.62; thermolysin, EC 3.4.24.27; transglutaminase, EC 2.3.2.13; trypsin, EC 3.4.21.4; vibriolysin (hemagglutinin(HA)/protease), EC 3.4.24.25.",2018-09-17 +30526591,Mining and standardizing chinese consumer health terms.,"

Background

Health professionals and consumers use different terms to express medical events or concerns, which makes the communication barriers between the professionals and consumers. This may lead to bias in the diagnosis or treatment due to the misunderstanding or incomplete understanding. To solve the issue, a consumer health vocabulary was developed to map the consumer-used health terms to professional-used medical terms.

Methods

In this study, we extracted Chinese consumer health terms from both online health forum and patient education monographs, and manually mapped them to medical terms used by professionals (terms in medical thesauri or in medical books). To ensure the above annotation quality, we developed annotation guidelines.

Results

We applied our method to extract consumer-used disease terms in endocrinology, cardiology, gastroenterology and dermatology. In this study, we identified 1349 medical mentions from 8436 questions posted in an online health forum and 1428 articles for patient education monographs. After manual annotation and review, we released 1036 Chinese consumer health terms with mapping to 480 medical terms. Four annotators worked on the manual annotation work following the Chinese consumer health term annotation guidelines. Their average inter-annotator agreement (IAA) score was 93.91% ensuring high consistency of the released terms.

Conclusions

We extracted Chinese consumer health terms from online forum and patient education monographs, and mapped them to medical terms used by professionals. Manual annotation efforts have been made for term annotating and mapping. Our study may contribute to the Chinese consumer health vocabulary construction. In addition, our annotated corpus, both the contexts of consumer health terms and consumer-professional term mapping, would be a useful resource for automatic methodology development. The dataset of the Chinese consumer health terms (CHT) is publicly available at http://www.phoc.org.cn/cht/ .",2018-12-07 +33035119,Fine Particulate Matter Exposure and Cancer Incidence: Analysis of SEER Cancer Registry Data from 1992-2016.,"

Background

Previous research has identified an association between fine particulate matter (PM2.5) air pollution and lung cancer. Most of the evidence for this association, however, is based on research using lung cancer mortality, not incidence. Research that examines potential associations between PM2.5 and incidence of non-lung cancers is limited.

Objectives

The primary purpose of this study was to evaluate the association between the incidence of cancer and exposure to PM2.5 using >8.5 million cases of cancer incidences from U.S. registries. Secondary objectives include evaluating the sensitivity of the associations to model selection, spatial control, and latency period as well as estimating the exposure-response relationship for several cancer types.

Methods

Surveillance, Epidemiology, and End Results (SEER) program data were used to calculate incidence rates for various cancer types in 607 U.S. counties. County-level PM2.5 concentrations were estimated using integrated empirical geographic regression models. Flexible semi-nonparametric regression models were used to estimate associations between PM2.5 and cancer incidence for selected cancers while controlling for important county-level covariates. Primary time-independent models using average incidence rates from 1992-2016 and average PM2.5 from 1988-2015 were estimated. In addition, time-varying models using annual incidence rates from 2002-2011 and lagged moving averages of annual estimates for PM2.5 were also estimated.

Results

The incidences of all cancer and lung cancer were consistently associated with PM2.5. The incident rate ratios (IRRs), per 10-μg/m3 increase in PM2.5, for all and lung cancer were 1.09 (95% CI: 1.03, 1.14) and 1.19 (95% CI: 1.09, 1.30), respectively. Less robust associations were observed with oral, rectal, liver, skin, breast, and kidney cancers.

Discussion

Exposure to PM2.5 air pollution contributes to lung cancer incidence and is potentially associated with non-lung cancer incidence. https://doi.org/10.1289/EHP7246.",2020-10-09 +31690012,MicroServices Suite for Smart City Applications. ,"Smart Cities are approaching the Internet of Things (IoT) World. Most of the first-generation Smart City solutions are based on Extract Transform Load (ETL); processes and languages that mainly support pull protocols for data gathering. IoT solutions are moving forward to event-driven processes using push protocols. Thus, the concept of IoT applications has turned out to be widespread; but it was initially ""implemented"" with ETL; rule-based solutions; and finally; with true data flows. In this paper, these aspects are reviewed, highlighting the requirements for smart city IoT applications and in particular, the ones that implement a set of specific MicroServices for IoT Applications in Smart City contexts. Moreover; our experience has allowed us to implement a suite of MicroServices for Node-RED; which has allowed for the creation of a wide range of new IoT applications for smart cities that includes dashboards, IoT Devices, data analytics, discovery, etc., as well as a corresponding Life Cycle. The proposed solution has been validated against a large number of IoT applications, as it can be verified by accessing the https://www.Snap4City.org portal; while only three of them have been described in the paper. In addition, the reported solution assessment has been carried out by a number of smart city experts. The work has been developed in the framework of the Select4Cities PCP (PreCommercial Procurement), funded by the European Commission as Snap4City platform.",2019-11-04 +32750560,sTAM: An Online Tool for the Discovery of miRNA-Set Level Disease Biomarkers.,"MicroRNAs (miRNAs) are an important class of small noncoding RNA molecules that serve as excellent biomarkers of various diseases. 
However, current miRNA biomarkers, including those comprised of multiple miRNAs, work at a single-miRNA level but not at a miRNA-set level, which is defined as a group of miRNAs sharing common biological characteristics. Given the rapidly accumulating miRNA omics data, we believe that the miRNA-set level analysis could be an important supplement to the single-miRNA level analysis. Therefore, we present sTAM (http://mir.rnanut.net/stam), a computational tool for single-sample miRNA-set enrichment analysis. Moreover, we demonstrate the utility of sTAM scores in discovering miRNA-set level biomarkers through two case studies. We conduct a pan-cancer analysis of the sTAM scores of the ""tumor suppressor miRNA set"" on 15 types of cancers from The Cancer Genome Atlas (TCGA) and 14 from Gene Expression Omnibus (GEO), results of which indicated a good performance in distinguishing cancers from controls. Moreover, we reveal that the sTAM scores of the ""brain development miRNA set"" can effectively predict cerebrovascular disorder (CVD). Finally, we believe that sTAM can be used to discover disease-related biomarkers at a miRNA-set level.",2020-07-10 +31077292,SPLATCHE3: simulation of serial genetic data under spatially explicit evolutionary scenarios including long-distance dispersal.,"

Summary

SPLATCHE3 simulates genetic data under a variety of spatially explicit evolutionary scenarios, extending previous versions of the framework. The new capabilities include long-distance migration, spatially and temporally heterogeneous short-scale migrations, alternative hybridization models, simulation of serial samples of genetic data and a large variety of DNA mutation models. These implementations have been applied independently to various studies, but grouped together in the current version.

Availability and implementation

SPLATCHE3 is written in C++ and is freely available for non-commercial use from the website http://www.splatche.com/splatche3. It includes console versions for Linux, MacOs and Windows and a user-friendly GUI for Windows, as well as detailed documentation and ready-to-use examples.",2019-11-01 +30726880,Finding de novo methylated DNA motifs.,"

Motivation

Increasing evidence has shown that nucleotide modifications such as methylation and hydroxymethylation on cytosine would greatly impact the binding of transcription factors (TFs). However, there is a lack of motif finding algorithms with the function to search for motifs with modified bases. In this study, we expand on our previous motif finding pipeline Epigram to provide systematic de novo motif discovery and performance evaluation on methylated DNA motifs.

Results

mEpigram outperforms both MEME and DREME on finding modified motifs in simulated data that mimics various motif enrichment scenarios. Furthermore we were able to identify methylated motifs in Arabidopsis DNA affinity purification sequencing (DAP-seq) data that were previously demonstrated to contain such motifs. When applied to TF ChIP-seq and DNA methylome data in H1 and GM12878, our method successfully identified novel methylated motifs that can be recognized by the TFs or their co-factors. We also observed spacing constraint between the canonical motif of the TF and the newly discovered methylated motifs, which suggests operative recognition of these cis-elements by collaborative proteins.

Availability and implementation

The mEpigram program is available at http://wanglab.ucsd.edu/star/mEpigram.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +32329531,Texture Analysis of Ultrasound Images to Differentiate Simple Fibroadenomas From Complex Fibroadenomas and Benign Phyllodes Tumors.,"

Objectives

American College of Radiology Breast Imaging Reporting and Data System (BI-RADS) category 4A lesions can be distinguished from BI-RADS 3 lesions with main ultrasound (US) findings such as a well-defined contour, round/oval shape, and parallel orientation with a homogeneous echo pattern. Breast Imaging Reporting and Data System 4A solid masses might be diagnosed as simple fibroadenomas (SFAs), complex fibroadenomas (CFAs), or benign phyllodes tumors (BPTs). Complex fibroadenomas have an increased risk of invasive cancer development than SFAs, and BPTs have a risk of borderline-malignant phyllodes tumor transformation; both of them are surgically treated, whereas follow-up procedures are applied in SFAs. It is essential to differentiate SFAs from CFAs and BPTs. Grayscale features of these lesions include a prominent overlap. Texture analyses in breast lesions have contributions in benign-malignant lesion differentiation. In this study, we aimed to use texture analysis of US images to differentiate these benign lesions.

Methods

Grayscale US features of lesions (32 SFAs, 31 CFAs, and 32 BPTs) were classified according to the BI-RADS. Texture analysis of US images with LIFEx software (http://www.lifexsoft.org) was performed retrospectively. First- and second-order histogram parameters were evaluated.

Results

In grayscale US, the shape, orientation, and posterior acoustic characteristics had statistical significance (P < .05). In the statistical analysis, skewness, kurtosis, excess kurtosis, gray-level co-occurrence matrix (GLCM)-energy, GLCM-entropy log 2, and GLCM-entropy log 10 revealed significant differences among all 3 groups (P < .05).

Conclusions

As grayscale US features show prominent intersections, and treatment options differ, correct diagnosis is essential in SFAs, CFAs, and BPTs. In this study, we concluded that texture analysis of US images can discriminate SFAs from CFAs and BPTs. Texture analyses of US images is a potential candidate diagnostic tool for these lesions, and accurate diagnoses will preclude patients from undergoing unnecessary biopsies.",2020-04-24 +29268838,Impacts of SRT on Particle Size Distribution and Reactor Performance in Activated Sludge Processes.,"  Particle size distribution of the particulates is an essential characteristic of the wastewater quality. Particle size of activated sludge flocs may affect key sludge handling processes including sedimentation, thickening, digestion, and dewatering. This study evaluated the effects of solids retention time (SRT) on particle size distribution, sludge settleability, effluent turbidity, and removals of chemical oxygen demand (COD) and -N in a lab-scale Modified Ludzak-Ettinger (MLE) reactor and an integrated fixed film activated sludge (IFAS) reactor. This study also surveyed particle size distribution profile of five full-scale water resource recovery facilities (WRRFs), including high purity oxygen (HPO), step-feed nitrification/denitrification (NDN), and MLE NDN processes. This study provides direct evidence of the effects of SRT on particle size distribution and sludge settleability in lab-scale reactors and full-scale WRRFs.",2018-01-01 +31228181,KinomeX: a web application for predicting kinome-wide polypharmacology effect of small molecules.,"

Motivation

The large-scale kinome-wide virtual profiling for small molecules is a daunting task by experimental and traditional in silico drug design approaches. Recent advances in deep learning algorithms have brought about new opportunities in promoting this process.

Results

KinomeX is an online platform to predict kinome-wide polypharmacology effect of small molecules based solely on their chemical structures. The prediction is made by a multi-task deep neural network model trained with over 140 000 bioactivity data points for 391 kinases. Extensive computational and experimental validations have been performed. Overall, KinomeX enables users to create a comprehensive kinome interaction network for designing novel chemical modulators, and is of practical value on exploring the previously less studied or untargeted kinases.

Availability and implementation

KinomeX is available at: https://kinome.dddc.ac.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +31566664,DeepCleave: a deep learning predictor for caspase and matrix metalloprotease substrates and cleavage sites.,"

Motivation

Proteases are enzymes that cleave target substrate proteins by catalyzing the hydrolysis of peptide bonds between specific amino acids. While the functional proteolysis regulated by proteases plays a central role in the 'life and death' cellular processes, many of the corresponding substrates and their cleavage sites were not found yet. Availability of accurate predictors of the substrates and cleavage sites would facilitate understanding of proteases' functions and physiological roles. Deep learning is a promising approach for the development of accurate predictors of substrate cleavage events.

Results

We propose DeepCleave, the first deep learning-based predictor of protease-specific substrates and cleavage sites. DeepCleave uses protein substrate sequence data as input and employs convolutional neural networks with transfer learning to train accurate predictive models. High predictive performance of our models stems from the use of high-quality cleavage site features extracted from the substrate sequences through the deep learning process, and the application of transfer learning, multiple kernels and attention layer in the design of the deep network. Empirical tests against several related state-of-the-art methods demonstrate that DeepCleave outperforms these methods in predicting caspase and matrix metalloprotease substrate-cleavage sites.

Availability and implementation

The DeepCleave webserver and source code are freely available at http://deepcleave.erc.monash.edu/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +31073610,"The barcode, UMI, set format and BUStools.","

Summary

We introduce the Barcode-UMI-Set format (BUS) for representing pseudoalignments of reads from single-cell RNA-seq experiments. The format can be used with all single-cell RNA-seq technologies, and we show that BUS files can be efficiently generated. BUStools is a suite of tools for working with BUS files and facilitates rapid quantification and analysis of single-cell RNA-seq data. The BUS format therefore makes possible the development of modular, technology-specific and robust workflows for single-cell RNA-seq analysis.

Availability and implementation

http://BUStools.github.io/ and http://pachterlab.github.io/kallisto/singlecell.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +33500774,"Reproducibly sampling SARS-CoV-2 genomes across time, geography, and viral diversity.","The COVID-19 pandemic has led to a rapid accumulation of SARS-CoV-2 genomes, enabling genomic epidemiology on local and global scales. Collections of genomes from resources such as GISAID must be subsampled to enable computationally feasible phylogenetic and other analyses. We present genome-sampler, a software package that supports sampling collections of viral genomes across multiple axes including time of genome isolation, location of genome isolation, and viral diversity. The software is modular in design so that these or future sampling approaches can be applied independently and combined (or replaced with a random sampling approach) to facilitate custom workflows and benchmarking. genome-sampler is written as a QIIME 2 plugin, ensuring that its application is fully reproducible through QIIME 2's unique retrospective data provenance tracking system. genome-sampler can be installed in a conda environment on macOS or Linux systems. A complete default pipeline is available through a Snakemake workflow, so subsampling can be achieved using a single command. genome-sampler is open source, free for all to use, and available at https://caporasolab.us/genome-sampler. We hope that this will facilitate SARS-CoV-2 research and support evaluation of viral genome sampling approaches for genomic epidemiology.",2020-06-29 +31645373,Genotyping of Mycobacterium tuberculosis Rifampin Resistance-Associated Mutations by Use of Data from Xpert MTB/RIF Ultra Enables Large-Scale Tuberculosis Molecular Epidemiology Studies. ,"Molecular epidemiology studies of tuberculosis have been empowered in recent years by the availability of whole-genome sequencing, which has allowed a new focus on the adaptive significance of drug resistance mutations. 
Genome sequencing technology remains expensive, however, limiting the potential for larger studies. Conversely, during this same time the GeneXpert molecular diagnostic method has been deployed globally and now serves as a cornerstone of tuberculosis diagnosis and drug sensitivity testing. In this issue, Y. Cao, H. Parmar, A. M. Simmons, D. Kale, et al. (J Clin Microbiol 57:e00907-19, 2019, https://doi.org/10.1128/JCM.00907-19) report the development of an algorithm that can use high-resolution melting temperature data generated in the course of analysis using the next-generation Xpert MTB/RIF Ultra assay to accurately genotype rifampin resistance-associated mutations. When paired with a system to aggregate data from diagnostic laboratories, this technique has the potential to enable studies on the global scale of the epidemiology of tuberculosis drug resistance.",2019-12-23 +24203342,Navigating the global protein-protein interaction landscape using iRefWeb.,"iRefWeb is a bioinformatics resource that offers access to a large collection of data on protein-protein interactions in over a thousand organisms. This collection is consolidated from 14 major public databases that curate the scientific literature. The collection is enhanced with a range of versatile data filters and search options that categorize various types of protein-protein interactions and protein complexes. Users of iRefWeb are able to retrieve all curated interactions for a given organism or those involving a given protein (or a list of proteins), narrow down their search results based on different supporting evidence, and assess the reliability of these interactions using various criteria. They may also examine all data and annotations related to any publication that described the interaction-detection experiments. 
iRefWeb is freely available to the research community worldwide at http://wodaklab.org/iRefWeb .",2014-01-01 +32849849,Phosphodiesterase 4D Gene Modifies the Functional Network of Patients With Mild Cognitive Impairment and Alzheimer's Disease.,"Alzheimer's disease (AD) is a progressive neurodegenerative disorder that is affected by several genetic variants. It has been demonstrated that genetic variants affect brain organization and function. In this study, using whole genome-wide association studies (GWAS), we analyzed the functional magnetic resonance imaging and genetic data from the Alzheimer's Disease Neuroimaging Initiative dataset (ADNI) dataset and identified genetic variants associated with the topology of the functional brain network http://www.adni-info.org. We found three novel loci (rs2409627, rs9647533, and rs11955845) in an intron of the phosphodiesterase 4D (PDE4D) gene that contribute to abnormalities in the topological organization of the functional network. In particular, compared to the wild-type genotype, the subjects carrying the PDE4D variants had altered network properties, including a significantly reduced clustering coefficient, small-worldness, global and local efficiency, a significantly enhanced path length and a normalized path length. In addition, we found that all global brain network attributes were affected by PDE4D variants to different extents as the disease progressed. Additionally, brain regions with alterations in nodal efficiency due to the variations in PDE4D were predominant in the limbic lobe, temporal lobe and frontal lobes. PDE4D has a great effect on memory consolidation and cognition through long-term potentiation (LTP) effects and/or the promotion of inflammatory effects. PDE4D variants might be a main reason underlying the abnormal topological properties and cognitive impairment. 
Furthermore, we speculated that PDE4D is a risk factor for neural degenerative diseases and provided important clues for the earlier detection and therapeutic intervention for AD.",2020-08-06 +32902808,Predictors and Outcomes of Neurological Deterioration in Intracerebral Hemorrhage: Results from the TICH-2 Randomized Controlled Trial.,"Neurological deterioration is common after intracerebral hemorrhage (ICH). We aimed to identify the predictors and effects of neurological deterioration and whether tranexamic acid reduced the risk of neurological deterioration. Data from the Tranexamic acid in IntraCerebral Hemorrhage-2 (TICH-2) randomized controlled trial were analyzed. Neurological deterioration was defined as an increase in National Institutes of Health Stroke Scale (NIHSS) of ≥ 4 or a decline in Glasgow Coma Scale of ≥ 2. Neurological deterioration was considered to be early if it started ≤ 48 h and late if commenced between 48 h and 7 days after onset. Logistic regression was used to identify predictors and effects of neurological deterioration and the effect of tranexamic acid on neurological deterioration. Of 2325 patients, 735 (31.7%) had neurological deterioration: 590 (80.3%) occurred early and 145 (19.7%) late. Predictors of early neurological deterioration included recruitment from the UK, previous ICH, higher admission systolic blood pressure, higher NIHSS, shorter onset-to-CT time, larger baseline hematoma, intraventricular hemorrhage, subarachnoid extension and antiplatelet therapy. Older age, male sex, higher NIHSS, previous ICH and larger baseline hematoma predicted late neurological deterioration. Neurological deterioration was independently associated with a modified Rankin Scale of > 3 (aOR 4.98, 3.70-6.70; p < 0.001). Tranexamic acid reduced the risk of early (aOR 0.79, 0.63-0.99; p = 0.041) but not late neurological deterioration (aOR 0.76, 0.52-1.11; p = 0.15). 
Larger hematoma size, intraventricular and subarachnoid extension increased the risk of neurological deterioration. Neurological deterioration increased the risk of death and dependency at day 90. Tranexamic acid reduced the risk of early neurological deterioration and warrants further investigation in ICH. URL: https://www.isrctn.com Unique identifier: ISRCTN93732214.",2020-09-09 +32140819,i6mA-Fuse: improved and robust prediction of DNA 6 mA sites in the Rosaceae genome by fusing multiple feature representation.,"DNA N6-methyladenine (6 mA) is one of the most vital epigenetic modifications and involved in controlling the various gene expression levels. With the avalanche of DNA sequences generated in numerous databases, the accurate identification of 6 mA plays an essential role for understanding molecular mechanisms. Because the experimental approaches are time-consuming and costly, it is desirable to develop a computation model for rapidly and accurately identifying 6 mA. To the best of our knowledge, we first proposed a computational model named i6mA-Fuse to predict 6 mA sites from the Rosaceae genomes, especially in Rosa chinensis and Fragaria vesca. We implemented the five encoding schemes, i.e., mononucleotide binary, dinucleotide binary, k-space spectral nucleotide, k-mer, and electron-ion interaction pseudo potential compositions, to build the five, single-encoding random forest (RF) models. The i6mA-Fuse uses a linear regression model to combine the predicted probability scores of the five, single encoding-based RF models. The resultant species-specific i6mA-Fuse achieved remarkably high performances with AUCs of 0.982 and 0.978 and with MCCs of 0.869 and 0.858 on the independent datasets of Rosa chinensis and Fragaria vesca, respectively. In the F. vesca-specific i6mA-Fuse, the MBE and EIIP contributed to 75% and 25% of the total prediction; in the R. 
chinensis-specific i6mA-Fuse, Kmer, MBE, and EIIP contribute to 15%, 65%, and 20% of the total prediction. To assist high-throughput prediction for DNA 6 mA identification, the i6mA-Fuse is publicly accessible at https://kurata14.bio.kyutech.ac.jp/i6mA-Fuse/.",2020-03-05 +32133024,"BarleyNet: A Network-Based Functional Omics Analysis Server for Cultivated Barley, Hordeum vulgare L.","Cultivated barley (Hordeum vulgare L.) is one of the most produced cereal crops worldwide after maize, bread wheat, and rice. Barley is an important crop species not only as a food source, but also in plant genetics because it harbors numerous stress response alleles in its genome that can be exploited for crop engineering. However, the functional annotation of its genome is relatively poor compared with other major crops. Moreover, bioinformatics tools for system-wide analyses of omics data from barley are not yet available. We have thus developed BarleyNet, a co-functional network of 26,145 barley genes, along with a web server for network-based predictions (http://www.inetbio.org/barleynet). We demonstrated that BarleyNet's prediction of biological processes is more accurate than that of an existing barley gene network. We implemented three complementary network-based algorithms for prioritizing genes or functional concepts to study genetic components of complex traits such as environmental stress responses: (i) a pathway-centric search for candidate genes of pathways or complex traits; (ii) a gene-centric search to infer novel functional concepts for genes; and (iii) a context-centric search for novel genes associated with stress response. We demonstrated the usefulness of these network analysis tools in the study of stress response using proteomics and transcriptomics data from barley leaves and roots upon drought or heat stresses. 
These results suggest that BarleyNet will facilitate our understanding of the underlying genetic components of complex traits in barley.",2020-02-18 +30785205,A Systematic Review of Principal Component Analysis-Derived Dietary Patterns in Japanese Adults: Are Major Dietary Patterns Reproducible Within a Country?,"Principal component analysis (PCA) has been widely used in nutritional epidemiology to derive dietary patterns. However, although PCA-derived dietary patterns are population-dependent, their reproducibility in different populations is largely unexplored. We aimed to investigate whether major dietary patterns are consistently identified among different populations within a country and, if so, how similar these dietary patterns are. We conducted a systematic review of PCA-derived dietary patterns in Japanese adults using PubMed and Web of Science for English articles and Ichushi-Web and CiNii databases for Japanese articles. We assessed the reproducibility of major dietary patterns using congruence coefficients (CCs), with values ≥0.80 considered to represent fair similarity. From 65 articles (80 studies) included in this review, 285 different dietary patterns were identified. Based on the names of these patterns, major dietary patterns were Western (n = 34), Japanese (n = 12), traditional (n = 10), traditional Japanese (n = 9), healthy (n = 18), and prudent (n = 9) patterns. When assessment was limited to high-quality data (i.e., studies based on a sample size ≥200 and use of a validated dietary assessment questionnaire or multiple-day dietary record), the median CC was low for Western (0.44), traditional (0.59), and traditional Japanese (0.31) patterns. Conversely, the median CC was 0.89 for healthy, 0.86 for prudent, and 0.80 for Japanese patterns; and the proportion of pairs with a CC ≥0.80 was 87.3%, 64.3%, and 50.0%, respectively. 
Characteristics shared among these 3 dietary patterns included higher intakes of mushrooms, seaweeds, vegetables, potatoes, fruits, pulses, and pickles. In conclusion, this systematic review showed that some of the major dietary patterns are relatively reproducible in different populations within a country, whereas others are not. This highlights the importance of careful interpretation of PCA-derived dietary patterns. Our findings in Japan should be confirmed in different countries and globally. This study was registered at https://www.crd.york.ac.uk/prospero/ as CRD42018087669.",2019-03-01 +31913588,Measuring and optimising the efficiency of community hospital inpatient care for older people: the MoCHA mixed-methods study,"

Background

Community hospitals are small hospitals providing local inpatient and outpatient services. National surveys report that inpatient rehabilitation for older people is a core function but there are large differences in key performance measures. We have investigated these variations in community hospital ward performance.

Objectives

(1) To measure the relative performance of community hospital wards (studies 1 and 2); (2) to identify characteristics of community hospital wards that optimise performance (studies 1 and 3); (3) to develop a web-based interactive toolkit that supports operational changes to optimise ward performance (study 4); (4) to investigate the impact of community hospital wards on secondary care use (study 5); and (5) to investigate associations between short-term community (intermediate care) services and secondary care utilisation (study 5).

Methods

Study 1 – we used national data to conduct econometric estimations using stochastic frontier analysis in which a cost function was modelled using significant predictors of community hospital ward costs. Study 2 – a national postal survey was developed to collect data from a larger sample of community hospitals. Study 3 – three ethnographic case studies were performed to provide insight into less tangible aspects of community hospital ward care. Study 4 – a web-based interactive toolkit was developed by integrating the econometrics (study 1) and case study (study 3) findings. Study 5 – regression analyses were conducted using data from the Atlas of Variation Map 61 (rate of emergency admissions to hospital for people aged ≥ 75 years with a length of stay of < 24 hours) and the National Audit of Intermediate Care.

Results

Community hospital ward efficiency is comparable with the NHS acute hospital sector (mean cost efficiency 0.83, range 0.72–0.92). The rank order of community hospital ward efficiencies was distinguished to facilitate learning across the sector. On average, if all community hospital wards were operating in line with the highest cost efficiency, savings of 17% (or £47M per year) could be achieved (price year 2013/14) for our sample of 101 wards. Significant economies of scale were found: a 1% rise in output was associated with an average 0.85% increase in costs. We were unable to obtain a larger community hospital sample because of the low response rate to our national survey. The case studies identified how rehabilitation was delivered through collaborative, interdisciplinary working; interprofessional communication; and meaningful patient and family engagement. We also developed insight into patients’ recovery trajectories and care transitions. The web-based interactive toolkit was established [http://mocha.nhsbenchmarking.nhs.uk/ (accessed 9 September 2019)]. The crisis response team type of intermediate care, but not community hospitals, had a statistically significant negative association with emergency admissions.

Limitations

The econometric analyses were based on cross-sectional data and were also limited by missing data. The low response rate to our national survey means that we cannot extrapolate reliably from our community hospital sample.

Conclusions

The results suggest that significant community hospital ward savings may be realised by improving modifiable performance factors that might be augmented further by economies of scale.

Future work

How less efficient hospitals might reduce costs and sustain quality requires further research.

Funding

This project was funded by the National Institute for Health Research (NIHR) Health Services and Delivery Research programme and will be published in full in Health Services and Delivery Research; Vol. 8, No. 1. See the NIHR Journals Library website for further project information.",2020-01-09 +31552418,Signatures of cell death and proliferation in perturbation transcriptomics data-from confounding factor to effective prediction.,"Transcriptional perturbation signatures are valuable data sources for functional genomics. Linking perturbation signatures to screenings opens the possibility to model cellular phenotypes from expression data and to identify efficacious drugs. We linked perturbation transcriptomics data from the LINCS-L1000 project with cell viability information upon genetic (Achilles project) and chemical (CTRP screen) perturbations yielding more than 90 000 signature-viability pairs. An integrated analysis showed that the cell viability signature is a major factor underlying perturbation signatures. The signature is linked to transcription factors regulating cell death, proliferation and division time. We used the cell viability-signature relationship to predict viability from transcriptomics signatures, and identified and validated compounds that induce cell death in tumor cell lines. We showed that cellular toxicity can lead to unexpected similarity of signatures, confounding mechanism of action discovery. Consensus compound signatures predicted cell-specific drug sensitivity, even if the signature is not measured in the same cell line, and outperformed conventional drug-specific features. Our results can help in understanding mechanisms behind cell death and removing confounding factors of transcriptomic perturbation screens. 
To interactively browse our results and predict cell viability in new gene expression samples, we developed CEVIChE (CEll VIability Calculator from gene Expression; https://saezlab.shinyapps.io/ceviche/).",2019-11-01 +31218360,sefOri: selecting the best-engineered sequence features to predict DNA replication origins.,"

Motivation

Cell divisions start from replicating the double-stranded DNA, and the DNA replication process needs to be precisely regulated both spatially and temporally. The DNA is replicated starting from the DNA replication origins. A few successful prediction models were generated based on the assumption that the DNA replication origin regions have sequence level features like physicochemical properties significantly different from the other DNA regions.

Results

This study proposed a feature selection procedure to further refine the classification model of the DNA replication origins. The experimental data demonstrated that as large as 26% improvement in the prediction accuracy may be achieved on the yeast Saccharomyces cerevisiae. Moreover, the prediction accuracies of the DNA replication origins were improved for all the four yeast genomes investigated in this study.

Availability and implementation

The software sefOri version 1.0 was available at http://www.healthinformaticslab.org/supp/resources.php. An online server was also provided for the convenience of the users, and its web link may be found in the above-mentioned web page.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +27158451,A compendium of monocyte transcriptome datasets to foster biomedical knowledge discovery.,"Systems-scale profiling approaches have become widely used in translational research settings. The resulting accumulation of large-scale datasets in public repositories represents a critical opportunity to promote insight and foster knowledge discovery. However, resources that can serve as an interface between biomedical researchers and such vast and heterogeneous dataset collections are needed in order to fulfill this potential. Recently, we have developed an interactive data browsing and visualization web application, the Gene Expression Browser (GXB). This tool can be used to overlay deep molecular phenotyping data with rich contextual information about analytes, samples and studies along with ancillary clinical or immunological profiling data. In this note, we describe a curated compendium of 93 public datasets generated in the context of human monocyte immunological studies, representing a total of 4,516 transcriptome profiles. Datasets were uploaded to an instance of GXB along with study description and sample annotations. Study samples were arranged in different groups. Ranked gene lists were generated based on relevant group comparisons. This resource is publicly available online at http://monocyte.gxbsidra.org/dm3/landing.gsp.",2016-03-07 +30101318,PlaNC-TE: a comprehensive knowledgebase of non-coding RNAs and transposable elements in plants.,"Transposable elements (TEs) play an essential role in the genetic variability of eukaryotic species. In plants, they may comprise up to 90% of the total genome. Non-coding RNAs (ncRNAs) are known to control gene expression and regulation. Although the relationship between ncRNAs and TEs is known, obtaining the organized data for sequenced genomes is not straightforward. 
In this study, we describe the PlaNC-TE (http://planc-te.cp.utfpr.edu.br), a user-friendly portal harboring a knowledgebase created by integrating and analysing plant ncRNA-TE data. We identified a total of 14 350 overlaps between ncRNAs and TEs in 40 plant genomes. The database allows users to browse, search and download all ncRNA and TE data analysed. Overall, PlaNC-TE not only organizes data and provides insights about the relationship between ncRNA and TEs in plants but also helps improve genome annotation strategies. Moreover, this is the first database to provide resources to broadly investigate functions and mechanisms involving TEs and ncRNAs in plants.",2018-01-01 +23193269,MicroScope--an integrated microbial resource for the curation and comparative analysis of genomic and metabolic data.,"MicroScope is an integrated platform dedicated to both the methodical updating of microbial genome annotation and to comparative analysis. The resource provides data from completed and ongoing genome projects (automatic and expert annotations), together with data sources from post-genomic experiments (i.e. transcriptomics, mutant collections) allowing users to perfect and improve the understanding of gene functions. MicroScope (http://www.genoscope.cns.fr/agc/microscope) combines tools and graphical interfaces to analyse genomes and to perform the manual curation of gene annotations in a comparative context. Since its first publication in January 2006, the system (previously named MaGe for Magnifying Genomes) has been continuously extended both in terms of data content and analysis tools. The last update of MicroScope was published in 2009 in the Database journal. Today, the resource contains data for >1600 microbial genomes, of which ∼300 are manually curated and maintained by biologists (1200 personal accounts today). 
Expert annotations are continuously gathered in the MicroScope database (∼50 000 a year), contributing to the improvement of the quality of microbial genomes annotations. Improved data browsing and searching tools have been added, original tools useful in the context of expert annotation have been developed and integrated and the website has been significantly redesigned to be more user-friendly. Furthermore, in the context of the European project Microme (Framework Program 7 Collaborative Project), MicroScope is becoming a resource providing for the curation and analysis of both genomic and metabolic data. An increasing number of projects are related to the study of environmental bacterial (meta)genomes that are able to metabolize a large variety of chemical compounds that may be of high industrial interest.",2012-11-27 +32833550,EasyAmber: A comprehensive toolbox to automate the molecular dynamics simulation of proteins.,"Conformational plasticity of the functionally important regions and binding sites in protein/enzyme structures is one of the key factors affecting their function and interaction with substrates/ligands. Molecular dynamics (MD) can address the challenge of accounting for protein flexibility by predicting the time-dependent behavior of a molecular system. It has a potential of becoming a particularly important tool in protein engineering and drug discovery, but requires specialized training and skills, which impedes practical use by many investigators. We have developed the easyAmber - a comprehensive set of programs to automate the molecular dynamics routines implemented in the Amber package. The toolbox can address a wide set of tasks in computational biology struggling to account for protein flexibility. 
The automated workflow includes a complete set of steps from the initial ""static"" molecular model to the MD ""production run"": the full-atom model building, optimization/equilibration of the molecular system, classical/conventional and accelerated molecular dynamics simulations. The easyAmber implements advanced MD protocols, but is highly automated and easy-to-operate to attract a broad audience. The toolbox can be used on a personal desktop station equipped with a compatible gaming GPU-accelerator, as well as help to manage huge workloads on a powerful supercomputer. The software provides an opportunity to operate multiple simulations of different proteins at the same time, thus significantly increasing work efficiency. The easyAmber takes the molecular dynamics to the next level in terms of usability for complex processing of large volumes of data, thus supporting the recent trend away from inefficient ""static"" approaches in biology toward a deeper understanding of the dynamics in protein structures. The software is freely available for download at https://biokinet.belozersky.msu.ru/easyAmber, no login required.",2020-08-22 +30950277,mineXpert: Biological Mass Spectrometry Data Visualization and Mining with Full JavaScript Ability.,"Biological mass spectrometry mainly comprises three fields of endeavor, namely, proteomics, metabolomics, and structural biology. In each of these specialties, the mass spectrometrist needs to access MS1 mass spectral data, although not necessarily on the same basis. For example, the bottom-up proteomics scientist will occasionally access MS1 data to perform data inspection, quality assessments, and quantitation measurements, whereas top-down proteomics, structural biology, or metabolomics scientists will actually spend most of their time mining profile-mode MS1 data. Furthermore, the advent of ion mobility-mass spectrometry imposes new manners of mass spectral data visualization. 
An open-source MS1-only mass data visualization software for the desktop was developed to allow scientists to visualize conventional and drift time mass data. Various mass data integrations are possible, allowing a thorough mass spectral data scrutiny. Isotopic cluster calculations are easily carried over from the chemical formula up to the display of the mass spectrum. Deconvolution of mass peaks can be achieved with a simple mouse drag. Flexible reporting of data inspection events and of mining discoveries is provided. Very large sparse data sets can be sliced into smaller chunks replicating the original data without data loss. Task automation is achieved in a JavaScript environment. This project allows users of mass spectrometry facilities to inspect and mine their MS1 mass data outside of these facilities without having to resort to the closed-source vendor software shipped with the instruments. mineXpert requires no proprietary software whatsoever once the mass spectrometry data have been converted to mzML. The reference implementation is version 5.8.2 or greater. Reference material, a detailed user manual, and video tutorials are available at http://www.msxpertsuite.org .",2019-04-17 +29186384,PlanNET: homology-based predicted interactome for multiple planarian transcriptomes.,"Motivation:Planarians are emerging as a model organism to study regeneration in animals. However, the little available data of protein-protein interactions hinders the advances in understanding the mechanisms underlying its regenerating capabilities. Results:We have developed a protocol to predict protein-protein interactions using sequence homology data and a reference Human interactome. This methodology was applied on 11 Schmidtea mediterranea transcriptomic sequence datasets. Then, using Neo4j as our database manager, we developed PlanNET, a web application to explore the multiplicity of networks and the associated sequence annotations. 
By mapping RNA-seq expression experiments onto the predicted networks, and allowing a transcript-centric exploration of the planarian interactome, we provide researchers with a useful tool to analyse possible pathways and to design new experiments, as well as a reproducible methodology to predict, store, and explore protein interaction networks for non-model organisms. Availability and implementation:The web application PlanNET is available at https://compgen.bio.ub.edu/PlanNET. The source code used is available at https://compgen.bio.ub.edu/PlanNET/downloads. Contact:jabril@ub.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-03-01 +26087234,"Sophora flavescens Ait.: Traditional usage, phytochemistry and pharmacology of an important traditional Chinese medicine.","

Ethnopharmacological relevance

Sophora flavescens (Fabaceae), also known as Kushen (Chinese: ), has been an important species in Chinese medicine since the Qin and Han dynasties. The root of Sophora flavescens has a long history in the traditional medicine of many countries, including China, Japan, Korea, India and some countries in Europe. In traditional Chinese medicine (TCM), Sophora flavescens has been used extensively, mainly in combination with other medicinal plants in prescriptions to treat fever, dysentery, hematochezia, jaundice, oliguria, vulvar swelling, asthma, eczema, inflammatory disorders, ulcers and diseases associated with skin burns. The aim of this review is to provide updated and comprehensive information regarding the botany, ethnopharmacology, phytochemistry, biological activities and toxicology of Sophora flavescens and to discuss possible trends and opportunities for further research on Sophora flavescens.

Materials and methods

We systematically searched major scientific databases (PubMed, Elsevier, SpringerLink, Google Scholar, Medline Plus, ACS, ""Da Yi Yi Xue Sou Suo (http://www.dayi100.com/login.jsp)"", China Knowledge Resource Integrated (CNKI) and Web of Science) for information published between 1958 and 2015 on Sophora flavescens. Information was also acquired from local classic herbal literature, conference papers, government reports, and PhD and MSc dissertations.

Results

The broad spectrum of biological activities associated with Sophora flavescens has been considered a valuable resource in both traditional and modern medicine. Extracts are taken either orally or by injection. More than 200 compounds have been isolated from Sophora flavescens, and the major components have been identified as flavonoids and alkaloids. Recent in vitro and in vivo studies indicate that at least 50 pure compounds and crude extracts from Sophora flavescens possess wide-ranging antitumor, antimicrobial, antipyretic, antinociceptive, and anti-inflammatory pharmacological abilities. The anticancer and anti-infection abilities of these components are especially attractive areas for research.

Conclusions

Sophora flavescens is a promising traditional medicine, but there is a need for more precise studies to test the safety and clinical value of its main active crude extracts and pure compounds and to clarify their mechanisms of action. Moreover, some existing studies have lacked systematic methods and integration with the existing literature, and some of the experiments were isolated, used small sample sizes and were unreliable. More validated data are therefore required.",2015-06-16 +25244735,[Integrated DNA barcoding database for identifying Chinese animal medicine].,"In order to construct an integrated DNA barcoding database for identifying Chinese animal medicine, the authors and their cooperators have completed a lot of researches for identifying Chinese animal medicines using DNA barcoding technology. Sequences from GenBank have been analyzed simultaneously. Three different methods, BLAST, barcoding gap and Tree building, have been used to confirm the reliabilities of barcode records in the database. The integrated DNA barcoding database for identifying Chinese animal medicine has been constructed using three different parts: specimen, sequence and literature information. This database contained about 800 animal medicines and the adulterants and closely related species. Unknown specimens can be identified by pasting their sequence record into the window on the ID page of species identification system for traditional Chinese medicine (www. tcmbarcode. cn). 
The integrated DNA barcoding database for identifying Chinese animal medicine is significantly important for animal species identification, rare and endangered species conservation and sustainable utilization of animal resources.",2014-06-01 +21766944,Ab initio and long-range investigation of the Ω((+∕-)) states of NaK dissociating adiabatically up to Na(3s 2S(1/2)) + K(3d 2D(3/2)).,"A theoretical investigation of the electronic structure of the NaK molecule including spin-orbit effects has been performed for the 34 Ω((+∕-)) states dissociating adiabatically into the limits up to Na(3s(2)S(1/2)) + K(3d(2)D(3/2)) from both an ab initio approach and a long-range model. Equilibrium distances, transition energies, harmonic frequencies as well as depths of wells and heights of humps are reported for all the states. Formulas for calculating the long-range energies for all the 0(+∕-), 1, 2, and 3 states under investigation are also displayed. They are expressed in terms of the C(n) (n = 6,8, ...) long-range coefficients and exchange integrals for the (2S+1)Λ((+)) parent states, available from literature. As present data could help experimentalists we make available extensive tables of energy values versus internuclear distances in our database at the web address: http://www-lasim.univ-lyon1.fr/spip.php?rubrique99.",2011-07-01 +31289831,iMicrobe: Tools and data-dreaiven discovery platform for the microbiome sciences. ,"Scientists have amassed a wealth of microbiome datasets, making it possible to study microbes in biotic and abiotic systems on a population or planetary scale; however, this potential has not been fully realized given that the tools, datasets, and computation are available in diverse repositories and locations. To address this challenge, we developed iMicrobe.us, a community-driven microbiome data marketplace and tool exchange for users to integrate their own data and tools with those from the broader community. 
The iMicrobe platform brings together analysis tools and microbiome datasets by leveraging National Science Foundation-supported cyberinfrastructure and computing resources from CyVerse, Agave, and XSEDE. The primary purpose of iMicrobe is to provide users with a freely available, web-based platform to (1) maintain and share project data, metadata, and analysis products, (2) search for related public datasets, and (3) use and publish bioinformatics tools that run on highly scalable computing resources. Analysis tools are implemented in containers that encapsulate complex software dependencies and run on freely available XSEDE resources via the Agave API, which can retrieve datasets from the CyVerse Data Store or any web-accessible location (e.g., FTP, HTTP). iMicrobe promotes data integration, sharing, and community-driven tool development by making open source data and tools accessible to the research community in a web-based platform.",2019-07-01 +31095279,Prediction of survival risks with adjusted gene expression through risk-gene networks.,"

Motivation

Network-based analysis of biomedical data has been extensively studied over the last decades. As a successful application, gene networks have been used to illustrate interactions among genes and explain the associated phenotypes. However, the gene network approaches have not been actively applied for survival analysis, which is one of the main interests of biomedical research. In addition, a few previous studies using gene networks for survival analysis construct networks mainly from prior knowledge, such as pathways, regulations and gene sets, while the performance considerably depends on the selection of prior knowledge.

Results

In this paper, we propose a data-driven construction method for survival risk-gene networks as well as a survival risk prediction method using the network structure. The proposed method constructs risk-gene networks with survival-associated genes using penalized regression. Then, gene expression indices are hierarchically adjusted through the networks to reduce the variance intrinsic in datasets. By illustrating risk-gene structure, the proposed method is expected to provide an intuition for the relationship between genes and survival risks. The risk-gene network is applied to a low grade glioma dataset, and produces a hypothesis of the relationship between genetic biomarkers of low and high grade glioma. Moreover, with multiple datasets, we demonstrate that the proposed method shows superior prediction performance compared to other conventional methods.

Availability and implementation

The R package of risk-gene networks is freely available in the web at http://cdal.korea.ac.kr/NetDA/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +32119710,In silico analysis of the tryptophan hydroxylase 2 (TPH2) protein variants related to psychiatric disorders.,"The tryptophan hydroxylase 2 (TPH2) enzyme catalyzes the first step of serotonin biosynthesis. Serotonin is known for its role in several homeostatic systems related to sleep, mood, and food intake. As the reaction catalyzed by TPH2 is the rate-limiting step of serotonin biosynthesis, mutations in TPH2 have been associated with several psychiatric disorders (PD). This work undertakes an in silico analysis of the effects of genetic mutations in the human TPH2 protein. Ten algorithms were used to predict the functional and stability effects of the TPH2 mutations. ConSurf was used to estimate the evolutionary conservation of TPH2 amino acids. GROMACS was used to perform molecular dynamics (MD) simulations of TPH2 WT and P260S, R303W, and R441H, which had already been associated with the development of PD. Forty-six TPH2 variants were compiled from the literature. Among the analyzed variants, those occurring at the catalytic domain were shown to be more damaging to protein structure and function. The ConSurf analysis indicated that the mutations affecting the catalytic domain were also more conserved throughout evolution. The variants S364K and S383F were predicted to be deleterious by all the functional algorithms used and occurred at conserved positions, suggesting that they might be deleterious. The MD analyses indicate that the mutations P206S, R303W, and R441H affect TPH2 flexibility and essential mobility at the catalytic and oligomerization domains. The variants P206S, R303W, and R441H also exhibited alterations in dimer binding affinity and stability throughout the simulations. Thus, these mutations may impair TPH2 functional interactions and, consequently, its function, leading to the development of PD. 
Furthermore, we developed a database, SNPMOL (http://www.snpmol.org/), containing the results presented in this paper. Understanding the effects of TPH2 mutations on protein structure and function may lead to improvements in existing treatments for PD and facilitate the design of further experiments.",2020-03-02 +32612423,Developing an Online Portal for Determining the Genomic Signature of Archaic DNA that are Associated to Modern Human Genetic Diseases: A Meta-Analysis Study.,"

Objective

Mutations or introgression can cause and rise adaptive alleles of which some can be beneficial. Archaic humans lived more than 200,000 years ago in Europe and Western Asia. They were adapted to the environment and pathogens that prevailed in these locations. It can therefore be thought that modern humans obtained significant immune advantage from the archaic alleles.

Materials and methods

First, data were collected by meta-analysis from previously identified genetic diseases caused by alleles that were introgressed from archaics. Second, the in silico model portal (http://www.archaics2phenotype.xxx.edu.tr) was designed to trace the history of the Neanderthal allele. The portal also shows the current distribution of the genotypes of the selected alleles within different populations and correlates with the individuals phenotype.

Results

Our developed model provides a better understanding for the origin of genetic diseases or traits that are associated with the Neanderthal genome.

Conclusion

The developed medicine model will help individuals and their populations to receive the best treatment. It also clarifies why there are differences in disease phenotypes in modern humans.",2020-06-02 +28086860,"Nicotiana attenuata Data Hub (NaDH): an integrative platform for exploring genomic, transcriptomic and metabolomic data in wild tobacco.","

Background

Nicotiana attenuata (coyote tobacco) is an ecological model for studying plant-environment interactions and plant gene function under real-world conditions. During the last decade, large amounts of genomic, transcriptomic and metabolomic data have been generated with this plant which has provided new insights into how native plants interact with herbivores, pollinators and microbes. However, an integrative and open access platform that allows for the efficient mining of these -omics data remained unavailable until now.

Description

We present the Nicotiana attenuata Data Hub (NaDH) as a centralized platform for integrating and visualizing genomic, phylogenomic, transcriptomic and metabolomic data in N. attenuata. The NaDH currently hosts collections of predicted protein coding sequences of 11 plant species, including two recently sequenced Nicotiana species, and their functional annotations, 222 microarray datasets from 10 different experiments, a transcriptomic atlas based on 20 RNA-seq expression profiles and a metabolomic atlas based on 895 metabolite spectra analyzed by mass spectrometry. We implemented several visualization tools, including a modified version of the Electronic Fluorescent Pictograph (eFP) browser, co-expression networks and the Interactive Tree Of Life (iTOL) for studying gene expression divergence among duplicated homologous. In addition, the NaDH allows researchers to query phylogenetic trees of 16,305 gene families and provides tools for analyzing their evolutionary history. Furthermore, we also implemented tools to identify co-expressed genes and metabolites, which can be used for predicting the functions of genes. Using the transcription factor NaMYB8 as an example, we illustrate that the tools and data in NaDH can facilitate identification of candidate genes involved in the biosynthesis of specialized metabolites.

Conclusion

The NaDH provides interactive visualization and data analysis tools that integrate the expression and evolutionary history of genes in Nicotiana, which can facilitate rapid gene discovery and comparative genomic analysis. Because N. attenuata shares many genome-wide features with other Nicotiana species including cultivated tobacco, and hence NaDH can be a resource for exploring the function and evolution of genes in Nicotiana species in general. The NaDH can be accessed at: http://nadh.ice.mpg.de/ .",2017-01-13 +30759193,Comparison of six breast cancer classifiers using qPCR.,"

Motivation

Several gene expression-based risk scores and subtype classifiers for breast cancer were developed to distinguish high- and low-risk patients. Evaluating the performance of these classifiers helps to decide which classifiers should be used in clinical practice for personal therapeutic recommendations. So far, studies that compared multiple classifiers in large independent patient cohorts mostly used microarray measurements. qPCR-based classifiers were not included in the comparison or had to be adapted to the different experimental platforms.

Results

We used a prospective study of 726 early breast cancer patients from seven certified German breast cancer centers. Patients were treated according to national guidelines and the expressions of 94 selected genes were measured by the mid-throughput qPCR platform Fluidigm. Clinical and pathological data including outcome over five years is available. Using these data, we could compare the performance of six classifiers (scmgene and research versions of PAM50, ROR-S, recurrence score, EndoPredict and GGI). Similar to other studies, we found a similar or even higher concordance between most of the classifiers and most were also able to differentiate high- and low-risk patients. The classifiers that were originally developed for microarray data still performed similarly using the Fluidigm data. Therefore, Fluidigm can be used to measure the gene expressions needed by several classifiers for a large cohort with little effort. In addition, we provide an interactive report of the results, which enables a transparent, in-depth comparison of classifiers and their prediction of individual patients.

Availability and implementation

https://services.bio.ifi.lmu.de/pia/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +31834829,New Toxicology Tools and the Emerging Paradigm Shift in Environmental Health Decision-Making.,"BACKGROUND:Numerous types of rapid toxicity or exposure assays and platforms are providing information relevant to human hazard and exposure identification. They offer the promise of aiding decision-making in a variety of contexts including the regulatory management of chemicals, evaluation of products and environmental media, and emergency response. There is a need to consider both the scientific validity of the new methods and the values applied to a given decision using this new information to ensure that the new methods are employed in ways that enhance public health and environmental protection. In 2018, a National Academies of Sciences, Engineering, and Medicine (NASEM) workshop examined both the toxicological and societal aspects of this challenge. OBJECTIVES:Our objectives were to explore the challenges of adopting new data streams into regulatory decision-making and highlight the need to align new methods with the information and confidence needs of the decision contexts in which the data may be applied. METHODS:We go beyond the NASEM workshop to further explore the requirements of different decision contexts. We also call for the new methods to be applied in a manner consistent with the core values of public health and environmental protection. We use the case examples presented in the NASEM workshop to illustrate a range of decision contexts that have applied or could benefit from these new data streams. Organizers of the NASEM workshop came together to further evaluate the main themes from the workshop and develop a joint assessment of the critical needs for improved use of emerging toxicology tools in decision-making. 
We have drawn from our own experience and individual decision or research contexts as well as from the case studies and panel discussions from the workshop to inform our assessment. DISCUSSION:Many of the statutes that regulate chemicals in the environment place a high priority on the protection of public health and the environment. Moving away from the sole reliance on traditional approaches and information sources used in hazard, exposure, and risk assessment, toward the more expansive use of rapidly acquired chemical information via in vitro, in silico, and targeted testing strategies will require careful consideration of the information needed and values considerations associated with a particular decision. In this commentary, we explore the ability and feasibility of using emerging data streams, particularly those that allow for the rapid testing of a large number of chemicals across numerous biological targets, to shift the chemical testing paradigm to one in which potentially harmful chemicals are more rapidly identified, prioritized, and addressed. Such a paradigm shift could ultimately save financial and natural resources while ensuring and preserving the protection of public health. https://doi.org/10.1289/EHP4745.",2019-12-13 +32951601,"Down-regulation of PADI2 prevents proliferation and epithelial-mesenchymal transition in ovarian cancer through inhibiting JAK2/STAT3 pathway in vitro and in vivo, alone or in combination with Olaparib.","

Background

Epithelial ovarian cancer (EOC) is the most lethal disease among female genital malignant tumors. Peptidylarginine deiminase type II(PADI II) has been shown to enhance a variety of cancers carcinogenesis, including ovarian cancer. The purpose of this study was to investigate the biological role of PADI2 in ovarian cancer (OC) and the relative mechanism.

Methods

Gene Expression Profiling Interactive Analysis (GEPIA) ( https://gepia.pku.cn/ ) and ONCOMINE ( https://www.oncomine.org/ ) were used to analyze PADI2 Gene Expression data. The survival curve for the PADI2 gene was generated by using the online Kaplan-Meier mapping site ( https://www.kmplot.com/ ). We conducted MTT assay, cloning formation assay and EdU cell proliferation assay to detect the cell activity of PADI2 knockdown A2780 and SKOV3 ovarian cancer cells treated with Olaparib. Cell migration and invasion were observed by would healing and transwell assay. The pathway changes after the treatment of PADI2 were detected by transcriptome sequencing and western blot. The role of PADI2 combined with Olaparib treatment in vivo was studied in nude mouse model bearing ovarian cancer tumor.

Results

We investigated the role of PADI2 on EOC in vitro and in vivo. PADI2 was upregulated in ovarian cancer samples and high PADI2 expression was correlated with poor outcome. Downregulating PADI2 suppressed colony formation, proliferation, migration and invasion of A2780 and SKOV3 cells. Furthermore, downregulating PADI2 and Olaparib combination treatment attenuated the viability, migration and invasion of A2780 and SKOV3 cells. We identified differentially expressed genes in A2780-shPADI2 and SKOV3-shPADI2 cell by transcriptome sequencing analysis and verified that downregulating PADI2 and Olaparib combination treatment suppresses EMT and JAK2/STAT3 signaling pathway in A2780 and SKOV3 cells in vitro and in vivo.

Conclusions

Downregulation of PADI2 and Olaparib combination treatment attenuated the proliferation, migration and invasion of A2780 and SKOV3 cells by inhibiting the EMT through JAK2/STAT3 signaling pathway.",2020-09-20 +32019722,A pooled-analysis of age and sex based coronary artery calcium scores percentiles.,"BACKGROUND:Age and sex based coronary artery calcium score (CAC) percentiles have been used to improve coronary artery disease (CAD) risk prediction. However, the main limitation of the CACs percentiles currently in use is that they are often based on single studies. We performed a pooled analysis of all available studies that reported on CAC percentiles, in order to develop more generalizable age and sex nomograms. METHODS:PubMed/Medline and Embase were searched for studies that reported nomograms of age and sex-based CACs percentiles. Studies were included if they reported data collected among asymptomatic individuals without a history of cardiovascular disease. Absolute CACs for each specific percentile stratum were pooled and new percentiles were generated taking into account the sample size of the study. RESULTS:We found 831 studies, of which 12 met the inclusion criteria. Data on CACs percentiles of 134,336 Western and 33,488 Asians were pooled separately, rendering a weighted CACs percentile nomogram available at https://www.calciumscorecalculator.com. Our weighted percentiles differed by up to 24% from the nomograms in use today. 
CONCLUSIONS:Our pooled age and sex based CACs percentiles based on over 155,000 individuals should provide a measure of risk that is more applicable to a wider population than the ones currently in use and hopefully will lead to better risk assessment and treatment decisions.",2020-01-27 +25468931,"AromaDeg, a novel database for phylogenomics of aerobic bacterial degradation of aromatics.","Understanding prokaryotic transformation of recalcitrant pollutants and the in-situ metabolic nets require the integration of massive amounts of biological data. Decades of biochemical studies together with novel next-generation sequencing data have exponentially increased information on aerobic aromatic degradation pathways. However, the majority of protein sequences in public databases have not been experimentally characterized and homology-based methods are still the most routinely used approach to assign protein function, allowing the propagation of misannotations. AromaDeg is a web-based resource targeting aerobic degradation of aromatics that comprises recently updated (September 2013) and manually curated databases constructed based on a phylogenomic approach. Grounded in phylogenetic analyses of protein sequences of key catabolic protein families and of proteins of documented function, AromaDeg allows query and data mining of novel genomic, metagenomic or metatranscriptomic data sets. Essentially, each query sequence that match a given protein family of AromaDeg is associated to a specific cluster of a given phylogenetic tree and further function annotation and/or substrate specificity may be inferred from the neighboring cluster members with experimentally validated function. This allows a detailed characterization of individual protein superfamilies as well as high-throughput functional classifications. 
Thus, AromaDeg addresses the deficiencies of homology-based protein function prediction, combining phylogenetic tree construction and integration of experimental data to obtain more accurate annotations of new biological data related to aerobic aromatic biodegradation pathways. We pursue in future the expansion of AromaDeg to other enzyme families involved in aromatic degradation and its regular update. Database URL: http://aromadeg.siona.helmholtz-hzi.de",2014-12-01 +21252073,Compression of DNA sequence reads in FASTQ format.,"

Motivation

Modern sequencing instruments are able to generate at least hundreds of millions short reads of genomic data. Those huge volumes of data require effective means to store them, provide quick access to any record and enable fast decompression.

Results

We present a specialized compression algorithm for genomic data in FASTQ format which dominates its competitor, G-SQZ, as is shown on a number of datasets from the 1000 Genomes Project (www.1000genomes.org).

Availability

DSRC is freely available at http:/sun.aei.polsl.pl/dsrc.",2011-01-19 +32515544,Identifying scenarios of benefit or harm from kidney transplantation during the COVID-19 pandemic: A stochastic simulation and machine learning study.,"Clinical decision-making in kidney transplant (KT) during the coronavirus disease 2019 (COVID-19) pandemic is understandably a conundrum: both candidates and recipients may face increased acquisition risks and case fatality rates (CFRs). Given our poor understanding of these risks, many centers have paused or reduced KT activity, yet data to inform such decisions are lacking. To quantify the benefit/harm of KT in this context, we conducted a simulation study of immediate-KT vs delay-until-after-pandemic for different patient phenotypes under a variety of potential COVID-19 scenarios. A calculator was implemented (http://www.transplantmodels.com/covid_sim), and machine learning approaches were used to evaluate the important aspects of our modeling. Characteristics of the pandemic (acquisition risk, CFR) and length of delay (length of pandemic, waitlist priority when modeling deceased donor KT) had greatest influence on benefit/harm. In most scenarios of COVID-19 dynamics and patient characteristics, immediate KT provided survival benefit; KT only began showing evidence of harm in scenarios where CFRs were substantially higher for KT recipients (eg, ≥50% fatality) than for waitlist registrants. Our simulations suggest that KT could be beneficial in many centers if local resources allow, and our calculator can help identify patients who would benefit most. Furthermore, as the pandemic evolves, our calculator can update these predictions.",2020-07-15 +32578539,Identification and Quantification of Oxidized Lipids in LC-MS Lipidomics Data.,"Changes in lipid homeostasis can lead to a plethora of diseases, raising the importance of reliable identification and measurement of lipids enabled by bioinformatics tools. 
However, due to the enormous diversity of lipids, most contemporary tools cover only a marginal range of lipid classes. To reduce such a shortcoming, this work extends the lipid species covered by Lipid Data Analyzer (LDA) to galactolipids and oxidized lipids. Appropriate mass lists were generated for MS1 identifications and the proprietary decision rule sets were extended for MS2 identifications of the novel lipid classes. Furthermore, LDA was extended to enable identification of oxidatively modified fatty acyl chains. With these extensions, LDA can reliably identify the most important galactolipids as well as oxidatively modified versions of the 22 previously implemented lipid classes. Comparison with other up to date lipidomics tools show that LDA has a better coverage of the newly implemented lipid species. The extended version of LDA provides researchers with a powerful platform to elucidate diseases caused by perturbations in the oxidized lipidome. LDA is freely available from https://genome.tugraz.at/lda.",2020-06-01 +30649247,miR+Pathway: the integration and visualization of miRNA and KEGG pathways.,"miRNAs represent a type of noncoding small molecule RNA. Many studies have shown that miRNAs are widely involved in the regulation of various pathways. The key to fully understanding the regulatory function of miRNAs is the determination of the pathways in which the miRNAs participate. However, the major pathway databases such as KEGG only include information regarding protein-coding genes. Here, we redesigned a pathway database (called miR+Pathway) by integrating and visualizing the 8882 human experimentally validated miRNA-target interactions (MTIs) and 150 KEGG pathways. This database is freely accessible at http://www.insect-genome.com/miR-pathway. Researchers can intuitively determine the pathways and the genes in the pathways that are regulated by miRNAs as well as the miRNAs that target the pathways. 
To determine the pathways in which targets of a certain miRNA or multiple miRNAs are enriched, we performed a KEGG analysis miRNAs by using the hypergeometric test. In addition, miR+Pathway provides information regarding MTIs, PubMed IDs and the experimental verification method. Users can retrieve pathways regulated by an miRNA or a gene by inputting its names.",2020-03-01 +31693112,The evolution of contact prediction: evidence that contact selection in statistical contact prediction is changing.,"

Motivation

Over the last few years, the field of protein structure prediction has been transformed by increasingly accurate contact prediction software. These methods are based on the detection of coevolutionary relationships between residues from multiple sequence alignments (MSAs). However, despite speculation, there is little evidence of a link between contact prediction and the physico-chemical interactions which drive amino-acid coevolution. Furthermore, existing protocols predict only a fraction of all protein contacts and it is not clear why some contacts are favoured over others. Using a dataset of 863 protein domains, we assessed the physico-chemical interactions of contacts predicted by CCMpred, MetaPSICOV and DNCON2, as examples of direct coupling analysis, meta-prediction and deep learning.

Results

We considered correctly predicted contacts and compared their properties against the protein contacts that were not predicted. Predicted contacts tend to form more bonds than non-predicted contacts, which suggests these contacts may be more important than contacts that were not predicted. Comparing the contacts predicted by each method, we found that metaPSICOV and DNCON2 favour accuracy, whereas CCMPred detects contacts with more bonds. This suggests that the push for higher accuracy may lead to a loss of physico-chemically important contacts. These results underscore the connection between protein physico-chemistry and the coevolutionary couplings that can be derived from MSAs. This relationship is likely to be relevant to protein structure prediction and functional analysis of protein structure and may be key to understanding their utility for different problems in structural biology.

Availability and implementation

We use publicly available databases. Our code is available for download at https://opig.stats.ox.ac.uk/.

Supplementary information

Supplementary information is available at Bioinformatics online.",2020-03-01 +30715167,The global dissemination of bacterial infections necessitates the study of reverse genomic epidemiology.,"Whole genome sequencing (WGS) has revolutionized the genotyping of bacterial pathogens and is expected to become the new gold standard for tracing the transmissions of bacterial infectious diseases for public health purposes. Traditional genomic epidemiology often uses WGS as a verification tool, namely, when a common source or epidemiological link is suspected, the collected isolates are sequenced for the determination of clonal relationships. However, increasingly frequent international travel and food transportation, and the associated potential for the cross-border transmission of bacterial pathogens, often lead to an absence of information on bacterial transmission routes. Here we introduce the concept of 'reverse genomic epidemiology', i.e. when isolates are inspected by genome comparisons to be sufficiently similar to one another, they are assumed to be a consequence of infection from a common source. Through BacWGSTdb (http://bacdb.org/BacWGSTdb/), a database we have developed for bacterial genome typing and source tracking, we have found that almost the entire analyzed 20 bacterial species exhibit the phenomenon of cross-border clonal dissemination. Five networks were further identified in which isolates sharing nearly identical genomes were collected from at least five different countries. Three of these have been documented as real infectious disease outbreaks, therefore demonstrating the feasibility and authority of reverse genomic epidemiology. 
Our survey and proposed strategy would be of potential value in establishing a global surveillance system for tracing bacterial transmissions and outbreaks; the related database and techniques require urgent standardization.",2020-03-01 +26585827,SNPTracker: A Swift Tool for Comprehensive Tracking and Unifying dbSNP rs IDs and Genomic Coordinates of Massive Sequence Variants.,"The reference single nucleotide polymorphism (rs) ID in dbSNP (http://www.ncbi.nlm.nih.gov/SNP/) is a key resource identifier, which is widely used in human genetics and genomics studies. However, its application is often complicated by the varied IDs of different versions. Here, we developed a user-friendly tool, SNPTracker, for comprehensively tracking and unifying the rs IDs and genomic coordinates of massive sequence variants at a time. It worked perfectly, and had much higher accuracy and capacity than two alternative utilities in our proof-of-principle examples. SNPTracker will greatly facilitate genetic data exchange and integration in the postgenome-wide association study era.",2015-11-19 +28153913,An Anatomically Resolved Mouse Brain Proteome Reveals Parkinson Disease-relevant Pathways.,"Here, we present a mouse brain protein atlas that covers 17 surgically distinct neuroanatomical regions of the adult mouse brain, each less than 1 mm3 in size. The protein expression levels are determined for 6,500 to 7,500 gene protein products from each region and over 12,000 gene protein products for the entire brain, documenting the physiological repertoire of mouse brain proteins in an anatomically resolved and comprehensive manner. We explored the utility of our spatially defined protein profiling methods in a mouse model of Parkinson's disease. 
We compared the proteome from a vulnerable region (substantia nigra pars compacta) of wild type and parkinsonian mice with that of an adjacent, less vulnerable, region (ventral tegmental area) and identified several proteins that exhibited both spatiotemporal- and genotype-restricted changes. We validated the most robustly altered proteins using an alternative profiling method and found that these modifications may highlight potential new pathways for future studies. This proteomic atlas is a valuable resource that offers a practical framework for investigating the molecular intricacies of normal brain function as well as regional vulnerability in neurological diseases. All of the mouse regional proteome profiling data are published on line at http://mbpa.bprc.ac.cn/.",2017-02-02 +30918942,Simultaneous clustering of multiview biomedical data using manifold optimization.,"MOTIVATION:Multiview clustering has attracted much attention in recent years. Several models and algorithms have been proposed for finding the clusters. However, these methods are developed either to find the consistent/common clusters across different views, or to identify the differential clusters among different views. In reality, both consistent and differential clusters may exist in multiview datasets. Thus, development of simultaneous clustering methods such that both the consistent and the differential clusters can be identified is of great importance. RESULTS:In this paper, we proposed one method for simultaneous clustering of multiview data based on manifold optimization. The binary optimization model for finding the clusters is relaxed to a real value optimization problem on the Stiefel manifold, which is solved by the line-search algorithm on manifold. We applied the proposed method to both simulation data and four real datasets from TCGA. Both studies show that when the underlying clusters are consistent, our method performs competitive to the state-of-the-art algorithms. 
When there are differential clusters, our method performs much better. In the real data study, we performed experiments on cancer stratification and differential cluster (module) identification across multiple cancer subtypes. For the patients of different subtypes, both consistent clusters and differential clusters are identified at the same time. The proposed method identifies more clusters that are enriched by gene ontology and KEGG pathways. The differential clusters could be used to explain the different mechanisms for the cancer development in the patients of different subtypes. AVAILABILITY AND IMPLEMENTATION:Codes can be downloaded from: http://homepage.fudan.edu.cn/sqzhang/files/2018/12/MVCMOcode.zip. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-10-01 +31410461,EPIP: a novel approach for condition-specific enhancer-promoter interaction prediction.,"

Motivation

The identification of enhancer-promoter interactions (EPIs), especially condition-specific ones, is important for the study of gene transcriptional regulation. Existing experimental approaches for EPI identification are still expensive, and available computational methods either do not consider or have low performance in predicting condition-specific EPIs.

Results

We developed a novel computational method called EPIP to reliably predict EPIs, especially condition-specific ones. EPIP is capable of predicting interactions in samples with limited data as well as in samples with abundant data. Tested on more than eight cell lines, EPIP reliably identifies EPIs, with an average area under the receiver operating characteristic curve of 0.95 and an average area under the precision-recall curve of 0.73. Tested on condition-specific EPIPs, EPIP correctly identified 99.26% of them. Compared with two recently developed methods, EPIP outperforms them with a better accuracy.

Availability and implementation

The EPIP tool is freely available at http://www.cs.ucf.edu/˜xiaoman/EPIP/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +31969424,Use of Rapid Diagnostics To Manage Pediatric Bloodstream Infections? You Bet Your ASP! ,"Rapid diagnostic testing (RDT) can facilitate earlier optimization of the treatment of bloodstream infections, particularly in conjunction with an effective antimicrobial stewardship program (ASP). However, the effective implementation and workflow of RDTs are still a matter of debate, particularly in a pediatric setting. In this issue of the Journal of Clinical Microbiology, L. J. Juttukonda, S. Katz, J. Gillon, J. Schmitz, and R. Banerjee (J Clin Microbiol 58:e01400-19, 2020, https://doi.org/10.1128/JCM.01400-19) investigate the impact of a multiplex, molecular RDT on changes to antimicrobial therapy in an academic children's hospital. These data reveal several factors that clinical laboratories should consider prior to the implementation of RDTs for positive blood cultures.",2020-03-25 +29717336,"Global burden of hypoglycaemia-related mortality in 109 countries, from 2000 to 2014: an analysis of death certificates.","AIMS/HYPOTHESIS:In the context of increasing prevalence of diabetes in elderly people with multimorbidity, intensive glucose control may increase the risk of severe hypoglycaemia, potentially leading to death. While rising trends of severe hypoglycaemia rates have been reported in some European, North American and Asian countries, the global burden of hypoglycaemia-related mortality is unknown. We aimed to investigate global differences and trends of hypoglycaemia-related mortality. METHODS:We used the WHO mortality database to extract information on death certificates reporting hypoglycaemia or diabetes as the underlying cause of death, and the United Nations demographic database to obtain data on mid-year population estimates from 2000 to 2014. 
We calculated crude and age-standardised proportions (defined as number of hypoglycaemia-related deaths divided by total number of deaths from diabetes [i.e. the sum of hypoglycaemia- and diabetes-related deaths]) and rates (hypoglycaemia-related deaths divided by mid-year population) of hypoglycaemia-related mortality and compared estimates across countries and over time. RESULTS:Data for proportions were extracted from 109 countries (31 had data from all years analysed [2000-2014] available). Combining all countries, the age-standardised proportion of hypoglycaemia-related deaths was 4.49 (95% CI 4.44, 4.55) per 1000 total diabetes deaths. Compared with the overall mean, most Central American, South American and (mainly) Caribbean countries reported higher proportions (five more age-standardised hypoglycaemia-related deaths per 1000 total diabetes deaths in Chile, six in Uruguay, 11 in Belize and 22 in Aruba), as well as Japan (11 more age-standardised hypoglycaemia-related deaths per 1000 total diabetes deaths). In comparison, lower proportions were noted in most European countries, the USA, Canada, New Zealand and Australia. For countries with data available for all years analysed, trend analysis showed a 60% increase in hypoglycaemia-related deaths until 2010 and stable trends onwards. Rising trends were most evident for Argentina, Brazil, Chile, the USA and Japan. Data for rates were available for 105 countries (30 had data for all years analysed [2000-2014] available). Combining all countries, the age-standardised hypoglycaemia-related death rate was 0.79 (95% CI 0.77, 0.80) per 1 million person-years. Most Central American, South American and Caribbean countries similarly reported higher rates of hypoglycaemia-related death, whilst virtually all European countries, the USA, Canada, Japan, New Zealand and Australia reported lower rates compared with the overall mean. 
Age-standardised rates were very low for most countries (lower than five per 1 million person-years in 89.5% of countries), resulting in small absolute differences among countries. As noted with the proportions analysis, trend analysis showed an overall 60% increase in hypoglycaemia-related deaths until 2010 and stable rate trends onwards; rising rates were particularly evident for Brazil, Chile and the USA. CONCLUSIONS/INTERPRETATION:Most countries in South America, Central America and the Caribbean showed the highest proportions of diabetes-related deaths attributable to hypoglycaemia and the highest rates of hypoglycaemia-related deaths. Between 2000 and 2014, rising trends were observed in Brazil, Chile and the USA for both rates and proportions of hypoglycaemia-related death, and in Argentina and Japan for proportions only. Further studies are required to unravel the contribution of clinical and socioeconomic factors, difference in diabetes prevalence and heterogeneity of death certification in determining lower rates and proportions of hypoglycaemia-related deaths in high-income countries in Europe, North America and Asia. DATA AVAILABILITY:Data used for these analyses are available at https://doi.org/10.17632/ndp52fbz8r.1.",2018-05-01 +29949965,Bayesian networks for mass spectrometric metabolite identification via molecular fingerprints.,"

Motivation

Metabolites, small molecules that are involved in cellular reactions, provide a direct functional signature of cellular state. Untargeted metabolomics experiments usually rely on tandem mass spectrometry to identify the thousands of compounds in a biological sample. Recently, we presented CSI:FingerID for searching in molecular structure databases using tandem mass spectrometry data. CSI:FingerID predicts a molecular fingerprint that encodes the structure of the query compound, then uses this to search a molecular structure database such as PubChem. Scoring of the predicted query fingerprint and deterministic target fingerprints is carried out assuming independence between the molecular properties constituting the fingerprint.

Results

We present a scoring that takes into account dependencies between molecular properties. As before, we predict posterior probabilities of molecular properties using machine learning. Dependencies between molecular properties are modeled as a Bayesian tree network; the tree structure is estimated on the fly from the instance data. For each edge, we also estimate the expected covariance between the two random variables. For fixed marginal probabilities, we then estimate conditional probabilities using the known covariance. Now, the corrected posterior probability of each candidate can be computed, and candidates are ranked by this score. Modeling dependencies improves identification rates of CSI:FingerID by 2.85 percentage points.

Availability and implementation

The new scoring Bayesian (fixed tree) is integrated into SIRIUS 4.0 (https://bio.informatik.uni-jena.de/software/sirius/).",2018-07-01 +29360930,"ADEPTUS: a discovery tool for disease prediction, enrichment and network analysis based on profiles from many diseases.","Motivation:Large-scale publicly available genomic data on many disease phenotypes could improve our understanding of the molecular basis of disease. Tools that undertake this challenge by jointly analyzing multiple phenotypes are needed. Results:ADEPTUS is a web-tool that enables various functional genomics analyses based on a high-quality curated database spanning >38, 000 gene expression profiles and >100 diseases. It offers four types of analysis. (i) For a gene list provided by the user it computes disease ontology (DO), pathway, and gene ontology (GO) enrichment and displays the genes as a network. (ii) For a given disease, it enables exploration of drug repurposing by creating a gene network summarizing the genomic events in it. (iii) For a gene of interest, it generates a report summarizing its behavior across several studies. (iv) It can predict the tissue of origin and the disease of a sample based on its gene expression or its somatic mutation profile. Such analyses open novel ways to understand new datasets and to predict primary site of cancer. Availability and implementation:Data and tool: http://adeptus.cs.tau.ac.il/home Analyses: Supplementary Material. Contact:rshamir@tau.ac.il. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-06-01 +32245404,MI-MAAP: marker informativeness for multi-ancestry admixed populations.,"

Background

Admixed populations arise when two or more previously isolated populations interbreed. A powerful approach to addressing the genetic complexity in admixed populations is to infer ancestry. Ancestry inference including the proportion of an individual's genome coming from each population and its ancestral origin along the chromosome of an admixed population requires the use of ancestry informative markers (AIMs) from reference ancestral populations. AIMs exhibit substantial differences in allele frequency between ancestral populations. Given the huge amount of human genetic variation data available from diverse populations, a computationally feasible and cost-effective approach is becoming increasingly important to extract or filter AIMs with the maximum information content for ancestry inference, admixture mapping, forensic applications, and detecting genomic regions that have been under recent selection.

Results

To address this gap, we present MI-MAAP, an easy-to-use web-based bioinformatics tool designed to prioritize informative markers for multi-ancestry admixed populations by utilizing feature selection methods and multiple genomics resources including 1000 Genomes Project and Human Genome Diversity Project. Specifically, this tool implements a novel allele frequency-based feature selection algorithm, Lancaster Estimator of Independence (LEI), as well as other genotype-based methods such as Principal Component Analysis (PCA), Support Vector Machine (SVM), and Random Forest (RF). We demonstrated that MI-MAAP is a useful tool in prioritizing informative markers and accurately classifying ancestral populations. LEI is an efficient feature selection strategy to retrieve ancestry informative variants with different allele frequency/selection pressure among (or between) ancestries without requiring computationally expensive individual-level genotype data.

Conclusions

MI-MAAP has a user-friendly interface which provides researchers an easy and fast way to filter and identify AIMs. MI-MAAP can be accessed at https://research.cchmc.org/mershalab/MI-MAAP/login/.",2020-04-03 +33157302,CircPlant: An Integrated Tool for circRNA Detection and Functional Prediction in Plants.,"The recent discovery of circular RNAs (circRNAs) and characterization of their functional roles have opened a new avenue for understanding the biology of genomes. circRNAs have been implicated to play important roles in a variety of biological processes, but their precise functions remain largely elusive. Currently, a few approaches are available for novel circRNA prediction, but almost all these methods are intended for animal genomes. Considering that the major differences between the organization of plant and mammal genomes cannot be neglected, a plant-specific method is needed to enhance the validity of plant circRNA identification. In this study, we present CircPlant, an integrated tool for the exploration of plant circRNAs, potentially acting as competing endogenous RNAs (ceRNAs), and their potential functions. With the incorporation of several unique plant-specific criteria, CircPlant can accurately detect plant circRNAs from high-throughput RNA-seq data. Based on comparison tests on simulated and real RNA-seq datasets from Arabidopsis thaliana and Oryza sativa, we show that CircPlant outperforms all evaluated competing tools in both accuracy and efficiency. CircPlant is freely available at http://bis.zju.edu.cn/circplant.",2020-06-01 +32220091,Improved quality of care by using the PRISMS form to support self-management in patients with COPD: A Randomised Controlled Trial.,"

Aims and objective

To investigate the effects on the quality of care of the Patient Report Informing Self-Management Support (PRISMS) form compared with usual care among patients with chronic obstructive pulmonary disease (COPD) consulting a COPD nurse in primary health care.

Background

Patients with COPD experience symptoms affecting their everyday lives, and there is a need for interventions in self-management support. The delivery of chronic care in an organised, structured and planned manner can lead to more productive relationships between professionals and patients.

Design

A multicentre randomised controlled trial with a post-test design, according to the CONSORT checklist, in one intervention group (n = 94) and one control group (n = 108).

Methods

In addition to usual care, the intervention group (n = 94) completed the PRISMS form to indicate areas where they wanted self-management support before the consultation with the COPD nurse. This form comprises 17 items that patients with COPD commonly experience as problems. The control group received usual care (n = 108). The primary outcome was patients' satisfaction with quality of care, assessed using the Quality from the Patient's Perspective (QPP) questionnaire. Means and (SD) are presented where applicable. Differences between the intervention and control group were analysed with Student's t test for independent groups for interval data, and the Mann-Whitney U test for ordinal data.

Results

Participants in the intervention group were more satisfied with the QPP domains ""personal attention,"" regarding both ""perceived reality"" (p = .021) and ""subjective importance"" (p = .012). The PRISMS form revealed ""shortness of breath"" as the most commonly experienced problem and the issue most desired to discuss.

Conclusion

The PRISMS form improved patient satisfaction with quality of care regarding personal attention, which is an important factor in patient participation and improving relationships and communication.

Relevance to clinical practice

The PRISMS form can be a useful tool in improving person-centred care when delivering self-management support.

Register id

192691 at http://www.researchweb.org/is/en/sverige/project/192691.",2020-04-30 +29321052,ASGDB: a specialised genomic resource for interpreting Anopheles sinensis insecticide resistance.,"BACKGROUND:Anopheles sinensis is an important malaria vector in Southeast Asia. The widespread emergence of insecticide resistance in this mosquito species poses a serious threat to the efficacy of malaria control measures, particularly in China. Recently, the whole-genome sequencing and de novo assembly of An. sinensis (China strain) has been finished. A series of insecticide-resistant studies in An. sinensis have also been reported. There is a growing need to integrate these valuable data to provide a comprehensive database for further studies on insecticide-resistant management of An. sinensis. RESULTS:A bioinformatics database named An. sinensis genome database (ASGDB) was built. In addition to being a searchable database of published An. sinensis genome sequences and annotation, ASGDB provides in-depth analytical platforms for further understanding of the genomic and genetic data, including visualization of genomic data, orthologous relationship analysis, GO analysis, pathway analysis, expression analysis and resistance-related gene analysis. Moreover, ASGDB provides a panoramic view of insecticide resistance studies in An. sinensis in China. In total, 551 insecticide-resistant phenotypic and genotypic reports on An. sinensis distributed in Chinese malaria-endemic areas since the mid-1980s have been collected, manually edited in the same format and integrated into OpenLayers map-based interface, which allows the international community to assess and exploit the high volume of scattered data much easier. The database has been given the URL: http://www.asgdb.org /. CONCLUSIONS:ASGDB was built to help users mine data from the genome sequence of An. 
sinensis easily and effectively, especially with its advantages in insecticide resistance surveillance and control.",2018-01-10 +31194143,"Data on air pollutants and greenery in the city of Yerevan, Armenia.","This article contains data related to the research article entitled 'Agent-based modelling of interactions between air pollutants and greenery using a case study of Yerevan, Armenia' [1]. These data include the total air pollution and its splitting between different air pollutants in the city of Yerevan, as well as data on agent-vehicles (car clusters) and absorption characteristics of agent-trees. Data and the model that is implemented in the AnyLogic simulation tool are available online at: http://www.runmycode.org/companion/view/3420.",2019-05-23 +32230307,Every sponge its own name: removing Porifera homonyms.,"The occurrence of different sponge species bearing the same Linnean binomial name combination, i.e. homonyms, is to be avoided for obvious reasons. In a review of sponge taxon names of the World Porifera Database, we detected 121 homonymic cases (115 species-group names, 6 genus-group names), involving a total of 272 nominal taxa. It is the object of the present study to remove their occurrence by proposing new names for the junior homonyms following the rules of the International Commission of Zoological Nomenclature as laid down in the Code (ICZN, 1999) and the on-line edition http://iczn.org/iczn/index.jsp . Homonym cases are discussed and, where applicable, junior homonyms are either replaced by nomina nova or reassigned to their earliest available synonyms. The order in which the homonyms are treated is alphabetical on original species name, with genus names separately treated at the end. A summary table with all proposed name changes is also presented to allow quick access to the junior homonyms and their proposed new names. 
A total of 116 nomina nova are proposed, including five new genus names.",2020-02-28 +32184655,OTUD4: A Potential Prognosis Biomarker for Multiple Human Cancers.,"

Background

Deubiquitinase OTU domain containing 4 (OTUD4) is initially identified as a K48-specific deubiquitinase and plays an important role in DNA damage repair signaling transduction. However, the expression level, prognostic role, biological function and mechanism of OTUD4 in multiple human cancers are unclear.

Methods

GEPIA online (http://gepia.cancer-pku.cn/; The Cancer Genome Atlas (TCGA) database) was used to analyze the mRNA expression of OTUD4 in multiple human cancers. Kaplan-Meier plotter (KM plotter) database and TCGA database were used to evaluate the prognostic value of OTUD4 expression in multiple human cancers. MTT, Transwell and 3D culture assays were used to detect the role of OTUD4 in breast, liver and lung cancer cells. The correlation between OTUD4 and apoptosis signaling pathway and AKT signaling pathway was analyzed by Gene set enrichment analysis (GSEA).

Results

OTUD4 mRNA expression is significantly downregulated in multiple human cancer tissues. Survival analysis establishes that the downregulation of OTUD4 predicts poor prognosis in many solid tumors, including breast invasive carcinoma (BRCA), esophageal carcinoma (ESCA), liver hepatocellular carcinoma (LIHC), lung adenocarcinoma (LUAD), and ovarian serous cystadenocarcinoma (OV). Furthermore, overexpression of OTUD4 could inhibit tumor cell proliferation, migration and invasion of breast, liver and lung cancer cells through inhibiting the AKT signaling pathway.

Conclusion

This study found that OTUD4 may be a potential predictive factor for several human cancers and a tumor suppressor for breast, liver and lung cancer. The overexpression of OTUD4 restrained proliferation, migration and invasion of human breast, liver and lung cancer cells through promoting cancer cells apoptosis and inhibiting AKT signaling pathway. Notably, our results indicated that OTUD4 could be a useful biomarker for the prognosis of human cancers and a potential molecular target for diagnosis and treatment of breast, liver and lung cancer.",2020-02-28 +29156057,PRGdb 3.0: a comprehensive platform for prediction and analysis of plant disease resistance genes.,"The Plant Resistance Genes database (PRGdb; http://prgdb.org) has been redesigned with a new user interface, new sections, new tools and new data for genetic improvement, allowing easy access not only to the plant science research community but also to breeders who want to improve plant disease resistance. The home page offers an overview of easy-to-read search boxes that streamline data queries and directly show plant species for which data from candidate or cloned genes have been collected. Bulk data files and curated resistance gene annotations are made available for each plant species hosted. The new Gene Model view offers detailed information on each cloned resistance gene structure to highlight shared attributes with other genes. PRGdb 3.0 offers 153 reference resistance genes and 177 072 annotated candidate Pathogen Receptor Genes (PRGs). Compared to the previous release, the number of putative genes has been increased from 106 to 177 K from 76 sequenced Viridiplantae and algae genomes. The DRAGO 2 tool, which automatically annotates and predicts (PRGs) from DNA and amino acid with high accuracy and sensitivity, has been added. BLAST search has been implemented to offer users the opportunity to annotate and compare their own sequences. 
The improved section on plant diseases displays useful information linked to genes and genomes to connect complementary data and better address specific needs. Through a revised and enlarged collection of data, the development of new tools and a renewed portal, PRGdb 3.0 engages the plant science community in developing a consensus plan to improve knowledge and strategies to fight diseases that afflict main crops and other plants.",2018-01-01 +30604984,"""The Development and Psychometric Evaluation of the Trans Discrimination Scale: TDS-21"": Correction to Watson et al. (2018).","Reports an error in ""The Development and Psychometric Evaluation of the Trans Discrimination Scale: TDS-21"" by Laurel B. Watson, Luke R. Allen, Mirella J. Flores, Christine Serpe and Michelle Farrell (Journal of Counseling Psychology, Advanced Online Publication, Jul 23, 2018, np). In the article ""The Development and Psychometric Evaluation of the Trans Discrimination Scale: TDS-21"" by Laurel B. Watson, Luke R. Allen, Mirella J. Flores, Christine Serpe, and Michelle Farrell (Journal of Counseling Psychology, 2018, Advance online publication. http://dx.doi.org/10.1037/cou0000301), there were two errors in the Methods section of the article. In Study 1, Participants paragraph of The Development and Psychometric Evaluation of the Trans Discrimination Scale: TDS-21 for the Methods section, the gender listed at birth was incorrect in the following sentence, The majority of participants in this study identified as trans women and along a trans feminine spectrum, were assigned male at birth, White, had attained some college but no degree, and were employed full time. The correct gender assigned at birth was predominantly female. In addition, the gender coding procedures were incorrectly described. 
Specifically, those who identified as FAAB and AFAB were actually coded as trans men or along a transmasculine spectrum, whereas those who identified as MAAB and AMAB were coded as trans women and along a trans feminine perspective. In Study 3, Participants paragraph of The Development and Psychometric Evaluation of the Trans Discrimination Scale: TDS-21 for the Methods section, the gender identity listed in the following sentence was incorrect, The majority of participants identified as trans women and along the trans feminine spectrum, were assigned female at birth, White, had attained some college but no degree, and were students. Rather, participants primarily identified as non-binary trans. (The following abstract of the original article appeared in record 2018-35350-001.) To date, researchers assessing the role of discrimination in trans peoples' lives have relied upon measures that were developed and normed on LGB populations, culled specific items from large-scale survey data, or used more generalized measures of discrimination that do not specifically assess the unique forms of discrimination that trans people may encounter. Thus, the purpose of this three-part study was to develop and provide psychometric support for a measure of trans peoples' discrimination. In Study 1, a five-factor model emerged, which included: Microaggressions and Harassment, Restricted Career and Work Opportunities, Maltreatment in Health Care Settings, Harassment by Law Enforcement, and Bullying and Harassment in Educational Settings. Internal consistency estimates for subscale and total scale scores ranged from acceptable to excellent. Results from Study 2 revealed that a bifactor model provided the best fit to the data, revealing that the scale is essentially unidimensional. 
In addition, convergent and concurrent validity was supported, demonstrating significant positive correlations with another measure of trans discrimination, internalized transphobia, nondisclosure, negative expectations for the future, psychological distress, and perceived stress. In Study 3, results revealed excellent test-retest reliability up to a three-week period. Collectively, results suggested that the Transgender Discrimination Scale-21 (TDS-21) is a psychometrically sound measure that may be used to advance research on the role of discrimination in trans peoples' lives. (PsycINFO Database Record (c) 2019 APA, all rights reserved).",2019-01-01 +32991719,Fusion partner-specific mutation profiles and KRAS mutations as adverse prognostic factors in MLL-rearranged AML.,"Mixed-lineage leukemia (MLL) gene rearrangements are among the most frequent chromosomal abnormalities in acute myeloid leukemia (AML). MLL fusion patterns are associated with the patient's prognosis; however, their relationship with driver mutations is unclear. We conducted sequence analyses of 338 genes in pediatric patients with MLL-rearranged (MLL-r) AML (n = 56; JPLSG AML-05 study) alongside data from the TARGET study's pediatric cohorts with MLL-r AML (n = 104), non-MLL-r AML (n = 581), and adult MLL-r AML (n = 81). KRAS mutations were most frequent in pediatric patients with high-risk MLL fusions (MLL-MLLT10, MLL-MLLT4, and MLL-MLLT1). Pediatric patients with MLL-r AML (n = 160) and a KRAS mutation (KRAS-MT) had a significantly worse prognosis than those without a KRAS mutation (KRAS-WT) (5-year event-free survival [EFS]: 51.8% vs 18.3%, P < .0001; 5-year overall survival [OS]: 67.3% vs 44.3%, P = .003). The adverse prognostic impact of KRAS mutations was confirmed in adult MLL-r AML. KRAS mutations were associated with adverse prognoses in pediatric patients with both high-risk (MLLT10+MLLT4+MLLT1; n = 60) and intermediate-to-low-risk (MLLT3+ELL+others; n = 100) MLL fusions. 
The prognosis did not differ significantly between patients with non-MLL-r AML with KRAS-WT or KRAS-MT. Multivariate analysis showed the presence of a KRAS mutation to be an independent prognostic factor for EFS (hazard ratio [HR], 2.21; 95% confidence interval [CI], 1.35-3.59; P = .002) and OS (HR, 1.85; 95% CI, 1.01-3.31; P = .045) in MLL-r AML. The mutation is a distinct adverse prognostic factor in MLL-r AML, regardless of risk subgroup, and is potentially useful for accurate treatment stratification. This trial was registered at the UMIN (University Hospital Medical Information Network) Clinical Trials Registry (UMIN-CTR; http://www.umin.ac.jp/ctr/index.htm) as #UMIN000000511.",2020-10-01 +32107590,The Relationship Between the Facial Proportion Changes in Hard Tissue and the Satisfaction of Patients After Reduction Malarplasty: a Research Based on Three-Dimensional Cephalometry.,"

Objective

We aim to measure the zygomatic width and protrusion changes in hard tissue after reduction malarplasty and then calculate facial proportion changes and analyze the relationship between facial proportion changes and patients' satisfaction.

Methods

We retrospectively reviewed our database and selected 36 eligible patients who underwent isolated reduction malarplasty in our department from March 2015 to July 2018. The preoperative and postoperative facial width and protrusion, as well as head height, in hard tissue were measured using ProPlan software. Patients' satisfaction was evaluated by questionnaire. The correlations between the facial proportion changes and patients' satisfaction were analyzed using Spearman correlation analysis.

Results

The preoperative and postoperative midface widths were 135.87 ± 4.09 mm and 129.06 ± 4.95 mm. The relative zygomatic protrusion was reduced by 3.29 ± 1.54 mm in the left and 2.88 ± 1.73 mm in the right after surgery. The ratio of the midface width to lower face width changed from 1.43 ± 0.05 to 1.36 ± 0.06 after surgery. And the ratio of the head height to midface width changed from 1.53 ± 0.05 to 1.61 ± 0.05 after surgery. The ratios were indeed close to the ideal ratios we presumed (4:3 and 1.618). Moreover, patients' total and morphology satisfaction were both significantly higher with the postoperative ratio of the midface width to lower face width closer to 4:3 (R = - 0.732, P < 0.001; R = - 0.906, P < 0.001, respectively). But only morphology satisfaction was higher with the ratio of the head height to midface width closer to 1.618 (R = - 0.404, P = 0.014) and the ratio of the postoperative midface to lower face width decreased (R = - 0.434, P = 0.008).

Conclusions

We found that patients' morphology satisfaction was higher when the postoperative ratio of midface width to lower face width decreased. Moreover, the closeness of the postoperative facial proportions to the ideal facial proportions we presumed was significantly correlated with higher patient satisfaction. Therefore, 4:3 and 1.618 may be the ideal postoperative facial ratios for patients who undergo reduction malarplasty.

Level of evidence iv

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors https://www.springer.com/00266.",2020-02-27 +29846171,The MR-Base platform supports systematic causal inference across the human phenome. ,"Results from genome-wide association studies (GWAS) can be used to infer causal relationships between phenotypes, using a strategy known as 2-sample Mendelian randomization (2SMR) and bypassing the need for individual-level data. However, 2SMR methods are evolving rapidly and GWAS results are often insufficiently curated, undermining efficient implementation of the approach. We therefore developed MR-Base (http://www.mrbase.org): a platform that integrates a curated database of complete GWAS results (no restrictions according to statistical significance) with an application programming interface, web app and R packages that automate 2SMR. The software includes several sensitivity analyses for assessing the impact of horizontal pleiotropy and other violations of assumptions. The database currently comprises 11 billion single nucleotide polymorphism-trait associations from 1673 GWAS and is updated on a regular basis. Integrating data with software ensures more rigorous application of hypothesis-driven analyses and allows millions of potential causal relationships to be efficiently evaluated in phenome-wide association studies.",2018-05-30 +31571291,Independent origin of MIRNA genes controlling homologous target genes by partial inverted duplication of antisense-transcribed sequences.,"Some microRNAs (miRNAs) are key regulators of developmental processes, mainly by controlling the accumulation of transcripts encoding transcription factors that are important for morphogenesis. MADS-box genes encode a family of transcription factors which control diverse developmental processes in flowering plants. 
Here we study the convergent evolution of two MIRNA (MIR) gene families, named MIR444 and MIR824, targeting members of the same clade of MIKCC -group MADS-box genes. We show that these two MIR genes most likely originated independently in monocots (MIR444) and in Brassicales (eudicots, MIR824). We provide evidence that, in both cases, the future target gene was transcribed in antisense prior to the evolution of the MIR genes. Both MIR genes then likely originated by a partial inverted duplication of their target genes, resulting in natural antisense organization of the newly evolved MIR gene and its target gene at birth. We thus propose a model for the origin of MIR genes, MEPIDAS (MicroRNA Evolution by Partial Inverted Duplication of Antisense-transcribed Sequences). MEPIDAS is a refinement of the inverted duplication hypothesis. According to MEPIDAS, a MIR gene evolves at a genomic locus at which the future target gene is also transcribed in the antisense direction. A partial inverted duplication at this locus causes the antisense transcript to fold into a stem-loop structure that is recognized by the miRNA biogenesis machinery to produce a miRNA that regulates the gene at this locus. Our analyses exemplify how to elucidate the origin of conserved miRNAs by comparative genomics and will guide future studies. OPEN RESEARCH BADGE: This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://www.ncbi.nlm.nih.gov/genbank/.",2019-11-26 +33051283,Gender-transformative programming with men and boys to improve sexual and reproductive health and rights: a systematic review of intervention studies. ,"Global health organisations advocate gender-transformative programming (which challenges gender inequalities) with men and boys to improve sexual and reproductive health and rights (SRHR) for all. We systematically review evidence for this approach. 
We previously reported an evidence-and-gap map (http://srhr.org/masculinities/wbincome/) and systematic review of reviews of experimental intervention studies engaging men/boys in SRHR, identified through a Campbell Collaboration published protocol (https://doi.org/10.1002/CL2.203) without language restrictions between January 2007 and July 2018. Records for the current review of intervention studies were retrieved from those systematic reviews containing one or more gender-transformative intervention studies engaging men/boys. Data were extracted for intervention studies relating to each of the World Health Organization (WHO) SRHR outcomes. Promising programming characteristics, as well as underused strategies, were analysed with reference to the WHO definition of gender-transformative programming and an established behaviour change model, the COM-B model. Risk of bias was assessed using Cochrane Risk of Bias tools, RoB V.2.0 and Risk of Bias In Non-randomised Studies of Interventions. From 509 eligible records, we synthesised 68 studies comprising 36 randomised controlled trials, n=56 417 participants, and 32 quasi-experimental studies, n=25 554 participants. Promising programming characteristics include: multicomponent activities of education, persuasion, modelling and enablement; multilevel programming that mobilises wider communities; targeting both men and women; and programmes of longer duration than three months. Six of the seven interventions evaluated more than once show efficacy. However, we identified a significant risk of bias in the overall available evidence. Important gaps in evidence relate to safe abortion and SRHR during disease outbreaks. It is widely acknowledged by global organisations that the question is no longer whether to include boys and men in SRHR but how to do so in ways that promote gender equality and health for all and are scientifically rigorous. 
This paper provides an evidence base to take this agenda for programming and research forward.",2020-10-01 +32293874,Metabolomics Profiles of Smokers from Two Ethnic Groups with Differing Lung Cancer Risk.,"African American (AA) smokers are at a higher risk of developing lung cancer compared to whites. The variations in the metabolism of nicotine and tobacco-derived carcinogens in these groups were reported previously with the levels of nicotine metabolites and carcinogen-derived metabolites measured using targeted approaches. While useful, these targeted strategies are not able to detect global metabolic changes for use in predicting the detrimental effects of tobacco use and ultimately lung cancer susceptibility among smokers. To address this limitation, we have performed global untargeted metabolomics profiling in urine of AA and white smokers to characterize the pattern of metabolites, identify differentially regulated pathways, and correlate these profiles with the observed variations in lung cancer risk between these two populations. Urine samples from AA (n = 30) and white (n = 30) smokers were used for metabolomics analysis acquired in both positive and negative electrospray ionization modes. LC-MS data were uploaded onto the cloud-based XCMS online (http://xcmsonline.scripps.edu) platform for retention time correction, alignment, feature detection, annotation, statistical analysis, data visualization, and automated systems biology pathway analysis. The latter identified global differences in the metabolic pathways in the two groups including the metabolism of carbohydrates, amino acids, nucleotides, fatty acids, and nicotine. Significant differences in the nicotine degradation pathway (cotinine glucuronidation) in the two groups were observed and confirmed using a targeted LC-MS/MS approach. These results are consistent with previous studies demonstrating AA smokers with lower glucuronidation capacity compared to whites. 
Furthermore, the d-glucuronate degradation pathway was found to be significantly different between the two populations, with lower amounts of the putative metabolites detected in AA compared to whites. We hypothesize that the differential regulation of the d-glucuronate degradation pathway is a consequence of the variations in the glucuronidation capacity observed in the two groups. Other pathways including the metabolism of amino acids, nucleic acids, and fatty acids were also identified, however, the biological relevance and implications of these differences across ethnic groups need further investigation. Overall, the applied metabolomics approach revealed global differences in the metabolic networks and endogenous metabolites in AA and whites, which could be used and validated as a new potential panel of biomarkers that could be used to predict lung cancer susceptibility among smokers in population-based studies.",2020-05-11 +32504492,RNAProbe: a web server for normalization and analysis of RNA structure probing data.,"RNA molecules play key roles in all living cells. Knowledge of the structural characteristics of RNA molecules allows for a better understanding of the mechanisms of their action. RNA chemical probing allows us to study the susceptibility of nucleotides to chemical modification, and the information obtained can be used to guide secondary structure prediction. These experimental results can be analyzed using various computational tools, which, however, requires additional, tedious steps (e.g., further normalization of the reactivities and visualization of the results), for which there are no fully automated methods. Here, we introduce RNAProbe, a web server that facilitates normalization, analysis, and visualization of the low-pass SHAPE, DMS and CMCT probing results with the modification sites detected by capillary electrophoresis. 
RNAProbe automatically analyzes chemical probing output data and turns tedious manual work into a one-minute assignment. RNAProbe performs normalization based on a well-established protocol, utilizes recognized secondary structure prediction methods, and generates high-quality images with structure representations and reactivity heatmaps. It summarizes the results in the form of a spreadsheet, which can be used for comparative analyses between experiments. Results of predictions with normalized reactivities are also collected in text files, providing interoperability with bioinformatics workflows. RNAProbe is available at https://rnaprobe.genesilico.pl.",2020-07-01 +29347967,Data quality and feasibility of the Experience Sampling Method across the spectrum of severe psychiatric disorders: a protocol for a systematic review and meta-analysis.,"BACKGROUND:Due to a number of methodological advantages and theoretical considerations, more and more studies in clinical psychology research employ the Experience Sampling Method (ESM) as a data collection technique. Despite this growing interest, the absence of methodological guidelines related to the use of ESM has resulted in a large heterogeneity of designs while the potential effects of the design itself on the response behavior of the participants remain unknown. The objectives of this systematic review are to investigate the associations between the design characteristics and the data quality and feasibility of studies relying on ESM in severe psychiatric disorders. METHODS:We will search for all published studies using ambulatory assessment with patients suffering from major depressive disorder, bipolar disorder, and psychotic disorder or individuals at high risk for these disorders. Electronic database searches will be performed in PubMed and Web of Science with no restriction on the publication date. 
Two reviewers will independently screen original studies in a title/abstract phase and a full-text phase based on the inclusion criteria. The information related to the design and sample characteristics, data quality, and feasibility will be extracted. We will provide results in terms of a descriptive synthesis, and when applicable, a meta-analysis of the findings will be conducted. DISCUSSION:Our results will attempt to highlight how the feasibility and data quality of ambulatory assessment might be related to the methodological characteristics of the study designs in severe psychiatric disorders. We will discuss these associations in different subsamples if sufficient data are available and will examine limitations in the reporting of the methods of ambulatory studies in the current literature. SYSTEMATIC REVIEW REGISTRATION:The protocol for this systematic review was registered on PROSPERO (PROSPERO 2017: CRD42017060322 ) and is available in full on the University of York website ( http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42017060322 ).",2018-01-18 +30793168,The TMCrys server for supporting crystallization of transmembrane proteins.,"

Motivation

Due to their special properties, the structures of transmembrane proteins are extremely hard to determine. Several methods exist to predict the propensity of successful completion of the structure determination process. However, available predictors incorporate data from all kinds of proteins; hence, they can hardly differentiate between crystallizable and non-crystallizable membrane proteins.

Results

We implemented a web server to simplify running the TMCrys prediction method, which was developed specifically to separate crystallizable and non-crystallizable membrane proteins.

Availability and implementation

http://tmcrys.enzim.ttk.mta.hu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +32379317,"SIB Literature Services: RESTful customizable search engines in biomedical literature, enriched with automatically mapped biomedical concepts.","Thanks to recent efforts by the text mining community, biocurators have now access to plenty of good tools and Web interfaces for identifying and visualizing biomedical entities in literature. Yet, many of these systems start with a PubMed query, which is limited by strong Boolean constraints. Some semantic search engines exploit entities for Information Retrieval, and/or deliver relevance-based ranked results. Yet, they are not designed for supporting a specific curation workflow, and allow very limited control on the search process. The Swiss Institute of Bioinformatics Literature Services (SIBiLS) provide personalized Information Retrieval in the biological literature. Indeed, SIBiLS allow fully customizable search in semantically enriched contents, based on keywords and/or mapped biomedical entities from a growing set of standardized and legacy vocabularies. The services have been used and favourably evaluated to assist the curation of genes and gene products, by delivering customized literature triage engines to different curation teams. SIBiLS (https://candy.hesge.ch/SIBiLS) are freely accessible via REST APIs and are ready to empower any curation workflow, built on modern technologies scalable with big data: MongoDB and Elasticsearch. They cover MEDLINE and PubMed Central Open Access enriched by nearly 2 billion of mapped biomedical entities, and are daily updated.",2020-07-01 +32105680,Coronavirus Disease 2019 (COVID-19) and pregnancy: what obstetricians need to know.,"Coronavirus disease 2019 is an emerging disease with a rapid increase in cases and deaths since its first identification in Wuhan, China, in December 2019. 
Limited data are available about coronavirus disease 2019 during pregnancy; however, information on illnesses associated with other highly pathogenic coronaviruses (ie, severe acute respiratory syndrome and the Middle East respiratory syndrome) might provide insights into coronavirus disease 2019's effects during pregnancy. Coronaviruses cause illness ranging in severity from the common cold to severe respiratory illness and death. Currently the primary epidemiologic risk factors for coronavirus disease 2019 include travel from mainland China (especially Hubei Province) or close contact with infected individuals within 14 days of symptom onset. Data suggest an incubation period of ∼5 days (range, 2-14 days). Average age of hospitalized patients has been 49-56 years, with a third to half with an underlying illness. Children have been rarely reported. Men were more frequent among hospitalized cases (54-73%). Frequent manifestations include fever, cough, myalgia, headache, and diarrhea. Abnormal testing includes abnormalities on chest radiographic imaging, lymphopenia, leukopenia, and thrombocytopenia. Initial reports suggest that acute respiratory distress syndrome develops in 17-29% of hospitalized patients. Overall case fatality rate appears to be ∼1%; however, early data may overestimate this rate. In 2 reports describing 18 pregnancies with coronavirus disease 2019, all were infected in the third trimester, and clinical findings were similar to those in nonpregnant adults. Fetal distress and preterm delivery were seen in some cases. All but 2 pregnancies were cesarean deliveries and no evidence of in utero transmission was seen. Data on severe acute respiratory syndrome and Middle East respiratory syndrome in pregnancy are sparse. For severe acute respiratory syndrome, the largest series of 12 pregnancies had a case-fatality rate of 25%. 
Complications included acute respiratory distress syndrome in 4, disseminated intravascular coagulopathy in 3, renal failure in 3, secondary bacterial pneumonia in 2, and sepsis in 2 patients. Mechanical ventilation was 3 times more likely among pregnant compared with nonpregnant women. Among 7 first-trimester infections, 4 ended in spontaneous abortion. Four of 5 women with severe acute respiratory syndrome after 24 weeks' gestation delivered preterm. For Middle East respiratory syndrome, there were 13 case reports in pregnant women, of which 2 were asymptomatic, identified as part of a contact investigation; 3 patients (23%) died. Two pregnancies ended in fetal demise and 2 were born preterm. No evidence of in utero transmission was seen in severe acute respiratory syndrome or Middle East respiratory syndrome. Currently no coronavirus-specific treatments have been approved by the US Food and Drug Administration. Because coronavirus disease 2019 might increase the risk for pregnancy complications, management should optimally be in a health care facility with close maternal and fetal monitoring. Principles of management of coronavirus disease 2019 in pregnancy include early isolation, aggressive infection control procedures, oxygen therapy, avoidance of fluid overload, consideration of empiric antibiotics (secondary to bacterial infection risk), laboratory testing for the virus and coinfection, fetal and uterine contraction monitoring, early mechanical ventilation for progressive respiratory failure, individualized delivery planning, and a team-based approach with multispecialty consultations. Information on coronavirus disease 2019 is increasing rapidly. 
Clinicians should continue to follow the Centers for Disease Control and Prevention website to stay up to date with the latest information (https://www.cdc.gov/coronavirus/2019-nCoV/hcp/index.html).",2020-02-24 +33019182,Evaluating the Predictability of Cancer Types from 536 Somatic Mutations: A New Dataset.,"In this paper, we introduce a new dataset for cancer research containing somatic mutation states of 536 genes of the Cancer Gene Census (CGC). We used somatic mutation information from the Cancer Genome Atlas (TCGA) projects to create this dataset. As preliminary investigations, we employed machine learning techniques, including k-Nearest Neighbors, Decision Tree, Random Forest, and Artificial Neural Networks (ANNs) to evaluate the potential of these somatic mutations for classification of cancer types. We compared our models on accuracy, precision, recall, and F1-score. We observed that ANNs outperformed the other models with F1-score of 0.36 and overall classification accuracy of 40%, and precision ranging from 12% to 92% for different cancer types. The 40% accuracy is significantly higher than random guessing which would have resulted in 3% overall classification accuracy. Although the model has relatively low overall accuracy, it has an average classification specificity of 98%. The ANN achieved high precision scores (> 0.7) for 5 of the 33 cancer types. The introduced dataset can be used for research on TCGA data, such as survival analysis, histopathology image analysis and content-based image retrieval. The dataset is available online for download: https://kimialab.uwaterloo.ca/kimia/.",2020-07-01 +28126036,Genome-wide analysis of differential transcriptional and epigenetic variability across human immune cell types.,"

Background

A healthy immune system requires immune cells that adapt rapidly to environmental challenges. This phenotypic plasticity can be mediated by transcriptional and epigenetic variability.

Results

We apply a novel analytical approach to measure and compare transcriptional and epigenetic variability genome-wide across CD14+CD16- monocytes, CD66b+CD16+ neutrophils, and CD4+CD45RA+ naïve T cells from the same 125 healthy individuals. We discover substantially increased variability in neutrophils compared to monocytes and T cells. In neutrophils, genes with hypervariable expression are found to be implicated in key immune pathways and are associated with cellular properties and environmental exposure. We also observe increased sex-specific gene expression differences in neutrophils. Neutrophil-specific DNA methylation hypervariable sites are enriched at dynamic chromatin regions and active enhancers.

Conclusions

Our data highlight the importance of transcriptional and epigenetic variability for the key role of neutrophils as the first responders to inflammatory stimuli. We provide a resource to enable further functional studies into the plasticity of immune cells, which can be accessed from: http://blueprint-dev.bioinfo.cnio.es/WP10/hypervariability .",2017-01-26 +32657407,Graph neural representational learning of RNA secondary structures for predicting RNA-protein interactions.,"

Motivation

RNA-protein interactions are key effectors of post-transcriptional regulation. Significant experimental and bioinformatics efforts have been expended on characterizing protein binding mechanisms on the molecular level, and on highlighting the sequence and structural traits of RNA that impact the binding specificity for different proteins. Yet our ability to predict these interactions in silico remains relatively poor.

Results

In this study, we introduce RPI-Net, a graph neural network approach for RNA-protein interaction prediction. RPI-Net learns and exploits a graph representation of RNA molecules, yielding significant performance gains over existing state-of-the-art approaches. We also introduce an approach to rectify an important type of sequence bias caused by the RNase T1 enzyme used in many CLIP-Seq experiments, and we show that correcting this bias is essential in order to learn meaningful predictors and properly evaluate their accuracy. Finally, we provide new approaches to interpret the trained models and extract simple, biologically interpretable representations of the learned sequence and structural motifs.

Availability and implementation

Source code can be accessed at https://www.github.com/HarveyYan/RNAonGraph.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-07-01 +32442275,TIMER2.0 for analysis of tumor-infiltrating immune cells.,"Tumor progression and the efficacy of immunotherapy are strongly influenced by the composition and abundance of immune cells in the tumor microenvironment. Due to the limitations of direct measurement methods, computational algorithms are often used to infer immune cell composition from bulk tumor transcriptome profiles. These estimated tumor immune infiltrate populations have been associated with genomic and transcriptomic changes in the tumors, providing insight into tumor-immune interactions. However, such investigations on large-scale public data remain challenging. To lower the barriers for the analysis of complex tumor-immune interactions, we significantly improved our previous web platform TIMER. Instead of just using one algorithm, TIMER2.0 (http://timer.cistrome.org/) provides more robust estimation of immune infiltration levels for The Cancer Genome Atlas (TCGA) or user-provided tumor profiles using six state-of-the-art algorithms. TIMER2.0 provides four modules for investigating the associations between immune infiltrates and genetic or clinical features, and four modules for exploring cancer-related associations in the TCGA cohorts. Each module can generate a functional heatmap table, enabling the user to easily identify significant associations in multiple cancer types simultaneously. Overall, the TIMER2.0 web server provides comprehensive analysis and visualization functions of tumor infiltrating immune cells.",2020-07-01 +32343309,PDBMD2CD: providing predicted protein circular dichroism spectra from multiple molecular dynamics-generated protein structures.,"PDBMD2CD is a new web server capable of predicting circular dichroism (CD) spectra for multiple protein structures derived from molecular dynamics (MD) simulations, enabling predictions from thousands of protein atomic coordinate files (e.g. 
MD trajectories) and generating spectra for each of these structures provided by the user. Using MD enables exploration of systems that cannot be monitored by direct experimentation. Validation of MD-derived data from these types of trajectories can be difficult via conventional structure-determining techniques such as crystallography or nuclear magnetic resonance spectroscopy. CD is an experimental technique that can provide protein structure information from such conditions. The website utilizes a much faster (minimum ∼1000×) and more accurate approach for calculating CD spectra than its predecessor, PDB2CD (1). As well as improving on the speed and accuracy of current methods, new analysis tools are provided to cluster predictions or compare them against experimental CD spectra. By identifying a subset of the closest predicted CD spectra derived from PDBMD2CD to an experimental spectrum, the associated cluster of structures could be representative of those found under the conditions in which the MD studies were undertaken, thereby offering an analytical insight into the results. PDBMD2CD is freely available at: https://pdbmd2cd.cryst.bbk.ac.uk.",2020-07-01 +32860044,Prediction of protein-binding residues: dichotomy of sequence-based methods developed using structured complexes versus disordered proteins.,"

Motivation

There are over 30 sequence-based predictors of the protein-binding residues (PBRs). They use either structure-annotated or disorder-annotated training datasets, potentially creating a dichotomy where the structure-/disorder-specific models may not be able to cross-over to accurately predict the other type. Moreover, the structure-trained predictors were shown to substantially cross-predict PBRs among residues that interact with non-protein partners (nucleic acids and small ligands). We address these issues by performing a first-of-its-kind comparative study of a representative collection of disorder- and structure-trained predictors using a comprehensive benchmark set with the structure- and disorder-derived annotations of PBRs (to analyze the cross-over) and the protein-, nucleic acid- and small ligand-binding proteins (to study the cross-predictions).

Results

Three predictors provide accurate results: SCRIBER, ANCHOR and disoRDPbind. Some of the structure-trained methods make accurate predictions on the structure-annotated proteins. Similarly, the disorder-trained predictors predict well on the disorder-annotated proteins. However, the considered predictors generally fail to cross-over, with the exception of SCRIBER. Our study also reveals that virtually all methods substantially cross-predict PBRs, except for SCRIBER for the structure-annotated proteins and disoRDPbind for the disorder-annotated proteins. We formulate a novel hybrid predictor, hybridPBRpred, that combines results produced by disoRDPbind and SCRIBER to accurately predict disorder- and structure-annotated PBRs. HybridPBRpred generates accurate results that cross-over structure- and disorder-annotated proteins and produces relatively low amount of cross-predictions, offering an accurate alternative to predict PBRs.

Availability and implementation

HybridPBRpred webserver, benchmark dataset and supplementary information are available at http://biomine.cs.vcu.edu/servers/hybridPBRpred/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-09-01 +29145629,The Reactome Pathway Knowledgebase.,"The Reactome Knowledgebase (https://reactome.org) provides molecular details of signal transduction, transport, DNA replication, metabolism, and other cellular processes as an ordered network of molecular transformations-an extended version of a classic metabolic map, in a single consistent data model. Reactome functions both as an archive of biological processes and as a tool for discovering unexpected functional relationships in data such as gene expression profiles or somatic mutation catalogues from tumor cells. To support the continued brisk growth in the size and complexity of Reactome, we have implemented a graph database, improved performance of data analysis tools, and designed new data structures and strategies to boost diagram viewer performance. To make our website more accessible to human users, we have improved pathway display and navigation by implementing interactive Enhanced High Level Diagrams (EHLDs) with an associated icon library, and subpathway highlighting and zooming, in a simplified and reorganized web site with adaptive design. To encourage re-use of our content, we have enabled export of pathway diagrams as 'PowerPoint' files.",2018-01-01 +31136235,"What Is Meant by ""Multimodal Therapy"" for Aphasia?","Purpose Multimodal therapy is a frequent term in aphasia literature, but it has no agreed upon definition. Phrases such as ""multimodal therapy"" and ""multimodal treatment"" are applied to a range of aphasia interventions as if mutually understood, and yet, the interventions reported in the literature differ significantly in methodology, approach, and aims. This inconsistency can be problematic for researchers, policy makers, and clinicians accessing the literature and potentially compromises data synthesis and meta-analysis. 
A literature review was conducted to examine what types of aphasia treatment are labeled multimodal and determine whether any patterns are present. Method A systematic search was conducted to identify literature pertaining to aphasia that included the term multimodal therapy (and variants). Sources included literature databases, dissertation databases, textbooks, professional association websites, and Google Scholar. Results Thirty-three original articles were identified, as well as another 31 sources referring to multimodal research, all of which used a variant of the term multimodal therapy. Treatments had heterogeneous aims, underlying theories, and methods. The rationale for using more than 1 modality was not always clear, nor was the reason each therapy was considered to be multimodal when similar treatments had not used the title. Treatments were noted to differ across 2 key features. The 1st was whether the ultimate aim of intervention was to improve total communication, as in augmentative and alternative communication approaches, or to improve 1 specific modality, as when gesture is used to improve word retrieval. The 2nd was the point in the treatment that the nonspeech modalities were employed. Discussion Our review demonstrated that references to ""multimodal"" treatments represent very different therapies with little consistency. We propose a framework to define and categorize multimodal treatments, which is based both on our results and on current terminology in speech-language pathology. Video Abstract and Supplemental Material https://doi.org/10.23641/asha.7646717.",2019-02-04 +25414353,EHFPI: a database and analysis resource of essential host factors for pathogenic infection.,"High-throughput screening and computational technology has greatly changed the face of microbiology in better understanding pathogen-host interactions. 
Genome-wide RNA interference (RNAi) screens have given rise to a new class of host genes designated as Essential Host Factors (EHFs), whose knockdown effects significantly influence pathogenic infections. Therefore, we present the first release of a manually-curated bioinformatics database and analysis resource EHFPI (Essential Host Factors for Pathogenic Infection, http://biotech.bmi.ac.cn/ehfpi). EHFPI captures detailed article, screen, pathogen and phenotype annotation information for a total of 4634 EHF genes of 25 clinically important pathogenic species. Notably, EHFPI also provides six powerful and data-integrative analysis tools, i.e. EHF Overlap Analysis, EHF-pathogen Network Analysis, Gene Enrichment Analysis, Pathogen Interacting Proteins (PIPs) Analysis, Drug Target Analysis and GWAS Candidate Gene Analysis, which advance the comprehensive understanding of the biological roles of EHF genes, as in diverse perspectives of protein-protein interaction network, drug targets and diseases/traits. The EHFPI web interface provides appropriate tools that allow efficient query of EHF data and visualization of custom-made analysis results. EHFPI data and tools shall keep available without charge and serve the microbiology, biomedicine and pharmaceutics research communities, to finally facilitate the development of diagnostics, prophylactics and therapeutics for human pathogens.",2014-11-20 +31870293,An improved catalogue of putative synaptic genes defined exclusively by temporal transcription profiles through an ensemble machine learning approach.,"

Background

Assembly and function of neuronal synapses require the coordinated expression of a yet undetermined set of genes. Previously, we had trained an ensemble machine learning model to assign a probability of having synaptic function to every protein-coding gene in Drosophila melanogaster. This approach resulted in the publication of a catalogue of 893 genes which we postulated to be very enriched in genes with a still undocumented synaptic function. Since then, the scientific community has experimentally identified 79 new synaptic genes. Here we use these new empirical data to evaluate our original prediction. We also implement a series of changes to the training scheme of our model and using the new data we demonstrate that this improves its predictive power. Finally, we added the new synaptic genes to the training set and trained a new model, obtaining a new, enhanced catalogue of putative synaptic genes.

Results

The retrospective analysis demonstrates that our original catalogue was significantly enriched in new synaptic genes. When the changes to the training scheme were implemented using the original training set we obtained even higher enrichment. Finally, applying the new training scheme with a training set including the 79 new synaptic genes, resulted in an enhanced catalogue of putative synaptic genes. Here we present this new catalogue and announce that a regularly updated version will be available online at: http://synapticgenes.bnd.edu.uy CONCLUSIONS: We show that training an ensemble of machine learning classifiers solely with the whole-body temporal transcription profiles of known synaptic genes resulted in a catalogue with a significant enrichment in undiscovered synaptic genes. Using new empirical data provided by the scientific community, we validated our original approach, improved our model and obtained an arguably more precise prediction. This approach reduces the number of genes to be tested through hypothesis-driven experimentation and will facilitate our understanding of neuronal function.

Availability

http://synapticgenes.bnd.edu.uy.",2019-12-23 +28968746,LDassoc: an online tool for interactively exploring genome-wide association study results and prioritizing variants for functional investigation.,"

Motivation

Existing approaches to plot association results from genome-wide association studies (GWAS) are in the form of static Manhattan plots and often lack data integration with rich databases on variant regulatory potential as well as population-specific linkage disequilibrium patterns.

Summary

We created an intuitive web module for uploading and efficiently exploring GWAS association results. Interactive plots and sortable tables allow researchers to query genomic regions of interest, facilitating the integration of data on linkage disequilibrium, variant regulatory potential and potential target genes. External links allow for visualization of association results in the UCSC genome browser as well as easy access to publically available databases (e.g. dbSNP and RegulomeDB). Through improved visualization and data integration, LDassoc offers genomic researchers a specialized environment to examine association signals and suggests variants for functional investigation.

Availability and implementation

LDassoc is a free and publicly available web tool which can be accessed online at https://analysistools.nci.nih.gov/LDlink/?tab=ldassoc.

Contact

mitchell.machiela@nih.gov.",2018-03-01 +34675001,"European Glaucoma Society Terminology and Guidelines for Glaucoma, 5th Edition.","

Foreword

The only time is now. Every ""now"" is unique. Responsible persons ask themselves, ""How can I act well now?"" The answers will differ for every person, because just as every situation is unique, so is every person different from every other person. But surely there must be some algorithm that will assist us in coming to the right answer. Unfortunately, no, for there is no right answer. There is only an answer that is as appropriate as we can conclude at that moment in that situation. No written guidelines can apply appropriately to every unique situation.Unfortunately we physicians have been suckled on a fallacy: ""What's good for the goose is good for the gander."" Phrased in medical terms, ""normal findings are good, and abnormal findings are bad."" This is too simple, and often wrong.Good clinicians know that care must be personalized for it to be optimal. So-called normal findings give rough guidance, sometimes applicable to groups, but frequently wrong for individuals. Consider intraocular pressure (IOP). A normal IOP of 15 mmHg good for some and bad for others, and an abnormal IOP of 30 mmHg is good for some and bad for others. We are so bombarded by the myth of the sanctity of the standard distribution curve that it is hard to think independently and specifically. Also, unfortunately, doctors are prone to decide for patients, often on the basis of normative data that is not relevant or important for the particular patient. That we do this is not surprising, as we want to help, and so we default to what seems to be the easy, safe (non-thinking) way, in which we do not have to hold ourselves accountable for the outcome.Somebody HAS to decide, or else we would be living in an anarchical world. Also true. And because none of us knows as much as we need to know to act appropriately, we seek advice from so-called ""experts.""For us to care for people well it is essential that we consider what others recommend. So we look to experts, as we should. 
However, experts are sometimes right and sometimes wrong. Remember that von Graefe in 1860 recommended surgical iridectomy for all glaucoma, Elliot recommended mustard plaster between the shoulders for glaucoma, Becker based treatment on tonographic findings, Weve reported 100% success with penetrating cyclodiathermy in glaucoma, Lichter advised against laser trabeculoplasty, many thought Cypass was great, and the investigators in the Advanced Glaucoma Intervention Study indicated that an IOP usually around 12 mmHg was better than one usually around 20 mmHg. All wrong. What the authors of these guidelines have done excellently, is to provide a general framework on which ophthalmologists can hang pieces of evidence, so as to be able to evaluate the validity and the importance of that evidence. In doing this meticulously they have provided a valuable service to all ophthalmologists, none of whom individually have either the time or the skill to be fully informed. In their own practices the authors consider whether valid information is relevant for the particular person being considered. That process of considering relevance is essential, always. And relevance is based on the particular unique patient, unique doctor and unique situation. The only guideline the authors can provide in this regard is to remind us all to consider relevance with all patients in all situations, and from the patient's perspective. Even more important than the service to ophthalmologists is the benefit to patients that will result from thoughtful use of these guidelines.We need, also, to remember that diagnoses are generic, and that within every diagnosis there are differences. For example what does a diagnosis of primary open angle mean? Some of those affected will rapidly go blind despite the most thoughtful treatment and others will keep their sight even without treatment. What does a diagnosis of Chandler's Syndrome mean? In some, surgery works well, and, in others, poorly. 
So one never directs diagnosis and treatment at a condition, but rather at the person, the objective being the wellness of that person.The previous European Glaucoma Society Guidelines are used internationally. It is good that the EGS is again providing updated, useful information.The Guidelines are a practical, inspirational contribution.George L. Spaeth, BA, MD.Esposito Research Professor, Wills Eye Hospital/Sidney Kimmel Medical College/Thomas Jefferson University WWWEUGSORG: The Guidelines writers, authors and contributorsAugusto Azuara-Blanco (Editor)Luca BagnascoAlessandro BagnisJoao Barbosa BredaChiara BonzanoAndrei BrezhnevAlain BronCarlo A. CutoloBarbara CvenkelStefano GandolfiTed Garway HeathIlmira GazizovaGus GazzardFranz GrehnAnders HeijlCornelia HirnGábor HollóAnton HommerMichele IesterIngrida JanulevicieneGauti JóhannessonMiriam KolkoTianjing LiJosé Martínez de la CasaFrances Meier-GibbonsMaria MusolinoMarta PazosNorbert PfeifferSergey PetrovLuis Abegao PintoRiccardo ScottoIngeborg StalmansGordana SunaricMégevandErnst TammJohn ThygesenFotis TopouzisMarc Töteberg-HarmsCarlo E. Traverso (Editor)Anja TuulonenZoya VeselovskayaAnanth ViswanathanIlgaz YalvacThierry ZeyenGuidelines CommitteeAugusto Azuara-Blanco (Chair)Carlo E. Traverso (Co-chair)Manuele Michelessi (NGP Co-chair)Luis Abegao PintoMichele IesterJoao BredaCarlo A. CutoloPanayiota FountiGerhard GarhoeferAndreas KatsanosMiriam KolkoFrancesco OddoneMarta PazosVerena Prokosch-WillingCedric SchweitzerAndrew TathamMarc Toteberg-HarmsAcknowledgementsAnja TuulonenTed Garway HeathRichard WormaldTianjing LiManuele MichelessiJenny BurrAzuara-Blanco for their methodological oversight.Tianjing Li and Riaz Qureshi (US Cochrane Eye and Vision Group) and Manuele Michelessi (EGS) for leading the evidence review.Manuele MichelessiGianni VirgiliJoao Barbosa BredaCarlo A. 
CutoloMarta PazosAndreas KatsanosGerhard GarhoferMiriam KolkoVerena ProkoschPanayota FountiFrancesco OddoneAli Ahmed Al RajhiTianjing LiRiaz Qureshi and Azuara-Blanco for their contribution to the evidence review.Karen Osborn and Joanna Bradley from Glaucoma UK charity for their contribution to the section: 'What matters to patients' (https://glaucoma.uk)Additional contributions were made by the following people on specific topicsEleftherios AnastasopoulosPanayiota FountiGus GazzardFranz GrehnAnders HeijlGábor HollóFotis TopouzisAnja TuulonenAnanth ViswanathamThe team of Clinica Oculistica of the University of Genoa for medical editing and illustrationsLuca BagnascoAlessandro BagnisChiara BonzanoCarlo A. CutoloMichele LesterMaria MusolinoRoberta ParodiRiccardo ScottoWe would like to thank the following colleagues for their help in reviewing/editing section I.7. Landmark randomised controlled trials for glaucomaJoe CaprioliTed Garway Heath Gus Gazzard Divakar Gupta Anders Heijl Michael Kass Stefano Miglior David Musch Norbert Pfeiffer Thierry ZeyenExternal reviewsWe would like to thank the following societies and experts:World Glaucoma Association:Parul IchhpujaniMonisha NongpiurTanuj DadaSola OlawoyeJayme ViannaMin Hee SuhFarouk GarbaSimon SkalickyAlex HuangFarouk GarbaPradeep RamuluVerena ProkoschCarolina Gracitelli;American Glaucoma Society:Josh Stein;and Latin-American Glaucoma Society:Daniel GrigeraWe would like to thank the external reviewers whose comments are listed on https://www.eugs.org/eng/guidelines.aspThe EGS executive committeeTed Garway Heath (President)Fotis Topouzis (Vice President)Ingeborg Stalmans (Treasurer)Anja Tuulonen (Past President)Luis Abegao PintoAndrei BrezhnevAlain BronGauti JóhannessonNorbert PfeifferThe board of the European Glaucoma Society FoundationCarlo E. 
Traverso (Chair)Fotis Topouzis (Vice Chair)Franz GrehnAnders HeijlJohn ThygesenThierry Zeyen GLOSSARY: 5-FU 5-fluorouracilAAC Acute angle closureACG Angle closure glaucomaAGIS Advanced glaucoma intervention studyAH Aqueous humourAI Artificial intelligenceALT Argon laser trabeculoplastyBAC Benzalkalonium chlorideCCT Central corneal thicknessCDR Cup to disc ratioCIGTS Initial glaucoma treatment studyCNTGS Collaborative normal tension glaucoma studyDCT Dynamic contour tonometryEAGLE Effectiveness of early lens extraction for the treatment of primary angle closure glaucomaEGPS European glaucoma prevention studyEGS European glaucoma societyEMA The european medicines agencyEMGT Early manifest glaucoma trialFC Flow chartFDT Frequency doubling technologyFC Fixed combinationFL Fixation lossesFN False negativesFP False positiveGAT Goldmann applanation tonometryGHT The glaucoma hemifield testGRADE Grading of recommendations, assessment, development and evaluationsHRT Heidelberg retina tomographyICE Irido-corneal endothelial syndromeIOL Intraocular lensIOP Intraocular pressureITC Iridotrabecular contactIV IntravenousLIGHT Laser in glaucoma and ocular hypertension trialLPI Laser peripheral iridotomyLV Loss varianceMD Mean defect or mean deviationMMC Mitomycin CNCT Non-contact tonometryNd:YAG Neodymium-doped yttrium aluminum garnetNTG Normal tension glaucomaOAG Open angle glaucomaOCT Optical coherence tomographyOHT Ocular hypertensionOHTS The ocular hypertension treatment studyONH Optic nerve headORA Ocular response analyserOSD Ocular surface diseasePAC Primary angle closurePACG Primary angle closure glaucomaPACS Primary angle closure suspectPAS Peripheral anterior synechiaePCG Primary congenital glaucomaPDS Pigment dispersion syndromePGA Prostaglandin analoguePOAG Primary open angle glaucomaPG Pigmentary glaucomaPSD Pattern standard deviationPXF Pseudoexfoliation syndromePXFG Pseudoexfoliation glaucomaRCT Randomised controlled trialRNFL Retinal nerve fiber layerRoP Rate of 
progressionSAP Standard automated perimetrySITA Swedish interactive threshold algorithmSLT Selective laser trabeculoplastySWAP Short-wavelength automated perimetryTLPI Thermal laser peripheral iridoplastyTM Trabecular meshworkUBM Ultrasound biomicroscopyUGH Uveitis-glaucoma-hyphema syndromeUKGTS United Kingdom glaucoma treatment studyVEGF Vascular endothelial growth factorVF Visual filedVFI Visual field indexZAP Zhongshan angle closure prevention trial.",2021-06-01 +31231773,MepmiRDB: a medicinal plant microRNA database. ,"MicroRNAs (miRNAs) have been recognized as a key regulator in plant development and metabolism. Recent reports showed that the miRNAs of medicinal plants not only act as a critical modulator in secondary metabolism but also had a great potential of performing cross-kingdom gene regulation. Although several plant miRNA repositories have been publicly available, no miRNA database specific for medicinal plants has been reported to date. Here, we report the first version of MepmiRDB (medicinal plant microRNA database), which is freely accessible at http://mepmirdb.cn/mepmirdb/index.html. This database accommodates thousands of miRNA candidates belonging to 29 medicinal plant species. The miRNA information on sequences, expression patterns and regulatory networks has been included in the functional modules of the database. Specifically, the 'Sequence' module provides the sequences of the mature miRNAs and their precursors, and the structure information of the precursors. Moreover, the processing and small RNA accumulation signals on the miRNA precursors are also included in the 'Sequence' module. The organ/growth condition-specific expression information of the mature miRNAs has been stored in the 'Expression' module. The 'Interaction' module offers the information of the degradome-validated miRNA-target pairs of eight plant species. The 'Search' module enables users to search for the miRNAs by plant species and miRNA families, or by sequences. 
All data in this database are available for download. Taken together, the functional modules of MepmiRDB ensure its importance and timeliness for mechanistic and functional studies on the medicinal plant miRNAs.",2019-01-01 +27553277,Cysteinome: The first comprehensive database for proteins with targetable cysteine and their covalent inhibitors.,"The covalent modification of intrinsically nucleophilic cysteine in proteins is crucial for diverse biochemical events. Bioinformatics approaches may prove useful in the design and discovery of covalent molecules targeting the cysteine in proteins to tune their functions and activities. Herein, we describe the Cysteinome, the first online database that provides a rich resource for the display, search and analysis of structure, function and related annotation for proteins with targetable cysteine as well as their covalent modulators. To this end, Cysteinome compiles 462 proteins with targetable cysteine from 122 different species along with 1217 covalent modulators curated from existing literatures. Proteins are annotated with a detailed description of protein families, biological process and related diseases. In addition, covalent modulators are carefully annotated with chemical name, chemical structure, binding affinity, physicochemical properties, molecule type and related diseases etc. The Cysteinome database may serve as a useful platform for the identification of crucial proteins with targetable cysteine in certain cellular context. Furthermore, it may help biologists and chemists for the design and discovery of covalent chemical probes or inhibitors homing at functional cysteine of critical protein targets implicated in various physiological or disease process. 
The Cysteinome database is freely available to public at http://www.cysteinome.org/.",2016-08-20 +30971690,A data citation roadmap for scholarly data repositories.,"This article presents a practical roadmap for scholarly data repositories to implement data citation in accordance with the Joint Declaration of Data Citation Principles, a synopsis and harmonization of the recommendations of major science policy bodies. The roadmap was developed by the Repositories Expert Group, as part of the Data Citation Implementation Pilot (DCIP) project, an initiative of FORCE11.org and the NIH-funded BioCADDIE ( https://biocaddie.org ) project. The roadmap makes 11 specific recommendations, grouped into three phases of implementation: a) required steps needed to support the Joint Declaration of Data Citation Principles, b) recommended steps that facilitate article/data publication workflows, and c) optional steps that further improve data citation support provided by data repositories. We describe the early adoption of these recommendations 18 months after they have first been published, looking specifically at implementations of machine-readable metadata on dataset landing pages.",2019-04-10 +31024751,CASBench: A Benchmarking Set of Proteins with Annotated Catalytic and Allosteric Sites in Their Structures.,"In recent years, the phenomenon of allostery has witnessed growing attention driven by a fundamental interest in new ways to regulate the functional properties of proteins, as well as the prospects of using allosteric sites as targets to design novel drugs with lower toxicity due to a higher selectivity of binding and specificity of the mechanism of action. The currently available bioinformatic methods can sometimes correctly detect previously unknown ligand binding sites in protein structures. 
However, the development of universal and more efficient approaches requires a deeper understanding of the common and distinctive features of the structural organization of both functional (catalytic) and allosteric sites, the evolution of their amino acid sequences in respective protein families, and allosteric communication pathways. The CASBench benchmark set contains 91 entries related to enzymes with both catalytic and allosteric sites within their structures annotated based on the experimental information from the Allosteric Database, Catalytic Site Atlas, and Protein Data Bank. The obtained dataset can be used to benchmark the performance of existing computational approaches and develop/train perspective algorithms to search for new catalytic and regulatory sites, as well as to study the mechanisms of protein regulation on a large collection of allosteric enzymes. Establishing a relationship between the structure, function, and regulation is expected to improve our understanding of the mechanisms of action of enzymes and open up new prospects for discovering new drugs and designing more efficient biocatalysts. The CASBench can be operated offline on a local computer or online using built-in interactive tools at https://biokinet.belozersky.msu.ru/casbench.",2019-01-01 +30598113,CRlncRNA: a manually curated database of cancer-related long non-coding RNAs with experimental proof of functions on clinicopathological and molecular features.,"

Background

Recent studies demonstrated that long non-coding RNAs (lncRNAs) could be intricately implicated in cancer-related molecular networks, and related to cancer occurrence, development and prognosis. However, clinicopathological and molecular features for these cancer-related lncRNAs, which are very important in bridging lncRNA basic research with clinical research, have not yet been well integrated.

Results

After manually reviewing more than 2500 published literature, we collected the cancer-related lncRNAs with the experimental proof of functions. By integrating from literature and public databases, we constructed CRlncRNA, a database of cancer-related lncRNAs. The current version of CRlncRNA embodied 355 entries of cancer-related lncRNAs, covering 1072 cancer-lncRNA associations regarding to 76 types of cancer, and 1238 interactions with different RNAs and proteins. We further annotated clinicopathological features of these lncRNAs, such as the clinical stages and the cancer hallmarks. We also provided tools for data browsing, searching and download, as well as online BLAST, genome browser and gene network visualization service.

Conclusions

CRlncRNA is a manually curated database for retrieving clinicopathological and molecular features of cancer-related lncRNAs supported by highly reliable evidences. CRlncRNA aims to provide a bridge from lncRNA basic research to clinical research. The lncRNA dataset collected by CRlncRNA can be used as a golden standard dataset for the prospective experimental and in-silico studies of cancer-related lncRNAs. CRlncRNA is freely available for all users at http://crlnc.xtbg.ac.cn .",2018-12-31 +31693257,"BactMAP: An R package for integrating, analyzing and visualizing bacterial microscopy data.","High-throughput analyses of single-cell microscopy data are a critical tool within the field of bacterial cell biology. Several programs have been developed to specifically segment bacterial cells from phase-contrast images. Together with spot and object detection algorithms, these programs offer powerful approaches to quantify observations from microscopy data, ranging from cell-to-cell genealogy to localization and movement of proteins. Most segmentation programs contain specific post-processing and plotting options, but these options vary between programs and possibilities to optimize or alter the outputs are often limited. Therefore, we developed BactMAP (Bacterial toolbox for Microscopy Analysis & Plotting), a command-line based R package that allows researchers to transform cell segmentation and spot detection data generated by different programs into various plots. Furthermore, BactMAP makes it possible to perform custom analyses and change the layout of the output. Because BactMAP works independently of segmentation and detection programs, inputs from different sources can be compared within the same analysis pipeline. BactMAP complies with standard practice in R which enables the use of advanced statistical analysis tools, and its graphic output is compatible with ggplot2, enabling adjustable plot graphics in every operating system. 
User feedback will be used to create a fully automated Graphical User Interface version of BactMAP in the future. Using BactMAP, we visualize key cell cycle parameters in Bacillus subtilis and Staphylococcus aureus, and demonstrate that the DNA replication forks in Streptococcus pneumoniae dissociate and associate before splitting of the cell, after the Z-ring is formed at the new quarter positions. BactMAP is available from https://veeninglab.com/bactmap.",2019-11-24 +33144314,Human Gene Functional Network-Informed Prediction of HIV-1 Host Dependency Factors. ,"Human immunodeficiency virus type 1 (HIV-1) depends on a class of host proteins called host dependency factors (HDFs) to facilitate its infection. So far experimental efforts have detected a certain number of HDFs, but the gene inventory of HIV-1 HDFs remains incomplete. Here, we implemented an existing network-based gene discovery strategy to predict HIV-1 HDFs. First, an encoding scheme based on a publicly available human tissue-specific gene functional network (GIANT; http://giant.princeton.edu/) was designed to convert each human gene into a 25,825-dimensional feature vector. Then, a random forest-based predictive model was trained on a data set containing 868 known HDFs and 1,736 non-HDFs. Through 5-fold cross-validation, an independent test, and comparison with one existing method, the proposed prediction method consistently revealed accurate and competitive performance. The highlight of our method should be ascribed to the introduction of the GIANT encoding scheme, which contains rich information regarding gene interactions. By merging known HDFs and genome-wide HDF prediction results, network analysis was conducted to catch the common patterns of HDFs in the context of the GIANT network. Interestingly, HDFs reveal significantly lower betweenness than HIV-1-interacting human proteins (i.e., HIV targets). 
In the meantime, the functional roles of HDFs were also examined by mapping all the HDF candidates into human protein complexes. Especially, we observed the frequent co-occurrence of HDFs and HIV targets at the protein complex level. Collectively, we hope the proposed prediction method not only can accelerate the HDF identification and antiviral drug target discovery, but also can provide some mechanistic insights into human-virus relationships. IMPORTANCE Identification of HIV-1 HDFs remains a crucial step to understand the complicated relationships between human and HIV-1. To complement the experimental identification of HDFs, we have implemented an existing network-based gene discovery strategy to predict HDFs from the human genome. The core idea of the proposed method is that the rich information deposited in host gene functional networks can be effectively utilized to infer the potential HDFs. We hope the proposed prediction method could further guide hypothesis-driven experimental efforts to interrogate human-HIV-1 relationships and provide new hints for the development of antiviral drugs to combat HIV-1 infection.",2020-11-03 +26400039,"PathwaysWeb: a gene pathways API with directional interactions, expanded gene ontology, and versioning.","

Unlabelled

PathwaysWeb is a resource-based, well-documented web system that provides publicly available information on genes, biological pathways, Gene Ontology (GO) terms, gene-gene interaction networks (importantly, with the directionality of interactions) and links to key-related PubMed documents. The PathwaysWeb API simplifies the construction of applications that need to retrieve and interrelate information across multiple, pathway-related data types from a variety of original data sources. PathwaysBrowser is a companion website that enables users to explore the same integrated pathway data. The PathwaysWeb system facilitates reproducible analyses by providing access to all versions of the integrated datasets. Although its GO subsystem includes data for mouse, PathwaysWeb currently focuses on human data. However, pathways for mouse and many other species can be inferred with a high success rate from human pathways.

Availability and implementation

PathwaysWeb can be accessed via the Internet at http://bioinformatics.mdanderson.org/main/PathwaysWeb:Overview.

Contact

jmmelott@mdanderson.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-23 +32971450,Improving the endothelial dysfunction in type 2 diabetes with chromium and vitamin D3 by reducing homocysteine and oxidative stress: A randomized placebo-controlled trial.,"

Background

Chromium picolinate (CrPic) and vitamin D3 are known as two antioxidant micronutrients. Through inducing endothelial dysfunction, oxidants such as homocysteine (Hct) and malondialdehyde (MDA) lead to cardiovascular disease in type 2 diabetes mellitus (T2DM). No published data has directly examined the effects of these two antioxidants on improving the endothelial dysfunction in T2DM through reducing homocysteine and oxidative stress.

Methods

Subjects (n = 92) in this randomized, double blind, placebo-control study were randomly assigned to receive oral placebo (group I), D3 (group II: 50,000 IU/ week), chromium picolinate (CrPic) (group III: 500 μg/day), and both vitamin D3 and CrPic (group IV) for four months. Fasting blood samples were drawn at study baseline and following intervention to determine Hct, MDA, total antioxidant capacity (TAC), total thiol groups (SHs), vascular cell adhesion molecule- 1 (VCAM-1), and plasminogen activator inhibitor-1 (PAI-1).

Results

After intervention, MDA significantly decreased in groups II and IV; TAC significantly increased in group IV, and SHs significantly augmented in group III; Hct was significantly reduced in groups II, III, and IV; and VCAM-1 significantly decreased in groups III and IV and PAI-1 was significantly reduced in groups II, III, and IV.

Conclusion

Our findings suggest that through reducing homocysteine and oxidative stress and improving endothelial dysfunction, chromium and vitamin D3 co-supplementation might be predictive and preventive of cardiovascular diseases associated with T2DM. IRCT, IRCT20190610043852N1, registered 21 October 2019, https://fa.irct.ir/user/trial/42293/view.",2020-08-31 +32677034,A genome database for a Japanese population of the larvacean Oikopleura dioica.,"The larvacean Oikopleura dioica is a planktonic chordate and is a tunicate that belongs to the closest relatives to vertebrates. Its simple and transparent body, invariant embryonic cell lineages, and short life cycle of 5 days make it a promising model organism for the study of developmental biology. The genome browser OikoBase was established in 2013 using Norwegian O. dioica. However, genome information for other populations is not available, even though many researchers have studied local populations. In the present study, we sequenced using Illumina and PacBio RSII technologies the genome of O. dioica from a southwestern Japanese population that was cultured in our laboratory for 3 years. The genome of Japanese O. dioica was assembled into 576 scaffold sequences with a total length and N50 length of 56.6 and 1.5 Mb, respectively. A total of 18,743 gene models (transcript models) were predicted in the genome assembly, named OSKA2016. In addition, 19,277 non-redundant transcripts were assembled using RNA-seq data. The OSKA2016 has global sequence similarity of only 86.5% when compared with the OikoBase, highlighting the sequence difference between the two far distant O. dioica populations on the globe. The genome assembly, transcript assembly, and transcript models were incorporated into ANISEED (https://www.aniseed.cnrs.fr/) for genome browsing and BLAST searches. 
Mapping of reads obtained from male- or female-specific genome libraries yielded male-specific scaffolds in the OSKA2016 and revealed that over 2.6 Mb of sequence were included in the male-specific Y-region. The genome and transcriptome resources from two distinct populations will be useful datasets for developmental biology, evolutionary biology, and molecular ecology using this model organism.",2020-08-14 +32554036,Correlation intensity index: Building up models for mutagenicity of silver nanoparticles.,"Nanomaterials become significant component of economics. Consequently, nanomaterials become object of environmental sciences. There is a traditional list of endpoints which are indicators of the ecological risk. Mutagenicity is one of important component in this list. The quasi-SMILES approach, that in contrast to majority of work dedicated to modelling behaviour of nanomaterials gives possibility to consider experimental conditions as well as other circumstances which can impact the behaviour of nanomaterials is suggested. This is carried out via so-called quasi-SMILES. The quasi-SMILES is a line on of codes that contains all the above available eclectic data. Modelling process aimed to build up a model involves Correlation Intensity Index (CII) that is a new criterion of predictive potential of models. The scheme of calculation of CII is described in this work in the first time. The applying of CII together with Index of Ideality Correlation (IIC) in modelling of mutagenicity of silver nanoparticles by the Monte Carlo method using the CORAL software (http://www.insilico.eu/coral) indicates that application of the CII improves the predictive potential of these models for three random splits into the training set (75%) and validation set (25%).",2020-05-27 +32992023,Outcomes of Minimally Invasive Management of Tubo-ovarian Abscess: A Systematic Review.,"

Objective

To compare the success rate, complications, and hospital length-of-stay of 3 modalities of minimally invasive management of tubo-ovarian abscesses (TOAs): laparoscopy, ultrasound-guided drainage, and computed tomography-guided drainage.

Data sources

Electronic-based search in PubMed, EMBASE, Ovid MEDLINE, Google Scholar, and Cochrane Central Register of Controlled Trials, using the following Medical Subject Heading terms: ""minimally invasive surgical procedures,"" ""drainage,"" ""abscess,"" ""tubo-ovarian,"" ""ovarian diseases,"" and ""fallopian tube diseases.""

Methods of study selection

Of the 831 articles in the initial results, 10 studies were eligible for inclusion in our systematic review.

Tabulation, integration, and results

A total of 975 patients were included in our study; 107 (11%) had laparoscopic drainage procedures, and 406 (42%) had image-guided (ultrasound or computed tomography) drainage of TOAs. Image-guided TOA drainage had higher success rates (90%-100%) than laparoscopic drainage (89%-96%) and the use of antibiotic treatment alone (65%-83%). Patients treated with image-guided drainage had no complications (for up to 6 months of follow-up) and shorter lengths of hospital stay (0-3 days on average) compared with laparoscopic drainage (5-12 days) or conservative management with antibiotics alone (7-9 days).

Conclusion

Although conservative management of TOAs with antibiotics alone remains first-line, our review indicates that better outcomes in the management of TOA were achieved by minimally invasive approach compared with conservative treatment with antibiotics only. Of the minimally invasive techniques, image-guided drainage of TOAs provided the highest success rates, the fewest complications, and the shortest hospital stays compared with laparoscopy. The low magnitude of evidence in the included studies calls for further randomized trials. This systematic review was registered in the International Prospective Register of Systematic Review (register, http://www.crd.york.ac.uk/PROSPERO;CRD 42020170345).",2020-09-28 +32049311,InterLig: improved ligand-based virtual screening using topologically independent structural alignments.,"MOTIVATION:In the past few years, drug discovery processes have been relying more and more on computational methods to sift out the most promising molecules before time and resources are spent to test them in experimental settings. Whenever the protein target of a given disease is not known, it becomes fundamental to have accurate methods for ligand-based virtual screening, which compares known active molecules against vast libraries of candidate compounds. Recently, 3D-based similarity methods have been developed that are capable of scaffold hopping and to superimpose matching molecules. RESULTS:Here, we present InterLig, a new method for the comparison and superposition of small molecules using topologically independent alignments of atoms. We test InterLig on a standard benchmark and show that it compares favorably to the best currently available 3D methods. AVAILABILITY AND IMPLEMENTATION:The program is available from http://wallnerlab.org/InterLig. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +25913159,ClosIndb: A resource for computationally derived information from clostridial genomes.,"Over the past few years, several clostridial genomes have been sequenced, and since then new sequencing projects are also under way. Clostridia is one of the most sequenced genera, and presently, complete genome sequences of 49 clostridial species are available in public archives. Unraveling this wealth of genomic information opens up potential avenues in clostridial research. In the present study, we have carried out in silico analysis to decipher the genomic data. Subsequently, a web resource, ClosIndb, has been developed which collates the computationally derived information associated with all clostridial genes. It features various aspects of coding regions as well as non-coding regions, such as putative orthologs, proteins physicochemical properties, operons and cis-regulatory elements. It provides users with comparative details of all clostridial proteins across the firmicutes. ClosIndb is a comprehensive resource for all completely sequenced clostridial genomes and is under constant development. ClosIndb is freely accessible at http://bif.uohyd.ac.in/closindb/.",2015-04-23 +33008805,Soft Microenvironments Induce Chemoresistance by Increasing Autophagy Downstream of Integrin-Linked Kinase.,"Breast cancer relapse can develop over the course of years as a result of dormant cancer cells that disseminate to secondary sites. These dormant cells are often resistant to conventional hormone and chemotherapy. Although recurrence is the main cause of death from cancer, microenvironmental factors that may influence resistance to therapy and duration of dormancy are largely unknown. Breast cancer relapse is often detected in tissues that are softer than the normal mammary gland or the primary breast tumor, such as bone marrow, brain, and lung. 
We therefore explored how stiffness of the microenvironment at secondary sites regulates tumor dormancy and the response of breast cancer cells to hormone and chemotherapy. In soft microenvironments reminiscent of metastatic sites, breast cancer cells were more resistant to the estrogen receptor modulator tamoxifen as a result of increased autophagy and decreased expression of estrogen receptor-α. Consistently, pharmacologic inhibition or genetic downregulation of autophagy increased the response of breast cancer cells to tamoxifen on soft substrata. In addition, autophagy was decreased downstream of integrin-linked kinase on stiff substrata. Altogether, our data show that tissue mechanics regulates therapeutic outcome and long-term survival of breast cancer cells by influencing autophagy. SIGNIFICANCE: These findings characterize the persistence of dormant cells at metastatic sites, where soft microenvironments downregulate estrogen receptor expression and upregulate autophagy, thereby promoting therapy resistance in breast cancer cells. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/19/4103/F1.large.jpg.",2020-07-27 +31985802,G4Killer web application: a tool to design G-quadruplex mutations.,"MOTIVATION:G-quadruplexes (G4) are important regulatory non-B DNA structures with therapeutic potential. A tool for rational design of mutations leading to decreased propensity for G4 formation should be useful in studying G4 functions. Although tools exist for G4 prediction, no easily accessible tool for the rational design of G4 mutations has been available. RESULTS:We developed a web-based tool termed G4Killer that is based on the G4Hunter algorithm. This new tool is a platform-independent and user-friendly application to design mutations crippling G4 propensity in a parsimonious way (i.e., keeping the primary sequence as close as possible to the original one). 
The tool is integrated into our DNA analyzer server and allows for generating mutated DNA sequences having the desired lowered G4Hunter score with minimal mutation steps. AVAILABILITY AND IMPLEMENTATION:The G4Killer web tool can be accessed at: http://bioinformatics.ibp.cz. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +32295889,Computer Vision and Artificial Intelligence Are Emerging Diagnostic Tools for the Clinical Microbiologist. ,"Artificial intelligence (AI) is increasingly becoming an important component of clinical microbiology informatics. Researchers, microbiologists, laboratorians, and diagnosticians are interested in AI-based testing because these solutions have the potential to improve a test's turnaround time, quality, and cost. A study by Mathison et al. used computer vision AI (B. A. Mathison, J. L. Kohan, J. F. Walker, R. B. Smith, et al., J Clin Microbiol 58:e02053-19, 2020, https://doi.org/10.1128/JCM.02053-19), but additional opportunities for AI applications exist within the clinical microbiology laboratory. Large data sets within clinical microbiology that are amenable to the development of AI diagnostics include genomic information from isolated bacteria, metagenomic microbial findings from primary specimens, mass spectra captured from cultured bacterial isolates, and large digital images, which is the medium that Mathison et al. chose to use. AI in general and computer vision in specific are emerging tools that clinical microbiologists need to study, develop, and implement in order to improve clinical microbiology.",2020-05-26 +32716683,Neural Processes Underlying Nonword Rhyme Differentiate Eventual Stuttering Persistence and Recovery.,"Purpose Phonological skills have been associated with developmental stuttering. The current study aimed to determine whether the neural processes underlying phonology, specifically for nonword rhyming, differentiated stuttering persistence and recovery. 
Method Twenty-six children who stutter (CWS) and 18 children who do not stutter, aged 5 years, completed an auditory nonword rhyming task. Event-related brain potentials were elicited by prime, rhyming, and nonrhyming targets. CWS were followed longitudinally to determine eventual persistence (n = 14) or recovery (n = 12). This is a retrospective analysis of data acquired when all CWS presented as stuttering. Results CWS who eventually recovered and children who do not stutter exhibited the expected rhyme effect, with larger event-related brain potential amplitudes elicited by nonrhyme targets compared to rhyme targets. In contrast, CWS who eventually persisted exhibited a reverse rhyme effect, with larger responses to rhyme than nonrhyme targets. Conclusions These findings suggest that CWS who eventually persisted are not receiving the same benefit of phonological priming as CWS who eventually recovered for complex nonword rhyming tasks. These results indicate divergent patterns of phonological processing in young CWS who eventually persisted, especially for difficult tasks with limited semantic context, and suggest that the age of 5 years may be an important developmental period for phonology in CWS. Supplemental Material https://doi.org/10.23641/asha.12682874.",2020-07-27 +32117828,AWAKEN-Ing a New Frontier in Neonatal Nephrology.,"In 2013, literature about the epidemiology of neonatal acute kidney injury (AKI) was limited to primarily retrospective, single center studies that suggested that AKI was common and that those with AKI had higher rates of mortality. We developed a 24-center retrospective cohort of neonates admitted to the NICU between January 1 and March 31, 2014. Analysis of the Assessment of Worldwide Acute Kidney Epidemiology in Neonates (AWAKEN) cohort, has allowed us to describe the prevalence, risk factors and impact of neonatal AKI for different gestational age cohorts. 
The ample sample size allows us to provide convincing data to show that those with AKI have an increase independent higher odds of death and prolonged hospitalization time (1). This data mirrors similar studies in pediatric (2) and adult (3) critically ill populations which collectively suggest that patients do not just die with AKI, but instead, AKI is directly linked to hard clinical outcomes. This study has allowed us to answer multiple other questions in the field which has expanded our understanding of the risk factors, complications, impact of fluid overload, the definition of neonatal AKI and suggests interventions for improving outcomes. Furthermore, this project brought together neonatologist and nephrologist within and across centers. Finally, the AWAKEN project has enabled us to build relationships and infrastructure that has launched the Neonatal Kidney Collaborative http://babykidney.org/ on its way to accomplish its stated mission to improve the health of newborns with or at risk for kidney disease through multidisciplinary collaborative research, advocacy, and education.",2020-02-07 +25887485,"The Alternaria genomes database: a comprehensive resource for a fungal genus comprised of saprophytes, plant pathogens, and allergenic species.","

Background

Alternaria is considered one of the most common saprophytic fungal genera on the planet. It is comprised of many species that exhibit a necrotrophic phytopathogenic lifestyle. Several species are clinically associated with allergic respiratory disorders although rarely found to cause invasive infections in humans. Finally, Alternaria spp. are among the most well known producers of diverse fungal secondary metabolites, especially toxins.

Description

We have recently sequenced and annotated the genomes of 25 Alternaria spp. including but not limited to many necrotrophic plant pathogens such as A. brassicicola (a pathogen of Brassicaceous crops like cabbage and canola) and A. solani (a major pathogen of Solanaceous plants like potato and tomato), and several saprophytes that cause allergy in human such as A. alternata isolates. These genomes were annotated and compared. Multiple genetic differences were found in the context of plant and human pathogenicity, notably the pro-inflammatory potential of A. alternata. The Alternaria genomes database was built to provide a public platform to access the whole genome sequences, genome annotations, and comparative genomics data of these species. Genome annotation and comparison were performed using a pipeline that integrated multiple computational and comparative genomics tools. Alternaria genome sequences together with their annotation and comparison data were ported to Ensembl database schemas using a self-developed tool (EnsImport). Collectively, data are currently hosted using a customized installation of the Ensembl genome browser platform.

Conclusion

Recent efforts in fungal genome sequencing have facilitated the studies of the molecular basis of fungal pathogenicity as a whole system. The Alternaria genomes database provides a comprehensive resource of genomics and comparative data of an important saprophytic and plant/human pathogenic fungal genus. The database will be updated regularly with new genomes when they become available. The Alternaria genomes database is freely available for non-profit use at http://alternaria.vbi.vt.edu .",2015-03-25 +31581856,Two new pregnane steroidal glycosides from Cynanchum taihangense.,"As our ongoing chemical investigation, two new pregnane steroidal glycosides, cynataihosides G (1), with a new aglycone, and H (2) were isolated from the 95% ethanol extract of Cynanchum taihangense. Their structures were elucidated on the basis of 1 D and 2 D NMR spectral data, HR-ESI-MS analysis and qualitative chemical methods. The compounds were subjected to detect the cytotoxicity against three human tumor cell lines (HL-60, THP-1 and PC-3). The compounds displayed no significant cytotoxicity. Supplemental data for this article can be accessed at https://doi.org/10.1080/14786419.2019.1672682.",2019-10-04 +31860077,AQUA-DUCT 1.0: structural and functional analysis of macromolecules from an intramolecular voids perspective.,"

Motivation

Tunnels, pores, channels, pockets and cavities contribute to proteins architecture and performance. However, analysis and characteristics of transportation pathways and internal binding cavities are performed separately. We aimed to provide universal tool for analysis of proteins integral interior with access to detailed information on the ligands transportation phenomena and binding preferences.

Results

AQUA-DUCT version 1.0 is a comprehensive method for macromolecules analysis from the intramolecular voids perspective using small ligands as molecular probes. This version gives insight into several properties of macromolecules and facilitates protein engineering and drug design by the combination of the tracking and local mapping approach to small ligands.

Availability and implementation

http://www.aquaduct.pl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +32775301,OSlgg: An Online Prognostic Biomarker Analysis Tool for Low-Grade Glioma.,"Glioma is the most frequent primary brain tumor that causes high mortality and morbidity with poor prognosis. There are four grades of gliomas, I to IV, among which grade II and III are low-grade glioma (LGG). Although less aggressive, LGG almost universally progresses to high-grade glioma and eventual causes death if lacking of intervention. Current LGG treatment mainly depends on surgical resection followed by radiotherapy and chemotherapy, but the survival rates of LGG patients are low. Therefore, it is necessary to use prognostic biomarkers to classify patients into subgroups with different risks and guide clinical managements. Using gene expression profiling and long-term follow-up data, we established an Online consensus Survival analysis tool for LGG named OSlgg. OSlgg is comprised of 720 LGG cases from two independent cohorts. To evaluate the prognostic potency of genes, OSlgg employs the Kaplan-Meier plot with hazard ratio and p value to assess the prognostic significance of genes of interest. The reliability of OSlgg was verified by analyzing 86 previously published prognostic biomarkers of LGG. Using OSlgg, we discovered two novel potential prognostic biomarkers (CD302 and FABP5) of LGG, and patients with the elevated expression of either CD302 or FABP5 present the unfavorable survival outcome. These two genes may be novel risk predictors for LGG patients after further validation. OSlgg is public and free to the users at http://bioinfo.henu.edu.cn/LGG/LGGList.jsp.",2020-07-07 +29069441,PolyA_DB 3 catalogs cleavage and polyadenylation sites identified by deep sequencing in multiple genomes.,"PolyA_DB is a database cataloging cleavage and polyadenylation sites (PASs) in several genomes. 
Previous versions were based mainly on expressed sequence tags (ESTs), which had a limited amount and could lead to inaccurate PAS identification due to the presence of internal A-rich sequences in transcripts. Here, we present an updated version of the database based solely on deep sequencing data. First, PASs are mapped by the 3' region extraction and deep sequencing (3'READS) method, ensuring unequivocal PAS identification. Second, a large volume of data based on diverse biological samples increases PAS coverage by 3.5-fold over the EST-based version and provides PAS usage information. Third, strand-specific RNA-seq data are used to extend annotated 3' ends of genes to obtain more thorough annotations of alternative polyadenylation (APA) sites. Fourth, conservation information of PAS across mammals sheds light on significance of APA sites. The database (URL: http://www.polya-db.org/v3) currently holds PASs in human, mouse, rat and chicken, and has links to the UCSC genome browser for further visualization and for integration with other genomic data.",2018-01-01 +31402841,"Sampling event dataset on five-year observations of macrofungi fruit bodies in raised bogs, Western Siberia, Russia.","

Background

The data paper includes the results of a long-term monitoring programme for macrofungi fruiting using permanent plots located in the raised bog ecosystem in central part of Western Siberia (nearby Khanty-Mansiysk), Russia. The goal of the project was to describe the quantitative and qualitative structure and spatial variation of the community of macromycetes, to follow its dynamics seasonally and inter-annually and also elucidate the relationship between the fruiting and climate variables. A total of 263 circular 5 m2 subplots (for a total area of 1,315 m2) were inspected weekly during vegetation seasons 2014-2018 and carpophores of different fungal taxa were counted. The resulting sampling-event dataset includes 16,569 of plot-based observations (= sampling events) with corresponding 6,011 occurrence records of macromycetes identified to species or genus level. In total, 69 species were revealed during the study. About 80% of plot-based observations contain zero records and mark absence of visible fruiting bodies in a certain plot and time.

New information

This is the first sampling-event dataset on plot-based observations of macrofungi published in GBIF and the first long-term series of macrofungi monitoring in a raised bog ecosystem accomplished in Western Siberia. The aim of the data paper publication was to provide the description and the link to the published data in the format of a peer-reviewed journal paper and to provide recognition for the effort by means of a scholarly article (based on Data paper definition published at https://www.gbif.org/en/data-papers).",2019-07-30 +31752864,Prediction of cognitive impairment via deep learning trained with multi-center neuropsychological test data.,"BACKGROUND:Neuropsychological tests (NPTs) are important tools for informing diagnoses of cognitive impairment (CI). However, interpreting NPTs requires specialists and is thus time-consuming. To streamline the application of NPTs in clinical settings, we developed and evaluated the accuracy of a machine learning algorithm using multi-center NPT data. METHODS:Multi-center data were obtained from 14,926 formal neuropsychological assessments (Seoul Neuropsychological Screening Battery), which were classified into normal cognition (NC), mild cognitive impairment (MCI) and Alzheimer's disease dementia (ADD). We trained a machine learning model with artificial neural network algorithm using TensorFlow (https://www.tensorflow.org) to distinguish cognitive state with the 46-variable data and measured prediction accuracies from 10 randomly selected datasets. The features of the NPT were listed in order of their contribution to the outcome using Recursive Feature Elimination. RESULTS:The ten times mean accuracies of identifying CI (MCI and ADD) achieved by 96.66 ± 0.52% of the balanced dataset and 97.23 ± 0.32% of the clinic-based dataset, and the accuracies for predicting cognitive states (NC, MCI or ADD) were 95.49 ± 0.53 and 96.34 ± 1.03%. 
The sensitivity to the detection CI and MCI in the balanced dataset were 96.0 and 96.0%, and the specificity were 96.8 and 97.4%, respectively. The 'time orientation' and '3-word recall' score of MMSE were highly ranked features in predicting CI and cognitive state. The twelve features reduced from 46 variable of NPTs with age and education had contributed to more than 90% accuracy in predicting cognitive impairment. CONCLUSIONS:The machine learning algorithm for NPTs has suggested potential use as a reference in differentiating cognitive impairment in the clinical setting.",2019-11-21 +30050522,"Genotypic Expansion Within the Population Structure of Classical Brucella Species Revealed by MLVA16 Typing of 1404 Brucella Isolates From Different Animal and Geographic Origins, 1974-2006.","Previous studies have shown the usefulness of MLVA16 as a rapid molecular identification and classification method for Brucella species and biovars including recently described novel Brucella species from wildlife. Most studies were conducted on a limited number of strains from limited geographic/host origins. The objective of this study was to assess genetic diversity of Brucella spp. by MLVA16 on a larger scale. Thus, 1404 animal or human isolates collected from all parts of the world over a period of 32 years (1974-2006) were investigated. Selection of the 1404 strains was done among the approximately 4000 strains collection of the BCCN (Brucella Culture Collection Nouzilly), based on classical biotyping and on the animal/human/geographic origin over the time period considered. MLVA16 was performed on extracted DNAs using high throughput capillary electrophoresis. The 16 loci were amplified in four multiplex PCR reactions. This large scale study firstly confirmed the accuracy of MLVA16 typing for Brucella species and biovar identification and its congruence with the recently described Extended Multilocus Sequence Analysis. 
In addition, it allowed identifying novel MLVA11 (based upon 11 slowly evolving VNTRs) genotypes representing an increase of 15% relative to the previously known Brucella MLVA11 genotypes. Cluster analysis showed that among the MLVA16 genotypes some were genetically more distant from the major classical clades. For example new major clusters of B. abortus biovar 3 isolated from cattle in Sub-Saharan Africa were identified. For other classical species and biovars this study indicated also genotypic expansion within the population structure of classical Brucella species. MLVA proves to be a powerful tool to rapidly assess genetic diversity of bacterial populations on a large scale, as here on a large collection of strains of the genomically homogeneous genus Brucella. The highly discriminatory power of MLVA appears of particular interest as a first step for selection of Brucella strains for whole-genome sequencing. The MLVA data of this study were added to the public Brucella MLVA database at http://microbesgenotyping.i2bc.paris-saclay.fr. Current version Brucella_4_3 comprises typing data from more than 5000 strains including in silico data analysis of public whole genome sequence datasets.",2018-07-12 +31651589,Development and Internal Validation of Machine Learning Algorithms for Preoperative Survival Prediction of Extremity Metastatic Disease.,"

Background

A preoperative estimation of survival is critical for deciding on the operative management of metastatic bone disease of the extremities. Several tools have been developed for this purpose, but there is room for improvement. Machine learning is an increasingly popular and flexible method of prediction model building based on a data set. It raises some skepticism, however, because of the complex structure of these models.

Questions/purposes

The purposes of this study were (1) to develop machine learning algorithms for 90-day and 1-year survival in patients who received surgical treatment for a bone metastasis of the extremity, and (2) to use these algorithms to identify those clinical factors (demographic, treatment related, or surgical) that are most closely associated with survival after surgery in these patients.

Methods

All 1090 patients who underwent surgical treatment for a long-bone metastasis at two institutions between 1999 and 2017 were included in this retrospective study. The median age of the patients in the cohort was 63 years (interquartile range [IQR] 54 to 72 years), 56% of patients (610 of 1090) were female, and the median BMI was 27 kg/m (IQR 23 to 30 kg/m). The most affected location was the femur (70%), followed by the humerus (22%). The most common primary tumors were breast (24%) and lung (23%). Intramedullary nailing was the most commonly performed type of surgery (58%), followed by endoprosthetic reconstruction (22%), and plate screw fixation (14%). Missing data were imputed using the missForest methods. Features were selected by random forest algorithms, and five different models were developed on the training set (80% of the data): stochastic gradient boosting, random forest, support vector machine, neural network, and penalized logistic regression. These models were chosen as a result of their classification capability in binary datasets. Model performance was assessed on both the training set and the validation set (20% of the data) by discrimination, calibration, and overall performance.

Results

We found no differences among the five models for discrimination, with an area under the curve ranging from 0.86 to 0.87. All models were well calibrated, with intercepts ranging from -0.03 to 0.08 and slopes ranging from 1.03 to 1.12. Brier scores ranged from 0.13 to 0.14. The stochastic gradient boosting model was chosen to be deployed as freely available web-based application and explanations on both a global and an individual level were provided. For 90-day survival, the three most important factors associated with poorer survivorship were lower albumin level, higher neutrophil-to-lymphocyte ratio, and rapid growth primary tumor. For 1-year survival, the three most important factors associated with poorer survivorship were lower albumin level, rapid growth primary tumor, and lower hemoglobin level.

Conclusions

Although the final models must be externally validated, the algorithms showed good performance on internal validation. The final models have been incorporated into a freely accessible web application that can be found at https://sorg-apps.shinyapps.io/extremitymetssurvival/. Pending external validation, clinicians may use this tool to predict survival for their individual patients to help in shared treatment decision making.

Level of evidence

Level III, therapeutic study.",2020-02-01 +32256778,Molecular cytogenetic characterization of small supernumerary marker 15 in infertile male: A case report.,"Small supernumerary marker chromosomes (sSMCs) are defined as structurally abnormal chromosomes that may be detected pre- or postnataly in patients with developmental and/or mental retardation or infertility. sSMC on chromosome 15 accounts for the highest proportion of all sSMCs and may be detected in subfertile individuals. The present study reports the case of a male patient with oligoasthenoteratozoospermia and an sSMC. The sSMC was identified and characterized according to G-banding analysis, chromosomal microarray analysis (CMA) and fluorescence in situ hybridization (FISH) analysis. Chromosomal karyotype analysis suggested that the patient presented with 47,XY,+mar. CMA was used to characterize the sSMC, which revealed a 0.44-Mb microduplication in 6q25.3q26. Subsequently, FISH using centromere-specific probes for chromosomes 13/21, 14/22 and 15 was applied to identify the origin of the sSMC, which was finally determined to be inverted duplicated(15)(q11.2). It was hypothesized that heterochromatin in the sSMC is responsible for the patient's fertility problem. The presence of heterochromatin may disrupt regular meiosis, thereby affecting normal spermatogenesis. Impaired spermatogenesis in infertile males with an sSMC derived from chromosome 15 was also reviewed by searching published literature and the sSMC database (http://ssmc-tl.com/sSMC.html). For patients with low sperm parameters and complete absence of spermatozoa in the ejaculate, including infertile males with an sSMC with spermatozoa, intracytoplasmic sperm injection is considered as an effective assisted reproductive technique. 
It may be concluded that molecular cytogenetic techniques are critical tools for delineating sSMCs in infertile males and may be beneficial in identifying sSMC carriers to ensure they receive clinical genetic counseling.",2020-02-21 +32674771,Development and comparison of formula assignment algorithms for ultrahigh-resolution mass spectra of natural organic matter.,"Increasing number of application of ultrahigh-resolution mass spectrometry (UHR-MS) to natural organic matter (NOM) characterization requires an efficient and accurate formula assignment from a number of mass data. Herein, we newly developed two automated batch codes (namely TRFu and FuJHA) and assessed their formula assignment accuracy together with frequently used open access algorithms (i.e., Formularity and WHOI). Overall assignment accuracy for 8719 NOM-like emerging chemicals with known molecular formulae (mass range from 68 Da to 1000 Da) was highest (94%) for TRFu. Further, TRFu showed up to 99.1% formula assignment ratio for a total 76,880 UHR-MS peaks from 35 types of NOM (e.g., aquatic, soil/sediment, biochar). Therefore, as a reliable and practically feasible tool, the automated batch TRFu (freely available at ChemRxiv, https://doi.org/10.26434/chemrxiv.9917399) can precisely characterize UHR-MS spectra of various NOM and could be extended to non-target screening of NOM-like emerging chemicals in natural and engineered environments.",2020-05-24 +31746988,"TFmiR2: constructing and analyzing disease-, tissue- and process-specific transcription factor and microRNA co-regulatory networks.","

Summary

TFmiR2 is a freely available web server for constructing and analyzing integrated transcription factor (TF) and microRNA (miRNA) co-regulatory networks for human and mouse. TFmiR2 generates tissue- and biological process-specific networks for the set of deregulated genes and miRNAs provided by the user. Furthermore, the service can now identify key driver genes and miRNAs in the constructed networks by utilizing the graph theoretical concept of a minimum connected dominating set. These putative key players as well as the newly implemented four-node TF-miRNA motifs yield novel insights that may assist in developing new therapeutic approaches.

Availability and implementation

The TFmiR2 web server is available at http://service.bioinformatik.uni-saarland.de/tfmir2.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +27618709,DenHunt - A Comprehensive Database of the Intricate Network of Dengue-Human Interactions.,"Dengue virus (DENV) is a human pathogen and its etiology has been widely established. There are many interactions between DENV and human proteins that have been reported in literature. However, no publicly accessible resource for efficiently retrieving the information is yet available. In this study, we mined all publicly available dengue-human interactions that have been reported in the literature into a database called DenHunt. We retrieved 682 direct interactions of human proteins with dengue viral components, 382 indirect interactions and 4120 differentially expressed human genes in dengue infected cell lines and patients. We have illustrated the importance of DenHunt by mapping the dengue-human interactions on to the host interactome and observed that the virus targets multiple host functional complexes of important cellular processes such as metabolism, immune system and signaling pathways suggesting a potential role of these interactions in viral pathogenesis. We also observed that 7 percent of the dengue virus interacting human proteins are also associated with other infectious and non-infectious diseases. Finally, the understanding that comes from such analyses could be used to design better strategies to counteract the diseases caused by dengue virus. The whole dataset has been catalogued in a searchable database, called DenHunt (http://proline.biochem.iisc.ernet.in/DenHunt/).",2016-09-12 +32631067,Sound Production Treatment for Acquired Apraxia of Speech: An Examination of Dosage in Relation to Probe Performance.,"Purpose This study was designed to examine the association of dosage and effects of Sound Production Treatment (SPT) for acquired apraxia of speech. 
Method Treatment logs and probe data from 20 speakers with apraxia of speech and aphasia were submitted to a retrospective analysis. The number of treatment sessions and teaching episodes was examined relative to (a) change in articulation accuracy above baseline performance, (b) mastery of production, and (c) maintenance. The impact of practice schedule (SPT-Blocked vs. SPT-Random) was also examined. Results The average number of treatment sessions conducted prior to change was 5.4 for SPT-Blocked and 3.9 for SPT-Random. The mean number of teaching episodes preceding change was 334 for SPT-Blocked and 179 for SPT-Random. Mastery occurred within an average of 13.7 sessions (1,252 teaching episodes) and 12.4 sessions (1,082 teaching episodes) for SPT-Blocked and SPT-Random, respectively. Comparisons of dosage metric values across practice schedules did not reveal substantial differences. Significant negative correlations were found between follow-up probe performance and the dosage metrics. Conclusions Only a few treatment sessions were needed to achieve initial positive changes in articulation, with mastery occurring within 12-14 sessions for the majority of participants. Earlier occurrence of change or mastery was associated with better follow-up performance. Supplemental Material https://doi.org/10.23641/asha.12592190.",2020-07-06 +32706182,Graphical enhancements to summary receiver operating characteristic plots to facilitate the analysis and reporting of meta-analysis of diagnostic test accuracy data.,"Diagnostic test accuracy (DTA) systematic reviews are conducted to summarize evidence on the accuracy of a diagnostic test including a critical evaluation of the primary studies. Where appropriate, the evidence is meta-analyzed to obtain pooled estimates of effectiveness.In this study, we reviewed and critiqued three DTA guidance documents with respect to the graphical presentation of DTA meta-analysis results. 
All three documents recommended the use of two forms of graphical presentation: (a) forest plots displaying meta-analysis results for sensitivity (ie, the true positive rate) and specificity (ie, true negative rate) separately, and (b) Summary Receiver Operating Characteristic (SROC) curve to provide a global summary of test performance. Two primary shortcomings were identified: (a) lack of incorporation of quality assessment results into the main analysis and; (b) ambiguity with which the contribution of individual studies is represented on SROC curves. In response, two alternative graphical approaches were developed: A quality assessment enhanced SROC plot which displays the results from individual studies in the meta-analysis with multiple indicators of quality assessed using QUADAS-2; and A percentage study weights enhanced SROC plot which accurately portrays the percentage contribution each study makes to the meta-analysis. The proposed enhanced SROC curves facilitate the exploration of DTA data, leading to a deeper understanding of the primary studies included in a DTA meta-analysis including identifying reasons for between study heterogeneity and why specific study results may be divergent. Both plots can easily be produced in the free online interactive application, MetaDTA (https://crsu.shinyapps.io/dta_ma/).",2020-08-12 +32324757,Expression based biomarkers and models to classify early and late-stage samples of Papillary Thyroid Carcinoma.,"

Introduction

Recently, the rise in the incidences of thyroid cancer worldwide renders it to be the sixth most common cancer among women. Commonly, Fine Needle Aspiration biopsy predominantly facilitates the diagnosis of the nature of thyroid nodules. However, it is inconsiderable in determining the tumor's state, i.e., benign or malignant. This study aims to identify the key RNA transcripts that can segregate the early and late-stage samples of Thyroid Carcinoma (THCA) using RNA expression profiles.

Materials and methods

In this study, we used the THCA RNA-Seq dataset of The Cancer Genome Atlas, consisting of 500 cancer and 58 normal (adjacent non-tumorous) samples obtained from the Genomics Data Commons (GDC) data portal. This dataset was dissected to identify key RNA expression features using various feature selection techniques. Subsequently, samples were classified based on selected features employing different machine learning algorithms.

Results

Single gene ranking based on the Area Under the Receiver Operating Characteristics (AUROC) curve identified the DCN transcript that can classify the early-stage samples from late-stage samples with 0.66 AUROC. To further improve the performance, we identified a panel of 36 RNA transcripts that achieved F1 score of 0.75 with 0.73 AUROC (95% CI: 0.62-0.84) on the validation dataset. Moreover, prediction models based on 18-features from this panel correctly predicted 75% of the samples of the external validation dataset. In addition, the multiclass model classified normal, early, and late-stage samples with AUROC of 0.95 (95% CI: 0.84-1), 0.76 (95% CI: 0.66-0.85) and 0.72 (95% CI: 0.61-0.83) on the validation dataset. Besides, a five protein-coding transcripts panel was also recognized, which segregated cancer and normal samples in the validation dataset with F1 score of 0.97 and 0.99 AUROC (95% CI: 0.91-1).

Conclusion

We identified 36 important RNA transcripts whose expression segregated early and late-stage samples with reasonable accuracy. The models and dataset used in this study are available from the webserver CancerTSP (http://webs.iiitd.edu.in/raghava/cancertsp/).",2020-04-23 +31919993,LncRNA ZFPM2-AS1 promotes lung adenocarcinoma progression by interacting with UPF1 to destabilize ZFPM2.,"Lung adenocarcinoma (LUAD), a histological subclass of non-small-cell lung cancer, is globally the leading cause of cancer-related deaths. Long noncoding RNAs (lncRNAs) are emerging as cancer regulators. Zinc finger protein multitype 2 antisense RNA 1 (ZFPM2-AS1) is an oncogene in gastric cancer, but its functions have not been investigated in LUAD. We showed that ZFPM2-AS1 expression is high in LUAD samples based on GEPIA database (http://gepia.cancer-pku.cn/) and validated ZFPM2-AS1 upregulation in LUAD cell lines. Functionally, ZFPM2-AS1 facilitated proliferation, invasion, and epithelial-to-mesenchymal transition of LUAD cells. Thereafter, we found that ZFPM2 was negatively regulated by ZFPM2-AS1, and identified the suppressive effect of ZFPM2 regulation by ZFPM2-AS1 on LUAD progression. Mechanistically, we showed that ZFPM2-AS1 interacted with up-frameshift 1 (UPF1) to regulate mRNA decay of ZFPM2. Rescue assays in vitro and in vivo confirmed that ZFPM2-AS1 regulated LUAD progression and tumor growth through ZFPM2. Taken together, our findings demonstrate a role for the ZFPM2-AS1-UPF1-ZFPM2 axis in LUAD progression, suggesting ZFPM2-AS1 as a new potential target for LUAD treatment.",2020-02-20 +32856138,Expression of S100 proteins is associated with HBV intrauterine transmission.,"

Purpose

The mechanisms underlying HBV intrauterine transmission remain unknown. In this study, we explored the mechanism of HBV intrauterine transmission by iTRAQ proteomics analysis.

Methods

iTRAQ technology was applied to perform comparative proteomics studies on six HBV+/+ neonates and six HBV+/- neonates whose mothers and fathers were HBsAg positive and paternal HBsAg negative, respectively. The data obtained from the mass spectrometer were analyzed using MASCOT ( https://matrixscience.com ) to qualitatively and quantitatively compare the differentially expressed proteins in the two groups. Gene Ontology and KEGG pathway analyses were performed to analyze the differentially expressed proteins. The expressions of HBV intrauterine transmission-related proteins in serum samples and corresponding placental tissues were further verified by immunohistochemistry and Western Blot. Then, the human trophoblast cell line (Swan71) infected with HBV was used to analyze the potential mechanisms of HBV intrauterine transmission under the mediation of differential proteins.

Results

A total of 35 differentially expressed proteins, including 17 up-regulated proteins and 18 down-regulated proteins, were identified by comparing serum protein expression levels in HBV+/+ and HBV+/- neonates. The differentially expressed proteins were mainly related to RAGE receptor binding, NF-kappa B transcription factor activity, innate immune response, defense response to bacterium, and the signaling pathway in pathogenic microorganism infection. The expressions of S100A8/9/12 in HBV+/+ maternal placenta tissue were significantly increased. The expressions of S100A8/9/12 proteins in Swan71 cells were significantly increased after HBV infection.

Conclusion

High expression of S100 proteins may be associated with the intrauterine-transplacental transmission of HBV.",2020-08-27 +33061343,The Value of FENO Measurement for Predicting Treatment Response in Patients with Acute Exacerbation of Chronic Obstructive Pulmonary Disease.,"

Background

Fractional exhaled nitric oxide (FENO) has been shown to be a marker of airway inflammation in various pulmonary diseases, including chronic obstructive pulmonary disease (COPD). In this study, we assessed the FENO level in patients with acute exacerbations of COPD (AECOPD) and analyzed the predictive value of the FENO level for treatment response.

Methods

Demographic data were collected at admission. FENO, lung function, blood gases, COPD Assessment Test (CAT), and modified Medical Research Council (mMRC) scores were measured at admission and on day 7. At the second visit, the patients were asked to report their health status; scores ranged from 1 to 5, representing ""much better"", ""slightly better"", ""no change"", ""slightly worse"", and ""much worse"", respectively. The treatment response was evaluated based on the patient's reported health status (responders were those who reported much better and slightly better) and lung function (responders were those who presented an increase in FEV1 over 200 mL).

Results

A total of 182 patients were recruited into the analysis. The FENO level positively correlated with an increase in FEV1 and FEV1% (r = 0.291, p < 0.001 and r = 0.205, p = 0.005, respectively), but negatively correlated with a decrease in the COPD Assessment Test (CAT) score (r = -0.197, p = 0.008) and patient-reported health status (rho = -0.408, p<0.001). An inverse correlation was observed between FENO concentrations at admission and the length of hospital stay. The cut-off point for differentiating responders, identified by health status, was 18 ppb, with the sensitivity being 89.7% and specificity 88.9%.

Conclusion

FENO levels, determined at hospital admission, are potential to predict the overall treatment response in AECOPD patients, including remission in subjective patient-reported health statuses and, also, improvements in lung function.

Registry number

ChiCTR-ROC-16,009,087 (http://www.chictr.org.cn/).",2020-09-24 +33038477,The impact of the Covid-19 lockdown on the experiences and feeding practices of new mothers in the UK: Preliminary data from the COVID-19 New Mum Study.,"

Background

The COVID-19 New Mum Study is recording maternal experiences and infant feeding during the UK lockdown. This report from week 1 of the survey describes and compares the delivery and post-natal experiences of women who delivered before (BL) versus during (DL) the lockdown.

Methods

Women living in the UK aged ≥18 years with an infant ≤12 months of age completed an anonymous online survey (https://is.gd/covid19newmumstudy). Information/links are shared via websites, social media and existing contacts.

Results

From 27.5.20-3.6.20, 1365 women provided data (94% white, 95% married/with partner, 66% degree/higher qualification, 86% living in house; 1049 (77%) delivered BL and 316 (23%) DL. Delivery mode, skin-to-skin contact and breastfeeding initiation did not differ between groups. DL women had shorter hospital stays (p < 0.001). 39% reported changes to their birth plan. Reflecting younger infant age, 59% of DL infants were exclusively breast-fed/mixed fed versus 39% of BL (p < 0.05). 13% reported a change in feeding; often related to lack of breastfeeding support, especially with practical problems. Important sources of feeding support were the partner (60%), health professional (50%) and online groups (47%). 45% of DL women reported insufficient feeding support. Among BL women, 57% and 69% reported decreased feeding support and childcare, respectively. 40% BL/45% DL women reported insufficient support with their own health, 8%/9% contacted a mental health professional; 11% reported their mental health was affected. 9% highlighted lack of contact/support from family and distress that they had missed seeing the baby.

Conclusion

Lockdown has impacted maternal experiences, resulting in distress for many women. Our findings suggest the need for better infant feeding support, especially 'face-to-face' support for practical issues; and recognising and supporting mothers who are struggling with mental health challenges or other aspects of their health. The effectiveness of online versus face-to-face contact is currently uncertain, and requires further evaluation.",2020-10-07 +30928859,A systematic review and meta-analysis of the association between daily mean temperature and mortality in China.,"

Purpose

We summarized the evidence on the effects of heat and cold exposures on mortality in China. We included studies published on this topic in both Chinese and English, thereby filling a gap in knowledge using data from a country that consists of one-fifth of the world's population.

Methods

We conducted a systematic search of peer-reviewed studies on the association between daily mean temperature and mortality published from 2001 up to July 2018. We searched one Chinese database (China National Knowledge infrastructure, http://www.cnki.net) and three English databases (PubMed, Scopus, Web of Science). We converted the effect estimates of heat/cold to rate ratios (RRs) associated with 1° increase/decrease beyond the heat/cold reference temperatures. For studies that provided lag-specific estimates, we used both the maximum and minimum of RR estimates. We calculated summary effect estimates for all-cause and cause-specific mortalities, as well as RRs stratified by sex, age, and socioeconomic status. We also investigated patterns of heat and cold adaptation at different latitudes, and at different reference temperatures.

Results

In total, 45 articles were included in this systematic review. For every 1° temperature increase/decrease beyond reference points, the rate of non-accidental mortality increased by 2% (RR, 1.02; 95% confidence interval (95% CI [1.01-1.02]) for heat and 4% (RR, 1.04; 95% CI [1.03-1.04]) for cold, respectively; the rate of cardiovascular mortality increased 3% (RR, 1.03; 95% CI [1.03-1.04]) for heat and 6% (RR, 1.06; 95% CI [1.04-1.07]) for cold; the rate of respiratory mortality increased 2% (RR, 1.02; 95% CI [1.01-1.03]) for heat and 2% (RR, 1.02; 95% CI [1.00-1.04]) for cold; the rate of cerebrovascular mortality increased 2% (RR, 1.02; 95% CI [1.02-1.03]) for heat and 3% (RR, 1.03; 95% CI [1.02-1.04]) for cold. We identified a variation in optimal temperature range related to latitude of the residential area, and differences in people's capability to adapt to heat versus cold.

Conclusion

We found consistent evidence of the association between temperature and mortality, as well as evidence of patterns in human adaptation, and we discussed the implications of our findings.",2019-03-22 +32573926,Links between screen use and depressive symptoms in adolescents over 16 years: Is there evidence for increased harm?,"Recent scholarship has been divided on whether an observed increase in suicides in the United States among teenagers and preteens (12-18) can be attributed to an increased use in social screen media beginning in 2009. If these concerns are accurate effect sizes for the relationship between screen use and suicide should increase over the 16 years since 2001. The current study used the Florida Youth Risk Behavior Survey data (n = 45,992) from 2001 to 2017, to track effect sizes for screen/depression correlations, controlling for age and gender. A second dataset from the UK Understanding Society dataset (ns for each wave ranged between 3,536 and 4,850) was used to study associations between time spent on social media and emotional problems. Metaregression was be used to examine whether effect sizes increase across time. Results generally did not support the hypothesis that effect sizes between screen and social media use are increasing over time. Aside from the trends over time, for any given year, most effect sizes were below the r = .10 threshold used for interpretation with the exception of computer use which was just at that threshold. It is concluded that screens and social media use are unlikely to bear major responsibility for youth suicide trends. 
A video abstract of this article can be viewed at https://www.youtube.com/watch?v=76S7cxiaU88.",2020-07-05 +29697370,LDSplitDB: a database for studies of meiotic recombination hotspots in MHC using human genomic data.,"BACKGROUND:Meiotic recombination happens during the process of meiosis when chromosomes inherited from two parents exchange genetic materials to generate chromosomes in the gamete cells. The recombination events tend to occur in narrow genomic regions called recombination hotspots. Its dysregulation could lead to serious human diseases such as birth defects. Although the regulatory mechanism of recombination events is still unclear, DNA sequence polymorphisms have been found to play crucial roles in the regulation of recombination hotspots. METHOD:To facilitate the studies of the underlying mechanism, we developed a database named LDSplitDB which provides an integrative and interactive data mining and visualization platform for the genome-wide association studies of recombination hotspots. It contains the pre-computed association maps of the major histocompatibility complex (MHC) region in the 1000 Genomes Project and the HapMap Phase III datasets, and a genome-scale study of the European population from the HapMap Phase II dataset. Besides the recombination profiles, related data of genes, SNPs and different types of epigenetic modifications, which could be associated with meiotic recombination, are provided for comprehensive analysis. To meet the computational requirement of the rapidly increasing population genomics data, we prepared a lookup table of 400 haplotypes for recombination rate estimation using the well-known LDhat algorithm which includes all possible two-locus haplotype configurations. CONCLUSION:To the best of our knowledge, LDSplitDB is the first large-scale database for the association analysis of human recombination hotspots with DNA sequence polymorphisms. 
It provides valuable resources for the discovery of the mechanism of meiotic recombination hotspots. The information about MHC in this database could help understand the roles of recombination in human immune system. DATABASE URL: http://histone.scse.ntu.edu.sg/LDSplitDB.",2018-04-20 +32201534,Prognostic Value and Efficacy Evaluation of Novel Drugs for Multiple Myeloma Patients with 1q21 Amplification (Amp1q21) Only: A Systematic Review of Randomized Controlled Trials.,"Background: Multiple myeloma (MM) is a heterogeneous disease characterized by chromosomal translocation, deletion, and amplification in plasma cells, resulting in a huge heterogeneity in its outcomes. Of all these cytogenetic abnormalities, Amp1q21 is most commonly detected, which is always associated with significantly shorter progression-free survival (PFS) and overall survival (OS) than normal 1q copy number status. In the era of novel agents such as bortezomib, ixazomib, lenalidomide, a head-to-head comparison of all these agents is still absent, especially in the patients with Amp1q21 alone. So, aiming to explore the optimum therapy to the patients with Amp1q21 only, we conduct this study. Patients and Methods: We searched the PubMed, the Cochrane Library, PMC and the Embase databases, and we selected all the randomized controlled trials (RCTs) in English about MM with Amp1q21 up to April, 2019. A total of 72 papers were full screened and finally 2 literatures can be included in our study. Results: Of the two studies, the one is about IRd (ixazomib, lenalidomide, dexamethasone) vs. placebo-Rd (HR, 0.781; 95% CI, 0.492-1.240), another is about VAD (vincristine, adriamycin, dexamethasone) vs. PAD (bortezomib, adriamycin, dexamethasone) (3-year survival rate: 59% vs. 83%, p=0.016). Conclusion: From this review, MM patients with Amp1q21 may somewhat benefit from ixazomib but the evidence is still stuffless. 
What's more, a head-to-head comparison between ixazomib and other agents among MM patients with Amp1q21 is also absent. So, we sincerely expect this review can attract some attention for the therapy of this special part of patients. This study was registered in https://www.crd.york.ac.uk/prospero/#recordDetails.",2020-02-19 +29363422,Construction of Pará rubber tree genome and multi-transcriptome database accelerates rubber researches.,"

Background

Natural rubber is an economically important material. Currently the Pará rubber tree, Hevea brasiliensis is the main commercial source. Little is known about rubber biosynthesis at the molecular level. Next-generation sequencing (NGS) technologies brought draft genomes of three rubber cultivars and a variety of RNA sequencing (RNA-seq) data. However, no current genome or transcriptome databases (DB) are organized by gene.

Results

A gene-oriented database is a valuable support for rubber research. Based on our original draft genome sequence of H. brasiliensis RRIM600, we constructed a rubber tree genome and transcriptome DB. Our DB provides genome information including gene functional annotations and multi-transcriptome data of RNA-seq, full-length cDNAs including PacBio Isoform sequencing (Iso-Seq), ESTs and genome wide transcription start sites (TSSs) derived from CAGE technology. Using our original and publically available RNA-seq data, we calculated co-expressed genes for identifying functionally related gene sets and/or genes regulated by the same transcription factor (TF). Users can access multi-transcriptome data through both a gene-oriented web page and a genome browser. For the gene searching system, we provide keyword search, sequence homology search and gene expression search; users can also select their expression threshold easily.

Conclusion

The rubber genome and transcriptome DB provides rubber tree genome sequence and multi-transcriptomics data. This DB is useful for comprehensive understanding of the rubber transcriptome. This will assist both industrial and academic researchers for rubber and economically important close relatives such as R. communis, M. esculenta and J. curcas. The Rubber Transcriptome DB release 2017.03 is accessible at http://matsui-lab.riken.jp/rubber/ .",2018-01-19 +32110488,Tight DNA-protein complexes isolated from barley seedlings are rich in potential guanine quadruplex sequences.,"

Background

The concept of chromatin domains attached to the nuclear matrix is being revisited, with nucleus described as a set of topologically associating domains. The significance of the tightly bound to DNA proteins (TBP), a protein group that remains attached to DNA after its deproteinization should be also revisited, as the existence of these interactions is in good agreement with the concept of the topologically associating domain. The work aimed to characterize the DNA component of TBP isolated from barley seedlings.

Methods

The tight DNA-protein complexes from the first leaves, coleoptiles, and roots of barley seedlings were isolated by purification with chromatography on nitrocellulose or exhaustive digestion of DNA with DNase I. Cloning and transformation were performed using pMOSBBlue Blunt Ended Cloning Kit. Inserts were amplified by PCR, and sequencing was performed on the MegaBace 1000 Sequencing System. The BLAST search was performed using sequence databases at NCBI, CR-EST, and TREP and Ensembl Plants databases. Comparison to MAR/SAR sequences was performed using http://smartdb.bioinf.med.uni-goettingen.de/cgi-bin/SMARtDB/smar.cgi database. The prediction of G quadruplexes (GQ) was performed with the aid of R-studio library pqsfinder. CD spectra were recorded on a Chirascan CS/3D spectrometer.

Results

Although the barley genome is AT-rich (43% of GC pairs), most DNA fragments associated with TBP were GC-rich (up to 70% in some fractions). Both fractionation procedures yielded a high proportion of CT-motif sequences presented predominantly by the 16-bp CC(TCTCCC)2 TC fragment present in clones derived from the TBP-bound DNA and absent in free DNA. BLAST analysis revealed alignment with different barley repeats. Some clones, however, aligned with both nuclear and chloroplast structural genes. Alignments with MAR/SAR motifs were very few. The analysis produced by the pqsfinder program revealed numerous potential quadruplex-forming sites in the TBP-bound sequences. A set of oligonucleotides containing sites of possible GQs were designed and ordered. Three of them represented the minus strand of the CT-repeat. Two were derived from sequences of two clones of nitrocellulose retained fraction from leaves and contained GC-rich motifs different from the CT motif. Circular dichroism spectroscopy revealed profound changes in spectra when oligonucleotides were incubated with 100 mM KCl. There was either an increase of positive band in the area of 260 nm or the formation of a positive band at 290 nm. In the former case, changes are typical for parallel G-quadruplexes and, in the latter, 3 + 1 structures.

Discussion

The G-quadruplexes anchor proteins are probably involved in the maintenance of the topologically associated domain structure.",2020-02-18 +25392405,Tissue-specific transcriptome sequencing analysis expands the non-human primate reference transcriptome resource (NHPRTR).,"The non-human primate reference transcriptome resource (NHPRTR, available online at http://nhprtr.org/) aims to generate comprehensive RNA-seq data from a wide variety of non-human primates (NHPs), from lemurs to hominids. In the 2012 Phase I of the NHPRTR project, 19 billion fragments or 3.8 terabases of transcriptome sequences were collected from pools of ∼ 20 tissues in 15 species and subspecies. Here we describe a major expansion of NHPRTR by adding 10.1 billion fragments of tissue-specific RNA-seq data. For this effort, we selected 11 of the original 15 NHP species and subspecies and constructed total RNA libraries for the same ∼ 15 tissues in each. The sequence quality is such that 88% of the reads align to human reference sequences, allowing us to compute the full list of expression abundance across all tissues for each species, using the reads mapped to human genes. This update also includes improved transcript annotations derived from RNA-seq data for rhesus and cynomolgus macaques, two of the most commonly used NHP models and additional RNA-seq data compiled from related projects. Together, these comprehensive reference transcriptomes from multiple primates serve as a valuable community resource for genome annotation, gene dynamics and comparative functional analysis.",2014-11-11 +29559379,Clonal expansion across the seas as seen through CPLP-TB database: A joint effort in cataloguing Mycobacterium tuberculosis genetic diversity in Portuguese-speaking countries.,"Tuberculosis (TB) remains a major health problem within the Community of Portuguese Language Speaking Countries (CPLP). 
Despite the marked variation in TB incidence across its member-states and continued human migratory flux between countries, a considerable gap in the knowledge on the Mycobacterium tuberculosis population structure and strain circulation between the countries still exists. To address this, we have assembled and analysed the largest CPLP M. tuberculosis molecular and drug susceptibility dataset, comprised by a total of 1447 clinical isolates, including 423 multidrug-resistant isolates, from five CPLP countries. The data herein presented reinforces Latin American and Mediterranean (LAM) strains as the hallmark of M. tuberculosis populational structure in the CPLP coupled with country-specific differential prevalence of minor clades. Moreover, using high-resolution typing by 24-loci MIRU-VNTR, six cross-border genetic clusters were detected, thus supporting recent clonal expansion across the Lusophone space. To make this data available to the scientific community and public health authorities we developed CPLP-TB (available at http://cplp-tb.ff.ulisboa.pt), an online database coupled with web-based tools for exploratory data analysis. As a public health tool, it is expected to contribute to improved knowledge on the M. tuberculosis population structure and strain circulation within the CPLP, thus supporting the risk assessment of strain-specific trends.",2018-03-17 +32967922,Trends in Use and In-Hospital Outcomes of Subcutaneous Implantable Cardioverter Defibrillators in Patients Undergoing Long-Term Dialysis.,"

Background and objectives

Patients on dialysis are at high risk of complications related to implantable cardioverter defibrillator (ICD) implantation; use of subcutaneous ICDs may be preferred over transvenous devices due to lower risk of bloodstream infection and interference with vascular access sites. We evaluated trends in use and in-hospital outcomes of subcutaneous compared with transvenous ICDs among patients on dialysis in the United States.

Design, setting, participants, & measurements

Retrospective analysis of ICD implants from 2012 to 2018 among patients on dialysis reported to the National Cardiovascular Data Registry ICD Registry, a nationally representative US ICD Registry. We examined overall trends in subcutaneous ICD adoption as a proportion of all eligible ICD implants among patients on dialysis and then compared in-hospital outcomes between eligible subcutaneous ICD and transvenous ICD recipients using inverse probability of treatment weighting.

Results

Of the 23,136 total ICD implants in patients on dialysis during the study period, 3195 (14%) were subcutaneous ICDs. Among eligible first-time ICD recipients on dialysis, the proportion of subcutaneous ICDs used increased yearly from 10% in 2012 to 69% in 2018. In propensity score-weighted analysis of 3327 patients, compared with transvenous ICDs, patients on dialysis receiving subcutaneous ICDs had a higher rate of in-hospital cardiac arrest (2% versus 0.4%, P=0.002), but there was no significant difference in total in-hospital complications (2% versus 1%, P=0.08), all-cause death, or length of hospital stay.

Conclusions

The utilization of subcutaneous ICDs among US patients on dialysis has been steadily increasing. The overall risk of short-term complications is low and comparable with transvenous ICDs, but higher risks of in-hospital cardiac arrest merits closer monitoring and further investigation.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2020_09_23_CJN07920520.mp3.",2020-09-23 +32271876,MODifieR: an Ensemble R Package for Inference of Disease Modules from Transcriptomics Networks.,"

Motivation

Complex diseases are due to the dense interactions of many disease-associated factors that dysregulate genes that in turn form the so-called disease modules, which have shown to be a powerful concept for understanding pathological mechanisms. There exist many disease module inference methods that rely on somewhat different assumptions, but there is still no gold standard or best-performing method. Hence, there is a need for combining these methods to generate robust disease modules.

Results

We developed MODule IdentiFIER (MODifieR), an ensemble R package of nine disease module inference methods from transcriptomics networks. MODifieR uses standardized input and output allowing the possibility to combine individual modules generated from these methods into more robust disease-specific modules, contributing to a better understanding of complex diseases.

Availability and implementation

MODifieR is available under the GNU GPL license and can be freely downloaded from https://gitlab.com/Gustafsson-lab/MODifieR and as a Docker image from https://hub.docker.com/r/ddeweerd/modifier.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +29934806,Identification of transposable elements fused in the exonic region of the olive flounder genome.,"Transposable elements (TEs) are mobile genetic sequences that comprise a large portion of vertebrate genomes. The olive flounder (Paralichthys olivaceus) is a valuable marine resource in East Asia. The scope of most genomic studies on the olive flounder is limited to its immunology as their focus is the prevention of mass mortality of this species. Thus, for a broader understanding of the species, its genomic information is consistently in demand. Transcripts sequences were acquired from transcriptome analysis using gill tissues of 12 olive flounders. Distribution of TEs inserted in exonic region of the olive flounder genome was analyzed using RepeatMasker ( http://www.repeatmasker.org/ ). We found 1140 TEs in the exonic region of the genome and long interspersed nuclear elements (LINEs) and long terminal repeats (LTRs) insertions occurred with forward orientation preferences. Transposons belonging to the hAt, Gypsy, and LINE 1 (L1) subfamilies were the most abundant DNA transposons, LTRs, and long interspersed elements (LINEs), respectively. Finally, we carried out a gene ontology analysis to determine the function of TE-fused genes. These results provide some genomic information about TEs that is useful for future research on changes in properties and functions of genes by TEs in the olive flounder genome.",2018-03-09 +27794551,HIPPIE v2.0: enhancing meaningfulness and reliability of protein-protein interaction networks.,"The increasing number of experimentally detected interactions between proteins makes it difficult for researchers to extract the interactions relevant for specific biological processes or diseases. This makes it necessary to accompany the large-scale detection of protein-protein interactions (PPIs) with strategies and tools to generate meaningful PPI subnetworks. 
To this end, we generated the Human Integrated Protein-Protein Interaction rEference or HIPPIE (http://cbdm.uni-mainz.de/hippie/). HIPPIE is a one-stop resource for the generation and interpretation of PPI networks relevant to a specific research question. We provide means to generate highly reliable, context-specific PPI networks and to make sense out of them. We just released the second major update of HIPPIE, implementing various new features. HIPPIE grew substantially over the last years and now contains more than 270 000 confidence scored and annotated PPIs. We integrated different types of experimental information for the confidence scoring and the construction of context-specific networks. We implemented basic graph algorithms that highlight important proteins and interactions. HIPPIE's graphical interface implements several ways for wet lab and computational scientists alike to access the PPI data.",2016-10-24 +32123502,The FrogID dataset: expert-validated occurrence records of Australia's frogs collected by citizen scientists.,"This dataset represents expert-validated occurrence records of calling frogs across Australia collected via the national citizen science project FrogID (http://www.frogid.net.au). FrogID relies on participants recording calling frogs using smartphone technology, after which point the frogs are identified by expert validators, resulting in a database of georeferenced frog species records. This dataset represents one full year of the project (10 November 2017-9 November 2018), including 54,864 records of 172 species, 71% of the known frog species in Australia. This is the first instalment of the dataset, and we anticipate providing updated datasets on an annual basis.",2020-02-17 +32081774,Discovery and development of safe-in-man broad-spectrum antiviral agents.,"Viral diseases are one of the leading causes of morbidity and mortality in the world. 
Virus-specific vaccines and antiviral drugs are the most powerful tools to combat viral diseases. However, broad-spectrum antiviral agents (BSAAs, i.e. compounds targeting viruses belonging to two or more viral families) could provide additional protection of the general population from emerging and re-emerging viral diseases, reinforcing the arsenal of available antiviral options. Here, we review discovery and development of BSAAs and summarize the information on 120 safe-in-man agents in a freely accessible database (https://drugvirus.info/). Future and ongoing pre-clinical and clinical studies will increase the number of BSAAs, expand the spectrum of their indications, and identify drug combinations for treatment of emerging and re-emerging viral infections as well as co-infections.",2020-02-17 +32064572,Computational determination of human PPARG gene: SNPs and prediction of their effect on protein functions of diabetic patients.,"BACKGROUND:The Peroxisome proliferator-activated receptor gamma gene (PPARG), encodes a member of the peroxisome-activated receptor subfamily of nuclear receptors. PPARs form heterodimers with retinoid X receptors (RXRs) which regulate transcription of various genes. Three subtypes of PPARs are known: PPAR-alpha, PPAR-delta and PPAR-gamma. The protein encoded by this gene is PPAR-gamma which is a regulator of adipocyte differentiation. PPARG-gamma has been implicated in the pathology of numerous diseases including obesity, diabetes, atherosclerosis and cancer. AIM:This study aimed to perform insilico analysis to predict the effects that can be imposed by SNPs reported in PPARG gene. METHODOLOGY:This gene was investigated in NCBI database (http://www.ncbi.nlm.nih.gov/) during the year 2016 and the SNPs in coding region (exonal SNPs) that are non-synonymous (ns SNPs) were analyzed by computational softwares. SIFT, Polyphen, I-Mutant and PHD-SNP softwares). 
SIFT was used to filter the deleterious SNPs, Polyphen was used to determine the degree of pathogenicity, I-Mutant was used to determine the effect of mutation on protein stability while PHD-SNP software was used to investigate the effect of mutation on protein function. Furthermore, Structural and functional analysis of ns SNPs was also studied using Project HOPE software and modeling was conducted by Chimera. RESULTS:A total of 34,035 SNPs from NCBI, were found, 21,235 of them were found in Homo sapiens, 134 in coding non synonymous (missense) and 89 were synonymous. Only SNPs present in coding regions were selected for analysis. Out of 12 deleterious SNPs sorted by SIFT, 10 were predicted by Polyphen to be probably damaging with PISC score = 1 and only two were benign. All these 10 double positive SNPs were disease related as predicted by PHD-SNPs and revealed decreased stability indicated by I-Mutant. CONCLUSION:Based on the findings of this study, it can be concluded that the deleterious ns SNPs (rs72551364 and rs121909244SNPs) of PPARG are important candidates for the cause of different types of human diseases including diabetes mellitus.",2020-02-17 +32207514,Cancer subtype classification and modeling by pathway attention and propagation.,"

Motivation

Biological pathway is an important curated knowledge of biological processes. Thus, cancer subtype classification based on pathways will be very useful to understand differences in biological mechanisms among cancer subtypes. However, pathways include only a fraction of the entire gene set, only one-third of human genes in KEGG, and pathways are fragmented. For this reason, there are few computational methods to use pathways for cancer subtype classification.

Results

We present an explainable deep-learning model with attention mechanism and network propagation for cancer subtype classification. Each pathway is modeled by a graph convolutional network. Then, a multi-attention-based ensemble model combines several hundreds of pathways in an explainable manner. Lastly, network propagation on pathway-gene network explains why gene expression profiles in subtypes are different. In experiments with five TCGA cancer datasets, our method achieved very good classification accuracies and, additionally, identified subtype-specific pathways and biological functions.

Availability and implementation

The source code is available at http://biohealth.snu.ac.kr/software/GCN_MAE.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +31598630,Learning from the ligand: using ligand-based features to improve binding affinity prediction.,"

Motivation

Machine learning scoring functions for protein-ligand binding affinity prediction have been found to consistently outperform classical scoring functions. Structure-based scoring functions for universal affinity prediction typically use features describing interactions derived from the protein-ligand complex, with limited information about the chemical or topological properties of the ligand itself.

Results

We demonstrate that the performance of machine learning scoring functions are consistently improved by the inclusion of diverse ligand-based features. For example, a Random Forest (RF) combining the features of RF-Score v3 with RDKit molecular descriptors achieved Pearson correlation coefficients of up to 0.836, 0.780 and 0.821 on the PDBbind 2007, 2013 and 2016 core sets, respectively, compared to 0.790, 0.746 and 0.814 when using the features of RF-Score v3 alone. Excluding proteins and/or ligands that are similar to those in the test sets from the training set has a significant effect on scoring function performance, but does not remove the predictive power of ligand-based features. Furthermore a RF using only ligand-based features is predictive at a level similar to classical scoring functions and it appears to be predicting the mean binding affinity of a ligand for its protein targets.

Availability and implementation

Data and code to reproduce all the results are freely available at http://opig.stats.ox.ac.uk/resources.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +31588495,EvoEF2: accurate and fast energy function for computational protein design.,"

Motivation

The accuracy and success rate of de novo protein design remain limited, mainly due to the parameter over-fitting of current energy functions and their inability to discriminate incorrect designs from correct designs.

Results

We developed an extended energy function, EvoEF2, for efficient de novo protein sequence design, based on a previously proposed physical energy function, EvoEF. Remarkably, EvoEF2 recovered 32.5%, 47.9% and 22.3% of all, core and surface residues for 148 test monomers, and was generally applicable to protein-protein interaction design, as it recapitulated 30.9%, 42.4%, 31.3% and 21.4% of all, core, interface and surface residues for 88 test dimers, significantly outperforming EvoEF on the native sequence recapitulation. We further used I-TASSER to evaluate the foldability of the 148 designed monomer sequences, where all of them were predicted to fold into structures with high fold- and atomic-level similarity to their corresponding native structures, as demonstrated by the fact that 87.8% of the predicted structures shared a root-mean-square-deviation less than 2 Å to their native counterparts. The study also demonstrated that the usefulness of physical energy functions is highly correlated with the parameter optimization processes, and EvoEF2, with parameters optimized using sequence recapitulation, is more suitable for computational protein sequence design than EvoEF, which was optimized on thermodynamic mutation data.

Availability and implementation

The source code of EvoEF2 and the benchmark datasets are freely available at https://zhanglab.ccmb.med.umich.edu/EvoEF.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +31504182,Gene relevance based on multiple evidences in complex networks.,"

Motivation

Multi-omics approaches offer the opportunity to reconstruct a more complete picture of the molecular events associated with human diseases, but pose challenges in data analysis. Network-based methods for the analysis of multi-omics leverage the complex web of macromolecular interactions occurring within cells to extract significant patterns of molecular alterations. Existing network-based approaches typically address specific combinations of omics and are limited in terms of the number of layers that can be jointly analysed. In this study, we investigate the application of network diffusion to quantify gene relevance on the basis of multiple evidences (layers).

Results

We introduce a gene score (mND) that quantifies the relevance of a gene in a biological process taking into account the network proximity of the gene and its first neighbours to other altered genes. We show that mND has a better performance over existing methods in finding altered genes in network proximity in one or more layers. We also report good performances in recovering known cancer genes. The pipeline described in this article is broadly applicable, because it can handle different types of inputs: in addition to multi-omics datasets, datasets that are stratified in many classes (e.g., cell clusters emerging from single cell analyses) or a combination of the two scenarios.

Availability and implementation

The R package 'mND' is available at URL: https://www.itb.cnr.it/mnd.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +30105748,Automated Time Series Modeling for Piezometers in the National Database of the Netherlands.,"The Geological Survey of the Netherlands (TNO-GSN) maintains a public national database of groundwater head observations. Transfer function-noise modeling has been applied to the time series in order to extract the impulse response functions for precipitation and evaporation for each piezometer. An automated procedure has been developed to assess the quality of the time series and of the models. The time series models of sufficient quality offer far more homogeneous data on the piezometric head than the original measurements. This allows for improved mapping of the head at a specific date or of characteristics of the head like average summer or winter levels. Also, the separation of precipitation and evaporation from other influences is useful for groundwater management and policy. The individual time series models are available online with interactive graphics (https://www.grondwatertools.nl/grondwatertools-viewer). The spatial patterns of the impulse response function characteristics can support analyses of the groundwater system.",2018-09-11 +24243848,miRNEST 2.0: a database of plant and animal microRNAs.,"Ever growing interest in microRNAs has immensely populated the number of resources and research papers devoted to the field and, as a result, it becomes more and more demanding to find miRNA data of interest. To mitigate this problem, we created miRNEST database (http://mirnest.amu.edu.pl), an integrative microRNAs resource. In its updated version, named miRNEST 2.0, the database is complemented with our extensive miRNA predictions from deep sequencing libraries, data from plant degradome analyses, results of pre-miRNA classification with HuntMi and miRNA splice sites information. 
We also added download and upload options and improved the user interface to make it easier to browse through miRNA records.",2013-11-15 +32568733,Systems biology graphical notation markup language (SBGNML) version 0.3. ,"This document defines Version 0.3 Markup Language (ML) support for the Systems Biology Graphical Notation (SBGN), a set of three complementary visual languages developed for biochemists, modelers, and computer scientists. SBGN aims at representing networks of biochemical interactions in a standard, unambiguous way to foster efficient and accurate representation, visualization, storage, exchange, and reuse of information on all kinds of biological knowledge, from gene regulation, to metabolism, to cellular signaling. SBGN is defined neutrally to programming languages and software encoding; however, it is oriented primarily towards allowing models to be encoded using XML, the eXtensible Markup Language. The notable changes from the previous version include the addition of attributes for better specify metadata about maps, as well as support for multiple maps, sub-maps, colors, and annotations. These changes enable a more efficient exchange of data to other commonly used systems biology formats (e. g., BioPAX and SBML) and between tools supporting SBGN (e. g., CellDesigner, Newt, Krayon, SBGN-ED, STON, cd2sbgnml, and MINERVA). More details on SBGN and related software are available at http://sbgn.org. With this effort, we hope to increase the adoption of SBGN in bioinformatics tools, ultimately enabling more researchers to visualize biological knowledge in a precise and unambiguous manner.",2020-06-22 +29106613,3DIV: A 3D-genome Interaction Viewer and database.,"Three-dimensional (3D) chromatin structure is an emerging paradigm for understanding gene regulation mechanisms. Hi-C (high-throughput chromatin conformation capture), a method to detect long-range chromatin interactions, allows extensive genome-wide investigation of 3D chromatin structure. 
However, broad application of Hi-C data have been hindered by the level of complexity in processing Hi-C data and the large size of raw sequencing data. In order to overcome these limitations, we constructed a database named 3DIV (a 3D-genome Interaction Viewer and database) that provides a list of long-range chromatin interaction partners for the queried locus with genomic and epigenomic annotations. 3DIV is the first of its kind to collect all publicly available human Hi-C data to provide 66 billion uniformly processed raw Hi-C read pairs obtained from 80 different human cell/tissue types. In contrast to other databases, 3DIV uniquely provides normalized chromatin interaction frequencies against genomic distance dependent background signals and a dynamic browsing visualization tool for the listed interactions, which could greatly advance the interpretation of chromatin interactions. '3DIV' is available at http://kobic.kr/3div.",2018-01-01 +26424082,RegNetwork: an integrated database of transcriptional and post-transcriptional regulatory networks in human and mouse. ,"Transcriptional and post-transcriptional regulation of gene expression is of fundamental importance to numerous biological processes. Nowadays, an increasing amount of gene regulatory relationships have been documented in various databases and literature. However, to more efficiently exploit such knowledge for biomedical research and applications, it is necessary to construct a genome-wide regulatory network database to integrate the information on gene regulatory relationships that are widely scattered in many different places. Therefore, in this work, we build a knowledge-based database, named 'RegNetwork', of gene regulatory networks for human and mouse by collecting and integrating the documented regulatory interactions among transcription factors (TFs), microRNAs (miRNAs) and target genes from 25 selected databases. 
Moreover, we also inferred and incorporated potential regulatory relationships based on transcription factor binding site (TFBS) motifs into RegNetwork. As a result, RegNetwork contains a comprehensive set of experimentally observed or predicted transcriptional and post-transcriptional regulatory relationships, and the database framework is flexibly designed for potential extensions to include gene regulatory networks for other organisms in the future. Based on RegNetwork, we characterized the statistical and topological properties of genome-wide regulatory networks for human and mouse, we also extracted and interpreted simple yet important network motifs that involve the interplays between TF-miRNA and their targets. In summary, RegNetwork provides an integrated resource on the prior information for gene regulatory relationships, and it enables us to further investigate context-specific transcriptional and post-transcriptional regulatory interactions based on domain-specific experimental data. Database URL: http://www.regnetworkweb.org.",2015-09-30 +29722965,Database for CO2 Separation Performances of MOFs Based on Computational Materials Screening.,"Metal-organic frameworks (MOFs) are potential adsorbents for CO2 capture. Because thousands of MOFs exist, computational studies become very useful in identifying the top performing materials for target applications in a time-effective manner. In this study, molecular simulations were performed to screen the MOF database to identify the best materials for CO2 separation from flue gas (CO2/N2) and landfill gas (CO2/CH4) under realistic operating conditions. We validated the accuracy of our computational approach by comparing the simulation results for the CO2 uptakes, CO2/N2 and CO2/CH4 selectivities of various types of MOFs with the available experimental data. Binary CO2/N2 and CO2/CH4 mixture adsorption data were then calculated for the entire MOF database. 
These data were then used to predict selectivity, working capacity, regenerability, and separation potential of MOFs. The top performing MOF adsorbents that can separate CO2/N2 and CO2/CH4 with high performance were identified. Molecular simulations for the adsorption of a ternary CO2/N2/CH4 mixture were performed for these top materials to provide a more realistic performance assessment of MOF adsorbents. The structure-performance analysis showed that MOFs with Δ Qst0 > 30 kJ/mol, 3.8 Å < pore-limiting diameter < 5 Å, 5 Å < largest cavity diameter < 7.5 Å, 0.5 < ϕ < 0.75, surface area < 1000 m2/g, and ρ > 1 g/cm3 are the best candidates for selective separation of CO2 from flue gas and landfill gas. This information will be very useful to design novel MOFs exhibiting high CO2 separation potentials. Finally, an online, freely accessible database https://cosmoserc.ku.edu.tr was established, for the first time in the literature, which reports all of the computed adsorbent metrics of 3816 MOFs for CO2/N2, CO2/CH4, and CO2/N2/CH4 separations in addition to various structural properties of MOFs.",2018-05-14 +30576486,PhytoTypeDB: a database of plant protein inter-cultivar variability and function. ,"Despite a fast-growing number of available plant genomes, available computational resources are poorly integrated and provide only limited access to the underlying data. Most existing databases focus on DNA/RNA data or specific gene families, with less emphasis on protein structure, function and variability. In particular, despite the economic importance of many plant accessions, there are no straightforward ways to retrieve or visualize information on their differences. To fill this gap, we developed PhytoTypeDB (http://phytotypedb.bio.unipd.it/), a scalable database containing plant protein annotations and genetic variants from resequencing of different accessions. 
The database content is generated by an integrated pipeline, exploiting state-of-the-art methods for protein characterization requiring only the proteome reference sequence and variant calling files. Protein names for unknown proteins are inferred by homology for over 95% of the entries. Single-nucleotide variants are visualized along with protein annotation in a user-friendly web interface. The server offers an effective querying system, which allows to compare variability among different species and accessions, to generate custom data sets based on shared functional features or to perform sequence searches. A documented set of exposed RESTful endpoints make the data accessible programmatically by third-party clients.",2018-01-01 +,"PFR2: a curated database of planktonic foraminifera 18S ribosomal DNA as a resource for studies of plankton ecology, biogeography and evolution","Planktonic foraminifera (Rhizaria) are ubiquitous marine pelagic protists producing calcareous shells with conspicuous morphology. They play an important role in the marine carbon cycle, and their exceptional fossil record serves as the basis for biochronostratigraphy and past climate reconstructions. A major worldwide sampling effort over the last two decades has resulted in the establishment of multiple large collections of cryopreserved individual planktonic foraminifera samples. Thousands of 18S rDNA partial sequences have been generated, representing all major known morphological taxa across their worldwide oceanic range. This comprehensive data coverage provides an opportunity to assess patterns of molecular ecology and evolution in a holistic way for an entire group of planktonic protists. We combined all available published and unpublished genetic data to build PFR², the Planktonic foraminifera Ribosomal Reference database. 
The first version of the database includes 3322 reference 18S rDNA sequences belonging to 32 of the 47 known morphospecies of extant planktonic foraminifera, collected from 460 oceanic stations. All sequences have been rigorously taxonomically curated using a six‐rank annotation system fully resolved to the morphological species level and linked to a series of metadata. The PFR² website, available at http://pfr2.sb-roscoff.fr, allows downloading the entire database or specific sections, as well as the identification of new planktonic foraminiferal sequences. Its novel, fully documented curation process integrates advances in morphological and molecular taxonomy. It allows for an increase in its taxonomic resolution and assures that integrity is maintained by including a complete contingency tracking of annotations and assuring that the annotations remain internally consistent.",2015-11-01 +30195755,Parallel Genome-wide Profiling of Coding and Non-coding RNAs to Identify Novel Regulatory Elements in Embryonic and Maturated Heart.,"Heart development is a complex process, tightly regulated by numerous molecular mechanisms. Key components of the regulatory network underlying heart development are transcription factors (TFs) and microRNAs (miRNAs), yet limited investigation of the role of miRNAs in heart development has taken place. Here, we report the first parallel genome-wide profiling of polyadenylated RNAs and miRNAs in a developing murine heart. These data enable us to identify dynamic activation or repression of numerous biological processes and signaling pathways. More than 200 miRNAs and 25 long non-coding RNAs were differentially expressed during embryonic heart development compared to the mature heart; most of these had not been previously associated with cardiogenesis. 
Integrative analysis of expression data and potential regulatory interactions suggested 28 miRNAs as novel regulators of embryonic heart development, representing a considerable expansion of the current repertoire of known cardiac miRNAs. To facilitate follow-up investigations, we constructed HeartMiR (http://heartmir.sysbiolab.eu), an open access database and interactive visualization tool for the study of gene regulation by miRNAs during heart development.",2018-05-04 +27829364,LSCplus: a fast solution for improving long read accuracy by short read alignment.,"

Background

The single molecule, real time (SMRT) sequencing technology of Pacific Biosciences enables the acquisition of transcripts from end to end due to its ability to produce extraordinarily long reads (>10 kb). This new method of transcriptome sequencing has been applied to several projects on humans and model organisms. However, the raw data from SMRT sequencing are of relatively low quality, with a random error rate of approximately 15 %, for which error correction using next-generation sequencing (NGS) short reads is typically necessary. Few tools have been designed that apply a hybrid sequencing approach that combines NGS and SMRT data, and the most popular existing tool for error correction, LSC, has computing resource requirements that are too intensive for most laboratory and research groups. These shortcomings severely limit the application of SMRT long reads for transcriptome analysis.

Results

Here, we report an improved tool (LSCplus) for error correction with the LSC program as a reference. LSCplus overcomes the disadvantage of LSC's time consumption and improves quality. Only 1/3-1/4 of the time and 1/20-1/25 of the error correction time is required using LSCplus compared with that required for using LSC.

Conclusions

LSCplus is freely available at http://www.herbbol.org:8001/lscplus/ . Sample calculations are provided illustrating the precision and efficiency of this method regarding error correction and isoform detection.",2016-11-09 +29322938,MDD-carb: a combinatorial model for the identification of protein carbonylation sites with substrate motifs.,"BACKGROUND:Carbonylation, which takes place through oxidation of reactive oxygen species (ROS) on specific residues, is an irreversibly oxidative modification of proteins. It has been reported that the carbonylation is related to a number of metabolic or aging diseases including diabetes, chronic lung disease, Parkinson's disease, and Alzheimer's disease. Due to the lack of computational methods dedicated to exploring motif signatures of protein carbonylation sites, we were motivated to exploit an iterative statistical method to characterize and identify carbonylated sites with motif signatures. RESULTS:By manually curating experimental data from research articles, we obtained 332, 144, 135, and 140 verified substrate sites for K (lysine), R (arginine), T (threonine), and P (proline) residues, respectively, from 241 carbonylated proteins. In order to examine the informative attributes for classifying between carbonylated and non-carbonylated sites, multifarious features including composition of twenty amino acids (AAC), composition of amino acid pairs (AAPC), position-specific scoring matrix (PSSM), and positional weighted matrix (PWM) were investigated in this study. Additionally, in an attempt to explore the motif signatures of carbonylation sites, an iterative statistical method was adopted to detect statistically significant dependencies of amino acid compositions between specific positions around substrate sites. Profile hidden Markov model (HMM) was then utilized to train a predictive model from each motif signature. 
Moreover, based on the method of support vector machine (SVM), we adopted it to construct an integrative model by combining the values of bit scores obtained from profile HMMs. The combinatorial model could provide an enhanced performance with evenly predictive sensitivity and specificity in the evaluation of cross-validation and independent testing. CONCLUSION:This study provides a new scheme for exploring potential motif signatures at substrate sites of protein carbonylation. The usefulness of the revealed motifs in the identification of carbonylated sites is demonstrated by their effective performance in cross-validation and independent testing. Finally, these substrate motifs were adopted to build an available online resource (MDD-Carb, http://csb.cse.yzu.edu.tw/MDDCarb/ ) and are also anticipated to facilitate the study of large-scale carbonylated proteomes.",2017-12-21 +32427317,ARTS 2.0: feature updates and expansion of the Antibiotic Resistant Target Seeker for comparative genome mining.,"Multi-drug resistant pathogens have become a major threat to human health and new antibiotics are urgently needed. Most antibiotics are derived from secondary metabolites produced by bacteria. In order to avoid suicide, these bacteria usually encode resistance genes, in some cases within the biosynthetic gene cluster (BGC) of the respective antibiotic compound. Modern genome mining tools enable researchers to computationally detect and predict BGCs that encode the biosynthesis of secondary metabolites. The major challenge now is the prioritization of the most promising BGCs encoding antibiotics with novel modes of action. A recently developed target-directed genome mining approach allows researchers to predict the mode of action of the encoded compound of an uncharacterized BGC based on the presence of resistant target genes. In 2017, we introduced the 'Antibiotic Resistant Target Seeker' (ARTS). 
ARTS allows for specific and efficient genome mining for antibiotics with interesting and novel targets by rapidly linking housekeeping and known resistance genes to BGC proximity, duplication and horizontal gene transfer (HGT) events. Here, we present ARTS 2.0 available at http://arts.ziemertlab.com. ARTS 2.0 now includes options for automated target directed genome mining in all bacterial taxa as well as metagenomic data. Furthermore, it enables comparison of similar BGCs from different genomes and their putative resistance genes.",2020-07-01 +31701156,The Monarch Initiative in 2019: an integrative data and analytic platform connecting phenotypes to genotypes across species.,"In biology and biomedicine, relating phenotypic outcomes with genetic variation and environmental factors remains a challenge: patient phenotypes may not match known diseases, candidate variants may be in genes that haven't been characterized, research organisms may not recapitulate human or veterinary diseases, environmental factors affecting disease outcomes are unknown or undocumented, and many resources must be queried to find potentially significant phenotypic associations. The Monarch Initiative (https://monarchinitiative.org) integrates information on genes, variants, genotypes, phenotypes and diseases in a variety of species, and allows powerful ontology-based search. We develop many widely adopted ontologies that together enable sophisticated computational analysis, mechanistic discovery and diagnostics of Mendelian diseases. Our algorithms and tools are widely used to identify animal models of human disease through phenotypic similarity, for differential diagnostics and to facilitate translational research. 
Launched in 2015, Monarch has grown with regards to data (new organisms, more sources, better modeling); new API and standards; ontologies (new Mondo unified disease ontology, improvements to ontologies such as HPO and uPheno); user interface (a redesigned website); and community development. Monarch data, algorithms and tools are being used and extended by resources such as GA4GH and NCATS Translator, among others, to aid mechanistic discovery and diagnostics.",2020-01-01 +28137767,SmProt: a database of small proteins encoded by annotated coding and non-coding RNA loci.,"Small proteins is the general term for proteins with length shorter than 100 amino acids. Identification and functional studies of small proteins have advanced rapidly in recent years, and several studies have shown that small proteins play important roles in diverse functions including development, muscle contraction and DNA repair. Identification and characterization of previously unrecognized small proteins may contribute in important ways to cell biology and human health. Current databases are generally somewhat deficient in that they have either not collected small proteins systematically, or contain only predictions of small proteins in a limited number of tissues and species. Here, we present a specifically designed web-accessible database, small proteins database (SmProt, http://bioinfo.ibp.ac.cn/SmProt), which is a database documenting small proteins. The current release of SmProt incorporates 255 010 small proteins computationally or experimentally identified in 291 cell lines/tissues derived from eight popular species. The database provides a variety of data including basic information (sequence, location, gene name, organism, etc.) as well as specific information (experiment, function, disease type, etc.). 
To facilitate data extraction, SmProt supports multiple search options, including species, genome location, gene name and their aliases, cell lines/tissues, ORF type, gene type, PubMed ID and SmProt ID. SmProt also incorporates a service for the BLAST alignment search and provides a local UCSC Genome Browser. Additionally, SmProt defines a high-confidence set of small proteins and predicts the functions of the small proteins.",2018-07-01 +26340938,Beyond knockouts: the International Knockout Mouse Consortium delivers modular and evolving tools for investigating mammalian genes.,"The International Knockout Mouse Consortium (IKMC; http://www.mousephenotype.org ) has generated mutations in almost every protein-coding mouse gene and is completing the companion Cre driver resource to expand tissue-specific conditional mutagenesis. Accordingly, the IKMC has carried out high-throughput gene trapping and targeting producing conditional mutations in murine embryonic stem cells in more than 18,500 genes, from which at least 4900 mutant mouse lines have been established to date. This resource is currently being upgraded with more powerful tools, such as visualization and manipulation cassettes that can be easily introduced into IKMC alleles for multifaceted functional studies. In addition, we discuss how existing IKMC products can be used in combination with CRISPR technology to accelerate genome engineering projects. All information and materials from this extraordinary biological resource together with coordinated phenotyping efforts can be retrieved at www.mousephenotype.org . 
The comprehensive IKMC knockout resource in combination with an extensive set of modular gene cassettes will continue to enhance functional gene annotation in the future and solidify its impact on biomedical research.",2015-09-04 +29218917,GeneDive: A gene interaction search and visualization tool to facilitate precision medicine.,"Obtaining relevant information about gene interactions is critical for understanding disease processes and treatment. With the rise in text mining approaches, the volume of such biomedical data is rapidly increasing, thereby creating a new problem for the users of this data: information overload. A tool for efficient querying and visualization of biomedical data that helps researchers understand the underlying biological mechanisms for diseases and drug responses, and ultimately helps patients, is sorely needed. To this end we have developed GeneDive, a web-based information retrieval, filtering, and visualization tool for large volumes of gene interaction data. GeneDive offers various features and modalities that guide the user through the search process to efficiently reach the information of their interest. GeneDive currently processes over three million gene-gene interactions with response times within a few seconds. For over half of the curated gene sets sourced from four prominent databases, more than 80% of the gene set members are recovered by GeneDive. In the near future, GeneDive will seamlessly accommodate other interaction types, such as gene-drug and gene-disease interactions, thus enabling full exploration of topics such as precision medicine. The GeneDive application and information about its underlying system architecture are available at http://www.genedive.net.",2018-01-01 +,Molecular phylogenetics and piercer evolution in the bug‐killing flies (Diptera: Tachinidae: Phasiinae),"Phasiinae (Diptera: Tachinidae) are endoparasitoid flies that attack Heteroptera, including a multitude of agricultural pests. 
A phylogenetically informed classification of Phasiinae has eluded systematists for over a century, primarily because of the conflicting character states and confusing morphology of certain taxa that indicate potential placement within other subfamilies. The unstable nature of phasiine taxonomy discourages important research into their classification, life history and potential use in biological control. In hopes of resolving several longstanding taxonomic debates and encouraging future research into this important group of parasitoids, the first molecular systematic analysis of Phasiinae is presented, including 128 worldwide taxa (80 genera) and approximately 7.6 kb of nuclear data representing four genes. Special emphasis is placed on the resolution of taxonomically ambiguous groups. The resulting robustly supported phylogenetic trees [maximum‐likelihood (ML)/Bayesian] were used to trace the evolution of significant adaptive traits within Tachinidae and test hypotheses about the classification of Phasiinae. Subfamily placements of certain taxa are confidently resolved including Eutherini, Epigrimyiini, Litophasia Girschner within Dexiinae, and Strongygastrini and Parerigonini within Phasiinae. The members of tribe Phasiini are redistributed: Cistogaster Latreille, Clytiomya Rondani, Ectophasia Townsend, Eliozeta Rondani and Euclytia Townsend transferred to Gymnosomatini; Opesia Robineau‐Desvoidy to Strongygastrini; and Xysta Meigen to Xystini. Similarly, members of Parerigonini are treated as belonging to Parerigonini (Parerigone Brauer, Zambesomima Walker), Cylindromyiini (Australotachina Curran, Pygidimyia Crosskey, Neobrachelia Townsend) or new tribe Zitini (Zita Curran, Leverella Baranov). Penthosia van der Wulp is transferred from Cylindromyiini to Hermyini. 
Ancestral state reconstruction suggests that piercing structures used to insert eggs directly into host tissues have evolved separately in a number of groups, but have also been lost or reduced in several lineages. A single potentially unequivocal morphological synapomorphy of Phasiinae, an elongated medial plate of the hypandrium in males, is identified. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:8BE75122‐FC7C‐4809‐AAF7‐19575596EF78.",2018-01-01 +31348610,SecProMTB: Support Vector Machine-Based Classifier for Secretory Proteins Using Imbalanced Data Sets Applied to Mycobacterium tuberculosis.,"Secretory proteins of Mycobacterium tuberculosis have created more concern, given their dominant immunogenicity and role in pathogenesis. In view of expensive and time-consuming traditional biochemical experiments, an advanced support vector machine model named SecProMTB is constructed in this study and the proteins are identified by a bioinformatic approach. First, an improved pseudo-amino acid composition (PseAAC) algorithm is used to extract features from all entities. Second, a novel imbalanced-data strategy is proposed and adopted to divide the original data set into train set and test set. Third, to overcome the overfitting problem, feature-ranking algorithms are applied with an increment feature selection. Finally, the model is trained and optimized. Consequently, a model is obtained with an area under the curve of 0.862 and average accuracy of 86% in the independent test. For the convenience of users, SecProMTB and related data are openly accessible at http://server.malab.cn/SecProMTB/index.jsp.",2019-08-08 +31492109,Visualization methods for differential expression analysis.,"

Background

Despite the availability of many ready-made testing software, reliable detection of differentially expressed genes in RNA-seq data is not a trivial task. Even though the data collection is considered high-throughput, data analysis has intricacies that require careful human attention. Researchers should use modern data analysis techniques that incorporate visual feedback to verify the appropriateness of their models. While some RNA-seq packages provide static visualization tools, their capabilities should be expanded and their meaningfulness should be explicitly demonstrated to users.

Results

In this paper, we 1) introduce new interactive RNA-seq visualization tools, 2) compile a collection of examples that demonstrate to biologists why visualization should be an integral component of differential expression analysis. We use public RNA-seq datasets to show that our new visualization tools can detect normalization issues, differential expression designation problems, and common analysis errors. We also show that our new visualization tools can identify genes of interest in ways undetectable with models. Our R package ""bigPint"" includes the plotting tools introduced in this paper, many of which are unique additions to what is currently available. The ""bigPint"" website is located at https://lindsayrutter.github.io/bigPint and contains short vignette articles that introduce new users to our package, all written in reproducible code.

Conclusions

We emphasize that interactive graphics should be an indispensable component of modern RNA-seq analysis, which is currently not the case. This paper and its corresponding software aim to persuade 1) users to slightly modify their differential expression analyses by incorporating statistical graphics into their usual analysis pipelines, 2) developers to create additional complex and interactive plotting methods for RNA-seq data, possibly using lessons learned from our open-source codes. We hope our work will serve a small part in upgrading the RNA-seq analysis world into one that more wholistically extracts biological information using both models and visuals.",2019-09-06 +31269035,Using citizen science to expand the global map of landslides: Introducing the Cooperative Open Online Landslide Repository (COOLR).,"Robust inventories are vital for improving assessment of and response to deadly and costly landslide hazards. However, collecting landslide events in inventories is difficult at the global scale due to inconsistencies in or the absence of landslide reporting. Citizen science is a valuable opportunity for addressing some of these challenges. The new Cooperative Open Online Landslide Repository (COOLR) supplements data in a NASA-developed Global Landslide Catalog (GLC) with citizen science reports to build a more robust, publicly available global inventory. This manuscript introduces the COOLR project and its methods, evaluates the initial citizen science results from the first 13 months, and discusses future improvements to increase the global engagement with the project. The COOLR project (https://landslides.nasa.gov) contains Landslide Reporter, the first global citizen science project for landslides, and Landslide Viewer, a portal to visualize data from COOLR and other satellite and model products. From March 2018 to April 2019, 49 citizen scientists contributed 162 new landslide events to COOLR. These events spanned 37 countries in five continents. 
The initial results demonstrated that both expert and novice participants are contributing via Landslide Reporter. Citizen scientists are filling in data gaps through news sources in 11 different languages, in-person observations, and new landslide events occurring hundreds and thousands of kilometers away from any existing GLC data. The data is of sufficient accuracy to use in NASA susceptibility and hazard models. COOLR continues to expand as an open platform of landslide inventories with new data from citizen scientists, NASA scientists, and other landslide groups. Future work on the COOLR project will seek to increase participation and functionality of the platform as well as move towards collective post-disaster mapping.",2019-07-03 +31420835,Does Location of Practice or Religiosity Predict Negative Physician Attitudes or Beliefs Toward LGB+ Individuals?,"The purpose of this study is to extend the Sabin et al's. (Am J Public Health 105(9):1831-1841, 2015. https://doi.org/10.2105/AJPH.2015.302631) findings to examine the extent to which religiosity and/or geographic region is predictive of negative attitudes or beliefs toward lesbian, gay, bisexual, and asexual (LGB+) individuals. Secondary data from the Sexuality Implicit Association Test were analyzed. Data included only participants from 2013 to 2015 who identified ""Healthcare - Diagnosing and Treating Practitioners"" as their occupation (n = 1376). The results of a factorial ANOVA revealed significant group differences accounting for 22.4% of the variance in attitudes toward LGB+ individuals. Religiosity was a significant factor in determining negative attitudes toward LGB+ individuals. However, the study was underpowered (5.8%) to detect an effect of geographic location in determining negative attitudes toward LGB+ individuals. It is important to validate a tool that can adequately measure the common assumptions associated with both religion and geographic region. 
Additionally, medical educators need to learn how to recognize and address negative attitudes among their students.",2019-12-01 +,A NEIGHBOURHOODS AND DEMENTIA STUDY: WHAT IS IMPORTANT TO PEOPLE WITH DEMENTIA VERSUS TRIAL OUTCOMES,"Abstract Many systematic reviews of effectiveness of non-pharmacological interventions for people with dementia have highlighted the variability in the outcomes assessed. This prevents comparisons of effectiveness across studies. This study, embedded in the Neighbourhoods and Dementia programme http://www.neighbourhoodsanddementia.org/work-programme-summary/ seeks to create a core outcome set for use within intervention studies aimed at people living with dementia. The 4-phase study design includes: qualitative interviews/focus groups and literature review; Delphi survey; systematic review; and stated preference survey. This presentation focuses on Phase 1, comparing outcomes identified through the qualitative work with those measured in previous and ongoing intervention trials. Thirty-five interviews and four focus groups were conducted with people with dementia, care partners, health/social care professionals, policy makers, service commissioners and research leaders. Outcome measures were also extracted from 129 international intervention trials. The qualitative data were analysed using a thematic framework to identify outcomes considered important to people with dementia. There were key differences in the emphasis of the outcomes in the literature compared to the qualitative data, indicating that many trials may not be measuring what is important to people with dementia. For example, activities were assessed in terms of frequency within previous studies; however, the meaningfulness of activities rather than the frequency were highlighted as important in the interviews. 
This core outcome set will help to ensure that the outcomes measured in evaluations of interventions are those that are considered the most important to people living with dementia and will aide comparability and consistency in future studies.",2017-06-30 +31996136,TopoFilter: a MATLAB package for mechanistic model identification in systems biology.,"BACKGROUND:To develop mechanistic dynamic models in systems biology, one often needs to identify all (or minimal) representations of the biological processes that are consistent with experimental data, out of a potentially large set of hypothetical mechanisms. However, a simple enumeration of all alternatives becomes quickly intractable when the number of model parameters grows. Selecting appropriate dynamic models out of a large ensemble of models, taking the uncertainty in our biological knowledge and in the experimental data into account, is therefore a key current problem in systems biology. RESULTS:The TopoFilter package addresses this problem in a heuristic and automated fashion by implementing the previously described topological filtering method for Bayesian model selection. It includes a core heuristic for searching the space of submodels of a parametrized model, coupled with a sampling-based exploration of the parameter space. Recent developments of the method allow to balance exhaustiveness and speed of the model space search, to efficiently re-sample parameters, to parallelize the search, and to use custom scoring functions. We use a theoretical example to motivate these features and then demonstrate TopoFilter's applicability for a yeast signaling network with more than 250'000 possible model structures. CONCLUSIONS:TopoFilter is a flexible software framework that makes Bayesian model selection and reduction efficient and scalable to network models of a complexity that represents contemporary problems in, for example, cell signaling. 
TopoFilter is open-source, available under the GPL-3.0 license at https://gitlab.com/csb.ethz/TopoFilter. It includes installation instructions, a quickstart guide, a description of all package options, and multiple examples.",2020-01-29 +30053237,HDncRNA: a comprehensive database of non-coding RNAs associated with heart diseases.,"Heart diseases (HDs) represent a common group of diseases that involve the heart, a number of which are characterized by high morbidity and lethality. Recently, increasing evidence demonstrates diverse non-coding RNAs (ncRNAs) play critical roles in HDs. However, currently there lacks a systematic investigation of the association between HDs and ncRNAs. Here, we developed a Heart Disease-related Non-coding RNAs Database (HDncRNA), to curate the HDs-ncRNA associations from 3 different sources including 1904 published articles, 3 existing databases [the Human microRNA Disease Database (HMDD), miR2disease and lncRNAdisease] and 5 RNA-seq datasets. The HDs-ncRNA associations with experimental validations curated from these articles, HMDD, miR2disease and part of data from lncRNAdisease were 'direct evidence'. Relationships got from high-through data in lncRNAdisease and annotated differential expressed lncRNAs from RNA-seq data were defined as 'high-throughput associations'. Novel lncRNAs identified from RNA-seq data in HDs had least credibility and were defined as 'predicted associations'. Currently, the database contains 2304 HDs-ncRNA associations for 133 HDs in 6 species including human, mouse, rat, pig, calf and dog. The database also has the following features: (i) A user-friendly web interface for browsing and searching the data; (ii) a visualization tool to plot miRNA and lncRNA locations in the human and mouse genomes; (iii) information about neighboring genes of lncRNAs and (iv) links to some mainstream databases including miRbase, Ensemble and Fantom Cat for the annotated lncRNAs and miRNAs. 
In summary, HDncRNA provides an excellent platform for exploring HDs related ncRNAs.Database URL: http://hdncrna.cardiacdev.com.",2018-01-01 +23159828,Cohort profile: the Quebec Longitudinal Study of Kindergarten Children (QLSKC).,"The Quebec Longitudinal Study of Kindergarten Children (QLSKC) is an ongoing population-based prospective longitudinal study presently spanning ages 6-29 years, designed to study the prevalence, risk factors, development and consequences of behavioural and emotional problems during elementary school. Kindergarten boys and girls attending French-speaking public schools in the Canadian province of Quebec during the 1986-87 and 1987-88 school years were included in the cohort: 2000 children representative of the population and 1017 children exhibiting disruptive behaviour problems. To date, 12 waves of data have been collected, and three generations of participants have been involved in the study (i.e. the study child, his parents and the first child of the study child). Information on demographics, psycho-social and lifestyle factors, child and family member characteristics (physical and mental health), and outcomes such as psychiatric diagnoses, delinquency or school diploma were assessed during three important developmental stages (childhood, adolescence and early adulthood). Blood samples were also collected in early adulthood for genetic analyses. Information on publications, available data and access to data can be found on the following website (http://www.gripinfo.ca/Grip/Public/www/).",2012-11-18 +33554223,RANDOMIZE: A Web Server for Data Randomization.,"The microarray-based Illumina Infinium MethylationEpic BeadChip (Epic 850k) has become a useful and standard tool for epigenome wide deoxyribonucleic acid (DNA) methylation profiling. Data from this technology may suffer from batch effects due to improper handling of the samples during the plating process. 
Batch effects are a significant issue and can give rise to spurious and inaccurate results and reduction in power to detect real biological differences. Careful study design, such as randomizing the samples to uniformly distribute the samples across the factors responsible for batch effects, is crucial to address batch effects and other technical artifacts. Randomization helps to reduce the likelihood of bias and impact of difference among groups. This process of randomizing the samples can be a tedious, error-prone, and time-consuming task without a user-friendly and efficient tool. We present RANDOMIZE, a web-based application designed to perform randomization of relevant metadata to evenly distribute samples across the factors typically responsible for batch effects in DNA methylation microarrays, such as rows, chips and plates. We demonstrate that the tool is efficient, fast and easy to use. The tool is freely available online at https://coph-usf.shinyapps.io/RANDOMIZE/ and can be accessed using any web browser. Sample data and tutorial is also available with the tool.",2020-01-01 +25432968,MTGD: The Medicago truncatula genome database.,"Medicago truncatula, a close relative of alfalfa (Medicago sativa), is a model legume used for studying symbiotic nitrogen fixation, mycorrhizal interactions and legume genomics. J. Craig Venter Institute (JCVI; formerly TIGR) has been involved in M. truncatula genome sequencing and annotation since 2002 and has maintained a web-based resource providing data to the community for this entire period. The website (http://www.MedicagoGenome.org) has seen major updates in the past year, where it currently hosts the latest version of the genome (Mt4.0), associated data and legacy project information, presented to users via a rich set of open-source tools. A JBrowse-based genome browser interface exposes tracks for visualization. 
Mutant gene symbols originally assembled and curated by the Frugoli lab are now hosted at JCVI and tie into our community annotation interface, Medicago EuCAP (to be integrated soon with our implementation of WebApollo). Literature pertinent to M. truncatula is indexed and made searchable via the Textpresso search engine. The site also implements MedicMine, an instance of InterMine that offers interconnectivity with other plant 'mines' such as ThaleMine and PhytoMine, and other model organism databases (MODs). In addition to these new features, we continue to provide keyword- and locus identifier-based searches served via a Chado-backed Tripal Instance, a BLAST search interface and bulk downloads of data sets from the iPlant Data Store (iDS). Finally, we maintain an E-mail helpdesk, facilitated by a JIRA issue tracking system, where we receive and respond to questions about the website and requests for specific data sets from the community.",2014-11-28 +32973479,Nutil: A Pre- and Post-processing Toolbox for Histological Rodent Brain Section Images.,"With recent technological advances in microscopy and image acquisition of tissue sections, further developments of tools are required for viewing, transforming, and analyzing the ever-increasing amounts of high-resolution data produced. In the field of neuroscience, histological images of whole rodent brain sections are commonly used for investigating brain connections as well as cellular and molecular organization in the normal and diseased brain, but present a problem for the typical neuroscientist with no or limited programming experience in terms of the pre- and post-processing steps needed for analysis. To meet this need we have designed Nutil, an open access and stand-alone executable software that enables automated transformations, post-processing, and analyses of 2D section images using multi-core processing (OpenMP). 
The software is written in C++ for efficiency, and provides the user with a clean and easy graphical user interface for specifying the input and output parameters. Nutil currently contains four separate tools: (1) A transformation toolchain named ""Transform"" that allows for rotation, mirroring and scaling, resizing, and renaming of very large tiled tiff images. (2) ""TiffCreator"" enables the generation of tiled TIFF images from other image formats such as PNG and JPEG. (3) A ""Resize"" tool completes the preprocessing toolset and allows downscaling of PNG and JPEG images with output in PNG format. (4) The fourth tool is a post-processing method called ""Quantifier"" that enables the quantification of segmented objects in the context of regions defined by brain atlas maps generated with the QuickNII software based on a 3D reference atlas (mouse or rat). The output consists of a set of report files, point cloud coordinate files for visualization in reference atlas space, and reference atlas images superimposed with color-coded objects. The Nutil software is made available by the Human Brain Project (https://www.humanbrainproject.eu) at https://www.nitrc.org/projects/nutil/.",2020-08-21 +31066453,g:Profiler: a web server for functional enrichment analysis and conversions of gene lists (2019 update).,"Biological data analysis often deals with lists of genes arising from various studies. The g:Profiler toolset is widely used for finding biological categories enriched in gene lists, conversions between gene identifiers and mappings to their orthologs. The mission of g:Profiler is to provide a reliable service based on up-to-date high quality data in a convenient manner across many evidence types, identifier spaces and organisms. g:Profiler relies on Ensembl as a primary data source and follows their quarterly release cycle while updating the other data sources simultaneously. 
The current update provides a better user experience due to a modern responsive web interface, standardised API and libraries. The results are delivered through an interactive and configurable web design. Results can be downloaded as publication ready visualisations or delimited text files. In the current update we have extended the support to 467 species and strains, including vertebrates, plants, fungi, insects and parasites. By supporting user uploaded custom GMT files, g:Profiler is now capable of analysing data from any organism. All past releases are maintained for reproducibility and transparency. The 2019 update introduces an extensive technical rewrite making the services faster and more flexible. g:Profiler is freely available at https://biit.cs.ut.ee/gprofiler.",2019-07-01 +33246333,Impact of the polycarbonate strippers used in assisted reproduction techniques on embryonic development.,"

Study question

Do daily manipulations of preimplantation embryos with polycarbonate (PC)-made bisphenol A (BPA)-releasing strippers influence embryo development?

Summary answer

Compared to glass strippers, PC strippers enhance the blastocyst development rate but this does not seem to be BPA-related.

What is known already

PC strippers have been shown to release tiny amounts (around 0.5 ng/ml BPA) of BPA in routine human IVF procedures. A chronic exposure to BPA either in vivo or in vitro during the preimplantation period can impact post-implantation and post-natal development. BPA can act rapidly by binding to membrane receptors and inducing rapid non-genomic effects.

Study design, size, duration

This experimental study using mouse embryos had a balanced design and blinded evaluations of the endpoints.

Participants/materials, setting, methods

In vivo fertilized zygotes were obtained from outbred Swiss CD1 mice crossings after an ovarian stimulation. The zygotes were allocated to three daily handling conditions (HCs) and cultured until Day 4 in a single human commercial medium. Each day, the embryos were handled for 20 s either in a PC stripper (HC1) or in a glass stripper (HC2). In HC3, the embryos were pre-exposed to 0.5 ng/ml BPA before being handled for 20 s in a glass stripper. Handling operations were repeated on Days 1, 2 and 3. Embryo development was assessed blindly on Day 4. Expanded blastocysts were selected for a transcriptomic analysis using Agilent Sureprint G3 Mouse GE v2 microarrays and the retrotransposon LINE1-Orf2 expression was analysed using qRT-PCR, as a proxy for a global evaluation of the epigenetic status.

Main results and the role of chance

Compared to the embryos manipulated in HC2 (n = 243), those in HC1 (n = 228) developed significantly more often to the blastocyst stage (55 vs 46%; P < 0.05). It appears the effect of these PC strippers was not BPA-related because embryos pre-exposed to BPA (HC3, n = 230) showed no difference in the blastocyst rate when compared to HC2 (43 vs 46%). When analysing same-stage blastocysts, we noticed no difference in the embryo gene expression between the three HC groups.

Large scale data

https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE148868.

Limitations, reasons for caution

Our results using a mouse model designed to mimic human conditions (outbred strain, human commercial IVF dishes and a unique commercial human embryonic culture media) are reassuring since no gene was found to be differentially expressed, including LINE-1 genes, as a proxy for a global evaluation of the epigenetic status. However, no global epigenetic analysis of the genome has been performed. Furthermore, we did not evaluate post-implantation events, although BPA exposure during peri-conception could affect foeto-placental and post-natal development.

Wider implications of the findings

Based on the precautionary principle, several European countries banned the use of BPA in baby bottles and food packaging several years before European Agencies took an official position. The question of applying this principle to plastics in closed contact with human embryos is raised. Further studies are needed for a decision to be made.

Study funding/competing interest(s)

This study was supported by a grant from the Agence de Biomédecine (AOR 2016). The authors declare no competing interest.",2021-01-01 +31921624,OSbrca: A Web Server for Breast Cancer Prognostic Biomarker Investigation With Massive Data From Tens of Cohorts.,"Potential prognostic mRNA biomarkers are exploited to assist in the clinical management and treatment of breast cancer, which is the first life-threatening tumor in women worldwide. However, it is technically challenging for untrained researchers to process high dimensional profiling data to screen and validate the potential prognostic values of genes of interests in multiple cohorts. Our aim is to develop an easy-to-use web server to facilitate the screening, developing, and evaluating of prognostic biomarkers in breast cancers. Herein, we collected more than 7,400 cases of breast cancer with gene expression profiles and clinical follow-up information from The Cancer Genome Atlas and Gene Expression Omnibus data, and built an Online consensus Survival analysis web server for Breast Cancers, abbreviated OSbrca, to generate the Kaplan-Meier survival plot with a hazard ratio and log rank P-value for given genes in an interactive way. To examine the performance of OSbrca, the prognostic potency of 128 previously published biomarkers of breast cancer was reassessed in OSbrca. In conclusion, it is highly valuable for biologists and clinicians to perform the preliminary assessment and validation of novel or putative prognostic biomarkers for breast cancers. OSbrca could be accessed at http://bioinfo.henu.edu.cn/BRCA/BRCAList.jsp.",2019-12-20 +31765831,I3: A Self-organising Learning Workflow for Intuitive Integrative Interpretation of Complex Genetic Data.,"We propose a computational workflow (I3) for intuitive integrative interpretation of complex genetic data mainly building on the self-organising principle. 
We illustrate the use in interpreting genetics of gene expression and understanding genetic regulators of protein phenotypes, particularly in conjunction with information from human population genetics and/or evolutionary history of human genes. We reveal that loss-of-function intolerant genes tend to be depleted of tissue-sharing genetics of gene expression in brains, and if highly expressed, have broad effects on the protein phenotypes studied. We suggest that this workflow presents a general solution to the challenge of complex genetic data interpretation. I3 is available at http://suprahex.r-forge.r-project.org/I3.html.",2019-10-01 +31260377,"Effect Sizes in Single-Case Aphasia Studies: A Comparative, Autocorrelation-Oriented Analysis.","Purpose In single-case treatment studies, researchers may compare client performance during a baseline, nontreatment phase(s) to client performance during intervention phases. Autocorrelation in the data series gathered during such studies increases the likelihood that analysts will detect or fail to detect meaningful differences between baseline and treatment phase data. We examined the impact that autocorrelation has on 4 effect size calculation methods when these methods are applied to data generated by people with aphasia during anomia treatment studies. The effect sizes we selected were Busk and Serlin's d, Young's C, nonoverlap of all pairs, and Tau- U. We hypothesized that d and C would be influenced by autocorrelation, whereas nonoverlap of all pairs and Tau- U would not. Method We extracted 173 highly autocorrelated data series from published investigations of treatments for anomia. These data series were then ""cleansed"" of autocorrelation through the use of an autoregressive integrated moving average (ARIMA) process. The 4 effect size calculation methods were used to derive an effect size for each published and each corresponding ARIMA-tized data series. 
The published and ARIMA-tized effect sizes associated with each calculation method were then compared. Results For all of the 4 effect sizes, statistically significant differences existed between the published effect sizes and the ARIMA-tized effect sizes. Conclusions All 4 of the methods were influenced by autocorrelation. Further research that develops effect size calculation methods that are not influenced by autocorrelation will help to improve the quality of single-case studies. Supplemental Material https://doi.org/10.23641/asha.8298530.",2019-07-01 +29280960,MetaGOmics: A Web-Based Tool for Peptide-Centric Functional and Taxonomic Analysis of Metaproteomics Data. ,"Metaproteomics is the characterization of all proteins being expressed by a community of organisms in a complex biological sample at a single point in time. Applications of metaproteomics range from the comparative analysis of environmental samples (such as ocean water and soil) to microbiome data from multicellular organisms (such as the human gut). Metaproteomics research is often focused on the quantitative functional makeup of the metaproteome and which organisms are making those proteins. That is: What are the functions of the currently expressed proteins? How much of the metaproteome is associated with those functions? And, which microorganisms are expressing the proteins that perform those functions? However, traditional protein-centric functional analysis is greatly complicated by the large size, redundancy, and lack of biological annotations for the protein sequences in the database used to search the data. To help address these issues, we have developed an algorithm and web application (dubbed ""MetaGOmics"") that automates the quantitative functional (using Gene Ontology) and taxonomic analysis of metaproteomics data and subsequent visualization of the results. MetaGOmics is designed to overcome the shortcomings of traditional proteomics analysis when used with metaproteomics data. 
It is easy to use, requires minimal input, and fully automates most steps of the analysis-including comparing the functional makeup between samples. MetaGOmics is freely available at https://www.yeastrc.org/metagomics/.",2017-12-27 +32053711,In-silico simulated prototype-patients using TPMS technology to study a potential adverse effect of sacubitril and valsartan.,"Unveiling the mechanism of action of a drug is key to understand the benefits and adverse reactions of a medication in an organism. However, in complex diseases such as heart diseases there is not a unique mechanism of action but a wide range of different responses depending on the patient. Exploring this collection of mechanisms is one of the clues for a future personalized medicine. The Therapeutic Performance Mapping System (TPMS) is a Systems Biology approach that generates multiple models of the mechanism of action of a drug. Each molecular mechanism generated could be associated to particular individuals, here defined as prototype-patients, hence the generation of models using TPMS technology may be used for detecting adverse effects to specific patients. TPMS operates by (1) modelling the responses in humans with an accurate description of a protein network and (2) applying a Multilayer Perceptron-like and sampling strategy to find all plausible solutions. In the present study, TPMS is applied to explore the diversity of mechanisms of action of the drug combination sacubitril/valsartan. We use TPMS to generate a wide range of models explaining the relationship between sacubitril/valsartan and heart failure (the indication), as well as evaluating their association with macular degeneration (a potential adverse effect). Among the models generated, we identify a set of mechanisms of action associated to a better response in terms of heart failure treatment, which could also be associated to macular degeneration development. 
Finally, a set of 30 potential biomarkers are proposed to identify mechanisms (or prototype-patients) more prone of suffering macular degeneration when presenting good heart failure response. All prototype-patients models generated are completely theoretical and therefore they do not necessarily involve clinical effects in real patients. Data and accession to software are available at http://sbi.upf.edu/data/tpms/.",2020-02-13 +31912599,OSpaad: An online tool to perform survival analysis by integrating gene expression profiling and long-term follow-up data of 1319 pancreatic carcinoma patients.,"Pancreatic carcinoma (PC) is a type of highly lethal malignant tumor that has unfavorable outcomes. One major challenge in improving clinical outcomes is to identify novel biomarkers for prognosis. In this study, we developed an online consensus survival tool for pancreatic adenocarcinoma (OSpaad), which allows researchers and clinicians to analyze the prognostic value of selected genes in PC. OSpaad contains 1319 unique PC cases that have both gene expression data and correspondent clinical data from seven individual cohorts and provides four survival terms including overall survival, disease-specific survival, disease-free interval, progression-free interval for prognosis evaluation. To meet the different research needs, OSpaad allows users to limit survival analysis in subgroups by selecting different terms of clinical confounding factors such as TNM stage, sex, smoking time, lymph invasion, and race. Moreover, we showed that 97% (116 out of 120) previously reported prognostic biomarkers, including ERBB2, TP53, EGFR and so forth, were validated and confirmed their prognostic significance in OSpaad, demonstrating the well performance of survival analysis in OSpaad. 
OSpaad is a user-friendly online tool with a straightforward interface allowing clinicians and basic research scientists with even a limited bioinformatics background to easily screen and evaluate the prognostic value of genes in a large PC cohort. This online tool can be accessed at http://bioinfo.henu.edu.cn/PAAD/PAADList.jsp.",2020-01-08 +27273672,Predicting regulatory variants with composite statistic.,"

Motivation

Prediction and prioritization of human non-coding regulatory variants is critical for understanding the regulatory mechanisms of disease pathogenesis and promoting personalized medicine. Existing tools utilize functional genomics data and evolutionary information to evaluate the pathogenicity or regulatory functions of non-coding variants. However, different algorithms lead to inconsistent and even conflicting predictions. Combining multiple methods may increase accuracy in regulatory variant prediction.

Results

Here, we compiled an integrative resource for predictions from eight different tools on functional annotation of non-coding variants. We further developed a composite strategy to integrate multiple predictions and computed the composite likelihood of a given variant being regulatory variant. Benchmarked by multiple independent causal variants datasets, we demonstrated that our composite model significantly improves the prediction performance.

Availability and implementation

We implemented our model and scoring procedure as a tool, named PRVCS, which is freely available to academic and non-profit usage at http://jjwanglab.org/PRVCS CONTACT: wang.junwen@mayo.edu, jliu@stat.harvard.edu, or limx54@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-06 +33189894,Effects of purposeful soccer heading on circulating small extracellular vesicle concentration and cargo.,"

Background

Considering the potential cumulative effects of repetitive head impact (HI) exposure, we need sensitive biomarkers to track short- and long-term effects. Circulating small extracellular vesicles (sEVs) (<200 nm) traffic biological molecules throughout the body and may have diagnostic value as biomarkers for disease. The purpose of this study was to identify the microRNA (miRNA) profile in circulating sEVs derived from human plasma following repetitive HI exposure.

Methods

Healthy adult (aged 18-35 years) soccer players were randomly assigned to one of 3 groups: the HI group performed 10 standing headers, the leg impact group performed 10 soccer ball trapping maneuvers over 10 min, and the control group did not participate in any soccer drills. Plasma was collected before testing and 24 h afterward, and sEVs were isolated and characterized via nanoparticle tracking analysis. Next-generation sequencing was utilized to identify candidate miRNAs isolated from sEVs, and candidate microRNAs were analyzed via quantitative polymerase chain reaction. In silico target prediction was performed using TargetScan (Version 7.0; targetscan.org) and miRWalk (http://mirwalk.umm.uni-heidelberg.de/) programs, and target validation was performed using luciferase reporter vectors with a miR-7844-5p mimic in human embryonic kidney (HEK) 293T/17 cells.

Results

Plasma sEV concentration and size were not affected across time and group following repetitive HI exposure. After 24 h, the HI read count from next-generation sequencing showed a 4-fold or greater increase in miR-92b-5p, miR-423-5p, and miR-24-3p and a 3-fold or greater decrease in miR-7844-5p, miR-144-5p, miR-221-5p, and miR-22-3p. Analysis of quantitative polymerase chain reaction revealed that leg impact did not alter the candidate miRNA levels. To our knowledge, miR-7844-5p is a previously unknown miRNA. We identified 8 miR-7844-5p mRNA targets: protein phosphatase 1 regulatory inhibitor subunit 1B (PPP1R1B), LIM and senescent cell antigen-like domains 1 (LIMS1), autophagy-related 12 (ATG12), microtubule-associated protein 1 light chain 3 beta (MAP1LC3B), integrin subunit alpha-1 (ITGA1), mitogen-activated protein kinase 1 (MAPK1), glycogen synthase kinase 3β (GSK3β), and mitogen-activated protein kinase 8 (MAPK8).

Conclusion

Collectively, these data indicate repetitive HI exposure alters plasma sEV miRNA content, but not sEV size or number. Furthermore, for the first time we demonstrate that previously unknown miR-7844-5p targets mRNAs known to be involved in mitochondrial apoptosis, autophagy regulation, mood disorders, and neurodegenerative disease.",2020-11-12 +23989082,PhenDisco: phenotype discovery system for the database of genotypes and phenotypes.,"The database of genotypes and phenotypes (dbGaP) developed by the National Center for Biotechnology Information (NCBI) is a resource that contains information on various genome-wide association studies (GWAS) and is currently available via NCBI's dbGaP Entrez interface. The database is an important resource, providing GWAS data that can be used for new exploratory research or cross-study validation by authorized users. However, finding studies relevant to a particular phenotype of interest is challenging, as phenotype information is presented in a non-standardized way. To address this issue, we developed PhenDisco (phenotype discoverer), a new information retrieval system for dbGaP. PhenDisco consists of two main components: (1) text processing tools that standardize phenotype variables and study metadata, and (2) information retrieval tools that support queries from users and return ranked results. In a preliminary comparison involving 18 search scenarios, PhenDisco showed promising performance for both unranked and ranked search comparisons with dbGaP's search engine Entrez. The system can be accessed at http://pfindr.net.",2013-08-29 +,Total evidence phylogenetic analysis and reclassification of Euschistus Dallas within Carpocorini (Hemiptera: Pentatomidae: Pentatominae),"Robust phylogenetic hypotheses have become key for studies addressing the evolutionary biology and ecology of various groups of organisms. 
In the species‐rich heteropteran superfamily Pentatomoidea, phylogenies at lower taxonomic levels are still scarce and mostly employ exclusively morphological data. In this study, we conducted a total evidence phylogeny focusing on the tribe Carpocorini (Pentatomidae), using morphological data and four DNA markers (COI, Cytb, 16S and 28S rDNA; ∼2330 bp; 32 taxa) in order to investigate the relationships within Euschistus Dallas, one of the most speciose pentatomid genera, and between Euschistus and related genera. Our hypotheses generated by maximum likelihood and Bayesian inference show that the current taxonomic composition and classification of Euschistus and allied genera are in need of revision. Euschistus was recovered as nonmonophyletic, with the subgenera forming four independent lineages: Euschistus (Euschistus) and Euschistus (Lycipta) Stål are sister groups; Euschistus (Euschistomorphus) Jensen‐Haarup is more closely related to Dichelops Spinola and Agroecus Dallas; and Mitripus Rolston is divided into two clades closely related to Sibaria Stål and Ladeaschistus Rolston. We chose not to change the classification of E. (Euschistomorphus) until further data become available, and propose to split Euschistus into three genera with the exclusion of Euschistus (Mitripus) and all of its species. Here we elevate Mitripus to genus rank to include M. acutus comb.n., M. convergens comb.n. and M. legionarius comb.n., and propose Adustonotus Bianchi gen.n. to include A. anticus comb.n., A. latus comb.n., A. tauricornis comb.n., A. grandis comb.n., A. hansi comb.n., A. paranticus comb.n., A. irroratus comb.n. and A. saramagoi comb.n. We also provide identification keys to the genera Adustonotus gen.n., Ladeaschistus, Mitripus n. rank and Sibaria, here defined as the Mitripus genus group, and to the species of Mitripus and Adustonotus gen.n. 
Our results provide insights into the current status of the classification of the Pentatomidae, suggesting the need for phylogenetic analyses at different taxonomic levels within stink bugs. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:E09D2675‐0F2B‐4AAE‐9837‐257E0B18BC52.",2017-04-01 +31064737,[Digitization in Swiss veterinary practices].,"

Introduction

Data on the digitization in Swiss veterinary practices and clinics were collected in a survey from June 2017 to the end of December 2017. Data of 171 practices contributed to the survey. Animal records were filed in 96.5% with a practice management program. Nine out of ten practices operate an x-ray machine whereof 70% digitally record the radiographs. While a moderate diversity of practice management systems is used, numerous different radiographic recording, archiving and viewing systems are utilized. Data exchange with other practices and owners preferably takes place via e-mail, followed by upload servers and digital data carriers. Data protection receives less attention in veterinary medicine than in comparison to human medicine. A protected data exchange platform coupled with AMICUS and ANIS is under construction via standardized DICOM (https://www.dicomstandard.org/current/) and HL7 (https://www.hl7.org/) interfaces.",2019-05-01 +29628922,The Applied Development of a Tiered Multilocus Sequence Typing (MLST) Scheme for Dichelobacter nodosus.,"Dichelobacter nodosus (D. nodosus) is the causative pathogen of ovine footrot, a disease that has a significant welfare and financial impact on the global sheep industry. Previous studies into the phylogenetics of D. nodosus have focused on Australia and Scandinavia, meaning the current diversity in the United Kingdom (U.K.) population and its relationship globally, is poorly understood. Numerous epidemiological methods are available for bacterial typing; however, few account for whole genome diversity or provide the opportunity for future application of new computational techniques. Multilocus sequence typing (MLST) measures nucleotide variations within several loci with slow accumulation of variation to enable the designation of allele numbers to determine a sequence type. 
The usage of whole genome sequence data enables the application of MLST, but also core and whole genome MLST for higher levels of strain discrimination with a negligible increase in experimental cost. An MLST database was developed alongside a seven loci scheme using publically available whole genome data from the sequence read archive. Sequence type designation and strain discrimination was compared to previously published data to ensure reproducibility. Multiple D. nodosus isolates from U.K. farms were directly compared to populations from other countries. The U.K. isolates define new clades within the global population of D. nodosus and predominantly consist of serogroups A, B and H, however serogroups C, D, E, and I were also found. The scheme is publically available at https://pubmlst.org/dnodosus/.",2018-03-23 +31647520,SMARTS: the social media-based addiction recovery and intervention targeting server. ,"Substance abuse and addiction is a significant contemporary health crisis. Modeling its epidemiology and designing effective interventions requires real-time data analysis along with the means to contextualize addiction patterns across the individual-to-community scale. In this context, social media platforms have begun to receive significant attention as a novel source of real-time user-reported information. However, the ability of epidemiologists to use such information is significantly stymied by the lack of publicly available algorithms and software for addiction information extraction, analysis and modeling. SMARTS is a public, open source, web-based application that addresses the aforementioned deficiency. SMARTS is designed to analyze data from two popular social media forums, namely, Reddit and Twitter and can be used to study the effect of various intoxicants including, opioids, weed, kratom, alcohol, and cigarettes. 
The SMARTS software analyzes social media posts using natural language processing, and machine learning to characterize drug use at both the individual- and population-levels. Included in SMARTS is a predictive modeling functionality that can, with high accuracy, identify individuals open to addiction recovery interventions. SMARTS also supports extraction, analysis and visualization of a number of key informational and demographic characteristics including post topics and sentiment, drug- and recovery-term usage, geolocation, and age. Finally, the distributions of the aforementioned characteristics as derived from a set of 170,097 drug users are provided as part of SMARTS and can be used by researchers as a reference. The SMARTS web server and source code are available at: http://haddock9.sfsu.edu/. Supplementary data are available at Bioinformatics online.",2019-10-24 +29106616,MODOMICS: a database of RNA modification pathways. 2017 update.,"MODOMICS is a database of RNA modifications that provides comprehensive information concerning the chemical structures of modified ribonucleosides, their biosynthetic pathways, the location of modified residues in RNA sequences, and RNA-modifying enzymes. In the current database version, we included the following new features and data: extended mass spectrometry and liquid chromatography data for modified nucleosides; links between human tRNA sequences and MINTbase - a framework for the interactive exploration of mitochondrial and nuclear tRNA fragments; new, machine-friendly system of unified abbreviations for modified nucleoside names; sets of modified tRNA sequences for two bacterial species, updated collection of mammalian tRNA modifications, 19 newly identified modified ribonucleosides and 66 functionally characterized proteins involved in RNA modification. Data from MODOMICS have been linked to the RNAcentral database of RNA sequences. 
MODOMICS is available at http://modomics.genesilico.pl.",2018-01-01 +32422000,Correlation Network Analysis Provides Important Modules and Pathways for Human Hyperlipidemia.,"Hyperlipidemia casts great threats to humans around the world. The systemic co-expression and function enrichment analysis for this disease is limited to date. This study was to identify co-expression modules to explore hyperlipidemia-associated functional pathways. Gene expression data of human hyperlipidemia (GSE17170) were downloaded from the Gene Expression Omnibus (GEO) database. We evaluated the top 3,000 genes with the highest average expression, with which the co-expression modules were constructed in weighted correlation network analysis (WGC-NA).Cluster analysis was then applied to visualize the interaction relationships of these modules. By gene ontology (GO) and KEGG functional enrichment analysis, we finally investigated the function enrichment of co-expression genes from important modules in the Database for Annotation, Visualization, and Integrated Discovery (DAVID) database (https:// david.ncifcrf.gov/summary.jsp).15 Thirteen co-expression modules were constructed for 3,000 genes in the 70samples. Interaction relationships of hub genes between pairwise modules showed high confidence. In functional enrichments of the co-expression modules, genes in Modules 3 and 4 were significantly enriched in biological processes and pathways that are associated with ubiquitination-for example, G0:0016567 (protein ubiquitination) and hsa04120 (ubiquitin-mediated proteolysis). We inferred these two modules as key modules associated with hyperlipidemia. Additionally, G0:0098609 (cell-cell adhesion) was enriched in four modules, suggesting an important function in hyperlipidemia. In conclusion, Protein ubiquitination may play important roles in human hyperlipidemia. 
All the discoveries made in this study enrich understanding of the pathogenesis of hyperlipidemia and might contribute much to the development of diagnosis and outcome evaluation of this disease.",2019-01-01 +24163254,Ensembl Genomes 2013: scaling up access to genome-wide data.,"Ensembl Genomes (http://www.ensemblgenomes.org) is an integrating resource for genome-scale data from non-vertebrate species. The project exploits and extends technologies for genome annotation, analysis and dissemination, developed in the context of the vertebrate-focused Ensembl project, and provides a complementary set of resources for non-vertebrate species through a consistent set of programmatic and interactive interfaces. These provide access to data including reference sequence, gene models, transcriptional data, polymorphisms and comparative analysis. This article provides an update to the previous publications about the resource, with a focus on recent developments. These include the addition of important new genomes (and related data sets) including crop plants, vectors of human disease and eukaryotic pathogens. In addition, the resource has scaled up its representation of bacterial genomes, and now includes the genomes of over 9000 bacteria. Specific extensions to the web and programmatic interfaces have been developed to support users in navigating these large data sets. Looking forward, analytic tools to allow targeted selection of data for visualization and download are likely to become increasingly important in future as the number of available genomes increases within all domains of life, and some of the challenges faced in representing bacterial data are likely to become commonplace for eukaryotes in future.",2013-10-25 +29045713,SCPortalen: human and mouse single-cell centric database.,"Published single-cell datasets are rich resources for investigators who want to address questions not originally asked by the creators of the datasets. 
The single-cell datasets might be obtained by different protocols and diverse analysis strategies. The main challenge in utilizing such single-cell data is how we can make the various large-scale datasets to be comparable and reusable in a different context. To challenge this issue, we developed the single-cell centric database 'SCPortalen' (http://single-cell.clst.riken.jp/). The current version of the database covers human and mouse single-cell transcriptomics datasets that are publicly available from the INSDC sites. The original metadata was manually curated and single-cell samples were annotated with standard ontology terms. Following that, common quality assessment procedures were conducted to check the quality of the raw sequence. Furthermore, primary data processing of the raw data followed by advanced analyses and interpretation have been performed from scratch using our pipeline. In addition to the transcriptomics data, SCPortalen provides access to single-cell image files whenever available. The target users of SCPortalen are all researchers interested in specific cell types or population heterogeneity. Through the web interface of SCPortalen users are easily able to search, explore and download the single-cell datasets of their interests.",2018-01-01 +28967693,HUMA: A platform for the analysis of genetic variation in humans.,"The completion of the human genome project at the beginning of the 21st century, along with the rapid advancement of sequencing technologies thereafter, has resulted in exponential growth of biological data. In genetics, this has given rise to numerous variation databases, created to store and annotate the ever-expanding dataset of known mutations. Usually, these databases focus on variation at the sequence level. Few databases focus on the analysis of variation at the 3D level, that is, mapping, visualizing, and determining the effects of variation in protein structures. 
Additionally, these Web servers seldom incorporate tools to help analyze these data. Here, we present the Human Mutation Analysis (HUMA) Web server and database. HUMA integrates sequence, structure, variation, and disease data into a single, connected database. A user-friendly interface provides click-based data access and visualization, whereas a RESTful Web API provides programmatic access to the data. Tools have been integrated into HUMA to allow initial analyses to be carried out on the server. Furthermore, users can upload their private variation datasets, which are automatically mapped to public data and can be analyzed using the integrated tools. HUMA is freely accessible at https://huma.rubi.ru.ac.za.",2017-10-17 +30917112,PlotsOfData-A web app for visualizing data together with their summaries.,"Reporting of the actual data in graphs and plots increases transparency and enables independent evaluation. On the other hand, data summaries are often used in graphs because they aid interpretation. To democratize state-of-the-art data visualization of raw data with a selection of statistical summaries, a freely available, open-source web app was written using R/shiny that uses the ggplot2 package for generating plots. Users can to choose how to display the data and which of the data summaries to add. In addition, the 95% confidence intervals (95CIs) can be added for visual inferences. By adjusting the visibility of the layers, the visualization of the raw data and their summaries can be tuned for optimal presentation and interpretation. 
The app is dubbed PlotsOfData and is available at https://huygens.science.uva.nl/PlotsOfData/.",2019-03-27 +32043293,Behavioural disturbances in patients with frontotemporal lobe degeneration focusing on caregiver burden at home and in nursing homes.,"AIM AND OBJECTIVE:To explore the challenges faced by family caregivers of people with frontotemporal dementia and other forms of dementia affecting the frontal and temporal lobes causing behavioural disturbances through a qualitative approach with in-depth interviews. BACKGROUND:Studies of different forms of dementia involving degeneration of the frontal and temporal lobes have mainly focused on the neurophysiology and physiology of the disease and on caregivers' health. Few studies have described the challenges and burdens connected with everyday life and in relation to suitable nursing home placement that are faced by family caregivers. METHOD AND DESIGN:This study used a descriptive and explorative design. Eleven semi-structured interviews with family caregivers of patients from special units in four nursing homes were conducted in 2014. Data were analysed based on Kvale and Brinkmann's three contexts of interpretation: self-understanding, common sense and theoretical understanding. Checklist for qualitative studies: Standards for Reporting Qualitative Research (SRQR) http://www.equator-network.org/reporting-guidelines/srqr/ RESULTS: Two central themes were derived from the data: changes in behaviour and personality were perceived as incomprehensible, frightening and increasingly difficult to manage. Family caregivers experienced challenges in finding suitable care facilities when they were not able to continue providing home care. Due to behavioural disturbances and lack of relevant competencies among health personnel, family members were often moved between nursing homes. 
CONCLUSION:Pronounced personality and behavioural disturbances such as tactlessness and aggression in a family member with dementia are experienced by caregivers as stressful and burdensome and may lead to feelings of shame and guilt. A lack of suitable care facilities adds to the stress and difficulties of the families and entails an additional and unresolved burden. RELEVANCE TO CLINICAL PRACTICE:The study reveals a need for more knowledge among those organising health services as well as healthcare professional dealing with this patient category to ease the burden on next of kin.",2020-02-27 +28981707,The Encyclopedia of Proteome Dynamics: a big data ecosystem for (prote)omics.,"Driven by improvements in speed and resolution of mass spectrometers (MS), the field of proteomics, which involves the large-scale detection and analysis of proteins in cells, tissues and organisms, continues to expand in scale and complexity. There is a resulting growth in datasets of both raw MS files and processed peptide and protein identifications. MS-based proteomics technology is also used increasingly to measure additional protein properties affecting cellular function and disease mechanisms, including post-translational modifications, protein-protein interactions, subcellular and tissue distributions. Consequently, biologists and clinicians need innovative tools to conveniently analyse, visualize and explore such large, complex proteomics data and to integrate it with genomics and other related large-scale datasets. We have created the Encyclopedia of Proteome Dynamics (EPD) to meet this need (https://peptracker.com/epd/). The EPD combines a polyglot persistent database and web-application that provides open access to integrated proteomics data for >30 000 proteins from published studies on human cells and model organisms. 
It is designed to provide a user-friendly interface, featuring graphical navigation with interactive visualizations that facilitate powerful data exploration in an intuitive manner. The EPD offers a flexible and scalable ecosystem to integrate proteomics data with genomics information, RNA expression and other related, large-scale datasets.",2018-01-01 +31934340,"The genome sequence of celery (Apium graveolens L.), an important leaf vegetable crop rich in apigenin in the Apiaceae family.","Celery (Apium graveolens L.) is a vegetable crop in the Apiaceae family that is widely cultivated and consumed because it contains necessary nutrients and multiple biologically active ingredients, such as apigenin and terpenoids. Here, we report the genome sequence of celery based on the use of HiSeq 2000 sequencing technology to obtain 600.8 Gb of data, achieving ~189-fold genome coverage, from 68 sequencing libraries with different insert sizes ranging from 180 bp to 10 kb in length. The assembled genome has a total sequence length of 2.21 Gb and consists of 34,277 predicted genes. Repetitive DNA sequences represent 68.88% of the genome sequences, and LTR retrotransposons are the main components of the repetitive sequences. Evolutionary analysis showed that a recent whole-genome duplication event may have occurred in celery, which could have contributed to its large genome size. The genome sequence of celery allowed us to identify agronomically important genes involved in disease resistance, flavonoid biosynthesis, terpenoid metabolism, and other important cellular processes. The comparative analysis of apigenin biosynthesis genes among species might explain the high apigenin content of celery. The whole-genome sequences of celery have been deposited at CeleryDB (http://apiaceae.njau.edu.cn/celerydb). 
The availability of the celery genome data advances our knowledge of the genetic evolution of celery and will contribute to further biological research and breeding in celery as well as other Apiaceae plants.",2020-01-06 +32692969,Collecting Words: A Clinical Example of a Morphology-Focused Orthographic Intervention.,"Purpose Morphological interventions promote gains in morphological knowledge and in other oral and written language skills (e.g., phonological awareness, vocabulary, reading, and spelling), yet we have a limited understanding of critical intervention features. In this clinical focus article, we describe a relatively novel approach to teaching morphology that considers its role as the key organizing principle of English orthography. We also present a clinical example of such an intervention delivered during a summer camp at a university speech and hearing clinic. Method Graduate speech-language pathology students provided a 6-week morphology-focused orthographic intervention to children in first through fourth grade (n = 10) who demonstrated word-level reading and spelling difficulties. The intervention focused children's attention on morphological families, teaching how morphology is interrelated with phonology and etymology in English orthography. Results Comparing pre- and posttest scores, children demonstrated improvement in reading and/or spelling abilities, with the largest gains observed in spelling affixes within polymorphemic words. Children and their caregivers reacted positively to the intervention. Therefore, data from the camp offer preliminary support for teaching morphology within the context of written words, and the intervention appears to be a feasible approach for simultaneously increasing morphological knowledge, reading, and spelling. Conclusion Children with word-level reading and spelling difficulties may benefit from a morphology-focused orthographic intervention, such as the one described here. 
Research on the approach is warranted, and clinicians are encouraged to explore its possible effectiveness in their practice. Supplemental Material https://doi.org/10.23641/asha.12290687.",2020-07-15 +31895140,Vicarious Posttraumatic Growth in NICU Nurses.,"

Background

When posttraumatic growth occurs in clinicians as a result of their caring for patients and families who are traumatized, it is termed vicarious posttraumatic growth.

Purpose

(1) To determine the level of vicarious posttraumatic growth and the disruption of core beliefs in neonatal intensive care unit (NICU) nurses who have cared for critically ill infants and their families. (2) To explore those quantitative findings through nurses' qualitative descriptions of their growth.

Methods

A mixed method with a convergent parallel design was used to address the study aims. A targeted sample of neonatal nurses was recruited from the National Association of Neonatal Nurses (NANN) through the MyNANN Community message board. The sample consisted of 109 NICU nurses who completed the quantitative strand and 61 (55%) who completed the qualitative strand. Nurses completed the Posttraumatic Growth Inventory, the Core Beliefs Inventory, and described their experiences of any positive changes in their beliefs or life as a result of caring for critically ill infants. IBM SPSS 25.0 and Krippendorff's content analysis were used to analyze the quantitative and qualitative data, respectively.

Results

NICU nurses reported a moderate degree of vicarious posttraumatic growth and disruption of their assumptive world. Appreciation of Life was the Posttraumatic Growth Inventory dimension that reflected the highest growth and Spiritual Change the lowest.

Implications for practice

Providing posttraumatic growth interventions has the potential to help NICU nurses find meaning through their experience.

Implications for research

Future surveys need to include a higher response rate to generalize the findings. A video abstract is available.Video Abstract available at: https://journals.na.lww.com/advancesinneonatalcare/Pages/videogallery.aspx?autoPlay=false&videoId=36.",2020-08-01 +31939704,Trihalomethanes in Drinking Water and Bladder Cancer Burden in the European Union.,"

Background

Trihalomethanes (THMs) are widespread disinfection by-products (DBPs) in drinking water, and long-term exposure has been consistently associated with increased bladder cancer risk.

Objective

We assessed THM levels in drinking water in the European Union as a marker of DBP exposure and estimated the attributable burden of bladder cancer.

Methods

We collected recent annual mean THM levels in municipal drinking water in 28 European countries (EU28) from routine monitoring records. We estimated a linear exposure-response function for average residential THM levels and bladder cancer by pooling data from studies included in the largest international pooled analysis published to date in order to estimate odds ratios (ORs) for bladder cancer associated with the mean THM level in each country (relative to no exposure), population-attributable fraction (PAF), and number of attributable bladder cancer cases in different scenarios using incidence rates and population from the Global Burden of Disease study of 2016.

Results

We obtained 2005-2018 THM data from EU26, covering 75% of the population. Data coverage and accuracy were heterogeneous among countries. The estimated population-weighted mean THM level was 11.7μg/L [standard deviation (SD) of 11.2]. The estimated bladder cancer PAF was 4.9% [95% confidence interval (CI): 2.5, 7.1] overall (range: 0-23%), accounting for 6,561 (95% CI: 3,389, 9,537) bladder cancer cases per year. Denmark and the Netherlands had the lowest PAF (0.0% each), while Cyprus (23.2%), Malta (17.9%), and Ireland (17.2%) had the highest among EU26. In the scenario where no country would exceed the current EU mean, 2,868 (95% CI: 1,522, 4,060; 43%) annual attributable bladder cancer cases could potentially be avoided.

Discussion

Efforts have been made to reduce THM levels in the European Union. However, assuming a causal association, current levels in certain countries still could lead to a considerable burden of bladder cancer that could potentially be avoided by optimizing water treatment, disinfection, and distribution practices, among other possible measures. https://doi.org/10.1289/EHP4495.",2020-01-15 +24318814,EMAGE: Electronic Mouse Atlas of Gene Expression.,"The EMAGE (Electronic Mouse Atlas of Gene Expression) database (http://www.emouseatlas.org/emage) allows users to perform on-line queries of mouse developmental gene expression. EMAGE data are represented spatially using a framework of 3D mouse embryo models, thus allowing uniquely spatial queries to be carried out alongside more traditional text-based queries. This spatial representation of the data also allows a comparison of spatial similarity between the expression patterns. The data are mapped to the models by a team of curators using bespoke mapping software, and the associated meta-data are curated for accuracy and completeness. The data contained in EMAGE are gathered from three main sources: from the published literature, through large-scale screens and collaborations, and via direct submissions from researchers. There are a variety of ways to query the EMAGE database via the on-line search interfaces, as well as via direct computational script-based queries. EMAGE is a free, on-line, community resource funded by the Medical Research Council, UK.",2014-01-01 +32484858,AnthOligo: automating the design of oligonucleotides for capture/enrichment technologies.,"

Summary

A number of methods have been devised to address the need for targeted genomic resequencing. One of these methods, region-specific extraction (RSE) is characterized by the capture of long DNA fragments (15-20 kb) by magnetic beads, after enzymatic extension of oligonucleotides hybridized to selected genomic regions. Facilitating the selection of the most appropriate capture oligos for targeting a region of interest, satisfying the properties of temperature (Tm) and entropy (ΔG), while minimizing the formation of primer-dimers in a pooled experiment, is therefore necessary. Manual design and selection of oligos becomes very challenging, complicated by factors such as length of the target region and number of targeted regions. Here we describe, AnthOligo, a web-based application developed to optimally automate the process of generation of oligo sequences used to target and capture the continuum of large and complex genomic regions. Apart from generating oligos for RSE, this program may have wider applications in the design of customizable internal oligos to be used as baits for gene panel analysis or even probes for large-scale comparative genomic hybridization array processes. AnthOligo was tested by capturing the Major Histocompatibility Complex (MHC) of a random sample.The application provides users with a simple interface to upload an input file in BED format and customize parameters for each task. The task of probe design in AnthOligo commences when a user uploads an input file and concludes with the generation of a result-set containing an optimal set of region-specific oligos. AnthOligo is currently available as a public web application with URL: http://antholigo.chop.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +31165883,WashU Epigenome Browser update 2019.,"The WashU Epigenome Browser (https://epigenomegateway.wustl.edu/) provides visualization, integration and analysis tools for epigenomic datasets. Since 2010, it has provided the scientific community with data from large consortia including the Roadmap Epigenomics and the ENCODE projects. Recently, we refactored the codebase, redesigned the user interface, and developed various novel features. New features include: (i) visualization using virtual reality (VR), which has implications in biology education and the study of 3D chromatin structure; (ii) expanded public data hubs, including data from the 4DN, ENCODE, Roadmap Epigenomics, TaRGET, IHEC and TCGA consortia; (iii) a more responsive user interface; (iv) a history of interactions, which enables undo and redo; (v) a feature we call Live Browsing, which allows multiple users to collaborate remotely on the same session; (vi) the ability to visualize local tracks and data hubs. Amazon Web Services also hosts the redesign at https://epigenomegateway.org/.",2019-07-01 +29897419,MARDy: Mycology Antifungal Resistance Database.,"

Summary

The increase of antifungal drug resistance is a major global human health concern and threatens agriculture and food security; in order to tackle these concerns, it is important to understand the mechanisms that cause antifungal resistance. The curated Mycology Antifungal Resistance Database (MARDy) is a web-service of antifungal drug resistance mechanisms, including amino acid substitutions, tandem repeat sequences and genome ploidy. MARDy is implemented on a Linux, Apache, MySQL and PHP web development platform and includes a local installation of BLASTn of the database of curated genes.

Availability and implementation

MARDy can be accessed at http://www.mardy.net and is free to use. The complete database can be retrieved, ordered by organism, gene and drug. Missing or new mycological antifungal resistance data can be relayed to the development team through a contribute entry form. Updates and news will be publicized via a dedicated Twitter feed: @MARDYfungi.",2018-09-01 +29916797,ClermonTyping: an easy-to-use and accurate in silico method for Escherichia genus strain phylotyping. ,"The genus Escherichia is composed of Escherichia albertii, E. fergusonii, five cryptic Escherichia clades and E. coli sensu stricto. Furthermore, the E. coli species can be divided into seven main phylogroups termed A, B1, B2, C, D, E and F. As specific lifestyles and/or hosts can be attributed to these species/phylogroups, their identification is meaningful for epidemiological studies. Classical phenotypic tests fail to identify non-sensu stricto E. coli as well as phylogroups. Clermont and colleagues have developed PCR assays that allow the identification of most of these species/phylogroups, the triplex/quadruplex PCR for E. coli phylogroup determination being the most popular. With the growing availability of whole genome sequences, we have developed the ClermonTyping method and its associated web-interface, the ClermonTyper, that allows a given strain sequence to be assigned to E. albertii, E. fergusonii, Escherichia clades I-V, E. coli sensu stricto as well as to the seven main E. coli phylogroups. The ClermonTyping is based on the concept of in vitro PCR assays and maintains the principles of ease of use and speed that prevailed during the development of the in vitro assays. This in silico approach shows 99.4 % concordance with the in vitro PCR assays and 98.8 % with the Mash genome-clustering tool. The very few discrepancies result from various errors occurring mainly from horizontal gene transfers or SNPs in the primers. 
We propose the ClermonTyper as a freely available resource to the scientific community at: http://clermontyping.iame-research.center/.",2018-06-19 +30508039,GWASpro: a high-performance genome-wide association analysis server.,"

Summary

We present GWASpro, a high-performance web server for the analyses of large-scale genome-wide association studies (GWAS). GWASpro was developed to provide data analyses for large-scale molecular genetic data, coupled with complex replicated experimental designs such as found in plant science investigations and to overcome the steep learning curves of existing GWAS software tools. GWASpro supports building complex design matrices, by which complex experimental designs that may include replications, treatments, locations and times, can be accounted for in the linear mixed model. GWASpro is optimized to handle GWAS data that may consist of up to 10 million markers and 10 000 samples from replicable lines or hybrids. GWASpro provides an interface that significantly reduces the learning curve for new GWAS investigators.

Availability and implementation

GWASpro is freely available at https://bioinfo.noble.org/GWASPRO.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +23601370,LiverAtlas: a unique integrated knowledge database for systems-level research of liver and hepatic disease.,"

Background

A large amount of liver-related physiological and pathological data exist in publicly available biological and bibliographic databases, which are usually far from comprehensive or integrated. Data collection, integration and mining processes pose a great challenge to scientific researchers and clinicians interested in the liver.

Method

To address these problems, we constructed LiverAtlas (http://liveratlas.hupo.org.cn), a comprehensive resource of biomedical knowledge related to the liver and various hepatic diseases by incorporating 53 databases.

Results

In the present version, LiverAtlas covers data on liver-related genomics, transcriptomics, proteomics, metabolomics and hepatic diseases. Additionally, LiverAtlas provides a wealth of manually curated information, relevant literature citations and cross-references to other databases. Importantly, an expert-confirmed Human Liver Disease Ontology, including relevant information for 227 types of hepatic disease, has been constructed and is used to annotate LiverAtlas data. Furthermore, we have demonstrated two examples of applying LiverAtlas data to identify candidate markers for hepatocellular carcinoma (HCC) at the systems level and to develop a systems biology-based classifier by combining the differential gene expression with topological features of human protein interaction networks to enhance the ability of HCC differential diagnosis.

Conclusion

LiverAtlas is the most comprehensive liver and hepatic disease resource, which helps biologists and clinicians to analyse their data at the systems level and will contribute much to the biomarker discovery and diagnostic performance enhancement for liver diseases.",2013-04-21 +30942868,Digital expression explorer 2: a repository of uniformly processed RNA sequencing data. ,"RNA sequencing (RNA-seq) is an indispensable tool in the study of gene regulation. While the technology has brought with it better transcript coverage and quantification, there remain considerable barriers to entry for the computational biologist to analyse large data sets. There is a real need for a repository of uniformly processed RNA-seq data that is easy to use. To address these obstacles, we developed Digital Expression Explorer 2 (DEE2), a web-based repository of RNA-seq data in the form of gene-level and transcript-level expression counts. DEE2 contains >5.3 trillion assigned reads from 580,000 RNA-seq data sets including species Escherichia coli, yeast, Arabidopsis, worm, fruit fly, zebrafish, rat, mouse, and human. Base-space sequence data downloaded from the National Center for Biotechnology Information Sequence Read Archive underwent quality control prior to transcriptome and genome mapping using open-source tools. Uniform data processing methods ensure consistency across experiments, facilitating fast and reproducible meta-analyses. The web interface allows users to quickly identify data sets of interest using accession number and keyword searches. The data can also be accessed programmatically using a specifically designed R package. We demonstrate that DEE2 data are compatible with statistical packages such as edgeR or DESeq. Bulk data are also available for download. DEE2 can be found at http://dee2.io.",2019-04-01 +29744539,Community-based pre-pregnancy care programme improves pregnancy preparation in women with pregestational diabetes.,"

Aims/hypothesis

Women with diabetes remain at increased risk of adverse pregnancy outcomes associated with poor pregnancy preparation. However, women with type 2 diabetes are less aware of and less likely to access pre-pregnancy care (PPC) compared with women with type 1 diabetes. We developed and evaluated a community-based PPC programme with the aim of improving pregnancy preparation in all women with pregestational diabetes.

Methods

This was a prospective cohort study comparing pregnancy preparation measures before and during/after the PPC intervention in women with pre-existing diabetes from 1 June 2013 to 28 February 2017. The setting was 422 primary care practices and ten National Health Service specialist antenatal diabetes clinics. A multifaceted approach was taken to engage women with diabetes and community healthcare teams. This included identifying and sending PPC information leaflets to all eligible women, electronic preconception care templates, online education modules and resources, and regional meetings and educational events. Key outcomes were preconception folic acid supplementation, maternal HbA1c level, use of potentially harmful medications at conception and gestational age at first presentation, before and during/after the PPC programme.

Results

A total of 306 (73%) primary care practices actively participated in the PPC programme. Primary care databases were used to identify 5075 women with diabetes aged 18-45 years. PPC leaflets were provided to 4558 (89.8%) eligible women. There were 842 consecutive pregnancies in women with diabetes: 502 before and 340 during/after the PPC intervention. During/after the PPC intervention, pregnant women with type 2 diabetes were more likely to achieve target HbA1c levels ≤48 mmol/mol (6.5%) (44.4% of women before vs 58.5% of women during/after PPC intervention; p = 0.016) and to take 5 mg folic acid daily (23.5% and 41.8%; p = 0.001). There was an almost threefold improvement in 'optimal' pregnancy preparation in women with type 2 diabetes (5.8% and 15.1%; p = 0.021). Women with type 1 diabetes presented for earlier antenatal care during/after PPC (54.0% vs 67.3% before 8 weeks' gestation; p = 0.003) with no other changes.

Conclusions/interpretation

A pragmatic community-based PPC programme was associated with clinically relevant improvements in pregnancy preparation in women with type 2 diabetes. To our knowledge, this is the first community-based PPC intervention to improve pregnancy preparation for women with type 2 diabetes.

Data availability

Further details of the data collection methodology, individual clinic data and the full audit reports for healthcare professionals and service users are available from https://digital.nhs.uk/data-and-information/clinical-audits-and-registries/our-clinical-audits-and-registries/national-pregnancy-in-diabetes-audit .",2018-05-09 +31474000,pseudoQC: A Regression-Based Simulation Software for Correction and Normalization of Complex Metabolomics and Proteomics Datasets.,"Various types of unwanted and uncontrollable signal variations in MS-based metabolomics and proteomics datasets severely disturb the accuracies of metabolite and protein profiling. Therefore, pooled quality control (QC) samples are often employed in quality management processes, which are indispensable to the success of metabolomics and proteomics experiments, especially in high-throughput cases and long-term projects. However, data consistency and QC sample stability are still difficult to guarantee because of the experimental operation complexity and differences between experimenters. To make things worse, numerous proteomics projects do not take QC samples into consideration at the beginning of experimental design. Herein, a powerful and interactive web-based software, named pseudoQC, is presented to simulate QC sample data for actual metabolomics and proteomics datasets using four different machine learning-based regression methods. The simulated data are used for correction and normalization of the two published datasets, and the obtained results suggest that nonlinear regression methods perform better than linear ones. Additionally, the above software is available as a web-based graphical user interface and can be utilized by scientists without a bioinformatics background. pseudoQC is open-source software and freely available at https://www.omicsolution.org/wukong/pseudoQC/.",2019-09-18 +31307061,PRSice-2: Polygenic Risk Score software for biobank-scale data. 
,"Polygenic risk score (PRS) analyses have become an integral part of biomedical research, exploited to gain insights into shared aetiology among traits, to control for genomic profile in experimental studies, and to strengthen causal inference, among a range of applications. Substantial efforts are now devoted to biobank projects to collect large genetic and phenotypic data, providing unprecedented opportunity for genetic discovery and applications. To process the large-scale data provided by such biobank resources, highly efficient and scalable methods and software are required. Here we introduce PRSice-2, an efficient and scalable software program for automating and simplifying PRS analyses on large-scale data. PRSice-2 handles both genotyped and imputed data, provides empirical association P-values free from inflation due to overfitting, supports different inheritance models, and can evaluate multiple continuous and binary target traits simultaneously. We demonstrate that PRSice-2 is dramatically faster and more memory-efficient than PRSice-1 and alternative PRS software, LDpred and lassosum, while having comparable predictive power. PRSice-2's combination of efficiency and power will be increasingly important as data sizes grow and as the applications of PRS become more sophisticated, e.g., when incorporated into high-dimensional or gene set-based analyses. PRSice-2 is written in C++, with an R script for plotting, and is freely available for download from http://PRSice.info.",2019-07-01 +30923806,rMTA: robust metabolic transformation analysis.,"MOTIVATION:The development of computational tools exploiting -omics data and high-quality genome-scale metabolic networks for the identification of novel drug targets is a relevant topic in Systems Medicine. 
Metabolic Transformation Algorithm (MTA) is one of these tools, which aims to identify targets that transform a disease metabolic state back into a healthy state, with potential application in any disease where a clear metabolic alteration is observed. RESULTS:Here, we present a robust extension to MTA (rMTA), which additionally incorporates a worst-case scenario analysis and minimization of metabolic adjustment to evaluate the beneficial effect of gene knockouts. We show that rMTA complements MTA in the different datasets analyzed (gene knockout perturbations in different organisms, Alzheimer's disease and prostate cancer), bringing a more accurate tool for predicting therapeutic targets. AVAILABILITY AND IMPLEMENTATION:rMTA is freely available on The Cobra Toolbox: https://opencobra.github.io/cobratoolbox/latest/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-11-01 +32663038,Cultural Competence and Self-Efficacy After Study Abroad Experiences.,"Purpose Cultural competence is crucial for the successful provision of speech, language, and hearing services. The purpose of this study was to assess and describe gains in cultural awareness, cultural competence, and self-efficacy after service-learning study abroad experiences and to examine whether gains in these areas are related to higher clinical skills ratings in speech-language pathology and audiology students. Method Sixteen speech-language pathology and audiology students participated in two international study abroad experiences (Nicaragua and Malawi). Students completed a survey on cultural awareness, competence, and self-efficacy beliefs and journal entries before, during, and after their trips. In addition, the supervisors assessed the clinical skills of the students by the end of the trip. The researchers applied a mixed-methods approach to analyze data from the survey, clinical ratings, and journals. 
Results Students showed significant increases in cultural awareness, competence, and self-efficacy. Gains in self-efficacy and cultural awareness were highly correlated with students' clinical ratings in cultural competence as judged by their supervisors. Two main themes were identified from the journal entries: continuing community involvement and growth in cultural self-efficacy. Conclusion Service-learning study abroad experiences help students develop cultural awareness and competence skills and increase their self-efficacy beliefs. Using a mixed-methods approach can help identify strengths and weaknesses in the training of speech-language pathology and audiology students with regard to cultural competence. Supplemental Material https://doi.org/10.23641/asha.12642647.",2020-07-14 +32548853,Structural compliance: A new metric for protein flexibility.,"Proteins are the active players in performing essential molecular activities throughout biology, and their dynamics has been broadly demonstrated to relate to their mechanisms. The intrinsic fluctuations have often been used to represent their dynamics and then compared to the experimental B-factors. However, proteins do not move in a vacuum and their motions are modulated by solvent that can impose forces on the structure. In this paper, we introduce a new structural concept, which has been called the structural compliance, for the evaluation of the global and local deformability of the protein structure in response to intramolecular and solvent forces. Based on the application of pairwise pulling forces to a protein elastic network, this structural quantity has been computed and sometimes is even found to yield an improved correlation with the experimental B-factors, meaning that it may serve as a better metric for protein flexibility. The inverse of structural compliance, namely the structural stiffness, has also been defined, which shows a clear anticorrelation with the experimental data. 
Although the present applications are made to proteins, this approach can also be applied to other biomolecular structures such as RNA. This present study considers only elastic network models, but the approach could be applied further to conventional atomic molecular dynamics. Compliance is found to have a slightly better agreement with the experimental B-factors, perhaps reflecting its bias toward the effects of local perturbations, in contrast to mean square fluctuations. The code for calculating protein compliance and stiffness is freely accessible at https://jerniganlab.github.io/Software/PACKMAN/Tutorials/compliance.",2020-07-14 +26620522,PPIM: A Protein-Protein Interaction Database for Maize.,"Maize (Zea mays) is one of the most important crops worldwide. To understand the biological processes underlying various traits of the crop (e.g. yield and response to stress), a detailed protein-protein interaction (PPI) network is highly demanded. Unfortunately, there are very few such PPIs available in the literature. Therefore, in this work, we present the Protein-Protein Interaction Database for Maize (PPIM), which covers 2,762,560 interactions among 14,000 proteins. The PPIM contains not only accurately predicted PPIs but also those molecular interactions collected from the literature. The database is freely available at http://comp-sysbio.org/ppim with a user-friendly powerful interface. We believe that the PPIM resource can help biologists better understand the maize crop.",2015-11-30 +31004478,The EntOptLayout Cytoscape plug-in for the efficient visualization of major protein complexes in protein-protein interaction and signalling networks.,"

Motivation

Network visualizations of complex biological datasets usually result in 'hairball' images, which do not discriminate network modules.

Results

We present the EntOptLayout Cytoscape plug-in based on a recently developed network representation theory. The plug-in provides an efficient visualization of network modules, which represent major protein complexes in protein-protein interaction and signalling networks. Importantly, the tool gives a quality score of the network visualization by calculating the information loss between the input data and the visual representation showing a 3- to 25-fold improvement over conventional methods.

Availability and implementation

The plug-in (running on Windows, Linux, or Mac OS) and its tutorial (both in written and video forms) can be downloaded freely under the terms of the MIT license from: http://apps.cytoscape.org/apps/entoptlayout.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +31655566,Interpreting HIV diagnostic histories into infection time estimates: analytical framework and online tool.,"

Background

It is frequently of epidemiological and/or clinical interest to estimate the date of HIV infection or time-since-infection of individuals. Yet, for over 15 years, the only widely-referenced infection dating algorithm that utilises diagnostic testing data to estimate time-since-infection has been the 'Fiebig staging' system. This defines a number of stages of early HIV infection through various standard combinations of contemporaneous discordant diagnostic results using tests of different sensitivity. To develop a new, more nuanced infection dating algorithm, we generalised the Fiebig approach to accommodate positive and negative diagnostic results generated on the same or different dates, and arbitrary current or future tests - as long as the test sensitivity is known. For this purpose, test sensitivity is the probability of a positive result as a function of time since infection.

Methods

The present work outlines the analytical framework for infection date estimation using subject-level diagnostic testing histories, and data on test sensitivity. We introduce a publicly-available online HIV infection dating tool that implements this estimation method, bringing together 1) curatorship of HIV test performance data, and 2) infection date estimation functionality, to calculate plausible intervals within which infection likely became detectable for each individual. The midpoints of these intervals are interpreted as infection time 'point estimates' and referred to as Estimated Dates of Detectable Infection (EDDIs). The tool is designed for easy bulk processing of information (as may be appropriate for research studies) but can also be used for individual patients (such as in clinical practice).

Results

In many settings, including most research studies, detailed diagnostic testing data are routinely recorded, and can provide reasonably precise estimates of the timing of HIV infection. We present a simple logic to the interpretation of diagnostic testing histories into infection time estimates, either as a point estimate (EDDI) or an interval (earliest plausible to latest plausible dates of detectable infection), along with a publicly-accessible online tool that supports wide application of this logic.

Conclusions

This tool, available at https://tools.incidence-estimation.org/idt/ , is readily updatable as test technology evolves, given the simple architecture of the system and its nature as an open source project.",2019-10-26 +32308930,CirRNAPL: A web server for the identification of circRNA based on extreme learning machine.,"Circular RNA (circRNA) plays an important role in the development of diseases, and it provides a novel idea for drug development. Accurate identification of circRNAs is important for a deeper understanding of their functions. In this study, we developed a new classifier, CirRNAPL, which extracts the features of nucleic acid composition and structure of the circRNA sequence and optimizes the extreme learning machine based on the particle swarm optimization algorithm. We compared CirRNAPL with existing methods, including blast, on three datasets and found CirRNAPL significantly improved the identification accuracy for the three datasets, with accuracies of 0.815, 0.802, and 0.782, respectively. Additionally, we performed sequence alignment on 564 sequences of the independent detection set of the third data set and analyzed the expression level of circRNAs. Results showed the expression level of the sequence is positively correlated with the abundance. A user-friendly CirRNAPL web server is freely available at http://server.malab.cn/CirRNAPL/.",2020-04-02 +30536256,"[Registry Research Funding of the German Society of Plastic, Reconstructive and Aesthetic Surgeons (DGPRÄC) and Research Funding Report 2017/2018].","

Introduction

This report serves to publicize the research of academic institutions for Plastic Surgery within our society DGPRÄC in 2017/2018 and sequels the funding report of 2015/2016. Applications to public, non-public, or industrial funding organizations were evaluated. At the same time, this paper analyses the number of approved DFG applications in Plastic, Thoracic and Vascular Surgery in the GEPRIS system. Contrary to these specialties, Plastic Surgery is not classified as an independent speciality in the subject structure of the DFG review board which results in a lack of transparency concerning Plastic Surgery research work.

Materials and methods

Our previously established online database (https://docs.google.com/forms/d/1OaSnHyKTysawiI1ie7kfUxDf7nJP_RiTUJTsnb7Mq_E/edit) for reporting requested/ approved and rejected research applications to public, non-public and industrial funding organizations was continued and evaluated together with applications found in the DFG's public database GEPRIS.

Results

Compared to the previous year's report, the number of approved applications from public research organizations (DFG, BMBF, BMWi, EU) was increased from 23 to 27. We identified 19 approved DFG applications from Plastic Surgery, as compared to 9 and 8 applications by Thoracic and Vascular Surgery, respectively.

Summary

Taken together, this data emphasizes that our research is at least equal to that of other newly established surgical specialties within the framework of the DFG. Accordingly, we hope to provide further arguments for an adaptation of the DFG review boards subject structure to include Plastic Surgery as an independent specialty as it is for Vascular Surgery and Thoracic Surgery.",2018-12-10 +33164580,Acute effects of electronic cigarettes on arterial pressure and peripheral sympathetic activity in young nonsmokers.,"Electronic cigarettes (e-cigarettes) are marketed as an alternative to smoking for those who want to decrease the health risks of tobacco. Tobacco cigarettes increase heart rate (HR) and arterial pressure, while reducing muscle sympathetic nerve activity (MSNA) through sympathetic baroreflex inhibition. The acute effects of e-cigarettes on arterial pressure and MSNA have not been reported: our purpose was to clarify this issue. Using a randomized crossover design, participants inhaled on a JUUL e-cigarette containing nicotine (59 mg/mL) and a similar placebo e-cigarette (0 mg/mL). Experiments were separated by ∼1 mo. We recorded baseline ECG, finger arterial pressure (n = 15), and MSNA (n = 10). Subjects rested for 10 min (BASE) and then inhaled once every 30 s on an e-cigarette that contained nicotine or placebo (VAPE) for 10 min followed by a 10-min recovery (REC). Data were expressed as Δ means ± SE from BASE. Heart rate increased in the nicotine condition during VAPE and returned to BASE values in REC (5.0 ± 1.3 beats/min nicotine vs. 0.1 ± 0.8 beats/min placebo, during VAPE; P < 0.01). Mean arterial pressure increased in the nicotine condition during VAPE and remained elevated during REC (6.5 ± 1.6 mmHg nicotine vs. 2.6 ± 1 mmHg placebo, during VAPE and 4.6.0 ± 1.7 mmHg nicotine vs. 1.4 ± 1.4 mmHg placebo, during REC; P < 0.05). MSNA decreased from BASE to VAPE and did not restore during REC (-7.1 ± 1.6 bursts/min nicotine vs. 
2.6 ± 2 bursts/min placebo, during VAPE and -5.8 ± 1.7 bursts/min nicotine vs. 0.5 ± 1.4 bursts/min placebo, during REC; P < 0.05). Our results show that acute e-cigarette usage increases mean arterial pressure leading to a baroreflex-mediated inhibition of MSNA.NEW & NOTEWORTHY The JUUL e-cigarette is the most popular e-cigarette in the market. In the present study, inhaling on a JUUL e-cigarette increased mean arterial pressure and heart rate, and decreased muscle sympathetic nerve activity (MSNA). In contrast, inhaling on a placebo e-cigarette without nicotine elicited no sympathomimetic effects. Although previous tobacco cigarette studies have demonstrated increased mean arterial pressure and MSNA inhibition, ours is the first study to report similar responses while inhaling on an e-cigarette. Listen to this article's corresponding podcast at @ https://ajpheart.podbean.com/e/aerosolized-nicotine-and-cardiovascular-control/.",2020-11-08 +31888452,LMAP_S: Lightweight Multigene Alignment and Phylogeny eStimation.,"BACKGROUND:Recent advances in genome sequencing technologies and the cost drop in high-throughput sequencing continue to give rise to a deluge of data available for downstream analyses. Among others, evolutionary biologists often make use of genomic data to uncover phenotypic diversity and adaptive evolution in protein-coding genes. Therefore, multiple sequence alignments (MSA) and phylogenetic trees (PT) need to be estimated with optimal results. However, the preparation of an initial dataset of multiple sequence file(s) (MSF) and the steps involved can be challenging when considering extensive amount of data. Thus, it becomes necessary the development of a tool that removes the potential source of error and automates the time-consuming steps of a typical workflow with high-throughput and optimal MSA and PT estimations. 
RESULTS:We introduce LMAP_S (Lightweight Multigene Alignment and Phylogeny eStimation), a user-friendly command-line and interactive package, designed to handle an improved alignment and phylogeny estimation workflow: MSF preparation, MSA estimation, outlier detection, refinement, consensus, phylogeny estimation, comparison and editing, among which file and directory organization, execution, manipulation of information are automated, with minimal manual user intervention. LMAP_S was developed for the workstation multi-core environment and provides a unique advantage for processing multiple datasets. Our software, proved to be efficient throughout the workflow, including, the (unlimited) handling of more than 20 datasets. CONCLUSIONS:We have developed a simple and versatile LMAP_S package enabling researchers to effectively estimate multiple datasets MSAs and PTs in a high-throughput fashion. LMAP_S integrates more than 25 software providing overall more than 65 algorithm choices distributed in five stages. At minimum, one FASTA file is required within a single input directory. To our knowledge, no other software combines MSA and phylogeny estimation with as many alternatives and provides means to find optimal MSAs and phylogenies. Moreover, we used a case study comparing methodologies that highlighted the usefulness of our software. LMAP_S has been developed as an open-source package, allowing its integration into more complex open-source bioinformatics pipelines. LMAP_S package is released under GPLv3 license and is freely available at https://lmap-s.sourceforge.io/.",2019-12-30 +28632491,Lessons Learned on Health Adaptation to Climate Variability and Change: Experiences Across Low- and Middle-Income Countries.,"

Background

There is limited published evidence of the effectiveness of adaptation in managing the health risks of climate variability and change in low- and middle-income countries.

Objectives

To document lessons learned and good practice examples from health adaptation pilot projects in low- and middle-income countries to facilitate assessing and overcoming barriers to implementation and to scaling up.

Methods

We evaluated project reports and related materials from the first five years of implementation (2008-2013) of multinational health adaptation projects in Albania, Barbados, Bhutan, China, Fiji, Jordan, Kazakhstan, Kenya, Kyrgyzstan, Philippines, Russian Federation, Tajikistan, and Uzbekistan. We also collected qualitative data through a focus group consultation and 19 key informant interviews.

Results

Our recommendations include that national health plans, policies, and budget processes need to explicitly incorporate the risks of current and projected climate variability and change. Increasing resilience is likely to be achieved through longer-term, multifaceted, and collaborative approaches, with supporting activities (and funding) for capacity building, communication, and institutionalized monitoring and evaluation. Projects should be encouraged to focus not just on shorter-term outputs to address climate variability, but also on establishing processes to address longer-term climate change challenges. Opportunities for capacity development should be created, identified, and reinforced.

Conclusions

Our analyses highlight that, irrespective of resource constraints, ministries of health and other institutions working on climate-related health issues in low- and middle-income countries need to continue to prepare themselves to prevent additional health burdens in the context of a changing climate and socioeconomic development patterns. https://doi.org/10.1289/EHP405.",2017-06-20 +28203233,Exo-miRExplorer: A Comprehensive Resource for Exploring and Comparatively Analyzing Exogenous MicroRNAs.,"MicroRNAs (miRNAs) are small regulatory RNAs that play important roles in animals, plants, and viruses. Deep-sequencing technology has been widely adopted in miRNA investigations. However, it is still a big mysterious why nearly all sequencing data contain miRNA sequences from exogenous species, called exo-miRNAs. In this study, we developed a novel platform, exo-miRExplorer, for mining and identifying exo-miRNAs from high-throughput small RNA sequencing experiments which originated from tissues and cell lines of multiple organisms. Thousands of exo-miRNAs are characterized with their expression abundance, the RNA families, original organisms and the sequencing platforms presented in exo-miRExplorer. Subsequently, we used exo-miRExplorer to perform further analysis. Comparative analysis of the exo-miRNAs between different sequencing datasets revealed significant correlation of exo-miRNAs between experiments in the same study. The plant-derived exo-miRNAs analysis provided robust evidence for non-diet source of exo-miRNAs. Virus-derived exo-miRNA analysis showed that pathogen RNAs could transfer to host cells and exist in deep-sequencing result at abundance level. In conclusion, exo-miRExplorer provides users with an integrative resource to facilitate detection and analysis of exo-miRNAs. 
exo-miRExplorer is available at the following URL: http://rna.sysu.edu.cn/exomiRDB/.",2017-02-01 +30395171,Supervised non-negative matrix factorization methods for MALDI imaging applications.,"

Motivation

Non-negative matrix factorization (NMF) is a common tool for obtaining low-rank approximations of non-negative data matrices and has been widely used in machine learning, e.g. for supporting feature extraction in high-dimensional classification tasks. In its classical form, NMF is an unsupervised method, i.e. the class labels of the training data are not used when computing the NMF. However, incorporating the classification labels into the NMF algorithms allows to specifically guide them toward the extraction of data patterns relevant for discriminating the respective classes. This approach is particularly suited for the analysis of mass spectrometry imaging (MSI) data in clinical applications, such as tumor typing and classification, which are among the most challenging tasks in pathology. Thus, we investigate algorithms for extracting tumor-specific spectral patterns from MSI data by NMF methods.

Results

In this article, we incorporate a priori class labels into the NMF cost functional by adding appropriate supervised penalty terms. Numerical experiments on a MALDI imaging dataset confirm that the novel supervised NMF methods lead to significantly better classification accuracy and stability as compared with other standard approaches.

Availability and implementaton

https://gitlab.informatik.uni-bremen.de/digipath/Supervised_NMF_Methods_for_MALDI.git.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-06-01 +32607463,"Mixed-Methods Systematic Review of Behavioral Interventions in Low- and Middle-Income Countries to Increase Family Support for Maternal, Infant, and Young Child Nutrition during the First 1000 Days.","Fathers, grandmothers, and other family members' influence on maternal, infant, and young child nutrition (MIYCN) is widely recognized, yet synthesis of the effectiveness of engaging them to improve nutrition practices during the first 1000 d is lacking. We examined the impact of behavioral interventions to engage family members in MIYCN in low- and middle-income countries through a mixed-methods systematic review. We screened 5733 abstracts and included 35 peer-reviewed articles on 25 studies (16 with quantitative and 13 with qualitative data). Most quantitative studies focused on early breastfeeding, primarily engaging fathers or, less often, grandmothers. Most found positive impacts on exclusive breastfeeding rates and family members' knowledge and support. The few quantitative studies on complementary feeding, maternal nutrition, and multiple outcomes also suggested benefits. Qualitative themes included improved nutrition behaviors, enhanced relationships, and challenges due to social norms. Interventions engaging family members can increase awareness and build support for MIYCN, but more rigorous study designs are needed. This systematic review is registered at PROSPERO as CRD42018090273, https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=90273.",2020-05-21 +26026752,A standard bacterial isolate set for research on contemporary dairy spoilage.,"Food spoilage is an ongoing issue that could be dealt with more efficiently if some standardization and unification was introduced in this field of research. For example, research and development efforts to understand and reduce food spoilage can greatly be enhanced through availability and use of standardized isolate sets. 
To address this critical issue, we have assembled a standard isolate set of dairy spoilers and other selected nonpathogenic organisms frequently associated with dairy products. This publicly available bacterial set consists of (1) 35 gram-positive isolates including 9 Bacillus and 15 Paenibacillus isolates and (2) 16 gram-negative isolates including 4 Pseudomonas and 8 coliform isolates. The set includes isolates obtained from samples of pasteurized milk (n=43), pasteurized chocolate milk (n=1), raw milk (n=1), cheese (n=2), as well as isolates obtained from samples obtained from dairy-powder production (n=4). Analysis of growth characteristics in skim milk broth identified 16 gram-positive and 13 gram-negative isolates as psychrotolerant. Additional phenotypic characterization of isolates included testing for activity of β-galactosidase and lipolytic and proteolytic enzymes. All groups of isolates included in the isolate set exhibited diversity in growth and enzyme activity. Source data for all isolates in this isolate set are publicly available in the FoodMicrobeTracker database (http://www.foodmicrobetracker.com), which allows for continuous updating of information and advancement of knowledge on dairy-spoilage representatives included in this isolate set. This isolate set along with publicly available isolate data provide a unique resource that will help advance knowledge of dairy-spoilage organisms as well as aid industry in development and validation of new control strategies.",2015-05-28 +29140525,"rSNPBase 3.0: an updated database of SNP-related regulatory elements, element-gene pairs and SNP-based gene regulatory networks.","Here, we present the updated rSNPBase 3.0 database (http://rsnp3.psych.ac.cn), which provides human SNP-related regulatory elements, element-gene pairs and SNP-based regulatory networks. This database is the updated version of the SNP regulatory annotation database rSNPBase and rVarBase. 
In comparison to the last two versions, there are both structural and data adjustments in rSNPBase 3.0: (i) The most significant new feature is the expansion of analysis scope from SNP-related regulatory elements to include regulatory element-target gene pairs (E-G pairs), therefore it can provide SNP-based gene regulatory networks. (ii) Web function was modified according to data content and a new network search module is provided in the rSNPBase 3.0 in addition to the previous regulatory SNP (rSNP) search module. The two search modules support data query for detailed information (related-elements, element-gene pairs, and other extended annotations) on specific SNPs and SNP-related graphic networks constructed by interacting transcription factors (TFs), miRNAs and genes. (3) The type of regulatory elements was modified and enriched. To our best knowledge, the updated rSNPBase 3.0 is the first data tool supports SNP functional analysis from a regulatory network prospective, it will provide both a comprehensive understanding and concrete guidance for SNP-related regulatory studies.",2018-01-01 +29077937,PICKLES: the database of pooled in-vitro CRISPR knockout library essentiality screens.,"The adaptation of CRISPR/Cas9 systems for pooled library genetic knockout screens in mammalian cells has substantially advanced the state of the art in human functional genomics. Screening panels of cell lines for genes whose knockout imposes a significant fitness defect has dramatically expanded our catalog of high-confidence essential genes, and has already proven useful in identifying tumor-specific essential genes for the development of targeted therapies. However, nonexperts currently lack an easy to use way to access this data and to identify whether their genes of interest are essential across different genetic backgrounds. The volume of screening data is expected to grow massively, making the problem more intractable. 
Here we describe PICKLES, the database of Pooled In vitro CRISPR Knockout Library Essentiality Screens, where end users can display and download raw or normalized essentiality profiles for more that 18 000 protein-coding genes across more than 50 cell lines. An additional data set with 15,000 genes targeted by pooled library shRNA in over 100 cell lines is also included. Researchers can see at a glance the relative fitness defect and tissue specificity of their genes of interest, generate and save figures locally, and download all raw data. The database is available at http://pickles.hart-lab.org.",2018-01-01 +23337681,Medical mentoring via the evolving world wide web.,"

Objectives

Mentoring, for physicians and surgeons in training, is advocated as an essential adjunct in work-based learning, providing support in career and non-career related issues. The World Wide Web (WWW) has evolved, as a technology, to become more interactive and person centric, tailoring itself to the individual needs of the user. This changing technology may open new avenues to foster mentoring in medicine. DESIGN, SYSTEMATIC REVIEW, MAIN OUTCOME MEASURES: A search of the MEDLINE database from 1950 to 2012 using the PubMed interface, combined with manual cross-referencing was performed using the following strategy: (""mentors""[MeSH Terms] OR ""mentors""[All Fields] OR ""mentor""[All Fields]) AND (""internet""[MeSH Terms] OR ""internet""[All Fields]) AND (""medicine""[MeSH Terms] OR ""medicine""[All Fields]) AND (""humans""[MeSH Terms] AND English[lang]). Abstracts were screened for relevance (UJ) to the topic; eligibility for inclusion was simply on screening for relevance to online mentoring and web-based technologies.

Results

Forty-five papers were found, of which 16 were relevant. All studies were observational in nature. To date, all medical mentoring applications utilizing the World Wide Web have enjoyed some success limited by Web 1.0 and 2.0 technologies.

Conclusions

With the evolution of the WWW through 1.0, 2.0 and 3.0 generations, the potential for meaningful tele- and distance mentoring has greatly improved. Some engagement has been made with these technological advancements, however further work is required to fully realize the potential of these technologies.",2012-10-27 +30639381,Cardiovascular research in France: Evolution of scientific activities and production over the last decade.,"

Background

Cardiovascular disease (CVD) is a major cause of death worldwide, and fruitful research is needed for future advances in this field.

Aims

To analyse the scientific production and vitality of French cardiovascular clinical research, and its evolution over the last decade.

Methods

We first used Lab Times online data obtained through the Web of Science (Thomson-Reuters, Toronto, ON, Canada), then the PubMed database (National Center for Biotechnology Information [NCBI], Bethesda, MD, USA), for studies published between 2005 and 2015 in the multidisciplinary and cardiology journals with the highest impact factors. French abstracts submitted and accepted at the European Society of Cardiology (ESC) congress were provided directly by the ESC. The number of cardiovascular projects was analysed through the http://www.ClinicalTrials.gov database and the French site for government-funded projects, over the decade from 2008 to 2017.

Results

Overall, France was ranked fifth in Europe and eighth worldwide for CVD publications. During the 10-year period from 2005 to 2015, French publications accounted for 0.2-0.3% of articles in top multidisciplinary journals and 2% of articles in top cardiology journals. We observed a steady decrease in French abstract submissions at the ESC congress (from 5% to 3.5% in 10 years), and in 2017, France was ranked eighth in Europe. Across European countries, France has been ranked first for declared cardiovascular research on http://www.ClinicalTrials.gov over the last 3 years, for both interventional and observational studies. Regarding the Hospital Programme of Clinical Research, heart ranked second after neurosciences.

Conclusions

France is very well represented in terms of new CVD projects, but actual French scientific production scores poorly. Investing in CVD research is a priority to increase the level of publication and to compete with other leading countries.",2019-01-11 +33026256,Effects of PCB126 on Adipose-to-Muscle Communication in an in Vitro Model.,"

Background

Exposure to coplanar polychlorinated biphenyls (PCBs) is linked to the development of insulin resistance. Previous studies suggested PCB126 alters muscle mitochondrial function through an indirect mechanism. Given that PCBs are stored in fat, we hypothesized that PCB126 alters adipokine secretion, which in turn affects muscle metabolism.

Objectives

We determined a) the impacts of PCB126 exposure on adipocyte cytokine/adipokine secretion in vitro; b) whether adipocyte-derived factors alter glucose metabolism and mitochondrial function in myotubes when exposed to PCB126; and c) whether preestablished insulin resistance alters the metabolic responses of adipocytes exposed to PCB126 and the communication between adipocytes and myotubes.

Methods

3T3-L1 adipocytes were exposed to PCB126 (1-100 nM) in two insulin sensitivity conditions [insulin sensitive (IS) and insulin resistant (IR) adipocytes], followed by the measurement of secreted adipokines, mitochondrial function, and insulin-stimulated glucose uptake. Communication between adipocytes and myotubes was reproduced by exposing C2C12 myotubes or mouse primary myotubes to conditioned medium (CM) derived from IS or IR 3T3-L1 adipocytes exposed to PCB126. Mitochondrial function and insulin-stimulated glucose uptake were then determined in myotubes.

Results

IR 3T3-L1 adipocytes treated with PCB126 had significantly higher adipokine (adiponectin, IL-6, MCP-1, TNF-α) secretion and lower mitochondrial function, glucose uptake, and glycolysis. However, PCB126 did not significantly alter these parameters in IS adipocytes. Altered energy metabolism in IR 3T3-L1 adipocytes was linked to lower phosphorylation of AMP-activated protein kinase (p-AMPK) and higher superoxide dismutase 2 levels, an enzyme involved in reactive oxygen species detoxification. Myotubes exposed to the CM from PCB126-treated IR adipocytes had lower glucose uptake, with no alteration in glycolysis or mitochondrial function. Interestingly, p-AMPK levels were higher in myotubes exposed to the CM of PCB126-treated IR adipocytes.

Discussion

Taken together, these data suggest that increased adipokine secretion from IR adipocytes exposed to PCB126 might explain impaired glucose uptake in myotubes. https://doi.org/10.1289/EHP7058.",2020-10-07 +32913910,Risk prediction in cutaneous melanoma patients from their clinico-pathological features: superiority of clinical data over gene expression data.,"Risk assessment in cutaneous melanoma (CM) patients is one of the major challenges in the effective treatment of CM patients. Traditionally, clinico-pathological features such as Breslow thickness, American Joint Committee on Cancer (AJCC) tumor staging, etc. are utilized for this purpose. However, due to advancements in technology, most of the upcoming risk prediction methods are gene-expression profile (GEP) based. In this study, we have tried to develop new GEP and clinico-pathological features-based biomarkers and assessed their prognostic strength in contrast to existing prognostic methods. We developed risk prediction models using the expression of the genes associated with different cancer-related pathways and got a maximum hazard ratio (HR) of 2.52 with p-value ~10-8 for the apoptotic pathway. Another model, based on combination of apoptotic and notch pathway genes boosted the HR to 2.57. Next, we developed models based on individual clinical features and got a maximum HR of 2.45 with p-value ~10-6 for Breslow thickness. We also developed models using the best features of clinical as well as gene-expression data and obtained a maximum HR of 3.19 with p-value ~10-9. Finally, we developed a new ensemble method using clinical variables only and got a maximum HR of 6.40 with p-value ~10-15. Based on this method, a web-based service and an android application named 'CMcrpred' is available at (https://webs.iiitd.edu.in/raghava/cmcrpred/) and Google Play Store respectively to facilitate scientific community. 
This study reveals that our new ensemble method based on only clinico-pathological features overperforms methods based on GEP based profiles as well as currently used AJCC staging. It also highlights the need to explore the full potential of clinical variables for prognostication of cancer patients.",2020-08-29 +29112749,PGG.Population: a database for understanding the genomic diversity and genetic ancestry of human populations.,"There are a growing number of studies focusing on delineating genetic variations that are associated with complex human traits and diseases due to recent advances in next-generation sequencing technologies. However, identifying and prioritizing disease-associated causal variants relies on understanding the distribution of genetic variations within and among populations. The PGG.Population database documents 7122 genomes representing 356 global populations from 107 countries and provides essential information for researchers to understand human genomic diversity and genetic ancestry. These data and information can facilitate the design of research studies and the interpretation of results of both evolutionary and medical studies involving human populations. The database is carefully maintained and constantly updated when new data are available. We included miscellaneous functions and a user-friendly graphical interface for visualization of genomic diversity, population relationships (genetic affinity), ancestral makeup, footprints of natural selection, and population history etc. Moreover, PGG.Population provides a useful feature for users to analyze data and visualize results in a dynamic style via online illustration. 
The long-term ambition of the PGG.Population, together with the joint efforts from other researchers who contribute their data to our database, is to create a comprehensive depository of geographic and ethnic variation of human genome, as well as a platform bringing influence on future practitioners of medicine and clinical investigators. PGG.Population is available at https://www.pggpopulation.org.",2018-01-01 +32154836,iATC-FRAKEL: a simple multi-label web server for recognizing anatomical therapeutic chemical classes of drugs with their fingerprints only.,"MOTIVATION:Anatomical therapeutic chemical (ATC) classification system is very important for drug utilization and studies. Correct prediction of the 14 classes in the first level for given drugs is an essential problem for the study on such system. Several multi-label classifiers have been proposed in this regard. However, only two of them provided the web servers and their performance was not very high. On the other hand, although some rest classifiers can provide better performance, they were built based on some prior knowledge on drugs, such as information of chemical-chemical interaction and chemical ontology, leading to limited applications. Furthermore, provided codes of these classifiers are almost inaccessible for pharmacologists. RESULTS:In this study, we built a simple web server, namely iATC-FRAKEL. This web server only required the SMILES format of drugs as input and extracted their fingerprints for making prediction. The performance of the iATC-FRAKEL was much higher than all existing web servers and was comparable to the best multi-label classifier but had much wider applications. Such web server can be visited at http://cie.shmtu.edu.cn/iatc/index. AVAILABILITY AND IMPLEMENTATION:The web server is available at http://cie.shmtu.edu.cn/iatc/index. CONTACT:chen_lei1@163.com. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-06-01 +29161430,MethBank 3.0: a database of DNA methylomes across a variety of species.,"MethBank (http://bigd.big.ac.cn/methbank) is a database that integrates high-quality DNA methylomes across a variety of species and provides an interactive browser for visualization of methylation data. Here, we present an updated implementation of MethBank (version 3.0) by incorporating more DNA methylomes from multiple species and equipping with more enhanced functionalities for data annotation and more friendly web interfaces for data presentation, search and visualization. MethBank 3.0 features large-scale integration of high-quality methylomes, involving 34 consensus reference methylomes derived from a large number of human samples, 336 single-base resolution methylomes from different developmental stages and/or tissues of five plants, and 18 single-base resolution methylomes from gametes and early embryos at multiple stages of two animals. Additionally, it is enhanced by improving the functionalities for data annotation, which accordingly enables systematic identification of methylation sites closely associated with age, sites with constant methylation levels across different ages, differentially methylated promoters, age-specific differentially methylated cytosines/regions, and methylated CpG islands. Moreover, MethBank provides tools to estimate human methylation age online and to identify differentially methylated promoters, respectively. Taken together, MethBank is upgraded with significant improvements and advances over the previous version, which is of great help for deciphering DNA methylation regulatory mechanisms for epigenetic studies.",2018-01-01 +29126305,Membranome 2.0: database for proteome-wide profiling of bitopic proteins and their dimers.,"

Motivation

Structural studies of TM domains of single-spanning (bitopic) membrane proteins are impeded by their instability, flexibility and heterogeneity. The new computational method TMDOCK allows reliable modeling of homodimers of transmembrane (TM) α-helices on a proteomic scale.

Results

3D models of 2129 parallel homodimers formed by TM α-helices of bitopic proteins from six evolutionarily distant organisms were modeled by TMDOCK, verified through experimental data available for nearly 600 proteins, and included in the Membranome database (v.2.0) along with related information to facilitate structural and evolutionary analysis of bitopic proteins.

Availability and implementation

http://membranome.org.

Contact

almz@umich.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-03-01 +,ASSESSING THE ABILITY OF SOCIAL INNOVATIONS TO INCREASE HEALTHY LIFE YEARS,"Abstract The European Union has adopted Healthy Life Years (HLY), based on the Global Activity Limitation Indicator, as its preferred health expectancy for comparing countries and defining health targets. One such target is that of the European Innovation Partnership on Healthy and Active Ageing to increase HLY by 2 years between 2010 and 2020. We assessed the ability of the exemplar social innovations (SIs) identified on the database, to increase HLY. We searched for evaluations of the 157 exemplar SIs and categorised their outcome measures into a five point typology graded A (direct link to HLY) to E (no evaluation found). Only 2 Sis were graded A, and it was impossible to assess the impact on HLY for 90% (n=142) which were graded D or E. We also developed a ‘HLY Modeller’ in Microsoft Excel for users to explore the potential impact of a SI on population health (http://www.innovage.group.shef.ac.uk/healthy-life-years.html).",2017-06-30 +32117874,ChEMBL-Likeness Score and Database GDBChEMBL.,"The generated database GDB17 enumerates 166.4 billion molecules up to 17 atoms of C, N, O, S and halogens following simple rules of chemical stability and synthetic feasibility. However, most molecules in GDB17 are too complex to be considered for chemical synthesis. To address this limitation, we report GDBChEMBL as a subset of GDB17 featuring 10 million molecules selected according to a ChEMBL-likeness score (CLscore) calculated from the frequency of occurrence of circular substructures in ChEMBL, followed by uniform sampling across molecular size, stereocenters and heteroatoms. 
Compared to the previously reported subsets FDB17 and GDBMedChem selected from GDB17 by fragment-likeness, respectively, medicinal chemistry criteria, our new subset features molecules with higher synthetic accessibility and possibly bioactivity yet retains a broad and continuous coverage of chemical space typical of the entire GDB17. GDBChEMBL is accessible at http://gdb.unibe.ch for download and for browsing using an interactive chemical space map at http://faerun.gdb.tools.",2020-02-04 +31774292,Extended Human G-Protein Coupled Receptor Network: Cell-Type-Specific Analysis of G-Protein Coupled Receptor Signaling Pathways.,"G-protein coupled receptors (GPCRs) mediate crucial physiological functions in humans, have been implicated in an array of diseases, and are therefore prime drug targets. GPCRs signal via a multitude of pathways, mainly through G-proteins and β-arrestins, to regulate effectors responsible for cellular responses. The limited number of transducers results in different GPCRs exerting control on the same pathway, while the availability of signaling proteins in a cell defines the result of GPCR activation. The aim of this study was to construct the extended human GPCR network (hGPCRnet) and examine the effect that cell-type specificity has on GPCR signaling pathways. To achieve this, protein-protein interaction data between GPCRs, G-protein coupled receptor kinases (GRKs), Gα subunits, β-arrestins, and effectors were combined with protein expression data in cell types. This resulted in the hGPCRnet, a very large interconnected network, and similar cell-type-specific networks in which, distinct GPCR signaling pathways were formed. Finally, a user friendly web application, hGPCRnet ( http://bioinformatics.biol.uoa.gr/hGPCRnet ), was created to allow for the visualization and exploration of these networks and of GPCR signaling pathways. 
This work, and the resulting application, can be useful in further studies of GPCR function and pharmacology.",2019-12-12 +,"ClonEstiMate, a Bayesian method for quantifying rates of clonality of populations genotyped at two‐time steps","Partial clonality is commonly used in eukaryotes and has large consequences for their evolution and ecology. Assessing accurately the relative importance of clonal vs. sexual reproduction matters for studying and managing such species. Here, we proposed a Bayesian approach, ClonEstiMate, to infer rates of clonality c from populations sampled twice over a short time interval, ideally one generation time. The method relies on the likelihood of the transitions between genotype frequencies of ancestral and descendent populations, using an extended Wright–Fisher model explicitly integrating reproductive modes. Our model provides posterior probability distribution of inferred c, given the assumed rates of mutation, as well as inbreeding and selfing when occurring. Tested under various conditions, this model provided accurate inferences of c, especially when the amount of information was modest, that is low sample sizes, few loci, low polymorphism and strong linkage disequilibrium. Inferences remained robust when mutation models and rates were misinformed. However, the method was sensitive to moderate frequencies of null alleles and when the time interval between required samplings exceeding two generations. Misinformed rates on mating modes (inbreeding and selfing) also resulted in biased inferences. Our method was tested on eleven data sets covering five partially clonal species, for which the extent of clonality was formerly deciphered. It delivered highly consistent results with previous information on the biology of those species. ClonEstiMate represents a powerful tool for detecting and inferring clonality in finite populations, genotyped with SNPs or microsatellites. 
It is freely available at https://www6.rennes.inra.fr/igepp_eng/Productions/Software.",2017-11-01 +31844516,Diet-based assortative mating through sexual imprinting.,"Speciation is facilitated by ""magic traits,"" where divergent natural selection on such traits also results in assortative mating. In animal populations, diet has the potential to act as a magic trait if populations diverge in consumed food that incidentally affects mating and therefore sexual isolation. While diet-based assortative mating has been observed in the laboratory and in natural populations, the mechanisms causing positive diet-based assortment remain largely unknown. Here, we experimentally created divergent diets in a sexually imprinting species of mouse, Peromyscus gossypinus (the cotton mouse), to test the hypothesis that sexual imprinting on diet could be a mechanism that generates rapid and significant sexual isolation. We provided breeding pairs with novel garlic- or orange-flavored water and assessed whether their offspring, exposed to these flavors in utero and in the nest before weaning, later preferred mates that consumed the same flavored water as their parents. While males showed no preference, females preferred males of their parental diet, which is predicted to yield moderate sexual isolation. Thus, our experiment demonstrates the potential for sexual imprinting on dietary cues learned in utero and/or postnatally to facilitate reproductive isolation and potentially speciation.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.n1qq6v3.",2019-09-30 +22139920,SitEx: a computer system for analysis of projections of protein functional sites on eukaryotic genes.,"Search of interrelationships between the structural-functional protein organization and exon structure of encoding gene provides insights into issues concerned with the function, origin and evolution of genes and proteins. The functions of proteins and their domains are defined mostly by functional sites. The relation of the exon-intron structure of the gene to the protein functional sites has been little studied. Development of resources containing data on projections of protein functional sites on eukaryotic genes is needed. We have developed SitEx, a database that contains information on functional site amino acid positions in the exon structure of encoding gene. SitEx is integrated with the BLAST and 3DExonScan programs. BLAST is used for searching sequence similarity between the query protein and polypeptides encoded by single exons stored in SitEx. The 3DExonScan program is used for searching for structural similarity of the given protein with these polypeptides using superimpositions. The developed computer system allows users to analyze the coding features of functional sites by taking into account the exon structure of the gene, to detect the exons involved in shuffling in protein evolution, also to design protein-engineering experiments. SitEx is accessible at http://www-bionet.sscc.ru/sitex/. 
Currently, it contains information about 9994 functional sites presented in 2021 proteins described in proteomes of 17 organisms.",2011-12-01 +31617784,A new diketopiperazine from an endophytic fungus Aspergillus aculeatus F027.,"A new diketopiperazine cyclo-(L-Phe-N-ethyl-L-Glu) (1), along with two known diketopiperazines cyclo-(L-Pro-L-Leu) (2) and cyclo-(L-Pro-L-Phe) (3) were isolated from the cultures of an endophytic fungus Aspergillus aculeatus F027. The structures of these compounds were elucidated based on spectroscopic data. The configurations of these compounds were determined by advanced Marfey's analysis. Antibacterial activity of the diketopiperazines against Staphylococcus aureus, Escherichia coli and Pseudomonas aeruginosa were also evaluated.Supplemental data for this article can be accessed at https://doi.org/10.1080/14786419.2019.1677652.",2019-10-16 +32078592,"Characteristics and Health Status of Informal Unpaid Caregivers - 44 States, District of Columbia, and Puerto Rico, 2015-2017.","In 2015, an estimated 17.7 million U.S. persons were informal caregivers who provided substantial services through in-home, unpaid assistance to their family members and friends (1). Caregiving can have many benefits, such as enhancing the bond between caregiver and recipient, but it can also place an emotional and physical strain on caregivers, leading to higher rates of depression, lower quality of life, and poorer overall health (2). As the U.S. population continues to age (3), the need for informal caregivers will likely increase. However, little nationally representative information on prevalence of caregivers is available. This study examined demographic characteristics and health status of informal caregivers from 44 states,* the District of Columbia (DC), and Puerto Rico, based on data from the Behavioral Risk Factor Surveillance System (BRFSS) collected during 2015-2017. 
Overall, approximately one in five adults reported that they had provided care to a family member or friend in the preceding 30 days. Fifty-eight percent of caregivers were women, and a majority were non-Hispanic white, with at least some college education, and married or living with a partner. Across all states, 19.2% of caregivers reported being in fair or poor health, although significant state-to-state variation occurred. Caregivers provide important support to family members, friends, and the health care system and might compromise their own health to provide this support (1,2). Better understanding of caregivers and the challenges they face could inform implementation of improvements in support systems that could enhance not only the health of the caregiver, but that of the care recipient as well. For example, additional data regarding demographics at the state level might aid in more effective planning and support of caregivers with evidence-based programs and assistance (https://www.cdc.gov/aging/publications/features/caring-for-yourself.html).",2020-02-21 +31034053,"BEAGLE 3: Improved Performance, Scaling, and Usability for a High-Performance Computing Library for Statistical Phylogenetics.","BEAGLE is a high-performance likelihood-calculation library for phylogenetic inference. The BEAGLE library defines a simple, but flexible, application programming interface (API), and includes a collection of efficient implementations for calculation under a variety of evolutionary models on different hardware devices. The library has been integrated into recent versions of popular phylogenetics software packages including BEAST and MrBayes and has been widely used across a diverse range of evolutionary studies. Here, we present BEAGLE 3 with new parallel implementations, increased performance for challenging data sets, improved scalability, and better usability. 
We have added new OpenCL and central processing unit-threaded implementations to the library, allowing the effective utilization of a wider range of modern hardware. Further, we have extended the API and library to support concurrent computation of independent partial likelihood arrays, for increased performance of nucleotide-model analyses with greater flexibility of data partitioning. For better scalability and usability, we have improved how phylogenetic software packages use BEAGLE in multi-GPU (graphics processing unit) and cluster environments, and introduced an automated method to select the fastest device given the data set, evolutionary model, and hardware. For application developers who wish to integrate the library, we also have developed an online tutorial. To evaluate the effect of the improvements, we ran a variety of benchmarks on state-of-the-art hardware. For a partitioned exemplar analysis, we observe run-time performance improvements as high as 5.9-fold over our previous GPU implementation. BEAGLE 3 is free, open-source software licensed under the Lesser GPL and available at https://beagle-dev.github.io.",2019-11-01 +29120328,Neuropathological and transcriptomic characteristics of the aged brain. ,"As more people live longer, age-related neurodegenerative diseases are an increasingly important societal health issue. Treatments targeting specific pathologies such as amyloid beta in Alzheimer's disease (AD) have not led to effective treatments, and there is increasing evidence of a disconnect between traditional pathology and cognitive abilities with advancing age, indicative of individual variation in resilience to pathology. Here, we generated a comprehensive neuropathological, molecular, and transcriptomic characterization of hippocampus and two regions cortex in 107 aged donors (median = 90) from the Adult Changes in Thought (ACT) study as a freely-available resource (http://aging.brain-map.org/). 
We confirm established associations between AD pathology and dementia, albeit with increased, presumably aging-related variability, and identify sets of co-expressed genes correlated with pathological tau and inflammation markers. Finally, we demonstrate a relationship between dementia and RNA quality, and find common gene signatures, highlighting the importance of properly controlling for RNA quality when studying dementia.",2017-11-09 +24980131,Plant rDNA database: update and new features. ,"The Plant rDNA database (www.plantrdnadatabase.com) is an open access online resource providing detailed information on numbers, structures and positions of 5S and 18S-5.8S-26S (35S) ribosomal DNA loci. The data have been obtained from >600 publications on plant molecular cytogenetics, mostly based on fluorescent in situ hybridization (FISH). This edition of the database contains information on 1609 species derived from 2839 records, which means an expansion of 55.76 and 94.45%, respectively. It holds the data for angiosperms, gymnosperms, bryophytes and pteridophytes available as of June 2013. Information from publications reporting data for a single rDNA (either 5S or 35S alone) and annotation regarding transcriptional activity of 35S loci now appears in the database. Preliminary analyses suggest greater variability in the number of rDNA loci in gymnosperms than in angiosperms. New applications provide ideograms of the species showing the positions of rDNA loci as well as a visual representation of their genome sizes. We have also introduced other features to boost the usability of the Web interface, such as an application for convenient data export and a new section with rDNA-FISH-related information (mostly detailing protocols and reagents). In addition, we upgraded and/or proofread tabs and links and modified the website for a more dynamic appearance. This manuscript provides a synopsis of these changes and developments. 
http://www.plantrdnadatabase.com.",2014-06-30 +29203139,Structural Mapping of Adenosine Receptor Mutations: Ligand Binding and Signaling Mechanisms.,"The four adenosine receptors (ARs), A1, A2A, A2B, and A3, constitute a subfamily of G protein-coupled receptors (GPCRs) with exceptional foundations for structure-based ligand design. The vast amount of mutagenesis data, accumulated in the literature since the 1990s, has been recently supplemented with structural information, currently consisting of several inactive and active structures of the A2A and inactive conformations of the A1 ARs. We provide the first integrated view of the pharmacological, biochemical, and structural data available for this receptor family, by mapping onto the relevant crystal structures all site-directed mutagenesis data, curated and deposited at the GPCR database (available through http://www.gpcrdb.org). This analysis provides novel insights into ligand binding, allosteric modulation, and signaling of the AR family.",2017-12-05 +32476802,Gamma-glutamyl transferase and cardiovascular risk in nonalcoholic fatty liver disease: The Gut and Obesity Asia initiative.,"

Background

Gamma-glutamyl transferase (GGT) is associated with the risk of cardiovascular disease (CVD) in the general population.

Aim

To identify the association of baseline GGT level and QRISK2 score among patients with biopsy-proven nonalcoholic fatty liver disease (NAFLD).

Methods

This was a retrospective study involving 1535 biopsy-proven NAFLD patients from 10 Asian centers in 8 countries using data collected by the Gut and Obesity in Asia (referred to as ""GO ASIA"") workgroup. All patients with available baseline GGT levels and all 16 variables for the QRISK2 calculation (QRISK2-2017; developed by researchers at the United Kingdom National Health Service; https://qrisk.org/2017/; 10-year cardiovascular risk estimation) were included and compared to healthy controls with the same age, sex, and ethnicity. Relative risk was reported. QRISK2 score > 10% was defined as the high-CVD-risk group. Fibrosis stages 3 and 4 (F3 and F4) were considered advanced fibrosis.

Results

A total of 1122 patients (73%) had complete data and were included in the final analysis; 314 (28%) had advanced fibrosis. The median age (interquartile range [IQR]) of the study population was 53 (44-60) years, 532 (47.4%) were females, and 492 (43.9%) were of Chinese ethnicity. The median 10-year CVD risk (IQR) was 5.9% (2.6-10.9), and the median relative risk of CVD over 10 years (IQR) was 1.65 (1.13-2.2) compared to healthy individuals with the same age, sex, and ethnicity. The high-CVD-risk group was significantly older than the low-risk group (median [IQR]: 63 [59-67] vs 49 [41-55] years; P < 0.001). Higher fibrosis stages in biopsy-proven NAFLD patients brought a significantly higher CVD risk (P < 0.001). Median GGT level was not different between the two groups (GGT [U/L]: Median [IQR], high risk 60 [37-113] vs low risk 66 [38-103], P = 0.56). There was no correlation between baseline GGT level and 10-year CVD risk based on the QRISK2 score (r = 0.02).

Conclusion

The CVD risk of NAFLD patients is higher than that of healthy individuals. Baseline GGT level cannot predict CVD risk in NAFLD patients. However, advanced fibrosis is a predictor of a high CVD risk.",2020-05-01 +24951798,A controlled vocabulary for pathway entities and events. ,"Entities involved in pathways and the events they participate in require descriptive and unambiguous names that are often not available in the literature or elsewhere. Reactome is a manually curated open-source resource of human pathways. It is accessible via a website, available as downloads in standard reusable formats and via Representational State Transfer (REST)-ful and Simple Object Access Protocol (SOAP) application programming interfaces (APIs). We have devised a controlled vocabulary (CV) that creates concise, unambiguous and unique names for reactions (pathway events) and all the molecular entities they involve. The CV could be reapplied in any situation where names are used for pathway entities and events. Adoption of this CV would significantly improve naming consistency and readability, with consequent benefits for searching and data mining within and between databases. Database URL: http://www.reactome.org.",2014-06-20 +31405451,"KymoButler, a deep learning software for automated kymograph analysis. ","Kymographs are graphical representations of spatial position over time, which are often used in biology to visualise the motion of fluorescent particles, molecules, vesicles, or organelles moving along a predictable path. Although in kymographs tracks of individual particles are qualitatively easily distinguished, their automated quantitative analysis is much more challenging. Kymographs often exhibit low signal-to-noise-ratios (SNRs), and available tools that automate their analysis usually require manual supervision. Here we developed KymoButler, a Deep Learning-based software to automatically track dynamic processes in kymographs. 
We demonstrate that KymoButler performs as well as expert manual data analysis on kymographs with complex particle trajectories from a variety of different biological systems. The software was packaged in a web-based 'one-click' application for use by the wider scientific community (http://kymobutler.deepmirror.ai). Our approach significantly speeds up data analysis, avoids unconscious bias, and represents another step towards the widespread adaptation of Machine Learning techniques in biological data analysis.",2019-08-13 +31504183,EasyModel: user-friendly tool for building and analysis of simple mathematical models in systems biology.,"

Summary

EasyModel is a new user-friendly web application that contains ready-for-simulation versions of the BioModels Database, and allows for the intuitive creation of new models. Its main target audience is the experimental biologist and students of bioinformatics or systems biology without programming skills. Expert users can also benefit from it by implementing basic models quickly and downloading the code for further tailoring.

Availability and implementation

Freely available on the web at https://easymodel.udl.cat. Implementation is described in its own section.",2020-02-01 +31971437,"""Beyond the thin ideal: Development and validation of the Fit Ideal Internalization Test (FIIT) for women"": Correction to Uhlmann et al. (2019).","Reports an error in ""Beyond the thin ideal: Development and validation of the Fit Ideal Internalization Test (FIIT) for women"" by Laura R. Uhlmann, Caroline L. Donovan and Melanie J. Zimmer-Gembeck (Psychological Assessment, Advanced Online Publication, Sep 19, 2019, np). In the article, there are two errors in the Method section for Study 2. First, in the ""Body dissatisfaction"" subsection, the range of total scores for the Body-Image Ideals Questionnaire was incorrectly listed as being ""between 0 and 99."" The correct range is from - 3 to 9. Second, in the ""Dieting and bulimia"" subsection, the reference for the Eating Attitudes Test (EAT-26) was incorrectly cited as ""Garner et al., 1983."" Garner, D. M., Olmsted, M. P., Bohr, Y., & Garfinkel, P. E. (1982). The Eating Attitudes Test: Psychometric features and clinical correlates. Psychological Medicine, 12, 871-878. http://dx.doi .org/10.1017/s0033291700049163. (The following abstract of the original article appeared in record 2019-55793-001.) Females are at risk for body image and eating disturbance when they internalize societally prescribed standards of Western beauty. With respect to messages to be thin or muscular, numerous scales are available that measure internalization. However, many women are now receiving messages about the desirability of being both thin and toned, yet no self-report measure of internalization of a fit female body ideal exists. Our aim was to develop a multidimensional tool (i.e., the Fit Ideal Internalization Test; FIIT) useful for assessing women's internalization of the fit ideal (i.e., a lean and toned body ideal). 
Three studies were conducted, recruiting independent groups of women attending university to complete surveys. In Study 1 (N = 300, age 16-51), women completed the FIIT items, and a 3-factor structure of fit idealization (8 items), fit overvaluation (8 items), and fit behavioral drive (4 items) was established through exploratory factor analysis. Also, items loading highly on each of the factors had good interitem correlations. In Study 2 (N = 354, age 16-63), women completed the 20-item FIIT and validation measures. The 3-factor structure of the FIIT was confirmed, and findings supported convergent, discriminant, and incremental validity of the FIIT subscale scores (and a total score). In Study 3 (N = 67, age 17-50), the 2-week test-retest reliability of the FIIT scores was high. Overall, the 3 FIIT subscales are related but also distinct domains of fit ideal internalization that conform to theory and may be used as individual subscales or potentially as a composite score. (PsycINFO Database Record (c) 2020 APA, all rights reserved).",2020-02-01 +31586211,An Online Calculator for the Prediction of Survival in Glioblastoma Patients Using Classical Statistics and Machine Learning.,"

Background

Although survival statistics in patients with glioblastoma multiforme (GBM) are well-defined at the group level, predicting individual patient survival remains challenging because of significant variation within strata.

Objective

To compare statistical and machine learning algorithms in their ability to predict survival in GBM patients and deploy the best performing model as an online survival calculator.

Methods

Patients undergoing an operation for a histopathologically confirmed GBM were extracted from the Surveillance Epidemiology and End Results (SEER) database (2005-2015) and split into a training and hold-out test set in an 80/20 ratio. Fifteen statistical and machine learning algorithms were trained based on 13 demographic, socioeconomic, clinical, and radiographic features to predict overall survival, 1-yr survival status, and compute personalized survival curves.

Results

In total, 20 821 patients met our inclusion criteria. The accelerated failure time model demonstrated superior performance in terms of discrimination (concordance index = 0.70), calibration, interpretability, predictive applicability, and computational efficiency compared to Cox proportional hazards regression and other machine learning algorithms. This model was deployed through a free, publicly available software interface (https://cnoc-bwh.shinyapps.io/gbmsurvivalpredictor/).

Conclusion

The development and deployment of survival prediction tools require a multimodal assessment rather than a single metric comparison. This study provides a framework for the development of prediction tools in cancer patients, as well as an online survival calculator for patients with GBM. Future efforts should improve the interpretability, predictive applicability, and computational efficiency of existing machine learning algorithms, increase the granularity of population-based registries, and externally validate the proposed prediction tool.",2020-02-01 +30904206,Feeding difficulties in young paediatric intensive care survivors: A scoping review.,"BACKGROUND:Although feeding difficulties are commonly described amongst children with chronic diseases, those admitted to a paediatric intensive care unit (PICU) represent a mix of previously healthy children as well as those with pre-existing diseases. There is, however, a lack of evidence describing the prevalence and type of feeding difficulties amongst healthy children who survive a period of critical illness and the subsequent impact on growth and family life. The aim of this work was to complete a scoping review of evidence describing feeding difficulties amongst PICU-survivors. METHOD:Six electronic databases were searched from January 2000-October 2018. NICE Healthcare Databases Advanced Search website (https://hdas.nice.org.uk/) was used as a tool to complete multiple searches within multiple databases, including the Cumulative Index to Nursing and Allied Health Literature (CINAHL), PsycInfo and Medline. Any studies considering feeding difficulties amongst previously healthy children following discharge from PICU or those which explored the parental/caregiver experiences were included. 
RESULTS:As the initial search yielded only one study which fulfilled the inclusion criteria, the criteria was extended to include studies relating to feeding difficulties (post-discharge) amongst otherwise healthy ex-preterm infants (born < 37 weeks gestational age) and infants/children with chronic diseases where feeding difficulties were described following a PICU admission. A review team screened and extracted the data of published qualitative and quantitative studies, using content analysis techniques. Of the 9622 articles identified from the searches, 22 full-text studies were reviewed with seven studies included. Four overarching categories represented the results: prevalence of feeding difficulties; risk factors and predictors for developing feeding difficulties; parental/carer experience and emotional response to feeding difficulties; and challenges in accessing feeding support. CONCLUSIONS:The results of this scoping review suggest there are gaps in the research, particularly those exploring the prevalence of feeding difficulties amongst previously healthy children and the negative impact this may have on family life. Future research should focus on addressing the extent of the problem and identifying risk factors, in addition to the potential development of toolkits for health care professionals to better support parents.",2019-02-16 +30669929,AutophagySMDB: a curated database of small molecules that modulate protein targets regulating autophagy.,"Macroautophagy/autophagy is a complex self-degradative mechanism responsible for clearance of non functional organelles and proteins. A range of factors influences the autophagic process, and disruptions in autophagy-related mechanisms lead to disease states, and further exacerbation of disease. Despite in-depth research into autophagy and its role in pathophysiological processes, the resources available to use it for therapeutic purposes are currently lacking. 
Herein we report the Autophagy Small Molecule Database (AutophagySMDB; http://www.autophagysmdb.org/ ) of small molecules and their cognate protein targets that modulate autophagy. Presently, AutophagySMDB enlists ~10,000 small molecules which regulate 71 target proteins. All entries are comprised of information such as EC50 (half maximal effective concentration), IC50 (half maximal inhibitory concentration), Kd (dissociation constant) and Ki (inhibition constant), IUPAC name, canonical SMILE, structure, molecular weight, QSAR (quantitative structure activity relationship) properties such as hydrogen donor and acceptor count, aromatic rings and XlogP. AutophagySMDB is an exhaustive, cross-platform, manually curated database, where either the cognate targets for small molecule or small molecules for a target can be searched. This database is provided with different search options including text search, advanced search and structure search. Various computational tools such as tree tool, cataloging tools, and clustering tools have also been implemented for advanced analysis. Data and the tools provided in this database helps to identify common or unique scaffolds for designing novel drugs or to improve the existing ones for autophagy small molecule therapeutics. The approach to multitarget drug discovery by identifying common scaffolds has been illustrated with experimental validation. Abbreviations: AMPK: AMP-activated protein kinase; ATG: autophagy related; AutophagySMDB: autophagy small molecule database; BCL2: BCL2, apoptosis regulator; BECN1: beclin 1; CAPN: calpain; MTOR: mechanistic target of rapamycin kinase; PPARG: peroxisome proliferator activated receptor gamma; SMILES: simplified molecular input line entry system; SQSTM1: sequestosome 1; STAT3: signal transducer and activator of transcription.",2019-02-03 +32982148,Performance of Glow Fixation GoCheck Kids and 2WIN Photoscreeners and Retinomax to Uncover Hyperopia.,"

Background

A low-detail, glowing fixation device was added to GoCheck Kids (GCK) photoscreener in the hope of unmasking hyperopia and amblyopia risk factors (ARF).

Methods

Pediatric eye patients were screened by GCK and 2WIN photoscreeners, and Retinomax autorefractor before being compared to AAPOS ARFs.

Results

Screening was attempted by 131 children who then had school bus accommodation-relaxing skiascopy (SBA-RS) before cycloplegic examination. By 2013 AAPOS uniform guidelines, sensitivity/specificity for GCK was 87%/68%, for 2WIN 87%/71% and for Retinomax 79%/68%. Detection of amblyopia had sensitivity/specificity by GCK of 78%/63%, for 2WIN 79%/65% and for Retinomax 77%/68%. Inconclusive screens were seven for GCK, six for 2WIN and 13 for Retinomax. Mean hyperopia for GCK (+2.49±0.74 D) was similar to cycloplegic refraction (+2.93±0.72 D) and SBA-RS (+2.80±0.82 D) while GCK was slightly more than Retinomax (+1.59±0.93 D, p=0.13) but significantly more than 2WIN (+1.02±0.49 D, p<0.01).

Conclusion

GCK, 2WIN and Retinomax had similar validity detecting uniform amblyopia risk factors and amblyopia itself. The nondetailed glow fixation device allowed GCK to uncover substantial hyperopia while the detailed flashing fixation devices on 2WIN and Retinomax seemed to stimulate accommodation in some hyperopic children.

Clinical trials registry

NCT04297969. Data Access: http://www.abcd-vision.org/references/GCK%20glow%202WIN%20deidentify.pdf.

Précis

A glow fixation device on a smart phone photoscreener allowed robust detection of hyperopia.",2020-08-10 +32271766,Phenogenon: Gene to phenotype associations for rare genetic diseases.,"As high-throughput sequencing is increasingly applied to the molecular diagnosis of rare Mendelian disorders, a large number of patients with diverse phenotypes have their genetic and phenotypic data pooled together to uncover new gene-phenotype relations. We introduce Phenogenon, a statistical tool that combines, Human Phenotype Ontology (HPO) annotated patient phenotypes, gnomAD allele population frequency, and Combined Annotation Dependent Depletion (CADD) score for variant pathogenicity, in order to jointly predict the mode of inheritance and gene-phenotype associations. We ran Phenogenon on our cohort of 3,290 patients who had undergone whole exome sequencing. Among the top associations, we recapitulated previously known, such as ""SRD5A3-Abnormal full-field electroretinogram-recessive"" and ""GRHL2 -Nail dystrophy-recessive"", and discovered one potentially novel, ""RRAGA-Abnormality of the skin-dominant"". We also developed an interactive web interface available at https://phenogenon.phenopolis.org to visualise and explore the results.",2020-04-09 +31355885,Development and Validation of a Dynamic Risk Prediction Model to Forecast Psychosis Onset in Patients at Clinical High Risk.,"The prediction of outcomes in patients at Clinical High Risk for Psychosis (CHR-P) almost exclusively relies on static data obtained at a single snapshot in time (ie, baseline data). Although the CHR-P symptoms are intrinsically evolving over time, available prediction models cannot be dynamically updated to reflect these changes. Hence, the aim of this study was to develop and internally validate a dynamic risk prediction model (joint model) and to implement this model in a user-friendly online risk calculator. 
Furthermore, we aimed to explore the prognostic performance of extended dynamic risk prediction models and to compare static with dynamic prediction. One hundred ninety-six CHR-P patients were recruited as part of the ""Basel Früherkennung von Psychosen"" (FePsy) study. Psychopathology and transition to psychosis was assessed at regular intervals for up to 5 years using the Brief Psychiatric Rating Scale-Expanded (BPRS-E). Various specifications of joint models were compared with regard to their cross-validated prognostic performance. We developed and internally validated a joint model that predicts psychosis onset from BPRS-E disorganization and years of education at baseline and BPRS-E positive symptoms during the follow-up with good prognostic performance. The model was implemented as online risk calculator (http://www.fepsy.ch/DPRP/). The use of extended joint models slightly increased the prognostic accuracy compared to basic joint models, and dynamic models showed a higher prognostic accuracy than static models. Our results confirm that extended joint modeling could improve the prediction of psychosis in CHR-P patients. We implemented the first online risk calculator that can dynamically update psychosis risk prediction.",2020-02-01 +29450704,"Gulf Arabic nouns and verbs: A standardized set of 319 object pictures and 141 action pictures, with predictors of naming latencies.","Standardized pictorial stimuli and predictors of successful picture naming are not readily available for Gulf Arabic. On the basis of data obtained from Qatari Arabic, a variety of Gulf Arabic, the present study provides norms for a set of 319 object pictures and a set of 141 action pictures. Norms were collected from healthy speakers, using a picture-naming paradigm and rating tasks. Norms for naming latencies, name agreement, visual complexity, image agreement, imageability, age of acquisition, and familiarity were established. 
Furthermore, the database includes other intrinsic factors, such as syllable length and phoneme length. It also includes orthographic frequency values (extracted from Aralex; Boudelaa & Marslen-Wilson, 2010). These factors were then examined for their impact on picture-naming latencies in object- and action-naming tasks. The analysis showed that the primary determinants of naming latencies in both nouns and verbs are (in descending order) image agreement, name agreement, familiarity, age of acquisition, and imageability. These results indicate no evidence that noun- and verb-naming processes in Gulf Arabic are influenced in different ways by these variables. This is the first database for Gulf Arabic, and therefore the norms collected from the present study will be of paramount importance for researchers and clinicians working with speakers of this variety of Arabic. Due to the similarity of the Arabic varieties spoken in the Gulf, these different varieties are grouped together under the label ""Gulf Arabic"" in the literature. The normative databases and the standardized pictures from this study can be downloaded from http://qufaculty.qu.edu.qa/tariq-khwaileh/download-center/ .",2018-12-01 +35372894,"Characteristics, Outcomes and 60-Day Hospital Mortality of ICU Patients with COVID-19 and Acute Kidney Injury.","

Background

AKI has been reported in patients with COVID-19 pneumonia and it is associated with higher mortality. The aim of our study is to describe characteristics, outcomes, and 60-day hospital mortality of patients with COVID-19 pneumonia and AKI in the intensive care unit (ICU).

Methods

We conducted a retrospective study in which all adult patients with confirmed COVID-19 who were admitted to ICUs of Montefiore Medical Center and developing AKI were included. The study period ranged from March 10 to April 11, 2020. The 60-day follow-up data through June 11, 2020 were obtained.

Results

Of 300 adults admitted to the ICUs with COVID-19 pneumonia, 224 patients (75%) presented with AKI or developed AKI subsequent to admission. A total of 218 (97%) patients required invasive mechanical ventilation for moderate to severe acute respiratory distress syndrome (ARDS). A total of 113 (50%) patients had AKI on day 1 of ICU admission. The peak AKI stages observed were stage 1 in 49 (22%), stage 2 in 35 (16%), and stage 3 in 140 (63%) patients, respectively. Among patients with AKI, 114 patients (51%) required RRT. The mortality rate of patients requiring RRT was 70%. Of the 34 patients who were survivors, 25 (74%) were able to be weaned off RRT completely before hospital discharge. Nonsurvivors were older and had significantly higher admission and peak creatinine levels, admission hemoglobin, and peak phosphate levels compared with survivors. The 60-day hospital mortality was 67%.

Conclusions

COVID-19 requiring ICU admission is associated with high incidence of severe AKI, necessitating RRT in approximately half of such patients. The majority of patients with COVID-19 and AKI in ICU developed moderate to severe ARDS, requiring invasive mechanical ventilation. Timing or severity of AKI did not affect outcomes. The 60-day hospital mortality is high (67%). Patients with AKI requiring RRT have high mortality, but survivors have good rates of RRT recovery.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/K360/2020_12_31_KID0004282020.mp3.",2020-10-02 +31312702,"Data for benchmarking low-cost, 3D printed prosthetic hands.","In this article, three different data sets are presented to evaluate a representative of openly accessible 3D printed prosthetic hand. The first data set includes grasping force measurements of human hand and low-cost 3D printed hand. Three grasping functions were evaluated, spherical, cylindrical, and precision grasps. The experimental test was performed using a wearable tactile sensor. The second data set includes the numerical analysis of prosthetic fingers made from Acrylonitrile Butadiene Styrene (ABS) and Polylactic Acid (PLA) materials under different carrying loads. The numerical analyses were carried out by LS-DYNA software. The files can be used for the prosthetic fingers' evaluation and for the selection of suitable material. The third data set includes the experimental tensile test of ABS and PLA materials. The mechanical properties were calculated from the results, which can be used in the design and fabrication of products from these materials. All the datasets are available from Harvard Dataverse: https://doi.org/10.7910/DVN/GCPAIL.",2019-06-22 +26950747,Tissue-specific regulatory circuits reveal variable modular perturbations across complex diseases.,"Mapping perturbed molecular circuits that underlie complex diseases remains a great challenge. We developed a comprehensive resource of 394 cell type- and tissue-specific gene regulatory networks for human, each specifying the genome-wide connectivity among transcription factors, enhancers, promoters and genes. Integration with 37 genome-wide association studies (GWASs) showed that disease-associated genetic variants--including variants that do not reach genome-wide significance--often perturb regulatory modules that are highly specific to disease-relevant cell types or tissues. 
Our resource opens the door to systematic analysis of regulatory programs across hundreds of human cell types and tissues (http://regulatorycircuits.org).",2016-03-07 +30057049,Cupping therapy for treating ankylosing spondylitis: The evidence from systematic review and meta-analysis.,"

Objective

Cupping therapy has been widely used in Eastern Asia, the Middle East, or Central and North Europe to manage the symptom of ankylosing spondylitis (AS). The aim of this systematic review was to review data from randomized controlled trials (RCTs) of cupping therapy for treating patients with AS.

Methods

Databases that were searched from their inception until December 2017 included: MEDLINE, CINAHL, EMBASE, AMED, Cochrane Central Register of Controlled Trials, four Chinese databases [Chinese BioMedical Database, China National Knowledge Infrastructure, Wan-Fang Data, and the Chinese WeiPu Database], KoreaMed, The Korean National Assembly Library, Japana Centra Revuo Medicina (http://www.jamas.gr.jp/) and CiNii. In this systematic review, only RCTs that were related to the effects of cupping therapy on managing AS were included. A quantitative synthesis of RCTs will be conducted using RevMan 5.3 software. Study selection, data extraction, and validation were performed independently by two reviewers. Quantitative analysis of RCTs were performed using RevMan 5.3 software, and cochrane criteria for risk-of-bias were used to assess the methodological quality of the trials.

Results

A total of 5 RCTs met the inclusion criteria, and most were of low methodological quality. Participants in cupping therapy plus Western medicine group showed significantly greater improvements in the response rate [RR = 1.13, 95%CI (1.06, 1.22), p < 0.01] with low heterogeneity (Chi2 = 2.88, p = 0.41, I2 = 0%). Moreover, when compared with western medicine alone, meta-analysis indicated favorable statistically significant effects of cupping therapy plus western medicine on the Bath Ankylosing Spondylitis Functional Index (BASFI) [MD = -16.63, 95%CI (-17.75, -15.51), p < 0.01] and Bath Ankylosing Disease Activity Index (BASDAI) [MD = -9.93, 95%CI (-10.34, -9.52), p < 0.01], with low heterogeneity (Chi2 = 0.32, p = 0.85, I2 = 0% in BASFI; (Chi2 = 2.46, p = 0.29, I2 = 19% in BASDAI). Furthermore, when compared with western medicine alone, meta-analysis demonstrated statistically significant effects of cupping therapy plus western medicine on the serum level of ESR [MD = -1.28, 95% CI (-1.44, -1.13), p < 0.01] and the serum level of CRP [MD = -3.97, 95%CI (-4.71, -3.22), p < 0.01], with low heterogeneity (Chi2 = 0.50, p = 0.78, I2 = 0% in the serum level of ESR; Chi2 = 0.19, p = 0.91, I2 = 0% in the serum level of CRP).

Conclusion

Taken together, only weak evidence supported the hypothesis that cupping therapy had potential benefits for patients with AS.",2018-07-06 +24952961,A high-resolution spatiotemporal atlas of gene expression of the developing mouse brain.,"To provide a temporal framework for the genoarchitecture of brain development, we generated in situ hybridization data for embryonic and postnatal mouse brain at seven developmental stages for ∼2,100 genes, which were processed with an automated informatics pipeline and manually annotated. This resource comprises 434,946 images, seven reference atlases, an ontogenetic ontology, and tools to explore coexpression of genes across neurodevelopment. Gene sets coinciding with developmental phenomena were identified. A temporal shift in the principles governing the molecular organization of the brain was detected, with transient neuromeric, plate-based organization of the brain present at E11.5 and E13.5. Finally, these data provided a transcription factor code that discriminates brain structures and identifies the developmental age of a tissue, providing a foundation for eventual genetic manipulation or tracking of specific brain structures over development. The resource is available as the Allen Developing Mouse Brain Atlas (http://developingmouse.brain-map.org).",2014-06-19 +32183790,Mining and visualizing high-order directional drug interaction effects using the FAERS database.,"

Background

Adverse drug events (ADEs) often occur as a result of drug-drug interactions (DDIs). The use of data mining for detecting effects of drug combinations on ADE has attracted growing attention and interest, however, most studies focused on analyzing pairwise DDIs. Recent efforts have been made to explore the directional relationships among high-dimensional drug combinations and have shown effectiveness on prediction of ADE risk. However, the existing approaches become inefficient from both computational and illustrative perspectives when considering more than three drugs.

Methods

We proposed an efficient approach to estimate the directional effects of high-order DDIs through frequent itemset mining, and further developed a novel visualization method to organize and present the high-order directional DDI effects involving more than three drugs in an interactive, concise and comprehensive manner. We demonstrated its performance by mining the directional DDIs associated with myopathy using a publicly available FAERS dataset.

Results

Directional effects of DDIs involving up to seven drugs were reported. Our analysis confirmed previously reported myopathy associated DDIs including interactions between fusidic acid with simvastatin and atorvastatin. Furthermore, we uncovered a number of novel DDIs leading to increased risk for myopathy, such as the co-administration of zoledronate with different types of drugs including antibiotics (ciprofloxacin, levofloxacin) and analgesics (acetaminophen, fentanyl, gabapentin, oxycodone). Finally, we visualized directional DDI findings via the proposed tool, which allows one to interactively select any drug combination as the baseline and zoom in/out to obtain both detailed and overall picture of interested drugs.

Conclusions

We developed a more efficient data mining strategy to identify high-order directional DDIs, and designed a scalable tool to visualize high-order DDI findings. The proposed method and tool have the potential to contribute to the drug interaction research and ultimately impact patient health care.

Availability and implementation

http://lishenlab.com/d3i/explorer.html.",2020-03-18 +30990728,Environmental Health Indicators for China: Data Resources for Chinese Environmental Public Health Tracking.,"Many developed countries use environmental public health tracking to gain a better understanding of the link between environmental hazards and public health. To respond to complicated environmental health issues, the National Institute of Environmental Health (NIEH), Chinese Center for Disease Control and Prevention (China CDC), has begun to build a Chinese Environmental Public Health Tracking (CEPHT) system. On behalf of the CEPHT, authors provide insight into the CEPHT's development, current status, and future plans. In the initial stage of CEPHT, an indicator framework linking environment and public health that included a list of publicly available data sources regarding environmental hazards, public health outcomes, and risk factors in China was developed. An analysis of data availability, along with a comparison between CEPHT's indicator system and other tracking networks, revealed the existence of barriers and gaps in data integration that affect China's ability to track environmental public health. The lack of access to data, combined with inadequate data quality, has led to difficulties linking environmental hazards to their effects on public health. Current CEPHT efforts will help integrate environmental factors and exposure data with public health outcomes. For the near future, CEPHT plans to focus on increasing collaboration among data tracking agencies, improving data quality, and expanding proper data sharing. https://doi.org/10.1289/EHP4319.",2019-04-01 +31995268,Lobule-Specific Dosage Considerations for Cerebellar Transcranial Direct Current Stimulation During Healthy Aging: A Computational Modeling Study Using Age-Specific Magnetic Resonance Imaging Templates.,"

Objective

Aging is associated with a decline in cognitive and motor performances, which are a part of geriatric syndromes. Since aging is associated with morphological changes in the cerebellum and cerebellar morphology is a good predictor of cognitive and motor performances, the study of the cerebellar role in age-related decline in performance is necessary. Cerebellar transcranial direct current stimulation (ctDCS) has been proposed to study and facilitate the cerebellar function. However, lobule-specific dosing has not been investigated in healthy aging. This is important because the same electrode montage across different individuals for ctDCS (called the ""one-size-fits-all"" approach) can lead to inter-individual differences in the lobule-specific dosing of the electric field (EF). These differences can be due to the inter-individual variability and age-related changes in the cerebellar structure. To investigate such lobule-specific dosing differences in healthy aging, we modeled the lobular EF distribution across groups of 18 to 89 years for a commonly used ""one-size-fits-all"" ctDCS montage.

Materials and methods

A fully automated open-source pipeline performed age-group specific computational modeling of EF using 18 age-appropriate human brain magnetic resonance imaging (MRI) templates. The 18 age-appropriate human brain MRI templates were obtained from a database found online at https://jerlab.sc.edu/projects/neurodevelopmental-mri-database/. We extracted the EF magnitude (called EF strength) across the 28 cerebellar lobules based on a spatially unbiased cerebellar atlas. We investigated the aging effects on various measures of specificity including the ratio of the mean lobular EF at the lobules beneath the active electrode (ipsilateral [right] lobules VIIIa, VIIIb, IX) divided by the mean EF across both the targeted (ipsi) and the contralateral (contra) cerebellar hemisphere.

Results

Two-way ANOVA showed that the lobules as well as the age group (and their interaction term) had a significant effect (p < 0.01) on the EF strength. Specifically, EF strength increased significantly at the neighboring cerebellar lobules (e.g., ipsilateral [right] lobules VIIb, Crus I and Crus II) of the targeted cerebellar hemisphere at an old age (70-74, 75-79, and 85-89 years) that reduced the specificity of ctDCS at the ipsilateral (right) lobules VIIIa, VIIIb, IX beneath the active electrode. We also found that the maximum EF strength in the cerebellar hemispheres decreased with an increase in the volume of the cerebrospinal fluid (CSF) and a decrease in the cerebellar volume with aging in a linear manner.

Discussion

We found that cerebellar shrinkage and increasing thickness of the highly conductive CSF during healthy aging can lead to the dispersion of the current away from the lobules underlying the active electrode. We concluded that an individualized ctDCS approach for dosimetry is critical when ctDCS is used as an adjuvant treatment for active aging to address age-related lobule-specific cerebellar geriatric syndromes effectively. Future work is necessary to investigate age-related effects of lobule-specific ctDCS on the large-scale cognitive and motor networks using functional neuroimaging that is expected due to the cerebellum's extensive reciprocal connectivity with the cerebral cortex.",2020-01-29 +30605063,"Tobacco Smoking: Risk to Develop Addiction, Chronic Obstructive Pulmonary Disease, and Lung Cancer.","

Background

The morbidity and mortality associated with tobacco smoking is well established. Nicotine is the addictive component of tobacco. Nicotine, through the non-neuronal α7nicotinic receptor, induces cell proliferation, neo-angiogenesis, epithelial to mesenchymal transition, and inhibits drug-induced apoptosis.

Objective

To understand the genetic, molecular and cellular biology of addiction, chronic obstructive pulmonary disease and lung cancer.

Methods

The search for papers to be included in the review was performed during the months of July-September 2018 in the following databases: PubMed (http://www.ncbi.nlm.nih.gov), Scopus (http://www.scopus.com), EMBASE (http://www.elsevier.com/online-tools/embase), and ISI Web of Knowledge (http://apps.webofknowledge.com/). The following searching terms: ""nicotine"", ""nicotinic receptor"", and ""addiction"" or ""COPD"" or ""lung cancer"" were used. Patents were retrieved in clinicaltrials.gov (https://clinicaltrials.gov/). All papers written in English were evaluated. The reference list of retrieved articles was also reviewed to identify other eligible studies that were not indexed by the above-mentioned databases. New experimental data on the ability of nicotine to promote transformation of human bronchial epithelial cells, exposed for one hour to Benzo[a]pyrene-7,8-diol-9-10-epoxide, are reported.

Results

Nicotinic receptor variants and nicotinic receptor upregulation are involved in addiction, chronic obstructive pulmonary disease and/or lung cancer. Nicotine, through α7nicotinic receptor upregulation, induces complete bronchial epithelial cell transformation.

Conclusion

Genetic studies highlight the involvement of nicotinic receptors variants in addiction, chronic obstructive pulmonary disease and/or lung cancer. A future important step will be to translate these genetic findings to clinical practice. Interventions able to help smoking cessation in nicotine dependence subjects, under patent, are reported.",2019-01-01 +28962356,Exploring consumer exposure pathways and patterns of use for chemicals in the environment.,"Humans are exposed to thousands of chemicals in the workplace, home, and via air, water, food, and soil. A major challenge in estimating chemical exposures is to understand which chemicals are present in these media and microenvironments. Here we describe the Chemical/Product Categories Database (CPCat), a new, publically available (http://actor.epa.gov/cpcat) database of information on chemicals mapped to ""use categories"" describing the usage or function of the chemical. CPCat was created by combining multiple and diverse sources of data on consumer- and industrial-process based chemical uses from regulatory agencies, manufacturers, and retailers in various countries. The database uses a controlled vocabulary of 833 terms and a novel nomenclature to capture and streamline descriptors of chemical use for 43,596 chemicals from the various sources. Examples of potential applications of CPCat are provided, including identifying chemicals to which children may be exposed and to support prioritization of chemicals for toxicity screening. CPCat is expected to be a valuable resource for regulators, risk assessors, and exposure scientists to identify potential sources of human exposures and exposure pathways, particularly for use in high-throughput chemical exposure assessment.",2015-01-02 +28694781,"Amelogenesis Imperfecta; Genes, Proteins, and Pathways.","Amelogenesis imperfecta (AI) is the name given to a heterogeneous group of conditions characterized by inherited developmental enamel defects. 
AI enamel is abnormally thin, soft, fragile, pitted and/or badly discolored, with poor function and aesthetics, causing patients problems such as early tooth loss, severe embarrassment, eating difficulties, and pain. It was first described separately from diseases of dentine nearly 80 years ago, but the underlying genetic and mechanistic basis of the condition is only now coming to light. Mutations in the gene AMELX, encoding an extracellular matrix protein secreted by ameloblasts during enamel formation, were first identified as a cause of AI in 1991. Since then, mutations in at least eighteen genes have been shown to cause AI presenting in isolation of other health problems, with many more implicated in syndromic AI. Some of the encoded proteins have well documented roles in amelogenesis, acting as enamel matrix proteins or the proteases that degrade them, cell adhesion molecules or regulators of calcium homeostasis. However, for others, function is less clear and further research is needed to understand the pathways and processes essential for the development of healthy enamel. Here, we review the genes and mutations underlying AI presenting in isolation of other health problems, the proteins they encode and knowledge of their roles in amelogenesis, combining evidence from human phenotypes, inheritance patterns, mouse models, and in vitro studies. An LOVD resource (http://dna2.leeds.ac.uk/LOVD/) containing all published gene mutations for AI presenting in isolation of other health problems is described. We use this resource to identify trends in the genes and mutations reported to cause AI in the 270 families for which molecular diagnoses have been reported by 23rd May 2017. 
Finally we discuss the potential value of the translation of AI genetics to clinical care with improved patient pathways and speculate on the possibility of novel treatments and prevention strategies for AI.",2017-06-26 +29985971,Computational analysis of kinase inhibitor selectivity using structural knowledge.,"

Motivation

Kinases play a significant role in diverse disease signaling pathways and understanding kinase inhibitor selectivity, the tendency of drugs to bind to off-targets, remains a top priority for kinase inhibitor design and clinical safety assessment. Traditional approaches for kinase selectivity analysis using biochemical activity and binding assays are useful but can be costly and are often limited by the kinases that are available. On the other hand, current computational kinase selectivity prediction methods are computationally intensive and can rarely achieve sufficient accuracy for large-scale kinome-wide inhibitor selectivity profiling.

Results

Here, we present a KinomeFEATURE database for kinase binding site similarity search by comparing protein microenvironments characterized using diverse physiochemical descriptors. Initial selectivity prediction of 15 known kinase inhibitors achieved an >90% accuracy and demonstrated improved performance in comparison to commonly used kinase inhibitor selectivity prediction methods. Additional kinase ATP binding site similarity assessment (120 binding sites) identified 55 kinases with significant promiscuity and revealed unexpected inhibitor cross-activities between PKR and FGFR2 kinases. Kinome-wide selectivity profiling of 11 kinase drug candidates predicted novel as well as experimentally validated off-targets and suggested structural mechanisms of kinase cross-activities. Our study demonstrated potential utilities of our approach for large-scale kinase inhibitor selectivity profiling that could contribute to kinase drug development and safety assessment.

Availability and implementation

The KinomeFEATURE database and the associated scripts for performing kinase pocket similarity search can be downloaded from the Stanford SimTK website (https://simtk.org/projects/kdb).

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +32333818,COVID-19: lambda interferon against viral load and hyperinflammation.,"Coronavirus disease 2019 (COVID-19), triggered by the betacoronavirus SARS-CoV-2, has become one of the worst pandemics of our time that has already caused more than 250,000 deaths (JHU data-05/06/2020, https://coronavirus.jhu.edu/). Effective therapeutic approaches are urgently needed to reduce the spread of the virus and its death toll. Here, we assess the possibility of using interferon-lambda (IFNλ), a third type of interferon sharing low homology with type I IFNs and IL-10, for treating COVID-19 patients. We discuss the unique role of IFNλ in fine-tuning antiviral immunity in the respiratory tract to achieve optimal protection and minimal host damage and review early evidence that SARS-CoV-2 may impair IFNλ induction, leading to a delayed type I IFN-dominated response that triggers hyperinflammation and severe disease. We also consider the potential windows of opportunity for therapeutic intervention with IFNλ and potential safety considerations. We conclude that IFNλ constitutes a promising therapeutic agent for reducing viral presence and hyperinflammation in a single shot to prevent the devastating consequences of COVID-19 such as pneumonia and acute respiratory distress syndrome (ARDS).",2020-05-25 +,Influence of El Niño-Southern oscillation (ENSO) on agroclimatic zoning for tomato in Mozambique,"Mozambique is a country dependent on agriculture as 70% of the country’s population lives in rural areas, and approximately 80% of the nation economically active population works on agricultural jobs. Using climate/weather information in agriculture helps to reduce risks and benefit from favorable conditions for crop development. Agroclimatic zoning is one of the most useful tools to define suitable regions and spatial, temporal and environmental bounds for crop production. 
These bounds, however, are not static and can vary with sources of climatic variability such as the El Niño-Southern Oscillation (ENSO) phenomenon. Incorporating information about primary drivers of interannual climate variability, like ENSO, allows for better management adaptation. Tomato has a high economic impact in Mozambique’s economy and its cropping success is highly dependent on weather conditions. The objectives of this study were to create an agroclimatic zoning for tomato production in Mozambique identifying regions and planting dates with optimal crop development for each phase of ENSO and to understand how ENSO phenomenon impacts the zoning. A third objective was to create an online tool (http://mz.agroclimate.org/plantio/) to assist farmers, extension agents and researchers to plan the crop season. Gridded data of daily air temperature and rainfall were collected between 1984 and 2014 from the Climate Forecast System Reanalysis (CFSR) and Famine Early Warning System Network (FEWS-Net) datasets provided by the US National Oceanic and Atmospheric Administration (NOAA). A generic tomato variety with a growing cycle of 90 days was evaluated considering 24 planting dates throughout the year. Each growing cycle and planting date was classified as ideal, marginal, or unsuitable according to the conditions for tomato development. During summer months, the air temperatures were excessively high for tomato production except at high elevations. During fall and winter, most of the country is suitable for tomato production. Both El Niño and La Niña impacted the zoning, especially from November to May. During La Niña, suitable conditions were more frequent than during El Niño extending the planting window. High air temperatures during El Niño reduce crop suitability. 
Disseminating this information in a timely and accessible way will reduce climate risk for tomato production in Mozambique through the definition of location and planting dates with lower risk associated with climate variability.",2018-01-01 +30295754,WGS analysis of a penicillin-resistant Neisseria meningitidis strain containing a chromosomal ROB-1 β-lactamase gene.,"

Objectives

Neisseria meningitidis is rarely penicillin resistant. We describe WGS analysis of a penicillin-resistant N. meningitidis collected from a case of invasive meningococcal disease.

Methods

Serogrouping, serotyping and serosubtyping were performed with specific antibodies. β-Lactamase was detected by nitrocefin. MICs were determined by Etest and agar dilution. Sequencing of N. meningitidis genomes was done on the Illumina MiSeq platform and genome data were analysed using the Bacterial Isolate Genome Sequence Database (BIGSdb) on the PubMLST Neisseria website (https://pubmlst.org/neisseria/). Transformation was used to confirm the genetic basis of the penicillin resistance.

Results

An N. meningitidis blood isolate from a female patient in her mid-50s with a painful and septic left shoulder was found to have penicillin MIC values of 3-12 mg/L. The isolate was typed as Y: 14, 19: P1.- and ST3587, and was weakly β-lactamase positive. WGS analysis identified a full-length copy of the β-lactamase gene blaROB-1, which was contained on a 1719 bp insert with a G + C content of 41.7% (versus a G + C content of N. meningitidis of 51.7%), suggesting that the blaROB-1 gene came from a different bacterial species. A GenBank analysis of the blaROB-1 gene insert found 99.77% identity with a DNA segment found in plasmid pB1000' from Haemophilus influenzae. Transformation of a penicillin-susceptible strain with the blaROB-1 gene conferred β-lactamase activity and penicillin resistance.

Conclusions

N. meningitidis serogroup Y, ST3587 can carry and express the blaROB-1 gene, leading to penicillin resistance. It is highly likely that the N. meningitidis isolate acquired the blaROB-1 gene from H. influenzae.",2019-01-01 +30422398,Structural basis for protein phosphatase 1 recruitment by glycogen-targeting subunits.,"The rate-limiting enzymes in glycogen metabolism are subject to regulation by reversible phosphorylation. The glycogen-targeted protein phosphatase 1 (PP1) holoenzyme catalyzes their dephosphorylation. It is composed of a catalytic subunit (PP1C) and a glycogen-targeting subunit (G subunit). To date, seven G subunits have been identified. They all contain an RVxF PP1C-binding motif. The interactions between this motif in the skeletal muscle-specific GM and PP1C have been revealed by structural studies. However, whether elements outside of this motif contribute to the interaction with PP1C is not clear. In this study, we found that residues next to the RVxF motif in GM also mediate interactions to PP1C and revealed the mechanism of the interaction by structural studies. Sequence analysis revealed that the PP1C-binding region in GM is highly conserved among G subunits. Consistently, we found that the equivalent region in the liver-enriched GL adopts a similar structure upon binding PP1C. Dephosphorylation experiments indicated that this region and the glycogen-binding region in GM cooperate to stimulate PP1C's activity toward glycogen-associated substrates. DATABASES: The structure factors and coordinates for the PP1Cα-GM (1-99) and PP1Cα-GL (31-105) complexes have been deposited into the Protein Data Bank (http://www.pdb.org), with the accession codes 5ZQV and 5ZT0, respectively.",2018-11-28 +28968848,A comprehensive assessment of long intrinsic protein disorder from the DisProt database.,"

Motivation

Intrinsic disorder (ID), i.e. the lack of a unique folded conformation at physiological conditions, is a common feature for many proteins, which requires specialized biochemical experiments that are not high-throughput. Missing X-ray residues from the PDB have been widely used as a proxy for ID when developing computational methods. This may lead to a systematic bias, where predictors deviate from biologically relevant ID. Large benchmarking sets on experimentally validated ID are scarce. Recently, the DisProt database has been renewed and expanded to include manually curated ID annotations for several hundred new proteins. This provides a large benchmark set which has not yet been used for training ID predictors.

Results

Here, we describe the first systematic benchmarking of ID predictors on the new DisProt dataset. In contrast to previous assessments based on missing X-ray data, this dataset contains mostly long ID regions and a significant amount of fully ID proteins. The benchmarking shows that ID predictors work quite well on the new dataset, especially for long ID segments. However, a large fraction of ID still goes virtually undetected and the ranking of methods is different than for PDB data. In particular, many predictors appear to confound ID and regions outside X-ray structures. This suggests that the ID prediction methods capture different flavors of disorder and can benefit from highly accurate curated examples.

Availability and implementation

The raw data used for the evaluation are available from URL: http://www.disprot.org/assessment/.

Contact

silvio.tosatto@unipd.it.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-02-01 +32049310,FluPhenotype-a one-stop platform for early warnings of the influenza A virus.,"MOTIVATION:Newly emerging influenza viruses keep challenging global public health. To evaluate the potential risk of the viruses, it is critical to rapidly determine the phenotypes of the viruses, including the antigenicity, host, virulence and drug resistance. RESULTS:Here, we built FluPhenotype, a one-stop platform to rapidly determine the phenotypes of the influenza A viruses. The input of FluPhenotype is the complete or partial genomic/protein sequences of the influenza A viruses. The output presents five types of information about the viruses: (i) sequence annotation including the gene and protein names as well as the open reading frames, (ii) potential hosts and human-adaptation-associated amino acid markers, (iii) antigenic and genetic relationships with the vaccine strains of different HA subtypes, (iv) mammalian virulence-related amino acid markers and (v) drug resistance-related amino acid markers. FluPhenotype will be a useful bioinformatic tool for surveillance and early warnings of the newly emerging influenza A viruses. AVAILABILITY AND IMPLEMENTATION:It is publicly available from: http://www.computationalbiology.cn:18888/IVEW. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +27358602,Recon 2.2: from reconstruction to model of human metabolism.,"

Introduction

The human genome-scale metabolic reconstruction details all known metabolic reactions occurring in humans, and thereby holds substantial promise for studying complex diseases and phenotypes. Capturing the whole human metabolic reconstruction is an on-going task and since the last community effort generated a consensus reconstruction, several updates have been developed.

Objectives

We report a new consensus version, Recon 2.2, which integrates various alternative versions with significant additional updates. In addition to re-establishing a consensus reconstruction, further key objectives included providing more comprehensive annotation of metabolites and genes, ensuring full mass and charge balance in all reactions, and developing a model that correctly predicts ATP production on a range of carbon sources.

Methods

Recon 2.2 has been developed through a combination of manual curation and automated error checking. Specific and significant manual updates include a respecification of fatty acid metabolism, oxidative phosphorylation and a coupling of the electron transport chain to ATP synthase activity. All metabolites have definitive chemical formulae and charges specified, and these are used to ensure full mass and charge reaction balancing through an automated linear programming approach. Additionally, improved integration with transcriptomics and proteomics data has been facilitated with the updated curation of relationships between genes, proteins and reactions.

Results

Recon 2.2 now represents the most predictive model of human metabolism to date as demonstrated here. Extensive manual curation has increased the reconstruction size to 5324 metabolites, 7785 reactions and 1675 associated genes, which now are mapped to a single standard. The focus upon mass and charge balancing of all reactions, along with better representation of energy generation, has produced a flux model that correctly predicts ATP yield on different carbon sources.

Conclusion

Through these updates we have achieved the most complete and best annotated consensus human metabolic reconstruction available, thereby increasing the ability of this resource to provide novel insights into normal and disease states in human. The model is freely available from the Biomodels database (http://identifiers.org/biomodels.db/MODEL1603150001).",2016-06-07 +29198880,AureoWiki - The repository of the Staphylococcus aureus research and annotation community.,"In light of continuously accumulating data and knowledge on major human pathogens, comprehensive and up-to-date sources of easily accessible information are urgently required. The AureoWiki database (http://aureowiki.med.uni-greifswald.de) provides detailed information on the genes and proteins of clinically and experimentally relevant S. aureus strains, currently covering NCTC 8325, COL, Newman, USA300_FPR3757, and N315. By implementing a pan-genome approach, AureoWiki facilitates the transfer of knowledge gained in studies with different S. aureus strains, thus supporting functional annotation and better understanding of this organism. All data related to a given gene or gene product is compiled on a strain-specific gene page. The gene pages contain sequence-based information complemented by data on, for example, protein function and localization, transcriptional regulation, and gene expression. The information provided is connected via links to other databases and published literature. Importantly, orthologous genes of the individual strains, which are linked by a pan-genome gene identifier and a unified gene name, are presented side by side using strain-specific tabs. The respective pan-genome gene page contains an orthologue table for 32 S. aureus strains, a multiple-strain genome viewer, a protein sequence alignment as well as other comparative information. The data collected in AureoWiki is also accessible through various download options in order to support bioinformatics applications. 
In addition, based on two large-scale gene expression data sets, AureoWiki provides graphical representations of condition-dependent mRNA levels and protein profiles under various laboratory and infection-related conditions.",2017-11-24 +,Modelling daily to seasonal carbon fluxes and annual net ecosystem carbon balance of cereal grain-cropland using DailyDayCent: A model data comparison,"Croplands are important not only for food and fibre, but also for their global climate change mitigation and carbon (C) sequestration potentials. Measurements and modelling of daily C fluxes and annual C balance, which are needed for optimizing such global potentials in croplands, are difficult since many measurements, and the correct simulation of different ecosystem processes are needed. In the present study, a biogeochemical ecosystem model (DailyDayCent) was applied to simulate daily to seasonal C fluxes, as well as annual net ecosystem carbon balance (NECB), in a cereal grain-cropland. The model was tested using eddy-flux data and other associated C flux measurements lasting for three years over a full cereal crop-rotation (corn-wheat-barley) from a long-term experiment (SOERE–ACBB; http://www.soere-acbb.com) in France. DailyDayCent simulated seasonal crop growth, regrowth of volunteers and cumulative net primary production (NPP) at harvest successfully. Fairly consistent agreement was obtained between measured and modelled daily NPP over the full crop rotation, with model efficiency (EF) of 0.59. The model underestimated heterotrophic respiration (Rh) on daily, seasonal and annual time scales by 43–53%. Although a reasonable model fit was found for daily NEE over the entire experimental period (EF∼0.47), the model overestimated cumulative annual net C uptake (NEE) by 28 times. DailyDayCent simulated net C harvest efficiently, and the leaching loss of C reasonably well. 
Both the modelled and measured mean annual NECB indicate that present cereal grain-cropland is a net C source and the cropland is losing C at a mean annual rate of 64.0 (modelled) to 349.4 g C m−2 yr−1 (measured), thus the model overestimated mean annual NECB (or underestimated mean annual net C loss) in the present cropland by 82%. We conclude that overestimation of cumulative NEE on seasonal and annual time scales is the most likely reason for overestimation of NECB, and underestimation of Rh was the main driver for overestimation of cumulative seasonal and annual NEE. The model would benefit from further testing, particularly against direct measurements of Rh, and subsequent calibration, parameter estimation and model development for improving its ability to simulate Rh on daily to seasonal and annual time scales, cumulative seasonal and annual NEE, and net C balance, especially in cereal grain-croplands in the study region.",2018-01-01 +,Omics analysis of acetic acid tolerance in Saccharomyces cerevisiae,"Acetic acid is an inhibitor in industrial processes such as wine making and bioethanol production from cellulosic hydrolysate. It causes energy depletion, inhibition of metabolic enzyme activity, growth arrest and ethanol productivity losses in Saccharomyces cerevisiae. Therefore, understanding the mechanisms of the yeast responses to acetic acid stress is essential for improving acetic acid tolerance and ethanol production. Although 329 genes associated with acetic acid tolerance have been identified in the Saccharomyces genome and included in the database (http://www.yeastgenome.org/observable/resistance_to_acetic_acid/overview), the cellular mechanistic responses to acetic acid remain unclear in this organism. 
Post-genomic approaches such as transcriptomics, proteomics, metabolomics and chemogenomics are being applied to yeast and are providing insight into the mechanisms and interactions of genes, proteins and other components that together determine complex quantitative phenotypic traits such as acetic acid tolerance. This review focuses on these omics approaches in the response to acetic acid in S. cerevisiae. Additionally, several novel strains with improved acetic acid tolerance have been engineered by modifying key genes, and the application of these strains and recently acquired knowledge to industrial processes is also discussed.",2017-05-01 +32087005,PmliPred: a method based on hybrid model and fuzzy decision for plant miRNA-lncRNA interaction prediction.,"MOTIVATION:The studies have indicated that not only microRNAs (miRNAs) or long non-coding RNAs (lncRNAs) play important roles in biological activities, but also their interactions affect the biological process. A growing number of studies focus on the miRNA-lncRNA interactions, while few of them are proposed for plant. The prediction of interactions is significant for understanding the mechanism of interaction between miRNA and lncRNA in plant. RESULTS:This article proposes a new method for fulfilling plant miRNA-lncRNA interaction prediction (PmliPred). The deep learning model and shallow machine learning model are trained using raw sequence and manually extracted features, respectively. Then they are hybridized based on fuzzy decision for prediction. PmliPred shows better performance and generalization ability compared with the existing methods. Several new miRNA-lncRNA interactions in Solanum lycopersicum are successfully identified using quantitative real time-polymerase chain reaction from the candidates predicted by PmliPred, which further verifies its effectiveness. AVAILABILITY AND IMPLEMENTATION:The source code of PmliPred is freely available at http://bis.zju.edu.cn/PmliPred/. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-05-01 +32096820,Tally-2.0: upgraded validator of tandem repeat detection in protein sequences.,"

Motivation

Proteins containing tandem repeats (TRs) are abundant, frequently fold in elongated non-globular structures and perform vital functions. A number of computational tools have been developed to detect TRs in protein sequences. A blurred boundary between imperfect TR motifs and non-repetitive sequences gave rise to necessity to validate the detected TRs.

Results

Tally-2.0 is a scoring tool based on a machine learning (ML) approach, which allows to validate the results of TR detection. It was upgraded by using improved training datasets and additional ML features. Tally-2.0 performs at a level of 93% sensitivity, 83% specificity and an area under the receiver operating characteristic curve of 95%.

Availability and implementation

Tally-2.0 is available, as a web tool and as a standalone application published under Apache License 2.0, on the URL https://bioinfo.crbm.cnrs.fr/index.php?route=tools&tool=27. It is supported on Linux. Source code is available upon request.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-05-01 +31860771,"Expression, immune infiltration and clinical significance of SPAG5 in hepatocellular carcinoma: A gene expression-based study.","

Background

Overexpression of sperm-associated antigen 5 (SPAG5) is a marker of poor prognosis in numerous tumors and is recognized as an index of tumor proliferation; however, its expression in liver cancer remains unclear.

Methods

The Oncomine (https://www.oncomine.org) and Timer (https://cistrome.shinyapps.io/timer) databases were used to analyze the expression of SPAG5 in liver hepatocellular carcinoma (HCC) and normal liver tissues. The relationship between the expression of SPAG5 and immune infiltration of HCC was investigated using the Timer and GEPIA (http://gepia.cancer-pku.cn) databases, and the mechanism was analyzed using Gene Set Enrichment Analysis. A Kaplan-Meier Plotter (http://kmplot.com/analysis) was used to evaluate the effect of SPAG5 on the prognosis of patients with HCC.

Results

The results revealed that the SPAG5 expression level was positively correlated with the infiltration levels of CD8+ T cells, macrophages, neutrophils, and especially B cells and dendritic cells. In addition, SPAG5 expression was significantly associated with T cell exhaustion. The overall survival time, progression-free survival time, recurrence-free survival time and disease-specific survival time were significantly reduced for HCC patients with high SPAG5 expression (p < 0.01) and high expression of SPAG5 was significantly associated with a poor overall survival time and progression-free survival time of grade and stage II-III HCC (p < 0.05) but not with stage I HCC (p > 0.05). Additionally, the expression of SPAG5 is related to the p53 and cell cycle signal pathways.

Conclusions

In conclusion, SPAG5 is not only a marker of immune infiltration and poor prognosis, but also a potential therapeutic target for liver cancer.",2020-01-27 +32880677,A web-based machine-learning algorithm predicting postoperative acute kidney injury after total knee arthroplasty.,"

Purpose

Acute kidney injury (AKI) is a deleterious complication after total knee arthroplasty (TKA). The purposes of this study were to identify preoperative risk factors and develop a web-based prediction model for postoperative AKI, and assess how AKI affected the progression to ESRD.

Method

The study included 5757 patients treated in three tertiary teaching hospitals. The model was developed using data on 5302 patients from two hospitals and externally validated in 455 patients from the third hospital. Eighteen preoperative variables were collected and feature selection was performed. A gradient boosting machine (GBM) was used to predict AKI. A tenfold-stratified area under the curve (AUC) served as the metric for internal validation. Calibration was performed via isotonic regression and evaluated using a calibration plot. End-stage renal disease (ESRD) was followed up for an average of 41.7 months.

Results

AKI develops in up to 10% of patients undergoing TKA, increasing the risk of progression to ESRD. The ESRD odds ratio of AKI patients (compared to non-AKI patients) was 9.8 (95% confidence interval 4.3-22.4). Six key predictors of postoperative AKI were selected: higher preoperative levels of creatinine in serum, the use of general anesthesia, male sex, a higher ASA class (> 3), use of a renin-angiotensin-aldosterone system inhibitor, and no use of tranexamic acid (all p < 0.001). The predictive performance of our model was good (area under the curve 0.78 [95% CI 0.74-0.81]) in the developmental cohort and improved in the external validation cohort (0.89). Our model can be accessed at https://safetka.net .

Conclusions

A web-based predictive model for AKI after TKA was developed using a machine-learning algorithm featuring six preoperative variables. The model is simple and has been validated to improve both short- and long-term prognoses of TKA patients. Postoperative AKI may lead to ESRD, which surgeons should strive to avoid.

Level of evidence

Diagnostic level II.",2020-09-03 +29016774,Alveolar bone changes after rapid maxillary expansion with tooth-born appliances: a systematic review.,"Background:During rapid maxillary expansion (RME), heavy forces are transmitted to the maxilla by the anchored teeth causing buccal inclination and buccal bone loss of posterior teeth. Objective:To systematically review the literature in order to investigate whether RME causes periodontal sequelae, assessed by cone-beam computed tomography (CBCT). Search methods:Fifteen electronic databases and reference lists of studies were searched up to March 2017. Selection criteria:To be included in the systematic review, articles must be human studies on growing subjects, with transversal maxillary deficiency treated with RME and with assessment of buccal bone loss by CBCT images. Only randomized and non-randomized trials were included. Data collection and analysis:Two authors independently performed study selection, data extraction, and risk of bias assessment. Study characteristics (study design, sample size, age, sex, skeletal maturity, type of appliance, daily activation, evaluated linear measurements, observation period, CBCT settings), and study outcomes (loss of buccal bone thickness and marginal bone) were reported according to the PRISMA statement. Results:On the basis of the applied inclusion criteria, only six articles, three randomized clinical trials and three controlled clinical trials were included. An individual analysis of the selected articles was undertaken. The risks of bias of the six trials were scored as medium to low. Limitations:The results of the present systematic review are based on a limited number of studies and only one study included a control group. Conclusions and implications:In all considered studies, significant loss of buccal bone thickness and marginal bone level were observed in anchored teeth, following RME. 
Further prospective studies correlating the radiological data of bone loss to the periodontal soft tissues reaction after RME are required. A preliminary evaluation of the patient-related risk factors for RR may be advisable when considering to administering RME. Registration:This systematic review was registered in the National Institute of Health Research database with an appropriate protocol number (http://www.crd.york.ac.uk/PROSPERO Protocol: CRD42017062645). Funding:The present study has not received any contributions from private or public funding agencies.",2018-05-01 +29892516,SEVENS: a database for comprehensive GPCR genes obtained from genomes: -Update to 68 eukaryotes.,"We report the development of the SEVENS database, which contains information on G-protein coupled receptor (GPCR) genes that are identified with high confidence levels (A, B, C, and D) from various eukaryotic genomes, by using a pipeline comprising bioinformatics softwares, including a gene finder, a sequence alignment tool, a motif and domain assignment tool, and a transmembrane helix predictor. SEVENS compiles detailed information on GPCR genes, such as chromosomal mapping position, phylogenetic tree, sequence similarity to known genes, and protein function described by motif/domain and transmembrane helices. They are presented in a user-friendly interface. Because of the comprehensive gene findings from genomes, SEVENS contains a larger data set than that of previous databases and enables the performance of a genome-scale overview of all the GPCR genes. We surveyed the complete genomes of 68 eukaryotes, and found that there were between 6 and 3,470 GPCR genes for each genome (Level A data). Within these genes, the number of receptors for various molecules, including biological amines, peptides, and lipids, were conserved in mammals, birds, and fishes, whereas the numbers of odorant receptors and pheromone receptors were highly diverse in mammals. 
SEVENS is freely available at http://sevens.cbrc.jp or http://sevens.chem.aoyama.ac.jp.",2018-04-27 +27113915,Hipposeq: a comprehensive RNA-seq database of gene expression in hippocampal principal neurons.,"Clarifying gene expression in narrowly defined neuronal populations can provide insight into cellular identity, computation, and functionality. Here, we used next-generation RNA sequencing (RNA-seq) to produce a quantitative, whole genome characterization of gene expression for the major excitatory neuronal classes of the hippocampus; namely, granule cells and mossy cells of the dentate gyrus, and pyramidal cells of areas CA3, CA2, and CA1. Moreover, for the canonical cell classes of the trisynaptic loop, we profiled transcriptomes at both dorsal and ventral poles, producing a cell-class- and region-specific transcriptional description for these populations. This dataset clarifies the transcriptional properties and identities of lesser-known cell classes, and moreover reveals unexpected variation in the trisynaptic loop across the dorsal-ventral axis. We have created a public resource, Hipposeq (http://hipposeq.janelia.org), which provides analysis and visualization of these data and will act as a roadmap relating molecules to cells, circuits, and computation in the hippocampus.",2016-04-26 +31077222,A mobile health monitoring-and-treatment system based on integration of the SSN sensor ontology and the HL7 FHIR standard.,"

Background

Mobile health (MH) technologies including clinical decision support systems (CDSS) provide an efficient method for patient monitoring and treatment. A mobile CDSS is based on real-time sensor data and historical electronic health record (EHR) data. Raw sensor data have no semantics of their own; therefore, a computer system cannot interpret these data automatically. In addition, the interoperability of sensor data and EHR medical data is a challenge. EHR data collected from distributed systems have different structures, semantics, and coding mechanisms. As a result, building a transparent CDSS that can work as a portable plug-and-play component in any existing EHR ecosystem requires a careful design process. Ontology and medical standards support the construction of semantically intelligent CDSSs.

Methods

This paper proposes a comprehensive MH framework with an integrated CDSS capability. This cloud-based system monitors and manages type 1 diabetes mellitus. The efficiency of any CDSS depends mainly on the quality of its knowledge and its semantic interoperability with different data sources. To this end, this paper concentrates on constructing a semantic CDSS based on proposed FASTO ontology.

Results

This realistic ontology is able to collect, formalize, integrate, analyze, and manipulate all types of patient data. It provides patients with complete, personalized, and medically intuitive care plans, including insulin regimens, diets, exercises, and education sub-plans. These plans are based on the complete patient profile. In addition, the proposed CDSS provides real-time patient monitoring based on vital signs collected from patients' wireless body area networks. These monitoring include real-time insulin adjustments, mealtime carbohydrate calculations, and exercise recommendations. FASTO integrates the well-known standards of HL7 fast healthcare interoperability resources (FHIR), semantic sensor network (SSN) ontology, basic formal ontology (BFO) 2.0, and clinical practice guidelines. The current version of FASTO includes 9577 classes, 658 object properties, 164 data properties, 460 individuals, and 140 SWRL rules. FASTO is publicly available through the National Center for Biomedical Ontology BioPortal at https://bioportal.bioontology.org/ontologies/FASTO .

Conclusions

The resulting CDSS system can help physicians to monitor more patients efficiently and accurately. In addition, patients in rural areas can depend on the system to manage their diabetes and emergencies.",2019-05-10 +32992049,Crohn's Disease Pathobiont Adherent-Invasive E coli Disrupts Epithelial Mitochondrial Networks With Implications for Gut Permeability.,"

Background & aims

Adherent-invasive Escherichia coli are implicated in inflammatory bowel disease, and mitochondrial dysfunction has been observed in biopsy specimens from patients with inflammatory bowel disease. As a novel aspect of adherent-invasive E coli-epithelial interaction, we hypothesized that E coli (strain LF82) would elicit substantial disruption of epithelial mitochondrial form and function.

Methods

Monolayers of human colon-derived epithelial cell lines were exposed to E coli-LF82 or commensal E coli and RNA sequence analysis, mitochondrial function (adenosine triphosphate synthesis) and dynamics (mitochondrial network imaging, immunoblotting for fission and fusion proteins), and epithelial permeability (transepithelial resistance, flux of fluorescein isothiocyanate-dextran and bacteria) were assessed.

Results

E coli-LF82 significantly affected epithelial expression of ∼8600 genes, many relating to mitochondrial function. E coli-LF82-infected epithelia showed swollen mitochondria, reduced mitochondrial membrane potential and adenosine triphosphate, and fragmentation of the mitochondrial network: events not observed with dead E coli-LF82, medium from bacterial cultures, or control E coli. Treatment with Mitochondrial Division Inhibitor 1 (Mdivi1, inhibits dynamin-related peptide 1, guanosine triphosphatase principally responsible for mitochondrial fission) or P110 (prevents dynamin-related peptide 1 binding to mitochondrial fission 1 protein) partially reduced E coli-LF82-induced mitochondrial fragmentation in the short term. E coli-LF82-infected epithelia showed loss of the long isoform of optic atrophy factor 1, which mediates mitochondrial fusion. Mitochondrial Division Inhibitor 1 reduced the magnitude of E coli-LF82-induced increased transepithelial flux of fluorescein isothiocyanate dextran. By 8 hours after infection, increased cytosolic cytochrome C and DNA fragmentation were apparent without evidence of caspase-3 or apoptosis inducing factor activation.

Conclusions

Epithelial mitochondrial fragmentation caused by E coli-LF82 could be targeted to maintain cellular homeostasis and mitigate infection-induced loss of epithelial barrier function. Data have been deposited in NCBI's Gene Expression Omnibus and are accessible through GEO series accession numbers GSE154121 and GSE154122 (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE154121).",2020-09-28 +25392419,AHTPDB: a comprehensive platform for analysis and presentation of antihypertensive peptides.,"AHTPDB (http://crdd.osdd.net/raghava/ahtpdb/) is a manually curated database of experimentally validated antihypertensive peptides. Information pertaining to peptides with antihypertensive activity was collected from research articles and from various peptide repositories. These peptides were derived from 35 major sources that include milk, egg, fish, pork, chicken, soybean, etc. In AHTPDB, most of the peptides belong to a family of angiotensin-I converting enzyme inhibiting peptides. The current release of AHTPDB contains 5978 peptide entries among which 1694 are unique peptides. Each entry provides detailed information about a peptide like sequence, inhibitory concentration (IC50), toxicity/bitterness value, source, length, molecular mass and information related to purification of peptides. In addition, the database provides structural information of these peptides that includes predicted tertiary and secondary structures. A user-friendly web interface with various tools has been developed to retrieve and analyse the data. It is anticipated that AHTPDB will be a useful and unique resource for the researchers working in the field of antihypertensive peptides.",2014-11-11 +25526212,CoffeebEST: an integrated resource for Coffea spp expressed sequence tags.,"Coffee is one of the most important commodities in the world, and its production relies mainly on two species, Coffea arabica and Coffea canephora. 
Although there are diverse transcriptome datasets available for coffee trees, few research groups have exploited the potential knowledge contained in these data, especially with respect to fruit and seed development. Here, we present a comparative analysis of the transcriptomes of Coffea arabica and Coffea canephora with a focus on fruit development using publicly available expressed sequence tags (ESTs). Most of the fruit and seed EST data has been obtained from C. canephora. Therefore, we performed a fruit EST analysis of the 5 developmental stages of this species (18, 22, 30, 42, and 46 weeks after flowering) comprising 29,009 sequences. We compared C. canephora fruit ESTs to reference unigenes of C. canephora (7710 contigs and 8955 singletons) and C. arabica (15,656 contigs and 16,351 singletons). Additional analyses included functional annotation based on Gene Ontology, as well as an annotation using PlantCyc, a curated plant protein database. The Coffee Bean EST (CoffeebEST) is a public database available at http://bioinfo-02.cp.utfpr.edu.br/. This database represents an additional resource for the coffee scientific community, offering a user-friendly collection of information for non-specialists in coffee molecular biology to support experimental research on comparative and functional genomics.",2014-12-19 +31922754,Highly Flexible Ligand Docking: Benchmarking of the DockThor Program on the LEADS-PEP Protein-Peptide Data Set.,"Protein-peptide interactions play a crucial role in many cellular and biological functions, which justify the increasing interest in the development of peptide-based drugs. However, predicting experimental binding modes and affinities in protein-peptide docking remains a great challenge for most docking programs due to some particularities of this class of ligands, such as the high degree of flexibility. 
In this paper, we present the performance of the DockThor program on the LEADS-PEP data set, a benchmarking set composed of 53 diverse protein-peptide complexes with peptides ranging from 3 to 12 residues and with up to 51 rotatable bonds. The DockThor performance for pose prediction on redocking studies was compared with some state-of-the-art docking programs that were also evaluated on the LEADS-PEP data set, AutoDock, AutoDock Vina, Surflex, GOLD, Glide, rDock, and DINC, as well as with the task-specific docking protocol HPepDock. Our results indicate that DockThor could dock 40% of the cases with an overall backbone RMSD below 2.5 Å when the top-scored docking pose was considered, exhibiting similar results to Glide and outperforming other protein-ligand docking programs, whereas rDock and HPepDock achieved superior results. Assessing the docking poses closest to the crystal structure (i.e., best-RMSD pose), DockThor achieved a success rate of 60% in pose prediction. Due to the great overall performance of handling peptidic compounds, the DockThor program can be considered as suitable for docking highly flexible and challenging ligands, with up to 40 rotatable bonds. DockThor is freely available as a virtual screening Web server at https://www.dockthor.lncc.br/ .",2020-01-27 +29095974,BEAM web server: a tool for structural RNA motif discovery.,"Motivation:RNA structural motif finding is a relevant problem that becomes computationally hard when working on high-throughput data (e.g. eCLIP, PAR-CLIP), often represented by thousands of RNA molecules. Currently, the BEAM server is the only web tool capable to handle tens of thousands of RNA in input with a motif discovery procedure that is only limited by the current secondary structure prediction accuracies. 
Results:The recently developed method BEAM (BEAr Motifs finder) can analyze tens of thousands of RNA molecules and identify RNA secondary structure motifs associated to a measure of their statistical significance. BEAM is extremely fast thanks to the BEAR encoding that transforms each RNA secondary structure in a string of characters. BEAM also exploits the evolutionary knowledge contained in a substitution matrix of secondary structure elements, extracted from the RFAM database of families of homologous RNAs. The BEAM web server has been designed to streamline data pre-processing by automatically handling folding and encoding of RNA sequences, giving users a choice for the preferred folding program. The server provides an intuitive and informative results page with the list of secondary structure motifs identified, the logo of each motif, its significance, graphic representation and information about its position in the RNA molecules sharing it. Availability and implementation:The web server is freely available at http://beam.uniroma2.it/ and it is implemented in NodeJS and Python with all major browsers supported. Contact:marco.pietrosanto@uniroma2.it. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-03-01 +27010673,SM-TF: A structural database of small molecule-transcription factor complexes.,"Transcription factors (TFs) are the proteins involved in the transcription process, ensuring the correct expression of specific genes. Numerous diseases arise from the dysfunction of specific TFs. In fact, over 30 TFs have been identified as therapeutic targets of about 9% of the approved drugs. In this study, we created a structural database of small molecule-transcription factor (SM-TF) complexes, available online at http://zoulab.dalton.missouri.edu/SM-TF. 
The 3D structures of the co-bound small molecule and the corresponding binding sites on TFs are provided in the database, serving as a valuable resource to assist structure-based drug design related to TFs. Currently, the SM-TF database contains 934 entries covering 176 TFs from a variety of species. The database is further classified into several subsets by species and organisms. The entries in the SM-TF database are linked to the UniProt database and other sequence-based TF databases. Furthermore, the druggable TFs from human and the corresponding approved drugs are linked to the DrugBank. © 2016 Wiley Periodicals, Inc.",2016-03-24 +30091044,"Human fetoplacental arterial and venous endothelial cells are differentially programmed by gestational diabetes mellitus, resulting in cell-specific barrier function changes.","

Aims/hypothesis

An adverse intrauterine environment can result in permanent changes in the physiology of the offspring and predispose to diseases in adulthood. One such exposure, gestational diabetes mellitus (GDM), has been linked to development of metabolic disorders and cardiovascular disease in offspring. Epigenetic variation, including DNA methylation, is recognised as a leading mechanism underpinning fetal programming and we hypothesised that this plays a key role in fetoplacental endothelial dysfunction following exposure to GDM. Thus, we conducted a pilot epigenetic study to analyse concordant DNA methylation and gene expression changes in GDM-exposed fetoplacental endothelial cells.

Methods

Genome-wide methylation analysis of primary fetoplacental arterial endothelial cells (AEC) and venous endothelial cells (VEC) from healthy pregnancies and GDM-complicated pregnancies in parallel with transcriptome analysis identified methylation and expression changes. Most-affected pathways and functions were identified by Ingenuity Pathway Analysis and validated using functional assays.

Results

Transcriptome and methylation analyses identified variation in gene expression linked to GDM-associated DNA methylation in 408 genes in AEC and 159 genes in VEC, implying a direct functional link. Pathway analysis found that genes altered by exposure to GDM clustered to functions associated with 'cell morphology' and 'cellular movement' in healthy AEC and VEC. Further functional analysis demonstrated that GDM-exposed cells had altered actin organisation and barrier function.

Conclusions/interpretation

Our data indicate that exposure to GDM programs atypical morphology and barrier function in fetoplacental endothelial cells by DNA methylation and gene expression change. The effects differ between AEC and VEC, indicating a stringent cell-specific sensitivity to adverse exposures associated with developmental programming in utero.

Data availability

DNA methylation and gene expression datasets generated and analysed during the current study are available at the National Center for Biotechnology Information (NCBI) Gene Expression Omnibus (GEO) database ( http://www.ncbi.nlm.nih.gov/geo ) under accession numbers GSE106099 and GSE103552, respectively.",2018-08-08 +30521009,BaiHui: cross-species brain-specific network built with hundreds of hand-curated datasets.,"

Motivation

Functional gene networks, representing how likely two genes work in the same biological process, are important models for studying gene interactions in complex tissues. However, a limitation of the current network-building scheme is the lack of leveraging evidence from multiple model organisms as well as the lack of expert curation and quality control of the input genomic data.

Results

Here, we present BaiHui, a brain-specific functional gene network built by probabilistically integrating expertly-hand-curated (by reading original publications) heterogeneous and multi-species genomic data in human, mouse and rat brains. To facilitate the use of this network, we deployed a web server through which users can query their genes of interest, visualize the network, gain functional insight from enrichment analysis and download network data. We also illustrated how this network could be used to generate testable hypotheses on disease gene prioritization of brain disorders.

Availability and implementation

BaiHui is freely available at: http://guanlab.ccmb.med.umich.edu/BaiHui/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +32726728,Identification of genes related to dexamethasone-induced immunosuppression in chicken thymus using transcriptome analysis.,"The molecular mechanism of stress-induced immunosuppression (SIS) in certain poultry immune organs is not completely clear. In this study, we constructed a stress immunosuppression model by selecting 180 healthy 7-day-old Gushi chickens and dividing them randomly into two groups: a D_T group and a B_T group. The D_T group was given dexamethasone, and the B_T group was given normal saline, according to the treatment method established and reported in our previous study. Thymus samples were subsequently taken from both groups. RNA-seq was used to sequence the transcriptomes of the thymus samples from both groups, and 1278 significant differentially expressed genes (DEGs) were obtained, of which 845 genes were up-regulated and 433 genes were down-regulated (padj<0.05, |FC| ≥ 2, FPKM>1). We identified immune-related gene ontology (GO) terms including immune system processes, immune system process regulation, and T cell activation. The results of KEGG (http://www.kegg.jp) analysis showed that the DEGs are involved in a variety of immune-related pathways, such as cytokine-cytokine receptor interactions, Jak-STAT signaling pathways, and cell adhesion molecules (CAMs). The cytokine-cytokine receptor interaction pathway involves the DEGs CCR6, CCR5, CD40LG and FAS. The DEGs in the Jak-STAT signaling pathway were SPRY2, BCL2L1. These DEGs play an important role in cell apoptosis. CD40L, CD8, among other genes, are involved in the CAMs pathway. 
The results of this study add to existing data on the genomic study of stress affecting immune function, and provide a basis for further studies of the molecular mechanisms of stress-influenced immune function.",2020-07-19 +31106358,EPIC-TABSAT: analysis tool for targeted bisulfite sequencing experiments and array-based methylation studies.,"DNA methylation is one of the major epigenetic modifications and has frequently demonstrated its suitability as diagnostic and prognostic biomarker. In addition to chip and sequencing based epigenome wide methylation profiling methods, targeted bisulfite sequencing (TBS) has been established as a cost-effective approach for routine diagnostics and target validation applications. Yet, an easy-to-use tool for the analysis of TBS data in combination with array-based methylation results has been missing. Consequently, we have developed EPIC-TABSAT, a user-friendly web-based application for the analysis of targeted sequencing data that additionally allows the integration of array-based methylation results. The tool can handle multiple targets as well as multiple sequencing files in parallel and covers the complete data analysis workflow from calculation of quality metrics to methylation calling and interactive result presentation. The graphical user interface offers an unprecedented way to interpret TBS data alone or in combination with array-based methylation studies. Together with the computation of target-specific epialleles it is useful in validation, research, and routine diagnostic environments. EPIC-TABSAT is freely accessible to all users at https://tabsat.ait.ac.at/.",2019-07-01 +31350879,geneCo: a visualized comparative genomic method to analyze multiple genome structures.,"

Summary

In comparative and evolutionary genomics, a detailed comparison of common features between organisms is essential to evaluate genetic distance. However, identifying differences in matched and mismatched genes among multiple genomes is difficult using current comparative genomic approaches due to complicated methodologies or the generation of meager information from obtained results. This study describes a visualized software tool, geneCo (gene Comparison), for comparing genome structure and gene arrangements between various organisms. User data are aligned, gene information is recognized, and genome structures are compared based on user-defined GenBank files. Information regarding inversion, gain, loss, duplication and gene rearrangement among multiple organisms being compared is provided by geneCo, which uses a web-based interface that users can easily access without any need to consider the computational environment.

Availability and implementation

Users can freely use the software, and the accessible URL is https://bigdata.dongguk.edu/geneCo. The main module of geneCo is implemented by Python and the web-based user interface is built by PHP, HTML and CSS to support all browsers.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +,Best Paper Selection,"Arnold CW, Wallace WD, Chen S, Oh A, Abtin F, Genshaft S, Binder S, Aberle D, Enzmann D. RadPath: A web-based system for integrating and correlating radiology and pathology findings during cancer diagnosis. Acad Radiol 2016 Jan;23(1):90-100 +http://escholarship.org/uc/item/22x4021q Hravnak M, Chen L, Dubrawski A, Bose E, Clermont G, Pinsky MR. Real alerts and artifact classification in archived multi-signal vital sign monitoring data: implications for mining big data. J Clin Monit Comput 2016 Dec;30(6):875-88 +https://link.springer.com/article/10.1007%2Fs10877-015-9788-2 Kalpathy-Cramer J, Zhao B, Goldgof D, Gu Y, Wang X, Yang H, Tan Y, Gillies R, Napel S. A comparison of lung nodule segmentation algorithms: methods and results from a multi-institutional study. J Digit Imaging 2016 Aug;29(4):476-87 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4942386/ Moss TJ, Lake DE, Calland JF, Enfield KB, Delos JB, Fairchild KD, Moorman JR. Signatures of subacute potentially catastrophic illness in the ICU: model development and validation. Crit Care Med 2016 Sep;44(9):1639-48 +https://insights.ovid.com/pubmed?pmid=27452809 Petousis P, Han SX, Aberle D, Bui AA. Prediction of lung cancer incidence on the low-dose computed tomography arm of the National Lung Screening Trial: A dynamic Bayesian network. Artif Intell Med 2016 Sep;72:42-55 +https://linkinghub.elsevier.com/retrieve/pii/S0933-3657(16)30106-3 Springer DB, Tarassenko L, Clifford GD. Logistic regression-HSMM-based heart sound segmentation. 
IEEE Trans Biomed Eng 2016 Apr;63(4):822-32 +http://ieeexplore.ieee.org/document/7234876/",2017-08-01 +31194003,"Mineral data (SEM, electron microprobe, Raman spectroscopy) from epithermal hydrothermal alteration of the Miocene Sigri Petrified Forest and host pyroclastic rocks, Western Lesbos, Greece.","Data available from a detailed mineralogical investigation of the Petrified Forest of Lesbos and its host pyroclastic rocks [1] are summarized and a link is provided to the full data at https://data.mendeley.com/datasets/dxwfd32zms/1. Samples were taken from petrified wood, fresh and devitrified tuffs, and from epithermal veins and epithermally altered tuffs. Backscattered electron (BSE) images were made by scanning electron microscope (SEM) from polished thin sections of 16 samples to show textural relationships between minerals. Minerals were identified by energy dispersive spectroscopy (EDS). Further chemical analysis by electron microprobe (EMP) were made of trace elements in the petrified wood and of Mn-oxide minerals. Polymorphs of silica were investigated by Raman spectroscopy. SEM X-Ray maps were made of selected sites with manganese oxide minerals. In this contribution, the general character of each analyzed sample is summarized and a brief inventory of available data is presented, with specific reference to features in the on-line data. The significance of these data for the origin of the petrification of the wood and the epithermal veining of the host pyroclastic rocks is provided in ""Nature of the hydrothermal alteration of the Miocene Sigri Petrified Forest and host pyroclastic rocks, Lesbos, Greece"" [1] https://doi.org/10.1016/j.jvolgeores.2018.11.018. 
The data will be of comparative value to those investigating petrification of wood, devitrification of tuffs, and epithermal Mn-Fe mineralization in other areas.",2019-05-17 +32545277,Creating the Internet of Augmented Things: An Open-Source Framework to Make IoT Devices and Augmented and Mixed Reality Systems Talk to Each Other. ,"Augmented Reality (AR) and Mixed Reality (MR) devices have evolved significantly in the last years, providing immersive AR/MR experiences that allow users to interact with virtual elements placed on the real-world. However, to make AR/MR devices reach their full potential, it is necessary to go further and let them collaborate with the physical elements around them, including the objects that belong to the Internet of Things (IoT). Unfortunately, AR/MR and IoT devices usually make use of heterogeneous technologies that complicate their intercommunication. Moreover, the implementation of the intercommunication mechanisms requires involving specialized developers with have experience on the necessary technologies. To tackle such problems, this article proposes the use of a framework that makes it easy to integrate AR/MR and IoT devices, allowing them to communicate dynamically and in real time. The presented AR/MR-IoT framework makes use of standard and open-source protocols and tools like MQTT, HTTPS or Node-RED. After detailing the inner workings of the framework, it is illustrated its potential through a practical use case: a smart power socket that can be monitored and controlled through Microsoft HoloLens AR/MR glasses. The performance of such a practical use case is evaluated and it is demonstrated that the proposed framework, under normal operation conditions, enables to respond in less than 100 ms to interaction and data update requests.",2020-06-11 +25360160,BioPhytMol: a drug discovery community resource on anti-mycobacterial phytomolecules and plant extracts.,"

Background

Tuberculosis (TB) is the second leading cause of death from a single infectious organism, demanding attention towards discovery of novel anti-tubercular compounds. Natural products or their derivatives have provided more than 50% of all existing drugs, offering a chemically diverse space for discovery of novel drugs.

Description

BioPhytMol has been designed to systematically curate and analyze the anti-mycobacterial natural product chemical space. BioPhytMol is developed as a drug-discovery community resource with anti-mycobacterial phytomolecules and plant extracts. Currently, it holds 2582 entries including 188 plant families (692 genera and 808 species) from global flora, manually curated from literature. In total, there are 633 phytomolecules (with structures) curated against 25 target mycobacteria. Multiple analysis approaches have been used to prioritize the library for drug-like compounds, for both whole cell screening and target-based approaches. In order to represent the multidimensional data on chemical diversity, physiochemical properties and biological activity data of the compound library, novel approaches such as the use of circular graphs have been employed.

Conclusion

BioPhytMol has been designed to systematically represent and search for anti-mycobacterial phytochemical information. Extensive compound analyses can also be performed through web-application for prioritizing drug-like compounds. The resource is freely available online at http://ab-openlab.csir.res.in/biophytmol/. Graphical AbstractBioPhytMol: a drug discovery community resource on anti-mycobacterial phytomolecules and plant extracts generated using Crowdsourcing. The platform comprises of manually curated data on antimycobacterial natural products along with tools to perform structure similarity and visualization. The platform allows for prioritization of drug like natural products for antimycobacterial drug discovery.",2014-10-11 +29126205,Complex analyses of inverted repeats in mitochondrial genomes revealed their importance and variability.,"

Motivation

The NCBI database contains mitochondrial DNA (mtDNA) genomes from numerous species. We investigated the presence and locations of inverted repeat sequences (IRs) in these mtDNA sequences, which are known to be important for regulating nuclear genomes.

Results

IRs were identified in mtDNA in all species. IR lengths and frequencies correlate with evolutionary age and the greatest variability was detected in subgroups of plants and fungi and the lowest variability in mammals. IR presence is non-random and evolutionary favoured. The frequency of IRs generally decreased with IR length, but not for IRs 24 or 30 bp long, which are 1.5 times more abundant. IRs are enriched in sequences from the replication origin, followed by D-loop, stem-loop and miscellaneous sequences, pointing to the importance of IRs in regulatory regions of mitochondrial DNA.

Availability and implementation

Data were produced using Palindrome analyser, freely available on the web at http://bioinformatics.ibp.cz.

Contact

vaclav@ibp.cz.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-04-01 +29718096,GapRepairer: a server to model a structural gap and validate it using topological analysis.,"Motivation:Over 25% of protein structures possess unresolved fragments. On the other hand, approximately 6% of protein chains have non-trivial topology (and form knots, slipknots, lassos and links). As the topology is fundamental for the proper function of proteins, modeling of topologically correct structures is decisive in various fields, including biophysics, biotechnology and molecular biology. However, none of the currently existing tools take into account the topology of the model and those which could be modified to include topology, demand experience in bioinformatics, protein topology and knot theory. Results:In this work, we present the GapRepairer-the server that fills the gap in the spectrum of structure modeling methods. Its easy and intuitive interface offers the power of Modeller homology modeling to many non-experts in the field. This server determines the topology of templates and predicted structures. Such information when possible is used by the server to suggest the best model, or it can be used by the user to score models or to design artificially (dis)entangled structures. Availability and implementation:GapRepairer server along with tutorials, usage notes, movies and the database of already repaired structures is available at http://gaprepairer.cent.uw.edu.pl. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-10-01 +31778141,APTANI2: update of aptamer selection through sequence-structure analysis.,"SUMMARY:Here we present APTANI2, an expanded and optimized version of APTANI, a computational tool for selecting target-specific aptamers from high-throughput-Systematic Evolution of Ligands by Exponential Enrichment data through sequence-structure analysis. 
As compared to its original implementation, APTANI2 ranks aptamers and identifies relevant structural motifs through the calculation of a score that combines frequency and structural stability of each secondary structure predicted in any aptamer sequence. In addition, APTANI2 comprises modules for a deeper investigation of sequence motifs and secondary structures, a graphical user interface that enhances its usability, and coding solutions that improve performances. AVAILABILITY AND IMPLEMENTATION:Source code, documentation and example command lines can be downloaded from http://aptani.unimore.it. APTANI2 is implemented in Python 3.4, released under the GNU GPL3.0 License, and compatible with Linux, Mac OS and the MS Windows subsystem for Linux. SUPPLEMENTARY INFORMATION:Supplementary information is available at Bioinformatics online.",2020-04-01 +23203889,NCBI Bookshelf: books and documents in life sciences and health care.,"Bookshelf (http://www.ncbi.nlm.nih.gov/books/) is a full-text electronic literature resource of books and documents in life sciences and health care at the National Center for Biotechnology Information (NCBI). Created in 1999 with a single book as an encyclopedic reference for resources such as PubMed and GenBank, it has grown to its current size of >1300 titles. Unlike other NCBI databases, such as GenBank and Gene, which have a strict data structure, books come in all forms; they are diverse in publication types, formats, sizes and authoring models. The Bookshelf data format is XML tagged in the NCBI Book DTD (Document Type Definition), modeled after the National Library of Medicine journal article DTDs. The book DTD has been used for systematically tagging the diverse data formats of books, a move that has set the foundation for the growth of this resource. Books at NCBI followed the route of journal articles in the PubMed Central project, using the PubMed Central architectural framework, workflows and processes. 
Through integration with other NCBI molecular databases, books at NCBI can be used to provide reference information for biological data and facilitate its discovery. This article describes Bookshelf at NCBI: its growth, data handling and retrieval and integration with molecular databases.",2012-11-29 +32804317,The relationship between diabetes and clinical outcomes in COVID-19: a single-center retrospective analysis.,"

Aims

Coronavirus disease 19 (COVID-19) has become a pandemic. Diabetic patients tend to have poorer outcomes and more severe disease (Kumar et al. in Diabetes Metab Syndr 14(4):535-545, 2020. https://doi.org/10.1016/j.dsx.2020.04.044 ). However, the vast majority of studies are representative of Asian and Caucasian population and fewer represent an African-American population.

Methods

In this single-center, retrospective observational study, we included all adult patients (> 18 years old) admitted to Einstein Medical Center, Philadelphia, with a diagnosis of COVID-19. Patients were classified according to having a known diagnosis of diabetes mellitus. Demographic and clinical data, comorbidities, outcomes and laboratory findings were obtained.

Results

Our sample included a total of 355 patients. 70% were African-American, and 47% had diabetes. Patients with diabetes had higher peak inflammatory markers like CRP 184 (111-258) versus 142 (65-229) p = 0.012 and peak LDH 560 (384-758) versus 499 (324-655) p = 0.017. The need for RRT/HD was significantly higher in patients with diabetes (21% vs 11% p = 0.013) as well as the need for vasopressors (28% vs 18% p = 0.023). Only age was found to be an independent predictor of mortality. We found no significant differences in inpatient mortality p = 0.856, need for RRT/HD p = 0.429, need for intubation p = 1.000 and need for vasopressors p = 0.471 in African-Americans with diabetes when compared to non-African-Americans.

Conclusions

Our study demonstrates that patients with COVID-19 and diabetes tend to have more severe disease and poorer clinical outcomes. African-American patients with diabetes did not differ in outcomes or disease severity when compared to non-African-American patients.",2020-08-17 +32602023,Posterior reconstruction during robotic-assisted radical cystectomy with intracorporeal orthotopic ileal neobladder: description and outcomes of a simple step.,"A posterior reconstruction (PR) might improve the fluidity and delicacy of the maneuvers related to the neovesico-urethral anastomosis during robotic-assisted radical cystectomy (RARC). Our objective is to describe in detail the surgical steps of PR and to assess its feasibility and functional outcomes. The data regarding patients undergoing a totally intracorporeal RARC with neobladder and PR for high-grade and/or muscle-invasive urothelial cancer of the bladder at Karolinska University Hospital between October 2015 and November 2016 by a single surgeon (PW) were reviewed. Prior to the anastomosis, a modified posterior Rocco's repair involving the Denonvillier's fascia, the rhabdosphincter, and the posterior side of the ileal neobladder neck was performed. The steps are shown in a video at https://doi.org/10.1089/vid.2019.0029 . The primary outcome was urinary continence; the secondary outcomes were urinary leakage, intermittent catheterization, and complications related to the reconstructive steps. Eleven male patients with a median age and BMI of 67 years and 24, respectively, underwent RARC with PR associated to the neovesico-urethral anastomosis. Overall and posterior reconstruction time were 300' (195-320) and 6' (4-7), respectively. The daytime and nighttime continence rates were 100% and 44% at 12 months, respectively; the median pad weight was 3.5 g and 108 g at daytime and nighttime, respectively. One urinary leakage from the urethrovesical anastomosis was treated conservatively. 
Two patients perform intermittent catheterization. The posterior reconstruction during RARC is safe and feasible, providing good continence rates. It supported a careful suturing of the anastomosis as well as an uncomplicated catheter placement.",2020-06-29 +25217587,VirusMentha: a new resource for virus-host protein interactions.,"Viral infections often cause diseases by perturbing several cellular processes in the infected host. Viral proteins target host proteins and either form new complexes or modulate the formation of functional host complexes. Describing and understanding the perturbation of the host interactome following viral infection is essential for basic virology and for the development of antiviral therapies. In order to provide a general overview of such interactions, a few years ago we developed VirusMINT. We have now extended the scope and coverage of VirusMINT and established VirusMentha, a new virus-virus and virus-host interaction resource build on the detailed curation protocols of the IMEx consortium and on the integration strategies developed for mentha. VirusMentha is regularly and automatically updated every week by capturing, via the PSICQUIC protocol, interactions curated by five different databases that are part of the IMEx consortium. VirusMentha can be freely browsed at http://virusmentha.uniroma2.it/ and its complete data set is available for download.",2014-09-12 +25551368,PD_NGSAtlas: a reference database combining next-generation sequencing epigenomic and transcriptomic data for psychiatric disorders.,"

Background

Psychiatric disorders such as schizophrenia (SZ) and bipolar disorder (BP) are projected to lead the global disease burden within the next decade. Several lines of evidence suggest that epigenetic- or genetic-mediated dysfunction is frequently present in these disorders. To date, the inheritance patterns have been complicated by the problem of integrating epigenomic and transcriptomic factors that have yet to be elucidated. Therefore, there is a need to build a comprehensive database for storing epigenomic and transcriptomic data relating to psychiatric disorders.

Description

We have developed the PD_NGSAtlas, which focuses on the efficient storage of epigenomic and transcriptomic data based on next-generation sequencing and on the quantitative analyses of epigenetic and transcriptional alterations involved in psychiatric disorders. The current release of the PD_NGSAtlas contains 43 DNA methylation profiles and 37 transcription profiles detected by MeDIP-Seq and RNA-Seq, respectively, in two distinct brain regions and peripheral blood of SZ, BP and non-psychiatric controls. In addition to these data that were generated in-house, we have included, and will continue to include, published DNA methylation and gene expression data from other research groups, with a focus on psychiatric disorders. A flexible query engine has been developed for the acquisition of methylation profiles and transcription profiles for special genes or genomic regions of interest of the selected samples. Furthermore, the PD_NGSAtlas offers online tools for identifying aberrantly methylated and expressed events involved in psychiatric disorders. A genome browser has been developed to provide integrative and detailed views of multidimensional data in a given genomic context, which can help researchers understand molecular mechanisms from epigenetic and transcriptional perspectives. Moreover, users can download the methylation and transcription data for further analyses.

Conclusions

The PD_NGSAtlas aims to provide storage of epigenomic and transcriptomic data as well as quantitative analyses of epigenetic and transcriptional alterations involved in psychiatric disorders. The PD_NGSAtlas will be a valuable data resource and will enable researchers to investigate the pathophysiology and aetiology of disease in detail. The database is available at http://bioinfo.hrbmu.edu.cn/pd_ngsatlas/.",2014-12-31 +32403123,Automated inference of Boolean models from molecular interaction maps using CaSQ.,"

Motivation

Molecular interaction maps have emerged as a meaningful way of representing biological mechanisms in a comprehensive and systematic manner. However, their static nature provides limited insights to the emerging behaviour of the described biological system under different conditions. Computational modelling provides the means to study dynamic properties through in silico simulations and perturbations. We aim to bridge the gap between static and dynamic representations of biological systems with CaSQ, a software tool that infers Boolean rules based on the topology and semantics of molecular interaction maps built with CellDesigner.

Results

We developed CaSQ by defining conversion rules and logical formulas for inferred Boolean models according to the topology and the annotations of the starting molecular interaction maps. We used CaSQ to produce executable files of existing molecular maps that differ in size, complexity and the use of Systems Biology Graphical Notation (SBGN) standards. We also compared, where possible, the manually built logical models corresponding to a molecular map to the ones inferred by CaSQ. The tool is able to process large and complex maps built with CellDesigner (either following SBGN standards or not) and produce Boolean models in a standard output format, Systems Biology Marked Up Language-qualitative (SBML-qual), that can be further analyzed using popular modelling tools. References, annotations and layout of the CellDesigner molecular map are retained in the obtained model, facilitating interoperability and model reusability.

Availability and implementation

The present tool is available online: https://lifeware.inria.fr/∼soliman/post/casq/ and distributed as a Python package under the GNU GPLv3 license. The code can be accessed here: https://gitlab.inria.fr/soliman/casq.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +30311374,ClinGen Allele Registry links information about genetic variants.,"Effective exchange of information about genetic variants is currently hampered by the lack of readily available globally unique variant identifiers that would enable aggregation of information from different sources. The ClinGen Allele Registry addresses this problem by providing (1) globally unique ""canonical"" variant identifiers (CAids) on demand, either individually or in large batches; (2) access to variant-identifying information in a searchable Registry; (3) links to allele-related records in many commonly used databases; and (4) services for adding links to information about registered variants in external sources. A core element of the Registry is a canonicalization service, implemented using in-memory sequence alignment-based index, which groups variant identifiers denoting the same nucleotide variant and assigns unique and dereferenceable CAids. More than 650 million distinct variants are currently registered, including those from gnomAD, ExAC, dbSNP, and ClinVar, including a small number of variants registered by Registry users. The Registry is accessible both via a web interface and programmatically via well-documented Hypertext Transfer Protocol (HTTP) Representational State Transfer Application Programming Interface (REST-APIs). For programmatic interoperability, the Registry content is accessible in the JavaScript Object Notation for Linked Data (JSON-LD) format. We present several use cases and demonstrate how the linked information may provide raw material for reasoning about variant's pathogenicity.",2018-11-01 +25881271,The salinity tolerant poplar database (STPD): a comprehensive database for studying tree salt-tolerant adaption and poplar genomics.,"

Background

Soil salinity is a significant factor that impairs plant growth and agricultural productivity, and numerous efforts are underway to enhance salt tolerance of economically important plants. Populus species are widely cultivated for diverse uses. Especially, they grow in different habitats, from salty soil to mesophytic environment, and are therefore used as a model genus for elucidating physiological and molecular mechanisms of stress tolerance in woody plants.

Description

The Salinity Tolerant Poplar Database (STPD) is an integrative database for salt-tolerant poplar genome biology. Currently the STPD contains Populus euphratica genome and its related genetic resources. P. euphratica, with a preference of the salty habitats, has become a valuable genetic resource for the exploitation of tolerance characteristics in trees. This database contains curated data including genomic sequence, genes and gene functional information, non-coding RNA sequences, transposable elements, simple sequence repeats and single nucleotide polymorphisms information of P. euphratica, gene expression data between P. euphratica and Populus tomentosa, and whole-genome alignments between Populus trichocarpa, P. euphratica and Salix suchowensis. The STPD provides useful searching and data mining tools, including GBrowse genome browser, BLAST servers and genome alignments viewer, which can be used to browse genome regions, identify similar sequences and visualize genome alignments. Datasets within the STPD can also be downloaded to perform local searches.

Conclusions

A new Salinity Tolerant Poplar Database has been developed to assist studies of salt tolerance in trees and poplar genomics. The database will be continuously updated to incorporate new genome-wide data of related poplar species. This database will serve as an infrastructure for researches on the molecular function of genes, comparative genomics, and evolution in closely related species as well as promote advances in molecular breeding within Populus. The STPD can be accessed at http://me.lzu.edu.cn/stpd/ .",2015-03-17 +,Evaluating soil moisture retrievals from ESA's SMOS and NASA's SMAP brightness temperature datasets,"Two satellites are currently monitoring surface soil moisture (SM) using L-band observations: SMOS (Soil Moisture and Ocean Salinity), a joint ESA (European Space Agency), CNES (Centre national d'études spatiales), and CDTI (the Spanish government agency with responsibility for space) satellite launched on November 2, 2009 and SMAP (Soil Moisture Active Passive), a National Aeronautics and Space Administration (NASA) satellite successfully launched in January 2015. In this study, we used a multilinear regression approach to retrieve SM from SMAP data to create a global dataset of SM, which is consistent with SM data retrieved from SMOS. This was achieved by calibrating coefficients of the regression model using the CATDS (Centre Aval de Traitement des Données) SMOS Level 3 SM and the horizontally and vertically polarized brightness temperatures (TB) at 40° incidence angle, over the 2013–2014 period. Next, this model was applied to SMAP L3 TB data from Apr 2015 to Jul 2016. The retrieved SM from SMAP (referred to here as SMAP_Reg) was compared to: (i) the operational SMAP L3 SM (SMAP_SCA), retrieved using the baseline Single Channel retrieval Algorithm (SCA); and (ii) the operational SMOSL3 SM, derived from the multiangular inversion of the L-MEB model (L-MEB algorithm) (SMOSL3). 
This inter-comparison was made against in situ soil moisture measurements from >400 sites spread over the globe, which are used here as a reference soil moisture dataset. The in situ observations were obtained from the International Soil Moisture Network (ISMN; https://ismn.geo.tuwien.ac.at/) in North of America (PBO_H2O, SCAN, SNOTEL, iRON, and USCRN), in Australia (Oznet), Africa (DAHRA), and in Europe (REMEDHUS, SMOSMANIA, FMI, and RSMN). The agreement was analyzed in terms of four classical statistical criteria: Root Mean Squared Error (RMSE), Bias, Unbiased RMSE (UnbRMSE), and correlation coefficient (R). Results of the comparison of these various products with in situ observations show that the performance of both SMAP products i.e. SMAP_SCA and SMAP_Reg is similar and marginally better to that of the SMOSL3 product particularly over the PBO_H2O, SCAN, and USCRN sites. However, SMOSL3 SM was closer to the in situ observations over the DAHRA and Oznet sites. We found that the correlation between all three datasets and in situ measurements is best (R>0.80) over the Oznet sites and worst (R=0.58) over the SNOTEL sites for SMAP_SCA and over the DAHRA and SMOSMANIA sites (R=0.51 and R=0.45 for SMAP_Reg and SMOSL3, respectively). The Bias values showed that all products are generally dry, except over RSMN, DAHRA, and Oznet (and FMI for SMAP_SCA). Finally, our analysis provided interesting insights that can be useful to improve the consistency between SMAP and SMOS datasets.",2017-05-01 +31969334,PI3K Inhibitors Curtail MYC-Dependent Mutant p53 Gain-of-Function in Head and Neck Squamous Cell Carcinoma.,"

Purpose

Mutation of TP53 gene is a hallmark of head and neck squamous cell carcinoma (HNSCC) not yet exploited therapeutically. TP53 mutation frequently leads to the synthesis of mutant p53 proteins with gain-of-function activity, associated with radioresistance and high incidence of local recurrences in HNSCC.

Experimental design

Mutant p53-associated functions were investigated through gene set enrichment analysis in the Cancer Genome Atlas cohort of HNSCC and in a panel of 22 HNSCC cell lines. Mutant p53-dependent transcripts were analyzed in HNSCC cell line Cal27, carrying mutant p53H193L; FaDu, carrying p53R248L; and Detroit 562, carrying p53R175H. Drugs impinging on mutant p53-MYC-dependent signature were identified interrogating Connectivity Map (https://clue.io) derived from the Library of Integrated Network-based Cellular Signatures (LINCS) database (http://lincs.hms.harvard.edu/) and analyzed in HNSCC cell lines and patient-derived xenografts (PDX) models.

Results

We identified a signature of transcripts directly controlled by gain-of-function mutant p53 protein and prognostic in HNSCC, which is highly enriched of MYC targets. Specifically, both in PDX and cell lines of HNSCC treated with the PI3Kα-selective inhibitor BYL719 (alpelisib) the downregulation of mutant p53/MYC-dependent signature correlates with response to this compound. Mechanistically, mutant p53 favors the binding of MYC to its target promoters and enhances MYC protein stability. Treatment with BYL719 disrupts the interaction of MYC, mutant p53, and YAP proteins with MYC target promoters. Of note, depletion of MYC, mutant p53, or YAP potentiates the effectiveness of BYL719 treatment.

Conclusions

Collectively, the blocking of this transcriptional network is an important determinant for the response to BYL719 in HNSCC.",2020-01-22 +31967488,Healthy and Climate-Friendly Eating Patterns in the New Zealand Context.,"

Background

The global food system is driving both the climate crisis and the growing burden of noncommunicable disease. International research has highlighted the climate and health co-benefit opportunity inherent in widespread uptake of plant-based diets. Nevertheless, uncertainty remains as to what constitutes healthy and climate-friendly eating patterns in specific world regions.

Objectives

Using New Zealand as a case study, this research investigates the extent to which potential contextual differences may affect the local applicability of international trends. It further examines the potential for demand-end avenues to support a transition toward a healthier, more climate-friendly food system in New Zealand.

Methods

A New Zealand-specific life-cycle assessment (LCA) database was developed by modifying cradle to point-of-sale reference emissions estimates according to the New Zealand context. This food emissions database, together with a New Zealand-specific multistate life-table model, was then used to estimate climate, health, and health system cost impacts associated with shifting current consumption to align with dietary scenarios that conform to the New Zealand dietary guidelines (NZDGs).

Results

Whole plant foods, including vegetables, fruits, legumes, and whole grains were substantially less climate-polluting (1.2-1.8 kgCO2e/kg) than animal-based foods, particularly red and processed meats (12-21 kgCO2e/kg). Shifting population-level consumption to align with the NZDGs would confer diet-related emissions savings of 4-42%, depending on the degree of dietary change and food waste minimization pursued. NZDG-abiding dietary scenarios, when modeled out over the lifetime of the current New Zealand population, would also confer large health gains (1.0-1.5 million quality-adjusted life-years) and health care system cost savings (NZ$14-20 billion).

Discussion

Guideline-abiding dietary scenarios, particularly those that prioritize plant-based foods, have the potential to confer substantial climate and health gains. This research shows that major contextual differences specific to New Zealand's food system do not appear to cause notable deviation from global trends, reinforcing recent international research. https://doi.org/10.1289/EHP5996.",2020-01-22 +,Development of an on-line Irish food composition database for nutrients,"Food composition databases underpin national dietary surveys, support epidemiological and experimental research in human nutrition and are essential in the development of food regulations and dietary guidelines. To date, nutrition researchers in Ireland have not had access to national food composition data and have relied heavily on borrowed data—mainly from the United Kingdom. The aim of this project was to compile an on-line Irish food composition database, which is compatible with international standards. Data on foods specific to the Irish population, including commonly consumed manufactured products, composite dishes and nutritional supplements, were collected between 1997 and 2006 during national dietary intake surveys. In conjunction with EuroFIR (European Food Information Resource Network of Excellence), these foods have been compiled using standardised methodology. The database comprises 938 foods, with values for 41 components. Values for packaged products were sourced from manufacturers and values for composite dishes were calculated from ingredients. Homemade meat, fish and vegetable dishes, manufactured cereal products and nutritional supplements are the key foods in the database. 
The Irish food composition database is available on-line (http://www.ucc.ie/en/ifcdb) and is incorporated in the EuroFIR eSearch facility (http://www.eurofir.eu) alongside other European national and specialised food composition databases.",2011-11-01 +32426818,PASSION: an ensemble neural network approach for identifying the binding sites of RBPs on circRNAs.,"

Motivation

Different from traditional linear RNAs (containing 5' and 3' ends), circular RNAs (circRNAs) are a special type of RNAs that have a closed ring structure. Accumulating evidence has indicated that circRNAs can directly bind proteins and participate in a myriad of different biological processes.

Results

For identifying the interaction of circRNAs with 37 different types of circRNA-binding proteins (RBPs), we develop an ensemble neural network, termed PASSION, which is based on the concatenated artificial neural network (ANN) and hybrid deep neural network frameworks. Specifically, the input of the ANN is the optimal feature subset for each RBP, which has been selected from six types of feature encoding schemes through incremental feature selection and application of the XGBoost algorithm. In turn, the input of the hybrid deep neural network is a stacked codon-based scheme. Benchmarking experiments indicate that the ensemble neural network reaches the average best area under the curve (AUC) of 0.883 across the 37 circRNA datasets when compared with XGBoost, k-nearest neighbor, support vector machine, random forest, logistic regression and Naive Bayes. Moreover, each of the 37 RBP models is extensively tested by performing independent tests, with the varying sequence similarity thresholds of 0.8, 0.7, 0.6 and 0.5, respectively. The corresponding average AUC obtained are 0.883, 0.876, 0.868 and 0.883, respectively, highlighting the effectiveness and robustness of PASSION. Extensive benchmarking experiments demonstrate that PASSION achieves a competitive performance for identifying binding sites between circRNA and RBPs, when compared with several state-of-the-art methods.

Availability and implementation

A user-friendly web server of PASSION is publicly accessible at http://flagship.erc.monash.edu/PASSION/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-08-01 +32284610,Meltome atlas-thermal proteome stability across the tree of life.,"We have used a mass spectrometry-based proteomic approach to compile an atlas of the thermal stability of 48,000 proteins across 13 species ranging from archaea to humans and covering melting temperatures of 30-90 °C. Protein sequence, composition and size affect thermal stability in prokaryotes and eukaryotic proteins show a nonlinear relationship between the degree of disordered protein structure and thermal stability. The data indicate that evolutionary conservation of protein complexes is reflected by similar thermal stability of their proteins, and we show examples in which genomic alterations can affect thermal stability. Proteins of the respiratory chain were found to be very stable in many organisms, and human mitochondria showed close to normal respiration at 46 °C. We also noted cell-type-specific effects that can affect protein stability or the efficacy of drugs. This meltome atlas broadly defines the proteome amenable to thermal profiling in biology and drug discovery and can be explored online at http://meltomeatlas.proteomics.wzw.tum.de:5003/ and http://www.proteomicsdb.org.",2020-04-13 +26560047,Pan-transcriptomic analysis identifies coordinated and orthologous functional modules in the diatoms Thalassiosira pseudonana and Phaeodactylum tricornutum.,"Diatoms are important primary producers in the ocean that thrive in diverse and dynamic environments. Their survival and success over changing conditions depend on the complex coordination of gene regulatory processes. Here we present an integrated analysis of all publicly available microarray data for the diatoms Thalassiosira pseudonana and Phaeodactylum tricornutum. This resource includes shared expression patterns, gene functions, and cis-regulatory DNA sequence motifs in each species that are statistically coordinated over many experiments. 
These data illustrate the coordination of transcriptional responses in diatoms over changing environmental conditions. Responses to silicic acid depletion segregate into multiple distinctly regulated groups of genes, regulation by heat shock transcription factors (HSFs) is implicated in the response to nitrate stress, and distinctly coordinated carbon concentrating, CO2 and pH-related responses are apparent. Fundamental features of diatom physiology are similarly coordinated between two distantly related diatom species, including the regulation of photosynthesis, cellular growth functions and lipid metabolism. These integrated data and analyses can be explored publicly (http://networks.systemsbiology.net/diatom-portal/).",2015-11-10 +32622194,MED-TMA: A clinical decision support tool for differential diagnosis of TMA with enhanced accuracy using an ensemble method.,"Considering difficulties in on-site ADAMTS13 testing and the performance instability of PLASMIC score according to ethnicity, we developed a prediction tool, MED-TMA (machine learning (ML) method for differential diagnosis (DDx) of thrombotic microangiopathy (TMA)) to support clinical decision. Data from 319 patients visiting 31 hospitals in Korea clinically diagnosed with primary TMA was randomly separated by 2:1 into two groups - the development dataset (D-set, n = 212), the validation dataset (V-set, n = 107). Feature elimination was conducted to select optimal clinical predictors. We developed the model with the selected features using ML methods, verifying using V-set. After the feature elimination using 19 clinical variables, five variables were selected with high importance value. Among nine ML methods, four ML methods were chosen considering the Area Under the Curves (AUC) and the correlation between the methods using D-set. We developed MED-TMA based on an optimized ensemble model with the selected four ML methods resulting in AUC values of 0.945 and 0.924 in D-set and V-set, respectively. 
In addition to the binary outcome, MED-TMA was capable of providing a probability for DDx of TMA. The ensemble approach driven MED-TMA showed comparably accurate and intuitive decision support for DDx of TMA to that of the existing models based on a single ML method. We provide a web-based nomogram for the appropriate use of effective but costly therapeutics to treat TMA patients (http://hematology.snu.ac.kr/medtma/).",2020-06-27 +31666984,A data driven approach reveals disease similarity on a molecular level.,"Could there be unexpected similarities between different studies, diseases, or treatments, on a molecular level due to common biological mechanisms involved? To answer this question, we develop a method for computing similarities between empirical, statistical distributions of high-dimensional, low-sample datasets, and apply it on hundreds of -omics studies. The similarities lead to dataset-to-dataset networks visualizing the landscape of a large portion of biological data. Potentially interesting similarities connecting studies of different diseases are assembled in a disease-to-disease network. Exploring it, we discover numerous non-trivial connections between Alzheimer's disease and schizophrenia, asthma and psoriasis, or liver cancer and obesity, to name a few. We then present a method that identifies the molecular quantities and pathways that contribute the most to the identified similarities and could point to novel drug targets or provide biological insights. The proposed method acts as a ""statistical telescope"" providing a global view of the constellation of biological data; readers can peek through it at: http://datascope.csd.uoc.gr:25000/.",2019-10-25 +29976632,Glioblastoma Multiforme: Fewer Tumor Copy Number Segments of the SGK1 Gene Are Associated with Poorer Survival.,"

Background/aim

Glioblastoma multiforme (GBM) is the most common primary tumor of the central nervous system. The serum and glucocorticoid-regulated kinase SGK1 gene is required for the growth and survival of GBM stem-like cells under both normoxic and hypoxic conditions. It has been reported that oxygenation significantly affects cellular genetic expression; 30% of the genes required in hypoxia were not required under normoxic conditions. Therefore, we examined SGK1 expression to determine if it may be a novel potential drug target for GBM.

Materials and methods

We assessed the association between SGK1 and glioblastoma patient overall survival using the GBM cohort in TCGA (The Cancer Genome Atlas) database (TCGA-GBM). To access and analyze the data we used the UCSC Xena browser (https://xenabrowser.net). Survival data of the GBM subgroup were extracted for analysis and generation of Kaplan-Meier curves for overall survival. The best cut-off was identified by methods described in the R2 web-based application (http://r2.amc.nl).

Results

We analyzed patient survival by tumor SGK1 copy number segments after removal of common germ-line copy-number variants (CNVs). Copy number segments (log2 tumor/normal) ≤0.009700 were associated with significantly poorer survival (p=0.016).

Conclusion

Increased median overall survival associated with increased SGK1 copy number segments may be a reflection of better tumor oxygenation. Therefore, besides being a drug target, SGK1 may also be a prognostic marker. Among molecular tumor markers, only the methylation status of the O-6-methylguanine-DNA methyltransferase (MGMT) gene has shown a significant association with survival in patients with GBM.",2018-07-01 +30949679,IntronDB: a database for eukaryotic intron features.,"

Summary

The rate and extent of unbalanced eukaryotic intron changes exhibit dynamic patterns for different lineages of species or certain functional groups of genes with varied spatio-temporal expression modes affected by selective pressure. To date, only a few key conserved splicing signals or regulatory elements have been identified in introns and little is known about the remaining intronic regions. To trace the evolutionary trajectory of spliceosomal introns from available genomes under a unified framework, we present IntronDB, which catalogs ∼50 000 000 introns from over 1000 genomes spanning the major eukaryotic clades in the tree of life. Based on the position of introns relative to coding regions, it categorizes introns into three groups, such as 5'UTR, CDS and 3'UTR and subsequently divides CDS introns into three categories, such as phase 0, phase 1 and phase 2. It provides the quality evaluation for each sequence entry and characterizes the intronic parameters including number, size, sequence composition and positioning information as well as the features for exons and genes, making possible the comparisons between introns and exons. It reports the dinucleotides around the intron boundary and displays the consensus sequence features for all introns, small introns and large introns for each genome. By incorporating the taxonomic assignment of genomes, it performs high-level or genome-wide statistical analysis for single feature and coupled features both in a single genome and across multiple genomes. It offers functionalities to browse the data from representative protein-coding transcripts and download the data from all transcripts from protein-coding genes.

Availability and implementation

http://www.nextgenbioinformatics.org/IntronDB.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +30070575,"""Revisiting Carroll's Survey of Factor-Analytic Studies: Implications for the Clinical Assessment of Intelligence"": Correction to Benson et al. (2018).","Reports an error in ""Revisiting Carroll's survey of factor-analytic studies: Implications for the clinical assessment of intelligence"" by Nicholas F. Benson, A. Alexander Beaujean, Ryan J. McGill and Stefan C. Dombrowski (Psychological Assessment, Advanced Online Publication, May 24, 2018, np). In the article ""Revisiting Carroll's Survey of Factor-Analytic Studies: Implications for the Clinical Assessment of Intelligence,"" by Nicholas F. Benson, A. Alexander Beaujean, Ryan J. McGill, and Stefan C. Dombrowski (Psychological Assessment, Advance online publication, May 24, 2018, http://dx.doi.org/10.1037/pas0000556), the majority of values in the ωH and ωHS columns of Table 4 were incorrect and have been amended. These revisions required text in the fourth paragraph of the Results section to be changed from ""Moreover, the ωHS value for Gs is relatively high and very close to the and ωH values for g"" to ""Moreover, the ωHS values for Gs and Gv are relatively high, exceeding the ω and ωH values for g."" All versions of this article have been corrected. (The following abstract of the original article appeared in record 2018-23627-001.) John Carroll's three-stratum theory (and the decades of research behind its development) is foundational to the contemporary practice of intellectual assessment. The present study addresses some limitations of Carroll's work: specification, reproducibility with more modern methods, and interpretive relevance. We reanalyzed select data sets from Carroll's survey of factor analytic studies using confirmatory factor analysis as well as modern indices of interpretive relevance. For the majority of data sets, we found that Carroll likely extracted too many factors representing Stratum II abilities. 
Moreover, almost all factors representing Stratum II abilities had little-to-no interpretive relevance above and beyond that of general intelligence. We conclude by discussing the implications of this research with respect to the interpretive relevance and clinical utility of scores reflecting cognitive abilities at all strata of the three-stratum theory and offer some directions for future research. (PsycINFO Database Record",2018-08-01 +26578556,The Saccharomyces Genome Database Variant Viewer.,"The Saccharomyces Genome Database (SGD; http://www.yeastgenome.org) is the authoritative community resource for the Saccharomyces cerevisiae reference genome sequence and its annotation. In recent years, we have moved toward increased representation of sequence variation and allelic differences within S. cerevisiae. The publication of numerous additional genomes has motivated the creation of new tools for their annotation and analysis. Here we present the Variant Viewer: a dynamic open-source web application for the visualization of genomic and proteomic differences. Multiple sequence alignments have been constructed across high quality genome sequences from 11 different S. cerevisiae strains and stored in the SGD. The alignments and summaries are encoded in JSON and used to create a two-tiered dynamic view of the budding yeast pan-genome, available at http://www.yeastgenome.org/variant-viewer.",2015-11-17 +28761934,Metabolic Fingerprints from the Human Oral Microbiome Reveal a Vast Knowledge Gap of Secreted Small Peptidic Molecules. ,"Recent research indicates that the human microbiota play key roles in maintaining health by providing essential nutrients, providing immune education, and preventing pathogen expansion. Processes underlying the transition from a healthy human microbiome to a disease-associated microbiome are poorly understood, partially because of the potential influences from a wide diversity of bacterium-derived compounds that are illy defined. 
Here, we present the analysis of peptidic small molecules (SMs) secreted from bacteria and viewed from a temporal perspective. Through comparative analysis of mass spectral profiles from a collection of cultured oral isolates and an established in vitro multispecies oral community, we found that the production of SMs both delineates a temporal expression pattern and allows discrimination between bacterial isolates at the species level. Importantly, the majority of the identified molecules were of unknown identity, and only ~2.2% could be annotated and classified. The catalogue of bacterially produced SMs we obtained in this study reveals an undiscovered molecular world for which compound isolation and ecosystem testing will facilitate a better understanding of their roles in human health and disease. IMPORTANCE Metabolomics is the ultimate tool for studies of microbial functions under any specific set of environmental conditions (D. S. Wishart, Nat Rev Drug Discov 45:473-484, 2016, https://doi.org/10.1038/nrd.2016.32). This is a great advance over studying genes alone, which only inform about metabolic potential. Approximately 25,000 compounds have been chemically characterized thus far; however, the richness of metabolites such as SMs has been estimated to be as high as 1 × 10^30 in the biosphere (K. Garber, Nat Biotechnol 33:228-231, 2015, https://doi.org/10.1038/nbt.3161). Our classical, one-at-a-time activity-guided approach to compound identification continues to find the same known compounds and is also incredibly tedious, which represents a major bottleneck for global SM identification. These challenges have prompted new developments of databases and analysis tools that provide putative classifications of SMs by mass spectral alignments to already characterized tandem mass spectrometry spectra and databases containing structural information (e.g., PubChem and AntiMarin). 
In this study, we assessed secreted peptidic SMs (PSMs) from 27 oral bacterial isolates and a complex oral in vitro biofilm community of >100 species by using the Global Natural Products Social molecular Networking and the DEREPLICATOR infrastructures, which are methodologies that allow automated and putative annotation of PSMs. These approaches enabled the identification of an untapped resource of PSMs from oral bacteria showing species-unique patterns of secretion with putative matches to known bioactive compounds.",2017-07-18 +31688925,Enzyme annotation in UniProtKB using Rhea.,"

Motivation

To provide high quality computationally tractable enzyme annotation in UniProtKB using Rhea, a comprehensive expert-curated knowledgebase of biochemical reactions which describes reaction participants using the ChEBI (Chemical Entities of Biological Interest) ontology.

Results

We replaced existing textual descriptions of biochemical reactions in UniProtKB with their equivalents from Rhea, which is now the standard for annotation of enzymatic reactions in UniProtKB. We developed improved search and query facilities for the UniProt website, REST API and SPARQL endpoint that leverage the chemical structure data, nomenclature and classification that Rhea and ChEBI provide.

Availability and implementation

UniProtKB at https://www.uniprot.org; UniProt REST API at https://www.uniprot.org/help/api; UniProt SPARQL endpoint at https://sparql.uniprot.org/; Rhea at https://www.rhea-db.org.",2020-03-01 +30221945,Combining High-Resolution and Exact Calibration To Boost Statistical Power: A Well-Calibrated Score Function for High-Resolution MS2 Data.,"To achieve accurate assignment of peptide sequences to observed fragmentation spectra, a shotgun proteomics database search tool must make good use of the very high-resolution information produced by state-of-the-art mass spectrometers. However, making use of this information while also ensuring that the search engine's scores are well calibrated, that is, that the score assigned to one spectrum can be meaningfully compared to the score assigned to a different spectrum, has proven to be challenging. Here we describe a database search score function, the ""residue evidence"" (res-ev) score, that achieves both of these goals simultaneously. We also demonstrate how to combine calibrated res-ev scores with calibrated XCorr scores to produce a ""combined p value"" score function. We provide a benchmark consisting of four mass spectrometry data sets, which we use to compare the combined p value to the score functions used by several existing search engines. Our results suggest that the combined p value achieves state-of-the-art performance, generally outperforming MS Amanda and Morpheus and performing comparably to MS-GF+. The res-ev and combined p-value score functions are freely available as part of the Tide search engine in the Crux mass spectrometry toolkit ( http://crux.ms ).",2018-10-18 +26578696,OryzaGenome: Genome Diversity Database of Wild Oryza Species.,"The species in the genus Oryza, encompassing nine genome types and 23 species, are a rich genetic resource and may have applications in deeper genomic analyses aiming to understand the evolution of plant genomes. 
With the advancement of next-generation sequencing (NGS) technology, a flood of Oryza species reference genomes and genomic variation information has become available in recent years. This genomic information, combined with the comprehensive phenotypic information that we are accumulating in our Oryzabase, can serve as an excellent genotype-phenotype association resource for analyzing rice functional and structural evolution, and the associated diversity of the Oryza genus. Here we integrate our previous and future phenotypic/habitat information and newly determined genotype information into a united repository, named OryzaGenome, providing the variant information with hyperlinks to Oryzabase. The current version of OryzaGenome includes genotype information of 446 O. rufipogon accessions derived by imputation and of 17 accessions derived by imputation-free deep sequencing. Two variant viewers are implemented: SNP Viewer as a conventional genome browser interface and Variant Table as a text-based browser for precise inspection of each variant one by one. Portable VCF (variant call format) file or tab-delimited file download is also available. Following these SNP (single nucleotide polymorphism) data, reference pseudomolecules/scaffolds/contigs and genome-wide variation information for almost all of the closely and distantly related wild Oryza species from the NIG Wild Rice Collection will be available in future releases. All of the resources can be accessed through http://viewer.shigen.info/oryzagenome/.",2015-11-16 +25815792,In Silico and in Vitro Study of Binding Affinity of Tripeptides to Amyloid β Fibrils: Implications for Alzheimer's Disease.,"Self-assembly of Aβ peptides into amyloid aggregates has been suggested as the major cause of Alzheimer's disease (AD). Nowadays, there is no medication for AD, but experimental data indicate that reversion of the process of amyloid aggregation reduces the symptoms of disease. 
In this paper, all 8000 tripeptides were studied for their ability to destroy Aβ fibrils. The docking method and the more sophisticated MM-PBSA (molecular mechanics Poisson-Boltzmann surface area) method were employed to calculate the binding affinity and mode of tripeptides to Aβ fibrils. The ability of these peptides to depolymerize Aβ fibrils was also investigated experimentally using atomic force microscopy and fluorescence spectroscopy (Thioflavin T assay). It was shown that tripeptides prefer to bind to hydrophobic regions of 6Aβ9-40 fibrils. Tripeptides WWW, WWP, WPW and PWW were found to be the most potent binders. In vitro experiments showed that tight-binding tripeptides have significant depolymerizing activities and their DC50 values determined from dose-response curves were in micromolar range. The ability of nonbinding (GAM, AAM) and weak-binding (IVL and VLA) tripeptides to destroy Aβ fibrils was negligible. In vitro data of tripeptide depolymerizing activities support the predictions obtained by molecular docking and all-atom simulation methods. Our results suggest that presence of multiple complexes of heterocycles formed by tryptophan and proline residues in tripeptides is crucial for their tight binding to Aβ fibrils as well as for extensive fibril depolymerization. We recommend PWW for further studies as it has the lowest experimental binding constant.",2015-04-08 +31841564,GLAMbox: A Python toolbox for investigating the association between gaze allocation and decision behaviour.,"Recent empirical findings have indicated that gaze allocation plays a crucial role in simple decision behaviour. Many of these findings point towards an influence of gaze allocation onto the speed of evidence accumulation in an accumulation-to-bound decision process (resulting in generally higher choice probabilities for items that have been looked at longer). 
Further, researchers have shown that the strength of the association between gaze and choice behaviour is highly variable between individuals, encouraging future work to study this association on the individual level. However, few decision models exist that enable a straightforward characterization of the gaze-choice association at the individual level, due to the high cost of developing and implementing them. The model space is particularly scarce for choice sets with more than two choice alternatives. Here, we present GLAMbox, a Python-based toolbox that is built upon PyMC3 and allows the easy application of the gaze-weighted linear accumulator model (GLAM) to experimental choice data. The GLAM assumes gaze-dependent evidence accumulation in a linear stochastic race that extends to decision scenarios with many choice alternatives. GLAMbox enables Bayesian parameter estimation of the GLAM for individual, pooled or hierarchical models, provides an easy-to-use interface to predict choice behaviour and visualize choice data, and benefits from all of PyMC3's Bayesian statistical modeling functionality. Further documentation, resources and the toolbox itself are available at https://glambox.readthedocs.io.",2019-12-16 +25348402,The Genomes OnLine Database (GOLD) v.5: a metadata management system based on a four level (meta)genome project classification.,"The Genomes OnLine Database (GOLD; http://www.genomesonline.org) is a comprehensive online resource to catalog and monitor genetic studies worldwide. GOLD provides up-to-date status on complete and ongoing sequencing projects along with a broad array of curated metadata. Here we report version 5 (v.5) of the database. The newly designed database schema and web user interface supports several new features including the implementation of a four level (meta)genome project classification system and a simplified intuitive web interface to access reports and launch search tools. 
The database currently hosts information for about 19,200 studies, 56,000 Biosamples, 56,000 sequencing projects and 39,400 analysis projects. More than just a catalog of worldwide genome projects, GOLD is a manually curated, quality-controlled metadata warehouse. The problems encountered in integrating disparate and varying quality data into GOLD are briefly highlighted. GOLD fully supports and follows the Genomic Standards Consortium (GSC) Minimum Information standards.",2014-10-27 +31729402,Identification of candidate cancer drivers by integrative Epi-DNA and Gene Expression (iEDGE) data analysis.,"The emergence of large-scale multi-omics data warrants method development for data integration. Genomic studies from cancer patients have identified epigenetic and genetic regulators - such as methylation marks, somatic mutations, and somatic copy number alterations (SCNAs), among others - as predictive features of cancer outcome. However, identification of ""driver genes"" associated with a given alteration remains a challenge. To this end, we developed a computational tool, iEDGE, to model cis and trans effects of (epi-)DNA alterations and identify potential cis driver genes, where cis and trans genes denote those genes falling within and outside the genomic boundaries of a given (epi-)genetic alteration, respectively. iEDGE first identifies the cis and trans gene expression signatures associated with the presence/absence of a particular epi-DNA alteration across samples. It then applies tests of statistical mediation to determine the cis genes predictive of the trans gene expression. Finally, cis and trans effects are annotated by pathway enrichment analysis to gain insights into the underlying regulatory networks. We used iEDGE to perform integrative analysis of SCNAs and gene expression data from breast cancer and 18 additional cancer types included in The Cancer Genome Atlas (TCGA). 
Notably, cis gene drivers identified by iEDGE were found to be significantly enriched for known driver genes from multiple compendia of validated oncogenes and tumor suppressors, suggesting that the remainder are of equal importance. Furthermore, predicted drivers were enriched for functionally relevant cancer genes with amplification-driven dependencies, which are of potential prognostic and therapeutic value. All the analyses results are accessible at https://montilab.bu.edu/iEDGE. In summary, integrative analysis of SCNAs and gene expression using iEDGE successfully identified known cancer driver genes and putative cancer therapeutic targets across 19 cancer types in the TCGA. The proposed method can easily be applied to the integration of gene expression profiles with other epi-DNA assays in a variety of disease contexts.",2019-11-15 +33828766,"Eye movements during the reading of narrative and poetic texts. Symposium 6 at the 20th European Conference on Eye Movement Research (ECEM) in Alicante, 21.8.2019. ","Despite a wealth of studies using eye tracking to investigate mental processes during vision or reading, the investigation of oculomotor activity during natural reading of longer texts -be it newspaper articles, narratives or poetry- is still an exception in this field (as evidenced by the program of ECEM 2017 in Wuppertal). Following up on our symposium at ECEM 2017, here we bring together eye movement research on natural text reading to report recent progress in a coordinated way sharing data, experiences and software skills in this highly complex subfield. More specifically, in this symposium we will address several challenges faced by an eye tracking perspective on the reading of longer texts which involve a surplus of intervening variables and novel methods to analyze the data. 
In particular, the following issues will be addressed: - Which text-analytical and statistical methods are best to deal with the myriad of surface and affective semantic features potentially influencing eye movements during reading of 'natural' texts? - What are the pros and cons of using machine learning assisted predictive modeling as an alternative to the standard GLM/LMM frameworks? - Which kind of theoretical models can deal with the level of complexity offered by reading longer natural texts? Video stream: https://vimeo.com/358415199.",2019-11-25 +28334239,Comparison of the general co-expression landscapes between human and mouse.,"The murine model serves as an important experimental system in biomedical science because of its high degree of similarities at the sequence level with human. Recent studies have compared the transcriptional landscapes between human and mouse, but the general co-expression landscapes have not been characterized. Here, we calculated the general co-expression coefficients and constructed the general co-expression maps for human and mouse. The differences and similarities of the general co-expression maps between the two species were compared in detail. The results showed low similarities in the human and mouse, with only about 36.54% of the co-expression relationships conserved between the two species. These results indicate that researchers should pay attention to these differences when performing research using the expression data of human and mouse. To facilitate use of this information, we also developed the human-mouse general co-expression difference database (coexpressMAP) to search differences in co-expression between human and mouse. 
This database is freely available at http://www.bioapp.org/coexpressMAP.",2018-09-01 +32390972,Multi-Label Random Forest Model for Tuberculosis Drug Resistance Classification and Mutation Ranking.,"Resistance prediction and mutation ranking are important tasks in the analysis of Tuberculosis sequence data. Due to standard regimens for the use of first-line antibiotics, resistance co-occurrence, in which samples are resistant to multiple drugs, is common. Analysing all drugs simultaneously should therefore enable patterns reflecting resistance co-occurrence to be exploited for resistance prediction. Here, multi-label random forest (MLRF) models are compared with single-label random forest (SLRF) for both predicting phenotypic resistance from whole genome sequences and identifying important mutations for better prediction of four first-line drugs in a dataset of 13402 Mycobacterium tuberculosis isolates. Results confirmed that MLRFs can improve performance compared to conventional clinical methods (by 18.10%) and SLRFs (by 0.91%). In addition, we identified a list of candidate mutations that are important for resistance prediction or that are related to resistance co-occurrence. Moreover, we found that retraining our analysis to a subset of top-ranked mutations was sufficient to achieve satisfactory performance. The source code can be found at http://www.robots.ox.ac.uk/~davidc/code.php.",2020-04-22 +33173006,Subcellular Localization and Assembly Process of the Nisin Biosynthesis Machinery in Lactococcus lactis. ,"Nisin, a class I lantibiotic, is synthesized as a precursor peptide by a putative membrane-associated lanthionine synthetase complex consisting of the dehydratase NisB, the cyclase NisC, and the ABC transporter NisT. Here, we characterize the subcellular localization and the assembly process of the nisin biosynthesis machinery in Lactococcus lactis by mutational analyses and fluorescence microscopy. 
Precursor nisin, NisB, and NisC were found to be mainly localized at the cell poles, with a preference for the old poles. They were found to be colocalized at the same spots in these old pole regions, functioning as a nisin modification complex. In contrast, the transporter NisT was found to be distributed uniformly and circumferentially in the membrane. When nisin secretion was blocked by mutagenesis of NisT, the nisin biosynthesis machinery was also visualized directly at a polar position using fluorescence microscopy. The interactions between NisB and other components of the machinery were further studied in vivo, and therefore, the ""order of assembly"" of the complex was revealed, indicating that NisB directly or indirectly plays the role of a polar ""recruiter"" in the initial assembly process. Additionally, a potential domain that is located at the surface of the elimination domain of NisB was identified to be crucial for the polar localization of NisB. Based on these data, we propose a model wherein precursor nisin is first completely modified by the nisin biosynthesis machinery, preventing the premature secretion of partially modified peptides, and subsequently secreted by recruited NisT, preferentially at the old pole regions.IMPORTANCE Nisin is the model peptide for LanBC-modified lantibiotics that are commonly modified and exported by a putative synthetase complex. Although the mechanism of maturation, transport, immunity, and regulation is relatively well understood, and structural information is available for some of the proteins involved (B. Li, J. P. J. Yu, J. S. Brunzelle, G. N. Moll, et al., Science 311:1464-1467, 2006, https://doi.org/10.1126/science.1121422; M. A. Ortega, Y. Hao, Q. Zhang, M. C. Walker, et al., Nature 517:509-512, 2015, https://doi.org/10.1038/nature13888; C. Hacker, N. A. Christ, E. Duchardt-Ferner, S. Korn, et al., J Biol Chem 290:28869-28886, 2015, https://doi.org/10.1074/jbc.M115.679969; Y. Y. Xu, X. Li, R. Q. Li, S. S. 
Li, et al., Acta Crystallogr D Biol Crystallogr 70:1499-1505, 2014, https://doi.org/10.1107/S1399004714004234), the subcellular localization and assembly process of the biosynthesis complex remain to be elucidated. In this study, we determined the spatial distribution of nisin synthesis-related enzymes and the transporter, revealing that the modification and secretion of the precursor nisin mainly occur at the old cell poles of L. lactis and that the transporter NisT is probably recruited later to this spot after the completion of the modification reactions by NisB and NisC. Fluorescently labeled nisin biosynthesis machinery was visualized directly by fluorescence microscopy. To our knowledge, this is the first study to provide direct evidence of the existence of such a complex in vivo Importantly, the elucidation of the ""order of assembly"" of the complex will facilitate future endeavors in the investigation of the nisin secretion mechanism and even the isolation and structural characterization of the complete complex.",2020-11-10 +32298251,"Hospitalization Rates and Characteristics of Patients Hospitalized with Laboratory-Confirmed Coronavirus Disease 2019 - COVID-NET, 14 States, March 1-30, 2020.","Since SARS-CoV-2, the novel coronavirus that causes coronavirus disease 2019 (COVID-19), was first detected in December 2019 (1), approximately 1.3 million cases have been reported worldwide (2), including approximately 330,000 in the United States (3). To conduct population-based surveillance for laboratory-confirmed COVID-19-associated hospitalizations in the United States, the COVID-19-Associated Hospitalization Surveillance Network (COVID-NET) was created using the existing infrastructure of the Influenza Hospitalization Surveillance Network (FluSurv-NET) (4) and the Respiratory Syncytial Virus Hospitalization Surveillance Network (RSV-NET). 
This report presents age-stratified COVID-19-associated hospitalization rates for patients admitted during March 1-28, 2020, and clinical data on patients admitted during March 1-30, 2020, the first month of U.S. surveillance. Among 1,482 patients hospitalized with COVID-19, 74.5% were aged ≥50 years, and 54.4% were male. The hospitalization rate among patients identified through COVID-NET during this 4-week period was 4.6 per 100,000 population. Rates were highest (13.8) among adults aged ≥65 years. Among 178 (12%) adult patients with data on underlying conditions as of March 30, 2020, 89.3% had one or more underlying conditions; the most common were hypertension (49.7%), obesity (48.3%), chronic lung disease (34.6%), diabetes mellitus (28.3%), and cardiovascular disease (27.8%). These findings suggest that older adults have elevated rates of COVID-19-associated hospitalization and the majority of persons hospitalized with COVID-19 have underlying medical conditions. These findings underscore the importance of preventive measures (e.g., social distancing, respiratory hygiene, and wearing face coverings in public settings where social distancing measures are difficult to maintain) to protect older adults and persons with underlying medical conditions, as well as the general public. In addition, older adults and persons with serious underlying medical conditions should avoid contact with persons who are ill and immediately contact their health care provider(s) if they have symptoms consistent with COVID-19 (https://www.cdc.gov/coronavirus/2019-ncov/symptoms-testing/symptoms.html) (5). 
Ongoing monitoring of hospitalization rates, clinical characteristics, and outcomes of hospitalized patients will be important to better understand the evolving epidemiology of COVID-19 in the United States and the clinical spectrum of disease, and to help guide planning and prioritization of health care system resources.",2020-04-17 +32955354,Exposure to Manganese in Drinking Water during Childhood and Association with Attention-Deficit Hyperactivity Disorder: A Nationwide Cohort Study.,"

Background

Manganese (Mn) in drinking water may increase the risk of several neurodevelopmental outcomes, including attention-deficit hyperactivity disorder (ADHD). Earlier epidemiological studies on associations between Mn exposure and ADHD-related outcomes had small sample sizes, lacked spatiotemporal exposure assessment, and relied on questionnaire data (not diagnoses)-shortcomings that we address here.

Objective

Our objective was to assess the association between exposure to Mn in drinking water during childhood and later development of ADHD.

Methods

In a nationwide population-based registry study in Denmark, we followed a cohort of 643,401 children born 1992-2007 for clinical diagnoses of ADHD. In subanalyses, we classified cases into ADHD-Inattentive and ADHD-Combined subtypes based on hierarchical categorization of International Classification of Diseases (ICD)-10 codes. We obtained Mn measurements from 82,574 drinking water samples to estimate longitudinal exposure during the first 5 y of life with high spatiotemporal resolution. We modeled exposure as both peak concentration and time-weighted average. We estimated sex-specific hazard ratios (HRs) in Cox proportional hazards models adjusted for age, birth year, socioeconomic status (SES), and urbanicity.

Results

We found that exposure to increasing levels of Mn in drinking water was associated with an increased risk of ADHD-Inattentive subtype, but not ADHD-Combined subtype. After adjusting for age, birth year, and SES, females exposed to high levels of Mn (i.e., >100μg/L) at least once during their first 5 y of life had an HR for ADHD-Inattentive subtype of 1.51 [95% confidence interval (CI): 1.18, 1.93] and males of 1.20 (95% CI: 1.01, 1.42) when compared with same-sex individuals exposed to <5μg/L. When modeling exposure as a time-weighted average, sex differences were no longer present.

Discussion

Mn in drinking water was associated with ADHD, specifically the ADHD-Inattentive subtype. Our results support earlier studies suggesting a need for a formal health-based drinking water guideline value for Mn. Future Mn-studies should examine ADHD subtype-specific associations and utilize direct subtype measurements rather than relying on ICD-10 codes alone. https://doi.org/10.1289/EHP6391.",2020-09-21 +32663617,Population Genetics of SARS-CoV-2: Disentangling Effects of Sampling Bias and Infection Clusters.,"A novel RNA virus, the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2), is responsible for the ongoing outbreak of coronavirus disease 2019 (COVID-19). Population genetic analysis could be useful for investigating the origin and evolutionary dynamics of COVID-19. However, due to extensive sampling bias and existence of infection clusters during the epidemic spread, direct applications of existing approaches can lead to biased parameter estimations and data misinterpretation. In this study, we first present robust estimator for the time to the most recent common ancestor (TMRCA) and the mutation rate, and then apply the approach to analyze 12,909 genomic sequences of SARS-CoV-2. The mutation rate is inferred to be 8.69 × 10-4 per site per year with a 95% confidence interval (CI) of [8.61 × 10-4, 8.77 × 10-4], and the TMRCA of the samples inferred to be Nov 28, 2019 with a 95% CI of [Oct 20, 2019, Dec 9, 2019]. The results indicate that COVID-19 might originate earlier than and outside of Wuhan Seafood Market. We further demonstrate that genetic polymorphism patterns, including the enrichment of specific haplotypes and the temporal allele frequency trajectories generated from infection clusters, are similar to those caused by evolutionary forces such as natural selection. 
Our results show that population genetic methods need to be developed to efficiently detangle the effects of sampling bias and infection clusters to gain insights into the evolutionary mechanism of SARS-CoV-2. Software for implementing VirusMuT can be downloaded at https://bigd.big.ac.cn/biocode/tools/BT007081.",2020-07-12 +31406615,Internet of Things Applied in Healthcare Based on Open Hardware with Low-Energy Consumption.,"

Objectives

The Internet of Things (IoT) and its applications are growing simultaneously. These applications need new intelligent devices along heterogeneous networking. Which makes them costly to implement indeed. Platforms and open devices designed for open-source hardware are possible solutions. This research was conducted under an IoT design, implementation, and assessment model for the remote monitoring of pulse oximetry via oxygen partial saturation (SpO2) and heart rate (HR) with low-energy consumption.

Methods

This study focused on the development of SpO2 and HR measurements that will allow the monitoring and estimation in real time of the user's state and health related to the established parameters. Measurements were acquired and recorded using a remote web server that recorded the acquired variables for further processing. The statistical analysis data allows comparison of the registered data measured with theoretical models.

Results

The IoT model was developed use Bluetooth low-energy devices, which comply with low-cost and open-hardware solutions operated via 'HTTP requests' for data transmission and reception from a cloud server to an edge device. Network performance assessment was conducted to guarantee the availability and integrity of the acquired values and signals. The system measured SpO2 and HR variables. The most significant result was to achieve energy consumption 20% lower than that of devices in the market.

Conclusions

In summary, the acquired data validation based on the IoT model had a transmission error of 0.001% which proves its applicability in healthcare.",2019-07-31 +30418480,BayMAP: a Bayesian hierarchical model for the analysis of PAR-CLIP data.,"

Motivation

Photoactivatable-Ribonucleoside-Enhanced Crosslinking and Immunoprecipitation (PAR-CLIP) is a biochemical method for detecting interaction sites of proteins with mRNA. This method introduces T-to-C substitutions at sequenced cDNA that help to detect binding sites on mRNA. However, T-to-C substitutions can also occur due to other reasons such as mismatches or SNPs. Only few statistical procedures exist for detecting binding sites in PAR-CLIP data. Most of these methods do not account for other types of substitutions than those induced by PAR-CLIP, and therefore, also report positions with high T-to-C substitution rates, e.g. SNPs, as binding sites. Moreover, none of these procedures allow to include additional information, e.g. the type of mRNA region, relevant for the biology of microRNA-binding sites.

Results

We have developed BayMAP, a procedure based on a fully Bayesian hierarchical model that takes other sources of substitutions into account. Furthermore, this model enables the incorporation of additional information into the analysis of PAR-CLIP data. This incorporation does not only permit a better detection of binding sites, but also a better understanding of the data and the biology of binding sites. In applications to simulated PAR-CLIP data, BayMAP distinguishes binding sites from noise better than existing methods. Additionally, it yields good estimates of the influence of the additional information. We here demonstrate BayMAP's usability for real datasets even when noisy data is present.

Availability and implementation

BayMAP is freely available as an R package at http://stat.math.uni-duesseldorf.de/baymap.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-06-01 +27907895,mutLBSgeneDB: mutated ligand binding site gene DataBase.,"Mutations at the ligand binding sites (LBSs) can influence protein structure stability, binding affinity with small molecules, and drug resistance in cancer patients. Our recent analysis revealed that ligand binding residues had a significantly higher mutation rate than other parts of the protein. Here, we built mutLBSgeneDB (mutated Ligand Binding Site gene DataBase) available at http://zhaobioinfo.org/mutLBSgeneDB We collected and curated over 2300 genes (mutLBSgenes) having ∼12 000 somatic mutations at ∼10 000 LBSs across 16 cancer types and selected 744 drug targetable genes (targetable_mutLBSgenes) by incorporating kinases, transcription factors, pharmacological genes, and cancer driver genes. We analyzed LBS mutation information, differential gene expression network, drug response correlation with gene expression, and protein stability changes for all mutLBSgenes using integrated genetic, genomic, transcriptomic, proteomic, network and functional information. We calculated and compared the binding affinities of 20 carefully selected genes with their drugs in wild type and mutant forms. mutLBSgeneDB provides a user-friendly web interface for searching and browsing through seven categories of annotations: Gene summary, Mutated information, Protein structure related information, Differential gene expression and gene-gene network, Phenotype information, Pharmacological information, and Conservation information. mutLBSgeneDB provides a useful resource for functional genomics, protein structure, drug and disease research communities.",2016-10-07 +29069336,"ArachnoServer 3.0: an online resource for automated discovery, analysis and annotation of spider toxins.","

Summary

ArachnoServer is a manually curated database that consolidates information on the sequence, structure, function and pharmacology of spider-venom toxins. Although spider venoms are complex chemical arsenals, the primary constituents are small disulfide-bridged peptides that target neuronal ion channels and receptors. Due to their high potency and selectivity, these peptides have been developed as pharmacological tools, bioinsecticides and drug leads. A new version of ArachnoServer (v3.0) has been developed that includes a bioinformatics pipeline for automated detection and analysis of peptide toxin transcripts in assembled venom-gland transcriptomes. ArachnoServer v3.0 was updated with the latest sequence, structure and functional data, the search-by-mass feature has been enhanced, and toxin cards provide additional information about each mature toxin.

Availability and implementation

http://arachnoserver.org.

Contact

support@arachnoserver.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-03-01 +32806550,Benchmark-Based Reference Model for Evaluating Botnet Detection Tools Driven by Traffic-Flow Analytics. ,"Botnets are some of the most recurrent cyber-threats, which take advantage of the wide heterogeneity of endpoint devices at the Edge of the emerging communication environments for enabling the malicious enforcement of fraud and other adversarial tactics, including malware, data leaks or denial of service. There have been significant research advances in the development of accurate botnet detection methods underpinned on supervised analysis but assessing the accuracy and performance of such detection methods requires a clear evaluation model in the pursuit of enforcing proper defensive strategies. In order to contribute to the mitigation of botnets, this paper introduces a novel evaluation scheme grounded on supervised machine learning algorithms that enable the detection and discrimination of different botnets families on real operational environments. The proposal relies on observing, understanding and inferring the behavior of each botnet family based on network indicators measured at flow-level. The assumed evaluation methodology contemplates six phases that allow building a detection model against botnet-related malware distributed through the network, for which five supervised classifiers were instantiated were instantiated for further comparisons-Decision Tree, Random Forest, Naive Bayes Gaussian, Support Vector Machine and K-Neighbors. The experimental validation was performed on two public datasets of real botnet traffic-CIC-AWS-2018 and ISOT HTTP Botnet. Bearing the heterogeneity of the datasets, optimizing the analysis with the Grid Search algorithm led to improve the classification results of the instantiated algorithms. 
An exhaustive evaluation was carried out demonstrating the adequateness of our proposal which prompted that Random Forest and Decision Tree models are the most suitable for detecting different botnet specimens among the chosen algorithms. They exhibited higher precision rates whilst analyzing a large number of samples with less processing time. The variety of testing scenarios were deeply assessed and reported to set baseline results for future benchmark analysis targeted on flow-based behavioral patterns.",2020-08-12 +32663767,SnapShot: TP53 status and macrophages infiltration in TCGA-analyzed tumors.,"The infiltration of immune cells is a hallmark of most forms of malignancy. It is well known that in Tumor Microenvironment (TME), monocytes undergo reprogramming process to differentiate into Tumor Associated Macrophages (TAMs) (M2 macrophages). Interestingly, this reprogramming process depends on signals provided by tumors. Hence, tumors from several tissues are infiltrated by functionally distinct TAMs populations. Tumor Protein p53(TP53) plays a role in the regulation or progression of DNA damage and repair through multiple mechanisms of the cell cycle, apoptosis, and genomic stability. Although, TP53 acts as a physiological break for M2 macrophages polarization; the potential regulatory function of TP53 in the infiltration of macrophages is still unknown. We used the Cancer Genomic Atlas (TCGA) clinical data from 10,009 samples across 30 types of cancer via the Tumor IMmune Estimation Tool (TIMER) (https://cistrome.shinyapps.io/timer/) to investigate whether TP53 status has an important clinical outcome on macrophages infiltration in different cancer types. Our analysis of TCGA showed that Ovarian Serous Cystadenocarcinoma (OV) patients with mutant TP53 had significantly higher macrophages infiltration than those with wild-type TP53 (P-value < 0.05) and poor prognosis associated. 
In contrast, Stomach Adenocarcinoma (STAD) patients with wild-type TP53 had considerably higher macrophages infiltration than those with mutant TP53 (P-value < 0.01) and poor clinical outcomes. Herein, our study sheds light on the novel clinical role of TP53 in macrophages infiltration in TME of OV and STAD patients. Furthermore, the modulation of TP53 and its co-regulators may serve as promising targets for OV and STAD patients.",2020-07-11 +29967752,A reference cytochrome c oxidase subunit I database curated for hierarchical classification of arthropod metabarcoding data.,"Metabarcoding is a popular application which warrants continued methods optimization. To maximize barcoding inferences, hierarchy-based sequence classification methods are increasingly common. We present methods for the construction and curation of a database designed for hierarchical classification of a 157 bp barcoding region of the arthropod cytochrome c oxidase subunit I (COI) locus. We produced a comprehensive arthropod COI amplicon dataset including annotated arthropod COI sequences and COI sequences extracted from arthropod whole mitochondrion genomes, the latter of which provided the only source of representation for Zoraptera, Callipodida and Holothyrida. The database contains extracted sequences of the target amplicon from all major arthropod clades, including all insect orders, all arthropod classes and Onychophora, Tardigrada and Mollusca outgroups. During curation, we extracted the COI region of interest from approximately 81 percent of the input sequences, corresponding to 73 percent of the genus-level diversity found in the input data. Further, our analysis revealed a high degree of sequence redundancy within the NCBI nucleotide database, with a mean of approximately 11 sequence entries per species in the input data. The curated, low-redundancy database is included in the Metaxa2 sequence classification software (http://microbiology.se/software/metaxa2/). 
Using this database with the Metaxa2 classifier, we performed a cross-validation analysis to characterize the relationship between the Metaxa2 reliability score, an estimate of classification confidence, and classification error probability. We used this analysis to select a reliability score threshold which minimized error. We then estimated classification sensitivity, false discovery rate and overclassification, the propensity to classify sequences from taxa not represented in the reference database. Our work will help researchers design and evaluate classification databases and conduct metabarcoding on arthropods and alternate taxa.",2018-06-26 +27306108,GPS-Lipid: a robust tool for the prediction of multiple lipid modification sites.,"As one of the most common post-translational modifications in eukaryotic cells, lipid modification is an important mechanism for the regulation of variety aspects of protein function. Over the last decades, three classes of lipid modifications have been increasingly studied. The co-regulation of these different lipid modifications is beginning to be noticed. However, due to the lack of integrated bioinformatics resources, the studies of co-regulatory mechanisms are still very limited. In this work, we developed a tool called GPS-Lipid for the prediction of four classes of lipid modifications by integrating the Particle Swarm Optimization with an aging leader and challengers (ALC-PSO) algorithm. GPS-Lipid was proven to be evidently superior to other similar tools. To facilitate the research of lipid modification, we hosted a publicly available web server at http://lipid.biocuckoo.org with not only the implementation of GPS-Lipid, but also an integrative database and visualization tool. We performed a systematic analysis of the co-regulatory mechanism between different lipid modifications with GPS-Lipid. 
The results demonstrated that the proximal dual-lipid modifications among palmitoylation, myristoylation and prenylation are key mechanism for regulating various protein functions. In conclusion, GPS-lipid is expected to serve as useful resource for the research on lipid modifications, especially on their co-regulation.",2016-06-16 +29494899,Ancestry and different rates of suicide and homicide in European countries: A study with population-level data.,"

Introduction

There are large differences in suicide rates across Europe. The current study investigated the relationship of suicide and homicide rates in different countries of Europe with ancestry as it is defined with the haplotype frequencies of Y-DNA and mtDNA.

Material and methods

The mortality data were retrieved from the WHO online database. The genetic data were retrieved from http://www.eupedia.com. The statistical analysis included Forward Stepwise Multiple Linear Regression analysis and Pearson Correlation Coefficient (R).

Results

In males, N and R1a Y-DNA haplotypes were positively related to both homicidal and suicidal behaviors while I1 was negatively related. The Q was positively related to the homicidal rate. Overall, 60-75% of the observed variance was explained. L, J and X mtDNA haplogroups were negatively related with suicide in females alone, with 82-85% of the observed variance described.

Discussion

The current study should not be considered as a study of genetic markers but rather a study of human ancestry. Its results could mean that research on suicidality has a strong biological but locally restricted component and could be limited by the study population; generalizability of the results at an international level might not be possible. Further research with patient-level data are needed to verify whether these haplotypes could serve as biological markers to identify persons at risk to commit suicide or homicide and whether biologically-determined ancestry could serve as an intermediate grouping method or even as an endophenotype in suicide research.",2018-02-17 +31214689,A sequential algorithm to detect diffusion switching along intracellular particle trajectories.,"

Motivation

Recent advances in molecular biology and fluorescence microscopy imaging have made possible the inference of the dynamics of single molecules in living cells. Changes of dynamics can occur along a trajectory. Then, an issue is to estimate the temporal change-points that is the times at which a change of dynamics occurs. The number of points in the trajectory required to detect such changes will depend on both the magnitude and type of the motion changes. Here, the number of points per trajectory is of the order of 102, even if in practice dramatic motion changes can be detected with less points.

Results

We propose a non-parametric procedure based on test statistics computed on local windows along the trajectory to detect the change-points. This algorithm controls the number of false change-point detections in the case where the trajectory is fully Brownian. We also develop a strategy for aggregating the detections obtained with different window sizes so that the window size is no longer a parameter to optimize. A Monte Carlo study is proposed to demonstrate the performances of the method and also to compare the procedure to two competitive algorithms. At the end, we illustrate the efficacy of the method on real data in 2D and 3D, depicting the motion of mRNA complexes-called mRNA-binding proteins-in neuronal dendrites, Galectin-3 endocytosis and trafficking within the cell.

Availability and implementation

A user-friendly Matlab package containing examples and the code of the simulations used in the paper is available at http://serpico.rennes.inria.fr/doku.php?id=software:cpanalysis:index.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +29301376,"SistematX, an Online Web-Based Cheminformatics Tool for Data Management of Secondary Metabolites. ","The traditional work of a natural products researcher consists in large part of time-consuming experimental work, collecting biota to prepare and analyze extracts and to identify innovative metabolites. However, along this long scientific path, much information is lost or restricted to a specific niche. The large amounts of data already produced and the science of metabolomics reveal new questions: Are these compounds known or new? How fast can this information be obtained? To answer these and other relevant questions, an appropriate procedure to correctly store information on the data retrieved from the discovered metabolites is necessary. The SistematX (http://sistematx.ufpb.br) interface is implemented considering the following aspects: (a) the ability to search by structure, SMILES (Simplified Molecular-Input Line-Entry System) code, compound name and species; (b) the ability to save chemical structures found by searching; (c) compound data results include important characteristics for natural products chemistry; and (d) the user can find specific information for taxonomic rank (from family to species) of the plant from which the compound was isolated, the searched-for molecule, and the bibliographic reference and Global Positioning System (GPS) coordinates. The SistematX homepage allows the user to log into the data management area using a login name and password and gain access to administration pages. In this article, we introduced a modern and innovative web interface for the management of a secondary metabolite database. With its multiplatform design, it is able to be properly consulted via the internet and managed from any accredited computer. 
The interface provided by SistematX contains a wealth of useful information for the scientific community about natural products, highlighting the locations of species from which compounds are isolated.",2018-01-03 +24214988,SAbDab: the structural antibody database.,"Structural antibody database (SAbDab; http://opig.stats.ox.ac.uk/webapps/sabdab) is an online resource containing all the publicly available antibody structures annotated and presented in a consistent fashion. The data are annotated with several properties including experimental information, gene details, correct heavy and light chain pairings, antigen details and, where available, antibody-antigen binding affinity. The user can select structures, according to these attributes as well as structural properties such as complementarity determining region loop conformation and variable domain orientation. Individual structures, datasets and the complete database can be downloaded.",2013-11-08 +30584170,MDSGene: Closing Data Gaps in Genotype-Phenotype Correlations of Monogenic Parkinson's Disease.,"Given the rapidly increasing number of reported movement disorder genes and clinical-genetic desciptions of mutation carriers, the International Parkinson's Disease and Movement Disorder Society Gene Database (MDSGene) initiative has been launched in 2016 and grown to become a large international project (http://www.mdsgene.org). MDSGene currently contains >1150 variants described in ∼5700 movement disorder patients in almost 1000 publications including monogenic forms of PD clinically resembling idiopathic (PARK-PINK1, PARK-Parkin, PARK-DJ-1, PARK-SNCA, PARK-VPS35, PARK-LRRK2), as well as of atypical PD (PARK-SYNJ1, PARK-DNAJC6, PARK-ATP13A2, PARK-FBXO7). Inclusion of genes is based on standardized published criteria for determining causation. 
Clinical and genetic information can be filtered according to demographic, clinical or genetic criteria and summary statistics are automatically generated by the MDSGene online tool. Despite MDSGene's novel approach and features, it also faces several challenges: i) The criteria for designating genes as causative will require further refinement, as well as time and support to replace the faulty list of 'PARKs'. ii) MDSGene has uncovered extensive clinical data gaps. iii) The quickly growing body of clinical and genetic data require a large number of experts worldwide posing logistic challenges. iv) MDSGene currently captures published data only, i.e., a small fraction of the available information on monogenic PD available. Thus, an important future aim is to extend MDSGene to unpublished cases in order to provide the broad data base to the PD community that is necessary to comprehensively inform genetic counseling, therapeutic approaches and clinical trials, as well as basic and clinical research studies in monogenic PD.",2018-01-01 +30147056,Data-Driven Assessment of Potentially Inappropriate Medication in the Elderly.,"Multimorbid patients taking polypharmacy represent a growing population at high risk for inappropriate prescribing. Various lists for identifying potentially inappropriate medication are spread across scientific journals and difficult to access. To address this ongoing need, a new database named PIMBase is developed which integrates these well-known lists and unifies their rating scales. The analysis of the pharmacovigilance data reveals the benefits of combining the lists. PIMBase is meant to be a web-based system and starting point for the data-driven assessment of polypharmacy to identify inappropriate medication and to improve the quality of prescribing. 
PIMBase is available at https://pimbase.kalis-amts.de.",2018-01-01 +32577936,GIS-augmented survey of poultry farms with respiratory problems in Haryana.,"Among various infectious diseases of poultry, diseases of the respiratory tract are responsible for considerable economic losses. The present study was conducted to evaluate some of the risk factors, which included locations of farm, ventilation facilities, number of farms in 1 km2 area, agro-climatic zone, and age of flock in relation to respiratory problem in Haryana, India. One hundred poultry flocks with respiratory problems were identified and selected for conducting the survey. The ""ODK Collect"" app installed on a smartphone was used to capture coordinates of the farms. The collected data was accessed through http://odkproject-iirs.appspot.com/ . The location of farms was mapped with the help of QGIS. All the three parameters, viz., morbidity, mortality (p < 0.001), and case fatality rate (CFR) (p = 0.045), were significantly higher in birds of age 0-2 weeks. Natural ventilation was the most common facility observed in the present study (51/100). Maximum morbidity and mortality were observed in small flocks (< 10,000), whereas maximum CFR was observed in medium-sized flocks (> 10,000-30,000), and there was a significant difference in morbidity, mortality, and CFR. Further, there was a significant difference between agro-climatic zones with respect to morbidity and mortality (p < 0.001). It can be concluded that age, flock size, and agro-climatic conditions have an impact on intensity of diseases especially respiratory diseases. Therefore, special precautions should be taken for young flock. 
Flock size should be adequate, and such management practices should be adopted that are suitable for particular climatic conditions.",2020-06-23 +24122086,Brief communication: cranial nonmetric trait database on the internet.,"This brief note announces the publication of a nonmetric cranial trait database as a freely available resource on the Internet at: http://library.queensu.ca/webdoc/ssdc/cntd. The files were constructed in the program Excel, and are available also in comma-delimited format. These one-observer data on 38 traits were recorded in 1963-2003 in skeletal collections curated at many museums. The 8,016 crania represent individuals from a broad geographic and temporal range of recent human populations, with regions best represented being the Arctic and northwestern North America.",2013-10-04 +31297422,Example dataset for the hMRI toolbox.,"The hMRI toolbox is an open-source toolbox for the calculation of quantitative MRI parameter maps from a series of weighted imaging data, and optionally additional calibration data. The multi-parameter mapping (MPM) protocol, incorporating calibration data to correct for spatial variation in the scanner's transmit and receive fields, is the most complete protocol that can be handled by the toolbox. Here we present a dataset acquired with such a full MPM protocol, which is made freely available to be used as a tutorial by following instructions provided on the associated toolbox wiki pages, which can be found at http://hMRI.info, and following the theory described in: hMRI - A toolbox for quantitative MRI in neuroscience and clinical research [1].",2019-06-11 +30497361,miRTissue: a web application for the analysis of miRNA-target interactions in human tissues.,"

Background

microRNAs act as regulators of gene expression interacting with their gene targets. Current bioinformatics services, such as databases of validated miRNA-target interactions and prediction tools, usually provide interactions without any information about what tissue that interaction is more likely to appear nor information about the type of interactions, causing mRNA degradation or translation inhibition respectively.

Results

In this work, we introduce miRTissue, a web application that combines validated miRNA-target interactions with statistical correlation among expression profiles of miRNAs, genes and proteins in 15 different human tissues. Validated interactions are taken from the miRTarBase database, while expression profiles are downloaded from The Cancer Genome Atlas repository. As a result, the service provides a tissue-specific characterisation of each couple of miRNA and gene together with its statistical significance (p-value). The inclusion of protein data also allows providing the type of interaction. Moreover, miRTissue offers several views for analysing interactions, focusing for example on the comparison between different cancer types or different tissue conditions. All the results are freely downloadable in the most common formats.

Conclusions

miRTissue fills a gap concerning current bioinformatics services related to miRNA-target interactions because it provides a tissue-specific context to each validated interaction and the type of interaction itself. miRTissue is easily browsable allowing the user to select miRNAs, genes, cancer types and tissue conditions. The results can be sorted according to p-values to immediately identify those interactions that are more likely to occur in a given tissue. miRTissue is available at http://tblab.pa.icar.cnr.it/mirtissue.html.",2018-11-30 +27090005,Ring Catalog: A resource for designing self-assembling RNA nanostructures.,"Designing self-assembling RNA ring structures based on known 3D structural elements connected via linker helices is a challenging task due to the immense number of motif combinations, many of which do not lead to ring-closure. We describe an in silico solution to this design problem by combinatorial assembly of RNA 3-way junctions, bulges, and kissing loops, and tabulating the cases that lead to ring formation. The solutions found are made available in the form of a web-accessible Ring Catalog. As an example of a potential use of this resource, we chose a predicted RNA square structure consisting of five RNA strands and demonstrate experimentally that the self-assembly of those five strands leads to the formation of a square-like complex. This is a demonstration of a novel ""design by catalog"" approach to RNA nano-structure generation. The URL https://rnajunction.ncifcrf.gov/ringdb can be used to access the resource.",2016-04-26 +29761460,A Primer for the Rat Genome Database (RGD).,"The laboratory rat, Rattus norvegicus, is an important model of human health and disease, and experimental findings in the rat have relevance to human physiology and disease. 
The Rat Genome Database (RGD, http://rgd.mcw.edu ) is a model organism database that provides access to a wide variety of curated rat data including disease associations, phenotypes, pathways, molecular functions, biological processes and cellular components for genes, quantitative trait loci, and strains. We present an overview of the database followed by specific examples that can be used to gain experience in employing RGD to explore the wealth of functional data available for the rat.",2018-01-01 +36406011,Images from Science 3: A True Celebration of Contemporary and Extraordinary Images of Science.,"On October 12, 2002, the first Images from Science (IFS) exhibition opened in the William Harris Gallery at Rochester Institute of Technology (RIT). Professor Michael Peres and Professor Emeritus Andrew Davidhazy created the project with the intent of promoting a wider appreciation of scientific photography by showcasing beautiful, data-rich - but rarely seen - images drawn from oceanography, geology, biology, engineering, medicine, and physics in the traveling exhibition. The organizers of IFS 3 hoped to identify 75 examples of images that revealed science in new and unique ways. Similar to past IFS projects, they used the internet as the primary voice for promotion. Different than IFS 1 and 2, this exhibition features moving images, animations, and medical illustrations, as well as photographs. An international panel of seven experts from around the world selected 81 images. Creating an international exhibition on a tight budget created some unique challenges. The success of the exhibition required constant innovation and problem solving. 
Organizers: Michael Peres, Norm Barker, Ted Kinsman, Bob Rose, and Chris Jackson Selected cover images: Structure of the Renal Corpuscle, 2018 © Joe Samson, All Rights Reserved Dasyatis sabina, Atlantic Stingray, 2018 © Michael Chaise Gilbert, All Rights Reserved The full exhibit can be seen on line: https://images.cad.rit.edu/gallery2019.html.",2020-06-03 +28771471,The Genomic Observatories Metadatabase (GeOMe): A new repository for field and sampling event metadata associated with genetic samples.,"The Genomic Observatories Metadatabase (GeOMe, http://www.geome-db.org/) is an open access repository for geographic and ecological metadata associated with biosamples and genetic data. Whereas public databases have served as vital repositories for nucleotide sequences, they do not accession all the metadata required for ecological or evolutionary analyses. GeOMe fills this need, providing a user-friendly, web-based interface for both data contributors and data recipients. The interface allows data contributors to create a customized yet standard-compliant spreadsheet that captures the temporal and geospatial context of each biosample. These metadata are then validated and permanently linked to archived genetic data stored in the National Center for Biotechnology Information's (NCBI's) Sequence Read Archive (SRA) via unique persistent identifiers. By linking ecologically and evolutionarily relevant metadata with publicly archived sequence data in a structured manner, GeOMe sets a gold standard for data management in biodiversity science.",2017-08-03 +28245811,RNA-protein binding motifs mining with a new hybrid deep learning based cross-domain knowledge integration approach.,"

Background

RNAs play key roles in cells through the interactions with proteins known as the RNA-binding proteins (RBP) and their binding motifs enable crucial understanding of the post-transcriptional regulation of RNAs. How the RBPs correctly recognize the target RNAs and why they bind specific positions is still far from clear. Machine learning-based algorithms are widely acknowledged to be capable of speeding up this process. Although many automatic tools have been developed to predict the RNA-protein binding sites from the rapidly growing multi-resource data, e.g. sequence, structure, their domain specific features and formats have posed significant computational challenges. One of the current difficulties is that the cross-source shared common knowledge is at a higher abstraction level beyond the observed data, resulting in a low efficiency of direct integration of observed data across domains. The other difficulty is how to interpret the prediction results. Existing approaches tend to terminate after outputting the potential discrete binding sites on the sequences, but how to assemble them into the meaningful binding motifs is a topic worthy of further investigation.

Results

In view of these challenges, we propose a deep learning-based framework (iDeep) by using a novel hybrid convolutional neural network and deep belief network to predict the RBP interaction sites and motifs on RNAs. This new protocol is featured by transforming the original observed data into a high-level abstraction feature space using multiple layers of learning blocks, where the shared representations across different domains are integrated. To validate our iDeep method, we performed experiments on 31 large-scale CLIP-seq datasets, and our results show that by integrating multiple sources of data, the average AUC can be improved by 8% compared to the best single-source-based predictor; and through cross-domain knowledge integration at an abstraction level, it outperforms the state-of-the-art predictors by 6%. Besides the overall enhanced prediction performance, the convolutional neural network module embedded in iDeep is also able to automatically capture the interpretable binding motifs for RBPs. Large-scale experiments demonstrate that these mined binding motifs agree well with the experimentally verified results, suggesting iDeep is a promising approach in the real-world applications.

Conclusion

The iDeep framework can not only achieve better performance than the state-of-the-art predictors, but also easily capture interpretable binding motifs. iDeep is available at http://www.csbio.sjtu.edu.cn/bioinf/iDeep.",2017-02-28 +31837394,"Litter effects: Comments on Golub and Sobin's ""Statistical modeling of litter as a random effect in mixed models to manage ""intralitter likeness"""".","The importance of litter effects (clustering of variance among offspring in rodents) has been known for decades. The standard approach was to treat the entire litter as a unit or to select one male and one female from each litter to prevent oversampling. These methods work but are imperfect. Treating the litter as a whole fails to use valuable interindividual differences among offspring, and selecting representative pups fails to use all the data available. Golub and Sobin [https://doi.org/10.1016/j.ntt.2019.106841] address this using a better method. They show that using litter as a random factor in mixed linear models resolves this conundrum. As they demonstrate, such models control for litter clustering by partitioning litter variance from error variance. This reduces error variance and increases the power of F-tests of the independent variable(s). In our experience, this is the optimal solution. But as good as mixed linear models are when used with litter as a random factor, if other aspects of the experimental design are not appropriate, this cannot compensate for threats to validity from small sample sizes, dams not strictly randomly assigned to groups, repeated measure covariance structures not appropriately modeled, interactions not properly sliced, or a posteriori group comparisons not controlled for multiple comparisons. 
Appropriate handling of litter is only one consideration of experimental design and statistical analysis that when used in combination lead to valid, reproducible data.",2019-12-11 +32462065,KomNET: Face Image Dataset from Various Media for Face Recognition.,"KomNet is a face image dataset originated from three media sources which can be used to recognize faces. KomNET contains face images which were collected from three different media sources, i.e. mobile phone camera, digital camera, and media social. The collected face dataset was frontal face image or facing the camera. The face dataset originated from the three media were collected without certain conditions such as lighting, background, haircut, mustache and beard, head cover, glasses, and differences of expression. KomNet dataset were collected from 50 clusters in which each of them consisted of 24 face images. To increase the number of training data, the face images were propagated with augmentation image technique, in which ten augmentations were used such as Rotate, Flip, Gaussian Blur, Gamma Contrast, Sigmoid Contrast, Sharpen, Emboss, Histogram Equalization, Hue and Saturation, Average Blur so the face images became 240 face images per cluster. The author trained the dataset by using CNN-based transfer learning VGGface. KomNET dataset are freely available on https://data.mendeley.com/datasets/hsv83m5zbb/2.",2020-05-13 +23180778,"The Zebrafish Insertion Collection (ZInC): a web based, searchable collection of zebrafish mutations generated by DNA insertion.","ZInC (Zebrafish Insertional Collection, http://research.nhgri.nih.gov/ZInC/) is a web-searchable interface of insertional mutants in zebrafish. Over the last two decades, the zebrafish has become a popular model organism for studying vertebrate development as well as for modeling human diseases. To facilitate such studies, we are generating a genome-wide knockout resource that targets every zebrafish protein-coding gene. 
All mutant fish are freely available to the scientific community through the Zebrafish International Resource Center (ZIRC). To assist researchers in finding mutant and insertion information, we developed a comprehensive database with a web front-end, the ZInC. It can be queried using multiple types of input such as ZFIN (Zebrafish Information Network) IDs, UniGene accession numbers and gene symbols from zebrafish, human and mouse. In the future, ZInC may include data from other insertional mutation projects as well. ZInC cross-references all integration data with the ZFIN (http://zfin.org/).",2012-11-24 +28183351,A new resource for developing and strengthening large-scale community health worker programs.,"Large-scale community health worker programs are now growing in importance around the world in response to the resurgence of interest and growing evidence of the importance of community-based primary health care for improving the health of populations in resource-constrained, high-mortality settings. These programs, because of their scale and operational challenges, merit special consideration by the global health community, national policy-makers, and program implementers. A new online resource is now available to assist in that effort: Developing and Strengthening Community Health Worker Programs at Scale: A Reference Guide and Case Studies for Program Managers and Policymakers ( http://www.mchip.net/CHWReferenceGuide ). This CHW Reference Guide is the product of 27 different collaborators who, collectively, have a formidable breadth and depth of experience and knowledge about CHW programming around the world. It provides a thoughtful discussion about the many operational issues that large-scale CHW programs need to address as they undergo the process of development, expansion or strengthening. Detailed case studies of 12 national CHW programs are included in the Appendix-the most current and complete cases studies as a group that are currently available. 
Future articles in this journal will highlight many of the themes in the CHW Reference Guide and provide an update of recent advances and experiences. These articles will serve, we hope, to (1) increase awareness about the CHW Reference Guide and its usefulness and (2) connect a broader audience to the critical importance of strengthening large-scale CHW programs for the health benefits that they can bring to underserved populations around the world.",2017-01-12 +31665448,Assembly and annotation of the mitochondrial minicircle genome of a differentiation-competent strain of Trypanosoma brucei.,"Kinetoplastids are protists defined by one of the most complex mitochondrial genomes in nature, the kinetoplast. In the sleeping sickness parasite Trypanosoma brucei, the kinetoplast is a chain mail-like network of two types of interlocked DNA molecules: a few dozen ∼23-kb maxicircles (homologs of the mitochondrial genome of other eukaryotes) and thousands of ∼1-kb minicircles. Maxicircles encode components of respiratory chain complexes and the mitoribosome. Several maxicircle-encoded mRNAs undergo extensive post-transcriptional RNA editing via addition and deletion of uridines. The process is mediated by hundreds of species of minicircle-encoded guide RNAs (gRNAs), but the precise number of minicircle classes and gRNA genes was unknown. Here we present the first essentially complete assembly and annotation of the kinetoplast genome of T. brucei. We have identified 391 minicircles, encoding not only ∼930 predicted 'canonical' gRNA genes that cover nearly all known editing events (accessible via the web at http://hank.bio.ed.ac.uk), but also ∼370 'non-canonical' gRNA genes of unknown function. Small RNA transcriptome data confirmed expression of the majority of both categories of gRNAs. 
Finally, we have used our data set to refine definitions for minicircle structure and to explore dynamics of minicircle copy numbers.",2019-12-01 +31068770,Multimodal Integration of M/EEG and f/MRI Data in SPM12.,"We describe the steps involved in analysis of multi-modal, multi-subject human neuroimaging data using the SPM12 free and open source software (https://www.fil.ion.ucl.ac.uk/spm/) and a publically-available dataset organized according to the Brain Imaging Data Structure (BIDS) format (https://openneuro.org/datasets/ds000117/). The dataset contains electroencephalographic (EEG), magnetoencephalographic (MEG), and functional and structural magnetic resonance imaging (MRI) data from 16 subjects who undertook multiple runs of a simple task performed on a large number of famous, unfamiliar and scrambled faces. We demonstrate: (1) batching and scripting of preprocessing of multiple runs/subjects of combined MEG and EEG data, (2) creation of trial-averaged evoked responses, (3) source-reconstruction of the power (induced and evoked) across trials within a time-frequency window around the ""N/M170"" evoked component, using structural MRI for forward modeling and simultaneous inversion (fusion) of MEG and EEG data, (4) group-based optimisation of spatial priors during M/EEG source reconstruction using fMRI data on the same paradigm, and (5) statistical mapping across subjects of cortical source power increases for faces vs. scrambled faces.",2019-04-24 +26048563,CYCLoPs: A Comprehensive Database Constructed from Automated Analysis of Protein Abundance and Subcellular Localization Patterns in Saccharomyces cerevisiae.,"Changes in protein subcellular localization and abundance are central to biological regulation in eukaryotic cells. Quantitative measures of protein dynamics in vivo are therefore highly useful for elucidating specific regulatory pathways. 
Using a combinatorial approach of yeast synthetic genetic array technology, high-content screening, and machine learning classifiers, we developed an automated platform to characterize protein localization and abundance patterns from images of log phase cells from the open-reading frame-green fluorescent protein collection in the budding yeast, Saccharomyces cerevisiae. For each protein, we produced quantitative profiles of localization scores for 16 subcellular compartments at single-cell resolution to trace proteome-wide relocalization in conditions over time. We generated a collection of ∼300,000 micrographs, comprising more than 20 million cells and ∼9 billion quantitative measurements. The images depict the localization and abundance dynamics of more than 4000 proteins under two chemical treatments and in a selected mutant background. Here, we describe CYCLoPs (Collection of Yeast Cells Localization Patterns), a web database resource that provides a central platform for housing and analyzing our yeast proteome dynamics datasets at the single cell level. CYCLoPs version 1.0 is available at http://cyclops.ccbr.utoronto.ca. CYCLoPs will provide a valuable resource for the yeast and eukaryotic cell biology communities and will be updated as new experiments become available.",2015-04-15 +29992323,CeleryDB: a genomic database for celery. ,"Celery (Apium graveolens L.) is a plant belonging to the Apiaceae family, and a popular vegetable worldwide because of its abundant nutrients and various medical functions. Although extensive genetic and molecular biological studies have been conducted on celery, its genomic data remain unclear. Given the significance of celery and the growing demand for its genomic data, the whole genome of 'Q2-JN11' celery (a highly inbred line obtained by artificial selfing of 'Jinnan Shiqin') was sequenced using HiSeq 2000 sequencing technology. 
For the convenience of researchers to study celery, an online database of the whole-genome sequences of celery, CeleryDB, was constructed. The sequences of the whole genome, nucleotide sequences of the predicted genes and amino acid sequences of the predicted proteins are available online on CeleryDB. Home, BLAST, Genome Browser, Transcription Factor and Download interfaces composed of the organizational structure of CeleryDB. Users can search the celery genomic data by using two user-friendly query tools: basic local alignment search tool and Genome Browser. In the future, CeleryDB will be constantly updated to satisfy the needs of celery researchers worldwide.Database URL: http://apiaceae.njau.edu.cn/celerydb.",2018-01-01 +31634719,What is the role of meteorological variables on involuntary admission in psychiatric ward? An Italian cross-sectional study.,"Weather affects physical and mental health through several modalities which are not fully elucidated. The aim of the present study was to investigate the impact of meteorological variables and other indexes in a large sample of hospitalized patients, focusing on subjects who were involuntarily admitted. We hypothesized a direct relation between the amount of involuntary admissions and mean sunshine hours. Furthermore, we supposed that specific meteorological factors may significantly influence hospitalizations of patients affected by severe psychiatric conditions. All subjects were consecutively recruited from the Psychiatric Inpatient Unit of San Luigi Gonzaga Hospital, Orbassano (Turin, Italy) from September 2013 to August 2015. Socio-demographic and clinical characteristics were carefully collected. Meteorological data were derived by the Italian Meteorology's Climate Data Service of Physics Department of the University of Turin (Latitude: 45°03'07,15″ Nord, Longitude: 007°40'53,30″ Est, Altitude: 254 m above the sea level) (http://www.meteo.dfg.unito.it/). 
Our data indicate significant differences regarding temperature (minimum, maximum, and medium), solar radiation, humidex and windchill index, and hours of sunshine in psychiatric patients who were involuntarily hospitalized. After logistic regression analyses, only maximum and medium temperature, and humidex index remained significantly associated with involuntary admission in an emergency psychiatric ward. The limitations of this study include the cross-sectional study design and the single hospital for patients' recruitment. Furthermore, results and seasonal patterns obtained by patients requiring hospitalization might significantly differ from those who were not hospitalized. Exploring in a more detailed manner those environmental factors associated with involuntary admissions could lead to early intervention and prevention strategies for such distressing hospitalizations.",2019-10-10 +31837751,VetCOT: The Veterinary Trauma Registry.,"The goals of the Veterinary Committee on Trauma (VetCOT) trauma registry are to (1) inform improvement of veterinary and human trauma patient care and (2) design clinical and preclinical trials that could inform go/no go decisions for interventional strategies and tools. The VetCOT registry was established in 2013, and includes all trauma cases that present to Veterinary Trauma Centers. Veterinary Trauma Centers are well-resourced veterinary hospitals that are initially identified, then subsequently verified, by the American College of Veterinary and Emergency Critical Care VetCOT (http://vetcot.org/index.php/home/identification-and-verification-process/). As of June 2019, there are > 40,000 dog and cat cases in the registry, 3 publications and 9 ongoing projects utilizing data from the registry. Application materials to utilize VetCOT registry data is available on the VetCOT website (http://vetcot.org/index.php/home/registry-use-materials/).",2019-09-25 +31461537,"Dose point kernels for 2,174 radionuclides.","

Purpose

Rapid adoption of targeted radionuclide therapy as an oncologic intervention has motivated the development of patient-specific voxel-wise approaches to radiation dosimetry. These approaches often rely on pretabulated dose point kernels for convolution-based calculations; however, these dose kernels are sparse in literature and often have suboptimal characteristics. The purpose of this work was to generate an extensive library of dose point kernels with sufficient size and resolution for general clinical application of voxel-wise dosimetry.

Methods

Nuclear data were acquired for 2174 radionuclides from the National Nuclear Data Center (Brookhaven National Laboratory, accessed March 2018). Based on these data, isotropic point sources of radioactivity in water were simulated using Monte Carlo N-Particle transport v6.2 (MCNP6.2, Los Alamos National Laboratory). Simulations were separated by emission type for each radionuclide - photons (γ-rays, x rays), beta particles (positrons, electrons); and discrete electrons (conversion electrons, Auger electrons, Coster-Kronig electrons). Dose was tallied in concentric spherical shells about the point source using an energy deposition pulse-height tally (MCNP *F8 tally). Bins were spaced every 0.1 mm until a radius of 10 cm, and every 1 mm until a radius of 2 m. Positron emissions were treated as electrons for transport, with annihilation photons generated at the origin within the photon simulation. Alpha particle emissions were not simulated since their energy is deposited within ~0.2 mm of the source. Neutron and spallation effects were not considered. A subset of the resultant dose point kernels (11 C, 18 F, 32 P, 52g Mn, 64 Cu, 67 Ga, 89 Sr, 89 Zr, 90 Y, 99m Tc, 111 In, 117m Sn, 123 I, 124 I, 125 I, 131 I, 153 Sm, 177 Lu, 186 Re, 188 Re, 211 At, 212 Pb, 213 Bi, 223 Ra, and 225 Ac) were evaluated for accuracy based on conservation of energy, comparison to kernels in the literature, and statistical precision.

Results

Among dose point kernels that were manually reviewed, good agreement with previously published dose point kernels was observed. Energy within the kernels was found to be conserved to within 1% of the value expected from nuclear data, suggesting that a radius of 2 m was sufficient to capture almost all of the energy released during decay for all isotopes considered. Local dosimetric uncertainty, evaluated at the radius of 99% energy deposition, was found to be less than 9% for all radioisotopes evaluated. Rebinning data more coarsely by a factor of 10, similar to what would be done for a clinical dose calculation, results in all evaluated kernels having a relative error of less than 1.1% at R50% , 1.5% at R90% , and 2.7% at R99% (the radius corresponding to 50%, 90%, and 99% of total energy deposition, respectively). The kernels produced in this work have been made freely available (https://zenodo.org/record/2564036).

Conclusions

An extensive library of high-resolution radial dose kernels was generated and validated against published data. In addition to enabling patient-specific voxel-wise internal dosimetry by convolution superposition, the generated dose point kernels data may prove useful to the wider health physics community.",2019-09-18 +31667304,Data on the draft genome sequence of Caryocar brasiliense Camb. (Caryocaraceae): An important genetic resource from Brazilian savannas.,"Caryocar brasiliense (Caryocaraceae) is a Neotropical tree species widely distributed in Brazilian savannas. This species is very popular in central Brazil mainly due to the use of its fruits in the local cuisine and their anti-inflammatory proprieties, and indeed it is one of the candidates, among Brazilian native plants, for fast track incorporation into cropping systems. Considering the importance of Caryocar brasiliense, little is known about its genetics and genomics, and determination of a reference genome sequence could improve the understanding of its evolution, as well as the development of tools for domestication. Here, we provide the first draft genome of C. brasiliense, the raw sequencing data and some multiplex sets of high quality microsatellite primers. Data on the genome project can be obtained from the BioProject at NCBI (https://www.ncbi.nlm.nih.gov/bioproject/?term=caryocar).",2019-09-23 +31449464,Using Birth Cohort Data to Estimate Prenatal Chemical Exposures for All Births around the New Bedford Harbor Superfund Site in Massachusetts.,"

Background

Children born near New Bedford, Massachusetts, have been prenatally exposed to multiple environmental chemicals, in part due to an older housing stock, maternal diet, and proximity to the New Bedford Harbor (NBH) Superfund site. Chemical exposure measures are not available for all births, limiting epidemiologic investigations and potential interventions.

Objective

We linked biomonitoring data from the New Bedford Cohort (NBC) and birth record data to predict prenatal exposures for all contemporaneous area births.

Methods

We used prenatal exposure biomarker data from the NBC, a population-based cohort of 788 mother-infant pairs born during 1993–1998 to mothers living near the NBH, linked to their corresponding Massachusetts birth record data, to build predictive models for cord serum polychlorinated biphenyls (expressed as a sum, [Formula: see text]), [Formula: see text] (DDE), hexachlorobenzene (HCB), cord blood lead (Pb), and maternal hair mercury (Hg). We applied the best fit models (highest pseudo [Formula: see text]), with multivariable smooths of continuous variables, to predict exposure biomarkers for all 10,270 births during 1993–1998 around the NBH. We used 10-fold cross validation to validate the exposure models and the bootstrap method to characterize sampling variability in the exposure predictions.

Results

The 10-fold cross-validated [Formula: see text] for the [Formula: see text], DDE, HCB, Pb, and Hg exposure models were 0.54, 0.40, 0.34, 0.46, and 0.40, respectively. For each exposure model, multivariable smooths of continuous variables improved the fit compared with linear models. Other variables with significant effects on exposure estimates were paternal education, maternal race/ethnicity, and maternal ancestry. The resulting exposure predictions for all births had variability consistent with the NBC measured exposures.

Conclusions

Predictive models using multivariable smoothing explained reasonable amounts of variance in prenatal exposure biomarkers. Our analyses suggest that prenatal chemical exposures can be predicted for all contemporaneous births in the same geographic area by modeling available biomarker data for a subset of that population. https://doi.org/10.1289/EHP4849.",2019-08-26 +32202212,Correlation models for monitoring fetal growth.,"Ultrasound growth measurements are monitored to evaluate if a fetus is growing normally compared with a defined standard chart at a specified gestational age. Using data from the Fetal Growth Longitudinal Study of the INTERGROWTH-21st project, we have modelled the longitudinal dependence of fetal head circumference, biparietal diameter, occipito-frontal diameter, abdominal circumference, and femur length using a two-stage approach. The first stage involved finding a suitable transformation of the raw fetal measurements (as the marginal distributions of ultrasound measurements were non-normal) to standardized deviations (Z-scores). In the second stage, a correlation model for a Gaussian process is fitted, yielding a correlation for any pair of observations made between 14 and 40 weeks. The correlation structure of the fetal Z-score can be used to assess whether the growth, for example, between successive measurements is satisfactory. The paper is accompanied by a Shiny application, see https://lxiao5.shinyapps.io/shinycalculator/.",2020-03-23 +31580061,PCAViz: An Open-Source Python/JavaScript Toolkit for Visualizing Molecular Dynamics Simulations in the Web Browser.,"Molecular dynamics (MD) simulations reveal molecular motions at atomic resolution. Recent advances in high-performance computing now enable microsecond-long simulations capable of sampling a wide range of biologically relevant events. 
But the disk space required to store an MD trajectory increases with simulation length and system size, complicating collaborative sharing and visualization. To overcome these limitations, we created PCAViz, an open-source toolkit for sharing and visualizing MD trajectories via the web browser. PCAViz includes two components: the PCAViz Compressor, which compresses and saves simulation data; and the PCAViz Interpreter, which decompresses the data in users' browsers and feeds it to any of several browser-based molecular-visualization libraries (e.g., 3Dmol.js, NGL Viewer, etc.). An easy-to-install WordPress plugin enables ""plug-and-play"" trajectory visualization. PCAViz will appeal to a broad audience of researchers and educators. The source code is available at http://durrantlab.com/pcaviz/ , and the WordPress plugin is available via the official WordPress Plugin Directory.",2019-10-16 +26431337,A Curated Database of Rodent Uterotrophic Bioactivity.,"

Background

Novel in vitro methods are being developed to identify chemicals that may interfere with estrogen receptor (ER) signaling, but the results are difficult to put into biological context because of reliance on reference chemicals established using results from other in vitro assays and because of the lack of high-quality in vivo reference data. The Organisation for Economic Co-operation and Development (OECD)-validated rodent uterotrophic bioassay is considered the ""gold standard"" for identifying potential ER agonists.

Objectives

We performed a comprehensive literature review to identify and evaluate data from uterotrophic studies and to analyze study variability.

Methods

We reviewed 670 articles with results from 2,615 uterotrophic bioassays using 235 unique chemicals. Study descriptors, such as species/strain, route of administration, dosing regimen, lowest effect level, and test outcome, were captured in a database of uterotrophic results. Studies were assessed for adherence to six criteria that were based on uterotrophic regulatory test guidelines. Studies meeting all six criteria (458 bioassays on 118 unique chemicals) were considered guideline-like (GL) and were subsequently analyzed.

Results

The immature rat model was used for 76% of the GL studies. Active outcomes were more prevalent across rat models (74% active) than across mouse models (36% active). Of the 70 chemicals with at least two GL studies, 18 (26%) had discordant outcomes and were classified as both active and inactive. Many discordant results were attributable to differences in study design (e.g., injection vs. oral dosing).

Conclusions

This uterotrophic database provides a valuable resource for understanding in vivo outcome variability and for evaluating the performance of in vitro assays that measure estrogenic activity.

Citation

Kleinstreuer NC, Ceger PC, Allen DG, Strickland J, Chang X, Hamm JT, Casey WM. 2016. A curated database of rodent uterotrophic bioactivity. Environ Health Perspect 124:556-562; http://dx.doi.org/10.1289/ehp.1510183.",2015-10-02 +30411429,A Comprehensive Review of Dorsomedial Prefrontal Cortex rTMS Utilizing a Double Cone Coil.,"

Background

Repetitive transcranial magnetic stimulation (rTMS) has become increasingly popular during the last decades mainly driven by the antidepressant effects of dorsolateral prefrontal cortex stimulation with ""butterfly"" coils. Only recently, alternative targets such as the dorsomedial prefrontal cortex (dmPFC) have been brought into focus and innovative coil designs such as the angled geometry of the double cone coil (DCC) have raised hope to reach even deeper located targets.

Objective

To provide a systematic and comprehensive review on the application of rTMS stimulation of the dmPFC using the DCC in neuropathological and healthy samples.

Methods

We systematically searched the MEDLINE® database (http://www.ncbi.nlm.nih.gov/pubmed/). Due to the heterogeneous naming of DCC stimulation over the dmPFC a variety of search terms was applied resulting in a numeral quantity of 340 hits.

Results

DCC stimulation over the dmPFC has been proven to be safe and feasible in various neuropsychiatric disorders and in healthy subjects. Clinical results are encouraging, but have to be considered as preliminary as data from sham-controlled clinical trials and knowledge about the neurobiological underpinnings are still scarce.

Conclusion

DCC stimulation over the dmPFC represents a promising approach in the fast evolving noninvasive brain stimulation techniques aiming at the functional modulation of brain areas vitally involved in affect, sensory autonomic, cognitive, and salience regulation. This may hold potential for both neuroscientific research and clinical applications in the treatment of psychiatric disorders.",2018-11-08 +,Molecular phylogeny of Sericostomatoidea (Trichoptera) with the establishment of three new families,"We inferred the phylogenetic relationships among 58 genera of Sericostomatoidea, representing all previously accepted families as well as genera that were not placed in established families. The analyses were based on five fragments of the protein coding genes carbamoylphosphate synthetase (CPSase of CAD), isocitrate dehydrogenase (IDH), Elongation factor 1a (EF‐1a), RNA polymerase II (POL II) and cytochrome oxidase I (COI). The data set was analysed using Bayesian methods with a mixed model, raxml, and parsimony. The various methods generated slightly different results regarding relationships among families, but the shared results comprise support for: (i) a monophyletic Sericostomatoidea; (ii) a paraphyletic Parasericostoma due to inclusion of Myotrichia murina, leading to synonymization of Myotrichia with Parasericostoma; (iii) a polyphyletic Sericostomatidae, which is divided into two families, Sericostomatidae sensu stricto and Parasericostomatidae fam.n.; (iv) a polyphyletic Helicophidae which is divided into Helicophidae sensu stricto and Heloccabucidae fam.n.; (v) hypothesized phylogenetic placement of the former incerta sedis genera Ngoya, Seselpsyche and Karomana; (vi) a paraphyletic Costora (Conoesucidae) that should be divided into several genera after more careful examination of morphological data; (vii) reinstatement of Gyrocarisa as a valid genus within Petrothrincidae. 
A third family, Ceylanopsychidae fam.n., is established based on morphological characters alone. A hypothesis of the relationship among 14 of the 15 families in the superfamily is presented. A key to the families is presented based on adults (males). Taxonomic history, diagnosis, habitat preference and distribution data for all sericostomatoid families are presented. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:CF6A6B9F‐6A72‐4265‐BD09‐3A710DFCD7B1.",2017-01-01 +25414323,"Allele frequency net 2015 update: new features for HLA epitopes, KIR and disease and HLA adverse drug reaction associations.","It has been 12 years since the Allele Frequency Net Database (AFND; http://www.allelefrequencies.net) was first launched, providing the scientific community with an online repository for the storage of immune gene frequencies in different populations across the world. There have been a significant number of improvements from the first version, making AFND a primary resource for many clinical and scientific areas including histocompatibility, immunogenetics, pharmacogenetics and anthropology studies, among many others. The most widely used part of AFND stores population frequency data (alleles, genes or haplotypes) related to human leukocyte antigens (HLA), killer-cell immunoglobulin-like receptors (KIR), major histocompatibility complex class I chain-related genes (MIC) and a number of cytokine gene polymorphisms. AFND now contains >1400 populations from more than 10 million healthy individuals. Here, we report how the main features of AFND have been updated to include a new section on 'HLA epitope' frequencies in populations, a new section capturing the results of studies identifying HLA associations with adverse drug reactions (ADRs) and one for the examination of infectious and autoimmune diseases associated with KIR polymorphisms-thus extending AFND to serve a new user base in these growing areas of research. 
New criteria on data quality have also been included.",2014-11-20 +29036298,myVCF: a desktop application for high-throughput mutations data management.,"

Summary

Next-generation sequencing technologies have become the most powerful tool to discover genetic variants associated with human diseases. Although the dramatic reductions in the costs facilitate the use in the wet-lab and clinics, the huge amount of data generated renders their management by non-expert researchers and physicians extremely difficult. Therefore, there is an urgent need of novel approaches and tools aimed at getting the 'end-users' closer to the sequencing data, facilitating the access by non-bioinformaticians, and to speed-up the functional interpretation of genetic variants. We developed myVCF, a standalone, easy-to-use desktop application, which is based on a browser interface and is suitable for Windows, Mac and UNIX systems. myVCF is an efficient platform that is able to manage multiple sequencing projects created from VCF files within the system; stores genetic variants and samples genotypes from an annotated VCF files into a SQLite database; implements a flexible search engine for data exploration, allowing to query for chromosomal region, gene, single variant or dbSNP ID. Besides, myVCF generates a summary statistics report about mutations distribution across samples and across the genome/exome by aggregating the information within the VCF file. In summary, the myVCF platform allows end-users without strong programming and bioinformatics skills to explore, query, visualize and export mutations data in a simple and straightforward way.

Availability and implementation

https://apietrelli.github.io/myVCF/.

Contact

pietrelli@ingm.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +28192383,Pilot Study Evaluating the Impact of Otology Surgery Videos on Otolaryngology Resident Education.,"

Objectives

Use of videos as educational tools is not a novel concept; however, there is a paucity of high-quality video resources available to otolaryngology trainees. We hypothesized that residents would deem surgical-videos using a multimedia-style format more valuable as preparatory tools. Aims of this study: 1) develop portfolio of otology/neurotology videos overviewing key steps, anatomy, and pearls by a senior surgeon; 2) have residents rate the effectiveness of the videos as a preoperative tool.

Study design

Prospective study.

Methods

A video-library of procedures at (https://www.youtube.com/user/cisurgeon) was formatted via time-stamping to coincide expert level narration with closed captioning, critical procedural steps, relevant instrumentation, radiographic pictures, orientation cues, and anatomical highlights. Otolaryngology trainees of postgraduate years 2 through 5 (n = 13) watched a minimum of three videos and completed an assessment addressing: current resource identification/usefulness comparison, self-efficacy, impact on preparation time, and prioritization of resources.

Results

The videos were rated as highly useful compared with current resources (p = 0.002) and as capable of promoting self-efficacy. Residents reported moderate-high prioritization of our multi-media formatted resource (scores >6) among their current preoperative regimen.

Conclusion

The varied videos were rated highly in terms of usefulness, promoting self-efficacy and as a high-priority for a resident's surgical preparation. Multimedia-formatted training videos should be further explored for this generation of electronic-learners. Future studies with a larger cohort, objective approaches, and multidisciplinary involvement are needed to determine the full impact of this education medium on surgical-training.",2017-03-01 +31288636,Convolutional neural network approach to lung cancer classification integrating protein interaction network and gene expression profiles.,"Deep learning technologies are permeating every field from image and speech recognition to computational and systems biology. However, the application of convolutional neural networks (CNNs) to ""omics"" data poses some difficulties, such as the processing of complex networks structures as well as its integration with transcriptome data. Here, we propose a CNN approach that combines spectral clustering information processing to classify lung cancer. The developed spectral-convolutional neural network based method achieves success in integrating protein interaction network data and gene expression profiles to classify lung cancer. The performed computational experiments suggest that in terms of accuracy the predictive performance of our proposed method was better than those of other machine learning methods such as SVM or Random Forest. Moreover, the computational results also indicate that the underlying protein network structure assists to enhance the predictions. 
Data and CNN code can be downloaded from the link: https://sites.google.com/site/nacherlab/analysis.",2019-06-01 +29336210,Semi-supervised identification of cancer subgroups using survival outcomes and overlapping grouping information.,"Identification of cancer patient subgroups using high throughput genomic data is of critical importance to clinicians and scientists because it can offer opportunities for more personalized treatment and overlapping treatments of cancers. In spite of tremendous efforts, this problem still remains challenging because of low reproducibility and instability of identified cancer subgroups and molecular features. In order to address this challenge, we developed Integrative Genomics Robust iDentification of cancer subgroups (InGRiD), a statistical approach that integrates information from biological pathway databases with high-throughput genomic data to improve the robustness for identification and interpretation of molecularly-defined subgroups of cancer patients. We applied InGRiD to the gene expression data of high-grade serous ovarian cancer from The Cancer Genome Atlas and the Australian Ovarian Cancer Study. The results indicate clear benefits of the pathway-level approaches over the gene-level approaches. In addition, using the proposed InGRiD framework, we also investigate and address the issue of gene sharing among pathways, which often occurs in practice, to further facilitate biological interpretation of key molecular features associated with cancer progression. The R package ""InGRiD"" implementing the proposed approach is currently available in our research group GitHub webpage ( https://dongjunchung.github.io/INGRID/ ).",2018-01-16 +30165568,AncestryView: data-driven visualization of whole-genome local-ancestry.,"

Summary

Data visualization is a crucial tool for data exploration, analysis and interpretation. To visualize the ancestry data, we developed a new software tool, called AncestryView. We demonstrate its functionality with the data from admixed individuals.

Availability and implementation

Freely available to non-commercial users on the web at https://f001.backblazeb2.com/file/=4DGenome/AncestryView.zip.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +26046679,Detection of gene annotations and protein-protein interaction associated disorders through transitive relationships between integrated annotations.,"

Background

Increasingly high amounts of heterogeneous and valuable controlled biomolecular annotations are available, but far from exhaustive and scattered in many databases. Several annotation integration and prediction approaches have been proposed, but these issues are still unsolved. We previously created a Genomic and Proteomic Knowledge Base (GPKB) that efficiently integrates many distributed biomolecular annotation and interaction data of several organisms, including 32,956,102 gene annotations, 273,522,470 protein annotations and 277,095 protein-protein interactions (PPIs).

Results

By comprehensively leveraging transitive relationships defined by the numerous association data integrated in GPKB, we developed a software procedure that effectively detects and supplements consistent biomolecular annotations not present in the integrated sources. According to some defined logic rules, it does so only when the semantic type of data and of their relationships, as well as the cardinality of the relationships, allow identifying molecular biology compliant annotations. Thanks to controlled consistency and quality enforced on data integrated in GPKB, and to the procedures used to avoid error propagation during their automatic processing, we could reliably identify many annotations, which we integrated in GPKB. They comprise 3,144 gene to pathway and 21,942 gene to biological function annotations of many organisms, and 1,027 candidate associations between 317 genetic disorders and 782 human PPIs. Overall estimated recall and precision of our approach were 90.56 % and 96.61 %, respectively. Co-functional evaluation of genes with known function showed high functional similarity between genes with new detected and known annotation to the same pathway; considering also the new detected gene functional annotations enhanced such functional similarity, which resembled the one existing between genes known to be annotated to the same pathway. Strong evidence was also found in the literature for the candidate associations detected between Cystic fibrosis disorder and the PPIs between the CFTR_HUMAN, DERL1_HUMAN, RNF5_HUMAN, AHSA1_HUMAN and GOPC_HUMAN proteins, and between the CHIP_HUMAN and HSP7C_HUMAN proteins.

Conclusions

Although identified gene annotations and PPI-genetic disorder candidate associations require biological validation, our approach intrinsically provides their in silico evidence based on available data. Public availability within the GPKB (http://www.bioinformatics.deib.polimi.it/GPKB/) of all identified and integrated annotations offers a valuable resource fostering new biomedical-molecular knowledge discoveries.",2015-06-01 +29668970,MitoFish and MiFish Pipeline: A Mitochondrial Genome Database of Fish with an Analysis Pipeline for Environmental DNA Metabarcoding.,"Fish mitochondrial genome (mitogenome) data form a fundamental basis for revealing vertebrate evolution and hydrosphere ecology. Here, we report recent functional updates of MitoFish, which is a database of fish mitogenomes with a precise annotation pipeline MitoAnnotator. Most importantly, we describe implementation of MiFish pipeline for metabarcoding analysis of fish mitochondrial environmental DNA, which is a fast-emerging and powerful technology in fish studies. MitoFish, MitoAnnotator, and MiFish pipeline constitute a key platform for studies of fish evolution, ecology, and conservation, and are freely available at http://mitofish.aori.u-tokyo.ac.jp/ (last accessed April 7th, 2018).",2018-06-01 +24578356,The Zebrafish GenomeWiki: a crowdsourcing approach to connect the long tail for zebrafish gene annotation.,"A large repertoire of gene-centric data has been generated in the field of zebrafish biology. Although the bulk of these data are available in the public domain, most of them are not readily accessible or available in nonstandard formats. One major challenge is to unify and integrate these widely scattered data sources. We tested the hypothesis that active community participation could be a viable option to address this challenge. We present here our approach to create standards for assimilation and sharing of information and a system of open standards for database intercommunication. 
We have attempted to address this challenge by creating a community-centric solution for zebrafish gene annotation. The Zebrafish GenomeWiki is a 'wiki'-based resource, which aims to provide an altruistic shared environment for collective annotation of the zebrafish genes. The Zebrafish GenomeWiki has features that enable users to comment, annotate, edit and rate this gene-centric information. The credits for contributions can be tracked through a transparent microattribution system. In contrast to other wikis, the Zebrafish GenomeWiki is a 'structured wiki' or rather a 'semantic wiki'. The Zebrafish GenomeWiki implements a semantically linked data structure, which in the future would be amenable to semantic search. Database URL: http://genome.igib.res.in/twiki.",2014-02-26 +32006678,PET image reconstruction using physical and mathematical modelling for time of flight PET-MR scanners in the STIR library.,"This work demonstrates how computational and physical modelling of the positron emission tomography (PET) image acquisition process for a state-of-the-art integrated PET and magnetic resonance imaging (PET-MR) system can produce images comparable to the manufacturer. The GE SIGNA PET/MR scanner is manufactured by General Electric and has time-of-flight (TOF) capabilities of about 390 ps. All software development took place in the Software for Tomographic Image Reconstruction (STIR: http://stir.sf.net) library, which is a widely used open source software to reconstruct data as exported from emission tomography scanners. The new software developments will be integrated into STIR, providing the opportunity for researchers worldwide to establish and expand their image reconstruction methods. Furthermore, this work is of particular significance as it provides the first validation of TOF PET image reconstruction for real scanner datasets using the STIR library. 
This paper presents the methodology, analysis, and critical issues encountered in implementing an independent reconstruction software package. Acquired PET data were processed via several appropriate algorithms which are necessary to produce an accurate and precise quantitative image. This included mathematical, physical and anatomical modelling of the patient and simulation of various aspects of the acquisition. These included modelling of random coincidences using 'singles' rates per crystals, detector efficiencies and geometric effects. Attenuation effects were calculated by using the STIR's attenuation correction model. Modelling all these effects within the system matrix allowed the reconstruction of PET images which demonstrates the metabolic uptake of the administered radiopharmaceutical. These implementations were validated using measured phantom and clinical datasets. The developments are tested using the ordered subset expectation maximisation (OSEM) and the more recently proposed kernelised expectation maximisation (KEM) algorithm which incorporates anatomical information from MR images into PET reconstruction.",2020-01-30 +29544540,"A graphical user interface for RAId, a knowledge integrated proteomics analysis suite with accurate statistics.","

Objective

RAId is a software package that has been actively developed for the past 10 years for computationally and visually analyzing MS/MS data. Founded on rigorous statistical methods, RAId's core program computes accurate E-values for peptides and proteins identified during database searches. Making this robust tool readily accessible for the proteomics community by developing a graphical user interface (GUI) is our main goal here.

Results

We have constructed a graphical user interface to facilitate the use of RAId on users' local machines. Written in Java, RAId_GUI not only makes easy executions of RAId but also provides tools for data/spectra visualization, MS-product analysis, molecular isotopic distribution analysis, and graphing the retrieval versus the proportion of false discoveries. The results viewer displays and allows the users to download the analyses results. Both the knowledge-integrated organismal databases and the code package (containing source code, the graphical user interface, and a user manual) are available for download at https://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads/raid.html .",2018-03-15 +25230706,Personalized Oncology Suite: integrating next-generation sequencing data and whole-slide bioimages.,"

Background

Cancer immunotherapy has recently entered a remarkable renaissance phase with the approval of several agents for treatment. Cancer treatment platforms have demonstrated profound tumor regressions including complete cure in patients with metastatic cancer. Moreover, technological advances in next-generation sequencing (NGS) as well as the development of devices for scanning whole-slide bioimages from tissue sections and image analysis software for quantitation of tumor-infiltrating lymphocytes (TILs) allow, for the first time, the development of personalized cancer immunotherapies that target patient specific mutations. However, there is currently no bioinformatics solution that supports the integration of these heterogeneous datasets.

Results

We have developed a bioinformatics platform - Personalized Oncology Suite (POS) - that integrates clinical data, NGS data and whole-slide bioimages from tissue sections. POS is a web-based platform that is scalable, flexible and expandable. The underlying database is based on a data warehouse schema, which is used to integrate information from different sources. POS stores clinical data, genomic data (SNPs and INDELs identified from NGS analysis), and scanned whole-slide images. It features a genome browser as well as access to several instances of the bioimage management application Bisque. POS provides different visualization techniques and offers sophisticated upload and download possibilities. The modular architecture of POS allows the community to easily modify and extend the application.

Conclusions

The web-based integration of clinical, NGS, and imaging data represents a valuable resource for clinical researchers and future application in medical oncology. POS can be used not only in the context of cancer immunology but also in other studies in which NGS data and images of tissue sections are generated. The application is open-source and can be downloaded at http://www.icbi.at/POS.",2014-09-18 +29426279,"Genomic analyses of the Chlamydia trachomatis core genome show an association between chromosomal genome, plasmid type and disease.","

Background

Chlamydia trachomatis (Ct) plasmid has been shown to encode genes essential for infection. We evaluated the population structure of Ct using whole-genome sequence data (WGS). In particular, the relationship between the Ct genome, plasmid and disease was investigated.

Results

WGS data from 157 Ct isolates deposited in the Chlamydiales pubMLST database ( http://pubMLST.org/chlamydiales/ ) were annotated with 902 genes including the core and accessory genome. Plasmid associated genes were annotated and a plasmid MLST scheme was defined allowing plasmid sequence types to be determined. Plasmid allelic variation was investigated. Phylogenetic relationships were examined using the Genome Comparator tool available in pubMLST. Phylogenetic analyses identified four distinct Ct core genome clusters and six plasmid clusters, with a strong association between the chromosomal genotype and plasmid. This in turn was linked to ompA genovars and disease phenotype. Horizontal genetic transfer of plasmids was observed for three urogenital-associated isolates, which possessed plasmids more commonly found in isolates resulting from ocular infections. The pgp3 gene was identified as the most polymorphic plasmid gene and pgp4 was the most conserved.

Conclusion

A strong association between chromosomal genome, plasmid type and disease was observed, consistent with previous studies. This suggests co-evolution of the Ct chromosome and their plasmids, but we confirmed that plasmid transfer can occur between isolates. These data provide a better understanding of the genetic diversity occurring across the Ct genome in association with the plasmid content.",2018-02-09 +32760527,Shifting spaces: Which disparity or dissimilarity measurement best summarize occupancy in multidimensional spaces?,"Multidimensional analysis of traits are now common in ecology and evolution and are based on trait spaces in which each dimension summarizes the observed trait combination (a morphospace or an ecospace). Observations of interest will typically occupy a subset of this space, and researchers will calculate one or more measures to quantify how organisms inhabit that space. In macroevolution and ecology, these measures called disparity or dissimilarity metrics are generalized as space occupancy measures. Researchers use these measures to investigate how space occupancy changes through time, in relation to other groups of organisms, or in response to global environmental changes. However, the mathematical and biological meaning of most space occupancy measures is vague with the majority of widely used measures lacking formal description. Here, we propose a broad classification of space occupancy measures into three categories that capture changes in size, density, or position. We study the behavior of 25 measures to changes in trait space size, density, and position on simulated and empirical datasets. We find that no measure describes all of trait space aspects but that some are better at capturing certain aspects. Our results confirm the three broad categories (size, density, and position) and allow us to relate changes in any of these categories to biological phenomena. 
Because the choice of space occupancy measures is specific to the data and question, we introduced https://tguillerme.shinyapps.io/moms/moms, a tool to both visualize and capture changes in space occupancy for any measurement. https://tguillerme.shinyapps.io/moms/moms is designed to help workers choose the right space occupancy measures, given the properties of their trait space and their biological question. By providing guidelines and common vocabulary for space occupancy analysis, we hope to help bridging the gap in multidimensional research between ecology and evolution.",2020-07-05 +30129931,Single-cell RNA sequencing of mouse brain and lung vascular and vessel-associated cell types.,"Vascular diseases are major causes of death, yet our understanding of the cellular constituents of blood vessels, including how differences in their gene expression profiles create diversity in vascular structure and function, is limited. In this paper, we describe a single-cell RNA sequencing (scRNA-seq) dataset that defines vascular and vessel-associated cell types and subtypes in mouse brain and lung. The dataset contains 3,436 single cell transcriptomes from mouse brain, which formed 15 distinct clusters corresponding to cell (sub)types, and another 1,504 single cell transcriptomes from mouse lung, which formed 17 cell clusters. In order to allow user-friendly access to our data, we constructed a searchable database (http://betsholtzlab.org/VascularSingleCells/database.html). 
Our dataset constitutes a comprehensive molecular atlas of vascular and vessel-associated cell types in the mouse brain and lung, and as such provides a strong foundation for future studies of vascular development and diseases.",2018-08-21 +29533231,Worldwide Protein Data Bank validation information: usage and trends.,"Realising the importance of assessing the quality of the biomolecular structures deposited in the Protein Data Bank (PDB), the Worldwide Protein Data Bank (wwPDB) partners established Validation Task Forces to obtain advice on the methods and standards to be used to validate structures determined by X-ray crystallography, nuclear magnetic resonance spectroscopy and three-dimensional electron cryo-microscopy. The resulting wwPDB validation pipeline is an integral part of the wwPDB OneDep deposition, biocuration and validation system. The wwPDB Validation Service webserver (https://validate.wwpdb.org) can be used to perform checks prior to deposition. Here, it is shown how validation metrics can be combined to produce an overall score that allows the ranking of macromolecular structures and domains in search results. The ValTrendsDB database provides users with a convenient way to access and analyse validation information and other properties of X-ray crystal structures in the PDB, including investigating trends in and correlations between different structure properties and validation metrics.",2018-03-02 +33064555,Vascular dysfunction and oxidative stress caused by acute formaldehyde exposure in female adults.,"Formaldehyde (FA) is a common, volatile organic compound used in organic preservation with known health effects of eye, nose, and throat irritation linked to oxidative stress and inflammation. Indeed, long-term FA exposure may provoke skin disorders, cancer, and cardiovascular disease. However, the effects of short-term FA exposure on the vasculature have yet to be investigated. 
We sought to investigate the impact of an acute FA exposure on 1) macrovascular function in the arm (brachial artery flow-mediated dilation, FMD), 2) microvascular function in the arm (brachial artery reactive hyperemia, RH) and leg (common femoral artery, supine passive limb movement, PLM), and 3) circulating markers of oxidative stress (xanthine oxidase, XO; protein carbonyl, PC; and malondialdehyde, MDA) and inflammation (C-reactive protein, CRP). Ten (n = 10) healthy females (23 ± 1 yr) were studied before and immediately after a 90-min FA exposure [(FA): 197 ± 79 ppb] in cadaver dissection laboratories. Brachial artery FMD% decreased following FA exposure (Pre-FA Exp: 9.41 ± 4.21%, Post-FA Exp: 6.74 ± 2.57%; P = 0.043), and FMD/shear decreased following FA exposure (Pre-FA Exp: 0.13 ± 0.07 AU, Post-FA Exp: 0.07 ± 0.03 AU; P = 0.016). The area under the curve for brachial artery RH (Pre-FA Exp: 481 ± 191 ml, Post-FA Exp: 499 ± 165 ml) and common femoral artery PLM (Pre-FA Exp: 139 ± 95 ml, Post-FA Exp: 129 ± 64 ml) were unchanged by FA exposure (P > 0.05). Circulating MDA increased (Pre-FA Exp: 4.8 ± 1.3 µM, Post-FA Exp: 6.3 ± 2.2 µM; P = 0.047) while XO, PC, and CRP were unchanged by FA exposure (P > 0.05). These initial data suggest a short FA exposure can adversely alter vascular function and oxidative stress, influencing cardiovascular health.NEW & NOTEWORTHY This study was the first to investigate the implications of acute formaldehyde (FA) exposure on adult female vascular function in the arms and legs. The main findings of this study were a decrease in conduit vessel function without any alteration to microvascular function following a 90-min FA exposure. Additionally, the oxidative stress marker malondialdehyde increased after FA exposure. 
Taken together, these results suggest acute FA exposure has deleterious implications for the vasculature and redox balance. Listen to this article's corresponding podcast at https://ajpheart.podbean.com/e/formaldehyde-exposure-decreases-vascular-function/.",2020-10-16 +31793988,DNA Readout Viewer (DRV): visualization of specificity determining patterns of protein-binding DNA segments.,"

Summary

The sequence specific recognition of DNA by regulatory proteins typically occurs by establishing hydrogen bonds and non-bonded contacts between chemical sub-structures of nucleotides and amino acids forming the compatible interacting surfaces. The recognition process is also influenced by the physicochemical and conformational character of the target oligonucleotide motif. Although the role of these mechanisms in DNA-protein interactions is well-established, bioinformatical methods rarely address them directly, instead binding specificity is mostly assessed at nucleotide level. DNA Readout Viewer (DRV) aims to provide a novel DNA representation, facilitating in-depth view into these mechanisms by the concurrent visualization of functional groups and a diverse collection of DNA descriptors. By applying its intuitive representation concept for various DNA recognition related visualization tasks, DRV can contribute to unravelling the binding specificity factors of DNA-protein interactions.

Availability and implementation

DRV is freely available at https://drv.brc.hu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +31114924,EpiAlignment: alignment with both DNA sequence and epigenomic data.,"Comparative epigenomics, which subjects both epigenome and genome to interspecies comparison, has become a powerful approach to reveal regulatory features of the genome. Thus elucidated regulatory features surpass the information derived from comparison of genomic sequences alone. Here, we present EpiAlignment, a web-based tool to align genomic regions with both DNA sequence and epigenomic data. EpiAlignment takes DNA sequence and epigenomic profiles derived by ChIP-seq from two species as input data, and outputs the best semi-global alignments. These alignments are based on EpiAlignment scores, computed by a dynamic programming algorithm that accounts for both sequence alignment and epigenome similarity. For timely response, the EpiAlignment web server automatically initiates up to 140 computing threads depending on the size of user input data. For users' convenience, we have pre-compiled the comparable human and mouse epigenome datasets in matched cell types and tissues from the Roadmap Epigenomics and ENCODE consortia. Users can either upload their own data or select pre-compiled datasets as inputs for EpiAlignment analyses. Results are presented in graphical and tabular formats where the entries can be interactively expanded to visualize additional features of these aligned regions. EpiAlignment is available at https://epialign.ucsd.edu/.",2019-07-01 +31725864,Gold-standard ontology-based anatomical annotation in the CRAFT Corpus. ,"Gold-standard annotated corpora have become important resources for the training and testing of natural-language-processing (NLP) systems designed to support biocuration efforts, and ontologies are increasingly used to facilitate curational consistency and semantic integration across disparate resources. 
Bringing together the respective power of these, the Colorado Richly Annotated Full-Text (CRAFT) Corpus, a collection of full-length, open-access biomedical journal articles with extensive manually created syntactic, formatting and semantic markup, was previously created and released. This initial public release has already been used in multiple projects to drive development of systems focused on a variety of biocuration, search, visualization, and semantic and syntactic NLP tasks. Building on its demonstrated utility, we have expanded the CRAFT Corpus with a large set of manually created semantic annotations relying on Uberon, an ontology representing anatomical entities and life-cycle stages of multicellular organisms across species as well as types of multicellular organisms defined in terms of life-cycle stage and sexual characteristics. This newly created set of annotations, which has been added for v2.1 of the corpus, is by far the largest publicly available collection of gold-standard anatomical markup and is the first large-scale effort at manual markup of biomedical text relying on the entirety of an anatomical terminology, as opposed to annotation with a small number of high-level anatomical categories, as performed in previous corpora. In addition to presenting and discussing this newly available resource, we apply it to provide a performance baseline for the automatic annotation of anatomical concepts in biomedical text using a prominent concept recognition system. The full corpus, released with a CC BY 3.0 license, may be downloaded from http://bionlp-corpora.sourceforge.net/CRAFT/index.shtml. Database URL: http://bionlp-corpora.sourceforge.net/CRAFT/index.shtml.",2017-01-01 +31874624,NmSEER V2.0: a prediction tool for 2'-O-methylation sites based on random forest and multi-encoding combination.,"

Background

2'-O-methylation (2'-O-me or Nm) is a post-transcriptional RNA methylation modified at 2'-hydroxy, which is common in mRNAs and various non-coding RNAs. Previous studies revealed the significance of Nm in multiple biological processes. With Nm getting more and more attention, a revolutionary technique termed Nm-seq, was developed to profile Nm sites mainly in mRNA with single nucleotide resolution and high sensitivity. In a recent work, supported by the Nm-seq data, we have reported a method in silico for predicting Nm sites, which relies on nucleotide sequence information, and established an online server named NmSEER. More recently, a more confident dataset produced by refined Nm-seq was available. Therefore, in this work, we redesigned the prediction model to achieve a more robust performance on the new data.

Results

We redesigned the prediction model from two perspectives, including machine learning algorithm and multi-encoding scheme combination. With optimization by 5-fold cross-validation tests and evaluation by independent test respectively, random forest was selected as the most robust algorithm. Meanwhile, one-hot encoding, together with position-specific dinucleotide sequence profile and K-nucleotide frequency encoding were collectively applied to build the final predictor.

Conclusions

The predictor of updated version, named NmSEER V2.0, achieves an accurate prediction performance (AUROC = 0.862) and has been settled into a brand-new server, which is available at http://www.rnanut.net/nmseer-v2/ for free.",2019-12-24 +22629346,arrayMap: a reference resource for genomic copy number imbalances in human malignancies.,"

Background

The delineation of genomic copy number abnormalities (CNAs) from cancer samples has been instrumental for identification of tumor suppressor genes and oncogenes and proven useful for clinical marker detection. An increasing number of projects have mapped CNAs using high-resolution microarray based techniques. So far, no single resource does provide a global collection of readily accessible oncogenomic array data.

Methodology/principal findings

We here present arrayMap, a curated reference database and bioinformatics resource targeting copy number profiling data in human cancer. The arrayMap database provides a platform for meta-analysis and systems level data integration of high-resolution oncogenomic CNA data. To date, the resource incorporates more than 40,000 arrays in 224 cancer types extracted from several resources, including the NCBI's Gene Expression Omnibus (GEO), EBI's ArrayExpress (AE), The Cancer Genome Atlas (TCGA), publication supplements and direct submissions. For the majority of the included datasets, probe level and integrated visualization facilitate gene level and genome wide data review. Results from multi-case selections can be connected to downstream data analysis and visualization tools.

Conclusions/significance

To our knowledge, currently no data source provides an extensive collection of high resolution oncogenomic CNA data which readily could be used for genomic feature mining, across a representative range of cancer entities. arrayMap represents our effort for providing a long term platform for oncogenomic CNA data independent of specific platform considerations or specific project dependence. The online database can be accessed at http://www.arraymap.org.",2012-05-18 +26476444,PDBe: improved accessibility of macromolecular structure data from PDB and EMDB.,"The Protein Data Bank in Europe (http://pdbe.org) accepts and annotates depositions of macromolecular structure data in the PDB and EMDB archives and enriches, integrates and disseminates structural information in a variety of ways. The PDBe website has been redesigned based on an analysis of user requirements, and now offers intuitive access to improved and value-added macromolecular structure information. Unique value-added information includes lists of reviews and research articles that cite or mention PDB entries as well as access to figures and legends from full-text open-access publications that describe PDB entries. A powerful new query system not only shows all the PDB entries that match a given query, but also shows the 'best structures' for a given macromolecule, ligand complex or sequence family using data-quality information from the wwPDB validation reports. A PDBe RESTful API has been developed to provide unified access to macromolecular structure data available in the PDB and EMDB archives as well as value-added annotations, e.g. regarding structure quality and up-to-date cross-reference information from the SIFTS resource. 
Taken together, these new developments facilitate unified access to macromolecular structure data in an intuitive way for non-expert users and support expert users in analysing macromolecular structure data.",2015-10-17 +30847467,Automated exploration of gene ontology term and pathway networks with ClueGO-REST.,"

Summary

Large scale technologies produce massive amounts of experimental data that need to be investigated. To improve their biological interpretation we have developed ClueGO, a Cytoscape App that selects representative Gene Ontology terms and pathways for one or multiple lists of genes/proteins and visualizes them into functionally organized networks. Because of its reliability, user-friendliness and support of many species ClueGO gained a large community of users. To further allow scientists programmatic access to ClueGO with R, Python, JavaScript etc., we implemented the cyREST API into ClueGO. In this article we describe this novel, complementary way of accessing ClueGO via REST, and provide R and Python examples to demonstrate how ClueGO workflows can be integrated into bioinformatic analysis pipelines.

Availability and implementation

ClueGO is available in the Cytoscape App Store (http://apps.cytoscape.org/apps/cluego).

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +32473122,Accurate prediction of species-specific 2-hydroxyisobutyrylation sites based on machine learning frameworks.,"Lysine 2-hydroxyisobutyrylation (Khib) is a newly discovered post-translational modification (PTM) across eukaryotes and prokaryotes in recent years, which plays a significant role in diverse cellular functions. Accurate prediction of Khib sites is a first-crucial step to decipher its molecular mechanism and urgently needed. In this work, based on a large benchmark datasets in multi-species, a novel online species-specific prediction tool, namely KhibPred, was developed to identify Khib sites. Four types of feature strategies, including sequence-based information, physicochemical properties and evolutionary-derived information, were applied to represent a wide range of protein sequences, and the random forest was used to build the optimal feature datasets. Moreover, six representative machine learning (ML) methods were trained and comprehensively discussed and compared for each organism. Data analyses suggested that the unique protein sequence preferences were discovered for each species. When evaluated on independent test datasets, the area under the receiver operating characteristic curves (AUCs) achieved 0.807, 0.781, 0.825 and 0.831 for Saccharomyces cerevisiaes, Physcomitrella patens, Rice Seeds and HeLa cells, respectively. The satisfactory results imply that KhibPred is a promising computational tool. The online predictor can be freely available at: http://bioinfo.ncu.edu.cn/KhibPred.aspx.",2020-05-28 +31989004,Datasets for the microstructure of nanoscale metal network structures and for its evolution during coarsening.,"The datasets in this work are files containing atom position coordinates of volume elements approximating nanoporous gold made by dealloying and annealing. 
The material is represented in an as-prepared state and in various stages of coarsening, as described in Phys. Rev. Mater, 3 (2019) 076001. Realistic initial structures of different solid fractions have been constructed by the leveled-wave algorithm, approximating mixtures at the end of early-stage spinodal decomposition. The microstructural evolution during coarsening by surface diffusion was approximated by on-lattice kinetic Monte-Carlo simulation. The data sets refer to solid fractions from 0.22 to 0.50, providing for different initial connectivity of the bicontinuous structures. Coarsening at two temperatures, 900 K and 1800 K, explores two different degrees of surface energy anisotropy - more faceted at 900 K and more rough at 1800 K. Each structure takes the form of a face-centred cubic lattice with approximately 32 million sites. A site can be occupied by either void or atom. 3D periodic boundary conditions are satisfied. Tables list each structure's properties, and specifically the specific surface area, two different measures for the ligament size, the net topological genus as well as the scaled genus. The atom coordinate files may serve as the basis for geometry analysis and for atomistic as well as finite element simulation studies of nanoporous as well as spinodally decomposed materials. The data sets are accessible via the TORE repository at http://hdl.handle.net/11420/3253.",2019-12-24 +31481668,"Success of Montreal Protocol Demonstrated by Comparing High-Quality UV Measurements with ""World Avoided"" Calculations from Two Chemistry-Climate Models.","The Montreal Protocol on Substances that Deplete the Ozone Layer has been hailed as the most successful environmental treaty ever ( https://www.unenvironment.org/news-and-stories/story/montreal-protocol-triumph-treaty ). 
Yet, although our main concern about ozone depletion is the subsequent increase in harmful solar UV radiation at the Earth's surface, no studies to date have demonstrated its effectiveness in that regard. Here we use long-term UV Index (UVI) data derived from high-quality UV spectroradiometer measurements to demonstrate its success in curbing increases in UV radiation. Without this landmark agreement, UVI values would have increased at mid-latitude locations by approximately 20% between the early 1990s and today and would approximately quadruple at mid-latitudes by 2100. In contrast, an analysis of UVI data from multiple clean-air sites shows that maximum daily UVI values have remained essentially constant over the last ~20 years in all seasons, and may even have decreased slightly in the southern hemisphere, especially in Antarctica, where effects of ozone depletion were larger. Reconstructions of the UVI from total ozone data show evidence of increasing UVI levels in the 1980s, but unfortunately, there are no high-quality UV measurements available prior to the early 1990s to confirm these increases with direct observations.",2019-09-03 +32513852,A 20-Mer Peptide Derived from the Lectin Domain of SP-A2 Decreases Tumor Necrosis Factor Alpha Production during Mycoplasma pneumoniae Infection. ,"Human surfactant protein-A2 (hSP-A2) is a component of pulmonary surfactant that plays an important role in the lung's immune system by interacting with viruses, bacteria, and fungi to facilitate pathogen clearance and by downregulating inflammatory responses after an allergic challenge. Genetic variation in SP-A2 at position Gln223Lys is present in up to ∼30% of the population and has been associated with several lung diseases, such as asthma, pulmonary fibrosis, and lung cancer (M. M. Pettigrew, J. F. Gent, Y. Zhu, E. W. Triche, et al., BMC Med Genet 8:15, 2007, https://bmcmedgenet.biomedcentral.com/articles/10.1186/1471-2350-8-15; Y. Wang, P. J. Kuan, C. Zing, J. T. 
Cronkhite, et al., Am J Hum Genet 84:52-59, 2009, https://www.cell.com/ajhg/fulltext/S0002-9297(08)00595-8). Previous work performed by our group showed differences in levels of SP-A binding to non-live mycoplasma membrane fractions that were dependent on the presence of a lysine (K) or a glutamine (Q) at amino acid position 223 in the carbohydrate region of SP-A2. On the basis of these differences, we have derived 20-amino-acid peptides flanking this region of interest in order to test the ability of each to regulate various immune responses to live Mycoplasma pneumoniae in SP-A knockout mice and RAW 264.7 cells. In both models, the 20-mer containing 223Q significantly decreased both tumor necrosis factor alpha (TNF-α) mRNA levels and protein levels in comparison to the 20-mer containing 223K during M. pneumoniae infection. While neither of the 20-mer peptides (223Q and 223K) had an effect on p38 phosphorylation during M. pneumoniae infection, the 223Q-20mer peptide significantly reduced NF-κB p65 phosphorylation in both models. Taken together, our data suggest that small peptides derived from the lectin domain of SP-A2 that contain the major allelic variant (223Q) maintain activity in reducing TNF-α induction during M. pneumoniae infection.",2020-08-19 +32386226,Metabolic adaptation is not a major barrier to weight-loss maintenance.,"

Background

The existence of metabolic adaptation, at the level of resting metabolic rate (RMR), remains highly controversial, likely due to lack of standardization of participants' energy balance. Moreover, its role as a driver of relapse remains unproven.

Objective

The main aim was to determine if metabolic adaptation at the level of RMR was present after weight loss and at 1- and 2-y follow-up, with measurements taken under condition of weight stability. A secondary aim was to investigate race differences in metabolic adaptation after weight loss and if this phenomenon was associated with weight regain.

Methods

A total of 171 overweight women [BMI (kg/m2): 28.3 ± 1.3; age: 35.2 ± 6.3 y; 88 whites and 83 blacks] enrolled in a weight-loss program to achieve a BMI <25, and were followed for 2 y. Body weight and composition (4-compartment model) and RMR (indirect calorimetry) were measured after 4 wk of weight stability at baseline, after weight loss and at 1 and 2 y. Metabolic adaptation was defined as a significantly lower measured compared with predicted RMR (from own regression model).

Results

Participants lost, on average, 12 ± 2.6 kg and regained 52% ± 38% and 89% ± 54% of their initial weight lost at 1 and 2 y follow-up, respectively. Metabolic adaptation was found after weight loss (-54 ± 105 kcal/d; P < 0.001), with no difference between races and was positively correlated with fat-mass loss, but not with weight regain, overall. In a subset of women (n = 46) with data at all time points, metabolic adaptation was present after weight loss, but not at 1- or 2-y follow-up (-43 ± 119, P = 0.019; -18 ± 134, P = 0.380; and -19 ± 166, P = 0.438 kcal/day respectively).

Conclusions

In overweight women, metabolic adaptation at the level of RMR is minimal when measurements are taken under conditions of weight stability and does not predict weight regain up to 2 years follow-up.The JULIET study is registered at https://clinicaltrials.gov/ct2/show/NCT00067873 as NCT00067873.",2020-09-01 +31452600,Top-Down and Intact Protein Mass Spectrometry Data Visualization for Proteoform Analysis Using VisioProt-MS.,"The rise of intact protein analysis by mass spectrometry (MS) was accompanied by an increasing need for flexible tools allowing data visualization and analysis. These include inspection of the deconvoluted molecular weights of the proteoforms eluted alongside liquid chromatography (LC) through their representation in three-dimensional (3D) liquid chromatography coupled to mass spectrometry (LC-MS) maps (plots of deconvoluted molecular weights, retention times, and intensity of the MS signal). With this aim, we developed a free and open-source web application named VisioProt-MS (https://masstools.ipbs.fr/mstools/visioprot-ms/). VisioProt-MS is highly compatible with many algorithms and software developed by the community to integrate and deconvolute top-down and intact protein MS data. Its dynamic and user-friendly features greatly facilitate analysis through several graphical representations dedicated to MS and tandem mass spectrometry (MS/MS) analysis of proteoforms in complex samples. Here, we will illustrate the importance of LC-MS map visualization to optimize top-down acquisition/search parameters and analyze intact protein MS data. 
We will go through the main features of VisioProt-MS using the human proteasomal 20S core particle as a user-case.",2019-08-16 +29868534,Global Health Education and Advocacy: Using BMJ Case Reports to Tackle the Social Determinants of Health.,"Since 2013, BMJ Case Reports (http://casereports.bmj.com/) has published over 70 global health case reports from five continents, written by doctors, nurses, students, and allied health professionals. These cases, a burgeoning repository of evidence of how real patients are affected by disease, trauma, violence, sexual assault, conflict, migration, adverse living and working conditions, and poor access to health care, discuss, in addition to clinicopathological findings, the global health problems affecting each patient. The global health problem analysis examines the problems of individual patients, critically appraises the literature, and describes actual and potential solutions for the patient, the local community, and patients affected by similar issues across the world. At present global health literature and learning materials lack a patient focus and real-life context in the analysis of global health problems. BMJ Case Reports global health case reports are a unique and important tool to learn about and advocate for change in the social, political, cultural, and financial determinants of health as they affect real patients. This growing evidence base brings together clinicians, local service providers, policy makers, and government and non-governmental institutions to effect real change in patients' lives toward improving health. Each global health case report is an excellent resource for learning, and together, these case reports provide essential reading for anyone embarking on a career in global health, and writing their own case report. 
The online course (http://casereports.bmj.com/site/misc/GHMA_Mar_2017.pptx) at BMJ Case Reports uses these cases and is free to access.",2018-05-07 +,Global range expansion of pest Lepidoptera requires socially acceptable solutions,"Caterpillars of key moth pests can cause significant losses in cropping systems worldwide, and globalization is spreading such pests. Failure to control some species can jeopardise the economics of food production. A Global Eradication and Response Database (http://b3.net.nz/gerda) was reviewed on known government-level incursion response programs specific to invasive Lepidoptera. Geographic range expansion of Lepidoptera was evident from 144 incursion response programs targeting 28 species in 10 families. The countries involved in responses to Lepidoptera were USA (104), Australia (8), Canada (7), New Zealand (6), Italy (3), Mexico (2), with the remainder with one programme each (Brazil, Czech Republic, France, Hungary, and Spain). Most programs have been undertaken since the 1990’s. Control options exist for the long-term management of Lepidoptera, but most have issues of cost, efficacy or non-target impacts that reduce their acceptance. Pheromone-based technologies are increasingly available and are generally highly compatible with other tactics. The development of tactics for new targets is a major undertaking, although previous programs can be invaluable. New and improved socially-acceptable technologies are needed to counteract range expansion in Lepidoptera, and usually need to be used in combinations to achieve eradication. The sterile insect technique, which involves mass-rearing and release of sterile insects to reduce wild populations of the pest, has been used successfully against a number of lepidopteran species. Several sterile moth programs are under development. New technologies must have a social license to operate in urban areas, where new incursions are frequently detected. 
This factor is likely to reduce tactical flexibility and increase the complexity of insect eradication.",2017-04-01 +30689732,DeepAMR for predicting co-occurrent resistance of Mycobacterium tuberculosis.,"

Motivation

Resistance co-occurrence within first-line anti-tuberculosis (TB) drugs is a common phenomenon. Existing methods based on genetic data analysis of Mycobacterium tuberculosis (MTB) have been able to predict resistance of MTB to individual drugs, but have not considered the resistance co-occurrence and cannot capture latent structure of genomic data that corresponds to lineages.

Results

We used a large cohort of TB patients from 16 countries across six continents where whole-genome sequences for each isolate and associated phenotype to anti-TB drugs were obtained using drug susceptibility testing recommended by the World Health Organization. We then proposed an end-to-end multi-task model with deep denoising auto-encoder (DeepAMR) for multiple drug classification and developed DeepAMR_cluster, a clustering variant based on DeepAMR, for learning clusters in latent space of the data. The results showed that DeepAMR outperformed baseline model and four machine learning models with mean AUROC from 94.4% to 98.7% for predicting resistance to four first-line drugs [i.e. isoniazid (INH), ethambutol (EMB), rifampicin (RIF), pyrazinamide (PZA)], multi-drug resistant TB (MDR-TB) and pan-susceptible TB (PANS-TB: MTB that is susceptible to all four first-line anti-TB drugs). In the case of INH, EMB, PZA and MDR-TB, DeepAMR achieved its best mean sensitivity of 94.3%, 91.5%, 87.3% and 96.3%, respectively. While in the case of RIF and PANS-TB, it generated 94.2% and 92.2% sensitivity, which were lower than baseline model by 0.7% and 1.9%, respectively. t-SNE visualization shows that DeepAMR_cluster captures lineage-related clusters in the latent space.

Availability and implementation

The details of source code are provided at http://www.robots.ox.ac.uk/~davidc/code.php.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +30710333,CHAP: Open-source software for processing and analyzing pupillometry data.,"Pupil dilation is an effective indicator of cognitive and affective processes. Although several eyetracker systems on the market can provide effective solutions for pupil dilation measurement, there is a lack of tools for processing and analyzing the data provided by these systems. For this reason, we developed CHAP: open-source software written in MATLAB. This software provides a user-friendly graphical user interface for processing and analyzing pupillometry data. Our software creates uniform conventions for the preprocessing and analysis of pupillometry data and provides a quick and easy-to-use tool for researchers interested in pupillometry. To download CHAP or join our mailing list, please visit CHAP's website: http://in.bgu.ac.il/en/Labs/CNL/chap .",2019-06-01 +31913853,JIB.tools 2.0 - A Bioinformatics Registry for Journal Published Tools with Interoperability to bio.tools. ,"JIB.tools 2.0 is a new approach to more closely embed the curation process in the publication process. This website hosts the tools, software applications, databases and workflow systems published in the Journal of Integrative Bioinformatics (JIB). As soon as a new tool-related publication is published in JIB, the tool is posted to JIB.tools and can afterwards be easily transferred to bio.tools, a large information repository of software tools, databases and services for bioinformatics and the life sciences. In this way, an easily-accessible list of tools is provided which were published in JIB as well as status information regarding the underlying service. With newer registries like bio.tools providing this information on a bigger scale, JIB.tools 2.0 closes the gap between journal publications and registry publication. 
(Reference: https://jib.tools).",2020-01-08 +32197044,MAINMASTseg: Automated Map Segmentation Method for Cryo-EM Density Maps with Symmetry.,"For structural interpretation of cryo-electron microscopy (cryo-EM) density maps that contain multiple chains, map segmentation is an important step. If a map is segmented accurately into regions of individual protein components, the structure of each protein can be separately modeled using an existing modeling tool. Here, we developed new software, MAINMASTseg, for segmenting maps with symmetry. MAINMASTseg is an extension of the MAINMAST de novo cryo-EM protein structure modeling tool, which builds protein structures from a graph structure that captures the distribution of salient density points in the map. MAINMASTseg uses this graph and segments the map by considering symmetry corresponding density points in the graph. We tested MAINMASTseg on a data set of 38 experimentally determined EM density maps. MAINMASTseg successfully identified an individual protein unit for the majority of the maps, which was significantly better than two other popular existing methods, Segger and Phenix. The software is made freely available for academic users at http://kiharalab.org/mainmast_seg.",2020-03-30 +32154342,"Dataset on specifications, carcinogenic and non-carcinogenic risk of volatile organic compounds during recycling paper and cardboard.","Emissions of volatile organic compounds (VOCs) were studied during paper and cardboard recycling from a paper and cardboard solid waste recycling factory (PCSWRF). 
Data are summarized in this article for the following quantities for a PCSWRF during the winter in Tehran, Iran: VOC concentrations (μg m-3), the percentage of detected VOCs, exposure indices (Ei) of individual and total VOCs (TVOCs), inhalation lifetime cancer risk (LTCR) of VOCs, the hazard quotient (HQ) of VOCs, sensitivity analysis (SA) for VOC exposure in different age groups (birth to <81), and Spearman's rank correlation coefficients (r) between VOC concentrations and meteorological parameters. For more insight please see ""Characteristics and Health Effects of Volatile Organic Compound Emissions during Paper and Cardboard Recycling""[1], https://doi.org/10.1016/j.scs.2019.102005.",2020-02-17 +25145340,Mouse IDGenes: a reference database for genetic interactions in the developing mouse brain. ,"The study of developmental processes in the mouse and other vertebrates includes the understanding of patterning along the anterior-posterior, dorsal-ventral and medial- lateral axis. Specifically, neural development is also of great clinical relevance because several human neuropsychiatric disorders such as schizophrenia, autism disorders or drug addiction and also brain malformations are thought to have neurodevelopmental origins, i.e. pathogenesis initiates during childhood and adolescence. Impacts during early neurodevelopment might also predispose to late-onset neurodegenerative disorders, such as Parkinson's disease. The neural tube develops from its precursor tissue, the neural plate, in a patterning process that is determined by compartmentalization into morphogenetic units, the action of local signaling centers and a well-defined and locally restricted expression of genes and their interactions. While public databases provide gene expression data with spatio-temporal resolution, they usually neglect the genetic interactions that govern neural development. 
Here, we introduce Mouse IDGenes, a reference database for genetic interactions in the developing mouse brain. The database is highly curated and offers detailed information about gene expressions and the genetic interactions at the developing mid-/hindbrain boundary. To showcase the predictive power of interaction data, we infer new Wnt/β-catenin target genes by machine learning and validate one of them experimentally. The database is updated regularly. Moreover, it can easily be extended by the research community. Mouse IDGenes will contribute as an important resource to the research on mouse brain development, not exclusively by offering data retrieval, but also by allowing data input. http://mouseidgenes.helmholtz-muenchen.de.",2014-08-20 +29972695,Implications of Off-Target Serotoninergic Drug Activity: An Analysis of Serotonin Syndrome Reports Using a Systematic Bioinformatics Approach.,"

Study objective

Serotonergic adverse drug events (ADEs) are caused by enhanced intrasynaptic concentrations of 5-hydroxytryptamine (5-HT). No systematic process currently exists for evaluating cumulative 5-HT and off-target toxicity of serotonergic drugs. The primary study aim was to create a Serotonergic Expanded Bioactivity Matrix (SEBM) by using a molecular bioinformatics, polypharmacologic approach for assessment of the participation of individual 5-HT drugs in serotonin syndrome (SS) reports.

Data sources

Publicly available databases including the U.S. Food and Drug Administration (FDA) Adverse Event Reporting System (FAERS), ChEMBL, DrugBank, PubChem, and Kyoto Encyclopedia of Genes and Genomes (KEGG) were queried for computational and pharmacologic data.

Design

An in-house bioinformatics TargetSearch program (http://dxulab.org/software) was used to characterize 71 serotonergic drugs interacting at 13 serotonin receptor subtypes and serotonin reuptake transporter protein (SERT). In addition, off-target interactions at norepinephrine transporter (NET), monoamine oxidase (MAO), and muscarinic receptors were included to define seven polypharmacological drug cohorts. Serotonin syndrome reports for each serotonergic drug were extracted from FAERS by using the Sternbach and Hunter criteria.

Measurements and main results

A proportional reporting adverse drug reaction (ADR) ratio (PRR) was calculated from each drug's total ADEs and SS case reports and aggregated by drug bioactivity cohorts. Triple-receptor interactions had a disproportionately higher number of SS cases using both the Hunter criteria (mean PRR 1.72, 95% CI 1.05-2.39) and Sternbach (mean PRR 1.54, 95% CI 1.29-1.79). 5-Hydroxytryptamine agonists were associated with a significantly lower proportion of SS cases using the Hunter and Sternbach criteria, respectively (mean PRR 0.49, 95% CI 0.17-0.81 and mean PRR 0.49, 95% CI 0.15-0.83). Drugs with disproportionately higher participation in SS vary considerably between the two diagnostic criteria.

Conclusion

The SEBM model suggests a possible polypharmacological role in SS. Although further research is needed, off-target receptor activity may help explain differences in severity of toxicity and clinical presentation.",2018-07-29 +22905274,Toxocariasis and epilepsy: systematic review and meta-analysis.,"

Objective

Human toxocariasis is a zoonotic infection caused by the larval stages of Toxocara canis (T. canis) and less frequently Toxocara cati (T. cati). A relationship between toxocariasis and epilepsy has been hypothesized. We conducted a systematic review and a meta-analysis of available data to evaluate the strength of association between epilepsy and Toxocara spp. seropositivity and to propose some guidelines for future surveys.

Data sources

Electronic databases, the database from the Institute of Neuroepidemiology and Tropical Neurology of the University of Limoges (http://www-ient.unilim.fr/) and the reference lists of all relevant papers and books were screened up to October 2011.

Methods

We performed a systematic review of literature on toxocariasis (the exposure) and epilepsy (the outcome). Two authors independently assessed eligibility and study quality and extracted data. A common odds ratio (OR) was estimated using a random-effects meta-analysis model of aggregated published data.

Results

Seven case-control studies met the inclusion criteria, for a total of 1867 participants (850 cases and 1017 controls). The percentage of seropositivity (presence of anti-Toxocara spp. antibodies) was higher among people with epilepsy (PWE) in all the included studies even if the association between epilepsy and Toxocara spp. seropositivity was statistically significant in only 4 studies, with crude ORs ranging 2.04-2.85. Another study bordered statistical significance, while in 2 of the included studies no significant association was found. A significant (p < 0.001) common OR of 1.92 [95% confidence interval (CI) 1.50-2.44] was estimated. Similar results were found when meta-analysis was restricted to the studies considering an exclusively juvenile population and to surveys using Western Blot as confirmatory or diagnostic serological assay.

Conclusion

Our results support the existence of a positive association between Toxocara spp. seropositivity and epilepsy. Further studies, possibly including incident cases, should be performed to better investigate the relationship between toxocariasis and epilepsy.",2012-08-14 +29059334,The MetaCyc database of metabolic pathways and enzymes.,"MetaCyc (https://MetaCyc.org) is a comprehensive reference database of metabolic pathways and enzymes from all domains of life. It contains more than 2570 pathways derived from >54 000 publications, making it the largest curated collection of metabolic pathways. The data in MetaCyc is strictly evidence-based and richly curated, resulting in an encyclopedic reference tool for metabolism. MetaCyc is also used as a knowledge base for generating thousands of organism-specific Pathway/Genome Databases (PGDBs), which are available in the BioCyc (https://BioCyc.org) and other PGDB collections. This article provides an update on the developments in MetaCyc during the past two years, including the expansion of data and addition of new features.",2018-01-01 +28961691,3DBIONOTES v2.0: a web server for the automatic annotation of macromolecular structures.,"

Motivation

Complementing structural information with biochemical and biomedical annotations is a powerful approach to explore the biological function of macromolecular complexes. However, currently the compilation of annotations and structural data is a feature only available for those structures that have been released as entries to the Protein Data Bank.

Results

To help researchers in assessing the consistency between structures and biological annotations for structural models not deposited in databases, we present 3DBIONOTES v2.0, a web application designed for the automatic annotation of biochemical and biomedical information onto macromolecular structural models determined by any experimental or computational technique.

Availability and implementation

The web server is available at http://3dbionotes-ws.cnb.csic.es.

Contact

jsegura@cnb.csic.es.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +30753279,Variational infinite heterogeneous mixture model for semi-supervised clustering of heart enhancers.,"

Motivation

Mammalian genomes can contain thousands of enhancers but only a subset are actively driving gene expression in a given cellular context. Integrated genomic datasets can be harnessed to predict active enhancers. One challenge in integration of large genomic datasets is the increasing heterogeneity: continuous, binary and discrete features may all be relevant. Coupled with the typically small numbers of training examples, semi-supervised approaches for heterogeneous data are needed; however, current enhancer prediction methods are not designed to handle heterogeneous data in the semi-supervised paradigm.

Results

We implemented a Dirichlet Process Heterogeneous Mixture model that infers Gaussian, Bernoulli and Poisson distributions over features. We derived a novel variational inference algorithm to handle semi-supervised learning tasks where certain observations are forced to cluster together. We applied this model to enhancer candidates in mouse heart tissues based on heterogeneous features. We constrained a small number of known active enhancers to appear in the same cluster, and 47 additional regions clustered with them. Many of these are located near heart-specific genes. The model also predicted 1176 active promoters, suggesting that it can discover new enhancers and promoters.

Availability and implementation

We created the 'dphmix' Python package: https://pypi.org/project/dphmix/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +31534701,"Fitness costs and benefits vary for two facultative Burkholderia symbionts of the social amoeba, Dictyostelium discoideum.","Hosts and their associated microbes can enter into different relationships, which can range from mutualism, where both partners benefit, to exploitation, where one partner benefits at the expense of the other. Many host-microbe relationships have been presumed to be mutualistic, but frequently only benefits to the host, and not the microbial symbiont, have been considered. Here, we address this issue by looking at the effect of host association on the fitness of two facultative members of the Dictyostelium discoideum microbiome (Burkholderia agricolaris and Burkholderia hayleyella). Using two indicators of bacterial fitness, growth rate and abundance, we determined the effect of D. discoideum on Burkholderia fitness. In liquid culture, we found that D. discoideum amoebas lowered the growth rate of both Burkholderia species. In soil microcosms, we tracked the abundance of Burkholderia grown with and without D. discoideum over a month and found that B. hayleyella had larger populations when associating with D. discoideum while B. agricolaris was not significantly affected. Overall, we find that both B. agricolaris and B. hayleyella pay a cost to associate with D. discoideum, but B. hayleyella can also benefit under some conditions. Understanding how fitness varies in facultative symbionts will help us understand the persistence of host-symbiont relationships.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://openscholarship.wustl.edu/data/15/.",2019-08-15 +31074494,MINERVA API and plugins: opening molecular network analysis and visualization to the community.,"

Summary

The complexity of molecular networks makes them difficult to navigate and interpret, creating a need for specialized software. MINERVA is a web platform for visualization, exploration and management of molecular networks. Here, we introduce an extension to MINERVA architecture that greatly facilitates the access and use of the stored molecular network data. It allows to incorporate such data in analytical pipelines via a programmatic access interface, and to extend the platform's visual exploration and analytics functionality via plugin architecture. This is possible for any molecular network hosted by the MINERVA platform encoded in well-recognized systems biology formats. To showcase the possibilities of the plugin architecture, we have developed several plugins extending the MINERVA core functionalities. In the article, we demonstrate the plugins for interactive tree traversal of molecular networks, for enrichment analysis and for mapping and visualization of known disease variants or known adverse drug reactions to molecules in the network.

Availability and implementation

Plugins developed and maintained by the MINERVA team are available under the AGPL v3 license at https://git-r3lab.uni.lu/minerva/plugins/. The MINERVA API and plugin documentation is available at https://minerva-web.lcsb.uni.lu.",2019-11-01 +32062916,"[Correlation analysis on meteorological factors regarding the incidence of hand, foot and mouth disease in Xinjiang Uygur Autonomous Region, 2011-2018].","Objective: To study the correlation between meteorological factors and the incidence of hand, foot and mouth disease (HFMD) in Xinjiang Uygur Autonomous Region (Xinjiang) so as to provide scientific evidence for the early warning, prediction, prevention and control of HFMD. Methods: Data on HFMD surveillance and related population was collected from the China Information System for Disease Control and Prevention from 2011 to 2018. Meteorological data was obtained from http://www.tianqihoubao.com. Correlation analysis on meteorological factors and the incidence of HFMD in Xinjiang was conducted, using the Excel 2007, SPSS 17.0, and Spatial Distribution Map by ArcGIS 10.2 software. Results: HFMD usually occurred between April and July. Numbers of patients reached the top in May and June. Temperature was positively correlated with the incidence of HFMD (r=0.370, P<0.01) while precipitation was positively correlated with the incidence of HFMD (r=0.747, P<0.01). The temperature threshold appeared as 5 ℃-35 ℃ for the incidence of HFMD. Interval period was one month between the peak of both the incidence of HFMD and the precipitation. A power function relationship ($y=0.0094x^{2.3329}$, $R^{2}=0.8989$) was noticed between the precipitation and the incidence of HFMD. Conclusions: The incidence of HFMD was closely related to the meteorological factors including temperature and precipitation in Xinjiang during 2011-2018. 
Our findings have provided evidence for the development of early warning system on HFMD in Xinjiang.",2019-12-01 +25442502,De novo prediction of cis-regulatory elements and modules through integrative analysis of a large number of ChIP datasets.,"

Background

In eukaryotes, transcriptional regulation is usually mediated by interactions of multiple transcription factors (TFs) with their respective specific cis-regulatory elements (CREs) in the so-called cis-regulatory modules (CRMs) in DNA. Although the knowledge of CREs and CRMs in a genome is crucial to elucidate gene regulatory networks and understand many important biological phenomena, little is known about the CREs and CRMs in most eukaryotic genomes due to the difficulty to characterize them by either computational or traditional experimental methods. However, the exponentially increasing number of TF binding location data produced by the recent wide adaptation of chromatin immunoprecipitation coupled with microarray hybridization (ChIP-chip) or high-throughput sequencing (ChIP-seq) technologies has provided an unprecedented opportunity to identify CRMs and CREs in genomes. Nonetheless, how to effectively mine these large volumes of ChIP data to identify CREs and CRMs at nucleotide resolution is a highly challenging task.

Results

We have developed a novel graph-theoretic based algorithm DePCRM for genome-wide de novo predictions of CREs and CRMs using a large number of ChIP datasets. DePCRM predicts CREs and CRMs by identifying overrepresented combinatorial CRE motif patterns in multiple ChIP datasets in an effective way. When applied to 168 ChIP datasets of 56 TFs from D. melanogaster, DePCRM identified 184 and 746 overrepresented CRE motifs and their combinatorial patterns, respectively, and predicted a total of 115,932 CRMs in the genome. The predictions recover 77.9% of known CRMs in the datasets and 89.3% of known CRMs containing at least one predicted CRE. We found that the putative CRMs as well as CREs as a whole in a CRM are more conserved than randomly selected sequences.

Conclusion

Our results suggest that the CRMs predicted by DePCRM are highly likely to be functional. Our algorithm is the first of its kind for de novo genome-wide prediction of CREs and CRMs using larger number of transcription factor ChIP datasets. The algorithm and predictions will hopefully facilitate the elucidation of gene regulatory networks in eukaryotes. All the predicted CREs, CRMs, and their target genes are available at http://bioinfo.uncc.edu/mniu/pcrms/www/.",2014-12-02 +25593348,"PlasmoGEM, a database supporting a community resource for large-scale experimental genetics in malaria parasites.","The Plasmodium Genetic Modification (PlasmoGEM) database (http://plasmogem.sanger.ac.uk) provides access to a resource of modular, versatile and adaptable vectors for genome modification of Plasmodium spp. parasites. PlasmoGEM currently consists of >2000 plasmids designed to modify the genome of Plasmodium berghei, a malaria parasite of rodents, which can be requested by non-profit research organisations free of charge. PlasmoGEM vectors are designed with long homology arms for efficient genome integration and carry gene specific barcodes to identify individual mutants. They can be used for a wide array of applications, including protein localisation, gene interaction studies and high-throughput genetic screens. The vector production pipeline is supported by a custom software suite that automates both the vector design process and quality control by full-length sequencing of the finished vectors. The PlasmoGEM web interface allows users to search a database of finished knock-out and gene tagging vectors, view details of their designs, download vector sequence in different formats and view available quality control data as well as suggested genotyping strategies. 
We also make gDNA library clones and intermediate vectors available for researchers to produce vectors for themselves.",2015-01-01 +33133406,Does the Femoral Head Size Influence Outcomes After Uncemented Total Hip Arthroplasty for Fused Hips? A Prospective Study in Ankylosing Spondylitis.,"

Background

Uncemented total hip arthroplasty (THA) with large size femoral heads have shown greater advantage with good stability, range of motion and decreased dislocation rate in ankylosing spondylitis (AS). Meticulous planning is needed to address the unique surgical challenges in such patients with fused hip and spinal deformity.

Materials and methods

Thirty-five fused hip joints in twenty-five AS patients who underwent uncemented THA (April 2014 to December 2016) were included in our prospective study and were followed up for a minimum period of 36 months. Pain relief, functional improvement and patient satisfaction were statistically assessed using ""Visual Analogue Score"" (VAS), ""Harris Hip Score"" (HHS) and ""AJRI 10-Point Satisfaction Score"" (A10PSS), respectively.

Results

The overall mean preoperative VAS improved from 6.9 ± 1.5 to 1.5 ± 1, HHS improved from 50.0 ± 12 to 88.4 ± 7.8 and A10PSS improved from 2.2 ± 1.2 to 7.6 ± 0.8. Our study results were significant with zero dislocation and good functional score in comparison to the other available studies in literature. First subdivision study in AS patients with bilateral THA performed better than unilateral THA. Second subdivision study showed no significant statistical difference in terms of VAS, HHS, A10PSS and dislocation rate in relation to femoral head size between 32 mm, 36 mm and 40 mm.

Conclusion

Uncemented THA with large size femoral head equal or greater than 32 mm provides better stability and good functional outcome with less dislocation rate in comparison to older studies of literature with femoral head size less than 32 mm.

Level of evidence

A Level II study. (Data collected from the ongoing prospective study) (https://www.spine.org/Documents/LevelsofEvidenceFinal.pdf).",2020-08-02 +24569397,PCMdb: pancreatic cancer methylation database.,"Pancreatic cancer is the fifth most aggressive malignancy and urgently requires new biomarkers to facilitate early detection. For providing impetus to the biomarker discovery, we have developed Pancreatic Cancer Methylation Database (PCMDB, http://crdd.osdd.net/raghava/pcmdb/), a comprehensive resource dedicated to methylation of genes in pancreatic cancer. Data was collected and compiled manually from published literature. PCMdb has 65907 entries for methylation status of 4342 unique genes. In PCMdb, data was compiled for both cancer cell lines (53565 entries for 88 cell lines) and cancer tissues (12342 entries for 3078 tissue samples). Among these entries, 47.22% entries reported a high level of methylation for the corresponding genes while 10.87% entries reported low level of methylation. PCMdb covers five major subtypes of pancreatic cancer; however, most of the entries were compiled for adenocarcinomas (88.38%) and mucinous neoplasms (5.76%). A user-friendly interface has been developed for data browsing, searching and analysis. We anticipate that PCMdb will be helpful for pancreatic cancer biomarker discovery.",2014-02-26 +22369513,Analysis of 16S rRNA environmental sequences using MEGAN.,"

Background

Metagenomics is a rapidly growing field of research aimed at studying assemblages of uncultured organisms using various sequencing technologies, with the hope of understanding the true diversity of microbes, their functions, cooperation and evolution. There are two main approaches to metagenomics: amplicon sequencing, which involves PCR-targeted sequencing of a specific locus, often 16S rRNA, and random shotgun sequencing. Several tools or packages have been developed for analyzing communities using 16S rRNA sequences. Similarly, a number of tools exist for analyzing randomly sequenced DNA reads.

Results

We describe an extension of the metagenome analysis tool MEGAN, which allows one to analyze 16S sequences. For the analysis all 16S sequences are blasted against the SILVA database. The result output is imported into MEGAN, using a synonym file that maps the SILVA accession numbers onto the NCBI taxonomy.

Conclusions

Environmental samples are often studied using both targeted 16S rRNA sequencing and random shotgun sequencing. Hence tools are needed that allow one to analyze both types of data together, and one such tool is MEGAN. The ideas presented in this paper are implemented in MEGAN 4, which is available from: http://www-ab.informatik.uni-tuebingen.de/software/megan.",2011-11-30 +31508459,Lipid profiling dataset of the Wnt3a-induced optic nerve regeneration.,"We present lipid profiling data from mouse retina and optic nerve after optic nerve crush and during Wnt3a-induced axonal regeneration at 7 and 15 days post-crush. This data is available at the Metabolomics Workbench, http://www.metabolomicsworkbench.org (Project ID: PR000718).",2019-05-24 +33019050,'Write' but not 'spell' Chinese characters with a BCI-controlled robot.,"Visual brain-computer interface (BCI) systems have made tremendous process in recent years. It has been demonstrated to perform well in spelling words. However, different from spelling English words in one-dimension sequences, Chinese characters are often written in a two-dimensional structure. Previous studies had never investigated how to use BCI to 'write' but not 'spell' Chinese characters. This study developed an innovative BCI-controlled robot for writing Chinese characters. The BCI system contained 108 commands displayed in a 9*12 array. A pixel-based writing method was proposed to map the starting point and ending point of each stroke of Chinese characters to the array. Connecting the starting and ending points for each stroke can make up any Chinese character. The large command set was encoded by the hybrid P300 and SSVEP features efficiently, in which each output needed only 1s of EEG data. The task-related component analysis was used to decode the combined features. Five subjects participated in this study and achieved an average accuracy of 87.23% and a maximal accuracy of 100%. 
The corresponding information transfer rate was 56.85 bits/min and 71.10 bits/min, respectively. The BCI-controlled robotic arm could write a Chinese character '' with 16 strokes within 5.7 seconds for the best subject. The demo video can be found at https://www.youtube.com/watch?v=A1w-e2dBGl0. The study results demonstrated that the proposed BCI-controlled robot is efficient for writing ideogram (e.g. Chinese characters) and phonogram (e.g. English letter), leading to broad prospects for real-world applications of BCIs.",2020-07-01 +32752637,Rigorous numerics for critical orbits in the quadratic family.,"We develop algorithms and techniques to compute rigorous bounds for finite pieces of orbits of the critical points, for intervals of parameter values, in the quadratic family of one-dimensional maps fa(x)=a-x2. We illustrate the effectiveness of our approach by constructing a dynamically defined partition P of the parameter interval Ω=[1.4,2] into almost 4×106 subintervals, for each of which we compute to high precision the orbits of the critical points up to some time N and other dynamically relevant quantities, several of which can vary greatly, possibly spanning several orders of magnitude. We also subdivide P into a family P+ of intervals, which we call stochastic intervals, and a family P- of intervals, which we call regular intervals. We numerically prove that each interval ω∈P+ has an escape time, which roughly means that some iterate of the critical point taken over all the parameters in ω has considerable width in the phase space. This suggests, in turn, that most parameters belonging to the intervals in P+ are stochastic and most parameters belonging to the intervals in P- are regular, thus the names. We prove that the intervals in P+ occupy almost 90% of the total measure of Ω. The software and the data are freely available at http://www.pawelpilarczyk.com/quadr/, and a web page is provided for carrying out the calculations. 
The ideas and procedures can be easily generalized to apply to other parameterized families of dynamical systems.",2020-07-01 +31004480,BHap: a novel approach for bacterial haplotype reconstruction.,"

Motivation

The bacterial haplotype reconstruction is critical for selecting proper treatments for diseases caused by unknown haplotypes. Existing methods and tools do not work well on this task, because they are usually developed for viral instead of bacterial populations.

Results

In this study, we developed BHap, a novel algorithm based on fuzzy flow networks, for reconstructing bacterial haplotypes from next generation sequencing data. Tested on simulated and experimental datasets, we showed that BHap was capable of reconstructing haplotypes of bacterial populations with an average F1 score of 0.87, an average precision of 0.87 and an average recall of 0.88. We also demonstrated that BHap had a low susceptibility to sequencing errors, was capable of reconstructing haplotypes with low coverage and could handle a wide range of mutation rates. Compared with existing approaches, BHap outperformed them in terms of higher F1 scores, better precision, better recall and more accurate estimation of the number of haplotypes.

Availability and implementation

The BHap tool is available at http://www.cs.ucf.edu/~xiaoman/BHap/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +25270086,PIGD: a database for intronless genes in the Poaceae.,"

Background

Intronless genes are a feature of prokaryotes; however, they are widespread and unequally distributed among eukaryotes and represent an important resource to study the evolution of gene architecture. Although many databases on exons and introns exist, there is currently no cohesive database that collects intronless genes in plants into a single database.

Description

In this study, we present the Poaceae Intronless Genes Database (PIGD), a user-friendly web interface to explore information on intronless genes from different plants. Five Poaceae species, Sorghum bicolor, Zea mays, Setaria italica, Panicum virgatum and Brachypodium distachyon, are included in the current release of PIGD. Gene annotations and sequence data were collected and integrated from different databases. The primary focus of this study was to provide gene descriptions and gene product records. In addition, functional annotations, subcellular localization prediction and taxonomic distribution are reported. PIGD allows users to readily browse, search and download data. BLAST and comparative analyses are also provided through this online database, which is available at http://pigd.ahau.edu.cn/.

Conclusion

PIGD provides a solid platform for the collection, integration and analysis of intronless genes in the Poaceae. As such, this database will be useful for subsequent bio-computational analysis in comparative genomics and evolutionary studies.",2014-10-01 +32004161,Burden of fungal infections in Iran.,"INTRODUCTION:The number of fungal infections occurring each year in Iran is not known. As the burden of fungal disease is a measure used to assess and compare the relative impact of different type of fungal diseases on populations, we have estimated the burden of fungal diseases in Iran. METHODOLOGY:We estimated the burden of human fungal diseases based on the specific populations at risk, existing epidemiological data in both local and international databases, and modelling previously described by the LIFE program (http://www.LIFE-worldwide.org). RESULTS:Among the population of Iran (79,926,270 in 2016), 6,670,813 (8.3%) individuals are estimated to suffer from a fungal infection each year. A total of 2,791,568 women aged between 15 and 50 years are estimated to suffer from recurrent vulvovaginal candidiasis, annually. In addition, considering the 13.3% prevalence rate of tinea capitis in children, a total of 2,552,624 cases per year are estimated. The estimated burden of invasive aspergillosis in the 3 groups of patients with hematologic malignancy, lung cancer and chronic pulmonary obstructive disease was 6394 (8.0 per 100,000). The estimate for the burden of allergic disease related to fungi including allergic bronchopulmonary aspergillosis, severe asthma with fungal sensitization and allergic fungal rhinosinusitis was 272,095 (340 per 100,000). Based on the 28,663 cases of HIV infection reported, an estimated 900 and 113 cases with pneumocystosis and cryptococcal meningitis are annually anticipated, respectively. 
CONCLUSION:Our estimates indicate that the importance of fungal infections is high but overlooked in Iran, which warrants further actions by health care authorities.",2018-10-31 +30285084,Large-scale comparative assessment of computational predictors for lysine post-translational modification sites.,"Lysine post-translational modifications (PTMs) play a crucial role in regulating diverse functions and biological processes of proteins. However, because of the large volumes of sequencing data generated from genome-sequencing projects, systematic identification of different types of lysine PTM substrates and PTM sites in the entire proteome remains a major challenge. In recent years, a number of computational methods for lysine PTM identification have been developed. These methods show high diversity in their core algorithms, features extracted and feature selection techniques and evaluation strategies. There is therefore an urgent need to revisit these methods and summarize their methodologies, to improve and further develop computational techniques to identify and characterize lysine PTMs from the large amounts of sequence data. With this goal in mind, we first provide a comprehensive survey on a large collection of 49 state-of-the-art approaches for lysine PTM prediction. We cover a variety of important aspects that are crucial for the development of successful predictors, including operating algorithms, sequence and structural features, feature selection, model performance evaluation and software utility. We further provide our thoughts on potential strategies to improve the model performance. 
Second, in order to examine the feasibility of using deep learning for lysine PTM prediction, we propose a novel computational framework, termed MUscADEL (Multiple Scalable Accurate Deep Learner for lysine PTMs), using deep, bidirectional, long short-term memory recurrent neural networks for accurate and systematic mapping of eight major types of lysine PTMs in the human and mouse proteomes. Extensive benchmarking tests show that MUscADEL outperforms current methods for lysine PTM characterization, demonstrating the potential and power of deep learning techniques in protein PTM prediction. The web server of MUscADEL, together with all the data sets assembled in this study, is freely available at http://muscadel.erc.monash.edu/. We anticipate this comprehensive review and the application of deep learning will provide practical guide and useful insights into PTM prediction and inspire future bioinformatics studies in the related fields.",2019-11-01 +29802319,Frequency of genetic variants associated with arrhythmogenic right ventricular cardiomyopathy in the genome aggregation database.,"Arrhythmogenic right ventricular cardiomyopathy (ARVC) is a rare inherited heart-muscle disorder, which is the most common cause of life-threatening arrhythmias and sudden cardiac death (SCD) in young adults and athletes. Early and accurate diagnosis can be crucial in effective ARVC management and prevention of SCD.The genome Aggregation Database (gnomAD) population of 138,632 unrelated individuals was searched for previously identified ARVC variants, classified as pathogenic or unknown on the disease genetic variant database ( http://www.arvcdatabase.info/ ), in five most-commonly mutated genes: PKP2, DSP, DSG2, DSC2 and JUP, where variants account for 40-50% of all the ARVC cases. 
Minor allele frequency (MAF) of 0.001 was used to define variants as rare or common.The gnomAD data contained 117/364 (32%) of the previously reported pathogenic and 152/266 (57%) of the unknown ARVC variants. The cross-ethnic analysis of MAF revealed that 11 previously classified pathogenic and 57 unknown variants were common (MAF ≥ 0.001) in at least one ethnic gnomAD population and therefore unlikely to be ARVC causing.After applying our MAF analysis the overall frequency of pathogenic ARVC variants in gnomAD was one in 257 individuals, but a more stringent cut-off (MAF ≥ 0.0001) gave a frequency of one in 845, closer to the estimated phenotypic frequency of the disease.Our study demonstrates that the analysis of large cross-ethnic population sequencing data can significantly improve disease variant interpretation. Higher than expected frequency of ARVC variants suggests that a proportion of ARVC-causing variants may be inaccurately classified, implying reduced penetrance of some variants, and/or a polygenic aetiology of ARVC.",2018-05-25 +32018168,"Design of (quinolin-4-ylthio)carboxylic acids as new Escherichia coli DNA gyrase B inhibitors: machine learning studies, molecular docking, synthesis and biological testing.","Spread of multidrug-resistant Escherichia coli clinical isolates is a main problem in the treatment of infectious diseases. Therefore, the modern scientific approaches in decision this problem require not only a prevention strategy, but also the development of new effective inhibitory compounds with selective molecular mechanism of action and low toxicity. The goal of this work is to identify more potent molecules active against E. coli strains by using machine learning, docking studies, synthesis and biological evaluation. A set of predictive QSAR models was built with two publicly available structurally diverse data sets, including recent data deposited in PubChem. 
The predictive ability of these models tested by a 5-fold cross-validation, resulted in balanced accuracies (BA) of 59-98% for the binary classifiers. Test sets validation showed that the models could be instrumental in predicting the antimicrobial activity with an accuracy (with BA = 60-99 %) within the applicability domain. The models were applied to screen a virtual chemical library, which was designed to have activity against resistant E. coli strains. The eight most promising compounds were identified, synthesized and tested. All of them showed the different levels of anti-E. coli activity and acute toxicity. The docking results have shown that all studied compounds are potential DNA gyrase inhibitors through the estimated interactions with amino acid residues and magnesium ion in the enzyme active center The synthesized compounds could be used as an interesting starting point for further development of drugs with low toxicity and selective molecular action mechanism against resistant E. coli strains. The developed QSAR models are freely available online at OCHEM http://ochem.eu/article/112525 and can be used to virtual screening of potential compounds with anti-E. coli activity.",2020-01-24 +30821317,Functional geometry of protein interactomes.,"MOTIVATION:Protein-protein interactions (PPIs) are usually modeled as networks. These networks have extensively been studied using graphlets, small induced subgraphs capturing the local wiring patterns around nodes in networks. They revealed that proteins involved in similar functions tend to be similarly wired. However, such simple models can only represent pairwise relationships and cannot fully capture the higher-order organization of protein interactomes, including protein complexes. RESULTS:To model the multi-scale organization of these complex biological systems, we utilize simplicial complexes from computational geometry. 
The question is how to mine these new representations of protein interactomes to reveal additional biological information. To address this, we define simplets, a generalization of graphlets to simplicial complexes. By using simplets, we define a sensitive measure of similarity between simplicial complex representations that allows for clustering them according to their data types better than clustering them by using other state-of-the-art measures, e.g. spectral distance, or facet distribution distance. We model human and baker's yeast protein interactomes as simplicial complexes that capture PPIs and protein complexes as simplices. On these models, we show that our newly introduced simplet-based methods cluster proteins by function better than the clustering methods that use the standard PPI networks, uncovering the new underlying functional organization of the cell. We demonstrate the existence of the functional geometry in the protein interactome data and the superiority of our simplet-based methods to effectively mine for new biological information hidden in the complexity of the higher-order organization of protein interactomes. AVAILABILITY AND IMPLEMENTATION:Codes and datasets are freely available at http://www0.cs.ucl.ac.uk/staff/natasa/Simplets/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-10-01 +29547883,REStLESS: automated translation of glycan sequences from residue-based notation to SMILES and atomic coordinates.,"Motivation:Glycans and glycoconjugates are usually recorded in dedicated databases in residue-based notations. Only a few of them can be converted into chemical (atom-based) formats highly demanded in conformational and biochemical studies. In this work, we present a tool for translation from a residue-based glycan notation to SMILES. Results:The REStLESS algorithm for translation from the CSDB Linear notation to SMILES was developed. 
REStLESS stands for ResiduEs as Smiles and LinkagEs as SmartS, where SMARTS reaction expressions are used to merge pre-encoded residues into a molecule. The implementation supports virtually all structural features reported in natural carbohydrates and glycoconjugates. The translator is equipped with a mechanism for conversion of SMILES strings into optimized atomic coordinates which can be used as starting geometries for various computational tasks. Availability and implementation:REStLESS is integrated in the Carbohydrate Structure Database (CSDB) and is freely available on the web (http://csdb.glycoscience.ru/csdb2atoms.html). Supplementary information:Supplementary data are available at Bioinformatics online.",2018-08-01 +32530942,Negshell casting: 3D-printed structured and sacrificial cores for soft robot fabrication.,"Soft robot fabrication by casting liquid elastomer often requires multiple steps of casting or skillful manual labor. We present a novel soft robotic fabrication technique: negshell casting (negative-space eggshell casting), that reduces the steps required for fabrication by introducing 3D-printed thin-walled cores for use in casting that are meant to be left in place instead of being removed later in the fabrication process. Negshell casting consists of two types of cores: sacrificial cores (negshell cores) and structural cores. Negshell cores are designed to be broken into small pieces that have little effect on the mechanical structure of the soft robot, and can be used for creating fluidic channels and bellows for actuation. Structural cores, on the other hand, are not meant to be broken, and are for increasing the stiffness of soft robotic structures, such as endoskeletons. We describe the design and fabrication concepts for both types of cores and report the mechanical characterization of the cores embedded in silicone rubber specimens. 
We also present an example use-case of negshell casting for a single joint soft robotic finger, along with an experiment to demonstrate how negshell casting concepts can aid in force transmission. Finally, we present real-world usage of negshell casting in a 6 degree-of-freedom three-finger soft robotic gripper, and a demonstration of the gripper in a robotic pick-and-place task. A companion website with further details about fabrication (as well as an introduction to molding and casting for those who are unfamiliar with the terms), engineering file downloads, and experimental data is provided at https://negshell.github.io/.",2020-06-12 +31296206,CoMutPlotter: a web tool for visual summary of mutations in cancer cohorts.,"

Background

CoMut plot is widely used in cancer research publications as a visual summary of mutational landscapes in cancer cohorts. This summary plot can inspect gene mutation rate and sample mutation burden with their relevant clinical details, which is a common first step for analyzing the recurrence and co-occurrence of gene mutations across samples. The cBioPortal and iCoMut are two web-based tools that allow users to create intricate visualizations from pre-loaded TCGA and ICGC data. For custom data analysis, only limited command-line packages are available now, making the production of CoMut plots difficult to achieve, especially for researchers without advanced bioinformatics skills. To address the needs for custom data and TCGA/ICGC data comparison, we have created CoMutPlotter, a web-based tool for the production of publication-quality graphs in an easy-to-use and automatic manner.

Results

We introduce a web-based tool named CoMutPlotter to lower the barriers between complex cancer genomic data and researchers, providing intuitive access to mutational profiles from TCGA/ICGC projects as well as custom cohort studies. A wide variety of file formats are supported by CoMutPlotter to translate cancer mutation profiles into biological insights and clinical applications, which include Mutation Annotation Format (MAF), Tab-separated values (TSV) and Variant Call Format (VCF) files.

Conclusions

In summary, CoMutPlotter is the first tool of its kind that supports VCF file, the most widely used file format, as its input material. CoMutPlotter also provides the most-wanted function for comparing mutation patterns between custom cohort and TCGA/ICGC project. Contributions of COSMIC mutational signatures in individual samples are also included in the summary plot, which is a unique feature of our tool. CoMutPlotter is freely available at http://tardis.cgu.edu.tw/comutplotter .",2019-07-11 +32730235,"Essential Components of a Public Health Tuberculosis Prevention, Control, and Elimination Program: Recommendations of the Advisory Council for the Elimination of Tuberculosis and the National Tuberculosis Controllers Association.","This report provides an introduction and reference tool for tuberculosis (TB) controllers regarding the essential components of a public health program to prevent, control, and eliminate TB. The Advisory Council for the Elimination of Tuberculosis and the National Tuberculosis Controllers Association recommendations in this report update those previously published (Advisory Council for the Elimination of Tuberculosis. Essential components of a tuberculosis prevention and control program. Recommendations of the Advisory Council for the Elimination of Tuberculosis. MMWR Recomm Rep 1995;44[No. RR-11]). 
The report has been written collaboratively on the basis of experience and expert opinion on approaches to organizing programs engaged in diagnosis, treatment, prevention, and surveillance for TB at state and local levels.This report reemphasizes the importance of well-established priority strategies for TB prevention and control: identification of and completion of treatment for persons with active TB disease; finding and screening persons who have had contact with TB patients; and screening, testing, and treatment of other selected persons and populations at high risk for latent TB infection (LTBI) and subsequent active TB disease.Health departments are responsible for public safety and population health. To meet their responsibilities, TB control programs should institute or ensure completion of numerous responsibilities and activities described in this report: preparing and maintaining an overall plan and policy for TB control; maintaining a surveillance system; collecting and analyzing data; participating in program evaluation and research; prioritizing TB control efforts; ensuring access to recommended laboratory and radiology tests; identifying, managing, and treating contacts and other persons at high risk for Mycobacterium tuberculosis infection; managing persons who have TB disease or who are being evaluated for TB disease; providing TB training and education; and collaborating in the coordination of patient care and other TB control activities. Descriptions of CDC-funded resources, tests for evaluation of persons with TB or LTBI, and treatment regimens for LTBI are provided (Supplementary Appendices; https://stacks.cdc.gov/view/cdc/90289).",2020-07-31 +29337142,Express: A database of transcriptome profiles encompassing known and novel transcripts across multiple development stages in eye tissues.,"Advances in sequencing have facilitated nucleotide-resolution genome-wide transcriptomic profiles across multiple mouse eye tissues. 
However, these RNA sequencing (RNA-seq) based eye developmental transcriptomes are not organized for easy public access, making any further analysis challenging. Here, we present a new database ""Express"" (http://www.iupui.edu/∼sysbio/express/) that unifies various mouse lens and retina RNA-seq data and provides user-friendly visualization of the transcriptome to facilitate gene discovery in the eye. We obtained RNA-seq data encompassing 7 developmental stages of lens in addition to that on isolated lens epithelial and fibers, as well as on 11 developmental stages of retina/isolated retinal rod photoreceptor cells from publicly available wild-type mouse datasets. These datasets were pre-processed, aligned, quantified and normalized for expression levels of known and novel transcripts using a unified expression quantification framework. Express provides heatmap and browser view allowing easy navigation of the genomic organization of transcripts or gene loci. Further, it allows users to search candidate genes and export both the visualizations and the embedded data to facilitate downstream analysis. We identified total of >81,000 transcripts in the lens and >178,000 transcripts in the retina across all the included developmental stages. This analysis revealed that a significant number of the retina-expressed transcripts are novel. Expression of several transcripts in the lens and retina across multiple developmental stages was independently validated by RT-qPCR for established genes such as Pax6 and Lhx2 as well as for new candidates such as Elavl4, Rbm5, Pabpc1, Tia1 and Tubb2b. Thus, Express serves as an effective portal for analyzing pruned RNA-seq expression datasets presently collected for the lens and retina. 
It will allow a wild-type context for the detailed analysis of targeted gene-knockout mouse ocular defect models and facilitate the prioritization of candidate genes from Exome-seq data of eye disease patients.",2018-01-11 +31463656,Modeling Differences Between Response Times of Correct and Incorrect Responses.,"While standard joint models for response time and accuracy commonly assume the relationship between response time and accuracy to be fully explained by the latent variables of the model, this assumption of conditional independence is often violated in practice. If such violations are present, taking these residual dependencies between response time and accuracy into account may both improve the fit of the model to the data and improve our understanding of the response processes that led to the observed responses. In this paper, we propose a framework for the joint modeling of response time and accuracy data that allows for differences in the processes leading to correct and incorrect responses. Extensions of the standard hierarchical model (van der Linden in Psychometrika 72:287-308, 2007. https://doi.org/10.1007/s11336-006-1478-z ) are considered that allow some or all item parameters in the measurement model of speed to differ depending on whether a correct or an incorrect response was obtained. The framework also allows one to consider models that include two speed latent variables, which explain the patterns observed in the responses times of correct and of incorrect responses, respectively. Model selection procedures are proposed and evaluated based on a simulation study, and a simulation study investigating parameter recovery is presented. 
An application of the modeling framework to empirical data from international large-scale assessment is considered to illustrate the relevance of modeling possible differences between the processes leading to correct and incorrect responses.",2019-08-28 +31971562,Generalizable sgRNA design for improved CRISPR/Cas9 editing efficiency.,"

Motivation

The development of clustered regularly interspaced short palindromic repeat (CRISPR)/CRISPR-associated protein 9 (Cas9) technology has provided a simple yet powerful system for targeted genome editing. In recent years, this system has been widely used for various gene editing applications. The CRISPR editing efficacy is mainly dependent on the single guide RNA (sgRNA), which guides Cas9 for genome cleavage. While there have been multiple attempts at improving sgRNA design, there is a pressing need for greater sgRNA potency and generalizability across various experimental conditions.

Results

We employed a unique plasmid library expressed in human cells to quantify the potency of thousands of CRISPR/Cas9 sgRNAs. Differential sequence and structural features among the most and least potent sgRNAs were then used to train a machine learning algorithm for assay design. Comparative analysis indicates that our new algorithm outperforms existing CRISPR/Cas9 sgRNA design tools.

Availability and implementation

The new sgRNA design tool is freely accessible as a web application, http://crispr.wustl.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-05-01 +24194601,HMDD v2.0: a database for experimentally supported human microRNA and disease associations.,"The Human microRNA Disease Database (HMDD; available via the Web site at http://cmbi.bjmu.edu.cn/hmdd and http://202.38.126.151/hmdd/tools/hmdd2.html) is a collection of experimentally supported human microRNA (miRNA) and disease associations. Here, we describe the HMDD v2.0 update that presented several novel options for users to facilitate exploration of the data in the database. In the updated database, miRNA-disease association data were annotated in more details. For example, miRNA-disease association data from genetics, epigenetics, circulating miRNAs and miRNA-target interactions were integrated into the database. In addition, HMDD v2.0 presented more data that were generated based on concepts derived from the miRNA-disease association data, including disease spectrum width of miRNAs and miRNA spectrum width of human diseases. Moreover, we provided users a link to download all the data in the HMDD v2.0 and a link to submit novel data into the database. Meanwhile, we also maintained the old version of HMDD. By keeping data sets up-to-date, HMDD should continue to serve as a valuable resource for investigating the roles of miRNAs in human disease.",2013-11-04 +32435427,Computational identification of N6-methyladenosine sites in multiple tissues of mammals.,"N6-methyladenosine (m6A) is the methylation of the adenosine at the nitrogen-6 position, which is the most abundant RNA methylation modification and involves a series of important biological processes. Accurate identification of m6A sites in genome-wide is invaluable for better understanding their biological functions. In this work, an ensemble predictor named iRNA-m6A was established to identify m6A sites in multiple tissues of human, mouse and rat based on the data from high-throughput sequencing techniques. 
In the proposed predictor, RNA sequences were encoded by physical-chemical property matrix, mono-nucleotide binary encoding and nucleotide chemical property. Subsequently, these features were optimized by using minimum Redundancy Maximum Relevance (mRMR) feature selection method. Based on the optimal feature subset, the best m6A classification models were trained by Support Vector Machine (SVM) with 5-fold cross-validation test. Prediction results on independent dataset showed that our proposed method could produce the excellent generalization ability. We also established a user-friendly webserver called iRNA-m6A which can be freely accessible at http://lin-group.cn/server/iRNA-m6A. This tool will provide more convenience to users for studying m6A modification in different tissues.",2020-04-30 +23415072,Dragon exploration system on marine sponge compounds interactions.,"

Background

Natural products are considered a rich source of new chemical structures that may lead to the therapeutic agents in all major disease areas. About 50% of the drugs introduced in the market in the last 20 years were natural products/derivatives or natural products mimics, which clearly shows the influence of natural products in drug discovery.

Results

In an effort to further support the research in this field, we have developed an integrative knowledge base on Marine Sponge Compounds Interactions (Dragon Exploration System on Marine Sponge Compounds Interactions - DESMSCI) as a web resource. This knowledge base provides information about the associations of the sponge compounds with different biological concepts such as human genes or proteins, diseases, as well as pathways, based on the literature information available in PubMed and information deposited in several other databases. As such, DESMSCI is aimed as a research support resource for problems on the utilization of marine sponge compounds. DESMSCI allows visualization of relationships between different chemical compounds and biological concepts through textual and tabular views, graphs and relational networks. In addition, DESMSCI has built in hypotheses discovery module that generates potentially new/interesting associations among different biomedical concepts. We also present a case study derived from the hypotheses generated by DESMSCI which provides a possible novel mode of action for variolins in Alzheimer's disease.

Conclusion

DESMSCI is the first publicly available (http://www.cbrc.kaust.edu.sa/desmsci) comprehensive resource where users can explore information, compiled by text- and data-mining approaches, on biological and chemical data related to sponge compounds.",2013-02-16 +33016481,Runoff water quantity and quality data from native tallgrass prairie and crop-livestock systems in Oklahoma between 1977 and 1999.,"Erosion and sedimentation pose serious threats to soil and water quality worldwide, including in the U.S. southern Great Plains. To better understand these processes in agricultural landscapes, eight 1.6-ha watersheds were established and instrumented in 1976 at the USDA-ARS Grazinglands Research Laboratory, ∼50 km west of Oklahoma City near El Reno, OK, to measure precipitation and surface runoff quantity and quality. Prior to construction, all watersheds were in native grass, primarily big bluestem (Andropogon gerardii Vitman.), little bluestem [Schizachyrium scoparium (Michx.) Nash], and Indiangrass [Sorghastrum nutans (L.) Nash]; afterwards, four of the eight watersheds were cropped initially into winter wheat (Triticum aestivum L.) (two conventionally tilled and two minimally or no-till). Although there have been many peer-reviewed papers from the Water Resources and Erosion (WRE) watersheds, none included all the datasets collected during the period 1977-1999. The objectives of this paper were (a) to present and discuss all archived historical data, including methods of collection and analysis, (b) to provide summary analyses of the variability in each dataset, and (c) to provide details about how to access these datasets. These datasets are valuable resources to improve modeling in relation to land use and management changes, climate variability, and other environmental factors and may be useful in developing strategies to mitigate environmental impacts of agricultural systems. 
They are available at https://doi.org/10.15482/USDA.ADC/1518421.",2020-06-10 +25593349,The neXtProt knowledgebase on human proteins: current status.,"neXtProt (http://www.nextprot.org) is a human protein-centric knowledgebase developed at the SIB Swiss Institute of Bioinformatics. Focused solely on human proteins, neXtProt aims to provide a state of the art resource for the representation of human biology by capturing a wide range of data, precise annotations, fully traceable data provenance and a web interface which enables researchers to find and view information in a comprehensive manner. Since the introductory neXtProt publication, significant advances have been made on three main aspects: the representation of proteomics data, an extended representation of human variants and the development of an advanced search capability built around semantic technologies. These changes are presented in the current neXtProt update.",2015-01-01 +25776024,LMPID: a manually curated database of linear motifs mediating protein-protein interactions. ,"Linear motifs (LMs), used by a subset of all protein-protein interactions (PPIs), bind to globular receptors or domains and play an important role in signaling networks. LMPID (Linear Motif mediated Protein Interaction Database) is a manually curated database which provides comprehensive experimentally validated information about the LMs mediating PPIs from all organisms on a single platform. About 2200 entries have been compiled by detailed manual curation of PubMed abstracts, of which about 1000 LM entries were being annotated for the first time, as compared with the Eukaryotic LM resource. The users can submit their query through a user-friendly search page and browse the data in the alphabetical order of the bait gene names and according to the domains interacting with the LM. LMPID is freely accessible at http://bicresources.jcbose. 
ac.in/ssaha4/lmpid and contains 1750 unique LM instances found within 1181 baits interacting with 552 prey proteins. In summary, LMPID is an attempt to enrich the existing repertoire of resources available for studying the LMs implicated in PPIs and may help in understanding the patterns of LMs binding to a specific domain and develop prediction model to identify novel LMs specific to a domain and further able to predict inhibitors/modulators of PPI of interest.",2015-03-16 +32548218,"Dataset and methodology on identification and correlation of secondary carbides with microstructure, wear mechanism, and tool performance for different CERMET grades during high-speed dry finish turning of AISI 304 stainless steel.","The aim of this research is to utilize reverse engineering approach for the identification of the elements and phases available in the commercial CERMET inserts with the help of characterization techniques such as Scanning Electron Microscope (SEM), Energy-dispersive X-ray spectroscopy (EDS), and X-Ray Deposition (XRD). Four commercial CERMET inserts were investigated in this research work, and the effect of the composition and phases are related to its tool wear mechanism and performance. Each CERMET insert is used to perform a turning process on a CNC lathe for machining stainless steel (SS) under the dry condition at a fixed cutting length interval. Once it completes machining for a fixed cutting length, the CERMET insert is taken out to investigate its wear mechanism with the help of SEM, EDS, XRD and using a focus-variation microscope (Alicona). A correlation analysis is performed to relate progressive tool wear mechanisms with elements and their relevant phases of various carbides. The approach of correlating wear property with the phase content will contribute to the understanding of the wear mechanism under such extreme machining conditions. 
It will serve as a reference for the improvement of the performance of these CERMET inserts for such harsh machining conditions by the development of protective coatings for these CERMET inserts based on the identification of the composition and phases that improves tool life and reduces wear. The data related research work can be found at ""https://doi.org/10.1016/j.wear.2020.203285"" [1].",2020-05-21 +29165593,"FunCoup 4: new species, data, and visualization.","This release of the FunCoup database (http://funcoup.sbc.su.se) is the fourth generation of one of the most comprehensive databases for genome-wide functional association networks. These functional associations are inferred via integrating various data types using a naive Bayesian algorithm and orthology based information transfer across different species. This approach provides high coverage of the included genomes as well as high quality of inferred interactions. In this update of FunCoup we introduce four new eukaryotic species: Schizosaccharomyces pombe, Plasmodium falciparum, Bos taurus, Oryza sativa and open the database to the prokaryotic domain by including networks for Escherichia coli and Bacillus subtilis. The latter allows us to also introduce a new class of functional association between genes - co-occurrence in the same operon. We also supplemented the existing classes of functional association: metabolic, signaling, complex and physical protein interaction with up-to-date information. In this release we switched to InParanoid v8 as the source of orthology and base for calculation of phylogenetic profiles. While populating all other evidence types with new data we introduce a new evidence type based on quantitative mass spectrometry data. 
Finally, the new JavaScript based network viewer provides the user an intuitive and responsive platform to further evaluate the results.",2018-01-01 +25404130,Content discovery and retrieval services at the European Nucleotide Archive.,"The European Nucleotide Archive (ENA; http://www.ebi.ac.uk/ena) is Europe's primary resource for nucleotide sequence information. With the growing volume and diversity of public sequencing data comes the need for increased sophistication in data organisation, presentation and search services so as to maximise its discoverability and usability. In response to this, ENA has been introducing and improving checklists for use during submission and expanding its search facilities to provide targeted search results. Here, we give a brief update on ENA content and some major developments undertaken in data submission services during 2014. We then describe in more detail the services we offer for data discovery and retrieval.",2014-11-17 +29069459,OverGeneDB: a database of 5' end protein coding overlapping genes in human and mouse genomes.,"Gene overlap plays various regulatory functions on transcriptional and post-transcriptional levels. Most current studies focus on protein-coding genes overlapping with non-protein-coding counterparts, the so called natural antisense transcripts. Considerably less is known about the role of gene overlap in the case of two protein-coding genes. Here, we provide OverGeneDB, a database of human and mouse 5' end protein-coding overlapping genes. The database contains 582 human and 113 mouse gene pairs that are transcribed using overlapping promoters in at least one analyzed library. Gene pairs were identified based on the analysis of the transcription start site (TSS) coordinates in 73 human and 10 mouse organs, tissues and cell lines. Beside TSS data, resources for 26 human lung adenocarcinoma cell lines also contain RNA-Seq and ChIP-Seq data for seven histone modifications and RNA Polymerase II activity. 
The collected data revealed that the overlap region is rarely conserved between the studied species and tissues. In ∼50% of the overlapping genes, transcription started explicitly in the overlap regions. In the remaining half of overlapping genes, transcription was initiated both from overlapping and non-overlapping TSSs. OverGeneDB is accessible at http://overgenedb.amu.edu.pl.",2018-01-01 +27932914,Macrobenthic molluscs from a marine - lagoonal environmental transition in Lesvos Island (Greece).,"

Background

This paper describes an occurence dataset, also including numerical abundance and biomass data, pertaining to the macrobenthic molluscan assemblages from a marine - lagoonal environmental transition. The study system was the soft-substrate benthoscape of the area of the Kalloni solar saltworks (Lesvos Island, Greece). Specifically, the study area extended from the infralittoral zone of the inner Kalloni Gulf (marine habitat) to the bottoms of the first two evaporation ponds of the Kalloni solar saltworks (lagoonal habitat). Bottom sediment samples (3 replicates) were collected with a Van Veen grab sampler (0.1 m2) at four sampling sites, along a 1.5 km long line transect that spanned the marine - lagoonal environmental transition. A total of four surveys were carried out seasonally in 2004.  A total of 39,345 molluscan individuals were sorted out of the sediment samples and were identified to 71 species, belonging to the Gastropoda (36), Bivalvia (34) and Scaphopoda (1) classes. Numerical abundance and wet biomass (with shells) data are included in the dataset.

New information

The dataset described in the present paper partially fills a significant gap in the scientific literature: Because ecological research of coastal lagoons has seldom explicitly considered the marine - lagoonal habitats interface, there are no openly accessible datasets pertaining to the particular structural component of the transitional waters benthoscapes of the Mediterranean Sea. Such datasets could prove valuable in the research of the structure and functioning of transitional waters benthoscapes. The present dataset is available as a supplementary file (Suppl. material 1) and can also be accessed at http://ipt.medobis.eu/resource?r=kalloni_saltworks_phd.",2016-11-01 +33292107,Metabolism and Interactions of Chloroquine and Hydroxychloroquine with Human Cytochrome P450 Enzymes and Drug Transporters.,"

Background

In clinical practice, chloroquine and hydroxychloroquine are often co-administered with other drugs in the treatment of malaria, chronic inflammatory diseases, and COVID-19. Therefore, their metabolic properties and the effects on the activity of cytochrome P450 (P450, CYP) enzymes and drug transporters should be considered when developing the most efficient treatments for patients.

Methods

Scientific literature on the interactions of chloroquine and hydroxychloroquine with human P450 enzymes and drug transporters, was searched using PUBMED.Gov (https://pubmed.ncbi.nlm.nih.gov/) and the ADME database (https://life-science.kyushu.fujitsu.com/admedb/).

Results

Chloroquine and hydroxychloroquine are metabolized by P450 1A2, 2C8, 2C19, 2D6, and 3A4/5 in vitro and by P450s 2C8 and 3A4/5 in vivo by N-deethylation. Chloroquine effectively inhibited P450 2D6 in vitro; however, in vivo inhibition was not apparent except in individuals with limited P450 2D6 activity. Chloroquine is both an inhibitor and inducer of the transporter MRP1 and is also a substrate of the Mate and MRP1 transport systems. Hydroxychloroquine also inhibited P450 2D6 and the transporter OATP1A2.

Conclusions

Chloroquine caused a statistically significant decrease in P450 2D6 activity in vitro and in vivo, also inhibiting its own metabolism by the enzyme. The inhibition indicates a potential for clinical drug-drug interactions when taken with other drugs that are predominant substrates of the P450 2D6. When chloroquine and hydroxychloroquine are used clinically with other drugs, substrates of P450 2D6 enzyme, attention should be given to substrate-specific metabolism by P450 2D6 alleles present in individuals taking the drugs.",2020-01-01 +32727974,TEMPURA: Database of Growth TEMPeratures of Usual and RAre Prokaryotes. ,"Growth temperature is one of the most representative biological parameters for characterizing living organisms. Prokaryotes have been isolated from various temperature environments and show wide diversity in their growth temperatures. We herein constructed a database of growth TEMPeratures of Usual and RAre prokaryotes (TEMPURA, http://togodb.org/db/tempura), which contains the minimum, optimum, and maximum growth temperatures of 8,639 prokaryotic strains. Growth temperature information is linked with taxonomy IDs, phylogenies, and genomic information. TEMPURA provides useful information to researchers working on biotechnological applications of extremophiles and their biomolecules as well as those performing fundamental studies on the physiological diversity of prokaryotes.",2020-01-01 +32603286,Autophagy and Ubiquitination as Two Major Players in Colorectal Cancer: A Review on Recent Patents.,"

Background

As one of the most commonly diagnosed cancers among men and women, Colorectal Cancer (CRC) leads to high rates of morbidity and mortality across the globe. Recent anti- CRC therapies are now targeting specific signaling pathways involved in colorectal carcinogenesis. Ubiquitin Proteasome System (UPS) and autophagy are two main protein quality control systems, which play major roles in the carcinogenesis of colorectal cancer. A balanced function of these two pathways is necessary for the regulation of cell proliferation and cell death.

Objective

In this systematic review, we discuss the available evidence regarding the roles of autophagy and ubiquitination in progression and inhibition of CRC.

Methods

The search terms ""colorectal cancer"" or ""colon cancer"" or ""colorectal carcinoma"" or ""colon carcinoma"" in combination with ""ubiquitin proteasome"" and ""autophagy"" were searched in PubMed, Web of Science, and Scopus databases, and also Google Patents (https://patents.google .com) from January 2000 to Feb 2020.

Results

The most important factors involved in UPS and autophagy have been investigated. There are many important factors involved in UPS and autophagy but this systematic review shows the studies that have mostly focused on the role of ATG, 20s proteasome and mTOR in CRC, and the more important factors such as ATG8, FIP200, and TIGAR factors that are effective in the regulation of autophagy in CRC cells have not been yet investigated.

Conclusion

The most important factors involved in UPS and autophagy such as ATG, 20s proteasome and mTOR, ATG8, FIP200, and TIGAR can be considered in drug therapy for controlling or activating autophagy.",2020-01-01 +32588040,mAML: an automated machine learning pipeline with a microbiome repository for human disease classification. ,"Due to the concerted efforts to utilize the microbial features to improve disease prediction capabilities, automated machine learning (AutoML) systems aiming to get rid of the tediousness in manually performing ML tasks are in great demand. Here we developed mAML, an ML model-building pipeline, which can automatically and rapidly generate optimized and interpretable models for personalized microbiome-based classification tasks in a reproducible way. The pipeline is deployed on a web-based platform, while the server is user-friendly and flexible and has been designed to be scalable according to the specific requirements. This pipeline exhibits high performance for 13 benchmark datasets including both binary and multi-class classification tasks. In addition, to facilitate the application of mAML and expand the human disease-related microbiome learning repository, we developed GMrepo ML repository (GMrepo Microbiome Learning repository) from the GMrepo database. The repository involves 120 microbiome-based classification tasks for 85 human-disease phenotypes referring to 12 429 metagenomic samples and 38 643 amplicon samples. The mAML pipeline and the GMrepo ML repository are expected to be important resources for researches in microbiology and algorithm developments. Database URL: http://lab.malab.cn/soft/mAML.",2020-01-01 +32277449,Choice of the Promoter for Tissue and Developmental Stage-Specific Gene Expression.,"Transgenic technologies belong to important tools of reverse genetics and biotechnology in plants. 
Targeted genetic modifications can reveal functions of genes of interest, change metabolic and regulatory pathways, or result in accumulation of valuable proteins or metabolites. However, to be efficient in targeted genetic modification, the chimeric gene construct should be designed properly. In particular, the promoters used to control transgene expression need to be carefully chosen. Most promoters in widely used vectors belong to strong and constitutively expressed variants. However, in many cases transgene expression has to be restricted to certain tissue, stage of development, or response to some internal or external stimuli. In turn, a large variety of tissue-specific promoters have been studied and information on their characteristics may be recovered from the literature. An appropriate promoter may be selected and used in genetic construct to optimize the transgene transcription pattern. We have previously designed the TGP database (TransGene Promoters, http://wwwmgs.bionet.nsc.ru/mgs/dbases/tgp/home.html ) collecting information from the publications in this field. Here we review the wide range of noncanonical tissue-specific and developmentally regulated promoters that might be used for transgene expression control.",2020-01-01 +32159215,TeaMiD: a comprehensive database of simple sequence repeat markers of tea. ,"Tea is a highly cross-pollinated, woody, perennial tree. High heterozygosity combined with a long gestational period makes conventional breeding a cumbersome process. Therefore, marker-assisted breeding is a better alternative approach when compared with conventional breeding. Considering the large genome size of tea (~3 Gb), information about simple sequence repeat (SSR) is scanty. Thus, we have taken advantage of the recently published tea genomes to identify large numbers of SSR markers in the tea. 
Besides the genomic sequences, we identified SSRs from the other publicly available sequences such as RNA-seq, GSS, ESTs and organelle genomes (chloroplasts and mitochondrial) and also searched published literature to catalog validated set of tea SSR markers. The complete exercise yielded a total of 935 547 SSRs. Out of the total, 82 SSRs were selected for validation among a diverse set of tea genotypes. Six primers (each with four to six alleles, an average of five alleles per locus) out of the total 27 polymorphic primers were used for a diversity analysis in 36 tea genotypes with mean polymorphic information content of 0.61-0.76. Finally, using all the information generated in this study, we have developed a user-friendly database (TeaMiD; http://indianteagenome.in:8080/teamid/) that hosts SSR from all the six resources including three nuclear genomes of tea and transcriptome sequences of 17 Camellia wild species. Database URL: http://indianteagenome.in:8080/teamid/.",2020-01-01 +31722416,"MEGARes 2.0: a database for classification of antimicrobial drug, biocide and metal resistance determinants in metagenomic sequence data.","Antimicrobial resistance (AMR) is a threat to global public health and the identification of genetic determinants of AMR is a critical component to epidemiological investigations. High-throughput sequencing (HTS) provides opportunities for investigation of AMR across all microbial genomes in a sample (i.e. the metagenome). Previously, we presented MEGARes, a hand-curated AMR database and annotation structure developed to facilitate the analysis of AMR within metagenomic samples (i.e. the resistome). Along with MEGARes, we released AmrPlusPlus, a bioinformatics pipeline that interfaces with MEGARes to identify and quantify AMR gene accessions contained within a metagenomic sequence dataset. 
Here, we present MEGARes 2.0 (https://megares.meglab.org), which incorporates previously published resistance sequences for antimicrobial drugs, while also expanding to include published sequences for metal and biocide resistance determinants. In MEGARes 2.0, the nodes of the acyclic hierarchical ontology include four antimicrobial compound types, 57 classes, 220 mechanisms of resistance, and 1,345 gene groups that classify the 7,868 accessions. In addition, we present an updated version of AmrPlusPlus (AMR ++ version 2.0), which improves accuracy of classifications, as well as expanding scalability and usability.",2020-01-01 +31612943,OHNOLOGS v2: a comprehensive resource for the genes retained from whole genome duplication in vertebrates.,"All vertebrates including human have evolved from an ancestor that underwent two rounds of whole genome duplication (2R-WGD). In addition, teleost fish underwent an additional third round of genome duplication (3R-WGD). The genes retained from these genome duplications, so-called ohnologs, have been instrumental in the evolution of vertebrate complexity, development and susceptibility to genetic diseases. However, the identification of vertebrate ohnologs has been challenging, due to lineage specific genome rearrangements since 2R- and 3R-WGD. We previously identified vertebrate ohnologs using a novel synteny comparison across multiple genomes. Here, we refine and apply this approach on 27 vertebrate genomes to identify ohnologs from both 2R- and 3R-WGD, while taking into account the phylogenetically biased sampling of available species. We assemble vertebrate ohnolog pairs and families in an expanded OHNOLOGS v2 database. We find that teleost fish have retained more 2R-WGD ohnologs than mammals and sauropsids, and that these 2R-ohnologs have retained significantly more ohnologs from the subsequent 3R-WGD than genes without 2R-ohnologs. 
Interestingly, species with fewer extant genes, such as sauropsids, have retained similar or higher proportions of ohnologs. OHNOLOGS v2 should allow deeper evolutionary genomic analysis of the impact of WGD on vertebrates and can be freely accessed at http://ohnologs.curie.fr.",2020-01-01 +31584171,MOBscan: Automated Annotation of MOB Relaxases.,"Relaxase-based plasmid classification has become popular in the past 10 years. Nevertheless, it is not obvious how to assign a query protein to a relaxase MOB family. Automated protein annotation is commonly used to classify them into families, gathering evolutionarily related proteins that likely perform the same function, while circumventing the problem of different naming conventions. Here, we implement an automated method, MOBscan, to identify relaxases and classify them into any of the nine MOB families. MOBscan is a web tool that carries out a HMMER search against a curated database of MOB profile Hidden Markov models. It is freely available at https://castillo.dicom.unican.es/mobscan/ .",2020-01-01 +31504823,WALTZ-DB 2.0: an updated database containing structural information of experimentally determined amyloid-forming peptides.,"Transition of soluble proteins into insoluble amyloid fibrils is driven by self-propagating short sequence stretches. However, accurate prediction of aggregation determinants remains challenging. Here, we describe WALTZ-DB 2.0, an updated and significantly expanded open-access database providing information on experimentally determined amyloid-forming hexapeptide sequences (http://waltzdb.switchlab.org/). We have updated WALTZ-DB 2.0 with new entries, including: (i) experimental validation of an in-house developed dataset of 229 hexapeptides, using electron microscopy and Thioflavin-T binding assays; (ii) manual curation of 98 amyloid-forming peptides isolated from literature. 
Furthermore, the content has been expanded by adding novel structural information for peptide entries, including sequences of the previous version. Using a computational methodology developed in the Switch lab, we have generated 3D-models of the putative amyloid fibril cores of WALTZ-DB 2.0 entries. Structural models, coupled with information on the energetic contributions and fibril core stabilities, can be accessed through individual peptide entries. Customized filtering options for subset selections and new modelling graphical features were added to upgrade online accessibility, providing a user-friendly interface for browsing, downloading and updating. WALTZ-DB 2.0 remains the largest open-access repository for amyloid fibril formation determinants and will continue to enhance the development of new approaches focused on accurate prediction of aggregation prone sequences.",2020-01-01 +30973110,"Hybrid Design of Isonicotinic Acid Hydrazide Derivatives: Machine Learning Studies, Synthesis and Biological Evaluation of their Antituberculosis Activity.","

Background

Tuberculosis (TB) is an infection disease caused by Mycobacterium tuberculosis (Mtb) bacteria. One of the main causes of mortality from TB is the problem of Mtb resistance to known drugs.

Objective

The goal of this work is to identify potent small molecule anti-TB agents by machine learning, synthesis and biological evaluation.

Methods

The On-line Chemical Database and Modeling Environment (OCHEM) was used to build predictive machine learning models. Seven compounds were synthesized and tested in vitro for their antitubercular activity against H37Rv and resistant Mtb strains.

Results

A set of predictive models was built with OCHEM based on a set of previously synthesized isoniazid (INH) derivatives containing a thiazole core and tested against Mtb. The predictive ability of the models was tested by a 5-fold cross-validation, and resulted in balanced accuracies (BA) of 61-78% for the binary classifiers. Test set validation showed that the models could be instrumental in predicting anti- TB activity with a reasonable accuracy (with BA = 67-79 %) within the applicability domain. Seven designed compounds were synthesized and demonstrated activity against both the H37Rv and multidrugresistant (MDR) Mtb strains resistant to rifampicin and isoniazid. According to the acute toxicity evaluation in Daphnia magna neonates, six compounds were classified as moderately toxic (LD50 in the range of 10-100 mg/L) and one as practically harmless (LD50 in the range of 100-1000 mg/L).

Conclusion

The newly identified compounds may represent a starting point for further development of therapies against Mtb. The developed models are available online at OCHEM http://ochem.eu/article/11 1066 and can be used to virtually screen for potential compounds with anti-TB activity.",2020-01-01 +33151742,Variability of nonpathogenic influenza virus H5N3 under immune pressure.,"Mutations arising in influenza viruses that have undergone immune pressure may promote a successful spread of mutants in nature. In order to evaluate the variability of nonpathogenic influenza virus A/duck/Moscow/4182-C/2010(H5N3) and to determine the common epitopes between it and highly pathogenic H5N1 avian influenza viruses (HPAIV), a set of escape mutants was selected due to action of MABs specific against A/chicken/Pennsylvania/8125/83(H5N2), A/Vietnam/1203/04(H5N1) and A/duck/Novosibirsk/56/05(H5N1) viruses. The complete genomes of escape mutants were sequenced and amino acid point mutations were determined in HA, NA, PA, PB1, PB2, M1, M2, and NP proteins. Comprehensive analysis of the acquired mutations was performed using the Influenza Research Database (https://www.fludb.org) and revealed that all mutations were located inside short linear epitopes, in positions characterized by polymorphisms. Most of the mutations found were characterized as substitutions by predominant or alternative amino acids existing in nature. Antigenic changes depended only on substitutions at positions 126, 129, 131, 145 and 156 of HA (H3 numbering). The positions 126, 145 and 156 were common for HA/H5 of different phylogenetic lineages of H5N1 HPAIV (arisen from A/goose/Guangdong/1/96) and low pathogenic American and Eurasian viruses. Additionally, mutation S145P increased the temperature of HA heat inactivation, compared to wild-type, as was proved by reverse genetics. 
Moreover, nonpathogenic A/duck/Moscow/4182-C/2010(H5N3) and H5N1 HPAI viruses have the same structure of short linear epitopes in HA (145-157) and internal proteins (PB2: 186-200, 406-411; PB1: 135-143, 538-546; PA: 515-523; NP: 61-68; M1: 76-84; M2: 45-53). These facts may indicate that H5 wild duck nonpathogenic virus could be used as vaccine against H5N1 HPAIV. Keywords: avian influenza virus; H5 hemagglutinin; escape mutants; genetic analysis; phenotypic properties; site-specific mutagenesis.",2020-01-01 +32168374,uORFlight: a vehicle toward uORF-mediated translational regulation mechanisms in eukaryotes. ,"Upstream open reading frames (uORFs) are prevalent in eukaryotic mRNAs. They act as a translational control element for precisely tuning the expression of the downstream major open reading frame (mORF). uORF variation has been clearly associated with several human diseases. In contrast, natural uORF variants in plants have not ever been identified or linked with any phenotypic changes. The paucity of such evidence encouraged us to generate this database-uORFlight (http://uorflight.whu.edu.cn). It facilitates the exploration of uORF variation among different splicing models of Arabidopsis and rice genes. Most importantly, users can evaluate uORF frequency among different accessions at the population scale and find out the causal single nucleotide polymorphism (SNP) or insertion/deletion (INDEL), which can be associated with phenotypic variation through database mining or simple experiments. Such information will help to make hypothesis of uORF function in plant development or adaption to changing environments on the basis of the cognate mORF function. This database also curates plant uORF relevant literature into distinct groups. 
To be broadly interesting, our database expands uORF annotation into more species of fungus (Botrytis cinerea and Saccharomyces cerevisiae), plant (Brassica napus, Glycine max, Gossypium raimondii, Medicago truncatula, Solanum lycopersicum, Solanum tuberosum, Triticum aestivum and Zea mays), metazoan (Caenorhabditis elegans and Drosophila melanogaster) and vertebrate (Homo sapiens, Mus musculus and Danio rerio). Therefore, uORFlight will light up the runway toward how uORF genetic variation determines phenotypic diversity and advance our understanding of translational control mechanisms in eukaryotes.",2020-01-01 +32162267,AAgAtlas 1.0: A Database of Human Autoantigens Extracted from Biomedical Literature.,"Autoantibodies are antibodies against host self-proteins (autoantigens), which play significant roles in homeostasis maintenance and diseases with autoimmune disorders. Numerous papers were published in the past decade on the identification of human autoantigens in different human diseases. However, there is no consensus collection with all the reported autoantigens yet. To address this need, previously we developed a human autoantigen database, AAgAtlas 1.0, by text-mining and manual curation, which collects 1126 autoantigens associated with 1071 human diseases. AAgAtlas 1.0 provides a user-friendly interface to conveniently browse, retrieve, and download human autoantigen genes, their functional annotation, related diseases, and the evidence from the literature. AAgAtlas is freely available online http://biokb.ncpsb.org/aagatlas/ . In this chapter, we make an introduction and provide a guide to the users of AAgAtlas 1.0 database.",2020-01-01 +32133509,KRGDB: the large-scale variant database of 1722 Koreans based on whole genome sequencing. ,"Since 2012, the Center for Genome Science of the Korea National Institute of Health (KNIH) has been sequencing complete genomes of 1722 Korean individuals. 
As a result, more than 32 million variant sites have been identified, and a large proportion of the variant sites have been detected for the first time. In this article, we describe the Korean Reference Genome Database (KRGDB) and its genome browser. The current version of our database contains both single nucleotide and short insertion/deletion variants. The DNA samples were obtained from four different origins and sequenced in different sequencing depths (10× coverage of 63 individuals, 20× coverage of 194 individuals, combined 10× and 20× coverage of 135 individuals, 30× coverage of 230 individuals and 30× coverage of 1100 individuals). The major features of the KRGDB are that it contains information on the Korean genomic variant frequency, frequency difference between the Korean and other populations and the variant functional annotation (such as regulatory elements in ENCODE regions and coding variant functions) of the variant sites. Additionally, we performed the genome-wide association study (GWAS) between Korean genome variant sites for the 30×230 individuals and three major common diseases (diabetes, hypertension and metabolic syndrome). The association results are displayed on our browser. The KRGDB uses the MySQL database and Apache-Tomcat web server adopted with Java Server Page (JSP) and is freely available at http://coda.nih.go.kr/coda/KRGDB/index.jsp. Availability: http://coda.nih.go.kr/coda/KRGDB/index.jsp.",2020-01-01 +32128558,LeukmiR: a database for miRNAs and their targets in acute lymphoblastic leukemia. ,"Acute lymphoblastic leukemia (ALL) is one of the most common hematological malignancies in children. Recent studies suggest the involvement of multiple microRNAs in the tumorigenesis of various leukemias. However, until now, no comprehensive database exists for miRNAs and their cognate target genes involved specifically in ALL. 
Therefore, we developed 'LeukmiR' a dynamic database comprising in silico predicted microRNAs, and experimentally validated miRNAs along with the target genes they regulate in mouse and human. LeukmiR is a user-friendly platform with search strings for ALL-associated microRNAs, their sequences, description of target genes, their location on the chromosomes and the corresponding deregulated signaling pathways. For the user query, different search modules exist where either quick search can be carried out using any fuzzy term or by providing exact terms in specific modules. All entries for both human and mouse genomes can be retrieved through multiple options such as miRNA ID, their accession number, sequence, target genes, Ensemble-ID or Entrez-ID. User can also access miRNA: mRNA interaction networks in different signaling pathways, the genomic location of the targeted regions such as 3'UTR, 5'UTR and exons with their gene ontology and disease ontology information in both human and mouse systems. Herein, we also report 51 novel microRNAs which are not described earlier for ALL. Thus, LeukmiR database will be a valuable source of information for researchers to understand and investigate miRNAs and their targets with diagnostic and therapeutic potential in ALL. Database URL: http://tdb.ccmb.res.in/LeukmiR/.",2020-01-01 +31942978,Annotation and curation of the causality information in LncRNADisease. ,"Disease causative non-coding RNAs (ncRNAs) are of great importance in understanding a disease, for they directly contribute to the development or progress of a disease. Identifying the causative ncRNAs can provide vital implications for biomedical researches. In this work, we updated the long non-coding RNA disease database (LncRNADisease) with long non-coding RNA (lncRNA) causality information with manual annotations of the causal associations between lncRNAs/circular RNAs (circRNAs) and diseases by reviewing related publications. 
Of the total 11 568 experimental associations, 2297 out of 10 564 lncRNA-disease associations and 198 out of 1004 circRNA-disease associations were identified to be causal, whereas 635 lncRNAs and 126 circRNAs were identified to be causative for the development or progress of at least one disease. The updated information and functions of the database can offer great help to future researches involving lncRNA/circRNA-disease relationship. The latest LncRNADisease database is available at http://www.rnanut.net/lncrnadisease.",2020-01-01 +31747015,ChiTaRS 5.0: the comprehensive database of chimeric transcripts matched with druggable fusions and 3D chromatin maps.,"Chimeric RNA transcripts are formed when exons from two genes fuse together, often due to chromosomal translocations, transcriptional errors or trans-splicing effect. While these chimeric RNAs produce functional proteins only in certain cases, they play a significant role in disease phenotyping and progression. ChiTaRS 5.0 (http://chitars.md.biu.ac.il/) is the latest and most comprehensive chimeric transcript repository, with 111 582 annotated entries from eight species, including 23 167 known human cancer breakpoints. The database includes unique information correlating chimeric breakpoints with 3D chromatin contact maps, generated from public datasets of chromosome conformation capture techniques (Hi-C). In this update, we have added curated information on druggable fusion targets matched with chimeric breakpoints, which are applicable to precision medicine in cancers. The introduction of a new section that lists chimeric RNAs in various cell-lines is another salient feature. Finally, using text-mining techniques, novel chimeras in Alzheimer's disease, schizophrenia, dyslexia and other diseases were collected in ChiTaRS. Thus, this improved version is an extensive catalogue of chimeras from multiple species. 
It extends our understanding of the evolution of chimeric transcripts in eukaryotes and contributes to the analysis of 3D genome conformational changes and the functional role of chimeras in the etiopathogenesis of cancers and other complex diseases.",2020-01-01 +31647096,"proGenomes2: an improved database for accurate and consistent habitat, taxonomic and functional annotations of prokaryotic genomes.","Microbiology depends on the availability of annotated microbial genomes for many applications. Comparative genomics approaches have been a major advance, but consistent and accurate annotations of genomes can be hard to obtain. In addition, newer concepts such as the pan-genome concept are still being implemented to help answer biological questions. Hence, we present proGenomes2, which provides 87 920 high-quality genomes in a user-friendly and interactive manner. Genome sequences and annotations can be retrieved individually or by taxonomic clade. Every genome in the database has been assigned to a species cluster and most genomes could be accurately assigned to one or multiple habitats. In addition, general functional annotations and specific annotations of antibiotic resistance genes and single nucleotide variants are provided. In short, proGenomes2 provides threefold more genomes, enhanced habitat annotations, updated taxonomic and functional annotation and improved linkage to the NCBI BioSample database. The database is available at http://progenomes.embl.de/.",2020-01-01 +31624845,"CRISPRCasdb a successor of CRISPRdb containing CRISPR arrays and cas genes from complete genome sequences, and tools to download and query lists of repeats and spacers.","In Archaea and Bacteria, the arrays called CRISPRs for 'clustered regularly interspaced short palindromic repeats' and the CRISPR associated genes or cas provide adaptive immunity against viruses, plasmids and transposable elements. 
Short sequences called spacers, corresponding to fragments of invading DNA, are stored in-between repeated sequences. The CRISPR-Cas systems target sequences homologous to spacers leading to their degradation. To facilitate investigations of CRISPRs, we developed 12 years ago a website holding the CRISPRdb. We now propose CRISPRCasdb, a completely new version giving access to both CRISPRs and cas genes. We used CRISPRCasFinder, a program that identifies CRISPR arrays and cas genes and determine the system's type and subtype, to process public whole genome assemblies. Strains are displayed either in an alphabetic list or in taxonomic order. The database is part of the CRISPR-Cas++ website which also offers the possibility to analyse submitted sequences and to download programs. A BLAST search against lists of repeats and spacers extracted from the database is proposed. To date, 16 990 complete prokaryote genomes (16 650 bacteria from 2973 species and 340 archaea from 300 species) are included. CRISPR-Cas systems were found in 36% of Bacteria and 75% of Archaea strains. CRISPRCasdb is freely accessible at https://crisprcas.i2bc.paris-saclay.fr/.",2020-01-01 +33598456,BOW-GBDT: A GBDT Classifier Combining With Artificial Neural Network for Identifying GPCR-Drug Interaction Based on Wordbook Learning From Sequences.,"Background: As a class of membrane protein receptors, G protein-coupled receptors (GPCRs) are very important for cells to complete normal life function and have been proven to be a major drug target for widespread clinical application. Hence, it is of great significance to find GPCR targets that interact with drugs in the process of drug development. However, identifying the interaction of the GPCR-drug pairs by experimental methods is very expensive and time-consuming on a large scale. 
As more and more database about GPCR-drug pairs are opened, it is viable to develop machine learning models to accurately predict whether there is an interaction existing in a GPCR-drug pair. Methods: In this paper, the proposed model aims to improve the accuracy of predicting the interactions of GPCR-drug pairs. For GPCRs, the work extracts protein sequence features based on a novel bag-of-words (BOW) model improved with weighted Silhouette Coefficient and has been confirmed that it can extract more pattern information and limit the dimension of feature. For drug molecules, discrete wavelet transform (DWT) is used to extract features from the original molecular fingerprints. Subsequently, the above-mentioned two types of features are contacted, and SMOTE algorithm is selected to balance the training dataset. Then, artificial neural network is used to extract features further. Finally, a gradient boosting decision tree (GBDT) model is trained with the selected features. In this paper, the proposed model is named as BOW-GBDT. Results: D92M and Check390 are selected for testing BOW-GBDT. D92M is used for a cross-validation dataset which contains 635 interactive GPCR-drug pairs and 1,225 non-interactive pairs. Check390 is used for an independent test dataset which consists of 130 interactive GPCR-drug pairs and 260 non-interactive GPCR-drug pairs, and each element in Check390 cannot be found in D92M. According to the results, the proposed model has a better performance in generation ability compared with the existing machine learning models. Conclusion: The proposed predictor improves the accuracy of the interactions of GPCR-drug pairs. 
In order to facilitate more researchers to use the BOW-GBDT, the predictor has been settled into a brand-new server, which is available at http://www.jci-bioinfo.cn/bowgbdt.",2020-01-01 +31691819,CAUSALdb: a database for disease/trait causal variants identified using summary statistics of genome-wide association studies.,"Genome-wide association studies (GWASs) have revolutionized the field of complex trait genetics over the past decade, yet for most of the significant genotype-phenotype associations the true causal variants remain unknown. Identifying and interpreting how causal genetic variants confer disease susceptibility is still a big challenge. Herein we introduce a new database, CAUSALdb, to integrate the most comprehensive GWAS summary statistics to date and identify credible sets of potential causal variants using uniformly processed fine-mapping. The database has six major features: it (i) curates 3052 high-quality, fine-mappable GWAS summary statistics across five human super-populations and 2629 unique traits; (ii) estimates causal probabilities of all genetic variants in GWAS significant loci using three state-of-the-art fine-mapping tools; (iii) maps the reported traits to a powerful ontology MeSH, making it simple for users to browse studies on the trait tree; (iv) incorporates highly interactive Manhattan and LocusZoom-like plots to allow visualization of credible sets in a single web page more efficiently; (v) enables online comparison of causal relations on variant-, gene- and trait-levels among studies with different sample sizes or populations and (vi) offers comprehensive variant annotations by integrating massive base-wise and allele-specific functional annotations. CAUSALdb is freely available at http://mulinlab.org/causaldb.",2020-01-01 +35134148,CATA: a comprehensive chromatin accessibility database for cancer.,"Accessible chromatin refers to the active regions of a chromosome that are bound by many transcription factors (TFs). 
Changes in chromatin accessibility play a critical role in tumorigenesis. With the emergence of novel methods like Assay for Transposase-accessible Chromatin Sequencing, a sequencing method that maps chromatin-accessible regions (CARs) and enables the computational analysis of TF binding at chromatin-accessible sites, the regulatory landscape in cancer can be dissected. Herein, we developed a comprehensive cancer chromatin accessibility database named CATA, which aims to provide available resources of cancer CARs and to annotate their potential roles in the regulation of genes in a cancer type-specific manner. In this version, CATA stores 2 991 163 CARs from 23 cancer types, binding information of 1398 TFs within the CARs, and provides multiple annotations about these regions, including common single nucleotide polymorphisms (SNPs), risk SNPs, copy number variation, somatic mutations, motif changes, expression quantitative trait loci, methylation and CRISPR/Cas9 target loci. Moreover, CATA supports cancer survival analysis of the CAR-associated genes and provides detailed clinical information of the tumor samples. Database URL: CATA is available at http://www.xiejjlab.bio/cata/.",2020-01-01 +32879224,Systems Pharmacology Dissection of Mechanisms of Dengzhan Xixin Injection against Cardiovascular Diseases.,"Dengzhan Xixin injection (DZXXI), a herbal product prepared from a Chinese herb called Erigeron breviscapus, is a classical and traditional therapeutic for cadiovascular diseases (CVDs), including coronary heart disease (CHD), angina, and stroke, etc. However, its potential pharmacology mechanism against CVDs remains unclear. In this paper, a systems pharmacology-based strategy is presented for predicting drug targets and understanding therapeutic mechanisms of DZXXI against CVDs. The main ingredients were identified by HPLC-diode array detector (DAD). The target fishing was performed on the PharmMapper Server (http://lilab-ecust.cn/pharmmapper/). 
Potential targets were confirmed by two molecular docking tools, Sybyl-X 1.3 and Ledock to ensure the accuracy. The resulting target proteins were applied as baits to fish their related diseases and pathways from the molecular annotation system (MAS 3.0, http://bioinfo.capitalbio.com/mas3/) and Kyoto Encyclopedia of Genes and Genomes (KEGG) database (http://www.genome.jp/kegg/). Network generation and topological analysis were performed in Cytoscape 3.6.0. 15 main ingredients from DZXXI were identified. Forty five putative drug targets and 50 KEGG pathways, which have highly relevance to the therapeutic effects of DZXXI against CVDs, were then obtained. The systems analysis suggested that DZXXI could attenuate cardiac fibrosis, regulate cardiac contractility, and preserve heart function in adverse cardiac remodeling; meanwhile DZXXI also could have the function of activating blood circulation and dilating blood vessels. DZXXI exerts its therapeutic effects on CVDs possibly through multi-targets including CMA1, epidermal growth factor receptor (EGFR), phenylalanine-4-hydroxylase (PAH), SRC, F7, etc., and multi-pathways including Focal adhesion, mitogen-activated protein kinase (MAPK) signaling pathway, complement and coagulation cascades, Wnt signaling pathway, vascular endothelial growth factor (VEGF) signaling pathway, Renin-angiotensin system, etc.",2020-01-01 +32681639,"Autophagy and Tumor Database: ATdb, a novel database connecting autophagy and tumor. ","Autophagy is an essential cellular process that is closely implicated in diverse pathophysiological processes and a variety of human diseases, especially tumors. Autophagy is regarded as not only an anti-cancer process in tumorigenesis but also a pro-tumor process in progression and metastasis according to current research. It means the role of autophagy in tumor is considered to be complex, controversial and context dependent. 
Hence, a comprehensive database is of great significance to obtain an in-depth understanding of such complex correlations between autophagy and tumor. To achieve this objective, here we developed the Autophagy and Tumor Database (named as ATdb, http://www.bigzju.com/ATdb/#/) to compile the published information concerning autophagy and tumor research. ATdb connected 25 types of tumors with 137 genes required for autophagy-related pathways, containing 219 population filters, 2650 hazard ratio trend plots, 658 interacting microRNAs, 266 interacting long non-coding RNAs, 155 post-translational modifications, 298 DNA methylation records, 331 animal models and 70 clinical trials. ATdb could enable users to search, browse, download and carry out efficient online analysis. For instance, users can make prediction of autophagy gene regulators in a context-dependent manner and in a precise subpopulation and tumor subtypes. Also, it is feasible in ATdb to cluster tumors into distinguished groups based on the gene-related long non-coding RNAs to gain novel insights into their potential functional implications. Thus, ATdb offers a powerful online database for the autophagy community to explore the complex world of autophagy and tumor. Database URL: http://www.bigzju.com/ATdb/#/.",2020-01-01 +32608479,CHDGKB: a knowledgebase for systematic understanding of genetic variations associated with non-syndromic congenital heart disease. ,"Congenital heart disease (CHD) is one of the most common birth defects, with complex genetic and environmental etiologies. The reports of genetic variation associated with CHD have increased dramatically in recent years due to the revolutionary development of molecular technology. However, CHD is a heterogeneous disease, and its genetic origins remain inconclusive in most patients. Here we present a database of genetic variations for non-syndromic CHD (NS-CHD). 
By manually literature extraction and analyses, 5345 NS-CHD-associated genetic variations were collected, curated and stored in the public online database. The objective of our database is to provide the most comprehensive updates on NS-CHD genetic research and to aid systematic analyses of pathogenesis of NS-CHD in molecular level and the correlation between NS-CHD genotypes and phenotypes. Database URL: http://www.sysbio.org.cn/CHDGKB/.",2020-01-01 +32250210,Targeted Delivery of Therapeutics to Urological Cancer Stem Cells.,"Urological cancer refers to cancer in organs of the urinary system and the male reproductive system. It mainly includes prostate cancer, bladder cancer, renal cancer, etc., seriously threatening patients' survival. Although there are many advances in the treatment of urological cancer, approved targeted therapies often result in tumor recurrence and therapy failure. An increasing amount of evidence indicated that cancer stem cells (CSCs) with tumor-initiating ability were the source of treatment failure in urological cancer. The development of CSCstargeted strategy can provide a possibility for the complete elimination of urological cancer. This review is based on a search of PubMed, Google scholar and NIH database (http://ClinicalTrials.gov/) for English language articles containing the terms: ""biomarkers"", ""cancer stem cells"", ""targeting/targeted therapy"", ""prostate cancer"", bladder cancer"" and ""kidney cancer"". We summarized the biomarkers and stem cell features of the prostate, bladder and renal CSCs, outlined the targeted strategies for urological CSCs from signaling pathways, cytokines, angiogenesis, surface markers, elimination therapy, differentiation therapy, immunotherapy, microRNA, nanomedicine, etc., and highlighted the prospects and future challenges in this research field.",2020-01-01 +32219412,Circad: a comprehensive manually curated resource of circular RNA associated with diseases. 
,"Circular RNAs (circRNAs) are unique transcript isoforms characterized by back splicing of exon ends to form a covalently closed loop or circular conformation. These transcript isoforms are now known to be expressed in a variety of organisms across the kingdoms of life. Recent studies have shown the role of circRNAs in a number of diseases and increasing evidence points to their potential application as biomarkers in these diseases. We have created a comprehensive manually curated database of circular RNAs associated with diseases. This database is available at URL http://clingen.igib.res.in/circad/. The Database lists more than 1300 circRNAs associated with 150 diseases and mapping to 113 International Statistical Classification of Diseases (ICD) codes with evidence of association linked to published literature. The database is unique in many ways. Firstly, it provides ready-to-use primers to work with, in order to use circRNAs as biomarkers or to perform functional studies. It additionally lists the assay and PCR primer details including experimentally validated ones as a ready reference to researchers along with fold change and statistical significance. It also provides standard disease nomenclature as per the ICD codes. To the best of our knowledge, circad is the most comprehensive and updated database of disease associated circular RNAs. Availability: http://clingen.igib.res.in/circad/.",2020-01-01 +31950189,RNA CoSSMos 2.0: an improved searchable database of secondary structure motifs in RNA three-dimensional structures. ,"The RNA Characterization of Secondary Structure Motifs, RNA CoSSMos, database is a freely accessible online database that allows users to identify secondary structure motifs among RNA 3D structures and explore their structural features. RNA CoSSMos 2.0 now requires two closing base pairs for all RNA loop motifs to create a less redundant database of secondary structures. 
Furthermore, RNA CoSSMos 2.0 represents an upgraded database with new features that summarize search findings and aid in the search for 3D structural patterns among RNA secondary structure motifs. Previously, users were limited to viewing search results individually, with no built-in tools to compare search results. RNA CoSSMos 2.0 provides two new features, allowing users to summarize, analyze and compare their search result findings. A function has been added to the website that calculates the average and representative structures of the search results. Additionally, users can now view a summary page of their search results that reports percentages of each structural feature found, including sugar pucker, glycosidic linkage, hydrogen bonding patterns and stacking interactions. Other upgrades include a newly embedded NGL structural viewer, the option to download the clipped structure coordinates in *.pdb format and improved NMR structure results. RNA CoSSMos 2.0 is no longer simply a search engine for a structure database; it now has the capability of analyzing, comparing and summarizing search results. Database URL: http://rnacossmos.com.",2020-01-01 +31914046,Whether productive authors using the national health insurance database also achieve higher individual research metrics: A bibliometric study.,"

Background

Many researchers use the National Health Insurance Research Database (HIRD) to publish medical papers and gain exceptional outputs in academics. Whether they also obtain excellent citation metrics remains unclear.

Methods

We searched the PubMed database (http://www.ncbi.nlm.nih.gov/pubmed) using the terms Taiwan and HIRD. We then downloaded 1997 articles published from 2012 to 2016. An authorship-weighted scheme (AWS) was applied to compute coauthor partial contributions from the article bylines. Both modified x-index and author impact factor (AIF) proved complementary to Hirsch's h-index for calculating individual research achievements (IRA). The metrics from 4684 authors were collected for comparison. Three hundred eligible authors with higher x-indexes were located and displayed on Google Maps dashboards. Ten separate clusters were identified using social network analysis (SNA) to highlight the research teams. The bootstrapping method was used to examine the differences in metrics among author clusters. The Kano model was applied to classify author IRAs into 3 parts.

Results

The most productive author was Investigator#1 (Taichung City, Taiwan), who published 149 articles in 2015 and included 803 other members in his research teams. The Kano diagram results did not support his citation metrics beyond other clusters and individuals in IRAs.

Conclusion

The AWS-based bibliometric metrics make individual weighted research evaluations possible and available for comparison. The study results of productive authors using HIRD did not support the view that higher citation metrics exist in specific disciplines.",2020-01-01 +31740966,EnhancerAtlas 2.0: an updated resource with enhancer annotation in 586 tissue/cell types across nine species.,"Enhancers are distal cis-regulatory elements that activate the transcription of their target genes. They regulate a wide range of important biological functions and processes, including embryogenesis, development, and homeostasis. As more and more large-scale technologies were developed for enhancer identification, a comprehensive database is highly desirable for enhancer annotation based on various genome-wide profiling datasets across different species. Here, we present an updated database EnhancerAtlas 2.0 (http://www.enhanceratlas.org/indexv2.php), covering 586 tissue/cell types that include a large number of normal tissues, cancer cell lines, and cells at different development stages across nine species. Overall, the database contains 13 494 603 enhancers, which were obtained from 16 055 datasets using 12 high-throughput experiment methods (e.g. H3K4me1/H3K27ac, DNase-seq/ATAC-seq, P300, POLR2A, CAGE, ChIA-PET, GRO-seq, STARR-seq and MPRA). The updated version is a huge expansion of the first version, which only contains the enhancers in human cells. In addition, we predicted enhancer-target gene relationships in human, mouse and fly. Finally, the users can search enhancers and enhancer-target gene relationships through five user-friendly, interactive modules. 
We believe the new annotation of enhancers in EnhancerAtlas 2.0 will facilitate users to perform useful functional analysis of enhancers in various genomes.",2020-01-01 +31733064,"pathDIP 4: an extended pathway annotations and enrichment analysis resource for human, model organisms and domesticated species.","PathDIP was introduced to increase proteome coverage of literature-curated human pathway databases. PathDIP 4 now integrates 24 major databases. To further reduce the number of proteins with no curated pathway annotation, pathDIP integrates pathways with physical protein-protein interactions (PPIs) to predict significant physical associations between proteins and curated pathways. For human, it provides pathway annotations for 5366 pathway orphans. Integrated pathway annotation now includes six model organisms and ten domesticated animals. A total of 6401 core and ortholog pathways have been curated from the literature or by annotating orthologs of human proteins in the literature-curated pathways. Extended pathways are the result of combining these pathways with protein-pathway associations that are predicted using organism-specific PPIs. Extended pathways expand proteome coverage from 81 088 to 120 621 proteins, making pathDIP 4 the largest publicly available pathway database for these organisms and providing a necessary platform for comprehensive pathway-enrichment analysis. PathDIP 4 users can customize their search and analysis by selecting organism, identifier and subset of pathways. Enrichment results and detailed annotations for input list can be obtained in different formats and views. To support automated bioinformatics workflows, Java, R and Python APIs are available for batch pathway annotation and enrichment analysis. 
PathDIP 4 is publicly available at http://ophid.utoronto.ca/pathDIP.",2020-01-01 +31598695,MirGeneDB 2.0: the metazoan microRNA complement.,"Small non-coding RNAs have gained substantial attention due to their roles in animal development and human disorders. Among them, microRNAs are special because individual gene sequences are conserved across the animal kingdom. In addition, unique and mechanistically well understood features can clearly distinguish bona fide miRNAs from the myriad other small RNAs generated by cells. However, making this distinction is not a common practice and, thus, not surprisingly, the heterogeneous quality of available miRNA complements has become a major concern in microRNA research. We addressed this by extensively expanding our curated microRNA gene database - MirGeneDB - to 45 organisms, encompassing a wide phylogenetic swath of animal evolution. By consistently annotating and naming 10,899 microRNA genes in these organisms, we show that previous microRNA annotations contained not only many false positives, but surprisingly lacked >2000 bona fide microRNAs. Indeed, curated microRNA complements of closely related organisms are very similar and can be used to reconstruct ancestral miRNA repertoires. MirGeneDB represents a robust platform for microRNA-based research, providing deeper and more significant insights into the biology and evolution of miRNAs as well as biomedical and biomarker research. MirGeneDB is publicly and freely available at http://mirgenedb.org/.",2020-01-01 +27267768,Temporal bone radiology report classification using open source machine learning and natural langue processing libraries.,"

Background

Radiology reports are a rich resource for biomedical research. Prior to utilization, trained experts must manually review reports to identify discrete outcomes. The Audiological and Genetic Database (AudGenDB) is a public, de-identified research database that contains over 16,000 radiology reports. Because the reports are unlabeled, it is difficult to select those with specific abnormalities. We implemented a classification pipeline using a human-in-the-loop machine learning approach and open source libraries to label the reports with one or more of four abnormality region labels: inner, middle, outer, and mastoid, indicating the presence of an abnormality in the specified ear region.

Methods

Trained abstractors labeled radiology reports taken from AudGenDB to form a gold standard. These were split into training (80 %) and test (20 %) sets. We applied open source libraries to normalize and convert every report to an n-gram feature vector. We trained logistic regression, support vector machine (linear and Gaussian), decision tree, random forest, and naïve Bayes models for each ear region. The models were evaluated on the hold-out test set.

Results

Our gold-standard data set contained 726 reports. The best classifiers were linear support vector machine for inner and outer ear, logistic regression for middle ear, and decision tree for mastoid. Classifier test set accuracy was 90 %, 90 %, 93 %, and 82 % for the inner, middle, outer and mastoid regions, respectively. The logistic regression method was very consistent, achieving accuracy scores within 2.75 % of the best classifier across regions and a receiver operator characteristic area under the curve of 0.92 or greater across all regions.

Conclusions

Our results indicate that the applied methods achieve accuracy scores sufficient to support our objective of extracting discrete features from radiology reports to enhance cohort identification in AudGenDB. The models described here are available in several free, open source libraries that make them more accessible and simplify their utilization as demonstrated in this work. We additionally implemented the models as a web service that accepts radiology report text in an HTTP request and provides the predicted region labels. This service has been used to label the reports in AudGenDB and is freely available.",2016-06-06 +33206959,The IMEx coronavirus interactome: an evolving map of Coronaviridae-host molecular interactions. ,"The current coronavirus disease of 2019 (COVID-19) pandemic, caused by the severe acute respiratory syndrome coronavirus (SARS-CoV)-2, has spurred a wave of research of nearly unprecedented scale. Among the different strategies that are being used to understand the disease and develop effective treatments, the study of physical molecular interactions can provide fine-grained resolution of the mechanisms behind the virus biology and the human organism response. We present a curated dataset of physical molecular interactions focused on proteins from SARS-CoV-2, SARS-CoV-1 and other members of the Coronaviridae family that has been manually extracted by International Molecular Exchange (IMEx) Consortium curators. Currently, the dataset comprises over 4400 binarized interactions extracted from 151 publications. The dataset can be accessed in the standard formats recommended by the Proteomics Standards Initiative (HUPO-PSI) at the IntAct database website (https://www.ebi.ac.uk/intact) and will be continuously updated as research on COVID-19 progresses.",2020-01-01 +32674732,Ligand and Structure-based Virtual Screening of Lamiaceae Diterpenes with Potential Activity against a Novel Coronavirus (2019-nCoV).,"

Background

The emergence of a new coronavirus (CoV), named 2019-nCoV, as an outbreak originated in the city of Wuhan, China, has resulted in the death of more than 3,400 people this year alone and has caused worldwide an alarming situation, particularly following previous CoV epidemics, including the Severe Acute Respiratory Syndrome (SARS) in 2003 and the Middle East Respiratory Syndrome (MERS) in 2012. Currently, no treatment exists for infections caused by CoVs; however, some natural products may represent potential treatment resources, such as those that contain diterpenes.

Objective

This study aimed to use computational methods to perform a virtual screening (VS) of candidate diterpenes with the potential to act as CoV inhibitors.

Methods

1,955 diterpenes, derived from the Nepetoideae subfamily (Lamiaceae), were selected using the SistematX tool (https://sistematx.ufpb.br), which were used to make predictions. From the ChEMBL database, 3 sets of chemical structures were selected for the construction of predictive models.

Results

The chemical structures of molecules with known activity against SARS CoV, two of which were tested for activity against specific viral proteins and one of which was tested for activity against the virus itself, were classified according to their pIC50 values [-log IC50 (mol/l)].

Conclusion

In the consensus analysis approach, combining both ligand- and structure-based VSs, 19 compounds were selected as potential CoV inhibitors, including isotanshinone IIA (01), tanshinlactone (02), isocryptotanshinone (03), and tanshinketolactone (04), which did not present toxicity within the evaluated parameters.",2020-01-01 +32608478,"RNAWRE: a resource of writers, readers and erasers of RNA modifications. ","RNA modifications are involved in various kinds of cellular biological processes. Accumulated evidences have demonstrated that the functions of RNA modifications are determined by the effectors that can catalyze, recognize and remove RNA modifications. They are called 'writers', 'readers' and 'erasers'. The identification of RNA modification effectors will be helpful for understanding the regulatory mechanisms and biological functions of RNA modifications. In this work, we developed a database called RNAWRE that specially deposits RNA modification effectors. The current version of RNAWRE stored 2045 manually curated writers, readers and erasers for the six major kinds of RNA modifications, namely Cap, m1A, m6A, m5C, ψ and Poly A. The main modules of RNAWRE not only allow browsing and downloading the RNA modification effectors but also support the BLAST search of the potential RNA modification effectors in other species. We hope that RNAWRE will be helpful for the researches on RNA modifications. Database URL: http://rnawre.bio2db.com.",2020-01-01 +32449511,A strategy for large-scale comparison of evolutionary- and reaction-based classifications of enzyme function. ,"Determining the molecular function of enzymes discovered by genome sequencing represents a primary foundation for understanding many aspects of biology. Historically, classification of enzyme reactions has used the enzyme nomenclature system developed to describe the overall reactions performed by biochemically characterized enzymes, irrespective of their associated sequences. 
In contrast, functional classification and assignment for the millions of protein sequences of unknown function now available is largely done in two computational steps, first by similarity-based assignment of newly obtained sequences to homologous groups, followed by transferring to them the known functions of similar biochemically characterized homologs. Due to the fundamental differences in their etiologies and practice, `how' these chemistry- and evolution-centric functional classification systems relate to each other has been difficult to explore on a large scale. To investigate this issue in a new way, we integrated two published ontologies that had previously described each of these classification systems independently. The resulting infrastructure was then used to compare the functional assignments obtained from each classification system for the well-studied and functionally diverse enolase superfamily. Mapping these function assignments to protein structure and reaction similarity networks shows a profound and complex disconnect between the homology- and chemistry-based classification systems. This conclusion mirrors previous observations suggesting that except for closely related sequences, facile annotation transfer from small numbers of characterized enzymes to the huge number uncharacterized homologs to which they are related is problematic. Our extension of these comparisons to large enzyme superfamilies in a computationally intelligent manner provides a foundation for new directions in protein function prediction for the huge proportion of sequences of unknown function represented in major databases. 
Interactive sequence, reaction, substrate and product similarity networks computed for this work for the enolase and two other superfamilies are freely available for download from the Structure Function Linkage Database Archive (http://sfld.rbvi.ucsf.edu).",2020-01-01 +32416689,Potential Prognostic Predictors and Molecular Targets for Skin Melanoma Screened by Weighted Gene Co-expression Network Analysis.,"

Aims and objectives

Among skin cancers, malignant skin melanoma is the leading cause of death. Identification of gene markers of malignant skin melanoma associated with survival may provide new clues for prognosis prediction and treatment. This research aimed to screen out potential prognostic predictors and molecular targets for malignant skin melanoma.

Introduction

Information regarding gene expression in skin melanoma and patients' clinical traits was obtained from the Gene Expression Omnibus database. Weighted gene co-expression network analysis (WGCNA) was applied to build co-expression modules and investigate the association between the modules and clinical traits. Moreover, functional enrichment analysis was performed for clinically significant co-expression modules. Hub genes of these modules were validated via Gene Expression Profiling Interactive Analysis (GEPIA) and the Human Protein Atlas (http://www.proteinatlas.org).

Methods

First, using WGCNA, 9 co-expression modules were constructed by the top 25% differentially expressed genes (4406 genes) from 77 human melanoma samples. Two co-expression modules (magenta and blue modules) were significantly correlated with survival months (r = -0.27, p = 0.02; r = 0.27, p = 0.02, respectively). The results of functional enrichment analysis demonstrated that the magenta module was mainly enriched in the cell cycle process and the blue module was mainly enriched in the immune response process. Additionally, the GEPIA and Human Protein Atlas results suggested that the hub genes CCNB2, ARHGAP30, and SEMA4D were associated with relapse-free survival and overall survival (all p-values < 0.05) and were differentially expressed in melanoma tumors and normal skin.

Results and conclusion

The results provided the framework of co-expression gene modules of skin melanoma and screened out CCNB2, ARHGAP30, and SEMA4D associated with survival as potential prognostic predictors and molecular targets of treatment.",2020-01-01 +31942979,Phenotype-genotype network construction and characterization: a case study of cardiovascular diseases and associated non-coding RNAs. ,"The phenotype-genotype relationship is a key for personalized and precision medicine for complex diseases. To unravel the complexity of the clinical phenotype-genotype network, we used cardiovascular diseases (CVDs) and associated non-coding RNAs (ncRNAs) (i.e. miRNAs, long ncRNAs, etc.) as the case for the study of CVDs at a systems or network level. We first integrated a database of CVDs and ncRNAs (CVDncR, http://sysbio.org.cn/cvdncr/) to construct CVD-ncRNA networks and annotate their clinical associations. To characterize the networks, we then separated the miRNAs into two groups, i.e. universal miRNAs associated with at least two types of CVDs and specific miRNAs related only to one type of CVD. Our analyses indicated two interesting patterns in these CVD-ncRNA networks. First, scale-free features were present within both CVD-miRNA and CVD-lncRNA networks; second, universal miRNAs were more likely to be CVDs biomarkers. These results were confirmed by computational functional analyses. The findings offer theoretical guidance for decoding CVD-ncRNA associations and will facilitate the screening of CVD ncRNA biomarkers. Database URL: http://sysbio.org.cn/cvdncr/.",2020-01-01 +31906604,The 27th annual Nucleic Acids Research database issue and molecular biology database collection.,"The 2020 Nucleic Acids Research Database Issue contains 148 papers spanning molecular biology. They include 59 papers reporting on new databases and 79 covering recent changes to resources previously published in the issue. A further ten papers are updates on databases most recently published elsewhere. 
This issue contains three breakthrough articles: AntiBodies Chemically Defined (ABCD) curates antibody sequences and their cognate antigens; SCOP returns with a new schema and breaks away from a purely hierarchical structure; while the new Alliance of Genome Resources brings together a number of Model Organism databases to pool knowledge and tools. Major returning nucleic acid databases include miRDB and miRTarBase. Databases for protein sequence analysis include CDD, DisProt and ELM, alongside no fewer than four newcomers covering proteins involved in liquid-liquid phase separation. In metabolism and signaling, Pathway Commons, Reactome and Metabolights all contribute papers. PATRIC and MicroScope update in microbial genomes while human and model organism genomics resources include Ensembl, Ensembl genomes and UCSC Genome Browser. Immune-related proteins are covered by updates from IPD-IMGT/HLA and AFND, as well as newcomers VDJbase and OGRDB. Drug design is catered for by updates from the IUPHAR/BPS Guide to Pharmacology and the Therapeutic Target Database. The entire Database Issue is freely available online on the Nucleic Acids Research website (https://academic.oup.com/nar). The NAR online Molecular Biology Database Collection has been revised, updating 305 entries, adding 65 new resources and eliminating 125 discontinued URLs; so bringing the current total to 1637 databases. It is available at http://www.oxfordjournals.org/nar/database/c/.",2020-01-01 +31724701,Exposome-Explorer 2.0: an update incorporating candidate dietary biomarkers and dietary associations with cancer risk.,"Exposome-Explorer (http://exposome-explorer.iarc.fr) is a database of dietary and pollutant biomarkers measured in population studies. In its first release, Exposome-Explorer contained comprehensive information on 692 biomarkers of dietary and pollution exposures extracted from the analysis of 480 peer-reviewed publications. 
Today, Exposome-Explorer has been further expanded and contains a total of 908 biomarkers. Two additional types of information have been collected. First, 185 candidate dietary biomarkers having 403 associations with food intake (as measured by metabolomic studies) have been identified and added. Second, 1356 associations between dietary biomarkers and cancer risk in epidemiological studies, which were collected from 313 publications, have also been added to the database. Classifications for both foods and compounds have been revised, and new classifications for biospecimens, analytical methods and cancers have been implemented. Finally, the web interface has been redesigned to significantly improve the user experience.",2020-01-01 +31713636,DisProt: intrinsic protein disorder annotation in 2020.,"The Database of Protein Disorder (DisProt, URL: https://disprot.org) provides manually curated annotations of intrinsically disordered proteins from the literature. Here we report recent developments with DisProt (version 8), including the doubling of protein entries, a new disorder ontology, improvements of the annotation format and a completely new website. The website includes a redesigned graphical interface, a better search engine, a clearer API for programmatic access and a new annotation interface that integrates text mining technologies. The new entry format provides a greater flexibility, simplifies maintenance and allows the capture of more information from the literature. The new disorder ontology has been formalized and made interoperable by adopting the OWL format, as well as its structure and term definitions have been improved. The new annotation interface has made the curation process faster and more effective. We recently showed that new DisProt annotations can be effectively used to train and validate disorder predictors. 
We believe the growth of DisProt will accelerate, contributing to the improvement of function and disorder predictors and therefore to illuminate the 'dark' proteome.",2020-01-01 +31696234,BiGG Models 2020: multi-strain genome-scale models and expansion across the phylogenetic tree.,"The BiGG Models knowledge base (http://bigg.ucsd.edu) is a centralized repository for high-quality genome-scale metabolic models. For the past 12 years, the website has allowed users to browse and search metabolic models. Within this update, we detail new content and features in the repository, continuing the original effort to connect each model to genome annotations and external databases as well as standardization of reactions and metabolites. We describe the addition of 31 new models that expand the portion of the phylogenetic tree covered by BiGG Models. We also describe new functionality for hosting multi-strain models, which have proven to be insightful in a variety of studies centered on comparisons of related strains. Finally, the models in the knowledge base have been benchmarked using Memote, a new community-developed validator for genome-scale models to demonstrate the improving quality and transparency of model content in BiGG Models.",2020-01-01 +31647101,miRTarBase 2020: updates to the experimentally validated microRNA-target interaction database.,"MicroRNAs (miRNAs) are small non-coding RNAs (typically consisting of 18-25 nucleotides) that negatively control expression of target genes at the post-transcriptional level. Owing to the biological significance of miRNAs, miRTarBase was developed to provide comprehensive information on experimentally validated miRNA-target interactions (MTIs). To date, the database has accumulated >13,404 validated MTIs from 11,021 articles from manual curations. In this update, a text-mining system was incorporated to enhance the recognition of MTI-related articles by adopting a scoring system. 
In addition, a variety of biological databases were integrated to provide information on the regulatory network of miRNAs and its expression in blood. Not only targets of miRNAs but also regulators of miRNAs are provided to users for investigating the up- and downstream regulations of miRNAs. Moreover, the number of MTIs with high-throughput experimental evidence increased remarkably (validated by CLIP-seq technology). In conclusion, these improvements promote the miRTarBase as one of the most comprehensively annotated and experimentally validated miRNA-target interaction databases. The updated version of miRTarBase is now available at http://miRTarBase.cuhk.edu.cn/.",2020-01-01 +31586406,YEASTRACT+: a portal for cross-species comparative genomics of transcription regulation in yeasts.,"The YEASTRACT+ information system (http://YEASTRACT-PLUS.org/) is a wide-scope tool for the analysis and prediction of transcription regulatory associations at the gene and genomic levels in yeasts of biotechnological or human health relevance. YEASTRACT+ is a new portal that integrates the previously existing YEASTRACT (http://www.yeastract.com/) and PathoYeastract (http://pathoyeastract.org/) databases and introduces the NCYeastract (Non-Conventional Yeastract) database (http://ncyeastract.org/), focused on the so-called non-conventional yeasts. The information in the YEASTRACT database, focused on Saccharomyces cerevisiae, was updated. PathoYeastract was extended to include two additional pathogenic yeast species: Candida parapsilosis and Candida tropicalis. Furthermore, the NCYeastract database was created, including five biotechnologically relevant yeast species: Zygosaccharomyces baillii, Kluyveromyces lactis, Kluyveromyces marxianus, Yarrowia lipolytica and Komagataella phaffii. 
The YEASTRACT+ portal gathers 289 706 unique documented regulatory associations between transcription factors (TF) and target genes and 420 DNA binding sites, considering 247 TFs from 10 yeast species. YEASTRACT+ continues to make available tools for the prediction of the TFs involved in the regulation of gene/genomic expression. In this release, these tools were upgraded to enable predictions based on orthologous regulatory associations described for other yeast species, including two new tools for cross-species transcription regulation comparison, based on multi-species promoter and TF regulatory network analyses.",2020-01-01 +31410491,The ABCD database: a repository for chemically defined antibodies.,"The ABCD (for AntiBodies Chemically Defined) database is a repository of sequenced antibodies, integrating curated information about the antibody and its antigen with cross-links to standardized databases of chemical and protein entities. It is freely available to the academic community, accessible through the ExPASy server (https://web.expasy.org/abcd/). The ABCD database aims at helping to improve reproducibility in academic research by providing a unique, unambiguous identifier associated to each antibody sequence. It also allows to determine rapidly if a sequenced antibody is available for a given antigen.",2020-01-01 +32783535,A Quantitative Meta-Analysis of the Relation between Occupational Benzene Exposure and Biomarkers of Cytogenetic Damage.,"

Background

The genotoxicity of benzene has been investigated in dozens of biomonitoring studies, mainly by studying (classical) chromosomal aberrations (CAs) or micronuclei (MN) as markers of DNA damage. Both have been shown to be predictive of future cancer risk in cohort studies and could, therefore, potentially be used for risk assessment of genotoxicity-mediated cancers.

Objectives

We sought to estimate an exposure-response curve (ERC) and quantify between-study heterogeneity using all available quantitative evidence on the cytogenetic effects of benzene exposure on CAs and MN respectively.

Methods

We carried out a systematic literature review and summarized all available data of sufficient quality using meta-analyses. We assessed the heterogeneity in slope estimates between studies and conducted additional sensitivity analyses to assess how various study characteristics impacted the estimated ERC.

Results

Sixteen CA (1,356 individuals) and 13 MN studies (2,097 individuals) were found to be eligible for inclusion in a meta-analysis. Studies where benzene was the primary genotoxic exposure and that had adequate assessment of both exposure and outcomes were used for the primary analysis. Estimated slope estimates were an increase of 0.27% CA [(95% CI: 0.08%, 0.47%); based on the results from 4 studies] and 0.27% MN [(95% CI: -0.23%, 0.76%); based on the results from 7 studies] per parts-per-million benzene exposure. We observed considerable between-study heterogeneity for both end points (I2>90%).

Discussion

Our study provides a systematic, transparent, and quantitative summary of the literature describing the strong association between benzene exposure and accepted markers of genotoxicity in humans. The derived consensus slope can be used as a best estimate of the quantitative relationship between real-life benzene exposure and genetic damage in future risk assessment. We also quantitate the large between-study heterogeneity that exists in this literature, a factor which is crucial for the interpretation of single-study or consensus slopes. https://doi.org/10.1289/EHP6404.",2020-08-12 +29993658,Submodular Generalized Matching for Peptide Identification in Tandem Mass Spectrometry.,"

Motivation

Identification of spectra produced by a shotgun proteomics mass spectrometry experiment is commonly performed by searching the observed spectra against a peptide database. The heart of this search procedure is a score function that evaluates the quality of a hypothesized match between an observed spectrum and a theoretical spectrum corresponding to a particular peptide sequence. Accordingly, the success of a spectrum analysis pipeline depends critically upon this peptide-spectrum score function. We develop peptide-spectrum score functions that compute the maximum value of a submodular function under $m$ matroid constraints. We call this procedure a submodular generalized matching (SGM) since it generalizes bipartite matching. We use a greedy algorithm to compute maximization, which can achieve a solution whose objective is guaranteed to be at least $\frac{1}{1+m}$ of the true optimum. The advantage of the SGM framework is that known long-range properties of experimental spectra can be modeled by designing suitable submodular functions and matroid constraints. Experiments on four data sets from various organisms and mass spectrometry platforms show that the SGM approach leads to significantly improved performance compared to several state-of-the-art methods. Supplementary information, C++ source code, and data sets can be found at https://melodi-lab.github.io/SGM.",2018-04-02 +24573880,TSLP signaling pathway map: a platform for analysis of TSLP-mediated signaling.,"Thymic stromal lymphopoietin (TSLP) is a four-helix bundle cytokine that plays a critical role in the regulation of immune responses and in the differentiation of hematopoietic cells. TSLP signals through a heterodimeric receptor complex consisting of an interleukin-7 receptor α chain and a unique TSLP receptor (TSLPR) [also known as cytokine receptor-like factor 2 (CRLF2)]. 
Cellular targets of TSLP include dendritic cells, B cells, mast cells, regulatory T (Treg) cells and CD4+ and CD8+ T cells. The TSLP/TSLPR axis can activate multiple signaling transduction pathways including the JAK/STAT pathway and the PI-3 kinase pathway. Aberrant TSLP/TSLPR signaling has been associated with a variety of human diseases including asthma, atopic dermatitis, nasal polyposis, inflammatory bowel disease, eosinophilic esophagitis and, most recently, acute lymphoblastic leukemia. A centralized resource of the TSLP signaling pathway cataloging signaling events is not yet available. In this study, we present a literature-annotated resource of reactions in the TSLP signaling pathway. This pathway map is publicly available through NetPath (http://www.netpath.org/), an open access signal transduction pathway resource developed previously by our group. This map includes 236 molecules and 252 reactions that are involved in TSLP/TSLPR signaling pathway. We expect that the TSLP signaling pathway map will provide a rich resource to study the biology of this important cytokine as well as to identify novel therapeutic targets for diseases associated with dysregulated TSLP/TSLPR signaling. Database URL: http://www.netpath.org/pathways?path_id=NetPath_24.",2014-02-25 +32256008,Life science database cross search: A single window system for dispersed biological databases.,"A comprehensive search system for the bioscience databases is in progress. We constructed a search service, Life science database cross search system (https://biosciencedbc.jp/dbsearch/index.php?lang=en) by integrating numerous biomedical databases using database crawling algorithms. 
The described system integrates 600 databases containing over 90 million entries indexed for biomedical research and development.",2019-12-31 +32950162,"Predicting growth of Listeria monocytogenes at dynamic conditions during manufacturing, ripening and storage of cheeses - Evaluation and application of models.","Mathematical models were evaluated to predict growth of L. monocytogenes in mould/smear-ripened cheeses with measured dynamic changes in product characteristics and storage conditions. To generate data for model evaluation three challenge tests were performed with mould-ripened cheeses produced by using milk inoculated with L. monocytogenes. Growth of L. monocytogenes and lactic acid bacteria (LAB) in the rind and in the core of cheeses were quantified together with changes in product characteristics over time (temperature, pH, NaCl/aw, lactic- and acetic acid concentrations). The performance of nine available L. monocytogenes growth models was evaluated using growth responses from the present study and from literature together with the determined or reported dynamic product characteristics and storage conditions (46 kinetics). The acceptable simulation zone (ASZ) method was used to assess model performance. A reduced version of the Martinez-Rios et al. (2019) model (https://doi.org/10.3389/fmicb.2019.01510) and the model of Østergaard et al. (2014) (https://doi.org/10.1016/j.ijfoodmicro.2014.07.012) had acceptable performance with a ASZ-score of 71-70% for L. monocytogenes growth in mould/smear-ripened cheeses. Models from Coroller et al. (2012) (https://doi.org/10.1016/j.ijfoodmicro.2011.09.023) had close to acceptable performance with ASZ-scores of 67-69%. The validated models (Martinez-Rios et al., 2019; Østergaard et al., 2014) can be used to facilitate the evaluation of time to critical L. 
monocytogenes growth for mould/smear-ripened cheeses including modification of recipes with for example reduced salt/sodium or to support exposure assessment studies for these cheeses.",2020-06-26 +29236971,New algorithms to represent complex pseudoknotted RNA structures in dot-bracket notation.,"Motivation:Understanding the formation, architecture and roles of pseudoknots in RNA structures are one of the most difficult challenges in RNA computational biology and structural bioinformatics. Methods predicting pseudoknots typically perform this with poor accuracy, often despite experimental data incorporation. Existing bioinformatic approaches differ in terms of pseudoknots' recognition and revealing their nature. A few ways of pseudoknot classification exist, most common ones refer to a genus or order. Following the latter one, we propose new algorithms that identify pseudoknots in RNA structure provided in BPSEQ format, determine their order and encode in dot-bracket-letter notation. The proposed encoding aims to illustrate the hierarchy of RNA folding. Results:New algorithms are based on dynamic programming and hybrid (combining exhaustive search and random walk) approaches. They evolved from elementary algorithm implemented within the workflow of RNA FRABASE 1.0, our database of RNA structure fragments. They use different scoring functions to rank dissimilar dot-bracket representations of RNA structure. Computational experiments show an advantage of new methods over the others, especially for large RNA structures. Availability and implementation:Presented algorithms have been implemented as new functionality of RNApdbee webserver and are ready to use at http://rnapdbee.cs.put.poznan.pl. Contact:mszachniuk@cs.put.poznan.pl. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +28449106,"MicrobiomeAnalyst: a web-based tool for comprehensive statistical, visual and meta-analysis of microbiome data.","The widespread application of next-generation sequencing technologies has revolutionized microbiome research by enabling high-throughput profiling of the genetic contents of microbial communities. How to analyze the resulting large complex datasets remains a key challenge in current microbiome studies. Over the past decade, powerful computational pipelines and robust protocols have been established to enable efficient raw data processing and annotation. The focus has shifted toward downstream statistical analysis and functional interpretation. Here, we introduce MicrobiomeAnalyst, a user-friendly tool that integrates recent progress in statistics and visualization techniques, coupled with novel knowledge bases, to enable comprehensive analysis of common data outputs produced from microbiome studies. MicrobiomeAnalyst contains four modules - the Marker Data Profiling module offers various options for community profiling, comparative analysis and functional prediction based on 16S rRNA marker gene data; the Shotgun Data Profiling module supports exploratory data analysis, functional profiling and metabolic network visualization of shotgun metagenomics or metatranscriptomics data; the Taxon Set Enrichment Analysis module helps interpret taxonomic signatures via enrichment analysis against >300 taxon sets manually curated from literature and public databases; finally, the Projection with Public Data module allows users to visually explore their data with a public reference data for pattern discovery and biological insights. 
MicrobiomeAnalyst is freely available at http://www.microbiomeanalyst.ca.",2017-07-01 +31487093,Genetic variation and temperature affects hybrid barriers during interspecific hybridization.,"Genomic imprinting regulates parent-specific transcript dosage during seed development and is mainly confined to the endosperm. Elucidation of the function of many imprinted genes has been hampered by the lack of corresponding mutant phenotypes, and the role of imprinting is mainly associated with genome dosage regulation or allocation of resources. Disruption of imprinted genes has also been suggested to mediate endosperm-based post-zygotic hybrid barriers depending on genetic variation and gene dosage. Here, we have analyzed the conservation of a clade from the MADS-box type I class transcription factors in the closely related species Arabidopsis arenosa, A. lyrata, and A. thaliana, and show that AGL36-like genes are imprinted and maternally expressed in seeds of Arabidopsis species and in hybrid seeds between outbreeding species. In hybridizations between outbreeding and inbreeding species the paternally silenced allele of the AGL36-like gene is reactivated in the hybrid, demonstrating that also maternally expressed imprinted genes are perturbed during hybridization and that such effects on imprinted genes are specific to the species combination. Furthermore, we also demonstrate a quantitative effect of genetic diversity and temperature on the strength of the post-zygotic hybridization barrier. Markedly, a small decrease in temperature during seed development increases the survival of hybrid F1 seeds, suggesting that abiotic and genetic parameters play important roles in post-zygotic species barriers, pointing at evolutionary scenarios favoring such effects. OPEN RESEARCH BADGES: This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. 
The data is available at https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA562212. All sequences generated in this study have been deposited in the National Center for Biotechnology Information Sequence Read Archive (https://www.ncbi.nlm.nih.gov/sra/) with project number PRJNA562212.",2019-10-12 +29659718,A sequence family database built on ECOD structural domains.,"Motivation:The ECOD database classifies protein domains based on their evolutionary relationships, considering both remote and close homology. The family group in ECOD provides classification of domains that are closely related to each other based on sequence similarity. Due to different perspectives on domain definition, direct application of existing sequence domain databases, such as Pfam, to ECOD struggles with several shortcomings. Results:We created multiple sequence alignments and profiles from ECOD domains with the help of structural information in alignment building and boundary delineation. We validated the alignment quality by scoring structure superposition to demonstrate that they are comparable to curated seed alignments in Pfam. Comparison to Pfam and CDD reveals that 27 and 16% of ECOD families are new, but they are also dominated by small families, likely because of the sampling bias from the PDB database. There are 35 and 48% of families whose boundaries are modified comparing to counterparts in Pfam and CDD, respectively. Availability and implementation:The new families are now integrated in the ECOD website. The aggregate HMMER profile library and alignment are available for download on ECOD website (http://prodata.swmed.edu/ecod). Supplementary information:Supplementary data are available at Bioinformatics online.",2018-09-01 +26113876,"Wanderer, an interactive viewer to explore DNA methylation and gene expression data in human cancer.","

Background

The Cancer Genome Atlas (TCGA) offers a multilayered view of genomics and epigenomics data of many human cancer types. However, the retrieval of expression and methylation data from TCGA is a cumbersome and time-consuming task.

Results

Wanderer is an intuitive Web tool allowing real time access and visualization of gene expression and DNA methylation profiles from TCGA. Given a gene query and selection of a TCGA dataset (e.g., colon adenocarcinomas), the Web resource provides the expression profile, at the single exon level, and the DNA methylation levels of HumanMethylation450 BeadChip loci inside or in the vicinity of the queried gene. Graphic and table outputs include individual and summary data as well as statistical tests, allowing the comparison of tumor and normal profiles and the exploration along the genomic locus and across tumor collections.

Conclusions

Wanderer offers a simple interface to straightforward access to TCGA data, amenable to experimentalists and clinicians without bioinformatics expertise. Wanderer may be accessed at http://maplab.cat/wanderer.",2015-06-23 +31886587,Denture use and osteoradionecrosis following radiotherapy for head and neck cancer: A systematic review.,"BACKGROUND:It is strongly recommended to extract teeth with poor prognosis in head and neck cancer (HaNC) patients prior starting treatment with radiotherapy to avoid need for extraction post-radiotherapy and prevent development of osteoradionecrosis (ORN). Dental extraction means that patients are often left with insufficient teeth leading to psychological problems and reducing their quality of life post-radiotherapy. Some clinicians do not advocate the use of dentures in HaNC patients claiming that dentures might lead to soft tissue irritation followed by ORN when constructed on irradiated jaws. AIMS:This systematic review aimed to investigate the existing evidence regarding the impact of denture use on the development of ORN in HaNC patients post-radiotherapy. METHODS:This systematic review followed the Preferred Reporting Item for Systematic Review and Meta-analyses (PRISMA) guideline. Three database systems were used: Ovid Medline, EMBASE and PsycINFO. PROSPERO was searched for ongoing or recently completed systematic reviews. The https://ClinicalTrials.gov was searched for ongoing or recently completed trials. The Joanna Briggs Institute critical appraisal tools were used to assess quality of studies being reviewed. RESULTS:Only three retrospective case-control studies were included. Numbers of participants included in the three studies are limited with incomparable types of mucosal dentures. None of the studies described the method of measurement of the exposure (denture use) in a standard, validated and reliable way. CONCLUSION:The three included studies suggested no link between denture use and development of ORN. 
However, very little evidence exists and the robustness of the studies is questionable. Well-powered studies are needed.",2019-12-30 +30476000,Integrate multiple traits to detect novel trait-gene association using GWAS summary data with an adaptive test approach.,"

Motivation

Genetics hold great promise to precision medicine by tailoring treatment to the individual patient based on their genetic profiles. Toward this goal, many large-scale genome-wide association studies (GWAS) have been performed in the last decade to identify genetic variants associated with various traits and diseases. They have successfully identified tens of thousands of disease-related variants. However they have explained only a small proportion of the overall trait heritability for most traits and are of very limited clinical use. This is partly owing to the small effect sizes of most genetic variants, and the common practice of testing association between one trait and one genetic variant at a time in most GWAS, even when multiple related traits are often measured for each individual. Increasing evidence suggests that many genetic variants can influence multiple traits simultaneously, and we can gain more power by testing association of multiple traits simultaneously. It is appealing to develop novel multi-trait association test methods that need only GWAS summary data, since it is generally very hard to access the individual-level GWAS phenotype and genotype data.

Results

Many existing GWAS summary data-based association test methods have relied on ad hoc approach or crude Monte Carlo approximation. In this article, we develop rigorous statistical methods for efficient and powerful multi-trait association test. We develop robust and efficient methods to accurately estimate the marginal trait correlation matrix using only GWAS summary data. We construct the principal component (PC)-based association test from the summary statistics. PC-based test has optimal power when the underlying multi-trait signal can be captured by the first PC, and otherwise it will have suboptimal performance. We develop an adaptive test by optimally weighting the PC-based test and the omnibus chi-square test to achieve robust performance under various scenarios. We develop efficient numerical algorithms to compute the analytical P-values for all the proposed tests without the need of Monte Carlo sampling. We illustrate the utility of proposed methods through application to the GWAS meta-analysis summary data for multiple lipids and glycemic traits. We identify multiple novel loci that were missed by individual trait-based association test.

Availability and implementation

All the proposed methods are implemented in an R package available at http://www.github.com/baolinwu/MTAR. The developed R programs are extremely efficient: it takes less than 2 min to compute the list of genome-wide significant single nucleotide polymorphisms (SNPs) for all proposed multi-trait tests for the lipids GWAS summary data with 2.5 million SNPs on a single Linux desktop.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +26157623,Comprehensive comparison of large-scale tissue expression datasets.,"For tissues to carry out their functions, they rely on the right proteins to be present. Several high-throughput technologies have been used to map out which proteins are expressed in which tissues; however, the data have not previously been systematically compared and integrated. We present a comprehensive evaluation of tissue expression data from a variety of experimental techniques and show that these agree surprisingly well with each other and with results from literature curation and text mining. We further found that most datasets support the assumed but not demonstrated distinction between tissue-specific and ubiquitous expression. By developing comparable confidence scores for all types of evidence, we show that it is possible to improve both quality and coverage by combining the datasets. To facilitate use and visualization of our work, we have developed the TISSUES resource (http://tissues.jensenlab.org), which makes all the scored and integrated data available through a single user-friendly web interface.",2015-06-30 +31243432,HUNER: improving biomedical NER with pretraining.,"MOTIVATION:Several recent studies showed that the application of deep neural networks advanced the state-of-the-art in named entity recognition (NER), including biomedical NER. However, the impact on performance and the robustness of improvements crucially depends on the availability of sufficiently large training corpora, which is a problem in the biomedical domain with its often rather small gold standard corpora. RESULTS:We evaluate different methods for alleviating the data sparsity problem by pretraining a deep neural network (LSTM-CRF), followed by a rather short fine-tuning phase focusing on a particular corpus. 
Experiments were performed using 34 different corpora covering five different biomedical entity types, yielding an average increase in F1-score of ∼2 pp compared to learning without pretraining. We experimented both with supervised and semi-supervised pretraining, leading to interesting insights into the precision/recall trade-off. Based on our results, we created the stand-alone NER tool HUNER incorporating fully trained models for five entity types. On the independent CRAFT corpus, which was not used for creating HUNER, it outperforms the state-of-the-art tools GNormPlus and tmChem by 5-13 pp on the entity types chemicals, species and genes. AVAILABILITY AND IMPLEMENTATION:HUNER is freely available at https://hu-ner.github.io. HUNER comes in containers, making it easy to install and use, and it can be applied off-the-shelf to arbitrary texts. We also provide an integrated tool for obtaining and converting all 34 corpora used in our evaluation, including fixed training, development and test splits to enable fair comparisons in the future. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-01-01 +28039114,Model-based calculating tool for pollen-mediated gene flow frequencies in plants. ,"The potential social-economic and environmental impacts caused by transgene flow from genetically engineered (GE) crops have stimulated worldwide biosafety concerns. To determine transgene flow frequencies resulted from pollination is the first critical step for assessing such impacts, in addition to the determination of transgene expression and fitness in crop-wild hybrid descendants. Two methods are commonly used to estimate pollen-mediated gene flow (PMGF) frequencies: field experimenting and mathematical modeling. Field experiments can provide relatively accurate results but are time/resource consuming. Modeling offers an effective complement for PMGF experimental assessment. 
However, many published models describe PMGF by mathematical equations and are practically not easy to use. To increase the application of PMGF modeling for the estimation of transgene flow, we established a tool to calculate PMGF frequencies based on a quasi-mechanistic PMGF model for wind-pollination species. This tool includes a calculating program displayed by an easy-operating interface. PMGF frequencies of different plant species can be quickly calculated under different environmental conditions by including a number of biological and wind speed parameters that can be measured in the fields/laboratories or obtained from published data. The tool is freely available in the public domain (http://ecology.fudan.edu.cn/userfiles/cn/files/Tool_Manual.zip). Case studies including rice, wheat, and maize demonstrated similar results between the calculated frequencies based on this tool and those from published PMGF data. This PMGF calculating tool will provide useful information for assessing and monitoring social-economic and environmental impacts caused by transgene flow from GE crops. This tool can also be applied to determine the isolation distances between GE and non-GE crops in a coexistence agro-ecosystem, and to ensure the purity of certified seeds by setting proper isolation distances among field production plots.",2016-12-30 +31340133,A Comparison of the Prevalence Rates of Language Impairment Before and After Response-to-Intervention Implementation.,"Purpose This research note presents a secondary data analysis of language impairment (LI) prevalence rates of children in public schools before and after a statewide mandate for response-to-intervention (RTI) implementation. Method Statewide and district-level LI prevalence rates were compared across 10 school years. 
Prevalence data from 67 school districts located in 1 state in the United States are reported as the proportion of the general student population (students ages 3-21 years) who were identified with a primary disability of LI. Results The mandated implementation of RTI within special education prereferral, evaluation, and eligibility processes coincided with significant changes in LI prevalence as a primary disability for most of the school districts. The majority of school districts experienced an increase in LI prevalence within 1 school year following RTI implementation. However, the degree and direction of change in prevalence rates varied across some of the school districts. Similar degrees of change were not evident across the other years of prevalence data review, suggesting the systemic change that occurred via RTI requirements coincided with fluctuations in the LI prevalence rates for the majority of school districts in the state. Conclusion A causal relation between RTI and LI prevalence cannot be established with the current data; however, this study establishes a temporal connection between the timing of RTI implementation and changes in LI prevalence in public schools of 1 very large state. Implications are presented for further research investigating the potential impact of systemic mandates on the identification of school-age children with LI. Supplemental Material https://doi.org/10.23641/asha.8968676.",2019-07-24 +31931895,TwinsMX: Uncovering the Basis of Health and Disease in the Mexican Population.,"TwinsMX is a national twin registry in Mexico recently created with institutional support from the Universidad Nacional Autónoma de México. It aims to serve as a platform to advance epidemiological and genetic research in the country and to disentangle the genetic and environmental contributions to health and disease in the admixed Mexican population. 
Here, we describe our recruitment and data collection strategies and discuss both the progress to date and future directions. More information about the registry is available on our website: https://twinsmxofficial.unam.mx/ (content in Spanish).",2019-12-01 +28498885,PROXiMATE: a database of mutant protein-protein complex thermodynamics and kinetics.,"

Summary

We have developed PROXiMATE, a database of thermodynamic data for more than 6000 missense mutations in 174 heterodimeric protein-protein complexes, supplemented with interaction network data from STRING database, solvent accessibility, sequence, structural and functional information, experimental conditions and literature information. Additional features include complex structure visualization, search and display options, download options and a provision for users to upload their data.

Availability and implementation

The database is freely available at http://www.iitm.ac.in/bioinfo/PROXiMATE/ . The website is implemented in Python, and supports recent versions of major browsers such as IE10, Firefox, Chrome and Opera.

Contact

gromiha@iitm.ac.in.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +30931570,A Network Module for the Perseus Software for Computational Proteomics Facilitates Proteome Interaction Graph Analysis.,"Proteomics data analysis strongly benefits from not studying single proteins in isolation but taking their multivariate interdependence into account. We introduce PerseusNet, the new Perseus network module for the biological analysis of proteomics data. Proteomics is commonly used to generate networks, e.g., with affinity purification experiments, but networks are also used to explore proteomics data. PerseusNet supports the biomedical researcher for both modes of data analysis with a multitude of activities. For affinity purification, a volcano-plot-based statistical analysis method for network generation is featured which is scalable to large numbers of baits. For posttranslational modifications of proteins, such as phosphorylation, a collection of dedicated network analysis tools helps in elucidating cellular signaling events. Co-expression network analysis of proteomics data adopts established tools from transcriptome co-expression analysis. PerseusNet is extensible through a plugin architecture in a multi-lingual way, integrating analyses in C#, Python, and R, and is freely available at http://www.perseus-framework.org .",2019-04-10 +32861878,HybridSucc: A Hybrid-learning Architecture for General and Species-specific Succinylation Site Prediction.,"As an important protein acylation modification, lysine succinylation (Ksucc) is involved in diverse biological processes, and participates in human tumorigenesis. Here, we collected 26,243 non-redundant known Ksucc sites from 13 species as the benchmark data set, combined 10 types of informative features, and implemented a hybrid-learning architecture by integrating deep-learning and conventional machine-learning algorithms into a single framework. 
We constructed a new tool named HybridSucc, which achieved area under curve (AUC) values of 0.885 and 0.952 for general and human-specific prediction of Ksucc sites, respectively. In comparison, the accuracy of HybridSucc was 17.84%-50.62% better than that of other existing tools. Using HybridSucc, we conducted a proteome-wide prediction and prioritized 370 cancer mutations that change Ksucc states of 218 important proteins, including PKM2, SHMT2, and IDH2. We not only developed a high-profile tool for predicting Ksucc sites, but also generated useful candidates for further experimental consideration. The online service of HybridSucc can be freely accessed for academic research at http://hybridsucc.biocuckoo.org/.",2020-04-01 +28453651,A PanorOmic view of personal cancer genomes.,"The massive molecular profiling of thousands of cancer patients has led to the identification of many tumor type specific driver genes. However, only a few (or none) of them are present in each individual tumor and, to enable precision oncology, we need to interpret the alterations found in a single patient. Cancer PanorOmics (http://panoromics.irbbarcelona.org) is a web-based resource to contextualize genomic variations detected in a personal cancer genome within the body of clinical and scientific evidence available for 26 tumor types, offering complementary cohort- and patient-centric views. Additionally, it explores the cellular environment of mutations by mapping them on the human interactome and providing quasi-atomic structural details, whenever available. This 'PanorOmic' molecular view of individual tumors, together with the appropriate genetic counselling and medical advice, should contribute to the identification of actionable alterations ultimately guiding the clinical decision-making process.",2017-07-01 +25516260,A database of circadian and diel rhythmic gene expression in the yellow fever mosquito Aedes aegypti.,"

Background

The mosquito species Aedes aegypti is the primary vector of many arboviral diseases, including dengue and yellow fevers, that are responsible for a large worldwide health burden. The biological rhythms of mosquitoes regulate many of the physiological processes and behaviors that influence the transmission of these diseases. For insight into the molecular basis of biological rhythms, diel and circadian gene expression profiling has been carried out for many species. To bring these resources to Aedes aegypti researchers, we used microarray technology to carry out a genome wide assessment of gene expression during the 24 hour light/dark (LD) cycle and during constant darkness (DD). The purpose of this report is to describe the methods, the validation of the results, and the organization of this database resource.

Description

The Aedes aegypti Circadian Database is a publicly accessible database that can be searched via a text-based query to visualize 44 hour temporal expression patterns of a given gene in Ae. aegypti heads under diel (observed under a 12 hour/12 hour LD cycle) and circadian (observed under DD) conditions. Profiles of gene expression under these conditions were assayed by Nimblegen 12-plex microarrays and rhythmicity was objectively assessed by the JTK_CYCLE algorithm. The output of the search is a graphical representation of the expression data along with computed period length, the time-of-day of gene expression peaks, and statistical determination for rhythmicity.

Conclusion

Our results show that at least 7.9% of the gene set present in the Aedes aegypti head are rhythmic under LD conditions and 6.7% can be considered circadian, oscillating under constant dark conditions. We present these results in the Aedes aegypti Circadian Database through Bioclock, a public website hosted by the University of Notre Dame at http://www.nd.edu/~bioclock/. This website allows searchable browsing of this quantitative gene expression information. The visualization allows for gene-by-gene comparison of transcript expression under both diel and circadian conditions, and the results are presented graphically in a plot profile of gene expression. The Ae. aegypti Circadian Database provides a community resource for observing diel and circadian fluctuations in gene expression across the Ae. aegypti genome.",2014-12-17 +25326323,The Comparative Toxicogenomics Database's 10th year anniversary: update 2015.,"Ten years ago, the Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) was developed out of a need to formalize, harmonize and centralize the information on numerous genes and proteins responding to environmental toxic agents across diverse species. CTD's initial approach was to facilitate comparisons of nucleotide and protein sequences of toxicologically significant genes by curating these sequences and electronically annotating them with chemical terms from their associated references. Since then, however, CTD has vastly expanded its scope to robustly represent a triad of chemical-gene, chemical-disease and gene-disease interactions that are manually curated from the scientific literature by professional biocurators using controlled vocabularies, ontologies and structured notation. Today, CTD includes 24 million toxicogenomic connections relating chemicals/drugs, genes/proteins, diseases, taxa, phenotypes, Gene Ontology annotations, pathways and interaction modules. 
In this 10th year anniversary update, we outline the evolution of CTD, including our increased data content, new 'Pathway View' visualization tool, enhanced curation practices, pilot chemical-phenotype results and impending exposure data set. The prototype database originally described in our first report has transformed into a sophisticated resource used actively today to help scientists develop and test hypotheses about the etiologies of environmentally influenced diseases.",2014-10-17 +27085938,"Local and traditional uses, phytochemistry, and pharmacology of Sophora japonica L.: A review.","

Ethnopharmacological relevance

Sophora japonica (Fabaceae), also known as Huai (Chinese: 槐), is a medium-sized deciduous tree commonly found in China, Japan, Korea, Vietnam, and other countries. The use of this plant has been recorded in classical medicinal treatises of ancient China, and it is currently recorded in both the Chinese Pharmacopoeia and European Pharmacopoeia. The flower buds and fruits of S. japonica, also known as Flos Sophorae Immaturus and Fructus Sophorae in China, are most commonly used in Asia (especially in China) to treat hemorrhoids, hematochezia, hematuria, hematemesis, hemorrhinia, uterine or intestinal hemorrhage, arteriosclerosis, headache, hypertension, dysentery, dizziness, and pyoderma. To discuss feasible trends for further research on S. japonica, this review highlights the botany, ethnopharmacology, phytochemistry, biological activities, and toxicology of S. japonica based on studies published in the last six decades.

Materials and methods

Information on S. japonica was collected from major scientific databases (SciFinder, PubMed, Elsevier, SpringerLink, Web of Science, Google Scholar, Medline Plus, China Knowledge Resource Integrated (CNKI), and ""Da Yi Yi Xue Sou Suo (http://www.dayi100.com/login.jsp)"") for publications between 1957 and 2015 on S. japonica. Information was also obtained from local classic herbal literature, government reports, conference papers, as well as PhD and MSc dissertations.

Results

Approximately 153 chemical compounds, including flavonoids, isoflavonoids, triterpenes, alkaloids, polysaccharides, amino acids, and other compounds, have been isolated from the leaves, branches, flowers, buds, pericarps, and/or fruits of S. japonica. Among these compounds, several flavonoids and isoflavonoids comprise the active constituents of S. japonica, which exhibit a wide range of biological activities in vitro and in vivo such as anti-inflammatory, antibacterial, antiviral, anti-osteoporotic, antioxidant, radical scavenging, antihyperglycemic, antiobesity, antitumor, and hemostatic effects. Furthermore, flavonoids and isoflavonoids can be used as quality control markers for quality identification and evaluation of medicinal materials and their preparations. Information on evaluating the safety of S. japonica is very limited, so further study is required. To enable safer, more effective, and controllable therapeutic preparations, more in-depth information is urgently needed on the quality control, toxicology data, and clinical value of crude extract and active compounds of S. japonica.

Conclusions

S. japonica has long been used in traditional Chinese medicine (TCM) due to its wide range of biological activities, and is administered orally. Phytochemical and pharmacological studies of S. japonica have increased in the past few years, and the extract and active components of this plant can be used to develop new drugs based on their traditional application as well as their biological activities. Therefore, this review on the ethnopharmacology, phytochemistry, biological activities, and toxicity of S. japonica offers promising data for further studies as well as the commercial exploitation of this traditional medicine.",2016-04-13 +29036527,iSyTE 2.0: a database for expression-based gene discovery in the eye.,"Although successful in identifying new cataract-linked genes, the previous version of the database iSyTE (integrated Systems Tool for Eye gene discovery) was based on expression information on just three mouse lens stages and was functionally limited to visualization by only UCSC-Genome Browser tracks. To increase its efficacy, here we provide an enhanced iSyTE version 2.0 (URL: http://research.bioinformatics.udel.edu/iSyTE) based on well-curated, comprehensive genome-level lens expression data as a one-stop portal for the effective visualization and analysis of candidate genes in lens development and disease. iSyTE 2.0 includes all publicly available lens Affymetrix and Illumina microarray datasets representing a broad range of embryonic and postnatal stages from wild-type and specific gene-perturbation mouse mutants with eye defects. Further, we developed a new user-friendly web interface for direct access and cogent visualization of the curated expression data, which supports convenient searches and a range of downstream analyses. The utility of these new iSyTE 2.0 features is illustrated through examples of established genes associated with lens development and pathobiology, which serve as tutorials for its application by the end-user. 
iSyTE 2.0 will facilitate the prioritization of eye development and disease-linked candidate genes in studies involving transcriptomics or next-generation sequencing data, linkage analysis and GWAS approaches.",2018-01-01 +31845959,A framework for exhaustive modelling of genetic interaction patterns using Petri nets.,"

Motivation

Genetic interaction (GI) patterns are characterized by the phenotypes of interacting single and double mutated gene pairs. Uncovering the regulatory mechanisms of GIs would provide a better understanding of their role in biological processes, diseases and drug response. Computational analyses can provide insights into the underpinning mechanisms of GIs.

Results

In this study, we present a framework for exhaustive modelling of GI patterns using Petri nets (PN). Four-node models were defined and generated on three levels with restrictions, to enable an exhaustive approach. Simulations suggest ∼5 million models of GIs. Generalizing these we propose putative mechanisms for the GI patterns, inversion and suppression. We demonstrate that exhaustive PN modelling enables reasoning about mechanisms of GIs when only the phenotypes of gene pairs are known. The framework can be applied to other GI or genetic regulatory datasets.

Availability and implementation

The framework is available at http://www.ibi.vu.nl/programs/ExhMod.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +31667243,Data on corneal proteome and differentially expressed corneal proteins in highly myopic chicks using a data independent quantification approach.,"Myopia is an abnormal refractive status, explained by an excessive ocular lengthening mostly in posterior segments. Although growing evidence of anterior segments, specifically altered corneal geometries with biomechanical properties in myopes have been reported, the mechanism behind is poorly understood. We hereby prepared experimentally induced highly myopic chicks to investigate the molecular basis of corneal remodeling by applying a novel proteomic approach integrated with information dependent acquisition (IDA) and data independent quantification (SWATH-MS) analysis. As a result, differentially expressed protein biomarkers that might be involved in structural changes were screened based on the first of its kind unique chicken corneal proteome. All generated raw data from IDA and SWATH-MS are accessible at Peptide Atlas public repository (http://www.peptideatlas.org/PASS/PASS01410) for general release.",2019-09-04 +29106630,TranslatomeDB: a comprehensive database and cloud-based analysis platform for translatome sequencing data.,"Translation is a key regulatory step, linking transcriptome and proteome. Two major methods of translatome investigations are RNC-seq (sequencing of translating mRNA) and Ribo-seq (ribosome profiling). To facilitate the investigation of translation, we built a comprehensive database TranslatomeDB (http://www.translatomedb.net/) which provides collection and integrated analysis of published and user-generated translatome sequencing data. The current version includes 2453 Ribo-seq, 10 RNC-seq and their 1394 corresponding mRNA-seq datasets in 13 species. The database emphasizes the analysis functions in addition to the dataset collections. 
Differential gene expression (DGE) analysis can be performed between any two datasets of same species and type, both on transcriptome and translatome levels. The translation indices translation ratios, elongation velocity index and translational efficiency can be calculated to quantitatively evaluate translational initiation efficiency and elongation velocity, respectively. All datasets were analyzed using a unified, robust, accurate and experimentally-verifiable pipeline based on the FANSe3 mapping algorithm and edgeR for DGE analyzes. TranslatomeDB also allows users to upload their own datasets and utilize the identical unified pipeline to analyze their data. We believe that our TranslatomeDB is a comprehensive platform and knowledgebase on translatome and proteome research, releasing the biologists from complex searching, analyzing and comparing huge sequencing data without needing local computational power.",2018-01-01 +25540777,Comprehensive reconstruction and visualization of non-coding regulatory networks in human.,"Research attention has been powered to understand the functional roles of non-coding RNAs (ncRNAs). Many studies have demonstrated their deregulation in cancer and other human disorders. ncRNAs are also present in extracellular human body fluids such as serum and plasma, giving them a great potential as non-invasive biomarkers. However, non-coding RNAs have been relatively recently discovered and a comprehensive database including all of them is still missing. Reconstructing and visualizing the network of ncRNAs interactions are important steps to understand their regulatory mechanism in complex systems. This work presents ncRNA-DB, a NoSQL database that integrates ncRNAs data interactions from a large number of well established on-line repositories. The interactions involve RNA, DNA, proteins, and diseases. ncRNA-DB is available at http://ncrnadb.scienze.univr.it/ncrnadb/. 
It is equipped with three interfaces: web based, command-line, and a Cytoscape app called ncINetView. By accessing only one resource, users can search for ncRNAs and their interactions, build a network annotated with all known ncRNAs and associated diseases, and use all visual and mining features available in Cytoscape.",2014-12-10 +27392072,YCRD: Yeast Combinatorial Regulation Database.,"In eukaryotes, the precise transcriptional control of gene expression is typically achieved through combinatorial regulation using cooperative transcription factors (TFs). Therefore, a database which provides regulatory associations between cooperative TFs and their target genes is helpful for biologists to study the molecular mechanisms of transcriptional regulation of gene expression. Because there is no such kind of databases in the public domain, this prompts us to construct a database, called Yeast Combinatorial Regulation Database (YCRD), which deposits 434,197 regulatory associations between 2535 cooperative TF pairs and 6243 genes. The comprehensive collection of more than 2500 cooperative TF pairs was retrieved from 17 existing algorithms in the literature. The target genes of a cooperative TF pair (e.g. TF1-TF2) are defined as the common target genes of TF1 and TF2, where a TF's experimentally validated target genes were downloaded from YEASTRACT database. In YCRD, users can (i) search the target genes of a cooperative TF pair of interest, (ii) search the cooperative TF pairs which regulate a gene of interest and (iii) identify important cooperative TF pairs which regulate a given set of genes. We believe that YCRD will be a valuable resource for yeast biologists to study combinatorial regulation of gene expression. 
YCRD is available at http://cosbi.ee.ncku.edu.tw/YCRD/ or http://cosbi2.ee.ncku.edu.tw/YCRD/.",2016-07-08 +29744300,Gene expression profiles and pathway enrichment analysis of human osteosarcoma cells exposed to sorafenib.,"Sorafenib is an inhibitor of a variety of tyrosine kinase receptors used to treat various cancers including hepatocellular, renal cell and thyroid carcinoma. It has been shown to change various targets associated with osteosarcoma, but the detailed mechanism remains unclear. In order to identify key genes, enriched pathways and important modules during the exposure of human osteosarcoma cells to sorafenib, data for gene expression profiles (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE53155) were downloaded from the GEO database. In total, 61 differentially expressed genes (DEGs) were identified by the R bioconductor packages. Functional and enrichment analyses of DEGs were performed using the DAVID database. These revealed that DEGs were enriched in biological processes, molecular function and KEGG pathway of inflammatory immune response and angiogenesis. A protein-protein interaction network was constructed by string and visualized in cytoscape, and eight genes were selected as hubs: IL8,CXCL2,PTGS2,FOS,CXCL1, C3,EHMT2 and PGF. Subsequently, only one cluster was identified by mcode, which consisted of six nodes (CXCL1,CXCL2,PTGS2,FOS, C3 and PGF) and nine edges. PGF was the seed gene in this cluster. In conclusion, the results of this data mining and integration should help in revealing new mechanisms and targets of sorafenib in inhibiting osteosarcoma.",2018-04-24 +29059382,"dreamBase: DNA modification, RNA regulation and protein binding of expressed pseudogenes in human health and disease.","Although thousands of pseudogenes have been annotated in the human genome, their transcriptional regulation, expression profiles and functional mechanisms are largely unknown. 
In this study, we developed dreamBase (http://rna.sysu.edu.cn/dreamBase) to facilitate the investigation of DNA modification, RNA regulation and protein binding of potential expressed pseudogenes from multidimensional high-throughput sequencing data. Based on ∼5500 ChIP-seq and DNase-seq datasets, we identified genome-wide binding profiles of various transcription-associated factors around pseudogene loci. By integrating ∼18 000 RNA-seq data, we analysed the expression profiles of pseudogenes and explored their co-expression patterns with their parent genes in 32 cancers and 31 normal tissues. By combining microRNA binding sites, we demonstrated complex post-transcriptional regulation networks involving 275 microRNAs and 1201 pseudogenes. We generated ceRNA networks to illustrate the crosstalk between pseudogenes and their parent genes through competitive binding of microRNAs. In addition, we studied transcriptome-wide interactions between RNA binding proteins (RBPs) and pseudogenes based on 458 CLIP-seq datasets. In conjunction with epitranscriptome sequencing data, we also mapped 1039 RNA modification sites onto 635 pseudogenes. This database will provide insights into the transcriptional regulation, expression, functions and mechanisms of pseudogenes as well as their roles in biological processes and diseases.",2018-01-01 +26820405,The Biosurveillance Analytics Resource Directory (BARD): Facilitating the Use of Epidemiological Models for Infectious Disease Surveillance.,"Epidemiological modeling for infectious disease is important for disease management and its routine implementation needs to be facilitated through better description of models in an operational context. A standardized model characterization process that allows selection or making manual comparisons of available models and their results is currently lacking. A key need is a universal framework to facilitate model description and understanding of its features. 
Los Alamos National Laboratory (LANL) has developed a comprehensive framework that can be used to characterize an infectious disease model in an operational context. The framework was developed through a consensus among a panel of subject matter experts. In this paper, we describe the framework, its application to model characterization, and the development of the Biosurveillance Analytics Resource Directory (BARD; http://brd.bsvgateway.org/brd/), to facilitate the rapid selection of operational models for specific infectious/communicable diseases. We offer this framework and associated database to stakeholders of the infectious disease modeling field as a tool for standardizing model description and facilitating the use of epidemiological models.",2016-01-28 +31672983,"The Signaling Pathways Project, an integrated 'omics knowledgebase for mammalian cellular signaling pathways.","Mining of integrated public transcriptomic and ChIP-Seq (cistromic) datasets can illuminate functions of mammalian cellular signaling pathways not yet explored in the research literature. Here, we designed a web knowledgebase, the Signaling Pathways Project (SPP), which incorporates community classifications of signaling pathway nodes (receptors, enzymes, transcription factors and co-nodes) and their cognate bioactive small molecules. We then mapped over 10,000 public transcriptomic or cistromic experiments to their pathway node or biosample of study. To enable prediction of pathway node-gene target transcriptional regulatory relationships through SPP, we generated consensus 'omics signatures, or consensomes, which ranked genes based on measures of their significant differential expression or promoter occupancy across transcriptomic or cistromic experiments mapped to a specific node family. 
Consensomes were validated using alignment with canonical literature knowledge, gene target-level integration of transcriptomic and cistromic data points, and in bench experiments confirming previously uncharacterized node-gene target regulatory relationships. To expose the SPP knowledgebase to researchers, a web browser interface was designed that accommodates numerous routine data mining strategies. SPP is freely accessible at https://www.signalingpathways.org .",2019-10-31 +31880560,Subjective Evaluation of the Spectral Temporal SIMilarity (ST-SIM) Measure for Vibrotactile Quality Assessment.,"Recent standardization efforts for Tactile Internet (TI) and haptic codecs have paved the route for delivering tactile experiences in synchrony with audio and visual interaction components. Since humans are the ultimate consumers of tactile interactions, it is utmost important to develop objective quality assessment measures that are in close agreements with human perception. In this article, we present the results of a large-scale subjective study of a recently proposed objective quality assessment approach for vibrotactile signals called ST-SIM (Spectral Temporal SIMilarity). ST-SIM encompasses two components: perceptual spectral and temporal similarity measures. Two subjective experiments were conducted to validate ST-SIM, and elicited subjective ratings are used to create a VibroTactile Quality Assessment (VTQA) database. The VTQA database together with ST-SIM provide viable means to the development of vibrotactile compression and transmission applications. Our experimental results show that the ST-SIM highly correlates with human opinions in both experiments and significantly outperforms commonly used measures. 
The VTQA database is made publicly available at https://www.raniahassen.com/RESEARCH/.",2019-12-25 +32364431,Highlights from the 2019 European Congress on Treatment and Research in Multiple Sclerosis (ECTRIMS 2019).,"The 2019 ECTRIMS Congress, in Stockholm, has had record-breaking figures for both attendance and scientific production. There were 9361 participants from 100 different countries for a total of 1541 abstracts. Upon invitation of the European Committee for Treatment and Research in Multiple Sclerosis (ECTRIMS) executive committee, the authors of this meeting report assessed abstracts from all poster and oral presentations for novelty, scientific quality and relevance for basic and clinical multiple sclerosis (MS) research. The objective of this report is to highlight a selection of basic, translational and clinical studies out of the many outstanding projects that were presented. Abstracts and references cited in our report were chosen at the discretion of the authors and all co-authors and the ECTRIMS executive committee agreed on the selection. In the event of discrepancies between the abstract and the uploaded poster or presentation, we aimed to present data derived from the poster or presentation. All abstracts are accessible through the ECTRIMS online library ( https://onlinelibrary.ectrimscongress.eu/ectrims/#!*menu=36*browseby=3*sortby=2*ce_id=160 ) and also published in this journal (Volume 25 Issue 2_suppl, September 2019; https://journals.sagepub.com/toc/msja/25/2_suppl ). 
A few additional references from the literature were added but were restricted to the ones that authors considered as absolutely required for an optimized understanding of the topics highlighted.",2020-05-04 +30986271,ETDB-Caltech: A blockchain-based distributed public database for electron tomography.,"Three-dimensional electron microscopy techniques like electron tomography provide valuable insights into cellular structures, and present significant challenges for data storage and dissemination. Here we explored a novel method to publicly release more than 11,000 such datasets, more than 30 TB in total, collected by our group. Our method, based on a peer-to-peer file sharing network built around a blockchain ledger, offers a distributed solution to data storage. In addition, we offer a user-friendly browser-based interface, https://etdb.caltech.edu, for anyone interested to explore and download our data. We discuss the relative advantages and disadvantages of this system and provide tools for other groups to mine our data and/or use the same approach to share their own imaging datasets.",2019-04-15 +26652261,Combined de novo and genome guided assembly and annotation of the Pinus patula juvenile shoot transcriptome.,"

Background

Pines are the most important tree species to the international forestry industry, covering 42 % of the global industrial forest plantation area. One of the most pressing threats to cultivation of some pine species is the pitch canker fungus, Fusarium circinatum, which can have devastating effects in both the field and nursery. Investigation of the Pinus-F. circinatum host-pathogen interaction is crucial for development of effective disease management strategies. As with many non-model organisms, investigation of host-pathogen interactions in pine species is hampered by limited genomic resources. This was partially alleviated through release of the 22 Gbp Pinus taeda v1.01 genome sequence ( http://pinegenome.org/pinerefseq/ ) in 2014. Despite the fact that the fragmented state of the genome may hamper comprehensive transcriptome analysis, it is possible to leverage the inherent redundancy resulting from deep RNA sequencing with Illumina short reads to assemble transcripts in the absence of a completed reference sequence. These data can then be integrated with available genomic data to produce a comprehensive transcriptome resource. The aim of this study was to provide a foundation for gene expression analysis of disease response mechanisms in Pinus patula through transcriptome assembly.

Results

Eighteen de novo and two reference based assemblies were produced for P. patula shoot tissue. For this purpose three transcriptome assemblers, Trinity, Velvet/OASES and SOAPdenovo-Trans, were used to maximise diversity and completeness of assembled transcripts. Redundancy in the assembly was reduced using the EvidentialGene pipeline. The resulting 52 Mb P. patula v1.0 shoot transcriptome consists of 52 112 unigenes, 60 % of which could be functionally annotated.

Conclusions

The assembled transcriptome will serve as a major genomic resource for future investigation of P. patula and represents the largest gene catalogue produced to date for this species. Furthermore, this assembly can help detect gene-based genetic markers for P. patula and the comparative assembly workflow could be applied to generate similar resources for other non-model species.",2015-12-12 +32277129,A new tool CovReport generates easy-to-understand sequencing coverage summary for diagnostic reports.,"In order to properly interpret the results of a diagnostic gene panel sequencing test, gene coverage needs to be taken into consideration. If coverage is too low, an additional re-sequencing test is needed to make sure that a pathogenic variant is not missed. To facilitate the interpretation of coverage data, we designed CovReport, a novel easy-to-use visualization tool. CovReport generates a concise coverage summary that allows one-glance assessment of the sequencing test performance. Both gene-level and exon-level coverage can be immediately appreciated and taken into consideration for further medical decisions. CovReport does not require complex installation and can thus be easily implemented in any diagnostic laboratory setting. A user-friendly interface generates a graphic summary of coverage that can be directly included in the diagnostic report. In addition to a stand-alone version, we also provide a command line version of CovReport that can be integrated into any bioinformatics pipeline. This flexible tool is now part of routine sequencing analysis at the Department of Medical Genetics at La Timone Hospital (Marseille, France). CovReport is available at http://jdotsoft.com/CovReport.php. It is implemented in Java and supported on Windows, Mac OS X and Linux.",2020-04-10 +31534687,DNA metabarcoding adds valuable information for management of biodiversity in roadside stormwater ponds.,"

Abstract

Stormwater ponds are used to compensate for the adverse effects that road runoff might have on the natural environment. Depending on their design and placement, stormwater ponds can act as both refugia and traps for local biodiversity. To evaluate the impact of stormwater ponds on biodiversity, it is critical to use effective and precise methods for identification of life associated with the water body. DNA metabarcoding has recently become a promising tool for identification and assessment of freshwater biodiversity. Using both morphology and DNA metabarcoding, we analyze species richness and biological composition of samples from 12 stormwater ponds and investigate the impact of pond size and pollution levels in the sediments and water column on the macroinvertebrate community structure. DNA metabarcoding captured and identified more than twice the number of taxa compared to morphological identification. The (dis)similarity of macroinvertebrate community composition in different ponds showed that the ponds appear better separated in the results obtained by DNA metabarcoding, but that the explained variation is higher for the results obtained by morphological identification, since it provides abundance data. The reliance on morphological methods has limited our perception of the aquatic biodiversity in response to anthropogenic stressors, thereby providing inaccurate information for appropriate design and management of stormwater ponds; these drawbacks can be overcome by DNA metabarcoding. Synthesis and applications. The results indicate that DNA metabarcoding is a useful tool in identifying species, especially Diptera, which are difficult to determine. Application of DNA metabarcoding greatly increases the number of species identified at each sampling site, thereby providing more accurate information regarding the way the ponds function and how they are affected by management.

Open practices

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://www.ebi.ac.uk/ena/data/view/PRJEB30841.",2019-08-02 +31898035,"Interactive, Up-to-date Meta-Analysis of MRI in the Management of Men with Suspected Prostate Cancer.","The aim of this study was to test an interactive up-to-date meta-analysis (iu-ma) of studies on MRI in the management of men with suspected prostate cancer. Based on the findings of recently published systematic reviews and meta-analyses, two freely accessible dynamic meta-analyses (https://iu-ma.org) were designed using the programming language R in combination with the package ""shiny."" The first iu-ma compares the performance of the MRI-stratified pathway and the systematic transrectal ultrasound-guided biopsy pathway for the detection of clinically significant prostate cancer, while the second iu-ma focuses on the use of biparametric versus multiparametric MRI for the diagnosis of prostate cancer. Our iu-mas allow for the effortless addition of new studies and data, thereby enabling physicians to keep track of the most recent scientific developments without having to resort to classical static meta-analyses that may become outdated in a short period of time. Furthermore, the iu-mas enable in-depth subgroup analyses by a wide variety of selectable parameters. Such an analysis is not only tailored to the needs of the reader but is also far more comprehensive than a classical meta-analysis. In that respect, following multiple subgroup analyses, we found that even for various subgroups, detection rates of prostate cancer are not different between biparametric and multiparametric MRI. Secondly, we could confirm the favorable influence of MRI biopsy stratification for multiple clinical scenarios. 
For the future, we envisage the use of this technology in addressing further clinical questions of other organ systems.",2020-06-01 +32246820,A knowledge-based scoring function to assess quaternary associations of proteins.,"

Motivation

The elucidation of all inter-protein interactions would significantly enhance our knowledge of cellular processes at a molecular level. Given the enormity of the problem, the expenses and limitations of experimental methods, it is imperative that this problem is tackled computationally. In silico predictions of protein interactions entail sampling different conformations of the purported complex and then scoring these to assess for interaction viability. In this study, we have devised a new scheme for scoring protein-protein interactions.

Results

Our method, PIZSA (Protein Interaction Z-Score Assessment), is a binary classification scheme for identification of native protein quaternary assemblies (binders/nonbinders) based on statistical potentials. The scoring scheme incorporates residue-residue contact preference on the interface with per residue-pair atomic contributions and accounts for clashes. PIZSA can accurately discriminate between native and non-native structural conformations from protein docking experiments and outperform other contact-based potential scoring functions. The method has been extensively benchmarked and is among the top 6 methods, outperforming 31 other statistical, physics based and machine learning scoring schemes. The PIZSA potentials can also distinguish crystallization artifacts from biological interactions.

Availability and implementation

PIZSA is implemented as a web server at http://cospi.iiserpune.ac.in/pizsa and can be downloaded as a standalone package from http://cospi.iiserpune.ac.in/pizsa/Download/Download.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +30774402,Effect of kiwifruit on metabolic health in patients with cardiovascular risk factors: a systematic review and meta-analysis.,"

Background

Kiwifruit seems to have beneficial effect on metabolic health because it contains abundant phytochemicals and antioxidants. This study aimed to assess the effect of kiwifruit on metabolic health in participants with cardiovascular risk factors.

Methods

Literature was searched from PubMed, CENTRAL, Cumulative Index to Nursing and Allied Health Literature, Web of Science, Scopus, Proquest, Latin American and Caribbean Health Sciences Literature, International Clinical Trials Registry Platform, Australia New Zealand Clinical Trials Registry, https://clinicaltrials.gov/, China National Knowledge Infrastructure, Wanfang Standards Database, European Association for the Study of Diabetes, and American Diabetes Association conferences up to August 2018. Citing references were manually searched. Randomized controlled trials were selected if they evaluated the effect of kiwifruit in patients with cardiovascular risk factors and reported SBP, DBP, total cholesterol (TC), triglyceride (TG), high-density lipoprotein cholesterol (HDL-C), low-density lipoprotein cholesterol (LDL-C), glycated hemoglobin (A1C), fasting plasma glucose (FPG), homeostasis model assessment of insulin resistance (HOMA-IR), 2-hour postprandial glucose, or body weight (BW). Data extraction and study quality assessment were performed independently by two investigators. Any inconsistencies were resolved by a third investigator. Treatment effect was estimated with mean difference (MD). Effect estimates were pooled using the inverse-variance weighted method. Heterogeneity was assessed by the I² and Q statistics.

Results

Five randomized controlled trials involving 489 participants met the inclusion criteria. These included hypercholesterolemia, hypertension, type 2 diabetes mellitus, and male smokers. There was no effect of kiwifruit on SBP (MD, -1.72 mmHg; 95% CI: -4.27 to 0.84); DBP (MD, -2.35 mmHg; 95% CI: -5.10 to 0.41); TC (MD, -0.14 mmol/L; 95% CI: -0.71 to 0.43); TG (MD, -0.23 mmol/L; 95% CI: -0.66 to 0.20); LDL-C (MD, -0.41 mmol/L; 95% CI: -0.99 to 0.18); HDL-C (MD, 0.15 mmol/L; 95% CI: -0.18 to 0.48); FPG (MD, -0.08 mmol/L; 95% CI: -0.37 to 0.21); HOMA-IR (MD, -0.29; 95% CI: -0.61 to 0.02), and BW (MD, -1.08 kg; 95% CI: -4.22 to 2.05).

Conclusion

This meta-analysis suggested no effect of kiwifruit on metabolic health in patients with cardiovascular risk factors, although there seemed to be a trend of improvement after kiwifruit intervention.",2019-01-23 +32227201,FUpred: detecting protein domains through deep-learning-based contact map prediction.,"

Motivation

Protein domains are subunits that can fold and function independently. Correct domain boundary assignment is thus a critical step toward accurate protein structure and function analyses. There is, however, no efficient algorithm available for accurate domain prediction from sequence. The problem is particularly challenging for proteins with discontinuous domains, which consist of domain segments that are separated along the sequence.

Results

We developed a new algorithm, FUpred, which predicts protein domain boundaries utilizing contact maps created by deep residual neural networks coupled with coevolutionary precision matrices. The core idea of the algorithm is to retrieve domain boundary locations by maximizing the number of intra-domain contacts, while minimizing the number of inter-domain contacts from the contact maps. FUpred was tested on a large-scale dataset consisting of 2549 proteins and generated correct single- and multi-domain classifications with a Matthew's correlation coefficient of 0.799, which was 19.1% (or 5.3%) higher than the best machine learning (or threading)-based method. For proteins with discontinuous domains, the domain boundary detection and normalized domain overlapping scores of FUpred were 0.788 and 0.521, respectively, which were 17.3% and 23.8% higher than the best control method. The results demonstrate a new avenue to accurately detect domain composition from sequence alone, especially for discontinuous, multi-domain proteins.

Availability and implementation

https://zhanglab.ccmb.med.umich.edu/FUpred.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +32145017,HLPpred-Fuse: improved and robust prediction of hemolytic peptide and its activity by fusing multiple feature representation.,"MOTIVATION:Therapeutic peptides failing at clinical trials could be attributed to their toxicity profiles like hemolytic activity, which hamper further progress of peptides as drug candidates. The accurate prediction of hemolytic peptides (HLPs) and its activity from the given peptides is one of the challenging tasks in immunoinformatics, which is essential for drug development and basic research. Although there are a few computational methods that have been proposed for this aspect, none of them are able to identify HLPs and their activities simultaneously. RESULTS:In this study, we proposed a two-layer prediction framework, called HLPpred-Fuse, that can accurately and automatically predict both hemolytic peptides (HLPs or non-HLPs) as well as HLPs activity (high and low). More specifically, feature representation learning scheme was utilized to generate 54 probabilistic features by integrating six different machine learning classifiers and nine different sequence-based encodings. Consequently, the 54 probabilistic features were fused to provide sufficiently converged sequence information which was used as an input to extremely randomized tree for the development of two final prediction models which independently identify HLP and its activity. Performance comparisons over empirical cross-validation analysis, independent test and case study against state-of-the-art methods demonstrate that HLPpred-Fuse consistently outperformed these methods in the identification of hemolytic activity. AVAILABILITY AND IMPLEMENTATION:For the convenience of experimental scientists, a web-based tool has been established at http://thegleelab.org/HLPpred-Fuse. CONTACT:glee@ajou.ac.kr or watshara.sho@mahidol.ac.th or bala@ajou.ac.kr. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-06-01 +31220603,IMPRes-Pro: A high dimensional multiomics integration method for in silico hypothesis generation.,"Nowadays, large amounts of omics data have been generated and contributed to increasing knowledge about associated biological mechanisms. A new challenge coming along is how to identify the active pathways and extract useful insights from these data with huge background information and noise. Although biologically meaningful modules can often be detected by many existing informatics tools, it is still hard to interpret or make use of the results towards in silico hypothesis generation and testing. To address this gap, we previously developed the IMPRes (Integrative MultiOmics Pathway Resolution) v 1.0 algorithm, a new step-wise active pathway detection method using a dynamic programming approach. This approach enables the network detection one step at a time, making it easy for researchers to trace the pathways, and leading to more accurate drug design and more effective treatment strategies. In this paper, we present IMPRes-Pro, an enhancement to IMPRes v1.0 by integrating proteomics data along with transcriptomics data and constructing a heterogeneous background network. The evaluation experiment conducted on human primary breast cancer dataset has shown the advantage over the original IMPRes v1.0 method. Furthermore, a case study on human metastatic breast cancer dataset was performed and we have provided several insights regarding the selection of optimal therapy strategy. IMPRes-Pro algorithm and visualization tool is available as a web service at http://digbio.missouri.edu/impres.",2019-06-17 +31516934,RoCoLe: A robusta coffee leaf images dataset for evaluation of machine learning based methods in plant diseases recognition.,"In this article we introduce a robusta coffee leaf images dataset called RoCoLe. 
The dataset contains 1560 leaf images with visible red mites and spots (denoting coffee leaf rust presence) for infection cases and images without such structures for healthy cases. In addition, the data set includes annotations regarding objects (leaves), state (healthy and unhealthy) and the severity of disease (leaf area with spots). Images were all obtained in real-world conditions in the same coffee plants field using a smartphone camera. RoCoLe data set facilitates the evaluation of the performance of machine learning algorithms used in image segmentation and classification problems related to plant diseases recognition. The current dataset is freely and publicly available at https://doi.org/10.17632/c5yvn32dzg.2.",2019-08-19 +29262775,ShinyGPAS: interactive genomic prediction accuracy simulator based on deterministic formulas.,"

Background

Deterministic formulas for the accuracy of genomic predictions highlight the relationships among prediction accuracy and potential factors influencing prediction accuracy prior to performing computationally intensive cross-validation. Visualizing such deterministic formulas in an interactive manner may lead to a better understanding of how genetic factors control prediction accuracy.

Results

The software to simulate deterministic formulas for genomic prediction accuracy was implemented in R and encapsulated as a web-based Shiny application. Shiny genomic prediction accuracy simulator (ShinyGPAS) simulates various deterministic formulas and delivers dynamic scatter plots of prediction accuracy versus genetic factors impacting prediction accuracy, while requiring only mouse navigation in a web browser. ShinyGPAS is available at: https://chikudaisei.shinyapps.io/shinygpas/ .

Conclusion

ShinyGPAS is a shiny-based interactive genomic prediction accuracy simulator using deterministic formulas. It can be used for interactively exploring potential factors that influence prediction accuracy in genome-enabled prediction, simulating achievable prediction accuracy prior to genotyping individuals, or supporting in-class teaching. ShinyGPAS is open source software and it is hosted online as a freely available web-based resource with an intuitive graphical user interface.",2017-12-20 +32124269,Social Inequality and Diabetes: A Commentary.,"Socioeconomic inequality of access to healthcare is seen across the spectrum of healthcare, including diabetes. Health inequalities are defined as the 'preventable, unfair and unjust differences in health status between groups, populations or individuals that arise from the unequal distribution of social, environmental and economic conditions within societies, which determine the risk of people getting ill, their ability to prevent sickness or opportunities to take action and access treatment when ill health occurs' (NHS England; https://www.england.nhs.uk/about/equality/equality-hub/resources/). Access to diabetes technologies has improved glycaemic and quality-of-life outcomes for many users. Inability to access such devices, however, is evidenced in National Diabetes Audit data, with a reported tenfold variation in insulin pump use by people with type 1 diabetes across specialist centres. This variation suggests a lack of access to healthcare systems that should be investigated. This article highlights some of the key issues surrounding healthcare inequalities in the management of diabetes.",2020-03-02 +32552674,Deriving stratified effects from joint models investigating gene-environment interactions.,"

Background

Models including an interaction term and performing a joint test of SNP and/or interaction effect are often used to discover Gene-Environment (GxE) interactions. When the environmental exposure is a binary variable, analyses from exposure-stratified models which consist of estimating genetic effect in unexposed and exposed individuals separately can be of interest. In large-scale consortia focusing on GxE interactions in which only the joint test has been performed, it may be challenging to get summary statistics from both exposure-stratified and marginal (i.e., not accounting for interaction) models.

Results

In this work, we developed a simple framework to estimate summary statistics in each stratum of a binary exposure and in the marginal model using summary statistics from the ""joint"" model. We performed simulation studies to assess our estimators' accuracy and examined potential sources of bias, such as correlation between genotype and exposure and differing phenotypic variances within exposure strata. Results from these simulations highlight the high theoretical accuracy of our estimators and yield insights into the impact of potential sources of bias. We then applied our methods to real data and demonstrate our estimators' retained accuracy after filtering SNPs by sample size to mitigate potential bias.

Conclusions

These analyses demonstrated the accuracy of our method in estimating both stratified and marginal summary statistics from a joint model of gene-environment interaction. In addition to facilitating the interpretation of GxE screenings, this work could be used to guide further functional analyses. We provide a user-friendly Python script to apply this strategy to real datasets. The Python script and documentation are available at https://gitlab.pasteur.fr/statistical-genetics/j2s.",2020-06-18 +31418769,MEPSAnd: minimum energy path surface analysis over n-dimensional surfaces.,"

Summary

n-dimensional energy surfaces are becoming computationally accessible, yet interpreting their information is not straightforward. We present minimum energy path surface analysis over n-dimensional surfaces (MEPSAnd), an open source GUI-based program that natively calculates minimum energy paths across energy surfaces of any number of dimensions. Among other features, MEPSAnd can compute the path through lowest barriers and automatically provide a set of alternative paths. MEPSAnd offers distinct plotting solutions as well as direct python scripting.

Availability and implementation

MEPSAnd is freely available (under GPLv3 license) at: http://bioweb.cbm.uam.es/software/MEPSAnd/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +30715136,A unified approach for sparse dynamical system inference from temporal measurements.,"MOTIVATION:Temporal variations in biological systems and more generally in natural sciences are typically modeled as a set of ordinary, partial or stochastic differential or difference equations. Algorithms for learning the structure and the parameters of a dynamical system are distinguished based on whether time is discrete or continuous, observations are time-series or time-course and whether the system is deterministic or stochastic, however, there is no approach able to handle the various types of dynamical systems simultaneously. RESULTS:In this paper, we present a unified approach to infer both the structure and the parameters of non-linear dynamical systems of any type under the restriction of being linear with respect to the unknown parameters. Our approach, which is named Unified Sparse Dynamics Learning (USDL), constitutes of two steps. First, an atemporal system of equations is derived through the application of the weak formulation. Then, assuming a sparse representation for the dynamical system, we show that the inference problem can be expressed as a sparse signal recovery problem, allowing the application of an extensive body of algorithms and theoretical results. Results on simulated data demonstrate the efficacy and superiority of the USDL algorithm under multiple interventions and/or stochasticity. Additionally, USDL's accuracy significantly correlates with theoretical metrics such as the exact recovery coefficient. On real single-cell data, the proposed approach is able to induce high-confidence subgraphs of the signaling pathway. AVAILABILITY AND IMPLEMENTATION:Source code is available at Bioinformatics online. USDL algorithm has been also integrated in SCENERY (http://scenery.csd.uoc.gr/); an online tool for single-cell mass cytometry analytics. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-09-01 +32401319,Ewastools: Infinium Human Methylation BeadChip pipeline for population epigenetics integrated into Galaxy. ,"Infinium Human Methylation BeadChip is an array platform for complex evaluation of DNA methylation at an individual CpG locus in the human genome based on Illumina's bead technology and is one of the most common techniques used in epigenome-wide association studies. Finding associations between epigenetic variation and phenotype is a significant challenge in biomedical research. The newest version, HumanMethylationEPIC, quantifies the DNA methylation level of 850,000 CpG sites, while the previous versions, HumanMethylation450 and HumanMethylation27, measured >450,000 and 27,000 loci, respectively. Although a number of bioinformatics tools have been developed to analyse this assay, they require some programming skills and experience in order to be usable. We have developed a pipeline for the Galaxy platform for those without experience aimed at DNA methylation analysis using the Infinium Human Methylation BeadChip. Our tool is integrated into Galaxy (http://galaxyproject.org), a web-based platform. This allows users to analyse data from the Infinium Human Methylation BeadChip in the easiest possible way. The pipeline provides a group of integrated analytical methods wrapped into an easy-to-use interface. Our tool is available from the Galaxy ToolShed, GitHub repository, and also as a Docker image. 
The aim of this project is to make Infinium Human Methylation BeadChip analysis more flexible and accessible to everyone.",2020-05-01 +23493258,"Microbial variome database: point mutations, adaptive or not, in bacterial core genomes.","Analysis of genetic differences (gene presence/absence and nucleotide polymorphisms) among strains of a bacterial species is crucial to understanding molecular mechanisms of bacterial pathogenesis and selecting targets for novel antibacterial therapeutics. However, lack of genome-wide association studies on large and epidemiologically well-defined strain collections from the same species makes it difficult to identify the genes under positive selection and define adaptive polymorphisms in those genes. To address this need and to overcome existing limitations, we propose to create a ""microbial variome""--a species-specific resource database of genomic variations based on molecular evolutionary analysis. Here, we present prototype variome databases of Escherichia coli and Salmonella enterica subspecies enterica (http://depts.washington.edu/sokurel/variome, last accessed March 26, 2013). The prototypes currently include the point mutations data of core protein-coding genes from completely sequenced genomes of 22 E. coli and 17 S. enterica strains. These publicly available databases allow for single- and multiple-field sorting, filtering, and searching of the gene variability data and the potential adaptive significance. Such resource databases would immensely help experimental research, clinical diagnostics, epidemiology, and environmental control of human pathogens.",2013-03-14 +31453287,"Dataset of the frequency patterns of publications annotated to human protein-coding genes, their protein products and genetic relevance.","We present data concerning the distribution of scientific publications for human protein-coding genes together with their protein products and genetic relevance. 
We annotated the gene2pubmed dataset Maglott et al., 2007 provided by the NCBI (National Center for Biotechnology Information) with publication years, genetic metadata corresponding to Online Mendelian Inheritance in Man (OMIM) Hamosh et al., 2005 entries and the frequency of their appearance in Genome-Wide Association Studies (GWAS) Buniello et al., 2019 provided by the European Bioinformatics Institute (EBI) using the KNIME® Analytics Platform Berthold et al., 2008. The results of this data integration process comprise two datasets: 1) A dataset containing information on all human protein-coding genes that can be used to analyse the number of scientific publications in context of the potential disease relevance of the individual genes. 2) A table with the annual and cumulated number of PubMed entries. For further interpretation of the data presented in this article, please see the research article 'Target 2035 - probing the human proteome' by Carter et al. https://doi.org/10.1016/j.drudis.2019.06.020 Carter et al., 2019.",2019-07-18 +30497261,Comprehensive 3D-QSAR Model Predicts Binding Affinity of Structurally Diverse Sigma 1 Receptor Ligands.,"The Sigma 1 Receptor (S1R) has attracted intense interest as a pharmaceutical target for various therapeutic indications, including the treatment of neuropathic pain and the potentiation of opioid analgesia. Efforts by drug developers to rationally design S1R antagonists have been spurred recently by the 2016 publication of the high-resolution X-ray crystal structure of the ligand-bound human S1R. Until now, however, the absence in the published literature of a single, large-scale, and comprehensive quantitative structure-activity relationship (QSAR) model that encompasses a structurally diverse collection of S1R ligands has impaired rapid progress. 
To our best knowledge, the present study represents the first report of a statistically robust and highly predictive 3D-QSAR model (R2 = 0.92, Q2 = 0.62, Rpred2 = 0.81) based on the X-ray crystal structure of human S1R and constructed from a pooled compilation of 180 S1R antagonists that encompass five structurally diverse chemical families investigated using identical experimental protocols. Best practices, as recommended by the Organization for Economic Cooperation and Development (OECD: http://www.oecd.org/ ), were adopted for pooling data from disparate sources and for QSAR model development and both internal and external model validation. The practical utility of the final 3D-QSAR model was tested by virtual screening of the DrugBank database of FDA approved drugs supplemented by eight reported S1R antagonists. Among the top-ranked 40 DrugBank hits, four approved drugs which were previously unknown as S1R antagonists were tested using in vitro radiolabeled human S1R binding assays. Of these, two drugs (diphenhydramine and phenyltoloxamine) exhibited potent S1R binding affinity with Ki = 58 nM and 160 nM, respectively. As diphenhydramine is approved as an antiallergic, and phenyltoloxamine as an analgesic and sedative, each of these compounds represents a viable starting point for a drug discovery campaign aimed at the development of novel S1R antagonists for a wide range of therapeutic indications.",2018-12-14 +29644465,Impact of one-to-one tutoring on fundamentals of laparoscopic surgery (FLS) passing rate in a single center experience outside the United States: a randomized controlled trial.,"

Background

Outside the US, FLS certification is not required and its teaching methods are not well standardized. Even if the FLS was designed as a ""stand alone"" training system, most academic institutions offer support to residents during training. We present the first systematic application of FLS in Italy. Our aim was to evaluate the role of mentoring/coaching on FLS training in terms of the passing rate and global performance in the search for resource optimization.

Methods

Sixty residents in general surgery, obstetrics & gynecology, and urology were selected to be enrolled in a randomized controlled trial, practicing FLS with the goal of passing a simulated final exam. The control group practiced exclusively with video material from SAGES, whereas the interventional group was supported by a mentor.

Results

Forty-six subjects met the requirements and completed the trial. For the other 14 subjects no results are available for comparison. One subject for each group failed the exam, resulting in a passing rate of 95.7%, with no obvious differences between groups. Subgroup analysis did not reveal any difference between the groups for FLS tasks.

Conclusion

We confirm that methods other than video instruction and deliberate FLS practice are not essential to pass the final exam. Based on these results, we suggest the introduction of the FLS system even where a trained tutor is not available. This trial is the first single institution application of the FLS in Italy and one of the few experiences outside the US. Trial Number: NCT02486575 ( https://www.clinicaltrials.gov ).",2018-04-11 +27242036,CoopTFD: a repository for predicted yeast cooperative transcription factor pairs. ,"In eukaryotic cells, transcriptional regulation of gene expression is usually accomplished by cooperative Transcription Factors (TFs). Therefore, knowing cooperative TFs is helpful for uncovering the mechanisms of transcriptional regulation. In yeast, many cooperative TF pairs have been predicted by various algorithms in the literature. However, until now, there is still no database which collects the predicted yeast cooperative TFs from existing algorithms. This prompts us to construct Cooperative Transcription Factors Database (CoopTFD), which has a comprehensive collection of 2622 predicted cooperative TF pairs (PCTFPs) in yeast from 17 existing algorithms. For each PCTFP, our database also provides five types of validation information: (i) the algorithms which predict this PCTFP, (ii) the publications which experimentally show that this PCTFP has physical or genetic interactions, (iii) the publications which experimentally study the biological roles of both TFs of this PCTFP, (iv) the common Gene Ontology (GO) terms of this PCTFP and (v) the common target genes of this PCTFP. Based on the provided validation information, users can judge the biological plausibility of a PCTFP of interest. 
We believe that CoopTFD will be a valuable resource for yeast biologists to study the combinatorial regulation of gene expression controlled by cooperative TFs.Database URL: http://cosbi.ee.ncku.edu.tw/CoopTFD/ or http://cosbi2.ee.ncku.edu.tw/CoopTFD/.",2016-05-30 +30087834,Co-translational folding of α-helical proteins: structural studies of intermediate-length variants of the λ repressor.,"Nascent polypeptide chains fold cotranslationally, but the atomic-level details of this process remain unknown. Here, we report crystallographic, de novo modeling, and spectroscopic studies of intermediate-length variants of the λ repressor N-terminal domain. Although the ranges of helical regions of the half-length variant were almost identical to those of the full-length protein, the relative orientations of these helices in the intermediate-length variants differed. Our results suggest that cotranslational folding of the λ repressor initially forms a helical structure with a transient conformation, as in the case of a molten globule state. This conformation subsequently matures during the course of protein synthesis.

Database

Structural data are available in the PDB under the accession numbers http://www.rcsb.org/pdb/search/structidSearch.do?structureId=5ZCA and http://www.rcsb.org/pdb/search/structidSearch.do?structureId=3WOA.",2018-06-27 +28914531,Systems-Level Annotation of a Metabolomics Data Set Reduces 25 000 Features to Fewer than 1000 Unique Metabolites.,"When using liquid chromatography/mass spectrometry (LC/MS) to perform untargeted metabolomics, it is now routine to detect tens of thousands of features from biological samples. Poor understanding of the data, however, has complicated interpretation and masked the number of unique metabolites actually being measured in an experiment. Here we place an upper bound on the number of unique metabolites detected in Escherichia coli samples analyzed with one untargeted metabolomics method. We first group multiple features arising from the same analyte, which we call ""degenerate features"", using a context-driven annotation approach. Surprisingly, this analysis revealed thousands of previously unreported degeneracies that reduced the number of unique analytes to ∼2961. We then applied an orthogonal approach to remove nonbiological features from the data using the 13C-based credentialing technology. This further reduced the number of unique analytes to less than 1000. Our 90% reduction in data is 5-fold greater than previously published studies. On the basis of the results, we propose an alternative approach to untargeted metabolomics that relies on thoroughly annotated reference data sets. 
To this end, we introduce the creDBle database ( http://creDBle.wustl.edu ), which contains accurate mass, retention time, and MS/MS fragmentation data as well as annotations of all credentialed features.",2017-09-15 +25425035,microPIR2: a comprehensive database for human-mouse comparative study of microRNA-promoter interactions.,"microRNA (miRNA)-promoter interaction resource (microPIR) is a public database containing over 15 million predicted miRNA target sites located within human promoter sequences. These predicted targets are presented along with their related genomic and experimental data, making the microPIR database the most comprehensive repository of miRNA promoter target sites. Here, we describe major updates of the microPIR database including new target predictions in the mouse genome and revised human target predictions. The updated database (microPIR2) now provides ∼80 million human and 40 million mouse predicted target sites. In addition to being a reference database, microPIR2 is a tool for comparative analysis of target sites on the promoters of human-mouse orthologous genes. In particular, this new feature was designed to identify potential miRNA-promoter interactions conserved between species that could be stronger candidates for further experimental validation. We also incorporated additional supporting information to microPIR2 such as nuclear and cytoplasmic localization of miRNAs and miRNA-disease association. Extra search features were also implemented to enable various investigations of targets of interest. Database URL: http://www4a.biotec.or.th/micropir2",2014-11-25 +32797992,A Personalized Medicine Approach for the Management of Spinal Metastases with Cord Compression: Development of a Novel Clinical Prediction Model for Postoperative Survival and Quality of Life.,"Surgery should be considered for patients with metastatic epidural spinal cord compression (MESCC) with a life expectancy of ≥3 months. 
Given the heterogeneity of the clinical presentation and outcomes, clinical prognostic models (CPMs) can assist in tailoring a personalized medicine approach to optimize surgical decision-making. We aimed to develop and internally validate the first CPM of health-related quality of life (HRQoL) and a novel CPM to predict the survival of patients with MESCC treated surgically. Using data from 258 patients (AOSpine North America MESCC study and Nottingham MESCC registry), we created 1-year survival and HRQoL CPMs using a Cox model and logistic regression analysis with manual backward elimination. The outcome measure for HRQoL was the minimal clinical important difference in EuroQol 5-dimension questionnaire scores. Internal validation involved 200 bootstrap iterations, and calibration and discrimination were evaluated. Longer survival was associated with a higher SF-36 physical component score (hazard ratio [HR], 0.96). In contrast, primary tumor other than breast, thyroid, or prostate (unfavorable: HR, 2.57; other: HR, 1.20), organ metastasis (HR, 1.51), male sex (HR, 1.58), and preoperative radiotherapy (HR, 1.53) were not (c-statistic, 0.69; 95% confidence interval, 0.64-0.73). Karnofsky performance status <70% (odds ratio [OR], 2.50), living in North America (OR, 4.06), SF-36 physical component score (OR, 0.95) and SF-36 mental component score (OR, 0.96) were associated with the likelihood of achieving a minimal clinical important difference improvement in the EuroQol 5-Dimension Questionnaire score at 3 months (c-statistic, 0.74; 95% confidence interval, 0.68-0.79). The calibration for both CPMs was very good. We developed and internally validated the first CPMs of survival and HRQoL at 3 months postoperatively in patients with MESCC using the TRIPOD (transparent reporting of a multivariable prediction model for individual prognosis or diagnosis) guidelines. 
A web-based calculator is available (available at: http://spine-met.com) to assist with clinical decision-making.",2020-08-01 +31125047,Sequencing of a 'mouse azoospermia' gene panel in azoospermic men: identification of RNF212 and STAG3 mutations as novel genetic causes of meiotic arrest.,"

Study question

What is the diagnostic potential of next generation sequencing (NGS) based on a 'mouse azoospermia' gene panel in human non-obstructive azoospermia (NOA)?

Summary answer

The diagnostic performance of sequencing a gene panel based on genes associated with mouse azoospermia was relatively successful in idiopathic NOA patients and allowed the discovery of two novel genes involved in NOA due to meiotic arrest.

What is known already

NOA is a largely heterogeneous clinical entity, which includes different histological pictures. In a large proportion of NOA, the aetiology remains unknown (idiopathic NOA) and yet, unknown genetic factors are likely to be involved. The mouse is the most broadly used mammalian model for studying human disease because of its usefulness for genetic manipulation and its genetic and physiological similarities to man. Mouse azoospermia models are available in the Mouse Genome Informatics database (MGI: http://www.informatics.jax.org/).

Study design, size, duration

The first step was the design of a 'mouse azoospermia' gene panel through the consultation of MGI. The second step was NGS analysis of 175 genes in a group of highly selected NOA patients (n = 33). The third step was characterization of the discovered gene defects in human testis tissue, through meiotic studies using surplus testicular biopsy material from the carriers of the RNF212 and STAG3 pathogenic variants. The final step was RNF212 and STAG3 expression analysis in a collection of testis biopsies.

Participants/materials, setting, methods

From a total of 1300 infertile patients, 33 idiopathic NOA patients were analysed in this study, including 31 unrelated men and 2 brothers from a consanguineous family. The testis histology of the 31 unrelated NOA patients was as follows: 20 Sertoli cell-only syndrome (SCOS), 11 spermatogenic arrest (6 spermatogonial arrest and 5 spermatocytic arrest). The two brothers were affected by spermatocytic arrest. DNA extracted from blood was used for NGS on Illumina NextSeq500 platform. Generated sequence data was filtered for rare and potentially pathogenic variants. Functional studies in surplus testicular tissue from the carriers included the investigation of meiotic entry, XY body formation and metaphases by performing fluorescent immunohistochemical staining and immunocytochemistry. mRNA expression analysis through RT-qPCR of RNF212 and STAG3 was carried out in a collection of testis biopsies with different histology.

Main results and the role of chance

Our approach was relatively successful, leading to the genetic diagnosis of one sporadic NOA patient and two NOA brothers. This relatively high diagnostic performance is likely to be related to the stringent patient selection criteria i.e. all known causes of azoospermia were excluded and to the relatively high number of patients with rare testis histology (spermatocytic arrest). All three mutation carriers presented meiotic arrest, leading to the genetic diagnosis of three out of seven cases with this specific testicular phenotype. For the first time, we report biallelic variants in STAG3, in one sporadic patient, and a homozygous RNF212 variant, in the two brothers, as the genetic cause of NOA. Meiotic studies allowed the detection of the functional consequences of the mutations and provided information on the role of STAG3 and RNF212 in human male meiosis.

Limitations, reasons for caution

All genes, with the exception of 5 out of 175, included in the panel cause azoospermia in mice only in the homozygous or hemizygous state. Consequently, apart from the five known dominant genes, heterozygous variants (except compound heterozygosity) in the remaining genes were not taken into consideration as causes of NOA. We identified the genetic cause in approximately half of the patients with spermatocytic arrest. The low number of analysed patients can be considered as a limitation, but it is a very rare testis phenotype. Due to the low frequency of this specific phenotype among infertile men, our finding may be considered of low clinical impact. However, at an individual level, it does have relevance for prognostic purposes prior to testicular sperm extraction.

Wider implications of the findings

Our study represents an additional step towards elucidating the genetic bases of early spermatogenic failure, since we discovered two new genes involved in human male meiotic arrest. We propose the inclusion of RNF212 and STAG3 in a future male infertility diagnostic gene panel. Based on the associated testis phenotype, the identification of pathogenic mutations in these genes also confers a negative predictive value for testicular sperm retrieval. Our meiotic studies provide novel insights into the role of these proteins in human male meiosis. Mutations in STAG3 were first described as a cause of female infertility and ovarian cancer, and Rnf212 knock out in mice leads to male and female infertility. Hence, our results stimulate further research on shared genetic factors causing infertility in both sexes and indicate that genetic counselling should involve not only male but also female relatives of NOA patients.

Study funding/competing interest(s)

This work was funded by the Spanish Ministry of Health Instituto Carlos III-FIS (grant number: FIS/FEDER-PI14/01250; PI17/01822) awarded to CK and AR-E, and by the European Commission, Reproductive Biology Early Research Training (REPROTRAIN, EU-FP7-PEOPLE-2011-ITN289880), awarded to CK, WB, and AE-M. The authors have no conflict of interest.",2019-06-01 +32474202,A novel risk factor for predicting anti-tuberculosis drug resistance in patients with tuberculosis complicated with type 2 diabetes mellitus.,"

Objectives

This study aimed to explore the relationship between glycosylated hemoglobin (HbA1c) and the risk of anti-tuberculosis (TB) drug resistance for TB-type 2 diabetes mellitus (T2DM) patients.

Methods

From March 2014 to June 2019, medical records from multiple centers were searched. Logistic regression analyses were performed. A predictive model for multidrug-resistance (MDR) was developed and validated. Calibration and discrimination of the model were assessed.

Results

Inconsistent results were found in the systematic review. A multicenter chart review with 657 records was thus conducted. The HbA1c <7% group and HbA1c ≥7% group had 390 and 267 patients, respectively. The HbA1c<7% group had a lower risk of developing rifampicin resistance, isoniazid resistance and MDR, with odds ratios (ORs) of 1.904 (p=0.001), 2.896 (p<0.001) and 3.228 (p<0.001), respectively. The between-group differences in the risk of anti-TB drug resistance were analyzed based on data from three provinces in China. After adding HbA1c grading, the predictive model for MDR (https://mengyuan.shinyapps.io/Shinyapp/) showed excellent capacity with an AUC of 75.4% in the training set (Sichuan and Gansu) and 73.9% in the internal validation set (Henan). The performances in calibration, prediction probabilities and net clinical benefit were significantly improved by HbA1c grading.

Conclusions

HbA1c grading was an independent risk factor for isoniazid resistance and MDR in TB-T2DM patients.",2020-05-29 +22899944,The influence of the local neighbourhood environment on walking levels during the Walking for Wellbeing in the West pedometer-based community intervention.,"We investigated the relationship between walking levels and the local neighbourhood physical environment during the Walking for Wellbeing in the West (WWW) randomised pedometer-based community intervention. Walking activity was recorded as step counts at baseline (n = 76), and at 3 months (n = 57), 6 months (n = 54), and 12 months (n = 45) post-intervention. Objective physical environment data were obtained from GIS datasets and street surveys conducted using the SWAT audit tool. Sixty-nine environment variables were reduced to eight environment factors using principal axis factoring, and the relationship between environment factors and (i) step counts, and (ii) the change in step counts relative to baseline, was examined using hierarchical multiple linear regression, controlling for age, gender, income, and deprivation. Five environment factors were significant predictors of step counts, but none were significant predictors of the change in step counts relative to baseline. None of the demographic variables included in the analysis were significant predictors at any stage of the study. Total variance explained by the environment ranged from 6% (P < 0.05) to 34% (P < 0.01), with lowest levels during the initial stages of the study. The physical environment appears to have influenced walking levels during the WWW intervention, and to have contributed to the maintenance of walking levels post-intervention.",2012-07-29 +31161198,DeepSymmetry: using 3D convolutional networks for identification of tandem repeats and internal symmetries in protein structures.,"

Motivation

Thanks to the recent advances in structural biology, nowadays 3D structures of various proteins are solved on a routine basis. A large portion of these structures contain structural repetitions or internal symmetries. To understand the evolution mechanisms of these proteins and how structural repetitions affect the protein function, we need to be able to detect such proteins very robustly. As deep learning is particularly suited to deal with spatially organized data, we applied it to the detection of proteins with structural repetitions.

Results

We present DeepSymmetry, a versatile method based on 3D convolutional networks that detects structural repetitions in proteins and their density maps. Our method is designed to identify tandem repeat proteins, proteins with internal symmetries, symmetries in the raw density maps, their symmetry order and also the corresponding symmetry axes. Detection of symmetry axes is based on learning 6D Veronese mappings of 3D vectors, and the median angular error of axis determination is less than one degree. We demonstrate the capabilities of our method on benchmarks with tandem-repeated proteins and also with symmetrical assemblies. For example, we have discovered about 7800 putative tandem repeat proteins in the PDB.

Availability and implementation

The method is available at https://team.inria.fr/nano-d/software/deepsymmetry. It consists of a C++ executable that transforms molecular structures into volumetric density maps, and a Python code based on the TensorFlow framework for applying the DeepSymmetry model to these maps.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +32691551,[The Efficacy of 17 Cases of Pancreaticoduodenectomy Combined with Vascular Resection and Reconstruction by Using Robotic Operation System (with Video)].,"

Objective

To explore the clinical efficacy of pancreaticoduodenectomy (PD) combined with vascular resection and reconstruction under robotic surgery system in the treatment of borderline resectable pancreatic cancer.

Methods

The clinical data of 17 patients with borderline resectable pancreatic cancer who underwent PD combined with vascular resection and reconstruction (see the Video 1 in Supplemental Contents, http://ykxb.scu.edu.cn/article/doi/10.12182/20200760202) under robotic surgery system between August 2011 and September 2018 was analyzed retrospectively.

Results

There were 4 cases required conversion because of serious tumor invasion and soft pancreas texture, the other 13 cases were successfully completed. 16 cases (94%) achieved margin-negative resection (R0 resection), 14 cases combined with vein resection, and 3 cases combined with arterial resection. The mean operation time was (401±170) min, the mean blood loss was (647±345) mL, the mean postoperative length of hospital stay was (20±8) d. There was no perioperative death. Postoperative pathology findings and follow-up outcomes were as follows: 1 patient was diagnosed as intraductal papillary mucinous neoplasm (IPMN) and 1 patient was diagnosed as pancreatic neuroendocrine tumors (PNET) (Grade 1), 8 patients with pancreatic ductal adenocarcinoma (PDAC). 1 patient with pancreatic neuroendocrine carcinoma (PNEC) died because of tumor recurrence and metastasis during the follow-up period, the median (Min-Max) survival time was 12 (8-26) months. 5 patients with PDAC and 1 patient with malignant IPMN were currently in the follow-up period.

Conclusion

It is safe and feasible to perform RPD with vascular resection and reconstruction. The patient's condition should be fully evaluated before surgery to select the most appropriate treatment.",2020-07-01 +32657362,ganon: precise metagenomics classification against large and up-to-date sets of reference sequences.,"

Motivation

The exponential growth of assembled genome sequences greatly benefits metagenomics studies. However, currently available methods struggle to manage the increasing amount of sequences and their frequent updates. Indexing the current RefSeq can take days and hundreds of GB of memory on large servers. Few methods address these issues thus far, and even though many can theoretically handle large amounts of references, time/memory requirements are prohibitive in practice. As a result, many studies that require sequence classification use often outdated and almost never truly up-to-date indices.

Results

Motivated by those limitations, we created ganon, a k-mer-based read classification tool that uses Interleaved Bloom Filters in conjunction with a taxonomic clustering and a k-mer counting/filtering scheme. Ganon provides an efficient method for indexing references, keeping them updated. It requires <55 min to index the complete RefSeq of bacteria, archaea, fungi and viruses. The tool can further keep these indices up-to-date in a fraction of the time necessary to create them. Ganon makes it possible to query against very large reference sets and therefore it classifies significantly more reads and identifies more species than similar methods. When classifying a high-complexity CAMI challenge dataset against complete genomes from RefSeq, ganon shows strongly increased precision with equal or better sensitivity compared with state-of-the-art tools. With the same dataset against the complete RefSeq, ganon improved the F1-score by 65% at the genus level. It supports taxonomy- and assembly-level classification, multiple indices and hierarchical classification.

Availability and implementation

The software is open-source and available at: https://gitlab.com/rki_bioinformatics/ganon.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-07-01 +31931344,DBMDA: A Unified Embedding for Sequence-Based miRNA Similarity Measure with Applications to Predict and Validate miRNA-Disease Associations.,"MicroRNAs (miRNAs) play a critical role in human diseases. Determining the association between miRNAs and disease contributes to elucidating the pathogenesis of liver diseases and seeking the effective treatment method. Despite great recent advances in the field of the associations between miRNAs and diseases, implementing association verification and recognition efficiently at scale presents serious challenges to biological experimental approaches. Thus, computational methods for predicting miRNA-disease association have become a research hotspot. In this paper, we present a new computational method, named distance-based sequence similarity for miRNA-disease association prediction (DBMDA), that directly learns a mapping from miRNA sequence to a Euclidean space. The notable feature of our approach consists of inferring global similarity from region distances that can be figured by chaos game representation algorithm based on the miRNA sequences. In the 5-fold cross-validation experiment, the area under the curve (AUC) obtained by DBMDA in predicting potential miRNA-disease associations reached 0.9129. To assess the effectiveness of DBMDA more effectively, we compared it with different classifiers and former prediction models. Besides, we constructed two case studies for prostate neoplasms and colon neoplasms. Results show that 39 and 39 out of the top 40 predicted miRNAs were confirmed by other databases, respectively. DBMDA has made new attempts in sequence similarity and achieved excellent results, while at the same time providing a new perspective for predicting the relationship between diseases and miRNAs. 
The source code and datasets explored in this work are available online from the University of Chinese Academy of Sciences (http://220.171.34.3:81/).",2019-12-18 +30867992,Lightweight data management with dtool.,"The explosion in volumes and types of data has led to substantial challenges in data management. These challenges are often faced by front-line researchers who are already dealing with rapidly changing technologies and have limited time to devote to data management. There are good high-level guidelines for managing and processing scientific data. However, there is a lack of simple, practical tools to implement these guidelines. This is particularly problematic in a highly distributed research environment where needs differ substantially from group to group and centralised solutions are difficult to implement and storage technologies change rapidly. To meet these challenges we have developed dtool, a command line tool for managing data. The tool packages data and metadata into a unified whole, which we call a dataset. The dataset provides consistency checking and the ability to access metadata for both the whole dataset and individual files. The tool can store these datasets on several different storage systems, including a traditional file system, object store (S3 and Azure) and iRODS. It includes an application programming interface that can be used to incorporate it into existing pipelines and workflows. The tool has provided substantial process, cost, and peace-of-mind benefits to our data management practices and we want to share these benefits. The tool is open source and available freely online at http://dtool.readthedocs.io.",2019-03-07 +,"Proof of a knowledge database concept. Aubrieta ekimii (Brassicaceae), a new species from NW Anatolia (Turkey): morphological and molecular support","Aubrieta ekimii, a new species from Kocaeli Province (Turkey), is described and illustrated, and its relationship to putatively closest relatives, A. olympica and A. 
pinardii, is discussed. Scanning electron microscopy micrographs of the indumentum of A. ekimii and its relatives are presented. Cytology, ecology, conservation status, and geographical distribution of the novelty and related species are presented. The knowledge database BrassiBase (http://brassibase.cos.uni-heidelberg.de/index.php) is shown herein as a powerful tool to recognize potentially new species. DNA sequence data from the transcribed spacers of nuclear encoded ribosomal RNA (ITS1 and ITS2) was subjected to the respective phylogenetic placement algorithm in BrassiBase and, after adding further sequence information from the plastid trnLF region, tribal-wide phylogenetic analyses were conducted to confirm the systematic placement of the novelty. Because of limited DNA sequence variation, we did not obtain a highly resolved phylogenetic hypothesis of Aubrieta. Our study further highlights Anatolia as a cradle of species diversity with many overlooked and undescribed species.",2015-10-01 +29275134,Comparative Safety of Drugs Targeting the Nitric Oxide Pathway in Pulmonary Hypertension: A Mixed Approach Combining a Meta-Analysis of Clinical Trials and a Disproportionality Analysis From the World Health Organization Pharmacovigilance Database.,"BACKGROUND:Recent guidelines recommend riociguat, a soluble guanylate cyclase (sGC) stimulator, and the type 5 phosphodiesterase inhibitor (PDE5i) tadalafil or sildenafil as treatments for pulmonary arterial hypertension. We compared the safety profiles of sildenafil, tadalafil, and riociguat in pulmonary hypertension. METHODS:We combined two approaches. First, we performed a meta-analysis of safety data extracted from randomized controlled trials. Second, we conducted a disproportionality analysis of data from VigiBase, the World Health Organization's global database of individual case safety reports, to compare the safety profiles with real-life data. 
RESULTS:In the meta-analysis, a significant difference between the three drugs was only detected for gastrointestinal disorders, in disfavor of riociguat (P < .01 for interaction). In the disproportionality analysis, the use of riociguat was associated with fewer reports of visual disorders but increased reporting of gastrointestinal, hemorrhagic, and musculoskeletal disorders compared with sildenafil and tadalafil. Pharmacovigilance signals of hearing/vestibular disorders were heterogeneous: vestibular disorders (dizziness) were reported more frequently for riociguat, whereas hearing disorders (deafness) were reported less frequently compared with PDE5is. CONCLUSIONS:The safety profiles of PDE5is and sGC stimulators significantly differ in pulmonary hypertension. Accordingly, there is a safety rationale in switching between PDE5is and sGC stimulators because of their different side effects. TRIAL REGISTRY:PROSPERO; No.: CRD42016051986; URL: https://www.crd.york.ac.uk/prospero/.",2017-12-21 +25414356,CDD: NCBI's conserved domain database.,"NCBI's CDD, the Conserved Domain Database, enters its 15(th) year as a public resource for the annotation of proteins with the location of conserved domain footprints. Going forward, we strive to improve the coverage and consistency of domain annotation provided by CDD. We maintain a live search system as well as an archive of pre-computed domain annotation for sequences tracked in NCBI's Entrez protein database, which can be retrieved for single sequences or in bulk. We also maintain import procedures so that CDD contains domain models and domain definitions provided by several collections available in the public domain, as well as those produced by an in-house curation effort. The curation effort aims at increasing coverage and providing finer-grained classifications of common protein domains, for which a wealth of functional and structural data has become available. 
CDD curation generates alignment models of representative sequence fragments, which are in agreement with domain boundaries as observed in protein 3D structure, and which model the structurally conserved cores of domain families as well as annotate conserved features. CDD can be accessed at http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml.",2014-11-20 +25241556,[REGIS--Romanian National Registry for Interstitial Lung Diseases and Sarcoidosis: launch of the website and building-up the database].,"REGIS--Romanian National Registry for Interstitial Lung Diseases and Sarcoidosis: launch of the website and building-up the database Interstitial lung diseases (ILD) comprise about 200 different diseases with low prevalence, some evolving towards irreversible lung fibrosis. The diagnosis of each disease involves complex investigations (high resolution CT scan, broncho-alveolar lavage, complex lung function testing, surgical biopsy), but the main element is the expertise of the clinician and the multidisciplinary diagnostic approach. The creation of a national registry for ILD and sarcoidosis allows putting together in the same database numerous cases, now spread around the country. REGIS is the initiative of a group of physicians from ""Marius Nasta"" Institute of Pulmonology Bucharest and from the Pulmonology Dept. of ""Victor Babes"" Infectious Diseases Hospital, Timişoara. REGIS is an online registry, available at www.regis.ro, consisting of several components: 1. The registry per se, in which the accredited physicians will be able to feed information about their patients, by filling-in a questionnaire 2. Educational platform, containing a collection of clinical cases organized according to diagnosis, which is generated anonymously from the data from the registry 3. Patients' page, with information on ILD in general and on the most frequent diseases in the group. 
Expected results are: increasing the physicians' knowledge on ILDs, informing correctly the patients, bringing up to light new cases previously not diagnosed, building up a database for research (prevalence studies, risk factor studies, selection of patients for clinical trials), creating a base for a future national health programme dedicated to idiopathic pulmonary fibrosis patients, preparing future projects for development of a Romanian centre for lung transplantation.",2014-04-01 +,"Cryptic diversity in the long‐horn moth Nemophora degeerella (Lepidoptera: Adelidae) revealed by morphology, DNA barcodes and genome‐wide ddRAD‐seq data","The growth of DNA barcode libraries has now revealed many cases of potentially cryptic diversity in various groups of generally well‐studied European Lepidoptera. In this paper, we revise a complex of cryptic species, which were formerly all classified as one species, Nemophora degeerella (Linnaeus, 1758). We found that this complex consists of three taxa: N. degeerella (Linnaeus, 1758), which is widely distributed across temperate Europe north of the Alps, from Portugal to Finland, Central Russia and Ukraine; N. scopolii sp.n., which inhabits central and southern Europe (Slovakia, southern Germany, Austria, Slovenia and Italy); and N. deceptoriella sp.n. from the Caucasus (Russia and Georgia). These species are separated by subtle but stable external morphological characters (forewing size and pattern, relative size of the labial palpus, scapus and compound eyes) and divergent cytochrome c oxidase subunit I (COI) lineages, with at least one geographical region (Austria to southern Germany and Slovakia) where two of these species (N. degeerella and N. scopolii) co‐occur. 
The characters of the male genitalia and four nuclear markers (CAD, EF‐1a, MDH and MDH; available for two of the three taxa) did not support the separation of the taxa, but data derived from 1363 and 390 restriction‐site associated DNA sequencing (RAD) loci (altogether consisting of 259 311 and 71 778 bp) of four specimens of each N. degeerella and N. scopolii, which were collected mostly from the contact zone strongly supported their distinctiveness as independent lineages. Our study is one of the still quite few cases where morphological and COI analyses are supplemented with nuclear data, and one of the very first cases where next‐generation sequencing based on double‐digest RAD sequencing (ddRAD‐seq) methods have been applied to address taxonomic questions in insects. This published work has been registered in ZooBank: http://zoobank.org/urn:lsid:zoobank.org:pub:FBA1953A‐412E‐4395‐BB36‐B650621DD0D0.",2017-04-01 +24663501,DrugPath: a database for academic investigators to match oncology molecular targets with drugs in development.,"

Purpose

Academic laboratories are developing increasingly large amounts of data that describe the genomic landscape and gene expression patterns of various types of cancers. Such data can potentially identify novel oncology molecular targets in cancer types that may not be the primary focus of a drug sponsor's initial research for an investigational new drug. Obtaining preclinical data that point toward the potential for a given molecularly targeted agent, or a novel combination of agents requires knowledge of drugs currently in development in both the academic and commercial sectors.

Methods

We have developed the DrugPath database ( http://www.drugpath.org ) as a comprehensive, free-of-charge resource for academic investigators to identify agents being developed in academics or industry that may act against molecular targets of interest. DrugPath data on molecular targets overlay the Michigan Molecular Interactions ( http://mimi.ncibi.org ) gene-gene interaction map to facilitate identification of related agents in the same pathway.

Results

The database catalogs 2,081 drug development programs representing 751 drug sponsors and 722 molecular and genetic targets.

Conclusions

DrugPath should assist investigators in identifying and obtaining drugs acting on specific molecular targets for biological and preclinical therapeutic studies.",2014-03-25 +29079682,"ARSDA: A New Approach for Storing, Transmitting and Analyzing Transcriptomic Data.","Two major stumbling blocks exist in high-throughput sequencing (HTS) data analysis. The first is the sheer file size, typically in gigabytes when uncompressed, causing problems in storage, transmission, and analysis. However, these files do not need to be so large, and can be reduced without loss of information. Each HTS file, either in compressed .SRA or plain text .fastq format, contains numerous identical reads stored as separate entries. For example, among 44,603,541 forward reads in the SRR4011234.sra file (from a Bacillus subtilis transcriptomic study) deposited at NCBI's SRA database, one read has 497,027 identical copies. Instead of storing them as separate entries, one can and should store them as a single entry with the SeqID_NumCopy format (which I dub as FASTA+ format). The second is the proper allocation of reads that map equally well to paralogous genes. I illustrate in detail a new method for such allocation. I have developed ARSDA software that implement these new approaches. A number of HTS files for model species are in the process of being processed and deposited at http://coevol.rdc.uottawa.ca to demonstrate that this approach not only saves a huge amount of storage space and transmission bandwidth, but also dramatically reduces time in downstream data analysis. Instead of matching the 497,027 identical reads separately against the B. subtilis genome, one only needs to match it once. ARSDA includes functions to take advantage of HTS data in the new sequence format for downstream data analysis such as gene expression characterization. I contrasted gene expression results between ARSDA and Cufflinks so readers can better appreciate the strength of ARSDA. 
ARSDA is freely available for Windows, Linux, and Macintosh computers at http://dambe.bio.uottawa.ca/ARSDA/ARSDA.aspx.",2017-12-04 +32100154,An Arts on Prescription Programme: Perspectives of the Cultural Institutions.,"Research on Arts on Prescription (AoP) programmes is on the increase and the participants' positive mental health outcomes are well-documented. However, there is insufficient research that considers the participating cultural institutions' perspectives. A qualitative focus group interview was conducted with the participating cultural institutions in an AoP project in Denmark. Representatives from seven cultural institutions participated in the interview. The data was transcribed and analysed using Braun and Clark's (Qual Res Psychol 3(77):77-101. https://doi.org/10.1191/1478088706qp063oa , 2006) thematic approach. The cultural institutions were positive about the interdisciplinary collaboration with the Center for Mental Health and benefited from working with groups of people with mental health problems. They considered the collaboration to have encouraged skills development by working with groups that they did not regularly engage with. If cultural institutions are to engage with the mental health wellbeing agenda then policy-driven initiatives can support collaborations that involve groups of people with mental health problems.",2020-02-25 +26444974,Development of ListeriaBase and comparative analysis of Listeria monocytogenes.,"

Background

Listeria consists of both pathogenic and non-pathogenic species. Reports of similarities in genomic content between some pathogenic and non-pathogenic species necessitate the investigation of these species at the genomic level to understand the evolution of virulence-associated genes. With Listeria genome data growing exponentially, comparative genomic analysis may give better insights into evolution, genetics and phylogeny of Listeria spp., leading to better management of the diseases caused by them.

Description

With this motivation, we have developed ListeriaBase, a web Listeria genomic resource and analysis platform to facilitate comparative analysis of Listeria spp. ListeriaBase currently houses 850,402 protein-coding genes, 18,113 RNAs and 15,576 tRNAs from 285 genome sequences of different Listeria strains. An AJAX-based real time search system implemented in ListeriaBase facilitates searching of this huge genomic data. Our in-house designed comparative analysis tools such as Pairwise Genome Comparison (PGC) tool allowing comparison between two genomes, Pathogenomics Profiling Tool (PathoProT) for comparing the virulence genes, and ListeriaTree for phylogenic classification, were customized and incorporated in ListeriaBase facilitating comparative genomic analysis of Listeria spp. Interestingly, we identified a unique genomic feature in the L. monocytogenes genomes in our analysis. The Auto protein sequences of the serotype 4 and the non-serotype 4 strains of L. monocytogenes possessed unique sequence signatures that can differentiate the two groups. We propose that the aut gene may be a potential gene marker for differentiating the serotype 4 strains from other serotypes of L. monocytogenes.

Conclusions

ListeriaBase is a useful resource and analysis platform that can facilitate comparative analysis of Listeria for the scientific communities. We have successfully demonstrated some key utilities of ListeriaBase. The knowledge that we obtained in the analyses of L. monocytogenes may be important for functional works of this human pathogen in future. ListeriaBase is currently available at http://listeria.um.edu.my .",2015-10-06 +32379923,Is BCG vaccination causally related to reduced COVID-19 mortality?,"The ongoing severe acute respiratory sickness coronavirus 2 (SARS-CoV-2) pandemic has resulted in more than 3,600,000 detected cases of COVID-19 illness and nearly 260,000 deaths worldwide as of May 6, 2020. Recently, BCG vaccination was shown to correlate with reduced COVID-19 case fatality rates (preprint: Miller et al, 2020; preprint: Sala & Miyakawa, 2020; https://www.jsatonotes.com/2020/03/if-i-were-north-americaneuropeanaustral.html). The most recent data from publicly available resources also indicate that both COVID-19 incidence and total deaths are strongly associated with the presence or absence of national mandatory BCG vaccination programs. As seen in Table 1, seven of eight countries with very low numbers of total deaths (< 40 per 1 million population) adopted a mandatory BCG vaccination program using one of a set of 6 separate BCG strains (Table 1). In contrast, COVID-19 mortality was markedly higher in countries where BCG vaccination is not widely administered or is given only to high-risk groups. COVID-19 mortality was also higher in countries where widespread BCG vaccination was discontinued more than 20 years ago and in countries that used the BCG Denmark strain regularly or temporarily. This raises the question of whether BCG vaccination and reduced COVID-19 mortality are causally related. 
An additional question is why different BCG strains may be variably associated with mortality.",2020-05-26 +,2SPD-029 Impact of the implementation of the falsified medicines directive on a healthcare institution,"

Background

The Directive 2011/62/EU (Falsified medicines directive, FMD) provides for measures to prevent the entry into the legal supply chain of falsified medicinal products and has been supplemented by the commission delegated regulation (EU) 2016/161. From February 2019 onwards prescription medicines are required to bear individual safety features that need to be verified and decommissioned by pharmacies before being supplied to the public. While this process has already been tested in some community pharmacies, little is known on the implications the FMD has on healthcare institutions.

Purpose

Aim of the present study was to assess the impact of the implementation of the FMD in a university-based hospital pharmacy that currently provides medicines for approximately 2000 beds and prepares more than 55 000 chemotherapies per year.

Material and methods

In order to simulate the ‘end-to-end’ verification as outlined by the directive, packs of prescription medicines were scanned at goods in and at several points of dispense within the pharmacy. The time required to process the respective number of drugs was measured and clustered for the individual product type.

Results

A total of 1546 packs of 59 different medicinal products were assessed at goods in, which took a median of 2.1 s (0.6–6.5 s) to process each single pack. However, some drugs such as iv-anaesthetics, iv-antibiotics and iv-painkillers, all of which were stored on pallets, required a significantly higher amount of time to verify. The simulation was repeated at four different points of dispense where 2056 packs of 811 different drugs were scanned. Here the amount of time required was not significantly different (median 2 s) from goods in but with a higher variation between the different products. Based on these data we extrapolated that the amount of time needed to process the 2.8 million packs of prescription drugs supplied by our pharmacy is more than 1,500 hours per year.

Conclusion

Our study demonstrates that the implementation of the FMD in the hospital pharmacy is a major challenge. Compared with the community pharmacy, a much greater degree of planning, organisation and technical support is needed to cope with the decommissioning of large numbers of drugs.

References and/or Acknowledgements

1. FMD:https://ec.europa.eu/health/sites/health/files/files/eudralex/vol-1/dir_2011_62/dir_2011_62_en.pdf 2. Commission Delegated Regulation (EU) 2016/161: https://ec.europa.eu/health/sites/health/files/files/eudralex/vol-1/reg_2016_161/reg_2016_161_en.pdf No conflict of interest",2018-01-01 +,4CPS-052 European antibiotic awareness day (eaad) activities across scotland: views and experiences of the community pharmacy team,"

Background

European Antibiotic Awareness Day (EAAD) is a European-wide public health initiative encouraging the responsible use of antibiotics among healthcare professionals and the general public.1 The Scottish Antimicrobial Prescribing Group (SAPG) works with hospital-based antimicrobial pharmacists to deliver activities supporting EAAD across hospital and community, including engagement of patients and the public about the appropriate use of antibiotics. From 2014 onwards, EAAD materials have included a community pharmacy version of a self-help guide published by the Royal College of General Practitioners.2

Purpose

This research aimed to explore the views and experiences of community pharmacy teams across Scotland in using this self-help guide.

Material and methods

Qualitative, semi-structured in-depth telephone and face-to-face interviews were undertaken with a purposive sample of community pharmacy team members, including pharmacists and dispensers. An interview schedule was developed, validated and piloted. Interviews were audio-recorded and transcribed verbatim. Data were analysed thematically using the Framework Approach.

Results

Twenty-eight pharmacists consented to participate and 27 were interviewed. Nineteen were pharmacist employees working mainly in a large chain across five regions, 14 had been practising for up to 5 years. Most interviewees thought that the pharmacy was an ideal place to engage patients in an antimicrobial stewardship initiative with a need for a multi-pronged approach. Although the tool was perceived to be useful, few (10) were aware it existed or had any experience in using it. A lack of training around antimicrobial stewardship was also identified.

Conclusion

It is recommended that EAAD materials need to be more effectively disseminated and pharmacists require more opportunities for specialised training on antimicrobial stewardship. Since this study was undertaken EAAD has featured as the Community Pharmacy Public Health Campaign, with posters and leaflets available in all pharmacies. The Royal Pharmaceutical Society Antimicrobial Resistance and Stewardship strategy launched in 2017 will also provide further support for the role of all pharmacists in tackling antimicrobial resistance through increasing patient awareness.3

References and/or Acknowledgements

1. Department of Health. UK Five-Year Antimicrobial Resistance Strategy 2013 to 2018. London: HM Government, 2013. 2. Royal College of General Practitioners, Treating your infection leaflet. Available at http://www.rcgp.org.uk/clinical-and-research/toolkits/~/link.aspx?_id=9FCF9DA4B4A045519593320478DFD9E7&_z=z (accessed 07/06/2016). 3. The Royal Pharmaceutical Society. Antimicrobial Resistance and Stewardship, 2017. https://www.rpharms.com/making-a-difference/projects-and-campaigns/antimicrobial-resistance-stewardship No conflict of interest",2018-01-01 +31374225,Machine learning and data mining frameworks for predicting drug response in cancer: An overview and a novel in silico screening process based on association rule mining.,"A major challenge in cancer treatment is predicting the clinical response to anti-cancer drugs on a personalized basis. The success of such a task largely depends on the ability to develop computational resources that integrate big ""omic"" data into effective drug-response models. Machine learning is both an expanding and an evolving computational field that holds promise to cover such needs. Here we provide a focused overview of: 1) the various supervised and unsupervised algorithms used specifically in drug response prediction applications, 2) the strategies employed to develop these algorithms into applicable models, 3) data resources that are fed into these frameworks and 4) pitfalls and challenges to maximize model performance. In this context we also describe a novel in silico screening process, based on Association Rule Mining, for identifying genes as candidate drivers of drug response and compare it with relevant data mining frameworks, for which we generated a web application freely available at: https://compbio.nyumc.org/drugs/. 
This pipeline explores with high efficiency large sample-spaces, while is able to detect low frequency events and evaluate statistical significance even in the multidimensional space, presenting the results in the form of easily interpretable rules. We conclude with future prospects and challenges of applying machine learning based drug response prediction in precision medicine.",2019-07-30 +30223042,TSNAdb: A Database for Tumor-specific Neoantigens from Immunogenomics Data Analysis.,"Tumor-specific neoantigens have attracted much attention since they can be used as biomarkers to predict therapeutic effects of immune checkpoint blockade therapy and as potential targets for cancer immunotherapy. In this study, we developed a comprehensive tumor-specific neoantigen database (TSNAdb v1.0), based on pan-cancer immunogenomic analyses of somatic mutation data and human leukocyte antigen (HLA) allele information for 16 tumor types with 7748 tumor samples from The Cancer Genome Atlas (TCGA) and The Cancer Immunome Atlas (TCIA). We predicted binding affinities between mutant/wild-type peptides and HLA class I molecules by NetMHCpan v2.8/v4.0, and presented detailed information of 3,707,562/1,146,961 potential neoantigens generated by somatic mutations of all tumor samples. Moreover, we employed recurrent mutations in combination with highly frequent HLA alleles to predict potential shared neoantigens across tumor patients, which would facilitate the discovery of putative targets for neoantigen-based cancer immunotherapy. TSNAdb is freely available at http://biopharm.zju.edu.cn/tsnadb.",2018-08-01 +29590295,Functional annotation of genomic variants in studies of late-onset Alzheimer's disease.,"Motivation:Annotation of genomic variants is an increasingly important and complex part of the analysis of sequence-based genomic analyses. 
Computational predictions of variant function are routinely incorporated into gene-based analyses of rare-variants, though to date most studies use limited information for assessing variant function that is often agnostic of the disease being studied. Results:In this work, we outline an annotation process motivated by the Alzheimer's Disease Sequencing Project, illustrate the impact of including tissue-specific transcript sets and sources of gene regulatory information and assess the potential impact of changing genomic builds on the annotation process. While these factors only impact a small proportion of total variant annotations (∼5%), they influence the potential analysis of a large fraction of genes (∼25%). Availability and implementation:Individual variant annotations are available via the NIAGADS GenomicsDB, at https://www.niagads.org/genomics/tools-and-software/databases/genomics-database. Annotations are also available for bulk download at https://www.niagads.org/datasets. Annotation processing software is available at http://www.icompbio.net/resources/software-and-downloads/. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-08-01 +33184250,Correction to: Visual exploration of microbiome data. ,"Correction to: J Biosci (2019) 44:119 https://doi.org/10.1007/s12038-019-9933-z In the October 2019 Special Issue of the Journal of Biosciences on Current Trends in Microbiome Research, in the Review article titled ""Visual exploration of microbiome data"" by Bhusan K. Kuntal and Sharmila S. Mande (DOI: 10.1007/s12038-019-9933-z; Vol. 44, Article No. 119), affiliation 3 for Bhusan K. Kuntal was incorrectly mentioned as ""Academy of Scientific and Innovative Research, CSIR-National Chemical Laboratory Campus, Pune 411008, India''. 
The correct affiliation should read as ''Academy of Scientific and Innovative Research (AcSIR), Ghaziabad 201 002, India"".",2020-01-01 +28595571,CottonFGD: an integrated functional genomics database for cotton.,"

Background

Cotton (Gossypium spp.) is the most important fiber and oil crop in the world. With the emergence of huge -omics data sets, it is essential to have an integrated functional genomics database that allows worldwide users to quickly and easily fetch and visualize genomic information. Currently available cotton-related databases have some weakness in integrating multiple kinds of -omics data from multiple Gossypium species. Therefore, it is necessary to establish an integrated functional genomics database for cotton.

Description

We developed CottonFGD (Cotton Functional Genomic Database, https://cottonfgd.org ), an integrated database that includes genomic sequences, gene structural and functional annotations, genetic marker data, transcriptome data, and population genome resequencing data for all four of the sequenced Gossypium species. It consists of three interconnected modules: search, profile, and analysis. These modules make CottonFGD enable both single gene review and batch analysis with multiple kinds of -omics data and multiple species. CottonFGD also includes additional pages for data statistics, bulk data download, and a detailed user manual.

Conclusion

Equipped with specialized functional modules and modernized visualization tools, and populated with multiple kinds of -omics data, CottonFGD provides a quick and easy-to-use data analysis platform for cotton researchers worldwide.",2017-06-08 +30445495,GRNBoost2 and Arboreto: efficient and scalable inference of gene regulatory networks.,"

Summary

Inferring a Gene Regulatory Network (GRN) from gene expression data is a computationally expensive task, exacerbated by increasing data sizes due to advances in high-throughput gene profiling technology, such as single-cell RNA-seq. To equip researchers with a toolset to infer GRNs from large expression datasets, we propose GRNBoost2 and the Arboreto framework. GRNBoost2 is an efficient algorithm for regulatory network inference using gradient boosting, based on the GENIE3 architecture. Arboreto is a computational framework that scales up GRN inference algorithms complying with this architecture. Arboreto includes both GRNBoost2 and an improved implementation of GENIE3, as a user-friendly open source Python package.

Availability and implementation

Arboreto is available under the 3-Clause BSD license at http://arboreto.readthedocs.io.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-06-01 +31596872,Clinical significance and prognostic role of hypoxia-induced microRNA 382 in gastric adenocarcinoma.,"Hypoxia and angiogenesis are critical components in the progression of solid cancer, including gastric cancers (GCs). miR-382 has been identified as a hypoxia-induced miR (hypoxamiR), but the clinical significance in GCs has not been identified yet. To explore the clinical and prognostic importance of miR-382 in GCs, the surgical specimens of 398 patients with GCs in KNU hospital in Korea, the total of 183 patients was randomly selected using simple sampling methods and big data with 446 GCs and 45 normal tissues from the data portal (https://portal.gdc.cancer.gov/) were analysed. Expression of miR-382 as well as miR-210, as a positive control hypoxamiR by qRT-PCR in histologically malignant region of GCs showed significantly positive correlation (R = 0.516, p<0.001). High miR-210 and miR-382 expression was significantly correlated with unfavorable prognosis including advanced GCs (AGC), higher T category, N category, pathologic TNM stage, lymphovascular invasion, venous invasion, and perinueral invasion, respectively (all p<0.05). In univariate analysis, high miR-210 expression was significantly associated with worse overall survival (OS) (p = 0.036) but not high miR-382. In paired 60 gastric normal and cancer tissues, miR-382 expression in cancer tissues was significantly higher than normal counterpart (p = 0.003), but not miR-210 expression. However, by increasing the patient number from the big data analysis, miR-210 as well as miR-382 expression in tumor tissues was significantly higher than the normal tissues. Our results suggest that miR-382, as novel hypoxamiR, can be a prognostic marker for advanced GCs and might be correlated with metastatic potential. miR-382 might play important roles in the aggressiveness, progression and prognosis of GCs. 
In addition, miR-382 give a predictive marker for progression of GCs compared to the normal or preneoplastic lesion.",2019-10-09 +30403753,A computational strategy for finding novel targets and therapeutic compounds for opioid dependence.,"Opioids are widely used for treating different types of pains, but overuse and abuse of prescription opioids have led to opioid epidemic in the United States. Besides analgesic effects, chronic use of opioid can also cause tolerance, dependence, and even addiction. Effective treatment of opioid addiction remains a big challenge today. Studies on addictive effects of opioids focus on striatum, a main component in the brain responsible for drug dependence and addiction. Some transcription regulators have been associated with opioid addiction, but relationship between analgesic effects of opioids and dependence behaviors mediated by them at the molecular level has not been thoroughly investigated. In this paper, we developed a new computational strategy that identifies novel targets and potential therapeutic molecular compounds for opioid dependence and addiction. We employed several statistical and machine learning techniques and identified differentially expressed genes over time which were associated with dependence-related behaviors after exposure to either morphine or heroin, as well as potential transcription regulators that regulate these genes, using time course gene expression data from mouse striatum. Moreover, our findings revealed that some of these dependence-associated genes and transcription regulators are known to play key roles in opioid-mediated analgesia and tolerance, suggesting that an intricate relationship between opioid-induce pain-related pathways and dependence may develop at an early stage during opioid exposure. Finally, we determined small compounds that can potentially target the dependence-associated genes and transcription regulators. 
These compounds may facilitate development of effective therapy for opioid dependence and addiction. We also built a database (http://daportals.org) for all opioid-induced dependence-associated genes and transcription regulators that we discovered, as well as the small compounds that target those genes and transcription regulators.",2018-11-07 +29190397,The international nucleotide sequence database collaboration.,"For more than 30 years, the International Nucleotide Sequence Database Collaboration (INSDC; http://www.insdc.org/) has been committed to capturing, preserving and providing access to comprehensive public domain nucleotide sequence and associated metadata which enables discovery in biomedicine, biodiversity and biological sciences. Since 1987, the DNA Data Bank of Japan (DDBJ) at the National Institute for Genetics in Mishima, Japan; the European Nucleotide Archive (ENA) at the European Molecular Biology Laboratory's European Bioinformatics Institute (EMBL-EBI) in Hinxton, UK; and GenBank at National Center for Biotechnology Information (NCBI), National Library of Medicine, National Institutes of Health in Bethesda, Maryland, USA have worked collaboratively to enable access to nucleotide sequence data in standardized formats for the worldwide scientific community. In this article, we reiterate the principles of the INSDC collaboration and briefly summarize the trends of the archival content.",2018-01-01 +,"Millipede assassins and allies (Heteroptera: Reduviidae: Ectrichodiinae, Tribelocephalinae): total evidence phylogeny, revised classification and evolution of sexual dimorphism","Evolution of sexual dimorphism in animals has long been of interest to scientists, but relatively few studies have reconstructed evolutionary patterns of extreme sexual dimorphism at a phylogenetic scale, especially in insects. Millipede assassin bugs (Heteroptera: Reduviidae: Ectrichodiinae; 736 spp.) 
and their sister taxon, Tribelocephalinae (150 spp.), exhibit sexual dimorphism that ranges from limited to extreme, a phenomenon apparently modulated by female morphology. Here, we reconstruct the first phylogeny for the subfamilies Ectrichodiinae and Tribelocephalinae with comprehensive generic representation (152 taxa in 72 genera) using morphological and molecular data (six gene regions). The combined phylogenetic results indicate that Tribelocephalinae are paraphyletic with respect to Ectrichodiinae, and that Ectrichodiinae themselves are polyphyletic. Based on these results, we synonymize Tribelocephalinae with Ectrichodiinae syn.n., describe three new tribes (Ectrichodiini trib.n., Tribelocodiini trib.n., and Abelocephalini trib.n.) and two new subtribes (Opistoplatyina subtrib.n. and Tribelocephalina subtrib.n.), and revise Tribelocephalini sensu n. Ancestral state reconstruction of sexual dimorphism reconstructed limited sexual dimorphism in the ancestor of Ectrichodiinae sensu n. with at least seven evolutionary transitions to extreme sexual dimorphism within the clade. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:C810E20F‐D66A‐461F‐A0E6‐AB1073EA3E3C.",2017-07-01 +32877220,Severe acute pancreatitis: capillary permeability model linking systemic inflammation to multiorgan failure.,"Severe acute pancreatitis (SAP) includes persistent systemic inflammation (SIRS) and multiorgan failure (MOF). The mechanism of transition from SIRS to MOF is unclear. We developed a fluid compartment model and used clinical data to test predictions. The model includes vascular, interstitial and ""third-space"" compartments with variable permeability of plasma proteins at the capillaries. Consented patients from University of Pittsburgh Medical Center Presbyterian Hospital were studied. 
Preadmission and daily hematocrit (HCT), blood urea nitrogen (BUN), creatine (Cr), albumin (Alb), and total protein (TP) were collected, and nonalbumin plasma protein (NAPP = TP minus the Alb) was calculated. Subjects served as their own controls for trajectory analysis. Of 57 SAP subjects, 18 developed MOF (5 died), and 39 were non-MOF (0 died). Compared with preadmission levels, admission HCT increased in MOF +5.00 [25%-75% interquartile range, IQR] versus non-MOF -0.10 [-1.55, 1.40] (P < 0.002) with HCT > +3 distinguishing MOF from non-MOF (odds ratio 17.7, P = 0.014). Preadmission Alb fell faster in MOF than non-MOF (P < 0.01). By day 2, TP and NAPP dropped in MOF but not non-MOF (P < 0.001). BUN and Cr levels increased in MOF (P = 0.001), but BUN-to-Cr ratios remained constant. Pancreatic necrosis was more common in MOF (56%) than non-MOF (23%). Changing capillary permeability to allow loss of NAPP in this model predicts loss of plasma oncotic pressure and reduced vascular volume, hypotension with prerenal azotemia and acute kidney dysfunction, pancreas necrosis, and pulmonary edema from capillary leak in the lung with acute respiratory distress syndrome. Sequential biomarker analysis in humans with or without MOF is consistent with this model. This study is registered on https://clinicaltrials.gov at NCT03075605.NEW & NOTEWORTHY Acute pancreatitis is a sudden inflammatory response to pancreatic injury that may spread to systemic inflammation, multiorgan failure, and death in some patients. With the use of the predictions of a new mechanistic model, we compared patients with severe acute pancreatitis with or without multiorgan failure. All biomarkers of capillary leak and clinical features of multiorgan failure were accurately predicted. 
This provides a new paradigm for understanding and developing new treatments for patients with severe acute pancreatitis.",2020-09-02 +29257129,"Verification of Arabidopsis stock collections using SNPmatch, a tool for genotyping high-plexed samples.","Large-scale studies such as the Arabidopsis thaliana '1,001 Genomes' Project require routine genotyping of stocks to avoid sample contamination. To genotype samples efficiently and economically, sequencing must be inexpensive and data processing simple. Here we present SNPmatch, a tool that identifies strains (or inbred lines, or accessions) by matching them to a SNP database. We tested the tool by performing low-coverage resequencing of over 2,000 strains from our lab seed stock collection. SNPmatch correctly genotyped samples from 1-fold coverage sequencing data, and could also identify the parents of F1 or F2 individuals. SNPmatch can be run either on the command line or through AraGeno (https://arageno.gmi.oeaw.ac.at), a web interface that permits sample genotyping from a user-uploaded VCF or BED file.",2017-12-19 +29136200,PRODORIC2: the bacterial gene regulation database in 2018.,"Bacteria adapt to changes in their environment via differential gene expression mediated by DNA binding transcriptional regulators. The PRODORIC2 database hosts one of the largest collections of DNA binding sites for prokaryotic transcription factors. It is the result of the thoroughly redesigned PRODORIC database. PRODORIC2 is more intuitive and user-friendly. Besides significant technical improvements, the new update offers more than 1000 new transcription factor binding sites and 110 new position weight matrices for genome-wide pattern searches with the Virtual Footprint tool. Moreover, binding sites deduced from high-throughput experiments were included. Data for 6 new bacterial species including bacteria of the Rhodobacteraceae family were added. 
Finally, a comprehensive collection of sigma- and transcription factor data for the nosocomial pathogen Clostridium difficile is now part of the database. PRODORIC2 is publicly available at http://www.prodoric2.de.",2018-01-01 +31331268,JPhyloIO: a Java library for event-based reading and writing of different phylogenetic file formats through a common interface.,"

Background

Today a variety of phylogenetic file formats exists, some of which are well-established but limited in their data model, while other more recently introduced ones offer advanced features for metadata representation. Although most currently available software only supports the classical formats with a limited metadata model, it would be desirable to have support for the more advanced formats. This is necessary for users to produce richly annotated data that can be efficiently reused and make underlying workflows easily reproducible. A programming library that abstracts over the data and metadata models of the different formats and allows supporting all of them in one step would significantly simplify the development of new and the extension of existing software to address the need for better metadata annotation.

Results

We developed the Java library JPhyloIO, which allows event-based reading and writing of the most common alignment and tree/network formats. It allows full access to all features of the nine currently supported formats. By implementing a single JPhyloIO-based reader and writer, application developers can support all of these formats. Due to the event-based architecture, JPhyloIO can be combined with any application data structure, and is memory efficient for large datasets. JPhyloIO is distributed under LGPL. Detailed documentation and example applications (available on http://bioinfweb.info/JPhyloIO/ ) significantly lower the entry barrier for bioinformaticians who wish to benefit from JPhyloIO's features in their own software.

Conclusion

JPhyloIO enables simplified development of new and extension of existing applications that support various standard formats simultaneously. This has the potential to improve interoperability between phylogenetic software tools and at the same time motivate usage of more recent metadata-rich formats such as NeXML or phyloXML.",2019-07-22 +25591325,YersiniaBase: a genomic resource and analysis platform for comparative analysis of Yersinia.,"

Background

Yersinia is a Gram-negative bacteria that includes serious pathogens such as the Yersinia pestis, which causes plague, Yersinia pseudotuberculosis, Yersinia enterocolitica. The remaining species are generally considered non-pathogenic to humans, although there is evidence that at least some of these species can cause occasional infections using distinct mechanisms from the more pathogenic species. With the advances in sequencing technologies, many genomes of Yersinia have been sequenced. However, there is currently no specialized platform to hold the rapidly-growing Yersinia genomic data and to provide analysis tools particularly for comparative analyses, which are required to provide improved insights into their biology, evolution and pathogenicity.

Description

To facilitate the ongoing and future research of Yersinia, especially those generally considered non-pathogenic species, a well-defined repository and analysis platform is needed to hold the Yersinia genomic data and analysis tools for the Yersinia research community. Hence, we have developed the YersiniaBase, a robust and user-friendly Yersinia resource and analysis platform for the analysis of Yersinia genomic data. YersiniaBase has a total of twelve species and 232 genome sequences, of which the majority are Yersinia pestis. In order to smooth the process of searching genomic data in a large database, we implemented an Asynchronous JavaScript and XML (AJAX)-based real-time searching system in YersiniaBase. Besides incorporating existing tools, which include JavaScript-based genome browser (JBrowse) and Basic Local Alignment Search Tool (BLAST), YersiniaBase also has in-house developed tools: (1) Pairwise Genome Comparison tool (PGC) for comparing two user-selected genomes; (2) Pathogenomics Profiling Tool (PathoProT) for comparative pathogenomics analysis of Yersinia genomes; (3) YersiniaTree for constructing phylogenetic tree of Yersinia. We ran analyses based on the tools and genomic data in YersiniaBase and the preliminary results showed differences in virulence genes found in Yersinia pestis and Yersinia pseudotuberculosis compared to other Yersinia species, and differences between Yersinia enterocolitica subsp. enterocolitica and Yersinia enterocolitica subsp. palearctica.

Conclusions

YersiniaBase offers free access to wide range of genomic data and analysis tools for the analysis of Yersinia. YersiniaBase can be accessed at http://yersinia.um.edu.my .",2015-01-16 +32300503,Pediatric Abdominal X-rays in the Acute Care Setting - Are We Overdiagnosing Constipation?,"Introduction Constipation represents 3% of all office visits to pediatricians and 10% - 45% of consultations with pediatric gastroenterologists. It has been reliably established that the role of abdominal x-rays (AXR) in the diagnosis of constipation in pediatrics is limited; yet, significant overdiagnosis of constipation exists when plain abdominal x-rays are used in the acute setting for abdominal pain or to screen for other disorders. This results in loss of time, resources, exposure to unnecessary radiation, and potentially missing the primary diagnosis. The purpose of this study is to determine the sensitivity and specificity of AXR in diagnosing constipation in the acute setting. Objectives To determine 1) the sensitivity and specificity of plain AXR in the diagnosis of constipation and 2) the effect of age, race, gender, comorbid conditions, and practice setting on the diagnosis of constipation. Methods This study was a historical cohort study of children (two to 18 years of age) who were seen at Ascension St. John Children's Hospital between March 2015 - March 2018 and who had a plain AXR performed during an emergency department (ED) visit or inpatient stay. If AXR results contained keywords, such as ""constipation,"" ""stool load,"" ""fecal retention,"" and ""fecal load,"" the ambulatory medical record, Athena® (http://www.athenahealth.com), was searched to determine if the child had an ambulatory visit in the ensuing 45 days. Chart review was conducted to assess if the diagnosis of constipation was later confirmed by history and physical examination by a pediatrician or gastroenterologist at that visit. 
By comparing data from both encounters, the sensitivity and specificity of plain AXR in diagnosing constipation was assessed. All data were analyzed using the Statistical Package for Social Sciences (SPSS), v. 25.0 (IBM SPSS Statistics, Armonk, NY) and a p-value of 0.05 or less was considered to indicate statistical significance. Results Over the three-year study period, 1,383 AXRs were performed on 1,116 patients. The sensitivity of AXR in the diagnosis of constipation was 73.8%, specificity 26.8%, positive predictive value 46.4%, and negative predictive value of 54.3%. Pediatric gastroenterologists were more likely to diagnose constipation (63.2%) compared to pediatricians (41.4%) and pediatric surgeons (33.3%) (p = 0.04). Conclusions AXRs are not a reliable means of diagnosing constipation. Overall, we found similar sensitivity and specificity of AXR in diagnosing constipation compared to previous studies. Yet, our study gives new insight into the practices around diagnosing constipation in a single-center community hospital pediatric acute setting and the radiology department. This further emphasizes the need to review current practices and impart more education both in the acute care setting and radiology department.",2020-03-15 +32655300,WITMSG: Large-scale Prediction of Human Intronic m6A RNA Methylation Sites from Sequence and Genomic Features.,"

Introduction

N 6-methyladenosine (m6A) is one of the most widely studied epigenetic modifications. It plays important roles in various biological processes, such as splicing, RNA localization and degradation, many of which are related to the functions of introns. Although a number of computational approaches have been proposed to predict the m6A sites in different species, none of them were optimized for intronic m6A sites. As existing experimental data overwhelmingly relied on polyA selection in sample preparation and the intronic RNAs are usually underrepresented in the captured RNA library, the accuracy of general m6A sites prediction approaches is limited for intronic m6A sites prediction task.

Methodology

A computational framework, WITMSG, dedicated to the large-scale prediction of intronic m6A RNA methylation sites in humans has been proposed here for the first time. Based on the random forest algorithm and using only known intronic m6A sites as the training data, WITMSG takes advantage of both conventional sequence features and a variety of genomic characteristics for improved prediction performance of intron-specific m6A sites.

Results and conclusion

It has been observed that WITMSG outperformed competing approaches (trained with all the m6A sites or intronic m6A sites only) in 10-fold cross-validation (AUC: 0.940) and when tested on independent datasets (AUC: 0.946). WITMSG was also applied intronome-wide in humans to predict all possible intronic m6A sites, and the prediction results are freely accessible at http://rnamd.com/intron/.",2020-01-01 +30062557,Mesenchymal stem cells and immune disorders: from basic science to clinical transition.,"As a promising candidate seed cell type in regenerative medicine, mesenchymal stem cells (MSCs) have attracted considerable attention. The unique capacity of MSCs to exert a regulatory effect on immunity in an autologous/allergenic manner makes them an attractive therapeutic cell type for immune disorders. In this review, we discussed the current knowledge of and advances in MSCs, including its basic biological properties, i.e., multilineage differentiation, secretome, and immunomodulation. Specifically, on the basis of our previous work, we proposed three new concepts of MSCs, i.e., ""subtotipotent stem cell"" hypothesis, MSC system, and ""Yin and Yang"" balance of MSC regulation, which may bring new insights into our understanding of MSCs. Furthermore, we analyzed data from the Clinical Trials database ( http://clinicaltrials.gov ) on registered clinical trials using MSCs to treat a variety of immune diseases, such as graft-versus-host disease, systemic lupus erythematosus, and multiple sclerosis. In addition, we highlighted MSC clinical trials in China and discussed the challenges and future directions in the field of MSC clinical application.",2018-07-30 +31064191,Encyclopedia of CLL Subsets - a Unique Bioinformatics Tool and Database for Analysis of Subsets of Stereotypical B-Cell Receptors in CLL.,"

Background

Chronic lymphocytic leukemia (CLL) is clinically and biologically highly variable disease which is closely related with multiple cellular and molecular markers, including sequence motifs of B-cell receptors. These motifs are highly similar (stereotyped) within one third of CLL patients and create homogeneous groups called stereotyped CLL subsets. The homogeneity is reflected also in clinical and biological characteristics of the disease. To facilitate access to the information about individual subsets, we have created a publicly available web-based tool Encyclopedia of CLL Subsets.

Materials and methods

The Encyclopedia of CLL subsets belongs to our bioinformatics platform Antigen Receptor Research Tool (ARResT) developed for analysis, clustering, and annotation of immunoglobulin sequences. To gather primary knowledge about the subsets, we have analyzed a dataset of 7,500 CLL patients published by Agathangelidis et al in 2012 [1]. We have created an overview of major stereotyped subsets and their characteristics. Additional clinical and cytogenomic information about individual subsets has been obtained by machine text processing of available literature from server PubMed and is regularly updated.

Results

We have created a unique web-based application Encyclopedia of CLL Subsets available from http: //arrest.tools/subsets for an interactive access to the information about stereotyped CLL subsets. A user can obtain and compare basic information about the major subsets including their clinical and cytogenomic characteristics. These have been manually curated from machine processed results from PubMed database by experts in CLL research. Through the Encyclopedias user interface, user can also directly use our published tool ARResT/AssignSubsets to assign new immunoglobulin sequences to the major subsets.

Conclusion

The Encyclopedia of CLL Subsets is a publicly available online tool facilitating access to the most recent research knowledge about stereotyped CLL subsets and enabling analysis of own data and interpretation of the results. This gives the Encyclopedia a great potential for its use in clinical routine. This work was supported by Czech Ministry of Health grant No. 34272A. All rights reserved. The authors declare they have no potential conflicts of interest concerning drugs, products, or services used in the study. The Editorial Board declares that the manuscript met the ICMJE recommendation for biomedical papers. Submitted: 1. 3. 2019 Accepted: 4. 3. 2019.",2019-01-01 +29145615,iPTMnet: an integrated resource for protein post-translational modification network discovery.,"Protein post-translational modifications (PTMs) play a pivotal role in numerous biological processes by modulating regulation of protein function. We have developed iPTMnet (http://proteininformationresource.org/iPTMnet) for PTM knowledge discovery, employing an integrative bioinformatics approach-combining text mining, data mining, and ontological representation to capture rich PTM information, including PTM enzyme-substrate-site relationships, PTM-specific protein-protein interactions (PPIs) and PTM conservation across species. iPTMnet encompasses data from (i) our PTM-focused text mining tools, RLIMS-P and eFIP, which extract phosphorylation information from full-scale mining of PubMed abstracts and full-length articles; (ii) a set of curated databases with experimentally observed PTMs; and iii) Protein Ontology that organizes proteins and PTM proteoforms, enabling their representation, annotation and comparison within and across species. 
Presently covering eight major PTM types (phosphorylation, ubiquitination, acetylation, methylation, glycosylation, S-nitrosylation, sumoylation and myristoylation), iPTMnet knowledgebase contains more than 654 500 unique PTM sites in over 62 100 proteins, along with more than 1200 PTM enzymes and over 24 300 PTM enzyme-substrate-site relations. The website supports online search, browsing, retrieval and visual analysis for scientific queries. Several examples, including functional interpretation of phosphoproteomic data, demonstrate iPTMnet as a gateway for visual exploration and systematic analysis of PTM networks and conservation, thereby enabling PTM discovery and hypothesis generation.",2018-01-01 +31465713,Functional Parcellation of the Speech Production Cortex.,"Neuroimaging has revealed a core network of cortical regions that contribute to speech production, but the functional organization of this network remains poorly understood. Purpose We describe efforts to identify reliable boundaries around functionally homogenous regions within the cortical speech motor control network in order to improve the sensitivity of functional magnetic resonance imaging (fMRI) analyses of speech production and thus improve our understanding of the functional organization of speech production in the brain. Method We used a bottom-up, data-driven approach by pooling data from 12 previously conducted fMRI studies of speech production involving the production of monosyllabic and bisyllabic words and pseudowords that ranged from single vowels and consonant-vowel pairs to short sentences (163 scanning sessions, 136 unique participants, 39 different speech conditions). After preprocessing all data through the same pipeline and registering individual contrast maps to a common surface space, hierarchical clustering was applied to contrast maps randomly sampled from the pooled data set in order to identify consistent functional boundaries across subjects and tasks. 
Boundary completion was achieved by applying adaptive smoothing and watershed segmentation to the thresholded population-level boundary map. Hierarchical clustering was applied to the mean within-functional region of interest (fROI) response to identify networks of fROIs that respond similarly during speech. Results We identified highly reliable functional boundaries across the cortical areas involved in speech production. Boundary completion resulted in 117 fROIs in the left hemisphere and 109 in the right hemisphere. Clustering of the mean within-fROI response revealed a core sensorimotor network flanked by a speech motor planning network. The majority of the left inferior frontal gyrus clustered with the visual word form area and brain regions (e.g., anterior insula, dorsal anterior cingulate) associated with detecting salient sensory inputs and choosing the appropriate action. Conclusion The fROIs provide insight into the organization of the speech production network and a valuable tool for studying speech production in the brain by improving within-group and between-groups comparisons of speech-related brain activity. Supplemental Material https://doi.org/10.23641/asha.9402674.",2019-08-29 +24203705,MetaRef: a pan-genomic database for comparative and community microbial genomics.,"Microbial genome sequencing is one of the longest-standing areas of biological database development, but high-throughput, low-cost technologies have increased its throughput to an unprecedented number of new genomes per year. Several thousand microbial genomes are now available, necessitating new approaches to organizing information on gene function, phylogeny and microbial taxonomy to facilitate downstream biological interpretation. MetaRef, available at http://metaref.org, is a novel online resource systematically cataloguing a comprehensive pan-genome of all microbial clades with sequenced isolates. 
It organizes currently available draft and finished bacterial and archaeal genomes into quality-controlled clades, reports all core and pan gene families at multiple levels in the resulting taxonomy, and it annotates families' conservation, phylogeny and consensus functional information. MetaRef also provides a comprehensive non-redundant reference gene catalogue for metagenomic studies, including the abundance and prevalence of all gene families in the >700 shotgun metagenomic samples of the Human Microbiome Project. This constitutes a systematic mapping of clade-specific microbial functions within the healthy human microbiome across multiple body sites and can be used as reference for identifying potential functional biomarkers in disease-associate microbiomes. MetaRef provides all information both as an online browsable resource and as downloadable sequences and tabular data files that can be used for subsequent offline studies.",2013-11-06 +23161681,Update on activities at the Universal Protein Resource (UniProt) in 2013.,"The mission of the Universal Protein Resource (UniProt) (http://www.uniprot.org) is to support biological research by providing a freely accessible, stable, comprehensive, fully classified, richly and accurately annotated protein sequence knowledgebase. It integrates, interprets and standardizes data from numerous resources to achieve the most comprehensive catalogue of protein sequences and functional annotation. UniProt comprises four major components, each optimized for different uses, the UniProt Archive, the UniProt Knowledgebase, the UniProt Reference Clusters and the UniProt Metagenomic and Environmental Sequence Database. UniProt is produced by the UniProt Consortium, which consists of groups from the European Bioinformatics Institute (EBI), the SIB Swiss Institute of Bioinformatics (SIB) and the Protein Information Resource (PIR). 
UniProt is updated and distributed every 4 weeks and can be accessed online for searches or downloads.",2012-11-17 +31888790,The role of antioxidants and 25-hydroxyvitamin D during pregnancy in the development of allergic diseases in early school-age children - Polish Mother and Child Cohort Study.,"Purpose: Based on the available data, alterations of the antioxidant defense as well as the vitamin status in mothers may affect the prenatal process of lung and immune system development as a pathophysiological background of increased prevalence of allergic diseases. The primary aim of the current study was to assess the associations among cord blood concentrations of zinc (Zn); copper (Cu); selenium (Se); β-carotene; and vitamin A, E, and D, and the occurrence of atopic dermatitis, food allergy, allergic rhinitis, and asthma in early school-age children. Methods: We evaluated 211 children, 7-9 years old, from the Polish Mother and Child Cohort Study. the women were interviewed during pregnancy to collect demographic and socioeconomic data, and the medical and reproductive history. At delivery, umbilical cord blood plasma was sampled. Seven to nine years after the birth, the child's exposure and health status (including skin-prick test and spirometry for allergy assessment and urine sample for cotinine level) were examined. In the analyses, a multivariable model was applied. Results: Statistically significant relationships were found among Zn; Cu; Se; and vitamin A, E, and D concentrations in cord blood; and the prevalence of food allergy, allergic rhinitis, atopic dermatitis, and asthma in children ages 7-9 years after adjustment for several confounders. Conclusion: We showed an imbalance in the antioxidant defense system in cord blood, which may lead to the occurrence of allergic diseases later in life. 
The maternal diet may have substantial potential to modify immune tolerance and, consequently, the development of allergic disease in the offspring.Clinical trial NCT01861548, www.clinicaltrials.gov.",2020-01-01 +31504193,Identifying molecular recognition features in intrinsically disordered regions of proteins by transfer learning.,"

Motivation

Protein intrinsic disorder describes the tendency of sequence residues to not fold into a rigid three-dimensional shape by themselves. However, some of these disordered regions can transition from disorder to order when interacting with another molecule in segments known as molecular recognition features (MoRFs). Previous analysis has shown that these MoRF regions are indirectly encoded within the prediction of residue disorder as low-confidence predictions [i.e. in a semi-disordered state P(D)≈0.5]. Thus, what has been learned for disorder prediction may be transferable to MoRF prediction. Transferring the internal characterization of protein disorder for the prediction of MoRF residues would allow us to take advantage of the large training set available for disorder prediction, enabling the training of larger analytical models than is currently feasible on the small number of currently available annotated MoRF proteins. In this paper, we propose a new method for MoRF prediction by transfer learning from the SPOT-Disorder2 ensemble models built for disorder prediction.

Results

We confirm that directly training on the MoRF set with a randomly initialized model yields substantially poorer performance on independent test sets than by using the transfer-learning-based method SPOT-MoRF, for both deep and simple networks. Its comparison to current state-of-the-art techniques reveals its superior performance in identifying MoRF binding regions in proteins across two independent testing sets, including our new dataset of >800 protein chains. These test chains share <30% sequence similarity to all training and validation proteins used in SPOT-Disorder2 and SPOT-MoRF, and provide a much-needed large-scale update on the performance of current MoRF predictors. The method is expected to be useful in locating functional disordered regions in proteins.

Availability and implementation

SPOT-MoRF and its data are available as a web server and as a standalone program at: http://sparks-lab.org/jack/server/SPOT-MoRF/index.php.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +32095047,An in vitro and in silico evaluation of the antibacterial activity of the bioactive compounds in Majapahit (Crescentia cujete L.) fruit.,"

Background and aim

Majapahit (Crescentia cujete L.) fruit extract acts as a natural antibacterial agent due to its bioactive constituents such as tannins, flavonoids, triterpenoids, and saponins. The aim of this study was to determine the antibacterial activity of Majapahit fruit against Vibrio harveyi both in vitro and in silico.

Materials and methods

Column chromatography, minimum inhibitory concentration (MIC) determination, and transmission electron microscopy (TEM) were used for in vitro analysis. In silico analysis was performed using PubChem® database, Pass Online (Way2Drug.com©), Search Tool 17 Interacting Chemicals (STITCH), and UNIPROT database (https://www.uniprot.org/).

Results

The MIC was found to be 0.313 mg/mL. Within the concentration range of 0.313 mg/mL-10 mg/mL, Majapahit fruit extract could inhibit the growth of V. harveyi, while lower concentrations of 0.078 mg/mL and 0.165 mg/mL indicated the presence of bacterial growth. The pathogenic mechanism of V. harveyi on vannamei shrimp (Litopenaeus vannamei) involved targeting cytochrome P450, cyclin-dependent kinase 6, and caspases 3 and 8. This was indicated by cell damage observed through TEM.

Conclusion

This study provides comprehensive results on the potential of Majapahit fruit as a natural antibacterial agent. Thus, Majapahit fruit can be considered for functional food applications.",2019-12-13 +32551342,An effective datasets describing antimicrobial peptide produced from Pediococcus acidilactici - purification and mode of action determined by molecular docking.,"Most of the probiotics Bacterial cells, express native antibacterial genes, resulting in the production of, antimicrobial peptides, which have various applications in biotechnology and drug development. But the identification of antibacterial peptide, structural characterization of antimicrobial peptide and prediction on mode of action. Regardless of the significance of protein manufacturing, three individual factors are required for the production method: gene expression, stabilization and specific peptide purification. Our protocol describes a straightforward technique of detecting and characterizing particular extracellular peptides and enhancing the antimicrobial peptide expression we optimized using low molecular weight peptides. This protocol can be used to improve peptide detection and expression. The following are the benefits of this method, (DOI - https://doi.org/10.1016/j.ijbiomac.2019.10.196 [1]). The data briefly describe a simple method in detection identification, characterization of antimicrobial extracellular peptide, predicating the mode of action of peptide in targeting pathogens (In-silico method), brief method on profiling of antimicrobial peptide and its mode of action [1]. Further the protocol can be used to enhance the specific peptide expressions, detection of peptides. 
The advantages of this technique are presented below:•Characterization protocol of specific antimicrobial peptide•The folded antimicrobial peptide expression were less expressed or non-expressed peptides.•Besides being low cost, less time-consuming, easy to handle, universal and fast to execute, the suggested technique can be used for multiple proteins expressed in probiotics (Lactobacillus species) expression system.",2020-05-22 +32441995,Current and Projected Distributions of Aedes aegypti and Ae. albopictus in Canada and the U.S.,"

Background

Aedes aegypti and Ae. albopictus are mosquito vectors of more than 22 arboviruses that infect humans.

Objectives

Our objective was to develop regional ecological niche models for Ae. aegypti and Ae. albopictus in the conterminous United States and Canada with current observed and simulated climate and land-use data using boosted regression trees (BRTs).

Methods

We used BRTs to assess climatic suitability for Ae. albopictus and Ae. aegypti mosquitoes in Canada and the United States under current and future projected climates.

Results

Models for both species were mostly influenced by minimum daily temperature and demonstrated high accuracy for predicting their geographic ranges under the current climate. The northward range expansion of suitable niches for both species was projected under future climate models. Much of the United States and parts of southern Canada are projected to be suitable for both species by 2100, with Ae. albopictus projected to expand its range north earlier this century and further north than Ae. aegypti.

Discussion

Our projections suggest that the suitable ecological niche for Aedes will expand with climate change in Canada and the United States, thus increasing the risk of Aedes-transmitted arboviruses. Increased surveillance for these vectors and the pathogens they carry would be prudent. https://doi.org/10.1289/EHP5899.",2020-05-22 +31829640,"The well-being profile (WB-Pro): Creating a theoretically based multidimensional measure of well-being to advance theory, research, policy, and practice.","There is no universally agreed definition of well-being as a subjective experience, but Huppert and So (2013) adopted and systematically applied the definition of well-being as positive mental health-the opposite of the common mental disorders described in standard mental health classifications (e.g., Diagnostic and Statistical Manual of Mental Disorders). We extended their theoretical approach to include multi-item scales, using 2 waves of nationally representative U.S. adult samples to develop, test, and validate our multidimensional measure of well-being (WB-Pro). This resulted in a good-fitting a priori (48-item, 15-factor) model that was invariant over time, education, gender, and age; showed good reliability (coefficient αs .81-.93), test-retest correlation (.73-.85; M = .80), and convergent/discriminant validity based on a multitrait-multimethod analysis, and relations with demographic variables, selected psychological measures, and other multidimensional and purportedly unidimensional well-being measures. Further, we found that items from 2 widely used, purportedly unidimensional well-being measures loaded on different WB-Pro factors consistent with a priori predictions based on the WB-Pro factor structure, thereby calling into question their claimed unidimensionality and theoretical rationale. 
Because some applications require a short global measure, we used a machine-learning algorithm to construct 2 global well-being short versions (five- and 15-item forms) and tested these formative measures in relation to the full-form and validity criteria (to download short and long versions see https://ippe.acu.edu.au/research/research-instruments/wb-pro). The WB-Pro appears to be one of the most comprehensive measures of subjective well-being, based on a sound conceptual model and empirical support, with broad applicability for research and practice, as well as providing a framework for evaluating the breadth of other well-being measures. (PsycINFO Database Record (c) 2020 APA, all rights reserved).",2019-12-12 +32517696,PPAI: a web server for predicting protein-aptamer interactions.,"

Background

The interactions between proteins and aptamers are prevalent in organisms and play an important role in various life activities. Thanks to the rapid accumulation of protein-aptamer interaction data, it is necessary and feasible to construct an accurate and effective computational model to predict aptamers binding to certain interested proteins and protein-aptamer interactions, which is beneficial for understanding mechanisms of protein-aptamer interactions and improving aptamer-based therapies.

Results

In this study, a novel web server named PPAI is developed to predict aptamers and protein-aptamer interactions with key sequence features of proteins/aptamers and a machine learning framework integrated adaboost and random forest. A new method for extracting several key sequence features of both proteins and aptamers is presented, where the features for proteins are extracted from amino acid composition, pseudo-amino acid composition, grouped amino acid composition, C/T/D composition and sequence-order-coupling number, while the features for aptamers are extracted from nucleotide composition, pseudo-nucleotide composition (PseKNC) and normalized Moreau-Broto autocorrelation coefficient. On the basis of these feature sets and balanced the samples with SMOTE algorithm, we validate the performance of PPAI by the independent test set. The results demonstrate that the Area Under Curve (AUC) is 0.907 for prediction of aptamer, while the AUC reaches 0.871 for prediction of protein-aptamer interactions.

Conclusion

These results indicate that PPAI can query aptamers and proteins, predict aptamers and predict protein-aptamer interactions in batch mode precisely and efficiently, which would be a novel bioinformatics tool for the research of protein-aptamer interactions. PPAI web-server is freely available at http://39.96.85.9/PPAI.",2020-06-09 +31769130,Systematic integration of GATA transcription factors and epigenomes via IDEAS paints the regulatory landscape of hematopoietic cells.,"Members of the GATA family of transcription factors play key roles in the differentiation of specific cell lineages by regulating the expression of target genes. Three GATA factors play distinct roles in hematopoietic differentiation. In order to better understand how these GATA factors function to regulate genes throughout the genome, we are studying the epigenomic and transcriptional landscapes of hematopoietic cells in a model-driven, integrative fashion. We have formed the collaborative multi-lab VISION project to conduct ValIdated Systematic IntegratiON of epigenomic data in mouse and human hematopoiesis. The epigenomic data included nuclease accessibility in chromatin, CTCF occupancy, and histone H3 modifications for 20 cell types covering hematopoietic stem cells, multilineage progenitor cells, and mature cells across the blood cell lineages of mouse. The analysis used the Integrative and Discriminative Epigenome Annotation System (IDEAS), which learns all common combinations of features (epigenetic states) simultaneously in two dimensions-along chromosomes and across cell types. The result is a segmentation that effectively paints the regulatory landscape in readily interpretable views, revealing constitutively active or silent loci as well as the loci specifically induced or repressed in each stage and lineage. 
Nuclease accessible DNA segments in active chromatin states were designated candidate cis-regulatory elements in each cell type, providing one of the most comprehensive registries of candidate hematopoietic regulatory elements to date. Applications of VISION resources are illustrated for the regulation of genes encoding GATA1, GATA2, GATA3, and Ikaros. VISION resources are freely available from our website http://usevision.org.",2019-11-25 +31304214,Two dimensional non-destructive testing data maps for reinforced concrete slabs with simulated damage.,"This research presents the use of a total of five Non-Destructive Testing Techniques (NDTs) and their combination to detect and quantify subsurface simulated defects in Reinforced Concrete slabs. The NDT techniques were applied on a total of nine 1800 mm × 460 mm reinforced concrete slabs with varying thicknesses of 100 mm, 150 mm and 200 mm. Contour data maps from each technique were prepared. This Data article presents the Non-Destructive Testing Techniques' specifications, experimental set-up and converted 2-Dimensional NDT data maps for reinforced concrete slabs with simulated damage. The experimental research shows that combining multiple techniques together in evaluating the defects give significantly lower error and higher accuracy compared to that from a standalone test. For more details on the accuracy model of the NDTs, refer to the full length article entitled ""Sub-surface simulated damage detection using Non-Destructive Testing Techniques in reinforced-concrete slabs"" https://doi.org/10.1016/j.conbuildmat.2019.04.223 Rathod et al., 2019.",2019-06-17 +29167818,A validation dataset for Macaque brain MRI segmentation.,"Validation data for segmentation algorithms dedicated to preclinical images is fiercely lacking, especially when compared to the large number of databases of Human brain images and segmentations available to the academic community. 
Not only is such data essential for validating methods, it is also needed for objectively comparing concurrent algorithms and detect promising paths, as segmentation challenges have shown for clinical images. The dataset we present here is a first step in this direction. It comprises 10 T2-weighted MRIs of healthy adult macaque brains, acquired on a 7 T magnet, along with corresponding manual segmentations into 17 brain anatomic labelled regions spread over 5 hierarchical levels based on a previously published macaque atlas (Calabrese et al., 2015) [1]. By giving access to this unique dataset, we hope to provide a reference needed by the non-human primate imaging community. This dataset was used in an article presenting a new primate brain morphology analysis pipeline, Primatologist (Balbastre et al., 2017) [2]. Data is available through a NITRC repository (https://www.nitrc.org/projects/mircen_macset).",2017-11-04 +26399886,An accurate and affordable test for the rapid diagnosis of sickle cell disease could revolutionize the outlook for affected children born in resource-limited settings.,"Each year, at least 280,000 children are born with sickle cell disease (SCD) in resource-limited settings. For cost, logistic and political reasons, the availability of SCD testing is limited in such settings and consequently 50-90 % of affected children die undiagnosed before their fifth birthday. The recent development of a point of care method for the diagnosis of SCD - the Sickle SCAN™ device - could afford such children the prompt access to appropriate services that has transformed the outlook for affected children in resource-rich areas. In research published in BMC Medicine, Kanter and colleagues describe a small but carefully conducted study involving 208 children and adults, in which they found that by using Sickle SCAN™ it was possible to diagnose the common forms of SCD with 99 % sensitivity and 99 % specificity, in under 5 minutes. 
If repeatable both in newborn babies and under real-life conditions, and if marketed at an affordable price, Sickle SCAN™ could revolutionize the survival prospects for children born with SCD in resource-limited areas. Please see related article: http://dx.doi.org/10.1186/s12916-015-0473-6.",2015-09-23 +31599833,Development and Validation of a Machine Learning Algorithm for Predicting Response to Anticholinergic Medications for Overactive Bladder Syndrome.,"

Objective

To develop and externally validate a prediction model for anticholinergic response in patients with overactive bladder (OAB).

Methods

A machine learning model to predict the likelihood of anticholinergic treatment failure was constructed using a retrospective data set (n=559) of female patients with OAB who were treated with anticholinergic medications between January 2010 and December 2017. Treatment failure was defined as less than 50% improvement in frequency, urgency, incontinence episodes, and nocturia, and the patient's subjective impression of symptomatic relief. Patients were stratified by age (younger than 40 years, 40-60 years, and older than 60 years), and number of previously failed medications. K-fold stratified cross-validation was performed on each stratum using machine learning algorithms. Of these, the random forest model was the most accurate. This model was refined using internal cross validation within each stratum. The area under the curve (AUC) was calculated for each stratum and used to identify the optimal operating points for prediction of treatment failure. The random forest model was then externally validated using a prospectively collected data set (n=82) of women treated with anticholinergic medications at a different clinical site between January 2018 and December 2018.

Results

The global accuracy of the final model was 80.3% (95% CI 79.1-81.3), and the AUC was 0.77 (95% CI 0.74-0.79). Using the external validation data set, the model's sensitivity and specificity was 80.4% (95% CI 66.5-89.7%) and 77.4% (95% CI 58.6-89.7%), respectively. The model performed best in women aged younger than 40 years (AUC 0.84, 95% CI 0.81-0.84) and worst in women aged older than 60 years who had previously failed medication (AUC 0.71, 95% CI 0.67-0.75).

Conclusion

Our externally validated machine learning prediction model can predict anticholinergic treatment failure during the standard 3-month treatment trial period with greater than 80% accuracy. The model can be accessed at https://oabweb.herokuapp.com/app/pre/.",2019-11-01 +28423831,The BioTop Family of Upper Level Ontological Resources for Biomedicine.,"BioTop is a domain upper level ontology for the life sciences, based on OWL DL, introduced ten years ago. This paper provides an update of the current state of this resource, with a special focus on BioTop's top level, BioTopLite, which currently contains 55 classes, 37 object properties and 247 description logics axioms. A bridging file allows harmonising BioTopLite with the classes of Basic Formal Ontology BFO2. The updated OWL resources are available at http://purl.org/biotop. They build the core of several upper level ontological artefacts including bridging ontologies to other upper level resources.",2017-01-01 +31443860,CALIMA: The semi-automated open-source calcium imaging analyzer.,"

Background and objective

Ever since its discovery, calcium imaging has proven its worth in discovering new insights into the mechanisms of cellular communication. Yet, the analysis of the data generated by calcium imaging experiments demands a large amount of time from researchers. Tools enabling automated and semi-automated analysis are available, but often they allow automating only a part of the data analysis process. Therefore, we developed CALIMA (https://aethelraed.nl/calima), a free and open-source standalone software tool that provides an opportunity to quickly detect cells, to obtain the calcium spikes, and to determine the underlying network structure of neuronal cell cultures.

Methods

Owing to the difference of Gaussians algorithm applied for the cell detection, CALIMA is able to detect regions of interest (ROIs) quickly. The z-scoring algorithm provides a means to set the requirements for spike detection, and the neuronal connections can be reconstructed by analyzing the cross-correlation between the cellular activity. We evaluated CALIMA's reliability, speed, and functionality with a special focus on neuronal cell detection and network reconstruction. The evaluation was performed by using real-life data such as a known example dataset (cultured primary rat cortical neurons, University of Pennsylvania) and by analyzing video graphic footage of in vitro brain cell samples (SH-SY5Y neuroblastoma cultures, one sample with synchronous neuron firing). The obtained results were compared to the corresponding outcomes observed on same datasets for other similar software solutions. Moreover, we compared the results of segmentation and peak detection analysis, the ones obtained using CALIMA and those acquired manually.

Results

CALIMA was able to detect the cells in the cultures within seconds. The average sensitivity was 82% across the datasets checked, comparing favorably with the alternative software solutions. Using the correct parameters, CALIMA's Ca-spikes detection sensitivity reached 96%. Lastly, neuronal networks were reconstructed by combining the data on the ROI's activity and the cell's positions, finding the most likely inter-cell connections.

Conclusions

We found that CALIMA proved to be a robust and fast tool to analyze the data of experiments for the digital reconstruction of the neuronal cellular network while being able to process the analysis steps with minimal user input required and in a time efficient manner.",2019-07-19 +31920488,Application of Machine Learning Methods to Ambulatory Circadian Monitoring (ACM) for Discriminating Sleep and Circadian Disorders.,"The present study proposes a classification model for the differential diagnosis of primary insomnia (PI) and delayed sleep phase disorder (DSPD), applying machine learning methods to circadian parameters obtained from ambulatory circadian monitoring (ACM). Nineteen healthy controls and 242 patients (PI = 184; DSPD = 58) were selected for a retrospective and non-interventional study from an anonymized Circadian Health Database (https://kronowizard.um.es/). ACM records wrist temperature (T), motor activity (A), body position (P), and environmental light exposure (L) rhythms during a whole week. Sleep was inferred from the integrated variable TAP (from temperature, activity, and position). Non-parametric analyses of TAP and estimated sleep yielded indexes of interdaily stability (IS), intradaily variability (IV), relative amplitude (RA), and a global circadian function index (CFI). Mid-sleep and mid-wake times were estimated from the central time of TAP-L5 (five consecutive hours of lowest values) and TAP-M10 (10 consecutive hours of maximum values), respectively. The most discriminative parameters, determined by ANOVA, Chi-squared, and information gain criteria analysis, were employed to build a decision tree, using machine learning. This model differentiated between healthy controls, DSPD and three insomnia subgroups (compatible with onset, maintenance and mild insomnia), with accuracy, sensitivity, and AUC >85%. 
In conclusion, circadian parameters can be reliably and objectively used to discriminate and characterize different sleep and circadian disorders, such as DSPD and OI, which are commonly confounded, and between different subtypes of PI. Our findings highlight the importance of considering circadian rhythm assessment in sleep medicine.",2019-12-10 +31866739,From marine park to future genomic observatory? Enhancing marine biodiversity assessments using a biocode approach.,"Few tropical marine sites have been thoroughly characterised for their animal species, even though they constitute the largest proportion of multicellular diversity. A number of focused biodiversity sampling programmes have amassed immense collections to address this shortfall, but obstacles remain due to the lack of identification tools and large proportion of undescribed species globally. These problems can be partially addressed with DNA barcodes (""biocodes""), which have the potential to facilitate the estimation of species diversity and identify animals to named species via barcode databases. Here, we present the first results of what is intended to be a sustained, systematic study of the marine fauna of Singapore's first marine park, reporting more than 365 animal species, determined based on DNA barcodes and/or morphology represented by 931 specimens (367 zooplankton, 564 macrofauna including 36 fish). Due to the lack of morphological and molecular identification tools, only a small proportion could be identified to species solely based on either morphology (24.5%) or barcodes (24.6%). Estimation of species numbers for some taxa was difficult because of the lack of sufficiently clear barcoding gaps. 
The specimens were imaged and added to ""Biodiversity of Singapore"" (http://singapore.biodiversity.online), which now contains images for > 13,000 species occurring in the country.",2019-12-10 +28290711,Visualising a rare and complex case of advanced hilar cholangiocarcinoma.,"The Toronto Video Atlas of Liver, Pancreas, Biliary, and Transplant Surgery (TVASurg) is a free online library of three-dimensional (3D) animation-enhanced surgical videos, designed to instruct surgical fellows in hepato-pancreato-biliary (HPB) and transplant procedures. The video 'Klatskin tumours: Extended left hepatectomy with complex portal vein reconstruction and in situ cold perfusion of the liver', which is available to watch at http://TVASurg.ca , is a unique and valuable visual resource for surgeons in training to assist them in learning this rare procedure. This paper describes the methodologies used in producing this 3D animation-enhanced surgical video.",2017-01-01 +32133992,Estimating local protein model quality: prospects for molecular replacement.,"Model quality assessment programs estimate the quality of protein models and can be used to estimate local error in protein models. ProQ3D is the most recent and most accurate version of our software. Here, it is demonstrated that it is possible to use local error estimates to substantially increase the quality of the models for molecular replacement (MR). Adjusting the B factors using ProQ3D improved the log-likelihood gain (LLG) score by over 50% on average, resulting in significantly more successful models in MR compared with not using error estimates. 
On a data set of 431 homology models to address difficult MR targets, models with error estimates from ProQ3D received an LLG of >50 for almost half of the models 209/431 (48.5%), compared with 175/431 (40.6%) for the previous version, ProQ2, and only 74/431 (17.2%) for models with no error estimates, clearly demonstrating the added value of using error estimates to enable MR for more targets. ProQ3D is available from http://proq3.bioinfo.se/ both as a server and as a standalone download.",2020-03-03 +32269383,The HDOCK server for integrated protein-protein docking.,"The HDOCK server (http://hdock.phys.hust.edu.cn/) is a highly integrated suite of homology search, template-based modeling, structure prediction, macromolecular docking, biological information incorporation and job management for robust and fast protein-protein docking. With input information for receptor and ligand molecules (either amino acid sequences or Protein Data Bank structures), the server automatically predicts their interaction through a hybrid algorithm of template-based and template-free docking. The HDOCK server distinguishes itself from similar docking servers in its ability to support amino acid sequences as input and a hybrid docking strategy in which experimental information about the protein-protein binding site and small-angle X-ray scattering can be incorporated during the docking and post-docking processes. Moreover, HDOCK also supports protein-RNA/DNA docking with an intrinsic scoring function. The server delivers both template- and docking-based binding models of two molecules and allows for download and interactive visualization. The HDOCK server is user friendly and has processed >30,000 docking jobs since its official release in 2017. 
The server can normally complete a docking job within 30 min.",2020-04-08 +26051885,"Population structure and genetic diversity of the parasite Trichomonas vaginalis in Bristol, UK.","The protozoan parasite Trichomonas vaginalis is the causative agent of trichomoniasis, an extremely common, but non-life-threatening, sexually-transmitted disease throughout the world. Recent population genetics studies of T. vaginalis have detected high genetic diversity and revealed a two-type population structure, associated with phenotypic differences in sensitivity to metronidazole, the drug commonly used for treatment, and presence of T. vaginalis virus. There is currently a lack of data on UK isolates; most isolates examined to date are from the US. Here we used a recently described system for multilocus sequence typing (MLST) of T. vaginalis to study diversity of clinical isolates from Bristol, UK. We used MLST to characterise 23 clinical isolates of T. vaginalis collected from female patients during 2013. Seven housekeeping genes were PCR-amplified for each isolate and sequenced. The concatenated sequences were then compared with data from other MLST-characterised isolates available from http://tvaginalis.mlst.net/ to analyse the population structure and construct phylogenetic trees. Among the 23 isolates from the Bristol population of T. vaginalis, we found 23 polymorphic nucleotide sites, 25 different alleles and 19 sequence types (genotypes). Most isolates had a unique genotype, in agreement with the high levels of heterogeneity observed elsewhere in the world. A two-type population structure was evident from population genetic analysis and phylogenetic reconstruction split the isolates into two major clades. Tests for recombination in the Bristol population of T. vaginalis gave conflicting results, suggesting overall a clonal pattern of reproduction. We conclude that the Bristol population of T. 
vaginalis parasites conforms to the two-type population structure found in most other regions of the world. We found the MLST scheme to be an efficient genotyping method. The online MLST database provides a useful repository and resource that will prove invaluable in future studies linking the genetics of T. vaginalis with the clinical manifestation of trichomoniasis.",2015-06-05 +29112715,RefSeq: an update on prokaryotic genome annotation and curation.,"The Reference Sequence (RefSeq) project at the National Center for Biotechnology Information (NCBI) provides annotation for over 95 000 prokaryotic genomes that meet standards for sequence quality, completeness, and freedom from contamination. Genomes are annotated by a single Prokaryotic Genome Annotation Pipeline (PGAP) to provide users with a resource that is as consistent and accurate as possible. Notable recent changes include the development of a hierarchical evidence scheme, a new focus on curating annotation evidence sources, the addition and curation of protein profile hidden Markov models (HMMs), release of an updated pipeline (PGAP-4), and comprehensive re-annotation of RefSeq prokaryotic genomes. Antimicrobial resistance proteins have been reannotated comprehensively, improved structural annotation of insertion sequence transposases and selenoproteins is provided, curated complex domain architectures have given upgraded names to millions of multidomain proteins, and we introduce a new kind of annotation rule-BlastRules. Continual curation of supporting evidence, and propagation of improved names onto RefSeq proteins ensures that the functional annotation of genomes is kept current. An increasing share of our annotation now derives from HMMs and other sets of annotation rules that are portable by nature, and available for download and for reuse by other investigators. 
RefSeq is found at https://www.ncbi.nlm.nih.gov/refseq/.",2018-01-01 +33160386,Sero-prevalence of human immunodeficiency virus-hepatitis B virus (HIV-HBV) co-infection among pregnant women attending antenatal care (ANC) in sub-Saharan Africa (SSA) and the associated risk factors: a systematic review and meta-analysis.,"

Background

There is plenitude of information on HIV infection among pregnant mothers attending antenatal care (ANC) in sub-Saharan Africa. However, the epidemiology of HBV-HIV co-infections in the same cohort is not clear despite the common route of transmission of both viruses. The aim of our study was to synthesize data on the prevalence of HBV-HIV co-infection among pregnant women attending ANC in Sub-Saharan Africa to assist in the design of public health interventions to mitigate the challenge.

Methods

The study was done in tandem with the Preferred Reporting Items for Systematic Reviews and Meta-analyses (PRISMA) standards and the Cochran's Q test, I2 statistics for heterogeneity and the prevalence were calculated using commercially available software called MedCalcs ( https://www.medcalc.org ). A random effect model was used to pool the prevalence since all the heterogeneities were high (≥ 78%) and Phet < 0.05 indicated significant heterogeneities. The risk factors and risk differences for HBV-HIV co-infection were analyzed. Any likely sources of heterogeneity were analyzed through sensitivity analysis, meta-regression and sub-group analysis. All analyses were done at 95% level of significance and a P < 0.05 was considered significant.

Results

The overall pooled prevalence of HBV-HIV co-infection among pregnant mothers in sub-Saharan Africa was low 3.302% (95%CI = 2.285 to 4.4498%) with heterogeneities (I2) of 97.59% (P > 0.0001). Within regional sub group meta-analyses, West Africa had significantly higher prevalence of 5.155% (95% = 2.671 to 8.392%) with heterogeneity (I2) of 92.25% (P < 0.0001) than any other region (P < 0.001). Articles published from 2004-2010 had significantly higher prevalence of 6.356% (95% = 3.611 to 9.811%) with heterogeneity (I2) 91.15% (P < 0.0001) compared to those published from 2011 to 2019 (P < 0.001). The HIV positive cohort had significantly higher prevalence of HBV-HIV co-infection of 8.312% (95% CI = 5.806 to 11.22%) with heterogeneity (I2)94.90% (P < 0.0001) than the mothers sampled from the general population with a prevalence of 2.152% (95% CI = 1.358 to 3.125%) (P < 0.001). The overall and sub group analyses had high heterogeneities (I2 > 89%, P < 0.0001) but was reduced for South Africa (I2) = 78.4% (P = 0.0314). Age, marital status and employment were independent factors significantly associated with risk of HBV-HIV co-infection (P < 0.001) but not extent of gravidity and education level (P > 0.05). After meta-regression for year of publication and sample size for HBsAg positivity, the results were not significantly associated with HBV pooled prevalence for sample size (P = 0.146) and year of publication (P = 0.560). Following sensitivity analysis, the HBsAg pooled prevalence slightly increased to 3.429% (95% CI = 2.459 to 4.554%) with heterogeneity I2 = 96.59% (95% CI = 95.93 to 97.14%), P < 0.0001 CONCLUSION: There is an urgent need for routine HBV screening among HIV positive pregnant mothers attending antenatal care in sub-Saharan Africa to establish the extent of HBV-HIV co-infection in this cohort. 
Future studies need to investigate the putative risk factors for HBV-HIV co-infection and prioritize plausible control strategies.",2020-11-07 +23292601,PRIMe Update: innovative content for plant metabolomics and integration of gene expression and metabolite accumulation.,"PRIMe (http://prime.psc.riken.jp/), the Platform for RIKEN Metabolomics, is a website that was designed and implemented to support research and analyses ranging from metabolomics to transcriptomics. To achieve functional genomics and annotation of unknown metabolites, we established the following PRIMe contents: MS2T, a library comprising >1 million entries of untargeted tandem mass spectrometry (MS/MS) data of plant metabolites; AtMetExpress LC-MS, a database of transcriptomics and metabolomics approaches in Arabidopsis developmental stages (AtMetExpress Development LC-MS) and a data set of the composition of secondary metabolites among 20 Arabidopsis ecotypes (AtMetExpress 20 ecotypes LC-MS); and ReSpect, hybrid reference MS/MS data resources (acquisitions and literature). PRIMeLink is a new web application that allows access to the innovative data resources of PRIMe. The MS2T library was generated from a set of MS/MS spectra acquired using the automatic data acquisition function of mass spectrometry. To increase the understanding of mechanisms driving variations in metabolic profiles among plant tissues, we further provided the AtMetExpress Development LC-MS database in PRIMe, facilitating the investigation of relationships between gene expression and metabolite accumulation. This information platform therefore provides an integrative analysis resource by linking Arabidopsis transcriptome and metabolome data. Moreover, we developed the ReSpect database, a plant-specific MS/MS data resource, which allows users to identify candidate structures from the suite of complex phytochemical structures. 
Finally, we integrated the three databases into PRIMeLink and established a walk-through link between transcriptome and metabolome information. PRIMeLink offers a bi-directional searchable function, from the gene and the metabolite perspective, to search for targets seamlessly and effectively.",2013-01-03 +29031638,dbGAPs: A comprehensive database of genes and genetic markers associated with psoriasis and its subtypes. ,"Psoriasis is a systemic hyperproliferative inflammatory skin disorder, although rarely fatal but significantly reduces quality of life. Understanding the full genetic component of the disease association may provide insight into biological pathways as well as targets and biomarkers for diagnosis, prognosis and therapy. Studies related to psoriasis associated genes and genetic markers are scattered and not easily amendable to data-mining. To alleviate difficulties, we have developed dbGAPs an integrated knowledgebase representing a gateway to psoriasis associated genomic data. The database contains annotation for 202 manually curated genes associated with psoriasis and its subtypes with cross-references. Functional enrichment of these genes, in context of Gene Ontology and pathways, provide insight into their important role in psoriasis etiology and pathogenesis. The dbGAPs interface is enriched with an interactive search engine for data retrieval along with unique customized tools for Single Nucleotide Polymorphism (SNP)/indel detection and SNP/indel annotations. dbGAPs is accessible at http://www.bmicnip.in/dbgaps/.",2017-10-12 +30462168,The Microbiome Modeling Toolbox: from microbial interactions to personalized microbial communities.,"

Motivation

The application of constraint-based modeling to functionally analyze metagenomic data has been limited so far, partially due to the absence of suitable toolboxes.

Results

To address this gap, we created a comprehensive toolbox to model (i) microbe-microbe and host-microbe metabolic interactions, and (ii) microbial communities using microbial genome-scale metabolic reconstructions and metagenomic data. The Microbiome Modeling Toolbox extends the functionality of the constraint-based reconstruction and analysis toolbox.

Availability and implementation

The Microbiome Modeling Toolbox and the tutorials are available at https://git.io/microbiomeModelingToolbox.",2019-07-01 +31265077,Mapping of scaffold/matrix attachment regions in human genome: a data mining exercise.,"Scaffold/matrix attachment regions (S/MARs) are DNA elements that serve to compartmentalize the chromatin into structural and functional domains. These elements are involved in control of gene expression which governs the phenotype and also plays role in disease biology. Therefore, genome-wide understanding of these elements holds great therapeutic promise. Several attempts have been made toward identification of S/MARs in genomes of various organisms including human. However, a comprehensive genome-wide map of human S/MARs is yet not available. Toward this objective, ChIP-Seq data of 14 S/MAR binding proteins were analyzed and the binding site coordinates of these proteins were used to prepare a non-redundant S/MAR dataset of human genome. Along with co-ordinate (location) details of S/MARs, the dataset also revealed details of S/MAR features, namely, length, inter-SMAR length (the chromatin loop size), nucleotide repeats, motif abundance, chromosomal distribution and genomic context. S/MARs identified in present study and their subsequent analysis also suggests that these elements act as hotspots for integration of retroviruses. Therefore, these data will help toward better understanding of genome functioning and designing effective anti-viral therapeutics. 
In order to facilitate user friendly browsing and retrieval of the data obtained in present study, a web interface, MARome (http://bioinfo.net.in/MARome), has been developed.",2019-08-01 +31198835,Data on production and characterization of melamine-furan-formaldehyde particles and reversible reactions thereof.,"The data presented in this article afford insights into the characterization of a newly described bi-functional furan-melamine monomer, which is used for the production of monodisperse, furan-functionalized melamine-formaldehyde particles, as described in https://doi.org/10.1016/j.eurpolymj.2019.04.006 Urdl et al., 2019. In the related research article Urdl et al., 2019 data interpretations can be found. The furan-functionalization of particles is necessary to perform reversible Diels-Alder reactions with maleimide (BMI) crosslinker to form thermoreversible network systems. To understand the reaction conditions of Diels-Alder (DA) reaction with a Fu-Mel monomer and a maleimide crosslinker, model DA reactions were performed and evaluated using dynamic FT-IR measurements. During retro Diels-Alder (rDA) reactions of the monomer system, it was found that some side reactions occurred at elevated temperatures. The data of evaluating the side reaction is described in one part of this manuscript. Additional high resolution SEM images of Fu-Mel particles are shown and thermoreversible particle networks with BMI2 are shown. The data of different Fu-Mel particle networks with maleimide crosslinker are presented. Therefore, the used maleimide crosslinker with different spacer lengths were synthesized and the resulting networks were analyzed by ATR-FT-IR, SEM and DSC.",2019-05-25 +27484196,iLIR database: A web resource for LIR motif-containing proteins in eukaryotes.,"Atg8-family proteins are the best-studied proteins of the core autophagic machinery. They are essential for the elongation and closure of the phagophore into a proper autophagosome. 
Moreover, Atg8-family proteins are associated with the phagophore from the initiation of the autophagic process to, or just prior to, the fusion between autophagosomes with lysosomes. In addition to their implication in autophagosome biogenesis, they are crucial for selective autophagy through their ability to interact with selective autophagy receptor proteins necessary for the specific targeting of substrates for autophagic degradation. In the past few years it has been revealed that Atg8-interacting proteins include not only receptors but also components of the core autophagic machinery, proteins associated with vesicles and their transport, and specific proteins that are selectively degraded by autophagy. Atg8-interacting proteins contain a short linear LC3-interacting region/LC3 recognition sequence/Atg8-interacting motif (LIR/LRS/AIM) motif which is responsible for their interaction with Atg8-family proteins. These proteins are referred to as LIR-containing proteins (LIRCPs). So far, many experimental efforts have been carried out to identify new LIRCPs, leading to the characterization of some of them in the past 10 years. Given the need for the identification of LIRCPs in various organisms, we developed the iLIR database ( https://ilir.warwick.ac.uk ) as a freely available web resource, listing all the putative canonical LIRCPs identified in silico in the proteomes of 8 model organisms using the iLIR server, combined with a Gene Ontology (GO) term analysis. Additionally, a curated text-mining analysis of the literature permitted us to identify novel putative LIRCPs in mammals that have not previously been associated with autophagy.",2016-08-02 +,Best Paper Selection,"Chen J, Rozowsky J, Galeev TR, Harmanci A, Kitchen R, Bedford J, Abyzov A, Kong Y, Regan L, Gerstein M. A uniform survey of allele-specific binding and expression over 1000-Genomes-Project individuals. 
Nat Commun 2016 Apr 18;7:11101 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4837449/ Marbach D, Lamparter D, Quon G, Kellis M, Kutalik Z, Bergmann S. Tissue-specific regulatory circuits reveal variable modular perturbations across complex diseases. Nat Methods 2016 Apr;13(4):366-70 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4967716/ Zhang D, Chen P, Zheng CH, Xia J. Identification of ovarian cancer subtype-specific network modules and candidate drivers through an integrative genomics approach. Oncotarget 2016 Jan 26;7(4):4298-309 +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4826206/ Zhang, J, White, NM, Schmidt, HK, Fulton, RS, Tomlinson, C, Warren, WC, Wilson, RK, Maher, CA. INTEGRATE: gene fusion discovery using whole genome and transcriptome data. Genome Res 2016;26(1):108-18",2017-08-01 +31584628,Fast and flexible coarse-grained prediction of protein folding routes using ensemble modeling and evolutionary sequence variation.,"MOTIVATION:Protein folding is a dynamic process through which polypeptide chains reach their native 3D structures. Although the importance of this mechanism is widely acknowledged, very few high-throughput computational methods have been developed to study it. RESULTS:In this paper, we report a computational platform named P3Fold that combines statistical and evolutionary information for predicting and analyzing protein folding routes. P3Fold uses coarse-grained modeling and efficient combinatorial schemes to predict residue contacts and evaluate the folding routes of a protein sequence within minutes or hours. To facilitate access to this technology, we devise graphical representations and implement an interactive web interface that allows end-users to leverage P3Fold predictions. Finally, we use P3Fold to conduct large and short scale experiments on the human proteome that reveal the broad conservation and variations of structural intermediates within protein families. 
AVAILABILITY AND IMPLEMENTATION:A Web server of P3Fold is freely available at http://csb.cs.mcgill.ca/P3Fold. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-03-01 +30254379,The global prevalence of Wilson disease from next-generation sequencing data.,"

Purpose

Wilson disease (WD) is an autosomal recessive disorder of copper metabolism, caused by pathogenic variants in ATP7B. We aimed to (1) perform a meta-analysis of previous WD prevalence estimates, (2) estimate the prevalence of WD from population sequencing data, and (3) generate an ATP7B gene variant database.

Methods

MEDLINE and EMBASE were systematically searched. Previous prevalence estimates were subjected to meta-analysis. All previously reported pathogenic ATP7B variants were compiled and annotated with gnomAD allele frequencies. Pooled global and ethnicity-specific genetic prevalences for WD were generated using the Hardy-Weinberg equation.

Results

Meta-analysis of genetic studies of WD prevalence gave an estimate 12.7 per 100,000 (95% confidence interval [CI]: 6.3-23.0). We developed a referenced, searchable ATP7B database comprising 11,520 variants including 782 previously reported disease variants, which can be found at http://www.wilsondisease.tk/ ; 216/782 of these were present in gnomAD, remained after filtering by allele frequency, and met American College of Medical Genetics and Genomics criteria. Based on these, the genetic prevalence of WD was 13.9 per 100,000 (95% CI: 12.9-14.9), or 1 per 7194. Combining this with 60 predicted pathogenic variants gave a birth prevalence of 15.4 per 100,000 (95% CI: 14.4-16.5).

Conclusion

The genetic prevalence of Wilson disease may be greater than previous estimates.",2018-09-26 +32709465,Editor's Choice - Trends in Lower Extremity Amputation Incidence in European Union 15+ Countries 1990-2017.,"

Objective

Lower extremity amputation (LEA) carries significant mortality, morbidity, and health economic burden. In the Western world, it most commonly results from complications of peripheral arterial occlusive disease (PAOD) or diabetic foot disease. The incidence of PAOD has declined in Europe, the United States, and parts of Australasia. The present study aimed to assess trends in LEA incidence in European Union (EU15+) countries for the years 1990-2017.

Methods

This was an observational study using data obtained from the 2017 Global Burden of Disease (GBD) Study. Age standardised incidence rates (ASIRs) for LEA (stratified into toe amputation, and LEA proximal to toes) were extracted from the GBD Results Tool (http://ghdx.healthdata.org/gbd-results-tool) for EU15+ countries for each of the years 1990-2017. Trends were analysed using Joinpoint regression analysis.

Results

Between 1990 and 2017, variable trends in the incidence of LEA were observed in EU15+ countries. For LEAs proximal to toes, increasing trends were observed in six of 19 countries and decreasing trends in nine of 19 countries, with four countries showing varying trends between sexes. For toe amputation, increasing trends were observed in eight of 19 countries and decreasing trends in eight of 19 countries for both sexes, with three countries showing varying trends between sexes. Australia had the highest ASIRs for both sexes in all LEAs at all time points, with steadily increasing trends. The USA observed the greatest reduction in all LEAs in both sexes over the time period analysed (LEAs proximal to toes: female patients -22.93%, male patients -29.76%; toe amputation: female patients -29.93%, male patients -32.67%). The greatest overall increase in incidence was observed in Australia.

Conclusion

Variable trends in LEA incidence were observed across EU15+ countries. These trends do not reflect previously observed reductions in incidence of PAOD over the same time period.",2020-07-21 +24137008,HoPaCI-DB: host-Pseudomonas and Coxiella interaction database.,"Bacterial infectious diseases are the result of multifactorial processes affected by the interplay between virulence factors and host targets. The host-Pseudomonas and Coxiella interaction database (HoPaCI-DB) is a publicly available manually curated integrative database (http://mips.helmholtz-muenchen.de/HoPaCI/) of host-pathogen interaction data from Pseudomonas aeruginosa and Coxiella burnetii. The resource provides structured information on 3585 experimentally validated interactions between molecules, bioprocesses and cellular structures extracted from the scientific literature. Systematic annotation and interactive graphical representation of disease networks make HoPaCI-DB a versatile knowledge base for biologists and network biology approaches.",2013-10-16 +32861806,Osimertinib versus platinum-pemetrexed for patients with EGFR T790M advanced NSCLC and progression on a prior EGFR-tyrosine kinase inhibitor: AURA3 overall survival analysis.,"

Background

In AURA3 (NCT02151981), osimertinib, a third-generation epidermal growth factor receptor (EGFR)-tyrosine kinase inhibitor (TKI), significantly prolonged progression-free survival and improved response in patients with EGFR T790M advanced non-small-cell lung cancer (NSCLC) and progression on prior EGFR-TKI treatment. We report the final AURA3 overall survival (OS) analysis.

Patients and methods

Adult patients were randomized 2 : 1 to osimertinib (80 mg orally, once daily) or pemetrexed plus carboplatin/cisplatin (platinum-pemetrexed) intravenously, every 3 weeks (≤6 cycles). Patients could crossover to osimertinib on progression confirmed by blinded independent central review. OS and safety were secondary end points.

Results

A total of 279 patients were randomly assigned to receive osimertinib and 140 to platinum-pemetrexed (136 received treatment). At data cut-off (DCO; 15 March 2019), 188 patients (67%) receiving osimertinib versus 93 (66%) receiving platinum-pemetrexed had died. The hazard ratio (HR) for OS was 0.87 [95% confidence interval (CI) 0.67-1.12; P = 0.277]; the median OS was 26.8 months (95% CI 23.5-31.5) versus 22.5 months (95% CI 20.2-28.8) for osimertinib and platinum-pemetrexed, respectively. The estimated 24- and 36-month survival was 55% versus 43% and 37% versus 30%, respectively. After crossover adjustment, there was an HR of 0.54 (95% CI 0.18-1.6). Time to first subsequent therapy or death showed a clinically meaningful advantage toward osimertinib (HR 0.21, 95% CI 0.16-0.28; P < 0.001). At DCO, 99/136 (73%) patients in the platinum-pemetrexed arm had crossed over to osimertinib, 66/99 (67%) of whom had died. The most common adverse events possibly related to study treatment were diarrhea (32%; grade ≥3, 1%) and rash (grouped term; 32%; grade ≥3, <1%) in the osimertinib arm, versus nausea (47%; grade ≥3, 3%) in the platinum-pemetrexed arm.

Conclusions

In patients with T790M advanced NSCLC, no statistically significant benefit in OS was observed for osimertinib versus platinum-pemetrexed, which possibly reflects the high crossover rate of patients from platinum-pemetrexed to osimertinib.

Clinical trials number

ClinicalTrials.gov NCT02151981; https://clinicaltrials.gov/ct2/show/NCT02151981.",2020-08-27 +28888533,An overview of posttraumatic stress disorder genetic studies by analyzing and integrating genetic data into genetic database PTSDgene.,"Posttraumatic stress disorder (PTSD) is a debilitating psychiatric syndrome with complex etiology. Studies aiming to explore genetic susceptibility and environmental triggers of PTSD have been increasing. However, the results are limited and highly heterogeneous. To understand the genetic study status of PTSD and explore more reliable candidates, we obtained 105 PTSD related genetic studies by comprehensively literature searching and filtering 1762 studies. Detailed phenotype and sample information for each study and association results for each genetic marker were extracted. Based on the extracted data, we reviewed the PTSD genetic research status and further conducted bioinformatics analyses for the genetic data. Our analyses summarized the landscape of PTSD genetic studies, identified the genes with most genetic evidence, discovered the biological function of the candidate variants/genes and enlarged the overall candidates for future investigations. All the data were stored in the PTSDgene database (http://ptsdgene.psych.ac.cn). We hope PTSDgene could be a platform for the rapid growth of PTSD genetic data and provide new insights into the pathogenesis of PTSD.",2017-09-06 +30374935,Report: NIA workshop on translating genetic variants associated with longevity into drug targets.,"To date, candidate gene and genome-wide association studies (GWAS) have led to the discovery of longevity-associated variants (LAVs) in genes such as FOXO3A and APOE. Unfortunately, translating variants into drug targets is challenging for any trait, and longevity is no exception. 
Interdisciplinary and integrative multi-omics approaches are needed to understand how LAVs affect longevity-related phenotypes at the molecular physiologic level in order to leverage their discovery to identify new drug targets. The NIA convened a workshop in August 2017 on emerging and novel in silico (i.e., bioinformatics and computational) approaches to the translation of LAVs into drug targets. The goal of the workshop was to identify ways of enabling, enhancing, and facilitating interactions among researchers from different disciplines whose research considers either the identification of LAVs or the mechanistic or causal pathway(s) and protective factors they influence for discovering drug targets. Discussions among the workshop participants resulted in the identification of critical needs for enabling the translation of LAVs into drug targets in several areas. These included (1) the initiation and better use of cohorts with multi-omics profiling on the participants; (2) the generation of longitudinal information on multiple individuals; (3) the collection of data from non-human species (both long and short-lived) for comparative biology studies; (4) the refinement of computational tools for integrative analyses; (5) the development of novel computational and statistical inference techniques for assessing the potential of a drug target; (6) the identification of available drugs that could modulate a target in a way that could potentially provide protection against age-related diseases and/or enhance longevity; and (7) the development or enhancement of databases and repositories of relevant information, such as the Longevity Genomics website ( https://www.longevitygenomics.org ), to enhance and help motivate future interdisciplinary studies. 
Integrative approaches that examine the influence of LAVs on molecular physiologic phenotypes that might be amenable to pharmacological modulation are necessary for translating LAVs into drugs to enhance health and life span.",2018-10-29 +31924845,SWAV: a web-based visualization browser for sliding window analysis.,"Sliding window analysis has been extensively applied in evolutionary biology. With the development of the high-throughput DNA sequencing of organisms at the population level, an application that is dedicated to visualizing population genetic test statistics at the genomic level is needed. We have developed the sliding window analysis viewer (SWAV), which is a web-based program that can be used to integrate, view and browse test statistics and perform genome annotation. In addition to browsing, SAV can mark, generate and customize statistical images and search by sequence alignment, position or gene name. These features facilitate the effectiveness of sliding window analysis. As an example application, yeast and silkworm resequencing data are analyzed with SWAV. The SWAV package, user manual and usage demo are available at http://swav.popgenetics.net.",2020-01-10 +29992321,Atlas of Schistosoma mansoni long non-coding RNAs and their expression correlation to protein-coding genes. ,"Long non-coding RNAs (lncRNAs) have been widely discovered in several organisms with the help of high-throughput RNA sequencing. LncRNAs are over 200 nt-long transcripts that do not have protein-coding (PC) potential, having been reported in model organisms to act mainly on the overall control of PC gene expression. Little is known about the functionality of lncRNAs in evolutionarily ancient non-model metazoan organisms, like Schistosoma mansoni, the parasite that causes schistosomiasis, one of the most prevalent infectious-parasitic diseases worldwide. In a recent transcriptomics effort, we identified thousands of S. 
mansoni lncRNAs predicted to be functional along the course of parasite development. Here, we present an online catalog of each of the S. mansoni lncRNAs whose expression is correlated to PC genes along the parasite life-cycle, which can be conveniently browsed and downloaded through a new web resource http://verjolab.usp.br. We also provide access now to navigation on the co-expression networks disclosed in our previous publication, where we correlated mRNAs and lncRNAs transcriptional patterns across five life-cycle stages/forms, pinpointing biological processes where lncRNAs might act upon.Database URL: http://verjolab.usp.br.",2018-01-01 +34384573,CCMP: Software-as-a-service approach for fully-automated microbiome profiling.,"Microbiome profiling holds great promise for the development of novel disease biomarkers and therapeutics. Next-generation sequencing is currently the preferred method for microbiome data collection and multiple standardized tools, packages, and pipelines have been developed for the purpose of raw data processing and microbial annotation. However, these currently available pipelines come with entry-level barriers such as high-performance hardware, software installation, and sequential command-line scripting that often deter end-users. We thus created Cloud Computing for Microbiome Profiling (CCMP, https://ccmp.usc.edu), a public cloud-based web tool which combines the analytical power of current microbiome analysis platforms with a user-friendly interface. CCMP is a free-of-charge software-as-a-service (SaaS) that simplifies user experience by enabling users to complete their analysis in a single step, uploading raw sequencing data files. Once users upload 16S ribosomal RNA gene sequence data, our pipeline performs taxonomic annotation, abundance profiling, and statistical tests to report microbiota signatures altered by diseases or experimental conditions. 
CCMP took a 125 gigabyte (GB) input of 16S ribosomal RNA gene sequence data from 1052 specimens in FASTQ format and reported figures and tables of taxonomic annotations, statistical tests, α and β diversity calculations, and principal coordinate analyses within 21 h. CCMP is the first fully-automated web interface that integrates three key solutions for large-scale data analysis: cloud computing, fast file transfer technology, and microbiome analysis tools. As a reliable platform that supplies consistent microbiome analysis, CCMP will advance microbiome research by making effortful bioinformatics easily accessible to public.",2019-04-15 +31482150,CCMP: software-as-a-service approach for fully-automated microbiome profiling. ,"Microbiome profiling holds great promise for the development of novel disease biomarkers and therapeutics. Next-generation sequencing is currently the preferred method for microbiome data collection and multiple standardized tools, packages, and pipelines have been developed for the purpose of raw data processing and microbial annotation. However, these currently available pipelines come with entry-level barriers such as high-performance hardware, software installation, and sequential command-line scripting that often deter end-users. We thus created Cloud Computing for Microbiome Profiling (CCMP, https://ccmp.usc.edu), a public cloud-based web tool which combines the analytical power of current microbiome analysis platforms with a user-friendly interface. CCMP is a free-of-charge software-as-a-service (SaaS) that simplifies user experience by enabling users to complete their analysis in a single step, uploading raw sequencing data files. Once users upload 16S ribosomal RNA gene sequence data, our pipeline performs taxonomic annotation, abundance profiling, and statistical tests to report microbiota signatures altered by diseases or experimental conditions. 
CCMP took a 125 gigabyte (GB) input of 16S ribosomal RNA gene sequence data from 1052 specimens in FASTQ format and reported figures and tables of taxonomic annotations, statistical tests, α and β diversity calculations, and principal coordinate analyses within 21 hours. CCMP is the first fully-automated web interface that integrates three key solutions for large-scale data analysis: cloud computing, fast file transfer technology, and microbiome analysis tools. As a reliable platform that supplies consistent microbiome analysis, CCMP will advance microbiome research by making effortful bioinformatics easily accessible to public.",2019-04-15 +31284104,"Mitochondrial DNA control region variation in Lebanon, Jordan, and Bahrain.","This study investigated the mitochondrial DNA (mtDNA) control region variation in Middle Eastern populations (610 individuals from Lebanon, Jordan and the Kingdom of Bahrain) for which population data are scarce. FST comparison among populations revealed that there are significant differences in mtDNA distributions between Bahrain and the two other populations, while Lebanon and Jordan showed no significant differences. This was also reflected by the distribution of the observed lineages that differed prominently between Bahrain and the other two investigated populations. Jordan and Lebanon fit the hitherto known genetic results of the Levant population. Data are available via EMPOP (https://empop.online) and GenBank.",2019-06-29 +31305969,NTP Research Report on the CLARITY-BPA Core Study: A Perinatal and Chronic Extended-Dose-Range Study of Bisphenol A in Rats: Research Report 9,"Bisphenol A (BPA, CAS #80-05-7) is a high-production-volume industrial chemical used as a monomer for polycarbonate plastics and epoxy resins that have broad applications in consumer products, including storage containers for foods and beverages and medical devices. 
The potential toxicity resulting from chronic exposure to BPA as an indirect food additive is the concern addressed in this study. This study is part of the Consortium Linking Academic and Regulatory Insights on Bisphenol A Toxicity (CLARITY-BPA), a research program between the National Institute of Environmental Health Sciences (NIEHS) and the National Center for Toxicological Research (NCTR) of the Food and Drug Administration (FDA), developed to bridge guideline-compliant research conducted at the FDA with hypothesis-based research investigations conducted by academia on the toxicity of BPA. The CLARITY-BPA research program has two components: 1) A “core” guideline-compliant chronic study conducted at NCTR according to FDA Good Laboratory Practice (GLP) regulations and 2) studies of various endpoints, conducted by NIEHS-funded researchers at academic institutions using animals born to the same exposed pregnant rats as the core GLP study. The purpose of this research program was to evaluate chronic exposure to BPA over a broad dose range using traditional and non-traditional endpoints. It aimed to determine if non-traditional endpoints reveal toxicity not detected by traditional guideline study endpoints and provide mechanistic support for observations made in the guideline study. The current research report covers only data from the “core” guideline-compliant chronic study. The toxicity of BPA administered by oral gavage from gestation day (GD) 6 through the start of labor and then by oral gavage to pups from postnatal day (PND) 1 (day of birth = PND 0) until termination at one year or two years was examined in Sprague-Dawley rats from the NCTR breeding colony (Sprague-Dawley/CD23/NctrBR). BPA doses were 2.5, 25, 250, 2,500, and 25,000 μg/kg body weight (bw)/day. A vehicle (0.3% carboxymethylcellulose (CMC)) control group was also included. 
In addition to animals that were dosed daily throughout the study, a stop-dose study arm was included with animals dosed daily until PND 21 and then held without further treatment until termination to assess any effects that were due to early exposure only. Because many of the effects of BPA reported in the literature are associated with estrogen signaling pathways, two doses (0.05 and 0.5 μg/kg bw/day) of the orally active estrogen ethinyl estradiol (EE2) were also included in the continuous-dose arm to assess the sensitivity of the test system to low doses of an estrogen. Reference estrogen groups were not included in the stop-dose study arm of the core study due to resource constraints, primarily lack of animal facility space. Rats were obtained as weanlings from the NCTR breeding colony and placed under study conditions (soy- and alfalfa-free diet (5K96, LabDiet, Purina Mills), polysulfone cages, hardwood chip bedding, glass water bottles, and food-grade silicone stoppers) until mating. Study materials were monitored for background BPA levels; the only material with detectable levels of BPA was the diet, which had less than 3 ppb BPA. Prior to mating to males that were not siblings or first cousins, female rats were stratified by body weight and were randomized to treatment groups to give approximately equivalent mean starting body weights in each group. Each morning after pairing, females were examined for evidence of mating (presence of an in situ vaginal plug or sperm-positive vaginal smear). Upon evidence of mating, the females were separated from the males and individually housed; this day was considered GD 0. On GD 6, daily dosing of the dam with BPA, EE2, or vehicle began and was based on the body weight measured immediately prior to the administration of these compounds. Direct gavage dosing of the pups was started on PND 1, with the same dose and agent that was administered to their dams. 
At weaning on PND 21, no more than one animal per sex per litter was assigned to the following study arms: 1) continuous dosing to sacrifice at two years (terminal sacrifice, 46–50 animals per sex per vehicle control or BPA treatment group and 26 animals per sex per EE2 group); 2) continuous dosing to sacrifice at one year (interim sacrifice, 20–26 animals per sex for all groups); 3) no further treatment after PND 21 until sacrifice at two years (stop-dose terminal sacrifice, 46–50 animals per sex per preweaning vehicle control or BPA group); and 4) no further treatment after PND 21 until sacrifice at one year (stop-dose interim sacrifice, 20–26 animals per sex for preweaning vehicle control and BPA groups). The stop-dose study arms for which gavage dosing was not continued beyond weaning were included to assess the potential of permanent effects induced by exposure to hormonally active compounds during developmental stages. The interim (one-year) sacrifice group was included to allow evaluation of long-term exposure effects with less confounding due to background lesions of aging than would be expected at two years, and to allow assessment of any precursors of any treatment-related lesions observed at two years. Data collected included body weights, litter parameters, age at vaginal opening, vaginal cytology, clinical pathology (interim sacrifice only), sperm parameters (interim sacrifice only), organ weights (interim sacrifice only), and histopathology (both interim and terminal sacrifices). Vaginal cytology data were collected for 14 consecutive days at approximately 16 weeks of age from the same subset of females in the terminal sacrifice arm that had been monitored for vaginal opening; these same animals were then monitored for five consecutive days monthly to estimate the time at which they began having aberrant estrous cycles. 
In addition to the summary tables provided in this report and appendices, all individual animal data are available online (https://doi.org/10.22427/NTP-DATA-018-00015-0001-000-6). Table 1 lists all non-histopathology endpoints analyzed and associated statistical findings. For histopathology data, Table 1 only lists the endpoints where a statistically significant difference was found by the primary statistical tests applied (Cochran-Armitage/Fisher’s Exact Test for interim sacrifice animals; survival-adjusted Poly-3 test for terminal sacrifice animals). Results from all statistical tests applied to the histopathology data, which further included Jonckheere-Terpstra/Shirley-Williams (JT/SW) and relative treatment effect (RTE) tests for non-neoplastic lesions assigned severity scores, are included in the text of this abstract and in the report text and tables. Statistically significant results are indicated regardless of biological significance. There were few significant effects of BPA treatment in the in-life data collected. In the late stages of the study (weeks 96–104), mean female body weights in the 250 μg BPA/kg bw/day continuous-dose group were significantly higher than the mean vehicle control body weights. For clinical pathology endpoints and organ weights, some statistically significant effects of continuous- or stop-dose BPA treatments were observed. These effects were of questionable relevance to BPA toxicity given that they were seen only in single-dose groups, in several cases differed from the vehicle control by less than 10%, and, in the case of organ weights, were not significant when adjusted for body weight. In the stop-dose BPA study arm at two years, there was a statistically significant increase in the incidence of female mammary gland adenocarcinoma (22% versus 6%; p = 0.016) and the combination of adenoma and adenocarcinoma (24% versus 8%; p = 0.018) in the 2.5 μg BPA/kg bw/day dose group. 
No increase in female mammary gland neoplasms was observed in the continuous BPA dose arm at two years. There were no significant treatment-related non-neoplastic lesions in the mammary gland of interim or terminal sacrifice stop-dose BPA groups. In the interim and terminal BPA continuous dosing arm, there was an increase, significant by the secondary RTE test only, in female mammary gland atypical foci at 2.5 μg BPA/kg bw/day (14% versus 0% and 15% versus 4% for the interim and terminal dose group animals, respectively). Increased adenoma/adenocarcinoma incidence observed only in the stop-dose animals, lack of a dose response, absence of non-neoplastic lesions in interim or terminal sacrifice stop-dose animals, and comparison to limited historical control data for this strain of rats in experiments conducted at NCTR bring into question the biological plausibility of this lesion as a BPA treatment-related effect. In addition to mammary gland neoplasms, a significant trend (p = 0.037) for uterine stromal polyps in the interim sacrifice animals in the continuous BPA dose arm was observed; this was not observed in the terminal sacrifice animals. In the histopathological evaluations, there were many non-neoplastic lesions associated with aging in this strain of rats in both males and females that were variable across control and BPA treatment levels. In the interim stop-dose sacrifice BPA females, there was a significant dose trend with a significant increase in follicular cysts in the ovary at 25,000 μg BPA/kg bw/day dose group. The secondary statistical tests, which incorporated both incidence and severity scores, indicated an increase in cystic endometrial hyperplasia and squamous metaplasia in the uterus at 25,000 μg BPA/kg bw/day in the interim stop-dose females. 
In the terminal stop-dose animals, secondary tests indicated an increase in cystic endometrial hyperplasia at 2,500 and 25,000 μg BPA/kg bw/day, although severity was similar in the vehicle control and the BPA-treated groups. Cardiomyopathy was increased in the terminal stop-dose females at 2.5, 250, 2,500, and 25,000 μg BPA/kg bw/day, as assessed by statistical tests that incorporated incidence and severity scores, although background incidence was high at this age and severity score differences across dose groups were similar. In interim continuous-dose females, uterine apoptosis and vaginal epithelial hyperplasia were elevated at 25,000 μg BPA/kg bw/day. Vaginal epithelial hyperplasia was also increased in terminal continuous-dose animals at doses from 25 to 25,000 μg BPA/kg bw/day, with a similar response across each of those dose levels. There were no significant differences between treatment groups and vehicle controls in the incidences of neoplastic lesions in stop-dose or continuous-dose interim or terminal sacrifice males. There were also no apparent treatment-related non-neoplastic effects in interim stop-dose males; in terminal stop-dose BPA males, an increase of hyperplasia in the pars distalis of the pituitary at 25,000 μg BPA/kg bw/day was noted. In interim, but not terminal, continuous-dose males there was an increase in exfoliated germ cells and an increase in lymphocyte infiltration in the epididymis at 25,000 μg BPA/kg bw/day. No significant effects on sperm parameters or testicular histopathology were noted in the BPA dose groups. In the terminal continuous-dose males, hyperplasia of the pars distalis of the pituitary was increased at 25 and 25,000 μg BPA/kg bw/day. Increases in dorsal/lateral prostate inflammation in most BPA dose groups were variable across a high background in both interim and terminal sacrifice animals. 
In the EE2 reference estrogen dose groups, there were multiple significant treatment-related effects at the 0.5 μg/kg bw/day exposure level in females. At the time of estrous cycle evaluation at 16 weeks, more than 90% of the females in the 0.5 μg EE2/kg bw/day dose group were exhibiting prolonged estrus. At the interim sacrifice, mean weights of the adrenal glands, heart, kidney, liver, and pituitary gland were higher in the 0.5 μg EE2/kg bw/day dose group than the vehicle control means. Ovarian/parametrial fat pad and ovary weights were significantly lower than mean vehicle control weights in the high EE2 dose group. At the interim sacrifice, lobular hyperplasia and ductal dilatation were elevated in the mammary glands of the 0.5 μg EE2/kg bw/day dose group. Increases in apoptosis, cystic endometrial hyperplasia, and squamous metaplasia were observed in the uterus of the interim high dose EE2 females. Atrophy and cystic follicles were increased in the ovaries, the incidence of vaginal hyperplasia was increased, and increases in hyperplasia of the pars distalis and angiectasis were observed in the pituitary at 0.5 μg EE2/kg bw/day. The incidences of cardiomyopathy and nephropathy were also increased in the high dose EE2 females at one year. At terminal sacrifice, there were significant increases in the incidence of mammary gland adenocarcinomas and combined adenomas/carcinomas of the pituitary pars distalis in the 0.5 μg EE2/kg bw/day dose group. There was a trend toward increasing uterine metaplasia at two years, and the incidence of nephropathy was increased in both the 0.05 and 0.5 μg EE2/kg bw/day dose groups. Few statistically significant effects of EE2 in males were observed. In the high dose EE2 group, there was an elevated incidence of lymphocyte infiltration observed in the epididymis in interim sacrifice animals and an increase in hyperplasia in the pars distalis of the pituitary at two years. 
In conclusion, in the CLARITY-BPA core study, statistical differences between BPA treatment groups, particularly below 25,000 μg/kg bw/day, and the vehicle control group detected by the low-stringency statistical tests applied to histopathology lesions, were not dose responsive, sometimes occurring in only one low or intermediate dose group, and did not demonstrate a clear pattern of consistent responses within or across organs within the stop- and continuous-dose arms and sacrifice times. In contrast, the high EE2-dose elicited several estrogenic effects in females in a clearly interpretable and biologically plausible manner. Several observations at 25,000 μg BPA/kg bw/day may be treatment related, including effects mentioned above in the female reproductive tract (ovary, uterus, and vagina) and in the male pituitary.",2019-07-16 +31114885,Geneshot: search engine for ranking genes from arbitrary text queries.,"The frequency by which genes are studied correlates with the prior knowledge accumulated about them. This leads to an imbalance in research attention where some genes are highly investigated while others are ignored. Geneshot is a search engine developed to illuminate this gap and to promote attention to the under-studied genome. Through a simple web interface, Geneshot enables researchers to enter arbitrary search terms, to receive ranked lists of genes relevant to the search terms. Returned ranked gene lists contain genes that were previously published in association with the search terms, as well as genes predicted to be associated with the terms based on data integration from multiple sources. The search results are presented with interactive visualizations. To predict gene function, Geneshot utilizes gene-gene similarity matrices from processed RNA-seq data, or from gene-gene co-occurrence data obtained from multiple sources. In addition, Geneshot can be used to analyze the novelty of gene sets and augment gene sets with additional relevant genes. 
The Geneshot web-server and API are freely and openly available from https://amp.pharm.mssm.edu/geneshot.",2019-07-01 +24124474,Evaluation and cross-comparison of lexical entities of biological interest (LexEBI).,"

Motivation

Biomedical entities, their identifiers and names, are essential in the representation of biomedical facts and knowledge. In the same way, the complete set of biomedical and chemical terms, i.e. the biomedical ""term space"" (the ""Lexeome""), forms a key resource to achieve the full integration of the scientific literature with biomedical data resources: any identified named entity can immediately be normalized to the correct database entry. This goal does not only require that we are aware of all existing terms, but would also profit from knowing all their senses and their semantic interpretation (ambiguities, nestedness).

Result

This study compiles a resource for lexical terms of biomedical interest in a standard format (called ""LexEBI""), determines the overall number of terms, their reuse in different resources and the nestedness of terms. LexEBI comprises references for protein and gene entries and their term variants and chemical entities amongst other terms. In addition, disease terms have been identified from Medline and PubmedCentral and added to LexEBI. Our analysis demonstrates that the baseforms of terms from the different semantic types show only little polysemous use. Nonetheless, the term variants of protein and gene names (PGNs) frequently contain species mentions, which should have been avoided according to protein annotation guidelines. Furthermore, the protein and gene entities as well as the chemical entities, both do comprise enzymes leading to hierarchical polysemy, and a large portion of PGNs make reference to a chemical entity. Altogether, according to our analysis based on the Medline distribution, 401,869 unique PGNs in the documents contain a reference to 25,022 chemical entities, 3,125 disease terms or 1,576 species mentions.

Conclusion

LexEBI delivers the complete biomedical and chemical Lexeome in a standardized representation (http://www.ebi.ac.uk/Rebholz-srv/LexEBI/). The resource provides the disease terms as open source content, and fully interlinks terms across resources.",2013-10-04 +30239600,STatistical Inference Relief (STIR) feature selection.,"

Motivation

Relief is a family of machine learning algorithms that uses nearest-neighbors to select features whose association with an outcome may be due to epistasis or statistical interactions with other features in high-dimensional data. Relief-based estimators are non-parametric in the statistical sense that they do not have a parameterized model with an underlying probability distribution for the estimator, making it difficult to determine the statistical significance of Relief-based attribute estimates. Thus, a statistical inferential formalism is needed to avoid imposing arbitrary thresholds to select the most important features. We reconceptualize the Relief-based feature selection algorithm to create a new family of STatistical Inference Relief (STIR) estimators that retains the ability to identify interactions while incorporating sample variance of the nearest neighbor distances into the attribute importance estimation. This variance permits the calculation of statistical significance of features and adjustment for multiple testing of Relief-based scores. Specifically, we develop a pseudo t-test version of Relief-based algorithms for case-control data.

Results

We demonstrate the statistical power and control of type I error of the STIR family of feature selection methods on a panel of simulated data that exhibits properties reflected in real gene expression data, including main effects and network interaction effects. We compare the performance of STIR when the adaptive radius method is used as the nearest neighbor constructor with STIR when the fixed-k nearest neighbor constructor is used. We apply STIR to real RNA-Seq data from a study of major depressive disorder and discuss STIR's straightforward extension to genome-wide association studies.

Availability and implementation

Code and data available at http://insilico.utulsa.edu/software/STIR.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +25978395,[Computer evaluation of hidden potential of phytochemicals of medicinal plants of the traditional Indian ayurvedic medicine].,"Applicability of our computer programs PASS and PharmaExpert to prediction of biological activity spectra of rather complex and structurally diverse phytocomponents of medicinal plants, both separately and in combinations has been evaluated. The web-resource on phytochemicals of 50 medicinal plants used in Ayurveda was created for the study of hidden therapeutic potential of Traditional Indian Medicine (TIM) (http://ayurveda.pharmaexpert.ru). It contains information on 50 medicinal plants, their using in TIM and their pharmacology activities, also as 1906 phytocomponents. PASS training set was updated by addition of information about 946 natural compounds; then the training procedure and validation were performed, to estimate the quality of PASS prediction. It was shown that the difference between the average accuracy of prediction obtained in leave-5%-out cross-validation (94,467%) and in leave-one-out cross-validation (94,605%) is very small. These results showed high predictive ability of the program. Results of biological activity spectra prediction for all phytocomponents included in our database are in good correspondence with the experimental data. Additional kinds of biological activity predicted with high probability provide the information about most promising directions of further studies. The analysis of prediction results of sets of phytocomponents in each of 50 medicinal plants was made by PharmaExpert software. Based on this analysis, we found that the combination of phytocomponents from Passiflora incarnata may exhibit nootropic, anticonvulsant and antidepressant effects. 
Experiments carried out in mice models confirmed the predicted effects of Passiflora incarnata extracts.",2015-03-01 +31138115,TelomereHunter - in silico estimation of telomere content and composition from cancer genomes.,"

Background

Establishment of telomere maintenance mechanisms is a universal step in tumor development to achieve replicative immortality. These processes leave molecular footprints in cancer genomes in the form of altered telomere content and aberrations in telomere composition. To retrieve these telomere characteristics from high-throughput sequencing data the available computational approaches need to be extended and optimized to fully exploit the information provided by large scale cancer genome data sets.

Results

We here present TelomereHunter, a software for the detailed characterization of telomere maintenance mechanism footprints in the genome. The tool is implemented for the analysis of large cancer genome cohorts and provides a variety of diagnostic diagrams as well as machine-readable output for subsequent analysis. A novel key feature is the extraction of singleton telomere variant repeats, which improves the identification and subclassification of the alternative lengthening of telomeres phenotype. We find that whole genome sequencing-derived telomere content estimates strongly correlate with telomere qPCR measurements (r = 0.94). For the first time, we determine the correlation of in silico telomere content quantification from whole genome sequencing and whole genome bisulfite sequencing data derived from the same tumor sample (r = 0.78). An analogous comparison of whole exome sequencing data and whole genome sequencing data measured slightly lower correlation (r = 0.79). However, this is considerably improved by normalization with matched controls (r = 0.91).

Conclusions

TelomereHunter provides new functionality for the analysis of the footprints of telomere maintenance mechanisms in cancer genomes. Besides whole genome sequencing, whole exome sequencing and whole genome bisulfite sequencing are suited for in silico telomere content quantification, especially if matched control samples are available. The software runs under a GPL license and is available at https://www.dkfz.de/en/applied-bioinformatics/telomerehunter/telomerehunter.html .",2019-05-28 +31120045,Automatised pharmacophoric deconvolution of plant extracts - application to Cinchona bark crude extract.,"We present a development of the ""Plasmodesma"" dereplication method [Margueritte et al., Magn. Reson. Chem., 2018, 56, 469]. This method is based on the automatic acquisition of a standard set of NMR experiments from a medium sized set of samples differing by their bioactivity. From this raw data, an analysis pipeline is run and the data is analysed by leveraging machine learning approaches in order to extract the spectral fingerprints of the active compounds. The optimal conditions for the analysis are determined and tested on two different systems, a synthetic sample where a single active molecule is to be isolated and characterized, and a complex bioactive matrix with synergetic interactions between the components. The method allows the identification of the active compounds and performs a pharmacophoric deconvolution. The program is freely available on the Internet, with an interactive visualisation of the statistical analysis, at https://plasmodesma.igbmc.science.",2019-08-01 +30192904,Mixed graphical models for integrative causal analysis with application to chronic lung disease diagnosis and prognosis.,"

Motivation

Integration of data from different modalities is a necessary step for multi-scale data analysis in many fields, including biomedical research and systems biology. Directed graphical models offer an attractive tool for this problem because they can represent both the complex, multivariate probability distributions and the causal pathways influencing the system. Graphical models learned from biomedical data can be used for classification, biomarker selection and functional analysis, while revealing the underlying network structure and thus allowing for arbitrary likelihood queries over the data.

Results

In this paper, we present and test new methods for finding directed graphs over mixed data types (continuous and discrete variables). We used this new algorithm, CausalMGM, to identify variables directly linked to disease diagnosis and progression in various multi-modal datasets, including clinical datasets from chronic obstructive pulmonary disease (COPD). COPD is the third leading cause of death and a major cause of disability and thus determining the factors that cause longitudinal lung function decline is very important. Applied on a COPD dataset, mixed graphical models were able to confirm and extend previously described causal effects and provide new insights on the factors that potentially affect the longitudinal lung function decline of COPD patients.

Availability and implementation

The CausalMGM package is available on http://www.causalmgm.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +32069356,"Protein Type, Protein Dose, and Age Modulate Dietary Protein Digestion and Phenylalanine Absorption Kinetics and Plasma Phenylalanine Availability in Humans.","BACKGROUND:Dietary protein ingestion stimulates muscle protein synthesis by providing amino acids to the muscle. The magnitude and duration of the postprandial increase in muscle protein synthesis rates are largely determined by dietary protein digestion and amino acid absorption kinetics. OBJECTIVE:We assessed the impact of protein type, protein dose, and age on dietary protein digestion and amino acid absorption kinetics in vivo in humans. METHODS:We included data from 18 randomized controlled trials with a total of 602 participants [age: 53 ± 23 y; BMI (kg/m2): 24.8 ± 3.3] who consumed various quantities of intrinsically l-[1-13C]-phenylalanine-labeled whey (n = 137), casein (n = 393), or milk (n = 72) protein and received intravenous infusions of l-[ring-2H5]-phenylalanine, which allowed us to assess protein digestion and phenylalanine absorption kinetics and the postprandial release of dietary protein-derived phenylalanine into the circulation. The effect of aging on these processes was assessed in a subset of 82 young (aged 22 ± 3 y) and 83 older (aged 71 ± 5 y) individuals. RESULTS:A total of 50% ± 14% of dietary protein-derived phenylalanine appeared in the circulation over a 5-h postprandial period. Casein ingestion resulted in a smaller (45% ± 11%), whey protein ingestion in an intermediate (57% ± 10%), and milk protein ingestion in a greater (65% ± 13%) fraction of dietary protein-derived phenylalanine appearing in the circulation (P < 0.001). The postprandial availability of dietary protein-derived phenylalanine in the circulation increased with the ingestion of greater protein doses (P < 0.05). 
Protein digestion and phenylalanine absorption kinetics were attenuated in older when compared with young individuals, with 45% ± 10% vs. 51% ± 14% of dietary protein-derived phenylalanine appearing in the circulation, respectively (P = 0.001). CONCLUSIONS:Protein type, protein dose, and age modulate dietary protein digestion and amino acid absorption kinetics and subsequent postprandial plasma amino acid availability in vivo in humans. These trials were registered at clinicaltrials.gov as NCT00557388, NCT00936039, NCT00991523, NCT01317511, NCT01473576, NCT01576848, NCT01578590, NCT01615276, NCT01680146, NCT01820975, NCT01986842, and NCT02596542, and at http://www.trialregister.nl as NTR3638, NTR3885, NTR4060, NTR4429, and NTR4492.",2020-08-01 +32291780,Making body work sequences visible: an ethnographic study of acute orthopaedic hospital wards.,"Within health and social care, academic attention is increasingly paid to understanding the nature and centrality of body work. Relatively little is known about how and where body work specifically fits into the wider work relations that produce it in healthcare settings. We draw on ethnographic observations of staff practice in three National Health Service acute hospital wards in the United Kingdom to make visible the micro-processes of patient care sequences including both body work and the work contextualising and supporting it. Our data, produced in 2015, show body work interactions in acute care to be critically embedded within a context of initiating, preparing, moving and restoring and proceeding. Shades of privacy and objectification of the body are present throughout these sequences. While accomplishing tasks away from the physical body, staff members must also maintain physical and cognitive work focussed on producing body work. Thus, patient care is necessarily complex, requiring much staff time and energy to deliver it. 
We argue that by making visible the micro-processes that hospital patient care depends on, including both body work and the work sequences supporting it, the complex physical and cognitive workload required to deliver care can be better recognised. (A virtual version of this abstract is available at: https://www.youtube.com/channel/UC_979cmCmR9rLrKuD7z0ycA).",2020-04-15 +30995154,Diagnostic Decisions in Child Language Assessment: Findings From a Case Review Assessment Task.,"Purpose The current study used a case review diagnostic assessment task to examine the diagnostic decisions speech-language pathologists (SLPs) working in the United States made after reviewing child language cases. Method Fourteen SLPs were given 5 case studies that presented either congruent or incongruent results between standardized testing and informal measures. After reviewing the assessment data, SLPs were asked to make a diagnostic decision. Results Unanimous consensus regarding diagnostic decisions was found when the assessment data were congruent. When the data were incongruent, unanimous consensus was not achieved. Standardized testing seemed to guide the diagnostic decision. This pattern of reliance on standardized testing was evident even when 80% of SLPs reviewed informal language data. In 97% of cases, a standardized test was used to guide clinical decision making. Conclusion Three patterns of clinical decision making in child language assessment emerged: (a) use of both standardized testing and informal measures, (b) a reported concern tool as a 1st step in the process, and (c) standardized testing as the most influential data for guiding diagnostic decisions. Although this study provides initial evidence regarding the process of diagnostic decision making, future studies should examine decision making in real time to further validate the implicit rules used during decision making. 
Supplemental Material https://doi.org/10.23641/asha.7991174.",2019-04-17 +24705204,SysPTM 2.0: an updated systematic resource for post-translational modification.,"Post-translational modifications (PTMs) of proteins play essential roles in almost all cellular processes, and are closely related to physiological activity and disease development of living organisms. The development of tandem mass spectrometry (MS/MS) has resulted in a rapid increase of PTMs identified on proteins from different species. The collection and systematic ordering of PTM data should provide invaluable information for understanding cellular processes and signaling pathways regulated by PTMs. For this original purpose we developed SysPTM, a systematic resource installed with comprehensive PTM data and a suite of web tools for annotation of PTMs in 2009. Four years later, there has been a significant advance with the generation of PTM data and, consequently, more sophisticated analysis requirements have to be met. Here we submit an updated version of SysPTM 2.0 (http://lifecenter.sgst.cn/SysPTM/), with almost doubled data content, enhanced web-based analysis tools of PTMBlast, PTMPathway, PTMPhylog, PTMCluster. Moreover, a new session SysPTM-H is constructed to graphically represent the combinatorial histone PTMs and dynamic regulation of histone modifying enzymes, and a new tool PTMGO is added for functional annotation and enrichment analysis. SysPTM 2.0 not only facilitates resourceful annotation of PTM sites but allows systematic investigation of PTM functions by the user. Database URL: http://lifecenter.sgst.cn/SysPTM/.",2014-04-03 +32156812,Context Is Key: Comparative Biology Illuminates the Vertebrate Microbiome. ,"Microbes affect vertebrates on timescales from daily to evolutionary, and the cumulative effect of these interactions is immense. However, how microbiomes compare across (host) species is poorly understood, as most studies focus on relatively few species. 
A recent mBio article by S. J. Song, J. G. Sanders, F. Delsuc, J. Metcalf, et al. (mBio 11:e02901-19, 2019, https://doi.org/10.1128/mBio.02901-19) expands our collective understanding of the vertebrate microbiome by analyzing ∼900 species. They demonstrate that patterns within mammals contrast with those within birds. Their results suggest many hypotheses about the role of host ecology and evolution on microbiome variation. Bats, the only volant mammals, appear to contradict many of the general mammal microbiome trends, in some ways resembling birds. What role has powered flight, and the evolution thereof, played in microbiome structure and function? Comparative methods, mechanistic hypotheses, and theory will elucidate this exciting question (and others) that we can ask using Song, Sanders et al.'s data and results.",2020-03-10 +31844835,GenFam: A web application and database for gene family-based classification and functional enrichment analysis.,"Genome-scale studies using high-throughput sequencing (HTS) technologies generate substantial lists of differentially expressed genes under different experimental conditions. These gene lists need to be further mined to narrow down biologically relevant genes and associated functions in order to guide downstream functional genetic analyses. A popular approach is to determine statistically overrepresented genes in a user-defined list through enrichment analysis tools, which rely on functional annotations of genes based on Gene Ontology (GO) terms. Here, we propose a new computational approach, GenFam, which allows annotation, classification, and enrichment of genes based on their gene family, thus simplifying identification of candidate gene families and associated genes that may be relevant to the query. GenFam and its integrated database comprises of three hundred and eighty-four unique gene families and supports gene family analyses for sixty plant genomes. 
Four comparative case studies with plant species belonging to different clades and families were performed using GenFam which demonstrated its robustness and comprehensiveness over preexisting functional enrichment tools. To make it readily accessible for plant biologists, GenFam is available as a web-based application where users can input gene IDs and export enrichment results in both tabular and graphical formats. Users can also customize analysis parameters by choosing from the various statistical enrichment tests and multiple testing correction methods. Additionally, the web-based application, source code, and database are freely available to use and download. Website: http://mandadilab.webfactional.com/home/. Source code and database: http://mandadilab.webfactional.com/home/dload/.",2019-12-04 +24163257,The mouse Gene Expression Database (GXD): 2014 update.,"The Gene Expression Database (GXD; http://www.informatics.jax.org/expression.shtml) is an extensive and well-curated community resource of mouse developmental expression information. GXD collects different types of expression data from studies of wild-type and mutant mice, covering all developmental stages and including data from RNA in situ hybridization, immunohistochemistry, RT-PCR, northern blot and western blot experiments. The data are acquired from the scientific literature and from researchers, including groups doing large-scale expression studies. Integration with the other data in Mouse Genome Informatics (MGI) and interconnections with other databases places GXD's gene expression information in the larger biological and biomedical context. Since the last report, the utility of GXD has been greatly enhanced by the addition of new data and by the implementation of more powerful and versatile search and display features. 
Web interface enhancements include the capability to search for expression data for genes associated with specific phenotypes and/or human diseases; new, more interactive data summaries; easy downloading of data; direct searches of expression images via associated metadata; and new displays that combine image data and their associated annotations. At present, GXD includes >1.4 million expression results and 250,000 images that are accessible to our search tools.",2013-10-25 +31941820,"mSphere of Influence: It's Not Me, It's You-How Donor Factors Influence Kidney Transplant Outcomes. ","Diana V. Pastrana works in the field of DNA tumor virus biology. In this mSphere of Influence article, she reflects on how the two papers ""Donor origin of BKV replication after kidney transplantation"" (C. Schmitt, L. Raggub, S. Linnenweber-Held, O. Adams, et al., J Clin Virol 59:120-125, 2014, https://doi.org/10.1016/j.jcv.2013.11.009) and ""Neutralizing antibody-mediated response and risk of BK virus-associated nephropathy"" (M. Solis, A. Velay, R. Porcher, P. Domingo-Calap, et al., J Am Soc Nephrol 29:326-334, 2018, https://doi.org/10.1681/ASN.2017050532) reminded her of the importance of allowing data, and not adherence to dogma, to drive her research.",2020-01-15 +27602285,Temporal dynamics of the developing lung transcriptome in three common inbred strains of laboratory mice reveals multiple stages of postnatal alveolar development.,"To characterize temporal patterns of transcriptional activity during normal lung development, we generated genome wide gene expression data for 26 pre- and post-natal time points in three common inbred strains of laboratory mice (C57BL/6J, A/J, and C3H/HeJ). Using Principal Component Analysis and least squares regression modeling, we identified both strain-independent and strain-dependent patterns of gene expression. 
The 4,683 genes contributing to the strain-independent expression patterns were used to define a murine Developing Lung Characteristic Subtranscriptome (mDLCS). Regression modeling of the Principal Components supported the four canonical stages of mammalian embryonic lung development (embryonic, pseudoglandular, canalicular, saccular) defined previously by morphology and histology. For postnatal alveolar development, the regression model was consistent with four stages of alveolarization characterized by episodic transcriptional activity of genes related to pulmonary vascularization. Genes expressed in a strain-dependent manner were enriched for annotations related to neurogenesis, extracellular matrix organization, and Wnt signaling. Finally, a comparison of mouse and human transcriptomics from pre-natal stages of lung development revealed conservation of pathways associated with cell cycle, axon guidance, immune function, and metabolism as well as organism-specific expression of genes associated with extracellular matrix organization and protein modification. The mouse lung development transcriptome data generated for this study serves as a unique reference set to identify genes and pathways essential for normal mammalian lung development and for investigations into the developmental origins of respiratory disease and cancer. The gene expression data are available from the Gene Expression Omnibus (GEO) archive (GSE74243). Temporal expression patterns of mouse genes can be investigated using a study specific web resource (http://lungdevelopment.jax.org).",2016-08-09 +31750988,Qualitative and Quantitative Reporting of a Unique Biparametric MRI: Towards Biparametric MRI-Based Nomograms for Prediction of Prostate Biopsy Outcome in Men With a Clinical Suspicion of Prostate Cancer (IMPROD and MULTI-IMPROD Trials).,"

Background

Multiparametric MRI of the prostate has been shown to improve the risk stratification of men with an elevated prostate-specific antigen (PSA). However, long acquisition time, high cost, and inter-center/reader variability of a routine prostate multiparametric MRI limit its wider adoption.

Purpose

To develop and validate nomograms based on unique rapid biparametric MRI (bpMRI) qualitative and quantitative derived variables for prediction of clinically significant cancer (SPCa).

Study type

Retrospective analyses of single (IMPROD, NCT01864135) and multiinstitution trials (MULTI-IMPROD, NCT02241122).

Population

161 and 338 prospectively enrolled men who completed the IMPROD and MULTI-IMPROD trials, respectively.

Field strength/sequence

IMPROD bpMRI: 3T/1.5T, T2 -weighted imaging, three separate diffusion-weighted imaging (DWI) acquisitions: 1) b-values 0, 100, 200, 300, 500 s/mm2 ; 2) b values 0, 1500 s/mm2 ; 3) values 0, 2000 s/mm2 .

Assessment

The primary endpoint of the combined trial analysis was the diagnostic accuracy of the combination of IMPROD bpMRI and clinical variables for detection of SPCa.

Statistical tests

Logistic regression models were developed using IMPROD trial data and validated using MULTI-IMPROD trial data. The model's performance was expressed as the area under the curve (AUC) values for the detection of SPCa, defined as ISUP Gleason Grade Group ≥2.

Results

A model incorporating clinical variables had an AUC (95% confidence interval) of 0.83 (0.77-0.89) and 0.80 (0.75-0.85) in the development and validation cohorts, respectively. The corresponding values for a model using IMPROD bpMRI findings were 0.93 (0.89-0.97), and 0.88 (0.84-0.92), respectively. Further addition of the quantitative DWI-based score did not improve AUC values (P < 0.05).

Data conclusion

A prediction model using qualitative IMPROD bpMRI findings demonstrated high accuracy for predicting SPCa in men with an elevated PSA. Online risk calculator: http://petiv.utu.fi/multiimprod/ Level of Evidence: 1 Technical Efficacy Stage: 2 J. Magn. Reson. Imaging 2020;51:1556-1567.",2019-11-21 +25707505,IIIDB: a database for isoform-isoform interactions and isoform network modules.,"

Background

Protein-protein interactions (PPIs) are key to understanding diverse cellular processes and disease mechanisms. However, current PPI databases only provide low-resolution knowledge of PPIs, in the sense that ""proteins"" of currently known PPIs generally refer to ""genes."" It is known that alternative splicing often impacts PPI by either directly affecting protein interacting domains, or by indirectly impacting other domains, which, in turn, impacts the PPI binding. Thus, proteins translated from different isoforms of the same gene can have different interaction partners.

Results

Due to the limitations of current experimental capacities, little data is available for PPIs at the resolution of isoforms, although such high-resolution data is crucial to map pathways and to understand protein functions. In fact, alternative splicing can often change the internal structure of a pathway by rearranging specific PPIs. To fill the gap, we systematically predicted genome-wide isoform-isoform interactions (IIIs) using RNA-seq datasets, domain-domain interaction and PPIs. Furthermore, we constructed an III database (IIIDB) that is a resource for studying PPIs at isoform resolution. To discover functional modules in the III network, we performed III network clustering, and then obtained 1025 isoform modules. To evaluate the module functionality, we performed the GO/pathway enrichment analysis for each isoform module.

Conclusions

The IIIDB provides predictions of human protein-protein interactions at the high resolution of transcript isoforms that can facilitate detailed understanding of protein functions and biological pathways. The web interface allows users to search for IIIs or III network modules. The IIIDB is freely available at http://syslab.nchu.edu.tw/IIIDB.",2015-01-21 +32556167,"Watchdog 2.0: New developments for reusability, reproducibility, and workflow execution. ","Advances in high-throughput methods have brought new challenges for biological data analysis, often requiring many interdependent steps applied to a large number of samples. To address this challenge, workflow management systems, such as Watchdog, have been developed to support scientists in the (semi-)automated execution of large analysis workflows. Here, we present Watchdog 2.0, which implements new developments for module creation, reusability, and documentation and for reproducibility of analyses and workflow execution. Developments include a graphical user interface for semi-automatic module creation from software help pages, sharing repositories for modules and workflows, and a standardized module documentation format. The latter allows generation of a customized reference book of public and user-specific modules. Furthermore, extensive logging of workflow execution, module and software versions, and explicit support for package managers and container virtualization now ensures reproducibility of results. A step-by-step analysis protocol generated from the log file may, e.g., serve as a draft of a manuscript methods section. Finally, 2 new execution modes were implemented. One allows resuming workflow execution after interruption or modification without rerunning successfully executed tasks not affected by changes. The second one allows detaching and reattaching to workflow execution on a local computer while tasks continue running on computer clusters. 
Watchdog 2.0 provides several new developments that we believe to be of benefit for large-scale bioinformatics analysis and that are not completely covered by other competing workflow management systems. The software itself, module and workflow repositories, and comprehensive documentation are freely available at https://www.bio.ifi.lmu.de/watchdog.",2020-06-01 +32399550,GREMA: modelling of emulated gene regulatory networks with confidence levels based on evolutionary intelligence to cope with the underdetermined problem.,"

Motivation

Non-linear ordinary differential equation (ODE) models that contain numerous parameters are suitable for inferring an emulated gene regulatory network (eGRN). However, the number of experimental measurements is usually far smaller than the number of parameters of the eGRN model, which leads to an underdetermined problem. There is no unique solution to the inference problem for an eGRN using insufficient measurements.

Results

This work proposes an evolutionary modelling algorithm (EMA) that is based on evolutionary intelligence to cope with the underdetermined problem. EMA uses an intelligent genetic algorithm to solve the large-scale parameter optimization problem. An EMA-based method, GREMA, infers a novel type of gene regulatory network with confidence levels for every inferred regulation. The higher the confidence level is, the more accurate the inferred regulation is. GREMA gradually determines the regulations of an eGRN with confidence levels in descending order using either an S-system or a Hill function-based ODE model. The experimental results showed that the regulations with high-confidence levels are more accurate and robust than regulations with low-confidence levels. Evolutionary intelligence enhanced the mean accuracy of GREMA by 19.2% when using the S-system model with benchmark datasets. An increase in the number of experimental measurements may increase the mean confidence level of the inferred regulations. GREMA performed well compared with existing methods that have been previously applied to the same S-system, DREAM4 challenge and SOS DNA repair benchmark datasets.

Availability and implementation

All of the datasets that were used and the GREMA-based tool are freely available at https://nctuiclab.github.io/GREMA.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-06-01 +31933707,Accuracy of Ultrasound in Diagnosis of Pneumothorax: A Comparison between Neonates and Adults-A Systematic Review and Meta-Analysis.,"Objective:The present systematic review and meta-analysis were conducted to investigate the accuracy of ultrasound in the diagnosis of pneumothorax in neonates and adults. Method:The searches were conducted by two independent researchers (MS and HD) to find the relevant studies published from 01/01/2009 until the end of 01/01/2019. We searched for published literature in the English language in MEDLINE via PubMed, Embase™ via ovid, the Cochrane Library, and Trip database. For literature published in other languages, we searched national databases (Magiran and SID), KoreaMed, and LILACS, and we searched OpenGrey (http://www.opengrey.eu/) and the World Health Organization Clinical Trials Registry (http://who.int/ictrp) for unpublished literature and ongoing studies. The keywords used in the search strategy were pneumothorax or ultrasound or chest ultrasonography or neonate or adult or aerothorax or sensitivity or specificity or diagnostic accuracy. The list of previous study resources and systematic reviews was also searched for identifying the published studies (MS and HD). Analyses were performed using Meta-Disc 1.4. Results:In total, 1,565 patients (255 neonates, 1212 adults, and 101 pediatrics suspected of pneumothorax) were investigated in 10 studies. The overall specificity of chest ultrasound in the diagnosis of pneumothorax in both populations of adults and neonates was 85.1% at the confidence interval of 95 percent (95% CI 81.1%-88.5%). At the confidence interval of 95 percent, the sensitivity was 98.6% (95% CI 97.7%-99.2%). The diagnostic odds ratio was 387.72 (95% CI 76.204-1972.7). For the diagnosis of pneumothorax in neonates, the ultrasound sensitivity was 96.7% at the confidence interval of 95 percent (95% CI 88.3%-99.6%). 
At the confidence interval of 95 percent, the specificity was 100% (95% CI 97.7%-100%). For the diagnosis of pneumothorax in adults, the ultrasound sensitivity was 82.9% at the confidence interval of 95 percent (95% CI 78.3-86.9%). At the confidence interval of 95 percent, the specificity was 98.2% (95% CI 97.0%-99.0%). The diagnostic odds ratio was 423.13 (95% CI 45.222-3959.1). Analyzing studies indicated that the sensitivity of ""absence lung sliding"" sign for the diagnosis of pneumothorax was 87.2% (95% CI 77.7-93.7), and specificity was 99.4% (95% CI 96.5%-100%). DOR was 556.74 (95% CI 100.03-3098.7). The sensitivity of ""lung point"" sign for the diagnosis of pneumothorax was 82.1% (95% CI 71.7%-89.8%), and the specificity was 100% (at the confidence interval of 95% CI 97.6%-100%). DOR was 298.0 (95% CI 58.893-1507.8). Conclusion:The diagnosis of pneumothorax using ultrasound is accurate and reliable; additionally, it can result in timely diagnoses specifically in neonatal pneumothorax. Using this method facilitates the therapy process; lack of ionizing radiation and easy operation are benefits of this imaging technique.",2019-12-03 +31139204,Simplicity DiffExpress: A Bespoke Cloud-Based Interface for RNA-seq Differential Expression Modeling and Analysis.,"One of the key challenges for transcriptomics-based research is not only the processing of large data but also modeling the complexity of features that are sources of variation across samples, which is required for an accurate statistical analysis. Therefore, our goal is to foster access for wet lab researchers to bioinformatics tools, in order to enhance their ability to explore biological aspects and validate hypotheses with robust analysis. In this context, user-friendly interfaces can enable researchers to apply computational biology methods without requiring bioinformatics expertise. 
Such bespoke platforms can improve the quality of the findings by allowing the researcher to freely explore the data and test a new hypothesis with independence. Simplicity DiffExpress is a data-driven software platform dedicated to enabling non-bioinformaticians to take ownership of the differential expression analysis (DEA) step in a transcriptomics experiment while presenting the results in a comprehensible layout, which supports an efficient results exploration, information storage, and reproducibility. Simplicity DiffExpress' key component is the bespoke statistical model validation that guides the user through any necessary alteration in the dataset or model, tackling the challenges behind complex data analysis. The software utilizes edgeR, and it is implemented as part of the SimplicityTM platform, providing a dynamic interface, with well-organized results that are easy to navigate and are shareable. Computational biologists and bioinformaticians can also benefit from its use since the data validation is more informative than the usual DEA resources. Wet-lab collaborators can benefit from receiving their results in an organized interface. Simplicity DiffExpress is freely available for academic use, and it is cloud-based (https://simplicity.nsilico.com/dea).",2019-05-14 +30431120,TROAP regulates prostate cancer progression via the WNT3/survivin signalling pathways.,"Prostate cancer (PCa) is one of the most commonly diagnosed malignancies, and 90% of advanced prostate cancer patients relapse after therapy. Trophinin associated protein (TROAP) is essential for centrosome integrity and proper bipolar organisation of spindle assembly during mitosis and plays an essential role in proliferation. We found that TROAP expression correlates with patient survival and speculated that it may be involved in PCa progression. 
The Oncomine database tool (http://www.oncomine.org) was used to analyse TROAP mRNA expression from microarray data, and patient survival analysis for target genes was performed using the PROGgeneV2 Database (http://watson.compbio.iupui.edu). Gene interference with lentivirus was used to silence TROAP expression in PCa cells and knockdown efficiency was detected by qRT-PCR and western blot analysis. Cell viability, colony formation, cell cycle and apoptosis were then assessed to determine the function of TROAP in PCa cells. Markers of cell cycle and apoptosis were tested by western blotting. The correlation between WNT3 or survivin expression and TROAP transcripts in prostate cancer tissues was analysed using GEPIA (http://gepia.cancer-pku.cn) and validated by western blotting. The in vivo role of TROAP was investigated using xenografts. This protein was overexpressed in PCa, and exhibited relatively higher expression in PCa cell lines, DU145 and 22Rv1. Importantly, analysing human cancer databases available from PROGgeneV2 showed that higher expression of TROAP is associated with shorter overall survival in prostate cancer patients. TROAP knockdown inhibited cell proliferation and led to cell cycle arrest at S phase in 22Rv1 and DU145 cells. Cell cycle arrest resulted in apoptosis in both cell lines via the cyclin A2-cyclin B1-caspase pathway. WNT3 and survivin expression levels were found to correlate with TROAP in PCa, and in vivo xenograft assays revealed that silencing of TROAP inhibited PCa tumour growth. Therefore, TROAP might represent a novel predictive marker to guide therapeutic intervention.",2018-11-09 +29069300,ROSC-Pred: web-service for rodent organ-specific carcinogenicity prediction.,"

Motivation

Identification of rodent carcinogens is an important task in risk assessment of chemicals. SAR methods were proposed to reduce the number of animal experiments. Most of these methods ignore information about organ-specificity of tumorigenesis. Our study was aimed at the creation of classification models and a freely available online service for prediction of rodent carcinogens considering the species (rats, mice), sex and tissue-specificity from structural formula of compounds.

Results

The data from Carcinogenic Potency Database for 1011 organic compounds evaluated on the standard two-year rodent carcinogenicity bioassay was used for the creation of training sets. Structure-activity relationships models for prediction of rodent organ-specific carcinogenicity were created by PASS software, which was based on Bayesian-like approach and Multilevel Neighborhoods of Atoms descriptors. The average prediction accuracy for training sets calculated by leave-one-out and 10-fold cross-validation was 79 and 78.2%, respectively.

Availability and implementation

Freely available on the web at http://www.way2drug.com/ROSC.

Contact

alexey.lagunin@ibmc.msk.ru.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-02-01 +29432422,TopicalPdb: A database of topically delivered peptides.,"TopicalPdb (http://crdd.osdd.net/raghava/topicalpdb/) is a repository of experimentally verified topically delivered peptides. Data was manually collected from research articles. The current release of TopicalPdb consists of 657 entries, which includes peptides delivered through the skin (462 entries), eye (173 entries), and nose (22 entries). Each entry provides comprehensive information related to these peptides like the source of origin, nature of peptide, length, N- and C-terminal modifications, mechanism of penetration, type of assays, cargo and biological properties of peptides, etc. In addition to natural peptides, TopicalPdb contains information of peptides having non-natural, chemically modified residues and D-amino acids. Besides this primary information, TopicalPdb stores predicted tertiary structures as well as peptide sequences in SMILE format. Tertiary structures of peptides were predicted using state-of-art method PEPstrMod. In order to assist users, a number of web-based tools have been integrated that includes keyword search, data browsing, similarity search and structural similarity. We believe that TopicalPdb is a unique database of its kind and it will be very useful in designing peptides for non-invasive topical delivery.",2018-02-12 +32719760,Prospective Randomized Observational Pilot Trial Evaluating the Effect of Different Durations of Interdisciplinary Early Intervention and Family Support in Parents of Very Low Birth Weight Infants (Early Bird Study).,"Background: Early childhood intervention (ECI) is a holistic approach for infants with or at risk for psychomotor and/or cognitive and/or behavioral impairment. It aims to optimally support them and positively influence their neurodevelopmental outcome. The right dosage of intervention and when the intervention should start are still to be determined. 
Hypothesis: Parents are more satisfied when the duration of ECI is longer (120 min once a week) than the usual 90-min session. Methods: We developed a parental questionnaire (both mother and father) that evaluated the level of satisfaction of parents with the intervention. We compared 120 with 90 min of ECI per week during the school year 2017/18. Included were parents of very low birth weight infants (<1,500 g) following informed consent. ECI was initiated at the NICU at an infant age of ≥ 2 weeks. Parents were randomized (https://www.randomizer.at/) to a 120- or 90-min duration and had to answer the questionnaire to the approximate time-point of 1, 3, and 6 months. Answers were classified as strongly agree, agree, neither agree nor disagree, disagree, and strongly disagree except for the last question, which directly rated the ECI professional. Results: Eleven fathers (55%) and 19 mothers (95%) of the 10 parents of each group participated in the study. Demographic data did not differ between groups, and the median time-points of questionnaire answers were 77, 137, and 220 days, respectively. Overall, 120-min ECI sessions were not superior to 90-min sessions for both parents regarding parental satisfaction during the study time. We found no differences between fathers and mothers and minimal changes over time. All parents were satisfied with the ECI professionals, irrespective of ECI duration. Conclusion: An ECI duration of 120 min once per week was not superior to a 90-min duration regarding parental satisfaction with ECI professionals and their work.",2020-07-03 +31525295,"New Workflow for QSAR Model Development from Small Data Sets: Small Dataset Curator and Small Dataset Modeler. 
Integration of Data Curation, Exhaustive Double Cross-Validation, and a Set of Optimal Model Selection Techniques.","Quantitative structure-activity relationship (QSAR) modeling is a well-known in silico technique with extensive applications in several major fields such as drug design, predictive toxicology, materials science, food science, etc. Handling small-sized datasets due to the lack of experimental data for specialized end points is a crucial task for the QSAR researcher. In the present study, we propose an integrated workflow/scheme capable of dealing with small dataset modeling that integrates dataset curation, ""exhaustive"" double cross-validation and a set of optimal model selection techniques including consensus predictions. We have developed two software tools, namely, Small Dataset Curator, version 1.0.0, and Small Dataset Modeler, version 1.0.0, to effortlessly execute the proposed workflow. These tools are freely available for download from https://dtclab.webs.com/software-tools . We have performed case studies employing seven diverse datasets to demonstrate the performance of the proposed scheme (including data curation) for small dataset QSAR modeling. The case studies also confirm the usability and stability of the developed software tools.",2019-09-26 +31792693,Longitudinal studies examining the impact of prenatal and subsequent episodes of maternal depression on offspring antisocial behaviour.,"Maternal depression is associated with adverse child outcomes including antisocial behaviour (ASB). Prospective longitudinal studies have focused on the timing and cumulative exposure to maternal depression to further delineate the association and mechanisms of effect. The objective of this systematic review was to synthesise and evaluate the findings of longitudinal studies of maternal depression and offspring antisocial behaviour. Three databases were searched (Psychinfo, Web of Science, and Medline). Twenty of 5936 studies met inclusion criteria. 
Study quality was assessed using the Critical Appraisal Skills Programme criteria [Critical Appraisal Skills Programme (2017) CASP (cohort observation checklist). https://casp-uk.net/wpcontent/uploads/2018/01/CASP-Cohort-Study-Checklist.pdf ]. Results of individual studies were highly varied, using diverse analytical approaches and not all studies explored the independent effects of different episodes. Only three studies examined hypothesised mechanisms. Prenatal, postnatal, and later episodes of depression were all predictive of antisocial outcomes. One particular time period of depression exposure did not emerge as more predictive of offspring ASB than another. However, measures of maternal depression after the perinatal period were limited and typically included a one-off assessment of mothers' depressive symptoms that was concurrent to the assessment of offspring ASB. When cumulative exposure to maternal depression and specific timing effects were measured within the same study it was cumulative exposure that conferred the greatest risk for offspring ASB-particularly when this exposure began during the perinatal period. Findings are discussed in terms of limitations in the literature and highlight the need for future research to examine the biological and environmental mechanisms that underpin associations between maternal depression and offspring antisocial behaviour during different stages of development.",2019-12-02 +31446897,Comprehensive characterization of circular RNAs in ~ 1000 human cancer cell lines.,"

Background

Human cancer cell lines are fundamental models for cancer research and therapeutic strategy development. However, there is no characterization of circular RNAs (circRNAs) in a large number of cancer cell lines.

Methods

Here, we apply four circRNA identification algorithms to heuristically characterize the expression landscape of circRNAs across ~ 1000 human cancer cell lines from CCLE polyA-enriched RNA-seq data. By using integrative analysis and experimental approaches, we explore the expression landscape, biogenesis, functional consequences, and drug response of circRNAs across different cancer lineages.

Results

We revealed highly lineage-specific expression patterns of circRNAs, suggesting that circRNAs may be powerful diagnostic and/or prognostic markers in cancer treatment. We also identified key genes involved in circRNA biogenesis and confirmed that TGF-β signaling may promote biogenesis of circRNAs. Strikingly, we showed that clinically actionable genes are more likely to generate circRNAs, potentially due to the enrichment of RNA-binding protein (RBP) binding sites. Among these, circMYC can promote cell proliferation. We observed strong association between the expression of circRNAs and the response to drugs, especially those targeting chromatin histone acetylation. Finally, we developed a user-friendly data portal, CircRNAs in cancer cell lines (CircRiC, https://hanlab.uth.edu/cRic ), to benefit the biomedical research community.

Conclusions

Our study provides the characterization of circRNAs in cancer cell lines and explored the potential mechanism of circRNA biogenesis as well as its therapeutic implications. We also provide a data portal to facilitate the related biomedical researches.",2019-08-26 +31463006,Even obligate symbioses show signs of ecological contingency: Impacts of symbiosis for an invasive stinkbug are mediated by host plant context.,"

Abstract

Many species interactions are dependent on environmental context, yet the benefits of obligate, mutualistic microbial symbioses to their hosts are typically assumed to be universal across environments. We directly tested this assumption, focusing on the symbiosis between the sap-feeding insect Megacopta cribraria and its primary bacterial symbiont Candidatus Ishikawaella capsulata. We assessed host development time, survival, and body size in the presence and absence of the symbiont on two alternative host plants and in the insects' new invasive range. We found that association with the symbiont was critical for host survival to adulthood when reared on either host plant, with few individuals surviving in the absence of symbiosis. Developmental differences between hosts with and without microbial symbionts, however, were mediated by the host plants on which the insects were reared. Our results support the hypothesis that benefits associated with this host-microbe interaction are environmentally contingent, though given that few individuals survive to adulthood without their symbionts, this may have minimal impact on ecological dynamics and current evolutionary trajectories of these partners.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.kg4bc56.",2019-07-24 +27660521,AxIOM: Amphipod crustaceans from insular Posidonia oceanica seagrass meadows.,"

Background

The Neptune grass, Posidonia oceanica (L.) Delile, 1813, is the most widespread seagrass of the Mediterranean Sea. This foundation species forms large meadows that, through habitat and trophic services, act as biodiversity hotspots. In Neptune grass meadows, amphipod crustaceans are one of the dominant groups of vagile invertebrates, forming an abundant and diverse taxocenosis. They are key ecological components of the complex, pivotal, yet critically endangered Neptune grass ecosystems. Nevertheless, comprehensive qualitative and quantitative data about amphipod fauna found in Mediterranean Neptune grass meadows remain scarce, especially in insular locations.

New information

Here, we provide in-depth metadata about AxIOM, a sample-based dataset published on the GBIF portal. AxIOM is based on an extensive and spatially hierarchized sampling design with multiple years, seasons, day periods, and methods. Samples were taken along the coasts of Calvi Bay (Corsica, France) and of the Tavolara-Punta Coda Cavallo Marine Protected Area (Sardinia, Italy). In total, AxIOM contains 187 samples documenting occurrence (1775 records) and abundance (10720 specimens) of amphipod crustaceans belonging to 72 species spanning 29 families. The dataset is available at http://ipt.biodiversity.be/resource?r=axiom.",2016-09-08 +23798574,Research resource: novel structural insights bridge gaps in glycoprotein hormone receptor analyses.,"The first version of a glycoprotein hormone receptor (GPHR) information resource was designed to link functional with structural GPHR information, in order to support sequence-structure-function analysis of the LH, FSH, and TSH receptors (http://ssfa-gphr.de). However, structural information on a binding- and signaling-sensitive extracellular fragment (∼100 residues), the hinge region, had been lacking. A new FSHR crystal structure of the hormone-bound extracellular domain has recently been solved. The structure comprises the leucine-rich repeat domain and most parts of the hinge region. We have not only integrated the new FSHR/FSH structure and the derived homology models of TSHR/TSH, LHCGR/CG, and LHCGR/LH into our web-based information resource, but have additionally provided novel tools to analyze the advanced structural features, with the common characteristics and distinctions between GPHRs, in a more precise manner. The hinge region with its second hormone-binding site allows us to assign functional data to the new structural features between hormone and receptor, such as binding details of a sulfated tyrosine (conserved throughout the GPHRs) extending into a pocket of the hormone. 
We have also implemented a protein interface analysis tool that enables the identification and visualization of extracellular contact points between interaction partners. This provides a starting point for comparing the binding patterns of GPHRs. Together with the mutagenesis data stored in the database, this will help to decipher the essential residues for ligand recognition and the molecular mechanisms of signal transduction, extending from the extracellular hormone-binding site toward the intracellular G protein-binding sites.",2013-06-24 +23193258,NCBI GEO: archive for functional genomics data sets--update.,"The Gene Expression Omnibus (GEO, http://www.ncbi.nlm.nih.gov/geo/) is an international public repository for high-throughput microarray and next-generation sequence functional genomic data sets submitted by the research community. The resource supports archiving of raw data, processed data and metadata which are indexed, cross-linked and searchable. All data are freely available for download in a variety of formats. GEO also provides several web-based tools and strategies to assist users to query, analyse and visualize data. This article reports current status and recent database developments, including the release of GEO2R, an R-based web application that helps users analyse GEO data.",2012-11-27 +31529043,Biogenesis mechanisms of circular RNA can be categorized through feature extraction of a machine learning model.,"

Motivation

In recent years, multiple circular RNAs (circRNA) biogenesis mechanisms have been discovered. Although each reported mechanism has been experimentally verified in different circRNAs, no single biogenesis mechanism has been proposed that can universally explain the biogenesis of all tens of thousands of discovered circRNAs. Under the hypothesis that human circRNAs can be categorized according to different biogenesis mechanisms, we designed a contextual regression model trained to predict the formation of circular RNA from a random genomic locus on human genome, with potential biogenesis factors of circular RNA as the features of the training data.

Results

After achieving high prediction accuracy, we found through the feature extraction technique that the examined human circRNAs can be categorized into seven subgroups, according to the presence of the following sequence features: RNA editing sites, simple repeat sequences, self-chains, RNA binding protein binding sites and CpG islands within the flanking regions of the circular RNA back-spliced junction sites. These results support all of the previously reported biogenesis mechanisms of circRNA and solidify the idea that multiple biogenesis mechanisms co-exist for different subsets of human circRNAs. Furthermore, we uncover potential new links between circRNA biogenesis and flanking CpG islands. We have also identified RNA binding proteins putatively correlated with circRNA biogenesis.

Availability and implementation

Scripts and tutorial are available at http://wanglab.ucsd.edu/star/circRNA. This program is under GNU General Public License v3.0.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +31300027,Peak calling by Sparse Enrichment Analysis for CUT&RUN chromatin profiling.,"

Background

CUT&RUN is an efficient epigenome profiling method that identifies sites of DNA binding protein enrichment genome-wide with high signal to noise and low sequencing requirements. Currently, the analysis of CUT&RUN data is complicated by its exceptionally low background, which renders programs designed for analysis of ChIP-seq data vulnerable to oversensitivity in identifying sites of protein binding.

Results

Here we introduce Sparse Enrichment Analysis for CUT&RUN (SEACR), an analysis strategy that uses the global distribution of background signal to calibrate a simple threshold for peak calling. SEACR discriminates between true and false-positive peaks with near-perfect specificity from ""gold standard"" CUT&RUN datasets and efficiently identifies enriched regions for several different protein targets. We also introduce a web server ( http://seacr.fredhutch.org ) for plug-and-play analysis with SEACR that facilitates maximum accessibility across users of all skill levels.

Conclusions

SEACR is a highly selective peak caller that definitively validates the accuracy of CUT&RUN for datasets with known true negatives. Its ease of use and performance in comparison with existing peak calling strategies make it an ideal choice for analyzing CUT&RUN data.",2019-07-12 +31958867,"Efficacy of Homeopathy in Addition to a Multidisciplinary Intervention for Overweight or Obesity in Mexican Adolescents: Study Protocol for a Randomized, Double-Blind, Placebo-Controlled Trial.","

Background

Current recommendations for treating obesity in adolescence include a comprehensive approach (nutritional, behavioral, and exercise). Calcarea carbonica ostrearum (CCO) is a homeopathic medicine usually prescribed in obese individuals, but its effects on weight and body fat are not completely known.

Objective

The aim of this study will be to evaluate the efficacy of homeopathic CCO, in addition to a multidisciplinary intervention (diet, motivational support, and exercise program), on body fat and weight in obese adolescents.

Methods/design

A randomized, placebo-controlled, double-blind, parallel-group, superiority trial with 3-month study duration will be undertaken. The study will be conducted in a public research hospital in Mexico City, Hospital Juárez de México, in the outpatient services of homeopathy and sports medicine. Eighty non-diabetic adolescents, 12 to 19 years old, who are overweight or obese, will be included. The primary outcome: change in body fat percentage at week 12. The secondary outcomes: change in mean total weight, total body mass index, fat mass index, waist-hip ratio, lean muscle mass, fasting glucose, insulin, insulin resistance, lipid profile, score of Center for Epidemiologic Studies Depression Scale Revised (CESD-R) and score of Screen for Child Anxiety-Related Emotional Disorders (SCARED) at week 12. Efficacy data will be analyzed in the intention-to-treat sample. To determine the difference in the outcomes between groups at baseline and week 12, data will be analyzed using Student's t-test.

Discussion

This is the first randomized controlled trial aimed to determine the fat-reducing efficacy in obese adolescents of a homeopathic medicine, CCO, given in addition to a multidisciplinary intervention, compared with placebo plus the same intervention. It is an attempt to support scientific evidence in homeopathy for one of the most common chronic diseases, which causes high mortality due to its complications.

ClinicalTrials.gov identifier

NCT03945396:  https://clinicaltrials.gov/ct2/show/NCT03945396?term=homeopathy+for+obesity+in+Mexican+adolescents&rank=1.",2020-01-20 +31086984,Protein-ensemble-RNA docking by efficient consideration of protein flexibility through homology models.,"

Motivation

Given the importance of protein-ribonucleic acid (RNA) interactions in many biological processes, a variety of docking algorithms have been developed to predict the complex structure from individual protein and RNA partners in the past decade. However, due to the impact of molecular flexibility, the performance of current methods has hit a bottleneck in realistic unbound docking. Pushing the limit, we have proposed a protein-ensemble-RNA docking strategy to explicitly consider the protein flexibility in protein-RNA docking through an ensemble of multiple protein structures, which is referred to as MPRDock. Instead of taking conformations from MD simulations or experimental structures, we obtained the multiple structures of a protein by building models from its homologous templates in the Protein Data Bank (PDB).

Results

Our approach can not only avoid the reliability issue of structures from MD simulations but also circumvent the limited number of experimental structures for a target protein in the PDB. Tested on 68 unbound-bound and 18 unbound-unbound protein-RNA complexes, our MPRDock/DITScorePR considerably improved the docking performance and achieved a significantly higher success rate than single-protein rigid docking whether pseudo-unbound templates are included or not. Similar improvements were also observed when combining our ensemble docking strategy with other scoring functions. The present homology model-based ensemble docking approach will have a general application in molecular docking for other interactions.

Availability and implementation

http://huanglab.phys.hust.edu.cn/mprdock/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +31214154,"Geptop 2.0: An Updated, More Precise, and Faster Geptop Server for Identification of Prokaryotic Essential Genes.","Geptop has performed effectively in the identification of prokaryotic essential genes since its first release in 2013. It estimates gene essentiality for prokaryotes based on orthology and phylogeny. Genome-scale essentiality data of more prokaryotic species are available, and the information has been collected into public essential gene repositories such as DEG and OGEE. A faster and more accurate toolkit is needed to meet the increasing prokaryotic genome data. We updated Geptop by supplementing more validated essentiality data into reference set (from 19 to 37 species), and introducing multi-process technology to accelerate the computing speed. Compared with Geptop 1.0 and other gene essentiality prediction models, Geptop 2.0 can generate more stable predictions and finish the computation in a shorter time. The software is available both as an online server and a downloadable standalone application. We hope that the improved Geptop 2.0 will facilitate researches in gene essentiality and the development of novel antibacterial drugs. The gene essentiality prediction tool is available at http://cefg.uestc.cn/geptop.",2019-06-04 +26909892,Pep-Calc.com: a set of web utilities for the calculation of peptide and peptoid properties and automatic mass spectral peak assignment.,"The ability to calculate molecular properties such as molecular weights, isoelectric points, and extinction coefficients is vital for scientists using and/or synthesizing peptides and peptoids for research. A suite of two web utilities: Peptide Calculator and Peptoid Calculator, available free at http://www.pep-calc.com, are presented. 
Both tools allow the calculation of peptide/peptoid chemical formulae and molecular weight, ChemDraw structure file export and automatic assignment of mass spectral peaks to deletion sequences and metal/protecting group adducts. Peptide Calculator also provides a calculated isoelectric point, molar extinction coefficient, graphical peptide charge summary and β-strand contiguity profile (for aggregation-prone sequences), indicating potential regions of synthesis difficulty. In addition to the unique automatic spectral assignment features offered across both utilities, Peptoid Calculator represents a first-of-a-kind resource for researchers in the field of peptoid science. With a constantly expanding database of over 120 amino acids, non-natural peptide building blocks and peptoid building blocks, it is anticipated that Pep-Calc.com will act as a valuable asset to those working on the synthesis and/or application of peptides and peptoids in the biophysical and life sciences fields.",2016-02-24 +31886876,Prognostic model for multiple myeloma progression integrating gene expression and clinical features. ,"Multiple myeloma (MM) is a hematological cancer caused by abnormal accumulation of monoclonal plasma cells in bone marrow. With the increase in treatment options, risk-adapted therapy is becoming more and more important. Survival analysis is commonly applied to study progression or other events of interest and stratify the risk of patients. In this study, we present the current state-of-the-art model for MM prognosis and the molecular biomarker set for stratification: the winning algorithm in the 2017 Multiple Myeloma DREAM Challenge, Sub-Challenge 3. Specifically, we built a non-parametric complete hazard ranking model to map the right-censored data into a linear space, where commonplace machine learning techniques, such as Gaussian process regression and random forests, can play their roles. 
Our model integrated both the gene expression profile and clinical features to predict the progression of MM. Compared with conventional models, such as Cox model and random survival forests, our model achieved higher accuracy in 3 within-cohort predictions. In addition, it showed robust predictive power in cross-cohort validations. Key molecular signatures related to MM progression were identified from our model, which may function as the core determinants of MM progression and provide important guidance for future research and clinical practice. Functional enrichment analysis and mammalian gene-gene interaction network revealed crucial biological processes and pathways involved in MM progression. The model is dockerized and publicly available at https://www.synapse.org/#!Synapse:syn11459638. Both data and reproducible code are included in the docker. We present the current state-of-the-art prognostic model for MM integrating gene expression and clinical features validated in an independent test set.",2019-12-01 +30915546,OPENMENDEL: a cooperative programming project for statistical genetics.,"Statistical methods for genome-wide association studies (GWAS) continue to improve. However, the increasing volume and variety of genetic and genomic data make computational speed and ease of data manipulation mandatory in future software. In our view, a collaborative effort of statistical geneticists is required to develop open source software targeted to genetic epidemiology. Our attempt to meet this need is called the OPENMENDEL project (https://openmendel.github.io). It aims to (1) enable interactive and reproducible analyses with informative intermediate results, (2) scale to big data analytics, (3) embrace parallel and distributed computing, (4) adapt to rapid hardware evolution, (5) allow cloud computing, (6) allow integration of varied genetic data types, and (7) foster easy communication between clinicians, geneticists, statisticians, and computer scientists. 
This article reviews and makes recommendations to the genetic epidemiology community in the context of the OPENMENDEL project.",2019-03-26 +32438827,Long-Term Exposure to Air Pollution and Incidence of Myocardial Infarction: A Danish Nurse Cohort Study.,"

Background

Air pollution exposure has been linked to coronary heart disease, although evidence on PM2.5 and myocardial infarction (MI) incidence is mixed.

Objectives

This prospective cohort study aimed to investigate associations between long-term exposure to air pollution and MI incidence, adjusting for road traffic noise.

Methods

We used data from the nationwide Danish Nurse Cohort on 22,882 female nurses (>44 years of age) who, at recruitment in 1993 or 1999, reported information on cardiovascular disease risk factors. Data on MI incidence was collected from the Danish National Patient Register until the end of 2014. Annual mean concentrations of particulate matter (PM) with a diameter <2.5 μm (PM2.5), PM10, nitrogen dioxide (NO2), and nitrogen oxides (NOx) at the nurses' residences since 1990 (PM10 and PM2.5) or 1970 (NO2 and NOx) were estimated using the Danish Eulerian Hemispheric Model/Urban Background Model/AirGIS (DEHM/UBM/AirGIS) dispersion model. We used time-varying Cox regression models to examine the association between 1- and 3-y running means of these pollutants, as well as 23-y running means of NO2 and NOx, with both overall and fatal incident MI. Associations were explored in three progressively adjusted models: Model 1, adjusted for age and baseline year; Model 2, with further adjustment for potential confounding by lifestyle and cardiovascular disease risk factors; and Model 3, with further adjustment for road traffic noise, modeled as the annual mean of a weighted 24-h average (Lden).

Results

Of the 22,882 women, 641 developed MI during a mean follow-up of 18.6 y, 121 (18.9%) of which were fatal. Reported hazard ratios (HRs) were based on interquartile range increases of 5.3, 5.5, 8.1, and 11.5 μg/m3 for PM2.5, PM10, NO2, and NOx, respectively. In Model 1, we observed a positive association between a 3-y running mean of PM2.5 and an overall incident MI with an HR= 1.20 (95% CI: 1.07, 1.35), which attenuated to HR= 1.06 (95% CI: 0.92, 1.23) in Model 2. In Model 1 for incident fatal MI, we observed a strong association with a 3-y running mean of PM2.5, with an HR= 1.69 (95% CI: 1.33, 2.13), which attenuated to HR= 1.35 (95% CI: 1.01, 1.81) in Model 2. Similar associations were seen for PM10, with 3-y, Model 2 estimates for overall and fatal incident MI of HR= 1.06 (95% CI: 0.91, 1.23) and HR= 1.35 (95% CI: 1.01, 1.81), respectively. No evidence of an association was observed for NO2 or NOx. For all pollutants, associations in Model 2 were robust to further adjustment for road traffic noise in Model 3 and were similar for a 1-y running mean exposure.

Conclusions

We found no association between long-term exposure to PM2.5, PM10, NO2, or NOx and overall MI incidence, but we observed positive associations for PM2.5 and PM10 with fatal MI. We present novel findings that the association between PM and MI incidence is robust to adjustment for road traffic noise. https://doi.org/10.1289/EHP5818.",2020-05-06 +31783725,HKPocket: human kinase pocket database for drug design.,"BACKGROUND:The kinase pocket structural information is important for drug discovery targeting cancer or other diseases. Although some kinase sequence, structure or drug databases have been developed, the databases cannot be directly used in the kinase drug study. Therefore, a comprehensive database of human kinase protein pockets is urgently needed to be developed. RESULTS:Here, we have developed HKPocket, a comprehensive Human Kinase Pocket database. This database provides sequence, structure, hydrophilic-hydrophobic, critical interactions, and druggability information including 1717 pockets from 255 kinases. We further divided these pockets into 91 pocket clusters using structural and position features in each kinase group. The pocket structural information would be useful for preliminary drug screening. Then, the potential drugs can be further selected and optimized by analyzing the sequence conservation, critical interactions, and hydrophobicity of identified drug pockets. HKPocket also provides online visualization and pse files of all identified pockets. CONCLUSION:The HKPocket database would be helpful for drug screening and optimization. Besides, drugs targeting the non-catalytic pockets would cause fewer side effects. HKPocket is available at http://zhaoserver.com.cn/HKPocket/HKPocket.html.",2019-11-29 +23750083,Phyto diab care: Phytoremedial database for antidiabetics.,"

Unlabelled

Diabetes, a chronic disease debilitating to normal healthy lifestyle, onsets due to insufficient amount of insulin production or ineffective utilization of the amount produced. Although pharmaceutical research has brought up remedial drugs and numerous candidates in various phases of clinical trials, off-target effects and unwanted physiological actions are a constant source of concern and contra indicatory in case of diabetic patients. Here we present a phytoremedial database, Phyto Diab Care, broadly applicable to any known anti-diabetic medicinal plant and phytochemicals sourced from them. Utilization of the traditional medicine knowledge for combating diabetes without creating unwanted physiological actions is our major emphasis. Data collected from peer-reviewed publications and phytochemicals were added to the customizable database by means of an extended relational design. The strength of this resource is in providing rapid retrieval of data from large volumes of text at a high degree of accuracy. Enhanced web interface allows multi-criteria based information filtering. Furthermore, the availability of 2D and 3D structures from molecular docking studies with any efficacy on the insulin signaling pathway makes the resource searchable and comparable in an intuitive manner. Phyto Diab Care compendium is publicly available and can be found online.

Availability

http://www.gbpuat-cbsh.ac.in/departments/bi/database/phytodiabcare/HOME%20PAGE/Home%20page.html.",2013-04-13 +24178028,"SubtiWiki-a database for the model organism Bacillus subtilis that links pathway, interaction and expression information.","Genome annotation and access to information from large-scale experimental approaches at the genome level are essential to improve our understanding of living cells and organisms. This is even more the case for model organisms that are the basis to study pathogens and technologically important species. We have generated SubtiWiki, a database for the Gram-positive model bacterium Bacillus subtilis (http://subtiwiki.uni-goettingen.de/). In addition to the established companion modules of SubtiWiki, SubtiPathways and SubtInteract, we have now created SubtiExpress, a third module, to visualize genome scale transcription data that are of unprecedented quality and density. Today, SubtiWiki is one of the most complete collections of knowledge on a living organism in one single resource.",2013-10-30 +31467956,Volatile organic compound data of ready-to-cook tuna fish-burgers: Time evolution in function of different and/or combined mild preservation technologies and relevant statistical analysis.,"Volatile organic compound (VOC) composition from ready-to-cook tuna fish-burgers, prepared with and without a protective microbial strain (Lactobacillus paracasei) and/or stored with modified atmosphere packaging (MAP, 5% O2 and 95% CO2), were extracted by headspace solid-phase microextraction and analyzed by gas chromatography-mass spectrometry (HS-SPME-GC-MS) during the burger shelf-life. The collected data showed volatile composition profiles in function of the mild preservation technologies employed and the storage time. Furthermore, statistical data treatment (principal component analysis and Pearson's coefficients) highlighted differences among samples and positive/negative correlations during the storage time. 
This paper is related to an article already published in LWT (Investigating the effects of mild preservation technology on perishable foods by volatolomics: The case study of ready-to-cook tuna-burgers"" https://doi.org/10.1016/j.lwt.2019.108425).",2019-08-07 +24434032,DBM-DB: the diamondback moth genome database.,"The diamondback moth Genome Database (DBM-DB) is a central online repository for storing and integrating genomic data of diamondback moth (DBM), Plutella xylostella (L.). It provides comprehensive search tools and downloadable datasets for scientists to study comparative genomics, biological interpretation and gene annotation of this insect pest. DBM-DB contains assembled transcriptome datasets from multiple DBM strains and developmental stages, and the annotated genome of P. xylostella (version 2). We have also integrated publically available ESTs from NCBI and a putative gene set from a second DBM genome (KONAGbase) to enable users to compare different gene models. DBM-DB was developed with the capacity to incorporate future data resources, and will serve as a long-term and open-access database that can be conveniently used for research on the biology, distribution and evolution of DBM. This resource aims to help reduce the impact DBM has on agriculture using genomic and molecular tools. Database URL: http://iae.fafu.edu.cn/DBM/",2014-01-16 +25474213,The HIV mutation browser: a resource for human immunodeficiency virus mutagenesis and polymorphism data.,"Huge research effort has been invested over many years to determine the phenotypes of natural or artificial mutations in HIV proteins--interpretation of mutation phenotypes is an invaluable source of new knowledge. The results of this research effort are recorded in the scientific literature, but it is difficult for virologists to rapidly find it. 
Manually locating data on phenotypic variation within the approximately 270,000 available HIV-related research articles, or the further 1,500 articles that are published each month is a daunting task. Accordingly, the HIV research community would benefit from a resource cataloguing the available HIV mutation literature. We have applied computational text-mining techniques to parse and map mutagenesis and polymorphism information from the HIV literature, have enriched the data with ancillary information and have developed a public, web-based interface through which it can be intuitively explored: the HIV mutation browser. The current release of the HIV mutation browser describes the phenotypes of 7,608 unique mutations at 2,520 sites in the HIV proteome, resulting from the analysis of 120,899 papers. The mutation information for each protein is organised in a residue-centric manner and each residue is linked to the relevant experimental literature. The importance of HIV as a global health burden advocates extensive effort to maximise the efficiency of HIV research. The HIV mutation browser provides a valuable new resource for the research community. The HIV mutation browser is available at: http://hivmut.org.",2014-12-04 +31667256,Loading characteristics data applied on osseointegrated implant by transfemoral bone-anchored prostheses fitted with basic components during daily activities.,"The data in this paper are related to the research articles entitled ""Kinetics of transfemoral amputees with osseointegrated fixation performing common activities of daily living"" (Lee et al., Clinical Biomechanics, 2007.22(6). p. 665-673) and ""Magnitude and variability of loading on the osseointegrated implant of transfemoral amputees during walking"" (Lee et al., Med Eng Phys, 2008.30(7). p. 825-833). 
This article contains the overall and individual loading characteristics applied on screw-type osseointegrated implant generated by transfemoral bone-anchored prostheses fitted with basic components during daily activities at self-selected comfortable pace. Overall and individual data was presented for the (A) spatio-temporal characteristics, (B) loading patterns, (C) loading boundaries and (D) the loading local extremum during level walking, ascending and descending ramp and stairs. Inter-participant variability of these new datasets with basic components is critical to improve the efficacy and safety of prosthetic components as well as the design of future automated algorithms and clinical trials. Online repository contains the files: https://data.mendeley.com/datasets/hh8rjjh73w/1.",2019-09-11 +32654668,Carcass gain per kg feed intake: developing a stakeholder-driven benchmark for comparing grow-finishing pig performance.,"Feed conversion ratio (FCR) in grow-finishing pigs is one of the most important determinants of pig farm profitability and production efficiency. In its simplest form, FCR represents the amount of feed used per unit weight gain of the pig. Yet, this approach entails various limitations hampering its practical applicability such as availability of accurate data and large variation in ways to adapt FCR values for different starting and end weight as well as mortality rates. Various stakeholders are using their own formulas to determine FCR creating a 'definition nonconformity' when comparing FCRs among farms. This study aimed to optimize the calculation of FCR through the use of participatory qualitative research. A multidisciplinary research group of 9 persons (animal scientists, veterinarians and agricultural economists) and a consulting group of 31 stakeholders (representing the Flemish primary sector, feed industry, pharma, genetic companies, large retailers, academia and policy institutions) were involved. 
The decision problem analysis started with a literature review, followed by 25 in-depth interviews and their analyses (NVivo 11™). This led to an additional literature review and the formation of focus (expert) groups that helped to formulate preliminary FCR formulas. Revision rounds between the research team and the stakeholders further fine-tuned the formulas with the final result being two distinct complimentary formulas that are fit for purpose. Both refer to carcass gain per kg feed intake (plain (CGF) and standardized (CGFstandardized)). The first formula (CGF), namely ${{{\it{number \, delivered \, pigs}} \times {\it{average \, warm \, carcass \, weight}} - {\it{number \, stocked \, piglets}} \times {\it{average \, piglet \, weight}} \times {\it{piglet \, carcass \, yield}}} \over {{\it{feed \, consumption}}}}$ is an objective representation of the animals' performance. The second formula (CGFstandardized) was developed for farm benchmarking, incorporating a seven-step standardization process that corrects for mortality and 'standardizes' for a fixed (yet fictive) live weight trajectory of 25 to 115 kg. This second formula allows to compare farms (or batches of fattening pigs) with different weight trajectories and different mortality rates. A webtool was designed to ease this standardization process (https://varkensloket.be/tools/CGF).",2020-07-13 +30169576,Protease target prediction via matrix factorization.,"

Motivation

Protein cleavage is an important cellular event, involved in a myriad of processes, from apoptosis to immune response. Bioinformatics provides in silico tools, such as machine learning-based models, to guide the discovery of targets for the proteases responsible for protein cleavage. State-of-the-art models have a scope limited to specific protease families (such as Caspases), and do not explicitly include biological or medical knowledge (such as the hierarchical protein domain similarity or gene-gene interactions). To fill this gap, we present a novel approach for protease target prediction based on data integration.

Results

By representing protease-protein target information in the form of relational matrices, we design a model (i) that is general and not limited to a single protease family, and (ii) leverages on the available knowledge, managing extremely sparse data from heterogeneous data sources, including primary sequence, pathways, domains and interactions. When compared with other algorithms on test data, our approach provides a better performance even for models specifically focusing on a single protease family.

Availability and implementation

https://gitlab.com/smarini/MaDDA/ (Matlab code and utilized data.).

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +25005261,FmTFDb: a foxtail millet transcription factors database for expediting functional genomics in millets.,"Foxtail millet has recently been regarded as a model crop for studying the systems biology of millets and bioenergy grass species. For expediting the functional genomic studies in this model crop as well as in the related millets and bioenergy grasses, we have developed a comprehensive transcription factor database. Our foxtail millet transcription factors database (FmTFDb: http://59.163.192.91/FmTFDb/index.html ) encompasses 2,297 putative TFs in 55 families along with its sequence features, chromosomal locations, tissue-specific gene expression data, gene ontology (GO) assignment, and phylogeny. FmTFDb is intended to provide the users an unrestricted public access in retrieving and visualizing the individual members of a TF family through a set of query interfaces and analysis tools, including the BLAST search, annotation query interfaces, and tools to identify enriched GO terms and to visualize physical maps. This FmTFDb will serve as a promising central resource for researchers as well as breeders who are dedicated towards crop improvement of millets and bioenergy grasses.",2014-07-09 +31780665,A database of high-resolution MS/MS spectra for lichen metabolites.,"While analytical techniques in natural products research massively shifted to liquid chromatography-mass spectrometry, lichen chemistry remains reliant on limited analytical methods, Thin Layer Chromatography being the gold standard. To meet the modern standards of metabolomics within lichenochemistry, we announce the publication of an open access MS/MS library with 250 metabolites, coined LDB for Lichen DataBase, providing a comprehensive coverage of lichen chemodiversity. These were donated by the Berlin Garden and Botanical Museum from the collection of Siegfried Huneck to be analyzed by LC-MS/MS. 
Spectra at individual collision energies were submitted to MetaboLights (https://www.ebi.ac.uk/metabolights/MTBLS999) while merged spectra were uploaded to the GNPS platform (CCMSLIB00004751209 to CCMSLIB00004751517). Technical validation was achieved by dereplicating three lichen extracts using a Molecular Networking approach, revealing the detection of eleven unique molecules that would have been missed without LDB implementation to the GNPS. From a chemist's viewpoint, this database should help streamlining the isolation of formerly unreported metabolites. From a taxonomist perspective, the LDB offers a versatile tool for the chemical profiling of newly reported species.",2019-11-28 +31114886,Interactive web-based visualization and sharing of phylogenetic trees using phylogeny.IO.,"Traditional static publication formats make visualization, exploration, and sharing of massive phylogenetic trees difficult. A phylogenetic study often involves hundreds of taxa, and the resulting tree has to be split across multiple journal pages, or be shrunk onto one, which jeopardizes legibility. Furthermore, additional data layers, such as species-specific information or time calibrations are often displayed in separate figures, making the entire picture difficult for readers to grasp. Web-based technologies, such as the Data Driven Document (D3) JavaScript library, were created to overcome such challenges by allowing interactive displays of complex data sets. The new phylogeny.IO web server (https://phylogeny.io) overcomes this issue by allowing users to easily import, annotate, and share interactive phylogenetic trees. It allows a range of static (e.g. such as shapes and colors) and dynamic (e.g. pop-up text and images) annotations. Annotated trees can be saved on the server for subsequent modification or they may be shared as IFrame HTML objects, easily embeddable in any web page. 
The principal goal of phylogeny.IO is not to produce publication-ready figures, but rather to provide a simple and intuitive annotation interface that allows easy and rapid sharing of figures in blogs, lecture notes, press releases, etc.",2019-07-01 +30094004,"The Euphausia superba transcriptome database, SuperbaSE: An online, open resource for researchers.","Antarctic krill (Euphausia superba) is a crucial component of the Southern Ocean ecosystem, acting as the major link between primary production and higher trophic levels with an annual predator demand of up to 470 million tonnes. It also acts as an ecosystem engineer, affecting carbon sequestration and recycling iron and nitrogen, and has increasing importance as a commercial product in the aquaculture and health industries. Here we describe the creation of a de novo assembled head transcriptome for E. superba. As an example of its potential as a molecular resource, we relate its exploitation in identifying and characterizing numerous genes related to the circadian clock in E. superba, including the major components of the central feedback loop. We have made the transcriptome openly accessible for a wider audience of ecologists, molecular biologists, evolutionary geneticists, and others in a user-friendly format at SuperbaSE, hosted at http://www.krill.le.ac.uk.",2017-06-28 +31260443,DART-ID increases single-cell proteome coverage.,"Analysis by liquid chromatography and tandem mass spectrometry (LC-MS/MS) can identify and quantify thousands of proteins in microgram-level samples, such as those comprised of thousands of cells. This process, however, remains challenging for smaller samples, such as the proteomes of single mammalian cells, because reduced protein levels reduce the number of confidently sequenced peptides. To alleviate this reduction, we developed Data-driven Alignment of Retention Times for IDentification (DART-ID). 
DART-ID implements principled Bayesian frameworks for global retention time (RT) alignment and for incorporating RT estimates towards improved confidence estimates of peptide-spectrum-matches. When applied to bulk or to single-cell samples, DART-ID increased the number of data points by 30-50% at 1% FDR, and thus decreased missing data. Benchmarks indicate excellent quantification of peptides upgraded by DART-ID and support their utility for quantitative analysis, such as identifying cell types and cell-type specific proteins. The additional datapoints provided by DART-ID boost the statistical power and double the number of proteins identified as differentially abundant in monocytes and T-cells. DART-ID can be applied to diverse experimental designs and is freely available at http://dart-id.slavovlab.net.",2019-07-01 +29370821,PhenoDis: a comprehensive database for phenotypic characterization of rare cardiac diseases.,"BACKGROUND:Thoroughly annotated data resources are a key requirement in phenotype dependent analysis and diagnosis of diseases in the area of precision medicine. Recent work has shown that curation and systematic annotation of human phenome data can significantly improve the quality and selectivity for the interpretation of inherited diseases. We have therefore developed PhenoDis, a comprehensive, manually annotated database providing symptomatic, genetic and imprinting information about rare cardiac diseases. RESULTS:PhenoDis includes 214 rare cardiac diseases from Orphanet and 94 more from OMIM. For phenotypic characterization of the diseases, we performed manual annotation of diseases with articles from the biomedical literature. Detailed description of disease symptoms required the use of 2247 different terms from the Human Phenotype Ontology (HPO). 
Diseases listed in PhenoDis frequently cover a broad spectrum of symptoms with 28% from the branch of 'cardiovascular abnormality' and others from areas such as neurological (11.5%) and metabolism (6%). We collected extensive information on the frequency of symptoms in respective diseases as well as on disease-associated genes and imprinting data. The analysis of the abundance of symptoms in patient studies revealed that most of the annotated symptoms (71%) are found in less than half of the patients of a particular disease. Comprehensive and systematic characterization of symptoms including their frequency is a pivotal prerequisite for computer based prediction of diseases and disease causing genetic variants. To this end, PhenoDis provides in-depth annotation for a complete group of rare diseases, including information on pathogenic and likely pathogenic genetic variants for 206 diseases as listed in ClinVar. We integrated all results in an online database ( http://mips.helmholtz-muenchen.de/phenodis/ ) with multiple search options and provide the complete dataset for download. CONCLUSION:PhenoDis provides a comprehensive set of manually annotated rare cardiac diseases that enables computational approaches for disease prediction via decision support systems and phenotype-driven strategies for the identification of disease causing genes.",2018-01-25 +29155950,Ensembl 2018.,"The Ensembl project has been aggregating, processing, integrating and redistributing genomic datasets since the initial releases of the draft human genome, with the aim of accelerating genomics research through rapid open distribution of public data. Large amounts of raw data are thus transformed into knowledge, which is made available via a multitude of channels, in particular our browser (http://www.ensembl.org). Over time, we have expanded in multiple directions. 
First, our resources describe multiple fields of genomics, in particular gene annotation, comparative genomics, genetics and epigenomics. Second, we cover a growing number of genome assemblies; Ensembl Release 90 contains exactly 100. Third, our databases feed simultaneously into an array of services designed around different use cases, ranging from quick browsing to genome-wide bioinformatic analysis. We present here the latest developments of the Ensembl project, with a focus on managing an increasing number of assemblies, supporting efforts in genome interpretation and improving our browser.",2018-01-01 +31776133,Gain-of-Function Mutant p53 R273H Interacts with Replicating DNA and PARP1 in Breast Cancer.,"Over 80% of triple-negative breast cancers (TNBC) express mutant p53 (mtp53) and some contain oncogenic gain-of-function (GOF) p53. We previously reported that GOF mtp53 R273H upregulates the chromatin association of mini chromosome maintenance (MCM) proteins MCM2-7 and PARP and named this the mtp53-PARP-MCM axis. In this study, we dissected the function and association between mtp53 and PARP using a number of different cell lines, patient-derived xenografts (PDX), tissue microarrays (TMA), and The Cancer Genome Atlas (TCGA) database. Endogenous mtp53 R273H and exogenously expressed R273H and R248W bound to nascent 5-ethynyl-2´-deoxyuridine-labeled replicating DNA. Increased mtp53 R273H enhanced the association of mtp53 and PARP on replicating DNA. Blocking poly-ADP-ribose gylcohydrolase also enhanced this association. Moreover, mtp53 R273H expression enhanced overall MCM2 levels, promoted cell proliferation, and improved the synergistic cytotoxicity of treatment with the alkylating agent temozolomide in combination with the PARP inhibitor (PARPi) talazoparib. Staining of p53 and PARP1 in breast cancer TMAs and comparison with the TCGA database indicated a higher double-positive signal in basal-like breast cancer than in luminal A or luminal B subtypes. 
Higher PARP1 protein levels and PAR proteins were detected in mtp53 R273H than in wild-type p53-expressing PDX samples. These results indicate that mtp53 R273H and PARP1 interact with replicating DNA and should be considered as dual biomarkers for identifying breast cancers that may respond to combination PARPi treatments. SIGNIFICANCE: p53 gain-of-function mutant 273H and PARP1 interact with replication forks and could serve as potential biomarkers for breast cancer sensitivity to PARP inhibitors. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/3/394/F1.large.jpg.",2019-11-27 +31061853,Data on the impact of an object with different thicknesses of different soft materials at different impact velocities on a dummy head.,"The purpose of this data is to investigate the effect of different thicknesses of different soft materials samples added to an object on the resultant head acceleration of a developed dummy head upon impact. The object was a cylinder (10 × 10 cm2, diameter and height) and weighs 0.4 kg. The investigated materials were Ecoflex, Dragon Skin, and Clay while the thickness were 1 mm, 2 mm, 3 mm, and 5 mm. The velocities of the impacts for the 108 experiments were between 1 m/s and 3 m/s. Three severity indices (i.e. peak head linear acceleration, 3 ms criterion and the Head Injury Criterion (HIC)) were calculated from the raw acceleration data. The impact velocities were tabulated from the video recordings. A summary of the processed data and the raw data are included in this dataset. Online repository contains the files: https://doi.org/10.7910/DVN/TXOPUH.",2019-04-01 +33708079,Development and Validation of a Dynamic Nomogram to Predict the Risk of Neonatal White Matter Damage.,"

Purpose

White matter damage (WMD) was defined as the appearance of rough and uneven echo enhancement in the white matter around the ventricle. The aim of this study was to develop and validate a risk prediction model for neonatal WMD.

Materials and methods

We collected data for 1,733 infants hospitalized at the Department of Neonatology at The First Affiliated Hospital of Zhengzhou University from 2017 to 2020. Infants were randomly assigned to training (n = 1,216) or validation (n = 517) cohorts at a ratio of 7:3. Multivariate logistic regression and least absolute shrinkage and selection operator (LASSO) regression analyses were used to establish a risk prediction model and web-based risk calculator based on the training cohort data. The predictive accuracy of the model was verified in the validation cohort.

Results

We identified four variables as independent risk factors for brain WMD in neonates by multivariate logistic regression and LASSO analysis, including gestational age, fetal distress, prelabor rupture of membranes, and use of corticosteroids. These were used to establish a risk prediction nomogram and web-based calculator (https://caowenjun.shinyapps.io/dynnomapp/). The C-index of the training and validation sets was 0.898 (95% confidence interval: 0.8745-0.9215) and 0.887 (95% confidence interval: 0.8478-0.9262), respectively. Decision tree analysis showed that the model was highly effective in the threshold range of 1-61%. The sensitivity and specificity of the model were 82.5 and 81.7%, respectively, and the cutoff value was 0.099.

Conclusion

This is the first study describing the use of a nomogram and web-based calculator to predict the risk of WMD in neonates. The web-based calculator increases the applicability of the predictive model and is a convenient tool for doctors at primary hospitals and outpatient clinics, family doctors, and even parents to identify high-risk births early on and implement appropriate interventions while avoiding excessive treatment of low-risk patients.",2020-01-01 +31193500,"Data on monogenean (Platyhelminth) parasites in 11 populations of Astyanax aeneus (Pisces: Teleostei) in a neotropical river in Chiapas, south Mexico.","The data presented in this article are related to the research article entitled ""Aggregation and negative interactions in low-diversity and unsaturated monogenean (Platyhelminthes) communities in Astyanax aeneus (Teleostei) populations in a neotropical river of Mexico"" published in Int. J. Parasitol. Parasites Wildl. 8 (2019) 203-215. https://doi.org/10.1016/j.ijppaw.2019.02.005. This article describes the communities of monogenean parasites in 11 populations of a small characid freshwater fish Astyanax aeneus (Günther) separated by small geographical distances along 60 km of the Lacantún river in Chiapas, Mexico. We examined 15 A. aeneus from each of 11 locations (one sample in February, a second sample in August 2012), situated at the mouth of the streams opening into the main body of the Lacantún river, at the Montes Azules Biosphere Reserve in the Lacandon forest, Chiapas in southern Mexico. The area of study is located ∼800 km from the mouth of the Usumacinta river in the Gulf of Mexico. In this paper we provide the data for 12 monogenean taxa. The material in this Data in Brief paper comprised the raw data on the abundance distribution of each monogenean taxa recorded in each of the locations; i. e. the number of helminth individuals of each of 12 taxa found in each one individual of A. aeneus from each of 11 localities. 
The data set is contained in a single table text document including one matrix per date of collection and locality of monogenean species (lines) per host A. aeneus (columns).",2019-04-24 +30020436,realDB: a genome and transcriptome resource for the red algae (phylum Rhodophyta). ,"With over 6000 species in seven classes, red algae (Rhodophyta) have diverse economic, ecological, experimental and evolutionary values. However, red algae are usually absent or rare in comparative analyses because genomic information of this phylum is often under-represented in various comprehensive genome databases. To improve the accessibility to the ome data and omics tools for red algae, we provided 10 genomes and 27 transcriptomes representing all seven classes of Rhodophyta. Three genomes and 18 transcriptomes were de novo assembled and annotated in this project. User-friendly BLAST suite, Jbrowse tools and search system were developed for online analyses. Detailed introductions to red algae taxonomy and the sequencing status are also provided. In conclusion, realDB (realDB.algaegenome.org) provides a platform covering the most genome and transcriptome data for red algae and a suite of tools for online analyses, and will attract both red algal biologists and those working on plant ecology, evolution and development.Database URL: http://realdb.algaegenome.org/.",2018-01-01 +26362267,Interactive Big Data Resource to Elucidate Human Immune Pathways and Diseases.,"Many functionally important interactions between genes and proteins involved in immunological diseases and processes are unknown. The exponential growth in public high-throughput data offers an opportunity to expand this knowledge. To unlock human-immunology-relevant insight contained in the global biomedical research effort, including all public high-throughput datasets, we performed immunological-pathway-focused Bayesian integration of a comprehensive, heterogeneous compendium comprising 38,088 genome-scale experiments. 
The distillation of this knowledge into immunological networks of functional relationships between molecular entities (ImmuNet), and tools to mine this resource, are accessible to the public at http://immunet.princeton.edu. The predictive capacity of ImmuNet, established by rigorous statistical validation, is easily accessed by experimentalists to generate data-driven hypotheses. We demonstrate the power of this approach through the identification of unique host-virus interaction responses, and we show how ImmuNet complements genetic studies by predicting disease-associated genes. ImmuNet should be widely beneficial for investigating the mechanisms of the human immune system and immunological diseases.",2015-09-08 +28850115,"RefEx, a reference gene expression dataset as a web tool for the functional analysis of genes.","Gene expression data are exponentially accumulating; thus, the functional annotation of such sequence data from metadata is urgently required. However, life scientists have difficulty utilizing the available data due to its sheer magnitude and complicated access. We have developed a web tool for browsing reference gene expression pattern of mammalian tissues and cell lines measured using different methods, which should facilitate the reuse of the precious data archived in several public databases. The web tool is called Reference Expression dataset (RefEx), and RefEx allows users to search by the gene name, various types of IDs, chromosomal regions in genetic maps, gene family based on InterPro, gene expression patterns, or biological categories based on Gene Ontology. RefEx also provides information about genes with tissue-specific expression, and the relative gene expression values are shown as choropleth maps on 3D human body images from BodyParts3D. Combined with the newly incorporated Functional Annotation of Mammals (FANTOM) dataset, RefEx provides insight regarding the functional interpretation of unfamiliar genes. 
RefEx is publicly available at http://refex.dbcls.jp/.",2017-08-29 +31624557,Functional divergence of the bitter receptor TAS2R38 in Sulawesi macaques.,"

Abstract

Bitter perception is mediated by G protein-coupled receptors TAS2Rs and plays an important role in avoiding the ingestion of toxins by inducing innate avoidance behavior in mammals. One of the best-studied TAS2Rs is TAS2R38, which mediates the perception of the bitterness of synthetic phenylthiocarbamide (PTC). Previous studies of TAS2R38 have suggested that geographical separation enabled the independent divergence of bitter taste perception. The functional divergence of TAS2R38 in allopatric species has not been evaluated. We characterized the function of TAS2R38 in four allopatric species of Sulawesi macaques on Sulawesi Island. We found variation in PTC taste perception both within and across species. In most cases, TAS2R38 was sensitive to PTC, with functional divergence among species. We observed different truncated TAS2R38s that were not responsive to PTC in each species of Macaca nigra and M. nigrescens due to premature stop codons. Some variants of intact TAS2R38 with an amino acid substitution showed low sensitivity to PTC in M. tonkeana. Similarly, this intact TAS2R38 with PTC-low sensitivity has also been found in humans. We detected a shared haplotype in all four Sulawesi macaques, which may be the ancestral haplotype of Sulawesi macaques. In addition to shared haplotypes among Sulawesi macaques, other TAS2R38 haplotypes were species-specific. These results implied that the variation in TAS2R38 might be shaped by geographical patterns and local adaptation.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.908jf3r.",2019-08-20 +25414348,BioModels: ten-year anniversary.,"BioModels (http://www.ebi.ac.uk/biomodels/) is a repository of mathematical models of biological processes. A large set of models is curated to verify both correspondence to the biological process that the model seeks to represent, and reproducibility of the simulation results as described in the corresponding peer-reviewed publication. Many models submitted to the database are annotated, cross-referencing its components to external resources such as database records, and terms from controlled vocabularies and ontologies. BioModels comprises two main branches: one is composed of models derived from literature, while the second is generated through automated processes. BioModels currently hosts over 1200 models derived directly from the literature, as well as in excess of 140,000 models automatically generated from pathway resources. This represents an approximate 60-fold growth for literature-based model numbers alone, since BioModels' first release a decade ago. This article describes updates to the resource over this period, which include changes to the user interface, the annotation profiles of models in the curation pipeline, major infrastructure changes, ability to perform online simulations and the availability of model content in Linked Data form. We also outline planned improvements to cope with a diverse array of new challenges.",2014-11-20 +,"HEALTHY AGING, THE GENOME OF ELDERLY INDIVIDUALS AND THEIR ENVIRONMENT—THE LEIDEN LONGEVITY STUDY","Abstract Despite the continuous increase in life expectancy in our societies, the diversity in health span is enormous (http://ec.europa.eu/health/indicators). 
Such heterogeneity can be observed at the level of metabolites and immune factors in blood and also in the genome, its regulation by epigenetic mechanisms and its expression. We investigate to what extent the heterogeneity in the rate and nature of physiological decline among elderly is driven by parameters of metabolic health that can be detected in the circulation, muscle and fat tissue. How can we include epigenetic, metabolomics and other omics data in studies of ageing and mortality and what do these marker sets add to more traditional markers of metabolism? The heterogeneity among elderly can be investigated in longitudinal and in intervention studies. Therefore we apply functional, metabolomic, epigenetic and gene expression measures in challenge tests, clinical and intervention studies. Metabolomics studies indicate which parameters associate to disease and lifespan in prospective studies on the one hand and metabolic improvement after interventions on the other. We also investigate how such biomarkers in the circulation relate to aspects of metabolic health measured in muscle tissue of the same individuals depending on the environment.",2017-06-30 +31971806,NETGE-PLUS: Standard and Network-Based Gene Enrichment Analysis in Human and Model Organisms.,"Omics techniques provide a spectrum of information at the genomic level, whose analysis can characterize complex traits at a molecular level. The relationship among genotype and phenotype implies that from genome information the molecular pathways and biological processes underlying a given phenotype are discovered. In dealing with this problem, gene enrichment analysis has become the most widely adopted strategy. Here we present NETGE-PLUS, a Web server for standard and network-based functional interpretation of gene sets of human and of model organisms, including Sus scrofa, Saccharomyces cerevisiae, Escherichia coli, and Arabidopsis thaliana. 
NETGE-PLUS enables the functional enrichment of both simple and ranked lists of genes, introducing also the possibility of exploring relationships among KEGG pathways. A Web interface makes data retrieval complete and user-friendly. NETGE-PLUS is publicly available at http://net-ge2.biocomp.unibo.it.",2020-02-04 +31765226,High Prevalence of Connective Tissue Gene Variants in Professional Ballet.,"

Background

There is a high prevalence of hypermobility spectrum disorder (HSD) in dancers. While there is no known genetic variant for HSD, hypermobile Ehlers-Danlos syndrome is a genetic disorder that exists within HSD. There are many connective tissue disorders (CTDs) with known (and unknown) genes associated with hypermobility. Hypermobility has distinct advantages for participation in flexibility sports, including ballet.

Purpose

To determine the prevalence of gene variants associated with hypermobility in a large professional ballet company.

Study design

Cross-sectional study; Level of evidence, 3.

Methods

In this cross-sectional investigation, 51 professional male and female dancers from a large metropolitan ballet company were eligible and offered participation after an oral and written informed consent process. Whole blood was obtained from peripheral venipuncture, and DNA was isolated. Isolated DNA was subsequently enriched for the coding exons of 60 genes associated with CTD that included hypermobility as a phenotype, including Ehlers-Danlos syndromes, osteogenesis imperfecta, Marfan syndrome, and others. Genes were targeted with hybrid capture technology. Prepared DNA libraries were then sequenced with next-generation sequencing technology. Genetic database search tools (Human Gene Mutation Database and e!Ensembl, http://useast.ensembl.org/ ) were used to query specific variants. Descriptive statistics were calculated.

Results

Of 51 dancers, 32 (63%) agreed to participate in DNA analysis (mean ± SD age, 24.3 ± 4.4 years; 18 men, 14 women). Twenty-eight dancers had at least 1 variant in the 60 genes tested, for an 88% prevalence. A total of 80 variants were found. A variant in 26 of the 60 genes was found in at least 1 dancer. Among the 28 dancers with variants, 16 were found in the TTN gene; 10 in ZNF469; 5 in RYR1; 4 in COL12A1; 3 in ABCC6 and COL6A2; 2 in ADAMTS2, CBS, COL1A2, COL6A3, SLC2A10, TNC, and TNXB; and 1 in ATP6V0A2, B4GALT7, BMP1, COL11A1, COL5A2, COL6A1, DSE, FBN1, FBN2, NOTCH1, PRDM5, SMAD3, and TGFBR1. Nine variants found in this population have never been reported. No identified variant was identical to any other variant. No identified variant was known to be disease causing. In the general population, the prevalence of each variant ranges from never reported to 0.33%. In the study population, the prevalence of each variant was 3.13%. There was no association between hypermobility scores and genetic variants.

Conclusion

Genetic variants in CTD-associated genes are highly prevalent (88%) in professional ballet dancers. This may significantly account for the high degree of motion in this population.",2019-11-25 +26063516,A tool to assess potential for alien plant establishment and expansion under climate change.,"Predicting the influence of climate change on the potential distribution of naturalised alien plant species is an important and challenging task. While prioritisation of management actions for alien plants under current climatic conditions has been widely adopted, very few systems explicitly incorporate the potential of future changes in climate conditions to influence the distribution of alien plant species. Here, we develop an Australia-wide screening tool to assess the potential of naturalised alien plants to establish and spread under both current and future climatic conditions. The screening tool developed uses five spatially explicit criteria to establish the likelihood of alien plant population establishment and expansion under baseline climate conditions and future climates for the decades 2035 and 2065. Alien plants are then given a threat rating according to current and future threat to enable natural resource managers to focus on those species that pose the largest potential threat now and in the future. To demonstrate the screening tool, we present results for a representative sample of approximately 10% (n = 292) of Australia's known, naturalised alien plant species. Overall, most alien plant species showed decreases in area of habitat suitability under future conditions compared to current conditions and therefore the threat rating of most alien plant species declined between current and future conditions. 
Use of the screening tool is intended to assist natural resource managers in assessing the threat of alien plant establishment and spread under current and future conditions and thus prioritise detailed weed risk assessments for those species that pose the greatest threat. The screening tool is associated with a searchable database for all 292 alien plant species across a range of spatial scales, available through an interactive web-based portal at http://weedfutures.net/.",2015-06-07 +31894193,Hourly weather observations from the Scottish Highlands (1883-1904) rescued by volunteer citizen scientists.,"Weather observations taken every hour during the years 1883-1904 on the summit of Ben Nevis (1345 m above sea level) and in the town of Fort William in the Scottish Highlands have been transcribed from the original publications into digital form. More than 3,500 citizen scientist volunteers completed the digitization in less than 3 months using the http://WeatherRescue.org website. Over 1.5 million observations of atmospheric pressure, wet- and dry-bulb temperatures, precipitation and wind speed were recovered. These data have been quality controlled and are now made openly available, including hourly values of relative humidity derived from the digitized dry- and wet-bulb temperatures using modern hygrometric algorithms. These observations are one of the most detailed weather data collections available for anywhere in the UK in the Victorian era. In addition, 374 observations of aurora borealis seen by the meteorologists from the summit of Ben Nevis have been catalogued and this has improved the auroral record for studies of space weather.",2019-08-26 +32118208,A practical framework and online tool for mutational signature analyses show inter-tissue variation and driver dependencies.,"Mutational signatures are patterns of mutations that arise during tumorigenesis. We present an enhanced, practical framework for mutational signature analyses. 
Applying these methods on 3,107 whole genome sequenced (WGS) primary cancers of 21 organs reveals known signatures and nine previously undescribed rearrangement signatures. We highlight inter-organ variability of signatures and present a way of visualizing that diversity, reinforcing our findings in an independent analysis of 3,096 WGS metastatic cancers. Signatures with a high level of genomic instability are dependent on TP53 dysregulation. We illustrate how uncertainty in mutational signature identification and assignment to samples affects tumor classification, reinforcing that using multiple orthogonal mutational signature data is not only beneficial, it is essential for accurate tumor stratification. Finally, we present a reference web-based tool for cancer and experimentally-generated mutational signatures, called Signal (https://signal.mutationalsignatures.com), that also supports performing mutational signature analyses.",2020-02-17 +27789569,The cancer precision medicine knowledge base for structured clinical-grade mutations and interpretations.,"

Objective

This paper describes the Precision Medicine Knowledge Base (PMKB; https://pmkb.weill.cornell.edu ), an interactive online application for collaborative editing, maintenance, and sharing of structured clinical-grade cancer mutation interpretations.

Materials and methods

PMKB was built using the Ruby on Rails Web application framework. Leveraging existing standards such as the Human Genome Variation Society variant description format, we implemented a data model that links variants to tumor-specific and tissue-specific interpretations. Key features of PMKB include support for all major variant types, standardized authentication, distinct user roles including high-level approvers, and detailed activity history. A REpresentational State Transfer (REST) application-programming interface (API) was implemented to query the PMKB programmatically.

Results

At the time of writing, PMKB contains 457 variant descriptions with 281 clinical-grade interpretations. The EGFR, BRAF, KRAS, and KIT genes are associated with the largest numbers of interpretable variants. PMKB's interpretations have been used in over 1500 AmpliSeq tests and 750 whole-exome sequencing tests. The interpretations are accessed either directly via the Web interface or programmatically via the existing API.

Discussion

An accurate and up-to-date knowledge base of genomic alterations of clinical significance is critical to the success of precision medicine programs. The open-access, programmatically accessible PMKB represents an important attempt at creating such a resource in the field of oncology.

Conclusion

The PMKB was designed to help collect and maintain clinical-grade mutation interpretations and facilitate reporting for clinical cancer genomic testing. The PMKB was also designed to enable the creation of clinical cancer genomics automated reporting pipelines via an API.",2017-05-01 +,Early lineages of Vespidae (Hymenoptera) in Cretaceous amber,"Three wasp (Hymenoptera: Vespidae) fossils in Cretaceous amber (Late Albian) of northern Myanmar are described. Two are new species of the Mesozoic genus Curiosivespa (Rasnitsyn): C. zigrasi sp.n. and C. striata sp.n. The third species, Protovespa haxairei gen.n. et sp.n., has a combination of features unique among Mesozoic Priorvespinae and the extant subfamilies. These well preserved fossils provide new morphological data for a cladistic analysis of the basal lineages of Vespidae. Results suggest that Euparagiinae is the sister group of all other Vespidae. The new genus Protovespa appears more closely related to extant Masarinae, Eumeninae and social wasps than to Priorvespinae. We assign it to a new subfamily: Protovespinae. Finally, fossil information combined with a phylogenetic tree shows that the main groups of Vespidae probably evolved during the Early Cretaceous. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:1E7E4796‐6E70‐4D81‐BB34‐0FEEA765DC25.",2017-04-01 +24225322,Progenetix: 12 years of oncogenomic data curation.,"DNA copy number aberrations (CNAs) can be found in the majority of cancer genomes and are crucial for understanding the potential mechanisms underlying tumor initiation and progression. Since the first release in 2001, the Progenetix project (http://www.progenetix.org) has provided a reference resource dedicated to provide the most comprehensive collection of genome-wide CNA profiles. 
Reflecting the application of comparative genomic hybridization techniques to tens of thousands of cancer genomes, over the past 12 years our data curation efforts have resulted in a more than 60-fold increase in the number of cancer samples presented through Progenetix. In addition, new data exploration tools and visualization options have been added. In particular, the gene-specific CNA frequency analysis should facilitate the assignment of cancer genes to related cancer types. In addition, the new user file processing interface allows users to take advantage of the online tools, including various data representation options for proprietary data pre-publication. In this update article, we report recent improvements of the database in terms of content, user interface and online tools.",2013-11-12 +29855280,Antidepressants for depressive disorder in children and adolescents: a database of randomised controlled trials.,"BACKGROUND:In recent years, whether, when and how to use antidepressants to treat depressive disorder in children and adolescents has been hotly debated. Relevant evidence on this topic has increased rapidly. In this paper, we present the construction and content of a database of randomised controlled trials of antidepressants to treat depressive disorder in children and adolescents. This database can be freely accessed via our website and will be regularly updated. DESCRIPTION:Major bibliographic databases (PubMed, the Cochrane Library, Web of Science, Embase, CINAHL, PsycINFO and LiLACS), international trial registers and regulatory agencies' websites were systematically searched for published and unpublished studies up to April 30, 2017. We included randomised controlled trials in which the efficacy or tolerability of any oral antidepressant was compared with that of a control group or any other treatment. 
In total, 7377 citations from bibliographical databases and 3289 from international trial registers and regulatory agencies' websites were identified. Of these, 53 trials were eligible for inclusion in the final database. Selected data were extracted from each study, including characteristics of the participants (the study population, setting, diagnostic criteria, type of depression, age, sex, and comorbidity), characteristics of the treatment conditions (the treatment conditions, general information, and detail of pharmacotherapy and psychotherapy) and study characteristics (the sponsor, country, number of sites, blinding method, sample size, treatment duration, depression scales, other scales, and primary outcome measure used, and side-effect monitoring method). Moreover, the risk of bias for each trial was assessed. CONCLUSION:This database provides information on nearly all randomised controlled trials of antidepressants in children and adolescents. By using this database, researchers can improve research efficiency, avoid inadvertent errors and easily focus on the targeted subgroups in which they are interested. For authors of subsequent reviews, they could only use this database to ensure that they have completed a comprehensive review, rather than rely solely on the data from this database. We expect this database could help to promote research on evidence-based practice in the treatment of depressive disorder in children and adolescents. The database could be freely accessed in our website: http://xiepengteam.cn/research/evidence-based-medicine .",2018-05-31 +30536039,Smarter through group living: A response to Smulders.,"We recently identified a strong, positive relationship between group size and individual cognitive performance, and a strong, positive relationship between female cognitive performance and reproductive success (Ashton, Ridley, Edwards, & Thornton in Nature, 554, 364-367, 2018). 
An opinion piece by Smulders (Learning & Behavior, https://doi.org/10.3758/s13420-018-0335-0, 2018) raised the interesting notion that these patterns may be underlined by motivational factors. In this commentary, we highlight why none of the available data are consistent with this explanation, but instead support the argument that the demands of group living influence cognitive development, with knock-on consequences for fitness.",2019-12-01 +24602877,BambooGDB: a bamboo genome database with functional annotation and an analysis platform.,"Bamboo, as one of the most important non-timber forest products and fastest-growing plants in the world, represents the only major lineage of grasses that is native to forests. Recent success on the first high-quality draft genome sequence of moso bamboo (Phyllostachys edulis) provides new insights on bamboo genetics and evolution. To further extend our understanding on bamboo genome and facilitate future studies on the basis of previous achievements, here we have developed BambooGDB, a bamboo genome database with functional annotation and analysis platform. The de novo sequencing data, together with the full-length complementary DNA and RNA-seq data of moso bamboo composed the main contents of this database. Based on these sequence data, a comprehensively functional annotation for bamboo genome was made. Besides, an analytical platform composed of comparative genomic analysis, protein-protein interactions network, pathway analysis and visualization of genomic data was also constructed. As discovery tools to understand and identify biological mechanisms of bamboo, the platform can be used as a systematic framework for helping and designing experiments for further validation. Moreover, diverse and powerful search tools and a convenient browser were incorporated to facilitate the navigation of these data. As far as we know, this is the first genome database for bamboo. 
Through integrating high-throughput sequencing data, a full functional annotation and several analysis modules, BambooGDB aims to provide worldwide researchers with a central genomic resource and an extensible analysis platform for bamboo genome. BambooGDB is freely available at http://www.bamboogdb.org/. Database URL: http://www.bamboogdb.org.",2014-03-05 +32186411,"Metal/Metalloid Levels in Electronic Cigarette Liquids, Aerosols, and Human Biosamples: A Systematic Review.","

Background

Electronic cigarettes (e-cigarettes) have become popular, in part because they are perceived as a safer alternative to tobacco cigarettes. An increasing number of studies, however, have found toxic metals/metalloids in e-cigarette emissions.

Objective

We summarized the evidence on metal/metalloid levels in e-cigarette liquid (e-liquid), aerosols, and biosamples of e-cigarette users across e-cigarette device systems to evaluate metal/metalloid exposure levels for e-cigarette users and the potential implications on health outcomes.

Methods

We searched PubMed/TOXLINE, Embase®, and Web of Science for studies on metals/metalloids in e-liquid, e-cigarette aerosols, and biosamples of e-cigarette users. For metal/metalloid levels in e-liquid and aerosol samples, we collected the mean and standard deviation (SD) if these values were reported, derived mean and SD by using automated software to infer them if data were reported in a figure, or calculated the overall mean (mean ± SD) if data were reported only for separate groups. Metal/metalloid levels in e-liquids and aerosols were converted and reported in micrograms per kilogram and nanograms per puff, respectively, for easy comparison.

Results

We identified 24 studies on metals/metalloids in e-liquid, e-cigarette aerosols, and human biosamples of e-cigarette users. Metal/metalloid levels, including aluminum, antimony, arsenic, cadmium, cobalt, chromium, copper, iron, lead, manganese, nickel, selenium, tin, and zinc, were present in e-cigarette samples in the studies reviewed. Twelve studies reported metal/metalloid levels in e-liquids (bottles, cartridges, open wick, and tank), 12 studies reported metal/metalloid levels in e-cigarette aerosols (from cig-a-like and tank devices), and 4 studies reported metal/metalloid levels in human biosamples (urine, saliva, serum, and blood) of e-cigarette users. Metal/metalloid levels showed substantial heterogeneity depending on sample type, source of e-liquid, and device type. Metal/metalloid levels in e-liquid from cartridges or tank/open wicks were higher than those from bottles, possibly due to coil contact. Most metal/metalloid levels found in biosamples of e-cigarette users were similar or higher than levels found in biosamples of conventional cigarette users, and even higher than those found in biosamples of cigar users.

Conclusion

E-cigarettes are a potential source of exposure to metals/metalloids. Differences in collection methods and puffing regimes likely contribute to the variability in metal/metalloid levels across studies, making comparison across studies difficult. Standardized protocols for the quantification of metal/metalloid levels from e-cigarette samples are needed. https://doi.org/10.1289/EHP5686.",2020-03-18 +31689370,Using Visual Supports to Facilitate Audiological Testing for Children With Autism Spectrum Disorder.,"Purpose One in 59 children is diagnosed with autism spectrum disorder (ASD). Due to overlapping symptoms between hearing loss and ASD, children who are suspected of having ASD require an audiological evaluation to determine their hearing status for the purpose of differential diagnosis. The purpose of this article is twofold: (a) to increase audiologists' knowledge of ASD by discussing the challenges associated with testing and interpreting clinical data for children with ASD or suspected ASD and (b) to provide visual supports that can be used to facilitate audiological assessment. Method Eight children (ages 4-12 years) were recruited as video model participants. Videos were filmed using scripts that used concise and concrete language while portraying common clinical procedures. Using the video models, corresponding visual schedules were also created. Conclusion Although obtaining reliable hearing data from children with ASD is challenging, incorporating visual supports may facilitate testing. Video models and visual schedules have been created and made freely available for download online under a Creative Commons License (Creative Commons-Attribution-NonCommercial-ShareAlike 4.0 International License). Incorporating visual supports during clinical testing has the potential to reduce the child's and family's stress, as well as to increase the probability of obtaining a reliable and comprehensive audiological evaluation. 
Future research is warranted to determine the effectiveness and feasibility of implementing these tools in audiology clinics. Supplemental Material https://doi.org/10.23641/asha.10086434.",2019-11-05 +29927072,Non-Coding RNA Analysis Using the Rfam Database.,"Rfam is a database of non-coding RNA families in which each family is represented by a multiple sequence alignment, a consensus secondary structure, and a covariance model. Using a combination of manual and literature-based curation and a custom software pipeline, Rfam converts descriptions of RNA families found in the scientific literature into computational models that can be used to annotate RNAs belonging to those families in any DNA or RNA sequence. Valuable research outputs that are often locked up in figures and supplementary information files are encapsulated in Rfam entries and made accessible through the Rfam Web site. The data produced by Rfam have a broad application, from genome annotation to providing training sets for algorithm development. This article gives an overview of how to search and navigate the Rfam Web site, and how to annotate sequences with RNA families. The Rfam database is freely available at http://rfam.org. © 2018 by John Wiley & Sons, Inc.",2018-06-05 +31372399,Environmental & load data: 1:15 Scale tidal turbine subject to a variety of regular wave conditions.,"Experimental data was obtained in order to investigate the effect of waves on the loads and performance of tidal turbines. An instrumented 1:15 scale tidal turbine was installed in the FloWave Ocean Energy Research Facility, and a wide range of regular wave conditions were generated; systematically varying both wave frequency and height. Waves were generated both following and opposing a fixed mean current velocity of 0.81 m/s. Data are made available of the measured turbine loads and environmental conditions obtained for five repeats of 24 wave conditions via https://doi.org/10.7488/ds/2472. 
A description of the data collection process, data processing, file structure and naming conventions are provided in this article. The analysis and presentation of the described dataset can be found in Ref. [1].",2019-03-07 +29490010,SECLAF: a webserver and deep neural network design tool for hierarchical biological sequence classification.,"

Summary

Artificial intelligence tools are gaining more and more ground each year in bioinformatics. Learning algorithms can be taught for specific tasks by using the existing enormous biological databases, and the resulting models can be used for the high-quality classification of novel, un-categorized data in numerous areas, including biological sequence analysis. Here, we introduce SECLAF, a webserver that uses deep neural networks for hierarchical biological sequence classification. By applying SECLAF for residue-sequences, we have reported [Methods (2018), https://doi.org/10.1016/j.ymeth.2017.06.034] the most accurate multi-label protein classifier to date (UniProt-into 698 classes-AUC 99.99%; Gene Ontology-into 983 classes-AUC 99.45%). Our framework SECLAF can be applied for other sequence classification tasks, as we describe in the present contribution.

Availability and implementation

The program SECLAF is implemented in Python, and is available for download, with example datasets at the website https://pitgroup.org/seclaf/. For Gene Ontology and UniProt based classifications a webserver is also available at the address above.",2018-07-01 +31784228,Infrared complex refractive index of N-containing astrophysical ices free of water processed by cosmic-ray simulated in laboratory.,"Several nitrogen-containing molecules have been unambiguously identified in the Solar System and in the Interstellar Medium. It is believed that such a rich inventory of species is a result of the energetic processing of astrophysical ices during the interaction with ionizing radiation. An intrinsic parameter of matter, the complex refractive index, stores all the ""chemical memory"" triggered by energetic processing, and therefore might be used to probe ice observations in the infrared. In this study, four N-containing ices have been condensed in an ultra-high vacuum chamber and processed by heavy ions (O and Ni) with energies between 0.2 and 15.7 MeV at the Grand Accélérateur National d'Ions Lourds (GANIL), in Caen, France. All chemical changes were monitored in situ by Infrared Absorption Spectroscopy. The complex refractive index was calculated directly from the absorbance spectrum, by using the Lambert-Beer and Kramers-Kronig relations, and the values are available in an online database: https://www1.univap.br/gaa/nkabs-database/data.htm. As a result, other than the database, it was observed that non-polar ices are more destroyed by sputtering than polar ones. Such destruction and chemical evolution lead to variation in the IR albedo of samples addressed in this paper.",2019-11-21 +31754154,Characterization and evaluation of mycosterol secreted from endophytic strain of Gymnema sylvestre for inhibition of α-glucosidase activity.,"Endophytic fungi produce various types of chemicals for establishment of niche within the host plant. 
Due to symbiotic association, they secrete pharmaceutically important bioactive compounds and enzyme inhibitors. In this research article, we have explored the potent α-glucosidase inhibitor (AGI) produced from Fusarium equiseti recovered from the leaf of Gymnema sylvestre through bioassay-guided fraction. This study investigated the biodiversity, phylogeny, antioxidant activity and α-glucosidase inhibition of endophytic fungi isolated from Gymnema sylvestre. A total of 32 isolates obtained were grouped into 16 genera, according to their morphology of colony and spores. A high biodiversity of endophytic fungi was observed in G. sylvestre with diversity indices. Endophytic fungal strain Fusarium equiseti was identified through DNA sequencing and the sequence was deposited in GenBank database (https://ncbi.nlm.nih.gov) with accession number: MF403109. The characterization of potent compound was done by FTIR, LC-ESI-MS and NMR spectroscopic analysis with IUPAC name 17-(5-ethyl-6-methylheptan-2-yl)-10,13-dimethyl-2,3,4,7,8,9,10,11,12,13,14,15,16,17-tetradecahydro-1H-cyclopenta[a] phenanthren-3-ol. The isolated bioactive compound showed significant α-amylase and α-glucosidase inhibition activity with IC50 values, 4.22 ± 0.0005 µg/mL and 69.72 ± 0.001 µg/mL while IC50 values of acarbose was 5.75 ± 0.007 and 55.29 ± 0.0005 µg/mL respectively. This result is higher in comparison to other previous studies. The enzyme kinetics study revealed that bioactive compound was competitive inhibitor for α-amylase and α-glucosidase. In-silico study showed that bioactive compound binds to the binding site of α-amylase, similar to that of acarbose but with higher affinity. The study highlights the importance of endophytic fungi as an alternative source of AGI (α-glucosidase inhibition) to control the diabetic condition in vitro.",2019-11-21 +28968638,tmVar 2.0: integrating genomic variant information from literature with dbSNP and ClinVar for precision medicine.,"

Motivation

Despite significant efforts in expert curation, clinical relevance about most of the 154 million dbSNP reference variants (RS) remains unknown. However, a wealth of knowledge about the variant biological function/disease impact is buried in unstructured literature data. Previous studies have attempted to harvest and unlock such information with text-mining techniques but are of limited use because their mutation extraction results are not standardized or integrated with curated data.

Results

We propose an automatic method to extract and normalize variant mentions to unique identifiers (dbSNP RSIDs). Our method, in benchmarking results, demonstrates a high F-measure of ∼90% and compared favorably to the state of the art. Next, we applied our approach to the entire PubMed and validated the results by verifying that each extracted variant-gene pair matched the dbSNP annotation based on mapped genomic position, and by analyzing variants curated in ClinVar. We then determined which text-mined variants and genes constituted novel discoveries. Our analysis reveals 41 889 RS numbers (associated with 9151 genes) not found in ClinVar. Moreover, we obtained a rich set worth further review: 12 462 rare variants (MAF ≤ 0.01) in 3849 genes which are presumed to be deleterious and not frequently found in the general population. To our knowledge, this is the first large-scale study to analyze and integrate text-mined variant data with curated knowledge in existing databases. Our results suggest that databases can be significantly enriched by text mining and that the combined information can greatly assist human efforts in evaluating/prioritizing variants in genomic research.

Availability and implementation

The tmVar 2.0 source code and corpus are freely available at https://www.ncbi.nlm.nih.gov/research/bionlp/Tools/tmvar/.

Contact

zhiyong.lu@nih.gov.",2018-01-01 +31440535,SANAD: Single-label Arabic News Articles Dataset for automatic text categorization.,"Text Classification is one of the most popular Natural Language Processing (NLP) tasks. Text classification (aka categorization) is an active research topic in recent years. However, much less attention was directed towards this task in Arabic, due to the lack of rich representative resources for training an Arabic text classifier. Therefore, we introduce a large Single-labeled Arabic News Articles Dataset (SANAD) of textual data collected from three news portals. The dataset is a large one consisting of almost 200k articles distributed into seven categories that we offer to the research community on Arabic computational linguistics. We anticipate that this rich dataset would make a great aid for a variety of NLP tasks on Modern Standard Arabic (MSA) textual data, especially for single label text classification purposes. We present the data in raw form. SANAD is composed of three main datasets scraped from three news portals, which are AlKhaleej, AlArabiya, and Akhbarona. SANAD is made public and freely available at https://data.mendeley.com/datasets/57zpx667y9.",2019-06-04 +31860062,LOBSTER: an environment to design bioimage analysis workflows for large and complex fluorescence microscopy data.,"

Summary

Open source software such as ImageJ and CellProfiler greatly simplified the quantitative analysis of microscopy images but their applicability is limited by the size, dimensionality and complexity of the images under study. In contrast, software optimized for the needs of specific research projects can overcome these limitations, but they may be harder to find, set up and customize to different needs. Overall, the analysis of large, complex, microscopy images is hence still a critical bottleneck for many Life Scientists. We introduce LOBSTER (Little Objects Segmentation and Tracking Environment), an environment designed to help scientists design and customize image analysis workflows to accurately characterize biological objects from a broad range of fluorescence microscopy images, including large images exceeding workstation main memory. LOBSTER comes with a starting set of over 75 sample image analysis workflows and associated images stemming from state-of-the-art image-based research projects.

Availability and implementation

LOBSTER requires MATLAB (version ≥ 2015a), MATLAB Image processing toolbox, and MATLAB statistics and machine learning toolbox. Code source, online tutorials, video demonstrations, documentation and sample images are freely available from: https://sebastients.github.io.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +31053864,Cistrome-GO: a web server for functional enrichment analysis of transcription factor ChIP-seq peaks.,"Characterizing the ontologies of genes directly regulated by a transcription factor (TF), can help to elucidate the TF's biological role. Previously, we developed a widely used method, BETA, to integrate TF ChIP-seq peaks with differential gene expression (DGE) data to infer direct target genes. Here, we provide Cistrome-GO, a website implementation of this method with enhanced features to conduct ontology analyses of gene regulation by TFs in human and mouse. Cistrome-GO has two working modes: solo mode for ChIP-seq peak analysis; and ensemble mode, which integrates ChIP-seq peaks with DGE data. Cistrome-GO is freely available at http://go.cistrome.org/.",2019-07-01 +32361839,"The IN-DEEP project ""INtegrating and Deriving Evidence, Experiences, Preferences"": a web information model on magnetic resonance imaging for people with multiple sclerosis.","

Introduction

The IN-DEEP project aims to provide people with multiple sclerosis (PwMS) with evidence-based information on magnetic resonance imaging (MRI) in diagnosis and monitoring the disease through a website, and to collect their opinions on the clarity of the website's contents and its usefulness.

Methods and analysis

A multidisciplinary advisory board committee was set up. We investigated the experience, attitude and information needs on MRI through three meetings with 24 PwMS, facilitated by an expert researcher and an observer. We developed the website on the basis of input from PwMS and systematic reviews and guidelines, assessed with AMSTAR and AGREE II. We sought feedback from nine PwMS who pilot-tested the beta-version of the website, during a meeting and through phone interviews and judged whether the contents were clear, understandable and useful, and the website was easily navigable. The website is in Italian.

Results

The website ( https://www.istituto-besta.it/in-deep-risonanza-magnetica2 ) provides two levels of information, different layouts and visualization of data covering MRI diagnostic accuracy, sensitivity and specificity, contents on how MRI can monitor PwMS over time to determine changes in the condition and evaluate treatment effects, practical information on how to prepare for the exam, educational tools and a glossary. The website was judged clear and useful by a sample of PwMS.

Conclusions

The website is a tool to address PwMS information needs on the role of MRI. It could be used by neurologists to facilitate communication with PwMS.",2020-05-02 +30165813,Selection of marker genes for genetic barcoding of microorganisms and binning of metagenomic reads by Barcoder software tools.,"

Background

Metagenomic approaches have revealed the complexity of environmental microbiomes with the advancement in whole genome sequencing displaying a significant level of genetic heterogeneity on the species level. It has become apparent that patterns of superior bioactivity of bacteria applicable in biotechnology as well as the enhanced virulence of pathogens often require distinguishing between closely related species or sub-species. Current methods for binning of metagenomic reads usually do not allow for identification below the genus level and generally stop at the family level.

Results

In this work, an attempt was made to improve metagenomic binning resolution by creating genome specific barcodes based on the core and accessory genomes. This protocol was implemented in novel software tools available for use and download from http://bargene.bi.up.ac.za/. The most abundant barcode genes from the core genomes were found to encode for ribosomal proteins, certain central metabolic genes and ABC transporters. Performance of metabarcode sequences created by this package was evaluated using artificially generated and publicly available metagenomic datasets. Furthermore, a program (Barcoding 2.0) was developed to align reads against barcode sequences and thereafter calculate various parameters to score the alignments and the individual barcodes. Taxonomic units were identified in metagenomic samples by comparison of the calculated barcode scores to set cut-off values. In this study, it was found that varying sample sizes, i.e. number of reads in a metagenome and metabarcode lengths, had no significant effect on the sensitivity and specificity of the algorithm. Receiver operating characteristics (ROC) curves were calculated for different taxonomic groups based on the results of identification of the corresponding genomes in artificial metagenomic datasets. The reliability of distinguishing between species of the same genus or family by the program was nearly perfect.

Conclusions

The results showed that the novel online tool BarcodeGenerator (http://bargene.bi.up.ac.za/) is an efficient approach for generating barcode sequences from a set of complete genomes provided by users. Another program, Barcoder 2.0 is available from the same resource to enable an efficient and practical use of metabarcodes for visualization of the distribution of organisms of interest in environmental and clinical samples.",2018-08-30 +32631226,"HPG-DHunter: an ultrafast, friendly tool for DMR detection and visualization.","BACKGROUND:Software tools for analyzing DNA methylation do not provide graphical results which can be easily identified, but huge text files containing the alignment of the samples and their methylation status at a resolution of base pairs. There have been proposed different tools and methods for finding Differentially Methylated Regions (DMRs) among different samples, but the execution time required by these tools is large, and the visualization of their results is far from being interactive. Additionally, these methods show more accurate results when identifying simulated DM regions that are long and have small within-group variation, but they have low concordance when used with real datasets, probably due to the different approaches they use for DMR identification. Thus, a tool which automatically detects DMRs among different samples and interactively visualizes DMRs at different scales (from a bunch to tens of millions of DNA locations) can be the key for shortening the DNA methylation analysis process in many studies. RESULTS:In this paper, we propose a software tool based on the wavelet transform. This mathematical tool allows the fast automatic DMR detection by simple comparison of different signals at different resolution levels. Also, it allows an interactive visualization of the DMRs found at different resolution levels. 
The tool is publicly available at https://grev-uv.github.io/ , and it is part of a complete suite of tools which allow to carry out the complete process of DNA alignment and methylation analysis, creation of methylation maps of the whole genome, and the detection and visualization of DMRs between different samples. CONCLUSIONS:The validation of the developed software tool shows similar concordance with other well-known and extended tools when used with real and synthetic data. The batch mode of the tool is capable of automatically detecting the existing DMRs for half (twelve) of the human chromosomes between two sets of six samples (whose.csv files after the alignment and mapping procedures have an aggregated size of 108 Gigabytes) in around three hours and a half. When compared to other well-known tools, HPG-DHunter only requires around 15% of the execution time required by other tools for detecting the DMRs.",2020-07-06 +29650273,Dose-dense weekly chemotherapy in advanced ovarian cancer: An updated meta-analysis of randomized controlled trials.,"

Objective

The use of dose-dense weekly chemotherapy in the management of advanced ovarian cancer (OC) remains controversial. The aim of this meta-analysis was to evaluate the efficacy of dose-dense regimen to improve clinical outcomes in OC patients with the inclusion of new trials.

Methods

For this updated meta-analysis, PubMed Medline and Scopus databases and meeting proceedings were searched for eligible studies with the limitation of randomized controlled trials, comparing dose-dense chemotherapy versus standard treatment. Trials were grouped in two types of dose-dense chemotherapy: weekly dose-dense (both paclitaxel and carboplatin weekly administration) and semi-weekly dose-dense (weekly paclitaxel and three weekly carboplatin administration). Data were extracted independently and were analyzed using RevMan statistical software version 5.3 (http://www.cochrane.org). Primary end-point was progression-free survival (PFS).

Results

Four randomized controlled trials comprising 3698 patients were identified as eligible. Dose-dense chemotherapy did not have a significant benefit on PFS (HR 0.92, 95% CI 0.81-1.04, p = 0.20). When the analysis was restricted to both weekly and semi-weekly dose-dense data, the absence of a significant interaction between dose-dense and standard regimen was confirmed (HR 1.01, 95% CI 0.93-1.10 and HR 0.82, 95% CI 0.63-1.08, respectively).

Conclusions

In the absence of PFS superiority of dose-dense schedule, three weekly schedule should remain the standard of care for advanced OC.",2018-03-07 +31806443,Identification of adverse outcome pathway related to high-density polyethylene microplastics exposure: Caenorhabditis elegans transcription factor RNAi screening and zebrafish study.,"To gain insight into the human health implications of microplastics, in this study, we investigated the possible mechanisms affecting the toxicity of high-density polyethylene (HDPE) in the nematode Caenorhabditis elegans using RNAi screening and a bioinformatics-based unbiased approach. The candidate pathways identified from C. elegans study were also confirmed using vertebrate model, zebrafish, Danio rerio and human relevance was then inferred using Comparative Toxicogenomics Database (CTD) analysis. Prior to evaluating the toxicity, label-free Raman mapping was conducted to investigate whether or not the organisms could uptake HDPE. C. elegans transcription factor RNAi screening results showed that the nucleotide excision repair (NER) and transforming growth factor-beta (TGF-β) signaling pathways were significantly associated with HDPE exposure, which was also confirmed in zebrafish model. Gene-disease interaction analysis using the CTD revealed the possible human health implications of microplastics. Finally, based on this finding, related AOPs were identified from AOP Wiki (http://aopwiki.org), which are ""Peroxisome proliferator-activated receptors γ inactivation leading to lung fibrosis"" and ""AFB1: Mutagenic Mode-of-Action leading to Hepatocellular Carcinoma"". Further studies are needed for the validation of these AOPs with various microplastics.",2019-11-20 +21801404,"Comprehensive, atomic-level characterization of structurally characterized protein-protein interactions: the PICCOLO database.","

Background

Structural studies are increasingly providing huge amounts of information on multi-protein assemblies. Although a complete understanding of cellular processes will be dependent on an explicit characterization of the intermolecular interactions that underlie these assemblies and mediate molecular recognition, these are not well described by standard representations.

Results

Here we present PICCOLO, a comprehensive relational database capturing the details of structurally characterized protein-protein interactions. Interactions are described at the level of interacting pairs of atoms, residues and polypeptide chains, with the physico-chemical nature of the interactions being characterized. Distance and angle terms are used to distinguish 12 different interaction types, including van der Waals contacts, hydrogen bonds and hydrophobic contacts. The explicit aim of PICCOLO is to underpin large-scale analyses of the properties of protein-protein interfaces. This is exemplified by an analysis of residue propensity and interface contact preferences derived from a much larger data set than previously reported. However, PICCOLO also supports detailed inspection of particular systems of interest.

Conclusions

The current PICCOLO database comprises more than 260 million interacting atom pairs from 38,202 protein complexes. A web interface for the database is available at http://www-cryst.bioc.cam.ac.uk/piccolo.",2011-07-29 +,PATH-02. USE OF RNAseq TO IDENTIFY TUMOR CELL AND MICROENVIRONMENT FACTORS ASSOCIATED WITH IMMUNE CYTOLYTIC ACTIVITY IN GLIOBLASTOMA,"Abstract

BACKGROUND

Determinants of immune activation in glioblastoma (GBM) are poorly understood, providing little immunotherapeutic guidance. Using a validated gene expression signature of intratumoral immune cytolytic activity (CYT; Rooney et al, Cell 2015), we sought to determine which tumor-intrinsic and microenvironment factors are independently associated with T-cell effector function in GBM.

METHODS

GlioVis (http://gliovis.bioinfo.cnio.es) was accessed to obtain normalized RNAseq data from The Cancer Genome Atlas (TCGA). For all subjects with treatment-naïve, IDH-wild type GBM, we quantified mRNA expression of 17 pre-specified immune checkpoints, co-stimulatory receptors, and soluble immune inhibitory factors. We also calculated the CYT index (geometric mean of GZMA and PRF1 gene expression) for each subject. Linear regression was performed to determine the relationship between mRNA expression of each of the 17 factors and the CYT index (log-transformed) in univariate analyses. Variables associated with CYT in univariate analysis (p<0.01) were entered into a multivariate linear regression model. Backward selection (exit criterion p>0.05) yielded the final model, which we then validated in an independent cohort of newly diagnosed GBMs subjected to RNAseq (Bao et al, Genome Res 2014; GlioVis).

RESULTS

125 TCGA subjects were identified. The final multivariate linear regression model included the following variables, each positively associated with CYT except for VEGFA, which had a negative association: IL6 (p<0.001), IDO1 (p=0.027), TGFB1 (p=0.003), TNFRSF18 (GITR) (p=0.016), CTLA4 (p=0.001), and VEGFA (p<0.001). In the validation dataset (n=60), all variables remained significantly associated with CYT in multivariate analysis except for TNFRSF18 (GITR).

CONCLUSIONS

We identified multiple immune checkpoints and soluble immune inhibitory factors independently associated with intratumoral CYT in untreated GBM. Further studies are warranted to determine whether each of these factors induces T-cell effector responses in GBM or, conversely, is upregulated as a means of tumor immune evasion. These results may inform rational combinations for immunotherapy in GBM.",2017-11-01 +33133298,Greenhouse gas observations from the Northeast Corridor tower network. ,"We present the organization, structure, instrumentation, and measurements of the Northeast Corridor greenhouse gas observation network. This network of tower-based in situ carbon dioxide and methane observation stations was established in 2015 with the goal of quantifying emissions of these gases in urban areas in the northeastern United States. A specific focus of the network is the cities of Baltimore, MD, and Washington, DC, USA, with a high density of observation stations in these two urban areas. Additional observation stations are scattered throughout the northeastern US, established to complement other existing urban and regional networks and to investigate emissions throughout this complex region with a high population density and multiple metropolitan areas. Data described in this paper are archived at the National Institute of Standards and Technology and can be found at https://doi.org/10.18434/M32126 (Karion et al., 2019).",2020-01-01 +30931480,NetworkAnalyst 3.0: a visual analytics platform for comprehensive gene expression profiling and meta-analysis.,"The growing application of gene expression profiling demands powerful yet user-friendly bioinformatics tools to support systems-level data understanding. NetworkAnalyst was first released in 2014 to address the key need for interpreting gene expression data within the context of protein-protein interaction (PPI) networks. It was soon updated for gene expression meta-analysis with improved workflow and performance. 
Over the years, NetworkAnalyst has been continuously updated based on community feedback and technology progresses. Users can now perform gene expression profiling for 17 different species. In addition to generic PPI networks, users can now create cell-type or tissue specific PPI networks, gene regulatory networks, gene co-expression networks as well as networks for toxicogenomics and pharmacogenomics studies. The resulting networks can be customized and explored in 2D, 3D as well as Virtual Reality (VR) space. For meta-analysis, users can now visually compare multiple gene lists through interactive heatmaps, enrichment networks, Venn diagrams or chord diagrams. In addition, users have the option to create their own data analysis projects, which can be saved and resumed at a later time. These new features are released together as NetworkAnalyst 3.0, freely available at https://www.networkanalyst.ca.",2019-07-01 +27387097,Modeling in Real Time During the Ebola Response.,"To aid decision-making during CDC's response to the 2014-2016 Ebola virus disease (Ebola) epidemic in West Africa, CDC activated a Modeling Task Force to generate estimates on various topics related to the response in West Africa and the risk for importation of cases into the United States. Analysis of eight Ebola response modeling projects conducted during August 2014-July 2015 provided insight into the types of questions addressed by modeling, the impact of the estimates generated, and the difficulties encountered during the modeling. This time frame was selected to cover the three phases of the West African epidemic curve. Questions posed to the Modeling Task Force changed as the epidemic progressed. 
Initially, the task force was asked to estimate the number of cases that might occur if no interventions were implemented compared with cases that might occur if interventions were implemented; however, at the peak of the epidemic, the focus shifted to estimating resource needs for Ebola treatment units. Then, as the epidemic decelerated, requests for modeling changed to generating estimates of the potential number of sexually transmitted Ebola cases. Modeling to provide information for decision-making during the CDC Ebola response involved limited data, a short turnaround time, and difficulty communicating the modeling process, including assumptions and interpretation of results. Despite these challenges, modeling yielded estimates and projections that public health officials used to make key decisions regarding response strategy and resources required. The impact of modeling during the Ebola response demonstrates the usefulness of modeling in future responses, particularly in the early stages and when data are scarce. Future modeling can be enhanced by planning ahead for data needs and data sharing, and by open communication among modelers, scientists, and others to ensure that modeling and its limitations are more clearly understood. The activities summarized in this report would not have been possible without collaboration with many U.S. and international partners (http://www.cdc.gov/vhf/ebola/outbreaks/2014-west-africa/partners.html).",2016-07-08 +26065909,zflncRNApedia: A Comprehensive Online Resource for Zebrafish Long Non-Coding RNAs.,"Recent transcriptome annotation using deep sequencing approaches have annotated a large number of long non-coding RNAs in zebrafish, a popular model organism for human diseases. These studies characterized lncRNAs in critical developmental stages as well as adult tissues. Each of the studies has uncovered a distinct set of lncRNAs, with minor overlaps. 
The availability of the raw RNA-Seq datasets in public domain encompassing critical developmental time-points and adult tissues provides us with a unique opportunity to understand the spatiotemporal expression patterns of lncRNAs. In the present report, we created a catalog of lncRNAs in zebrafish, derived largely from the three annotation sets, as well as manual curation of literature to compile a total of 2,267 lncRNA transcripts in zebrafish. The lncRNAs were further classified based on the genomic context and relationship with protein coding gene neighbors into 4 categories. Analysis revealed a total of 86 intronic, 309 promoter associated, 485 overlapping and 1,386 lincRNAs. We created a comprehensive resource which houses the annotation of lncRNAs as well as associated information including expression levels, promoter epigenetic marks, genomic variants and retroviral insertion mutants. The resource also hosts a genome browser where the datasets could be browsed in the genome context. To the best of our knowledge, this is the first comprehensive resource providing a unified catalog of lncRNAs in zebrafish. The resource is freely available at URL: http://genome.igib.res.in/zflncRNApedia.",2015-06-11 +30423080,DREAM-Yara: an exact read mapper for very large databases with short update time.,"

Motivation

Mapping-based approaches have become limited in their application to very large sets of references since computing an FM-index for very large databases (e.g. >10 GB) has become a bottleneck. This affects many analyses that need such index as an essential step for approximate matching of the NGS reads to reference databases. For instance, in typical metagenomics analysis, the size of the reference sequences has become prohibitive to compute a single full-text index on standard machines. Even on large memory machines, computing such index takes about 1 day of computing time. As a result, updates of indices are rarely performed. Hence, it is desirable to create an alternative way of indexing while preserving fast search times.

Results

To solve the index construction and update problem we propose the DREAM (Dynamic seaRchablE pArallel coMpressed index) framework and provide an implementation. The main contributions are the introduction of an approximate search distributor via a novel use of Bloom filters. We combine several Bloom filters to form an interleaved Bloom filter and use this new data structure to quickly exclude reads for parts of the databases where they cannot match. This allows us to keep the databases in several indices which can be easily rebuilt if parts are updated while maintaining a fast search time. The second main contribution is an implementation of DREAM-Yara a distributed version of a fully sensitive read mapper under the DREAM framework.

Availability and implementation

https://gitlab.com/pirovc/dream_yara/.",2018-09-01 +29982705,Generating genomic platforms to study Candida albicans pathogenesis.,"The advent of the genomic era has made elucidating gene function on a large scale a pressing challenge. ORFeome collections, whereby almost all ORFs of a given species are cloned and can be subsequently leveraged in multiple functional genomic approaches, represent valuable resources toward this endeavor. Here we provide novel, genome-scale tools for the study of Candida albicans, a commensal yeast that is also responsible for frequent superficial and disseminated infections in humans. We have generated an ORFeome collection composed of 5099 ORFs cloned in a Gateway™ donor vector, representing 83% of the currently annotated coding sequences of C. albicans. Sequencing data of the cloned ORFs are available in the CandidaOrfDB database at http://candidaorfeome.eu. We also engineered 49 expression vectors with a choice of promoters, tags and selection markers and demonstrated their applicability to the study of target ORFs transferred from the C. albicans ORFeome. In addition, the use of the ORFeome in the detection of protein-protein interaction was demonstrated. Mating-compatible strains as well as Gateway™-compatible two-hybrid vectors were engineered, validated and used in a proof of concept experiment. These unique and valuable resources should greatly facilitate future functional studies in C. albicans and the elucidation of mechanisms that underlie its pathogenicity.",2018-08-01 +31741225,AGONOTES: A Robot Annotator for Argonaute Proteins.,"The argonaute protein (Ago) exists in almost all organisms. In eukaryotes, it functions as a regulatory system for gene expression. In prokaryotes, it is a type of defense system against foreign invasive genomes. The Ago system has been engineered for gene silencing and genome editing and plays an important role in biological studies. 
With an increasing number of genomes and proteomes of various microbes becoming available, computational tools for identifying and annotating argonaute proteins are urgently needed. We introduce AGONOTES (Argonaute Notes). It is a web service especially designed for identifying and annotating Ago. AGONOTES uses the BLASTP similarity search algorithm to categorize all submitted proteins into three groups: prokaryotic argonaute protein (pAgo), eukaryotic argonaute protein (eAgo), and non-argonaute protein (non-Ago). Argonaute proteins can then be aligned to the corresponding standard set of Ago sequences using the multiple sequence alignment program MUSCLE. All functional domains of Ago can further be curated from the alignment results and visualized easily through Bio::Graphic modules in the BioPerl bundle. Compared with existing tools such as CD-Search and available databases such as UniProt and AGONOTES showed a much better performance on domain annotations, which is fundamental in studying the new Ago. AGONOTES can be freely accessed at http://i.uestc.edu.cn/agonotes/. AGONOTES is a friendly tool for annotating Ago domains from a proteome or a series of protein sequences.",2019-11-18 +32995380,Tobacco control challenges in China: Big data analysis of online tobacco marketing information.,"

Objective

To develop tobacco control strategies by analyzing online tobacco marketing information in China.

Methods

Using web-crawler software, this study acquired 106,485 pieces of online tobacco marketing information published on 11 different Internet platforms including Weibo, WeChat, Baidu, etc., from January-June 2018. The data were used to investigate the characteristics and social networks of online tobacco marketing via content and social network analysis.

Results

The total volume of online tobacco marketing during the study period was high, showing a positive trend. Of all the marketing subjects, those involving ""flavor capsule"", ""Marlboro"", and ""Esse"" were the most popular. The Weibo platform had the highest volume of online tobacco marketing information as well as the largest proportion of explicit marketing information. This was followed by other social media platforms such as Baidu Search, Baidu Tieba, and Xiaohongshu, where implicit marketing information predominated. The overall network structure of tobacco websites exhibited a significant centralization feature, where traditional and novel tobacco websites formed two clusters with almost no intersections. The China Tobacco Science and Education Website (http://www.tobaccoinfo.com.cn/) and E-Cigarette Home (http://ecigm.com/) were the two nodes of the highest degree centrality within the respective ""circle"", while the China Tobacco Monopoly Bureau Website (http://www.tobacco.gov.cn/) was the node with the highest closeness centrality. By contrast, Baidu Tieba's overall network structure was more decentralized, and the degree of correlation between different nodes was relatively low.

Conclusion

Online tobacco marketing demonstrated high volumes and wide coverage, and an intertwined network, thereby creating major obstacles for tobacco control. To address this issue, the government should strengthen network supervision of tobacco marketing and revise its current regulations. Meanwhile, Internet platforms should improve self-regulation by comprehensively removing and blocking tobacco-related information. Lastly, the media and public should advocate associated policies and support Internet platform supervision.",2020-07-04 +29216377,PeachVar-DB: A Curated Collection of Genetic Variations for the Interactive Analysis of Peach Genome Data.,"Applying next-generation sequencing (NGS) technologies to species of agricultural interest has the potential to accelerate the understanding and exploration of genetic resources. The storage, availability and maintenance of huge quantities of NGS-generated data remains a major challenge. The PeachVar-DB portal, available at http://hpc-bioinformatics.cineca.it/peach, is an open-source catalog of genetic variants present in peach (Prunus persica L. Batsch) and wild-related species of Prunus genera, annotated from 146 samples publicly released on the Sequence Read Archive (SRA). We designed a user-friendly web-based interface of the database, providing search tools to retrieve single nucleotide polymorphism (SNP) and InDel variants, along with useful statistics and information. PeachVar-DB results are linked to the Genome Database for Rosaceae (GDR) and the Phytozome database to allow easy access to other external useful plant-oriented resources. 
In order to extend the genetic diversity covered by the PeachVar-DB further, and to allow increasingly powerful comparative analysis, we will progressively integrate newly released data.",2018-01-01 +31992710,"Genome-wide rare variant analysis for thousands of phenotypes in over 70,000 exomes from two cohorts.","Understanding the impact of rare variants is essential to understanding human health. We analyze rare (MAF < 0.1%) variants against 4264 phenotypes in 49,960 exome-sequenced individuals from the UK Biobank and 1934 phenotypes (1821 overlapping with UK Biobank) in 21,866 members of the Healthy Nevada Project (HNP) cohort who underwent Exome + sequencing at Helix. After using our rare-variant-tailored methodology to reduce test statistic inflation, we identify 64 statistically significant gene-based associations in our meta-analysis of the two cohorts and 37 for phenotypes available in only one cohort. Singletons make significant contributions to our results, and the vast majority of the associations could not have been identified with a genotyping chip. Our results are available for interactive browsing in a webapp (https://ukb.research.helix.com). This comprehensive analysis illustrates the biological value of large, deeply phenotyped cohorts of unselected populations coupled with NGS data.",2020-01-28 +29297289,dBBQs: dataBase of Bacterial Quality scores.,"BACKGROUND:It is well-known that genome sequencing technologies are becoming significantly cheaper and faster. As a result of this, the exponential growth in sequencing data in public databases allows us to explore ever growing large collections of genome sequences. However, it is less known that the majority of available sequenced genome sequences in public databases are not complete, drafts of varying qualities. We have calculated quality scores for around 100,000 bacterial genomes from all major genome repositories and put them in a fast and easy-to-use database. 
RESULTS:Prokaryotic genomic data from all sources were collected and combined to make a non-redundant set of bacterial genomes. The genome quality score for each was calculated by four different measurements: assembly quality, number of rRNA and tRNA genes, and the occurrence of conserved functional domains. The dataBase of Bacterial Quality scores (dBBQs) was designed to store and retrieve quality scores. It offers fast searching and download features which the result can be used for further analysis. In addition, the search results are shown in interactive JavaScript chart framework using DC.js. The analysis of quality scores across major public genome databases find that around 68% of the genomes are of acceptable quality for many uses. CONCLUSIONS:dBBQs (available at http://arc-gem.uams.edu/dbbqs ) provides genome quality scores for all available prokaryotic genome sequences with a user-friendly Web-interface. These scores can be used as cut-offs to get a high-quality set of genomes for testing bioinformatics tools or improving the analysis. Moreover, all data of the four measurements that were combined to make the quality score for each genome, which can potentially be used for further analysis. dBBQs will be updated regularly and is freely use for non-commercial purpose.",2017-12-28 +28210891,Aquatic monitoring programs conducted during environmental impact assessments in Canada: preliminary assessment before and after weakened environmental regulation.,"Aquatic monitoring programs are imperative for the functioning of the environmental impact assessment (EIA) process and a cornerstone for industrial compliance in Canada. However, in 2012, several leading pieces of federal environmental legislation (e.g., Canadian Environmental Assessment Act c.19, s. 52, 2012) were drastically altered, effectively weakening levels of environmental protection for aquatic ecosystems during project developments. 
This paper assesses the impact of CEAA 2012 on aquatic monitoring programs (and subsequent monitoring data reporting) across Canada for ten projects (five completed pre-CEAA 2012 and five completed post-CEAA 2012). Projects included four energy and six mining projects and were selected based on the following criteria: (i) representative of Canada's resource economy; (ii) project information was publicly available; and (iii) strong public interest. Projects pre- and post-CEAA 2012 exhibited few apparent differences before and after environmental regulatory changes. However, wide discrepancies exist in numbers and types of parameters reported, along with a lack of consistency in reporting. Projects pre-CEAA 2012 provided more follow-up monitoring commitments. Although qualitative differences remain inconclusive, this paper highlights requirements for further assessment of aquatic monitoring and follow-up programs in Canada. Recommendations for the government to consider during reviews of the federal environmental assessment processes include (i) improved transparency on the Canadian Environmental Assessment Agency website ( https://www.ceaa-acee.gc.ca/ ); (ii) creation of a legally binding standardized aquatic monitoring program framework to ensure that all Canadian aquatic ecosystems are monitored with equal rigour; and (iii) commitments and justification related to frequency of aquatic monitoring of water quality.",2017-02-16 +29329100,A Web-Based System for Bayesian Benchmark Dose Estimation.,"

Background

Benchmark dose (BMD) modeling is an important step in human health risk assessment and is used as the default approach to identify the point of departure for risk assessment. A probabilistic framework for dose-response assessment has been proposed and advocated by various institutions and organizations; therefore, a reliable tool is needed to provide distributional estimates for BMD and other important quantities in dose-response assessment.

Objectives

We developed an online system for Bayesian BMD (BBMD) estimation and compared results from this software with U.S. Environmental Protection Agency's (EPA's) Benchmark Dose Software (BMDS).

Methods

The system is built on a Bayesian framework featuring the application of Markov chain Monte Carlo (MCMC) sampling for model parameter estimation and BMD calculation, which makes the BBMD system fundamentally different from the currently prevailing BMD software packages. In addition to estimating the traditional BMDs for dichotomous and continuous data, the developed system is also capable of computing model-averaged BMD estimates.

Results

A total of 518 dichotomous and 108 continuous data sets extracted from the U.S. EPA's Integrated Risk Information System (IRIS) database (and similar databases) were used as testing data to compare the estimates from the BBMD and BMDS programs. The results suggest that the BBMD system may outperform the BMDS program in a number of aspects, including fewer failed BMD and BMDL calculations and estimates.

Conclusions

The BBMD system is a useful alternative tool for estimating BMD with additional functionalities for BMD analysis based on most recent research. Most importantly, the BBMD has the potential to incorporate prior information to make dose-response modeling more reliable and can provide distributional estimates for important quantities in dose-response assessment, which greatly facilitates the current trend for probabilistic risk assessment. https://doi.org/10.1289/EHP1289.",2018-01-11 +28748223,PhenoPlasm: a database of disruption phenotypes for malaria parasite genes.,"Two decades after the first Plasmodium transfection, attempts have been made to disrupt more than 3,151 genes in malaria parasites, across five Plasmodium species. While results from rodent malaria transfections have been curated and systematised, empowering large-scale analysis, phenotypic data from human malaria parasite transfections currently exists as individual reports scattered across a the literature. To facilitate systematic analysis of published experimental genetic data across Plasmodium species, we have built PhenoPlasm ( http://www.phenoplasm.org), a database of phenotypes generated by transfection experiments in all Plasmodium parasites. The site provides a simple interface linking citation-backed Plasmodium reverse-genetic phenotypes to gene IDs. The database has been populated with phenotypic data on 367 P. falciparum genes, curated from 176 individual publications, as well as existing data on rodent Plasmodium species from RMgmDB and PlasmoGEM. This is the first time that all available data on P. falciparum transfection experiments has been brought together in a single place. These data are presented using ortholog mapping to allow a researcher interested in a gene in one species to see results across other Plasmodium species. The collaborative nature of the database enables any researcher to add new phenotypes as they are discovered. 
As an example of database utility, we use the currently available datasets to identify RAP (RNA-binding domain abundant in Apicomplexa)-domain containing proteins as crucial to parasite survival.",2017-07-24 +25332401,Proteome TopFIND 3.0 with TopFINDer and PathFINDer: database and analysis tools for the association of protein termini to pre- and post-translational events.,"The knowledgebase TopFIND is an analysis platform focussed on protein termini, their origin, modification and hence their role on protein structure and function. Here, we present a major update to TopFIND, version 3, which includes a 70% increase in the underlying data to now cover a 90,696 proteins, 165,044 N-termini, 130,182 C-termini, 14,382 cleavage sites and 33,209 substrate cleavages in H. sapiens, M. musculus, A. thaliana, S. cerevisiae and E. coli. New features include the mapping of protein termini and cleavage entries across protein isoforms and significantly, the mapping of protein termini originating from alternative transcription and alternative translation start sites. Furthermore, two analysis tools for complex data analysis based on the TopFIND resource are now available online: TopFINDer, the TopFIND ExploRer, characterizes and annotates proteomics-derived N- or C-termini sets for their origin, sequence context and implications for protein structure and function. Neo-termini are also linked to associated proteases. PathFINDer identifies indirect connections between a protease and list of substrates or termini thus supporting the evaluation of complex proteolytic processes in vivo. To demonstrate the utility of the tools, a recent N-terminomics data set of inflamed murine skin has been re-analyzed. In re-capitulating the major findings originally performed manually, this validates the utility of these new resources. 
The point of entry for the resource is http://clipserve.clip.ubc.ca/topfind from where the graphical interface, all application programming interfaces (API) and the analysis tools are freely accessible.",2014-10-20 +32616503,Y Chromosome LncRNA Are Involved in Radiation Response of Male Non-Small Cell Lung Cancer Cells.,"Numerous studies have implicated changes in the Y chromosome in male cancers, yet few have investigated the biological importance of Y chromosome noncoding RNA. Here we identify a group of Y chromosome-expressed long noncoding RNA (lncRNA) that are involved in male non-small cell lung cancer (NSCLC) radiation sensitivity. Radiosensitive male NSCLC cell lines demonstrated a dose-dependent induction of linc-SPRY3-2/3/4 following irradiation, which was not observed in radioresistant male NSCLC cell lines. Cytogenetics revealed the loss of chromosome Y (LOY) in the radioresistant male NSCLC cell lines. Gain- and loss-of-function experiments indicated that linc-SPRY3-2/3/4 transcripts affect cell viability and apoptosis. Computational prediction of RNA binding proteins (RBP) motifs and UV-cross-linking and immunoprecipitation (CLIP) assays identified IGF2BP3, an RBP involved in mRNA stability, as a binding partner for linc-SPRY3-2/3/4 RNA. The presence of linc-SPRY3-2/3/4 reduced the half-life of known IGF2BP3 binding mRNA, such as the antiapoptotic HMGA2 mRNA, as well as the oncogenic c-MYC mRNA. Assessment of Y chromosome in NSCLC tissue microarrays and expression of linc-SPRY3-2/3/4 in NSCLC RNA-seq and microarray data revealed a negative correlation between the loss of the Y chromosome or linc-SPRY3-2/3/4 and overall survival. Thus, linc-SPRY3-2/3/4 expression and LOY could represent an important marker of radiotherapy in NSCLC. SIGNIFICANCE: This study describes previously unknown Y chromosome-expressed lncRNA regulators of radiation response in male NSCLC and show a correlation between loss of chromosome Y and radioresistance. 
GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/19/4046/F1.large.jpg.",2020-07-02 +32673511,Flaring from Unconventional Oil and Gas Development and Birth Outcomes in the Eagle Ford Shale in South Texas.,"

Background

Prior studies suggest exposure to oil and gas development (OGD) adversely affects birth outcomes, but no studies have examined flaring-the open combustion of natural gas-from OGD.

Objectives

We investigated whether residential proximity to flaring from OGD was associated with shorter gestation and reduced fetal growth in the Eagle Ford Shale of south Texas.

Methods

We conducted a retrospective cohort study using administrative birth records from 2012 to 2015 (N=23,487) and satellite observations of flaring activity during pregnancy within 5km of maternal residence. Multivariate logistic and linear regression models were used to estimate associations between four outcomes (preterm birth, small-for-gestational age, continuous gestational age, and term birthweight) and exposure to a low (1-9) or high (≥10) number of nightly flare events, as compared with no exposure, while controlling for known maternal risk factors. We also examined associations with the number of oil and gas wells within 5km using data from DrillingInfo (now Enverus).

Results

Exposure to a high number of nightly flare events was associated with a 50% higher odds of preterm birth [odds ratio (OR)=1.50 (95% CI: 1.23, 1.83)] and shorter gestation [mean difference=-1.9 (95% CI: -2.8, -0.9) d] compared with no exposure. Effect estimates were slightly reduced after adjustment for the number of wells within 5km. In stratified models these associations were present only among Hispanic women. Flaring and fetal growth outcomes were not significantly associated. Women exposed to a high number of wells (fourth quartile, ≥27) vs. no wells within 5km had a higher odds of preterm birth [OR=1.31 (95% CI: 1.14, 1.49)], shorter gestation [-1.3 (95% CI: -1.9, -0.8) d], and lower average birthweight [-19.4 (95% CI: -36.7, -2.0) g].

Discussion

Our study suggests exposure to flaring from OGD is associated with an increased risk of preterm birth. Our findings need to be confirmed in other populations. https://doi.org/10.1289/EHP6394.",2020-07-15 +28049134,miRandb: a resource of online services for miRNA research.,"Recent discovery of thousands of small and large noncoding RNAs, in parallel to technical improvements enabling scientists to study the transcriptome in much higher depth, has resulted in massive data generation. This burst of information prompts the development of easily accessible resources for storage, retrieval and analysis of raw and processed data, and hundreds of Web-based tools dedicated to these tasks have been made available. However, the increasing number and diversity of bioinformatics tools, each covering a specific and specialized area, as well as their redundancies, represent potential sources of complication for end users. To overcome these issues, we are introducing an easy-to-follow classification of microRNA (miRNA)-related bioinformatics tools for biologists interested in studying this important class of small noncoding RNAs. We also developed our miRNA database miRNA algorithmic network database (miRandb) that is a meta-database, which presents a survey of > 180 Web-based miRNA databases. These include miRNA sequence, discovery, target prediction, target validation, expression and regulation, functions and their roles in diseases, interactions in cellular pathways and networks and deep sequencing. miRandb recapitulates the diverse possibilities and facilitates that access to the different categories of miRNA resources. Researchers can easily select the category of miRNA information and desired organism, in result eligible databases with their features are presented. This database introducing an easy-to-follow classification of available resources that can facilitate selection of appropriate resources for miRNA-related bioinformatics tools. 
Finally, we described current shortages and future necessities that assist researchers to use these tools easily. Our database is accessible at http://mirandb.ir.",2018-03-01 +31663680,Bovine milk transcriptome analysis reveals microRNAs and RNU2 involved in mastitis.,"Mastitis is a common inflammatory infectious disease in dairy cows. To understand the microRNA (miRNA) expression profile changes during bovine mastitis, we undertook a genome-wide miRNA study of normal milk and milk that tested positive on the California mastitis test for bovine mastitis (CMT+). Twenty-five miRNAs were differentially expressed (23 miRNAs upregulated and two downregulated) during bovine mastitis relative to their expression in normal milk. Upregulated mature miR-1246 probably derived from a U2 small nuclear RNA rather than an miR-1246 precursor. The significantly upregulated miRNA precursors and RNU2 were significantly enriched on bovine chromosome 19, which is homologous to human chromosome 17. A gene ontology analysis of the putative mRNA targets of the significantly upregulated miRNAs showed that these miRNAs were involved in binding target mRNA transcripts and regulating target gene expression, and a Kyoto Encyclopedia of Genes and Genomes pathway analysis showed that the upregulated miRNAs were predominantly related to cancer and immune system pathways. Three novel miRNAs were associated with bovine mastitis and were relatively highly expressed in milk. We confirmed that one of the novel mastitis-related miRNAs was significantly upregulated using a digital PCR system. The differentially expressed miRNAs were involved in human cancers, infections, and immune-related diseases. The genome-wide analysis of miRNA profiles in this study provides insight into bovine mastitis and inflammatory diseases. 
DATABASES: The miRNAseq generated for this study can be found in the Sequence Read Archive (SRA) under BioProject Number PRJNA421075 and SRA Study Number SRP126134 (https://www.ncbi.nlm.nih.gov/bioproject/PRJNA421075).",2019-11-15 +31691385,The Clinical Genome and Ancestry Report: An interactive web application for prioritizing clinically implicated variants from genome sequencing data with ancestry composition.,"Genome sequencing is positioned as a routine clinical work-up for diverse clinical conditions. A commonly used approach to highlight candidate variants with potential clinical implication is to search over locus- and gene-centric knowledge databases. Most web-based applications allow a federated query across diverse databases for a single variant; however, sifting through a large number of genomic variants with combination of filtering criteria is a substantial challenge. Here we describe the Clinical Genome and Ancestry Report (CGAR), an interactive web application developed to follow clinical interpretation workflows by organizing variants into seven categories: (1) reported disease-associated variants, (2) rare- and high-impact variants in putative disease-associated genes, (3) secondary findings that the American College of Medical Genetics and Genomics recommends reporting back to patients, (4) actionable pharmacogenomic variants, (5) focused reports for candidate genes, (6) de novo variant candidates for trio analysis, and (7) germline and somatic variants implicated in cancer risk, diagnosis, treatment and prognosis. For each variant, a comprehensive list of external links to variant-centric and phenotype databases are provided. Furthermore, genotype-derived ancestral composition is used to highlight allele frequencies from a matched population since some disease-associated variants show a wide variation between populations. 
CGAR is an open-source software and is available at https://tom.tch.harvard.edu/apps/cgar/.",2019-11-15 +31732673,Protein-assisted RNA fragment docking (RnaX) for modeling RNA-protein interactions using ModelX.,"RNA-protein interactions are crucial for such key biological processes as regulation of transcription, splicing, translation, and gene silencing, among many others. Knowing where an RNA molecule interacts with a target protein and/or engineering an RNA molecule to specifically bind to a protein could allow for rational interference with these cellular processes and the design of novel therapies. Here we present a robust RNA-protein fragment pair-based method, termed RnaX, to predict RNA-binding sites. This methodology, which is integrated into the ModelX tool suite (http://modelx.crg.es), takes advantage of the structural information present in all released RNA-protein complexes. This information is used to create an exhaustive database for docking and a statistical forcefield for fast discrimination of true backbone-compatible interactions. RnaX, together with the protein design forcefield FoldX, enables us to predict RNA-protein interfaces and, when sufficient crystallographic information is available, to reengineer the interface at the sequence-specificity level by mimicking those conformational changes that occur on protein and RNA mutagenesis. These results, obtained at just a fraction of the computational cost of methods that simulate conformational dynamics, open up perspectives for the engineering of RNA-protein interfaces.",2019-11-15 +25502817,HGV&TB: a comprehensive online resource on human genes and genetic variants associated with tuberculosis.,"Tuberculosis (TB) is an infectious disease caused by fastidious pathogen Mycobacterium tuberculosis. TB has emerged as one of the major causes of mortality in the developing world. Role of host genetic factors that modulate disease susceptibility have not been studied widely. 
Recent studies have reported few genetic loci that provide impetus to this area of research. The availability of tools has enabled genome-wide scans for disease susceptibility loci associated with infectious diseases. Till now, information on human genetic variations and their associated genes that modulate TB susceptibility have not been systematically compiled. In this work, we have created a resource: HGV&TB, which hosts genetic variations reported to be associated with TB susceptibility in humans. It currently houses information on 307 variations in 98 genes. In total, 101 of these variations are exonic, whereas 78 fall in intronic regions. We also analysed the pathogenicity of the genetic variations, their phenotypic consequences and ethnic origin. Using various computational analyses, 30 variations of the 101 exonic variations were predicted to be pathogenic. The resource is freely available at http://genome.igib.res.in/hgvtb/index.html. Using integrative analysis, we have shown that the disease associated variants are selectively enriched in the immune signalling pathways which are crucial in the pathophysiology of TB. Database URL: http://genome.igib.res.in/hgvtb/index.html",2014-12-13 +24198245,PubChem BioAssay: 2014 update.,"PubChem's BioAssay database (http://pubchem.ncbi.nlm.nih.gov) is a public repository for archiving biological tests of small molecules generated through high-throughput screening experiments, medicinal chemistry studies, chemical biology research and drug discovery programs. In addition, the BioAssay database contains data from high-throughput RNA interference screening aimed at identifying critical genes responsible for a biological process or disease condition. The mission of PubChem is to serve the community by providing free and easy access to all deposited data. 
To this end, PubChem BioAssay is integrated into the National Center for Biotechnology Information retrieval system, making them searchable by Entrez queries and cross-linked to other biomedical information archived at National Center for Biotechnology Information. Moreover, PubChem BioAssay provides web-based and programmatic tools allowing users to search, access and analyze bioassay test results and metadata. In this work, we provide an update for the PubChem BioAssay resource, such as information content growth, new developments supporting data integration and search, and the recently deployed PubChem Upload to streamline chemical structure and bioassay submissions.",2013-11-05 +33828764,"From lab-based studies to eye-tracking in virtual and real worlds: conceptual and methodological problems and solutions. Symposium 4 at the 20th European Conference on Eye Movement Research (ECEM) in Alicante, 20.8.2019. ","Wearable mobile eye trackers have great potential as they allow the measurement of eye movements during daily activities such as driving, navigating the world and doing groceries. Although mobile eye trackers have been around for some time, developing and operating these eye trackers was generally a highly technical affair. As such, mobile eye-tracking research was not feasible for most labs. Nowadays, many mobile eye trackers are available from eye-tracking manufacturers (e.g. Tobii, Pupil labs, SMI, Ergoneers) and various implementations in virtual/augmented reality have recently been released.The wide availability has caused the number of publications using a mobile eye tracker to increase quickly. Mobile eye tracking is now applied in vision science, educational science, developmental psychology, marketing research (using virtual and real supermarkets), clinical psychology, usability, architecture, medicine, and more. 
Yet, transitioning from lab-based studies where eye trackers are fixed to the world to studies where eye trackers are fixed to the head presents researchers with a number of problems. These problems range from the conceptual frameworks used in world-fixed and head-fixed eye tracking and how they relate to each other, to the lack of data quality comparisons and field tests of the different mobile eye trackers and how the gaze signal can be classified or mapped to the visual stimulus. Such problems need to be addressed in order to understand how world-fixed and head-fixed eye-tracking research can be compared and to understand the full potential and limits of what mobile eye-tracking can deliver. In this symposium, we bring together presenting researchers from five different institutions (Lund University, Utrecht University, Clemson University, Birkbeck University of London and Rochester Institute of Technology) addressing problems and innovative solutions across the entire breadth of mobile eye-tracking research. Hooge, presenting Hessels et al. paper, focus on the definitions of fixations and saccades held by researchers in the eyemovement field and argue how they need to be clarified in order to allow comparisons between world-fixed and head-fixed eye-tracking research. - Diaz et al. introduce machine-learning techniques for classifying the gaze signal in mobile eye-tracking contexts where head and body are unrestrained. Niehorster et al. compare data quality of mobile eye trackers during natural behavior and discuss the application range of these eye trackers. Duchowski et al. introduce a method for automatically mapping gaze to faces using computer vision techniques. Pelz et al. employ state-of-the-art techniques to map fixations to objects of interest in the scene video and align grasp and eye-movement data in the same reference frame to investigate the guidance of eye movements during manual interaction. 
Video stream: https://vimeo.com/357473408.",2019-11-25 +28095366,Cryptosporidium hominis gene catalog: a resource for the selection of novel Cryptosporidium vaccine candidates. ,"Human cryptosporidiosis, caused primarily by Cryptosporidium hominis and a subset of Cryptosporidium parvum, is a major cause of moderate-to-severe diarrhea in children under 5 years of age in developing countries and can lead to nutritional stunting and death. Cryptosporidiosis is particularly severe and potentially lethal in immunocompromised hosts. Biological and technical challenges have impeded traditional vaccinology approaches to identify novel targets for the development of vaccines against C. hominis, the predominant species associated with human disease. We deemed that the existence of genomic resources for multiple species in the genus, including a much-improved genome assembly and annotation for C. hominis, makes a reverse vaccinology approach feasible. To this end, we sought to generate a searchable online resource, termed C. hominis gene catalog, which registers all C. hominis genes and their properties relevant for the identification and prioritization of candidate vaccine antigens, including physical attributes, properties related to antigenic potential and expression data. Using bioinformatic approaches, we identified ∼400 C. hominis genes containing properties typical of surface-exposed antigens, such as predicted glycosylphosphatidylinositol (GPI)-anchor motifs, multiple transmembrane motifs and/or signal peptides targeting the encoded protein to the secretory pathway. This set can be narrowed further, e.g. by focusing on potential GPI-anchored proteins lacking homologs in the human genome, but with homologs in the other Cryptosporidium species for which genomic data are available, and with low amino acid polymorphism. 
Additional selection criteria related to recombinant expression and purification include minimizing predicted post-translation modifications and potential disulfide bonds. Forty proteins satisfying these criteria were selected from 3745 proteins in the updated C. hominis annotation. The immunogenic potential of a few of these is currently being tested.Database URL: http://cryptogc.igs.umaryland.edu.",2016-10-19 +29229304,Functional characterization of the G162R and D216H genetic variants of human CYP17A1.,"Cytochrome P450 17A1 (CYP17A1) is a dual-function enzyme catalyzing reactions necessary for cortisol and androgen biosynthesis. CYP17A1 is a validated drug target for prostate cancer as CYP17A1 inhibition significantly reduces circulating androgens and improves survival in castration-resistant prostate cancer. Germline CYP17A1 genetic variants with altered CYP17A1 activity manifesting as various endocrinopathies are extremely rare; however, characterizing these variants provides critical insights into CYP17A1 protein structure and function. By querying the dbSNP online database and publically available data from the 1000 genomes project (http://browser.1000genomes.org), we identified two CYP17A1 nonsynonymous genetic variants with unknown consequences for enzymatic activity and stability. We hypothesized that the resultant amino acid changes would alter CYP17A1 stability or activity. To test this hypothesis, we utilized a HEK-293T cell-based expression system to characterize the functional consequences of two CYP17A1 variants, D216H (rs200063521) and G162R (rs141821705). Cells transiently expressing the D216H variant demonstrate a selective impairment of 16α-hydroxyprogesterone synthesis by 2.1-fold compared to wild-type (WT) CYP17A1, while no effect on 17α-hydroxyprogesterone synthesis was observed. These data suggest that substrate orientations in the active site might be altered with this amino acid substitution. 
In contrast, the G162R substitution exhibits decreased CYP17A1 protein stability compared to WT with a near 70% reduction in protein levels as determined by immunoblot analysis. This variant is preferentially ubiquitinated and degraded prematurely, with an enzyme half-life calculated to be ∼2.5 h, and proteasome inhibitor treatment recovers G162R protein expression to WT levels. Together, these data provide new insights into CYP17A1 structure-function and stability mechanisms.",2017-12-09 +32174008,Zeta-Potential Read-Across Model Utilizing Nanodescriptors Extracted via the NanoXtract Image Analysis Tool Available on the Enalos Nanoinformatics Cloud Platform.,"Zeta potential is one of the most critical properties of nanomaterials (NMs) which provides an estimation of the surface charge, and therefore electrostatic stability in medium and, in practical terms, influences the NM's tendency to form agglomerates and to interact with cellular membranes. This paper describes a robust and accurate read-across model to predict NM zeta potential utilizing as the input data a set of image descriptors derived from transmission electron microscopy (TEM) images of the NMs. The image descriptors are calculated using NanoXtract (http://enaloscloud.novamechanics.com/EnalosWebApps/NanoXtract/), a unique online tool that generates 18 image descriptors from the TEM images, which can then be explored by modeling to identify those most predictive of NM behavior and biological effects. NM TEM images are used to develop a model for prediction of zeta potential based on grouping of the NMs according to their nearest neighbors. The model provides interesting insights regarding the most important similarity features between NMs-in addition to core composition the main elongation emerged, which links to key drivers of NM toxicity such as aspect ratio. 
Both the NanoXtract image analysis tool and the validated model for zeta potential (http://enaloscloud.novamechanics.com/EnalosWebApps/ZetaPotential/) are freely available online through the Enalos Nanoinformatics platform.",2020-03-16 +28628531,Costs and Length of Stay for the Acute Care of Patients with Motor-Complete Spinal Cord Injury Following Cervical Trauma: The Impact of Early Transfer to Specialized Acute SCI Center.,"

Objective

Acute spinal cord injury (SCI) centers aim to optimize outcome following SCI. However, there is no timeframe to transfer patients from regional to SCI centers in order to promote cost-efficiency of acute care. Our objective was to compare costs and length of stay (LOS) following early and late transfer to the SCI center.

Design

A retrospective cohort study involving 116 individuals was conducted. Group 1 (n = 87) was managed in an SCI center promptly after the trauma, whereas group 2 (n = 29) was transferred to the SCI center only after surgery. Direct comparison and multivariate linear regression analyses were used to assess the relationship between costs, LOS, and timing to transfer to the SCI center.

Results

Length of stay was significantly longer for group 2 (median, 93.0 days) as compared with group 1 (median, 40.0 days; P < 10), and average costs were also higher (median, Canadian $17,920.0 vs. $10,521.6; P = 0.004) for group 2, despite similar characteristics. Late transfer to the SCI center was the main predictive factor of longer LOS and increased costs.

Conclusions

Early admission to the SCI center was associated with shorter LOS and lower costs for patients sustaining tetraplegia. Early referral to an SCI center before surgery could lower the financial burden for the health care system.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) Determine the optimal timing for transfer of individuals with cervical traumatic spinal cord injury (SCI) in order to decrease acute care resource utilization; (2) Determine benefits of a complete perioperative management in a specialized SCI center; and (3) Identify factors that may influence resource utilization for acute care following motor-complete tetraplegia.

Level

Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this activity for a maximum of 1.5 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2017-07-01 +24573879,NONATObase: a database for Polychaeta (Annelida) from the Southwestern Atlantic Ocean.,"Networks can greatly advance data sharing attitudes by providing organized and useful data sets on marine biodiversity in a friendly and shared scientific environment. NONATObase, the interactive database on polychaetes presented herein, will provide new macroecological and taxonomic insights of the Southwestern Atlantic region. The database was developed by the NONATO network, a team of South American researchers, who integrated available information on polychaetes from between 5°N and 80°S in the Atlantic Ocean and near the Antarctic. The guiding principle of the database is to keep free and open access to data based on partnerships. Its architecture consists of a relational database integrated in the MySQL and PHP framework. Its web application allows access to the data from three different directions: species (qualitative data), abundance (quantitative data) and data set (reference data). The database has built-in functionality, such as the filter of data on user-defined taxonomic levels, characteristics of site, sample, sampler, and mesh size used. Considering that there are still many taxonomic issues related to poorly known regional fauna, a scientific committee was created to work out consistent solutions to current misidentifications and equivocal taxonomy status of some species. Expertise from this committee will be incorporated by NONATObase continually. The use of quantitative data was possible by standardization of a sample unit. 
All data, maps of distribution and references from a data set or a specified query can be visualized and exported to a commonly used data format in statistical analysis or reference manager software. The NONATO network has initialized with NONATObase, a valuable resource for marine ecologists and taxonomists. The database is expected to grow in functionality as it comes in useful, particularly regarding the challenges of dealing with molecular genetic data and tools to assess the effects of global environment change. Database URL: http://nonatobase.ufsc.br/.",2014-02-25 +22105744,Chinese hamster genome database: an online resource for the CHO community at www.CHOgenome.org.,The Chinese hamster genome database (http://www.chogenome.org/) is an online resource for the Chinese hamster (Cricetulus griseus) and Chinese hamster ovary (CHO) cell communities. CHO cells are important for biomedical research and are widely used in industry for the production of biopharmaceuticals. The genome of the CHO-K1 cell line was recently sequenced and the CHO community has developed an online resource to facilitate accessibility of the genomic data and the development of genomic tools.,2011-11-22 +29358009,What Can Big Data on Academic Interest Reveal about a Drug? Reflections in Three Major US Databases.,"The different stages of the life cycle of a drug - 'prenatal' stage, birth of a drug, rapid growth, maturity and stability, decline, and status before 'death' - are reflected in the three following databases: journal articles (PubMed-www.ncbi.nlm.nih.gov/pubmed); patents (US Patent Office-http://partfl1.uspto.gov/netahtml/PTO/search-adv.htlm); and approved drugs (FDA - www.accessdata.fda.gov/scripts/cder/drugsatfda/index/cfm). These databases are huge, from authoritative sources, correctly classified, and they properly link different datasets. 
Analysis of such data can uncover hidden patterns important for the assessment of drug status and may also yield some predictions regarding its future prospects. Drug-related, publication-based academic bibliographic records are especially numerous and support the development of various scientometric indices. In combination with information from other types of databases, they can outline various trends in pharmacology. Scientometric indices can be classified into those indicating a change in the status of a drug, and those assessing the chances for success, or even drug discontinuation. Here, we present big data analytics on publication-based academic interest in two segments: (i) description of scientometric indices and (ii) their applications for the assessment of the status of a drug.",2018-01-18 +31667297,Survey data on the quality of life of consumers fitted with osseointegrated fixation and bone-anchored limb prostheses provided by government organization.,"The data in this paper are related to the research article entitled ""Development of a government continuous quality improvement procedure for assessing the provision of bone anchored limb prosthesis: A process re-design descriptive study"" (Frossard et al., Canadian Prosthetics & Orthotics Journal, 2018. 1(2). p. 1-14). This article contains quality of life data experienced by individuals before and after implantation of a press-fit or screw-type osseointegrated fixation when fitted with conventional socket-suspended and bone-anchored limb prosthesis, respectively. This specifically-designed survey was developed and administered by Queensland Artificial Limb Services (QALS), an Australian State government organization. It was an integrated part of QALS' continuous quality improvement procedure for assessing the provision of bone-anchored prosthesis. A total of 12 out of the 65 consumers completed to the survey, giving a return rate of 18%. 
This benchmark information can contribute to inform the design of (A) other patients' experience surveys including those built-in governmental continuous quality improvement procedure as well as (B) clinical trials looking at the overall effects of surgical implantation of ossoeintegrated fixation on patients' quality of life. Online repository contains the files: https://data.mendeley.com/datasets/bkbxxmrhfh/1.",2019-09-17 +31649721,Comprehensive Analysis of Human microRNA-mRNA Interactome.,"MicroRNAs play a key role in the regulation of gene expression. A majority of microRNA-mRNA interactions remain unidentified. Despite extensive research, our ability to predict human microRNA-mRNA interactions using computational algorithms remains limited by a complexity of the models for non-canonical interactions, and an abundance of false-positive results. Here, we present the landscape of human microRNA-mRNA interactions derived from comprehensive analysis of HEK293 and Huh7.5 datasets, along with publicly available microRNA and mRNA expression data. We show that, while only 1-2% of human genes were the most regulated by microRNAs, few cell line-specific RNAs, including EEF1A1 and HSPA1B in HEK293 and AFP, APOB, and MALAT1 genes in Huh7.5, display substantial ""sponge-like"" properties. We revealed a group of microRNAs that are expressed at a very high level, while interacting with only a few mRNAs, which, indeed, serve as their specific expression regulators. In order to establish reliable microRNA-binding regions, we collected and systematically analyzed the data from 79 CLIP datasets of microRNA-binding sites. We report 46,805 experimentally confirmed mRNA-miRNA duplex regions. Resulting dataset is available at http://score.generesearch.ru/services/mirna/. 
Our study provides initial insight into the complexity of human microRNA-mRNA interactions.",2019-10-08 +32068161,Antibacterial activity profile of miramistin in in vitro and in vivo models.,"BACKGROUND:Miramistin is a widely used antiseptic, disinfectant and preservative, and one of the most popular antimicrobial agents on pharmaceutical market of the Russian Federation (http://www.dsm.ru/en/news/385/). However, there is a lack of reported systematic data on antibacterial efficacy of this agent obtained in accordance with the international standards. AIM:This paper represents a systematic study of antibacterial properties of miramistin. Another objective of this work is to evaluate and compare the exploratory performance of in vitro and in vivo protocols of antiseptics' efficacy testing using miramistin as the reference antiseptic. METHODS:Antibacterial activity of 0.1% and 0.2% aqueous solutions of miramistin against two museum strains of S. aureus (ATCC 209p) and E. coli (CDC F-50) was studied. Three standard in vitro laboratory tests (microdilution test, suspension test, and metal surface test), and one in vivo test (on rat's skin) were used. The study was conducted in accordance with the international regulatory documents. RESULTS:Miramistin showed high bactericidal activity against the studied bacterial pathogens in the standard in vitro tests. Thus, in the microdilution test it showed expressed activity against S. aureus (MIC 8 μg/ml, MBC 16 μg/ml) and E. coli (MIC 32 μg/ml, MBC 128 μg/ml). In the suspension test, miramistin decreased the amount of colony forming units by at least 6 log10 units for S. aureus, and by at least 4.5 log10 units for E. coli. Transition to the metal surface test led to significant decrease of antibacterial activity by 1-3 log10 units as compared to the suspension test. Further dramatic reduction of antiseptic activity (by 3-4 log10 units) was observed in in vivo rat skin test. 
Addition of a protein contaminant (bovine serum albumin) led to a general decrease in the effectiveness of miramistin against the test pathogens (typically, by 1-2 log10 units). An interesting effect of exposure time-dependent reversal of miramistin's specificity to the studied Gram-positive S. aureus and the Gram-negative E. coli organisms was observed in the metal surface test. CONCLUSIONS:The results of this work provide systematic data on antibacterial efficacy of miramistin. They also underscore the need in relevant in vivo models for evaluation of antiseptics' efficacy. While the existing in vitro methods can be successfully applied at the discovery stages, it is necessary to use more realistic in vivo models at more advanced development stages. The observed selectivity reversal effect should be taken into account when carrying out the antiseptics' efficacy testing and surface disinfection procedures.",2020-02-14 +30341039,Identification of a panel of genes as a prognostic biomarker for glioblastoma.,"

Background

Glioblastoma multiforme (GBM) is a fatal disease without effective therapy. Identification of new biomarkers for prognosis would enable more rational selections of strategies to cure patients with GBM and prevent disease relapse.

Methods

Seven datasets derived from GBM patients using microarray or next generation sequencing in R2 online database (http://r2.amc.nl) were extracted and then analyzed using JMP software. The survival distribution was calculated according to the Kaplan-Meier method and the significance was determined using log-rank statistics. The sensitivity of a panel of GBM cell lines in response to temozolomide (TMZ), salinomycin, celastrol, and triptolide treatments was evaluated using MTS and tumor-sphere formation assay.

Findings

We identified that CD44, ATP binding cassette subfamily C member 3 (ABCC3), and tumor necrosis factor receptor subfamily member 1A (TNFRSF1A), as highly expressed genes in GBMs, are associated with patients' poor outcomes and therapy resistance. Furthermore, these three markers combined with MGMT, a conventional GBM marker, can classify GBM patients into five new subtypes with different overall survival time in response to treatment. The four-gene signature and the therapy response of GBMs to a panel of therapeutic compounds were confirmed in a panel of GBM cell lines.

Interpretation

The data indicate that the four-gene panel can be used as a therapy response index for GBM patients and potential therapeutic targets. These results provide important new insights into the early diagnosis and the prognosis for GBM patients and introduce potential targets for GBM therapeutics. FUND: Baylor Scott & White Health Startup Fund (E.W.); Collaborative Faculty Research Investment Program (CFRIP) of Baylor University, Baylor Scott & White Health, and Baylor College of Medicine (E.W., T.S., J.H.H.); NIH R01 NS067435 (J.H.H.); Scott & White Plummer Foundation Grant (J.H.H.); National Natural Science Foundation of China 816280007 (J.H.H. and Fu.W.).",2018-10-16 +30351396,Two Methods for Mapping and Visualizing Associated Data on Phylogeny Using Ggtree.,Ggtree is a comprehensive R package for visualizing and annotating phylogenetic trees with associated data. It can also map and visualize associated external data on phylogenies with two general methods. Method 1 allows external data to be mapped on the tree structure and used as visual characteristic in tree and data visualization. Method 2 plots the data with the tree side by side using different geometric functions after reordering the data based on the tree structure. These two methods integrate data with phylogeny for further exploration and comparison in the evolutionary biology context. Ggtree is available from http://www.bioconductor.org/packages/ggtree.,2018-12-01 +31754347,Two predictive precision medicine tools for hepatocellular carcinoma.,"

Background

Hepatocellular carcinoma (HCC) is a serious threat to public health due to its poor prognosis. The current study aimed to develop and validate a prognostic nomogram to predict the overall survival of HCC patients.

Methods

The model cohort consisted of 24,991 mRNA expression data points from 348 HCC patients. The least absolute shrinkage and selection operator method (LASSO) Cox regression model was used to evaluate the prognostic mRNA biomarkers for the overall survival of HCC patients.

Results

Using multivariate Cox proportional regression analyses, a prognostic nomogram (named Eight-mRNA prognostic nomogram) was constructed based on the expression data of N4BP3, ADRA2B, E2F8, MAPT, PZP, HOXD9, COL15A1, and NDST3. The C-index of the Eight-mRNA prognostic nomogram was 0.765 (95% CI 0.724-0.806) for the overall survival in the model cohort. The Harrell's concordance-index of the Eight-mRNA prognostic nomogram was 0.715 (95% CI 0.658-0.772) in the validation cohort. The survival curves demonstrated that the HCC patients in the high risk group had a significantly poorer overall survival than the patients in the low risk group.

Conclusion

In the current study, we have developed two convenient and efficient predictive precision medicine tools for hepatocellular carcinoma. These two predictive precision medicine tools are helpful for predicting the individual mortality risk probability and improving the personalized comprehensive treatments for HCC patients. The Smart Cancer Predictive System can be used by clicking the following URL: https://zhangzhiqiao2.shinyapps.io/Smart_cancer_predictive_system_HCC_2/. The Gene Survival Analysis Screen System is available at the following URL: https://zhangzhiqiao5.shinyapps.io/Gene_Survival_Analysis_A1001/.",2019-11-14 +32030471,A functional near-infrared spectroscopy (fNIRS) examination of how self-initiated sequential movements become automatic.,"The neural mechanisms underlying movement automaticity have been investigated using PET and fMRI and more recently functional near-infrared spectroscopy (fNIRS). As fNIRS is an emerging technique, the objective of the present study was to replicate the functional magnetic resonance imaging-related motor sequence findings as reported by Wu et al. (J Neurophysiol 91:1690-1698, https://doi.org/10.1152/jn.01052.2003, 2004) using fNIRS. Seventeen right-handed participants practiced self-initiated sequential finger movements of two lengths (4 and 12) until a level of automaticity was achieved. Automaticity was evaluated by performing a visual-letter-counting task concurrently with the sequential finger movements. Our data were unable to replicate the pre-to-post-practice decrease in cortical activity in the left dorsolateral prefrontal cortex for both motor sequence tasks. The findings did reveal increased contribution from the right hemisphere following learning. 
The observed lateralization is suggestive of explicit learning and the involvement of working memory in motor sequence production.",2020-02-06 +31636953,MAP: model-based analysis of proteomic data to detect proteins with significant abundance changes.,"Isotope-labeling-based mass spectrometry (MS) is widely used in quantitative proteomic studies. With this technique, the relative abundance of thousands of proteins can be efficiently profiled in parallel, greatly facilitating the detection of proteins differentially expressed across samples. However, this task remains computationally challenging. Here we present a new approach, termed Model-based Analysis of Proteomic data (MAP), for this task. Unlike many existing methods, MAP does not require technical replicates to model technical and systematic errors, and instead utilizes a novel step-by-step regression analysis to directly assess the significance of observed protein abundance changes. We applied MAP to compare the proteomic profiles of undifferentiated and differentiated mouse embryonic stem cells (mESCs), and found it has superior performance compared with existing tools in detecting proteins differentially expressed during mESC differentiation. A web-based application of MAP is provided for online data processing at http://bioinfo.sibs.ac.cn/shaolab/MAP.",2019-08-13 +26061870,Human Chromosome Y and Haplogroups; introducing YDHS Database.,"

Background

As the high throughput sequencing efforts generate more biological information, scientists from different disciplines are interpreting the polymorphisms that make us unique. In addition, there is an increasing trend in general public to research their own genealogy, find distant relatives and to know more about their biological background. Commercial vendors are providing analyses of mitochondrial and Y-chromosomal markers for such purposes. Clearly, an easy-to-use free interface to the existing data on the identified variants would be in the interest of general public and professionals less familiar with the field. Here we introduce a novel metadatabase YDHS that aims to provide such an interface for Y-chromosomal DNA (Y-DNA) haplogroups and sequence variants.

Methods

The database uses ISOGG Y-DNA tree as the source of mutations and haplogroups and by using genomic positions of the mutations the database links them to genes and other biological entities. YDHS contains analysis tools for deeper Y-SNP analysis.

Results

YDHS addresses the shortage of Y-DNA related databases. We have tested our database using a set of different cases from literature ranging from infertility to autism. The database is at http://www.semanticgen.net/ydhs

Conclusions

Y-chromosomal DNA (Y-DNA) haplogroups and sequence variants have not been in the scientific limelight, excluding certain specialized fields like forensics, mainly because there is not much freely available information or it is scattered in different sources. However, as we have demonstrated Y-SNPs do play a role in various cases on the haplogroup level and it is possible to create a free Y-DNA dedicated bioinformatics resource.",2015-06-10 +28549078,HEROD: a human ethnic and regional specific omics database.,"

Motivation

Genetic and gene expression variations within and between populations and across geographical regions have substantial effects on the biological phenotypes, diseases, and therapeutic response. The development of precision medicines can be facilitated by the OMICS studies of the patients of specific ethnicity and geographic region. However, there is an inadequate facility for broadly and conveniently accessing the ethnic and regional specific OMICS data.

Results

Here, we introduced a new free database, HEROD, a human ethnic and regional specific OMICS database. Its first version contains the gene expression data of 53 070 patients of 169 diseases in seven ethnic populations from 193 cities/regions in 49 nations curated from the Gene Expression Omnibus (GEO), the ArrayExpress Archive of Functional Genomics Data (ArrayExpress), the Cancer Genome Atlas (TCGA) and the International Cancer Genome Consortium (ICGC). Geographic region information of curated patients was mainly manually extracted from referenced publications of each original study. These data can be accessed and downloaded via keyword search, World map search, and menu-bar search of disease name, the international classification of disease code, geographical region, location of sample collection, ethnic population, gender, age, sample source organ, patient type (patient or healthy), sample type (disease or normal tissue) and assay type on the web interface.

Availability and implementation

The HEROD database is freely accessible at http://bidd2.nus.edu.sg/herod/index.php. The database and web interface are implemented in MySQL, PHP and HTML with all major browsers supported.

Contact

phacyz@nus.edu.sg.",2017-10-01 +25836717,Accuracy of Molecular Data Generated with FFPE Biospecimens: Lessons from the Literature.,"Formalin-fixed and paraffin-embedded (FFPE) tissue biospecimens are a valuable resource for molecular cancer research. Although much can be gained from their use, it remains unclear whether the genomic and expression profiles obtained from FFPE biospecimens accurately reflect the physiologic condition of the patient from which they were procured, or if such profiles are confounded by biologic effects from formalin fixation and processing. To assess the physiologic accuracy of genomic and expression data generated with FFPE specimens, we surveyed the literature for articles investigating genomic and expression endpoints in case-matched FFPE and fresh or frozen human biospecimens using the National Cancer Institute's Biospecimen Research Database (http://biospecimens.cancer.gov/brd). Results of the survey revealed that the level of concordance between differentially preserved biospecimens varied among analytical parameters and platforms but also among reports, genes/transcripts of interest, and tumor status. The identified analytical techniques and parameters that resulted in strong correlations between FFPE and frozen biospecimens may provide guidance when optimizing molecular protocols for FFPE use; however, discrepancies reported for similar assays also illustrate the importance of validating protocols optimized for use with FFPE specimens with a case-matched fresh or frozen cohort for each platform, gene or transcript, and FFPE processing regime. 
On the basis of evidence published to date, validation of analytical parameters with a properly handled frozen cohort is necessary to ensure a high degree of concordance and confidence in the results obtained with FFPE biospecimens.",2015-04-02 +32598850,"Vibrational Spectroscopic Map, Vibrational Spectroscopy, and Intermolecular Interaction.","Vibrational spectroscopy is an essential tool in chemical analyses, biological assays, and studies of functional materials. Over the past decade, various coherent nonlinear vibrational spectroscopic techniques have been developed and enabled researchers to study time-correlations of the fluctuating frequencies that are directly related to solute-solvent dynamics, dynamical changes in molecular conformations and local electrostatic environments, chemical and biochemical reactions, protein structural dynamics and functions, characteristic processes of functional materials, and so on. In order to gain incisive and quantitative information on the local electrostatic environment, molecular conformation, protein structure and interprotein contacts, ligand binding kinetics, and electric and optical properties of functional materials, a variety of vibrational probes have been developed and site-specifically incorporated into molecular, biological, and material systems for time-resolved vibrational spectroscopic investigation. However, still, an all-encompassing theory that describes the vibrational solvatochromism, electrochromism, and dynamic fluctuation of vibrational frequencies has not been completely established mainly due to the intrinsic complexity of intermolecular interactions in condensed phases. In particular, the amount of data obtained from the linear and nonlinear vibrational spectroscopic experiments has been rapidly increasing, but the lack of a quantitative method to interpret these measurements has been one major obstacle in broadening the applications of these methods. 
Among various theoretical models, one of the most successful approaches is a semiempirical model generally referred to as the vibrational spectroscopic map that is based on a rigorous theory of intermolecular interactions. Recently, genetic algorithm, neural network, and machine learning approaches have been applied to the development of vibrational solvatochromism theory. In this review, we provide comprehensive descriptions of the theoretical foundation and various examples showing its extraordinary successes in the interpretations of experimental observations. In addition, a brief introduction to a newly created repository Web site (http://frequencymap.org) for vibrational spectroscopic maps is presented. We anticipate that a combination of the vibrational frequency map approach and state-of-the-art multidimensional vibrational spectroscopy will be one of the most fruitful ways to study the structure and dynamics of chemical, biological, and functional molecular systems in the future.",2020-06-29 +25538713,VitisCyc: a metabolic pathway knowledgebase for grapevine (Vitis vinifera).,"We have developed VitisCyc, a grapevine-specific metabolic pathway database that allows researchers to (i) search and browse the database for its various components such as metabolic pathways, reactions, compounds, genes and proteins, (ii) compare grapevine metabolic networks with other publicly available plant metabolic networks, and (iii) upload, visualize and analyze high-throughput data such as transcriptomes, proteomes, metabolomes etc. using OMICs-Viewer tool. VitisCyc is based on the genome sequence of the nearly homozygous genotype PN40024 of Vitis vinifera ""Pinot Noir"" cultivar with 12X v1 annotations and was built on BioCyc platform using Pathway Tools software and MetaCyc reference database. Furthermore, VitisCyc was enriched for plant-specific pathways and grape-specific metabolites, reactions and pathways. 
Currently VitisCyc harbors 68 super pathways, 362 biosynthesis pathways, 118 catabolic pathways, 5 detoxification pathways, 36 energy related pathways and 6 transport pathways, 10,908 enzymes, 2912 enzymatic reactions, 31 transport reactions and 2024 compounds. VitisCyc, as a community resource, can aid in the discovery of candidate genes and pathways that are regulated during plant growth and development, and in response to biotic and abiotic stress signals generated from a plant's immediate environment. VitisCyc version 3.18 is available online at http://pathways.cgrb.oregonstate.edu.",2014-12-09 +32425496,Efficacy of WINROP as a Screening Tool for Retinopathy of Prematurity in the East Coast of Malaysia.,"

Purpose

To evaluate the efficacy of the ""weight, insulin-like growth factor 1, neonatal retinopathy of prematurity"" (WINROP) algorithm in predicting retinopathy of prematurity (ROP) requiring treatment in Malaysia.

Participants

This was a retrospective study involving premature infants with gestational age less than 32 weeks treated from September 2016 to March 2019 in Hospital Universiti Sains Malaysia. Clinical diagnosis was made based on Early Treatment Retinopathy of Prematurity study. Participants' weekly weight gain since birth was entered in the website (http://winrop.com), along with date of birth, gestational age and final clinical examination outcome. WINROP software signals an alarm if an infant is at high risk of developing ROP requiring treatment during weight data entry. By using the alarm status, the sensitivity and specificity of this algorithm for predicting ROP requiring treatment were obtained.

Results

Ninety-two infants were included in this study. An alarm was detected in 67 infants (72.8%). There were a total of 53 infants (54.6%) with no ROP, 15 (16.3%) of whom developed stage 1 ROP, 10 (10.8%) who developed stage 2 ROP and 14 infants (15.2%) who developed stage 3 ROP. In our study, WINROP sensitivity was 95.2% and specificity was 33.8%.

Conclusion

WINROP is recommended as an initial screening tool for premature infants at risk of developing treatment-requiring ROP in Malaysia. It may help to alert clinicians managing severely ill infants when clinical examinations are less possible.",2020-04-24 +32078799,"FLASHDeconv: Ultrafast, High-Quality Feature Deconvolution for Top-Down Proteomics.","Top-down mass spectrometry (TD-MS)-based proteomics analyzes intact proteoforms and thus preserves information about individual protein species. The MS signal of these high-mass analytes is complex and challenges the accurate determination of proteoform masses. Fast and accurate feature deconvolution (i.e., the determination of intact proteoform masses) is, therefore, an essential step for TD data analysis. Here, we present FLASHDeconv, an algorithm achieving higher deconvolution quality, with an execution speed two orders of magnitude faster than existing approaches. FLASHDeconv transforms peak positions (m/z) within spectra into log m/z space. This simple transformation turns the deconvolution problem into a search for constant patterns, thereby greatly accelerating the process. In both simple and complex samples, FLASHDeconv reports more genuine feature masses and substantially fewer artifacts than other existing methods. FLASHDeconv is freely available for download here: https://www.openms.org/flashdeconv/. A record of this paper's Transparent Peer Review process is included in the Supplemental Information.",2020-02-19 +31682440,"VMD Store-A VMD Plugin to Browse, Discover, and Install VMD Extensions.","Herein we present the VMD Store, an open-source VMD plugin that simplifies the way that users browse, discover, install, update, and uninstall extensions for the Visual Molecular Dynamics (VMD) software. The VMD Store obtains data about all the indexed VMD extensions hosted on GitHub and presents a one-click mechanism to install and configure VMD extensions. 
This plugin arises in an attempt to aggregate all VMD extensions into a single platform. The VMD Store is available, free of charge, for Windows, macOS, and Linux at https://biosim.pt/software/ and requires VMD 1.9.3 (or later).",2019-11-04 +31719551,The gene structure and hypervariability of the complete Penaeus monodon Dscam gene.,"Using two advanced sequencing approaches, Illumina and PacBio, we derive the entire Dscam gene from an M2 assembly of the complete Penaeus monodon genome. The P. monodon Dscam (PmDscam) gene is ~266 kbp, with a total of 44 exons, 5 of which are subject to alternative splicing. PmDscam has a conserved architectural structure consisting of an extracellular region with hypervariable Ig domains, a transmembrane domain, and a cytoplasmic tail. We show that, contrary to a previous report, there are in fact 26, 81 and 26 alternative exons in N-terminal Ig2, N-terminal Ig3 and the entirety of Ig7, respectively. We also identified two alternatively spliced exons in the cytoplasmic tail, with transmembrane domains in exon variants 32.1 and 32.2, and stop codons in exon variants 44.1 and 44.2. This means that alternative splicing is involved in the selection of the stop codon. There are also 7 non-constitutive cytoplasmic tail exons that can either be included or skipped. Alternative splicing and the non-constitutive exons together produce more than 21 million isoform combinations from one PmDscam locus in the P. monodon gene. A public-facing database that allows BLAST searches of all 175 exons in the PmDscam gene has been established at http://pmdscam.dbbs.ncku.edu.tw/ .",2019-11-12 +30627601,"Time barrier to trade: Data on 190 economies' export and import time, 2005-2018.","This data article presents data on time to export and import across 190 economies, for the years 2005-2018. 
The data can foster research on international trade, and are of great academic and political value given the growing awareness and importance of time as a trade barrier. The data are publicly available at https://www.doingbusiness.org/data. A subset of the data is used in the related research data article, ""Time barrier to export for OECD countries"" (Li, 2018). Data on the number of documents required in these economies to export and import are also presented, for the years 2005-2015.",2018-11-14 +26123534,The Mouse Genomes Project: a repository of inbred laboratory mouse strain genomes.,"The Mouse Genomes Project was initiated in 2009 with the goal of using next-generation sequencing technologies to catalogue molecular variation in the common laboratory mouse strains, and a selected set of wild-derived inbred strains. The initial sequencing and survey of sequence variation in 17 inbred strains was completed in 2011 and included comprehensive catalogue of single nucleotide polymorphisms, short insertion/deletions, larger structural variants including their fine scale architecture and landscape of transposable element variation, and genomic sites subject to post-transcriptional alteration of RNA. From this beginning, the resource has expanded significantly to include 36 fully sequenced inbred laboratory mouse strains, a refined and updated data processing pipeline, and new variation querying and data visualisation tools which are available on the project's website ( http://www.sanger.ac.uk/resources/mouse/genomes/ ). The focus of the project is now the completion of de novo assembled chromosome sequences and strain-specific gene structures for the core strains. We discuss how the assembled chromosomes will power comparative analysis, data access tools and future directions of mouse genetics.",2015-06-30 +30204847,KeggExp: a web server for visual integration of KEGG pathways and expression profile data.,"

Summary

Effective visualization is important for knowledge discovery when analysing expression profile data. However, existing tools for visually integrating expression profile data with KEGG pathway maps lack extensive interactive visualization operations. KeggExp simultaneously presents the pathway map of one pathway, dendrogram and heatmap of the genes in the pathway and scatter map of one gene; and also provides interactive operations for highlighting specific genes on the pathway map, including differentially-expressed genes, co-expressed genes selected from the heatmap and user-input genes. With KeggExp, researchers, including those without programming skills, can take advantage of its interactive operations to determine key genes or pathways when analysing expression profile data.

Availability and implementation

Freely available at http://www.fgvis.com/expressvis/KeggExp/. Language: JavaScript, python; Libraries: D3.js, Rxjs, Angular, Django, Django rest frame work, Scipy.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +29905871,HMMER web server: 2018 update.,"The HMMER webserver [http://www.ebi.ac.uk/Tools/hmmer] is a free-to-use service which provides fast searches against widely used sequence databases and profile hidden Markov model (HMM) libraries using the HMMER software suite (http://hmmer.org). The results of a sequence search may be summarized in a number of ways, allowing users to view and filter the significant hits by domain architecture or taxonomy. For large scale usage, we provide an application programmatic interface (API) which has been expanded in scope, such that all result presentations are available via both HTML and API. Furthermore, we have refactored our JavaScript visualization library to provide standalone components for different result representations. These consume the aforementioned API and can be integrated into third-party websites. The range of databases that can be searched against has been expanded, adding four sequence datasets (12 in total) and one profile HMM library (6 in total). To help users explore the biological context of their results, and to discover new data resources, search results are now supplemented with cross references to other EMBL-EBI databases.",2018-07-01 +28862214,Respiratory cancer database: An open access database of respiratory cancer gene and miRNA.,"

Aims

Respiratory cancer database (RespCanDB) is a genomic and proteomic database of cancer of respiratory organ. It also includes the information of medicinal plants used for the treatment of various respiratory cancers with structure of its active constituents as well as pharmacological and chemical information of drug associated with various respiratory cancers.

Materials and methods

Data in RespCanDB has been manually collected from published research article and from other databases. Data has been integrated using MySQL an object-relational database management system. MySQL manages all data in the back-end and provides commands to retrieve and store the data into the database. The web interface of database has been built in ASP.

Results and conclusions

RespCanDB is expected to contribute to the understanding of scientific community regarding respiratory cancer biology as well as developments of new way of diagnosing and treating respiratory cancer. Currently, the database consist the oncogenomic information of lung cancer, laryngeal cancer, and nasopharyngeal cancer. Data for other cancers, such as oral and tracheal cancers, will be added in the near future. The URL of RespCanDB is http://ridb.subdic-bioinformatics-nitrr.in/.",2017-07-01 +23584834,A database for curating the associations between killer cell immunoglobulin-like receptors and diseases in worldwide populations.,"The killer cell immunoglobulin-like receptors (KIR) play a fundamental role in the innate immune system, through their interactions with human leucocyte antigen (HLA) molecules, leading to the modulation of activity in natural killer (NK) cells, mainly related to killing pathogen-infected cells. KIR genes are hugely polymorphic both in the number of genes an individual carries and in the number of alleles identified. We have previously developed the Allele Frequency Net Database (AFND, http://www.allelefrequencies.net), which captures worldwide frequencies of alleles, genes and haplotypes for several immune genes, including KIR genes, in healthy populations, covering >4 million individuals. Here, we report the creation of a new database within AFND, named KIR and Diseases Database (KDDB), capturing a large quantity of data derived from publications in which KIR genes, alleles, genotypes and/or haplotypes have been associated with infectious diseases (e.g. hepatitis C, HIV, malaria), autoimmune disorders (e.g. type I diabetes, rheumatoid arthritis), cancer and pregnancy-related complications. KDDB has been created through an extensive manual curation effort, extracting data on more than a thousand KIR-disease records, comprising >50 000 individuals. 
KDDB thus provides a new community resource for understanding not only how KIR genes are associated with disease, but also, by working in tandem with the large data sets already present in AFND, where particular genes, genotypes or haplotypes are present in worldwide populations or different ethnic groups. We anticipate that KDDB will be an important resource for researchers working in immunogenetics. Database URL: http://www.allelefrequencies.net/diseases/.",2013-04-12 +29527200,Whole Genome Sequence Analysis of CTX-M-15 Producing Klebsiella Isolates Allowed Dissecting a Polyclonal Outbreak Scenario.,"Extended-spectrum β-lactamase (ESBL) producing Klebsiella pneumoniae pose an important threat of infection with increased morbidity and mortality, especially for immunocompromised patients. Here, we use the rise of multidrug-resistant K. pneumoniae in a German neurorehabilitation center from April 2015 to April 2016 to dissect the benefit of whole genome sequencing (WGS) for outbreak analyses. In total, 53 isolates were obtained from 52 patients and examined using WGS. Two independent analysis strategies (reference-based and -free) revealed the same distinct clusters of two CTX-M-15 producing K. pneumoniae clones (ST15, n = 31; ST405, n = 7) and one CTX-M-15 producing Klebsiella quasipneumoniae strain (ST414, n = 8). Additionally, we determined sequence variations associated with antimicrobial resistance phenotypes in single isolates expressing carbapenem and colistin resistance, respectively. For rapid detection of the major K. pneumoniae outbreak clone (ST15), a selective triplex PCR was deduced from WGS data of the major outbreak strain and K. pneumoniae genome data deposited in central databases. 
Moreover, we introduce two novel open-source applications supporting reference genome selection (refRank; https://gitlab.com/s.fuchs/refRank) and alignment-based SNP-filtering (SNPfilter; https://gitlab.com/s.fuchs/snpfilter) in NGS analyses.",2018-02-23 +33320311,Effects of Donning and Wearing Personal Protective Equipment on Tourniquet Use and Conversion.,"

Background

We sought to gather data about the effects of personal protective equipment (PPE) use on tourniquet interventions by preliminarily developing a way to simulate delay effects, particularly on time and blood loss. Such knowledge might aid readiness. Field calls to emergency departments may indicate donning of PPE before patient arrival. The purpose of this study was to investigate (1) delay effects of donning the PPE studied on field-tourniquet control of hemorrhage and (2) delay effects of wearing the PPE on application of a field tourniquet and its conversion to a pneumatic tourniquet.

Methods

The experiment simulated 30 tests of nonpneumatic field tourniquet use (http://www.combattourniquet.com/wp -content). The research intervention was the use of PPE. Data were grouped. The control group had no PPE (PPE0). PPE1 and PPE2 groups had mostly improvised and off-the-shelf equipment, respectively. PPE1 included donning a coat, goggles, face covering, cap, booties, and gloves. PPE2 had analogous items. The group order was randomized. A test included paired trials: field tourniquet, followed by conversion. An investigator simulated the caregiver. A task trainer simulated a thigh amputation. Donning delays were evaluated as differences in mean times to stop bleeding compared with PPE0. Blood loss results from donning PPE were calculated as the delay multiplied by its bleeding rate, 500mL/min.

Results

PPE0 had no delay: its mean blood loss was 392mL. PPE1 had 805mL more blood loss than PPE0 did. PPE2 exceeded PPE0 by 1004mL. Donning time (blood loss) for PPE1 and PPE2 were 1.4 minutes (712mL) and 1.7 minutes (863mL), respectively. The wearing of PPE did not slow down field tourniquet application or its conversion.

Conclusions

How long it took to don PPE delayed the time to stop bleeding and increased blood loss, but wearing PPE slowed down neither field tourniquet application nor its conversion.",2020-01-01 +29788129,mTM-align: a server for fast protein structure database search and multiple protein structure alignment.,"With the rapid increase of the number of protein structures in the Protein Data Bank, it becomes urgent to develop algorithms for efficient protein structure comparisons. In this article, we present the mTM-align server, which consists of two closely related modules: one for structure database search and the other for multiple structure alignment. The database search is speeded up based on a heuristic algorithm and a hierarchical organization of the structures in the database. The multiple structure alignment is performed using the recently developed algorithm mTM-align. Benchmark tests demonstrate that our algorithms outperform other peering methods for both modules, in terms of speed and accuracy. One of the unique features for the server is the interplay between database search and multiple structure alignment. The server provides service not only for performing fast database search, but also for making accurate multiple structure alignment with the structures found by the search. For the database search, it takes about 2-5 min for a structure of a medium size (∼300 residues). For the multiple structure alignment, it takes a few seconds for ∼10 structures of medium sizes. The server is freely available at: http://yanglab.nankai.edu.cn/mTM-align/.",2018-07-01 +30053265,"exoRBase: a database of circRNA, lncRNA and mRNA in human blood exosomes.","Exosomes, which are nanosized endocytic vesicles that are secreted by most cells, contain an abundant cargo of different RNA species that can modulate the behavior of recipient cells and may be used as circulating biomarkers for diseases. 
Here, we develop a web-accessible database (http://www.exoRBase.org), exoRBase, which is a repository of circular RNA (circRNA), long non-coding RNA (lncRNA) and messenger RNA (mRNA) derived from RNA-seq data analyses of human blood exosomes. Experimental validations from the published literature are also included. exoRBase features the integration and visualization of RNA expression profiles based on normalized RNA-seq data spanning both normal individuals and patients with different diseases. exoRBase aims to collect and characterize all long RNA species in human blood exosomes. The first release of exoRBase contains 58 330 circRNAs, 15 501 lncRNAs and 18 333 mRNAs. The annotation, expression level and possible original tissues are provided. exoRBase will aid researchers in identifying molecular signatures in blood exosomes and will trigger new exosomal biomarker discovery and functional implication for human diseases.",2018-01-01 +29106611,mVOC 2.0: a database of microbial volatiles.,"Metabolic capabilities of microorganisms include the production of secondary metabolites (e.g. antibiotics). The analysis of microbial volatile organic compounds (mVOCs) is an emerging research field with huge impact on medical, agricultural and biotechnical applied and basic science. The mVOC database (v1) has grown with microbiome research and integrated species information with data on emitted volatiles. Here, we present the mVOC 2.0 database with about 2000 compounds from almost 1000 species and new features to work with the database. The extended collection of compounds was augmented with data regarding mVOC-mediated effects on plants, fungi, bacteria and (in-)vertebrates. The mVOC database 2.0 now features a mass spectrum finder, which allows a quick mass spectrum comparison for compound identification and the generation of species-specific VOC signatures. Automatic updates, useful links and search for mVOC literature are also included. 
The mVOC database aggregates and refines available information regarding microbial volatiles, with the ultimate aim to provide a comprehensive and informative platform for scientists working in this research field. To address this need, we maintain a publicly available mVOC database at: http://bioinformatics.charite.de/mvoc.",2018-01-01 +30268934,GAAD: A Gene and Autoimmiune Disease Association Database.,"Autoimmune diseases (ADs) arise from an abnormal immune response of the body against substances and tissues normally present in the body. More than a hundred of ADs have been described in the literature so far. Although their etiology remains largely unclear, various types of ADs tend to share more associated genes with other types of ADs than with non-AD types. Here we present GAAD, a gene and AD association database. In GAAD, we collected 44,762 associations between 49 ADs and 4249 genes from public databases and MEDLINE documents. We manually verified the associations to ensure the quality and credibility. We reconstructed and recapitulated the relationships among ADs using their shared genes, which further validated the quality of our data. We also provided a list of significantly co-occurring gene pairs among ADs; with embedded tools, users can query gene co-occurrences and construct customized co-occurrence network with genes of interest. To make GAAD more straightforward to experimental biologists and medical scientists, we extracted additional information describing the associations through text mining, including the putative diagnostic value of the associations, type and position of gene polymorphisms, expression changes of implicated genes, as well as the phenotypical consequences, and grouped the associations accordingly. GAAD is freely available at http://gaad.medgenius.info.",2018-08-01 +22260278,Influenza research database: an integrated bioinformatics resource for influenza research and surveillance.,"

Background

The recent emergence of the 2009 pandemic influenza A/H1N1 virus has highlighted the value of free and open access to influenza virus genome sequence data integrated with information about other important virus characteristics.

Design

The Influenza Research Database (IRD, http://www.fludb.org) is a free, open, publicly-accessible resource funded by the U.S. National Institute of Allergy and Infectious Diseases through the Bioinformatics Resource Centers program. IRD provides a comprehensive, integrated database and analysis resource for influenza sequence, surveillance, and research data, including user-friendly interfaces for data retrieval, visualization and comparative genomics analysis, together with personal log in-protected 'workbench' spaces for saving data sets and analysis results. IRD integrates genomic, proteomic, immune epitope, and surveillance data from a variety of sources, including public databases, computational algorithms, external research groups, and the scientific literature.

Results

To demonstrate the utility of the data and analysis tools available in IRD, two scientific use cases are presented. A comparison of hemagglutinin sequence conservation and epitope coverage information revealed highly conserved protein regions that can be recognized by the human adaptive immune system as possible targets for inducing cross-protective immunity. Phylogenetic and geospatial analysis of sequences from wild bird surveillance samples revealed a possible evolutionary connection between influenza virus from Delaware Bay shorebirds and Alberta ducks.

Conclusions

The IRD provides a wealth of integrated data and information about influenza virus to support research of the genetic determinants dictating virus pathogenicity, host range restriction and transmission, and to facilitate development of vaccines, diagnostics, and therapeutics.",2012-01-20 +30199612,Not all pycnodysostosis-related mutants of human cathepsin K are inactive - crystal structure and biochemical studies of an active mutant I249T.,"Human cathepsin K (CTSK) is a collagenolytic lysosomal cysteine protease that plays an important role in bone turnover. Mutation in CTSK gene is associated with loss of collagenolytic activity of CTSK leading to an autosomal recessive bone disorder called pycnodysostosis. Although a number of pycnodysostotic missense mutations have been reported, underlying mechanism of the disease is not clear. In this study, we investigated in vitro six recombinant pycnodysostosis-related mutants of human CTSK (G79E, I249T, G243E, G303E, G319C and Q187P). While all the mutants, like wild-type, show similar high levels of expression in Escherichia coli, four of them (G79E, G303E, G319C and Q187P) are inactive, unstable and spontaneously degrade during purification process. In contrast, proteolytic/collagenolytic activity, zymogen activation kinetics and stability of G243E and I249T mutants are nominally affected. Crystal structure of I249T at 1.92 Å resolution shows that the mutation in R-domain causes conformational changes of a surface loop in the L-domain although the catalytic cleft remains unaltered. Molecular simulation, normal mode analysis and fluorescence lifetime measurement eliminated the possibility that the change in L-domain surface loop orientation is a crystallization artefact. CD-based thermal melting profile indicates that stability of I249T is significantly higher than wild-type. 
Our study reports for the first time that pycnodysostosis-related mutations do not always lead to complete loss of general proteolytic activity or specific collagenolytic activity of CTSK. The first crystal structure of a pycnodysostotic mutant (I249T) provides critical information that may pave new avenues towards understanding the disease at molecular level. DATABASE: The atomic co-ordinates and structure factors for I249T mutant of human CTSK (codes 5Z5O) have been deposited in the Protein Data Bank (http://wwpdb.org/).",2018-09-26 +30165424,Penalized co-inertia analysis with applications to -omics data.,"

Motivation

Co-inertia analysis (CIA) is a multivariate statistical analysis method that can assess relationships and trends in two sets of data. Recently CIA has been used for an integrative analysis of multiple high-dimensional omics data. However, for classical CIA, all elements in the loading vectors are nonzero, presenting a challenge for the interpretation when analyzing omics data. For other multivariate statistical methods such as canonical correlation analysis (CCA), penalized least squares (PLS), various approaches have been proposed to produce sparse loading vectors via l1-penalization/constraint. We propose a novel CIA method that uses l1-penalization to induce sparsity in estimators of loading vectors. Our method simultaneously conducts model fitting and variable selection. Also, we propose another CIA method that incorporates structure/network information such as those from functional genomics, besides using sparsity penalty so that one can get biologically meaningful and interpretable results.

Results

Extensive simulations demonstrate that our proposed penalized CIA methods achieve the best or close to the best performance compared to the existing CIA method in terms of feature selection and recovery of true loading vectors. Also, we apply our methods to the integrative analysis of gene expression data and protein abundance data from the NCI-60 cancer cell lines. Our analysis of the NCI-60 cancer cell line data reveals meaningful variables for cancer diseases and biologically meaningful results that are consistent with previous studies.

Availability and implementation

Our algorithms are implemented as an R package which is freely available at: https://www.med.upenn.edu/long-lab/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +27650402,A corpus for plant-chemical relationships in the biomedical domain.,"

Background

Plants are natural products that humans consume in various ways including food and medicine. They have a long empirical history of treating diseases with relatively few side effects. Based on these strengths, many studies have been performed to verify the effectiveness of plants in treating diseases. It is crucial to understand the chemicals contained in plants because these chemicals can regulate activities of proteins that are key factors in causing diseases. With the accumulation of a large volume of biomedical literature in various databases such as PubMed, it is possible to automatically extract relationships between plants and chemicals in a large-scale way if we apply a text mining approach. A cornerstone of achieving this task is a corpus of relationships between plants and chemicals.

Results

In this study, we first constructed a corpus for plant and chemical entities and for the relationships between them. The corpus contains 267 plant entities, 475 chemical entities, and 1,007 plant-chemical relationships (550 and 457 positive and negative relationships, respectively), which are drawn from 377 sentences in 245 PubMed abstracts. Inter-annotator agreement scores for the corpus among three annotators were measured. The simple percent agreement scores for entities and trigger words for the relationships were 99.6 and 94.8 %, respectively, and the overall kappa score for the classification of positive and negative relationships was 79.8 %. We also developed a rule-based model to automatically extract such plant-chemical relationships. When we evaluated the rule-based model using the corpus and randomly selected biomedical articles, overall F-scores of 68.0 and 61.8 % were achieved, respectively.

Conclusion

We expect that the corpus for plant-chemical relationships will be a useful resource for enhancing plant research. The corpus is available at http://combio.gist.ac.kr/plantchemicalcorpus .",2016-09-20 +32458180,Efficacy and complications of cavo-tricuspid isthmus-dependent atrial flutter ablation in patients with and without structural heart disease: results from the German Ablation Registry.,"

Background

The impact of structural heart disease (SHD) on safety and efficacy of catheter ablation of cavo-tricuspid isthmus-dependent atrial flutter (AFLU) is unclear. In addition, recent data suggest a higher complication rate of AFLU ablation compared to the more complex atrial fibrillation (AF) ablation procedure.

Methods and results

Within our prospective multicenter registry, 3526 consecutive patients underwent AFLU ablation at 49 German electrophysiological centers from 2007 to 2010. For the present analysis, the patients were divided into a group with SHD (n = 2164 [61.4%]; median age 69 years; 78.5% male) and a group without SHD (n = 1362 [38.6%]; 65 years; 70.3% male). In our study, SHD mainly encompasses coronary artery disease (52.6%), left ventricular ejection fraction ≤ 50% (47.6%), and hypertensive heart disease (28.0%). The primary ablation success (97%) and the incidence of major (0.2%) or moderate (1.2%) complications did not differ significantly between the two groups (P = 1.0 and 0.87, respectively). Vascular access site complications (0.6%), AV block III° (0.2%), and bleeding (≥ BARC II: 0.2%) were most common. After a median 562 days of follow-up, we observed a 2.92-fold higher one-year mortality (P < 0.0001) in patients with SHD. Patients' satisfaction with the ablation therapy (72.0% satisfied) was close to the overall subjective tachyarrhythmia-free rate (70.7%).

Conclusions

The present analysis demonstrates that ablation of cavo-tricuspid isthmus dependent AFLU in patients with SHD has a comparable, excellent risk-benefit profile in our large ""real-world"" registry. Mortality rates expectedly are higher in patients with SHD and AFLU compared to patients without SHD. CLINICALTRIALS.GOV: NCT01197638, http://clinicaltrials.gov/ct2/show/NCT01197638.",2020-05-27 +31705429,"A systematic scoping review of ethical issues in mentoring in internal medicine, family medicine and academic medicine.","Mentoring's role in medical education is threatened by the potential abuse of mentoring relationships. Particularly affected are mentoring relationships between senior clinicians and junior doctors which lie at the heart of mentoring. To better understand and address these concerns, a systematic scoping review into prevailing accounts of ethical issues and professional lapses in mentoring is undertaken. Arksey and O'Malley's (Int J Soc Res Methodol 8(1):19-32, 2005. https://doi.org/10.1080/1364557032000119616) methodological framework for conducting scoping reviews was employed to explore the scope of ethical concerns in mentoring in general medicine. Databases searched included PubMed, ScienceDirect, ERIC, Embase, Scopus, Mednar and OpenGrey. 3391 abstracts were identified from the initial search after removal of duplicates, 412 full-text articles were reviewed, 98 articles were included and thematically analysed. Unsatisfactory matching, misaligned expectations, inadequate mentor training, cursory codes of conduct, sketchy standards of practice, meagre oversight and unstructured processes have been identified as potential causes for ethical and professional breaches in mentoring practice. Changes in how professionalism is viewed suggest further studies of educational culture should also be carried out. 
The host organization plays a major role in establishing codes of conduct, expectations, and holistic, longitudinal oversight of the mentoring process and mentoring relationships.",2019-11-09 +30999838,Metabolic and signalling network maps integration: application to cross-talk studies and omics data analysis in cancer.,"

Background

The interplay between metabolic processes and signalling pathways remains poorly understood. Global, detailed and comprehensive reconstructions of human metabolism and signalling pathways exist in the form of molecular maps, but they have never been integrated together. We aim at filling in this gap by integrating both signalling and metabolic pathways allowing a visual exploration of multi-level omics data and study of cross-regulatory circuits between these processes in health and in disease.

Results

We combined two comprehensive manually curated network maps. Atlas of Cancer Signalling Network (ACSN), containing mechanisms frequently implicated in cancer; and ReconMap 2.0, a comprehensive reconstruction of human metabolic network. We linked ACSN and ReconMap 2.0 maps via common players and represented the two maps as interconnected layers using the NaviCell platform for maps exploration ( https://navicell.curie.fr/pages/maps_ReconMap%202.html ). In addition, proteins catalysing metabolic reactions in ReconMap 2.0 were not previously visually represented on the map canvas. This precluded visualisation of omics data in the context of ReconMap 2.0. We suggested a solution for displaying protein nodes on the ReconMap 2.0 map in the vicinity of the corresponding reaction or process nodes. This permits multi-omics data visualisation in the context of both map layers. Exploration and shuttling between the two map layers is possible using Google Maps-like features of NaviCell. The integrated networks ACSN-ReconMap 2.0 are accessible online and allows data visualisation through various modes such as markers, heat maps, bar-plots, glyphs and map staining. The integrated networks were applied for comparison of immunoreactive and proliferative ovarian cancer subtypes using transcriptomic, copy number and mutation multi-omics data. A certain number of metabolic and signalling processes specifically deregulated in each of the ovarian cancer sub-types were identified.

Conclusions

As knowledge evolves and new omics data becomes more heterogeneous, gathering together existing domains of biology under common platforms is essential. We believe that an integrated ACSN-ReconMap 2.0 networks will help in understanding various disease mechanisms and discovery of new interactions at the intersection of cell signalling and metabolism. In addition, the successful integration of metabolic and signalling networks allows broader systems biology approach application for data interpretation and retrieval of intervention points to tackle simultaneously the key players coordinating signalling and metabolism in human diseases.",2019-04-18 +30525651,"Proxl (Protein Cross-Linking Database): A Public Server, QC Tools, and Other Major Updates.","Proxl is an open-source web application for sharing, visualizing, and analyzing bottom-up protein cross-linking mass spectrometry data and results. Proxl's core features include comparing data sets, structural analysis, customizable and interactive data visualizations, access to all underlying mass spectrometry data, and quality-control tools. All features of Proxl are designed to be independent of specific cross-linker chemistry or software analysis pipelines. Proxl's sharing tools allow users to share their data with the public or securely restrict access to trusted collaborators. Since being published in 2016, Proxl has continued to be expanded and improved through active development and collaboration with cross-linking researchers. Some of Proxl's new features include a centralized, public site for sharing data, greatly expanded quality-control tools and visualizations, support for stable isotope-labeled peptides, and general improvements that make Proxl easier to use, data easier to share and import, and data visualizations more customizable. 
Source code and more information are found at http://proxl-ms.org/ .",2018-12-19 +30768147,3DBIONOTES v3.0: crossing molecular and structural biology data with genomic variations.,"

Motivation

Many diseases are associated with single nucleotide polymorphisms that affect critical regions of proteins such as binding sites or post-translational modifications. Therefore, analysing genomic variants with structural and molecular biology data is a powerful framework in order to elucidate the potential causes of such diseases.

Results

A new version of our web framework 3DBIONOTES is presented. This version offers new tools to analyse and visualize protein annotations and genomic variants, including a contingency analysis of variants and amino acid features by means of a Fisher exact test, the integration of a gene annotation viewer to highlight protein features on gene sequences and a protein-protein interaction viewer to display protein annotations at network level.

Availability and implementation

The web server is available at https://3dbionotes.cnb.csic.es.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

Spanish National Institute for Bioinformatics (INB ELIXIR-ES) and Biocomputing Unit, National Centre of Biotechnology (CSIC)/Instruct Image Processing Centre, C/ Darwin nº 3, Campus of Cantoblanco, 28049 Madrid, Spain.",2019-09-01 +31774859,Factors associated with full immunization of children 12-23 months of age in Ethiopia: A multilevel analysis using 2016 Ethiopia Demographic and Health Survey.,"BACKGROUND:Only 40% of World Health Assembly member states achieved 90% national full vaccination coverage in 2015. In the African region, 79% of the countries had not achieved the target in 2015. In Ethiopia, only 39% of children 12-23 months of age were fully vaccinated. Though different studies were conducted in Ethiopia, they were limited in scope and used single level analysis. Therefore, this study aimed to assess individual and community level factors associated with full immunization among children 12-23 months of age in Ethiopia. METHODS:The data was obtained from Ethiopia Demographic and Health Survey 2016, conducted from January 2016 to June 2016. The sample was taken using two stage stratified sampling. In stage one, 645 Enumeration Areas and in stage two 28 households per Enumeration Area were selected systematically. Weighted sample of 1929 children 12-23 months of age were included in the study. Data was extracted from http://www.DHSprogram.com. Multilevel logistic regression was employed. Akaike Information Criteria was used to select best fit model. RESULTS:Mother's education, husband employment, mother's religion, mother's antenatal care visit, presence of vaccination document, region and community antenatal care utilization were significantly associated with children full vaccination. The odds of full vaccination were 2.5 [AOR = 2.48 95% CI: 1.35, 4.56] and 1.6 [AOR = 1.58 95% CI: 1.1, 2.28] times higher in children of mothers with secondary or higher and primary education respectively than children of mothers with no education. 
CONCLUSION:This study showed that children full vaccination is affected both by the individual and community level factors. Therefore, efforts to increase children full vaccination status need to target both at individual and community level.",2019-11-27 +32645039,"UFO: A tool for unifying biomedical ontology-based semantic similarity calculation, enrichment analysis and visualization.","

Background

Biomedical ontologies have been growing quickly and proven to be useful in many biomedical applications. Important applications of those data include estimating the functional similarity between ontology terms and between annotated biomedical entities, analyzing enrichment for a set of biomedical entities. Many semantic similarity calculation and enrichment analysis methods have been proposed for such applications. Also, a number of tools implementing the methods have been developed on different platforms. However, these tools have implemented a small number of the semantic similarity calculation and enrichment analysis methods for a certain type of biomedical ontology. Note that the methods can be applied to all types of biomedical ontologies. More importantly, each method can be dominant in different applications; thus, users have more choice when a larger number of methods is implemented in a tool. Also, more functions would facilitate their task with ontology.

Results

In this study, we developed a Cytoscape app, named UFO, which unifies most of the semantic similarity measures for between-term and between-entity similarity calculation for all types of biomedical ontologies in OBO format. Based on the similarity calculation, UFO can calculate the similarity between two sets of entities and weigh imported entity networks as well as generate functional similarity networks. Besides, it can perform enrichment analysis of a set of entities by different methods. Moreover, UFO can visualize structural relationships between ontology terms, annotating relationships between entities and terms, and functional similarity between entities. Finally, we demonstrated the ability of UFO through some case studies on finding the best semantic similarity measures for assessing the similarity between human disease phenotypes, constructing biomedical entity functional similarity networks for predicting disease-associated biomarkers, and performing enrichment analysis on a set of similar phenotypes.

Conclusions

Taken together, UFO is expected to be a tool where biomedical ontologies can be exploited for various biomedical applications.

Availability

UFO is distributed as a Cytoscape app, and can be downloaded freely at Cytoscape App (http://apps.cytoscape.org/apps/ufo) for non-commercial use.",2020-07-09 +30649200,Characterization and identification of long non-coding RNAs based on feature relationship.,"

Motivation

The significance of long non-coding RNAs (lncRNAs) in many biological processes and diseases has gained intense interests over the past several years. However, computational identification of lncRNAs in a wide range of species remains challenging; it requires prior knowledge of well-established sequences and annotations or species-specific training data, but the reality is that only a limited number of species have high-quality sequences and annotations.

Results

Here we first characterize lncRNAs in contrast to protein-coding RNAs based on feature relationship and find that the feature relationship between open reading frame length and guanine-cytosine (GC) content presents universally substantial divergence in lncRNAs and protein-coding RNAs, as observed in a broad variety of species. Based on the feature relationship, accordingly, we further present LGC, a novel algorithm for identifying lncRNAs that is able to accurately distinguish lncRNAs from protein-coding RNAs in a cross-species manner without any prior knowledge. As validated on large-scale empirical datasets, comparative results show that LGC outperforms existing algorithms by achieving higher accuracy, well-balanced sensitivity and specificity, and is robustly effective (>90% accuracy) in discriminating lncRNAs from protein-coding RNAs across diverse species that range from plants to mammals. To our knowledge, this study, for the first time, differentially characterizes lncRNAs and protein-coding RNAs based on feature relationship, which is further applied in computational identification of lncRNAs. Taken together, our study represents a significant advance in characterization and identification of lncRNAs and LGC thus bears broad potential utility for computational analysis of lncRNAs in a wide range of species.

Availability and implementation

LGC web server is publicly available at http://bigd.big.ac.cn/lgc/calculator. The scripts and data can be downloaded at http://bigd.big.ac.cn/biocode/tools/BT000004.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +26759061,Psmir: a database of potential associations between small molecules and miRNAs.,"miRNAs are key post-transcriptional regulators of many essential biological processes, and their dysregulation has been validated in almost all human cancers. Restoring aberrantly expressed miRNAs might be a novel therapeutics. Recently, many studies have demonstrated that small molecular compounds can affect miRNA expression. Thus, prediction of associations between small molecules and miRNAs is important for investigation of miRNA-targeted drugs. Here, we analyzed 39 miRNA-perturbed gene expression profiles, and then calculated the similarity of transcription responses between miRNA perturbation and drug treatment to predict drug-miRNA associations. At the significance level of 0.05, we obtained 6501 candidate associations between 1295 small molecules and 25 miRNAs, which included 624 FDA approved drugs. Finally, we constructed the Psmir database to store all potential associations and the related materials. In a word, Psmir served as a valuable resource for dissecting the biological significance in small molecules' effects on miRNA expression, which will facilitate developing novel potential therapeutic targets or treatments for human cancers. Psmir is supported by all major browsers, and is freely available at http://www.bio-bigdata.com/Psmir/.",2016-01-13 +28814974,Time-resolved transcriptome analysis and lipid pathway reconstruction of the oleaginous green microalga Monoraphidium neglectum reveal a model for triacylglycerol and lipid hyperaccumulation.,"

Background

Oleaginous microalgae are promising production hosts for the sustainable generation of lipid-based bioproducts and as bioenergy carriers such as biodiesel. Transcriptomics of the lipid accumulation phase, triggered efficiently by nitrogen starvation, is a valuable approach for the identification of gene targets for metabolic engineering.

Results

An explorative analysis of the detailed transcriptional response to different stages of nitrogen availability was performed in the oleaginous green alga Monoraphidium neglectum. Transcript data were correlated with metabolic data for cellular contents of starch and of different lipid fractions. A pronounced transcriptional down-regulation of photosynthesis became apparent in response to nitrogen starvation, whereas glucose catabolism was found to be up-regulated. An in-depth reconstruction and analysis of the pathways for glycerolipid, central carbon, and starch metabolism revealed that distinct transcriptional changes were generally found only for specific steps within a metabolic pathway. In addition to pathway analyses, the transcript data were also used to refine the current genome annotation. The transcriptome data were integrated into a database and complemented with data for other microalgae which were also subjected to nitrogen starvation. It is available at https://tdbmn.cebitec.uni-bielefeld.de.

Conclusions

Based on the transcriptional responses to different stages of nitrogen availability, a model for triacylglycerol and lipid hyperaccumulation is proposed, which involves transcriptional induction of thioesterases, differential regulation of lipases, and a re-routing of the central carbon metabolism. Over-expression of distinct thioesterases was identified to be a potential strategy to increase the oleaginous phenotype of M. neglectum, and furthermore specific lipases were identified as potential targets for future metabolic engineering approaches.",2017-08-14 +31698618,Inferring the three-dimensional structures of the X-chromosome during X-inactivation.,"The Hi-C experiment can capture the genome-wide spatial proximities of the DNA, based on which it is possible to computationally reconstruct the three-dimensional (3D) structures of chromosomes. The transcripts of the long non-coding RNA (lncRNA) Xist spread throughout the entire X-chromosome and alter the 3D structure of the X-chromosome, which also inactivates one copy of the two X-chromosomes in a cell. The Hi-C experiments are expensive and time-consuming to conduct, but the Hi-C data of the active and inactive X-chromosomes are available. However, the Hi-C data of the X-chromosome during the process of X-chromosome inactivation (XCI) are not available. Therefore, the 3D structure of the X-chromosome during the process of X-chromosome inactivation (XCI) remains to be unknown. We have developed a new approach to reconstruct the 3D structure of the X-chromosome during XCI, in which the chain of DNA beads representing a chromosome is stored and simulated inside a 3D cubic lattice. A 2D Gaussian function is used to model the zero values in the 2D Hi-C contact matrices. By applying simulated annealing and Metropolis-Hastings simulations, we first generated the 3D structures of the X-chromosome before and after XCI. 
Then, we used Xist localization intensities on the X-chromosome (RAP data) to model the traveling speeds or acceleration between all bead pairs during the process of XCI. The 3D structures of the X-chromosome at 3 hours, 6 hours, and 24 hours after the start of the Xist expression, which initiates the XCI process, have been reconstructed. The source code and the reconstructed 3D structures of the X-chromosome can be downloaded from http://dna.cs.miami.edu/3D-XCI/.",2019-08-01 +25805861,SRD: a Staphylococcus regulatory RNA database.,"An overflow of regulatory RNAs (sRNAs) was identified in a wide range of bacteria. We designed and implemented a new resource for the hundreds of sRNAs identified in Staphylococci, with primary focus on the human pathogen Staphylococcus aureus. The ""Staphylococcal Regulatory RNA Database"" (SRD, http://srd.genouest.org/) compiled all published data in a single interface including genetic locations, sequences and other features. SRD proposes novel and simplified identifiers for Staphylococcal regulatory RNAs (srn) based on the sRNA's genetic location in S. aureus strain N315 which served as a reference. From a set of 894 sequences and after an in-depth cleaning, SRD provides a list of 575 srn exempt of redundant sequences. For each sRNA, their experimental support(s) is provided, allowing the user to individually assess their validity and significance. RNA-seq analysis performed on strains N315, NCTC8325, and Newman allowed us to provide further details, upgrade the initial annotation, and identified 159 RNA-seq independent transcribed sRNAs. The lists of 575 and 159 sRNAs sequences were used to predict the number and location of srns in 18 S. aureus strains and 10 other Staphylococci. A comparison of the srn contents within 32 Staphylococcal genomes revealed a poor conservation between species. In addition, sRNA structure predictions obtained with MFold are accessible. 
A BLAST server and the intaRNA program, which is dedicated to target prediction, were implemented. SRD is the first sRNA database centered on a genus; it is a user-friendly and scalable device with the possibility to submit new sequences that should spread in the literature.",2015-03-24 +32793638,Prognostic Value of N-Terminal Pro-B-Type Natriuretic Peptide and Glomerular Filtration Rate in Patients With Acute Heart Failure.,"Aims: To investigate the relationship between N-terminal pro-B-type natriuretic peptide (NT-proBNP), Glomerular Filtration Rate (GFR), and outcomes in patients hospitalized with acute heart failure (AHF). Methods: The trial was registered at http://www.chictr.org/cn/. (ChiCTR - ONC - 12001944). A total of 493 patients hospitalized for AHF in cardiology department of the First Affiliated Hospital of Nanjing Medical University from March 2012 to October 2016 were enrolled into registry. The end event was the occurrence of all-cause death within an 18-month follow-up. The data collected from the participants in admission were used to calculate the GFR by chronic kidney disease epidemiology collaboration equation (CKD-EPI) and performed the according statistical analysis. Results: There were 74 participants (13.8%) dropped out and 91 (21.7%) passed away within the 18-month follow up. Comparison of clinical indicators between survival and death group were analyzed for the long-term prognosis of patients with AHF. In the single factor analysis, both NT-proBNP and GFR were statistically significant (P < 0.001). Combined NT-proBNP and GFR in multi-factor COX regression analysis showed significant predictive value (P < 0.001). In receiver operator characteristics (ROC) analyses, the area under the curves (AUC) for NT-proBNP was 0.648 [95%CI: 0.598-0.695, P < 0.001] and for GFR was 0.677 [95%CI: 0.627-0.723, P < 0.001]. According to the Youden index, the best prediction point of NT-proBNP was 2,137 pg/ml and GFR was 61.7 ml/(min·1.73 m2). 
After using the Binary Logistic Regression to combine the two indicators, the AUC was 0.711, which was significantly compared to the AUC of either single factor. The sensitivity of the combined indicators were 0.535, the specificity were 0.853. According to the cut-off point, these two indexes were separated into four groups for further analysis by Kaplan-Meier survival curve comparison (log-rank test), which showed that patients in the group with higher NT-proBNP and lower GFR had the worst prognosis. Conclusions: In patients with NT-proBNP > 2,137 pg/ml and GFR < 61.7 ml/(min·1.73 m2), the risk of death was significantly higher. The combination of GFR and NT-proBNP improved the predictive value for the long-term prognosis of AHF patients.",2020-07-21 +27643925,StemCellCKB: An Integrated Stem Cell-Specific Chemogenomics KnowledgeBase for Target Identification and Systems-Pharmacology Research.,"Given the capacity of self-renewal and multilineage differentiation, stem cells are promising sources for use in regenerative medicines as well as in the clinical treatment of certain hematological malignancies and degenerative diseases. Complex networks of cellular signaling pathways largely determine stem cell fate and function. Small molecules that modulate these pathways can provide important biological and pharmacological insights. However, it is still challenging to identify the specific protein targets of these compounds, to explore the changes in stem cell phenotypes induced by compound treatment and to ascertain compound mechanisms of action. To facilitate stem cell related small molecule study and provide a better understanding of the associated signaling pathways, we have constructed a comprehensive domain-specific chemogenomics resource, called StemCellCKB ( http://www.cbligand.org/StemCellCKB/ ). This new cloud-computing platform describes the chemical molecules, genes, proteins, and signaling pathways implicated in stem cell regulation. 
StemCellCKB is also implemented with web applications designed specifically to aid in the identification of stem cell relevant protein targets, including TargetHunter, a machine-learning algorithm for predicting small molecule targets based on molecular fingerprints, and HTDocking, a high-throughput docking module for target prediction and systems-pharmacology analyses. We have systematically tested StemCellCKB to verify data integrity. Target-prediction accuracy has also been validated against the reported known target/compound associations. This proof-of-concept example demonstrates that StemCellCKB can (1) accurately predict the macromolecular targets of existing stem cell modulators and (2) identify novel small molecules capable of probing stem cell signaling mechanisms, for use in systems-pharmacology studies. StemCellCKB facilitates the exploration and exchange of stem cell chemogenomics data among members of the broader research community.",2016-10-07 +,"Earth science data records of global forest cover and change: Assessment of accuracy in 1990, 2000, and 2005 epochs","The Global Land Cover Facility (GLCF) global forest-cover and -change dataset is a multi-temporal depiction of long-term (multi-decadal), global forest dynamics at high (30-m) resolution. Based on per-pixel estimates of percentage tree cover and their associated uncertainty, the dataset currently represents binary forest cover in nominal 1990, 2000, and 2005 epochs, as well as gains and losses over time. A comprehensive accuracy assessment of the GLCF dataset was performed using a global, design-based sample of 27,988 independent, visually interpreted reference points collected through a two-stage, stratified sampling design wherein experts visually identified forest cover and change in each of the 3 epochs based on Landsat and high-resolution satellite images, vegetation index profiles, and field photos. 
Consistent across epochs, the overall accuracy of the static forest-cover layers was 91%, and the overall accuracy of forest-cover change was >88% —among the highest accuracies reported for recent global forest- and land-cover data products. Both commission error (CE) and omission error (OE) were low for static forest cover in each epoch and for the stable classes between epochs (CE<3%, OE<22%), but errors were larger for forest loss (45%≤CE<62%, 47%10,000 proteins in total. Protein copy numbers revealed a specialization of immune cells for ligand and receptor expression, thereby connecting distinct immune functions. By integrating total and secreted proteomes, we discovered fundamental intercellular communication structures and previously unknown connections between cell types. Our publicly accessible (http://www.immprot.org/) proteomic resource provides a framework for the orchestration of cellular interplay and a reference for altered communication associated with pathology.",2017-03-06 +32640176,Toward an Individualized Neural Assessment of Receptive Language in Children.,"Purpose We aimed to develop a noninvasive neural test of language comprehension to use with nonspeaking children for whom standard behavioral testing is unreliable (e.g., minimally verbal autism). Our aims were threefold. First, we sought to establish the sensitivity of two auditory paradigms to elicit neural responses in individual neurotypical children. Second, we aimed to validate the use of a portable and accessible electroencephalography (EEG) system, by comparing its recordings to those of a research-grade system. Third, in light of substantial interindividual variability in individuals' neural responses, we assessed whether multivariate decoding methods could improve sensitivity. Method We tested the sensitivity of two child-friendly covert N400 paradigms. 
Thirty-one typically developing children listened to identical spoken words that were either strongly predicted by the preceding context or violated lexical-semantic expectations. Context was given by a cue word (Experiment 1) or sentence frame (Experiment 2), and participants either made an overall judgment on word relatedness or counted lexical-semantic violations. We measured EEG concurrently from a research-grade system, Neuroscan's SynAmps2, and an adapted gaming system, Emotiv's EPOC+. Results We found substantial interindividual variability in the timing and topology of N400-like effects. For both paradigms and EEG systems, traditional N400 effects at the expected sensors and time points were statistically significant in around 50% of individuals. Using multivariate analyses, detection rate increased to 88% of individuals for the research-grade system in the sentences paradigm, illustrating the robustness of this method in the face of interindividual variations in topography. Conclusions There was large interindividual variability in neural responses, suggesting interindividual variation in either the cognitive response to lexical-semantic violations and/or the neural substrate of that response. Around half of our neurotypical participants showed the expected N400 effect at the expected location and time points. A low-cost, accessible EEG system provided comparable data for univariate analysis but was not well suited to multivariate decoding. However, multivariate analyses with a research-grade EEG system increased our detection rate to 88% of individuals. This approach provides a strong foundation to establish a neural index of language comprehension in children with limited communication. 
Supplemental Material https://doi.org/10.23641/asha.12606311.",2020-07-08 +31881982,SpliceFinder: ab initio prediction of splice sites using convolutional neural network.,"BACKGROUND:Identifying splice sites is a necessary step to analyze the location and structure of genes. Two dinucleotides, GT and AG, are highly frequent on splice sites, and many other patterns are also on splice sites with important biological functions. Meanwhile, the dinucleotides occur frequently at the sequences without splice sites, which makes the prediction prone to generate false positives. Most existing tools select all the sequences with the two dimers and then focus on distinguishing the true splice sites from those pseudo ones. Such an approach will lead to a decrease in false positives; however, it will result in non-canonical splice sites missing. RESULT:We have designed SpliceFinder based on convolutional neural network (CNN) to predict splice sites. To achieve the ab initio prediction, we used human genomic data to train our neural network. An iterative approach is adopted to reconstruct the dataset, which tackles the data unbalance problem and forces the model to learn more features of splice sites. The proposed CNN obtains the classification accuracy of 90.25%, which is 10% higher than the existing algorithms. The method outperforms other existing methods in terms of area under receiver operating characteristics (AUC), recall, precision, and F1 score. Furthermore, SpliceFinder can find the exact position of splice sites on long genomic sequences with a sliding window. Compared with other state-of-the-art splice site prediction tools, SpliceFinder generates results in about half lower false positive while keeping recall higher than 0.8. Also, SpliceFinder captures the non-canonical splice sites. In addition, SpliceFinder performs well on the genomic sequences of Drosophila melanogaster, Mus musculus, Rattus, and Danio rerio without retraining. 
CONCLUSION:Based on CNN, we have proposed a new ab initio splice site prediction tool, SpliceFinder, which generates less false positives and can detect non-canonical splice sites. Additionally, SpliceFinder is transferable to other species without retraining. The source code and additional materials are available at https://gitlab.deepomics.org/wangruohan/SpliceFinder.",2019-12-27 +31699079,ThalPred: a web-based prediction tool for discriminating thalassemia trait and iron deficiency anemia.,"BACKGROUND:The hypochromic microcytic anemia (HMA) commonly found in Thailand are iron deficiency anemia (IDA) and thalassemia trait (TT). Accurate discrimination between IDA and TT is an important issue and better methods are urgently needed. Although considerable RBC formulas and indices with various optimal cut-off values have been developed, distinguishing between IDA and TT is still a challenging problem due to the diversity of various anemic populations. To address this problem, it is desirable to develop an improved and automated prediction model for discriminating IDA from TT. METHODS:We retrospectively collected laboratory data of HMA found in Thai adults. Five machine learnings, including k-nearest neighbor (k-NN), decision tree, random forest (RF), artificial neural network (ANN) and support vector machine (SVM), were applied to construct a discriminant model. Performance was assessed and compared with thirteen existing discriminant formulas and indices. RESULTS:The data of 186 patients (146 patients with TT and 40 with IDA) were enrolled. The interpretable rules derived from the RF model were proposed to demonstrate the combination of RBC indices for discriminating IDA from TT. A web-based tool 'ThalPred' was implemented using an SVM model based on seven RBC parameters. ThalPred achieved prediction results with an external accuracy, MCC and AUC of 95.59, 0.87 and 0.98, respectively. 
CONCLUSION:ThalPred and an interpretable rule were provided for distinguishing IDA from TT. For the convenience of health care team experimental scientists, a web-based tool has been established at http://codes.bio/thalpred/ by which users can easily get their desired screening test result without the need to go through the underlying mathematical and computational details.",2019-11-07 +29454313,StructRNAfinder: an automated pipeline and web server for RNA families prediction.,"

Background

The function of many noncoding RNAs (ncRNAs) depend upon their secondary structures. Over the last decades, several methodologies have been developed to predict such structures or to use them to functionally annotate RNAs into RNA families. However, to fully perform this analysis, researchers should utilize multiple tools, which require the constant parsing and processing of several intermediate files. This makes the large-scale prediction and annotation of RNAs a daunting task even to researchers with good computational or bioinformatics skills.

Results

We present an automated pipeline named StructRNAfinder that predicts and annotates RNA families in transcript or genome sequences. This single tool not only displays the sequence/structural consensus alignments for each RNA family, according to Rfam database but also provides a taxonomic overview for each assigned functional RNA. Moreover, we implemented a user-friendly web service that allows researchers to upload their own nucleotide sequences in order to perform the whole analysis. Finally, we provided a stand-alone version of StructRNAfinder to be used in large-scale projects. The tool was developed under GNU General Public License (GPLv3) and is freely available at http://structrnafinder.integrativebioinformatics.me .

Conclusions

The main advantage of StructRNAfinder relies on the large-scale processing and integrating the data obtained by each tool and database employed along the workflow, of which several files are generated and displayed in user-friendly reports, useful for downstream analyses and data exploration.",2018-02-17 +30763315,IRIS-EDA: An integrated RNA-Seq interpretation system for gene expression data analysis.,"Next-Generation Sequencing has made available substantial amounts of large-scale Omics data, providing unprecedented opportunities to understand complex biological systems. Specifically, the value of RNA-Sequencing (RNA-Seq) data has been confirmed in inferring how gene regulatory systems will respond under various conditions (bulk data) or cell types (single-cell data). RNA-Seq can generate genome-scale gene expression profiles that can be further analyzed using correlation analysis, co-expression analysis, clustering, differential gene expression (DGE), among many other studies. While these analyses can provide invaluable information related to gene expression, integration and interpretation of the results can prove challenging. Here we present a tool called IRIS-EDA, which is a Shiny web server for expression data analysis. It provides a straightforward and user-friendly platform for performing numerous computational analyses on user-provided RNA-Seq or Single-cell RNA-Seq (scRNA-Seq) data. Specifically, three commonly used R packages (edgeR, DESeq2, and limma) are implemented in the DGE analysis with seven unique experimental design functionalities, including a user-specified design matrix option. 
Seven discovery-driven methods and tools (correlation analysis, heatmap, clustering, biclustering, Principal Component Analysis (PCA), Multidimensional Scaling (MDS), and t-distributed Stochastic Neighbor Embedding (t-SNE)) are provided for gene expression exploration which is useful for designing experimental hypotheses and determining key factors for comprehensive DGE analysis. Furthermore, this platform integrates seven visualization tools in a highly interactive manner, for improved interpretation of the analyses. It is noteworthy that, for the first time, IRIS-EDA provides a framework to expedite submission of data and results to NCBI's Gene Expression Omnibus following the FAIR (Findable, Accessible, Interoperable and Reusable) Data Principles. IRIS-EDA is freely available at http://bmbl.sdstate.edu/IRIS/.",2019-02-14 +29969759,A Simple and Cheap Hospitalization Risk Assessment Tool for Use in Hemodialysis Patients.,"

Objective

To develop a simple, objective, cheap scoring tool incorporating nutritional parameters and other variables to predict hospitalization and mortality among hemodialysis patients - a tool that could be utilized in low resource countries.

Methods

The following variables were scored according to severity into 0, 1, 2 or 3: BMI, functional capacity, HD vintage in years, serum albumin, serum ferritin, and the number of comorbid conditions (diabetes mellitus, hypertension, ischemic heart disease, cerebrovascular disease). This tool was evaluated on our regular hemodialysis patients who were followed up for 24 months (June 2015 till July 2017). In our study population, the maximum score recorded was 12; accordingly, a score of 6 was used to differentiate between a low-risk group (score < 6) or a high-risk group (score ≥6). The 2 groups were compared (using the Chi square test) for possible differences in mortality and hospitalization rates during the follow-up period.

Results

One hundred and forty adult hemodialysis patients were monitored over 2 years; 83 were males and 57 females; 59% of the patients had diabetes mellitus. Twenty-nine patients (30.7%) were found to be in the high-risk group and 111 (79.3%) in the low-risk group. The high-risk patients were almost one and a half times more likely to be hospitalized for vascular access issues than the low-risk group (p = 0.056) and 3 times more likely to be hospitalized for non-vascular access issues than the low-risk group (p = 0.0001). The mortality rate in the high-risk group was 3.1 times that in the low-risk group, but this was not statistically significant.

Conclusion

Using a simple and cheap assessment tool in hemodialysis patients, we have identified patients at high risk for hospitalization rates and mortality. Video Journal Club ""Cappuccino with Claudio Ronco"" at http://www.karger.com/?doi=490544.",2018-07-03 +29854899,An open data repository for steady state analysis of a 100-node electricity distribution network with moderate connection of renewable energy sources.,"The data of this article represent a real electricity distribution network on twenty kilovolts (20 kV) at medium voltage level of the Hellenic electricity distribution system [1]. This network has been chosen as suitable for smart grid analysis. It demonstrates moderate penetration of renewable sources and it has capability in part of time for reverse power flows. It is suitable for studies of load aggregation, storage, demand response. It represents a rural line of fifty-five kilometres (55 km) total length, a typical length for this type. It serves forty-five (45) medium to low voltage transformers and twenty-four (24) connections to photovoltaic plants. The total installed load capacity is twelve mega-volt-ampere (12 MVA), however the maximum observed load is lower. The data are ready to perform load flow simulation on Matpower [2] for the maximum observed load power on the half production for renewables. The simulation results and processed data for creating the source code are also provided on the database available at http://dx.doi.org/10.7910/DVN/1I6MKU.",2017-09-05 +30428005,"SeQuiLa: an elastic, fast and scalable SQL-oriented solution for processing and querying genomic intervals.","

Summary

Efficient processing of large-scale genomic datasets has recently become possible due to the application of 'big data' technologies in bioinformatics pipelines. We present SeQuiLa-a distributed, ANSI SQL-compliant solution for speedy querying and processing of genomic intervals that is available as an Apache Spark package. Proposed range join strategy is significantly (∼22×) faster than the default Apache Spark implementation and outperforms other state-of-the-art tools for genomic intervals processing.

Availability and implementation

The project is available at http://biodatageeks.org/sequila/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-06-01 +31562822,The Clinical Pharmacogenetics Implementation Consortium: 10 Years Later.,"In 2009, the Clinical Pharmacogenetics Implementation Consortium (CPIC, www.cpicpgx.org), a shared project between Pharmacogenomics Knowledge Base (PharmGKB, http://www.pharmgkb.org) and the National Institutes of Health (NIH), was created to provide freely available, evidence-based, peer-reviewed, and updated pharmacogenetic clinical practice guidelines. To date, CPIC has published 23 guidelines (of which 11 have been updated), covering 19 genes and 46 drugs across several therapeutic areas. CPIC also now provides additional resources to facilitate the implementation of pharmacogenetics into routine clinical practice and the electronic health record. Furthermore, since its inception, CPIC's interactions with other resources, databases, websites, and genomic communities have grown. The purpose of this paper is to highlight the progress of CPIC over the past 10 years.",2019-11-05 +24225320,The Genome Database for Rosaceae (GDR): year 10 update.,"The Genome Database for Rosaceae (GDR, http:/www.rosaceae.org), the long-standing central repository and data mining resource for Rosaceae research, has been enhanced with new genomic, genetic and breeding data, and improved functionality. Whole genome sequences of apple, peach and strawberry are available to browse or download with a range of annotations, including gene model predictions, aligned transcripts, repetitive elements, polymorphisms, mapped genetic markers, mapped NCBI Rosaceae genes, gene homologs and association of InterPro protein domains, GO terms and Kyoto Encyclopedia of Genes and Genomes pathway terms. Annotated sequences can be queried using search interfaces and visualized using GBrowse. New expressed sequence tag unigene sets are available for major genera, and Pathway data are available through FragariaCyc, AppleCyc and PeachCyc databases. 
Synteny among the three sequenced genomes can be viewed using GBrowse_Syn. New markers, genetic maps and extensively curated qualitative/Mendelian and quantitative trait loci are available. Phenotype and genotype data from breeding projects and genetic diversity projects are also included. Improved search pages are available for marker, trait locus, genetic diversity and publication data. New search tools for breeders enable selection comparison and assistance with breeding decision making.",2013-11-12 +23185044,OikoBase: a genomics and developmental transcriptomics resource for the urochordate Oikopleura dioica.,"We report the development of OikoBase (http://oikoarrays.biology.uiowa.edu/Oiko/), a tiling array-based genome browser resource for Oikopleura dioica, a metazoan belonging to the urochordates, the closest extant group to vertebrates. OikoBase facilitates retrieval and mining of a variety of useful genomics information. First, it includes a genome browser which interrogates 1260 genomic sequence scaffolds and features gene, transcript and CDS annotation tracks. Second, we annotated gene models with gene ontology (GO) terms and InterPro domains which are directly accessible in the browser with links to their entries in the GO (http://www.geneontology.org/) and InterPro (http://www.ebi.ac.uk/interpro/) databases, and we provide transcript and peptide links for sequence downloads. Third, we introduce the transcriptomics of a comprehensive set of developmental stages of O. dioica at high resolution and provide downloadable gene expression data for all developmental stages. Fourth, we incorporate a BLAST tool to identify homologs of genes and proteins. Finally, we include a tutorial that describes how to use OikoBase as well as a link to detailed methods, explaining the data generation and analysis pipeline. 
OikoBase will provide a valuable resource for research in chordate development, genome evolution and plasticity and the molecular ecology of this important marine planktonic organism.",2012-11-26 +30940089,OMICfpp: a fuzzy approach for paired RNA-Seq counts.,"

Background

RNA sequencing is a widely used technology for differential expression analysis. However, the RNA-Seq do not provide accurate absolute measurements and the results can be different for each pipeline used. The major problem in statistical analysis of RNA-Seq and in the omics data in general, is the small sample size with respect to the large number of variables. In addition, experimental design must be taken into account and few tools consider it.

Results

We propose OMICfpp, a method for the statistical analysis of RNA-Seq paired design data. First, we obtain a p-value for each case-control pair using a binomial test. These p-values are aggregated using an ordered weighted average (OWA) with a given orness previously chosen. The aggregated p-value from the original data is compared with the aggregated p-value obtained using the same method applied to random pairs. These new pairs are generated using between-pairs and complete randomization distributions. This randomization p-value is used as a raw p-value to test the differential expression of each gene. The OMICfpp method is evaluated using public data sets of 68 sample pairs from patients with colorectal cancer. We validate our results through bibliographic search of the reported genes and using simulated data set. Furthermore, we compared our results with those obtained by the methods edgeR and DESeq2 for paired samples. Finally, we propose new target genes to validate these as gene expression signatures in colorectal cancer. OMICfpp is available at http://www.uv.es/ayala/software/OMICfpp_0.2.tar.gz .

Conclusions

Our study shows that OMICfpp is an accurate method for differential expression analysis in RNA-Seq data with paired design. In addition, we propose the use of randomized p-values pattern graphic as a powerful and robust method to select the target genes for experimental validation.",2019-04-02 +31638822,Mapping of Transglutaminase-2 Sites of Human Salivary Small Basic Proline-Rich Proteins by HPLC-High-Resolution ESI-MS/MS.,"Because of the distinctive features of the oral cavity, the determination of the proteins involved in the formation of the ""oral protein pellicle"" is demanding. The present study investigated the susceptibility of several human basic proline-rich peptides, named P-H, P-D, P-F, P-J, and II-2, as substrates of transglutaminase-2. The reactivity of the P-C peptide and statherin was also investigated. Peptides purified from human whole saliva were incubated with the enzyme in the presence or in the absence of monodansyl-cadaverine. Mass spectrometry analyses of the reaction products highlighted that P-H and P-D (P32 and A32 variants) were active substrates, II-2 was less reactive, and P-F and P-J showed very low reactivity. P-C and statherin were highly reactive. All of the peptides formed cyclo derivatives, and only specific glutamine residues were involved in the cycle formation and reacted with monodansyl-cadaverine: Q29 of P-H, Q37 of P-D, Q21 of II-2, Q41 of P-C, and Q37 of statherin were the principal reactive residues. One or two secondary glutamine residues of only P-H, P-D P32, P-C, and statherin were hierarchically susceptible to the reaction with monodansyl-cadaverine. MS and MS/MS data were deposited to the ProteomeXchange Consortium ( http://www.ebi.ac.uk/pride ) via the PRIDE partner repository with the data set identifier PXD014658.",2019-11-06 +31821015,"Methyl Tertiary-Butyl Ether Exposure from Gasoline in the U.S. 
Population, NHANES 2001-2012.","BACKGROUND:Methyl tertiary-butyl ether (MTBE) was used as a gasoline additive in the United States during 1995-2006. Because of concerns about potential exposure and health effects, some U.S. states began banning MTBE use in 2002, leading to a nationwide phaseout in 2006. OBJECTIVES:We investigated the change in blood MTBE that occurred during the years in which MTBE was being phased out of gasoline. METHODS:We used data from the National Health and Nutrition Examination Survey (NHANES) from 2001-2012 to assess the change in blood MTBE over this period. We fit sample-weighted multivariate linear regression models to 12,597 human blood MTBE concentrations from the NHANES 2001-2002 to 2011-2012 survey cycles. RESULTS:The unweighted proportion of the individuals with MTBE blood levels above the limit of detection (LOD) of 1.4 ng/L was 93.9% for 2001-2002. This portion dropped to 25.4% for the period 2011-2012. Weighted blood MTBE median levels (ng/L) (25th and 75th percentiles) decreased from 25.8 (6.08, 68.1) ng/L for the period from 2001-2002 to 4.57 (1.44, 19.1) ng/L for the period from 2005-2006. For the entire postban period (2007-2012), MTBE median levels were below the detection limit of 1.4 ng/L. DISCUSSION:These decreases in blood MTBE coincided with multiple statewide bans that began in 2002 and a nationwide ban in 2006. The multivariate log-linear regression model for the NHANES 2003-2004 data showed significantly higher blood MTBE concentrations in the group who pumped gasoline less than 7 h before questionnaire administration compared to those who pumped gasoline more than 12 h before questionnaire administration (p=0.032). This study is the first large-scale, national-level confirmation of substantial decrease in blood MTBE levels in the general population following the phaseout of the use of MTBE as a fuel additive. 
https://doi.org/10.1289/EHP5572.",2019-12-10 +32377730,Effect of VX‑765 on the transcriptome profile of mice spinal cords with acute injury.,"Previous studies have shown that caspase-1 plays an important role in the acute inflammatory response of spinal cord injury (SCI). VX‑765, a novel and irreversible caspase‑1 inhibitor, has been reported to effectively intervene in inflammation. However, the effect of VX‑765 on genome‑wide transcription in acutely injured spinal cords remains unknown. Therefore, in the present study, RNA‑sequencing (RNA‑Seq) was used to analyze the effect of VX‑765 on the local expression of gene transcription 8 h following injury. The differentially expressed genes (DEGs) underwent enrichment analysis of functions and pathways by Gene Ontology and Kyoto Encyclopedia of Genes and Genomes analyses, respectively. Parallel analysis of western blot confirmed that VX‑765 can effectively inhibit the expression and activation of caspase‑1. RNA‑Seq showed that VX‑765 treatment resulted in 1,137 upregulated and 1,762 downregulated DEGs. These downregulated DEGs and their associated signaling pathways, such as focal adhesion, cytokine‑cytokine receptor interaction, leukocyte transendothelial migration, extracellular matrix‑receptor interaction, phosphatidylinositol 3‑kinase‑protein kinase B, Rap1 and hypoxia inducible factor‑1 signaling pathway, are mainly associated with inflammatory response, local hypoxia, macrophage differentiation, adhesion migration and apoptosis of local cells. This suggests that the application of VX‑765 in the acute phase can improve the local microenvironment of SCI by inhibiting caspase‑1. However, whether VX‑765 can be used as a therapeutic drug for SCI requires further exploration. 
The sequence data have been deposited into the Sequence Read Archive (https://www.ncbi.nlm.nih.gov/sra/PRJNA548970).",2020-05-05 +30574787,"GlyConnect: Glycoproteomics Goes Visual, Interactive, and Analytical.","Knowledge of glycoproteins, their site-specific glycosylation patterns, and the glycan structures that they present to their recognition partners in health and disease is gradually being built on using a range of experimental approaches. The data from these analyses are increasingly being standardized and presented in various sources, from supplemental tables in publications to localized servers in investigator laboratories. Bioinformatics tools are now needed to collect these data and enable the user to search, display, and connect glycomics and glycoproteomics to other sources of related proteomics, genomics, and interactomics information. We here introduce GlyConnect ( https://glyconnect.expasy.org/ ), the central platform of the Glycomics@ExPASy portal for glycoinformatics. GlyConnect has been developed to gather, monitor, integrate, and visualize data in a user-friendly way to facilitate the interpretation of collected glycoscience data. GlyConnect is designed to accommodate and integrate multiple data types as they are increasingly produced.",2019-01-15 +30054214,The DrugPattern tool for drug set enrichment analysis and its prediction for beneficial effects of oxLDL on type 2 diabetes.,"Enrichment analysis methods, e.g., gene set enrichment analysis, represent one class of important bioinformatical resources for mining patterns in biomedical datasets. However, tools for inferring patterns and rules of a list of drugs are limited. In this study, we developed a web-based tool, DrugPattern, for drug set enrichment analysis. We first collected and curated 7019 drug sets, including indications, adverse reactions, targets, pathways, etc. from public databases. 
For a list of interested drugs, DrugPattern then evaluates the significance of the enrichment of these drugs in each of the 7019 drug sets. To validate DrugPattern, we employed it for the prediction of the effects of oxidized low-density lipoprotein (oxLDL), a factor expected to be deleterious. We predicted that oxLDL has beneficial effects on some diseases, most of which were supported by evidence in the literature. Because DrugPattern predicted the potential beneficial effects of oxLDL in type 2 diabetes (T2D), animal experiments were then performed to further verify this prediction. As a result, the experimental evidences validated the DrugPattern prediction that oxLDL indeed has beneficial effects on T2D in the case of energy restriction. These data confirmed the prediction accuracy of our approach and revealed unexpected protective roles for oxLDL in various diseases. This study provides a tool to infer patterns and rules in biomedical datasets based on drug set enrichment analysis. DrugPattern is available at http://www.cuilab.cn/drugpattern.",2018-07-24 +30053270,BioMuta and BioXpress: mutation and expression knowledgebases for cancer biomarker discovery.,"Single-nucleotide variation and gene expression of disease samples represent important resources for biomarker discovery. Many databases have been built to host and make available such data to the community, but these databases are frequently limited in scope and/or content. BioMuta, a database of cancer-associated single-nucleotide variations, and BioXpress, a database of cancer-associated differentially expressed genes and microRNAs, differ from other disease-associated variation and expression databases primarily through the aggregation of data across many studies into a single source with a unified representation and annotation of functional attributes. 
Early versions of these resources were initiated by pilot funding for specific research applications, but newly awarded funds have enabled hardening of these databases to production-level quality and will allow for sustained development of these resources for the next few years. Because both resources were developed using a similar methodology of integration, curation, unification, and annotation, we present BioMuta and BioXpress as allied databases that will facilitate a more comprehensive view of gene associations in cancer. BioMuta and BioXpress are hosted on the High-performance Integrated Virtual Environment (HIVE) server at the George Washington University at https://hive.biochemistry.gwu.edu/biomuta and https://hive.biochemistry.gwu.edu/bioxpress, respectively.",2018-01-01 +29036324,PancanQTL: systematic identification of cis-eQTLs and trans-eQTLs in 33 cancer types.,"Expression quantitative trait locus (eQTL) analysis, which links variations in gene expression to genotypes, is essential to understanding gene regulation and to interpreting disease-associated loci. Currently identified eQTLs are mainly in samples of blood and other normal tissues. However, no database comprehensively provides eQTLs in large number of cancer samples. Using the genotype and expression data of 9196 tumor samples in 33 cancer types from The Cancer Genome Atlas (TCGA), we identified 5 606 570 eQTL-gene pairs in the cis-eQTL analysis and 231 210 eQTL-gene pairs in the trans-eQTL analysis. We further performed survival analysis and identified 22 212 eQTLs associated with patient overall survival. Furthermore, we linked the eQTLs to genome-wide association studies (GWAS) data and identified 337 131 eQTLs that overlap with existing GWAS loci. We developed PancanQTL, a user-friendly database (http://bioinfo.life.hust.edu.cn/PancanQTL/), to store cis-eQTLs, trans-eQTLs, survival-associated eQTLs and GWAS-related eQTLs to enable searching, browsing and downloading. 
PancanQTL could help the research community understand the effects of inherited variants in tumorigenesis and development.",2018-01-01 +31318033,"Moderate Postmeal Walking Has No Beneficial Effects Over Resting on Postprandial Lipemia, Glycemia, Insulinemia, and Selected Oxidative and Inflammatory Parameters in Older Adults with a Cardiovascular Disease Risk Phenotype: A Randomized Crossover Trial.","

Background

Research suggests that postprandial events, as risk factors for cardiovascular diseases (CVDs), are influenced by meal composition and exercise.

Objectives

We investigated the effect of walking versus rest on postprandial metabolic, inflammatory, and oxidative events following the consumption of test meals reflecting 2 different dietary patterns in older adults with an increased CVD risk.

Methods

A randomized crossover trial was conducted in 26 men and women (aged 70 ± 5 y; BMI 30.3 ± 2.3 kg/m2). Each adult participated in 4 treatments combining 1 of 2 iso-energetic (4300 kJ) meals [Western diet high-fat meal (WD): total fat, 59.4 g; saturated fatty acids, 32.0 g, dietary fiber, 4.2 g; or Mediterranean-type diet meal (MD): total fat, 40.1 g; saturated fatty acids, 5.1 g; dietary fiber, 14.5 g] with 30 min walking (4.6 ± 0.1 km/h) or rest. Primary (serum triglycerides) and secondary [serum nonesterified fatty acids (NEFAs); parameters of glucose metabolism, inflammation, endothelial activation, oxidation; blood pressure/heart rate] outcomes were measured at fasting and 1.5, 3.0, and 4.5 h postprandially. Data were analyzed by linear mixed models.

Results

Triglycerides were higher after the WD than after the MD [AUC in mmol/L × min: Western diet high-fat meal plus postprandial walking (WD-W), 218 ± 15.2; Western diet high-fat meal plus postprandial resting (WD-R), 207 ± 12.6; Mediterranean-type diet meal plus postprandial walking (MD-W), 139 ± 9.83; Mediterranean-type diet meal plus postprandial resting (MD-R), 149 ± 8.15; P  < 0.001]. No meal or activity effect was observed for NEFAs based on AUC data (WD-W, -43.5 ± 7.08; WD-R, -49.2 ± 6.94; MD-W, -48.0 ± 11.6; MD-R, -67.6 ± 7.58). Plasma glucose was higher after the MD than after the WD (WD-W, 222 ± 34.9; WD-R, 177 ± 32.8; MD-W, 314 ± 44.4; MD-R, 275 ± 57.8; P  < 0.001), as was serum insulin (AUC in nmol/L × min: WD-W, 82.0 ± 10.3; WD-R, 88.6 ± 12.8; MD-W, 129 ± 14.7; MD-R, 138 ± 20.5; P < 0.001). Plasma IL-6 was higher after walking than after resting (AUC in pg/mL × min: WD-W, 72.0 ± 34.0; WD-R, 14.3 ± 38.8; MD-W, 70.8 ± 39.4; MD-R, 5.60 ± 26.0; P < 0.05). Plasma vitamin C was higher after the MD than after the WD (P < 0.001) and after walking than after resting (P < 0.05; AUC in mg/L × min: WD-W, -305 ± 59.6; WD-R, -396 ± 84.0; MD-W, 113 ± 56.4; MD-R, -44.5 ± 48.1). We observed no meal or activity effects on parameters of oxidation and endothelial adhesion molecules. Our data revealed no significant meal × activity effects on all outcomes.

Conclusions

In older adults with an increased CVD risk, the MD was associated with superior effects on several postprandial parameters (e.g., triglycerides), in comparison to the WD. Data revealed no relevant differences regarding the effects of postmeal walking and resting. None of the 4 treatments can be rated as superior regarding their acute effects on the shown postprandial metabolic, oxidative, and inflammatory parameters. The trial was registered at German Clinical Trials Register (DRKS; http://www.germanctr.de and http://www.drks.de) under identifier DRKS00012409.",2019-11-01 +29607245,Future of Health: Findings from a survey of stakeholders on the future of health and healthcare in England.,"This article presents findings from a survey conducted by RAND Europe at the request of the National Institute for Health Research (NIHR) to gather and synthesise stakeholder views on the future of health and healthcare in England in 20 to 30 years' time. The aim of the research was to generate an evidenced-based picture of the future health and healthcare needs, and how it might differ from today, in order to inform strategic discussions about the future priorities of the NIHR and the health and social care research communities more broadly. The survey provided a rich and varied dataset based on responses from 300 stakeholders in total. A wide range of fields were represented, including public health, social care, primary care, cancer, genomics, mental health, geriatrics, child health, patient advocacy and health policy. The respondent group also included a number of professional and private stakeholder categories, such as clinicians, policy experts, academics and patient and public representatives. The study findings validate a number of prominent health research priorities currently visible in England, such as antimicrobial resistance, the burden of dementia and age-related multi-morbidity, digital health and genomics. 
Interest in these areas and other themes, such as mental health, health inequalities and transforming health service models, cut across multiple disciplinary boundaries. However, it is clear that there are a variety of views among stakeholders on the relative importance of these areas of focus, and the best approach to manage their emergence in the coming decades. The full dataset of survey responses, for which permission to share was given, is a useful resource for those seeking to engage with a particular issue in more depth. The dataset can be found on NIHR's website at: http://nihr.ac.uk/news-and-events/documents/quotes.xls.",2018-04-01 +31633588,Widespread Pain Is Associated with Increased Risk of No Clinical Improvement After TKA in Women.,"

Background

When conservative treatments do not work, TKA may be the best option for patients with knee osteoarthritis, although a relatively large proportion of individuals do not have clinically important improvement after TKA. Evidence also suggests that women are less likely to benefit from TKA than men, but the reasons are unclear. Widespread pain disproportionately affects women and has been associated with worse outcomes after joint arthroplasty, yet it is unknown if the effect of widespread pain on TKA outcomes differs by patient gender.

Questions/purposes

(1) Does the association between widespread pain and no clinically important improvement in osteoarthritis-related pain and disability 2 years after TKA differ between men and women? (2) Does the use of pain medications 2 years after TKA differ between those with widespread pain and those without widespread pain before surgery?

Methods

Osteoarthritis Initiative (https://nda.nih.gov/oai/) study participants were followed annually from March 2005 until October 2015. Participants who underwent TKA up to the 7-year follow-up visit with pain/disability assessment at the protocol-planned visit before TKA and at the second planned annual visit after surgery were included in the analysis. Among 4796 study participants, 391 had a confirmed TKA, including 315 with pain/disability assessment at the protocol-planned visit before TKA. Overall, 95% of participants (298) had the required follow-up assessment; 5% (17) did not have follow-up data. Widespread pain was defined based on the modified American College of Rheumatology criteria. Symptoms were assessed using the WOMAC pain (range 0 to 20; higher score, more pain) and disability (range 0 to 68; higher score, more disability) scores, and the Knee Injury and Osteoarthritis Outcome Score for pain (range 0 to 100; higher score, less pain). Improvements in pain and disability were classified based on improvement from established clinically important differences (decrease in WOMAC pain ≥ 1.5; decrease in WOMAC disability ≥ 6.0; increase in Knee Injury and Osteoarthritis Outcome Score for pain ≥ 9). At baseline, more women presented with widespread pain than men (45% [84 of 184] versus 32% [36 of 114]). Probability and the relative risk (RR) of no clinically important improvement were estimated using a logistic regression analysis in which participants with widespread pain and those without were compared. The analyses were done for men and women separately, then adjusted for depression and baseline outcome scores.

Results

Among women, preoperative widespread pain was associated with an increased risk of no clinically important improvement 2 years after TKA, based on WOMAC pain scores (13.5% versus 4.6%; RR 2.93 [95% CI 1.18 to 7.30]; p = 0.02) and the Knee Injury and Osteoarthritis Outcome Score for pain (16.5% versus 4.9%; RR 3.39 [95% CI 1.34 to 8.59]; p = 0.02). Given the lower and upper limits of the confidence intervals, our data are compatible with a broad range of disparate associations between widespread pain and lack of clinically important improvement in WOMAC pain scores (RR 0.77 [95% CI 0.22 to 2.70]; p = 0.68) and the Knee Injury and Osteoarthritis Outcome Score for pain (RR 1.37 [95% CI 0.47 to 4.00]; p = 0.57) among men, as well as clinically important improvement in WOMAC disability scores among men (RR 0.72 [95% CI 0.20 to 2.55]; p = 0.61) and women (RR 1.98 [95% CI 0.92 to 4.26]; p = 0.08). Participants presenting with widespread pain before TKA were more likely than those without widespread pain to use medication for symptoms of knee osteoarthritis most days for at least 1 month 2 years after TKA (51% [61 of 120] versus 32% [57 of 178]; mean difference, 18.8 [95% CI 7.3 to 30.1]; p < 0.01).

Conclusions

Widespread pain before TKA was associated with an increased risk of no clinically important improvement in knee pain 2 years postoperatively among women. Because of the small number of men with widespread pain in the sample, the results for men were inconclusive. In clinical practice, screening TKA candidates for widespread pain may be useful, and expectations of surgical outcomes may need to be tempered if patients have a concurrent diagnosis of widespread pain. Future studies should include more men with widespread pain and investigate if treatment of widespread pain before or concurrent with TKA surgery may improve surgical outcomes.

Level of evidence

Level III, therapeutic study.",2020-07-01 +26638077,Insights into Sex Chromosome Evolution and Aging from the Genome of a Short-Lived Fish.,"The killifish Nothobranchius furzeri is the shortest-lived vertebrate that can be bred in the laboratory. Its rapid growth, early sexual maturation, fast aging, and arrested embryonic development (diapause) make it an attractive model organism in biomedical research. Here, we report a draft sequence of its genome that allowed us to uncover an intra-species Y chromosome polymorphism representing-in real time-different stages of sex chromosome formation that display features of early mammalian XY evolution ""in action."" Our data suggest that gdf6Y, encoding a TGF-β family growth factor, is the master sex-determining gene in N. furzeri. Moreover, we observed genomic clustering of aging-related genes, identified genes under positive selection, and revealed significant similarities of gene expression profiles between diapause and aging, particularly for genes controlling cell cycle and translation. The annotated genome sequence is provided as an online resource (http://www.nothobranchius.info/NFINgb).",2015-12-01 +29140473,JASPAR 2018: update of the open-access database of transcription factor binding profiles and its web framework.,"JASPAR (http://jaspar.genereg.net) is an open-access database of curated, non-redundant transcription factor (TF)-binding profiles stored as position frequency matrices (PFMs) and TF flexible models (TFFMs) for TFs across multiple species in six taxonomic groups. In the 2018 release of JASPAR, the CORE collection has been expanded with 322 new PFMs (60 for vertebrates and 262 for plants) and 33 PFMs were updated (24 for vertebrates, 8 for plants and 1 for insects). These new profiles represent a 30% expansion compared to the 2016 release. In addition, we have introduced 316 TFFMs (95 for vertebrates, 218 for plants and 3 for insects). 
This release incorporates clusters of similar PFMs in each taxon and each TF class per taxon. The JASPAR 2018 CORE vertebrate collection of PFMs was used to predict TF-binding sites in the human genome. The predictions are made available to the scientific community through a UCSC Genome Browser track data hub. Finally, this update comes with a new web framework with an interactive and responsive user-interface, along with new features. All the underlying data can be retrieved programmatically using a RESTful API and through the JASPAR 2018 R/Bioconductor package.",2018-01-01 +29126123,PedAM: a database for Pediatric Disease Annotation and Medicine.,"There is a significant number of children around the world suffering from the consequence of the misdiagnosis and ineffective treatment for various diseases. To facilitate the precision medicine in pediatrics, a database namely the Pediatric Disease Annotations & Medicines (PedAM) has been built to standardize and classify pediatric diseases. The PedAM integrates both biomedical resources and clinical data from Electronic Medical Records to support the development of computational tools, by which enables robust data analysis and integration. It also uses disease-manifestation (D-M) integrated from existing biomedical ontologies as prior knowledge to automatically recognize text-mined, D-M-specific syntactic patterns from 774 514 full-text articles and 8 848 796 abstracts in MEDLINE. Additionally, disease connections based on phenotypes or genes can be visualized on the web page of PedAM. Currently, the PedAM contains standardized 8528 pediatric disease terms (4542 unique disease concepts and 3986 synonyms) with eight annotation fields for each disease, including definition synonyms, gene, symptom, cross-reference (Xref), human phenotypes and its corresponding phenotypes in the mouse. 
The database PedAM is freely accessible at http://www.unimd.org/pedam/.",2018-01-01 +25217576,Improved rat genome gene prediction by integration of ESTs with RNA-Seq information.,"

Motivation

RNA-Seq (also called whole-transcriptome sequencing) is an emerging technology that uses the capabilities of next-generation sequencing to detect and quantify entire transcripts. One of its important applications is the improvement of existing genome annotations. RNA-Seq provides rapid, comprehensive and cost-effective tools for the discovery of novel genes and transcripts compared with expressed sequence tag (EST), which is instrumental in gene discovery and gene sequence determination. The rat is widely used as a laboratory disease model, but has a less well-annotated genome as compared with humans and mice. In this study, we incorporated deep RNA-Seq data from three rat tissues-bone marrow, brain and kidney-with EST data to improve the annotation of the rat genome.

Results

Our analysis identified 32 197 transcripts, including 13 461 known transcripts, 13 934 novel isoforms and 4802 new genes, which almost doubled the numbers of transcripts in the current public rat genome database (rn5). Comparisons of our predicted protein-coding gene sets with those in public datasets suggest that RNA-Seq significantly improves genome annotation and identifies novel genes and isoforms in the rat. Importantly, the large majority of novel genes and isoforms are supported by direct evidence of RNA-Seq experiments. These predicted genes were integrated into the Rat Genome Database (RGD) and can serve as an important resource for functional studies in the research community.

Availability and implementation

The predicted genes are available at http://rgd.mcw.edu.",2014-09-12 +29796670,HotSpot Wizard 3.0: web server for automated design of mutations and smart libraries based on sequence input information.,"HotSpot Wizard is a web server used for the automated identification of hotspots in semi-rational protein design to give improved protein stability, catalytic activity, substrate specificity and enantioselectivity. Since there are three orders of magnitude fewer protein structures than sequences in bioinformatic databases, the major limitation to the usability of previous versions was the requirement for the protein structure to be a compulsory input for the calculation. HotSpot Wizard 3.0 now accepts the protein sequence as input data. The protein structure for the query sequence is obtained either from eight repositories of homology models or is modeled using Modeller and I-Tasser. The quality of the models is then evaluated using three quality assessment tools-WHAT_CHECK, PROCHECK and MolProbity. During follow-up analyses, the system automatically warns the users whenever they attempt to redesign poorly predicted parts of their homology models. The second main limitation of HotSpot Wizard's predictions is that it identifies suitable positions for mutagenesis, but does not provide any reliable advice on particular substitutions. A new module for the estimation of thermodynamic stabilities using the Rosetta and FoldX suites has been introduced which prevents destabilizing mutations among pre-selected variants entering experimental testing. 
HotSpot Wizard is freely available at http://loschmidt.chemi.muni.cz/hotspotwizard.",2018-07-01 +31449403,CompScore: Boosting Structure-Based Virtual Screening Performance by Incorporating Docking Scoring Function Components into Consensus Scoring.,"Consensus scoring has become a commonly used strategy within structure-based virtual screening (VS) workflows with improved performance compared to those based in a single scoring function. However, no research has been devoted to analyze the worth of docking scoring functions components in consensus scoring. We implemented and tested a method that incorporates docking scoring functions components into the setting of high performance VS workflows. This method uses genetic algorithms for finding the combination of scoring components that maximizes the VS enrichment for any target. Our methodology was validated using a data set including ligands and decoys for 102 targets that have been widely used in VS validation studies. Results show that our approach outperforms other methods for all targets. It also boosts the initial enrichment performance of the traditional use of whole scoring functions in consensus scoring by an average of 45%. Our methodology showed to be outstandingly predictive when challenged to rescore external (previously unseen) data. Remarkably, CompScore was able not only to retain its performance after redocking with a different software, but also proved that the enrichment obtained was not artificial. CompScore is freely available at: http://bioquimio.udla.edu.ec/compscore/ .",2019-09-06 +29718313,"UNRES server for physics-based coarse-grained simulations and prediction of protein structure, dynamics and thermodynamics.","A server implementation of the UNRES package (http://www.unres.pl) for coarse-grained simulations of protein structures with the physics-based UNRES model, coined a name UNRES server, is presented. 
In contrast to most of the protein coarse-grained models, owing to its physics-based origin, the UNRES force field can be used in simulations, including those aimed at protein-structure prediction, without ancillary information from structural databases; however, the implementation includes the possibility of using restraints. Local energy minimization, canonical molecular dynamics simulations, replica exchange and multiplexed replica exchange molecular dynamics simulations can be run with the current UNRES server; the latter are suitable for protein-structure prediction. The user-supplied input includes protein sequence and, optionally, restraints from secondary-structure prediction or small x-ray scattering data, and simulation type and parameters which are selected or typed in. Oligomeric proteins, as well as those containing D-amino-acid residues and disulfide links can be treated. The output is displayed graphically (minimized structures, trajectories, final models, analysis of trajectory/ensembles); however, all output files can be downloaded by the user. The UNRES server can be freely accessed at http://unres-server.chem.ug.edu.pl.",2018-07-01 +29222504,RNAStructuromeDB: A genome-wide database for RNA structural inference.,"RNA plays important roles in almost every aspect of biology, and every aspect of RNA biology is influenced by its folding. This is a particularly important consideration in the era of high-throughput sequencing, when the discovery of novel transcripts far outpaces our knowledge of their functions. To gain a comprehensive picture of biology requires a structural framework for making functional inferences on RNA. To this end we have developed the RNA Structurome Database ( https://structurome.bb.iastate.edu ), a comprehensive repository of RNA secondary structural information that spans the entire human genome. 
Here, we compile folding information for every base pair of the genome that may be transcribed: coding, noncoding, and intergenic regions, as well as repetitive elements, telomeres, etc. This was done by fragmenting the GRCh38 reference genome into 154,414,320 overlapping sequence fragments and, for each fragment, calculating a set of metrics based on the sequence's folding properties. These data will facilitate a wide array of investigations: e.g. discovery of structured regulatory elements in differential gene expression data or noncoding RNA discovery, as well as allow genome-scale analyses of RNA folding.",2017-12-08 +24214966,DOOR 2.0: presenting operons and their functions through dynamic and integrated views.,"We have recently developed a new version of the DOOR operon database, DOOR 2.0, which is available online at http://csbl.bmb.uga.edu/DOOR/ and will be updated on a regular basis. DOOR 2.0 contains genome-scale operons for 2072 prokaryotes with complete genomes, three times the number of genomes covered in the previous version published in 2009. 
DOOR 2.0 has a number of new features, compared with its previous version, including (i) more than 250,000 transcription units, experimentally validated or computationally predicted based on RNA-seq data, providing a dynamic functional view of the underlying operons; (ii) an integrated operon-centric data resource that provides not only operons for each covered genome but also their functional and regulatory information such as their cis-regulatory binding sites for transcription initiation and termination, gene expression levels estimated based on RNA-seq data and conservation information across multiple genomes; (iii) a high-performance web service for online operon prediction on user-provided genomic sequences; (iv) an intuitive genome browser to support visualization of user-selected data; and (v) a keyword-based Google-like search engine for finding the needed information intuitively and rapidly in this database.",2013-11-07 +30596886,Multiview: a software package for multiview pattern recognition methods.,"

Summary

Multiview datasets are the norm in bioinformatics, often under the label multi-omics. Multiview data are gathered from several experiments, measurements or feature sets available for the same subjects. Recent studies in pattern recognition have shown the advantage of using multiview methods of clustering and dimensionality reduction; however, none of these methods are readily available to the best of our knowledge. Multiview extensions of four well-known pattern recognition methods are proposed here: three multiview dimensionality reduction methods (multiview t-distributed stochastic neighbour embedding, multiview multidimensional scaling and multiview minimum curvilinearity embedding) and a multiview spectral clustering method. Often they produce better results than their single-view counterparts, tested here on four multiview datasets.

Availability and implementation

R package at the B2SLab site: http://b2slab.upc.edu/software-and-tutorials/ and Python package: https://pypi.python.org/pypi/multiview.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +31418539,Chromosomal level assembly and population sequencing of the Chinese tree shrew genome.,"Chinese tree shrews (Tupaia belangeri chinensis) have become an increasingly important experimental animal in biomedical research due to their close relationship to primates. An accurately sequenced and assembled genome is essential for understanding the genetic features and biology of this animal. In this study, we used long-read single-molecule sequencing and high-throughput chromosome conformation capture (Hi-C) technology to obtain a high-quality chromosome-scale scaffolding of the Chinese tree shrew genome. The new reference genome (KIZ version 2: TS_2.0) resolved problems in presently available tree shrew genomes and enabled accurate identification of large and complex repeat regions, gene structures, and species-specific genomic structural variants. In addition, by sequencing the genomes of six Chinese tree shrew individuals, we produced a comprehensive map of 12.8 M single nucleotide polymorphisms and confirmed that the major histocompatibility complex (MHC) loci and immunoglobulin gene family exhibited high nucleotide diversity in the tree shrew genome. We updated the tree shrew genome database (TreeshrewDB v2.0: http://www.treeshrewdb.org) to include the genome annotation information and genetic variations. The new high-quality reference genome of the Chinese tree shrew and the updated TreeshrewDB will facilitate the use of this animal in many different fields of research.",2019-11-01 +31689386,"Single-Cell Profiling Reveals Sex, Lineage, and Regional Diversity in the Mouse Kidney.","Chronic kidney disease affects 10% of the population with notable differences in ethnic and sex-related susceptibility to kidney injury and disease. Kidney dysfunction leads to significant morbidity and mortality and chronic disease in other organ systems. 
A mouse-organ-centered understanding underlies rapid progress in human disease modeling and cellular approaches to repair damaged systems. To enhance an understanding of the mammalian kidney, we combined anatomy-guided single-cell RNA sequencing of the adult male and female mouse kidney with in situ expression studies and cell lineage tracing. These studies reveal cell diversity and marked sex differences, distinct organization and cell composition of nephrons dependent on the time of nephron specification, and lineage convergence, in which contiguous functionally related cell types are specified from nephron and collecting system progenitor populations. A searchable database, Kidney Cell Explorer (https://cello.shinyapps.io/kidneycellexplorer/), enables gene-cell relationships to be viewed in the anatomical framework of the kidney.",2019-11-01 +30102367,Review and comparative assessment of similarity-based methods for prediction of drug-protein interactions in the druggable human proteome.,"Drug-protein interactions (DPIs) underlie the desired therapeutic actions and the adverse side effects of a significant majority of drugs. Computational prediction of DPIs facilitates research in drug discovery, characterization and repurposing. Similarity-based methods that do not require knowledge of protein structures are particularly suitable for druggable genome-wide predictions of DPIs. We review 35 high-impact similarity-based predictors that were published in the past decade. We group them based on three types of similarities and their combinations that they use. We discuss and compare key aspects of these methods including source databases, internal databases and their predictive models. Using our novel benchmark database, we perform comparative empirical analysis of predictive performance of seven types of representative predictors that utilize each type of similarity individually and all possible combinations of similarities. 
We assess predictive quality at the database-wide DPI level and we are the first to also include evaluation over individual drugs. Our comprehensive analysis shows that predictors that use more similarity types outperform methods that employ fewer similarities, and that the model combining all three types of similarities secures area under the receiver operating characteristic curve of 0.93. We offer a comprehensive analysis of sensitivity of predictive performance to intrinsic and extrinsic characteristics of the considered predictors. We find that predictive performance is sensitive to low levels of similarities between sequences of the drug targets and several extrinsic properties of the input drug structures, drug profiles and drug targets. The benchmark database and a webserver for the seven predictors are freely available at http://biomine.cs.vcu.edu/servers/CONNECTOR/.",2019-11-01 +31649832,Machine learning classification models for fetal skeletal development performance prediction using maternal bone metabolic proteins in goats.,"

Background

In developing countries, maternal undernutrition is the major intrauterine environmental factor contributing to fetal development and adverse pregnancy outcomes. Maternal nutrition restriction (MNR) in gestation has proven to impact overall growth, bone development, and proliferation and metabolism of mesenchymal stem cells in offspring. However, the efficient method for elucidation of fetal bone development performance through maternal bone metabolic biochemical markers remains elusive.

Methods

We adapted goats to elucidate fetal bone development state with maternal serum bone metabolic proteins under malnutrition conditions in mid- and late-gestation stages. We used the experimental data to create 72 datasets by mixing different input features such as one-hot encoding of experimental conditions, metabolic original data, experimental-centered features and experimental condition probabilities. Seven Machine Learning methods have been used to predict six fetal bone parameters (weight, length, and diameter of femur/humerus).

Results

The results indicated that MNR influences fetal bone development (femur and humerus) and fetal bone metabolic protein levels (C-terminal telopeptides of collagen I, CTx, in middle-gestation and N-terminal telopeptides of collagen I, NTx, in late-gestation), and maternal bone metabolites (low bone alkaline phosphatase, BALP, in middle-gestation and high BALP in late-gestation). The results show the importance of experimental conditions (ECs) encoding by mixing the information with the serum metabolic data. The best classification models obtained for femur weight (Fw) and length (Fl), and humerus weight (Hw) are Support Vector Machines classifiers with the leave-one-out cross-validation accuracy of 1. The rest of the accuracies are 0.98, 0.946 and 0.696 for the diameter of femur (Fd), diameter and length of humerus (Hd, Hl), respectively. With the feature importance analysis, the moving averages mixed ECs are generally more important for the majority of the models. The moving average of parathyroid hormone (PTH) within nutritional conditions (MA-PTH-experim) is important for Fd, Hd and Hl prediction models but its removal for enhancing the Fw, Fl and Hw model performance. Further, using one feature models, it is possible to obtain even more accurate models compared with the feature importance analysis models. In conclusion, the machine learning is an efficient method to confirm the important role of PTH and BALP mixed with nutritional conditions for fetal bone growth performance of goats. All the Python scripts including results and comments are available in an open repository at https://gitlab.com/muntisa/goat-bones-machine-learning.",2019-10-18 +24098943,Advances in the REDCAT software package.,"

Background

Residual Dipolar Couplings (RDCs) have emerged in the past two decades as an informative source of experimental restraints for the study of structure and dynamics of biological macromolecules and complexes. The REDCAT software package was previously introduced for the analysis of molecular structures using RDC data. Here we report additional features that have been included in this software package in order to expand the scope of its analyses. We first discuss the features that enhance REDCAT's user-friendly nature, such as the integration of a number of analyses into one single operation and enabling convenient examination of a structural ensemble in order to identify the most suitable structure. We then describe the new features which expand the scope of RDC analyses, performing exercises that utilize both synthetic and experimental data to illustrate and evaluate different features with regard to structure refinement and structure validation.

Results

We establish the seamless interaction that takes place between REDCAT, VMD, and Xplor-NIH in demonstrations that utilize our newly developed REDCAT-VMD and XplorGUI interfaces. These modules enable visualization of RDC analysis results on the molecular structure displayed in VMD and refinement of structures with Xplor-NIH, respectively. We also highlight REDCAT's Error-Analysis feature in reporting the localized fitness of a structure to RDC data, which provides a more effective means of recognizing local structural anomalies. This allows for structurally sound regions of a molecule to be identified, and for any refinement efforts to be focused solely on locally distorted regions.

Conclusions

The newly engineered REDCAT software package, which is available for download via the WWW from http://ifestos.cse.sc.edu, has been developed in the Object Oriented C++ environment. Our most recent enhancements to REDCAT serve to provide a more complete RDC analysis suite, while also accommodating a more user-friendly experience, and will be of great interest to the community of researchers and developers since it hides the complications of software development.",2013-10-07 +31808705,Blood Lead Levels and Risk of Atherosclerosis in the Carotid Artery: Results from a Swedish Cohort.,"BACKGROUND:Lead exposure has been associated with increased incidence of adverse clinical cardiovascular outcomes. Atherosclerosis has been suggested as one of the underlying mechanisms, and findings from experimental studies support this, but human data are scarce. OBJECTIVES:Our objective was to determine the association between environmental lead exposure based on blood lead (B-Pb) concentrations and the prevalence of atherosclerotic plaque in the carotid artery. METHODS:We used cross-sectional data from the Malmö Diet and Cancer Study cardiovascular cohort (MDCS-CC; recruitment in 1991-1994) covering 4,172 middle-aged men and women. B-Pb at baseline, measured by inductively coupled plasma mass spectrometry, was used as the exposure biomarker. The presence of atherosclerotic plaque in the carotid artery was determined by B-mode ultrasonography. We used logistic regression to estimate odds ratios (ORs) for prevalence of plaque in the carotid artery according to B-Pb quartiles. RESULTS:The median B-Pb was 25μg/L (range: 1.5-258), and 36% of the cohort had any atherosclerotic plaque. 
After controlling for confounders and known cardiovascular risk factors, the OR for prevalence of plaque in the highest quartile (Q4) of B-Pb compared with the lowest quartile (Q1) was 1.35 (95% CI: 1.09, 1.66) in the total group, 1.58 (95% CI: 1.20, 2.08) among women, and 1.18 (95% CI: 0.83, 1.69) among men. Among women, associations were limited to those who were postmenopausal [OR for Q4 vs. Q1=1.72 (95% CI: 1.26, 2.34) vs. OR=0.96 (95% CI: 0.49, 1.89 in premenopausal women)]. Associations were weak and nonsignificant in never-smokers [OR for Q4 vs. Q1=1.14 (95% CI: 0.81, 1.61)]. DISCUSSION:Our study shows an association between B-Pb concentrations and occurrence of atherosclerotic plaque in the carotid artery, adding evidence for an underlying pro-atherogenic role of lead in cardiovascular disease. Associations appeared to be limited to postmenopausal (vs. premenopausal) women. https://doi.org/10.1289/EHP5057.",2019-12-06 +32470296,DICE: A Monte Carlo Code for Molecular Simulation Including the Configurational Bias Monte Carlo Method.,"Solute-solvent systems are an important topic of study, as the effects of the solvent on the solute can drastically change its properties. Theoretical studies of these systems are done with ab initio methods, molecular simulations, or a combination of both. The simulations of molecular systems are usually performed with either molecular dynamics (MD) or Monte Carlo (MC) methods. Classical MD has evolved much in the last decades, both in algorithms and implementations, having several stable and efficient codes developed and available. Similarly, MC methods have also evolved, focusing mainly in creating and improving methods and implementations in available codes. In this paper, we provide some enhancements to a configurational bias Monte Carlo (CBMC) methodology to simulate flexible molecules using the molecular fragments concept. 
In our implementation the acceptance criterion of the CBMC method was simplified and a generalization was proposed to allow the simulation of molecules with any kind of fragments. We also introduce the new version of DICE, an MC code for molecular simulation (available at https://portal.if.usp.br/dice). This code was mainly developed to simulate solute-solvent systems in liquid and gas phases and in interfaces (gas-liquid and solid-liquid) that has been mostly used to generate configurations for a sequential quantum mechanics/molecular mechanics method (S-QM/MM). This new version introduces several improvements over the previous ones, with the ability of simulating flexible molecules with CBMC as one of them. Simulations of well-known molecules, such as n-octane and 1,2-dichloroethane in vacuum and in solution, are presented to validate the new implementations compared with MD simulations, experimental data, and other theoretical results. The efficiency of the conformational sampling was analyzed using the acceptance rates of different alkanes: n-octane, neopentane, and 4-ethylheptane. Furthermore, a very complex molecule, boron subphtalocyanine, was simulated in vacuum and in aqueous solution showing the versatility of the new implementation. We show that the CBMC is a very good method to perform conformation sampling of complex moderately sized molecules (up to 150 atoms) in solution following the Boltzmann thermodynamic equilibrium distribution.",2020-06-18 +30445657,"The European Bioinformatics Institute in 2018: tools, infrastructure and training.","The European Bioinformatics Institute (https://www.ebi.ac.uk/) archives, curates and analyses life sciences data produced by researchers throughout the world, and makes these data available for re-use globally (https://www.ebi.ac.uk/). 
Data volumes continue to grow exponentially: total raw storage capacity now exceeds 160 petabytes, and we manage these increasing data flows while maintaining the quality of our services. This year we have improved the efficiency of our computational infrastructure and doubled the bandwidth of our connection to the worldwide web. We report two new data resources, the Single Cell Expression Atlas (https://www.ebi.ac.uk/gxa/sc/), which is a component of the Expression Atlas; and the PDBe-Knowledgebase (https://www.ebi.ac.uk/pdbe/pdbe-kb), which collates functional annotations and predictions for structure data in the Protein Data Bank. Additionally, Europe PMC (http://europepmc.org/) has added preprint abstracts to its search results, supplementing results from peer-reviewed publications. EMBL-EBI maintains over 150 analytical bioinformatics tools that complement our data resources. We make these tools available for users through a web interface as well as programmatically using application programming interfaces, whilst ensuring the latest versions are available for our users. Our training team, with support from all of our staff, continued to provide on-site, off-site and web-based training opportunities for thousands of researchers worldwide this year.",2019-01-01 +31393553,"PeNGaRoo, a combined gradient boosting and ensemble learning framework for predicting non-classical secreted proteins.","

Motivation

Gram-positive bacteria have developed secretion systems to transport proteins across their cell wall, a process that plays an important role during host infection. These secretion mechanisms have also been harnessed for therapeutic purposes in many biotechnology applications. Accordingly, the identification of features that select a protein for efficient secretion from these microorganisms has become an important task. Among all the secreted proteins, 'non-classical' secreted proteins are difficult to identify as they lack discernable signal peptide sequences and can make use of diverse secretion pathways. Currently, several computational methods have been developed to facilitate the discovery of such non-classical secreted proteins; however, the existing methods are based on either simulated or limited experimental datasets. In addition, they often employ basic features to train the models in a simple and coarse-grained manner. The availability of more experimentally validated datasets, advanced feature engineering techniques and novel machine learning approaches creates new opportunities for the development of improved predictors of 'non-classical' secreted proteins from sequence data.

Results

In this work, we first constructed a high-quality dataset of experimentally verified 'non-classical' secreted proteins, which we then used to create benchmark datasets. Using these benchmark datasets, we comprehensively analyzed a wide range of features and assessed their individual performance. Subsequently, we developed a two-layer Light Gradient Boosting Machine (LightGBM) ensemble model that integrates several single feature-based models into an overall prediction framework. At this stage, LightGBM, a gradient boosting machine, was used as a machine learning approach and the necessary parameter optimization was performed by a particle swarm optimization strategy. All single feature-based LightGBM models were then integrated into a unified ensemble model to further improve the predictive performance. Consequently, the final ensemble model achieved a superior performance with an accuracy of 0.900, an F-value of 0.903, Matthew's correlation coefficient of 0.803 and an area under the curve value of 0.963, and outperforming previous state-of-the-art predictors on the independent test. Based on our proposed optimal ensemble model, we further developed an accessible online predictor, PeNGaRoo, to serve users' demands. We believe this online web server, together with our proposed methodology, will expedite the discovery of non-classically secreted effector proteins in Gram-positive bacteria and further inspire the development of next-generation predictors.

Availability and implementation

http://pengaroo.erc.monash.edu/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +32499230,Dialysis Regret: Prevalence and Correlates.,"

Background and objectives

Although some patients regret the decision to start dialysis, modifiable factors associated with regret have rarely been studied. We aimed to identify factors associated with patients' regret to initiate dialysis.

Design, setting, participants, & measurements

A 41-item questionnaire was administered to adult patients receiving maintenance dialysis in seven dialysis units located in Cleveland, Ohio, and its suburbs. Of the 450 patients asked to participate in the study, 423 agreed and 397 provided data on decisional regret. We used multivariable logistic regression to identify predictors of regret, which was assessed using a single item, ""Do you regret your decision to start dialysis?"" We report adjusted odds ratios (ORs) and 95% confidence intervals (95% CIs) for the following candidate predictors: knowledge of CKD, attitudes toward CKD treatment, and preference for end-of-life care.

Results

Eighty-two of 397 respondents (21%) reported decisional regret. There were no significant demographic correlates of regret. Regret was more common when patients reported choosing dialysis to please doctors or family members (OR, 2.34; 95% CI, 1.27 to 4.31; P<0.001). Patients who reported having a prognostic discussion about life expectancy with their doctors (OR, 0.42; 95% CI, 0.18 to 0.98; P=0.03) and those who had completed a living will (OR, 0.48; 95% CI, 0.25 to 0.95; P=0.03) were less likely to report regret with dialysis initiation.

Conclusions

Dialysis regret was common in this sample. Demographic factors (age, sex, marital status, race, or educational attainment) were not significantly associated with regret, but modifiable care processes were.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2020_06_09_CJN13781119.mp3.",2020-06-04 +25392410,RaftProt: mammalian lipid raft proteome database.,"RaftProt (http://lipid-raft-database.di.uq.edu.au/) is a database of mammalian lipid raft-associated proteins as reported in high-throughput mass spectrometry studies. Lipid rafts are specialized membrane microdomains enriched in cholesterol and sphingolipids thought to act as dynamic signalling and sorting platforms. Given their fundamental roles in cellular regulation, there is a plethora of information on the size, composition and regulation of these membrane microdomains, including a large number of proteomics studies. To facilitate the mining and analysis of published lipid raft proteomics studies, we have developed a searchable database RaftProt. In addition to browsing the studies, performing basic queries by protein and gene names, searching experiments by cell, tissue and organisms; we have implemented several advanced features to facilitate data mining. To address the issue of potential bias due to biochemical preparation procedures used, we have captured the lipid raft preparation methods and implemented advanced search option for methodology and sample treatment conditions, such as cholesterol depletion. Furthermore, we have identified a list of high confidence proteins, and enabled searching only from this list of likely bona fide lipid raft proteins. Given the apparent biological importance of lipid raft and their associated proteins, this database would constitute a key resource for the scientific community.",2014-11-11 +31676993,Caveolin-1 Expression Together with VEGF can be a Predictor for Lung Metastasis and Poor Prognosis in Osteosarcoma.,"Caveolin-1, the major protein component of caveolae, plays vital functions in tumorigenesis and metastasis. 
Previous evidence demonstrated the positive role of Caveolin-1 in the regulation of endothelial cell differentiation and the involvement of Caveolin-1 in vascular endothelial growth factor (VEGF) mediated angiogenesis. The correlation of Caveolin-1 expression and angiogenesis is not yet elucidated in osteosarcoma. This study aimed to investigate the expression levels of Caveolin-1 and VEGF in osteosarcoma and their associations with clinicopathological data. This study included 66 formalin-fixed and paraffin embedded osteosarcoma tissue samples. The expression levels of Caveolin-1 and VEGF were assessed by immunohistochemistry. Then associations with clinicopathological variables and the correlation between both markers were evaluated statistically. We also investigated the expression of Caveolin-1 and VEGF values in gene microarrays of osteosarcoma patients and cell lines by using GEO data sets on https://www.ncbi.nlm.nih.gov. Caveolin-1 and VEGF were expressed in 19.6% and 77.3%, respectively. Caveolin-1 expression was associated positively with osteoblastic histological subtype (P < 0.0001). VEGF expression showed positive association with patient age, histological grade and clinical stage (P = 0.031, P = 0.024 and P < 0.001; respectively). An inverse correlation between Caveolin-1 and VEGF expressions in osteosarcoma was found (r = 0.2 P = 0.04). In silico analysis of Caveolin-1 and VEGF expression supported our results. Our results suggest that Caveolin-1 may act as a tumor suppressor in osteosarcoma. Down-regulation of Caveolin-1 can be used as an indicator for poor prognosis in osteosarcoma patients. Meanwhile, overexpression of VEGF is a predictor of pulmonary metastasis and poor prognosis.",2019-11-01 +24564786,Fungal plant cell wall-degrading enzyme database: a platform for comparative and evolutionary genomics in fungi and Oomycetes.,"

Background

Plant cell wall-degrading enzymes (PCWDEs) play significant roles throughout the fungal life including acquisition of nutrients and decomposition of plant cell walls. In addition, many PCWDEs are also utilized by biofuel and pulp industries. In order to develop a comparative genomics platform focused on fungal PCWDEs and provide a resource for evolutionary studies, Fungal PCWDE Database (FPDB) is constructed (http://pcwde.riceblast.snu.ac.kr/).

Results

In order to archive fungal PCWDEs, 22 sequence profiles were constructed and searched on 328 genomes of fungi, Oomycetes, plants and animals. A total of 6,682 putative genes encoding PCWDEs were predicted, showing differential distribution by their life styles, host ranges and taxonomy. Genes known to be involved in fungal pathogenicity, including polygalacturonase (PG) and pectin lyase, were enriched in plant pathogens. Furthermore, crop pathogens had more PCWDEs than those of rot fungi, implying that the PCWDEs analysed in this study are more needed for invading plant hosts than wood-decaying processes. Evolutionary analysis of PGs in 34 selected genomes revealed that gene duplication and loss events were mainly driven by taxonomic divergence and partly contributed by those events in species-level, especially in plant pathogens.

Conclusions

The FPDB would provide a fungi-specialized genomics platform, a resource for evolutionary studies of PCWDE gene families and extended analysis option by implementing Favorite, which is a data exchange and analysis hub built in Comparative Fungal Genomics Platform (CFGP 2.0; http://cfgp.snu.ac.kr/).",2013-10-16 +22434841,Tetrahymena Genome Database Wiki: a community-maintained model organism database.,"When funding for Tetrahymena Genome Database (TGD) ended in 2006, no further updates were made to this important community resource and the main database was taken offline in 2008. We have restored and updated this important resource for use by the Tetrahymena research community. We have also retooled the TGD website (now TGD Wiki) to allow members of the community to directly update the information presented for each gene, including gene names, descriptions and Gene Ontology annotations, from a web browser. Maintenance of genome annotations by the authors generating and publishing primary data, rather than dedicated scientific curators, is a viable alternative for the upkeep of genomes, particularly for organisms with smaller research communities. By combining simple, intuitive displays with the powerful search functions made possible by its underlying relational database, TGD Wiki has been designed to maximize participation by bench scientists in the development of their community bioinformatics resource. DATABASE URL: http://ciliate.org.",2012-03-20 +28057002,Prediction of anti-inflammatory proteins/peptides: an insilico approach.,"

Background

The current therapy for inflammatory and autoimmune disorders involves the use of nonspecific anti-inflammatory drugs and other immunosuppressants, which are often accompanied by potential side effects. As an alternative therapy, anti-inflammatory peptides are recently being exploited as anti-inflammatory agents for treatment of various inflammatory diseases such as Alzheimer's disease and rheumatoid arthritis. Thus, understanding the correlation between amino acid sequence and its potential anti-inflammatory property is of great importance for the discovery of novel and efficient anti-inflammatory peptide-based therapeutics.

Methods

In this study, we have developed a prediction tool for the classification of peptides as anti-inflammatory epitopes or non anti-inflammatory epitopes. The training was performed using experimentally validated epitopes obtained from Immune epitope database and analysis resource database. Different sequence-based features and their hybrids with motif information were employed for development of support vector machine-based machine learning models. Similarly, machine learning models were also constructed using random forest.

Results

The composition and terminal residue conservation analysis of peptides revealed the dominance of leucine, serine, tyrosine and arginine residues in anti-inflammatory epitopes as compared to non anti-inflammatory epitopes. Similarly, the anti-inflammatory epitopes specific motifs were found to be rich in hydrophobic and polar residues. The hybrid of tripeptide composition-based support vector machine model and motif yielded the best performance on 10-fold cross validation with an accuracy of 78.1% and MCC of 0.58. The same displayed an accuracy of 72% and MCC of 0.45 on validation dataset, rejecting any possibility of over-fitting. The tripeptide composition-based random forest model displayed an accuracy of 0.8 and MCC of 0.59 on 10-fold cross validation, however, the accuracy (0.68) and MCC (0.31) was lower as compared to support vector machine model on validation dataset. Thus, the support vector machine model is implemented as the default model and an additional option of using the random forest model is provided.

Conclusion

The prediction models along with tools for epitope mapping and similarity search have been provided as a web server which is freely accessible at http://metagenomics.iiserb.ac.in/antiinflam/ .",2017-01-06 +29036655,MFIB: a repository of protein complexes with mutual folding induced by binding.,"

Motivation

It is commonplace that intrinsically disordered proteins (IDPs) are involved in crucial interactions in the living cell. However, the study of protein complexes formed exclusively by IDPs is hindered by the lack of data and such analyses remain sporadic. Systematic studies benefited other types of protein-protein interactions paving a way from basic science to therapeutics; yet these efforts require reliable datasets that are currently lacking for synergistically folding complexes of IDPs.

Results

Here we present the Mutual Folding Induced by Binding (MFIB) database, the first systematic collection of complexes formed exclusively by IDPs. MFIB contains an order of magnitude more data than any dataset used in corresponding studies and offers a wide coverage of known IDP complexes in terms of flexibility, oligomeric composition and protein function from all domains of life. The included complexes are grouped using a hierarchical classification and are complemented with structural and functional annotations. MFIB is backed by a firm development team and infrastructure, and together with possible future community collaboration it will provide the cornerstone for structural and functional studies of IDP complexes.

Availability and implementation

MFIB is freely accessible at http://mfib.enzim.ttk.mta.hu/. The MFIB application is hosted by Apache web server and was implemented in PHP. To enrich querying features and to enhance backend performance a MySQL database was also created.

Contact

simon.istvan@ttk.mta.hu, meszaros.balint@ttk.mta.hu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +30957840,A feature-based approach to predict hot spots in protein-DNA binding interfaces.,"DNA-binding hot spot residues of proteins are dominant and fundamental interface residues that contribute most of the binding free energy of protein-DNA interfaces. As experimental methods for identifying hot spots are expensive and time consuming, computational approaches are urgently required in predicting hot spots on a large scale. In this work, we systematically assessed a wide variety of 114 features from a combination of the protein sequence, structure, network and solvent accessible information and their combinations along with various feature selection strategies for hot spot prediction. We then trained and compared four commonly used machine learning models, namely, support vector machine (SVM), random forest, Naïve Bayes and k-nearest neighbor, for the identification of hot spots using 10-fold cross-validation and the independent test set. Our results show that (1) features based on the solvent accessible surface area have significant effect on hot spot prediction; (2) different but complementary features generally enhance the prediction performance; and (3) SVM outperforms other machine learning methods on both training and independent test sets. In an effort to improve predictive performance, we developed a feature-based method, namely, PrPDH (Prediction of Protein-DNA binding Hot spots), for the prediction of hot spots in protein-DNA binding interfaces using SVM based on the selected 10 optimal features. Comparative results on benchmark data sets indicate that our predictor is able to achieve generally better performance in predicting hot spots compared to the state-of-the-art predictors. 
A user-friendly web server for PrPDH is well established and is freely available at http://bioinfo.ahu.edu.cn:8080/PrPDH.",2020-05-01 +31756362,Multi-model inference of non-random mating from an information theoretic approach.,"Non-random mating has a significant impact on the evolution of organisms. Here, I developed a modelling framework for discrete traits (with any number of phenotypes) to explore different models connecting the non-random mating causes (mate competition and/or mate choice) and their consequences (sexual selection and/or assortative mating). I derived the formulaefor the maximum likelihood estimates of each model and used information criteria to perform multi-model inference. Simulation results showed a good performance of both model selection and parameter estimation. The methodology was applied to ecotypes data of the marine gastropod Littorina saxatilis from Galicia (Spain), to show that the mating pattern is better described by models with two parameters that involve both mate choice and competition, generating positive assortative mating plus female sexual selection. As far as I know, this is the first standardized methodology for model selection and multi-model inference of mating parameters for discrete traits. The advantages of this framework include the ability of setting up models from which the parameters connect causes, as mate competition and mate choice, with their outcome in the form of data patterns of sexual selection and assortative mating. For some models, the parameters may have a double effect i.e. they produce sexual selection and assortative mating, while for others there are separated parameters for one kind of pattern or another. From an empirical point of view, it is much easier to study patterns than processes and, for this reason, the causal mechanisms of sexual selection are not so well known as the patterns they produce. 
The goal of the present work is to propose a new tool that helps to distinguish among different alternative processes behind the observed mating pattern. The full methodology was implemented in a software called InfoMating (available at http://acraaj.webs6.uvigo.es/InfoMating/Infomating.htm).",2019-11-19 +31713585,ProAffiMuSeq: sequence-based method to predict the binding free energy change of protein-protein complexes upon mutation using functional classification.,"

Motivation

Protein-protein interactions are essential for the cell and mediate various functions. However, mutations can disrupt these interactions and may cause diseases. Currently available computational methods require a complex structure as input for predicting the change in binding affinity. Further, they have not included the functional class information for the protein-protein complex. To address this, we have developed a method, ProAffiMuSeq, which predicts the change in binding free energy using sequence-based features and functional class.

Results

Our method shows an average correlation between predicted and experimentally determined ΔΔG of 0.73 and mean absolute error (MAE) of 0.86 kcal/mol in 10-fold cross-validation and correlation of 0.75 with MAE of 0.94 kcal/mol in the test dataset. ProAffiMuSeq was also tested on an external validation set and showed results comparable to structure-based methods. Our method can be used for large-scale analysis of disease-causing mutations in protein-protein complexes without structural information.

Availability and implementation

Users can access the method at https://web.iitm.ac.in/bioinfo2/proaffimuseq/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +32930131,Neo-adjuvant chemotherapy followed by either continuous hyper-fractionated accelerated radiation therapy week-end less or conventional chemo-radiotherapy in locally advanced NSCLC-A randomised prospective single institute study.,"

Context

Better locoregional control and increased overall survival by continuous hyper fractionated accelerated radiotherapy have been shown in unresectable nonsmall cell lung carcinoma (NSCLC). Dose escalation and neoadjuvant chemotherapy (NACT) along with continuous hyperfractionated accelerated radiotherapy week end-less (CHARTWEL) were also tried for improved survival. In this present study, we compared the results of NACT followed by CHARTWEL against NACT followed by conventional concurrent chemo-radiation therapy.

Aims

The aim of this study is to compare the locoregional control and toxicities in NSCLC Stage IIIA and B in both arms.

Settings and design

Randomized, prospective single-institutional study with a study population comprising all locally advanced unresectable NSCLC patients enrolled in 2014 at our institute.

Subjects and methods

All enrolled patients were randomized into two arms-CHARTWEL and concomitant chemo-radiotherapy (CCRT), after three weeks of the fourth cycle of NACT. In CHARTWEL arm 30 patients received two-dimensional radiotherapy (RT) 58.5 Gy/39 fr/2.5 weeks while in CCRT arm 30 received 66 Gy/33 fr/6.5 weeks. Disease response was evaluated at 6 months and toxicity assessment during and after treatment completion. Data were analyzed using tools such as percentage, mean, Chi-square test and P value. Chi-square and P value was calculated by statistical online software (http://quantpsy.org).

Results

28% of patients in study arm and 20% in control arm had complete response at 6 months after RT. Locoregional disease control was observed in 44% in study arm and 32% in control arm of patients. There was no statistical difference in grades of toxicities or overall survival (OS)/disease-free survival except persistent esophagitis Grade III seen in two patients of study arm.

Conclusions

Study suggests that CHARTWEL in combination with NACT is an effective strategy to treat patients with locally advanced lung cancer with the advantage of a smaller dose and shorter duration. Although large multivariate studies still needed.",2020-07-01 +26322134,MINEs: open access databases of computationally predicted enzyme promiscuity products for untargeted metabolomics.,"BACKGROUND:In spite of its great promise, metabolomics has proven difficult to execute in an untargeted and generalizable manner. Liquid chromatography-mass spectrometry (LC-MS) has made it possible to gather data on thousands of cellular metabolites. However, matching metabolites to their spectral features continues to be a bottleneck, meaning that much of the collected information remains uninterpreted and that new metabolites are seldom discovered in untargeted studies. These challenges require new approaches that consider compounds beyond those available in curated biochemistry databases. DESCRIPTION:Here we present Metabolic In silico Network Expansions (MINEs), an extension of known metabolite databases to include molecules that have not been observed, but are likely to occur based on known metabolites and common biochemical reactions. We utilize an algorithm called the Biochemical Network Integrated Computational Explorer (BNICE) and expert-curated reaction rules based on the Enzyme Commission classification system to propose the novel chemical structures and reactions that comprise MINE databases. Starting from the Kyoto Encyclopedia of Genes and Genomes (KEGG) COMPOUND database, the MINE contains over 571,000 compounds, of which 93% are not present in the PubChem database. However, these MINE compounds have on average higher structural similarity to natural products than compounds from KEGG or PubChem. 
MINE databases were able to propose annotations for 98.6% of a set of 667 MassBank spectra, 14% more than KEGG alone and equivalent to PubChem while returning far fewer candidates per spectra than PubChem (46 vs. 1715 median candidates). Application of MINEs to LC-MS accurate mass data enabled the identity of an unknown peak to be confidently predicted. CONCLUSIONS:MINE databases are freely accessible for non-commercial use via user-friendly web-tools at http://minedatabase.mcs.anl.gov and developer-friendly APIs. MINEs improve metabolomics peak identification as compared to general chemical databases whose results include irrelevant synthetic compounds. Furthermore, MINEs complement and expand on previous in silico generated compound databases that focus on human metabolism. We are actively developing the database; future versions of this resource will incorporate transformation rules for spontaneous chemical reactions and more advanced filtering and prioritization of candidate structures. Graphical abstractMINE database construction and access methods. The process of constructing a MINE database from the curated source databases is depicted on the left. The methods for accessing the database are shown on the right.",2015-08-28 +31278630,Archiving and disseminating integrative structure models.,"Limitations in the applicability, accuracy, and precision of individual structure characterization methods can sometimes be overcome via an integrative modeling approach that relies on information from all available sources, including all available experimental data and prior models. The open-source Integrative Modeling Platform (IMP) is one piece of software that implements all computational aspects of integrative modeling. To maximize the impact of integrative structures, the coordinates should be made publicly available, as is already the case for structures based on X-ray crystallography, NMR spectroscopy, and electron microscopy. 
Moreover, the associated experimental data and modeling protocols should also be archived, such that the original results can easily be reproduced. Finally, it is essential that the integrative structures are validated as part of their publication and deposition. A number of research groups have already developed software to implement integrative modeling and have generated a number of structures, prompting the formation of an Integrative/Hybrid Methods Task Force. Following the recommendations of this task force, the existing PDBx/mmCIF data representation used for atomic PDB structures has been extended to address the requirements for archiving integrative structural models. This IHM-dictionary adds a flexible model representation, including coarse graining, models in multiple states and/or related by time or other order, and multiple input experimental information sources. A prototype archiving system called PDB-Dev ( https://pdb-dev.wwpdb.org ) has also been created to archive integrative structural models, together with a Python library to facilitate handling of integrative models in PDBx/mmCIF format.",2019-07-05 +32029736,Sexual-dimorphism in human immune system aging.,"Differences in immune function and responses contribute to health- and life-span disparities between sexes. However, the role of sex in immune system aging is not well understood. Here, we characterize peripheral blood mononuclear cells from 172 healthy adults 22-93 years of age using ATAC-seq, RNA-seq, and flow cytometry. These data reveal a shared epigenomic signature of aging including declining naïve T cell and increasing monocyte and cytotoxic cell functions. These changes are greater in magnitude in men and accompanied by a male-specific decline in B-cell specific loci. Age-related epigenomic changes first spike around late-thirties with similar timing and magnitude between sexes, whereas the second spike is earlier and stronger in men. 
Unexpectedly, genomic differences between sexes increase after age 65, with men having higher innate and pro-inflammatory activity and lower adaptive activity. Impact of age and sex on immune phenotypes can be visualized at https://immune-aging.jax.org to provide insights into future studies.",2020-02-06 +28499913,Integration of gel-based and gel-free proteomic data for functional analysis of proteins through Soybean Proteome Database.,"The Soybean Proteome Database (SPD) stores data on soybean proteins obtained with gel-based and gel-free proteomic techniques. The database was constructed to provide information on proteins for functional analyses. The majority of the data is focused on soybean (Glycine max 'Enrei'). The growth and yield of soybean are strongly affected by environmental stresses such as flooding. The database was originally constructed using data on soybean proteins separated by two-dimensional polyacrylamide gel electrophoresis, which is a gel-based proteomic technique. Since 2015, the database has been expanded to incorporate data obtained by label-free mass spectrometry-based quantitative proteomics, which is a gel-free proteomic technique. Here, the portions of the database consisting of gel-free proteomic data are described. The gel-free proteomic database contains 39,212 proteins identified in 63 sample sets, such as temporal and organ-specific samples of soybean plants grown under flooding stress or non-stressed conditions. In addition, data on organellar proteins identified in mitochondria, nuclei, and endoplasmic reticulum are stored. Furthermore, the database integrates multiple omics data such as genomics, transcriptomics, metabolomics, and proteomics. The SPD database is accessible at http://proteome.dc.affrc.go.jp/Soybean/.

Biological significance

The Soybean Proteome Database stores data obtained from both gel-based and gel-free proteomic techniques. The gel-free proteomic database comprises 39,212 proteins identified in 63 sample sets, such as different organs of soybean plants grown under flooding stress or non-stressed conditions in a time-dependent manner. In addition, organellar proteins identified in mitochondria, nuclei, and endoplasmic reticulum are stored in the gel-free proteomics database. A total of 44,704 proteins, including 5490 proteins identified using a gel-based proteomic technique, are stored in the SPD. It accounts for approximately 80% of all predicted proteins from genome sequences, though there are overlapping proteins. Based on the demonstrated application of data stored in the database for functional analyses, it is suggested that these data will be useful for analyses of biological mechanisms in soybean. Furthermore, coupled with recent advances in information and communication technology, the usefulness of this database would increase in the analyses of biological mechanisms.",2017-05-10 +31815854,Worldwide and European interest in the MitraClip: a Google Trends-based analysis.,"

Aims

The use of MitraClip (Abbott Vascular, Santa Clara, California, USA) for the treatment of severe functional mitral valve regurgitation (FMR) has drastically increased over recent years. We analysed the worldwide and European interest in the MitraClip among Internet users by performing a Google Trends-based analysis.

Methods

We conducted a retrospective analysis extracting the data from the cited Google Trends (https://trends.google.com) to explore both the worldwide and European interest in the MitraClip over recent years. Specifically, Google Trends was queried from 1 January 2008 to 31 September 2019 using the item 'MitraClip'.

Results

Over the study period, the worldwide interest in the MitraClip steadily increased by 47.7% per month (Ptrend < 0.001). The analysis performed among the 11 European countries in which the search volume allowed a trend to be created over the study period confirmed a significant increase in interest. Specifically, the highest rate of interest increase per month was registered in France followed by Poland, Spain and Italy (40.5, 35.9, 37.3 and 31.8%, respectively).

Conclusion

A growing interest in the MitraClip exists both in Europe and worldwide.",2020-03-01 +26639025,Deciphering Genomic Underpinnings of Quantitative MRI-based Radiomic Phenotypes of Invasive Breast Carcinoma.,"Magnetic Resonance Imaging (MRI) has been routinely used for the diagnosis and treatment of breast cancer. However, the relationship between the MRI tumor phenotypes and the underlying genetic mechanisms remains under-explored. We integrated multi-omics molecular data from The Cancer Genome Atlas (TCGA) with MRI data from The Cancer Imaging Archive (TCIA) for 91 breast invasive carcinomas. Quantitative MRI phenotypes of tumors (such as tumor size, shape, margin, and blood flow kinetics) were associated with their corresponding molecular profiles (including DNA mutation, miRNA expression, protein expression, pathway gene expression and copy number variation). We found that transcriptional activities of various genetic pathways were positively associated with tumor size, blurred tumor margin, and irregular tumor shape and that miRNA expressions were associated with the tumor size and enhancement texture, but not with other types of radiomic phenotypes. We provide all the association findings as a resource for the research community (available at http://compgenome.org/Radiogenomics/). These findings pave potential paths for the discovery of genetic mechanisms regulating specific tumor phenotypes and for improving MRI techniques as potential non-invasive approaches to probe the cancer molecular status.",2015-12-07 +30445448,The impact of the General Data Protection Regulation on health research.,"

Background

On May 25, 2018 the General Data Protection Regulation (hereafter the GDPR or the Regulation) came into force, replacing the Data Protection Directive 95/46/EC (upon which the Data Protection Act 1998 is based), and imposing new responsibilities on organizations which process the data of European Union citizens.

Sources of data

This piece examines the impact of the Regulation on health research.

Areas of agreement

The Regulation seeks to harmonize data privacy laws across Europe, to protect and empower all EU citizens' data privacy and to reshape the way that organizations approach data privacy (See the GDPR portal at: https://www.eugdpr.org/ (accessed 8 May 2018)). As a Regulation the GDPR is directly applicable in all member states as opposed to a directive which requires national implementing measures (In the UK the Data Protection Act 1998 was the implementing legislation for the Data Protection Directive 95/46/EC.).

Areas of controversy

The Regulation is sector wide, but its impact on organizations is sector specific. In some sectors, the Regulation inhibits the processing of personal data, whilst in others it enables that processing. The Regulation takes the position that the 'processing of data should be designed to serve mankind' (Recital 4). Whilst it does not spell out what exactly is meant by this, it indicates that a proportionate approach will be taken to the protection of personal data, where that data can be processed for common goods such as healthcare. Thus, the protection of personal data is not absolute, but considered in relation to its function in society and balanced with other fundamental rights in accordance with the principle of proportionality (Recital 4). Differing interpretations of proportionality can detract from the harmonization objective of the Regulation.

Growing points

Reflecting the commitment to proportionality, scientific research holds a privileged position in the Regulation. Throughout the Regulation provision is made for organizations that process personal data for scientific research purposes to avoid restrictive measures which might impede the increase of knowledge. However, the application of the Regulation differs across health research sectors and across jurisdictions. Transparency and engagement across the health research sector is required to promote alignment.

Areas timely for developing research

Research which focuses on the particular problems which arise in the context of the regulation's application to health research would be welcome. Particularly in the context of the operation of the Regulation alongside the duty of confidentiality and the variation in approaches across Member States.",2018-12-01 +32166484,Modeling and insights into molecular basis of low molecular weight respiratory sensitizers.,"Respiratory sensitization has been considered an important toxicological endpoint, because of the severe risk to human health. A great part of sensitization events were caused by low molecular weight (< 1000) respiratory sensitizers in the past decades. However, there is currently no widely accepted test method that can identify prospective low molecular weight respiratory sensitisers. Herein, we performed the study of modeling and insights into molecular basis of low molecular weight respiratory sensitizers with a high-quality data set containing 136 respiratory sensitizers and 518 nonsensitizers. We built a number of classification models by using OCHEM tools, and a consensus model was developed based on the ten best individual models. The consensus model showed good predictive ability with a balanced accuracy of 0.78 and 0.85 on fivefold cross-validation and external validation, respectively. The readers can predict the respiratory sensitization of organic compounds via https://ochem.eu/article/114857 . The effect of several molecular properties on respiratory sensitization was also evaluated. The results indicated that these properties differ significantly between respiratory sensitizers and nonsensitizers. Furthermore, 14 privileged substructures responsible for respiratory sensitization were identified. 
We hope the models and the findings could provide useful help for environmental risk assessment.",2020-03-12 +24174537,The Database of Genomic Variants: a curated collection of structural variation in the human genome.,"Over the past decade, the Database of Genomic Variants (DGV; http://dgv.tcag.ca/) has provided a publicly accessible, comprehensive curated catalogue of structural variation (SV) found in the genomes of control individuals from worldwide populations. Here, we describe updates and new features, which have expanded the utility of DGV for both the basic research and clinical diagnostic communities. The current version of DGV consists of 55 published studies, comprising >2.5 million entries identified in >22,300 genomes. Studies included in DGV are selected from the accessioned data sets in the archival SV databases dbVar (NCBI) and DGVa (EBI), and then further curated for accuracy and validity. The core visualization tool (gbrowse) has been upgraded with additional functions to facilitate data analysis and comparison, and a new query tool has been developed to provide flexible and interactive access to the data. The content from DGV is regularly incorporated into other large-scale genome reference databases and represents a standard data resource for new product and database development, in particular for copy number variation testing in clinical labs. The accurate cataloguing of variants in DGV will continue to enable medical genetics and genome sequencing research.",2013-10-29 +31681030,A Novel Approach to Assess Sleep-Related Rhythmic Movement Disorder in Children Using Automatic 3D Analysis.,"Background: Unlike other episodic sleep disorders in childhood, there are no agreed severity indices for rhythmic movement disorder. While movements can be characterized in detail by polysomnography, in our experience most children inhibit rhythmic movement during polysomnography. 
Actigraphy and home video allow assessment in the child's own environment, but both have limitations. Standard actigraphy analysis algorithms fail to differentiate rhythmic movements from other movements. Manual annotation of 2D video is time consuming. We aimed to develop a sensitive, reliable method to detect and quantify rhythmic movements using marker free and automatic 3D video analysis. Method: Patients with rhythmic movement disorder (n = 6, 4 male) between age 5 and 14 years (M: 9.0 years, SD: 4.2 years) spent three nights in the sleep laboratory as part of a feasibility study (https://clinicaltrials.gov/ct2/show/NCT03528096). 2D and 3D video data recorded during the adaptation and baseline nights were analyzed. One ceiling-mounted camera captured 3D depth images, while another recorded 2D video. We developed algorithms to analyze the characteristics of rhythmic movements and built a classifier to distinguish between rhythmic and non-rhythmic movements based on 3D video data alone. Data from 3D automated analysis were compared to manual 2D video annotations to assess algorithm performance. Novel indices were developed, specifically the rhythmic movement index, frequency index, and duration index, to better characterize severity of rhythmic movement disorder in children. Result: Automatic 3D video analysis demonstrated high levels of agreement with the manual approach indicated by a Cohen's kappa >0.9 and F1-score >0.9. We also demonstrated how rhythmic movement assessment can be improved using newly introduced indices illustrated with plots for ease of visualization. Conclusion: 3D video technology is widely available and can be readily integrated into sleep laboratory settings. Our automatic 3D video analysis algorithm yields reliable quantitative information about rhythmic movements, reducing the burden of manual scoring. 
Furthermore, we propose novel rhythmic movement disorder severity indices that offer a means to standardize measurement of this disorder in both clinical and research practice. The significance of the results is limited due to the nature of a feasibility study and its small number of samples. A larger follow up study is needed to confirm presented results.",2019-10-16 +31664080,dendPoint: a web resource for dendrimer pharmacokinetics investigation and prediction.,"Nanomedicine development currently suffers from a lack of efficient tools to predict pharmacokinetic behavior without relying upon testing in large numbers of animals, impacting success rates and development costs. This work presents dendPoint, the first in silico model to predict the intravenous pharmacokinetics of dendrimers, a commonly explored drug vector, based on physicochemical properties. We have manually curated the largest relational database of dendrimer pharmacokinetic parameters and their structural/physicochemical properties. This was used to develop a machine learning-based model capable of accurately predicting pharmacokinetic parameters, including half-life, clearance, volume of distribution and dose recovered in the liver and urine. dendPoint successfully predicts dendrimer pharmacokinetic properties, achieving correlations of up to r = 0.83 and Q2 up to 0.68. dendPoint is freely available as a user-friendly web-service and database at http://biosig.unimelb.edu.au/dendpoint . This platform is ultimately expected to be used to guide dendrimer construct design and refinement prior to embarking on more time consuming and expensive in vivo testing.",2019-10-29 +30726865,Structured crowdsourcing enables convolutional segmentation of histology images.,"

Motivation

While deep-learning algorithms have demonstrated outstanding performance in semantic image segmentation tasks, large annotation datasets are needed to create accurate models. Annotation of histology images is challenging due to the effort and experience required to carefully delineate tissue structures, and difficulties related to sharing and markup of whole-slide images.

Results

We recruited 25 participants, ranging in experience from senior pathologists to medical students, to delineate tissue regions in 151 breast cancer slides using the Digital Slide Archive. Inter-participant discordance was systematically evaluated, revealing low discordance for tumor and stroma, and higher discordance for more subjectively defined or rare tissue classes. Feedback provided by senior participants enabled the generation and curation of 20 000+ annotated tissue regions. Fully convolutional networks trained using these annotations were highly accurate (mean AUC=0.945), and the scale of annotation data provided notable improvements in image classification accuracy.

Availability and implementation

Dataset is freely available at: https://goo.gl/cNM4EL.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +30228802,Bioprospecting for Genes Encoding Hydrocarbon-Degrading Enzymes from Metagenomic Samples Isolated from 
Northern Adriatic Sea Sediments.,"Three metagenomic libraries were constructed using surface sediment samples from the northern Adriatic Sea. Two of the samples were taken from a highly polluted and an unpolluted site respectively. The third sample from a polluted site had been enriched using crude oil. The results of the metagenome analyses were incorporated in the REDPET relational database (http://redpet.bioinfo.pbf.hr/REDPET), which was generated using the previously developed MEGGASENSE platform. The database includes taxonomic data to allow the assessment of the biodiversity of metagenomic libraries and a general functional analysis of genes using hidden Markov model (HMM) profiles based on the KEGG database. A set of 22 specialised HMM profiles was developed to detect putative genes for hydrocarbon-degrading enzymes. Use of these profiles showed that the metagenomic library generated after selection on crude oil had enriched genes for aerobic n-alkane degradation. The use of this system for bioprospecting was exemplified using potential alkB and almA genes from this library.",2018-06-01 +30124794,Genome Detective: an automated system for virus identification from high-throughput sequencing data.,SUMMARY:Genome Detective is an easy to use web-based software application that assembles the genomes of viruses quickly and accurately. The application uses a novel alignment method that constructs genomes by reference-based linking of de novo contigs by combining amino-acids and nucleotide scores. The software was optimized using synthetic datasets to represent the great diversity of virus genomes. The application was then validated with next generation sequencing data of hundreds of viruses. User time is minimal and it is limited to the time required to upload the data. AVAILABILITY AND IMPLEMENTATION:Available online: http://www.genomedetective.com/app/typingtool/virus/. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.,2019-03-01 +25635527,Autophagy Regulatory Network - a systems-level bioinformatics resource for studying the mechanism and regulation of autophagy.,"Autophagy is a complex cellular process having multiple roles, depending on tissue, physiological, or pathological conditions. Major post-translational regulators of autophagy are well known, however, they have not yet been collected comprehensively. The precise and context-dependent regulation of autophagy necessitates additional regulators, including transcriptional and post-transcriptional components that are listed in various datasets. Prompted by the lack of systems-level autophagy-related information, we manually collected the literature and integrated external resources to gain a high coverage autophagy database. We developed an online resource, Autophagy Regulatory Network (ARN; http://autophagy-regulation.org), to provide an integrated and systems-level database for autophagy research. ARN contains manually curated, imported, and predicted interactions of autophagy components (1,485 proteins with 4,013 interactions) in humans. We listed 413 transcription factors and 386 miRNAs that could regulate autophagy components or their protein regulators. We also connected the above-mentioned autophagy components and regulators with signaling pathways from the SignaLink 2 resource. The user-friendly website of ARN allows researchers without computational background to search, browse, and download the database. The database can be downloaded in SQL, CSV, BioPAX, SBML, PSI-MI, and in a Cytoscape CYS file formats. ARN has the potential to facilitate the experimental validation of novel autophagy components and regulators. In addition, ARN helps the investigation of transcription factors, miRNAs and signaling pathways implicated in the control of the autophagic pathway. 
The list of such known and predicted regulators could be important in pharmacological attempts against cancer and neurodegenerative diseases.",2015-01-01 +32348157,Media Use by Older Adults With Hearing Loss: An Exploratory Survey.,"Objectives There has been a substantial increase in people with health conditions seeking health-related information online. The aim of this study was to examine the media usage by older adults with hearing loss. Method The study used a cross-sectional survey design. A total of 556 older adults with hearing loss (Hearing Tracker website users) completed the survey that was focused on (a) demographic information, (b) general electronic media usage, (c) sources of hearing health information, and (d) social media use for hearing health information. Data were analyzed using descriptive statistics and chi-square tests. Results When seeking hearing health care information, the majority of the participants turned to the Internet (54%) followed by health professionals (34%) as the first response to their symptoms. Both sources were also rated as the easiest means of obtaining hearing health information. The information from health care providers was rated as more reliable and important for decision making than that from the Internet. Facebook and YouTube were the most frequently used social media platforms with over 40% of the respondents using them ""most of the time"" or ""sometimes."" All the social media platforms were rated less favorably than other sources for ease of finding information, reliability, and importance in decision making. Conclusion Older adults with hearing loss use various forms of electronic media for seeking hearing health information. They place the most trust on the information obtained from hearing health care professionals. These professionals need to be aware of the quality of information available on the Internet and social media sources in order to direct patients to credible sources. 
Supplemental Material https://doi.org/10.23641/asha.12170397.",2020-04-29 +28376796,AHCODA-DB: a data repository with web-based mining tools for the analysis of automated high-content mouse phenomics data.,"

Background

Systematic, standardized and in-depth phenotyping and data analyses of rodent behaviour empowers gene-function studies, drug testing and therapy design. However, no data repositories are currently available for standardized quality control, data analysis and mining at the resolution of individual mice.

Description

Here, we present AHCODA-DB, a public data repository with standardized quality control and exclusion criteria aimed to enhance robustness of data, enabled with web-based mining tools for the analysis of individually and group-wise collected mouse phenotypic data. AHCODA-DB allows monitoring in vivo effects of compounds collected from conventional behavioural tests and from automated home-cage experiments assessing spontaneous behaviour, anxiety and cognition without human interference. AHCODA-DB includes such data from mutant mice (transgenics, knock-out, knock-in), (recombinant) inbred strains, and compound effects in wildtype mice and disease models. AHCODA-DB provides real time statistical analyses with single mouse resolution and versatile suite of data presentation tools. On March 9th, 2017 AHCODA-DB contained 650 k data points on 2419 parameters from 1563 mice.

Conclusion

AHCODA-DB provides users with tools to systematically explore mouse behavioural data, both with positive and negative outcome, published and unpublished, across time and experiments with single mouse resolution. The standardized (automated) experimental settings and the large current dataset (1563 mice) in AHCODA-DB provide a unique framework for the interpretation of behavioural data and drug effects. The use of common ontologies allows data export to other databases such as the Mouse Phenome Database. Unbiased presentation of positive and negative data obtained under the highly standardized screening conditions increase cost efficiency of publicly funded mouse screening projects and help to reach consensus conclusions on drug responses and mouse behavioural phenotypes. The website is publicly accessible through https://public.sylics.com and can be viewed in every recent version of all commonly used browsers.",2017-04-04 +31967544,"Human Neocortical Neurosolver (HNN), a new software tool for interpreting the cellular and network origin of human MEG/EEG data. ","Magneto- and electro-encephalography (MEG/EEG) non-invasively record human brain activity with millisecond resolution providing reliable markers of healthy and disease states. Relating these macroscopic signals to underlying cellular- and circuit-level generators is a limitation that constrains using MEG/EEG to reveal novel principles of information processing or to translate findings into new therapies for neuropathology. To address this problem, we built Human Neocortical Neurosolver (HNN, https://hnn.brown.edu) software. HNN has a graphical user interface designed to help researchers and clinicians interpret the neural origins of MEG/EEG. HNN's core is a neocortical circuit model that accounts for biophysical origins of electrical currents generating MEG/EEG. Data can be directly compared to simulated signals and parameters easily manipulated to develop/test hypotheses on a signal's origin. 
Tutorials teach users to simulate commonly measured signals, including event related potentials and brain rhythms. HNN's ability to associate signals across scales makes it a unique tool for translational neuroscience research.",2020-01-22 +24304896,CellFinder: a cell data repository.,"CellFinder (http://www.cellfinder.org) is a comprehensive one-stop resource for molecular data characterizing mammalian cells in different tissues and in different development stages. It is built from carefully selected data sets stemming from other curated databases and the biomedical literature. To date, CellFinder describes 3394 cell types and 50 951 cell lines. The database currently contains 3055 microscopic and anatomical images, 205 whole-genome expression profiles of 194 cell/tissue types from RNA-seq and microarrays and 553 905 protein expressions for 535 cells/tissues. Text mining of a corpus of >2000 publications followed by manual curation confirmed expression information on ∼900 proteins and genes. CellFinder's data model is capable to seamlessly represent entities from single cells to the organ level, to incorporate mappings between homologous entities in different species and to describe processes of cell development and differentiation. Its ontological backbone currently consists of 204 741 ontology terms incorporated from 10 different ontologies unified under the novel CELDA ontology. CellFinder's web portal allows searching, browsing and comparing the stored data, interactive construction of developmental trees and navigating the partonomic hierarchy of cells and tissues through a unique body browser designed for life scientists and clinicians.",2013-12-03 +24077841,BBGRE: brain and body genetic resource exchange.,"Studies of copy number variation (genomic imbalance) are providing insight into both complex and Mendelian genetic disorders. 
Array comparative genomic hybridization (array CGH), a tool for detecting copy number variants at a resolution previously unattainable in clinical diagnostics, is increasingly used as a first-line test at clinical genetics laboratories. Many copy number variants are of unknown significance; correlation and comparison with other patients will therefore be essential for interpretation. We present a resource for clinicians and researchers to identify specific copy number variants and associated phenotypes in patients from a single catchment area, tested using array CGH at the SE Thames Regional Genetics Centre, London. User-friendly searching is available, with links to external resources, providing a powerful tool for the elucidation of gene function. We hope to promote research by facilitating interactions between researchers and patients. The BBGRE (Brain and Body Genetic Resource Exchange) resource can be accessed at the following website: http://bbgre.org DATABASE URL: http://bbgre.org.",2013-09-27 +32343923,Does the Inclusion of a Genome-Wide Polygenic Score Improve Early Risk Prediction for Later Language and Literacy Delay?,"Purpose The ability to identify children early in development who are at substantial risk for language/literacy difficulties would have great benefit both for the children and for the educational and therapeutic institutions that serve them. Information that is relatively easily available prior to the age of 3 years, such as late talking, family history of language/literacy difficulties, and socioeconomic status, have some but very limited predictive power. Here, we examine whether the inclusion of a DNA-based genome-wide polygenic score that has been shown to capture children's genetic propensity for educational attainment (EA3) adds enough prediction to yield a clinically useful score. 
Method Data are longitudinal scores of 1,420 children from the Twins Early Development Study, who were assessed at ages 2 and 3 years on language and nonverbal ability and at 12 years of age on oral language, word decoding, and reading comprehension. Five risk factors were examined: expressive vocabulary, nonverbal ability (these two from parent report), family history, mothers' education, and EA3. Analyses were conducted both for continuous and categorically defined measures of risk and outcome. Results Language and literacy abilities at 12 years of age were significantly but modestly predicted by the risk factors, with a small but significant added prediction from EA3. Indices of diagnostic validity for poor outcomes, such as sensitivity and area under the curve statistics, were poor in all cases. Conclusions We conclude that, at present, clinically useful prediction from toddlerhood remains an unattained goal. Supplemental Material https://doi.org/10.23641/asha.12170331.",2020-04-28 +32414319,CSN: unsupervised approach for inferring biological networks based on the genome alone.,"BACKGROUND:Most organisms cannot be cultivated, as they live in unique ecological conditions that cannot be mimicked in the lab. Understanding the functionality of those organisms' genes and their interactions by performing large-scale measurements of transcription levels, protein-protein interactions or metabolism, is extremely difficult and, in some cases, impossible. Thus, efficient algorithms for deciphering genome functionality based only on the genomic sequences with no other experimental measurements are needed. RESULTS:In this study, we describe a novel algorithm that infers gene networks that we name Common Substring Network (CSN). The algorithm enables inferring novel regulatory relations among genes based only on the genomic sequence of a given organism and partial homolog/ortholog-based functional annotation. 
It can specifically infer the functional annotation of genes with unknown homology. This approach is based on the assumption that related genes, not necessarily homologs, tend to share sub-sequences, which may be related to common regulatory mechanisms, similar functionality of encoded proteins, common evolutionary history, and more. We demonstrate that CSNs, which are based on S. cerevisiae and E. coli genomes, have properties similar to 'traditional' biological networks inferred from experiments. Highly expressed genes tend to have higher degree nodes in the CSN, genes with similar protein functionality tend to be closer, and the CSN graph exhibits a power-law degree distribution. Also, we show how the CSN can be used for predicting gene interactions and functions. CONCLUSIONS:The reported results suggest that 'silent' code inside the transcript can help to predict central features of biological networks and gene function. This approach can help researchers to understand the genome of novel microorganisms, analyze metagenomic data, and can help to decipher new gene functions. AVAILABILITY:Our MATLAB implementation of CSN is available at https://www.cs.tau.ac.il/~tamirtul/CSN-Autogen.",2020-05-15 +29121237,Human Ageing Genomic Resources: new and updated databases.,"In spite of a growing body of research and data, human ageing remains a poorly understood process. Over 10 years ago we developed the Human Ageing Genomic Resources (HAGR), a collection of databases and tools for studying the biology and genetics of ageing. Here, we present HAGR's main functionalities, highlighting new additions and improvements. 
HAGR consists of six core databases: (i) the GenAge database of ageing-related genes, in turn composed of a dataset of >300 human ageing-related genes and a dataset with >2000 genes associated with ageing or longevity in model organisms; (ii) the AnAge database of animal ageing and longevity, featuring >4000 species; (iii) the GenDR database with >200 genes associated with the life-extending effects of dietary restriction; (iv) the LongevityMap database of human genetic association studies of longevity with >500 entries; (v) the DrugAge database with >400 ageing or longevity-associated drugs or compounds; (vi) the CellAge database with >200 genes associated with cell senescence. All our databases are manually curated by experts and regularly updated to ensure a high quality data. Cross-links across our databases and to external resources help researchers locate and integrate relevant information. HAGR is freely available online (http://genomics.senescence.info/).",2018-01-01 +31630011,REFUGE Challenge: A unified framework for evaluating automated methods for glaucoma assessment from fundus photographs.,"Glaucoma is one of the leading causes of irreversible but preventable blindness in working age populations. Color fundus photography (CFP) is the most cost-effective imaging modality to screen for retinal disorders. However, its application to glaucoma has been limited to the computation of a few related biomarkers such as the vertical cup-to-disc ratio. Deep learning approaches, although widely applied for medical image analysis, have not been extensively used for glaucoma assessment due to the limited size of the available data sets. Furthermore, the lack of a standardize benchmark strategy makes difficult to compare existing methods in a uniform way. In order to overcome these issues we set up the Retinal Fundus Glaucoma Challenge, REFUGE (https://refuge.grand-challenge.org), held in conjunction with MICCAI 2018. 
The challenge consisted of two primary tasks, namely optic disc/cup segmentation and glaucoma classification. As part of REFUGE, we have publicly released a data set of 1200 fundus images with ground truth segmentations and clinical glaucoma labels, currently the largest existing one. We have also built an evaluation framework to ease and ensure fairness in the comparison of different models, encouraging the development of novel techniques in the field. 12 teams qualified and participated in the online challenge. This paper summarizes their methods and analyzes their corresponding results. In particular, we observed that two of the top-ranked teams outperformed two human experts in the glaucoma classification task. Furthermore, the segmentation results were in general consistent with the ground truth annotations, with complementary outcomes that can be further exploited by ensembling the results.",2019-10-08 +29036653,miRCarta: a central repository for collecting miRNA candidates.,"The continuous increase of available biological data as consequence of modern high-throughput technologies poses new challenges for analysis techniques and database applications. Especially for miRNAs, one class of small non-coding RNAs, many algorithms have been developed to predict new candidates from next-generation sequencing data. While the amount of publications describing novel miRNA candidates keeps steadily increasing, the current gold standard database for miRNAs - miRBase - has not been updated since June 2014. As a result, publications describing new miRNA candidates in the last three to five years might have a substantial overlap of candidates without noticing. With miRCarta we implemented a database to collect novel miRNA candidates and augment the information provided by miRBase. In the first stage, miRCarta is thought to be a highly sensitive collection of potential miRNA candidates with a high degree of analysis functionality, annotations and details on each miRNA. 
We added-besides the full content of the miRBase-12,857 human miRNA precursors to miRCarta. Users can match their own predictions to the entries of miRCarta to reduce potential redundancies in their studies. miRCarta provides the most comprehensive collection of human miRNAs and miRNA candidates to form a basis for further refinement and validation studies. The database is freely accessible at https://mircarta.cs.uni-saarland.de/.",2018-01-01 +31031942,Crossing the Pillars of Hercules: Understanding transoceanic migrations of seabirds throughout their breeding range.,"Variability in long-distance migration strategies is still poorly understood due to the fact that individuals are often tracked from a single colony/population. Transoceanic migrations of Scopoli's shearwaters (Calonectris diomedea) across the Strait of Gibraltar (SoG) have been tracked from several breeding colonies isolatedly, and factors related to the variability in phenological schedules among different populations remain, therefore, not well-understood. Using light-level geolocator data, I examined the autumn (postbreeding) and spring (prebreeding) migratory passage dates through SoG of four populations of Scopoli's shearwater spread along the longitudinal breeding range of the species. Additionally, I also estimated the at-sea activity patterns (from immersion data) during both migratory passages, as well as the body size (from morphometric data) of the individuals of these populations. On average, Scopoli's shearwaters leave the Mediterranean (cross SoG) on 31 October ± 1.8 days on their autumn migrations and return on 03 March ± 1.6 days on their spring migrations. At the population level, there was a clear gradient in the timing of crossing SoG: birds from the westernmost populations (Murcia, SE Spain) were the first ones in leaving the Mediterranean while easternmost breeders (Paximada, Crete) were the last ones. 
In spring, only birds from the largest breeding population (Zembra, Tunisia) seemed to advance their return and crossed SoG significantly earlier than birds tracked at the remaining populations. In both passages, shearwaters from central and eastern populations spent more time flying than their conspecifics from the western Mediterranean. Scopoli's shearwater populations display a differential phenology and behavior in their migratory passages through SoG. The longitudinal gradient in body size already reported for the species could be an evolutionary response to an obvious trade-off between sharing common wintering grounds in the Atlantic Ocean and the temporal constraints of restoring physiological condition in those grounds.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://hdl.handle.net/2445/128784.",2019-04-01 +31516954,Experimental dataset of nanoporous GaN photoelectrode supported on patterned sapphire substrates for photoelectrochemical water splitting.,"GaN is one of the most promising materials for high PEC efficiency to produce clean, renewable hydrogen in an ecofriendly manner (Ebaid et al., 2015; Kamimura et al., 2017; Yang et al., 2018; Ohkawa et al., 2013). Trough assays of nanoporous gallium nitride (GaN) photoelectrode, we recently demonstrated an improved PEC efficiency and photocurrent density of nanoporous GaN photoelectrode by 470% times with respect to planar counterpart (Li et al., 2019). Here, we report original data acquired under UV-visible spectrometer, X-ray diffraction (XRD), room temperature PL measurements and PEC measurements, based on the characterization of different sapphire substrate, different GaN samples and different GaN photoelectrodes. The optical properties and photoelectrochemical properties of the corresponding samples and possible mechanisms are presented, which is freely available (Li et al., 2019). The data can be valuable for researchers interested in photoelectrochemical water splitting, as well as to researchers developing fabrication of nanoporous photoelectrode. For more insight please see the research article ""A nanoporous GaN photoelectrode on patterned sapphire substrates for high-efficiency photoelectrochemical water splitting"", https://doi.org/10.1016/j.jallcom.2019.06.234.",2019-08-28 +25244105,OOMMPPAA: a tool to aid directed synthesis by the combined analysis of activity and structural data.,"There is an ever increasing resource in terms of both structural information and activity data for many protein targets. 
In this paper we describe OOMMPPAA, a novel computational tool designed to inform compound design by combining such data. OOMMPPAA uses 3D matched molecular pairs to generate 3D ligand conformations. It then identifies pharmacophoric transformations between pairs of compounds and associates them with their relevant activity changes. OOMMPPAA presents this data in an interactive application providing the user with a visual summary of important interaction regions in the context of the binding site. We present validation of the tool using openly available data for CDK2 and a GlaxoSmithKline data set for a SAM-dependent methyl-transferase. We demonstrate OOMMPPAA's application in optimizing both potency and cell permeability and use OOMMPPAA to highlight nuanced and cross-series SAR. OOMMPPAA is freely available to download at http://oommppaa.sgc.ox.ac.uk/OOMMPPAA/ .",2014-10-09 +29106634,TCMID 2.0: a comprehensive resource for TCM.,"As a traditional medical intervention in Asia and a complementary and alternative medicine in western countries, Traditional Chinese Medicine (TCM) is capturing worldwide attention in life science field. Traditional Chinese Medicine Integrated Database (TCMID), which was originally launched in 2013, was a comprehensive database aiming at TCM's modernization and standardization. It has been highly recognized among pharmacologists and scholars in TCM researches. The latest release, TCMID 2.0 (http://www.megabionet.org/tcmid/), replenished the preceding database with 18 203 herbal ingredients, 15 prescriptions, 82 related targets, 1356 drugs, 842 diseases and numerous new connections between them. Considering that chemical changes might take place in decocting process of prescriptions, which may result in new ingredients, new data containing the prescription ingredients was collected in current version. 
In addition, 778 herbal mass spectrometry (MS) spectra related to 170 herbs were appended to show the variation of herbal quality in different origin and distinguish genuine medicinal materials from common ones while 3895 MS spectra of 729 ingredients were added as the supplementary materials of component identification. With the significant increase of data, TCMID 2.0 will further facilitate TCM's modernization and enhance the exploration of underlying biological processes that are response to the diverse pharmacologic actions of TCM.",2018-01-01 +27307637,RCK: accurate and efficient inference of sequence- and structure-based protein-RNA binding models from RNAcompete data.,"

Motivation

Protein-RNA interactions, which play vital roles in many processes, are mediated through both RNA sequence and structure. CLIP-based methods, which measure protein-RNA binding in vivo, suffer from experimental noise and systematic biases, whereas in vitro experiments capture a clearer signal of protein RNA-binding. Among them, RNAcompete provides binding affinities of a specific protein to more than 240 000 unstructured RNA probes in one experiment. The computational challenge is to infer RNA structure- and sequence-based binding models from these data. The state-of-the-art in sequence models, Deepbind, does not model structural preferences. RNAcontext models both sequence and structure preferences, but is outperformed by GraphProt. Unfortunately, GraphProt cannot detect structural preferences from RNAcompete data due to the unstructured nature of the data, as noted by its developers, nor can it be tractably run on the full RNAcompete dataset.

Results

We develop RCK, an efficient, scalable algorithm that infers both sequence and structure preferences based on a new k-mer based model. Remarkably, even though RNAcompete data is designed to be unstructured, RCK can still learn structural preferences from it. RCK significantly outperforms both RNAcontext and Deepbind in in vitro binding prediction for 244 RNAcompete experiments. Moreover, RCK is also faster and uses less memory, which enables scalability. While currently on par with existing methods in in vivo binding prediction on a small scale test, we demonstrate that RCK will increasingly benefit from experimentally measured RNA structure profiles as compared to computationally predicted ones. By running RCK on the entire RNAcompete dataset, we generate and provide as a resource a set of protein-RNA structure-based models on an unprecedented scale.

Availability and implementation

Software and models are freely available at http://rck.csail.mit.edu/

Contact

bab@mit.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +31833288,Pigmentation Phenotype Prediction of Chinese Populations from Different Language Families.,"

Abstract

Objective To predict the pigmentation phenotypes of Chinese populations from different language families, analyze the differences and provide reference data for forensic anthropology and genetics. Methods The HIrisPlex-S multiplex amplification system with 41 loci related to pigmentation phenotypes was constructed in the laboratory, and 2 666 DNA samples of adult males of 17 populations from six language families, including Indo-European, Sino-Tibetan, Altaic, Hmong-Mien, Tai-Kadai and Austro-Asiatic language families distributed in different regions of China were genotyped. The pigmentation phenotype category of each individual was predicted using the online prediction system (https://HIrisPlex.erasmusmc.nl/), and then the output data were statistically analyzed. Results About 1.92% of the individuals of Asian-European admixed populations from Indo-European and Altaic language families had blue eyes and 34.29% had brown or gold hair. The phenotypes of the color of eyes and hair of other populations had no significant difference, all individuals had brown eyes and black hair. There were differences in skin color of populations of different language families and geographical areas. The Indo-European language family had the lightest skin color, and the Austro-Asiatic language family had the darkest skin color; the southwestern minority populations had a darker skin color than populations in the plain areas. Conclusion The prediction results of pigmentation phenotype of Chinese populations are consistent with the perception of the appearance of each population, proving the reliability of the system. The color of eyes and hair are mainly related to ancestral components, while the skin color shows the differences between language families, and is closely related to geographical distribution of populations.",2019-10-25 +29617941,A reference peptide database for proteome quantification based on experimental mass spectrum response curves.,"

Motivation

Mass spectrometry (MS) based quantification of proteins/peptides has become a powerful tool in biological research with high sensitivity and throughput. The accuracy of quantification, however, has been problematic as not all peptides are suitable for quantification. Several methods and tools have been developed to identify peptides that respond well in mass spectrometry and they are mainly based on predictive models, and rarely consider the linearity of the response curve, limiting the accuracy and applicability of the methods. An alternative solution is to select empirically superior peptides that offer satisfactory MS response intensity and linearity in a wide dynamic range of peptide concentration.

Results

We constructed a reference database for proteome quantification based on experimental mass spectrum response curves. The intensity and dynamic range of over 2 647 773 transitions from 121 318 peptides were obtained from a set of dilution experiments, covering 11 040 gene products. These transitions and peptides were evaluated and presented in a database named SCRIPT-MAP. We showed that the best-responder (BR) peptide approach for quantification based on SCRIPT-MAP database is robust, repeatable and accurate in proteome-scale protein quantification. This study provides a reference database as well as a peptides/transitions selection method for quantitative proteomics.

Availability and implementation

SCRIPT-MAP database is available at http://www.firmiana.org/responders/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-08-01 +32780765,"Development and validation of a model for individualized prediction of hospitalization risk in 4,536 patients with COVID-19.","

Background

Coronavirus Disease 2019 is a pandemic that is straining healthcare resources, mainly hospital beds. Multiple risk factors of disease progression requiring hospitalization have been identified, but medical decision-making remains complex.

Objective

To characterize a large cohort of patients hospitalized with COVID-19 and their outcomes, and to develop and validate a statistical model that allows individualized prediction of future hospitalization risk for a patient newly diagnosed with COVID-19.

Design

Retrospective cohort study of patients with COVID-19 applying a least absolute shrinkage and selection operator (LASSO) logistic regression algorithm to retain the most predictive features for hospitalization risk, followed by validation in a temporally distinct patient cohort. The final model was displayed as a nomogram and programmed into an online risk calculator.

Setting

One healthcare system in Ohio and Florida.

Participants

All patients infected with SARS-CoV-2 between March 8, 2020 and June 5, 2020. Those tested before May 1 were included in the development cohort, while those tested May 1 and later comprised the validation cohort.

Measurements

Demographic, clinical, social influencers of health, exposure risk, medical co-morbidities, vaccination history, presenting symptoms, medications, and laboratory values were collected on all patients, and considered in our model development.

Results

4,536 patients tested positive for SARS-CoV-2 during the study period. Of those, 958 (21.1%) required hospitalization. By day 3 of hospitalization, 24% of patients were transferred to the intensive care unit, and around half of the remaining patients were discharged home. Ten patients died. Hospitalization risk was increased with older age, black race, male sex, former smoking history, diabetes, hypertension, chronic lung disease, poor socioeconomic status, shortness of breath, diarrhea, and certain medications (NSAIDs, immunosuppressive treatment). Hospitalization risk was reduced with prior flu vaccination. Model discrimination was excellent with an area under the curve of 0.900 (95% confidence interval of 0.886-0.914) in the development cohort, and 0.813 (0.786, 0.839) in the validation cohort. The scaled Brier score was 42.6% (95% CI 37.8%, 47.4%) in the development cohort and 25.6% (19.9%, 31.3%) in the validation cohort. Calibration was very good. The online risk calculator is freely available and found at https://riskcalc.org/COVID19Hospitalization/.

Limitation

Retrospective cohort design.

Conclusion

Our study crystallizes published risk factors of COVID-19 progression, but also provides new data on the role of social influencers of health, race, and influenza vaccination. In a context of a pandemic and limited healthcare resources, individualized outcome prediction through this nomogram or online risk calculator can facilitate complex medical decision-making.",2020-08-11 +30111025,[Analysis on ecological factors and active components content of wild Dipsacus asper in Chongqing Wulong district].,"An HPLC method was developed for the determination of iridoid glycosides (loganin acid, loganin, sweroside) and saponins (asperosaponin Ⅵ) in the wild Dipsacus asper. A total of 108 samples consecutive growing 12 month were collected in 9 plots in Wulong district of Chongqing. Subsequent analysis of the content of loganin acid, loganin, sweroside and asperosaponin Ⅵ was performed by HPLC to evaluate the quality. In addition, 20 climate data provided by the world climate database (http://www.worldclim.org/) was analyzed to deduce the correlation between the growing environment factors and the active ingredient content accumulation of D. asperoides and choose the apposite growing environment for D. asper. The range of active ingredient content in wild D. asper were 0.01%-3.80%(loganin acid), 0.08%-0.62%(loganin), 0.12%-0.78%(sweroside), 0.64%-5.26%(asperosaponin Ⅵ). The highest content of these active ingredients was concentrated from February to April, with 2.64% of loganin acid, 0.36% of loganin), 0.57% of sweroside, and 3.09% of asperosaponin Ⅵ. The method used for determination of the active ingredient content in D. asper was simple and convenient with accurate result. The selection of the quadrats is scientific and reasonable and can be used for the analysis of the contents of the wild D. asper, thus provide a reference for quality evaluation of D. asper and protection of D. 
asper resources.",2018-07-01 +26602693,TCGASpliceSeq a compendium of alternative mRNA splicing in cancer.,"TCGA's RNASeq data represent one of the largest collections of cancer transcriptomes ever assembled. RNASeq technology, combined with computational tools like our SpliceSeq package, provides a comprehensive, detailed view of alternative mRNA splicing. Aberrant splicing patterns in cancers have been implicated in such processes as carcinogenesis, de-differentiation and metastasis. TCGA SpliceSeq (http://bioinformatics.mdanderson.org/TCGASpliceSeq) is a web-based resource that provides a quick, user-friendly, highly visual interface for exploring the alternative splicing patterns of TCGA tumors. Percent Spliced In (PSI) values for splice events on samples from 33 different tumor types, including available adjacent normal samples, have been loaded into TCGA SpliceSeq. Investigators can interrogate genes of interest, search for the genes that show the strongest variation between or among selected tumor types, or explore splicing pattern changes between tumor and adjacent normal samples. The interface presents intuitive graphical representations of splicing patterns, read counts and various statistical summaries, including percent spliced in. Splicing data can also be downloaded for inclusion in integrative analyses. TCGA SpliceSeq is freely available for academic, government or commercial use.",2015-11-23 +31297843,Protein Topology Prediction Algorithms Systematically Investigated in the Yeast Saccharomyces cerevisiae.,"Membrane proteins perform a variety of functions, all crucially dependent on their orientation in the membrane. However, neither the exact number of transmembrane domains (TMDs) nor the topology of most proteins have been experimentally determined. Due to this, most scientists rely primarily on prediction algorithms to determine topology and TMD assignments. 
Since these can give contradictory results, single-algorithm-based predictions are unreliable. To map the extent of potential misanalysis, the predictions of nine algorithms on the yeast proteome are compared and it is found that they have little agreement when predicting TMD number and termini orientation. To view all predictions in parallel, a webpage called TopologYeast: http://www.weizmann.ac.il/molgen/TopologYeast was created. Each algorithm is compared with experimental data and a poor agreement is found. The analysis suggests that more systematic data on protein topology are required to increase the training sets for prediction algorithms and to have accurate knowledge of membrane protein topology.",2019-07-11 +31519398,Illustrate: Software for Biomolecular Illustration.,"The small program Illustrate generates non-photorealistic images of biological molecules for use in dissemination, outreach, and education. The method has been used as part of the ""Molecule of the Month,"" an ongoing educational column at the RCSB Protein Data Bank (http://rcsb.org). Insights from 20 years of application of the program are presented, and the program has been released both as open-source Fortran at GitHub and through an interactive web-based interface.",2019-09-10 +29931085,htsget: a protocol for securely streaming genomic data.,"

Summary

Standardized interfaces for efficiently accessing high-throughput sequencing data are a fundamental requirement for large-scale genomic data sharing. We have developed htsget, a protocol for secure, efficient and reliable access to sequencing read and variation data. We demonstrate four independent client and server implementations, and the results of a comprehensive interoperability demonstration.

Availability and implementation

http://samtools.github.io/hts-specs/htsget.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +31652812,Introducing Murine Microbiome Database (MMDB): A Curated Database with Taxonomic Profiling of the Healthy Mouse Gastrointestinal Microbiome. ,"The gut microbiota modulates overall metabolism, the immune system and brain development of the host. The majority of mammalian gut microbiota consists of bacteria. Among various model animals, the mouse has been most widely used in pre-clinical biological experiments. The significant compositional differences in taxonomic profiles among different mouse strains due to gastrointestinal locations, genotypes and vendors have been well documented. However, details of such variations are yet to be elucidated. This study compiled and analyzed 16S rRNA gene-based taxonomic profiles of 554 healthy mouse samples from 14 different projects to construct a comprehensive database of the microbiome of a healthy mouse gastrointestinal tract. The database, named Murine Microbiome Database, should provide researchers with useful taxonomic information and better biological insight about how each taxon, such as genus and species, is associated with locations in the gastrointestinal tract, genotypes and vendors. The database is freely accessible over the Internet at http://leb.snu.ac.kr/mmdb/.",2019-10-23 +31835909,Speech Perception in Classroom Acoustics by Children With Hearing Loss and Wearing Hearing Aids.,"Purpose The classroom acoustic standard ANSI/ASA S12.60-2010/Part 1 requires a reverberation time (RT) for children with hearing impairment of 0.3 s, shorter than its requirement of 0.6 s for children with typical hearing. While preliminary data from conference proceedings support this new RT requirement of 0.3 s, peer-reviewed data that support 0.3-s RT are not available on those wearing hearing aids. 
To help address this, this article compares speech perception performance by children with hearing aids in RTs, including those specified in the ANSI/ASA-2010 standard. A related clinical issue is whether assessments of speech perception conducted in near-anechoic sound booths, which may overestimate performance in reverberant classrooms, may now provide a more reliable estimate when the child is in a classroom with a short RT of 0.3 s. To address this, this study compared speech perception by children with hearing aids in a sound booth to listening in 0.3-s RT. Method Participants listened in classroom RTs of 0.3, 0.6, and 0.9 s and in a near-anechoic sound booth. All conditions also included a 21-dB range of speech-to-noise ratios (SNRs) to further represent classroom listening environments. Performance measures using the Bamford-Kowal-Bench Speech-in-Noise (BKB-SIN) test were 50% correct word recognition across these acoustic conditions, with supplementary analyses of percent correct. Results Each reduction in RT from 0.9 to 0.6 to 0.3 s significantly benefited the children's perception of speech. Scores obtained in a sound booth were significantly better than those measured in 0.3-s RT. Conclusion These results support the acoustic standard of 0.3-s RT for children with hearing impairment in learning spaces ≤ 283 m3, as specified in ANSI/ASA S12.60-2010/Part 1. Additionally, speech perception testing in a sound booth did not predict accurately listening ability in a classroom with 0.3-s RT. Supplemental Material https://doi.org/10.23641/asha.11356487.",2019-12-13 +30848455,Post-processing of Large Bioactivity Data.,"Bioactivity data is a valuable scientific data type that needs to be findable, accessible, interoperable, and reusable (FAIR) (Wilkinson et al. Sci Data 3:160018, 2016). 
However, results from bioassay experiments often exist in formats that are difficult to interoperate across and reuse in follow-up research, especially when attempting to combine experimental records from many different sources. This chapter details common issues associated with the processing of large bioactivity data and methods for handling these issues in a post-processing scenario. Specifically described are observations from a recent effort (Harris, http://www.scrubchem.org , 2017) to post-process massive amounts of bioactivity data from the NIH's PubChem Bioassay repository (Wang et al., Nucleic Acids Res 42:1075-1082, 2014).",2019-01-01 +31976883,Fast Polynomial Approximation of Heat Kernel Convolution on Manifolds and Its Application to Brain Sulcal and Gyral Graph Pattern Analysis.,"Heat diffusion has been widely used in brain imaging for surface fairing, mesh regularization and cortical data smoothing. Motivated by diffusion wavelets and convolutional neural networks on graphs, we present a new fast and accurate numerical scheme to solve heat diffusion on surface meshes. This is achieved by approximating the heat kernel convolution using high degree orthogonal polynomials in the spectral domain. We also derive the closed-form expression of the spectral decomposition of the Laplace-Beltrami operator and use it to solve heat diffusion on a manifold for the first time. The proposed fast polynomial approximation scheme avoids solving for the eigenfunctions of the Laplace-Beltrami operator, which is computationally costly for large mesh size, and the numerical instability associated with the finite element method based diffusion solvers. The proposed method is applied in localizing the male and female differences in cortical sulcal and gyral graph patterns obtained from MRI in an innovative way. 
The MATLAB code is available at http://www.stat.wisc.edu/~mchung/chebyshev.",2020-01-17 +28916823,Digging into the low molecular weight peptidome with the OligoNet web server.,"Bioactive peptides play critical roles in regulating many biological processes. Recently, natural short peptides biomarkers are drawing significant attention and are considered as ""hidden treasure"" of drug candidates. High resolution and high mass accuracy provided by mass spectrometry (MS)-based untargeted metabolomics would enable the rapid detection and wide coverage of the low-molecular-weight peptidome. However, translating unknown masses (<1 500 Da) into putative peptides is often limited due to the lack of automatic data processing tools and to the limit of peptide databases. The web server OligoNet responds to this challenge by attempting to decompose each individual mass into a combination of amino acids out of metabolomics datasets. It provides an additional network-based data interpretation named ""Peptide degradation network"" (PDN), which unravels interesting relations between annotated peptides and generates potential functional patterns. The ab initio PDN built from yeast metabolic profiling data shows a great similarity with well-known metabolic networks, and could aid biological interpretation. OligoNet allows also an easy evaluation and interpretation of annotated peptides in systems biology, and is freely accessible at https://daniellyz200608105.shinyapps.io/OligoNet/ .",2017-09-15 +21372341,Key2Ann: a tool to process sequence sets by replacing database identifiers with a human-readable annotation.,"Deducing common properties or degrees of phylogenetic relationship by analyzing a grouping or clustering of sequence sets is a frequently used technique in computational biology. If interpreted by means of visual inspection, the conclusions depend for many of these applications on meaningful names for the input data. 
In accordance with the aim of the analysis, the sequences should be provided with names indicating the function of the genes or gene-products, the phylogenetic position or other properties characterizing the contributing species. However, sequences extracted from databases are most often annotated with identifiers which only implicitly contain the desired information. To solve this problem, we have designed and implemented a tool named Key2Ann, which replaces in multiple fasta files the database keys with short terms indicating the taxonomic position or other features like the gene name or the EC-number. In addition, properties like habitat, growth temperature or the degree of pathogenicity can be coded for microbial species. To allow for highest flexibility, the user can control the composition of the names by means of command line parameters. Key2Ann is written in Java and can be downloaded via http://www-bioinf.uni-regensburg.de/downl/Key2Ann.zip. We demonstrate the usage of Key2Ann by discussing three typical examples of phylogenetic analysis.",2011-03-04 +31253093,miRkwood: a tool for the reliable identification of microRNAs in plant genomes.,"BACKGROUND:MicroRNAs (miRNAs) play crucial roles in post-transcriptional regulation of eukaryotic gene expression and are involved in many aspects of plant development. Although several prediction tools are available for metazoan genomes, the number of tools dedicated to plants is relatively limited. RESULTS:Here, we present miRkwood, a user-friendly tool for the identification of miRNAs in plant genomes using small RNA sequencing data. Deep-sequencing data of Argonaute associated small RNAs showed that miRkwood is able to identify a large diversity of plant miRNAs and limits false positive predictions. Moreover, it outperforms current tools such as ShortStack and contrary to ShortStack, miRkwood provides a quality score allowing users to rank miRNA predictions. 
CONCLUSION:miRkwood is a very efficient tool for the annotation of miRNAs in plant genomes. It is available as a web server, as a standalone version, as a docker image and as a Galaxy tool: http://bioinfo.cristal.univ-lille.fr/mirkwood.",2019-06-28 +26868127,Seqinspector: position-based navigation through the ChIP-seq data landscape to identify gene expression regulators.,"

Background

The regulation of gene expression in eukaryotic cells is a complex process that involves epigenetic modifications and the interaction of DNA with multiple transcription factors. This process can be studied with unprecedented sensitivity using a combination of chromatin immunoprecipitation and next-generation DNA sequencing (ChIP-seq). Available ChIP-seq data can be further utilized to interpret new gene expression profiling experiments.

Results

Here, we describe seqinspector, a tool that accepts any set of genomic coordinates from ChIP-seq or RNA-seq studies to identify shared transcriptional regulators. The presented web resource includes a large collection of publicly available ChIP-seq and RNA-seq experiments (>1300 tracks) performed on transcription factors, histone modifications, RNA polymerases, enhancers and insulators in humans and mice. Over-representation is calculated based on the coverage computed directly from indexed files storing ChIP-seq data (bigwig). Therefore, seqinspector is not limited to pre-computed sets of gene promoters.

Conclusion

The tool can be used to identify common gene expression regulators for sets of co-expressed transcripts (including miRNAs, lncRNAs or any novel unannotated RNAs) or for sets of ChIP-seq peaks to identify putative protein-protein interactions or transcriptional co-factors. The tool is available at http://seqinspector.cremag.org.",2016-02-12 +32241338,Priorities for improved management of acute rheumatic fever and rheumatic heart disease: analysis of cross-sectional continuous quality improvement data in Aboriginal primary healthcare centres in Australia.,"Objective This study investigated the delivery of guideline-recommended services for the management of acute rheumatic fever (ARF) and rheumatic heart disease (RHD) in Australian primary healthcare centres participating in the Audit and Best Practice for Chronic Disease (ABCD) National Research Partnership project. Methods ARF and RHD clinical audit data were collected from 63 Aboriginal centres in four Australian jurisdictions using the ABCD ARF/RHD audit tool. Records of up to 30 patients treated for ARF and/or RHD were analysed per centre from the most recent audit conducted between 2009 and 2014. The main outcome measure was a quality of ARF and RHD care composite indicator consisting of nine best-practice service items. Results Of 1081 patients, most were Indigenous (96%), female (61%), from the Northern Territory and Queensland (97%) and <25 years of age (49%). The composite indicator was highest in the 0-14 year age group (77% vs 65-67% in other age groups). Timely injections and provision of client education are important specific areas for improvement. Multiple regression showed age >15 years to be a significant negative factor for several care indicators, particularly for the delivery of long-acting antibiotic injections and specialist services in the 15-24 year age group. 
Conclusions The results suggest that timely injection and patient education are priorities for managing ARF and RHD, particularly focusing on child-to-adult transition care. What is known about the topic? The burden of rheumatic fever and RHD in some Aboriginal communities is among the highest documented globally. Guideline-adherent RHD prevention and management in primary health care (PHC) settings are critically important to reduce this burden. Continuous quality improvement (CQI) is a proven strategy to improve guideline adherence, using audit cycles and proactive engagement of PHC end users with their own data. Previously, such CQI strategies using a systems approach were shown to improve delivery of ARF and RHD care in six Aboriginal health services (three government and three community controlled). What does this paper add? This paper focuses on the variation across age groups in the quality of ARF and/or RHD care according to nine quality of care indicators across 63 PHC centres serving the Aboriginal population in the Northern Territory, Queensland, South Australia and Western Australia. These new findings provide insight into difference in quality of care by life stage, indicating particular areas for improvement of the management of ARF and RHD at the PHC level, and can act as a baseline for monitoring of care quality for ARF and RHD into the future. What are the implications for practitioners? Management plans and innovative strategies or systems for improving adherence need to be developed as a matter of urgency. PHC professionals need to closely monitor adherence to secondary prophylaxis at both the clinic and individual level. RHD priority status needs to be assigned and recorded as a tool to guide management. Systems strengthening needs to particularly target child-to-adult transition care. 
Practitioners are urged to keep a quick link to the RHDAustralia website to access resources and guidelines pertaining to ARF and RHD (https://www.rhdaustralia.org.au/arf-rhd-guideline, accessed 3 October 2019). CQI strategies can assist PHC centres to improve the care they provide to patients.",2020-04-01 +31710662,EpiDISH web server: Epigenetic Dissection of Intra-Sample-Heterogeneity with online GUI. ,"It is well recognized that cell-type heterogeneity hampers the interpretation of Epigenome-Wide Association Studies (EWAS). Many tools have emerged to address this issue, including several R/Bioconductor packages that infer cell-type composition. Here we present a web application for cell-type deconvolution, which offers the functionality of our EpiDISH Bioconductor/R package in a user-friendly GUI environment. Users can upload their data to infer cell-type composition and differentially methylated cytosines in individual cell-types (DMCTs) for a range of different tissues. EpiDISH web server is implemented with Shiny in R, and is freely available at https://www.biosino.org/EpiDISH/.",2019-11-09 +31695717,AppleMDO: A Multi-Dimensional Omics Database for Apple Co-Expression Networks and Chromatin States.,"As an economically important crop, apple is one of the most cultivated fruit trees in temperate regions worldwide. Recently, a large number of high-quality transcriptomic and epigenomic datasets for apple were made available to the public, which could be helpful in inferring gene regulatory relationships and thus predicting gene function at the genome level. Through integration of the available apple genomic, transcriptomic, and epigenomic datasets, we constructed co-expression networks, identified functional modules, and predicted chromatin states. A total of 112 RNA-seq datasets were integrated to construct a global network and a conditional network (tissue-preferential network). 
Furthermore, a total of 1,076 functional modules with closely related gene sets were identified to assess the modularity of biological networks and further subjected to functional enrichment analysis. The results showed that the function of many modules was related to development, secondary metabolism, hormone response, and transcriptional regulation. Transcriptional regulation is closely related to epigenetic marks on chromatin. A total of 20 epigenomic datasets, which included ChIP-seq, DNase-seq, and DNA methylation analysis datasets, were integrated and used to classify chromatin states. Based on the ChromHMM algorithm, the genome was divided into 620,122 fragments, which were classified into 24 states according to the combination of epigenetic marks and enriched-feature regions. Finally, through the collaborative analysis of different omics datasets, the online database AppleMDO (http://bioinformatics.cau.edu.cn/AppleMDO/) was established for cross-referencing and the exploration of possible novel functions of apple genes. In addition, gene annotation information and functional support toolkits were also provided. Our database might be convenient for researchers to develop insights into the function of genes related to important agronomic traits and might serve as a reference for other fruit trees.",2019-10-22 +31695723,Measurement of Conditional Relatedness Between Genes Using Fully Convolutional Neural Network.,"Measuring conditional relatedness, the degree of relation between a pair of genes in a certain condition, is a basic but difficult task in bioinformatics, as traditional co-expression analysis methods rely on co-expression similarities, well known with high false positive rate. Complement with prior-knowledge similarities is a feasible way to tackle the problem. 
However, classical combination machine learning algorithms fail in detection and application of the complex mapping relations between similarities and conditional relatedness, so a powerful predictive model will have enormous benefit for measuring this kind of complex mapping relations. To this need, we propose a novel deep learning model of convolutional neural network with a fully connected first layer, named fully convolutional neural network (FCNN), to measure conditional relatedness between genes using both co-expression and prior-knowledge similarities. The results on validation and test datasets show FCNN model yields an average 3.0% and 2.7% higher accuracy values for identifying gene-gene interactions collected from the COXPRESdb, KEGG, and TRRUST databases, and a benchmark dataset of Xiao-Yong et al. research, by grid-search 10-fold cross validation, respectively. In order to estimate the FCNN model, we conduct a further verification on the GeneFriends and DIP datasets, and the FCNN model obtains an average of 1.8% and 7.6% higher accuracy, respectively. Then the FCNN model is applied to construct cancer gene networks, and also calls more practical results than other compared models and methods. A website of the FCNN model and relevant datasets can be accessed from https://bmbl.bmi.osumc.edu/FCNN.",2019-10-22 +31638302,CRYAB promotes osteogenic differentiation of human bone marrow stem cells via stabilizing β-catenin and promoting the Wnt signalling.,"

Objectives

The osteogenesis differentiation of human bone marrow stem cells (BMSCs) is essential for bone formation and bone homeostasis. In this study, we aim to elucidate novel molecular targets for bone metabolism diseases.

Materials and methods

The dataset GSE80614 which includes mRNA expression profile during BMSCs osteogenic differentiation was obtained from the GEO database (https://www.ncbi.nlm.nih.gov/geo/). The osteogenic differentiation of BMSCs was measured by ALP staining, AR staining and expression of osteogenic markers in vitro. For in vivo assay, we seeded BMSCs onto beta-tricalcium phosphate (β-TCP) and transplanted them into muscle pockets of nude mice. Luciferase assay, co-immunoprecipitation assay and in vitro ubiquitination assay were carried out to investigate the molecular mechanism.

Results

We found that α-B-crystallin (CRYAB) expression was elevated during the process of BMSCs osteogenic differentiation. Further studies showed that upregulation of CRYAB significantly enhanced the osteogenic differentiation, while downregulation of CRYAB suppressed it. CRYAB regulated BMSCs osteogenic differentiation mainly through the canonical Wnt/β-catenin signalling. In addition, we found that CRYAB could physically interact with β-catenin and protect it from ubiquitination and degradation, which stabilized β-catenin and promoted the Wnt signalling.

Conclusions

The present study provides evidences that CRYAB is an important regulator of BMSCs osteogenic differentiation by protecting β-catenin from ubiquitination and degradation and promoting the Wnt signalling. It may serve as a potential therapeutic target for diseases related to bone metabolism.",2019-10-22 +31912676,Human papillomavirus (HPV) DNA detection in uterine cervix cancer after radiation indicating recurrence: a systematic review and meta-analysis.,"

Objective

The causal association of human papillomavirus (HPV) in uterine cervical cancer was well established and this oncogenic virus was reported to be a biomarker for overall recurrence and central pelvic recurrence. The objective of the present systematic review and meta-analysis was to assess the role of HPV DNA testing in early detection of recurrence among cervical cancer survivors after radiotherapy.

Methods

We performed a systematic review and meta-analysis by means of searching electronic databases for published articles between January 1984 and June 2018, on the basis of standard systematic review guidelines prescribed by major agencies namely Cochrane Collaboration (https://www.cochrane.org) and Campbell Collaboration (https://www.campbellcollaboration.org). The meta-analysis component was further modified appropriately for the synthesis of sensitivity and specificity results.

Results

A total of 1,055 cervical cancer cases who had received pelvic radiation with or without chemotherapy from ten cohort studies were evaluated. The overall pooled sensitivity and specificity of HPV DNA testing was 0.84 (95% confidence interval [CI]= 0.66-0.94) and 0.35 (95% CI=0.20-0.54) respectively. The positive likelihood ratio was 1.3 (95% CI=1.0-1.7) and the negative likelihood ratio was 0.45 (95% CI=0.18-1.10) with an estimated diagnostic odds ratio of 3 (95% CI=1-9).

Conclusion

The screening for HPV DNA testing during follow-up facilitates early detection of recurrence after radiotherapy.",2019-10-22 +31944177,Endothelial heterogeneity across distinct vascular beds during homeostasis and inflammation. ,"Blood vessels are lined by endothelial cells engaged in distinct organ-specific functions but little is known about their characteristic gene expression profiles. RNA-Sequencing of the brain, lung, and heart endothelial translatome identified specific pathways, transporters and cell-surface markers expressed in the endothelium of each organ, which can be visualized at http://www.rehmanlab.org/ribo. We found that endothelial cells express genes typically found in the surrounding tissues such as synaptic vesicle genes in the brain endothelium and cardiac contractile genes in the heart endothelium. Complementary analysis of endothelial single cell RNA-Seq data identified the molecular signatures shared across the endothelial translatome and single cell transcriptomes. The tissue-specific heterogeneity of the endothelium is maintained during systemic in vivo inflammatory injury as evidenced by the distinct responses to inflammatory stimulation. Our study defines endothelial heterogeneity and plasticity and provides a molecular framework to understand organ-specific vascular disease mechanisms and therapeutic targeting of individual vascular beds.",2020-01-16 +,Phylogeny of pleasing lacewings (Neuroptera: Dilaridae) with a revised generic classification and description of a new subfamily,"The phylogeny of pleasing lacewings (Neuroptera: Dilaridae) is reconstructed for the first time based on morphological data using all fossil and extant genera. Accordingly, a revised generic classification of Dilaridae is proposed, with a new subfamily (i.e. Berothellinae subfam.n.) erected based on its remarkably different morphological features from the other dilarid subfamilies. 
A revision of all dilarid genera is presented, including descriptions of some little‐known species from Asia and Mid‐Cretaceous Burmese amber. New genera and species herein described include Berothella holzschuhi U. Aspöck, Liu & H. Aspöck, sp.n., Cretodilar burmanus Liu & Zhang, gen. et sp.n., Dilar cretaceus Liu & Zhang, sp.n., Neonallachius orientalis Liu, U. Aspöck & H. Aspöck, sp.n. and Neonallachius thailandicus Liu & Winterton, sp.n. Two new combinations, i.e. Neonallachius krooni (Minter, 1986), comb.n. and Neonallachius ponomarenkoi (Zakharenko, 1991), comb.n., are proposed. Evolutionary patterns of some important characters and the historical biogeography of Dilaridae are also discussed. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:68836312‐FBDC‐4F9F‐8516‐2365F44596BF.",2017-04-01 +30120950,Meta-analysis of Gastroesophageal Reflux Disease and Idiopathic Pulmonary Fibrosis.,"BACKGROUND:The relationship between gastroesophageal reflux disease (GERD) and idiopathic pulmonary fibrosis (IPF) is controversial. Current guidelines recommend that clinicians use regular antacid treatment, while two recent meta-analyses of antacid therapy in IPF were inconclusive. The objective of this study was to examine the evidence regarding the association between GERD and IPF through a systematic review and a meta-analysis, with special reference to the methodologic quality of the observational studies. METHODS:The MEDLINE, EMBASE, Ovid, and Web of Science (1966-May 2018) databases were searched for original articles published in any language, and we then systematically reviewed the bibliographies of the retrieved articles. Observational studies (cohort and case-control studies) were selected if they allowed the calculation of a measure of association relating GERD to IPF. RESULTS:Eighteen case-control studies including 3,206 patients with IPF and 9,368 control subjects met the inclusion criteria of the meta-analysis. 
The meta-analysis indicated that GERD is associated with IPF (OR, 2.94 [95% CI, 1.95-4.42]; P homogeneity < .0001). Overall, the results remained consistent whatever the data source (clinical studies vs databases) or the type of control subject (healthy volunteers, patients with respiratory diseases other than interstitial lung disease, or patients with non-IPF interstitial lung disease). In a meta-regression, after controlling for smoking, GERD and IPF were not related. CONCLUSIONS:GERD and IPF may be related, but this association is most likely confounded, especially by smoking. Our confidence in the estimate of association is low because it is exclusively from case-control studies. TRIAL REGISTRY:PROSPERO; No.: CRD42016053728; URL: http://www.crd.york.ac.uk/PROSPERO.",2018-08-16 +31292629,QuanTest2: benchmarking multiple sequence alignments using secondary structure prediction.,"

Motivation

Secondary structure prediction accuracy (SSPA) in the QuanTest benchmark can be used to measure accuracy of a multiple sequence alignment. SSPA correlates well with the sum-of-pairs score, if the results are averaged over many alignments but not on an alignment-by-alignment basis. This is due to a sub-optimal selection of reference and non-reference sequences in QuanTest.

Results

We develop an improved strategy for selecting reference and non-reference sequences for a new benchmark, QuanTest2. In QuanTest2, SSPA and SP correlate better on an alignment-by-alignment basis than in QuanTest. Guide-trees for QuanTest2 are more balanced with respect to reference sequences than in QuanTest. QuanTest2 scores correlate well with other well-established benchmarks.

Availability and implementation

QuanTest2 is available at http://bioinf.ucd.ie/quantest2.tar, comprises of reference and non-reference sequence sets and a scoring script.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +30239567,The Encyclopedia of Proteome Dynamics: the KinoViewer.,"

Summary

The Encyclopedia of Proteome Dynamics (EPD) 'KinoViewer' is an interactive data visualization tool designed for analysis and exploration of both protein and transcript data, showing expression of kinase genes in either human or mouse cells and tissues. The KinoViewer provides a comprehensive, updated graphical display of all human/mouse kinases and an open access analysis tool for the community with a user-friendly graphical interface.

Availability and implementation

The KinoViewer is based on a manually drawn SVG, which is utilized with D3.js to create a dynamic visualization. It can be accessed at: https://peptracker.com/epd/analytics/. The KinoViewer is currently only accessible through the EPD, it is open access and can be used either to view internal datasets, or used to upload and visualize external user datasets.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +32926411,Quantitative SEM characterisation of ceramic target prior and after magnetron sputtering: a case study of aluminium zinc oxide.,"Till now electron microscopy techniques have not been used to evaluate the plasma-target interactions undergone during the magnetron sputtering process. The destructive nature of this interaction severely alters the target microstructure. Utilising quantitative microscopy techniques can shed light on the complex plasma and solid-state processes involved which can ultimately lead to improved functional thin film deposition. As a representative functional material, aluminium-doped-zinc oxide (AZO) is an upcoming alternative to conventional transparent electrode wherein the process optimisation is of great importance. In this paper, we evaluate the pre- and post-sputter field emission scanning electron microscopy (FESEM) data for ceramic AZO target fabricated at three final sintering temperatures (1100°C, 1200°C and 1300°C). In all cases, grain boundaries are merged in addition to a visible reduction in the secondary phases which makes segmentation-based image analysis challenging. Through surface statistics (i.e. fractal dimension, autocorrelation length, texture aspect ratio and entropy) as a function of magnification we can quantify the electron microscopy image of the microstructure. We show that the plasma-microstructure interaction leads to an increase in autocorrelation length, texture aspect ratio and entropy for the optimum AZO ceramic sputtering target sintered at 1200°C. Furthermore, a maximum reduction in fractal dimension span (as determined by exponential regression) is also observed for 1200°C. In addition to the evaluation of plasma effects on sintering, our approach can provide a window towards understanding the underlying thin film growth mechanisms. 
We believe that this technique can be applied to the defect characterisation of a wide range of polycrystalline ceramic sputtering targets (e.g. ITO, CZTS, GAZO and so on) with the ultimate goal of improving the magnetron sputtering process and the resulting functional thin film. LAY DESCRIPTION: Magnetron sputtering allows scientists to make functional thin films on the order of the nanoscale. In this technique, atoms are plucked from a 'target' then placed onto a substrate forming a thin nanometric film: all thanks to magnets, a special power supply and the fourth state of matter (plasma). Understanding what is going on and how to make a 'good' thin film is important for making better light emitting diodes, solar cells and light sensors. Scientists use electron microscopy to see what is going on in the microstructure of the sputtered thin films to fine tune the sputtering recipe. Here, for the first time, we have applied electron microscopy to see the surface of the microstructure before and after magnetron sputtering. This will help us understand the plasma-microstructure interaction, allowing us to make more informed decisions when fine-tuning the sputtering process to get improved thin films. This is a case study of aluminium-doped zinc oxide (AZO) target that could potentially replace indium tin oxide (ITO), which is widely used as a transparent electrode in devices involving light and electricity. In this case, improved characteristics would be lower electrical resistivity and higher transmission of light. We show that it is possible to use a mathematical description (e.g. the fractal dimension) of the scanning electron microscopy picture to show a link between the target surface and the functional properties. 
Simple explanation of fractal dimensions by Sixty Symbols ○ https://www.youtube.com/watch?v=cmBljeC79Ls Experimental demonstration of magnetron sputtering by The Thought Emporium ○ https://www.youtube.com/watch?v=Cyu7etM-0Ko Introductory video on magnetron sputtering by Applied Science ○ https://www.youtube.com/watch?v=9OEz_e9C4KM Demonstration of AZO target fabrication and sputtering by Pradhyut Rajjkumar ○ https://www.youtube.com/watch?v=kTLaTJfNX3c Simple explanation of a DIY SEM by Applied Science ○ https://www.youtube.com/watch?v=VdjYVF4a6iU.",2020-09-28 +31236241,Some like it dry: Water restriction overrides heterogametic sex determination in two reptiles.,"

Abstract

The evolution of sex determination is complex and yet crucial in our understanding of population stability. In ectotherms, sex determination involves a variety of mechanisms including genetic determination (GSD), environment determination (ESD), but also interactions between the two via sex reversal. In this study, we investigated whether water deprivation during pregnancy could override GSD in two heterogametic squamate reptiles. We demonstrated that water restriction in early gestation induced a male-biased secondary sex ratio in both species, which could be explained by water sex reversal as the more likely mechanism. We further monitored some long-term fitness estimates of offspring, which suggested that water sex determination (WSD) represented a compensatory strategy producing the rarest sex according to Fisher's assumptions of frequency-dependent selection models. This study provides new insights into sex determination modes and calls for a general investigation of mechanisms behind WSD and to examine the evolutionary implications.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.mv06pv1.",2019-05-21 +30615056,CRISPR-DT: designing gRNAs for the CRISPR-Cpf1 system with improved target efficiency and specificity.,"

Motivation

The Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR)-Cpf1 system has been successfully applied in genome editing. However, target efficiency of the CRISPR-Cpf1 system varies among different guide RNA (gRNA) sequences.

Results

In this study, we reanalyzed the published CRISPR-Cpf1 gRNAs data and found many sequence and structural features related to their target efficiency. With the aid of Random Forest in feature selection, a support vector machine model was created to predict target efficiency for any given gRNAs. We have developed the first CRISPR-Cpf1 web service application, CRISPR-DT (CRISPR DNA Targeting), to help users design optimal gRNAs for the CRISPR-Cpf1 system by considering both target efficiency and specificity. CRISPR-DT will empower researchers in genome editing.

Availability and implementation

CRISPR-DT, mainly implemented in Perl, PHP and JavaScript, is freely available at http://bioinfolab.miamioh.edu/CRISPR-DT.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +31575520,Cardiac rehabilitation for heart failure can improve quality of life and fitness.,"The studyTaylor RS, Walker S, Ciani O, et al. Exercise-based cardiac rehabilitation for chronic heart failure: the EXTRAMATCH II individual participant data meta-analysis. Health Technol Assess 2019;23:1-98.This project was funded by the NIHR Health Technology Assessment Programme (project number 15/80/30).To read the full NIHR Signal, go to https://discover.dc.nihr.ac.uk/content/signal-000803/cardiac-rehabilitation-for-heart-failure-can-improve-quality-of-life-and-fitness.",2019-10-01 +32237066,MicroRNA-155-5p suppresses PD-L1 expression in lung adenocarcinoma.,"MiR-155-5p is a key oncogenic microRNA that maintains immune homeostasis and mediates cross-talk between inflammation and tumorigenesis. High expression of programmed death ligand-1 (PD-L1) also plays an important role in immune tolerance in tumors. The present study aimed to explore the relationship between miR-155-5p and PD-L1 in lung adenocarcinoma (LUAD) cells A549 and H1650. The expression levels of miR-155-5p and PD-L1 in LUAD patients were detected by a quantitative reverse transcriptase-polymerase chain reaction (qRT-PCR) and mimics of miR-155-5p were used to model increased expression in A549 or H1650 cells. After 24 h, we measured levels of PD-L1 by qRT-PCR, western blotting and flow cytometry. In addition, we identified two sites in the PD-L1 3'-UTR (5'-AGCAUUA-3' and 5'-GCAUUAA-3') that can be bound by miR-155-5p using TargetScan (http://www.targetscan.org). Compared to normal tissue, miR-155-5p was overexpressed in tumor tissue (P = 0.0456), whereas the expression of PD-L1 was not significantly different (P = 0.1349). The expression levels of miR-155-5p and PD-L1 were negatively correlated (r = -0.6409, P = 0.0459 and r = -0.7544, P = 0.0117). 
Exogenous overexpression of miR-155-5p decreased the mRNA, total protein and membrane protein expression levels of PD-L1 both in A549 and H1650 cells (P < 0.05). Taken together, our data suggest that miR-155-5p may suppress the expression of PD-L1 in LUAD.",2020-04-22 +30734313,"Rank-permutation tests for behavior analysis, and a test for trend allowing unequal data numbers for each subject.","We advocate for rank-permutation tests as the best choice for null-hypothesis significance testing of behavioral data, because these tests require neither distributional assumptions about the populations from which our data were drawn nor the measurement assumption that our data are measured on an interval scale. We provide an algorithm that enables exact-probability versions of such tests without recourse to either large-sample approximation or resampling approaches. We particularly consider a rank-permutation test for monotonic trend, and provide an extension of this test that allows unequal number of data points, or observations, for each subject. We provide an extended table of critical values of the test statistic for this test, and both a spreadsheet implementation and an Oracle® Java Web Start application to generate other critical values at https://sites.google.com/a/eastbayspecialists.co.nz/rank-permutation/.",2019-02-07 +30886531,Connecting data and expertise: a new alliance for biodiversity knowledge.,"There has been major progress over the last two decades in digitising historical knowledge of biodiversity and in making biodiversity data freely and openly accessible. Interlocking efforts bring together international partnerships and networks, national, regional and institutional projects and investments and countless individual contributors, spanning diverse biological and environmental research domains, government agencies and non-governmental organisations, citizen science and commercial enterprise. 
However, current efforts remain inefficient and inadequate to address the global need for accurate data on the world's species and on changing patterns and trends in biodiversity. Significant challenges include imbalances in regional engagement in biodiversity informatics activity, uneven progress in data mobilisation and sharing, the lack of stable persistent identifiers for data records, redundant and incompatible processes for cleaning and interpreting data and the absence of functional mechanisms for knowledgeable experts to curate and improve data. Recognising the need for greater alignment between efforts at all scales, the Global Biodiversity Information Facility (GBIF) convened the second Global Biodiversity Informatics Conference (GBIC2) in July 2018 to propose a coordination mechanism for developing shared roadmaps for biodiversity informatics. GBIC2 attendees reached consensus on the need for a global alliance for biodiversity knowledge, learning from examples such as the Global Alliance for Genomics and Health (GA4GH) and the open software communities under the Apache Software Foundation. These initiatives provide models for multiple stakeholders with decentralised funding and independent governance to combine resources and develop sustainable solutions that address common needs. This paper summarises the GBIC2 discussions and presents a set of 23 complementary ambitions to be addressed by the global community in the context of the proposed alliance. The authors call on all who are responsible for describing and monitoring natural systems, all who depend on biodiversity data for research, policy or sustainable environmental management and all who are involved in developing biodiversity informatics solutions to register interest at https://biodiversityinformatics.org/ and to participate in the next steps to establishing a collaborative alliance. 
The supplementary materials include brochures in a number of languages (English, Arabic, Spanish, Basque, French, Japanese, Dutch, Portuguese, Russian, Traditional Chinese and Simplified Chinese). These summarise the need for an alliance for biodiversity knowledge and call for collaboration in its establishment.",2019-03-08 +28807036,T-Time: A data repository of T cell and calcium release-activated calcium channel activation imagery.,"

Background

A fundamental understanding of live-cell dynamics is necessary in order to advance scientific techniques and personalized medicine. For this understanding to be possible, image processing techniques, probes, tracking algorithms and many other methodologies must be improved. Currently there are no large open-source datasets containing live-cell imaging to act as a standard for the community. As a result, researchers cannot evaluate their methodologies on an independent benchmark or leverage such a dataset to formulate scientific questions.

Findings

Here we present T-Time, the largest free and publicly available data set of T cell phase contrast imagery designed with the intention of furthering live-cell dynamics research. T-Time consists of over 40 GB of imagery data, and includes annotations derived from these images using a custom T cell identification and tracking algorithm. The data set contains 71 time-lapse sequences containing T cell movement and calcium release activated calcium channel activation, along with 50 time-lapse sequences of T cell activation and T reg interactions. The database includes a user-friendly web interface, summary information on the time-lapse images, and a mechanism for users to download tailored image datasets for their own research. T-Time is freely available on the web at http://ttime.mlatlab.org .

Conclusions

T-Time is a novel data set of T cell images and associated metadata. It allows users to study T cell interaction and activation.",2017-08-15 +25332392,lncRNASNP: a database of SNPs in lncRNAs and their potential functions in human and mouse.,"Long non-coding RNAs (lncRNAs) play key roles in various cellular contexts and diseases by diverse mechanisms. With the rapid growth of identified lncRNAs and disease-associated single nucleotide polymorphisms (SNPs), there is a great demand to study SNPs in lncRNAs. Aiming to provide a useful resource about lncRNA SNPs, we systematically identified SNPs in lncRNAs and analyzed their potential impacts on lncRNA structure and function. In total, we identified 495,729 and 777,095 SNPs in more than 30,000 lncRNA transcripts in human and mouse, respectively. A large number of SNPs were predicted with the potential to impact on the miRNA-lncRNA interaction. The experimental evidence and conservation of miRNA-lncRNA interaction, as well as miRNA expressions from TCGA were also integrated to prioritize the miRNA-lncRNA interactions and SNPs on the binding sites. Furthermore, by mapping SNPs to GWAS results, we found that 142 human lncRNA SNPs are GWAS tagSNPs and 197,827 lncRNA SNPs are in the GWAS linkage disequilibrium regions. All these data for human and mouse lncRNAs were imported into lncRNASNP database (http://bioinfo.life.hust.edu.cn/lncRNASNP/), which includes two sub-databases lncRNASNP-human and lncRNASNP-mouse. The lncRNASNP database has a user-friendly interface for searching and browsing through the SNP, lncRNA and miRNA sections.",2014-10-20 +25686635,Folding RaCe: a robust method for predicting changes in protein folding rates upon point mutations.,"Protein engineering methods are commonly employed to decipher the folding mechanism of proteins and enzymes. However, such experiments are exceedingly time and resource intensive. 
It would therefore be advantageous to develop a simple computational tool to predict changes in folding rates upon mutations. Such a method should be able to rapidly provide the sequence position and chemical nature to modulate through mutation, to effect a particular change in rate. This can be of importance in protein folding, function or mechanistic studies. We have developed a robust knowledge-based methodology to predict the changes in folding rates upon mutations formulated from amino acid properties using multiple linear regression approach. We benchmarked this method against an experimental database of 790 point mutations from 26 two-state proteins. Mutants were first classified according to secondary structure, accessible surface area and position along the primary sequence. Three prime amino acid features eliciting the best relationship with folding rates change were then shortlisted for each class along with an optimized window length. We obtained a self-consistent mean absolute error of 0.36 s(-1) and a mean Pearson correlation coefficient (PCC) of 0.81. Jack-knife test resulted in a MAE of 0.42 s(-1) and a PCC of 0.73. Moreover, our method highlights the importance of outlier(s) detection and studying their implications in the folding mechanism. A web server 'Folding RaCe' has been developed and is available at http://www.iitm.ac.in/bioinfo/proteinfolding/foldingrace.html. Contact: gromiha@iitm.ac.in. Supplementary data are available at Bioinformatics online.",2015-02-16 +31259444,Plasticity in Triticeae centromere DNA sequences: a wheat × tall wheatgrass (decaploid) model.,"Centromeres mediate chromosome attachment to microtubules and maintain the integrity of chromosomes for proper segregation of the sister chromatids during cell division. 
Advances in the assembly of Triticeae genome sequences combined with the capacity to recover hybrid species derived from very distantly related species provides potential experimental systems for linking retrotransposon amplification and repositioning of centromeres via non-mendelian inheritance in partial amphiploid breeds. The decaploid tall wheatgrass (Thinopyrum ponticum) is one of the most successfully used perennial species in wheat breeding for generating translocation lines with valuable agronomic traits. We found that wheat centromere retrotransposons CRW and Quinta widely occur within the tall wheatgrass genome. In addition, one of the genome donors to Th. ponticum, Pseudoroegneria stipifolia (StSt), has been shown to have Abigail and a satellite repeat, CentSt. We also found two other centromeric retrotransposons, Abia and CL135 in Th. ponticum by ChIP-seq. Examination of partial amphiploid lines that were generated in the 1970s demonstrated extensive modification in centromere sequences using CentSt, Abigail and Abia as probes. We also detected that St-genome chromosomes were more enriched with Abigail and CentSt, whereas E-genome chromosomes were enriched with CRW and Quinta in tall wheatgrass and its closer relatives. It can be concluded that bursts of transposition of retrotransposons and repositioning of centromeres via non-mendelian segregation are common in partial amphiploids derived from interspecific hybrids. Practically speaking, our study reveals that the existence of homologous centromere functional sequences in both a donor and its receptor can substantially contribute to the successful transfer of alien genes into crop species. OPEN RESEARCH BADGES: This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. 
The data is available at https://www.ncbi.nlm.nih.gov/sra/SRR9089557; https://www.ncbi.nlm.nih.gov/sra/SRR9089558; https://www.ncbi.nlm.nih.gov/sra/SRR9089559; https://www.ncbi.nlm.nih.gov/sra/SRR9089560; https://www.ncbi.nlm.nih.gov/sra/SRR9089561; https://www.ncbi.nlm.nih.gov/sra/SRR9089562; https://www.ncbi.nlm.nih.gov/sra/SRR9089563; https://www.ncbi.nlm.nih.gov/sra/SRR9089564; https://www.ncbi.nlm.nih.gov/nuccore/MK999394; https://www.ncbi.nlm.nih.gov/nuccore/MK999395; https://www.ncbi.nlm.nih.gov/nuccore/MK999396.",2019-09-09 +31014259,Protein interaction disruption in cancer.,"

Background

Most methods that integrate network and mutation data to study cancer focus on the effects of genes/proteins, quantifying the effect of mutations or differential expression of a gene and its neighbors, or identifying groups of genes that are significantly up- or down-regulated. However, several mutations are known to disrupt specific protein-protein interactions, and network dynamics are often ignored by such methods. Here we introduce a method that allows for predicting the disruption of specific interactions in cancer patients using somatic mutation data and protein interaction networks.

Methods

We extend standard network smoothing techniques to assign scores to the edges in a protein interaction network in addition to nodes. We use somatic mutations as input to our modified network smoothing method, producing scores that quantify the proximity of each edge to somatic mutations in individual samples.

Results

Using breast cancer mutation data, we show that predicted edges are significantly associated with patient survival and known ligand binding site mutations. In-silico analysis of protein binding further supports the ability of the method to infer novel disrupted interactions and provides a mechanistic explanation for the impact of mutations on key pathways.

Conclusions

Our results show the utility of our method both in identifying disruptions of protein interactions from known ligand binding site mutations, and in selecting novel clinically significant interactions. Supporting website with software and data: https://www.cs.cmu.edu/~mruffalo/mut-edge-disrupt/ .",2019-04-23 +29077809,Linking metabolic network features to phenotypes using sparse group lasso.,"

Motivation

Integration of metabolic networks with '-omics' data has been a subject of recent research in order to better understand the behaviour of such networks with respect to differences between biological and clinical phenotypes. Under the conditions of steady state of the reaction network and the non-negativity of fluxes, metabolic networks can be algebraically decomposed into a set of sub-pathways often referred to as extreme currents (ECs). Our objective is to find the statistical association of such sub-pathways with given clinical outcomes, resulting in a particular instance of a self-contained gene set analysis method. In this direction, we propose a method based on sparse group lasso (SGL) to identify phenotype associated ECs based on gene expression data. SGL selects a sparse set of feature groups and also introduces sparsity within each group. Features in our model are clusters of ECs, and feature groups are defined based on correlations among these features.

Results

We apply our method to metabolic networks from KEGG database and study the association of network features to prostate cancer (where the outcome is tumor and normal, respectively) as well as glioblastoma multiforme (where the outcome is survival time). In addition, simulations show the superior performance of our method compared to global test, which is an existing self-contained gene set analysis method.

Availability and implementation

R code (compatible with version 3.2.5) is available from http://www.abi.bit.uni-bonn.de/index.php?id=17.

Contact

samal@combine.rwth-aachen.de or frohlich@bit.uni-bonn.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +22829871,Web search queries can predict stock market volumes.,"We live in a computerized and networked society where many of our actions leave a digital trace and affect other people's actions. This has led to the emergence of a new data-driven research field: mathematical methods of computer science, statistical physics and sociometry provide insights on a wide range of disciplines ranging from social science to human mobility. A recent important discovery is that search engine traffic (i.e., the number of requests submitted by users to search engines on the www) can be used to track and, in some cases, to anticipate the dynamics of social phenomena. Successful examples include unemployment levels, car and home sales, and epidemics spreading. Few recent works applied this approach to stock prices and market sentiment. However, it remains unclear if trends in financial markets can be anticipated by the collective wisdom of on-line users on the web. Here we show that daily trading volumes of stocks traded in NASDAQ-100 are correlated with daily volumes of queries related to the same stocks. In particular, query volumes anticipate in many cases peaks of trading by one day or more. Our analysis is carried out on a unique dataset of queries, submitted to an important web search engine, which enable us to investigate also the user behavior. We show that the query volume dynamics emerges from the collective but seemingly uncoordinated activity of many users. These findings contribute to the debate on the identification of early warnings of financial systemic risk, based on the activity of users of the www.",2012-07-19 +26989147,HistoneDB 2.0: a histone database with variants--an integrated resource to explore histones and their variants. ,"Compaction of DNA into chromatin is a characteristic feature of eukaryotic organisms. 
The core (H2A, H2B, H3, H4) and linker (H1) histone proteins are responsible for this compaction through the formation of nucleosomes and higher order chromatin aggregates. Moreover, histones are intricately involved in chromatin functioning and provide a means for genome dynamic regulation through specific histone variants and histone post-translational modifications. 'HistoneDB 2.0--with variants' is a comprehensive database of histone protein sequences, classified by histone types and variants. All entries in the database are supplemented by rich sequence and structural annotations with many interactive tools to explore and compare sequences of different variants from various organisms. The core of the database is a manually curated set of histone sequences grouped into 30 different variant subsets with variant-specific annotations. The curated set is supplemented by an automatically extracted set of histone sequences from the non-redundant protein database using algorithms trained on the curated set. The interactive web site supports various searching strategies in both datasets: browsing of phylogenetic trees; on-demand generation of multiple sequence alignments with feature annotations; classification of histone-like sequences and browsing of the taxonomic diversity for every histone variant. HistoneDB 2.0 is a resource for the interactive comparative analysis of histone protein sequences and their implications for chromatin function. Database URL: http://www.ncbi.nlm.nih.gov/projects/HistoneDB2.0.",2016-03-17 +24302289,EDdb: a web resource for eating disorder and its application to identify an extended adipocytokine signaling pathway related to eating disorder.,"Eating disorder is a group of physiological and psychological disorders affecting approximately 1% of the female population worldwide. Although the genetic epidemiology of eating disorder is becoming increasingly clear with accumulated studies, the underlying molecular mechanisms are still unclear. 
Recently, integration of various high-throughput data expanded the range of candidate genes and started to generate hypotheses for understanding potential pathogenesis in complex diseases. This article presents EDdb (Eating Disorder database), the first evidence-based gene resource for eating disorder. Fifty-nine experimentally validated genes from the literature in relation to eating disorder were collected as the core dataset. Another four datasets with 2824 candidate genes across 601 genome regions were expanded based on the core dataset using different criteria (e.g., protein-protein interactions, shared cytobands, and related complex diseases). Based on human protein-protein interaction data, we reconstructed a potential molecular sub-network related to eating disorder. Furthermore, with an integrative pathway enrichment analysis of genes in EDdb, we identified an extended adipocytokine signaling pathway in eating disorder. Three genes in EDdb (ADIPO (adiponectin), TNF (tumor necrosis factor) and NR3C1 (nuclear receptor subfamily 3, group C, member 1)) link the KEGG (Kyoto Encyclopedia of Genes and Genomes) ""adipocytokine signaling pathway"" with the BioCarta ""visceral fat deposits and the metabolic syndrome"" pathway to form a joint pathway. In total, the joint pathway contains 43 genes, among which 39 genes are related to eating disorder. As the first comprehensive gene resource for eating disorder, EDdb ( http://eddb.cbi.pku.edu.cn ) enables the exploration of gene-disease relationships and cross-talk mechanisms between related disorders. Through pathway statistical studies, we revealed that abnormal body weight caused by eating disorder and obesity may both be related to dysregulation of the novel joint pathway of adipocytokine signaling. 
In addition, this joint pathway may be the common pathway for body weight regulation in complex human diseases related to unhealthy lifestyle.",2013-12-05 +28682264,NGS-FC: A Next-Generation Sequencing Data Format Converter.,"With the widespread implementation of next-generation sequencing (NGS) technologies, millions of sequences have been produced. A lot of databases were created to store and organize the high-throughput sequencing data. Numerous analysis software programs and tools have been developed over the past years. Most of them use specific formats for data representation and storage. Data interoperability becomes a crucial challenge and many tools have been developed to convert NGS data from one format to another. However, most of them were developed for specific and limited formats. Here, we present NGS-FC (Next-Generation Sequencing Format Converter), which provides a framework to support the conversion between several formats. It supports 14 formats now and provides interfaces to enable users to improve the existing converters and add new ones. Moreover, NGS-FC achieved the overall competitive performance in comparison with some existing converters in terms of RAM usage and running time. The software is written in Java and can be executed standalone. The source code and documentation are freely available at http://sysbio.suda.edu.cn/NGS-FC.",2017-07-03 +29228340,Hydrophobicity diversity in globular and nonglobular proteins measured with the Gini index.,"Amino acids and their properties are variably distributed in proteins and different compositions determine all protein features, ranging from solubility to stability and functionality. Gini index, a tool to estimate distribution uniformity, is widely used in macroeconomics and has numerous statistical applications. Here, Gini index is used to analyze the distribution of hydrophobicity in proteins and to compare hydrophobicity distribution in globular and intrinsically disordered proteins. 
Based on the analysis of carefully selected high-quality data sets of proteins extracted from the Protein Data Bank (http://www.rcsb.org) and from the DisProt database (http://www.disprot.org/), it is observed that hydrophobicity is distributed in a more diverse way in intrinsically disordered proteins than in folded and soluble globular proteins. This correlates with the observation that the amino acid composition deviates from the uniformity (estimated with the Shannon and the Gini-Simpson indices) more in intrinsically disordered proteins than in globular and soluble proteins. Although statistical tools like the Gini index have received little attention in molecular biology, these results show that they allow one to estimate sequence diversity and that they are useful to delineate trends that can hardly be described, otherwise, in simple and concise ways.",2017-12-01 +32477081,Resting State EEG in Exercise Intervention Studies: A Systematic Review of Effects and Methods.,"Background: Exercise has been shown to alter brain plasticity and is explored as a therapeutic intervention in a wide variety of neurological diseases. Electroencephalography (EEG) offers an inexpensive method of studying brain electrocortical activity shortly after exercise and thus offers a way of exploring the influence of exercise on the brain. We conducted a systematic review to summarize the current body of evidence regarding methods of EEG analysis and the reported effects of exercise interventions on EEG. Methods: PubMed, Web of Science and EMBASE were searched for studies investigating resting state EEG in exercise intervention studies carried out in participants >17 years of age and with no history of epilepsy. Further, studies solely investigating event-related potentials as an outcome measure were excluded. Relevant data were extracted, and a risk-of-bias assessment was carried out using the Cochrane risk-of-bias tool. A qualitative synthesis of results was carried out. 
A protocol for the systematic review was uploaded to https://www.crd.york.ac.uk/PROSPERO/ (ID: CRD42019134570) and the Preferred Reporting Items for Systematic Reviews (PRISMA) statement was followed. Results: Out of 1,993 records screened, 54 studies were included in a final qualitative synthesis with a total of 1,445 participants. Our synthesis showed that studies were mainly carried out using frequency analysis as an analytical method. Generally, findings across studies were inconsistent and few were adjusted for multiple comparisons. Studies were mainly of low quality and usually carried out in small populations, lowering the significance of results reported. Conclusions: Changes in the EEG as a result of an exercise intervention are elusive and difficult to replicate. Future studies should provide biologically sound hypotheses underlying assumptions, include larger populations and use standardized EEG methods to increase replicability. EEG remains an interesting methodology to examine the effects of exercise on the brain.",2020-05-07 +29106618,eRAM: encyclopedia of rare disease annotations for precision medicine.,"Rare diseases affect over a hundred million people worldwide, most of these patients are not accurately diagnosed and effectively treated. The limited knowledge of rare diseases forms the biggest obstacle for improving their treatment. Detailed clinical phenotyping is considered as a keystone of deciphering genes and realizing the precision medicine for rare diseases. Here, we present a standardized system for various types of rare diseases, called encyclopedia of Rare disease Annotations for Precision Medicine (eRAM). eRAM was built by text-mining nearly 10 million scientific publications and electronic medical records, and integrating various data in existing recognized databases (such as Unified Medical Language System (UMLS), Human Phenotype Ontology, Orphanet, OMIM, GWAS). 
eRAM systematically incorporates currently available data on clinical manifestations and molecular mechanisms of rare diseases and uncovers many novel associations among diseases. eRAM provides enriched annotations for 15 942 rare diseases, yielding 6147 human disease related phenotype terms, 31 661 mammalians phenotype terms, 10,202 symptoms from UMLS, 18 815 genes and 92 580 genotypes. eRAM can not only provide information about rare disease mechanism but also facilitate clinicians to make accurate diagnostic and therapeutic decisions towards rare diseases. eRAM can be freely accessed at http://www.unimd.org/eram/.",2018-01-01 +24813212,lncRNAtor: a comprehensive resource for functional investigation of long non-coding RNAs.,"

Motivation

A number of long non-coding RNAs (lncRNAs) have been identified by deep sequencing methods, but their molecular and cellular functions are known only for a limited number of lncRNAs. Current databases on lncRNAs are mostly for cataloging purpose without providing in-depth information required to infer functions. A comprehensive resource on lncRNA function is an immediate need.

Results

We present a database for functional investigation of lncRNAs that encompasses annotation, sequence analysis, gene expression, protein binding and phylogenetic conservation. We have compiled lncRNAs for six species (human, mouse, zebrafish, fruit fly, worm and yeast) from ENSEMBL, HGNC, MGI and lncRNAdb. Each lncRNA was analyzed for coding potential and phylogenetic conservation in different lineages. Gene expression data of 208 RNA-Seq studies (4995 samples), collected from GEO, ENCODE, modENCODE and TCGA databases, were used to provide expression profiles in various tissues, diseases and developmental stages. Importantly, we analyzed RNA-Seq data to identify coexpressed mRNAs that would provide ample insights on lncRNA functions. The resulting gene list can be subject to enrichment analysis such as Gene Ontology or KEGG pathways. Furthermore, we compiled protein-lncRNA interactions by collecting and analyzing publicly available CLIP-seq or PAR-CLIP sequencing data. Finally, we explored evolutionarily conserved lncRNAs with correlated expression between human and six other organisms to identify functional lncRNAs. The whole contents are provided in a user-friendly web interface.

Availability and implementation

lncRNAtor is available at http://lncrnator.ewha.ac.kr/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-09 +29846545,CBD: a biomarker database for colorectal cancer. ,"Colorectal cancer (CRC) biomarker database (CBD) was established based on 870 identified CRC biomarkers and their relevant information from 1115 original articles in PubMed published from 1986 to 2017. In this version of the CBD, CRC biomarker data were collected, sorted, displayed and analysed. The CBD with the credible contents as a powerful and time-saving tool provide more comprehensive and accurate information for further CRC biomarker research. The CBD was constructed under MySQL server. HTML, PHP and JavaScript languages have been used to implement the web interface. The Apache was selected as HTTP server. All of these web operations were implemented under the Windows system. The CBD could provide to users the multiple individual biomarker information and categorized into the biological category, source and application of biomarkers; the experiment methods, results, authors and publication resources; the research region, the average age of cohort, gender, race, the number of tumours, tumour location and stage. We only collect data from the articles with clear and credible results to prove the biomarkers are useful in the diagnosis, treatment or prognosis of CRC. The CBD can also provide a professional platform to researchers who are interested in CRC research to communicate, exchange their research ideas and further design high-quality research in CRC. They can submit their new findings to our database via the submission page and communicate with us in the CBD.Database URL: http://sysbio.suda.edu.cn/CBD/.",2018-01-01 +29140524,NONCODEV5: a comprehensive annotation database for long non-coding RNAs.,"NONCODE (http://www.bioinfo.org/noncode/) is a systematic database that is dedicated to presenting the most complete collection and annotation of non-coding RNAs (ncRNAs), especially long non-coding RNAs (lncRNAs). 
Since NONCODE 2016 was released two years ago, the amount of novel identified ncRNAs has been enlarged by the reduced cost of next-generation sequencing, which has produced an explosion of newly identified data. The third-generation sequencing revolution has also offered longer and more accurate annotations. Moreover, accumulating evidence confirmed by biological experiments has provided more comprehensive knowledge of lncRNA functions. The ncRNA data set was expanded by collecting newly identified ncRNAs from literature published over the past two years and integration of the latest versions of RefSeq and Ensembl. Additionally, pig was included in the database for the first time, bringing the total number of species to 17. The number of lncRNAs in NONCODEv5 increased from 527 336 to 548 640. NONCODEv5 also introduced three important new features: (i) human lncRNA-disease relationships and single nucleotide polymorphism-lncRNA-disease relationships were constructed; (ii) human exosome lncRNA expression profiles were displayed; (iii) the RNA secondary structures of NONCODE human transcripts were predicted. NONCODEv5 is also accessible through http://www.noncode.org/.",2018-01-01 +32410913,Neonatal intensive care unit preparedness for the Novel Coronavirus Disease-2019 pandemic: A New York City hospital perspective.,"In January 2020, China reported a cluster of cases of pneumonia associated with a novel pathogenic coronavirus provisionally named Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV2). Since then, Coronavirus Disease 2019 (COVID-19) has been reported in more than 180 countries with approximately 6.5 million known infections and more than 380,000 deaths attributed to this disease as of June 3rd , 2020 (Johns Hopkins University COVID map; https://coronavirus.jhu.edu/map.html) The majority of confirmed COVID-19 cases have been reported in adults, especially older individuals with co-morbidities. 
Children have had a relatively lower rate and a less serious course of infection as reported in the literature to date. One of the most vulnerable pediatric patient populations is cared for in the neonatal intensive care unit. There is limited data on the effect of COVID-19 in fetal life, and among neonates after birth. Therefore there is an urgent need for proactive preparation, and planning to combat COVID-19, as well as to safeguard patients, their families, and healthcare personnel. This review article is based on the Centers for Disease Control and Prevention's (CDC) current recommendations for COVID-19 and its adaptation to our local resources. The aim of this article is to provide basic consolidated guidance and checklists to clinicians in the neonatal intensive care units in key aspects of preparation needed to counter exposure or infection with COVID-19. We anticipate that CDC will continue to update their guidelines regarding COVID-19 as the situation evolves, and we recommend monitoring CDC's updates for the most current information.",2020-04-01 +32374001,"Clinical display, diagnostics and genetic implication of novel Coronavirus (COVID-19) epidemic.","COVID-19 pandemic can cause irreparable damage to the involved society. This study aimed to provide a summary of the up-to-dated clinical display, diagnostics, molecular and genetic implications for COVID-19 infected patients. In this review, 73 research articles published before 25 March 2020 were analyzed to better understand the clinical characteristics of patients and to introduce the available serological, hematology and molecular diagnostic methods. 
Apart from articles extracted from PubMed and Google Scholar, WHO (https://www.who.int/), NHC (National Health Commission of the People's Republic of China (http://www.nhc.gov.cn/), NICE (National Institute for Health and Clinical Excellence, https://www.nice.org.uk/), CDC (Centers for Disease Control and Prevention, https://www.cdc.gov/), and National Administration of Traditional Chinese Medicine (http://www.satcm.gov.cn/) were also accessed to search for eligible studies. Papers published between January 1, 2020, and 25 March 2020 were searched in English and the terms ""2019-nCoV, Covid-19, Clinical Characteristics OR manifestation, method of detection, COVID-19 Genome and molecular test"" were used. As the pandemic continues to evolve, there have been reports about the possibility of asymptomatic transmission of this newly emerged pneumonia virus. We highlighted the role of HLA haplotype in virus infection as HLA typing will provide susceptibility information for personalized prevention, diagnosis, and treatment in future studies. All the data in this article will assist researchers and clinicians to develop their clinical views regarding infected patients and to emphasize the origin of SARS-CoV-2 for diagnostics.",2020-04-01 +28724888,CrossCheck: an open-source web tool for high-throughput screen data analysis.,"Modern high-throughput screening methods allow researchers to generate large datasets that potentially contain important biological information. However, oftentimes, picking relevant hits from such screens and generating testable hypotheses requires training in bioinformatics and the skills to efficiently perform database mining. There are currently no tools available to general public that allow users to cross-reference their screen datasets with published screen datasets. To this end, we developed CrossCheck, an online platform for high-throughput screen data analysis. 
CrossCheck is a centralized database that allows effortless comparison of the user-entered list of gene symbols with 16,231 published datasets. These datasets include published data from genome-wide RNAi and CRISPR screens, interactome proteomics and phosphoproteomics screens, cancer mutation databases, low-throughput studies of major cell signaling mediators, such as kinases, E3 ubiquitin ligases and phosphatases, and gene ontological information. Moreover, CrossCheck includes a novel database of predicted protein kinase substrates, which was developed using proteome-wide consensus motif searches. CrossCheck dramatically simplifies high-throughput screen data analysis and enables researchers to dig deep into the published literature and streamline data-driven hypothesis generation. CrossCheck is freely accessible as a web-based application at http://proteinguru.com/crosscheck.",2017-07-19 +31626556,Evidence for an Association Between Hearing Impairment and Disrupted Sleep: Scoping Review.,"Purpose Hearing impairment (HI) is the most common sensory impairment and may negatively impact sleep through reduced auditory input. Factors associated with HI such as anxiety regarding communication in daily life may also adversely impact an individual's sleep. Here, research on the relationship between HI and sleep disruption was catalogued using scoping review methodology. Method A systematic strategy was employed to search various electronic databases. This review is reported according to the Preferred Reporting Items for Systematic Review and Meta-Analyses Scoping Review Extension. Results Sixteen records met inclusion criteria. Studies have investigated sleep in HI as a primary aim in noise-exposed workers or large surveys in older participants. Experimental and quasi-experimental studies report alterations to sleep architecture of potential neuroplastic origins. Studies reporting sleep as a secondary aim generally report poorer sleep in HI participants. 
Conclusions This scoping review has catalogued evidence that altered or negatively impacted sleep may be associated with HI. Potential confounding factors, mechanisms, and considerations for future research are discussed. Supplemental Material https://doi.org/10.23641/asha.9968369.",2019-10-17 +32794636,Parametric survival analysis using R: Illustration with lung cancer data.,"

Background

Cox regression is the most widely used survival model in oncology. Parametric survival models are an alternative to the Cox regression model. In this study, we have illustrated the application of a semiparametric model and various parametric (Weibull, exponential, log-normal, and log-logistic) models in lung cancer data by using R software.

Aims

The aim of the study is to illustrate responsible factors in lung cancer and to compare Cox regression with parametric models.

Methods

A total of 66 lung cancer patients of African Americans (AAs) (data available online at http://clincancerres.aacrjournals.org) was used. To identify predictors of overall survival, stage of patient, sex, age, smoking, and tumor grade were taken into account. Both parametric and semiparametric models were fitted. Performance of parametric models was compared by Akaike information criterion (AIC). ""Survival"" package in R software was used to perform the analysis. Posterior density was obtained for different parameters through Bayesian approach using WinBUGS.

Results

The illustration about model fitting problem was documented. Parametric models were fitted only for stage after controlling for age. AIC value was minimum (462.4087) for log-logistic model as compared with other parametric models. Log-logistic model was the best fit for AAs lung cancer data under study.

Conclusion

Exploring parametric survival models in daily practice of cancer research is challenging. It may be due to many reasons including popularity of Cox regression and lack of knowledge about how to perform it. This paper provides the application of parametric survival models by using freely available R software with illustration. It is expected that this present work can be useful to apply parametric survival models.",2019-07-24 +32166213,Machine learning with force-field inspired descriptors for materials: fast screening and mapping energy landscape. ,"We present a complete set of chemo-structural descriptors to significantly extend the applicability of machine-learning (ML) in material screening and mapping energy landscape for multicomponent systems. These new descriptors allow differentiating between structural prototypes, which is not possible using the commonly used chemical-only descriptors. Specifically, we demonstrate that the combination of pairwise radial, nearest neighbor, bond-angle, dihedral-angle and core-charge distributions plays an important role in predicting formation energies, bandgaps, static refractive indices, magnetic properties, and modulus of elasticity for three-dimensional (3D) materials as well as exfoliation energies of two-dimensional (2D) layered materials. The training data consists of 24549 bulk and 616 monolayer materials taken from JARVIS-DFT database. We obtained very accurate ML models using gradient boosting algorithm. Then we use the trained models to discover exfoliable 2D-layered materials satisfying specific property requirements. Additionally, we integrate our formation energy ML model with a genetic algorithm for structure search to verify if the ML model reproduces the DFT convex hull. This verification establishes a more stringent evaluation metric for the ML model than what commonly used in data sciences. 
Our learnt model is publicly available on the JARVIS-ML website (https://www.ctcms.nist.gov/jarvisml) property predictions of generalized materials.",2018-01-01 +29155946,GPCRdb in 2018: adding GPCR structure models and ligands.,"G protein-coupled receptors are the most abundant mediators of both human signalling processes and therapeutic effects. Herein, we report GPCRome-wide homology models of unprecedented quality, and roughly 150 000 GPCR ligands with data on biological activities and commercial availability. Based on the strategy of 'Less model - more Xtal', each model exploits both a main template and alternative local templates. This achieved higher similarity to new structures than any of the existing resources, and refined crystal structures with missing or distorted regions. Models are provided for inactive, intermediate and active states-except for classes C and F that so far only have inactive templates. The ligand database has separate browsers for: (i) target selection by receptor, family or class, (ii) ligand filtering based on cross-experiment activities (min, max and mean) or chemical properties, (iii) ligand source data and (iv) commercial availability. SMILES structures and activity spreadsheets can be downloaded for further processing. Furthermore, three recent landmark publications on GPCR drugs, G protein selectivity and genetic variants have been accompanied with resources that now let readers view and analyse the findings themselves in GPCRdb. Altogether, this update will enable scientific investigation for the wider GPCR community. GPCRdb is available at http://www.gpcrdb.org.",2018-01-01 +29040761,PCSD: a plant chromatin state database.,"Genome-wide maps of chromatin states have become a powerful representation of genome annotation and regulatory activity. 
We collected public and in-house plant epigenomic data sets and applied a Hidden Markov Model to define chromatin states, which included 290 553 (36 chromatin states), 831 235 (38 chromatin states) and 3 936 844 (26 chromatin states) segments across the whole genome of Arabidopsis thaliana, Oryza sativa and Zea mays, respectively. We constructed a Plant Chromatin State Database (PCSD, http://systemsbiology.cau.edu.cn/chromstates) to integrate detailed information about chromatin states, including the features and distribution of states, segments in states and related genes with segments. The self-organization mapping (SOM) results for these different chromatin signatures and UCSC Genome Browser for visualization were also integrated into the PCSD database. We further provided differential SOM maps between two epigenetic marks for chromatin state comparison and custom tools for new data analysis. The segments and related genes in SOM maps can be searched and used for motif and GO analysis, respectively. In addition, multi-species integration can be used to discover conserved features at the epigenomic level. In summary, our PCSD database integrated the identified chromatin states with epigenetic features and may be beneficial for communities to discover causal functions hidden in plant chromatin.",2018-01-01 +28968795,"Mobi 2.0: an improved method to define intrinsic disorder, mobility and linear binding regions in protein structures.","

Motivation

The structures contained in the Protein Data Bank (PDB) database are of paramount importance to define our knowledge of folded proteins. While providing mainly circumstantial evidence, PDB data is also increasingly used to define the lack of unique structure, represented by mobile regions and even intrinsic disorder (ID). However, alternative definitions are used by different authors and potentially limit the generality of the analyses being carried out.

Results

Here we present Mobi 2.0, a completely re-written version of the Mobi software for the determination of mobile and potentially disordered regions from PDB structures. Mobi 2.0 provides robust definitions of mobility based on four main sources of information: (i) missing residues, (ii) residues with high temperature factors, (iii) mobility between different models of the same structure and (iv) binding to another protein or nucleotide chain. Mobi 2.0 is well suited to aggregate information across different PDB structures for the same UniProt protein sequence, providing consensus annotations. The software is expected to standardize the treatment of mobility, allowing an easier comparison across different studies related to ID.

Availability

Mobi 2.0 provides the structure-based annotation for the MobiDB database. The software is available from URL http://protein.bio.unipd.it/mobi2/.

Contact

silvio.tosatto@unipd.it.",2018-01-01 +32514450,"Sodium-glucose cotransporter 2 inhibition does not reduce hepatic steatosis in overweight, insulin-resistant patients without type 2 diabetes.","

Background and aim

Non-alcoholic fatty liver disease (NAFLD) is rapidly becoming the leading indication for liver transplant and is associated with increased cardiovascular and liver mortality, yet there are no licensed therapies. Sodium-glucose cotransporter 2 (SGLT2) inhibitors are widely used for their glucose-lowering effects in patients with type 2 diabetes (T2D). Preclinical models have suggested a beneficial impact on NAFLD, but clinical data are limited, and there are currently no data on patients without T2D. We aimed to investigate the impact of SGLT2 inhibition on NAFLD in overweight, nondiabetic patients and establish the effect these agents may have on the processes that regulate hepatic steatosis in vivo.

Methods

We conducted an open-label, experimental medicine pilot study on insulin-resistant overweight/obese individuals (n = 10) using gold-standard noninvasive assessments of NAFLD phenotype, including magnetic resonance spectroscopy, two-step hyperinsulinemic euglycemic clamps, and stable isotope tracers to assess lipid and glucose metabolism. Investigations were performed before and after a 12-week treatment with the SGLT2 inhibitor, dapagliflozin.

Results

Despite a body weight reduction of 4.4 kg, hepatic steatosis was unchanged following treatment. Hepatic glucose production increased, and there was impairment of glucose disposal during the low-dose insulin infusion. Although circulating, nonesterified, fatty acid levels did not change, the ability of insulin to suppress lipolysis was reduced.

Conclusions

SGLT2 inhibition for 12 weeks does not improve hepatic steatosis in patients without T2D. Additional studies in patients with established T2D or impairments of fasting or postprandial glucose homeostasis are needed to determine whether SGLT2 inhibition represents a viable therapeutic strategy for NAFLD. (http://clinicaltrials.gov Number NCT02696941).",2019-11-05 +32202765,In-Silico-Generated Library for Sensitive Detection of 2-Dimethylaminoethylamine Derivatized FAHFA Lipids Using High-Resolution Tandem Mass Spectrometry.,"Fatty acid esters of hydroxy fatty acids (FAHFAs) are a family of recently discovered lipids with important physiological functions in mammals and plants. However, low detection sensitivity in negative ionization mode mass spectrometry makes low-abundance FAHFA challenging to analyze. A 2-dimethylaminoethylamine (DMED) based chemical derivatization strategy was recently reported to improve the MS sensitivity of FAHFAs by labeling FAHFAs with a positively ionizable tertiary amine group. To facilitate reliable, high-throughput, and automatic annotation of these compounds, a DMED-FAHFA in silico library containing 4290 high-resolution tandem mass spectra covering 264 different FAHFA classes was developed. The construction of the library was based on the heuristic information from MS/MS fragmentation patterns of DMED-FAHFA authentic standards, and then, the patterns were applied to computer-generated DMED-FAHFAs. The developed DMED-FAHFA in silico library was demonstrated to be compatible with library search software NIST MS Search and the LC-MS/MS data processing tool MS-DIAL to guarantee high-throughput and automatic annotations. Applying the in silico library in Arabidopsis thaliana samples for profiling FAHFAs by high-resolution LC-MS/MS enabled the annotation of 19 DMED-FAHFAs from 16 families, including 3 novel compounds. Using the in silico library largely decreased the false-positive annotation rate in comparison to low-resolution LC-MS/MS. 
The developed library, MS/MS spectra, and development templates are freely available for commercial and noncommercial use at https://zenodo.org/record/3606905.",2020-03-31 +27365365,Diverse alternative back-splicing and alternative splicing landscape of circular RNAs.,"Circular RNAs (circRNAs) derived from back-spliced exons have been widely identified as being co-expressed with their linear counterparts. A single gene locus can produce multiple circRNAs through alternative back-splice site selection and/or alternative splice site selection; however, a detailed map of alternative back-splicing/splicing in circRNAs is lacking. Here, with the upgraded CIRCexplorer2 pipeline, we systematically annotated different types of alternative back-splicing and alternative splicing events in circRNAs from various cell lines. Compared with their linear cognate RNAs, circRNAs exhibited distinct patterns of alternative back-splicing and alternative splicing. Alternative back-splice site selection was correlated with the competition of putative RNA pairs across introns that bracket alternative back-splice sites. In addition, all four basic types of alternative splicing that have been identified in the (linear) mRNA process were found within circRNAs, and many exons were predominantly spliced in circRNAs. Unexpectedly, thousands of previously unannotated exons were detected in circRNAs from the examined cell lines. Although these novel exons had similar splice site strength, they were much less conserved than known exons in sequences. Finally, both alternative back-splicing and circRNA-predominant alternative splicing were highly diverse among the examined cell lines. All of the identified alternative back-splicing and alternative splicing in circRNAs are available in the CIRCpedia database (http://www.picb.ac.cn/rnomics/circpedia). 
Collectively, the annotation of alternative back-splicing and alternative splicing in circRNAs provides a valuable resource for depicting the complexity of circRNA biogenesis and for studying the potential functions of circRNAs in different cells.",2016-06-30 +31286864,Analyzing magnetic bead QuantiGene® Plex 2.0 gene expression data in high throughput mode using QGprofiler.,"

Background

The QuantiGene® Plex 2.0 platform (ThermoFisher Scientific) combines bDNA with the Luminex/xMAP magnetic bead capturing technology to assess differential gene expression in a compound exposure setting. This technology allows multiplexing in a single well of a 96 or 384 multi-well plate and can thus be used in high throughput drug discovery mode. Data interpretation follows a three-step normalization/transformation flow in which raw median fluorescent gene signals are transformed to fold change values with the use of proper housekeeping genes and negative controls. Clear instructions on how to assess the data quality and tools to perform this analysis in high throughput mode are, however, currently lacking.

Results

In this paper we introduce QGprofiler, an open source R based shiny application. QGprofiler allows for proper QuantiGene® Plex 2.0 assay optimization, choice of housekeeping genes and data pre-processing up to fold change, including appropriate QC metrics. In addition, QGprofiler allows for an Akaike information criterion based dose response fold change model selection and has a built-in tool to detect the cytotoxic potential of compounds evaluated in a high throughput screening campaign.

Conclusion

QGprofiler is a user friendly, open source available R based shiny application, which is developed to support drug discovery campaigns. In this context, entire compound libraries/series can be tested in dose response against a gene signature of choice in search for new disease relevant chemical entities. QGprofiler is available at: https://qgprofiler.openanalytics.eu/app/QGprofiler.",2019-07-08 +31619686,"Fungi of French Guiana gathered in a taxonomic, environmental and molecular dataset.","In Amazonia, the knowledge about Fungi remains patchy and biased towards accessible sites. This is particularly the case in French Guiana where the existing collections have been confined to few coastal localities. Here, we aimed at filling the gaps of knowledge in undersampled areas of this region, particularly focusing on the Basidiomycota. From 2011, we comprehensively collected fruiting-bodies with a stratified and reproducible sampling scheme in 126 plots. Sites of sampling reflected the main forest habitats of French Guiana in terms of soil fertility and topography. The dataset of 5219 specimens gathers 245 genera belonging to 75 families, 642 specimens are barcoded. The dataset is not a checklist as only 27% of the specimens are identified at the species level but 96% are identified at the genus level. We found an extraordinary diversity distributed across forest habitats. The dataset is an unprecedented and original collection of Basidiomycota for the region, making specimens available for taxonomists and ecologists. The database is publicly available in the GBIF repository ( https://doi.org/10.15468/ymvlrp ).",2019-10-16 +31752856,Development and validation of a predictive model for American Society of Anesthesiologists Physical Status.,"BACKGROUND:The American Society of Anesthesiologists Physical Status (ASA-PS) classification system was developed to categorize the fitness of patients before surgery. 
Increasingly, the ASA-PS has been applied to other uses including justification of inpatient admission. Our objectives were to develop and cross-validate a statistical model for predicting ASA-PS; and 2) assess the concurrent and predictive validity of the model by assessing associations between model-derived ASA-PS, observed ASA-PS, and a diverse set of 30-day outcomes. METHODS:Using the 2014 American College of Surgeons National Surgical Quality Improvement Program (ACS NSQIP) Participant Use Data File, we developed and internally cross-validated multinomial regression models to predict ASA-PS using preoperative NSQIP data. Accuracy was assessed with C-Statistics and calibration plots. We assessed both concurrent and predictive validity of model-derived ASA-PS relative to observed ASA-PS and 30-day outcomes. To aid further research and use of the ASA-PS model, we implemented it into an online calculator. RESULTS:Of the 566,797 elective procedures in the final analytic dataset, 8.9% were ASA-PS 1, 48.9% were ASA-PS 2, 39.1% were ASA-PS 3, and 3.2% were ASA-PS 4. The accuracy of the 21-variable model to predict ASA-PS was C = 0.77 +/- 0.0025. The model-derived ASA-PS had stronger association with key indicators of preoperative status including comorbidities and higher BMI (concurrent validity) compared to observed ASA-PS, but less strong associations with postoperative complications (predictive validity). The online ASA-PS calculator may be accessed at https://s-spire-clintools.shinyapps.io/ASA_PS_Estimator/ CONCLUSIONS: Model-derived ASA-PS better tracked key indicators of preoperative status compared to observed ASA-PS. 
The ability to have an electronically derived measure of ASA-PS can potentially be useful in research, quality measurement, and clinical applications.",2019-11-21 +32270138,HIPPIE2: a method for fine-scale identification of physically interacting chromatin regions.,"Most regulatory chromatin interactions are mediated by various transcription factors (TFs) and involve physically interacting elements such as enhancers, insulators or promoters. To map these elements and interactions at a fine scale, we developed HIPPIE2 that analyzes raw reads from high-throughput chromosome conformation (Hi-C) experiments to identify precise loci of DNA physically interacting regions (PIRs). Unlike standard genome binning approaches (e.g. 10-kb to 1-Mb bins), HIPPIE2 dynamically infers the physical locations of PIRs using the distribution of restriction sites to increase analysis precision and resolution. We applied HIPPIE2 to in situ Hi-C datasets across six human cell lines (GM12878, IMR90, K562, HMEC, HUVEC, NHEK) with matched ENCODE/Roadmap functional genomic data. HIPPIE2 detected 1042 738 distinct PIRs, with high resolution (average PIR length of 1006 bp) and high reproducibility (92.3% in GM12878). PIRs are enriched for epigenetic marks (H3K27ac, H3K4me1) and open chromatin, suggesting active regulatory roles. HIPPIE2 identified 2.8 million significant PIR-PIR interactions, 27.2% of which were enriched for TF binding sites. 50 608 interactions were enhancer-promoter interactions and were enriched for 33 TFs, including known DNA looping/long-range mediators. These findings demonstrate that the novel dynamic approach of HIPPIE2 (https://bitbucket.com/wanglab-upenn/HIPPIE2) enables the characterization of chromatin and regulatory interactions with high resolution and reproducibility.",2020-03-31 +32209832,Factors Associated With Ambulation in Myelomeningocele: A Longitudinal Study From the National Spina Bifida Patient Registry.,"

Objective

Evidence is limited regarding clinical factors associated with ambulation status over the lifespan of individuals with myelomeningocele. We used longitudinal data from the National Spina Bifida Patient Registry to model population-level variation in ambulation over time and hypothesized that effects of clinical factors associated with ambulation would vary by age and motor level.

Design

A population-averaged generalized estimating equation was used to estimate the probability of independent ambulation. Model predictors included time (age), race, ethnicity, sex, insurance, and interactions between time, motor level, and the number of orthopedic, noncerebral shunt neurosurgeries, and cerebral shunt neurosurgeries.

Results

The study cohort included 5371 participants with myelomeningocele. A change from sacral to low-lumbar motor level initially reduced the odds of independent ambulation (OR = 0.24, 95% CI = 0.15-0.38) but became insignificant with increasing age. Surgery count was associated with decreased odds of independent ambulation (orthopedic: OR = 0.65, 95% CI = 0.50-0.85; noncerebral shunt neurosurgery: OR = 0.65, 95% CI = 0.51-0.84; cerebral shunt: OR = 0.90, 95% CI = 0.83-0.98), with increasing effects seen at lower motor levels.

Conclusions

Our findings suggest that effects of several commonly accepted predictors of ambulation status vary with time. As the myelomeningocele population ages, it becomes increasingly important that study design account for this time-varying nature of clinical reality.

To claim CME credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) Describe general trends in ambulation status by age in the myelomeningocele population; (2) Recognize the nuances of cause and effect underlying the relationship between surgical intervention and ambulation status; (3) Explain why variation of clinical effect over time within the myelomeningocele population matters.

Level

Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2020-07-01 +31308550,EPIC: software toolkit for elution profile-based inference of protein complexes.,"Protein complexes are key macromolecular machines of the cell, but their description remains incomplete. We and others previously reported an experimental strategy for global characterization of native protein assemblies based on chromatographic fractionation of biological extracts coupled to precision mass spectrometry analysis (chromatographic fractionation-mass spectrometry, CF-MS), but the resulting data are challenging to process and interpret. Here, we describe EPIC (elution profile-based inference of complexes), a software toolkit for automated scoring of large-scale CF-MS data to define high-confidence multi-component macromolecules from diverse biological specimens. As a case study, we used EPIC to map the global interactome of Caenorhabditis elegans, defining 612 putative worm protein complexes linked to diverse biological processes. These included novel subunits and assemblies unique to nematodes that we validated using orthogonal methods. The open source EPIC software is freely available as a Jupyter notebook packaged in a Docker container (https://hub.docker.com/r/baderlab/bio-epic/).",2019-07-15 +28387199,GSA: Genome Sequence Archive.,"With the rapid development of sequencing technologies towards higher throughput and lower cost, sequence data are generated at an unprecedentedly explosive rate. 
To provide an efficient and easy-to-use platform for managing huge sequence data, here we present Genome Sequence Archive (GSA; http://bigd.big.ac.cn/gsa or http://gsa.big.ac.cn), a data repository for archiving raw sequence data. In compliance with data standards and structures of the International Nucleotide Sequence Database Collaboration (INSDC), GSA adopts four data objects (BioProject, BioSample, Experiment, and Run) for data organization, accepts raw sequence reads produced by a variety of sequencing platforms, stores both sequence reads and metadata submitted from all over the world, and makes all these data publicly available to worldwide scientific communities. In the era of big data, GSA is not only an important complement to existing INSDC members by alleviating the increasing burdens of handling sequence data deluge, but also takes the significant responsibility for global big data archive and provides free unrestricted access to all publicly available data in support of research activities throughout the world.",2017-02-02 +29771380,dbCAN2: a meta server for automated carbohydrate-active enzyme annotation.,"Complex carbohydrates of plants are the main food sources of animals and microbes, and serve as promising renewable feedstock for biofuel and biomaterial production. Carbohydrate active enzymes (CAZymes) are the most important enzymes for complex carbohydrate metabolism. With an increasing number of plant and plant-associated microbial genomes and metagenomes being sequenced, there is an urgent need of automatic tools for genomic data mining of CAZymes. We developed the dbCAN web server in 2012 to provide a public service for automated CAZyme annotation for newly sequenced genomes. 
Here, dbCAN2 (http://cys.bios.niu.edu/dbCAN2) is presented as an updated meta server, which integrates three state-of-the-art tools for CAZome (all CAZymes of a genome) annotation: (i) HMMER search against the dbCAN HMM (hidden Markov model) database; (ii) DIAMOND search against the CAZy pre-annotated CAZyme sequence database and (iii) Hotpep search against the conserved CAZyme short peptide database. Combining the three outputs and removing CAZymes found by only one tool can significantly improve the CAZome annotation accuracy. In addition, dbCAN2 now also accepts nucleotide sequence submission, and offers the service to predict physically linked CAZyme gene clusters (CGCs), which will be a very useful online tool for identifying putative polysaccharide utilization loci (PULs) in microbial genomes or metagenomes.",2018-07-01 +33123532,An Online Application for Retinoblastoma Surveillance.,"

Background

Retinoblastoma (RB) is a potentially heritable childhood cancer that is vision- and life-threatening. Assessing the risk of inheriting RB is important for structuring ophthalmic and genetic screening of family members.

Purpose

To create a free online application that integrates phenotypic, genetic, and familial relationships with clinical best practice surveillance guidelines for families with RB.

Methods

The risk of germline RB1 gene mutation was assessed for first- and second-degree relatives of a proband under variable clinical scenarios, integrating age, phenotype, relationship data, and genotype (germline RB1 mutation status: detected, undetected, not tested). Based on the assessed risk of a germline RB1 mutation, recommendations regarding further genetic testing as well as ophthalmic surveillance were derived from consensus guidelines.

Results

The recommendations depend on the RB1 germline mutation status (detected, undetected, not tested), which were further subcategorized by the results of tumor phenotype, relationship to proband, age of the relative, and family structure. The online application is available at https://nakul-singh.shinyapps.io/RB_Screening_rec/.

Conclusions

The assessed risk of germline RB1 mutation determines ophthalmic surveillance recommendations. The tool may have most value in regions where access to specialized care is limited.",2020-03-10 +32228381,"Exploring Early Childhood Language Environments: A Comparison of Language Use, Exposure, and Interactions in the Home and Childcare Settings.","Purpose This study was conducted in a large Midwestern metropolitan area to examine the language environments at home and in center-based childcare for young children who are living in poverty. We compared child language use and exposure in the home and childcare settings using extended observations with automated Language Environment Analysis to gain a deeper understanding of the environmental factors that may affect change in language outcomes for young children. Method Thirty-eight children, along with parents (n = 38) and childcare providers (n = 14) across five childcare centers, participated in this study. Each child completed a standardized language assessment and two daylong recordings with Language Environment Analysis to determine the number of adult words, conversational turns, and child vocalizations that occurred in each setting. Data were analyzed at 5-min intervals across each recording. Results Comparisons between home recordings in this sample and a comparison group showed reliably higher rates of adult words and conversational turns in the home setting. Linear mixed-effects regression models showed significant differences in the child language environments, with the home setting providing higher levels of language input and use. These effects were still meaningful after accounting for the time of day, participant demographic characteristics, and child language ability. Conclusions Practical implications for supporting child language development across settings are discussed, and suggestions for further research are provided. 
Supplemental Material https://doi.org/10.23641/asha.12042678.",2020-03-30 +31569338,A Multi-Protocol IoT Platform Based on Open-Source Frameworks. ,"Internet of Things (IoT) technologies have evolved rapidly during the last decade, and many architecture types have been proposed for distributed and interconnected systems. However, most systems are implemented following fragmented approaches for specific application domains, introducing difficulties in providing unified solutions. However, the unification of solutions is an important feature from an IoT perspective. In this paper, we present an IoT platform that supports multiple application layer communication protocols (Representational State Transfer (REST)/HyperText Transfer Protocol (HTTP), Message Queuing Telemetry Transport (MQTT), Advanced Message Queuing Protocol (AMQP), Constrained Application Protocol (CoAP), and Websockets) and that is composed of open-source frameworks (RabbitMQ, Ponte, OM2M, and RDF4J). We have explored a back-end system that interoperates with the various frameworks and offers a single approach for user-access control on IoT data streams and micro-services. The proposed platform is evaluated using its containerized version, being easily deployable on the vast majority of modern computing infrastructures. Its design promotes service reusability and follows a marketplace architecture, so that the creation of interoperable IoT ecosystems with active contributors is enabled. All the platform's features are analyzed, and we discuss the results of experiments, with the multiple communication protocols being tested when used interchangeably for transferring data. 
Developing unified solutions using such a platform is of interest to users and developers as they can test and evaluate local instances or even complex applications composed of their own IoT resources before releasing a production version to the marketplace.",2019-09-28 +31656398,"Vascular plants dataset of the herbarium (COFC) of the University of Cordoba, Spain.","This paper describes the herbarium (COFC) dataset of vascular plants of the University of Cordoba (SW Spain). This dataset is made up of two collections, the General collection (61,377 specimens) and the Historical collection (1,614 specimens). This study has focused mainly on the General collection, which contains the largest number of vascular plant specimens, predominantly angiosperms, mainly provincial and regional (Andalusia, Spain), but also with a good representation of other areas of the Iberian Peninsula and neighboring countries. The place of collection is specified in 99.7% of the labels, about 35% being georeferenced, and it is estimated that, currently, about 86% of the material housed in the herbarium has been databased using Elysia v1.0. software. With more than 178 families, 1,178 genera, and 3,750 species, this collection not only has educational importance, but is a valuable research tool that has been useful for the development of important works such as ""Flora Vascular de Andalucía Occidental"" and the ""Flora iberica"". The dataset described in this paper is registered with GBIF (accessible at https://doi.org/10.15468/fdzzal).",2019-10-15 +31100387,The EXPANDER Integrated Platform for Transcriptome Analysis.,"Genome-wide analysis of cellular transcriptomes using RNA-seq or expression arrays is a major mainstay of current biological and biomedical research. EXPANDER (EXPression ANalyzer and DisplayER) is a comprehensive software package for analysis of expression data, with built-in support for 18 different organisms. 
It is designed as a ""one-stop shop"" platform for transcriptomic analysis, allowing for execution of all analysis steps starting with gene expression data matrix. Analyses offered include low-level preprocessing and normalization, differential expression analysis, clustering, bi-clustering, supervised grouping, high-level functional and pathway enrichment tests, and networks and motif analyses. A variety of options is offered for each step, using established algorithms, including many developed and published by our laboratory. EXPANDER has been continuously developed since 2003, having to date over 18,000 downloads and 540 citations. One of the innovations in the recent version is support for combined analysis of gene expression and ChIP-seq data to enhance the inference of transcriptional networks and their functional interpretation. EXPANDER implements cutting-edge algorithms and makes them accessible to users through user-friendly interface and intuitive visualizations. It is freely available to users at http://acgt.cs.tau.ac.il/expander/.",2019-05-14 +29309510,Mustguseal: a server for multiple structure-guided sequence alignment of protein families.,"Motivation:Comparative analysis of homologous proteins in a functionally diverse superfamily is a valuable tool at studying structure-function relationship, but represents a methodological challenge. Results:The Mustguseal web-server can automatically build large structure-guided sequence alignments of functionally diverse protein families that include thousands of proteins basing on all available information about their structures and sequences in public databases. Superimposition of protein structures is implemented to compare evolutionarily distant relatives, whereas alignment of sequences is used to compare close homologues. 
The final alignment can be downloaded for a local use or operated on-line with the built-in interactive tools and further submitted to the integrated sister web-servers of Mustguseal to analyze conserved, subfamily-specific and co-evolving residues at studying a protein function and regulation, designing improved enzyme variants for practical applications and selective ligands to modulate functional properties of proteins. Availability and implementation:Freely available on the web at https://biokinet.belozersky.msu.ru/mustguseal. Contact:vytas@belozersky.msu.ru. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +32366478,A Premalignant Cell-Based Model for Functionalization and Classification of PTEN Variants.,"As sequencing becomes more economical, we are identifying sequence variations in the population faster than ever. For disease-associated genes, it is imperative that we differentiate a sequence variant as either benign or pathogenic, such that the appropriate therapeutic interventions or surveillance can be implemented. PTEN is a frequently mutated tumor suppressor that has been linked to the PTEN hamartoma tumor syndrome. Although the domain structure of PTEN and the functional impact of a number of its most common tumor-linked mutations have been characterized, there is a lack of information about many recently identified clinical variants. To address this challenge, we developed a cell-based assay that utilizes a premalignant phenotype of normal mammary epithelial cells lacking PTEN. We measured the ability of PTEN variants to rescue the spheroid formation phenotype of PTEN-/- MCF10A cells maintained in suspension. As proof of concept, we functionalized 47 missense variants using this assay, only 19 of which have clear classifications in ClinVar. We utilized a machine learning model trained with annotated genotypic data to classify variants as benign or pathogenic based on our functional scores. 
Our model predicted with high accuracy that loss of PTEN function was indicative of pathogenicity. We also determined that the pathogenicity of certain variants may have arisen from reduced stability of the protein product. Overall, this assay outperformed computational predictions, was scalable, and had a short run time, serving as an ideal alternative for annotating the clinical significance of cancer-associated PTEN variants. SIGNIFICANCE: Combined three-dimensional tumor spheroid modeling and machine learning classifies PTEN missense variants, over 70% of which are currently listed as variants of uncertain significance. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/13/2775/F1.large.jpg.",2020-05-04 +32497138,G-OnRamp: Generating genome browsers to facilitate undergraduate-driven collaborative genome annotation.,"Scientists are sequencing new genomes at an increasing rate with the goal of associating genome contents with phenotypic traits. After a new genome is sequenced and assembled, structural gene annotation is often the first step in analysis. Despite advances in computational gene prediction algorithms, most eukaryotic genomes still benefit from manual gene annotation. This requires access to good genome browsers to enable annotators to visualize and evaluate multiple lines of evidence (e.g., sequence similarity, RNA sequencing [RNA-Seq] results, gene predictions, repeats) and necessitates many volunteers to participate in the work. To address the technical barriers to creating genome browsers, the Genomics Education Partnership (GEP; https://gep.wustl.edu/) has partnered with the Galaxy Project (https://galaxyproject.org) to develop G-OnRamp (http://g-onramp.org), a web-based platform for creating UCSC Genome Browser Assembly Hubs and JBrowse genome browsers. G-OnRamp also converts a JBrowse instance into an Apollo instance for collaborative genome annotations in research and educational settings. 
The genome browsers produced can be transferred to the CyVerse Data Store for long-term access. G-OnRamp enables researchers to easily visualize their experimental results, educators to create Course-based Undergraduate Research Experiences (CUREs) centered on genome annotation, and students to participate in genomics research. In the process, students learn about genes/genomes and about how to utilize large datasets. Development of G-OnRamp was guided by extensive user feedback. Sixty-five researchers/educators from >40 institutions participated through in-person workshops, which produced >20 genome browsers now available for research and education. Genome browsers generated for four parasitoid wasp species have been used in a CURE engaging students at 15 colleges and universities. Our assessment results in the classroom demonstrate that the genome browsers produced by G-OnRamp are effective tools for engaging undergraduates in research and in enabling their contributions to the scientific literature in genomics. Expansion of such genomics research/education partnerships will be beneficial to researchers, faculty, and students alike.",2020-06-04 +22116062,GeneDB--an annotation database for pathogens.,"GeneDB (http://www.genedb.org) is a genome database for prokaryotic and eukaryotic pathogens and closely related organisms. The resource provides a portal to genome sequence and annotation data, which is primarily generated by the Pathogen Genomics group at the Wellcome Trust Sanger Institute. It combines data from completed and ongoing genome projects with curated annotation, which is readily accessible from a web based resource. The development of the database in recent years has focused on providing database-driven annotation tools and pipelines, as well as catering for increasingly frequent assembly updates. The website has been significantly redesigned to take advantage of current web technologies, and improve usability. 
The current release stores 41 data sets, of which 17 are manually curated and maintained by biologists, who review and incorporate data from the scientific literature, as well as other sources. GeneDB is primarily a production and annotation database for the genomes of predominantly pathogenic organisms.",2011-11-23 +30596130,"Data on the impact of objects with different shapes, masses, and impact velocities on a dummy head.","In this article, a data generated from impacts of objects with different shapes, masses, and impact velocities on a developed dummy head. The mass considered was in the range of 0.3-0.5 kg while the shapes considered were cube, wedge, and cylinder. The impact velocities levels were in the range of 1-3 m/s. A total of 144 experiments were conducted and the corresponding videos and raw data were analyzed for impact velocity, peak head linear acceleration, 3 ms criterion, and the Head Injury Criterion (HIC). This dataset includes the raw acceleration data and a summary of the overall processed data. The data is available on Harvard Dataverse: https://doi.org/10.7910/DVN/AVC8GG.",2018-12-06 +31695883,Functional richness shows spatial scale dependency in Pheidole ant assemblages from Neotropical savannas.,"There is a growing recognition that spatial scale is important for understanding ecological processes shaping community membership, but empirical evidence on this topic is still scarce. Ecological processes such as environmental filtering can decrease functional differences among species and promote functional clustering of species assemblages, whereas interspecific competition can do the opposite. These different ecological processes are expected to take place at different spatial scales, with competition being more likely at finer scales and environmental filtering most likely at coarser scales. 
We used a comprehensive dataset on species assemblages of a dominant ant genus, Pheidole, in the Cerrado (savanna) biodiversity hotspot to ask how functional richness relates to species richness gradients and whether such relationships vary across spatial scales. Functional richness of Pheidole assemblages decreased with increasing species richness, but such relationship did not vary across different spatial scales. Species were more functionally dissimilar at finer spatial scales, and functional richness increased less than expected with increasing species richness. Our results indicate a tighter packing of the functional volume as richness increases and point out to a primary role for environmental filtering in shaping membership of Pheidole assemblages in Neotropical savannas. OPEN RESEARCH BADGES:This article has been awarded Open Materials, Open Data, Preregistered Research Designs Badges. All materials and data are publicly accessible via the Open Science Framework at https://doi.org/10.5061/dryad.31201jg.",2019-09-27 +31076763,DOGMA: a web server for proteome and transcriptome quality assessment.,"Even in the era of next generation sequencing, in which bioinformatics tools abound, annotating transcriptomes and proteomes remains a challenge. This can have major implications for the reliability of studies based on these datasets. Therefore, quality assessment represents a crucial step prior to downstream analyses on novel transcriptomes and proteomes. DOGMA allows such a quality assessment to be carried out. The data of interest are evaluated based on a comparison with a core set of conserved protein domains and domain arrangements. Depending on the studied species, DOGMA offers precomputed core sets for different phylogenetic clades. We now developed a web server for the DOGMA software, offering a user-friendly, simple to use interface. 
Additionally, the server provides a graphical representation of the analysis results and their placement in comparison to publicly available data. The server is freely available under https://domainworld-services.uni-muenster.de/dogma/. Additionally, for large scale analyses the software can be downloaded free of charge from https://domainworld.uni-muenster.de.",2019-07-01 +31028400,"Updated MS²PIP web server delivers fast and accurate MS² peak intensity prediction for multiple fragmentation methods, instruments and labeling techniques.","MS²PIP is a data-driven tool that accurately predicts peak intensities for a given peptide's fragmentation mass spectrum. Since the release of the MS²PIP web server in 2015, we have brought significant updates to both the tool and the web server. In addition to the original models for CID and HCD fragmentation, we have added specialized models for the TripleTOF 5600+ mass spectrometer, for TMT-labeled peptides, for iTRAQ-labeled peptides, and for iTRAQ-labeled phosphopeptides. Because the fragmentation pattern is heavily altered in each of these cases, these additional models greatly improve the prediction accuracy for their corresponding data types. We have also substantially reduced the computational resources required to run MS²PIP, and have completely rebuilt the web server, which now allows predictions of up to 100 000 peptide sequences in a single request. The MS²PIP web server is freely available at https://iomics.ugent.be/ms2pip/.",2019-07-01 +28481982,McPAS-TCR: a manually curated catalogue of pathology-associated T cell receptor sequences.,"

Motivation

While growing numbers of T cell receptor (TCR) repertoires are being mapped by high-throughput sequencing, existing methods do not allow for computationally connecting a given TCR sequence to its target antigen, or relating it to a specific pathology. As an alternative, a manually curated database can relate TCR sequences with their cognate antigens and associated pathologies based on published experimental data.

Results

We present McPAS-TCR, a manually curated database of TCR sequences associated with various pathologies and antigens based on published literature. Our database currently contains more than 5000 sequences of TCRs associated with various pathologic conditions (including pathogen infections, cancer and autoimmunity) and their respective antigens in humans and in mice. A web-based tool allows for searching the database based on different criteria, and for finding annotated sequences from the database in users' data. The McPAS-TCR website assembles information from a large number of studies that is very hard to dissect otherwise. Initial analyses of the data provide interesting insights on pathology-associated TCR sequences.

Availability and implementation

Free access at http://friedmanlab.weizmann.ac.il/McPAS-TCR/ .

Contact

nir.friedman@weizmann.ac.il.",2017-09-01 +31346436,Population structure of avian malaria parasites.,"The geographic distribution of genetic diversity in malaria parasite populations (Apicomplexa: Haemosporida) presumably influences local patterns of virulence and the evolution of host-resistance, but little is known about population genetic structure in these parasites. We assess the distribution of genetic diversity in the partial Domain I of apical membrane antigen 1 (AMA1) in three mtDNA-defined lineages of avian Plasmodium to determine spatial population structure and host-parasite genetic relationships. We find that one parasite lineage is genetically differentiated in association with a single host genus and among some locations, but not with respect to other hosts. Two other parasite lineages are undifferentiated with respect to host species but exhibit geographic differentiation that is inconsistent with shared geographic barriers or with isolation-by-distance. Additional differentiation within two other lineages is unassociated with host species or location; in one case, we tentatively interpret this differentiation as the result of mitochondrial introgression from one of the lineages into a second lineage. More sampling of nuclear genetic diversity within populations of avian Plasmodium is needed to rule out coinfection as a possible confounding factor. If coinfections are not responsible for these findings, further assessment is needed to determine the frequency of mitonuclear discordance and its implications for defining parasite lineages based on mitochondrial genetic variation.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at Genbank https://www.ncbi.nlm.nih.gov/genbank/, accession numbers MK965548-MK965653 and MK929797-MK930264.",2019-06-17 +30065744,miRVIT: A Novel miRNA Database and Its Application to Uncover Vitis Responses to Flavescence dorée Infection.,"Micro(mi)RNAs play crucial roles in plant developmental processes and in defense responses to biotic and abiotic stresses. In the last years, many works on small RNAs in grapevine (Vitis spp.) were published, and several conserved and putative novel grapevine-specific miRNAs were identified. In order to reorganize the high quantity of available data, we produced ""miRVIT,"" the first database of all novel grapevine miRNA candidates characterized so far, and still not deposited in miRBase. To this aim, each miRNA accession was renamed, repositioned in the last version of the grapevine genome, and compared with all the novel and conserved miRNAs detected in grapevine. Conserved and novel miRNAs cataloged in miRVIT were then used for analyzing Vitis vinifera plants infected by Flavescence dorée (FD), one of the most severe phytoplasma diseases affecting grapevine. The analysis of small RNAs from healthy, recovered (plants showing spontaneous and stable remission of symptoms), and FD-infected ""Barbera"" grapevines showed that FD altered the expression profiles of several miRNAs, including those involved in cell development and photosynthesis, jasmonate signaling, and disease resistance response. The application of miRVIT in a biological context confirmed the effectiveness of the followed approach, especially for the identification of novel miRNA candidates in grapevine. miRVIT database is available at http://mirvit.ipsp.cnr.it. 
Highlights: The application of the newly produced database of grapevine novel miRNAs to the analysis of plants infected by Flavescence dorée reveals key roles of miRNAs in photosynthesis and jasmonate signaling.",2018-07-17 +28653025,Proteomics data on MAP Kinase Kinase 3 knock out bone marrow derived macrophages exposed to cigarette smoke extract.,"This data article reports changes in the phosphoproteome and total proteome of cigarette smoke extract (CSE) exposed WT and MAP Kinase Kinase 3 knock out (MKK3-/-) bone marrow derived macrophages (BMDM). The dataset generated is helpful for understanding the mechanism of CSE induced inflammation and the role of MAP kinase signaling pathway. The cellular proteins were labeled with isobaric tags for relative and absolute quantitation (iTRAQ®) reagents and analyzed by LC-MS/MS. The standard workflow module for iTRAQ® quantification within the Proteome Discoverer was utilized for the data analysis. Ingenuity Pathway Analysis (IPA) software and Reactome was used to identify enriched canonical pathways and molecular networks (Mannam et al., 2016) [1]. All the associated mass spectrometry data has been deposited in the Yale Protein Expression Database (YPED) with the web-link to the data: http://yped.med.yale.edu/repository/ViewSeriesMenu.do;jsessionid=6A5CB07543D8B529FAE8C3FCFE29471D?series_id=5044&series_name=MMK3+Deletion+in+MEFs.",2017-06-07 +25308527,A computational frame and resource for understanding the lncRNA-environmental factor associations and prediction of environmental factors implicated in diseases.,"The complex traits of an organism are associated with a complex interplay between genetic factors (GFs) and environmental factors (EFs). However, compared with protein-coding genes and microRNAs, there is a paucity of computational methods and bioinformatic resource platform for understanding the associations between lncRNA and EF. 
In this study, we developed a novel computational method to identify potential associations between lncRNA and EF, and released LncEnvironmentDB, a user-friendly web-based database aiming to provide a comprehensive resource platform for lncRNA and EF. Topological analysis of EF-related networks revealed the small world, scale-free and modularity structure. We also found that lncRNA and EF significantly enriched interacting miRNAs are functionally more related by analyzing their related diseases, implying that the predicted lncRNA signature of EF can reflect the functional characteristics to some degree. Finally, we developed a random walk with a restart-based computational model (RWREFD) to predict potential disease-related EFs by integrating lncRNA-EF associations and EF-disease associations. The performance of RWREFD was evaluated by experimentally verified EF-disease associations based on leave-one-out cross-validation and achieved an AUC value of 0.71, which is higher than randomization test, indicating that the RWREFD method has a reliable and high accuracy of prediction. To the best of our knowledge, LncEnvironmentDB is the first attempt to predict and house the experimental and predicted associations between lncRNA and EF. LncEnvironmentDB is freely available on the web at http://bioinfo.hrbmu.edu.cn/lncefdb/.",2014-12-01 +26654445,Subnational benchmarking of health systems performance in Africa using health outcome and coverage indicators.,"National health systems performance (HSP) assessments and benchmarking are critical to understanding how well the delivery of healthcare meets the needs of citizens. Benchmarking HSP has often been done between countries to inform the global public health space. 
However, its impact is likely to be far greater when implemented sub-nationally to inform actual decisions on resource allocations and performance improvements, especially in high disease burden, low-income countries, where the resource envelope available for health is inadequate. In their study, Roberts and colleagues assemble, analyse and map a minimum set of health intervention and outcome indicators from 1990-2011 to assess and benchmark HSP across the 11 regions of Uganda. This is the first empirical sub-national HSP benchmarking study in the country and the results have potentially important health system policy implications. Please see related research: http://www.biomedcentral.com/1741-7015/13/285.",2015-12-14 +30923381,Selene: a PyTorch-based deep learning library for sequence data.,"To enable the application of deep learning in biology, we present Selene (https://selene.flatironinstitute.org/), a PyTorch-based deep learning library for fast and easy development, training, and application of deep learning model architectures for any biological sequence data. We demonstrate on DNA sequences how Selene allows researchers to easily train a published architecture on new data, develop and evaluate a new architecture, and use a trained model to answer biological questions of interest.",2019-03-28 +26792755,"Oral medicine (stomatology) across the globe: birth, growth, and future.","Oral medicine (stomatology) is a recognized and increasingly important dental specialty in many parts of the world that recognizes and fosters the interplay between medical health and oral health. Its dental activities rely greatly on the underlying biology of disease and evidence-based outcomes. However, full recognition of the importance of oral medicine to patient care, research, and education is not yet totally universally acknowledged. 
To address these shortcomings, we outline the birth, growth, and future of oral medicine globally, and record identifiable past contributions to the development of the specialty, providing an accurate, unique, and valuable resource on oral medicine. Although it was challenging to gather the data, we present this information as a review that endeavors to summarize the salient points about oral medicine, based on MEDLINE, other internet searches, communication with oral medicine and stomatological societies across the world, the web page http://en.wikipedia.org/wiki/List_of_dental_organizations, and discussions with a wide range of key senior persons in the specialty.",2015-10-19 +32273881,Computing Skin Cutaneous Melanoma Outcome From the HLA-Alleles and Clinical Characteristics.,"Human leukocyte antigen (HLA) are essential components of the immune system that stimulate immune cells to provide protection and defense against cancer. Thousands of HLA alleles have been reported in the literature, but only a specific set of HLA alleles are present in an individual. The capability of the immune system to recognize cancer-associated mutations depends on the presence of a particular set of alleles, which elicit an immune response to fight against cancer. Therefore, the occurrence of specific HLA alleles affects the survival outcome of cancer patients. In the current study, prediction models were developed, using 401 cutaneous melanoma patients, to predict the overall survival (OS) of patients using their clinical data and HLA alleles. We observed that the presence of certain favorable superalleles like HLA-B∗55 (HR = 0.15, 95% CI 0.034-0.67), HLA-A∗01 (HR = 0.5, 95% CI 0.3-0.8), is responsible for the improved OS. In contrast, the presence of certain unfavorable superalleles such as HLA-B∗50 (HR = 2.76, 95% CI 1.284-5.941), HLA-DRB1∗12 (HR = 3.44, 95% CI 1.64-7.2) is responsible for the poor survival. 
We developed prediction models using key 14 HLA superalleles, demographic, and clinical characteristics for predicting high-risk cutaneous melanoma patients and achieved HR = 4.52 (95% CI 3.088-6.609, p-value = 8.01E-15). Eventually, we also provide a web-based service to the community for predicting the risk status in cutaneous melanoma patients (https://webs.iiitd.edu.in/raghava/skcmhrp/).",2020-03-26 +31603259,The AQUA-MER databases and aqueous speciation server: A web resource for multiscale modeling of mercury speciation.,"To assess the chemical reactivity, toxicity, and mobility of pollutants in the environment, knowledge of their species distributions is critical. Because their direct measurement is often infeasible, speciation modeling is widely adopted. Mercury (Hg) is a representative pollutant for which study of its speciation benefits from modeling. However, Hg speciation modeling is often hindered by a lack of reliable thermodynamic constants. Although computational chemistry (e.g., density functional theory [DFT]) can generate these constants, methods for directly coupling DFT and speciation modeling are not available. Here, we combine computational chemistry and continuum-scale modeling with curated online databases to ameliorate the problem of unreliable inputs to Hg speciation modeling. Our AQUA-MER databases and web server (https://aquamer.ornl.gov) provides direct speciation results by combining web-based interfaces to a speciation calculator, databases of thermodynamic constants, and a computational chemistry toolkit to estimate missing constants. Although Hg is presented as a concrete use case, AQUA-MER can also be readily applied to other elements. © 2019 Wiley Periodicals, Inc.",2019-10-11 +25604238,ocsESTdb: a database of oil crop seed EST sequences for comparative analysis and investigation of a global metabolic network and oil accumulation metabolism.,"

Background

Oil crop seeds are important sources of fatty acids (FAs) for human and animal nutrition. Despite their importance, there is a lack of an essential bioinformatics resource on gene transcription of oil crops from a comparative perspective. In this study, we developed ocsESTdb, the first database of expressed sequence tag (EST) information on seeds of four large-scale oil crops with an emphasis on global metabolic networks and oil accumulation metabolism that target the involved unigenes.

Description

A total of 248,522 ESTs and 106,835 unigenes were collected from the cDNA libraries of rapeseed (Brassica napus), soybean (Glycine max), sesame (Sesamum indicum) and peanut (Arachis hypogaea). These unigenes were annotated by a sequence similarity search against databases including TAIR, NR protein database, Gene Ontology, COG, Swiss-Prot, TrEMBL and Kyoto Encyclopedia of Genes and Genomes (KEGG). Five genome-scale metabolic networks that contain different numbers of metabolites and gene-enzyme reaction-association entries were analysed and constructed using Cytoscape and yEd programs. Details of unigene entries, deduced amino acid sequences and putative annotation are available from our database to browse, search and download. Intuitive and graphical representations of EST/unigene sequences, functional annotations, metabolic pathways and metabolic networks are also available. ocsESTdb will be updated regularly and can be freely accessed at http://ocri-genomics.org/ocsESTdb/ .

Conclusion

ocsESTdb may serve as a valuable and unique resource for comparative analysis of acyl lipid synthesis and metabolism in oilseed plants. It also may provide vital insights into improving oil content in seeds of oil crop species by transcriptional reconstruction of the metabolic network.",2015-01-21 +31854504,"Pepfar 3.0's HIV testing policy in Côte d'Ivoire (2014 to 2018): fragmentation, acceleration and disconnection.","

Introduction

HIV Testing and Counselling (HTC) remains a key challenge in achieving control of the HIV epidemic by 2030. In the early 2010s, the President's Emergency Plan for AIDS Relief (Pepfar) adopted targeted HTC strategies for populations and geographical areas most affected by HIV. We examine how Pepfar defined targeted HTC in Côte d'Ivoire, a country with a mixed HIV epidemic, after a decade of expanding HTC services.

Methods

We explored the evolution of HTC strategies through the Country Operational Plans (COP) of Pepfar during its phase 3.0, from COP 14 to COP 17 (October 2014 to September 2018) in Côte d'Ivoire. We conducted an analysis of the grey literature over the period 2014 to 2018 (Budget & Target Report, Strategic Direction Summary, Sustainability Index and Dashboard Summary, https://data.pepfar.gov). We also conducted a qualitative study in Côte d'Ivoire (2015 to 2018) using in-depth interviews with stakeholders in the AIDS public response: CDC/Pepfar (3), Ministry of Health (3), intermediary NGOs (7); and public meeting observations (14).

Results

Since COP 14, Pepfar's HIV testing strategies have been characterized by significant variations in terms of numerical, geographical and population targets. While the aim of COP 14 and COP 15 seemed to be the improvement of testing efficacy in general and testing yield in particular, COP 16 and COP 17 prioritized accelerating progress towards the ""first 90"" (i.e. reducing the proportion of people living with HIV who are unaware of their HIV status). A shift was observed in the definition of testing targets, with less focus on the inclusion of programmatic data and feedback from field actors, and greater emphasis on the use of models to estimate and disaggregate the targets by geographical units and sub-populations (even if the availability of data by this disaggregation was limited or uncertain); increasingly leading to gaps between targets and results.

Conclusions

These trials and tribulations question the real and long-term effectiveness of annually-revised, fragmented strategies, which widen an increasing disparity between the realities of the actors on the ground and the objectives set in Washington.",2019-12-01 +32139710,"Classification models for Invasive Ductal Carcinoma Progression, based on gene expression data-trained supervised machine learning.","Early detection of breast cancer and its correct stage determination are important for prognosis and rendering appropriate personalized clinical treatment to breast cancer patients. However, despite considerable efforts and progress, there is a need to identify the specific genomic factors responsible for, or accompanying Invasive Ductal Carcinoma (IDC) progression stages, which can aid the determination of the correct cancer stages. We have developed two-class machine-learning classification models to differentiate the early and late stages of IDC. The prediction models are trained with RNA-seq gene expression profiles representing different IDC stages of 610 patients, obtained from The Cancer Genome Atlas (TCGA). Different supervised learning algorithms were trained and evaluated with an enriched model learning, facilitated by different feature selection methods. We also developed a machine-learning classifier trained on the same datasets with training sets reduced data corresponding to IDC driver genes. Based on these two classifiers, we have developed a web-server Duct-BRCA-CSP to predict early stage from late stages of IDC based on input RNA-seq gene expression profiles. The analysis conducted by us also enables deeper insights into the stage-dependent molecular events accompanying IDC progression. 
The server is publicly available at http://bioinfo.icgeb.res.in/duct-BRCA-CSP.",2020-03-05 +30692979,"iVikodak-A Platform and Standard Workflow for Inferring, Analyzing, Comparing, and Visualizing the Functional Potential of Microbial Communities.","Background: The objectives of any metagenomic study typically include identification of resident microbes and their relative proportions (taxonomic analysis), profiling functional diversity (functional analysis), and comparing the identified microbes and functions with available metadata (comparative metagenomics). Given the advantage of cost-effectiveness and convenient data-size, amplicon-based sequencing has remained the technology of choice for exploring phylogenetic diversity of an environment. A recent school of thought, employing the existing genome annotation information for inferring functional capacity of an identified microbiome community, has given a promising alternative to Whole Genome Shotgun sequencing for functional analysis. Although a handful of tools are currently available for function inference, their scope, functionality and utility has essentially remained limited. Need for a comprehensive framework that expands upon the existing scope and enables a standardized workflow for function inference, analysis, and visualization, is therefore felt. Methods: We present iVikodak, a multi-modular web-platform that hosts a logically inter-connected repertoire of functional inference and analysis tools, coupled with a comprehensive visualization interface. iVikodak is equipped with microbial co-inhabitance pattern driven published algorithms along with multiple updated databases of various curated microbe-function maps. It also features an advanced task management and result sharing system through introduction of personalized and portable dashboards. 
Results: In addition to inferring functions from 16S rRNA gene data, iVikodak enables (a) an in-depth analysis of specific functions of interest (b) identification of microbes contributing to various functions (c) microbial interaction patterns through function-driven correlation networks, and (d) simultaneous functional comparison between multiple microbial communities. We have bench-marked iVikodak through multiple case studies and comparisons with existing state of art. We also introduce the concept of a public repository which provides a first of its kind community-driven framework for scientific data analytics, collaboration and sharing in this area of microbiome research. Conclusion: Developed using modern design and task management practices, iVikodak provides a multi-modular, yet inter-operable, one-stop framework, that intends to simplify the entire approach toward inferred function analysis. It is anticipated to serve as a significant value addition to the existing space of functional metagenomics. iVikodak web-server may be freely accessed at https://web.rniapps.net/iVikodak/.",2018-01-01 +31601176,Benchmark datasets of immune receptor-epitope structural complexes.,"

Background

The development of accurate epitope prediction tools is important in facilitating disease diagnostics, treatment and vaccine development. The advent of new approaches making use of antibody and TCR sequence information to predict receptor-specific epitopes has the potential to transform the epitope prediction field. Development and validation of this new generation of epitope prediction methods would benefit from regularly updated high-quality receptor-antigen complex datasets.

Results

To address the need for high-quality datasets to benchmark performance of this new generation of receptor-specific epitope prediction tools, a webserver called SCEptRe (Structural Complexes of Epitope-Receptor) was created. SCEptRe extracts weekly updated 3D complexes of antibody-antigen, TCR-pMHC and MHC-ligand from the Immune Epitope Database and clusters them based on antigen, receptor and epitope features to generate benchmark datasets. SCEptRe also provides annotated information such as CDR sequences and VDJ genes on the receptors. Users can generate custom datasets by selecting thresholds for structural quality and clustering parameters (e.g. resolution, R-free factor, antigen or epitope sequence identity) based on their needs.

Conclusions

SCEptRe provides weekly updated, user-customized comprehensive benchmark datasets of immune receptor-epitope structural complexes. These datasets can be used to develop and benchmark performance of receptor-specific epitope prediction tools in the future. SCEptRe is freely accessible at http://tools.iedb.org/sceptre .",2019-10-10 +31538697,Bioinformatics Tools and Workflow to Select Blood Biomarkers for Early Cancer Diagnosis: An Application to Pancreatic Cancer.,"Secretome proteomics for the discovery of cancer biomarkers holds great potential to improve early cancer diagnosis. A knowledge-based approach relying on mechanistic criteria related to the type of cancer should help to identify candidates from available ""omics"" information. With the aim of accelerating the discovery process for novel biomarkers, a set of tools is developed and made available via a Galaxy-based instance to assist end-users biologists. These implemented tools proceed by a step-by-step strategy to mine transcriptomics and proteomics databases for information relating to tissue specificity, allow the selection of proteins that are part of the secretome, and combine this information with proteomics datasets to rank the most promising candidate biomarkers for early cancer diagnosis. Using pancreatic cancer as a case study, this strategy produces a list of 24 candidate biomarkers suitable for experimental assessment by MS-based proteomics. Among these proteins, three (SYCN, REG1B, and PRSS2) were previously reported as circulating candidate biomarkers of pancreatic cancer. Here, further refinement of this list allows to prioritize 14 candidate biomarkers along with their associated proteotypic peptides for further investigation, using targeted MS-based proteomics. 
The bioinformatics tools and the workflow implementing this strategy for the selection of candidate biomarkers are freely accessible at http://www.proteore.org.",2019-10-10 +29529902,BioPepDB: an integrated data platform for food-derived bioactive peptides.,"Food-derived bioactive peptides play critical roles in regulating most biological processes and have considerable biological, medical and industrial importance. However, a large number of active peptides data, including sequence, function, source, commercial product information, references and other information are poorly integrated. BioPepDB is a searchable database of food-derived bioactive peptides and their related articles, including more than four thousand bioactive peptide entries. Moreover, BioPepDB provides modules of prediction and hydrolysis-simulation for discovering novel peptides. It can serve as a reference database to investigate the function of different bioactive peptides. BioPepDB is available at http://bis.zju.edu.cn/biopepdbr/ . The web page utilises Apache, PHP5 and MySQL to provide the user interface for accessing the database and predict novel peptides. The database itself is operated on a specialised server.",2018-03-12 +24475173,Non-redundant unique interface structures as templates for modeling protein interactions.,"Improvements in experimental techniques increasingly provide structural data relating to protein-protein interactions. Classification of structural details of protein-protein interactions can provide valuable insights for modeling and abstracting design principles. Here, we aim to cluster protein-protein interactions by their interface structures, and to exploit these clusters to obtain and study shared and distinct protein binding sites. We find that there are 22604 unique interface structures in the PDB. These unique interfaces, which provide a rich resource of structural data of protein-protein interactions, can be used for template-based docking. 
We test the specificity of these non-redundant unique interface structures by finding protein pairs which have multiple binding sites. We suggest that residues with more than 40% relative accessible surface area should be considered as surface residues in template-based docking studies. This comprehensive study of protein interface structures can serve as a resource for the community. The dataset can be accessed at http://prism.ccbb.ku.edu.tr/piface.",2014-01-27 +23057594,"DNA repair gene polymorphisms at XRCC1 (Arg194Trp, Arg280His, and Arg399Gln) in a healthy Tunisian population: interethnic variation and functional prediction.","The genetic polymorphisms in DNA repair genes might affect the repair activities of the enzymes, predisposing individuals to cancer risk. Due to these genetic variants, interethnic differences in DNA repair capacity were observed in various populations. Hence, our study aimed to determine the prevalence of three nonsynonymous single-nucleotide polymorphisms (SNPs) in an X-ray repair cross-complementation group 1 gene (XRCC1) (Arg194Trp, Arg280His, and Arg399Gln) in a healthy Tunisian population (TUN) and to compare that with HapMap ( www.hapmap.org ) populations. Also, we predicted their eventual functional effect based on the protein conservation analysis by Sorting Intolerant From Tolerant (SIFT; http://sift.jcvi.org/www/SIFT_dbSNP.html ) software. The genotypes of 154 healthy individuals were determined by the polymerase chain reaction-restriction fragment length polymorphism. Tunisians showed a relative relatedness with Caucasians (European ancestry) for Arg194Trp and Arg399Gln that may be explained by the strategic geographic location of Tunisia in the Mediterranean, allowing exchanges with European countries. However, a characteristic pattern was observed in Arg280His polymorphism, which could be explained by the high inbreeding rate in TUN. 
The analysis of protein conservation showed that the three amino acid substitutions were predicted as damaged. The results presented here provide the first report on XRCC1 polymorphisms about Tunisians and may establish baseline database for our future clinical and genetic studies.",2012-10-01 +31009127,Full-length mRNA sequencing and gene expression profiling reveal broad involvement of natural antisense transcript gene pairs in pepper development and response to stresses.,"Pepper is an important vegetable with great economic value and unique biological features. In the past few years, significant development has been made toward understanding the huge complex pepper genome; however, pepper functional genomics has not been well studied. To better understand the pepper gene structure and pepper gene regulation, we conducted full-length mRNA sequencing by PacBio sequencing and obtained 57 862 high-quality full-length mRNA sequences derived from 18 362 previously annotated and 5769 newly detected genes. New gene models were built that combined the full-length mRNA sequences and corrected approximately 500 fragmented gene models from previous annotations. Based on the full-length mRNA, we identified 4114 and 5880 pepper genes forming natural antisense transcript (NAT) genes in-cis and in-trans, respectively. Most of these genes accumulate small RNAs in their overlapping regions. By analyzing these NAT gene expression patterns in our transcriptome data, we identified many NAT pairs responsive to a variety of biological processes in pepper. Pepper formate dehydrogenase 1 (FDH1), which is required for R-gene-mediated disease resistance, may be regulated by nat-siRNAs and participate in a positive feedback loop in salicylic acid biosynthesis during resistance responses. Several cis-NAT pairs and subgroups of trans-NAT genes were responsive to pepper pericarp and placenta development, which may play roles in capsanthin and capsaicin biosynthesis. 
Using a comparative genomics approach, the evolutionary mechanisms of cis-NATs were investigated, and we found that an increase in intergenic sequences accounted for the loss of most cis-NATs, while transposon insertion contributed to the formation of most new cis-NATs. OPEN RESEARCH BADGES: This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at http://bigd.big.ac.cn/gsa Accession number, CRA001412.",2019-06-08 +29859055,Sequence-based prediction of physicochemical interactions at protein functional sites using a function-and-interaction-annotated domain profile database.,"

Background

Identifying protein functional sites (PFSs) and, particularly, the physicochemical interactions at these sites is critical to understanding protein functions and the biochemical reactions involved. Several knowledge-based methods have been developed for the prediction of PFSs; however, accurate methods for predicting the physicochemical interactions associated with PFSs are still lacking.

Results

In this paper, we present a sequence-based method for the prediction of physicochemical interactions at PFSs. The method is based on a functional site and physicochemical interaction-annotated domain profile database, called fiDPD, which was built using protein domains found in the Protein Data Bank. This method was applied to 13 target proteins from the very recent Critical Assessment of Structure Prediction (CASP10/11), and our calculations gave a Matthews correlation coefficient (MCC) value of 0.66 for PFS prediction and an 80% recall in the prediction of the associated physicochemical interactions.

Conclusions

Our results show that, in addition to the PFSs, the physical interactions at these sites are also conserved in the evolution of proteins. This work provides a valuable sequence-based tool for rational drug design and side-effect assessment. The method is freely available and can be accessed at http://202.119.249.49 .",2018-06-01 +30566615,Inter-residue interactions in alpha-helical transmembrane proteins.,"MOTIVATION:The number of available membrane protein structures has markedly increased in the last years and, in parallel, the reliability of the methods to detect transmembrane (TM) segments. In the present report, we characterized inter-residue interactions in α-helical membrane proteins using a dataset of 3462 TM helices from 430 proteins. This is by far the largest analysis published to date. RESULTS:Our analysis of residue-residue interactions in TM segments of membrane proteins shows that almost all interactions involve aliphatic residues and Phe. There is lack of polar-polar, polar-charged and charged-charged interactions except for those between Thr or Ser sidechains and the backbone carbonyl of aliphatic and Phe residues. The results are discussed in the context of the preferences of amino acids to be in the protein core or exposed to the lipid bilayer and to occupy specific positions along the TM segment. Comparison to datasets of β-barrel membrane proteins and of α-helical globular proteins unveils the specific patterns of interactions and residue composition characteristic of α-helical membrane proteins that are the clue to understanding their structure. AVAILABILITY AND IMPLEMENTATION:Results data and datasets used are available at http://lmc.uab.cat/TMalphaDB/interactions.php. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-08-01 +31236242,Improved detection of influenza A virus from blue-winged teals by sequencing directly from swab material.,"

Abstract

The greatest diversity of influenza A virus (IAV) is found in wild aquatic birds of the orders Anseriformes and Charadriiformes. In these birds, IAV replication occurs mostly in the intestinal tract. Fecal, cloacal, and/or tracheal swabs are typically collected and tested by real-time RT-PCR (rRT-PCR) and/or by virus isolation in embryonated chicken eggs in order to determine the presence of IAV. Virus isolation may impose bottlenecks that select variant populations that are different from those circulating in nature, and such bottlenecks may result in artifactual representation of subtype diversity and/or underrepresented mixed infections. The advent of next-generation sequencing (NGS) technologies provides an opportunity to explore to what extent IAV subtype diversity is affected by virus isolation in eggs. In the present work, we evaluated the advantage of sequencing by NGS directly from swab material of IAV rRT-PCR-positive swabs collected during the 2013-14 surveillance season in Guatemala and compared to results from NGS after virus isolation. The results highlight the benefit of sequencing IAV genomes directly from swabs to better understand subtype diversity and detection of alternative amino acid motifs that could otherwise escape detection using traditional methods of virus isolation. In addition, NGS sequencing data from swabs revealed reduced presence of defective interfering particles compared to virus isolates. We propose an alternative workflow in which original swab samples positive for IAV by rRT-PCR are first subjected to NGS before attempting viral isolation. This approach should speed the processing of samples and better capture natural IAV diversity.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.3h2n106.",2019-05-11 +25858286,"miRGate: a curated database of human, mouse and rat miRNA-mRNA targets.","MicroRNAs (miRNAs) are small non-coding elements involved in the post-transcriptional down-regulation of gene expression through base pairing with messenger RNAs (mRNAs). Through this mechanism, several miRNA-mRNA pairs have been described as critical in the regulation of multiple cellular processes, including early embryonic development and pathological conditions. Many of these pairs (such as miR-15 b/BCL2 in apoptosis or BART-6/BCL6 in diffuse large B-cell lymphomas) were experimentally discovered and/or computationally predicted. Available tools for target prediction are usually based on sequence matching, thermodynamics and conservation, among other approaches. Nevertheless, the main issue on miRNA-mRNA pair prediction is the little overlapping results among different prediction methods, or even with experimentally validated pairs lists, despite the fact that all rely on similar principles. To circumvent this problem, we have developed miRGate, a database containing novel computational predicted miRNA-mRNA pairs that are calculated using well-established algorithms. In addition, it includes an updated and complete dataset of sequences for both miRNA and mRNAs 3'-Untranslated region from human (including human viruses), mouse and rat, as well as experimentally validated data from four well-known databases. The underlying methodology of miRGate has been successfully applied to independent datasets providing predictions that were convincingly validated by functional assays. miRGate is an open resource available at http://mirgate.bioinfo.cnio.es. 
For programmatic access, we have provided a representational state transfer web service application programming interface that allows accessing the database at http://mirgate.bioinfo.cnio.es/API/ Database URL: http://mirgate.bioinfo.cnio.es",2015-04-08 +31649628,Microcins in Enterobacteriaceae: Peptide Antimicrobials in the Eco-Active Intestinal Chemosphere.,"Microcins are low-molecular-weight, ribosomally produced, highly stable, bacterial-inhibitory molecules involved in competitive, and amensalistic interactions between Enterobacteriaceae in the intestine. These interactions take place in a highly complex chemical landscape, the intestinal eco-active chemosphere, composed of chemical substances that positively or negatively influence bacterial growth, including those originated from nutrient uptake, and those produced by the action of the human or animal host and the intestinal microbiome. The contribution of bacteria results from their effect on the host generated molecules, on food and digested food, and organic substances from microbial origin, including from bacterial degradation. Here, we comprehensively review the main chemical substances present in the human intestinal chemosphere, particularly of those having inhibitory effects on microorganisms. With this background, and focusing on Enterobacteriaceae, the most relevant human pathogens from the intestinal microbiota, the microcin's history and classification, mechanisms of action, and mechanisms involved in microcin's immunity (in microcin producers) and resistance (non-producers) are reviewed. Products from the chemosphere likely modulate the ecological effects of microcin activity. Several cross-resistance mechanisms are shared by microcins, colicins, bacteriophages, and some conventional antibiotics, which are expected to produce cross-effects. Double-microcin-producing strains (such as microcins MccM and MccH47) have been successfully used for decades in the control of pathogenic gut organisms. 
Microcins are associated with successful gut colonization, facilitating translocation and invasion, leading to bacteremia, and urinary tract infections. In fact, Escherichia coli strains from the more invasive phylogroups (e.g., B2) are frequently microcinogenic. A publicly accessible APD3 database http://aps.unmc.edu/AP/ shows particular genes encoding microcins in 34.1% of E. coli strains (mostly MccV, MccM, MccH47, and MccI47), and much less in Shigella and Salmonella (<2%). Some 4.65% of Klebsiella pneumoniae are microcinogenic (mostly with MccE492), and even less in Enterobacter or Citrobacter (mostly MccS). The high frequency and variety of microcins in some Enterobacteriaceae indicate key ecological functions, a notion supported by their dominance in the intestinal microbiota of biosynthetic gene clusters involved in the synthesis of post-translationally modified peptide microcins.",2019-10-09 +31649674,cAb-Rep: A Database of Curated Antibody Repertoires for Exploring Antibody Diversity and Predicting Antibody Prevalence.,"The diversity of B cell receptors provides a basis for recognizing numerous pathogens. Antibody repertoire sequencing has revealed relationships between B cell receptor sequences, their diversity, and their function in infection, vaccination, and disease. However, many repertoire datasets have been deposited without annotation or quality control, limiting their utility. To accelerate investigations of B cell immunoglobulin sequence repertoires and to facilitate development of algorithms for their analysis, we constructed a comprehensive public database of curated human B cell immunoglobulin sequence repertoires, cAb-Rep (https://cab-rep.c2b2.columbia.edu), which currently includes 306 immunoglobulin repertoires from 121 human donors, who were healthy, vaccinated, or had autoimmune disease. The database contains a total of 267.9 million V(D)J heavy chain and 72.9 million VJ light chain transcripts. 
These transcripts are full-length or near full-length, have been annotated with gene origin, antibody isotype, somatic hypermutations, and other biological characteristics, and are stored in FASTA format to facilitate their direct use by most current repertoire-analysis programs. We describe a website to search cAb-Rep for similar antibodies along with methods for analysis of the prevalence of antibodies with specific genetic signatures, for estimation of reproducibility of somatic hypermutation patterns of interest, and for delineating frequencies of somatically introduced N-glycosylation. cAb-Rep should be useful for investigating attributes of B cell sequence repertoires, for understanding characteristics of affinity maturation, and for identifying potential barriers to the elicitation of effective neutralizing antibodies in infection or by vaccination.",2019-10-09 +29946422,Simulation and visualization of multiple KEGG pathways using BioNSi.,"Motivation: Many biologists are discouraged from using network simulation tools because these require manual, often tedious network construction. This situation calls for building new tools or extending existing ones with the ability to import biological pathways previously deposited in databases and analyze them, in order to produce novel biological insights at the pathway level. Results: We have extended a network simulation tool (BioNSi), which now allows merging of multiple pathways from the KEGG pathway database into a single, coherent network, and visualizing its properties. Furthermore, the enhanced tool enables loading experimental expression data into the network and simulating its dynamics under various biological conditions or perturbations. As a proof of concept, we tested two sets of published experimental data, one related to inflammatory bowel disease condition and the other to breast cancer treatment. 
We predict some of the major observations obtained following these laboratory experiments, and provide new insights that may shed additional light on these results. Tool requirements: Cytoscape 3.x, JAVA 8 Availability: The tool is freely available at http://bionsi.wix.com/bionsi, where a complete user guide and a step-by-step manual can also be found.",2017-12-11 +29450569,Pharmacogenomics in diabetes: outcomes of thiamine therapy in TRMA syndrome.,"AIMS/HYPOTHESIS:Diabetes is one of the cardinal features of thiamine-responsive megaloblastic anaemia (TRMA) syndrome. Current knowledge of this rare monogenic diabetes subtype is limited. We investigated the genotype, phenotype and response to thiamine (vitamin B1) in a cohort of individuals with TRMA-related diabetes. METHODS:We studied 32 individuals with biallelic SLC19A2 mutations identified by Sanger or next generation sequencing. Clinical details were collected through a follow-up questionnaire. RESULTS:We identified 24 different mutations, of which nine are novel. The onset of the first TRMA symptom ranged from birth to 4 years (median 6 months [interquartile range, IQR 3-24]) and median age at diabetes onset was 10 months (IQR 5-27). At presentation, three individuals had isolated diabetes and 12 had asymptomatic hyperglycaemia. Follow-up data was available for 15 individuals treated with thiamine for a median 4.7 years (IQR 3-10). Four patients were able to stop insulin and seven achieved better glycaemic control on lower insulin doses. These 11 patients were significantly younger at diabetes diagnosis (p = 0.042), at genetic testing (p = 0.01) and when starting thiamine (p = 0.007) compared with the rest of the cohort. All patients treated with thiamine became transfusion-independent and adolescents achieved normal puberty. There were no additional benefits of thiamine doses >150 mg/day and no reported side effects up to 300 mg/day. 
CONCLUSIONS/INTERPRETATION:In TRMA syndrome, diabetes can be asymptomatic and present before the appearance of other features. Prompt recognition is essential as early treatment with thiamine can result in improved glycaemic control, with some individuals becoming insulin-independent. DATA AVAILABILITY:SLC19A2 mutation details have been deposited in the Decipher database ( https://decipher.sanger.ac.uk/ ).",2018-02-15 +26476458,GlyTouCan 1.0--The international glycan structure repository.,"Glycans are known as the third major class of biopolymers, next to DNA and proteins. They cover the surfaces of many cells, serving as the 'face' of cells, whereby other biomolecules and viruses interact. The structure of glycans, however, differs greatly from DNA and proteins in that they are branched, as opposed to linear sequences of amino acids or nucleotides. Therefore, the storage of glycan information in databases, let alone their curation, has been a difficult problem. This has caused many duplicated efforts when integration is attempted between different databases, making an international repository for glycan structures, where unique accession numbers are assigned to every identified glycan structure, necessary. As such, an international team of developers and glycobiologists have collaborated to develop this repository, called GlyTouCan and is available at http://glytoucan.org/, to provide a centralized resource for depositing glycan structures, compositions and topologies, and to retrieve accession numbers for each of these registered entries. 
This will thus enable researchers to reference glycan structures simply by accession number, as opposed to by chemical structure, which has been a burden to integrate glycomics databases in the past.",2015-10-17 +22080563,CADRE: the Central Aspergillus Data REpository 2012.,"The Central Aspergillus Data REpository (CADRE; http://www.cadre-genomes.org.uk) is a public resource for genomic data extracted from species of Aspergillus. It provides an array of online tools for searching and visualising features of this significant fungal genus. CADRE arose from a need within the medical community to understand the human pathogen Aspergillus fumigatus. Due to the paucity of Aspergillus genomic resources 10 years ago, the long-term goal of this project was to collate and maintain Aspergillus genomes as they became available. Since our first release in 2004, the resource has expanded to encompass annotated sequence for eight other Aspergilli and provides much needed support to the international Aspergillus research community. Recent developments, however, in sequencing technology are creating a vast amount of genomic data and, as a result, we shortly expect a tidal wave of Aspergillus data. In preparation for this, we have upgraded the database and software suite. This not only enables better management of more complex data sets, but also improves annotation by providing access to genome comparison data and the integration of high-throughput data.",2011-11-12 +32105160,Role of TET Dioxygenases and DNA Hydroxymethylation in Bisphenols-Stimulated Proliferation of Breast Cancer Cells.,"BACKGROUND:Bisphenol A (BPA), a ubiquitous environmental endocrine disruptor targeting estrogen receptors (ERs), has been implicated in the promotion of breast cancer. Perinatal exposure of BPA could induce longitudinal alteration of DNA hydroxymethylation in imprinted loci of mouse blood cells. To date, no data has been reported on the effects of BPA on DNA hydroxymethylation in breast cells. 
Therefore, we asked whether BPA can induce DNA hydroxymethylation change in human breast cells. Given that dysregulated epigenetic DNA hydroxymethylation is observed in various cancers, we wondered how DNA hydroxymethylation modulates cancer development, and specifically, whether and how BPA and its analogs promote breast cancer development via DNA hydroxymethylation. OBJECTIVES:We aimed to explore the interplay of the estrogenic activity of bisphenols at environmental exposure dose levels with TET dioxygenase-catalyzed DNA hydroxymethylation and to elucidate their roles in the proliferation of ER+ breast cancer cells as stimulated by environmentally relevant bisphenols. METHODS:Human MCF-7 and T47D cell lines were used as ER-dependent breast cell proliferation models, and the human MDA-MB-231 cell line was used as an ER-independent breast cell model. These cells were treated with BPA or bisphenol S (BPS) to examine BPA/BPS-related proliferation. Ultra-high performance liquid chromatography-tandem mass spectrometry (UHPLC-MS/MS) and enzyme-linked immunosorbent assays (ELISAs) were used to detect DNA hydroxymethylation. Crispr/Cas9 and RNA interference technologies, quantitative polymerase chain reaction (qPCR), and Western blot analyses were used to evaluate the expression and function of genes. Co-immunoprecipitation (Co-IP), bisulfite sequencing-PCR (BSP), and chromatin immunoprecipitation-qPCR (ChIP-qPCR) were used to identify the interactions of target proteins. RESULTS:We measured higher proliferation in ER+ breast cancer cells treated with BPA or its replacement, BPS, accompanied by an ERα-dependent decrease in genomic DNA hydroxymethylation. The results of our overexpression, knockout, knockdown, and inhibition experiments suggested that TET2-catalyzed DNA hydroxymethylation played a suppressive role in BPA/BPS-stimulated cell proliferation. 
On the other hand, we observed that TET2 was negatively regulated by the activation of ERα (dimerized and phosphorylated), which was also induced by BPA/BPS binding. Instead of a direct interaction between TET2 and ERα, data of our Co-IP, BSP, and ChIP-qPCR experiments indicated that the activated ERα increased the DNA methyltransferase (DNMT)-mediated promoter methylation of TET2, leading to an inhibition of the TET2 expression and DNA hydroxymethylation. CONCLUSIONS:We identified a new feedback circuit of ERα activation-DNMT-TET2-DNA hydroxymethylation in ER+ breast cancer cells and uncovered a pivotal role of TET2-mediated DNA hydroxymethylation in modulating BPA/BPS-stimulated proliferation. Moreover, to our knowledge, we for the first time established a linkage among chemical exposure, DNA hydroxymethylation, and tumor-associated proliferation. These findings further clarify the estrogenic activity of BPA/BPS and its profound implications for the regulation of epigenetic DNA hydroxymethylation and cell proliferation. https://doi.org/10.1289/EHP5862.",2020-02-27 +25262351,"AnimalTFDB 2.0: a resource for expression, prediction and functional study of animal transcription factors.","Transcription factors (TFs) are key regulators for gene expression. Here we updated the animal TF database AnimalTFDB to version 2.0 (http://bioinfo.life.hust.edu.cn/AnimalTFDB/). Using the improved prediction pipeline, we identified 72 336 TF genes, 21 053 transcription co-factor genes and 6502 chromatin remodeling factor genes from 65 species covering main animal lineages. Besides the abundant annotations (basic information, gene model, protein functional domain, gene ontology, pathway, protein interaction, ortholog and paralog, etc.) in the previous version, we made several new features and functions in the updated version. 
These new features are: (i) gene expression from RNA-Seq for nine model species, (ii) gene phenotype information, (iii) multiple sequence alignment of TF DNA-binding domains, and the weblogo and phylogenetic tree based on the alignment, (iv) a TF prediction server to identify new TFs from input sequences and (v) a BLAST server to search against TFs in AnimalTFDB. A new nice web interface was designed for AnimalTFDB 2.0 allowing users to browse and search all data in the database. We aim to maintain the AnimalTFDB as a solid resource for TF identification and studies of transcription regulation and comparative genomics.",2014-09-27 +31124003,QuaPra: Efficient transcript assembly and quantification using quadratic programming with Apriori algorithm.,"RNA sequencing (RNA-seq) has greatly facilitated the exploring of transcriptome landscape for diverse organisms. However, transcriptome reconstruction is still challenging due to various limitations of current tools and sequencing technologies. Here, we introduce an efficient tool, QuaPra (Quadratic Programming combined with Apriori), for accurate transcriptome assembly and quantification. QuaPra could detect at least 26.5% more low abundance (0.1-1 FPKM) transcripts with over 2.1% increase of sensitivity and precision on simulated data compared to other currently popular tools. Moreover, around one-quarter more known transcripts were correctly assembled by QuaPra than other assemblers on real sequencing data. QuaPra is freely available at https://doi.org/www.megabionet.org/QuaPra/ .",2019-05-22 +31780447,Exercise treatment effect modifiers in persistent low back pain: an individual participant data meta-analysis of 3514 participants from 27 randomised controlled trials.,"BACKGROUND:Low back pain is one of the leading causes of disability worldwide. Exercise therapy is widely recommended to treat persistent non-specific low back pain. 
While evidence suggests exercise is, on average, moderately effective, there remains uncertainty about which individuals might benefit the most from exercise. METHODS:In parallel with a Cochrane review update, we requested individual participant data (IPD) from high-quality randomised clinical trials of adults with our two primary outcomes of interest, pain and functional limitations, and calculated global recovery. We compiled a master data set including baseline participant characteristics, exercise and comparison characteristics, and outcomes at short-term, moderate-term and long-term follow-up. We conducted descriptive analyses and one-stage IPD meta-analysis using multilevel mixed-effects regression of the overall treatment effect and prespecified potential treatment effect modifiers. RESULTS:We received IPD for 27 trials (3514 participants). For studies included in this analysis, compared with no treatment/usual care, exercise therapy on average reduced pain (mean effect/100 (95% CI) -10.7 (-14.1 to -7.4)), a result compatible with a clinically important 20% smallest worthwhile effect. Exercise therapy reduced functional limitations with a clinically important 23% improvement (mean effect/100 (95% CI) -10.2 (-13.2 to -7.3)) at short-term follow-up. Not having heavy physical demands at work and medication use for low back pain were potential treatment effect modifiers-these were associated with superior exercise outcomes relative to non-exercise comparisons. Lower body mass index was also associated with better outcomes in exercise compared with no treatment/usual care. This study was limited by inconsistent availability and measurement of participant characteristics. CONCLUSIONS:This study provides potentially useful information to help treat patients and design future studies of exercise interventions that are better matched to specific subgroups. 
PROTOCOL PUBLICATION: https://doi.org/10.1186/2046-4053-1-64.",2019-11-28 +31753619,Quantitative Structural Interpretation of Protein Crosslinks.,"Chemical crosslinking, combined with mass spectrometry analysis, is a key source of information for characterizing the structure of large protein assemblies, in the context of molecular modeling. In most approaches, the interpretation is limited to simple spatial restraints, neglecting physico-chemical interactions between the crosslinker and the protein and their flexibility. Here we present a method, named NRGXL (new realistic grid for crosslinks), which models the flexibility of the crosslinker and the linked side-chains, by explicitly sampling many conformations. Also, the method can efficiently deal with overall protein dynamics. This method creates a physical model of the crosslinker and associated energy. A classifier based on it outperforms others, based on Euclidean distance or solvent-accessible distance and its efficiency makes it usable for validating 3D models from crosslinking data. NRGXL is freely available as a web server at: https://nrgxl.pasteur.fr.",2019-11-18 +31939734,"CNApp, a tool for the quantification of copy number alterations and integrative analysis revealing clinical implications. ","Somatic copy number alterations (CNAs) are a hallmark of cancer, but their role in tumorigenesis and clinical relevance remain largely unclear. Here, we developed CNApp, a web-based tool that allows a comprehensive exploration of CNAs by using purity-corrected segmented data from multiple genomic platforms. CNApp generates genome-wide profiles, computes CNA scores for broad, focal and global CNA burdens, and uses machine learning-based predictions to classify samples. We applied CNApp to the TCGA pan-cancer dataset of 10,635 genomes showing that CNAs classify cancer types according to their tissue-of-origin, and that each cancer type shows specific ranges of broad and focal CNA scores. 
Moreover, CNApp reproduces recurrent CNAs in hepatocellular carcinoma and predicts colon cancer molecular subtypes and microsatellite instability based on broad CNA scores and discrete genomic imbalances. In summary, CNApp facilitates CNA-driven research by providing a unique framework to identify relevant clinical implications. CNApp is hosted at https://tools.idibaps.org/CNApp/.",2020-01-15 +31425100,LassoNet: Deep Lasso-Selection of 3D Point Clouds. ,"Selection is a fundamental task in exploratory analysis and visualization of 3D point clouds. Prior researches on selection methods were developed mainly based on heuristics such as local point density, thus limiting their applicability in general data. Specific challenges root in the great variabilities implied by point clouds (e.g., dense vs. sparse), viewpoint (e.g., occluded vs. non-occluded), and lasso (e.g., small vs. large). In this work, we introduce LassoNet, a new deep neural network for lasso selection of 3D point clouds, attempting to learn a latent mapping from viewpoint and lasso to point cloud regions. To achieve this, we couple user-target points with viewpoint and lasso information through 3D coordinate transform and naive selection, and improve the method scalability via an intention filtering and farthest point sampling. A hierarchical network is trained using a dataset with over 30K lasso-selection records on two different point cloud data. We conduct a formal user study to compare LassoNet with two state-of-the-art lasso-selection methods. The evaluations confirm that our approach improves the selection effectiveness and efficiency across different combinations of 3D point clouds, viewpoints, and lasso selections. 
Project Website: https://lassonet.github.io.",2019-08-19 +24137000,Updates of the HbVar database of human hemoglobin variants and thalassemia mutations.,"HbVar (http://globin.bx.psu.edu/hbvar) is one of the oldest and most appreciated locus-specific databases launched in 2001 by a multi-center academic effort to provide timely information on the genomic alterations leading to hemoglobin variants and all types of thalassemia and hemoglobinopathies. Database records include extensive phenotypic descriptions, biochemical and hematological effects, associated pathology and ethnic occurrence, accompanied by mutation frequencies and references. Here, we report updates to >600 HbVar entries, inclusion of population-specific data for 28 populations and 27 ethnic groups for α-, and β-thalassemias and additional querying options in the HbVar query page. HbVar content was also inter-connected with two other established genetic databases, namely FINDbase (http://www.findbase.org) and Leiden Open-Access Variation database (http://www.lovd.nl), which allows comparative data querying and analysis. HbVar data content has contributed to the realization of two collaborative projects to identify genomic variants that lie on different globin paralogs. Most importantly, HbVar data content has contributed to demonstrate the microattribution concept in practice. These updates significantly enriched the database content and querying potential, enhanced the database profile and data quality and broadened the inter-relation of HbVar with other databases, which should increase the already high impact of this resource to the globin and genetic database community.",2013-10-16 +32044343,SSizer: Determining the Sample Sufficiency for Comparative Biological Study.,"Comparative biological studies typically require plenty of samples to ensure full representation of the given problem. A frequently-encountered question is how many samples are sufficient for a particular study. 
This question is traditionally assessed using the statistical power, but it alone may not guarantee the full and reproducible discovery of features truly discriminating biological groups. Two new types of statistical criteria have thus been introduced to assess sample sufficiency from different perspectives by considering diagnostic accuracy and robustness. Due to the complementary nature of these criteria, a comprehensive evaluation based on all criteria is necessary for achieving a more accurate assessment. However, no such tool is available yet. Herein, an online tool SSizer (https://idrblab.org/ssizer/) was developed and validated to enable the assessment of the sample sufficiency for a user-input biological dataset, and three statistical criteria were adopted to achieve comprehensive and collective assessment. A sample simulation based on a user-input dataset was performed to expand the data and then determine the sample size required by the particular study. In sum, SSizer is unique for its ability to comprehensively evaluate whether the sample size is sufficient and determine the required number of samples for the user-input dataset, which, therefore, facilitates the comparative and OMIC-based biological studies.",2020-02-07 +31730202,Cross-lingual semantic annotation of biomedical literature: experiments in Spanish and English.,"

Motivation

Biomedical literature is one of the most relevant sources of information for knowledge mining in the field of Bioinformatics. In spite of English being the most widely addressed language in the field; in recent years, there has been a growing interest from the natural language processing community in dealing with languages other than English. However, the availability of language resources and tools for appropriate treatment of non-English texts is lacking behind. Our research is concerned with the semantic annotation of biomedical texts in the Spanish language, which can be considered an under-resourced language where biomedical text processing is concerned.

Results

We have carried out experiments to assess the effectiveness of several methods for the automatic annotation of biomedical texts in Spanish. One approach is based on the linguistic analysis of Spanish texts and their annotation using an information retrieval and concept disambiguation approach. A second method takes advantage of a Spanish-English machine translation process to annotate English documents and transfer annotations back to Spanish. A third method takes advantage of the combination of both procedures. Our evaluation shows that a combined system has competitive advantages over the two individual procedures.

Availability and implementation

UMLSMapper (https://snlt.vicomtech.org/umlsmapper) and the annotation transfer tool (http://scientmin.taln.upf.edu/anntransfer/) are freely available for research purposes as web services and/or demos.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +23331499,SignaLink 2 - a signaling pathway resource with multi-layered regulatory networks.,"

Background

Signaling networks in eukaryotes are made up of upstream and downstream subnetworks. The upstream subnetwork contains the intertwined network of signaling pathways, while the downstream regulatory part contains transcription factors and their binding sites on the DNA as well as microRNAs and their mRNA targets. Currently, most signaling and regulatory databases contain only a subsection of this network, making comprehensive analyses highly time-consuming and dependent on specific data handling expertise. The need for detailed mapping of signaling systems is also supported by the fact that several drug development failures were caused by undiscovered cross-talk or regulatory effects of drug targets. We previously created a uniformly curated signaling pathway resource, SignaLink, to facilitate the analysis of pathway cross-talks. Here, we present SignaLink 2, which significantly extends the coverage and applications of its predecessor.

Description

We developed a novel concept to integrate and utilize different subsections (i.e., layers) of the signaling network. The multi-layered (onion-like) database structure is made up of signaling pathways, their pathway regulators (e.g., scaffold and endocytotic proteins) and modifier enzymes (e.g., phosphatases, ubiquitin ligases), as well as transcriptional and post-transcriptional regulators of all of these components. The user-friendly website allows the interactive exploration of how each signaling protein is regulated. The customizable download page enables the analysis of any user-specified part of the signaling network. Compared to other signaling resources, distinctive features of SignaLink 2 are the following: 1) it involves experimental data not only from humans but from two invertebrate model organisms, C. elegans and D. melanogaster; 2) combines manual curation with large-scale datasets; 3) provides confidence scores for each interaction; 4) operates a customizable download page with multiple file formats (e.g., BioPAX, Cytoscape, SBML). Non-profit users can access SignaLink 2 free of charge at http://SignaLink.org.

Conclusions

With SignaLink 2 as a single resource, users can effectively analyze signaling pathways, scaffold proteins, modifier enzymes, transcription factors and miRNAs that are important in the regulation of signaling processes. This integrated resource allows the systems-level examination of how cross-talks and signaling flow are regulated, as well as provide data for cross-species comparisons and drug discovery analyses.",2013-01-18 +31272357,MolOpt: A Web Server for Drug Design using Bioisosteric Transformation.,"

Background

Bioisosteric replacement is widely used in drug design for lead optimization. However, the identification of a suitable bioisosteric group is not an easy task.

Methods

In this work, we present MolOpt, a web server for in silico drug design using bioisosteric transformation. Potential bioisosteric transformation rules were derived from data mining, deep generative machine learning and similarity comparison. MolOpt tries to assist the medicinal chemist in his/her search for what to make next.

Results and discussion

By replacing molecular substructures with similar chemical groups, MolOpt automatically generates lists of analogues. MolOpt also evaluates forty important pharmacokinetic and toxic properties for each newly designed molecule. The transformed analogues can be assessed for possible future study.

Conclusion

MolOpt is useful for the identification of suitable lead optimization ideas. The MolOpt Server is freely available for use on the web at http://xundrug.cn/molopt.",2020-01-01 +26786711,Tissue-specific patterns of allelically-skewed DNA methylation.,"While DNA methylation is usually thought to be symmetrical across both alleles, there are some notable exceptions. Genomic imprinting and X chromosome inactivation are two well-studied sources of allele-specific methylation (ASM), but recent research has indicated a more complex pattern in which genotypic variation can be associated with allelically-skewed DNA methylation in cis. Given the known heterogeneity of DNA methylation across tissues and cell types we explored inter- and intra-individual variation in ASM across several regions of the human brain and whole blood from multiple individuals. Consistent with previous studies, we find widespread ASM with > 4% of the ∼220,000 loci interrogated showing evidence of allelically-skewed DNA methylation. We identify ASM flanking known imprinted regions, and show that ASM sites are enriched in DNase I hypersensitivity sites and often located in an extended genomic context of intermediate DNA methylation. We also detect examples of genotype-driven ASM, some of which are tissue-specific. These findings contribute to our understanding of the nature of differential DNA methylation across tissues and have important implications for genetic studies of complex disease. As a resource to the community, ASM patterns across each of the tissues studied are available in a searchable online database: http://epigenetics.essex.ac.uk/ASMBrainBlood.",2016-01-19 +32452706,"Daily Temperature and Bacillary Dysentery: Estimated Effects, Attributable Risks, and Future Disease Burden in 316 Chinese Cities.","

Background

Bacillary dysentery (BD) remains a significant public health issue, especially in developing countries. Evidence assessing the risk of BD from temperature is limited, particularly from national studies including multiple locations with different climatic characteristics.

Objectives

We estimated the effect of temperature on BD across China, assessed heterogeneity and attributable risks across cities and regions, and projected the future risk of BD under climate change.

Methods

Daily BD surveillance and meteorological data over 2014-2016 were collected from the Chinese Center for Disease Control and Prevention and the China Meteorology Administration, respectively. A two-stage statistical model was used to estimate city-specific temperature-BD relationships that were pooled to derive regional and national estimates. The risk of BD attributable to temperature was estimated, and the future burden of BD attributable to temperature was projected under different climate change scenarios.

Results

A positive linear relationship for the pooled effect was estimated at the national level. Subgroup analyses indicate that the estimated effect of temperature on BD was similar by age (≤5y or >5y) and gender. At baseline, estimated attributable risks for BD due to average daily mean temperatures above the 50th percentile were highest for the Inner Mongolia (16%), Northeast China (14%), and Northern China (13%). Most of the individual cities in the same regions and most of the cities in the Northwest, Southern, and Southwest regions, had high attributable risks (≥5%). The Northern, Northeast, Inner Mongolia, Northwest, and Southern China regions were identified as high risk for future BD, with estimated increases by the 2090s compared with baseline of 20% (95% confidence interval: 11%, 27%), 15% (6%, 20%), 15% (-1%, 22%), 12% (1%, 19%), and 11% (5%, 15%), respectively, under Representative Concentration Pathway 8.5.

Conclusions

The positive association between temperature and BD in different climatic regions of China, and the projection for increased risk due to climate change, support efforts to mitigate future risks. https://doi.org/10.1289/EHP5779.",2020-05-26 +30181897,Draft genome of Dugesia japonica provides insights into conserved regulatory elements of the brain restriction gene nou-darake in planarians.,"

Background

Planarians are non-parasitic Platyhelminthes (flatworms) famous for their regeneration ability and for having a well-organized brain. Dugesia japonica is a typical planarian species that is widely distributed in the East Asia. Extensive cellular and molecular experimental methods have been developed to identify the functions of thousands of genes in this species, making this planarian a good experimental model for regeneration biology and neurobiology. However, no genome-level information is available for D. japonica, and few gene regulatory networks have been identified thus far.

Results

To obtain whole-genome information on this species and to study its gene regulatory networks, we extracted genomic DNA from 200 planarians derived from a laboratory-bred asexual clonal strain, and sequenced 476 Gb of data by second-generation sequencing. Kmer frequency graphing and fosmid sequence analysis indicated a complex genome that would be difficult to assemble using second-generation sequencing short reads. To address this challenge, we developed a new assembly strategy and improved the de novo genome assembly, producing a 1.56 Gb genome sequence (DjGenome ver1.0, including 202,925 scaffolds and N50 length 27,741 bp) that covers 99.4% of all 19,543 genes in the assembled transcriptome, although the genome is fragmented as 80% of the genome consists of repeated sequences (genomic frequency ≥ 2). By genome comparison between two planarian genera, we identified conserved non-coding elements (CNEs), which are indicative of gene regulatory elements. Transgenic experiments using Xenopus laevis indicated that one of the CNEs in the Djndk gene may be a regulatory element, suggesting that the regulation of the ndk gene and the brain formation mechanism may be conserved between vertebrates and invertebrates.

Conclusion

This draft genome and CNE analysis will contribute to resolving gene regulatory networks in planarians. The genome database is available at: http://www.planarian.jp.",2018-08-29 +30105604,Annotation and detection of drug effects in text for pharmacovigilance.,"Pharmacovigilance (PV) databases record the benefits and risks of different drugs, as a means to ensure their safe and effective use. Creating and maintaining such resources can be complex, since a particular medication may have divergent effects in different individuals, due to specific patient characteristics and/or interactions with other drugs being administered. Textual information from various sources can provide important evidence to curators of PV databases about the usage and effects of drug targets in different medical subjects. However, the efficient identification of relevant evidence can be challenging, due to the increasing volume of textual data. Text mining (TM) techniques can support curators by automatically detecting complex information, such as interactions between drugs, diseases and adverse effects. This semantic information supports the quick identification of documents containing information of interest (e.g., the different types of patients in which a given adverse drug reaction has been observed to occur). TM tools are typically adapted to different domains by applying machine learning methods to corpora that are manually labelled by domain experts using annotation guidelines to ensure consistency. We present a semantically annotated corpus of 597 MEDLINE abstracts, PHAEDRA, encoding rich information on drug effects and their interactions, whose quality is assured through the use of detailed annotation guidelines and the demonstration of high levels of inter-annotator agreement (e.g., 92.6% F-Score for identifying named entities and 78.4% F-Score for identifying complex events, when relaxed matching criteria are applied). 
To our knowledge, the corpus is unique in the domain of PV, according to the level of detail of its annotations. To illustrate the utility of the corpus, we have trained TM tools based on its rich labels to recognise drug effects in text automatically. The corpus and annotation guidelines are available at: http://www.nactem.ac.uk/PHAEDRA/ .",2018-08-13 +30967756,MEG Source Imaging and Group Analysis Using VBMEG.,"Variational Bayesian Multimodal EncephaloGraphy (VBMEG) is a MATLAB toolbox that estimates distributed source currents from magnetoencephalography (MEG)/electroencephalography (EEG) data by integrating functional MRI (fMRI) (https://vbmeg.atr.jp/). VBMEG also estimates whole-brain connectome dynamics using anatomical connectivity derived from a diffusion MRI (dMRI). In this paper, we introduce the VBMEG toolbox and demonstrate its usefulness. By collaborating with VBMEG's tutorial page (https://vbmeg.atr.jp/docs/v2/static/vbmeg2_tutorial_neuromag.html), we show its full pipeline using an open dataset recorded by Wakeman and Henson (2015). We import the MEG data and preprocess them to estimate the source currents. From the estimated source currents, we perform a group analysis and examine the differences of current amplitudes between conditions by controlling the false discovery rate (FDR), which yields results consistent with previous studies. We highlight VBMEG's characteristics by comparing these results with those obtained by other source imaging methods: weighted minimum norm estimate (wMNE), dynamic statistical parametric mapping (dSPM), and linearly constrained minimum variance (LCMV) beamformer. We also estimate source currents from the EEG data and the whole-brain connectome dynamics from the MEG data and dMRI. The observed results indicate the reliability, characteristics, and usefulness of VBMEG.",2019-03-22 +24784381,Provision of the DDSM mammography metadata in an accessible format.,"

Purpose

The Digital Database for Screening Mammography (DDSM) is the largest publicly available resource for mammographic image analysis research and has been used extensively in the past for computer assisted diagnosis (CADx) studies. However, the database has not been searchable for a specific kind of lesion, which rendered the case selection process in past studies often times arbitrary. Therefore, the authors want to provide the complete metadata of the DDSM in an accessible format.

Methods

The authors semiautomatically transformed the data available athttp://marathon.csee.usf.edu/Mammography/Database.html into table format. The 1769 cases (914 from cancer volumes, 855 from benign volumes) comprise 1220 mass lesions (578 benign, 642 malignant) and 859 calcifications (433 benign, 426 malignant). Additionally, 694 normal cases were processed to allow for matching according to age and breast density.

Results

The authors provide the entire DDSM metadata (for benign, malignant, and normal cases) as tab-delimited text files[see supplementary material at http://dx.doi.org/10.1118/1.4870379E-MPHYA6-41-006405 for DDSM metadata].

Conclusions

The data provided make the case selection for future studies using the DDSM reproducible. Furthermore, it may serve as a validation dataset for CADx approaches using the BI-RADS lexicon.",2014-05-01 +32517646,CapsNet-SSP: multilane capsule network for predicting human saliva-secretory proteins.,"

Background

Compared with disease biomarkers in blood and urine, biomarkers in saliva have distinct advantages in clinical tests, as they can be conveniently examined through noninvasive sample collection. Therefore, identifying human saliva-secretory proteins and further detecting protein biomarkers in saliva have significant value in clinical medicine. There are only a few methods for predicting saliva-secretory proteins based on conventional machine learning algorithms, and all are highly dependent on annotated protein features. Unlike conventional machine learning algorithms, deep learning algorithms can automatically learn feature representations from input data and thus hold promise for predicting saliva-secretory proteins.

Results

We present a novel end-to-end deep learning model based on multilane capsule network (CapsNet) with differently sized convolution kernels to identify saliva-secretory proteins only from sequence information. The proposed model CapsNet-SSP outperforms existing methods based on conventional machine learning algorithms. Furthermore, the model performs better than other state-of-the-art deep learning architectures mostly used to analyze biological sequences. In addition, we further validate the effectiveness of CapsNet-SSP by comparison with human saliva-secretory proteins from existing studies and known salivary protein biomarkers of cancer.

Conclusions

The main contributions of this study are as follows: (1) an end-to-end model based on CapsNet is proposed to identify saliva-secretory proteins from the sequence information; (2) the proposed model achieves better performance and outperforms existing models; and (3) the saliva-secretory proteins predicted by our model are statistically significant compared with existing cancer biomarkers in saliva. In addition, a web server of CapsNet-SSP is developed for saliva-secretory protein identification, and it can be accessed at the following URL: http://www.csbg-jlu.info/CapsNet-SSP/. We believe that our model and web server will be useful for biomedical researchers who are interested in finding salivary protein biomarkers, especially when they have identified candidate proteins for analyzing diseased tissues near or distal to salivary glands using transcriptome or proteomics.",2020-06-09 +32277022,Clinical Evaluation of the cobas SARS-CoV-2 Test and a Diagnostic Platform Switch during 48 Hours in the Midst of the COVID-19 Pandemic. ,"Laboratories are currently witnessing extraordinary demand globally for sampling devices, reagents, consumables, and diagnostic instruments needed for timely diagnosis of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) infection. To meet diagnostic needs as the pandemic grows, the U.S. Food and Drug Administration (FDA) recently granted several commercial SARS-CoV-2 tests Emergency Use Authorization (EUA), but manufacturer-independent evaluation data are scarce. We performed the first manufacturer-independent evaluation of the fully automated sample-to-result two-target test cobas 6800 SARS-CoV-2 (cobas) (Roche Molecular Systems, Branchburg, NJ), which received U.S. FDA EUA on 12 March 2020. 
The comparator was a standardized 3-h SARS-CoV-2 protocol, consisting of RNA extraction using an automated portable instrument, followed by a two-target reverse transcription real-time PCR (RT-PCR), which our laboratory has routinely used since January 2020 [V. M. Corman, O. Landt, M. Kaiser, R. Molenkamp, et al., Euro Surveill 25(3):pii=2000045, 2020, https://doi.org/10.2807/1560-7917.ES.2020.25.3.2000045]. cobas and the comparator showed overall agreement of 98.1% and a kappa value of 0.95 on an in-house validation panel consisting of 217 well-characterized retrospective samples. Immediate prospective head-to-head comparative evaluation followed on 502 samples, and the diagnostic approaches showed overall agreement of 99.6% and a kappa value of 0.98. A good correlation (r2 = 0.96) between cycle threshold values for SARS-CoV-2-specific targets obtained by cobas and the comparator was observed. Our results showed that cobas is a reliable assay for qualitative detection of SARS-CoV-2 in nasopharyngeal swab samples collected in the Universal Transport Medium System (UTM-RT) (Copan, Brescia, Italy). Under the extraordinary circumstances that laboratories are facing worldwide, a safe diagnostic platform switch is feasible in only 48 h and in the midst of the COVID-19 pandemic if carefully planned and executed.",2020-05-26 +32266474,Conjunctive reward-place coding properties of dorsal distal CA1 hippocampus cells.,"Autonomous motivated spatial navigation in animals or robots requires the association between spatial location and value. Hippocampal place cells are involved in goal-directed spatial navigation and the consolidation of spatial memories. Recently, Gauthier and Tank (Neuron 99(1):179-193, 2018. https://doi.org/10.1016/j.neuron.2018.06.008) have identified a subpopulation of hippocampal cells selectively activated in relation to rewarded goals. However, the relationship between these cells' spiking activity and goal representation remains elusive. 
We analyzed data from experiments in which rats underwent five consecutive tasks in which reward locations and spatial context were manipulated. We found CA1 populations with properties continuously ranging from place cells to reward cells. Specifically, we found typical place cells insensitive to reward locations, reward cells that only fired at correct rewarded feeders in each task regardless of context, and ""hybrid cells"" that responded to spatial locations and change of reward locations. Reward cells responded mostly to the reward delivery rather than to its expectation. In addition, we found a small group of neurons that transitioned between place and reward cells properties within the 5-task session. We conclude that some pyramidal cells (if not all) integrate both spatial and reward inputs to various degrees. These results provide insights into the integrative coding properties of CA1 pyramidal cells, focusing on their abilities to carry both spatial and reward information in a mixed and plastic manner. This conjunctive coding property prompts a re-thinking of current computational models of spatial navigation in which hippocampal spatial and subcortical value representations are independent.",2020-04-07 +29715310,FusionHub: A unified web platform for annotation and visualization of gene fusion events in human cancer.,"Gene fusion is a chromosomal rearrangement event which plays a significant role in cancer due to the oncogenic potential of the chimeric protein generated through fusions. At present many databases are available in public domain which provides detailed information about known gene fusion events and their functional role. Existing gene fusion detection tools, based on analysis of transcriptomics data usually report a large number of fusion genes as potential candidates, which could be either known or novel or false positives. Manual annotation of these putative genes is indeed time-consuming. 
We have developed a web platform FusionHub, which acts as integrated search engine interfacing various fusion gene databases and simplifies large scale annotation of fusion genes in a seamless way. In addition, FusionHub provides three ways of visualizing fusion events: circular view, domain architecture view and network view. Design of potential siRNA molecules through ensemble method is another utility integrated in FusionHub that could aid in siRNA-based targeted therapy. FusionHub is freely available at https://fusionhub.persistent.co.in.",2018-05-01 +29284497,Intergenic disease-associated regions are abundant in novel transcripts.,"

Background

Genotyping of large populations through genome-wide association studies (GWAS) has successfully identified many genomic variants associated with traits or disease risk. Unexpectedly, a large proportion of GWAS single nucleotide polymorphisms (SNPs) and associated haplotype blocks are in intronic and intergenic regions, hindering their functional evaluation. While some of these risk-susceptibility regions encompass cis-regulatory sites, their transcriptional potential has never been systematically explored.

Results

To detect rare tissue-specific expression, we employed the transcript-enrichment method CaptureSeq on 21 human tissues to identify 1775 multi-exonic transcripts from 561 intronic and intergenic haploblocks associated with 392 traits and diseases, covering 73.9 Mb (2.2%) of the human genome. We show that a large proportion (85%) of disease-associated haploblocks express novel multi-exonic non-coding transcripts that are tissue-specific and enriched for GWAS SNPs as well as epigenetic markers of active transcription and enhancer activity. Similarly, we captured transcriptomes from 13 melanomas, targeting nine melanoma-associated haploblocks, and characterized 31 novel melanoma-specific transcripts that include fusion proteins, novel exons and non-coding RNAs, one-third of which showed allelically imbalanced expression.

Conclusions

This resource of previously unreported transcripts in disease-associated regions ( http://gwas-captureseq.dingerlab.org ) should provide an important starting point for the translational community in search of novel biomarkers, disease mechanisms, and drug targets.",2017-12-28 +31238003,"Behavioral Assessment of Hearing in 2- to 7-Year-Old Children: Evaluation of a Two-Interval, Observer-Based Procedure Using Conditioned Play-Based Responses.","Purpose It is challenging to collect reliable behavioral data from toddlers and preschoolers. Consequently, we have significant gaps in our understanding of how auditory development unfolds during this time period. One method that appears to be promising is an observer-based procedure that uses conditioned, play-based responses (Bonino & Leibold, 2017). In order to evaluate the quality of data obtained with this method, this study presented a suprathreshold signal to determine the number of trials 2- to 7-year-old children could complete, as well as the associated hit rate and observer confidence. Method Participants were 23 children (2-7 years old). Children were taught to perform a play-based motor response when they detected the 1000-Hz warble tone signal (at 30 dB SPL). An observer evaluated children's behavior using a 2-interval, 2-alternative testing paradigm. Testing was terminated after 100 trials or earlier, if signs of habituation were observed. Results Data were successfully collected from 22 of the 23 children. Of the 22 children, all but 1 child completed 100 trials. Overall hit rate was high (0.88-1.0; M = 0.94) and improved with listener age. Hit rate was stable across the test session. Strong agreement was seen between the correctness of the response and the observer's confidence in the judgment. Conclusion Results of this study confirm that the 2-interval, observer-based procedure described in this article is a powerful tool for measuring detection and discrimination abilities in young children. 
Future research will (a) evaluate coder reliability and (b) examine stability of performance across a test session when the signal intensity is manipulated. Supplemental Material https://doi.org/10.23641/asha.8309273.",2019-06-25 +26169799,Correlating bladder cancer risk genes with their targeting microRNAs using MMiRNA-Tar.,"The Cancer Genome Atlas (TCGA) (http://cancergenome.nih.gov) is a valuable data resource focused on an increasing number of well-characterized cancer genomes. In part, TCGA provides detailed information about cancer-dependent gene expression changes, including changes in the expression of transcription-regulating microRNAs. We developed a web interface tool MMiRNA-Tar (http://bioinf1.indstate.edu/MMiRNA-Tar) that can calculate and plot the correlation of expression for mRNA-microRNA pairs across samples or over a time course for a list of pairs under different prediction confidence cutoff criteria. Prediction confidence was established by requiring that the proposed mRNA-microRNA pair appears in at least one of three target prediction databases: TargetProfiler, TargetScan, or miRanda. We have tested our MMiRNA-Tar tool through analyzing 53 tumor and 11 normal samples of bladder urothelial carcinoma (BLCA) datasets obtained from TCGA and identified 204 microRNAs. These microRNAs were correlated with the mRNAs of five previously-reported bladder cancer risk genes and these selected pairs exhibited correlations in opposite direction between the tumor and normal samples based on the customized cutoff criterion of prediction. Furthermore, we have identified additional 496 genes (830 pairs) potentially targeted by 79 significant microRNAs out of 204 using three cutoff criteria, i.e., false discovery rate (FDR)<0.1, opposite correlation coefficient between the tumor and normal samples, and predicted by at least one of three target prediction databases. 
Therefore, MMiRNA-Tar provides researchers a convenient tool to visualize the co-relationship between microRNAs and mRNAs and to predict their targeting relationship. We believe that correlating expression profiles for microRNAs and mRNAs offers a complementary approach for elucidating their interactions.",2015-06-01 +26568329,MAPanalyzer: a novel online tool for analyzing microtubule-associated proteins. ,"The wide functional impacts of microtubules are unleashed and controlled by a battery of microtubule-associated proteins (MAPs). Specialists in the field appreciate the diversity of known MAPs and propel the identifications of novel MAPs. By contrast, there is neither specific database to record known MAPs, nor MAP predictor that can facilitate the discovery of potential MAPs. We here report the establishment of a MAP-centered online analysis tool MAPanalyzer, which consists of a MAP database and a MAP predictor. In the database, a core MAP dataset, which is fully manually curated from the literature, is further enriched by MAP information collected via automated pipeline. The core dataset, on the other hand, enables the building of a novel MAP predictor which combines specialized machine learning classifiers and the BLAST homology searching tool. Benchmarks on the curated testing dataset and the Arabidopsis thaliana whole genome dataset have shown that the proposed predictor outperforms not only its own components (i.e. the machine learning classifiers and BLAST), but also another popular homology searching tool, PSI-BLAST. Therefore, MAPanalyzer will serve as a promising computational resource for the investigations of MAPs. Database URL: http://systbio.cau.edu.cn/mappred/.",2015-11-13 +,A novel automated method for the adjustment of ionic metal concentrations in soil extracts,A novel method is proposed for correcting metal fraction concentrations remaining within the sediment containing the solid residue of the sequentially extracted fraction. 
An easy‐to‐use Excel spreadsheet was prepared to assist adjustment of concentration in each fraction and demonstrate the difference between adjusted and non‐adjusted metal concentration of the fraction. The demonstration of a calculation of the modified BCR protocol data showed that this difference may reach 10–15% of the result value. The spreadsheet is available to download at: http://departments.agri.huji.ac.il/zabam/Rosen_Chen_Fraction_Adjustment_Formulae.xls,2016-10-01 +31582461,Survival and Kidney Outcomes of Children with an Early Diagnosis of Posterior Urethral Valves.,"BACKGROUND AND OBJECTIVES:Posterior urethral valve is the most common cause of bladder outlet obstruction in infants. We aimed to describe the rate and timing of kidney-related and survival outcomes for children diagnosed with posterior urethral valves in United States children's hospitals using the Pediatric Health Information System database. DESIGN, SETTING, PARTICIPANTS, & MEASUREMENTS:This retrospective cohort study included children hospitalized between January 1, 1992 and December 31, 2006, who were in their first year of life, had a diagnosis of congenital urethral stenosis, and underwent endoscopic valve ablation or urinary drainage intervention, or died. Records were searched up to December 31, 2018 for kidney-related mortality, placement of a dialysis catheter, and kidney transplantation. Cox regression analysis was used to identify risk factors, and Kaplan-Meier survival analysis used to determine time-to-event probability. Subgroup survival analysis was performed with outcomes stratified by the strongest identified risk factor. RESULTS:Included were 685 children hospitalized at a median age of 7 (interquartile range, 1-37) days. Thirty four children (5%) died, over half during their initial hospitalization. Pulmonary hypoplasia was the strongest risk factor for death (hazard ratio, 7.5; 95% confidence interval [95% CI], 3.3 to 17.0). Ten-year survival probability was 94%. 
Fifty-nine children (9%) underwent one or more dialysis catheter placements. Children with kidney dysplasia had over four-fold risk of dialysis catheter placement (hazard ratio, 4.6; 95% CI, 2.6 to 8.1). Thirty-six (7%) children underwent kidney transplant at a median age of 3 (interquartile range, 2-8) years. Kidney dysplasia had a nine-fold higher risk of kidney transplant (hazard ratio, 9.5; 95% CI, 4.1 to 22.2). CONCLUSIONS:Patients in this multicenter cohort with posterior urethral valves had a 5% risk of death, and were most likely to die during their initial hospitalization. Risk of death was higher with a diagnosis of pulmonary hypoplasia. Kidney dysplasia was associated with a higher risk of need for dialysis/transplant. PODCAST:This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2019_10_03_CJN04350419.mp3.",2019-10-03 +31581866,DNA methylation profiles of genes associated with angiogenesis in the samples of placenta in pregnancies complicated by intrauterine growth restriction.,"

Background

Impairment in placental angiogenesis is blamed for the etiopathogenesis of intrauterine growth restriction (IUGR).

Aim

To assess the genes related to angiogenesis in placental biopsies of pregnancies complicated by IUGR that could be aberrantly methylated and adversely affect placental angiogenesis.

Methods

The methylation profiles of soluble fms-like tyrosine kinase-1 (sFLT-1), vascular endothelial growth factor (VEGF), and the placental growth factor (PIGF) were evaluated using Illumina MiSeq™ System in placental biopsies from term IUGR pregnancies without preeclampsia (n  =  18) and healthy controls (n  =  17). DNA was isolated from samples of tissue collected from the fetal side of the placenta. In the targeted regions, we have identified 30, 24, and 29 CpG islands (CpGi) within sFLT-1, VEGF and PIGF genes, respectively. CpGi which are most methylated in the promoter regions of three genes were selected for the study from the database http://www.ensembl.org.

Result(s)

IUGR fetuses had significantly lower placental and fetal birth weight than controls. The promoter of sFLT-1 at three CpGi and VEGF at six CpGi were the regions with significant methylation differences between IUGR and control placentas. sFLT-1 was hypermethylated at 265 and 352 CpGi; however, hypermethylation was lower in IUGR group compared to control group at this position. sFLT-1 was hypomethylated at 456 CpGi in IUGR group and hypermethylated at the same region in control group. VEGF was hypomethylated at 668, 703, and 710 CpGi in control and IUGR groups; however, hypomethylation at these positions was significantly higher in control group compared to IUGR. 776, 845, and 863 CpGi of VEGF promoter were significantly hypermethylated in IUGR group whereas hypomethylated in control group. The methylation profile of PIGF did not differ between the groups. After adjustment for the factors known to affect fetal birth weight, DNA methylation of VEGF 668 CpGi had a significant negative association with fetal birth weight in the control and the IUGR group and a positive association with IUGR pregnancies.

Conclusion(s)

Our results do not support the hypothesis that altered DNA methylation in the placental angiogenic genes is a major mechanism generally involved in IUGR. Only a specific region (at 668 CpGi) corresponding to the promoter of VEGF may serve as an epigenetic marker of IUGR and may be involved in the mechanism of IUGR. Large sample-sized studies are needed to understand the effects of DNA methylation on placental gene function and how they might influence fetal growth.",2019-10-03 +29315358,HTT-DB: new features and updates. ,"Horizontal Transfer (HT) of genetic material between species is a common phenomenon among Bacteria and Archaea species and several databases are available for information retrieval and data mining. However, little attention has been given to this phenomenon among eukaryotic species mainly due to the lower proportion of these events. In the last years, a vertiginous amount of new HT events involving eukaryotic species was reported in the literature, highlighting the need of a common repository to keep the scientific community up to date and describe overall trends. Recently, we published the first HT database focused on HT of transposable elements among eukaryotes: the Horizontal Transposon Transfer DataBase (http://lpa.saogabriel.unipampa.edu.br: 8080/httdatabase/). Here, we present new features and updates of this unique database: (i) its expansion to include virus-host exchange of genetic material, which we called Horizontal Virus Transfer (HVT) and (ii) the availability of a web server for HT detection, where we implemented the online version of vertical and horizontal inheritance consistence analysis (VHICA), an R package developed for HT detection. These improvements will help researchers to navigate through known HVT cases, take data-informed decision and export figures based on keywords searches. 
Moreover, the availability of the VHICA as an online tool will make this software easily reachable even for researchers with no or little computation knowledge as well as foster our capability to detect new HT events in a wide variety of taxa. (Database URL: http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase/).",2018-01-01 +30082508,Predicting data quality in biological X-ray solution scattering.,"Biological small-angle X-ray solution scattering (BioSAXS) is now widely used to gain information on biomolecules in the solution state. Often, however, it is not obvious in advance whether a particular sample will scatter strongly enough to give useful data to draw conclusions under practically achievable solution conditions. Conformational changes that appear to be large may not always produce scattering curves that are distinguishable from each other at realistic concentrations and exposure times. Emerging technologies such as time-resolved SAXS (TR-SAXS) pose additional challenges owing to small beams and short sample path lengths. Beamline optics vary in brilliance and degree of background scatter, and major upgrades and improvements to sources promise to expand the reach of these methods. Computations are developed to estimate BioSAXS sample intensity at a more detailed level than previous approaches, taking into account flux, energy, sample thickness, window material, instrumental background, detector efficiency, solution conditions and other parameters. The results are validated with calibrated experiments using standard proteins on four different beamlines with various fluxes, energies and configurations. The ability of BioSAXS to statistically distinguish a variety of conformational movements under continuous-flow time-resolved conditions is then computed on a set of matched structure pairs drawn from the Database of Macromolecular Motions (http://molmovdb.org). 
The feasibility of experiments is ranked according to sample consumption, a quantity that varies by over two orders of magnitude for the set of structures. In addition to photon flux, the calculations suggest that window scattering and choice of wavelength are also important factors given the short sample path lengths common in such setups.",2018-07-24 +32175316,RF-PseU: A Random Forest Predictor for RNA Pseudouridine Sites.,"One of the ubiquitous chemical modifications in RNA, pseudouridine modification is crucial for various cellular biological and physiological processes. To gain more insight into the functional mechanisms involved, it is of fundamental importance to precisely identify pseudouridine sites in RNA. Several useful machine learning approaches have become available recently, with the increasing progress of next-generation sequencing technology; however, existing methods cannot predict sites with high accuracy. Thus, a more accurate predictor is required. In this study, a random forest-based predictor named RF-PseU is proposed for prediction of pseudouridylation sites. To optimize feature representation and obtain a better model, the light gradient boosting machine algorithm and incremental feature selection strategy were used to select the optimum feature space vector for training the random forest model RF-PseU. Compared with previous state-of-the-art predictors, the results on the same benchmark data sets of three species demonstrate that RF-PseU performs better overall. The integrated average leave-one-out cross-validation and independent testing accuracy scores were 71.4% and 74.7%, respectively, representing increments of 3.63% and 4.77% versus the best existing predictor. Moreover, the final RF-PseU model for prediction was built on leave-one-out cross-validation and provides a reliable and robust tool for identifying pseudouridine sites. 
A web server with a user-friendly interface is accessible at http://148.70.81.170:10228/rfpseu.",2020-02-26 +26855883,CressInt: a user-friendly web resource for genome-scale exploration of gene regulation in Arabidopsis thaliana.,"The thale cress Arabidopsis thaliana is a powerful model organism for studying a wide variety of biological processes. Recent advances in sequencing technology have resulted in a wealth of information describing numerous aspects of A. thaliana genome function. However, there is a relative paucity of computational systems for efficiently and effectively using these data to create testable hypotheses. We present CressInt, a user-friendly web resource for exploring gene regulatory mechanisms in A. thaliana on a genomic scale. The CressInt system incorporates a variety of genome-wide data types relevant to gene regulation, including transcription factor (TF) binding site models, ChIP-seq, DNase-seq, eQTLs, and GWAS. We demonstrate the utility of CressInt by showing how the system can be used to (1) Identify TFs binding to the promoter of a gene of interest; (2) identify genetic variants that are likely to impact TF binding based on a ChIP-seq dataset; and (3) identify specific TFs whose binding might be impacted by phenotype-associated variants. CressInt is freely available at http://cressint.cchmc.org.",2015-09-01 +30010786,Broom: application for non-redundant storage of high throughput sequencing data.,"

Motivation

The data generation capabilities of high throughput sequencing (HTS) instruments have exponentially increased over the last few years, while the cost of sequencing has dramatically decreased allowing this technology to become widely used in biomedical studies. For small labs and individual researchers, however, storage and transfer of large amounts of HTS data present a significant challenge. The recent trends in increased sequencing quality and genome coverage can be used to reconsider HTS data storage strategies.

Results

We present Broom, a stand-alone application designed to select and store only high-quality sequencing reads at extremely high compression rates. Written in C++, the application accepts single and paired-end reads in FASTQ and FASTA formats and decompresses data in FASTA format.

Availability and implementation

C++ code available at https://scsb.utmb.edu/labgroups/fofanov/broom.asp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +24939193,'RetinoGenetics': a comprehensive mutation database for genes related to inherited retinal degeneration. ,"Inherited retinal degeneration (IRD), a leading cause of human blindness worldwide, is exceptionally heterogeneous with clinical heterogeneity and genetic variety. During the past decades, tremendous efforts have been made to explore the complex heterogeneity, and massive mutations have been identified in different genes underlying IRD with the significant advancement of sequencing technology. In this study, we developed a comprehensive database, 'RetinoGenetics', which contains informative knowledge about all known IRD-related genes and mutations for IRD. 'RetinoGenetics' currently contains 4270 mutations in 186 genes, with detailed information associated with 164 phenotypes from 934 publications and various types of functional annotations. Then extensive annotations were performed to each gene using various resources, including Gene Ontology, KEGG pathways, protein-protein interaction, mutational annotations and gene-disease network. Furthermore, by using the search functions, convenient browsing ways and intuitive graphical displays, 'RetinoGenetics' could serve as a valuable resource for unveiling the genetic basis of IRD. Taken together, 'RetinoGenetics' is an integrative, informative and updatable resource for IRD-related genetic predispositions. Database URL: http://www.retinogenetics.org/.",2014-06-17 +31510703,Statistical compression of protein sequences and inference of marginal probability landscapes over competing alignments using finite state models and Dirichlet priors.,"The information criterion of minimum message length (MML) provides a powerful statistical framework for inductive reasoning from observed data. We apply MML to the problem of protein sequence comparison using finite state models with Dirichlet distributions. 
The resulting framework allows us to supersede the ad hoc cost functions commonly used in the field, by systematically addressing the problem of arbitrariness in alignment parameters, and the disconnect between substitution scores and gap costs. Furthermore, our framework enables the generation of marginal probability landscapes over all possible alignment hypotheses, with potential to facilitate the users to simultaneously rationalize and assess competing alignment relationships between protein sequences, beyond simply reporting a single (best) alignment. We demonstrate the performance of our program on benchmarks containing distantly related protein sequences.

Availability and implementation

The open-source program supporting this work is available from: http://lcb.infotech.monash.edu.au/seqmmligner.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +31294057,"Genome/transcriptome collection of plethora of economically important, previously unexplored organisms from India and abroad.","Genome and transcriptome sequencing data are extremely useful resources for researchers in carrying out biological experiments that involves cloning and characterizing genes. We are presenting here genome sequence data from different clades of life including photosynthetic prokaryotes; oomycetes pathogens; probiotic bacteria; endophytic yeasts and filamentous fungus and pathogenic protozoa Leishmania donovani. In addition, we are also presenting paired control and treated stress response transcriptomes of Cyanobacteria growing in extreme conditions. The Cyanobacterial species that are included in this dataset were isolated from extreme conditions including desiccated monuments, hot springs and saline archipelagos. The probiotic Lactobacillus paracasei was isolated from Indian sub-continent. The Kala azar causing protozoan Leishmania donovani, whose early infectious stage is also included in this dataset. The endophyte Arthrinium malaysianum was isolated as a contaminant has significant bio-remediation property. Our collaborators have isolated endophyte Rhodotorula mucilaginosa JGTA1 from Jaduguda mines, West Bengal, India infested with Uranium. Our collaborators have isolated a heterozygous diploid oomycetes pathogen, Phytophthora ramorum causing sudden oak death in CA, USA coast is also part of the data. 
These dataset presents a unique heterogeneous collection from various sources that are analyzed using ""Genome Annotator Light (GAL): A Docker-based package for genome analysis and visualization"" (Panda et al., 2019) and are presented in a web site automatically created by GAL at http://www.eumicrobedb.org/cglab.",2019-06-05 +31800736,STUDY OF LIPID BIOMARKERS OF PATIENTS WITH POLYPS AND COLORECTAL CÂNCER.,"BACKGROUND:Colorectal cancer (CRC) is one of the leading causes of cancer worldwide. Early diagnostic methods using serum biomarkers are required. The study of omics, most recently lipidomics, has the purpose of analyzing lipids for a better understanding of human lipidoma. The evolution of mass spectrometry methods, such as MALDI-MS technology, has enabled the detection and identification of a wide variety of lipids with great potential to open new avenues for predictive and preventive medicine. OBJECTIVE:To determine the lipid profile of patients with colorectal cancer and polyps. METHODS:Patients with stage I-III CRC, adenomatous polyps and individuals with normal colonoscopy were selected. All patients underwent peripheral blood collection for lipid extraction. The samples were analyzed by MALDI-MS technique for lipid identification. STATISTICAL ANALYSIS:Univariate and multivariate (principal component analysis [PCA] and discriminant analysis by partial least squares [PLS-DA]) analyses workflows were applied to the dataset, using MetaboAnalyst 3.0 software. The ions were identified according to the class of lipids using the online database Lipid Maps (http://www.lipidmaps.org). RESULTS:We included 88 individuals, 40 with CRC, 12 with polyps and 32 controls. Boxplot analysis showed eight VIP ions in the three groups. Differences were observed between the cancer and control groups, as well as between cancer and polyp, but not between polyps and control. The polyketide (810.1) was the lipid represented in cancer and overrepresented in polyp and control. 
Among the patients with CRC we observed differences between lipids with lymph node invasion (N1-2) compared to those without lymph node invasion (N). CONCLUSION:Possible lipid biomarkers were identified among cancer patients compared to control and polyp groups. The polyketide lipid (810.1) was the best biomarker to differentiate the cancer group from control and polyp. We found no difference between the biomarkers in the polyp group in relation to the control.",2019-10-01 +31566403,"""When facial expressions do and do not signal minds: The role of face inversion, expression dynamism, and emotion type"": Correction to Krumhuber et al. (2019).","Reports an error in ""When facial expressions do and do not signal minds: The role of face inversion, expression dynamism, and emotion type"" by Eva G. Krumhuber, Yu-Kun Lai, Paul L. Rosin and Kurt Hugenberg (Emotion, 2019[Jun], Vol 19[4], 746-750). In the original article, the supplemental materials link (http://dx.doi.org/10.1037/emo0000475.supp) was missing from the first page of the article. The supplemental materials are now available online, and the online version of this article has been corrected. (The following abstract of the original article appeared in record 2018-37624-001.) Recent research has linked facial expressions to mind perception. Specifically, Bowling and Banissy (2017) found that ambiguous doll-human morphs were judged as more likely to have a mind when smiling. Herein, we investigate 3 key potential boundary conditions of this ""expression-to-mind"" effect. First, we demonstrate that face inversion impairs the ability of happy expressions to signal mindful states in static faces; however, inversion does not disrupt this effect for dynamic displays of emotion. Finally, we demonstrate that not all emotions have equivalent effects. 
Whereas happy faces generate more mind ascription compared to neutral faces, we find that expressions of disgust actually generate less mind ascription than those of happiness. (PsycINFO Database Record (c) 2019 APA, all rights reserved).",2019-10-01 +31626750,Properties of Stress Granule and P-Body Proteomes.,"Stress granules and P-bodies are cytosolic biomolecular condensates that dynamically form by the phase separation of RNAs and proteins. They participate in translational control and buffer the proteome. Upon stress, global translation halts and mRNAs bound to the translational machinery and other proteins coalesce to form stress granules (SGs). Similarly, translationally stalled mRNAs devoid of translation initiation factors shuttle to P-bodies (PBs). Here, we review the cumulative progress made in defining the protein components that associate with mammalian SGs and PBs. We discuss the composition of SG and PB proteomes, supported by a new user-friendly database (http://rnagranuledb.lunenfeld.ca/) that curates current literature evidence for genes or proteins associated with SGs or PBs. As previously observed, the SG and PB proteomes are biased toward intrinsically disordered regions and have a high propensity to contain primary sequence features favoring phase separation. We also provide an outlook on how the various components of SGs and PBs may cooperate to organize and form membraneless organelles.",2019-10-01 +31212715,Approaching the Communication Constraints of Ethereum-Based Decentralized Applications. ,"Those working on Blockchain technologies have described several new innovative directions and novel services in the Internet of things (IoT), including decentralized trust, trusted and verifiable execution of smart contracts, and machine-to-machine communications and automation that reach beyond the mere exchange of data. However, applying blockchain principles in the IoT is a challenge due to the constraints of the end devices. 
Because of fierce cost pressure, the hardware resources in these devices are usually reduced to the minimum necessary for operation. To achieve the high coverage needed, low bitrate mobile or wireless technologies are frequently applied, so the communication is often constrained, too. These constraints make the implementation of blockchain nodes for IoT as standalone end-devices impractical or even impossible. We therefore investigated possible design approaches to decentralized applications based on the Ethereum blockchain for the IoT. We proposed and evaluated three application architectures differing in communication, computation, storage, and security requirements. In a pilot setup we measured and analyzed the data traffic needed to run the blockchain clients and their applications. We found out that with the appropriate designs and the remote server architecture we can strongly reduce the storage and communication requirements imposed on devices, with predictable security implications. Periodic device traffic is reduced to 2400 B/s (HTTP) and 170 B/s (Websocket) from about 18 kB/s in the standalone-device full client architecture. A notification about a captured blockchain event and the corresponding verification resulted in about 2000 B of data. A transaction sent from the application to the client resulted in an about 500 B (HTTP) and 300 B message (Websocket). The key store location, which affects the serialization of a transaction, only had a small influence on the transaction-related data. Raw transaction messages were 45 B larger than when passing the JSON transaction objects. These findings provide directions for fog/cloud IoT application designers to avoid unrealistic expectations imposed upon their IoT devices and blockchain technologies, and enable them to select the appropriate system design according to the intended use case and system constraints. 
However, for very low bit-rate communication networks, new communication protocols for device to blockchain-client need to be considered.",2019-06-11 +30865284,PleioNet: a web-based visualization tool for exploring pleiotropy across complex traits.,"

Summary

Pleiotropy plays an important role in furthering our understanding of the shared genetic architecture of different human diseases and traits. However, exploring and visualizing pleiotropic information with currently publicly available tools is limiting and challenging. To aid researchers in constructing and digesting pleiotropic networks, we present PleioNet, a web-based visualization tool for exploring this information across human diseases and traits. This program provides an intuitive and interactive web interface that seamlessly integrates large database queries with visualizations that enable users to quickly explore complex high-dimensional pleiotropic information. PleioNet works on all modern computer and mobile web browsers, making pleiotropic information readily available to a broad range of researchers and clinicians with diverse technical backgrounds. We expect that PleioNet will be an important tool for studying the underlying pleiotropic connections among human diseases and traits.

Availability and implementation

PleioNet is hosted on Google cloud and freely available at http://www.pleionet.com/.",2019-10-01 +30825370,AutoDock Bias: improving binding mode prediction and virtual screening using known protein-ligand interactions.,"SUMMARY:The performance of docking calculations can be improved by tuning parameters for the system of interest, e.g. biasing the results towards the formation of relevant protein-ligand interactions, such as known ligand pharmacophore or interaction sites derived from cosolvent molecular dynamics. AutoDock Bias is a straightforward and easy to use script-based method that allows the introduction of different types of user-defined biases for fine-tuning AutoDock4 docking calculations. AVAILABILITY AND IMPLEMENTATION:AutoDock Bias is distributed with MGLTools (since version 1.5.7), and freely available on the web at http://ccsb.scripps.edu/mgltools/ or http://autodockbias.wordpress.com. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-10-01 +26699919,ECGene: A Literature-Based Knowledgebase of Endometrial Cancer Genes.,"Endometrial cancer (EC) ranks as the sixth common cancer for women worldwide. To better distinguish cancer subtypes and identify effective early diagnostic biomarkers, we need improved understanding of the biological mechanisms associated with EC dysregulated genes. Although there is a wealth of clinical and molecular information relevant to EC in the literature, there has been no systematic summary of EC-implicated genes. In this study, we developed a literature-based database ECGene (Endometrial Cancer Gene database) with comprehensive annotations. ECGene features manual curation of 414 genes from thousands of publications, results from eight EC gene expression datasets, precomputation of coexpressed long noncoding RNAs, and an EC-implicated gene interactome. In the current release, we generated and comprehensively annotated a list of 458 EC-implicated genes. 
We found the top-ranked EC-implicated genes are frequently mutated in The Cancer Genome Atlas (TCGA) tumor samples. Furthermore, systematic analysis of coexpressed lncRNAs provided insight into the important roles of lncRNA in EC development. ECGene has a user-friendly Web interface and is freely available at http://ecgene.bioinfo-minzhao.org/. As the first literature-based online resource for EC, ECGene serves as a useful gateway for researchers to explore EC genetics.",2016-01-13 +30984248,Integrative Differential Expression Analysis for Multiple EXperiments (IDEAMEX): A Web Server Tool for Integrated RNA-Seq Data Analysis.,"The current DNA sequencing technologies and their high-throughput yield, allowed the thrive of genomic and transcriptomic experiments but it also have generated big data problem. Due to this exponential growth of sequencing data, also the complexity of managing, processing and interpreting it in order to generate results, has raised. Therefore, the demand of easy-to-use friendly software and websites to run bioinformatic tools is imminent. In particular, RNA-Seq and differential expression analysis have become a popular and useful method to evaluate the genetic expression change in any organism. However, many scientists struggle with the data analysis since most of the available tools are implemented in a UNIX-based environment. Therefore, we have developed the web server IDEAMEX (Integrative Differential Expression Analysis for Multiple EXperiments). The IDEAMEX pipeline needs a raw count table for as many desired replicates and conditions, allowing the user to select which conditions will be compared, instead of doing all-vs.-all comparisons. 
The whole process consists of three main steps: (1) Data Analysis: that allows a preliminary analysis for quality control based on the data distribution per sample, using different types of graphs; (2) Differential expression: performs the differential expression analysis with or without batch effect error awareness, using the bioconductor packages, NOISeq, limma-Voom, DESeq2 and edgeR, and generates reports for each method; (3) Result integration: the integrated results are reported using different graphical outputs such as correlograms, heatmaps, Venn diagrams and text lists. Our server allows an easy and friendly visualization for results, providing an easy interaction during the analysis process, as well as error tracking and debugging by providing output log files. The server is currently available and can be accessed at http://www.uusmb.unam.mx/ideamex/ where the documentation and example input files are provided. We consider that this web server can help other researchers with no previous bioinformatic knowledge, to perform their analyses in a simple manner.",2019-03-29 +24767249,Genome-wide Mycobacterium tuberculosis variation (GMTV) database: a new tool for integrating sequence variations and epidemiology.,"

Background

Tuberculosis (TB) poses a worldwide threat due to advancing multidrug-resistant strains and deadly co-infections with Human immunodeficiency virus. Today large amounts of Mycobacterium tuberculosis whole genome sequencing data are being assessed broadly and yet there exists no comprehensive online resource that connects M. tuberculosis genome variants with geographic origin, with drug resistance or with clinical outcome.

Description

Here we describe a broadly inclusive unifying Genome-wide Mycobacterium tuberculosis Variation (GMTV) database (http://mtb.dobzhanskycenter.org) that catalogues genome variations of M. tuberculosis strains collected across Russia. GMTV contains a broad spectrum of data derived from different sources and related to M. tuberculosis molecular biology, epidemiology, TB clinical outcome, year and place of isolation, drug resistance profiles and displays the variants across the genome using a dedicated genome browser. GMTV database, which includes 1084 genomes and over 69,000 SNP or Indel variants, can be queried about M. tuberculosis genome variation and putative associations with drug resistance, geographical origin, and clinical stages and outcomes.

Conclusions

Implementation of GMTV tracks the pattern of changes of M. tuberculosis strains in different geographical areas, facilitates disease gene discoveries associated with drug resistance or different clinical sequelae, and automates comparative genomic analyses among M. tuberculosis strains.",2014-04-25 +31580794,Genus-wide Yersinia core-genome multilocus sequence typing for species identification and strain characterization. ,"The genus Yersinia comprises species that differ widely in their pathogenic potential and public-health significance. Yersinia pestis is responsible for plague, while Yersinia enterocolitica is a prominent enteropathogen. Strains within some species, including Y. enterocolitica, also vary in their pathogenic properties. Phenotypic identification of Yersinia species is time-consuming, labour-intensive and may lead to incorrect identifications. Here, we developed a method to automatically identify and subtype all Yersinia isolates from their genomic sequence. A phylogenetic analysis of Yersinia isolates based on a core subset of 500 shared genes clearly demarcated all existing Yersinia species and uncovered novel, yet undefined Yersinia taxa. An automated taxonomic assignment procedure was developed using species-specific thresholds based on core-genome multilocus sequence typing (cgMLST). The performance of this method was assessed on 1843 isolates prospectively collected by the French National Surveillance System and analysed in parallel using phenotypic reference methods, leading to nearly complete (1814; 98.4 %) agreement at species and infra-specific (biotype and serotype) levels. For 29 isolates, incorrect phenotypic assignments resulted from atypical biochemical characteristics or lack of phenotypic resolution. To provide an identification tool, a database of cgMLST profiles and reference taxonomic information has been made publicly accessible (https://bigsdb.pasteur.fr/yersinia). 
Genomic sequencing-based identification and subtyping of any Yersinia is a powerful and reliable novel approach to define the pathogenic potential of isolates of this medically important genus.",2019-09-30 +31571406,"Molecular characterization of Mycobacterium bovis infection in cattle and buffalo in Amazon Region, Brazil.","The aim of this study was to characterize Mycobacterium bovis from cattle and buffalo tissue samples, from two Brazilian states, and to analyse their genetic diversity by spoligotyping. Tissue samples from tuberculosis suspect animals, 57 in Amazonas State (12 cattle and 45 buffaloes) and six from Pará State (5 cattle and one buffalo) from slaughterhouses under State Veterinary Inspection, were isolated in culture medium Stonebrink. The positive cultures were confirmed by PCR and analysed by the spoligotyping technique and the patterns (spoligotypes) were identified and compared at the Mycobacterium bovis Spoligotype Database (http://www.mbovis.org/). There was bacterial growth in 44 (69.8%) of the tissues of the 63 animals, of which PCR for region of differentiation 4 identified 35/44 (79.5%) as Mycobacterium bovis. Six different spoligotypes were identified among the 35 Mycobacterium bovis isolates, of which SB0295, SB1869, SB0121 and SB1800 had already been described in Brazil, and SB0822 and SB1608 had not been described. The most frequent spoligotype in this study (SB0822) had already been described in buffaloes in Colombia, a neighbouring country of Amazonas state. 
The other identified spoligotypes were also described in other South American countries, such as Argentina and Venezuela, and described in the Brazilian states of Rio Grande do Sul, Santa Catarina, São Paulo, Minas Gerais, Mato Grosso do Sul, Mato Grosso and Goiás, indicating an active movement of Mycobacterium bovis strains within Brazil.",2019-09-30 +32551881,"Fine Particulate Matter and Poor Cognitive Function among Chinese Older Adults: Evidence from a Community-Based, 12-Year Prospective Cohort Study.","

Background

Research on the relationship between long-term exposure to particulate matter with aerodynamic diameter ≤2.5μm (PM2.5) and poor cognitive function is lacking in developing countries, especially in highly polluted areas.

Objectives

We evaluated associations of long-term exposure to PM2.5 with poor cognitive function in a diverse, national sample of older adults in China.

Methods

This analysis included data on 13,324 older adults (5,879 who were 65-79 years of age, 3,052 who were 80-89 years of age, 2,634 who were 90-99 years of age, and 1,759 who were ≥100 years of age) with normal cognitive function at baseline from March 2002 to September 2014, with 64,648 person-years of follow-up. We used a geographic information system analysis to estimate the annual average satellite-derived PM2.5 concentration for the geocoded location of the participants' baseline residences. Poor cognitive function was defined as a score of less than 18 on the Chinese version of the Mini-Mental State Examination (MMSE). Competing risk models were performed to explore the association of PM2.5 with poor cognitive function.

Results

Each 10-μg/m3 increase in PM2.5 was associated with a 5.1% increased risk of poor cognitive function [adjusted hazard ratio (HR): 1.051; 95% confidence interval (CI): 1.023, 1.079]. Compared to the lowest quartile of PM2.5 (<41.4 μg/m3), adjusted HR values were 1.20 (95% CI: 1.09, 1.33), 1.27 (95% CI: 1.15, 1.41), and 1.21 (95% CI: 1.09, 1.34) for the second (≥41.4-50.3 μg/m3), third (≥50.3-60.7 μg/m3), and fourth (≥60.7 μg/m3) quartiles of PM2.5, respectively (p for trend <0.001). Subgroup analyses suggested stronger associations between PM2.5 and poor cognitive impairment in men than women. The association was positive in the 65- to 79- and ≥100-y age group but not significant and positive in the other two age groups with similar results.

Conclusion

PM2.5 was identified as a risk factor for poor cognitive function in Chinese older adults. Improving air quality may reduce the future population burden of poor cognitive function, especially in areas with high air pollution. https://doi.org/10.1289/EHP5304.",2020-06-18 +31380097,A review of protocols for the experimental release of kelp (Laminariales) zoospores.,"

Abstract

Kelps (order Laminariales) are foundation species in temperate and arctic seas globally, but they are in decline in many places. Laminarian kelp have an alternation of generations and this poses challenges for experimental studies due to the difficulties in achieving zoospore release and gametophyte growth. Here, we review and synthesize the protocols that have been used to induce zoospore release in kelps to identify commonalities and provide guidance on best practices. We found 171 papers, where zoospore release was induced in four kelp families from 35 different ecoregions. The most commonly treated family was Laminariaceae, followed by Lessoniaceae and the most studied ecoregion was Central Chile, followed by the Southern California Bight. Zoospore release generally involved three steps: a pretreatment which included cleaning of the reproductive tissue to eliminate epiphytic organisms, followed by desiccation of the tissue, and finally a postdesiccation immersion of the reproductive material in a seawater medium for zoospore release. Despite these commonalities, there was a high degree of variation in the detail within each of these steps, even among studies within genera and from the same ecoregions. This suggests either that zoospore release may be relatively insensitive across the Laminariales or that little methods optimization has been undertaken. We suggest that greater attention to standardization of protocols and reporting of methodology and optimization would improve comparisons of kelp zoospore release across species and locations and facilitate a broader understanding of this key, but understudied life history stage.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.0kh1f8j.",2019-06-20 +29335333,Predictors of In-Hospital Death After Aneurysmal Subarachnoid Hemorrhage: Analysis of a Nationwide Database (Swiss SOS [Swiss Study on Aneurysmal Subarachnoid Hemorrhage]).,"To identify predictors of in-hospital mortality in patients with aneurysmal subarachnoid hemorrhage and to estimate their impact. Retrospective analysis of prospective data from a nationwide multicenter registry on all aneurysmal subarachnoid hemorrhage cases admitted to a tertiary neurosurgical department in Switzerland (Swiss SOS [Swiss Study on Aneurysmal Subarachnoid Hemorrhage]; 2009-2015). Both clinical and radiological independent predictors of in-hospital mortality were identified, and their effect size was determined by calculating adjusted odds ratios (aORs) using multivariate logistic regression. Survival was displayed using Kaplan-Meier curves. Data of n=1866 aneurysmal subarachnoid hemorrhage patients in the Swiss SOS database were available. In-hospital mortality was 20% (n=373). In n=197 patients (10.6%), active treatment was discontinued after hospital admission (no aneurysm occlusion attempted), and this cohort was excluded from analysis of the main statistical model. In the remaining n=1669 patients, the rate of in-hospital mortality was 13.9% (n=232). 
Strong independent predictors of in-hospital mortality were rebleeding (aOR, 7.69; 95% confidence interval, 3.00-19.71; P<0.001), cerebral infarction attributable to delayed cerebral ischemia (aOR, 3.66; 95% confidence interval, 1.94-6.89; P<0.001), intraventricular hemorrhage (aOR, 2.65; 95% confidence interval, 1.38-5.09; P=0.003), and new infarction post-treatment (aOR, 2.57; 95% confidence interval, 1.43-4.62; P=0.002).Several-and among them modifiable-factors seem to be associated with in-hospital mortality after aneurysmal subarachnoid hemorrhage. Our data suggest that strategies aiming to reduce the risk of rebleeding are most promising in patients where active treatment is initially pursued.URL: http://www.clinicaltrials.gov. Unique identifier: NCT03245866.",2018-01-15 +33816867,"Citation.js: a format-independent, modular bibliography tool for the browser and command line.","

Background

Given the vast number of standards and formats for bibliographical data, any program working with bibliographies and citations has to be able to interpret such data. This paper describes the development of Citation.js (https://citation.js.org/), a tool to parse and format according to those standards. The program follows modern guidelines for software in general and JavaScript in specific, such as version control, source code analysis, integration testing and semantic versioning.

Results

The result is an extensible tool that has already seen adaption in a variety of sources and use cases: as part of a server-side page generator of a publishing platform, as part of a local extensible document generator, and as part of an in-browser converter of extracted references. Use cases range from transforming a list of DOIs or Wikidata identifiers into a BibTeX file on the command line, to displaying RIS references on a webpage with added Altmetric badges to generating ""How to cite this"" sections on a blog. The accuracy of conversions is currently 27% for properties and 60% for types on average and a typical initialization takes 120 ms in browsers and 1 s with Node.js on the command line.

Conclusions

Citation.js is a library supporting various formats of bibliographic information in a broad selection of use cases and environments. Given the support for plugins, more formats can be added with relative ease.",2019-08-12 +28322240,International spinal cord injury endocrine and metabolic extended data set.,"

Objective

The objective of this study was to develop the International Spinal Cord Injury (SCI) Endocrine and Metabolic Extended Data Set (ISCIEMEDS) within the framework of the International SCI Data Sets that would facilitate consistent collection and reporting of endocrine and metabolic findings in the SCI population.

Setting

This study was conducted in an international setting.

Methods

The ISCIEMEDS was developed by a working group. The initial ISCIEMEDS was revised based on suggestions from members of the International SCI Data Sets Committee, the International Spinal Cord Society (ISCoS) Executive and Scientific Committees, American Spinal Injury Association (ASIA) Board, other interested organizations, societies and individual reviewers. The data set was posted for two months on ISCoS and ASIA websites for comments. Variable names were standardized, and a suggested database structure for the ISCIEMEDS was provided by the Common Data Elements (CDEs) project at the National Institute on Neurological Disorders and Stroke (NINDS) of the US National Institute of Health (NIH), and are available at https://commondataelements.ninds.nih.gov/SCI.aspx#tab=Data_Standards.

Results

The final ISCIEMEDS contains questions on the endocrine and metabolic conditions related to SCI. Because the information may be collected at any time, the date of data collection is important to determine the time after SCI. ISCIEMEDS includes information on carbohydrate metabolism (6 variables), calcium and bone metabolism (12 variables), thyroid function (9 variables), adrenal function (2 variables), gonadal function (7 variables), pituitary function (6 variables), sympathetic nervous system function (1 variable) and renin-aldosterone axis function (2 variables).

Conclusion

The complete instructions for data collection and the data sheet itself are freely available on the website of ISCoS (http://www.iscos.org.uk/international-sci-data-sets).",2017-03-21 +31593887,DistAA: Database of amino acid distances in proteins and web application for statistical review of distances.,"Three-dimensional structure of a protein chain is determined by its amino acid interactions. One approach to the analysis of amino acid interactions refers to geometric distances of amino acid pairs in polypeptide chains. For a detailed analysis of the amino acid distances, the database with three types of amino acid distances in a set of chains was created. Web application Distances of Amino Acids has also been developed to enable scientists to explore interactions of amino acids with different properties based on distances stored in the database. Web application calculates and displays descriptive statistics and graphs of amino acid pair distances with selected properties, such as geometric distance threshold, corresponding SCOP class of proteins and secondary structure types. In addition to the analysis of pre-calculated distances stored in the database, the amino acid distances of a single protein with the specified PDB identifier can also be analyzed. The web application is available at http://andromeda.matf.bg.ac.rs/aadis_dynamic/.",2019-09-28 +31509535,Baseline human gut microbiota profile in healthy people and standard reporting template.,"A comprehensive knowledge of the types and ratios of microbes that inhabit the healthy human gut is necessary before any kind of pre-clinical or clinical study can be performed that attempts to alter the microbiome to treat a condition or improve therapy outcome. To address this need we present an innovative scalable comprehensive analysis workflow, a healthy human reference microbiome list and abundance profile (GutFeelingKB), and a novel Fecal Biome Population Report (FecalBiome) with clinical applicability. 
GutFeelingKB provides a list of 157 organisms (8 phyla, 18 classes, 23 orders, 38 families, 59 genera and 109 species) that forms the baseline biome and therefore can be used as healthy controls for studies related to dysbiosis. This list can be expanded to 863 organisms if closely related proteomes are considered. The incorporation of microbiome science into routine clinical practice necessitates a standard report for comparison of an individual's microbiome to the growing knowledgebase of ""normal"" microbiome data. The FecalBiome and the underlying technology of GutFeelingKB address this need. The knowledgebase can be useful to regulatory agencies for the assessment of fecal transplant and other microbiome products, as it contains a list of organisms from healthy individuals. In addition to the list of organisms and their abundances, this study also generated a collection of assembled contiguous sequences (contigs) of metagenomics dark matter. In this study, metagenomic dark matter represents sequences that cannot be mapped to any known sequence but can be assembled into contigs of 10,000 nucleotides or higher. These sequences can be used to create primers to study potential novel organisms. All data is freely available from https://hive.biochemistry.gwu.edu/gfkb and NCBI's Short Read Archive.",2019-09-11 +30127348,PPInS: a repository of protein-protein interaction sitesbase.,"Protein-Protein Interaction Sitesbase (PPInS), a high-performance database of protein-protein interacting interfaces, is presented. The atomic level information of the molecular interaction happening amongst various protein chains in protein-protein complexes (as reported in the Protein Data Bank [PDB]) together with their evolutionary information in Structural Classification of Proteins (SCOPe release 2.06), is made available in PPInS. 
Total 32468 PDB files representing X-ray crystallized multimeric protein-protein complexes with structural resolution better than 2.5 Å had been shortlisted to demarcate the protein-protein interaction interfaces (PPIIs). A total of 111857 PPIIs with ~32.24 million atomic contact pairs (ACPs) were generated and made available on a web server for on-site analysis and downloading purpose. All these PPIIs and protein-protein interacting patches (PPIPs) involved in them, were also analyzed in terms of a number of residues contributing in patch formation, their hydrophobic nature, amount of surface area they contributed in binding, and their homo and heterodimeric nature, to describe the diversity of information covered in PPInS. It was observed that 42.37% of total PPIPs were made up of 6-20 interacting residues, 53.08% PPIPs had interface area ≤1000 Å2 in PPII formation, 82.64% PPIPs were reported with hydrophobicity score of ≤10, and 73.26% PPIPs were homologous to each other with the sequence similarity score ranging from 75-100%. A subset ""Non-Redundant Database (NRDB)"" of the PPInS containing 2265 PPIIs, with over 1.8 million ACPs corresponding to the 1931 protein-protein complexes (PDBs), was also designed by removing structural redundancies at the level of SCOP superfamily (SCOP release 1.75). The web interface of the PPInS ( http://www.cup.edu.in:99/ppins/home.php ) offers an easy-to-navigate, intuitive and user-friendly environment, and can be accessed by providing PDB ID, SCOP superfamily ID, and protein sequence.",2018-08-20 +28472272,modlAMP: Python for antimicrobial peptides.,"

Summary

We have implemented the molecular design laboratory's antimicrobial peptides package (modlAMP), a Python-based software package for the design, classification and visual representation of peptide data. modlAMP offers functions for molecular descriptor calculation and the retrieval of amino acid sequences from public or local sequence databases, and provides instant access to precompiled datasets for machine learning. The package also contains methods for the analysis and representation of circular dichroism spectra.

Availability and implementation

The modlAMP Python package is available under the BSD license from URL http://doi.org/10.5905/ethz-1007-72 or via pip from the Python Package Index (PyPI).

Contact

gisbert.schneider@pharma.ethz.ch.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +24771669,PhylOPDb: a 16S rRNA oligonucleotide probe database for prokaryotic identification.,"In recent years, high-throughput molecular tools have led to an exponential growth of available 16S rRNA gene sequences. Incorporating such data, molecular tools based on target-probe hybridization were developed to monitor microbial communities within complex environments. Unfortunately, only a few 16S rRNA gene-targeted probe collections were described. Here, we present PhylOPDb, an online resource for a comprehensive phylogenetic oligonucleotide probe database. PhylOPDb provides a convivial and easy-to-use web interface to browse both regular and explorative 16S rRNA-targeted probes. Such probes set or subset could be used to globally monitor known and unknown prokaryotic communities through various techniques including DNA microarrays, polymerase chain reaction (PCR), fluorescent in situ hybridization (FISH), targeted gene capture or in silico rapid sequence identification. PhylOPDb contains 74 003 25-mer probes targeting 2178 genera including Bacteria and Archaea. Database URL: http://g2im.u-clermont1.fr/phylopdb/",2014-04-26 +31560649,The CELLmicrocosmos Tools: A Small History of Java-Based Cell and Membrane Modelling Open Source Software Development. ,"For more than one decade, CELLmicrocosmos tools are being developed. Here, we discuss some of the technical and administrative hurdles to keep a software suite running so many years. The tools were being developed during a number of student projects and theses, whereas main developers refactored and maintained the code over the years. The focus of this publication is laid on two Java-based Open Source Software frameworks. Firstly, the CellExplorer with the PathwayIntegration combines the mesoscopic and the functional level by mapping biological networks onto cell components using database integration. 
Secondly, the MembraneEditor enables users to generate membranes of different lipid and protein compositions using the PDB format. Technicalities will be discussed as well as the historical development of these tools with a special focus on group-based development. In this way, university-associated developers of Integrative Bioinformatics applications should be inspired to go similar ways. All tools discussed in this publication can be downloaded and installed from https://www.CELLmicrocosmos.org.",2019-09-27 +31150052,RBPSponge: genome-wide identification of lncRNAs that sponge RBPs.,"

Summary

Long non-coding RNAs (lncRNAs) can act as molecular sponge or decoys for an RNA-binding protein (RBP) through their RBP-binding sites, thereby modulating the expression of all target genes of the corresponding RBP of interest. Here, we present a web tool named RBPSponge to explore lncRNAs based on their potential to act as a sponge for an RBP of interest. RBPSponge identifies the occurrences of RBP-binding sites and CLIP peaks on lncRNAs, and enables users to run statistical analyses to investigate the regulatory network between lncRNAs, RBPs and targets of RBPs.

Availability and implementation

The web server is available at https://www.RBPSponge.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +32310705,Age Norms for Auditory-Perceptual Neurophonetic Parameters: A Prerequisite for the Assessment of Childhood Dysarthria.,"Purpose The aim of this study was to collect auditory-perceptual data on established symptom categories of dysarthria from typically developing children between 3 and 9 years of age, for the purpose of creating age norms for dysarthria assessment. Method One hundred forty-four typically developing children (3;0-9;11 [years;months], 72 girls and 72 boys) participated. We used a computer-based game specifically designed for this study to elicit sentence repetitions and spontaneous speech samples. Speech recordings were analyzed using the auditory-perceptual criteria of the Bogenhausen Dysarthria Scales, a standardized German assessment tool for dysarthria in adults. The Bogenhausen Dysarthria Scales (scales and features) cover clinically relevant dimensions of speech and allow for an evaluation of well-established symptom categories of dysarthria. Results The typically developing children exhibited a number of speech characteristics overlapping with established symptom categories of dysarthria (e.g., breathy voice, frequent inspirations, reduced articulatory precision, decreased articulation rate). Substantial progress was observed between 3 and 9 years of age, but with different developmental trajectories across different dimensions. In several areas (e.g., respiration, voice quality), 9-year-olds still presented with salient developmental speech characteristics, while in other dimensions (e.g., prosodic modulation), features typically associated with dysarthria occurred only exceptionally, even in the 3-year-olds. Conclusions The acquisition of speech motor functions is a prolonged process not yet completed with 9 years. Various developmental influences (e.g., anatomic-physiological changes) shape children's speech specifically. 
Our findings are a first step toward establishing auditory-perceptual norms for dysarthria in children of kindergarten and elementary school age. Supplemental Material https://doi.org/10.23641/asha.12133380.",2020-04-18 +29036289,VICTOR: genome-based phylogeny and classification of prokaryotic viruses.,"

Motivation

Bacterial and archaeal viruses are crucial for global biogeochemical cycles and might well be game-changing therapeutic agents in the fight against multi-resistant pathogens. Nevertheless, it is still unclear how to best use genome sequence data for a fast, universal and accurate taxonomic classification of such viruses.

Results

We here present a novel in silico framework for phylogeny and classification of prokaryotic viruses, in line with the principles of phylogenetic systematics, and using a large reference dataset of officially classified viruses. The resulting trees revealed a high agreement with the classification. Except for low resolution at the family level, the majority of taxa was well supported as monophyletic. Clusters obtained with distance thresholds chosen for maximizing taxonomic agreement appeared phylogenetically reasonable, too. Analysis of an expanded dataset, containing >4000 genomes from public databases, revealed a large number of novel species, genera, subfamilies and families.

Availability and implementation

The selected methods are available as the easy-to-use web service 'VICTOR' at https://victor.dsmz.de.

Contact

jan.meier-kolthoff@dsmz.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +31322437,Proposed Key Characteristics of Female Reproductive Toxicants as an Approach for Organizing and Evaluating Mechanistic Data in Hazard Assessment.,"

Background

Identification of female reproductive toxicants is currently based largely on integrated epidemiological and in vivo toxicology data and, to a lesser degree, on mechanistic data. A uniform approach to systematically search, organize, integrate, and evaluate mechanistic evidence of female reproductive toxicity from various data types is lacking.

Objective

We sought to apply a key characteristics approach similar to that pioneered for carcinogen hazard identification to female reproductive toxicant hazard identification.

Methods

A working group of international experts was convened to discuss mechanisms associated with chemical-induced female reproductive toxicity and identified 10 key characteristics of chemicals that cause female reproductive toxicity: 1) alters hormone receptor signaling; alters reproductive hormone production, secretion, or metabolism; 2) chemical or metabolite is genotoxic; 3) induces epigenetic alterations; 4) causes mitochondrial dysfunction; 5) induces oxidative stress; 6) alters immune function; 7) alters cell signal transduction; 8) alters direct cell–cell interactions; 9) alters survival, proliferation, cell death, or metabolic pathways; and 10) alters microtubules and associated structures. As proof of principle, cyclophosphamide and diethylstilbestrol (DES), for which both human and animal studies have demonstrated female reproductive toxicity, display at least 5 and 3 key characteristics, respectively. 2,3,7,8-Tetrachlorodibenzo-p-dioxin (TCDD), for which the epidemiological evidence is mixed, exhibits 5 key characteristics.

Discussion

Future efforts should focus on evaluating the proposed key characteristics against additional known and suspected female reproductive toxicants. Chemicals that exhibit one or more of the key characteristics could be prioritized for additional evaluation and testing. A key characteristics approach has the potential to integrate with pathway-based toxicity testing to improve prediction of female reproductive toxicity in chemicals and potentially prevent some toxicants from entering common use. https://doi.org/10.1289/EHP4971.",2019-07-19 +32245765,Whole-Genome Comparisons of Staphylococcus agnetis Isolates from Cattle and Chickens. ,"Staphylococcus agnetis has been previously associated with subclinical or clinically mild cases of mastitis in dairy cattle and is one of several staphylococcal species that have been isolated from the bones and blood of lame broilers. We reported that S. agnetis could be obtained frequently from bacterial chondronecrosis with osteomyelitis (BCO) lesions of lame broilers (A. Al-Rubaye et al., PLoS One 10:e0143336, 2015 [https://doi.org/10.1371/journal.pone.0143336]). A particular isolate, S. agnetis 908, can induce lameness in over 50% of exposed chickens, exceeding normal BCO incidences in broiler operations. We reported the assembly and annotation of the genome of isolate 908. To better understand the relationship between dairy cattle and broiler isolates, we assembled 11 additional genomes for S. agnetis isolates, an additional chicken BCO strain, and ten isolates from cattle milk, mammary gland secretions, or udder skin from the collection at the University of Missouri. To trace phylogenetic relationships, we constructed phylogenetic trees based on multilocus sequence typing and genome-to-genome distance comparisons. Chicken isolate 908 clustered with two of the cattle isolates, along with three isolates from chickens in Denmark and an isolate of S. agnetis we isolated from a BCO lesion on a commercial broiler farm in Arkansas. 
We used a number of BLAST tools to compare the chicken isolates to those from cattle and identified 98 coding sequences distinguishing isolate 908 from the cattle isolates. None of the identified genes explain the differences in host or tissue tropism. These analyses are critical to understanding how staphylococci colonize and infect different hosts and potentially how they can transition to alternative niches (bone versus dermis).IMPORTANCEStaphylococcus agnetis has been recently recognized as associated with disease in dairy cattle and meat-type chickens. The infections appear to be limited in cattle and systemic in broilers. This report details the molecular relationships between cattle and chicken isolates in order to understand how this recently recognized species infects different hosts with different disease manifestations. The data show that the chicken and cattle isolates are very closely related, but the chicken isolates all cluster together, suggesting a single jump from cattle to chickens.",2020-06-02 +31077416,"CRYSPLOT: A new tool to visualize physical and chemical properties of molecules, polymers, surfaces, and crystalline solids.","CRYSPLOT is a web-oriented tool (http://crysplot.crystalsolutions.eu) to visualize computed properties of periodic systems, in particular, as computed with the CRYSTAL code. Along with plotting, CRYSPLOT also permits the modification and customization of plots to meet the standards required for scientific graphics. CRYSPLOT has been designed with advanced and freely available graphical Javascript libraries as Plotly. The programming language used is Javascript. The code parses the input files, reads the data, and organizes them into objects ready to be plotted with the plotly.js library. It is modular and flexible so that it is very simple to add other input data formats. The new graphical tool is presented in details along with selected applications on metal-organic frameworks to show some of its capabilities. 
© 2019 Wiley Periodicals, Inc.",2019-05-11 +31199528,"Decrypting protein surfaces by combining evolution, geometry, and molecular docking.","The growing body of experimental and computational data describing how proteins interact with each other has emphasized the multiplicity of protein interactions and the complexity underlying protein surface usage and deformability. In this work, we propose new concepts and methods toward deciphering such complexity. We introduce the notion of interacting region to account for the multiple usage of a protein's surface residues by several partners and for the variability of protein interfaces coming from molecular flexibility. We predict interacting patches by crossing evolutionary, physicochemical and geometrical properties of the protein surface with information coming from complete cross-docking (CC-D) simulations. We show that our predictions match well interacting regions and that the different sources of information are complementary. We further propose an indicator of whether a protein has a few or many partners. Our prediction strategies are implemented in the dynJET2 algorithm and assessed on a new dataset of 262 protein on which we performed CC-D. The code and the data are available at: http://www.lcqb.upmc.fr/dynJET2/.",2019-06-26 +31719438,"LutzoDex™-A digital key for Brazilian sand flies (Diptera, Phlebotominae) within an Android App.","Here we present an Android mobile application (app) for the identification of Brazilian phlebotomine sand fly species. The app, which is named LutzoDex™, relies on information included in a data source with morphological and morphometrical characters. This tool can present up to seven answer options to a question. Images of morphological structures can be referenced to make identification easier, and users can see a list of possible species based on the features they report. 
Maps are also used to determine the geographical distribution and whether the species is incriminated or suspected as a vector of Leishmania spp. in Brazil. The app is available free of charge in both English and Portuguese in the Google Play Store at https://play.google.com/store/apps/details?id=max.com.lutzodex&hl=pt_BR.",2019-10-23 +30413482,ChIP-Atlas: a data-mining suite powered by full integration of public ChIP-seq data. ,"We have fully integrated public chromatin immunoprecipitation sequencing (ChIP-seq) and DNase-seq data (n > 70,000) derived from six representative model organisms (human, mouse, rat, fruit fly, nematode, and budding yeast), and have devised a data-mining platform-designated ChIP-Atlas (http://chip-atlas.org). ChIP-Atlas is able to show alignment and peak-call results for all public ChIP-seq and DNase-seq data archived in the NCBI Sequence Read Archive (SRA), which encompasses data derived from GEO, ArrayExpress, DDBJ, ENCODE, Roadmap Epigenomics, and the scientific literature. All peak-call data are integrated to visualize multiple histone modifications and binding sites of transcriptional regulators (TRs) at given genomic loci. The integrated data can be further analyzed to show TR-gene and TR-TR interactions, as well as to examine enrichment of protein binding for given multiple genomic coordinates or gene names. ChIP-Atlas is superior to other platforms in terms of data number and functionality for data mining across thousands of ChIP-seq experiments, and it provides insight into gene regulatory networks and epigenetic mechanisms.",2018-11-09 +29278730,CM-viewer: Visualizing interaction network of co-mutated and mutually exclusively mutated cancer genes.,"Cancer genes usually play a crucial role in regulating cell growth. Normal cells transform into malignant tumors by the acquisition of accumulated genetic mutations that enable them to evade normal growth control. 
It is therefore important to understand the relationships between mutations during cancer development and progression. Although cancer genes with co-occurring and mutually exclusive mutations have already been studied on different scales, there is no timely updated interaction network available for co-mutated and mutually exclusively mutated cancer genes. Therefore, we firstly downloaded 567 cancer genes from COSMIC (catalogue of somatic mutations in cancer) cancer gene census. Secondly, somatic mutations of 71 cancer genomics projects were downloaded from the ICGC (International Cancer Genome Consortium) data portal. Thirdly, mutated cancer genes and affected donors were extracted from the ICGC data to form a mutation matrix where rows are genes, columns are donors, 1 denotes occurrence, and 0 denotes absence of mutation. Afterwards, co-mutated and mutually exclusively mutated cancer gene pairs were identified using DISCOVER (discrete independence statistic controlling for observations with varying event rates). Finally, CM-viewer was developed to visualize the interaction network of cancer genes with co-occurring and mutually exclusive mutations. It is an online visualization tool as well as a biological database. It promises to understand how gene mutations contribute to tumorigenesis and to identify key biomarkers and drug targets for cancer. CM-viewer is freely available at http://www.zhounan.org/comutgene.",2017-12-24 +26637529,ONRLDB--manually curated database of experimentally validated ligands for orphan nuclear receptors: insights into new drug discovery. ,"Orphan nuclear receptors are potential therapeutic targets. The Orphan Nuclear Receptor Ligand Binding Database (ONRLDB) is an interactive, comprehensive and manually curated database of small molecule ligands targeting orphan nuclear receptors. Currently, ONRLDB consists of ∼11,000 ligands, of which ∼6500 are unique. 
All entries include information for the ligand, such as EC50 and IC50, number of aromatic rings and rotatable bonds, XlogP, hydrogen donor and acceptor count, molecular weight (MW) and structure. ONRLDB is a cross-platform database, where either the cognate small molecule modulators of a receptor or the cognate receptors to a ligand can be searched. The database can be searched using three methods: text search, advanced search or similarity search. Substructure search, cataloguing tools, and clustering tools can be used to perform advanced analysis of the ligand based on chemical similarity fingerprints, hierarchical clustering, binning partition and multidimensional scaling. These tools, together with the Tree function provided, deliver an interactive platform and a comprehensive resource for identification of common and unique scaffolds. As demonstrated, ONRLDB is designed to allow selection of ligands based on various properties and for designing novel ligands or to improve the existing ones. Database URL: http://www.onrldb.org/.",2015-12-04 +28293298,Rapid development of entity-based data models for bioinformatics with persistence object-oriented design and structured interfaces.,"Databases are imperative for research in bioinformatics and computational biology. Current challenges in database design include data heterogeneity and context-dependent interconnections between data entities. These challenges drove the development of unified data interfaces and specialized databases. The curation of specialized databases is an ever-growing challenge due to the introduction of new data sources and the emergence of new relational connections between established datasets. Here, an open-source framework for the curation of specialized databases is proposed. 
The framework supports user-designed models of data encapsulation, objects persistency and structured interfaces to local and external data sources such as MalaCards, Biomodels and the National Centre for Biotechnology Information (NCBI) databases. The proposed framework was implemented using Java as the development environment, EclipseLink as the data persistency agent and Apache Derby as the database manager. Syntactic analysis was based on J3D, jsoup, Apache Commons and w3c.dom open libraries. Finally, a construction of a specialized database for aneurysms associated vascular diseases is demonstrated. This database contains 3-dimensional geometries of aneurysms, patient's clinical information, articles, biological models, related diseases and our recently published model of aneurysms' risk of rupture. Framework is available in: http://nbel-lab.com.",2017-03-11 +32418343,Head and neck cancer patient images for determining auto-segmentation accuracy in T2-weighted magnetic resonance imaging through expert manual segmentations.,"

Purpose

The use of magnetic resonance imaging (MRI) in radiotherapy treatment planning has rapidly increased due to its ability to evaluate patient's anatomy without the use of ionizing radiation and due to its high soft tissue contrast. For these reasons, MRI has become the modality of choice for longitudinal and adaptive treatment studies. Automatic segmentation could offer many benefits for these studies. In this work, we describe a T2-weighted MRI dataset of head and neck cancer patients that can be used to evaluate the accuracy of head and neck normal tissue auto-segmentation systems through comparisons to available expert manual segmentations.

Acquisition and validation methods

T2-weighted MRI images were acquired for 55 head and neck cancer patients. These scans were collected after radiotherapy computed tomography (CT) simulation scans using a thermoplastic mask to replicate patient treatment position. All scans were acquired on a single 1.5 T Siemens MAGNETOM Aera MRI with two large four-channel flex phased-array coils. The scans covered the region encompassing the nasopharynx region cranially and supraclavicular lymph node region caudally, when possible, in the superior-inferior direction. Manual contours were created for the left/right submandibular gland, left/right parotids, left/right lymph node level II, and left/right lymph node level III. These contours underwent quality assurance to ensure adherence to predefined guidelines, and were corrected if edits were necessary.

Data format and usage notes

The T2-weighted images and RTSTRUCT files are available in DICOM format. The regions of interest are named based on AAPM's Task Group 263 nomenclature recommendations (Glnd_Submand_L, Glnd_Submand_R, LN_Neck_II_L, Parotid_L, Parotid_R, LN_Neck_II_R, LN_Neck_III_L, LN_Neck_III_R). This dataset is available on The Cancer Imaging Archive (TCIA) by the National Cancer Institute under the collection ""AAPM RT-MAC Grand Challenge 2019"" (https://doi.org/10.7937/tcia.2019.bcfjqfqb).

Potential applications

This dataset provides head and neck patient MRI scans to evaluate auto-segmentation systems on T2-weighted images. Additional anatomies could be provided at a later time to enhance the existing library of contours.",2020-06-01 +32164691,Intra- and inter- observer reliability of anthropometric measurements and blood pressure in primary schoolchildren and adults: the Feel4Diabetes-study.,"BACKGROUND:Feel4Diabetes was a large-scale, multicenter lifestyle intervention aiming to prevent type 2 diabetes among families from vulnerable population groups in six European countries (Belgium, Bulgaria, Finland, Greece, Hungary and Spain). The current study aimed to describe the process that was followed to harmonize and standardize the measurement of anthropometric (weight, height and waist circumference) and blood pressure (systolic and diastolic) indices, as well as to assess the intra- and inter- observer reliability of these measurements. METHODS:A central training workshop was conducted prior to the baseline measurements of the Feel4Diabetes-intervention. One researcher from each intervention country, as well as 12 adults and 12 children (for the anthropometric measurements) and 21 adults (for the blood pressure measurements) participated in this workshop. Technical Error of Measurement (TEM) and reliability (%R) were calculated to assess the reliability of the indices which were assessed to evaluate the outcome of the Feel4Diabetes-intervention. The Feel4Diabetes-intervention is registered at https://clinicaltrials.gov/ (NCT02393872). RESULTS:Intra-observer reliability was found to be higher than 99.5% for all anthropometric measurements in both children and adults. Inter-observer reliability was found to be higher than 98% regarding the anthropometric measurements, while for blood pressure measurements %R was 76.62 and 91.38% for systolic and diastolic blood pressure measurements, respectively. 
CONCLUSION:The central training of the Feel4Diabetes-intervention ensured that the data collected for the outcome evaluation of the Feel4Diabetes-intervention in the six European countries at three different time points (baseline, follow-up 1 and follow-up 2) were valid and comparable.",2020-03-12 +30535134,"Improving prediction of protein secondary structure, backbone angles, solvent accessibility and contact numbers by using predicted contact maps and an ensemble of recurrent and residual convolutional neural networks.","

Motivation

Sequence-based prediction of one dimensional structural properties of proteins has been a long-standing subproblem of protein structure prediction. Recently, prediction accuracy has been significantly improved due to the rapid expansion of protein sequence and structure libraries and advances in deep learning techniques, such as residual convolutional networks (ResNets) and Long-Short-Term Memory Cells in Bidirectional Recurrent Neural Networks (LSTM-BRNNs). Here we leverage an ensemble of LSTM-BRNN and ResNet models, together with predicted residue-residue contact maps, to continue the push towards the attainable limit of prediction for 3- and 8-state secondary structure, backbone angles (θ, τ, ϕ and ψ), half-sphere exposure, contact numbers and solvent accessible surface area (ASA).

Results

The new method, named SPOT-1D, achieves similar, high performance on a large validation set and test set (≈1000 proteins in each set), suggesting robust performance for unseen data. For the large test set, it achieves 87% and 77% in 3- and 8-state secondary structure prediction and 0.82 and 0.86 in correlation coefficients between predicted and measured ASA and contact numbers, respectively. Comparison to current state-of-the-art techniques reveals substantial improvement in secondary structure and backbone angle prediction. In particular, 44% of 40-residue fragment structures constructed from predicted backbone Cα-based θ and τ angles are less than 6 Å root-mean-squared-distance from their native conformations, nearly 20% better than the next best. The method is expected to be useful for advancing protein structure and function prediction.

Availability and implementation

SPOT-1D and its data are available at: http://sparks-lab.org/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +30178837,The role novel targeted agents in the treatment of previously treated patients with advanced urothelial carcinoma (UC): a meta-analysis.,"

Objective

Second-line treatment options for advanced urothelial carcinoma (UC) patients are limited. We aim to investigate the efficacy and toxicities of novel targeted agents (TAs) as salvage treatment for advanced UC by using a meta-analysis.

Materials and methods

Relevant trials published from 1994 to 2017 were identified by an electronic search of public databases. Demographic data, treatment regimens, objective response rate (ORR), disease control rate (DCR), median progression-free and overall survival (PFS, OS) and grade 3/4 toxicities were extracted and analyzed using open Meta-Analyst software version 4.16.12 (Tufts University, URL http://tuftscaes.org/open_meta/).

Results

Eleven trials with 1,630 previously treated UC patients were included for analysis. The pooled ORR, DCR and 1-year OS for single targeted agent in pre-treated UC patients was 10.7% (95% CI: 10.7-19.6%), 33.2% (95% CI: 25-41.4%), and 31% (95% CI: 23.6-39.4%), respectively. Sub-group analysis based on specific targeted agents showed that the efficacy of immune checkpoints inhibitors (ICIs) was significantly higher than that of small molecular tyrosine-kinase inhibitors (TKIs) concerning ORR and 1-year OS. Also, a meta-analysis of three randomized controlled trials showed that the use of TAs in advanced UC patients significantly improved ORR, but not for DCR. As for grade 3 and 4 toxicities, more incidences of severe anemia, fatigue, and diarrhea were observed in the TKIs group than in ICIs group, but not for hypertension.

Conclusions

Our findings support the use of immune checkpoints inhibitors, but not for tyrosine kinase inhibitors as salvage treatment for previously treated UC patients due to its potential survival benefits.",2018-08-01 +29069500,sRNAnalyzer-a flexible and customizable small RNA sequencing data analysis pipeline.,"Although many tools have been developed to analyze small RNA sequencing (sRNA-Seq) data, it remains challenging to accurately analyze the small RNA population, mainly due to multiple sequence ID assignment caused by short read length. Additional issues in small RNA analysis include low consistency of microRNA (miRNA) measurement results across different platforms, miRNA mapping associated with miRNA sequence variation (isomiR) and RNA editing, and the origin of those unmapped reads after screening against all endogenous reference sequence databases. To address these issues, we built a comprehensive and customizable sRNA-Seq data analysis pipeline-sRNAnalyzer, which enables: (i) comprehensive miRNA profiling strategies to better handle isomiRs and summarization based on each nucleotide position to detect potential SNPs in miRNAs, (ii) different sequence mapping result assignment approaches to simulate results from microarray/qRT-PCR platforms and a local probabilistic model to assign mapping results to the most-likely IDs, (iii) comprehensive ribosomal RNA filtering for accurate mapping of exogenous RNAs and summarization based on taxonomy annotation. We evaluated our pipeline on both artificial samples (including synthetic miRNA and Escherichia coli cultures) and biological samples (human tissue and plasma). 
sRNAnalyzer is implemented in Perl and available at: http://srnanalyzer.systemsbiology.net/.",2017-12-01 +31191601,LncRRIsearch: A Web Server for lncRNA-RNA Interaction Prediction Integrated With Tissue-Specific Expression and Subcellular Localization Data.,"Long non-coding RNAs (lncRNAs) play critical roles in various biological processes, but the function of the majority of lncRNAs is still unclear. One approach for estimating a function of a lncRNA is the identification of its interaction target because functions of lncRNAs are expressed through interaction with other biomolecules in quite a few cases. In this paper, we developed ""LncRRIsearch,"" which is a web server for comprehensive prediction of human and mouse lncRNA-lncRNA and lncRNA-mRNA interaction. The prediction was conducted using RIblast, which is a fast and accurate RNA-RNA interaction prediction tool. Users can investigate interaction target RNAs of a particular lncRNA through a web interface. In addition, we integrated tissue-specific expression and subcellular localization data for the lncRNAs with the web server. These data enable users to examine tissue-specific or subcellular localized lncRNA interactions. LncRRIsearch is publicly accessible at http://rtools.cbrc.jp/LncRRIsearch/.",2019-05-28 +25518738,The chickpea genomic web resource: visualization and analysis of the desi-type Cicer arietinum nuclear genome for comparative exploration of legumes.,"

Background

Availability of the draft nuclear genome sequences of small-seeded desi-type legume crop Cicer arietinum has provided an opportunity for investigating unique chickpea genomic features and evaluation of their biological significance. The increasing number of legume genome sequences also presents a challenge for developing reliable and information-driven bioinformatics applications suitable for comparative exploration of this important class of crop plants.

Results

The Chickpea Genomic Web Resource (CGWR) is an implementation of a suite of web-based applications dedicated to chickpea genome visualization and comparative analysis, based on next generation sequencing and assembly of Cicer arietinum desi-type genotype ICC4958. CGWR has been designed and configured for mapping, scanning and browsing the significant chickpea genomic features in view of the important existing and potential roles played by the various legume genome projects in mutant mapping and cloning. It also enables comparative informatics of ICC4958 DNA sequence analysis with other wild and cultivated genotypes of chickpea, various other leguminous species as well as several non-leguminous model plants, to enable investigations into evolutionary processes that shape legume genomes.

Conclusions

CGWR is an online database offering a comprehensive visual and functional genomic analysis of the chickpea genome, along with customized maps and gene-clustering options. It is also the only plant based web resource supporting display and analysis of nucleosome positioning patterns in the genome. The usefulness of CGWR has been demonstrated with discoveries of biological significance made using this server. The CGWR is compatible with all available operating systems and browsers, and is available freely under the open source license at http://www.nipgr.res.in/CGWR/home.php.",2014-12-18 +31510691,Precise modelling and interpretation of bioactivities of ligands targeting G protein-coupled receptors.,"

Motivation

Accurate prediction and interpretation of ligand bioactivities are essential for virtual screening and drug discovery. Unfortunately, many important drug targets lack experimental data about the ligand bioactivities; this is particularly true for G protein-coupled receptors (GPCRs), which account for the targets of about a third of drugs currently on the market. Computational approaches with the potential of precise assessment of ligand bioactivities and determination of key substructural features which determine ligand bioactivities are needed to address this issue.

Results

A new method, SED, was proposed to predict ligand bioactivities and to recognize key substructures associated with GPCRs through the coupling of screening for Lasso of long extended-connectivity fingerprints (ECFPs) with deep neural network training. The SED pipeline contains three successive steps: (i) representation of long ECFPs for ligand molecules, (ii) feature selection by screening for Lasso of ECFPs and (iii) bioactivity prediction through a deep neural network regression model. The method was examined on a set of 16 representative GPCRs that cover most subfamilies of human GPCRs, where each has 300-5000 ligand associations. The results show that SED achieves excellent performance in modelling ligand bioactivities, especially for those in the GPCR datasets without sufficient ligand associations, where SED improved the baseline predictors by 12% in correlation coefficient (r2) and 19% in root mean square error. Detailed data analyses suggest that the major advantage of SED lies in its ability to detect substructures from long ECFPs, which significantly improves the predictive performance.

Availability and implementation

The source code and datasets of SED are freely available at https://zhanglab.ccmb.med.umich.edu/SED/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +31045611,Development of the Initial Surveys for the All of Us Research Program.,"

Background

The All of Us Research Program is building a national longitudinal cohort and collecting data from multiple information sources (e.g., biospecimens, electronic health records, and mobile/wearable technologies) to advance precision medicine. Participant-provided information, collected via surveys, will complement and augment these information sources. We report the process used to develop and refine the initial three surveys for this program.

Methods

The All of Us survey development process included: (1) prioritization of domains for scientific needs, (2) examination of existing validated instruments, (3) content creation, (4) evaluation and refinement via cognitive interviews and online testing, (5) content review by key stakeholders, and (6) launch in the All of Us electronic participant portal. All content was translated into Spanish.

Results

We conducted cognitive interviews in English and Spanish with 169 participants, and 573 individuals completed online testing. Feedback led to over 40 item content changes. Lessons learned included: (1) validated survey instruments performed well in diverse populations reflective of All of Us; (2) parallel evaluation of multiple languages can ensure optimal survey deployment; (3) recruitment challenges in diverse populations required multiple strategies; and (4) key stakeholders improved integration of surveys into larger Program context.

Conclusions

This efficient, iterative process led to successful testing, refinement, and launch of three All of Us surveys. Reuse of All of Us surveys, available at http://researchallofus.org, may facilitate large consortia targeting diverse populations in English and Spanish to capture participant-provided information to supplement other data, such as genetic, physical measurements, or data from electronic health records.",2019-07-01 +25186741,"An RNA-sequencing transcriptome and splicing database of glia, neurons, and vascular cells of the cerebral cortex.","The major cell classes of the brain differ in their developmental processes, metabolism, signaling, and function. To better understand the functions and interactions of the cell types that comprise these classes, we acutely purified representative populations of neurons, astrocytes, oligodendrocyte precursor cells, newly formed oligodendrocytes, myelinating oligodendrocytes, microglia, endothelial cells, and pericytes from mouse cerebral cortex. We generated a transcriptome database for these eight cell types by RNA sequencing and used a sensitive algorithm to detect alternative splicing events in each cell type. Bioinformatic analyses identified thousands of new cell type-enriched genes and splicing isoforms that will provide novel markers for cell identification, tools for genetic manipulation, and insights into the biology of the brain. For example, our data provide clues as to how neurons and astrocytes differ in their ability to dynamically regulate glycolytic flux and lactate generation attributable to unique splicing of PKM2, the gene encoding the glycolytic enzyme pyruvate kinase. This dataset will provide a powerful new resource for understanding the development and function of the brain. 
To ensure the widespread distribution of these datasets, we have created a user-friendly website (http://web.stanford.edu/group/barres_lab/brain_rnaseq.html) that provides a platform for analyzing and comparing transciption and alternative splicing profiles for various cell classes in the brain.",2014-09-01 +31670575,Predicted Northward Expansion of the Geographic Range of the Tick Vector Amblyomma americanum in North America under Future Climate Conditions.,"BACKGROUND:The geographic range of the tick Amblyomma americanum, a vector of diseases of public health significance such as ehrlichiosis, has expanded from the southeast of the United States northward during the 20th century. Recently, populations of this tick have been reported to be present close to the Canadian border in Michigan and New York states, but established populations are not known in Canada. Previous research suggests that changing temperature patterns with climate change may influence tick life cycles and permit northward range expansion of ticks in the northern hemisphere. OBJECTIVES:We aimed to estimate minimal temperature conditions for survival of A. americanum populations at the northern edge of the tick's range and to investigate the possibility of range expansion of A. americanum into northern U.S. states and southern Canada in the coming decades. METHODS:A simulation model of the tick A. americanum was used, via simulations using climate data from meteorological stations in the United States and Canada, to estimate minimal temperature conditions for survival of A. americanum populations at the northern edge of the tick's range. RESULTS:The predicted geographic scope of temperature suitability [≥3,285  annual cumulative degree days (DD) >0°C] included most of the central and eastern U.S. states east of longitude 110°W, which is consistent with current surveillance data for the presence of the tick in this region, as well as parts of southern Quebec and Ontario in Canada. 
Regional climate model output raises the possibility of northward range expansion into all provinces of Canada from Alberta to Newfoundland and Labrador during the coming decades, with the greatest northward range expansion (up to 1,000km by the year 2100) occurring under the greenhouse gas (GHG) emissions of Representative Concentration Pathway (RCP) 8.5. Predicted northward range expansion was reduced by approximately half under the reduced GHG emissions of RCP4.5. DISCUSSION:Our results raise the possibility of range expansion of A. americanum into northern U.S. states and southern Canada in the coming decades, and conclude that surveillance for this tick, and the diseases it transmits, would be prudent. https://doi.org/10.1289/EHP5668.",2019-10-31 +27434804,"Training in metabolomics research. I. Designing the experiment, collecting and extracting samples and generating metabolomics data.","The study of metabolism has had a long history. Metabolomics, a systems biology discipline representing analysis of known and unknown pathways of metabolism, has grown tremendously over the past 20 years. Because of its comprehensive nature, metabolomics requires careful consideration of the question(s) being asked, the scale needed to answer the question(s), collection and storage of the sample specimens, methods for extraction of the metabolites from biological matrices, the analytical method(s) to be employed and the quality control of the analyses, how collected data are correlated, the statistical methods to determine metabolites undergoing significant change, putative identification of metabolites and the use of stable isotopes to aid in verifying metabolite identity and establishing pathway connections and fluxes. The National Institutes of Health Common Fund Metabolomics Program was established in 2012 to stimulate interest in the approaches and technologies of metabolomics. 
To deliver one of the program's goals, the University of Alabama at Birmingham has hosted an annual 4-day short course in metabolomics for faculty, postdoctoral fellows and graduate students from national and international institutions. This paper is the first part of a summary of the training materials presented in the course to be used as a resource for all those embarking on metabolomics research. The complete set of training materials including slide sets and videos can be viewed at http://www.uab.edu/proteomics/metabolomics/workshop/workshop_june_2015.php. Copyright © 2016 John Wiley & Sons, Ltd.",2016-07-01 +28830355,"The porcine translational research database: a manually curated, genomics and proteomics-based research resource.","

Background

The use of swine in biomedical research has increased dramatically in the last decade. Diverse genomic- and proteomic databases have been developed to facilitate research using human and rodent models. Current porcine gene databases, however, lack the robust annotation to study pig models that are relevant to human studies and for comparative evaluation with rodent models. Furthermore, they contain a significant number of errors due to their primary reliance on machine-based annotation. To address these deficiencies, a comprehensive literature-based survey was conducted to identify certain selected genes that have demonstrated function in humans, mice or pigs.

Results

The process identified 13,054 candidate human, bovine, mouse or rat genes/proteins used to select potential porcine homologs by searching multiple online sources of porcine gene information. The data in the Porcine Translational Research Database ( http://www.ars.usda.gov/Services/docs.htm?docid=6065 ) is supported by >5800 references, and contains 65 data fields for each entry, including >9700 full length (5' and 3') unambiguous pig sequences, >2400 real time PCR assays and reactivity information on >1700 antibodies. It also contains gene and/or protein expression data for >2200 genes and identifies and corrects 8187 errors (gene duplication artifacts, mis-assemblies, mis-annotations, and incorrect species assignments) for 5337 porcine genes.

Conclusions

This database is the largest manually curated database for any single veterinary species and is unique among porcine gene databases in regard to linking gene expression to gene function, identifying related gene pathways, and connecting data with other porcine gene databases. This database provides the first comprehensive description of three major Super-families or functionally related groups of proteins (Cluster of Differentiation (CD) Marker genes, Solute Carrier Superfamily, ATP binding Cassette Superfamily), and a comparative description of porcine microRNAs.",2017-08-22 +28787149,MetExtract II: A Software Suite for Stable Isotope-Assisted Untargeted Metabolomics.,"Stable isotope labeling (SIL) techniques have the potential to enhance different aspects of liquid chromatography-high-resolution mass spectrometry (LC-HRMS)-based untargeted metabolomics methods including metabolite detection, annotation of unknown metabolites, and comparative quantification. In this work, we present MetExtract II, a software toolbox for detection of biologically derived compounds. It exploits SIL-specific isotope patterns and elution profiles in LC-HRMS(/MS) data. The toolbox consists of three complementary modules: M1 (AllExtract) uses mixtures of uniformly highly isotope-enriched and native biological samples for selective detection of the entire accessible metabolome. M2 (TracExtract) is particularly suited to probe the metabolism of endogenous or exogenous secondary metabolites and facilitates the untargeted screening of tracer derivatives from concurrently metabolized native and uniformly labeled tracer substances. With M3 (FragExtract), tandem mass spectrometry (MS/MS) fragments of corresponding native and uniformly labeled ions are evaluated and automatically assigned with putative sum formulas. 
Generated results can be graphically illustrated and exported as a comprehensive data matrix that contains all detected pairs of native and labeled metabolite ions that can be used for database queries, metabolome-wide internal standardization, and statistical analysis. The software, associated documentation, and sample data sets are freely available for noncommercial use at http://metabolomics-ifa.boku.ac.at/metextractII .",2017-08-22 +32211391,CLING: Candidate Cancer-Related lncRNA Prioritization via Integrating Multiple Biological Networks.,"Identification and characterization of lncRNAs in cancer with a view to their application in improving diagnosis and therapy remains a major challenge that requires new and innovative approaches. We have developed an integrative framework termed ""CLING"", aimed to prioritize candidate cancer-related lncRNAs based on their associations with known cancer lncRNAs. CLING focuses on joint optimization and prioritization of all candidates for each cancer type by integrating lncRNA topological properties and multiple lncRNA-centric networks. Validation analyses revealed that CLING is more effective than prioritization based on a single lncRNA network. Reliable AUC (Area Under Curve) scores were obtained across 10 cancer types, ranging from 0.85 to 0.94. Several novel lncRNAs predicted in the top 10 candidates for various cancer types have been confirmed by recent biological experiments. Furthermore, using a case study on liver hepatocellular carcinoma as an example, CLING facilitated the successful identification of novel cancer lncRNAs overlooked by differential expression analyses (DEA). This time- and cost-effective computational model may provide a valuable complement to experimental studies and assist in future investigations on lncRNA involvement in the pathogenesis of cancers. 
We have developed a web-based server for users to rapidly implement CLING and visualize data, which is freely accessible at http://bio-bigdata.hrbmu.edu.cn/cling/. CLING has been successfully applied to predict a few potential lncRNAs from thousands of candidates for many cancer types.",2020-03-10 +,DETERMINANTS OF DNA METHYLATION BASED AGE ACCELERATION IN YOUNG AND OLDER TWIN PAIRS,"Abstract DNA methylation (DNAm) age, a novel marker of biological aging, has been shown to predict mortality and to be associated with physiological aging. However, the relative contribution of genetic and environmental factors to DNAm age over life span is not fully known. We estimated the magnitude of genetic and environmental factors in DNAm based age acceleration. Age acceleration (residuals from a linear regression model of DNAm age on chronological age) in white blood cells was calculated from 450k BeadChip methylation data using an online calculator (https://dnamage.genetics.ucla.edu). The genetic and shared and non-shared environmental determinants of age acceleration were studied in young (20 to 25 year-old) and older (55 to 75 year-old) monozygotic (MZ, n=168 young, n=122 older) and dizygotic (DZ, n=121 young, n=42 older) twin pairs. Correlation between DNAm age and chronological age was 0.97 (p<0.001, n=1249 individuals). Mean age acceleration was similar among young and older twin pairs, whether MZ or DZ. Intraclass correlation coefficients were 0.74 (95% CI 0.66, 0.80) for young MZ, 0.43 (0.27, 0.56) for young DZ, and 0.59 (0.46, 0.69) for older MZ and 0.17 (-0.13, 0.45) for older DZ twin pairs. Quantitative genetic modeling revealed that genetic factors explained larger amount of the variation in DNAm age acceleration in young 74% (65, 82) compared to older twin pairs 53% (37, 65), while non-shared environmental factors were larger in older twin pairs (difference between age groups p<0.001). 
The increasing discordance in age acceleration in older age is most likely explained by unique environmental factors.",2017-06-30 +28212602,HAPPI-2: a Comprehensive and High-quality Map of Human Annotated and Predicted Protein Interactions.,"

Background

Human protein-protein interaction (PPI) data is essential to network and systems biology studies. PPI data can help biochemists hypothesize how proteins form complexes by binding to each other, how extracellular signals propagate through post-translational modification of de-activated signaling molecules, and how chemical reactions are coupled by enzymes involved in a complex biological process. Our capability to develop good public database resources for human PPI data has a direct impact on the quality of future research on genome biology and medicine.

Results

The database of Human Annotated and Predicted Protein Interactions (HAPPI) version 2.0 is a major update to the original HAPPI 1.0 database. It contains 2,922,202 unique protein-protein interactions (PPI) linked by 23,060 human proteins, making it the most comprehensive database covering human PPI data today. These PPIs contain both physical/direct interactions and high-quality functional/indirect interactions. Compared with the HAPPI 1.0 database release, HAPPI database version 2.0 (HAPPI-2) represents a 485% increase in human PPI data coverage and a 73% increase in protein coverage. The revamped HAPPI web portal provides users with a friendly search, curation, and data retrieval interface, allowing them to retrieve human PPIs and available annotation information on the interaction type, interaction quality, interacting partner drug targeting data, and disease information. The updated HAPPI-2 can be freely accessed by academic users at http://discovery.informatics.uab.edu/HAPPI .

Conclusions

While the underlying data for HAPPI-2 are integrated from a diverse data sources, the new HAPPI-2 release represents a good balance between data coverage and data quality of human PPIs, making it ideally suited for network biology.",2017-02-17 +25653163,CAGEr: precise TSS data retrieval and high-resolution promoterome mining for integrative analyses.,"Cap analysis of gene expression (CAGE) is a high-throughput method for transcriptome analysis that provides a single base-pair resolution map of transcription start sites (TSS) and their relative usage. Despite their high resolution and functional significance, published CAGE data are still underused in promoter analysis due to the absence of tools that enable its efficient manipulation and integration with other genome data types. Here we present CAGEr, an R implementation of novel methods for the analysis of differential TSS usage and promoter dynamics, integrated with CAGE data processing and promoterome mining into a first comprehensive CAGE toolbox on a common analysis platform. Crucially, we provide collections of TSSs derived from most published CAGE datasets, as well as direct access to FANTOM5 resource of TSSs for numerous human and mouse cell/tissue types from within R, greatly increasing the accessibility of precise context-specific TSS data for integrative analyses. The CAGEr package is freely available from Bioconductor at http://www.bioconductor.org/packages/release/bioc/html/CAGEr.html.",2015-02-04 +,VIP Barcoding: composition vector‐based software for rapid species identification based on DNA barcoding,"Species identification based on short sequences of DNA markers, that is, DNA barcoding, has emerged as an integral part of modern taxonomy. However, software for the analysis of large and multilocus barcoding data sets is scarce. The Basic Local Alignment Search Tool (BLAST) is currently the fastest tool capable of handling large databases (e.g. 
>5000 sequences), but its accuracy is a concern and has been criticized for its local optimization. However, current more accurate software requires sequence alignment or complex calculations, which are time‐consuming when dealing with large data sets during data preprocessing or during the search stage. Therefore, it is imperative to develop a practical program for both accurate and scalable species identification for DNA barcoding. In this context, we present VIP Barcoding: a user‐friendly software in graphical user interface for rapid DNA barcoding. It adopts a hybrid, two‐stage algorithm. First, an alignment‐free composition vector (CV) method is utilized to reduce searching space by screening a reference database. The alignment‐based K2P distance nearest‐neighbour method is then employed to analyse the smaller data set generated in the first stage. In comparison with other software, we demonstrate that VIP Barcoding has (i) higher accuracy than Blastn and several alignment‐free methods and (ii) higher scalability than alignment‐based distance methods and character‐based methods. These results suggest that this platform is able to deal with both large‐scale and multilocus barcoding data with accuracy and can contribute to DNA barcoding for modern taxonomy. VIP Barcoding is free and available at http://msl.sls.cuhk.edu.hk/vipbarcoding/.",2014-07-01 +,Phylogeny of the tribe Naupactini (Coleoptera: Curculionidae) based on morphological characters,"Naupactini (Curculionidae: Entiminae) is a primarily Neotropical tribe of broad‐nosed weevils with its highest genus and species diversity in South America. Despite several taxonomic contributions published during the last decades, the evolutionary history of Naupactini remains poorly understood. 
We present the first comprehensive phylogenetic analysis for this tribe based on a data matrix of 100 adult morphological characters scored for 70 species, representing 55 genera of Naupactini (ingroup) and four outgroups belonging to the entimine tribes Otiorhynchini, Entimini, Eustylini and Tanymecini. According to the most parsimonious tree Artipus does not belong to Naupactini; the genera with flat and broad antennae, formerly assigned to other entimine tribes, form a monophyletic group (Saurops (Curiades (Aptolemus (Platyomus)))) related to the clade (Megalostylus (Megalostylodes (Chamaelops Wagneriella))); and the genera distributed along the high Andes, Paramos and Puna form a natural group (Asymmathetes (Amphideritus (Leschenius (Amitrus (Obrieniolus (Melanocyphus Trichocyphus)))))), nested within a larger clade that includes Pantomorus, Naupactus and allied genera. Atrichonotus, Hoplopactus, Mimographus and Naupactus are not recovered as monophyletic. In order to address the taxonomic implications of our phylogenetic analysis, we propose the following nomenclatural changes: to transfer Artipus from Naupactini to Geonemini, to revalidate the genera Mimographopsis (type species M. viridicans), and to revalidate the genus Floresianus (type species F. sordidus). The evolution of selected characters is discussed. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:C8AA4388‐A2F0‐4E2D‐889A‐500BEA5A9DE1.",2017-04-01 +,Combining participatory modelling and citizen science to support volunteer conservation action,"The popularity of both citizen science and participatory modelling has given way to a growing number of case studies that all outline the benefits of more inclusive forms of conservation planning. Citizen science offers volunteers the opportunity to engage in environmental research while participatory modelling engages individuals in community-level environmental decision-making. 
Although both of these public-science collaborations are often said to lead to improved environmental decision-making, evidence for these outcomes in the peer reviewed literature remains sparse. We suggest that combining these fields has promise for developing community-supported research leading to conservation action. To demonstrate this approach, we present the infrastructure and use of a participatory modelling software called Mental Modeler (http://www.mentalmodeler.org/), used with a citizen science web portal (www.citsci.org) that allows citizen scientists, scientists, and managers to: (1) collaboratively define local conservation issues of shared concern; (2) model and represent assumptions, evidence, and existing information about these issues; (3) run scenarios to discuss potential research or management options; and ultimately (4) co-develop citizen scientific research and conservation plans. Using case study data from two community groups working on locally-defined issues related to land management practices in the US, we demonstrate how coordinated learning through modelling practices can lead to the development of self-organized and co-created conservation action. We conclude that the development of online modelling tools holds strong promise for the fields of both citizen science and conservation biology.",2017-04-01 +31822517,SNP-CRISPR: A Web Tool for SNP-Specific Genome Editing.,"CRISPR-Cas9 is a powerful genome editing technology in which a single guide RNA (sgRNA) confers target site specificity to achieve Cas9-mediated genome editing. Numerous sgRNA design tools have been developed based on reference genomes for humans and model organisms. However, existing resources are not optimal as genetic mutations or single nucleotide polymorphisms (SNPs) within the targeting region affect the efficiency of CRISPR-based approaches by interfering with guide-target complementarity. 
To facilitate identification of sgRNAs (1) in non-reference genomes, (2) across varying genetic backgrounds, or (3) for specific targeting of SNP-containing alleles, for example, disease relevant mutations, we developed a web tool, SNP-CRISPR (https://www.flyrnai.org/tools/snp_crispr/). SNP-CRISPR can be used to design sgRNAs based on public variant data sets or user-identified variants. In addition, the tool computes efficiency and specificity scores for sgRNA designs targeting both the variant and the reference. Moreover, SNP-CRISPR provides the option to upload multiple SNPs and target single or multiple nearby base changes simultaneously with a single sgRNA design. Given these capabilities, SNP-CRISPR has a wide range of potential research applications in model systems and for design of sgRNAs for disease-associated variant correction.",2020-02-06 +22646299,An evaluation of the '5 Minute Medicine' video podcast series compared to conventional medical resources for the internal medicine clerkship.,"

Background

'5 Minute Medicine' (5MM) is a series of video podcasts, that in approximately 5 min, each explain a core objective of the internal medicine clerkship that all clinical clerks should understand. Video podcasts are accessible at www.5minutemedicine.com

Aim

The aim of this study was to investigate how well received 5MM video podcasts are as an educational tool for clinical clerks to use while on call.

Methods

Clinical clerks rotating through their internal medicine clerkship rotation were asked to use the 5MM video podcasts or conventional resources to prepare themselves prior to seeing patients. Questionnaires were distributed to students to determine effectiveness, appropriateness and time-efficiency of the resources students used.

Results

Students almost unanimously strongly agreed or agreed that the 5MM video podcasts were effective learning tools, appropriate for clinical clerks and time-efficient, more so than conventionally used resources. The vast majority of clerks selected the 5MM videos as their preferred resource of all resources available to them. Most clerks felt the 5MM videos were better than textbooks and conventional online resources.

Conclusion

Video podcasts such as the 5MM videos are welcomed as educational tools and may have a role in the future of undergraduate medical education.",2012-05-30 +29425325,Selenzyme: enzyme selection tool for pathway design.,"

Summary

Synthetic biology applies the principles of engineering to biology in order to create biological functionalities not seen before in nature. One of the most exciting applications of synthetic biology is the design of new organisms with the ability to produce valuable chemicals including pharmaceuticals and biomaterials in a greener; sustainable fashion. Selecting the right enzymes to catalyze each reaction step in order to produce a desired target compound is, however, not trivial. Here, we present Selenzyme, a free online enzyme selection tool for metabolic pathway design. The user is guided through several decision steps in order to shortlist the best candidates for a given pathway step. The tool graphically presents key information about enzymes based on existing databases and tools such as: similarity of sequences and of catalyzed reactions; phylogenetic distance between source organism and intended host species; multiple alignment highlighting conserved regions, predicted catalytic site, and active regions and relevant properties such as predicted solubility and transmembrane regions. Selenzyme provides bespoke sequence selection for automated workflows in biofoundries.

Availability and implementation

The tool is integrated as part of the pathway design stage into the design-build-test-learn SYNBIOCHEM pipeline. The Selenzyme web server is available at http://selenzyme.synbiochem.co.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-06-01 +32220918,Step-downs reduce workers' compensation payments to encourage return to work: are they effective?,"

Objective

To determine whether step-downs, which cut the rate of compensation paid to injured workers after they have been on benefits for several months, are effective as a return to work incentive.

Methods

We aggregated administrative claims data from seven Australian workers' compensation systems to calculate weekly scheme exit rates, a proxy for return to work. Jurisdictions were further subdivided into four injury subgroups: fractures, musculoskeletal, mental health and other trauma. The effect of step-downs on scheme exit was tested using a regression discontinuity design. Results were pooled into meta-analyses to calculate combined effects and the proportion of variance attributable to heterogeneity.

Results

The combined effect of step-downs was a 0.86 percentage point (95% CI -1.45 to -0.27) reduction in the exit rate, with significant heterogeneity between jurisdictions (I²=68%, p=0.003). Neither timing nor magnitude of step-downs was a significant moderator of effects. Within injury subgroups, only fractures had a significant combined effect (-0.84, 95% CI -1.61 to -0.07). Sensitivity analysis indicated potential effects within mental health and musculoskeletal conditions as well.

Conclusions

The results suggest some workers' compensation recipients anticipate step-downs and exit the system early to avoid the reduction in income. However, the effects were small and suggest step-downs have marginal practical significance. We conclude that step-downs are generally ineffective as a return to work policy initiative. Postprint link: https://www.medrxiv.org/content/10.1101/19012286.",2020-03-27 +31534703,"Litter addition decreases plant diversity by suppressing seeding in a semiarid grassland, Northern China.","Plant community diversity is conducive to maintain the regional ecosystems stability and ecosystem services. Seed germination is one of the main ways to regulate plant diversity, owing to seedling recruitment as a basis for plant community renewal. However, the exact mechanism of how plant litter affects seedling recruitment, and species richness is not yet fully understood. Therefore, a litter addition and removal experiment was established in a semiarid grassland to study the effects of plant litter on seedling recruitment and species richness from April to August in 2016 and 2017 in Northern China. The positive correlation between species richness and seedling recruitment indicated that a guarantee of seedling recruitment was the main precondition to protect species richness. Adding rather than removing litter significantly reduced species richness. Litter addition inhibited species richness by directly increasing mechanical damage or indirectly reducing photosynthetically active radiation and seedling recruitment. The results of this study are conducive to understand the evolutionary and regulatory mechanisms of community species richness and seedling recruitment in grassland ecosystems after adding or removing plant litter.

Open research badges

This article has been awarded Open Data, Open Materials and Preregistered research design Badges. All materials and data are publicly accessible via the Open Science Framework at https://doi.org/10.5061/dryad.5dj3jg5 and http://doi.org/10.5061/dryad.13gj03s.",2019-08-15 +28849567,ArrayTrack: An FDA and Public Genomic Tool.,"A robust bioinformatics capability is widely acknowledged as central to realizing the promises of toxicogenomics. Successful application of toxicogenomic approaches, such as DNA microarrays, inextricably relies on appropriate data management, the ability to extract knowledge from massive amounts of data and the availability of functional information for data interpretation. At the FDA's National Center for Toxicological Research (NCTR), we are developing a public microarray data management and analysis software, called ArrayTrack that is also used in the routine review of genomic data submitted to the FDA. ArrayTrack stores a full range of information related to DNA microarrays and clinical and nonclinical studies as well as the digested data derived from proteomics and metabonomics experiments. In addition, ArrayTrack provides a rich collection of functional information about genes, proteins, and pathways drawn from various public biological databases for facilitating data interpretation. Many data analysis and visualization tools are available with ArrayTrack for individual platform data analysis, multiple omics data integration and integrated analysis of omics data with study data. Importantly, gene expression data, functional information, and analysis methods are fully integrated so that the data analysis and interpretation process is simplified and enhanced. Using ArrayTrack, users can select an analysis method from the ArrayTrack tool box, apply the method to selected microarray data and the analysis results can be directly linked to individual gene, pathway, and Gene Ontology analysis. 
ArrayTrack is publicly available online ( http://www.fda.gov/nctr/science/centers/toxicoinformatics/ArrayTrack/index.htm ), and the prospective user can also request a local installation version by contacting the authors.",2017-01-01 +30123079,Genome-wide Identification and Characterization of Enhancers Across 10 Human Tissues.,"Background: Enhancers can act as cis-regulatory elements (CREs) to control development and cellular function by regulating gene expression in a tissue-specific and ubiquitous manner. However, the regulatory network and characteristic of different types of enhancers (e.g., transcribed/non-transcribed enhancers, tissue-specific/ubiquitous enhancers) across multiple tissues are still unclear. Results: Here, a total of 53,924 active enhancers and 10,307 enhancer-associated RNAs (eRNAs) in 10 tissues (adrenal, brain, breast, heart, liver, lung, ovary, placenta, skeletal muscle and kidney) were identified through the integration of histone modifications (H3K4me1, H3K27ac and H3K4me3) and DNase I hypersensitive sites (DHSs) data. Moreover, 40,101 tissue-specific enhancers (TS-Enh), 1,241 ubiquitously expressed enhancers (UE-Enh) as well as transcribed enhancers (T-Enh), including 7,727 unidirectionally transcribed enhancers (1D-Enh) and 1,215 bidirectionally transcribed enhancers (2D-Enh) were defined in 10 tissues. The results show that enhancers exhibited high GC content, genomic variants and transcription factor binding sites (TFBS) enrichment in all tissues. These characteristics were significantly different between TS-Enh and UE-Enh, T-Enh and NT-Enh, 2D-Enh and 1D-Enh. Furthermore, the results showed that enhancers obviously upregulate the expression of adjacent target genes which were remarkably correlated with the functions of corresponding tissues. Finally, a free user-friendly tissue-specific enhancer database, TiED (http://lcbb.swjtu.edu.cn/TiED), has been built to store, visualize, and confer these results. 
Conclusion: Genome-wide analysis of the regulatory network and characteristic of various types of enhancers showed that enhancers associated with TFs, eRNAs and target genes appeared in tissue specificity and function across different tissues.",2018-07-27 +30220203,Error-Free Data Visualization and Processing through imzML and mzML Validation.,"Open data formats are key to facilitating data processing, sharing, and integration. The imzML format ( http://imzml.org/ ) has drastically improved these aspects of mass spectrometry imaging data. Efficient processing of data depends on data sets which are consistent and adhere to the specifications; however, this is not always the case. Here we present a validation tool for data stored in both imzML and the HUPO-PSI mass spectrometery counterpart, mzML, to identify any deviations from the published (i)mzML standard which could cause issues for the user when visualizing or processing data. The tool is released in two forms, a graphical user interface (GUI) for ease of use, and a command line version to fit into existing workflows and pipelines. When certain known issues are encountered, such as the presence of negative values for the location of the binary data, the validator resolves the issue automatically upon saving. The GUI version of the validator also allows editing of the metadata included within the (i)mzML files in order to resolve inconsistencies. We also present a means of performing conditional validation on the metadata within (i)mzML files, where user-defined rules are validated against depending on whether specific metadata are present (or not). For example, if the MALDI term is present, then additional rules related to MALDI (such as the requirement of inclusion of laser parameters) can be validated against this. This enables a flexible and more thorough automated validation of (i)mzML data. 
Such a system is necessary for validating data against more comprehensive sets of metadata such as minimum reporting guidelines or metadata requirements prior to submission and acceptance of data to data repositories. We demonstrate how this tool can be used to validate against the proposed minimum reporting guidelines in MSI as well as institute specific metadata criteria. The validator tool is endorsed for validation of imzML ( http://imzml.org/ ) and mzML ( http://www.psidev.info/mzml ) and is made available through the respective Web sites. The validator is also released as open source under Mozilla Public License 2.0 at https://gitlab.com/imzML/imzMLValidator .",2018-10-30 +26871594,Laying a Community-Based Foundation for Data-Driven Semantic Standards in Environmental Health Sciences.,"

Background

Despite increasing availability of environmental health science (EHS) data, development, and implementation of relevant semantic standards, such as ontologies or hierarchical vocabularies, has lagged. Consequently, integration and analysis of information needed to better model environmental influences on human health remains a significant challenge.

Objectives

We aimed to identify a committed community and mechanisms needed to develop EHS semantic standards that will advance understanding about the impacts of environmental exposures on human disease.

Methods

The National Institute of Environmental Health Sciences sponsored the ""Workshop for the Development of a Framework for Environmental Health Science Language"" hosted at North Carolina State University on 15-16 September 2014. Through the assembly of data generators, users, publishers, and funders, we aimed to develop a foundation for enabling the development of community-based and data-driven standards that will ultimately improve standardization, sharing, and interoperability of EHS information.

Discussion

Creating and maintaining an EHS common language is a continuous and iterative process, requiring community building around research interests and needs, enabling integration and reuse of existing data, and providing a low barrier of access for researchers needing to use or extend such a resource.

Conclusions

Recommendations included developing a community-supported web-based toolkit that would enable a) collaborative development of EHS research questions and use cases, b) construction of user-friendly tools for searching and extending existing semantic resources, c) education and guidance about standards and their implementation, and d) creation of a plan for governance and sustainability.

Citation

Mattingly CJ, Boyles R, Lawler CP, Haugen AC, Dearry A, Haendel M. 2016. Laying a community-based foundation for data-driven semantic standards in environmental health sciences. Environ Health Perspect 124:1136-1140; http://dx.doi.org/10.1289/ehp.1510438.",2016-02-12 +31240256,SPHIRE-crYOLO is a fast and accurate fully automated particle picker for cryo-EM.,"Selecting particles from digital micrographs is an essential step in single-particle electron cryomicroscopy (cryo-EM). As manual selection of complete datasets-typically comprising thousands of particles-is a tedious and time-consuming process, numerous automatic particle pickers have been developed. However, non-ideal datasets pose a challenge to particle picking. Here we present the particle picking software crYOLO which is based on the deep-learning object detection system You Only Look Once (YOLO). After training the network with 200-2500 particles per dataset it automatically recognizes particles with high recall and precision while reaching a speed of up to five micrographs per second. Further, we present a general crYOLO network able to pick from previously unseen datasets, allowing for completely automated on-the-fly cryo-EM data preprocessing during data acquisition. crYOLO is available as a standalone program under http://sphire.mpg.de/ and is distributed as part of the image processing workflow in SPHIRE.",2019-06-19 +31217225,Current best practices in single-cell RNA-seq analysis: a tutorial.,"Single-cell RNA-seq has enabled gene expression to be studied at an unprecedented resolution. The promise of this technology is attracting a growing user base for single-cell analysis methods. As more analysis tools are becoming available, it is becoming increasingly difficult to navigate this landscape and produce an up-to-date workflow to analyse one's data. 
Here, we detail the steps of a typical single-cell RNA-seq analysis, including pre-processing (quality control, normalization, data correction, feature selection, and dimensionality reduction) and cell- and gene-level downstream analysis. We formulate current best-practice recommendations for these steps based on independent comparison studies. We have integrated these best-practice recommendations into a workflow, which we apply to a public dataset to further illustrate how these steps work in practice. Our documented case study can be found at https://www.github.com/theislab/single-cell-tutorial This review will serve as a workflow tutorial for new entrants into the field, and help established users update their analysis pipelines.",2019-06-19 +31264054,"Development of an Interactive Web Application ""Shiny App for Frequency Analysis on Homo sapiens Genome (SAFA-HsG)"".","The web application ""Shiny App for Frequency Analysis on Homo sapiens Genome (SAFA-HsG)"" was developed using R programming-based bioconductor packages and shiny framework. Through the app, preliminary descriptive data analysis on nucleotide frequency, and CpG island, CpG non-island, and CpG island shores and shelves (downstream and upstream) of human reference genome can be carried out, which will help biologists to work on human epigenomics. Table view of these analyses of all chromosomes can be visualized and downloaded by the end users. Similarly, the respective comparative plots can be used for CpG sites comparison. In addition, to introduce the personal genome project, the present study has done a preliminary work on few raw data and are included in the app, which will create interest on personal genome information. The app is hosted on https://SAFA-HsG.shinyapps.io/home/. It is a multi-platform application and can be initiated locally from any computer that has or has not installed R. 
It is a user-friendly interface, which will allow a biologist, even who has little computer knowledge to access and analyze further.",2019-07-01 +30715234,Multispectral tracing in densely labeled mouse brain with nTracer.,"SUMMARY:This note describes nTracer, an ImageJ plug-in for user-guided, semi-automated tracing of multispectral fluorescent tissue samples. This approach allows for rapid and accurate reconstruction of whole cell morphology of large neuronal populations in densely labeled brains. AVAILABILITY AND IMPLEMENTATION:nTracer was written as a plug-in for the open source image processing software ImageJ. The software, instructional documentation, tutorial videos, sample image and sample tracing results are available at https://www.cai-lab.org/ntracer-tutorial. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-09-01 +24616562,Insect barcode information system.,"

Unlabelled

Insect Barcode Information System called as Insect Barcode Informática (IBIn) is an online database resource developed by the National Bureau of Agriculturally Important Insects, Bangalore. This database provides acquisition, storage, analysis and publication of DNA barcode records of agriculturally important insects, for researchers specifically in India and other countries. It bridges a gap in bioinformatics by integrating molecular, morphological and distribution details of agriculturally important insects. IBIn was developed using PHP/MySQL by using relational database management concept. This database is based on the client-server architecture, where many clients can access data simultaneously. IBIn is freely available on-line and is user-friendly. IBIn allows the registered users to input new information, search and view information related to DNA barcode of agriculturally important insects. This paper provides a current status of insect barcode in India and brief introduction about the database IBIn.

Availability

http://www.nabg-nbaii.res.in/barcode.",2014-02-19 +26149169,The Medicago sativa gene index 1.2: a web-accessible gene expression atlas for investigating expression differences between Medicago sativa subspecies.,"

Background

Alfalfa (Medicago sativa L.) is the primary forage legume crop species in the United States and plays essential economic and ecological roles in agricultural systems across the country. Modern alfalfa is the result of hybridization between tetraploid M. sativa ssp. sativa and M. sativa ssp. falcata. Due to its large and complex genome, there are few genomic resources available for alfalfa improvement.

Results

A de novo transcriptome assembly from two alfalfa subspecies, M. sativa ssp. sativa (B47) and M. sativa ssp. falcata (F56) was developed using Illumina RNA-seq technology. Transcripts from roots, nitrogen-fixing root nodules, leaves, flowers, elongating stem internodes, and post-elongation stem internodes were assembled into the Medicago sativa Gene Index 1.2 (MSGI 1.2) representing 112,626 unique transcript sequences. Nodule-specific and transcripts involved in cell wall biosynthesis were identified. Statistical analyses identified 20,447 transcripts differentially expressed between the two subspecies. Pair-wise comparisons of each tissue combination identified 58,932 sequences differentially expressed in B47 and 69,143 sequences differentially expressed in F56. Comparing transcript abundance in floral tissues of B47 and F56 identified expression differences in sequences involved in anthocyanin and carotenoid synthesis, which determine flower pigmentation. Single nucleotide polymorphisms (SNPs) unique to each M. sativa subspecies (110,241) were identified.

Conclusions

The Medicago sativa Gene Index 1.2 increases the expressed sequence data available for alfalfa by ninefold and can be expanded as additional experiments are performed. The MSGI 1.2 transcriptome sequences, annotations, expression profiles, and SNPs were assembled into the Alfalfa Gene Index and Expression Database (AGED) at http://plantgrn.noble.org/AGED/ , a publicly available genomic resource for alfalfa improvement and legume research.",2015-07-07 +31179198,An Emergent Space for Distributed Data with Hidden Internal Order through Manifold Learning.,"Manifold-learning techniques are routinely used in mining complex spatiotemporal data to extract useful, parsimonious data representations/parametrizations; these are, in turn, useful in nonlinear model identification tasks. We focus here on the case of time series data that can ultimately be modelled as a spatially distributed system (e.g. a partial differential equation, PDE), but where we do not know the space in which this PDE should be formulated. Hence, even the spatial coordinates for the distributed system themselves need to be identified - to ""emerge from""-the data mining process. We will first validate this ""emergent space"" reconstruction for time series sampled without space labels in known PDEs; this brings up the issue of observability of physical space from temporal observation data, and the transition from spatially resolved to lumped (order-parameter-based) representations by tuning the scale of the data mining kernels. We will then present actual emergent space ""discovery"" illustrations. Our illustrative examples include chimera states (states of coexisting coherent and incoherent dynamics), and chaotic as well as quasiperiodic spatiotemporal dynamics, arising in partial differential equations and/or in heterogeneous networks. We also discuss how data-driven ""spatial"" coordinates can be extracted in ways invariant to the nature of the measuring instrument. 
Such gauge-invariant data mining can go beyond the fusion of heterogeneous observations of the same system, to the possible matching of apparently different systems. For an older version of this article, including other examples, see https://arxiv.org/abs/1708.05406.",2018-11-22 +31646905,"Iridoid glucosides and triterpene acids from Phlomis linearifolia, growing in Uzbekistan and its hepatoprotective activity.","A number of iridoids and triterpene acids, such as pulchelloside, sesamoside, shanshiside methyl ester, barlerin, gypsogenin acid and acetate of gypsogenic acid were isolated from the aerial part of Phlomis linearifolia and their structures were confirmed by NMR, mass and IR spectroscopy. In addition, the hepatoprotective potential of iridoid fraction from P. linearifolia aerial parts was tested against CCl4 induced fibrosis in rats. The iridoid fraction not only prevented the manifestation of the hepatotoxic effect of CCl4, but rather quickly eliminated the effects of developing intoxication. The hepatoprotective activity of the SI was confirmed to be effective and exceeds knows drug carsil.Supplemental data for this article can be accessed at https://doi.org/10.1080/14786419.2019.1677650.",2019-10-24 +28918286,Identifying combinatorial biomarkers by association rule mining in the CAMD Alzheimer's database.,"The concept of combinatorial biomarkers was conceived when it was noticed that simple biomarkers are often inadequate for recognizing and characterizing complex diseases. Here we present an algorithmic search method for complex biomarkers which may predict or indicate Alzheimer's disease (AD) and other kinds of dementia. We show that our method is universal since it can describe any Boolean function for biomarker discovery. We applied data mining techniques that are capable to uncover implication-like logical schemes with detailed quality scoring. 
The new SCARF program was applied for the Tucson, Arizona based Critical Path Institute's CAMD database, containing laboratory and cognitive test data for 5821 patients from the placebo arm of clinical trials of large pharmaceutical companies, and consequently, the data is much more reliable than numerous other databases for dementia. The results of our study on this larger than 5800-patient cohort suggest beneficial effects of high B12 vitamin level, negative effects of high sodium levels or high AST (aspartate aminotransferase) liver enzyme levels to cognition. As an example for a more complex and quite surprising rule: Low or normal blood glucose level with either low cholesterol or high serum sodium would also increase the probability of bad cognition with a 3.7 multiplier. The source code of the new SCARF program is publicly available at http://pitgroup.org/static/scarf.zip.",2017-08-17 +30044231,A Suspect Screening Method for Characterizing Multiple Chemical Exposures among a Demographically Diverse Population of Pregnant Women in San Francisco.,"

Background

In utero exposure to environmental chemicals can adversely impact pregnancy outcomes and childhood health, but minimal biomonitoring data exist on the majority of chemicals used in commerce.

Objectives

We aimed to profile exposure to multiple environmental organic acids (EOAs) and identify novel chemicals that have not been previously biomonitored in a diverse population of pregnant women.

Methods

We used liquid chromatography-quadrupole time-of-flight mass spectrometry (LC-QTOF/MS) to perform a suspect screen for 696 EOAs, (e.g., phenols and phthalate metabolites) on the maternal serum collected at delivery from 75 pregnant women delivering at two large San Francisco Hospitals. We examined demographic differences in peak areas and detection frequency (DF) of suspect EOAs using a Kruskal-Wallis Rank Sum test or Fisher's exact test. We confirmed selected suspects by comparison with their respective reference standards.

Results

We detected, on average, 56 [standard deviation (SD): 8] suspect EOAs in each sample (range: 32-73). Twelve suspect EOAs with DF≥60 were matched to 21 candidate compounds in our EOA database, two-thirds of which are novel chemicals. We found demographic differences in DF for 13 suspect EOAs and confirmed the presence of 6 priority novel chemicals: 2,4-Di-tert-butylphenol, Pyrocatechol, 2,4-Dinitrophenol, 3,5-Di-tert-butylsalicylic acid, 4-Hydroxycoumarin, and 2'-Hydroxyacetophenone (or 3'-Hydroxyacetophenone). The first two are high-production-volume chemicals in the United States.

Conclusion

Suspect screening in human biomonitoring provides a viable method to characterize a broad spectrum of environmental chemicals to prioritize for targeted method development and quantification. https://doi.org/10.1289/EHP2920.",2018-07-24 +31173067,vulcanSpot: a tool to prioritize therapeutic vulnerabilities in cancer.,"

Motivation

Genetic alterations lead to tumor progression and cell survival but also uncover cancer-specific vulnerabilities on gene dependencies that can be therapeutically exploited.

Results

vulcanSpot is a novel computational approach implemented to expand the therapeutic options in cancer beyond known-driver genes unlocking alternative ways to target undruggable genes. The method integrates genome-wide information provided by massive screening experiments to detect genetic vulnerabilities associated to tumors. Then, vulcanSpot prioritizes drugs to target cancer-specific gene dependencies using a weighted scoring system based on well known drug-gene relationships and drug repositioning strategies.

Availability and implementation

http://www.vulcanspot.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +31086968,Variant Score Ranker-a web application for intuitive missense variant prioritization.,"

Motivation

The correct classification of missense variants as benign or pathogenic remains challenging. Pathogenic variants are expected to have higher deleterious prediction scores than benign variants in the same gene. However, most of the existing variant annotation tools do not reference the score range of benign population variants on gene level.

Results

We present a web-application, Variant Score Ranker, which enables users to rapidly annotate variants and perform gene-specific variant score ranking on the population level. We also provide an intuitive example of how gene- and population-calibrated variant ranking scores can improve epilepsy variant prioritization.

Availability and implementation

http://vsranker.broadinstitute.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +32582302,Lack of Association Between Genetic Variants at ACE2 and TMPRSS2 Genes Involved in SARS-CoV-2 Infection and Human Quantitative Phenotypes.,"Coronavirus disease 2019 (COVID-19) shows a wide variation in expression and severity of symptoms, from very mild or no symptoms, to flu-like symptoms, and in more severe cases, to pneumonia, acute respiratory distress syndrome, and even death. Large differences in outcome have also been observed between males and females. The causes for this variability are likely to be multifactorial, and to include genetics. The SARS-CoV-2 virus responsible for the infection depends on two human genes: the human receptor angiotensin converting enzyme 2 (ACE2) for cell invasion, and the serine protease TMPRSS2 for S protein priming. Genetic variation in these two genes may thus modulate an individual's genetic predisposition to infection and virus clearance. While genetic data on COVID-19 patients is being gathered, we carried out a phenome-wide association scan (PheWAS) to investigate the role of these genes in other human phenotypes in the general population. We examined 178 quantitative phenotypes including cytokines and cardio-metabolic biomarkers, as well as usage of 58 medications in 36,339 volunteers from the Lifelines population cohort, in relation to 1,273 genetic variants located in or near ACE2 and TMPRSS2. While none reached our threshold for significance, we observed several interesting suggestive associations. For example, single nucleotide polymorphisms (SNPs) near the TMPRSS2 genes were associated with thrombocytes count (p = 1.8 × 10-5). 
SNPs within the ACE2 gene were associated with (1) the use of angiotensin II receptor blockers (ARBs) combination therapies (p = 5.7 × 10-4), an association that is significantly stronger in females (p dif f = 0.01), and (2) with the use of non-steroid anti-inflammatory and antirheumatic products (p = 5.5 × 10-4). While these associations need to be confirmed in larger sample sizes, they suggest that these variants could play a role in diseases such as thrombocytopenia, hypertension, and chronic inflammation that are often observed in the more severe COVID-19 cases. Further investigation of these genetic variants in the context of COVID-19 is thus promising for better understanding of disease variability. Full results are available at https://covid19research.nl.",2020-06-08 +29774137,Saudi anti-human cancer plants database (SACPD): A collection of plants with anti-human cancer activities.,"Several anticancer drugs have been developed from natural products such as plants. Successful experiments in inhibiting the growth of human cancer cell lines using Saudi plants were published over the last three decades. Up to date, there is no Saudi anticancer plants database as a comprehensive source for the interesting data generated from these experiments. Therefore, there was a need for creating a database to collect, organize, search and retrieve such data. As a result, the current paper describes the generation of the Saudi anti-human cancer plants database (SACPD). The database contains most of the reported information about the naturally growing Saudi anticancer plants. SACPD comprises the scientific and local names of 91 plant species that grow naturally in Saudi Arabia. These species belong to 38 different taxonomic families. In Addition, 18 species that represent16 family of medicinal plants and are intensively sold in the local markets in Saudi Arabia were added to the database. 
The website provides interesting details, including plant part containing the anticancer bioactive compounds, plants locations and cancer/cell type against which they exhibit their anticancer activity. Our survey revealed that breast, liver and leukemia were the most studied cancer cell lines in Saudi Arabia with percentages of 27%, 19% and 15%, respectively. The current SACPD represents a nucleus around which more development efforts can expand to accommodate all future submissions about new Saudi plant species with anticancer activities. SACPD will provide an excellent starting point for researchers and pharmaceutical companies who are interested in developing new anticancer drugs. SACPD is available online at https://teeqrani1.wixsite.com/sapd.",2018-01-01 +31106350,Software Application Profile: The Anchored Multiplier calculator-a Bayesian tool to synthesize population size estimates.,"Estimating the number of people in hidden populations is needed for public health research, yet available methods produce highly variable and uncertain results. The Anchored Multiplier calculator uses a Bayesian framework to synthesize multiple population size estimates to generate a consensus estimate. Users submit point estimates and lower/upper bounds which are converted to beta probability distributions and combined to form a single posterior probability distribution. The Anchored Multiplier calculator is available as a web browser-based application. The software allows for unlimited empirical population size estimates to be submitted and combined according to Bayes Theorem to form a single estimate. The software returns output as a forest plot (to visually compare data inputs and the final Anchored Multiplier estimate) and a table that displays results as population percentages and counts. 
The web application 'Anchored Multiplier Calculator' is free software and is available at [http://globalhealthsciences.ucsf.edu/resources/tools] or directly at [http://anchoredmultiplier.ucsf.edu/].",2019-12-01 +24705206,FixPred: a resource for correction of erroneous protein sequences.,"Protein databases are heavily contaminated with erroneous (mispredicted, abnormal and incomplete) sequences and these erroneous data significantly distort the conclusions drawn from genome-scale protein sequence analyses. In our earlier work we described the MisPred resource that serves to identify erroneous sequences; here we present the FixPred computational pipeline that automatically corrects sequences identified by MisPred as erroneous. The current version of the associated FixPred database contains corrected UniProtKB/Swiss-Prot and NCBI/RefSeq sequences from Homo sapiens, Mus musculus, Rattus norvegicus, Monodelphis domestica, Gallus gallus, Xenopus tropicalis, Danio rerio, Fugu rubripes, Ciona intestinalis, Branchostoma floridae, Drosophila melanogaster and Caenorhabditis elegans; future releases of the FixPred database will include corrected sequences of additional Metazoan species. The FixPred computational pipeline and database (http://www.fixpred.com) are easily accessible through a simple web interface coupled to a powerful query engine and a standard web service. The content is completely or partially downloadable in a variety of formats. Database URL: http://www.fixpred.com.",2014-04-04 +28509974,Decoding the complex genetic causes of heart diseases using systems biology.,"The pace of disease gene discovery is still much slower than expected, even with the use of cost-effective DNA sequencing and genotyping technologies. It is increasingly clear that many inherited heart diseases have a more complex polygenic aetiology than previously thought. 
Understanding the role of gene-gene interactions, epigenetics, and non-coding regulatory regions is becoming increasingly critical in predicting the functional consequences of genetic mutations identified by genome-wide association studies and whole-genome or exome sequencing. A systems biology approach is now being widely employed to systematically discover genes that are involved in heart diseases in humans or relevant animal models through bioinformatics. The overarching premise is that the integration of high-quality causal gene regulatory networks (GRNs), genomics, epigenomics, transcriptomics and other genome-wide data will greatly accelerate the discovery of the complex genetic causes of congenital and complex heart diseases. This review summarises state-of-the-art genomic and bioinformatics techniques that are used in accelerating the pace of disease gene discovery in heart diseases. Accompanying this review, we provide an interactive web-resource for systems biology analysis of mammalian heart development and diseases, CardiacCode ( http://CardiacCode.victorchang.edu.au/ ). CardiacCode features a dataset of over 700 pieces of manually curated genetic or molecular perturbation data, which enables the inference of a cardiac-specific GRN of 280 regulatory relationships between 33 regulator genes and 129 target genes. We believe this growing resource will fill an urgent unmet need to fully realise the true potential of predictive and personalised genomic medicine in tackling human heart disease.",2014-12-10 +31540495,Interleukin4Rα (IL4Rα) and IL13Rα1 Are Associated with the Progress of Renal Cell Carcinoma through Janus Kinase 2 (JAK2)/Forkhead Box O3 (FOXO3) Pathways. ,"Specific kinds of interleukin (IL) receptors are known to mediate lymphocyte proliferation and survival. However, recent reports have suggested that the high expression of IL4Rα and IL13Rα1 in tumor tissue might be associated with tumorigenesis in several kinds of tumor. 
We found that a significant association between mRNA level of IL4Rα or IL13Rα1 and the poor prognosis of renal cell carcinoma (RCC) from the public database (http://www.oncolnc.org/). Then, we evaluated the clinicopathological significance of the immunohistochemical expression of IL4Rα and IL13Rα1 in 199 clear cell RCC (CCRCC) patients. The individual and co-expression patterns of IL4Rα and IL13Rα1 were significantly associated with cancer-specific survival (CSS) and relapse-free survival (RFS) in univariate analysis. Multivariate analysis indicated IL4Rα-positivity and co-expression of IL4Rα and IL13Rα1 as the independent indicators of shorter CSS and RFS of CCRCC patients. For the in vitro evaluation of the oncogenic role of IL4Rα and IL13Rα1 in RCC, we knock-downed IL4Rα or IL13Rα1 and observed that the cell proliferation rate was decreased, and the apoptosis rate was increased in A498 and ACHN cells. Furthermore, we examined the possible role of Janus kinase 2 (JAK2), well-known down-stream tyrosine kinase under the heterodimeric receptor complex of IL4Rα and IL13Rα1. Interestingly, JAK2 interacted with Forkhead box O3 (FOXO3) to cause tyrosine-phosphorylation of FOXO3. Silencing IL4Rα or JAK2 in A498 and ACHN cells reduced the interaction between JAK2 and FOXO3. Moreover, pharmacological inhibition of JAK2 induced the nuclear localization of FOXO3, leading to increase apoptosis and decrease cell proliferation rate in A498 and ACHN cells. Taken together, these results suggest that IL4Rα and IL13Rα1 might be involved in the progression of RCC through JAK2/FOXO3 pathway, and their expression might be used as the novel prognostic factor and therapeutic target for RCC patients.",2019-09-18 +30645178,Scalable and Flexible Unsupervised Feature Selection.,"Recently, graph-based unsupervised feature selection algorithms (GUFS) have been shown to efficiently handle prevalent high-dimensional unlabeled data. 
One common drawback associated with existing graph-based approaches is that they tend to be time-consuming and in need of large storage, especially when faced with the increasing size of data. Research has started using anchors to accelerate graph-based learning model for feature selection, while the hard linear constraint between the data matrix and the lower-dimensional representation is usually overstrict in many applications. In this letter, we propose a flexible linearization model with anchor graph and $\ell_{2,1}$-norm regularization, which can deal with large-scale data sets and improve the performance of the existing anchor-based method. In addition, the anchor-based graph Laplacian is constructed to characterize the manifold embedding structure by means of a parameter-free adaptive neighbor assignment strategy. An efficient iterative algorithm is developed to address the optimization problem, and we also prove the convergence of the algorithm. Experiments on several public data sets demonstrate the effectiveness and efficiency of the method we propose.",2019-01-15 +29220485,funRiceGenes dataset for comprehensive understanding and application of rice functional genes.,"

Background

As a main staple food, rice is also a model plant for functional genomic studies of monocots. Decoding of every DNA element of the rice genome is essential for genetic improvement to address increasing food demands. The past 15 years have witnessed extraordinary advances in rice functional genomics. Systematic characterization and proper deposition of every rice gene are vital for both functional studies and crop genetic improvement.

Findings

We built a comprehensive and accurate dataset of ∼2800 functionally characterized rice genes and ∼5000 members of different gene families by integrating data from available databases and reviewing every publication on rice functional genomic studies. The dataset accounts for 19.2% of the 39 045 annotated protein-coding rice genes, which provides the most exhaustive archive for investigating the functions of rice genes. We also constructed 214 gene interaction networks based on 1841 connections between 1310 genes. The largest network with 762 genes indicated that pleiotropic genes linked different biological pathways. Increasing degree of conservation of the flowering pathway was observed among more closely related plants, implying substantial value of rice genes for future dissection of flowering regulation in other crops. All data are deposited in the funRiceGenes database (https://funricegenes.github.io/). Functionality for advanced search and continuous updating of the database are provided by a Shiny application (http://funricegenes.ncpgr.cn/).

Conclusions

The funRiceGenes dataset would enable further exploring of the crosslink between gene functions and natural variations in rice, which can also facilitate breeding design to improve target agronomic traits of rice.",2018-01-01 +31193141,Comparative lipid profiling dataset of the inflammation-induced optic nerve regeneration.,"In adult mammals, retinal ganglion cells (RGCs) fail to regenerate following damage. As a result, RGCs die after acute injury and in progressive degenerative diseases such as glaucoma; this can lead to permanent vision loss and, eventually, blindness. Lipids are crucial for the development and maintenance of cell membranes, myelin sheaths, and cellular signaling pathways, however, little is known about their role in axon injury and repair. Studies examining changes to the lipidome during optic nerve (ON) regeneration could greatly inform treatment strategies, yet these are largely lacking. Experimental animal models of ON regeneration have facilitated the exploration of the molecular determinants that affect RGC axon regeneration. Here, we analyzed lipid profiles of the ON and retina in an ON crush rat model using liquid chromatography-mass spectrometry. Furthermore, we investigated lipidome changes after ON crush followed by intravitreal treatment with Zymosan, a yeast cell wall derivative known to enhance RGC regeneration. This data is available at the NIH Common Fund's Metabolomics Data Repository and Coordinating Center (supported by NIH grant, U01-DK097430) website, the Metabolomics Workbench, http://www.metabolomicsworkbench.org, where it has been assigned Project ID: PR000661. The data can be accessed directly via its Project DOI: doi: 10.21228/M87D53.",2019-04-25 +30759247,ST-Steiner: a spatio-temporal gene discovery algorithm.,"

Motivation

Whole exome sequencing (WES) studies for autism spectrum disorder (ASD) could identify only around six dozen risk genes to date because the genetic architecture of the disorder is highly complex. To speed the gene discovery process up, a few network-based ASD gene discovery algorithms were proposed. Although these methods use static gene interaction networks, functional clustering of genes is bound to evolve during neurodevelopment and disruptions are likely to have a cascading effect on the future associations. Thus, approaches that disregard the dynamic nature of neurodevelopment are limited.

Results

Here, we present a spatio-temporal gene discovery algorithm, which leverages information from evolving gene co-expression networks of neurodevelopment. The algorithm solves a prize-collecting Steiner forest-based problem on co-expression networks, adapted to model neurodevelopment and transfer information from precursor neurodevelopmental windows. The decisions made by the algorithm can be traced back, adding interpretability to the results. We apply the algorithm on ASD WES data of 3871 samples and identify risk clusters using BrainSpan co-expression networks of early- and mid-fetal periods. On an independent dataset, we show that incorporation of the temporal dimension increases the predictive power: predicted clusters are hit more and show higher enrichment in ASD-related functions compared with the state-of-the-art.

Availability and implementation

The code is available at http://ciceklab.cs.bilkent.edu.tr/st-steiner.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +28437484,"ECG-ViEW II, a freely accessible electrocardiogram database.","The Electrocardiogram Vigilance with Electronic data Warehouse II (ECG-ViEW II) is a large, single-center database comprising numeric parameter data of the surface electrocardiograms of all patients who underwent testing from 1 June 1994 to 31 July 2013. The electrocardiographic data include the test date, clinical department, RR interval, PR interval, QRS duration, QT interval, QTc interval, P axis, QRS axis, and T axis. These data are connected with patient age, sex, ethnicity, comorbidities, age-adjusted Charlson comorbidity index, prescribed drugs, and electrolyte levels. This longitudinal observational database contains 979,273 electrocardiograms from 461,178 patients over a 19-year study period. This database can provide an opportunity to study electrocardiographic changes caused by medications, disease, or other demographic variables. ECG-ViEW II is freely available at http://www.ecgview.org.",2017-04-24 +30203252,Risk of Adverse Drug Events Observed with Baricitinib 2 mg Versus Baricitinib 4 mg Once Daily for the Treatment of Rheumatoid Arthritis: A Systematic Review and Meta-Analysis of Randomized Controlled Trials.,"

Background

On 23 April 2018, the Food and Drug Administration-based Advisory Committee approved the use of baricitinib 2 mg for the treatment of rheumatoid arthritis and suggested the possibility of serious adverse events associated with baricitinib 4 mg. Hence, we aimed to systematically compare the risk of adverse drug events observed with baricitinib 2 mg versus 4 mg for the treatment of patients with rheumatoid arthritis.

Methods

Electronic databases including the Cochrane library, MEDLINE, EMBASE and http://www.ClinicalTrials.gov were searched for relevant English publications until April 2018. Adverse drug events at 12 weeks and 24 weeks were considered as the clinical endpoints. RevMan 5.3 software was used to analyze the data whereby risk ratios (RR) and 95% confidence intervals (CI) were calculated.

Results

Four trials consisting of a total of 959 participants were included in this analysis. At 12 weeks, no significant difference was observed between 2 mg and 4 mg baricitinib for serious adverse events (RR 1.33; 95% CI 0.63-2.78; p = 0.46), any adverse events after the start of therapy (RR 1.09; 95% CI 0.98-1.21; p = 0.13), discontinuation of drugs due to adverse events (RR 1.19; 95% CI 0.61-2.34; p = 0.60), malignancies (RR 3.03; 95% CI 0.12-73.90; p = 0.50), and major adverse cardiac events (RR 2.95; 95% CI 0.12-71.91; p = 0.51). Infections including herpes zoster infections and serious infections were also similarly manifested. At 24 weeks, serious adverse events (RR 1.84; 95% CI 1.02-3.30; p = 0.04) were significantly higher with baricitinib 4 mg compared with the 2-mg dosage. However, total adverse events after the start of therapy, discontinuation of drug due to adverse events, malignancies, major adverse cardiac events, infections including herpes zoster, and serious infections were not significantly different between the two doses.

Conclusions

No significant differences in adverse drug events were observed between baricitinib 2 mg and 4 mg at 12 weeks' follow-up. However, this analysis showed the risk of serious adverse events to be significantly higher with baricitinib 4 mg compared with baricitinib 2 mg at 24 weeks' follow-up. This hypothesis should be confirmed in larger trials with longer follow-up time periods.",2018-10-01 +31532508,"MUFold-SSW: a new web server for predicting protein secondary structures, torsion angles and turns.","

Motivation

Protein secondary structure and backbone torsion angle prediction can provide important information for predicting protein 3D structures and protein functions. Our new methods MUFold-SS, MUFold-Angle, MUFold-BetaTurn and MUFold-GammaTurn, developed based on advanced deep neural networks, achieved state-of-the-art performance for predicting secondary structures, backbone torsion angles, beta-turns and gamma-turns, respectively. An easy-to-use web service will provide the community a convenient way to use these methods for research and development.

Results

MUFold-SSW, a new web server, is presented. It provides predictions of protein secondary structures, torsion angles, beta-turns and gamma-turns for a given protein sequence. This server implements MUFold-SS, MUFold-Angle, MUFold-BetaTurn and MUFold-GammaTurn, which performed well for both easy targets (proteins with weak sequence similarity in PDB) and hard targets (proteins without detectable similarity in PDB) in various experimental tests, achieving results better than or comparable with those of existing methods.

Availability and implementation

MUFold-SSW is accessible at http://mufold.org/mufold-ss-angle.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +31660265,"Unfold: an integrated toolbox for overlap correction, non-linear modeling, and regression-based EEG analysis.","Electrophysiological research with event-related brain potentials (ERPs) is increasingly moving from simple, strictly orthogonal stimulation paradigms towards more complex, quasi-experimental designs and naturalistic situations that involve fast, multisensory stimulation and complex motor behavior. As a result, electrophysiological responses from subsequent events often overlap with each other. In addition, the recorded neural activity is typically modulated by numerous covariates, which influence the measured responses in a linear or non-linear fashion. Examples of paradigms where systematic temporal overlap variations and low-level confounds between conditions cannot be avoided include combined electroencephalogram (EEG)/eye-tracking experiments during natural vision, fast multisensory stimulation experiments, and mobile brain/body imaging studies. However, even ""traditional,"" highly controlled ERP datasets often contain a hidden mix of overlapping activity (e.g., from stimulus onsets, involuntary microsaccades, or button presses) and it is helpful or even necessary to disentangle these components for a correct interpretation of the results. In this paper, we introduce unfold, a powerful, yet easy-to-use MATLAB toolbox for regression-based EEG analyses that combines existing concepts of massive univariate modeling (""regression-ERPs""), linear deconvolution modeling, and non-linear modeling with the generalized additive model into one coherent and flexible analysis framework. The toolbox is modular, compatible with EEGLAB and can handle even large datasets efficiently. It also includes advanced options for regularization and the use of temporal basis functions (e.g., Fourier sets). 
We illustrate the advantages of this approach for simulated data as well as data from a standard face recognition experiment. In addition to traditional and non-conventional EEG/ERP designs, unfold can also be applied to other overlapping physiological signals, such as pupillary or electrodermal responses. It is available as open-source software at http://www.unfoldtoolbox.org.",2019-10-24 +27161011,BioCreative V CDR task corpus: a resource for chemical disease relation extraction. ,"Community-run, formal evaluations and manually annotated text corpora are critically important for advancing biomedical text-mining research. Recently in BioCreative V, a new challenge was organized for the tasks of disease named entity recognition (DNER) and chemical-induced disease (CID) relation extraction. Given the nature of both tasks, a test collection is required to contain both disease/chemical annotations and relation annotations in the same set of articles. Despite previous efforts in biomedical corpus construction, none was found to be sufficient for the task. Thus, we developed our own corpus called BC5CDR during the challenge by inviting a team of Medical Subject Headings (MeSH) indexers for disease/chemical entity annotation and Comparative Toxicogenomics Database (CTD) curators for CID relation annotation. To ensure high annotation quality and productivity, detailed annotation guidelines and automatic annotation tools were provided. The resulting BC5CDR corpus consists of 1500 PubMed articles with 4409 annotated chemicals, 5818 diseases and 3116 chemical-disease interactions. Each entity annotation includes both the mention text spans and normalized concept identifiers, using MeSH as the controlled vocabulary. 
To ensure accuracy, the entities were first captured independently by two annotators followed by a consensus annotation: The average inter-annotator agreement (IAA) scores were 87.49% and 96.05% for the disease and chemicals, respectively, in the test set according to the Jaccard similarity coefficient. Our corpus was successfully used for the BioCreative V challenge tasks and should serve as a valuable resource for the text-mining research community.Database URL: http://www.biocreative.org/tasks/biocreative-v/track-3-cdr/.",2016-05-09 +26578587,BioGPS: building your own mash-up of gene annotations and expression profiles.,"BioGPS (http://biogps.org) is a centralized gene-annotation portal that enables researchers to access distributed gene annotation resources. This article focuses on the updates to BioGPS since our last paper (2013 database issue). The unique features of BioGPS, compared to those of other gene portals, are its community extensibility and user customizability. Users contribute the gene-specific resources accessible from BioGPS ('plugins'), which helps ensure that the resource collection is always up-to-date and that it will continue expanding over time (since the 2013 paper, 162 resources have been added, for a 34% increase in the number of resources available). BioGPS users can create their own collections of relevant plugins and save them as customized gene-report pages or 'layouts' (since the 2013 paper, 488 user-created layouts have been added, for a 22% increase in the number of layouts). In addition, we recently updated the most popular plugin, the 'Gene expression/activity chart', to include ∼ 6000 datasets (from ∼ 2000 datasets) and we enhanced user interactivity. 
We also added a new 'gene list' feature that allows users to save query results for future reference.",2015-11-17 +24839966,Pancreatic Cancer Database: an integrative resource for pancreatic cancer.,"Pancreatic cancer is the fourth leading cause of cancer-related death in the world. The etiology of pancreatic cancer is heterogeneous with a wide range of alterations that have already been reported at the level of the genome, transcriptome, and proteome. The past decade has witnessed a large number of experimental studies using high-throughput technology platforms to identify genes whose expression at the transcript or protein levels is altered in pancreatic cancer. Based on expression studies, a number of molecules have also been proposed as potential biomarkers for diagnosis and prognosis of this deadly cancer. Currently, there are no repositories which provide an integrative view of multiple Omics data sets from published research on pancreatic cancer. Here, we describe the development of a web-based resource, Pancreatic Cancer Database (http://www.pancreaticcancerdatabase.org), as a unified platform for pancreatic cancer research. PCD contains manually curated information pertaining to quantitative alterations in miRNA, mRNA, and proteins obtained from small-scale as well as high-throughput studies of pancreatic cancer tissues and cell lines. We believe that PCD will serve as an integrative platform for scientific community involved in pancreatic cancer research.",2014-05-19 +31220804,Investigation and development of maize fused network analysis with multi-omics.,"Maize is a critically important staple crop in the whole world, which has contributed to both economic security and food in planting areas. The main target for researchers and breeding is the improvement of maize quality and yield. The use of computational biology methods combined with multi-omics for selecting biomolecules of interest for maize breeding has been receiving more attention. 
Moreover, the rapid growth of high-throughput sequencing data provides the opportunity to explore biomolecules of interest at the molecular level in maize. Furthermore, we constructed weighted networks for each of the omics and then integrated them into a final fused weighted network based on a nonlinear combination method. We also analyzed the final fused network and mined the orphan nodes, some of which were shown to be transcription factors that played a key role in maize development. This study could help to improve maize production via insights at the multi-omics level and provide a new perspective for maize researchers. All related data have been released at http://lab.malab.cn/∼jj/maize.htm.",2019-06-15 +31525302,Speech Perception Skills of Children With Speech Sound Disorders: A Systematic Review and Meta-Analysis.,"Purpose The aim of this study was to conduct a systematic review and meta-analysis to investigate whether preschool- and early school-age children with speech sound disorders (SSDs) have difficulties with speech perception. Method Systematic searching of 8 electronic databases identified 73 eligible studies across 71 articles examining the speech perception skills of children with SSDs. The findings and methodological characteristics of each study were reviewed, and the reporting of methodological information in each article was rated. A meta-analysis was conducted with studies that used the most common type of speech perception assessment task-lexical and/or phonetic judgment tasks. Results Across 60 of 73 studies, some or all children with SSDs were reported to have difficulties with speech perception. The meta-analysis showed a significant difference between children with SSDs and children with typically developing speech on lexical and/or phonetic judgment tasks. Conclusion Results from the meta-analysis demonstrate that children with SSDs have difficulties with speech perception. 
This appears to be the case for some but not all children with SSDs. The findings from this systematic review and meta-analysis also provide insight into the complex range of methodological issues involved in the study of speech perception in children with SSDs and the need for further research. Supplemental Material https://doi.org/10.23641/asha.9808361.",2019-09-16 +31536845,Analysis of Open Payments Receipts Among Surgical Faculty at a Large Academic Institution.,"

Background

Section 6002 of the Affordable Care Act, commonly referred to as ""The Sunshine Act,"" is legislation designed to provide transparency to the relationship between physicians and industry. Since 2013, medical product and pharmaceutical manufacturers were required to report any payments made to physicians to the Centers for Medicare and Medicaid Services (CMS). We predicted that most clinical faculty at our institution would be found on the Open Payments website. We elected to investigate payments in relationship to divisions within the department of surgery and the level of professorship.

Methods

All clinical faculty (n = 86) within the department of surgery at our institution were searched within the database: https://openpaymentsdata.cms.gov/. The total amount of payments, number of payments, and the nature of payments (food and beverage, travel and lodging, consulting, education, speaking, entertainment, gifts and honoraria) were recorded for 2017. Comparison by unpaired t-test (or ANOVA) where applicable, significance defined as P < 0.05.

Results

Of the 86 faculty studied, 75% were found within the CMS Open Payments database in 2017. The mean amount of payment was $4024 (range $13-152,215). Median amount of payment was $434.90 (range $12.75-152,214.70). Faculty receiving outside compensation varied significantly by division and academic rank (P < 0.05). Plastic surgery had the highest percentage of people receiving any form of payment ($143-$1912) and GI surgery had the largest payments associated with device management ($0-$152,215). The variation seen by rank was driven by a small number of faculty with receipt of large payments at the associate professor level. The median amount of payment was $428.53 (range $13.97-2306.05) for assistant professors, $5328.03 (range $28.30-152,214.70) for Associate Professors, and $753.82 (range $12.75-17,708.65) for full professors.

Conclusions

Reporting of open payments to CMS provides transparency between physicians and industry. The significant relationship of division and rank with open payments database is driven by relatively few faculty. The majority (94%) received either no payments or less than $10,000.",2019-09-16 +31536775,"Starch-binding domains as CBM families-history, occurrence, structure, function and evolution.","The term ""starch-binding domain"" (SBD) has been applied to a domain within an amylolytic enzyme that gave the enzyme the ability to bind onto raw, i.e. thermally untreated, granular starch. An SBD is a special case of a carbohydrate-binding domain, which in general, is a structurally and functionally independent protein module exhibiting no enzymatic activity but possessing potential to target the catalytic domain to the carbohydrate substrate to accommodate it and process it at the active site. As so-called families, SBDs together with other carbohydrate-binding modules (CBMs) have become an integral part of the CAZy database (http://www.cazy.org/). The first two well-described SBDs, i.e. the C-terminal Aspergillus-type and the N-terminal Rhizopus-type have been assigned the families CBM20 and CBM21, respectively. Currently, among the 85 established CBM families in CAZy, fifteen can be considered as families having SBD functional characteristics: CBM20, 21, 25, 26, 34, 41, 45, 48, 53, 58, 68, 69, 74, 82 and 83. All known SBDs, with the exception of the extra long CBM74, were recognized as a module consisting of approximately 100 residues, adopting a β-sandwich fold and possessing at least one carbohydrate-binding site. 
The present review aims to deliver and describe: (i) the SBD identification in different amylolytic and related enzymes (e.g., CAZy GH families) as well as in other relevant enzymes and proteins (e.g., laforin, the β-subunit of AMPK, and others); (ii) information on the position in the polypeptide chain and the number of SBD copies and their CBM family affiliation (if appropriate); (iii) structure/function studies of SBDs with a special focus on solved tertiary structures, in particular, as complexes with α-glucan ligands; and (iv) the evolutionary relationships of SBDs in a tree common to all SBD CBM families (except for the extra long CBM74). Finally, some special cases and novel potential SBDs are also introduced.",2019-09-16 +31585641,Genomic polymorphism of Mycoplasma flocculare revealed by a newly developed multilocus sequence typing scheme.,"Mycoplasma flocculare is genetically closely related to M. hyopneumoniae, the etiologic agent of porcine enzootic pneumonia, and is frequently isolated with this second species. In this article, we report on the development of the first multilocus sequence typing (MLST) scheme for M. flocculare, based on three genes (adk, rpoB and tpiA). In total, 5022 bp of sequence were analyzed. MLST was used to characterize seven M. flocculare isolates and the reference strain. Eight distinct sequence types were defined, showing the great intraspecies variability of M. flocculare, and the high discriminatory power of the new typing method. The relative contribution of recombinations to the genomic evolution of M. flocculare was revealed by calculating the index of association (IA: 0.0185). This MLST scheme is now available for the acquisition of new knowledge on M. 
flocculare epidemiology via an online database comprising the DNA sequences of each allele, available at http://pubmlst.org/mflocculare/.",2019-09-16 +29760467,The Oral Microbiome Bank of China.,"The human microbiome project (HMP) promoted further understanding of human oral microbes. However, research on the human oral microbiota has not made as much progress as research on the gut microbiota. Currently, the causal relationship between the oral microbiota and oral diseases remains unclear, and little is known about the link between the oral microbiota and human systemic diseases. To further understand the contribution of the oral microbiota in oral diseases and systemic diseases, a Human Oral Microbiome Database (HOMD) was established in the US. The HOMD includes 619 taxa in 13 phyla, and most of the microorganisms are from American populations. Due to individual differences in the microbiome, the HOMD does not reflect the Chinese oral microbial status. Herein, we established a new oral microbiome database-the Oral Microbiome Bank of China (OMBC, http://www.sklod.org/ombc ). Currently, the OMBC includes information on 289 bacterial strains and 720 clinical samples from the Chinese population, along with lab and clinical information. The OMBC is the first curated description of a Chinese-associated microbiome; it provides tools for use in investigating the role of the oral microbiome in health and diseases, and will give the community abundant data and strain information for future oral microbial studies.",2018-05-03 +31034726,"Genetic, epigenetic and genomic effects on variation of gene expression among grape varieties.","The transcriptional regulatory structure of plant genomes is still relatively unexplored, and little is known about factors that influence expression variation in plants. 
We used a genetic system consisting of 10 heterozygous grape varieties with high consanguinity and high haplotypic diversity to: (i) identify regions of haplotype sharing through whole-genome resequencing and single-nucleotide polymorphism (SNP) genotyping; (ii) analyse gene expression through RNA-seq in four stages of berry development; and (iii) associate gene expression variation with genetic and epigenetic properties. We found that haplotype sharing in and around genes was positively correlated with similarity in expression and was negatively correlated with the fraction of differentially expressed genes. Genetic and epigenetic properties of the gene and the surrounding region showed significant effects on the extent of expression variation, with negative associations for the level of gene body methylation and mean expression level, and with positive associations for nucleotide diversity, structural diversity and ratio of non-synonymous to synonymous nucleotide diversity. We also observed a spatial dependency of covariation of gene expression among varieties. These results highlight relevant roles for cis-acting factors, selective constraints and epigenetic features of the gene, and the regional context in which the gene is located, in the determination of expression variation. OPEN RESEARCH BADGES: This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://www.ncbi.nlm.nih.gov/bioproject/PRJNA385116; https://www.ncbi.nlm.nih.gov/bioproject/PRJNA392287; https://www.ncbi.nlm.nih.gov/bioproject/PRJNA373967 (released upon publication); https://www.ncbi.nlm.nih.gov/bioproject/PRJNA490160 (released upon publication); https://www.ncbi.nlm.nih.gov/bioproject/PRJNA265039; https://www.ncbi.nlm.nih.gov/bioproject/PRJNA265040.",2019-06-07 +24872424,"CODOC: efficient access, analysis and compression of depth of coverage signals.","

Unlabelled

Current data formats for the representation of depth of coverage data (DOC), a central resource for interpreting, filtering or detecting novel features in high-throughput sequencing datasets, were primarily designed for visualization purposes. This limits their applicability in stand-alone analyses of these data, mainly owing to inaccurate representation or mediocre data compression. CODOC is a novel data format and comprehensive application programming interface for efficient representation, access and analysis of DOC data. CODOC compresses these data ∼ 4-32× better than the best current comparable method by exploiting specific data characteristics while at the same time enabling more-exact signal recovery for lossy compression and very fast query answering times.

Availability and implementation

Java source code and binaries are freely available for non-commercial use at http://purl.org/bgraph/codoc.",2014-05-28 +31784861,"Docking rigid macrocycles using Convex-PL, AutoDock Vina, and RDKit in the D3R Grand Challenge 4.","The D3R Grand Challenge 4 provided a brilliant opportunity to test macrocyclic docking protocols on a diverse high-quality experimental data. We participated in both pose and affinity prediction exercises. Overall, we aimed to use an automated structure-based docking pipeline built around a set of tools developed in our team. This exercise again demonstrated a crucial importance of the correct local ligand geometry for the overall success of docking. Starting from the second part of the pose prediction stage, we developed a stable pipeline for sampling macrocycle conformers. This resulted in the subangstrom average precision of our pose predictions. In the affinity prediction exercise we obtained average results. However, we could improve these when using docking poses submitted by the best predictors. Our docking tools including the Convex-PL scoring function are available at https://team.inria.fr/nano-d/software/.",2019-11-29 +29477337,Fuzziness in Protein Interactions-A Historical Perspective.,"The proposal that coupled folding to binding is not an obligatory mechanism for intrinsically disordered (ID) proteins was put forward 10 years ago. The notion of fuzziness implies that conformational heterogeneity can be maintained upon interactions of ID proteins, which has a functional impact either on regulated assembly or activity of the corresponding complexes. Here I review how the concept has evolved in the past decade, via increasing experimental data providing insights into the mechanisms, pathways and regulatory modes. The effects of structural diversity and transient contacts on protein assemblies have been collected and systematically analyzed (Fuzzy Complexes Database, http://protdyn-database.org). 
Fuzziness has also been exploited as a framework to decipher molecular organization of higher-order protein structures. Quantification of conformational heterogeneity opens exciting future perspectives for drug discovery from small molecule-ID protein interactions to supramolecular assemblies.",2018-02-23 +29672508,The gender gap in science: How long until women are equally represented?,"Women comprise a minority of the Science, Technology, Engineering, Mathematics, and Medicine (STEMM) workforce. Quantifying the gender gap may identify fields that will not reach parity without intervention, reveal underappreciated biases, and inform benchmarks for gender balance among conference speakers, editors, and hiring committees. Using the PubMed and arXiv databases, we estimated the gender of 36 million authors from >100 countries publishing in >6000 journals, covering most STEMM disciplines over the last 15 years, and made a web app allowing easy access to the data (https://lukeholman.github.io/genderGap/). Despite recent progress, the gender gap appears likely to persist for generations, particularly in surgery, computer science, physics, and maths. The gap is especially large in authorship positions associated with seniority, and prestigious journals have fewer women authors. Additionally, we estimate that men are invited by journals to submit papers at approximately double the rate of women. Wealthy countries, notably Japan, Germany, and Switzerland, had fewer women authors than poorer ones. 
We conclude that the STEMM gender gap will not close without further reforms in education, mentoring, and academic publishing.",2018-04-19 +32020991,Information resources supporting scientific research for the international laser ranging service.,"The International Laser Ranging Service (ILRS) through its permanent components (Tracking Stations, Operations Centers, Data Centers, Analysis Centers, Central Bureau, and Governing Board) distributes satellite and lunar laser ranging data and derived products to support global, multidisciplinary scientific research. The ILRS Data Centers and Central Bureau serve as the primary source for information, data, and products for this global user community. The ILRS website, https://ilrs.gsfc.nasa.gov, is a key tool for communication for the service, providing background information on the ILRS, its organization and operation, and detailed descriptions of ILRS components, data, and products. Links are provided to extensive information on the supported satellite missions and ILRS network stations including performance assessments and data quality evaluations. Furthermore, the website connects users to archives of laser ranging data and derived products available through the data centers. In this paper, we discuss the development of the ILRS infrastructure, its current status, website resources, description of laser ranging data and products, and plans for future enhancements.",2018-10-01 +24194595,The Aspergillus Genome Database: multispecies curation and incorporation of RNA-Seq data to improve structural gene annotations.,"The Aspergillus Genome Database (AspGD; http://www.aspgd.org) is a freely available web-based resource that was designed for Aspergillus researchers and is also a valuable source of information for the entire fungal research community. 
In addition to being a repository and central point of access to genome, transcriptome and polymorphism data, AspGD hosts a comprehensive comparative genomics toolbox that facilitates the exploration of precomputed orthologs among the 20 currently available Aspergillus genomes. AspGD curators perform gene product annotation based on review of the literature for four key Aspergillus species: Aspergillus nidulans, Aspergillus oryzae, Aspergillus fumigatus and Aspergillus niger. We have iteratively improved the structural annotation of Aspergillus genomes through the analysis of publicly available transcription data, mostly expressed sequenced tags, as described in a previous NAR Database article (Arnaud et al. 2012). In this update, we report substantive structural annotation improvements for A. nidulans, A. oryzae and A. fumigatus genomes based on recently available RNA-Seq data. Over 26 000 loci were updated across these species; although those primarily comprise the addition and extension of untranslated regions (UTRs), the new analysis also enabled over 1000 modifications affecting the coding sequence of genes in each target genome.",2013-11-04 +30473618,"A story of data won, data lost and data re-found: the realities of ecological data preservation.","This paper discusses the process of retrieval and updating legacy data to allow on-line discovery and delivery. There are many pitfalls of institutional and non-institutional ecological data conservation over the long term. Interruptions to custodianship, old media, lost knowledge and the continuous evolution of species names makes resurrection of old data challenging. We caution against technological arrogance and emphasise the importance of international standards. We use a case study of a compiled set of continent-wide vegetation survey data for which, although the analyses had been published, the raw data had not. 
In the original study, publications containing plot data collected from the 1880s onwards had been collected, interpreted, digitised and integrated for the classification of vegetation and analysis of its conservation status across Australia. These compiled data are an extremely valuable national collection that demanded publishing in open, readily accessible online repositories, such as the Terrestrial Ecosystem Research Network (http://www.tern.org.au) and the Atlas of Living Australia (ALA: http://www.ala.org.au), the Australian node of the Global Biodiversity Information Facility (GBIF: http://www.gbif.org). It is hoped that the lessons learnt from this project may trigger a sober review of the value of endangered data, the cost of retrieval and the importance of suitable and timely archiving through the vicissitudes of technological change, so the initial unique collection investment enables multiple re-use in perpetuity.",2018-11-07 +30702160,"Germline variation in BRCA1/2 is highly ethnic-specific: Evidence from over 30,000 Chinese hereditary breast and ovarian cancer patients.","BRCA1 and BRCA2 play essential roles in maintaining the genome stability. Pathogenic germline mutations in these two genes disrupt their function, lead to genome instability and increase the risk of developing breast and ovarian cancers. BRCA mutations have been extensively screened in Caucasian populations, and the resulting information are used globally as the standard reference in clinical diagnosis, treatment and prevention of BRCA-related cancers. Recent studies suggest that BRCA mutations can be ethnic-specific, raising the question whether a Caucasian-based BRCA mutation information can be used as a universal standard worldwide, or whether an ethnicity-based BRCA mutation information system need to be developed for the corresponding ethnic populations. 
In this study, we used Chinese population as a model to test ethnicity-specific BRCA mutations considering that China has one of the latest numbers of breast cancer patients therefore BRCA mutation carriers. Through comprehensive data mining, standardization and annotation, we collected 1,088 distinct BRCA variants derived from over 30,000 Chinese individuals, one of the largest BRCA data set from a non-Caucasian population covering nearly all known BRCA variants in the Chinese population (https://dbBRCA-Chinese.fhs.umac.mo). Using this data, we performed multi-layered analyses to determine the similarities and differences of BRCA variation between Chinese and non-Chinese ethnic populations. The results show the substantial differences of BRCA data between Chinese and non-Chinese ethnicities. Our study indicates that the current Caucasian population-based BRCA data is not adequate to represent the BRCA status in non-Caucasian populations. Therefore, ethnic-based BRCA standards need to be established to serve for the non-Caucasian populations.",2019-02-13 +30244682,Examining intersectional inequalities in access to health (enabling) resources in disadvantaged communities in Scotland: advancing the participatory paradigm.,"

Background

Multiple structural, contextual and individual factors determine social disadvantage and affect health experience. There is limited understanding, however, of how this complex system works to shape access to health enabling resources (HER), especially for most marginalised or hard-to-reach populations. As a result, planning continues to be bereft of voices and lived realities of those in the margins. This paper reports on key findings and experience of a participatory action research (PAR) that aimed to deepen understanding of how multiple disadvantages (and structures of oppression) interact to produce difference in access to resources affecting well-being in disadvantaged communities in Edinburgh.

Methods

An innovative approach combining intersectionality and PAR was adopted and operationalised in three overlapping phases. A preparatory phase helped establish relationships with participant groups and policy stakeholders, and challenge assumptions underlying the study design. Field-work and analysis was conducted iteratively in two phases: with a range of participants working in policy and community roles (or 'bridge' populations), followed by residents of one Edinburgh locality with relatively high levels of deprivation (As measured by the Scottish Index of Multiple Deprivation, a geographically-based indicator. See http://www.gov.scot/Topics/Statistics/SIMD/DataAnalysis/SPconstituencyprofile/EdinburghNorthern-Leith ). Traditional qualitative methods (interviews, focus groups) alongside participatory methods (health resource mapping, spider-grams, photovoice) were employed to facilitate action-oriented knowledge production among multiply disadvantaged groups.

Results

There was considerable agreement across groups and communities as to what healthful living (in general) means. This entailed a combination of material, environmental, socio-cultural and affective resources including: a sense of belonging and of purpose, feeling valued, self-esteem, safe/secure housing, reliable income, and access to responsive and sensitive health care when needed. Differences emerge in the value placed by people at different social locations on these resources. The conditions/aspects of their living environment that affected their access to and ability to translate these resources into improved health also appeared to vary with social location.

Conclusion

Integrating intersectionality with PAR enables the generation of a fuller understanding of disparities in the distribution of, and access to, HER, notably from the standpoint of those excluded from mainstream policy and planning processes. Employing an intersectionality lens helped illuminate links between individual subjectivities and wider social structures and power relations. PAR on the other hand offered the potential to engage multiply disadvantaged groups in a process to collectively build local knowledge for action to develop healthier communities and towards positive community-led social change.",2018-09-24 +23284744,StRAP: an integrated resource for profiling high-throughput cancer genomic data from stress response studies.,"The increasing availability and maturity of DNA microarray technology has led to an explosion of cancer profiling studies for identifying cancer biomarkers, and predicting treatment response. Uncovering complex relationships, however, remains the most challenging task as it requires compiling and efficiently querying data from various sources. Here, we describe the Stress Response Array Profiler (StRAP), an open-source, web-based resource for storage, profiling, visualization, and sharing of cancer genomic data. StRAP houses multi-cancer microarray data with major emphasis on radiotherapy studies, and takes a systems biology approach towards the integration, comparison, and cross-validation of multiple cancer profiling studies. The database is a comprehensive platform for comparative analysis of gene expression data. For effective use of arrays, we provide user-friendly and interactive visualization tools that can display the data and query results. StRAP is web-based, platform-independent, and freely accessible at http://strap.nci.nih.gov/.",2012-12-17 +29553459,"Air Toxics in Relation to Autism Diagnosis, Phenotype, and Severity in a U.S. 
Family-Based Study.","BACKGROUND:Previous studies have reported associations of perinatal exposure to air toxics, including some metals and volatile organic compounds, with autism spectrum disorder (ASD). OBJECTIVES:Our goal was to further explore associations of perinatal air toxics with ASD and associated quantitative traits in high-risk multiplex families. METHODS:We included participants of a U.S. family-based study [the Autism Genetic Resource Exchange (AGRE)] who were born between 1994 and 2007 and had address information. We assessed associations between average annual concentrations at birth for each of 155 air toxics from the U.S. EPA emissions-based National-scale Air Toxics Assessment and a) ASD diagnosis (1,540 cases and 477 controls); b) a continuous measure of autism-related traits, the Social Responsiveness Scale (SRS, among 1,272 cases and controls); and c) a measure of autism severity, the Calibrated Severity Score (among 1,380 cases). In addition to the individual's air toxic level, mixed models (clustering on family) included the family mean air toxic level, birth year, and census covariates, with consideration of the false discovery rate. RESULTS:ASD diagnosis was positively associated with propionaldehyde, methyl tert-butyl ether (MTBE), bromoform, 1,4-dioxane, dibenzofurans, and glycol ethers and was inversely associated with 1,4-dichlorobenzene, 4,4'-methylene diphenyl diisocyanate (MDI), benzidine, and ethyl carbamate (urethane). These associations were robust to adjustment in two-pollutant models. Autism severity was associated positively with carbon disulfide and chlorobenzene, and negatively with 1,4-dichlorobenzene. There were no associations with the SRS. CONCLUSIONS:Some air toxics were associated with ASD risk and severity, including some traffic-related air pollutants and newly-reported associations, but other previously reported associations with metals and volatile organic compounds were not reproducible. 
https://doi.org/10.1289/EHP1867.",2018-03-12 +31832145,To remain or leave: Dispersal variation and its genetic consequences in benthic freshwater invertebrates.,"Variation in dispersal capacity may influence population genetic variation and relatedness of freshwater animals thus demonstrating how life-history traits influence patterns and processes that in turn influence biodiversity. The majority of studies have focused on the consequences of dispersal variation in taxa inhabiting riverine systems whose dendritic nature and upstream/downstream gradients facilitate characterizing populations along networks. We undertook extensive, large-scale investigations of the impacts of hydrological connectivity on population genetic variation in two freshwater bryozoan species whose dispersive propagules (statoblasts) are either attached to surfaces (Fredericella sultana) or are released as buoyant stages (Cristatella mucedo) and that live primarily in either lotic (F. sultana) or lentic environments (C. mucedo). Describing population genetic structure in multiple sites characterized by varying degrees of hydrological connectivity within each of three (or four) UK regions enabled us to test the following hypotheses: (1) genetic diversity and gene flow will be more influenced by hydrological connectivity in populations of C. mucedo (because F. sultana dispersal stages are retained); (2) populations of F. sultana will be characterized by greater genetic divergence than those of C. mucedo (reflecting their relative dispersal capacities); and (3) genetic variation will be greatest in F. sultana (reflecting a propensity for genetic divergence as a result of its low dispersal potential). We found that hydrological connectivity enhanced genetic diversity and gene flow among C. mucedo populations but not in F. sultana while higher overall measures of clonal diversity and greater genetic divergence characterized populations of F. sultana. 
We suggest that genetic divergence over time within F. sultana populations reflects a general constraint of releasing propagules that might eventually be swept to sea when taxa inhabit running waters. In contrast, taxa that primarily inhabit lakes and ponds may colonize across hydrologically connected regions, establishing genetically related populations. Our study contributes more nuanced views about drivers of population genetic structures in passively dispersing freshwater invertebrates as outlined by the Monopolization Hypothesis (Acta Oecologica, 23, 2002, 121) by highlighting how a range of demographic and evolutionary processes reflect life-history attributes of benthic colonial invertebrates (bryozoans) and cyclically parthenogenetic zooplankton. In addition, growing evidence that genetic divergence may commonly characterize populations of diverse groups of riverine taxa suggests that organisms inhabiting lotic systems may be particularly challenged by environmental change. Such change may predispose riverine populations to extinction as a result of genetic divergence combined with limited dispersal and gene flow.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.1tm8705.",2019-10-18 +31616466,"""EviMass"": A Literature Evidence-Based Miner for Human Microbial Associations.","The importance of understanding microbe-microbe as well as microbe-disease associations is one of the key thrust areas in human microbiome research. High-throughput metagenomic and transcriptomic projects have fueled discovery of a number of new microbial associations. Consequently, a plethora of information is being added routinely to biomedical literature, thereby contributing toward enhancing our knowledge on microbial associations. In this communication, we present a tool called ""EviMass"" (Evidence based mining of human Microbial Associations), which can assist biologists to validate their predicted hypotheses from new microbiome studies. Users can interactively query the processed back-end database for microbe-microbe and disease-microbe associations. The EviMass tool can also be used to upload microbial association networks generated from a human ""disease-control"" microbiome study and validate the associations from biomedical literature. Additionally, a list of differentially abundant microbes for the corresponding disease can be queried in the tool for reported evidences. The results are presented as graphical plots, tabulated summary, and other evidence statistics. EviMass is a comprehensive platform and is expected to enable microbiome researchers not only in mining microbial associations, but also enriching a new research hypothesis. 
The tool is available free for academic use at https://web.rniapps.net/evimass.",2019-09-13 +30590490,Computational prediction and analysis of species-specific fungi phosphorylation via feature optimization strategy.,"Protein phosphorylation is a reversible and ubiquitous post-translational modification that primarily occurs at serine, threonine and tyrosine residues and regulates a variety of biological processes. In this paper, we first briefly summarized the current progresses in computational prediction of eukaryotic protein phosphorylation sites, which mainly focused on animals and plants, especially on human, with a less extent on fungi. Since the number of identified fungi phosphorylation sites has greatly increased in a wide variety of organisms and their roles in pathological physiology still remain largely unknown, more attention has been paid on the identification of fungi-specific phosphorylation. Here, experimental fungi phosphorylation sites data were collected and most of the sites were classified into different types to be encoded with various features and trained via a two-step feature optimization method. A novel method for prediction of species-specific fungi phosphorylation-PreSSFP was developed, which can identify fungi phosphorylation in seven species for specific serine, threonine and tyrosine residues (http://computbiol.ncu.edu.cn/PreSSFP). Meanwhile, we critically evaluated the performance of PreSSFP and compared it with other existing tools. The satisfying results showed that PreSSFP is a robust predictor. Feature analyses exhibited that there have some significant differences among seven species. The species-specific prediction via two-step feature optimization method to mine important features for training could considerably improve the prediction performance. 
We anticipate that our study provides a new lead for future computational analysis of fungi phosphorylation.",2020-03-01 +31699507,"Seasonal influenza vaccination in middle-income countries: Assessment of immunization practices in Belarus, Morocco, and Thailand.","

Background

Vaccines for the control of seasonal influenza are recommended by the World Health Organization (WHO) for use in specific risk groups, but their use requires operational considerations that may challenge immunization programs. Several middle-income countries have recently implemented seasonal influenza vaccination. Early program evaluation following vaccine introduction can help ascertain positive lessons learned and areas for improvement.

Methods

An influenza vaccine post-introduction evaluation (IPIE) tool was developed jointly by WHO and the U.S. Centers for Disease Control and Prevention to provide a systematic approach to assess influenza vaccine implementation processes. The tool was used in 2017 in three middle-income countries: Belarus, Morocco and Thailand.

Results

Data from the three countries highlighted a number of critical factors: Health workers (HWs) are a key target group, given their roles as key influencers of acceptance by other groups, and for ensuring vaccine delivery and improved coverage. Despite WHO recommendations, pregnant women were not always prioritized and may present unique challenges for acceptance. Target group denominators need to be better defined, and vaccine coverage should be validated with vaccine distribution data, including from the private sector. There is a need for strengthening adverse events reporting and for addressing potential vaccine hesitancy through the establishment of risk communication plans. The assessments led to improvements in the countries' influenza vaccination programs, including a revision of policies, changes in vaccine management and coverage estimation, enhanced strategies for educating HWs and intensified collaboration between departments involved in implementing seasonal influenza vaccination.

Conclusion

The IPIE tool was found useful for delineating operational strengths and weaknesses of seasonal influenza vaccination programs. HWs emerged as a critical target group to be addressed in follow-up action. Findings from this study can help direct influenza vaccination programs in other countries, as well as contribute to pandemic preparedness efforts. The updated IPIE tool is available on the WHO website http://www.who.int/immunization/research/development/influenza/en/index1.html.",2019-11-04 +24180668,Path2Models: large-scale generation of computational models from biochemical pathway maps.,"

Background

Systems biology projects and omics technologies have led to a growing number of biochemical pathway models and reconstructions. However, the majority of these models are still created de novo, based on literature mining and the manual processing of pathway data.

Results

To increase the efficiency of model creation, the Path2Models project has automatically generated mathematical models from pathway representations using a suite of freely available software. Data sources include KEGG, BioCarta, MetaCyc and SABIO-RK. Depending on the source data, three types of models are provided: kinetic, logical and constraint-based. Models from over 2 600 organisms are encoded consistently in SBML, and are made freely available through BioModels Database at http://www.ebi.ac.uk/biomodels-main/path2models. Each model contains the list of participants, their interactions, the relevant mathematical constructs, and initial parameter values. Most models are also available as easy-to-understand graphical SBGN maps.

Conclusions

To date, the project has resulted in more than 140 000 freely available models. Such a resource can tremendously accelerate the development of mathematical models by providing initial starting models for simulation and analysis, which can be subsequently curated and further parameterized.",2013-11-01 +27549193,Comprehensive analyses of tumor immunity: implications for cancer immunotherapy.,"

Background

Understanding the interactions between tumor and the host immune system is critical to finding prognostic biomarkers, reducing drug resistance, and developing new therapies. Novel computational methods are needed to estimate tumor-infiltrating immune cells and understand tumor-immune interactions in cancers.

Results

We analyze tumor-infiltrating immune cells in over 10,000 RNA-seq samples across 23 cancer types from The Cancer Genome Atlas (TCGA). Our computationally inferred immune infiltrates associate much more strongly with patient clinical features, viral infection status, and cancer genetic alterations than other computational approaches. Analysis of cancer/testis antigen expression and CD8 T-cell abundance suggests that MAGEA3 is a potential immune target in melanoma, but not in non-small cell lung cancer, and implicates SPAG5 as an alternative cancer vaccine target in multiple cancers. We find that melanomas expressing high levels of CTLA4 separate into two distinct groups with respect to CD8 T-cell infiltration, which might influence clinical responses to anti-CTLA4 agents. We observe similar dichotomy of TIM3 expression with respect to CD8 T cells in kidney cancer and validate it experimentally. The abundance of immune infiltration, together with our downstream analyses and findings, are accessible through TIMER, a public resource at http://cistrome.org/TIMER .

Conclusions

We develop a computational approach to study tumor-infiltrating immune cells and their interactions with cancer cells. Our resource of immune-infiltrate levels, clinical associations, as well as predicted therapeutic markers may inform effective cancer vaccine and checkpoint blockade therapies.",2016-08-22 +29625201,saRNAdb: Resource of Small Activating RNAs for Up-regulating the Gene Expression.,"RNA activation (RNAa) is the process of enhancing selective gene expression at transcriptional level using double-stranded RNAs, targeting gene promoter. These RNA molecules are usually 21 nucleotides long and termed as small activating RNAs (saRNAs). They are involved in gene regulation, epigenetics, gain-of-function studies and have potential therapeutic applications for various diseases especially cancer. RNAa is opposite to RNA interference in functionality; however, both processes share some protein machinery. There are many RNA interference centered online resources but no one for saRNAs; therefore, we developed ""saRNAdb"" database (http://bioinfo.imtech.res.in/manojk/sarna/). It contains 2150 manually curated saRNA entries with detailed information about their nucleotide sequences, activities, corresponding target gene, promoter and other experimental data. Besides, saRNA-promoter binding location, predicted saRNA features, tools (off-target, map) and RNAa-related proteins with their interacting partners are provided. saRNAdb is expected to assist in RNA research especially for nucleic acid-based therapeutics development.",2018-04-03 +25620700,A genome-wide gene-expression analysis and database in transgenic mice during development of amyloid or tau pathology.,"We provide microarray data comparing genome-wide differential expression and pathology throughout life in four lines of ""amyloid"" transgenic mice (mutant human APP, PSEN1, or APP/PSEN1) and ""TAU"" transgenic mice (mutant human MAPT gene). 
Microarray data were validated by qPCR and by comparison to human studies, including genome-wide association study (GWAS) hits. Immune gene expression correlated tightly with plaques whereas synaptic genes correlated negatively with neurofibrillary tangles. Network analysis of immune gene modules revealed six hub genes in hippocampus of amyloid mice, four in common with cortex. The hippocampal network in TAU mice was similar except that Trem2 had hub status only in amyloid mice. The cortical network of TAU mice was entirely different with more hub genes and few in common with the other networks, suggesting reasons for specificity of cortical dysfunction in FTDP17. This Resource opens up many areas for investigation. All data are available and searchable at http://www.mouseac.org.",2015-01-22 +24098075,The Nanomaterial Registry: facilitating the sharing and analysis of data in the diverse nanomaterial community.,"The amount of data being generated in the nanotechnology research space is significant, and the coordination, sharing, and downstream analysis of the data is complex and consistently deliberated. The complexities of the data are due in large part to the inherently complicated characteristics of nanomaterials. Also, testing protocols and assays used for nanomaterials are diverse and lacking standardization. The Nanomaterial Registry has been developed to address such challenges as the need for standard methods, data formatting, and controlled vocabularies for data sharing. The Registry is an authoritative, web-based tool whose purpose is to simplify the community's level of effort in assessing nanomaterial data from environmental and biological interaction studies. Because the Registry is meant to be an authoritative resource, all data-driven content is systematically archived and reviewed by subject-matter experts. 
To support and advance nanomaterial research, a set of minimal information about nanomaterials (MIAN) has been developed and is foundational to the Registry data model. The MIAN has been used to create evaluation and similarity criteria for nanomaterials that are curated into the Registry. The Registry is a publicly available resource that is being built through collaborations with many stakeholder groups in the nanotechnology community, including industry, regulatory, government, and academia. Features of the Registry website (http://www.nanomaterialregistry.org) currently include search, browse, side-by-side comparison of nanomaterials, compliance ratings based on the quality and quantity of data, and the ability to search for similar nanomaterials within the Registry. This paper is a modification and extension of a proceedings paper for the Institute of Electrical and Electronics Engineers.",2013-09-16 +24165311,PROGgene: gene expression based survival analysis web application for multiple cancers.,"

Background

Identification of prognostic mRNA biomarkers has been done for various cancer types. The data that are published from such studies are archived in public repositories. There are hundreds of such datasets available for multiple cancer types in public repositories. The wealth of such data can be utilized to study prognostic implications of mRNA in different cancers as well as in different populations or subtypes of the same cancer.

Description

We have created a web application that can be used for studying prognostic implications of mRNA biomarkers in a variety of cancers. We have compiled data from public repositories such as GEO, EBI Array Express and The Cancer Genome Atlas for creating this tool. With 64 patient series from 18 cancer types in our database, this tool provides the most comprehensive resource available for survival analysis to date. The tool is called PROGgene and it is available at http://www.compbio.iupui.edu/proggene.

Conclusions

We present this tool as a hypothesis generation tool for researchers to identify potential prognostic mRNA biomarkers to follow up with further research. For this reason, we have kept the web application very simple and straightforward. We believe this tool will be useful in accelerating biomarker discovery in cancer and quickly providing results that may indicate disease-specific prognostic value of specific biomarkers.",2013-10-28 +28587637,Literature evidence in open targets - a target validation platform.,"

Background

We present the Europe PMC literature component of Open Targets - a target validation platform that integrates various evidence to aid drug target identification and validation. The component identifies target-disease associations in documents and ranks the documents based on their confidence from the Europe PMC literature database, by using rules utilising expert-provided heuristic information. The confidence score of a given document represents how valuable the document is in the scope of target validation for a given target-disease association by taking into account the credibility of the association based on the properties of the text. The component serves the platform regularly with the up-to-date data since December, 2015.

Results

Currently, there are a total of 1168365 distinct target-disease associations text mined from >26 million PubMed abstracts and >1.2 million Open Access full text articles. Our comparative analyses on the currently available evidence data in the platform revealed that 850179 of these associations are exclusively identified by literature mining.

Conclusions

This component helps the platform's users by providing the most relevant literature hits for a given target and disease. The text mining evidence along with the other types of evidence can be explored visually through https://www.targetvalidation.org and all the evidence data is available for download in json format from https://www.targetvalidation.org/downloads/data .",2017-06-06 +30979351,Improving the Antinoise Ability of DNNs via a Bio-Inspired Noise Adaptive Activation Function Rand Softplus.,"Although deep neural networks (DNNs) have led to many remarkable results in cognitive tasks, they are still far from catching up with human-level cognition in antinoise capability. New research indicates how brittle and susceptible current models are to small variations in data distribution. In this letter, we study the stochasticity-resistance character of biological neurons by simulating the input-output response process of a leaky integrate-and-fire (LIF) neuron model and proposed a novel activation function, rand softplus (RSP), to model the response process. In RSP, a scale factor η is employed to mimic the stochasticity-adaptability of biological neurons, thereby enabling the antinoise capability of a DNN to be improved by the novel activation function. We validated the performance of RSP with a 19-layer residual network (ResNet) and a 19-layer visual geometry group (VGG) on facial expression recognition data sets and compared it with other popular activation functions, such as rectified linear units (ReLU), softplus, leaky ReLU (LReLU), exponential linear unit (ELU), and noisy softplus (NSP). The experimental results show that RSP is applied to VGG-19 or ResNet-19, and the average recognition accuracy under five different noise levels exceeds the other functions on both of the two facial expression data sets; in other words, RSP outperforms the other activation functions in noise resistance. 
Compared with the application in ResNet-19, the application of RSP in VGG-19 can improve a network's antinoise performance to a greater extent. In addition, RSP is easier to train compared to NSP because it has only one parameter to be calculated automatically according to the input data. Therefore, this work provides the deep learning community with a novel activation function that can better deal with overfitting problems.",2019-04-12 +34964105,Letter to the Editor: EDUCATIONAL ACTIVITIES RELATED TO THE ICD-11 CHAPTER ON MENTAL DISORDERS.,"Dear Editor, The 11th revision of the International Classification of Diseases and Related Health Problems (ICD-11), including the chapter on mental, behavioural and neurodevelopmental disorders, has been adopted unanimously by the 72nd World Health Assembly in Geneva on May 25, 2019. The endorsement of the new classification will not come into effect until January 1, 2022. Until that date, the Member States of the World Health Organization (WHO) will keep on using the ICD-10 for reporting data. The most significant innovations in the ICD-11 chapter, and the most important differences from the DSM-5, have been detailed elsewhere (Reed et al. 2019). Several issues debated in the process of development of the chapter - including the role of a dimensional component within a system that remains mainly based on categories, and the need for a further clinical characterization of the individual patient, in addition to the diagnosis, in order to guide the formulation of the prognosis and the management plan, have been also covered in the recent literature (Clark et al. 2017, Rebello et al. 2019, Fuss et al. 2019, Gureje et al. 2019, van Os et al. 2019, Fusar-Poli et al. 2019, Forbes et al. 2019, Gaebel et al. 2019, Patel 2019, Kotov et al. 2020, Maj et al. 2020, Sanislow et al. 2020). 
The training of professionals in the use of the ICD-11 chapter is now ongoing worldwide, under the coordination of a WHO International Advisory Group led by G.M. Reed. Educational courses have been conducted at the 18th and 19th World Congresses of Psychiatry (Mexico City, Mexico, September 27-30, 2018; and Lisbon, Portugal, August 21-24, 2019) (Giallonardo 2019, Pocai 2019, Perris 2020). A more comprehensive online 20-hr training course has been organized by the Naples WHO Collaborating Centre on Research and Training in Mental Health and the European Psychiatric Association from 9 to 30 April, 2021. The course has been coordinated by G.M. Reed and M. Maj, and has covered all the main sections of the ICD-11 chapter on mental disorders. W. Gaebel, M. Cloitre, M. Maj, C.S. Kogan, P. Monteleone, M. Swales, J.B. Saunders and N.A. Fineberg composed the Faculty. The live course has been attended by 120 psychiatrists, selected from almost 500 applicants, representing 78 different countries. A further group of 250 psychiatrists have had access to the course on demand. Two ICD-11 training sessions have been organized by the Psychiatric Association of Turkey within its 24th Clinical Education Symposium, held from 2 to 6 June 2021. One covered psychotic disorders and mood disorders, with the participation of W. Gaebel and M. Maj and the chairmanship of S. Vahip and C. Atbasoglu. The other dealt with trauma-related, fear-related and obsessive-compulsive disorders, with the participation of M. Cloitre and D.J. Stein and the chairmanship of R. Tukel and C. Kilic. Each session had more than 150 participants. An ICD-11 training event has been also organized by the UK Royal College of Psychiatrists from 25 to 26 May 2021. One further educational event is now going to be held by the World Psychiatric Association from 8 to 29 November 2021 (www.wpanet.org). 
A training course with exclusive access to the members of the WHO Global Clinical Practice Network (https://gcp.network) has been recently set up by the WHO Collaborating Centre on Mental Health at the Columbia University, in collaboration with the WHO Department of Mental Health and Substance Use. The course consists of 15 online training units, each focusing on a different disorder grouping and [EDUCATIONAL ACTIVITIES RELATED TO THE ICD-11 CHAPTER ON MENTAL DISORDERS 292 Received: 13.09.2021, Accepted: 15.09.2021, Available Online Date: 30.11.2021 MD., University of Campania L. Vanvitelli, WHO Collaborating Centre for Research and Training in Mental Health, Naples, Italy. Dr. Vincenzo Giallonardo, e-mail: enzogiallo86@gmail.com https://doi.org/10.5080/u26898 ] taking from one to one and a half hours. Each unit provides a description of the relevant diagnostic grouping and the main innovations with respect to the ICD-10. Knowledge check questions are included to test the outcome of training. Participants have the opportunity to practice by applying diagnostic guidelines to clinical case examples. This training course is going to be available also in Spanish, and additional translations are planned. The WHO Global Clinical Practice Network now includes more than 16.000 clinicians from 159 countries (51% psychiatrists, 30% psychologists; 40% from Europe, 25% from Western Pacific, 24% from the Americas, 5% from Southeast Asia, 3% from Eastern Mediterranean, and 3% from Africa; 63% from high-income countries, 37% from middle- and low-income countries). The Network contributed significantly to the development of the ICD-11 chapter on mental disorders, in particular through its participation in the Internet field trials of the diagnostic system. It is now further serving as a catalyst for scientific and clinical research collaborations. All health professionals working in mental health or primary care are welcome to join the Network. 
Vincenzo GİALLONARDO REFERENCES Clark L, Cuthbert B, Lewis-Fernández R et al (2017). Three approaches to understanding and classifying mental disorder: ICD-11, DSM-5, and the National Institute of Mental Health's Research Domain Criteria (RDoC). Psychol Sci Public Interest 18:72-145. Forbes MK, Wright AGC, Markon KE et al (2019) The network approach to psychopathology: promise versus reality. World Psychiatry 18:272-3. Fusar-Poli P, Solmi M, Brondino N et al (2019) Transdiagnostic psychiatry: a systematic review. World Psychiatry 18:192-207. Fuss J, Lemay K, Stein DJ et al (2019) Public stakeholders' comments on ICD-11 chapters related to mental and sexual health. World Psychiatry 18:233-5. Giallonardo V (2019) ICD-11 sessions within the 18th World Congress of Psychiatry. World Psychiatry 18:115-6. Gaebel W, Reed GM, Jakob R (2019) Neurocognitive disorders in ICD-11: a new proposal and its outcome. World Psychiatry 18:232-3. Gureje O, Lewis-Fernandez R, Hall BJ et al (2019) Systematic inclusion of culture-related information in ICD-11. World Psychiatry 18:357-8. Kotov R, Jonas KG, Carpenter WT et al (2020) Validity and utility of Hierarchical Taxonomy of Psychopathology (HiTOP): I. Psychosis superspectrum. World Psychiatry 19:151-72. Maj M, Stein DJ, Parker G et al (2020) The clinical characterization of the adult patient with depression aimed at personalization of management. World Psychiatry 19:269-93. Patel V (2019) Reimagining outcomes requires reimagining mental health conditions. World Psychiatry 18:286-7. Perris F (2020) ICD-11 sessions at the 19th World Congress of Psychiatry. World Psychiatry 19:263-4. Pocai B (2019) The ICD-11 has been adopted by the World Health Assembly. World Psychiatry 18:371-2. Rebello TJ, Keeley JW, Kogan CS et al (2019) Anxiety and fear-related disorders in the ICD-11: results from a global case-controlled field study. Arch Med Res 50:490-501. 
Reed GM, First MB, Kogan CS et al (2019) Innovations and changes in the ICD-11 classification of mental, behavioural and neurodevelopmental disorders. World Psychiatry 18:3-19. Sanislow CA (2020) RDoC at 10: changing the discourse for psychopathology. World Psychiatry 19:311-2. van Os J, Guloksuz S, Vijn TW et al (2019) The evidence-based group-level symptom-reduction model as the organizing principle for mental health care: time for change? World Psychiatry 18:88-96.",2021-01-01 +31575533,"CP-North: living life in the Nordic countries? A retrospective register research protocol on individuals with cerebral palsy and their parents living in Sweden, Norway, Denmark, Finland and Iceland.","

Introduction

Cerebral palsy (CP) is one of the most common neurodevelopmental disabilities. Yet, most individuals with CP are adults. How individuals with CP fare in terms of health, quality of life (QoL), education, employment and income is largely unknown. Further, little is known about the effects of having a child with CP on the parents. The Nordic countries are known for their strong welfare systems, yet it is unknown to what extent the added burden related to disability is actually compensated for. We will explore how living with CP affects health, QoL, healthcare utilisation, education, labour market outcomes, socioeconomic status and mortality throughout the lifespan of individuals with CP and their parents. We will also investigate if these effects differ between subgroups, within and across the Nordic countries. METHODS AND ANALYSES: CP-North is a multidisciplinary 4-year (1 August 2017 to 31 July 2021) register research project. The research consortium comprises researchers and users from Sweden, Norway, Denmark, Iceland and Finland. Data from CP registries and follow-up programmes, or cohorts of individuals with CP, will be merged with general national registries. All individual studies are structured under three themes: medical outcomes, social and public health outcomes, and health economics. Both case-control and cohort designs will be included depending on the particular research question. Data will be analysed in the individual countries and later merged across nations.

Ethics and dissemination

The ethics approval processes in each individual country are followed. Findings will be published (open access) in international peer-reviewed journals in related fields. Updates on CP-North will be published online at http://rdi.arcada.fi/cpnorth/en/.",2019-10-01 +30989232,Long-term Clinical Effectiveness of Ustekinumab in Patients with Crohn's Disease Who Failed Biologic Therapies: A National Cohort Study.,"

Background

Ustekinumab [UST] was recently approved in Europe for the treatment of moderate to severe Crohn's disease [CD]. Long-term real-world data are currently scarce for CD patients previously exposed to several biologics.

Methods

This is an observational, national, retrospective multicentre study. Patients received intravenous UST ~6 mg/kg at baseline, with 90 mg subcutaneously thereafter every 8 weeks. Response and remission rates were assessed at Weeks 8, 16, and 52.

Results

Data from 152 patients were analysed. All patients were exposed to at least one anti-TNFα agent, with 69.7% exposed to even two anti-TNFα and vedolizumab. After 1 year, 42.1% and 25.7% of patients had experienced clinical response and clinical remission, respectively, and 38.8% and 24.3% had achieved steroid-free clinical response and remission, respectively; 38.8% of patients discontinued therapy during the 12 months of follow-up. Colonic location was predictive of clinical response at 1 year, and low body mass index [BMI] at baseline was a negative predictor of clinical remission. Resolution of arthralgia was associated with clinical response over time. De novo arthralgia was reported by 17.9% of patients at Week 8 and 13.5% of patients at Week 52. No impact of UST on arthralgia was observed in patients with concomitant ankylosing spondylitis [n = 17]. Other adverse events were reported in 7.2% of patients.

Conclusions

This real-world cohort study confirms the effectiveness of UST in CD patients previously exposed to several biologics. Ustekinumab was well tolerated with respect to adverse events.

Podcast

This article has an associated podcast which can be accessed at https://academic.oup.com/ecco-jcc/pages/podcast.",2019-10-01 +29892515,iMusta4SLC: Database for the structural property and variations of solute carrier transporters.,"Membrane transporter proteins play important roles in transport of nutrients into the cell, in transport of waste out of the cell, in maintenance of homeostasis, and in signal transduction. Solute carrier (SLC) transporter is the superfamily, which has the largest number of genes (>400 in humans) in membrane transporter and consists of 52 families. SLC transporters carry a wide variety of substrates such as amino acids, peptides, saccharides, ions, neurotransmitters, lipids, hormones and related materials. Despite the apparent importance for the substrate transport, the information of sequence variation and three-dimensional structures have not been integrated to the level of providing new knowledge on the relationship to, for instance, diseases. We, therefore, built a new database named iMusta4SLC, which is available at http://cib.cf.ocha.ac.jp/slc/, that connected the data of structural properties and of pathogenic mutations on human SLC transporters. iMusta4SLC helps to investigate the structural features of pathogenic mutations on SLC transporters. With this database, we found that the mutations at the conserved arginine were frequently involved in diseases, and were located at a border between the membrane and the cytoplasm. Especially in SLC families 2 and 22, the conserved residues formed a large cluster at the border. 
In SLC2A1, one third of the reported pathogenic missense mutations were found in this conserved cluster.",2018-04-27 +27138013,StreptoBase: An Oral Streptococcus mitis Group Genomic Resource and Analysis Platform.,"The oral streptococci are spherical Gram-positive bacteria categorized under the phylum Firmicutes which are among the most common causative agents of bacterial infective endocarditis (IE) and are also important agents in septicaemia in neutropenic patients. The Streptococcus mitis group is comprised of 13 species including some of the most common human oral colonizers such as S. mitis, S. oralis, S. sanguinis and S. gordonii as well as species such as S. tigurinus, S. oligofermentans and S. australis that have only recently been classified and are poorly understood at present. We present StreptoBase, which provides a specialized free resource focusing on the genomic analyses of oral species from the mitis group. It currently hosts 104 S. mitis group genomes including 27 novel mitis group strains that we sequenced using the high throughput Illumina HiSeq technology platform, and provides a comprehensive set of genome sequences for analyses, particularly comparative analyses and visualization of both cross-species and cross-strain characteristics of S. mitis group bacteria. StreptoBase incorporates sophisticated in-house designed bioinformatics web tools such as Pairwise Genome Comparison (PGC) tool and Pathogenomic Profiling Tool (PathoProT), which facilitate comparative pathogenomics analysis of Streptococcus strains. Examples are provided to demonstrate how StreptoBase can be employed to compare genome structure of different S. mitis group bacteria and putative virulence genes profile across multiple streptococcal strains. In conclusion, StreptoBase offers access to a range of streptococci genomic resources as well as analysis tools and will be an invaluable platform to accelerate research in streptococci. 
Database URL: http://streptococcus.um.edu.my.",2016-05-03 +32484729,Genome-Wide DNA Methylation in Peripheral Blood and Long-Term Exposure to Source-Specific Transportation Noise and Air Pollution: The SAPALDIA Study.,"

Background

Few epigenome-wide association studies (EWAS) on air pollutants exist, and none have been done on transportation noise exposures, which also contribute to environmental burden of disease.

Objective

We performed mutually independent EWAS on transportation noise and air pollution exposures.

Methods

We used data from two time points of the Swiss Cohort Study on Air Pollution and Lung and Heart Diseases in Adults (SAPALDIA) from 1,389 participants contributing 2,542 observations. We applied multiexposure linear mixed-effects regressions with participant-level random intercept to identify significant Cytosine-phosphate-Guanine (CpG) sites and differentially methylated regions (DMRs) in relation to 1-y average aircraft, railway, and road traffic day-evening-night noise (Lden); nitrogen dioxide (NO2); and particulate matter (PM) with aerodynamic diameter <2.5μm (PM2.5). We performed candidate (CpG-based; cross-systemic phenotypes, combined into ""allostatic load"") and agnostic (DMR-based) pathway enrichment tests, and replicated previously reported air pollution EWAS signals.

Results

We found no statistically significant CpGs at false discovery rate <0.05. However, 14, 48, 183, 8, and 71 DMRs independently associated with aircraft, railway, and road traffic Lden; NO2; and PM2.5, respectively, with minimally overlapping signals. Transportation Lden and air pollutants tendentially associated with decreased and increased methylation, respectively. We observed significant enrichment of candidate DNA methylation related to C-reactive protein and body mass index (aircraft, road traffic Lden, and PM2.5), renal function and ""allostatic load"" (all exposures). Agnostic functional networks related to cellular immunity, gene expression, cell growth/proliferation, cardiovascular, auditory, embryonic, and neurological systems development were enriched. We replicated increased methylation in cg08500171 (NO2) and decreased methylation in cg17629796 (PM2.5).

Conclusions

Mutually independent DNA methylation was associated with source-specific transportation noise and air pollution exposures, with distinct and shared enrichments for pathways related to inflammation, cellular development, and immune responses. These findings contribute in clarifying the pathways linking these exposures and age-related diseases but need further confirmation in the context of mediation analyses. https://doi.org/10.1289/EHP6174.",2020-06-01 +32189111,Micrometer-resolution X-ray tomographic full-volume reconstruction of an intact post-mortem juvenile rat lung.,"In this article, we present an X-ray tomographic imaging method that is well suited for pulmonary disease studies in animal models to resolve the full pathway from gas intake to gas exchange. Current state-of-the-art synchrotron-based tomographic phase-contrast imaging methods allow for three-dimensional microscopic imaging data to be acquired non-destructively in scan times of the order of seconds with good soft tissue contrast. However, when studying multi-scale hierarchically structured objects, such as the mammalian lung, the overall sample size typically exceeds the field of view illuminated by the X-rays in a single scan and the necessity for achieving a high spatial resolution conflicts with the need to image the whole sample. Several image stitching and calibration techniques to achieve extended high-resolution fields of view have been reported, but those approaches tend to fail when imaging non-stable samples, thus precluding tomographic measurements of large biological samples, which are prone to degradation and motion during extended scan times. In this work, we demonstrate a full-volume three-dimensional reconstruction of an intact rat lung under immediate post-mortem conditions and at an isotropic voxel size of (2.75 µm)3. We present the methodology for collecting multiple local tomographies with 360° extended field of view scans followed by locally non-rigid volumetric stitching. 
Applied to the lung, it allows to resolve the entire pulmonary structure from the trachea down to the parenchyma in a single dataset. The complete dataset is available online ( https://doi.org/10.16907/7eb141d3-11f1-47a6-9d0e-76f8832ed1b2 ).",2020-03-18 +32032091,Outcomes of Acute Inpatient Rehabilitation After Ventricular Assist Device Placement.,"

Objective

The aim of the study was to compare outcomes of inpatient rehabilitation after ventricular assist device placement with outcomes for other cardiac diagnoses.

Design

This was a retrospective review of the electronic health records of 265 patients admitted to inpatient rehabilitation: 166 patients were admitted after ventricular assist device placement and 99 were admitted for other cardiac disease. Data collected included functional independence measure score on admission and discharge, dates of admission and discharge, and disposition.

Results

Patients admitted after ventricular assist device placement had a mean functional independence measure gain of 25.7 and length of stay of 8.7 days. Patients admitted for other cardiac diagnoses had a mean functional independence measure gain of 25.9 and length of stay of 9.4 days. These differences were not statistically significant. Change in functional independence measure from admission to discharge was statistically significant within each group (P < 0.001). Most patients were discharged home, and the proportions who returned to acute care or home were not different between groups.

Conclusions

Both the ventricular assist device and nonventricular assist device groups had significant and equivalent improvements in functional outcomes after inpatient rehabilitation. This study found that ventricular assist device patients benefit from inpatient rehabilitation with similar disposition rates as patients with other cardiac diagnoses. Inpatient rehabilitation is the appropriate setting for this group.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) Describe the role of rehabilitation in the care of individuals after ventricular assist device (VAD) placement; (2) Compare the outcomes of inpatient rehabilitation for individuals after VAD placement to those admitted for other cardiac reasons; and (3) Recognize potential complications that may occur during the course of a VAD patient's inpatient rehabilitation stay.

Level

Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2020-06-01 +31617754,Evidence for Bisphenol B Endocrine Properties: Scientific and Regulatory Perspectives.,"BACKGROUND:The substitution of bisphenol A (BPA) by bisphenol B (BPB), a very close structural analog, stresses the need to assess its potential endocrine properties. OBJECTIVE:This analysis aimed to investigate whether BPB has endocrine disruptive properties in humans and in wildlife as defined by the World Health Organization (WHO) definition used in the regulatory field, that is, a) adverse effects, b) endocrine activity, and c) plausible mechanistic links between the observed endocrine activity and adverse effects. METHODS:We conducted a systematic review to identify BPB adverse effects and endocrine activities by focusing on animal models and in vitro mechanistic studies. The results were grouped by modality (estrogenic, androgenic, thyroid hormone, steroidogenesis-related, or other endocrine activities). After critical analysis of results, lines of evidence were built using a weight-of-evidence approach to establish a biologically plausible link. In addition, the ratio of BPA to BPB potency was reported from studies investigating both bisphenols. RESULTS:Among the 36 articles included in the analysis, 3 subchronic studies consistently reported effects of BPB on reproductive function. 
In rats, the 28-d and 48-week studies showed alteration of spermatogenesis associated with a lower height of the seminiferous tubules, the alteration of several sperm parameters, and a weight loss for the testis, epididymis, and seminal vesicles. In zebrafish, the results of a 21-d reproductive study demonstrated that exposed fish had a lower egg production and a lower hatching rate and viability. The in vitro and in vivo mechanistic data consistently demonstrated BPB's capacity to decrease testosterone production and to exert an estrogenic-like activity similar to or greater than BPA's, both pathways being potentially responsible for spermatogenesis impairment in rats and fish. CONCLUSION:The available in vivo, ex vivo, and in vitro data, although limited, coherently indicates that BPB meets the WHO definition of an endocrine disrupting chemical currently used in a regulatory context. https://doi.org/10.1289/EHP5200.",2019-10-16 +30357412,BatchI: Batch effect Identification in high-throughput screening data using a dynamic programming algorithm.,"

Motivation

In contemporary biological experiments, bias, which interferes with the measurements, requires attentive processing. Important sources of bias in high-throughput biological experiments are batch effects and diverse methods towards removal of batch effects have been established. These include various normalization techniques, yet many require knowledge on the number of batches and assignment of samples to batches. Only few can deal with the problem of identification of batch effect of unknown structure. For this reason, an original batch identification algorithm through dynamical programming is introduced for omics data that may be sorted on a timescale.

Results

BatchI algorithm is based on partitioning a series of high-throughput experiment samples into sub-series corresponding to estimated batches. The dynamic programming method is used for splitting data with maximal dispersion between batches, while maintaining minimal within batch dispersion. The procedure has been tested on a number of available datasets with and without prior information about batch partitioning. Datasets with a priori identified batches have been split accordingly, measured with weighted average Dice Index. Batch effect correction is justified by higher intra-group correlation. In the blank datasets, identified batch divisions lead to improvement of parameters and quality of biological information, shown by literature study and Information Content. The outcome of the algorithm serves as a starting point for correction methods. It has been demonstrated that omitting the essential step of batch effect control may lead to waste of valuable potential discoveries.

Availability and implementation

The implementation is available within the BatchI R package at http://zaed.aei.polsl.pl/index.php/pl/111-software.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-06-01 +31956678,"A neuroimaging dataset on orthographic, phonological and semantic word processing in school-aged children.","Here we describe the public neuroimaging and behavioral dataset entitled ""Cross-Sectional Multidomain Lexical Processing"" available on the OpenNeuro project (https://openneuro.org). This dataset explores the neural mechanisms and development of lexical processing through task based functional magnetic resonance imaging (fMRI) of rhyming, spelling, and semantic judgement tasks in both the auditory and visual modalities. Each task employed varying degrees of trial difficulty, including conflicting versus non-conflicting orthography-phonology pairs (e.g. harm - warm, wall - tall) in the rhyming and spelling tasks as well as high versus low word pair association in the semantic tasks (e.g. dog - cat, dish - plate). In addition, this dataset contains scores from a battery of standardized psychoeducational assessments allowing for future analyses of brain-behavior relations. Data were collected from a cross-sectional sample of 91 typically developing children aged 8.7- to 15.5- years old. The cross-sectional design employed in this dataset as well as the inclusion of multiple measures of lexical processing in varying difficulties and modalities allows for multiple avenues of future research on reading development.",2020-01-03 +32106856,Systematic review and meta-analysis of video-assisted thoracoscopic surgery segmentectomy versus lobectomy for stage I non-small cell lung cancer.,"BACKGROUND:Whether video-assisted thoracoscopic surgery (VATS) segmentectomy and VATS lobectomy provide similar perioperative and oncological outcomes in stage I non-small cell lung cancer (NSCLC) is still controversial. METHODS:Meta-analysis of 12 studies comparing outcomes after VATS lobectomy and VATS segmentectomy for stage I NSCLC. Data were analyzed by the RevMan 5.3 software. 
RESULTS:Disease-free survival (HR 1.19, 95% CI 0.89 to 1.33, P = 0.39), overall survival (HR 1.11, 95% CI 0.89 to 1.38, P = 0.36), postoperative complications (OR = 1.10, 95% CI 0.69 to 1.75, P = 0.7), intraoperative blood loss (MD = 3.87, 95% CI - 10.21 to 17.94, P = 0.59), operative time (MD = 10.89, 95% CI - 13.04 to 34.82, P = 0.37), air leak > 5 days (OR = 1.20, 95% CI 0.66 to 2.17, P = 0.55), and in-hospital mortality (OR = 1.67, 95% CI 0.39 to 7.16, P = 0.49) were comparable between the groups. Postoperative hospital stay (MD = - 0.69, 95% CI - 1.19 to - 0.19, P = 0.007) and number of dissected lymph nodes (MD = - 6.44, 95%CI - 9.49 to - 3.40, P < 0.0001) were significantly lower in VATS segmentectomy patients. CONCLUSIONS:VATS segmentectomy and VATS lobectomy provide similar oncological and perioperative outcomes for stage I NSCLC patients. This systematic review was registered on PROSPERO and can be accessed at http://www.crd.york.ac.uk/PROSPERO/display_record.php?ID = CRD42019133398.",2020-02-27 +31506471,ePath: an online database towards comprehensive essential gene annotation for prokaryotes.,"Experimental techniques for identification of essential genes (EGs) in prokaryotes are usually expensive, time-consuming and sometimes unrealistic. Emerging in silico methods provide alternative methods for EG prediction, but often possess limitations including heavy computational requirements and lack of biological explanation. Here we propose a new computational algorithm for EG prediction in prokaryotes with an online database (ePath) for quick access to the EG prediction results of over 4,000 prokaryotes ( https://www.pubapps.vcu.edu/epath/ ). In ePath, gene essentiality is linked to biological functions annotated by KEGG Ortholog (KO). Two new scoring systems, namely, E_score and P_score, are proposed for each KO as the EG evaluation criteria. 
E_score represents appearance and essentiality of a given KO in existing experimental results of gene essentiality, while P_score denotes gene essentiality based on the principle that a gene is essential if it plays a role in genetic information processing, cell envelope maintenance or energy production. The new EG prediction algorithm shows prediction accuracy ranging from 75% to 91% based on validation from five new experimental studies on EG identification. Our overall goal with ePath is to provide a comprehensive and reliable reference for gene essentiality annotation, facilitating the study of those prokaryotes without experimentally derived gene essentiality information.",2019-09-10 +30652557,D3Oncoprint: Stand-Alone Software to Visualize and Dynamically Explore Annotated Genomic Mutation Files.,"

Purpose

Advances in next-generation sequencing technologies have led to a reduction in sequencing costs, which has increased the availability of genomic data sets to many laboratories. Increasing amounts of sequencing data require effective analysis tools to use genomic data for biologic discovery and patient management. Available packages typically require advanced programming knowledge and system administration privileges, or they are Web services that force researchers to work on outside servers.

Methods

To support the interactive exploration of genomic data sets on local machines with no programming skills required, we developed D3Oncoprint, a standalone application to visualize and dynamically explore annotated genomic mutation files. D3Oncoprint provides links to curated variants lists from CIViC, My Cancer Genome, OncoKB, and Food and Drug Administration-approved drugs to facilitate the use of genomic data for biomedical discovery and application. D3Oncoprint also includes curated gene lists from BioCarta pathways and FoundationOne cancer panels to explore commonly investigated biologic processes.

Results

This software provides a flexible environment to dynamically explore one or more variant mutation profiles provided as input. The focus on interactive visualization with biologic and medical annotation significantly lowers the barriers between complex genomics data and biomedical investigators. We describe how D3Oncoprint helps researchers explore their own data without the need for an extensive computational background.

Conclusion

D3Oncoprint is free software for noncommercial use. It is available for download from the Web site of the Biometric Research Program of the Division of Cancer Treatment and Diagnosis at the National Cancer Institute ( https://brb.nci.nih.gov/d3oncoprint ). We believe that this tool provides an important means of empowering researchers to translate information from collected data sets to biologic insights and clinical development.",2018-12-01 +25308624,"Screening, diagnosis, treatment, and management of hepatitis C: a novel, comprehensive, online resource center for primary care providers and specialists.","Current initiatives focusing on hepatitis C (HCV) screening and diagnosis, together with the advent of oral interferon (IFN)-free treatment regimens have prompted Elsevier Multimedia Publishing and the American Journal of Medicine (AJM) to develop a novel, comprehensive, online Resource Center dedicated to providing both primary care providers and specialists with the latest information on the screening, diagnosis, treatment, and management of HCV. To date, only 25% of infected patients have been diagnosed and only 5% cured. With the Centers for Disease Control and Prevention (CDC) and the US Prevention Services Task Force (USPSTF) recommendation of one-time screening for all individuals born between 1945 and 1965, and the availability of safe and effective therapy, it is anticipated that primary care providers and community practices will become increasingly responsible for the screening, diagnosis, and management of infected patients, as well as providing access to care by specialists when needed. The AJM Hepatitis C Resource Center site will have two major channels; one channel tailored to specifically address the needs of internal medicine physicians and other primary care providers, and one channel tailored to address the needs of specialists including hepatologists, gastroenterologists, and infectious disease specialists. 
Systematic surveys of these clinician audiences are being conducted by Elsevier to assess educational gaps, and ensure that the content of each channel of the Resource Center satisfies the needs of the intended audiences. In a recent Elsevier survey of primary care physicians (PCPs) who had screened and/or participated in the care of patients with HCV within 6 months of participating in the survey, 60% of PCPs stated that they were not very confident or only somewhat confident about screening patients for chronic HCV infection. A recent Elsevier survey of specialists revealed low levels of satisfaction with the treatment options available in 2013, with ""no therapy"" being selected for up to 38% of patients. This survey also showed that experience with newly-approved options for HCV including IFN-free regimens is currently limited, but the likelihood that a variety of patient types will be treated with these options is high. This provides an impetus for educational opportunities focusing on optimizing treatments for the different HCV genotypes and for patients with comorbidities. Further results of the PCP and specialist surveys will be published on the Resource Center. Each channel of the Resource Center will be comprised of a variety of specific communication elements, which are open to sponsorship, and include roundtable panel discussions, case studies, and direct links to relevant original research, review articles, and guidelines. All Resource Center components are peer-reviewed for publication on the Resource Center by the AJM Editorial Office and the Resource Center Guest Editor, Edward Lebovics, MD. 
The AJM Hepatitis C Resource Center will be accessible from the AJM online home page (http://www.amjmed.com) and will be launched immediately prior to the American Association for the Study of Liver Diseases (AASLD) Liver Meeting to be held from November 7 to 11, 2014 in Boston, Massachusetts.",2014-10-13 +33996891,Universal Architectural Concepts Underlying Protein Folding Patterns.,"What is the architectural ""basis set"" of the observed universe of protein structures? Using information-theoretic inference, we answer this question with a dictionary of 1,493 substructures-called concepts-typically at a subdomain level, based on an unbiased subset of known protein structures. Each concept represents a topologically conserved assembly of helices and strands that make contact. Any protein structure can be dissected into instances of concepts from this dictionary. We dissected the Protein Data Bank and completely inventoried all the concept instances. This yields many insights, including correlations between concepts and catalytic activities or binding sites, useful for rational drug design; local amino-acid sequence-structure correlations, useful for ab initio structure prediction methods; and information supporting the recognition and exploration of evolutionary relationships, useful for structural studies. An interactive site, Proçodic, at http://lcb.infotech.monash.edu.au/prosodic (click), provides access to and navigation of the entire dictionary of concepts and their usages, and all associated information. This report is part of a continuing programme with the goal of elucidating fundamental principles of protein architecture, in the spirit of the work of Cyrus Chothia.",2020-01-01 +31297537,MM-6mAPred: identifying DNA N6-methyladenine sites based on Markov model.,"

Motivation

Recent studies have shown that DNA N6-methyladenine (6mA) plays an important role in epigenetic modification of eukaryotic organisms. It has been found that 6mA is closely related to embryonic development, stress response and so on. Developing a new algorithm to quickly and accurately identify 6mA sites in genomes is important for exploring their biological functions.

Results

In this paper, we proposed a new classification method called MM-6mAPred based on a Markov model which makes use of the transition probability between adjacent nucleotides to identify 6mA sites. The sensitivity and specificity of our method are 89.32% and 90.11%, respectively. The overall accuracy of our method is 89.72%, which is 6.59% higher than that of the previous method i6mA-Pred. It indicated that, compared with the 41 nucleotide chemical properties used by i6mA-Pred, the transition probability between adjacent nucleotides can capture more discriminant sequence information.

Availability and implementation

The web server of MM-6mAPred is freely accessible at http://www.insect-genome.com/MM-6mAPred/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +31507131,Development of Ontology for Self-limited Epilepsy with Centrotemporal Spikes and Application of Data Mining Algorithms to Identify New Subtypes.,"BACKGROUND:Benign rolandic epilepsy or benign childhood epilepsy with centrotemporal spikes (BCECTS) is a common childhood epileptic syndrome. The syndrome resolves in adolescence, but 1-7% of patients have an atypical presentation, some of which require aggressive medical treatment. Early treatment may prevent complications and neurocognitive deterioration. Variants include Landau-Kleffner syndrome (LKS) and electrical status epilepticus during sleep (ESES). OBJECTIVES:To determine data driven identification of risk factors and characterization of new subtypes of BCECTS based on an ontology. To use data mining analysis and correlation between the identified groups and known clinical variants. METHODS:We conducted a retrospective cohort study comprised of 104 patients with a diagnosis of BCECTS and a minimum of 2 years of follow-up, between the years 2005 and 2017. The medical records were obtained from the epilepsy service unit of the pediatric neurology department at Dana-Dwek Hospital, Tel Aviv Sourasky Medical Center. We developed a BCECTS ontology and performed data preprocessing and analysis using the R Project for Statistical Computing (https://www.r-project.org/) and machine learning tools to identify risk factors and characterize subgroups. RESULTS:The ontology created a uniform and understandable infrastructure for research. With the ontology, a more precise characterization of clinical symptoms and EEG activity of BCECTS was possible. Risk factors for the development of severe atypical presentations were identified: electroencephalography (EEG) with spike wave (P < 0.05), EEG without evidence of left lateralization (P < 0.05), and EEG localization (centrotemporal, frontal, or frontotemporal) (P < 0.01). 
CONCLUSIONS:Future use of the ontology infrastructure for expanding characterization for multicenter studies as well as future studies of the disease are needed. Identifying subgroups and adapting them to known clinical variants will enable identification of risk factors, improve prediction of disease progression, and facilitate adaptation of more accurate therapy. Early identification and frequent follow-up may have a significant impact on the prognosis of the atypical variants.",2019-07-01 +28852784,[Orthopedic surgery with limited resources after mass disasters and during armed conflicts : First international guidelines for the management of limb injuries and the experience of Doctors Without Borders].,"Disasters and armed conflicts are often the unfortunate basis for aid projects run by Doctors Without Borders/Médecins Sans Frontières (MSF). The nature of war and disasters means that surgery is an integral part of this medical emergency aid. In these situations, resources are usually limited. As a result, surgical work in these contexts differs significantly from the daily routine of a surgeon working in a highly resourced hospital. The principles of surgery do not change but surgeons must adapt their tactical approach to the changed context otherwise there is a high risk of failing to improve the health of patients and potentially jeopardizing their prospects for recovery. Every experienced war surgeon has learned new skills the hard way. The Field Guide to Manage Limb Injury in Disaster and Conflict has been written to help new surgeons who may face the challenges of disaster and war surgery and to avoid unnecessary suffering for patients ( https://icrc.aoeducation.org ). 
Under the guidance of the International Committee of the Red Cross (ICRC), with participation of the World Health Organization (WHO), financed by the AO Foundation, and featuring the experiences of experts from different organizations (amongst them MSF), the book details techniques and guidelines for surgery in low resource settings. The following article provides a short summary of some of the surgical challenges when working with limited resources and reflects on a few specific recommendations for so-called war surgery.",2017-10-01 +32194343,The FiberTAG project: Tagging dietary fibre intake by measuring biomarkers related to the gut microbiota and their interest for health.,"The scientific rationale for dietary fibre intake recommendations comes from the recognition of their benefits for health based on studies first published many years ago. It remains unclear which are the key physiological effects generated by dietary fibre in view of the diversity of the food components considered as dietary fibre, of the relevance of their classification (soluble and insoluble) and from the recent discoveries putting forward their interactions with the gut microbiota. The project FiberTAG (Joint Programming Initiative 'A Healthy Diet for a Healthy Life' 2017-2020 https://www.fibertag.eu/) aims to establish a set of biomarkers (markers of gut barrier function and bacterial co-metabolites including volatile compounds and lipid derivatives), measured in different biological compartments (faeces, blood or breath) linking dietary fibre intake and gut microbiota-related health effects. The FiberTAG consortium brings together academic and industrial partners from Belgium, France, Germany and Canada to share data and samples obtained from existing as well as new intervention studies in order to evaluate the relevance of such biomarkers. 
The FiberTAG consortium is currently working on five existing cohorts (prospective observational or nutritional interventions in healthy or obese patients), and a number of new intervention studies to analyse the effect of insoluble dietary fibre (wheat bran and chitin-glucan, provided by the industrial partners) in healthy individuals or in obese patients at high cardiometabolic risk.",2020-02-05 +32117725,Comprehensive Review of Web Servers and Bioinformatics Tools for Cancer Prognosis Analysis.,"Prognostic biomarkers are of great significance to predict the outcome of patients with cancer, to guide the clinical treatments, to elucidate tumorigenesis mechanisms, and offer the opportunity of identifying therapeutic targets. To screen and develop prognostic biomarkers, high throughput profiling methods including gene microarray and next-generation sequencing have been widely applied and shown great success. However, due to the lack of independent validation, only very few prognostic biomarkers have been applied for clinical practice. In order to cross-validate the reliability of potential prognostic biomarkers, some groups have collected the omics datasets (i.e., epigenetics/transcriptome/proteome) with relative follow-up data (such as OS/DSS/PFS) of clinical samples from different cohorts, and developed the easy-to-use online bioinformatics tools and web servers to assist the biomarker screening and validation. These tools and web servers provide great convenience for the development of prognostic biomarkers, for the study of molecular mechanisms of tumorigenesis and progression, and even for the discovery of important therapeutic targets. 
Aim to help researchers to get a quick learning and understand the function of these tools, the current review delves into the introduction of the usage, characteristics and algorithms of tools, and web servers, such as LOGpc, KM plotter, GEPIA, TCPA, OncoLnc, PrognoScan, MethSurv, SurvExpress, UALCAN, etc., and further help researchers to select more suitable tools for their own research. In addition, all the tools introduced in this review can be reached at http://bioinfo.henu.edu.cn/WebServiceList.html.",2020-02-05 +26949480,SNP2Structure: A Public and Versatile Resource for Mapping and Three-Dimensional Modeling of Missense SNPs on Human Protein Structures.,"One of the long-standing challenges in biology is to understand how non-synonymous single nucleotide polymorphisms (nsSNPs) change protein structure and further affect their function. While it is impractical to solve all the mutated protein structures experimentally, it is quite feasible to model the mutated structures in silico. Toward this goal, we built a publicly available structure database resource (SNP2Structure, https://apps.icbi.georgetown.edu/snp2structure) focusing on missense mutations, msSNP. Compared with web portals with similar aims, SNP2Structure has the following major advantages. First, our portal offers direct comparison of two related 3D structures. Second, the protein models include all interacting molecules in the original PDB structures, so users are able to determine regions of potential interaction changes when a protein mutation occurs. Third, the mutated structures are available to download locally for further structural and functional analysis. Fourth, we used Jsmol package to display the protein structure that has no system compatibility issue. 
SNP2Structure provides reliable, high quality mapping of nsSNPs to 3D protein structures enabling researchers to explore the likely functional impact of human disease-causing mutations.",2015-09-30 +31510683,Efficient merging of genome profile alignments.,"

Motivation

Whole-genome alignment (WGA) methods show insufficient scalability toward the generation of large-scale WGAs. Profile alignment-based approaches revolutionized the fields of multiple sequence alignment construction methods by significantly reducing computational complexity and runtime. However, WGAs need to consider genomic rearrangements between genomes, which make the profile-based extension of several whole-genomes challenging. Currently, none of the available methods offer the possibility to align or extend WGA profiles.

Results

Here, we present genome profile alignment, an approach that aligns the profiles of WGAs and that is capable of producing large-scale WGAs many times faster than conventional methods. Our concept relies on already available whole-genome aligners, which are used to compute several smaller sets of aligned genomes that are combined to a full WGA with a divide and conquer approach. To align or extend WGA profiles, we make use of the SuperGenome data structure, which features a bidirectional mapping between individual sequence and alignment coordinates. This data structure is used to efficiently transfer different coordinate systems into a common one based on the principles of profiles alignments. The approach allows the computation of a WGA where alignments are subsequently merged along a guide tree. The current implementation uses progressiveMauve and offers the possibility for parallel computation of independent genome alignments. Our results based on various bacterial datasets up to several hundred genomes show that we can reduce the runtime from months to hours with a quality that is negligibly worse than the WGA computed with the conventional progressiveMauve tool.

Availability and implementation

GPA is freely available at https://lambda.informatik.uni-tuebingen.de/gitlab/ahennig/GPA. GPA is implemented in Java, uses progressiveMauve and offers a parallel computation of WGAs.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +30942726,"A Queue-Poll Extension and DataSHIELD: Standardised, Monitored, Indirect and Secure Access to Sensitive Data.","Analyzing data across hospitals and institutions without the data leaving the hospitals and adding institutions to a trusted network is an important part of privacy preserving data analysis. This work implements a queue-poll extension and integrates with DataSHIELD to allow for a standardized, monitored, indirect and secure access to data. The extension was created using the HTTP protocol and requests are not pushed into a participating institution but are sent to a server outside an institutional network. These requests are then pulled into the institution from within, executed and the response sent back to the outside server, which relays the request back to the request sender. We found that the requests were slower than a direct push request, but also that the integration of new institutions into the network was easily achieved. We propose that future work should focus on optimizing the monitoring and speed of the service. The service created here could reduce the barriers to entry for institutions to form an analysis network and can be used not only to drive analysis but also the sharing of resulting information and models.",2019-01-01 +29186338,Cpf1-Database: web-based genome-wide guide RNA library design for gene knockout screens using CRISPR-Cpf1.,"Summary:Following the type II CRISPR-Cas9 system, type V CRISPR-Cpf1 endonucleases have been found to be applicable for genome editing in various organisms in vivo. However, there are as yet no web-based tools capable of optimally selecting guide RNAs (gRNAs) among all possible genome-wide target sites. Here, we present Cpf1-Database, a genome-wide gRNA library design tool for LbCpf1 and AsCpf1, which have DNA recognition sequences of 5'-TTTN-3' at the 5' ends of target sites. 
Cpf1-Database provides a sophisticated but simple way to design gRNAs for AsCpf1 nucleases on the genome scale. One can easily access the data using a straightforward web interface, and using the powerful collections feature one can easily design gRNAs for thousands of genes in a short time. Availability and implementation:Free access at http://www.rgenome.net/cpf1-database/. Contact:sangsubae@hanyang.ac.kr.",2018-03-01 +31742326,SCOT: Rethinking the classification of secondary structure elements.,"

Motivation

Secondary structure classification is one of the most important issues in structure-based analyses due to its impact on secondary structure prediction, structural alignment and protein visualization. There are still open challenges concerning helix and sheet assignments which are currently not addressed by a single multi-purpose software.

Results

We introduce SCOT (Secondary structure Classification On Turns) as a novel secondary structure element assignment software which supports the assignment of turns, right-handed α-, 310- and π-helices, left-handed α- and 310-helices, 2.27- and polyproline II helices, β-sheets and kinks. We demonstrate that the introduction of helix Purity values enables a clear differentiation between helix classes. SCOT's unique strengths are highlighted by comparing it to six state-of-the-art methods (DSSP, STRIDE, ASSP, SEGNO, DISICL and SHAFT). The assignment approaches were compared concerning geometric consistency, protein structure quality and flexibility dependency and their impact on secondary structure element-based structural alignments. We show that only SCOT's combination of hydrogen bonds, geometric criteria and dihedral angles enables robust assignments independent of the structure quality and flexibility. We demonstrate that this combination and the elaborate kink detection lead to SCOT's clear superiority for protein alignments. As the resulting helices and strands are provided in a PDB conform output format, they can immediately be used for structure alignment algorithms. Taken together, the application of our new method and the straight-forward visualization using the accompanying PyMOL scripts enable the comprehensive analysis of regular backbone geometries in proteins.

Availability and implementation

https://this-group.rocks.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-04-01 +26555599,dbNSFP v3.0: A One-Stop Database of Functional Predictions and Annotations for Human Nonsynonymous and Splice-Site SNVs.,"The purpose of the dbNSFP is to provide a one-stop resource for functional predictions and annotations for human nonsynonymous single-nucleotide variants (nsSNVs) and splice-site variants (ssSNVs), and to facilitate the steps of filtering and prioritizing SNVs from a large list of SNVs discovered in an exome-sequencing study. A list of all potential nsSNVs and ssSNVs based on the human reference sequence were created and functional predictions and annotations were curated and compiled for each SNV. Here, we report a recent major update of the database to version 3.0. The SNV list has been rebuilt based on GENCODE 22 and currently the database includes 82,832,027 nsSNVs and ssSNVs. An attached database dbscSNV, which compiled all potential human SNVs within splicing consensus regions and their deleteriousness predictions, add another 15,030,459 potentially functional SNVs. Eleven prediction scores (MetaSVM, MetaLR, CADD, VEST3, PROVEAN, 4× fitCons, fathmm-MKL, and DANN) and allele frequencies from the UK10K cohorts and the Exome Aggregation Consortium (ExAC), among others, have been added. The original seven prediction scores in v2.0 (SIFT, 2× Polyphen2, LRT, MutationTaster, MutationAssessor, and FATHMM) as well as many SNV and gene functional annotations have been updated. dbNSFP v3.0 is freely available at http://sites.google.com/site/jpopgen/dbNSFP.",2016-01-05 +31917413,InterPep2: global peptide-protein docking using interaction surface templates.,"MOTIVATION:Interactions between proteins and peptides or peptide-like intrinsically disordered regions are involved in many important biological processes, such as gene expression and cell life-cycle regulation. 
Experimentally determining the structure of such interactions is time-consuming and difficult because of the inherent flexibility of the peptide ligand. Although several prediction-methods exist, most are limited in performance or availability. RESULTS:InterPep2 is a freely available method for predicting the structure of peptide-protein interactions. Improved performance is obtained by using templates from both peptide-protein and regular protein-protein interactions, and by a random forest trained to predict the DockQ-score for a given template using sequence and structural features. When tested on 252 bound peptide-protein complexes from structures deposited after the complexes used in the construction of the training and templates sets of InterPep2, InterPep2-Refined correctly positioned 67 peptides within 4.0 Å LRMSD among top10, similar to another state-of-the-art template-based method which positioned 54 peptides correctly. However, InterPep2 displays a superior ability to evaluate the quality of its own predictions. On a previously established set of 27 non-redundant unbound-to-bound peptide-protein complexes, InterPep2 performs on-par with leading methods. The extended InterPep2-Refined protocol managed to correctly model 15 of these complexes within 4.0 Å LRMSD among top10, without using templates from homologs. In addition, combining the template-based predictions from InterPep2 with ab initio predictions from PIPER-FlexPepDock resulted in 22% more near-native predictions compared to the best single method (22 versus 18). AVAILABILITY AND IMPLEMENTATION:The program is available from: http://wallnerlab.org/InterPep2. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2020-04-01 +28858673,"Evaluating the statistical power of DNA-based identification, exemplified by 'The missing grandchildren of Argentina'.","Methods and implementations of DNA-based identification are well established in several forensic contexts. 
However, assessing the statistical power of these methods has been largely overlooked, except in the simplest cases. In this paper we outline general methods for such power evaluation, and apply them to a large set of family reunification cases, where the objective is to decide whether a person of interest (POI) is identical to the missing person (MP) in a family, based on the DNA profile of the POI and available family members. As such, this application closely resembles database searching and disaster victim identification (DVI). If parents or children of the MP are available, they will typically provide sufficient statistical evidence to settle the case. However, if one must resort to more distant relatives, it is not a priori obvious that a reliable conclusion is likely to be reached. In these cases power evaluation can be highly valuable, for instance in the recruitment of additional family members. To assess the power in an identification case, we advocate the combined use of two statistics: the Probability of Exclusion, and the Probability of Exceedance. The former is the probability that the genotypes of a random, unrelated person are incompatible with the available family data. If this is close to 1, it is likely that a conclusion will be achieved regarding general relatedness, but not necessarily the specific relationship. To evaluate the ability to recognize a true match, we use simulations to estimate exceedance probabilities, i.e. the probability that the likelihood ratio will exceed a given threshold, assuming that the POI is indeed the MP. All simulations are done conditionally on available family data. Such conditional simulations have a long history in medical linkage analysis, but to our knowledge this is the first systematic forensic genetics application. Also, for forensic markers mutations cannot be ignored and therefore current models and implementations must be extended. 
All the tools are freely available in Familias (http://www.familias.no) empowered by the R library paramlink. The above approach is applied to a large and important data set: 'The missing grandchildren of Argentina'. We evaluate the power of 196 families from the DNA reference databank (Banco Nacional de Datos Genéticos, http://www.bndg.gob.ar). As a result we show that 58 of the families have poor statistical power and require additional genetic data to enable a positive identification.",2017-08-12 +29912378,iMetaLab 1.0: a web platform for metaproteomics data analysis.,"

Summary

The human gut microbiota, a complex, dynamic and biodiverse community, has been increasingly shown to influence many aspects of health and disease. Metaproteomic analysis has proven to be a powerful approach to study the functionality of the microbiota. However, the processing and analyses of metaproteomic mass spectrometry data remains a daunting task in metaproteomics data analysis. We developed iMetaLab, a web based platform to provide a user-friendly and comprehensive data analysis pipeline with a focus on lowering the technical barrier for metaproteomics data analysis.

Availability and implementation

iMetaLab is freely available at http://imetalab.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-11-01 +26739209,SuccinSite: a computational tool for the prediction of protein succinylation sites by exploiting the amino acid patterns and properties.,"Lysine succinylation is an emerging protein post-translational modification, which plays an important role in regulating the cellular processes in both eukaryotic and prokaryotic cells. However, the succinylation modification site is particularly difficult to detect because the experimental technologies used are often time-consuming and costly. Thus, an accurate computational method for predicting succinylation sites may help researchers towards designing their experiments and to understand the molecular mechanism of succinylation. In this study, a novel computational tool termed SuccinSite has been developed to predict protein succinylation sites by incorporating three sequence encodings, i.e., k-spaced amino acid pairs, binary and amino acid index properties. Then, the random forest classifier was trained with these encodings to build the predictor. The SuccinSite predictor achieves an AUC score of 0.802 in the 5-fold cross-validation set and performs significantly better than existing predictors on a comprehensive independent test set. Furthermore, informative features and predominant rules (i.e. feature combinations) were extracted from the trained random forest model for an improved interpretation of the predictor. Finally, we also compiled a database covering 4411 experimentally verified succinylation proteins with 12 456 lysine succinylation sites. Taken together, these results suggest that SuccinSite would be a helpful computational resource for succinylation sites prediction. 
The web-server, datasets, source code and database are freely available at http://systbio.cau.edu.cn/SuccinSite/.",2016-01-07 +28701700,HDNetDB: A Molecular Interaction Database for Network-Oriented Investigations into Huntington's Disease.,"Huntington's disease (HD) is a progressive and fatal neurodegenerative disorder caused by an expanded CAG repeat in the huntingtin gene. Although HD is monogenic, its molecular manifestation appears highly complex and involves multiple cellular processes. The recent application of high throughput platforms such as microarrays and mass-spectrometry has indicated multiple pathogenic routes. The massive data generated by these techniques together with the complexity of the pathogenesis, however, pose considerable challenges to researchers. Network-based methods can provide valuable tools to consolidate newly generated data with existing knowledge, and to decipher the interwoven molecular mechanisms underlying HD. To facilitate research on HD in a network-oriented manner, we have developed HDNetDB, a database that integrates molecular interactions with many HD-relevant datasets. It allows users to obtain, visualize and prioritize molecular interaction networks using HD-relevant gene expression, phenotypic and other types of data obtained from human samples or model organisms. We illustrated several HDNetDB functionalities through a case study and identified proteins that constitute potential cross-talk between HD and the unfolded protein response (UPR). HDNetDB is publicly accessible at http://hdnetdb.sysbiolab.eu .",2017-07-12 +25655564,DomeTree: a canonical toolkit for mitochondrial DNA analyses in domesticated animals.,"Mitochondrial DNA (mtDNA) is widely used in various genetic studies of domesticated animals. Many applications require comprehensive knowledge about the phylogeny of mtDNA variants. Herein, we provide the most up-to-date mtDNA phylogeny (i.e. 
haplogroup tree or matrilineal genealogy) and a standardized hierarchical haplogroup nomenclature system for domesticated cattle, dogs, goats, horses, pigs, sheep, yaks and chickens. These high-resolution mtDNA haplogroup trees based on 1240 complete or near-complete mtDNA genome sequences are available in open resource DomeTree (http://www.dometree.org). In addition, we offer the software MitoToolPy (http://www.mitotool.org/mp.html) to facilitate the mtDNA data analyses. We will continuously and regularly update DomeTree and MitoToolPy.",2015-02-22 +32083308,Reelin signaling modulates GABAB receptor function in the neocortex.,"Reelin is a protein that is best known for its role in controlling neuronal layer formation in the developing cortex. Here, we studied its role for post-natal cortical network function, which is poorly explored. To preclude early cortical migration defects caused by Reelin deficiency, we used a conditional Reelin knock-out (RelncKO ) mouse, and induced Reelin deficiency post-natally. Induced Reelin deficiency caused hyperexcitability of the neocortical network in vitro and ex vivo. Blocking Reelin binding to its receptors ApoER2 and VLDLR resulted in a similar effect. Hyperexcitability in RelncKO organotypic slice cultures could be rescued by co-culture with wild-type organotypic slice cultures. Moreover, the GABAB receptor (GABAB R) agonist baclofen failed to activate and the antagonist CGP35348 failed to block GABAB Rs in RelncKO mice. Immunolabeling of RelncKO cortical slices revealed a reduction in GABAB R1 and GABAB R2 surface expression at the plasma membrane and western blot of RelncKO cortical tissue revealed decreased phosphorylation of the GABAB R2 subunit at serine 892 and increased phosphorylation at serine 783, reflecting receptor deactivation and proteolysis. These data show a role of Reelin in controlling early network activity, by modulating GABAB R function. 
Cover Image for this issue: https://doi.org/10.1111/jnc.15054.",2020-03-13 +30671511,Generalised framework for multi-criteria method selection: Rule set database and exemplary decision support system implementation blueprints.,"This data article describes the analysis of 56 MCDA (Multi-Criteria Decision Analysis) methods in regards to 9 decision-making problem characteristics structured into 3 levels. The impact of uncertainty in decision-making problem description on MCDA method selection is studied. 450,000 possible descriptions of a decision problem were studied, resulting in sets of rules which can serve as input to uncertainty-aware MCDA method selection decision support systems. Comprehensive analyses of the obtained rule sets are provided. An exemplary decision support system based on the presented data was created and is available at http://www.mcda.it. Moreover the technical documentation needed to create and expand such system is provided in this data article. The data and system can be easily extended and the authors invite all researchers to contribute.",2018-12-12 +24573882,COMPARTMENTS: unification and visualization of protein subcellular localization evidence.,"Information on protein subcellular localization is important to understand the cellular functions of proteins. Currently, such information is manually curated from the literature, obtained from high-throughput microscopy-based screens and predicted from primary sequence. To get a comprehensive view of the localization of a protein, it is thus necessary to consult multiple databases and prediction tools. To address this, we present the COMPARTMENTS resource, which integrates all sources listed above as well as the results of automatic text mining. The resource is automatically kept up to date with source databases, and all localization evidence is mapped onto common protein identifiers and Gene Ontology terms. 
We further assign confidence scores to the localization evidence to facilitate comparison of different types and sources of evidence. To further improve the comparability, we assign confidence scores based on the type and source of the localization evidence. Finally, we visualize the unified localization evidence for a protein on a schematic cell to provide a simple overview. Database URL: http://compartments.jensenlab.org.",2014-02-25 +28472511,"WebGestalt 2017: a more comprehensive, powerful, flexible and interactive gene set enrichment analysis toolkit.","Functional enrichment analysis has played a key role in the biological interpretation of high-throughput omics data. As a long-standing and widely used web application for functional enrichment analysis, WebGestalt has been constantly updated to satisfy the needs of biologists from different research areas. WebGestalt 2017 supports 12 organisms, 324 gene identifiers from various databases and technology platforms, and 150 937 functional categories from public databases and computational analyses. Omics data with gene identifiers not supported by WebGestalt and functional categories not included in the WebGestalt database can also be uploaded for enrichment analysis. In addition to the Over-Representation Analysis in the previous versions, Gene Set Enrichment Analysis and Network Topology-based Analysis have been added to WebGestalt 2017, providing complementary approaches to the interpretation of high-throughput omics data. The new user-friendly output interface and the GOView tool allow interactive and efficient exploration and comparison of enrichment results. Thus, WebGestalt 2017 enables more comprehensive, powerful, flexible and interactive functional enrichment analysis. 
It is freely available at http://www.webgestalt.org.",2017-07-01 +30560162,Data for the description of fungal diseases and agronomic parameters of Mango ginger (Curcuma amada Roxb.).,"This data article contains data, related to fungal diseases of Mango ginger (Curcuma amada Roxb.), that were collected at Federal University of Agriculture Abeokuta. Pictures described leaf spot, leaf blight and rhizome rot diseases, and associated fungi and fungus-like organisms were listed. Data of plant height and disease incidence, against plant age was described with graphs. Further, data of disease severity for planting years of 2016 and 2017 were compared and percentage commercial loss of rhizome rot for the planting years calculated https://doi.org/10.1016/j.cpb.2018.10.001 (Ayodele et al., 2018).",2018-11-16 +22559261,AlliumMap-A comparative genomics resource for cultivated Allium vegetables.,"

Background

Vegetables of the genus Allium are widely consumed but remain poorly understood genetically. Genetic mapping has been conducted in intraspecific crosses of onion (Allium cepa L.), A. fistulosum and interspecific crosses between A. roylei and these two species, but it has not been possible to access genetic maps and underlying data from these studies easily.

Description

An online comparative genomics database, AlliumMap, has been developed based on the GMOD CMap tool at http://alliumgenetics.org. It has been populated with curated data linking genetic maps with underlying markers and sequence data from multiple studies. It includes data from multiple onion mapping populations as well as the most closely related species A. roylei and A. fistulosum. Further onion EST-derived markers were evaluated in the A. cepa x A. roylei interspecific population, enabling merging of the AFLP-based maps. In addition, data concerning markers assigned in multiple studies to the Allium physical map using A. cepa-A. fistulosum alien monosomic addition lines have been compiled. The compiled data reveal extensive synteny between onion and A. fistulosum.

Conclusions

The database provides the first online resource providing genetic map and marker data from multiple Allium species and populations. The additional markers placed on the interspecific Allium map confirm the value of A. roylei as a valuable bridge between the genetics of onion and A. fistulosum and as a means to conduct efficient mapping of expressed sequence markers in Allium. The data presented suggest that comparative approaches will be valuable for genetic and genomic studies of onion and A. fistulosum. This online resource will provide a valuable means to integrate genetic and sequence-based explorations of Allium genomes.",2012-05-04 +30657943,LFMM 2: Fast and Accurate Inference of Gene-Environment Associations in Genome-Wide Studies.,"Gene-environment association (GEA) studies are essential to understand the past and ongoing adaptations of organisms to their environment, but those studies are complicated by confounding due to unobserved demographic factors. Although the confounding problem has recently received considerable attention, the proposed approaches do not scale with the high-dimensionality of genomic data. Here, we present a new estimation method for latent factor mixed models (LFMMs) implemented in an upgraded version of the corresponding computer program. We developed a least-squares estimation approach for confounder estimation that provides a unique framework for several categories of genomic data, not restricted to genotypes. The speed of the new algorithm is several order faster than existing GEA approaches and then our previous version of the LFMM program. In addition, the new method outperforms other fast approaches based on principal component or surrogate variable analysis. 
We illustrate the program use with analyses of the 1000 Genomes Project data set, leading to new findings on adaptation of humans to their environment, and with analyses of DNA methylation profiles providing insights on how tobacco consumption could affect DNA methylation in patients with rheumatoid arthritis. Software availability: Software is available in the R package lfmm at https://bcm-uga.github.io/lfmm/.",2019-04-01 +24885522,LincSNP: a database of linking disease-associated SNPs to human large intergenic non-coding RNAs.,"

Background

Genome-wide association studies (GWAS) have successfully identified a large number of single nucleotide polymorphisms (SNPs) that are associated with a wide range of human diseases. However, many of these disease-associated SNPs are located in non-coding regions and have remained largely unexplained. Recent findings indicate that disease-associated SNPs in human large intergenic non-coding RNA (lincRNA) may lead to susceptibility to diseases through their effects on lincRNA expression. There is, therefore, a need to specifically record these SNPs and annotate them as potential candidates for disease.

Description

We have built LincSNP, an integrated database, to identify and annotate disease-associated SNPs in human lincRNAs. The current release of LincSNP contains approximately 140,000 disease-associated SNPs (or linkage disequilibrium SNPs), which can be mapped to around 5,000 human lincRNAs, together with their comprehensive functional annotations. The database also contains annotated, experimentally supported SNP-lincRNA-disease associations and disease-associated lincRNAs. It provides flexible search options for data extraction and searches can be performed by disease/phenotype name, SNP ID, lincRNA name and chromosome region. In addition, we provide users with a link to download all the data from LincSNP and have developed a web interface for the submission of novel identified SNP-lincRNA-disease associations.

Conclusions

The LincSNP database aims to integrate disease-associated SNPs and human lincRNAs, which will be an important resource for the investigation of the functions and mechanisms of lincRNAs in human disease. The database is available at http://bioinfo.hrbmu.edu.cn/LincSNP.",2014-05-20 +28460071,Interactive microbial distribution analysis using BioAtlas.,"Massive amounts of 16S rRNA sequencing data have been stored in publicly accessible databases, such as GOLD, SILVA, GreenGenes (GG), and the Ribosomal Database Project (RDP). Many of these sequences are tagged with geo-locations. Nevertheless, researchers currently lack a user-friendly tool to analyze microbial distribution in a location-specific context. BioAtlas is an interactive web application that closes this gap between sequence databases, taxonomy profiling and geo/body-location information. It enables users to browse taxonomically annotated sequences across (i) the world map, (ii) human body maps and (iii) user-defined maps. It further allows for (iv) uploading of own sample data, which can be placed on existing maps to (v) browse the distribution of the associated taxonomies. Finally, BioAtlas enables users to (vi) contribute custom maps (e.g. for plants or animals) and to map taxonomies to pre-defined map locations. In summary, BioAtlas facilitates map-supported browsing of public 16S rRNA sequence data and analyses of user-provided sequences without requiring manual mapping to taxonomies and existing databases.

Availability

http://bioatlas.compbio.sdu.dk/.",2017-07-01 +32406679,Ensemble Models Based on QuBiLS-MAS Features and Shallow Learning for the Prediction of Drug-Induced Liver Toxicity: Improving Deep Learning and Traditional Approaches.,"Drug-induced liver injury (DILI) is a key safety issue in the drug discovery pipeline and a regulatory concern. Thus, many in silico tools have been proposed to improve the hepatotoxicity prediction of organic-type chemicals. Here, classifiers for the prediction of DILI were developed by using QuBiLS-MAS 0-2.5D molecular descriptors and shallow machine learning techniques, on a training set composed of 1075 molecules. The best ensemble model build, E13, was obtained with good statistical parameters for the learning series, namely, the following: accuracy = 0.840, sensibility = 0.890, specificity = 0.761, Matthew's correlation coefficient = 0.660, and area under the ROC curve = 0.904. The model was also satisfactorily evaluated with Y-scrambling test, and repeated k-fold cross-validation and repeated k-holdout validation. In addition, an exhaustive external validation was also carried out by using two test sets and five external test sets, with an average accuracy value equal to 0.854 (±0.062) and a coverage equal to 98.4% according to its applicability domain. A statistical comparison of the performance of the E13 model, with regard to results and tools (e.g., Padel DDPredictor Software, Deep Learning DILIserver, and Vslead) reported in the literature, was also performed. In general, E13 presented the best global performance in all experiments. The sum of the ranking differences procedure provided a very similar grouping pattern to that of the M-ANOVA statistical analysis, where E13 was identified as the best model for DILI predictions. A noncommercial and fully cross-platform software for the DILI prediction was also developed, which is freely available at http://tomocomd.com/apps/ptoxra. 
This software was used for the screening of seven data sets, containing natural products, leads, toxic materials, and FDA approved drugs, to assess the usefulness of the QSAR models in the DILI labeling of organic substances; it was found that 50-92% of the evaluated molecules are positive-DILI compounds. All in all, it can be stated that the E13 model is a relevant method for the prediction of DILI risk in humans, as it shows the best results among all of the methods analyzed.",2020-05-14 +30679817,Author Correction: Two million years of flaking stone and the evolutionary efficiency of stone tool technology.,"In the version of this Article originally published, the authors mistakenly included duplicate entries in the flake datasets for the new Pech de l'Azé IV and Warwasi collections, resulting in minor errors in the statistical analysis. The authors have now repeated this analysis with the correct flake datasets. As a result, in the following two sentences, the number of flakes has been changed from 19,000 to 18,000: ""Using more than 18,000 flakes from 81 assemblages spanning two million years..."" and ""We applied a comparative approach...on more that 18,000 complete and unmodified flakes."" In addition, in Figs. 1-3 and Supplementary Fig. 1, some of the data points for the Pech de l'Azé IV and Warwasi collections have moved; the original and corrected figures are below. Supplementary Tables 1 and 2 have been updated to reflect the corrected statistics, and datasets 'Flake_data' and 'Summary_data' have been replaced with the corrected data files. Furthermore, the data availability statement has been updated with the text ""Open access to these data and the R code generated for this study is provided at https://zenodo.org/record/1408081#.W6iyn84zaHs "". The authors would like to thank L. 
Premo at Washington State University for finding the duplicate entries in the published flake dataset.",2019-02-01 +32295494,Educational assessment of the major lower limb amputations videos on YouTube.,"OBJECTIVE:Videos of surgical procedures are viewed by some as potential training resources for surgeons and residents. However, there is little evidence on the effectiveness of surgical videos on learning and understanding complex three-dimensional surgical procedures. Lower extremity amputation is a complex surgery, and many residents and surgeons have low exposure to this type of procedures. This paper investigates the educational quality of lower extremity amputation videos posted on YouTube. METHODS:The search was limited to the first 100 videos. Full-length videos of any major lower limb amputation or disarticulation were included. Key basic video data such as title, YouTube address (http://), country of origin, channel source, uploading date, video duration time, number of views, number of up-voters and number of down-voters were collected. An educational assessment tool has been developed specifically for limb amputations. It consists in 11 items: three general and eight amputation-specific, each having a maximum score of 2. RESULTS:In total, 13 videos met the inclusion criteria for final analysis. Four videos reported the surgical technique of above knee amputation, two reported that of knee disarticulation and the remaining seven videos described below knee amputation. The average score (±SD) was 12.77 ± 5.2 yielding an average grade close to ""Fair."" A high level of concordance was found between the two assessors (κ = 0.79). No correlation was found between educational assessment tool score and the pre-set variables (r = 0.6, R2 = 35.4%, F = 1.09, P = 0.4). CONCLUSIONS:Most videos describing lower extremity amputation techniques were found to be of low-to-moderate quality. Only 4 out of 13 (30.7%) had an excellent educational and technical quality. 
Surgeons and surgical residents should be aware that not all posted videos on YouTube are beneficial. High educational quality videos are needed since many surgeons and residents have a low exposure to such surgeries.",2020-04-15 +31349841,Partners in Recovery: an early phase evaluation of an Australian mental health initiative using program logic and thematic analysis.,"

Background

Mental illness is a leading cause of illness and disability and around 75% of people suffering mental illness do not have access to adequate care. In Australia, nearly half the population experiences mental illness at some point in their life. The Australian Government developed a National program called Partners in Recovery (PIR) to support those with severe and persistent mental illness. The program was implemented through 48 consortia across Australia. One of these was led by the Nepean Blue Mountains Medicare Local who adapted the program according to its specific local needs.

Methods

We conducted an early evaluation of the PIR program in Nepean Blue Mountains (NBMPIR) using a program logic model (PLM) to frame the evaluation and complemented this with an additional thematic analysis. Participants (n = 73) included clients and carers, program management and staff of the Consortium and other partners and agencies, and clinical, allied health, and other service providers. Our PLM utilised multiple data sources that included document review, open and closed survey questions, and semi-structured interviews. Quantitative data received a descriptive analysis and qualitative data was analysed both in alignment with the PLM framework and inductively.

Results

We aligned our results to PLM domains of inputs, activities, outputs, outcomes and impacts. The NBMPIR consortium implemented a recovery approach and provided greater access to services by enhancing healthcare provider partnerships. Our thematic analysis further described five key themes of collaboration; communication; functioning of PIR; structural/organisational challenges; and understanding of PIR approaches. Facilitators and barriers to the NBMPIR program centred on the alignment of vision and purpose; building an efficient system; getting the message out and sharing information; understanding roles and support and training of staff; building capacity and systems change; addressing service gaps; and engaging peers.

Conclusions

Our study provided helpful insights into the coordinated management of complex mental illness. The NBMPIR's focus on partnerships and governance, service coordination, and systems change has relevance for others engaged in this work. This PLM effectively mapped the program, including its processes and resources, and is a useful framework providing a baseline for future evaluations. Full report available at https://researchdirect.westernsydney.edu.au/islandora/object/uws:33977/.",2019-07-26 +31984271,pPerturb: A Server for Predicting Long-Distance Energetic Couplings and Mutation-Induced Stability Changes in Proteins via Perturbations.,"The strength of intraprotein interactions or contact network is one of the dominant factors determining the thermodynamic stabilities of proteins. The nature and the extent of connectivity of this network also play a role in allosteric signal propagation characteristics upon ligand binding to a protein domain. Here, we develop a server for rapid quantification of the strength of an interaction network by employing an experimentally consistent perturbation approach previously validated against a large data set of 375 mutations in 19 different proteins. The web server can be employed to predict the extent of destabilization of proteins arising from mutations in the protein interior in experimentally relevant units. Moreover, coupling distances-a measure of the extent of percolation on perturbation-and overall perturbation magnitudes are predicted in a residue-specific manner, enabling a first look at the distribution of energetic couplings in a protein or its changes upon ligand binding. We show specific examples of how the server can be employed to probe for the distribution of local stabilities in a protein, to examine changes in side chain orientations or packing before and after ligand binding, and to predict changes in stabilities of proteins upon mutations of buried residues. 
The web server is freely available at http://pbl.biotech.iitm.ac.in/pPerturb and supports recent versions of all major browsers.",2020-01-09 +25655709,"FDA MAUDE data on complications with lasers, light sources, and energy-based devices.","

Background and objective

It is essential for physicians to be fully informed regarding adverse events and malfunctions associated with medical devices that occur in routine practice. There is limited information on this important issue in the medical literature, and it is mostly based on initial studies and case reports. More advanced knowledge regarding device adverse events is necessary to guide physicians towards providing safe treatments. The FDA requires that manufacturers and device users submit medical device reports (MDRs) for suspected injuries from device use or malfunction. The database of MDRs, entitled Manufacturer and User Facility Device Experience (MAUDE) enables the FDA to monitor device performance and identify potential safety issues.

Study design/materials and methods

We employed the following search strategy to identify reported adverse events. We searched the MAUDE electronic database on the FDA website in December 2013: http://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfmaude/search.cfm We collected all reported cases between 1991 and December 2013. The search terms utilized included a comprehensive list of device manufacturers, specific product names, and the wavelengths/technology of the devices used in the field of dermatology.

Results

Our search yielded 1257 MDRs. Forty-five MDRs were excluded due to insufficient data. The data is broken down into the adverse events observed, such as, but not limited to: blistering, burns, scarring, dyschromia, fat loss, and nerve palsy. The MDRs describe the adverse event and attempt to determine if it was related to device malfunction versus operator error. Radiofrequency devices, diode lasers, and intense pulsed light devices were the most commonly reported devices related to injuries.

Conclusion

1257 MDRs, from a myriad of devices used in dermatology, have been reported to the FDA as of December 2013. Despite the underreporting of adverse events, the MAUDE database is an untapped resource of post-market surveillance of medical devices. The database can offer additional information, which combined with the initial device studies and published case reports from our colleagues, will help raise awareness and improve patient safety.",2015-02-04 +23874618,NanoMiner - integrative human transcriptomics data resource for nanoparticle research.,"The potential impact of nanoparticles on the environment and on human health has attracted considerable interest worldwide. The amount of transcriptomics data, in which tissues and cell lines are exposed to nanoparticles, increases year by year. In addition to the importance of the original findings, this data can have value in broader context when combined with other previously acquired and published results. In order to facilitate the efficient usage of the data, we have developed the NanoMiner web resource (http://nanominer.cs.tut.fi/), which contains 404 human transcriptome samples exposed to various types of nanoparticles. All the samples in NanoMiner have been annotated, preprocessed and normalized using standard methods that ensure the quality of the data analyses and enable the users to utilize the database systematically across the different experimental setups and platforms. 
With NanoMiner it is possible to 1) search and plot the expression profiles of one or several genes of interest, 2) cluster the samples within the datasets, 3) find differentially expressed genes in various nanoparticle studies, 4) detect the nanoparticles causing differential expression of selected genes, 5) analyze enriched Kyoto Encyclopedia of Genes and Genomes (KEGG) pathways and Gene Ontology (GO) terms for the detected genes and 6) search the expression values and differential expressions of the genes belonging to a specific KEGG pathway or Gene Ontology. In sum, NanoMiner database is a valuable collection of microarray data which can be also used as a data repository for future analyses.",2013-07-12 +31201317,Multi omics analysis of fibrotic kidneys in two mouse models.,"Kidney fibrosis represents an urgent unmet clinical need due to the lack of effective therapies and an inadequate understanding of the molecular pathogenesis. We have generated a comprehensive and combined multi-omics dataset (proteomics, mRNA and small RNA transcriptomics) of fibrotic kidneys that is searchable through a user-friendly web application: http://hbcreports.med.harvard.edu/fmm/ . Two commonly used mouse models were utilized: a reversible chemical-induced injury model (folic acid (FA) induced nephropathy) and an irreversible surgically-induced fibrosis model (unilateral ureteral obstruction (UUO)). mRNA and small RNA sequencing, as well as 10-plex tandem mass tag (TMT) proteomics were performed with kidney samples from different time points over the course of fibrosis development. The bioinformatics workflow used to process, technically validate, and combine the single omics data will be described. 
In summary, we present temporal multi-omics data from fibrotic mouse kidneys that are accessible through an interrogation tool (Mouse Kidney Fibromics browser) to provide a searchable transcriptome and proteome for kidney fibrosis researchers.",2019-06-14 +32100491,Modeling voxel-based Monte Carlo light transport with curved and oblique boundary surfaces.,"

Significance

Monte Carlo (MC) light transport simulations are most often performed in regularly spaced three-dimensional voxels, a type of data representation that naturally struggles to represent boundary surfaces with curvature and oblique angles. Not accounting properly for such boundaries with an index of refractivity, mismatches can lead to important inaccuracies, not only in the calculated angles of reflection and transmission but also in the amount of light that transmits through or reflects from these mismatched boundary surfaces.

Aim

A new MC light transport algorithm is introduced to deal with curvature and oblique angles of incidence when simulated photons encounter mismatched boundary surfaces.

Approach

The core of the proposed algorithm applies the efficient preprocessing step of calculating a gradient map of the mismatched boundaries, a smoothing step on this calculated 3D vector field to remove surface roughness due to discretization and an interpolation scheme to improve the handling of curvature.

Results

Through simulations of light hitting the side of a sphere and going through a lens, the agreement of this approach with analytical solutions is shown to be strong.

Conclusions

The MC method introduced here has the advantage of requiring only slight implementation changes from the current state-of-the-art to accurately simulate mismatched boundaries and readily exploit the acceleration of general-purpose graphics processing units. A code implementation, mcxyzn, is made available and maintained at https://omlc.org/software/mc/mcxyzn/.",2020-02-01 +23730305,The systems genetics resource: a web application to mine global data for complex disease traits.,"The Systems Genetics Resource (SGR) (http://systems.genetics.ucla.edu) is a new open-access web application and database that contains genotypes and clinical and intermediate phenotypes from both human and mouse studies. The mouse data include studies using crosses between specific inbred strains and studies using the Hybrid Mouse Diversity Panel. SGR is designed to assist researchers studying genes and pathways contributing to complex disease traits, including obesity, diabetes, atherosclerosis, heart failure, osteoporosis, and lipoprotein metabolism. Over the next few years, we hope to add data relevant to deafness, addiction, hepatic steatosis, toxin responses, and vascular injury. The intermediate phenotypes include expression array data for a variety of tissues and cultured cells, metabolite levels, and protein levels. Pre-computed tables of genetic loci controlling intermediate and clinical phenotypes, as well as phenotype correlations, are accessed via a user-friendly web interface. The web site includes detailed protocols for all of the studies. 
Data from published studies are freely available; unpublished studies have restricted access during their embargo period.",2013-05-20 +31211207,Data on physical and electrical properties of (ZrO2)1-x(Sc2O3)x(CeO2)y and (ZrO2)1-x-y-z(Sc2O3)x(CeO2)y(Y2O3)z solid solution crystals.,"The data presented in this article are related to the research article entitled ""Phase stability and transport characteristics of (ZrO2)1-x(Sc2O3)x(СeO2)y and (ZrO2)1-x-y-z(Sc2O3)x(СeO2)y(Y2O3)z solid solution crystals"" https://www.sciencedirect.com/science/article/pii/S2352340917302329 [1]. It contains data on densities and microhardness of the as-grown crystals. The data on the specific conductivity of the as-grown and annealing at 1000 °С for 400 h ScCeSZ and ScCeYSZ crystals in the temperature range 623-1173 K is also included in this article. The article describes also the growth of the (ZrO2)1-x(Sc2O3)x(СeO2)y and (ZrO2)1-x-y-z(Sc2O3)x(СeO2)y(Y2O3)z solid solution crystals using directional melt crystallization in a cold crucible.",2019-05-25 +31433532,Association between DIO2 polymorphism and the risk of Kashin-Beck disease in the Tibetan population.,"

Background

Kashin-Beck disease (KBD) is a local, multiple and deformable osteoarthropathy, mostly occurring in Tibet. Type 2 iodothyronine deiodinase (DIO2) is implicated in the activation of thyroid hormones to which the bones are very sensitive. Therefore, it is necessary to explore the association between KBD and DIO2 in the Tibetan population.

Methods

We carried out a case-control study among 316 cases and 320 controls from a Tibetan population. Seven single nucleotide polymorphisms in DIO2 were selected and genotyped using the Agena MassARRAY platform (Agena Bioscience, San Diego, CA, USA). Odds ratios (ORs) and 95% confidence intervals (CIs) were calculated by logistic regression analysis. HaploReg (https://pubs.broadinstitute.org/mammals/haploreg/haploreg.php) and GTEx (http://www.gtexportal.org) databases were applied for functional assessment of the polymorphisms.

Results

The ""A/C"" genotype of rs1352815 (OR = 3.18, 95% CI = 1.14-8.85, p = 0.027) and the ""A/G"" genotype of rs1388382 (OR = 3.80, 95% CI = 1.30-11.11, p = 0.015) were associated with the susceptibility of KBD under the co-dominant model. With gender stratification analysis, rs1388382 showed obvious evidence for correlation with an elevated risk of KBD in females under the co-dominant model (OR = 3.32, 95% CI = 1.06-10.41, p = 0.039).

Conclusions

The results obtained in the present study indicate that DIO2 polymorphisms rs1352815 and rs1388382 were correlated with KBD susceptibility among Tibetans, which also sheds new light on the role of DIO2 in the development of KBD.",2019-09-04 +31432077,CRDS: Consensus Reverse Docking System for target fishing.,"

Motivation

Identification of putative drug targets is a critical step for explaining the mechanism of drug action against multiple targets, finding new therapeutic indications for existing drugs and unveiling the adverse drug reactions. One important approach is to use the molecular docking. However, its widespread utilization has been hindered by the lack of easy-to-use public servers. Therefore, it is vital to develop a streamlined computational tool for target prediction by molecular docking on a large scale.

Results

We present a fully automated web tool named Consensus Reverse Docking System (CRDS), which predicts potential interaction sites for a given drug. To improve hit rates, we developed a strategy of consensus scoring. CRDS carries out reverse docking against 5254 candidate protein structures using three different scoring functions (GoldScore, Vina and LeDock from GOLD version 5.7.1, AutoDock Vina version 1.1.2 and LeDock version 1.0, respectively), and those scores are combined into a single score named Consensus Docking Score (CDS). The web server provides the list of top 50 predicted interaction sites, docking conformations, 10 most significant pathways and the distribution of consensus scores.

Availability and implementation

The web server is available at http://pbil.kaist.ac.kr/CRDS.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +27481021,The eTOX Library of Public Resources for in Silico Toxicity Prediction.,"(1000-1500 characters) In spite of the increasing amount of public access resources that offer original data related to drug toxicology, the successful exploitation of such data for the development of in silico predictive models is still limited by the quality of the data available, its integrability and its coverage for each toxicity endpoint. This work describes the strategy developed by the IMI eTOX consortium for identifying and compiling data and other related resources from the biomedical literature and a wide spectrum of public on-line sources. The main result of this effort is a large web-based structured library containing links to articles of toxicological relevance (data that can be used for modeling purposes, computational models, and toxicity mechanisms), public databases, standardized vocabularies and modeling tools. All this material has been manually reviewed, systematically evaluated and grouped into different categories. The library has been made public at the eTOX website (http://www.etoxproject.eu/), where it is updated on a monthly basis, constituting a useful resource for affording the in silico toxicity prediction of novel drug candidates.",2013-01-11 +25052702,Standardized description of scientific evidence using the Evidence Ontology (ECO). ,"The Evidence Ontology (ECO) is a structured, controlled vocabulary for capturing evidence in biological research. ECO includes diverse terms for categorizing evidence that supports annotation assertions including experimental types, computational methods, author statements and curator inferences. Using ECO, annotation assertions can be distinguished according to the evidence they are based on such as those made by curators versus those automatically computed or those made via high-throughput data review versus single test experiments. 
Originally created for capturing evidence associated with Gene Ontology annotations, ECO is now used in other capacities by many additional annotation resources including UniProt, Mouse Genome Informatics, Saccharomyces Genome Database, PomBase, the Protein Information Resource and others. Information on the development and use of ECO can be found at http://evidenceontology.org. The ontology is freely available under Creative Commons license (CC BY-SA 3.0), and can be downloaded in both Open Biological Ontologies and Web Ontology Language formats at http://code.google.com/p/evidenceontology. Also at this site is a tracker for user submission of term requests and questions. ECO remains under active development in response to user-requested terms and in collaborations with other ontologies and database resources. Database URL: Evidence Ontology Web site: http://evidenceontology.org.",2014-07-22 +31065002,Reply to 'A refutation to 'A new A-P compartment boundary and organizer in holometabolous insect wings'.,"Here we reply to the ""Refutation"" of Lawrence, Casal, de Cellis, and Morata, who critique our paper presenting evidence for an organizer and compartment boundary subdividing the widely recognized posterior wing compartment of butterflies and moths (Lepidoptera) and Drosophila, that we called the F-P boundary. Lawrence et al. present no data from the Lepidoptera and while the data that they present from Drosophila melanogaster mitotic clones are intriguing and may be informative with respect to the timing of the activity of the A-P and F-P organizers, considerable ambiguity remains regarding how their data should be interpreted with respect to the proposed wing compartment boundaries. Thus, contrary to their claims, Lawrence et al. have failed to falsify the F-P boundary hypothesis. 
Additional studies employing mitotic clones labeled with easily detectable markers that do not affect cytoskeletal organization or rates of cell division such as GFP and RFP clones produced by G-Trace or Twin Spot Generator (TSG) may further clarify the number of compartment boundaries in Drosophila wings. At the same time, because Drosophila wings are diminutive and highly modified compared to other insects, we also urge great caution in making generalizations about insect wing development based exclusively on studies in Drosophila.Replying to: Lawrence, P.A., Casal, J., de Celis, J., Morata, G. A refutation to 'A new A-P compartment boundary and organizer in holometabolous insect wings'. Sci. Rep. 9 (2019), https://doi.org/10.1038/s41598-019-42668-y .",2019-05-07 +30650089,Rethomics: An R framework to analyse high-throughput behavioural data.,"The recent development of automatised methods to score various behaviours on a large number of animals provides biologists with an unprecedented set of tools to decipher these complex phenotypes. Analysing such data comes with several challenges that are largely shared across acquisition platform and paradigms. Here, we present rethomics, a set of R packages that unifies the analysis of behavioural datasets in an efficient and flexible manner. rethomics offers a computational solution to storing, manipulating and visualising large amounts of behavioural data. We propose it as a tool to bridge the gap between behavioural biology and data sciences, thus connecting computational and behavioural scientists. rethomics comes with a extensive documentation as well as a set of both practical and theoretical tutorials (available at https://rethomics.github.io).",2019-01-16 +32347766,Design and Rationale of the HAPIN Study: A Multicountry Randomized Controlled Trial to Assess the Effect of Liquefied Petroleum Gas Stove and Continuous Fuel Distribution.,"

Background

Globally, nearly 3 billion people rely on solid fuels for cooking and heating, the vast majority residing in low- and middle-income countries (LMICs). The resulting household air pollution (HAP) is a leading environmental risk factor, accounting for an estimated 1.6 million premature deaths annually. Previous interventions of cleaner stoves have often failed to reduce exposure to levels that produce meaningful health improvements. There have been no multicountry field trials with liquefied petroleum gas (LPG) stoves, likely the cleanest scalable intervention.

Objective

This paper describes the design and methods of an ongoing randomized controlled trial (RCT) of LPG stove and fuel distribution in 3,200 households in 4 LMICs (India, Guatemala, Peru, and Rwanda).

Methods

We are enrolling 800 pregnant women at each of the 4 international research centers from households using biomass fuels. We are randomly assigning households to receive LPG stoves, an 18-month supply of free LPG, and behavioral reinforcements to the control arm. The mother is being followed along with her child until the child is 1 year old. Older adult women (40 to <80 years of age) living in the same households are also enrolled and followed during the same period. Primary health outcomes are low birth weight, severe pneumonia incidence, stunting in the child, and high blood pressure (BP) in the older adult woman. Secondary health outcomes are also being assessed. We are assessing stove and fuel use, conducting repeated personal and kitchen exposure assessments of fine particulate matter with aerodynamic diameter ≤2.5μm (PM2.5), carbon monoxide (CO), and black carbon (BC), and collecting dried blood spots (DBS) and urinary samples for biomarker analysis. Enrollment and data collection began in May 2018 and will continue through August 2021. The trial is registered with ClinicalTrials.gov (NCT02944682).

Conclusions

This study will provide evidence to inform national and global policies on scaling up LPG stove use among vulnerable populations. https://doi.org/10.1289/EHP6407.",2020-04-29 +31915234,Combination Lenalidomide/Bortezomib Treatment Synergistically Induces Calpain-Dependent Ikaros Cleavage and Apoptosis in Myeloma Cells.,"Multiple myeloma had been successfully treated by combining lenalidomide and bortezomib with reports suggesting benefits of such a combination even in relapsed/refractory cases. Recently, it was demonstrated that Ikaros degradation by lenalidomide happens via proteasome-dependent pathway and this process is critical for the eradication of myeloma cells. On the basis of this, an antagonistic effect should be observed if a combination of both these agents were used, which however is not the observation seen in the clinical setting. Our study demonstrates that when these agents are combined they exhibit a synergistic activity against myeloma cells and degradation of Ikaros happens by a proteasome-independent calcium-induced calpain pathway. Our study identifies the crucial role of calcium-induced calpain pathway in inducing apoptosis of myeloma cells when this combination or lenalidomide and bortezomib is used. We also report that this combination enhanced the expression of CD38 compared with lenalidomide alone. Thus, data from our study would establish the rationale for the addition of daratumumab along with this combination to further enhance therapeutic activity against multiple myeloma. IMPLICATIONS: Lenalidomide and bortezomib combination degrades IKZF1 in multiple myeloma through a calcium-dependent calpain and caspase pathway. VISUAL OVERVIEW: http://mcr.aacrjournals.org/content/molcanres/18/4/529/F1.large.jpg.",2020-01-08 +31410271,Geographic variation in opsin expression does not align with opsin genotype in Lake Victoria cichlid populations.,"Sensory adaptation to the local environment can contribute to speciation. 
Aquatic environments are well suited for studying this process: The natural attenuation of light through water results in heterogeneous light environments, to which vision-dependent species must adapt for communication and survival. Here, we study visual adaptation in sympatric Pundamilia cichlids from southeastern Lake Victoria. Species with blue or red male nuptial coloration co-occur at many rocky islands but tend to be depth-differentiated, entailing different visual habitats, more strongly at some islands than others. Divergent visual adaptation to these environments has been implicated as a major factor in the divergence of P. pundamilia and P. nyererei, as they show consistent differentiation in the long-wavelength-sensitive visual pigment gene sequence (LWS opsin). In addition to sequence variation, variation in the opsin gene expression levels may contribute to visual adaptation. We characterized opsin gene expression and LWS genotype across Pundamilia populations inhabiting turbid and clear waters, to examine how different mechanisms of visual tuning contribute to visual adaptation. As predicted, the short-wavelength-sensitive opsin (SWS2b) was expressed exclusively in a population from clear water. Contrary to prediction however, expression levels of the other opsins were species- and island-dependent and did not align with species differences in LWS genotype. Specifically, in two locations with turbid water, the shallow-water dwelling blue species expressed more LWS and less RH2A than the deeper-dwelling red species, while the opposite pattern occurred in the two locations with clear water. Visual modeling suggests that the observed distribution of opsin expression profiles and LWS genotypes does not maximize visual performance, implying the involvement of additional visual tuning mechanisms and/or incomplete adaptation.

Open research badge

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://hdl.handle.net/10411/I1IUUQ.",2019-07-09 +23762313,Information exploration system for sickle cell disease and repurposing of hydroxyfasudil.,"

Background

Sickle cell disease (SCD) is a fatal monogenic disorder with no effective cure and thus high rates of morbidity and sequelae. Efforts toward discovery of disease modifying drugs and curative strategies can be augmented by leveraging the plethora of information contained in available biomedical literature. To facilitate research in this direction we have developed a resource, Dragon Exploration System for Sickle Cell Disease (DESSCD) (http://cbrc.kaust.edu.sa/desscd/) that aims to promote the easy exploration of SCD-related data.

Description

The Dragon Exploration System (DES), developed based on text mining and complemented by data mining, processed 419,612 MEDLINE abstracts retrieved from a PubMed query using SCD-related keywords. The processed SCD-related data has been made available via the DESSCD web query interface that enables: a/information retrieval using specified concepts, keywords and phrases, and b/the generation of inferred association networks and hypotheses. The usefulness of the system is demonstrated by: a/reproducing a known scientific fact, the ""Sickle_Cell_Anemia-Hydroxyurea"" association, and b/generating novel and plausible ""Sickle_Cell_Anemia-Hydroxyfasudil"" hypothesis. A PCT patent (PCT/US12/55042) has been filed for the latter drug repurposing for SCD treatment.

Conclusion

We developed the DESSCD resource dedicated to exploration of text-mined and data-mined information about SCD. No similar SCD-related resource exists. Thus, we anticipate that DESSCD will serve as a valuable tool for physicians and researchers interested in SCD.",2013-06-10 +31481021,Dynamic prediction of long-term survival in patients with primary gastric diffuse large B-cell lymphoma: a SEER population-based study.,"

Background

This study investigated a large number of patients to develop a predictive nomogram for survival and a web-based survival rate calculator that can dynamically predict the long-term survival of patients with primary gastric diffuse large B-cell lymphoma.

Methods

A total of 2647 patients diagnosed with primary gastric diffuse large B-cell lymphoma from 1998 to 2014 were extracted from the SEER database. We used the Lasso Cox regression model to identify independent risk factors for long-term survival and to develop a predictive nomogram for survival and a web-based survival rate calculator.

Results

The median (mean) follow-up time was 30 months (52.8 months). Cancer-specific survival rates decreased with time, while the 5-year conditional survival increased with time. Cancer-specific deaths were not constant. Cancer-specific deaths of patients within the first 2 years were high, while the risk remained relatively constant after 2 years. The independent risk factors included surgery, chemotherapy, tumor stage and age, according to the Lasso Cox regression analysis. We developed a predictive nomogram and a web-based survival rate calculator ( https://linjuli1991.shinyapps.io/dynnomapp/ ). The calibration plot suggested that the actual value exhibited good agreement with the predicted value.

Conclusions

We found that patients with primary gastric diffuse large B-cell lymphoma had a high risk of death during the first 2 years. Additional active follow-up strategies should be provided during this period. This is the first study to develop a predictive nomogram and a web-based survival rate calculator that can provide evidence for individual treatment and follow-up.",2019-09-03 +24312499,PaGenBase: a pattern gene database for the global and dynamic understanding of gene function.,"Pattern genes are a group of genes that have a modularized expression behavior under serial physiological conditions. The identification of pattern genes will provide a path toward a global and dynamic understanding of gene functions and their roles in particular biological processes or events, such as development and pathogenesis. In this study, we present PaGenBase, a novel repository for the collection of tissue- and time-specific pattern genes, including specific genes, selective genes, housekeeping genes and repressed genes. The PaGenBase database is now freely accessible at http://bioinf.xmu.edu.cn/PaGenBase/. In the current version (PaGenBase 1.0), the database contains 906,599 pattern genes derived from the literature or from data mining of more than 1,145,277 gene expression profiles in 1,062 distinct samples collected from 11 model organisms. Four statistical parameters were used to quantitatively evaluate the pattern genes. Moreover, three methods (quick search, advanced search and browse) were designed for rapid and customized data retrieval. The potential applications of PaGenBase are also briefly described. 
In summary, PaGenBase will serve as a resource for the global and dynamic understanding of gene function and will facilitate high-level investigations in a variety of fields, including the study of development, pathogenesis and novel drug discovery.",2013-12-02 +31145698,Truncated rank correlation (TRC) as a robust measure of test-retest reliability in mass spectrometry data. ,"In mass spectrometry (MS) experiments, more than thousands of peaks are detected in the space of mass-to-charge ratio and chromatographic retention time, each associated with an abundance measurement. However, a large proportion of the peaks consists of experimental noise and low abundance compounds are typically masked by noise peaks, compromising the quality of the data. In this paper, we propose a new measure of similarity between a pair of MS experiments, called truncated rank correlation (TRC). To provide a robust metric of similarity in noisy high-dimensional data, TRC uses truncated top ranks (or top m-ranks) for calculating correlation. A comprehensive numerical study suggests that TRC outperforms traditional sample correlation and Kendall's τ. We apply TRC to measuring test-retest reliability of two MS experiments, including biological replicate analysis of the metabolome in HEK293 cells and metabolomic profiling of benign prostate hyperplasia (BPH) patients. An R package trc of the proposed TRC and related functions is available at https://sites.google.com/site/dhyeonyu/software.",2019-05-30 +25540181,The chemical component dictionary: complete descriptions of constituent molecules in experimentally determined 3D macromolecules in the Protein Data Bank.,"

Unlabelled

The Chemical Component Dictionary (CCD) is a chemical reference data resource that describes all residue and small molecule components found in Protein Data Bank (PDB) entries. The CCD contains detailed chemical descriptions for standard and modified amino acids/nucleotides, small molecule ligands and solvent molecules. Each chemical definition includes descriptions of chemical properties such as stereochemical assignments, chemical descriptors, systematic chemical names and idealized coordinates. The content, preparation, validation and distribution of this CCD chemical reference dataset are described.

Availability and implementation

The CCD is updated regularly in conjunction with the scheduled weekly release of new PDB structure data. The CCD and amino acid variant reference datasets are hosted in the public PDB ftp repository at ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz, ftp://ftp.wwpdb.org/pub/pdb/data/monomers/aa-variants-v1.cif.gz, and its mirror sites, and can be accessed from http://wwpdb.org.

Contact

jwest@rcsb.rutgers.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-02 +30943723,DNAPred: Accurate Identification of DNA-Binding Sites from Protein Sequence by Ensembled Hyperplane-Distance-Based Support Vector Machines.,"Accurate identification of protein-DNA binding sites is significant for both understanding protein function and drug design. Machine-learning-based methods have been extensively used for the prediction of protein-DNA binding sites. However, the data imbalance problem, in which the number of nonbinding residues (negative-class samples) is far larger than that of binding residues (positive-class samples), seriously restricts the performance improvements of machine-learning-based predictors. In this work, we designed a two-stage imbalanced learning algorithm, called ensembled hyperplane-distance-based support vector machines (E-HDSVM), to improve the prediction performance of protein-DNA binding sites. The first stage of E-HDSVM designs a new iterative sampling algorithm, called hyperplane-distance-based under-sampling (HD-US), to extract multiple subsets from the original imbalanced data set, each of which is used to train a support vector machine (SVM). Unlike traditional sampling algorithms, HD-US selects samples by calculating the distances between the samples and the separating hyperplane of the SVM. The second stage of E-HDSVM proposes an enhanced AdaBoost (EAdaBoost) algorithm to ensemble multiple trained SVMs. As an enhanced version of the original AdaBoost algorithm, EAdaBoost overcomes the overfitting problem. Stringent cross-validation and independent tests on benchmark data sets demonstrated the superiority of E-HDSVM over several popular imbalanced learning algorithms. Based on the proposed E-HDSVM algorithm, we further implemented a sequence-based protein-DNA binding site predictor, called DNAPred, which is freely available at http://csbio.njust.edu.cn/bioinf/dnapred/ for academic use. 
The computational experimental results showed that our predictor achieved an average overall accuracy of 91.7% and a Mathew's correlation coefficient of 0.395 on five benchmark data sets and outperformed several state-of-the-art sequence-based protein-DNA binding site predictors.",2019-04-16 +30956231,Data-adaptive multi-locus association testing in subjects with arbitrary genealogical relationships.,"Genome-wide sequencing enables evaluation of associations between traits and combinations of variants in genes and pathways. But such evaluation requires multi-locus association tests with good power, regardless of the variant and trait characteristics. And since analyzing families may yield more power than analyzing unrelated individuals, we need multi-locus tests applicable to both related and unrelated individuals. Here we describe such tests, and we introduce SKAT-X, a new test statistic that uses genome-wide data obtained from related or unrelated subjects to optimize power for the specific data at hand. Simulations show that: a) SKAT-X performs well regardless of variant and trait characteristics; and b) for binary traits, analyzing affected relatives brings more power than analyzing unrelated individuals, consistent with previous findings for single-locus tests. We illustrate the methods by application to rare unclassified missense variants in the tumor suppressor gene BRCA2, as applied to combined data from prostate cancer families and unrelated prostate cancer cases and controls in the Multi-ethnic Cohort (MEC). The methods can be implemented using open-source code for public use as the R-package GATARS (Genetic Association Tests for Arbitrarily Related Subjects) .",2019-04-08 +31477007,SalMotifDB: a tool for analyzing putative transcription factor binding sites in salmonid genomes.,"

Background

Recently developed genome resources in Salmonid fish provides tools for studying the genomics underlying a wide range of properties including life history trait variation in the wild, economically important traits in aquaculture and the evolutionary consequences of whole genome duplications. Although genome assemblies now exist for a number of salmonid species, the lack of regulatory annotations are holding back our mechanistic understanding of how genetic variation in non-coding regulatory regions affect gene expression and the downstream phenotypic effects.

Results

We present SalMotifDB, a database and associated web and R interface for the analysis of transcription factors (TFs) and their cis-regulatory binding sites in five salmonid genomes. SalMotifDB integrates TF-binding site information for 3072 non-redundant DNA patterns (motifs) assembled from a large number of metazoan motif databases. Through motif matching and TF prediction, we have used these multi-species databases to construct putative regulatory networks in salmonid species. The utility of SalMotifDB is demonstrated by showing that key lipid metabolism regulators are predicted to regulate a set of genes affected by different lipid and fatty acid content in the feed, and by showing that our motif database explains a significant proportion of gene expression divergence in gene duplicates originating from the salmonid specific whole genome duplication.

Conclusions

SalMotifDB is an effective tool for analyzing transcription factors, their binding sites and the resulting gene regulatory networks in salmonid species, and will be an important tool for gaining a better mechanistic understanding of gene regulation and the associated phenotypes in salmonids. SalMotifDB is available at https://salmobase.org/apps/SalMotifDB .",2019-09-02 +31612715,TeachOpenCADD-KNIME: A Teaching Platform for Computer-Aided Drug Design Using KNIME Workflows.,"Open-source workflows have become more and more an integral part of computer-aided drug design (CADD) projects since they allow reproducible and shareable research that can be easily transferred to other projects. Setting up, understanding, and applying such workflows involves either coding or using workflow managers that offer a graphical user interface. We previously reported the TeachOpenCADD teaching platform that provides interactive Jupyter Notebooks (talktorials) on central CADD topics using open-source data and Python packages. Here we present the conversion of these talktorials to KNIME workflows that allow users to explore our teaching material without any line of code. TeachOpenCADD KNIME workflows are freely available on the KNIME Hub: https://hub.knime.com/volkamerlab/space/TeachOpenCADD .",2019-10-15 +26582926,"eggNOG 4.5: a hierarchical orthology framework with improved functional annotations for eukaryotic, prokaryotic and viral sequences.","eggNOG is a public resource that provides Orthologous Groups (OGs) of proteins at different taxonomic levels, each with integrated and summarized functional annotations. Developments since the latest public release include changes to the algorithm for creating OGs across taxonomic levels, making nested groups hierarchically consistent. This allows for a better propagation of functional terms across nested OGs and led to the novel annotation of 95 890 previously uncharacterized OGs, increasing overall annotation coverage from 67% to 72%. 
The functional annotations of OGs have been expanded to also provide Gene Ontology terms, KEGG pathways and SMART/Pfam domains for each group. Moreover, eggNOG now provides pairwise orthology relationships within OGs based on analysis of phylogenetic trees. We have also incorporated a framework for quickly mapping novel sequences to OGs based on precomputed HMM profiles. Finally, eggNOG version 4.5 incorporates a novel data set spanning 2605 viral OGs, covering 5228 proteins from 352 viral proteomes. All data are accessible for bulk downloading, as a web-service, and through a completely redesigned web interface. The new access points provide faster searches and a number of new browsing and visualization capabilities, facilitating the needs of both experts and less experienced users. eggNOG v4.5 is available at http://eggnog.embl.de.",2015-11-17 +32647812,West-Life: A Virtual Research Environment for structural biology.,"The West-Life project (https://about.west-life.eu/) is a Horizon 2020 project funded by the European Commission to provide data processing and data management services for the international community of structural biologists, and in particular to support integrative experimental approaches within the field of structural biology. It has developed enhancements to existing web services for structure solution and analysis, created new pipelines to link these services into more complex higher-level workflows, and added new data management facilities. Through this work it has striven to make the benefits of European e-Infrastructures more accessible to life-science researchers in general and structural biologists in particular.",2019-01-01 +31418040,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline on the Management of Patients With Myelomeningocele: Whether Prenatal or Postnatal Closure Affects Future Ambulatory Status.,"

Background

Myelomeningocele (MM) is an open neural tube defect treated by pediatric neurosurgeons with prenatal or postnatal closure.

Objective

The objective of this systematic review was to answer the question: What is the evidence for the effectiveness of prenatal vs postnatal closure of MM regarding short and long-term ambulatory status? Treatment recommendations were provided based on the available evidence.

Methods

The National Library of Medicine PubMed database and Embase were queried using MeSH headings and keywords relevant to ambulatory status after prenatal or postnatal closure of MM. Abstracts were reviewed to identify which studies met the inclusion criteria. An evidence table was assembled summarizing the studies and the quality of evidence (Classes I-III). Based on the quality of the literature, a recommendation was rendered (Level I, II, or III).

Results

One randomized controlled trial (Class II) and 3 retrospective cohort studies (Class III) were included as evidence. Initial ambulatory status depended on anatomic level of the neural tube defect. In the short term, prenatal closure may improve ambulatory status compared to postnatal closure. Spinal cord tethering or dermoid inclusion cyst has been associated with neurologic deterioration in infants closed in utero and after birth. Ambulation may cease in both groups over time. No long-term studies evaluated whether there is a difference in the ability to ambulate upon reaching adulthood.

Conclusion

Prenatal closure of MM may improve ambulatory status in the short term (Level II). Spinal cord tethering in both groups caused deterioration in the ability to walk. Evaluation and treatment of spinal cord tethering may help maintain ambulatory status (Level III). No studies evaluate whether prenatal or postnatal repair provides improved ability to ambulate upon reaching adulthood.The full guideline can be found at https://www.cns.org/guidelines/guidelines-spina-bifida-chapter-3.",2019-09-01 +30650101,LittleBrain: A gradient-based tool for the topographical interpretation of cerebellar neuroimaging findings.,"Gradient-based approaches to brain function have recently unmasked fundamental properties of brain organization. Diffusion map embedding analysis of resting-state fMRI data revealed a primary-to-transmodal axis of cerebral cortical macroscale functional organization. The same method was recently used to analyze resting-state data within the cerebellum, revealing for the first time a sensorimotor-fugal macroscale organization principle of cerebellar function. Cerebellar gradient 1 extended from motor to non-motor task-unfocused (default-mode network) areas, and cerebellar gradient 2 isolated task-focused processing regions. Here we present a freely available and easily accessible tool that applies this new knowledge to the topographical interpretation of cerebellar neuroimaging findings. LittleBrain illustrates the relationship between cerebellar data (e.g., volumetric patient study clusters, task activation maps, etc.) and cerebellar gradients 1 and 2. Specifically, LittleBrain plots all voxels of the cerebellum in a two-dimensional scatterplot, with each axis corresponding to one of the two principal functional gradients of the cerebellum, and indicates the position of cerebellar neuroimaging data within these two dimensions. 
This novel method of data mapping provides alternative, gradual visualizations that complement discrete parcellation maps of cerebellar functional neuroanatomy. We present application examples to show that LittleBrain can also capture subtle, progressive aspects of cerebellar functional neuroanatomy that would be difficult to visualize using conventional mapping techniques. Download and use instructions can be found at https://xaviergp.github.io/littlebrain.",2019-01-16 +32075873,"The Secretome Profiling of a Pediatric Airway Epithelium Infected with hRSV Identified Aberrant Apical/Basolateral Trafficking and Novel Immune Modulating (CXCL6, CXCL16, CSF3) and Antiviral (CEACAM1) Proteins.","The respiratory epithelium comprises polarized cells at the interface between the environment and airway tissues. Polarized apical and basolateral protein secretions are a feature of airway epithelium homeostasis. Human respiratory syncytial virus (hRSV) is a major human pathogen that primarily targets the respiratory epithelium. However, the consequences of hRSV infection on epithelium secretome polarity and content remain poorly understood. To investigate the hRSV-associated apical and basolateral secretomes, a proteomics approach was combined with an ex vivo pediatric human airway epithelial (HAE) model of hRSV infection (data are available via ProteomeXchange and can be accessed at https://www.ebi.ac.uk/pride/ with identifier PXD013661). Following infection, a skewing of apical/basolateral abundance ratios was identified for several individual proteins. Novel modulators of neutrophil and lymphocyte activation (CXCL6, CSF3, SECTM1 or CXCL16), and antiviral proteins (BST2 or CEACAM1) were detected in infected, but not in uninfected cultures. Importantly, CXCL6, CXCL16, CSF3 were also detected in nasopharyngeal aspirates (NPA) from hRSV-infected infants but not healthy controls. 
Furthermore, the antiviral activity of CEACAM1 against RSV was confirmed in vitro using BEAS-2B cells. hRSV infection disrupted the polarity of the pediatric respiratory epithelial secretome and was associated with immune modulating proteins (CXCL6, CXCL16, CSF3) never linked with this virus before. In addition, the antiviral activity of CEACAM1 against hRSV had also never been previously characterized. This study, therefore, provides novel insights into RSV pathogenesis and endogenous antiviral responses in pediatric airway epithelium.",2020-02-19 +29699484,PDZscape: a comprehensive PDZ-protein database.,"PDZ-containing proteins comprise one of the most widely distributed protein families playing major role in localization and membrane receptor clustering. They are hence important regulators of signal transduction in cellular pathways. Although knowledge on these proteins has increased exponentially, the existing database 'PDZBase' is limited by presence of only 339 proteins as it dates back to 2004 when very little data was available. Thus, lack of exclusive information on this protein family led us to develop PDZscape. 'PDZscape' encompasses the complete available information on 58,648 PDZ-containing proteins with their known and putative binding partners on one platform. It has a user-friendly web interface that can be easily queried with external protein identifiers. With unique integration of prominent databases including NCBI, UniProtKB, Swiss-Prot, Pubmed, PDB, STRING, IntAct, KEGG, Pfam and Protein Mutant Database, it provides detailed information on PDZ interactome apart from the customized BLAST option. Most importantly, this database encompasses the mutations and diseases associated with PDZ containing proteins manually curated by our group, thus making it a comprehensive compilation. It also features tools to query the database using sequence (PDZ-Blast) and to find if protein of interest is a PDZ-binding protein. 
PDZscape is freely available at http://www.actrec.gov.in:8080/pdzscape .",2018-04-25 +23110975,Manual Gene Ontology annotation workflow at the Mouse Genome Informatics Database.,"The Mouse Genome Database, the Gene Expression Database and the Mouse Tumor Biology database are integrated components of the Mouse Genome Informatics (MGI) resource (http://www.informatics.jax.org). The MGI system presents both a consensus view and an experimental view of the knowledge concerning the genetics and genomics of the laboratory mouse. From genotype to phenotype, this information resource integrates information about genes, sequences, maps, expression analyses, alleles, strains and mutant phenotypes. Comparative mammalian data are also presented particularly in regards to the use of the mouse as a model for the investigation of molecular and genetic components of human diseases. These data are collected from literature curation as well as downloads of large datasets (SwissProt, LocusLink, etc.). MGI is one of the founding members of the Gene Ontology (GO) and uses the GO for functional annotation of genes. Here, we discuss the workflow associated with manual GO annotation at MGI, from literature collection to display of the annotations. Peer-reviewed literature is collected mostly from a set of journals available electronically. Selected articles are entered into a master bibliography and indexed to one of eight areas of interest such as 'GO' or 'homology' or 'phenotype'. Each article is then either indexed to a gene already contained in the database or funneled through a separate nomenclature database to add genes. The master bibliography and associated indexing provide information for various curator-reports such as 'papers selected for GO that refer to genes with NO GO annotation'. Once indexed, curators who have expertise in appropriate disciplines enter pertinent information. 
MGI makes use of several controlled vocabularies that ensure uniform data encoding, enable robust analysis and support the construction of complex queries. These vocabularies range from pick-lists to structured vocabularies such as the GO. All data associations are supported with statements of evidence as well as access to source publications.",2012-10-29 +30462303,Open Targets Platform: new developments and updates two years on.,"The Open Targets Platform integrates evidence from genetics, genomics, transcriptomics, drugs, animal models and scientific literature to score and rank target-disease associations for drug target identification. The associations are displayed in an intuitive user interface (https://www.targetvalidation.org), and are available through a REST-API (https://api.opentargets.io/v3/platform/docs/swagger-ui) and a bulk download (https://www.targetvalidation.org/downloads/data). In addition to target-disease associations, we also aggregate and display data at the target and disease levels to aid target prioritisation. Since our first publication two years ago, we have made eight releases, added new data sources for target-disease associations, started including causal genetic variants from non genome-wide targeted arrays, added new target and disease annotations, launched new visualisations and improved existing ones and released a new web tool for batch search of up to 200 targets. We have a new URL for the Open Targets Platform REST-API, new REST endpoints and also removed the need for authorisation for API fair use. Here, we present the latest developments of the Open Targets Platform, expanding the evidence and target-disease associations with new and improved data sources, refining data quality, enhancing website usability, and increasing our user base with our training workshops, user support, social media and bioinformatics forum engagement.",2019-01-01 +29040394,Compound image segmentation of published biomedical figures.,"

Motivation

Images convey essential information in biomedical publications. As such, there is a growing interest within the bio-curation and the bio-databases communities, to store images within publications as evidence for biomedical processes and for experimental results. However, many of the images in biomedical publications are compound images consisting of multiple panels, where each individual panel potentially conveys a different type of information. Segmenting such images into constituent panels is an essential first step toward utilizing images.

Results

In this article, we develop a new compound image segmentation system, FigSplit, which is based on Connected Component Analysis. To overcome shortcomings typically manifested by existing methods, we develop a quality assessment step for evaluating and modifying segmentations. Two methods are proposed to re-segment the images if the initial segmentation is inaccurate. Experimental results show the effectiveness of our method compared with other methods.

Availability and implementation

The system is publicly available for use at: https://www.eecis.udel.edu/~compbio/FigSplit. The code is available upon request.

Contact

shatkay@udel.edu.

Supplementary information

Supplementary data are available online at Bioinformatics.",2018-04-01 +31835171,Activation of Hypoxia-Inducible Factor Signaling Modulates the RNA Protein Interactome in Caenorhabditis elegans.,"The cellular response to hypoxia is crucial to organismal survival, and hypoxia-inducible factors (HIF) are the key mediators of this response. HIF-signaling is central to many human diseases and mediates longevity in the nematode. Despite the rapidly increasing knowledge on RNA-binding proteins (RBPs), little is known about their contribution to hypoxia-induced cellular adaptation. We used RNA interactome capture (RIC) in wild-type Caenorhabditis elegans and vhl-1 loss-of-function mutants to fill this gap. This approach identifies more than 1,300 nematode RBPs, 270 of which can be considered novel RBPs. Interestingly, loss of vhl-1 modulates the RBPome. This difference is not primarily explained by protein abundance suggesting differential RNA-binding. Taken together, our study provides a global view on the nematode RBPome and proteome as well as their modulation by HIF-signaling. The resulting RBP atlas is also provided as an interactive online data mining tool (http://shiny.cecad.uni-koeln.de:3838/celegans_rbpome).",2019-11-27 +28981576,SEGtool: a specifically expressed gene detection tool and applications in human tissue and single-cell sequencing data.,"Different tissues and diseases have distinct transcriptional profilings with specifically expressed genes (SEGs). So, the identification of SEGs is an important issue in the studies of gene function, biological development, disease mechanism and biomarker discovery. However, few accurate and easy-to-use tools are available for RNA sequencing (RNA-seq) data to detect SEGs. Here, we presented SEGtool, a tool based on fuzzy c-means, Jaccard index and greedy annealing method for SEG detection automatically and self-adaptively ignoring data distribution. 
Testing result showed that our SEGtool outperforms the existing tools, which was mainly developed for microarray data. By applying SEGtool to Genotype-Tissue Expression (GTEx) human tissue data set, we detected 3181 SEGs with tissue-related functions. Regulatory networks reveal tissue-specific transcription factors regulating many SEGs, such as ETV2 in testis, HNF4A in liver and NEUROD1 in brain. Applied to a case study of single-cell sequencing (SCS) data from embryo cells, we identified many SEGs in specific stages of human embryogenesis. Notably, SEGtool is suitable for RNA-seq data and even SCS data with high specificity and accuracy. An implementation of SEGtool R package is freely available at http://bioinfo.life.hust.edu.cn/SEGtool/.",2018-11-01 +31699073,OncoSim and OncoWiki: an authentic learning approach to teaching cancer genomics.,"BACKGROUND:Personalised medicine is rapidly changing the clinical environment, especially in regard to the management of cancer. However, for the large part, methods used to educate undergraduate students as future biomedical scientists and medical doctors have not reflected these changes. In order to make effective use of advances in cancer genomic knowledge, there is a need to expose students to the challenges of genomic medicine and to do so in a manner that makes this complex information accessible. METHODS:The teaching method developed, OncoSim, is a scaffolded 'Personal Research' module option for final year biomedical undergraduate students. It uses an authentic learning approach to teach cancer genomics via simulated cancer patient case studies that have identifiable potential therapeutic targets with associated drug therapies (so-called targeted therapy/precision oncology). In addition, these simulated case studies can be uploaded to a dedicated learning website (OncoWiki) where they can be freely downloaded and used to teach medical students the principles of targeted therapy. 
A preliminary evaluation of OncoSim was carried out using 3 research tools: (1) online questionnaires; (2) semi-structured interviews; and (3) analysis of whole cohort mark ranges. Thematic analysis was used to code and categorise interview data. RESULTS:The teaching materials for OncoSim and the OncoWiki site are freely accessible at https://www.oncowiki.co.uk. Questionnaire data and comparison of whole cohort marks showed OncoSim was at least as effective as alternative choices, and suggested OncoSim provided a valued alternative to traditional laboratory-based projects. No barriers to receptiveness were found. Interview analysis provided 5 broad themes (authentic learning experience; individual challenges; interest in cancer; positive learning experience; supportive structure) supporting the authentic learning aspect of the project, the strong scaffolding provided and the overall effectiveness of the approach. CONCLUSIONS:Our preliminary, proof-of-concept, evaluation suggests that OncoSim will be effective in supporting the teaching of genomic medicine to undergraduate students. We plan and hope our study will encourage further formal evaluation in a larger cohort of students, including a control group. The OncoWiki site has the capacity to grow independently as future students create and upload simulated case studies for other students to then download and analyse.",2019-11-07 +23847528,Toward open sharing of task-based fMRI data: the OpenfMRI project.,"The large-scale sharing of task-based functional neuroimaging data has the potential to allow novel insights into the organization of mental function in the brain, but the field of neuroimaging has lagged behind other areas of bioscience in the development of data sharing resources. This paper describes the OpenFMRI project (accessible online at http://www.openfmri.org), which aims to provide the neuroimaging community with a resource to support open sharing of task-based fMRI studies. 
We describe the motivation behind the project, focusing particularly on how this project addresses some of the well-known challenges to sharing of task-based fMRI data. Results from a preliminary analysis of the current database are presented, which demonstrate the ability to classify between task contrasts with high generalization accuracy across subjects, and the ability to identify individual subjects from their activation maps with moderately high accuracy. Clustering analyses show that the similarity relations between statistical maps have a somewhat orderly relation to the mental functions engaged by the relevant tasks. These results highlight the potential of the project to support large-scale multivariate analyses of the relation between mental processes and brain function.",2013-07-08 +28431131,DNAproDB: an interactive tool for structural analysis of DNA-protein complexes.,"Many biological processes are mediated by complex interactions between DNA and proteins. Transcription factors, various polymerases, nucleases and histones recognize and bind DNA with different levels of binding specificity. To understand the physical mechanisms that allow proteins to recognize DNA and achieve their biological functions, it is important to analyze structures of DNA-protein complexes in detail. DNAproDB is a web-based interactive tool designed to help researchers study these complexes. DNAproDB provides an automated structure-processing pipeline that extracts structural features from DNA-protein complexes. The extracted features are organized in structured data files, which are easily parsed with any programming language or viewed in a browser. We processed a large number of DNA-protein complexes retrieved from the Protein Data Bank and created the DNAproDB database to store this data. Users can search the database by combining features of the DNA, protein or DNA-protein interactions at the interface. 
Additionally, users can upload their own structures for processing privately and securely. DNAproDB provides several interactive and customizable tools for creating visualizations of the DNA-protein interface at different levels of abstraction that can be exported as high quality figures. All functionality is documented and freely accessible at http://dnaprodb.usc.edu.",2017-07-01 +28641017,NANPDB: A Resource for Natural Products from Northern African Sources.,"Natural products (NPs) are often regarded as sources of drugs or drug leads or simply as a ""source of inspiration"" for the discovery of novel drugs. We have built the Northern African Natural Products Database (NANPDB) by collecting information on ∼4500 NPs, covering literature data for the period from 1962 to 2016. The data cover compounds isolated mainly from plants, with contributions from some endophyte, animal (e.g., coral), fungal, and bacterial sources. The compounds were identified from 617 source species, belonging to 146 families. Computed physicochemical properties, often used to predict drug metabolism and pharmacokinetics, as well as predicted toxicity information, have been included for each compound in the data set. This is the largest collection of annotated natural compounds produced by native organisms from Northern Africa. While the database includes well-known drugs and drug leads, the medical potential of a majority of the molecules is yet to be investigated. The database could be useful for drug discovery efforts, analysis of the bioactivity of selected compounds, or the discovery of synthesis routes toward secondary metabolites. 
The current version of NANPDB is available at http://african-compounds.org/nanpdb/ .",2017-06-22 +30294645,Consumer perception data and scientific arguments about food packaging functionalities for fresh strawberries.,"This data article contains data characterizing consumer perception and scientific arguments about food packaging functionalities for fresh strawberries. These data are associated with the article ""Choice of environment-friendly food packagings through argumentation systems and preferences"" (see Yun et al., 2018). These data are stored in a public repository structured by an ontology. These data could be retrieved through the @Web tool, user-friendly interface to capitalize and query data (Buche et al., 2013; Guillard et al., 2017). The @Web tool is accessible online at http://pfl.grignon.inra.fr/atWeb/.",2018-09-15 +31112088,Using Artificial Intelligence to Revise ACR TI-RADS Risk Stratification of Thyroid Nodules: Diagnostic Accuracy and Utility.,"Background Risk stratification systems for thyroid nodules are often complicated and affected by low specificity. Continual improvement of these systems is necessary to reduce the number of unnecessary thyroid biopsies. Purpose To use artificial intelligence (AI) to optimize the American College of Radiology (ACR) Thyroid Imaging Reporting and Data System (TI-RADS). Materials and Methods A total of 1425 biopsy-proven thyroid nodules from 1264 consecutive patients (1026 women; mean age, 52.9 years [range, 18-93 years]) were evaluated retrospectively. Expert readers assigned points based on five ACR TI-RADS categories (composition, echogenicity, shape, margin, echogenic foci), and a genetic AI algorithm was applied to a training set (1325 nodules). Point and pathologic data were used to create an optimized scoring system (hereafter, AI TI-RADS). 
Performance of the systems was compared by using a test set of the final 100 nodules with interpretations from the expert reader, eight nonexpert readers, and an expert panel. Initial performance of AI TI-RADS was calculated by using a test for differences between binomial proportions. Additional comparisons across readers were conducted by using bootstrapping; diagnostic performance was assessed by using area under the receiver operating curve. Results AI TI-RADS assigned new point values for eight ACR TI-RADS features. Six features were assigned zero points, which simplified categorization. By using expert reader data, the diagnostic performance of ACR TI-RADS and AI TI-RADS was area under the receiver operating curve of 0.91 and 0.93, respectively. For the same expert, specificity of AI TI-RADS (65%, 55 of 85) was higher (P < .001) than that of ACR TI-RADS (47%, 40 of 85). For the eight nonexpert radiologists, mean specificity for AI TI-RADS (55%) was also higher (P < .001) than that of ACR TI-RADS (48%). An interactive AI TI-RADS calculator can be viewed at http://deckard.duhs.duke.edu/∼ai-ti-rads . Conclusion An artificial intelligence-optimized Thyroid Imaging Reporting and Data System (TI-RADS) validates the American College of Radiology TI-RADS while slightly improving specificity and maintaining sensitivity. Additionally, it simplifies feature assignments, which may improve ease of use. © RSNA, 2019 Online supplemental material is available for this article.",2019-05-21 +31066444,SPADE web service for prediction of allergen IgE epitopes.,"The specific interaction of allergens with IgE antibodies and the allergen mediated cross-linking of receptor-bound IgE are key events of allergic diseases. The elucidation of the IgE binding sites (the epitopes) on the allergen surface is an important goal of allergy research. Only few allergen-specific IgE epitopes have been determined experimentally to date. 
Epitope prediction methods represent a viable alternative to experimental methods and have worked well with linear epitopes. However, as most IgE epitopes are of conformational and/or discontinuous nature sequence based prediction methods have had limited success in these cases. Here, we present the web server of the program SPADE (https://spade.uni-graz.at), which is the server implementation of a previously published program (1). In this approach we utilize the structural homology of cross-reactive allergens combined with the immunological cross-reactivity data for the discrimination of putative IgE-binding sites from non-cross-reactive surface patches. The method, although predictive, does not rely on machine-learning algorithms and does not require training data. The SPADE server features an easy-to-use interface, an automated pipeline consisting of third-party, as well as own, newly developed routines and a comprehensive output page.",2019-07-01 +31114912,M1CR0B1AL1Z3R-a user-friendly web server for the analysis of large-scale microbial genomics data.,"Large-scale mining and analysis of bacterial datasets contribute to the comprehensive characterization of complex microbial dynamics within a microbiome and among different bacterial strains, e.g., during disease outbreaks. The study of large-scale bacterial evolutionary dynamics poses many challenges. These include data-mining steps, such as gene annotation, ortholog detection, sequence alignment and phylogeny reconstruction. These steps require the use of multiple bioinformatics tools and ad-hoc programming scripts, making the entire process cumbersome, tedious and error-prone due to manual handling. This motivated us to develop the M1CR0B1AL1Z3R web server, a 'one-stop shop' for conducting microbial genomics data analyses via a simple graphical user interface. 
Some of the features implemented in M1CR0B1AL1Z3R are: (i) extracting putative open reading frames and comparative genomics analysis of gene content; (ii) extracting orthologous sets and analyzing their size distribution; (iii) analyzing gene presence-absence patterns; (iv) reconstructing a phylogenetic tree based on the extracted orthologous set; (v) inferring GC-content variation among lineages. M1CR0B1AL1Z3R facilitates the mining and analysis of dozens of bacterial genomes using advanced techniques, with the click of a button. M1CR0B1AL1Z3R is freely available at https://microbializer.tau.ac.il/.",2019-07-01 +28346087,PubChem BioAssay: A Decade's Development toward Open High-Throughput Screening Data Sharing.,"High-throughput screening (HTS) is now routinely conducted for drug discovery by both pharmaceutical companies and screening centers at academic institutions and universities. Rapid advance in assay development, robot automation, and computer technology has led to the generation of terabytes of data in screening laboratories. Despite the technology development toward HTS productivity, fewer efforts were devoted to HTS data integration and sharing. As a result, the huge amount of HTS data was rarely made available to the public. To fill this gap, the PubChem BioAssay database ( https://www.ncbi.nlm.nih.gov/pcassay/ ) was set up in 2004 to provide open access to the screening results tested on chemicals and RNAi reagents. With more than 10 years' development and contributions from the community, PubChem has now become the largest public repository for chemical structures and biological data, which provides an information platform to worldwide researchers supporting drug development, medicinal chemistry study, and chemical biology research. This work presents a review of the HTS data content in the PubChem BioAssay database and the progress of data deposition to stimulate knowledge discovery and data sharing. 
It also provides a description of the database's data standard and basic utilities facilitating information access and use for new users.",2017-01-13 +31531674,Evaluating stably expressed genes in single cells. ,"Single-cell RNA-seq (scRNA-seq) profiling has revealed remarkable variation in transcription, suggesting that expression of many genes at the single-cell level is intrinsically stochastic and noisy. Yet, on the cell population level, a subset of genes traditionally referred to as housekeeping genes (HKGs) are found to be stably expressed in different cell and tissue types. It is therefore critical to question whether stably expressed genes (SEGs) can be identified on the single-cell level, and if so, how can their expression stability be assessed? We have previously proposed a computational framework for ranking expression stability of genes in single cells for scRNA-seq data normalization and integration. In this study, we perform detailed evaluation and characterization of SEGs derived from this framework. Here, we show that gene expression stability indices derived from the early human and mouse development scRNA-seq datasets and the ""Mouse Atlas"" dataset are reproducible and conserved across species. We demonstrate that SEGs identified from single cells based on their stability indices are considerably more stable than HKGs defined previously from cell populations across diverse biological systems. Our analyses indicate that SEGs are inherently more stable at the single-cell level and their characteristics reminiscent of HKGs, suggesting their potential role in sustaining essential functions in individual cells. SEGs identified in this study have immediate utility both for understanding variation and stability of single-cell transcriptomes and for practical applications such as scRNA-seq data normalization. 
Our framework for calculating gene stability index, ""scSEGIndex,"" is incorporated into the scMerge Bioconductor R package (https://sydneybiox.github.io/scMerge/reference/scSEGIndex.html) and can be used for identifying genes with stable expression in scRNA-seq datasets.",2019-09-01 +30847061,SPECIES: A platform for the exploration of ecological data.,"The modeling of ecological data that include both abiotic and biotic factors is fundamental to our understanding of ecosystems. Repositories of biodiversity data, such as GBIF, iDigBio, Atlas of Living Australia, and SNIB (Mexico's National System of Biodiversity Information), contain a great deal of information that can lead to knowledge discovery about ecosystems. However, there is a lack of tools with which to efficiently extract such knowledge. In this paper, we present SPECIES, an open, web-based platform designed to extract implicit information contained in large scale sets of ecological data. SPECIES is based on a tested methodology, wherein the correlations of variables of arbitrary type and spatial resolution, both biotic and abiotic, discrete and continuous, may be explored from both niche and network perspectives. In distinction to other modeling systems, SPECIES is a full stack exploratory tool that integrates the three basic components: data (which is incrementally growing), a statistical modeling and analysis engine, and an interactive visualization front end. Combined, these components provide a powerful tool that may guide ecologists toward new insights. SPECIES is optimized to support fast hypothesis prototyping and testing, analyzing thousands of biotic and abiotic variables, and presenting descriptive results to the user at different levels of detail. SPECIES is an open-access platform available online (http://species.conabio.gob.mx), that is, powerful, flexible, and easy to use. 
It allows for the exploration and incorporation of ecological data and its subsequent integration into predictive models for both potential ecological niche and geographic distribution. It also provides an ecosystemic, network-based analysis that may guide the researcher in identifying relations between different biota, such as the relation between disease vectors and potential disease hosts.",2019-01-28 +28905148,Building a PGC-LC-MS N-glycan retention library and elution mapping resource.,"Porous graphitised carbon-liquid chromatography (PGC-LC) has been proven to be a powerful technique for the analysis and characterisation of complex mixtures of isomeric and isobaric glycan structures. Here we evaluate the elution behaviour of N-glycans on PGC-LC and thereby provide the potential of using chromatographic separation properties, together with mass spectrometry (MS) fragmentation, to determine glycan structure assignments more easily. We used previously reported N-glycan structures released from the purified glycoproteins Immunoglobulin G (IgG), Immunoglobulin A (IgA), lactoferrin, α1-acid glycoprotein, Ribonuclease B (RNase B), fetuin and ovalbumin to profile their behaviour on capillary PGC-LC-MS. Over 100 glycan structures were determined by MS/MS, and together with targeted exoglycosidase digestions, created a N-glycan PGC retention library covering a full spectrum of biologically significant N-glycans from pauci mannose to sialylated tetra-antennary classes. The resultant PGC retention library ( http://www.glycostore.org/showPgc ) incorporates retention times and supporting fragmentation spectra including exoglycosidase digestion products, and provides detailed knowledge on the elution properties of N-glycans by PGC-LC. 
Consequently, this platform should serve as a valuable resource for facilitating the detailed analysis of the glycosylation of both purified recombinant, and complex mixtures of, glycoproteins using established workflows.",2017-09-13 +28402462,Omicseq: a web-based search engine for exploring omics datasets.,"The development and application of high-throughput genomics technologies has resulted in massive quantities of diverse omics data that continue to accumulate rapidly. These rich datasets offer unprecedented and exciting opportunities to address long standing questions in biomedical research. However, our ability to explore and query the content of diverse omics data is very limited. Existing dataset search tools rely almost exclusively on the metadata. A text-based query for gene name(s) does not work well on datasets wherein the vast majority of their content is numeric. To overcome this barrier, we have developed Omicseq, a novel web-based platform that facilitates the easy interrogation of omics datasets holistically to improve 'findability' of relevant data. The core component of Omicseq is trackRank, a novel algorithm for ranking omics datasets that fully uses the numerical content of the dataset to determine relevance to the query entity. The Omicseq system is supported by a scalable and elastic, NoSQL database that hosts a large collection of processed omics datasets. In the front end, a simple, web-based interface allows users to enter queries and instantly receive search results as a list of ranked datasets deemed to be the most relevant. Omicseq is freely available at http://www.omicseq.org.",2017-07-01 +31219795,The Systems Biology Markup Language (SBML): Language Specification for Level 3 Version 2 Core Release 2. ,"Computational models can help researchers to interpret data, understand biological functions, and make quantitative predictions. 
The Systems Biology Markup Language (SBML) is a file format for representing computational models in a declarative form that different software systems can exchange. SBML is oriented towards describing biological processes of the sort common in research on a number of topics, including metabolic pathways, cell signaling pathways, and many others. By supporting SBML as an input/output format, different tools can all operate on an identical representation of a model, removing opportunities for translation errors and assuring a common starting point for analyses and simulations. This document provides the specification for Release 2 of Version 2 of SBML Level 3 Core. The specification defines the data structures prescribed by SBML as well as their encoding in XML, the eXtensible Markup Language. Release 2 corrects some errors and clarifies some ambiguities discovered in Release 1. This specification also defines validation rules that determine the validity of an SBML document, and provides many examples of models in SBML form. Other materials and software are available from the SBML project website at http://sbml.org/.",2019-06-20 +31321430,CWL-Airflow: a lightweight pipeline manager supporting Common Workflow Language. ,"Massive growth in the amount of research data and computational analysis has led to increased use of pipeline managers in biomedical computational research. However, each of the >100 such managers uses its own way to describe pipelines, leading to difficulty porting workflows to different environments and therefore poor reproducibility of computational studies. For this reason, the Common Workflow Language (CWL) was recently introduced as a specification for platform-independent workflow description, and work began to transition existing pipelines and workflow managers to CWL. Herein, we present CWL-Airflow, a package that adds support for CWL to the Apache Airflow pipeline manager. 
CWL-Airflow uses CWL version 1.0 specification and can run workflows on stand-alone MacOS/Linux servers, on clusters, or on a variety of cloud platforms. A sample CWL pipeline for processing of chromatin immunoprecipitation sequencing data is provided. CWL-Airflow will provide users with the features of a fully fledged pipeline manager and the ability to execute CWL workflows anywhere Airflow can run—from a laptop to a cluster or cloud environment. CWL-Airflow is available under Apache License, version 2.0 (Apache-2.0), and can be downloaded from https://barski-lab.github.io/cwl-airflow, https://scicrunch.org/resolver/RRID:SCR_017196.",2019-07-01 +30520945,"Alfred: interactive multi-sample BAM alignment statistics, feature counting and feature annotation for long- and short-read sequencing.","

Summary

Harmonizing quality control (QC) of large-scale second and third-generation sequencing datasets is key for enabling downstream computational and biological analyses. We present Alfred, an efficient and versatile command-line application that computes multi-sample QC metrics in a read-group aware manner, across a wide variety of sequencing assays and technologies. In addition to standard QC metrics such as GC bias, base composition, insert size and sequencing coverage distributions it supports haplotype-aware and allele-specific feature counting and feature annotation. The versatility of Alfred allows for easy pipeline integration in high-throughput settings, including DNA sequencing facilities and large-scale research initiatives, enabling continuous monitoring of sequence data quality and characteristics across samples. Alfred supports haplo-tagging of BAM/CRAM files to conduct haplotype-resolved analyses in conjunction with a variety of next-generation sequencing based assays. Alfred's companion web application enables interactive exploration of results and comparison to public datasets.

Availability and implementation

Alfred is open-source and freely available at https://tobiasrausch.com/alfred/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +31036810,A multi-species repository of social networks.,"Social network analysis is an invaluable tool to understand the patterns, evolution, and consequences of sociality. Comparative studies over a range of social systems across multiple taxonomic groups are particularly valuable. Such studies however require quantitative social association or interaction data across multiple species which is not easily available. We introduce the Animal Social Network Repository (ASNR) as the first multi-taxonomic repository that collates 790 social networks from more than 45 species, including those of mammals, reptiles, fish, birds, and insects. The repository was created by consolidating social network datasets from the literature on wild and captive animals into a consistent and easy-to-use network data format. The repository is archived at https://bansallab.github.io/asnr/ . ASNR has tremendous research potential, including testing hypotheses in the fields of animal ecology, social behavior, epidemiology and evolutionary biology.",2019-04-29 +24265222,Saccharomyces genome database provides new regulation data.,"The Saccharomyces Genome Database (SGD; http://www.yeastgenome.org) is the community resource for genomic, gene and protein information about the budding yeast Saccharomyces cerevisiae, containing a variety of functional information about each yeast gene and gene product. We have recently added regulatory information to SGD and present it on a new tabbed section of the Locus Summary entitled 'Regulation'. We are compiling transcriptional regulator-target gene relationships, which are curated from the literature at SGD or imported, with permission, from the YEASTRACT database. For nearly every S. cerevisiae gene, the Regulation page displays a table of annotations showing the regulators of that gene, and a graphical visualization of its regulatory network. 
For genes whose products act as transcription factors, the Regulation page also shows a table of their target genes, accompanied by a Gene Ontology enrichment analysis of the biological processes in which those genes participate. We additionally synthesize information from the literature for each transcription factor in a free-text Regulation Summary, and provide other information relevant to its regulatory function, such as DNA binding site motifs and protein domains. All of the regulation data are available for querying, analysis and download via YeastMine, the InterMine-based data warehouse system in use at SGD.",2013-11-21 +23504933,Access guide to human proteinpedia.,"Human Proteinpedia (http://www.humanproteinpedia.org) is a publicly available proteome repository for sharing human protein data derived from multiple experimental platforms. It incorporates diverse features of the human proteome including protein-protein interactions, enzyme-substrate relationships, PTMs, subcellular localization, and expression of proteins in various human tissues and cell lines in diverse biological conditions including diseases. Through a publicly distributed annotation system developed especially for proteomic data, investigators across the globe can upload, view, and edit proteomic data even before they are published. Inclusion of information on investigators and laboratories that generated the data, as well as visualization of tandem mass spectra, stained tissue sections, protein/peptide microarrays, fluorescent micrographs, and western blots, ensures quality of proteomic data assimilated in Human Proteinpedia. Many of the protein annotations submitted to Human Proteinpedia have also been made available to the scientific community through Human Protein Reference Database (http://www.hprd.org), another resource developed by our group. 
In this protocol, we describe how to submit, edit, and retrieve proteomic data in Human Proteinpedia.",2013-03-01 +31463796,Cullin 3 overexpression inhibits lung cancer metastasis and is associated with survival of lung adenocarcinoma.,"Cullin 3 (CUL3), a molecular scaffold of Cullin-RING ubiquitin ligase, plays an important role in regulating biological processes through modulating the ubiquitylation and degradation of various protein substrates. Dysfunction of CUL3 is implicated in the development of several human diseases. However, the clinical significance and prognostic value of CUL3 in lung cancer have not been investigated. This study investigated the CUL3-modulating potential of non-small cell lung cancer cell lines, H1299, H358, H2170 and H520, by using immunoblotting, MTT, migration, invasion, colony formation and in vivo tumorigenicity assays. The prognostic significance of CUL3 was measured by public KM plotter database (http://kmplot.com/analysis/index.php?p=service&cancer=breast) and tissue immunohistochemistry analysis. The public online database analysis revealed that elevated mRNA expression of CUL3 was associated with better prognosis for non-small cell lung cancer and lung adenocarcinoma. In vitro experiments showed that ectopic overexpression of CUL3 significantly inhibited lung adenocarcinoma cell proliferation and migration, and the tumor-suppressive effect of CUL3 was dependent on the Nrf2/RhoA axis. An in vivo mouse model demonstrated that overexpression of CUL3 led to a significant reduction of lung adenocarcinoma growth and metastasis. Importantly, tissue immunohistochemistry analysis showed that about 47% of non-small cell lung cancer tissues expressed CUL3 at high levels. Overexpression of CUL3 predicted favorable overall survival in non-small cell lung cancer patients, especially in lung adenocarcinoma, but not in lung squamous cell carcinoma patients. CUL3 could serve as a prognostic biomarker for lung adenocarcinoma. 
Loss of CUL3 might be driving tumorigenesis by activating the Nrf2/RhoA pathway.",2019-08-28 +32960100,Toxicity of chloroquine and hydroxychloroquine following therapeutic use or overdose.,"

Introduction

While chloroquine, a derivative of quinine, has been used as an antimalarial for 70 years, hydroxychloroquine is now used to treat conditions such as rheumatoid arthritis and systemic lupus erythematosus. In 2020, hydroxychloroquine (and to a lesser extent chloroquine) also received attention as a possible treatment for severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). During investigation for treating coronavirus disease 2019 (COVID-19) caused by SARS-CoV-2, concerns for serious adverse events arose.

Objective

We review the toxicity associated with hydroxychloroquine and chloroquine use both short-term and long-term and in overdose.

Methods

Medline (via OVID) was searched from its inception through June 7 2020 using the following as either MeSH or keyword terms: (""Chloroquine/"" or ""Hydroxychloroquine/"") AND (""Adverse Drug Event/"" or ""Toxicities, Drug/"" or ""Toxic.mp."" or ""Toxicity.mp."" or ""Overdose.mp.""). We limited resultant articles to those published in English and reporting on Human subjects. This search yielded 330 articles, of which 57 were included. Articles were excluded due to lack of relevance, not reporting desired outcomes, or being duplicative in their content. Twenty-five additional articles were identified through screening references of included articles. To identify toxicities in individuals treated with hydroxychloroquine or chloroquine with COVID-19, we searched PubMed on June 10th, 2020: (""Chloroquine"" or ""Hydroxychloroquine"") AND (""Coronavirus"" or ""COVID-19"" or ""SARS-CoV-2""). This search resulted in 638 articles. We reviewed articles for reporting of adverse events or toxicities. Most citations were excluded because they did not include original investigations or extrapolated data from subjects that did not have COVID-19; 34 citations were relevant. For the drug-interactions section, relevant classes and agents were identified through a screen of the https://www.covid19-druginteractions.org/ website. We then conducted targeted searches of PubMed up to June 7th 2020 combining ""chloroquine"" and ""hydroxychloroquine"" with terms for specific drug classes and drugs identified from the drug-interaction site as potentially relevant. We found 29 relevant articles.

Toxicity with short-term use

Gastrointestinal: Gastrointestinal toxicities are the most common to occur following initiation of chloroquine or hydroxychloroquine. Nausea, vomiting, and diarrhea account for most reported intolerances. Glucose abnormalities: Alterations in blood glucose concentrations may occur with hydroxychloroquine but are rare with standard therapeutic use. Cardiotoxicity: Short-term use can produce conduction abnormalities. Evidence from COVID-19 treatment suggests QT/QTc prolongation is of concern, particularly when used in combination with azithromycin, although disagreement exists across studies. Dermatologic: Drug eruptions or rashes, followed by cutaneous hyperpigmentation, pruritus, Stevens-Johnson syndrome, and toxic epidermal necrolysis, may occur within days to weeks of exposure but usually resolve with the discontinuation of therapy. Neuropsychiatric: Reported symptoms include confusion, disorientation, and hallucination within 24-48 h of drug initiation. Other toxicities: Hemolysis and anemia may occur in patients with glucose-6-phosphate dehydrogenase deficiency. Chloroquine treatment of COVID-19 was associated with elevation in creatine kinase and creatine kinase-MB activities with more events in the higher-dose group.

Toxicity with long-term use

Retinopathy: Retinopathy is the major dose-limiting toxicity associated with long-term use; the risk is higher with increasing age, dose, and duration of usage. Cardiotoxicity: Long-term use has been associated with conduction abnormalities, cardiomyopathy, and valvular disorders. Neurotoxicity: Rarely myositis and muscle weakness, extremity weakness, and pseudoparkinsonism have been reported.

Toxicity in overdose

Symptoms in overdose manifest rapidly (minutes to hours) and cardiotoxicity such as cardiovascular shock and collapse are most prominent. Neurotoxic effects such as psychosis and seizure may also occur.

Conclusions

Hydroxychloroquine is a generally well-tolerated medication. Short-term (days to weeks) toxicity includes gastrointestinal effects and rarely glucose abnormalities, dermatologic reactions, and neuropsychiatric events. Cardiotoxicity became of increased concern with its use in COVID-19 patients. Long-term (years) toxicities include retinopathy, neuromyotoxicity, and cardiotoxicity (conduction abnormalities, cardiomyopathy). Deaths from overdoses most often result from cardiovascular collapse.",2020-09-22 +28938868,START: a system for flexible analysis of hundreds of genomic signal tracks in few lines of SQL-like queries.,"

Background

A genomic signal track is a set of genomic intervals associated with values of various types, such as measurements from high-throughput experiments. Analysis of signal tracks requires complex computational methods, which often make the analysts focus too much on the detailed computational steps rather than on their biological questions.

Results

Here we propose Signal Track Query Language (STQL) for simple analysis of signal tracks. It is a Structured Query Language (SQL)-like declarative language, which means one only specifies what computations need to be done but not how these computations are to be carried out. STQL provides a rich set of constructs for manipulating genomic intervals and their values. To run STQL queries, we have developed the Signal Track Analytical Research Tool (START, http://yiplab.cse.cuhk.edu.hk/start/ ), a system that includes a Web-based user interface and a back-end execution system. The user interface helps users select data from our database of around 10,000 commonly-used public signal tracks, manage their own tracks, and construct, store and share STQL queries. The back-end system automatically translates STQL queries into optimized low-level programs and runs them on a computer cluster in parallel. We use STQL to perform 14 representative analytical tasks. By repeating these analyses using bedtools, Galaxy and custom Python scripts, we show that the STQL solution is usually the simplest, and the parallel execution achieves significant speed-up with large data files. Finally, we describe how a biologist with minimal formal training in computer programming self-learned STQL to analyze DNA methylation data we produced from 60 pairs of hepatocellular carcinoma (HCC) samples.

Conclusions

Overall, STQL and START provide a generic way for analyzing a large number of genomic signal tracks in parallel easily.",2017-09-22 +32255670,A Quantile-Based g-Computation Approach to Addressing the Effects of Exposure Mixtures.,"

Background

Exposure mixtures frequently occur in data across many domains, particularly in the fields of environmental and nutritional epidemiology. Various strategies have arisen to answer questions about exposure mixtures, including methods such as weighted quantile sum (WQS) regression that estimate a joint effect of the mixture components.

Objectives

We demonstrate a new approach to estimating the joint effects of a mixture: quantile g-computation. This approach combines the inferential simplicity of WQS regression with the flexibility of g-computation, a method of causal effect estimation. We use simulations to examine whether quantile g-computation and WQS regression can accurately and precisely estimate the effects of mixtures in a variety of common scenarios.

Methods

We examine the bias, confidence interval (CI) coverage, and bias-variance tradeoff of quantile g-computation and WQS regression and how these quantities are impacted by the presence of noncausal exposures, exposure correlation, unmeasured confounding, and nonlinearity of exposure effects.

Results

Quantile g-computation, unlike WQS regression, allows inference on mixture effects that is unbiased with appropriate CI coverage at sample sizes typically encountered in epidemiologic studies and when the assumptions of WQS regression are not met. Further, WQS regression can magnify bias from unmeasured confounding that might occur if important components of the mixture are omitted from the analysis.

Discussion

Unlike inferential approaches that examine the effects of individual exposures while holding other exposures constant, methods like quantile g-computation that can estimate the effect of a mixture are essential for understanding the effects of potential public health actions that act on exposure sources. Our approach may serve to help bridge gaps between epidemiologic analysis and interventions such as regulations on industrial emissions or mining processes, dietary changes, or consumer behavioral changes that act on multiple exposures simultaneously. https://doi.org/10.1289/EHP5838.",2020-04-07 +31470098,Novel missense variant in TTN cosegregating with familial atrioventricular block.,"

Background

Cardiovascular diseases are the most common cause of death globally. Among them, atrioventricular block (AVB) is a common disorder with genetic causes, but the responsible genes have not been fully identified yet. To determine the underlying causative genes involved in cardiac AVB, here we report a three-generation Chinese family with severe autosomal dominant cardiac AVB that has been ruled out as being caused by known gene mutations.

Methods

Whole-exome sequencing was performed in five affected family members across three generations, and co-segregation analysis was validated on other members of this family.

Results

Whole-exome sequencing and subsequent co-segregation validation identified a novel germline heterozygous point missense mutation, c.49287C > A (p.N16429K), in the titin (TTN, NM_001267550.2) gene in all 5 affected family members but not in the unaffected family members, neither in the large population according to the Genome Aggregation Database (https://gnomad.broadinstitute.org/). The point mutation is predicted to be functionally deleterious by in-silico software tools. Our finding was further supported by the conservative analysis across species.

Conclusion

Based on this study, TTN was identified as a potential novel candidate gene for autosomal dominant AVB; this study expands the mutational spectrum of TTN gene and is the first to implicate TTN mutations as AVB disease causing in a Chinese pedigree.",2019-08-27 +23203872,The non-human primate reference transcriptome resource (NHPRTR) for comparative functional genomics.,"RNA-based next-generation sequencing (RNA-Seq) provides a tremendous amount of new information regarding gene and transcript structure, expression and regulation. This is particularly true for non-coding RNAs where whole transcriptome analyses have revealed that much of the genome is transcribed and that many non-coding transcripts have widespread functionality. However, uniform resources for raw, cleaned and processed RNA-Seq data are sparse for most organisms and this is especially true for non-human primates (NHPs). Here, we describe a large-scale RNA-Seq data and analysis infrastructure, the NHP reference transcriptome resource (http://nhprtr.org); it presently hosts data from 12 species of primates, to be expanded to 15 species/subspecies spanning great apes, old world monkeys, new world monkeys and prosimians. Data are collected for each species using pools of RNA from comparable tissues. We provide data access in advance of its deposition at NCBI, as well as browsable tracks of alignments against the human genome using the UCSC genome browser. This resource will continue to host additional RNA-Seq data, alignments and assemblies as they are generated over the coming years and provide a key resource for the annotation of NHP genomes as well as informing primate studies on evolution, reproduction, infection, immunity and pharmacology.",2012-11-29 +32713858,PRIGSA2: Improved version of protein repeat identification by graph spectral analysis. 
,"Tandemly repeated structural motifs in proteins form highly stable structural folds and provide multiple binding sites associated with diverse functional roles. The tertiary structure and function of these proteins are determined by the type and copy number of the repeating units. Each repeat type exhibits a unique pattern of intra- and inter-repeat unit interactions that is well-captured by the topological features in the network representation of protein structures. Here we present an improved version of our graph based algorithm, PRIGSA, with structure-based validation and filtering steps incorporated for accurate detection of tandem structural repeats. The algorithm integrates available knowledge on repeat families with de novo prediction to detect repeats in single monomer chains as well as in multimeric protein complexes. Three levels of performance evaluation are presented: comparison with state-of-the-art algorithms on benchmark dataset of repeat and nonrepeat proteins, accuracy in the detection of members of 13 known repeat families reported in UniProt and execution on the complete Protein Data Bank to show its ability to identify previously uncharacterized proteins. A ~3-fold increase in the coverage of the members of 13 known families and 3408 novel uncharacterized structural repeat proteins are identified on executing it on PDB. PRIGSA2 is available at http://bioinf.iiit.ac.in/PRIGSA2/.",2020-01-01 +23620361,Sharing and executing linked data queries in a collaborative environment.,"

Motivation

Life Sciences have emerged as a key domain in the Linked Data community because of the diversity of data semantics and formats available through a great variety of databases and web technologies. Thus, it has been used as the perfect domain for applications in the web of data. Unfortunately, bioinformaticians are not exploiting the full potential of this already available technology, and experts in Life Sciences have real problems to discover, understand and devise how to take advantage of these interlinked (integrated) data.

Results

In this article, we present Bioqueries, a wiki-based portal that is aimed at community building around biological Linked Data. This tool has been designed to aid bioinformaticians in developing SPARQL queries to access biological databases exposed as Linked Data, and also to help biologists gain a deeper insight into the potential use of this technology. This public space offers several services and a collaborative infrastructure to stimulate the consumption of biological Linked Data and, therefore, contribute to implementing the benefits of the web of data in this domain. Bioqueries currently contains 215 query entries grouped by database and theme, 230 registered users and 44 end points that contain biological Resource Description Framework information.

Availability

The Bioqueries portal is freely accessible at http://bioqueries.uma.es.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-25 +31626566,Improving and Expanding Estimates of the Global Burden of Disease Due to Environmental Health Risk Factors.,"

Background

The Global Burden of Disease (GBD) study, coordinated by the Institute for Health Metrics and Evaluation (IHME), produces influential, data-driven estimates of the burden of disease and premature death due to major risk factors. Expanded quantification of disease due to environmental health (EH) risk factors, including climate change, will enhance accuracy of GBD estimates, which will contribute to developing cost-effective policies that promote prevention and achieving Sustainable Development Goals.

Objectives

We review key aspects of the GBD for the EH community and introduce the Global Burden of Disease-Pollution and Health Initiative (GBD-PHI), which aims to work with IHME and the GBD study to improve estimates of disease burden attributable to EH risk factors and to develop an innovative approach to estimating climate-related disease burden-both current and projected.

Methods

We discuss strategies for improving GBD quantification of specific EH risk factors, including air pollution, lead, and climate change. We highlight key methodological challenges, including new EH risk factors, notably evidence rating and global exposure assessment.

Discussion

A number of issues present challenges to the scope and accuracy of current GBD estimates for EH risk factors. For air pollution, minimal data exist on the exposure-risk relationships associated with high levels of pollution; epidemiological studies in high pollution regions should be a research priority. For lead, the GBD's current methods do not fully account for lead's impact on neurodevelopment; innovative methods to account for subclinical effects are needed. Decisions on inclusion of additional EH risk-outcome pairs need to be guided by findings of systematic reviews, the size of exposed populations, feasibility of global exposure estimates, and predicted trends in exposures and diseases. Neurotoxicants, endocrine-disrupting chemicals, and climate-related factors should be high priorities for incorporation into upcoming iterations of the GBD study. Enhancing the scope and methods will improve the GBD's estimates and better guide prevention policy. https://doi.org/10.1289/EHP5496.",2019-10-18 +31496866,"Clinical and pharmacological application of multiscale multiphysics heart simulator, UT-Heart.","A heart simulator, UT-Heart, is a finite element model of the human heart that can reproduce all the fundamental activities of the working heart, including propagation of excitation, contraction, and relaxation and generation of blood pressure and blood flow, based on the molecular aspects of the cardiac electrophysiology and excitation-contraction coupling. In this paper, we present a brief review of the practical use of UT-Heart. As an example, we focus on its application for predicting the effect of cardiac resynchronization therapy (CRT) and evaluating the proarrhythmic risk of drugs. Patient-specific, multiscale heart simulation successfully predicted the response to CRT by reproducing the complex pathophysiology of the heart. 
A proarrhythmic risk assessment system combining in vitro channel assays and in silico simulation of cardiac electrophysiology using UT-Heart successfully predicted drug-induced arrhythmogenic risk. The assessment system was found to be reliable and efficient. We also developed a comprehensive hazard map on the various combinations of ion channel inhibitors. This in silico electrocardiogram database (now freely available at http://ut-heart.com/) can facilitate proarrhythmic risk assessment without the need to perform computationally expensive heart simulation. Based on these results, we conclude that the heart simulator, UT-Heart, could be a useful tool in clinical medicine and drug discovery.",2019-08-26 +31373604,RAG-Web: RNA structure prediction/design using RNA-As-Graphs.,"

Summary

We launch a webserver for RNA structure prediction and design corresponding to tools developed using our RNA-As-Graphs (RAG) approach. RAG uses coarse-grained tree graphs to represent RNA secondary structure, allowing the application of graph theory to analyze and advance RNA structure discovery. Our webserver consists of three modules: (a) RAG Sampler: samples tree graph topologies from an RNA secondary structure to predict corresponding tertiary topologies, (b) RAG Builder: builds three-dimensional atomic models from candidate graphs generated by RAG Sampler, and (c) RAG Designer: designs sequences that fold onto novel RNA motifs (described by tree graph topologies). Results analyses are performed for further assessment/selection. The Results page provides links to download results and indicates possible errors encountered. RAG-Web offers a user-friendly interface to utilize our RAG software suite to predict and design RNA structures and sequences.

Availability and implementation

The webserver is freely available online at: http://www.biomath.nyu.edu/ragtop/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +31227524,Serpins: Genome-Wide Characterisation and Expression Analysis of the Serine Protease Inhibitor Family in Triticum aestivum.,"The serine protease inhibitor (serpin) gene family is the largest family of protease inhibitors. Serine protease inhibitors have an active, but under-characterized, role in grain development and defense against pathogen attack in cereal crops. By exploiting publicly available genomic, transcriptomic and proteomic data for wheat (Triticum aestivum), we have identified and annotated the entire 'serpinome' of wheat and constructed a high-quality and robust phylogenetic tree of the gene family, identifying paralogous and homeologous clades from the hexaploid wheat genome, including the Serpin-Z group that have been well characterized in barley. Using publicly available RNAseq data (http://www.wheat-expression.com/), expression profiles of the wheat serpins were explored across a variety of tissues from the developing grain, spikelet and spike. We show that the SERPIN-Z clade, among others, are highly expressed during grain development, and that there is homeologous and paralogous functional redundancy in this gene family. Further to their role in grain development, serpins play an important but under-explored role in response to fungal pathogens. Using 13 RNAseq datasets of wheat tissues infected by fungal pathogens, we identified 37 serpins with a significant disease response. The majority of the disease-responsive serpins were upregulated by Fusarium graminearum, a destructive fungal pathogen that attacks the spike and developing grain of wheat. 
As serpins are ubiquitous in wheat grain, the genes encoding serpins may be linked to grain development, with their disease response a result of pleiotropy.",2019-08-08 +31888606,Region-based interaction detection in genome-wide case-control studies.,"BACKGROUND:In genome-wide association study (GWAS), conventional interaction detection methods such as BOOST are mostly based on SNP-SNP interactions. Although single nucleotides are the building blocks of human genome, single nucleotide polymorphisms (SNPs) are not necessarily the smallest functional unit for complex phenotypes. Region-based strategies have been proved to be successful in studies aiming at marginal effects. METHODS:We propose a novel region-region interaction detection method named RRIntCC (region-region interaction detection for case-control studies). RRIntCC uses the correlations between individual SNP-SNP interactions based on linkage disequilibrium (LD) contrast test. RESULTS:Simulation experiments showed that our method can achieve a higher power than conventional SNP-based methods with similar type-I-error rates. When applied to two real datasets, RRIntCC was able to find several significant regions, while BOOST failed to identify any significant results. The source code and the sample data of RRIntCC are available at http://bioinformatics.ust.hk/RRIntCC.html. CONCLUSION:In this paper, a new region-based interaction detection method with better performance than SNP-based interaction detection methods has been proposed.",2019-12-30 +31258543,PropaNet: Time-Varying Condition-Specific Transcriptional Network Construction by Network Propagation.,"Transcription factor (TF) has a significant influence on the state of a cell by regulating multiple down-stream genes. Thus, experimental and computational biologists have made great efforts to construct TF gene networks for regulatory interactions between TFs and their target genes. 
Now, an important research question is how to utilize TF networks to investigate the response of a plant to stress at the transcription control level using time-series transcriptome data. In this article, we present a new computational network, PropaNet, to investigate dynamics of TF networks from time-series transcriptome data using two state-of-the-art network analysis techniques, influence maximization and network propagation. PropaNet uses the influence maximization technique to produce a ranked list of TFs, in the order of TF that explains differentially expressed genes (DEGs) better at each time point. Then, a network propagation technique is used to select a group of TFs that explains DEGs best as a whole. For the analysis of Arabidopsis time series datasets from AtGenExpress, we used PlantRegMap as a template TF network and performed PropaNet analysis to investigate transcriptional dynamics of Arabidopsis under cold and heat stress. The time varying TF networks showed that Arabidopsis responded to cold and heat stress quite differently. For cold stress, bHLH and bZIP type TFs were the first responding TFs and the cold signal influenced histone variants, various genes involved in cell architecture, osmosis and restructuring of cells. However, the consequences of plants under heat stress were up-regulation of genes related to accelerating differentiation and starting re-differentiation. In terms of energy metabolism, plants under heat stress show elevated metabolic process and resulting in an exhausted status. We believe that PropaNet will be useful for the construction of condition-specific time-varying TF network for time-series data analysis in response to stress. 
PropaNet is available at http://biohealth.snu.ac.kr/software/PropaNet.",2019-06-14 +29698482,Determining the minimum number of protein-protein interactions required to support known protein complexes.,"The prediction of protein complexes from protein-protein interactions (PPIs) is a well-studied problem in bioinformatics. However, the currently available PPI data is not enough to describe all known protein complexes. In this paper, we express the problem of determining the minimum number of (additional) required protein-protein interactions as a graph theoretic problem under the constraint that each complex constitutes a connected component in a PPI network. For this problem, we develop two computational methods: one is based on integer linear programming (ILPMinPPI) and the other one is based on an existing greedy-type approximation algorithm (GreedyMinPPI) originally developed in the context of communication and social networks. Since the former method is only applicable to datasets of small size, we apply the latter method to a combination of the CYC2008 protein complex dataset and each of eight PPI datasets (STRING, MINT, BioGRID, IntAct, DIP, BIND, WI-PHI, iRefIndex). The results show that the minimum number of additional required PPIs ranges from 51 (STRING) to 964 (BIND), and that even the four best PPI databases, STRING (51), BioGRID (67), WI-PHI (93) and iRefIndex (85), do not include enough PPIs to form all CYC2008 protein complexes. We also demonstrate that the proposed problem framework and our solutions can enhance the prediction accuracy of existing PPI prediction methods. ILPMinPPI can be freely downloaded from http://sunflower.kuicr.kyoto-u.ac.jp/~nakajima/.",2018-04-26 +30860571,MinE-RFE: determine the optimal subset from RFE by minimizing the subset-accuracy-defined energy.,"Recursive feature elimination (RFE), as one of the most popular feature selection algorithms, has been extensively applied to bioinformatics. 
During the training, a group of candidate subsets are generated by iteratively eliminating the least important features from the original features. However, how to determine the optimal subset from them still remains ambiguous. Among most current studies, either overall accuracy or subset size (SS) is used to select the most predictive features. Using which one or both and how they affect the prediction performance are still open questions. In this study, we proposed MinE-RFE, a novel RFE-based feature selection approach by sufficiently considering the effect of both factors. Subset decision problem was reflected into subset-accuracy space and became an energy-minimization problem. We also provided a mathematical description of the relationship between the overall accuracy and SS using Gaussian Mixture Models together with spline fitting. Besides, we comprehensively reviewed a variety of state-of-the-art applications in bioinformatics using RFE. We compared their approaches of deciding the final subset from all the candidate subsets with MinE-RFE on diverse bioinformatics data sets. Additionally, we also compared MinE-RFE with some well-used feature selection algorithms. The comparative results demonstrate that the proposed approach exhibits the best performance among all the approaches. To facilitate the use of MinE-RFE, we further established a user-friendly web server with the implementation of the proposed approach, which is accessible at http://qgking.wicp.net/MinE/. We expect this web server will be a useful tool for research community.",2020-03-01 +28108447,miRmine: a database of human miRNA expression profiles.,"

Motivation

MicroRNAs (miRNAs) are small non-coding RNAs that are involved in post-transcriptional regulation of gene expression. In this high-throughput sequencing era, a tremendous amount of RNA-seq data is accumulating, and full utilization of publicly available miRNA data is an important challenge. These data are useful to determine expression values for each miRNA, but quantification pipelines are in a primitive stage and still evolving; there are many factors that affect expression values significantly.

Results

We used 304 high-quality microRNA sequencing (miRNA-seq) datasets from NCBI-SRA and calculated expression profiles for different tissues and cell-lines. In each miRNA-seq dataset, we found an average of more than 500 miRNAs with higher than 5x coverage, and we explored the top five highly expressed miRNAs in each tissue and cell-line. This user-friendly miRmine database has options to retrieve expression profiles of single or multiple miRNAs for a specific tissue or cell-line, either normal or with disease information. Results can be displayed in multiple interactive, graphical and downloadable formats.

Availability and implementation

http://guanlab.ccmb.med.umich.edu/mirmine.

Contact

bharatpa@umich.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +30815525,Translatome and transcriptome analysis of TMA20 (MCT-1) and TMA64 (eIF2D) knockout yeast strains.,"TMA20 (MCT-1), TMA22 (DENR) and TMA64 (eIF2D) are eukaryotic translation factors involved in ribosome recycling and re-initiation. They operate with P-site bound tRNA in post-termination or (re-)initiation translation complexes, thus participating in the removal of 40S ribosomal subunit from mRNA stop codons after termination and controlling translation re-initiation on mRNAs with upstream open reading frames (uORFs), as well as de novo initiation on some specific mRNAs. Here we report ribosomal profiling data of S.cerevisiae strains with individual deletions of TMA20, TMA64 or both TMA20 and TMA64 genes. We provide RNA-Seq and Ribo-Seq data from yeast strains grown in the rich YPD or minimal SD medium. We illustrate our data by plotting differential distribution of ribosomal-bound mRNA fragments throughout uORFs in 5'-untranslated region (5' UTR) of GCN4 mRNA and on mRNA transcripts encoded in MAT locus in the mutant and wild-type strains, thus providing a basis for investigation of the role of these factors in the stress response, mating and sporulation. We also document a shift of transcription start site of the APC4 gene which occurs when the neighboring TMA64 gene is replaced by the standard G418-resistance cassette used for the creation of the Yeast Deletion Library. This shift results in dramatic deregulation of the APC4 gene expression, as revealed by our Ribo-Seq data, which can be probably used to explain strong genetic interactions of TMA64 with genes involved in the cell cycle and mitotic checkpoints. 
Raw RNA-Seq and Ribo-Seq data as well as all gene counts are available in NCBI Gene Expression Omnibus (GEO) repository under GEO accession GSE122039 (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE122039).",2019-02-02 +31702773,Model selection for metabolomics: predicting diagnosis of coronary artery disease using automated machine learning.,"

Motivation

Selecting the optimal machine learning (ML) model for a given dataset is often challenging. Automated ML (AutoML) has emerged as a powerful tool for enabling the automatic selection of ML methods and parameter settings for the prediction of biomedical endpoints. Here, we apply the tree-based pipeline optimization tool (TPOT) to predict angiographic diagnoses of coronary artery disease (CAD). With TPOT, ML models are represented as expression trees and optimal pipelines discovered using a stochastic search method called genetic programming. We provide some guidelines for TPOT-based ML pipeline selection and optimization based on various clinical phenotypes and high-throughput metabolic profiles in the Angiography and Genes Study (ANGES).

Results

We analyzed nuclear magnetic resonance-derived lipoprotein and metabolite profiles in the ANGES cohort with a goal to identify the role of non-obstructive CAD patients in CAD diagnostics. We performed a comparative analysis of TPOT-generated ML pipelines with selected ML classifiers, optimized with a grid search approach, applied to two phenotypic CAD profiles. As a result, TPOT-generated ML pipelines outperformed grid search optimized models across multiple performance metrics including balanced accuracy and area under the precision-recall curve. With the selected models, we demonstrated that the phenotypic profile that distinguishes non-obstructive CAD patients from no CAD patients is associated with higher precision, suggesting a discrepancy in the underlying processes between these phenotypes.

Availability and implementation

TPOT is freely available via http://epistasislab.github.io/tpot/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +28460063,WoPPER: Web server for Position Related data analysis of gene Expression in Prokaryotes.,"The structural and conformational organization of chromosomes is crucial for gene expression regulation in eukaryotes and prokaryotes as well. Up to date, gene expression data generated using either microarray or RNA-sequencing are available for many bacterial genomes. However, differential gene expression is usually investigated with methods considering each gene independently, thus not taking into account the physical localization of genes along a bacterial chromosome. Here, we present WoPPER, a web tool integrating gene expression and genomic annotations to identify differentially expressed chromosomal regions in bacteria. RNA-sequencing or microarray-based gene expression data are provided as input, along with gene annotations. The user can select genomic annotations from an internal database including 2780 bacterial strains, or provide custom genomic annotations. The analysis produces as output the lists of positionally related genes showing a coordinated trend of differential expression. Graphical representations, including a circular plot of the analyzed chromosome, allow intuitive browsing of the results. The analysis procedure is based on our previously published R-package PREDA. The release of this tool is timely and relevant for the scientific community, as WoPPER will fill an existing gap in prokaryotic gene expression data analysis and visualization tools. WoPPER is open to all users and can be reached at the following URL: https://WoPPER.ba.itb.cnr.it.",2017-07-01 +29241251,Association between Search Behaviors and Disease Prevalence Rates at 18 U.S. Children's Hospitals.,"Background Usage of online resources by clinicians in training and practice can provide insight into knowledge gaps and inform development of decision support tools. 
Although online information seeking is often driven by encountered patient problems, the relationship between disease prevalence and search rate has not been previously characterized. + +Objective This article aimed to (1) identify topics frequently searched by pediatric clinicians using UpToDate (http://www.uptodate.com) and (2) explore the association between disease prevalence rate and search rate using data from the Pediatric Health Information System. + +Methods We identified the most common search queries and resources most frequently accessed on UpToDate for a cohort of 18 children's hospitals during calendar year 2012. We selected 64 of the most frequently searched diseases and matched ICD-9 data from the PHIS database during the same time period. Using linear regression, we explored the relationship between clinician query rate and disease prevalence rate. + +Results The hospital cohort submitted 1,228,138 search queries across 592,454 sessions. The majority of search sessions focused on a single search topic. We identified no consistent overall association between disease prevalence and search rates. Diseases where search rate was substantially higher than prevalence rate were often infectious or immune/rheumatologic conditions, involved potentially complex diagnosis or management, and carried risk of significant morbidity or mortality. None of the examined diseases showed a decrease in search rate associated with increased disease prevalence rates. + +Conclusion This is one of the first medical learning needs assessments to use large-scale, multisite data to identify topics of interest to pediatric clinicians, and to examine the relationship between disease prevalence and search rate for a set of pediatric diseases. Overall, disease search rate did not appear to be associated with hospital disease prevalence rates based on ICD-9 codes. 
However, some diseases were consistently searched at a higher rate than their prevalence rate; many of these diseases shared common features.",2017-10-01 +30687056,A Single-Cell Level and Connectome-Derived Computational Model of the Drosophila Brain.,"Computer simulations play an important role in testing hypotheses, integrating knowledge, and providing predictions of neural circuit functions. While considerable effort has been dedicated into simulating primate or rodent brains, the fruit fly (Drosophila melanogaster) is becoming a promising model animal in computational neuroscience for its small brain size, complex cognitive behavior, and abundancy of data available from genes to circuits. Moreover, several Drosophila connectome projects have generated a large number of neuronal images that account for a significant portion of the brain, making a systematic investigation of the whole brain circuit possible. Supported by FlyCircuit (http://www.flycircuit.tw), one of the largest Drosophila neuron image databases, we began a long-term project with the goal to construct a whole-brain spiking network model of the Drosophila brain. In this paper, we report the outcome of the first phase of the project. We developed the Flysim platform, which (1) identifies the polarity of each neuron arbor, (2) predicts connections between neurons, (3) translates morphology data from the database into physiology parameters for computational modeling, (4) reconstructs a brain-wide network model, which consists of 20,089 neurons and 1,044,020 synapses, and (5) performs computer simulations of the resting state. We compared the reconstructed brain network with a randomized brain network by shuffling the connections of each neuron. We found that the reconstructed brain can be easily stabilized by implementing synaptic short-term depression, while the randomized one exhibited seizure-like firing activity under the same treatment. 
Furthermore, the reconstructed Drosophila brain was structurally and dynamically more diverse than the randomized one and exhibited both Poisson-like and patterned firing activities. Despite being at its early stage of development, this single-cell level brain model allows us to study some of the fundamental properties of neural networks including network balance, critical behavior, long-term stability, and plasticity.",2018-01-01 +25356683,dcGOR: an R package for analysing ontologies and protein domain annotations.,"I introduce an open-source R package 'dcGOR' to provide the bioinformatics community with the ease to analyse ontologies and protein domain annotations, particularly those in the dcGO database. The dcGO is a comprehensive resource for protein domain annotations using a panel of ontologies including Gene Ontology. Although increasing in popularity, this database needs statistical and graphical support to meet its full potential. Moreover, there are no bioinformatics tools specifically designed for domain ontology analysis. As an add-on package built in the R software environment, dcGOR offers a basic infrastructure with great flexibility and functionality. It implements new data structure to represent domains, ontologies, annotations, and all analytical outputs as well. For each ontology, it provides various mining facilities, including: (i) domain-based enrichment analysis and visualisation; (ii) construction of a domain (semantic similarity) network according to ontology annotations; and (iii) significance analysis for estimating a contact (statistical significance) network. To reduce runtime, most analyses support high-performance parallel computing. Taking as inputs a list of protein domains of interest, the package is able to easily carry out in-depth analyses in terms of functional, phenotypic and diseased relevance, and network-level understanding. 
More importantly, dcGOR is designed to allow users to import and analyse their own ontologies and annotations on domains (taken from SCOP, Pfam and InterPro) and RNAs (from Rfam) as well. The package is freely available at CRAN for easy installation, and also at GitHub for version control. The dedicated website with reproducible demos can be found at http://supfam.org/dcGOR.",2014-10-30 +31439071,An update on the efficacy of anti-inflammatory agents for patients with schizophrenia: a meta-analysis.,"

Background

Accumulating evidence shows that a propensity towards a pro-inflammatory status in the brain plays an important role in schizophrenia. Anti-inflammatory drugs might compensate this propensity. This study provides an update regarding the efficacy of agents with some anti-inflammatory actions for schizophrenia symptoms tested in randomized controlled trials (RCTs).

Methods

PubMed, Embase, the National Institutes of Health website (http://www.clinicaltrials.gov), and the Cochrane Database of Systematic Reviews were systematically searched for RCTs that investigated clinical outcomes.

Results

Our search yielded 56 studies that provided information on the efficacy of the following components on symptom severity: aspirin, bexarotene, celecoxib, davunetide, dextromethorphan, estrogens, fatty acids, melatonin, minocycline, N-acetylcysteine (NAC), pioglitazone, piracetam, pregnenolone, statins, varenicline, and withania somnifera extract. The results of aspirin [mean weighted effect size (ES): 0.30; n = 270; 95% CI (CI) 0.06-0.54], estrogens (ES: 0.78; n = 723; CI 0.36-1.19), minocycline (ES: 0.40; n = 946; CI 0.11-0.68), and NAC (ES: 1.00; n = 442; CI 0.60-1.41) were significant in meta-analysis of at least two studies. Subgroup analysis yielded larger positive effects for first-episode psychosis (FEP) or early-phase schizophrenia studies. Bexarotene, celecoxib, davunetide, dextromethorphan, fatty acids, pregnenolone, statins, and varenicline showed no significant effect.

Conclusions

Some, but not all agents with anti-inflammatory properties showed efficacy. Effective agents were aspirin, estrogens, minocycline, and NAC. We observed greater beneficial results on symptom severity in FEP or early-phase schizophrenia.",2019-08-23 +25428357,arrayMap 2014: an updated cancer genome resource.,"Somatic copy number aberrations (CNA) represent a mutation type encountered in the majority of cancer genomes. Here, we present the 2014 edition of arrayMap (http://www.arraymap.org), a publicly accessible collection of pre-processed oncogenomic array data sets and CNA profiles, representing a vast range of human malignancies. Since the initial release, we have enhanced this resource both in content and especially with regard to data mining support. The 2014 release of arrayMap contains more than 64,000 genomic array data sets, representing about 250 tumor diagnoses. Data sets included in arrayMap have been assembled from public repositories as well as additional resources, and integrated by applying custom processing pipelines. Online tools have been upgraded for a more flexible array data visualization, including options for processing user provided, non-public data sets. Data integration has been improved by mapping to multiple editions of the human reference genome, with the majority of the data now being available for the UCSC hg18 as well as GRCh37 versions. The large amount of tumor CNA data in arrayMap can be freely downloaded by users to promote data mining projects, and to explore special events such as chromothripsis-like genome patterns.",2014-11-26 +29078314,Complex evolutionary footprints revealed in an analysis of reused protein segments of diverse lengths.,"Proteins share similar segments with one another. Such ""reused parts""-which have been successfully incorporated into other proteins-are likely to offer an evolutionary advantage over de novo evolved segments, as most of the latter will not even have the capacity to fold. 
To systematically explore the evolutionary traces of segment ""reuse"" across proteins, we developed an automated methodology that identifies reused segments from protein alignments. We search for ""themes""-segments of at least 35 residues of similar sequence and structure-reused within representative sets of 15,016 domains [Evolutionary Classification of Protein Domains (ECOD) database] or 20,398 chains [Protein Data Bank (PDB)]. We observe that theme reuse is highly prevalent and that reuse is more extensive when the length threshold for identifying a theme is lower. Structural domains, the best characterized form of reuse in proteins, are just one of many complex and intertwined evolutionary traces. Others include long themes shared among a few proteins, which encompass and overlap with shorter themes that recur in numerous proteins. The observed complexity is consistent with evolution by duplication and divergence, and some of the themes might include descendants of ancestral segments. The observed recursive footprints, where the same amino acid can simultaneously participate in several intertwined themes, could be a useful concept for protein design. Data are available at http://trachel-srv.cs.haifa.ac.il/rachel/ppi/themes/.",2017-10-19 +30841853,iGEAK: an interactive gene expression analysis kit for seamless workflow using the R/shiny platform.,"

Background

The use of microarrays and RNA-seq technologies is ubiquitous for transcriptome analyses in modern biology. With proper analysis tools, the differential gene expression analysis process can be significantly accelerated. Many open-source programs provide cutting-edge techniques, but these often require programming skills and lack intuitive and interactive or graphical user interfaces. To avoid bottlenecks impeding seamless analysis processing, we have developed an Interactive Gene Expression Analysis Kit, we term iGEAK, focusing on usability and interactivity. iGEAK is designed to be a simple, intuitive, light-weight that contrasts with heavy-duty programs.

Results

iGEAK is an R/Shiny-based client-side desktop application, providing an interactive gene expression data analysis pipeline for microarray and RNA-seq data. Gene expression data can be intuitively explored using a seamless analysis pipeline consisting of sample selection, differentially expressed gene prediction, protein-protein interaction, and gene set enrichment analyses. For each analysis step, users can easily alter parameters to mine more relevant biological information.

Conclusion

iGEAK is the outcome of close collaboration with wet-bench biologists who are eager to easily explore, mine, and analyze new or public microarray and RNA-seq data. We designed iGEAK as a gene expression analysis pipeline tool to provide essential analysis steps and a user-friendly interactive graphical user interface. iGEAK enables users without programing knowledge to comfortably perform differential gene expression predictions and downstream analyses. iGEAK packages, manuals, tutorials, sample datasets are available at the iGEAK project homepage ( https://sites.google.com/view/iGEAK ).",2019-03-06 +29137057,"Readability of internet-sourced patient education material related to ""labour analgesia"".","We evaluated the readability of Internet-sourced patient education materials (PEMs) related to ""labour analgesia."" In addition to assessing the readability of websites, we aimed to compare commercial, personal, and academic websites.We used the most popular search engine (http://www.google.com) in our study. The first 100 websites in English that resulted from a search for the key words ""labour analgesia"" were scanned. Websites that were not in English, graphs, pictures, videos, tables, figures and list formats in the text, all punctuation, the number of words in the text is less than 100 words, feedback forms not related to education, (Uniform Resource Locator) URL websites, author information, references, legal disclaimers, and addresses and telephone numbers were excluded.The texts included in the study were assessed using the Flesch Reading Ease Score (FRES), Flesch-Kincaid Grade Level (FKGL), Simple Measure of Gobbledygook (SMOG), and Gunning Frequency of Gobbledygook (FOG) readability formulae. 
The number of Latin words within the text was determined.Analysis of 300-word sections of the texts revealed that the mean FRES was 47.54 ± 12.54 (quite difficult), mean FKGL and SMOG were 11.92 ± 2.59 and 10.57 ± 1.88 years of education, respectively, and mean Gunning FOG was 14.71 ± 2.76 (very difficult). Within 300-word sections, the mean number of Latin words was identified as 16.56 ± 6.37.In our study, the readability level of Internet-sourced PEM related to ""labour analgesia"" was identified to be quite high indicating poor readability.",2017-11-01 +28234924,DNApod: DNA polymorphism annotation database from next-generation sequence read archives.,"With the rapid advances in next-generation sequencing (NGS), datasets for DNA polymorphisms among various species and strains have been produced, stored, and distributed. However, reliability varies among these datasets because the experimental and analytical conditions used differ among assays. Furthermore, such datasets have been frequently distributed from the websites of individual sequencing projects. It is desirable to integrate DNA polymorphism data into one database featuring uniform quality control that is distributed from a single platform at a single place. DNA polymorphism annotation database (DNApod; http://tga.nig.ac.jp/dnapod/) is an integrated database that stores genome-wide DNA polymorphism datasets acquired under uniform analytical conditions, and this includes uniformity in the quality of the raw data, the reference genome version, and evaluation algorithms. DNApod genotypic data are re-analyzed whole-genome shotgun datasets extracted from sequence read archives, and DNApod distributes genome-wide DNA polymorphism datasets and known-gene annotations for each DNA polymorphism. This new database was developed for storing genome-wide DNA polymorphism datasets of plants, with crops being the first priority. 
Here, we describe our analyzed data for 679, 404, and 66 strains of rice, maize, and sorghum, respectively. The analytical methods are available as a DNApod workflow in an NGS annotation system of the DNA Data Bank of Japan and a virtual machine image. Furthermore, DNApod provides tables of links of identifiers between DNApod genotypic data and public phenotypic data. To advance the sharing of organism knowledge, DNApod offers basic and ubiquitous functions for multiple alignment and phylogenetic tree construction by using orthologous gene information.",2017-02-24 +31141611,BSA4Yeast: Web-based quantitative trait locus linkage analysis and bulk segregant analysis of yeast sequencing data.,"

Background

Quantitative trait locus (QTL) mapping using bulk segregants is an effective approach for identifying genetic variants associated with phenotypes of interest in model organisms. By exploiting next-generation sequencing technology, the QTL mapping accuracy can be improved significantly, providing a valuable means to annotate new genetic variants. However, setting up a comprehensive analysis framework for this purpose is a time-consuming and error-prone task, posing many challenges for scientists with limited experience in this domain.

Results

Here, we present BSA4Yeast, a comprehensive web application for QTL mapping via bulk segregant analysis of yeast sequencing data. The software provides an automated and efficiency-optimized data processing, up-to-date functional annotations, and an interactive web interface to explore identified QTLs.

Conclusions

BSA4Yeast enables researchers to identify plausible candidate genes in QTL regions efficiently in order to validate their genetic variations experimentally as causative for a phenotype of interest. BSA4Yeast is freely available at https://bsa4yeast.lcsb.uni.lu.",2019-06-01 +28460136,ModFOLD6: an accurate web server for the global and local quality estimation of 3D protein models.,"Methods that reliably estimate the likely similarity between the predicted and native structures of proteins have become essential for driving the acceptance and adoption of three-dimensional protein models by life scientists. ModFOLD6 is the latest version of our leading resource for Estimates of Model Accuracy (EMA), which uses a pioneering hybrid quasi-single model approach. The ModFOLD6 server integrates scores from three pure-single model methods and three quasi-single model methods using a neural network to estimate local quality scores. Additionally, the server provides three options for producing global score estimates, depending on the requirements of the user: (i) ModFOLD6_rank, which is optimized for ranking/selection, (ii) ModFOLD6_cor, which is optimized for correlations of predicted and observed scores and (iii) ModFOLD6 global for balanced performance. The ModFOLD6 methods rank among the top few for EMA, according to independent blind testing by the CASP12 assessors. The ModFOLD6 server is also continuously automatically evaluated as part of the CAMEO project, where significant performance gains have been observed compared to our previous server and other publicly available servers. The ModFOLD6 server is freely available at: http://www.reading.ac.uk/bioinf/ModFOLD/.",2017-07-01 +34750798,Letter to the Editor: THE IMPACT OF THE COVID-19 PANDEMIC ON SCHIZOPHRENIA PATIENTS.,"Dear Editor, The Covid-19 pandemic affected human life globally, inducing much stress on daily living (Çakıroğlu et al. 2020). 
Although assessments of general mental health during the Covid-19 pandemic have been widely reported, there is not adequate research on how schizophrenia patients have been affected. According to the World Health Organization (2020), individuals with chronic diseases who do not pay attention to their personal hygiene and the rules of protection from COVID-19 have a higher risk of getting infected than the healthy individuals who observe these measures. It is well known that the risk of Covid-19 infection is raised among schizophrenia patients due to negligence on the requisite control of personal hygiene and health conditions related to smoking and diabetes ( Cohn et al., 2004; Dinan et al., 2004; Krieger et al. 2019). The cognitive impairment in this disorder which reduces the perceptions on the necessity of self protection and the awareness of the risks proposed to underlie this raised risk of COVID-19 positivity (Yao et al. 2020). These patients have difficulty in following the preventive regulations (Palomar-Ciria et al., 2020). Apart from the risk of infection, there is also the risk of pandemic related development of auditory or visual hallucinations and delusional symptoms by acute and chronic psychosis patients during and after the pandemic (Brown et al. 2020, Cowan 2020). Therefore, this survey has been organised to evaluate the reaction developed by schizophrenia patients to the pandemic conditions. The first Covid-19 case was reported in Turkey on 11 March 2020 (Anadolu Agency, 11.03.2020) which was followed by the gradual increase in case numbers. In order to prevent the spread of Covid-19 and maintain the existing public health, the Republic of Turkey Ministry of Health established a 'Scientific Committee'' and prepared effective strategies including social isolation, quarantine, school closures, social distancing and wearing face mask in the community. 
During this process, the approximately 250 schizophrenia and schizoaffective disorder patients followed up by the Psychosis Outpatient Unit of Dokuz Eylul University Hospital Psychiatry Department (DEUPD) were instructed to visit the outpatient unit only in emergency conditions. It was determined that there were 176 schizophrenia patients whose follow up visit appointments for the period of April 1 - June 22, 2020, scheduled before the announcement of the pandemic, were cancelled. Therefore, the survey reported here was conducted with the schizophrenia patients of the DEUPD online and by telephone connections during 10- 20 May, 2020, the 9th and the 10th weeks of the pandemic. Only 76 (43.19%) of the 176 patients joined the survey, since 4 (2.27%) refused to participate and 96 (54.4%) could not be contacted. The survey aimed to determine the incidence of Covid-19 diagnosis among these schizophrenia patients and their attitude to the preventive measures against the infection during the first 2 months of the pandemic, together with how they felt and their needs for psychiatric consultation on outpatient basis during this period. The surveyed 76 patients consisted of 49 (64%) males and 27 (35%) females, with 73 (96.1%) dwelling in urban and 3 (3.9%) in suburban areas; and only 11 (14.5%) employed while 65 (85.5%) were not working. Only two patients reported consulting emergency services for Covid-19 symptoms. The rest of the patients did not report consulting a healthcare facility for suspecting Covid-19 symptoms or   Table 1. 
Data on the demographic, clinical and social features of the schizophrenia patients during the COVID-19 pandemic   n=76           Mean SD   Gender (F/M) 27 (35.5%) / 49 (64.5%)     Age   44.54 12.21   Disease duration   16.62 9.96   Patients living /with         Alone   3 (3.9%)     Parent(s)   43 (56.6%)     Spouse/children 25 (32.9%)     Sibling (s) 1 (1.3%)     Relative(s) 2 (2.6%)     Friend(s)   2 (2.6%)           Yes No Need to see a psychiatrist     23 (30.3%) 53 (69.7%) Subjective psychiatric complaints     32 (42.1%) 44 (57.9%) Consultation with an emergency service     2 (2.6%) 74 (97.4%) Planning to go to the hospital in the post-quarantine period   58 (76.3%) 18 (23.7%) Wearing a mask in community     67 (88.2%) 4 (5.3%) Keeping social distancing     68 (89.5%) 3 (3.9%) Expressed feeling         Loneliness   26 (34.2%) 49 (64.5%) Depressed   31 (40.8%) 44 (57.9%) Despaired   22 (28.9%) 52 (68.4%) Anxious     25 (32.9%) 49 (64.5%) Difficulty of going to the hospital in the quarantine period   53 (69.7%) 23 (30.3%)   hospital admission for Covid-19 infection or psychotic attack or incidences of Covid-19 related hallucination or delusions. During this 2-month period, 4 patients had experienced fatigue, 2 had episodes of dry cough and 7 had experienced shortness of breath, which can be associated with the nature of schizophrenia, the sedentary life style.and cigarette smoking. Medication was prescribed by a psychiatrist for 10 patients and by a family doctor for 16 patients or supplied directly by pharmacies for 45 patients on the basis of prescriptions with 1-year validity issued by the hospital* (Table 1). Much as it had been aimed to contact all patients with cancelled appointments, this objective was not attainable The patients who were not reached are likely to include those with low awareness and difficulty of adapting to infection prevention strategies. 
On the other hand, regardless of the level of awareness of the pandemic and compliance with the rules, phone use by these patients might have been limited by economic and environmental reasons, as well as the difficulties imposed by the pandemic. In conclusion, it is possible to say that most of the patients with schizophrenia were aware of the risk of COVID-19 infection, and understood and mostly obeyed the general health rules and advices of healthcare professionals even if they had difficulty in doing so. This could also have resulted from the nature of schizophrenia with preference for social isolation even if this can worsen the prognosis. On the other hand, patients need to be in contact with a mental healthcare professional in extraordinary situations of a pandemic. This survey did not find a remarkable increase in positive symptom severity in association with COVID-19 as most patients included in the survey had not seen a psychiatrist or mental healthcare professional for two months with 53 patients stating that they did not have to need.   However, 58 patients also stated that despite planning to make a consultation after normalization of the quarantine measures, the anxiety of contagion outweighed the option of visiting outpatient clinics. This anxiety over Covid-19 infection, however, may make it difficult for patients to understand the level of the need to see a psychiatrist and may be associated with the assumption that the pandemic would be taken under control in the normalization process with a decrease in the risk of contagion. On the results of this survey, it may be concluded that strategies for prevention of COVID-19 spread were effective among schizophrenia patients and that there is need to develop a system that reaches all patients and keeps them socially connected during the COVID-19 pandemic. *In Turkey, prescription reports with 1-year validity are issued for patients with chronic disorders. 
The medications can only be prescribed by a specialist, and in the case of pyshchiatric disorders, by a consultant psychiatrist. When the report is confirmed by a hospital committee of specialists, a family doctor is able to issue prescriptions. According to the decision of the Ministry of Health, patients who have medication prescription reports valid for one year would be able to get their medicines directly from pharmacies without having to consult a psychiatrist or family doctor during the pandemic.               REFERENCES Anadolu Agency (2020, Mach 11). Sağlık Bakanı Koca Türkiye'de ilk koronavirüs vakasının görüldüğünü açıkladı, https://www.aa.com.tr/tr/kor onavir us/ saglik-bakani-koca-turkiyede-ilk-koronavirus-vakasinin-goruldugunu- acikladi/1761466. Accessed 28 May 2020.   Brown E, Gray R, Lo Monaco S et al (2020) The potential impact of COVID-19 on psychosis: A rapid review of contemporary epidemic and pandemic research. Schizophr Res 222:79-87. Cohn T, Prud'homme D, Streiner D et al (2004) Characterizing coronary heart disease risk in chronic schizophrenia: High prevalence of the metabolic syndrome. Can J Psychiatry 49:753-60. Cowan, HR (2020) Is schizophrenia research relevant during the COVID-19 pandemic?. Schizophr Res 220:271-2. Çakıroğlu S, Ertaş E, and Alyanak B (2020) Letter To The Editor - The Covid-19 Pandemic And Mental Health As Issues Considered Within The Context Of Adjustment Disorder And Psychosocial Interventions. Turk Psikiyatri Derg 31:148-50. Dinan T, Holt R, Kohen D et al (2004) ""Schizophrenia and diabetes 2003"" expert consensus meeting, Dublin, 3-4 october 2003: Consensus summary. Br J Psychiatry 184 (Suppl. 47): 0-2. Krieger I, Bitan DT, Comaneshter D et al (2019) Increased risk of smoking- related illnesses in schizophrenia patients: A nationwide cohort study. Schizophr Res 212:121-5. Palomar-Ciria N, del Valle PB, Hernández-Las Heras MÁ et al (2020) Schizophrenia and COVID-19 delirium. Psychiatry Res 290:113137. 
Yao H, Chen JH, and Xu YF (2020) Patients with mental health disorders in the COVID-19 epidemic. Lancet Psychiatry 7: e21. World Health Organization (2020, Mart 25). Covid-19: Vulnerable and High Risk Group, Geneva, Switzerland: World Health Organization, https:// www.who.int/westernpacific/emergencies/covid-19/information/high-risk- groups. Accessed 28 May 2020.",2021-01-01 +30950696,Descriptive Analysis of the Interactive Patterning of the Vocalization Subsystems in Healthy Participants: A Dynamic Systems Perspective.,"Purpose Normative data for many objective voice measures are routinely used in clinical voice assessment; however, normative data reflect vocal output, but not vocalization process. The underlying physiologic processes of healthy phonation have been shown to be nonlinear and thus are likely different across individuals. Dynamic systems theory postulates that performance behaviors emerge from the nonlinear interplay of multiple physiologic components and that certain patterns are preferred and loosely governed by the interactions of physiology, task, and environment. The purpose of this study was to descriptively characterize the interactive nature of the vocalization subsystem triad in subjects with healthy voices and to determine if differing subgroups could be delineated to better understand how healthy voicing is physiologically generated. Method Respiratory kinematic, aerodynamic, and acoustic formant data were obtained from 29 individuals with healthy voices (21 female and eight male). Multivariate analyses were used to descriptively characterize the interactions among the subsystems that contributed to healthy voicing. Results Group data revealed representative measures of the 3 subsystems to be generally within the boundaries of established normative data. Despite this, 3 distinct clusters were delineated that represented 3 subgroups of individuals with differing subsystem patterning. 
Seven of the 9 measured variables in this study were found to be significantly different across at least 1 of the 3 subgroups indicating differing physiologic processes across individuals. Conclusion Vocal output in healthy individuals appears to be generated by distinct and preferred physiologic processes that were represented by 3 subgroups indicating that the process of vocalization is different among individuals, but not entirely idiosyncratic. Possibilities for these differences are explored using the framework of dynamic systems theory and the dynamics of emergent behaviors. A revised physiologic model of phonation that accounts for differences within and among the vocalization subsystems is described. Supplemental Material https://doi.org/10.23641/asha.7616462.",2019-02-01 +28737911,Platform for Unified Molecular Analysis: PUMA.,"We introduce a free platform for chemoinformatic-based diversity analysis and visualization of chemical space of user supplied data sets. Platform for Unified Molecular Analysis (PUMA) integrates metrics used to characterize compound databases including visualization of chemical space, scaffold content, and analysis of chemical diversity. The user's input is a file with SMILES, database names, and compound IDs. PUMA computes molecular properties of pharmaceutical relevance, Murcko scaffolds, and diversity analysis. The user can interactively navigate through the graphs and export image files and the raw data of the diversity calculations. The platform links two public online resources: Consensus Diversity Plots for the assessment of global diversity and Activity Landscape Plotter to analyze structure-activity relationships. Herein, we describe the functionalities of PUMA and exemplify its use through the analysis of compound databases of general interest. 
PUMA is freely accessible at the authors web-site https://www.difacquim.com/d-tools/ .",2017-08-08 +31254027,Transfusion after total knee arthroplasty can be predicted using the machine learning algorithm.,"PURPOSE:A blood transfusion after total knee arthroplasty (TKA) is associated with an increase in complication and infection rates. However, no studies have been conducted to predict transfusion after TKA using a machine learning algorithm. The purpose of this study was to identify informative preoperative variables to create a machine learning model, and to provide a web-based transfusion risk-assessment system for clinical use. METHODS:This study retrospectively reviewed 1686 patients who underwent TKA at our institution. Data for 43 preoperative variables, including medication history, laboratory values, and demographic characteristics, were collected. Variable selection was conducted using the recursive feature elimination algorithm. The transfusion group was defined as patients with haemoglobin (Hb) < 7 g/dL after TKA. A predictive model was developed using the gradient boosting machine, and the performance of the model was assessed by the area under the receiver operating characteristic curve (AUC). Data sets from an independent institution were tested with the model for external validation. RESULTS:Of the 1686 patients who underwent TKA, 108 (6.4%) were categorized into the transfusion group. Six preoperative variables were selected, including preoperative Hb, platelet count, type of surgery, tranexamic acid, age, and body weight. The predictive model demonstrated good predictive performance using the six variables [AUC 0.842; 95% confidence interval (CI) 0.820-0.856]. Performance was also good according to the external validation using 400 data from an independent institution (AUC 0.880; 95% CI 0.844-0.910). This web-based blood transfusion risk-assessment system can be accessed at http://safetka.net. 
CONCLUSIONS:A web-based predictive model for transfusion after TKA using a machine learning algorithm was developed using six preoperative variables. The model is simple, has been validated, showed good performance, and can be used before TKA to predict the risk of transfusion and guide appropriate precautions for high-risk patients. LEVEL OF EVIDENCE:Diagnostic level II.",2019-06-28 +34637131,Letter to the Editor: THE IMPACT OF THE COVID-19 PANDEMIC ON SCHIZOPHRENIA PATIENTS.,"Dear Editor, The Covid-19 pandemic affected human life globally, inducing much stress on daily living (Çakıroğlu et al. 2020). Although assessments of general mental health during the Covid-19 pandemic have been widely reported, there is not adequate research on how schizophrenia patients have been affected. According to the World Health Organization (2020), individuals with chronic diseases who do not pay attention to their personal hygiene and the rules of protection from COVID-19 have a higher risk of getting infected than the healthy individuals who observe these measures. It is well known that the risk of Covid-19 infection is raised among schizophrenia patients due to negligence on the requisite control of personal hygiene and health conditions related to smoking and diabetes ( Cohn et al., 2004; Dinan et al., 2004; Krieger et al. 2019). The cognitive impairment in this disorder which reduces the perceptions on the necessity of self protection and the awareness of the risks proposed to underlie this raised risk of COVID-19 positivity (Yao et al. 2020). These patients have difficulty in following the preventive regulations (Palomar-Ciria et al., 2020). Apart from the risk of infection, there is also the risk of pandemic related development of auditory or visual hallucinations and delusional symptoms by acute and chronic psychosis patients during and after the pandemic (Brown et al. 2020, Cowan 2020). 
Therefore, this survey has been organised to evaluate the reaction developed by schizophrenia patients to the pandemic conditions. The first Covid-19 case was reported in Turkey on 11 March 2020 (Anadolu Agency, 11.03.2020) which was followed by the gradual increase in case numbers. In order to prevent the spread of Covid-19 and maintain the existing public health, the Republic of Turkey Ministry of Health established a 'Scientific Committee'' and prepared effective strategies including social isolation, quarantine, school closures, social distancing and wearing face mask in the community. During this process, the approximately 250 schizophrenia and schizoaffective disorder patients followed up by the Psychosis Outpatient Unit of Dokuz Eylul University Hospital Psychiatry Department (DEUPD) were instructed to visit the outpatient unit only in emergency conditions. It was determined that there were 176 schizophrenia patients whose follow up visit appointments for the period of April 1 - June 22, 2020, scheduled before the announcement of the pandemic, were cancelled. Therefore, the survey reported here was conducted with the schizophrenia patients of the DEUPD online and by telephone connections during 10- 20 May, 2020, the 9th and the 10th weeks of the pandemic. Only 76 (43.19%) of the 176 patients joined the survey, since 4 (2.27%) refused to participate and 96 (54.4%) could not be contacted. The survey aimed to determine the incidence of Covid-19 diagnosis among these schizophrenia patients and their attitude to the preventive measures against the infection during the first 2 months of the pandemic, together with how they felt and their needs for psychiatric consultation on outpatient basis during this period. The surveyed 76 patients consisted of 49 (64%) males and 27 (35%) females, with 73 (96.1%) dwelling in urban and 3 (3.9%) in suburban areas; and only 11 (14.5%) employed while 65 (85.5%) were not working. 
Only two patients reported consulting emergency services for Covid-19 symptoms. The rest of the patients did not report consulting a healthcare facility for suspecting Covid-19 symptoms or   Table 1. Data on the demographic, clinical and social features of the schizophrenia patients during the COVID-19 pandemic   n=76           Mean SD   Gender (F/M) 27 (35.5%) / 49 (64.5%)     Age   44.54 12.21   Disease duration   16.62 9.96   Patients living /with         Alone   3 (3.9%)     Parent(s)   43 (56.6%)     Spouse/children 25 (32.9%)     Sibling (s) 1 (1.3%)     Relative(s) 2 (2.6%)     Friend(s)   2 (2.6%)           Yes No Need to see a psychiatrist     23 (30.3%) 53 (69.7%) Subjective psychiatric complaints     32 (42.1%) 44 (57.9%) Consultation with an emergency service     2 (2.6%) 74 (97.4%) Planning to go to the hospital in the post-quarantine period   58 (76.3%) 18 (23.7%) Wearing a mask in community     67 (88.2%) 4 (5.3%) Keeping social distancing     68 (89.5%) 3 (3.9%) Expressed feeling         Loneliness   26 (34.2%) 49 (64.5%) Depressed   31 (40.8%) 44 (57.9%) Despaired   22 (28.9%) 52 (68.4%) Anxious     25 (32.9%) 49 (64.5%) Difficulty of going to the hospital in the quarantine period   53 (69.7%) 23 (30.3%)   hospital admission for Covid-19 infection or psychotic attack or incidences of Covid-19 related hallucination or delusions. During this 2-month period, 4 patients had experienced fatigue, 2 had episodes of dry cough and 7 had experienced shortness of breath, which can be associated with the nature of schizophrenia, the sedentary life style.and cigarette smoking. Medication was prescribed by a psychiatrist for 10 patients and by a family doctor for 16 patients or supplied directly by pharmacies for 45 patients on the basis of prescriptions with 1-year validity issued by the hospital* (Table 1). 
Much as it had been aimed to contact all patients with cancelled appointments, this objective was not attainable The patients who were not reached are likely to include those with low awareness and difficulty of adapting to infection prevention strategies. On the other hand, regardless of the level of awareness of the pandemic and compliance with the rules, phone use by these patients might have been limited by economic and environmental reasons, as well as the difficulties imposed by the pandemic. In conclusion, it is possible to say that most of the patients with schizophrenia were aware of the risk of COVID-19 infection, and understood and mostly obeyed the general health rules and advices of healthcare professionals even if they had difficulty in doing so. This could also have resulted from the nature of schizophrenia with preference for social isolation even if this can worsen the prognosis. On the other hand, patients need to be in contact with a mental healthcare professional in extraordinary situations of a pandemic. This survey did not find a remarkable increase in positive symptom severity in association with COVID-19 as most patients included in the survey had not seen a psychiatrist or mental healthcare professional for two months with 53 patients stating that they did not have to need.   However, 58 patients also stated that despite planning to make a consultation after normalization of the quarantine measures, the anxiety of contagion outweighed the option of visiting outpatient clinics. This anxiety over Covid-19 infection, however, may make it difficult for patients to understand the level of the need to see a psychiatrist and may be associated with the assumption that the pandemic would be taken under control in the normalization process with a decrease in the risk of contagion. 
On the results of this survey, it may be concluded that strategies for prevention of COVID-19 spread were effective among schizophrenia patients and that there is need to develop a system that reaches all patients and keeps them socially connected during the COVID-19 pandemic. *In Turkey, prescription reports with 1-year validity are issued for patients with chronic disorders. The medications can only be prescribed by a specialist, and in the case of pyshchiatric disorders, by a consultant psychiatrist. When the report is confirmed by a hospital committee of specialists, a family doctor is able to issue prescriptions. According to the decision of the Ministry of Health, patients who have medication prescription reports valid for one year would be able to get their medicines directly from pharmacies without having to consult a psychiatrist or family doctor during the pandemic.               REFERENCES Anadolu Agency (2020, Mach 11). Sağlık Bakanı Koca Türkiye'de ilk koronavirüs vakasının görüldüğünü açıkladı, https://www.aa.com.tr/tr/kor onavir us/ saglik-bakani-koca-turkiyede-ilk-koronavirus-vakasinin-goruldugunu- acikladi/1761466. Accessed 28 May 2020.   Brown E, Gray R, Lo Monaco S et al (2020) The potential impact of COVID-19 on psychosis: A rapid review of contemporary epidemic and pandemic research. Schizophr Res 222:79-87. Cohn T, Prud'homme D, Streiner D et al (2004) Characterizing coronary heart disease risk in chronic schizophrenia: High prevalence of the metabolic syndrome. Can J Psychiatry 49:753-60. Cowan, HR (2020) Is schizophrenia research relevant during the COVID-19 pandemic?. Schizophr Res 220:271-2. Çakıroğlu S, Ertaş E, and Alyanak B (2020) Letter To The Editor - The Covid-19 Pandemic And Mental Health As Issues Considered Within The Context Of Adjustment Disorder And Psychosocial Interventions. Turk Psikiyatri Derg 31:148-50. 
Dinan T, Holt R, Kohen D et al (2004) ""Schizophrenia and diabetes 2003"" expert consensus meeting, Dublin, 3-4 october 2003: Consensus summary. Br J Psychiatry 184 (Suppl. 47): 0-2. Krieger I, Bitan DT, Comaneshter D et al (2019) Increased risk of smoking- related illnesses in schizophrenia patients: A nationwide cohort study. Schizophr Res 212:121-5. Palomar-Ciria N, del Valle PB, Hernández-Las Heras MÁ et al (2020) Schizophrenia and COVID-19 delirium. Psychiatry Res 290:113137. Yao H, Chen JH, and Xu YF (2020) Patients with mental health disorders in the COVID-19 epidemic. Lancet Psychiatry 7: e21. World Health Organization (2020, Mart 25). Covid-19: Vulnerable and High Risk Group, Geneva, Switzerland: World Health Organization, https:// www.who.int/westernpacific/emergencies/covid-19/information/high-risk- groups. Accessed 28 May 2020.",2021-01-01 +29502301,Kaleido: Visualizing Big Brain Data with Automatic Color Assignment for Single-Neuron Images.,"Effective 3D visualization is essential for connectomics analysis, where the number of neural images easily reaches over tens of thousands. A formidable challenge is to simultaneously visualize a large number of distinguishable single-neuron images, with reasonable processing time and memory for file management and 3D rendering. In the present study, we proposed an algorithm named ""Kaleido"" that can visualize up to at least ten thousand single neurons from the Drosophila brain using only a fraction of the memory traditionally required, without increasing computing time. Adding more brain neurons increases memory only nominally. Importantly, Kaleido maximizes color contrast between neighboring neurons so that individual neurons can be easily distinguished. Colors can also be assigned to neurons based on biological relevance, such as gene expression, neurotransmitters, and/or development history. For cross-lab examination, the identity of every neuron is retrievable from the displayed image. 
To demonstrate the effectiveness and tractability of the method, we applied Kaleido to visualize the 10,000 Drosophila brain neurons obtained from the FlyCircuit database ( http://www.flycircuit.tw/modules.php?name=kaleido ). Thus, Kaleido visualization requires only sensible computer memory for manual examination of big connectomics data.",2018-04-01 +23457042,RCPedia: a database of retrocopied genes.,"

Motivation

Retrocopies are copies of mature RNAs that are usually devoid of regulatory sequences and introns. They have routinely been classified as processed pseudo-genes with little or no biological relevance. However, recent findings have revealed functional roles for retrocopies, as well as their high frequency in some organisms, such as primates. Despite their increasing importance, there is no user-friendly and publicly available resource for the study of retrocopies.

Results

Here, we present RCPedia, an integrative and user-friendly database designed for the study of retrocopied genes. RCPedia contains a complete catalogue of the retrocopies that are known to be present in human and five other primate genomes, their genomic context, inter-species conservation and gene expression data. RCPedia also offers a streamlined data representation and an efficient query system.

Availability and implementation

RCPedia is available at http://www.bioinfo.mochsl.org.br/rcpedia.",2013-03-01 +31016214,Dataset of growth cone-enriched lipidome and proteome of embryonic to early postnatal mouse brain.,"A growth cone is a part of a neuron considered as a hub for axon growth, motility and guidance functions. Growth cones are thought to play a critical role during development of neurons. Growth cones also play a significant role in adult regeneration. Here, we present a dataset on the lipid and protein profiling of the growth cone-enriched fractions derived from C57BL/6J mice forebrains of developmental stage: E18, P0, P3, P6, and P9. For comparison, we analyzed non-growth cone membranes from the same samples. Lipid data is available at the Metabolomics Workbench [http://www.metabolomicsworkbench.org (Project ID: PR000746)]. Protein data is available at Proteomics Identifications (PRIDE) partner repository (PRIDE identifier PXD012134).",2019-03-26 +27899562,The ChEMBL database in 2017.,"ChEMBL is an open large-scale bioactivity database (https://www.ebi.ac.uk/chembl), previously described in the 2012 and 2014 Nucleic Acids Research Database Issues. Since then, alongside the continued extraction of data from the medicinal chemistry literature, new sources of bioactivity data have also been added to the database. These include: deposited data sets from neglected disease screening; crop protection data; drug metabolism and disposition data and bioactivity data from patents. A number of improvements and new features have also been incorporated. These include the annotation of assays and targets using ontologies, the inclusion of targets and indications for clinical candidates, addition of metabolic pathways for drugs and calculation of structural alerts. The ChEMBL data can be accessed via a web-interface, RDF distribution, data downloads and RESTful web-services.",2016-11-28 +31141144,CoCo: RNA-seq read assignment correction for nested genes and multimapped reads.,"

Motivation

Next-generation sequencing techniques revolutionized the study of RNA expression by permitting whole transcriptome analysis. However, sequencing reads generated from nested and multi-copy genes are often either misassigned or discarded, which greatly reduces both quantification accuracy and gene coverage.

Results

Here we present count corrector (CoCo), a read assignment pipeline that takes into account the multitude of overlapping and repetitive genes in the transcriptome of higher eukaryotes. CoCo uses a modified annotation file that highlights nested genes and proportionally distributes multimapped reads between repeated sequences. CoCo salvages over 15% of discarded aligned RNA-seq reads and significantly changes the abundance estimates for both coding and non-coding RNA as validated by PCR and bedgraph comparisons.

Availability and implementation

The CoCo software is an open source package written in Python and available from http://gitlabscottgroup.med.usherbrooke.ca/scott-group/coco.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +31339350,"Mortality Risk and Fine Particulate Air Pollution in a Large, Representative Cohort of U.S. Adults.","

Background

Evidence indicates that air pollution contributes to cardiopulmonary mortality. There is ongoing debate regarding the size and shape of the pollution–mortality exposure–response relationship. There are also growing appeals for estimates of pollution–mortality relationships that use public data and are based on large, representative study cohorts.

Objectives

Our goal was to evaluate fine particulate matter air pollution ([Formula: see text]) and mortality using a large cohort that is representative of the U.S. population and is based on public data. Additional objectives included exploring model sensitivity, evaluating relative effects across selected subgroups, and assessing the shape of the [Formula: see text]–mortality relationship.

Methods

National Health Interview Surveys (1986–2014), with mortality linkage through 2015, were used to create a cohort of 1,599,329 U.S. adults and a subcohort with information on smoking and body mass index (BMI) of 635,539 adults. Data were linked with modeled ambient [Formula: see text] at the census-tract level. Cox proportional hazards models were used to estimate [Formula: see text]–mortality hazard ratios for all-cause and specific causes of death while controlling for individual risk factors and regional and urban versus rural differences. Sensitivity and subgroup analyses were conducted and the shape of the [Formula: see text]–mortality relationship was explored.

Results

Estimated mortality hazard ratios, per [Formula: see text] long-term exposure to [Formula: see text], were 1.12 (95% CI: 1.08, 1.15) for all-cause mortality, 1.23 (95% CI: 1.17, 1.29) for cardiopulmonary mortality, and 1.12 (95% CI: 1.00, 1.26) for lung cancer mortality. In general, [Formula: see text]–mortality associations were consistently positive for all-cause and cardiopulmonary mortality across key modeling choices and across subgroups of sex, age, race-ethnicity, income, education levels, and geographic regions.

Discussion

This large, nationwide, representative cohort of U.S. adults provides robust evidence that long-term [Formula: see text] exposure contributes to cardiopulmonary mortality risk. The ubiquitous and involuntary nature of exposures and the broadly observed effects across subpopulations underscore the public health importance of breathing clean air. https://doi.org/10.1289/EHP4438.",2019-07-24 +30192911,Inference and visualization of DNA damage patterns using a grade of membership model.,"

Motivation

Quality control plays a major role in the analysis of ancient DNA (aDNA). One key step in this quality control is assessment of DNA damage: aDNA contains unique signatures of DNA damage that distinguish it from modern DNA, and so analyses of damage patterns can help confirm that DNA sequences obtained are from endogenous aDNA rather than from modern contamination. Predominant signatures of DNA damage include a high frequency of cytosine to thymine substitutions (C-to-T) at the ends of fragments, and elevated rates of purines (A & G) before the 5' strand-breaks. Existing QC procedures help assess damage by simply plotting for each sample, the C-to-T mismatch rate along the read and the composition of bases before the 5' strand-breaks. Here we present a more flexible and comprehensive model-based approach to infer and visualize damage patterns in aDNA, implemented in an R package aRchaic. This approach is based on a 'grade of membership' model (also known as 'admixture' or 'topic' model) in which each sample has an estimated grade of membership in each of K damage profiles that are estimated from the data.

Results

We illustrate aRchaic on data from several aDNA studies and modern individuals from 1000 Genomes Project Consortium (2012). Here, aRchaic clearly distinguishes modern from ancient samples irrespective of DNA extraction, lab and sequencing protocols. Additionally, through an in-silico contamination experiment, we show that the aRchaic grades of membership reflect relative levels of exogenous modern contamination. Together, the outputs of aRchaic provide a concise visual summary of DNA damage patterns, as well as other processes generating mismatches in the data.

Availability and implementation

aRchaic is available for download from https://www.github.com/kkdey/aRchaic.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +31583909,Structural elucidation and antimicrobial activity of a diketopiperazine isolated from a Bacillus sp. associated with the marine sponge Spongia officinalis.,"A diketopiperazine (3S, 6S)-3,6-diisobutylpiperazine-2,5-dione was isolated from a sponge-associated microbe for the first time and characterized by FTIR, HRESI-MS, 1H, 13C NMR and 2D NMR. The source is novel for this compound. Single crystal XRD of this diketopiperazine obtained as a natural product was analysed for the first time and its melting point was determined to be 262 °C. MICs of this cyclic dipeptide against Escherichia coli and Staphylococcus aureus subsp. aureus were 16 µg mL-1 and 22 µg mL-1 respectively, the first report of antibacterial activity of this diketopiperazine.Supplemental data for this article can be accessed at https://doi.org/10.1080/14786419.2019.1672684.",2019-10-04 +30621600,The 1000IBD project: multi-omics data of 1000 inflammatory bowel disease patients; data release 1.,"

Background

Inflammatory bowel disease (IBD) is a chronic complex disease of the gastrointestinal tract. Patients with IBD can experience a wide range of symptoms, but the pathophysiological mechanisms that cause these individual differences in clinical presentation remain largely unknown. In consequence, IBD is currently classified into subtypes using clinical characteristics. If we are to develop a more targeted treatment approach, molecular subtypes of IBD need to be discovered that can be used as new drug targets. To achieve this, we need multiple layers of molecular data generated from the same IBD patients.

Construction and content

We initiated the 1000IBD project ( https://1000ibd.org ) to prospectively follow more than 1000 IBD patients from the Northern provinces of the Netherlands. For these patients, we have collected a uniquely large number of phenotypes and generated multi-omics profiles. To date, 1215 participants have been enrolled in the project and enrolment is on-going. Phenotype data collected for these participants includes information on dietary and environmental factors, drug responses and adverse drug events. Genome information has been generated using genotyping (ImmunoChip, Global Screening Array and HumanExomeChip) and sequencing (whole exome sequencing and targeted resequencing of IBD susceptibility loci), transcriptome information generated using RNA-sequencing of intestinal biopsies and microbiome information generated using both sequencing of the 16S rRNA gene and whole genome shotgun metagenomic sequencing.

Utility and discussion

All molecular data generated within the 1000IBD project will be shared on the European Genome-Phenome Archive ( https://ega-archive.org , accession no: EGAS00001002702). The first data release, detailed in this announcement and released simultaneously with this publication, will contain basic phenotypes for 1215 participants, genotypes of 314 participants and gut microbiome data from stool samples (315 participants) and biopsies (107 participants) generated by tag sequencing the 16S gene. Future releases will comprise many more additional phenotypes and -omics data layers. 1000IBD data can be used by other researchers as a replication cohort, a dataset to test new software tools, or a dataset for applying new statistical models.

Conclusions

We report on the establishment and future development of the 1000IBD project: the first comprehensive multi-omics dataset aimed at discovering IBD biomarker profiles and treatment targets.",2019-01-08 +31528670,High-throughput amplicon sequencing datasets of the metacommunity DNA of the gut microbiota of naturally occurring and laboratory aquaculture green sea urchins Lytechinus variegatus.,"We present high-throughput amplicon sequence (HTS) datasets of the microbial metacommunity DNA of the gut tissue and the gut digesta of naturally occurring (n = 3) and laboratory aquaculture (n = 2) green sea urchins, Lytechinus variegatus. The HTS datasets were generated on an Illumina MiSeq by targeting the amplicons of the V4 region of the 16S rRNA gene. After the raw sequences were quality checked and filtered, 88% of the sequence reads were subjected to bioinformatics analyses to generate operation taxonomic units (OTUs), which were then verified for saturation by using rarefaction analysis at a 3% sequence variation. Further, the OTUs were randomly subsampled to the minimum sequence count values. Then, the FASTA-formatted representative sequences of the microbiota were assigned taxonomic identities through multiple databases using the SILVA ACT: Alignment, Classification and Tree Service (www.arb-silva.de/aligner). The HTS datasets of this metagenome can be accessed from the BioSample Submission Portal (https://www.ncbi.nlm.nih.gov/bioproject/) under the BioProject IDs PRJNA291441 and PRJNA326427.",2019-08-20 +31442619,"Sophora alopecuroides L.: An ethnopharmacological, phytochemical, and pharmacological review.","

Ethnopharmacological relevance

Sophora alopecuroides L., which is called Kudouzi in China, is a medicinal plant distributed in Western and Central Asia, especially in China, and has been used for decades to treat fever, bacterial infection, heart disease, rheumatism, and gastrointestinal diseases.

Aim of the review

This review aims to provide up-to-date information on S. alopecuroides, including its botanical characterization, medicinal resources, traditional uses, phytochemistry, pharmacological research, and toxicology, in exploring future therapeutic and scientific potentials.

Materials and methods

The information related to this article was systematically collected from the scientific literature databases including PubMed, Google Scholar, Web of Science, Science Direct, Springer, China National Knowledge Infrastructure, published books, PhD and MS dissertations, and other web sources, such as the official website of Flora of China and Yao Zhi website (https://db.yaozh.com/).

Results

A total of 128 compounds, such as alkaloids, flavonoids, steroids, and polysaccharides, were isolated from S. alopecuroides. Among these compounds, the effects of alkaloids, such as matrine and oxymatrine, were extensively studied and developed into new drugs. S. alopecuroides and its active components had a wide range of pharmacological activities, such as anticancer, antiviral, anti-inflammatory, antimicrobial, analgesic, and neuroprotective functions, as well as protective properties against pulmonary fibrosis and cardiac fibroblast proliferation.

Conclusions

As an important traditional Chinese medicine, modern pharmacological studies have demonstrated that S. alopecuroides has prominent bioactivities, especially on gynecological inflammation and hepatitis B, and anticancer activities. These activities provide prospects for novel drug development for cancer and some chronic diseases. Nevertheless, the comprehensive evaluation, quality control, understanding of the multitarget network pharmacology, long-term in vivo toxicity, and clinical efficacy of S. alopecuroides require further detailed research.",2019-08-20 +31732105,Evidence Is Unclear About the Best Material and Technique Required for Alveolar Ridge Preservation for Dental IMPLANT SITE Development.,"

Article title and bibliographic information

Interventions for replacing missing teeth: alveolar ridge preservation techniques for dental implant site development. Atieh MA, Alsabeeha NH, Payne AG, Duncan W, Faggion CM, Esposito M. Cochrane Database Syst Rev 2015. https://doi.org/10.1002/14651858.CD010176.pub2.

Source of funding

This research did not receive any specific grant from funding agencies in the public, commercial, or not-for-profit sectors.

Type of study/design

Systematic review with meta-analysis.",2019-08-20 +32489025,[Exploration on scientific connotation of TCM syndromes and recommended prescriptions against COVID-19 based on TCMTP V2.0].,"Coronavirus disease 2019(COVID-19) has attracted great attentions from the whole world. Traditional Chinese medicine(TCM) has been widely used and shown satisfying efficacies in treating all stages of COVID-19. In this study, the molecular interaction networks of different stages of COVID-19(the early, severe, critical and recovery stage) were constructed using the links among symptoms-related genes collected from TCMIP V2.0(http://www.tcmip.cn/), an integrated pharmacology network-computing platform for TCM. Following the network topological feature calculation and functional enrichment analysis, we found that the molecular targets and pathways related with the ""immune-inflammation system"" were involved throughout all the stages of COVID-19. The severe stage and the critical period of COVID-19 were occupied by a large proportion of inflammatory factors and pathways, suggesting that there might be a cytokine storm in these periods, along with respiratory disorders, cardiopulmonary dysfunction, nervous system disorders, etc. Accordingly, the therapeutic targets and pathways hit by the recommended prescriptions against COVID-19 were also aimed to regulate the balance of immune-inflammation system, nutrient absorption and metabolism, abnormal energy metabolism, the cardio-pulmonary function, nerve system function, etc., which may be related to the therapeutic effects of these prescriptions in terms of several clinical symptoms, such as expiratory dyspnea, chest tightness and shortness of breath, abdominal distension and constipation, sweating and limb cold, dizziness, and irritability, etc. The above findings reflect the integrative actions of TCM characterizing by multiple-components, multiple-targets, multiple-pathways, and multiple-effects. 
This study systematically constructed the molecular networks of different TCM syndromes during the development and progression of COVID-19 and uncovered the biological basis for symptomatic treatment of TCM. Furthermore, our data revealed the pharmacological mechanisms and the scientific connotation of recommended prescriptions, which may provide supports for the prevention and treatment of COVID-19 using TCM.",2020-04-01 +31325175,Spinophilin regulates phosphorylation and interactions of the GluN2B subunit of the N-methyl-d-aspartate receptor.,"N-methyl-d-Aspartate receptors (NMDARs) are abundant postsynaptic proteins that are critical for normal synaptic communication. NMDAR channel function is regulated by multiple properties, including phosphorylation. Inhibition of protein phosphatase 1 (PP1) in hippocampal neurons increases NMDAR activity, an effect abrogated by loss of spinophilin, the major PP1-targeting protein in the postsynaptic density. However, how spinophilin regulates PP1-dependent NMDAR function is unclear. We hypothesize that spinophilin regulates PP1 binding to the NMDAR to alter NMDAR phosphorylation. Our data demonstrate that spinophilin interacts with the GluN2B subunit of the NMDAR. In human embryonic kidney 293 FT cells, activation and/or overexpression of protein kinase A increased the association between spinophilin and the GluN2B subunit of the NMDAR. Functionally, we found that spinophilin overexpression decreased PP1 binding to the GluN2B subunit of the NMDAR and attenuated the PP1-dependent dephosphorylation of GluN2B at Ser-1284. Moreover, in P28 hippocampal lysates isolated from spinophilin KO compared to WT mice, there was increased binding of GluN2B to PP1, decreased phosphorylation of GluN2B at Ser-1284, and altered GluN2B protein interactions with postsynaptic density-enriched proteins. 
Together, our data demonstrate that spinophilin decreases PP1 binding to GluN2B and concomitantly enhances the phosphorylation of GluN2B at Ser-1284. The putative consequences of these spinophilin-dependent alterations in GluN2B phosphorylation and interactions on synaptic GluN2B localization and function are discussed. Open Science: This manuscript was awarded with the Open Materials Badge For more information see: https://cos.io/our-services/open-science-badges/.",2019-08-02 +30835258,A longitudinal neuroimaging dataset on arithmetic processing in school children.,"We describe functional and structural data acquired using a 3T scanner in a sample of 132 typically developing children, who were scanned when they were approximately 11 years old (i.e. Time 1). Sixty-three of them were scanned again approximately 2 years later (i.e. Time 2). Children performed four tasks inside the scanner: two arithmetic tasks and two localizer tasks. The arithmetic tasks were a single-digit multiplication and a single-digit subtraction task. The localizer tasks, a written rhyming judgment task and a numerosity judgment task, were used to independently identify verbal and quantity brain areas, respectively. Additionally, we provide data on behavioral performance on the tasks inside the scanner, participants' scores on standardized tests, including reading and math skill, and a developmental history questionnaire completed by parents. This dataset could be useful to answer questions regarding the neural bases of the development of math in children and its relation to individual differences in skill. The data, entitled ""Brain Correlates of Math Development"", are freely available from OpenNeuro (https://openneuro.org).",2019-03-05 +29580709,Impact of Surgical Factors on Robotic Partial Nephrectomy Outcomes: Comprehensive Systematic Review and Meta-Analysis.,"

Purpose

Utilization of robotic partial nephrectomy has increased significantly. We report a literature wide systematic review and cumulative meta-analysis to critically evaluate the impact of surgical factors on the operative, perioperative, functional, oncologic and survival outcomes in patients undergoing robotic partial nephrectomy.

Materials and methods

All English language publications on robotic partial nephrectomy comparing various surgical approaches were evaluated. We followed the PRISMA (Preferred Reporting Items for Systematic Review and Meta-Analyses) statement and AHRQ (Agency for Healthcare Research and Quality) guidelines to evaluate PubMed®, Scopus® and Web of Science™ databases (January 1, 2000 to October 31, 2016, updated June 2017). Weighted mean difference and odds ratio were used to compare continuous and dichotomous variables, respectively. Sensitivity analyses were performed as needed. To condense the sheer volume of analyses, for the first time data are presented using novel summary forest plots. The study was registered at PROSPERO (https://www.crd.york.ac.uk/prospero/, ID CRD42017062712).

Results

Our meta-analysis included 20,282 patients. When open partial nephrectomy was compared to robotic partial nephrectomy, the latter was superior for blood loss (weighted mean difference 85.01, p  <0.00001), transfusions (OR 1.81, p <0.001), complications (OR 1.87, p <0.00001), hospital stay (weighted mean difference 2.26, p = 0.001), readmissions (OR 2.58, p = 0.005), percentage reduction of latest estimated glomerular filtration rate (weighted mean difference 0.37, p = 0.04), overall mortality (OR 4.45, p <0.0001) and recurrence rate (OR 5.14, p <0.00001). Sensitivity analyses adjusting for baseline disparities revealed similar findings. When robotic partial nephrectomy was compared to laparoscopic partial nephrectomy, the former was superior for ischemia time (weighted mean difference 4.21, p <0.0001), conversion rate (OR 2.61, p = 0.002), intraoperative (OR 2.05, p >0.0001) and postoperative complications (OR 1.27, p = 0.0003), positive margins (OR 2.01, p <0.0001), percentage decrease of latest estimated glomerular filtration rate (weighted mean difference -1.97, p = 0.02) and overall mortality (OR 2.98, p = 0.04). Hilar control techniques, selective and unclamped, are effective alternatives to clamped robotic partial nephrectomy. An important limitation is the overall suboptimal level of evidence of publications in the field of robotic partial nephrectomy. No level I prospective randomized data are available. Oxford level of evidence was level II, III and IV in 5%, 74% and 21% of publications, respectively. No study has indexed functional outcomes against volume of parenchyma preserved.

Conclusions

Based on the contemporary literature, our comprehensive meta-analysis indicates that robotic partial nephrectomy delivers mostly superior, and at a minimum equivalent, outcomes compared to open and laparoscopic partial nephrectomy. Robotics has now matured into an excellent approach for performing partial nephrectomy for renal masses.",2018-03-24 +31066451,"CPGAVAS2, an integrated plastome sequence annotator and analyzer.","We previously developed a web server CPGAVAS for annotation, visualization and GenBank submission of plastome sequences. Here, we upgrade the server into CPGAVAS2 to address the following challenges: (i) inaccurate annotation in the reference sequence likely causing the propagation of errors; (ii) difficulty in the annotation of small exons of genes petB, petD and rps16 and trans-splicing gene rps12; (iii) lack of annotation for other genome features and their visualization, such as repeat elements; and (iv) lack of modules for diversity analysis of plastomes. In particular, CPGAVAS2 provides two reference datasets for plastome annotation. The first dataset contains 43 plastomes whose annotation have been validated or corrected by RNA-seq data. The second one contains 2544 plastomes curated with sequence alignment. Two new algorithms are also implemented to correctly annotate small exons and trans-splicing genes. Tandem and dispersed repeats are identified, whose results are displayed on a circular map together with the annotated genes. DNA-seq and RNA-seq data can be uploaded for identification of single-nucleotide polymorphism sites and RNA-editing sites. The results of two case studies show that CPGAVAS2 annotates better than several other servers. 
CPGAVAS2 will likely become an indispensible tool for plastome research and can be accessed from http://www.herbalgenomics.org/cpgavas2.",2019-07-01 +31412138,Boosting phosphorylation site prediction with sequence feature-based machine learning.,"Protein phosphorylation is one of the essential posttranslation modifications playing a vital role in the regulation of many fundamental cellular processes. We propose a LightGBM-based computational approach that uses evolutionary, geometric, sequence environment, and amino acid-specific features to decipher phosphate binding sites from a protein sequence. Our method, while compared with other existing methods on 2429 protein sequences taken from standard Phospho.ELM (P.ELM) benchmark data set featuring 11 organisms reports a higher F1 score = 0.504 (harmonic mean of the precision and recall) and ROC AUC = 0.836 (area under the curve of the receiver operating characteristics). The computation time of our proposed approach is much less than that of the recently developed deep learning-based framework. Structural analysis on selected protein sequences informs that our prediction is the superset of the phosphorylation sites, as mentioned in P.ELM data set. The foundation of our scheme is manual feature engineering and a decision tree-based classification. Hence, it is intuitive, and one can interpret the final tree as a set of rules resulting in a deeper understanding of the relationships between biophysical features and phosphorylation sites. Our innovative problem transformation method permits more control over precision and recall as is demonstrated by the fact that if we incorporate output probability of the existing deep learning framework as an additional feature, then our prediction improves (F1 score = 0.546; ROC AUC = 0.849). 
The implementation of our method can be accessed at http://cse.iitkgp.ac.in/~pralay/resources/PPSBoost/ and is mirrored at https://cosmos.iitkgp.ac.in/PPSBoost.",2019-08-22 +28802948,Modelling the toxicity of a large set of metal and metal oxide nanoparticles using the OCHEM platform.,"Inorganic nanomaterials have become one of the new areas of modern knowledge and technology and have already found an increasing number of applications. However, some nanoparticles show toxicity to living organisms, and can potentially have a negative influence on environmental ecosystems. While toxicity can be determined experimentally, such studies are time consuming and costly. Computational toxicology can provide an alternative approach and there is a need to develop methods to reliably assess Quantitative Structure-Property Relationships for nanomaterials (nano-QSPRs). Importantly, development of such models requires careful collection and curation of data. This article overviews freely available nano-QSPR models, which were developed using the Online Chemical Modeling Environment (OCHEM). Multiple data on toxicity of nanoparticles to different living organisms were collected from the literature and uploaded in the OCHEM database. The main characteristics of nanoparticles such as chemical composition of nanoparticles, average particle size, shape, surface charge and information about the biological test species were used as descriptors for developing QSPR models. QSPR methodologies used Random Forests (WEKA-RF), k-Nearest Neighbors and Associative Neural Networks. The predictive ability of the models was tested through cross-validation, giving cross-validated coefficients q2 = 0.58-0.80 for regression models and balanced accuracies of 65-88% for classification models. These results matched the predictions for the test sets used to develop the models. 
The proposed nano-QSPR models and uploaded data are freely available online at http://ochem.eu/article/103451 and can be used for estimation of toxicity of new and emerging nanoparticles at the early stages of nanomaterial development.",2017-08-09 +31950866,The Promises and Challenges of Toxico-Epigenomics: Environmental Chemicals and Their Impacts on the Epigenome.,"BACKGROUND:It has been estimated that a substantial portion of chronic and noncommunicable diseases can be caused or exacerbated by exposure to environmental chemicals. Multiple lines of evidence indicate that early life exposure to environmental chemicals at relatively low concentrations could have lasting effects on individual and population health. Although the potential adverse effects of environmental chemicals are known to the scientific community, regulatory agencies, and the public, little is known about the mechanistic basis by which these chemicals can induce long-term or transgenerational effects. To address this question, epigenetic mechanisms have emerged as the potential link between genetic and environmental factors of health and disease. OBJECTIVES:We present an overview of epigenetic regulation and a summary of reported evidence of environmental toxicants as epigenetic disruptors. We also discuss the advantages and challenges of using epigenetic biomarkers as an indicator of toxicant exposure, using measures that can be taken to improve risk assessment, and our perspectives on the future role of epigenetics in toxicology. DISCUSSION:Until recently, efforts to apply epigenomic data in toxicology and risk assessment were restricted by an incomplete understanding of epigenomic variability across tissue types and populations. This is poised to change with the development of new tools and concerted efforts by researchers across disciplines that have led to a better understanding of epigenetic mechanisms and comprehensive maps of epigenomic variation. 
With the foundations now in place, we foresee that unprecedented advancements will take place in the field in the coming years. https://doi.org/10.1289/EHP6104.",2020-01-17 +31423613,GalaxyDock3: Protein-ligand docking that considers the full ligand conformational flexibility.,"Predicting conformational changes of both the protein and the ligand is a major challenge when a protein-ligand complex structure is predicted from the unbound protein and ligand structures. Herein, we introduce a new protein-ligand docking program called GalaxyDock3 that considers the full ligand conformational flexibility by explicitly sampling the ligand ring conformation and allowing the relaxation of the full ligand degrees of freedom, including bond angles and lengths. This method is based on the previous version (GalaxyDock2) which performs the global optimization of a designed score function. Ligand ring conformation is sampled from a ring conformation library constructed from structure databases. The GalaxyDock3 score function was trained with an additional bonded energy term for the ligand on a large set of complex structures. The performance of GalaxyDock3 was improved compared to GalaxyDock2 when predicted ligand conformation was used as the input for docking, especially when the input ligand conformation differs significantly from the crystal conformation. GalaxyDock3 also compared favorably with other available docking programs on two benchmark tests that contained diverse ligand rings. The program is freely available at http://galaxy.seoklab.org/softwares/galaxydock.html. © 2019 Wiley Periodicals, Inc.",2019-08-19 +30500871,DiTaxa: nucleotide-pair encoding of 16S rRNA for host phenotype and biomarker detection.,"

Summary

Identifying distinctive taxa for micro-biome-related diseases is considered key to the establishment of diagnosis and therapy options in precision medicine and imposes high demands on the accuracy of micro-biome analysis techniques. We propose an alignment- and reference- free subsequence based 16S rRNA data analysis, as a new paradigm for micro-biome phenotype and biomarker detection. Our method, called DiTaxa, substitutes standard operational taxonomic unit (OTU)-clustering by segmenting 16S rRNA reads into the most frequent variable-length subsequences. We compared the performance of DiTaxa to the state-of-the-art methods in phenotype and biomarker detection, using human-associated 16S rRNA samples for periodontal disease, rheumatoid arthritis and inflammatory bowel diseases, as well as a synthetic benchmark dataset. DiTaxa performed competitively to the k-mer based state-of-the-art approach in phenotype prediction while outperforming the OTU-based state-of-the-art approach in finding biomarkers in both resolution and coverage evaluated over known links from literature and synthetic benchmark datasets.

Availability and implementation

DiTaxa is available under the Apache 2 license at http://llp.berkeley.edu/ditaxa.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +31951607,A new method for inferring timetrees from temporally sampled molecular sequences.,"Pathogen timetrees are phylogenies scaled to time. They reveal the temporal history of a pathogen spread through the populations as captured in the evolutionary history of strains. These timetrees are inferred by using molecular sequences of pathogenic strains sampled at different times. That is, temporally sampled sequences enable the inference of sequence divergence times. Here, we present a new approach (RelTime with Dated Tips [RTDT]) to estimating pathogen timetrees based on a relative rate framework underlying the RelTime approach that is algebraic in nature and distinct from all other current methods. RTDT does not require many of the priors demanded by Bayesian approaches, and it has light computing requirements. In analyses of an extensive collection of computer-simulated datasets, we found the accuracy of RTDT time estimates and the coverage probabilities of their confidence intervals (CIs) to be excellent. In analyses of empirical datasets, RTDT produced dates that were similar to those reported in the literature. In comparative benchmarking with Bayesian and non-Bayesian methods (LSD, TreeTime, and treedater), we found that no method performed the best in every scenario. So, we provide a brief guideline for users to select the most appropriate method in empirical data analysis. RTDT is implemented for use via a graphical user interface and in high-throughput settings in the newest release of cross-platform MEGA X software, freely available from http://www.megasoftware.net.",2020-01-17 +31725859,"Extension modules for storage, visualization and querying of genomic, genetic and breeding data in Tripal databases. ","Tripal is an open-source database platform primarily used for development of genomic, genetic and breeding databases. 
We report here on the release of the Chado Loader, Chado Data Display and Chado Search modules to extend the functionality of the core Tripal modules. These new extension modules provide additional tools for (1) data loading, (2) customized visualization and (3) advanced search functions for supported data types such as organism, marker, QTL/Mendelian Trait Loci, germplasm, map, project, phenotype, genotype and their respective metadata. The Chado Loader module provides data collection templates in Excel with defined metadata and data loaders with front end forms. The Chado Data Display module contains tools to visualize each data type and the metadata which can be used as is or customized as desired. The Chado Search module provides search and download functionality for the supported data types. Also included are the tools to visualize map and species summary. The use of materialized views in the Chado Search module enables better performance as well as flexibility of data modeling in Chado, allowing existing Tripal databases with different metadata types to utilize the module. These Tripal Extension modules are implemented in the Genome Database for Rosaceae (rosaceae.org), CottonGen (cottongen.org), Citrus Genome Database (citrusgenomedb.org), Genome Database for Vaccinium (vaccinium.org) and the Cool Season Food Legume Database (coolseasonfoodlegume.org). Database URL: https://www.citrusgenomedb.org/, https://www.coolseasonfoodlegume.org/, https://www.cottongen.org/, https://www.rosaceae.org/, https://www.vaccinium.org/.",2017-01-01 +30553884,GITAR: An Open Source Tool for Analysis and Visualization of Hi-C Data.,"Interactions between chromatin segments play a large role in functional genomic assays and developments in genomic interaction detection methods have shown interacting topological domains within the genome. Among these methods, Hi-C plays a key role. 
Here, we present the Genome Interaction Tools and Resources (GITAR), a software to perform a comprehensive Hi-C data analysis, including data preprocessing, normalization, and visualization, as well as analysis of topologically-associated domains (TADs). GITAR is composed of two main modules: (1) HiCtool, a Python library to process and visualize Hi-C data, including TAD analysis; and (2) processed data library, a large collection of human and mouse datasets processed using HiCtool. HiCtool leads the user step-by-step through a pipeline, which goes from the raw Hi-C data to the computation, visualization, and optimized storage of intra-chromosomal contact matrices and TAD coordinates. A large collection of standardized processed data allows the users to compare different datasets in a consistent way, while saving time to obtain data for visualization or additional analyses. More importantly, GITAR enables users without any programming or bioinformatic expertise to work with Hi-C data. GITAR is publicly available at http://genomegitar.org as an open-source software.",2018-10-01 +31510698,Prediction of mRNA subcellular localization using deep recurrent neural networks.,"

Motivation

Messenger RNA subcellular localization mechanisms play a crucial role in post-transcriptional gene regulation. This trafficking is mediated by trans-acting RNA-binding proteins interacting with cis-regulatory elements called zipcodes. While new sequencing-based technologies allow the high-throughput identification of RNAs localized to specific subcellular compartments, the precise mechanisms at play, and their dependency on specific sequence elements, remain poorly understood.

Results

We introduce RNATracker, a novel deep neural network built to predict, from their sequence alone, the distributions of mRNA transcripts over a predefined set of subcellular compartments. RNATracker integrates several state-of-the-art deep learning techniques (e.g. CNN, LSTM and attention layers) and can make use of both sequence and secondary structure information. We report on a variety of evaluations showing RNATracker's strong predictive power, which is significantly superior to a variety of baseline predictors. Despite its complexity, several aspects of the model can be isolated to yield valuable, testable mechanistic hypotheses, and to locate candidate zipcode sequences within transcripts.

Availability and implementation

Code and data can be accessed at https://www.github.com/HarveyYan/RNATracker.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +27141756,DEVELOPMENT OF AN ANNOTATION SCHEME FOR STANDARDIZED DESCRIPTION OF MATHEMATICAL MODELS IN THE FIELD OF PLANT PROTECTION.,"Mathematical models on properties and behavior of harmful organisms in the food chain are an increas- ingly relevant approach of the agriculture and food industry. As a consequence, there are many efforts to develop biological models in science, economics and risk assessment nowadays. However, there is a lack of international harmonized standards on model annotation and model formats, which would be neces- sary to set up efficient tools supporting broad model application and information exchange. There are some established standards in the field of systems biology, but there is currently no corresponding provi- sion in the area of plant protection. This work therefore aimed at the development of an annotation scheme using domain-specific metadata. The proposed scheme has been validated in a prototype implementation of a web-database model repository. This prototypic community resource currently contains models on aflatoxin secreting fungal Aspergillus flavus in maize, as these models have a high relevance to food safety and economic impact. Specifically, models describing biological processes of the fungus (growth, Aflatoxin secreting), as well as dose-response- and carry over models were included. Furthermore, phenological models for maize were integrated as well. The developed annotation scheme is based on the well-established data exchange format SBML, which is broadly applied in the field of systems biology. The identified example models were annotated according to the developed scheme and entered into a Web-table (Google Sheets), which was transferred to a web based demonstrator available at https://sites.google.com/site/test782726372685/. 
By implementation of a software demonstrator it became clear that the proposed annotation scheme can be applied to models on plant pathogens and that broad adoption within the domain could promote communication and application of mathematical models.",2015-01-01 +31197309,iTUPA: an online automated application to perform Topographic-Unit Parsimony Analysis.,"

Summary

iTUPA is a free online application for automatizing the Topographic-Unit Parsimony Analysis (TUPA), which identifies areas of endemism based on topography. iTUPA generates species-occurrences matrices based on user-defined topographic units (TUs) and provides a parsimony analysis of the generated matrix. We tested iTUPA after a proposal of regionalization for the Brazilian Atlantic Forest. iTUPA can handle millions of species registers simultaneously and uses Google Earth high-definition maps to visually explore the endemism data. We believe iTUPA is a useful tool for further discussions on biodiversity conservation.

Availability and implementation

iTUPA is hosted on Google cloud and freely available at http://nuvem.ufabc.edu.br/itupa. iTUPA is implemented using R (version 3.5.1), with RStudio 1.1.453 used as the implementation IDE, Shiny 1.1.0 web framework, and Google Maps® API version 3.36.",2019-11-01 +31135038,Seq2Feature: a comprehensive web-based feature extraction tool.,"

Motivation

Machine learning techniques require various descriptors from protein and nucleic acid sequences to understand/predict their structure and function as well as distinguishing between disease and neutral mutations. Hence, availability of a feature extraction tool is necessary to bridge the gap.

Results

We developed a comprehensive web-based tool, Seq2Feature, which computes 252 protein and 41 DNA sequence-based descriptors. These features include physicochemical, energetic and conformational properties of proteins, mutation matrices and contact potentials as well as nucleotide composition, physicochemical and conformational properties of DNA. We propose that Seq2Feature could serve as an effective tool for extracting protein and DNA sequence-based features as applicable inputs to machine learning algorithms.

Availability and implementation

https://www.iitm.ac.in/bioinfo/SBFE/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +23793747,Linc2GO: a human LincRNA function annotation resource based on ceRNA hypothesis.,"

Unlabelled

Large numbers of long intergenic non-coding RNA (lincRNA) have been detected through high-throughput sequencing technology. However, currently we still know very little about their functions. Therefore, a lincRNA function annotation database is needed to facilitate the study in this field. In this article, we present Linc2GO, a web resource that aims to provide comprehensive functional annotations for human lincRNA. MicroRNA-mRNA and microRNA-lincRNA interaction data were integrated to generate lincRNA functional annotations based on the 'competing endogenous RNA hypothesis'. To the best of our knowledge, Linc2GO is the first database that makes use of the 'competing endogenous RNA hypothesis' to predict lincRNA functions.

Availability

Freely available at http://www.bioinfo.tsinghua.edu.cn/~liuke/Linc2GO/index.html",2013-06-22 +29876463,"ESI-LC-MS based-metabolomics data of mangosteen (Garcinia mangostana Linn.) fruit pericarp, aril and seed at different ripening stages.","Fruit ripening is a complex phenomenon involving a series of biochemical, physiological and organoleptic changes. Ripening process in mangosteen (Garcinia mangostana Linn.) is unique of which the fruit will only ripen properly if harvested during its middle stage (emergence of purple/pink colour) but not earlier (green stage). The knowledge on the molecular mechanism and regulation behind this phenomenon is still limited. Hence, electrospray ionization liquid chromatography mass spectrometry (ESI-LC-MS) based metabolomics analysis was applied to determine the metabolome of mangosteen ripening. Specifically, mangosteen pericarp, aril and seed were collected at four different ripening stages (stage 0: green, stage 2: yellowish with pink patches, stage 4: brownish red and stage 6: dark purple) and subjected to metabolite profiling analysis. The data provided in this article have been deposited to the EMBL-EBI MetaboLights database (DOI: 10.1093/nar/gks1004. PubMed PMID: 23109552) with the identifier MTBLS595. The complete dataset can be accessed here https://www.ebi.ac.uk/metabolights/MTBLS595.",2018-02-15 +29028897,Sipros Ensemble improves database searching and filtering for complex metaproteomics.,"Motivation:Complex microbial communities can be characterized by metagenomics and metaproteomics. However, metagenome assemblies often generate enormous, and yet incomplete, protein databases, which undermines the identification of peptides and proteins in metaproteomics. This challenge calls for increased discrimination of true identifications from false identifications by database searching and filtering algorithms in metaproteomics. Results:Sipros Ensemble was developed here for metaproteomics using an ensemble approach. 
Three diverse scoring functions from MyriMatch, Comet and the original Sipros were incorporated within a single database searching engine. Supervised classification with logistic regression was used to filter database searching results. Benchmarking with soil and marine microbial communities demonstrated a higher number of peptide and protein identifications by Sipros Ensemble than MyriMatch/Percolator, Comet/Percolator, MS-GF+/Percolator, Comet & MyriMatch/iProphet and Comet & MyriMatch & MS-GF+/iProphet. Sipros Ensemble was computationally efficient and scalable on supercomputers. Availability and implementation:Freely available under the GNU GPL license at http://sipros.omicsbio.org. Contact:cpan@utk.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-03-01 +29785409,De novo genome and transcriptome resources of the Adzuki bean borer Ostrinia scapulalis (Lepidoptera: Crambidae).,"We present a draft genome assembly with a de novo prediction and automated functional annotation of coding genes, and a reference transcriptome of the Adzuki bean borer, Ostrinia scapulalis, based on RNA sequencing of various tissues and developmental stages. The genome assembly spans 419 Mb, has a GC content of 37.4% and includes 26,120 predicted coding genes. The reference transcriptome holds 33,080 unigenes and contains a high proportion of a set of genes conserved in eukaryotes and arthropods, used as quality assessment of the reconstructed transcripts. The new genomic and transcriptomic data presented here significantly enrich the public sequence databases for the Crambidae and Lepidoptera, and represent useful resources for future researches related to the evolution and the adaptation of phytophagous moths. 
The genome and transcriptome assemblies have been deposited and made accessible via a NCBI BioProject (id PRJNA390510) and the LepidoDB database (http://bipaa.genouest.org/sp/ostrinia_scapulalis/).",2018-02-01 +31283070,"VIPdb, a genetic Variant Impact Predictor Database.","Genome sequencing identifies vast number of genetic variants. Predicting these variants' molecular and clinical effects is one of the preeminent challenges in human genetics. Accurate prediction of the impact of genetic variants improves our understanding of how genetic information is conveyed to molecular and cellular functions, and is an essential step towards precision medicine. Over one hundred tools/resources have been developed specifically for this purpose. We summarize these tools as well as their characteristics, in the genetic Variant Impact Predictor Database (VIPdb). This database will help researchers and clinicians explore appropriate tools, and inform the development of improved methods. VIPdb can be browsed and downloaded at https://genomeinterpretation.org/vipdb.",2019-08-17 +30753300,PairedFB: a full hierarchical Bayesian model for paired RNA-seq data with heterogeneous treatment effects.,"

Motivation

Several methods have been proposed for the paired RNA-seq analysis. However, many of them do not consider the heterogeneity in treatment effect among pairs that can naturally arise in real data. In addition, it has been reported in literature that the false discovery rate (FDR) control of some popular methods has been problematic. In this paper, we present a full hierarchical Bayesian model for the paired RNA-seq count data that accounts for variation of treatment effects among pairs and controls the FDR through the posterior expected FDR.

Results

Our simulation studies show that most competing methods can have highly inflated FDR for small to moderate sample sizes while PairedFB is able to control FDR close to the nominal levels. Furthermore, PairedFB has overall better performance in ranking true differentially expressed genes (DEGs) on the top than others, especially when the sample size gets bigger or when the heterogeneity level of treatment effects is high. In addition, PairedFB can be applied to identify the biologically significant DEGs with controlled FDR. The real data analysis also indicates PairedFB tends to find more biologically relevant genes even when the sample size is small. PairedFB is also shown to be robust with respect to the model misspecification in terms of its relative performance compared to others.

Availability and implementation

Software to implement this method (PairedFB) can be downloaded at: https://sites.google.com/a/udel.edu/qiujing/publication.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +24170808,MitoBreak: the mitochondrial DNA breakpoints database.,"Mitochondrial DNA (mtDNA) rearrangements are key events in the development of many diseases. Investigations of mtDNA regions affected by rearrangements (i.e. breakpoints) can lead to important discoveries about rearrangement mechanisms and can offer important clues about the causes of mitochondrial diseases. Here, we present the mitochondrial DNA breakpoints database (MitoBreak; http://mitobreak.portugene.com), a free, web-accessible comprehensive list of breakpoints from three classes of somatic mtDNA rearrangements: circular deleted (deletions), circular partially duplicated (duplications) and linear mtDNAs. Currently, MitoBreak contains >1400 mtDNA rearrangements from seven species (Homo sapiens, Mus musculus, Rattus norvegicus, Macaca mulatta, Drosophila melanogaster, Caenorhabditis elegans and Podospora anserina) and their associated phenotypic information collected from nearly 400 publications. The database allows researchers to perform multiple types of data analyses through user-friendly interfaces with full or partial datasets. It also permits the download of curated data and the submission of new mtDNA rearrangements. For each reported case, MitoBreak also documents the precise breakpoint positions, junction sequences, disease or associated symptoms and links to the related publications, providing a useful resource to study the causes and consequences of mtDNA structural alterations.",2013-10-28 +,Orphan Crops Browser: a bridge between model and orphan crops,"Many important crops have received little attention by the scientific community, either because they are not considered economically important or due to their large and complex genomes. De novo transcriptome assembly, using next-generation sequencing data, is an attractive option for the study of these orphan crops. 
In spite of the large amount of sequencing data that can be generated, there is currently a lack of tools which can effectively help molecular breeders and biologists to mine this type of information. Our goal was to develop a tool that enables molecular breeders, without extensive bioinformatics knowledge, to efficiently study de novo transcriptome data from any orphan crop (http://www.bioinformatics.nl/denovobrowser/db/species/index). The Orphan Crops Browser has been designed to facilitate the following tasks (1) search and identification of candidate transcripts based on phylogenetic relationships between orthologous sequence data from a set of related species and (2) design specific and degenerate primers for expression studies in the orphan crop of interest. To demonstrate the usability and reliability of the browser, it was used to identify the putative orthologues of 17 known lignin biosynthetic genes from maize and sugarcane in the orphan crop Miscanthus sinensis. Expression studies in miscanthus stem internode tissue differing in maturation were subsequently carried out, to follow the expression of these genes during lignification. Our results showed a negative correlation between lignin content and gene expression. The present data are in agreement with recent findings in maize and other crops, and it is further discussed in this paper.",2016-01-01 +21394448,GIDMP: good protein-protein interaction data metamining practice.,"Studying the interactome is one of the exciting frontiers of proteomics, as shown lately at the recent bioinformatics conferences (for example ISMB 2010, or ECCB 2010). Distribution of data is facilitated by a large number of databases. Metamining databases have been created in order to allow researchers access to several databases in one search, but there are serious difficulties for end users to evaluate the metamining effort. 
Therefore we suggest a new standard, ""Good Interaction Data Metamining Practice"" (GIDMP), which could be easily automated and requires only very minor inclusion of statistical data on each database homepage. Widespread adoption of the GIDMP standard would provide users with: a standardized way to evaluate the statistics provided by each metamining database, thus enhancing the end-user experience; a stable contact point for each database, allowing the smooth transition of statistics; a fully automated system, enhancing time- and cost-effectiveness. The proposed information can be presented as a few hidden lines of text on the source database www page, and a constantly updated table for a metamining database included in the source/credits web page.",2011-03-09 +31317060,Descriptive statistics and visualization of data from the R datasets package with implications for clusterability.,"The manuscript describes and visualizes datasets from the datasets package in the R statistical software, focusing on descriptive statistics and visualizations that provide insights into the clusterability of these datasets. These publicly available datasets are contained in the R software system, and can be downloaded at https://www.r-project.org/, with documentation provided at https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html. Further information on clusterability is found in the companion to this article, To Cluster or Not to Cluster: An Analysis of Clusterability Methods? (https://doi.org/10.1016/j.patcog.2018.10.026). Brief descriptions and graphs of the variables contained in each dataset are provided in the form of means, extrema, quartiles, standard deviation and standard error. Two-dimensional plots for each pair of variables are provided. Original references to the data sets are included when available. Further, each dataset is reduced to a single dimension by each of two different methods: pairwise distances and principal component analysis. 
For the latter, only the first component is used. Histograms of the reduced data are included for every dataset using both methods.",2019-05-24 +31083984,QSAR-Co: An Open Source Software for Developing Robust Multitasking or Multitarget Classification-Based QSAR Models.,"Quantitative structure-activity relationships (QSAR) modeling is a well-known computational technique with wide applications in fields such as drug design, toxicity predictions, nanomaterials, etc. However, QSAR researchers still face certain problems to develop robust classification-based QSAR models, especially while handling response data pertaining to diverse experimental and/or theoretical conditions. In the present work, we have developed an open source standalone software ""QSAR-Co"" (available to download at https://sites.google.com/view/qsar-co ) to setup classification-based QSAR models that allow mining the response data coming from multiple conditions. The software comprises two modules: (1) the Model development module and (2) the Screen/Predict module. This user-friendly software provides several functionalities required for developing a robust multitasking or multitarget classification-based QSAR model using linear discriminant analysis or random forest techniques, with appropriate validation, following the principles set by the Organisation for Economic Co-operation and Development (OECD) for applying QSAR models in regulatory assessments.",2019-05-24 +33575571,RNAsamba: neural network-based assessment of the protein-coding potential of RNA sequences.,"The advent of high-throughput sequencing technologies made it possible to obtain large volumes of genetic information, quickly and inexpensively. Thus, many efforts are devoted to unveiling the biological roles of genomic elements, being the distinction between protein-coding and long non-coding RNAs one of the most important tasks. 
We describe RNAsamba, a tool to predict the coding potential of RNA molecules from sequence information using a neural network-based that models both the whole sequence and the ORF to identify patterns that distinguish coding from non-coding transcripts. We evaluated RNAsamba's classification performance using transcripts coming from humans and several other model organisms and show that it recurrently outperforms other state-of-the-art methods. Our results also show that RNAsamba can identify coding signals in partial-length ORFs and UTR sequences, evidencing that its algorithm is not dependent on complete transcript sequences. Furthermore, RNAsamba can also predict small ORFs, traditionally identified with ribosome profiling experiments. We believe that RNAsamba will enable faster and more accurate biological findings from genomic data of species that are being sequenced for the first time. A user-friendly web interface, the documentation containing instructions for local installation and usage, and the source code of RNAsamba can be found at https://rnasamba.lge.ibi.unicamp.br/.",2020-01-13 +22923302,HSPIR: a manually annotated heat shock protein information resource.,"

Summary

Heat shock protein information resource (HSPIR) is a concerted database of six major heat shock proteins (HSPs), namely, Hsp70, Hsp40, Hsp60, Hsp90, Hsp100 and small HSP. The HSPs are essential for the survival of all living organisms, as they protect the conformations of proteins on exposure to various stress conditions. They are a highly conserved group of proteins involved in diverse physiological functions, including de novo folding, disaggregation and protein trafficking. Moreover, their critical role in the control of disease progression made them a prime target of research. Presently, limited information is available on HSPs in reference to their identification and structural classification across genera. To that extent, HSPIR provides manually curated information on sequence, structure, classification, ontology, domain organization, localization and possible biological functions extracted from UniProt, GenBank, Protein Data Bank and the literature. The database offers interactive search with incorporated tools, which enhances the analysis. HSPIR is a reliable resource for researchers exploring structure, function and evolution of HSPs.

Availability

http://pdslab.biochem.iisc.ernet.in/hspir/",2012-08-24 +23443684,Protein complex-based analysis framework for high-throughput data sets.,"Analysis of high-throughput data increasingly relies on pathway annotation and functional information derived from Gene Ontology. This approach has limitations, in particular for the analysis of network dynamics over time or under different experimental conditions, in which modules within a network rather than complete pathways might respond and change. We report an analysis framework based on protein complexes, which are at the core of network reorganization. We generated a protein complex resource for human, Drosophila, and yeast from the literature and databases of protein-protein interaction networks, with each species having thousands of complexes. We developed COMPLEAT (http://www.flyrnai.org/compleat), a tool for data mining and visualization for complex-based analysis of high-throughput data sets, as well as analysis and integration of heterogeneous proteomics and gene expression data sets. With COMPLEAT, we identified dynamically regulated protein complexes among genome-wide RNA interference data sets that used the abundance of phosphorylated extracellular signal-regulated kinase in cells stimulated with either insulin or epidermal growth factor as the output. The analysis predicted that the Brahma complex participated in the insulin response.",2013-02-26 +31416842,Curatopes Melanoma: A Database of Predicted T-cell Epitopes from Overly Expressed Proteins in Metastatic Cutaneous Melanoma.,"Therapeutic anticancer vaccination has been adapted as an immunotherapy in several solid tumors. However, the selection of promising candidates from the total quantity of possible epitopes poses a challenge to clinicians and bioinformaticians alike, and very few epitopes have been tested in experimental or clinical settings to validate their efficacy. 
Here, we present a comprehensive database of predicted nonmutated peptide epitopes derived from genes that are overly expressed in a group of 32 melanoma biopsies compared with healthy tissues and that were filtered against expression in a curated list of survival-critical tissues. We hypothesize that these ""self-tolerant"" epitopes have two desirable properties: they do not depend on mutations, being immediately applicable to a large patient collective, and they potentially cause fewer autoimmune reactions. To support epitope selection, we provide an aggregated score of expected therapeutic efficiency as a shortlist mechanism. The database has applications in facilitating epitope selection and trial design and is freely accessible at https://www.curatopes.com. SIGNIFICANCE: A database is presented that predicts and scores antitumor T-cell epitopes, with a focus on tolerability and avoidance of severe autoimmunity, offering a supplementary epitope set for further investigation in immunotherapy.",2019-08-15 +31518964,"HIrisPlex-S system for eye, hair, and skin color prediction from DNA: Massively parallel sequencing solutions for two common forensically used platforms.","Forensic DNA Phenotyping (FDP) provides the ability to predict externally visible characteristics from minute amounts of crime scene DNA, which can help find unknown perpetrators who are typically unidentifiable via conventional forensic DNA profiling. Fundamental human genetics research has led to a better understanding of the specific DNA variants responsible for physical appearance characteristics, particularly eye, hair, and skin color. Recently, we introduced the HIrisPlex-S system for the simultaneous prediction of eye, hair, and skin color based on 41 DNA variants generated from two forensically validated SNaPshot multiplex assays using capillary electrophoresis (CE). 
Here we introduce massively parallel sequencing (MPS) solutions for the HIrisPlex-S (HPS) system on two MPS platforms commonly used in forensics, Ion Torrent and MiSeq, that cover all 41 DNA variants in a single assay, respectively. Additionally, we present the forensic developmental validation of the two HPS-MPS assays. The Ion Torrent MPS assay, based on Ion AmpliSeq technology, illustrated the successful generation of full HIrisPlex-S genotypic profiles from 100 pg of input control DNA, while the MiSeq MPS assay based on an in-house design yielded complete profiles from 250 pg of input DNA. Assessing simulated forensic casework samples such as saliva, hair (bulb), blood, semen, and low quantity touch DNA, as well as artificially damaged DNA samples, concordance testing, and samples from numerous species, all illustrated the ability of both versions of the HIrisPlex-S MPS assay to produce results that motivate forensic applications. By also providing an integrated bioinformatics analysis pipeline, MPS data can now be analyzed and a file generated for upload to the publically accessible HIrisPlex online webtool (https://hirisplex.erasmusmc.nl). In addition, we updated the website to accept VCF input data for those with genome sequence data. We thus provide a user-friendly and semi-automated MPS workflow from DNA sample to individual eye, hair, and skin color prediction probabilities. Furthermore, we present a 2-person mixture separation tool that not only assesses genotype reliability with regards genotyping confidence but also provides the most fitting mixture scenario for both minor and major contributors, including profile separation. We envision this MPS implementation of the HIrisPlex-S system for eye, hair, and skin color prediction from DNA as a starting point for further expanding MPS-based forensic DNA phenotyping. 
This may include the future addition of SNPs predictive for more externally visible characteristics, as well as SNPs for bio-geographic ancestry inference, provided the statistical framework for DNA prediction of these traits is in place.",2019-08-26 +31811682,All-trans retinoic acid (ATRA)-induced TFEB expression is required for myeloid differentiation in acute promyelocytic leukemia (APL).,"OBJECTIVE:In acute promyelocytic leukemia (APL), normal retinoid signaling is disrupted by an abnormal PML-RARα fusion oncoprotein, leading to a block in cell differentiation. Therapeutic concentrations of all-trans-retinoic acid (ATRA) can restore retinoid-induced transcription and promote degradation of the PML-RARα protein. Autophagy is a catabolic pathway that utilizes lysosomal machinery to degrade intracellular material and facilitate cellular re-modeling. Recent studies have identified autophagy as an integral component of ATRA-induced myeloid differentiation. METHODS:As the molecular communication between retinoid signaling and the autophagy pathway is not defined, we performed RNA sequencing of NB4 APL cells treated with ATRA and examined autophagy-related transcripts. RESULTS:ATRA altered the expression of >80 known autophagy-related transcripts, including the key transcriptional regulator of autophagy and lysosomal biogenesis, TFEB (11.5-fold increase). Induction of TFEB and its transcriptional target, sequestosome 1 (SQSTM1, p62), is reduced in ATRA-resistant NB4R cells compared to NB4 cells. TFEB knockdown in NB4 cells alters the expression of transcriptional targets of TFEB and reduces CD11b transcript levels in response to ATRA. 
CONCLUSIONS:We show for the first time that TFEB plays an important role in ATRA-induced autophagy during myeloid differentiation and that autophagy induction potentiates leukemic cell differentiation (Note: this study includes data obtained from NCT00195156, https://clinicaltrials.gov/show/NCT00195156).",2020-01-13 +25943471,The SwissLipids knowledgebase for lipid biology.,"

Motivation

Lipids are a large and diverse group of biological molecules with roles in membrane formation, energy storage and signaling. Cellular lipidomes may contain tens of thousands of structures, a staggering degree of complexity whose significance is not yet fully understood. High-throughput mass spectrometry-based platforms provide a means to study this complexity, but the interpretation of lipidomic data and its integration with prior knowledge of lipid biology suffers from a lack of appropriate tools to manage the data and extract knowledge from it.

Results

To facilitate the description and exploration of lipidomic data and its integration with prior biological knowledge, we have developed a knowledge resource for lipids and their biology-SwissLipids. SwissLipids provides curated knowledge of lipid structures and metabolism which is used to generate an in silico library of feasible lipid structures. These are arranged in a hierarchical classification that links mass spectrometry analytical outputs to all possible lipid structures, metabolic reactions and enzymes. SwissLipids provides a reference namespace for lipidomic data publication, data exploration and hypothesis generation. The current version of SwissLipids includes over 244 000 known and theoretically possible lipid structures, over 800 proteins, and curated links to published knowledge from over 620 peer-reviewed publications. We are continually updating the SwissLipids hierarchy with new lipid categories and new expert curated knowledge.

Availability

SwissLipids is freely available at http://www.swisslipids.org/.

Contact

alan.bridge@isb-sib.ch

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-05 +29111271,Forensic characteristics and phylogenetic analysis of two Han populations from the southern coastal regions of China using 27 Y-STR loci.,"Currently, the largest national database within the Y-STR Haplotype Reference Database (YHRD, https://yhrd.org, release 55) is China, which has a very large Y-STR haplotype profiles. However, no haplotype data was available for Hainan province, the smallest and southernmost province of China. Herein, 280 unrelated male Chinese Han individuals residing in Hainan province were recruited and genotyped with 27 Y-STR loci. Moreover, 136 Han individuals from Shenzhen, the largest migrant city in China, also investigated. 279 distinct haplotypes were obtained in the Hainan Han individuals with the overall haplotype diversity (HD) and discrimination capacity (DC) were 0.999974 and 0.9964, respectively. 136 unique haplotypes were observed in the Shenzhen individuals, and thus both of the related HD and DC values were 1.0. The two multi-copy loci (DYS385a/b and DYF387S1a/b) and rapidly mutating Y-STRs (RM Y-STRs, DYS449, DYS518, DYS570, DYS576, DYS627) exhibited high genetic diversity (GD) values (GD>0.7) in two Han populations. Furthermore, genetic relationships along Chinese administrative and ethnic divisions were analyzed, and we also combined our data with existing datasets of non-Chinese groups to explore the genetic variance. Genetic differentiations were observed between Northern and Southern Han nationality, and genetic differences existed between the two Han populations and some ethnic groups, most prominently for the Tibetans and Kazakhs, as well as non-Chinese groups, especially African groups.",2017-10-27 +29968566,Beyond the RfD: Broad Application of a Probabilistic Approach to Improve Chemical Dose-Response Assessments for Noncancer Effects.,"

Background

The National Academies recommended risk assessments redefine the traditional noncancer Reference Dose (RfD) as a probabilistically derived risk-specific dose, a framework for which was recently developed by the World Health Organization (WHO).

Objectives

Our aim was to assess the feasibility and implications of replacing traditional RfDs with probabilistic estimates of the human dose associated with an effect magnitude M and population incidence I (HDMI).

Methods

We created a comprehensive, curated database of RfDs derived from animal data and developed a standardized, automated, web-accessible probabilistic dose-response workflow implementing the WHO framework.

Results

We identified 1,464 RfDs and associated endpoints, representing 608 chemicals across many types of effects. Applying our standardized workflow resulted in 1,522 HDMI values. Traditional RfDs are generally within an order of magnitude of the HDMI lower confidence bound for I=1% and M values commonly used for benchmark doses. The greatest contributor to uncertainty was lack of benchmark dose estimates, followed by uncertainty in the extent of human variability. Exposure at the traditional RfD frequently implies an upper 95% confidence bound of several percent of the population affected. Whether such incidences are considered acceptable is likely to vary by chemical and risk context, especially given the wide range of severity of the associated effects, from clinical chemistry to mortality.

Conclusions

Overall, replacing RfDs with HDMI estimates can provide a more consistent, scientifically rigorous, and transparent basis for risk management decisions, as well as support additional decision contexts such as economic benefit-cost analysis, risk-risk tradeoffs, life-cycle impact analysis, and emergency response. https://doi.org/10.1289/EHP3368.",2018-06-28 +31647104,"MicroScope: an integrated platform for the annotation and exploration of microbial gene functions through genomic, pangenomic and metabolic comparative analysis.","Large-scale genome sequencing and the increasingly massive use of high-throughput approaches produce a vast amount of new information that completely transforms our understanding of thousands of microbial species. However, despite the development of powerful bioinformatics approaches, full interpretation of the content of these genomes remains a difficult task. Launched in 2005, the MicroScope platform (https://www.genoscope.cns.fr/agc/microscope) has been under continuous development and provides analysis for prokaryotic genome projects together with metabolic network reconstruction and post-genomic experiments allowing users to improve the understanding of gene functions. Here we present new improvements of the MicroScope user interface for genome selection, navigation and expert gene annotation. Automatic functional annotation procedures of the platform have also been updated and we added several new tools for the functional annotation of genes and genomic regions. We finally focus on new tools and pipeline developed to perform comparative analyses on hundreds of genomes based on pangenome graphs. To date, MicroScope contains data for >11 800 microbial genomes, part of which are manually curated and maintained by microbiologists (>4500 personal accounts in September 2019). 
The platform enables collaborative work in a rich comparative genomic context and improves community-based curation efforts.",2020-01-01 +30376034,VarSome: the human genomic variant search engine.,"

Summary

VarSome.com is a search engine, aggregator and impact analysis tool for human genetic variation and a community-driven project aiming at sharing global expertise on human variants.

Availability and implementation

VarSome is freely available at http://varsome.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-06-01 +27401965,New BAR tools for mining expression data and exploring Cis-elements in Arabidopsis thaliana.,"Identifying sets of genes that are specifically expressed in certain tissues or in response to an environmental stimulus is useful for designing reporter constructs, generating gene expression markers, or for understanding gene regulatory networks. We have developed an easy-to-use online tool for defining a desired expression profile (a modification of our Expression Angler program), which can then be used to identify genes exhibiting patterns of expression that match this profile as closely as possible. Further, we have developed another online tool, Cistome, for predicting or exploring cis-elements in the promoters of sets of co-expressed genes identified by such a method, or by other methods. We present two use cases for these tools, which are freely available on the Bio-Analytic Resource at http://BAR.utoronto.ca.",2016-10-05 +31393550,Genome-phenome explorer (GePhEx): a tool for the visualization and interpretation of phenotypic relationships supported by genetic evidence.,"

Motivation

Association studies based on SNP arrays and Next Generation Sequencing technologies have enabled the discovery of thousands of genetic loci related to human diseases. Nevertheless, their biological interpretation is still elusive, and their medical applications limited. Recently, various tools have been developed to help bridging the gap between genomes and phenomes. To our knowledge, however none of these tools allows users to retrieve the phenotype-wide list of genetic variants that may be linked to a given disease or to visually explore the joint genetic architecture of different pathologies.

Results

We present the Genome-Phenome Explorer (GePhEx), a web-tool easing the visual exploration of phenotypic relationships supported by genetic evidences. GePhEx is primarily based on the thorough analysis of linkage disequilibrium between disease-associated variants and also considers relationships based on genes, pathways or drug-targets, leveraging on publicly available variant-disease associations to detect potential relationships between diseases. We demonstrate that GePhEx does retrieve well-known relationships as well as novel ones, and that, thus, it might help shedding light on the patho-physiological mechanisms underlying complex diseases. To this end, we investigate the potential relationship between schizophrenia and lung cancer, first detected using GePhEx and provide further evidence supporting a functional link between them.

Availability and implementation

GePhEx is available at: https://gephex.ega-archive.org/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-02-01 +31201609,Applying EMG technology in medial and lateral elbow enthesopathy treatment using Myo motion controller.,"Electromyography (EMG) is a diagnostic technique allowing for the detection of signals generated by changes in electrical potentials of striated muscles. The application of this technology is becoming an increasingly popular subject of scientific research. With the appearance of new devices retrieving EMG data, novel methods of its processing for various purposes are being developed. One such device is the Myo movement controller, produced by Thalmic Labs (now North). The device has been used for the analysis of muscle activation levels in patients with ""tennis elbow"" and ""golfer's elbow""-conditions of upper limbs which usually result from occupational injuries. The process of their rehabilitation is complex and requires a continuous monitoring of its progress. The data obtained by means of the Myo controller was used for pattern recognition of an injured hand with relation to the healthy one. The study involved examining ten subjects, including five controls. The results indicate that the muscle activation force is considerably lower in injured individuals. The arithmetic mean for the 6 analyzed motions in the injured group is 38.54% lower. The SmartEMG application ( https://www.smartemg.com ) enables the implementation of procedures performed during an examination as well as those involved in the management of the collected recordings. The study produced satisfactory results, which indicates the possibility of using the Myo controller in the treatment of elbow enthesopathy.",2019-06-14 +31409613,The Indenoisoquinoline TOP1 Inhibitors Selectively Target Homologous Recombination-Deficient and Schlafen 11-Positive Cancer Cells and Synergize with Olaparib.,"

Purpose

Irinotecan and topotecan are used to treat a variety of different cancers. However, they have limitations, including chemical instability and severe side effects. To overcome these limitations, we developed the clinical indenoisoquinolines: LMP400 (indotecan), LMP776 (indimitecan), and LMP744. The purpose of the study is to build the molecular rationale for phase II clinical trials.

Experimental design

CellMinerCDB (http://discover.nci.nih.gov/cellminercdb) was used to mine the cancer cell lines genomic databases. The causality of Schlafen11 (SLFN11) was validated in isogenic cell lines. Because topoisomerase I (TOP1)-mediated replication DNA damage is repaired by homologous recombination (HR), we tested the ""synthetic lethality"" of HR-deficient (HRD) cells. Survival and cell-cycle alterations were performed after drug treatments in isogenic DT40, DLD1, and OVCAR cell lines with BRCA1, BRCA2, or PALB2 deficiencies and in organoids cultured from prostate cancer patient-derived xenografts with BRCA2 loss. We also used an ovarian orthotopic allograft model with BRCA1 loss to validate the efficacy of LMP400 and olaparib combination.

Results

CellMinerCDB reveals that SLFN11, which kills cells undergoing replicative stress, is a dominant drug determinant to the clinical indenoisoquinolines. In addition, BRCA1-, BRCA2-, and PALB2-deficient cells were hypersensitive to the indenoisoquinolines. All 3 clinical indenoisoquinolines were also synergistic with olaparib, especially in the HRD cells. The synergy between LMP400 and olaparib was confirmed in the orthotopic allograft model harboring BRCA1 loss.

Conclusions

Our results provide a rationale for molecularly designed clinical trials with the indenoisoquinolines as single agents and in combination with PARP inhibitors in HRD cancers expressing SLFN11.",2019-08-13 +32413515,Procleave: Predicting Protease-specific Substrate Cleavage Sites by Combining Sequence and Structural Information.,"Proteases are enzymes that cleave and hydrolyse the peptide bonds between two specific amino acid residues of target substrate proteins. Protease-controlled proteolysis plays a key role in the degradation and recycling of proteins, which is essential for various physiological processes. Thus, solving the substrate identification problem will have important implications for the precise understanding of functions and physiological roles of proteases, as well as for therapeutic target identification and pharmaceutical applicability. Consequently, there is a great demand for bioinformatics methods that can predict novel substrate cleavage events with high accuracy by utilizing both sequence and structural information. In this study, we present Procleave, a novel bioinformatics approach for predicting protease-specific substrates and specific cleavage sites by taking into account both their sequence and 3D structural information. Structural features of known cleavage sites were represented by discrete values using a LOWESS data-smoothing optimization method, which turned out to be critical for the performance of Procleave. The optimal approximations of all structural parameter values were encoded in a conditional random field (CRF) computational framework, alongside sequence and chemical group-based features. Here, we demonstrate the outstanding performance of Procleave through extensive benchmarking and independent tests. Procleave is capable of correctly identifying most cleavage sites in the case study. 
Importantly, when applied to the human structural proteome encompassing 17,628 protein structures, Procleave suggests a number of potential novel target substrates and their corresponding cleavage sites of different proteases. Procleave is implemented as a webserver and is freely accessible at http://procleave.erc.monash.edu/.",2020-02-01 +26503244,sRNATarBase 3.0: an updated database for sRNA-target interactions in bacteria.,"Bacterial sRNAs are a class of small regulatory RNAs of about 40-500 nt in length; they play multiple biological roles through binding to their target mRNAs or proteins. Therefore, elucidating sRNA targets is very important. However, only targets of a few sRNAs have been described. To facilitate sRNA functional studies such as developing sRNA target prediction models, we updated the sRNATarBase database, which was initially developed in 2010. The new version (recently moved to http://ccb1.bmi.ac.cn/srnatarbase/) contains 771 sRNA-target entries manually collected from 213 papers, and 23 290 and 11 750 predicted targets from sRNATarget and sTarPicker, respectively. Among the 771 entries, 475 and 17 were involved in validated sRNA-mRNA and sRNA-protein interactions, respectively, while 279 had no reported interactions. We also presented detailed information for 316 binding regions of sRNA-target mRNA interactions and related mutation experiments, as well as new features, including NCBI sequence viewer, sRNA regulatory network, target prediction-based GO and pathway annotations, and error report system. The new version provides a comprehensive annotation of validated sRNA-target interactions, and will be a useful resource for bacterial sRNA studies.",2015-10-25 +25187689,TRIPATH: A Biological Genetic and Genomic Database of Three Economically Important Fungal Pathogen of Wheat - Rust: Smut: Bunt.,"

Unlabelled

Wheat, the major source of vegetable protein in human diet, provides staple food globally for a large proportion of the human population. With higher protein content than other major cereals, wheat has great socio-economic importance. Nonetheless, three important fungal pathogens of wheat, i.e. rust, smut and bunt, are a major cause of significant yield losses throughout the world. Researchers are putting up a strong fight against devastating wheat pathogens, and have made progress in tracking and controlling disease outbreaks from East Africa to South Asia. The aim of the present work hence was to develop a fungal pathogens database dedicated to wheat, gathering information about different pathogen species and linking them to their biological classification, distribution and control. Towards this end, we developed an open access database Tripath: A biological, genetic and genomic database of economically important wheat fungal pathogens - rust: smut: bunt. Data collected from peer-reviewed publications and fungal pathogens were added to the customizable database through an extended relational design. The strength of this resource is in providing rapid retrieval of information from large volumes of text at a high degree of accuracy. Database TRIPATH is freely accessible.

Availability

http://www.gbpuat-cbsh.ac.in/departments/bi/database/tripath/",2014-07-22 +24214961,"Data, information, knowledge and principle: back to metabolism in KEGG.","In the hierarchy of data, information and knowledge, computational methods play a major role in the initial processing of data to extract information, but they alone become less effective to compile knowledge from information. The Kyoto Encyclopedia of Genes and Genomes (KEGG) resource (http://www.kegg.jp/ or http://www.genome.jp/kegg/) has been developed as a reference knowledge base to assist this latter process. In particular, the KEGG pathway maps are widely used for biological interpretation of genome sequences and other high-throughput data. The link from genomes to pathways is made through the KEGG Orthology system, a collection of manually defined ortholog groups identified by K numbers. To better automate this interpretation process the KEGG modules defined by Boolean expressions of K numbers have been expanded and improved. Once genes in a genome are annotated with K numbers, the KEGG modules can be computationally evaluated revealing metabolic capacities and other phenotypic features. The reaction modules, which represent chemical units of reactions, have been used to analyze design principles of metabolic networks and also to improve the definition of K numbers and associated annotations. For translational bioinformatics, the KEGG MEDICUS resource has been developed by integrating drug labels (package inserts) used in society.",2013-11-07 +22078435,SIGNATURE: a workbench for gene expression signature analysis.,"

Background

The biological phenotype of a cell, such as a characteristic visual image or behavior, reflects activities derived from the expression of collections of genes. As such, an ability to measure the expression of these genes provides an opportunity to develop more precise and varied sets of phenotypes. However, to use this approach requires computational methods that are difficult to implement and apply, and thus there is a critical need for intelligent software tools that can reduce the technical burden of the analysis. Tools for gene expression analyses are unusually difficult to implement in a user-friendly way because their application requires a combination of biological data curation, statistical computational methods, and database expertise.

Results

We have developed SIGNATURE, a web-based resource that simplifies gene expression signature analysis by providing software, data, and protocols to perform the analysis successfully. This resource uses bayesian methods for processing gene expression data coupled with a curated database of gene expression signatures, all carried out within a GenePattern web interface for easy use and access.

Conclusions

SIGNATURE is available for public use at http://genepattern.genome.duke.edu/signature/.",2011-11-14 +29269821,Which adhesive strategy for non-carious cervical lesions?,"Data sourcesMedline, Scopus, Web of Science, Latin American and Caribbean Health Sciences Literature database (LILACS), Brazilian Library in Dentistry (BBO), Cochrane Library, System for Information on Grey literature in Europe (SIGLE), ProQuest Dissertations and Theses, Periódicos Capes Theses database, Current Controlled Trials (www.controlled-trials.com), International Clinical trials registry platform (http://apps.who.int/trialsearch/), the ClinicalTrials.gov (www.clinicaltrials.gov), Rebec (www.rebec.gov.br) and EU Clinical Trials Register (https://www.clinicaltrialsregister.eu), abstracts of the annual conference of the International Association for Dental Research (IADR) and their regional divisions.Study selectionTwo reviewers selected studies; parallel or split-mouth randomised controlled trials (RCTs) comparing adhesive strategies were considered.Data extraction and synthesisData were abstracted by two reviewers and into four follow up periods, 1 year; 18 months to 2 years; 3 years and 4 to 5 years. The Cochrane risk of bias tool was used to assess study quality. Data outcomes were dichotomous and summarised using relative risks and random effects meta-analysis.ResultsTwenty-nine studies were included in the meta-analysis. Bonding strategy did not influence postoperative sensitivity (risk ratio [RR] 1.04; 95% CI 0.81 to 1.34) or retention rates (RR = 1.04; 95% CI 0.81 to 1.34). 
The etch-and-rinse approach produced less marginal discoloration at 18 months to 2 years (RR 1.51; 95% CI 1.21 to 1.90) and at 4 to 5 years (RR 1.81; 95% CI 1.28 to 2.55) (p<0.0007).ConclusionsComposite resin restorations placed with self-etch and etch-and-rinse adhesives produce restoration with similar clinical service and POS, however using etch-and-rinse adhesives one can reduce marginal discoloration.",2017-12-01 +29226381,A robust and powerful two-step testing procedure for local ancestry adjusted allelic association analysis in admixed populations.,"Genetic association studies in admixed populations allow us to gain deeper understanding of the genetic architecture of human diseases and traits. However, population stratification, complicated linkage disequilibrium (LD) patterns, and the complex interplay of allelic and ancestry effects on phenotypic traits pose challenges in such analyses. These issues may lead to detecting spurious associations and/or result in reduced statistical power. Fortunately, if handled appropriately, these same challenges provide unique opportunities for gene mapping. To address these challenges and to take these opportunities, we propose a robust and powerful two-step testing procedure Local Ancestry Adjusted Allelic (LAAA) association. In the first step, LAAA robustly captures associations due to allelic effect, ancestry effect, and interaction effect, allowing detection of effect heterogeneity across ancestral populations. In the second step, LAAA identifies the source of association, namely allelic, ancestry, or the combination. By jointly modeling allele, local ancestry, and ancestry-specific allelic effects, LAAA is highly powerful in capturing the presence of interaction between ancestry and allele effect. We evaluated the validity and statistical power of LAAA through simulations over a broad spectrum of scenarios. 
We further illustrated its usefulness by application to the Candidate Gene Association Resource (CARe) African American participants for association with hemoglobin levels. We were able to replicate independent groups' previously identified loci that would have been missed in CARe without joint testing. Moreover, the loci, for which LAAA detected potential effect heterogeneity, were replicated among African Americans from the Women's Health Initiative study. LAAA is freely available at https://yunliweb.its.unc.edu/LAAA.",2017-12-10 +30874723,Protein model quality assessment using 3D oriented convolutional neural networks.,"

Motivation

Protein model quality assessment (QA) is a crucial and yet open problem in structural bioinformatics. The current best methods for single-model QA typically combine results from different approaches, each based on different input features constructed by experts in the field. Then, the prediction model is trained using a machine-learning algorithm. Recently, with the development of convolutional neural networks (CNN), the training paradigm has changed. In computer vision, the expert-developed features have been significantly overpassed by automatically trained convolutional filters. This motivated us to apply a three-dimensional (3D) CNN to the problem of protein model QA.

Results

We developed Ornate (Oriented Routed Neural network with Automatic Typing)-a novel method for single-model QA. Ornate is a residue-wise scoring function that takes as input 3D density maps. It predicts the local (residue-wise) and the global model quality through a deep 3D CNN. Specifically, Ornate aligns the input density map, corresponding to each residue and its neighborhood, with the backbone topology of this residue. This circumvents the problem of ambiguous orientations of the initial models. Also, Ornate includes automatic identification of atom types and dynamic routing of the data in the network. Established benchmarks (CASP 11 and CASP 12) demonstrate the state-of-the-art performance of our approach among single-model QA methods.

Availability and implementation

The method is available at https://team.inria.fr/nano-d/software/Ornate/. It consists of a C++ executable that transforms molecular structures into volumetric density maps, and a Python code based on the TensorFlow framework for applying the Ornate model to these maps.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +29069330,DISC: DISulfide linkage Characterization from tandem mass spectra.,"

Motivation

Enzymatic digestion under appropriate reducing conditions followed by mass spectrometry analysis has emerged as the primary method for disulfide bond analysis. The large amount of mass spectral data collected in the mass spectrometry experiment requires effective computational approaches to automate the interpretation process. Although different approaches have been developed for such purpose, they always choose to ignore the frequently observed internal ion fragments and they lack a reasonable quality control strategy and calibrated scoring scheme for the statistical validation and ranking of the reported results.

Results

In this research, we present a new computational approach, DISC (DISulfide bond Characterization), for matching an input MS/MS spectrum against the putative disulfide linkage structures hypothetically constructed from the protein database. More specifically, we consider different ion types including a variety of internal ions that are frequently observed in mass spectra resulting from disulfide-linked peptides, and introduce an effective two-layer scoring scheme to evaluate the significance of the matching between spectrum and structure, based on which we have also developed a useful target-decoy strategy for providing quality control and reporting the false discovery rate in the final results. Systematic experiments conducted on both low-complexity and high-complexity datasets demonstrated the efficiency of our proposed method for the identification of disulfide bonds from MS/MS spectra, and showed its potential in characterizing disulfide bonds at the proteome scale instead of just a single protein.

Availability and implementation

Software is available for downloading at http://www.csd.uwo.ca/yliu766/.

Contact

yliu766@uwo.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +31552422,Best (but oft-forgotten) practices: identifying and accounting for regression to the mean in nutrition and obesity research.,"

Background

Regression to the mean (RTM) is a statistical phenomenon where initial measurements of a variable in a nonrandom sample at the extreme ends of a distribution tend to be closer to the mean upon a second measurement. Unfortunately, failing to account for the effects of RTM can lead to incorrect conclusions on the observed mean difference between the 2 repeated measurements in a nonrandom sample that is preferentially selected for deviating from the population mean of the measured variable in a particular direction. Study designs that are susceptible to misattributing RTM as intervention effects have been prevalent in nutrition and obesity research. This field often conducts secondary analyses of existing intervention data or evaluates intervention effects in those most at risk (i.e., those with observations at the extreme ends of a distribution).

Objectives

To provide best practices to avoid unsubstantiated conclusions as a result of ignoring RTM in nutrition and obesity research.

Methods

We outlined best practices for identifying whether RTM is likely to be leading to biased inferences, using a flowchart that is available as a web-based app at https://dustyturner.shinyapps.io/DecisionTreeMeanRegression/. We also provided multiple methods to quantify the degree of RTM.

Results

Investigators can adjust analyses to include the RTM effect, thereby plausibly removing its biasing influence on estimating the true intervention effect.

Conclusions

The identification of RTM and implementation of proper statistical practices will help advance the field by improving scientific rigor and the accuracy of conclusions. This trial was registered at clinicaltrials.gov as NCT00427193.",2020-02-01 +24399916,bex-db: Bioinformatics workbench for comprehensive analysis of barley-expressed genes.,"Barley (Hordeum vulgare) is one of the world's most important cereal crops. Although its large and complex genome has held back barley genomics for quite a while, the whole genome sequence was released in 2012 by the International Barley Genome Sequencing Consortium (IBSC). Moreover, more than 30,000 barley full-length cDNAs (FLcDNAs) are now available in the public domain. Here we present the Barley Gene Expression Database (bex-db: http://barleyflc.dna.affrc.go.jp/bexdb/index.html) as a repository of transcriptome data including the sequences and the expression profiles of barley genes resulting from microarray analysis. In addition to FLcDNA sequences, bex-db also contains partial sequences of more than 309,000 novel expressed sequence tags (ESTs). Users can browse the data via keyword, sequence homology and expression profile search options. A genome browser was also developed to display the chromosomal locations of barley FLcDNAs and wheat (Triticum aestivum) transcripts as well as Aegilops tauschii gene models on the IBSC genome sequence for future comparative analysis of orthologs among Triticeae species. The bex-db should provide a useful resource for further genomics studies and development of genome-based tools to enhance the progress of the genetic improvement of cereal crops.",2013-12-01 +22786849,"Transcriptomine, a web resource for nuclear receptor signaling transcriptomes.","The nuclear receptor (NR) superfamily of ligand-regulated transcription factors directs ligand- and tissue-specific transcriptomes in myriad developmental, metabolic, immunological, and reproductive processes. 
The NR signaling field has generated a wealth of genome-wide expression data points, but due to deficits in their accessibility, annotation, and integration, the full potential of these studies has not yet been realized. We searched public gene expression databases and MEDLINE for global transcriptomic datasets relevant to NRs, their ligands, and coregulators. We carried out extensive, deep reannotation of the datasets using controlled vocabularies for RNA Source and regulating molecule and resolved disparate gene identifiers to official gene symbols to facilitate comparison of fold changes and their significance across multiple datasets. We assembled these data points into a database, Transcriptomine (http://www.nursa.org/transcriptomine), that allows for multiple, menu-driven querying strategies of this transcriptomic ""superdataset,"" including single and multiple genes, Gene Ontology terms, disease terms, and uploaded custom gene lists. Experimental variables such as regulating molecule, RNA Source, as well as fold-change and P value cutoff values can be modified, and full data records can be either browsed or downloaded for downstream analysis. We demonstrate the utility of Transcriptomine as a hypothesis generation and validation tool using in silico and experimental use cases. Our resource empowers users to instantly and routinely mine the collective biology of millions of previously disparate transcriptomic data points. By incorporating future transcriptome-wide datasets in the NR signaling field, we anticipate Transcriptomine developing into a powerful resource for the NR- and other signal transduction research communities.",2012-07-10 +21460878,[Protein Structure Discovery: software package to perform computational proteomics tasks].,"Software-information system Protein Structure Discovery was developed. 
The system can be used for the wide range of tasks in the field of computer proteomics including prediction of function, structure and immunological properties of proteins. A specially created section of the system allows evaluating the quantitative and qualitative effects of mutations on the structural and functional properties of proteins. There are 19 of different programs integrated into the system, including the database of protein functional sites PDBSite, a PDBSiteScan program for the prediction of functional sites in three-dimensional structures of proteins, and WebProAnalyst program for the quantitative analysis of the structure-activity relationship of proteins. Protein Structure Discovery program has a Web interface and is available for users through the Internet (http://www-bionet.sscc.ru/psd/). For example, binding sites of zinc ion and ADP showed high stability of the method to errors PDBSiteScan reconstruction of spatial structures of proteins in the recognition of functional sites in model structures.",2011-01-01 +31077078,Common Data Elements for Unruptured Intracranial Aneurysm and Subarachnoid Hemorrhage Clinical Research: Recommendations from the Working Group on Long-Term Therapies.,"

Objectives

The goal for the long-term therapies (LTT) working group (WG) of the Unruptured Intracranial Aneurysm (UIA) and Subarachnoid Hemorrhage (SAH) common data elements (CDEs) was to develop a comprehensive set of CDEs, data definitions, case report forms, and guidelines for use in UIA and SAH LTT clinical research, as part of a new joint effort between the National Institute of Neurological Disorders and Stroke (NINDS) and the National Library of Medicine of the US National Institutes of Health. These UIA and SAH CDEs will join other neurological disease-specific CDEs already developed and available for use by research investigators.

Methods

The eight LTT WG members, comprising international UIA and SAH experts, reviewed existing NINDS CDEs and instruments, created new elements when needed, and provided recommendations for future LTT clinical research. The recommendations were compiled and internally reviewed by all the UIA and SAH WGs and steering committee members. The NINDS CDE team also reviewed the final version before posting the SAH Version 1.0 CDE recommendations on the NINDS CDE website.

Results

The NINDS UIA and SAH LTT CDEs and supporting documents are publicly available on the NINDS CDE ( https://www.commondataelements.ninds.nih.gov/#page=Default ) and NIH Repository ( https://cde.nlm.nih.gov/home ) websites. The subcommittee members discussed and reviewed various parameters, outcomes, and endpoints in UIA and SAH LTT studies. Following meetings with WG members, the LTT WG's recommendations are incorporated into the disease/injury-related events, assessments and examinations, and treatment/intervention data domains.

Conclusions

Noting gaps in the literature regarding medication and rehabilitation parameters in UIA and SAH clinical studies, the current CDE recommendations aim to arouse interest to explore the impact of medication and rehabilitation treatments and therapies and encourage the convergence of LTT clinical study parameters to develop a harmonized standard.",2019-06-01 +29212269,Low neighbor of Brca1 gene expression predicts poor clinical outcome and resistance of sunitinib in clear cell renal cell carcinoma.,"

Objective

To study the expression of Neighbor of Brca1 gene (NBR1) in clear cell renal cell carcinoma (ccRCC), renal cancer cells and the chemoresistance cells and to elucidate its clinical prognostic and chemoresistance value.

Materials and methods

We screened the NBR1 mRNA in ccRCC from The Cancer Genome Atlas (TCGA) database and examined expression levels of NBR1 mRNA in 48 cases of ccRCC tissues, renal cancer cell lines and chemoresistance cells by qRT-PCR. Then, we extended two additional data sets in the oncomine database (https://www.oncomine.org) to further confirm the results of the TCGA database. Immunohistochemistry (IHC) assay data performed in ccRCC tissues and normal tissues were downloaded from The Human Protein Atlas.

Results

The mRNA levels of NBR1 were downregulated in the TCGA-KIRC database (n = 533) and ccRCC patient samples (n=48) as well as in RCC cell lines and their chemoresistance cells. Similarly, the protein levels of NBR1 were lower in ccRCC patient samples. NBR1 level was associated with the clinical pathological stage and could discriminate metastasis, recurrence and prognosis in ccRCC patients. A low level of NBR1 mRNA was significantly associated with poor overall survival (OS) and disease-free survival (DFS) in univariate and multivariate analyses in ccRCC patients, and with sunitinib resistance.

Conclusions

Taken together, our results suggest that low level of NBR1 can predict poor clinical outcome and resistance of sunitinib in patients with ccRCC.",2017-10-23 +24285300,The Mouse Genome Database: integration of and access to knowledge about the laboratory mouse.,"The Mouse Genome Database (MGD) (http://www.informatics.jax.org) is the community model organism database resource for the laboratory mouse, a premier animal model for the study of genetic and genomic systems relevant to human biology and disease. MGD maintains a comprehensive catalog of genes, functional RNAs and other genome features as well as heritable phenotypes and quantitative trait loci. The genome feature catalog is generated by the integration of computational and manual genome annotations generated by NCBI, Ensembl and Vega/HAVANA. MGD curates and maintains the comprehensive listing of functional annotations for mouse genes using the Gene Ontology, and MGD curates and integrates comprehensive phenotype annotations including associations of mouse models with human diseases. Recent improvements include integration of the latest mouse genome build (GRCm38), improved access to comparative and functional annotations for mouse genes with expanded representation of comparative vertebrate genomes and new loads of phenotype data from high-throughput phenotyping projects. All MGD resources are freely available to the research community.",2013-11-26 +26578596,HPMCD: the database of human microbial communities from metagenomic datasets and microbial reference genomes.,"The Human Pan-Microbe Communities (HPMC) database (http://www.hpmcd.org/) provides a manually curated, searchable, metagenomic resource to facilitate investigation of human gastrointestinal microbiota. Over the past decade, the application of metagenome sequencing to elucidate the microbial composition and functional capacity present in the human microbiome has revolutionized many concepts in our basic biology. 
When sufficient high quality reference genomes are available, whole genome metagenomic sequencing can provide direct biological insights and high-resolution classification. The HPMC database provides species level, standardized phylogenetic classification of over 1800 human gastrointestinal metagenomic samples. This is achieved by combining a manually curated list of bacterial genomes from human faecal samples with over 21000 additional reference genomes representing bacteria, viruses, archaea and fungi with manually curated species classification and enhanced sample metadata annotation. A user-friendly, web-based interface provides the ability to search for (i) microbial groups associated with health or disease state, (ii) health or disease states and community structure associated with a microbial group, (iii) the enrichment of a microbial gene or sequence and (iv) enrichment of a functional annotation. The HPMC database enables detailed analysis of human microbial communities and supports research from basic microbiology and immunology to therapeutic development in human health and disease.",2015-11-17 +28941791,Methicillin-resistant Staphylococcus aureus (MRSA) in Iran: A systematic review and meta-analysis.,"

Introduction

Methicillin-resistant Staphylococcus aureus (MRSA) is among the most prevalent pathogens causing healthcare-associated infections. Accurate and updated data describing the epidemiology of MRSA are crucial for the development of national policies to control MRSA infection in each country. This study aimed to estimate the prevalence of MRSA in different parts of Iran.

Methods

Several databases, including MEDLINE, Embase, Web of Science and Scientific Information Database (http://www.sid.ir), were searched from 1 January 2000 to 31 March 2016 to identify studies addressing the frequency or prevalence of MRSA in Iran. Comprehensive Meta-Analysis software v.2.2 was used to analyse the data.

Results

Of the 725 records identified from the databases, 31 studies fulfilled the eligibility criteria. The analyses showed that the frequency of MRSA infections was 43.0% (95% confidence interval 36.3-50.0%) among confirmed S. aureus isolates. Further stratified analyses indicated that the prevalence of MRSA was higher in studies performed after the year 2000.

Conclusions

Since a high rate of MRSA infections was seen in this analysis, regular surveillance of hospital-associated infections, monitoring of antibiotic sensitivity patterns, and formulation of definite antibiotic policy may facilitate more accurate action for the prevention and control of MRSA.",2017-09-21 +30575037,LipidomeDB Data Calculation Environment Has Been Updated to Process Direct-Infusion Multiple Reaction Monitoring Data.,"LipidomeDB Data Calculation Environment (DCE), a web application for processing data from direct-infusion tandem mass spectrometer data on lipids, was described by Zhou et al. (, Lipids, 46, 879-884). The original version processed multiple precursor and/or neutral loss scans on multiple samples. A recent update to LipidomeDB DCE, http://lipidome.bcf.ku.edu:8080/Lipidomics/, extends LipidomeDB DCE's functionality to process data acquired in multiple reaction monitoring (MRM) mode by direct-infusion mass spectrometry. Both the precursor-neutral loss workflow and the MRM workflow remove signals due to isotopic overlap of lipid analytes and calculation of the amount of each target lipid in comparison with internal standards.",2018-11-01 +30101688,Introducing NASN's New Data Initiative: National School Health Data Set: Every Student Counts! Make This YOUR Year of Data.,"The National Association of School Nurses (NASN) is launching a new data initiative: National School Health Data Set: Every Student Counts! This article describes the vision of the initiative, as well as what school nurses can do to advance a data-driven school health culture. This is the first article in a data and school nursing series for the 2018-2019 school year. 
For more information on NASN's initiative and to learn how school nurses can join the data revolution, go to http://nasn.org/everystudentcounts.",2018-09-01 +31399070,Intra and post-operative complications observed with femtosecond laser-assisted cataract surgery versus conventional phacoemulsification surgery: a systematic review and meta-analysis.,"

Background

In this analysis, we aimed to systematically compare the complications which were associated with femtosecond laser-assisted cataract surgery (FLACS) versus the conventional phacoemulsification surgery (CPE).

Methods

Commonly used search databases, specifically MEDLINE, Cochrane Central, EMBASE, and http://www.clinicaltrials.gov were carefully searched for English publications comparing FLACS versus CPE. The selected endpoints which were assessed included incomplete capsulotomy, anterior capsulotomy tag, anterior capsule tear, posterior capsule tear, injury to the descemet's membrane, zonular dialysis, vitreous loss, macular or corneal edema, and elevated intra-ocular pressure. Statistical analysis was carried out by the latest version of the RevMan software (version 5.3) and represented by risk ratios (RR) with 95% confidence intervals (CI).

Results

A total number of 7156 participants were included. Three thousand five hundred and fifty four (3554) participants were assigned to the FLACS group. The risks for incomplete capsulotomy, anterior capsulotomy tag, and anterior capsular tear were significantly higher with FLACS (RR: 22.42, 95% CI: 4.53-110.82; P = 0.0001), (RR: 33.07, 95% CI: 6.53-167.56; P = 0.0001) and (RR: 4.74, 95% CI: 2.59-8.68; P = 0.00001) respectively. The risks for macular/corneal edema (RR: 2.05, 95% CI: 1.18-3.55; P = 0.01) and elevated intra-ocular pressure (RR: 3.24, 95% CI: 1.55-6.78; P = 0.002) were also significantly higher with FLACS. However, the risks for impaired descemet's membrane (RR: 0.95, 95% CI: 0.61-1.47; P = 0.80), zonular dialysis (RR: 0.40, 95% CI: 0.06-2.72; P = 0.35), vitreous loss (RR: 0.09, 95% CI: 0.01-1.63; P = 0.10) and posterior capsular tear (RR: 1.45, 95% CI: 0.23-9.16; P = 0.69) were not significantly different.

Conclusions

The current results showed that FLACS did not improve intra/post-operative complications in comparison to CPE. Further larger studies should confirm this hypothesis.",2019-08-09 +24371150,tasiRNAdb: a database of ta-siRNA regulatory pathways.,"

Summary

In plants, many trans-acting small interfering RNA (ta-siRNA) regulatory pathways have been identified as significant components of the gene networks involved in development, metabolism, responses to biotic and abiotic stresses and DNA methylation at the TAS locus. To obtain a more comprehensive understanding on the nature of ta-siRNA regulatory pathways, we developed a freely accessible resource, tasiRNAdb, to serve as a repository for the sequences of ta-siRNA regulatory pathway-related microRNAs, TASs, ta-siRNAs and ta-siRNA targets, and for the cascading relations among them. With 583 pathways from 18 species, tasiRNAdb is the largest resource for known ta-siRNA regulatory pathways currently available. tasiRNAdb also provides a tool named TasExpAnalysis that was developed to map user-submitted small RNA and degradome libraries to a stored/input TAS and to perform sRNA phasing analysis and TAS cleavage analysis.

Availability

The database of plant ta-siRNA regulatory pathways is available at http://bioinfo.jit.edu.cn/tasiRNADatabase/.",2013-12-25 +31926033,Enhanced Akt/GSK-3β/CREB signaling mediates the anti-inflammatory actions of mGluR5 positive allosteric modulators in microglia and following traumatic brain injury in male mice.,"We have previously shown that treatment with a mGluR5 positive allosteric modulator (PAM) is neuroprotective after experimental traumatic brain injury (TBI), limiting post-traumatic neuroinflammation by reducing pro-inflammatory microglial activation and promoting anti-inflammatory and neuroprotective responses. However, the specific molecular mechanisms governing this anti-inflammatory shift in microglia remain unknown. Here we show that the mGluR5 PAM, VU0360172 (VuPAM), regulates microglial inflammatory responses through activation of Akt, resulting in the inhibition of GSK-3β. GSK-3β regulates the phosphorylation of CREB, thereby controlling the expression of inflammation-related genes and microglial plasticity. The anti-inflammatory action of VuPAM in microglia is reversed by inhibiting Akt/GSK-3β/CREB signaling. Using a well-characterized TBI model and CX3CR1gfp/+ mice to visualize microglia in vivo, we demonstrate that VuPAM enhances Akt/GSK-3β/CREB signaling in the injured cortex, as well as anti-inflammatory microglial markers. Furthermore, in situ analysis revealed that GFP + microglia in the cortex of VuPAM-treated TBI mice co-express pCREB and the anti-inflammatory microglial phenotype marker YM1. Taken together, our data show that VuPAM decreases pro-inflammatory microglial activation by modulating Akt/GSK-3β/CREB signaling. These findings serve to clarify the potential neuroprotective mechanisms of mGluR5 PAM treatment after TBI, and suggest novel therapeutic targets for post-traumatic neuroinflammation. 
Cover Image for this issue: https://doi.org/10.1111/jnc.15048.",2020-01-28 +31401609,Dynamic Urban Environmental Exposures on Depression and Suicide (NEEDS) in the Netherlands: a protocol for a cross-sectional smartphone tracking study and a longitudinal population register study.,"

Introduction

Environmental exposures are intertwined with mental health outcomes. People are exposed to the environments in which they currently live, and to a multitude of environments along their daily movements and through their residential relocations. However, most research assumes that people are immobile, disregarding that such dynamic exposures also serve as stressors or buffers potentially associated with depression and suicide risk. The aim of the Dynamic Urban Environmental Exposures on Depression and Suicide (NEEDS) study is to examine how dynamic environmental exposures along people's daily movements and over their residential histories affect depression and suicide mortality in the Netherlands.

Methods and analysis

The research design comprises two studies emphasising the temporality of exposures. First, a cross-sectional study is assessing how daily exposures correlate with depression. A nationally representative survey was administered to participants recruited through stratified random sampling of the population aged 18-65 years. Survey data were enriched with smartphone-based data (eg, Global Positioning System tracking, Bluetooth sensing, social media usage, communication patterns) and environmental exposures (eg, green and blue spaces, noise, air pollution). Second, a longitudinal population register study is addressing the extent to which past environmental exposures over people's residential history affect suicide risk later in life. Statistical and machine learning-based models are being developed to quantify environment-health relations.

Ethics and dissemination

Ethical approval (FETC17-060) was granted by the Ethics Review Board of Utrecht University, The Netherlands. Project-related findings will be disseminated at conferences and in peer-reviewed journal papers. Other project outcomes will be made available through the project's web page, http://www.needs.sites.uu.nl.",2019-08-10 +29272359,"BioSeq-Analysis: a platform for DNA, RNA and protein sequence analysis based on machine learning approaches.","With the avalanche of biological sequences generated in the post-genomic age, one of the most challenging problems is how to computationally analyze their structures and functions. Machine learning techniques are playing key roles in this field. Typically, predictors based on machine learning techniques contain three main steps: feature extraction, predictor construction and performance evaluation. Although several Web servers and stand-alone tools have been developed to facilitate the biological sequence analysis, they only focus on individual step. In this regard, in this study a powerful Web server called BioSeq-Analysis (http://bioinformatics.hitsz.edu.cn/BioSeq-Analysis/) has been proposed to automatically complete the three main steps for constructing a predictor. The user only needs to upload the benchmark data set. BioSeq-Analysis can generate the optimized predictor based on the benchmark data set, and the performance measures can be reported as well. Furthermore, to maximize user's convenience, its stand-alone program was also released, which can be downloaded from http://bioinformatics.hitsz.edu.cn/BioSeq-Analysis/download/, and can be directly run on Windows, Linux and UNIX. Applied to three sequence analysis tasks, experimental results showed that the predictors generated by BioSeq-Analysis even outperformed some state-of-the-art methods. 
It is anticipated that BioSeq-Analysis will become a useful tool for biological sequence analysis.",2019-07-01 +31693213,Can Designer Indels Be Tailored by Gene Editing?: Can Indels Be Customized?,"Genome editing with engineered nucleases (GEENs) introduce site-specific DNA double-strand breaks (DSBs) and repairs DSBs via nonhomologous end-joining (NHEJ) pathways that eventually create indels (insertions/deletions) in a genome. Whether the features of indels resulting from gene editing could be customized is asked. A review of the literature reveals how gene editing technologies via NHEJ pathways impact gene editing. The survey consolidates a body of literature that suggests that the type (insertion, deletion, and complex) and the approximate length of indel edits can be somewhat customized with different GEENs and by manipulating the expression of key NHEJ genes. Structural data suggest that binding of GEENs to DNA may interfere with binding of key components of DNA repair complexes, favoring either classical- or alternative-NHEJ. The hypotheses have some limitations, but if validated, will enable scientists to better control indel makeup, holding promise for basic science and clinical applications of gene editing. Also see the video abstract here https://youtu.be/vTkJtUsLi3w.",2019-11-06 +31211210,Physiological and RNA sequencing data of white lupin plants grown under Fe and P deficiency.,"This DIB article provides details about transcriptional and physiological response of Fe- and P-deficient white lupin roots, an extensive and complete description of plant response is shown in the research article ""Physiological and transcriptomic data highlight common features between iron and phosphorus acquisition mechanisms in white lupin roots"" Venuti et al. [1]. White lupin plants were grown under hydroponic system and three different nutritional regimes: Fe deficiency (-Fe), P deficiency (-P), or Fe and P sufficiency (+P + Fe). 
Depending on nutritional treatment, white lupin plants showed changes in the fresh weights, in root external acidification and FeIII-reductase activity. Moreover, the transcriptomic changes occurring in apices and clusters of Fe-deficient lupin roots were investigated and compared with differences of gene expression occurring in P-deficient plants (-P) and in Fe- and P-sufficient plants (+P + Fe). Transcriptomic data are available in the public repository Gene Expression Omnibus (http://www.ncbi.nlm.nih.gov/geo) under the series entry (GSE112220). The annotation, mapping and enrichment analyses of differentially modulated transcripts were assessed.",2019-05-28 +31395072,MLMDA: a machine learning approach to predict and validate MicroRNA-disease associations by integrating of heterogenous information sources.,"

Background

Emerging evidences show that microRNA (miRNA) plays an important role in many human complex diseases. However, considering the inherent time-consuming and expensive of traditional in vitro experiments, more and more attention has been paid to the development of efficient and feasible computational methods to predict the potential associations between miRNA and disease.

Methods

In this work, we present a machine learning-based model called MLMDA for predicting the association of miRNAs and diseases. More specifically, we first use the k-mer sparse matrix to extract miRNA sequence information, and combine it with miRNA functional similarity, disease semantic similarity and Gaussian interaction profile kernel similarity information. Then, more representative features are extracted from them through deep auto-encoder neural network (AE). Finally, the random forest classifier is used to effectively predict potential miRNA-disease associations.

Results

The experimental results show that the MLMDA model achieves promising performance under fivefold cross validations with AUC values of 0.9172, which is higher than the methods using different classifiers or different feature combination methods mentioned in this paper. In addition, to further evaluate the prediction performance of MLMDA model, case studies are carried out with three Human complex diseases including Lymphoma, Lung Neoplasm, and Esophageal Neoplasms. As a result, 39, 37 and 36 out of the top 40 predicted miRNAs are confirmed by other miRNA-disease association databases.

Conclusions

These prominent experimental results suggest that the MLMDA model could serve as a useful tool guiding the future experimental validation for those promising miRNA biomarker candidates. The source code and datasets explored in this work are available at http://220.171.34.3:81/ .",2019-08-08 +31059668,Surgical Management of Lower Urinary Tract Symptoms Attributed to Benign Prostatic Hyperplasia: AUA Guideline Amendment 2019.,"

Purpose

Male lower urinary tract symptoms (LUTS) secondary to benign prostatic hyperplasia (BPH) is common in men and can have negative effects on quality of life (QoL). It is the hope that this Guideline becomes a reference for effective evidence-based surgical management of LUTS/BPH.

Materials and methods

The evidence team searched Ovid MEDLINE, the Cochrane Library, and the Agency for Healthcare Research and Quality database to identify studies indexed between January 2007-September 2017. Following initial publication, this guideline was amended in 2019 and reflects relevant literature published through January 2019. When sufficient evidence existed, the body of evidence was assigned a strength rating of A (high), B (moderate), or C (low) for support of Strong, Moderate, or Conditional Recommendations. In the absence of sufficient evidence, additional information is provided as Clinical Principles and Expert Opinions (table 1 in supplementary unabridged guideline, https://www.jurology.com).

Results

This Guideline provides evidence-based recommendations regarding management of LUTS/BPH utilizing surgery and minimally invasive surgical therapies (MIST). Additional statements are made regarding diagnostic and pre-operative tests. Clinical statements are made in comparison to what is generally accepted as the gold standard (i.e. transurethral resection of the prostate [TURP] monopolar and/or bipolar). This guideline is designed to be used in conjunction with the associated treatment algorithm (see figure).[Figure: see text]Conclusions:The prevalence and the severity of LUTS increases as men age and is an important diagnosis in the healthcare of patients and the welfare of society. This document will undergo updating as knowledge regarding treatments and future surgical options continues to expand.",2019-08-08 +30088220,Mapping correlations of psychological and structural connectome properties of the dataset of the human connectome project with the maximum spanning tree method.,"Genome-wide association studies (GWAS) opened new horizons in genomics and medicine by discovering novel genetic factors in numerous health conditions. The analogous analysis of the correlations of large quantities of psychological and brain imaging measures may yield similarly striking results in the brain science. Smith et al. (Nat Neurosci. 18(11): 1565-1567, 2015) presented a study of the associations between MRI-detected resting-state functional connectomes and behavioral data, based on the Human Connectome Project's (HCP) data release. Here we analyze the pairwise correlations between 717 psychological-, anatomical- and structural connectome-properties, based also on the Human Connectome Project's 500-subject dataset. For the connectome properties, we have focused on the structural (or anatomical) connectomes, instead of the functional connectomes. For the structural connectome analysis we have computed and publicly deposited structural braingraphs at the site http://braingraph.org . 
Numerous non-trivial and hard-to-compute graph-theoretical parameters (like minimum bisection width, minimum vertex cover, eigenvalue gap, maximum matching number, maximum fractional matching number) were computed for braingraphs of each subject, gained from the left- and right hemispheres and the whole brain. The correlations of these parameters, as well as other anatomical and behavioral measures were detected and analyzed. For discovering and visualizing the most interesting correlations in the 717 x 717 matrix, we have applied the maximum spanning tree method. Apart from numerous natural correlations, which describe parameters computable or approximable from one another, we have found several significant, novel correlations in the dataset, e.g., between the score of the NIH Toolbox 9-hole Pegboard Dexterity Test and the maximum weight graph theoretical matching in the left hemisphere. We also have found correlations described very recently and independently from the HCP-dataset: e.g., between gambling behavior and the number of the connections leaving the insula: these already known findings independently validate the power of our method.",2019-10-01 +22674158,Receptor databases and computational websites for ligand binding.,"Ligand binding to receptors is a key step in the regulation of cellular function by neurotransmitters, hormones, and many drugs. Not surprisingly then, genome projects have found that families of receptor genes form the largest groups of functional genes in mammalian genomes. A large body of experimental data have thus been generated on receptor-ligand interactions, and in turn, numerous computational tools for the in silico prediction of receptor-ligand interactions have been developed. Websites containing ligand binding data and tools to assess and manipulate such data are available in the public domain. 
Such Websites provide a resource for experimentalists studying receptor binding and for scientists interested in utilizing large data sets for other purposes, which include modeling structure-function relationships, defining patterns of interactions of drugs with different receptors, and computational comparisons among receptors. The Websites include databases of receptor protein and nucleotide sequences for particular classes of receptors (such as G-protein-coupled receptors and nuclear receptors) and of experimental results from receptor-ligand binding assays, as well as computational tools for modeling the interactions between ligands and receptors and predicting the function of orphan receptors. In this chapter, we provide information and Uniform Resource Locators (URLs) for Websites that facilitate computational and experimental studies of receptor-ligand interactions. This list will be periodically updated at https://sites.google.com/site/receptorligandbinding/.",2012-01-01 +28214993,SCALEUS: Semantic Web Services Integration for Biomedical Applications.,"In recent years, we have witnessed an explosion of biological data resulting largely from the demands of life science research. The vast majority of these data are freely available via diverse bioinformatics platforms, including relational databases and conventional keyword search applications. This type of approach has achieved great results in the last few years, but proved to be unfeasible when information needs to be combined or shared among different and scattered sources. During recent years, many of these data distribution challenges have been solved with the adoption of semantic web. Despite the evident benefits of this technology, its adoption introduced new challenges related with the migration process, from existent systems to the semantic level. 
To facilitate this transition, we have developed Scaleus, a semantic web migration tool that can be deployed on top of traditional systems in order to bring knowledge, inference rules, and query federation to the existent data. Targeted at the biomedical domain, this web-based platform offers, in a single package, straightforward data integration and semantic web services that help developers and researchers in the creation process of new semantically enhanced information systems. SCALEUS is available as open source at http://bioinformatics-ua.github.io/scaleus/ .",2017-02-18 +31528358,DisCVR: Rapid viral diagnosis from high-throughput sequencing data.,"High-throughput sequencing (HTS) enables most pathogens in a clinical sample to be detected from a single analysis, thereby providing novel opportunities for diagnosis, surveillance, and epidemiology. However, this powerful technology is difficult to apply in diagnostic laboratories because of its computational and bioinformatic demands. We have developed DisCVR, which detects known human viruses in clinical samples by matching sample k-mers (twenty-two nucleotide sequences) to k-mers from taxonomically labeled viral genomes. DisCVR was validated using published HTS data for eighty-nine clinical samples from adults with upper respiratory tract infections. These samples had been tested for viruses metagenomically and also by real-time polymerase chain reaction assay, which is the standard diagnostic method. DisCVR detected human viruses with high sensitivity (79%) and specificity (100%), and was able to detect mixed infections. Moreover, it produced results comparable to those in a published metagenomic analysis of 177 blood samples from patients in Nigeria. DisCVR has been designed as a user-friendly tool for detecting human viruses from HTS data using computers with limited RAM and processing power, and includes a graphical user interface to help users interpret and validate the output. 
It is written in Java and is publicly available from http://bioinformatics.cvr.ac.uk/discvr.php.",2019-07-01 +31395530,Health Care in the Age of Interoperability Part 6: The Future of FHIR.,"About this Series This is the sixth and last article in a series on the dramatic transformation taking place in health informatics in large part because of the new Health Level 7 (HL7) Fast Healthcare Interoperability Resources (FHIR) standard. The first article provided background on health care, electronic health record systems for physicians, and the challenges they both face along with the potential of interoperability to help overcome them. The second introduced the basics of the FHIR standard and some suggested resources for those who are interested in its further exploration. The third introduced SMART on FHIR which, based on its wide adoption, has become the default standard FHIR app platform. The fourth looked at clinical decision support, arguably the single most important provider-facing use case for FHIR. The fifth introduced the personal health record and tools that can utilize the data stored in it as an important use case for FHIR in support of patients. This article looks at the future uses of FHIR with a particular emphasis on those that might impact on research uses of health data. The articles in this series are intended to introduce researchers from other fields to this one and assume no prior knowledge of healthcare or health informatics. They are abstracted from the author's recently published book, Health Informatics on FHIR: How HL7's New API is Transforming Healthcare (Springer International Publishing: https://www.springer.com/us/book/9783319934136).",2019-07-01 +31534680,Female-biased gape and body-size dimorphism in the New World watersnakes (tribe: Thamnophiini) oppose predictions from Rensch's rule.,"

Abstract

Sexual-size dimorphism (SSD) is ubiquitous across animals and often biased in the direction of larger females in snakes and other ectothermic organisms. To understand how SSD evolves across species, Rensch's rule predicts that in taxa where males are larger, SSD increases with body size. In contrast, where females are larger, SSD decreases with body size. While this rule holds for many taxa, it may be ambiguous for others, particularly ectothermic vertebrates. Importantly, this rule suggests that the outcomes of SSD over phylogenetic time scales depend on the direction of dimorphism predicated on the difference in reproductive efforts between males and females. Here, we examine SSD in the context of Rensch's rule in Thamnophiini, the gartersnakes and watersnakes, a prominent group that in many areas comprises the majority of the North American snake biota. Using a dated phylogeny, measurements of gape, body, and tail size, we show that these snakes do not follow Rensch's rule, but rather female-biased SSD increases with body size. We in turn find that this allometry is most pronounced with gape and is correlated with both neonate and litter size, suggesting that acquiring prey of increased size may be directly related to fecundity selection. These changes in SSD are not constrained to any particular clade; we find no evidence of phylogenetic shifts in those traits showing SSD. We suggest several ways forward to better understand the anatomical units of selection for SSD and modularity.

Open research badges

This article has been awarded Open Data and Open Materials Badges. All materials and data are publicly accessible via the Open Science Framework at https://doi.org/10.5061/dryad.3pn57h0.",2019-08-09 +30239606,Powerful and efficient SNP-set association tests across multiple phenotypes using GWAS summary data.,"

Motivation

Many GWAS conducted in the past decade have identified tens of thousands of disease related variants, which in total explained only part of the heritability for most traits. There remain many more genetics variants with small effect sizes to be discovered. This has motivated the development of sequencing studies with larger sample sizes and increased resolution of genotyped variants, e.g., the ongoing NHLBI Trans-Omics for Precision Medicine (TOPMed) whole genome sequencing project. An alternative approach is the development of novel and more powerful statistical methods. The current dominating approach in the field of GWAS analysis is the ""single trait single variant"" association test, despite the fact that most GWAS are conducted in deeply-phenotyped cohorts with many correlated traits measured. In this paper, we aim to develop rigorous methods that integrate multiple correlated traits and multiple variants to improve the power to detect novel variants. In recognition of the difficulty of accessing raw genotype and phenotype data due to privacy and logistic concerns, we develop methods that are applicable to publicly available GWAS summary data.

Results

We build rigorous statistical models for GWAS summary statistics to motivate novel multi-trait SNP-set association tests, including variance component test, burden test and their adaptive test, and develop efficient numerical algorithms to quickly compute their analytical P-values. We implement the proposed methods in an open source R package. We conduct thorough simulation studies to verify the proposed methods rigorously control type I errors at the genome-wide significance level, and further demonstrate their utility via comprehensive analysis of GWAS summary data for multiple lipids traits and glycemic traits. We identified many novel loci that were not detected by the individual trait based GWAS analysis.

Availability and implementation

We have implemented the proposed methods in an R package freely available at http://www.github.com/baolinwu/MSKAT.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +28093404,Development and application of an algorithm to compute weighted multiple glycan alignments.,"

Motivation

A glycan consists of monosaccharides linked by glycosidic bonds, has branches and forms complex molecular structures. Databases have been developed to store large amounts of glycan-binding experiments, including glycan arrays with glycan-binding proteins. However, there are few bioinformatics techniques to analyze large amounts of data for glycans because there are few tools that can handle the complexity of glycan structures. Thus, we have developed the MCAW (Multiple Carbohydrate Alignment with Weights) tool that can align multiple glycan structures, to aid in the understanding of their function as binding recognition molecules.

Results

We have described in detail the first algorithm to perform multiple glycan alignments by modeling glycans as trees. To test our tool, we prepared several data sets, and as a result, we found that the glycan motif could be successfully aligned without any prior knowledge applied to the tool, and the known recognition binding sites of glycans could be aligned at a high rate amongst all our datasets tested. We thus claim that our tool is able to find meaningful glycan recognition and binding patterns using data obtained by glycan-binding experiments. The development and availability of an effective multiple glycan alignment tool opens possibilities for many other glycoinformatics analysis, making this work a big step towards furthering glycomics analysis.

Availability and implementation

http://www.rings.t.soka.ac.jp.

Contact

kkiyoko@soka.ac.jp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +28168018,"IRBAS: An online database to collate, analyze, and synthesize data on the biodiversity and ecology of intermittent rivers worldwide.","Key questions dominating contemporary ecological research and management concern interactions between biodiversity, ecosystem processes, and ecosystem services provision in the face of global change. This is particularly salient for freshwater biodiversity and in the context of river drying and flow-regime change. Rivers that stop flowing and dry, herein intermittent rivers, are globally prevalent and dynamic ecosystems on which the body of research is expanding rapidly, consistent with the era of big data. However, the data encapsulated by this work remain largely fragmented, limiting our ability to answer the key questions beyond a case-by-case basis. To this end, the Intermittent River Biodiversity Analysis and Synthesis (IRBAS; http://irbas.cesab.org) project has collated, analyzed, and synthesized data from across the world on the biodiversity and environmental characteristics of intermittent rivers. The IRBAS database integrates and provides free access to these data, contributing to the growing, and global, knowledge base on these ubiquitous and important river systems, for both theoretical and applied advancement. The IRBAS database currently houses over 2000 data samples collected from six countries across three continents, primarily describing aquatic invertebrate taxa inhabiting intermittent rivers during flowing hydrological phases. As such, there is room to expand the biogeographic and taxonomic coverage, for example, through addition of data collected during nonflowing and dry hydrological phases. We encourage contributions and provide guidance on how to contribute and access data. 
Ultimately, the IRBAS database serves as a portal, storage, standardization, and discovery tool, enabling collation, synthesis, and analysis of data to elucidate patterns in river biodiversity and guide management. Contribution creates high visibility for datasets, facilitating collaboration. The IRBAS database will grow in content as the study of intermittent rivers continues and data retrieval will allow for networking, meta-analyses, and testing of generalizations across multiple systems, regions, and taxa.",2017-01-03 +,International Soil Moisture Network: a data hosting facility for global in situ soil moisture measurements,"In situ measurements of soil moisture are invaluable for calibrating and validating land surface models and satellite-based soil moisture retrievals. In addition, longterm time series of in situ soil moisture measurements themselves can reveal trends in the water cycle related to climate or land cover change. Nevertheless, on a worldwide basis the number of meteorological networks and stations measuring soil moisture, in particular on a continuous basis, is still limited and the data they provide lack standardization of technique and protocol. To overcome many of these limitations, the International Soil Moisture Network (ISMN; http://www.ipf.tuwien.ac.at/insitu) was initiated to serve as a centralized data hosting facility where globally available in situ soil moisture measurements from operational networks and validation campaigns are collected, harmonized, and made available to users. Data collecting networks share their soil moisture datasets with the ISMN on a voluntary and no-cost basis. Incoming soil moisture data are automatically transformed into common volumetric soil moisture units and checked for outliers and implausible values. Apart from soil water measurements from different depths, important metadata and meteorological variables (e.g., precipitation and soil temperature) are stored in the database. 
These will assist the user in correctly interpreting the soil moisture data. The database is queried through a graphical user interface while output of data selected for download is provided according to common standards for data and metadata. Currently (status May 2011), the ISMN contains data of 19 networks and more than 500 stations located in North America, Europe, Asia, and Australia. The time period spanned by the entire database runs from 1952 until the present, although most datasets have originated during the last decade. The database is rapidly expanding, which means that both the number of stations and the time period covered by the existing stations are still growing. Hence, it will become an increasingly important resource for validating and improving satellite-derived soil moisture products and studying climate related trends. As the ISMN is animated by the scientific community itself, we invite potential networks to enrich the collection by sharing their in situ soil moisture data.",2011-01-01 +29447218,FindPrimaryPairs: An efficient algorithm for predicting element-transferring reactant/product pairs in metabolic networks.,"The metabolism of individual organisms and biological communities can be viewed as a network of metabolites connected to each other through chemical reactions. In metabolic networks, chemical reactions transform reactants into products, thereby transferring elements between these metabolites. Knowledge of how elements are transferred through reactant/product pairs allows for the identification of primary compound connections through a metabolic network. However, such information is not readily available and is often challenging to obtain for large reaction databases or genome-scale metabolic models. In this study, a new algorithm was developed for automatically predicting the element-transferring reactant/product pairs using the limited information available in the standard representation of metabolic networks. 
The algorithm demonstrated high efficiency in analyzing large datasets and provided accurate predictions when benchmarked with manually curated data. Applying the algorithm to the visualization of metabolic networks highlighted pathways of primary reactant/product connections and provided an organized view of element-transferring biochemical transformations. The algorithm was implemented as a new function in the open source software package PSAMM in the release v0.30 (https://zhanglab.github.io/psamm/).",2018-02-15 +32418878,"Negative Predictive Value of Biparametric Prostate Magnetic Resonance Imaging in Excluding Significant Prostate Cancer: A Pooled Data Analysis Based on Clinical Data from Four Prospective, Registered Studies.","

Background

Multiparametric prostate magnetic resonance imaging (mpMRI) can be considered the gold standard in prostate magnetic resonance imaging (MRI). Biparametric prostate MRI (bpMRI) is faster and could be a feasible alternative to mpMRI.

Objective

To determine the negative predictive value (NPV) of Improved Prostate Cancer Diagnosis (IMPROD) bpMRI as a whole and in clinical subgroups in primary diagnostics of clinically significant prostate cancer (CSPCa).

Design, setting, and participants

This is a pooled data analysis of four prospective, registered clinical trials investigating prebiopsy IMPROD bpMRI. Men with a clinical suspicion of prostate cancer (PCa) were included.

Intervention

Prebiopsy IMPROD bpMRI was performed, and an IMPROD bpMRI Likert scoring system was used. If suspicious lesions (IMPROD bpMRI Likert score 3-5) were visible, targeted biopsies in addition to systematic biopsies were taken.

Outcome measurements and statistical analysis

Performance measures of IMPROD bpMRI in CSPCa diagnostics were evaluated. NPV was also evaluated in clinical subgroups. Gleason grade ≥3 + 4 in any biopsy core taken was defined as CSPCa.

Results and limitations

A total of 639 men were included in the analysis. The mean age was 64 yr, mean prostate-specific antigen level was 8.9 ng/ml, and CSPCa prevalence was 48%. NPVs of IMPROD bpMRI Likert scores 3-5 and 4-5 for CSPCa were 0.932 and 0.909, respectively, and the corresponding positive predictive values were 0.589 and 0.720. Only nine of 132 (7%) men with IMPROD bpMRI Likert score 1-2 had CSPCa and none with Gleason score >7. Thus, 132 of 639 (21%) study patients could have avoided biopsies without missing a single Gleason >7 cancer in the study biopsies. In the subgroup analysis, no clear outlier was present. The limitation is uncertainty of the true CSPCa prevalence.

Conclusions

IMPROD bpMRI demonstrated a high NPV to rule out CSPCa. IMPROD bpMRI Likert score 1-2 excludes Gleason >7 PCa in the study biopsies.

Patient summary

We investigated the feasibility of prostate magnetic resonance imaging (MRI) with the Improved Prostate Cancer Diagnosis (IMPROD) biparametric MRI (bpMRI) protocol in excluding significant prostate cancer. In this study, highly aggressive prostate cancer was excluded using the publicly available IMPROD bpMRI protocol (http://petiv.utu.fi/multiimprod/).",2020-05-14 +31848056,Characterisation of the dynamics of past droughts.,"Drought is a complex natural phenomenon. The description of the way in which drought changes (moves) in space may help to acquire knowledge on its drivers and processes to improve its monitoring and prediction. This research presents the application of an approach to characterise the dynamics of drought. Tracks, severity, duration, as well as localisation (onset and end position), and rotation of droughts were calculated. Results of calculated droughts were compared with documented information. Data from the Standardized Precipitation Evaporation Index (SPEI) Global Drought Monitor was used to identify droughts in India as an example for the period 1901-2013. Results show regions where droughts with considerable coverage tend to occur. Paths, i.e. consecutive spatial tracks, of six of the most severe reported droughts were analysed. In all of them, areas overlap considerably over time, which suggest that drought remains in the same region for a period of time. Results of this research are being used to build a model to predict the spatial drought tracks, incl. India (https://www.researchgate.net/project/STAND-Spatio-Temporal-ANalysis-of-Drought).",2019-11-18 +28025343,Reefgenomics.Org - a repository for marine genomics data. ,"Over the last decade, technological advancements have substantially decreased the cost and time of obtaining large amounts of sequencing data. Paired with the exponentially increased computing power, individual labs are now able to sequence genomes or transcriptomes to investigate biological questions of interest. 
This has led to a significant increase in available sequence data. Although the bulk of data published in articles are stored in public sequence databases, very often, only raw sequencing data are available; miscellaneous data such as assembled transcriptomes, genome annotations etc. are not easily obtainable through the same means. Here, we introduce our website (http://reefgenomics.org) that aims to centralize genomic and transcriptomic data from marine organisms. Besides providing convenient means to download sequences, we provide (where applicable) a genome browser to explore available genomic features, and a BLAST interface to search through the hosted sequences. Through the interface, multiple datasets can be queried simultaneously, allowing for the retrieval of matching sequences from organisms of interest. The minimalistic, no-frills interface reduces visual clutter, making it convenient for end-users to search and explore processed sequence data. DATABASE URL: http://reefgenomics.org.",2016-12-26 +24267917,mirCoX: a database of miRNA-mRNA expression correlations derived from RNA-seq meta-analysis.,"

Background

Experimentally validated co-expression correlations between miRNAs and genes are a valuable resource to corroborate observations about miRNA/mRNA changes after experimental perturbations, as well as compare miRNA target predictions with empirical observations. For example, when a given miRNA is transcribed, true targets of that miRNA should tend to have lower expression levels relative to when the miRNA is not expressed.

Methods

We processed publicly available human RNA-seq experiments obtained from NCBI's Sequence Read Archive (SRA) to identify miRNA-mRNA co-expression trends and summarized them in terms of their Pearson's Correlation Coefficient (PCC) and significance.

Results

We found that sequence-derived parameters from TargetScan and miRanda were predictive of co-expression, and that TargetScan- and miRanda-derived gene-miRNA pairs tend to have anti-correlated expression patterns in RNA-seq data compared to controls. We provide this data for download and as a web application available at http://wrenlab.org/mirCoX/.

Conclusion

This database of empirically established miRNA-mRNA transcriptional correlations will help to corroborate experimental observations and could be used to help refine and validate miRNA target predictions.",2013-10-09 +31815298,"Land-use change affects size spectra, energy flux and ecosystem functions in litter and soil invertebrates.","In Focus: Potapov, A. M., Klarner, B., Sandmann, D., Widyastuti, R. and Scheu, S. (2019). Linking size spectrum, energy flux and trophic multifunctionality in soil food webs of tropical land-use systems. Journal of Animal Ecology, 88, 1845-1859. https://doi.org/10.1111/1365-2656.13027 Potapov et al. (2019) advance our understanding of the various levels of the consequences of human impact on ecosystems. They examine the communities of litter and soil invertebrates in four different forests (from rainforest to oil palm plantations). Data on abundance, body masses and trophic guild in litter and soil invertebrates are expanded to a study towards biodiversity, biomass, energy flux and ecosystem functions. Their results show that size spectra are affected differently for decomposers, herbivores, omnivores and predators. Most of these groups decrease in abundance with increasing land use, and only large decomposers increase strongly. Moreover, creating trophic-group food webs for litter and soil invertebrates of each forest demonstrates the changes in energy flux and ecosystem functions. With their study, Potapov et al. 
(2019) present new insights into ecosystem functions and the sensitivity of communities to changes in land use.",2019-12-01 +30291593,The role of semantic transparency in visual word recognition of compound words: A megastudy approach.,"Previous studies on visual word recognition of compound words have provided evidence for the influence of lexical properties (e.g., length, frequency) and semantic transparency (the degree of relatedness in meaning between a compound word and its constituents) in morphological processing (e.g., to what extent is doorbell influenced by door and bell?). However, a number of questions in this domain, which are difficult to address with the available methodological resources, are still unresolved. We collected semantic transparency scores for 2,861 compound words at the constituent level (i.e., how strongly the overall meaning of a compound word is related to that of each constituent) and analyzed their effects on speeded pronunciation and lexical decision performance for the compound words using the English Lexicon Project (http://elexicon.wustl.edu) data. The results from both tasks indicated that our human-judged semantic transparency ratings for both the first and second constituents play a significant role in compound word processing. Moreover, additional analyses indicated that the human-judged semantic transparency scores at the constituent level accounted for more variance in compound word recognition performance than did either whole-word semantic transparency scores or corpus-based semantic distance scores.",2019-12-01 +30252043,SanXoT: a modular and versatile package for the quantitative analysis of high-throughput proteomics experiments.,"

Summary

Mass spectrometry-based proteomics has had a formidable development in recent years, increasing the amount of data handled and the complexity of the statistical resources needed. Here we present SanXoT, an open-source, standalone software package for the statistical analysis of high-throughput, quantitative proteomics experiments. SanXoT is based on our previously developed weighted spectrum, peptide and protein statistical model and has been specifically designed to be modular, scalable and user-configurable. SanXoT allows limitless workflows that adapt to most experimental setups, including quantitative protein analysis in multiple experiments, systems biology, quantification of post-translational modifications and comparison and merging of experimental data from technical or biological replicates.

Availability and implementation

Download links for the SanXoT Software Package, source code and documentation are available at https://wikis.cnic.es/proteomica/index.php/SSP.

Contact

jvazquez@cnic.es or ebonzon@cnic.es.

Supplementary information

Supplementary information is available at Bioinformatics online.",2019-05-01 +28529706,Expresso: A database and web server for exploring the interaction of transcription factors and their target genes in Arabidopsis thaliana using ChIP-Seq peak data.,"Motivation: The increasing availability of chromatin immunoprecipitation sequencing (ChIP-Seq) data enables us to learn more about the action of transcription factors in the regulation of gene expression. Even though in vivo transcriptional regulation often involves the concerted action of more than one transcription factor, the format of each individual ChIP-Seq dataset usually represents the action of a single transcription factor. Therefore, a relational database in which available ChIP-Seq datasets are curated is essential. Results: We present Expresso (database and webserver) as a tool for the collection and integration of available Arabidopsis ChIP-Seq peak data, which in turn can be linked to a user's gene expression data. Known target genes of transcription factors were identified by motif analysis of publicly available GEO ChIP-Seq data sets. Expresso currently provides three services: 1) Identification of target genes of a given transcription factor; 2) Identification of transcription factors that regulate a gene of interest; 3) Computation of correlation between the gene expression of transcription factors and their target genes. Availability: Expresso is freely available at http://bioinformatics.cs.vt.edu/expresso/.",2017-03-28 +32027752,Lactobacillus rhamnosus GG Attenuates Lipopolysaccharide-Induced Inflammation and Barrier Dysfunction by Regulating MAPK/NF-κB Signaling and Modulating Metabolome in the Piglet Intestine.,"BACKGROUND:Probiotic Lactobacillius rhamnosus GG (LGG) shows beneficial immunomodulation on cultured cell lines in vitro and in mouse models. 
OBJECTIVE:The aim was to investigate the effects of LGG on intestinal injury and the underlying mechanisms by elucidating inflammatory signaling pathways and metabolomic response to LPS stimulation in the piglet intestine. METHODS:Piglets (Duroc × Landrace × Large White, including males and female; 8.6 ± 1.1 kg) aged 28 d were assigned to 3 groups (n = 6/group): oral inoculation with PBS for 2 wk before intraperitoneal injection of physiological saline [control (CON)] or LPS (25 μg/kg body weight; LPS) or oral inoculation with LGG for 2 wk before intraperitoneal injection of LPS (LGG+LPS). Piglets were killed 4 h after LPS injection. Systemic inflammation, intestinal integrity, inflammation signals, and metabolomic characteristics in the intestine were determined. RESULTS:Compared with CON, LPS stimulation significantly decreased ileal zonula occludens 1 (ZO-1; 44%), claudin-3 (44%), and occludin (41%) expression; increased serum diamineoxidase (73%), D-xylose (19%), TNF-α (43%), and IL-6 (55%) concentrations; induced p38 mitogen-activated protein kinase (p38 MAPK; 85%), extracellular signal-regulated kinase (ERK; 96%), and NF-κB p65 phosphorylation (37%) (P < 0.05). Compared with LPS stimulation alone, LGG pretreatment significantly enhanced the intestinal barrier by upregulating expressions of tight junction proteins (ZO-1, 73%; claudin-3, 55%; occludin, 67%), thereby decreasing serum diamineoxidase (26%) and D-xylose (28%) concentrations, and also reduced serum TNF-α expression (16%) and ileal p38 MAPK (79%), ERK (43%) and NF-κB p65 (37%) phosphorylation levels (P < 0.05). Metabolomic analysis showed clear separation between each group. The concentrations of caprylic acid [fold-change (FC) = 2.39], 1-mono-olein (FC = 2.68), erythritol (FC = 4.62), and ethanolamine (FC = 4.47) significantly increased in the intestine of LGG + LPS piglets compared with the LPS group (P < 0.05). 
CONCLUSIONS:These data suggest that LGG alleviates gut inflammation, improves intestinal barrier function, and modulates the metabolite profile of piglets challenged with LPS. This trial was registered at the Zhejiang University (http://www.lac.zju.edu.cn) as ZJU20170529.",2020-05-01 +31809465,Surgical Treatment of Patients With Dual Hip and Spinal Degenerative Disease: Effect of Surgical Sequence of Spinal Fusion and Total Hip Arthroplasty on Postoperative Complications.,"

Study design

Retrospective study.

Objective

To determine how lumbar spinal fusion-total hip arthroplasty (LSF-THA) operative sequence would affect THA outcomes.

Summary of background data

Outcomes following THA in patients with a history of lumbar spinal degenerative disease and fusion are incompletely understood.

Methods

The PearlDiver Research Program (http://www.pearldiverinc.com) was used to identify patients undergoing primary THA. Patients were divided into four cohorts: 1) Primary THA without spine pathology, 2) remote LSF prior to hip pathology and THA, and patients with concurrent hip and spinal pathology that had 3) THA following LSF, and 4) THA prior to LSF. Postoperative complications and opioid use were assessed with multivariable logistic regression to determine the effect of spinal degenerative disease and operative sequence.

Results

Between 2007 and 2017, 85,595 patients underwent primary THA, of whom 93.6% had THA without lumbar spine degenerative disease, 0.7% had a history of remote LSF, and those with concurrent hip and spine pathology, 1.6% had THA prior to LSF, and 2.4% had THA following LSF. Patients with hip and lumbar spine pathology who underwent THA prior to LSF had significantly higher rates of dislocation (aOR = 2.46, P < 0.0001), infection (aOR = 2.65, P < 0.0001), revision surgery (aOR = 1.91, P < 0.0001), and postoperative opioid use at 1 month (aOR: 1.63, P < 0.001), 3 months (aOR = 1.80, P < 0.001), 6 months (aOR: 2.69, P < 0.001), and 12 months (aOR = 3.28, P < 0.001) compared with those treated with THA following LSF.

Conclusion

Patients with degenerative hip and lumbar spine pathology who undergo THA prior to LSF have a significantly increased risk of postoperative dislocation, infection, revision surgery, and prolonged opioid use compared with THA after LSF. Surgeons should consider the surgical sequence of THA and LSF on outcomes for patients with this dual pathology. Shared decision making between patients, spine surgeons, and arthroplasty surgeons is necessary to optimize outcomes in patients with concomitant hip and spine pathology.

Level of evidence

3.",2020-05-01 +31385145,"Rapid detection of IMP, NDM, VIM, KPC and OXA-48-like carbapenemases from Enterobacteriales and Gram-negative non-fermenter bacteria by real-time PCR and melt-curve analysis.","Carbapenemase-producing microorganisms are increasingly isolated and often associated with treatment failures and outbreaks. The need for reliable and timely detection and/or confirmation of carbapenemase production is paramount; therefore, a real-time PCR assay targeting IMP, NDM, VIM, KPC and OXA-48-like carbapenemases was designed and validated. All available allele variants of the above carbapenemases were downloaded from the Beta-Lactamase DataBase ( http://bldb.eu/ ), aligned with Clustal Omega and primers designed using Primer-BLAST. Real-time PCR monoplexes were optimized for the QuantStudio 6-Flex (Applied Biosystems) using the PowerUp SYBR Green Master Mix (Life Technologies) and validated using a panel of 204 characterised strains carrying a wide range of beta-lactamases, sometimes in combination. Melt-curve analysis was used to confirm positive results. The in silico approach allowed primers to be designed in conserved regions of the KPC and NDM alignments, while three primer sets for IMP and two for VIM were necessary to ensure amplification of the different variants. One primer set was designed for OXA-48-like; however, it is unlikely to detect all variants. Expected results were obtained for all 204 tested strains, with 100% sensitivity and specificity. Melt-curve analysis showed consistent Tm results for KPC, NDM, and OXA-48-like; differences were instead noted for IMP and VIM as likely consequence of higher variability in the PCR target regions. Inhibition was not observed. The assay is rapid, easy to perform and implement. 
It enables unequivocal detection of IMP, NDM, VIM, KPC and OXA-48-like carbapenemases even when more than one type is present simultaneously.",2019-08-05 +30670076,Prediction of functional microRNA targets by integrative modeling of microRNA binding and target expression data.,"We perform a large-scale RNA sequencing study to experimentally identify genes that are downregulated by 25 miRNAs. This RNA-seq dataset is combined with public miRNA target binding data to systematically identify miRNA targeting features that are characteristic of both miRNA binding and target downregulation. By integrating these common features in a machine learning framework, we develop and validate an improved computational model for genome-wide miRNA target prediction. All prediction data can be accessed at miRDB ( http://mirdb.org ).",2019-01-22 +30581927,Korean public and hospital data for estimating LDL-cholesterol.,"The data is composed of 14,812 subjects, and was obtained from the Korean National Health and Nutritional Examination Survey (KNHANES) from 2009 to 2015. The KNHANES data is publicly available at https://knhanes.cdc.go.kr/knhanes/eng/index.do. By using random annotated ID, all subjects can be anonymous. The dataset consisting of 4520 participants was obtained from Wonju Severance Christian Hospital (WSCH) in South Korea. To protect the patient׳s personal information, we removed ID and gender of patients. Detail information of the data presented in the present article is available in the research article ""Deep neural network for estimating low density lipoprotein cholesterol"" (Lee et al., 2018).",2018-12-07 +22681406,The immune epitope database: a historical retrospective of the first decade.,"As the amount of biomedical information available in the literature continues to increase, databases that aggregate this information continue to grow in importance and scope. 
The population of databases can occur either through fully automated text mining approaches or through manual curation by human subject experts. We here report our experiences in populating the National Institute of Allergy and Infectious Diseases sponsored Immune Epitope Database and Analysis Resource (IEDB, http://iedb.org), which was created in 2003, and as of 2012 captures the epitope information from approximately 99% of all papers published to date that describe immune epitopes (with the exception of cancer and HIV data). This was achieved using a hybrid model based on automated document categorization and extensive human expert involvement. This task required automated scanning of over 22 million PubMed abstracts followed by classification and curation of over 13 000 references, including over 7000 infectious disease-related manuscripts, over 1000 allergy-related manuscripts, roughly 4000 related to autoimmunity, and 1000 transplant/alloantigen-related manuscripts. The IEDB curation involves an unprecedented level of detail, capturing for each paper the actual experiments performed for each different epitope structure. Key to enabling this process was the extensive use of ontologies to ensure rigorous and consistent data representation as well as interoperability with other bioinformatics resources, including the Protein Data Bank, Chemical Entities of Biological Interest, and the NIAID Bioinformatics Resource Centers. A growing fraction of the IEDB data derives from direct submissions by research groups engaged in epitope discovery, and is being facilitated by the implementation of novel data submission tools. The present explosion of information contained in biological databases demands effective query and display capabilities to optimize the user experience. 
Accordingly, the development of original ways to query the database, on the basis of ontologically driven hierarchical trees, and display of epitope data in aggregate in a biologically intuitive yet rigorous fashion is now at the forefront of the IEDB efforts. We also highlight advances made in the realm of epitope analysis and predictive tools available in the IEDB.",2012-10-01 +30808938,The UWHAM and SWHAM Software Package.,"We introduce the UWHAM (binless weighted histogram analysis method) and SWHAM (stochastic UWHAM) software package that can be used to estimate the density of states and free energy differences based on the data generated by multi-state simulations. The programs used to solve the UWHAM equations are written in the C++ language and operated via the command line interface. In this paper, first we review the theoretical bases of UWHAM, its stochastic solver RE-SWHAM (replica exchange-like SWHAM)and ST-SWHAM (serial tempering-like SWHAM). Then we provide a tutorial with examples that explains how to apply the UWHAM program package to analyze the data generated by different types of multi-state simulations: umbrella sampling, replica exchange, free energy perturbation simulations, etc. The tutorial examples also show that the UWHAM equations can be solved stochastically by applying the RE-SWHAM and ST-SWHAM programs when the data ensemble is large. If the simulations at some states are far from equilibrium, the Stratified RE-SWHAM program can be applied to obtain the equilibrium distribution of the state of interest. All the source codes and the tutorial examples are available from our group's web page: https://ronlevygroup.cst.temple.edu/software/UWHAM_and_SWHAM_webpage/index.html .",2019-02-26 +31501882,IL-13 mRNA Tissue Content Identifies Two Subsets of Adult Ulcerative Colitis Patients With Different Clinical and Mucosa-Associated Microbiota Profiles.,"

Background and aims

A personalized approach to therapy hold great promise to improve disease outcomes. To this end, the identification of different subsets of patients according to the prevalent pathogenic process might guide the choice of therapeutic strategy. We hypothesize that ulcerative colitis [UC] patients might be stratified according to distinctive cytokine profiles and/or to a specific mucosa-associated microbiota.

Methods

In a cohort of clinically and endoscopic active UC patients and controls, we used quantitative PCR to analyse the mucosal cytokine mRNA content and 16S rRNA gene sequencing to assess the mucosa-associated microbiota composition.

Results

We demonstrate, by means of data-driven approach, the existence of a specific UC patient subgroup characterized by elevated IL-13 mRNA tissue content separate from patients with low IL-13 mRNA tissue content. The two subsets differ in clinical-pathological characteristics. High IL-13 mRNA patients are younger at diagnosis and have a higher prevalence of extensive colitis than low IL-13 mRNA patients. They also show more frequent use of steroid/immunosuppressant/anti-tumour necrosis factor α therapy during 1 year of follow-up. The two subgroups show differential enrichment of mucosa-associated microbiota genera with a prevalence of Prevotella in patients with high IL-13 mRNA tissue content and Sutterella and Acidaminococcus in patients with low IL-13 mRNA tissue content.

Conclusion

Assessment of mucosal IL-13 mRNA might help in the identification of a patient subgroup that might benefit from a therapeutic approach modulating IL-13.

Podcast

This article has an associated podcast which can be accessed at https://academic.oup.com/ecco-jcc/pages/podcast.",2020-03-01 +24302577,topPTM: a new module of dbPTM for identifying functional post-translational modifications in transmembrane proteins.,"Transmembrane (TM) proteins have crucial roles in various cellular processes. The location of post-translational modifications (PTMs) on TM proteins is associated with their functional roles in various cellular processes. Given the importance of PTMs in the functioning of TM proteins, this study developed topPTM (available online at http://topPTM.cse.yzu.edu.tw), a new dbPTM module that provides a public resource for identifying the functional PTM sites on TM proteins with structural topology. Experimentally verified TM topology data were integrated from TMPad, TOPDB, PDBTM and OPM. In addition to the PTMs obtained from dbPTM, experimentally verified PTM sites were manually extracted from research articles by text mining. In an attempt to provide a full investigation of PTM sites on TM proteins, all UniProtKB protein entries containing annotations related to membrane localization and TM topology were considered potential TM proteins. Two effective tools were then used to annotate the structural topology of the potential TM proteins. The TM topology of TM proteins is represented by graphical visualization, as well as by the PTM sites. To delineate the structural correlation between the PTM sites and TM topologies, the tertiary structure of PTM sites on TM proteins was visualized by Jmol program. Given the support of research articles by manual curation and the investigation of domain-domain interactions in Protein Data Bank, 1347 PTM substrate sites are associated with protein-protein interactions for 773 TM proteins. 
The database content is regularly updated on publication of new data by continuous surveys of research articles and available resources.",2013-12-02 +23947436,Gentrepid V2.0: a web server for candidate disease gene prediction.,"

Background

Candidate disease gene prediction is a rapidly developing area of bioinformatics research with the potential to deliver great benefits to human health. As experimental studies detecting associations between genetic intervals and disease proliferate, better bioinformatic techniques that can expand and exploit the data are required.

Description

Gentrepid is a web resource which predicts and prioritizes candidate disease genes for both Mendelian and complex diseases. The system can take input from linkage analysis of single genetic intervals or multiple marker loci from genome-wide association studies. The underlying database of the Gentrepid tool sources data from numerous gene and protein resources, taking advantage of the wealth of biological information available. Using known disease gene information from OMIM, the system predicts and prioritizes disease gene candidates that participate in the same protein pathways or share similar protein domains. Alternatively, using an ab initio approach, the system can detect enrichment of these protein annotations without prior knowledge of the phenotype.

Conclusions

The system aims to integrate the wealth of protein information currently available with known and novel phenotype/genotype information to acquire knowledge of biological mechanisms underpinning disease. We have updated the system to facilitate analysis of GWAS data and the study of complex diseases. Application of the system to GWAS data on hypertension using the ICBP data is provided as an example. An interesting prediction is a ZIP transporter additional to the one found by the ICBP analysis. The webserver URL is https://www.gentrepid.org/.",2013-08-16 +31038449,"Caribbean multi-centre study of Klebsiella pneumoniae: whole-genome sequencing, antimicrobial resistance and virulence factors. ","The surveillance of antimicrobial-resistant isolates has proven to be one of the most valuable tools to understand the global rise of multidrug-resistant bacterial pathogens. We report the first insights into the current situation in the Caribbean, where a pilot project to monitor antimicrobial resistance (AMR) through phenotypic resistance measurements combined with whole-genome sequencing was set up in collaboration with the Caribbean Public Health Agency (CARPHA). Our first study focused on Klebsiella pneumoniae, a highly relevant organism amongst the Gram-negative opportunistic pathogens worldwide causing hospital- and community-acquired infections. Our results show that not only carbapenem resistance, but also hypervirulent strains, are circulating in patients in the Caribbean. Our current data does not allow us to infer their prevalence in the population. We argue for the urgent need to further support AMR surveillance and stewardship in this almost uncharted territory, which can make a significant impact on the reduction of antimicrobial usage. This article contains data hosted by Microreact (https://microreact.org).",2019-04-29 +31603466,SOLart: a structure-based method to predict protein solubility and aggregation.,"

Motivation

The solubility of a protein is often decisive for its proper functioning. Lack of solubility is a major bottleneck in high-throughput structural genomic studies and in high-concentration protein production, and the formation of protein aggregates causes a wide variety of diseases. Since solubility measurements are time-consuming and expensive, there is a strong need for solubility prediction tools.

Results

We have recently introduced solubility-dependent distance potentials that are able to unravel the role of residue-residue interactions in promoting or decreasing protein solubility. Here, we extended their construction by defining solubility-dependent potentials based on backbone torsion angles and solvent accessibility, and integrated them, together with other structure- and sequence-based features, into a random forest model trained on a set of Escherichia coli proteins with experimental structures and solubility values. We thus obtained the SOLart protein solubility predictor, whose most informative features turned out to be folding free energy differences computed from our solubility-dependent statistical potentials. SOLart performances are very good, with a Pearson correlation coefficient between experimental and predicted solubility values of almost 0.7 both in cross-validation on the training dataset and in an independent set of Saccharomyces cerevisiae proteins. On test sets of modeled structures, only a limited drop in performance is observed. SOLart can thus be used with both high-resolution and low-resolution structures, and clearly outperforms state-of-art solubility predictors. It is available through a user-friendly webserver, which is easy to use by non-expert scientists.

Availability and implementation

The SOLart webserver is freely available at http://babylone.ulb.ac.be/SOLART/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-03-01 +30559761,DREAMSeq: An Improved Method for Analyzing Differentially Expressed Genes in RNA-seq Data.,"RNA sequencing (RNA-seq) has become a widely used technology for analyzing global gene-expression changes during certain biological processes. It is generally acknowledged that RNA-seq data displays equidispersion and overdispersion characteristics; therefore, most RNA-seq analysis methods were developed based on a negative binomial model capable of capturing both equidispersed and overdispersed data. In this study, we reported that in addition to equidispersion and overdispersion, RNA-seq data also displays underdispersion characteristics that cannot be adequately captured by general RNA-seq analysis methods. Based on a double Poisson model capable of capturing all data characteristics, we developed a new RNA-seq analysis method (DREAMSeq). Comparison of DREAMSeq with five other frequently used RNA-seq analysis methods using simulated datasets showed that its performance was comparable to or exceeded that of other methods in terms of type I error rate, statistical power, receiver operating characteristics (ROC) curve, area under the ROC curve, precision-recall curve, and the ability to detect the number of differentially expressed genes, especially in situations involving underdispersion. These results were validated by quantitative real-time polymerase chain reaction using a real Foxtail dataset. Our findings demonstrated DREAMSeq as a reliable, robust, and powerful new method for RNA-seq data mining. 
The DREAMSeq R package is available at http://tanglab.hebtu.edu.cn/tanglab/Home/DREAMSeq.",2018-11-30 +22674824,Circular dichroism spectral data and metadata in the Protein Circular Dichroism Data Bank (PCDDB): a tutorial guide to accession and deposition.,"The Protein Circular Dichroism Data Bank (PCDDB) is a web-based resource containing circular dichroism (CD) and synchrotron radiation circular dichroism spectral and associated metadata located at http://pcddb.cryst.bbk.ac.uk. This resource provides a freely available, user-friendly means of accessing validated CD spectra and their associated experimental details and metadata, thereby enabling broad usage of this material and new developments across the structural biology, chemistry, and bioinformatics communities. The resource also enables researchers utilizing CD as an experimental technique to have a means of storing their data at a secure site from which it is easily retrievable, thereby making their results publicly accessible, a current requirement of many grant-funding agencies world-wide, as well as meeting the data-sharing requirements for journal publications. This tutorial provides extensive information on searching, accessing, and downloading procedures for those who wish to utilize the data available in the data bank, and detailed information on deposition procedures for creating and validating entries, including comprehensive explanations of their contents and formats, for those who wish to include their data in the data bank.",2012-06-04 +33184245,A predicted protein functional network aids in novel gene mining for characteristic secondary metabolites in tea plant (Camellia sinensis). ,"Modeling a protein functional network in concerned species is an efficient approach for identifying novel genes in certain biological pathways. 
Tea plant (Camellia sinensis) is an important commercial crop abundant in numerous characteristic secondary metabolites (e.g., polyphenols, alkaloids) that confer tea quality and health benefits. Decoding novel genes responsible for tea characteristic components is an important basis for applied genetic improvement and metabolic engineering. Herein, a high-quality protein functional network for tea plant (TeaPoN) was predicted using cross-species protein functional associations transferring and integration combined with a stringent biological network criterion control. TeaPoN contained 31,273 nonredundant functional interactions among 6,634 tea proteins (or genes), with general network topological properties such as scale-free and small-world. We revealed the modular organization of genes related to the major three tea characteristic components (theanine, caffeine, catechin) in TeaPoN, which served as strong evidence for the utility of TeaPoN in novel gene mining. Importantly, several case studies regarding gene identification for tea characteristic components were presented. To aid in the use of TeaPoN, a concise web interface for data deposit and novel gene screening was developed (http://teapon.wchoda.com). We believe that TeaPoN will serve as a useful platform for functional genomics studies associated with characteristic secondary metabolites in tea plant.",2020-01-01 +29325029,Neuroconductor: an R platform for medical imaging analysis.,"Neuroconductor (https://neuroconductor.org) is an open-source platform for rapid testing and dissemination of reproducible computational imaging software. 
The goals of the project are to: (i) provide a centralized repository of R software dedicated to image analysis, (ii) disseminate software updates quickly, (iii) train a large, diverse community of scientists using detailed tutorials and short courses, (iv) increase software quality via automatic and manual quality controls, and (v) promote reproducibility of image data analysis. Based on the programming language R (https://www.r-project.org/), Neuroconductor starts with 51 inter-operable packages that cover multiple areas of imaging including visualization, data processing and storage, and statistical inference. Neuroconductor accepts new R package submissions, which are subject to a formal review and continuous automated testing. We provide a description of the purpose of Neuroconductor and the user and developer experience.",2019-04-01 +31665499,ReMap 2020: a database of regulatory regions from an integrative analysis of Human and Arabidopsis DNA-binding sequencing experiments.,"ReMap (http://remap.univ-amu.fr) aims to provide the largest catalogs of high-quality regulatory regions resulting from a large-scale integrative analysis of hundreds of transcription factors and regulators from DNA-binding experiments in Human and Arabidopsis (Arabidopsis thaliana). In this 2020 update of ReMap we have collected, analyzed and retained after quality control 2764 new human ChIP-seq and 208 ChIP-exo datasets available from public sources. The updated human atlas totalize 5798 datasets covering a total of 1135 transcriptional regulators (TRs) with a catalog of 165 million (M) peaks. This ReMap update comes with two unique Arabidopsis regulatory catalogs. First, a catalog of 372 Arabidopsis TRs across 2.6M peaks as a result of the integration of 509 ChIP-seq and DAP-seq datasets. Second, a catalog of 33 histone modifications and variants across 4.5M peaks from the integration of 286 ChIP-seq datasets. 
All catalogs are made available through track hubs at Ensembl and UCSC Genome Browsers. Additionally, this update comes with a new web framework providing an interactive user-interface, including improved search features. Finally, full programmatic access to the underlying data is available using a RESTful API together with a new R Shiny interface for a TRs binding enrichment analysis tool.",2020-01-01 +31199465,Positive multistate protein design.,"

Motivation

Structure-based computational protein design (CPD) plays a critical role in advancing the field of protein engineering. Using an all-atom energy function, CPD tries to identify amino acid sequences that fold into a target structure and ultimately perform a desired function. The usual approach considers a single rigid backbone as a target, which ignores backbone flexibility. Multistate design (MSD) allows instead to consider several backbone states simultaneously, defining challenging computational problems.

Results

We introduce efficient reductions of positive MSD problems to Cost Function Networks with two different fitness definitions and implement them in the Pompd (Positive Multistate Protein design) software. Pompd is able to identify guaranteed optimal sequences of positive multistate full protein redesign problems and exhaustively enumerate suboptimal sequences close to the MSD optimum. Applied to nuclear magnetic resonance and back-rubbed X-ray structures, we observe that the average energy fitness provides the best sequence recovery. Our method outperforms state-of-the-art guaranteed computational design approaches by orders of magnitudes and can solve MSD problems with sizes previously unreachable with guaranteed algorithms.

Availability and implementation

https://forgemia.inra.fr/thomas.schiex/pompd as documented Open Source.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +33628083,The Language Experience and Proficiency Questionnaire (LEAP-Q): Ten years later.,"The Language Experience and Proficiency Questionnaire (LEAP-Q) is a validated questionnaire tool for collecting self-reported proficiency and experience data from bilingual and multilingual speakers ages 14 to 80. It is available in over 20 languages, and can be administered in a digital, paper-and-pencil, and oral interview format. The LEAP-Q is used by researchers across various disciplines (Psychology, Neuroscience, Linguistics, Education, Communication Sciences & Disorders, etc.) to provide a comprehensive description of their bilingual participants, to substantiate a division of bilinguals into groups (e.g., early vs. late bilinguals), and to screen participants for adequate or threshold levels of language proficiency. Best practices for using the LEAP-Q include administration of the full questionnaire, consideration of acquisition and history of language use together with self-ratings of proficiency, and supplementation of self-reported data with objective language measures whenever possible. The LEAP-Q can be downloaded at no cost at https://bilingualism.northwestern.edu/leapq/.",2019-04-15 +31733823,NHP-immunome: A translational research-oriented database of non-human primate immune system proteins.,"We are currently living the advent of a new age for medicine in which basic research is being quickly translated into marketable drugs, and the widespread access to genomics data is allowing the design and implementation of personalized solutions to medical conditions. Non-human primates (NHP) have gained an essential role in drug discovery and safety testing due to their close phylogenetic relationship to humans. 
In this study, a collection of well characterized genes of the human immune system was used to define the orthology-based immunome in four NHP species, with carefully curated annotations available based on multi-tissue RNA-seq datasets. A broad variation in the frequency of expressed protein isoforms was observed between species. Finally, this analysis also revealed the lack of expression of at least four different chemokines in new-world primates. In addition, transcripts corresponding to four genes including interleukin 12 subunit alpha were expressed in humans but no other primate species analyzed. Access to the non-human primate immunome is available in http://www.fidic.org.co:90/proyecto/.",2019-10-31 +28764727,EMQIT: a machine learning approach for energy based PWM matrix quality improvement.,"

Background

Transcription factor binding affinities to DNA play a key role in gene regulation. Learning the specificity of the mechanisms of binding TFs to DNA is important both to experimentalists and theoreticians. With the development of high-throughput methods such as, e.g., ChIP-seq, the need to provide unbiased models of binding events has been made apparent. We present EMQIT, a modification to the approach introduced by Alamanova et al. and later implemented as the 3DTF server. We observed that tuning of Boltzmann factor weights, used for conversion of calculated energies to nucleotide probabilities, has a significant impact on the quality of the associated PWM matrix.

Results

Consequently, we proposed to use receiver operator characteristics curves and the 10-fold cross-validation to learn best weights using experimentally verified data from TRANSFAC database. We applied our method to data available for various TFs. We verified the efficiency of detecting TF binding sites by the 3DTF matrices improved with our technique using experimental data from the TRANSFAC database. The comparison showed a significant similarity and comparable performance between the improved and the experimental matrices (TRANSFAC). Improved 3DTF matrices achieved significantly higher AUC values than the original 3DTF matrices (at least by 0.1) and, at the same time, detected notably more experimentally verified TFBSs.

Conclusions

The resulting new improved PWM matrices for analyzed factors show similarity to TRANSFAC matrices. Matrices had comparable predictive capabilities. Moreover, improved PWMs achieve better results than matrices downloaded from 3DTF server. Presented approach is general and applicable to any energy-based matrices. EMQIT is available online at http://biosolvers.polsl.pl:3838/emqit .

Reviewers

This article was reviewed by Oliviero Carugo, Marek Kimmel and István Simon.",2017-08-01 +28081714,ClinGen Pathogenicity Calculator: a configurable system for assessing pathogenicity of genetic variants.,"

Background

The success of the clinical use of sequencing based tests (from single gene to genomes) depends on the accuracy and consistency of variant interpretation. Aiming to improve the interpretation process through practice guidelines, the American College of Medical Genetics and Genomics (ACMG) and the Association for Molecular Pathology (AMP) have published standards and guidelines for the interpretation of sequence variants. However, manual application of the guidelines is tedious and prone to human error. Web-based tools and software systems may not only address this problem but also document reasoning and supporting evidence, thus enabling transparency of evidence-based reasoning and resolution of discordant interpretations.

Results

In this report, we describe the design, implementation, and initial testing of the Clinical Genome Resource (ClinGen) Pathogenicity Calculator, a configurable system and web service for the assessment of pathogenicity of Mendelian germline sequence variants. The system allows users to enter the applicable ACMG/AMP-style evidence tags for a specific allele with links to supporting data for each tag and generate guideline-based pathogenicity assessment for the allele. Through automation and comprehensive documentation of evidence codes, the system facilitates more accurate application of the ACMG/AMP guidelines, improves standardization in variant classification, and facilitates collaborative resolution of discordances. The rules of reasoning are configurable with gene-specific or disease-specific guideline variations (e.g. cardiomyopathy-specific frequency thresholds and functional assays). The software is modular, equipped with robust application program interfaces (APIs), and available under a free open source license and as a cloud-hosted web service, thus facilitating both stand-alone use and integration with existing variant curation and interpretation systems. The Pathogenicity Calculator is accessible at http://calculator.clinicalgenome.org .

Conclusions

By enabling evidence-based reasoning about the pathogenicity of genetic variants and by documenting supporting evidence, the Calculator contributes toward the creation of a knowledge commons and more accurate interpretation of sequence variants in research and clinical care.",2017-01-12 +31675852,Ensemble first-principles molecular dynamics simulations of water using the SCAN meta-GGA density functional.,"We present an ensemble of 16 independent first-principles molecular dynamics simulations of water performed using the Strongly Constrained and Appropriately Normed (SCAN) meta-generalized gradient approximation exchange-correlation functional. These simulations were used to compute the structural and electronic properties of liquid water, as well as polarizabilities, Raman and infrared spectra. Overall, we find that the SCAN functional used at a simulation temperature of 330 K provides an accurate description of the structural and electronic properties of water while incurring a moderate computational cost. The availability of an ensemble of independent simulations provides a quantitative estimate of the uncertainty in computed structural and electronic properties. Results are also compared with a similar dataset generated using the Perdew, Burke, and Ernzerhof exchange-correlation functional at a temperature of 400 K. All simulation data and trajectories are available at http://quantum-simulation.org.",2019-10-01 +30936559,A comparison of single-cell trajectory inference methods.,"Trajectory inference approaches analyze genome-wide omics data from thousands of single cells and computationally infer the order of these cells along developmental trajectories. Although more than 70 trajectory inference tools have already been developed, it is challenging to compare their performance because the input they require and output models they produce vary substantially. 
Here, we benchmark 45 of these methods on 110 real and 229 synthetic datasets for cellular ordering, topology, scalability and usability. Our results highlight the complementarity of existing tools, and that the choice of method should depend mostly on the dataset dimensions and trajectory topology. Based on these results, we develop a set of guidelines to help users select the best method for their dataset. Our freely available data and evaluation pipeline ( https://benchmark.dynverse.org ) will aid in the development of improved tools designed to analyze increasingly large and complex single-cell datasets.",2019-04-01 +29145823,dbMDEGA: a database for meta-analysis of differentially expressed genes in autism spectrum disorder.,"

Background

Autism spectrum disorders (ASD) are hereditary, heterogeneous and biologically complex neurodevelopmental disorders. Individual studies on gene expression in ASD cannot provide clear consensus conclusions. Therefore, a systematic review to synthesize the current findings from brain tissues and a search tool to share the meta-analysis results are urgently needed.

Methods

Here, we conducted a meta-analysis of brain gene expression profiles in the current reported human ASD expression datasets (with 84 frozen male cortex samples, 17 female cortex samples, 32 cerebellum samples and 4 formalin fixed samples) and knock-out mouse ASD model expression datasets (with 80 collective brain samples). Then, we applied R language software and developed an interactive shared and updated database (dbMDEGA) displaying the results of meta-analysis of data from ASD studies regarding differentially expressed genes (DEGs) in the brain.

Results

This database, dbMDEGA ( https://dbmdega.shinyapps.io/dbMDEGA/ ), is a publicly available web-portal for manual annotation and visualization of DEGs in the brain from data from ASD studies. This database uniquely presents meta-analysis values and homologous forest plots of DEGs in brain tissues. Gene entries are annotated with meta-values, statistical values and forest plots of DEGs in brain samples. This database aims to provide searchable meta-analysis results based on the current reported brain gene expression datasets of ASD to help detect candidate genes underlying this disorder.

Conclusion

This new analytical tool may provide valuable assistance in the discovery of DEGs and the elucidation of the molecular pathogenicity of ASD. This database model may be replicated to study other disorders.",2017-11-16 +30985146,ProteinExplorer: A Repository-Scale Resource for Exploration of Protein Detection in Public Mass Spectrometry Data Sets.,"High-throughput tandem mass spectrometry has enabled the detection and identification of over 75% of all proteins predicted to result in translated gene products in the human genome. In fact, the galloping rate of data acquisition and sharing of mass spectrometry data has led to the current availability of many tens of terabytes of public data in thousands of human data sets. The systematic reanalysis of these public data sets has been used to build a community-scale spectral library of 2.1 million precursors for over 1 million unique sequences from over 19,000 proteins (including spectra of synthetic peptides). However, it has remained challenging to find and inspect spectra of peptides covering functional protein regions or matching novel proteins. ProteinExplorer addresses these challenges with an intuitive interface mapping tens of millions of identifications to functional sites on nearly all human proteins while maintaining provenance for every identification back to the original data set and data file. Additionally, ProteinExplorer facilitates the selection and inspection of HPP-compliant peptides whose spectra can be matched to spectra of synthetic peptides and already includes HPP-compliant evidence for 107 missing (PE2, PE3, and PE4) and 23 dubious (PE5) proteins. Finally, ProteinExplorer allows users to rate spectra and to contribute to a community library of peptides entitled PrEdict (Protein Existance dictionary) mapping to novel proteins but whose preliminary identities have not yet been fully established with community-scale false discovery rates and synthetic peptide spectra. 
ProteinExplorer can now be accessed at https://massive.ucsd.edu/ProteoSAFe/protein_explorer_splash.jsp .",2018-10-15 +32111728,The Integrated RNA Landscape of Renal Preconditioning against Ischemia-Reperfusion Injury.,"

Background

Although AKI lacks effective therapeutic approaches, preventive strategies using preconditioning protocols, including caloric restriction and hypoxic preconditioning, have been shown to prevent injury in animal models. A better understanding of the molecular mechanisms that underlie the enhanced resistance to AKI conferred by such approaches is needed to facilitate clinical use. We hypothesized that these preconditioning strategies use similar pathways to augment cellular stress resistance.

Methods

To identify genes and pathways shared by caloric restriction and hypoxic preconditioning, we used RNA-sequencing transcriptome profiling to compare the transcriptional response with both modes of preconditioning in mice before and after renal ischemia-reperfusion injury.

Results

The gene expression signatures induced by both preconditioning strategies involve distinct common genes and pathways that overlap significantly with the transcriptional changes observed after ischemia-reperfusion injury. These changes primarily affect oxidation-reduction processes and have a major effect on mitochondrial processes. We found that 16 of the genes differentially regulated by both modes of preconditioning were strongly correlated with clinical outcome; most of these genes had not previously been directly linked to AKI.

Conclusions

This comparative analysis of the gene expression signatures in preconditioning strategies shows overlapping patterns in caloric restriction and hypoxic preconditioning, pointing toward common molecular mechanisms. Our analysis identified a limited set of target genes not previously known to be associated with AKI; further study of their potential to provide the basis for novel preventive strategies is warranted. To allow for optimal interactive usability of the data by the kidney research community, we provide an online interface for user-defined interrogation of the gene expression datasets (http://shiny.cecad.uni-koeln.de:3838/IRaP/).",2020-02-28 +25964299,CATH FunFHMMer web server: protein functional annotations using functional family assignments.,"The widening function annotation gap in protein databases and the increasing number and diversity of the proteins being sequenced presents new challenges to protein function prediction methods. Multidomain proteins complicate the protein sequence-structure-function relationship further as new combinations of domains can expand the functional repertoire, creating new proteins and functions. Here, we present the FunFHMMer web server, which provides Gene Ontology (GO) annotations for query protein sequences based on the functional classification of the domain-based CATH-Gene3D resource. Our server also provides valuable information for the prediction of functional sites. The predictive power of FunFHMMer has been validated on a set of 95 proteins where FunFHMMer performs better than BLAST, Pfam and CDD. Recent validation by an independent international competition ranks FunFHMMer as one of the top function prediction methods in predicting GO annotations for both the Biological Process and Molecular Function Ontology. 
The FunFHMMer web server is available at http://www.cathdb.info/search/by_funfhmmer.",2015-05-11 +31366898,Daphnia stressor database: Taking advantage of a decade of Daphnia '-omics' data for gene annotation.,"Gene expression patterns help to measure and characterize the effect of environmental perturbations at the cellular and organism-level. Complicating interpretation is the presence of uncharacterized or ""hypothetical"" gene functions for a large percentage of genomes. This is particularly evident in Daphnia genomes, which contains many regions coding for ""hypothetical proteins"" and are significantly divergent from many of the available arthropod model species, but might be ecologically important. In the present study, we developed a gene expression database, the Daphnia stressor database (http://www.daphnia-stressordb.uni-hamburg.de/dsdbstart.php), built from 90 published studies on Daphnia gene expression. Using a comparative genomics approach, we used the database to annotate D. galeata transcripts. The extensive body of literature available for Daphnia species allowed to associate stressors with gene expression patterns. We believe that our stressor based annotation strategy allows for better understanding and interpretation of the functional role of the understudied hypothetical or uncharacterized Daphnia genes, thereby increasing our understanding of Daphnia's genetic and phenotypic variability.",2019-07-31 +31534992,Datasets for multi-scale diffraction analysis (synchrotron XRD and EBSD) of twinning-detwinning during tensile-compressive deformation of AZ31B magnesium alloy samples.,"Diffraction data were collected using synchrotron X-ray scattering (sXRD) and electron back-scattered diffraction (EBSD) during in situ tensile-compressive deformation of Mg alloy AZ31B dogbone samples. 
The onset and evolution of twinning and detwinning were monitored based on intensity changes in sXRD 2D scattering patterns (which also provided average elastic strain values through the calculation of orientation-specific lattice spacing changes), and EBSD, that revealed the micro-scale grain morphology changes. The observations were interpreted and analysed with the help of crystal plasticity finite element modelling (CP-FEM), as reported in the published article (https://doi.org/10.1016/j.ijplas.2019.02.018).",2019-08-28 +25228593,RADB: a database of rheumatoid arthritis-related polymorphisms. ,"Rheumatoid arthritis (RA) is an autoimmune disease that has a complex genetic basis. Therefore, it is important to explore the genetic background of RA. The extensive recent application of polymorphic genetic markers, especially single nucleotide polymorphisms, has presented us with a large quantity of genetic data. In this study, we developed the Database of Rheumatoid Arthritis-related Polymorphisms (RADB), to integrate all the RA-related genetic polymorphisms and provide a useful resource for researchers. We manually extracted the RA-related polymorphisms from 686 published reports, including RA susceptibility loci, polymorphisms associated with particular clinical features of RA, polymorphisms associated with drug response in RA and polymorphisms associated with a higher risk of cardiovascular disease in RA. Currently, RADB V1.0 contains 3235 polymorphisms that are associated with 636 genes and refer to 68 countries. The detailed information extracted from the literature includes basic information about the articles (e.g., PubMed ID, title and abstract), population information (e.g., country, geographic area and sample size) and polymorphism information (e.g., polymorphism name, gene, genotype, odds ratio and 95% confidence interval, P-value and risk allele). 
Meanwhile, useful annotations, such as hyperlinks to dbSNP, GenBank, UCSC, Gene Ontology and Kyoto Encyclopedia of Genes and Genomes pathway, are included. In addition, a tool for meta-analysis was developed to summarize the results of multiple studies. The database is freely available at http://www.bioapp.org/RADB. Database URL: http://www.bioapp.org/RADB.",2014-09-15 +32367143,"Not just a research method: If used with caution, can job-exposure matrices be a useful tool in the practice of occupational medicine and public health?","The recent editorial by Dr Susan Peters ""Although a valuable method in occupational epidemiology, job-exposure matrices are no magic fix"" ably describes the strengths and limitations of job-exposure matrix (JEM) approaches in occupational epidemiology research (1). In addition to their use in research, we would like to add that JEM may also be of use in compensation and surveillance efforts in occupational health. JEM could assist the compensation process by supporting the assessment of relevant exposures related to specific health conditions (2). The potential usefulness of a JEM as a decision tool for compensation of work-related musculoskeletal disorders has been examined (3). Because occupational diseases are often under-recognized, another practical application is using a JEM to screen for occupational exposures as part of health surveillance. Use of JEM to screen for asbestos and wood dust exposure in the clinical setting has shown promising results (4-6). By summarizing multiple exposures at a job level (7), JEM may also assist policy-makers in setting priorities for hazards and controls at work, as well as occupational practitioners to target prevention efforts and direct the conduct of more precise exposure measures to particular jobs. Sharing JEM across different countries may be useful in providing estimates of exposures across larger populations to calculate global burden of disease related to occupational exposure. 
The JEMINI (JEM InterNatIonal) initiative was launched to explore the possibility of developing international JEM that could be used across countries (8). Beginning with physical (biomechanical) exposures, this open group has started homogenizing job coding systems and comparing some available JEM. Estimating differences in the level of exposure between countries will require much more work, without guaranteed success. As Peters mentioned, many limitations exist in the use of JEM. Users of JEM must consider the source of exposure data - expert assessments, data collected from individual workers, or environmental sampling. The coding of occupations is time consuming and can introduce error (9), and more testing of and comparison with automated job coding systems is needed (10). JEM reflect an ""average"" level of exposure within a job at the expense of individual variation. At population level, JEM can offer a useful estimate of exposures. If used at an individual level in a clinical or compensation setting, JEM cannot replace the professionals involved in exposure assessment but may help them focus their action more effectively on complex situations that require their expertise. In conclusion, these JEM developed for research might also be used as a public health tool, provided that their limitations are properly taken into account. References 1. Peters S. Although a valuable method in occupational epidemiology, job-exposure matrices are no magic fix. Scand J Work Environ Health 2020;46:2314. https://doi.org/10.5271/sjweh.3894 2. Kerbrat J, Descatha A. (The recognition of health consequences of difficult working conditions in France and its evaluation with the use of a job-exposure matrix). Arch Mal Prof Environ. 2018;79:493500. https://doi.org/10.1016/j.admp.2017.12.001 3. Fadel M, Valter R, Quignette A, Descatha A. Usefulness of a job-exposure matrix « MADE » as a decision tool for compensation of work-related musculoskeletal disorders. 
Eur J Public Health 2019;29:86870. https://doi.org/10.1093/eurpub/cky274 4. Lorentz E, Despreaux T, Quignette A, Chinet T, Descatha A. (Screening of occupational exposure to asbestos and silica by job-exposure matrix among patients with lung cancer and mesothelioma). Rev Mal Respir. 2019;36:108895. https://doi.org/10.1016/j.rmr.2019.08.006 5. Imbernon E, Goldberg M, Spyckerell Y, Steinmetz J, Bonenfant S, Fournier B. (Use of a job-exposure matrix for the screening of occupational exposure to asbestos). Rev Epidemiol Sante Publique 2004;52:717. https://doi.org/10.1016/S0398-7620(04)99018-9 6. Carton M, Bonnaud S, Nachtigal M, Serrano A, Carole C, Bonenfant S, et al. Post-retirement surveillance of workers exposed to asbestos or wood dust: first results of the French national SPIRALE Program. Epidemiol Prev. 2011;35:31523.   7. Guéguen A, Goldberg M, Bonenfant S, Martin JC. Using a representative sample of workers for constructing the SUMEX French general population based job-exposure matrix. Occup Environ Med. 2004;61:58693. https://doi.org/10.1136/oem.2003.010660 8. Descatha A, Evanoff BA, Andersen JH, Fadel M, Ngabirano L, Leclerc A, et al. JEMINI (Job Exposure Matrix InterNatIonal) Initiative: a Utopian Possibility for Helping Occupational Exposure Assessment All Around the World? J Occup Environ Med. 2019;61:e3201. https://doi.org/10.1097/JOM.0000000000001631 9. Petersen SB, Flachs EM, Svendsen SW, Marott JL, Budtz-Jørgensen E, Hansen J, et al. Influence of errors in job codes on job exposure matrix-based exposure assessment in the register-based occupational cohort DOC*X. Scand J Work Environ Health 2020;46:25967. https://doi.org/10.5271/sjweh.3857 10. Buckner-Petty S, Dale AM, Evanoff BA. Efficiency of autocoding programs for converting job descriptors into standard occupational classification (SOC) codes. Am J Ind Med. 2019;62:5968. 
https://doi.org/10.1002/ajim.22928.",2020-05-05 +29596615,ONETOOL for the analysis of family-based big data.,"Motivation:Despite the need for separate tools to analyze family-based data, there are only a handful of tools optimized for family-based big data compared to the number of tools available for analyzing population-based data. Results:ONETOOL implements the properties of well-known existing family data analysis tools and recently developed methods in a computationally efficient manner, and so is suitable for analyzing the vast amount of variant data available from sequencing family members, providing a rich choice of analysis methods for big data on families. Availability and implementation:ONETOOL is freely available from http://healthstat.snu.ac.kr/software/onetool/. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-08-01 +31960201,Cirrhotic Patients on Mechanical Ventilation Have a Low Rate of Successful Extubation and Survival.,"

Background and aims

We hypothesized that mechanically ventilated cirrhotic patients not only have poor outcomes, but also that certain clinical variables are likely to be associated with mortality. We aimed to describe the predictors of mortality in these patients.

Methods

This observational study examined 113 mechanically ventilated cirrhotic patients cared for at our institution between July 1, 2014, and February 28, 2018. We performed bivariate and multivariate analyses to identify risk factors for mortality on mechanical ventilation and created an equation to calculate probability of mortality based on these variables.

Results

Seventy percent of patients had a history of a decompensating event. Altered mental status was the most frequently encountered indication for intubation (46%). 53% of patients died on mechanical ventilation. After controlling for variables associated with increased mortality, multivariate analysis revealed that vasopressor use was the strongest predictor of mortality on mechanical ventilation (OR = 9.3) followed by sepsis (OR = 4.1). A formula with an area under the curve of 0.85 was obtained in order to predict the probability of mortality for cirrhotic patients on mechanical ventilation (available at https://medweb.musc.edu/mvcp/ ). This model (AUC = 0.85) outperformed the CLIF-SOFA score (AUC = 0.68) in predicting mortality in this cohort.

Conclusion

Cirrhotic patients requiring mechanical ventilation have an extremely poor prognosis, and in patients requiring vasopressors, having a history of decompensation, sepsis or low albumin, mortality is higher. Our data point to the clinical variables that should be considered in the medical management of these patients and provide physicians with a formula to predict the probability of mortality.",2020-01-20 +31661165,Why Do Intravascular Schistosomes Coat Themselves in Glycolytic Enzymes?,"Schistosomes are intravascular parasitic helminths (blood flukes) that infect more than 200 million people globally. Proteomic analysis of the tegument (skin) of these worms has revealed the surprising presence of glycolytic enzymes on the parasite's external surface. Immunolocalization data as well as enzyme activity displayed by live worms confirm that functional glycolytic enzymes are indeed expressed at the host-parasite interface. Since these enzymes are traditionally considered to function intracellularly to drive glycolysis, in an extracellular location they are hypothesized to engage in novel ""moonlighting"" functions such as immune modulation and blood clot dissolution that promote parasite survival. For instance, several glycolytic enzymes can interact with plasminogen and promote its activation to the thrombolytic plasmin; some can inhibit complement function; some induce B cell proliferation or macrophage apoptosis. Several pathogenic bacteria and protists also express glycolytic enzymes externally, suggesting that moonlighting functions of extracellular glycolytic enzymes can contribute broadly to pathogen virulence. 
Also see the video abstract here https://youtu.be/njtWZ2y3k_I.",2019-10-29 +21546359,TparvaDB: a database to support Theileria parva vaccine development.,"We describe the development of TparvaDB, a comprehensive resource to facilitate research towards development of an East Coast fever vaccine, by providing an integrated user-friendly database of all genome and related data currently available for Theileria parva. TparvaDB is based on the Generic Model Organism Database (GMOD) platform. It contains a complete reference genome sequence, Expressed Sequence Tags (ESTs), Massively Parallel Signature Sequencing (MPSS) expression tag data and related information from both public and private repositories. The Artemis annotation workbench provides online annotation functionality. TparvaDB represents a resource that will underpin and promote ongoing East Coast fever vaccine development and biological research. Database URL: http://tparvadb.ilri.cgiar.org.",2011-05-04 +24185697,The Candida Genome Database: the new homology information page highlights protein similarity and phylogeny.,"The Candida Genome Database (CGD, http://www.candidagenome.org/) is a freely available online resource that provides gene, protein and sequence information for multiple Candida species, along with web-based tools for accessing, analyzing and exploring these data. The goal of CGD is to facilitate and accelerate research into Candida pathogenesis and biology. The CGD Web site is organized around Locus pages, which display information collected about individual genes. Locus pages have multiple tabs for accessing different types of information; the default Summary tab provides an overview of the gene name, aliases, phenotype and Gene Ontology curation, whereas other tabs display more in-depth information, including protein product details for coding genes, notes on changes to the sequence or structure of the gene and a comprehensive reference list. 
Here, in this update to previous NAR Database articles featuring CGD, we describe a new tab that we have added to the Locus page, entitled the Homology Information tab, which displays phylogeny and gene similarity information for each locus.",2013-10-31 +32657691,Genomic research and privacy: A response to Staunton et al.,"The Protection of Personal Information Act No. 4 of 2013 (POPIA) promises a new dispensation of privacy protection for research participants in South Africa. In a recent article, Staunton et al. proposed that a purposive interpretation of POPIA would allow for the retention of the status quo of broad consent in the context of genomic research. In this response article, we analyse the argument presented by Staunton et al., and conclude that it fails to convince: firstly, because Staunton et al. do not present empirical data for their factual assumption that moving up the consent benchmark is likely to stymie research; secondly, because genomic research does not have a monopoly on the public interest, but shares it with the privacy rights of research participants; and thirdly, because POPIA was designed to promote the protection of privacy, not simply to preserve the status quo as found in existing policy instruments. In contrast to the position advocated by Staunton et al., we suggest that a purposive interpretation of POPIA is aligned with the plain meaning of the statute - namely that specific (not broad) consent is a prerequisite for research on genomic information.This article, which comments on an article by Staunton et al. (Staunton C, Adams R, Botes M, et al. Safeguarding the future of genomic research in South Africa: Broad consent and the Protection of Personal Information Act No. 4 of 2013. S Afr Med J 2019;109(7):468-470. https://doi.org/10.7196/SAMJ.2019.v109i7.14148), is followed by a letter by Thaldar and Townsend (Privacy rights of human research participants in South Africa must be taken seriously. S Afr Med J 2020;110(3):175-176. 
https://doi.org/10.7196/SAMJ.2020.v110i3.14450); and a response to the article and letter by Staunton et al. (S Afr Med J 2020;110(3):175-176. https://doi.org/10.7196/SAMJ.2020.v110i3.14450).",2020-02-26 +,Nucleotide-Level Variant Analysis of Next-Generation Sequencing Data Using a Cloud-Based Data Analysis Pipeline,"To demonstrate the flexibility of a cloud-based solution for analyzing disparate sets of next-generation sequencing data, we looked at carefully chosen samples across different populations from the 1,000 Genomes Project (www.1000genomes.org) and conducted an extensive analysis on two Chinese populations, the “Chinese in Beijing” (CHB) and the “Chinese in metropolitan Denver” (CHD), each consisting of 28 exomes. Each dataset was uploaded into the system using raw data files acquired from the 1,000 Genomes Project. Using these data and a cloud-based data analysis pipeline, we performed a nucleotide-level variant analysis combined with a population allele frequency analysis across all samples for the two populations. To identify alleles that are significantly different across the two populations, a Pearson's chi-square test was applied, which resulted in a total of 1.5 Mio SNPs, of which 84 were non-synonymous with a p-value of less than 0.01. Interestingly, the genes associated with non-synonymous variants of the Chinese in metropolitan Denver population were enriched for biological annotations such as endocrine system disorder, metabolic disease, cardiac fibrosis, and inflammation (includes ZNF264, RPS6KA2, ROBO2, CRK, MUSK, CBL, CRK, and others). Furthermore, genes usually associated with liver injury were also identified for this population, suggesting the liver is exposed to toxic agents more so in this population compared to the CHB population. The observed genomic differences in these two different Chinese populations living in different parts of the world hint towards a potential link between nutrition and different diseases (e.g. 
heart disease or metabolic diseases). Using this analysis as a case study, we will demonstrate how a scalable computational infrastructure can provide researchers and sequencing service providers alike, a cost effective and secure cloud-based computing platform as a powerful and collaborative technology solution for large scale sequence data analysis and management.",2011-10-01 +24243840,The Reactome pathway knowledgebase.,"Reactome (http://www.reactome.org) is a manually curated open-source open-data resource of human pathways and reactions. The current version 46 describes 7088 human proteins (34% of the predicted human proteome), participating in 6744 reactions based on data extracted from 15 107 research publications with PubMed links. The Reactome Web site and analysis tool set have been completely redesigned to increase speed, flexibility and user friendliness. The data model has been extended to support annotation of disease processes due to infectious agents and to mutation.",2013-11-15 +29040459,UC2 search: using unique connectivity of uncharged compounds for metabolite annotation by database searching in mass spectrometry-based metabolomics.,"

Summary

For metabolite annotation in metabolomics, variations in the registered states of compounds (charged molecules and multiple components, such as salts) and their redundancy among compound databases could be the cause of misannotations and hamper immediate recognition of the uniqueness of metabolites while searching by mass values measured using mass spectrometry. We developed a search system named UC2 (Unique Connectivity of Uncharged Compounds), where compounds are tentatively neutralized into uncharged states and stored on the basis of their unique connectivity of atoms after removing their stereochemical information using the first block in the hash of the IUPAC International Chemical Identifier, by which false-positive hits are remarkably reduced, both charged and uncharged compounds are properly searched in a single query and records having a unique connectivity are compiled in a single search result.

Availability and implementation

The UC2 search tool is available free of charge as a REST web service (http://webs2.kazusa.or.jp/mfsearcher) and a Java-based GUI tool.

Contact

sakurai@kazusa.or.jp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-02-01 +24174536,"GeneProf data: a resource of curated, integrated and reusable high-throughput genomics experiments.","GeneProf Data (http://www.geneprof.org) is an open web resource for analysed functional genomics experiments. We have built up a large collection of completely processed RNA-seq and ChIP-seq studies by carefully and transparently reanalysing and annotating high-profile public data sets. GeneProf makes these data instantly accessible in an easily interpretable, searchable and reusable manner and thus opens up the path to the advantages and insights gained from genome-scale experiments to a broader scientific audience. Moreover, GeneProf supports programmatic access to these data via web services to further facilitate the reuse of experimental data across tools and laboratories.",2013-10-29 +31584270,Novel Consensus Architecture To Improve Performance of Large-Scale Multitask Deep Learning QSAR Models.,"Advances in the development of high-throughput screening and automated chemistry have rapidly accelerated the production of chemical and biological data, much of them freely accessible through literature aggregator services such as ChEMBL and PubChem. Here, we explore how to use this comprehensive mapping of chemical biology space to support the development of large-scale quantitative structure-activity relationship (QSAR) models. We propose a new deep learning consensus architecture (DLCA) that combines consensus and multitask deep learning approaches together to generate large-scale QSAR models. This method improves knowledge transfer across different target/assays while also integrating contributions from models based on different descriptors. The proposed approach was validated and compared with proteochemometrics, multitask deep learning, and Random Forest methods paired with various descriptors types. 
DLCA models demonstrated improved prediction accuracy for both regression and classification tasks. The best models together with their modeling sets are provided through publicly available web services at https://predictor.ncats.io .",2019-10-25 +24803509,RAID: a comprehensive resource for human RNA-associated (RNA-RNA/RNA-protein) interaction.,"Transcriptomic analyses have revealed an unexpected complexity in the eukaryote transcriptome, which includes not only protein-coding transcripts but also an expanding catalog of noncoding RNAs (ncRNAs). Diverse coding and noncoding RNAs (ncRNAs) perform functions through interaction with each other in various cellular processes. In this project, we have developed RAID (http://www.rna-society.org/raid), an RNA-associated (RNA-RNA/RNA-protein) interaction database. RAID intends to provide the scientific community with all-in-one resources for efficient browsing and extraction of the RNA-associated interactions in human. This version of RAID contains more than 6100 RNA-associated interactions obtained by manually reviewing more than 2100 published papers, including 4493 RNA-RNA interactions and 1619 RNA-protein interactions. Each entry contains detailed information on an RNA-associated interaction, including RAID ID, RNA/protein symbol, RNA/protein categories, validated method, expressing tissue, literature references (Pubmed IDs), and detailed functional description. Users can query, browse, analyze, and manipulate RNA-associated (RNA-RNA/RNA-protein) interaction. RAID provides a comprehensive resource of human RNA-associated (RNA-RNA/RNA-protein) interaction network. Furthermore, this resource will help in uncovering the generic organizing principles of cellular function network.",2014-05-06 +29164602,18F PET with flutemetamol for the early diagnosis of Alzheimer's disease dementia and other dementias in people with mild cognitive impairment (MCI).,"

Background

18F-flutemetamol uptake by brain tissue, measured by positron emission tomography (PET), is accepted by regulatory agencies like the Food and Drug Administration (FDA) and the European Medicine Agencies (EMA) for assessing amyloid load in people with dementia. Its added value is mainly demonstrated by excluding Alzheimer's pathology in an established dementia diagnosis. However, the National Institute on Aging and Alzheimer's Association (NIA-AA) revised the diagnostic criteria for Alzheimer's disease and the confidence in the diagnosis of mild cognitive impairment (MCI) due to Alzheimer's disease may be increased when using some amyloid biomarkers tests like 18F-flutemetamol. These tests, added to the MCI core clinical criteria, might increase the diagnostic test accuracy (DTA) of a testing strategy. However, the DTA of 18F-flutemetamol to predict the progression from MCI to Alzheimer's disease dementia (ADD) or other dementias has not yet been systematically evaluated.

Objectives

To determine the DTA of the 18F-flutemetamol PET scan for detecting people with MCI at time of performing the test who will clinically progress to ADD, other forms of dementia (non-ADD) or any form of dementia at follow-up.

Search methods

The most recent search for this review was performed in May 2017. We searched MEDLINE (OvidSP), Embase (OvidSP), PsycINFO (OvidSP), BIOSIS Citation Index (Thomson Reuters Web of Science), Web of Science Core Collection, including the Science Citation Index (Thomson Reuters Web of Science) and the Conference Proceedings Citation Index (Thomson Reuters Web of Science), LILACS (BIREME), CINAHL (EBSCOhost), ClinicalTrials.gov (https://clinicaltrials.gov), and the World Health Organization International Clinical Trials Registry Platform (WHO ICTRP) (http://www.who.int/ictrp/search/en/). We also searched ALOIS, the Cochrane Dementia & Cognitive Improvement Group's specialised register of dementia studies (http://www.medicine.ox.ac.uk/alois/). We checked the reference lists of any relevant studies and systematic reviews, and performed citation tracking using the Science Citation Index to identify any additional relevant studies. No language or date restrictions were applied to the electronic searches.

Selection criteria

We included studies that had prospectively defined cohorts with any accepted definition of MCI at time of performing the test and the use of 18F-flutemetamol scan to evaluate the DTA of the progression from MCI to ADD or other forms of dementia. In addition, we only selected studies that applied a reference standard for Alzheimer's dementia diagnosis, for example, National Institute of Neurological and Communicative Disorders and Stroke and the Alzheimer's Disease and Related Disorders Association (NINCDS-ADRDA) or Diagnostic and Statistical Manual of Mental Disorders-IV (DSM-IV) criteria.

Data collection and analysis

We screened all titles and abstracts identified in electronic-database searches. Two review authors independently selected studies for inclusion and extracted data to create two-by-two tables, showing the binary test results cross-classified with the binary reference standard. We used these data to calculate sensitivities, specificities, and their 95% confidence intervals. Two independent assessors performed quality assessment using the QUADAS-2 tool plus some additional items to assess the methodological quality of the included studies.

Main results

Progression from MCI to ADD was evaluated in 243 participants from two studies. The studies reported data on 19 participants with two years of follow-up and on 224 participants with three years of follow-up. Nine (47.4%) participants converted at two years follow-up and 81 (36.2%) converted at three years of follow-up. There were concerns about participant selection and sampling in both studies. The index test domain in one study was considered unclear and in the second study it was considered at low risk of bias. For the reference standard domain, one study was considered at low risk and the second study was considered to have an unclear risk of bias. Regarding the domains of flow and timing, both studies were considered at high risk of bias. MCI to ADD: Progression from MCI to ADD at two years of follow-up had a sensitivity of 89% (95% CI 52 to 100) and a specificity of 80% (95% CI 44 to 97) by quantitative assessment by SUVR (n = 19, 1 study). Progression from MCI to ADD at three years of follow-up had a sensitivity of 64% (95% CI 53 to 75) and a specificity of 69% (95% CI 60 to 76) by visual assessment (n = 224, 1 study). There was no information regarding the other two objectives in this systematic review (SR): progression from MCI to other forms of dementia and progression to any form of dementia at follow-up.

Authors' conclusions

Due to the varying sensitivity and specificity for predicting the progression from MCI to ADD and the limited data available, we cannot recommend routine use of 18F-flutemetamol in clinical practice. 18F-flutemetamol has high financial costs; therefore, clearly demonstrating its DTA and standardising the process of the 18F-flutemetamol modality is important prior to its wider use.",2017-11-22 +28155665,TEA: the epigenome platform for Arabidopsis methylome study.,"

Background

Bisulfite sequencing (BS-seq) has become a standard technology to profile genome-wide DNA methylation at single-base resolution. It allows researchers to conduct genome-wise cytosine methylation analyses on issues about genomic imprinting, transcriptional regulation, cellular development and differentiation. One single data from a BS-Seq experiment is resolved into many features according to the sequence contexts, making methylome data analysis and data visualization a complex task.

Results

We developed a streamlined platform, TEA, for analyzing and visualizing data from whole-genome BS-Seq (WGBS) experiments conducted in the model plant Arabidopsis thaliana. To capture the essence of the genome methylation level and to meet the efficiency for running online, we introduce a straightforward method for measuring genome methylation in each sequence context by gene. The method is scripted in Java to process BS-Seq mapping results. Through a simple data uploading process, the TEA server deploys a web-based platform for deep analysis by linking data to an updated Arabidopsis annotation database and toolkits.

Conclusions

TEA is an intuitive and efficient online platform for analyzing the Arabidopsis genomic DNA methylation landscape. It provides several ways to help users exploit WGBS data. TEA is freely accessible for academic users at: http://tea.iis.sinica.edu.tw .",2016-12-22 +28645144,Increased alignment sensitivity improves the usage of genome alignments for comparative gene annotation.,"Genome alignments provide a powerful basis to transfer gene annotations from a well-annotated reference genome to many other aligned genomes. The completeness of these annotations crucially depends on the sensitivity of the underlying genome alignment. Here, we investigated the impact of the genome alignment parameters and found that parameters with a higher sensitivity allow the detection of thousands of novel alignments between orthologous exons that have been missed before. In particular, comparisons between species separated by an evolutionary distance of >0.75 substitutions per neutral site, like human and other non-placental vertebrates, benefit from increased sensitivity. To systematically test if increased sensitivity improves comparative gene annotations, we built a multiple alignment of 144 vertebrate genomes and used this alignment to map human genes to the other 143 vertebrates with CESAR. We found that higher alignment sensitivity substantially improves the completeness of comparative gene annotations by adding on average 2382 and 7440 novel exons and 117 and 317 novel genes for mammalian and non-mammalian species, respectively. Our results suggest a more sensitive alignment strategy that should generally be used for genome alignments between distantly-related species. 
Our 144-vertebrate genome alignment and the comparative gene annotations (https://bds.mpi-cbg.de/hillerlab/144VertebrateAlignment_CESAR/) are a valuable resource for comparative genomics.",2017-08-01 +25211071,The Integrative Human Microbiome Project: dynamic analysis of microbiome-host omics profiles during periods of human health and disease.,"Much has been learned about the diversity and distribution of human-associated microbial communities, but we still know little about the biology of the microbiome, how it interacts with the host, and how the host responds to its resident microbiota. The Integrative Human Microbiome Project (iHMP, http://hmp2.org), the second phase of the NIH Human Microbiome Project, will study these interactions by analyzing microbiome and host activities in longitudinal studies of disease-specific cohorts and by creating integrated data sets of microbiome and host functional properties. These data sets will serve as experimental test beds to evaluate new models, methods, and analyses on the interactions of host and microbiome. Here we describe the three models of microbiome-associated human conditions, on the dynamics of preterm birth, inflammatory bowel disease, and type 2 diabetes, and their underlying hypotheses, as well as the multi-omic data types to be collected, integrated, and distributed through public repositories as a community resource.",2014-09-01 +31102653,Lack of correlation of virulence gene profiles of Staphylococcus aureus bacteremia isolates with mortality.,"

Purpose

Whole genome sequencing (WGS) analysis of Staphylococcus aureus is increasingly used in clinical practice. Although bioinformatics tools used in WGS analysis readily define the S. aureus virulome, the clinical value of this type of analysis is unclear. Here, virulence genes in S. aureus bacteremia (SAB) isolates were evaluated by WGS, with superantigens (SAgs) further evaluated by conventional PCR and functional assays, and results correlated with mortality.

Methods

152 SAB isolates collected throughout 2015 at a large Minnesota medical center were studied and associated clinical data analyzed. Virulence genes were identified from previously-reported WGS data (https://doi.org/10.1371/journal.pone.0179003). SAg genes sea, seb, sec, sed, see, seg, seh, sei, sej, and tst were also assessed by individual PCR assays. Mitogenicity of SAgs was assessed using an in vitro proliferation assay with splenocytes from HLA-DR3 transgenic mice.

Results

Of the 152 SAB isolates studied, 106 (69%) were methicillin-susceptible S. aureus (MSSA). The number of deaths attributed and not attributed to SAB, and 30-day survivors were 24 (16%), 2 (1%), and 128 (83%), respectively. From WGS data, both MSSA and MRSA had high proportions of adhesion (>80%) and immune-evasion (>70%) genes. There was no difference in virulomes between survivor- and non-survivor-associated isolates. Although over 60% of SAB isolates produced functional SAgs, there were no differences in the distribution or prevalence of SAg genes between survivor- and non-survivor-associated isolates.

Conclusion

In this study of one year of SAB isolates from a large medical center, the S. aureus virulome, as assessed by WGS, and also for SAgs using individual PCRs and phenotypic characterization, did not impact mortality.",2019-05-15 +31874601,CSA: a web service for the complete process of ChIP-Seq analysis.,"

Background

Chromatin immunoprecipitation sequencing (ChIP-seq) is a technology that combines chromatin immunoprecipitation (ChIP) with next generation of sequencing technology (NGS) to analyze protein interactions with DNA. At present, most ChIP-seq analysis tools adopt the command line, which lacks user-friendly interfaces. Although some web services with graphical interfaces have been developed for ChIP-seq analysis, these sites cannot provide a comprehensive analysis of ChIP-seq from raw data to downstream analysis.

Results

In this study, we develop a web service for the whole process of ChIP-Seq Analysis (CSA), which covers mapping, quality control, peak calling, and downstream analysis. In addition, CSA provides a customization function for users to define their own workflows. And the visualization of mapping, peak calling, motif finding, and pathway analysis results are also provided in CSA. For the different types of ChIP-seq datasets, CSA can provide the corresponding tool to perform the analysis. Moreover, CSA can detect differences in ChIP signals between ChIP samples and controls to identify absolute binding sites.

Conclusions

The two case studies demonstrate the effectiveness of CSA, which can complete the whole procedure of ChIP-seq analysis. CSA provides a web interface for users, and implements the visualization of every analysis step. The website of CSA is available at http://CompuBio.csu.edu.cn.",2019-12-24 +25030112,DBSecSys: a database of Burkholderia mallei secretion systems.,"

Background

Bacterial pathogenicity represents a major public health concern worldwide. Secretion systems are a key component of bacterial pathogenicity, as they provide the means for bacterial proteins to penetrate host-cell membranes and insert themselves directly into the host cells' cytosol. Burkholderia mallei is a Gram-negative bacterium that uses multiple secretion systems during its host infection life cycle. To date, the identities of secretion system proteins for B. mallei are not well known, and their pathogenic mechanisms of action and host factors are largely uncharacterized.

Description

We present the Database of Burkholderia mallei Secretion Systems (DBSecSys), a compilation of manually curated and computationally predicted bacterial secretion system proteins and their host factors. Currently, DBSecSys contains comprehensive experimentally and computationally derived information about B. mallei strain ATCC 23344. The database includes 143 B. mallei proteins associated with five secretion systems, their 1,635 human and murine interacting targets, and the corresponding 2,400 host-B. mallei interactions. The database also includes information about 10 pathogenic mechanisms of action for B. mallei secretion system proteins inferred from the available literature. Additionally, DBSecSys provides details about 42 virulence attenuation experiments for 27 B. mallei secretion system proteins. Users interact with DBSecSys through a Web interface that allows for data browsing, querying, visualizing, and downloading.

Conclusions

DBSecSys provides a comprehensive, systematically organized resource of experimental and computational data associated with B. mallei secretion systems. It provides the unique ability to study secretion systems not only through characterization of their corresponding pathogen proteins, but also through characterization of their host-interacting partners.The database is available at https://applications.bhsai.org/dbsecsys.",2014-07-16 +31283967,SATQPCR: Website for statistical analysis of real-time quantitative PCR data.,"SATQPCR is a web tool providing statistical analysis of real-time quantitative PCR data including all MIQE rules (gene efficiency, selection of reference genes and normalization with them). Our application is a quick tool that provides to the biologist, graphs as well as statistical tables summarizing their results with the chosen methods (t-test or ANOVA with Tukey test). The application is available at http://satqpcr.sophia.inra.fr with a demo dataset. Source code can be found at https://framagit.org/. SUPPLEMENTARY INFORMATION: Tutorials at http://satqpcr.sophia.inra.fr/cgi/help.cgi.",2019-07-05 +,Reprint of ‘Tracking the blue: A MLST approach to characterise the Pseudomonas fluorescens group’,"The Pseudomonas fluorescens group comprises several closely related species that are involved in food contamination and spoilage. Specifically, the interest in P. fluorescens as a spoiler of dairy products increased after the cases of “blue mozzarella” that occurred in Italy in 2010.A Multilocus Sequence Typing (MLST) scheme was developed and applied to characterise 136 isolates (reference strains and food borne isolates) at strain level, to reveal the genetic relationships among them and to disclose any possible genetic clustering of phenotypic markers involved in food spoilage (protease, lipase, lecithinase activities and pigmented or fluorescent molecule production). 
The production of dark blue diffusible pigment was evaluated on several bacterial culture media and directly on mozzarella cheese.The MLST scheme provided precise genotyping at the strain level, and the population analyses of the concatenated sequences allowed major taxa to be defined. This approach was revealed to be suitable for tracking the strains according to their origin, such as dairy plants or food matrices. The genetic analysis revealed the presence of a connection between the blue pigment production and a specific phylogenetic cluster. The development of the online database specific to the P. fluorescens group (http://pubmlst.org/pfluorescens) will facilitate the application of the scheme and the sharing of the data.",2015-02-01 +29956270,Data Mining: Applying the AD&FTD Mutation Database to Progranulin.,"The online AD&FTD Mutation Database ( http://www.molgen.vib-ua.be/FTDmutations ) was conceived to meet the needs of a comprehensive knowledge base of genetic variations in genes associated with monogenic forms of Alzheimer's disease (AD) and frontotemporal dementia (FTD). Today, the AD&FTD Mutation Database provides curated, referenced information of 764 genetic variants in APP, PSEN1, and PSEN2 associated with AD and GRN, C9orf72, TBK1, MAPT, VCP, CHMP2B, TARDBP, and FUS associated with FTD and related diseases. In addition, the database stores demographic and clinicogenetic data of 1646 dementia families associated with these mutations. In FTD, the granulin (GRN) gene has the highest number of different mutations (79/231 = 34%) and the second highest number of associated FTD families after C9orf72. In addition to the detailed mutation and patient information, summary reports in tabular and graphical formats can be consulted. 
Further, all variants can be uploaded to the human genome browser for custom-designed analyses.",2018-01-01 +23798489,eFG: an electronic resource for Fusarium graminearum.,"Fusarium graminearum is a plant pathogen, which causes crop diseases and further leads to huge economic damage worldwide in past decades. Recently, the accumulation of different types of molecular data provides insights into the pathogenic mechanism of F. graminearum, and might help develop efficient strategies to combat this destructive fungus. Unfortunately, most available molecular data related to F. graminearum are distributed in various media, where each single source only provides limited information on the complex biological systems of the fungus. In this work, we present a comprehensive database, namely eFG (Electronic resource for Fusarium graminearum), to the community for further understanding this destructive pathogen. In particular, a large amount of functional genomics data generated by our group is deposited in eFG, including protein subcellular localizations, protein-protein interactions and orthologous genes in other model organisms. This valuable knowledge can not only help to disclose the molecular underpinnings of pathogenesis of the destructive fungus F. graminearum but also help the community to develop efficient strategies to combat this pathogen. To our best knowledge, eFG is the most comprehensive functional genomics database for F. graminearum until now. The eFG database is freely accessible at http://csb.shu.edu.cn/efg/ with a user-friendly and interactive interface, and all data can be downloaded freely. DATABASE URL: http://csb.shu.edu.cn/efg/",2013-06-22 +28626454,A Syst-OMICS Approach to Ensuring Food Safety and Reducing the Economic Burden of Salmonellosis.,"The Salmonella Syst-OMICS consortium is sequencing 4,500 Salmonella genomes and building an analysis pipeline for the study of Salmonella genome evolution, antibiotic resistance and virulence genes. 
Metadata, including phenotypic as well as genomic data, for isolates of the collection are provided through the Salmonella Foodborne Syst-OMICS database (SalFoS), at https://salfos.ibis.ulaval.ca/. Here, we present our strategy and the analysis of the first 3,377 genomes. Our data will be used to draw potential links between strains found in fresh produce, humans, animals and the environment. The ultimate goals are to understand how Salmonella evolves over time, improve the accuracy of diagnostic methods, develop control methods in the field, and identify prognostic markers for evidence-based decisions in epidemiology and surveillance.",2017-06-02 +30165565,admetSAR 2.0: web-service for prediction and optimization of chemical ADMET properties.,"

Summary

admetSAR was developed as a comprehensive source and free tool for the prediction of chemical ADMET properties. Since its first release in 2012 containing 27 predictive models, admetSAR has been widely used in chemical and pharmaceutical fields. This update, admetSAR 2.0, focuses on extension and optimization of existing models with significant quantity and quality improvement on training data. Now 47 models are available for either drug discovery or environmental risk assessment. In addition, we added a new module named ADMETopt for lead optimization based on predicted ADMET properties.

Availability and implementation

Freely available on the web at http://lmmd.ecust.edu.cn/admetsar2/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +,Atmospheric Correction for Hyperspectral Ocean Color Retrieval with Application to the Hyperspectral Imager for the Coastal Ocean (HICO),"The classical multi-spectral Atmospheric Correction (AC) algorithm is inadequate for the new generation of spaceborne hyperspectral sensors such as NASA’s first hyperspectral Ocean Color Instrument (OCI) onboard the anticipated Plankton, Aerosol, Cloud, ocean Ecosystem (PACE) satellite mission. The AC process must estimate and remove the atmospheric path radiance contribution due to the Rayleigh scattering by air molecules and scattering by aerosols from the measured top-of-atmosphere (TOA) radiance, compensate for the absorption by atmospheric gases, and correct for reflection and refraction of the air-sea interface. In this work, we present and evaluate an improved AC for hyperspectral sensors developed within NASA’s SeaWiFS Data Analysis System software package (SeaDAS). The improvement is based on combining the classical AC approach of multi-spectral capabilities to correct for the atmospheric path radiance, extended to hyperspectral, with a gas correction algorithm to compensate for absorbing gases in the atmosphere, including water vapor. The SeaDAS-hyperspectral version is capable of operationally processing the AC of any hyperspectral airborne or spaceborne sensor. The new algorithm development was evaluated and assessed using the Hyperspectral Imager for Coastal Ocean (HICO) scenes collected at the Marine Optical BuoY (MOBY) site, and other SeaWiFS Bio-optical Archive and Storage System (SeaBASS) and AERosol Robotic NETwork - Ocean Color (AERONET-OC) coastal sites. A hyperspectral vicarious calibration was applied to HICO, showing the validity and consistency of HICO’s ocean color products. 
The hyperspectral AC capability is currently available in SeaDAS to the scientific community at https://oceancolor.gsfc.nasa.gov/.",2017-01-01 +31990795,Prevalence of Female Urinary Incontinence in Crossfit Practitioners and Associated Factors: An Internet Population-Based Survey.,"

Objectives

CrossFit comprises a set of high-intensity, high-impact exercises that includes movements that may increase intra-abdominal pressure and cause involuntary loss of urine. There is scant literature about the prevalence of urinary incontinence (UI) in female crossfitters, as well as its associated factors.

Methods

A population-based Internet survey stored in a website created with information on the benefits and risks of CrossFit for women's health (https://crosscontinencebr.wixsite.com/crosscontinencebr) invited female crossfitters. In total, 551 women answered an online questionnaire, and the demographic variables (age, marital status, and parity), anthropometric data (weight, height, and body mass index), and the presence of UI during exercises were also investigated. The prevalence of UI and its associated factors were calculated using a logistic regression model. The significance level was set at 5%.

Results

The overall prevalence of UI during CrossFit exercises was 29.95%, and most women with UI reported loss of urine during at least one exercise (16.70%). Women with UI were older (33.77 ± 8.03 years) than those without UI (30.63 ± 6.93 years; P < 0.001). Double under (20.15%) and single under (7.99%) were the exercises that were most frequently associated with UI and also the only variables that remained in the final model that caused UI. The duration of CrossFit practice, number of days per week practicing CrossFit, daily time practice, previous vaginal delivery, and mean birth weight were not statistically associated with UI.

Conclusions

One-third of female crossfitters presented with UI during exercise. Double under was the exercise that was the most associated with UI.",2020-02-01 +28824589,Fast and Simple Analysis of MiSeq Amplicon Sequencing Data with MetaAmp.,"Microbial community profiling by barcoded 16S rRNA gene amplicon sequencing currently has many applications in microbial ecology. The low costs of the parallel sequencing of multiplexed samples, combined with the relative ease of data processing and interpretation (compared to shotgun metagenomes) have made this an entry-level approach. Here we present the MetaAmp pipeline for processing of SSU rRNA gene and other non-coding or protein-coding amplicon sequencing data by investigators that are inexperienced with bioinformatics procedures. It accepts single-end or paired-end sequences in fasta or fastq format from various sequencing platforms. It includes read quality control, and merging of forward and reverse reads of paired-end reads. It makes use of UPARSE, Mothur, and the SILVA database for clustering, removal of chimeric reads, taxonomic classification, and generation of diversity metrics. The pipeline has been validated with a mock community of known composition. MetaAmp provides a convenient web interface as well as command line interface. It is freely available at: http://ebg.ucalgary.ca/metaamp. Since its launch 2 years ago, MetaAmp has been used >2,800 times, by many users worldwide.",2017-08-03 +30465426,Unipept 4.0: Functional Analysis of Metaproteome Data.,"Unipept ( https://unipept.ugent.be ) is a web application for metaproteome data analysis, with an initial focus on tryptic-peptide-based biodiversity analysis of MS/MS samples. Because the true potential of metaproteomics lies in gaining insight into the expressed functions of complex environmental samples, the 4.0 release of Unipept introduces complementary functional analysis based on GO terms and EC numbers. 
Integration of this new functional analysis with the existing biodiversity analysis is an important asset of the extended pipeline. As a proof of concept, a human faecal metaproteome data set from 15 healthy subjects was reanalyzed with Unipept 4.0, yielding fast, detailed, and straightforward characterization of taxon-specific catalytic functions that is shown to be consistent with previous results from a BLAST-based functional analysis of the same data.",2018-12-04 +28953918,Escape Excel: A tool for preventing gene symbol and accession conversion errors.,"

Background

Microsoft Excel automatically converts certain gene symbols, database accessions, and other alphanumeric text into dates, scientific notation, and other numerical representations. These conversions lead to subsequent, irreversible, corruption of the imported text. A recent survey of popular genomic literature estimates that one-fifth of all papers with supplementary gene lists suffer from this issue.

Results

Here, we present an open-source tool, Escape Excel, which prevents these erroneous conversions by generating an escaped text file that can be safely imported into Excel. Escape Excel is implemented in a variety of formats (http://www.github.com/pstew/escape_excel), including a command line based Perl script, a Windows-only Excel Add-In, an OS X drag-and-drop application, a simple web-server, and as a Galaxy web environment interface. Test server implementations are accessible as a Galaxy interface (http://apostl.moffitt.org) and simple non-Galaxy web server (http://apostl.moffitt.org:8000/).

Conclusions

Escape Excel detects and escapes a wide variety of problematic text strings so that they are not erroneously converted into other representations upon importation into Excel. Examples of problematic strings include date-like strings, time-like strings, leading zeroes in front of numbers, and long numeric and alphanumeric identifiers that should not be automatically converted into scientific notation. It is hoped that greater awareness of these potential data corruption issues, together with diligent escaping of text files prior to importation into Excel, will help to reduce the amount of Excel-corrupted data in scientific analyses and publications.",2017-09-27 +32088679,"2019-nCoV (Wuhan virus), a novel Coronavirus: human-to-human transmission, travel-related cases, and vaccine readiness.","On 31 December 2019 the Wuhan Health Commission reported a cluster of atypical pneumonia cases that was linked to a wet market in the city of Wuhan, China. The first patients began experiencing symptoms of illness in mid-December 2019. Clinical isolates were found to contain a novel coronavirus with similarity to bat coronaviruses. As of 28 January 2020, there are in excess of 4,500 laboratory-confirmed cases, with > 100 known deaths. As with the SARS-CoV, infections in children appear to be rare. Travel-related cases have been confirmed in multiple countries and regions outside mainland China including Germany, France, Thailand, Japan, South Korea, Vietnam, Canada, and the United States, as well as Hong Kong and Taiwan. Domestically in China, the virus has also been noted in several cities and provinces with cases in all but one province. While zoonotic transmission appears to be the original source of infections, the most alarming development is that human-to-human transmission is now prevalent. Of particular concern is that many healthcare workers have been infected in the current epidemic. 
There are several critical clinical questions that need to be resolved, including how efficient is human-to-human transmission? What is the animal reservoir? Is there an intermediate animal reservoir? Do the vaccines generated to the SARS-CoV or MERS-CoV or their proteins offer protection against 2019-nCoV? We offer a research perspective on the next steps for the generation of vaccines. We also present data on the use of in silico docking in gaining insight into 2019-nCoV Spike-receptor binding to aid in therapeutic development. Diagnostic PCR protocols can be found at https://www.who.int/health-topics/coronavirus/laboratory-diagnostics-for-novel-coronavirus.",2020-01-31 +28716001,Variant Ranker: a web-tool to rank genomic data according to functional significance.,"

Background

The increasing volume and complexity of high-throughput genomic data make analysis and prioritization of variants difficult for researchers with limited bioinformatics skills. Variant Ranker allows researchers to rank identified variants and determine the most confident variants for experimental validation.

Results

We describe Variant Ranker, a user-friendly simple web-based tool for ranking, filtering and annotation of coding and non-coding variants. Variant Ranker facilitates the identification of causal variants based on novelty, effect and annotation information. The algorithm implements and aggregates multiple prediction algorithm scores, conservation scores, allelic frequencies, clinical information and additional open-source annotations using accessible databases via ANNOVAR. The available information for a variant is transformed into user-specified weights, which are in turn encoded into the ranking algorithm. Through its different modules, users can (i) rank a list of variants (ii) perform genotype filtering for case-control samples (iii) filter large amounts of high-throughput data based on user custom filter requirements and apply different models of inheritance (iv) perform downstream functional enrichment analysis through network visualization. Using networks, users can identify clusters of genes that belong to multiple ontology categories (like pathways, gene ontology, disease categories) and therefore expedite scientific discoveries. We demonstrate the utility of Variant Ranker to identify causal genes using real and synthetic datasets. Our results indicate that Variant Ranker exhibits excellent performance by correctly identifying and ranking the candidate genes. CONCLUSIONS: Variant Ranker is a freely available web server on http://paschou-lab.mbg.duth.gr/Software.html . 
This tool will enable users to prioritise potentially causal variants and is applicable to a wide range of sequencing data.",2017-07-17 +31667273,Inter-participant variability data in loading applied on osseointegrated implant by transtibial bone-anchored prostheses during daily activities.,"The data in this paper are related to the research article entitled ""Loading applied on osseointegrated implant by transtibial bone-anchored prostheses during daily activities: Preliminary characterization of prosthetic feet"" (Frossard et al., 2019: Accepted). This article contains the individual and grouped loading characteristics applied on transtibial osseointegrated implant generated while walking with bone-anchored prostheses including prosthetic feet with different index of anthropomorphicity. Inter-participant variability was presented for (A) the spatio-temporal characteristics, (B) the loading boundaries and (C) the loading local extremum during walking, ascending and descending ramp and stairs. These initial inter-participant variability benchmark datasets are critical to improve the efficacy and safety of prosthetic components for transtibial prostheses as well as the design of future automated algorithms and clinical trials. Online repository contains the files: https://doi.org/10.17632/vhc6sf7ngy.1.",2019-09-20 +31610622,FusionScan: accurate prediction of fusion genes from RNA-Seq data.,"Identification of fusion gene is of prominent importance in cancer research field because of their potential as carcinogenic drivers. RNA sequencing (RNA-Seq) data have been the most useful source for identification of fusion transcripts. Although a number of algorithms have been developed thus far, most programs produce too many false-positives, thus making experimental confirmation almost impossible. We still lack a reliable program that achieves high precision with reasonable recall rate. 
Here, we present FusionScan, a highly optimized tool for predicting fusion transcripts from RNA-Seq data. We specifically search for split reads composed of intact exons at the fusion boundaries. Using 269 known fusion cases as the reference, we have implemented various mapping and filtering strategies to remove false-positives without discarding genuine fusions. In the performance test using three cell line datasets with validated fusion cases (NCI-H660, K562, and MCF-7), FusionScan outperformed other existing programs by a considerable margin, achieving the precision and recall rates of 60% and 79%, respectively. Simulation test also demonstrated that FusionScan recovered most of true positives without producing an overwhelming number of false-positives regardless of sequencing depth and read length. The computation time was comparable to other leading tools. We also provide several curative means to help users investigate the details of fusion candidates easily. We believe that FusionScan would be a reliable, efficient and convenient program for detecting fusion transcripts that meet the requirements in the clinical and experimental community. FusionScan is freely available at http://fusionscan.ewha.ac.kr/.",2019-07-23 +31266745,A traditional hip implant is as effective as newer types for people over 65.,"The studyFawsitt C, Thom H, Hunt L. Choice of prosthetic implant combinations in total hip replacement: cost-effectiveness analysis using UK and Swedish hip joint registries data. 
Value Health 2019;22:303-12.This study was funded by the NIHR Research for Patient Benefit Programme (project number PB-PG-0613-31032).To read the full NIHR Signal, go to https://discover.dc.nihr.ac.uk/content/signal-000771/a-traditional-hip-implant-is-as-effective-as-more-expensive-newer-types-for-older-people.",2019-07-02 +30286773,The 3D Genome Browser: a web-based browser for visualizing 3D genome organization and long-range chromatin interactions.,"Here, we introduce the 3D Genome Browser, http://3dgenome.org , which allows users to conveniently explore both their own and over 300 publicly available chromatin interaction data of different types. We design a new binary data format for Hi-C data that reduces the file size by at least a magnitude and allows users to visualize chromatin interactions over millions of base pairs within seconds. Our browser provides multiple methods linking distal cis-regulatory elements with their potential target genes. Users can seamlessly integrate thousands of other omics data to gain a comprehensive view of both regulatory landscape and 3D genome structure.",2018-10-04 +31586118,Temperature affected guided wave propagation in a composite plate complementing the Open Guided Waves Platform.,"The influence of temperature is regarded as particularly important for a structural health monitoring system based on ultrasonic guided waves. Since the temperature effect causes stronger signal changes than a typical defect, the former must be addressed and compensated for reliable damage assessment. Development of new temperature compensation techniques as well as the comparison of existing algorithms require high-quality benchmark measurements. This paper investigates a carbon fiber reinforced plastic (CFRP) plate that was fully characterized in previous research in terms of stiffness tensor and guided wave propagation. 
The same CFRP plate is used here for the analysis of the temperature effect for a wide range of ultrasound frequencies and temperatures. The measurement data are a contribution to the Open Guided Waves (OGW) platform: http://www.open-guided-waves.de . The technical validation includes initial results on the analysis of phase velocity variations with temperature and exemplary damage detection results using state-of-the-art signal processing methods that aim to suppress the temperature effect.",2019-10-04 +31147718,RNAmod: an integrated system for the annotation of mRNA modifications.,"Dynamic and reversible RNA modifications such as N6-methyladenosine (m6A) can play important roles in regulating messenger RNA (mRNA) splicing, export, stability and translation. Defective mRNA modification through altered expression of the methyltransferase and/or demethylases results in developmental defects and cancer progression. Identifying modified mRNAs, annotating the distribution of modification sites across the mRNA, as well as characterizing and comparing other modification features are essential for studying the function and elucidating the mechanism of mRNA modifications. Several methods including methylated RNA immunoprecipitation and sequencing (MeRIP-seq) are available for the detection of mRNA modifications. However, a convenient and comprehensive tool to annotate diverse kinds of mRNA modifications in different species is lacking. Here, we developed RNAmod (https://bioinformatics.sc.cn/RNAmod), an interactive, one-stop, web-based platform for the automated analysis, annotation, and visualization of mRNA modifications in 21 species. RNAmod provides intuitive interfaces to show outputs including the distribution of RNA modifications, modification coverage for different gene features, functional annotation of modified mRNAs, and comparisons between different groups or specific gene sets. 
Furthermore, sites of known RNA modification, as well as binding site data for hundreds of RNA-binding proteins (RBPs) are integrated in RNAmod to help users compare their modification data with known modifications and to explore the relationship with the binding sites of known RBPs. RNAmod is freely available and meets the emerging need for a convenient and comprehensive analysis tool for the fast-developing RNA modification field.",2019-07-01 +30724836,RSVpredict: An Online Tool to Calculate the Likelihood of Respiratory Syncytial Virus Infection in Children Hospitalized With Acute Respiratory Disease.,"BACKGROUND:Respiratory syncytial virus (RSV) is the leading cause of acute lower respiratory tract infection in young children. Early detection of RSV infection can avoid unnecessary diagnostic and therapeutic intervention and is required to prevent the nosocomial spread of RSV infection in pediatric hospitals. We developed a web tool to calculate the probability of RSV infection in children hospitalized with acute respiratory tract infection (ARTI) (RSVpredict). METHODS:During winter seasons 2014/2015 to 2017/2018, 1545 children hospitalized with clinical symptoms of ARTI at the University Hospital Heidelberg/Germany were prospectively included. Medical information was reported on a standardized data sheet, and nasopharyngeal swabs were obtained for multiplex real-time polymerase chain reaction analyses. We applied logistic regression to develop a prediction model and developed a web-based application to predict the individual probability of RSV infection. RESULTS:Duration of clinical symptoms ≥2 days on admission, calendar month of admission, admission for lower respiratory tract infection, the presence of cough and rale and younger age were associated with RSV infection (P < 0.05). Those data were included in the prediction model (RSVpredict, https://web.imbi.uni-heidelberg.de/rsv/). 
RSVpredict is a web-based application to calculate the risk of RSV infection in children hospitalized with ARTI. The prediction model is based on easily accessible clinical symptoms and predicts the individual probability of RSV infection risk immediately. CONCLUSIONS:Pediatricians might use the RSVpredict to take informed decisions on further diagnostic and therapeutic intervention, including targeted RSV testing in children with relevant RSV infection risk.",2019-07-01 +29532461,"HpBase: A genome database of a sea urchin, Hemicentrotus pulcherrimus.","To understand the mystery of life, it is important to accumulate genomic information for various organisms because the whole genome encodes the commands for all the genes. Since the genome of Strongylocentrotus purpuratus was sequenced in 2006 as the first sequenced genome in echinoderms, the genomic resources of other North American sea urchins have gradually been accumulated, but no sea urchin genomes are available in other areas, where many scientists have used the local species and reported important results. In this manuscript, we report a draft genome of the sea urchin Hemicentrotus pulcherrimus because this species has a long history as the target of developmental and cell biology in East Asia. The genome of H. pulcherrimus was assembled into 16,251 scaffold sequences with an N50 length of 143 kbp, and approximately 25,000 genes were identified in the genome. The size of the genome and the sequencing coverage were estimated to be approximately 800 Mbp and 100×, respectively. To provide these data and information of annotation, we constructed a database, HpBase (http://cell-innovation.nig.ac.jp/Hpul/). In HpBase, gene searches, genome browsing, and blast searches are available. In addition, HpBase includes the ""recipes"" for experiments from each lab using H. pulcherrimus. 
These recipes will continue to be updated according to the circumstances of individual scientists and can be powerful tools for experimental biologists and for the community. HpBase is a suitable dataset for evolutionary, developmental, and cell biologists to compare H. pulcherrimus genomic information with that of other species and to isolate gene information.",2018-03-13 +30726877,KOnezumi: a web application for automating gene disruption strategies to generate knockout mice.,"SUMMARY:Although gene editing using the CRISPR/Cas9 system enables the rapid generation of knockout mice, constructing an optimal gene disruption strategy is still labourious. Here, we propose KOnezumi, a simple and user-friendly web application, for use in automating the design of knockout strategies for multiple genes. Users only need to input gene symbols, and then KOnezumi returns target exons, gRNA candidates to delete the target exons, genotyping PCR primers, nucleotide sequences of the target exons and coding sequences of expected deletion products. KOnezumi enables users to easily and rapidly apply a rational strategy to accelerate the generation of KO mice using the CRISPR/Cas9 system. AVAILABILITY AND IMPLEMENTATION:This web application is freely available at http://www.md.tsukuba.ac.jp/LabAnimalResCNT/KOanimals/konezumi.html. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-09-01 +30721953,IGLOSS: iterative gapless local similarity search.,"SUMMARY:Searching for local sequence patterns is one of the basic tasks in bioinformatics. Sequence patterns might have structural, functional or some other relevance, and numerous methods have been developed to detect and analyze them. These methods often depend on the wealth of information already collected. The explosion in the number of newly available sequences calls for novel methods to explore local sequence similarity. 
We have developed a new method for iterative motif scanning that will look for ungapped sequence patterns similar to a submitted query. Using careful parameter estimation and an adaptation of a fast string-matching algorithm, the method performs significantly better in this context than the existing software. AVAILABILITY AND IMPLEMENTATION:The IGLOSS web server is available at http://compbioserv.math.hr/igloss/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-09-01 +22075990,The Mouse Genome Database (MGD): comprehensive resource for genetics and genomics of the laboratory mouse.,"The Mouse Genome Database (MGD, http://www.informatics.jax.org) is the international community resource for integrated genetic, genomic and biological data about the laboratory mouse. Data in MGD are obtained through loads from major data providers and experimental consortia, electronic submissions from laboratories and from the biomedical literature. MGD maintains a comprehensive, unified, non-redundant catalog of mouse genome features generated by distilling gene predictions from NCBI, Ensembl and VEGA. MGD serves as the authoritative source for the nomenclature of mouse genes, mutations, alleles and strains. MGD is the primary source for evidence-supported functional annotations for mouse genes and gene products using the Gene Ontology (GO). MGD provides full annotation of phenotypes and human disease associations for mouse models (genotypes) using terms from the Mammalian Phenotype Ontology and disease names from the Online Mendelian Inheritance in Man (OMIM) resource. MGD is freely accessible online through our website, where users can browse and search interactively, access data in bulk using Batch Query or BioMart, download data files or use our web services Application Programming Interface (API). 
Improvements to MGD include expanded genome feature classifications, inclusion of new mutant allele sets and phenotype associations and extensions of GO to include new relationships and a new stream of annotations via phylogenetic-based approaches.",2011-11-10 +31859508,"Structural Analysis of 14-3-3-ζ-Derived Phosphopeptides Using Electron Capture Dissociation Mass Spectrometry, Traveling Wave Ion Mobility Spectrometry, and Molecular Modeling.","Previously, we have demonstrated the effect of salt bridges on the electron capture dissociation mass spectrometry behavior of synthetic model phosphopeptides and applied an ion mobility spectrometry/molecular modeling approach to rationalize the findings in terms of peptide ion structure. Here, we develop and apply the approach to a biologically derived phosphopeptide. Specifically, we have investigated variants of a 15-mer phosphopeptide VVGARRSsWRVVSSI (s denotes phosphorylated Ser) derived from Akt1 substrate 14-3-3-ζ, which contains the phosphorylation motif RRSsWR. Variants were generated by successive arginine-to-leucine substitutions within the phosphorylation motif. ECD fragmentation patterns for the eight phosphopeptide variants show greater sequence coverage with successive R → L substitutions. Peptides with two or more basic residues had regions with no sequence coverage, while full sequence coverage was observed for peptides with one or no basic residues. For three of the peptide variants, low-abundance fragments were observed between the phosphoserine and a basic residue, possibly due to the presence of multiple conformers with and without noncovalent interactions between these residues. For the five variants whose dissociation behavior suggested the presence of intramolecular noncovalent interactions, we employed ion mobility spectrometry and molecular modeling to probe the nature of these interactions. 
Our workflow allowed us to propose candidate structures whose noncovalent interactions were consistent with the ECD data for all of the peptides modeled. Additionally, the AMBER parameter sets created for and validated by this work are presented and made available online ( http://www.biosciences-labs.bham.ac.uk/cooper/datasets.php ).",2020-01-09 +35372925,Venous Thrombotic Events in ANCA-Associated Vasculitis: Incidence and Risk Factors.,"

Background

The incidence of venous thromboembolism (VTE) is increased in ANCA-associated vasculitis (AAV). We assessed the frequency of VTE observed among patients with AAV evaluated at our center and identified risk factors.

Methods

Patients from the Johns Hopkins Vasculitis Center cohort who were evaluated between 1998 and 2018 and had a diagnosis of granulomatosis with polyangiitis (GPA) or microscopic polyangiitis (MPA) were eligible for analysis. Baseline demographics and clinical and serologic data were extracted. Univariate and multivariate analyses were performed to identify factors associated with VTE in AAV.

Results

A total of 162 patients with AAV were identified, 105 (65%) with GPA; 22 (14%) of these patients had a recorded VTE with a median time to VTE of 1 month. The mean (SD) age in the VTE versus non-VTE groups was 54±20 versus 55±17 years (P=0.99), 64% versus 60% female (P=0.93), 82% versus 49% PR3-ANCA positive (P=0.01), with a total mean BMI of 33.3±5.7 versus 28.3±6.1 kg/m2, (P<0.001) respectively. The median Birmingham Vasculitis Activity Score (BVAS version 3) was 19 versus 14 (P=0.02). Univariate analyses identified PR3-ANCA, rapidly progressive GN (RPGN), and hypoalbuminemia. In multivariate analysis, the significant associations with VTE included PR3-ANCA (OR, 4.77; P=0.02), hypoalbuminemia (OR, 4.84; P=0.004), and BMI (OR, 1.18; P<0.001).

Conclusions

VTE is a surprisingly common complication of AAV. PR3-ANCA and hypoalbuminemia are risk factors for developing VTEs. Further studies are needed to confirm these findings.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/K360/2020_04_30_KID0000572019.mp3.",2020-03-03 +31104596,"A probabilistic census-travel model to predict introduction sites of exotic plant, animal and human pathogens.","International travel offers an extensive network for new and recurring human-mediated introductions of exotic infectious pathogens and biota, freeing geographical constraints. We present a predictive census-travel model that integrates international travel with endpoint census data and epidemiological characteristics to predict points of introduction. Population demographics, inbound and outbound travel patterns, and quantification of source strength by country are combined to estimate and rank risk of introduction at user-scalable land parcel areas (e.g. state, county, zip code, census tract, gridded landscapes (1 mi2, 5 km2, etc.)). This risk ranking by parcel can be used to develop pathogen surveillance programmes, and has been incorporated in multiple US state/federal surveillance protocols. The census-travel model is versatile and independent of pathosystems, and applies a risk algorithm to generate risk maps for plant, human and animal contagions at different spatial scales. An interactive, user-friendly interface is available online (https://epi-models.shinyapps.io/Census_Travel/) to provide ease-of-use for regulatory agencies for early detection of high-risk exotics. The interface allows users to parametrize and run the model without knowledge of background code and underpinning data. This article is part of the theme issue 'Modelling infectious disease outbreaks in humans, animals and plants: epidemic forecasting and control'. 
This theme issue is linked with the earlier issue 'Modelling infectious disease outbreaks in humans, animals and plants: approaches and important themes'.",2019-07-01 +31431912,Data from quantitative serum proteomic analysis after laparoscopic gastric plication.,"Bariatric surgery is an effective treatment for morbid obesity with a sustained weight loss and improvements in metabolic syndrome. We present a label free quantitative shotgun proteomics approach to analyze the serum proteome of obese people who underwent Laparoscopic Gastric Plication (LGP) as a new bariatric surgery. Pre-surgery serum samples of obese individuals were compared with the serum of the same subjects 1-2 months post-surgery (T1) and 4-5 months post-surgery (T2). The data provide a list of 224 quantifiable proteins with at least two unique peptides that were quantifiable in at least 70% of samples. Gene ontology biological processes and molecular functions of differentially regulated proteins between pre- and post-surgery samples were investigated using WebGestalt online tool. In addition, molecular networks of differentially abundant proteins were determined through Ingenuity Pathway Analysis (IPA) software. This report is related to the research article entitled ""Serum proteome changes and accelerated reduction of fat mass after Laparoscopic Gastric Plication in morbidly obese patients"" (Savedoroudi et al. [1]). Proteomics data have been deposited to the ProteomeXchange Consortium (http://proteomecentral.proteomexchange.org) via the PRIDE partner repository through the identifier PXD010528.",2019-05-30 +28700230,LigQ: A Webserver to Select and Prepare Ligands for Virtual Screening.,"Virtual screening is a powerful methodology to search for new small molecule inhibitors against a desired molecular target. Usually, it involves evaluating thousands of compounds (derived from large databases) in order to select a set of potential binders that will be tested in the wet-lab. 
The number of tested compounds is directly proportional to the cost, and thus, the best possible set of ligands is the one with the highest number of true binders, for the smallest possible compound set size. Therefore, methods that are able to trim down large universal data sets enriching them in potential binders are highly appreciated. Here we present LigQ, a free webserver that is able to (i) determine best structure and ligand binding pocket for a desired protein, (ii) find known binders, as well as potential ligands known to bind to similar protein domains, (iii) most importantly, select a small set of commercial compounds enriched in potential binders, and (iv) prepare them for virtual screening. LigQ was tested with several proteins, showing an impressive capacity to retrieve true ligands from large data sets, achieving enrichment factors of over 10%. LigQ is available at http://ligq.qb.fcen.uba.ar/ .",2017-07-27 +30918935,PgpRules: a decision tree based prediction server for P-glycoprotein substrates and inhibitors.,"SUMMARY:P-glycoprotein (P-gp) is a member of ABC transporter family that actively pumps xenobiotics out of cells to protect organisms from toxic compounds. P-gp substrates can be easily pumped out of the cells to reduce their absorption; conversely P-gp inhibitors can reduce such pumping activity. Hence, it is crucial to know if a drug is a P-gp substrate or inhibitor in view of pharmacokinetics. Here we present PgpRules, an online P-gp substrate and P-gp inhibitor prediction server with ruled-sets. The two models were built using classification and regression tree algorithm. For each compound uploaded, PgpRules not only predicts whether the compound is a P-gp substrate or a P-gp inhibitor, but also provides the rules containing chemical structural features for further structural optimization. AVAILABILITY AND IMPLEMENTATION:PgpRules is freely accessible at https://pgprules.cmdm.tw/. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-10-01 +25237112,"An eight-parent multiparent advanced generation inter-cross population for winter-sown wheat: creation, properties, and validation.","MAGIC populations represent one of a new generation of crop genetic mapping resources combining high genetic recombination and diversity. We describe the creation and validation of an eight-parent MAGIC population consisting of 1091 F7 lines of winter-sown wheat (Triticum aestivum L.). Analyses based on genotypes from a 90,000-single nucleotide polymorphism (SNP) array find the population to be well-suited as a platform for fine-mapping quantitative trait loci (QTL) and gene isolation. Patterns of linkage disequilibrium (LD) show the population to be highly recombined; genetic marker diversity among the founders was 74% of that captured in a larger set of 64 wheat varieties, and 54% of SNPs segregating among the 64 lines also segregated among the eight founder lines. In contrast, a commonly used reference bi-parental population had only 54% of the diversity of the 64 varieties with 27% of SNPs segregating. We demonstrate the potential of this MAGIC resource by identifying a highly diagnostic marker for the morphological character ""awn presence/absence"" and independently validate it in an association-mapping panel. These analyses show this large, diverse, and highly recombined MAGIC population to be a powerful resource for the genetic dissection of target traits in wheat, and it is well-placed to efficiently exploit ongoing advances in phenomics and genomics. 
Genetic marker and trait data, together with instructions for access to seed, are available at http://www.niab.com/MAGIC/.",2014-09-18 +32392191,[Genetic Predisposition to Early Myocardial Infarction].,"The aim of the study was to identify the features of the genetic structure of myocardial infarction (MI) susceptibility depending on age (""early MI"" denoting individuals who had the first MI before the age of 60 years, and ""late MI"" the group of patients with the first ""MI after 60 years""). A total of 355 patients were examined (n = 121 early MI and n = 234 late MI) and 285 residents of the Siberian region (as a control group). Genotyping of 58 single nucleotide variants (SNPs) was performed using mass spectrometry using the Agena (ex Sequenom) MassARRAY® System. Statistical analysis was performed using Statistica 8.0 (""StatSoft Inc."", USA), as well as the ""stats"" and ""genetics"" packages in the R environment. The regulatory potential of SNPs was evaluated using the rSNPBase online service (http://rsnp.psych.ac.cn/). eQTL loci were identified using data from the Genotype-Tissue Expression (GTEx) project (http://www.gtexportal.org/) and the Blood eQTL online service (https://genenetwork.nl/bloodeqtlbrowser/). The GG genotype of ITGA4 rs1143674, the CC genotype of CDKN2B-AS1 rs1333049, and the CC genotype of KIAA1462 rs3739998, are generally associated with MI. The AA genotype of ADAMDEC1 rs3765124 (OR = 2.03; 95% CI 1.23-3.33; p = 0.004) and the GG genotype of AQP2 rs2878771 (OR = 2.24; 95% CI 1.23-4.09; p = 0.006) are associated with the development of MI at an early age, and the TT genotype of TAS2R38 rs1726866 (OR = 1.82; 95% CI 1.11-2.89; p = 0.009) was the high-risk genotype for the late MI. Genetic variants associated with MI are regulatory SNP (rSNP) and affect the affinity of DNA binding to transcription factors, carry out post-transcriptional control of gene activity and change the level of gene expression in various tissues. 
Thus, early and late MI are based on both common genetic variants of ITGA4, CDKN2B-AS1, KIAA1462 genes and specific ones (ADAMDEC1 and AQP2 for early MI and TAS2R38 for late MI).",2020-03-01 +31171633,"Reply to ""Phosphorylation of G3BP1-S149 does not influence stress granule assembly"".","In this issue, Panas et al. (2019. J. Cell Biol. https://doi.org/10.1083/jcb.201801214) challenge the data published in the Tourrière et al. (2013. J. Cell Biol. https://doi.org/10.1083/jcb.200212128) paper on the role of G3BP phosphorylation in stress granule (SG) assembly. This reply addresses that letter and suggests that more work is needed to understand the role of this modification in SG formation.",2019-06-06 +31915167,Dementia among migrants and ethnic minorities in Italy: rationale and study protocol of the ImmiDem project.,"

Introduction

Due to the ongoing demographic and epidemiological transitions, estimating the phenomenon of dementia in migrants and minority groups, exploring its characteristics and challenges and implementing dedicated healthcare policies, constitute emerging and urgent matters for Western countries. In the present paper we describe the rationale and design of the 'Dementia in immigrants and ethnic minorities living in Italy: clinical-epidemiological aspects and public health perspectives' (ImmiDem) project.

Methods and analysis

Three main aims will be pursued by the ImmiDem project. First, a survey of all Italian dementia services will be conducted with dedicated questionnaires in order to estimate and describe the proportion and characteristics of migrants seeking help for cognitive disturbances. The different clinical approaches for diagnosing dementia and the challenges encountered in the assessment of cognitive functioning and in the provision of care in these groups of individuals will also be investigated. Second, record linkage procedures of data routinely collected in regional Health Information Systems will be conducted in order to identify and monitor migrant individuals with dementia living in the Lazio region. Third, tailored national and local care-coordination pathways and/or good practices dedicated to migrants affected by dementia and cognitive disorders will be identified and promoted.

Ethics and dissemination

The study protocol was approved by the Ethics Committee of the Italian National Institute of Health (protocol 10749; 5 April 2018). The project was launched in November 2018 and will end in November 2021. The findings of the project will be disseminated through scientific peer-reviewed journals as well as to the public via the Dementia Observatory website (https://demenze.iss.it).",2020-01-07 +31315309,"Editorial for Special Issue ""Molecular Advances in Wheat and Barley"". ","Along with maize and rice, allohexaploid bread wheat and diploid barley are the most cultivated crops in the world (FAOSTAT database, http://www.fao.org/faostat, accessed on 22 June 2019)[...].",2019-07-16 +30007665,Mapping the risk of evaporated milk spoilage in the Mediterranean region based on the effect of temperature conditions on Geobacillus stearothermophilus growth.,"A predictive model for the effect of storage temperature on the growth of Geobacillus stearothermophilus was applied in order to assess the risk of evaporated milk spoilage in the markets of the Mediterranean region. The growth of G. stearothermophilus in evaporated milk was evaluated during a shelf life of one year based on historical temperature profiles (hourly) covering 23 Mediterranean capitals for five years over the period 2012-2016 obtained from the Weather Underground database (http://www.wunderground.com/). In total, 115 scenarios were tested simulating the distribution and storage conditions of evaporated milk in the Mediterranean region. The highest growth of G. stearothermophilus was predicted for Marrakech, Damascus and Cairo over the period 2012-2016 with mean values of 7.2, 7.4 and 5.5 log CFU/ml, respectively, followed by Tunis, Podgorica and Tripoli with mean growth of 2.8, 2.4 and 2.3 log CFU/ml, respectively. For the rest 17 capitals the mean growth of the spoiler was <1.5 log CFU/ml. 
The capitals Podgorica, Cairo, Tunis and Ankara showed the highest variability in the growth during the 5 years examined with standard deviation values for growth of 2.01, 1.79, 1.77 and 1.25 log CFU/ml, respectively. The predicted extent and the variability of growth during the shelf life were used to assess the risk of spoilage which was visualised in a geographical risk map. The growth model of G. stearothermophilus was also used to evaluate adjustments of the evaporated milk expiration date which can reduce the risk of spoilage. The quantitative data provided in the present study can assist the food industry to effectively evaluate the microbiological stability of these products throughout distribution and storage at a reduced cost (by reducing sampling quality control) and assess whether and under which conditions (e.g. expiration date) will be able to export a product to a country without spoilage problems. This decision support may lead to a significant benefit for both the competitiveness of the food industry and the consumer.",2018-05-07 +30169744,A generic deep convolutional neural network framework for prediction of receptor-ligand interactions-NetPhosPan: application to kinase phosphorylation prediction.,"

Motivation

Understanding the specificity of protein receptor-ligand interactions is pivotal for our comprehension of biological mechanisms and systems. Receptor protein families often have a certain level of sequence diversity that converges into fewer conserved protein structures, allowing the exertion of well-defined functions. T and B cell receptors of the immune system and protein kinases that control the dynamic behaviour and decision processes in eukaryotic cells by catalysing phosphorylation represent prime examples. Driven by the large sequence diversity, the receptors within such protein families are often found to share specificities although divergent at the sequence level. This observation has led to the notion that prediction models of such systems are most effectively handled in a receptor-specific manner.

Results

We show that this approach in many cases is suboptimal, and describe an alternative improved framework for generating models with pan-receptor-predictive power for receptor protein families. The framework is based on deep artificial neural networks and integrates information from individual receptors into a single pan-receptor model, leveraging information across multiple receptor-specific datasets allowing predictions of the receptor specificity for all members of a given protein family including those described by limited or no ligand data. The approach was applied to the protein kinase superfamily, leading to the method NetPhosPan. The method was extensively validated and benchmarked against state-of-the-art prediction methods and was found to have unprecedented performance in particular for kinase domains characterized by limited or no experimental data.

Availability and implementation

The method is freely available to non-commercial users and can be downloaded at http://www.cbs.dtu.dk/services/NetPhospan-1.0.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +32334636,Dual anti-platelet therapy following percutaneous coronary intervention in a population of patients with thrombocytopenia at baseline: a meta-analysis.,"

Background

In this meta-analysis, we aimed to systematically compare the post percutaneous coronary interventional (PCI) adverse bleeding events, stent thrombosis, stroke and other cardiovascular outcomes in a population of patients with and without thrombocytopenia at baseline who were followed up on dual antiplatelet therapy (DAPT).

Methods

Relevant English language articles which were published before June 2019 were retrieved from MEDLINE, http://www.ClinicalTrials.com, EMBASE, Cochrane central, and Google scholar briefly using specific terms such as percutaneous coronary intervention or dual antiplatelet therapy, and thrombocytopenia. All the participants were followed up on DAPT following discharge. Specific endpoints including bleeding events, stent thrombosis, stroke and other adverse cardiovascular events were assessed. The latest version of the RevMan software was used for the statistical assessment. Odds ratios (OR) with 95% confidence intervals (CI) based on a fixed or a random statistical model were used to represent the data graphically.

Results

A total number of 118,945 participants (from 8 studies) were included with 37,753 suffering from thrombocytopenia at baseline. Our results showed post procedural bleeding (OR: 1.89, 95% CI: 1.16-3.07; P = 0.01), access site bleeding (OR: 1.66, 95% CI: 1.15-2.39; P = 0.006), intra-cranial bleeding (OR: 1.78, 95% CI: 1.30-2.43; P = 0.0003), gastro-intestinal bleeding (OR: 1.44, 95% CI: 1.14-1.82; P = 0.002) and any major bleeding (OR: 1.67, 95% CI: 1.42-1.97; P = 0.00001) to be significantly higher in thrombocytopenic patients treated with DAPT after PCI. Total stroke (OR: 1.45, 95% CI: 1.18-1.78; P = 0.0004) specifically hemorrhagic stroke (OR: 1.67, 95% CI: 1.30-2.14; P = 0.0001) was also significantly higher in these patients with thrombocytopenia at baseline. All-cause mortality and major adverse cardiac events were also significantly higher. However, overall total stent thrombosis (OR: 1.18, 95% CI: 0.90-1.55; P = 0.24) including definite and probable stent thrombosis were not significantly different compared to the control group.

Conclusions

According to the results of this analysis, DAPT might have to be used cautiously following PCI in a population of patients with thrombocytopenia at baseline due to the significantly higher bleeding rate including gastro-intestinal, intra-cranial bleeding and hemorrhagic stroke. Hence, special care might have to be taken when considering anti-platelet agents following PCI in these high risk patients. However, considering the present limitations of this analysis, this hypothesis will have to be confirmed in future trials.",2020-04-25 +28462625,[Experience with a rheumatoid arthritis biobank: analysis of biological samples and clinical data of 204 patients].,"

Introduction

A biobank is a registry, which is suitable for the storage of biological samples (e.g. tissues, DNA, protein), genetical abnormalities and clinical data. Several biobanks have been created worldwide, which contribute to research and the better understanding of disease pathogenesis, genetical polymorphisms. Biobanking also helps to improve the efficacy of therapies.

Aim

Our purpose was to create an internet-based biobank, in which laboratory test results, genetic alterations and related disorders of rheumatoid arthritis (RA) patients can be registered. This biobank would be able to make the research easier and it can help to improve our knowledge about diseases and it can inhibit loss of data.

Patients and method

We have biological samples from 204 RA patients and we have entered their data in the biobank which can be found on the website http://rheuma.biobank.eu . Statistical analysis was performed by SPSS20 statistical programme.

Results

By the creation of biobank that contains clinical data and biological samples of 204 RA patients, we have a database which can help to improve our knowledge about the disease and help to develop new treatment strategies.

Conclusion

Biobanking is suitable to analyze blood samples and clinical data together. Orv. Hetil., 2017, 158(7), 270-277.",2017-02-01 +24373114,'Beyond Milestones': a randomised controlled trial evaluating an innovative digital resource teaching quality observation of normal child development.,"

Aims

The study aimed to create and evaluate the educational effectiveness of a digital resource instructing paediatric trainees in a systematic approach to critical and quality observation of normal child development.

Methods

A digital educational resource was developed utilising the skills of an expert developmental paediatrician who was videoed assessing normal early child development at a series of critical stages. Videos illustrated aspects of language, sophistication of play and socialisation, cognition, and motor progress. Expert commentary, teaching text and summaries were used. A randomised controlled trial evaluated the resource. Paediatric trainees were recruited from The Sydney Children's Hospitals Network. Outcome measures were repeated at three time points (pre-teaching, immediate-post and 1 month) and included self-rated attitudes, knowledge of markers of development and observational expertise. Qualitative data on teaching usefulness were obtained through open-ended questions.

Results

Fifty-six paediatric trainees (registrar 79%, women 82%; mean age 31 years) completed the pre-assessment, 46 the immediate-post and 45 the 1-month follow-up (20% attrition). Compared with the Control group, the Teaching group scored higher over time on markers of development (P = 0.006), observational expertise (P < 0.0001), confidence (P = 0.035) and satisfaction (P < 0.0001). Teaching participants valued the video and expert commentary and reported improvement in confidence and understanding and acquiring a more structured approach.

Conclusions

The 'Beyond Milestones' free online resource for medical professionals improves knowledge, increases confidence and is useful, providing a structured approach to developmental assessment. The techniques taught can be applied to every paediatric consultation.",2013-12-23 +21914464,IGD: a resource for intronless genes in the human genome.,"Intronless genes (IGs) fraction varies between 2.7 and 97.7% in eukaryotic genomes. Although many databases on exons and introns exist, there was no curated database for such genes which allowed their study in a concerted manner. Such a database would be useful to identify the functional features and the distribution of these genes across the genome. Here, a new database of IGs in eukaryotes based on GenBank data was described. This database, called IGD (Intronless Gene Database), is a collection of gene sequences that were annotated and curated. The current version of IGD contains 687 human intronless genes with their protein and CDS sequences. Some features of the entries are given in this paper. Data was extracted from GenBank release 183 using a Perl script. Data extraction was followed by a manual curation step. Intronless genes were then analyzed based on their RefSeq annotation and Gene Ontology functional class. IGD represents a useful resource for retrieval and in silico study of intronless genes. IGD is available at http://www.bioinfo-cbs.org/igd with comprehensive help and FAQ pages that illustrate the main uses of this resource.",2011-09-02 +30614595,Genetic association with high-resolution climate data reveals selection footprints in the genomes of barley landraces across the Iberian Peninsula.,"Landraces are local populations of crop plants adapted to a particular environment. Extant landraces are surviving genetic archives, keeping signatures of the selection processes experienced by them until settling in their current niches. 
This study intends to establish relationships between genetic diversity of barley (Hordeum vulgare L.) landraces collected in Spain and the climate of their collection sites. A high-resolution climatic data set (5 × 5 km spatial, 1-day temporal grid) was computed from over 2,000 temperature and 7,000 precipitation stations across peninsular Spain. This data set, spanning the period 1981-2010, was used to derive agroclimatic variables meaningful for cereal production at the collection sites of 135 barley landraces. Variables summarize temperature, precipitation, evapotranspiration, potential vernalization and frost probability at different times of the year and time scales (season and month). SNP genotyping of the landraces was carried out combining Illumina Infinium assays and genotyping-by-sequencing, yielding 9,920 biallelic markers (7,479 with position on the barley reference genome). The association of these SNPs with agroclimatic variables was analysed at two levels of genetic diversity, with and without taking into account population structure. The whole data sets and analysis pipelines are documented and available at https://eead-csic-compbio.github.io/barley-agroclimatic-association. We found differential adaptation of the germplasm groups identified to be dominated by reactions to cold temperature and late-season frost occurrence, as well as to water availability. 
Several significant associations pointing at specific adaptations to agroclimatic features related to temperature and water availability were observed, and candidate genes underlying some of the main regions are proposed.",2019-04-02 +22120664,The Comprehensive Phytopathogen Genomics Resource: a web-based resource for data-mining plant pathogen genomes.,"The Comprehensive Phytopathogen Genomics Resource (CPGR) provides a web-based portal for plant pathologists and diagnosticians to view the genome and trancriptome sequence status of 806 bacterial, fungal, oomycete, nematode, viral and viroid plant pathogens. Tools are available to search and analyze annotated genome sequences of 74 bacterial, fungal and oomycete pathogens. Oomycete and fungal genomes are obtained directly from GenBank, whereas bacterial genome sequences are downloaded from the A Systematic Annotation Package (ASAP) database that provides curation of genomes using comparative approaches. Curated lists of bacterial genes relevant to pathogenicity and avirulence are also provided. The Plant Pathogen Transcript Assemblies Database provides annotated assemblies of the transcribed regions of 82 eukaryotic genomes from publicly available single pass Expressed Sequence Tags. Data-mining tools are provided along with tools to create candidate diagnostic markers, an emerging use for genomic sequence data in plant pathology. The Plant Pathogen Ribosomal DNA (rDNA) database is a resource for pathogens that lack genome or transcriptome data sets and contains 131 755 rDNA sequences from GenBank for 17 613 species identified as plant pathogens and related genera. Database URL: http://cpgr.plantbiology.msu.edu.",2011-11-26 +31309681,Lecanosticta acicola: A growing threat to expanding global pine forests and plantations.,"Lecanosticta acicola causes brown spot needle blight (BSNB) of Pinus species. The pathogen occurs mostly in the Northern Hemisphere but has also been reported in Central America and Colombia. 
BSNB can lead to stunted growth and tree mortality, and has resulted in severe damage to pine plantations in the past. There have been increasingly frequent new reports of this pathogen in Europe and in North America during the course of the past 10 years. This is despite the fact that quarantine practices and eradication protocols are in place to prevent its spread.

Taxonomy

Kingdom Fungi; Phylum Ascomycota; Subphylum Pezizomycotina; Class Dothideomycetes; Subclass Dothideomycetidae; Order Capnodiales; Family Mycosphaerellaceae; Genus Lecanosticta.

Host range and distribution

Lecanosticta spp. occur on various Pinus species and are found in North America, Central America, South America (Colombia), Europe as well as Asia.

Disease symptoms

Small yellow irregular spots appear on the infected pine needles that become brown over time. They can be surrounded by a yellow halo. These characteristic brown spots develop to form narrow brown bands that result in needle death from the tips down to the point of infection. Needles are prematurely shed, leaving bare branches with tufts of new needles at the branch tips. Infection is usually most severe in the lower parts of the trees and progresses upwards into the canopies.

Useful websites

The EPPO global database providing information on L. acicola (https://gd.eppo.int/taxon/SCIRAC) Reference genome of L. acicola available on GenBank (https://www.ncbi.nlm.nih.gov/genome/?term=Lecanosticta+acicola) JGI Gold Genome database information sheet of L. acicola sequenced genome (https://gold.jgi.doe.gov/organism?xml:id=Go0047147).",2019-07-15 +23729504,CNVinspector: a web-based tool for the interactive evaluation of copy number variations in single patients and in cohorts.,"

Objectives

Many genetic disorders are caused by copy number variations (CNVs) in the human genome. However, the large number of benign CNV polymorphisms makes it difficult to delineate causative variants for a certain disease phenotype. Hence, we set out to create software that accumulates and visualises locus-specific knowledge and enables clinicians to study their own CNVs in the context of known polymorphisms and disease variants.

Methods

CNV data from healthy cohorts (Database of Genomic Variants) and from disease-related databases (DECIPHER) were integrated into a joint resource. Data are presented in an interactive web-based application that allows inspection, evaluation and filtering of CNVs in single individuals or in entire cohorts.

Results

CNVinspector provides simple interfaces to upload CNV data, compare them with own or published control data and visualise the results in graphical interfaces. Beyond choosing control data from different public studies, platforms and methods, dedicated filter options allow the detection of CNVs that are either enriched in patients or depleted in controls. Alternatively, a search can be restricted to those CNVs that appear in individuals of similar clinical phenotype. For each gene of interest within a CNV, we provide a link to NCBI, ENSEMBL and the GeneDistiller search engine to browse for potential disease-associated genes.

Conclusions

With its user-friendly handling, the integration of control data and the filtering options, CNVinspector will facilitate the daily work of clinical geneticists and accelerate the delineation of new syndromes and gene functions. CNVinspector is freely accessible under http://www.cnvinspector.org.",2013-05-31 +30998681,"SPINDLE: End-to-end learning from EEG/EMG to extrapolate animal sleep scoring across experimental settings, labs and species.","Understanding sleep and its perturbation by environment, mutation, or medication remains a central problem in biomedical research. Its examination in animal models rests on brain state analysis via classification of electroencephalographic (EEG) signatures. Traditionally, these states are classified by trained human experts by visual inspection of raw EEG recordings, which is a laborious task prone to inter-individual variability. Recently, machine learning approaches have been developed to automate this process, but their generalization capabilities are often insufficient, especially across animals from different experimental studies. To address this challenge, we crafted a convolutional neural network-based architecture to produce domain invariant predictions, and furthermore integrated a hidden Markov model to constrain state dynamics based upon known sleep physiology. Our method, which we named SPINDLE (Sleep Phase Identification with Neural networks for Domain-invariant LEearning) was validated using data of four animal cohorts from three independent sleep labs, and achieved average agreement rates of 99%, 98%, 93%, and 97% with scorings from five human experts from different labs, essentially duplicating human capability. It generalized across different genetic mutants, surgery procedures, recording setups and even different species, far exceeding state-of-the-art solutions that we tested in parallel on this task. 
Moreover, we show that these scored data can be processed for downstream analyzes identical to those from human-scored data, in particular by demonstrating the ability to detect mutation-induced sleep alteration. We provide to the scientific community free usage of SPINDLE and benchmarking datasets as an online server at https://sleeplearning.ethz.ch. Our aim is to catalyze high-throughput and well-standardized experimental studies in order to improve our understanding of sleep.",2019-04-18 +28025346,The importance of digitized biocollections as a source of trait data and a new VertNet resource. ,"For vast areas of the globe and large parts of the tree of life, data needed to inform trait diversity is incomplete. Such trait data, when fully assembled, however, form the link between the evolutionary history of organisms, their assembly into communities, and the nature and functioning of ecosystems. Recent efforts to close data gaps have focused on collating trait-by-species databases, which only provide species-level, aggregated value ranges for traits of interest and often lack the direct observations on which those ranges are based. Perhaps under-appreciated is that digitized biocollection records collectively contain a vast trove of trait data measured directly from individuals, but this content remains hidden and highly heterogeneous, impeding discoverability and use. We developed and deployed a suite of openly accessible software tools in order to collate a full set of trait descriptions and extract two key traits, body length and mass, from >18 million specimen records in VertNet, a global biodiversity data publisher and aggregator. We tested success rate of these tools against hand-checked validation data sets and characterized quality and quantity. A post-processing toolkit was developed to standardize and harmonize data sets, and to integrate this improved content into VertNet for broadest reuse. 
The result of this work was to add more than 1.5 million harmonized measurements on vertebrate body mass and length directly to specimen records. Rates of false positives and negatives for extracted data were extremely low. We also created new tools for filtering, querying, and assembling this research-ready vertebrate trait content for view and download. Our work has yielded a novel database and platform for harmonized trait content that will grow as tools introduced here become part of publication workflows. We close by noting how this effort extends to new communities already developing similar digitized content.Database URL: http://portal.vertnet.org/search?advanced=1.",2016-12-26 +27939289,Intensification: A Resource for Amplifying Population-Genetic Signals with Protein Repeats.,"Large-scale genome sequencing holds great promise for the interpretation of protein structures through the discovery of many, rare functional variants in the human population. However, because protein-coding regions are under high selective constraints, these variants occur at low frequencies, such that there is often insufficient statistics for downstream calculations. To address this problem, we develop the Intensification approach, which uses the modular structure of repeat protein domains to amplify signals of selection from population genetics and traditional interspecies conservation. In particular, we are able to aggregate variants at the codon level to identify important positions in repeat domains that show strong conservation signals. This allows us to compare conservation over different evolutionary timescales. It also enables us to visualize population-genetic measures on protein structures. We make available the Intensification results as an online resource (http://intensification.gersteinlab.org) and illustrate the approach through a case study on the tetratricopeptide repeat.",2016-12-07 +30295702,MetWork: a web server for natural products anticipation.,"

Motivation

The annotation of natural products and more generally small molecules is one of the major drawbacks in untargeted mass spectrometry analysis. Molecular networking has emerged as a structured way to organize and mine data from untargeted tandem mass spectrometry (MS/MS) experiments. Despite the great potential of this tool, the annotation is usually performed manually by the expert as only few spectral libraries are available.

Results

Herein we propose a web server of in silico metabolization of metabolites that represents a full implementation of the metabolome consistency concept. The workflow is based on MS/MS data, organized in a molecular network using the Global Natural Products Social Molecular Networking (GNPS) platform, a collaborative library of reactions and a MS/MS spectra prediction module. Having one node identified in the molecular network, the server generates putative structures and predicts the associated MS/MS spectra when the exact mass is detected in the network. A similarity comparison between the MS/MS spectra is then performed in order to annotate the node.

Availability and implementation

The web server is available at: https://metwork.pharmacie.parisdescartes.fr.",2019-05-01 +24651967,OncomiRDB: a database for the experimentally verified oncogenic and tumor-suppressive microRNAs.,"

Summary

MicroRNAs (miRNAs), a class of small regulatory RNAs, play important roles in cancer initiation, progression and therapy. MiRNAs are found to regulate diverse cancer-related processes by targeting a large set of oncogenic and tumor-suppressive genes. To establish a high-confidence reference resource for studying the miRNA-regulated target genes and cellular processes in cancer, we manually curated 2259 entries of cancer-related miRNA regulations with direct experimental evidence from ∼9000 abstracts, covering more than 300 miRNAs and 829 target genes across 25 cancer tissues. A web-based portal named oncomiRDB, which provides both graphical and text-based interfaces, was developed for easily browsing and searching all the annotations. It should be a useful resource for both the computational analysis and experimental study on miRNA regulatory networks and functions in cancer.

Availability and implementation

http://bioinfo.au.tsinghua.edu.cn/oncomirdb/

Contact

jgu@tsinghua.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-03-20 +32433107,How Does the Skeletal Oncology Research Group Algorithm's Prediction of 5-year Survival in Patients with Chondrosarcoma Perform on International Validation?,"

Background

The Skeletal Oncology Research Group (SORG) machine learning algorithm for predicting survival in patients with chondrosarcoma was developed using data from the Surveillance, Epidemiology, and End Results (SEER) registry. This algorithm was externally validated on a dataset of patients from the United States in an earlier study, where it demonstrated generally good performance but overestimated 5-year survival. In addition, this algorithm has not yet been validated in patients outside the United States; doing so would be important because external validation is necessary as algorithm performance may be misleading when applied in different populations.

Questions/purposes

Does the SORG algorithm retain validity in patients who underwent surgery for primary chondrosarcoma outside the United States, specifically in Italy?

Methods

A total of 737 patients were treated for chondrosarcoma between January 2000 and October 2014 at the Italian tertiary care center which was used for international validation. We excluded patients whose first surgical procedure was performed elsewhere (n = 25), patients who underwent nonsurgical treatment (n = 27), patients with a chondrosarcoma of the soft tissue or skull (n = 60), and patients with peripheral, periosteal, or mesenchymal chondrosarcoma (n = 161). Thus, 464 patients were ultimately included in this external validation study, as the earlier performed SEER study was used as the training set. Therefore, this study-unlike most of this type-does not have a training and validation set. Although the earlier study overestimated 5-year survival, we did not modify the algorithm in this report, as this is the first international validation and the prior performance in the single-institution validation study from the United States may have been driven by a small sample or non-generalizable patterns related to its single-center setting. Variables needed for the SORG algorithm were manually collected from electronic medical records. These included sex, age, histologic subtype, tumor grade, tumor size, tumor extension, and tumor location. By inputting these variables into the algorithm, we calculated the predicted probabilities of survival for each patient. The performance of the SORG algorithm was assessed in this study through discrimination (the ability of a model to distinguish between a binary outcome), calibration (the agreement of observed and predicted outcomes), overall performance (the accuracy of predictions), and decision curve analysis (establishment on the ability of a model to make a decision better than without using the model). 
For discrimination, the c-statistic (commonly known as the area under the receiver operating characteristic curve for binary classification) was calculated; this ranged from 0.5 (no better than chance) to 1.0 (excellent discrimination). The agreement between predicted and observed outcomes was visualized with a calibration plot, and the calibration slope and intercept were calculated. Perfect calibration results in a slope of 1 and an intercept of 0. For overall performance, the Brier score and the null-model Brier score were calculated. The Brier score ranges from 0 (perfect prediction) to 1 (poorest prediction). Appropriate interpretation of the Brier score requires comparison with the null-model Brier score. The null-model Brier score is the score for an algorithm that predicts a probability equal to the population prevalence of the outcome for every patient. A decision curve analysis was performed to compare the potential net benefit of the algorithm versus other means of decision support, such as treating all or none of the patients. There were several differences between this study and the earlier SEER study, and such differences are important because they help us to determine the performance of the algorithm in a group different from the initial study population. In this study from Italy, 5-year survival was different from the earlier SEER study (71% [319 of 450 patients] versus 76% [1131 of 1487 patients]; p = 0.03). There were more patients with dedifferentiated chondrosarcoma than in the earlier SEER study (25% [118 of 464 patients] versus 8.5% [131 of 1544 patients]; p < 0.001). 
In addition, in this study patients were older, tumor size was larger, and there were higher proportions of high-grade tumors than the earlier SEER study (age: 56 years [interquartile range {IQR} 42 to 67] versus 52 years [IQR 40 to 64]; p = 0.007; tumor size: 80 mm [IQR 50 to 120] versus 70 mm [IQR 42 to 105]; p < 0.001; tumor grade: 22% [104 of 464 had Grade 1], 42% [196 of 464 had Grade 2], and 35% [164 of 464 had Grade 3] versus 41% [592 of 1456 had Grade 1], 40% [588 of 1456 had Grade 2], and 19% [276 of 1456 had Grade 3]; p ≤ 0.001).

Results

Validation of the SORG algorithm in a primarily Italian population achieved a c-statistic of 0.86 (95% confidence interval 0.82 to 0.89), suggesting good-to-excellent discrimination. The calibration plot showed good agreement between the predicted probability and observed survival in the probability thresholds of 0.8 to 1.0. With predicted survival probabilities lower than 0.8, however, the SORG algorithm underestimated the observed proportion of patients with 5-year survival, reflected in the overall calibration intercept of 0.82 (95% CI 0.67 to 0.98) and calibration slope of 0.68 (95% CI 0.42 to 0.95). The Brier score for 5-year survival was 0.15, compared with a null-model Brier of 0.21. The algorithm showed a favorable decision curve analysis in the validation cohort.

Conclusions

The SORG algorithm to predict 5-year survival for patients with chondrosarcoma held good discriminative ability and overall performance on international external validation; however, it underestimated 5-year survival for patients with predicted probabilities from 0 to 0.8 because the calibration plot was not perfectly aligned for the observed outcomes, which resulted in a maximum underestimation of 20%. The differences may reflect the baseline differences noted between the two study populations. The overall performance of the algorithm supports the utility of the algorithm and validation presented here. The freely available digital application for the algorithm is available here: https://sorg-apps.shinyapps.io/extremitymetssurvival/.

Level of evidence

Level III, prognostic study.",2020-10-01 +29987736,AT_CHLORO: The First Step When Looking for Information About Subplastidial Localization of Proteins.,"Plastids contain several key subcompartments. The two limiting envelope membranes (inner and outer membrane of the plastid envelope with an intermembrane space between), an aqueous phase (stroma), and an internal membrane system terms (thylakoids) formed of flat compressed vesicles (grana) and more light structures (lamellae). The thylakoid vesicles delimit another discrete soluble compartment, the thylakoid lumen. AT_CHLORO ( http://at-chloro.prabi.fr/at_chloro/ ) is a unique database supplying information about the subplastidial localization of proteins. It was created from simultaneous proteomic analyses targeted to the main subcompartments of the chloroplast from Arabidopsis thaliana (i.e., envelope, stroma, thylakoid) and to the two subdomains of thylakoid membranes (i.e., grana and stroma lamellae). AT_CHLORO assembles several complementary information (MS-based experimental data, curated functional annotations and subplastidial localization, links to other public databases and references) which give a comprehensive overview of the current knowledge about the subplastidial localization and the function of chloroplast proteins, with a specific attention given to chloroplast envelope proteins.",2018-01-01 +30239681,PalmXplore: oil palm gene database. ,"A set of Elaeis guineensis genes had been generated by combining two gene prediction pipelines: Fgenesh++ developed by Softberry and Seqping by the Malaysian Palm Oil Board. PalmXplore was developed to provide a scalable data repository and a user-friendly search engine system to efficiently store, manage and retrieve the oil palm gene sequences and annotations. 
Information deposited in PalmXplore includes predicted genes, their genomic coordinates, as well as the annotations derived from external databases, such as Pfam, Gene Ontology and Kyoto Encyclopedia of Genes and Genomes. Information about genes related to important traits, such as those involved in fatty acid biosynthesis (FAB) and disease resistance, is also provided. The system offers Basic Local Alignment Search Tool homology search, where the results can be downloaded or visualized in the oil palm genome browser (MYPalmViewer). PalmXplore is regularly updated offering new features, improvements to genome annotation and new genomic sequences. The system is freely accessible at http://palmxplore.mpob.gov.my.",2018-01-01 +29036351,miRandola 2017: a curated knowledge base of non-invasive biomarkers.,"miRandola (http://mirandola.iit.cnr.it/) is a database of extracellular non-coding RNAs (ncRNAs) that was initially published in 2012, foreseeing the relevance of ncRNAs as non-invasive biomarkers. An increasing amount of experimental evidence shows that ncRNAs are frequently dysregulated in diseases. Further, ncRNAs have been discovered in different extracellular forms, such as exosomes, which circulate in human body fluids. Thus, miRandola 2017 is an effort to update and collect the accumulating information on extracellular ncRNAs that is spread across scientific publications and different databases. Data are manually curated from 314 articles that describe miRNAs, long non-coding RNAs and circular RNAs. Fourteen organisms are now included in the database, and associations of ncRNAs with 25 drugs, 47 sample types and 197 diseases. miRandola also classifies extracellular RNAs based on their extracellular form: Argonaute2 protein, exosome, microvesicle, microparticle, membrane vesicle, high density lipoprotein and circulating. We also implemented a new web interface to improve the user experience.",2018-01-01 +28025347,HEDD: the human epigenetic drug database. 
,"Epigenetic drugs are chemical compounds that target disordered post-translational modification of histone proteins and DNA through enzymes, and the recognition of these changes by adaptor proteins. Epigenetic drug-related experimental data such as gene expression probed by high-throughput sequencing, co-crystal structure probed by X-RAY diffraction and binding constants probed by bio-assay have become widely available. The mining and integration of multiple kinds of data can be beneficial to drug discovery and drug repurposing. HEMD and other epigenetic databases store comprehensively epigenetic data where users can acquire segmental information of epigenetic drugs. However, some data types such as high-throughput datasets are not provide by these databases and they do not support flexible queries for epigenetic drug-related experimental data. Therefore, in reference to HEMD and other epigenetic databases, we developed a relatively comprehensive database for human epigenetic drugs. The human epigenetic drug database (HEDD) focuses on the storage and integration of epigenetic drug datasets obtained from laboratory experiments and manually curated information. The latest release of HEDD incorporates five kinds of datasets: (i) drug, (ii) target, (iii) disease, (vi) high-throughput and (v) complex. In order to facilitate data extraction, flexible search options were built in HEDD, which allowed an unlimited condition query for specific kinds of datasets using drug names, diseases and experiment types.Database URL: http://hedds.org/.",2016-12-26 +28183824,Global Cancer in Women: Cancer Control Priorities.,"This review is an abbreviated version of a report prepared for the American Cancer Society Global Health department and EMD Serono, Inc., a subsidiary of Merck KGaA, Darmstadt, Germany, which was released at the Union for International Cancer Control World Cancer Congress in Paris in November 2016. 
The original report can be found at https://www.cancer.org/health-care-professionals/our-global-health-work/global-cancer-burden/global-burden-of-cancer-in-women.html. Staff in the Intramural Research Department of the American Cancer Society designed and conducted the study, including analysis, interpretation, and presentation of the review. The funding sources had no involvement in the study design, data analysis and interpretation, or preparation of the review. The global burden of cancer in women has recently received much attention, but there are few comprehensive reviews of the burden and policy approaches to reduce it. This article, second in series of two, summarizes the most important cancer control priorities with specific examples of proven interventions, with a particular focus on primary prevention in low- and middle-income countries (LMIC). There are a number of effective cancer control measures available to countries of all resource levels. Many of these measures are extremely cost-effective, especially in the case of tobacco control and vaccination. Countries must prioritize efforts to reduce known cancer risk factors and make prevention accessible to all. Effective treatments and palliative care are also needed for those who develop cancer. Given scarce resources, this may seem infeasible in many LMICs, but past experience with other diseases like HIV, tuberculosis, and malaria have shown that it is possible to make affordable care accessible to all. Expansion of population-based cancer registries and research in LMICs are needed for setting cancer control priorities and for determining the most effective interventions. For LMICs, all of these activities require support and commitment from the global community. Cancer Epidemiol Biomarkers Prev; 26(4); 458-70. ©2017 AACR. See related article by Torre et al. 
in this CEBP Focus section, ""Global Cancer in Women.""",2017-02-09 +28595657,brain-coX: investigating and visualising gene co-expression in seven human brain transcriptomic datasets.,"

Background

The pathogenesis of neurological and mental health disorders often involves multiple genes, complex interactions, as well as brain- and development-specific biological mechanisms. These characteristics make identification of disease genes for such disorders challenging, as conventional prioritisation tools are not specifically tailored to deal with the complexity of the human brain. Thus, we developed a novel web application—brain-coX—that offers gene prioritisation with accompanying visualisations based on seven gene expression datasets in the post-mortem human brain, the largest such resource ever assembled.

Results

We tested whether our tool can correctly prioritise known genes from 37 brain-specific KEGG pathways and 17 psychiatric conditions. We achieved average sensitivity of nearly 50%, at the same time reaching a specificity of approximately 75%. We also compared brain-coX's performance to that of its main competitors, Endeavour and ToppGene, focusing on the ability to discover novel associations. Using a subset of the curated SFARI autism gene collection we show that brain-coX's prioritisations are most similar to SFARI's own curated gene classifications.

Conclusions

brain-coX is the first prioritisation and visualisation web-tool targeted to the human brain and can be freely accessed via http://shiny.bioinf.wehi.edu.au/freytag.s/ .",2017-06-08 +31898917,A State-of-the-Science Review of Arsenic's Effects on Glucose Homeostasis in Experimental Models.,"

Background

The prevalence of type 2 diabetes (T2D) has more than doubled since 1980. Poor nutrition, sedentary lifestyle, and obesity are among the primary risk factors. While an estimated 70% of cases are attributed to excess adiposity, there is an increased interest in understanding the contribution of environmental agents to diabetes causation and severity. Arsenic is one of these environmental chemicals, with multiple epidemiology studies supporting its association with T2D. Despite extensive research, the molecular mechanism by which arsenic exerts its diabetogenic effects remains unclear.

Objectives

We conducted a literature search focused on arsenite exposure in vivo and in vitro, using relevant end points to elucidate potential mechanisms of oral arsenic exposure and diabetes development.

Methods

We explored experimental results for potential mechanisms and elucidated the distinct effects that occur at high vs. low exposure. We also performed network analyses relying on publicly available data, which supported our key findings.

Results

While several mechanisms may be involved, our findings support that arsenite has effects on whole-body glucose homeostasis, insulin-stimulated glucose uptake, glucose-stimulated insulin secretion, hepatic glucose metabolism, and both adipose and pancreatic β-cell dysfunction.

Discussion

This review applies state-of-the-science approaches to identify the current knowledge gaps in our understanding of arsenite on diabetes development. https://doi.org/10.1289/EHP4517.",2020-01-03 +28974379,Structural-functional diversity of the natural oligopeptides.,"Natural oligopeptides may regulate nearly all vital processes. To date, the chemical structures of many oligopeptides have been identified from >2000 organisms representing all the biological kingdoms. We have considered a number of mathematical (sequence length), chemical, physical, and biological features of an array of natural oligopeptides on the basis of the oligopeptide EROP-Moscow database (http://erop.inbi.ras.ru, 15,351 entries) data. There is the substantial difference of these substances from polypeptide molecules of proteins according to their physicochemical characteristics. These characteristics may be critical for understanding the molecular mechanisms of the action of oligopeptides that lead to the development of physiological effects.",2017-09-30 +31197306,DNN-Dom: predicting protein domain boundary from sequence alone by deep neural network.,"

Motivation

Accurate delineation of protein domain boundary plays an important role for protein engineering and structure prediction. Although machine-learning methods are widely used to predict domain boundary, these approaches often ignore long-range interactions among residues, which have been proven to improve the prediction performance. However, how to simultaneously model the local and global interactions to further improve domain boundary prediction is still a challenging problem.

Results

This article employs a hybrid deep learning method that combines convolutional neural network and gate recurrent units' models for domain boundary prediction. It not only captures the local and non-local interactions, but also fuses these features for prediction. Additionally, we adopt balanced Random Forest for classification to deal with high imbalance of samples and high dimensions of deep features. Experimental results show that our proposed approach (DNN-Dom) outperforms existing machine-learning-based methods for boundary prediction. We expect that DNN-Dom can be useful for assisting protein structure and function prediction.

Availability and implementation

The method is available as DNN-Dom Server at http://isyslab.info/DNN-Dom/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +,Monitoring conterminous United States (CONUS) land cover change with Web-Enabled Landsat Data (WELD),"Forest cover loss and bare ground gain from 2006 to 2010 for the conterminous United States (CONUS) were quantified at a 30m spatial resolution using Web-Enabled Landsat Data available from the USGS Center for Earth Resources Observation and Science (EROS) (http://landsat.usgs.gov/WELD.php). The approach related multi-temporal WELD metrics and expert-derived training data for forest cover loss and bare ground gain through a decision tree classification algorithm. Forest cover loss was reported at state and ecoregional scales, and the identification of core forests' absent of change was made and verified using LiDAR data from the GLAS (Geoscience Laser Altimetry System) instrument. Bare ground gain correlated with population change for large metropolitan statistical areas (MSAs) outside of desert or semi-desert environments. GoogleEarth™ time-series images were used to validate the products. Mapped forest cover loss totaled 53,084km2 and was found to be depicted conservatively, with a user's accuracy of 78% and a producer's accuracy of 68%. Excluding errors of adjacency, user's and producer's accuracies rose to 93% and 89%, respectively. Mapped bare ground gain equaled 5974km2 and nearly matched the estimated area from the reference (GoogleEarth™) classification; however, user's (42%) and producer's (49%) accuracies were much less than those of the forest cover loss product. Excluding errors of adjacency, user's and producer's accuracies rose to 62% and 75%, respectively. 
Compared to recent 2001–2006 USGS National Land Cover Database validation data for forest loss (82% and 30% for respective user's and producer's accuracies) and urban gain (72% and 18% for respective user's and producer's accuracies), results using a single CONUS-scale model with WELD data are promising and point to the potential for national-scale operational mapping of key land cover transitions. However, validation results highlighted limitations, some of which can be addressed by improving training data, creating a more robust image feature space, adding contemporaneous Landsat 5 data to the inputs, and modifying definition sets to account for differences in temporal and spatial observational scales. The presented land cover extent and change data are available via the official WELD website (ftp://weldftp.cr.usgs.gov/CONUS_5Y_LandCover/ftp://weldftp.cr.usgs.gov/CONUS_5Y_LandCover/).",2014-01-01 +24217918,Gramene 2013: comparative plant genomics resources.,"Gramene (http://www.gramene.org) is a curated online resource for comparative functional genomics in crops and model plant species, currently hosting 27 fully and 10 partially sequenced reference genomes in its build number 38. Its strength derives from the application of a phylogenetic framework for genome comparison and the use of ontologies to integrate structural and functional annotation data. Whole-genome alignments complemented by phylogenetic gene family trees help infer syntenic and orthologous relationships. Genetic variation data, sequences and genome mappings available for 10 species, including Arabidopsis, rice and maize, help infer putative variant effects on genes and transcripts. The pathways section also hosts 10 species-specific metabolic pathways databases developed in-house or by our collaborators using Pathway Tools software, which facilitates searches for pathway, reaction and metabolite annotations, and allows analyses of user-defined expression datasets. 
Recently, we released a Plant Reactome portal featuring 133 curated rice pathways. This portal will be expanded for Arabidopsis, maize and other plant species. We continue to provide genetic and QTL maps and marker datasets developed by crop researchers. The project provides a unique community platform to support scientific research in plant genomics including studies in evolution, genetics, plant breeding, molecular biology, biochemistry and systems biology.",2013-11-11 +31296905,Characterization of two putative Dichelobacter nodosus footrot vaccine antigens identifies the first lysozyme inhibitor in the genus.,"The Gram-negative anaerobic bacterium Dichelobacter nodosus (Dn) causes footrot in ruminants, a debilitating and highly contagious disease that results in necrotic hooves and significant economic losses in agriculture. Vaccination with crude whole-cell vaccine mixed with multiple recombinant fimbrial proteins can provide protection during species-specific outbreaks, but subunit vaccines containing broadly cross-protective antigens are desirable. We have investigated two D. nodosus candidate vaccine antigens. Macrophage Infectivity Potentiator Dn-MIP (DNO_0012, DNO_RS00050) and Adhesin Complex Protein Dn-ACP (DNO_0725, DNO_RS06795) are highly conserved amongst ~170 D. nodosus isolates in the https://pubmlst.org/dnodosus/ database. We describe the presence of two homologous ACP domains in Dn-ACP with potent C-type lysozyme inhibitor function, and homology of Dn-MIP to other putative cell-surface and membrane-anchored MIP virulence factors. Immunization of mice with recombinant proteins with a variety of adjuvants induced antibodies that recognised both proteins in D. nodosus. Notably, immunization with fimbrial-whole-cell Footvax vaccine induced anti-Dn-ACP and anti-Dn-MIP antibodies. 
Although all adjuvants induced high titre antibody responses, only antisera to rDn-ACP-QuilA and rDn-ACP-Al(OH)3 significantly prevented rDn-ACP protein from inhibiting lysozyme activity in vitro. Therefore, a vaccine incorporating rDn-ACP in particular could contribute to protection by enabling normal innate immune lysozyme function to aid bacterial clearance.",2019-07-11 +23482072,Tetrahymena functional genomics database (TetraFGD): an integrated resource for Tetrahymena functional genomics.,"The ciliated protozoan Tetrahymena thermophila is a useful unicellular model organism for studies of eukaryotic cellular and molecular biology. Researches on T. thermophila have contributed to a series of remarkable basic biological principles. After the macronuclear genome was sequenced, substantial progress has been made in functional genomics research on T. thermophila, including genome-wide microarray analysis of the T. thermophila life cycle, a T. thermophila gene network analysis based on the microarray data and transcriptome analysis by deep RNA sequencing. To meet the growing demands for the Tetrahymena research community, we integrated these data to provide a public access database: Tetrahymena functional genomics database (TetraFGD). TetraFGD contains three major resources, including the RNA-Seq transcriptome, microarray and gene networks. The RNA-Seq data define gene structures and transcriptome, with special emphasis on exon-intron boundaries; the microarray data describe gene expression of 20 time points during three major stages of the T. thermophila life cycle; the gene network data identify potential gene-gene interactions of 15 049 genes. The TetraFGD provides user-friendly search functions that assist researchers in accessing gene models, transcripts, gene expression data and gene-gene relationships. In conclusion, the TetraFGD is an important functional genomic resource for researchers who focus on the Tetrahymena or other ciliates. 
Database URL: http://tfgd.ihb.ac.cn/",2013-03-12 +,The Quantification of Representative Sequences pipeline for amplicon sequencing: case study on within‐population ITS1 sequence variation in a microparasite infecting Daphnia,"Next generation sequencing (NGS) platforms are replacing traditional molecular biology protocols like cloning and Sanger sequencing. However, accuracy of NGS platforms has rarely been measured when quantifying relative frequencies of genotypes or taxa within populations. Here we developed a new bioinformatic pipeline (QRS) that pools similar sequence variants and estimates their frequencies in NGS data sets from populations or communities. We tested whether the estimated frequency of representative sequences, generated by 454 amplicon sequencing, differs significantly from that obtained by Sanger sequencing of cloned PCR products. This was performed by analysing sequence variation of the highly variable first internal transcribed spacer (ITS1) of the ichthyosporean Caullerya mesnili, a microparasite of cladocerans of the genus Daphnia. This analysis also serves as a case example of the usage of this pipeline to study within‐population variation. Additionally, a public Illumina data set was used to validate the pipeline on community‐level data. Overall, there was a good correspondence in absolute frequencies of C. mesnili ITS1 sequences obtained from Sanger and 454 platforms. Furthermore, analyses of molecular variance (amova) revealed that population structure of C. mesnili differs across lakes and years independently of the sequencing platform. Our results support not only the usefulness of amplicon sequencing data for studies of within‐population structure but also the successful application of the QRS pipeline on Illumina‐generated data. 
The QRS pipeline is freely available together with its documentation under GNU Public Licence version 3 at http://code.google.com/p/quantification-representative-sequences.",2015-11-01 +23203869,"SIFTS: Structure Integration with Function, Taxonomy and Sequences resource.","The Structure Integration with Function, Taxonomy and Sequences resource (SIFTS; http://pdbe.org/sifts) is a close collaboration between the Protein Data Bank in Europe (PDBe) and UniProt. The two teams have developed a semi-automated process for maintaining up-to-date cross-reference information to UniProt entries, for all protein chains in the PDB entries present in the UniProt database. This process is carried out for every weekly PDB release and the information is stored in the SIFTS database. The SIFTS process includes cross-references to other biological resources such as Pfam, SCOP, CATH, GO, InterPro and the NCBI taxonomy database. The information is exported in XML format, one file for each PDB entry, and is made available by FTP. Many bioinformatics resources use SIFTS data to obtain cross-references between the PDB and other biological databases so as to provide their users with up-to-date information.",2012-11-29 +,RiceRBP: A database of experimentally identified RNA-binding proteins in Oryza sativa L,"RNA-binding proteins play critical roles at multiple steps during gene expression, including mRNA transport and translation. mRNA transport is particularly important in rice (Oryza sativa L.) in order to ensure the proper localization of the prolamine and glutelin seed storage proteins. However, relatively little information is available about RNA-binding proteins that have been isolated or characterized in plants. The RiceRBP database is a novel resource for the analysis of RNA-binding proteins in rice. RiceRBP contains 257 experimentally identified RNA-binding proteins, which are derived from at least 221 distinct rice genes. 
Many of the identified proteins catalogued in RiceRBP had not previously been annotated or predicted to bind RNA. RiceRBP provides tools to facilitate the analysis of the identified RNA-binding proteins, including information about predicted protein domains, phylogenetic relationships, and expression patterns of the identified genes. Importantly, RiceRBP also contains tools to search and analyze predicted RNA-binding protein orthologs in other plant species. We anticipate that the data and analysis tools provided by RiceRBP should facilitate the study of plant RNA-binding proteins. RiceRBP is available at http://www.bioinformatics2.wsu.edu/RiceRBP.",2011-02-01 +31753749,Multiplex testing for the screening of lysosomal storage disease in urine: Sulfatides and glycosaminoglycan profiles in 40 cases of sulfatiduria.,"PURPOSE:To describe an efficient and effective multiplex screening strategy for sulfatide degradation disorders and mucolipidosis type II/III (MLII/III) using 3 mL of urine. METHODS:Glycosaminoglycans were analyzed by liquid chromatography-tandem mass spectrometry. Matrix assisted laser desorption/ionization-time of flight tandem mass spectrometry was used to identify free oligosaccharides and identify 22 ceramide trihexosides and 23 sulfatides, which are integrated by 670 calculated ratios. Collaborative Laboratory Integrated Reports (CLIR; https://clir.mayo.edu) was used for post-analytical interpretation of the complex metabolite profile and to aid in the differential diagnosis of abnormal results. RESULTS:Multiplex analysis was performed on 25 sulfatiduria case samples and compiled with retrospective data from an additional 15 cases revealing unique patterns of biomarkers for each disorder of sulfatide degradation (MLD, MSD, and Saposin B deficiency) and for MLII/III, thus allowing the formulation of a novel algorithm for the biochemical diagnosis of these disorders. 
CONCLUSIONS:Comprehensive and integrated urine screening could be very effective in the initial workup of patients suspected of having a lysosomal disorder as it covers disorders of sulfatide degradation and narrows down the differential diagnosis in patients with elevated glycosaminoglycans.",2019-11-05 +24364888,SGR: an online genomic resource for the woodland strawberry.,"

Background

Fragaria vesca, a diploid strawberry species commonly known as the alpine or woodland strawberry, is a versatile experimental plant system and an emerging model for the Rosaceae family. An ancestral F. vesca genome contributed to the genome of the octoploid dessert strawberry (F. ×ananassa), and the extant genome exhibits synteny with other commercially important members of the Rosaceae family such as apple and peach. To provide a molecular description of floral organ and fruit development at the resolution of specific tissues and cell types, RNAs from flowers and early developmental stage fruit tissues of the inbred F. vesca line YW5AF7 were extracted and the resulting cDNA libraries sequenced using an Illumina HiSeq2000. To enable easy access as well as mining of this two-dimensional (stage and tissue) transcriptome dataset, a web-based database, the Strawberry Genomic Resource (SGR), was developed.

Description

SGR is a web accessible database that contains sample description, sample statistics, gene annotation, and gene expression analysis. This information can be accessed publicly from a web-based interface at http://bioinformatics.towson.edu/strawberry/Default.aspx. The SGR website provides user friendly search and browse capabilities for all the data stored in the database. Users are able to search for genes using a gene ID or description or obtain differentially expressed genes by entering different comparison parameters. Search results can be downloaded in a tabular format compatible with the Microsoft Excel application. Aligned reads to individual genes and exon/intron structures are displayed using the genome browser, facilitating gene re-annotation by individual users.

Conclusions

The SGR database was developed to facilitate dissemination and data mining of extensive floral and fruit transcriptome data in the woodland strawberry. It enables users to mine the data in different ways to study different pathways or biological processes during reproductive development.",2013-12-23 +29628980,Berberine Suppresses Fibronectin Expression through Inhibition of c-Jun Phosphorylation in Breast Cancer Cells.,"

Purpose

The exact mechanism regulating fibronectin (FN) expression in breast cancer cells has not been fully elucidated. In this study, we investigated the pharmacological mechanism of berberine (BBR) with respect to FN expression in triple-negative breast cancer (TNBC) cells.

Methods

The clinical significance of FN mRNA expression was analyzed using the Kaplan-Meier plotter database (http://kmplot.com/breast). FN mRNA and protein expression levels were analyzed by real-time polymerase chain reaction and western blotting, respectively.

Results

Using publicly available clinical data, we observed that high FN expression was associated with poor prognosis in patients with breast cancer. FN mRNA and protein expression was increased in TNBC cells compared with non-TNBC cells. As expected, recombinant human FN significantly induced cell spreading and adhesion in MDA-MB231 TNBC cells. We also investigated the regulatory mechanism underlying FN expression. Basal levels of FN mRNA and protein expression were downregulated by a specific activator protein-1 (AP-1) inhibitor, SR11302. Interestingly, FN expression in TNBC cells was dose-dependently decreased by BBR treatment. The level of c-Jun phosphorylation was also decreased by BBR treatment.

Conclusion

Our findings demonstrate that FN expression is regulated via an AP-1-dependent mechanism, and that BBR suppresses FN expression in TNBC cells through inhibition of AP-1 activity.",2018-03-23 +36284693,A novel growing rod technique to treat early-onset scoliosis (EOS): a step-by-step 2D surgical video.,"Early-onset scoliosis (EOS) correction techniques have evolved slowly over the past 40 years and still remain a challenge for the spine surgeon. Avoiding spinal fusion in these patients is key to decreasing morbidity and mortality in this population. Current treatments for EOS include both conservative and surgical options. The authors present the modified Luqué technique that has been performed at their institution for the past decade. This modified technique relies on Luqué's principle, but with newer ""gliding"" implants through a less disruptive approach. The goal of this technique is to delay fusion as long as possible, with the intent to prevent deformity progression while preserving maximal growth. Normally, these patients will have definitive fusion surgery once they have reached skeletal maturity or as close as possible. Out of 23 patients until present (close to 4-year follow-up), the authors have not performed any revision due to implant failure. Three patients have undergone final fusion as the curve progressed (one patient, 4 years out, had final fusion at age 12 years; two other patients had final fusion at 3 years). These implants, which have the CE mark in Europe, are available in Canada via a special access process with Health Canada. The implants have not yet been submitted to the FDA, as they are waiting on clinical data out of Europe and Canada. In the following video the authors describe the modified Luqué technique step-by-step. 
The video can be found here: https://youtu.be/k0AuFa9lYXY.",2020-01-01 +31349166,Using high-throughput transcriptome sequencing to investigate the biotransformation mechanism of hexabromocyclododecane with Rhodopseudomonas palustris in water.,"We discovered one purple photosynthetic bacterium, Rhodopseudomonas palustris YSC3, which has a specific ability to degrade 1, 2, 5, 6, 9, 10-hexabromocyclododecane (HBCD). The whole transcriptome of R. palustris YSC3 was analyzed using the RNA-based sequencing technology in illumina and was compared as well as discussed through Multi-Omics onLine Analysis System (MOLAS, http://molas.iis.sinica.edu.tw/NTUIOBYSC3/) platform we built. By using genome based mapping approach, we can align the trimmed reads on the genome of R. palustris and estimate the expression profiling for each transcript. A total of 341 differentially expressed genes (DEGs) in HBCD-treated R. palustris (RPH) versus control R. palustris (RPC) was identified by 2-fold changes, among which 305 genes were up-regulated and 36 genes were down-regulated. The regulated genes were mapped to the database of Gene Ontology (GO) and Genes and Genomes Encyclopedia of Kyoto (KEGG), resulting in 78 pathways being identified. Among those DEGs which annotated to important functions in several metabolic pathways, including those involved in two-component system (13.6%), ribosome assembly (10.7%), glyoxylate and dicarboxylate metabolism (5.3%), fatty acid degradation (4.7%), drug metabolism-cytochrome P450 (2.3%), and chlorocyclohexane and chlorobenzene degradation (3.0%) were differentially expressed in RPH and RPC samples. We also identified one transcript annotated as dehalogenase and other genes involved in the HBCD biotransformation in R. palustris. Furthermore, the putative HBCD biotransformation mechanism in R. palustris was proposed.",2019-07-10 +31690595,Molecular Reconstruction of the Diet in Human Stool Samples. 
,"Understanding dietary effects on the gut microbial composition is one of the key questions in human microbiome research. It is highly important to have reliable dietary data on the stool samples to unambiguously link the microbiome composition to food intake. Often, however, self-reported diet surveys have low accuracy and can be misleading. Thereby, additional molecular biology-based methods could help to revise the diet composition. The article by Reese et al. [A. T. Reese, T. R. Kartzinel, B. L. Petrone, P. J. Turnbaugh, et al., mSystems 4(5):e00458-19, 2019, https://doi.org/10.1128/mSystems.00458-19] in a recent issue of mSystems describes a DNA metabarcoding strategy targeting chloroplast DNA markers in stool samples from 11 human subjects consuming both controlled and freely selected diets. The aim of this study was to evaluate the efficiency of this molecular method in detecting plant remains in the sample compared to the written dietary records. This study displays an important first step in implementing molecular dietary reconstructions in stool microbiome studies which will finally help to increase the accuracy of dietary metadata.",2019-11-05 +30346493,SENSE: Siamese neural network for sequence embedding and alignment-free comparison.,"

Motivation

Sequence analysis is arguably a foundation of modern biology. Classic approaches to sequence analysis are based on sequence alignment, which is limited when dealing with large-scale sequence data. A dozen alignment-free approaches have been developed to provide computationally efficient alternatives to alignment-based approaches. However, existing methods define sequence similarity based on various heuristics and can only provide rough approximations to alignment distances.

Results

In this article, we developed a new approach, referred to as SENSE (SiamEse Neural network for Sequence Embedding), for efficient and accurate alignment-free sequence comparison. The basic idea is to use a deep neural network to learn an explicit embedding function based on a small training dataset to project sequences into an embedding space so that the mean square error between alignment distances and pairwise distances defined in the embedding space is minimized. To the best of our knowledge, this is the first attempt to use deep learning for alignment-free sequence analysis. A large-scale experiment was performed that demonstrated that our method significantly outperformed the state-of-the-art alignment-free methods in terms of both efficiency and accuracy.

Availability and implementation

Open-source software for the proposed method is developed and freely available at https://www.acsu.buffalo.edu/~yijunsun/lab/SENSE.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-06-01 +27899657,The eukaryotic promoter database in its 30th year: focus on non-vertebrate organisms.,"We present an update of the Eukaryotic Promoter Database EPD (http://epd.vital-it.ch), more specifically on the EPDnew division, which contains comprehensive organisms-specific transcription start site (TSS) collections automatically derived from next generation sequencing (NGS) data. Thanks to the abundant release of new high-throughput transcript mapping data (CAGE, TSS-seq, GRO-cap) the database could be extended to plant and fungal species. We further report on the expansion of the mass genome annotation (MGA) repository containing promoter-relevant chromatin profiling data and on improvements for the EPD entry viewers. Finally, we present a new data access tool, ChIP-Extract, which enables computational biologists to extract diverse types of promoter-associated data in numerical table formats that are readily imported into statistical analysis platforms such as R.",2016-11-28 +31173056,P3DOCK: a protein-RNA docking webserver based on template-based and template-free docking.,"

Motivation

The main function of protein-RNA interaction is to regulate the expression of genes. Therefore, studying protein-RNA interactions is of great significance. The information of three-dimensional (3D) structures reveals that atomic interactions are particularly important. The calculation method for modeling a 3D structure of a complex mainly includes two strategies: free docking and template-based docking. These two methods are complementary in protein-protein docking. Therefore, integrating these two methods may improve the prediction accuracy.

Results

In this article, we compare the difference between the free docking and the template-based algorithm. Then we show the complementarity of these two methods. Based on the analysis of the calculation results, the transition point is confirmed and used to integrate two docking algorithms to develop P3DOCK. P3DOCK holds the advantages of both algorithms. The results of the three docking benchmarks show that P3DOCK is better than those two non-hybrid docking algorithms. The success rate of P3DOCK is also higher (3-20%) than state-of-the-art hybrid and non-hybrid methods. Finally, the hierarchical clustering algorithm is utilized to cluster the P3DOCK's decoys. The clustering algorithm improves the success rate of P3DOCK. For ease of use, we provide a P3DOCK webserver, which can be accessed at www.rnabinding.com/P3DOCK/P3DOCK.html. An integrated protein-RNA docking benchmark can be downloaded from http://rnabinding.com/P3DOCK/benchmark.html.

Availability and implementation

www.rnabinding.com/P3DOCK/P3DOCK.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +24377417,"Global catalogue of microorganisms (gcm): a comprehensive database and information retrieval, analysis, and visualization system for microbial resources.","

Background

Throughout the long history of industrial and academic research, many microbes have been isolated, characterized and preserved (whenever possible) in culture collections. With the steady accumulation in observational data of biodiversity as well as microbial sequencing data, bio-resource centers have to function as data and information repositories to serve academia, industry, and regulators on behalf of and for the general public. Hence, the World Data Centre for Microorganisms (WDCM) started to take its responsibility for constructing an effective information environment that would promote and sustain microbial research data activities, and bridge the gaps currently present within and outside the microbiology communities.

Description

Strain catalogue information was collected from collections by online submission. We developed tools for automatic extraction of strain numbers and species names from various sources, including Genbank, Pubmed, and SwissProt. These new tools connect strain catalogue information with the corresponding nucleotide and protein sequences, as well as to genome sequence and references citing a particular strain. All information has been processed and compiled in order to create a comprehensive database of microbial resources, and was named Global Catalogue of Microorganisms (GCM). The current version of GCM contains information of over 273,933 strains, which includes 43,436 bacterial, fungal and archaea species from 52 collections in 25 countries and regions. A number of online analysis and statistical tools have been integrated, together with advanced search functions, which should greatly facilitate the exploration of the content of GCM.

Conclusion

A comprehensive dynamic database of microbial resources has been created, which unveils the resources preserved in culture collections especially for those whose informatics infrastructures are still under development, which should foster cumulative research, facilitating the activities of microbiologists world-wide, who work in both public and industrial research centres. This database is available from http://gcm.wfcc.info.",2013-12-30 +29101506,Estimated prevalence of potentially damaging variants in the leptin gene.,"

Background

Mutations in the leptin gene (LEP) can alter the secretion or interaction of leptin with its receptor, leading to extreme early-onset obesity. The purpose of this work was to estimate the prevalence of heterozygous and homozygous mutations in the leptin gene with the help of the Exome Aggregation Consortium (ExAC) database ( http://exac.broadinstitute.org/about ).

Results

The ExAC database encompasses exome sequencing data from 60,706 individuals. We searched for listed leptin variants and identified 36 missense, 1 in-frame deletion, and 3 loss-of-function variants. The functional relevance of these variants was assessed by the in silico prediction tools PolyPhen-2, Sorting Intolerant from Tolerant (SIFT), and Loss-Of-Function Transcript Effect Estimator (LOFTEE). PolyPhen-2 predicted 7 of the missense variants to be probably damaging and 10 to be possibly damaging. SIFT predicted 7 of the missense variants to be deleterious. Three loss-of-function variants were predicted by LOFTEE. Excluding double counts, we can summarize 21 variants as potentially damaging. Considering the allele count, we identified 31 heterozygous but no homozygous subjects with at least probably damaging variants. In the ExAC population, the estimated prevalence of heterozygous carriers of these potentially damaging variants was 1:2000. The probability of homozygosity was 1:15,000,000. We furthermore tried to assess the functionality of ExAC-listed leptin variants by applying a knowledge-driven approach. By this approach, additional 6 of the ExAC-listed variants were considered potentially damaging, increasing the number of heterozygous subjects to 58, the prevalence of heterozygosity to 1:1050, and the probability of homozygosity to 1:4,400,000.

Conclusion

Using exome sequencing data from ExAC, in silico prediction tools and by applying a knowledge-driven approach, we identified 27 probably damaging variants in the leptin gene of 58 heterozygous subjects. With this information, we estimate the prevalence for heterozygosity at 1:1050 corresponding to an incidence of homozygosity of 1:4,400,000 in this large pluriethnic cohort.",2017-11-03 +28077569,RAIN: RNA-protein Association and Interaction Networks. ,"Protein association networks can be inferred from a range of resources including experimental data, literature mining and computational predictions. These types of evidence are emerging for non-coding RNAs (ncRNAs) as well. However, integration of ncRNAs into protein association networks is challenging due to data heterogeneity. Here, we present a database of ncRNA-RNA and ncRNA-protein interactions and its integration with the STRING database of protein-protein interactions. These ncRNA associations cover four organisms and have been established from curated examples, experimental data, interaction predictions and automatic literature mining. RAIN uses an integrative scoring scheme to assign a confidence score to each interaction. We demonstrate that RAIN outperforms the underlying microRNA-target predictions in inferring ncRNA interactions. RAIN can be operated through an easily accessible web interface and all interaction data can be downloaded.Database URL: http://rth.dk/resources/rain.",2017-01-10 +26074488,CancerHSP: anticancer herbs database of systems pharmacology.,"The numerous natural products and their bioactivity potentially afford an extraordinary resource for new drug discovery and have been employed in cancer treatment. However, the underlying pharmacological mechanisms of most natural anticancer compounds remain elusive, which has become one of the major obstacles in developing novel effective anticancer agents. 
Here, to address these unmet needs, we developed an anticancer herbs database of systems pharmacology (CancerHSP), which records anticancer herbs related information through manual curation. Currently, CancerHSP contains 2439 anticancer herbal medicines with 3575 anticancer ingredients. For each ingredient, the molecular structure and nine key ADME parameters are provided. Moreover, we also provide the anticancer activities of these compounds based on 492 different cancer cell lines. Further, the protein targets of the compounds are predicted by state-of-art methods or collected from literatures. CancerHSP will help reveal the molecular mechanisms of natural anticancer products and accelerate anticancer drug development, especially facilitate future investigations on drug repositioning and drug discovery. CancerHSP is freely available on the web at http://lsp.nwsuaf.edu.cn/CancerHSP.php.",2015-06-15 +29549212,"Protocol for a 1-year prospective, longitudinal cohort study of patients undergoing Roux-en-Y gastric bypass and sleeve gastrectomy: the BARI-LIFESTYLE observational study.","

Introduction

Roux-en-Y gastric bypass and sleeve gastrectomy are the two most common bariatric surgeries performed in the UK that result in comparable weight loss and remission of obesity-associated comorbidities. However, there is a paucity of studies examining the impact of these procedures on body composition, physical activity levels, sedentary behaviour, physical function and strength, dietary intake, health-related quality of life and costs.

Methods and analysis

The BARI-LIFESTYLE observational study is a 1-year prospective, longitudinal cohort study within a real-world routine clinical care setting aiming to recruit 100 patients with severe obesity undergoing either primary Roux-en-Y gastric bypass or sleeve gastrectomy from two bariatric centres in London, UK. Participants will be followed up four times during the study period; presurgery baseline (T0) and at 3 (T1), 6 (T2) and 12 months (T3) postsurgery. In addition to the standard follow-up investigations, assessments including dual-energy X-ray absorptiometry scan, bioelectric impedance analysis, 6 min walk test, sit-to-stand test and handgrip test will be undertaken together with completion of questionnaires. Physical activity levels and sedentary behaviour will be assessed using accelerometer, and dietary intake will be recorded using a 3-day food diary. Outcome measures will include body weight, body fat mass, lean muscle mass, bone mineral density, physical activity levels, sedentary behaviour, physical function and strength, dietary intake, health-related quality of life, remission of comorbidities, healthcare resource utilisation and costs.

Ethics and dissemination

This study has been reviewed and given a favourable ethical opinion by London-Dulwich Research Ethics Committee (17/LO/0950). The results will be presented to stakeholder groups locally, nationally and internationally and published in peer-reviewed medical journals. The lay-person summary of the findings will be published on the Centre for Obesity Research, University College London website (http://www.ucl.ac.uk/obesity).",2018-03-16 +30202854,"Phylolink: phylogenetically-based profiling, visualisations and metrics for biodiversity.","

Summary

Phylolink is a research collaboration toolkit through which biodiversity can be explored from a phylogenetic perspective. It is an update of PhyloJiVE that has been integrated into the framework of the Atlas of Living Australia, including its spatial portal. Phylolink connects phylogenetic tree nodes with species occurrence records, environmental data, and species trait information. Features new to Phylolink allow users to upload and download spatial datasets, store files and link to the ALA spatial portal, improve graphics and provide the novel ability to analyze environmental attributes of species and clades distributions. Species richness and phylogenetic diversity comparisons can be made among geographic areas. The result is a powerful way of combining data to generate flexible and customizable visualizations, profiles and metrics for biodiversity.

Availability and implementation

Phylolink is available at the Atlas of Living Australia, http://phylolink.ala.org.au/. It works on any browser and users can select from a range of stored phylogenetic trees and spatial datasets, or upload their own.",2019-04-01 +31168931,Proteome Dataset of Qualea grandiflora Mart. (Vochysiaceae) by LC-MS/MS Label-Free Identification in Response to Aluminum.,"This dataset brief is about the descriptive proteome of Qualea grandiflora plants by label free mass spectrometry (LC-MS/MS). Q. grandiflora is a plant that accumulates aluminum (Al) in high quantities and requires it for growth and development. Although quite relevant for the understanding of Al effects on plants, the proteome of Q. grandiflora has not been studied yet. Therefore, the current proteome analysis identifies a total of 2010 proteins. Furthermore, the identified Q. grandiflora root proteins are associated with several crucial molecular functions, biological processes, and cellular sites. Hence, the proteome analysis of Q. grandiflora will contribute to unravel how plants evolved to cope with high levels of Al in soils. All data can be accessed at the Centre for Computational Mass Spectrometry - MassIVE MSV000082284 - https://massive.ucsd.edu/ProteoSAFe/dataset.jsp?task=adb9647282a5421a9cffe3124c060f46.",2019-08-21 +31775760,MySeq: privacy-protecting browser-based personal Genome analysis for genomics education and exploration.,"BACKGROUND:The complexity of genome informatics is a recurring challenge for genome exploration and analysis by students and other non-experts. This complexity creates a barrier to wider implementation of experiential genomics education, even in settings with substantial computational resources and expertise. Reducing the need for specialized software tools will increase access to hands-on genomics pedagogy. RESULTS:MySeq is a React.js single-page web application for privacy-protecting interactive personal genome analysis. 
All analyses are performed entirely in the user's web browser eliminating the need to install and use specialized software tools or to upload sensitive data to an external web service. MySeq leverages Tabix-indexing to efficiently query whole genome-scale variant call format (VCF) files stored locally or available remotely via HTTP(s) without loading the entire file. MySeq currently implements variant querying and annotation, physical trait prediction, pharmacogenomic, polygenic disease risk and ancestry analyses to provide representative pedagogical examples; and can be readily extended with new analysis or visualization components. CONCLUSIONS:MySeq supports multiple pedagogical approaches including independent exploration and interactive online tutorials. MySeq has been successfully employed in an undergraduate human genome analysis course where it reduced the barriers-to-entry for hands-on human genome analysis.",2019-11-27 +25183487,RCSB PDB Mobile: iOS and Android mobile apps to provide data access and visualization to the RCSB Protein Data Bank.,"

Summary

The Research Collaboratory for Structural Bioinformatics Protein Data Bank (RCSB PDB) resource provides tools for query, analysis and visualization of the 3D structures in the PDB archive. As the mobile Web is starting to surpass desktop and laptop usage, scientists and educators are beginning to integrate mobile devices into their research and teaching. In response, we have developed the RCSB PDB Mobile app for the iOS and Android mobile platforms to enable fast and convenient access to RCSB PDB data and services. Using the app, users from the general public to expert researchers can quickly search and visualize biomolecules, and add personal annotations via the RCSB PDB's integrated MyPDB service.

Availability and implementation

RCSB PDB Mobile is freely available from the Apple App Store and Google Play (http://www.rcsb.org).",2014-09-02 +22847375,BioGPS and GXD: mouse gene expression data-the benefits and challenges of data integration.,"Mouse gene expression data are complex and voluminous. To maximize the utility of these data, they must be made readily accessible through databases, and those resources need to place the expression data in the larger biological context. Here we describe two community resources that approach these problems in different but complementary ways: BioGPS and the Mouse Gene Expression Database (GXD). BioGPS connects its large and homogeneous microarray gene expression reference data sets via plugins with a heterogeneous collection of external gene centric resources, thus casting a wide but loose net. GXD acquires different types of expression data from many sources and integrates these data tightly with other types of data in the Mouse Genome Informatics (MGI) resource, with a strong emphasis on consistency checks and manual curation. We describe and contrast the ""loose"" and ""tight"" data integration strategies employed by BioGPS and GXD, respectively, and discuss the challenges and benefits of data integration. BioGPS is freely available at http://biogps.org . GXD is freely available through the MGI web site ( www.informatics.jax.org ) or directly at www.informatics.jax.org/expression.shtml .",2012-07-31 +,AB119. Induction of suppressor of cytokine signaling-3 in FLT3-ITD positive MV4-11 acute myeloid leukemia cells in response to 5-Azacytidine and Trichostatin A,"

Background and objective

Suppressor of cytokine signaling-3 (SOCS-3) has been shown to be an important candidate in molecular therapeutic strategies in management of acute myeloid leukemia (AML), particularly in patients carrying FLT3-ITD mutation. SOCS-3 suppresses cytokine signalling by inhibiting the activity of Janus Kinase-2 (JAK-2), and by competing with signal transducer and activator of transcription (STAT) molecules that leads to underexpression. The study aims to determine the epigenetically silenced genes in AML cells carrying a FLT3-ITD mutation and epigenetically expressed genes after treatment with demethylating agent and histone deacetylase inhibitor.

Methods

MV4-11, a FLT3-ITD positive AML cell line was treated with epigenetic modulating agents; 5-azacytidine (5-Aza, a DNA demethylating agent) and Trichostatin A (TSA, a histone deacetylase inhibitor) at IC50 concentrations. One-Color Microarray-based expression analysis (Agilent SurePrint Technology) was utilized and the data was collected and analyzed by Genespring 12.6 software. The gene expression datasets were subjected to pathway analysis by online DAVID tool (http://david.abcc.ncifcrf.gov/) using KEGG pathway database. The microarray results were validated by quantitative real-time PCR to determine the relative quantification (RQ) values.

Results

Microarray analysis detected 1,291 expressed genes related to drug interactions. Pathway analysis by KEGG database revealed that the 1,291 genes were: 21 genes from MAPK pathway, 19 genes from pathways in cancers, 17 genes from cytokine-cytokine receptor interaction, 12 from focal adhesion, 12 from regulation of actin cytoskeleton, 10 genes from JAK/STAT pathway, 10 genes from Calcium signalling and several other pathways with less than 10 genes involved. Among the 10 genes in JAK/STAT pathway, SOCS-3 was highly expressed in 5-Aza and TSA with 66.24 and 147.43 folds (Genespring analysis, Benjamini Hochberg, P<0.05), respectively compared to untreated cells. Whereas, STAT6 was down regulated by −8.57 and −2.28 folds, respectively. Validation of microarray result showed RQ of SOCS-3 gene was upregulated by 3.7 and 18.2 folds, whereas STAT6 by 0.7 and 0.1 folds in 5-Aza and TSA respectively. SOCS-3 over expression reduces STAT6 activities and thus induces cell death in AML cells.

Conclusions

SOCS-3 was epigenetically silenced in AML cells and re-expressed after 5-Aza and TSA treatments. Whereas, STAT6 plays a role in a negative feedback loop. The finding suggests that, SOCS-3 expression is associated with pathogenesis of AML and can be served as prognosis marker in molecular targeted therapy of AML.",2015-09-01 +28180140,Land use and land cover data changes in Indian Ocean Islands: Case study of Unguja in Zanzibar Island.,"Land use and land cover changes will continue to affect resilient human communities and ecosystems as a result of climate change. However, an assessment of land use and land cover changes over time in Indian Ocean Islands is less documented. The land use/cover data changes over 10 years at smaller geographical scale across Unguja Island in Zanzibar were analyzed. Downscaling of the data was obtained from SERVIR through partnership with Kenya-based Regional Centre for Mapping of Resources for Development (RCMRD) database (http://www.servirglobal.net), and clipped down in ArcMap (Version 10.1) to Unguja Island. SERVIR and RCMRD Land Cover Dataset are mainly 30 m multispectral images include Landsat TM and ETM+Multispectral Images. Landscape ecology Statistics tool (LecoS) was used to analysis the land use and land cover changes. The data provide information on the status of the land use and land cover changes along the Unguja Island in Zanzibar. The data is of great significance to the future research on global change.",2017-01-17 +31114843,CytoPacq: a web-interface for simulating multi-dimensional cell imaging.,"MOTIVATION:Objective assessment of bioimage analysis methods is an essential step towards understanding their robustness and parameter sensitivity, calling for the availability of heterogeneous bioimage datasets accompanied by their reference annotations. 
Because manual annotations are known to be arduous, highly subjective and barely reproducible, numerous simulators have emerged over past decades, generating synthetic bioimage datasets complemented with inherent reference annotations. However, the installation and configuration of these tools generally constitutes a barrier to their widespread use. RESULTS:We present a modern, modular web-interface, CytoPacq, to facilitate the generation of synthetic benchmark datasets relevant for multi-dimensional cell imaging. CytoPacq poses a user-friendly graphical interface with contextual tooltips and currently allows a comfortable access to various cell simulation systems of fluorescence microscopy, which have already been recognized and used by the scientific community, in a straightforward and self-contained form. AVAILABILITY AND IMPLEMENTATION:CytoPacq is a publicly available online service running at https://cbia.fi.muni.cz/simulator. More information about it as well as examples of generated bioimage datasets are available directly through the web-interface. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-11-01 +28460067,PRISM 3: expanded prediction of natural product chemical structures from microbial genomes.,"Microbial natural products represent a rich resource of pharmaceutically and industrially important compounds. Genome sequencing has revealed that the majority of natural products remain undiscovered, and computational methods to connect biosynthetic gene clusters to their corresponding natural products therefore have the potential to revitalize natural product discovery. Previously, we described PRediction Informatics for Secondary Metabolomes (PRISM), a combinatorial approach to chemical structure prediction for genetically encoded nonribosomal peptides and type I and II polyketides. 
Here, we present a ground-up rewrite of the PRISM structure prediction algorithm to derive prediction of natural products arising from non-modular biosynthetic paradigms. Within this new version, PRISM 3, natural product scaffolds are modeled as chemical graphs, permitting structure prediction for aminocoumarins, antimetabolites, bisindoles and phosphonate natural products, and building upon the addition of ribosomally synthesized and post-translationally modified peptides. Further, with the addition of cluster detection for 11 new cluster types, PRISM 3 expands to detect 22 distinct natural product cluster types. Other major modifications to PRISM include improved sequence input and ORF detection, user-friendliness and output. Distribution of PRISM 3 over a 300-core server grid improves the speed and capacity of the web application. PRISM 3 is available at http://magarveylab.ca/prism/.",2017-07-01 +28737414,The reciprocal relationship between depression and physical morbidity: The role of subjective age.,"

Objectives

The study aims to examine whether the reciprocal effects of physical morbidity and depression are moderated by subjective age-that is, individuals' perception of themselves as young or old.

Method

Data from the first two waves of the Midlife in the United States study (1995-6, T1; 2004-6, T2; http://midus.wisc.edu/) were analyzed using a cross-lagged design. We assessed 3,591 individuals who participated in both waves and provided full data on all the relevant variables (mean age at T1 = 47.4). Depression and the number of chronic illnesses (the indicator of physical morbidity) were measured at both waves and were tested as predictors and outcomes in a cross-lagged model. The moderating role of subjective age was assessed by examining whether T1 variables interacted with subjective age in predicting T2 outcomes.

Results

Subjective age moderated the T1 depression-T2 morbidity relationship, so that the relationship was stronger for those with older subjective age. Subjective age did not moderate the T1 morbidity-T2 depression relationship.

Conclusion

Older subjective age could be a risk factor for experiencing greater physical morbidity following depression. (PsycINFO Database Record",2017-07-24 +30238039,Observed data of extreme rainfall events over the West African Sahel.,"The data described in this article are sets of daily rainfall values derived from observed station records. The data was recorded by 72 in-situ rain gauges spread over the West African Sahel. The daily rainfall time series from synoptic, climate, agro-meteorological, and rainfall stations are assessed for quality and consistency before extreme values are extracted based on 90th, 95th, and 99th percentile thresholds. This data is free for use as part of the study ""Scales for rating heavy rainfall events in West African Sahel"" [1] (Salack et al., 2018). Complementary and up to date time series can be taken from WASCAL data infrastructure (WADI) geoportal https://wascal-dataportal.org/wascal_searchportal2/. This is a derived product (DP), made public in line with WASCAL׳s ""3rd party data sharing policy"" signed by the WASCAL member countries.",2018-09-06 +28264673,The important challenge of quantifying tropical diversity.,"The tropics are the repository of much of the world's biodiversity, yet are undersampled relative to temperate regions. To help fill this knowledge gap, a paper in BMC Biology explores diversity patterns in tropical African plants, as revealed by the RAINBIO database. The paper documents spatial variation in diversity and data coverage, but also highlights the challenges faced in quantifying diversity patterns using data collated from a range of sources including herbaria.See research article: http://bmcbiol.biomedcentral.com/articles/10.1186/s12915-017-0356-8 .",2017-03-07 +31272242,Durvalumab in cancer medicine: a comprehensive review.,"Introduction: The U.S. FDA has approved durvalumab for the treatment of advanced urothelial and non-small cell lung cancers. 
However, this immunotherapy agent is also being explored in other cancers. There is also ongoing research to better predict the responses to this drug. Areas covered: We summarize the literature regarding durvalumab pharmacology, safety and efficacy in several tumor types. We searched PubMed/Medline database from inception to 20 April 2019, performed a snowball method, and visited independent websites such as the U.S. FDA ( https://www.fda.gov ), ClinicalTrials.gov, among others. Expert opinion: Advanced phase clinical trials have shown benefit of durvalumab in advanced urothelial and non-small cell lung cancers, and suggest benefit in several other tumor types. This agent has a tolerable toxicity profile and seems more effective in patients with a higher PDL-1 expression, although this correlation is not perfect. An improved method to predict a response to durvalumab would be beneficial to best tailor therapy and minimize medical care costs. More research is needed to establish its efficacy in different disease stages and applicability in other tumor types. Hopefully, prospective, randomized trials of durvalumab, alone and/or in combination with other agents, will bring answers to these questions in the near future.",2019-07-05 +32012413,The relative importance of phylogeny and habitat in determining the presence and prominence of a granula iridica in hooved mammals.,"

Purpose

To investigate the relationship between phylogeny and amount of shade in a species' habitat regarding the presence or absence of an iridal granula iridica (GI) in a large sample of Artiodactyl and Perissodactyl clades and using online resources.

Methods

The Comparative Ocular Pathology Laboratory of Wisconsin (COPLOW) archives were searched for glass slide material from Artiodactyl (even-toed) and Perissodactyl (odd-toed) ungulates. The slides were examined, and the presence or absence of the GI was noted. The phylogenetic tree of the ungulate species was inferred using TimeTree (http://www.timetree.org), and the habitat data are derived from Animal Diversity Web (https://animaldiversity.org/). We assessed the probability of the presence of GI occurring given the amount of shade in a species' environment using phylogenetic logistic regression.

Results

Forty-eight artiodactyl species were able to be evaluated and tabulated. Nine perissodactyl species were able to be evaluated. The phylogenetic logistic regression showed that the probability of GI presence was lower in artiodactyl species that inhabited shaded environments (βshaded  = -1.774). Artiodactyl species inhabiting a nonshaded environment were slightly more probable to have the GI present (βnonshaded  = 0.023), with species inhabiting ambiguously shaded environments having a high probability of GI presence (βambiguous  = 2.214).

Conclusions

Our results suggest that the GI may be a common morphological feature to shade the pupil in nonshaded environments, and, in its absence, increase the amount of light reaching the retina to improve vision in shaded environments for hooved mammals. Further research on the functional optics of the GI and studies that include additional ungulate species would further elucidate phylogenetic and ecological factors influencing the occurrence of GI in hooved mammals.",2020-02-03 +31277625,Additional qualifications of trainees in specialist training programs in Australia.,"

Background

In Australia, the number of medical graduates per year has increased at a greater rate than the increase in the number of specialist training places. Consequently, competition for training positions is intensifying. There is anecdotal evidence to suggest that medical graduates are acquiring additional qualifications to compete with their peers Stevenson 2017 ( https://insightplus.mja.com.au/2017/36/specialty-training-places-the-other-looming-crisis/ ). Our study investigates this phenomenon of additional credentialing and demonstrates the number and type of postgraduate and research qualifications obtained by specialists in training in Australia. This is the first study to assess the number and type of credentials acquired by registrars in each specialty and to provide insight into differences between specialities.

Methods

Information on specialists in training was obtained through the Medicine in Australia: Balancing Employment and Life (MABEL) survey conducted between 2008 and 2014. The number of any additional qualifications and specific PhD, Master's degree, postgraduate diploma/certificate and research degrees from medical school were assessed for each specialist training scheme in the database.

Results

Overall, 995 registrars representing 13 specialties were included. Just under a third (30.4%) completed a research-based degree during their medical degree and almost half (46.7%) of specialist registrars obtained further qualifications after completing medicine. A significantly higher proportion of ophthalmology (78.6%) and paediatric (67.5%) registrars, and a lower percentage of emergency medicine (36.7%) registrars, held postgraduate qualifications. Overall, 2.4% of registrars held a PhD and 10.1% held a Master's degree. A higher percentage of either PhD or Master's was held by ophthalmology (64.3%) and surgical (30.6%) trainees and a lower percentage by anaesthetics (6.3%) and physician trainees (7.9%). Postgraduate diplomas or certificates were most common among paediatric (41.2%) and obstetrics and gynaecology (25.6%) registrars.

Conclusion

This is the first study to investigate the additional qualifications of specialists in training in Australia. Almost half of specialists in training surveyed (46.7%) have completed some form of additional study, whether it is a PhD, Master's, postgraduate diploma/certificate or research degree from medical school. Trainees of specialist training schemes are more qualified than specialists who trained in the past Aust Fam Physician 32:92-4, 2003.",2019-07-05 +31276528,Appraisal of clinical practice guidelines for the management of attention deficit hyperactivity disorder (ADHD) using the AGREE II Instrument: A systematic review.,"

Background and objective

High quality evidence-based clinical practice guidelines (CPGs) have a major impact on the appropriate diagnosis and management and positive outcomes. The evidence-based healthcare for patients with attention deficit hyperactive disorder (ADHD) is challenging. The objective of this study was to appraise the quality of published CPGs for ADHD.

Methods

A systematic review was conducted for ADHD CPGs using CPG databases, DynaMed, PubMed, and Google Scholar. The quality of each included CPG was appraised by three independent appraisers using the Appraisal of Guidelines for Research & Evaluation II (AGREE II) instrument.

Results

Six CPGs were critically reviewed. The AGREE II standardized domain scores revealed variation between the quality of these CPGs with the National Institute of Health and Care Excellence (NICE), University of Michigan Health System, and American Academy of Pediatrics CPGs as the top three. Overall, the recommendations for management of ADHD were similar in these CPGs.

Conclusions

Reporting of CPG development is often poorly documented. Guideline development groups should aim to follow the AGREE II criteria to improve the standards and quality of CPGs. The NICE CPG showed the best quality. Embedding the AGREE II appraisal of CPGs in the training and education of healthcare providers is recommended. The protocol for this study was published in PROSPERO (International prospective register of systematic reviews). Link: http://www.crd.york.ac.uk/PROSPERO/display_record.php?ID=CRD42017078712 and is additionally available from protocols.io. Link: https://dx.doi.org/10.17504/protocols.io.q27dyhn.",2019-07-05 +29086189,Meta-expression analysis of unannotated genes in rice and approaches for network construction to suggest the probable roles.,"

Key message

This work suggests 2020 potential candidates in rice for the functional annotation of unannotated genes using meta-analysis of anatomical samples derived from microarray and RNA-seq technologies and this information will be useful to identify novel morphological agronomic traits. Although the genome of rice (Oryza sativa) has been sequenced, 14,365 genes are considered unannotated because they lack putative annotation information. According to the Rice Genome Annotation Project Database ( http://rice.plantbiology.msu.edu/ ), the proportion of functionally characterized unannotated genes (0.35%) is quite limited when compared with the approximately 3.9% of annotated genes with assigned putative functions. Researchers require additional information to help them investigate the molecular mechanisms associated with those unannotated genes. To determine which of them might regulate morphological or physiological traits in the rice genome, we conducted a meta-analysis of expression data that covered a wide range of tissue/organ samples. Overall, 2020 genes showed cultivar-, tissue-, or organ-preferential patterns of expression. Representative candidates from featured groups were validated by RT-PCR, and the GUS reporter system was used to validate the expression of genes that were clustered according to their leaf or root preference. Taking a molecular and genetics approach, we examined meta-expression data and found that 127 genes were differentially expressed between japonica and indica rice cultivars. This is potentially significant for future agronomic applications. We also used a T-DNA insertional mutant and performed a co-expression network analysis of Sword shape dwarf1 (SSD1), a gene that regulates cell division. This network was refined via RT-PCR analysis. Our results suggested that SSD1 represses the expression of four genes related to the processes of DNA replication or cell division and provides insight into possible molecular mechanisms. 
Together, these strategies present a valuable tool for in-depth characterization of currently unannotated genes.",2017-10-30 +22809392,CIDeR: multifactorial interaction networks in human diseases.,"The pathobiology of common diseases is influenced by heterogeneous factors interacting in complex networks. CIDeR http://mips.helmholtz-muenchen.de/cider/ is a publicly available, manually curated, integrative database of metabolic and neurological disorders. The resource provides structured information on 18,813 experimentally validated interactions between molecules, bioprocesses and environmental factors extracted from the scientific literature. Systematic annotation and interactive graphical representation of disease networks make CIDeR a versatile knowledge base for biologists, analysis of large-scale data and systems biology approaches.",2012-07-18 +26040787,CARMO: a comprehensive annotation platform for functional exploration of rice multi-omics data.,"High-throughput technology is gradually becoming a powerful tool for routine research in rice. Interpretation of biological significance from the huge amount of data is a critical but non-trivial task, especially for rice, for which gene annotations rely heavily on sequence similarity rather than direct experimental evidence. Here we describe the annotation platform for comprehensive annotation of rice multi-omics data (CARMO), which provides multiple web-based analysis tools for in-depth data mining and visualization. The central idea involves systematic integration of 1819 samples from omics studies and diverse sources of functional evidence (15 401 terms), which are further organized into gene sets and higher-level gene modules. In this way, the high-throughput data may easily be compared across studies and platforms, and integration of multiple types of evidence allows biological interpretation from the level of gene functional modules with high confidence. 
In addition, the functions and pathways for thousands of genes lacking description or validation may be deduced based on concerted expression of genes within the constructed co-expression networks or gene modules. Overall, CARMO provides comprehensive annotations for transcriptomic datasets, epi-genomic modification sites, single nucleotide polymorphisms identified from genome re-sequencing, and the large gene lists derived from these omics studies. Well-organized results, as well as multiple tools for interactive visualization, are available through a user-friendly web interface. Finally, we illustrate how CARMO enables biological insights using four examples, demonstrating that CARMO is a highly useful resource for intensive data mining and hypothesis generation based on rice multi-omics data. CARMO is freely available online (http://bioinfo.sibs.ac.cn/carmo).",2015-07-01 +29156309,Recent development of computational resources for new antibiotics discovery.,"Understanding a complex working mechanism of biosynthetic gene clusters (BGCs) encoding secondary metabolites is a key to discovery of new antibiotics. Computational resources continue to be developed in order to better process increasing volumes of genome and chemistry data, and thereby better understand BGCs. In this context, this review highlights recent advances in computational resources for secondary metabolites with emphasis on genome mining, compound identification and dereplication as well as databases. 
We also introduce an updated version of Secondary Metabolite Bioinformatics Portal (SMBP; http://www.secondarymetabolites.org), which we previously released as a curated gateway to all the computational tools and databases useful for discovery and engineering of secondary metabolites.",2017-10-01 +30377652,Data from X-ray crystallographic analysis and DFT calculations on isomeric azo disperse dyes.,"X-ray crystallography and DFT calculations were used to characterize the molecular nature and excited state properties of isomeric photostable azo dyes for textile fibers undergoing extensive sunlight exposure. Structural data in CIF files arising from X-ray analysis are reported and the complete files are deposited with the Cambridge Crystallographic Data Centre as CCDC 1548989 (https://www.ccdc.cam.ac.uk/structures/Search?Ccdcid=1548989) and CCDC 1548990 (https://www.ccdc.cam.ac.uk/structures/Search?Ccdcid=1548990). Data from calculating the vertical electronic excitation of 20 excited states for each dye and from calculating excited state oxidation potential (ESOP) and Frontier HOMO/LUMO isosurfaces are also presented. This data is related to the article ""Molecular and excited state properties of isomeric scarlet disperse dyes"" (Lim et al., 2018) [1].",2018-10-09 +22110269,Comparative epidemiology of gastric cancer between Japan and China.,"

Aim

To clarify the similarities and differences in gastric cancer epidemiology between Japan and China.

Methods

A comprehensive literature search of the PubMed database was performed. The relevant literature published in China was also cited. Data on incidence and mortality rates in 2008 were obtained from the Cancer Mondial database, published by International Agency for Research on Cancer at http://www-dep.iarc.fr/.

Results

Gastric cancer remains a significant public health burden in both Japan and China. The prevalence of Helicobacter pylori (H. pylori) colonization is high in the adult populations of both countries. Accumulating evidence from intervention studies in both countries has shown the effectiveness of H. pylori eradication in reducing gastric cancer incidence. There are differences, however, in many aspects of gastric cancer, including patterns of incidence and mortality, trends in the prevalence of H. pylori infection, H. pylori strains, the magnitude of risk of gastric cancer related to H. pylori infection, and associations with dietary habits. Compared with China, Japan has seen a more rapid decline in H. pylori infection among adolescents. While Japanese cohort studies have dominated the literature concerning the associations between gastric cancer and dietary habits, numerous case-control studies in China suggest a positive association between a high intake of preserved fish and vegetables and gastric cancer risk. There is a need for a multidisciplinary research approach to understand the interactions between various strains of H. pylori, host factors, and other lifestyle and environmental factors in gastric carcinogenesis in both countries.

Conclusion

The shared high incidence of gastric cancer and high prevalence of H. pylori, as well as differences in many aspects of gastric cancer, provide an excellent opportunity to establish Sino-Japanese collaborations.",2011-10-01 +30587106,Web-based design and analysis tools for CRISPR base editing.,"

Background

As a result of its simplicity and high efficiency, the CRISPR-Cas system has been widely used as a genome editing tool. Recently, CRISPR base editors, which consist of deactivated Cas9 (dCas9) or Cas9 nickase (nCas9) linked with a cytidine or a guanine deaminase, have been developed. Base editing tools will be very useful for gene correction because they can produce highly specific DNA substitutions without the introduction of any donor DNA, but dedicated web-based tools to facilitate the use of such tools have not yet been developed.

Results

We present two web tools for base editors, named BE-Designer and BE-Analyzer. BE-Designer provides all possible base editor target sequences in a given input DNA sequence with useful information including potential off-target sites. BE-Analyzer, a tool for assessing base editing outcomes from next generation sequencing (NGS) data, provides information about mutations in a table and interactive graphs. Furthermore, because the tool runs client-side, large amounts of targeted deep sequencing data (< 1 GB) do not need to be uploaded to a server, substantially reducing running time and increasing data security. BE-Designer and BE-Analyzer can be freely accessed at http://www.rgenome.net/be-designer/ and http://www.rgenome.net/be-analyzer/, respectively.

Conclusion

We develop two useful web tools to design target sequence (BE-Designer) and to analyze NGS data from experimental results (BE-Analyzer) for CRISPR base editors.",2018-12-27 +31015528,Comparative transcriptome analysis reveals the genetic basis of coat color variation in Pashmina goat.,"The genetics of coat color variation remains a classic area. Earlier studies have focused on a limited number of genes involved in color determination; however, the complete set of trait determinants are still not well known. In this study, we used high-throughput sequencing technology to identify and characterize intricate interactions between genes that cause complex coat color variation in Changthangi Pashmina goats, producer of finest and costly commercial animal fiber. We systematically identified differentially expressed mRNAs and lncRNAs from black, brown and white Pashmina goat skin samples by using RNA-sequencing technique. A pairwise comparison of black, white and brown skin samples yielded 2479 significantly dysregulated genes (2422 mRNA and 57 lncRNAs). Differentially expressed genes were enriched in melanin biosynthesis, melanocyte differentiation, developmental pigmentation, melanosome transport activities GO terms. Our analysis suggested the potential role of lncRNAs on color coding mRNAs in cis and trans configuration. We have also developed online data repository as a component of the study to provide a central location for data access, visualization and interpretation accessible through http://pcd.skuastk.org/ .",2019-04-23 +28472236,KMC 3: counting and manipulating k-mer statistics.,"

Summary

Counting all k-mers in a given dataset is a standard procedure in many bioinformatics applications. We introduce KMC3, a significant improvement of the former KMC2 algorithm together with KMC tools for manipulating k-mer databases. Usefulness of the tools is shown on a few real problems.

Availability and implementation

Program is freely available at http://sun.aei.polsl.pl/REFRESH/kmc .

Contact

sebastian.deorowicz@polsl.pl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +31677479,Systematic Identification of Cell-Cell Communication Networks in the Developing Brain.,"Since the generation of cell-type specific knockout models, the importance of inter-cellular communication between neural, vascular, and microglial cells during neural development has been increasingly appreciated. However, the extent of communication between these major cell populations remains to be systematically mapped. Here, we describe EMBRACE (embryonic brain cell extraction using FACS), a method to simultaneously isolate neural, mural, endothelial, and microglial cells to more than 94% purity in ∼4 h. Utilizing EMBRACE we isolate, transcriptionally analyze, and build a cell-cell communication map of the developing mouse brain. We identify 1,710 unique ligand-receptor interactions between neural, endothelial, mural, and microglial cells in silico and experimentally confirm the APOE-LDLR, APOE-LRP1, VTN-KDR, and LAMA4-ITGB1 interactions in the E14.5 brain. We provide our data via the searchable ""Brain interactome explorer"", available at https://mpi-ie.shinyapps.io/braininteractomeexplorer/. Together, this study provides a comprehensive map that reveals the richness of communication within the developing brain.",2019-10-17 +29868717,The Rényi divergence enables accurate and precise cluster analysis for localization microscopy.,"

Motivation

Clustering analysis is a key technique for quantitatively characterizing structures in localization microscopy images. To build up accurate information about biological structures, it is critical that the quantification is both accurate (close to the ground truth) and precise (has small scatter and is reproducible).

Results

Here, we describe how the Rényi divergence can be used for cluster radius measurements in localization microscopy data. We demonstrate that the Rényi divergence can operate with high levels of background and provides results which are more accurate than Ripley's functions, Voronoi tessellation or DBSCAN.

Availability and implementation

The data supporting this research and the software described are accessible at the following site: https://dx.doi.org/10.18742/RDM01-316. Correspondence and requests for materials should be addressed to the corresponding author.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +30016509,"MAVIS: merging, annotation, validation, and illustration of structural variants.","

Summary

Reliably identifying genomic rearrangements and interpreting their impact is a key step in understanding their role in human cancers and inherited genetic diseases. Many short read algorithmic approaches exist but all have appreciable false negative rates. A common approach is to evaluate the union of multiple tools increasing sensitivity, followed by filtering to retain specificity. Here we describe an application framework for the rapid generation of structural variant consensus, unique in its ability to visualize the genetic impact and context as well as process both genome and transcriptome data.

Availability and implementation

http://mavis.bcgsc.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +31180159,A Vietnamese human genetic variation database.,"Large scale human genome projects have created tremendous human genome databases for some well-studied populations. Vietnam has about 95 million people (the 14th largest country by population in the world) of which more than 86% are Kinh people. To date, genetic studies for Vietnamese people mostly rely on genetic information from other populations. Building a Vietnamese human genetic variation database is a must for properly interpreting Vietnamese genetic variants. To this end, we sequenced 105 whole genomes and 200 whole exomes of 305 unrelated Kinh Vietnamese (KHV) people. We also included 101 other previously published KHV genomes to build a Vietnamese human genetic variation database of 406 KHV people. The KHV database contains 24.81 million variants (22.47 million single nucleotide polymorphisms (SNPs) and 2.34 million indels) of which 0.71 million variants are novel. It includes more than 99.3% of variants with a frequency of >1% in the KHV population. Noticeably, the KHV database revealed 107 variants reported in the human genome mutation database as pathological mutations with a frequency above 1% in the KHV population. The KHV database (available at https://genomes.vn) would be beneficial for genetic studies and medical applications not only for the Vietnamese population but also for other closely related populations.",2019-07-03 +32211018,"Expression Changes Confirm Genomic Variants Predicted to Result in Allele-Specific, Alternative mRNA Splicing.","Splice isoform structure and abundance can be affected by either noncoding or masquerading coding variants that alter the structure or abundance of transcripts. When these variants are common in the population, these nonconstitutive transcripts are sufficiently frequent so as to resemble naturally occurring, alternative mRNA splicing. 
Prediction of the effects of such variants has been shown to be accurate using information theory-based methods. Single nucleotide polymorphisms (SNPs) predicted to significantly alter natural and/or cryptic splice site strength were shown to affect gene expression. Splicing changes for known SNP genotypes were confirmed in HapMap lymphoblastoid cell lines with gene expression microarrays and custom designed q-RT-PCR or TaqMan assays. The majority of these SNPs (15 of 22) as well as an independent set of 24 variants were then subjected to RNAseq analysis using the ValidSpliceMut web beacon (http://validsplicemut.cytognomix.com), which is based on data from the Cancer Genome Atlas and International Cancer Genome Consortium. SNPs from different genes analyzed with gene expression microarray and q-RT-PCR exhibited significant changes in affected splice site use. Thirteen SNPs directly affected exon inclusion and 10 altered cryptic site use. Homozygous SNP genotypes resulting in stronger splice sites exhibited higher levels of processed mRNA than alleles associated with weaker sites. Four SNPs exhibited variable expression among individuals with the same genotypes, masking statistically significant expression differences between alleles. Genome-wide information theory and expression analyses (RNAseq) in tumor exomes and genomes confirmed splicing effects for 7 of the HapMap SNP and 14 SNPs identified from tumor genomes. q-RT-PCR resolved rare splice isoforms with read abundance too low for statistical significance in ValidSpliceMut. Nevertheless, the web-beacon provides evidence of unanticipated splicing outcomes, for example, intron retention due to compromised recognition of constitutive splice sites. 
Thus, ValidSpliceMut and q-RT-PCR represent complementary resources for identification of allele-specific, alternative splicing.",2020-03-05 +23411718,The Eimeria transcript DB: an integrated resource for annotated transcripts of protozoan parasites of the genus Eimeria.,"Parasites of the genus Eimeria infect a wide range of vertebrate hosts, including chickens. We have recently reported a comparative analysis of the transcriptomes of Eimeria acervulina, Eimeria maxima and Eimeria tenella, integrating ORESTES data produced by our group and publicly available Expressed Sequence Tags (ESTs). All cDNA reads have been assembled, and the reconstructed transcripts have been submitted to a comprehensive functional annotation pipeline. Additional studies included orthology assignment across apicomplexan parasites and clustering analyses of gene expression profiles among different developmental stages of the parasites. To make all this body of information publicly available, we constructed the Eimeria Transcript Database (EimeriaTDB), a web repository that provides access to sequence data, annotation and comparative analyses. Here, we describe the web interface, available sequence data sets and query tools implemented on the site. The main goal of this work is to offer a public repository of sequence and functional annotation data of reconstructed transcripts of parasites of the genus Eimeria. We believe that EimeriaTDB will represent a valuable and complementary resource for the Eimeria scientific community and for those researchers interested in comparative genomics of apicomplexan parasites. Database URL: http://www.coccidia.icb.usp.br/eimeriatdb/",2013-02-14 +28052254,Pan-cancer Immunogenomic Analyses Reveal Genotype-Immunophenotype Relationships and Predictors of Response to Checkpoint Blockade.,"The Cancer Genome Atlas revealed the genomic landscapes of human cancers. In parallel, immunotherapy is transforming the treatment of advanced cancers. 
Unfortunately, the majority of patients do not respond to immunotherapy, making the identification of predictive markers and the mechanisms of resistance an area of intense research. To increase our understanding of tumor-immune cell interactions, we characterized the intratumoral immune landscapes and the cancer antigenomes from 20 solid cancers and created The Cancer Immunome Atlas (https://tcia.at/). Cellular characterization of the immune infiltrates showed that tumor genotypes determine immunophenotypes and tumor escape mechanisms. Using machine learning, we identified determinants of tumor immunogenicity and developed a scoring scheme for the quantification termed immunophenoscore. The immunophenoscore was a superior predictor of response to anti-cytotoxic T lymphocyte antigen-4 (CTLA-4) and anti-programmed cell death protein 1 (anti-PD-1) antibodies in two independent validation cohorts. Our findings and this resource may help inform cancer immunotherapy and facilitate the development of precision immuno-oncology.",2017-01-01 +32021469,Multimorbidity by Patient and Tumor Factors and Time-to-Surgery Among Colorectal Cancer Patients in Spain: A Population-Based Study.,"

Background

Cancer treatment and outcomes can be influenced by tumor characteristics, patient overall health status, and comorbidities. While previous studies have analyzed the influence of comorbidity on cancer outcomes, limited information is available regarding factors associated with the increased prevalence of comorbidities and multimorbidity among patients with colorectal cancer in Spain.

Patients and methods

This cross-sectional study obtained data from all colorectal cancer cases diagnosed in two Spanish provinces in 2011 from two population-based cancer registries and electronic health records. We calculated the prevalence of comorbidities according to patient and tumor factors, identified factors associated with an increased prevalence of comorbidity and multimorbidity, analyzed the association between comorbidities and time-to-surgery, and developed an interactive web application (https://comcor.netlify.com/).

Results

The most common comorbidities were diabetes (23.6%), chronic obstructive pulmonary disease (17.2%), and congestive heart failure (14.5%). Among all comorbidities, 52% of patients were diagnosed at more advanced stages (stage III/IV). Patients with advanced age, restricted performance status or who were disabled, obese, and smokers had a higher prevalence of multimorbidity. Patients with multimorbidity had a longer time-to-surgery than those without comorbidity (17 days, 95% confidence interval: 3-29 days).

Conclusion

We identified a consistent pattern of factors associated with a higher prevalence of comorbidities and multimorbidity at diagnosis and an increased time-to-surgery among patients with colorectal cancer with multimorbidity in Spain. This pattern may provide insights for further etiological and preventive research and help to identify patients at a higher risk for poorer cancer outcomes and suboptimal treatment.",2020-01-14 +29106666,TADB 2.0: an updated database of bacterial type II toxin-antitoxin loci.,"TADB2.0 (http://bioinfo-mml.sjtu.edu.cn/TADB2/) is an updated database that provides comprehensive information about bacterial type II toxin-antitoxin (TA) loci. Compared with the previous version, the database refined and the new data schema is employed. With the aid of text mining and manual curation, it recorded 6193 type II TA loci in 870 replicons of bacteria and archaea, including 105 experimentally validated TA loci. In addition, the newly developed tool TAfinder combines the homolog searches and the operon structure detection, allowing the prediction for type II TA pairs in bacterial genome sequences. It also helps to investigate the genomic context of predicted TA loci for putative virulence factors, antimicrobial resistance determinants and mobile genetic elements via alignments to the specific public databases. Additionally, the module TAfinder-Compare allows comparing the presence of the given TA loci across the close relative genomes. With the recent updates, TADB2.0 might provide better support for understanding the important roles of type II TA systems in the prokaryotic life activities.",2018-01-01 +30134911,bioSyntax: syntax highlighting for computational biology.,"BACKGROUND:Computational biology requires the reading and comprehension of biological data files. Plain-text formats such as SAM, VCF, GTF, PDB and FASTA, often contain critical information which is obfuscated by the data structure complexity. 
RESULTS:bioSyntax ( https://biosyntax.org/ ) is a freely available suite of biological syntax highlighting packages for vim, gedit, Sublime, VSCode, and less. bioSyntax improves the legibility of low-level biological data in the bioinformatics workspace. CONCLUSION:bioSyntax supports computational scientists in parsing and comprehending their data efficiently and thus can accelerate research output.",2018-08-22 +29059320,DiseaseEnhancer: a resource of human disease-associated enhancer catalog.,"Large-scale sequencing studies discovered substantial genetic variants occurring in enhancers which regulate genes via long range chromatin interactions. Importantly, such variants could affect enhancer regulation by changing transcription factor bindings or enhancer hijacking, and in turn, make an essential contribution to disease progression. To facilitate better usage of published data and exploring enhancer deregulation in various human diseases, we created DiseaseEnhancer (http://biocc.hrbmu.edu.cn/DiseaseEnhancer/), a manually curated database for disease-associated enhancers. As of July 2017, DiseaseEnhancer includes 847 disease-associated enhancers in 143 human diseases. Database features include basic enhancer information (i.e. genomic location and target genes); disease types; associated variants on the enhancer and their mediated phenotypes (i.e. gain/loss of enhancer and the alterations of transcription factor bindings). We also include a feature on our website to export any query results into a file and download the full database. 
DiseaseEnhancer provides a promising avenue for researchers to facilitate the understanding of enhancer deregulation in disease pathogenesis, and identify new biomarkers for disease diagnosis and therapy.",2018-01-01 +29630775,PR-10 proteins as potential mediators of melatonin-cytokinin cross-talk in plants: crystallographic studies of LlPR-10.2B isoform from yellow lupine.,"LlPR-10.2B, a Pathogenesis-related class 10 (PR-10) protein from yellow lupine (Lupinus luteus) was crystallized in complex with melatonin, an emerging important plant regulator and antioxidant. The structure reveals two molecules of melatonin bound in the internal cavity of the protein, plus a very well-defined electron density near the cavity entrance, corresponding to an unknown ligand molecule comprised of two flat rings, which is most likely a product of melatonin transformation. In a separate LlPR-10.2B co-crystallization experiment with an equimolar mixture of melatonin and trans-zeatin, which is a cytokinin phytohormone well recognized as a PR-10-binding partner, a quaternary 1 : 1 : 1 : 1 complex was formed, in which one of the melatonin-binding sites has been substituted with trans-zeatin, whereas the binding of melatonin at the second binding site and binding of the unknown ligand are undisturbed. This unusual complex, when compared with the previously described PR-10/trans-zeatin complexes and with the emerging structural information about melatonin binding by PR-10 proteins, provides intriguing insights into the role of PR-10 proteins in phytohormone regulation in plants, especially with the involvement of melatonin, and implicates the PR-10 proteins as low-affinity melatonin binders under the conditions of elevated melatonin concentration. 
DATABASES:Atomic coordinates and processed structure factors corresponding to the final models of the LlPR-10.2B/melatonin and LlPR-10.2B/melatonin + trans-zeatin complexes have been deposited with the Protein Data Bank (PDB) under the accession codes 5MXB and 5MXW. The corresponding raw X-ray diffraction images have been deposited in the RepOD Repository at the Interdisciplinary Centre for Mathematical and Computational Modelling (ICM) of the University of Warsaw, Poland, and are available for download with the following Digital Object Identifiers (DOI): https://doi.org/10.18150/repod.9923638 and https://doi.org/10.18150/repod.6621013.",2018-04-20 +30306862,Development of an Information System of Structures and Force Field Parameters of Chemical Compounds from Sri Lankan Flora.,"

Background

Sri Lanka offers a huge diversity of flora with a large proportion of those being endemic to the island. Both the endemic and native plants species serve as a rich bank of phytochemicals.

Method

In this study, ""Sri Lankan Flora"" an online web-based information system of phytochemical compounds isolated from the flora of Sri Lanka was proposed.

Results

The database contained 3D structures of those compounds, calculated quantitative structure-activity relationship (QSAR) data and the GROMOS 54a7 force field parameters for each and every compound. The manually curated chemical structures, activities and force field parameters provide a possible direct avenue for computer-aided drug discovery. The present study is a continuing project with a wider goal of building up a database, not only for assisting the computer-aided drug designing process, but also for other chemical applications, as the database includes structural, physical, chemical and dynamic properties of chemical compounds of the flora of Sri Lanka. The database is freely accessible at http://science.cmb.ac.lk/tools/slflora.",2018-01-01 +29934899,Enalos Suite: New Cheminformatics Platform for Drug Discovery and Computational Toxicology.,"In this chapter we present and discuss, with the aid of several representative case studies from drug discovery and computational toxicology, a new cheminformatics platform, Enalos Suite, that was developed with open source and freely available software. Enalos Suite ( http://enalossuite.novamechanics.com/ ) was designed and developed as a useful tool to address a variety of cheminformatics problems, given that it expedites tasks performed in predictive modeling and allows access, data mining and manipulation for multiple chemical databases (PubChem, UniChem, etc.). Enalos Suite was carefully designed to permit its extension and adjustment to the special field of interest of each user, including, for instance, nanoinformatics, biomedical, and other applications. To demonstrate the functionalities of Enalos Suite that are useful in different cheminformatics applications, we present indicative case studies that include the exploitation of chemical databases within a drug discovery project, the calculation of molecular descriptors, and finally the development of a predictive QSAR model validated according to OECD principles. 
We aspire that at the end of this chapter, the reader will capture the effectiveness of different functionalities included in the Enalos Suite that could be of significant value in a multitude of cheminformatics applications.",2018-01-01 +29346778,Diverse Brain Myeloid Expression Profiles Reveal Distinct Microglial Activation States and Aspects of Alzheimer's Disease Not Evident in Mouse Models.,"Microglia, the CNS-resident immune cells, play important roles in disease, but the spectrum of their possible activation states is not well understood. We derived co-regulated gene modules from transcriptional profiles of CNS myeloid cells of diverse mouse models, including new tauopathy model datasets. Using these modules to interpret single-cell data from an Alzheimer's disease (AD) model, we identified microglial subsets-distinct from previously reported ""disease-associated microglia""-expressing interferon-related or proliferation modules. We then analyzed whole-tissue RNA profiles from human neurodegenerative diseases, including a new AD dataset. Correcting for altered cellular composition of AD tissue, we observed elevated expression of the neurodegeneration-related modules, but also modules not implicated using expression profiles from mouse models alone. We provide a searchable, interactive database for exploring gene expression in all these datasets (http://research-pub.gene.com/BrainMyeloidLandscape). Understanding the dimensions of CNS myeloid cell activation in human disease may reveal opportunities for therapeutic intervention.",2018-01-01 +32387465,Metabolomics analysis of the antidepressant prescription Danzhi Xiaoyao Powder in a rat model of Chronic Unpredictable Mild Stress (CUMS).,"

Ethnopharmacological relevance

Danzhi Xiaoyao Powder (DZXY) is a classical prescription, that has been extensively used in traditional Chinese medicine (TCM) to treat depression for many years. However, the mechanism of DZXY is still unclear.

Aim of the study

The aim was to investigate the mechanism of the antidepressant effect of DZXY on a rat model of chronic unpredictable mild stress (CUMS).

Materials and methods

Forty male SD (Sprague-Dawley) rats with similar open field test (OFT) results were randomly divided into a control group (n = 10) and an experimental group (n = 30). A depression model was established in the experimental group using the CUMS method. After the CUMS model was established successfully, the rats were randomly divided into a depression model group and a DZXY group. The DZXY group was fed DZXY, while the depression model group and control group were given an equal amount of 0.5% sodium carboxymethyl cellulose suspension. Intragastric administration was performed once daily for 14 consecutive days. Animal weight, the sugar preference test, the open field test and the forced swimming test were used to evaluate the modeling effect and the antidepressant effect of DZXY. After the experiment, the plasma of rats was collected and the changes in plasma metabolites were analyzed by UPLC/Q-TOF-MS. The UPLC/Q-TOF-MS spectra data were evaluated by pattern recognition analysis to determine the changes in endogenous metabolites in the rat plasma samples.

Results

The results of the behavioral investigation showed that the rat model of depression was successfully replicated and that DZXY had an antidepressant effect. Using the UPLC-MS/MS metabolomics platform, partial least squares (PLS) and orthogonal partial least squares (OPLS), metabolic profile models (R2 and Q2 ≥ 0.5) of rat plasma were successfully constructed. The model could distinguish among the control group, the depression model group and the DZXY group. Finally, 38 differential metabolites were identified in the plasma. According to KEGG (http://www.kegg.jp) pathway analysis, amino acid metabolism, lipid metabolism, purine metabolism, the prolactin signaling pathway and bile secretion were enriched and represented the main metabolic pathways influenced in the plasma.

Conclusions

This study successfully established a CUMS depression model. A total of 38 differential metabolites associated with depression were identified in the plasma of rats, 24 of which were modulated by DZXY. These results suggest that DZXY can improve excitability and play an antidepressant role by regulating phenylalanine metabolism, arachidonic acid metabolism, porphyrin metabolism, D-arginine and D-ornithine metabolism, steroid hormone biosynthesis, unsaturated fatty acid biosynthesis and steroid biosynthesis.",2020-05-06 +29308007,Virus Database and Online Inquiry System Based on Natural Vectors.,"We construct a virus database called VirusDB (http://yaulab.math.tsinghua.edu.cn/VirusDB/) and an online inquiry system to serve people who are interested in viral classification and prediction. The database stores all viral genomes, their corresponding natural vectors, and the classification information of the single/multiple-segmented viral reference sequences downloaded from National Center for Biotechnology Information. The online inquiry system serves the purpose of computing natural vectors and their distances based on submitted genomes, providing an online interface for accessing and using the database for viral classification and prediction, and back-end processes for automatic and manual updating of database content to synchronize with GenBank. Submitted genomes data in FASTA format will be carried out and the prediction results with 5 closest neighbors and their classifications will be returned by email. 
Considering the one-to-one correspondence between sequence and natural vector, time efficiency, and high accuracy, natural vector is a significant advance compared with alignment methods, which makes VirusDB a useful database in further research.",2017-12-17 +27733501,The human-induced pluripotent stem cell initiative-data resources for cellular genetics.,"The Human Induced Pluripotent Stem Cell Initiative (HipSci) is establishing a large catalogue of human iPSC lines, arguably the most well characterized collection to date. The HipSci portal enables researchers to choose the right cell line for their experiment, and makes HipSci's rich catalogue of assay data easy to discover and reuse. Each cell line has genomic, transcriptomic, proteomic and cellular phenotyping data. Data are deposited in the appropriate EMBL-EBI archives, including the European Nucleotide Archive (ENA), European Genome-phenome Archive (EGA), ArrayExpress and PRoteomics IDEntifications (PRIDE) databases. The project will make 500 cell lines from healthy individuals, and from 150 patients with rare genetic diseases; these will be available through the European Collection of Authenticated Cell Cultures (ECACC). As of August 2016, 238 cell lines are available for purchase. Project data is presented through the HipSci data portal (http://www.hipsci.org/lines) and is downloadable from the associated FTP site (ftp://ftp.hipsci.ebi.ac.uk/vol1/ftp). The data portal presents a summary matrix of the HipSci cell lines, showing available data types. Each line has its own page containing descriptive metadata, quality information, and links to archived assay data. 
Analysis results are also available in a Track Hub, allowing visualization in the context of public genomic annotations (http://www.hipsci.org/data/trackhubs).",2016-10-12 +29530061,Stearoyl-CoA desaturase-1 promotes colorectal cancer metastasis in response to glucose by suppressing PTEN.,"BACKGROUND:Diabetic patients have a higher risk factor for colorectal cancer (CRC) metastasis. Stearoyl-CoA desaturase 1 (SCD1), the main enzyme responsible for producing monounsaturated fatty acids(MUFA) from saturated fatty acids, is frequently deregulated in both diabetes and CRC. The function and mechanism of SCD1 in metastasis of CRC and its relevance to glucose remains largely unknown. METHODS:SCD1 expression levels were analyzed in human CRC tissues and the Cancer Browser database ( https://genome-cancer.ucsc.edu/ ). CRC cell lines stably transfected with SCD1 shRNAs or vector were established to investigate the role of SCD1 in modulating migration and invasion of CRC cells. A glucose concentration gradient was set to investigate regulation of SCD1 in CRC relevant to diabetic conditions. RESULTS:The clinical data analysis showed high expression of SCD1 in CRC tissues with a negative correlation with the prognosis of CRC. In vitro experiments revealed that SCD1 increased CRC progression through promoting epithelial-mesenchymal transition (EMT). Lipidomic analysis demonstrated that SCD1 increased MUFA levels and MUFA administration could rescue migration and invasion defect of CRC cells induced by SCD1 knockdown. Furthermore, SCD1-mediated progression of CRC was promoted by carbohydrate response-element binding protein (ChREBP) in response to high glucose. Mechanistically, hyperglycemia-SCD1-MUFA induced CRC cell migration and invasion by regulating PTEN. 
CONCLUSIONS:Our findings show that SCD1 promotes metastasis of CRC cells through MUFA production and suppressing PTEN in response to glucose, which may be a novel mechanism for diabetes-induced CRC metastasis.",2018-03-12 +29069517,Expanded and updated data and a query pipeline for iBeetle-Base.,"The iBeetle-Base provides access to sequence and phenotype information for genes of the beetle Tribolium castaneum. It has been updated including more and updated data and new functions. RNAi phenotypes are now available for >50% of the genes, which represents an expansion of 60% compared to the previous version. Gene sequence information has been updated based on the new official gene set OGS3 and covers all genes. Interoperability with FlyBase has been enhanced: First, gene information pages of homologous genes are interlinked between both databases. Second, some steps of a new query pipeline allow transforming gene lists from either species into lists with related gene IDs, names or GO terms. This facilitates the comparative analysis of gene functions between fly and beetle. The backend of the pipeline is implemented as endpoints of a RESTful interface, such that it can be reused by other projects or tools. A novel online interface allows the community to propose GO terms for their gene of interest expanding the range of animals where GO terms are defined. iBeetle-Base is available at http://ibeetle-base.uni-goettingen.de/.",2018-01-01 +29069447,The DifferentialNet database of differential protein-protein interactions in human tissues.,"DifferentialNet is a novel database that provides users with differential interactome analysis of human tissues (http://netbio.bgu.ac.il/diffnet/). Users query DifferentialNet by protein, and retrieve its differential protein-protein interactions (PPIs) per tissue via an interactive graphical interface. 
To compute differential PPIs, we integrated available data of experimentally detected PPIs with RNA-sequencing profiles of tens of human tissues gathered by the Genotype-Tissue Expression consortium (GTEx) and by the Human Protein Atlas (HPA). We associated each PPI with a score that reflects whether its corresponding genes were expressed similarly across tissues, or were up- or down-regulated in the selected tissue. By this, users can identify tissue-specific interactions, filter out PPIs that are relatively stable across tissues, and highlight PPIs that show relative changes across tissues. The differential PPIs can be used to identify tissue-specific processes and to decipher tissue-specific phenotypes. Moreover, they unravel processes that are tissue-wide yet tailored to the specific demands of each tissue.",2018-01-01 +31194082,Raw genome sequence data for 13 isogenic Aspergillus fumigatus strains isolated over a 2 year period from a patient with chronic granulomatous disease.,"Azole-resistance in Aspergillus fumigatus is an emerging worldwide threat as it precludes the use of one of the 3 major classes of antifungal drugs to treat chronic and invasive aspergillosis [1]. In addition to the well-known environmental emergence of azole-resistant A. fumigatus strains, associated with the use of fungicides in agriculture [2], [3], the development of in-host resistance, facilitated by medical antifungal use, has been described [4]. Investigations involving linked sets of (isogenic) clinical isolates of A. fumigatus sequentially recovered from individual patients, are extremely important in order to improve our understanding of how azole resistance develops in-host. Here we present the whole genome sequences of 13 clinical isogenic A. fumigatus isolates. These isolates were cultured from a single patient suffering from invasive aspergillosis over a period of 2 years. 
This patient underwent a wide range of antifungal therapies and the resultant isolates acquired multiple azole resistance in-host during the course of infection. The data presented here is related to our research paper titled ""In-host microevolution of Aspergillus fumigatus: a phenotypic and genotypic analysis"" which describes the phenotypic characterisation of these clinical isolates [5]. The raw sequence data was deposited in the NCBI Sequence Read Archive (https://www.ncbi.nlm.nih.gov/sra), under BioProject ID number PRJNA528395.",2019-05-23 +32186404,"Uterine Patterning, Endometrial Gland Development, and Implantation Failure in Mice Exposed Neonatally to Genistein.","BACKGROUND:Embryo implantation relies on precise hormonal regulation, associated gene expression changes, and appropriate female reproductive tract tissue architecture. Female mice exposed neonatally to the phytoestrogen genistein (GEN) at doses similar to those in infants consuming soy-based infant formulas are infertile due in part to uterine implantation defects. OBJECTIVES:Our goal was to determine the mechanisms by which neonatal GEN exposure causes implantation defects. METHODS:Female mice were exposed to GEN on postnatal days (PND)1-5 and uterine tissues collected on PND5, PND22-26, and during pregnancy. Analysis of tissue weights, morphology, and gene expression was performed using standard histology, confocal imaging with three-dimensional analysis, real-time reverse transcription polymerase chain reaction (real-time RT-PCR), and microarrays. The response of ovariectomized adults to 17β-estradiol (E2) and artificial decidualization were measured. Leukemia inhibitory factor (LIF) injections were given intraperitoneally and implantation sites visualized. Gene expression patterns were compared with curated data sets to identify upstream regulators. 
RESULTS:GEN-exposed mice exhibited reduced uterine weight gain in response to E2 treatment or artificial decidualization compared with controls; however, expression of select hormone responsive genes remained similar between the two groups. Uteri from pregnant GEN-exposed mice were posteriorized and had reduced glandular epithelium. Implantation failure was not rescued by LIF administration. Microarray analysis of GEN-exposed uteri during early pregnancy revealed significant overlap with several conditional uterine knockout mouse models, including Foxa2, Wnt4, and Sox17. These models exhibit reduced endometrial glands, features of posteriorization and implantation failure. Expression of Foxa2, Wnt4, and Sox17, as well as genes important for neonatal uterine differentiation (Wnt7a, Hoxa10, and Msx2), were severely disrupted on PND5 in GEN-exposed mice. DISCUSSION:Our findings suggest that neonatal GEN exposure in mice disrupts expression of genes important for uterine development, causing posteriorization and diminished gland function during pregnancy that contribute to implantation failure. These findings could have implications for women who consumed soy-based formulas as infants. https://doi.org/10.1289/EHP6336.",2020-03-18 +28011601,CSDB_GT: a new curated database on glycosyltransferases.,"Glycosyltransferases (GTs) are carbohydrate-active enzymes (CAZy) involved in the synthesis of natural glycan structures. The application of CAZy is highly demanded in biotechnology and pharmaceutics. However, it is being hindered by the lack of high-quality and comprehensive repositories of the research data accumulated so far. In this paper, we describe a new curated Carbohydrate Structure Glycosyltransferase Database (CSDB_GT). Currently, CSDB_GT provides ca. 780 activities exhibited by GTs, as well as several other CAZy, found in Arabidopsis thaliana and described in ca. 180 publications. It covers most published data on A. thaliana GTs with evidenced functions. 
CSDB_GT is linked to the Carbohydrate Structure Database (CSDB), which stores data on archaeal, bacterial, fungal and plant glycans. The CSDB_GT data are supported by experimental evidences and can be traced to original publications. CSDB_GT is freely available at http://csdb.glycoscience.ru/gt.html.",2016-12-23 +31114921,ChEA3: transcription factor enrichment analysis by orthogonal omics integration.,"Identifying the transcription factors (TFs) responsible for observed changes in gene expression is an important step in understanding gene regulatory networks. ChIP-X Enrichment Analysis 3 (ChEA3) is a transcription factor enrichment analysis tool that ranks TFs associated with user-submitted gene sets. The ChEA3 background database contains a collection of gene set libraries generated from multiple sources including TF-gene co-expression from RNA-seq studies, TF-target associations from ChIP-seq experiments, and TF-gene co-occurrence computed from crowd-submitted gene lists. Enrichment results from these distinct sources are integrated to generate a composite rank that improves the prediction of the correct upstream TF compared to ranks produced by individual libraries. We compare ChEA3 with existing TF prediction tools and show that ChEA3 performs better. By integrating the ChEA3 libraries, we illuminate general transcription factor properties such as whether the TF behaves as an activator or a repressor. The ChEA3 web-server is available from https://amp.pharm.mssm.edu/ChEA3.",2019-07-01 +28471369,Pepsi-SAXS: an adaptive method for rapid and accurate computation of small-angle X-ray scattering profiles.,"A new method called Pepsi-SAXS is presented that calculates small-angle X-ray scattering profiles from atomistic models. The method is based on the multipole expansion scheme and is significantly faster compared with other tested methods. 
In particular, using the Nyquist-Shannon-Kotelnikov sampling theorem, the multipole expansion order is adapted to the size of the model and the resolution of the experimental data. It is argued that by using the adaptive expansion order, this method has the same quadratic dependence on the number of atoms in the model as the Debye-based approach, but with a much smaller prefactor in the computational complexity. The method has been systematically validated on a large set of over 50 models collected from the BioIsis and SASBDB databases. Using a laptop, it was demonstrated that Pepsi-SAXS is about seven, 29 and 36 times faster compared with CRYSOL, FoXS and the three-dimensional Zernike method in SAStbx, respectively, when tested on data from the BioIsis database, and is about five, 21 and 25 times faster compared with CRYSOL, FoXS and SAStbx, respectively, when tested on data from SASBDB. On average, Pepsi-SAXS demonstrates comparable accuracy in terms of χ2 to CRYSOL and FoXS when tested on BioIsis and SASBDB profiles. Together with a small allowed variation of adjustable parameters, this demonstrates the effectiveness of the method. Pepsi-SAXS is available at http://team.inria.fr/nano-d/software/pepsi-saxs.",2017-04-27 +31045205,MRPrimerW2: an enhanced tool for rapid design of valid high-quality primers with multiple search modes for qPCR experiments.,"For the best results in quantitative polymerase chain reaction (qPCR) experiments, it is essential to design high-quality primers considering a multitude of constraints and the purpose of experiments. The constraints include many filtering constraints, homology test on a huge number of off-target sequences, the same constraints for batch design of primers, exon spanning, and avoiding single nucleotide polymorphism (SNP) sites. 
The target sequences are either in database or given as FASTA sequences, and the experiment is for amplifying either each target sequence with each corresponding primer pairs designed under the same constraints or all target sequences with a single pair of primers. Many websites have been proposed, but none of them including our previous MRPrimerW fulfilled all the above features. Here, we describe the MRPrimerW2, the update version of MRPrimerW, which fulfils all the features by maintaining the advantages of MRPrimerW in terms of the kinds and sizes of databases for valid primers and the number of search modes. To achieve it, we exploited GPU computation and a disk-based key-value store using PCIe SSD. The complete set of 3 509 244 680 valid primers of MRPrimerW2 covers 99% of nine important organisms in an exhaustive manner. Free access: http://MRPrimerW2.com.",2019-07-01 +31259547,PerMM: A Web Tool and Database for Analysis of Passive Membrane Permeability and Translocation Pathways of Bioactive Molecules.,"The PerMM web server and database were developed for quantitative analysis and visualization of passive translocation of bioactive molecules across lipid membranes. The server is the first physics-based web tool that calculates membrane binding energies and permeability coefficients of diverse molecules through artificial and natural membranes (phospholipid bilayers, PAMPA-DS, blood-brain barrier, and Caco-2/MDCK cell membranes). It also visualizes the transmembrane translocation pathway as a sequence of translational and rotational positions of a permeant as it moves across the lipid bilayer, along with the corresponding changes in solvation energy. The server can be applied for prediction of permeability coefficients of compounds with diverse chemical scaffolds to facilitate selection and optimization of potential drug leads. 
The complementary PerMM database allows comparison of computationally and experimentally determined permeability coefficients for more than 500 compounds in different membrane systems. The website and database are freely accessible at https://permm.phar.umich.edu/ .",2019-07-01 +31106356,Yosshi: a web-server for disulfide engineering by bioinformatic analysis of diverse protein families.,"Disulfide bonds play a significant role in protein stability, function or regulation but are poorly conserved among evolutionarily related proteins. The Yosshi can help to understand the role of S-S bonds by comparing sequences and structures of homologs with diverse properties and different disulfide connectivity patterns within a common structural fold of a superfamily, and assist to select the most promising hot-spots to improve stability of proteins/enzymes or modulate their functions by introducing naturally occurring crosslinks. The bioinformatic analysis is supported by the integrated Mustguseal web-server to construct large structure-guided sequence alignments of functionally diverse protein families that can include thousands of proteins based on all available information in public databases. The Yosshi+Mustguseal is a new integrated web-tool for a systematic homology-driven analysis and engineering of S-S bonds that facilitates a broader interpretation of disulfides not just as a factor of structural stability, but rather as a mechanism to implement functional diversity within a superfamily. The results can be downloaded as a content-rich PyMol session file or further studied online using the HTML5-based interactive analysis tools. 
Both web-servers are free and open to all users at https://biokinet.belozersky.msu.ru/yosshi and there is no login requirement.",2019-07-01 +31073595,INGA 2.0: improving protein function prediction for the dark proteome.,"Our current knowledge of complex biological systems is stored in a computable form through the Gene Ontology (GO) which provides a comprehensive description of genes function. Prediction of GO terms from the sequence remains, however, a challenging task, which is particularly critical for novel genomes. Here we present INGA 2.0, a new version of the INGA software for protein function prediction. INGA exploits homology, domain architecture, interaction networks and information from the 'dark proteome', like transmembrane and intrinsically disordered regions, to generate a consensus prediction. INGA was ranked in the top ten methods on both CAFA2 and CAFA3 blind tests. The new algorithm can process entire genomes in a few hours or even less when additional input files are provided. The new interface provides a better user experience by integrating filters and widgets to explore the graph structure of the predicted terms. The INGA web server, databases and benchmarking are available from URL: https://inga.bio.unipd.it/.",2019-07-01 +30949694,OrganellarGenomeDRAW (OGDRAW) version 1.3.1: expanded toolkit for the graphical visualization of organellar genomes.,"Organellar (plastid and mitochondrial) genomes play an important role in resolving phylogenetic relationships, and next-generation sequencing technologies have led to a burst in their availability. The ongoing massive sequencing efforts require software tools for routine assembly and annotation of organellar genomes as well as their display as physical maps. OrganellarGenomeDRAW (OGDRAW) has become the standard tool to draw graphical maps of plastid and mitochondrial genomes. Here, we present a new version of OGDRAW equipped with a new front end. 
Besides several new features, OGDRAW now has access to a local copy of the organelle genome database of the NCBI RefSeq project. Together with batch processing of (multi-)GenBank files, this enables the user to easily visualize large sets of organellar genomes spanning entire taxonomic clades. The new OGDRAW server can be accessed at https://chlorobox.mpimp-golm.mpg.de/OGDraw.html.",2019-07-01 +28849559,Strategic Integration of Multiple Bioinformatics Resources for System Level Analysis of Biological Networks.,"Recent technological advances in genomics allow the production of biological data at unprecedented tera- and petabyte scales. Efficient mining of these vast and complex datasets for the needs of biomedical research critically depends on a seamless integration of the clinical, genomic, and experimental information with prior knowledge about genotype-phenotype relationships. Such experimental data accumulated in publicly available databases should be accessible to a variety of algorithms and analytical pipelines that drive computational analysis and data mining.We present an integrated computational platform Lynx (Sulakhe et al., Nucleic Acids Res 44:D882-D887, 2016) ( http://lynx.cri.uchicago.edu ), a web-based database and knowledge extraction engine. It provides advanced search capabilities and a variety of algorithms for enrichment analysis and network-based gene prioritization. It gives public access to the Lynx integrated knowledge base (LynxKB) and its analytical tools via user-friendly web services and interfaces. The Lynx service-oriented architecture supports annotation and analysis of high-throughput experimental data. Lynx tools assist the user in extracting meaningful knowledge from LynxKB and experimental data, and in the generation of weighted hypotheses regarding the genes and molecular mechanisms contributing to human phenotypes or conditions of interest. 
The goal of this integrated platform is to support the end-to-end analytical needs of various translational projects.",2017-01-01 +32184295,Nephrology Fellows' and Program Directors' Perceptions of Hospital Rounds in the United States.,"

Background and objectives

Hospital rounds are a traditional vehicle for patient-care delivery and experiential learning for trainees. We aimed to characterize practices and perceptions of rounds in United States nephrology training programs.

Design, setting, participants, & measurements

We conducted a national survey of United States nephrology fellows and program directors. Fellows received the survey after completing the 2019 National Board of Medical Examiners Nephrology In-Training Exam. Program directors received the survey at the American Society of Nephrology's 2019 Nephrology Training Program Directors' Retreat. Surveys assessed the structure and perceptions of rounds, focusing on workload, workflow, value for patient care, and fellows' clinical skill-building. Directors were queried about their expectations for fellow prerounds and efficiency of rounds. Responses were quantified by proportions.

Results

Fellow and program director response rates were 73% (n=621) and 70% (n=55). Most fellows (74%) report a patient census of >15, arrive at the hospital before 7:00 am (59%), and complete progress notes after 5:00 pm (46%). Among several rounding activities, fellows most valued bedside discussions for building their clinical skills (34%), but only 30% examine all patients with the attending at the bedside. Most directors (71%) expect fellows to both examine patients and collect data before attending-rounds. A majority (78%) of directors commonly complete their documentation after 5:00 pm, and for 36%, after 8:00 pm. Like fellows, directors most value bedside discussion for development of fellows' clinical skills (44%). Lack of preparedness for the rigors of nephrology fellowship was the most-cited barrier to efficient rounds (31%).

Conclusions

Hospital rounds in United States nephrology training programs are characterized by high patient volumes, early-morning starts, and late-evening clinical documentation. Fellows use a variety of prerounding styles and examine patients at the bedside with their attendings at different frequencies.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2020_03_17_CJN.10190819.mp3.",2020-03-17 +30304988,WBFQC: A new approach for compressing next-generation sequencing data splitting into homogeneous streams.,"Genomic data nowadays is playing a vital role in number of fields such as personalized medicine, forensic, drug discovery, sequence alignment and agriculture, etc. With the advancements and reduction in the cost of next-generation sequencing (NGS) technology, these data are growing exponentially. NGS data are being generated more rapidly than they could be significantly analyzed. Thus, there is much scope for developing novel data compression algorithms to facilitate data analysis along with data transfer and storage directly. An innovative compression technique is proposed here to address the problem of transmission and storage of large NGS data. This paper presents a lossless non-reference-based FastQ file compression approach, segregating the data into three different streams and then applying appropriate and efficient compression algorithms on each. Experiments show that the proposed approach (WBFQC) outperforms other state-of-the-art approaches for compressing NGS data in terms of compression ratio (CR), and compression and decompression time. It also has random access capability over compressed genomic data. An open source FastQ compression tool is also provided here ( http://www.algorithm-skg.com/wbfqc/home.html ).",2018-06-28 +27787827,Predicting Real-Valued Protein Residue Fluctuation Using FlexPred.,"The conventional view of a protein structure as static provides only a limited picture. There is increasing evidence that protein dynamics are often vital to protein function including interaction with partners such as other proteins, nucleic acids, and small molecules. Considering flexibility is also important in applications such as computational protein docking and protein design. 
While residue flexibility is partially indicated by experimental measures such as the B-factor from X-ray crystallography and ensemble fluctuation from nuclear magnetic resonance (NMR) spectroscopy as well as computational molecular dynamics (MD) simulation, these techniques are resource-intensive. In this chapter, we describe the web server and stand-alone version of FlexPred, which rapidly predicts absolute per-residue fluctuation from a three-dimensional protein structure. On a set of 592 nonredundant structures, comparing the fluctuations predicted by FlexPred to the observed fluctuations in MD simulations showed an average correlation coefficient of 0.669 and an average root mean square error of 1.07 Å. FlexPred is available at http://kiharalab.org/flexPred/ .",2017-01-01 +31077297,CaverDock: a molecular docking-based tool to analyse ligand transport through protein tunnels and channels.,"

Motivation

Protein tunnels and channels are key transport pathways that allow ligands to pass between proteins' external and internal environments. These functionally important structural features warrant detailed attention. It is difficult to study the ligand binding and unbinding processes experimentally, while molecular dynamics simulations can be time-consuming and computationally demanding.

Results

CaverDock is a new software tool for analysing the ligand passage through the biomolecules. The method uses the optimized docking algorithm of AutoDock Vina for ligand placement docking and implements a parallel heuristic algorithm to search the space of possible trajectories. The duration of the simulations takes from minutes to a few hours. Here we describe the implementation of the method and demonstrate CaverDock's usability by: (i) comparison of the results with other available tools, (ii) determination of the robustness with large ensembles of ligands and (iii) the analysis and comparison of the ligand trajectories in engineered tunnels. Thorough testing confirms that CaverDock is applicable for the fast analysis of ligand binding and unbinding in fundamental enzymology and protein engineering.

Availability and implementation

User guide and binaries for Ubuntu are freely available for non-commercial use at https://loschmidt.chemi.muni.cz/caverdock/. The web implementation is available at https://loschmidt.chemi.muni.cz/caverweb/. The source code is available upon request.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +31099381,Iterative feature representations improve N4-methylcytosine site prediction.,"

Motivation

Accurate identification of N4-methylcytosine (4mC) modifications in a genome wide can provide insights into their biological functions and mechanisms. Machine learning recently have become effective approaches for computational identification of 4mC sites in genome. Unfortunately, existing methods cannot achieve satisfactory performance, owing to the lack of effective DNA feature representations that are capable to capture the characteristics of 4mC modifications.

Results

In this work, we developed a new predictor named 4mcPred-IFL, aiming to identify 4mC sites. To represent and capture discriminative features, we proposed an iterative feature representation algorithm that enables to learn informative features from several sequential models in a supervised iterative mode. Our analysis results showed that the feature representations learnt by our algorithm can capture the discriminative distribution characteristics between 4mC sites and non-4mC sites, enlarging the decision margin between the positives and negatives in feature space. Additionally, by evaluating and comparing our predictor with the state-of-the-art predictors on benchmark datasets, we demonstrate that our predictor can identify 4mC sites more accurately.

Availability and implementation

The user-friendly webserver that implements the proposed 4mcPred-IFL is well established, and is freely accessible at http://server.malab.cn/4mcPred-IFL.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +31504343,Influence of Drug Exposure on Vedolizumab-Induced Endoscopic Remission in Anti-Tumour Necrosis Factor [TNF] Naïve and Anti-TNF Exposed IBD Patients.,"

Background and objectives

Vedolizumab has demonstrated efficacy and safety in patients with Crohn's disease [CD] and ulcerative colitis [UC]. Endoscopic outcome data are limited, especially in anti-tumour necrosis factor [TNF] naïve patients. The present study compared endoscopic outcome in anti-TNF naïve and exposed patients, and explored if this was affected by drug exposure.

Methods

We retrospectively analysed all patients initiating vedolizumab at our tertiary referral centre since 2015. For UC, endoscopic improvement was defined as a Mayo endoscopic subscore ≤1 at week 14. For CD, endoscopic remission was defined as absence of ulcerations at week 22. Vedolizumab trough concentrations were measured at week 6, week 14 and during maintenance.

Results

A total of 336 patients were identified [53.3% CD], 20% of them being anti-TNF naïve. Endoscopic improvement was achieved by 56.1% of UC patients and endoscopic remission by 39.1% of CD patients. Endoscopic outcomes were significantly better in anti-TNF naïve vs exposed patients [all: 67.2% vs 42.0%, p = 0.0002; UC: 74.4% vs 50.0%, p = 0.02; CD: 57.1% vs 35.8%, p = 0.03]. Achievement of endoscopic end points significantly impacted long-term treatment continuation [p = 9.7 × 10⁻¹³]. A better endoscopic outcome was associated with significantly higher drug exposure in both CD and UC.

Conclusions

The results of this observational, single-centre real-life study suggest that vedolizumab may induce endoscopic remission in both CD and UC. Although anti-TNF naïve patients had a significantly better outcome, 42% of anti-TNF exposed patients still benefited endoscopically. A clear exposure-endoscopic response relationship exists, but not all patients will benefit from treatment intensification. Hence, predictive biomarkers remain necessary.

Podcast

This article has an associated podcast which can be accessed at https://academic.oup.com/ecco-jcc/pages/podcast.",2020-03-01 +31125060,EROS-DOCK: protein-protein docking using exhaustive branch-and-bound rotational search.,"

Motivation

Protein-protein docking algorithms aim to predict the 3D structure of a binary complex using the structures of the individual proteins. This typically involves searching and scoring in a 6D space. Many docking algorithms use FFT techniques to exhaustively cover the search space and to accelerate the scoring calculation. However, FFT docking results often depend on the initial protein orientations with respect to the Fourier sampling grid. Furthermore, Fourier-transforming a physics-based force field can involve a serious loss of precision.

Results

Here, we present EROS-DOCK, an algorithm to rigidly dock two proteins using a series of exhaustive 3D rotational searches in which non-clashing orientations are scored using the ATTRACT coarse-grained force field model. The rotational space is represented as a quaternion 'π-ball', which is systematically sub-divided in a 'branch-and-bound' manner, allowing efficient pruning of rotations that will give steric clashes. The algorithm was tested on 173 Docking Benchmark complexes, and results were compared with those of ATTRACT and ZDOCK. According to the CAPRI quality criteria, EROS-DOCK typically gives more acceptable or medium quality solutions than ATTRACT and ZDOCK.

Availability and implementation

The EROS-DOCK program is available for download at http://erosdock.loria.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +31603323,Exhaustive Repertoire of Druggable Cavities at Protein-Protein Interfaces of Known Three-Dimensional Structure.,"Protein-protein interactions (PPIs) offer the unique opportunity to tailor ligands aimed at specifically stabilizing or disrupting the corresponding interfaces and providing a safer alternative to conventional ligands targeting monomeric macromolecules. Selecting biologically relevant protein-protein interfaces for either stabilization or disruption by small molecules is usually biology-driven on a case-by-case basis and does not follow a structural rationale that could be applied to an entire interactome. We herewith provide a first step to the latter goal by using a fully automated and structure-based workflow, applicable to any PPI of known three-dimensional (3D) structure, to identify and prioritize druggable cavities at and nearby PPIs of pharmacological interest. When applied to the entire Protein Data Bank, 164 514 druggable cavities were identified and classified in four groups (interfacial, rim, allosteric, orthosteric) according to their properties and spatial locations. Systematic comparison of PPI cavities with pockets deduced from druggable protein-ligand complexes shows almost no overlap in property space, suggesting that even the most druggable PPI cavities are unlikely to be addressed with conventional drug-like compound libraries. The archive is freely accessible at http://drugdesign.unistra.fr/ppiome .",2019-10-25 +29963059,Predicting HLA CD4 Immunogenicity in Human Populations.,"

Background

Prediction of T cell immunogenicity is a topic of considerable interest, both in terms of basic understanding of the mechanisms of T cells responses and in terms of practical applications. HLA binding affinity is often used to predict T cell epitopes, since HLA binding affinity is a key requisite for human T cell immunogenicity. However, immunogenicity at the population level is complicated by the high level of variability of HLA molecules, potential other factors beyond HLA as well as the frequent lack of HLA typing data. To overcome those issues, we explored an alternative approach to identify the common characteristics able to distinguish immunogenic peptides from non-recognized peptides.

Methods

Sets of dominant epitopes derived from peer-reviewed published papers were used in conjunction with negative peptides from the same experiments/donors to train neural networks and generate an ""immunogenicity score."" We also compared the performance of the immunogenicity score with previously described method for immunogenicity prediction based on HLA class II binding at the population level.

Results

The immunogenicity score was validated on a series of independent datasets derived from the published literature, representing 57 independent studies where immunogenicity in human populations was assessed by testing overlapping peptides spanning different antigens. Overall, these testing datasets corresponded to over 2,000 peptides and tested in over 1,600 different human donors. The 7-allele method prediction and the immunogenicity score were associated with similar performance [average area under the ROC curve (AUC) values of 0.703 and 0.702, respectively] while the combined methods reached an average AUC of 0.725. This increase in average AUC value is significant compared with the immunogenicity score (p = 0.0135) and a strong trend toward significance is observed when compared to the 7-allele method (p = 0.0938). The new immunogenicity score method is now freely available using CD4 T cell immunogenicity prediction tool on the Immune Epitope Database website (http://tools.iedb.org/CD4episcore).

Conclusion

The new immunogenicity score predicts CD4 T cell immunogenicity at the population level starting from protein sequences and with no need for HLA typing. Its efficacy has been validated in the context of different antigen sources, ethnicities, and disparate techniques for epitope identification.",2018-06-14 +29722838,"Vitamin A and D intake in pregnancy, infant supplementation, and asthma development: the Norwegian Mother and Child Cohort.","

Background

Western diets may provide excess vitamin A, which is potentially toxic and could adversely affect respiratory health and counteract benefits from vitamin D.

Objective

The aim of this study was to examine child asthma at age 7 y in relation to maternal intake of vitamins A and D during pregnancy, infant supplementation with these vitamins, and their potential interaction.

Design

We studied 61,676 school-age children (born during 2002-2007) from the Norwegian Mother and Child Cohort with data on maternal total (food and supplement) nutrient intake in pregnancy (food-frequency questionnaire validated against biomarkers) and infant supplement use at age 6 mo (n = 54,142 children). Linkage with the Norwegian Prescription Database enabled near-complete follow-up (end of second quarter in 2015) for dispensed medications to classify asthma. We used log-binomial regression to calculate adjusted RRs (aRRs) for asthma with 95% CIs.

Results

Asthma increased according to maternal intake of total vitamin A [retinol activity equivalents (RAEs)] in the highest (≥2031 RAEs/d) compared with the lowest (≤779 RAEs/d) quintile (aRR: 1.21; 95% CI: 1.05, 1.40) and decreased for total vitamin D in the highest (≥13.6 µg/d) compared with the lowest (≤3.5 µg/d) quintile (aRR: 0.81; 95% CI: 0.67, 0.97) during pregnancy. No association was observed for maternal intake in the highest quintiles of both nutrients (aRR: 0.99; 95% CI: 0.83, 1.18) and infant supplementation with vitamin D or cod liver oil.

Conclusions

Excess vitamin A (≥2.5 times the recommended intake) during pregnancy was associated with increased risk, whereas vitamin D intake close to recommendations was associated with a reduced risk of asthma in school-age children. No association for high intakes of both nutrients suggests antagonistic effects of vitamins A and D. This trial was registered at http://www.clinicaltrials.gov as NCT03197233.",2018-05-01 +31031920,"Accounting for kin sampling reveals genetic connectivity in Tasmanian and New Zealand school sharks, Galeorhinus galeus.","Fishing represents a major problem for conservation of chondrichthyans, with a quarter of all species being overexploited. School sharks, Galeorhinus galeus, are targeted by commercial fisheries in Australia and New Zealand. The Australian stock has been depleted to below 20% of its virgin biomass, and the species is recorded as Conservation Dependent within Australia. Individuals are known to move between both countries, but it is disputed whether the stocks are reproductively linked. Accurate and unbiased determination of stock and population connectivity is crucial to inform effective management. In this study, we assess the genetic composition and population connectivity between Australian and New Zealand school sharks using genome-wide SNPs, while accounting for non-random kin sampling. Between 2009 and 2013, 88 neonate and juvenile individuals from Tasmanian and New Zealand nurseries were collected and genotyped. Neutral loci were analyzed to detect fine-scale signals of reproductive connectivity. Seven full-sibling groups were identified and removed for unbiased analysis. Based on 6,587 neutral SNPs, pairwise genetic differentiation from Tasmanian and New Zealand neonates was non-significant (F ST = 0.0003, CI95 = [-0.0002, 0.0009], p = 0.1163; D est = 0.0006 ± 0.0002). This pattern was supported by clustering results. 
In conclusion, we show a significant effect of non-random sampling of kin and identify fine-scale reproductive connectivity between Australian and New Zealand school sharks.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.pd8612j.",2019-04-01 +31254009,"Diversity of Wolbachia Associated with the Giant Turtle Ant, Cephalotes atratus.","Symbiotic relationships between organisms are common throughout the tree of life, and often these organisms share an evolutionary history. In turtle ants (Cephalotes), symbiotic associations with bacteria are known to be especially important for supplementing the nutrients that their herbivorous diets do not provide. However, much remains unknown about the diversity of many common bacterial symbionts with turtle ants, such as Wolbachia. Here, we surveyed the diversity of Wolbachia, focusing on one species of turtle ant with a particularly wide geographic range, Cephalotes atratus. Colonies were collected from the entire range of C. atratus, and we detected the presence of Wolbachia by sequencing multiple individuals per colony for wsp. Then, using the multilocus sequence typing (MLST) approach, we determined each individual's unique sequence type (ST) based on comparison to sequences published in the Wolbachia MLST Database ( https://pubmlst.org/wolbachia/ ). The results of this study suggest that there is a high level of diversity of Wolbachia strains among colonies from different regions, while the diversity within colonies is very low. Additionally, 13 novel variants (alleles) were uncovered. These results suggest that the level of diversity of Wolbachia within species is affected by geography, and the high level of diversity observed among Cephalotes atratus populations may be explained by their wide geographic range.",2019-06-28 +25928765,coMET: visualisation of regional epigenome-wide association scan results and DNA co-methylation patterns.,"

Background

Epigenome-wide association scans (EWAS) are an increasingly powerful and widely-used approach to assess the role of epigenetic variation in human complex traits. However, this rapidly emerging field lacks dedicated visualisation tools that can display features specific to epigenetic datasets.

Result

We developed coMET, an R package and online tool for visualisation of EWAS results in a genomic region of interest. coMET generates a regional plot of epigenetic-phenotype association results and the estimated DNA methylation correlation between CpG sites (co-methylation), with further options to visualise genomic annotations based on ENCODE data, gene tracks, reference CpG-sites, and user-defined features. The tool can be used to display phenotype association signals and correlation patterns of microarray or sequencing-based DNA methylation data, such as Illumina Infinium 450k, WGBS, or MeDIP-seq, as well as other types of genomic data, such as gene expression profiles. The software is available as a user-friendly online tool from http://epigen.kcl.ac.uk/comet and as an R Bioconductor package. Source code, examples, and full documentation are also available from GitHub.

Conclusion

Our new software allows visualisation of EWAS results with functional genomic annotations and with estimation of co-methylation patterns. coMET is available to a wide audience as an online tool and R package, and can be a valuable resource to interpret results in the fast growing field of epigenetics. The software is designed for epigenetic data, but can also be applied to genomic and functional genomic datasets in any species.",2015-04-28 +,Phenopix: A R package for image-based vegetation phenology,"In this paper we extensively describe new software available as a R package that allows for the extraction of phenological information from time-lapse digital photography of vegetation cover. The phenopix R package includes all steps in data processing. It enables the user to: draw a region of interest (ROI) on an image; extract red green and blue digital numbers (DN) from a seasonal series of images; depict greenness index trajectories; fit a curve to the seasonal trajectories; extract relevant phenological thresholds (phenophases); extract phenophase uncertainties.The software capabilities are illustrated by analyzing one year of data from a selection of seven sites belonging to the PhenoCam network (http://phenocam.sr.unh.edu/), including an unmanaged subalpine grassland, a tropical grassland, a deciduous needle-leaf forest, three deciduous broad-leaf temperate forests and an evergreen needle-leaf forest. One of the novelties introduced by the package is the spatially explicit, pixel-based analysis, which potentially allows to extract within-ecosystem or within-individual variability of phenology. 
We examine the relationship between phenophases extracted by the traditional ROI-averaged and the novel pixel-based approaches, and further illustrate potential applications of pixel-based image analysis available in the phenopix R package.",2016-04-01 +30488835,QALOG-Pro: A web-based computer program for machine daily log and quality assurance and quality control data management.,"

Aim of study

Computer-based data applications increase the efficiency, ease of access to data, and tidiness, especially in the modern radiotherapy centers. A computer application program is presented here which replaces archaic paper-based daily log (Log Book), quality assurance, and quality control (QA/QC) documentation.

Materials and methods

QALOG-Pro program is installed on an http://server and accessed using wire free (WiFi) network and run on a tablet device. The web browser is used to run this program on the tablet device, and the data is stored on the server. The linear accelerator (LINAC) history of events and QA/QC test results are entered under activity menu.

Results

The program can produce reports of the data of activities in the available reports menu; this allows the user to view the various activities on machine. The ""calculate"" menu gives the total time between the failures and total time to repair also called uptime and downtime, respectively for an equipment and is viewable in a Pie chart display. View menu lists all the available equipments and scheduled activities. The ""administrator"" and ""manage data"" menu is accessible for a system administrator to manage various administrator activities and data management.

Conclusion

QALOG-Pro is helpful to the Radiation Oncology Department in managing LINACs by recording the events of the machine under a daily log and to calculate uptime and downtime of the machine. This application will help the radiotherapy centers or clinics to eliminate printed versions and walk toward paperless for day-to-day log and QA/QC information recording and administration.",2018-10-01 +31018945,Increased RNA Expression of von Willebrand Factor Gene Is Associated With Infiltrating Lobular Breast Cancer and Normal PAM50 Subtype.,"

Background

Infiltrating lobular carcinoma (ILC) is the second most common histological subtype of breast cancer, accounting for 10% of all cases. ILC has a characteristic genomic profile. ILC shows a high frequency of cadherin 1 (CDH1) mutations, along with loss of phosphatase and tensin homolog (PTEN), activation of alpha serine/threonine kinase (AKT), and mutations in T-box transcription factor (TBX3) and forkhead box protein A1 (FOXA1). We suspected that another gene, von Willebrand factor (VWF), might also be part of the profile, since coagulation tests reveal significant differences in patients with breast cancer.

Materials and methods

For newly-diagnosed breast cancer, the association between VWF and histology in the GDC Breast Cancer dataset in The Cancer Genome Atlas (TCGA) was evaluated. The following were used to access and analyze the data: Genomic Data Commons Data Portal (https://portal.gdc.cancer.gov/); Xena browser (https://xenabrowser.net); cBioportal (http://cbioportal.org); Oncomine (https://oncomine.org); and Prediction Analysis of Microarray 50 (PAM50).

Results

Patients with ILC had higher VWF RNA expression than patients with infiltrating ductal carcinoma and other histology. The difference of expression was present to the same degree in both pre-menopausal and post-menopausal patients. Nine alterations in VWF and PTEN were significantly co-occurrent. Considering all histologies in 843 samples, Tukey's honest significant difference post hoc test showed that VWF RNA expression of the normal subtype was significantly greater than that of the other subtypes (p<0.001).

Conclusion

Our finding of significantly higher VWF RNA expression in the PAM50 normal (non-basal-like) breast cancer subtype suggests that VWF protein measurement might complement or corroborate PAM50 results. VWF and PAM50 results both suggesting a low risk of recurrence might make the decision whether to give chemotherapy easier, especially if VWF protein were an independent predictor.",2019-05-01 +31346510,iPASTIC: An online toolkit to estimate plant abiotic stress indices.,"

Premise

In crop breeding programs, breeders use yield performance in both optimal and stressful environments as a key indicator for screening the most tolerant genotypes. During the past four decades, several yield-based indices have been suggested for evaluating stress tolerance in crops. Despite the well-established use of these indices in agronomy and plant breeding, a user-friendly software that would provide access to these methods is still lacking.

Methods and results

The Plant Abiotic Stress Index Calculator (iPASTIC) is an online program based on JavaScript and R that calculates common stress tolerance and susceptibility indices for various crop traits including the tolerance index (TOL), relative stress index (RSI), mean productivity (MP), harmonic mean (HM), yield stability index (YSI), geometric mean productivity (GMP), stress susceptibility index (SSI), stress tolerance index (STI), and yield index (YI). Along with these indices, this easily accessible tool can also calculate their ranking patterns, estimate the relative frequency for each index, and create heat maps based on Pearson's and Spearman's rank-order correlation analyses. In addition, it can also render three-dimensional plots based on both yield performances and each index to separate entry genotypes into Fernandez's groups (A, B, C, and D), and perform principal component analysis. The accuracy of the results calculated from our software was tested using two different data sets obtained from previous experiments testing the salinity and drought stress in wheat genotypes, respectively.

Conclusions

iPASTIC can be widely used in agronomy and plant breeding programs as a user-friendly interface for agronomists and breeders dealing with large volumes of data. The software is available at https://mohsenyousefian.com/ipastic/.",2019-07-17 +27924010,DNA Data Bank of Japan.,"The DNA Data Bank of Japan (DDBJ) (http://www.ddbj.nig.ac.jp) has been providing public data services for thirty years (since 1987). We are collecting nucleotide sequence data from researchers as a member of the International Nucleotide Sequence Database Collaboration (INSDC, http://www.insdc.org), in collaboration with the US National Center for Biotechnology Information (NCBI) and European Bioinformatics Institute (EBI). The DDBJ Center also services Japanese Genotype-phenotype Archive (JGA), with the National Bioscience Database Center to collect human-subjected data from Japanese researchers. Here, we report our database activities for INSDC and JGA over the past year, and introduce retrieval and analytical services running on our supercomputer system and their recent modifications. Furthermore, with the Database Center for Life Science, the DDBJ Center improves semantic web technologies to integrate and to share biological data, for providing the RDF version of the sequence data.",2016-10-24 +31110669,"Combinations of reproductive, individual, and weather effects best explain torpor patterns among female little brown bats (Myotis lucifugus).","Heterothermic mammals can use torpor, a state of metabolic suppression, to conserve energy during times of limited food and poor environmental conditions. Females may use torpor throughout gestation and lactation; however, there are associated physiological and ecological costs with potential fitness consequences. Previous studies have controlled for, but not quantified the impact of interindividual variation on torpor patterns and understanding this may provide insight on why certain thermoregulatory responses are employed. 
The objective of this study was to identify and quantitatively characterize the intrinsic variables and weather conditions that best explain variation in torpor patterns among individual female little brown bats, Myotis lucifugus. We used temperature-sensitive radio-transmitters affixed to females to measure skin temperature patterns of 35 individuals roosting in bat boxes in the spring and summer. We used Bayesian multi-model inference to rank a priori-selected models and variables based on their explanatory power. Reproductive condition and interindividual effects best explained torpor duration and depth, and weather best explained torpor frequency. Of the reproductive conditions, lactating females used torpor for the shortest durations and at shallower depths (i.e., smallest drop in minimum T sk), while females in early spring (i.e., not-obviously-pregnant) used torpor for the longest and deepest. Among individuals, the greatest difference in effects on duration occurred between pregnant individuals, suggesting interindividual variation within reproductive condition. Increases in precipitation and wind were associated with a higher probability of torpor use. Our results provide further support that multiple variables explain torpor patterns and highlight the importance of including individual effects when studying thermoregulatory patterns in heterothermic species. OPEN RESEARCH BADGES:This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.c04tj85.",2019-04-16 +31985273,Screening ToxCast™ for Chemicals That Affect Cholesterol Biosynthesis: Studies in Cell Culture and Human Induced Pluripotent Stem Cell-Derived Neuroprogenitors.,"

Background

Changes in cholesterol metabolism are common hallmarks of neurodevelopmental pathologies. A diverse array of genetic disorders of cholesterol metabolism support this claim as do multiple lines of research that demonstrate chemical inhibition of cholesterol biosynthesis compromises neurodevelopment. Recent work has revealed that a number of commonly used pharmaceuticals induce changes in cholesterol metabolism that are similar to changes induced by genetic disorders with devastating neurodevelopmental deficiencies.

Objectives

We tested the hypothesis that common environmental toxicants may also impair cholesterol metabolism and thereby possibly contribute to neurodevelopmental toxicity.

Methods

Using high-throughput screening with a targeted lipidomic analysis and the mouse neuroblastoma cell line, Neuro-2a, the ToxCast™ chemical library was screened for compounds that impact sterol metabolism. Validation of chemical effects was conducted by assessing cholesterol biosynthesis in human induced pluripotent stem cell (hiPSC)-derived neuroprogenitors using an isotopically labeled cholesterol precursor and by monitoring product formation with UPLC-MS/MS.

Results

Twenty-nine compounds were identified as validated lead-hits, and four were prioritized for further study (endosulfan sulfate, tributyltin chloride, fenpropimorph, and spiroxamine). All four compounds were validated to cause hypocholesterolemia in Neuro-2a cells. The morpholine-like fungicides, fenpropimorph and spiroxamine, mirrored their Neuro-2a activity in four immortalized human cell lines and in a human neuroprogenitor model derived from hiPSCs, but endosulfan sulfate and tributyltin chloride did not.

Conclusions

These data reveal the existence of environmental compounds that interrupt cholesterol biosynthesis and that methodologically hiPSC neuroprogenitor cells provide a particularly sensitive system to monitor the effect of small molecules on de novo cholesterol formation. https://doi.org/10.1289/EHP5053.",2020-01-27 +23074187,"ZFIN, the Zebrafish Model Organism Database: increased support for mutants and transgenics.","ZFIN, the Zebrafish Model Organism Database (http://zfin.org), is the central resource for zebrafish genetic, genomic, phenotypic and developmental data. ZFIN curators manually curate and integrate comprehensive data involving zebrafish genes, mutants, transgenics, phenotypes, genotypes, gene expressions, morpholinos, antibodies, anatomical structures and publications. Integrated views of these data, as well as data gathered through collaborations and data exchanges, are provided through a wide selection of web-based search forms. Among the vertebrate model organisms, zebrafish are uniquely well suited for rapid and targeted generation of mutant lines. The recent rapid production of mutants and transgenic zebrafish is making management of data associated with these resources particularly important to the research community. Here, we describe recent enhancements to ZFIN aimed at improving our support for mutant and transgenic lines, including (i) enhanced mutant/transgenic search functionality; (ii) more expressive phenotype curation methods; (iii) new downloads files and archival data access; (iv) incorporation of new data loads from laboratories undertaking large-scale generation of mutant or transgenic lines and (v) new GBrowse tracks for transgenic insertions, genes with antibodies and morpholinos.",2012-10-15 +31313948,Oral Systemic Bioavailability of Bisphenol A and Bisphenol S in Pigs.,"

Background

Given its hormonal activity, bisphenol S (BPS) as a substitute for bisphenol A (BPA) could actually increase the risk of endocrine disruption if its toxicokinetic (TK) properties, namely its oral availability and systemic persistency, were higher than those of BPA.

Objectives

The TK behavior of BPA and BPS was investigated by administering the two compounds by intravenous and oral routes in piglet, a known valid model for investigating oral TK.

Methods

Experiments were conducted in piglets to evaluate the kinetics of BPA, BPS, and their glucuronoconjugated metabolites in plasma and urine after intravenous administration of BPA, BPS, and BPS glucuronide (BPSG) and gavage administration of BPA and BPS. A population semiphysiologically based TK model describing the disposition of BPA and BPS and their glucuronides was built from these data to estimate the key TK parameters that drive the internal exposure to active compounds.

Results

The data indicated that almost all the BPS oral dose was absorbed and transported into the liver where only 41% of BPS was glucuronidated, leading to a systemic bioavailability of 57.4%. In contrast, only 77% of the oral dose of BPA was absorbed and underwent an extensive first-pass glucuronidation either in the gut (44%) or in the liver (53%), thus accounting for the low systemic bioavailability of BPA (0.50%). Due to the higher systemic availability of BPS, in comparison with BPA, and its lower plasma clearance (3.5 times lower), the oral BPS systemic exposure was on average about 250 times higher than for BPA for an equal oral molar dose of the two compounds.

Conclusion

Given the similar digestive tracts of pigs and humans, our results suggest that replacing BPA with BPS will likely lead to increased internal exposure to an endocrine-active compound that would be of concern for human health. https://doi.org/10.1289/EHP4599.",2019-07-17 +29385418,DIBS: a repository of disordered binding sites mediating interactions with ordered proteins.,"

Motivation

Intrinsically Disordered Proteins (IDPs) mediate crucial protein-protein interactions, most notably in signaling and regulation. As their importance is increasingly recognized, the detailed analyses of specific IDP interactions opened up new opportunities for therapeutic targeting. Yet, large scale information about IDP-mediated interactions in structural and functional details are lacking, hindering the understanding of the mechanisms underlying this distinct binding mode.

Results

Here, we present DIBS, the first comprehensive, curated collection of complexes between IDPs and ordered proteins. DIBS not only describes by far the highest number of cases, it also provides the dissociation constants of their interactions, as well as the description of potential post-translational modifications modulating the binding strength and linear motifs involved in the binding. Together with the wide range of structural and functional annotations, DIBS will provide the cornerstone for structural and functional studies of IDP complexes.

Availability and implementation

DIBS is freely accessible at http://dibs.enzim.ttk.mta.hu/. The DIBS application is hosted by Apache web server and was implemented in PHP. To enrich querying features and to enhance backend performance a MySQL database was also created.

Contact

dosztanyi@caesar.elte.hu or bmeszaros@caesar.elte.hu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-02-01 +25883148,I-TASSER server: new development for protein structure and function predictions.,"The I-TASSER server (http://zhanglab.ccmb.med.umich.edu/I-TASSER) is an online resource for automated protein structure prediction and structure-based function annotation. In I-TASSER, structural templates are first recognized from the PDB using multiple threading alignment approaches. Full-length structure models are then constructed by iterative fragment assembly simulations. The functional insights are finally derived by matching the predicted structure models with known proteins in the function databases. Although the server has been widely used for various biological and biomedical investigations, numerous comments and suggestions have been reported from the user community. In this article, we summarize recent developments on the I-TASSER server, which were designed to address the requirements from the user community and to increase the accuracy of modeling predictions. Focuses have been made on the introduction of new methods for atomic-level structure refinement, local structure quality estimation and biological function annotations. We expect that these new developments will improve the quality of the I-TASSER server and further facilitate its use by the community for high-resolution structure and function prediction.",2015-04-16 +28582506,Glycan Reader is improved to recognize most sugar types and chemical modifications in the Protein Data Bank.,"

Motivation

Glycans play a central role in many essential biological processes. Glycan Reader was originally developed to simplify the reading of Protein Data Bank (PDB) files containing glycans through the automatic detection and annotation of sugars and glycosidic linkages between sugar units and to proteins, all based on atomic coordinates and connectivity information. Carbohydrates can have various chemical modifications at different positions, making their chemical space much more diverse. Unfortunately, current PDB files do not provide exact annotations for most carbohydrate derivatives and more than 50% of PDB glycan chains have at least one carbohydrate derivative that could not be correctly recognized by the original Glycan Reader.

Results

Glycan Reader has been improved and now identifies most sugar types and chemical modifications (including various glycolipids) in the PDB, and both PDB and PDBx/mmCIF formats are supported. CHARMM-GUI Glycan Reader is updated to generate the simulation system and input of various glycoconjugates with most sugar types and chemical modifications. It also offers a new functionality to edit the glycan structures through addition/deletion/modification of glycosylation types, sugar types, chemical modifications, glycosidic linkages, and anomeric states. The simulation system and input files can be used for CHARMM, NAMD, GROMACS, AMBER, GENESIS, LAMMPS, Desmond, OpenMM, and CHARMM/OpenMM. Glycan Fragment Database in GlycanStructure.Org is also updated to provide an intuitive glycan sequence search tool for complex glycan structures with various chemical modifications in the PDB.

Availability and implementation

http://www.charmm-gui.org/input/glycan and http://www.glycanstructure.org.

Contact

wonpil@lehigh.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +31242807,Role of the Aryl Hydrocarbon Receptor/ARNT/Cytochrome P450 System in Pulmonary Vascular Diseases.,"

Rationale

CYPs (cytochrome p450) are critically involved in the metabolism of xenobiotics and toxins. Given that pulmonary hypertension is strongly associated with environmental exposure, we hypothesize that CYPs play a role in the development and maintenance of pathological vascular remodeling.

Objective

We sought to identify key CYPs that could link drug or hormone metabolism to the development of pulmonary hypertension.

Methods and results

We searched in Medline (PubMed) database, as well as http://www.clinicaltrials.gov, for CYPs associated with many key aspects of pulmonary arterial hypertension including, but not limited to, severe pulmonary hypertension, estrogen metabolism, inflammation mechanisms, quasi-malignant cell growth, drug susceptibility, and metabolism of the pulmonary arterial hypertension-specific drugs.

Conclusions

We postulate a hypothesis where the AhR (aryl hydrocarbon receptor) mediates aberrant cell growth via expression of different CYPs associated with estrogen metabolism and inflammation.",2019-06-26 +28137890,An Interactive Macrophage Signal Transduction Map Facilitates Comparative Analyses of High-Throughput Data.,"Macrophages (Mϕs) are key players in the coordination of the lifesaving or detrimental immune response against infections. The mechanistic understanding of the functional modulation of Mϕs by pathogens and pharmaceutical interventions at the signal transduction level is still far from complete. The complexity of pathways and their cross-talk benefits from holistic computational approaches. In the present study, we reconstructed a comprehensive, validated, and annotated map of signal transduction pathways in inflammatory Mϕs based on the current literature. In a second step, we selectively expanded this curated map with database knowledge. We provide both versions to the scientific community via a Web platform that is designed to facilitate exploration and analysis of high-throughput data. The platform comes preloaded with logarithmic fold changes from 44 data sets on Mϕ stimulation. We exploited three of these data sets-human primary Mϕs infected with the common lung pathogens Streptococcus pneumoniae, Legionella pneumophila, or Mycobacterium tuberculosis-in a case study to show how our map can be customized with expression data to pinpoint regulated subnetworks and druggable molecules. From the three infection scenarios, we extracted a regulatory core of 41 factors, including TNF, CCL5, CXCL10, IL-18, and IL-12 p40, and identified 140 drugs targeting 16 of them. Our approach promotes a comprehensive systems biology strategy for the exploitation of high-throughput data in the context of Mϕ signal transduction. In conclusion, we provide a set of tools to help scientists unravel details of Mϕ signaling. 
The interactive version of our Mϕ signal transduction map is accessible online at https://vcells.net/macrophage.",2017-01-30 +26071784,Aging Saves Populations from Extinction under Lack of Resources: in silico Experiments.,"By admitting the programmed organism death (phenoptosis) concept, it inevitably raises a question as to what advantages it gives to communities containing elderly and naturally weakened individuals. We believe that the broadest prevalence of the aging phenomenon is accounted for, particularly, by the fact that in certain situations occurrence of elderly individuals may guarantee not only evolution, but also the mere existence of populations. The goal of our study was to create a mathematical model illustrating the occurrence of situations when existence of elderly individuals accounts for population survival, whereas an ageless population would be completely extinguished. A logic basis for such model is as follows: 1) natural populations exist under conditions of uneven renewal of resources; 2) if resources are renewed at a high level and rapidly compensated by their restoration, then a population strives to achieve a maximum size, which is limited not by resource availability, but specific biological mechanisms; 3) rate of resource influx may decline down to zero very rapidly (e.g. during drought); 4) a capacity, at least, for some individuals to survive during resource shortage is crucial for survival of the entire population; 5) rapid extinction of the weaker elderly individuals saves resources for survival of the younger and stronger ones. A multi-agent simulation is used as a mathematical basis for the proposed model (http://www.winmobile.biz). In silico experiments confirmed the lack of fatal contradictions in our logical construction. 
The presence of the aged individuals once the aging program has been turned at the age of 25-30 years results in a 24-26% increase in lifetime of the population.",2015-05-01 +27863463,The ChIP-Seq tools and web server: a resource for analyzing ChIP-seq and other types of genomic data.,"

Background

ChIP-seq and related high-throughput chromatin profiling assays generate ever increasing volumes of highly valuable biological data. To make sense out of it, biologists need versatile, efficient and user-friendly tools for access, visualization and integrative analysis of such data.

Results

Here we present the ChIP-Seq command line tools and web server, implementing basic algorithms for ChIP-seq data analysis starting with a read alignment file. The tools are optimized for memory-efficiency and speed thus allowing for processing of large data volumes on inexpensive hardware. The web interface provides access to a large database of public data. The ChIP-Seq tools have a modular and interoperable design in that the output from one application can serve as input to another one. Complex and innovative tasks can thus be achieved by running several tools in a cascade.

Conclusions

The various ChIP-Seq command line tools and web services either complement or compare favorably to related bioinformatics resources in terms of computational efficiency, ease of access to public data and interoperability with other web-based tools. The ChIP-Seq server is accessible at http://ccg.vital-it.ch/chipseq/ .",2016-11-18 +32111184,DNA methylation-based measures of accelerated biological ageing and the risk of dementia in the oldest-old: a study of the Lothian Birth Cohort 1921.,"

Background

Previous studies have demonstrated an association between DNA methylation-based measures of accelerated ageing and age-related health outcomes and mortality. Because dementia is a disease closely associated with advancing age, we hypothesized that DNA methylation-based measures of accelerated ageing might be associated with risk for dementia. This study therefore aimed to examine the association between four recognised measures of age acceleration and subsequent dementia.

Methods

Study subjects (n = 488) were members of the Lothian Birth Cohort 1921. Dementia case ascertainment used data from death certificates, electronic hospital records, and clinical reviews. Venous blood samples were taken at baseline, at age 79 years. DNA methylation and measures of epigenetic age were calculated in accordance with Horvath's epigenetic clock tutorial, using the online calculator (https://dnamage.genetics.ucla.edu/). From these values, four measures of accelerated ageing were calculated: extrinsic epigenetic age acceleration (EEAA), intrinsic epigenetic age acceleration (IEAA), AgeAccelPheno and AgeAccelGrim. Competing risk regression models - with death as a competing risk - were performed to examine the association between each measure of accelerated ageing and incident dementia. APOE ɛ4 status, sex, age, smoking status, history of cardiovascular disease, cerebrovascular disease, hypertension, and diabetes were included as covariates.

Results

None of the multivariate models revealed a positive association between increased epigenetic age acceleration and dementia risk. Across all included models, never-smoking increased risk for dementia (HR 1.69 [1.06, 2.71], p = 0.03), and having no APOE ɛ4 alleles reduced risk for dementia (HR 0.44 [0.29, 0.67], p < 0.001).

Conclusions

The present study did not demonstrate any consistent association between DNA methylation-based measures of accelerated ageing and dementia in subjects aged over 79 years. Further, larger studies - including separate analyses of dementia subtypes - are required to further investigate the potential association between DNA methylation-based measures of accelerated ageing and dementia.",2020-02-28 +30590437,KinVis: a visualization tool to detect cryptic relatedness in genetic datasets.,"

Motivation

It is important to characterize individual relatedness in terms of familial relationships and underlying population structure in genome-wide association studies for correct downstream analysis. The characterization of individual relatedness becomes vital if the cohort is to be used as reference panel in other studies for association tests and for identifying ethnic diversities. In this paper, we propose a kinship visualization tool to detect cryptic relatedness between subjects. We utilize multi-dimensional scaling, bar charts, heat maps and node-link visualizations to enable analysis of relatedness information.

Availability and implementation

Available online and for download at http://shiny-vis.qcri.org/public/kinvis/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +28338930,EXPath tool-a system for comprehensively analyzing regulatory pathways and coexpression networks from high-throughput transcriptome data.,"Next generation sequencing (NGS) has become the mainstream approach for monitoring gene expression levels in parallel with various experimental treatments. Unfortunately, there is no systematical webserver to comprehensively perform further analysis based on the huge amount of preliminary data that is obtained after finishing the process of gene annotation. Therefore, a user-friendly and effective system is required to mine important genes and regulatory pathways under specific conditions from high-throughput transcriptome data. EXPath Tool (available at: http://expathtool.itps.ncku.edu.tw/) was developed for the pathway annotation and comparative analysis of user-customized gene expression profiles derived from microarray or NGS platforms under various conditions to infer metabolic pathways for all organisms in the KEGG database. EXPath Tool contains several functions: access the gene expression patterns and the candidates of co-expression genes; dissect differentially expressed genes (DEGs) between two conditions (DEGs search), functional grouping with pathway and GO (Pathway/GO enrichment analysis), and correlation networks (co-expression analysis), and view the expression patterns of genes involved in specific pathways to infer the effects of the treatment. Additionally, the effectively of EXPath Tool has been performed by a case study on IAA-responsive genes. 
The results demonstrated that critical hub genes under IAA treatment could be efficiently identified.",2017-08-01 +23317855,International Life Science Institute North America Cronobacter (Formerly Enterobacter sakazakii) isolate set.,"Foodborne pathogen isolate collections are important for the development of detection methods, for validation of intervention strategies, and to develop an understanding of pathogenesis and virulence. We have assembled a publicly available Cronobacter (formerly Enterobacter sakazakii) isolate set that consists of (i) 25 Cronobacter sakazakii isolates, (ii) two Cronobacter malonaticus isolates, (iii) one Cronobacter muytjensii isolate, which displays some atypical phenotypic characteristics, biochemical profiles, and colony color on selected differential media, and (iv) two nonclinical Enterobacter asburiae isolates, which show some phenotypic characteristics similar to those of Cronobacter spp. The set consists of human (n = 10), food (n = 11), and environmental (n = 9) isolates. Analysis of partial 16S rDNA sequence and seven-gene multilocus sequence typing data allowed for reliable identification of these isolates to species and identification of 14 isolates as sequence type 4, which had previously been shown to be the most common C. sakazakii sequence type associated with neonatal meningitis. Phenotypic characterization was carried out with API 20E and API 32E test strips and streaking on two selective chromogenic agars; isolates were also assessed for sorbitol fermentation and growth at 45°C. Although these strategies typically produced the same classification as sequence-based strategies, based on a panel of four biochemical tests, one C. sakazakii isolate yielded inconclusive data and one was classified as C. malonaticus. EcoRI automated ribotyping and pulsed-field gel electrophoresis (PFGE) with XbaI separated the set into 23 unique ribotypes and 30 unique PFGE types, respectively, indicating subtype diversity within the set. 
Subtype and source data for the collection are publicly available in the PathogenTracker database (www. pathogentracker. net), which allows for continuous updating of information on the set, including links to publications that include information on isolates from this collection.",2013-01-01 +31304272,"FoodOn: a harmonized food ontology to increase global food traceability, quality control and data integration.","The construction of high capacity data sharing networks to support increasing government and commercial data exchange has highlighted a key roadblock: the content of existing Internet-connected information remains siloed due to a multiplicity of local languages and data dictionaries. This lack of a digital lingua franca is obvious in the domain of human food as materials travel from their wild or farm origin, through processing and distribution chains, to consumers. Well defined, hierarchical vocabulary, connected with logical relationships-in other words, an ontology-is urgently needed to help tackle data harmonization problems that span the domains of food security, safety, quality, production, distribution, and consumer health and convenience. FoodOn (http://foodon.org) is a consortium-driven project to build a comprehensive and easily accessible global farm-to-fork ontology about food, that accurately and consistently describes foods commonly known in cultures from around the world. FoodOn addresses food product terminology gaps and supports food traceability. Focusing on human and domesticated animal food description, FoodOn contains animal and plant food sources, food categories and products, and other facets like preservation processes, contact surfaces, and packaging. Much of FoodOn's vocabulary comes from transforming LanguaL, a mature and popular food indexing thesaurus, into a World Wide Web Consortium (W3C) OWL Web Ontology Language-formatted vocabulary that provides system interoperability, quality control, and software-driven intelligence. 
FoodOn compliments other technologies facilitating food traceability, which is becoming critical in this age of increasing globalization of food networks.",2018-12-18 +32061453,Single and repeated ketamine treatment induces perfusion changes in sensory and limbic networks in major depressive disorder.,"Ketamine infusion therapy can produce fast-acting antidepressant effects in patients with major depressive disorder (MDD). Yet, how single and repeated ketamine treatment induces brain systems-level neuroplasticity underlying symptom improvement is unknown. Advanced multiband imaging (MB) pseudo-continuous arterial spin labeling (pCASL) perfusion MRI data was acquired from patients with treatment resistant depression (TRD) (N = 22, mean age=35.2 ± 9.95 SD, 27% female) at baseline, and 24 h after receiving single, and four subanesthetic (0.5 mg/kg) intravenous ketamine infusions. Changes in global and regional CBF were compared across time points, and relationships with overall mood, anhedonia and apathy were examined. Comparisons between patients at baseline and controls (N = 18, mean age=36.11 ± 14.5 SD, 57% female) established normalization of treatment effects. Results showed increased regional CBF in the cingulate and primary and higher-order visual association regions after first ketamine treatment. Baseline CBF in the fusiform, and acute changes in CBF in visual areas were related to symptom improvement after single and repeated ketamine treatment, respectively. In contrast, after serial infusion therapy, decreases in regional CBF were observed in the bilateral hippocampus and right insula with ketamine treatment. Findings demonstrate that neurophysiological changes occurring with single and repeated ketamine treatment follow both a regional and temporal pattern including sensory and limbic regions. Initial changes are observed in the posterior cingulate and precuneus and primary and higher-order visual areas, which relate to clinical responses. 
However, repeated exposure to ketamine, though not relating to clinical outcome, appears to engage deeper limbic structures and insula. ClinicalTrials.gov: Biomarkers of Fast Acting Therapies in Major Depression, https://clinicaltrials.gov/ct2/show/NCT02165449, NCT02165449.",2020-02-12 +31596140,"Chemical composition and potentiation of insecticidal and fungicidal activities of Citrus trifoliata L. fruits essential oil against Spodoptera littoralis, Fusarium oxysporum and Fusarium solani via nano-cubosomes.","Development of natural nano-based plant-protection formulations represents an emerging phenomenon that has been widely improved for crops protection and for enhancing the efficiency and safety of pesticides. In the present study we isolated the essential oil from the fruits of Citrus trifoliata L. and investigated it using gas chromatography-mass spectrometry analysis. Limonene (78.46%) was the major component followed by β-Myrcene (7.94%) and Caryophyllene (4.20%). Citrus trifoliata essential oil (CTEO) loaded nano-cubosomes were successfully prepared by the emulsification technique. The insecticidal and fungicidal activities of formulated CTEO nano-cubosomes and unformulated CTEO were tested. While both of them exhibited substantial activities, CTEO nano-cubosomes were more effective than unformulated oil. It is the first time to formulate CTEO in nano-cubosomes and examine their insecticidal and fungicidal activities. In light of the current study, CTEO as it is or as nano-cubosomes is recommended as a promising candidate for pest and fungal pathogens control.Supplemental data for this article can be accessed at https://doi.org/10.1080/14786419.2019.1675063.",2019-10-09 +31233342,Identification of Key Biomarkers and Potential Molecular Mechanisms in Renal Cell Carcinoma by Bioinformatics Analysis.,"Renal cell carcinoma (RCC) is the most common form of kidney cancer, caused by renal epithelial cells. 
RCC remains to be a challenging public health problem worldwide. Metastases that are resistant to radiotherapy and chemotherapy are the major cause of death from cancer. However, the underlying molecular mechanism regulating the metastasis of RCC is poorly known. Publicly available databases of RCC were obtained from Gene Expression Omnibus (GEO) database. The differentially expressed genes (DEGs) were identified using GEO2R analysis, whereas the Gene Ontology (GO) analysis and Kyoto Encyclopedia of Genes and Genomes (KEGG) analysis were performed by Gene Set Enrichment Analysis (GSEA) and Metascape. Protein-protein interaction (PPI) network of DEGs was analyzed by STRING online database, and Cytoscape software was used for visualizing PPI network. Survival analysis of hub genes was conducted using GEPIA online database. The expression levels of hub genes were investigated from The Human Protein Atlas online database and GEPIA online database. Finally, the comparative toxicogenomics database (CTD; http://ctdbase.org) was used to identify hub genes associated with tumor or metastasis. We identified 229 DEGs comprising 135 downregulated genes and 94 upregulated genes. Functional analysis revealed that these DEGs were associates with cell recognition, regulation of immune, negative regulation of adaptive immune response, and other functions. And these DEGs mainly related to P53 signaling pathway, cytokine-cytokine receptor interaction, Natural killer cell mediated cytotoxicity, and other pathways are involved. Ten genes were identified as hub genes through module analyses in the PPI network. Finally, survival analysis of 10 hub genes was conducted, which showed that the MMP2 (matrix metallo peptidase 2), DCN, COL4A1, CASR (calcium sensing receptor), GPR4 (G protein-coupled receptor 4), UTS2 (urotensin 2), and LDLR (low density lipoprotein receptor) genes were significant for survival. 
In this study, the DEGs between RCC and metastatic RCC were analyzed, which assist us in systematically understanding the pathogeny underlying metastasis of RCC. The MMP2, DCN, COL4A1, CASR, GPR4, UTS2, and LDLR genes might be used as potential targets to improve diagnosis and immunotherapy biomarkers for RCC.",2019-06-24 +31907209,pVACtools: A Computational Toolkit to Identify and Visualize Cancer Neoantigens.,"Identification of neoantigens is a critical step in predicting response to checkpoint blockade therapy and design of personalized cancer vaccines. This is a cross-disciplinary challenge, involving genomics, proteomics, immunology, and computational approaches. We have built a computational framework called pVACtools that, when paired with a well-established genomics pipeline, produces an end-to-end solution for neoantigen characterization. pVACtools supports identification of altered peptides from different mechanisms, including point mutations, in-frame and frameshift insertions and deletions, and gene fusions. Prediction of peptide:MHC binding is accomplished by supporting an ensemble of MHC Class I and II binding algorithms within a framework designed to facilitate the incorporation of additional algorithms. Prioritization of predicted peptides occurs by integrating diverse data, including mutant allele expression, peptide binding affinities, and determination whether a mutation is clonal or subclonal. Interactive visualization via a Web interface allows clinical users to efficiently generate, review, and interpret results, selecting candidate peptides for individual patient vaccine designs. Additional modules support design choices needed for competing vaccine delivery approaches. One such module optimizes peptide ordering to minimize junctional epitopes in DNA vector vaccines. Downstream analysis commands for synthetic long peptide vaccines are available to assess candidates for factors that influence peptide synthesis. 
All of the aforementioned steps are executed via a modular workflow consisting of tools for neoantigen prediction from somatic alterations (pVACseq and pVACfuse), prioritization, and selection using a graphical Web-based interface (pVACviz), and design of DNA vector-based vaccines (pVACvector) and synthetic long peptide vaccines. pVACtools is available at http://www.pvactools.org.",2020-01-06 +30109135,Electronic medical record: data collection and reporting for spinal cord injury.,"

Study design

Presentation of implementation of International Spinal Cord Injury (SCI) Data Sets, International Standards for Neurological Classification of SCI (ISNCSCI), and other structured SCI tools into the Electronic Medical Record (EMR) Epic.

Objectives

To describe the implementation of SCI tools in Epic at Rigshospitalet, University of Hospital, Capital Region of Denmark, and the ambitions for the future development of SCI related structured data and their reporting in the Epic EMR to be able to standardize data collection to facilitate research within institutions and collaboratively with other institutions locally and globally.

Setting

Denmark and United States of America.

Methods

The general content of the EMR Epic and the SCI-specific structured data implemented are described as well as the tools for reporting.

Results

The ISNCSCI is made available via access to http://isncscialgorithm.azurewebsites.net/. After filling in the test data on the website, one can save the completed form as an image within the patient's chart. The International SCI Core Data Set and 13 International SCI Basic Data Sets (Table 1) are nearly completely implemented in the Danish version of Epic as SmartForms. In addition, 14 functional measures, including the Spinal Cord Independence Measure III, are implemented as flowsheets (Table 2).

Conclusions

The possibility of entering internationally recognized structured data into the EMR improves the prospects for data sharing across SCI centers worldwide.

Sponsorship

Gianna Maria Rodriguez, Stacey Cohen, and Fin Biering-Sørensen are users of Epic, but have no economic relationship with Epic. Kelly Tausk and Josh Martin are employees of Epic.",2018-08-07 +30010718,A parallel computational framework for ultra-large-scale sequence clustering analysis.,"

Motivation

The rapid development of sequencing technology has led to an explosive accumulation of genomic data. Clustering is often the first step to be performed in sequence analysis. However, existing methods scale poorly with respect to the unprecedented growth of input data size. As high-performance computing systems are becoming widely accessible, it is highly desired that a clustering method can easily scale to handle large-scale sequence datasets by leveraging the power of parallel computing.

Results

In this paper, we introduce SLAD (Separation via Landmark-based Active Divisive clustering), a generic computational framework that can be used to parallelize various de novo operational taxonomic unit (OTU) picking methods and comes with theoretical guarantees on both accuracy and efficiency. The proposed framework was implemented on Apache Spark, which allows for easy and efficient utilization of parallel computing resources. Experiments performed on various datasets demonstrated that SLAD can significantly speed up a number of popular de novo OTU picking methods and meanwhile maintains the same level of accuracy. In particular, the experiment on the Earth Microbiome Project dataset (∼2.2B reads, 437 GB) demonstrated the excellent scalability of the proposed method.

Availability and implementation

Open-source software for the proposed method is freely available at https://www.acsu.buffalo.edu/~yijunsun/lab/SLAD.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +32265329,Visualizing Association of the Retroviral Gag Protein with Unspliced Viral RNA in the Nucleus. ,"Packaging of genomic RNA (gRNA) by retroviruses is essential for infectivity, yet the subcellular site of the initial interaction between the Gag polyprotein and gRNA remains poorly defined. Because retroviral particles are released from the plasma membrane, it was previously thought that Gag proteins initially bound to gRNA in the cytoplasm or at the plasma membrane. However, the Gag protein of the avian retrovirus Rous sarcoma virus (RSV) undergoes active nuclear trafficking, which is required for efficient gRNA encapsidation (L. Z. Scheifele, R. A. Garbitt, J. D. Rhoads, and L. J. Parent, Proc Natl Acad Sci U S A 99:3944-3949, 2002, https://doi.org/10.1073/pnas.062652199; R. Garbitt-Hirst, S. P. Kenney, and L. J. Parent, J Virol 83:6790-6797, 2009, https://doi.org/10.1128/JVI.00101-09). These results raise the intriguing possibility that the primary contact between Gag and gRNA might occur in the nucleus. To examine this possibility, we created a RSV proviral construct that includes 24 tandem repeats of MS2 RNA stem-loops, making it possible to track RSV viral RNA (vRNA) in live cells in which a fluorophore-conjugated MS2 coat protein is coexpressed. Using confocal microscopy, we observed that both wild-type Gag and a nuclear export mutant (Gag.L219A) colocalized with vRNA in the nucleus. In live-cell time-lapse images, the wild-type Gag protein trafficked together with vRNA as a single ribonucleoprotein (RNP) complex in the nucleoplasm near the nuclear periphery, appearing to traverse the nuclear envelope into the cytoplasm. Furthermore, biophysical imaging methods suggest that Gag and the unspliced vRNA physically interact in the nucleus. 
Taken together, these data suggest that RSV Gag binds unspliced vRNA to export it from the nucleus, possibly for packaging into virions as the viral genome.IMPORTANCE Retroviruses cause severe diseases in animals and humans, including cancer and acquired immunodeficiency syndromes. To propagate infection, retroviruses assemble new virus particles that contain viral proteins and unspliced vRNA to use as gRNA. Despite the critical requirement for gRNA packaging, the molecular mechanisms governing the identification and selection of gRNA by the Gag protein remain poorly understood. In this report, we demonstrate that the Rous sarcoma virus (RSV) Gag protein colocalizes with unspliced vRNA in the nucleus in the interchromatin space. Using live-cell confocal imaging, RSV Gag and unspliced vRNA were observed to move together from inside the nucleus across the nuclear envelope, suggesting that the Gag-gRNA complex initially forms in the nucleus and undergoes nuclear export into the cytoplasm as a viral ribonucleoprotein (vRNP) complex.",2020-04-07 +27171405,Generating Gene Ontology-Disease Inferences to Explore Mechanisms of Human Disease at the Comparative Toxicogenomics Database.,"Strategies for discovering common molecular events among disparate diseases hold promise for improving understanding of disease etiology and expanding treatment options. One technique is to leverage curated datasets found in the public domain. The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) manually curates chemical-gene, chemical-disease, and gene-disease interactions from the scientific literature. The use of official gene symbols in CTD interactions enables this information to be combined with the Gene Ontology (GO) file from NCBI Gene. 
By integrating these GO-gene annotations with CTD's gene-disease dataset, we produce 753,000 inferences between 15,700 GO terms and 4,200 diseases, providing opportunities to explore presumptive molecular underpinnings of diseases and identify biological similarities. Through a variety of applications, we demonstrate the utility of this novel resource. As a proof-of-concept, we first analyze known repositioned drugs (e.g., raloxifene and sildenafil) and see that their target diseases have a greater degree of similarity when comparing GO terms vs. genes. Next, a computational analysis predicts seemingly non-intuitive diseases (e.g., stomach ulcers and atherosclerosis) as being similar to bipolar disorder, and these are validated in the literature as reported co-diseases. Additionally, we leverage other CTD content to develop testable hypotheses about thalidomide-gene networks to treat seemingly disparate diseases. Finally, we illustrate how CTD tools can rank a series of drugs as potential candidates for repositioning against B-cell chronic lymphocytic leukemia and predict cisplatin and the small molecule inhibitor JQ1 as lead compounds. The CTD dataset is freely available for users to navigate pathologies within the context of extensive biological processes, molecular functions, and cellular components conferred by GO. This inference set should aid researchers, bioinformaticists, and pharmaceutical drug makers in finding commonalities in disease mechanisms, which in turn could help identify new therapeutics, new indications for existing pharmaceuticals, potential disease comorbidities, and alerts for side effects.",2016-05-12 +27918179,Mining big data to extract patterns and predict real-life outcomes.,"This article aims to introduce the reader to essential tools that can be used to obtain insights and build predictive models using large data sets. 
Recent user proliferation in the digital environment has led to the emergence of large samples containing a wealth of traces of human behaviors, communication, and social interactions. Such samples offer the opportunity to greatly improve our understanding of individuals, groups, and societies, but their analysis presents unique methodological challenges. In this tutorial, we discuss potential sources of such data and explain how to efficiently store them. Then, we introduce two methods that are often employed to extract patterns and reduce the dimensionality of large data sets: singular value decomposition and latent Dirichlet allocation. Finally, we demonstrate how to use dimensions or clusters extracted from data to build predictive models in a cross-validated way. The text is accompanied by examples of R code and a sample data set, allowing the reader to practice the methods discussed here. A companion website (http://dataminingtutorial.com) provides additional learning resources. (PsycINFO Database Record",2016-12-01 +32041836,Extensive Clonal Branching Shapes the Evolutionary History of High-Risk Pediatric Cancers.,"Darwinian evolution of tumor cells remains underexplored in childhood cancer. We here reconstruct the evolutionary histories of 56 pediatric primary tumors, including 24 neuroblastomas, 24 Wilms tumors, and 8 rhabdomyosarcomas. Whole-genome copy-number and whole-exome mutational profiling of multiple regions per tumor were performed, followed by clonal deconvolution to reconstruct a phylogenetic tree for each tumor. Overall, 88% of the tumors exhibited genetic variation among primary tumor regions. This variability typically emerged through collateral phylogenetic branching, leading to spatial variability in the distribution of more than 50% (96/173) of detected diagnostically informative genetic aberrations. 
Single-cell sequencing of 547 individual cancer cells from eight solid pediatric tumors confirmed branching evolution to be a fundamental underlying principle of genetic variation in all cases. Strikingly, cell-to-cell genetic diversity was almost twice as high in aggressive compared with clinically favorable tumors (median Simpson index of diversity 0.45 vs. 0.88; P = 0.029). Similarly, a comparison of multiregional sampling data from a total of 274 tumor regions showed that new phylogenetic branches emerge at a higher frequency per sample and carry a higher mutational load in high-risk than in low-risk tumors. Timelines based on spatial genetic variation showed that the mutations most influencing relapse risk occur at initiation of clonal expansion in neuroblastoma and rhabdomyosarcoma, whereas in Wilms tumor, they are late events. Thus, from an evolutionary standpoint, some high-risk childhood cancers are born bad, whereas others grow worse over time. SIGNIFICANCE: Different pediatric cancers with a high risk of relapse share a common generic pattern of extensively branching evolution of somatic mutations. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/7/1512/F1.large.jpg.",2020-02-10 +24678985,ClearedLeavesDB: an online database of cleared plant leaf images.,"

Background

Leaf vein networks are critical to both the structure and function of leaves. A growing body of recent work has linked leaf vein network structure to the physiology, ecology and evolution of land plants. In the process, multiple institutions and individual researchers have assembled collections of cleared leaf specimens in which vascular bundles (veins) are rendered visible. In an effort to facilitate analysis and digitally preserve these specimens, high-resolution images are usually created, either of entire leaves or of magnified leaf subsections. In a few cases, collections of digital images of cleared leaves are available for use online. However, these collections do not share a common platform nor is there a means to digitally archive cleared leaf images held by individual researchers (in addition to those held by institutions). Hence, there is a growing need for a digital archive that enables online viewing, sharing and disseminating of cleared leaf image collections held by both institutions and individual researchers.

Description

The Cleared Leaf Image Database (ClearedLeavesDB), is an online web-based resource for a community of researchers to contribute, access and share cleared leaf images. ClearedLeavesDB leverages resources of large-scale, curated collections while enabling the aggregation of small-scale collections within the same online platform. ClearedLeavesDB is built on Drupal, an open source content management platform. It allows plant biologists to store leaf images online with corresponding meta-data, share image collections with a user community and discuss images and collections via a common forum. We provide tools to upload processed images and results to the database via a web services client application that can be downloaded from the database.

Conclusions

We developed ClearedLeavesDB, a database focusing on cleared leaf images that combines interactions between users and data via an intuitive web interface. The web interface allows storage of large collections and integrates with leaf image analysis applications via an open application programming interface (API). The open API allows uploading of processed images and other trait data to the database, further enabling distribution and documentation of analyzed data within the community. The initial database is seeded with nearly 19,000 cleared leaf images representing over 40 GB of image data. Extensible storage and growth of the database is ensured by using the data storage resources of the iPlant Discovery Environment. ClearedLeavesDB can be accessed at http://clearedleavesdb.org.",2014-03-28 +,Species specific approach to the development of web-based antimicrobial peptides prediction tool for cattle,"Antimicrobial peptides (AMPs) are the defence molecules of the host gaining extensive attention worldwide as these are natural alternative to chemical antibiotics. Machine learning techniques have capabilities to analyse large biological data for detection of hidden pattern in understanding complex underlying biological problems. Presently, development of resistance to chemical antibiotics in cattle is unsolved and growing problem which needs immediate attention. In the present study, attempt was made to apply machine learning algorithms such as Artificial Neuron Network (ANN) and Support Vector Machine (SVM). It was found that performance of SVM based models for in silico prediction/identification of AMPs of cattle is superior than ANN. A total of 99 AMPs related to cattle collected from various databases and published literature were taken for this study. N-terminus residues, C-terminus residues and full sequences were used for model development and identification/prediction. 
It was found that best SVM models in this case for C-terminus residues, N-terminus residues and full sequence were with kernels Radial Basis Function (RBF), Sigmoid and RBF with accuracy as 95%, 99% and 97%, respectively. These SVM models were implemented on web server and made available to users at http://cabin.iasri.res.in/amp/ for classification/prediction of novel AMPs of cattle. This computational server can accelerate novel AMP discovery from whole genome proteins of a given cattle species for bulk discovery with very high accuracy. This is the first successful attempt for development of species specific approach for prediction/classification of AMPs, which may be used further as a model in other species as well.",2015-02-01 +27613420,PCDDB: new developments at the Protein Circular Dichroism Data Bank.,"The Protein Circular Dichroism Data Bank (PCDDB) has been in operation for more than 5 years as a public repository for archiving circular dichroism spectroscopic data and associated bioinformatics and experimental metadata. Since its inception, many improvements and new developments have been made in data display, searching algorithms, data formats, data content, auxillary information, and validation techniques, as well as, of course, an increase in the number of holdings. It provides a site (http://pcddb.cryst.bbk.ac.uk) for authors to deposit experimental data as well as detailed information on methods and calculations associated with published work. It also includes links for each entry to bioinformatics databases. The data are freely available to accessors either as single files or as complete data bank downloads. The PCDDB has found broad usage by the structural biology, bioinformatics, analytical and pharmaceutical communities, and has formed the basis for new software and methods developments.",2016-09-08 +29796383,HaloDom: a new database of halophiles across all life domains.,"

Background

Halophilic organisms may thrive in or tolerate high salt concentrations. They have been studied for decades and a considerable number of papers reporting new halophilic species are being published every year. However, an extensive collection of these salt-loving organisms does not exist nowadays. Halophilic life forms have representatives from all three life domains, Archaea, Bacteria and Eukarya. The purpose of this study was to search for all documented halophilic species in the scientific literature and accommodate this information in the form of an online database.

Results

We recorded more than 1000 halophilic species from the scientific literature. From these, 21.9% belong to Archaea, 50.1% to Bacteria and 27.9% to Eukaryotes. Our records contain basic information such as the salinity that a particular organism was found, its taxonomy and genomic information via NCBI and other links. The online database named ""HaloDom"" can be accessed at http://www.halodom.bio.auth.gr.

Conclusions

Over the last few years, data on halophiles are growing fast. Compared to previous efforts, this new halophiles database expands its coverage to all life domains and offers a valuable reference system for studies in biotechnology, early life evolution and comparative genomics.",2018-01-15 +33828767,"Eye movements in developing readers: From basic research to classroom application. Parts of symposium 7 at the 20th European Conference on Eye Movements in Alicante, September 21, 2019. ","Eye-movement recording has made it possible to achieve a detailed understanding of oculomotor and cognitive behavior during reading and of changes in this behavior across the stages of reading development. Given that many students struggle to attain even basic reading skills, a logical extension of eye-movement research involves its applications in both the diagnostic and instructional areas of reading education. The focus of this symposium is on eye-movement research with potential implications for reading education. Christian Vorstius will review results from a large-scale longitudinal study that examined the development of spatial parameters in fixation patterns within three cohorts, ranging from elementary to early middle school, discussing an early development window and its potential influences on reading ability and orthography. Ronan Reilly and Xi Fan will present longitudinal data related to developmental changes in reading-related eye movements in Chinese. Their findings are indicative of increasing sensitivity to lexical predictability and sentence coherence. The authors suggest that delays in the emergence of these reading behaviors may signal early an increased risk of reading difficulty. 
Jochen Laubrock's presentation will focus on perceptual span development and explore dimensions of this phenomenon with potential educational implications, such as the modulation of perceptual span in relation to cognitive load, as well as preview effects during oral and silent reading --and while reading comic books. Video stream: https://vimeo.com/362645755.",2019-11-25 +30329098,LncBook: a curated knowledgebase of human long non-coding RNAs.,"Long non-coding RNAs (lncRNAs) have significant functions in a wide range of important biological processes. Although the number of known human lncRNAs has dramatically increased, they are poorly annotated, posing great challenges for better understanding their functional significance and elucidating their complex functioning molecular mechanisms. Here, we present LncBook (http://bigd.big.ac.cn/lncbook), a curated knowledgebase of human lncRNAs that features a comprehensive collection of human lncRNAs and systematic curation of lncRNAs by multi-omics data integration, functional annotation and disease association. In the present version, LncBook houses a large number of 270 044 lncRNAs and includes 1867 featured lncRNAs with 3762 lncRNA-function associations. It also integrates an abundance of multi-omics data from expression, methylation, genome variation and lncRNA-miRNA interaction. Also, LncBook incorporates 3772 experimentally validated lncRNA-disease associations and further identifies a total of 97 998 lncRNAs that are putatively disease-associated. 
Collectively, LncBook is dedicated to the integration and curation of human lncRNAs as well as their associated data and thus bears great promise to serve as a valuable knowledgebase for worldwide research communities.",2019-01-01 +30118600,MetaboGroup S: A Group Entropy-Based Web Platform for Evaluating Normalization Methods in Blood Metabolomics Data from Maintenance Hemodialysis Patients.,"Because of inevitable and complicated signal variations in LC-MSn-based nontargeted metabolomics, normalization of metabolites data is a highly recommended procedure to assist in improving accuracies in metabolic profiling and discovery of potential biomarkers. Despite various normalization methods having been developed and applied for processing these data sets, it is still difficult to assess their performance. Moreover, such methods are elusive and difficult to choose for users, especially those without bioinformatics training. In this study, we present a powerful and user-friendly web platform, named MetaboGroup S, for comparison and evaluation of seven popular normalization methods and provide an optimal one automatically for end users based on the group entropies of every sample data point. For examination and application of this tool, we analyzed a complex clinical human data set from maintenance hemodialysis patients with erythrin resistance. Metabolite peaks (11 027) were extracted from the experimental data and then imported into this platform; the entire analysis process was completed sequentially within 5 min. To further test the performance and universality of MetaboGroup S, we analyzed two more published data sets including a nuclear magnetic resonance (NMR) data set on this platform. The results indicated that the method with a lower intragroup entropy and higher intergroup entropy would be preferable. 
In addition, MetaboGroup S can be quite conveniently operated by users and does not require any profound computational expertise or background for scientists in many fields. MetaboGroup S is freely available at https://omicstools.shinyapps.io/MetaboGroupSapp/ .",2018-08-31 +30984618,OSlms: A Web Server to Evaluate the Prognostic Value of Genes in Leiomyosarcoma.,"The availability of transcriptome data and clinical annotation offers the opportunity to identify prognosis biomarkers in cancer. However, efficient online prognosis analysis tools are still lacking. Herein, we developed a user-friendly web server, namely Online consensus Survival analysis of leiomyosarcoma (OSlms), to centralize published gene expression data and clinical datasets of leiomyosarcoma (LMS) patients from The Cancer Genome Atlas (TCGA) and Gene Expression Omnibus (GEO). OSlms comprises of a total of 268 samples from three independent datasets, and employs the Kaplan Meier survival plot with hazard ratio (HR) and log rank test to estimate the prognostic potency of genes of interests for LMS patients. Using OSlms, clinicians and basic researchers could determine the prognostic significance of genes of interests and get opportunities to identify novel potential important molecules for LMS. OSlms is free and publicly accessible at http://bioinfo.henu.edu.cn/LMS/LMSList.jsp.",2019-03-29 +33431059,3DStructGen: an interactive web-based 3D structure generation for non-periodic molecule and crystal.,"

Background

The increasing number of organic and inorganic structures promotes the development of the ""Big Data"" in chemistry and material science, and raises the need for cross-platform and web-based methods to search, view and edit structures. Many web-based three-dimensional (3D) structure tools have been developed for displaying existing models, building new models, and preparing initial input files for external calculations. But few of these tools can deal with crystal structures.

Results

We developed a user-friendly and versatile program based on standard web techniques, such as Hyper Text Markup Language 5 (HTML5), Cascade Style Sheet (CSS) and JavaScript. Both non-periodic organic molecule and crystal structure can be visualized, built and edited interactively. The atom, bond, angle and dihedral in a molecule can be viewed and modified using sample mouse operations. A wide range of cheminformatics algorithms for crystal structure are provided, including cleaving surfaces, establishing vacuum layers, and building supercells. Four displayed styles, namely ""Primitive cell"", ""Original"", ""In-cell"" and ""Packing"" can be used to visualize a unit cell. Additionally, the initial input files for Vienna Ab-initio Simulation Package (VASP) and Gaussian can be obtained by interacting with dialog boxes in 3DStructGen.

Conclusions

3DStructGen is a highly platform-independent program. It can provide web service independently or can be integrated into other web platforms. Other than local desktop software, it does not require any additional effort to install the system but a web browser supporting HTML5. 3DStructGen may play a valuable role in online chemistry education and pre-processing of quantum calculations. The program has been released under MIT open-source license and is available on: https://matgen.nscc-gz.cn/Tools.html.",2020-01-23 +30918940,parMATT: parallel multiple alignment of protein 3D-structures with translations and twists for distributed-memory systems.,"MOTIVATION:Accurate structural alignment of proteins is crucial at studying structure-function relationship in evolutionarily distant homologues. Various software tools were proposed to align multiple protein 3D-structures utilizing one CPU and thus are of limited productivity at large-scale analysis of protein families/superfamilies. RESULTS:The parMATT is a hybrid MPI/pthreads/OpenMP parallel re-implementation of the MATT algorithm to align multiple protein 3D-structures by allowing translations and twists. The parMATT can be faster than MATT on a single multi-core CPU, and provides a much greater speedup when executed on distributed-memory systems, i.e. computing clusters and supercomputers hosting memory-independent computing nodes. The most computationally demanding steps of the MATT algorithm-the initial construction of pairwise alignments between all input structures and further iterative progression of the multiple alignment-were parallelized using MPI and pthreads, and the concluding refinement step was optimized by introducing the OpenMP support. The parMATT can significantly accelerate the time-consuming process of building a multiple structural alignment from a large set of 3D-records of homologous proteins. 
AVAILABILITY AND IMPLEMENTATION:The source code is available at https://biokinet.belozersky.msu.ru/parMATT. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-11-01 +31047152,Removal of false positive features to generate authentic peak table for high-resolution mass spectrometry-based metabolomics study.,"In metabolomics research, false positive features from non-sample sources and noises usually exist in the peak table, they will make the results of screening differential metabolites or biomarkers unreliable. In this study, a method to remove false positive features (rFPF) was developed to improve the quality of the peak table. rFPF recognizes real peak profiles based on the information entropy and statistical correlation, and eliminates false positive features from non-sample sources and noises. A standard mixture with 42 standards (14 isotopic labeled internal standards and 28 common standards) and a urine sample were applied to evaluate the effectiveness of the rFPF method. The analysis results of metabolite standards showed that more than 92% false positive features were removed by rFPF, but target standards completely remained. The analysis results of urine sample showed that the number of features was significantly reduced from 7182 to 2522. Interestingly, 98% of the identified metabolites remained after removing false positive features. The proposed rFPF shows great prospects as a new data handling method for metabolomics studies. The MATLAB code and data can be downloaded from http://app.ifc.dicp.ac.cn/Confirmation/Authentication.html.",2019-04-10 +31974522,ISOGO: Functional annotation of protein-coding splice variants.,"The advent of RNA-seq technologies has switched the paradigm of genetic analysis from a genome to a transcriptome-based perspective. Alternative splicing generates functional diversity in genes, but the precise functions of many individual isoforms are yet to be elucidated. 
Gene Ontology was developed to annotate gene products according to their biological processes, molecular functions and cellular components. Despite a single gene may have several gene products, most annotations are not isoform-specific and do not distinguish the functions of the different proteins originated from a single gene. Several approaches have tried to automatically annotate ontologies at the isoform level, but this has shown to be a daunting task. We have developed ISOGO (ISOform + GO function imputation), a novel algorithm to predict the function of coding isoforms based on their protein domains and their correlation of expression along 11,373 cancer patients. Combining these two sources of information outperforms previous approaches: it provides an area under precision-recall curve (AUPRC) five times larger than previous attempts and the median AUROC of assigned functions to genes is 0.82. We tested ISOGO predictions on some genes with isoform-specific functions (BRCA1, MADD,VAMP7 and ITSN1) and they were coherent with the literature. Besides, we examined whether the main isoform of each gene -as predicted by APPRIS- was the most likely to have the annotated gene functions and it occurs in 99.4% of the genes. We also evaluated the predictions for isoform-specific functions provided by the CAFA3 challenge and results were also convincing. To make these results available to the scientific community, we have deployed a web application to consult ISOGO predictions (https://biotecnun.unav.es/app/isogo). Initial data, website link, isoform-specific GO function predictions and R code is available at https://gitlab.com/icassol/isogo.",2020-01-23 +29126295,MoonProt 2.0: an expansion and update of the moonlighting proteins database.,"MoonProt 2.0 (http://moonlightingproteins.org) is an updated, comprehensive and open-access database storing expert-curated annotations for moonlighting proteins. 
Moonlighting proteins contain two or more physiologically relevant distinct functions performed by a single polypeptide chain. Here, we describe developments in the MoonProt website and database since our previous report in the Database Issue of Nucleic Acids Research. For this V 2.0 release, we expanded the number of proteins annotated to 370 and modified several dozen protein annotations with additional or updated information, including more links to protein structures in the Protein Data Bank, compared with the previous release. The new entries include more examples from humans and several model organisms, more proteins involved in disease, and proteins with different combinations of functions. The updated web interface includes a search function using BLAST to enable users to search the database for proteins that share amino acid sequence similarity with a protein of interest. The updated website also includes additional background information about moonlighting proteins and an expanded list of links to published articles about moonlighting proteins.",2018-01-01 +30043113,Classification of samples from NMR-based metabolomics using principal components analysis and partial least squares with uncertainty estimation.,"Recent progress in metabolomics has been aided by the development of analysis techniques such as gas and liquid chromatography coupled with mass spectrometry (GC-MS and LC-MS) and nuclear magnetic resonance (NMR) spectroscopy. The vast quantities of data produced by these techniques has resulted in an increase in the use of machine algorithms that can aid in the interpretation of this data, such as principal components analysis (PCA) and partial least squares (PLS). Techniques such as these can be applied to biomarker discovery, interlaboratory comparison, and clinical diagnoses. However, there is a lingering question whether the results of these studies can be applied to broader sets of clinical data, usually taken from different data sources. 
In this work, we address this question by creating a metabolomics workflow that combines a previously published consensus analysis procedure ( https://doi.org/10.1016/j.chemolab.2016.12.010 ) with PCA and PLS models using uncertainty analysis based on bootstrapping. This workflow is applied to NMR data that come from an interlaboratory comparison study using synthetic and biologically obtained metabolite mixtures. The consensus analysis identifies trusted laboratories, whose data are used to create classification models that are more reliable than without. With uncertainty analysis, the reliability of the classification can be rigorously quantified, both for data from the original set and from new data that the model is analyzing. Graphical abstract ᅟ.",2018-07-25 +29790910,SoS Notebook: an interactive multi-language data analysis environment.,"Motivation:Complex bioinformatic data analysis workflows involving multiple scripts in different languages can be difficult to consolidate, share and reproduce. An environment that streamlines the entire processes of data collection, analysis, visualization and reporting of such multi-language analyses is currently lacking. Results:We developed Script of Scripts (SoS) Notebook, a web-based notebook environment that allows the use of multiple scripting language in a single notebook, with data flowing freely within and across languages. SoS Notebook enables researchers to perform sophisticated bioinformatic analysis using the most suitable tools for different parts of the workflow, without the limitations of a particular language or complications of cross-language communications. 
Availability and implementation:SoS Notebook is hosted at http://vatlab.github.io/SoS/ and is distributed under a BSD license.",2018-11-01 +31762185,The effect on pregnant women's prenatal attachment of a nursing practice using the first and second Leopold's maneuvers.,"OBJECTIVE:This study aimed to determine the effect on pregnant women's prenatal attachment of a nursing practice using the first and second Leopold's maneuvers. METHOD:This experimental, randomized and controlled study was conducted in a pregnancy class of a training and research hospital in Kocaeli, Turkey from September 2016 to September 2017. Its sample included 100 pregnant women, 50 in the experimental group and 50 in the control group (https://www.random.org, accessed: 09.20.2016). Data were collected using a personal information form, the Prenatal Attachment Inventory (PAI) and the Fetal Position Awareness Scale (FPAS). The study offered education that included fetal development, the first and second Leopold's maneuvers, and Leopold's maneuvers were administered in the 28th week of the women's pregnancy and re-administered in the 32nd and 36th weeks of pregnancy. No intervention was administered to the control group in these weeks, but the scales were administered. RESULTS:The sociodemographic, obstetric, social support and baby-related characteristics of the groups were similar (p > .05). There were no statically significant differences between their mean PAI and FPAS scores in the 28th week of pregnancy (p > .05). The experimental group's mean PAI and the FPAS scores in the 32nd and 36th weeks of pregnancy were significantly higher than those of the control group (p < .01). 
CONCLUSION:The study concluded that Leopold's maneuvers affected the pregnant women's prenatal attachment levels.",2019-11-24 +30211284,Smart bandwidth allocation for next generation networks adopting software-defined network approach.,"This data article contains information on a new intelligent bandwidth allocation model for future network (Smart Allocation). The included data describe the topology of the network testbed and the obtained results. Obtained data show the effectiveness of the proposed model in comparison with the MAM and RDM bandwidth allocation models. In relation to the performances evaluation, a variety of flows are used such as: voice over IP (VoIP), video, HTTP, and Internet Control Message Protocol (ICMP). The evaluation criteria are: VoIP latency and jitter, Peak Signal to Noise Ratio (PSNR) video, retransmission video, goodput, HTTP response page, and the Round-Trip Time (RTT) ICMP delay. The presented data are extracted based on simulation.",2018-08-30 +29858800,The IPD Databases: Cataloguing and Understanding Allele Variants.,"The IMGT/HLA Database has provided a repository for information regarding polymorphism in the genes of the immune system since 1998. In 2003, it was absorbed into the Immuno Polymorphism Database (IPD). The IPD project has enabled us to create and maintain a platform for curating and publishing locus-specific databases which are either involved directly with, or relate to, the function of the Major Histocompatibility Complex across a number of species. In collaboration with specialist groups and nomenclature committees individual sections have been curated prior to their submission to the IPD for online publication. The IPD consists of five core databases, with the primary database being the IMGT/HLA Database. 
With the work of various nomenclature committees, the HLA Informatics Group, and alongside the European Bioinformatics Institute, we provide access to this data through the website ( http://www.ebi.ac.uk/ipd/ ) to the public domain. The IPD project continually develops new tools in conjunction with on-going scientific developments-such as Next-Generation Sequencing-to maintain efficiency and usability in response to user feedback and requests. The website is updated on a regular basis to ensure that new and confirmatory sequences are distributed to the immunogenetics community, as well as the wider research and clinical communities.",2018-01-01 +29069520,"RNArchitecture: a database and a classification system of RNA families, with a focus on structural information.","RNArchitecture is a database that provides a comprehensive description of relationships between known families of structured non-coding RNAs, with a focus on structural similarities. The classification is hierarchical and similar to the system used in the SCOP and CATH databases of protein structures. Its central level is Family, which builds on the Rfam catalog and gathers closely related RNAs. Consensus structures of Families are described with a reduced secondary structure representation. Evolutionarily related Families are grouped into Superfamilies. Similar structures are further grouped into Architectures. The highest level, Class, organizes families into very broad structural categories, such as simple or complex structured RNAs. Some groups at different levels of the hierarchy are currently labeled as 'unclassified'. The classification is expected to evolve as new data become available. For each Family with an experimentally determined three-diemsional (3D) structure(s), a representative one is provided. RNArchitecture also presents theoretical models of RNA 3D structure and is open for submission of structural models by users. 
Compared to other databases, RNArchitecture is unique in its focus on structure-based RNA classification, and in providing a platform for storing RNA 3D structure predictions. RNArchitecture can be accessed at http://iimcb.genesilico.pl/RNArchitecture/.",2018-01-01 +31247281,Revisiting cell-particle association in vitro: A quantitative method to compare particle performance.,"Nanoengineering has the potential to revolutionize medicine by designing drug delivery systems that are both efficacious and highly selective. Determination of the affinity between cell lines and nanoparticles is thus of central importance, both to enable comparison of particles and to facilitate prediction of in vivo response. Attempts to compare particle performance can be dominated by experimental artifacts (including settling effects) or variability in experimental protocol. Instead, qualitative methods are generally used, limiting the reusability of many studies. Herein, we introduce a mathematical model-based approach to quantify the affinity between a cell-particle pairing, independent of the aforementioned confounding artifacts. The analysis presented can serve as a quantitative metric of the stealth, fouling, and targeting performance of nanoengineered particles in vitro. We validate this approach using a newly created in vitro dataset, consisting of seven different disulfide-stabilized poly(methacrylic acid) particles ranging from ~100 to 1000 nm in diameter that were incubated with three different cell lines (HeLa, THP-1, and RAW 264.7). We further expanded this dataset through the inclusion of previously published data and use it to determine which of five mathematical models best describe cell-particle association. We subsequently use this model to perform a quantitative comparison of cell-particle association for cell-particle pairings in our dataset. 
This analysis reveals a more complex cell-particle association relationship than a simplistic interpretation of the data, which erroneously assigns high affinity for all cell lines examined to large particles. Finally, we provide an online tool (http://bionano.xyz/estimator), which allows other researchers to easily apply this modeling approach to their experimental results.",2019-06-24 +25009735,Unification of multi-species vertebrate anatomy ontologies for comparative biology in Uberon.,"

Background

Elucidating disease and developmental dysfunction requires understanding variation in phenotype. Single-species model organism anatomy ontologies (ssAOs) have been established to represent this variation. Multi-species anatomy ontologies (msAOs; vertebrate skeletal, vertebrate homologous, teleost, amphibian AOs) have been developed to represent 'natural' phenotypic variation across species. Our aim has been to integrate ssAOs and msAOs for various purposes, including establishing links between phenotypic variation and candidate genes.

Results

Previously, msAOs contained a mixture of unique and overlapping content. This hampered integration and coordination due to the need to maintain cross-references or inter-ontology equivalence axioms to the ssAOs, or to perform large-scale obsolescence and modular import. Here we present the unification of anatomy ontologies into Uberon, a single ontology resource that enables interoperability among disparate data and research groups. As a consequence, independent development of TAO, VSAO, AAO, and vHOG has been discontinued.

Conclusions

The newly broadened Uberon ontology is a unified cross-taxon resource for metazoans (animals) that has been substantially expanded to include a broad diversity of vertebrate anatomical structures, permitting reasoning across anatomical variation in extinct and extant taxa. Uberon is a core resource that supports single- and cross-species queries for candidate genes using annotations for phenotypes from the systematics, biodiversity, medical, and model organism communities, while also providing entities for logical definitions in the Cell and Gene Ontologies. THE ONTOLOGY RELEASE FILES ASSOCIATED WITH THE ONTOLOGY MERGE DESCRIBED IN THIS MANUSCRIPT ARE AVAILABLE AT: http://purl.obolibrary.org/obo/uberon/releases/2013-02-21/ CURRENT ONTOLOGY RELEASE FILES ARE AVAILABLE ALWAYS AVAILABLE AT: http://purl.obolibrary.org/obo/uberon/releases/",2014-05-19 +31165321,I-PINE web server: an integrative probabilistic NMR assignment system for proteins.,"Various methods for understanding the structural and dynamic properties of proteins rely on the analysis of their NMR chemical shifts. These methods require the initial assignment of NMR signals to particular atoms in the sequence of the protein, a step that can be very time-consuming. The probabilistic interaction network of evidence (PINE) algorithm for automated assignment of backbone and side chain chemical shifts utilizes a Bayesian probabilistic network model that analyzes sequence data and peak lists from multiple NMR experiments. PINE, which is one of the most popular and reliable automated chemical shift assignment algorithms, has been available to the protein NMR community for longer than a decade. 
We announce here a new web server version of PINE, called Integrative PINE (I-PINE), which supports more types of NMR experiments than PINE (including three-dimensional nuclear Overhauser enhancement and four-dimensional J-coupling experiments) along with more comprehensive visualization of chemical shift based analysis of protein structure and dynamics. The I-PINE server is freely accessible at http://i-pine.nmrfam.wisc.edu . Help pages and tutorial including browser capability are available at: http://i-pine.nmrfam.wisc.edu/instruction.html . Sample data that can be used for testing the web server are available at: http://i-pine.nmrfam.wisc.edu/examples.html .",2019-05-01 +29982559,A database of wild rice germplasm of Oryza rufipogon species complex from different agro-climatic zones of India. ,"Rice is a staple food for the people of Asia that supplies more than 50% of the food energy globally. It is widely accepted that the crop domestication process has left behind substantial useful genetic diversity in their wild progenitor species that has huge potential for developing crop varieties with enhanced resistance to an array of biotic and abiotic stresses. In this context, Oryza rufipogon, Oryza nivara and their intermediate types wild rice germplasm/s collected from diverse agro-climatic regions would provide a rich repository of genes and alleles that could be utilized for rice improvement using genomics-assisted breeding. Here we present a database of detailed information on 614 such diverse wild rice accessions collected from different agro-climatic zones of India, including 46 different morphological descriptors, complete passport data and DNA fingerprints. The information has been stored in a web-based database entitled 'Indian Wild Rice (IWR) Database'. 
The information provided in the IWR Database will be useful for the rice geneticists and breeders for improvement of rice cultivars for yield, quality and resilience to climate change.Database URL: http://nksingh.nationalprof.in: 8080/iwrdb/index.jsp.",2018-01-01 +29617745,TISSUES 2.0: an integrative web resource on mammalian tissue expression. ,"Physiological and molecular similarities between organisms make it possible to translate findings from simpler experimental systems—model organisms—into more complex ones, such as human. This translation facilitates the understanding of biological processes under normal or disease conditions. Researchers aiming to identify the similarities and differences between organisms at the molecular level need resources collecting multi-organism tissue expression data. We have developed a database of gene–tissue associations in human, mouse, rat and pig by integrating multiple sources of evidence: transcriptomics covering all four species and proteomics (human only), manually curated and mined from the scientific literature. Through a scoring scheme, these associations are made comparable across all sources of evidence and across organisms. Furthermore, the scoring produces a confidence score assigned to each of the associations. The TISSUES database (version 2.0) is publicly accessible through a user-friendly web interface and as part of the STRING app for Cytoscape. In addition, we analyzed the agreement between datasets, across and within organisms, and identified that the agreement is mainly affected by the quality of the datasets rather than by the technologies used or organisms compared. 
http://tissues.jensenlab.org/",2018-01-01 +23935057,"metabolicMine: an integrated genomics, genetics and proteomics data warehouse for common metabolic disease research.","Common metabolic and endocrine diseases such as diabetes affect millions of people worldwide and have a major health impact, frequently leading to complications and mortality. In a search for better prevention and treatment, there is ongoing research into the underlying molecular and genetic bases of these complex human diseases, as well as into the links with risk factors such as obesity. Although an increasing number of relevant genomic and proteomic data sets have become available, the quantity and diversity of the data make their efficient exploitation challenging. Here, we present metabolicMine, a data warehouse with a specific focus on the genomics, genetics and proteomics of common metabolic diseases. Developed in collaboration with leading UK metabolic disease groups, metabolicMine integrates data sets from a range of experiments and model organisms alongside tools for exploring them. The current version brings together information covering genes, proteins, orthologues, interactions, gene expression, pathways, ontologies, diseases, genome-wide association studies and single nucleotide polymorphisms. Although the emphasis is on human data, key data sets from mouse and rat are included. These are complemented by interoperation with the RatMine rat genomics database, with a corresponding mouse version under development by the Mouse Genome Informatics (MGI) group. The web interface contains a number of features including keyword search, a library of Search Forms, the QueryBuilder and list analysis tools. This provides researchers with many different ways to analyse, view and flexibly export data. Programming interfaces and automatic code generation in several languages are supported, and many of the features of the web interface are available through web services. 
The combination of diverse data sets integrated with analysis tools and a powerful query system makes metabolicMine a valuable research resource. The web interface makes it accessible to first-time users, whereas the Application Programming Interface (API) and web services provide convenient data access and tools for bioinformaticians. metabolicMine is freely available online at http://www.metabolicmine.org Database URL: http://www.metabolicmine.org.",2013-08-09 +29522157,PriLive: privacy-preserving real-time filtering for next-generation sequencing.,"Motivation:In next-generation sequencing, re-identification of individuals and other privacy-breaching strategies can be applied even for anonymized data. This also holds true for applications in which human DNA is acquired as a by-product, e.g. for viral or metagenomic samples from a human host. Conventional data protection strategies including cryptography and post-hoc filtering are only appropriate for the final and processed sequencing data. This can result in an insufficient level of data protection and a considerable time delay in the further analysis workflow. Results:We present PriLive, a novel tool for the automated removal of sensitive data while the sequencing machine is running. Thereby, human sequence information can be detected and removed before being completely produced. This facilitates the compliance with strict data protection regulations. The unique characteristic to cause almost no time delay for further analyses is also a clear benefit for applications other than data protection. Especially if the sequencing data are dominated by known background signals, PriLive considerably accelerates consequent analyses by having only fractions of input data. Besides these conceptual advantages, PriLive achieves filtering results at least as accurate as conventional post-hoc filtering tools. 
Availability and implementation:PriLive is open-source software available at https://gitlab.com/rki_bioinformatics/PriLive. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +29376339,Discovering Putative Peptides Encoded from Noncoding RNAs in Ribosome Profiling Data of Arabidopsis thaliana.,"Most noncoding RNAs are considered by their expression at low levels and as having a limited phylogenetic distribution in the cytoplasm, indicating that they may be only involved in specific biological processes. However, recent studies showed the protein-coding potential of ncRNAs, indicating that they might be a source of some special proteins. Although there are increasing noncoding RNAs identified to be able to code proteins, it is challenging to distinguish coding RNAs from previously annotated ncRNAs, and to detect the proteins from their translation. In this article, we designed a pipeline to identify these noncoding RNAs in Arabidopsis thaliana from three NCBI GEO data sets with coding potential and predict their translation products. 31 311 noncoding RNAs were predicted to be translated into peptides, and they showed lower conservation rate than common proteins. In addition, we built an interaction network between these peptides and annotated Arabidopsis proteins using BIPS, which included 69 peptides from noncoding RNAs. Peptides in the interaction network showed different characteristics from other noncoding RNA-derived peptides, and they participated in several crucial biological processes, such as photorespiration and stress-responses. All the information of putative ncPEPs and their interaction with proteins predicted above are finally integrated in a database, PncPEPDB ( http://bis.zju.edu.cn/PncPEPDB ). 
These results showed that peptides derived from noncoding RNAs may play important roles in noncoding RNA regulation, which provided another hypothesis that noncoding RNA may regulate the metabolism via their translation products.",2018-02-05 +29740786,Rapid Evidence Review of Mobile Applications for Self-management of Diabetes.,"BACKGROUND:Patients with diabetes lack information on which commercially available applications (apps) improve diabetes-related outcomes. We conducted a rapid evidence review to examine features, clinical efficacy, and usability of apps for self-management of type 1 and type 2 diabetes in adults. METHODS:Ovid/Medline and the Cochrane Database of Systematic Reviews were searched for systematic reviews and technology assessments. Reference lists of relevant systematic reviews were examined for primary studies. Additional searches for primary studies were conducted online, through Ovid/Medline, Embase, CINAHL, and ClinicalTrials.gov . Studies were evaluated for eligibility based on predetermined criteria, data were extracted, study quality was assessed using a risk of bias tool, information on app features was collected, and app usability was assessed. Results are summarized qualitatively. RESULTS:Fifteen articles evaluating 11 apps were identified: six apps for type 1 and five apps for type 2 diabetes. Common features of apps included setting reminders and tracking blood glucose and hemoglobin A1c (HbA1c), medication use, physical activity, and weight. Compared with controls, use of eight apps, when paired with support from a healthcare provider or study staff, improved at least one outcome, most often HbA1c. Patients did not experience improvements in quality of life, blood pressure, or weight, regardless of app used or type of diabetes. Study quality was variable. 
Of the eight apps available for usability testing, two were scored ""acceptable,"" three were ""marginal,"" and three were ""not acceptable."" DISCUSSION:Limited evidence suggests that use of some commercially available apps, when combined with additional support from a healthcare provider or study staff, may improve some short-term diabetes-related outcomes. The impact of these apps on longer-term outcomes is unclear. More rigorous and longer-term studies of apps are needed. REGISTRATION:This review was funded by the Agency for Healthcare Research and Quality (AHRQ). The protocol is available at: http://www.effectivehealthcare.ahrq.gov/topics/diabetes-mobile-devices/research-protocol .",2018-05-08 +24723423,OrthoMaM v8: a database of orthologous exons and coding sequences for comparative genomics in mammals.,"Comparative genomic studies extensively rely on alignments of orthologous sequences. Yet, selecting, gathering, and aligning orthologous exons and protein-coding sequences (CDS) that are relevant for a given evolutionary analysis can be a difficult and time-consuming task. In this context, we developed OrthoMaM, a database of ORTHOlogous MAmmalian Markers describing the evolutionary dynamics of orthologous genes in mammalian genomes using a phylogenetic framework. Since its first release in 2007, OrthoMaM has regularly evolved, not only to include newly available genomes but also to incorporate up-to-date software in its analytic pipeline. This eighth release integrates the 40 complete mammalian genomes available in Ensembl v73 and provides alignments, phylogenies, evolutionary descriptor information, and functional annotations for 13,404 single-copy orthologous CDS and 6,953 long exons. The graphical interface allows to easily explore OrthoMaM to identify markers with specific characteristics (e.g., taxa availability, alignment size, %G+C, evolutionary rate, chromosome location). 
It hence provides an efficient solution to sample preprocessed markers adapted to user-specific needs. OrthoMaM has proven to be a valuable resource for researchers interested in mammalian phylogenomics, evolutionary genomics, and has served as a source of benchmark empirical data sets in several methodological studies. OrthoMaM is available for browsing, query and complete or filtered downloads at http://www.orthomam.univ-montp2.fr/.",2014-04-09 +,Rapid Adjustment of Clinical Decision Support in Response to Updated Recommendations for Palivizumab Eligibility,"Summary Background: Palivizumab is effective at reducing hospitalizations due to +respiratory syncytial virus among high-risk children, but is indicated for a small +population. Identification of patients eligible to receive palivizumab is labor-intensive +and error-prone. To support patient identification we developed Clinical Decision Support +(CDS) based on published recommendations in 2012. This CDS was developed using a +systematic process, which directly linked computer code to a recommendation’s narrative +text. In 2014, updated recommendations were published, which changed several key criteria +used to determine eligible patients. Objective: Assess the effort required to update CDS in response to new palivizumab +recommendations and identify factors that impacted these efforts. Methods: We reviewed the updated American Academy of Pediatrics (AAP) policy +statement from Aug 2014 and identified areas of divergence from the prior publication. We +modified the CDS to account for each difference. We recorded time spent on each activity +to approximate the total effort required to update the CDS. Results: Of the 15 recommendations in the initial policy statement, 7 required +updating. The CDS update was completed in 11 person-hours. Comparison of old and new +recommendations was facilitated by the AAP policy statement structure and required 3 +hours. 
Validation of the revised logic required 2 hours by a clinical domain expert. An +informaticist required 3 hours to update and test the CDS. This included adding 24 lines +and deleting 37 lines of code. Updating relevant data queries took an additional 3 hours +and involved 10 edits. Conclusion: We quickly adapted CDS in response to changes in recommendations for +palivizumab administration. The consistent AAP policy statement structure and the link we +developed between these statements and the CDS rules facilitated our efforts. We recommend +that CDS implementers establish linkages between published narrative recommendations and +their executable rules to facilitate maintenance efforts. Citation: Michel J, Utidjian LH, Karavite D, Hogan A, Ramos MJ, Miller J, Shiffman +RN, Grundmeier RW. Rapid adjustment of clinical decision support in response to updated +recommendations for palivizumab eligibility. Appl Clin Inform 2017; 8: 581–592 https://doi.org/10.4338/ACI-2016-10-RA-0173",2017-04-01 +28025199,Computational prediction of species-specific malonylation sites via enhanced characteristic strategy.,"

Motivation

Protein malonylation is a novel post-translational modification (PTM) which orchestrates a variety of biological processes. Annotation of malonylation in proteomics is the first-crucial step to decipher its physiological roles which are implicated in the pathological processes. Comparing with the expensive and laborious experimental research, computational prediction can provide an accurate and effective approach to the identification of many types of PTMs sites. However, there is still no online predictor for lysine malonylation.

Results

By searching from literature and database, a well-prepared up-to-data benchmark datasets were collected in multiple organisms. Data analyses demonstrated that different organisms were preferentially involved in different biological processes and pathways. Meanwhile, unique sequence preferences were observed for each organism. Thus, a novel malonylation site online prediction tool, called MaloPred, which can predict malonylation for three species, was developed by integrating various informative features and via an enhanced feature strategy. On the independent test datasets, AUC (area under the receiver operating characteristic curves) scores are obtained as 0.755, 0.827 and 0.871 for Escherichia coli ( E.coli ), Mus musculus ( M.musculus ) and Homo sapiens ( H.sapiens ), respectively. The satisfying results suggest that MaloPred can provide more instructive guidance for further experimental investigation of protein malonylation.

Availability and implementation

http://bioinfo.ncu.edu.cn/MaloPred.aspx .

Contact

jdqiu@ncu.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +31667690,"A simple, web-based repository for the management, access and analysis of micrographic images.","Microscopy is advancing at a rapid pace, enabling high-speed, high-resolution analyses to be conducted in a wide range of cellular contexts. For example, the capacity to quickly capture high-resolution images from multiple optical sections over multiple channels with confocal microscopy has allowed researchers to gain deeper understanding of tissue morphology via techniques such as three-dimensional rendering, as have more recent advances such as lattice light sheet microscopy and superresolution structured illumination microscopy. With this, though, comes the challenge of storing, curating, analysing and sharing data. While there are ways in which this has been attempted previously, few approaches have provided a central repository in which all of these different aspects of microscopy can be seamlessly integrated. Here, we describe a web-based storage and analysis platform called Microndata, that enables relatively straightforward storage, annotation, tracking, analysis and multi-user access to micrographs. This easy to use tool will simplify and harmonise laboratory work flows, and, importantly, will provide a central storage repository that is readily accessed, even after the researcher responsible for capturing the images has left the laboratory. Microndata is open-source software, available at http://www.microndata.net/.",2019-10-30 +30358822,A framework for identifying dysregulated chromatin regulators as master regulators in human cancer.,"

Motivation

Chromatin regulators (CRs) are frequently dysregulated to reprogram the epigenetic landscape of the cancer genome. However, the underpinnings of the dysregulation of CRs and their downstream effectors remain to be elucidated.

Results

Here, we designed an integrated framework based on multi-omics data to identify candidate master regulatory CRs affected by genomic alterations across eight cancer types in The Cancer Genome Atlas. Most of them showed consistent activated or repressed (i.e. oncogenic or tumor-suppressive) roles in cancer initiation and progression. In order to further explore the insight mechanism of the dysregulated CRs, we developed an R package ModReg based on differential connectivity to identify CRs as modulators of transcription factors (TFs) involved in tumorigenesis. Our analysis revealed that the connectivity between TFs and their target genes (TGs) tended to be disrupted in the patients who had a high expression of oncogenic CRs or low-expression of tumor-suppressive CRs. As a proof-of-principle study, 14 (82.4%) of the top-ranked 17 driver CRs in liver cancer were able to be validated by literature mining or experiments including shRNA knockdown and dCas9-based epigenetic editing. Moreover, we confirmed that CR SIRT7 physically interacted with TF NFE2L2, and positively modulated the transcriptional program of NFE2L2 by affecting ∼64% of its TGs.

Availability and implementation

ModReg is freely accessible at http://cis.hku.hk/software/ModReg.tar.gz.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-06-01 +31211546,Esketamine for Treatment-Resistant Depression,"

Methods

These bulletins are not systematic reviews and do not involve critical appraisal or include a detailed summary of study findings. Rather, they present an overview of the technology and available evidence. They are not intended to provide recommendations for or against a particular technology.

Literature search

A limited literature search was conducted using the following bibliographic databases: MEDLINE, Embase, PsychInfo, PubMed, and the Cochrane Library. Grey literature was identified by searching relevant sections of the Grey Matters checklist (https://www.cadth.ca/grey-matters). No methodological filters were applied. The search was limited to English-language documents but not limited by publication year. Regular alerts updated the search until project completion; only citations retrieved before March 13, 2019 were incorporated into the analysis.

Study selection

One author screened the literature search results and reviewed the full text of all potentially relevant studies. Studies were considered for inclusion if the intervention was esketamine (S-ketamine) and studied for treatment-resistant depression in a phase III trial. Conference abstracts and grey literature were included when they provided additional information to that available in the published studies.

Peer review

A draft version of this bulletin was reviewed by one clinical expert. The drug manufacturer also provided input on an earlier draft of this report.",2019-06-19 +31216283,In silico analysis of PFN1 related to amyotrophic lateral sclerosis.,"Profilin 1 (PFN1) protein plays key roles in neuronal growth and differentiation, membrane trafficking, and regulation of the actin cytoskeleton. Four natural variants of PFN1 were described as related to ALS, the most common adult-onset motor neuron disorder. However, the pathological mechanism of PFN1 in ALS is not yet completely understood. The goal of this work is to thoroughly analyze the effects of the ALS-related mutations on PFN1 structure and function using computational simulations. Here, PhD-SNP, PMUT, PolyPhen-2, SIFT, SNAP, SNPS&GO, SAAP, nsSNPAnalyzer, SNPeffect4.0 and I-Mutant2.0 were used to predict the functional and stability effects of PFN1 mutations. ConSurf was used for the evolutionary conservation analysis, and GROMACS was used to perform the MD simulations. The mutations C71G, M114T, and G118V, but not E117G, were predicted as deleterious by most of the functional prediction algorithms that were used. The stability prediction indicated that the ALS-related mutations could destabilize PFN1. The ConSurf analysis indicated that the mutation C71G, M114T, E117G, and G118V occur in highly conserved positions. The MD results indicated that the studied mutations could affect the PFN1 flexibility at the actin and PLP-binding domains, and consequently, their intermolecular interactions. It may be therefore related to the functional impairment of PFN1 upon C71G, M114T, E117G and G118V mutations, and their involvement in ALS development. 
We also developed a database, SNPMOL (http://www.snpmol.org/), containing the results presented on this paper for biologists and clinicians to exploit PFN1 and its natural variants.",2019-06-19 +29197720,ZikaBase: An integrated ZIKV- Human Interactome Map database.,"Re-emergence of ZIKV has caused infections in more than 1.5 million people. The molecular mechanism and pathogenesis of ZIKV is not well explored due to unavailability of adequate model and lack of publically accessible resources to provide information of ZIKV-Human protein interactome map till today. This study made an attempt to curate the ZIKV-Human interaction proteins from published literatures and RNA-Seq data. 11 direct interaction, 12 associated genes are retrieved from literatures and 3742 Differentially Expressed Genes (DEGs) are obtained from RNA-Seq analysis. The genes have been analyzed to construct the ZIKV-Human Interactome Map. The importance of the study has been illustrated by the enrichment analysis and observed that direct interaction and associated genes are enriched in viral entry into host cell. Also, ZIKV infection modulates 32% signal and 27% immune system pathways. The integrated database, ZikaBase has been developed to help the virology research community and accessible at https://test5.bicpu.edu.in.",2017-12-01 +,obitools: a unix‐inspired software package for DNA metabarcoding,"DNA metabarcoding offers new perspectives in biodiversity research. This recently developed approach to ecosystem study relies heavily on the use of next‐generation sequencing (NGS) and thus calls upon the ability to deal with huge sequence data sets. The obitools package satisfies this requirement thanks to a set of programs specifically designed for analysing NGS data in a DNA metabarcoding context. 
Their capacity to filter and edit sequences while taking into account taxonomic annotation helps to set up tailor‐made analysis pipelines for a broad range of DNA metabarcoding applications, including biodiversity surveys or diet analyses. The obitools package is distributed as an open source software available on the following website: http://metabarcoding.org/obitools. A Galaxy wrapper is available on the GenOuest core facility toolshed: http://toolshed.genouest.org.",2016-01-01 +31567371,Interaction between eNOS gene polymorphism and current smoking on susceptibility to coronary heart disease in Chinese people.,"

Objective

This study aims to explore the relation between endothelial nitric oxide synthase (eNOS) single-nucleotide polymorphisms (SNPs) and the risk of coronary heart disease (CHD).

Methods

SNPstats (online software: http://bioinfo.iconcologia.net/SNPstats) was performed to test Hardy-Weinberg equilibrium in controls. Generalized multifactor dimensionality reduction (GMDR) was adopted to screen the preferable interaction between eNOS SNPs and smoking.

Results

The frequency for the rs1799983-T allele was 31.1% in CHD patients, which was significantly higher than that of 19.8% in controls (P < 0.05). The frequency for the rs891512-A allele was 28.8% in cases, which was also significantly higher than that of 20.1% in controls (P < 0.05). Logistic regression analysis showed that both rs1799983-T and rs891512-A alleles were related with increased risk of CHD, and the odds ratios (ORs) [95% confidence interval (CI)] were 1.71 (1.31-2.15) and 1.57 (1.14-2.07), respectively. High-order interactions were investigated among SNPs and environmental factors using the GMDR method. The data showed that a two-locus model (rs1799983 × smoking) had a testing accuracy of 0.60 (P = 0.001). We found that current smokers with rs1799983-GT or TT within eNOS gene have the highest CHD risk, compared to never smokers with rs1799983-GG genotype, OR (95% CI) = 2.74 (1.78-3.85), after covariates adjustment for age, gender, BMI, and alcohol drinking.

Conclusion

The rs1799983-T and rs891512-A alleles and interaction between rs1799983 and smoking were all risk factors of CHD.",2020-01-01 +31368479,Bayesian estimation of genetic regulatory effects in high-throughput reporter assays.,"

Motivation

High-throughput reporter assays dramatically improve our ability to assign function to noncoding genetic variants, by measuring allelic effects on gene expression in the controlled setting of a reporter gene. Unlike genetic association tests, such assays are not confounded by linkage disequilibrium when loci are independently assayed. These methods can thus improve the identification of causal disease mutations. While work continues on improving experimental aspects of these assays, less effort has gone into developing methods for assessing the statistical significance of assay results, particularly in the case of rare variants captured from patient DNA.

Results

We describe a Bayesian hierarchical model, called Bayesian Inference of Regulatory Differences, which integrates prior information and explicitly accounts for variability between experimental replicates. The model produces substantially more accurate predictions than existing methods when allele frequencies are low, which is of clear advantage in the search for disease-causing variants in DNA captured from patient cohorts. Using the model, we demonstrate a clear tradeoff between variant sequencing coverage and numbers of biological replicates, and we show that the use of additional biological replicates decreases variance in estimates of effect size, due to the properties of the Poisson-binomial distribution. We also provide a power and sample size calculator, which facilitates decision making in experimental design parameters.

Availability and implementation

The software is freely available from www.geneprediction.org/bird. The experimental design web tool can be accessed at http://67.159.92.22:8080.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +31350877,DeepGOPlus: improved protein function prediction from sequence.,"

Motivation

Protein function prediction is one of the major tasks of bioinformatics that can help in wide range of biological problems such as understanding disease mechanisms or finding drug targets. Many methods are available for predicting protein functions from sequence based features, protein-protein interaction networks, protein structure or literature. However, other than sequence, most of the features are difficult to obtain or not available for many proteins thereby limiting their scope. Furthermore, the performance of sequence-based function prediction methods is often lower than methods that incorporate multiple features and predicting protein functions may require a lot of time.

Results

We developed a novel method for predicting protein functions from sequence alone which combines deep convolutional neural network (CNN) model with sequence similarity based predictions. Our CNN model scans the sequence for motifs which are predictive for protein functions and combines this with functions of similar proteins (if available). We evaluate the performance of DeepGOPlus using the CAFA3 evaluation measures and achieve an Fmax of 0.390, 0.557 and 0.614 for BPO, MFO and CCO evaluations, respectively. These results would have made DeepGOPlus one of the three best predictors in CCO and the second best performing method in the BPO and MFO evaluations. We also compare DeepGOPlus with state-of-the-art methods such as DeepText2GO and GOLabeler on another dataset. DeepGOPlus can annotate around 40 protein sequences per second on common hardware, thereby making fast and accurate function predictions available for a wide range of proteins.

Availability and implementation

http://deepgoplus.bio2vec.net/ .

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +22533462,"Juvenile and adult-onset ALS/MND among Africans: incidence, phenotype, survival: a review.","

Aim

We reviewed the epidemiology of ALS among subjects of African origin, considering incidence, phenotype and prognosis.

Methods

We searched Medline, Scopus, Science direct, Bibliothèque Virtuelle de Neurologie Africaine (BVNA), ( http://www-ient.unilim.fr/ ) and African journal OnLine databases using the following search terms ""amyotrophic lateral sclerosis (ALS)"", ""motor neuron disease (MND)"" or ""Charcot disease"", in combination with ""Africa"", ""ethnic groups"", ""blacks"" or ""epidemiology"". Of 1264 references examined, 35 were included in this review.

Results and discussion

Among the 35 references, 19 studies were performed in the African continent and dealt with MND/ALS; four other studies focused on ALS-like syndromes; finally, 12 studies were not performed in Africa but focused on either incidence and mortality or survival of ALS in subjects of African origin. Several characteristics of ALS among Africans or subjects of African origin were identified: (i) lower incidence rates among people of African origin living in western countries, (ii) higher incidence of classic ALS among men, (iii) presence of juvenile form, (iv) younger age at onset of classic ALS. We cannot draw firm conclusions about (i) the prognosis in African ALS patients, (ii) prognostic factors, (iii) genetic or behavioral factors affecting incidence or clinical phenotype.

Conclusion

Further multicenter prospective studies with homogeneous methodological approaches need to be performed in Africa to clarify the situation.",2012-05-01 +23716197,EpiCombFlu: exploring known influenza epitopes and their combination to design a universal influenza vaccine.,"

Motivation

Influenza is responsible for half a million deaths annually, and vaccination is the best preventive measure against this pervasive health problem. Influenza vaccines developed from surveillance data of each season are strain-specific, and therefore, are unable to provide protection against pandemic strains arising from antigenic shift and drift. Seasonal epidemics and occasional pandemics of influenza have created a need for a universal influenza vaccine (UIV). Researchers have shown that a combination of conserved epitopes has the potential to be used as a UIV.

Result

In the present work, available data on strains, proteins, epitopes and their associated information were used to develop a Web resource, 'EpiCombFlu', which can explore different influenza epitopes and their combinations for conservation among different strains, population coverage and immune response for vaccine design. Forward selection algorithm was implemented in EpiCombFlu to select optimum combination of epitopes that may be expressed and evaluated as potential UIV.

Availability

The Web resource is freely available at http://117.211.115.67/influenza/home.html.

Contact

chittaranjan.rout@juit.ac.in

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-28 +31240299,Increasing the efficiency and accuracy of the ABACUS protein sequence design method.,"

Motivation

The ABACUS (a backbone-based amino acid usage survey) method uses unique statistical energy functions to carry out protein sequence design. Although some of its results have been experimentally verified, its accuracy remains improvable because several important components of the method have not been specifically optimized for sequence design or in contexts of other parts of the method. The computational efficiency also needs to be improved to support interactive online applications or the consideration of a large number of alternative backbone structures.

Results

We derived a model to measure solvent accessibility with larger mutual information with residue types than previous models, optimized a set of rotamers which can approximate the sidechain atomic positions more accurately, and devised an empirical function to treat inter-atomic packing with parameters fitted to native structures and optimized in consistence with the rotamer set. Energy calculations have been accelerated by interpolation between pre-determined representative points in high-dimensional structural feature spaces. Sidechain repacking tests showed that ABACUS2 can accurately reproduce the conformation of native sidechains. In sequence design tests, the native residue type recovery rate reached 37.7%, exceeding the value of 32.7% for ABACUS1. Applying ABACUS2 to designed sequences on three native backbones produced proteins shown to be well-folded by experiments.

Availability and implementation

The ABACUS2 sequence design server can be visited at http://biocomp.ustc.edu.cn/servers/abacus-design.php.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +31319950,Development and use of a Cytoscape app for GRNCOP2.,"

Background and objective

Gene regulatory networks (GRNs) are essential for understanding most molecular processes. In this context, the so-called model-free approaches have an advantage modeling the complex topologies behind these dynamic molecular networks, since most GRNs are difficult to map correctly by any other mathematical model. Abstract model-free approaches, also known as rule-based extraction methods, offer valuable benefits when performing data-driven analysis; such as requiring the least amount of data and simplifying the inference of large models at a faster analysis speed. In particular, GRNCOP2 is a combinatorial optimization method with an adaptive criterion for the discretization of gene expression data and high performance, in contrast to other rule-based extraction methods for discovering GRNs. However, the analysis of the large relational structures of the networks inferred by GRNCOP2 requires the support of effective tools for interactive network visualization and topological analysis of the extracted associations. This need motivated the possibility of integrating GRNCOP2 in the Cytoscape ecosystem in order to benefit from Cytoscape's core functionality, as well as all the other apps in its ecosystem.

Methods

In this paper, we introduce the implementation of a GRNCOP2 Cytoscape app. This incorporation to Cytoscape platform includes new functionality for GRN visualizations, dynamic user-interaction and integration with other apps for topological analysis of the networks.

Results

In order to demonstrate the usefulness of integrating GRNCOP2 in Cytoscape, the new app was used to tackle a novel use case for GRNCOP2: the analysis of crosstalk between pathways. In this regard, datasets associated with Alzheimer's disease (AD) were analyzed using GRNCOP2 app and other apps of the Cytoscape ecosystem by performing a topological analysis of the AD progression and its synchronization with the Ubiquitin Mediated Proteolysis pathway. Finally, the biological relevance of the findings achieved by this new app were evaluated by searching for evidence in the literature.

Conclusions

The proposed crosstalk analysis with the new GRNCOP2 app focused on assessing the phase of the Alzheimer's disease progression where the coordination with the Ubiquitin Mediated Proteolysis pathway increases, and identifying the genes that explain the signalling between these cellular processes. Both questions were explored by topological contrastive analysis of the GRNs generated for the GRNCOP2 app, where several facilities of Cytoscape were exploited. The topological patterns inferred by this new App have been consistent with biological evidence reported in the scientific literature, illustrating the effectiveness of using this new GRNCOP2 App in pathway analysis.

Availability

The GRNCOP2 App is freely available at the official Cytoscape app store: http://apps.cytoscape.org/apps/grncop2.",2019-06-04 +32901468,Treatment of classic phenylketonuria in Poland in the years 2009-2015 based on the database of the Polish National Health Fund.,"

Introduction

To avoid the risk of intellectual disabilities, newborns in Poland are screened for phenylketonuria and are recommended to start a life-long phenylalanine-restricted diet shortly after birth. The aim of this paper is to evaluate the health care for patients with classical phenylketonuria in Poland.

Material and methods

We reviewed the National Health Fund's reporting data concerning information on healthcare services for patients with classical phenylketonuria (PKU), which were reported to the payer by the healthcare service providers between 2009 and 2015. The analysis was prepared within the framework of mapping the health care needs of patients with metabolic diseases published in December 2016 (http://www.mapypotrzebzdrowotnych.mz.gov.pl/).

Results

A total of 2706 patients with PKU (including 1180 children) were registered in the healthcare system in the period covered. The estimated national prevalence of PKU was 1 per 7758 live births. Paediatric patients up to 12 months of age accounted for over 40% of all visits to outpatient clinics. Patients over 28 years of age accounted for only 1% of all PKU patients receiving specialist outpatient care. There were twice as many clinics providing health care to children than to adults. The majority of adult patients received healthcare from the same providers as children. Sixty-nine percent of adults and 64% of children were treated in the two largest outpatient centres. There were 12 deaths, with a median age of 63 years. The working-age adults accounted for 50% of the deaths.

Conclusions

Adult patients with PKU do not receive sufficient healthcare. The discontinuation of healthcare by adults with PKU can result from the lack of an adequate transition process from paediatric to adult care.",2020-01-01 +31350558,"Towards accurate high-throughput ligand affinity prediction by exploiting structural ensembles, docking metrics and ligand similarity.","

Motivation

Nowadays, virtual screening (VS) plays a major role in the process of drug development. Nonetheless, an accurate estimation of binding affinities, which is crucial at all stages, is not trivial and may require target-specific fine-tuning. Furthermore, drug design also requires improved predictions for putative secondary targets among which is Estrogen Receptor alpha (ERα).

Results

VS based on combinations of Structure-Based VS (SBVS) and Ligand-Based VS (LBVS) is gaining momentum to improve VS performances. In this study, we propose an integrated approach using ligand docking on multiple structural ensembles to reflect receptor flexibility. Then, we investigate the impact of the two different types of features (structure-based and ligand molecular descriptors) on affinity predictions using a random forest algorithm. We find that ligand-based features have lower predictive power (rP = 0.69, R2 = 0.47) than structure-based features (rP = 0.78, R2 = 0.60). Their combination maintains high accuracy (rP = 0.73, R2 = 0.50) on the internal test set, but it shows superior robustness on external datasets. Further improvement and extending the training dataset to include xenobiotics, leads to a novel high-throughput affinity prediction method for ERα ligands (rP = 0.85, R2 = 0.71). The presented prediction tool is provided to the community as a dedicated satellite of the @TOME server in which one can upload a ligand dataset in mol2 format and get ligand docked and affinity predicted.

Availability and implementation

http://edmon.cbs.cnrs.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2020-01-01 +22807998,SyStemCell: a database populated with multiple levels of experimental data from stem cell differentiation research.,"Elucidation of the mechanisms of stem cell differentiation is of great scientific interest. Increasing evidence suggests that stem cell differentiation involves changes at multiple levels of biological regulation, which together orchestrate the complex differentiation process; many related studies have been performed to investigate the various levels of regulation. The resulting valuable data, however, remain scattered. Most of the current stem cell-relevant databases focus on a single level of regulation (mRNA expression) from limited stem cell types; thus, a unifying resource would be of great value to compile the multiple levels of research data available. Here we present a database for this purpose, SyStemCell, deposited with multi-level experimental data from stem cell research. The database currently covers seven levels of stem cell differentiation-associated regulatory mechanisms, including DNA CpG 5-hydroxymethylcytosine/methylation, histone modification, transcript products, microRNA-based regulation, protein products, phosphorylation proteins and transcription factor regulation, all of which have been curated from 285 peer-reviewed publications selected from PubMed. The database contains 43,434 genes, recorded as 942,221 gene entries, for four organisms (Homo sapiens, Mus musculus, Rattus norvegicus, and Macaca mulatta) and various stem cell sources (e.g., embryonic stem cells, neural stem cells and induced pluripotent stem cells). Data in SyStemCell can be queried by Entrez gene ID, symbol, alias, or browsed by specific stem cell type at each level of genetic regulation. 
An online analysis tool is integrated to assist researchers to mine potential relationships among different regulations, and the potential usage of the database is demonstrated by three case studies. SyStemCell is the first database to bridge multi-level experimental information of stem cell studies, which can become an important reference resource for stem cell researchers. The database is available at http://lifecenter.sgst.cn/SyStemCell/.",2012-07-13 +32265998,"Oncology nurses' compassion fatigue, burn out and compassion satisfaction.","

Background

For oncology nurses, compassion fatigue, burn out and compassion satisfactions are frequently experienced psychosocial consequences of the oncology work environment. Surveying such phenomena helps to understand how nurses feel and behave when cancer care is provided. Besides, tracking the evolving nature of those three concepts can lend a hand for the early detection of personal and professional suffering of nurses while offering some healing remedies to their struggling bodies and souls.

Purpose

The purpose of this study was to explore the level of compassion fatigue, burn out and compassion satisfaction among a group of specialized oncology nurses. Besides, this study aimed to detect some probable interesting inferences between compassion satisfaction and the concept of rest and leisure. Correlations between compassion fatigue, burn out and compassion satisfaction were investigated. Correlations between oncology nurses' scores on the three subscales and a group of demographic, organizational and leisure-related variables were examined.

Methods

This study adopted a descriptive correlation design to survey compassion fatigue, burn out and compassion satisfaction among a convenient sample of 100 oncology nurses who work in a specialized cancer care centre. Participants completed compassion fatigue self-test developed by Figley (Compassion fatigue, New York: Brunner/Mazel. B. Hudnall Stamm, Traumatic Stress Research Group; 1995-1998. http://www.dartmouth.edu/~bhstamm/index.htm, 1995) and a literature-based demographic survey. Analysis of data included descriptive statistics and Pearson correlation co-efficient.

Results

Nurses reported a low level of compassion satisfaction, moderate risk for burn out and an extremely high risk for compassion fatigue. Results revealed significant negative relationships among compassion satisfaction and the number of dependents per nurse. Additionally the correlation between compassion satisfaction and the nurses' number of hours slept was positive. Only two components of the concept rest and leisure yielded statistical significance when correlated to the concept of compassion satisfaction. A significant negative relationship was observed between compassion satisfaction and compassion fatigue while a strong positive relationship was observed between compassion fatigue and burn out.

Conclusions

The studied oncology nurses sample had evidently low level of compassion satisfaction when contrasted to the significantly increased risks of burn out and compassion fatigue. Thus, health authorities and management are advised to care, in a holistic approach, for nurses who work in oncology departments. Staff-oriented services that offer comfort, reward, leisure, screening, consultation and support are urgently recommended.",2020-03-31 +28442501,ChloroKB: A Web Application for the Integration of Knowledge Related to Chloroplast Metabolic Network.,"Higher plants, as autotrophic organisms, are effective sources of molecules. They hold great promise for metabolic engineering, but the behavior of plant metabolism at the network level is still incompletely described. Although structural models (stoichiometry matrices) and pathway databases are extremely useful, they cannot describe the complexity of the metabolic context, and new tools are required to visually represent integrated biocurated knowledge for use by both humans and computers. Here, we describe ChloroKB, a Web application (http://chlorokb.fr/) for visual exploration and analysis of the Arabidopsis (Arabidopsis thaliana) metabolic network in the chloroplast and related cellular pathways. The network was manually reconstructed through extensive biocuration to provide transparent traceability of experimental data. Proteins and metabolites were placed in their biological context (spatial distribution within cells, connectivity in the network, participation in supramolecular complexes, and regulatory interactions) using CellDesigner software. The network contains 1,147 reviewed proteins (559 localized exclusively in plastids, 68 in at least one additional compartment, and 520 outside the plastid), 122 proteins awaiting biochemical/genetic characterization, and 228 proteins for which genes have not yet been identified. 
The visual presentation is intuitive and browsing is fluid, providing instant access to the graphical representation of integrated processes and to a wealth of refined qualitative and quantitative data. ChloroKB will be a significant support for structural and quantitative kinetic modeling, for biological reasoning, when comparing novel data with established knowledge, for computer analyses, and for educational purposes. ChloroKB will be enhanced by continuous updates following contributions from plant researchers.",2017-04-25 +31075275,"PhyreRisk: A Dynamic Web Application to Bridge Genomics, Proteomics and 3D Structural Data to Guide Interpretation of Human Genetic Variants.","PhyreRisk is an open-access, publicly accessible web application for interactively bridging genomic, proteomic and structural data facilitating the mapping of human variants onto protein structures. A major advance over other tools for sequence-structure variant mapping is that PhyreRisk provides information on 20,214 human canonical proteins and an additional 22,271 alternative protein sequences (isoforms). Specifically, PhyreRisk provides structural coverage (partial or complete) for 70% (14,035 of 20,214 canonical proteins) of the human proteome, by storing 18,874 experimental structures and 84,818 pre-built models of canonical proteins and their isoforms generated using our in house Phyre2. PhyreRisk reports 55,732 experimentally, multi-validated protein interactions from IntAct and 24,260 experimental structures of protein complexes. Another major feature of PhyreRisk is that, rather than presenting a limited set of precomputed variant-structure mapping of known genetic variants, it allows the user to explore novel variants using, as input, genomic coordinates formats (Ensembl, VCF, reference SNP ID and HGVS notations) and Human Build GRCh37 and GRCh38. PhyreRisk also supports mapping variants using amino acid coordinates and searching for genes or proteins of interest. 
PhyreRisk is designed to empower researchers to translate genetic data into protein structural information, thereby providing a more comprehensive appreciation of the functional impact of variants. PhyreRisk is freely available at http://phyrerisk.bc.ic.ac.uk.",2019-05-07 +31570503,The mechanism of thin filament regulation: Models in conflict?,"In a recent JGP article, Heeley et al. (2019. J. Gen. Physiol https://doi.org/10.1085/jgp.201812198) reopened the debate about two- versus three-state models of thin filament regulation. The authors review their work, which measures the rate constant of Pi release from myosin.ADP.Pi activated by actin or thin filaments under a variety of conditions. They conclude that their data can be described by a two-state model and raise doubts about the generally accepted three-state model as originally formulated by McKillop and Geeves (1993. Biophys. J. https://doi.org/10.1016/S0006-3495(93)81110-X). However, in the following article, we follow Plato's dictum that ""twice and thrice over, as they say, good it is to repeat and review what is good."" We have therefore reviewed the evidence for the three- and two-state models and present our view that the evidence is overwhelmingly in favor of three structural states of the thin filament, which regulate access of myosin to its binding sites on actin and, hence, muscle contractility.",2019-09-30 +23203866,FlyAtlas: database of gene expression in the tissues of Drosophila melanogaster.,"The FlyAtlas resource contains data on the expression of the genes of Drosophila melanogaster in different tissues (currently 25-17 adult and 8 larval) obtained by hybridization of messenger RNA to Affymetrix Drosophila Genome 2 microarrays. The microarray probe sets cover 13,250 Drosophila genes, detecting 12,533 in an unambiguous manner. 
The data underlying the original web application (http://flyatlas.org) have been restructured into a relational database and a Java servlet written to provide a new web interface, FlyAtlas 2 (http://flyatlas.gla.ac.uk/), which allows several additional queries. Users can retrieve data for individual genes or for groups of genes belonging to the same or related ontological categories. Assistance in selecting valid search terms is provided by an Ajax 'autosuggest' facility that polls the database as the user types. Searches can also focus on particular tissues, and data can be retrieved for the most highly expressed genes, for genes of a particular category with above-average expression or for genes with the greatest difference in expression between the larval and adult stages. A novel facility allows the database to be queried with a specific gene to find other genes with a similar pattern of expression across the different tissues.",2012-11-29 +31821684,Pred-MutHTP: Prediction of disease-causing and neutral mutations in human transmembrane proteins.,"Membrane proteins are unique in that segments thereof concurrently reside in vastly different physicochemical environments: the extracellular space, the lipid bilayer, and the cytoplasm. Accordingly, the effects of missense variants disrupting their sequence depend greatly on the characteristics of the environment of the protein segment affected as well as the function it performs. Because membrane proteins have many crucial roles (transport, signal transduction, cell adhesion, etc.), compromising their functionality often leads to diseases including cancers, diabetes mellitus or cystic fibrosis. Here, we report a suite of sequence-based computational methods ""Pred-MutHTP"" for discriminating between disease-causing and neutral alterations in their sequence. 
With a data set of 11,846 disease-causing and 9,533 neutral mutations, we obtained an accuracy of 74% and 78% with 10-fold group-wise cross-validation and test set, respectively. The features used in the models include evolutionary information, physiochemical properties, neighboring residue information, and specialized membrane protein attributes incorporating the number of transmembrane segments, substitution matrices specific to membrane proteins as well as residue distributions occurring in specific topological regions. Across 11 disease classes, the method achieved accuracies in the range of 75-85%. The model designed specifically for the transmembrane segments achieved an accuracy of 85% on the test set with a sensitivity and specificity of 86% and 83%, respectively. This renders our method the current state-of-the-art with regard to predicting the effects of variants in the transmembrane protein segments. Pred-MutHTP allows predicting the effect of any variant occurring in a membrane protein-available at https://www.iitm.ac.in/bioinfo/PredMutHTP/.",2019-12-10 +22374386,GrameneMart: the BioMart data portal for the Gramene project.,"Gramene is a well-established resource for plant comparative genome analysis. Data are generated through automated and curated analyses and made available through web interfaces such as GrameneMart. The Gramene project was an early adopter of the BioMart software, which remains an integral and well-used component of the Gramene website. BioMart accessible data sets include plant gene annotations, plant variation catalogues, genetic markers, physical mapping entities, public DNA/mRNA sequences of various types and curated quantitative trait loci for various species. 
DATABASE URL: http://www.gramene.org/biomart/martview.",2012-02-28 +31744934,"GRANAR, a Computational Tool to Better Understand the Functional Importance of Monocotyledon Root Anatomy.","Root hydraulic conductivity is a limiting factor along the water pathways between the soil and the leaf, and root radial conductivity is itself defined by cell-scale hydraulic properties and anatomical features. However, quantifying the influence of anatomical features on the radial conductivity remains challenging due to complex time-consuming experimental procedures. We present an open-source computational tool, the Generator of Root Anatomy in R (GRANAR; http://granar.github.io), that can be used to rapidly generate digital versions of contrasted monocotyledon root anatomical networks. GRANAR uses a limited set of root anatomical parameters, easily acquired with existing image analysis tools. The generated anatomical network can then be used in combination with hydraulic models to estimate the corresponding hydraulic properties. We used GRANAR to reanalyze large maize (Zea mays) anatomical datasets from the literature. Our model was successful at creating virtual anatomies for each experimental observation. We also used GRANAR to generate anatomies not observed experimentally over wider ranges of anatomical parameters. The generated anatomies were then used to estimate the corresponding radial conductivities with the hydraulic model MECHA (model of explicit cross-section hydraulic architecture). Our simulations highlight the large importance of the width of the stele and the cortex. GRANAR is a computational tool that generates root anatomical networks from experimental data. 
It enables the quantification of the effect of individual anatomical features on the root radial conductivity.",2019-11-19 +31151648,Nutritional composition of food fishes and their importance in providing food and nutritional security.,"Fish is a healthy food, rich in quality animal proteins, polyunsaturated fatty acids especially the (ω)-3 eicosapentaenoic acid and docosahexaenoic acid and micronutrients. In addition, fish are more available and affordable than other sources of animal proteins in tropical countries. Aquaculture, which is one of the fastest growing food production sectors, could play a big role in eradicating hunger, malnutrition and nutrient-deprivation globally. However, nutritional information on fish is necessary for utilization of fish in achieving nutritional security and will be helpful in prioritizing species for aquaculture. In this context, we have studied the detailed nutritional composition of selected fishes from India and developed a database (http://www.cifri.res.in/nutrifishin/index.php) with the food data generated. This review explore the implications of such nutritional information in consumer guidance, dietary counselling, food-policy planning and prioritization of species for aquaculture to fight hunger, malnutrition and micronutrient deficiency; ultimately contributing to food and nutritional security.",2017-11-12 +26286928,MicRhoDE: a curated database for the analysis of microbial rhodopsin diversity and evolution. ,"Microbial rhodopsins are a diverse group of photoactive transmembrane proteins found in all three domains of life and in viruses. Today, microbial rhodopsin research is a flourishing research field in which new understandings of rhodopsin diversity, function and evolution are contributing to broader microbiological and molecular knowledge. Here, we describe MicRhoDE, a comprehensive, high-quality and freely accessible database that facilitates analysis of the diversity and evolution of microbial rhodopsins. 
Rhodopsin sequences isolated from a vast array of marine and terrestrial environments were manually collected and curated. To each rhodopsin sequence are associated related metadata, including predicted spectral tuning of the protein, putative activity and function, taxonomy for sequences that can be linked to a 16S rRNA gene, sampling date and location, and supporting literature. The database currently covers 7857 aligned sequences from more than 450 environmental samples or organisms. Based on a robust phylogenetic analysis, we introduce an operational classification system with multiple phylogenetic levels ranging from superclusters to species-level operational taxonomic units. An integrated pipeline for online sequence alignment and phylogenetic tree construction is also provided. With a user-friendly interface and integrated online bioinformatics tools, this unique resource should be highly valuable for upcoming studies of the biogeography, diversity, distribution and evolution of microbial rhodopsins. Database URL: http://micrhode.sb-roscoff.fr.",2015-08-18 +32131901,Risk factors of obstructive sleep apnea syndrome in children.,"

Background

The known risk factors of childhood OSAS include tonsillar and adenoid hypertrophy, obesity, craniofacial anomalies, neuromuscular disorders and African-American (AA) ancestry. Whether other factors such as allergic rhinitis (AR), premature, environmental tobacco smoking (ETS) are associated with OSAS are inconsistent in different studies. Our study enrolled children of a broad age range and included potential risk factors of OSAS derived from previous studies and our own experience. Our objective is to identify risk factors of OSAS in children in a clinical setting.

Methods

Children between 2 and 15 years of age exhibiting snoring symptoms who visited the sleep center for polysomnography (PSG) were enrolled. All children completed a questionnaire, physical examination and PSG. The questionnaire included demographic data and information related to potential risk factors for sleep disorders. A physical examination included measurements of height, weight, neck circumference, waist and hip ratio, visual evaluation of the tonsils and the degree of adenoid obstruction. Children with obstructive apnea-hypopnea index (OAHI) ≥ 1 were defined as OSAS.

Results

A total of 1578 children were enrolled and 1009 children exhibited OSAS. Univariate analyses showed that snoring occurring for ≥ 3 months, male gender, preterm birth, breastfeeding, obesity, neck circumference ≥ 30 cm, waist/hip ratio ≥ 0.95, tonsillar hypertrophy, and adenoid hypertrophy were associated with OSAS. The proportion of low educational level was higher in parents who breastfed their babies than those who didn't. Multivariate analysis showed that snoring for ≥ 3 months, male gender, obesity, breastfeeding, tonsillar hypertrophy, and adenoid hypertrophy were associated with OSAS. Confounders such as socioeconomic status, parental occupation, and health-related behaviors should be explored further to investigate the relationship between breastfeeding and OSAS.

Conclusion

The independent risk factors for OSAS in children included snoring ≥ 3 months, male gender, obesity, breastfeeding, tonsillar and adenoid hypertrophy. The study was registered on Clinical Trials government (NCT02447614). The name of the trial is ""Follow-up Studies of Primary Snoring (PS) and Obstructive Sleep Apnea Hypopnea Syndrome (OSAHS) in Chinese Children"" and the URL is https://clinicaltrials.gov/.",2020-03-04 +23110970,13CFLUX2--high-performance software suite for (13)C-metabolic flux analysis.,"

Summary

(13)C-based metabolic flux analysis ((13)C-MFA) is the state-of-the-art method to quantitatively determine in vivo metabolic reaction rates in microorganisms. 13CFLUX2 contains all tools for composing flexible computational (13)C-MFA workflows to design and evaluate carbon labeling experiments. A specially developed XML language, FluxML, highly efficient data structures and simulation algorithms achieve a maximum of performance and effectiveness. Support of multicore CPUs, as well as compute clusters, enables scalable investigations. 13CFLUX2 outperforms existing tools in terms of universality, flexibility and built-in features. Therewith, 13CFLUX2 paves the way for next-generation high-resolution (13)C-MFA applications on the large scale.

Availability and implementation

13CFLUX2 is implemented in C++ (ISO/IEC 14882 standard) with Java and Python add-ons to run under Linux/Unix. A demo version and binaries are available at www.13cflux.net.",2012-10-30 +22768977,Oomycete Transcriptomics Database: a resource for oomycete transcriptomes.,"

Background

Oomycete pathogens have attracted significant attention in recent years due to their economic impact. With improving sequencing technologies, large amounts of oomycete transcriptomics data are now available which have great biological utility. A known bottleneck with next generation sequencing data however lies with their analysis, interpretation, organization, storage and visualization. A number of efforts have been made in this respect resulting in development of a myriad of resources. Most of the existing NGS browsers work as standalone applications that need processed data to be uploaded to the browser locally for visualization. At the same time, several oomycete EST databases such as PFGD, ESTAP and SPC, are not available anymore, so there is an immediate need for a database resource that can store and disseminate this legacy information in addition to NGS data.

Description

Oomycetes Transcriptomics Database is an integrated transcriptome and EST data resource for oomycete pathogens. The database currently stores processed ABI SOLiD transcript sequences from Phytophthora sojae and its host soybean (P. sojae mycelia, healthy soybean and P. sojae-infected soybean) as well as Illumina transcript sequences from five Hyaloperonospora arabidopsidis libraries. In addition to those resources, it has also a complete set of Sanger EST sequences from P. sojae, P. infestans and H. arabidopsidis grown under various conditions. A web-based transcriptome browser was created for visualization of assembled transcripts, their mapping to the reference genome, expression profiling and depth of read coverage for particular locations on the genome. The transcriptome browser merges EST-derived contigs with NGS-derived assembled transcripts on the fly and displays the consensus. OTD possesses strong query features and the database interacts with the VBI Microbial Database as well as the Phytophthora Transcriptomics Database.

Conclusion

Oomycete Transcriptomics Database provides access to NGS transcript and EST data for oomycete pathogens and soybean. The OTD browser is a light weight transcriptome browser that displays the raw read alignment as well as the transcript assembly and expression information quantitatively. The query features offer a wide variety of options including querying data from the VBI microbial database and the Phytophthora transcriptomics database. The database is publicly available at http://www.eumicrobedb.org/transcripts/.",2012-07-06 +31199787,Pathogenicity and functional impact of non-frameshifting insertion/deletion variation in the human genome.,"Differentiation between phenotypically neutral and disease-causing genetic variation remains an open and relevant problem. Among different types of variation, non-frameshifting insertions and deletions (indels) represent an understudied group with widespread phenotypic consequences. To address this challenge, we present a machine learning method, MutPred-Indel, that predicts pathogenicity and identifies types of functional residues impacted by non-frameshifting insertion/deletion variation. The model shows good predictive performance as well as the ability to identify impacted structural and functional residues including secondary structure, intrinsic disorder, metal and macromolecular binding, post-translational modifications, allosteric sites, and catalytic residues. We identify structural and functional mechanisms impacted preferentially by germline variation from the Human Gene Mutation Database, recurrent somatic variation from COSMIC in the context of different cancers, as well as de novo variants from families with autism spectrum disorder. Further, the distributions of pathogenicity prediction scores generated by MutPred-Indel are shown to differentiate highly recurrent from non-recurrent somatic variation. 
Collectively, we present a framework to facilitate the interrogation of both pathogenicity and the functional effects of non-frameshifting insertion/deletion variants. The MutPred-Indel webserver is available at http://mutpred.mutdb.org/.",2019-06-14 +24634472,Linking tissues to phenotypes using gene expression profiles.,"Despite great biological and computational efforts to determine the genetic causes underlying human heritable diseases, approximately half (3500) of these diseases are still without an identified genetic cause. Model organism studies allow the targeted modification of the genome and can help with the identification of genetic causes for human diseases. Targeted modifications have led to a vast amount of model organism data. However, these data are scattered across different databases, preventing an integrated view and missing out on contextual information. Once we are able to combine all the existing resources, will we be able to fully understand the causes underlying a disease and how species differ. Here, we present an integrated data resource combining tissue expression with phenotypes in mouse lines and bringing us one step closer to consequence chains from a molecular level to a resulting phenotype. Mutations in genes often manifest in phenotypes in the same tissue that the gene is expressed in. However, in other cases, a systems level approach is required to understand how perturbations to gene-networks connecting multiple tissues lead to a phenotype. Automated evaluation of the predicted tissue-phenotype associations reveals that 72-76% of the phenotypes are associated with disruption of genes expressed in the affected tissue. However, 55-64% of the individual phenotype-tissue associations show spatially separated gene expression and phenotype manifestation. 
For example, we see a correlation between 'total body fat' abnormalities and genes expressed in the 'brain', which fits recent discoveries linking genes expressed in the hypothalamus to obesity. Finally, we demonstrate that the use of our predicted tissue-phenotype associations can improve the detection of a known disease-gene association when combined with a disease gene candidate prediction tool. For example, JAK2, the known gene associated with Familial Erythrocytosis 1, rises from the seventh best candidate to the top hit when the associated tissues are taken into consideration. Database URL: http://www.sanger.ac.uk/resources/databases/phenodigm/phenotype/list.",2014-03-13 +31194174,Observed switches and derived profitability indicators for peaking power plants: Northeast U.S. 2001-2009.,"The data are related to the research article ""Structural estimation of switching costs for peaking power plants,"" https://doi.org/10.1016/j.ejor.2019.03.031. Fleten et al., 2019 We display the operating status of peaking power plants as they were reported annually to the United States Energy Information Administration during 2001-2009. Operating status can either be operating, on standby, or retired. Changes in operating status allow us to infer shutdowns, startups, and retirements. We also derive annual profitability indicators.",2019-05-24 +28050585,Dataset of the proteome of purified outer membrane vesicles from the human pathogen Aggregatibacter actinomycetemcomintans.,"The Gram-negative bacterium Aggregatibacter actinomycetemcomitans is an oral and systemic pathogen, which is linked to aggressive forms of periodontitis and can be associated with endocarditis. The outer membrane vesicles (OMVs) of this species contain effector proteins such as cytolethal distending toxin (CDT) and leukotoxin (LtxA), which they can deliver into human host cells. The OMVs can also activate innate immunity through NOD1- and NOD2-active pathogen-associated molecular patterns. 
This dataset provides a proteome of highly purified OMVs from A. actinomycetemcomitans serotype e strain 173. The experimental data do not only include the raw data of the LC-MS/MS analysis of four independent preparations of purified OMVs but also the mass lists of the processed data and the Mascot.dat files from the database searches. In total 501 proteins are identified, of which 151 are detected in at least three of four independent preparations. In addition, this dataset contains the COG definitions and the predicted subcellular locations (PSORTb 3.0) for the entire genome of A. actinomycetemcomitans serotype e strain SC1083, which is used for the evaluation of the LC-MS/MS data. These data are deposited in ProteomeXchange in the public dataset PXD002509. In addition, a scientific interpretation of this dataset by Kieselbach et al. (2015) [2] is available at http://dx.doi.org/10.1371/journal.pone.0138591.",2016-12-15 +32974523,"Ocins database: a database of bug-busters from Bifidobacterium, Lactobacillus, and Enterococcus.","The ocins are antimicrobial polypeptides produced by probiotic microbes, such as Lactobacillus , Enterococcus , Streptococcus , Leuconostoc and Bifidobacterium . They are produced in response to stress and for the self-defense of the bacterium. It is indispensable to understand their mechanistic characteristics, structures, and functions, if the food industry is to reduce contamination levels and produce germfree foods. Databases of the ocins that are readily accessible to the food industry are scarce, but urgently required. Therefore, we established a very useful, unique, and a simple ocin database, which not merely provides information about ocins, but also directs their utilisation in the food industry. The database includes information about each ocin, its amino acid sequence, molecular weight, and isoelectric point. 
The database also possesses all the currently known ocin (probiotic origin only) sequences and structures, target organisms, and relevant to food industries (aqua culture, dairy and meat industries), which is hard to obtain in other databases. The database is free for public and accessed at http://ocins.cftri.com/ocins/.",2019-06-13 +30723726,Is idiopathic pulmonary fibrosis a cancer-like disease? Transcriptome analysis to fuel the debate. ,"Despite promising examples of anticancer drugs as potential treatment modalities for IPF, these transcriptome data argue against the general nature of anticancer drugs as anti-IPF drugs http://ow.ly/HjsV30nbcji.",2019-02-01 +27694206,The Global Genome Biodiversity Network (GGBN) Data Standard specification. ,"Genomic samples of non-model organisms are becoming increasingly important in a broad range of studies from developmental biology, biodiversity analyses, to conservation. Genomic sample definition, description, quality, voucher information and metadata all need to be digitized and disseminated across scientific communities. This information needs to be concise and consistent in today's ever-increasing bioinformatic era, for complementary data aggregators to easily map databases to one another. In order to facilitate exchange of information on genomic samples and their derived data, the Global Genome Biodiversity Network (GGBN) Data Standard is intended to provide a platform based on a documented agreement to promote the efficient sharing and usage of genomic sample material and associated specimen information in a consistent way. The new data standard presented here build upon existing standards commonly used within the community extending them with the capability to exchange data on tissue, environmental and DNA sample as well as sequences. The GGBN Data Standard will reveal and democratize the hidden contents of biodiversity biobanks, for the convenience of everyone in the wider biobanking community. 
Technical tools exist for data providers to easily map their databases to the standard. Database URL: http://terms.tdwg.org/wiki/GGBN_Data_Standard.",2016-10-02 +31092280,Systematic assessment of secondary bile acid metabolism in gut microbes reveals distinct metabolic capabilities in inflammatory bowel disease.,"

Background

The human gut microbiome performs important functions in human health and disease. A classic example for host-gut microbial co-metabolism is host biosynthesis of primary bile acids and their subsequent deconjugation and transformation by the gut microbiome. To understand these system-level host-microbe interactions, a mechanistic, multi-scale computational systems biology approach that integrates the different types of omic data is needed. Here, we use a systematic workflow to computationally model bile acid metabolism in gut microbes and microbial communities.

Results

Therefore, we first performed a comparative genomic analysis of bile acid deconjugation and biotransformation pathways in 693 human gut microbial genomes and expanded 232 curated genome-scale microbial metabolic reconstructions with the corresponding reactions (available at https://vmh.life ). We then predicted the bile acid biotransformation potential of each microbe and in combination with other microbes. We found that each microbe could produce maximally six of the 13 secondary bile acids in silico, while microbial pairs could produce up to 12 bile acids, suggesting bile acid biotransformation being a microbial community task. To investigate the metabolic potential of a given microbiome, publicly available metagenomics data from healthy Western individuals, as well as inflammatory bowel disease patients and healthy controls, were mapped onto the genomes of the reconstructed strains. We constructed for each individual a large-scale personalized microbial community model that takes into account strain-level abundances. Using flux balance analysis, we found considerable variation in the potential to deconjugate and transform primary bile acids between the gut microbiomes of healthy individuals. Moreover, the microbiomes of pediatric inflammatory bowel disease patients were significantly depleted in their bile acid production potential compared with that of controls. The contributions of each strain to overall bile acid production potential across individuals were found to be distinct between inflammatory bowel disease patients and controls. Finally, bottlenecks limiting secondary bile acid production potential were identified in each microbiome model.

Conclusions

This large-scale modeling approach provides a novel way of analyzing metagenomics data to accelerate our understanding of the metabolic interactions between the host and gut microbiomes in health and diseases states. Our models and tools are freely available to the scientific community.",2019-05-15 +28951529,AdmixPower: Statistical Power and Sample Size Estimation for Mapping Genetic Loci in Admixed Populations.,"Admixed populations result from recent admixture of two or more ancestral populations with divergent allele frequencies. The genome of each admixed individual is a mosaic of haplotypes inherited from the ancestral populations. Despite the substantial work to assess power and sample size requirements for association mapping in genetically homogeneous populations of European ancestry, power and sample size estimation methods for mapping genes in genetically heterogeneous admixed populations such as African Americans are lacking. Admixture mapping is a method that traces the ancestral origin of disease-susceptibility genetic loci in the admixed population. We developed AdmixPower, a freely available tool set based on the open-source R software, to perform power and sample size analysis for genetically heterogeneous admixed populations considering continuous or dichotomous outcomes with a case-only or case-control study design. AdmixPower can be used to compute the sample size required to achieve investigator-specified statistical power under several key parameters including ancestry odds ratio, genotype risk ratio, parental risk ratio, an underlying genetic risk model, trait type, and admixture model (hybrid-isolation or continuous gene flow model). We demonstrate that differences in the key parameters in the admixed population results in substantial differences in the sample size required to achieve adequate power in admixture mapping studies. 
Our tool provides a resource for researchers to develop a strategy to minimize cost and maximize the success of identifying disease-susceptibility loci in an admixed population. R code used in the sample size and power analysis is freely available from https://research.cchmc.org/mershalab/Tools.html.",2017-09-26 +31031931,"The role of glacial-interglacial climate change in shaping the genetic structure of eastern subterranean termites in the southern Appalachian Mountains, USA.","The eastern subterranean termite, Reticulitermes flavipes, currently inhabits previously glaciated regions of the northeastern U.S., as well as the unglaciated southern Appalachian Mountains and surrounding areas. We hypothesized that Pleistocene climatic fluctuations have influenced the distribution of R. flavipes, and thus the evolutionary history of the species. We estimated contemporary and historical geographic distributions of R. flavipes by constructing Species Distribution Models (SDM). We also inferred the evolutionary and demographic history of the species using mitochondrial (cytochrome oxidase I and II) and nuclear (endo-beta-1,4-glucanase) DNA sequence data. To do this, genetic populations were delineated using Bayesian spatial-genetic clustering, competing hypotheses about population divergence were assessed using approximate Bayesian computation (ABC), and changes in population size were estimated using Bayesian skyline plots. SDMs identified areas in the north with suitable habitat during the transition from the Last Interglacial to the Last Glacial Maximum, as well as an expanding distribution from the mid-Holocene to the present. Genetic analyses identified three geographically cohesive populations, corresponding with northern, central, and southern portions of the study region. 
Based on ABC analyses, divergence between the Northern and Southern populations was the oldest, estimated to have occurred 64.80 thousand years ago (kya), which corresponds with the timing of available habitat in the north. The Central and Northern populations diverged in the mid-Holocene, 8.63 kya, after which the Central population continued to expand. Accordingly, phylogeographic patterns of R. flavipes in the southern Appalachians appear to have been strongly influenced by glacial-interglacial climate change.

Open research badges

This article has been awarded Open Materials, Open Data Badges. All materials and data are publicly accessible via the Open Science Framework at https://doi.org/10.5061/dryad.5hr7f31.",2019-04-01 +29476645,Crystal structures of sampatrilat and sampatrilat-Asp in complex with human ACE - a molecular basis for domain selectivity.,"Angiotensin-1-converting enzyme (ACE) is a zinc metallopeptidase that consists of two homologous catalytic domains (known as nACE and cACE) with different substrate specificities. Based on kinetic studies it was previously reported that sampatrilat, a tight-binding inhibitor of ACE, Ki = 13.8 nm and 171.9 nm for cACE and nACE respectively [Sharma et al., Journal of Chemical Information and Modeling (2016), 56, 2486-2494], was 12.4-fold more selective for cACE. In addition, samAsp, in which an aspartate group replaces the sampatrilat lysine, was found to be a nonspecific and lower micromolar affinity inhibitor. Here, we report a detailed three-dimensional structural analysis of sampatrilat and samAsp binding to ACE using high-resolution crystal structures elucidated by X-ray crystallography, which provides a molecular basis for differences in inhibitor affinity and selectivity for nACE and cACE. The structures show that the specificity of sampatrilat can be explained by increased hydrophobic interactions and a H-bond from Glu403 of cACE with the lysine side chain of sampatrilat that are not observed in nACE. In addition, the structures clearly show a significantly greater number of hydrophilic and hydrophobic interactions with sampatrilat compared to samAsp in both cACE and nACE consistent with the difference in affinities. Our findings provide new experimental insights into ligand binding at the active site pockets that are important for the design of highly specific domain selective inhibitors of ACE.

Database

The atomic coordinates and structure factors for N- and C-domains of ACE bound to sampatrilat and sampatrilat-Asp complexes (6F9V, 6F9R, 6F9T and 6F9U respectively) have been deposited in the Protein Data Bank, Research Collaboratory for Structural Bioinformatics, Rutgers University, New Brunswick, NJ (http://www.rcsb.org/).",2018-03-08 +31734254,Prediction of drug-target interaction based on protein features using undersampling and feature selection techniques with boosting.,"Accurate identification of drug-target interaction (DTI) is a crucial and challenging task in the drug discovery process, having enormous benefit to the patients and pharmaceutical company. The traditional wet-lab experiments of DTI is expensive, time-consuming, and labor-intensive. Therefore, many computational techniques have been established for this purpose; although a huge number of interactions are still undiscovered. Here, we present pdti-EssB, a new computational model for identification of DTI using protein sequence and drug molecular structure. More specifically, each drug molecule is transformed as the molecular substructure fingerprint. For a protein sequence, different descriptors are utilized to represent its evolutionary, sequence, and structural information. Besides, our proposed method uses data balancing techniques to handle the imbalance problem and applies a novel feature eliminator to extract the best optimal features for accurate prediction. In this paper, four classes of DTI benchmark datasets are used to construct a predictive model with XGBoost. Here, the auROC is utilized as an evaluation metric to compare the performance of pdti-EssB method with recent methods, applying five-fold cross-validation. Finally, the experimental results indicate that our proposed method is able to outperform other approaches in predicting DTI, and introduces new drug-target interaction samples based on prediction probability scores. 
pdti-EssB webserver is available online at http://pdtiessb-uestc.com/.",2019-11-15 +29126285,ReMap 2018: an updated atlas of regulatory regions from an integrative analysis of DNA-binding ChIP-seq experiments.,"With this latest release of ReMap (http://remap.cisreg.eu), we present a unique collection of regulatory regions in human, as a result of a large-scale integrative analysis of ChIP-seq experiments for hundreds of transcriptional regulators (TRs) such as transcription factors, transcriptional co-activators and chromatin regulators. In 2015, we introduced the ReMap database to capture the genome regulatory space by integrating public ChIP-seq datasets, covering 237 TRs across 13 million (M) peaks. In this release, we have extended this catalog to constitute a unique collection of regulatory regions. Specifically, we have collected, analyzed and retained after quality control a total of 2829 ChIP-seq datasets available from public sources, covering a total of 485 TRs with a catalog of 80M peaks. Additionally, the updated database includes new search features for TR names as well as aliases, including cell line names and the ability to navigate the data directly within genome browsers via public track hubs. Finally, full access to this catalog is available online together with a TR binding enrichment analysis tool. ReMap 2018 provides a significant update of the ReMap database, providing an in depth view of the complexity of the regulatory landscape in human.",2018-01-01 +28977416,IMOTA: an interactive multi-omics tissue atlas for the analysis of human miRNA-target interactions.,"Web repositories for almost all 'omics' types have been generated-detailing the repertoire of representatives across different tissues or cell types. A logical next step is the combination of these valuable sources. 
With IMOTA (interactive multi omics tissue atlas), we developed a database that includes 23 725 relations between miRNAs and 23 tissues, 310 932 relations between mRNAs and the same tissues as well as 63 043 relations between proteins and the 23 tissues in Homo sapiens. IMOTA also contains data on tissue-specific interactions, e.g. information on 331 413 miRNAs and target gene pairs that are jointly expressed in the considered tissues. By using intuitive filter and visualization techniques, it is with minimal effort possible to answer various questions. These include rather general questions but also requests specific for genes, miRNAs or proteins. An example for a general task could be 'identify all miRNAs, genes and proteins in the lung that are highly expressed and where experimental evidence proves that the miRNAs target the genes'. An example for a specific request for a gene and a miRNA could for example be 'In which tissues is miR-34c and its target gene BCL2 expressed?'. The IMOTA repository is freely available online at https://ccb-web.cs.uni-saarland.de/imota/.",2018-01-01 +31189899,High-throughput Discovery of Topologically Non-trivial Materials using Spin-orbit Spillage.,"We present a novel methodology to identify topologically non-trivial materials based on band inversion induced by spin-orbit coupling (SOC) effect. Specifically, we compare the density functional theory (DFT) based wavefunctions with and without spin-orbit coupling and compute the 'spin-orbit-spillage' as a measure of band-inversion. Due to its ease of calculation, without any need for symmetry analysis or dense k-point interpolation, the spillage is an excellent tool for identifying topologically non-trivial materials. Out of 30000 materials available in the JARVIS-DFT database, we applied this methodology to more than 4835 non-magnetic materials consisting of heavy atoms and low bandgaps. We found 1868 candidate materials with high-spillage (using 0.5 as a threshold). 
We validated our methodology by carrying out conventional Wannier-interpolation calculations for 289 candidate materials. We demonstrate that in addition to Z2 topological insulators, this screening method successfully identified many semimetals and topological crystalline insulators. Importantly, our approach is applicable to the investigation of disordered or distorted as well as magnetic materials, because it is not based on symmetry considerations. We discuss some individual example materials, as well as trends throughout our dataset, which is available at the websites: https://www.ctcms.nist.gov/~knc6/JVASP.html and https://jarvis.nist.gov/ .",2019-06-12 +29140464,HOCOMOCO: towards a complete collection of transcription factor binding models for human and mouse via large-scale ChIP-Seq analysis.,"We present a major update of the HOCOMOCO collection that consists of patterns describing DNA binding specificities for human and mouse transcription factors. In this release, we profited from a nearly doubled volume of published in vivo experiments on transcription factor (TF) binding to expand the repertoire of binding models, replace low-quality models previously based on in vitro data only and cover more than a hundred TFs with previously unknown binding specificities. This was achieved by systematic motif discovery from more than five thousand ChIP-Seq experiments uniformly processed within the BioUML framework with several ChIP-Seq peak calling tools and aggregated in the GTRD database. HOCOMOCO v11 contains binding models for 453 mouse and 680 human transcription factors and includes 1302 mononucleotide and 576 dinucleotide position weight matrices, which describe primary binding preferences of each transcription factor and reliable alternative binding specificities. An interactive interface and bulk downloads are available on the web: http://hocomoco.autosome.ru and http://www.cbrc.kaust.edu.sa/hocomoco11. 
In this release, we complement HOCOMOCO by MoLoTool (Motif Location Toolbox, http://molotool.autosome.ru) that applies HOCOMOCO models for visualization of binding sites in short DNA sequences.",2018-01-01 +30654742,Peptimapper: proteogenomics workflow for the expert annotation of eukaryotic genomes.,"

Background

Accurate structural annotation of genomes is still a challenge, despite the progress made over the past decade. The prediction of gene structure remains difficult, especially for eukaryotic species, and is often erroneous and incomplete. We used a proteogenomics strategy, taking advantage of the combination of proteomics datasets and bioinformatics tools, to identify novel protein coding-genes and splice isoforms, assign correct start sites, and validate predicted exons and genes.

Results

Our proteogenomics workflow, Peptimapper, was applied to the genome annotation of Ectocarpus sp., a key reference genome for both the brown algal lineage and stramenopiles. We generated proteomics data from various life cycle stages of Ectocarpus sp. strains and sub-cellular fractions using a shotgun approach. First, we directly generated peptide sequence tags (PSTs) from the proteomics data. Second, we mapped PSTs onto the translated genomic sequence. Closely located hits (i.e., PSTs locations on the genome) were then clustered to detect potential coding regions based on parameters optimized for the organism. Third, we evaluated each cluster and compared it to gene predictions from existing conventional genome annotation approaches. Finally, we integrated cluster locations into GFF files to use a genome viewer. We identified two potential novel genes, a ribosomal protein L22 and an aryl sulfotransferase and corrected the gene structure of a dihydrolipoamide acetyltransferase. We experimentally validated the results by RT-PCR and using transcriptomics data.

Conclusions

Peptimapper is a complementary tool for the expert annotation of genomes. It is suitable for any organism and is distributed through a Docker image available on two public bioinformatics docker repositories: Docker Hub and BioShaDock. This workflow is also accessible through the Galaxy framework and for use by non-computer scientists at https://galaxy.protim.eu . Data are available via ProteomeXchange under identifier PXD010618.",2019-01-17 +31561642,A Portal to Visualize Transcriptome Profiles in Mouse Models of Neurological Disorders. ,"Target nomination for drug development has been a major challenge in the path to finding a cure for several neurological disorders. Comprehensive transcriptome profiles have revealed brain gene expression changes associated with many neurological disorders, and the functional validation of these changes is a critical next step. Model organisms are a proven approach for the elucidation of disease mechanisms, including screening of gene candidates as therapeutic targets. Frequently, multiple models exist for a given disease, creating a challenge to select the optimal model for validation and functional follow-up. To help in nominating the best mouse models for studying neurological diseases, we developed a web portal to visualize mouse transcriptomic data related to neurological disorders: http://mmad.nrihub.org. Users can examine gene expression changes across mouse model studies to help select the optimal mouse model for further investigation. The portal provides access to mouse studies related to Alzheimer's diseases (AD), Parkinson's disease (PD), Huntington's disease (HD), Amyotrophic Lateral Sclerosis (ALS), Spinocerebellar ataxia (SCA), and models related to aging.",2019-09-26 +30535313,Evaluation of methods for generative modeling of cell and nuclear shape.,"

Motivation

Cell shape provides both geometry for, and a reflection of, cell function. Numerous methods for describing and modeling cell shape have been described, but previous evaluation of these methods in terms of the accuracy of generative models has been limited.

Results

Here we compare traditional methods and deep autoencoders to build generative models for cell shapes in terms of the accuracy with which shapes can be reconstructed from models. We evaluated the methods on different collections of 2D and 3D cell images, and found that none of the methods gave accurate reconstructions using low dimensional encodings. As expected, much higher accuracies were observed using high dimensional encodings, with outline-based methods significantly outperforming image-based autoencoders. The latter tended to encode all cells as having smooth shapes, even for high dimensions. For complex 3D cell shapes, we developed a significant improvement of a method based on the spherical harmonic transform that performs significantly better than other methods. We obtained similar results for the joint modeling of cell and nuclear shape. Finally, we evaluated the modeling of shape dynamics by interpolation in the shape space. We found that our modified method provided lower deformation energies along linear interpolation paths than other methods. This allows practical shape evolution in high dimensional shape spaces. We conclude that our improved spherical harmonic based methods are preferable for cell and nuclear shape modeling, providing better representations, higher computational efficiency and requiring fewer training images than deep learning methods.

Availability and implementation

All software and data is available at http://murphylab.cbd.cmu.edu/software.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +29442412,Effect of chewing gum on gastrointestinal function after gynecological surgery: A systematic literature review and meta-analysis.,"

Aim

Recently, several randomized controlled trials (RCT) reported the effect of chewing gum on gastrointestinal function after gynecological surgery; however, these results are inconsistent. The aim of this study was to systematically analyze the effect of chewing gum on postoperative gastrointestinal function and complications in women undergoing gynecological surgery.

Methods

PubMed, Embase, Cochrane Library, Web of Science, Chinese Wanfang databases, China National Knowledge Infrastructure and http://clinicaltrials.gov were searched from inception to April 30, 2017. Studies including chewing gum's impact on postoperative gastrointestinal function or complications were evaluated. Two authors individually performed data extraction from 10 RCT. Weighted mean difference (WMD) and odds ratio (OR) were used.

Results

Contrasting the group of standard postoperative care, the gum chewing group had a lower duration from the end of operation to first aerofluxus (WMD -7.55, 95%CI: -10.99 to -4.12); first intestinal sounds (WMD -6.20, 95%CI: -8.14 to -4.27); first defecation (WMD -12.24, 95%CI: -18.47 to -6.01); hospitalization duration (WMD -0.72, 95%CI: -1.19 to -0.25); and lower incidence of nausea (OR 0.45, 95%CI: 0.29 to 0.69), vomiting (OR 0.38, 95%CI: 0.22 to 0.68) and postoperative ileus (OR 0.25, 95%CI: 0.14 to 0.44).

Conclusion

Chewing gum is an effective measure to ameliorate gastrointestinal function and decrease complications after gynecological surgery.",2018-02-14 +28456986,"Using the MEROPS Database for Investigation of Lysosomal Peptidases, Their Inhibitors, and Substrates.","This chapter describes how to retrieve data on lysosomal peptidases from the MEROPS database for proteolytic enzymes, their substrates and inhibitors ( http://merops.sanger.ac.uk ). Features described in this chapter include the summary page, pages for structure, interactions with inhibitors, substrates, literature and involvement in physiological pathways, and how to download data from the MEROPS FTP site. The lysosomal peptidase legumain is used as an example.",2017-01-01 +25982285,Association of chromosome 19 to lung cancer genotypes and phenotypes.,"The Chromosome 19 Consortium, a part of the Chromosome-Centric Human Proteome Project (C-HPP, http://www.C-HPP.org ), is tasked with the understanding chromosome 19 functions at the gene and protein levels, as well as their roles in lung oncogenesis. Comparative genomic hybridization (CGH) studies revealed chromosome aberration in lung cancer subtypes, including ADC, SCC, LCC, and SCLC. The most common abnormality is 19p loss and 19q gain. Sixty-four aberrant genes identified in previous genomic studies and their encoded protein functions were further validated in the neXtProt database ( http://www.nextprot.org/ ). Among those, the loss of tumor suppressor genes STK11, MUM1, KISS1R (19p13.3), and BRG1 (19p13.13) is associated with lung oncogenesis or remote metastasis. Gene aberrations include translocation t(15, 19) (q13, p13.1) fusion oncogene BRD4-NUT, DNA repair genes (ERCC1, ERCC2, XRCC1), TGFβ1 pathway activation genes (TGFB1, LTBP4), Dyrk1B, and potential oncogenesis protector genes such as NFkB pathway inhibition genes (NFKBIB, PPP1R13L) and EGLN2. 
In conclusion, neXtProt is an effective resource for the validation of gene aberrations identified in genomic studies. It promises to enhance our understanding of lung cancer oncogenesis.",2015-06-01 +30462147,Application of machine learning techniques to tuberculosis drug resistance analysis.,"

Motivation

Timely identification of Mycobacterium tuberculosis (MTB) resistance to existing drugs is vital to decrease mortality and prevent the amplification of existing antibiotic resistance. Machine learning methods have been widely applied for timely predicting resistance of MTB given a specific drug and identifying resistance markers. However, they have not been validated on a large cohort of MTB samples from multi-centers across the world in terms of resistance prediction and resistance marker identification. Several machine learning classifiers and linear dimension reduction techniques were developed and compared for a cohort of 13 402 isolates collected from 16 countries across 6 continents and tested 11 drugs.

Results

Compared to conventional molecular diagnostic test, area under curve of the best machine learning classifier increased for all drugs especially by 23.11%, 15.22% and 10.14% for pyrazinamide, ciprofloxacin and ofloxacin, respectively (P < 0.01). Logistic regression and gradient tree boosting were found to perform better than other techniques. Moreover, logistic regression/gradient tree boosting with a sparse principal component analysis/non-negative matrix factorization step compared with the classifier alone enhanced the best performance in terms of F1-score by 12.54%, 4.61%, 7.45% and 9.58% for amikacin, moxifloxacin, ofloxacin and capreomycin, respectively, as well as increasing area under curve for amikacin and capreomycin. Results provided a comprehensive comparison of various techniques and confirmed the application of machine learning for better prediction of the large diverse tuberculosis data. Furthermore, mutation ranking showed the possibility of finding new resistance/susceptible markers.

Availability and implementation

The source code can be found at http://www.robots.ox.ac.uk/~davidc/code.php.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +28603918,"CFTR-France, a national relational patient database for sharing genetic and phenotypic data associated with rare CFTR variants.","Most of the 2,000 variants identified in the CFTR (cystic fibrosis transmembrane regulator) gene are rare or private. Their interpretation is hampered by the lack of available data and resources, making patient care and genetic counseling challenging. We developed a patient-based database dedicated to the annotations of rare CFTR variants in the context of their cis- and trans-allelic combinations. Based on almost 30 years of experience of CFTR testing, CFTR-France (https://cftr.iurc.montp.inserm.fr/cftr) currently compiles 16,819 variant records from 4,615 individuals with cystic fibrosis (CF) or CFTR-RD (related disorders), fetuses with ultrasound bowel anomalies, newborns awaiting clinical diagnosis, and asymptomatic compound heterozygotes. For each of the 736 different variants reported in the database, patient characteristics and genetic information (other variations in cis or in trans) have been thoroughly checked by a dedicated curator. Combining updated clinical, epidemiological, in silico, or in vitro functional data helps to the interpretation of unclassified and the reassessment of misclassified variants. This comprehensive CFTR database is now an invaluable tool for diagnostic laboratories gathering information on rare variants, especially in the context of genetic counseling, prenatal and preimplantation genetic diagnosis. CFTR-France is thus highly complementary to the international database CFTR2 focused so far on the most common CF-causing alleles.",2017-06-28 +31673075,Prediction and Analysis of Skin Cancer Progression using Genomics Profiles of Patients.,"The metastatic Skin Cutaneous Melanoma (SKCM) has been associated with diminished survival rates and high mortality rates worldwide. 
Thus, segregating metastatic melanoma from the primary tumors is crucial to employ an optimal therapeutic strategy for the prolonged survival of patients. The SKCM mRNA, miRNA and methylation data of TCGA is comprehensively analysed to recognize key genomic features that can segregate metastatic and primary tumors. Further, machine learning models have been developed using selected features to distinguish the same. The Support Vector Classification with Weight (SVC-W) model developed using the expression of 17 mRNAs achieved Area under the Receiver Operating Characteristic (AUROC) curve of 0.95 and an accuracy of 89.47% on an independent validation dataset. This study reveals the genes C7, MMP3, KRT14, LOC642587, CASP7, S100A7 and miRNAs hsa-mir-205 and hsa-mir-203b as the key genomic features that may substantially contribute to the oncogenesis of melanoma. Our study also proposes genes ESM1, NFATC3, C7orf4, CDK14, ZNF827, and ZSWIM7 as novel putative markers for cutaneous melanoma metastasis. The major prediction models and analysis modules to predict metastatic and primary tumor samples of SKCM are available from a webserver, CancerSPP ( http://webs.iiitd.edu.in/raghava/cancerspp/ ).",2019-10-31 +31177403,Genome-wide cis-regulatory signatures for modulation of agronomic traits as exemplified by drought yield index (DYI) in chickpea.,"Developing functional molecular tags from the cis-regulatory sequence components of genes is vital for their deployment in efficient genetic dissection of complex quantitative traits in crop plants including chickpea. The current study identified 431,194 conserved non-coding SNP (CNSNP) from the cis-regulatory element regions of genes which were annotated on a chickpea genome. These genome-wide CNSNP marker resources are made publicly accessible through a user-friendly web-database ( http://www.cnsnpcicarbase.com ). 
The CNSNP-based quantitative trait loci (QTL) and expression QTL (eQTL) mapping and genome-wide association study (GWAS) were further integrated with global gene expression landscapes, molecular haplotyping, and DNA-protein interaction study in the association panel and recombinant inbred lines (RIL) mapping population to decode complex genetic architecture of one of the vital seed yield trait under drought stress, drought yield index (DYI), in chickpea. This delineated two constituted natural haplotypes and alleles from a histone H3 protein-coding gene and its transcriptional regulator NAC transcription factor (TF) harboring the major QTLs and trans-acting eQTL governing DYI in chickpea. The effect of CNSNPs in TF-binding cis-element of a histone H3 gene in altering the binding affinity and transcriptional activity of NAC TF based on chromatin immunoprecipitation-quantitative PCR (ChIP-qPCR) assay was evident. The CNSNP-led promising molecular tags scanned will essentially have functional significance to decode transcriptional gene regulatory function and thus can drive translational genomic analysis in chickpea.",2019-06-08 +28125221,3D-e-Chem-VM: Structural Cheminformatics Research Infrastructure in a Freely Available Virtual Machine.,"3D-e-Chem-VM is an open source, freely available Virtual Machine ( http://3d-e-chem.github.io/3D-e-Chem-VM/ ) that integrates cheminformatics and bioinformatics tools for the analysis of protein-ligand interaction data. 3D-e-Chem-VM consists of software libraries, and database and workflow tools that can analyze and combine small molecule and protein structural information in a graphical programming environment. 
New chemical and biological data analytics tools and workflows have been developed for the efficient exploitation of structural and pharmacological protein-ligand interaction data from proteomewide databases (e.g., ChEMBLdb and PDB), as well as customized information systems focused on, e.g., G protein-coupled receptors (GPCRdb) and protein kinases (KLIFS). The integrated structural cheminformatics research infrastructure compiled in the 3D-e-Chem-VM enables the design of new approaches in virtual ligand screening (Chemdb4VS), ligand-based metabolism prediction (SyGMa), and structure-based protein binding site comparison and bioisosteric replacement for ligand design (KRIPOdb).",2017-02-14 +32074461,"Airway Hyperresponsiveness, Inflammation, and Pulmonary Emphysema in Rodent Models Designed to Mimic Exposure to Fuel Oil-Derived Volatile Organic Compounds Encountered during an Experimental Oil Spill.","BACKGROUND:Fuel oil-derived volatile organic compounds (VOCs) inhalation is associated with accidental marine spills. After the Prestige petroleum tanker sank off northern Spain in 2002 and the Deepwater Horizon oil rig catastrophe in 2009, subjects involved in environmental decontamination showed signs of ongoing or residual lung disease up to 5 y after the exposure. OBJECTIVES:We aimed at investigating mechanisms driving persistent respiratory disease by developing an animal model of inhalational exposure to fuel oil-derived VOCs. METHODS:Female Wistar and Brown Norway (BN) rats and C57BL mice were exposed to VOCs produced from fuel oil mimicking the Prestige spill. Exposed animals inhaled the VOCs 2 h daily, 5 d per week, for 3 wk. Airway responsiveness to methacholine (MCh) was assessed, and bronchoalveolar lavage (BAL) and lung tissues were analyzed after the exposure and following a 2-wk washout. 
RESULTS:Consistent with data from human studies, both strains of rats that inhaled fuel oil-derived VOCs developed airway hyperresponsiveness that persisted after the washout period, in the absence of detectable inflammation in any lung compartment. Histopathology and quantitative morphology revealed the development of peripherally distributed pulmonary emphysema, which persisted after the washout period, associated with increased alveolar septal cell apoptosis, microvascular endothelial damage of the lung parenchyma, and inhibited expression of vascular endothelial growth factor (VEGF). DISCUSSION:In this rat model, fuel oil VOCs inhalation elicited alveolar septal cell apoptosis, likely due to DNA damage. In turn, the development of a peculiar pulmonary emphysema pattern altered lung mechanics and caused persistent noninflammatory airway hyperresponsiveness. Such findings suggest to us that humans might also respond to VOCs through physiopathological pathways different from those chiefly involved in typical cigarette smoke-driven emphysema in chronic obstructive pulmonary disease (COPD). If so, this study could form the basis for a novel disease mechanism for lasting respiratory disease following inhalational exposure to catastrophic fuel oil spills. https://doi.org/10.1289/EHP4178.",2020-02-12 +30016975,GIVE: portable genome browsers for personal websites.,"Growing popularity and diversity of genomic data demand portable and versatile genome browsers. Here, we present an open source programming library called GIVE that facilitates the creation of personalized genome browsers without requiring a system administrator. By inserting HTML tags, one can add to a personal webpage interactive visualization of multiple types of genomics data, including genome annotation, ""linear"" quantitative data, and genome interaction data. 
GIVE includes a graphical interface called HUG (HTML Universal Generator) that automatically generates HTML code for displaying user chosen data, which can be copy-pasted into user's personal website or saved and shared with collaborators. GIVE is available at: https://www.givengine.org/ .",2018-07-18 +,Development of a core set of SNP markers for the identification of upland cotton cultivars in China,"Considering the advantages of single nucleotide polymorphisms (SNP) in genotyping and variety identification, the first set public SNP markers at Cotton Marker Database (http://www.cottonmarker.org/) were validated and screened across standard varieties of cotton distinctness, uniformity and stability (DUS) test, aiming to obtain an appropriate set of core SNP markers suitable for upland cotton cultivars in China. A total of 399 out of 1 005 SNPs from 270 loci including 170 insertions-deletions (InDels) were evaluated for their polymorphisms among 30 standard varieties using Sanger sequencing. As a result, 147 loci were sequenced successfully, 377 SNPs and 49 InDels markers were obtained. Among the 377 SNP markers, 333 markers (88.3%) were polymorphic between Gossypium hirsutum and G. barbadense, while 164 markers (43.5%) were polymorphic within upland cotton. As for InDel markers, the polymorphic rate is relatively lower than that of SNP both between species and within species. The homozygous DNA locus ratio of 121 SNPs was higher than 86.2% while that of other 43 SNPs was less than 70%. Only 64 SNPs displayed completely homozygous genotypes among all of the detected upland cotton varieties with 100% homozygous DNA locus ratio. At last, a set of 23 pairs of core SNPs were achieved in view of avoidance of linkage, with polymorphism information content (PIC) values varying from 0.21 to 0.38 with an average of 0.28. 
Genotype characteristics and genetic diversity were analyzed based on the set of core markers, while 40 pairs of core simple-sequence repeats (SSR) primers comprised of 10 sets of four multiplex PCR combinations were also used for analysis based on fluorescence detection system. Comparison results indicated that the genetic diversity level was almost equal, while various varieties were significantly different from each other. Genetic relationship revealed by SSR markers is related to geographic source to a certain extent. Meanwhile clustering results analyzed by SNP markers are more consistent with kinship, which demonstrated that the screen strategy for core SNP marker is effective.",2016-05-01 +29933373,"ODM Data Analysis-A tool for the automatic validation, monitoring and generation of generic descriptive statistics of patient data.","

Introduction

A required step for presenting results of clinical studies is the declaration of participants' demographic and baseline characteristics as claimed by the FDAAA 801. The common workflow to accomplish this task is to export the clinical data from the used electronic data capture system and import it into statistical software like SAS software or IBM SPSS. This software requires trained users, who have to implement the analysis individually for each item. These expenditures may become an obstacle for small studies. The objective of this work is to design, implement and evaluate an open source application, called ODM Data Analysis, for the semi-automatic analysis of clinical study data.

Methods

The system requires clinical data in the CDISC Operational Data Model format. After uploading the file, its syntax and data type conformity of the collected data is validated. The completeness of the study data is determined and basic statistics, including illustrative charts for each item, are generated. Datasets from four clinical studies have been used to evaluate the application's performance and functionality.

Results

The system is implemented as an open source web application (available at https://odmanalysis.uni-muenster.de) and also provided as Docker image which enables an easy distribution and installation on local systems. Study data is only stored in the application as long as the calculations are performed which is compliant with data protection endeavors. Analysis times are below half an hour, even for larger studies with over 6000 subjects.

Discussion

Medical experts have ensured the usefulness of this application to grant an overview of their collected study data for monitoring purposes and to generate descriptive statistics without further user interaction. The semi-automatic analysis has its limitations and cannot replace the complex analysis of statisticians, but it can be used as a starting point for their examination and reporting.",2018-06-22 +25379008,Transcriptome profiling and comparative analysis of Panax ginseng adventitious roots.,"

Background

Panax ginseng Meyer is a traditional medicinal plant famous for its strong therapeutic effects and serves as an important herbal medicine. To understand and manipulate genes involved in secondary metabolic pathways including ginsenosides, transcriptome profiling of P. ginseng is essential.

Methods

RNA-seq analysis of adventitious roots of two P. ginseng cultivars, Chunpoong (CP) and Cheongsun (CS), was performed using the Illumina HiSeq platform. After transcripts were assembled, expression profiling was performed.

Results

Assemblies were generated from ∼85 million and ∼77 million high-quality reads from CP and CS cultivars, respectively. A total of 35,527 and 27,716 transcripts were obtained from the CP and CS assemblies, respectively. Annotation of the transcriptomes showed that approximately 90% of the transcripts had significant matches in public databases. We identified several candidate genes involved in ginsenoside biosynthesis. In addition, a large number of transcripts (17%) with different gene ontology designations were uniquely detected in adventitious roots compared to normal ginseng roots.

Conclusion

This study will provide a comprehensive insight into the transcriptome of ginseng adventitious roots, and a way for successful transcriptome analysis and profiling of resource plants with less genomic information. The transcriptome profiling data generated in this study are available in our newly created adventitious root transcriptome database (http://im-crop.snu.ac.kr/transdb/index.php) for public use.",2014-06-05 +30457872,Using Data Mining To Search for Perovskite Materials with Higher Specific Surface Area.,"The specific surface area (SSA) of ABO3-type perovskite is one of the important properties associated with photocatalytic ability. In this work, data mining methods were used to explore the relationship between the SSA (in the range of 1-60 m2 g-1) of perovskite and its features, including chemical compositions and technical parameters. The genetic algorithm-support vector regression method was used to screen the main features for modeling. The correlation coefficient ( R) between the predicted and experimental SSAs reached as high as 0.986 for the training data set and 0.935 for leave-one-out cross-validation. ABO3-type perovskites with higher SSA can be screened out using the Online Computation Platform for Materials Data Mining (OCPMDM) developed in our laboratory. Further, an online web server has been developed to share the model for the prediction of SSA of ABO3-type perovskite, which is accessible at http://118.25.4.79/material_api/csk856q0fulhhhwv .",2018-12-04 +30713089,Evidence Synthesis to Accelerate and Improve the Evaluation of Therapies for Metastatic Hormone-sensitive Prostate Cancer.,"There are many ongoing randomised trials of promising therapies for metastatic hormone-sensitive prostate cancer (mHSPC), but standard systematic reviews may not synthesise these in a timely or reliable way. We demonstrate how a novel approach to evidence synthesis is being used to speed up and improve treatment evaluations for mHSPC. 
This more prospective, dynamic, and collaborative approach to systematic reviews of both trial results and individual participant data (IPD) is helping in establishing quickly and reliably which treatments are most effective and for which men. However, mHSPC is a complex disease and trials can be lengthy. Thus, parallel efforts will synthesise further IPD to identify early surrogate endpoints for overall survival and prognostic factors, to reduce the duration and improve the design of future trials. The STOPCAP M1 repository of IPD will be made available to other researchers for tackling new questions that might arise. The associated global, collaborative forum will aid strategic and harmonised development of the next generation of mHSPC trials (STOPCAP M1; http://www.stopcapm1.org). PATIENT SUMMARY: We report how a worldwide research effort will review results and anonymised data from advanced prostate cancer trials in new and different ways. We will work out, as quickly as possible, which advanced prostate cancer treatments are best and for which men. We will also find which measures of prostate cancer control and which cancer and patient characteristics can be used to shorten and improve trials of newer treatments. Finally, we describe how the data will help answer new questions about advanced prostate cancer and its treatments.",2019-02-01 +24453546,"A Tenebrionid beetle's dataset (Coleoptera, Tenebrionidae) from Peninsula Valdés (Chubut, Argentina).","The Natural Protected Area Peninsula Valdés, located in Northeastern Patagonia, is one of the largest conservation units of arid lands in Argentina. Although this area has been in the UNESCO World Heritage List since 1999, it has been continually exposed to sheep grazing and cattle farming for more than a century which have had a negative impact on the local environment. Our aim is to describe the first dataset of tenebrionid beetle species living in Peninsula Valdés and their relationship to sheep grazing. 
The dataset contains 118 records on 11 species and 198 adult individuals collected. Beetles were collected using pitfall traps in the two major environmental units of Peninsula Valdés, taking into account grazing intensities over a three year time frame from 2005-2007. The Data quality was enhanced following the best practices suggested in the literature during the digitalization and geo-referencing processes. Moreover, identification of specimens and current accurate spelling of scientific names were reviewed. Finally, post-validation processes using DarwinTest software were applied. Specimens have been deposited at Entomological Collection of the Centro Nacional Patagónico (CENPAT-CONICET). The dataset is part of the database of this collection and has been published on the internet through GBIF Integrated Publishing Toolkit (IPT) (http://data.gbif.org/datasets/resource/14669/). Furthermore, it is the first dataset for tenebrionid beetles of arid Patagonia available in GBIF database, and it is the first one based on a previously designed and standardized sampling to assess the interaction between these beetles and grazing in the area. The main purposes of this dataset are to ensure accessibility to data associated with Tenebrionidae specimens from Peninsula Valdés (Chubut, Argentina), also to contribute to GBIF with primary data about Patagonian tenebrionids and finally, to promote the Entomological Collection of Centro Nacional Patagónico (CENPAT-CONICET) and its associated biodiversity data. For these reasons, we believe that this information will certainly be useful for future faunistic, ecological, conservational and biogeographical studies.",2013-12-18 +26732614,"A transcriptome-wide, organ-specific regulatory map of Dendrobium officinale, an important traditional Chinese orchid herb.","Dendrobium officinale is an important traditional Chinese herb. 
Here, we did a transcriptome-wide, organ-specific study on this valuable plant by combining RNA, small RNA (sRNA) and degradome sequencing. RNA sequencing of four organs (flower, root, leaf and stem) of Dendrobium officinale enabled us to obtain 536,558 assembled transcripts, from which 2,645, 256, 42 and 54 were identified to be highly expressed in the four organs respectively. Based on sRNA sequencing, 2,038, 2, 21 and 24 sRNAs were identified to be specifically accumulated in the four organs respectively. A total of 1,047 mature microRNA (miRNA) candidates were detected. Based on secondary structure predictions and sequencing, tens of potential miRNA precursors were identified from the assembled transcripts. Interestingly, phase-distributed sRNAs with degradome-based processing evidences were discovered on the long-stem structures of two precursors. Target identification was performed for the 1,047 miRNA candidates, resulting in the discovery of 1,257 miRNA--target pairs. Finally, some biological meaningful subnetworks involving hormone signaling, development, secondary metabolism and Argonaute 1-related regulation were established. All of the sequencing data sets are available at NCBI Sequence Read Archive (http://www.ncbi.nlm.nih.gov/sra/). Summarily, our study provides a valuable resource for the in-depth molecular and functional studies on this important Chinese orchid herb.",2016-01-06 +31169974,Medicinal Chemistry Aware Database GDBMedChem.,"The generated database GDB17 enumerates 166.4 billion possible molecules up to 17 atoms of C, N, O, S and halogens following simple chemical stability and synthetic feasibility rules, however medicinal chemistry criteria are not taken into account. 
Here we applied rules inspired by medicinal chemistry to exclude problematic functional groups and complex molecules from GDB17, and sampled the resulting subset uniformly across molecular size, stereochemistry and polarity to form GDBMedChem as a compact collection of 10 million small molecules. This collection has reduced complexity and better synthetic accessibility than the entire GDB17 but retains higher sp3 -carbon fraction and natural product likeness scores compared to known drugs. GDBMedChem molecules are more diverse and very different from known molecules in terms of substructures and represent an unprecedented source of diversity for drug design. GDBMedChem is available for 3D-visualization, similarity searching and for download at http://gdb.unibe.ch.",2019-06-06 +31743831,Taxonomy dimension reduction for colorectal cancer prediction.,"A growing number of people suffer from colorectal cancer, which is one of the most common cancers. It is essential to diagnose and treat the cancer as early as possible. The disease may change the microorganism communities in the gut, and it could be an efficient method to employ gut microorganisms to predict colorectal cancer. In this study, we selected operational taxonomic units that include several kinds of microorganisms to predict colorectal cancer. To find the most important microorganisms and obtain the best prediction performance, we explore effective feature selection methods. We employ three main steps. First, we use a single method to reduce features. Next, to reduce the number of features, we integrate the dimension reduction methods correlation-based feature selection and maximum relevance-maximum distance (MRMD 1.0 and MRMD 2.0). Then, we selected the important features according to the taxonomy files. In this study, we created training and test sets to obtain a more objective evaluation. Random forest, naïve Bayes, and decision tree classifiers were evaluated. 
The results show that the methods proposed in this study are better than hierarchical feature engineering. The proposed method, which combines correlation-based feature selection with MRMD 2.0, performed the best on the CRC2 dataset. The dataset and methods can be found in http://lab.malab.cn/data/microdata/data.html.",2019-11-09 +30738671,Implementation of Bayesian methods to identify SNP and haplotype regions with transmission ratio distortion across the whole genome: TRDscan v.1.0.,"Realized deviations from the expected Mendelian inheritance of alleles from heterozygous parents have been previously reported in a broad range of organisms (i.e., transmission ratio distortion; TRD). Various biological mechanisms affecting gametes, embryos, fetuses, or even postnatal offspring can produce patterns of TRD. However, knowledge about its prevalence and potential causes in livestock species is still scarce. Specific Bayesian models have been recently developed for the analyses of TRD for biallelic loci, which accommodated a wide range of population structures, enabling TRD investigation in livestock populations. The parameterization of these models is flexible and allows the study of overall (parent-unspecific) TRD and sire- and dam-specific TRD. This research aimed at deriving Bayesian models for fitting TRD on the basis of haplotypes, testing the models for both haplotype- and SNP-based methods in simulated data and actual Holstein genotypes, and developing a specific software for TRD analyses. Results obtained on simulated data sets showed that the statistical power of the analysis increased with sample size of trios (n), proportion of heterozygous parents, and the magnitude of the TRD. On the other hand, the statistical power to detect TRD decreased with the number of alleles at each loci. 
Bayesian analyses showed a strong Pearson correlation coefficient (≥0.97) between simulated and estimated TRD that reached the significance level of Bayes factor ≥10 for both single-marker and haplotype analyses when n ≥ 25. Moreover, the accuracy in terms of the mean absolute error decreased with the increase of the sample size and increased with the number of alleles at each loci. Using real data (55,732 genotypes of Holstein trios), SNP- and haplotype-based distortions were detected with overall TRD, sire-TRD, or dam-TRD, showing different magnitudes of TRD and statistical relevance. Additionally, the haplotype-based method showed more ability to capture TRD compared with individual SNP. To discard possible random TRD in real data, an approximate empirical null distribution of TRD was developed. The program TRDscan v.1.0 was written in Fortran 2008 language and provides a powerful statistical tool to scan for TRD regions across the whole genome. This developed program is freely available at http://www.casellas.info/files/TRDscan.zip.",2019-02-07 +32716615,NTP Developmental and Reproductive Toxicity Technical Report on the Prenatal Development Studies of Tris(chloropropyl) Phosphate (CASRN 13674-84-5) in Sprague Dawley (Hsd:Sprague Dawley® SD®) Rats (Gavage Studies): DART Report 01,"

Unlabelled

Tris(chloropropyl) phosphate (TCPP) is used as a flame retardant in textiles, furniture (flexible polyurethane foam), and other related products. In addition, it is manufactured for use in construction materials (rigid polyurethane foam), electronic products, paints, coatings, and adhesives. Several flame retardants have been removed from products in commerce because of toxicity concerns, and TCPP has been considered as a replacement flame retardant for use in these products. Because of concerns for increased use, and thus increased human exposure, the Consumer Product Safety Commission nominated TCPP for toxicological testing by the National Toxicology Program. Additional information on the evaluation of the potential toxicity of TCPP is available at the Program’s website (https://ntp.niehs.nih.gov/testing/status/agents/ts-m20263.html). The purpose of this report is to summarize and discuss TCPP effects on prenatal development. In these studies, time-mated female Sprague Dawley (Hsd:Sprague Dawley® SD®) rats received TCPP (95.7–97% pure) in 0.5% methylcellulose by gavage from implantation on gestation day (GD) 6 to the day before expected parturition (GD 20). Evidence of TCPP-related maternal and fetal toxicity was examined in the dose range-finding study followed by the standard prenatal developmental toxicity study.

Dose range-finding prenatal developmental toxicity study

Groups of 11 time-mated female rats were administered 0, 300, 650, or 1,000 mg TCPP/kg body weight per day (mg/kg/day) in 0.5% aqueous methylcellulose by gavage from GD 6 to GD 20. Vehicle control (0 mg/kg) animals received aqueous methylcellulose. Maternal toxicity was observed in the 1,000 mg/kg group as evidenced by 7 of 11 dams being either found dead or euthanized moribund. Associated clinical observations in the 1,000 mg/kg group included convulsion, tremors, prone, gasping, hypoactivity, hunched posture, nasal discharge, stained fur, piloerection, salivation, and rooting (pre- and postdosing), which occurred throughout gestation. One female in the 650 mg/kg group was euthanized moribund on GD 16 with associated clinical observations including cold to touch, hypoactivity, paleness, ataxia, and labored breathing, which may have been related to TCPP exposure. All vehicle control and 300 mg/kg animals survived to study termination. No TCPP-related effects were found on maternal body weights, body weight gain, or feed consumption from GD 6 to GD 20. Additionally, there were no significant exposure-related effects on postimplantation loss, fetal body weights, or fetal sex ratio, although limited litters were available for assessment in the 1,000 mg/kg TCPP group because of maternal toxicity. Finally, there were no significant exposure-related external fetal findings (including examination of the palate).

Prenatal developmental toxicity study

Because of the maternal toxicity observed at 1,000 mg/kg in the dose ranging-finding study, groups of 25 time-mated female rats were administered 0 (n = 50), 162.5, 325, or 650 mg TCPP/kg/ body weight per day in 0.5% aqueous methylcellulose by gavage from GD 6 to GD 20. Vehicle control (0 m/kg) animals received aqueous methylcellulose. Animals were added to the vehicle control group to obtain historical control data for both maternal and fetal findings in this strain of rat. In this study, TCPP was well tolerated and no exposure-related effects occurred on mortality, maternal body weights, body weight gains, or feed consumption during gestation. Low incidences of clinical observations including nasal discharge, salivation, twitches, ataxia, piloerection, audible respiratory sounds, and hyperactivity were observed in the 650 mg/kg group. Adverse clinical observations were not observed in other groups exposed to TCPP. There were no notable placental or other maternal gross observations at necropsy except for dose-related increases in absolute (9%, 16%, and 26% at 162.5, 325, and 650 mg/kg, respectively) and relative liver weights. No significant effects of TCPP were observed on postimplantation loss, mean fetal body weights, or fetal sex ratio. Likewise, no biologically relevant exposure-related malformations were found in external, visceral, and skeletal fetal exams of groups exposed to TCPP.

Conclusions

Under the conditions of the prenatal study, no evidence of developmental toxicity† of TCPP was found in Hsd:Sprague Dawley® SD® rats administered 162.5, 325, or 650 mg/kg in the absence of overt maternal toxicity. Trade names: Amgard TMCP, Antiblaze 80, Antiblaze TMCP, Fyrol PCF †See Explanation of Levels of Evidence for Developmental Toxicity. [Table: see text]",2020-07-28 +26494170,Development and promotion of a national website to improve dissemination of information related to the prevention of mother-to-child HIV transmission (PMTCT) in Tanzania.,"

Background

Websites that address national public health issues provide an important mechanism to improve health education and services in resource limited countries. This article describes the development, promotion and initial evaluation of a national website to increase access to information and resources about prevention of mother-to-child transmission of HIV (PMTCT) among healthcare workers and PMTCT stakeholders in Tanzania.

Methods

A participatory approach, involving the Tanzania Ministry of Health and Social Welfare (MOHSW) and key PMTCT stakeholders, was used to develop and manage the online PMTCT National Resource Center (NRC), http://pmtct.or.tz/ . The website was created with a content management system software system that does not require advanced computer skills and facilitates content updates and site management. The PMTCT NRC hosts related regularly updated PMTCT-related news, resources and publications. Website implementation, access and performance were evaluated over two years using Google Analytics data about visits, page views, downloads, bounce rates and location of visitors, supplemented by anecdotal feedback.

Results

Following its launch in July 2013, the PMTCT NRC website received a total of 28,400 visits, with 66,463 page views, over 2 years; 30 % of visits were from returning visitors. During year 1, visits increased by 80 % from the first to second 6 month period and then declined slightly (9-11 %) but remained stable in Year 2. Monthly visits spiked by about 70 % during October 2013 and January 2014 in response to the release and promotion of revised national PMTCT guidelines and training manuals. The majority of visitors came from primarily urban areas in Tanzania (50 %) and from other African countries (16 %). By year 2, over one-third of visitors used mobile devices to access the site.

Conclusions

The successfully implemented PMTCT NRC website provides centralized, easily accessed information designed to address the needs of clinicians, educators and program partners in Tanzania. Ongoing involvement of the MOHSW and key stakeholders are essential ensure the website's growth, effectiveness and sustainability. Additional efforts are needed to expand use of the PMTCT NRC throughout the country. Future evaluations should examine the role of the website in supporting implementation of national PMTCT guidelines and services in Tanzania.",2015-10-22 +30101310,Linkage disequilibrium maps to guide contig ordering for genome assembly.,"

Motivation

Efforts to establish reference genome sequences by de novo sequence assembly have to address the difficulty of linking relatively short sequence contigs to form much larger chromosome assemblies. Efficient strategies are required to span gaps and establish contig order and relative orientation. We consider here the use of linkage disequilibrium (LD) maps of sequenced contigs and the utility of LD for ordering, orienting and positioning linked sequences. LD maps are readily constructed from population data and have at least an order of magnitude higher resolution than linkage maps providing the potential to resolve difficult areas in assemblies. We empirically evaluate a linkage disequilibrium map-based method using single nucleotide polymorphism genotype data in a 216 kilobase region of human 6p21.3 from which three shorter contigs are formed.

Results

LD map length is most informative about the correct order and orientation and is suggested by the shortest LD map where the residual error variance is close to one. For regions in strong LD this method may be less informative for correcting inverted contigs than for identifying correct contig orders. For positioning two contigs in linkage disequilibrium with each other the inter-contig distances may be roughly estimated by this method.

Availability and implementation

The LDMAP program is written in C for a linux platform and is available at https://www.soton.ac.uk/genomicinformatics/research/ld.page.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +28807044,MetaMeta: integrating metagenome analysis tools to improve taxonomic profiling.,"

Background

Many metagenome analysis tools are presently available to classify sequences and profile environmental samples. In particular, taxonomic profiling and binning methods are commonly used for such tasks. Tools available among these two categories make use of several techniques, e.g., read mapping, k-mer alignment, and composition analysis. Variations on the construction of the corresponding reference sequence databases are also common. In addition, different tools provide good results in different datasets and configurations. All this variation creates a complicated scenario to researchers to decide which methods to use. Installation, configuration and execution can also be difficult especially when dealing with multiple datasets and tools.

Results

We propose MetaMeta: a pipeline to execute and integrate results from metagenome analysis tools. MetaMeta provides an easy workflow to run multiple tools with multiple samples, producing a single enhanced output profile for each sample. MetaMeta includes a database generation, pre-processing, execution, and integration steps, allowing easy execution and parallelization. The integration relies on the co-occurrence of organisms from different methods as the main feature to improve community profiling while accounting for differences in their databases.

Conclusions

In a controlled case with simulated and real data, we show that the integrated profiles of MetaMeta overcome the best single profile. Using the same input data, it provides more sensitive and reliable results with the presence of each organism being supported by several methods. MetaMeta uses Snakemake and has six pre-configured tools, all available at BioConda channel for easy installation (conda install -c bioconda metameta). The MetaMeta pipeline is open-source and can be downloaded at: https://gitlab.com/rki_bioinformatics .",2017-08-14 +28011869,PCoM-DB Update: A Protein Co-Migration Database for Photosynthetic Organisms.,"The identification of protein complexes is important for the understanding of protein structure and function and the regulation of cellular processes. We used blue-native PAGE and tandem mass spectrometry to identify protein complexes systematically, and built a web database, the protein co-migration database (PCoM-DB, http://pcomdb.lowtem.hokudai.ac.jp/proteins/top), to provide prediction tools for protein complexes. PCoM-DB provides migration profiles for any given protein of interest, and allows users to compare them with migration profiles of other proteins, showing the oligomeric states of proteins and thus identifying potential interaction partners. The initial version of PCoM-DB (launched in January 2013) included protein complex data for Synechocystis whole cells and Arabidopsis thaliana thylakoid membranes. Here we report PCoM-DB version 2.0, which includes new data sets and analytical tools. Additional data are included from whole cells of the pelagic marine picocyanobacterium Prochlorococcus marinus, the thermophilic cyanobacterium Thermosynechococcus elongatus, the unicellular green alga Chlamydomonas reinhardtii and the bryophyte Physcomitrella patens. The Arabidopsis protein data now include data for intact mitochondria, intact chloroplasts, chloroplast stroma and chloroplast envelopes. 
The new tools comprise a multiple-protein search form and a heat map viewer for protein migration profiles. Users can compare migration profiles of a protein of interest among different organelles or compare migration profiles among different proteins within the same sample. For Arabidopsis proteins, users can compare migration profiles of a protein of interest with putative homologous proteins from non-Arabidopsis organisms. The updated PCoM-DB will help researchers find novel protein complexes and estimate their evolutionary changes in the green lineage.",2017-01-01 +24267744,The vertebrate taxonomy ontology: a framework for reasoning across model organism and species phenotypes.,"

Background

A hierarchical taxonomy of organisms is a prerequisite for semantic integration of biodiversity data. Ideally, there would be a single, expansive, authoritative taxonomy that includes extinct and extant taxa, information on synonyms and common names, and monophyletic supraspecific taxa that reflect our current understanding of phylogenetic relationships.

Description

As a step towards development of such a resource, and to enable large-scale integration of phenotypic data across vertebrates, we created the Vertebrate Taxonomy Ontology (VTO), a semantically defined taxonomic resource derived from the integration of existing taxonomic compilations, and freely distributed under a Creative Commons Zero (CC0) public domain waiver. The VTO includes both extant and extinct vertebrates and currently contains 106,947 taxonomic terms, 22 taxonomic ranks, 104,736 synonyms, and 162,400 cross-references to other taxonomic resources. Key challenges in constructing the VTO included (1) extracting and merging names, synonyms, and identifiers from heterogeneous sources; (2) structuring hierarchies of terms based on evolutionary relationships and the principle of monophyly; and (3) automating this process as much as possible to accommodate updates in source taxonomies.

Conclusions

The VTO is the primary source of taxonomic information used by the Phenoscape Knowledgebase (http://phenoscape.org/), which integrates genetic and evolutionary phenotype data across both model and non-model vertebrates. The VTO is useful for inferring phenotypic changes on the vertebrate tree of life, which enables queries for candidate genes for various episodes in vertebrate evolution.",2013-11-22 +30967119,Large-scale 3D chromatin reconstruction from chromosomal contacts.,"

Background

Recent advances in genome analysis have established that chromatin has preferred 3D conformations, which bring distant loci into contact. Identifying these contacts is important for us to understand possible interactions between these loci. This has motivated the creation of the Hi-C technology, which detects long-range chromosomal interactions. Distance geometry-based algorithms, such as ChromSDE and ShRec3D, have been able to utilize Hi-C data to infer 3D chromosomal structures. However, these algorithms, being matrix-based, are space- and time-consuming on very large datasets. A human genome of 100 kilobase resolution would involve ∼30,000 loci, requiring gigabytes just in storing the matrices.

Results

We propose a succinct representation of the distance matrices which tremendously reduces the space requirement. We give a complete solution, called SuperRec, for the inference of chromosomal structures from Hi-C data, through iterative solving the large-scale weighted multidimensional scaling problem.

Conclusions

SuperRec runs faster than earlier systems without compromising on result accuracy. The SuperRec package can be obtained from http://www.cs.cityu.edu.hk/~shuaicli/SuperRec .",2019-04-04 +31911555,miR-149 Suppresses Breast Cancer Metastasis by Blocking Paracrine Interactions with Macrophages.,"Paracrine activation of cells contained in the tumor microenvironment promotes tumor progression and metastasis. In breast cancer, malignant cells recruit and educate macrophages into a M2 tumor-promoting phenotype that supports the metastatic spread of cancer cells. Here, we show that miR-149 functions as a metastasis-suppressing microRNA in breast cancer cells by limiting colony-stimulating factor-1 (CSF1)-dependent recruitment and M2 polarization of macrophages. In lymph node-positive, triple-negative breast cancer (TNBC) tissues, low miR-149 expression correlated with macrophage infiltration and reduced patient survival. By directly targeting CSF1, miR-149 expression in TNBC cell lines (MDA-MB-231 and BT-549) inhibited the recruitment of human monocytic THP-1 cells and primary human macrophages. Furthermore, in macrophages cocultured with MDA-MB-231 cells expressing miR-149, epidermal growth factor (EGF) and amphiregulin expression levels were strongly reduced, resulting in reduced EGF receptor activation in the cancer cells. In vivo, lung metastases developing from orthotopic MDA-MB-231 tumors were reduced by 75% by miR-149 expression, and this was associated with impaired M2 macrophage infiltration of the primary tumors. These data suggest that miR-149 downregulation functionally contributes to breast tumor progression by recruiting macrophages to the tumor and facilitating CSF1 and EGF receptor cross-talk between cancer cells and macrophages. 
SIGNIFICANCE: These findings contribute to the understanding of tumor-stroma interactions by showing that miR-149 downregulation in TNBC enhances reciprocal growth factor signaling between macrophages and cancer cells, which promotes tumor progression and metastasis. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/6/1330/F1.large.jpg.",2020-01-07 +23060735,MetaboLights: towards a new COSMOS of metabolomics data management.,"Exciting funding initiatives are emerging in Europe and the US for metabolomics data production, storage, dissemination and analysis. This is based on a rich ecosystem of resources around the world, which has been build during the past ten years, including but not limited to resources such as MassBank in Japan and the Human Metabolome Database in Canada. Now, the European Bioinformatics Institute has launched MetaboLights, a database for metabolomics experiments and the associated metadata (http://www.ebi.ac.uk/metabolights). It is the first comprehensive, cross-species, cross-platform metabolomics database maintained by one of the major open access data providers in molecular biology. In October, the European COSMOS consortium will start its work on Metabolomics data standardization, publication and dissemination workflows. The NIH in the US is establishing 6-8 metabolomics services cores as well as a national metabolomics repository. This communication reports about MetaboLights as a new resource for Metabolomics research, summarises the related developments and outlines how they may consolidate the knowledge management in this third large omics field next to proteomics and genomics.",2012-09-25 +29931305,snpAD: an ancient DNA genotype caller.,"

Motivation

The study of ancient genomes can elucidate the evolutionary past. However, analyses are complicated by base-modifications in ancient DNA molecules that result in errors in DNA sequences. These errors are particularly common near the ends of sequences and pose a challenge for genotype calling.

Results

I describe an iterative method that estimates genotype frequencies and errors along sequences to allow for accurate genotype calling from ancient sequences. The implementation of this method, called snpAD, performs well on high-coverage ancient data, as shown by simulations and by subsampling the data of a high-coverage Neandertal genome. Although estimates for low-coverage genomes are less accurate, I am able to derive approximate estimates of heterozygosity from several low-coverage Neandertals. These estimates show that low heterozygosity, compared to modern humans, was common among Neandertals.

Availability and implementation

The C++ code of snpAD is freely available at http://bioinf.eva.mpg.de/snpAD/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +,2397 : A checklist for developing and implementing a high-impact monitoring and evaluation system in clinical and translational science programs,"OBJECTIVES/SPECIFIC AIMS: This presentation will highlight the framework and domains of the monitoring and evaluation (M&E) System Checklist created in response to the need for practical guidelines and intended to improve the quality, efficiency, and consistency of monitoring and evaluation of the clinical and translational work. The recently published NCATS Strategic Plan (2016; p. 18) presents the following objectives and guidelines that implicitly suggest the need for sound M&E: “Objective 4-1: Continually assess and optimize internal business practices” and “Objective 4-2: Ensure all scientific programs and operational activities are conducted in a rigorous, robust and data-driven manner.” Given the complexity of clinical and translational work and associated monitoring/evaluation processes and the dearth of practical tools in the CTR evaluation area, the need for such a checklist is clear. A “checklist” (a detailed list of items/steps required, things to be done, or points to be considered) is a type of informational job aid used to improve performance, reduce failure, deal with complexity, and ensure consistency and completeness in carrying out work. Checklists are popular in many fields—due to their brevity, concreteness, order, implicit (and sometimes explicit) mandate to do things right, and expectation for a checklist’s being grounded in good practices and/or strong theory. A notable example is the famed WHO Surgical Safety Checklist (2008). 
The proposed M&E Checklist has been developed based on the author’s extensive experience in internal evaluation, checklist development and use, and working with the Clinical and Translational Sciences Awards (CTSAs)—as the UMN CTSI M&E Director, ACTS Evaluation SIG Chair, and a Co-Lead of the Evaluators Working Group within the NCATS CTSA Common Metrics Initiative. Although there is no “golden” algorithm that will totally suit every organization, the M&E checklist provides useful guidelines for building M&E. The Checklist presents the key concepts and important issues in M&E development and implementation. It also incorporates a synthesis of 3 grounded frameworks: King and Volkov’s Framework for Building Evaluation Capacity (2005), Simister’s Framework for Developing M&E Systems for Complex Organizations (2009), and the award-winning CDC Framework for Program Evaluation in Public Health (1999). For the purposes of the proposed Checklist, an M&E system (or framework/approach) is understood as “a series of policies, practices and processes that enable the systematic and effective collection, analysis and use of monitoring and evaluation information” (Simister, 2009; p. 1). A well-designed M&E system ensures a consistent approach to the collection, analysis, and use of information, while allowing considerable scope for different parts of an organization to develop and apply their own solutions in response to their particular situations. The M&E Checklist structured around 3 key domains (adapted from the Volkov and King ECB Checklist, 2007): (1) M&E/organizational context: taking advantage of the internal and external organizational context, administrative culture, and decision-making processes. (2) M&E structures: creating structures—mechanisms within the organization—that enable the M&E development and use. (3) M&E resources: making M&E resources available and used. For each domain, the Checklist has a number of associated categories and activities. 
Specifically, the checklist adopts and adapts the following useful steps from Simister’s approach: “Define the scope and purpose,” “Perform a situational analysis,” “Consult with relevant stakeholders,” “Identify the key levels and focus areas,” and “Integrate the M&E system horizontally and vertically,” as well as the CDC Framework’s steps “Engage stakeholders,” “Focus the M&E Design,” and “Ensure use and share lessons learned.”With slight modification, the organizations can also utilize the Checklist as a rubric/assessment tool to gauge the status of their M&E capacity. METHODS/STUDY POPULATION: A case study of methodological/implementation tool development. There are no human subjects in this study, thus, Study Population is not applicable to this study. This study is not subject to IRB review. RESULTS/ANTICIPATED RESULTS: The proposed checklist approach shows sound promise to not only impact individual programs and their M&E systems but to also enhance internal evaluation capacity, critical thinking, learning, strategic management, and improvement within clinical and translational science organizations. DISCUSSION/SIGNIFICANCE OF IMPACT: The ultimate goal and impact of the proposed checklist is to help ensure that organizations and their M&E teams consistently follow a few critical steps and thereby maximize the quality, efficiency, and consistency of monitoring and evaluation of the clinical and translational work. The checklist’s impact is significant in that it fills the current gap in the practice, literature, and methodology and provides practical guidance for CTR (and other) organizations and programs striving to improve the quantity and quality of evaluation. References Centers for Disease Control and Prevention (CDC). Framework for program evaluation in public health. MMWR 1999; 48 (no. RR-11). King JA, Volkov B. A framework for building evaluation capacity based on the experiences of three organizations. CURA Reporter 2005; 35(3): 10–16. 
National Center for Advancing Translational Sciences. NCATS Strategic Plan [Internet], 2016. NIH. (https://ncats.nih.gov/strategicplan) Simister N. Developing M&E systems for complex organisations: a methodology. INTRAC, 2009. Volkov B, King J. A checklist for building organizational evaluation capacity [Internet], 2007 (https://www.wmich.edu/sites/default/files/attachments/u350/2014/organiziationevalcapacity.pdf) World Alliance for Patient Safety. WHO surgical safety checklist and implementation manual [Internet], 2008 (http://www.who.int/patientsafety/safesurgery/ss_checklist/en/)",2017-09-01 +29878046,Graph-guided multi-task sparse learning model: a method for identifying antigenic variants of influenza A(H3N2) virus.,"

Motivation

Influenza virus antigenic variants continue to emerge and cause disease outbreaks. Time-consuming, costly and middle-throughput serologic methods using virus isolates are routinely used to identify influenza antigenic variants for vaccine strain selection. However, the resulting data are notoriously noisy and difficult to interpret and integrate because of variations in reagents, supplies and protocol implementation. A novel method without such limitations is needed for antigenic variant identification.

Results

We developed a Graph-Guided Multi-Task Sparse Learning (GG-MTSL) model that uses multi-sourced serologic data to learn antigenicity-associated mutations and infer antigenic variants. By applying GG-MTSL to influenza H3N2 hemagglutinin sequences, we showed the method enables rapid characterization of antigenic profiles and identification of antigenic variants in real time and on a large scale. Furthermore, sequences can be generated directly by using clinical samples, thus minimizing biases due to culture-adapted mutation during virus isolation.

Availability and implementation

MATLAB source codes developed for GG-MTSL are available through http://sysbio.cvm.msstate.edu/files/GG-MTSL/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +31170516,Plants mentioned in the Islamic Scriptures (Holy Qur'ân and Ahadith): Traditional uses and medicinal importance in contemporary times.,"

Ethnopharmacological relevance

Over the past thousand years, Islamic physicians have collected cultural, philosophical, sociological and historical backgrounds for understanding diseases and medications. The Prophet Mohammed (Peace Be Upon Him (PBUH)) said: ""There is no disease that Allah has created, except that Allah also has created its cure."" Therefore, Islamic scholars are encouraged to explore and use both traditional and modern forms of medicine.

Aim of the study

(1) To identify some of the medicinal plants mentioned in the Holy Qur'ân and Ahadith textbooks of the period 700-1500 AD; (2) to compare them with presently used traditional medicines; (3) to evaluate their value based on modern research; and (4) to investigate the contributions of Islamic scholars to the development of the scientific branches, particularly medicine.

Materials and methods

A literature search was performed relating to 12 medicinal plants mentioned in the Holy Qur'ân and Ahadith using textbooks, Al-Azhar scholars, published articles, the plant list website (http://www.theplantlist.org/), the medicinal plant names services website (http://mpns.kew.org/mpns-portal/) and web databases (PubMed, Science Direct, and Google Scholar).

Results and discussion

The Islamic Golden Age was a step towards modern medicine, with unique insights and multi-disciplinary aspects. Traditional Islamic Medicine has had a significant impact on the development of various medical, scientific and educational activities. Innumerable Muslim and non-Muslim physicians have built on the strong foundation of Traditional Islamic Medicine by translating the described natural remedies and effects. The influences of different ancient cultures on the traditional uses of natural products were also documented in Islamic Scriptures in the last part of the second millennium. The divine teachings of Islam combine natural and practical healing and incorporate inherited science and technology.

Conclusion

In this review, we discuss Traditional Islamic Medicine with reference to both medical recommendations mentioned in the Holy Qur'ân and Prophetic Traditional Medicine (al-Tibb al-Nabawi). Although the molecular mechanisms and functions of some of the listed medicinal plants and their derivatives have been intensively studied, some traditional remedies have yet to be translated into clinical applications.",2019-06-04 +21729256,RASOnD-a comprehensive resource and search tool for RAS superfamily oncogenes from various species.,"

Background

The Ras superfamily plays an important role in the control of cell signalling and division. Mutations in the Ras genes convert them into active oncogenes. The Ras oncogenes form a major thrust of global cancer research as they are involved in the development and progression of tumors. This has resulted in the exponential growth of data on Ras superfamily across different public databases and in literature. However, no dedicated public resource is currently available for data mining and analysis on this family. The present database was developed to facilitate straightforward accession, retrieval and analysis of information available on Ras oncogenes from one particular site.

Description

We have developed the RAS Oncogene Database (RASOnD) as a comprehensive knowledgebase that provides integrated and curated information on a single platform for oncogenes of Ras superfamily. RASOnD encompasses exhaustive genomics and proteomics data existing across diverse publicly accessible databases. This resource presently includes overall 199,046 entries from 101 different species. It provides a search tool to generate information about their nucleotide and amino acid sequences, single nucleotide polymorphisms, chromosome positions, orthologies, motifs, structures, related pathways and associated diseases. We have implemented a number of user-friendly search interfaces and sequence analysis tools. At present the user can (i) browse the data, (ii) search any field through a simple or advanced search interface, and (iii) perform a BLAST search and subsequently CLUSTALW multiple sequence alignment by selecting sequences of Ras oncogenes. The Generic gene browser, GBrowse, JMOL for structural visualization and TREEVIEW for phylograms have been integrated for clear perception of retrieved data. External links to related databases have been included in RASOnD.

Conclusions

This database is a resource and search tool dedicated to Ras oncogenes. It has utility to cancer biologists and cell molecular biologists as it is a ready source for research, identification and elucidation of the role of these oncogenes. The data generated can be used for understanding the relationship between the Ras oncogenes and their association with cancer. The database updated monthly is freely accessible online at http://202.141.47.181/rasond/ and http://www.aiims.edu/RAS.html.",2011-07-05 +23110816,Sagace: a web-based search engine for biomedical databases in Japan.,"

Background

In the big data era, biomedical research continues to generate a large amount of data, and the generated information is often stored in a database and made publicly available. Although combining data from multiple databases should accelerate further studies, the current number of life sciences databases is too large to grasp features and contents of each database.

Findings

We have developed Sagace, a web-based search engine that enables users to retrieve information from a range of biological databases (such as gene expression profiles and proteomics data) and biological resource banks (such as mouse models of disease and cell lines). With Sagace, users can search more than 300 databases in Japan. Sagace offers features tailored to biomedical research, including manually tuned ranking, a faceted navigation to refine search results, and rich snippets constructed with retrieved metadata for each database entry.

Conclusions

Sagace will be valuable for experts who are involved in biomedical research and drug development in both academia and industry. Sagace is freely available at http://sagace.nibio.go.jp/en/.",2012-10-31 +31275847,OSblca: A Web Server for Investigating Prognostic Biomarkers of Bladder Cancer Patients.,"Bladder cancer (BC) is one of the most common malignant tumors in the urinary system. The discovery of prognostic biomarkers is still one of the major challenges to improve clinical treatment of BC patients. In order to assist biologists and clinicians in easily evaluating the prognostic potency of genes in BC patients, we developed a user-friendly Online consensus Survival tool for bladder cancer (OSblca), to analyze the prognostic value of genes. The OSblca includes gene expression profiles of 1,075 BC patients and their respective clinical follow-up information. The clinical follow-up data include overall survival (OS), disease specific survival (DSS), disease free interval (DFI), and progression free interval (PFI). To analyze the prognostic value of a gene, users only need to input the official gene symbol and then click the ""Kaplan-Meier plot"" button, and Kaplan-Meier curve with the hazard ratio, 95% confidence intervals and log-rank P-value are generated and graphically displayed on the website using default options. For advanced analysis, users could limit their analysis by confounding factors including data source, survival type, TNM stage, histological type, smoking history, gender, lymph invasion, and race, which are set up as optional parameters to meet the specific needs of different researchers. To test the performance of the web server, we have tested and validated its reliability using previously reported prognostic biomarkers, including KPNA2, TP53, and MYC etc., which had their prognostic values validated as reported in OSblca. In conclusion, OSblca is a useful tool to evaluate and discover novel prognostic biomarkers in BC. 
The web server can be accessed at http://bioinfo.henu.edu.cn/BLCA/BLCAList.jsp.",2019-06-04 +31381336,KBbox: A Toolbox of Computational Methods for Studying the Kinetics of Molecular Binding.,"The past few years have seen increasing recognition of the importance of understanding molecular binding kinetics. This has led to the development of myriad computational methods for studying the kinetics of binding processes and predicting their associated rate constants that show varying ranges of application, degrees of accuracy, and computational requirements. In order to help researchers decide which method might be suitable for their projects, we have developed KBbox, a web server that guides users in choosing the methods they should consider on the basis of the information they wish to obtain, the data they currently have available, and the computational resources to which they have access. KBbox provides information on the toolbox of available methods, their associated software tools, an expanding list of curated examples of published applications, and tutorials explaining how to apply some of the methods. It has been designed to allow the easy addition of new methods, tools, and examples as they are developed and published. KBbox is available at https://kbbox.h-its.org/ .",2019-08-20 +31163671,De Novo Design and In Vitro Testing of Antimicrobial Peptides against Gram-Negative Bacteria. ,"Antimicrobial peptides (AMPs) have been identified as a potentially new class of antibiotics to combat bacterial resistance to conventional drugs. The design of de novo AMPs with high therapeutic indexes, low cost of synthesis, high resistance to proteases and high bioavailability remains a challenge. Such design requires computational modeling of antimicrobial properties. Currently, most computational methods cannot accurately calculate antimicrobial potency against particular strains of bacterial pathogens. 
We developed a tool for AMP prediction (Special Prediction (SP) tool) and made it available on our Web site (https://dbaasp.org/prediction). Based on this tool, a simple algorithm for the design of de novo AMPs (DSP) was created. We used DSP to design short peptides with high therapeutic indexes against gram-negative bacteria. The predicted peptides have been synthesized and tested in vitro against a panel of gram-negative bacteria, including drug resistant ones. Predicted activity against Escherichia coli ATCC 25922 was experimentally confirmed for 14 out of 15 peptides. Further improvements for designed peptides included the synthesis of D-enantiomers, which are traditionally used to increase resistance against proteases. One synthetic D-peptide (SP15D) possesses one of the lowest values of minimum inhibitory concentration (MIC) among all DBAASP database short peptides at the time of the submission of this article, while being highly stable against proteases and having a high therapeutic index. The mode of anti-bacterial action, assessed by fluorescence microscopy, shows that SP15D acts similarly to cell penetrating peptides. SP15D can be considered a promising candidate for the development of peptide antibiotics. We plan further exploratory studies with the SP tool, aiming at finding peptides which are active against other pathogenic organisms.",2019-06-03 +30458523,"Normative Data for a Rapid, Automated Test of Spatial Release From Masking.","

Purpose

The purpose of this study is to report normative data and predict thresholds for a rapid test of spatial release from masking for speech perception. The test is easily administered and has good repeatability, with the potential to be used in clinics and laboratories. Normative functions were generated for adults varying in age and amounts of hearing loss.

Method

The test of spatial release presents a virtual auditory scene over headphones with 2 conditions: colocated (with target and maskers at 0°) and spatially separated (with target at 0° and maskers at ± 45°). Listener thresholds are determined as target-to-masker ratios, and spatial release from masking (SRM) is determined as the difference between the colocated condition and spatially separated condition. Multiple linear regression was used to fit the data from 82 adults 18-80 years of age with normal to moderate hearing loss (0-40 dB HL pure-tone average [PTA]). The regression equations were then used to generate normative functions that relate age (in years) and hearing thresholds (as PTA) to target-to-masker ratios and SRM.

Results

Normative functions were able to predict thresholds with an error of less than 3.5 dB in all conditions. In the colocated condition, the function included only age as a predictive parameter, whereas in the spatially separated condition, both age and PTA were included as parameters. For SRM, PTA was the only significant predictor. Different functions were generated for the 1st run, the 2nd run, and the average of the 2 runs. All 3 functions were largely similar in form, with the smallest error being associated with the function on the basis of the average of 2 runs.

Conclusion

With the normative functions generated from this data set, it would be possible for a researcher or clinician to interpret data from a small number of participants or even a single patient without having to first collect data from a control group, substantially reducing the time and resources needed.

Supplemental material

https://doi.org/10.23641/asha.7080878.",2018-12-01 +30537372,Bioinformatic analysis of the prognostic value of the lncRNAs encoding snoRNAs in hepatocellular carcinoma.,"Some lncRNAs can encode small nucleolar RNAs (snoRNAs), called small nucleolar RNA host genes (SNHGs), which exert diverse regulatory effects on cellular processes. In this study, using RNA-seq and survival data in the Cancer Genome Atlas (TCGA)-Liver Hepatocellular Carcinoma (LIHC), we examined the expression profile of some SNHG genes and explored their prognostic value in hepatocellular carcinoma (HCC). Level-3 RNA-sequencing data, the clinicopathological and survival data of patients with primary HCC were downloaded from the UCSC Xena browser (https://xenabrowser.net/), for a secondary analysis. Results showed that SNHG1, GAS5, SNHG3-7 and SNHG10-12 were significantly upregulated in HCC tissues (N = 49) compared with adjacent normal tissues (N = 49). After adjustment for confounding factors, the multivariate analysis confirmed that increased SNHG4 expression was independently associated with shorter OS (HR: 1.319, 95%CI: 1.131-1.537, P < 0.001), while increased GAS5 expression was an independent predictor of shorter RFS (HR: 1.287, 95% CI: 1.027-1.612, P = 0.028). Using the methylation data obtained from the Infinium HumanMethylation450 BeadChip, we found that SNHG4 expression was not likely to be modulated by methylation in HCC. In comparison, the methylation status of 5 CpG sites (cg07177756, cg17025683, cg16290996, cg03044573 and cg06644515) showed a moderately negative correlation (Pearson's r = -0.54, P < 0.001) with GAS5 expression. Based on these findings, we infer that SNHG4 and GAS5 might be valuable prognostic markers in HCC. DNA hypomethylation might play an important role in elevated GAS5 transcription in HCC. © 2018 BioFactors, 45(2):244-252, 2019.",2018-12-08 +29985974,Noise peak filtering in multi-dimensional NMR spectra using convolutional neural networks.,"

Motivation

Multi-dimensional NMR spectra are generally used for NMR signal assignment and structure analysis. There are several programs that can achieve highly automated NMR signal assignments and structure analysis. On the other hand, NMR spectra tend to have a large number of noise peaks even for data acquired with good sample and machine conditions, and it is still difficult to eliminate these noise peaks.

Results

We have developed a method to eliminate noise peaks using convolutional neural networks, implemented in the program package Filt_Robot. The filtering accuracy of Filt_Robot was around 90-95% when applied to 2D and 3D NMR spectra, and the numbers of resulting non-noise peaks were close to those in corresponding manually prepared peaks lists. The filtering can strongly enhance automated NMR spectra analysis.

Availability and implementation

The full package of the program, documents and example data are available from http://bmrbdep.pdbj.org/en/nmr_tool_box/Filt_Robot.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +29045755,The TriForC database: a comprehensive up-to-date resource of plant triterpene biosynthesis.,"Triterpenes constitute a large and important class of plant natural products with diverse structures and functions. Their biological roles range from membrane structural components over plant hormones to specialized plant defence compounds. Furthermore, triterpenes have great potential for a variety of commercial applications such as vaccine adjuvants, anti-cancer drugs, food supplements and agronomic agents. Their biosynthesis is carried out through complicated, branched pathways by multiple enzyme types that include oxidosqualene cyclases, cytochrome P450s, and UDP-glycosyltransferases. Given that the number of characterized triterpene biosynthesis enzymes has been growing fast recently, the need for a database specifically focusing on triterpene enzymology became eminent. Here, we present the TriForC database (http://bioinformatics.psb.ugent.be/triforc/), encompassing a comprehensive catalogue of triterpene biosynthesis enzymes. This highly interlinked database serves as a user-friendly access point to versatile data sets of enzyme and compound features, enabling the scanning of a complete catalogue of experimentally validated triterpene enzymes, their substrates and products, as well as the pathways they constitute in various plant species. The database can be accessed by direct browsing or through convenient search tools including keyword, BLAST, plant species and substructure options. This database will facilitate gene mining and creating genetic toolboxes for triterpene synthetic biology.",2018-01-01 +22064857,FungiDB: an integrated functional genomics database for fungi.,"FungiDB (http://FungiDB.org) is a functional genomic resource for pan-fungal genomes that was developed in partnership with the Eukaryotic Pathogen Bioinformatic resource center (http://EuPathDB.org). 
FungiDB uses the same infrastructure and user interface as EuPathDB, which allows for sophisticated and integrated searches to be performed using an intuitive graphical system. The current release of FungiDB contains genome sequence and annotation from 18 species spanning several fungal classes, including the Ascomycota classes, Eurotiomycetes, Sordariomycetes, Saccharomycetes and the Basidiomycota orders, Pucciniomycetes and Tremellomycetes, and the basal 'Zygomycete' lineage Mucormycotina. Additionally, FungiDB contains cell cycle microarray data, hyphal growth RNA-sequence data and yeast two hybrid interaction data. The underlying genomic sequence and annotation combined with functional data, additional data from the FungiDB standard analysis pipeline and the ability to leverage orthology provides a powerful resource for in silico experimentation.",2011-11-07 +27899614,SUBA4: the interactive data analysis centre for Arabidopsis subcellular protein locations.,"The SUBcellular location database for Arabidopsis proteins (SUBA4, http://suba.live) is a comprehensive collection of manually curated published data sets of large-scale subcellular proteomics, fluorescent protein visualization, protein-protein interaction (PPI) as well as subcellular targeting calls from 22 prediction programs. SUBA4 contains an additional 35 568 localizations totalling more than 60 000 experimental protein location claims as well as 37 new suborganellar localization categories. The experimental PPI data has been expanded to 26 327 PPI pairs including 856 PPI localizations from experimental fluorescent visualizations. The new SUBA4 user interface enables users to choose quickly from the filter categories: 'subcellular location', 'protein properties', 'protein-protein interaction' and 'affiliations' to build complex queries. 
This allows substantial expansion of search parameters into 80 annotation types comprising 1 150 204 new annotations to study metadata associated with subcellular localization. The 'BLAST' tab contains a sequence alignment tool to enable a sequence fragment from any species to find the closest match in Arabidopsis and retrieve data on subcellular location. Using the location consensus SUBAcon, the SUBA4 toolbox delivers three novel data services allowing interactive analysis of user data to provide relative compartmental protein abundances and proximity relationship analysis of PPI and coexpression partners from a submitted list of Arabidopsis gene identifiers.",2016-11-28 +28560281,Microarray data and gene expression statistics for Saccharomyces cerevisiae exposed to simulated asbestos mine drainage.,"Here we describe microarray expression data (raw and normalized), experimental metadata, and gene-level data with expression statistics from Saccharomyces cerevisiae exposed to simulated asbestos mine drainage from the Vermont Asbestos Group (VAG) Mine on Belvidere Mountain in northern Vermont, USA. For nearly 100 years (between the late 1890s and 1993), chrysotile asbestos fibers were extracted from serpentinized ultramafic rock at the VAG Mine for use in construction and manufacturing industries. Studies have shown that water courses and streambeds nearby have become contaminated with asbestos mine tailings runoff, including elevated levels of magnesium, nickel, chromium, and arsenic, elevated pH, and chrysotile asbestos-laden mine tailings, due to leaching and gradual erosion of massive piles of mine waste covering approximately 9 km2. We exposed yeast to simulated VAG Mine tailings leachate to help gain insight on how eukaryotic cells exposed to VAG Mine drainage may respond in the mine environment. Affymetrix GeneChip® Yeast Genome 2.0 Arrays were utilized to assess gene expression after 24-h exposure to simulated VAG Mine tailings runoff. 
The chemistry of mine-tailings leachate, mine-tailings leachate plus yeast extract peptone dextrose media, and control yeast extract peptone dextrose media is also reported. To our knowledge this is the first dataset to assess global gene expression patterns in a eukaryotic model system simulating asbestos mine tailings runoff exposure. Raw and normalized gene expression data are accessible through the National Center for Biotechnology Information Gene Expression Omnibus (NCBI GEO) Database Series GSE89875 (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE89875).",2017-05-12 +29040693,AmyPro: a database of proteins with validated amyloidogenic regions.,"Soluble functional proteins may transform into insoluble amyloid fibrils that deposit in a variety of tissues. Amyloid formation is a hallmark of age-related degenerative disorders. Perhaps surprisingly, amyloid fibrils can also be beneficial and are frequently exploited for diverse functional roles in organisms. Here we introduce AmyPro, an open-access database providing a comprehensive, carefully curated collection of validated amyloid fibril-forming proteins from all kingdoms of life classified into broad functional categories (http://amypro.net). In particular, AmyPro provides the boundaries of experimentally validated amyloidogenic sequence regions, short descriptions of the functional relevance of the proteins and their amyloid state, a list of the experimental techniques applied to study the amyloid state, important structural/functional/variation/mutation data transferred from UniProt, a list of relevant PDB structures categorized according to protein states, database cross-references and literature references. AmyPro greatly improves on similar currently available resources by incorporating both prions and functional amyloids in addition to pathogenic amyloids, and allows users to screen their sequences against the entire collection of validated amyloidogenic sequence fragments. 
By enabling further elucidation of the sequential determinants of amyloid fibril formation, we hope AmyPro will enhance the development of new methods for the precise prediction of amyloidogenic regions within proteins.",2018-01-01 +30821295,[The Pacemaker and Implantable Cardioverter-Defibrillator Registry of the Italian Association of Arrhythmology and Cardiac Pacing - Annual report 2017].,"

Background

The pacemaker (PM) and implantable cardioverter-defibrillator (ICD) Registry of the Italian Association of Arrhythmology and Cardiac Pacing (AIAC) monitors the main epidemiological data in real-world practice. The survey for the 2017 activity collects information about demographics, clinical characteristics, main indications for PM/ICD therapy and device types from the Italian collaborating centers.

Methods

The Registry collects prospectively national PM and ICD implantation activity on the basis of European cards.

Results

PM Registry: data about 23 457 PM implantations were collected (19 378 first implant and 4079 replacements). The number of collaborating centers was 185. Median age of treated patients was 81 years (75 quartile I; 86 quartile III). ECG indications included atrioventricular conduction disorders in 37.1% of first PM implants, sick sinus syndrome in 19.5%, atrial fibrillation plus bradycardia in 13.2%, other in 30.2%. Among atrioventricular conduction defects, third-degree atrioventricular block was the most common type (21.0% of first implants). Use of single-chamber PMs was reported in 25.6% of first implants, of dual-chamber PMs in 66.7%, of PMs with cardiac resynchronization therapy (CRT) in 1.4%, and of single lead atrial-synchronized ventricular stimulation (VDD/R PMs) in 6.3%. ICD Registry: data about 19 023 ICD implantations were collected (13 898 first implants and 5125 replacements). The number of collaborating centers was 437. Median age of treated patients was 71 years (63 quartile I; 78 quartile III). Primary prevention indication was reported in 81.8% of first implants, secondary prevention in 18.2% (cardiac arrest in 6.4%). A single-chamber ICD was used in 27.0% of first implants, dual-chamber in 33.6% and biventricular in 39.3%.

Conclusions

The PM and ICD Registry appears fundamental for monitoring PM and ICD utilization on a large national scale with rigorous examination of demographics and clinical indications. The PM Registry showed stable electrocardiographic and symptom indications, with an important prevalence of dual-chamber pacing. The use of CRT-PM regards a very limited number of patients. The ICD Registry documented a large use of prophylactic and biventricular ICD, reflecting a favorable adherence to trials and guidelines in clinical practice. In order to increase and optimize the cooperation of Italian implanting centers, online data entry (http://www.aiac.it/riprid) should be adopted at large scale.",2019-03-01 +30458725,ASGAL: aligning RNA-Seq data to a splicing graph to detect novel alternative splicing events.,"

Background

While the reconstruction of transcripts from a sample of RNA-Seq data is a computationally expensive and complicated task, the detection of splicing events from RNA-Seq data and a gene annotation is computationally feasible. This latter task, which is adequate for many transcriptome analyses, is usually achieved by aligning the reads to a reference genome, followed by comparing the alignments with a gene annotation, often implicitly represented by a graph: the splicing graph.

Results

We present ASGAL (Alternative Splicing Graph ALigner): a tool for mapping RNA-Seq data to the splicing graph, with the specific goal of detecting novel splicing events, involving either annotated or unannotated splice sites. ASGAL takes as input the annotated transcripts of a gene and a RNA-Seq sample, and computes (1) the spliced alignments of each read in input, and (2) a list of novel events with respect to the gene annotation.

Conclusions

An experimental analysis shows that ASGAL allows to enrich the annotation with novel alternative splicing events even when genes in an experiment express at most one isoform. Compared with other tools which use the spliced alignment of reads against a reference genome for differential analysis, ASGAL better predicts events that use splice sites which are novel with respect to a splicing graph, showing a higher accuracy. To the best of our knowledge, ASGAL is the first tool that detects novel alternative splicing events by directly aligning reads to a splicing graph.

Availability

Source code, documentation, and data are available for download at http://asgal.algolab.eu .",2018-11-20 +30456241,Data on the determination of human epidermis integrity in skin permeation experiments by electrical resistance.,"The data presented in this article are related to the research article entitled ""Design of in vitro skin permeation studies according to the EMA Guideline on quality of transdermal patches"" (https://doi.org/10.1016/j.ejps.2018.09.014) (Cilurzo et al., 2018) [1]. In vitro permeation studies are generally carried out by Franz's diffusion cell method using human epidermis as a membrane (Franz, 1975) [2]. The evaluation of membrane integrity is mandatory to assure the quality of the experiments. However, the methods used for this determination are different and the results are strictly dependent on the operative conditions. The article reports the electrical resistance values of human epidermis samples and in vitro skin permeability data of caffeine and benzoic acid. The data are used to establish a cut-off suitable for checking the skin integrity. This information may be useful to enable critical or extended analyses in order to contribute to the development of a compendial method.",2018-10-26 +35116820,Upregulated necroptosis-pathway-associated genes are unfavorable prognostic markers in low-grade glioma and glioblastoma multiforme.,"

Background

Glioma accounts for 70% of primary brain malignancies in adults with unfavorable prognoses. In the past decades, much effort has been invested to identify better biomarkers for predicting prognoses. Recently, necroptosis has been reported as a specialized pathway of programmed necrosis. Moreover, regulators of necroptosis-pathway-associated genes (RIPK1, RIPK3 and MLKL) were reported to be related to prognoses of many types of tumors. However, the prognostic value of these genes in diffuse glioma including low-grade glioma (LGG) and glioblastoma multiforme (GBM) remains unknown.

Methods

An online tool, Gene Expression Profiling Interactive Analysis (GEPIA) (http://gepia.cancer-pku.cn/), was used to analyze the differential expression of necroptosis-pathway-associated genes between tumor and normal tissue, the correlation between RIPK1, RIPK3 and MLKL, and the relationship between necroptosis-pathway-associated genes and prognosis [overall survival (OS) and disease-free survival (DFS)] in LGG and GBM. The median expression of RIPK1, RIPK3 and MLKL was used to divide patients into high- versus low-expression groups. All graphic presentations were drawn using the Gepia database.

Results

Expression of RIPK1 and RIPK3 was significantly higher in tumor tissue of GBM as compared with normal tissue. A moderate correlation between MLKL and RIPK3 was demonstrated in both LGG (R = 0.79) and GBM (R = 0.79). In LGG, higher expression of RIPK1, RIPK3, and MLKL was associated with poor OS and DFS with HR values of 2.2, 2, 1.9 for OS and 1.7, 1.8, 1.6 for DFS, respectively. In GBM, only a higher expression of MLKL was associated with worse OS and DFS with HR values of 1.5 and 1.6, respectively.

Conclusions

Regulators of necroptosis-pathway-associated genes appear to have a potential to serve as biomarkers of prognosis in both LGG and GBM.",2019-06-01 +31298393,Meta-analysis of the impact on early and late mortality of TAVI compared to surgical aortic valve replacement in high and low-intermediate surgical risk patients.,"

Objective

We studied the impact of transcatheter aortic valve implantation (TAVI) compared to the surgical aortic valve replacement (SAVR) on 30-day and one-year mortality from randomized controlled trials (RCTs) in patients with severe aortic stenosis at high or low-intermediate surgical risk.

Materials and methods

All RCTs were retrieved through PubMed computerized database and the site https://www.clinicaltrials.gov from January 2010 until March 31st, 2019. The absolute risk reduction (RD) with the 95% confidence interval (CI) was used to assess the effectiveness of the intervention under comparison. We evaluated overall mortality rates at 30-day and one-year follow-up in the comparison between TAVI vs. SAVR. We also evaluated the role played by the site access for TAVI performed through the femoral or subclavian artery (TV-TAVI) vs. SAVR, or transapically (TA-TAVI) vs. SAVR.

Results

In the ""as-treated population"" the overall 30-day mortality was significantly lower in TAVI (p=0.03) with respect to SAVR. However, the analysis for TAVI subgroups showed that 30-day mortality was (1) significantly lower in TV-TAVI vs. SAVR (p=0.006), (2) increased, not significantly, in TA-TAVI vs. SAVR (p=0.62). No significant differences were found between TAVI vs. SAVR at one-year follow-up.

Conclusions

The results of our meta-analysis suggest that TV-TAVI is a powerful tool in the treatment of severe aortic stenosis at high or low-intermediate surgical risk, with significantly lower mortality with respect to SAVR. On the contrary, SAVR seems to provide better results than TA-TAVI.",2019-06-01 +30395195,Discovering network phenotype between genetic risk factors and disease status via diagnosis-aligned multi-modality regression method in Alzheimer's disease.,"

Motivation

Neuroimaging genetics is an emerging field to identify the associations between genetic variants [e.g. single-nucleotide polymorphisms (SNPs)] and quantitative traits (QTs) such as brain imaging phenotypes. However, most of the current studies focus only on the associations between brain structure imaging and genetic variants, while neglecting the connectivity information between brain regions. In addition, the brain itself is a complex network, and the higher-order interaction may contain useful information for the mechanistic understanding of diseases [i.e. Alzheimer's disease (AD)].

Results

A general framework is proposed to exploit network voxel information and network connectivity information as intermediate traits that bridge genetic risk factors and disease status. Specifically, we first use the sparse representation (SR) model to build hyper-network to express the connectivity features of the brain. The network voxel node features and network connectivity edge features are extracted from the structural magnetic resonance imaging (sMRI) and resting-state functional magnetic resonance imaging (fMRI), respectively. Second, a diagnosis-aligned multi-modality regression method is adopted to fully explore the relationships among modalities of different subjects, which can help further mine the relation between the risk genetics and brain network features. In experiments, all methods are tested on the Alzheimer's Disease Neuroimaging Initiative (ADNI) database. The experimental results not only verify the effectiveness of our proposed framework but also discover some brain regions and connectivity features that are highly related to diseases.

Availability and implementation

The Matlab code is available at http://ibrain.nuaa.edu.cn/2018/list.htm.",2019-06-01 +31261558,Prognostic biomarkers of cervical squamous cell carcinoma identified via plasma metabolomics.,"Cervical cancer is the second most common female malignancy worldwide. The metabolic profile of plasma associated with the prognosis of cervical cancer remains poorly understood. In this cross-sectional study, plasma samples were collected from three groups of patients with CSCC, namely primary patients before treatment (BT group), patients with a poor prognosis (PP group, including patients with distant metastasis and local recurrence), and patients with a good prognosis within two years after the first treatment (GP group). The plasma metabolomics was conducted to detect the dynamic changes of metabolites via ultra-performance liquid chromatography with quadrupole time-of-flight mass spectrometry. Multivariate analyses, including principle component, partial least square-discriminant, and orthogonal projection to latent structure-discriminant analyses, were performed to compare each pair of the three groups. The differential metabolites were identified by comparison of the exact m/z values and mass spectrometry (MS)/MS spectra with the structural information of the metabolites obtained from the Human Metabolome Database (http://www.hmdb.ca/) and LIPID MAPS (http://www.lipidmaps.org/). To screen for potential markers, receiver operating characteristic curve analysis of the differential metabolites. Finally, thirty plasma samples were collected from each group. Multivariate analyses showed that 31 metabolites were significantly different among the 3 groups studied. Of those, the 5 metabolites phosphatidyl choline (15:0/16:0), phosphatidyl glycerol (12:0/13:0), actosylceramide (d18:1/16:0), D-Maltose, and phthalic acid, with an area under the curve above 0.75, were identified as potential biomarkers. 
The present findings provide evidence for biomarkers to monitor prognosis of patients with CSCC, which may help in better managing the disease.",2019-06-01 +24857970,PTM-SD: a database of structurally resolved and annotated posttranslational modifications in proteins. ,"Posttranslational modifications (PTMs) define covalent and chemical modifications of protein residues. They play important roles in modulating various biological functions. Current PTM databases contain important sequence annotations but do not provide informative 3D structural resource about these modifications. Posttranslational modification structural database (PTM-SD) provides access to structurally solved modified residues, which are experimentally annotated as PTMs. It combines different PTM information and annotation gathered from other databases, e.g. Protein DataBank for the protein structures and dbPTM and PTMCuration for fine sequence annotation. PTM-SD gives an accurate detection of PTMs in structural data. PTM-SD can be browsed by PDB id, UniProt accession number, organism and classic PTM annotation. Advanced queries can also be performed, i.e. detailed PTM annotations, amino acid type, secondary structure, SCOP class classification, PDB chain length and number of PTMs by chain. Statistics and analyses can be computed on a selected dataset of PTMs. Each PTM entry is detailed in a dedicated page with information on the protein sequence, local conformation with secondary structure and Protein Blocks. PTM-SD gives valuable information on observed PTMs in protein 3D structure, which is of great interest for studying sequence-structure- function relationships at the light of PTMs, and could provide insights for comparative modeling and PTM predictions protocols. 
Database URL: PTM-SD can be accessed at http://www.dsimb.inserm.fr/dsimb_tools/PTM-SD/.",2014-05-24 +30238037,Data on the early oxidation of SiO2-coated pure Ti and bulk Ti5Si3 at 800 °C.,"Oxidation of pure Ti sputtered with a 250 nm layer of amorphous SiO2 and bulk Ti5Si3 was conducted at 800 °C for 2 or 32 h in a 1 standard cubic centimeter per minute (SCCM) O2/4 SCCM Ar environment (approximately pO2 = 0.2 atm/20.3 kPa). Specimens were characterized using transmission electron microscopy, scanning transmission electron microscopy, and energy dispersive spectroscopy. The data in this article accompanies research article ""Early oxidation behavior of Si-coated titanium"" [1], which contains further discussion. The data for this article is hosted at the Materials Commons data repository and is available for download at https://materialscommons.org/mcapp/#/data/dataset/b8bc8038-a735-4cb9-9a9e-a0fb912b248c.",2018-09-01 +30988030,Competence beyond Genes: Filling in the Details of the Pneumococcal Competence Transcriptome by a Systems Approach. ,"DNA uptake by natural competence is a central process underlying the genetic plasticity, biology, and virulence of the human respiratory opportunistic pathogen Streptococcus pneumoniae A study reported in this issue (J. Slager, R. Aprianto, and J.-W. Veening, J. Bacteriol. 201:e00780-18, https://doi.org/10.1128/JB.00780-18) combined deep-genome annotation and high-resolution transcriptome analyses to considerably extend the previous model of temporal regulation of competence at the operon and component gene levels. 
That extended study also provides a playbook for updating, refining, and extending genomic data sets and making them publicly available.",2019-06-10 +,"Out of the Neotropics: newly discovered relictual species sheds light on the biogeographical history of spider ants (Leptomyrmex, Dolichoderinae, Formicidae)","Spider ants of the genus Leptomyrmex Mayr (Hymenoptera: Formicidae: Dolichoderinae) are conspicuous species of Australasian rainforests, with putative fossil relatives in the Neotropics and Europe. There is longstanding debate over the biogeographical history of the genus, with the Palaearctic and Neotropical regions proposed as alternate centres of origin. We propose a resolution of this debate with the recent discovery and analysis of an extant species from central Brazil, L. relictus sp.n., which we describe from workers, males and brood. We sequence ten nuclear genes in the new species and in several Australian Leptomyrmex species, and append these data to a 54‐taxon, 10‐gene data matrix previously generated for the subfamily Dolichoderinae. We conduct phylogenetic and divergence dating analyses, and re‐evaluate the fossil record of the group. We recover Leptomyrmex relictus sp.n. as a member of the Leptomyrmex clade with high support. It is sister to the Australasian species, and the genus Leptomyrmex is, in turn, sister to a pair of Neotropical genera, Forelius and Dorymyrmex. We infer a Neotropical origin for the genus and estimate a mid‐Eocene (46 Ma, 95% CI 56 to 36 Ma) origin for the crown genus and an Oligocene origin for the Australasian clade (29 Ma, 95% CI 40 to 19 Ma). We confirm placement of the Dominican amber species †L. neotropicus Baroni Urbani in the genus but reject a close relationship with the Palaearctic fossil taxa †Leptomyrmula Emery and †Usomyrma Dlussky, Radchenko & Dubovikoff, considering them incertae sedis in the subfamily (Dolichoderinae). 
In contrast to the mesophilic preferences of the Australasian species of Leptomyrmex, the new Brazilian species inhabits cerrado (dry savannah). Our results support a Neotropical origin for spider ants with dispersal to Australia. Rafting on west‐bound currents and/or a historical diversity imbalance between Australia and South America are proposed as alternate hypotheses to explain a pattern of biased E–W mid‐Tertiary dispersal for ants with austral distributions. This pattern is suggested by our results in conjunction with observations of other ant clades. Overall, our findings highlight the value of integrated taxonomy, critical interpretation of morphology, and a comparative phylogenetic framework when conducting palaeontological and biogeographical studies of insect species. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:6E9E6617‐6E53‐40B8‐82C7‐67F89A83C553.",2016-07-01 +31607167,A new ent-kaurane diterpene from Isodon henryi.,"One new ent-Kaurane diterpenoid (1) was isolated from the ethyl acetate fraction of Isodon henryi. Along with ten diterpenoids (2-11) were isolated from this plant for the first time, including six 7,20-epoxy diterpenoids, three enmenol-type diterpenoids and one 6,7-seco-ent-kaurene diterpenoid. Their structures were elucidated by 1 D and 2 D NMR, confirmed by HRESIMS and electronic circular dichroism analyses. Furthermore, the cytotoxicities of twelve compounds were investigated in five human cancer cell lines, including A2780, BGC-823, HCT-116, HepG2 and HeLa. And the IC50 values of these diterpenoids ranged from 2.1 to 88.8 μM in the tested cell lines. 
Based on the molecular structures of 12 compounds and the bioassay results, it suggests that α,β-unsaturated pentanone is the cytotoxic active site of 7,20 epoxy ent-kaurane diterpenoid, but it does not contribute much to enmenol-type diterpenoid.Supplemental data for this article can be accessed at https://doi.org/10.1080/14786419.2019.1675067.",2019-10-12 +28605774,GenomeHubs: simple containerized setup of a custom Ensembl database and web server for any species. ,"http://GenomeHubs.org. As the generation and use of genomic datasets is becoming increasingly common in all areas of biology, the need for resources to collate, analyse and present data from one or more genome projects is becoming more pressing. The Ensembl platform is a powerful tool to make genome data and cross-species analyses easily accessible through a web interface and a comprehensive application programming interface. Here we introduce GenomeHubs, which provide a containerized environment to facilitate the setup and hosting of custom Ensembl genome browsers. This simplifies mirroring of existing content and import of new genomic data into the Ensembl database schema. GenomeHubs also provide a set of analysis containers to decorate imported genomes with results of standard analyses and functional annotations and support export to flat files, including EMBL format for submission of assemblies and annotations to International Nucleotide Sequence Database Collaboration.",2017-01-01 +31644058,Anthrax Antitoxins,"High titers of antibody to infectious bacteria and viruses can be used to both prevent and treat infectious diseases. In particular, antitoxins have been shown to be beneficial in several forms of severe acute infections such as diphtheria, rabies and anthrax. 
The recent use of active anthrax spores as a bioweapon, particularly in acts of terror, has renewed research efforts to develop potent, rapidly active means of prevention and treatment of anthrax after purposeful or accidental exposure. Several monoclonal antibodies to anthrax antigens have been developed as part of research efforts in bioterrorism, of which two are commercially available: raxibacumab (2012) and obiltoxaximab (2016). These two agents were approved for use based upon the so called “Animal Rule”, which allows FDA approval based upon efficacy as shown in animal models of severe infectious diseases and upon safety data developed in healthy volunteers. Both monoclonal antibodies have had limited use in humans, but neither has been associated with serum enzyme elevations or with instances of clinically apparent liver injury during the limited preclinical safety evaluation in healthy volunteers. Updated information on prevention and treatment of anthrax is available from the Centers for Disease Control and Prevention on their website: https://www.cdc.gov/anthrax/index.html.",2019-10-24 +30715213,SaGePhy: an improved phylogenetic simulation framework for gene and subgene evolution.,"SUMMARY:SaGePhy is a software package for improved phylogenetic simulation of gene and subgene evolution. SaGePhy can be used to generate species trees, gene trees and subgene or (protein) domain trees using a probabilistic birth-death process that allows for gene and subgene duplication, horizontal gene and subgene transfer and gene and subgene loss. SaGePhy implements a range of important features not found in other phylogenetic simulation frameworks/software. These include (i) simulation of subgene or domain level evolution inside one or more gene trees, (ii) simultaneous simulation of both additive and replacing horizontal gene/subgene transfers and (iii) probabilistic sampling of species tree and gene tree nodes, respectively, for gene- and domain-family birth. 
SaGePhy is open-source, platform independent and written in Java and Python. AVAILABILITY AND IMPLEMENTATION:Executables, source code (open-source under the revised BSD license) and a detailed manual are freely available from http://compbio.engr.uconn.edu/software/sagephy/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-09-01 +22702248,WikiCell: a unified resource platform for human transcriptomics research.,"Here we present a database, WikiCell, as a portal for a unified view of the human transcriptome. At present, WikiCell consists of Expressed Sequenced Tags (ESTs), and users can access, curate, and submit database data by interactive mode, and also can browse, query, upload, and download sequences. Researchers can utilize the transcriptome model based on a human taxonomy graph. The sequences in each model are sorted by attributes such as physiological and pathological samples. The Genbank EST data format are conserved. Gene information is provided, including housekeeping genes, taxonomy location, and gene ontology (GO) description. We believe that WikiCell provides a useful resource for defining expression pattern and tissue differentiation based on human taxonomy mode. It can be accessed at http://www.wikicell.org/.",2012-06-01 +29059524,Computing Prediction and Functional Analysis of Prokaryotic Propionylation.,"Identification and systematic analysis of candidates for protein propionylation are crucial steps for understanding its molecular mechanisms and biological functions. Although several proteome-scale methods have been performed to delineate potential propionylated proteins, the majority of lysine-propionylated substrates and their role in pathological physiology still remain largely unknown. By gathering various databases and literatures, experimental prokaryotic propionylation data were collated to be trained in a support vector machine with various features via a three-step feature selection method. 
A novel online tool for seeking potential lysine-propionylated sites (PropSeek) ( http://bioinfo.ncu.edu.cn/PropSeek.aspx ) was built. Independent test results of leave-one-out and n-fold cross-validation were similar to each other, showing that PropSeek is a stable and robust predictor with satisfying performance. Meanwhile, analyses of Gene Ontology, Kyoto Encyclopedia of Genes and Genomes pathways, and protein-protein interactions implied a potential role of prokaryotic propionylation in protein synthesis and metabolism.",2017-11-07 +25979979,DroughtDB: an expert-curated compilation of plant drought stress genes and their homologs in nine species.,"Plants are sessile and therefore exposed to a number of biotic and abiotic stresses. Drought is the major abiotic stress restricting plant growth worldwide. A number of genes involved in drought stress response have already been characterized, mainly in the model species Arabidopsis thaliana and Oryza sativa. However, with the aim to produce drought tolerant crop varieties, it is of importance to identify the respective orthologs for each species. We have developed DroughtDB, a manually curated compilation of molecularly characterized genes that are involved in drought stress response. DroughtDB includes information about the originally identified gene, its physiological and/or molecular function and mutant phenotypes and provides detailed information about computed orthologous genes in nine model and crop plant species including maize and barley. All identified orthologs are interlinked with the respective reference entry in MIPS/PGSB PlantsDB, which allows retrieval of additional information like genome context and sequence information. Thus, DroughtDB is a valuable resource and information tool for researchers working on drought stress and will facilitate the identification, analysis and characterization of genes involved in drought stress tolerance in agriculturally important crop plants. 
Database URL: http://pgsb.helmholtz-muenchen.de/droughtdb/",2015-05-15 +30715207,ChimeraUGEM: unsupervised gene expression modeling in any given organism.,"MOTIVATION:Regulation of the amount of protein that is synthesized from genes has proved to be a serious challenge in terms of analysis and prediction, and in terms of engineering and optimization, due to the large diversity in expression machinery across species. RESULTS:To address this challenge, we developed a methodology and a software tool (ChimeraUGEM) for predicting gene expression as well as adapting the coding sequence of a target gene to any host organism. We demonstrate these methods by predicting protein levels in seven organisms, in seven human tissues, and by increasing in vivo the expression of a synthetic gene up to 26-fold in the single-cell green alga Chlamydomonas reinhardtii. The underlying model is designed to capture sequence patterns and regulatory signals with minimal prior knowledge on the host organism and can be applied to a multitude of species and applications. AVAILABILITY AND IMPLEMENTATION:Source code (MATLAB, C) and binaries are freely available for download for non-commercial use at http://www.cs.tau.ac.il/~tamirtul/ChimeraUGEM/, and supported on macOS, Linux and Windows. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-09-01 +30477418,"pyHIVE, a health-related image visualization and engineering system using Python.","

Background

Imaging is one of the major biomedical technologies to investigate the status of a living object. But the biomedical image based data mining problem requires extensive knowledge across multiple disciplines, e.g. biology, mathematics and computer science. RESULTS: pyHIVE (a Health-related Image Visualization and Engineering system using Python) was implemented as an image processing system, providing five widely used image feature engineering algorithms. A standard binary classification pipeline was also provided to help researchers build data models immediately after the data is collected. pyHIVE may calculate five widely-used image feature engineering algorithms efficiently using multiple computing cores, and also features the modules of Principal Component Analysis (PCA) based preprocessing and normalization.

Conclusions

The demonstrative example shows that the image features generated by pyHIVE achieved very good classification performances based on the gastrointestinal endoscopic images. This system pyHIVE and the demonstrative example are freely available and maintained at http://www.healthinformaticslab.org/supp/resources.php .",2018-11-26 +29339971,Terzyme: a tool for identification and analysis of the plant terpenome.,"

Background

Terpenoid hydrocarbons represent the largest and most ancient group of phytochemicals, such that the entire chemical library of a plant is often referred to as its 'terpenome'. Besides having numerous pharmacological properties, terpenes contribute to the scent of the rose, the flavors of cinnamon and the yellow of sunflowers. Rapidly increasing -omics datasets provide an unprecedented opportunity for terpenome detection, paving the way for automated web resources dedicated to phytochemical predictions in genomic data.

Results

We have developed Terzyme, a predictive algorithm for identification, classification and assignment of broad substrate unit to terpene synthase (TPS) and prenyl transferase (PT) enzymes, known to generate the enormous structural and functional diversity of terpenoid compounds across the plant kingdom. Terzyme uses sequence information, plant taxonomy and machine learning methods for predicting TPSs and PTs in genome and proteome datasets. We demonstrate a significant enrichment of the currently identified terpenome by running Terzyme on more than 40 plants.

Conclusions

Terzyme is the result of a rigorous analysis of evolutionary relationships between hundreds of characterized sequences of TPSs and PTs with known specificities, followed by analysis of genome-wide gene distribution patterns, ontology based clustering and optimization of various parameters for building accurate profile Hidden Markov Models. The predictive webserver and database is freely available at http://nipgr.res.in/terzyme.html and would serve as a useful tool for deciphering the species-specific phytochemical potential of plant genomes.",2018-01-10 +29632808,"Profiling expression of coding genes, long noncoding RNA, and circular RNA in lung adenocarcinoma by ribosomal RNA-depleted RNA sequencing.","Noncoding RNA play important roles in various biological processes and diseases, including cancer. The expression profile of circular RNA (circRNA) has not been systematically investigated in lung adenocarcinoma (LUAD). In this study, we performed genomewide transcriptome profiling of coding genes, long noncoding RNA (lncRNA), and circRNA in paired LUAD and nontumor tissues by ribosomal RNA-depleted RNA sequencing. The detected reads were first mapped to the human genome to analyze expression of coding genes and lncRNA, while the unmapped reads were subjected to a circRNA prediction algorithm to identify circRNA candidates. We identified 1282 differentially expressed coding genes in LUAD. Expression of 19 023 lncRNA was detected, of which 244 lncRNAs were differentially expressed in LUAD. AFAP1-AS1, BLACAT1, LOC101928245, and FENDRR were most differentially expressed lncRNAs in LUAD. Also identified were 9340 circRNA candidates with ≥ 2 backspliced, including 3590 novel circRNA transcripts. The median length of circRNA was ~ 530 nt. CircRNA are often of low abundance, and more than half of circRNAs we identified had < 10 reads. Agarose electrophoresis and Sanger sequencing were used to confirm that four candidate circRNA were truly circular. 
Our results characterized the expression profile of coding genes, lncRNA, and circRNA in LUAD; 9340 circRNAs were detected, demonstrating that circRNA are widely expressed in LUAD.

Database

The raw RNA sequencing data have been submitted to Gene Expression Omnibus (GEO) database and can be accessed with the ID GEO: http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE104854.",2018-02-21 +30667224,P-Mart: Interactive Analysis of Ion Abundance Global Proteomics Data.,"The use of mass-spectrometry-based techniques for global protein profiling of biomedical or environmental experiments has become a major focus in research centered on biomarker discovery; however, one of the most important issues recently highlighted in the new era of omics data generation is the ability to perform analyses in a robust and reproducible manner. This has been hypothesized to be one of the issues hindering the ability of clinical proteomics to successfully identify clinical diagnostic and prognostic biomarkers of disease. P-Mart ( https://pmart.labworks.org ) is a new interactive web-based software environment that enables domain scientists to perform quality-control processing, statistics, and exploration of large-complex proteomics data sets without requiring statistical programming. P-Mart is developed in a manner that allows researchers to perform analyses via a series of modules, explore the results using interactive visualization, and finalize the analyses with a collection of output files documenting all stages of the analysis and a report to allow reproduction of the analysis.",2019-02-06 +28222698,"ATGC transcriptomics: a web-based application to integrate, explore and analyze de novo transcriptomic data.","

Background

In the last years, applications based on massively parallelized RNA sequencing (RNA-seq) have become valuable approaches for studying non-model species, e.g., without a fully sequenced genome. RNA-seq is a useful tool for detecting novel transcripts and genetic variations and for evaluating differential gene expression by digital measurements. The large and complex datasets resulting from functional genomic experiments represent a challenge in data processing, management, and analysis. This problem is especially significant for small research groups working with non-model species.

Results

We developed a web-based application, called ATGC transcriptomics, with a flexible and adaptable interface that allows users to work with new generation sequencing (NGS) transcriptomic analysis results using an ontology-driven database. This new application simplifies data exploration, visualization, and integration for a better comprehension of the results.

Conclusions

ATGC transcriptomics provides access to non-expert computer users and small research groups to a scalable storage option and simple data integration, including database administration and management. The software is freely available under the terms of GNU public license at http://atgcinta.sourceforge.net .",2017-02-22 +28651544,SalmoBase: an integrated molecular data resource for Salmonid species.,"

Background

Salmonids are ray-finned fishes which constitute 11 genera and at least 70 species including Atlantic salmon, whitefishes, graylings, rainbow trout, and char. The common ancestor of all Salmonidae experienced a whole genome duplication (WGD) ~80 million years ago, resulting in an autotetraploid genome. Genomic rediploidization is still going on in salmonid species, providing a unique system for studying evolutionary consequences of whole genome duplication. In recent years, high quality genome sequences of Atlantic salmon and Rainbow trout have been established, due to their scientific and commercial values. In this paper we introduce SalmoBase ( http://www.salmobase.org/ ), a tool for making molecular resources for salmonids publicly available in a framework of visualizations and analytic tools.

Results

SalmoBase has been developed as a part of the ELIXIR.NO project. Currently, SalmoBase contains molecular resources for Atlantic salmon and Rainbow trout. Data can be accessed through BLAST, Genome Browser (GBrowse), Genetic Variation Browser (GVBrowse) and Gene Expression Browser (GEBrowse).

Conclusions

To the best of our knowledge, SalmoBase is the first database which integrates salmonids data and allow users to study salmonids in an integrated framework. The database and its tools (e.g., comparative genomics tools, synteny browsers) will be expanded as additional public resources describing other Salmonidae genomes become available.",2017-06-26 +31364710,Bayesian estimation of past population dynamics in BEAST 1.10 using the Skygrid coalescent model. ,"Inferring past population dynamics over time from heterochronous molecular sequence data is often achieved using the Bayesian Skygrid model, a non-parametric coalescent model that estimates the effective population size over time. Available in BEAST, a cross-platform program for Bayesian analysis of molecular sequences using Markov chain Monte Carlo, this coalescent model is often estimated in conjunction with a molecular clock model to produce time-stamped phylogenetic trees. We here provide a practical guide to using BEAST and its accompanying applications for the purpose of drawing inference under these models. We focus on best practices, potential pitfalls and recommendations that can be generalized to other software packages for Bayesian inference. This protocol shows how to use TempEst, BEAUti and BEAST 1.10 (http://beast.community/), LogCombiner as well as Tracer in a complete workflow.",2019-07-31 +31031918,Future temperature and salinity do not exert selection pressure on cyst germination of a toxic phytoplankton species.,"Environmental conditions regulate the germination of phytoplankton resting stages. While some factors lead to synchronous germination, others stimulate germination of only a small fraction of the resting stages. This suggests that habitat filters may act on the germination level and thus affect selection of blooming strains. 
Benthic ""seed banks"" of the toxic dinoflagellate Alexandrium ostenfeldii from the Baltic Sea are genetically and phenotypically diverse, indicating a high potential for adaptation by selection on standing genetic variation. Here, we experimentally tested the role of climate-related salinity and temperature as selection filters during germination and subsequent establishment of A. ostenfeldii strains. A representative resting cyst population was isolated from sediment samples, and germination and reciprocal transplantation experiments were carried out, including four treatments: Average present day germination conditions and three potential future conditions: high temperature, low salinity, and high temperature in combination with low salinity. We found that the final germination success of A. ostenfeldii resting cysts was unaffected by temperature and salinity in the range tested. A high germination success of more than 80% in all treatments indicates that strains are not selected by temperature and salinity during germination, but selection becomes more important shortly after germination, in the vegetative stage of the life cycle. Moreover, strains were not adapted to germination conditions. Instead, highly plastic responses occurred after transplantation and significantly higher growth rates were observed at higher temperature. High variability of strain-specific responses has probably masked the overall effect of the treatments, highlighting the importance of testing the effect of environmental factors on many strains. It is likely that A. ostenfeldii populations can persist in the future, because suitable strains, which are able to germinate and grow well at potential future climate conditions, are part of the highly diverse cyst population.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.c8c83nr.",2019-04-01 +29642853,A comparative genomics study of carbohydrate/glucose metabolic genes: from fish to mammals.,"

Background

Glucose plays a key role as an energy source in most mammals, but its importance in fish appears to be limited that so far seemed to belong to diabetic humans only. Several laboratories worldwide have made important efforts in order to better understand this strange phenotype observed in fish. However, the mechanism of carbohydrate/glucose metabolism is astonishingly complex. Why basal glycaemia is different between fish and mammals and how carbohydrate metabolism is different amongst organisms is largely uncharted territory. The utilization of comparative systems biology with model vertebrates to explore fish metabolism has become an essential approach to unravelling hidden in vivo mechanisms.

Results

In this study, we first built a database containing 791, 593, 523, 666 and 698 carbohydrate/glucose metabolic genes from the genomes of Danio rerio, Xenopus tropicalis, Gallus gallus, Mus musculus and Homo sapiens, respectively, and most of these genes in our database are predicted to encode specific enzymes that play roles in defined reactions; over 57% of these genes are related to human type 2 diabetes. Then, we systematically compared these genes and found that more than 70% of the carbohydrate/glucose metabolic genes are conserved in the five species. Interestingly, there are 4 zebrafish-specific genes (si:ch211-167b20.8, CABZ01043017.1, socs9 and eif4e1c) and 1 human-specific gene (CALML6) that may alter glucose utilization in their corresponding species. Interestingly, these 5 genes are all carbohydrate regulation factors, but the enzymes themselves are involved in insulin regulation pathways. Lastly, in order to facilitate the use of our data sets, we constructed a glucose metabolism database platform ( http://101.200.43.1:10000/ ).

Conclusions

This study provides the first systematic genomic insights into carbohydrate/glucose metabolism. After exhaustive analysis, we found that most metabolic genes are conserved in vertebrates. This work may resolve some of the complexities of carbohydrate/glucose metabolic heterogeneity amongst different vertebrates and may provide a reference for the treatment of diabetes and for applications in the aquaculture industry.",2018-04-11 +28838002,Pediatric Intestinal Rehabilitation and Transplantation Registry: Initial Report from a European Collaborative Registry.,"

Introduction

 Short bowel syndrome (SBS) is the main cause of intestinal failure (IF) in the pediatric population. To promote the standardization of care of these patients, the registry of Pediatric Intestinal Rehabilitation and Transplantation (PIRAT) has been established. The aim of this study is to describe patients with IF using the PIRAT database.

Materials and methods

 Data from two tertiary care European referral Centers registered in PIRAT (https://www.studeon.eu/pirat) were analyzed (1994-2015). Neonatal SBS-related IF was defined as need for parenteral nutrition (PN) to sustain life and growth for more than 75 days, after extensive bowel resection during neonatal period. Data included patient demographics, disease at birth, residual small intestine, and intestinal autonomy (PN on/off).

Results

 In this study, 114 children with SBS-related IF were identified (male 60%). Median gestational age was 35.3 weeks (interquartile range [IQR]: 33.0-38.0); median birth weight was 2,440 g (IQR: 1,700-2,990). The main causes of SBS were intestinal atresia in 31 (27%), midgut volvulus in 29 (25%), necrotizing enterocolitis in 23 (20%), and gastroschisis in 12 (11%). Nine (7.9%) patients died on PN (six sepsis, two IF-associated liver disease, and one multiorgan failure). Median residual small bowel length was 46 cm (IQR: 13.0-92.5). Ileocecal valve was resected in 48 patients (42%). Intestinal autonomy was achieved in 68% patients.

Conclusion

 We present the web-based registry PIRAT and the first results of patients with IF registered from two European Centers. PIRAT could give the opportunity to create a dedicated international network (IF-net) to standardize, improve, and spread the therapeutic paths for the rare and heterogeneous condition of SBS-related IF.",2017-08-24 +30393052,Bayesian Weighing of Electron Cryo-Microscopy Data for Integrative Structural Modeling.,"Cryo-electron microscopy (cryo-EM) has become a mainstream technique for determining the structures of complex biological systems. However, accurate integrative structural modeling has been hampered by the challenges in objectively weighing cryo-EM data against other sources of information due to the presence of random and systematic errors, as well as correlations, in the data. To address these challenges, we introduce a Bayesian scoring function that efficiently and accurately ranks alternative structural models of a macromolecular system based on their consistency with a cryo-EM density map as well as other experimental and prior information. The accuracy of this approach is benchmarked using complexes of known structure and illustrated in three applications: the structural determination of the GroEL/GroES, RNA polymerase II, and exosome complexes. The approach is implemented in the open-source Integrative Modeling Platform (http://integrativemodeling.org), thus enabling integrative structure determination by combining cryo-EM data with other sources of information.",2018-11-01 +27261244,Creating an animation-enhanced video library of hepato-pancreato-biliary and transplantation surgical procedures.,"The potential for integrating real-time surgical video and state-of-the art animation techniques has not been widely applied to surgical education. 
This paper describes the use of new technology for creating videos of liver, pancreas and transplant surgery, annotating them with 3D animations, resulting in a freely-accessible online resource: The Toronto Video Atlas of Liver, Pancreas and Transplant Surgery ( http://tvasurg.ca ). The atlas complements the teaching provided to trainees in the operating room, and the techniques described in this study can be readily adapted by other surgical training programmes.",2016-01-01 +31138601,Serine/Arginine-Rich Splicing Factor 3 Modulates the Alternative Splicing of Cytoplasmic Polyadenylation Element Binding Protein 2.,"Triple negative breast cancer (TNBC) has an unusually low 5-year survival rate linked to higher metastatic rates. Our laboratory recently delineated a role for the alternative RNA splicing (AS) of cytoplasmic polyadenylation element binding protein 2 (CPEB2), via inclusion/exclusion of exon 4, in the metastasis of TNBC. In these studies, the mechanism governing the inclusion/exclusion of exon 4 was examined. Specifically, the RNA trans-factor, SRSF3, was found to be explicitly associated with CPEB2 exon 4. A SRSF3 consensus sequence was identified in exon 4, and mutation of this sequence abolished the association of SRSF3. The expression of SRSF3 was upregulated in TNBC cells upon the acquisition of anoikis resistance correlating with a reduction in the CPEB2A/B ratio. Importantly, downregulation of SRSF3 in these cells by siRNA induced the exclusion of exon 4 in cells increasing the ratio of CPEB2A (exon 4 excluded) to CPEB2B (exon 4 included). Downregulation of SRSF3 also reversed the CPEB2A/B ratio of a wild-type CPEB2 exon 4 minigene and endogenous CPEB2 pre-mRNA, but not a mutant CPEB2 minigene with the SRSF3 RNA cis-element ablated. SRSF3 downregulation ablated the anoikis resistance of TNBC cells, which was ""rescued"" by ectopic expression of CPEB2B. 
Finally, analysis of The Cancer Genome Atlas database showed a positive relationship between SRSF3 expression and lower CPEB2A/B ratios in aggressive breast cancers. IMPLICATIONS: These findings demonstrate that SRSF3 modulates CPEB2 AS to induce the expression of the CPEB2B isoform that drives TNBC phenotypes correlating with aggressive human breast cancer. VISUAL OVERVIEW: http://mcr.aacrjournals.org/content/molcanres/17/9/1920/F1.large.jpg.",2019-05-28 +25840430,A Multifunctional Mutagenesis System for Analysis of Gene Function in Zebrafish.,"Since the sequencing of the human reference genome, many human disease-related genes have been discovered. However, understanding the functions of all the genes in the genome remains a challenge. The biological activities of these genes are usually investigated in model organisms such as mice and zebrafish. Large-scale mutagenesis screens to generate disruptive mutations are useful for identifying and understanding the activities of genes. Here, we report a multifunctional mutagenesis system in zebrafish using the maize Ds transposon. Integration of the Ds transposable element containing an mCherry reporter for protein trap events and an EGFP reporter for enhancer trap events produced a collection of transgenic lines marking distinct cell and tissue types, and mutagenized genes in the zebrafish genome by trapping and prematurely terminating endogenous protein coding sequences. We obtained 642 zebrafish lines with dynamic reporter gene expression. The characterized fish lines with specific expression patterns will be made available through the European Zebrafish Resource Center (EZRC), and a database of reporter expression is available online (http://fishtrap.warwick.ac.uk/). 
Our approach complements other efforts using zebrafish to facilitate functional genomic studies in this model of human development and disease.",2015-04-02 +31191588,Expanding Alternative Splicing Identification by Integrating Multiple Sources of Transcription Data in Tomato.,"Tomato (Solanum lycopersicum) is an important vegetable and fruit crop. Its genome was completely sequenced and there are also a large amount of available expressed sequence tags (ESTs) and short reads generated by RNA sequencing (RNA-seq) technologies. Mapping transcripts including mRNA sequences, ESTs, and RNA-seq reads to the genome allows identifying pre-mRNA alternative splicing (AS), a post-transcriptional process generating two or more RNA isoforms from one pre-mRNA transcript. We comprehensively analyzed the AS landscape in tomato by integrating genome mapping information of all available mRNA and ESTs with mapping information of RNA-seq reads which were collected from 27 published projects. A total of 369,911 AS events were identified from 34,419 genomic loci involving 161,913 transcripts. Within the basic AS events, intron retention is the prevalent type (18.9%), followed by alternative acceptor site (12.9%) and alternative donor site (7.3%), with exon skipping as the least type (6.0%). Complex AS types having two or more basic event accounted for 54.9% of total AS events. Within 35,768 annotated protein-coding gene models, 23,233 gene models were found having pre-mRNAs generating AS isoform transcripts. Thus the estimated AS rate was 65.0% in tomato. The list of identified AS genes with their corresponding transcript isoforms serves as a catalog for further detailed examination of gene functions in tomato biology. The post-transcriptional information is also expected to be useful in improving the predicted gene models in tomato. 
The sequence and annotation information can be accessed at plant alternative splicing database (http://proteomics.ysu.edu/altsplice).",2019-05-28 +29136166,Diabetes-induced hyperglycemia impairs male reproductive function: a systematic review.,"

Background

Hyperglycemia can result from a loss of pancreatic beta-cells or a decline in their function leading to decreased insulin secretion or may arise from insulin resistance and variable degrees of inadequate insulin secretion resulting in diabetes and related comorbidities. To date several reviews have addressed the issue of diabetes-related male infertility but most have focused on how metabolic syndrome causes the decline in male fertility. However, a comprehensive overview as to how diabetes-induced hyperglycemia impairs male fertility is missing. Impaired regulation of glucose and the resultant hyperglycemia are major threats to the health of individuals in modern societies especially given the rapidly rising prevalence affecting an increasing number of men in their reproductive years. Consequently, diabetes-induced hyperglycemia is likely to contribute to a decline in global birth rates especially in those societies with a high diabetic prevalence.

Objective and rationale

This systematic review addresses and summarizes the impact of hyperglycemia on male reproductive health with a particular emphasis on the molecular mechanisms that influence the testis and other parts of the male reproductive tract.

Search methods

A systematic search of the literature published in the MEDLINE-Pubmed database (http://www.ncbi.nlm.nih.gov/pubmed) and Cochrane Library (http://www.cochranelibrary.com) was performed, as well as hand searching reference lists, from the earliest available online indexing year until May 2017, using diabetes- and male fertility-related keywords in combination with other search phrases relevant to the topic of hyperglycemia. Inclusion criteria were: clinical studies on type 1 diabetic (T1D) men and studies on T1D animal models with a focus on reproductive parameters. Case reports/series, observational studies and clinical trials were included. Studies on patients with type 2 diabetes (T2D) or animal models of T2D were excluded to distinguish hyperglycemia from other metabolic effects.

Outcomes

A total of 890 articles were identified of which 197 (32 clinical, 165 animal studies) were selected for qualitative analysis. While the clinical data from men with hyperglycemia-induced reproductive dysfunction were reported in most studies on T1D, the study designs were variable and lacked complete information on patients. Moreover, only a few studies (and mostly animal studies) addressed the underlying mechanisms of how hyperglycemia induces infertility. Potential causes included impaired function of the hypothalamic-pituitary-gonadal axis, increased DNA damage, perturbations in the system of advanced glycation endproducts and their receptor, oxidative stress, increased endoplasmic reticulum stress, modulation of cellular pathways, impaired mitochondrial function and disrupted sympathetic innervation. However, intervention studies to identify and confirm the pathological mechanisms were missing: data that are essential in understanding these interactions.

Wider implications

While the effects of regulating the hyperglycemia by the use of insulin and other modulators of glucose metabolism have been reported, more clinical trials providing high quality evidence and specifically addressing the beneficial effects on male reproduction are required. We conclude that interventions using insulin to restore normoglycemia should be a feasible approach to assess the proposed underlying mechanisms of infertility.",2018-01-01 +31897467,Association of BMI and major molecular pathological markers of colorectal cancer in men and women.,"

Background

Observational studies have consistently shown that a high BMI is associated with increased risk of colorectal cancer (CRC). However, the underlying mechanisms linking obesity to CRC remain unclear.

Objectives

To investigate the associations of BMI and CRC by major molecular pathological subtypes of CRC.

Methods

This analysis included 2407 cases and 2454 controls from a large German population-based case-control study. Information on recent weight and height as well as other demographic and lifestyle data were obtained by standardized interviews. Multinomial logistic regression was used to estimate ORs and 95% CIs for the associations between BMI and risk of CRC by major molecular pathological features: microsatellite instability (MSI), CpG island methylator phenotype (CIMP), B-Raf proto-oncogene serine/threonine kinase (BRAF) mutation, and Kirsten rat sarcoma viral oncogene homolog gene (KRAS) mutation.

Results

Among women, a higher BMI was differentially and more strongly associated with risk of MSI CRC (OR per 5 kg/m²: 1.69; 95% CI: 1.34, 2.12; P-heterogeneity ≤ 0.001), CIMP-high CRC (OR per 5 kg/m²: 1.57; 95% CI: 1.30, 1.89; P-heterogeneity ≤ 0.001), BRAF-mutated CRC (OR per 5 kg/m²: 1.56; 95% CI: 1.22, 1.99; P-heterogeneity = 0.04), and KRAS-wildtype CRC (OR per 5 kg/m²: 1.35; 95% CI: 1.17, 1.54; P-heterogeneity = 0.01), compared with the risk of CRC in subjects with the molecular feature counterpart. In men, no meaningful differences in CRC risk were observed for the investigated molecular feature pairs. For the association of BMI with MSI CRC, we observed effect modification by sex (P-interaction = 0.04). Also, in women, the risk of CRC with the serrated pathway features was more strongly increased with higher BMI than risk of CRC with the traditional pathway features (OR per 5 kg/m²: 1.73; 95% CI: 1.28, 2.34; P-heterogeneity = 0.01).

Conclusions

In women, the relation between BMI and MSI-high CRC seems to be stronger than that between BMI and microsatellite-stable CRC. However, a validation in an independent cohort is needed. This observational study was registered at the German Clinical Trials Register (http://www.drks.de; study ID: DRKS00011793), an approved primary register in the WHO network.",2020-03-01 +29982278,MetaboRank: network-based recommendation system to interpret and enrich metabolomics results.,"

Motivation

Metabolomics has shown great potential to improve the understanding of complex diseases, potentially leading to therapeutic target identification. However, no single analytical method allows monitoring all metabolites in a sample, resulting in incomplete metabolic fingerprints. This incompleteness constitutes a stumbling block to interpretation, raising the need for methods that can enrich those fingerprints. We propose MetaboRank, a new solution inspired by social network recommendation systems for the identification of metabolites potentially related to a metabolic fingerprint.

Results

The MetaboRank method was used to enrich metabolomics data obtained on cerebrospinal fluid samples from patients suffering from hepatic encephalopathy (HE). MetaboRank successfully recommended metabolites not present in the original fingerprint. The quality of recommendations was evaluated by using an automatic literature search, in order to check that recommended metabolites could be related to the disease. Complementary mass spectrometry experiments and raw data analysis were performed to confirm these suggestions. In particular, MetaboRank recommended the overlooked α-ketoglutaramate as a metabolite which should be added to the metabolic fingerprint of HE, thus suggesting that metabolic fingerprint enhancement can provide new insight into complex diseases.

Availability and implementation

Method is implemented in the MetExplore server and is available at www.metexplore.fr. A tutorial is available at https://metexplore.toulouse.inra.fr/com/tutorials/MetaboRank/2017-MetaboRank.pdf.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +30248878,The IADN data visualization tool.,"Data on atmospheric levels of toxic pollutants in samples collected near the Great Lakes are now readily available online to scientists, researchers, and the public on a website called IADN Data Viz (https://iadnviz.iu.edu/). These data come from the Integrated Atmospheric Deposition Network (IADN), a long term monitoring program run by the U.S. Environmental Protection Agency (US EPA).",2018-07-26 +24675620,TIBS: a web database to browse gene expression in irritable bowel syndrome.,"Irritable bowel syndrome (IBS) is a chronic functional gastrointestinal disorder. Its symptoms include chronic abdominal pain, bloating gas, diarrhea and constipation. Many IBS patients also have psychological symptoms like depression or anxiety. These unpleasant symptoms significantly lower patients' quality of life. The prevalence of IBS in Europe and North America is about 10-15% of the population, which makes IBS a disorder with a high social cost. The pathophysiology of IBS is considered to be multifactorial and the exact cause of the disease remains poorly understood. Recently, a genome-wide expression microarray technique has been applied to investigate the possible mechanisms of IBS. However, a user-friendly database that allows scientists without bioinformatics background to query gene expression levels in these data sets and compare gene expression patterns across different tissues has not yet been established. Therefore, we have integrated four public expression microarray data (320 samples) from the Gene Expression Omnibus (GEO) and ArrayExpress databases into an online database called Transcriptome of Irritable Bowel Syndrome (TIBS). The gene expression change in IBS patients compared to healthy volunteers or UC patients in jejunum, sigmoid colon, rectum, and descending colon can be queried by gene symbols. 
Users can compare gene expression levels of IBS patients across these tissues. Sex difference of gene expression in IBS patients was also shown in the database. The current version of TIBS database contains 42,400 annotated gene probe sets represented on the Affymetrix Human Genome U133 plus 2.0 platform. TIBS will be an invaluable resource for a better understanding of the pathogenesis of IBS at the molecular level and for drug development. The TIBS database is available online at http://www.chengfeng.info/tibs_database.html.",2014-03-24 +29231978,Impact of a shift in nuchal translucency measurements on the detection rate of first-trimester Down syndrome screening: A population-based study.,"OBJECTIVE:To assess the distribution of nuchal translucency (NT) measurements following a national policy without credentialing and its impact on first-trimester Down syndrome screening (DSS) detection rate. METHOD:All first-trimester DSS data recorded in France (2010-2014) were collected by the laboratories in charge via an Internet database (https://www.bionuqual.org/echo.php). There was no minimal requirement for image quality to allow sonographers to enter the screening process. A subgroup of DSS with complete DS follow-up corresponded to 1614 sonographers. Based on the distribution of maternal age, DS detection rate was calculated and split as a function of the distribution of NT multiple of the median (MoM). RESULTS:Four thousand nine hundred forty-three sonographers performed 2,337,372 NT measurements. Median NT expressed in MoM was 0.83. Screenings with complete follow-up consisted of 197,417 screenings, in which DSS detection rates were respectively 70.4%, 70.9%, 79.4%, 87.7%, and 79.5% for the following median NT MoM ranges: <0.7, 0.70 to 0.79, 0.80 to 0.89, 0.90 to 0.99, and >0.99 (trend χ² = 12.21; P = .0158). 
CONCLUSION:In France, following a policy of quality assessment without standardized credentialing, the distribution of NT measurements did not fit the expected distribution. Down syndrome detection rate was 10% lower in screenings by sonographers with a median NT < 0.80 MoM.",2018-01-04 +27899608,TFBSbank: a platform to dissect the big data of protein-DNA interaction in human and model species.,"Genome-wide transcription factors (TFs) binding data has been extensively generated in the past few years, which poses a great challenge to data interpretation. Therefore, comprehensive and dedicated functional annotation databases for TF-DNA interaction are in great demands to manage, explore and utilize those invaluable data resources. Here, we constructed a platform 'TFBSbank' which houses the annotation of 1870 chromatin immunoprecipitation (ChIP) datasets of 585 TFs in five species (human, mouse, fly, worm and yeast). There are mainly five functional modules in TFBSbank aimed at characterizing ChIP peaks, identifying putative targets, predicting TF responsive enhancers, revealing potential cofactors/collaborators and discovering enriched TF motifs. TFBSbank has two distinctive features compared to the existing databases. Firstly, we provided putative cofactors/collaborators analysis (for Drosophila melanogaster), as they are crucial for the in vivo functions of TFs. Additionally, this database predicted the enrichment of both known and de novo motifs based on ChIP data. TFBSbank is freely accessible at http://tfbsbank.co.uk.",2016-11-28 +21843613,Novel use patterns of Salvia divinorum: unobtrusive observation using YouTube™.,"

Unlabelled

ETHNOPHARMACOLOGICAL RELEVANCE AND AIMS: The traditional use of the hallucinogenic sage, Salvia divinorum has been of ethnopharmacological interest for some time. This plant, endemic to Oaxaca, Mexico and traditionally used by the Mazatec, is now utilized worldwide for its psychoactive effects. This use demonstrates a novel use pattern which is distinctly different from Mazatec use. This study offers a new methodology to study emerging global plant use and assesses the users' experience with it. The aim of this research was to develop a new methodology to collect and analyze archived data on the World Wide Web, specifically videos which depict Salvia divinorum use.

Methods

The basis of the methodology for this project was unobtrusive observation which allows the researcher to observe without influencing the event which is being observed. Qualitative, ethnographic data was used in conjunction with quantitative meta data collected by a customized web crawler programed to archive YouTube™ data.

Results

Using this methodology enabled us to understand reported uses and the users' experiences as expressed on the World Wide Web. The main result of this research was the documentation of a distinct, novel use pattern of Salvia divinorum which has developed outside of Oaxaca; a use pattern which differs in a number of ways from traditional, Mazatec use. The majority of the YouTube™ videos analyzed were found to present indications of a positive Salvia divinorum experience. This result highlighted the contradiction between ethnographic data and what is reported by the media. Finally, the representation of Salvia divinorum on YouTube™ (and by inference the WWW as a whole) is a growing phenomenon.

Conclusions

While anthropological and more specifically medico-anthropological research has, for many years, embraced the dynamics of cultures, until recently, ethnopharmalogical research has generally focused on 'traditional' plant use, failing to capture the dynamic elements of plant/human interaction and framing research in the past or as decontextualized largely descriptive reports. Global migration and urban environments formed a basis for looking at the interplay of continuity and change. Such cultural dynamics are exacerbated by the opportunities which the WWW offers.",2011-08-06 +29519547,Non-vitamin K antagonist oral anticoagulants have better efficacy and equivalent safety compared to warfarin in elderly patients with atrial fibrillation: A systematic review and meta-analysis.,"BACKGROUND:To evaluate the efficacy and safety of non-vitamin K antagonist oral anticoagulants (NOACs) in elderly patients (aged ≥75 years) with atrial fibrillation (AF), depending on dose and/or renal function. METHODS:After systematically searching the databases (Medline, EMBASE, CENTRAL, SCOPUS, and Web of Science), 5 phase III randomized controlled trials and reported data according to subgroups of elderly/non-elderly AF patients, comparing any NOACs and warfarin were included. The primary efficacy and safety outcomes were stroke/systemic thromboembolism and major bleeding. RESULTS:(1) NOACs showed better efficacy than warfarin in elderly patients [RR 0.83 (0.69-1.00), p=0.04, I2=55%], but equivalent efficacy in non-elderly patients. (2) NOACs reduced major bleeding compared to warfarin in non-elderly (p<0.001) and had comparable safety to warfarin in elderly patients. (3) Even in elderly patients with moderately impaired renal function, NOACs had a safety profile comparable to that of warfarin for major bleeding if dose reduction was reached appropriately [pooled RR 0.82 (0.35-1.88), p=0.63, I2=63%]. 
(4) All-cause mortality was lower with NOACs in non-elderly patients [RR 0.89 (0.83-0.95), p=0.001, I2=0%], and with standard-dose NOAC group of elderly patients [RR 0.93 (0.86-1.00), p=0.04, I2=0%] compared to warfarin. CONCLUSIONS:For elderly patients (aged ≥75 years), NOACs showed better efficacy and equivalent safety compared to warfarin even in those with moderately impaired renal function. All-cause mortality was lower with standard-dose NOACs compared to warfarin in the elderly patient group. SYSTEMATIC REVIEW REGISTRATION:The protocol of this meta-analysis was registered on PROSPERO under CRD42016047922 (https://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42016047922).",2018-03-05 +29945233,appreci8: a pipeline for precise variant calling integrating 8 tools.,"

Motivation

The application of next-generation sequencing in research and particularly in clinical routine requires valid variant calling results. However, evaluation of several commonly used tools has pointed out that not a single tool meets this requirement. False positive as well as false negative calls necessitate additional experiments and extensive manual work. Intelligent combination and output filtration of different tools could significantly improve the current situation.

Results

We developed appreci8, an automatic variant calling pipeline for calling single nucleotide variants and short indels by combining and filtering the output of eight open-source variant calling tools, based on a novel artifact- and polymorphism score. Appreci8 was trained on two data sets from patients with myelodysplastic syndrome, covering 165 Illumina samples. Subsequently, appreci8's performance was tested on five independent data sets, covering 513 samples. Variation in sequencing platform, target region and disease entity was considered. All calls were validated by re-sequencing on the same platform, a different platform or expert-based review. Sensitivity of appreci8 ranged between 0.93 and 1.00, while positive predictive value ranged between 0.65 and 1.00. In all cases, appreci8 showed superior performance compared to any evaluated alternative approach.

Availability and implementation

Appreci8 is freely available at https://hub.docker.com/r/wwuimi/appreci8/. Sequencing data (BAM files) of the 678 patients analyzed with appreci8 have been deposited into the NCBI Sequence Read Archive (BioProjectID: 388411; https://www.ncbi.nlm.nih.gov/bioproject/PRJNA388411).

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +30140043,Instant Clue: A Software Suite for Interactive Data Visualization and Analysis.,"The development of modern high-throughput instrumentation and improved core facility infrastructures leads to an accumulation of large amounts of scientific data. However, for a majority of scientists the comprehensive analysis and visualization of their data goes beyond their expertise. To reduce this hurdle, we developed a software suite called Instant Clue that helps scientists to visually analyze data and to gain insights into biological processes from their high-dimensional dataset. Instant Clue combines the power of visual and statistical analytics using a straight forward drag & drop approach making the software highly intuitive. Additionally, it offers a comprehensive portfolio of statistical tools for systematic analysis such as dimensional reduction, (un)-supervised learning, clustering, multi-block (omics) integration and curve fitting. Charts can be combined with high flexibility into a main figure template for direct usage in scientific publications. Even though Instant Clue was developed with the omics-sciences in mind, users can analyze any kind of data from low to high dimensional data sets. The open-source software is available for Windows and Mac OS ( http://www.instantclue.uni-koeln.de ) and is accompanied by a detailed video tutorial series.",2018-08-23 +30709075,Childhood Cancer in Rhode Island.,"The incidence rate of childhood cancer is increasing in the United States. We sought to describe the epidemiology of childhood cancer in the state of Rhode Island. Data from the Rhode Island Cancer Registry was reviewed to assess incidence and trends in childhood cancer for individuals age 0-19 years from 1995-2015. Cancer mortality was based on deaths with cause of deaths associated with malignant cancers filed with the Rhode Island Vital Records and CDC National Center for Health Statistics. 
We found that pediatric cancer is increasing in Rhode Island. Between 1995-2015, there were 1,090 new diagnoses of childhood cancer. Leukemia, tumors of the central nervous system, and lymphomas are the most common types of cancer in children in the state. Additionally, the overall mortality rate from childhood cancer is decreasing. In conclusion, the childhood cancer trends in Rhode Island are consistent with the national data. [Full article available at http://rimed.org/rimedicaljournal-2019-02.asp].",2019-02-01 +26789753,The Cardiac Electrophysiology Web Lab.,"Computational modeling of cardiac cellular electrophysiology has a long history, and many models are now available for different species, cell types, and experimental preparations. This success brings with it a challenge: how do we assess and compare the underlying hypotheses and emergent behaviors so that we can choose a model as a suitable basis for a new study or to characterize how a particular model behaves in different scenarios? We have created an online resource for the characterization and comparison of electrophysiological cell models in a wide range of experimental scenarios. The details of the mathematical model (quantitative assumptions and hypotheses formulated as ordinary differential equations) are separated from the experimental protocol being simulated. Each model and protocol is then encoded in computer-readable formats. A simulation tool runs virtual experiments on models encoded in CellML, and a website (https://chaste.cs.ox.ac.uk/WebLab) provides a friendly interface, allowing users to store and compare results. The system currently contains a sample of 36 models and 23 protocols, including current-voltage curve generation, action potential properties under steady pacing at different rates, restitution properties, block of particular channels, and hypo-/hyperkalemia. 
This resource is publicly available, open source, and free, and we invite the community to use it and become involved in future developments. Investigators interested in comparing competing hypotheses using models can make a more informed decision, and those developing new models can upload them for easy evaluation under the existing protocols, and even add their own protocols.",2016-01-01 +23193297,"MiRGator v3.0: a microRNA portal for deep sequencing, expression profiling and mRNA targeting.","Biogenesis and molecular function are two key subjects in the field of microRNA (miRNA) research. Deep sequencing has become the principal technique in cataloging of miRNA repertoire and generating expression profiles in an unbiased manner. Here, we describe the miRGator v3.0 update (http://mirgator.kobic.re.kr) that compiled the deep sequencing miRNA data available in public and implemented several novel tools to facilitate exploration of massive data. The miR-seq browser supports users to examine short read alignment with the secondary structure and read count information available in concurrent windows. Features such as sequence editing, sorting, ordering, import and export of user data would be of great utility for studying iso-miRs, miRNA editing and modifications. miRNA-target relation is essential for understanding miRNA function. Coexpression analysis of miRNA and target mRNAs, based on miRNA-seq and RNA-seq data from the same sample, is visualized in the heat-map and network views where users can investigate the inverse correlation of gene expression and target relations, compiled from various databases of predicted and validated targets. By keeping datasets and analytic tools up-to-date, miRGator should continue to serve as an integrated resource for biogenesis and functional investigation of miRNAs.",2012-11-27 +24253212,An evaluation of the association for the advancement of wound care venous ulcer guideline and recommendations for further research.,"

Objective

The goals of this study were to analyze the 2010 update of the Association for the Advancement of Wound Care (AAWC) Venous Ulcer Guideline (VUG) and examine recommendations with less than A-level evidence to identify important research questions.

Data sources

The AAWC VUG may be found at http://aawconline.org/professional-resources/resources and at the National Guideline Clearinghouse, http://www.guideline.gov. Supporting references for each recommendation, compiled by the AAWC Guideline Task Force from MEDLINE, CINAHL, and EMBASE databases, may be viewed at the first website.

Study selection

The literature identified in support of the AAWC VUG recommendations with less than A-level evidence was evaluated and is summarized below.

Data extraction

Questions requiring further research in venous ulcer (VU) care were developed from recommendations having less than A-level support and that fall under the following topics: diagnosis, documentation, prevention, wound care, adjunctive interventions, and palliation.

Data synthesis

Practitioners lack strong evidence for several generally accepted recommendations of this synthesis of VU guidelines concerning the following: diagnostic or screening validity of varicosities, timing of biopsies for differential diagnosis, clinic visit frequency, criteria for changing VU care plans, and effective VU preventive parameters. Bedside surgical debridement, several biologic interventions, certain types of grafting, and the comparative efficacy of intravascular surgical procedures also require rigorous examination. Adjunctive interventions to be investigated include systemic pain management, topical biophysical treatments, novel devices, pharmaceuticals, timing, methods and procedures for some surgical interventions.

Conclusions

Better evidence for recommendations with less than A-level support may improve the quality and consistency of VU care, reduce costs, and improve resource use.",2013-12-01 +30560246,Glioma and Alzheimer's Disease.,"

Background

Cancer mortality and Alzheimer's disease (AD) mortality increase with age, but some studies have shown an inverse relationship of the two diseases, that is, older persons with cancer have a reduced risk of AD and vice versa. However, other analyses suggest that AD and brain tumor might be positively correlated.

Objective

In the current study, we wished to determine the relationship of AD mortality to malignant brain tumor mortality in US states and counties.

Methods

Data and maps of malignant brain tumor mortality and Alzheimer's disease mortality (1999-2016) are from the CDC Wonder tool (https://wonder.cdc.gov/cmf-icd10.html). Data on malignant brain tumor types and their frequencies are from the Surveillance, Epidemiology, and End Results Program (SEER, https://seer.cancer.gov). Data on the genetics of lower grade glioma are from the TCGA Lower Grade Glioma (LGG) dataset in TCGA (The Cancer Genome Atlas).

Results

SEER data indicate that astrocytomas make up 58.2% of malignant brain tumors in patients 65 and older; glioblastoma and anaplastic astrocytoma make up 41.6%. We found a significant positive correlation between AD mortality rate and malignant brain tumor mortality rate 1999-2016 in persons age 65 and older in A) 1,101 US counties, p <  0.001 and B) 50 US states, p <  0.001.

Conclusion

Adult malignant brain tumors may share some environmental risks with AD. Malignant brain tumors and AD also have some genes in common: TREM2, SPI1, CD33, and INPP5D. The interaction of environment and genetics is complex and overlaps in malignant brain tumors and AD.",2018-12-14 +31940340,BrainIAK tutorials: User-friendly learning materials for advanced fMRI analysis.,"Advanced brain imaging analysis methods, including multivariate pattern analysis (MVPA), functional connectivity, and functional alignment, have become powerful tools in cognitive neuroscience over the past decade. These tools are implemented in custom code and separate packages, often requiring different software and language proficiencies. Although usable by expert researchers, novice users face a steep learning curve. These difficulties stem from the use of new programming languages (e.g., Python), learning how to apply machine-learning methods to high-dimensional fMRI data, and minimal documentation and training materials. Furthermore, most standard fMRI analysis packages (e.g., AFNI, FSL, SPM) focus on preprocessing and univariate analyses, leaving a gap in how to integrate with advanced tools. To address these needs, we developed BrainIAK (brainiak.org), an open-source Python software package that seamlessly integrates several cutting-edge, computationally efficient techniques with other Python packages (e.g., Nilearn, Scikit-learn) for file handling, visualization, and machine learning. To disseminate these powerful tools, we developed user-friendly tutorials (in Jupyter format; https://brainiak.org/tutorials/) for learning BrainIAK and advanced fMRI analysis in Python more generally. 
These materials cover techniques including: MVPA (pattern classification and representational similarity analysis); parallelized searchlight analysis; background connectivity; full correlation matrix analysis; inter-subject correlation; inter-subject functional connectivity; shared response modeling; event segmentation using hidden Markov models; and real-time fMRI. For long-running jobs or large memory needs we provide detailed guidance on high-performance computing clusters. These notebooks were successfully tested at multiple sites, including as problem sets for courses at Yale and Princeton universities and at various workshops and hackathons. These materials are freely shared, with the hope that they become part of a pool of open-source software and educational materials for large-scale, reproducible fMRI analysis and accelerated discovery.",2020-01-15 +25414345,The SUPERFAMILY 1.75 database in 2014: a doubling of data.,"We present updates to the SUPERFAMILY 1.75 (http://supfam.org) online resource and protein sequence collection. The hidden Markov model library that provides sequence homology to SCOP structural domains remains unchanged at version 1.75. In the last 4 years SUPERFAMILY has more than doubled its holding of curated complete proteomes over all cellular life, from 1400 proteomes reported previously in 2010 up to 3258 at present. Outside of the main sequence collection, SUPERFAMILY continues to provide domain annotation for sequences provided by other resources such as: UniProt, Ensembl, PDB, much of JGI Phytozome and selected subcollections of NCBI RefSeq. Despite this growth in data volume, SUPERFAMILY now provides users with an expanded and daily updated phylogenetic tree of life (sTOL). This tree is built with genomic-scale domain annotation data as before, but constantly updated when new species are introduced to the sequence library. 
Our Gene Ontology and other functional and phenotypic annotations previously reported have stood up to critical assessment by the function prediction community. We have now introduced these data in an integrated manner online at the level of an individual sequence, and--in the case of whole genomes--with enrichment analysis against a taxonomically defined background.",2014-11-20 +29762722,CalFitter: a web server for analysis of protein thermal denaturation data.,"Despite significant advances in the understanding of protein structure-function relationships, revealing protein folding pathways still poses a challenge due to a limited number of relevant experimental tools. Widely-used experimental techniques, such as calorimetry or spectroscopy, critically depend on a proper data analysis. Currently, there are only separate data analysis tools available for each type of experiment with a limited model selection. To address this problem, we have developed the CalFitter web server to be a unified platform for comprehensive data fitting and analysis of protein thermal denaturation data. The server allows simultaneous global data fitting using any combination of input data types and offers 12 protein unfolding pathway models for selection, including irreversible transitions often missing from other tools. The data fitting produces optimal parameter values, their confidence intervals, and statistical information to define unfolding pathways. The server provides an interactive and easy-to-use interface that allows users to directly analyse input datasets and simulate modelled output based on the model parameters. CalFitter web server is available free at https://loschmidt.chemi.muni.cz/calfitter/.",2018-07-01 +31939706,Hepatic Tumor Formation in Adult Mice Developmentally Exposed to Organotin.,"

Background

Tributyltin (TBT) is a persistent and bioaccumulative environmental toxicant. Developmental exposure to TBT has been shown to cause fatty liver disease (steatosis), as well as increased adiposity in many species, leading to its characterization as an obesogen.

Objective

We aimed to determine the long-term effects of developmental TBT exposure on the liver.

Methods

C57BL/6J mice were exposed to a dose of TBT (0.5mg/kg body weight per day; 3.07μM) below the current developmental no observed adverse effect level (NOAEL) via drinking water, or drinking water alone, provided to the dam from preconception through lactation. Sires were exposed during breeding and lactation. Pups from two parity cycles were included in this study. Animals were followed longitudinally, and livers of offspring were analyzed by pathological evaluation, immunohistochemistry, immunoblotting, and RNA sequencing.

Results

Developmental exposure to TBT led to increased adiposity and hepatic steatosis at 14 and 20 weeks of age and increased liver adenomas at 45 weeks of age in male offspring. Female offspring displayed increased adiposity as compared with males, but TBT did not lead to an increase in fatty liver or tumor development in female offspring. Liver tumors in male mice were enriched in pathways and gene signatures associated with human and rodent nonalcoholic fatty liver disease (NAFLD) and hepatocellular carcinoma (HCC). This includes down-regulation of growth hormone receptor (GHR) and of STAT5 signaling, which occurred in response to TBT exposure and preceded liver tumor development.

Conclusions

These data reveal a previously unappreciated ability of TBT to increase risk for liver tumorigenesis in mice in a sex-specific manner. Taken together, these findings provide new insights into how early life environmental exposures contribute to liver disease in adulthood. https://doi.org/10.1289/EHP5414.",2020-01-15 +28877926,Effectiveness of Quality Improvement Strategies for the Management of CKD: A Meta-Analysis.,"

Background and objectives

Quality improvement interventions have enhanced care for other chronic illnesses, but their effectiveness for patients with CKD is unknown. We sought to determine the effects of quality improvement strategies on clinical outcomes in adult patients with nondialysis-requiring CKD.

Design, setting, participants, & measurements

We conducted a systematic review of randomized trials, searching Medline and the Cochrane Effective Practice and Organization of Care database from January of 2003 to April of 2015. Eligible studies evaluated one or more of 11 prespecified quality improvement strategies, and prespecified study outcomes included at least one process of care measure, surrogate outcome, or hard clinical outcome. We used a random effects model to estimate the pooled risk ratio (RR; dichotomous data) or the mean difference (continuous data).

Results

We reviewed 15 patient-level randomized trials (n=3298 patients), and six cluster-randomized trials (n=30,042 patients). Quality improvement strategies reduced dialysis incidence (seven trials; RR, 0.85; 95% confidence interval [95% CI], 0.74 to 0.97) and LDL cholesterol concentrations (four trials; mean difference, -17.6 mg/dl; 95% CI, -28.7 to -6.5), and increased the likelihood that patients received renin-angiotensin-aldosterone system inhibitors (nine trials; RR, 1.16; 95% CI, 1.06 to 1.27). We did not observe statistically significant effects on mortality, cardiovascular events, eGFR, glycated hemoglobin, and systolic or diastolic BP.

Conclusions

Quality improvement interventions yielded significant beneficial effects on three elements of CKD care. Estimates of the effectiveness of quality improvement strategies were limited by study number and adherence to quality improvement principles.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2017_09_06_CJASNPodcast_17_10.mp3.",2017-09-06 +30263917,Data confirming murine erythrocyte opsonization and oxidative damage and live microscopic analysis of oxidatively damaged erythrocyte uptake by mast cells.,"The data in the present article are related to research article (doi: https://doi.org/10.1016/j.imlet.2018.04.002) [1]. The data describes the detailed immunization protocol for generating polyclonal antisera to murine erythrocytes in rat. The rat anti-mouse erythrocyte serum is then tested for its ability to bind and opsonize murine erythrocytes. Second set of data confirms the oxidative damage to murine erythrocytes by treatment with different dose of the tert-butyl hydroperoxide (t-BHP) on the basis of phosphotidylserine externalization by murine erythrocytes as well as measurement of reactive oxygen species (ROS) formation in t-BHP treated erythrocytes. Third set of data depicts lack of mast cell degranulation in the form of β- hexosaminidase release in response to co-incubation of mast cell with normal and oxidatively damaged erythrocytes. Lastly, the uptake of oxidatively damaged erythrocytes by resting and activated RBL-2H3 mast cells is shown by live cell imaging using confocal microscope.",2018-09-13 +24082050,yApoptosis: yeast apoptosis database.,"In the past few years, programmed cell death (PCD) has become a popular research area due to its fundamental aspects and its links to human diseases. Yeast has been used as a model for studying PCD, since the discovery of morphological markers of apoptotic cell death in yeast in 1997. Increasing knowledge in identification of components and molecular pathways created a need for organization of information. To meet the demands from the research community, we have developed a curated yeast apoptosis database, yApoptosis. 
The database structurally collects an extensively curated set of apoptosis, PCD and related genes, their genomic information, supporting literature and relevant external links. A web interface including necessary functions is provided to access and download the data. In addition, we included several networks where the apoptosis genes or proteins are involved, and present them graphically and interactively to facilitate rapid visualization. We also promote continuous inputs and curation by experts. yApoptosis is a highly specific resource for sharing information online, which supports researches and studies in the field of yeast apoptosis and cell death. DATABASE URL: http://www.ycelldeath.com/yapoptosis/.",2013-09-29 +29718162,lncFunTK: a toolkit for functional annotation of long noncoding RNAs.,"Motivation:Thousands of long noncoding RNAs (lncRNAs) were newly identified from high throughput RNA-seq data. Functional annotation and prioritization of these lncRNAs for further experimental validation as well as the functional investigation is the bottleneck step for many noncoding RNA studies. Results:Here we describe lncFunTK that can run either as standard application or webserver for this purpose. It integrates high throughput sequencing data (i.e. ChIP-seq, CLIP-seq and RNA-seq) to construct the regulatory network associated with lncRNAs. Through the network, it calculates the Functional Information Score (FIS) of each individual lncRNA for prioritizing and inferring its functions through Gene Ontology (GO) terms of neighboring genes. In addition, it also provides utility scripts to support the input data preprocessing and the parameter optimizing. We further demonstrate that lncFunTK can be widely used in various biological systems for lncRNA prioritization and functional annotation. Availability and implementation:The lncFunTK standalone version is an open source package and freely available at http://sunlab.cpy.cuhk.edu.hk/lncfuntk under the MIT license. 
A webserver implementation is also available at http://sunlab.cpy.cuhk.edu.hk/lncfuntk/runlncfuntk.html. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-10-01 +29316735,The 2018 Nucleic Acids Research database issue and the online molecular biology database collection.,"The 2018 Nucleic Acids Research Database Issue contains 181 papers spanning molecular biology. Among them, 82 are new and 84 are updates describing resources that appeared in the Issue previously. The remaining 15 cover databases most recently published elsewhere. Databases in the area of nucleic acids include 3DIV for visualisation of data on genome 3D structure and RNArchitecture, a hierarchical classification of RNA families. Protein databases include the established SMART, ELM and MEROPS while GPCRdb and the newcomer STCRDab cover families of biomedical interest. In the area of metabolism, HMDB and Reactome both report new features while PULDB appears in NAR for the first time. This issue also contains reports on genomics resources including Ensembl, the UCSC Genome Browser and ENCODE. Update papers from the IUPHAR/BPS Guide to Pharmacology and DrugBank are highlights of the drug and drug target section while a number of proteomics databases including proteomicsDB are also covered. The entire Database Issue is freely available online on the Nucleic Acids Research website (https://academic.oup.com/nar). The NAR online Molecular Biology Database Collection has been updated, reviewing 138 entries, adding 88 new resources and eliminating 47 discontinued URLs, bringing the current total to 1737 databases. It is available at http://www.oxfordjournals.org/nar/database/c/.",2018-01-01 +29112736,VarCards: an integrated genetic and clinical database for coding variants in the human genome.,"A growing number of genomic tools and databases were developed to facilitate the interpretation of genomic variants, particularly in coding regions. 
However, these tools are separately available in different online websites or databases, making it challenging for general clinicians, geneticists and biologists to obtain the first-hand information regarding some particular variants and genes of interest. Starting with coding regions and splice sties, we artificially generated all possible single nucleotide variants (n = 110 154 363) and cataloged all reported insertion and deletions (n = 1 223 370). We then annotated these variants with respect to functional consequences from more than 60 genomic data sources to develop a database, named VarCards (http://varcards.biols.ac.cn/), by which users can conveniently search, browse and annotate the variant- and gene-level implications of given variants, including the following information: (i) functional effects; (ii) functional consequences through different in silico algorithms; (iii) allele frequencies in different populations; (iv) disease- and phenotype-related knowledge; (v) general meaningful gene-level information; and (vi) drug-gene interactions. As a case study, we successfully employed VarCards in interpretation of de novo mutations in autism spectrum disorders. In conclusion, VarCards provides an intuitive interface of necessary information for researchers to prioritize candidate variations and genes.",2018-01-01 +31095319,PopNetD3-A Network-Based Web Resource for Exploring Population Structure.,"We present PopNetD3, a web tool that provides an integrated approach for the network-based visualization of population structure based on the PopNet clustering framework. Users first submit a tab-delimited file that defines diversity of SNPs across the genome which is subsequently processed by the PopNet backend to define patterns of conservation at the chromosome level. The resulting population structure is visualized through a dedicated D3-based tool, allowing users to interactively examine chromosomal regions predicted to share ancestry. 
We illustrate the capabilities of PopNetD3 through an analysis of 16 strains of Neisseria gonorrhoeae. PopNetD3 is capable of processing population data sets consisting of hundreds of individuals and is publicly available online at: http://compsysbio.org/popnetd3 Last Accessed: May 17, 2019.",2019-07-01 +30481283,BlendMol: advanced macromolecular visualization in Blender.,"

Summary

Programs such as VMD and PyMOL are excellent tools for analyzing macromolecular structures, but they do not implement many of the advanced rendering techniques common in the film and video-game industries. In contrast, the open-source program Blender is a general-purpose tool for industry-standard rendering/visualization, but its user interface is poorly suited for rigorous scientific analysis. We present BlendMol, a Blender plugin that imports VMD or PyMOL scenes into Blender. BlendMol-generated images are well suited for use in manuscripts, outreach programs, websites and classes.

Availability and implementation

BlendMol is available free of charge from http://durrantlab.com/blendmol/. It is written in Python.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +30053238,mutTCPdb: a comprehensive database for genomic variants of a tropical country neglected disease-tropical calcific pancreatitis.,"Tropical calcific pancreatitis (TCP) is a juvenile, non-alcoholic form of chronic pancreatitis with its exclusive presence in tropical regions associated with the low economic status. TCP initiates in the childhood itself and then proliferates silently. mutTCPdb is a manually curated and comprehensive disease specific single nucleotide variant (SNV) database. Extensive search strategies were employed to create a repository while SNV information was collected from published articles. Several existing databases such as the dbSNP, Uniprot, miRTarBase2.0, HGNC, PFAM, KEGG, PROSITE, MINT, BIOGRID 3.4 and Ensemble Genome Browser 87 were queried to collect information specific to the gene. mutTCPdb is running on the XAMPP web server with MYSQL database in the backend for data storage and management. Currently, the mutTCPdb enlists 100 variants of all 11 genes identified in TCP, out of which 45 are non-synonymous (missense, nonsense, deletions and insertions), 46 are present in non-coding regions (UTRs, promoter region and introns) and 9 are synonymous variants. The database is highly curated for disease-specific gene variants and provides complete information on function, transcript information, pathways, interactions, miRNAs and PubMed references along with remarks. 
It is an informative portal for clinicians and researchers for a better understanding of the disease, as it may help in identifying novel targets and diagnostic markers, hence, can be a source to improve the strategies for TCP management.Database URL: http://lms.snu.edu.in/mutTCPDB/index.php.",2018-01-01 +29106641,The MAR databases: development and implementation of databases specific for marine metagenomics.,"We introduce the marine databases; MarRef, MarDB and MarCat (https://mmp.sfb.uit.no/databases/), which are publicly available resources that promote marine research and innovation. These data resources, which have been implemented in the Marine Metagenomics Portal (MMP) (https://mmp.sfb.uit.no/), are collections of richly annotated and manually curated contextual (metadata) and sequence databases representing three tiers of accuracy. While MarRef is a database for completely sequenced marine prokaryotic genomes, which represent a marine prokaryote reference genome database, MarDB includes all incomplete sequenced prokaryotic genomes regardless level of completeness. The last database, MarCat, represents a gene (protein) catalog of uncultivable (and cultivable) marine genes and proteins derived from marine metagenomics samples. The first versions of MarRef and MarDB contain 612 and 3726 records, respectively. Each record is built up of 106 metadata fields including attributes for sampling, sequencing, assembly and annotation in addition to the organism and taxonomic information. Currently, MarCat contains 1227 records with 55 metadata fields. Ontologies and controlled vocabularies are used in the contextual databases to enhance consistency. The user-friendly web interface lets the visitors browse, filter and search in the contextual databases and perform BLAST searches against the corresponding sequence databases. 
All contextual and sequence databases are freely accessible and downloadable from https://s1.sfb.uit.no/public/mar/.",2018-01-01 +29939244,PtRFdb: a database for plant transfer RNA-derived fragments. ,"Transfer RNA-derived fragments (tRFs) represent a novel class of small RNAs (sRNAs) generated through endonucleolytic cleavage of both mature and precursor transfer RNAs (tRNAs). These 14-28 nt length tRFs that have been extensively studied in animal kingdom are to be explored in plants. In this study, we introduce a database of plant tRFs named PtRFdb (www.nipgr.res.in/PtRFdb), for the scientific community. We analyzed a total of 1344 sRNA sequencing datasets of 10 different plant species and identified a total of 5607 unique tRFs (758 tRF-1, 2269 tRF-3 and 2580 tRF-5), represented by 487 765 entries. In PtRFdb, detailed and comprehensive information is available for each tRF entry. Apart from the core information consisting of the tRF type, anticodon, source organism, tissue, sequence and the genomic location; additional information like PubMed identifier (PMID), Sample accession number (GSM), sequence length and frequency relevant to the tRFs may be of high utility to the user. Two different types of search modules (Basic Search and Advanced Search), sequence similarity search (by BLAST) and Browse option with data download facility for each search is provided in this database. We believe that PtRFdb is a unique database of its kind and it will be beneficial in the validation and further characterization of plant tRFs.Database URL: http://www.nipgr.res.in/PtRFdb/.",2018-01-01 +31934794,Moving beyond Fine Particle Mass: High-Spatial Resolution Exposure to Source-Resolved Atmospheric Particle Number and Chemical Mixing State.,"BACKGROUND:Most epidemiological studies address health effects of atmospheric particulate matter (PM) using mass-based measurements as exposure surrogates. 
However, this approach ignores many critical physiochemical properties of individual atmospheric particles. These properties control the deposition of particles in the human lung and likely their toxicity; in addition, they likely have larger spatial variability than PM mass. OBJECTIVES:This study was designed to quantify the spatial variability in number, size, source, and chemical mixing state of individual particles in a populous urban area. We quantified the population exposure to these detailed particle properties and compared them to mass-based exposures. METHODS:We performed mobile sampling using an advanced single-particle mass spectrometer to measure the spatial variability of number concentration of source-resolved 50-1,000 nm particles and particle mixing state in Pittsburgh, Pennsylvania. We built land-use regression (LUR) models to estimate their spatial patterns and coupled them with demographic data to estimate population exposure. RESULTS:Particle number concentration had a much larger spatial variability than mass concentration within the city. Freshly emitted particles from traffic and cooking drive the variability in particle number, but mass concentrations are dominated by aged background particles composed of secondary materials. In addition, people exposed to elevated number concentrations of atmospheric particles are also exposed to more externally mixed particles. CONCLUSIONS:Our advanced measurement technique provides a new exposure picture that resolves the large intra-city spatial heterogeneity in traffic and cooking particle number concentrations in the populous urban area. Our results provide a complementary and more detailed perspective compared with bulk measurements of composition. In addition, given the influence of particle mixing state on properties such as particle deposition in the lung, the large spatial gradients of chemical mixing state may significantly influence the health effects of fine PM. 
https://doi.org/10.1289/EHP5311.",2020-01-14 +23794736,ESCAPE: database for integrating high-content published data collected from human and mouse embryonic stem cells.,"High content studies that profile mouse and human embryonic stem cells (m/hESCs) using various genome-wide technologies such as transcriptomics and proteomics are constantly being published. However, efforts to integrate such data to obtain a global view of the molecular circuitry in m/hESCs are lagging behind. Here, we present an m/hESC-centered database called Embryonic Stem Cell Atlas from Pluripotency Evidence integrating data from many recent diverse high-throughput studies including chromatin immunoprecipitation followed by deep sequencing, genome-wide inhibitory RNA screens, gene expression microarrays or RNA-seq after knockdown (KD) or overexpression of critical factors, immunoprecipitation followed by mass spectrometry proteomics and phosphoproteomics. The database provides web-based interactive search and visualization tools that can be used to build subnetworks and to identify known and novel regulatory interactions across various regulatory layers. The web-interface also includes tools to predict the effects of combinatorial KDs by additive effects controlled by sliders, or through simulation software implemented in MATLAB. Overall, the Embryonic Stem Cell Atlas from Pluripotency Evidence database is a comprehensive resource for the stem cell systems biology community. Database URL: http://www.maayanlab.net/ESCAPE",2013-06-21 +23933261,Global proteome analysis of the NCI-60 cell line panel.,"The NCI-60 cell line collection is a very widely used panel for the study of cellular mechanisms of cancer in general and in vitro drug action in particular. It is a model system for the tissue types and genetic diversity of human cancers and has been extensively molecularly characterized. 
Here, we present a quantitative proteome and kinome profile of the NCI-60 panel covering, in total, 10,350 proteins (including 375 protein kinases) and including a core cancer proteome of 5,578 proteins that were consistently quantified across all tissue types. Bioinformatic analysis revealed strong cell line clusters according to tissue type and disclosed hundreds of differentially regulated proteins representing potential biomarkers for numerous tumor properties. Integration with public transcriptome data showed considerable similarity between mRNA and protein expression. Modeling of proteome and drug-response profiles for 108 FDA-approved drugs identified known and potential protein markers for drug sensitivity and resistance. To enable community access to this unique resource, we incorporated it into a public database for comparative and integrative analysis (http://wzw.tum.de/proteomics/nci60).",2013-08-08 +30024245,Stereotypes of age differences in personality traits: Universal and accurate: Correction to Chan et al. (2012).,"Reports an error in ""Stereotypes of age differences in personality traits: Universal and accurate"" by Wayne Chan, Robert R. Mccrae, Filip De Fruyt, Lee Jussim, Corinna E. Löckenhoff, Marleen De Bolle, Paul T. Costa, Angelina R. Sutin, Anu Realo, Jüri Allik, Katsuharu Nakazato, Yoshiko Shimonaka, Martina Hřebíčková, Sylvie Graf, Michelle Yik, Marina Brunner-sciarra, Nora Leibovich De Figueroa, Vanina Schmidt, Chang-kyu Ahn, Hyun-nie Ahn, Maria E. Aguilar-vafaie, Jerzy Siuta, Barbara Szmigielska, Thomas R. Cain, Jarret T. Crawford, Khairul Anwar Mastor, Jean-pierre Rolland, Florence Nansubuga, Daniel R. Miramontez, Verónica Benet-martínez, Jérôme Rossier, Denis Bratko, Iris Marušić, Jamin Halberstadt, Mami Yamaguchi, Goran Knežević, Thomas A. Martin, Mirona Gheorghiu, Peter B. Smith, Claudio Barbaranelli, Lei Wang, Jane Shakespeare-finch, Margarida P. Lima, Waldemar Klinkosz, Andrzej Sekowski, Lidia Alcalay, Franco Simonetti, Tatyana V. 
Avdeyeva, V. S. Pramila and Antonio Terracciano (Journal of Personality and Social Psychology, 2012[Dec], Vol 103[6], 1050-1066). In the article ""Stereotypes of Age Differences in Personality Traits: Universal and Accurate?"" by Wayne Chan, Robert R. McCrae, Filip De Fruyt, Lee Jussim, Corinna E. Löckenhoff, Marleen De Bolle, Paul T. Costa Jr., Angelina R. Sutin, Anu Realo, Jüri Allik, Katsuharu Nakazato, Yoshiko Shimonaka, Martina Hřebíková, Sylvie Graf, Michelle Yik, Marina Brunner-Sciarra, Nora Leibovich de Figueroa, Vanina Schmidt, Chang-kyu Ahn, Hyun-nie Ahn, Maria E. Aguilar-Vafaie, Jerzy Siuta, Barbara Szmigielska, Thomas R. Cain, Jarret T. Crawford, Khairul Anwar Mastor, Jean-Pierre Rolland, Florence Nansubuga, Daniel R. Miramontez, Veronica Benet-Martínez, Jérôme Rossier, Denis Bratko, Iris Marušić, Jamin Halberstadt, Mami Yamaguchi, Goran Knežević, Thomas A. Martin, Mirona Gheorghiu, Peter B. Smith, Claudio Barbaranelli, Lei Wang, Jane Shakespeare-Finch, Margarida P. Lima, Waldemar Klinkosz, Andrzej Sekowski, Lidia Alcalay, Franco Simonetti, Tatyana V. Avdeyeva, V. S. Pramila, and Antonio Terracciano (Journal of Personality and Social Psychology, 2012, Vol. 103, No. 6, pp. 1050-1066. http://dx.doi.org/10.1037/a0029712), the 17th author's name was misspelled in the byline and author note. The correct spelling is Nora Leibovich de Figueroa. (The following abstract of the original article appeared in record 2012-28195-001.) Age trajectories for personality traits are known to be similar across cultures. To address whether stereotypes of age groups reflect these age-related changes in personality, we asked participants in 26 countries (N = 3,323) to rate typical adolescents, adults, and old persons in their own country. 
Raters across nations tended to share similar beliefs about different age groups; adolescents were seen as impulsive, rebellious, undisciplined, preferring excitement and novelty, whereas old people were consistently considered lower on impulsivity, activity, antagonism, and Openness. These consensual age group stereotypes correlated strongly with published age differences on the five major dimensions of personality and most of 30 specific traits, using as criteria of accuracy both self-reports and observer ratings, different survey methodologies, and data from up to 50 nations. However, personal stereotypes were considerably less accurate, and consensual stereotypes tended to exaggerate differences across age groups. (PsycINFO Database Record",2018-08-01 +28039431,Data integration in physiology using Bayes' rule and minimum Bayes' factors: deubiquitylating enzymes in the renal collecting duct.,"A major challenge in physiology is to exploit the many large-scale data sets available from ""-omic"" studies to seek answers to key physiological questions. In previous studies, Bayes' theorem has been used for this purpose. This approach requires a means to map continuously distributed experimental data to probabilities (likelihood values) to derive posterior probabilities from the combination of prior probabilities and new data. Here, we introduce the use of minimum Bayes' factors for this purpose and illustrate the approach by addressing a physiological question, ""Which deubiquitylating enzymes (DUBs) encoded by mammalian genomes are most likely to regulate plasma membrane transport processes in renal cortical collecting duct principal cells?"" To do this, we have created a comprehensive online database of 110 DUBs present in the mammalian genome (https://hpcwebapps.cit.nih.gov/ESBL/Database/DUBs/). 
We used Bayes' theorem to integrate available information from large-scale data sets derived from proteomic and transcriptomic studies of renal collecting duct cells to rank the 110 known DUBs with regard to likelihood of interacting with and regulating transport processes. The top-ranked DUBs were OTUB1, USP14, PSMD7, PSMD14, USP7, USP9X, OTUD4, USP10, and UCHL5. Among these USP7, USP9X, OTUD4, and USP10 are known to be involved in endosomal trafficking and have potential roles in endosomal recycling of plasma membrane proteins in the mammalian cortical collecting duct.",2016-12-30 +29223505,Pakistan Genetic Mutation Database (PGMD); A centralized Pakistani mutome data source.,"The development and advancement of next generation sequencing have not only sped up the process of identifying rare variants, but have also enabled scientists to explore all variants in a single individual. The Pakistani population has a high ratio of first degree consanguinity, which is why it is a rich source for various kinds of genetic disorders. Due to the heterogeneous composition of Pakistani population, the likelihood of genetic heterogeneity for each disorder is high. Therefore, the compilation and organization of such vast genetic data is necessary to facilitate access for analysis and interpretation to researchers and medical geneticists. The increased research on Pakistani ethnic families for disease gene identification has revealed many mutations, which has led us to develop a Pakistani mutome database entitled ""Pakistan Genetic Mutation Database (PGMD)"". In PGMD, the medico-genetic information about diseases are mainly compiled into Syndromic and Non-syndromic disorders. It is a public database, which can be freely accessed from http://www.pakmutation.com. At present, we have registered more than 1000 mutations, reported in about 130 different kinds of genetic disorders. 
Practically, PGMD will assist researchers, clinicians, and geneticists in genetic counseling and screening of population-specific mutations, which will also aid in personalized healthcare.",2017-12-07 +31123286,AICD: an integrated anti-inflammatory compounds database for drug discovery.,"Systemic or local inflammation drives the pathogenesis of various human diseases. Small compounds with anti-inflammatory properties hold great potential for clinical translation. Over recent decades, many compounds have been screened for their action against inflammation-related targets. Databases that integrate the physicochemical properties and bioassay results of these compounds are lacking. We created an ""Anti-Inflammatory Compounds Database"" (AICD) to deposit compounds with potential anti-inflammation activities. A total of 232 inflammation-related targets were recruited by the AICD. Gene set enrichment analysis showed that these targets were involved in various human diseases. Bioassays of these targets were collected from open-access databases and adopted to extract 79,781 small molecules with information on chemical properties, candidate targets, bioassay models and bioassay results. Principal component analysis demonstrated that these deposited compounds were closely related to US Food and Drug Administration-approved drugs with respect to chemical space and chemical properties. Finally, pathway-based screening for drug combination/multi-target drugs provided a case study for drug discovery using the AICD. The AICD focuses on inflammation-related drug targets and contains substantial candidate compounds with high chemical diversity and good drug-like properties. 
It could be serviced for the discovery of anti-inflammatory medicines and can be accessed freely at http://956023.ichengyun.net/AICD/index.php .",2019-05-23 +29036590,TissGDB: tissue-specific gene database in cancer.,"Tissue-specific gene expression is critical in understanding biological processes, physiological conditions, and disease. The identification and appropriate use of tissue-specific genes (TissGenes) will provide important insights into disease mechanisms and organ-specific therapeutic targets. To better understand the tissue-specific features for each cancer type and to advance the discovery of clinically relevant genes or mutations, we built TissGDB (Tissue specific Gene DataBase in cancer) available at http://zhaobioinfo.org/TissGDB. We collected and curated 2461 tissue specific genes (TissGenes) across 22 tissue types that matched the 28 cancer types of The Cancer Genome Atlas (TCGA) from three representative tissue-specific gene expression resources: The Human Protein Atlas (HPA), Tissue-specific Gene Expression and Regulation (TiGER), and Genotype-Tissue Expression (GTEx). For these 2461 TissGenes, we performed gene expression, somatic mutation, and prognostic marker-based analyses across 28 cancer types using TCGA data. Our analyses identified hundreds of TissGenes, including genes that universally kept or lost tissue-specific gene expression, with other features: cancer type-specific isoform expression, fusion with oncogenes or tumor suppressor genes, and markers for protective or risk prognosis. 
TissGDB provides seven categories of annotations: TissGeneSummary, TissGeneExp, TissGene-miRNA, TissGeneMut, TissGeneNet, TissGeneProg, TissGeneClin.",2018-01-01 +25946867,The linked human imprintome v1.0: over 120 genes confirmed as imprinted impose a major review on previous censuses.,"The whole set of human imprinted genes, termed imprintome, is here analysed by means of a reasonable, valid application of the Semantic Web and Linked Data approaches to a few structured datasets in order to provide a comprehensive collection of imprinted genes in the human genome. Thus, we have stored, organised, filtered, and analysed massive amounts of existing data on human imprinted genes towards compiling, structuring and linking data to comprise a sharing resource for genome and epigenome interrogated studies. Our datasets of linked data are the actual research outcome of this human imprintome analysis because as genomics become more and more data intensive, due to huge amounts of biological data, so does our needs for more structured data to be easier mined and shared. We present the resulting first version of the Linked Human Imprintome as a project within Linked Open Data (LOD) initiative (http://lod-cloud.net/) through Data Hub (http://thedatahub.org/en/dataset/a-draft-version-of-the-linked-human-imprintome).",2014-01-01 +28194231,SkinSensDB: a curated database for skin sensitization assays.,"Skin sensitization is an important toxicological endpoint for chemical hazard determination and safety assessment. Prediction of chemical skin sensitizer had traditionally relied on data from rodent models. The development of the adverse outcome pathway (AOP) and associated alternative in vitro assays have reshaped the assessment of skin sensitizers. The integration of multiple assays as key events in the AOP has been shown to have improved prediction performance. 
Current computational models to predict skin sensitization mainly based on in vivo assays without incorporating alternative in vitro assays. However, there are few freely available databases integrating both the in vivo and the in vitro skin sensitization assays for development of AOP-based skin sensitization prediction models. To facilitate the development of AOP-based prediction models, a skin sensitization database named SkinSensDB has been constructed by curating data from published AOP-related assays. In addition to providing datasets for developing computational models, SkinSensDB is equipped with browsing and search tools which enable the assessment of new compounds for their skin sensitization potentials based on data from structurally similar compounds. SkinSensDB is publicly available at http://cwtung.kmu.edu.tw/skinsensdb.",2017-01-31 +24358873,Construction and accessibility of a cross-species phenotype ontology along with gene annotations for biomedical research.,"Phenotype analyses, e.g. investigating metabolic processes, tissue formation, or organism behavior, are an important element of most biological and medical research activities. Biomedical researchers are making increased use of ontological standards and methods to capture the results of such analyses, with one focus being the comparison and analysis of phenotype information between species. We have generated a cross-species phenotype ontology for human, mouse and zebrafish that contains classes from the Human Phenotype Ontology, Mammalian Phenotype Ontology, and generated classes for zebrafish phenotypes. We also provide up-to-date annotation data connecting human genes to phenotype classes from the generated ontology. We have included the data generation pipeline into our continuous integration system ensuring stable and up-to-date releases. 
This article describes the data generation process and is intended to help interested researchers access both the phenotype annotation data and the associated cross-species phenotype ontology. The resource described here can be used in sophisticated semantic similarity and gene set enrichment analyses for phenotype data across species. The stable releases of this resource can be obtained from http://purl.obolibrary.org/obo/hp/uberpheno/.",2013-02-01 +26208906,TANRIC: An Interactive Open Platform to Explore the Function of lncRNAs in Cancer.,"Long noncoding RNAs (lncRNA) have emerged as essential players in cancer biology. Using recent large-scale RNA-seq datasets, especially those from The Cancer Genome Atlas (TCGA), we have developed ""The Atlas of Noncoding RNAs in Cancer"" (TANRIC; http://bioinformatics.mdanderson.org/main/TANRIC:Overview), a user-friendly, open-access web resource for interactive exploration of lncRNAs in cancer. It characterizes the expression profiles of lncRNAs in large patient cohorts of 20 cancer types, including TCGA and independent datasets (>8,000 samples overall). TANRIC enables researchers to rapidly and intuitively analyze lncRNAs of interest (annotated lncRNAs or any user-defined ones) in the context of clinical and other molecular data, both within and across tumor types. Using TANRIC, we have identified a large number of lncRNAs with potential biomedical significance, many of which show strong correlations with established therapeutic targets and biomarkers across tumor types or with drug sensitivity across cell lines. 
TANRIC represents a valuable tool for investigating the function and clinical relevance of lncRNAs in cancer, greatly facilitating lncRNA-related biologic discoveries and clinical applications.",2015-07-24 +31245691,ShinyAIM: Shiny-based application of interactive Manhattan plots for longitudinal genome-wide association studies.,"Owing to advancements in sensor-based, non-destructive phenotyping platforms, researchers are increasingly collecting data with higher temporal resolution. These phenotypes collected over several time points are cataloged as longitudinal traits and used for genome-wide association studies (GWAS). Longitudinal GWAS typically yield a large number of output files, posing a significant challenge to data interpretation and visualization. Efficient, dynamic, and integrative data visualization tools are essential for the interpretation of longitudinal GWAS results for biologists; however, these tools are not widely available to the community. We have developed a flexible and user-friendly Shiny-based online application, ShinyAIM, to dynamically view and interpret temporal GWAS results. The main features of the application include (a) interactive Manhattan plots for single time points, (b) a grid plot to view Manhattan plots for all time points simultaneously, (c) dynamic scatter plots for p-value-filtered selected markers to investigate co-localized genomic regions across time points, (d) and interactive phenotypic data visualization to capture variation and trends in phenotypes. The application is written entirely in the R language and can be used with limited programming experience. ShinyAIM is deployed online as a Shiny web server application at https://chikudaisei.shinyapps.io/shinyaim/, enabling easy access for users without installation. The application can also be launched on a local machine in RStudio.",2018-10-24 +30498988,Investigating the structure of ORTO-15: a meta-analytical simulation study.,"Missbach et al. 
(Appetite 108:521-524, https://doi.org/10.1016/j.appet.2016.07.010 , 2016) argued that there is a critical need to develop new tools assessing orthorexia nervosa (ON), as the existing measure (i.e., ORTO-15; Donini, Eat Weight Disord 10:28-32, https://doi.org/10.1007/BF03327537 , 2005) is an unvalidated measure, which fails to adequately assess the prevalence rate of ON. We believe that ignoring past data from ORTO-15 and going in the ""baby with the bath water"" direction will not catalyse but inhibit ON research. Using data from the review of the psychometric studies analysing the structure of ORTO-15 provided in Missbach et al. (2016), we selected six items, which were present in each study, and estimated effect sizes for the factor loadings. The effect sizes were used in a Monte Carlo simulation study with N = 100, 500, and 1000 to test whether the analysed model is valid. The obtained results confirmed that the six-item version of ORTO-15 is a valid and reliable measure of ON. Although new measures of ON are needed, the past data also provide valuable insight into a better understanding of ON.",2018-11-29 +31294065,"Dataset of the infrared spectrometry, gas chromatography-mass spectrometry analysis and nuclear magnetic resonance spectroscopy of the polysaccharides from C. militaris.","The data presented in this article describe characteristics of the polysaccharides, designated as CM1 and CMS, isolated from the fruiting body of C. militaris. Fourier transform infrared spectrometry analysis was used to identify the basic characteristics of the polysaccharides and the completeness of methylation. Gas chromatography-tandem mass spectrometry and nuclear magnetic resonance spectroscopy were carried out to reveal the glycosidic linkages of CM1 and CMS. 
Further interpretation and discussion could be found at our research article entitled ""Structural characterisation and cholesterol efflux improving capacity of the novel polysaccharides from Cordyceps militaris"" (Hu et al., 2019; https://doi.org/10.1016/j.ijbiomac.2019.03.078) [1].",2019-06-11 +30239587,Sequence clustering in bioinformatics: an empirical study. ,"Sequence clustering is a basic bioinformatics task that is attracting renewed attention with the development of metagenomics and microbiomics. The latest sequencing techniques have decreased costs and as a result, massive amounts of DNA/RNA sequences are being produced. The challenge is to cluster the sequence data using stable, quick and accurate methods. For microbiome sequencing data, 16S ribosomal RNA operational taxonomic units are typically used. However, there is often a gap between algorithm developers and bioinformatics users. Different software tools can produce diverse results and users can find them difficult to analyze. Understanding the different clustering mechanisms is crucial to understanding the results that they produce. In this review, we selected several popular clustering tools, briefly explained the key computing principles, analyzed their characters and compared them using two independent benchmark datasets. Our aim is to assist bioinformatics users in employing suitable clustering tools effectively to analyze big sequencing data. Related data, codes and software tools were accessible at the link http://lab.malab.cn/∼lg/clustering/.",2018-09-18 +32311994,Maternal and fetal complications associated with systemic lupus erythematosus: An updated meta-analysis of the most recent studies (2017-2019).,"

Background

Recent guidelines provide better treatment and management of pregnancy in women with systemic lupus erythematosus (SLE). In this analysis, we aimed to systematically assess the maternal and fetal complications associated with SLE using the most recent studies (2017-2019) to obtain an updated result of the present situation.

Methods

http://www.clinicaltrials.gov, MEDLINE, Cochrane Central, Web of Science, EMBASE, and Google Scholar were searched for English based studies comparing maternal and fetal complications in pregnant women with versus without SLE. Maternal and fetal complications were the endpoints in this analysis. The RevMan software 5.3 (latest version) was the most suitable analytical software for this analysis. Data were represented by risk ratio (RR) with 95% confidence interval (CI).

Results

A total number of eight million eight hundred and twelve thousand two hundred seventy-two (8,812,272) participants were included in this analysis, consisting of 9696 SLE-associated pregnancy. Based on an analysis of recently published studies (2017-2019), pre-eclampsia/eclampsia was significantly higher in pregnant women with SLE (RR: 3.38, 95% CI: 3.15-3.62; P = .00001). SLE was also associated with an increased risk of stillbirth (RR: 16.49, 95% CI: 2.95-92.13; P = .001) and fetal loss (RR: 7.55, 95% CI: 4.75-11.99; P = .00001). Abortion (RR: 4.70, 95% CI: 3.02-7.29; P = .00001) and the risk for cesarean section due to complications (RR: 1.38, 95% CI: 1.11-1.70; P = .003) were also significantly higher in pregnant women with SLE. In addition, fetal complications including preterm birth (RR: 2.33, 95% CI: 1.78-3.05; P = .00001), infants who were small for gestational age (RR: 2.50, 95% CI: 1.41-4.45; P = .002) and infants with low birth weight (RR: 4.78, 95% CI: 3.65-6.26; P = .00001) were also significantly higher in newborns from mothers with SLE. Moreover, the risk of newborns who were admitted to the neonatal intensive care unit (RR: 2.79, 95% CI: 2.31-3.37; P = .00001), newborns with an APGAR score <7 within 1 minute (RR: 2.47, 95% CI: 1.68-3.62; P = .00001) and 5 minutes (RR: 3.63, 95% CI: 2.04-6.45; P = .0001) respectively, were significantly highly associated with SLE.

Conclusions

Based on the most recent studies, we could conclude that maternal and fetal complications were significantly higher in SLE-associated pregnancy. Therefore, SLE should still be considered a severe risk factor for pregnancy.",2020-04-01 +23729657,"The non-obese diabetic mouse sequence, annotation and variation resource: an aid for investigating type 1 diabetes.","Model organisms are becoming increasingly important for the study of complex diseases such as type 1 diabetes (T1D). The non-obese diabetic (NOD) mouse is an experimental model for T1D having been bred to develop the disease spontaneously in a process that is similar to humans. Genetic analysis of the NOD mouse has identified around 50 disease loci, which have the nomenclature Idd for insulin-dependent diabetes, distributed across at least 11 different chromosomes. In total, 21 Idd regions across 6 chromosomes, that are major contributors to T1D susceptibility or resistance, were selected for finished sequencing and annotation at the Wellcome Trust Sanger Institute. Here we describe the generation of 40.4 mega base-pairs of finished sequence from 289 bacterial artificial chromosomes for the NOD mouse. Manual annotation has identified 738 genes in the diabetes sensitive NOD mouse and 765 genes in homologous regions of the diabetes resistant C57BL/6J reference mouse across 19 candidate Idd regions. This has allowed us to call variation consequences between homologous exonic sequences for all annotated regions in the two mouse strains. We demonstrate the importance of this resource further by illustrating the technical difficulties that regions of inter-strain structural variation between the NOD mouse and the C57BL/6J reference mouse can cause for current next generation sequencing and assembly techniques. 
Furthermore, we have established that the variation rate in the Idd regions is 2.3 times higher than the mean found for the whole genome assembly for the NOD/ShiLtJ genome, which we suggest reflects the fact that positive selection for functional variation in immune genes is beneficial in regard to host defence. In summary, we provide an important resource, which aids the analysis of potential causative genes involved in T1D susceptibility. Database URLs: http://www.sanger.ac.uk/resources/mouse/nod/; http://vega-previous.sanger.ac.uk/info/data/mouse_regions.html#Idd",2013-05-31 +31372401,Proteomic dataset: Profiling of cultivated Echerichia coli isolates from Crohn's disease patients and healthy individuals.,"One of the dysbioses often observed in Crohn's disease (CD) patients is an increased abundance of Escherichia coli (10-100 fold compared to healthy individuals) (Gevers et al., 2014). The data reported is a large-scale proteome profile for E. coli isolates collected from CD patients and healthy individuals. 43 isolates were achieved from 30 CD patients (17 male, 12 female, median age 30) and 19 isolates from 7 healthy individuals (7 male, median age 19). Isolates were cultivated on LB medium at aerobic conditions up to medium log phase. Protein extraction was performed with sodium deoxycholate (DCNa) and urea, alcylation with tris(2-carboxyethyl)phosphine and iodacetamide. Protein trypsinolysis was performed as described in (Matyushkina et al., 2016). Total cell proteomes were analysed by shotgun proteomics with HPLC-MS/MS on a maXis qTOF mass-spectrometer. 
The data including HPLC-MS/MS raw files and exported Mascot search results was deposited to the PRIDE repository project accession: PXD010920, project https://doi.org/10.6019/PXD010920.",2019-03-07 +30510599,The GEWEX Water Vapor Assessment archive of water vapour products from satellite observations and reanalyses.,"The Global Energy and Water cycle Exchanges (GEWEX) Data and Assessments Panel (GDAP) initiated the GEWEX Water Vapor Assessment (G-VAP), which has the main objectives to quantify the current state of art in water vapour products being constructed for climate applications and to support the selection process of suitable water vapour products by GDAP for its production of globally consistent water and energy cycle products. During the construction of the G-VAP data archive, freely available and mature satellite and reanalysis data records with a minimum temporal coverage of 10 years were considered. The archive contains total column water vapour (TCWV) as well as specific humidity and temperature at four pressure levels (1000, 700, 500, 300 hPa) from 22 different data records. All data records were remapped to a regular longitude/latitude grid of 2°x2°. The archive consists of four different folders: 22 TCWV data records covering the period 2003-2008, 11 TCWV data records covering the period 1988-2008, as well as seven specific humidity and seven temperature data records covering the period 1988-2009. The G-VAP data archive is referenced under the following digital object identifier (doi): http://dx.doi.org/10.5676/EUM_SAF_CM/GVAP/V001. Within G-VAP, the characterisation of water vapour products is, among other ways, achieved through intercomparisons of the considered data records, as a whole and grouped into three classes of predominant retrieval condition: clear-sky, cloudy-sky and all-sky. Associated results are shown using the 22 TCWV data records. 
The standard deviations among the 22 TCWV data records have been analysed and exhibit distinct maxima over central Africa and the tropical warm pool (in absolute terms) as well as over the poles and mountain regions (in relative terms). The variability in TCWV within each class can be large and prohibits conclusions on systematic differences in TCWV between the classes.",2018-06-15 +29548284,IDPM: an online database for ion distribution in protein molecules.,"

Background

Interactions between ions and proteins have been extensively studied, yet most of the studies focus on the ion binding site. The binding mechanism for many ion binding sites can be clearly described from high resolution structures. Although knowledge accumulated on a case-by-case basis is valuable, it is also important to study the ion-protein interaction statistically. From experimentally determined structures, it is possible to examine the ion distribution around each amino acid. Such distributions can reveal relation between ions and amino acids, so it is desirable to carry out a systematic survey of 'ion-amino acid' pairing interaction and share the information with a publicly available database.

Results

The survey in the Protein Data Bank (PDB) revealed that approximately 40% of molecules records contain at least one ion. To reduce the bias resulted from protein redundancy, the statistics were extracted from a non-redundant dataset by excluding the proteins with similar sequences. Based on the structures of protein molecules and the location of ions, the statistical distributions of ions around each proteinogenic amino acid type were investigated and further summarized in a database. To systematically quantify the interactions between ions and each amino acid, the positions of ions were mapped to the coordinate system centered at each neighboring amino acid. It was found that the distribution of ions follows the expected rules governed by the physicochemical interactions in general. Large variations were observed, reflecting the preference in 'ion-amino acid' interactions. The analysis program is written in the Python programming language. The statistical results and program are available from the online database: ion distribution in protein molecules (IDPM) at http://liulab.csrc.ac.cn/idpm/ .

Conclusion

The spatial distribution of ions around amino acids is documented and analyzed. The statistics can be useful for identifying ion types for a given site in biomolecules, and can be potentially used in ion position prediction for given structures.",2018-03-16 +28025334,Minimizing proteome redundancy in the UniProt Knowledgebase. ,"Advances in high-throughput sequencing have led to an unprecedented growth in genome sequences being submitted to biological databases. In particular, the sequencing of large numbers of nearly identical bacterial genomes during infection outbreaks and for other large-scale studies has resulted in a high level of redundancy in nucleotide databases and consequently in the UniProt Knowledgebase (UniProtKB). Redundancy negatively impacts on database searches by causing slower searches, an increase in statistical bias and cumbersome result analysis. The redundancy combined with the large data volume increases the computational costs for most reuses of UniProtKB data. All of this poses challenges for effective discovery in this wealth of data. With the continuing development of sequencing technologies, it is clear that finding ways to minimize redundancy is crucial to maintaining UniProt's essential contribution to data interpretation by our users. We have developed a methodology to identify and remove highly redundant proteomes from UniProtKB. The procedure identifies redundant proteomes by performing pairwise alignments of sets of sequences for pairs of proteomes and subsequently, applies graph theory to find dominating sets that provide a set of non-redundant proteomes with a minimal loss of information. This method was implemented for bacteria in mid-2015, resulting in a removal of 50 million proteins in UniProtKB. 
With every new release, this procedure is used to filter new incoming proteomes, resulting in a more scalable and scientifically valuable growth of UniProtKB. Database URL: http://www.uniprot.org/proteomes/.",2016-12-26 +25048123,Antigen-Antibody Interaction Database (AgAbDb): a compendium of antigen-antibody interactions.,"Antigen-Antibody Interaction Database (AgAbDb) is an immunoinformatics resource developed at the Bioinformatics Centre, University of Pune, and is available online at http://bioinfo.net.in/AgAbDb.htm. Antigen-antibody interactions are a special class of protein-protein interactions that are characterized by high affinity and strict specificity of antibodies towards their antigens. Several co-crystal structures of antigen-antibody complexes have been solved and are available in the Protein Data Bank (PDB). AgAbDb is a derived knowledgebase developed with an objective to compile, curate, and analyze determinants of interactions between the respective antigen-antibody molecules. AgAbDb lists not only the residues of binding sites of antigens and antibodies, but also interacting residue pairs. It also helps in the identification of interacting residues and buried residues that constitute antibody-binding sites of protein and peptide antigens. The Antigen-Antibody Interaction Finder (AAIF), a program developed in-house, is used to compile the molecular interactions, viz. van der Waals interactions, salt bridges, and hydrogen bonds. A module for curating water-mediated interactions has also been developed. In addition, various residue-level features, viz. accessible surface area, data on epitope segment, and secondary structural state of binding site residues, are also compiled. Apart from the PDB numbering, Wu-Kabat numbering and explicit definitions of complementarity-determining regions are provided for residues of antibodies. The molecular interactions can be visualized using the program Jmol. 
AgAbDb can be used as a benchmark dataset to validate algorithms for prediction of B-cell epitopes. It can as well be used to improve accuracy of existing algorithms and to design new algorithms. AgAbDb can also be used to design mimotopes representing antigens as well as aid in designing processes leading to humanization of antibodies.",2014-01-01 +30052749,Re-identification of individuals in genomic data-sharing beacons via allele inference.,"

Motivation

Genomic data-sharing beacons aim to provide a secure, easy to implement and standardized interface for data-sharing by only allowing yes/no queries on the presence of specific alleles in the dataset. Previously deemed secure against re-identification attacks, beacons were shown to be vulnerable despite their stringent policy. Recent studies have demonstrated that it is possible to determine whether the victim is in the dataset, by repeatedly querying the beacon for his/her single-nucleotide polymorphisms (SNPs). Here, we propose a novel re-identification attack and show that the privacy risk is more serious than previously thought.

Results

Using the proposed attack, even if the victim systematically hides informative SNPs, it is possible to infer the alleles at positions of interest as well as the beacon query results with very high confidence. Our method is based on the fact that alleles at different loci are not necessarily independent. We use linkage disequilibrium and a high-order Markov chain-based algorithm for inference. We show that in a simulated beacon with 65 individuals from the European population, we can infer membership of individuals with 95% confidence with only 5 queries, even when SNPs with MAF <0.05 are hidden. We need less than 0.5% of the number of queries that existing works require, to determine beacon membership under the same conditions. We show that countermeasures such as hiding certain parts of the genome or setting a query budget for the user would fail to protect the privacy of the participants.

Availability and implementation

Software is available at http://ciceklab.cs.bilkent.edu.tr/beacon_attack.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +30903145,ORE identifies extreme expression effects enriched for rare variants.,"

Motivation

Non-coding rare variants (RVs) may contribute to Mendelian disorders but have been challenging to study due to small sample sizes, genetic heterogeneity and uncertainty about relevant non-coding features. Previous studies identified RVs associated with expression outliers, but varying outlier definitions were employed and no comprehensive open-source software was developed.

Results

We developed Outlier-RV Enrichment (ORE) to identify biologically-meaningful non-coding RVs. We implemented ORE combining whole-genome sequencing and cardiac RNAseq from congenital heart defect patients from the Pediatric Cardiac Genomics Consortium and deceased adults from Genotype-Tissue Expression. Use of rank-based outliers maximized sensitivity while a most extreme outlier approach maximized specificity. Rarer variants had stronger associations, suggesting they are under negative selective pressure and providing a basis for investigating their contribution to Mendelian disorders.

Availability and implementation

ORE, source code, and documentation are available at https://pypi.python.org/pypi/ore under the MIT license.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +27895718,Consensus Diversity Plots: a global diversity analysis of chemical libraries.,"

Background

Measuring the structural diversity of compound databases is relevant in drug discovery and many other areas of chemistry. Since molecular diversity depends on molecular representation, comprehensive chemoinformatic analysis of the diversity of libraries uses multiple criteria. For instance, the diversity of the molecular libraries is typically evaluated employing molecular scaffolds, structural fingerprints, and physicochemical properties. However, the assessment with each criterion is analyzed independently and it is not straightforward to provide an evaluation of the ""global diversity"".

Results

Herein the Consensus Diversity Plot (CDP) is proposed as a novel method to represent in low dimensions the diversity of chemical libraries considering simultaneously multiple molecular representations. We illustrate the application of CDPs to classify eight compound data sets and two subsets with different sizes and compositions using molecular scaffolds, structural fingerprints, and physicochemical properties.

Conclusions

CDPs are general data mining tools that represent in two-dimensions the global diversity of compound data sets using multiple metrics. These plots can be constructed using single or combined measures of diversity. An online version of the CDPs is freely available at: https://consensusdiversityplots-difacquim-unam.shinyapps.io/RscriptsCDPlots/. Graphical Abstract: Consensus Diversity Plot is a novel data mining tool that represents in two-dimensions the global diversity of compound data sets using multiple metrics.",2016-11-10 +28165111,MotifNet: a web-server for network motif analysis.,"

Summary

Network motifs are small topological patterns that recur in a network significantly more often than expected by chance. Their identification emerged as a powerful approach for uncovering the design principles underlying complex networks. However, available tools for network motif analysis typically require download and execution of computationally intensive software on a local computer. We present MotifNet, the first open-access web-server for network motif analysis. MotifNet allows researchers to analyze integrated networks, where nodes and edges may be labeled, and to search for motifs of up to eight nodes. The output motifs are presented graphically and the user can interactively filter them by their significance, number of instances, node and edge labels, and node identities, and view their instances. MotifNet also allows the user to distinguish between motifs that are centered on specific nodes and motifs that recur in distinct parts of the network.

Availability and implementation

MotifNet is freely available at http://netbio.bgu.ac.il/motifnet . The website was implemented using ReactJs and supports all major browsers. The server interface was implemented in Python with data stored on a MySQL database.

Contact

estiyl@bgu.ac.il or michaluz@cs.bgu.ac.il.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +31051039,High precision protein functional site detection using 3D convolutional neural networks.,"

Motivation

Accurate annotation of protein functions is fundamental for understanding molecular and cellular physiology. Data-driven methods hold promise for systematically deriving rules underlying the relationship between protein structure and function. However, the choice of protein structural representation is critical. Pre-defined biochemical features emphasize certain aspects of protein properties while ignoring others, and therefore may fail to capture critical information in complex protein sites.

Results

In this paper, we present a general framework that applies 3D convolutional neural networks (3DCNNs) to structure-based protein functional site detection. The framework can extract task-dependent features automatically from the raw atom distributions. We benchmarked our method against other methods and demonstrate better or comparable performance for site detection. Our deep 3DCNNs achieved an average recall of 0.955 at a precision threshold of 0.99 on PROSITE families, detected 98.89 and 92.88% of nitric oxide synthase and TRYPSIN-like enzyme sites in Catalytic Site Atlas, and showed good performance on challenging cases where sequence motifs are absent but a function is known to exist. Finally, we inspected the individual contributions of each atom to the classification decisions and show that our models successfully recapitulate known 3D features within protein functional sites.

Availability and implementation

The 3DCNN models described in this paper are available at https://simtk.org/projects/fscnn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +31882276,Independent support for corticopallidal contributions to schizophrenia-related functional impairment.,"

Background

Abnormalities between the prefrontal cortex and basal ganglia have been described by numerous studies of schizophrenia (SZ). We recently reported that individuals with first episode SZ who develop greater vocational and social impairments show lower baseline functional connectivity between the globus pallidus (GP) and regions of the intrinsic salience network. Here we extend these findings to probe the integrity of this system in individuals with chronic illness.

Methods

All data were obtained from a publicly available Center of Biomedical Research Excellence dataset (http://fcon_1000.projects.nitric.org/indi/retro/cobre.html) that included resting-state fMRI and structural scans, and an array of clinical and neuropsychological measures. Participants with SZ were divided into high- or low-functioning groups based on scores across measures of psychopathology and cognitive functioning. Corticopallidal functional connectivity was examined between low- and high-functioning individuals with SZ and matched healthy control participants. We focused on connectivity between GP structures and a priori regions of the salience network that were significant in our previous study. Exploratory voxel-wise analyses were also conducted.

Results

Lower functioning individuals with SZ demonstrated less connectivity between bilateral GP externa and nodes within the salience network, relative to healthy controls. No connectivity differences were observed between low- and high-functioning individuals with SZ. Exploratory voxel-wise analyses highlighted additional large-scale corticopallidal abnormalities in lower-functioning participants with SZ.

Conclusions

These results confirm our previous work in a more chronic cohort of individuals with SZ. Our findings further advance corticopallidal connectivity as a biomarker of functional impairments in SZ and lay the groundwork for treatment-based studies.",2019-12-24 +30002915,International Spinal Cord Injury Lower Urinary Tract Function Basic Data Set (version 2.0).,"

Study design

Revision, review, and presentation of the International Spinal Cord Injury (SCI) Lower Urinary Tract (LUT) Function Basic Data Set (version 2.0).

Objectives

Describe the revision and review and present the data set.

Setting

International.

Methods

The first version of the data set was revised according to new knowledge and suggestions. The review included International SCI Data Sets Committee, American Spinal Injury Association (ASIA) board, International Spinal Cord Society (ISCoS) executive and scientific committees, major organizations, and interested individuals. The data set was also on ASIA and ISCoS websites. All replies were answered and appropriate adjustments made. Finally, the data set was endorsed by ASIA board, and ISCoS executive and scientific committees.

Results

Among revisions are adoptions of new terminology by the International Continence Society. For most variables, advice for collection of information from pediatric patients stated. For the variable 'Bladder emptying', is in the data collection form to the response category 'Normal voiding' expanded. 'Sacral Anterior Root Stimulator' is deleted as response category. For the variable 'Any involuntary urine leakage (incontinence) within the last 4 weeks' 'last 4 weeks' has replaced 'last 3 months'. The response categories have been adjusted to: 'Daily', 'Once or more per week (but not daily)', 'Less than once per week', 'Never', 'Not applicable' and 'Unknown'. For the variable 'Any drugs for the urinary tract within the last four weeks' 'last four weeks' has replaced 'last year'.

Conclusions

The International SCI LUT Function Basic Data Set (version 2.0) with its complete syllabus is available from http://www.iscos.org.uk/international-sci-data-sets.",2018-07-06 +31814401,Smart Cutter: An Efficient Strategy for Increasing the Coverage of Chemical Cross-Linking Analysis.,"Chemical cross-linking combined with mass spectrometry (CXMS) has emerged as a powerful tool to study protein structure, conformation, and protein-protein interactions (PPIs). Until now, most cross-linked peptides were generated by using commercial cross-linkers, such as DSS, BS3, and DSSO, which react with the primary amino groups of the lysine residues of proteins. However, trypsin, the most commonly used proteolytic enzyme, cannot cleave the C-terminus of a linked lysine, making the obtained cross-linked peptides longer than common peptides and unfavorable for MS identification and data searching. Herein, we propose an in situ sequential digestion strategy using enzymes with distinct cleavage specificity, named as smart cutter, to generate cross-linked peptides with suitable length so that the identification coverage could improve. Through the application of such a strategy to DSS cross-linked E. coli lysates, additional cross-linked sites (1.3-fold increase) obtained in comparison with those obtained by trypsin-trypsin digestion (2879 vs 1255). Among the different digestion combinations, AspN-trypsin performed the best, with 64% (673/1059) of the cross-linked sites complementary to trypsin-trypsin digestion, which is beneficial to ensure the depth for studying protein structure and PPIs. Taking the 60 kDa chaperonin protein as an example, more than twice the cross-linked sites (30 vs 14) were identified to enrich the protein structure information. In addition, compared to the published protein interaction network for E. coli ( http://www.bacteriome.org ), 91 potential PPIs were discovered with our strategy, of which 65 have not covered by trypsin-trypsin digestion. 
Therefore, these results illustrate the great significance of smart-cutter-based CXMS for the revelation of protein structure as well as finding new PPIs.",2019-12-23 +26485378,Integrative Genomics-Based Discovery of Novel Regulators of the Innate Antiviral Response.,"The RIG-I-like receptor (RLR) pathway is essential for detecting cytosolic viral RNA to trigger the production of type I interferons (IFNα/β) that initiate an innate antiviral response. Through systematic assessment of a wide variety of genomics data, we discovered 10 molecular signatures of known RLR pathway components that collectively predict novel members. We demonstrate that RLR pathway genes, among others, tend to evolve rapidly, interact with viral proteins, contain a limited set of protein domains, are regulated by specific transcription factors, and form a tightly connected interaction network. Using a Bayesian approach to integrate these signatures, we propose likely novel RLR regulators. RNAi knockdown experiments revealed a high prediction accuracy, identifying 94 genes among 187 candidates tested (~50%) that affected viral RNA-induced production of IFNβ. The discovered antiviral regulators may participate in a wide range of processes that highlight the complexity of antiviral defense (e.g. MAP3K11, CDK11B, PSMA3, TRIM14, HSPA9B, CDC37, NUP98, G3BP1), and include uncharacterized factors (DDX17, C6orf58, C16orf57, PKN2, SNW1). 
Our validated RLR pathway list (http://rlr.cmbi.umcn.nl/), obtained using a combination of integrative genomics and experiments, is a new resource for innate antiviral immunity research.",2015-10-20 +31456676,Linking Molecular Pathways and Large-Scale Computational Modeling to Assess Candidate Disease Mechanisms and Pharmacodynamics in Alzheimer's Disease.,"Introduction: While the prevalence of neurodegenerative diseases associated with dementia such as Alzheimer's disease (AD) increases, our knowledge on the underlying mechanisms, outcome predictors, or therapeutic targets is limited. In this work, we demonstrate how computational multi-scale brain modeling links phenomena of different scales and therefore identifies potential disease mechanisms leading the way to improved diagnostics and treatment. Methods: The Virtual Brain (TVB; thevirtualbrain.org) neuroinformatics platform allows standardized large-scale structural connectivity-based simulations of whole brain dynamics. We provide proof of concept for a novel approach that quantitatively links the effects of altered molecular pathways onto neuronal population dynamics. As a novelty, we connect chemical compounds measured with positron emission tomography (PET) with neural function in TVB addressing the phenomenon of hyperexcitability in AD related to the protein amyloid beta (Abeta). We construct personalized virtual brains based on an averaged healthy connectome and individual PET derived distributions of Abeta in patients with mild cognitive impairment (MCI, N = 8) and Alzheimer's Disease (AD, N = 10) and in age-matched healthy controls (HC, N = 15) using data from ADNI-3 data base (http://adni.loni.usc.edu). In the personalized virtual brains, individual Abeta burden modulates regional Excitation-Inhibition balance, leading to local hyperexcitation with high Abeta loads. We analyze simulated regional neural activity and electroencephalograms (EEG). 
Results: Known empirical alterations of EEG in patients with AD compared to HCs were reproduced by simulations. The virtual AD group showed slower frequencies in simulated local field potentials and EEG compared to MCI and HC groups. The heterogeneity of the Abeta load is crucial for the virtual EEG slowing which is absent for control models with homogeneous Abeta distributions. Slowing phenomena primarily affect the network hubs, independent of the spatial distribution of Abeta. Modeling the N-methyl-D-aspartate (NMDA) receptor antagonism of memantine in local population models, reveals potential functional reversibility of the observed large-scale alterations (reflected by EEG slowing) in virtual AD brains. Discussion: We demonstrate how TVB enables the simulation of systems effects caused by pathogenetic molecular candidate mechanisms in human virtual brains.",2019-08-13 +31799878,"Multiple Sources of the Outbreak of Legionnaires' Disease in Genesee County, Michigan, in 2014 and 2015.","BACKGROUND:A community-wide outbreak of Legionnaires' disease (LD) occurred in Genesee County, Michigan, in 2014 and 2015. Previous reports about the outbreak are conflicting and have associated the outbreak with a change of water source in the city of Flint and, alternatively, to a Flint hospital. OBJECTIVE:The objective of this investigation was to independently identify relevant sources of Legionella pneumophila that likely resulted in the outbreak. METHODS:An independent, retrospective investigation of the outbreak was conducted, making use of public health, health care, and environmental data and whole-genome multilocus sequence typing (wgMLST) of clinical and environmental isolates. RESULTS:Strong evidence was found for a hospital-associated outbreak in both 2014 and 2015: a) 49% of cases had prior exposure to Flint hospital A, significantly higher than expected from Medicare admissions; b) hospital plumbing contained high levels of L. 
pneumophila; c) Legionella control measures in hospital plumbing aligned with subsidence of hospital A-associated cases; and d) wgMLST showed Legionella isolates from cases exposed to hospital A and from hospital plumbing to be highly similar. Multivariate analysis showed an increased risk of LD in 2014 for people residing in a home that received Flint water or was located in proximity to several Flint cooling towers. DISCUSSION:This is the first LD outbreak in the United States with evidence for three sources (in 2014): a) exposure to hospital A, b) receiving Flint water at home, and c) residential proximity to cooling towers; however, for 2015, evidence points to hospital A only. Each source could be associated with only a proportion of cases. A focus on a single source may have delayed recognition and remediation of other significant sources of L. pneumophila. https://doi.org/10.1289/EHP5663.",2019-12-04 +27928499,Whole genome resequencing of a laboratory-adapted Drosophila melanogaster population sample.,"As part of a study into the molecular genetics of sexually dimorphic complex traits, we used high-throughput sequencing to obtain data on genomic variation in an outbred laboratory-adapted fruit fly ( Drosophila melanogaster) population. We successfully resequenced the whole genome of 220 hemiclonal females that were heterozygous for the same Berkeley reference line genome (BDGP6/dm6), and a unique haplotype from the outbred base population (LH M). The use of a static and known genetic background enabled us to obtain sequences from whole-genome phased haplotypes. We used a BWA-Picard-GATK pipeline for mapping sequence reads to the dm6 reference genome assembly, at a median depth-of coverage of 31X, and have made the resulting data publicly-available in the NCBI Short Read Archive (Accession number SRP058502). We used Haplotype Caller to discover and genotype 1,726,931 small genomic variants (SNPs and indels, <200bp). 
Additionally we detected and genotyped 167 large structural variants (1-100Kb in size) using GenomeStrip/2.0. Sequence and genotype data are publicly-available at the corresponding NCBI databases: Short Read Archive, dbSNP and dbVar (BioProject PRJNA282591). We have also released the unfiltered genotype data, and the code and logs for data processing and summary statistics ( https://zenodo.org/communities/sussex_drosophila_sequencing/).",2016-11-07 +26317619,FMiR: A Curated Resource of Mitochondrial DNA Information for Fish.,"Mitochondrial genome sequences have been widely used for evolutionary and phylogenetic studies. Among vertebrates, fish are an important, diverse group, and their mitogenome sequences are growing rapidly in public repositories. To facilitate mitochondrial genome analysis and to explore the valuable genetic information, we developed the Fish Mitogenome Resource (FMiR) database to provide a workbench for mitogenome annotation, species identification and microsatellite marker mining. The microsatellites are also known as simple sequence repeats (SSRs) and used as molecular markers in studies on population genetics, gene duplication and marker assisted selection. Here, easy-to-use tools have been implemented for mining SSRs and for designing primers to identify species/habitat specific markers. In addition, FMiR can analyze complete or partial mitochondrial genome sequence to identify species and to deduce relational distances among sequences across species. The database presently contains curated mitochondrial genomes from 1302 fish species belonging to 297 families and 47 orders reported from saltwater and freshwater ecosystems. In addition, the database covers information on fish species such as conservation status, ecosystem, family, distribution and occurrence downloaded from the FishBase and IUCN Red List databases. Those fish information have been used to browse mitogenome information for the species belonging to a particular category. 
The database is scalable in terms of content and inclusion of other analytical modules. The FMiR is running under Linux operating platform on high performance server accessible at URL http://mail.nbfgr.res.in/fmir.",2015-08-28 +28457834,"A network perspective on antimicrobial peptide combination therapies: the potential of colistin, polymyxin B and nisin.","Antimicrobial combinations involving antimicrobial peptides (AMPs) attract considerable attention within current antimicrobial and anti-resistance research. The objective of this study was to review the available scientific literature on the effects of antimicrobial combinations involving colistin (polymyxin E), polymyxin B and nisin, which are US Food and Drug Administration (FDA)-approved AMPs broadly tested against prominent multidrug-resistant pathogens. A bioinformatics approach based on literature mining and manual expert curation supported the reconstruction of experimental evidence on the potential of these AMP combinations, as described in the literature. Network analysis enabled further characterisation of the retrieved antimicrobial agents, targets and combinatory effects. This systematic analysis was able to output valuable information on the studies conducted on colistin, polymyxin B and nisin combinations. The reconstructed networks enable the traversal and browsing of a large number of agent combinations, providing comprehensive details on the organisms, modes of growth and methodologies used in the studies. Therefore, network analysis enables a bird's-eye view of current research trends as well as in-depth analysis of specific drugs, organisms and combinatory effects, according to particular user interests. The reconstructed knowledge networks are publicly accessible at http://sing-group.org/antimicrobialCombination/. Hopefully, this resource will help researchers to look into antimicrobial combinations more easily and systematically. 
User-customised queries may help identify missing and less studied links and to generate new research hypotheses.",2017-04-27 +29617937,Efficient analysis of large-scale genome-wide data with two R packages: bigstatsr and bigsnpr.,"

Motivation

Genome-wide datasets produced for association studies have dramatically increased in size over the past few years, with modern datasets commonly including millions of variants measured in dozens of thousands of individuals. This increase in data size is a major challenge severely slowing down genomic analyses, leading to some software becoming obsolete and researchers having limited access to diverse analysis tools.

Results

Here we present two R packages, bigstatsr and bigsnpr, allowing for the analysis of large scale genomic data to be performed within R. To address large data size, the packages use memory-mapping for accessing data matrices stored on disk instead of in RAM. To perform data pre-processing and data analysis, the packages integrate most of the tools that are commonly used, either through transparent system calls to existing software, or through updated or improved implementation of existing methods. In particular, the packages implement fast and accurate computations of principal component analysis and association studies, functions to remove single nucleotide polymorphisms in linkage disequilibrium and algorithms to learn polygenic risk scores on millions of single nucleotide polymorphisms. We illustrate applications of the two R packages by analyzing a case-control genomic dataset for celiac disease, performing an association study and computing polygenic risk scores. Finally, we demonstrate the scalability of the R packages by analyzing a simulated genome-wide dataset including 500 000 individuals and 1 million markers on a single desktop computer.

Availability and implementation

https://privefl.github.io/bigstatsr/ and https://privefl.github.io/bigsnpr/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-08-01 +31271472,ProtDCal-Suite: A web server for the numerical codification and functional analysis of proteins.,"Computational tools for the analysis of protein data and the prediction of biological properties are essential in life sciences and biomedical research. Here, we introduce ProtDCal-Suite, a web server comprising a set of machine learning-based methods for studying proteins. The main module of ProtDCal-Suite is the ProtDCal software. ProtDCal translates the structural information of proteins into numerical descriptors that serve as input to machine-learning techniques. The ProtDCal-Suite server also incorporates a post-processing optional stage that allows ranking and filtering the obtained descriptors by computing their Shannon entropy values across the input set of proteins. ProtDCal's codification was used in the development of models for the prediction of specific protein properties. Thus, the other modules of ProtDCal-Suite are protein analysis tools implemented using ProtDCal's descriptors. Among them are PPI-Detect, for predicting the interaction likelihood of protein-protein and protein-peptide pairs, Enzyme Identifier, for identifying enzymes from amino acid sequences or 3D structures, and Pred-NGlyco, for predicting N-glycosylation sites. ProtDCal-Suite is freely accessible at https://protdcal.zmb.uni-due.de.",2019-09-01 +25382819,ProKinO: a unified resource for mining the cancer kinome.,"Protein kinases represent a large and diverse family of evolutionarily related proteins that are abnormally regulated in human cancers. Although genome sequencing studies have revealed thousands of variants in protein kinases, translating ""big"" genomic data into biological knowledge remains a challenge. 
Here, we describe an ontological framework for integrating and conceptualizing diverse forms of information related to kinase activation and regulatory mechanisms in a machine readable, human understandable form. We demonstrate the utility of this framework in analyzing the cancer kinome, and in generating testable hypotheses for experimental studies. Through the iterative process of aggregate ontology querying, hypothesis generation and experimental validation, we identify a novel mutational hotspot in the αC-β4 loop of the kinase domain and demonstrate the functional impact of the identified variants in epidermal growth factor receptor (EGFR) constitutive activity and inhibitor sensitivity. We provide a unified resource for the kinase and cancer community, ProKinO, housed at http://vulcan.cs.uga.edu/prokino.",2015-02-01 +28894890,New prognostic factor telomerase reverse transcriptase promotor mutation presents without MR imaging biomarkers in primary glioblastoma.,"

Purpose

Magnetic resonance (MR) imaging biomarkers can assist in the non-invasive assessment of the genetic status in glioblastomas (GBMs). Telomerase reverse transcriptase (TERT) promoter mutations are associated with a negative prognosis. This study was performed to identify MR imaging biomarkers to forecast the TERT mutation status.

Methods

Pre-operative MRIs of 64/67 genetically confirmed primary GBM patients (51/67 TERT-mutated with rs2853669 polymorphism) were analyzed according to Visually AcceSAble Rembrandt Images (VASARI) ( https://wiki.cancerimagingarchive.net/display/Public/VASARI+Research+Project ) imaging criteria by three radiological raters. TERT mutation and O6-methylguanine-DNA methyltransferase (MGMT) hypermethylation data were obtained through direct and pyrosequencing as described in a previous study. Clinical data were derived from a prospectively maintained electronic database. Associations of potential imaging biomarkers and genetic status were assessed by Fisher and Mann-Whitney U tests and stepwise linear regression.

Results

No imaging biomarkers could be identified to predict TERT mutational status (alone or in conjunction with TERT promoter polymorphism rs2853669 AA-allele). TERT promoter mutations were more common in patients with tumor-associated seizures as first symptom (26/30 vs. 25/37, p = 0.07); these showed significantly smaller tumors [13.1 (9.0-19.0) vs. 24.0 (16.6-37.5) all cm3; p = 0.007] and prolonged median overall survival [17.0 (11.5-28.0) vs. 9.0 (4.0-12.0) all months; p = 0.02]. TERT-mutated GBMs were underrepresented in the extended angularis region (p = 0.03), whereas MGMT-methylated GBMs were overrepresented in the corpus callosum (p = 0.03) and underrepresented temporomesially (p = 0.01).

Conclusion

Imaging biomarkers for prediction of TERT mutation status remain weak and cannot be derived from the VASARI protocol. Tumor-associated seizures are less common in TERT mutated glioblastomas.",2017-09-11 +28983682,Biliary atresia: unity in diversity.,"Biliary atresia (BA) is a rare disease of unknown origin and unsatisfying outcome. Single, multicenter and national evaluations of epidemiological and outcome data on BA have been periodically published over the course of decades. However, the diversity of the registered parameters and outcome measures impede comparability and cumulative analysis of these very worthwhile studies. Taking into account the fact that BA is a good example of translational research and transition of patients from pediatric surgery and hepatology to transplant surgery and hepatology in general, the interdisciplinary community should make every effort to develop a common platform upon which further activities are conducted. Extending this topic to BA-related diseases might increase the acceptance of research studies and enhance the effectiveness of any recommendations outlined therein. The use of the Internet-based communication platform and registry on http://www.bard-online.com represents the first step in this direction, and the database should be viewed as a helpful tool that guides further activities.",2017-10-05 +28613845,Spatial and Temporal Trends in Global Emissions of Nitrogen Oxides from 1960 to 2014.,"The quantification of nitrogen oxide (NOx) emissions is critical for air quality modeling. Based on updated fuel consumption and emission factor databases, a global emission inventory was compiled with high spatial (0.1° × 0.1°), temporal (monthly), and source (87 sources) resolutions for the period 1960 to 2014. The monthly emission data have been uploaded online ( http://inventory.pku.edu.cn ), along with a number of other air pollutant and greenhouse gas data for free download. 
Differences in source profiles, not global total quantities, between our results and those reported previously were found. There were significant differences in total and per capita emissions and emission intensities among countries, especially between the developing and developed countries. Globally, the total annual NOx emissions finally stopped increasing in 2013 after continuously increasing over several decades, largely due to strict control measures taken in China in recent years. Nevertheless, the peak year of NOx emissions was later than for many other major air pollutants. Per capita emissions, either among countries or over years, follow typical inverted U-shaped environmental Kuznets curves, indicating that the emissions increased during the early stage of development and were restrained when socioeconomic development reached certain points. Although the trends are similar among countries, the turning points of developing countries appeared sooner than those of developed countries in terms of development status, confirming late-move advantages.",2017-06-29 +31094220,AmyCo: the amyloidoses collection.,"Amyloid fibrils are formed when soluble proteins misfold into highly ordered insoluble fibrillar aggregates and affect various organs and tissues. The deposition of amyloid fibrils is the main hallmark of a group of disorders, called amyloidoses. Curiously, fibril deposition has been also recorded as a complication in a number of other pathological conditions, including well-known neurodegenerative or endocrine diseases. To date, amyloidoses are roughly classified, owing to their tremendous heterogeneity. In this work, we introduce AmyCo, a freely available collection of amyloidoses and clinical disorders related to amyloid deposition. AmyCo classifies 75 diseases associated with amyloid deposition into two distinct categories, namely 1) amyloidosis and 2) clinical conditions associated with amyloidosis. 
Each database entry is annotated with the major protein component (causative protein), other components of amyloid deposits and affected tissues or organs. Database entries are also supplemented with appropriate detailed annotation and are referenced to ICD-10, MeSH, OMIM, PubMed, AmyPro and UniProtKB databases. To our knowledge, AmyCo is the first attempt towards the creation of a complete and an up-to-date repository, containing information about amyloidoses and diseases related to amyloid deposition. The AmyCo web interface is available at http://bioinformatics.biol.uoa.gr/amyco .",2019-05-16 +28343097,STRait Razor v2s: Advancing sequence-based STR allele reporting and beyond to other marker systems.,"STRait Razor has provided the forensic community a free-to-use, open-source tool for short tandem repeat (STR) analysis of massively parallel sequencing (MPS) data. STRait Razor v2s (SRv2s) allows users to capture physically phased haplotypes within the full amplicon of both commercial (ForenSeq) and ""early access"" panels (PowerSeq, Mixture ID). STRait Razor v2s may be run in batch mode to facilitate population-level analysis and is supported by all Unix distributions (including MAC OS). Data are reported in tables in string (haplotype), length-based (e.g., vWA allele 14), and International Society of Forensic Genetics (ISFG)-recommended (vWA [CE 14]-GRCh38-chr12:5983950-5984049 (TAGA)10 (CAGA)3 TAGA) formats. STRait Razor v2s currently contains a database of ∼2500 unique sequences. This database is used by SRv2s to match strings to the appropriate allele in ISFG-recommended format. In addition to STRs, SRv2s has configuration files necessary to capture and report haplotypes from all marker types included in these multiplexes (e.g., SNPs, InDels, and microhaplotypes). To facilitate mixture interpretation, data may be displayed from all markers in a format similar to that of electropherograms displayed by traditional forensic software. 
The download package for SRv2s may be found at https://www.unthsc.edu/graduate-school-of-biomedical-sciences/molecular-and-medical-genetics/laboratory-faculty-and-staff/strait-razor.",2017-03-12 +27504778,Sharing and community curation of mass spectrometry data with Global Natural Products Social Molecular Networking.,"The potential of the diverse chemistries present in natural products (NP) for biotechnology and medicine remains untapped because NP databases are not searchable with raw data and the NP community has no way to share data other than in published papers. Although mass spectrometry (MS) techniques are well-suited to high-throughput characterization of NP, there is a pressing need for an infrastructure to enable sharing and curation of data. We present Global Natural Products Social Molecular Networking (GNPS; http://gnps.ucsd.edu), an open-access knowledge base for community-wide organization and sharing of raw, processed or identified tandem mass (MS/MS) spectrometry data. In GNPS, crowdsourced curation of freely available community-wide reference MS libraries will underpin improved annotations. Data-driven social-networking should facilitate identification of spectra and foster collaborations. We also introduce the concept of 'living data' through continuous reanalysis of deposited data.",2016-08-01 +32027205,Blood volume and pain perception during finger prick capillary blood sampling: are all safety lancets equal?,"

Objectives

This study aimed to assess various types of safety lancets in terms of blood volume and pain perception during capillary blood sampling, a routine finger-puncture procedure for obtaining a small amount of human blood for running various screening and diagnostic tests.

Methods

Data were collected from 100 adult healthy volunteers following finger-puncture procedure. Four different types of safety lancets were tested (Acti-Lance, Prolance, Medlance Plus, and MediSafe Solo). Each type has its own versions, giving 16 different safety lancets in total.

Results

A significant difference in the mean capillary blood volume was found between blade and needle equipped safety lancets. MediSafe Solo type lancet had no blade version, and hence its use was associated with the lowest mean collected capillary blood volume (42.4 μL). Acti-Lance and Medlance Plus type lancets had one blade version and the mean collected capillary blood volume was 82.2 and 99.0 μL, respectively. Prolance type lancet had two blade versions, and its use was associated with the highest mean capillary blood volume (118.3 μL). The level of pain intensity was evaluated as low by the majority of patients for all lancets. Medlance Plus was the least painful and Acti-Lance was the most painful type of safety lancet. On a 0-to-10 scale of pain, 75% of punctures were assessed by the participants at a level not exceeding 3 points.

Conclusions

This study suggests that although all investigated safety lancets achieve adequate performance regarding the necessary capillary blood volume to run a diagnostic of test, lancets equipped with blades differ significantly from those equipped with needles in terms of the mean obtained capillary blood volume. Further, although all devices produced relatively low levels of pain, the amount of pain caused by blade versions of safety lancets has been found to be higher than that of needle versions.

Clinicaltrials.gov id

NCT04001348. (https://clinicaltrials.gov/ct2/show/NCT04001348?term=NCT04001348&draw=2&rank=1).",2020-02-06 +31127271,IAMBEE: a web-service for the identification of adaptive pathways from parallel evolved clonal populations.,"IAMBEE is a web server designed for the Identification of Adaptive Mutations in Bacterial Evolution Experiments (IAMBEE). Input data consist of genotype information obtained from independently evolved clonal populations or strains that show the same adapted behavior (phenotype). To distinguish adaptive from passenger mutations, IAMBEE searches for neighborhoods in an organism-specific interaction network that are recurrently mutated in the adapted populations. This search for recurrently mutated network neighborhoods, as proxies for pathways is driven by additional information on the functional impact of the observed genetic changes and their dynamics during adaptive evolution. In addition, the search explicitly accounts for the differences in mutation rate between the independently evolved populations. Using this approach, IAMBEE allows exploiting parallel evolution to identify adaptive pathways. The web-server is freely available at http://bioinformatics.intec.ugent.be/iambee/ with no login requirement.",2019-07-01 +31856825,Finding prognostic gene pairs for cancer from patient-specific gene networks.,"

Background

Molecular characterization of individual cancer patients is important because cancer is a complex and heterogeneous disease with many possible genetic and environmental causes. Many studies have been conducted to identify diagnostic or prognostic gene signatures for cancer from gene expression profiles. However, some gene signatures may fail to serve as diagnostic or prognostic biomarkers and gene signatures may not be found in gene expression profiles.

Methods

In this study, we developed a general method for constructing patient-specific gene correlation networks and for identifying prognostic gene pairs from the networks. A patient-specific gene correlation network was constructed by comparing a reference gene correlation network from normal samples to a network perturbed by a single patient sample. The main difference of our method from previous ones includes (1) it is focused on finding prognostic gene pairs rather than prognostic genes and (2) it can identify prognostic gene pairs from gene expression profiles even when no significant prognostic genes exist.

Results

Evaluation of our method with extensive data sets of three cancer types (breast invasive carcinoma, colon adenocarcinoma, and lung adenocarcinoma) showed that our approach is general and that gene pairs can serve as more reliable prognostic signatures for cancer than genes.

Conclusions

Our study revealed that prognosis of individual cancer patients is associated with the existence of prognostic gene pairs in the patient-specific network and the size of a subnetwork of the prognostic gene pairs in the patient-specific network. Although preliminary, our approach will be useful for finding gene pairs to predict survival time of patients and to tailor treatments to individual characteristics. The program for dynamically constructing patient-specific gene networks and for finding prognostic gene pairs is available at http://bclab.inha.ac.kr/pancancer.",2019-12-20 +31740530,"Staphylococcus aureus Isolated from Skin from Atopic-Dermatitis Patients Produces Staphylococcal Enterotoxin Y, Which Predominantly Induces T-Cell Receptor Vα-Specific Expansion of T Cells. ","While investigating the virulence traits of Staphylococcus aureus adhering to the skin of atopic-dermatitis (AD) patients, we identified a novel open reading frame (ORF) with structural similarity to a superantigen from genome sequence data of an isolate from AD skin. Concurrently, the same ORF was identified in a bovine isolate of S. aureus and designated SElY (H. K. Ono, Y. Sato'o, K. Narita, I. Naito, et al., Appl Environ Microbiol 81:7034-7040, 2015, https://doi.org/10.1128/AEM.01873-15). Recombinant SElYbov had superantigen activity in human peripheral blood mononuclear cells. It further demonstrated emetic activity in a primate animal model, and it was proposed that SElY be renamed SEY (H. K. Ono, S. Hirose, K. Narita, M. Sugiyama, et al., PLoS Pathog 15:e1007803, 2019, https://doi.org/10.1371/journal.ppat.1007803). Here, we investigated the prevalence of the sey gene in 270 human clinical isolates of various origins in Japan. Forty-two strains were positive for the sey gene, and the positive isolates were from patients with the skin diseases atopic dermatitis and impetigo/staphylococcal scalded skin syndrome (SSSS), with a detection rate of ∼17 to 22%. 
There were three variants of SEY (SEY1, SEY2, and SEY3), and isolates producing SEY variants formed three distinct clusters corresponding to clonal complexes (CCs) 121, 59, and 20, respectively. Most sey+ isolates produced SEY in broth culture. Unlike SEYbov, the three recombinant SEY variants exhibited stability against heat treatment. SEY predominantly activated human T cells with a particular T-cell receptor (TCR) Vα profile, a unique observation since most staphylococcal enterotoxins exert their superantigenic activities through activating T cells with specific TCR Vβ profiles. SEY may act to induce localized inflammation via skin-resident T-cell activation, facilitating the pathogenesis of S. aureus infection in disrupted epithelial barriers.",2020-01-22 +31858615,Volatile organic compounds in breath can serve as a non-invasive diagnostic biomarker for the detection of advanced adenomas and colorectal cancer.,"

Background

Colorectal cancer (CRC) is the third most common cancer diagnosis in the Western world.

Aim

To evaluate exhaled volatile organic compounds (VOCs) as a non-invasive biomarker for the detection of CRC and precursor lesions using an electronic nose.

Methods

In this multicentre study adult colonoscopy patients, without inflammatory bowel disease or (previous) malignancy, were invited for breath analysis. Two-thirds of the breath tests were randomly assigned to develop training models which were used to predict the diagnosis of the remaining patients (external validation). In the end, all data were used to develop final-disease models to further improve the discriminatory power of the algorithms.

Results

Five hundred and eleven breath samples were collected. Sixty-four patients were excluded due to an inadequate breath test (n = 51), incomplete colonoscopy (n = 8) or colitis (n = 5). Classification was based on the most advanced lesion found; CRC (n = 70), advanced adenomas (AAs) (n = 117), non-advanced adenoma (n = 117), hyperplastic polyp (n = 15), normal colonoscopy (n = 125). Training models for CRC and AAs had an area under the curve (AUC) of 0.76 and 0.71 and blind validation resulted in an AUC of 0.74 and 0.61 respectively. Final models for CRC and AAs yielded an AUC of 0.84 (sensitivity 95% and specificity 64%) and 0.73 (sensitivity and specificity 79% and 59%) respectively.

Conclusions

This study suggests that exhaled VOCs could potentially serve as a non-invasive biomarker for the detection of CRC and AAs. Future studies including more patients could further improve the discriminatory potential of VOC analysis for the detection of (pre-)malignant colorectal lesions. (https://clinicaltrials.gov Identifier NCT03488537).",2019-12-20 +29523070,"Textpresso Central: a customizable platform for searching, text mining, viewing, and curating biomedical literature.","

Background

The biomedical literature continues to grow at a rapid pace, making the challenge of knowledge retrieval and extraction ever greater. Tools that provide a means to search and mine the full text of literature thus represent an important way by which the efficiency of these processes can be improved.

Results

We describe the next generation of the Textpresso information retrieval system, Textpresso Central (TPC). TPC builds on the strengths of the original system by expanding the full text corpus to include the PubMed Central Open Access Subset (PMC OA), as well as the WormBase C. elegans bibliography. In addition, TPC allows users to create a customized corpus by uploading and processing documents of their choosing. TPC is UIMA compliant, to facilitate compatibility with external processing modules, and takes advantage of Lucene indexing and search technology for efficient handling of millions of full text documents. Like Textpresso, TPC searches can be performed using keywords and/or categories (semantically related groups of terms), but to provide better context for interpreting and validating queries, search results may now be viewed as highlighted passages in the context of full text. To facilitate biocuration efforts, TPC also allows users to select text spans from the full text and annotate them, create customized curation forms for any data type, and send resulting annotations to external curation databases. As an example of such a curation form, we describe integration of TPC with the Noctua curation tool developed by the Gene Ontology (GO) Consortium.

Conclusion

Textpresso Central is an online literature search and curation platform that enables biocurators and biomedical researchers to search and mine the full text of literature by integrating keyword and category searches with viewing search results in the context of the full text. It also allows users to create customized curation interfaces, use those interfaces to make annotations linked to supporting evidence statements, and then send those annotations to any database in the world. Textpresso Central URL: http://www.textpresso.org/tpc.",2018-03-09 +31092193,PathMe: merging and exploring mechanistic pathway knowledge.,"

Background

The complexity of representing biological systems is compounded by an ever-expanding body of knowledge emerging from multi-omics experiments. A number of pathway databases have facilitated pathway-centric approaches that assist in the interpretation of molecular signatures yielded by these experiments. However, the lack of interoperability between pathway databases has hindered the ability to harmonize these resources and to exploit their consolidated knowledge. Such a unification of pathway knowledge is imperative in enhancing the comprehension and modeling of biological abstractions.

Results

Here, we present PathMe, a Python package that transforms pathway knowledge from three major pathway databases into a unified abstraction using Biological Expression Language as the pivotal, integrative schema. PathMe is complemented by a novel web application (freely available at https://pathme.scai.fraunhofer.de/ ) which allows users to comprehensively explore pathway crosstalk and compare areas of consensus and discrepancies.

Conclusions

This work has harmonized three major pathway databases and transformed them into a unified schema in order to gain a holistic picture of pathway knowledge. We demonstrate the utility of the PathMe framework in: i) integrating pathway landscapes at the database level, ii) comparing the degree of consensus at the pathway level, and iii) exploring pathway crosstalk and investigating consensus at the molecular level.",2019-05-15 +31091224,DigChem: Identification of disease-gene-chemical relationships from Medline abstracts.,"Chemicals interact with genes in the process of disease development and treatment. Although much biomedical research has been performed to understand relationships among genes, chemicals, and diseases, which have been reported in biomedical articles in Medline, there are few studies that extract disease-gene-chemical relationships from biomedical literature at a PubMed scale. In this study, we propose a deep learning model based on bidirectional long short-term memory to identify the evidence sentences of relationships among genes, chemicals, and diseases from Medline abstracts. Then, we develop the search engine DigChem to enable disease-gene-chemical relationship searches for 35,124 genes, 56,382 chemicals, and 5,675 diseases. We show that the identified relationships are reliable by comparing them with manual curation and existing databases. DigChem is available at http://gcancer.org/digchem.",2019-05-15 +31031925,A trait-based approach to plant species selection to increase functionality of farmland vegetative strips.,"Farmland vegetative strips are a proven source of support for ecosystem services and are globally used to mitigate effects of agricultural intensification. 
However, increasing pressures on agricultural land require increases in their functionality, such as supporting multiple ecosystem services concurrently.The plant species sown in a vegetative strip seed mix determine the establishment, plant community, and ecosystem services that are supported. Currently, there is no clearly defined or structured method to select plant species for multifunctional vegetative strips.Plant traits determine how plants support ecosystem services. Also, the establishment and persistence of plant communities is influenced by key internal and external factors. We propose a novel, evidence-informed method of multifunctional vegetative strip design based on these essential traits and factors.This study had three distinct stages. The first identified plant traits that support water quality protection, pollinators and/or crop pest natural enemies, using existing research evidence. We then identified key factors affecting plant community establishment and persistence. Finally, we applied these standardized methods to design a multifunctional vegetative strip for a specific case study (UK lowland farmland).Key plant traits identified, included floral display size, flower color, nectar content, leaf surface area, leaf trichome density, percentage fine roots, root length, rooting depth, and root density. Key internal and external establishment factors included life history, native status, distribution, established competitive strategy, associated floristic diversity, flowering time and duration, and preferred soil type and pH. In the United Kingdom case study, we used five different plant traits and all of the identified factors to design a seed mix for a multifunctional vegetative strip.We present a transferable method of vegetative strip design that can be adapted for other ecosystem services and climates. 
It provides landowners and advisors with an evidence-informed approach to increase field margin functionality while supporting farmland biodiversity.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.8t52n38.",2019-04-01 +,Assessment of the mapping of fractional woody cover in southern African savannas using multi-temporal and polarimetric ALOS PALSAR L-band images,"Woody vegetation cover affects several ecosystem processes including carbon and water cycling, energy fluxes, and fire regimes. In order to understand the dynamics of savanna ecosystems, information on the spatial distribution of woody vegetation over large areas is needed. In this study we sought to assess multi-temporal ALOS PALSAR L-band backscatter to map woody cover in southern African savannas. The SAR data were acquired from the JAXA archive, covering various modes and seasons between 2007 and 2010. We used high resolution airborne LiDAR data as reference data to interpret SAR parameters (including backscatter intensities and polarimetric decomposition components), to develop SAR-based models as well as to validate SAR-based woody cover maps. The LiDAR survey was carried out in April 2008 with the Carnegie Airborne Observatory (CAO, http://cao.ciw.edu). The highest correlations to the reference data were obtained from SAR backscatters of the dry season, followed by the wet season, and the end of the wet season. The volume components from polarimetric decompositions (Freeman-Durden, Van Zyl) were calculated for the end of wet season, and showed similar correlations to the LiDAR data, when compared to cross-polarized backscatters (HV). We observed increased correlation between the SAR and LiDAR datasets with an increase in the spatial scale at which datasets were integrated, with an optimum value at 50m. 
We modeled woody cover using three scenarios: (1) a single date scenario (i.e., woody cover map based on a single SAR image), (2) a multi-seasonal scenario (i.e., woody cover map based on SAR images from the same year and different seasons, based on key phonological difference), and (3) a multi-annual scenario (i.e., woody cover map based on SAR data from different years). Predicted SAR-based woody cover map based on Fine Beam Dual Polarization dry season SAR backscatters of all years yielded the best performance with an R2 of 0.71 and RMSE of 7.88%. However, single dry season SAR backscatter achieved only a slightly lower accuracy (R2=0.66, RMSE=8.45%) as multi-annual SAR data, suggesting that a single SAR scene from the dry season can also be used for woody cover mapping. Moreover, we investigated the impact of the number of samples on the model prediction performance and showed the benefits of a larger spatially explicit LiDAR dataset compared to much smaller number of samples as they can be collected in the field. Collectively, our results demonstrate that L-band backscatter shows promising sensitivity for the purposes of mapping woody cover in southern African savannas, particularly during the dry season leaf-off conditions.",2015-09-01 +31156493,Affective Images of Climate Change.,"Climate change is not only a scientific phenomenon, but also a cultural one. Individuals' opinions on climate change are often based on emotion rather than on scientific evidence. Therefore, research into the emotional characteristics of the imagery that the non-expert public find relevant to climate change is important in order to build a database of effective climate change imagery, which can then be used by scientists, policymakers, and practitioners in mobilizing climate adaptation and resilience efforts. 
To this end, we collected ratings of relevance to climate change as well as emotional arousal and valence on 320 images to assess the relationship between relevance to climate change and the emotional qualities of the image. In addition, participants' environmental beliefs were measured, to investigate the relationship between beliefs and image ratings. The results suggest that images rated highly relevant to climate change are higher in negative emotional valence and emotional arousal. Overall, images were rated as being more relevant to climate change by participants with higher pro-environmental disposition. Critically, we have compiled the mean relevance, valence, and arousal ratings of each of these 320 images into a database that is posted online and freely available (https://affectiveclimateimages.weebly.com; https://www.nmu.edu/affectiveclimateimages) for use in future research on climate change visuals.",2019-05-15 +28203707,Correlated mutations select misfolded from properly folded proteins.,"

Motivation

The recently developed direct coupling analysis (DCA) method has greatly improved the accuracy with which residue-residue contacts can be predicted from multiple sequence alignments. Contact prediction accuracy, though, is still often not sufficient for complete ab initio protein structure prediction. DCA can, however, support protein structure studies in several ways.

Results

We show that DCA can select the better structure from among properly folded and misfolded variants. This idea was tested by comparing obsolete PDB files with their more correctly folded successors and by the comparison of structures with deliberately misfolded decoy models from the Decoys 'R' Us database. The DCA method systematically predicts more contacts for properly folded structures than for misfolded ones. The method works much better for X-ray structures than for NMR structures.

Availability and implementation

All data are available from http://comprec-lin.iiar.pwr.edu.pl/dcaVSmisfolds/ and http://swift.cmbi.ru.nl/dcaVSmisfolds/ .

Contact

malgorzata.kotulska@pwr.edu.pl .

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +31192358,Graphlet Laplacians for topology-function and topology-disease relationships.,"

Motivation

Laplacian matrices capture the global structure of networks and are widely used to study biological networks. However, the local structure of the network around a node can also capture biological information. Local wiring patterns are typically quantified by counting how often a node touches different graphlets (small, connected, induced sub-graphs). Currently available graphlet-based methods do not consider whether nodes are in the same network neighbourhood. To combine graphlet-based topological information and membership of nodes to the same network neighbourhood, we generalize the Laplacian to the Graphlet Laplacian, by considering a pair of nodes to be 'adjacent' if they simultaneously touch a given graphlet.

Results

We utilize Graphlet Laplacians to generalize spectral embedding, spectral clustering and network diffusion. Applying Graphlet Laplacian-based spectral embedding, we visually demonstrate that Graphlet Laplacians capture biological functions. This result is quantified by applying Graphlet Laplacian-based spectral clustering, which uncovers clusters enriched in biological functions dependent on the underlying graphlet. We explain the complementarity of biological functions captured by different Graphlet Laplacians by showing that they capture different local topologies. Finally, diffusing pan-cancer gene mutation scores based on different Graphlet Laplacians, we find complementary sets of cancer-related genes. Hence, we demonstrate that Graphlet Laplacians capture topology-function and topology-disease relationships in biological networks.

Availability and implementation

http://www0.cs.ucl.ac.uk/staff/natasa/graphlet-laplacian/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +30277078,NormalyzerDE: Online Tool for Improved Normalization of Omics Expression Data and High-Sensitivity Differential Expression Analysis.,"Technical biases are introduced in omics data sets during data generation and interfere with the ability to study biological mechanisms. Several normalization approaches have been proposed to minimize the effects of such biases, but fluctuations in the electrospray current during liquid chromatography-mass spectrometry gradients cause local and sample-specific bias not considered by most approaches. Here we introduce a software named NormalyzerDE that includes a generic retention time (RT)-segmented approach compatible with a wide range of global normalization approaches to reduce the effects of time-resolved bias. The software offers straightforward access to multiple normalization methods, allows for data set evaluation and normalization quality assessment as well as subsequent or independent differential expression analysis using the empirical Bayes Limma approach. When evaluated on two spike-in data sets the RT-segmented approaches outperformed conventional approaches by detecting more peptides (8-36%) without loss of precision. Furthermore, differential expression analysis using the Limma approach consistently increased recall (2-35%) compared to analysis of variance. The combination of RT-normalization and Limma was in one case able to distinguish 108% (2597 vs 1249) more spike-in peptides compared to traditional approaches. NormalyzerDE provides widely usable tools for performing normalization and evaluating the outcome and makes calculation of subsequent differential expression statistics straightforward. The program is available as a web server at http://quantitativeproteomics.org/normalyzerde .",2018-10-15 +23134687,The Yak genome database: an integrative database for studying yak biology and high-altitude adaption.,"

Background

The yak (Bos grunniens) is a long-haired bovine that lives at high altitudes and is an important source of milk, meat, fiber and fuel. The recent sequencing, assembly and annotation of its genome are expected to further our understanding of the means by which it has adapted to life at high altitudes and its ecologically important traits.

Description

The Yak Genome Database (YGD) is an internet-based resource that provides access to genomic sequence data and predicted functional information concerning the genes and proteins of Bos grunniens. The curated data stored in the YGD includes genome sequences, predicted genes and associated annotations, non-coding RNA sequences, transposable elements, single nucleotide variants, and three-way whole-genome alignments between human, cattle and yak. YGD offers useful searching and data mining tools, including the ability to search for genes by name or using function keywords as well as GBrowse genome browsers and/or BLAST servers, which can be used to visualize genome regions and identify similar sequences. Sequence data from the YGD can also be downloaded to perform local searches.

Conclusions

A new yak genome database (YGD) has been developed to facilitate studies on high-altitude adaption and bovine genomics. The database will be continuously updated to incorporate new information such as transcriptome data and population resequencing data. The YGD can be accessed at http://me.lzu.edu.cn/yak.",2012-11-07 +28149703,Expanding our understanding of the trade in marine aquarium animals.,"The trade of live marine animals for home and public aquaria has grown into a major global industry. Millions of marine fishes and invertebrates are removed from coral reefs and associated habitats each year. The majority are imported into the United States, with the remainder sent to Europe, Japan, and a handful of other countries. Despite the recent growth and diversification of the aquarium trade, to date, data collection is not mandatory, and hence comprehensive information on species volume and diversity is lacking. This lack of information makes it impossible to study trade pathways. Without species-specific volume and diversity data, it is unclear how importing and exporting governments can oversee this industry effectively or how sustainability should be encouraged. To expand our knowledge and understanding of the trade, and to effectively communicate this new understanding, we introduce the publically-available Marine Aquarium Biodiversity and Trade Flow online database (https://www.aquariumtradedata.org/). This tool was created to communicate the volume and diversity of marine fishes and/or invertebrates imported into the US over three complete years (2008, 2009, and 2011) and three partial years (2000, 2004, 2005). To create this tool, invoices pertaining to shipments of live marine fishes and invertebrates were scanned and analyzed for species name, species quantities, country of origin, port of entry, and city of import destination. 
Here we focus on the analysis of the later three years of data and also produce an estimate for the entirety of 2000, 2004, and 2005. The three-year aggregate totals (2008, 2009, 2011) indicate that just under 2,300 fish and 725 invertebrate species were imported into the US cumulatively, although just under 1,800 fish and 550 invertebrate species were traded annually. Overall, the total number of live marine animals decreased between 2008 and 2011. In 2008, 2009, and 2011, the total number of individual fish (8.2, 7.3, and 6.9 million individuals) and invertebrates (4.2, 3.7, and 3.6 million individuals) assessed by analyzing the invoice data are roughly 60% of the total volumes recorded through the Law Enforcement Management Information System (LEMIS) dataset. Using these complete years, we back-calculated the number of individuals of both fishes and invertebrates imported in 2000, 2004, and 2005. These estimates (9.3, 10.8, and 11.2 million individual fish per year) were consistent with the three years of complete data. We also use these data to understand the global trade in two species (Banggai cardinalfish, Pterapogon kauderni, and orange clownfish, Amphiprion ocellaris / percula) recently considered for Endangered Species Act listing. Aquariumtradedata.org can help create more effective management plans for the traded species, and ideally could be implemented at key trade ports to better assess the global trade of aquatic wildlife.",2017-01-26 +26467481,SIGNOR: a database of causal relationships between biological entities.,"Assembly of large biochemical networks can be achieved by confronting new cell-specific experimental data with an interaction subspace constrained by prior literature evidence. The SIGnaling Network Open Resource, SIGNOR (available on line at http://signor.uniroma2.it), was developed to support such a strategy by providing a scaffold of prior experimental evidence of causal relationships between biological entities. 
The core of SIGNOR is a collection of approximately 12,000 manually-annotated causal relationships between over 2800 human proteins participating in signal transduction. Other entities annotated in SIGNOR are complexes, chemicals, phenotypes and stimuli. The information captured in SIGNOR can be represented as a signed directed graph illustrating the activation/inactivation relationships between signalling entities. Each entry is associated to the post-translational modifications that cause the activation/inactivation of the target proteins. More than 4900 modified residues causing a change in protein concentration or activity have been curated and linked to the modifying enzymes (about 351 human kinases and 94 phosphatases). Additional modifications such as ubiquitinations, sumoylations, acetylations and their effect on the modified target proteins are also annotated. This wealth of structured information can support experimental approaches based on multi-parametric analysis of cell systems after physiological or pathological perturbations and to assemble large logic models.",2015-10-13 +30882025,"Australia and New Zealand Islet and Pancreas Transplant Registry Annual Report 2018-Islet Donations, Islet Isolations, and Islet Transplants.","

Background

This is an excerpt from chapter 4 of the annual registry report from the Australia and New Zealand islet and pancreas transplant registry. The full report is available at http://anziptr.org/reports/.

Methods

We report data for all allogeneic islet isolation and transplant activity from 2002 to end 2017. Solid organ pancreas transplantation activity is reported separately. New Zealand does not have an islet transplant program. Data analysis was performed using Stata software version 14 (StataCorp, College Station, TX).

Results

From 2002 to 2017, a total of 104 allogeneic islet transplants were performed in 62 recipients.

Conclusions

The number of islet transplants performed in Australia was slightly lower in 2017 but continues to increase over time.",2019-01-07 +31855609,Expanding Communication Modalities and Functions for Preschoolers With Autism Spectrum Disorder: Secondary Analysis of a Peer Partner Speech-Generating Device Intervention.,"Purpose This study reports a secondary analysis of the nature of communicative functions and modalities used in initiations and responses of minimally verbal preschoolers with severe autism spectrum disorder (ASD) from a previously published study (Thiemann-Bourque, Feldmiller, Hoffman, & Johner, 2018). This analysis focused on the final cohort (n = 6) from a group design study (N = 45) that examined a peer mediation and speech-generating device (SGD) intervention compared to an SGD-only condition. Method After teaching peers to use an iPad as an SGD within a modified stay-play-talk approach, school staff implemented SGD instruction in child-peer dyads during typical preschool activities. To investigate individual differences among children who demonstrated increased communication acts in the peer + SGD condition, changes in reciprocity, modalities used, and communicative functions were examined using a multiple-baseline design across children. Fidelity of implementation and social validity data were also collected. Results Six children with ASD and their peers demonstrated more balanced reciprocity, with individual differences in how and why children communicated during exchanges. That is, all children with ASD increased in SGD use as their primary communication mode; 3 children used different modalities including more speech, and 3 children used primarily gestures and SGD. The most frequent function expressed was requests for objects. More modest increases were observed in comments and requests for actions, with negligible changes in gaining attention. Social validity reports by naïve judges reflected clear improvements in communication interactions. 
Conclusion Findings are promising for a preschool SGD intervention that can expand children's modalities and communicative functions to engage in balanced exchanges with peer partners. Supplemental Material https://doi.org/10.23641/asha.11374203.",2019-12-19 +31709920,Individual cognitive behavioral therapy and combined family/individual therapy for young adults with Anorexia nervosa: A randomized controlled trial.,"Objective: In this study, we evaluate the efficacy of outpatient individual cognitive behavioral therapy for young adults (CBT-YA) and combined family/individual therapy for young adults (FT-YA) for anorexia nervosa (AN). Method: Participants (aged 17-24 years) with AN in Sweden were recruited and assigned to 18 months of CBT-YA or FT-YA. Treatment efficacy was assessed primarily using BMI, presence of diagnosis, and degree of eating-related psychopathology at post-treatment and follow-up. Secondary outcomes included depression and general psychological psychopathology. The trial was registered at http://www.isrctn.com/, ISRCTN (25181390). Results: Seventy-eight participants were randomized, and seventy-four of them received allocated treatment and provided complete data. Clinical outcomes from within groups resulted in significant improvements for both groups. BMI increased from baseline (CBT-YA 16.49; FT-YA 16.54) to post-treatment (CBT-YA 19.61; FT-YA 19.33) with high effect sizes. The rate of weight restoration was 64.9% in the CBT-YA group and 83.8% in the FT-YA group. The rate of recovery was 76% in both groups at post-treatment, and at follow-up, 89% and 81% had recovered in the CBT-YA and FT-YA groups respectively. 
Conclusions: Outpatient CBT-YA and FT-YA appear to be of benefit to young adults with AN in terms of weight restoration and reduced eating disorder and general psychopathology.",2019-11-10 +28158643,PlantExpress: A Database Integrating OryzaExpress and ArthaExpress for Single-species and Cross-species Gene Expression Network Analyses with Microarray-Based Transcriptome Data.,"Publicly available microarray-based transcriptome data on plants are remarkably valuable in terms of abundance and variation of samples, particularly for Oryza sativa (rice) and Arabidopsis thaliana (Arabidopsis). Here, we introduce the web database PlantExpress (http://plantomics.mind.meiji.ac.jp/PlantExpress/) as a platform for gene expression network (GEN) analysis with the public microarray data of rice and Arabidopsis. PlantExpress has two functional modes. The single-species mode is specialized for GEN analysis within one of the species, while the cross-species mode is optimized for comparative GEN analysis between the species. The single-species mode for rice is the new version of OryzaExpress, which we have maintained since 2006. The single-species mode for Arabidopsis, named ArthaExpress, was newly developed. PlantExpress stores data obtained from three microarrays, the Affymetrix Rice Genome Array, the Agilent Rice Gene Expression 4x44K Microarray, and the Affymetrix Arabidopsis ATH1 Genome Array, with respective totals of 2,678, 1,206, and 10,940 samples. This database employs a ‘MyList’ function with which users may save lists of arbitrary genes and samples (experimental conditions) to use in analyses. In cross-species mode, the MyList function allows performing comparative GEN analysis between rice and Arabidopsis. In addition, the gene lists saved in MyList can be directly exported to the PODC database, which provides information and a platform for comparative GEN analysis based on RNA-seq data and knowledge-based functional annotation of plant genes. 
PlantExpress will facilitate understanding the biological functions of plant genes.",2017-01-01 +28539606,SesameFG: an integrated database for the functional genomics of sesame.,"Sesame (Sesamum indicum L.) has high oil content, a small diploid genome and a short growth period, making it an attractive species for genetic studies on oilseed crops. With the advancement of next-generation sequencing technology, genomics and functional genomics research of sesame has developed quickly in the last few years, and large amounts of data have been generated. However, these results are distributed in many different publications, and there is a lack of integration. To promote functional genomics research of sesame, we collected genetic information combined with comprehensive phenotypic information and integrated them in the web-based database named SesameFG. The current version of SesameFG contains phenotypic information on agronomic traits of 705 sesame accessions, de novo assembled genomes of three sesame varieties, massive numbers of identified SNPs, gene expression profiles of five tissues, gene families, candidate genes for the important agronomic traits and genomic-SSR markers. All phenotypic and genotypic information in SesameFG is available for online queries and can be downloaded freely. SesameFG provides useful search functions and data mining tools, including Genome Browser and local BLAST services. SesameFG is freely accessible at http://ncgr.ac.cn/SesameFG/. 
SesameFG provides valuable resources and tools for functional genomics research and the molecular breeding of sesame.",2017-05-24 +30106588,Identification of Missing Proteins in Normal Human Cerebrospinal Fluid.,"The cerebrospinal fluid (CSF) proteome data set presented herein was obtained after immunodepletion of abundant proteins and off-gel electrophoresis fractionation of a commercial pool of normal human CSF; liquid chromatography tandem mass spectrometry analysis was performed with a linear ion trap-Orbitrap Elite. We report the identification of 12 344 peptides mapping on 2281 proteins. In the context of the Chromosome-centric Human Proteome Project (C-HPP), the existence of seven missing proteins is proposed to be validated. This data set is available to the ProteomeXchange Consortium ( http://www.proteomexchange.org/ ) with the data set identifier PXD008029.",2018-08-17 +31516935,"Dataset for the assessment of metallic pollution in the Saint-Charles River sediments (Québec City, QC, Canada).","This Data in Brief article presents sedimentological and geochemical parameters from a set of sedimentary samples collected in the Saint-Charles River, a tributary of the Saint-Lawrence River flowing in Québec City (QC, Canada). It details the experimental design, methods, materials and results of destructive analyses related to a multi-proxy study of polymetallic contamination in sediments collected within an urban reservoir (Spatial and temporal patterns of metallic pollution in Québec City, Canada: Sources and hazard assessment from reservoir sediment records, https://doi.org/10.1016/j.scitotenv.2019.04.021, (Chassiot et al., 2019)). The present article summarizes the results of relevant parameters on a set of 68 samples: total organic carbon (TOC), sulfur content, grain-size, and concentrations of heavy and trace metals. 
It also presents the calculation of enrichment factors, geoaccumulation indexes, and metallic pollution index.",2019-07-15 +31396659,Adipocyte-specific disruption of ATPase copper transporting α in mice accelerates lipoatrophy.,"

Aims/hypothesis

ATPase copper transporting α (ATP7A), also known as Menkes disease protein, is a P-type ATPase that transports copper across cell membranes. The critical role of ATP7A-mediated copper homeostasis has been well recognised in various organs, such as the intestine, macrophages and the nervous system. However, the importance of adipocyte ATP7A-mediated copper homeostasis on fat metabolism is not well understood. Here, we sought to reveal the contribution of adipose ATP7A to whole-body fat metabolism in mice.

Methods

We generated adipocyte-specific Atp7a-knockout (ASKO) mice using the Cre/loxP system, with Cre expression driven by the adiponectin promoter. ASKO mice and littermate control mice were aged on a chow diet or fed with a high-fat diet (HFD); body weight, fat mass, and glucose and insulin metabolism were analysed. Histological analysis, transmission electron microscopy and RNA-sequencing (RNA-Seq) analysis of white adipose tissue (WAT) were used to understand the physiological and molecular changes associated with loss of copper homeostasis in adipocytes.

Results

Significantly increased copper concentrations were observed in adipose tissues of ASKO mice compared with control mice. Aged or HFD-fed ASKO mice manifested a lipoatrophic phenotype characterised by a progressive generalised loss of WAT. Dysfunction of adipose tissues in these ASKO mice was confirmed by decreased levels of both serum leptin and adiponectin and increased levels of triacylglycerol and insulin. Systemic metabolism was also impaired in these mice, as evidenced by a pronounced glucose intolerance, insulin resistance and hepatic steatosis. Moreover, we demonstrate a significant induction of lipolysis and DNA-damage signalling pathways in gonadal WAT from aged and HFD-fed ASKO mice. In vitro studies suggest that copper overload is responsible for increased lipolysis and DNA damage.

Conclusions/interpretation

Our results show a previously unappreciated role of adipocyte Atp7a in the regulation of ageing-related metabolic disease and identify new metallophysiologies in whole-body fat metabolism.

Data availability

The datasets generated during the current study are available in the Genome Sequence Archive in BIG Data Center, Beijing Institute of Genomics (BIG), Chinese Academy of Sciences, under accession number CRA001769 (http://bigd.big.ac.cn/gsa).",2019-08-08 +30701134,"Prognostic values of GMPS, PR, CD40, and p21 in ovarian cancer.","Early detection and prediction of prognosis and treatment responses are all the keys in improving survival of ovarian cancer patients. This study profiled an ovarian cancer progression model to identify prognostic biomarkers for ovarian cancer patients. Mouse ovarian surface epithelial cells (MOSECs) can undergo spontaneous malignant transformation in vitro cell culture. These were used as a model of ovarian cancer progression for alterations in gene expression and signaling detected using the Illumina HiSeq2000 Next-Generation Sequencing platform and bioinformatical analyses. The differential expression of four selected genes was identified using the gene expression profiling interaction analysis (http://gepia.cancer-pku.cn/) and then associated with survival in ovarian cancer patients using the Cancer Genome Atlas dataset and the online Kaplan-Meier Plotter (http://www.kmplot.com) data. The data showed 263 aberrantly expressed genes, including 182 up-regulated and 81 down-regulated genes between the early and late stages of tumor progression in MOSECs. The bioinformatic data revealed four genes (i.e., guanosine 5'-monophosphate synthase (GMPS), progesterone receptor (PR), CD40, and p21 (cyclin-dependent kinase inhibitor 1A)) to play an important role in ovarian cancer progression. Furthermore, the Cancer Genome Atlas dataset validated the differential expression of these four genes, which were associated with prognosis in ovarian cancer patients. 
In conclusion, this study profiled differentially expressed genes using the ovarian cancer progression model and identified four (i.e., GMPS, PR, CD40, and p21) as prognostic markers for ovarian cancer patients. Future studies of prospective patients could further verify the clinical usefulness of this four-gene signature.",2019-01-25 +31021279,UVGD 1.0: a gene-centric database bridging ultraviolet radiation and molecular biology effects in organisms.,"Objectives: Exposing to ultraviolet for a certain time will trigger some significant molecular biology effects in an organism. In the past few decades, varied ultraviolet-associated biological effects as well as their related genes, have been discovered under biologists' efforts. However, information about ultraviolet-related genes is dispersed in thousands of scientific papers, and there is still no study emphasizing on the systematic collection of ultraviolet-related genes. Methods: We collected ultraviolet-related genes and built this gene-centric database UVGD based on literature mining and manual curation. Literature mining was based on the ultraviolet-related abstracts downloaded from PubMed, and we obtained sentences in which ultraviolet keywords and genes co-occur at single-sentence level by using bio-entity recognizer. After that, manual curation was implemented in order to identify whether the genes are related to ultraviolet or not. Results: We built the ultraviolet-related knowledge base UVGD 1.0 (URL: http://biokb.ncpsb.org/UVGD/ ), which contains 663 ultraviolet-related genes, together with 17 associated biological processes, 117 associated phenotypes, and 2628 MeSH terms. Conclusion: UVGD is helpful to understand the ultraviolet-related biological processes in organisms and we believe it would be useful for biologists to study the responding mechanisms to ultraviolet.",2019-05-13 +25171179,SoilGrids1km--global soil information based on automated mapping.,"

Background

Soils are widely recognized as a non-renewable natural resource and as biophysical carbon sinks. As such, there is a growing requirement for global soil information. Although several global soil information systems already exist, these tend to suffer from inconsistencies and limited spatial detail.

Methodology/principal findings

We present SoilGrids1km--a global 3D soil information system at 1 km resolution--containing spatial predictions for a selection of soil properties (at six standard depths): soil organic carbon (g kg-1), soil pH, sand, silt and clay fractions (%), bulk density (kg m-3), cation-exchange capacity (cmol+/kg), coarse fragments (%), soil organic carbon stock (t ha-1), depth to bedrock (cm), World Reference Base soil groups, and USDA Soil Taxonomy suborders. Our predictions are based on global spatial prediction models which we fitted, per soil variable, using a compilation of major international soil profile databases (ca. 110,000 soil profiles), and a selection of ca. 75 global environmental covariates representing soil forming factors. Results of regression modeling indicate that the most useful covariates for modeling soils at the global scale are climatic and biomass indices (based on MODIS images), lithology, and taxonomic mapping units derived from conventional soil survey (Harmonized World Soil Database). Prediction accuracies assessed using 5-fold cross-validation were between 23-51%.

Conclusions/significance

SoilGrids1km provide an initial set of examples of soil spatial data for input into global models at a resolution and consistency not previously available. Some of the main limitations of the current version of SoilGrids1km are: (1) weak relationships between soil properties/classes and explanatory variables due to scale mismatches, (2) difficulty to obtain covariates that capture soil forming factors, (3) low sampling density and spatial clustering of soil profile locations. However, as the SoilGrids system is highly automated and flexible, increasingly accurate predictions can be generated as new input data become available. SoilGrids1km are available for download via http://soilgrids.org under a Creative Commons Non Commercial license.",2014-08-29 +28738270,"Chemical structure and correlation analysis of HIV-1 NNRT and NRT inhibitors and database-curated, published inhibition constants with chemical structure in diverse datasets.","Human immunodeficiency virus (HIV-1) reverse transcriptase is a major target for designing anti-HIV drugs. Developed inhibitors are divided into non-nucleoside analog reverse-transcriptase inhibitors (NNRTIs) and nucleoside analog reverse-transcriptase inhibitors (NRTIs) depending on their mechanism. Given that many inhibitors have been studied and for many of them binding affinity constants have been calculated, it is beneficial to analyze the chemical landscape of these families of inhibitors and correlate these inhibition constants with molecular structure descriptors. For this, the HIV-1 RT data was retrieved from the ChEMBL database, carefully curated, and original literature verified, grouped into NRTIs and NNRTIs, analyzed using a hierarchical scaffold classification method and modelled with best multi-linear regression approach. 
Analysis of the HIV-1 NNRTIs subset results in ten different common structural parent types of oxazepanone, piperazinone, pyrazine, oxazinanone, diazinanone, pyridine, pyrrole, diazepanone, thiazole, and triazine. The same analysis for HIV-1 NRTIs groups structures into four different parent types of uracil, pyrimide, pyrimidione, and imidazole. Each scaffold tree corresponding to the parent types has been carefully analyzed and examined, and changes in chemical structure favorable to potency and stability are highlighted. For both subsets, descriptive and predictive QSAR models are derived, discussed and externally validated, revealing general trends in relationships between molecular structure and binding affinity constants in structurally diverse datasets. Data and QSAR models are available at the QsarDB repository (http://dx.doi.org/10.15152/QDB.202).",2017-06-24 +27799467,OGEE v2: an update of the online gene essentiality database with special focus on differentially essential genes in human cancer cell lines.,"OGEE is an Online GEne Essentiality database. To enhance our understanding of the essentiality of genes, in OGEE we collected experimentally tested essential and non-essential genes, as well as associated gene properties known to contribute to gene essentiality. We focus on large-scale experiments, and complement our data with text-mining results. We organized tested genes into data sets according to their sources, and tagged those with variable essentiality statuses across data sets as conditionally essential genes, intending to highlight the complex interplay between gene functions and environments/experimental perturbations. Developments since the last public release include increased numbers of species and gene essentiality data sets, inclusion of non-coding essential sequences and genes with intermediate essentiality statuses. 
In addition, we included 16 essentiality data sets from cancer cell lines, corresponding to 9 human cancers; with OGEE, users can easily explore the shared and differentially essential genes within and between cancer types. These genes, especially those derived from cell lines that are similar to tumor samples, could reveal the oncogenic drivers, paralogous gene expression pattern and chromosomal structure of the corresponding cancer types, and can be further screened to identify targets for cancer therapy and/or new drug development. OGEE is freely available at http://ogee.medgenius.info.",2016-10-30 +27605099,OntoBrowser: a collaborative tool for curation of ontologies by subject matter experts.,"The lack of controlled terminology and ontology usage leads to incomplete search results and poor interoperability between databases. One of the major underlying challenges of data integration is curating data to adhere to controlled terminologies and/or ontologies. Finding subject matter experts with the time and skills required to perform data curation is often problematic. In addition, existing tools are not designed for continuous data integration and collaborative curation. This results in time-consuming curation workflows that often become unsustainable. The primary objective of OntoBrowser is to provide an easy-to-use online collaborative solution for subject matter experts to map reported terms to preferred ontology (or code list) terms and facilitate ontology evolution. Additional features include web service access to data, visualization of ontologies in hierarchical/graph format and a peer review/approval workflow with alerting.

Availability and implementation

The source code is freely available under the Apache v2.0 license. Source code and installation instructions are available at http://opensource.nibr.com This software is designed to run on a Java EE application server and store data in a relational database.

Contact

philippe.marc@novartis.com.",2016-09-06 +30100533,"pH-permeability profiles for drug substances: Experimental detection, comparison with human intestinal absorption and modelling.","The influence of pH on human intestinal absorption is frequently not considered in early drug discovery studies in the modelling and subsequent prediction of intestinal absorption for drug candidates. To bridge this gap, in this study, experimental membrane permeability data were measured for current and former drug substances with a parallel artificial membrane permeability assay (PAMPA) at different pH values (3, 5, 7.4 and 9). The presented data are in good agreement with human intestinal absorption, showing a clear influence of pH on the efficiency of intestinal absorption. For the measured data, simple and general quantitative structure-activity relationships (QSARs) were developed for each pH that makes it possible to predict the pH profiles for passive membrane permeability (i.e., a pH-permeability profile), and these predictions coincide well with the experimental data. QSARs are also proposed for the data series of highest and intrinsic membrane permeability. The molecular descriptors in the models were analysed and mechanistically related to the interaction pattern of permeability in membranes. In addition to the regression models, classification models are also proposed. All models were successfully validated and blind tested with external data. The models are available in the QsarDB repository (http://dx.doi.org/10.15152/QDB.203).",2018-07-06 +28369334,NeBcon: protein contact map prediction using neural network training coupled with naïve Bayes classifiers.,"

Motivation

Recent CASP experiments have witnessed exciting progress on folding large-size non-humongous proteins with the assistance of co-evolution based contact predictions. The success is however anecdotal due to the requirement of the contact prediction methods for the high volume of sequence homologs that are not available to most of the non-humongous protein targets. Development of efficient methods that can generate balanced and reliable contact maps for different type of protein targets is essential to enhance the success rate of the ab initio protein structure prediction.

Results

We developed a new pipeline, NeBcon, which uses the naïve Bayes classifier (NBC) theorem to combine eight state of the art contact methods that are built from co-evolution and machine learning approaches. The posterior probabilities of the NBC model are then trained with intrinsic structural features through neural network learning for the final contact map prediction. NeBcon was tested on 98 non-redundant proteins, which improves the accuracy of the best co-evolution based meta-server predictor by 22%; the magnitude of the improvement increases to 45% for the hard targets that lack sequence and structural homologs in the databases. Detailed data analysis showed that the major contribution to the improvement is due to the optimized NBC combination of the complementary information from both co-evolution and machine learning predictions. The neural network training also helps to improve the coupling of the NBC posterior probability and the intrinsic structural features, which were found particularly important for the proteins that do not have sufficient number of homologous sequences to derive reliable co-evolution profiles.

Availability and implementation

On-line server and standalone package of the program are available at http://zhanglab.ccmb.med.umich.edu/NeBcon/ .

Contact

zhng@umich.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +30335965,MetGem Software for the Generation of Molecular Networks Based on the t-SNE Algorithm.,"Molecular networking (MN) is becoming a standard bioinformatics tool in the metabolomic community. Its paradigm is based on the observation that compounds with a high degree of chemical similarity share comparable MS2 fragmentation pathways. To afford a clear separation between MS2 spectral clusters, only the most relevant similarity scores are selected using dedicated filtering steps requiring time-consuming parameter optimization. Depending on the filtering values selected, some scores are arbitrarily deleted and a part of the information is ignored. The problem of creating a reliable representation of MS2 spectra data sets can be solved using algorithms developed for dimensionality reduction and pattern recognition purposes, such as t-distributed stochastic neighbor embedding (t-SNE). This multivariate embedding method pays particular attention to local details by using nonlinear outputs to represent the entire data space. To overcome the limitations inherent to the GNPS workflow and the networking architecture, we developed MetGem. Our software allows the parallel investigation of two complementary representations of the raw data set, one based on a classic GNPS-style MN and another based on the t-SNE algorithm. The t-SNE graph preserves the interactions between related groups of spectra, while the MN output allows an unambiguous separation of clusters. Additionally, almost all parameters can be tuned in real time, and new networks can be generated within a few seconds for small data sets. 
With the development of this unified interface ( https://metgem.github.io ), we fulfilled the need for a dedicated, user-friendly, local software for MS2 comparison and spectral network generation.",2018-11-14 +,Verification of a Parkinson's Disease Protein Signature by Multiple Reaction Monitoring,"OBJECTIVE: Integration of different ‘omics data (genomic, transcriptomic, proteomic) reveals novel discoveries into biological systems. Integration of these datasets is challenging however, involving use of multiple disparate software in a sequential manner. However, the use of multiple, disparate software in a sequential manner makes the integration of multi-omic data a serious challenge. We describe the extension of Galaxy for mass spectrometric-based proteomics software, enabling advanced multi-omic applications in proteogenomics and metaproteomics. We will demonstrate the benefits of Galaxy for these analyses, as well as its value for software developers seeking to publish new software. We will also share insights on the benefits of the Galaxy framework as a bioinformatics solution for proteomic/metabolomic core facilities. METHODS: Multiple datasets for proteogenomics research (3D-fractionated salivary dataset and oral pre-malignant lesion (OPML) dataset) and metaproteomics research (OPML dataset and Severe Early Childhood Caries (SECC) dataset). Software required for analytical steps such as peaklist generation, database generation (RNA-Seq derived and others), database search (ProteinPilot and X! tandem) and for quantitative proteomics were deployed, tested and optimized for use in workflows. The software are shared in Galaxy toolshed (http://toolshed.g2.bx.psu.edu/). Results: Usage of analytical workflows resulted in reliable identification of novel proteoforms (proteogenomics) or microorganisms (metaproteomics). Proteogenomics analysis identified novel proteoforms in the salivary dataset (51) and OPML dataset (38). 
Metaproteomics analysis led to microbial identification in OPML and SECC datasets using MEGAN software. As examples, workflows for proteogenomics analysis (http://z.umn.edu/pg140) and metaproteomic analysis (http://z.umn.edu/mp65) are available at the usegalaxyp.org website. Tutorials for workflow usage within Galaxy-P framework are also available (http://z.umn.edu/ppingp). CONCLUSIONS: We demonstrate the use of Galaxy for integrated analysis of multi-omic data, in an accessible, transparent and reproducible manner. Our results and experiences using this framework demonstrate the potential for Galaxy to be a unifying bioinformatics solution for ‘omics core facilities.",2014-05-01 +,The Galaxy Framework as a Unifying Bioinformatics Solution for ‘omics’ Core Facilities,"Integration of different omics data (genomic, transcriptomic, proteomic) reveals novel discoveries into biological systems. Integration of these datasets is challenging however, involving use of multiple disparate software in a sequential manner. However, the use of multiple, disparate software in a sequential manner makes the integration of multi-omic data a serious challenge. We describe the extension of Galaxy for mass spectrometric-based proteomics software, enabling advanced multi-omic applications in proteogenomics and metaproteomics. We will demonstrate the benefits of Galaxy for these analyses, as well as its value for software developers seeking to publish new software. We will also share insights on the benefits of the Galaxy framework as a bioinformatics solution for proteomic/metabolomic core facilities. Multiple datasets for proteogenomics research (3D-fractionated salivary dataset and oral pre-malignant lesion (OPML) dataset) and metaproteomics research (OPML dataset and Severe Early Childhood Caries (SECC) dataset). Software required for analytical steps such as peaklist generation, database generation (RNA-Seq derived and others), database search (ProteinPilot and X! 
tandem) and for quantitative proteomics were deployed, tested and optimized for use in workflows. The software are shared in Galaxy toolshed (http://toolshed.g2.bx.psu.edu/). Usage of analytical workflows resulted in reliable identification of novel proteoforms (proteogenomics) or microorganisms (metaproteomics). Proteogenomics analysis identified novel proteoforms in the salivary dataset (51) and OPML dataset (38). Metaproteomics analysis led to microbial identification in OPML and SECC datasets using MEGAN software. As examples, workflows for proteogenomics analysis (http://z.umn.edu/pg140) and metaproteomic analysis (http://z.umn.edu/mp65) are available at the usegalaxyp.org website. Tutorials for workflow usage within Galaxy-P framework are also available (http://z.umn.edu/ppingp). We demonstrate the use of Galaxy for integrated analysis of multi-omic data, in an accessible, transparent and reproducible manner. Our results and experiences using this framework demonstrate the potential for Galaxy to be a unifying bioinformatics solution for ‘omics core facilities.",2014-05-01 +23095257,"IBDsite: a Galaxy-interacting, integrative database for supporting inflammatory bowel disease high throughput data analysis.","

Background

Inflammatory bowel diseases (IBD) refer to a group of inflammatory conditions concerning colon and small intestine, which cause socially uncomfortable symptoms and often are associated with an increased risk of colon cancer. IBD are complex disorders, which rely on genetic susceptibility, environmental factors, deregulation of the immune system, and host relationship with commensal microbiota. The complexity of these pathologies makes it difficult to clearly understand the mechanisms of their onset. Therefore, the study of IBD must be faced exploiting an integrated and multilevel approach, ranging from genes, transcripts and proteins to pathways altered in affected tissues, and carefully considering their regulatory mechanisms, which may intervene in the pathology onset. It is also crucial to have a knowledge base about the symbiotic bacteria that are hosted in the human gut. To date, much data exist regarding IBD and human commensal bacteria, but this information is sparse in literature and no free resource provides a homogeneously and rationally integrated view of biomolecular data related to these pathologies.

Methods

Human genes altered in IBD have been collected from literature, paying particular interest for the immune system alterations prompted by the interaction with the gut microbiome. This process has been performed manually to assure the reliability of collected data. Heterogeneous metadata from different sources have been automatically formatted and integrated in order to enrich information about these altered genes. A user-friendly web interface has been created for easy access to structured data. Tools such as gene clustering coefficients, all-pairs shortest paths and pathway lengths calculation have been developed to provide data analysis support. Moreover, the implemented resource is compliant to the Galaxy framework, allowing the collected data to be exploited in the context of high throughput bioinformatics analysis.

Results

To fill the lack of a reference resource for 'omics' science analysis in the context of IBD, we developed the IBDsite (available at http://www.itb.cnr.it/ibd), a disease-oriented platform, which collects data related to biomolecular mechanisms involved in the IBD onset. The resource provides a section devoted to human genes identified as altered in IBD, which can be queried at different biomolecular levels and visualised in gene-centred report pages. Furthermore, the system presents information related to the gut microbiota involved in IBD affected patients. The IBDsite is compliant with all Galaxy installations (in particular, it can be accessed from our custom version of Galaxy, http://www.itb.cnr.it/galaxy), in order to facilitate high-throughput data integration and to enable evaluations of the genomic basis of these diseases, complementing the tools embedded in the IBDsite.

Conclusions

Lots of sparse data exist concerning IBD studies, but no on-line resource homogeneously and rationally integrate and collect them. The IBDsite is an attempt to group available information regarding human genes and microbial aspects related to IBD, by means of a multilevel mining tool. Moreover, it constitutes a knowledge base to filter, annotate and understand new experimental data in order to formulate new scientific hypotheses, thanks to the possibility of integrating genomics aspects by employing the Galaxy framework. Discussed use-cases demonstrate that the developed system is useful to infer not trivial knowledge from the existing widespread data or from novel experiments.",2012-09-07 +28481528,PubChemQC Project: A Large-Scale First-Principles Electronic Structure Database for Data-Driven Chemistry.,"Large-scale molecular databases play an essential role in the investigation of various subjects such as the development of organic materials, in silico drug design, and data-driven studies with machine learning. We have developed a large-scale quantum chemistry database based on first-principles methods. Our database currently contains the ground-state electronic structures of 3 million molecules based on density functional theory (DFT) at the B3LYP/6-31G* level, and we successively calculated 10 low-lying excited states of over 2 million molecules via time-dependent DFT with the B3LYP functional and the 6-31+G* basis set. To select the molecules calculated in our project, we referred to the PubChem Project, which was used as the source of the molecular structures in short strings using the InChI and SMILES representations. Accordingly, we have named our quantum chemistry database project ""PubChemQC"" ( http://pubchemqc.riken.jp/ ) and placed it in the public domain. In this paper, we show the fundamental features of the PubChemQC database and discuss the techniques used to construct the data set for large-scale quantum chemistry calculations. 
We also present a machine learning approach to predict the electronic structure of molecules as an example to demonstrate the suitability of the large-scale quantum chemistry database.",2017-05-19 +31730280,Temperature dependence of NMR chemical shifts: Tracking and statistical analysis.,"Isotropic chemical shifts measured by solution nuclear magnetic resonance (NMR) spectroscopy offer extensive insights into protein structure and dynamics. Temperature dependences add a valuable dimension; notably, the temperature dependences of amide proton chemical shifts are valuable probes of hydrogen bonding, temperature-dependent loss of structure, and exchange between distinct protein conformations. Accordingly, their uses include structural analysis of both folded and disordered proteins, and determination of the effects of mutations, binding, or solution conditions on protein energetics. Fundamentally, these temperature dependences result from changes in the local magnetic environments of nuclei, but correlations with global thermodynamic parameters measured via calorimetric methods have been observed. Although the temperature dependences of amide proton and nitrogen chemical shifts are often well approximated by a linear model, deviations from linearity are also observed and may be interpreted as evidence of fast exchange between distinct conformational states. Here, we describe computational methods, accessible via the Shift-T web server, including an automated tracking algorithm that propagates initial (single temperature) 1 H15 N cross peak assignments to spectra collected over a range of temperatures. Amide proton and nitrogen temperature coefficients (slopes determined by fitting chemical shift vs. temperature data to a linear model) are subsequently calculated. Also included are methods for the detection of systematic, statistically significant deviation from linearity (curvature) in the temperature dependences of amide proton chemical shifts. 
The use and utility of these methods are illustrated by example, and the Shift-T web server is freely available at http://meieringlab.uwaterloo.ca/shiftt.",2019-11-26 +31755556,Combination of antioxidants and NFAT (nuclear factor of activated T cells) inhibitor protects auditory hair cells from ototoxic insult.,"Hair cell (HC) degeneration causes hearing loss in millions of people worldwide. Aminoglycoside exposure is one major cause of sensory HC damage. Aminoglycosides generate free radicals within the inner ear, permanently damaging sensory cells, and thus causing hearing loss. Hearing protection requires strategies to overcome the apparently irreversible loss of HCs in mammals. The nuclear factor of activated T cells (NFAT) inhibitor 11R-VIVIT reportedly protects HCs from gentamicin toxicity. Here we investigated whether the combination of 11R-VIVIT with the antioxidant L-carnitine or N-acetylcysteine could protect mouse cochlear HCs from gentamicin damage. Compared to single-component treatment, combined treatment with 11R-VIVIT plus L-carnitine yielded significant protection from gentamicin, and 11R-VIVIT plus N-acetylcysteine provided almost complete protection of HCs from gentamicin. Caspase activity in organ of Corti was significantly reduced by combined treatment with 11R-VIVIT + N-acetylcysteine + gentamicin, compared to 11R-VIVIT + gentamicin or gentamicin alone. Analysis of relative gene expression by qPCR revealed down-regulation of the pro-apoptotic genes Fasl and Casp9, and up-regulation of the antioxidant genes Hmox1 and Nrf2 after treatment with 11R-VIVIT + N-acetylcysteine + gentamicin, compared to single-compound treatment or gentamicin alone in cultures. Selective NFAT inhibition by 11R-VIVIT may be a good strategy for preventing gentamicin-induced HC damage. L-carnitine and N-acetylcysteine, with their ROS-reducing properties, contribute to the synergistic effectiveness with 11R-VIVIT by decreasing ROS-induced NFAT translocation. 
Our data suggest that a combined approach of NFAT inhibition together with an antioxidant, like N-acetylcysteine, could be useful for hearing loss treatment and/or prevention. Cover Image for this issue: https://doi.org/10.1111/jnc.14759.",2019-12-15 +22134927,hUbiquitome: a database of experimentally verified ubiquitination cascades in humans.,"Protein ubiquitination is an evolutionarily conserved and functionally diverse post-translational modification achieved through the sequential action of E1-activating enzymes, E2-conjugating enzymes and E3 ligases. A summary of validated ubiquitination substrates have been presented and a prediction of new substrates have been conducted in yeast. However, a systematic summary of human ubiquitination substrates containing experimental evidence and the enzymatic cascade of each substrate is not available. In the present study, hUbiquitome web resource is introduced, a public resource for the retrieval of experimentally verified human ubiquitination enzymes and substrates. hUbiquitome is the first comprehensive database of human ubiquitination cascades. Currently, hUbiquitome has in its repertoire curated data comprising 1 E1 enzyme, 12 E2 enzymes, 138 E3 ligases or complexes, 279 different substrate proteins and 17 deubiquitination enzyme terms. The biological functions of substrates from different kinds of E3s were analyzed using the collected data. The findings show that substrates ubiquitinated by RING (Really Interesting New Gene) E3s are enriched most in apoptosis-related processes, whereas substrates ubiquitinated by other E3s are enriched in gene expression-associated processes. An analysis of the data demonstrates the biological process preferences of the different kinds of E3s. hUbiquitome is the first database to systematically collect experimentally validated ubiquitinated proteins and related ubiquitination cascade enzymes which might be helpful in the field of ubiquitination-modification research. 
Database URL: http://202.38.126.151/hmdd/hubi/",2011-11-30 +31076743,Direct and Indirect Costs of Inflammatory Bowel Disease: Ten Years of Follow-up in a Danish Population-based Inception Cohort.,"

Background

Inflammatory bowel disease [IBD], encompassing Crohn's disease [CD] and ulcerative colitis [UC], places a high burden on health care resources. To date, no study has assessed the combined direct and indirect cost of IBD in a population-based setting. Our aim was to assess this in a population-based inception cohort with 10 years of follow-up.

Methods

All incident patients diagnosed with CD or UC, 2003-2004, in a well-defined area of Copenhagen, were followed prospectively until 2015. Direct and indirect costs were retrieved from Danish national registries. Data were compared with a control population [1:20]. Associations between the costs and multiple variables were assessed.

Results

A total of 513 (CD: 213 [42%], UC: 300 [58%]) IBD patients were included. No significant differences were found in indirect costs between CD, UC, and the control population. Costs for CD patients were significantly higher than those for UC regarding all direct expenditures (except for 5-aminosalicylates [5-ASA] and diagnostic expenses). Biologics accounted for €1.6 and €0.3 million for CD and UC, respectively. The total costs amounted to €42.6 million. Only patients with extensive colitis had significantly higher direct costs (proctitis: €2273 [1341-4092], left-sided: €3606 [2354-5311], extensive: €4093 [2313-6057], p <0.001). No variables were significantly associated with increased total costs in CD or in UC patients.

Conclusions

In this prospective population-based cohort, direct costs for IBD remain high. However, indirect costs did not surpass the control population. Total costs were mainly driven by hospitalisation, but indirect costs accounted for a higher percentage overall, although these did decrease over time.

Podcast

This article has an associated podcast which can be accessed at https://academic.oup.com/ecco-jcc/pages/podcast.",2020-01-01 +30915620,A novel application of process mapping in a criminal justice setting to examine implementation of peer support for veterans leaving incarceration.,"BACKGROUND:Between 12,000 and 16,000 veterans leave incarceration every year, yet resources are limited for reentry support that helps veterans remain connected to VA and community health care and services after leaving incarceration. Homelessness and criminal justice recidivism may result when such follow-up and support are lacking. In order to determine where gaps exist in current reentry support efforts, we developed a novel methodological adaptation of process mapping (a visualization technique being increasingly used in health care to identify gaps in services and linkages) in the context of a larger implementation study of a peer-support intervention to link veterans to health-related services after incarceration ( https://clinicaltrials.gov/ , NCT02964897, registered November 4, 2016) to support their reentry into the community. METHODS:We employed process mapping to analyze qualitative interviews with staff from organizations providing reentry support. Interview data were used to generate process maps specifying the sequence of events and the multiple parties that connect veterans to post-incarceration services. Process maps were then analyzed for uncertainties, gaps, and bottlenecks. RESULTS:We found that reentry programs lack systematic means of identifying soon-to-be released veterans who may become their clients; veterans in prisons/jails, and recently released, lack information about reentry supports and how to access them; and veterans' whereabouts between their release and their health care appointments are often unknown to reentry and health care teams. 
These system-level shortcomings informed our intervention development and implementation planning of peer-support services for veterans' reentry. CONCLUSIONS:Systematic information sharing that is inherent to process mapping makes more transparent the research needed, helping to engage participants and operational partners who are critical for successful implementation of interventions to improve reentry support for veterans leaving incarceration. Even beyond our immediate study, process mapping based on qualitative interview data enables visualization of data that is useful for 1) verifying the research team's interpretation of interviewee's accounts, 2) specifying the events that occur within processes that the implementation is targeting (identifying knowledge gaps and inefficiencies), and 3) articulating and tracking the pre- to post-implementation changes clearly to support dissemination of evidence-based health care practices for justice-involved populations.",2019-03-26 +31072883,"Genome Sequence and Methylation Patterns of Halorubrum sp. Strain BOL3-1, the First Haloarchaeon Isolated and Cultured from Salar de Uyuni, Bolivia. ","Halorubrum sp. strain BOL3-1 was isolated from Salar de Uyuni, Bolivia, and sequenced using single-molecule real-time sequencing. Its 3.7-Mbp genome was analyzed for gene content and methylation patterns and incorporated into the Haloarchaeal Genomes Database (http://halo.umbc.edu). The polyextremophilic character and high-elevation environment make the microbe of interest for astrobiology.",2019-05-09 +31070385,"""Embracing in a female-bonded monkey species (Theropithecus gelada)"": Correction to Pallante et al. (2019).","Reports an error in ""Embracing in a female-bonded monkey species (Theropithecus gelada)"" by Virginia Pallante, Pier Francesco Ferrari, Marco Gamba and Elisabetta Palagi (Journal of Comparative Psychology, Advanced Online Publication, Mar 25, 2019, np). 
In the article ""Embracing in a Female-Bonded Monkey Species (Theropithecus gelada)"" by Virginia Pallante, Pier Francesco Ferrari, Marco Gamba, and Elisabetta Palagi (Journal of Comparative Psychology, Advance online publication. March 25, 2019. http://dx.doi.org/10.1037/ com0000173), the title incorrectly read ""Embracing in a Female-Bonded Monkey Specie (Theropithecus gelada)"" All versions of this article have been corrected. (The following abstract of the original article appeared in record 2019-15836-001.) In several primate species, including humans, embracing predicts the level of affiliation between subjects. To explore the functional meaning of embracing, we selected Theropithecus gelada as a model species. The basic level of the gelada society is the 1-male unit, and the integrity of the group is maintained by the strong bonds between females. In our study group, we observed 3 different kinds of embracing: the frontal and side embraces involving a face-to-face and chest-to-chest interaction and the posterior embrace that consists in putting the arms around conspecifics' back and posing a cheek on it. We verified that the quality of relationships between subjects predicts the type of embracing. Frontal and side embraces were more frequent between females sharing strong bonds. Posterior embracing was randomly distributed. We found a high level of female embracing among the mothers during the first months of lactation. This may improve female cohesiveness against males, thus reducing the risk of infanticide, which is particularly high in geladas. Embracing seems also to act as an ice-breaker favoring grooming. Female embracing could be an affiliative strategy that has evolved to maintain group integrity and high social cohesion among females, especially mothers. (PsycINFO Database Record (c) 2019 APA, all rights reserved).",2019-05-09 +21929785,The representation of protein complexes in the Protein Ontology (PRO).,"

Background

Representing species-specific proteins and protein complexes in ontologies that are both human- and machine-readable facilitates the retrieval, analysis, and interpretation of genome-scale data sets. Although existing protein-centric informatics resources provide the biomedical research community with well-curated compendia of protein sequence and structure, these resources lack formal ontological representations of the relationships among the proteins themselves. The Protein Ontology (PRO) Consortium is filling this informatics resource gap by developing ontological representations and relationships among proteins and their variants and modified forms. Because proteins are often functional only as members of stable protein complexes, the PRO Consortium, in collaboration with existing protein and pathway databases, has launched a new initiative to implement logical and consistent representation of protein complexes.

Description

We describe here how the PRO Consortium is meeting the challenge of representing species-specific protein complexes, how protein complex representation in PRO supports annotation of protein complexes and comparative biology, and how PRO is being integrated into existing community bioinformatics resources. The PRO resource is accessible at http://pir.georgetown.edu/pro/.

Conclusion

PRO is a unique database resource for species-specific protein complexes. PRO facilitates robust annotation of variations in composition and function contexts for protein complexes within and between species.",2011-09-19 +30651943,Automated Region Extraction from Thermal Images for Peripheral Vascular Disease Monitoring.,"This work develops a method for automatically extracting temperature data from prespecified anatomical regions of interest from thermal images of human hands, feet, and shins for the monitoring of peripheral arterial disease in diabetic patients. Binarisation, morphological operations, and geometric transformations are applied in cascade to automatically extract the required data from 44 predefined regions of interest. The implemented algorithms for region extraction were tested on data from 395 participants. A correct extraction in around 90% of the images was achieved. The process of automatically extracting 44 regions of interest was performed in a total computation time of approximately 1 minute, a substantial improvement over 10 minutes it took for a corresponding manual extraction of the regions by a trained individual. Interrater reliability tests showed that the automatically extracted ROIs are similar to those extracted by humans with minimal temperature difference. This set of algorithms provides a sufficiently accurate and reliable method for temperature extraction from thermal images at par with human raters with a tenfold reduction in time requirement. The automated process may replace the manual human extraction, leading to a faster process, making it feasible to carry out large-scale studies and to increase the regions of interest with minimal cost. 
The code for the developed algorithms, to extract the 44 ROIs from thermal images of hands, feet, and shins, has been made available online in the form of MATLAB functions and can be accessed from http://www.um.edu.mt/cbc/tipmid.",2018-12-13 +25024350,OMICtools: an informative directory for multi-omic data analysis. ,"Recent advances in 'omic' technologies have created unprecedented opportunities for biological research, but current software and database resources are extremely fragmented. OMICtools is a manually curated metadatabase that provides an overview of more than 4400 web-accessible tools related to genomics, transcriptomics, proteomics and metabolomics. All tools have been classified by omic technologies (next-generation sequencing, microarray, mass spectrometry and nuclear magnetic resonance) associated with published evaluations of tool performance. Information about each tool is derived either from a diverse set of developers, the scientific literature or from spontaneous submissions. OMICtools is expected to serve as a useful didactic resource not only for bioinformaticians but also for experimental researchers and clinicians. Database URL: http://omictools.com/.",2014-07-14 +28025344,GeneBase 1.1: a tool to summarize data from NCBI gene datasets and its application to an update of human gene statistics. ,"We release GeneBase 1.1, a local tool with a graphical interface useful for parsing, structuring and indexing data from the National Center for Biotechnology Information (NCBI) Gene data bank. Compared to its predecessor GeneBase (1.0), GeneBase 1.1 now allows dynamic calculation and summarization in terms of median, mean, standard deviation and total for many quantitative parameters associated with genes, gene transcripts and gene features (exons, introns, coding sequences, untranslated regions). 
GeneBase 1.1 thus offers the opportunity to perform analyses of the main gene structure parameters also following the search for any set of genes with the desired characteristics, allowing unique functionalities not provided by the NCBI Gene itself. In order to show the potential of our tool for local parsing, structuring and dynamic summarizing of publicly available databases for data retrieval, analysis and testing of biological hypotheses, we provide as a sample application a revised set of statistics for human nuclear genes, gene transcripts and gene features. In contrast with previous estimations strongly underestimating the length of human genes, a 'mean' human protein-coding gene is 67 kbp long, has eleven 309 bp long exons and ten 6355 bp long introns. Median, mean and extreme values are provided for many other features offering an updated reference source for human genome studies, data useful to set parameters for bioinformatic tools and interesting clues to the biomedical meaning of the gene features themselves.Database URL: http://apollo11.isto.unibo.it/software/.",2016-12-26 +22080555,The UCSC Archaeal Genome Browser: 2012 update.,"The UCSC Archaeal Genome Browser (http://archaea.ucsc.edu) offers a graphical web-based resource for exploration and discovery within archaeal and other selected microbial genomes. By bringing together existing gene annotations, gene expression data, multiple-genome alignments, pre-computed sequence comparisons and other specialized analysis tracks, the genome browser is a powerful aggregator of varied genomic information. The genome browser environment maintains the current look-and-feel of the vertebrate UCSC Genome Browser, but also integrates archaeal and bacterial-specific tracks with a few graphic display enhancements. The browser currently contains 115 archaeal genomes, plus 31 genomes of viruses known to infect archaea. 
Some of the recently developed or enhanced tracks visualize data from published high-throughput RNA-sequencing studies, the NCBI Conserved Domain Database, sequences from pre-genome sequencing studies, predicted gene boundaries from three different protein gene prediction algorithms, tRNAscan-SE gene predictions with RNA secondary structures and CRISPR locus predictions. We have also developed a companion resource, the Archaeal COG Browser, to provide better search and display of arCOG gene function classifications, including their phylogenetic distribution among available archaeal genomes.",2011-11-12 +25725062,PathCards: multi-source consolidation of human biological pathways. ,"The study of biological pathways is key to a large number of systems analyses. However, many relevant tools consider a limited number of pathway sources, missing out on many genes and gene-to-gene connections. Simply pooling several pathways sources would result in redundancy and the lack of systematic pathway interrelations. To address this, we exercised a combination of hierarchical clustering and nearest neighbor graph representation, with judiciously selected cutoff values, thereby consolidating 3215 human pathways from 12 sources into a set of 1073 SuperPaths. Our unification algorithm finds a balance between reducing redundancy and optimizing the level of pathway-related informativeness for individual genes. We show a substantial enhancement of the SuperPaths' capacity to infer gene-to-gene relationships when compared with individual pathway sources, separately or taken together. Further, we demonstrate that the chosen 12 sources entail nearly exhaustive gene coverage. The computed SuperPaths are presented in a new online database, PathCards, showing each SuperPath, its constituent network of pathways, and its contained genes. This provides researchers with a rich, searchable systems analysis resource. 
Database URL: http://pathcards.genecards.org/",2015-02-27 +,"Two new species of fossil Corethrella Coquillett from Late Eocene Rovno amber, with a species‐level phylogeny for the family based on morphological traits (Diptera: Corethrellidae)","Species in the genus Corethrella Coquillett, the only genus in the family Corethrellidae, feed on the blood of frogs and toads and are unique among Diptera in locating their hosts by sound. Their fossil record goes back to the Cretaceous and comprises seven previously described species from various amber deposits. Herein, we present the first records of Corethrella from Late Eocene Rovno amber (35 Ma), with the description of two new species: Corethrella sontagae sp.n. based on a single male, and Corethrella rovnoensis sp.n. based on a single female. A revised key to fossil species of Corethrella is given. Phylogenetic relationships within Corethrella are investigated based on Bayesian and parsimony analysis, including the new data. All species groups recognized by Borkent (2008) are recovered as monophyletic, with the exception of the drakensbergensis group, which is found to be a paraphyletic grade. Both of the newly described species can be accommodated within modern species groups: C. sontagae shows affinities with the quadrivittata group and C. rovnoensis with the rotunda group. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:AE394C59‐775F‐42DC‐A5A8‐AF9C91E16464.",2016-07-01 +25708381,A systems biology approach toward understanding seed composition in soybean.,"

Background

The molecular, biochemical, and genetic mechanisms that regulate the complex metabolic network of soybean seed development determine the ultimate balance of protein, lipid, and carbohydrate stored in the mature seed. Many of the genes and metabolites that participate in seed metabolism are unknown or poorly defined; even more remains to be understood about the regulation of their metabolic networks. A global omics analysis can provide insights into the regulation of seed metabolism, even without a priori assumptions about the structure of these networks.

Results

With the future goal of predictive biology in mind, we have combined metabolomics, transcriptomics, and metabolic flux technologies to reveal the global developmental and metabolic networks that determine the structure and composition of the mature soybean seed. We have coupled this global approach with interactive bioinformatics and statistical analyses to gain insights into the biochemical programs that determine soybean seed composition. For this purpose, we used Plant/Eukaryotic and Microbial Metabolomics Systems Resource (PMR, http://www.metnetdb.org/pmr), a platform that incorporates metabolomics data to develop hypotheses concerning the organization and regulation of metabolic networks, and MetNet systems biology tools http://www.metnetdb.org for plant omics data, a framework to enable interactive visualization of metabolic and regulatory networks.

Conclusions

This combination of high-throughput experimental data and bioinformatics analyses has revealed sets of specific genes, genetic perturbations and mechanisms, and metabolic changes that are associated with the developmental variation in soybean seed composition. Researchers can explore these metabolomics and transcriptomics data interactively at PMR.",2015-01-29 +31284879,ImtRDB: a database and software for mitochondrial imperfect interspersed repeats annotation.,"

Background

Mitochondria are the powerhouses of all eukaryotic cells and have their own circular DNA (mtDNA) encoding various RNAs and proteins. Somatic perturbations of mtDNA accumulate with age; thus it is of great importance to uncover the main sources of mtDNA instability. Recent analyses demonstrated that somatic mtDNA deletions depend on imperfect repeats of various nature between distant mtDNA segments. However, until now there have been no comprehensive databases annotating all types of imperfect repeats in numerous species with sequenced complete mitochondrial genomes, nor are there algorithms capable of calling all types of imperfect repeats in circular mtDNA.

Results

We implemented naïve algorithm of pattern recognition by analogy to standard dot-plot construction procedures allowing us to find both perfect and imperfect repeats of four main types: direct, inverted, mirror and complementary. Our algorithm is adapted to specific characteristics of mtDNA such as circularity and an excess of short repeats - it calls imperfect repeats starting from the length of 10 b.p. We constructed interactive web available database ImtRDB depositing perfect and imperfect repeats positions in mtDNAs of more than 3500 Vertebrate species. Additional tools, such as visualization of repeats within a genome, comparison of repeat densities among different genomes and a possibility to download all results make this database useful for many biologists. Our first analyses of the database demonstrated that mtDNA imperfect repeats (i) are usually short; (ii) associated with unfolded DNA structures; (iii) four types of repeats positively correlate with each other forming two equivalent pairs: direct and mirror versus inverted and complementary, with identical nucleotide content and similar distribution between species; (iv) abundance of repeats is negatively associated with GC content; (v) dinucleotides GC versus CG are overrepresented on light chain of mtDNA covered by repeats.

Conclusions

ImtRDB is available at http://bioinfodbs.kantiana.ru/ImtRDB/ . It is accompanied by the software calling all types of interspersed repeats with different level of degeneracy in circular DNA. This database and software can become a very useful tool in various areas of mitochondrial and chloroplast DNA research.",2019-05-08 +31067213,EAT-Rice: A predictive model for flanking gene expression of T-DNA insertion activation-tagged rice mutants by machine learning approaches.,"T-DNA activation-tagging technology is widely used to study rice gene functions. When T-DNA inserts into genome, the flanking gene expression may be altered using CaMV 35S enhancer, but the affected genes still need to be validated by biological experiment. We have developed the EAT-Rice platform to predict the flanking gene expression of T-DNA insertion site in rice mutants. The three kinds of DNA sequences including UPS1K, DISTANCE, and MIDDLE were retrieved to encode and build a forecast model of two-layer machine learning. In the first-layer models, the features nucleotide context (N-gram), cis-regulatory elements (Motif), nucleotide physicochemical properties (NPC), and CG-island (CGI) were used to build SVM models by analysing the concealed information embedded within the three kinds of sequences. Logistic regression was used to estimate the probability of gene activation which as feature-encoding weighting within first-layer model. In the second-layer models, the NaiveBayesUpdateable algorithm was used to integrate these first layer-models, and the system performance was 88.33% on 5-fold cross-validation, and 79.17% on independent-testing finally. In the three kinds of sequences, the model constructed by Middle had the best contribution to the system for identifying the activated genes. The EAT-Rice system provided better performance and gene expression prediction at further distances when compared to the TRIM database. 
An online server based on EAT-rice is available at http://predictor.nchu.edu.tw/EAT-Rice.",2019-05-08 +31891522,Occurrence of Selected Organic Contaminants in Edible Insects and Assessment of Their Chemical Safety.,"Background: Feeding the continuously growing world population is challenging, and edible insects offer a sustainable alternative to conventional sources of animal proteins. As with any food source, the potential presence of hazardous organic chemicals, such as persistent organic pollutants (POPs), plasticizers and flame retardants (FRs), must be investigated to guarantee consumer chemical safety.

Objectives: Here, we have investigated the contamination levels of several classes of organic compounds in edible insects. To evaluate their chemical safety, a dietary exposure risk assessment was then performed by combining the measured chemical contamination with the most recent food consumption data from local surveys.

Methods: Insect samples, belonging to six orders (Orthoptera, Coleoptera, Lepidoptera, Hemiptera, Odonata, Hymenoptera) were purchased from five European and three Asian countries. POPs and halogenated FRs were analyzed by gas chromatography-mass spectrometry (GC/MS) and organophosphorus FRs and plasticizers were quantified by liquid chromatography-MS/MS, according to validated protocols.

Results: The overall levels of chemical contamination varied greatly among the insect orders and country of purchase, but they were generally low and comparable with other commonly consumed animal products.

Discussion: Here we show that, besides the activities during rearing, the industrial post-harvesting handling and addition of ingredients are supplementary factors influencing the chemical load of the final insect food-product. The total estimated dietary intakes of the considered classes of compounds through insect consumption are comparable with those generally assessed in common food of animal origin worldwide and, when compared with existing reference dose values, suggest that the risk of adverse health effects from exposure to the targeted organic compounds via insect consumption is unlikely. https://doi.org/10.1289/EHP5782.",2019-12-31 +26611085,"Discovery of Sound in the Sea: Resources for Educators, Students, the Public, and Policymakers.","There is increasing concern about the effects of underwater sound on marine life. However, the science of sound is challenging. The Discovery of Sound in the Sea (DOSITS) Web site ( http://www.dosits.org ) was designed to provide comprehensive scientific information on underwater sound for the public and educational and media professionals. It covers the physical science of underwater sound and its use by people and marine animals for a range of tasks. Celebrating 10 years of online resources, DOSITS continues to develop new material and improvements, providing the best resource for the most up-to-date information on underwater sound and its potential effects.",2016-01-01 +29549628,IIKmTA: Inter and Intra Kingdom miRNA-Target Analyzer.,"BACKGROUND:Growing evidences suggest that microRNAs (miRNAs) can efficiently regulate gene expression at intracellular and extracellular levels. It has been previously reported that plant/food-derived miRNAs are highly enriched in human serum or serum from phytophagous animals, and they are responsible for regulating mammalian gene expression. Thus, miRNAs could function as active signaling molecules, which carry information across distinct species or even kingdoms. 
However, the mode of miRNA shuttling among various organisms is still a mystery to unravel. The intra and inter kingdom miRNA transfer has boosted up the hypothesis about the potential impact of plant or animal miRNAs on each other. To our knowledge, the software for analyzing cross-kingdom miRNA-targets is lacking. RESULTS:We have developed a web-tool ""IIKmTA: Inter and Intra Kingdom miRNA-Target Analyzer"" utilizing a database; the data of which have been collected from another web server. Here, user can analyze the targeting potential of (i) plant miRNAs on animal UTRs (Untranslated regions), and vice versa (i.e., inter kingdom), (ii) plant miRNAs on plant UTRs and animal miRNAs on animal UTRs (i.e., intra kingdom). Further, user can analyze (i) miRNAs to targets, (ii) targets to miRNAs, and (iii) miRNA sets targeting sets of targets. For a wide variety of animal and plant species, IIKmTA can identify the miRNA binding sites in the probable target UTRs. Moreover, GC% and AU% of miRNAs will be calculated. All the results can be saved as .csv file. CONCLUSIONS:Recent researches identified miRNAs in plants and human secretions and their role in regulating the human genes. Such findings indicate the therapeutic role of secretory miRNAs of such plants which exhibits medicinal value and in near future many diseases may be treated by consumption of these plant miRNAs through food. Using our newly developed database and analyzing tool, one can easily determine the different relationships between miRNAs and their targets across kingdoms. IIKmTA is freely available at http://www.bioinformatics.org/iikmta/ .",2018-03-16 +27284060,InteractoMIX: a suite of computational tools to exploit interactomes in biological and clinical research.,"Virtually all the biological processes that occur inside or outside cells are mediated by protein-protein interactions (PPIs). 
Hence, the charting and description of the PPI network, initially in organisms, the interactome, but more recently in specific tissues, is essential to fully understand cellular processes both in health and disease. The study of PPIs is also at the heart of renewed efforts in the medical and biotechnological arena in the quest of new therapeutic targets and drugs. Here, we present a mini review of 11 computational tools and resources tools developed by us to address different aspects of PPIs: from interactome level to their atomic 3D structural details. We provided details on each specific resource, aims and purpose and compare with equivalent tools in the literature. All the tools are presented in a centralized, one-stop, web site: InteractoMIX (http://interactomix.com).",2016-06-01 +28750104,BrEPS 2.0: Optimization of sequence pattern prediction for enzyme annotation.,"The prediction of gene functions is crucial for a large number of different life science areas. Faster high throughput sequencing techniques generate more and larger datasets. The manual annotation by classical wet-lab experiments is not suitable for these large amounts of data. We showed earlier that the automatic sequence pattern-based BrEPS protocol, based on manually curated sequences, can be used for the prediction of enzymatic functions of genes. The growing sequence databases provide the opportunity for more reliable patterns, but are also a challenge for the implementation of automatic protocols. We reimplemented and optimized the BrEPS pattern generation to be applicable for larger datasets in an acceptable timescale. Primary improvement of the new BrEPS protocol is the enhanced data selection step. Manually curated annotations from Swiss-Prot are used as reliable source for function prediction of enzymes observed on protein level. The pool of sequences is extended by highly similar sequences from TrEMBL and SwissProt. 
This allows us to restrict the selection of Swiss-Prot entries, without losing the diversity of sequences needed to generate significant patterns. Additionally, a supporting pattern type was introduced by extending the patterns at semi-conserved positions with highly similar amino acids. Extended patterns have an increased complexity, increasing the chance to match more sequences, without losing the essential structural information of the pattern. To enhance the usability of the database, we introduced enzyme function prediction based on consensus EC numbers and IUBMB enzyme nomenclature. BrEPS is part of the Braunschweig Enzyme Database (BRENDA) and is available on a completely redesigned website and as download. The database can be downloaded and used with the BrEPScmd command line tool for large scale sequence analysis. The BrEPS website and downloads for the database creation tool, command line tool and database are freely accessible at http://breps.tu-bs.de.",2017-07-27 +31939685,"Hourly Exposure to Ultrafine Particle Metrics and the Onset of Myocardial Infarction in Augsburg, Germany.","BACKGROUND:Epidemiological evidence on the health effects of ultrafine particles (UFP) remains insufficient to infer a causal relationship that is largely due to different size ranges and exposure metrics examined across studies. Moreover, evidence regarding the association between UFP and cardiovascular disease at a sub-daily timescale is lacking. OBJECTIVE:We investigated the relationship between different particle metrics, including particle number (PNC), length (PLC), and surface area (PSC) concentrations, and myocardial infarction (MI) at an hourly timescale. METHODS:We collected hourly air pollution and meteorological data from fixed urban background monitoring sites and hourly nonfatal MI cases from a MI registry in Augsburg, Germany, during 2005-2015. 
We conducted a time-stratified case-crossover analysis with conditional logistic regression to estimate the association between hourly particle metrics and MI cases, adjusted for air temperature and relative humidity. We also examined the independent effects of a certain particle metric in two-pollutant models by adjusting for copollutants, including particulate matter (PM) with an aerodynamic diameter of ≤10μm or 2.5μm (PM10 and PM2.5, respectively), nitrogen dioxide, ozone, and black carbon. RESULTS:Overall, a total of 5,898 cases of nonfatal MI cases were recorded. Exploratory analyses showed similar associations across particle metrics in the first 6-12 h. For example, interquartile range increases in PNC within the size range of 10-100 nm, PLC, and PSC were associated with an increase of MI 6 h later by 3.27% [95% confidence interval (CI): 0.27, 6.37], 5.71% (95% CI: 1.79, 9.77), and 5.84% (95% CI: 1.04, 10.87), respectively. Positive, albeit imprecise, associations were observed for PNC within the size range of 10-30 nm and 100-500 nm. Effect estimates for PLC and PSC remained similar after adjustment for PM and gaseous pollutants. CONCLUSIONS:Transient exposure to particle number, length, and surface area concentrations or other potentially related exposures may trigger the onset of nonfatal myocardial infraction. https://doi.org/10.1289/EHP5478.",2020-01-15 +32074458,"Exposure to Air Pollution during Pregnancy and Childhood, and White Matter Microstructure in Preadolescents.","BACKGROUND:Air pollution has been related to brain structural alterations, but a relationship with white matter microstructure is unclear. OBJECTIVES:We assessed whether pregnancy and childhood exposures to air pollution are related to white matter microstructure in preadolescents. METHODS:We used data of 2,954 children from the Generation R Study, a population-based birth cohort from Rotterdam, Netherlands (2002-2006). 
Concentrations of 17 air pollutants including nitrogen oxides (NOX), particulate matter (PM), and components of PM were estimated at participants' homes during pregnancy and childhood using land-use regression models. Diffusion tensor images were obtained at child's 9-12 years of age, and fractional anisotropy (FA) and mean diffusivity (MD) were computed. We performed linear regressions adjusting for socioeconomic and lifestyle characteristics. Single-pollutant analyses were followed by multipollutant analyses using the Deletion/Substitution/Addition (DSA) algorithm. RESULTS:In the single-pollutant analyses, higher concentrations of several air pollutants during pregnancy or childhood were associated with significantly lower FA or higher MD (p<0.05). In multipollutant models of pregnancy exposures selected by DSA, higher concentration of fine particles was associated with significantly lower FA [-0.71 (95% CI: -1.26, -0.16) per 5 μg/m3 fine particles] and higher concentration of elemental silicon with significantly higher MD [0.06 (95% CI: 0.01, 0.11) per 100 ng/m3 silicon]. Multipollutant models of childhood exposures selected by DSA indicated significant associations of NOX with FA [-0.14 (95% CI: -0.23, -0.04) per 20-μg/m3 NOX increase], and of elemental zinc and the oxidative potential of PM with MD [0.03 (95% CI: 0.01, 0.04) per 10-ng/m3 zinc increase and 0.07 (95% CI: 0.00, 0.44) per 1-nmol DTT/min/m3 oxidative potential increase]. Mutually adjusted models of significant exposures during pregnancy and childhood indicated significant associations of silicon during pregnancy, and zinc during childhood, with MD. DISCUSSION:Exposure in pregnancy and childhood to air pollutants from tailpipe and non-tailpipe emissions were associated with lower FA and higher MD in white matter of preadolescents. 
https://doi.org/10.1289/EHP4709.",2020-02-13 +29293498,Improving pairwise comparison of protein sequences with domain co-occurrence.,"Comparing and aligning protein sequences is an essential task in bioinformatics. More specifically, local alignment tools like BLAST are widely used for identifying conserved protein sub-sequences, which likely correspond to protein domains or functional motifs. However, to limit the number of false positives, these tools are used with stringent sequence-similarity thresholds and hence can miss several hits, especially for species that are phylogenetically distant from reference organisms. A solution to this problem is then to integrate additional contextual information to the procedure. Here, we propose to use domain co-occurrence to increase the sensitivity of pairwise sequence comparisons. Domain co-occurrence is a strong feature of proteins, since most protein domains tend to appear with a limited number of other domains on the same protein. We propose a method to take this information into account in a typical BLAST analysis and to construct new domain families on the basis of these results. We used Plasmodium falciparum as a case study to evaluate our method. The experimental findings showed an increase of 14% of the number of significant BLAST hits and an increase of 25% of the proteome area that can be covered with a domain. Our method identified 2240 new domains for which, in most cases, no model of the Pfam database could be linked. Moreover, our study of the quality of the new domains in terms of alignment and physicochemical properties show that they are close to that of standard Pfam domains. 
Source code of the proposed approach and supplementary data are available at: https://gite.lirmm.fr/menichelli/pairwise-comparison-with-cooccurrence.",2018-01-02 +31996129,pSpatiocyte: a high-performance simulator for intracellular reaction-diffusion systems.,"BACKGROUND:Studies using quantitative experimental methods have shown that intracellular spatial distribution of molecules plays a central role in many cellular systems. Spatially resolved computer simulations can integrate quantitative data from these experiments to construct physically accurate models of the systems. Although computationally expensive, microscopic resolution reaction-diffusion simulators, such as Spatiocyte can directly capture intracellular effects comprising diffusion-limited reactions and volume exclusion from crowded molecules by explicitly representing individual diffusing molecules in space. To alleviate the steep computational cost typically associated with the simulation of large or crowded intracellular compartments, we present a parallelized Spatiocyte method called pSpatiocyte. RESULTS:The new high-performance method employs unique parallelization schemes on hexagonal close-packed (HCP) lattice to efficiently exploit the resources of common workstations and large distributed memory parallel computers. We introduce a coordinate system for fast accesses to HCP lattice voxels, a parallelized event scheduler, a parallelized Gillespie's direct-method for unimolecular reactions, and a parallelized event for diffusion and bimolecular reaction processes. We verified the correctness of pSpatiocyte reaction and diffusion processes by comparison to theory. To evaluate the performance of pSpatiocyte, we performed a series of parallelized diffusion runs on the RIKEN K computer. In the case of fine lattice discretization with low voxel occupancy, pSpatiocyte exhibited 74% parallel efficiency and achieved a speedup of 7686 times with 663552 cores compared to the runtime with 64 cores. 
In the weak scaling performance, pSpatiocyte obtained efficiencies of at least 60% with up to 663552 cores. When executing the Michaelis-Menten benchmark model on an eight-core workstation, pSpatiocyte required 45- and 55-fold shorter runtimes than Smoldyn and the parallel version of ReaDDy, respectively. As a high-performance application example, we study the dual phosphorylation-dephosphorylation cycle of the MAPK system, a typical reaction network motif in cell signaling pathways. CONCLUSIONS:pSpatiocyte demonstrates good accuracies, fast runtimes and a significant performance advantage over well-known microscopic particle methods in large-scale simulations of intracellular reaction-diffusion systems. The source code of pSpatiocyte is available at https://spatiocyte.org.",2020-01-29 +29893814,SSMART: sequence-structure motif identification for RNA-binding proteins.,"

Motivation

RNA-binding proteins (RBPs) regulate every aspect of RNA metabolism and function. There are hundreds of RBPs encoded in the eukaryotic genomes, and each recognizes its RNA targets through a specific mixture of RNA sequence and structure properties. For most RBPs, however, only a primary sequence motif has been determined, while the structure of the binding sites is uncharacterized.

Results

We developed SSMART, an RNA motif finder that simultaneously models the primary sequence and the structural properties of the RNA target sites. The sequence-structure motifs are represented as consensus strings over a degenerate alphabet, extending the IUPAC codes for nucleotides to account for secondary structure preferences. Evaluation on synthetic data showed that SSMART is able to recover both sequence and structure motifs implanted into 3'UTR-like sequences, for various degrees of structured/unstructured binding sites. In addition, we successfully used SSMART on high-throughput in vivo and in vitro data, showing that we not only recover the known sequence motif, but also gain insight into the structural preferences of the RBP.

Availability and implementation

SSMART is freely available at https://ohlerlab.mdc-berlin.de/software/SSMART_137/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +29688353,Fungal Stress Database (FSD)--a repository of fungal stress physiological data. ,"The construction of the Fungal Stress Database (FSD) was initiated and fueled by two major goals. At first, some outstandingly important groups of filamentous fungi including the aspergilli possess remarkable capabilities to adapt to a wide spectrum of environmental stress conditions but the underlying mechanisms of this stress tolerance have remained yet to be elucidated. Furthermore, the lack of any satisfactory interlaboratory standardization of stress assays, e.g. the widely used stress agar plate experiments, often hinders the direct comparison and discussion of stress physiological data gained for various fungal species by different research groups. In order to overcome these difficulties and to promote multilevel, e.g. combined comparative physiology-based and comparative genomics-based, stress research in filamentous fungi, we constructed FSD, which currently stores 1412 photos taken on Aspergillus colonies grown under precisely defined stress conditions. This study involved altogether 18 Aspergillus strains representing 17 species with two different strains for Aspergillus niger and covered six different stress conditions. Stress treatments were selected considering the frequency of various stress tolerance studies published in the last decade in the aspergilli and included oxidative (H2O2, menadione sodium bisulphite), high-osmolarity (NaCl, sorbitol), cell wall integrity (Congo Red) and heavy metal (CdCl2) stress exposures. In the future, we would like to expand this database to accommodate further fungal species and stress treatments.URL: http://www.fung-stress.org/",2018-01-01 +25380960,IntSide: a web server for the chemical and biological examination of drug side effects.,"

Summary

Drug side effects are one of the main health threats worldwide, and an important obstacle in drug development. Understanding how adverse reactions occur requires knowledge on drug mechanisms at the molecular level. Despite recent advances, the need for tools and methods that facilitate side effect anticipation still remains. Here, we present IntSide, a web server that integrates chemical and biological information to elucidate the molecular mechanisms underlying drug side effects. IntSide currently catalogs 1175 side effects caused by 996 drugs, associated with drug features divided into eight categories, belonging to either biology or chemistry. On the biological side, IntSide reports drug targets and off-targets, pathways, molecular functions and biological processes. From a chemical viewpoint, it includes molecular fingerprints, scaffolds and chemical entities. Finally, we also integrate additional biological data, such as protein interactions and disease-related genes, to facilitate mechanistic interpretations.

Availability and implementation

Our data and web resource are available online (http://intside.irbbarcelona.org/).

Contact

patrick.aloy@irbbarcelona.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-17 +29040692,RMBase v2.0: deciphering the map of RNA modifications from epitranscriptome sequencing data.,"More than 100 distinct chemical modifications to RNA have been characterized so far. However, the prevalence, mechanisms and functions of various RNA modifications remain largely unknown. To provide transcriptome-wide landscapes of RNA modifications, we developed the RMBase v2.0 (http://rna.sysu.edu.cn/rmbase/), which is a comprehensive database that integrates epitranscriptome sequencing data for the exploration of post-transcriptional modifications of RNAs and their relationships with miRNA binding events, disease-related single-nucleotide polymorphisms (SNPs) and RNA-binding proteins (RBPs). RMBase v2.0 was expanded with ∼600 datasets and ∼1 397 000 modification sites from 47 studies among 13 species, which represents an approximately 10-fold expansion when compared with the previous release. It contains ∼1 373 000 N6-methyladenosines (m6A), ∼5400 N1-methyladenosines (m1A), ∼9600 pseudouridine (Ψ) modifications, ∼1000 5-methylcytosine (m5C) modifications, ∼5100 2'-O-methylations (2'-O-Me), and ∼2800 modifications of other modification types. Moreover, we built a new module called 'Motif' that provides the visualized logos and position weight matrices (PWMs) of the modification motifs. We also constructed a novel module termed 'modRBP' to study the relationships between RNA modifications and RBPs. Additionally, we developed a novel web-based tool named 'modMetagene' to plot the metagenes of RNA modification along a transcript model. 
This database will help researchers investigate the potential functions and mechanisms of RNA modifications.",2018-01-01 +30849020,The Effect of Gaussian Noise on Maximum Likelihood Fitting of Gompertz and Weibull Mortality Models with Yeast Lifespan Data.,"Background/study context: Empirical lifespan data sets are often studied with the best-fitted mathematical model for aging. Here, we studied how experimental noises can influence the determination of the best-fitted aging model. We investigated the influence of Gaussian white noise in lifespan data sets on the fitting outcomes of two-parameter Gompertz and Weibull mortality models, commonly adopted in aging research.

Methods

To unequivocally demonstrate the effect of Gaussian white noises, we simulated lifespans based on Gompertz and Weibull models with added white noises. To gauge the influence of white noise on model fitting, we defined a single index, δLL, for the difference between the maximal log-likelihoods of the Weibull and Gompertz model fittings. We then applied the δLL approach using experimental replicative lifespan data sets for the laboratory BY4741 and BY4742 wildtype reference strains.

Results

We systematically evaluated how Gaussian white noise can influence the maximal likelihood-based comparison of the Gompertz and Weibull models. Our comparative study showed that the Weibull model is generally more tolerant to Gaussian white noise than the Gompertz model. The effect of noise on model fitting is also sensitive to model parameters.

Conclusion

Our study shows that Gaussian white noise can influence the fitting of an aging model for yeast replicative lifespans. Given that yeast replicative lifespans are hard to measure and are often pooled from different experiments, our study highlights that interpreting model fitting results should take experimental procedure variation into account, and the best fitting model may not necessarily offer more biological insights.",2019-03-08 +27412095,SSBD: a database of quantitative data of spatiotemporal dynamics of biological phenomena.,"

Motivation

Rapid advances in live-cell imaging analysis and mathematical modeling have produced a large amount of quantitative data on spatiotemporal dynamics of biological objects ranging from molecules to organisms. There is now a crucial need to bring these large amounts of quantitative biological dynamics data together centrally in a coherent and systematic manner. This will facilitate the reuse of this data for further analysis.

Results

We have developed the Systems Science of Biological Dynamics database (SSBD) to store and share quantitative biological dynamics data. SSBD currently provides 311 sets of quantitative data for single molecules, nuclei and whole organisms in a wide variety of model organisms from Escherichia coli to Mus musculus. The data are provided in Biological Dynamics Markup Language format and also through a REST API. In addition, SSBD provides 188 sets of time-lapse microscopy images from which the quantitative data were obtained and software tools for data visualization and analysis.

Availability and implementation

SSBD is accessible at http://ssbd.qbic.riken.jp CONTACT: sonami@riken.jp.",2016-07-13 +28596423,"Integration of over 9,000 mass spectrometry experiments builds a global map of human protein complexes.","Macromolecular protein complexes carry out many of the essential functions of cells, and many genetic diseases arise from disrupting the functions of such complexes. Currently, there is great interest in defining the complete set of human protein complexes, but recent published maps lack comprehensive coverage. Here, through the synthesis of over 9,000 published mass spectrometry experiments, we present hu.MAP, the most comprehensive and accurate human protein complex map to date, containing > 4,600 total complexes, > 7,700 proteins, and > 56,000 unique interactions, including thousands of confident protein interactions not identified by the original publications. hu.MAP accurately recapitulates known complexes withheld from the learning procedure, which was optimized with the aid of a new quantitative metric (k-cliques) for comparing sets of sets. The vast majority of complexes in our map are significantly enriched with literature annotations, and the map overall shows improved coverage of many disease-associated proteins, as we describe in detail for ciliopathies. Using hu.MAP, we predicted and experimentally validated candidate ciliopathy disease genes in vivo in a model vertebrate, discovering CCDC138, WDR90, and KIAA1328 to be new cilia basal body/centriolar satellite proteins, and identifying ANKRD55 as a novel member of the intraflagellar transport machinery. By offering significant improvements to the accuracy and coverage of human protein complexes, hu.MAP (http://proteincomplexes.org) serves as a valuable resource for better understanding the core cellular functions of human proteins and helping to determine mechanistic foundations of human disease.",2017-06-08 +29511608,Ligand complex structures of l-amino acid oxidase/monooxygenase from Pseudomonas sp. 
AIU 813 and its conformational change.,"l-Amino acid oxidase/monooxygenase from Pseudomonas sp. AIU 813 (l-AAO/MOG) catalyzes both the oxidative deamination and oxidative decarboxylation of the α-group of l-Lys to produce a keto acid and amide, respectively. l-AAO/MOG exhibits limited specificity for l-amino acid substrates with a basic side chain. We previously determined its ligand-free crystal structure and identified a key residue for maintaining the dual activities. Here, we determined the structures of l-AAO/MOG complexed with l-Lys, l-ornithine, and l-Arg and revealed its substrate recognition. Asp238 is located at the ceiling of a long hydrophobic pocket and forms a strong interaction with the terminal, positively charged group of the substrates. A mutational analysis on the D238A mutant indicated that the interaction is critical for substrate binding but not for catalytic control between the oxidase/monooxygenase activities. The catalytic activities of the D238E mutant unexpectedly increased, while the D238F mutant exhibited altered substrate specificity to long hydrophobic substrates. In the ligand-free structure, there are two channels connecting the active site and solvent, and a short region located at the dimer interface is disordered. In the l-Lys complex structure, a loop region is displaced to plug the channels. Moreover, the disordered region in the ligand-free structure forms a short helix in the substrate complex structures and creates the second binding site for the substrate. It is assumed that the amino acid substrate enters the active site of l-AAO/MOG through this route.

Database

The atomic coordinates and structure factors (codes 5YB6, 5YB7, and 5YB8) have been deposited in the Protein Data Bank (http://wwpdb.org/).

Ec numbers

1.4.3.2 (l-amino acid oxidase), 1.13.12.2 (lysine 2-monooxygenase).",2018-02-08 +25161219,Lambda: the local aligner for massive biological data.,"

Motivation

Next-generation sequencing technologies produce unprecedented amounts of data, leading to completely new research fields. One of these is metagenomics, the study of large-size DNA samples containing a multitude of diverse organisms. A key problem in metagenomics is to functionally and taxonomically classify the sequenced DNA, to which end the well-known BLAST program is usually used. But BLAST has dramatic resource requirements at metagenomic scales of data, imposing a high financial or technical burden on the researcher. Multiple attempts have been made to overcome these limitations and present a viable alternative to BLAST.

Results

In this work we present Lambda, our own alternative for BLAST in the context of sequence classification. In our tests, Lambda often outperforms the best tools at reproducing BLAST's results and is the fastest compared with the current state of the art at comparable levels of sensitivity.

Availability and implementation

Lambda was implemented in the SeqAn open-source C++ library for sequence analysis and is publicly available for download at http://www.seqan.de/projects/lambda.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +23245335,Full-length transcriptome-based H-InvDB throws a new light on chromosome-centric proteomics.,"H-Invitational Database (H-InvDB; http://hinv.jp/ ) is an integrated database of all human genes and transcripts that started in an international collaborative research project for establishing a functional annotation database of human full-length cDNAs. Because H-InvDB contains an abundance of information for human transcripts, including not only well-characterized protein-coding transcripts but also those without experimental evidence at the protein level, this will be a useful information resource for identifying novel and uncharacterized human proteins (so-called missing proteins). By extending predicted protein data in H-InvDB, we developed the H-Inv Extended Protein Database (H-EPD; http://hinv.jp/hinv/h-epd/ ). From now on, we plan to carry out a database-driven proteome research that makes full use of H-EPD to promote discoveries in the current and future C-HPP. Furthermore, we will push forward with the integration of genome, transcriptome, and proteome databases using a unique tool for connecting distributed databases and would like to develop a knowledge discovery system by incorporating data mining tools.",2012-12-17 +31503436,Web Interactive Presentation of EPC Reports: A Foray Into Interactive Reports,"

Background

Systematic reviews are consumed by different types of users to address an ever-expanding set of questions and needs. It is unlikely that a single static report will efficiently satisfy the diverse needs of diverse users. It might be practical and satisfactory to give users the ability to generate their own report from the information produced by the systematic review.

Methods

We developed an open-source web-based interactive report presentation (IRP) of a systematic review on 14 nonsurgical treatments for Urinary Incontinence (UI) in women. We used data from a systematic review we conducted through the Agency for Healthcare Research and Quality Evidence-based Practice Center Program for the Patient Centered Outcomes Research Institute. We presented wireframes (mockups) of the IRP to a panel of three Duke Health System stakeholders in teleconferences, and incorporated their feedback in the final IRP, which includes a visual representation of the evidence base, and allows users to access generic or detailed descriptive and analytic information through a point-and-click interface. Subsequently, the stakeholders piloted the IRP and evaluated its ease of use by answering open ended questions. We proposed a roadmap for scalable IRPs of systematic reviews starting from a thematic analysis of these suggestions.

Findings

The final tool (accessible at https://to1.infalliblekitty.com/#index) allows users to obtain descriptive and analytic results for a network of 14 treatment categories, various outcomes (cure, improvement, satisfaction, quality of life) and several subgroups (all women, older women, or those with stress or urge UI). Users can access descriptions overall or for specific sets of studies. The stakeholders were satisfied with the functionality of the tool, and commented on its ease of use. They proposed numerous augmentations, which we organized in themes of presentation (e.g., present information on numbers of trials on figures), analytic (e.g., allow on-the-fly subgroup analyses, explore tradeoffs between several outcomes) and information sharing (e.g., ability to import/export data from/to other software). A roadmap to satisfying these augmentations involves the following: (i) integrating analytic capabilities in the tool, (ii) incorporating ways to assess the tradeoffs between several distinct benefits and harms of interventions with minimal assumptions, (iii) defining an information technology standard for evidence synthesis objects (i.e., what information, in what format, completely describes an evidence-base and its synthesis), and (iv) developing a rigorous representation of evidence synthesis objects as mathematical graphs that can be analyzed.

Conclusions

In sum, we described a foray into an alternative view of a systematic review that complements a static systematic review report. Health system representatives found it useful and practical, and requested analytical, expository, and information sharing augmentations to the tool.",2019-09-11 +30063721,HSRA: Hadoop-based spliced read aligner for RNA sequencing data.,"Nowadays, the analysis of transcriptome sequencing (RNA-seq) data has become the standard method for quantifying the levels of gene expression. In RNA-seq experiments, the mapping of short reads to a reference genome or transcriptome is considered a crucial step that remains as one of the most time-consuming. With the steady development of Next Generation Sequencing (NGS) technologies, unprecedented amounts of genomic data introduce significant challenges in terms of storage, processing and downstream analysis. As cost and throughput continue to improve, there is a growing need for new software solutions that minimize the impact of increasing data volume on RNA read alignment. In this work we introduce HSRA, a Big Data tool that takes advantage of the MapReduce programming model to extend the multithreading capabilities of a state-of-the-art spliced read aligner for RNA-seq data (HISAT2) to distributed memory systems such as multi-core clusters or cloud platforms. HSRA has been built upon the Hadoop MapReduce framework and supports both single- and paired-end reads from FASTQ/FASTA datasets, providing output alignments in SAM format. The design of HSRA has been carefully optimized to avoid the main limitations and major causes of inefficiency found in previous Big Data mapping tools, which cannot fully exploit the raw performance of the underlying aligner. On a 16-node multi-core cluster, HSRA is on average 2.3 times faster than previous Hadoop-based tools. 
Source code in Java as well as a user's guide are publicly available for download at http://hsra.dec.udc.es.",2018-07-31 +31274316,Kinase Atlas: Druggability Analysis of Potential Allosteric Sites in Kinases.,"The inhibition of kinases has been pursued by the pharmaceutical industry for over 20 years. While the locations of the sites that bind type II and III inhibitors at or near the adenosine 5'-triphosphate binding sites are well defined, the literature describes 10 different regions that were reported as regulatory hot spots in some kinases and thus are potential target sites for type IV inhibitors. Kinase Atlas is a systematic collection of binding hot spots located at the above ten sites in 4910 structures of 376 distinct kinases available in the Protein Data Bank. The hot spots are identified by FTMap, a computational analogue of experimental fragment screening. Users of Kinase Atlas ( https://kinase-atlas.bu.edu ) may view summarized results for all structures of a particular kinase, such as which binding sites are present and how druggable they are, or they may view hot spot information for a particular kinase structure of interest.",2019-07-05 +25339269,A proteome quality index.,"We present the Proteome Quality Index (PQI; http://pqi-list.org), a much-needed resource for users of bacterial and eukaryotic proteomes. Completely sequenced genomes for which there is an available set of protein sequences (the proteome) are given a one- to five-star rating supported by 11 different metrics of quality. The database indexes over 3000 proteomes at the time of writing and is provided via a website for browsing, filtering and downloading. Previous to this work, there was no systematic way to account for the large variability in quality of the thousands of proteomes, and this is likely to have profoundly influenced the outcome of many published studies, in particular large-scale comparative analyses. 
The lack of a measure of proteome quality is likely due to the difficulty in producing one, a problem that we have approached by integrating multiple metrics. The continued development and improvement of the index will require the contribution of additional metrics by us and by others; the PQI provides a useful point of reference for the scientific community, but it is only the first step towards a 'standard' for the field.",2014-10-22 +26039571,Gene Perturbation Atlas (GPA): a single-gene perturbation repository for characterizing functional mechanisms of coding and non-coding genes.,"Genome-wide transcriptome profiling after gene perturbation is a powerful means of elucidating gene functional mechanisms in diverse contexts. The comprehensive collection and analysis of the resulting transcriptome profiles would help to systematically characterize context-dependent gene functional mechanisms and conduct experiments in biomedical research. To this end, we collected and curated over 3000 transcriptome profiles in human and mouse from diverse gene perturbation experiments, which involved 1585 different perturbed genes (microRNAs, lncRNAs and protein-coding genes) across 1170 different cell lines/tissues. For each profile, we identified differential genes and their associated functions and pathways, constructed perturbation networks, predicted transcription regulation and cancer/drug associations, and assessed cooperative perturbed genes. Based on these transcriptome analyses, the Gene Perturbation Atlas (GPA) can be used to detect (i) novel or cell-specific functions and pathways affected by perturbed genes, (ii) protein interactions and regulatory cascades affected by perturbed genes, and (iii) perturbed gene-mediated cooperative effects. The GPA is a user-friendly database to support the rapid searching and exploration of gene perturbations. Particularly, we visualized functional effects of perturbed genes from multiple perspectives. 
In summary, the GPA is a valuable resource for characterizing gene functions and regulatory mechanisms after single-gene perturbations. The GPA is freely accessible at http://biocc.hrbmu.edu.cn/GPA/.",2015-06-03 +24288140,"A CTD-Pfizer collaboration: manual curation of 88,000 scientific articles text mined for drug-disease and drug-phenotype interactions.","Improving the prediction of chemical toxicity is a goal common to both environmental health research and pharmaceutical drug development. To improve safety detection assays, it is critical to have a reference set of molecules with well-defined toxicity annotations for training and validation purposes. Here, we describe a collaboration between safety researchers at Pfizer and the research team at the Comparative Toxicogenomics Database (CTD) to text mine and manually review a collection of 88,629 articles relating over 1,200 pharmaceutical drugs to their potential involvement in cardiovascular, neurological, renal and hepatic toxicity. In 1 year, CTD biocurators curated 254,173 toxicogenomic interactions (152,173 chemical-disease, 58,572 chemical-gene, 5,345 gene-disease and 38,083 phenotype interactions). All chemical-gene-disease interactions are fully integrated with public CTD, and phenotype interactions can be downloaded. We describe Pfizer's text-mining process to collate the articles, and CTD's curation strategy, performance metrics, enhanced data content and new module to curate phenotype information. As well, we show how data integration can connect phenotypes to diseases. This curation can be leveraged for information about toxic endpoints important to drug safety and help develop testable hypotheses for drug-disease events. The availability of these detailed, contextualized, high-quality annotations curated from seven decades' worth of the scientific literature should help facilitate new mechanistic screening assays for pharmaceutical compound survival. 
This unique partnership demonstrates the importance of resource sharing and collaboration between public and private entities and underscores the complementary needs of the environmental health science and pharmaceutical communities. Database URL: http://ctdbase.org/",2013-11-28 +31575245,Shallow bore-hole three-axial fiber Bragg grating strain sensor for Etna volcano monitoring.,"We present the realization, installation, and first results of a three-axial Fiber Bragg Grating (FBG) strain sensor prototype. This sensor has been developed in the framework of the Mediterranean supersite volcanoes (http://www.med-suv.eu, 2013) project and, in particular, with the aim at contributing to the study and monitoring of Etna volcano. The FBG sensor was installed in the facilities of the Serra La Nave Astrophysical Observatory (Catania, Italy) about 7 km south-west from the summit craters, at an elevation of about 1740 m. The three-axial device showed a dynamic range of some hundreds of microstrains with microstrain resolution (submicrostrain concerning the vertical component). That is a good trade-off among performances, cost, and power consumption. The sensor structure and its read-out system are innovative in their assembly and offers practical advantages in comparison with traditional strain meters. As a demonstration of the performances of our device, the data of about 28 months of operation are presented together with the records of some local, regional, and teleseismic events. The sensor along the vertical axis showed to be the best performing one, having a power spectral density of about -90 dB re. 1ε2/Hz around one day period.",2019-09-01 +30759185,Temporal network alignment via GoT-WAVE.,"

Motivation

Network alignment (NA) finds conserved regions between two networks. NA methods optimize node conservation (NC) and edge conservation. Dynamic graphlet degree vectors are a state-of-the-art dynamic NC measure, used within the fastest and most accurate NA method for temporal networks: DynaWAVE. Here, we use graphlet-orbit transitions (GoTs), a different graphlet-based measure of temporal node similarity, as a new dynamic NC measure within DynaWAVE, resulting in GoT-WAVE.

Results

On synthetic networks, GoT-WAVE improves DynaWAVE's accuracy by 30% and speed by 64%. On real networks, when optimizing only dynamic NC, the methods are complementary. Furthermore, only GoT-WAVE supports directed edges. Hence, GoT-WAVE is a promising new temporal NA algorithm, which efficiently optimizes dynamic NC. We provide a user-friendly user interface and source code for GoT-WAVE.

Availability and implementation

http://www.dcc.fc.up.pt/got-wave/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +31437540,Identification of prokaryotic promoters and their strength by integrating heterogeneous features.,"The promoter is a regulatory DNA region and important for gene transcriptional regulation. It is located near the transcription start site (TSS) upstream of the corresponding gene. In the post-genomics era, the availability of data makes it possible to build computational models for robustly detecting the promoters as these models are expected to be helpful for academia and drug discovery. Until recently, developed models focused only on discriminating the sequences into promoter and non-promoter. However, promoter predictors can be further improved by considering weak and strong promoter classification. In this work, we introduce a hybrid model, named iPSW(PseDNC-DL), for identification of prokaryotic promoters and their strength. It combines a convolutional neural network with a pseudo-di-nucleotide composition (PseDNC). The proposed model iPSW(PseDNC-DL) has been evaluated on the benchmark datasets and outperformed the current state-of-the-art models in both tasks namely promoter identification and promoter strength identification. The developed tool iPSW(PseDNC-DL) has been constructed in a web server and made freely available at https://home.jbnu.ac.kr/NSCL/PseDNC-DL.htm.",2019-08-19 +24103152,"The clinical measurement, measurement method and experimental condition ontologies: expansion, improvements and new applications.","

Background

The Clinical Measurement Ontology (CMO), Measurement Method Ontology (MMO), and Experimental Condition Ontology (XCO) were originally developed at the Rat Genome Database (RGD) to standardize quantitative rat phenotype data in order to integrate results from multiple studies into the PhenoMiner database and data mining tool. These ontologies provide the framework for presenting what was measured, how it was measured, and under what conditions it was measured.

Results

There has been a continuing expansion of subdomains in each ontology with a parallel 2-3 fold increase in the total number of terms, substantially increasing the size and improving the scope of the ontologies. The proportion of terms with textual definitions has increased from ~60% to over 80% with greater synchronization of format and content throughout the three ontologies. Representation of definition source Uniform Resource Identifiers (URI) has been standardized, including the removal of all non-URI characters, and systematic versioning of all ontology files has been implemented. The continued expansion and success of these ontologies has facilitated the integration of more than 60,000 records into the RGD PhenoMiner database. In addition, new applications of these ontologies, such as annotation of Quantitative Trait Loci (QTL), have been added at the sites actively using them, including RGD and the Animal QTL Database.

Conclusions

The improvements to these three ontologies have been substantial, and development is ongoing. New terms and expansions to the ontologies continue to be added as a result of active curation efforts at RGD and the Animal QTL database. Use of these vocabularies to standardize data representation for quantitative phenotypes and quantitative trait loci across databases for multiple species has demonstrated their utility for integrating diverse data types from multiple sources. These ontologies are freely available for download and use from the NCBO BioPortal website at http://bioportal.bioontology.org/ontologies/1583 (CMO), http://bioportal.bioontology.org/ontologies/1584 (MMO), and http://bioportal.bioontology.org/ontologies/1585 (XCO), or from the RGD ftp site at ftp://rgd.mcw.edu/pub/ontology/.",2013-10-08 +29726900,Edge-group sparse PCA for network-guided high dimensional data analysis.,"Motivation:Principal component analysis (PCA) has been widely used to deal with high-dimensional gene expression data. In this study, we proposed an Edge-group Sparse PCA (ESPCA) model by incorporating the group structure from a prior gene network into the PCA framework for dimension reduction and feature interpretation. ESPCA enforces sparsity of principal component (PC) loadings through considering the connectivity of gene variables in the prior network. We developed an alternating iterative algorithm to solve ESPCA. The key of this algorithm is to solve a new k-edge sparse projection problem and a greedy strategy has been adapted to address it. Here we adopted ESPCA for analyzing multiple gene expression matrices simultaneously. By incorporating prior knowledge, our method can overcome the drawbacks of sparse PCA and capture some gene modules with better biological interpretations. 
Results:We evaluated the performance of ESPCA using a set of artificial datasets and two real biological datasets (including TCGA pan-cancer expression data and ENCODE expression data), and compared their performance with PCA and sparse PCA. The results showed that ESPCA could identify more biologically relevant genes, improve their biological interpretations and reveal distinct sample characteristics. Availability and implementation:An R package of ESPCA is available at http://page.amss.ac.cn/shihua.zhang/. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-10-01 +31746643,Association between Heat Exposure and Hospitalization for Diabetes in Brazil during 2000-2015: A Nationwide Case-Crossover Study.,"

Background

Exposure to excessive heat, which will continue to increase with climate change, is associated with increased morbidity due to a range of noncommunicable diseases (NCDs). Whether this is true for diabetes is unknown.

Objectives

We aimed to quantify the relationship between heat exposure and risk of hospitalization due to diabetes in Brazil.

Methods

Data on hospitalizations and weather conditions were collected from 1,814 cities during the hot seasons from 2000 to 2015. A time-stratified case-crossover design was used to quantify the association between hospitalization for diabetes and heat exposure. Region-specific odds ratios (ORs) were used to calculate the attributable fractions (AFs).

Results

A total of 553,351 hospitalizations associated with diabetes were recorded during 2000-2015. Every 5°C increase in daily mean temperature was associated with 6% [OR=1.06; 95% confidence interval (CI): 1.04, 1.07] increase in hospitalization due to diabetes with lag 0-3 d. The association was greatest (OR=1.18; 95% CI: 1.13, 1.23) in those ≥80y of age, but did not vary by sex, and was generally consistent by region and type of diabetes. Assuming a causal association, we estimated that 7.3% (95% CI: 3.5, 10.9) of all hospitalizations due to diabetes in the hot season could be attributed to heat exposure during the study period.

Discussion

Short-term heat exposure may increase the burden of diabetes-related hospitalization, especially among the very elderly. As global temperatures continue to rise, this burden is likely to increase. https://doi.org/10.1289/EHP5688.",2019-11-20 +31459245,How Precise Are Our Quantitative Structure-Activity Relationship Derived Predictions for New Query Chemicals?,"Quantitative structure-activity relationship (QSAR) models have long been used for making predictions and data gap filling in diverse fields including medicinal chemistry, predictive toxicology, environmental fate modeling, materials science, agricultural science, nanoscience, food science, and so forth. Usually a QSAR model is developed based on chemical information of a properly designed training set and corresponding experimental response data while the model is validated using one or more test set(s) for which the experimental response data are available. However, it is interesting to estimate the reliability of predictions when the model is applied to a completely new data set (true external set) even when the new data points are within applicability domain (AD) of the developed model. In the present study, we have categorized the quality of predictions for the test set or true external set into three groups (good, moderate, and bad) based on absolute prediction errors. Then, we have used three criteria [(a) mean absolute error of leave-one-out predictions for 10 most close training compounds for each query molecule; (b) AD in terms of similarity based on the standardization approach; and (c) proximity of the predicted value of the query compound to the mean training response] in different weighting schemes for making a composite score of predictions. 
It was found that using the most frequently appearing weighting scheme 0.5-0-0.5, the composite score-based categorization showed concordance with absolute prediction error-based categorization for more than 80% test data points while working with 5 different datasets with 15 models for each set derived in three different splitting techniques. These observations were also confirmed with true external sets for another four endpoints suggesting applicability of the scheme to judge the reliability of predictions for new datasets. The scheme has been implemented in a tool ""Prediction Reliability Indicator"" available at http://dtclab.webs.com/software-tools and http://teqip.jdvu.ac.in/QSAR_Tools/DTCLab/, and the tool is presently valid for multiple linear regression models only.",2018-09-19 +31348170,The Diabetes Prevention Impact Tool Kit: An Online Tool Kit to Assess the Cost-Effectiveness of Preventing Type 2 Diabetes.,"The National Diabetes Prevention Program lifestyle change program demonstrated health benefits and potential for health care cost-savings. For many states, employers, and insurers, there is a strong business case for paying for type 2 diabetes prevention, which will likely result in medical and nonmedical cost-savings as well as improved quality of life after a few years. Using an iterative feedback process with multiple stakeholders, the Centers for Disease Control and Prevention developed the Diabetes Prevention Impact Tool kit, https://nccd.cdc.gov/toolkit/diabetesimpact, which forecasts the cost impact the lifestyle change program can have for states, employers, and health insurers. We conducted key informant interviews and a qualitative analysis to evaluate the tool kit. We found that end users recognized its utility for decision making. They valued the detail of the tool kit's underlying calculations and appreciated the option of either using the default settings or revising assumptions based on their own data. 
The Diabetes Prevention Impact Tool kit can be a helpful tool for organizations that wish to forecast the economic costs and benefits of implementing or covering the National Diabetes Prevention Program lifestyle change program.",2019-09-01 +26902442,A Nurse's Guide to Supporting Physiologic Birth.,"Physiologic birth promotes the practice of normal labor and birth, in which a woman's innate power is supported and unnecessary interventions are avoided. Nurses are in a unique position to support physiologic birth because they attend almost all births. Several resources are available to assist nurses in promoting physiologic birth, including BirthTOOLS.org, a new online resource developed by the American College of Nurse-Midwives in collaboration with other organizations. By using resources such as BirthTOOLS.org and others, nurses can become familiar with the evidence surrounding physiologic birth and can contribute to improved patient safety and quality of care by supporting physiologic birth. http://dx.doi.org/10.1016/j.nwh.2015.12.009.",2016-02-12 +27630202,Establishment of Kawasaki disease database based on metadata standard. ,"Kawasaki disease (KD) is a rare disease that occurs predominantly in infants and young children. To identify KD susceptibility genes and to develop a diagnostic test, a specific therapy, or prevention method, collecting KD patients' clinical and genomic data is one of the major issues. For this purpose, Kawasaki Disease Database (KDD) was developed based on the efforts of Korean Kawasaki Disease Genetics Consortium (KKDGC). KDD is a collection of 1292 clinical data and genomic samples of 1283 patients from 13 KKDGC-participating hospitals. Each sample contains the relevant clinical data, genomic DNA and plasma samples isolated from patients' blood, omics data and KD-associated genotype data. Clinical data was collected and saved using the common data elements based on the ISO/IEC 11179 metadata standard. 
Two genome-wide association study data of total 482 samples and whole exome sequencing data of 12 samples were also collected. In addition, KDD includes the rare cases of KD (16 cases with family history, 46 cases with recurrence, 119 cases with intravenous immunoglobulin non-responsiveness, and 52 cases with coronary artery aneurysm). As the first public database for KD, KDD can significantly facilitate KD studies. All data in KDD can be searchable and downloadable. KDD was implemented in PHP, MySQL and Apache, with all major browsers supported.Database URL: http://www.kawasakidisease.kr.",2016-07-01 +30967769,Visbrain: A Multi-Purpose GPU-Accelerated Open-Source Suite for Multimodal Brain Data Visualization.,"We present Visbrain, a Python open-source package that offers a comprehensive visualization suite for neuroimaging and electrophysiological brain data. Visbrain consists of two levels of abstraction: (1) objects which represent highly configurable neuro-oriented visual primitives (3D brain, sources connectivity, etc.) and (2) graphical user interfaces for higher level interactions. The object level offers flexible and modular tools to produce and automate the production of figures using an approach similar to that of Matplotlib with subplots. The second level visually connects these objects by controlling properties and interactions through graphical interfaces. The current release of Visbrain (version 0.4.2) contains 14 different objects and three responsive graphical user interfaces, built with PyQt: Signal, for the inspection of time-series and spectral properties, Brain for any type of visualization involving a 3D brain and Sleep for polysomnographic data visualization and sleep analysis. 
Each module has been developed in tight collaboration with end-users, i.e., primarily neuroscientists and domain experts, who bring their experience to make Visbrain as transparent as possible to the recording modalities (e.g., intracranial EEG, scalp-EEG, MEG, anatomical and functional MRI). Visbrain is developed on top of VisPy, a Python package providing high-performance 2D and 3D visualization by leveraging the computational power of the graphics card. Visbrain is available on Github and comes with a documentation, examples, and datasets (http://visbrain.org).",2019-03-22 +31870283,Rapid classification of group B Streptococcus serotypes based on matrix-assisted laser desorption ionization-time of flight mass spectrometry and machine learning techniques.,"

Background

Group B streptococcus (GBS) is an important pathogen that is responsible for invasive infections, including sepsis and meningitis. GBS serotyping is an essential means for the investigation of possible infection outbreaks and can identify possible sources of infection. Although it is possible to determine GBS serotypes by either immuno-serotyping or geno-serotyping, both traditional methods are time-consuming and labor-intensive. In recent years, the matrix-assisted laser desorption ionization-time of flight mass spectrometry (MALDI-TOF MS) has been reported as an effective tool for the determination of GBS serotypes in a more rapid and accurate manner. Thus, this work aims to investigate GBS serotypes by incorporating machine learning techniques with MALDI-TOF MS to carry out the identification.

Results

In this study, a total of 787 GBS isolates, obtained from three research and teaching hospitals, were analyzed by MALDI-TOF MS, and the serotype of the GBS was determined by a geno-serotyping experiment. The peaks of mass-to-charge ratios were regarded as the attributes to characterize the various serotypes of GBS. Machine learning algorithms, such as support vector machine (SVM) and random forest (RF), were then used to construct predictive models for the five different serotypes (Types Ia, Ib, III, V, and VI). After optimization of feature selection and model generation based on training datasets, the accuracies of the selected models attained 54.9-87.1% for various serotypes based on independent testing data. Specifically, for the major serotypes, namely type III and type VI, the accuracies were 73.9 and 70.4%, respectively.

Conclusion

The proposed models have been adopted to implement a web-based tool (GBSTyper), which is now freely accessible at http://csb.cse.yzu.edu.tw/GBSTyper/, for providing efficient and effective detection of GBS serotypes based on a MALDI-TOF MS spectrum. Overall, this work has demonstrated that the combination of MALDI-TOF MS and machine intelligence could provide a practical means of clinical pathogen testing.",2019-12-24 +28365723,SilkPathDB: a comprehensive resource for the study of silkworm pathogens. ,"Silkworm pathogens have been heavily impeding the development of sericultural industry and play important roles in lepidopteran ecology, and some of which are used as biological insecticides. Rapid advances in studies on the omics of silkworm pathogens have produced a large amount of data, which need to be brought together centrally in a coherent and systematic manner. This will facilitate the reuse of these data for further analysis. We have collected genomic data for 86 silkworm pathogens from 4 taxa (fungi, microsporidia, bacteria and viruses) and from 4 lepidopteran hosts, and developed the open-access Silkworm Pathogen Database (SilkPathDB) to make this information readily available. The implementation of SilkPathDB involves integrating Drupal and GBrowse as a graphic interface for a Chado relational database which houses all of the datasets involved. The genomes have been assembled and annotated for comparative purposes and allow the search and analysis of homologous sequences, transposable elements, protein subcellular locations, including secreted proteins, and gene ontology. We believe that the SilkPathDB will aid researchers in the identification of silkworm parasites, understanding the mechanisms of silkworm infections, and the developmental ecology of silkworm parasites (gene expression) and their hosts. http://silkpathdb.swu.edu.cn.",2017-01-01 +31052310,DynaStI: A Dynamic Retention Time Database for Steroidomics. 
,": Steroidomics studies face the challenge of separating analytical compounds with very similar structures (i.e., isomers). Liquid chromatography (LC) is commonly used to this end, but the shared core structure of this family of compounds compromises effective separations among the numerous chemical analytes with comparable physico-chemical properties. Careful tuning of the mobile phase gradient and an appropriate choice of the stationary phase can be used to overcome this problem, in turn modifying the retention times in different ways for each compound. In the usual workflow, this approach is suboptimal for the annotation of features based on retention times since it requires characterizing a library of known compounds for every fine-tuned configuration. We introduce a software solution, DynaStI, that is capable of annotating liquid chromatography-mass spectrometry (LC-MS) features by dynamically generating the retention times from a database containing intrinsic properties of a library of metabolites. DynaStI uses the well-established linear solvent strength (LSS) model for reversed-phase LC. Given a list of LC-MS features and some characteristics of the LC setup, this software computes the corresponding retention times for the internal database and then annotates the features using the exact masses with predicted retention times at the working conditions. DynaStI (https://dynasti.vital-it.ch) is able to automatically calibrate its predictions to compensate for deviations in the input parameters. 
The database also includes identification and structural information for each annotation, such as IUPAC name, CAS number, SMILES string, metabolic pathways, and links to external metabolomic or lipidomic databases.",2019-04-30 +28592645,Inbred Strain Variant Database (ISVdb): A Repository for Probabilistically Informed Sequence Differences Among the Collaborative Cross Strains and Their Founders.,"The Collaborative Cross (CC) is a panel of recently established multiparental recombinant inbred mouse strains. For the CC, as for any multiparental population (MPP), effective experimental design and analysis benefit from detailed knowledge of the genetic differences between strains. Such differences can be directly determined by sequencing, but until now whole-genome sequencing was not publicly available for individual CC strains. An alternative and complementary approach is to infer genetic differences by combining two pieces of information: probabilistic estimates of the CC haplotype mosaic from a custom genotyping array, and probabilistic variant calls from sequencing of the CC founders. The computation for this inference, especially when performed genome-wide, can be intricate and time-consuming, requiring the researcher to generate nontrivial and potentially error-prone scripts. To provide standardized, easy-to-access CC sequence information, we have developed the Inbred Strain Variant Database (ISVdb). The ISVdb provides, for all the exonic variants from the Sanger Institute mouse sequencing dataset, direct sequence information for CC founders and, critically, the imputed sequence information for CC strains. Notably, the ISVdb also: (1) provides predicted variant consequence metadata; (2) allows rapid simulation of F1 populations; and (3) preserves imputation uncertainty, which will allow imputed data to be refined in the future as additional sequencing and genotyping data are collected. 
The ISVdb information is housed in an SQL database and is easily accessible through a custom online interface (http://isvdb.unc.edu), reducing the analytic burden on any researcher using the CC.",2017-06-07 +29452334,SEED 2: a user-friendly platform for amplicon high-throughput sequencing data analyses.,"Motivation:Modern molecular methods have increased our ability to describe microbial communities. Along with the advances brought by new sequencing technologies, we now require intensive computational resources to make sense of the large numbers of sequences continuously produced. The software developed by the scientific community to address this demand, although very useful, require experience of the command-line environment, extensive training and have steep learning curves, limiting their use. We created SEED 2, a graphical user interface for handling high-throughput amplicon-sequencing data under Windows operating systems. Results:SEED 2 is the only sequence visualizer that empowers users with tools to handle amplicon-sequencing data of microbial community markers. It is suitable for any marker genes sequences obtained through Illumina, IonTorrent or Sanger sequencing. SEED 2 allows the user to process raw sequencing data, identify specific taxa, produce of OTU-tables, create sequence alignments and construct phylogenetic trees. Standard dual core laptops with 8 GB of RAM can handle ca. 8 million of Illumina PE 300 bp sequences, ca. 4 GB of data. Availability and implementation:SEED 2 was implemented in Object Pascal and uses internal functions and external software for amplicon data processing. SEED 2 is a freeware software, available at http://www.biomed.cas.cz/mbu/lbwrf/seed/ as a self-contained file, including all the dependencies, and does not require installation. Supplementary data contain a comprehensive list of supported functions. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +30351027,Evidence for Cross Species Extrapolation of Mammalian-Based High-Throughput Screening Assay Results.,"High-throughput screening (HTS) and computational technologies have emerged as important tools for chemical hazard identification. The US Environmental Protection Agency (EPA) launched the Toxicity ForeCaster (ToxCast) Program, which has screened thousands of chemicals in hundreds of mammalian-based HTS assays for biological activity. The data are being used to prioritize toxicity testing on those chemicals likely to lead to adverse effects. To use HTS assays in predicting hazard to both humans and wildlife, it is necessary to understand how broadly these data may be extrapolated across species. The US EPA Sequence Alignment to Predict Across Species Susceptibility (SeqAPASS; https://seqapass.epa.gov/seqapass/ ) tool was used to assess conservation of the 484 protein targets represented in the suite of ToxCast assays and other HTS assays. To demonstrate the utility of the SeqAPASS data for guiding extrapolation, case studies were developed which focused on targets of interest to the US Endocrine Disruptor Screening Program and the Organisation for Economic Cooperation and Development. 
These case studies provide a line of evidence for conservation of endocrine targets across vertebrate species, with few exceptions, and demonstrate the utility of SeqAPASS for defining the taxonomic domain of applicability for HTS results and identifying organisms for suitable follow-up toxicity tests.",2018-11-13 +27706282,Community Health Teams: A Healthcare Provider's System Transformation Opportunity.,"""The goal of community health teams is to develop and implement care models that integrate clinical and community health promotion and preventive services for patients."" -Association of State and Territorial Health Officials (ASTHO)1 Eleven community health teams (CHTs) operate in various geographies within Rhode Island. Physicians and payers refer their highest-risk patients to CHTs that serve as community extenders. Community health workers and others work to link referred individuals to primary care and work to address the other determinants affecting their health, such as safe housing. Since much of health is driven by factors outside of the healthcare setting, CHTs complement the work of physicians within the office environment. Transforming practices and addressing both the physical and behavioral needs of patients simultaneously is key to CHT success. This article attempts to quantify the expanding need for CHTs within Rhode Island and describes ways in which CHTs as a practice transformation resource may be leveraged by providers. [Full article available at http://rimed.org/rimedicaljournal-2016-10.asp].",2016-10-04 +25431229,An efficient algorithm to explore liquid association on a genome-wide scale.,"

Background

The growing wealth of publicly available gene expression data has made systemic studies of how genes interact in a cell more feasible. Liquid association (LA) describes the extent to which coexpression of two genes may vary based on the expression level of a third gene (the controller gene). However, genome-wide application has been difficult and resource-intensive. We propose a new screening algorithm for more efficient processing of LA estimation on a genome-wide scale and apply it to a Saccharomyces cerevisiae data set.

Results

On a test subset of the data, the fast screening algorithm achieved >99.8% agreement with the exhaustive search of LA values, while reducing run time by 81-93%. Using a well-known yeast cell-cycle data set with 6,178 genes, we identified triplet combinations with significantly large LA values. In an exploratory gene set enrichment analysis, the top terms for the controller genes in these triplets with large LA values are involved in some of the most fundamental processes in yeast such as energy regulation, transportation, and sporulation.

Conclusion

In summary, in this paper we propose a novel, efficient algorithm to explore LA on a genome-wide scale and identified triplets of interest in cell cycle pathways using the proposed method in a yeast data set. A software package named fastLiquidAssociation for implementing the algorithm is available through http://www.bioconductor.org .",2014-11-28 +23613707,Large-scale event extraction from literature with multi-level gene normalization.,"Text mining for the life sciences aims to aid database curation, knowledge summarization and information retrieval through the automated processing of biomedical texts. To provide comprehensive coverage and enable full integration with existing biomolecular database records, it is crucial that text mining tools scale up to millions of articles and that their analyses can be unambiguously linked to information recorded in resources such as UniProt, KEGG, BioGRID and NCBI databases. In this study, we investigate how fully automated text mining of complex biomolecular events can be augmented with a normalization strategy that identifies biological concepts in text, mapping them to identifiers at varying levels of granularity, ranging from canonicalized symbols to unique gene and proteins and broad gene families. To this end, we have combined two state-of-the-art text mining components, previously evaluated on two community-wide challenges, and have extended and improved upon these methods by exploiting their complementary nature. Using these systems, we perform normalization and event extraction to create a large-scale resource that is publicly available, unique in semantic scope, and covers all 21.9 million PubMed abstracts and 460 thousand PubMed Central open access full-text articles. This dataset contains 40 million biomolecular events involving 76 million gene/protein mentions, linked to 122 thousand distinct genes from 5032 species across the full taxonomic tree. 
Detailed evaluations and analyses reveal promising results for application of this data in database and pathway curation efforts. The main software components used in this study are released under an open-source license. Further, the resulting dataset is freely accessible through a novel API, providing programmatic and customized access (http://www.evexdb.org/api/v001/). Finally, to allow for large-scale bioinformatic analyses, the entire resource is available for bulk download from http://evexdb.org/download/, under the Creative Commons - Attribution - Share Alike (CC BY-SA) license.",2013-04-17 +32073305,"Gestational Exposures to Phthalates and Folic Acid, and Autistic Traits in Canadian Children.","

Background

The etiology of autism spectrum disorder is poorly understood. Few studies have investigated the link between endocrine-disrupting chemicals and autistic traits. We examined the relationship between gestational phthalates and autistic traits in 3- to 4-y-old Canadian children. We also investigated potential effect modification by sex and folic acid supplementation.

Methods

We enrolled 2,001 women >18 years of age during the first trimester of pregnancy between 2008 and 2011 from 10 cities in Canada. At 3-4 years of age, 610 children underwent neuropsychological assessments including the Social Responsiveness Scale-II (SRS-2) as a measure of autistic traits and social impairment. We measured 11 phthalate metabolites in maternal first trimester urine samples and assessed folic acid supplementation from reported intakes. We estimated covariate-adjusted differences in SRS-2 T-scores with a doubling in phthalate concentrations in 510 children with complete data.

Results

Mean total SRS T-score was 45.3 (SD=6.1). Children with higher gestational exposure to mono-n-butyl (MBP) and mono-3-carboxypropyl (MCPP) concentrations exhibited significantly higher total SRS T-scores, indicating greater overall social impairment, as well as higher scores on subdomains, indicating deficits in social cognition, social communication, social motivation, and restricted interests/repetitive behaviors. A doubling in MBP or MCPP concentrations was associated with 0.6 (95% CI: 0.1, 1.0) and 0.5 (95% CI: 0.1, 0.8) higher total SRS T-scores. Associations were consistently and significantly stronger in boys (βMBP=1.0; 95% CI: 0.4, 1.6; n=252) compared with girls (βMBP=0.1; 95% CI: -0.6, 0.7; n=258) and among children who had lower prenatal folic acid supplementation (<400μg/d) (βMBP=1.3; 95% CI: 0.4, 2.3; n=59) compared with those who had adequate folic acid supplementation (≥400μg/d) (βMBP=0.4; 95% CI: -0.1, 0.8; n=451).

Conclusions

Higher gestational concentrations of some phthalate metabolites were associated with higher scores of autistic traits as measured by the SRS-2 in boys, but not girls; these small size effects were mitigated by first trimester-of-pregnancy folic acid supplementation. https://doi.org/10.1289/EHP5621.",2020-02-19 +30747916,MicroRNA expression profiles across blood and different tissues in cattle.,"MicroRNAs (miRNAs) play essential roles in regulating gene expression involved in various biological functions. The knowledge of miRNA expression profiles across different tissues in cattle is still limited. Using the miRNAs data generated from 158 samples in three studies, we characterized the miRNA expression profiles of bovine sera, exosomes and 11 different tissues. Totally 639 miRNAs were identified and 159 miRNAs were expressed in all samples. After relative log expression normalization, four miRNA expression clusters were generated: 1) sera and exosomes; 2) liver; 3) mammary gland; 4) rumen and gut tissues. The top 10 most abundant miRNAs accounted for >55% of total miRNA expression in each tissue. In addition, this study described a detailed pipeline for identification of both tissue and circulating miRNAs, and the shareable datasets can be re-used by researchers to investigate miRNA-related biological questions in cattle. In addition, a web-based repository was developed, which enables researchers to access the distribution range and raw counts number of the miRNA expression data (https://www.cattleomics.com/micrornaome).",2019-02-12 +30329014,Landscape of multi-tissue global gene expression reveals the regulatory signatures of feed efficiency in beef cattle.,"

Motivation

Feed efficiency is an important trait for sustainable beef production that is regulated by complex biological processes, but the mode of action behind it has not been clearly defined. Here, we aimed to elucidate the regulatory mechanisms of this trait through studying the landscape of the genome-wide gene expression of rumen, liver, muscle and backfat tissues, the key ones involved in the energy metabolism.

Results

The transcriptome of 189 samples across four tissues from 48 beef steers with varied feed efficiency were generated using Illumina HiSeq4000. The analysis of global gene expression profiles of four tissues, functional analysis of tissue-shared and -unique genes, co-expressed network construction of tissue-shared genes, weighted correlations analysis between gene modules and feed efficiency-related traits in each tissue were performed. Among four tissues, the transcriptome of muscle tissue was distinctive from others, while those of rumen and backfat tissues were similar. The associations between co-expressed genes and feed efficiency related traits at single or all tissues level exhibited that the gene expression in the rumen, liver, muscle and backfat were the most correlated with feed conversion ratio, dry matter intake, average daily gain and residual feed intake, respectively. The 19 overlapped genes identified from the strongest module-trait relationships in four tissues are potential generic gene markers for feed efficiency.

Availability and implementation

The distribution of gene expression data can be accessed at https://www.cattleomics.com/transcriptome.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +22110037,Saccharomyces Genome Database: the genomics resource of budding yeast.,"The Saccharomyces Genome Database (SGD, http://www.yeastgenome.org) is the community resource for the budding yeast Saccharomyces cerevisiae. The SGD project provides the highest-quality manually curated information from peer-reviewed literature. The experimental results reported in the literature are extracted and integrated within a well-developed database. These data are combined with quality high-throughput results and provided through Locus Summary pages, a powerful query engine and rich genome browser. The acquisition, integration and retrieval of these data allow SGD to facilitate experimental design and analysis by providing an encyclopedia of the yeast genome, its chromosomal features, their functions and interactions. Public access to these data is provided to researchers and educators via web pages designed for optimal ease of use.",2011-11-21 +29408997,AbDesigner3D: a structure-guided tool for peptide-based antibody production.,"Summary:We present AbDesigner3D, a new tool for identification of optimal immunizing peptides for antibody production using a peptide-based strategy. AbDesigner3D integrates 3D structural data from the Protein Data Bank (PDB) with UniProt data, which includes basic sequence data, post-translational modification sites, SNP occurrences and more. Other features, such as uniqueness and conservation scores, are calculated based on sequences from UniProt. The 3D visualization capabilities allow an intuitive interface, while an abundance of quantitative output simplifies the process of comparing immunogen peptides. Important quantitative features added in this tool include calculation and display of accessible surface area (ASA) and protein-protein interacting residues (PPIR). 
The specialized data visualization features of AbDesigner3D will greatly assist users to optimize their choice of immunizing peptides. Availability and implementation: AbDesigner3D is freely available at http://sysbio.chula.ac.th/AbDesigner3D or https://hpcwebapps.cit.nih.gov/AbDesigner3D/. Supplementary information: Supplementary data are available at Bioinformatics online.",2018-06-01 +30393569,International spinal cord injury urodynamic basic data set (version 2.0).,"

Study design

Revision, review, and presentation of the International Spinal Cord Injury (SCI) Urodynamic Basic Data Set (version 2.0).

Objectives

Describe the revision and review and present the dataset.

Setting

International.

Methods

The first version of the dataset was revised according to new knowledge and suggestions. The review included International SCI Data Sets Committee, American Spinal Injury Association (ASIA) board, International Spinal Cord Society (ISCoS) executive and scientific committees, major organizations, and interested individuals. The dataset was also on ASIA and ISCoS websites. All replies were answered and appropriate adjustments made. Finally, the dataset was endorsed by ASIA board, and ISCoS executive and scientific committees.

Results

Among revisions are adoptions of new terminology by the International Continence Society. The variable ""Detrusor function"" has been divided into ""Detrusor function during filling cystometry"" and ""Detrusor function during voiding"". The response categories have been adjusted, deleting 'Not applicable' for the variables ""Detrusor leak point pressure during filling cystometry"", ""Cystometric bladder capacity during filling cystometry"" and ""Urethral function during voiding"". The cutoff-value for low bladder compliance has been modified from 10 mL/cm H2O to 20 mL/cm H2O.

Conclusions

The International SCI Urodynamic Basic Data Set (version 2.0) with its complete syllabus is available from http://www.iscos.org.uk/international-sci-data-sets.",2018-11-01 +31347916,CORP: Practical tools for improving experimental design and reporting of laboratory studies of cardiovascular physiology and metabolism.,"The exercise consisted of: 1) a short survey to acquire baseline data on current practices regarding the conduct of animal studies, 2) a series of presentations for promoting awareness and providing advice and practical tools for improving experimental design, and 3) a follow-up survey 12 mo later to assess whether practices had changed. The surveys were compulsory for responsible investigators (n = 16; paired data presented). Other investigators named on animal ethics applications were encouraged to participate (2017, total of 36 investigators; 2018, 37 investigators). The major findings to come from the exercise included 1) a willingness of investigators to make changes when provided with knowledge/tools and solutions that were relatively simple to implement (e.g., proportion of responsible investigators showing improved practices using a structured method for randomization was 0.44, 95% CI (0.19; 0.70), P = 0.003, and deidentifying drugs/interventions was 0.40, 95% CI (0.12; 0.68), P = 0.010); 2) resistance to change if this involved more personnel and time (e.g., as required for allocation concealment); and 3) evidence that changes to long-term practices (""habits"") require time and follow-up. Improved practices could be verified based on changes in reporting within publications or documented evidence provided during laboratory visits. In summary, this exercise resulted in changed attitudes, practices, and reporting, but continued follow-up, monitoring, and incentives are required. 
Efforts to improve experimental rigor will reduce bias and will lead to findings with the greatest translational potential. NEW & NOTEWORTHY The goal of this exercise was to encourage preclinical researchers to improve the quality of their cardiac and metabolic animal studies by 1) increasing awareness of concerns, which can arise from suboptimal experimental designs; 2) providing knowledge, tools, and templates to overcome bias; and 3) conducting two short surveys over 12 mo to monitor change. Improved practices were identified for the uptake of structured methods for randomization, and de-identifying interventions/drugs. Listen to this article's corresponding podcast at https://ajpheart.podbean.com/e/experimental-design-survey-training-practical-tools/.",2019-07-26 +30590382,Ancestral sequence reconstruction: accounting for structural information by averaging over replacement matrices.,"

Motivation

Ancestral sequence reconstruction (ASR) is widely used to understand protein evolution, structure and function. Current ASR methodologies do not fully consider differences in evolutionary constraints among positions imposed by the three-dimensional (3D) structure of the protein. Here, we developed an ASR algorithm that allows different protein sites to evolve according to different mixtures of replacement matrices. We show that assigning replacement matrices to protein positions based on their solvent accessibility leads to ASR with higher log-likelihoods compared to naïve models that assume a single replacement matrix for all sites. Improved ASR log-likelihoods are also demonstrated when solvent accessibility is predicted from protein sequences rather than inferred from a known 3D structure. Finally, we show that using such structure-aware mixture models results in substantial differences in the inferred ancestral sequences.

Availability and implementation

http://fastml.tau.ac.il.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +31274204,Senescence in nature: New insights from a long-term seabird study.,"In Focus: Tompkins, E. M., & Anderson, D. J. (2019). Sex-specific patterns of senescence in Nazca boobies linked to mating system. Journal of Animal Ecology, 88, 986-1000. https://doi.org/10.1111/1365-2656.12944. Sex-specific differences in senescence and environmental impacts on senescence in both sexes remain poorly understood. Tompkins and Anderson (2019) studied senescence in survival (hereafter called actuarial senescence) and in reproduction (hereafter called reproductive senescence) in Nazca boobies using 33 years of individual-based capture-recapture data. Senescence patterns (life-history traits, ages at onset, senescence rates) differed between sexes and were affected by environmental conditions (food availability) faced by individuals during their younger ages. Patterns of sex differences in senescence may result from the mating dynamics due to the population's male-biased sex ratio.",2019-07-01 +27616708,ImmuneDB: a system for the analysis and exploration of high-throughput adaptive immune receptor sequencing data.,"As high-throughput sequencing of B cells becomes more common, the need for tools to analyze the large quantity of data also increases. This article introduces ImmuneDB, a system for analyzing vast amounts of heavy chain variable region sequences and exploring the resulting data. It can take as input raw FASTA/FASTQ data, identify genes, determine clones, construct lineages, as well as provide information such as selection pressure and mutation analysis. It uses an industry leading database, MySQL, to provide fast analysis and avoid the complexities of using error prone flat-files.

Availability and implementation

ImmuneDB is freely available at http://immunedb.com. A demo of the ImmuneDB web interface is available at: http://immunedb.com/demo. CONTACT: Uh25@drexel.edu. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-11 +23340253,Research resource: the Endometrium Database Resource (EDR).,"In order to understand the biology of the endometrium and potentially develop new diagnostic tools and treatments for endometrial diseases, the highly orchestrated gene expression/regulation that occurs within the uterus must first be understood. Even though a wealth of information on endometrial gene expression/regulation is available, this information is scattered across several different resources in formats that can be difficult for the average bench scientist to query, integrate, and utilize. The Endometrium Database Resource (EDR) was created as a single evolving resource for protein- and micro-RNA-encoding genes that have been shown by gene expression microarray, Northern blot, or other experiments in the literature to have their expression regulated in the uterus of humans, mice, rats, cows, domestic pigs, guinea pigs, and sheep. Genes are annotated in EDR with basic gene information (eg, gene symbol and chromosome), gene orthologs, and gene ontologies. Links are also provided to external resources for publication/s, nucleic and amino acid sequence, gene product function, and Gene Expression Omnibus (GEO) phase expression graph information. The resource also allows for direct comparison of relative gene expression in different microarray experiments for genes shown in the literature to be differentially expressed in the uterus. It is available via a user-friendly, web-based interface and is available without charge or restriction to the entire scientific community. 
The EDR can be accessed at http://edr.research.bcm.edu.",2013-01-22 +24234447,UniCarbKB: building a knowledge platform for glycoproteomics.,"The UniCarb KnowledgeBase (UniCarbKB; http://unicarbkb.org) offers public access to a growing, curated database of information on the glycan structures of glycoproteins. UniCarbKB is an international effort that aims to further our understanding of structures, pathways and networks involved in glycosylation and glyco-mediated processes by integrating structural, experimental and functional glycoscience information. This initiative builds upon the success of the glycan structure database GlycoSuiteDB, together with the informatic standards introduced by EUROCarbDB, to provide a high-quality and updated resource to support glycomics and glycoproteomics research. UniCarbKB provides comprehensive information concerning glycan structures, and published glycoprotein information including global and site-specific attachment information. For the first release over 890 references, 3740 glycan structure entries and 400 glycoproteins have been curated. Further, 598 protein glycosylation sites have been annotated with experimentally confirmed glycan structures from the literature. Among these are 35 glycoproteins, 502 structures and 60 publications previously not included in GlycoSuiteDB. This article provides an update on the transformation of GlycoSuiteDB (featured in previous NAR Database issues and hosted by ExPASy since 2009) to UniCarbKB and its integration with UniProtKB and GlycoMod. Here, we introduce a refactored database, supported by substantial new curated data collections and intuitive user-interfaces that improve database searching.",2013-11-13 +29149241,Statistical Learning in Specific Language Impairment: A Meta-Analysis.,"

Purpose

The current meta-analysis provides a quantitative overview of published and unpublished studies on statistical learning in the auditory verbal domain in people with and without specific language impairment (SLI). The database used for the meta-analysis is accessible online and open to updates (Community-Augmented Meta-Analysis), which facilitates the accumulation and evaluation of previous and future studies on statistical learning in this domain.

Method

A systematic literature search identified 10 unique experiments examining auditory verbal statistical learning in 213 participants with SLI and 363 without SLI, aged between 6 and 19 years. Data from qualifying studies were extracted and converted to Hedges' g effect sizes.

Results

The overall standardized mean difference between participants with SLI and participants without SLI was 0.54, which was significantly different from 0 (p < .001, 95% confidence interval [0.36, 0.71]).

Conclusion

Together, the results of our meta-analysis indicate a robust difference between people with SLI and people without SLI in their detection of statistical regularities in the auditory input. The detection of statistical regularities is, on average, not as effective in people with SLI compared with people without SLI. The results of this meta-analysis are congruent with a statistical learning deficit hypothesis in SLI.

Supplemental material

https://doi.org/10.23641/asha.5558074.",2017-12-01 +29036271,Cancer Hallmarks Analytics Tool (CHAT): a text mining approach to organize and evaluate scientific literature on cancer.,"

Motivation

To understand the molecular mechanisms involved in cancer development, significant efforts are being invested in cancer research. This has resulted in millions of scientific articles. An efficient and thorough review of the existing literature is crucially important to drive new research. This time-demanding task can be supported by emerging computational approaches based on text mining which offer a great opportunity to organize and retrieve the desired information efficiently from sizable databases. One way to organize existing knowledge on cancer is to utilize the widely accepted framework of the Hallmarks of Cancer. These hallmarks refer to the alterations in cell behaviour that characterize the cancer cell.

Results

We created an extensive Hallmarks of Cancer taxonomy and developed automatic text mining methodology and a tool (CHAT) capable of retrieving and organizing millions of cancer-related references from PubMed into the taxonomy. The efficiency and accuracy of the tool was evaluated intrinsically as well as extrinsically by case studies. The correlations identified by the tool show that it offers a great potential to organize and correctly classify cancer-related literature. Furthermore, the tool can be useful, for example, in identifying hallmarks associated with extrinsic factors, biomarkers and therapeutics targets.

Availability and implementation

CHAT can be accessed at: http://chat.lionproject.net. The corpus of hallmark-annotated PubMed abstracts and the software are available at: http://chat.lionproject.net/about.

Contact

simon.baker@cl.cam.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +23203877,NPACT: Naturally Occurring Plant-based Anti-cancer Compound-Activity-Target database.,"Plant-derived molecules have been highly valued by biomedical researchers and pharmaceutical companies for developing drugs, as they are thought to be optimized during evolution. Therefore, we have collected and compiled a central resource Naturally Occurring Plant-based Anti-cancer Compound-Activity-Target database (NPACT, http://crdd.osdd.net/raghava/npact/) that gathers the information related to experimentally validated plant-derived natural compounds exhibiting anti-cancerous activity (in vitro and in vivo), to complement the other databases. It currently contains 1574 compound entries, and each record provides information on their structure, manually curated published data on in vitro and in vivo experiments along with reference for users referral, inhibitory values (IC(50)/ED(50)/EC(50)/GI(50)), properties (physical, elemental and topological), cancer types, cell lines, protein targets, commercial suppliers and drug likeness of compounds. NPACT can easily be browsed or queried using various options, and an online similarity tool has also been made available. Further, to facilitate retrieval of existing data, each record is hyperlinked to similar databases like SuperNatural, Herbal Ingredients' Targets, Comparative Toxicogenomics Database, PubChem and NCI-60 GI(50) data.",2012-11-29 +23104377,ClusterMine360: a database of microbial PKS/NRPS biosynthesis.,"ClusterMine360 (http://www.clustermine360.ca/) is a database of microbial polyketide and non-ribosomal peptide gene clusters. It takes advantage of crowd-sourcing by allowing members of the community to make contributions while automation is used to help achieve high data consistency and quality. The database currently has >200 gene clusters from >185 compound families. 
It also features a unique sequence repository containing >10 000 polyketide synthase/non-ribosomal peptide synthetase domains. The sequences are filterable and downloadable as individual or multiple sequence FASTA files. We are confident that this database will be a useful resource for members of the polyketide synthases/non-ribosomal peptide synthetases research community, enabling them to keep up with the growing number of sequenced gene clusters and rapidly mine these clusters for functional information.",2012-10-26 +28155662,An ensemble micro neural network approach for elucidating interactions between zinc finger proteins and their target DNA.,"

Background

The ability to engineer zinc finger proteins binding to a DNA sequence of choice is essential for targeted genome editing to be possible. Experimental techniques and molecular docking have been successful in predicting protein-DNA interactions, however, they are highly time and resource intensive. Here, we present a novel algorithm designed for high throughput prediction of optimal zinc finger protein for 9 bp DNA sequences of choice. In accordance with the principles of information theory, a subset identified by using K-means clustering was used as a representative for the space of all possible 9 bp DNA sequences. The modeling and simulation results assuming synergistic mode of binding obtained from this subset were used to train an ensemble micro neural network. Synergistic mode of binding is the closest to the DNA-protein binding seen in nature, and gives much higher quality predictions, while the time and resources increase exponentially in the trade off. Our algorithm is inspired from an ensemble machine learning approach, and incorporates the predictions made by 100 parallel neural networks, each with a different hidden layer architecture designed to pick up different features from the training dataset to predict optimal zinc finger proteins for any 9 bp target DNA.

Results

The model gave an accuracy of an average 83% sequence identity for the testing dataset. The BLAST e-values are well within the statistical confidence interval of E-05 for 100% of the testing samples. The geometric mean and median value for the BLAST e-values were found to be 1.70E-12 and 7.00E-12 respectively. For final validation of approach, we compared our predictions against optimal ZFPs reported in literature for a set of experimentally studied DNA sequences. The accuracy, as measured by the average string identity between our predictions and the optimal zinc finger protein reported in literature for a 9 bp DNA target was found to be as high as 81% for DNA targets with a consensus sequence GCNGNNGCN reported in literature. Moreover, the average string identity of our predictions for a catalogue of over 100 9 bp DNA for which the optimal zinc finger protein has been reported in literature was found to be 71%.

Conclusions

Validation with experimental data shows that our tool is capable of domain adaptation and thus scales well to datasets other than the training set with high accuracy. As synergistic binding comes the closest to the ideal mode of binding, our algorithm predicts biologically relevant results in sync with the experimental data present in the literature. While there have been disjointed attempts to approach this problem synergistically reported in literature, there is no work covering the whole sample space. Our algorithm allows designing zinc finger proteins for DNA targets of the user's choice, opening up new frontiers in the field of targeted genome editing. This algorithm is also available as an easy to use web server, ZifNN, at http://web.iitd.ac.in/~sundar/ZifNN/ .",2016-12-22 +29266816,Undergraduate Health Students' Intention to Use Evidence-Based Practice After Graduation: A Systematic Review of Predictive Modeling Studies.,"

Background

Incorporating evidence-based practice (EBP) into clinical decision making and professional practice is a requirement for many health disciplines, yet research across health disciplines on factors that influence and predict student intention to use EBP following graduation has not been previously synthesized.

Aim

To synthesize research on factors that influence development of EBP behaviors and subsequently predict undergraduate students' intention toward EBP uptake.

Methods

A systematic review of prediction modeling studies was conducted according to a protocol previously published on the Prospero database: https://www.crd.york.ac.uk/PROSPERO/. The outcome variable was undergraduate students' future use or intention to use EBP. Evidence synthesis methods were guided by resources from the Cochrane Methods Prognosis Group Web site (https://prognosismethods.cochrane.org).

Results and findings

Only three studies were found to meet inclusion criteria for the review. Factors relating to EBP capability, EBP attitudes, as well as clinical and academic support were identified as influential toward students' intention to use evidence in practice. Heterogeneity limited data pooling, consequently, results are presented in narrative and tabular form.

Linking evidence to action

Although using a developing method, this review presents a unique contribution to further discussions regarding students' intention to use EBP following graduation. Despite limitations, consideration of identified factors for undergraduate curriculum could support student's intention to use EBP in their respective clinical environments.",2017-12-21 +29473009,COREMIC: a web-tool to search for a niche associated CORE MICrobiome.,"Microbial diversity on earth is extraordinary, and soils alone harbor thousands of species per gram of soil. Understanding how this diversity is sorted and selected into habitat niches is a major focus of ecology and biotechnology, but remains only vaguely understood. A systems-biology approach was used to mine information from databases to show how it can be used to answer questions related to the core microbiome of habitat-microbe relationships. By making use of the burgeoning growth of information from databases, our tool ""COREMIC"" meets a great need in the search for understanding niche partitioning and habitat-function relationships. The work is unique, furthermore, because it provides a user-friendly statistically robust web-tool (http://coremic2.appspot.com or http://core-mic.com), developed using Google App Engine, to help in the process of database mining to identify the ""core microbiome"" associated with a given habitat. A case study is presented using data from 31 switchgrass rhizosphere community habitats across a diverse set of soil and sampling environments. The methodology utilizes an outgroup of 28 non-switchgrass (other grasses and forbs) to identify a core switchgrass microbiome. Even across a diverse set of soils (five environments), and conservative statistical criteria (presence in more than 90% samples and FDR q-val <0.05% for Fisher's exact test) a core set of bacteria associated with switchgrass was observed. 
These included, among others, closely related taxa from Lysobacter spp., Mesorhizobium spp, and Chitinophagaceae. These bacteria have been shown to have functions related to the production of bacterial and fungal antibiotics and plant growth promotion. COREMIC can be used as a hypothesis generating or confirmatory tool that shows great potential for identifying taxa that may be important to the functioning of a habitat (e.g. host plant). The case study, in conclusion, shows that COREMIC can identify key habitat-specific microbes across diverse samples, using currently available databases and a unique freely available software.",2018-02-15 +22260095,IntegromeDB: an integrated system and biological search engine.,"

Background

With the growth of biological data in volume and heterogeneity, web search engines become key tools for researchers. However, general-purpose search engines are not specialized for the search of biological data.

Description

Here, we present an approach at developing a biological web search engine based on the Semantic Web technologies and demonstrate its implementation for retrieving gene- and protein-centered knowledge. The engine is available at http://www.integromedb.org.

Conclusions

The IntegromeDB search engine allows scanning data on gene regulation, gene expression, protein-protein interactions, pathways, metagenomics, mutations, diseases, and other gene- and protein-related data that are automatically retrieved from publicly available databases and web pages using biological ontologies. To perfect the resource design and usability, we welcome and encourage community feedback.",2012-01-19 +25466819,White adipose tissue reference network: a knowledge resource for exploring health-relevant relations.,"Optimal health is maintained by interaction of multiple intrinsic and environmental factors at different levels of complexity-from molecular, to physiological, to social. Understanding and quantification of these interactions will aid design of successful health interventions. We introduce the reference network concept as a platform for multi-level exploration of biological relations relevant for metabolic health, by integration and mining of biological interactions derived from public resources and context-specific experimental data. A White Adipose Tissue Health Reference Network (WATRefNet) was constructed as a resource for discovery and prioritization of mechanism-based biomarkers for white adipose tissue (WAT) health status and the effect of food and drug compounds on WAT health status. The WATRefNet (6,797 nodes and 32,171 edges) is based on (1) experimental data obtained from 10 studies addressing different adiposity states, (2) seven public knowledge bases of molecular interactions, (3) expert's definitions of five physiologically relevant processes key to WAT health, namely WAT expandability, Oxidative capacity, Metabolic state, Oxidative stress and Tissue inflammation, and (4) a collection of relevant biomarkers of these processes identified by BIOCLAIMS ( http://bioclaims.uib.es ). 
The WATRefNet comprehends multiple layers of biological complexity as it contains various types of nodes and edges that represent different biological levels and interactions. We have validated the reference network by showing overrepresentation with anti-obesity drug targets, pathology-associated genes and differentially expressed genes from an external disease model dataset. The resulting network has been used to extract subnetworks specific to the above-mentioned expert-defined physiological processes. Each of these process-specific signatures represents a mechanistically supported composite biomarker for assessing and quantifying the effect of interventions on a physiological aspect that determines WAT health status. Following this principle, five anti-diabetic drug interventions and one diet intervention were scored for the match of their expression signature to the five biomarker signatures derived from the WATRefNet. This confirmed previous observations of successful intervention by dietary lifestyle and revealed WAT-specific effects of drug interventions. The WATRefNet represents a sustainable knowledge resource for extraction of relevant relationships such as mechanisms of action, nutrient intervention targets and biomarkers and for assessment of health effects for support of health claims made on food products.",2014-12-03 +28086069,Explanation of the Nagoya Protocol on Access and Benefit Sharing and its implication for microbiology.,"Working with genetic resources and associated data requires greater attention since the Nagoya Protocol on Access and Benefit Sharing (ABS) came into force in October 2014. Biologists must ensure that they have legal clarity in how they can and cannot use the genetic resources on which they carry out research. Not only must they work within the spirit in the Convention on Biological Diversity (https://www.cbd.int/convention/articles/default.shtml?a=cbd-02) but also they may have regulatory requirements to meet. 
Although the Nagoya Protocol was negotiated and agreed globally, it is the responsibility of each country that ratifies it to introduce their individual implementing procedures and practices. Many countries in Europe, such as the UK, have chosen not to put access controls in place at this time, but others already have laws enacted providing ABS measures under the Convention on Biological Diversity or specifically to implement the Nagoya Protocol. Access legislation is in place in many countries and information on this can be found at the ABS Clearing House (https://absch.cbd.int/). For example, Brazil, although not a Party to the Nagoya Protocol at the time of writing, has Law 13.123 which entered into force on 17 November 2015, regulated by Decree 8.772 which was published on 11 May 2016. In this case, export of Brazilian genetic resources is not allowed unless the collector is registered in the National System for Genetic Heritage and Associated Traditional Knowledge Management (SisGen). The process entails that a foreign scientist must first of all be registered working with someone in Brazil and have authorization to collect. The enactment of European Union Regulation No. 511/2014 implements Nagoya Protocol elements that govern compliance measures for users and offers the opportunity to demonstrate due diligence in sourcing their organisms by selecting from holdings of 'registered collections'. The UK has introduced a Statutory Instrument that puts in place enforcement measures within the UK to implement this European Union Regulation; this is regulated by Regulatory Delivery, Department for Business, Energy and Industrial Strategies. Scientific communities, including the private sector, individual institutions and organizations, have begun to design policy and best practices for compliance. 
Microbiologists and culture collections alike need to be aware of the legislation of the source country of the materials they use and put in place best practices for compliance; such best practice has been drafted by the Microbial Resource Research Infrastructure, and other research communities such as the Consortium of European Taxonomic Facilities, the Global Genome Biodiversity Network and the International Organisation for Biological Control have published best practice and/or codes of conduct to ensure legitimate exchange and use of genetic resources.",2017-03-29 +31455825,A predictive model for extubation readiness in extremely preterm infants.,"

Objective

To develop an estimator for predicting successful extubation for an individual preterm infant.

Study design

This was a retrospective study that included infants with birth weights ≤1250 g, who were admitted to a tertiary NICU over a 7-year period, received mechanical ventilation and had an elective extubation attempt within 60 days of age. Perinatal and periextubation characteristics were compared in the successful and failed extubation groups.

Results

Of 621 screened infants, 312 were included. Extubation succeeded in 73% and failed in 27%. Adjusted factors associated with successful extubation included greater gestational age, chronologic age, pre-extubation pH and lower pre-extubation FiO2, along with lower ""peak"" respiratory severity score in the first 6 h of age.

Conclusions

We used readily available demographic and clinical data to create an extubation readiness estimator that provides the probability of extubation success for an individual preterm infant (http://elasticbeanstalk-us-east-2-676799334712.s3-website.us-east-2.amazonaws.com/).",2019-08-27 +29312626,Optimizing prognosis-related key miRNA-target interactions responsible for cancer metastasis.,"Increasing evidence suggests that the abnormality of microRNAs (miRNAs) and their downstream targets is frequently implicated in the pathogenesis of human cancers, however, the clinical benefit of causal miRNA-target interactions has been seldom studied. Here, we proposed a computational method to optimize prognosis-related key miRNA-target interactions by combining transcriptome and clinical data from thousands of TCGA tumors across 16 cancer types. We obtained a total of 1,956 prognosis-related key miRNA-target interactions between 112 miRNAs and 1,443 their targets. Interestingly, these key target genes are specifically involved in tumor progression-related functions, such as 'cell adhesion' and 'cell migration'. Furthermore, they are most significantly correlated with 'tissue invasion and metastasis', a hallmark of metastasis, in ten distinct types of cancer through the hallmark analysis. These results implicated that the prognosis-related key miRNA-target interactions were highly associated with cancer metastasis. Finally, we observed that the combination of these key miRNA-target interactions allowed to distinguish patients with good prognosis from those with poor prognosis both in most TCGA cancer types and independent validation sets, highlighting their roles in cancer metastasis. 
We provided a user-friendly database named miRNATarget (freely available at http://biocc.hrbmu.edu.cn/miRNATar/), which provides an overview of the prognosis-related key miRNA-target interactions across 16 cancer types.",2017-11-27 +29143332,Accurate prediction and elucidation of drug resistance based on the robust and reproducible chemoresponse communities.,"Selecting the available treatment for each cancer patient from genomic context is a core goal of precision medicine, but innovative approaches with mechanism interpretation and improved performance are still highly needed. Through utilizing in vitro chemotherapy response data coupled with gene and miRNA expression profiles, we applied a network-based approach that identified markers not as individual molecules but as functional groups extracted from the integrated transcription factor and miRNA regulatory network. Based on the identified chemoresponse communities, the predictors of drug resistance achieved high accuracy in cross-validation and were more robust and reproducible than conventional single-molecule markers. Meanwhile, as candidate communities not only enriched abundant cellular process but also covered a variety of drug enzymes, transporters, and targets, these resulting chemoresponse communities furnished novel models to interpret multiple kinds of potential regulatory mechanism, such as dysregulation of cancer cell apoptosis or disturbance of drug metabolism. Moreover, compounds were linked based on the enrichment of their common chemoresponse communities to uncover undetected response patterns and possible multidrug resistance phenotype. Finally, an omnibus repository named ChemoCommunity (http://www.jianglab.cn/ChemoCommunity/) was constructed, which furnished a user-friendly interface for a convenient retrieval of the detailed information on chemoresponse communities. 
Taken together, our method, and the accompanying database, improved the performance of classifiers for drug resistance and provided novel model to uncover the possible regulatory mechanism of individual response to drug.",2017-11-27 +31126321,Molecular and pharmacological modulators of the tumor immune contexture revealed by deconvolution of RNA-seq data.,"We introduce quanTIseq, a method to quantify the fractions of ten immune cell types from bulk RNA-sequencing data. quanTIseq was extensively validated in blood and tumor samples using simulated, flow cytometry, and immunohistochemistry data.quanTIseq analysis of 8000 tumor samples revealed that cytotoxic T cell infiltration is more strongly associated with the activation of the CXCR3/CXCL9 axis than with mutational load and that deconvolution-based cell scores have prognostic value in several solid cancers. Finally, we used quanTIseq to show how kinase inhibitors modulate the immune contexture and to reveal immune-cell types that underlie differential patients' responses to checkpoint blockers.Availability: quanTIseq is available at http://icbi.at/quantiseq .",2019-05-24 +26558755,Gene Network Rewiring to Study Melanoma Stage Progression and Elements Essential for Driving Melanoma.,"Metastatic melanoma patients have a poor prognosis, mainly attributable to the underlying heterogeneity in melanoma driver genes and altered gene expression profiles. These characteristics of melanoma also make the development of drugs and identification of novel drug targets for metastatic melanoma a daunting task. Systems biology offers an alternative approach to re-explore the genes or gene sets that display dysregulated behaviour without being differentially expressed. In this study, we have performed systems biology studies to enhance our knowledge about the conserved property of disease genes or gene sets among mutually exclusive datasets representing melanoma progression. 
We meta-analysed 642 microarray samples to generate melanoma reconstructed networks representing four different stages of melanoma progression to extract genes with altered molecular circuitry wiring as compared to a normal cellular state. Intriguingly, a majority of the melanoma network-rewired genes are not differentially expressed and the disease genes involved in melanoma progression consistently modulate its activity by rewiring network connections. We found that the shortlisted disease genes in the study show strong and abnormal network connectivity, which enhances with the disease progression. Moreover, the deviated network properties of the disease gene sets allow ranking/prioritization of different enriched, dysregulated and conserved pathway terms in metastatic melanoma, in agreement with previous findings. Our analysis also reveals presence of distinct network hubs in different stages of metastasizing tumor for the same set of pathways in the statistically conserved gene sets. The study results are also presented as a freely available database at http://bioinfo.icgeb.res.in/m3db/. The web-based database resource consists of results from the analysis presented here, integrated with cytoscape web and user-friendly tools for visualization, retrieval and further analysis.",2015-11-11 +31301672,"Geno2proteo, a Tool for Batch Retrieval of DNA and Protein Sequences from Any Genomic or Protein Regions. ","The interconversion of sequences that constitute the genome and the proteome is becoming increasingly important due to the generation of large amounts of DNA sequence data. Following mapping of DNA segments to the genome, one fundamentally important task is to find the amino acid sequences which are coded within a list of genomic sections. Conversely, given a series of protein segments, an important task is to find the genomic loci which code for a list of protein regions. 
To perform these tasks on a region by region basis is extremely laborious when a large number of regions are being studied. We have therefore implemented an R package geno2proteo which performs the two mapping tasks and subsequent sequence retrieval in a batch fashion. In order to make the tool more accessible to users, we have created a web interface of the R package which allows the users to perform the mapping tasks by going to the web page http://sharrocksresources.manchester.ac.uk/tofigaps and using the web service.",2019-07-13 +31249848,Data on SVCT2 transporter expression and localization in cancer cell lines and tissues.,"The data presented in this article are related to the research paper entitled ""Increased expression of mitochondrial sodium-coupled ascorbic acid transporter-2 (mitSVCT2) as a central feature in breast cancer"", available in Free Radical Biology and Medicine Journal [1]. In this article, we examined the SVCT2 transporter expression in various breast cancer cell lines using RT-PCR and Western blot assays. In addition, we analyzed the subcellular localization of SVCT2 by immunofluorescence colocalization assays and cellular fractionation experiments. Finally, an analysis of different cancer tissue microarrays immunostained for SVCT2 and imaged by The Human Protein Atlas (https://www.proteinatlas.org) is presented.",2019-05-06 +31022176,TASmania: A bacterial Toxin-Antitoxin Systems database.,"Bacterial Toxin-Antitoxin systems (TAS) are involved in key biological functions including plasmid maintenance, defense against phages, persistence and virulence. They are found in nearly all phyla and classified into 6 different types based on the mode of inactivation of the toxin, with the type II TAS being the best characterized so far. We have herein developed a new in silico discovery pipeline named TASmania, which mines the >41K assemblies of the EnsemblBacteria database for known and uncharacterized protein components of type I to IV TAS loci. 
Our pipeline annotates the proteins based on a list of curated HMMs, which leads to >2.106 loci candidates, including orphan toxins and antitoxins, and organises the candidates in pseudo-operon structures in order to identify new TAS candidates based on a guilt-by-association strategy. In addition, we classify the two-component TAS with an unsupervised method on top of the pseudo-operon (pop) gene structures, leading to 1567 ""popTA"" models offering a more robust classification of the TAs families. These results give valuable clues in understanding the toxin/antitoxin modular structures and the TAS phylum specificities. Preliminary in vivo work confirmed six putative new hits in Mycobacterium tuberculosis as promising candidates. The TASmania database is available on the following server https://shiny.bioinformatics.unibe.ch/apps/tasmania/.",2019-04-25 +,"Molecular phylogeny and systematics of the Polysphincta group of genera (Hymenoptera, Ichneumonidae, Pimplinae)","The phylogenetic relationships between genera of the Polysphincta group of Pimplinae (Ichneumonidae) were surveyed using molecular markers, partial sequences of cytochrome c oxidase I (COI), 28S rRNA and elongation factor 1α, and maximum likelihood and Bayesian approaches to obtain a robust phylogenetic hypothesis to understand the evolution of the group. The Polysphincta group was recovered as monophyletic, although relationships between genera were different from previous hypotheses based on morphological data. Within the Polysphincta group, three major clades were recognized and phylogenetic relationships among them were well resolved as (Schizopyga subgroup + (Acrodactyla subgroup + Polysphincta subgroup)). The Schizopyga subgroup consisted of the genera Piogaster, Schizopyga, Zabrachypus and Brachyzapus. As the genus Schizopyga was found to be polyphyletic, the genus Dreisbachia, which had been synonymized under Schizopyga, was resurrected and Iania gen.n. 
is proposed for Dreisbachia pictifrons, to maintain monophyletic genera. Species of the Schizopyga subgroup utilize spiders constructing egg‐laying chambers or funnel webs as hosts. The genus Piogaster was not recovered as the sister to all other members of the genus group, unlike previous hypotheses, but was nested in this clade as (Zabrachypus + ((Brachyzapus + Schizopyga) + (Dreisbachia + (Iania + Piogaster)))). Members of the Acrodactyla and Polysphincta subgroups attack spiders that weave aerial webs. The host range of the former is centred on tetragnathid and linyphiid spiders, the host range of the latter seems to centre mainly on orb‐weaving araneids and partly on theridiids weaving three‐dimensional (3D) irregular webs. Based on the obtained phylogeny of the group, the evolution of larval and cocoon morphology, and the mode of parasitism are discussed. Acrodactyla varicarinata Uchida & Momoi and A. inoperta Kusigemati are transferred to the genus Megaetaira (comb.n.). This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:0AB1086F‐9F23‐4057‐B7ED‐3A3943E19C61.",2016-10-01 +23936191,HSC-explorer: a curated database for hematopoietic stem cells.,"HSC-Explorer (http://mips.helmholtz-muenchen.de/HSC/) is a publicly available, integrative database containing detailed information about the early steps of hematopoiesis. The resource aims at providing fast and easy access to relevant information, in particular to the complex network of interacting cell types and molecules, from the wealth of publications in the field through visualization interfaces. It provides structured information on more than 7000 experimentally validated interactions between molecules, bioprocesses and environmental factors. Information is manually derived by critical reading of the scientific literature from expert annotators. 
Hematopoiesis-relevant interactions are accompanied with context information such as model organisms and experimental methods for enabling assessment of reliability and relevance of experimental results. Usage of established vocabularies facilitates downstream bioinformatics applications and to convert the results into complex networks. Several predefined datasets (Selected topics) offer insights into stem cell behavior, the stem cell niche and signaling processes supporting hematopoietic stem cell maintenance. HSC-Explorer provides a versatile web-based resource for scientists entering the field of hematopoiesis enabling users to inspect the associated biological processes through interactive graphical presentation.",2013-07-30 +27794554,RNAcentral: a comprehensive database of non-coding RNA sequences.,"RNAcentral is a database of non-coding RNA (ncRNA) sequences that aggregates data from specialised ncRNA resources and provides a single entry point for accessing ncRNA sequences of all ncRNA types from all organisms. Since its launch in 2014, RNAcentral has integrated twelve new resources, taking the total number of collaborating database to 22, and began importing new types of data, such as modified nucleotides from MODOMICS and PDB. We created new species-specific identifiers that refer to unique RNA sequences within a context of single species. The website has been subject to continuous improvements focusing on text and sequence similarity searches as well as genome browsing functionality. All RNAcentral data is provided for free and is available for browsing, bulk downloads, and programmatic access at http://rnacentral.org/.",2016-10-28 +24808858,NeuroElectro: a window to the world's neuron electrophysiology data.,"The behavior of neural circuits is determined largely by the electrophysiological properties of the neurons they contain. Understanding the relationships of these properties requires the ability to first identify and catalog each property. 
However, information about such properties is largely locked away in decades of closed-access journal articles with heterogeneous conventions for reporting results, making it difficult to utilize the underlying data. We solve this problem through the NeuroElectro project: a Python library, RESTful API, and web application (at http://neuroelectro.org) for the extraction, visualization, and summarization of published data on neurons' electrophysiological properties. Information is organized both by neuron type (using neuron definitions provided by NeuroLex) and by electrophysiological property (using a newly developed ontology). We describe the techniques and challenges associated with the automated extraction of tabular electrophysiological data and methodological metadata from journal articles. We further discuss strategies for how to best combine, normalize and organize data across these heterogeneous sources. NeuroElectro is a valuable resource for experimental physiologists attempting to supplement their own data, for computational modelers looking to constrain their model parameters, and for theoreticians searching for undiscovered relationships among neurons and their properties.",2014-04-29 +29543799,A FAIR guide for data providers to maximise sharing of human genomic data.,"It is generally acknowledged that, for reproducibility and progress of human genomic research, data sharing is critical. For every sharing transaction, a successful data exchange is produced between a data consumer and a data provider. Providers of human genomic data (e.g., publicly or privately funded repositories and data archives) fulfil their social contract with data donors when their shareable data conforms to FAIR (findable, accessible, interoperable, reusable) principles. 
Based on our experiences via Repositive (https://repositive.io), a leading discovery platform cataloguing all shared human genomic datasets, we propose guidelines for data providers wishing to maximise their shared data's FAIRness.",2018-03-15 +24550246,Cohort profile: UK Millennium Cohort Study (MCS).,"The UK Millennium Cohort Study (MCS) is an observational, multidisciplinary cohort study that was set up to follow the lives of children born at the turn of the new century. The MCS is nationally representative and 18 552 families (18 827 children) were recruited to the cohort in the first sweep. There have currently been five main sweeps of data collection, at ages 9 months and 3, 5, 7 and 11 years. A further sweep of data collection is planned for age 14 years. A range of health-related data have been collected as well as measures concerning child development, cognitive ability and educational attainment. The data also include a wealth of information describing the social, economic and demographic characteristics of the cohort members and their families. In addition, the MCS data have been linked to administrative data resources including health records. The MCS provides a unique and valuable resource for the analysis of health outcomes and health inequalities. The MCS data are freely available to bona fide researchers under standard access conditions via the UK Data Service (http://ukdataservice.ac.uk) and the MCS website provides detailed information on the study (http://www.cls.ioe.ac.uk/mcs).",2014-02-17 +23172286,WDDD: Worm Developmental Dynamics Database.,"During animal development, cells undergo dynamic changes in position and gene expression. A collection of quantitative information about morphological dynamics under a wide variety of gene perturbations would provide a rich resource for understanding the molecular mechanisms of development. 
Here, we created a database, the Worm Developmental Dynamics Database (http://so.qbic.riken.jp/wddd/), which stores a collection of quantitative information about cell division dynamics in early Caenorhabditis elegans embryos with single genes silenced by RNA-mediated interference. The information contains the three-dimensional coordinate values of the outlines of nuclear regions and the dynamics of the outlines over time. The database provides free access to 50 sets of quantitative data for wild-type embryos and 136 sets of quantitative data for RNA-mediated interference embryos corresponding to 72 of the 97 essential embryonic genes on chromosome III. The database also provides sets of four-dimensional differential interference contrast microscopy images on which the quantitative data were based. The database will provide a novel opportunity for the development of computational methods to obtain fresh insights into the mechanisms of development. The quantitative information and microscopy images can be synchronously viewed through a web browser, which is designed for easy access by experimental biologists.",2012-11-20 +28314850,Temporal Trends and Temperature-Related Incidence of Electrical Storm: The TEMPEST Study (Temperature-Related Incidence of Electrical Storm). ,"The occurrence of ventricular tachyarrhythmias seems to follow circadian, daily, and seasonal distributions. Our aim is to identify potential temporal patterns of electrical storm (ES), in which a cluster of ventricular tachycardias or ventricular fibrillation, negatively affects short- and long-term survival. The TEMPEST study (Circannual Pattern and Temperature-Related Incidence of Electrical Storm) is a patient-level, pooled analysis of previously published data sets. Study selection criteria included diagnosis of ES, absence of acute coronary syndrome as the arrhythmic trigger, and ≥10 patients included. 
At the end of the selection and collection processes, 5 centers had the data set from their article pooled into the present registry. Temperature data and sunrise and sunset hours were retrieved from Weather Underground, the largest weather database available online. Total sample included 246 patients presenting with ES (221 men; age: 65±9 years). Each ES episode included a median of 7 ventricular tachycardia/ventricular fibrillation episodes. Fifty-nine percent of patients experienced ES during daytime hours (P<0.001). The prevalence of ES was significantly higher during workdays, with Saturdays and Sundays registering the lowest rates of ES (10.4% and 7.2%, respectively, versus 16.5% daily mean from Monday to Friday; P<0.001). ES occurrence was significantly associated with increased monthly temperature range when compared with the month before (P=0.003). ES incidence is not homogenous over time but seems to have a clustered pattern, with a higher incidence during daytime hours and working days. ES is associated with an increase in monthly temperature variation. https://www.crd.york.ac.uk. Unique identifier: CRD42013003744.",2017-03-01 +31894168,High-Energy (>10 MeV) Oxygen and Sulfur Ions Observed at Jupiter From Pulse Width Measurements of the JEDI Sensors.,"The Jovian polar regions produce X-rays that are characteristic of very energetic oxygen and sulfur that become highly charged on precipitating into Jupiter's upper atmosphere. Juno has traversed the polar regions above where these energetic ions are expected to be precipitating revealing a complex composition and energy structure. Energetic ions are likely to drive the characteristic X-rays observed at Jupiter (Haggerty et al., 2017, https://doi.org/10.1002/2017GL072866; Houston et al., 2018, https://doi.org/10.1002/2017JA024872; Kharchenko et al., 2006, https://doi.org/10.1029/2006GL026039). 
Motivated by the science of X-ray generation, we describe here Juno Jupiter Energetic Particle Detector Instrument (JEDI) measurements of ions above 1 MeV and demonstrate the capability of measuring oxygen and sulfur ions with energies up to 100 MeV. We detail the process of retrieving ion fluxes from pulse width data on instruments like JEDI (called ""puck's""; Clark, Cohen, et al., 2016, https://doi.org/10.1002/2017GL074366; Clark, Mauk, et al., 2016, https://doi.org/10.1002/2015JA022257; Mauk et al., 2013, https://doi.org/10.1007/s11214-013-0025-3) as well as details on retrieving very energetic particles (>20 MeV) above which the pulse width also saturates.",2019-10-01 +30903686,SPRINT-Gly: predicting N- and O-linked glycosylation sites of human and mouse proteins by using sequence and predicted structural properties.,"

Motivation

Protein glycosylation is one of the most abundant post-translational modifications that plays an important role in immune responses, intercellular signaling, inflammation and host-pathogen interactions. However, due to the poor ionization efficiency and microheterogeneity of glycopeptides identifying glycosylation sites is a challenging task, and there is a demand for computational methods. Here, we constructed the largest dataset of human and mouse glycosylation sites to train deep learning neural networks and support vector machine classifiers to predict N-/O-linked glycosylation sites, respectively.

Results

The method, called SPRINT-Gly, achieved consistent results between ten-fold cross validation and independent test for predicting human and mouse glycosylation sites. For N-glycosylation, a mouse-trained model performs equally well in human glycoproteins and vice versa, however, due to significant differences in O-linked sites separate models were generated. Overall, SPRINT-Gly is 18% and 50% higher in Matthews correlation coefficient than the next best method compared in N-linked and O-linked sites, respectively. This improved performance is due to the inclusion of novel structure and sequence-based features.

Availability and implementation

http://sparks-lab.org/server/SPRINT-Gly/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-10-01 +28182744,Organic materials database: An open-access online database for data mining.,"We present an organic materials database (OMDB) hosting thousands of Kohn-Sham electronic band structures, which is freely accessible online at http://omdb.diracmaterials.org. The OMDB focus lies on electronic structure, density of states and other properties for purely organic and organometallic compounds that are known to date. The electronic band structures are calculated using density functional theory for the crystal structures contained in the Crystallography Open Database. The OMDB web interface allows users to retrieve materials with specified target properties using non-trivial queries about their electronic structure. We illustrate the use of the OMDB and how it can become an organic part of search and prediction of novel functional materials via data mining techniques. As a specific example, we provide data mining results for metals and semiconductors, which are known to be rare in the class of organic materials.",2017-02-09 +30562706,"Spatiotemporal continuous estimates of PM2.5 concentrations in China, 2000-2016: A machine learning method with inputs from satellites, chemical transport model, and ground observations.","Ambient exposure to fine particulate matter (PM2.5) is known to harm public health in China. Satellite remote sensing measurements of aerosol optical depth (AOD) were statistically associated with in-situ observations after 2013 to predict PM2.5 concentrations nationwide, while the lack of surface monitoring data before 2013 have created difficulties in historical PM2.5 exposure estimates. Hindcast approaches using statistical models or chemical transport models (CTMs) were developed to overcome this limitation, while those approaches still suffer from incomplete daily coverage due to missing AOD data or limited accuracy due to uncertainties of CTMs. 
Here we developed a new machine learning (ML) model with high-dimensional expansion (HD-expansion) of numerous predictors (including AOD and other satellite covariates, meteorological variables and CTM simulations). Through comprehensive characterization of the nonlinear effects of, and interactions among different predictors, the HD-expansion parameterized the association between PM2.5 and AOD as a nonlinear function of space and time covariates (e.g., planetary boundary layer height and relative humidity). In this way, the PM2.5-AOD association can vary spatiotemporally. We trained the model with data from 2013 to 2016 and evaluated its performance using annually-iterated cross-validation, which iteratively held out the in-situ observations for a whole calendar year (as testing data) to examine the predictions from a model trained by the rest of the observations. Our estimates were found to be in good agreement with in-situ observations, with correlation coefficients (R2) of 0.61, 0.68, and 0.75 for daily, monthly and annual averages, respectively. To interpolate the missing predictions due to incomplete AOD data, we incorporated a generalized additive model into the ML model. The two-stage estimates of PM2.5 sacrificed the prediction accuracy on a daily timescale (R2 = 0.55), but achieved complete spatiotemporal coverage and improved the accuracy of monthly (R2 = 0.71) and annual (R2 = 0.77) averages. The model was then used to predict daily PM2.5 concentrations during 2000-2016 across China and estimate long-term trends in PM2.5 for the period. We found that population-weighted concentrations of PM2.5 significantly increased, by 2.10 (95% confidence interval (CI): 1.74, 2.46) μg/m3/year during 2000-2007, and rapidly decreased by 4.51 (3.12, 5.90) μg/m3/year during 2013-2016. In this study, we produced AOD-based estimates of historical PM2.5 with complete spatiotemporal coverage, which were evidenced as accurate, particularly in middle and long term. 
The products could support large-scale epidemiological studies and risk assessments of ambient PM2.5 in China and can be accessed via the website (http://www.meicmodel.org/dataset-phd.html).",2018-12-18 +31016417,DNAmod: the DNA modification database.,"Covalent DNA modifications, such as 5-methylcytosine (5mC), are increasingly the focus of numerous research programs. In eukaryotes, both 5mC and 5-hydroxymethylcytosine (5hmC) are now recognized as stable epigenetic marks, with diverse functions. Bacteria, archaea, and viruses contain various other modified DNA nucleobases. Numerous databases describe RNA and histone modifications, but no database specifically catalogues DNA modifications, despite their broad importance in epigenetic regulation. To address this need, we have developed DNAmod: the DNA modification database. DNAmod is an open-source database ( https://dnamod.hoffmanlab.org ) that catalogues DNA modifications and provides a single source to learn about their properties. DNAmod provides a web interface to easily browse and search through these modifications. The database annotates the chemical properties and structures of all curated modified DNA bases, and a much larger list of candidate chemical entities. DNAmod includes manual annotations of available sequencing methods, descriptions of their occurrence in nature, and provides existing and suggested nomenclature. DNAmod enables researchers to rapidly review previous work, select mapping techniques, and track recent developments concerning modified bases of interest.",2019-04-23 +23055619,PRD: A protein-RNA interaction database.,"

Unlabelled

Although protein-RNA interactions (PRIs) are involved in various important cellular processes, compiled data on PRIs are still limited. This contrasts with protein-protein interactions, which have been intensively recorded in public databases and subjected to network level analysis. Here, we introduce PRD, an online database of PRIs, dispersed across several sources, including scientific literature. Currently, over 10,000 interactions have been stored in PRD using PSI-MI 2.5, which is a standard model for describing detailed molecular interactions, with an emphasis on gene level data. Users can browse all recorded interactions and execute flexible keyword searches against the database via a web interface. Our database is not only a reference of PRIs, but will also be a valuable resource for studying characteristics of PRI networks.

Availability

PRD can be freely accessed at http://pri.hgc.jp/",2012-08-03 +31749431,"In Silico Screening for Anti-inflammatory Bioactive Molecules from Ayurvedic Decoction, Balaguluchyadi kashayam.","

Background

Balaguluchyadi kashayam, a polyherbal Ayurvedic decoction prepared from Sida cordifolia L., Tinospora cordifolia (Willd.) Miers, and Cedrus deodara (Roxb. ex D.Don) G.Don, is used in Ayurveda for the treatment of chronic inflammatory conditions. Although this herbal decoction has been used for a long period for treating chronic inflammatory conditions, the mechanism of action of the decoction in reducing inflammatory conditions associated with chronic inflammation has not been clearly understood. Mass spectroscopy-based identification of bioactive molecules present in the decoction and its interaction with enzymes/proteins involved in the pathogenesis of chronic inflammation has been carried out and reported in this study.

Introduction

Polyherbalism is one of the major principles of Ayurveda. Various phytoconstituents with different activities in the polyherbal decoction act on multi targets of a wide range of diseases. Balaguluchyadi kashayam is a polyherbal decoction prescribed for chronic inflammatory etiologies and the present study aims to evaluate the binding potential of the compounds, identified from Balaguluchyadi kashayam to enzymes/proteins involved in the development and progression of chronic inflammation.

Methods

The bioactive compounds present in the Balaguluchyadi Kashayam fractions were extracted by preparative HPLC and identified using UPLC MS Q-TOF. The physicochemical characteristics and ADMET properties of the compounds were calculated using Mol soft, Swiss ADME and OSIRIS data warrior software. Then the binding interactions between the molecules and the proinflammatory mediators such as 5 Lipoxygenase, Cyclooxygenase 2, Tumor necrosis factor-alpha convertase enzyme (TACE) and Caspase 1 were determined using the molecular docking software Auto Dock 4.0 (http://autodock.scripps.edu/downloads).

Results

The identified bioactive molecules in the decoction showed a good binding affinity towards the enzymes/proteins involved in the development and progression of chronic inflammation compared to the binding affinity of known inhibitors/drugs to the respective enzymes/proteins.

Conclusion

The bioactive molecules identified in Balaguluchyadi Kashayam could be developed as potential therapeutic molecules against enzymes/proteins involved in the development and progression of chronic inflammation.",2020-01-01 +24185698,LSD 2.0: an update of the leaf senescence database.,"This manuscript describes an update of the leaf senescence database (LSD) previously featured in the 2011 NAR Database Issue. LSD provides comprehensive information concerning senescence-associated genes (SAGs) and their corresponding mutants. We have made extensive annotations for these SAGs through both manual and computational approaches. Recently, we updated LSD to a new version LSD 2.0 (http://www.eplantsenescence.org/), which contains 5356 genes and 322 mutants from 44 species, an extension from the previous version containing 1145 genes and 154 mutants from 21 species. In the current version, we also included several new features: (i) Primer sequences retrieved based on experimental evidence or designed for high-throughput analysis were added; (ii) More than 100 images of Arabidopsis SAG mutants were added; (iii) Arabidopsis seed information obtained from The Arabidopsis Information Resource (TAIR) was integrated; (iv) Subcellular localization information of SAGs in Arabidopsis mined from literature or generated from the SUBA3 program was presented; (v) Quantitative Trait Loci information was added with links to the original database and (vi) New options such as primer and miRNA search for database query were implemented. The updated database will be a valuable and informative resource for basic research of leaf senescence and for the manipulation of traits of agronomically important plants.",2013-10-31 +26363020,Co-LncRNA: investigating the lncRNA combinatorial effects in GO annotations and KEGG pathways based on human RNA-Seq data. ,"Long non-coding RNAs (lncRNAs) are emerging as key regulators of diverse biological processes and diseases. 
However, the combinatorial effects of these molecules in a specific biological function are poorly understood. Identifying co-expressed protein-coding genes of lncRNAs would provide ample insight into lncRNA functions. To facilitate such an effort, we have developed Co-LncRNA, which is a web-based computational tool that allows users to identify GO annotations and KEGG pathways that may be affected by co-expressed protein-coding genes of a single or multiple lncRNAs. LncRNA co-expressed protein-coding genes were first identified in publicly available human RNA-Seq datasets, including 241 datasets across 6560 total individuals representing 28 tissue types/cell lines. Then, the lncRNA combinatorial effects in a given GO annotations or KEGG pathways are taken into account by the simultaneous analysis of multiple lncRNAs in user-selected individual or multiple datasets, which is realized by enrichment analysis. In addition, this software provides a graphical overview of pathways that are modulated by lncRNAs, as well as a specific tool to display the relevant networks between lncRNAs and their co-expressed protein-coding genes. Co-LncRNA also supports users in uploading their own lncRNA and protein-coding gene expression profiles to investigate the lncRNA combinatorial effects. It will be continuously updated with more human RNA-Seq datasets on an annual basis. Taken together, Co-LncRNA provides a web-based application for investigating lncRNA combinatorial effects, which could shed light on their biological roles and could be a valuable resource for this community. Database URL: http://www.bio-bigdata.com/Co-LncRNA/.",2015-09-10 +28150236,Bioinformatics Analysis of Protein Phosphorylation in Plant Systems Biology Using P3DB.,"Protein phosphorylation is one of the most pervasive protein post-translational modification events in plant cells. 
It is involved in many plant biological processes, such as plant growth, organ development, and plant immunology, by regulating or switching signaling and metabolic pathways. High-throughput experimental methods like mass spectrometry can easily characterize hundreds to thousands of phosphorylation events in a single experiment. With the increasing volume of the data sets, Plant Protein Phosphorylation DataBase (P3DB, http://p3db.org ) provides a comprehensive, systematic, and interactive online platform to deposit, query, analyze, and visualize these phosphorylation events in many plant species. It stores the protein phosphorylation sites in the context of identified mass spectra, phosphopeptides, and phosphoproteins contributed from various plant proteome studies. In addition, P3DB associates these plant phosphorylation sites to protein physicochemical information in the protein charts and tertiary structures, while various protein annotations from hierarchical kinase phosphatase families, protein domains, and gene ontology are also added into the database. P3DB not only provides rich information, but also interconnects and provides visualization of the data in networks, in systems biology context. Currently, P3DB includes the KiC (Kinase Client) assay network, the protein-protein interaction network, the kinase-substrate network, the phosphatase-substrate network, and the protein domain co-occurrence network. All of these are available to query for and visualize existing phosphorylation events. Although P3DB only hosts experimentally identified phosphorylation data, it provides a plant phosphorylation prediction model for any unknown queries on the fly. P3DB is an entry point to the plant phosphorylation community to deposit and visualize any customized data sets within this systems biology framework. 
Nowadays, P3DB has become one of the major bioinformatics platforms of protein phosphorylation in plant biology.",2017-01-01 +25352729,FCDD: A Database for Fruit Crops Diseases.,"

Unlabelled

Fruit Crops Diseases Database (FCDD) requires a number of biotechnology and bioinformatics tools. The FCDD is a unique bioinformatics resource that compiles information about 162 details on fruit crops diseases, diseases type, its causal organism, images, symptoms and their control. The FCDD contains 171 phytochemicals from 25 fruits, their 2D images and their 20 possible sequences. This information has been manually extracted and manually verified from numerous sources, including other electronic databases, textbooks and scientific journals. FCDD is fully searchable and supports extensive text search. The main focus of the FCDD is on providing possible information of fruit crops diseases, which will help in discovery of potential drugs from one of the common bioresource-fruits. The database was developed using MySQL. The database interface is developed in PHP, HTML and JAVA. FCDD is freely available.

Availability

http://www.fruitcropsdd.com/",2014-09-30 +23875173,MelanomaDB: A Web Tool for Integrative Analysis of Melanoma Genomic Information to Identify Disease-Associated Molecular Pathways.,"Despite on-going research, metastatic melanoma survival rates remain low and treatment options are limited. Researchers can now access a rapidly growing amount of molecular and clinical information about melanoma. This information is becoming difficult to assemble and interpret due to its dispersed nature, yet as it grows it becomes increasingly valuable for understanding melanoma. Integration of this information into a comprehensive resource to aid rational experimental design and patient stratification is needed. As an initial step in this direction, we have assembled a web-accessible melanoma database, MelanomaDB, which incorporates clinical and molecular data from publically available sources, which will be regularly updated as new information becomes available. This database allows complex links to be drawn between many different aspects of melanoma biology: genetic changes (e.g., mutations) in individual melanomas revealed by DNA sequencing, associations between gene expression and patient survival, data concerning drug targets, biomarkers, druggability, and clinical trials, as well as our own statistical analysis of relationships between molecular pathways and clinical parameters that have been produced using these data sets. The database is freely available at http://genesetdb.auckland.ac.nz/melanomadb/about.html. A subset of the information in the database can also be accessed through a freely available web application in the Illumina genomic cloud computing platform BaseSpace at http://www.biomatters.com/apps/melanoma-profiler-for-research. The MelanomaDB database illustrates dysregulation of specific signaling pathways across 310 exome-sequenced melanomas and in individual tumors and identifies the distribution of somatic variants in melanoma. 
We suggest that MelanomaDB can provide a context in which to interpret the tumor molecular profiles of individual melanoma patients relative to biological information and available drug therapies.",2013-07-16 +28358862,Comprehensive framework for visualizing and analyzing spatio-temporal dynamics of racial diversity in the entire United States.,"The United States is increasingly becoming a multi-racial society. To understand multiple consequences of this overall trend to our neighborhoods we need a methodology capable of spatio-temporal analysis of racial diversity at the local level but also across the entire U.S. Furthermore, such methodology should be accessible to stakeholders ranging from analysts to decision makers. In this paper we present a comprehensive framework for visualizing and analyzing diversity data that fulfills such requirements. The first component of our framework is a U.S.-wide, multi-year database of race sub-population grids which is freely available for download. These 30 m resolution grids have being developed using dasymetric modeling and are available for 1990-2000-2010. We summarize numerous advantages of gridded population data over commonly used Census tract-aggregated data. Using these grids frees analysts from constructing their own and allows them to focus on diversity analysis. The second component of our framework is a set of U.S.-wide, multi-year diversity maps at 30 m resolution. A diversity map is our product that classifies the gridded population into 39 communities based on their degrees of diversity, dominant race, and population density. It provides spatial information on diversity in a single, easy-to-understand map that can be utilized by analysts and end users alike. Maps based on subsequent Censuses provide information about spatio-temporal dynamics of diversity. Diversity maps are accessible through the GeoWeb application SocScape (http://sil.uc.edu/webapps/socscape_usa/) for an immediate online exploration. 
The third component of our framework is a proposal to quantitatively analyze diversity maps using a set of landscape metrics. Because of its form, a grid-based diversity map could be thought of as a diversity ""landscape"" and analyzed quantitatively using landscape metrics. We give a brief summary of most pertinent metrics and demonstrate how they can be applied to diversity maps.",2017-03-30 +30740268,LiBiNorm: an htseq-count analogue with improved normalisation of Smart-seq2 data and library preparation diagnostics.,"Protocols for preparing RNA sequencing (RNA-seq) libraries, most prominently ""Smart-seq"" variations, introduce global biases that can have a significant impact on the quantification of gene expression levels. This global bias can lead to drastic over- or under-representation of RNA in non-linear length-dependent fashion due to enzymatic reactions during cDNA production. It is currently not corrected by any RNA-seq software, which mostly focus on local bias in coverage along RNAs. This paper describes LiBiNorm, a simple command line program that mimics the popular htseq-count software and allows diagnostics, quantification, and global bias removal. LiBiNorm outputs gene expression data that has been normalized to correct for global bias introduced by the Smart-seq2 protocol. In addition, it produces data and several plots that allow insights into the experimental history underlying library preparation. The LiBiNorm package includes an R script that allows visualization of the main results. LiBiNorm is the first software application to correct for the global bias that is introduced by the Smart-seq2 protocol. 
It is freely downloadable at http://www2.warwick.ac.uk/fac/sci/lifesci/research/libinorm.",2019-02-04 +,Biology and phylogenetic placement of a new species of Lasiokapala Ashmead from Argentina (Hymenoptera: Eucharitidae),"Within the ant‐parasitic wasp family Eucharitidae (Hymenoptera), the Kapala clade is a monophyletic group attacking Ectatomminae and Ponerinae. Members often express extreme phenotypic features, especially in the morphology of the paired frenal spines. Although the means of attack and developmental history of the eucharitid wasps within the ant nest are very similar, the means by which they oviposit and optimize encounters of their active first‐instar larvae with ants is highly variable. The relationships and life‐history strategies of Lasiokapala Ashmead (Hymenoptera: Eucharitidae) and related taxa within the Kapala clade are discussed based on phylogenetic analyses of morphological and molecular data. Descriptions are provided for the adults (both sexes), eggs and planidia of Lasiokapala spiralicornis sp.n. from Santiago del Estero (Argentina). Females deposit their eggs on the underside of leaves of Sida cordifolia L. (Malvaceae) and the likely host is postulated to be the genus Ectatomma (Formicidae: Ectatomminae). Even within a closely related group of genera, there is extreme independent divergence in morphology of scutellar spines, antennae and other features, but the larvae and larval biology are highly conserved across a much larger group of Eucharitidae. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:3536985B‐D193‐4771‐BFA3‐25A59EFF6AB5.",2016-07-01 +32082172,SAMbinder: A Web Server for Predicting S-Adenosyl-L-Methionine Binding Residues of a Protein From Its Amino Acid Sequence.,"

Motivation

S-adenosyl-L-methionine (SAM) is an essential cofactor present in the biological system and plays a key role in many diseases. There is a need to develop a method for predicting SAM binding sites in a protein for designing drugs against SAM associated disease. To the best of our knowledge, there is no method that can predict the binding site of SAM in a given protein sequence.

Result

This manuscript describes a method SAMbinder, developed for predicting SAM interacting residue in a protein from its primary sequence. All models were trained, tested, and evaluated on 145 SAM binding protein chains where no two chains have more than 40% sequence similarity. Firstly, models were developed using different machine learning techniques on a balanced data set containing 2,188 SAM interacting and an equal number of non-interacting residues. Our random forest based model developed using binary profile feature got maximum Matthews Correlation Coefficient (MCC) 0.42 with area under receiver operating characteristics (AUROC) 0.79 on the validation data set. The performance of our models improved significantly from MCC 0.42 to 0.61, when evolutionary information in the form of the position-specific scoring matrix (PSSM) profile is used as a feature. We also developed models on a realistic data set containing 2,188 SAM interacting and 40,029 non-interacting residues and got maximum MCC 0.61 with AUROC of 0.89. In order to evaluate the performance of our models, we used internal as well as external cross-validation technique.

Availability and implementation

https://webs.iiitd.edu.in/raghava/sambinder/.",2019-01-01 +27122320,Parallel workflow manager for non-parallel bioinformatic applications to solve large-scale biological problems on a supercomputer.,"Rapid expansion of online resources providing access to genomic, structural, and functional information associated with biological macromolecules opens an opportunity to gain a deeper understanding of the mechanisms of biological processes due to systematic analysis of large datasets. This, however, requires novel strategies to optimally utilize computer processing power. Some methods in bioinformatics and molecular modeling require extensive computational resources. Other algorithms have fast implementations which take at most several hours to analyze a common input on a modern desktop station, however, due to multiple invocations for a large number of subtasks the full task requires a significant computing power. Therefore, an efficient computational solution to large-scale biological problems requires both a wise parallel implementation of resource-hungry methods as well as a smart workflow to manage multiple invocations of relatively fast algorithms. In this work, a new computer software mpiWrapper has been developed to accommodate non-parallel implementations of scientific algorithms within the parallel supercomputing environment. The Message Passing Interface has been implemented to exchange information between nodes. Two specialized threads - one for task management and communication, and another for subtask execution - are invoked on each processing unit to avoid deadlock while using blocking calls to MPI. The mpiWrapper can be used to launch all conventional Linux applications without the need to modify their original source codes and supports resubmission of subtasks on node failure. We show that this approach can be used to process huge amounts of biological data efficiently by running non-parallel programs in parallel mode on a supercomputer. 
The C++ source code and documentation are available from http://biokinet.belozersky.msu.ru/mpiWrapper .",2016-04-01 +29967725,Specalyzer-an interactive online tool to analyze spectral reflectance measurements.,"Low-cost phenotyping using proximal sensors is increasingly becoming popular in plant breeding. As these techniques generate a large amount of data, analysis pipelines that do not require expertise in computer programming can benefit a broader user base. In this work, a new online tool Specalyzer is presented that allows interactive analysis of the spectral reflectance data generated by proximal spectroradiometers. Specalyzer can be operated from any web browser allowing data uploading, analysis, interactive plots and exporting by point and click using a simple graphical user interface. Specalyzer is evaluated with case study data from a winter wheat fertilizer trial with two fertilizer treatments. Specalyzer can be accessed online at http://www.specalyzer.org.",2018-06-25 +29126995,FVD: The fish-associated virus database.,"With the expanding of marine and freshwater aquaculture, the outbreaks of aquatic animal diseases have increasingly become the major threats to the healthy development of aquaculture industries. Notably, viral infections lead to massive fish deaths and result in great economic loss every year across the world. Hence, it is meaningful to clarify the biodiversity, geographical distribution and host specificity of fish-associated viruses. In this study, viral sequences detected in fish samples were manually collected from public resources, along with the related metadata, such as sampling time, location, specimen type and fish species. Moreover, the information regarding the host fish, including aliases, diet type and geographic distribution were also integrated into a database (FVD). To date, FVD covers the information of 4860 fish-associated viruses belonging to 15 viral families, which were detected from 306 fish species in 57 countries. 
Meanwhile, sequence alignment, live data statistics and download function are available. Through the user-friendly interface, FVD provides a practical platform that would not only benefit virologists who want to disclose the spread of fish-associated viruses, but also zoologists who focus on the health of domestic and wild animals. Furthermore, it may facilitate the surveillance and prevention of fish viral diseases. Database URL: http://bioinfo.ihb.ac.cn/fvd.",2017-11-08 +30307523,PVsiRNAdb: a database for plant exclusive virus-derived small interfering RNAs. ,"Ribonucleic acids (RNA) interference mechanism has been proved to be an important regulator of both transcriptional and post-transcription controls of gene expression during biotic and abiotic stresses in plants. Virus-derived small interfering RNAs (vsiRNAs) are established components of the RNA silencing mechanism for incurring anti-viral resistance in plants. Some databases like siRNAdb, HIVsirDB and VIRsiRNAdb are available online pertaining to siRNAs as well as vsiRNAs generated during viral infection in humans; however, currently there is a lack of repository for plant exclusive vsiRNAs. We have developed `PVsiRNAdb (http://www.nipgr.res.in/PVsiRNAdb)', a manually curated plant-exclusive database harboring information related to vsiRNAs found in different virus-infected plants collected by exhaustive data mining of published literature so far. This database contains a total of 322 214 entries and 282 549 unique sequences of vsiRNAs. In PVsiRNAdb, detailed and comprehensive information is available for each vsiRNA sequence. Apart from the core information consisting of plant, tissue, virus name and vsiRNA sequence, additional information of each vsiRNAs (map position, length, coordinates, strand information and predicted structure) may be of high utility to the user. 
Different types of search and browse modules with three different tools namely BLAST, Smith-Waterman Align and Mapping are provided at PVsiRNAdb. Thus, this database being one of its kind will surely be of much use to molecular biologists for exploring the complex viral genetics and genomics, viral-host interactions and beneficial to the scientific community and can prove to be very advantageous in the field of agriculture for producing viral resistance transgenic crops.",2018-01-01 +31199677,Comparison of Bisphenol A and Bisphenol S Percutaneous Absorption and Biotransformation.,"

Background

Bisphenol S (BPS) has been widely substituted for bisphenol A (BPA) on thermal papers, but little is known about its skin absorption.

Objectives

We compared the percutaneous absorption and biotransformation of BPS and BPA in vitro and in a controlled human trial.

Methods

Absorption and biotransformation of BPS and BPA were monitored across reconstructed human epidermis at two environmentally relevant doses over 25 h. In the human trial, five male participants handled thermal receipts containing BPS and washed their hands after 2 h. Urine (0-48 h) and serum (0-7.5h) were analyzed for target bisphenols, and one participant repeated the experiment with extended monitoring. BPS data were compared with published data for isotope-labeled BPA ([Formula: see text]) in the same participants.

Results

At doses of 1.5 and [Formula: see text] applied to reconstructed human epidermis, the permeability coefficient of BPS (0.009 and [Formula: see text], respectively) was significantly lower than for BPA (0.036 and [Formula: see text], respectively), and metabolism of both bisphenols was negligible. In participants handling thermal receipts, the quantities of BPS and [Formula: see text] on hands was significantly correlated with maximum urinary event flux ([Formula: see text]), but the slope was lower for BPS than BPA ([Formula: see text] and 1.1, respectively). As a proportion of total urinary bisphenol, free BPS [[Formula: see text]: [Formula: see text]] was higher than for free BPA ([Formula: see text]). Postexposure maximum urinary BPS concentrations (0.93 to [Formula: see text]; [Formula: see text]) were in the 93-98th percentile range of BPS in background Canadians ([Formula: see text]; [Formula: see text]).

Conclusion

Both the in vitro and human studies suggested lower percutaneous absorption of BPS compared with BPA, but a lower biotransformation efficiency of BPS should also be considered in its evaluation as a BPA substitute. https://doi.org/10.1289/EHP5044.",2019-06-14 +30693157,STABILITYSOFT: A new online program to calculate parametric and non-parametric stability statistics for crop traits.,"

Premise of the study

Access to improved crop cultivars is the foundation for successful agriculture. New cultivars must have improved yields that are determined by quantitative and qualitative traits. Genotype-by-environment interactions (GEI) occur for quantitative traits such as reproductive fitness, longevity, height, weight, yield, and disease resistance. The stability of genotypes across a range of environments can be analyzed using GEI analysis. GEI analysis includes univariate and multivariate analyses with both parametric and non-parametric models.

Methods and results

The program STABILITYSOFT is online software based on JavaScript and R to calculate several univariate parametric and non-parametric statistics for various crop traits. These statistics include Plaisted and Peterson's mean variance component (θ i ), Plaisted's GE variance component (θ (i) ), Wricke's ecovalence stability index (W i 2 ), regression coefficient (b i ), deviation from regression (S di 2 ), Shukla's stability variance (σ i 2 ), environmental coefficient of variance (CV i ), Nassar and Huhn's statistics (S (1) , S (2) ), Huhn's equation (S (3) and S (6) ), Thennarasu's non-parametric statistics (NP (i) ), and Kang's rank-sum. These statistics are important in the identification of stable genotypes; hence, this program can compare and select genotypes across multiple environment trials for a given data set. This program supports both the repeated data across environments and matrix data types. The accuracy of the results obtained from this software was tested on several crop plants.

Conclusions

This new software provides a user-friendly interface to estimate stability statistics accurately for plant scientists, agronomists, and breeders who deal with large volumes of quantitative data. This software can also show ranking patterns of genotypes and describe associations among different statistics with yield performance through a heat map plot. The software is available at https://mohsenyousefian.com/stabilitysoft/.",2019-01-15 +28439289,Bipolarity and suicidal ideation in children and adolescents: a systematic review with meta-analysis.,"

Background

Affective disorders in children and adolescents have received growing attention in the world scenario of mental health. Additionally, there has been an increasing prevalence of suicidal ideation in this population.

Objective

A systematic review with meta-analysis was conducted to demonstrate the main risk factors regarding the development of suicidal ideation in bipolar disorder.

Methods

This is a systematic review with meta-analysis using the PRISMA protocol (http://www.prisma-statement.org/). This study included secondary data. Original data in mental health were collected by mapping the evidence found in the following electronic databases: MEDLINE/PubMed, LILACS, SciELO, and ScienceDirect in the period from 2005 to 2015.

Results

We found 1418 records in these databases, and 46 of them were selected to comprise this review. The pooled risk across the studies was 2.94 (CI [2.29-3.78]). A significant correlation was verified between the risk factors and suicidal ideation. The result was r (Pearson) = 0.7103 and p value <0.001.

Conclusion

Children and adolescents living with bipolar disorder are more vulnerable to suicidal ideation. These results reinforce the need for a more effective public policy directed toward this population.",2017-04-21 +29726908,VarExp: estimating variance explained by genome-wide GxE summary statistics.,"

Summary

Many genome-wide association studies and genome-wide screening for gene-environment (GxE) interactions have been performed to elucidate the underlying mechanisms of human traits and diseases. When the analyzed outcome is quantitative, the overall contribution of identified genetic variants to the outcome is often expressed as the percentage of phenotypic variance explained. This is commonly done using individual-level genotype data but it is challenging when results are derived through meta-analyses. Here, we present an R package, 'VarExp', that allows for the estimation of the percentage of phenotypic variance explained using summary statistics only. It allows for a range of models to be evaluated, including marginal genetic effects, GxE interaction effects and both effects jointly. Its implementation integrates all recent methodological developments and does not need external data to be uploaded by users.

Availability and implementation

The R package is available at https://gitlab.pasteur.fr/statistical-genetics/VarExp.git.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-10-01 +24275494,1000 Genomes Selection Browser 1.0: a genome browser dedicated to signatures of natural selection in modern humans.,"Searching for Darwinian selection in natural populations has been the focus of a multitude of studies over the last decades. Here we present the 1000 Genomes Selection Browser 1.0 (http://hsb.upf.edu) as a resource for signatures of recent natural selection in modern humans. We have implemented and applied a large number of neutrality tests as well as summary statistics informative for the action of selection such as Tajima's D, CLR, Fay and Wu's H, Fu and Li's F* and D*, XPEHH, ΔiHH, iHS, F(ST), ΔDAF and XPCLR among others to low coverage sequencing data from the 1000 genomes project (Phase 1; release April 2012). We have implemented a publicly available genome-wide browser to communicate the results from three different populations of West African, Northern European and East Asian ancestry (YRI, CEU, CHB). Information is provided in UCSC-style format to facilitate the integration with the rich UCSC browser tracks and an access page is provided with instructions and for convenient visualization. We believe that this expandable resource will facilitate the interpretation of signals of selection on different temporal, geographical and genomic scales.",2013-11-25 +31920392,Development and Validation of Nomograms for Predicting the Prognosis of Triple-Negative Breast Cancer Patients Based on 379 Chinese Patients.,"

Purpose

We aimed to construct universally applicable nomograms incorporating prognostic factors to predict the prognosis of patients with triple-negative breast cancer (TNBC).

Patients and methods

Clinicopathological data of 379 patients with TNBC from March 2008 to June 2014 were retrospectively collected and analyzed. The endpoints were disease-free survival (DFS) and overall survival (OS). Patients were randomly divided into a training group and an independent validation group. In the training group, the prognostic factors were screened to develop nomograms. C-index and calibration curves were used to evaluate the predictive accuracy and discriminative ability of nomograms in both groups. The accuracy of the nomograms was also compared with the traditional American Joint Committee on Cancer Tumor-Node-Metastasis anatomical stage (8th edition).

Results

Four prognostic factors (albumin-to-globulin ratio, neutrophil-to-lymphocyte ratio, positive lymph nodes, and tumor size) were used to construct the nomogram of DFS. In addition to the aforementioned factors, age was taken into account in the construction of the OS nomogram. The C-index of the DFS nomogram in the training and validation groups was 0.71 (95% confidence interval [CI]: 0.64-0.77) and 0.69 (95% CI: 0.58-0.79), respectively; the C-index of the OS nomogram was 0.77 (95% CI: 0.70-0.84) and 0.74 (95% CI: 0.62-0.86), respectively. This suggests that the nomograms had high accuracy. Moreover, calibration curves showed good consistencies in both groups. Our models showed superiority in predicting accuracy compared with the AJCC TNM staging system. Furthermore, two web pages of the nomograms were produced: DFS: https://sh-skipper.shinyapps.io/TNBC1/; OS: https://sh-skipper.shinyapps.io/TNBC2/.

Conclusion

These predictive models are simple and easy to use, particularly the web versions. They have certain clinical value in predicting the prognosis of patients with TNBC. They can assist doctors in identifying patients at different prognostic risks and strengthen the treatment or follow-up accordingly.",2019-12-30 +27141961,Enrichr: a comprehensive gene set enrichment analysis web server 2016 update.,"Enrichment analysis is a popular method for analyzing gene sets generated by genome-wide experiments. Here we present a significant update to one of the tools in this domain called Enrichr. Enrichr currently contains a large collection of diverse gene set libraries available for analysis and download. In total, Enrichr currently contains 180 184 annotated gene sets from 102 gene set libraries. New features have been added to Enrichr including the ability to submit fuzzy sets, upload BED files, improved application programming interface and visualization of the results as clustergrams. Overall, Enrichr is a comprehensive resource for curated gene sets and a search engine that accumulates biological knowledge for further biological discoveries. Enrichr is freely available at: http://amp.pharm.mssm.edu/Enrichr.",2016-05-03 +30973798,A Decade of Disproportionality: A State-Level Analysis of African American Students Enrolled in the Primary Disability Category of Speech or Language Impairment.,"Purpose This study aimed to determine if African American students were disproportionately represented between the years of 2004 and 2014 in the primary disability category of Speech or Language Impairment (S/LI) under the 2004 reauthorized Individuals with Disabilities Education Improvement Act. Method S/LI enrollment data from the Office of Special Education Programs and general enrollment data from the National Center for Education Statistics were analyzed to compare the risk of primary S/LI category enrollment of African American students to that of all other students. 
Risk ratios with 99% confidence intervals were calculated for each state across the 10 years studied. Results An average of 75% of states disproportionately represented African American students in the S/LI category each year; on average, 62% underrepresented African American students, and 14% overrepresented them. A post hoc analysis of the relationship between African American student representation and population densities revealed that states with high African American population densities almost exclusively underrepresented African American students and states with low densities tended toward a proportionate representation. Conclusions African American students were largely underrepresented in the category of S/LI in the years studied. These findings, alongside historic and chronic overrepresentation in other categories of special education, are discussed in the context of the fragmented harm theory ( Payne, 1984 ; Voulgarides, 2018 ; Voulgarides, Zwerger, & Noguera, 2013 ) and the disability rights and critical race theory ( Annamma, Connor, & Ferri, 2013 ). Supplemental Material https://doi.org/10.23641/asha.7967024.",2019-04-01 +27980332,"Antibiotics in aggressive periodontitis, is there a clinical benefit?","Data sourcesMedline, Embase and CENTRAL databases were searched up to December 2014. Unpublished data were sought by searching a database listing unpublished studies OpenGray [http://www.opengrey.eu/], formerly OpenSIGLE.Study selectionRandomised clinical trials assessing treatment of patients with AgP comparing scaling and root planing (SRP) alone with SRP plus a single antibiotic or a combination of drugs with a minimum of three months follow-up were considered. Studies specifically designed to evaluate smokers or subjects with diabetes mellitus and HIV/AIDS were not included.Data extraction and synthesisTwo researchers independently screened titles, abstracts and full texts of the search results. 
The studies, which fulfilled inclusion criteria, underwent validity assessment and data extraction using a specifically designed form. The quality of included studies was assessed using the Cochranes collaboration tool for risk of bias. Only two of the 11 included trials were considered at a low risk of bias. The data extracted from ten studies was analysed by pair-wise meta-analyses and the data extracted from five studies was included in Bayesian network meta-analyses pooled estimates. The six studies evaluated in the pairwise meta-analyses were excluded in the pooled estimates because four studies included patients with advanced disease only and one study did not present average data for pocket depth (PD) and clinical attachment level (CAL) and another one for using a different mechanical treatment.ResultsFourteen studies reporting 11 randomised clinical trials with a total of 388 patients were included in the review. Nine of 11 studies reported a statistically significant greater gain in full mouth mean clinical attachment (CA) and reduction in probing depth (PD) when a systemic antibiotic was used. From those studies the calculated mean difference for CA gained was 1.08 mm (p < 0.0001) and for PD reduction was 1.05 mm (p< 0.00001) for SRP + Metronidazole (Mtz). For SRP + Mtz+ amoxicillin (Amx) group the mean difference was 0.45 mm for CA gained and 0.53 mm for PD reduction (p<0.00001) than SRP alone/placebo. Bayesian network meta-analysis showed some additional benefits in CA gain and PD reduction when SRP was associated with systemic antibiotics.ConclusionsThe results support a statistically significant benefit of adjunctive systemic antibiotics in the treatment of AgP. The most consistent advantages - reduction in PD and CAL gain - were attained with the use of Mtz and Mtz + Amx. 
Future RCTs should be designed in order to directly compare these two antibiotic protocols in the treatment of AgP.",2016-12-01 +30423064,Towards an accurate and efficient heuristic for species/gene tree co-estimation.,"

Motivation

Species and gene trees represent how species and individual loci within their genomes evolve from their most recent common ancestors. These trees are central to addressing several questions in biology relating to, among other issues, species conservation, trait evolution and gene function. Consequently, their accurate inference from genomic data is a major endeavor. One approach to their inference is to co-estimate species and gene trees from genome-wide data. Indeed, Bayesian methods based on this approach already exist. However, these methods are very slow, limiting their applicability to datasets with small numbers of taxa. The more commonly used approach is to first infer gene trees individually, and then use gene tree estimates to infer the species tree. Methods in this category rely significantly on the accuracy of the gene trees which is often not high when the dataset includes closely related species.

Results

In this work, we introduce a simple, yet effective, iterative method for co-estimating gene and species trees from sequence data of multiple, unlinked loci. In every iteration, the method estimates a species tree, uses it as a generative process to simulate a collection of gene trees, and then selects gene trees for the individual loci from among the simulated gene trees by making use of the sequence data. We demonstrate the accuracy and efficiency of our method on simulated as well as biological data, and compare them to those of existing competing methods.

Availability and implementation

The method has been implemented in PhyloNet, which is publicly available at http://bioinfocs.rice.edu/phylonet.",2018-09-01 +27932926,"A dataset of fishes in and around Inle Lake, an ancient lake of Myanmar, with DNA barcoding, photo images and CT/3D models.","

Background

Inle (Inlay) Lake, an ancient lake of Southeast Asia, is located at the eastern part of Myanmar, surrounded by the Shan Mountains. Detailed information on fish fauna in and around the lake has long been unknown, although its outstanding endemism was reported a century ago.

New information

Based on the fish specimens collected from markets, rivers, swamps, ponds and ditches around Inle Lake as well as from the lake itself from 2014 to 2016, we recorded a total of 948 occurrence data (2120 individuals), belonging to 10 orders, 19 families, 39 genera and 49 species. Amongst them, 13 species of 12 genera are endemic or nearly endemic to the lake system and 17 species of 16 genera are suggested as non-native. The data are all accessible from the document ""A dataset of Inle Lake fish fauna and its distribution (http://ipt.pensoft.net/resource.do?r=inle_fish_2014-16)"", as well as DNA barcoding data (mitochondrial COI) for all species being available from the DDBJ/EMBL/GenBank (Accession numbers: LC189568-LC190411). Live photographs of almost all the individuals and CT/3D model data of several specimens are also available at the graphical fish biodiversity database (http://ffish.asia/INLE2016; http://ffish.asia/INLE2016-3D). The information can benefit the clarification, public concern and conservation of the fish biodiversity in the region.",2016-11-09 +28781262,[Correction of the electrophoretic shift in virtual 2D SDS-PAGE electrophoresis].,"Virtual electrophoresis in proteomics can be used to search localization of proteins and their proteoforms (especially those existing in low concentrations), to identify proteoforms found in experiments etc. Although the problem of predicting the isoelectric point is well studied, the need of electrophoretic shift correction is usually ignored. Researchers simply use the brutto molecular weight of the protein. In this study four data sets taken from the literature sources and the SWISS-2DPAGE database have been used to build correction equations for prediction of the electrophoretic shift (123, 72, 118 and 470 points, respectively). Two groups of models were built. 
The first model was based on the amino acid composition of proteins, the second one, on analysis of parameters calculated by amino acid sequences (theoretical molecular weight, hydrophobicity, charge distribution, ability to form helix structures). The coefficient of determination ranged from 0.35 to 0.75 in each single set, but cross-prediction between samples did not give satisfactory results. At the same time, the direction of correction was predicted correctly in 74% of cases. After combining the samples and dividing pooled data into 2 representative sets, the coefficient of determination during the process of learning ranged from 0.44 to 0.51, and R2 of predictions were not less than 0.39. The direction of correction was predicted correctly in 80% of cases. These prediction models have been integrated into the program pIPredict v.2, freely available at http://www.ibmc.msk.ru/LPCIT/pIPredict.",2017-05-01 +29300827,MIIC online: a web server to reconstruct causal or non-causal networks from non-perturbative data.,"

Summary

We present a web server running the MIIC algorithm, a network learning method combining constraint-based and information-theoretic frameworks to reconstruct causal, non-causal or mixed networks from non-perturbative data, without the need for an a priori choice on the class of reconstructed network. Starting from a fully connected network, the algorithm first removes dispensable edges by iteratively subtracting the most significant information contributions from indirect paths between each pair of variables. The remaining edges are then filtered based on their confidence assessment or oriented based on the signature of causality in observational data. MIIC online server can be used for a broad range of biological data, including possible unobserved (latent) variables, from single-cell gene expression data to protein sequence evolution and outperforms or matches state-of-the-art methods for either causal or non-causal network reconstruction.

Availability and implementation

MIIC online can be freely accessed at https://miic.curie.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +21335611,"B2G-FAR, a species-centered GO annotation repository.","

Motivation

Functional genomics research has expanded enormously in the last decade thanks to the cost reduction in high-throughput technologies and the development of computational tools that generate, standardize and share information on gene and protein function such as the Gene Ontology (GO). Nevertheless, many biologists, especially working with non-model organisms, still suffer from non-existing or low-coverage functional annotation, or simply struggle retrieving, summarizing and querying these data.

Results

The Blast2GO Functional Annotation Repository (B2G-FAR) is a bioinformatics resource envisaged to provide functional information for otherwise uncharacterized sequence data and offers data mining tools to analyze a larger repertoire of species than currently available. This new annotation resource has been created by applying the Blast2GO functional annotation engine in a strongly high-throughput manner to the entire space of public available sequences. The resulting repository contains GO term predictions for over 13.2 million non-redundant protein sequences based on BLAST search alignments from the SIMAP database. We generated GO annotation for approximately 150 000 different taxa making available 2000 species with the highest coverage through B2G-FAR. A second section within B2G-FAR holds functional annotations for 17 non-model organism Affymetrix GeneChips.

Conclusions

B2G-FAR provides easy access to exhaustive functional annotation for 2000 species offering a good balance between quality and quantity, thereby supporting functional genomics research especially in the case of non-model organisms.

Availability

The annotation resource is available at http://www.b2gfar.org.",2011-02-18 +30150755,XCMS-MRM and METLIN-MRM: a cloud library and public resource for targeted analysis of small molecules.,"We report XCMS-MRM and METLIN-MRM ( http://xcmsonline-mrm.scripps.edu/ and http://metlin.scripps.edu/ ), a cloud-based data-analysis platform and a public multiple-reaction monitoring (MRM) transition repository for small-molecule quantitative tandem mass spectrometry. This platform provides MRM transitions for more than 15,500 molecules and facilitates data sharing across different instruments and laboratories.",2018-08-27 +25232097,The Digital Ageing Atlas: integrating the diversity of age-related changes into a unified resource.,"Multiple studies characterizing the human ageing phenotype have been conducted for decades. However, there is no centralized resource in which data on multiple age-related changes are collated. Currently, researchers must consult several sources, including primary publications, in order to obtain age-related data at various levels. To address this and facilitate integrative, system-level studies of ageing we developed the Digital Ageing Atlas (DAA). The DAA is a one-stop collection of human age-related data covering different biological levels (molecular, cellular, physiological, psychological and pathological) that is freely available online (http://ageing-map.org/). Each of the >3000 age-related changes is associated with a specific tissue and has its own page displaying a variety of information, including at least one reference. Age-related changes can also be linked to each other in hierarchical trees to represent different types of relationships. In addition, we developed an intuitive and user-friendly interface that allows searching, browsing and retrieving information in an integrated and interactive fashion. 
Overall, the DAA offers a new approach to systemizing ageing resources, providing a manually-curated and readily accessible source of age-related changes.",2014-09-17 +31630971,Identification of transcriptional isoforms associated with survival in cancer patient.,"The Cancer Genome Atlas (TCGA) project produced RNA-Seq data for tens of thousands of cancer and non-cancer samples with clinical survival information, providing an unprecedented opportunity for analyzing prognostic genes and their isoforms. In this study, we performed the first large-scale identification of transcriptional isoforms that are specifically associated with patient prognosis, even without gene-level association. These specific isoforms are defined as Transcripts Associated with Patient Prognosis (TAPPs). Although a group of TAPPs are the principal isoforms of their genes with intact functional protein domains, another group of TAPPs lack important protein domains found in their canonical gene isoforms. This dichotomy in the distribution of protein domains may indicate different patterns of TAPPs association with cancer. TAPPs in protein-coding genes, especially those with altered protein domains, are rich in known cancer driver genes. We further identified multiple types of cancer recurrent TAPPs, such as DCAF17-201, providing a new approach for the detection of cancer-associated events. In order to make the wide research community to study prognostic isoforms, we developed a portal named GESUR (http://gesur.cancer-pku.cn/), which illustrates the detailed prognostic characteristics of TAPPs and other isoforms. 
Overall, our integrated analysis of gene expression and clinical parameters provides a new perspective for understanding the applications of different gene isoforms in tumor progression.",2019-09-25 +31551362,Epigenomic Profiling Discovers Trans-lineage SOX2 Partnerships Driving Tumor Heterogeneity in Lung Squamous Cell Carcinoma.,"Molecular characterization of lung squamous cell carcinoma (LUSC), one of the major subtypes of lung cancer, has not sufficiently improved its nonstratified treatment strategies over decades. Accumulating evidence suggests that lineage-specific transcriptional regulators control differentiation states during cancer evolution and underlie their distinct biological behaviors. In this study, by investigating the super-enhancer landscape of LUSC, we identified a previously undescribed ""neural"" subtype defined by Sox2 and a neural lineage factor Brn2, as well as the classical LUSC subtype defined by Sox2 and its classical squamous partner p63. Robust protein-protein interaction and genomic cooccupancy of Sox2 and Brn2, in place for p63 in the classical LUSC, indicated their transcriptional cooperation imparting this unique lineage state in the ""neural"" LUSC. Forced expression of p63 downregulated Brn2 in the ""neural"" LUSC cells and invoked the classical LUSC lineage with more squamous/epithelial features, which were accompanied by increased activities of ErbB/Akt and MAPK-ERK pathways, suggesting differential dependency. Collectively, our data demonstrate heterogeneous cell lineage states of LUSC featured by Sox2 cooperation with Brn2 or p63, for which distinct therapeutic approaches may be warranted. 
SIGNIFICANCE: Epigenomic profiling reveals a novel subtype of lung squamous cell carcinoma with neural differentiation.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/79/24/6084/F1.large.jpg.",2019-09-24 +30994344,Chemical Composition of Commercial Cow's Milk.,"Bovine milk is a nutritionally rich, chemically complex biofluid consisting of hundreds of different components. While the chemical composition of cow's milk has been studied for decades, much of this information is fragmentary and very dated. In an effort to consolidate and update this information, we have applied modern, quantitative metabolomics techniques along with computer-aided literature mining to obtain the most comprehensive and up-to-date characterization of the chemical constituents in commercial cow's milk. Using nuclear magnetic resonance (NMR) spectroscopy, liquid chromatography-mass spectrometry (LC-MS), and inductively coupled plasma-mass spectrometry (ICP-MS), we were able to identify and quantify 296 bovine milk metabolites or metabolite species (corresponding to 1447 unique structures) from a variety of commercial milk samples. Through our literature analysis, we also found another 676 metabolites or metabolite species (corresponding to 908 unique structures). Detailed information regarding all 2355 of the identified chemicals in bovine milk have been made freely available through a Web-accessible database called the Milk Composition Database or MCDB ( http://www.mcdb.ca/ ).",2019-04-17 +31129987,Gaps in Understanding of the Epidemiology of Mood and Anxiety Disorders among Migrant Groups in Canada: A Systematic Review.,"

Objective

Estimates of mood and anxiety disorders are highly variable among migrant groups, as they are influenced by the socio-political context. Our objective was to conduct a systematic review and meta-analysis to synthesize available Canadian evidence on the prevalence and incidence of mood and anxiety disorders among migrant groups.

Methods

Studies were identified from MEDLINE, EMBASE, and PsycINFO. They were included if they used population-based samples, presented data on the incidence or prevalence of diagnosed or self-reported mood or anxiety disorders for first-generation migrant groups in Canada, and used a Canadian-born or long-term resident reference group.

Results

Nineteen studies met our inclusion criteria. Prevalence ratios ranged from 0.48 to 0.87, and nearly all estimates were obtained from population health surveys. Prevalence estimates among migrant groups were lower than the reference group, with the 90th percentile of estimates ranging from 1.5% to 8.2%. Risk factors for mood and anxiety disorders among migrants included being female, younger, unemployed, having lower income, and living in neighborhoods with a lower proportion of migrants.

Conclusions

There remain many gaps in our current understanding of mood and anxiety disorders among migrant groups in Canada. Although evidence suggests the prevalence of mood and anxiety disorders are consistently lower among migrant groups, a lack of incidence estimates limits the strength of this conclusion. Future research should focus on comparisons of self-reported and diagnosed estimates, the use of a range of different primary or secondary data sources, and consideration of important risk factors.

Prospero citation

Jordan Edwards, Malini Hu, Amardeep Thind, Saverio Stranges, Maria Chiu, Kelly Anderson. The burden of mood and anxiety disorders among immigrant and refugee populations in Canada: a systematic review. PROSPERO 2018 CRD42018087869 Available from: http://www.crd.york.ac.uk/PROSPERO/display_record.php?ID=CRD42018087869 .",2019-05-26 +26697753,DLGP: A database for lineage-conserved and lineage-specific gene pairs in animal and plant genomes.,"The conservation of gene organization in the genome with lineage-specificity is an invaluable resource to decipher their potential functionality with diverse selective constraints, especially in higher animals and plants. Gene pairs appear to be the minimal structure for such kind of gene clusters that tend to reside in their preferred locations, representing the distinctive genomic characteristics in single species or a given lineage. Despite gene families having been investigated in a widespread manner, the definition of gene pair families in various taxa still lacks adequate attention. To address this issue, we report DLGP (http://lcgbase.big.ac.cn/DLGP/) that stores the pre-calculated lineage-based gene pairs in currently available 134 animal and plant genomes and inspect them under the same analytical framework, bringing out a set of innovational features. First, the taxonomy or lineage has been classified into four levels such as Kingdom, Phylum, Class and Order. It adopts all-to-all comparison strategy to identify the possible conserved gene pairs in all species for each gene pair in certain species and reckon those that are conserved in over a significant proportion of species in a given lineage (e.g. Primates, Diptera or Poales) as the lineage-conserved gene pairs. Furthermore, it predicts the lineage-specific gene pairs by retaining the above-mentioned lineage-conserved gene pairs that are not conserved in any other lineages. 
Second, it carries out pairwise comparison for the gene pairs between two compared species and creates the table including all the conserved gene pairs and the image elucidating the conservation degree of gene pairs in chromosomal level. Third, it supplies gene order browser to extend gene pairs to gene clusters, allowing users to view the evolution dynamics in the gene context in an intuitive manner. This database will be able to facilitate the particular comparison between animals and plants, between vertebrates and arthropods, and between monocots and eudicots, accounting for the significant contribution of gene pairs to speciation and diversification in specific lineages.",2015-12-15 +28991755,Robust Discriminant Regression for Feature Extraction.,"Ridge regression (RR) and its extended versions are widely used as an effective feature extraction method in pattern recognition. However, the RR-based methods are sensitive to the variations of data and can learn only limited number of projections for feature extraction and recognition. To address these problems, we propose a new method called robust discriminant regression (RDR) for feature extraction. In order to enhance the robustness, the L2,1-norm is used as the basic metric in the proposed RDR. The designed robust objective function in regression form can be solved by an iterative algorithm containing an eigenfunction, through which the optimal orthogonal projections of RDR can be obtained by eigen decomposition. The convergence analysis and computational complexity are presented. In addition, we also explore the intrinsic connections and differences between the RDR and some previous methods. Experiments on some well-known databases show that RDR is superior to the classical and very recent proposed methods reported in the literature, no matter the L2-norm or the L2,1-norm-based regression methods. 
The code of this paper can be downloaded from http://www.scholat.com/laizhihui.",2017-10-09 +31659097,Whole Transcriptomic Analysis of Apigenin on TNFα Immuno-activated MDA-MB-231 Breast Cancer Cells.,"

Background

Triple-negative breast cancer is categorized by a lack of hormone receptors, inefficacy of anti-estrogen or aromatase inhibitor chemotherapies and greater mortality rates in African American populations. Advanced-stage breast tumors have a high concentration of tumor necrosis factor-α (TNFα) throughout the tumor/stroma milieu, prompting sustained release of diverse chemokines (i.e. C-C motif chemokine ligand 2 (CCL2)/CCL5). These potent chemokines can subsequently direct mass infiltration of leukocyte sub-populations to lodge within the tumor, triggering a loss of tumor immune surveillance and subsequent rapid tumor growth. Previously, we demonstrated that in the MDA-MB-231 TNBC cell line, TNFα evoked a rise in immune signaling proteins: CCL2, granulocyte macrophage colony-stimulating factor, interleukin (IL)1α, IL6 and inhibitor of nuclear factor kappa-B kinase subunit epsilon (IKBKε) all of which were attenuated by apigenin, a dietary flavonoid found in chamomile and parsley.

Materials and methods

The present work elucidates changes evoked by TNFα in the presence or absence of apigenin by examining the entire transcriptome for mRNA and long intergenic non-coding RNA with Affymetrix Hugene-2.1_ST human microarrays. Differential gene-expression analysis was conducted on 48,226 genes.

Results

TNFα caused up-regulation of 75 genes and down-regulation of 10. Of these, apigenin effectively down-regulated 35 of the 75 genes which were up-regulated by TNFα. These findings confirm our previous work, specifically for the TNFα-evoked spike in IL1A vs. untreated controls [+21-fold change (FC), p<0.0001] being attenuated by apigenin in the presence of TNFα (-15 FC vs. TNFα, p<0.0001). Similar trends were seen for apigenin-mediated down-regulation of TNFα-up-regulated transcripts: IKBKE (TNFα: 4.55 FC vs. control, p<0.001; and TNFα plus apigenin: -4.92 FC, p<0.001), CCL2 (2.19 FC, p<0.002; and -2.12 FC, p<0.003), IL6 (3.25 FC, p<0.020; and -2.85 FC, p<0.043) and CSF2 (TNFα +6.04 FC, p<0.001; and -2.36 FC, p<0.007). In addition, these data further establish more than a 65% reduction by apigenin for the following transcripts which were also up-regulated by TNFα: cathepsin S (CTSS), complement C3 (C3), laminin subunit gamma 2 (LAMC2), toll-like receptor 2 (TLR2), G protein-coupled receptor class C group 5 member B (GPRC5B), contactin-associated protein 1 (CNTNAP1), claudin 1 (CLDN1), nuclear factor of activated T-cells 2 (NFATC2), C-X-C motif chemokine ligand 10 (CXCL10), CXCL11, interleukin 1 receptor-associated kinase 3 (IRAK3), nuclear receptor subfamily 3 group C member 2 (NR3C2), interleukin 32 (IL32), IL24, slit guidance ligand 2 (SLIT2), transmembrane protein 132A (TMEM132A), TMEM171, signal transducing adaptor family member 2 (STAP2), mixed lineage kinase domain-like pseudokinase (MLKL), kinase insert domain receptor (KDR), BMP-binding endothelial regulator (BMPER), and kelch-like family member 36 (KLHL36).

Conclusion

There is a possible therapeutic role for apigenin in down-regulating diverse genes associated with tumorigenic leukocyte sub-population infiltration by triple-negative breast cancer. The data have been deposited into the Gene Expression Omnibus for public analysis at https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE120550.",2019-11-01 +31618061,Lexical-Level Predictors of Reading Comprehension in Third Grade: Is Spelling a Unique Contributor?,"Purpose Considerable research effort has focused on understanding reading comprehension and reading comprehension difficulties. The purpose of this correlational study was to add to the small but growing body of literature on the role that spelling may play in reading comprehension, by investigating the full range of lexical-level literacy skills and whether spelling makes a unique contribution. This study also explored whether these relations vary with the spelling scoring metric. Method Data were collected from 63 children attending Grade 3 in a Midwestern state. In addition to measuring reading comprehension, word recognition, and vocabulary, 4 spelling scoring metrics were examined: the number of words spelled correctly, the number of correct letter sequences (CLS), and Spelling Sensitivity Scores for elements and for words. Results All spelling metrics were significantly correlated with reading comprehension. Results of hierarchical regressions showed that spelling was a significant, unique predictor of reading comprehension when the CLS metric was used. The scoring metrics were differentially related to reading comprehension. Metrics that gave credit based on orthographic precision only (number of words spelled correctly and CLS) were more highly related to reading comprehension than metrics that scored not only on orthographic accuracy but also on phonological and morphological accuracy (Spelling Sensitivity Scores for elements and for words). 
Conclusion These results indicate that spelling is related to reading comprehension and have theoretical and clinical implications for the use of spelling assessment. Supplemental Material https://doi.org/10.23641/asha.9947216.",2019-10-16 +30991958,Phylogenetic mapping of scale nanostructure diversity in snakes.,"

Background

Many species of snakes exhibit epidermal surface nanostructures that form complex motifs conferring self-cleaning properties, and sometimes structural iridescence, to their skin.

Results

Using confocal microscopy, we show that these specialised cells can be greatly elongated along their left-right axis and that different types of nanostructures are generated by cell borders and cell surface. To characterise the complexity and diversity of these surface gratings, we analysed scanning electron microscopy images of skin sheds from 353 species spanning 19 of the 26 families of snakes and characterised the observed nanostructures with four characters. The full character matrix, as well as one representative SEM image of each of the corresponding species, is available as a MySQL relational database at https://snake-nanogratings.lanevol.org . We then performed continuous-time Markov phylogenetic mapping on the snake phylogeny, providing an evolutionary dynamical estimate for the different types of nanostructures. These analyses suggest that the presence of cell border digitations is the ancestral state for snake skin nanostructures which was subsequently and independently lost in multiple lineages. Our analyses also indicate that cell shape and cell border shape are co-dependent characters whereas we did not find correlation between a simple life habit classification and any specific nanomorphological character.

Conclusions

These results, compatible with the fact that multiple types of nanostructures can generate hydrophobicity, suggest that the diversity and complexity of snake skin surface nano-morphology are dominated by phylogenetic rather than habitat-specific functional constraints. The present descriptive study opens the perspective of investigating the cellular self-organisational cytoskeletal processes controlling the patterning of different skin surface nanostructures in snakes and lizards.",2019-04-16 +30906874,Secondary Outcomes From the Child/Adolescent Anxiety Multimodal Study: Implications for Clinical Practice.,"

Background

Controlled evaluations comparing medication, cognitive-behavioral therapy (CBT), and their combination in the treatment of youth anxiety have predominantly focused on global ratings by independent evaluators. Such ratings are resource-intensive, may be of limited generalizability, and do not directly inform our understanding of treatment responses from the perspective of treated families. We examined outcomes from the perspective of treated youth and parents in the Child/Adolescent Anxiety Multimodal Study (CAMS).

Methods

Participants (N=488; ages 7-17 years) who had a primary diagnosis of separation, social, and/or generalized anxiety disorder were randomly assigned to a treatment condition in the CAMS trial. Linear mixed-effects and ANCOVA models examined parent- and youth-reported anxiety symptoms, impact of anxiety, broader internalizing and externalizing psychopathology, depressive symptoms, and family burden throughout the 12-week acute treatment phase and 6-month follow-up.

Results

At week 12, combination treatment showed superiority over placebo, sertraline, and CBT with regard to parent-reported youth anxiety symptoms, and sertraline and CBT as monotherapies showed superiority over placebo with regard to parent-reported youth anxiety. Combination therapy and sertraline also showed week 12 superiority over placebo with regard to parent-reported internalizing psychopathology, and superiority over placebo and CBT with regard to parent-reported impact of anxiety, family burden, and youth depressive symptoms. By week 36, parent reports of many youth outcomes were comparable across active conditions. Youth measures tracked parent measures on many outcomes.

Conclusions

Findings were drawn on brief, readily available questionnaires that in conjunction with clinician measures can inform patient-centered care and collaborative decision-making. Trial Registry Name: Child and Adolescent Anxiety Disorders (CAMS). Registry identification number: NCT00052078. Registry URL: https://www.clinicaltrials.gov/ct2/show/NCT00052078.",2017-12-05 +27547241,"msBiodat analysis tool, big data analysis for high-throughput experiments.","

Background

Mass spectrometry (MS) is a group of high-throughput techniques used to increase knowledge about biomolecules. They produce a large amount of data which is presented as a list of hundreds or thousands of proteins. Filtering those data efficiently is the first step for extracting biologically relevant information. The filtering may increase interest by merging previous data with the data obtained from public databases, resulting in an accurate list of proteins which meet the predetermined conditions.

Results

In this article we present msBiodat Analysis Tool, a web-based application thought to approach proteomics to the big data analysis. With this tool, researchers can easily select the most relevant information from their MS experiments using an easy-to-use web interface. An interesting feature of msBiodat analysis tool is the possibility of selecting proteins by its annotation on Gene Ontology using its Gene Id, ensembl or UniProt codes.

Conclusion

The msBiodat analysis tool is a web-based application that allows researchers with any programming experience to deal with efficient database querying advantages. Its versatility and user-friendly interface makes easy to perform fast and accurate data screening by using complex queries. Once the analysis is finished, the result is delivered by e-mail. msBiodat analysis tool is freely available at http://msbiodata.irb.hr.",2016-08-19 +30944051,Video for Knowledge Translation: Engaging Older Adults in Social and Physical Activity.,"Une vaste majorité des Canadiens âgés ne pratiquent pas suffisamment d'activité physique. Le développement de stratégies originales et innovantes encourageant et appuyant les modes de vie actifs est donc urgent. La vidéo est un outil prometteur pour l'application de connaissances (AC) visant l'engagement de divers publics dans la discussion et l'adoption de comportements favorisant la santé. L'Approche systématique pour les vidéos fondées sur des données probantes (Systematic Approach to Evidence-informed Video, SAEV), qui fournit un cadre pour guider et structurer le développement de vidéos ayant pour objectif l'AC, a été utilisée pour la création et la diffusion d'un documentaire de 19 minutes, I'd Rather Stay (https://vimeo.com/80503957). Quarante-huit participants âgés de 60 ans et plus ont visionné la vidéo, participé à des groupes de discussion et rempli des questionnaires concernant cette vidéo. Les données ont été recueillies après le visionnement et lors d'un suivi organisé six mois plus tard. La vidéo a éduqué, encouragé et mobilisé les personnes âgées sur les questions liées à l'autonomie, à l'activité physique et aux liens sociaux. 
Nous encourageons les chercheurs à adopter des stratégies d'AC auxquelles les personnes âgées peuvent s'identifier, qui sont accessibles et par lesquelles elles peuvent s'engager à un niveau critique, autant sur le plan émotionnel qu'intellectuel, comme les vidéos basées sur des preuves scientifiques. Most older Canadians do not engage in sufficient physical activity. There is an urgent need for outside-the-box strategies that encourage and sustain active lifestyles. Video is a promising knowledge translation (KT) tool to engage diverse audiences in discussion and action around health promoting behaviours. We adopted a KT framework to inform a structured process of video development we have named systematic approach to evidence-informed video (SAEV). This guided the creation and dissemination of a 19-minute documentary video: I’d Rather Stay (https://vimeo.com/80503957). Following screenings, we collected focus group and questionnaire data from 48 participants aged 60 years and older at baseline and 6-month follow-up. The video educated, encouraged, and activated older people around issues such as independence, physical activity and social connectedness. We encourage researchers to adopt KT strategies – and to use evidence-informed video – that older adults can relate to and critically engage with on an accessible, emotional, and intellectual level.",2020-03-01 +27899642,The UCSC Genome Browser database: 2017 update.,"Since its 2001 debut, the University of California, Santa Cruz (UCSC) Genome Browser (http://genome.ucsc.edu/) team has provided continuous support to the international genomics and biomedical communities through a web-based, open source platform designed for the fast, scalable display of sequence alignments and annotations landscaped against a vast collection of quality reference genome assemblies. 
The browser's publicly accessible databases are the backbone of a rich, integrated bioinformatics tool suite that includes a graphical interface for data queries and downloads, alignment programs, command-line utilities and more. This year's highlights include newly designed home and gateway pages; a new 'multi-region' track display configuration for exon-only, gene-only and custom regions visualization; new genome browsers for three species (brown kiwi, crab-eating macaque and Malayan flying lemur); eight updated genome assemblies; extended support for new data types such as CRAM, RNA-seq expression data and long-range chromatin interaction pairs; and the unveiling of a new supported mirror site in Japan.",2016-11-29 +28090394,GExplore 1.4: An expanded web interface for queries on Caenorhabditis elegans protein and gene function.,"Genetic high-throughput experiments often result in hundreds or thousands of genes satisfying certain experimental conditions. Grouping and prioritizing a large number of genes for further analysis can be a time-consuming challenge. In 2009 we developed a web-based user interface, GExplore, to assist with large-scale data-mining related to gene function in Caenorhabditis elegans. The underlying database contained information about Caenorhabditis elegans genes and proteins including domain organization of the proteins, phenotypic descriptions, expression data and Gene Ontology Consortium annotations. These data enable users to quickly obtain an overview of biological and biochemical functions of a large number of genes at once. Since its inception the underlying database has been updated and expanded significantly. Here we describe the current version of GExplore 1.4, documenting the changes since the original release. 
GExplore 1.4 now contains information about the domain organization of the proteomes of 9 nematode species, can display the location of Caenorhabditis elegans mutations with respect to the domain organization of the proteins, and includes stage-specific RNAseq gene expression data generated by the modENCODE project. The underlying database has been reorganized to facilitate independent updates of the different parts of the database and to allow the addition of novel data sets in the future. The web interface is available under http://genome.sfu.ca/gexplore.",2016-09-19 +30986209,Hypercomplex extreme learning machine with its application in multispectral palmprint recognition.,"An extreme learning machine (ELM) is a novel training method for single-hidden layer feedforward neural networks (SLFNs) in which the hidden nodes are randomly assigned and fixed without iterative tuning. ELMs have earned widespread global interest due to their fast learning speed, satisfactory generalization ability and ease of implementation. In this paper, we extend this theory to hypercomplex space and attempt to simultaneously consider multisource information using a hypercomplex representation. To illustrate the performance of the proposed hypercomplex extreme learning machine (HELM), we have applied this scheme to the task of multispectral palmprint recognition. Images from different spectral bands are utilized to construct the hypercomplex space. Extensive experiments conducted on the PolyU and CASIA multispectral databases demonstrate that the HELM scheme can achieve competitive results. 
The source code together with datasets involved in this paper can be available for free download at https://figshare.com/s/01aef7d48840afab9d6d.",2019-04-15 +22434826,Considerations for creating and annotating the budding yeast Genome Map at SGD: a progress report.,"The Saccharomyces Genome Database (SGD) is compiling and annotating a comprehensive catalogue of functional sequence elements identified in the budding yeast genome. Recent advances in deep sequencing technologies have enabled for example, global analyses of transcription profiling and assembly of maps of transcription factor occupancy and higher order chromatin organization, at nucleotide level resolution. With this growing influx of published genome-scale data, come new challenges for their storage, display, analysis and integration. Here, we describe SGD's progress in the creation of a consolidated resource for genome sequence elements in the budding yeast, the considerations taken in its design and the lessons learned thus far. The data within this collection can be accessed at http://browse.yeastgenome.org and downloaded from http://downloads.yeastgenome.org. DATABASE URL: http://www.yeastgenome.org.",2012-03-20 +31881980,Venn-diaNet : venn diagram based network propagation analysis framework for comparing multiple biological experiments.,"BACKGROUND:The main research topic in this paper is how to compare multiple biological experiments using transcriptome data, where each experiment is measured and designed to compare control and treated samples. Comparison of multiple biological experiments is usually performed in terms of the number of DEGs in an arbitrary combination of biological experiments. This process is usually facilitated with Venn diagram but there are several issues when Venn diagram is used to compare and analyze multiple experiments in terms of DEGs. First, current Venn diagram tools do not provide systematic analysis to prioritize genes. 
Because that current tools generally do not fully focus to prioritize genes, genes that are located in the segments in the Venn diagram (especially, intersection) is usually difficult to rank. Second, elucidating the phenotypic difference only with the lists of DEGs and expression values is challenging when the experimental designs have the combination of treatments. Experiment designs that aim to find the synergistic effect of the combination of treatments are very difficult to find without an informative system. RESULTS:We introduce Venn-diaNet, a Venn diagram based analysis framework that uses network propagation upon protein-protein interaction network to prioritizes genes from experiments that have multiple DEG lists. We suggest that the two issues can be effectively handled by ranking or prioritizing genes with segments of a Venn diagram. The user can easily compare multiple DEG lists with gene rankings, which is easy to understand and also can be coupled with additional analysis for their purposes. Our system provides a web-based interface to select seed genes in any of areas in a Venn diagram and then perform network propagation analysis to measure the influence of the selected seed genes in terms of ranked list of DEGs. CONCLUSIONS:We suggest that our system can logically guide to select seed genes without additional prior knowledge that makes us free from the seed selection of network propagation issues. We showed that Venn-diaNet can reproduce the research findings reported in the original papers that have experiments that compare two, three and eight experiments. Venn-diaNet is freely available at: http://biohealth.snu.ac.kr/software/venndianet.",2019-12-27 +27899671,"RepeatsDB 2.0: improved annotation, classification, search and visualization of repeat protein structures.","RepeatsDB 2.0 (URL: http://repeatsdb.bio.unipd.it/) is an update of the database of annotated tandem repeat protein structures. 
Repeat proteins are a widespread class of non-globular proteins carrying heterogeneous functions involved in several diseases. Here we provide a new version of RepeatsDB with an improved classification schema including high quality annotations for ∼5400 protein structures. RepeatsDB 2.0 features information on start and end positions for the repeat regions and units for all entries. The extensive growth of repeat unit characterization was possible by applying the novel ReUPred annotation method over the entire Protein Data Bank, with data quality is guaranteed by an extensive manual validation for >60% of the entries. The updated web interface includes a new search engine for complex queries and a fully re-designed entry page for a better overview of structural data. It is now possible to compare unit positions, together with secondary structure, fold information and Pfam domains. Moreover, a new classification level has been introduced on top of the existing scheme as an independent layer for sequence similarity relationships at 40%, 60% and 90% identity.",2016-11-29 +31030180,ECG anomaly class identification using LSTM and error profile modeling.,"Automatic diagnosis of cardiac events is a current problem of interest in which deep learning has shown promising success. We have earlier reported the use of Long Short Term Memory (LSTM) networks-trained on normal ECG patterns-to the detection of anomalies from the prediction errors for real-time diagnostic applications. In this work, we extend our anomaly detection algorithm by introducing a second stage predictor that can identify the actual anomaly class from the error outputs of the first stage model. Results from seven types of anomalies have been presented including Atrial Premature Contraction (APC), Paced Beat (PB), Premature Ventricular Contraction (PVC), Right Bundle Branch Block (RBBB), Ventricular Bigeminy (VB), Ventricular Couplets (VCs) and Ventricular Tachycardia (VT). 
To optimize anomaly class prediction performance, multiple choices of second stage models such as multilayer perceptron (MLP), support vector machine (SVM) and logistic regression have been employed. A featurization scheme for LSTM prediction errors in the form of overall summaries has been proposed and a successful predictor for the same was developed with good performance. Our results indicate that the error vectors represented by their summary features carry useful predictive information about actual ECG anomaly type. We discuss how the accuracy scores without attention to inherent class imbalances and paucity of data instances may produce misleading performance estimates and hence accurate background models are needed to estimate true predictive performance of multi-class predictors such as those presented in this work. The training data sets and related resources for this study are provided at http://ecg.sciwhylab.org.",2019-04-16 +23193283,The SILVA ribosomal RNA gene database project: improved data processing and web-based tools.,"SILVA (from Latin silva, forest, http://www.arb-silva.de) is a comprehensive web resource for up to date, quality-controlled databases of aligned ribosomal RNA (rRNA) gene sequences from the Bacteria, Archaea and Eukaryota domains and supplementary online services. The referred database release 111 (July 2012) contains 3 194 778 small subunit and 288 717 large subunit rRNA gene sequences. Since the initial description of the project, substantial new features have been introduced, including advanced quality control procedures, an improved rRNA gene aligner, online tools for probe and primer evaluation and optimized browsing, searching and downloading on the website. 
Furthermore, the extensively curated SILVA taxonomy and the new non-redundant SILVA datasets provide an ideal reference for high-throughput classification of data from next-generation sequencing approaches.",2012-11-28 +24865352,Structuring osteosarcoma knowledge: an osteosarcoma-gene association database based on literature mining and manual annotation. ,"Osteosarcoma (OS) is the most common primary bone cancer exhibiting high genomic instability. This genomic instability affects multiple genes and microRNAs to a varying extent depending on patient and tumor subtype. Massive research is ongoing to identify genes including their gene products and microRNAs that correlate with disease progression and might be used as biomarkers for OS. However, the genomic complexity hampers the identification of reliable biomarkers. Up to now, clinico-pathological factors are the key determinants to guide prognosis and therapeutic treatments. Each day, new studies about OS are published and complicate the acquisition of information to support biomarker discovery and therapeutic improvements. Thus, it is necessary to provide a structured and annotated view on the current OS knowledge that is quick and easily accessible to researchers of the field. Therefore, we developed a publicly available database and Web interface that serves as resource for OS-associated genes and microRNAs. Genes and microRNAs were collected using an automated dictionary-based gene recognition procedure followed by manual review and annotation by experts of the field. In total, 911 genes and 81 microRNAs related to 1331 PubMed abstracts were collected (last update: 29 October 2013). Users can evaluate genes and microRNAs according to their potential prognostic and therapeutic impact, the experimental procedures, the sample types, the biological contexts and microRNA target gene interactions. Additionally, a pathway enrichment analysis of the collected genes highlights different aspects of OS progression. 
OS requires pathways commonly deregulated in cancer but also features OS-specific alterations like deregulated osteoclast differentiation. To our knowledge, this is the first effort of an OS database containing manual reviewed and annotated up-to-date OS knowledge. It might be a useful resource especially for the bone tumor research community, as specific information about genes or microRNAs is quick and easily accessible. Hence, this platform can support the ongoing OS research and biomarker discovery. Database URL: http://osteosarcoma-db.uni-muenster.de.",2014-05-27 +25072490,Using a digital story format: a contemporary approach to meeting the workforce needs of public health laboratories.,"

Introduction

Public health laboratories are an integral partner in preparedness and emergency response. The Upper Midwest Preparedness and Emergency Response Learning Center (UMPERLC) and the State Hygienic Laboratory at the University of Iowa have a long history of working together to identify and meet the preparedness training needs of the laboratory workforce. The training, Anatomy of a Foodborne Outbreak, which uses a digital story format, provides an example of this partnership.

Background/rationale

The State Hygienic Laboratory expressed the need for training programs targeted at enhancing early detection and investigation of outbreaks. Clinical laboratory staff play a significant role in identifying patient samples that may represent the effects of foodborne illness. Given that foodborne illnesses are on the increase nationally, it is critical that laboratory staff be prepared to deal with these outbreaks.

Methods/activity

UMPERLC collaborated with State Hygienic Laboratory content experts in the design and development of a digital story, using a foodborne outbreak that focuses on testing to detect Shiga toxin-producing Escherichia coli. This narrative format was selected because seeing and hearing a story about the training content provide the learner with a deeper interaction and richer learning experience, allowing the learner to better see the bigger picture.

Results/outcomes

Anatomy of a Foodborne Outbreak is available on UMPERLC's Learning Management System, Training Source (http://training-source.org). Evaluation data indicate positive learning experiences overall.

Discussion

The digital story format, which is a video that uses a blend of images, text, and audio narration, was an appropriate method for the content and learning outcomes of the Anatomy of a Foodborne Outbreak training. This format requires more active learning, which increases retention and transfer of knowledge. Training that is easily accessed and user-friendly is an important resource for laboratory staff.

Lessons learned/next steps

When reviewing the course completion data, the highest enrollment occurred immediately after the training program was released. To increase visibility, Anatomy of a Foodborne Outbreak is housed on both the State Hygienic Laboratory Web site and UMPERLC's Learning Management System. The course has also been added to national learning databases such as the Centers for Disease Control and Prevention TrainingFinder Real-time Affiliate Integrated Network (CDC TRAIN) and Certified in Public Health Recertification & Reporting System.",2014-09-01 +31822494,Extracellular ATP and Purinergic P2Y2 Receptor Signaling Promote Liver Tumorigenesis in Mice by Exacerbating DNA Damage.,"Release of ATP to the extracellular compartment and subsequent activation of purinergic receptors is a conserved mechanism mediating inflammatory responses and cell fate decisions in various organs including the liver. Previous findings suggest that extracellular ATP may promote liver tumorigenesis, however, the underlying mechanisms are poorly understood. Therefore, our aim was to dissect the functions of extracellular ATP and P2Y2 receptors (P2Y2R) during hepatocarcinogenesis. Liver tumors were induced in wild-type and P2y2r -/- knockout mice by intraperitoneal diethylnitrosamine (DEN) injection. Tumorigenesis was analyzed after 8 to 10 months and molecular analyses were performed at different stages of tumorigenesis in vivo, as well as in primary mouse hepatocytes in vitro. Liver tumor incidence and tumor numbers were strongly reduced in P2y2r -/- mice, whereas tumor size and morphology were comparable to wild-type controls, suggesting that P2Y2R contributes to tumor initiation. Mechanistically, hepatocyte proliferation in DEN-treated P2y2r -/- mice was reduced, which correlated with reduced c-JUN and CCND1 but increased p21 expression. 
Moreover, DNA damage as determined by hepatocellular expression of γH2A.X and of genes related to genotoxic stress, as well as STAT3 phosphorylation, was reduced in the absence of P2y2r. Administration of genotoxic agents to primary hepatocytes in vitro confirmed that DNA damage was indeed exacerbated by extracellular ATP, subsequent P2Y2R activation, and downstream intracellular calcium-dependent signal transduction. In conclusion, our data reveal that extracellular ATP and subsequent P2Y2R function stimulate DNA damage responses and hepatocyte proliferation, thereby promoting hepatocarcinogenesis. Targeting this pathway may be an attractive approach for chemoprevention of hepatocellular carcinoma. SIGNIFICANCE: Extracellular ATP and subsequent P2Y2 receptor function stimulate DNA damage responses and hepatocyte proliferation, thereby promoting hepatocarcinogenesis in mice. Targeting this pathway may be an attractive approach for chemoprevention of hepatocellular carcinoma. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/4/699/F1.large.jpg.",2019-12-10 +30764740,A Distributed Framework for the Construction of Transport Maps.,"The need to reason about uncertainty in large, complex, and multimodal data sets has become increasingly common across modern scientific environments. The ability to transform samples from one distribution P to another distribution Q enables the solution to many problems in machine learning (e.g., Bayesian inference, generative modeling) and has been actively pursued from theoretical, computational, and application perspectives across the fields of information theory, computer science, and biology. Performing such transformations in general still leads to computational difficulties, especially in high dimensions. Here, we consider the problem of computing such ""measure transport maps"" with efficient and parallelizable methods. 
Under the mild assumptions that P need not be known but can be sampled from and that the density of Q is known up to a proportionality constant, and that Q is log-concave, we provide in this work a convex optimization problem pertaining to relative entropy minimization. We show how an empirical minimization formulation and polynomial chaos map parameterization can allow for learning a transport map between P and Q with distributed and scalable methods. We also leverage findings from nonequilibrium thermodynamics to represent the transport map as a composition of simpler maps, each of which is learned sequentially with a transport cost regularized version of the aforementioned problem formulation. We provide examples of our framework within the context of Bayesian inference for the Boston housing data set and generative modeling for handwritten digit images from the MNIST data set.",2019-02-14 +31461658,Molecular Portraits of Early Rheumatoid Arthritis Identify Clinical and Treatment Response Phenotypes.,"There is a current imperative to unravel the hierarchy of molecular pathways that drive the transition of early to established disease in rheumatoid arthritis (RA). Herein, we report a comprehensive RNA sequencing analysis of the molecular pathways that drive early RA progression in the disease tissue (synovium), comparing matched peripheral blood RNA-seq in a large cohort of early treatment-naive patients, namely, the Pathobiology of Early Arthritis Cohort (PEAC). We developed a data exploration website (https://peac.hpc.qmul.ac.uk/) to dissect gene signatures across synovial and blood compartments, integrated with deep phenotypic profiling. We identified transcriptional subgroups in synovium linked to three distinct pathotypes: fibroblastic pauci-immune pathotype, macrophage-rich diffuse-myeloid pathotype, and a lympho-myeloid pathotype characterized by infiltration of lymphocytes and myeloid cells. 
This is suggestive of divergent pathogenic pathways or activation disease states. Pro-myeloid inflammatory synovial gene signatures correlated with clinical response to initial drug therapy, whereas plasma cell genes identified a poor prognosis subgroup with progressive structural damage.",2019-08-01 +27899563,ChimerDB 3.0: an enhanced database for fusion genes from cancer transcriptome and literature data mining.,"Fusion gene is an important class of therapeutic targets and prognostic markers in cancer. ChimerDB is a comprehensive database of fusion genes encompassing analysis of deep sequencing data and manual curations. In this update, the database coverage was enhanced considerably by adding two new modules of The Cancer Genome Atlas (TCGA) RNA-Seq analysis and PubMed abstract mining. ChimerDB 3.0 is composed of three modules of ChimerKB, ChimerPub and ChimerSeq. ChimerKB represents a knowledgebase including 1066 fusion genes with manual curation that were compiled from public resources of fusion genes with experimental evidences. ChimerPub includes 2767 fusion genes obtained from text mining of PubMed abstracts. ChimerSeq module is designed to archive the fusion candidates from deep sequencing data. Importantly, we have analyzed RNA-Seq data of the TCGA project covering 4569 patients in 23 cancer types using two reliable programs of FusionScan and TopHat-Fusion. The new user interface supports diverse search options and graphic representation of fusion gene structure. ChimerDB 3.0 is available at http://ercsb.ewha.ac.kr/fusiongene/.",2016-11-28 +30629299,Multicenter CT phantoms public dataset for radiomics reproducibility tests.,"

Purpose

The aim of this paper is to describe a public, open-access, computed tomography (CT) phantom image set acquired at three centers and collected especially for radiomics reproducibility research. The dataset is useful to test radiomic features reproducibility with respect to various parameters, such as acquisition settings, scanners, and reconstruction algorithms.

Acquisition and validation methods

Three phantoms were scanned in three independent institutions. Images of the following phantoms were acquired: Catphan 700 and COPDGene Phantom II (Phantom Laboratory, Greenwich, NY, USA), and the Triple modality 3D Abdominal Phantom (CIRS, Norfolk, VA, USA). Data were collected at three Dutch medical centers: MAASTRO Clinic (Maastricht, NL), Radboud University Medical Center (Nijmegen, NL), and University Medical Center Groningen (Groningen, NL) with scanners from two different manufacturers, Siemens Healthcare and Philips Healthcare. The following acquisition parameters were varied in the phantom scans: slice thickness, reconstruction kernels, and tube current.

Data format and usage notes

We made the dataset publicly available on the Dutch instance of ""Extensible Neuroimaging Archive Toolkit-XNAT"" (https://xnat.bmia.nl). The dataset is freely available and reusable with attribution (Creative Commons 3.0 license).

Potential applications

Our goal was to provide a findable, open-access, annotated, and reusable CT phantom dataset for radiomics reproducibility studies. Reproducibility testing and harmonization are fundamental requirements for wide generalizability of radiomics-based clinical prediction models. It is highly desirable to include only reproducible features into models, to be more assured of external validity across hitherto unseen contexts. In this view, phantom data from different centers represent a valuable source of information to exclude CT radiomic features that may already be unstable with respect to simplified structures and tightly controlled scan settings. The intended extension of our shared dataset is to include other modalities and phantoms with more realistic lesion simulations.",2019-01-29 +29106702,Hypothalamic-pituitary-adrenal (HPA) axis suppression after treatment with glucocorticoid therapy for childhood acute lymphoblastic leukaemia.,"

Background

Glucocorticoids play a major role in the treatment of acute lymphoblastic leukaemia (ALL). However, supraphysiological doses can suppress the hypothalamic-pituitary-adrenal (HPA) axis. HPA axis suppression resulting in reduced cortisol response may cause an impaired stress response and an inadequate host defence against infection, which remain a cause of morbidity and death. Suppression commonly occurs in the first days after cessation of glucocorticoid therapy, but the exact duration is unclear. This review is the second update of a previously published Cochrane review.

Objectives

To examine the occurrence and duration of HPA axis suppression after (each cycle of) glucocorticoid therapy for childhood ALL.

Search methods

We searched the Cochrane Central Register of Controlled Trials (CENTRAL; 2016, Issue 11), MEDLINE/PubMed (from 1945 to December 2016), and Embase/Ovid (from 1980 to December 2016). In addition, we searched reference lists of relevant articles, conference proceedings (the International Society for Paediatric Oncology and the American Society of Clinical Oncology from 2005 up to and including 2016, and the American Society of Pediatric Hematology/Oncology from 2014 up to and including 2016), and ongoing trial databases (the International Standard Registered Clinical/Social Study Number (ISRCTN) register via http://www.controlled-trials.com, the National Institutes of Health (NIH) register via www.clinicaltrials.gov, and the International Clinical Trials Registry Platform (ICTRP) of the World Health Organization (WHO) via apps.who.int/trialsearch) on 27 December 2016.

Selection criteria

All study designs, except case reports and patient series with fewer than 10 children, examining effects of glucocorticoid therapy for childhood ALL on HPA axis function.

Data collection and analysis

Two review authors independently performed study selection. One review author extracted data and assessed 'Risk of bias'; another review author checked this information.

Main results

We identified 10 studies (total of 298 children; we identified two studies for this update) including two randomised controlled trials (RCTs) that assessed adrenal function. None of the included studies assessed the HPA axis at the level of the hypothalamus, the pituitary, or both. Owing to substantial differences between studies, we could not pool results. All studies had risk of bias issues. Included studies demonstrated that adrenal insufficiency occurs in nearly all children during the first days after cessation of glucocorticoid treatment for childhood ALL. Most children recovered within a few weeks, but a small number of children had ongoing adrenal insufficiency lasting up to 34 weeks. Included studies evaluated several risk factors for (prolonged) adrenal insufficiency. First, three studies including two RCTs investigated the difference between prednisone and dexamethasone in terms of occurrence and duration of adrenal insufficiency. The RCTs found no differences between prednisone and dexamethasone arms. In the other (observational) study, children who received prednisone recovered earlier than children who received dexamethasone. Second, treatment with fluconazole appeared to prolong the duration of adrenal insufficiency, which was evaluated in two studies. One of these studies reported that the effect was present only when children received fluconazole at a dose higher than 10 mg/kg/d. Finally, two studies evaluated the presence of infection, stress episodes, or both, as a risk factor for adrenal insufficiency. In one of these studies (an RCT), trial authors found no relationship between the presence of infection/stress and adrenal insufficiency. The other study found that increased infection was associated with prolonged duration of adrenal insufficiency.

Authors' conclusions

We concluded that adrenal insufficiency commonly occurs in the first days after cessation of glucocorticoid therapy for childhood ALL, but the exact duration is unclear. No data were available on the levels of the hypothalamus and the pituitary; therefore, we could draw no conclusions regarding these outcomes. Clinicians may consider prescribing glucocorticoid replacement therapy during periods of serious stress in the first weeks after cessation of glucocorticoid therapy for childhood ALL to reduce the risk of life-threatening complications. However, additional high-quality research is needed to inform evidence-based guidelines for glucocorticoid replacement therapy. Special attention should be paid to patients receiving fluconazole therapy, and perhaps similar antifungal drugs, as these treatments may prolong the duration of adrenal insufficiency, especially when administered at a dose higher than 10 mg/kg/d. Finally, it would be relevant to investigate further the relationship between present infection/stress and adrenal insufficiency in a larger, separate study specially designed for this purpose.",2017-11-06 +24356117,Caveat emptor: single nucleotide polymorphism reporting in pharmacogenomics.,"While it is arguably the most comprehensive source of genetic information, the NCBI's dbSNP database (National Center for Biotechnology Information database of single nucleotide polymorphisms; http://www.ncbi.nlm.nih.gov/projects/SNP/) is imperfect. In this commentary, we highlight the issues surrounding this database, while considering the great importance and utility of this resource for those in the pharmacology and pharmacogenomics communities. We describe our experience with the information in this database as a cautionary tale for those who will utilize such information in the future. 
We also discuss several measures that could render it more reliable.",2013-12-12 +31971835,Effects of Aluminum on the Integrity of the Intestinal Epithelium: An in Vitro and in Vivo Study.,"BACKGROUND:Aluminum (Al) is the most abundant and ubiquitous metal in the environment. The main route of human exposure to Al is through food and water intake. Although human exposure to Al is common, the influence of Al on the gastrointestinal tract remains poorly understood. OBJECTIVES:We aimed to further understand the toxic effect of Al and to elucidate the underlying cellular mechanisms in the intestinal barrier. METHODS:The human intestinal epithelial cell line HT-29 and C57BL6 mice were exposed to AlCl3 at 0-16 mM (1-24h) and 5-50mg/kg body weight (13 weeks), respectively. In cell culture experiments, intracellular oxidative stress, inflammatory protein and gene expression, and intestinal epithelial permeability were measured. In animal studies, histological examination, gene expression, and myeloperoxidase (MPO) activity assays were conducted. RESULTS:Cellular oxidative stress level (superoxide production) in AlCl3-treated cells (4 mM, 3h) was approximately 38-fold higher than that of the control. Both protein and mRNA expression of tight junction (TJ) components (occludin and claudin-1) in AlCl3-treated cells (1-4 mM, 24h) was significantly lower than that of the control. Transepithelial electrical resistance (TEER) decreased up to 67% in AlCl3-treated cells (2 mM, 24h) compared with that of the control, which decreased approximately 7%. Al activated extracellular signal-regulated kinase 1/2 and nuclear factor-kappa B (NF-κB), resulting in mRNA expression of matrix metalloproteinase-9, myosin light-chain kinase, and inflammatory cytokines [tumor necrosis factor alpha (TNF-α), interleukin-1β (IL-1β), and IL-6] in HT-29 cells. 
Moreover, oral administration of AlCl3 to mice induced pathological alteration, MPO activation, and inflammatory cytokine (TNF-α, IL-1β, and IL-6) production in the colon. CONCLUSION:Al induced epithelial barrier dysfunction and inflammation via generation of oxidative stress, down-regulation of the TJ proteins, and production of inflammatory cytokines in HT-29 cells. In addition, Al induced toxicity in the colon by increasing the levels of inflammatory cytokines and MPO activity and induced histological damage in a mouse model. Our data suggest that Al may be a potential risk factor for human intestinal diseases. https://doi.org/10.1289/EHP5701.",2020-01-23 +28968662,Large-scale prediction of ADAR-mediated effective human A-to-I RNA editing.,"Adenosine-to-inosine (A-to-I) editing by adenosine deaminase acting on the RNA (ADAR) proteins is one of the most frequent modifications during post- and co-transcription. To facilitate the assignment of biological functions to specific editing sites, we designed an automatic online platform to annotate A-to-I RNA editing sites in pre-mRNA splicing signals, microRNAs (miRNAs) and miRNA target untranslated regions (3' UTRs) from human (Homo sapiens) high-throughput sequencing data and predict their effects based on large-scale bioinformatic analysis. After analysing plenty of previously reported RNA editing events and human normal tissues RNA high-seq data, >60 000 potentially effective RNA editing events on functional genes were found. 
The RNA Editing Plus platform is available for free at https://www.rnaeditplus.org/, and we believe our platform governing multiple optimized methods will improve further studies of A-to-I-induced editing post-transcriptional regulation.",2019-01-01 +30101165,Contributing to agricultural mix:analysis of the living standard measurement study - Integrated survey on agriculture data set.,"The Living Standard Measurement Study- Integrated Survey on Agriculture (LSMS-ISA) is a General Household Survey (GHS) and a cross-sectional survey consisting of 22,000 households which is carried out periodically across the globe. Currently, the GHS has three panels consisting of 5000 households of the GHS collecting additional data on agricultural activities, other household income activities, and household expenditure and consumption, among others. This is to improve data from the agricultural sector and the linkage to other facets of households' characteristics and outcomes. The LSMS data-set, questionnaire, and basic information document are freely available online at: http://microdata.worldbank.org/index.php/catalog/2734.",2018-07-27 +29253074,Analyzing large scale genomic data on the cloud with Sparkhit.,"Motivation:The increasing amount of next-generation sequencing data poses a fundamental challenge on large scale genomic analytics. Existing tools use different distributed computational platforms to scale-out bioinformatics workloads. However, the scalability of these tools is not efficient. Moreover, they have heavy run time overheads when pre-processing large amounts of data. To address these limitations, we have developed Sparkhit: a distributed bioinformatics framework built on top of the Apache Spark platform. Results:Sparkhit integrates a variety of analytical methods. It is implemented in the Spark extended MapReduce model. It runs 92-157 times faster than MetaSpark on metagenomic fragment recruitment and 18-32 times faster than Crossbow on data pre-processing. 
We analyzed 100 terabytes of data across four genomic projects in the cloud in 21 h, which includes the run times of cluster deployment and data downloading. Furthermore, our application on the entire Human Microbiome Project shotgun sequencing data was completed in 2 h, presenting an approach to easily associate large amounts of public datasets with reference data. Availability and implementation:Sparkhit is freely available at: https://rhinempi.github.io/sparkhit/. Contact:asczyrba@cebitec.uni-bielefeld.de. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +24816183,TuberQ: a Mycobacterium tuberculosis protein druggability database.,"In 2012 an estimated 8.6 million people developed tuberculosis (TB) and 1.3 million died from the disease [including 320 000 deaths among human immunodeficiency virus (HIV)-positive people]. There is an urgent need for new anti-TB drugs owing to the following: the fact that current treatments have severe side effects, the increasing emergence of multidrug-resistant strains of Mycobacterium tuberculosis (Mtb), the negative drug-drug interactions with certain HIV (or other disease) treatments and the ineffectiveness against dormant Mtb. In this context we present here the TuberQ database, a novel resource for all researchers working in the field of drug development in TB. The main feature of TuberQ is to provide a druggability analysis of Mtb proteins in a consistent and effective manner, contributing to a better selection of potential drug targets for screening campaigns and the analysis of targets for structure-based drug design projects. The structural druggability analysis is combined with features related to the characteristics of putative inhibitor binding pockets and with functional and biological data of proteins. The structural analysis is performed on all available unique Mtb structures and high-quality structural homology-based models. 
This information is shown in an interactive manner, depicting the protein structure, the pockets and the associated characteristics for each protein. TuberQ also provides information about gene essentiality information, as determined from whole cell-based knockout experiments, and expression information obtained from microarray experiments done in different stress-related conditions. We hope that TuberQ will be a powerful tool for researchers working in TB and eventually will lead to the identification of novel putative targets and progresses in therapeutic activities. Database URL: http://tuberq.proteinq.com.ar/",2014-05-08 +31874636,Performance of rotation forest ensemble classifier and feature extractor in predicting protein interactions using amino acid sequences.,"

Background

There are two significant problems associated with predicting protein-protein interactions using the sequences of amino acids. The first problem is representing each sequence as a feature vector, and the second is designing a model that can identify the protein interactions. Thus, effective feature extraction methods can lead to improved model performance. In this study, we used two types of feature extraction methods—global encoding and pseudo-substitution matrix representation (PseudoSMR)—to represent the sequences of amino acids in human proteins and Human Immunodeficiency Virus type 1 (HIV-1) to address the classification problem of predicting protein-protein interactions. We also compared principal component analysis (PCA) with independent principal component analysis (IPCA) as methods for transforming Rotation Forest.

Results

The results show that using global encoding and PseudoSMR as a feature extraction method successfully represents the amino acid sequence for the Rotation Forest classifier with PCA or with IPCA. This can be seen from the comparison of the results of evaluation metrics, which were >73% across the six different parameters. The accuracy of both methods was >74%. The results for the other model performance criteria, such as sensitivity, specificity, precision, and F1-score, were all >73%. The data used in this study can be accessed using the following link: https://www.dsc.ui.ac.id/research/amino-acid-pred/.

Conclusions

Both global encoding and PseudoSMR can successfully represent the sequences of amino acids. Rotation Forest (PCA) performed better than Rotation Forest (IPCA) in terms of predicting protein-protein interactions between HIV-1 and human proteins. Both the Rotation Forest (PCA) classifier and the Rotation Forest IPCA classifier performed better than other classifiers, such as Gradient Boosting, K-Nearest Neighbor, Logistic Regression, Random Forest, and Support Vector Machine (SVM). Rotation Forest (PCA) and Rotation Forest (IPCA) have accuracy, sensitivity, specificity, precision, and F1-score values >70% while the other classifiers have values <70%.",2019-12-24 +25931459,novPTMenzy: a database for enzymes involved in novel post-translational modifications.,"With the recent discoveries of novel post-translational modifications (PTMs) which play important roles in signaling and biosynthetic pathways, identification of such PTM catalyzing enzymes by genome mining has been an area of major interest. Unlike well-known PTMs like phosphorylation, glycosylation, SUMOylation, no bioinformatics resources are available for enzymes associated with novel and unusual PTMs. Therefore, we have developed the novPTMenzy database which catalogs information on the sequence, structure, active site and genomic neighborhood of experimentally characterized enzymes involved in five novel PTMs, namely AMPylation, Eliminylation, Sulfation, Hydroxylation and Deamidation. Based on a comprehensive analysis of the sequence and structural features of these known PTM catalyzing enzymes, we have created Hidden Markov Model profiles for the identification of similar PTM catalyzing enzymatic domains in genomic sequences. We have also created predictive rules for grouping them into functional subfamilies and deciphering their mechanistic details by structure-based analysis of their active site pockets. 
These analytical modules have been made available as user friendly search interfaces of novPTMenzy database. It also has a specialized analysis interface for some PTMs like AMPylation and Eliminylation. The novPTMenzy database is a unique resource that can aid in discovery of unusual PTM catalyzing enzymes in newly sequenced genomes. Database URL: http://www.nii.ac.in/novptmenzy.html",2015-04-29 +31069374,MISIM v2.0: a web server for inferring microRNA functional similarity based on microRNA-disease associations.,"MicroRNAs (miRNAs) are one class of important small non-coding RNA molecules and play critical roles in health and disease. Therefore, it is important and necessary to evaluate the functional relationship of miRNAs and then predict novel miRNA-disease associations. For this purpose, here we developed the updated web server MISIM (miRNA similarity) v2.0. Besides a 3-fold increase in data content compared with MISIM v1.0, MISIM v2.0 improved the original MISIM algorithm by implementing both positive and negative miRNA-disease associations. That is, the MISIM v2.0 scores could be positive or negative, whereas MISIM v1.0 only produced positive scores. Moreover, MISIM v2.0 achieved an algorithm for novel miRNA-disease prediction based on MISIM v2.0 scores. Finally, MISIM v2.0 provided network visualization and functional enrichment analysis for functionally paired miRNAs. The MISIM v2.0 web server is freely accessible at http://www.lirmed.com/misim/.",2019-07-01 +31053848,OrthoVenn2: a web server for whole-genome comparison and annotation of orthologous clusters across multiple species.,"OrthoVenn is a powerful web platform for the comparison and analysis of whole-genome orthologous clusters. Here we present an updated version, OrthoVenn2, which provides new features that facilitate the comparative analysis of orthologous clusters among up to 12 species. 
Additionally, this update offers improvements to data visualization and interpretation, including an occurrence pattern table for interrogating the overlap of each orthologous group for the queried species. Within the occurrence table, the functional annotations and summaries of the disjunctions and intersections of clusters between the chosen species can be displayed through an interactive Venn diagram. To facilitate a broader range of comparisons, a larger number of species, including vertebrates, metazoa, protists, fungi, plants and bacteria, have been added in OrthoVenn2. Finally, a stand-alone version is available to perform large dataset comparisons and to visualize results locally without limitation of species number. In summary, OrthoVenn2 is an efficient and user-friendly web server freely accessible at https://orthovenn2.bioinfotoolkits.net.",2019-07-01 +32019792,EspM Is a Conserved Transcription Factor That Regulates Gene Expression in Response to the ESX-1 System. ,"Pathogenic mycobacteria encounter multiple environments during macrophage infection. Temporally, the bacteria are engulfed into the phagosome, lyse the phagosomal membrane, and interact with the cytosol before spreading to another cell. Virulence factors secreted by the mycobacterial ESX-1 (ESAT-6-system-1) secretion system mediate the essential transition from the phagosome to the cytosol. It was recently discovered that the ESX-1 system also regulates mycobacterial gene expression in Mycobacterium marinum (R. E. Bosserman, T. T. Nguyen, K. G. Sanchez, A. E. Chirakos, et al., Proc Natl Acad Sci U S A 114:E10772-E10781, 2017, https://doi.org/10.1073/pnas.1710167114), a nontuberculous mycobacterial pathogen, and in the human-pathogenic species M. tuberculosis (A. M. Abdallah, E. M. Weerdenburg, Q. Guan, R. Ummels, et al., PLoS One 14:e0211003, 2019, https://doi.org/10.1371/journal.pone.0211003). It is not known how the ESX-1 system regulates gene expression. 
Here, we identify the first transcription factor required for the ESX-1-dependent transcriptional response in pathogenic mycobacteria. We demonstrate that the gene divergently transcribed from the whiB6 gene and adjacent to the ESX-1 locus in mycobacterial pathogens encodes a conserved transcription factor (MMAR_5438, Rv3863, now espM). We prove that EspM from both M. marinum and M. tuberculosis directly and specifically binds the whiB6-espM intergenic region. We show that EspM is required for ESX-1-dependent repression of whiB6 expression and for the regulation of ESX-1-associated gene expression. Finally, we demonstrate that EspM functions to fine-tune ESX-1 activity in M. marinum. Taking the data together, this report extends the esx-1 locus, defines a conserved regulator of the ESX-1 virulence pathway, and begins to elucidate how the ESX-1 system regulates gene expression. IMPORTANCE: Mycobacterial pathogens use the ESX-1 system to transport protein substrates that mediate essential interactions with the host during infection. We previously demonstrated that in addition to transporting proteins, the ESX-1 secretion system regulates gene expression. Here, we identify a conserved transcription factor that regulates gene expression in response to the ESX-1 system. We demonstrate that this transcription factor is functionally conserved in M. marinum, a pathogen of ectothermic animals; M. tuberculosis, the human-pathogenic species that causes tuberculosis; and M. smegmatis, a nonpathogenic mycobacterial species. These findings provide the first mechanistic insight into how the ESX-1 system elicits a transcriptional response, a function of this protein transport system that was previously unknown.",2020-02-04 +30010797,GECO: gene expression correlation analysis after genetic algorithm-driven deconvolution.,"

Motivation

Large-scale gene expression analysis is a valuable asset for data-driven hypothesis generation. However, the convoluted nature of large expression datasets often hinders extraction of meaningful biological information.

Results

To this end, we developed GECO, a gene expression correlation analysis software that uses a genetic algorithm-driven approach to deconvolute complex expression datasets into two subpopulations that display positive and negative correlations between a pair of queried genes. GECO's mutational enrichment and pairwise drug sensitivity analyses functions that follow the deconvolution step may help to identify the mutational factors that drive the gene expression correlation in the generated subpopulations and their differential drug vulnerabilities. Finally, GECO's drug sensitivity screen function can be used to identify drugs that differentially affect the subpopulations.

Availability and implementation

http://www.proteinguru.com/geco/ and http://www.proteinguru.com/geco/codes/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +30994882,PEPred-Suite: improved and robust prediction of therapeutic peptides using adaptive feature representation learning.,"

Motivation

Prediction of therapeutic peptides is critical for the discovery of novel and efficient peptide-based therapeutics. Computational methods, especially machine learning based methods, have been developed for addressing this need. However, most of existing methods are peptide-specific; currently, there is no generic predictor for multiple peptide types. Moreover, it is still challenging to extract informative feature representations from the perspective of primary sequences.

Results

In this study, we have developed PEPred-Suite, a bioinformatics tool for the generic prediction of therapeutic peptides. In PEPred-Suite, we introduce an adaptive feature representation strategy that can learn the most representative features for different peptide types. To be specific, we train diverse sequence-based feature descriptors, integrate the learnt class information into our features, and utilize a two-step feature optimization strategy based on the area under receiver operating characteristic curve to extract the most discriminative features. Using the learnt representative features, we trained eight random forest models for eight different types of functional peptides, respectively. Benchmarking results showed that as compared with existing predictors, PEPred-Suite achieves better and robust performance for different peptides. As far as we know, PEPred-Suite is currently the first tool that is capable of predicting so many peptide types simultaneously. In addition, our work demonstrates that the learnt features can reliably predict different peptides.

Availability and implementation

The user-friendly webserver implementing the proposed PEPred-Suite is freely accessible at http://server.malab.cn/PEPred-Suite.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-11-01 +31676574,Twist1-Induced Epithelial Dissemination Requires Prkd1 Signaling.,"Dissemination is an essential early step in metastasis but its molecular basis remains incompletely understood. To define the essential targetable effectors of this process, we developed a 3D mammary epithelial culture model, in which dissemination is induced by overexpression of the transcription factor Twist1. Transcriptomic analysis and ChIP-PCR together demonstrated that protein kinase D1 (Prkd1) is a direct transcriptional target of Twist1 and is not expressed in the normal mammary epithelium. Pharmacologic and genetic inhibition of Prkd1 in the Twist1-induced dissemination model demonstrated that Prkd1 was required for cells to initiate extracellular matrix (ECM)-directed protrusions, release from the epithelium, and migrate through the ECM. Antibody-based protein profiling revealed that Prkd1 induced broad phosphorylation changes, including an inactivating phosphorylation of β-catenin and two microtubule depolymerizing phosphorylations of Tau, potentially explaining the release of cell-cell contacts and persistent activation of Prkd1. In patients with breast cancer, TWIST1 and PRKD1 expression correlated with metastatic recurrence, particularly in basal breast cancer. Prkd1 knockdown was sufficient to block dissemination of both murine and human mammary tumor organoids. Finally, Prkd1 knockdown in vivo blocked primary tumor invasion and distant metastasis in a mouse model of basal breast cancer. Collectively, these data identify Prkd1 as a novel and targetable signaling node downstream of Twist1 that is required for epithelial invasion and dissemination. SIGNIFICANCE: Twist1 is a known regulator of metastatic cell behaviors but not directly targetable. This study provides a molecular explanation for how Twist1-induced dissemination works and demonstrates that it can be targeted. 
GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/2/204/F1.large.jpg.",2019-11-01 +31569268,"Joint Estimation of Gross Recharge, Groundwater Usage, and Hydraulic Properties within HydroSight.","Groundwater management decisions are often founded upon estimates of aquifer hydraulic properties, recharge and the rate of groundwater usage. Too often hydraulic properties are unavailable, recharge estimates are very uncertain, and usage is unmetered or infrequently metered over only recent years or estimated using numerical groundwater models decoupled from the drivers of drawdown. This paper extends the HydroSight groundwater time-series package ( http://peterson-tim-j.github.io/HydroSight/) to allow the joint estimation of gross recharge, transmissivity, storativity, and daily usage at multiple production bores. A genetic evolutionary scheme was extended from estimating time-series model parameters to also estimating time series of usage that honor metered volumes at each production bore and produces (1) the best fit with the observed hydrograph and (2) plausible estimates of actual evapotranspiration and hence recharge. The reliability of the approach was rigorously tested. Repeated calibration of models for four bores produced estimates of transmissivity, storativity, and mean recharge that varied by a factor of 0.22-0.32, 0.13-0.2, and 0.03-0.48, respectively, when recharge boundary effects were low and the error in monthly, quarterly, and biannual metered usage was generally <10%. Application to the 30 observation bores within the Warrion groundwater management area (Australia), produced a coefficient of efficiency of ≥0.80 at 22 bores and ≥0.90 at 12 bores. The aquifer transmissivity and storativity were reasonably estimated, and were consistent with independent estimates, while mean gross recharge may be slightly overestimated. 
Overall, the approach allows greater insights from the available data and provides opportunity for the exploration of usage and climatic scenarios.",2019-11-01 +30616356,Cannabidiol: A New Hope for Patients With Dravet or Lennox-Gastaut Syndromes.,"

Objective

To review the efficacy, safety, pharmacology and pharmacokinetics of pure, plant-derived cannabidiol (CBD; Epidiolex) in the treatment of Dravet syndrome (DS) and Lennox-Gastaut syndrome (LGS).

Data sources

Relevant information was identified through EMBASE and Ovid MEDLINE (1946 to October 2018). Product labeling and https://www.clinicaltrials.gov were also reviewed.

Study selection/data extraction

English language articles evaluating efficacy and safety in humans with treatment-resistant epilepsies were reviewed; additional pharmacology and pharmacokinetic studies in humans, animals, and in vitro were also included.

Data synthesis

Pure, plant-based CBD is a pharmaceutical grade extract that exhibits clinically significant antiseizure properties, with a hypothesized multimodal mechanism of action. In the GWPCARE trial series, CBD displayed superior efficacy in reducing key seizure frequencies (convulsive seizures in DS; drop seizures in LGS) by 17% to 23% compared with placebo as adjunctive therapy to standard antiepileptic drugs in patients 2 years of age and older. Common adverse effects were somnolence, diarrhea, and elevated hepatic transaminases. Noteworthy drug-drug interactions included clobazam, valproates, and significant inducers/inhibitors of CYP2C19 and 3A4 enzymes. Relevance to Patient Care and Clinical Practice: A discussion regarding CBD dosing, administration, adverse effects, monitoring parameters, and interactions is provided to guide clinicians. CBD offers patients with DS and LGS a new treatment option for refractory seizures.

Conclusion

This is the first cannabis-derived medication with approval from the US Food and Drug Administration. This CBD formulation significantly reduces seizures as an adjunct to standard antiepileptic therapies in patients ≥2 years old with DS and LGS and is well tolerated.",2019-01-08 +31191604,Systems Chemical Genetics-Based Drug Discovery: Prioritizing Agents Targeting Multiple/Reliable Disease-Associated Genes as Drug Candidates.,"Genetic disease genes are considered a promising source of drug targets. Most diseases are caused by more than one pathogenic factor; thus, it is reasonable to consider that chemical agents targeting multiple disease genes are more likely to have desired activities. This is supported by a comprehensive analysis on the relationships between agent activity/druggability and target genetic characteristics. The therapeutic potential of agents increases steadily with increasing number of targeted disease genes, and can be further enhanced by strengthened genetic links between targets and diseases. By using the multi-label classification models for genetics-based drug activity prediction, we provide universal tools for prioritizing drug candidates. All of the documented data and the machine-learning prediction service are available at SCG-Drug (http://zhanglab.hzau.edu.cn/scgdrug).",2019-05-29 +31909654,"Independent and Combined Effects of Heatwaves and PM2.5 on Preterm Birth in Guangzhou, China: A Survival Analysis.","

Background

Both extreme heat and air pollution exposure during pregnancy have been associated with preterm birth; however, their combined effects are unclear.

Objectives

Our goal was to estimate the independent and joint effects of heatwaves and fine particulate matter [PM <2.5μm in aerodynamic diameter (PM2.5)], exposure during the final gestational week on preterm birth.

Methods

Using birth registry data from Guangzhou, China, we included 215,059 singleton live births in the warm season (1 May-31 October) between January 2015 and July 2017. Daily meteorological variables from 5 monitoring stations and PM2.5 concentrations from 11 sites were used to estimate district-specific exposures. A series of cut off temperature thresholds and durations (2, 3, and 4 consecutive d) were used to define 15 different heatwaves. Cox proportional hazard models were used to estimate the effects of heatwaves and PM2.5 exposures during the final week on preterm birth, and departures from additive joint effects were assessed using the relative excess risk due to interaction (RERI).

Results

Numbers of preterm births increased in association with heatwave exposures during the final gestational week. Depending on the heatwave definition used, hazard ratios (HRs) ranged from 1.10 (95% CI: 1.01, 1.20) to 1.92 (1.39, 2.64). Associations were stronger for more intense heatwaves. Combined effects of PM2.5 exposures and heatwaves appeared to be synergistic (RERIs>0) for less extreme heatwaves (i.e., shorter or with relatively low temperature thresholds) but were less than additive (RERIs<0) for more intense heatwaves.

Conclusions

Our research strengthens the evidence that exposure to heatwaves during the final gestational week can independently trigger preterm birth. Moderate heatwaves may also act synergistically with PM2.5 exposure to increase risk of preterm birth, which adds new evidence to the current understanding of combined effects of air pollution and meteorological variables on adverse birth outcomes. https://doi.org/10.1289/EHP5117.",2020-01-07 +31358795,Taxonomy of Arabian Temnothorax Mayr (Formicidae: Myrmicinae) with description of a new species enhanced by x-ray microtomography.,"Temnothorax elmenshawyi sp. n., a new ant species from the Asir Mountains of the southwestern region of the Kingdom of Saudi Arabia, is described based on the worker caste. The new species is a member of the T. exilis species group and is distinguished from the other species included in this group by the impressed metanotal groove, the short, acute and broadly-based propodeal spines, the finely punctate posterior half of cephalic surface, and absence of a median clypeal carina. Despite extensive collecting by the authors at the type locality, only two specimens are available for description, suggesting that this species may be rare and likely endemic to the Asir Mountains. The species description is complemented by still images of volume renderings of a 3D model and a 3D rotation video of the holotype based on x-ray microtomography (micro-CT), allowing remote in-depth examination of the specimen. The virtual micro-CT data is provided as cybertype dataset and freely available online https://doi.org/10.5061/dryad.4gg39k6 , as well as 3D surface model (Sketchfab.com, https://skfb.ly/6HYRz). An updated identification key to the Arabian species is presented.",2019-07-29 +25324309,PLAZA 3.0: an access point for plant comparative genomics.,"Comparative sequence analysis has significantly altered our view on the complexity of genome organization and gene functions in different kingdoms. 
PLAZA 3.0 is designed to make comparative genomics data for plants available through a user-friendly web interface. Structural and functional annotation, gene families, protein domains, phylogenetic trees and detailed information about genome organization can easily be queried and visualized. Compared with the first version released in 2009, which featured nine organisms, the number of integrated genomes is more than four times higher, and now covers 37 plant species. The new species provide a wider phylogenetic range as well as a more in-depth sampling of specific clades, and genomes of additional crop species are present. The functional annotation has been expanded and now comprises data from Gene Ontology, MapMan, UniProtKB/Swiss-Prot, PlnTFDB and PlantTFDB. Furthermore, we improved the algorithms to transfer functional annotation from well-characterized plant genomes to other species. The additional data and new features make PLAZA 3.0 (http://bioinformatics.psb.ugent.be/plaza/) a versatile and comprehensible resource for users wanting to explore genome information to study different aspects of plant biology, both in model and non-model organisms.",2014-10-16 +30819096,isma: an R package for the integrative analysis of mutations detected by multiple pipelines.,"

Background

Recent comparative studies have brought to our attention how somatic mutation detection from next-generation sequencing data is still an open issue in bioinformatics, because different pipelines result in a low consensus. In this context, it is suggested to integrate results from multiple calling tools, but this operation is not trivial and the burden of merging, comparing, filtering and explaining the results demands appropriate software.

Results

We developed isma (integrative somatic mutation analysis), an R package for the integrative analysis of somatic mutations detected by multiple pipelines for matched tumor-normal samples. The package provides a series of functions to quantify the consensus, estimate the variability, underline outliers, integrate evidences from publicly available mutation catalogues and filter sites. We illustrate the capabilities of isma analysing breast cancer somatic mutations generated by The Cancer Genome Atlas (TCGA) using four pipelines.

Conclusions

Comparing different ""points of view"" on the same data, isma generates a unique mutation catalogue and a series of reports that underline common patterns, variability, as well as sites already catalogued by other studies (e.g. TCGA), so as to design and apply filtering strategies to screen more reliable sites. The package is available for non-commercial users at the URL https://www.itb.cnr.it/isma .",2019-02-28 +30950745,"Essential Statistical Concepts for Research in Speech, Language, and Hearing Sciences.","Purpose Clinicians depend on the accuracy of research in the speech, language, and hearing sciences to improve assessment and treatment of patients with communication disorders. Although this work has contributed to great advances in clinical care, common statistical misconceptions remain, which deserve closer inspection in the field. Challenges in applying and interpreting traditional statistical methods with behavioral data from humans have led to difficulties with replication and reproducibility in other allied scientific fields, including psychology and medicine. The importance of research in our fields of study for advancing science and clinical care for our patients means that the choices of statistical methods can have far-reaching, real-world implications. Method The goal of this article is to provide an overview of fundamental statistical concepts and methods that are used in the speech, language, and hearing sciences. Results We reintroduce basic statistical terms such as the p value and effect size, as well as recommended procedures for model selection and multiple comparisons. Conclusions Research in the speech, language, and hearing sciences can have a profound positive impact on the lives of individuals with communication disorders, but the validity of scientific findings in our fields is enhanced when data are analyzed using sound statistical methods. 
Misunderstanding or misinterpretation of basic statistical principles may erode public trust in research findings. Recommendations for practices that can help minimize the likelihood of errors in statistical inference are provided. Supplemental Material https://doi.org/10.23641/asha.7849223.",2019-03-01 +30364629,"Species characteristics of felids and canids, and the number of articles published for each species between 2013 and 2017.","The data presented are related to the research article entitled ""Biases in wildlife and conservation research, using felids and canids as a case study"" available at https://doi.org/10.1016/j.gecco.2018.e00423. This data article lists species characteristics of two families of the order Carnivora, the Felidae and Canidae, and quantitatively categorizes research output for each species. The species characteristics that were included in the dataset are body size (in kg), geographic range size, IUCN species status, population trend, likelihood of being a keystone species, number of species per genus, the Evolutionary Distinctiveness (ED) score, and the Evolutionary Distinct and Globally Endangered (EDGE) score. All scientific articles that were published on felid and canid species between 2013 and 2017 were listed and subdivided into the following research topics: (1) ecology and behaviour, (2) conservation and wildlife management, (3) anatomy and physiology, (4) diseases and other health issues, (5) captive housing and artificial reproduction, (6) genetic diversity and phylogenetic structure, and (7) taxonomy and palaeoecology. All the data is made publically available.",2018-10-03 +28420402,SSER: Species specific essential reactions database.,"

Background

Essential reactions are vital components of cellular networks. They are the foundations of synthetic biology and are potential candidate targets for antimetabolic drug design. Especially if a single reaction is catalyzed by multiple enzymes, then inhibiting the reaction would be a better option than targeting the enzymes or the corresponding enzyme-encoding gene. The existing databases such as BRENDA, BiGG, KEGG, Bio-models, Biosilico, and many others offer useful and comprehensive information on biochemical reactions. But none of these databases especially focus on essential reactions. Therefore, building a centralized repository for this class of reactions would be of great value.

Description

Here, we present a species-specific essential reactions database (SSER). The current version comprises essential biochemical and transport reactions of twenty-six organisms which are identified via flux balance analysis (FBA) combined with manual curation on experimentally validated metabolic network models. Quantitative data on the number of essential reactions, number of the essential reactions associated with their respective enzyme-encoding genes and shared essential reactions across organisms are the main contents of the database.

Conclusion

SSER would be a prime source to obtain essential reactions data and related gene and metabolite information and it can significantly facilitate the metabolic network models reconstruction and analysis, and drug target discovery studies. Users can browse, search, compare and download the essential reactions of organisms of their interest through the website http://cefg.uestc.edu.cn/sser .",2017-04-19 +30256891,De novo pattern discovery enables robust assessment of functional consequences of non-coding variants.,"

Motivation

Given the complexity of genome regions, prioritize the functional effects of non-coding variants remains a challenge. Although several frameworks have been proposed for the evaluation of the functionality of non-coding variants, most of them used 'black boxes' methods that simplify the task as the pathogenicity/benign classification problem, which ignores the distinct regulatory mechanisms of variants and leads to less desirable performance. In this study, we developed DVAR, an unsupervised framework that leverage various biochemical and evolutionary evidence to distinguish the gene regulatory categories of variants and assess their comprehensive functional impact simultaneously.

Results

DVAR performed de novo pattern discovery in high-dimensional data and identified five regulatory clusters of non-coding variants. Leveraging the new insights into the multiple functional patterns, it measures both the between-class and the within-class functional implication of the variants to achieve accurate prioritization. Compared to other two-class learning methods, it showed improved performance in identification of clinically significant variants, fine-mapped GWAS variants, eQTLs and expression-modulating variants. Moreover, it has superior performance on disease causal variants verified by genome-editing (like CRISPR-Cas9), which could provide a pre-selection strategy for genome-editing technologies across the whole genome. Finally, evaluated in BioVU and UK Biobank, two large-scale DNA biobanks linked to complete electronic health records, DVAR demonstrated its effectiveness in prioritizing non-coding variants associated with medical phenotypes.

Availability and implementation

The C++ and Python source codes, the pre-computed DVAR-cluster labels and DVAR-scores across the whole genome are available at https://www.vumc.org/cgg/dvar.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +28642562,What is the methodological quality of published dental implant guidelines?,"Data sourcesSix implant dentistry journals with impact factors (2014) assigned by Journal Citation Reports (Clinical Oral Implants Research, Clinical Implant Dentistry and Related Research, European Journal of Oral Implants, The International Journal of Oral and Maxillofacial Implants, Journal of Oral Implantology, and Implant Dentistry) and the Medline database.Study selectionTwo reviewers independently selected guidelines published between May 2009 and February 2016.Data evaluationFollowing training four reviewers independently applied the Agree II tool (http://www.agreetrust.org/) to the selected guidelines with disagreements being resolved by discussion. Scores for the six domains of the AGREE II tool were presented as median percentages of the maximum possible with their respective interquartile ranges (IQR). Domain scores were divided into consensus guidelines, and consensus guidelines with systematic reviews.ResultsTwenty-seven consensus guidelines were included, with 19 contributing to the comparisons between groups. Twenty-six guidelines were developed after meetings in Europe, with the European Association of Osseointergration developing the most guidelines (n=9). The number of authors for the guidelines varied from 2-27 (median, 9). For consensus guidelines only domain four scored highest. Guidelines with systematic review scored higher for all domains with the exception of domain five (Table 1).ConclusionsThere is room to improve the quality of consensus guidelines published in highly ranked implant dentistry journals. Clinicians' and researchers' development of consensus guidelines to improve clinical treatment with dental implants is laudable. However, as for primary and secondary research, these guidelines should adhere to high and transparent standards. 
The AGREE II instrument can be used as a reference for the development of high-quality guidelines to provide unbiased and adequate clinical recommendations to clinicians working with dental implants.",2017-06-01 +31662451,miR-29a Is Repressed by MYC in Pancreatic Cancer and Its Restoration Drives Tumor-Suppressive Effects via Downregulation of LOXL2.,"Pancreatic ductal adenocarcinoma (PDAC) is an intractable cancer with a dismal prognosis. miR-29a is commonly downregulated in PDAC; however, mechanisms for its loss and role still remain unclear. Here, we show that in PDAC, repression of miR-29a is directly mediated by MYC via promoter activity. RNA sequencing analysis, integrated with miRNA target prediction, identified global miR-29a downstream targets in PDAC. Target enrichment coupled with gene ontology and survival correlation analyses identified the top five miR-29a-downregulated target genes (LOXL2, MYBL2, CLDN1, HGK, and NRAS) that are known to promote tumorigenic mechanisms. Functional validation confirmed that upregulation of miR-29a is sufficient to ablate translational expression of these five genes in PDAC. We show that the most promising target among the identified genes, LOXL2, is repressed by miR-29a via 3'-untranslated region binding. Pancreatic tissues from a PDAC murine model and patient biopsies showed overall high LOXL2 expression with inverse correlations with miR-29a levels. Collectively, our data delineate an antitumorigenic, regulatory role of miR-29a and a novel MYC-miR-29a-LOXL2 regulatory axis in PDAC pathogenesis, indicating the potential of the molecule in therapeutic opportunities. IMPLICATIONS: This study unravels a novel functional role of miR-29a in PDAC pathogenesis and identifies an MYC-miR-29a-LOXL2 axis in regulation of the disease progression, implicating miR-29a as a potential therapeutic target for PDAC. 
VISUAL OVERVIEW: http://mcr.aacrjournals.org/content/molcanres/18/2/311/F1.large.jpg.",2019-10-29 +22563070,"GEMSiRV: a software platform for GEnome-scale metabolic model simulation, reconstruction and visualization.","

Motivation

Genome-scale metabolic network models have become an indispensable part of the increasingly important field of systems biology. Metabolic systems biology studies usually include three major components-network model construction, objective- and experiment-guided model editing and visualization, and simulation studies based mainly on flux balance analyses. Bioinformatics tools are required to facilitate these complicated analyses. Although some of the required functions have been served separately by existing tools, a free software resource that simultaneously serves the needs of the three major components is not yet available.

Results

Here we present a software platform, GEMSiRV (GEnome-scale Metabolic model Simulation, Reconstruction and Visualization), to provide functionalities of easy metabolic network drafting and editing, amenable network visualization for experimental data integration and flux balance analysis tools for simulation studies. GEMSiRV comes with downloadable, ready-to-use public-domain metabolic models, reference metabolite/reaction databases and metabolic network maps, all of which can be input into GEMSiRV as the starting materials for network construction or simulation analyses. Furthermore, all of the GEMSiRV-generated metabolic models and analysis results, including projects in progress, can be easily exchanged in the research community. GEMSiRV is a powerful integrative resource that may facilitate the development of systems biology studies.

Availability

The software is freely available on the web at http://sb.nhri.org.tw/GEMSiRV.",2012-05-04 +29788413,ezTag: tagging biomedical concepts via interactive learning.,"Recently, advanced text-mining techniques have been shown to speed up manual data curation by providing human annotators with automated pre-annotations generated by rules or machine learning models. Due to the limited training data available, however, current annotation systems primarily focus only on common concept types such as genes or diseases. To support annotating a wide variety of biological concepts with or without pre-existing training data, we developed ezTag, a web-based annotation tool that allows curators to perform annotation and provide training data with humans in the loop. ezTag supports both abstracts in PubMed and full-text articles in PubMed Central. It also provides lexicon-based concept tagging as well as the state-of-the-art pre-trained taggers such as TaggerOne, GNormPlus and tmVar. ezTag is freely available at http://eztag.bioqrator.org.",2018-07-01 +30961560,"Diversity of HIV-1 genotypes and high prevalence of pretreatment drug resistance in newly diagnosed HIV-infected patients in Shanghai, China.","

Background

Genetic variability and liability to develop drug-resistant mutations are the main characteristics of HIV-1, which can not only increase the risk of antiretroviral treatment (ART) failure, but also can lead to the spread of resistant strains. We aim to investigate the distribution of HIV-1 genotypes and prevalence of pretreatment drug resistance (PDR) in ART-naïve HIV-1 infected patients in Shanghai China.

Methods

A cross-sectional study was performed among the newly diagnosed ART-naive HIV-1 infected patients during the period from January 2017 to November 2017 in Shanghai Public Health Clinical Center. The target fragment of 1316 bp in the pol gene spanning the reverse transcriptase and protease regions was amplified using a nested polymerase chain reaction. HIV-1 genotypes were determined by phylogenetic analysis, and PDR associated mutations were determined according to Stanford University HIV Drug Resistance Database ( http://hivdb.stanford.edu/ ).

Results

We successfully amplified pol gene sequences from blood samples of 317 patients, of whom 95.3% were male, and 68.8% were men who have sex with men. The median age was 33 years; and the median CD4 count was 275 cells/μL. The predominant HIV-1 genotype was circulating recombinant form (CRF) 01_AE (53.0%, 168/317), followed by CRF07_BC (29.7%, 94/317), B (7.6%, 24/317), CRF08_BC (1.9%, 6/317), CRF55_01B (1.9%, 6/317), CRF 59_01B (0.9%, 3/317). In addition, 5% (16/317) HIV-1 strains were identified as other subtypes or CRFs/URFs (unique recombinant forms). The overall prevalence of PDR was 17.4% (55/317). PDR frequency to non-nucleoside reverse transcriptase inhibitor (NNRTI, 16.4%) was much higher than that to nucleoside reverse transcriptase inhibitor (NRTI, 4.7%) and protease inhibitor (PI, 0.6%). The most common HIV-1 mutation pattern for NNRTI and NRTI were V179D/E (10.1%, 32/317) and M184 V (2.8%, 9/317), respectively. About half (49.1%, 27/55) of the HIV-1 strains with mutation presented as potential low-level resistant to NNRTI attributed to V179D/E.

Conclusion

The distribution of HIV-1 genotypes in Shanghai China is diverse and complex. The high prevalence of PDR highlights the significance of baseline HIV-1 drug resistance testing. Non-NNRTI-containing regimen may be the preferred initial therapy for newly diagnosed HIV-1 patients in Shanghai in the absence of PDR test results.",2019-04-08 +30958970,Progression of Aphasia Severity in the Chronic Stages of Stroke.,"Background and Purpose The severity of aphasic impairment in chronic stroke survivors is typically thought to be stable by 6 months postonset. However, a recent study showed that stroke survivors with aphasia experience language improvement or decline in the chronic phase, years beyond onset. Little is known about why some individuals improve whereas others remain stable or decline. Additionally, no study has tracked changes in aphasia from assessments completed at multiple time points across many years. The current study offers a comprehensive analysis of potential predictive demographic and health information to determine which factors predict dynamic changes in aphasia severity in chronic stroke. Methods Individuals in the chronic stage of a single-event, left-hemisphere ischemic stroke were identified from an archival database and included for study ( N = 39). Participants were included if they had undergone 2 or more standardized language assessments acquired at time points at least 6 months apart, with the 1st assessment at least 6 months postinjury. A linear mixed-effects model was used to determine the impact of treatment and a variety of demographic and health factors on language change. Results Over time, half of the participants improved (51%), whereas approximately a quarter (26%) decreased, and a quarter (23%) remained stable. A greater number of aphasia treatment hours significantly predicted language improvement ( p = .03), whereas older stroke age was associated with long-term decline ( p = .04). 
Two interactions were found to be significant in predicting improvement in individuals with diabetes: Increased exercise and younger age at stroke were significant in predicting outcomes ( p < .05). Conclusions Factors that significantly influence language recovery in chronic aphasia include stroke age and receiving aphasia treatment. For those with diabetes, increased exercise was shown to improve outcomes. Results from this study offer clinicians greater insight into the influence of patient factors on long-term recovery from stroke aphasia while suggesting a potential adjunct to language therapy: exercise. Supplemental Material https://doi.org/10.23641/asha.7849304.",2019-04-08 +29868903,ACPred-FL: a sequence-based predictor using effective feature representation to improve the prediction of anti-cancer peptides.,"

Motivation

Anti-cancer peptides (ACPs) have recently emerged as promising therapeutic agents for cancer treatment. Due to the avalanche of protein sequence data in the post-genomic era, there is an urgent need to develop automated computational methods to enable fast and accurate identification of novel ACPs within the vast number of candidate proteins and peptides.

Results

To address this, we propose a novel predictor named Anti-Cancer peptide Predictor with Feature representation Learning (ACPred-FL) for accurate prediction of ACPs based on sequence information. More specifically, we develop an effective feature representation learning model, with which we can extract and learn a set of informative features from a pool of support vector machine-based models trained using sequence-based feature descriptors. By doing so, the class label information of data samples is fully utilized. To improve the feature representation, we further employ a two-step feature selection technique, resulting in a most informative five-dimensional feature vector for the final peptide representation. Experimental results show that such five features provide the most discriminative power for identifying ACPs than currently available feature descriptors, highlighting the effectiveness of the proposed feature representation learning approach. The developed ACPred-FL method significantly outperforms state-of-the-art methods.

Availability and implementation

The web-server of ACPred-FL is available at http://server.malab.cn/ACPred-FL.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +30473620,Incentivising use of structured language in biological descriptions: Author-driven phenotype data and ontology production.,"Phenotypes are used for a multitude of purposes such as defining species, reconstructing phylogenies, diagnosing diseases or improving crop and animal productivity, but most of this phenotypic data is published in free-text narratives that are not computable. This means that the complex relationship between the genome, the environment and phenotypes is largely inaccessible to analysis and important questions related to the evolution of organisms, their diseases or their response to climate change cannot be fully addressed. It takes great effort to manually convert free-text narratives to a computable format before they can be used in large-scale analyses. We argue that this manual curation approach is not a sustainable solution to produce computable phenotypic data for three reasons: 1) it does not scale to all of biodiversity; 2) it does not stop the publication of free-text phenotypes that will continue to need manual curation in the future and, most importantly, 3) It does not solve the problem of inter-curator variation (curators interpret/convert a phenotype differently from each other). Our empirical studies have shown that inter-curator variation is as high as 40% even within a single project. With this level of variation, it is difficult to imagine that data integrated from multiple curation projects can be of high quality. The key causes of this variation have been identified as semantic vagueness in original phenotype descriptions and difficulties in using standardised vocabularies (ontologies). We argue that the authors describing phenotypes are the key to the solution. Given the right tools and appropriate attribution, the authors should be in charge of developing a project's semantics and ontology. 
This will speed up ontology development and improve the semantic clarity of phenotype descriptions from the moment of publication. A proof of concept project on this idea was funded by NSF ABI in July 2017. We seek readers input or critique of the proposed approaches to help achieve community-based computable phenotype data production in the near future. Results from this project will be accessible through https://biosemantics.github.io/author-driven-production.",2018-11-07 +24198707,Herbarium of vascular plants collection of the university of extremadura (Spain).,"The herbarium of University of Extremadura (UNEX Herbarium) is formed by 36451 specimens of vascular plants whose main origin is the autonomous region of Extremadura (Spain) and Portugal, although it also contains a smaller number of specimens from different places, including the rest of peninsular Spain, the Baleares Islands, the Macaronesian region (Canary Islands, Madeira and Azores), northwest of Africa (Morocco) and Brazil. 98% of the total records are georeferenced. It is an active collection in continuous growth. Its data can be accessed through the GBIF data portal at http://data.gbif.org/datasets/resource/255 and http://www.eweb.unex.es/eweb/botanica/herbario/. This paper describes the specimen associated data set of the UNEX Herbarium, with an objective to disseminate the data contained in a data set with potential users, and promote the multiple uses of the data.",2013-06-19 +29950004,Inference of species phylogenies from bi-allelic markers using pseudo-likelihood.,"

Motivation

Phylogenetic networks represent reticulate evolutionary histories. Statistical methods for their inference under the multispecies coalescent have recently been developed. A particularly powerful approach uses data that consist of bi-allelic markers (e.g. single nucleotide polymorphism data) and allows for exact likelihood computations of phylogenetic networks while numerically integrating over all possible gene trees per marker. While the approach has good accuracy in terms of estimating the network and its parameters, likelihood computations remain a major computational bottleneck and limit the method's applicability.

Results

In this article, we first demonstrate why likelihood computations of networks take orders of magnitude more time when compared to trees. We then propose an approach for inference of phylogenetic networks based on pseudo-likelihood using bi-allelic markers. We demonstrate the scalability and accuracy of phylogenetic network inference via pseudo-likelihood computations on simulated data. Furthermore, we demonstrate aspects of robustness of the method to violations in the underlying assumptions of the employed statistical model. Finally, we demonstrate the application of the method to biological data. The proposed method allows for analyzing larger datasets in terms of the numbers of taxa and reticulation events. While pseudo-likelihood had been proposed before for data consisting of gene trees, the work here uses sequence data directly, offering several advantages as we discuss.

Availability and implementation

The methods have been implemented in PhyloNet (http://bioinfocs.rice.edu/phylonet).",2018-07-01 +31921518,A transcriptomic study of probenecid on injured spinal cords in mice.,"

Background

Recent studies have found that probenecid has neuroprotective and reparative effects on central nervous system injuries. However, its effect on genome-wide transcription in acute spinal cord injury (SCI) remains unknown. In the present study, RNA sequencing (RNA-Seq) is used to analyze the effect of probenecid on the local expression of gene transcription 8 h after spinal injury.

Methods

An Infinite Horizon impactor was used to perform contusive SCI in mice. The SCI model was made by using a rod (1.3 mm diameter) with a force of 50 Kdynes. Sham-operated mice only received a laminectomy without contusive injury. The injured mice were randomly assigned into either the control (SCI_C) or probenecid injection (SCI_P) group. In the latter group, the probenecid drug was intraperitoneally injected (0.5 mg/kg) immediately following injury. Eight hours after the injury or laminectomy, the spinal cords were removed from the mice in both groups. The total RNAs were extracted and purified for library preparation and transcriptome sequencing. Differential gene expressions (DEGs) of the three groups-sham, SCI_C and SCI_P-were analyzed using DESeq software. Gene Ontology (GO) and Kyoto Encyclopedia of Genes and Genomes (KEGG) enrichment analysis of DEGs were performed using the GOseq R package and KOBAS software. Real-time quantitative reverse-transcriptase polymerase chain reaction was used to validate RNA-Seq results.

Results

RNA-Seq showed that, compared to the SCI_C group, the number of DEGs was 641 in the SCI_P group (286 upregulated and 355 downregulated). According to GO analysis, DEGs were most enriched in extracellular matrix (ECM), collagen trimer, protein binding and sequence specific DNA binding. KEGG analysis showed that the most enriched pathways included: cell adhesion molecules, Leukocyte transendothelial migration, ECM-receptor interactions, PI3K-Akt signaling pathways, hematopoietic cell lineages, focal adhesions, the Rap1 signaling pathway, etc. The sequence data have been deposited into the Sequence Read Archive (https://www.ncbi.nlm.nih.gov/sra/PRJNA554464).",2020-01-03 +31855605,Conversational Language in 3-Year-Old Children Born Very Preterm and at Term.,"Purpose Language difficulties are prevalent among children born preterm. Existing studies have largely used standardized language tests, providing limited scope for detailed descriptive examination of preterm language. This study aimed to examine differences in conversational language between children born < 30 weeks and at term as well as correlations between language sample analysis (LSA) and a standardized language tool. Method Two hundred four 3-year-olds (103 born < 30 weeks, 101 born at term) recruited at birth provided a 10-min language sample and completed the Preschool Language Scales-Fifth Edition (I. Zimmerman, Steiner, & Pond, 2011). LSA was conducted using the Systematic Analysis of Language Transcripts and Index of Productive Syntax. Group differences were analyzed using linear regression, and Pearson correlation coefficient (coef) was used to determine correlations between measures. 
Results Children born < 30 weeks scored lower than term-born peers on multiple metrics when controlled for confounding factors (sex, high social risk, multilingualism, and diagnosed neurodevelopmental disorders), including mean length of utterance in morphemes (coef = -0.28, 95% confidence interval [CI] [-0.56, 0.01]) and words (coef = -0.29, 95% CI [-0.53, -0.05]), number of different word roots (coef = -10.04, 95% CI [-17.93, -2.14]), and Index of Productive Syntax sentence structures (coef = -1.81, 95% CI [-3.10, -0.52]). Other variables (e.g., number of utterances, number of nouns and adjectives) were not significantly different between groups. LSA and the Preschool Language Scales-Fifth Edition were at most moderately correlated (≤ .45). Conclusions Three-year-old children born preterm demonstrated poorer conversational language than children born at term, with some specific areas of deficit emerging. Furthermore, formal assessment and LSA appear to provide relatively distinct and yet complementary data to guide diagnostic and intervention decisions. Supplemental Material https://doi.org/10.23641/asha.11368073.",2019-12-19 +31060915,Hemiarthroplasty vs Total Hip Arthroplasty for the Management of Displaced Neck of Femur Fractures: A Systematic Review and Meta-Analysis.,"

Background

Displaced femoral neck fractures (DFNF) are common and can be treated with osteosynthesis, hemiarthroplasty (HA), or total hip arthroplasty (THA). There is no consensus as to which intervention is superior in managing DFNF.

Methods

Studies were identified through a systematic search of the MEDLINE database, EMBASE database, and Cochrane Controlled Trials. Included studies were randomized or controlled trials (1966 to August 2018) comparing THA with HA for the management of DFNF. (https://www.crd.york.ac.uk/PROSPERO Identifier: CRD42018110057).

Results

Seventeen studies were included totaling 1364 patients (660 THA and 704 HA). THA was found to be superior to HA in terms of risk of reoperation, Harris Hip Score and Quality of Life (Short Form 36). Overall, the risk of dislocation was greater in THA group than HA in the first 4 years, after which there was no difference. There was no difference between THA and HA in terms of mortality or infection.

Conclusion

Overall, THA appears to be superior to HA. THA should be the recommended intervention for DFNF in patients with a life expectancy >4 years and in patients younger than 80 years. However, both HA and THA are reasonable interventions in patients older than 80 years and with shorter life expectancy.",2019-04-06 +26097510,SANCDB: a South African natural compound database.,"

Background

Natural products (NPs) are important to the drug discovery process. NP research efforts are expanding world-wide and South Africa is no exception to this. While freely-accessible small molecule databases, containing compounds isolated from indigenous sources, have been established in a number of other countries, there is currently no such online database in South Africa.

Description

The current research presents a South African natural compound database, named SANCDB. This is a curated and fully-referenced database containing compound information for 600 natural products extracted directly from journal articles, book chapters and theses. There is a web interface to the database, which is simple and easy to use, while allowing for compounds to be searched by a number of different criteria. Being fully referenced, each compound page contains links to the original referenced work from which the information was obtained. Further, the website provides a submission pipeline, allowing researchers to deposit compounds from their own research into the database.

Conclusions

SANCDB is currently the only web-based NP database in Africa. It aims to provide a useful resource for the in silico screening of South African NPs for drug discovery purposes. The database is supported by a submission pipeline to allow growth by entries from researchers. As such, we currently present SANCDB the starting point of a platform for a community-driven, curated database to further natural products research in South Africa. SANCDB is freely available at https://sancdb.rubi.ru.ac.za/.",2015-06-19 +29467030,Drug discontinuation before contrast procedures and the effect on acute kidney injury and other clinical outcomes: a systematic review protocol.,"BACKGROUND:Contrast-induced acute kidney injury (CI-AKI) is defined as worsening of renal function after the administration of iodinated contrast material. In patients with cardiovascular disease, kidney disease, and/or diabetes, renin-angiotensin system blockers, non-steroidal anti-inflammatory drugs, diuretics, and metformin can increase the risk of CI-AKI when undergoing contrast imaging. Despite CI-AKI being the leading iatrogenic cause of acute kidney injury, there is a lack of sufficient scientific evidence supporting which drugs should be stopped, when they should be stopped, and when they should be resumed. The purpose of this systematic review is to assess (1) the effect of withholding medication before contrast procedures on the risk of CI-AKI and other clinical outcomes and (2) the incidence of adverse events occurring after withholding these drugs prior to contrast procedures. This protocol has been registered with PROSPERO, https://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42016033178 . METHODS:An information specialist will assist in searching MEDLINE, Embase, and the Cochrane Library databases to identify randomized controlled trials, observational studies, case reports, and case series. 
Relevant abstracts from professional society meetings and web-based registries of clinical trials will also be included. Studies included will compare patients aged ≥ 18 years instructed to continue taking the drugs of interest and those advised to stop taking them before undergoing contrast procedures. If these drugs are not withheld prior to contrast procedures, the studies must compare patients who are administered these drugs and those who are not before undergoing contrast procedures. Two reviewers will independently screen the titles and abstracts of the studies obtained from the search using pre-defined inclusion criteria and will then extract data from the full texts of selected studies. The quality of the studies will be assessed by two independent reviewers using the Cochrane Risk of Bias 2.0 tool for randomized trials and the Newcastle-Ottawa Scale for observational studies. DISCUSSION:This systematic review will provide a synthesis of current evidence on the discontinuation of drugs prior to contrast procedures and its effect on CI-AKI and other clinical outcomes. These findings will provide clinicians with guidelines and serve as a strong research base for future studies in this field. SYSTEMATIC REVIEW REGISTRATION:PROSPERO CRD42016033178.",2018-02-21 +31897965,3D printing method for next-day acetabular fracture surgery using a surface filtering pipeline: feasibility and 1-year clinical results.,"

Introduction

In orthopedic surgery, 3D printing is a technology with promising medical applications. Publications show promising results in acetabular fracture surgery over the last years using 3D printing. However, little information has been published about the workflow and circumstances of how to properly derive the 3D printed fracture model from a CT scan.

Materials and methods

We conducted a retrospective analysis of patients with acetabular fractures in a level 1 trauma center. DICOM data were preoperatively used in a series of patients with acetabular fractures. The 3D mesh models were created using 3D Slicer (https://www.slicer.org) with a newly introduced surface filtering method. The models were printed using PLA material with an FDM printer. After reduction in the printed model, the acetabular reconstruction plate was bent preoperatively and sterilized. A clinical follow-up after 12 months on average was conducted with the patients.

Results

In total, 12 patients were included. Mean printing time was 8:40 h. The calculated mean printing time without applying the surface filter was 25:26 h. This corresponds to an average printing time reduction of 65%. Mean operation time was 3:16 h, and mean blood loss was 853 ml. Model creation time was about 11 min, and mean printing time of the 3D model was 8:40 h, preoperative model reduction time was 5 min on average, and preoperative bending of the plate took about 10 min. After 12 months, patients underwent a structured follow-up. Harris Hip Score was 75.7 points, the Modified Harris Hip Score 71.6 points and the Merle d'Aubigne Score 11.1 points on average.

Conclusions

We presented the first clinical practical technique to use 3D printing in acetabular fracture surgery. By introducing a new surface filtering pipeline, we reduced printing time and cost compared to the current literature and the state of the art. Low costs and easy handling of the 3D printing workflow make it usable in nearly every hospital setting for acetabular fracture surgery.",2020-01-02 +30001141,Drug Repurposing Using Deep Embeddings of Gene Expression Profiles.,"Computational drug repositioning requires assessment of the functional similarities among compounds. Here, we report a new method for measuring compound functional similarity based on gene expression data. This approach takes advantage of deep neural networks to learn an embedding that substantially denoises expression data, making replicates of the same compound more similar. Our method uses unlabeled data in the sense that it only requires compounds to be labeled by identity rather than detailed pharmacological information, which is often unavailable and costly to obtain. Similarity in the learned embedding space accurately predicted pharmacological similarities despite the lack of any such labels during training and achieved substantially improved performance in comparison with previous similarity measures applied to gene expression measurements. Our method could identify drugs with shared therapeutic and biological targets even when the compounds were structurally dissimilar, thereby revealing previously unreported functional relationships between compounds. 
Thus, our approach provides an improved engine for drug repurposing based on expression data, which we have made available through the online tool DeepCodex ( http://deepcodex.org ).",2018-08-07 +30478323,Found In Translation: a machine learning model for mouse-to-human inference.,"Cross-species differences form barriers to translational research that ultimately hinder the success of clinical trials, yet knowledge of species differences has yet to be systematically incorporated in the interpretation of animal models. Here we present Found In Translation (FIT; http://www.mouse2man.org ), a statistical methodology that leverages public gene expression data to extrapolate the results of a new mouse experiment to expression changes in the equivalent human condition. We applied FIT to data from mouse models of 28 different human diseases and identified experimental conditions in which FIT predictions outperformed direct cross-species extrapolation from mouse results, increasing the overlap of differentially expressed genes by 20-50%. FIT predicted novel disease-associated genes, an example of which we validated experimentally. FIT highlights signals that may otherwise be missed and reduces false leads, with no experimental cost.",2018-11-26 +30649193,KORP: knowledge-based 6D potential for fast protein and loop modeling.,"

Motivation

Knowledge-based statistical potentials constitute a simpler and easier alternative to physics-based potentials in many applications, including folding, docking and protein modeling. Here, to improve the effectiveness of the current approximations, we attempt to capture the six-dimensional nature of residue-residue interactions from known protein structures using a simple backbone-based representation.

Results

We have developed KORP, a knowledge-based pairwise potential for proteins that depends on the relative position and orientation between residues. Using a minimalist representation of only three backbone atoms per residue, KORP utilizes a six-dimensional joint probability distribution to outperform state-of-the-art statistical potentials for native structure recognition and best model selection in recent critical assessment of protein structure prediction and loop-modeling benchmarks. Compared with the existing methods, our side-chain independent potential has a lower complexity and better efficiency. The superior accuracy and robustness of KORP represent a promising advance for protein modeling and refinement applications that require a fast but highly discriminative energy function.

Availability and implementation

http://chaconlab.org/modeling/korp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +30010236,OneTwoTree: An online tool for phylogeny reconstruction.,"Phylogeny reconstruction is a key instrument in numerous biological analyses, ranging from evolutionary and ecology research, to conservation and systems biology. The increasing accumulation of genomic data makes it possible to reconstruct phylogenies with both high accuracy and at increasingly finer resolution. Yet, taking advantage of the enormous amount of sequence data available requires the use of computational tools for efficient data retrieval and processing, or else the process could quickly become an error-prone endeavour. Here, we present OneTwoTree (http://onetwotree.tau.ac.il/), a Web-based tool for tree reconstruction based on the supermatrix paradigm. Given a list of taxa names of interest as the sole input requirement, OneTwoTree retrieves all available sequence data from NCBI GenBank, clusters these into orthology groups, identifies the most informative set of markers, searches for an appropriate outgroup, and assembles a partitioned sequence matrix that is then used for the final phylogeny reconstruction step. OneTwoTree further allows users to control various steps of the process, such as the merging of sequences from similar clusters, or phylogeny reconstruction based on markers from a specific genome type. By comparing the performance of OneTwoTree to a manually reconstructed phylogeny of the Antirrhineae tribe, we show that the use of OneTwoTree resulted in substantially higher data coverage in terms of both taxon sampling and the number of informative markers assembled. OneTwoTree provides a flexible online tool for species-tree reconstruction, aimed to assist researchers ranging in their level of prior expertise in the task of phylogeny reconstruction.",2018-08-03 +30668635,admixr-R package for reproducible analyses using ADMIXTOOLS.,"

Summary

We present a new R package admixr, which provides a convenient interface for performing reproducible population genetic analyses (f3, D, f4, f4-ratio, qpWave and qpAdm), as implemented by command-line programs in the ADMIXTOOLS software suite. In a traditional ADMIXTOOLS workflow, the user must first generate a set of text configuration files tailored to each individual analysis, often using a combination of shell scripting and manual text editing. The non-tabular output files then need to be parsed to extract values of interest prior to further analyses. Our package simplifies this process by automating all low-level configuration and parsing steps, making analyses as simple as running a single R command. Furthermore, we provide a set of R functions for processing, filtering and manipulating datasets in the EIGENSTRAT format. By unifying all steps of the workflow under a single R framework, this package enables the automation of analytic pipelines, significantly improving the reproducibility of population genetic studies.

Availability and implementation

The source code of the R package is available under the MIT license. Installation instructions, reference manual and a tutorial can be found on the package website at https://bioinf.eva.mpg.de/admixr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +31513561,Severe Pulmonary Disease Associated with Electronic-Cigarette-Product Use - Interim Guidance.,"On September 6, 2019, this report was posted as an MMWR Early Release on the MMWR website (https://www.cdc.gov/mmwr). As of August 27, 2019, 215 possible cases of severe pulmonary disease associated with the use of electronic cigarette (e-cigarette) products (e.g., devices, liquids, refill pods, and cartridges) had been reported to CDC by 25 state health departments. E-cigarettes are devices that produce an aerosol by heating a liquid containing various chemicals, including nicotine, flavorings, and other additives (e.g., propellants, solvents, and oils). Users inhale the aerosol, including any additives, into their lungs. Aerosols produced by e-cigarettes can contain harmful or potentially harmful substances, including heavy metals such as lead, volatile organic compounds, ultrafine particles, cancer-causing chemicals, or other agents such as chemicals used for cleaning the device (1). E-cigarettes also can be used to deliver tetrahydrocannabinol (THC), the principal psychoactive component of cannabis, or other drugs; for example, ""dabbing"" involves superheating substances that contain high concentrations of THC and other plant compounds (e.g., cannabidiol) with the intent of inhaling the aerosol. E-cigarette users could potentially add other substances to the devices. This report summarizes available information and provides interim case definitions and guidance for reporting possible cases of severe pulmonary disease. The guidance in this report reflects data available as of September 6, 2019; guidance will be updated as additional information becomes available.",2019-09-13 +30423104,Analysis of single amino acid variations in singlet hot spots of protein-protein interfaces.,"

Motivation

Single amino acid variations (SAVs) in protein-protein interaction (PPI) sites play critical roles in diseases. PPI sites (interfaces) have a small subset of residues called hot spots that contribute significantly to the binding energy, and they may form clusters called hot regions. Singlet hot spots are the single amino acid hot spots outside of the hot regions. The distribution of SAVs on the interface residues may be related to their disease association.

Results

We performed statistical and structural analyses of SAVs with literature curated experimental thermodynamics data, and demonstrated that SAVs which destabilize PPIs are more likely to be found in singlet hot spots rather than hot regions and energetically less important interface residues. In contrast, non-hot spot residues are significantly enriched in neutral SAVs, which do not affect PPI stability. Surprisingly, we observed that singlet hot spots tend to be enriched in disease-causing SAVs, while benign SAVs significantly occur in non-hot spot residues. Our work demonstrates that SAVs in singlet hot spot residues have significant effect on protein stability and function.

Availability and implementation

The dataset used in this paper is available as Supplementary Material. The data can be found at http://prism.ccbb.ku.edu.tr/data/sav/ as well.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-09-01 +31114841,ARMBIS: accurate and robust matching of brain image sequences from multiple modal imaging techniques.,"

Motivation

Study of brain images of rodent animals is the most straightforward way to understand brain functions and neural basis of physiological functions. An important step in brain image analysis is to precisely assign signal labels to specified brain regions through matching brain images to standardized brain reference atlases. However, no significant effort has been made to match different types of brain images to atlas images due to influence of artifact operation during slice preparation, relatively low resolution of images and large structural variations in individual brains.

Results

In this study, we develop a novel image sequence matching procedure, termed accurate and robust matching brain image sequences (ARMBIS), to match brain image sequences to established atlas image sequences. First, for a given query image sequence a scaling factor is estimated to match a reference image sequence by a curve fitting algorithm based on geometric features. Then, the texture features as well as the scale and rotation invariant shape features are extracted, and a dynamic programming-based procedure is designed to select optimal image subsequences. Finally, a hierarchical decision approach is employed to find the best matched subsequence using regional textures. Our simulation studies show that ARMBIS is effective and robust to image deformations such as linear or non-linear scaling, 2D or 3D rotations, tissue tear and tissue loss. We demonstrate the superior performance of ARMBIS on three types of brain images including magnetic resonance imaging, mCherry with 4',6-diamidino-2-phenylindole (DAPI) staining and green fluorescent protein without DAPI staining images.

Availability and implementation

The R software package is freely available at https://www.synapse.org/#!Synapse:syn18638510/wiki/591054 for Not-For-Profit Institutions. If you are a For-Profit Institution, please contact the corresponding author.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-12-01 +31798979,CDK4/6 inhibition in low burden and extensive metastatic breast cancer: summary of an ESMO Open-Cancer Horizons pro and con discussion.,"In December 2017, ESMO Open-Cancer Horizons convened a round-table discussion on the background and latest data regarding cyclin-dependent kinase (CDK)4/6 inhibitors with endocrine therapy (ET) in the treatment of endocrine-sensitive breast cancer (BC). A review on this discussion was published in summer 2018 (https://esmoopen.bmj.com/content/3/5/e000368). Several open questions were identified, which led to a second ESMO Open discussion on CDK4/6 inhibitors, taking place in December 2018 and covered in this article. The panel discussed two important clinical scenarios and the pros and cons of a treatment approach with CDK4/6 inhibitors for each scenario: (1) endocrine-sensitive BC with non-visceral disease and limited spread of the metastases; (2) endocrine-sensitive BC with non-life-threatening visceral involvement. Regarding scenario 1, the panel agreed that CDK4/6 inhibitors should be recommended in first-line therapy for most patients if cost and practicality allow. However, the use of single-agent ET with an aromatase inhibitor in the first-line treatment of these patients is still a possibility for a small group of patients with very limited disease, such as one or two bone lesions or limited lymph node involvement. Regarding scenario 2, chemotherapy is the first approach for patients with endocrine-sensitive metastatic BC with life-threatening visceral involvement because of the need for a faster response. The therapeutic approaches for patients with non-life-threatening visceral involvement are still under debate. 
Nevertheless, CDK4/6 inhibitors are currently the treatment of choice for most patients with a close follow-up of tumour response. A treatment algorithm has been suggested at the round table.",2019-11-13 +31901553,Predicting prolonged opioid prescriptions in opioid-naïve lumbar spine surgery patients.,"

Importance

Preoperative determination of the potential for postoperative opioid dependence in previously naïve patients undergoing elective spine surgery may facilitate targeted interventions.

Objective

The purpose of this study was to develop supervised machine learning algorithms for preoperative prediction of prolonged opioid prescription use in opioid-naïve patients following lumbar spine surgery.

Design

Retrospective review of clinical registry data. Variables considered for prediction included demographics, insurance status, preoperative medications, surgical factors, laboratory values, comorbidities, and neighborhood characteristics. Five supervised machine learning algorithms were developed and assessed by discrimination, calibration, Brier score, and decision curve analysis.

Setting

One healthcare entity (two academic medical centers, three community hospitals), 2000 to 2018.

Participants

Opioid-naïve patients undergoing decompression and/or fusion for lumbar disk herniation, stenosis, and spondylolisthesis.

Main outcome

Sustained prescription opioid use exceeding 90 days after surgery.

Results

Overall, of 8,435 patients included, 359 (4.3%) were found to have prolonged postoperative opioid prescriptions. The elastic-net penalized logistic regression achieved the best performance in the independent testing set not used for algorithm development with c-statistic=0.70, calibration intercept=0.06, calibration slope=1.02, and Brier score=0.039. The five most important factors for prolonged opioid prescriptions were use of instrumented spinal fusion, preoperative benzodiazepine use, preoperative antidepressant use, preoperative gabapentin use, and uninsured status. Individual patient-level explanations were provided for the algorithm predictions and the algorithms were incorporated into an open access digital application available here: https://sorg-apps.shinyapps.io/lumbaropioidnaive/.

Conclusion and relevance

The clinician decision aid developed in this study may be helpful to preoperatively risk-stratify opioid-naïve patients undergoing lumbar spine surgery. The tool demonstrates moderate discriminative capacity for identifying those at greatest risk of prolonged prescription opioid use. External validation is required to further support the potential utility of this tool in practice.",2019-12-31 +30118777,Mapping molecular HLA typing data to UNOS antigen equivalents.,"BACKGROUND:Histocompatibility labs must convert molecular HLA typing data to antigen equivalencies for entry into the United Network for Organ Sharing (UNOS) UNet system. While an Organ Procurement and Transplantation Network (OPTN) policy document provides general guidelines for conversion, the process is complex because no antigen mapping table is available. We present a UNOS antigen equivalency table for all IPD-IMGT/HLA alleles at the A, B, C, DRB1, DRB3/4/5, DQA1, and DQB1 loci. METHODS:An automated script was developed to generate a UNOS antigen equivalency table. Data sources used in the conversion algorithm included the World Marrow Donor Association (WMDA) antigen table, the HLA Dictionary, and UNOS-provided tables. To validate antigen mappings, we converted National Marrow Donor Program (NMDP) high resolution allele frequencies to antigen equivalents and compared with the UNOS Calculated Panel Reactive Antibodies (CPRA) reference panel. RESULTS:Normalized frequency similarity scores between independent NMDP and UNOS panels for 4 US population categories (Caucasian, Hispanic, African American and Asian/Pacific Islander) ranged from 0.85 to 0.97, indicating correct antigen mapping. An open source web application (ALLele to ANtigen (""ALLAN"")) and web services were also developed to map unambiguous and ambiguous HLA typing data to UNOS antigen equivalents based on NMDP population-specific allele frequencies (http://www.transplanttoolbox.org). 
CONCLUSIONS:Computer-assisted interpretation of molecular HLA data may aid in reducing typing discrepancies in UNet. This work also sets a foundation for molecular typing data to be utilized directly in the UNet match run as well as the virtual crossmatch process at transplant centers.",2018-08-15 +27978411,"Comparative Analysis of Five Observational Audit Tools to Assess the Physical Environment of Parks for Physical Activity, 2016.","We reviewed prominent audit tools used to assess the physical environment of parks and their potential to promote physical activity. To accomplish this, we manually searched the Active Living Research website (http://www.activelivingresearch.com) for published observational audit tools that evaluate the physical environment of parks, and we reviewed park audit tools used in studies included in a systematic review of observational park-based physical activity studies. We identified 5 observational audit tools for review: Bedimo-Rung Assessment Tool-Direct Observation (BRAT-DO), Community Park Audit Tool (CPAT), Environmental Assessment of Public Recreation Spaces (EAPRS) tool, Physical Activity Resource Assessment (PARA), and Quality of Public Open Space Tool (POST). All 5 tools have established inter-rater reliability estimates ranging from moderate to good. However, BRAT-DO is the only tool with published validity. We found substantial heterogeneity among the 5 in length, format, intended users, and specific items assessed. 
Researchers, practitioners, or community coalition members should review the goal of their specific project and match their goal with the most appropriate tool and the people who will be using it.",2016-12-15 +31784425,Single-Cell Gene Expression Analyses Reveal Distinct Self-Renewing and Proliferating Subsets in the Leukemia Stem Cell Compartment in Acute Myeloid Leukemia.,"Standard chemotherapy for acute myeloid leukemia (AML) targets proliferative cells and efficiently induces complete remission; however, many patients relapse and die of their disease. Relapse is caused by leukemia stem cells (LSC), the cells with self-renewal capacity. Self-renewal and proliferation are separate functions in normal hematopoietic stem cells (HSC) in steady-state conditions. If these functions are also separate functions in LSCs, then antiproliferative therapies may fail to target self-renewal, allowing for relapse. We investigated whether proliferation and self-renewal are separate functions in LSCs as they often are in HSCs. Distinct transcriptional profiles within LSCs of Mll-AF9/NRASG12V murine AML were identified using single-cell RNA sequencing. Single-cell qPCR revealed that these genes were also differentially expressed in primary human LSCs and normal human HSPCs. A smaller subset of these genes was upregulated in LSCs relative to HSPCs; this subset of genes constitutes ""LSC-specific"" genes in human AML. To assess the differences between these profiles, we identified cell surface markers, CD69 and CD36, whose genes were differentially expressed between these profiles. In vivo mouse reconstitution assays revealed that only CD69High LSCs were capable of self-renewal and were poorly proliferative. In contrast, CD36High LSCs were unable to transplant leukemia but were highly proliferative. 
These data demonstrate that the transcriptional foundations of self-renewal and proliferation are distinct in LSCs as they often are in normal stem cells and suggest that therapeutic strategies that target self-renewal, in addition to proliferation, are critical to prevent relapse and improve survival in AML. SIGNIFICANCE: These findings define and functionally validate a self-renewal gene profile of leukemia stem cells at the single-cell level and demonstrate that self-renewal and proliferation are distinct in AML. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/3/458/F1.large.jpg.",2019-11-29 +27172383,A benchmark testing ground for integrating homology modeling and protein docking.,"Protein docking procedures carry out the task of predicting the structure of a protein-protein complex starting from the known structures of the individual protein components. More often than not, however, the structure of one or both components is not known, but can be derived by homology modeling on the basis of known structures of related proteins deposited in the Protein Data Bank (PDB). Thus, the problem is to develop methods that optimally integrate homology modeling and docking with the goal of predicting the structure of a complex directly from the amino acid sequences of its component proteins. One possibility is to use the best available homology modeling and docking methods. However, the models built for the individual subunits often differ to a significant degree from the bound conformation in the complex, often much more so than the differences observed between free and bound structures of the same protein, and therefore additional conformational adjustments, both at the backbone and side chain levels need to be modeled to achieve an accurate docking prediction. In particular, even homology models of overall good accuracy frequently include localized errors that unfavorably impact docking results. 
The predicted reliability of the different regions in the model can also serve as a useful input for the docking calculations. Here we present a benchmark dataset that should help to explore and solve combined modeling and docking problems. This dataset comprises a subset of the experimentally solved 'target' complexes from the widely used Docking Benchmark from the Weng Lab (excluding antibody-antigen complexes). This subset is extended to include the structures from the PDB related to those of the individual components of each complex, and hence represent potential templates for investigating and benchmarking integrated homology modeling and docking approaches. Template sets can be dynamically customized by specifying ranges in sequence similarity and in PDB release dates, or using other filtering options, such as excluding sets of specific structures from the template list. Multiple sequence alignments, as well as structural alignments of the templates to their corresponding subunits in the target are also provided. The resource is accessible online or can be downloaded at http://cluspro.org/benchmark, and is updated on a weekly basis in synchrony with new PDB releases. Proteins 2016; 85:10-16. © 2016 Wiley Periodicals, Inc.",2016-11-13 +32391237,PEACOCK: A Map-Based Multitype Infectious Disease Outbreak Information System.,"A map-based infectious disease outbreak information system, called PEACOCK, that provides three types of necessary infectious disease outbreak information is presented. The system first collects the infectious disease outbreak statistics from the government agencies and displays the number of infected people and infection indices on the map. Then, it crawls online news articles for each infectious disease and displays the number of mentions of each disease on the map. Users can also search for news articles regarding the disease. Finally, it retrieves the portal search query data and plots the graphs of the trends. 
It divides the risk into three levels (i.e., normal, caution, and danger) and visualizes them using different colors on the map. Users can access infectious disease outbreak information accurately and quickly using the system. As the system visualizes the information using both a map and various types of graphs, users can check the information at a glance. This system is in live at http://www.epidemic.co.kr/map.",2019-06-21 +27616775,PGD: a pangolin genome hub for the research community. ,"Pangolins (order Pholidota) are the only mammals covered by scales. We have recently sequenced and analyzed the genomes of two critically endangered Asian pangolin species, namely the Malayan pangolin (Manis javanica) and the Chinese pangolin (Manis pentadactyla). These complete genome sequences will serve as reference sequences for future research to address issues of species conservation and to advance knowledge in mammalian biology and evolution. To further facilitate the global research effort in pangolin biology, we developed the Pangolin Genome Database (PGD), as a future hub for hosting pangolin genomic and transcriptomic data and annotations, and with useful analysis tools for the research community. Currently, the PGD provides the reference pangolin genome and transcriptome data, gene sequences and functional information, expressed transcripts, pseudogenes, genomic variations, organ-specific expression data and other useful annotations. We anticipate that the PGD will be an invaluable platform for researchers who are interested in pangolin and mammalian research. We will continue updating this hub by including more data, annotation and analysis tools particularly from our research consortium.Database URL: http://pangolin-genome.um.edu.my.",2016-09-11 +29391044,DeepARG: a deep learning approach for predicting antibiotic resistance genes from metagenomic data.,"

Background

Growing concerns about increasing rates of antibiotic resistance call for expanded and comprehensive global monitoring. Advancing methods for monitoring of environmental media (e.g., wastewater, agricultural waste, food, and water) is especially needed for identifying potential resources of novel antibiotic resistance genes (ARGs), hot spots for gene exchange, and as pathways for the spread of ARGs and human exposure. Next-generation sequencing now enables direct access and profiling of the total metagenomic DNA pool, where ARGs are typically identified or predicted based on the ""best hits"" of sequence searches against existing databases. Unfortunately, this approach produces a high rate of false negatives. To address such limitations, we propose here a deep learning approach, taking into account a dissimilarity matrix created using all known categories of ARGs. Two deep learning models, DeepARG-SS and DeepARG-LS, were constructed for short read sequences and full gene length sequences, respectively.

Results

Evaluation of the deep learning models over 30 antibiotic resistance categories demonstrates that the DeepARG models can predict ARGs with both high precision (> 0.97) and recall (> 0.90). The models displayed an advantage over the typical best hit approach, yielding consistently lower false negative rates and thus higher overall recall (> 0.9). As more data become available for under-represented ARG categories, the DeepARG models' performance can be expected to be further enhanced due to the nature of the underlying neural networks. Our newly developed ARG database, DeepARG-DB, encompasses ARGs predicted with a high degree of confidence and extensive manual inspection, greatly expanding current ARG repositories.

Conclusions

The deep learning models developed here offer more accurate antimicrobial resistance annotation relative to current bioinformatics practice. DeepARG does not require strict cutoffs, which enables identification of a much broader diversity of ARGs. The DeepARG models and database are available as a command line version and as a Web service at http://bench.cs.vt.edu/deeparg .",2018-02-01 +34652796,Non-pharmacological interventions for the improvement of post-stroke quality of life amongst older stroke survivors: a systematic review of systematic reviews (The SENATOR ONTOP series).,"

Purpose

The efficacy of non-pharmacological stroke rehabilitation approaches for older stroke survivors is largely unknown, particularly in relation to psychosocial outcomes such as quality of life. This systematic review examined the evidence for such interventions as part of the Optimal Evidence-Based Non-Drug Therapies in Older Persons (ONTOP) project conducted under a European Union-funded project called the Software Engine for the Assessment and Optimisation of Drug and Non-Drug Therapies in Older Persons (SENATOR) [ http://www.senator-project.eu ].

Methods

Thirteen experts in geriatric medicine, as part of a Delphi panel, agreed quality of life to be a critical outcome of stroke rehabilitation. A comprehensive search strategy was developed and databases were searched for eligible systematic reviews from which trials meeting our criteria were identified. Eligible papers were then double reviewed. Due to heterogeneity, narrative analysis was performed. Cochrane risk of bias and GRADE assessment tools were used to assess bias and quality of evidence.

Results

We identified 28 trials, spanning ten types of intervention. Limited evidence supports the use of additional occupational therapy and physiotherapy, with very limited evidence supporting our recommendation to explore caregiver training, constraint-induced movement therapy, device-assisted physiotherapy, and self-management education further.

Conclusion

Limited evidence suggests a range of non-pharmacological interventions may improve the quality of life of older stroke survivors. However, evidence is limited by low study quality and the small number of studies targeting older stroke survivors. We recommend future studies explore such interventions exclusively in older adult populations and improve methodological and outcome reporting.",2019-04-02 +27279488,BIG: a large-scale data integration tool for renal physiology.,"Due to recent advances in high-throughput techniques, we and others have generated multiple proteomic and transcriptomic databases to describe and quantify gene expression, protein abundance, or cellular signaling on the scale of the whole genome/proteome in kidney cells. The existence of so much data from diverse sources raises the following question: ""How can researchers find information efficiently for a given gene product over all of these data sets without searching each data set individually?"" This is the type of problem that has motivated the ""Big-Data"" revolution in Data Science, which has driven progress in fields such as marketing. Here we present an online Big-Data tool called BIG (Biological Information Gatherer) that allows users to submit a single online query to obtain all relevant information from all indexed databases. BIG is accessible at http://big.nhlbi.nih.gov/.",2016-06-08 +21520336,"ThalInd, a β-thalassemia and hemoglobinopathies database for India: defining a model country-specific and disease-centric bioinformatics resource.","Web-based informatics resources for genetic disorders have evolved from genome-wide databases like OMIM and HGMD to Locus Specific databases (LSDBs) and National and Ethnic Mutation Databases (NEMDBs). However, with the increasing amenability of genetic disorders to diagnosis and better management, many previously underreported conditions are emerging as disorders of public health significance. 
In turn, the greater emphasis on noncommunicable disorders has generated a demand for comprehensive and relevant disease-based information from end-users, including clinicians, patients, genetic epidemiologists, health administrators and policymakers. To accommodate these demands, country-specific and disease-centric resources are required to complement the existing LSDBs and NEMDBs. Currently available preconfigured Web-based software applications can be customized for this purpose. The present article describes the formulation and construction of a Web-based informatics resource for β-thalassemia and other hemoglobinopathies, initially for use in India, a multiethnic, multireligious country with a population approaching 1,200 million. The resource ThalInd (http://ccg.murdoch.edu.au/thalind) has been created using the LOVD system, an open source platform-independent database system. The system has been customized to incorporate and accommodate data pertinent to molecular genetics, population genetics, genotype-phenotype correlations, disease burden, and infrastructural assessment. Importantly, the resource also has been aligned with the administrative health system and demographic resources of the country.",2011-06-23 +28531195,Livestock metabolomics and the livestock metabolome: A systematic review.,"Metabolomics uses advanced analytical chemistry techniques to comprehensively measure large numbers of small molecule metabolites in cells, tissues and biofluids. The ability to rapidly detect and quantify hundreds or even thousands of metabolites within a single sample is helping scientists paint a far more complete picture of system-wide metabolism and biology. Metabolomics is also allowing researchers to focus on measuring the end-products of complex, hard-to-decipher genetic, epigenetic and environmental interactions. 
As a result, metabolomics has become an increasingly popular ""omics"" approach to assist with the robust phenotypic characterization of humans, crop plants and model organisms. Indeed, metabolomics is now routinely used in biomedical, nutritional and crop research. It is also being increasingly used in livestock research and livestock monitoring. The purpose of this systematic review is to quantitatively and objectively summarize the current status of livestock metabolomics and to identify emerging trends, preferred technologies and important gaps in the field. In conducting this review we also critically assessed the applications of livestock metabolomics in key areas such as animal health assessment, disease diagnosis, bioproduct characterization and biomarker discovery for highly desirable economic traits (i.e., feed efficiency, growth potential and milk production). A secondary goal of this critical review was to compile data on the known composition of the livestock metabolome (for 5 of the most common livestock species namely cattle, sheep, goats, horses and pigs). These data have been made available through an open access, comprehensive livestock metabolome database (LMDB, available at http://www.lmdb.ca). The LMDB should enable livestock researchers and producers to conduct more targeted metabolomic studies and to identify where further metabolome coverage is needed.",2017-05-22 +22736059,IDPredictor: predict database links in biomedical database.,"Knowledge found in biomedical databases, in particular in Web information systems, is a major bioinformatics resource. In general, this biological knowledge is worldwide represented in a network of databases. These data is spread among thousands of databases, which overlap in content, but differ substantially with respect to content detail, interface, formats and data structure. 
To support a functional annotation of lab data, such as protein sequences, metabolites or DNA sequences as well as a semi-automated data exploration in information retrieval environments, an integrated view to databases is essential. Search engines have the potential of assisting in data retrieval from these structured sources, but fall short of providing a comprehensive knowledge except out of the interlinked databases. A prerequisite of supporting the concept of an integrated data view is to acquire insights into cross-references among database entities. This issue is being hampered by the fact that only a fraction of all possible cross-references are explicitly tagged in the particular biomedical information systems. In this work, we investigate to what extent an automated construction of an integrated data network is possible. We propose a method that predicts and extracts cross-references from multiple life science databases and possible referenced data targets. We study the retrieval quality of our method and report on first, promising results. The method is implemented as the tool IDPredictor, which is published under the DOI 10.5447/IPK/2012/4 and is freely available using the URL: http://dx.doi.org/10.5447/IPK/2012/4.",2012-06-26 +27216254,miRiaD: A Text Mining Tool for Detecting Associations of microRNAs with Diseases.,"

Background

MicroRNAs are increasingly being appreciated as critical players in human diseases, and questions concerning the role of microRNAs arise in many areas of biomedical research. There are several manually curated databases of microRNA-disease associations gathered from the biomedical literature; however, it is difficult for curators of these databases to keep up with the explosion of publications in the microRNA-disease field. Moreover, automated literature mining tools that assist manual curation of microRNA-disease associations currently capture only one microRNA property (expression) in the context of one disease (cancer). Thus, there is a clear need to develop more sophisticated automated literature mining tools that capture a variety of microRNA properties and relations in the context of multiple diseases to provide researchers with fast access to the most recent published information and to streamline and accelerate manual curation.

Methods

We have developed miRiaD (microRNAs in association with Disease), a text-mining tool that automatically extracts associations between microRNAs and diseases from the literature. These associations are often not directly linked, and the intermediate relations are often highly informative for the biomedical researcher. Thus, miRiaD extracts the miR-disease pairs together with an explanation for their association. We also developed a procedure that assigns scores to sentences, marking their informativeness, based on the microRNA-disease relation observed within the sentence.

Results

miRiaD was applied to the entire Medline corpus, identifying 8301 PMIDs with miR-disease associations. These abstracts and the miR-disease associations are available for browsing at http://biotm.cis.udel.edu/miRiaD . We evaluated the recall and precision of miRiaD with respect to information of high interest to public microRNA-disease database curators (expression and target gene associations), obtaining a recall of 88.46-90.78. When we expanded the evaluation to include sentences with a wide range of microRNA-disease information that may be of interest to biomedical researchers, miRiaD also performed very well with an F-score of 89.4. The informativeness ranking of sentences was evaluated in terms of nDCG (0.977) and correlation metrics (0.678-0.727) when compared to an annotator's ranked list.

Conclusions

miRiaD, a high performance system that can capture a wide variety of microRNA-disease related information, extends beyond the scope of existing microRNA-disease resources. It can be incorporated into manual curation pipelines and serve as a resource for biomedical researchers interested in the role of microRNAs in disease. In our ongoing work we are developing an improved miRiaD web interface that will facilitate complex queries about microRNA-disease relationships, such as ""In what diseases does microRNA regulation of apoptosis play a role?"" or ""Is there overlap in the sets of genes targeted by microRNAs in different types of dementia?"".""",2016-04-29 +28057473,Analysis of longitudinal diffusion-weighted images in healthy and pathological aging: An ADNI study.,"

Background & new method

The widely used framework of voxel-based morphometry for analyzing neuroimages is extended here to model longitudinal imaging data by exchanging the linear model with a linear mixed-effects model. The new approach is employed for analyzing a large longitudinal sample of 756 diffusion-weighted images acquired in 177 subjects of the Alzheimer's Disease Neuroimaging initiative (ADNI).

Results and comparison with existing methods

While sample- and group-level results from both approaches are equivalent, the mixed-effect model yields information at the single subject level. Interestingly, the neurobiological relevance of the relevant parameter at the individual level describes specific differences associated with aging. In addition, our approach highlights white matter areas that reliably discriminate between patients with Alzheimer's disease and healthy controls with a predictive power of 0.99 and include the hippocampal alveus, the para-hippocampal white matter, the white matter of the posterior cingulate, and optic tracts. In this context, notably the classifier includes a sub-population of patients with minimal cognitive impairment into the pathological domain.

Conclusion

Our classifier offers promising features for an accessible biomarker that predicts the risk of conversion to Alzheimer's disease. Data used in preparation of this article were obtained from the Alzheimer's Disease Neuroimaging Initiative (ADNI) database (adni.loni.usc.edu). As such, the investigators within the ADNI contributed to the design and implementation of ADNI and/or provided data but did not participate in analysis or writing of this report. A complete listing of ADNI investigators can be found at: http://adni.loni.usc.edu/wp-content/uploads/how to apply/ADNI Acknowledgement List.pdf. Significance statement This study assesses neuro-degenerative processes in the brain's white matter as revealed by diffusion-weighted imaging, in order to discriminate healthy from pathological aging in a large sample of elderly subjects. The analysis of time-series examinations in a linear mixed effects model allowed the discrimination of population-based aging processes from individual determinants. We demonstrate that a simple classifier based on white matter imaging data is able to predict the conversion to Alzheimer's disease with a high predictive power.",2017-01-03 +31066364,[Facts and figures of clinical pathways in Italy: results from the PDTA Net project.],"The approval of clinical pathways (CPWs) represents a key step to focus the care management on the patient. The PDTA Net project, by ReS Foundation and CINECA, aims to create a reference tool to study how the local organizational models influence healthcare and clinical outcomes. The article shows the analysis of all CPWs approved by Italian Regions and Autonomous Provinces until 31/12/2018. The search for documents was performed on the institutional websites through specific keywords. CPWs were filled into a database, according to the Region, publication year, disease of interest (distinguishing between chronic diseases with high epidemiological impact and rare diseases) and relevant clinical area. 
All documents were analyzed by geographical and temporal distribution, the latter also according to ministerial measures. From 2005 to 2018, 536 Regional CPWs were approved (316 for chronic diseases with a high epidemiological impact and 220 for rare diseases). The Regions with the highest number of CPWs of chronic diseases were Umbria (34 CPWs) and Piemonte (33). The most addressed clinical areas were: oncology (72), neurology (60), cardiology (34) and metabolic disorders (22). The most issued diseases were: diabetes (17), trauma/polytrauma (15), chronic obstructive pulmonary disease and multiple sclerosis (12 each), stroke (11), rheumatoid arthritis, breast cancer and colorectal neoplasms (10 each). The publication of the documents was affected by ministerial measures (Balduzzi Law, National Chronicity Plan, Diabetic Disease Plan and National Dementia Plan). The majority of CPWs on rare diseases was retrieved in Regions with activated Rare Disease Networks: Lombardia (110 CPWs), Lazio (64) and Toscana (17). This study showed that, to date, in Italy there are several CPWs published at Regional level, nevertheless their structure and application are heterogeneous and strongly influenced by the National Plans. All analyzed documents are available through the web platform of the project https://fondazioneres.it/pdta/. This project could be useful for health system stakeholders, in order to encourage the transition to new health governance and making CPWs effective governance tools.",2019-04-01 +30689864,CHARMM-GUI Glycan Modeler for modeling and simulation of carbohydrates and glycoconjugates.,"Characterizing glycans and glycoconjugates in the context of three-dimensional structures is important in understanding their biological roles and developing efficient therapeutic agents. Computational modeling and molecular simulation have become an essential tool complementary to experimental methods. 
Here, we present a computational tool, Glycan Modeler for in silico N-/O-glycosylation of the target protein and generation of carbohydrate-only systems. In our previous study, we developed Glycan Reader, a web-based tool for detecting carbohydrate molecules from a PDB structure and generation of simulation system and input files. As integrated into Glycan Reader in CHARMM-GUI, Glycan Modeler (Glycan Reader & Modeler) enables to generate the structures of glycans and glycoconjugates for given glycan sequences and glycosylation sites using PDB glycan template structures from Glycan Fragment Database (http://glycanstructure.org/fragment-db). Our benchmark tests demonstrate the universal applicability of Glycan Reader & Modeler to various glycan sequences and target proteins. We also investigated the structural properties of modeled glycan structures by running 2-μs molecular dynamics simulations of HIV envelope protein. The simulations show that the modeled glycan structures built by Glycan Reader & Modeler have the similar structural features compared to the ones solved by X-ray crystallography. We also describe the representative examples of glycoconjugate modeling with video demos to illustrate the practical applications of Glycan Reader & Modeler. Glycan Reader & Modeler is freely available at http://charmm-gui.org/input/glycan.",2019-04-01 +25643705,BRCA1 Circos: a visualisation resource for functional analysis of missense variants.,"

Background

Inactivating germline mutations in the tumour suppressor gene BRCA1 are associated with a significantly increased risk of developing breast and ovarian cancer. A large number (>1500) of unique BRCA1 variants have been identified in the population and can be classified as pathogenic, non-pathogenic or as variants of unknown significance (VUS). Many VUS are rare missense variants leading to single amino acid changes. Their impact on protein function cannot be directly inferred from sequence information, precluding assessment of their pathogenicity. Thus, functional assays are critical to assess the impact of these VUS on protein activity. BRCA1 is a multifunctional protein and different assays have been used to assess the impact of variants on different biochemical activities and biological processes.

Methods and results

To facilitate VUS analysis, we have developed a visualisation resource that compiles and displays functional data on all documented BRCA1 missense variants. BRCA1 Circos is a web-based visualisation tool based on the freely available Circos software package. The BRCA1 Circos web tool (http://research.nhgri.nih.gov/bic/circos/) aggregates data from all published BRCA1 missense variants for functional studies, harmonises their results and presents various functionalities to search and interpret individual-level functional information for each BRCA1 missense variant.

Conclusions

This research visualisation tool will serve as a quick one-stop publically available reference for all the BRCA1 missense variants that have been functionally assessed. It will facilitate meta-analysis of functional data and improve assessment of pathogenicity of VUS.",2015-02-02 +29297283,R3D-BLAST2: an improved search tool for similar RNA 3D substructures.,"BACKGROUND:RNA molecules have been known to play a variety of significant roles in cells. In principle, the functions of RNAs are largely determined by their three-dimensional (3D) structures. As more and more RNA 3D structures are available in the Protein Data Bank (PDB), a bioinformatics tool, which is able to rapidly and accurately search the PDB database for similar RNA 3D structures or substructures, is helpful to understand the structural and functional relationships of RNAs. RESULTS:Since its first release in 2011, R3D-BLAST has become a useful tool for searching the PDB database for similar RNA 3D structures and substructures. It was implemented by a structural-alphabet (SA)-based method, which utilizes an SA with 23 structural letters to encode RNA 3D structures into one-dimensional (1D) structural sequences and applies BLAST to the resulting structural sequences for searching similar substructures of RNAs. In this study, we have upgraded R3D-BLAST to develop a new web server named R3D-BLAST2 based on a higher quality SA newly constructed from a representative and sufficiently non-redundant list of RNA 3D structures. In addition, we have modified the kernel program in R3D-BLAST2 so that it can accept an RNA structure in the mmCIF format as an input. The results of our experiments on a benchmark dataset have demonstrated that R3D-BLAST2 indeed performs very well in comparison to its earlier version R3D-BLAST and other similar tools RNA FRABASE, FASTR3D and RAG-3D by searching a larger number of RNA 3D substructures resembling those of the input RNA. 
CONCLUSIONS:R3D-BLAST2 is a valuable BLAST-like search tool that can more accurately scan the PDB database for similar RNA 3D substructures. It is publicly available at http://genome.cs.nthu.edu.tw/R3D-BLAST2/ .",2017-12-28 +30758815,Vertebral Artery Dissection in Sport: A Systematic Review.,"

Background

Vertebral artery dissection (VAD) is a potentially catastrophic injury that may occur during sports participation. A comprehensive review is needed to collate documented cases to improve understanding and inform future preventative approaches.

Objective

This review aimed to understand the extent of VAD in sport and characterise trends suggestive of mechanisms of injury.

Methods

Electronic databases were searched using terms related to VAD and sport. Records were included if they described one or more cases of VAD attributed to sport.

Results

A total of 79 records described 128 individual cases of VAD in sport, of which 118 were confirmed by imaging or autopsy and included in analyses. Cases were attributed to 43 contact and non-contact sports. The median age of cases was 33 years (IQR 22-44), and 75% were male. There were 22 cases of fatal injury, of which ten involved an impact to the mastoid region and seven involved an impact to the head or neck. Non-fatal cases of VAD were attributed to impact to the head or neck (not mastoid region), movement or held position without impact, and in some cases no reported incident.

Conclusions

VAD attributed to sports participation is uncommonly reported and the mechanisms are varied. Impact to the mastoid region is consistently implicated in fatal cases and should be the focus of injury prevention strategies in sport. Efforts may also be directed at improving the prognosis of cases with delayed presentation through clinical recognition and imaging. The review was registered on the international prospective register for systematic reviews ( http://www.crd.york.ac.uk/PROSPERO ) (CRD42018090543).",2019-04-01 +30803047,Molecular typing of Clostridioides difficile isolates from clinical and non-clinical samples in Iran.,"Clostridioides difficile is a major cause of nosocomial infectious diarrhea in hospitalized patients throughout the world. We aimed to characterize C. difficile isolates among hospitalized patients, hospital staffs, and hospital environment samples obtained in three tertiary care hospitals of Iran with regard to their molecular types between June 2016 and November 2017. The toxigenicity of C. difficile isolates was determined by toxigenic culture and multiplex-PCR. Toxigenic C. difficile isolates collected were ribotyped using capillary gel electrophoresis-based PCR and the database of WEBRIBO (http://webribo.ages.at). Of 500 clinical and non-clinical samples, toxigenic C. difficile were identified in 35 of 250 stool samples (14%) and in 3 of 250 swabs (1.2%). The most frequently found ribotypes (RTs) were 039, AI-12, and AI-21 (15.8, 10.52, and 10.52% of all isolates, respectively). Further RTs were: 017, 001, AI-3, AI-15, AI-18, AI-10, AI-4, and PR21195 (as new ribotype). The epidemic RTs (027 and 078) seen in the Europe, North America, and Asia were completely absent in this study.",2019-04-01 +35116777,The proteomic comparison of peripheral circulation-derived exosomes from the epithelial ovarian carcinoma (EOC) patients and non-EOC subjects.,"

Background

Ovarian cancer is the most lethal tumor of the female reproductive system. Establishing a methodology to screen and diagnose ovarian cancer in the early stage is important. Exosomes have been shown to be loaded with tumor-associated molecules. In this study, we compared the proteins loaded in exosomes from the peripheral circulation of epithelial ovarian carcinoma (EOC) patients and controls.

Methods

Exosomes were purified via ultracentrifugation plus 0.22 µm filtration from the blood of EOC patients and patients with pelvic floor dysfunction (PFD). Tumor tissues and normal ovarian tissues were also obtained. Proteomic analyses of exosomes and tumor/normal ovarian tissues were performed with isobaric tags for relative and absolute quantitation (iTRAQ) and high-performance liquid chromatography/mass spectrometry (HPLC/MS) analyses. The LocDB (http://www.rostlab.org/services/locDB), PANTHER (http://www.pantherdb.org/) and Vesiclepedia databases were used for biological information analysis.

Results

We identified 408 differentially expressed proteins in exosomes from EOC patients and noncancer controls. Furthermore, we identified 954 differentially expressed proteins from ovarian cancer tissues and normal ovarian tissues. Thirty-five proteins exhibited upregulation in both cancer patient exosomes and cancer tissues. Among these 35 proteins, eight proteins (chloride intracellular channel protein 4, serine/threonine-protein kinase 1, aminoacyl tRNA synthetase complex-interacting multifunctional protein 1, sorting nexin-3, protein FAM49B, fermitin family homolog 3, tubulin beta-3 chain and lactotransferrin) were confirmed in both exosome databases and other studies.

Conclusions

We isolated exosomes from the peripheral blood of EOC patients and noncancer controls and identified 35 proteins that were upregulated in both EOC patient exosomes and ovarian cancer tissues. Comparisons with the exosome molecular databases and other studies identified eight proteins as potential tumor markers, which might offer new tools for the early diagnosis of ovarian cancer.",2019-04-01 +30948799,"Publisher Correction: DNA sequencing at 40: past, present and future.","In this Review, the year of publication of reference 54 should be 2005, not 2015. In Box 2, ""1982: GenBank ( https://www.ncbi.nlm.nih.gov/genbank/statistics/ )"" should read ""1982: Genbank/ENA/DDBJ"" and ""2007: NCBI Short Read Archive"" should read ""2007: NCBI and ENA Short Read Archives""; this is because the launches of these American, European and Japanese databases were coordinated. These errors have not been corrected.",2019-04-01 +30169743,HLA-IMPUTER: an easy to use web application for HLA imputation and association analysis using population-specific reference panels.,"

Summary

HLA allele imputation from SNP genotypes has become increasingly useful, but its accuracy is heavily dependent on the reference panels used. HLA-IMPUTER implements HIBAG algorithm for HLA imputation with different population specific reference panels, including a new Han Chinese reference panel derived from 10 689 samples. We provide a convenient platform for researchers to impute HLA alleles and perform association analysis.

Availability and implementation

http://wyanglab.org:3838/RefPanelWebsite/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +28018846,Developing integrated crop knowledge networks to advance candidate gene discovery.,"The chances of raising crop productivity to enhance global food security would be greatly improved if we had a complete understanding of all the biological mechanisms that underpinned traits such as crop yield, disease resistance or nutrient and water use efficiency. With more crop genomes emerging all the time, we are nearer having the basic information, at the gene-level, to begin assembling crop gene catalogues and using data from other plant species to understand how the genes function and how their interactions govern crop development and physiology. Unfortunately, the task of creating such a complete knowledge base of gene functions, interaction networks and trait biology is technically challenging because the relevant data are dispersed in myriad databases in a variety of data formats with variable quality and coverage. In this paper we present a general approach for building genome-scale knowledge networks that provide a unified representation of heterogeneous but interconnected datasets to enable effective knowledge mining and gene discovery. We describe the datasets and outline the methods, workflows and tools that we have developed for creating and visualising these networks for the major crop species, wheat and barley. We present the global characteristics of such knowledge networks and with an example linking a seed size phenotype to a barley WRKY transcription factor orthologous to TTG2 from Arabidopsis, we illustrate the value of integrated data in biological knowledge discovery. 
The software we have developed (www.ondex.org) and the knowledge resources (http://knetminer.rothamsted.ac.uk) we have created are all open-source and provide a first step towards systematic and evidence-based gene discovery in order to facilitate crop improvement.",2016-11-02 +31639788,Evidence-Based Clinical Practice Guidelines for the Management of Patients with Lentigo Maligna.,"

Introduction

Lentigo maligna (LM) is a subtype of melanoma in situ that usually occurs in sun-damaged skin and is characterised by an atypical proliferation of melanocytes within the basal epidermis. If left untreated, LM can develop into invasive melanoma, termed lentigo maligna melanoma, which shares the same prognosis as other types of invasive melanoma. The incidence rates of LM are steadily increasing worldwide, in parallel with increases in the incidence rates of invasive melanoma, and establishing appropriate guidelines for the management of LM is therefore of great importance.

Methods

A multidisciplinary working party established by Cancer Council Australia has recently produced up-to-date, evidence-based clinical practice guidelines for the management of melanoma and LM. Following selection of the most relevant clinical questions, a comprehensive literature search for relevant studies was conducted, followed by systematic review of these studies. Data were summarised and the evidence was assessed, leading to the development of recommendations. After public consultation and approval by the full guidelines working party, these recommendations were published on the Cancer Council Australia wiki platform (https://wiki.cancer.org.au/australia/Clinical_question:Effective_interventions_to_improve_outcomes_in_lentigo_maligna%3F). Main Recommendations: Surgical removal of LM remains the standard treatment, with 5- to 10-mm clinical margins when possible. While yet to be fully validated, the use of peri-operative reflectance confocal microscopy to assess margins should be considered where available. There is a lack of high-quality evidence to infer the most effective non-surgical treatment. When surgical removal of LM is not possible or refused, radiotherapy is recommended. When both surgery and radiotherapy are not appropriate or refused, topical imiquimod is the recommended treatment. Cryotherapy and laser therapy are not recommended for the treatment of LM.",2019-10-22 +30652085,PDB_Amyloid: an extended live amyloid structure list from the PDB.,"The Protein Data Bank (PDB) contains more than 135 000 entries at present. From these, relatively few amyloid structures can be identified, since amyloids are insoluble in water. Therefore, most amyloid structures deposited in the PDB are in the form of solid state NMR data. 
Based on the geometric analysis of these deposited structures, we have prepared an automatically updated web server, which generates a list of the deposited amyloid structures, and also entries of globular proteins that have amyloid-like substructures of given size and characteristics. We have found that by applying only appropriately selected geometric conditions, it is possible to identify deposited amyloid structures and a number of globular proteins with amyloid-like substructures. We have analyzed these globular proteins and have found proof in the literature that many of them form amyloids more easily than many other globular proteins. Our results relate to the method of Stanković et al. [Stanković I et al. (2017) IPSI BgD Tran Int Res 13, 47-51], who applied a hybrid textual-search and geometric approach for finding amyloids in the PDB. If one intends to identify a subset of the PDB for certain applications, the identification algorithm needs to be re-run periodically, since in 2017 on average 30 new entries per day were deposited in the data bank. Our web server is updated regularly and automatically, and the identified amyloid and partial amyloid structures can be viewed or their list can be downloaded from the following website https://pitgroup.org/amyloid.",2018-11-22 +30202060,Terminal exon characterization with TECtool reveals an abundance of cell-specific isoforms.,"Sequencing of RNA 3' ends has uncovered numerous sites that do not correspond to the termination sites of known transcripts. Through their 3' untranslated regions, protein-coding RNAs interact with RNA-binding proteins and microRNAs, which regulate many properties, including RNA stability and subcellular localization. We developed the terminal exon characterization (TEC) tool ( http://tectool.unibas.ch ), which can be used with RNA-sequencing data from any species for which a genome annotation that includes sites of RNA cleavage and polyadenylation is available. 
We discovered hundreds of previously unknown isoforms and cell-type-specific terminal exons in human cells. Ribosome profiling data revealed that many of these isoforms were translated. By applying TECtool to single-cell sequencing data, we found that the newly identified isoforms were expressed in subpopulations of cells. Thus, TECtool enables the identification of previously unknown isoforms in well-studied cell systems and in rare cell types.",2018-09-10 +22080512,Plantmetabolomics.org: mass spectrometry-based Arabidopsis metabolomics--database and tools update.,"The PlantMetabolomics (PM) database (http://www.plantmetabolomics.org) contains comprehensive targeted and untargeted mass spectrum metabolomics data for Arabidopsis mutants across a variety of metabolomics platforms. The database allows users to generate hypotheses about the changes in metabolism for mutants with genes of unknown function. Version 2.0 of PlantMetabolomics.org currently contains data for 140 mutant lines along with the morphological data. A web-based data analysis wizard allows researchers to select preprocessing and data-mining procedures to discover differences between mutants. This community resource enables researchers to formulate models of the metabolic network of Arabidopsis and enhances the research community's ability to formulate testable hypotheses concerning gene functions. PM features new web-based tools for data-mining analysis, visualization tools and enhanced cross links to other databases. The database is publicly available. PM aims to provide a hypothesis building platform for the researchers interested in any of the mutant lines or metabolites.",2011-11-10 +31004132,"DamMet: ancient methylome mapping accounting for errors, true variants, and post-mortem DNA damage. ","Recent computational advances in ancient DNA research have opened access to the detection of ancient DNA methylation footprints at the genome-wide scale. 
The most commonly used approach infers the methylation state of a given genomic region on the basis of the amount of nucleotide mis-incorporations observed at CpG dinucleotide sites. However, this approach overlooks a number of confounding factors, including the presence of sequencing errors and true variants. The scale and distribution of the inferred methylation measurements are also variable across samples, precluding direct comparisons. Here, we present DamMet, an open-source software program retrieving maximum likelihood estimates of regional CpG methylation levels from ancient DNA sequencing data. It builds on a novel statistical model of post-mortem DNA damage for dinucleotides, accounting for sequencing errors, genotypes, and differential post-mortem cytosine deamination rates at both methylated and unmethylated sites. To validate DamMet, we extended gargammel, a sequence simulator for ancient DNA data, by introducing methylation-dependent features of post-mortem DNA decay. This new simulator provides direct validation of DamMet predictions. Additionally, the methylation levels inferred by DamMet were found to be correlated to those inferred by epiPALEOMIX and both on par and directly comparable to those measured from whole-genome bisulphite sequencing experiments of fresh tissues. DamMet provides genuine estimates for local DNA methylation levels in ancient individual genomes. The returned estimates are directly cross-sample comparable, and the software is available as an open-source C++ program hosted at https://gitlab.com/KHanghoj/DamMet along with a manual and tutorial.",2019-04-01 +28651548,Canis mtDNA HV1 database: a web-based tool for collecting and surveying Canis mtDNA HV1 haplotype in public database.,"

Background

Canine and wolf mitochondrial DNA haplotypes, which can be used for forensic or phylogenetic analyses, have been defined in various schemes depending on the region analyzed. In recent studies, the 582 bp fragment of the HV1 region is most commonly used. 317 different canine HV1 haplotypes have been reported in the rapidly growing public database GenBank. These reported haplotypes contain several inconsistencies in their haplotype information. To overcome this issue, we have developed a Canis mtDNA HV1 database. This database collects data on the HV1 582 bp region in dog mitochondrial DNA from GenBank to screen and correct the inconsistencies. It also supports users in detection of novel mutation profiles and assignment of new haplotypes.

Description

The Canis mtDNA HV1 database (CHD) contains 5567 nucleotide entries originating from 15 subspecies in the species Canis lupus. Of these entries, 3646 were haplotypes and grouped into 804 distinct sequences. 319 sequences were recognized as previously assigned haplotypes, while the remaining 485 sequences had new mutation profiles and were marked as new haplotype candidates awaiting further analysis for haplotype assignment. Of the 3646 nucleotide entries, only 414 were annotated with correct haplotype information, while 3232 had insufficient or lacked haplotype information and were corrected or modified before storing in the CHD. The CHD can be accessed at http://chd.vnbiology.com . It provides sequences, haplotype information, and a web-based tool for mtDNA HV1 haplotyping. The CHD is updated monthly and supplies all data for download.

Conclusions

The Canis mtDNA HV1 database contains information about canine mitochondrial DNA HV1 sequences with reconciled annotation. It serves as a tool for detection of inconsistencies in GenBank and helps identify new HV1 haplotypes. Thus, it supports the scientific community in naming new HV1 haplotypes and in reconciling existing annotation of HV1 582 bp sequences.",2017-06-26 +26227548,Simulated unbound structures for benchmarking of protein docking in the DOCKGROUND resource.,"

Background

Proteins play an important role in biological processes in living organisms. Many protein functions are based on interaction with other proteins. The structural information is important for adequate description of these interactions. Sets of protein structures determined in both bound and unbound states are essential for benchmarking of the docking procedures. However, the number of such proteins in PDB is relatively small. A radical expansion of such sets is possible if the unbound structures are computationally simulated.

Results

The DOCKGROUND public resource provides data to improve our understanding of protein-protein interactions and to assist in the development of better tools for structural modeling of protein complexes, such as docking algorithms and scoring functions. A large set of simulated unbound protein structures was generated from the bound structures. The modeling protocol was based on 1 ns Langevin dynamics simulation. The simulated structures were validated on the ensemble of experimentally determined unbound and bound structures. The set is intended for large scale benchmarking of docking algorithms and scoring functions.

Conclusions

A radical expansion of the unbound protein docking benchmark set was achieved by simulating the unbound structures. The simulated unbound structures were selected according to criteria from systematic comparison of experimentally determined bound and unbound structures. The set is publicly available at http://dockground.compbio.ku.edu.",2015-07-31 +29056536,Predicting risk of pelvic floor disorders 12 and 20 years after delivery.,"

Background

Little progress has been made in the prevention of pelvic floor disorders, despite their significant health and economic impact. The identification of women who are at risk remains a key element in targeting prevention and planning health resource allocation strategies. Although events around the time of childbirth are recognized clinically as important predictors, it is difficult to counsel women and to intervene around the time of childbirth because of an inability to convey a patient's risk accurately in the presence of multiple risk factors and the long time lapse, which is often decades, between obstetric events and the onset of pelvic floor disorders later in life. Prediction models and scoring systems have been used in other areas of medicine to identify patients who are at risk for chronic diseases. Models have been developed for use before delivery that predict short-term risk of pelvic floor disorders after childbirth, but no models that predict long-term risk exist.

Objective

The purpose of this study was to use variables that are known before and during childbirth to develop and validate prognostic models that will estimate the risks of these disorders 12 and 20 years after delivery.

Study design

Obstetric variables were collected from 2 cohorts: (1) women who gave birth in the United Kingdom and New Zealand (n=3763) and (2) women from the Swedish Medical Birth Register (n=4991). Pelvic floor disorders were self-reported 12 years after childbirth in the United Kingdom/New Zealand cohort and 20 years after childbirth in the Swedish Register. The cohorts were split so that data during the first half of the cohort's time period were used to fit prediction models, and validation was performed from the second half (temporal validation). Because there is currently no consensus on how to best define pelvic floor disorders from a patient's perspective, we chose to fit the data for each model using multiple outcome definitions for prolapse, urinary incontinence, fecal incontinence, ≥1 pelvic floor disorder, and ≥2 pelvic floor disorders. Model accuracy was measured in the following manner: (1) by ranking an individual's risk among all subjects in the cohort (discrimination) with the use of a concordance index and (2) by observing whether the predicted probability was too high or low (calibration) at a range of predicted probabilities with the use of visual plots.

Results

Models were able to discriminate between women who experienced bothersome symptoms or received treatment at 12 and 20 years, respectively, for pelvic organ prolapse (concordance indices, 0.570, 0.627), urinary incontinence (concordance indices, 0.653, 0.689), fecal incontinence (concordance indices, 0.618, 0.676), ≥1 pelvic floor disorders (concordance indices, 0.639, 0.675), and ≥2 pelvic floor disorders (concordance indices, 0.635, 0.619). Route of delivery and family history of each pelvic floor disorder were strong predictors in most models. Urinary incontinence before and during the index pregnancy was a strong predictor for the development of all pelvic floor disorders in most models 12 years after delivery. The 12- and 20-year bothersome symptoms or treatment for prolapse models were accurate when predictions were provided for risk from 0% to approximately 15%. The 12- and 20-year primiparous model began to overpredict when risk rates reached 20%. When we predicted bothersome symptoms or treatment for urinary incontinence, the 12-year models were accurate when predictions ranged from approximately 5-60%; the 20-year primiparous models were accurate from 5% to 80%. For bothersome symptoms or treatment for fecal incontinence, the 12- and 20-year models were accurate from 1-15% risk and began to overpredict at rates >15% and 20%, respectively.

Conclusion

Models may provide an opportunity before birth to identify women who are at low risk of the development of pelvic floor disorders and to institute prevention strategies such as pelvic floor muscle training, weight control, or elective cesarean section for women who are at higher risk. Models are provided at http://riskcalc.org/UR_CHOICE/.",2017-10-19 +31589793,Geometrical characterization of T cell receptor binding modes reveals class-specific binding to maximize access to antigen.,"Recognition of antigenic peptides bound to major histocompatibility complex (MHC) proteins by αβ T cell receptors (TCRs) is a hallmark of T cell mediated immunity. Recent data suggest that variations in TCR binding geometry may influence T cell signaling, which could help explain outliers in relationships between physical parameters such as TCR-pMHC binding affinity and T cell function. Traditionally, TCR binding geometry has been described with simple descriptors such as the crossing angle, which quantifies what has become known as the TCR's diagonal binding mode. However, these descriptors often fail to reveal distinctions in binding geometry that are apparent through visual inspection. To provide a better framework for relating TCR structure to T cell function, we developed a comprehensive system for quantifying the geometries of how TCRs bind peptide/MHC complexes. We show that our system can discern differences not clearly revealed by more common methods. As an example of its potential to impact biology, we used it to reveal differences in how TCRs bind class I and class II peptide/MHC complexes, which we show allow the TCR to maximize access to and ""read out"" the peptide antigen. 
We anticipate our system will be of use in not only exploring these and other details of TCR-peptide/MHC binding interactions, but also addressing questions about how TCR binding geometry relates to T cell function, as well as modeling structural properties of class I and class II TCR-peptide/MHC complexes from sequence information. The system is available at https://tcr3d.ibbr.umd.edu/tcr_com or for download as a script.",2019-10-21 +30225298,Whole-genome sequence data and analysis of a Staphylococcus aureus strain SJTUF_J27 isolated from seaweed.,"The complete genome sequence data of S. aureus SJTUF_J27 isolated from seaweed in China is reported here. The size of the genome is 2.8 Mbp with 32.9% G + C content, consisting of 2614 coding sequences and 77 RNAs. A number of virulence factors, including antimicrobial resistance genes (fluoroquinolone, beta-lactams, fosfomycin, mupirocin, trimethoprim, and aminocoumarin) and the egc enterotoxin cluster, were found in the genome. In addition, the genes encoding metal-binding proteins and associated heavy metal resistance were identified. Phylogenetic data analysis, based upon genome-wide single nucleotide polymorphisms (SNPs), and comparative genomic evaluation with BLAST Ring Image Generator (BRIG) were performed for SJTUF_J27 and four S. aureus strains isolated from food. The completed genome data was deposited in NCBI׳s GenBank under the accession number CP019117, https://www.ncbi.nlm.nih.gov/nuccore/CP019117.",2018-08-30 +21898825,UniCarbKB: putting the pieces together for glycomics research.,"Despite the success of several international initiatives the glycosciences still lack a managed infrastructure that contributes to the advancement of research through the provision of comprehensive structural and experimental glycan data collections. UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. 
The knowledgebase will offer a freely accessible and information-rich resource supported by querying interfaces, annotation technologies and the adoption of common standards to integrate structural, experimental and functional data. The UniCarbKB framework endeavors to support the growth of glycobioinformatics and the dissemination of knowledge through the provision of an open and unified portal to encourage the sharing of data. In order to achieve this, the framework is committed to the development of tools and procedures that support data annotation, and expanding interoperability through cross-referencing of existing databases. Database URL: http://www.unicarbkb.org.",2011-09-19 +23429746,An updated metabolic view of the Bacillus subtilis 168 genome.,"Continuous updating of the genome sequence of Bacillus subtilis, the model of the Firmicutes, is a basic requirement needed by the biology community. In this work new genomic objects have been included (toxin/antitoxin genes and small RNA genes) and the metabolic network has been entirely updated. The curated view of the validated metabolic pathways present in the organism as of 2012 shows several significant differences from pathways present in the other bacterial reference, Escherichia coli: variants in synthesis of cofactors (thiamine, biotin, bacillithiol), amino acids (lysine, methionine), branched-chain fatty acids, tRNA modification and RNA degradation. In this new version, gene products that are enzymes or transporters are explicitly linked to the biochemical reactions of the RHEA reaction resource (http://www.ebi.ac.uk/rhea/), while novel compound entries have been created in the database Chemical Entities of Biological Interest (http://www.ebi.ac.uk/chebi/). 
The newly annotated sequence is deposited at the International Nucleotide Sequence Data Collaboration with accession number AL009126.4.",2013-02-21 +22123747,MitoZoa 2.0: a database resource and search tools for comparative and evolutionary analyses of mitochondrial genomes in Metazoa.,"The MITOchondrial genome database of metaZOAns (MitoZoa) is a public resource for comparative analyses of metazoan mitochondrial genomes (mtDNA) at both the sequence and genomic organizational levels. The main characteristics of the MitoZoa database are the careful revision of mtDNA entry annotations and the possibility of retrieving gene order and non-coding region (NCR) data in appropriate formats. The MitoZoa retrieval system enables basic and complex queries at various taxonomic levels using different search menus. MitoZoa 2.0 has been enhanced in several aspects, including: a re-annotation pipeline to check the correctness of protein-coding gene predictions; a standardized annotation of introns and of precursor ORFs whose functionality is post-transcriptionally recovered by RNA editing or programmed translational frameshifting; updates of taxon-related fields and a BLAST sequence similarity search tool. Database novelties and the definition of standard mtDNA annotation rules, together with the user-friendly retrieval system and the BLAST service, make MitoZoa a valuable resource for comparative and evolutionary analyses as well as a reference database to assist in the annotation of novel mtDNA sequences. MitoZoa is freely accessible at http://www.caspur.it/mitozoa.",2011-11-28 +30364728,"Microbiome dataset from the upper respiratory tract of patients living with HIV, HIV/TB and TB from Myanmar.","This article contains microbiome data from the upper respiratory tract of patients living with HIV/TB, HIV and TB from Meiktila, a town in Myanmar where there is a high incidence of HIV and TB. Microbiomes were compared for HIV/TB infected and healthy adults from the same population. 
We collected nasopharyngeal and oropharyngeal swabs from a total of 33 participants (Healthy {5}, HIV/TB {8}, HIV {14}, and TB {6}). DNA was extracted from the swabs and subjected to custom single step 16s rRNA sequencing on an Illumina MiSeq platform. The sequencing data is available via http://www.ncbi.nlm.nih.gov/bioproject/ PRJNA432583.",2018-10-04 +22779037,From sequencer to supercomputer: an automatic pipeline for managing and processing next generation sequencing data.,"Next Generation Sequencing is highly resource intensive. NGS Tasks related to data processing, management and analysis require high-end computing servers or even clusters. Additionally, processing NGS experiments requires suitable storage space and significant manual interaction. At The Ohio State University's Biomedical Informatics Shared Resource, we designed and implemented a scalable architecture to address the challenges associated with the resource intensive nature of NGS secondary analysis built around Illumina Genome Analyzer II sequencers and Illumina's Gerald data processing pipeline. The software infrastructure includes a distributed computing platform consisting of a LIMS called QUEST (http://bisr.osumc.edu), an Automation Server, a computer cluster for processing NGS pipelines, and a network attached storage device expandable up to 40TB. The system has been architected to scale to multiple sequencers without requiring additional computing or labor resources. This platform provides demonstrates how to manage and automate NGS experiments in an institutional or core facility setting.",2012-03-19 +23920826,Analyses of medical data models - identifying common concepts and items in a repository of medical forms.,"One year ago the portal of Medical Data Models (http://medical-data-models.org) was presented as a resource for the scientific community. As of November 2012 there are approximately 3,300 forms with 102,000 items available in the CDISC ODM format. 
First descriptive analyses regarding form metadata demonstrate the capability of such a repository to identify commonly used medical concepts. Most common items are administrative attributes which indicates that more clinical information are needed to increase the secondary use of data documented within these forms.",2013-01-01 +31296218,Evaluating single-subject study methods for personal transcriptomic interpretations to advance precision medicine.,"

Background

Gene expression profiling has benefited medicine by providing clinically relevant insights at the molecular candidate and systems levels. However, to adopt a more 'precision' approach that integrates individual variability including 'omics data into risk assessments, diagnoses, and therapeutic decision making, whole transcriptome expression needs to be interpreted meaningfully for single subjects. We propose an ""all-against-one"" framework that uses biological replicates in isogenic conditions for testing differentially expressed genes (DEGs) in a single subject (ss) in the absence of an appropriate external reference standard or replicates. To evaluate our proposed ""all-against-one"" framework, we construct reference standards (RSs) with five conventional replicate-anchored analyses (NOISeq, DEGseq, edgeR, DESeq, DESeq2) and the remainder were treated separately as single-subject sample pairs for ss analyses (without replicates).

Results

Eight ss methods (NOISeq, DEGseq, edgeR, mixture model, DESeq, DESeq2, iDEG, and ensemble) for identifying genes with differential expression were compared in Yeast (parental line versus snf2 deletion mutant; n = 42/condition) and a MCF7 breast-cancer cell line (baseline versus stimulated with estradiol; n = 7/condition). Receiver-operator characteristic (ROC) and precision-recall plots were determined for eight ss methods against each of the five RSs in both datasets. Consistent with prior analyses of these data, ~ 50% and ~ 15% DEGs were obtained in Yeast and MCF7 datasets respectively, regardless of the RSs method. NOISeq, edgeR, and DESeq were the most concordant for creating a RS. Single-subject versions of NOISeq, DEGseq, and an ensemble learner achieved the best median ROC-area-under-the-curve to compare two transcriptomes without replicates regardless of the RS method and dataset (> 90% in Yeast, > 0.75 in MCF7). Further, distinct specific single-subject methods perform better according to different proportions of DEGs.

Conclusions

The ""all-against-one"" framework provides a honest evaluation framework for single-subject DEG studies since these methods are evaluated, by design, against reference standards produced by unrelated DEG methods. The ss-ensemble method was the only one to reliably produce higher accuracies in all conditions tested in this conservative evaluation framework. However, single-subject methods for identifying DEGs from paired samples need improvement, as no method performed with precision> 90% and obtained moderate levels of recall. http://www.lussiergroup.org/publications/EnsembleBiomarker.",2019-07-11 +23093593,The Papillomavirus Episteme: a central resource for papillomavirus sequence data and analysis.,"The goal of the Papillomavirus Episteme (PaVE) is to provide an integrated resource for the analysis of papillomavirus (PV) genome sequences and related information. The PaVE is a freely accessible, web-based tool (http://pave.niaid.nih.gov) created around a relational database, which enables storage, analysis and exchange of sequence information. From a design perspective, the PaVE adopts an Open Source software approach and stresses the integration and reuse of existing tools. Reference PV genome sequences have been extracted from publicly available databases and reannotated using a custom-created tool. To date, the PaVE contains 241 annotated PV genomes, 2245 genes and regions, 2004 protein sequences and 47 protein structures, which users can explore, analyze or download. 
The PaVE provides scientists with the data and tools needed to accelerate scientific progress for the study and treatment of diseases caused by PVs.",2012-10-23 +,Poster Session A,"A.1 Lessons Learned During Eight Years of Using iTRAQ in the Proteomic Study of Pathology Mia Jullig1, Martin Middleditch1, Garth Cooper1,2 1Maurice Wilkins Centre, University of Auckland, Auckland, New Zealand; 2University of Manchester, UK Our research team has long held the view that proteomics holds the key to unlock pathology. During the past eight years we have increasingly used the LC-MS/MS based iTRAQ labeling approach to further our understanding of a number of pathologies in various tissues of interest. From our very first project utilizing iTRAQ, it became clear that manual processing of the LC-MS/MS output to obtain protein ratios is preferable to using the ratios automatically generated by proprietary quantitative proteomic software. For example, we have identified two relatively common amino acid patterns which generate MS/MS m/z values overlapping with those of certain iTRAQ reporter ions and, if not accounted for, may interfere with the quantitative results. Furthermore, systematically different degrees of missed tryptic cleavages between study groups may skew the results since miscleaved peptides are often less suitable for detection through LC-MS/MS. Other factors which may compromise the relative quantitation include variable success in depleting biofluids e.g. plasma of highly abundant proteins, and large change in abundance of one or more highly abundant proteins in disease. We are continually developing strategies to overcome these issues. It is however clear that each new tissue and pathology comes with its own challenges, highlighting the need for a tailored approach for each project rather than the application of a general formula. 
We here present our current approach to manual handling of LC-MS/MS data from iTRAQ labeled samples, along with a number of cases illustrating the potential pitfalls above and demonstrate how these can be managed. A.2 Whole Animal 15N Metabolic Pulse Chase Labeling Shows which Proteins are Built to Last Jeffrey Savas1, Brandon Toyama2, Varda Levram-Ellisman3, Roger Tsien3, Martin Hetzer2, John Yates1 1The Scripps Research Institute, La Jolla, CA, USA; 2Salk Institute, La Jolla, CA, USA; 3HHMI and Univ. of California San Diego, CA, USA A few proteins with exceptional long lifespans have previously been recognized and shown to perform highly specialized biological functions. These extremely long lived proteins (ELLPs) have been linked to age-dependent deterioration, ranging from decreased fertility and hearing loss to functional decline of neurons. While previously recognized ELLPs such as eye lens crystallin have served as important models for the damage base theory of aging, a systematic approach to identify the extremely long-lived proteome in vivo has been lacking. We have now performed deep proteomic mass spectrometric analysis of rodent tissues metabolically pulse labeled with 15N to determine the ELLP proteome. In order to systematically search for ELLPs, we generated “heavy” rodents with a generational 15N metabolic labeling protocol and estimated labeling efficiency to be near complete. We chased these animals with 14N for 0, 3, 9, 26, 78, or 104 weeks at which time they were immediately sacrificed. These chase periods are roughly equally spaced in log (time). Tissue harvesting and fractionation was performed by standard biochemical techniques and peptides were analyzed by LCLC on Orbitrap Elite mass spectrometers. Protein identification and quantification analysis was performed with the Integrated Proteomics Pipeline (IP2, Integrated Proteomics Applications) using ProLuCID, DTASelect2, and Census. 
As expected, we found that nearly all proteins (>99%) in rodent tissues are recycled in less than a few days and a relatively small number of proteins are long-lived in vivo. Overall tissues possessing post mitotic cells which do not turn over possessed more ELLPs than those tissues with rapid cellular replenishment. The ELLPs could easily be grouped based on shared characteristics. Many ELLPs reside in the cell nucleus and are components of large protein complexes which include nucleosomes/chromatin and the nuclear pore complex. Interesting, we found that often the core proteins of these multi sub-unit assemblies are long lived while peripheral subunits are not. Many extracellular ELLPs have structural roles and include myelin basic protein, laminins, and collagens. The longevity of these proteins is consistent with their molecular function and may be shielded from most protein degradation machinery. We have also identified new ELLPs in brain which localize to axons and synapses and are a current and active area of investigation. By examining intermediate chase periods of a few months we found another unexpected class of ELLPs. These ELLPs are not like the others maintained in extracellular environments for the life time of the organism. Rather, they may play critical sustained functions exclusively during development. One such example are a small number of gene specific transcription factors which may be responsible for gene activation programs which could drive cell specification. Another unexpected example are transmembrane proteins which aid in the cellular patterning of the brain during development. Altogether the ELLP proteome represents an extraordinary set of proteins which may help motivate future investigations into their potential anti-aging or developmental activities. 
The take-away message from these efforts is that while nearly all proteins are rapidly turned over a limited number of proteins show exceptional longevity and function in essential biological processes. A.3 Protein Evolution through the Lens of the Sperm Proteome Timothy Karr Arizona State University, Tempe, AZ, USA Although it is well known that eukaryotic proteins (≈500 aa) are, on average approximately two-thirds longer compared to prokaryotic (≈300 aa), there is as yet no consensus regarding the evolutionary mechanisms responsible for these differences. The length of a given protein is determined in part by the cellular context in which it functions. Selective pressures for small efficient proteins present in high concentration (e.g., metabolic enzymes) in fast growing environments are understandable. However, the complexity of eukaryotic cells compared to prokaryotes might necessitate longer proteins carrying out specialized functions (e.g., cytoskeletal and membrane proteins). Knowledge of cell-type specific proteomes could allow deeper understanding of protein length variation at the cell level. Unfortunately, high throughput MS techniques and data analyses are only now just beginning to achieve deep coverage of the proteomes of complex diploid cells (which may contain upwards to 10,000 proteins). However, MS has proven useful for defining sperm proteomes, a cell type of lower proteome complexity. I analyzed the sperm protein lengths from a variety of species including Drosophila, mouse, rat, human and macaque and compared them to the average whole protein lengths of these species. Remarkably, and without exception, the average sperm protein length of all species analyzed were significantly longer than the whole proteome. The Dmel sperm protein length was greater than twice the average for the whole proteome length (950 aa vs. 507 aa). 
These datasets have also provided insights into the variation of individual cellular proteomes within a complex metazoan and possibly provides an index for the degree in which evolutionary pressures (selection) have shaped protein length. Thus, protein length evolution may have been driven by traits related to sperm functionality (i.e., motility, high axial ratios). This represents the first analysis of whole cell proteome lengths and provides a foundation for future functional, bioinformatic and evolutionary analyses of cellular proteome evolution in an organismal context. A.4 MEK and PI3K Dual-inhibition Elicits Pathway-specific and Mitochondrial Ubiquitination Daisy Bustos, Joshua Baughman, Lilian Phu, Taner Dogan, William F. Forrest, Klaus P. Hoeflich, Donald S. Kirkpatrick Genentech, So. San Francisco, CA, USA Small molecule inhibitors of MEK (GDC-0973) and PI3K (GDC-0941) have been developed for the treatment of cancer and effective at triggering tumor cell death in preclinical models. We previously demonstrated that the combination of these inhibitors in A2058 melanoma cells induced a DNA damage like response involving amplification of protein phosphorylation coincident with apoptosis (Kirkpatrick et al. PNAS 2013). These results were obtained using motif-specific immunoaffinity enrichment and label free quantitative mass spectrometry of phosphopeptides at various time points following treatment with GDC-0973 and GDC-0941. In parallel with these studies, ubiquitin substrate profiling was performed by K-GG immunoaffinity enrichment. K-GG peptides were captured using an antibody targeting the diglycine remnant (-GG) from ubiquitin, which remains covalently attached to substrate lysine (K) residues after tryptic digestion. Linear mixed effect modeling (LiME) was employed to assemble K-GG peptide data at the protein level and select top candidates for follow up analysis. 
Noted in these timecourse results were early responding substrates within the inhibitor targeted pathway, the DNA damage response and the cell death regulatory network, such as MEK1, PRKDC and PGAM5. Ubiquitination of MEK1 was maximally induced within the first hour following treatment, while extensive ubiquitination of PRKDC and PGAM5 occurred at later time points consistent with their posited roles in the downstream response. Concurrent with these, a marked increase in ubiquitination of mitochondrial substrates such as cytochrome C, MIRO2, IMMT, and CHCH3 upon commitment to the apoptotic state. Cells treated over 8h with a 4xEC50 combination of GDC-0973 and GDC-0941 demonstrated significantly increased ubiquitination for over 25 mitochondrial substrates including metabolic regulators, structural components, and apoptotic effectors. Evaluating the sub-organellar distribution of these mitochondrial proteins, it was surprising to note that many reside natively within the intermembrane space (IMS) or mitochondrial matrix. This finding suggests that a compromised outer mitochondrial membrane and/or disruption of mitochondrial protein import during apoptosis enables the cytosolic ubiquitination machinery to access this cluster of substrates. These results illustrate how ubiquitin substrate temporal profiling can reveal the dynamic proteome in response to pathway specific inhibition and may be valuable in dissecting the pathways affected by compounds of unknown function. A.5 Preserved Proteins from Extinct Bison Latifrons Identified by Tandem Mass Spectrometry Alexander Barrett, Ryan C. Hill, Travis Nemkov, Angelo D'Alessandro, Monika Dzieciatkowska, Kirk C. 
Hansen University of Colorado Denver, Denver, CO, USA Proteomics analysis using a simplified sample preparation procedure and tandem mass spectrometry (MS) was applied to obtain protein identifications on a sample from the extinct Bison latifrons that yielded peptide identifications mapping to over 45 bovine proteins. Our analysis resulted in extensive fibrillar collagen sequence coverage, including the identification of in vivo generated post-translational modifications. Hydroxylysine galactosylglucosylation, a modification thought to be involved in collagen fiber formation and bone mineralization, was identified for the first time in an ancient protein dataset. Meta-analyses of data from other studies indicates that this modification may be enriched in well-preserved prehistoric samples. This analysis has unearthed a potential “collagen code” that identifies post-translation modifications that may assist in the preservation of collagen over time. A.6 Mass Spectrometric Analysis of Endogenous Tau Modifications in Mice Giselle Knudsen1, Meaghan Morris2,3, Sumihiro Maeda2, Jonathan C. Trinidad4, Alexandra Ianoviciu1, Alma L. Burlingame1, Lennart Mucke2 1University of California, San Francisco, CA, USA; 2Gladstone Institute of Neurological Disease, San Francisco, CA, USA; 3The Johns Hopkins University Medical School, Baltimore, MD, USA; 4Indiana University, Bloomington, IN, USA The microtubule-associated protein tau has been implicated in the pathogenesis of Alzheimer's disease (AD) and other neurodegenerative disorders. Reducing tau levels ameliorates AD-related synaptic, network and behavioral abnormalities in human amyloid precursor protein (hAPP) transgenic mice. We used mass spectrometry to characterize the post-translational modification of endogenous tau isolated from wildtype or hAPP mice. We identified six different types of tau modifications at 63 sites in wildtype mice. 
Tau modifications were similar between hAPP and wildtype mice, supporting the hypothesis that neuronal dysfunction in hAPP mice is enabled by physiological forms of tau. Our data provide clear evidence supporting the targeting of acetylation and ubiquitination to the same lysine residues, with select sites also targeted by lysine methylation. Our data does not support the hypothesis of extensive O-GlcNAc modification of endogenous tau. The complex post-translational modification of physiological tau suggests that tau is subject to extensive regulation by diverse pathways. A.7 Advances in Antibody-based Proteomic Analysis Matthew P. Stokes, Hongbo Gu, Charles L. Farnsworth, Jian Min Ren, Kimberly A. Lee, Jeffrey C. Silva 1Cell Signaling Technology, Inc., Danvers, MA, USA Immunoaffinity purification (IAP) of proteins and peptides is a powerful tool to enrich samples for subsequent LC-MS/MS analysis. These antibody-based methods have long been used to probe for post-translationally modified (PTM) peptides that normally would not be detected due to their lower relative abundance in samples. Antibodies are generated using degenerate peptide libraries that recognize a particular PTM selectively but are agnostic to amino acids surrounding the PTM. A limited number of amino acids in addition to the modified residue can also be fixed in the library to generate antibodies that recognize a particular sequence motif, such as a consensus substrate sequence for protein kinases. The method has been successfully applied to a number of PTM's, including phosphorylation, acetylation, and ubiquitination. Recently, a number of advances have allowed for significantly improved performance and increased Proteome coverage using IAP methods. New antibodies have been developed to cover more PTMs, including methyl- arginine, methyl-lysine, and succinyl-lysine. 
Existing antibodies have also been improved, allowing identification of even more post-translationally modified peptides in a single LC-MS/MS run. These include an updated phosphotyrosine antibody, as well as a new monoclonal acetyl-lysine reagent (consisting of seven monoclonal antibodies) that provides the highest number of acetylated peptide identifications of any available acetyl-lysine antibody. PTMScan Direct, an IAP LC-MS/MS method utilizing cocktails of site-specific antibodies to phosphosites on critical signaling proteins, has been updated to include more reagents (thus covering more pathways), and to cover more proteins/sites using a single affinity reagent. The IAP LC-MS/MS protocol has also been adapted for use on robotic platforms such as the Agilent AssayMAP Bravo system, allowing automation of enrichment protocols for larger scale experiments to probe cellular signaling. A.8 Proteomics versus Transcriptomics for the Identification of Cancer Biomarkers: the Case of Brain-derived Metastatic Breast Cancer Cells Matthew D. Dun1, Robert J. Chalkley2, Sheridan Keene1, Ralph A. Bradshaw2, Hubert Hondermarck1 1School of Biomedical Sciences & Pharmacy, Faculty of Health and Medicine, University of Newcastle, New South Wales, Australia; 2Department of Pharmaceutical Chemistry, University of California, San Francisco, CA, USA Transcriptomics and proteomics have become common approaches in the search to identify new cancer biomarkers. However, it is unclear how well cancer-associated changes in protein and mRNA expression match, as a study from the same set of cancer samples has not been reported. Here we have compared changes in the proteome versus transcriptome in the brain-derived metastatic breast cancer cell line, 231-BR, as compared to the parental line, MDA-MB-231, which is also highly metastatic but with no organ selectivity. 
Comparative proteomic analyses were performed by reciprocally labelling each of these cell lines with 13C6 L-lysine (SILAC analysis). Soluble and membrane extracts were sequentially digested using a trypsin/Lys-C mixture and analyzed by a Q-Exactive Plus (Thermo Fisher Scientific) with chromatography performed with an EASY-nLC ultraperformance liquid chromatography system (Thermo). Quantitative measurements were extracted from the raw data by Search Compare in Protein Prospector (UCSF) and only the peaks with a signal to noise of greater than 10 were used in the quantification. Transcriptomic data were obtained after mRNA extraction and exon based analysis with an Affymetrix GeneChip platform. About 2,500 proteins were quantified in the 231-BR/MDA-MB-231 cell comparison, generating a list of more than 169 proteins up- or down-regulated more than 2-fold (with at least two peptides and sequenced reciprocally). When this was compared with the transcriptomic data (∼9,800 transcripts), 52% of correlation was found between the changes observed at the protein level versus those seen at the mRNA level. Ingenuity pathway analysis revealed a pattern of changes associated with the deregulation of cell invasion and metastasis. In particular, the signaling pathways related to the neuronal guidance and survival molecule ephrinB1, the matrix metalloprotease MMP1 and the tissue transglutaminase TGM2 appeared to be associated with the implantation and development of metastatic tumor cells in the brain. Taken together, these data indicate that proteomic and transcriptomic data are complementary rather than confirmatory in the quest for new cancer biomarkers, and that multiple levels of post-transcriptional regulations are likely to be involved in the acquisition of a brain metastatic phenotype. 
This work was supported in part by the University of Newcastle Australia, the Hunter Cancer Research Alliance, the Biomedical Technology Research Centers program of the USPHS National Institute of General Medical Sciences, 8P41GM103481, and NIH 1S10OD016229. A.9 Evolution of Separate Predation- and Defence-evoked Venoms in Carnivorous Cone Snails Sébastien Dutertre1,2, Ai-Hua Jin1, Irina Vetter1,3, Brett Hamilton4, Kartik Sunagar5,6, Vincent Lavergne1, Valentin Dutertre1, Bryan G. Fry1,7, Agostinho Antunes5,6, Deon J. Venter4,8, Paul F. Alewood1, Richard J. Lewis1 1Institute for Molecular Bioscience; The University of Queensland, Australia; 2Institut des Biomolécules Max Mousseron, CNRS, Université Montpellier, France; 3School of Pharmacy, The University of Queensland, Australia; 4Pathology Department, Mater Health Services, South Brisbane, Queensland, Australia; 5CIMAR/CIIMAR, Centro Interdisciplinar de Investigação Marinha e Ambiental, Universidade do Porto, Portugal; 6Dept. de Biologia, Universidade do Porto, Portugal; 7Venom Evolution Lab, School of Biological Sciences, The University of Queensland, Australia, 8Department of Medicine, The University of Queensland, Australia Venomous animals are thought to inject the same combination of toxins for both predation and defence, presumably exploiting conserved target pharmacology across prey and predators. Surprisingly, we have discovered that cone snails can rapidly and reversibly change their venom composition in response to predatory or defensive stimuli. In this study, predation- and defence-evoked venoms were collected from killer cone snails Conus geographus. One specimen was also dissected and the venom duct was divided into 12 sections for proteomic analysis. The underivatised venoms were analysed by standard LC-MS and MALDI imagining. Information Dependent Acquisition was performed on the reduced, reduced/alkylated and enzymatically digested venom samples. 
Predation- and defence-evoked venoms were fractionated and the activity of each fraction was assessed using high-throughput Ca2+ imaging assays. Here, we show that the defence-evoked venom was significantly more complex than predation-evoked venom, with limited overlap in peptide composition. The defence-evoked venom of C. geographus contains high levels of paralytic toxins that potently block neuromuscular receptors, consistent with its lethal effects on humans. In contrast, C. geographus predation-evoked venom contains prey-specific toxin mostly inactive at human targets. Predation- and defence-evoked venoms originate from the distal and proximal regions of the venom duct, respectively, explaining how different stimuli can generate two distinct venoms. A specialized defensive envenomation strategy is widely evolved across worm, mollusk and fish-hunting cone snails. We propose that defensive toxins, originally evolved in ancestral worm-hunting cone snails to protect against cephalopod and fish predation, have been repurposed in predatory venoms to facilitate diversification to fish and mollusk diets. A.10 A Standardized Scoring System for Affinity Purification/Mass Spectrometry Data Xu Li1, Benjamin White2, Rudy Guerra2, Junjie Chen1 1The University of Texas, MD Anderson Cancer Center, Houston, TX, USA; 2Rice University, Houston, TX, USA Studies of protein-protein interactions using affinity purification coupled with mass-spectrometry (AP/MS) have provided immense insights into protein functions. Many AP/MS-based studies have been conducted and uncovered hundreds of thousands of PPIs. However, it remains challenging to utilize these data efficiently due to the appearance of non-specific associated proteins, and the relatively low data reproducibility across different studies. Here we present a Minkowski distance-based probabilistic scoring method for label-free AP/MS data, which assigns standardized probability scores to interactions in a data-driven manner. 
We provide a knowledge-driven mode using available AP/MS datasets as controls, and a knowledge-free mode using customized negative controls, to deal with different types and scales of datasets. Comparing with other algorithms, it works more efficiently in both recognizing true positives and eliminating false positives in AP/MS datasets we have tested. Using this method, we filtered TAP/MS data generated by our lab and other labs and achieved high interlaboratory data reproducibility. We hope this new computational tool will be able to direct in-depth functional study of proteins more efficiently, and be helpful for building up a unified human protein interactome. A.11 Large Scale Proteomic Characterisation of Sites of Proline Hydroxylation in Human Proteins Dalila Bensaddek, Sandra C. Moser, Brian Ortmann, Sonia Rocha, Jason R. Swedlow, Angus I. Lamond Centre for Gene Regulation and Expression, Dundee, Scotland, GB Proline hydroxylation is an abundant post-translational modification in humans that is catalysed by prolyl 4- and prolyl 3-hydroxylases to yield 4-hydroxyproline and 3-hydroxyproline, respectively. There are three isoforms of prolyl-4 hydroxylases, designated Prolyl Hydroxylase Domain proteins PHDs (PHD1-3 or EGLN1-3). Hydroxylation of proline residues plays a critical role in sensing molecular oxygen levels. Under normal oxygen levels (normoxia) PHDs catalyse proline hydroxylation within the oxygen-dependent degradation domain (ODDD) of the hypoxia inducible factor one-α (HIF1-α). This promotes its binding to the von Hippel Lindau protein (pVHL), which acts as a targeting component for an E3 ubiquitin ligase complex that mediates rapid degradation of HIF1-α by the proteasome. In addition to HIF, using targeted mass spectrometric approaches we have recently identified Cep192 as a novel PHD1 substrate*1. 
A critical centrosome component, Cep192, was found to be hydroxylated on proline 1717 by PHD1 and targeted for proteasomal degradation by the E3 ubiquitin ligase SCF/Skp2. By modulating Cep192 levels, PHD affects the processes of centriole duplication and centrosome maturation and contributes to the regulation of cell-cycle progression. Similarly, we have identified centromere protein N CENP-N as a substrate for PHD2. Hydroxylation of a specific proline residue in CENP-N is required for mitotic progression (manuscript submitted). These recent findings suggest an important link between proline hydroxylation and cell cycle regulation. We have therefore undertaken a systematic analysis of human proteins that are hydroxylated on proline residues by PHD enzymes to identify new substrates and potentially reveal new roles for proline hydroxylation in biological regulatory mechanisms. To facilitate a large- scale quantitative proteomics study of PHD substrates, it was imperative to optimise the workflow for reliable detection of hydroxyproline by mass spectrometry. Proline hydroxylation results in a small mass increment (16Da) and results in a modified amino acid that is similar in mass to both leucine and isoleucine, which presents potential technical difficulties in detection that largely accounts for why sites of hydroxyproline modification have not been widely characterised. However, hydroxylation changes the chemical properties of modified peptides, making them more hydrophilic than their unmodified counterparts. This difference in chromatographic behavior makes proline-hydroxylated peptides amenable to enrichment. We have shown that we can use Hydrophilic Interaction Chromatography (HILIC) to enrich hydroxylated proline- (HyPro) containing peptides to increase their detection in subsequent reverse-phase LC-MS analysis. Using this approach, we carried out a large-scale quantitative analysis of PHD1–3 substrates using high-resolution mass spectrometry. 
We have identified over 2,000 hydroxylated proline (HyPro)-containing peptides in proteins isolated from human U2OS cell lines, corresponding to ∼ 1,000 proteins. This includes previously identified PHD substrates, such PKM2 *2 and HCLK2 *3 along with many novel substrates. Initial analyses showed that HyPro sites identified extend beyond the LXXLAP motif that was originally identified for PHD enzymes (PHD1- 3) in the oxygen dependent degradation domain (ODDD) in HIF1- α. *4 References 1. Moser, S. C. et al. PHD1 links cell-cycle progression to oxygen sensing through hydroxylation of the centrosomal protein Cep192. Developmental cell 26, 381–392, doi:10.1016/j.devcel.2013.06.014 (2013). 2. Luo, W. et al. Pyruvate Kinase M2 Is a PHD3-Stimulated Coactivator for Hypoxia-Inducible Factor 1. Cell 145, 732–744, doi:http://dx.doi.org/10.1016/j.cell.2011.03.054 (2011). 3. Xie, L. et al. PHD3-dependent hydroxylation of HCLK2 promotes the DNA damage response. The Journal of Clinical Investigation 122, 2827–2836, doi:10.1172/JCI62374 (2012). 4. Epstein, A. C. R. et al. C. elegans EGL-9 and Mammalian Homologs Define a Family of Dioxygenases that Regulate HIF by Prolyl Hydroxylation. Cell 107, 43–54, doi:http://dx.doi.org/10.1016/S0092-8674(01)00507–4 (2001). A.12 Using the Exactive Plus EMR Mass Spectrometer for Probing Protein-ligand and Protein-protein Interactions Jonathan B. Johnston1, Žygy Roe-Žurž2, Mike Trnka1, Shenheng Guan1, Paul R. Ortiz de Montellano1, David Agard2, Alma L. Burlingame1 Depts. 1Pharmaceutical Chemistry, 2Biochemistry and Biophysics, University of California, San Francisco, CA, USA The Exactive Plus EMR orbitrap mass spectrometer was employed to investigate protein-ligand and protein-protein interactions. This mass spectrometer excels in these areas of bio-analytical research due to its high sensitivity and high-resolution capabilities. 
Data will be presented on several applications of this analytical platform to address complex biological problems involving ligand binding and higher-order protein interactions. The systems examined include: glucocorticoid receptor, cytochrome P450s, and the nucleosome. A.13 Phoxtrack - an Unbiased Approach for Deciphering Proteins that are Causative for Post-translational Modifications Sascha Sauer, Christopher Weidner, Cornelius Fischer, Magdalena Kliem Max-Planck-Institute for Molecular Genetics, Berlin, Germany Physiology is largely controlled by fine-regulation of post translation modification (PTM) signaling. Analyses of the entire sets of kinases and other enzymes inducing PTM signalling can provide a holistic view of cellular states and generate unbiased hypotheses for experimental testing. Quantitative mass spectrometry allows for comprehensive analysis of PTMs, including for example the detection of several ten thousand phosphosites. However, the current data analysis tools applied for the discovery of underlying regulatory enzymes suffer from large inefficiencies. To overcome this bottleneck, we introduce PHOXTRACK (PHOsphosite-X-TRacing Analysis of Causal Kinases), a strategy and user-friendly software tool for analysing large PTM data sets. In contrast to other tools, PHOXTRACK makes use of full sets of quantitative proteomics data and applies non-parametric statistics to determine whether defined kinase-specific sets of phosphosite sequences indicate statistically significant, concordant differences between various biological conditions. Thereby, PHOXTRACK can efficiently extract posttranslational information of comprehensive proteomics datasets to dissect key regulatory proteins and to infer molecular pathways. Various case-studies for benchmarking PHOXTRACK including construction of signaling pathways will be presented. 
PHOXTRACK will be maintained over the next years and is freely available as an online tool for noncommercial use at http://phoxtrack.molgen.mpg.de/. Users will also find a tutorial and can additionally give feedback at: https://groups.google.com/d/forum/phoxtrack-discuss. A.14 Top Down Venom Analysis with Byonic Software Marshall Bern1, Doron Kletter1, David Fenyo2, David Morgenstern2, Beatrix Ueberheide2, Nicholas Bern1, Wilfred Tang1, Yong J. Kil1, Christopher Becker1 1Protein Metrics, San Carlos, CA, USA; 2New York University, New York, NY, USA Venom from snails, spiders, snakes, and other organisms represent a rich source of bioactive molecules with potential as drug leads and research tools. Venom poses a number of special challenges for proteomics analysis: (1) crude venom may contain 100 or more individual toxins, some of quite low abundance; (2) protein databases are often incomplete or inaccurate; (3) toxins have masses up to ∼10 kDa so it may be difficult to obtain good fragmentation coverage; (4) toxins contain multiple disulfide bonds, and in the case of marine snails, numerous posttranslational modifications; (5) venoms contain multiple sequence variants with varying potency, efficacy, and selectivity, so it is important to achieve absolute accuracy in sequence assignment. One successful data acquisition method derivitizes cysteines to increase the charge state and then employs ETD fragmentation on the highly charged precursors. This method produces complex spectra that in turn pose challenges for data analysis. Here we show how we used Byonic software in the analysis of venom from a variety of organisms. Byonic includes a number of features not found in more standard proteomics search engines: advanced ETD scoring that takes into account many ETD-specific phenomena; modification fine control, which enables large modification searches; and wildcard search, which finds unanticipated modifications and sequence variants. 
We used Byonic to score and annotate database sequences and both automatically and manually generated de novo candidates. We found that successful sequencing of one venom component often led to the identification of many related toxins. We also found that Byonic is especially well-suited to analyzing molecules in the 3 to 10 kDa and z=4+ to z=14+ range, which strain the standard bottom-up search engines, but are not large enough to justify the simplified scoring and search strategies in top-down search engines. Using our strategy we were able to characterize end to end over 60 sequences directly from crude venom of auger snails, spiders and scorpions ranging in size from 2 to 8 kDa. Acknowledgments: This work was supported by NIH grant R43 GM103362. A.15 Treatment-induced Apoptotic Peptides as Efficacy Biomarkers in Mouse Models Julia Seaman, James A. Wells University of California, San Francisco, CA, USA Apoptosis, a form of programmed cell death, is an essential cellular function and conserved process throughout metazoans. Many chemotherapeutics induce apoptosis in tumor cells, and successful treatments selectively induce apoptosis in disease tissue. However, it is still difficult to predict and track treatment efficacy in the clinic. Signals of the pathway are attractive efficacy biomarker targets as apoptotic biochemical changes occur almost immediately after drug exposure. There is a great need for treatment efficacy biomarkers for diseases like lung cancer and multiple myeloma where many patients show drug resistance. The major actors in apoptosis are proteases, especially caspases, that cleave thousands of proteins leading to the dismantling of the cell. The Wells lab has developed a positive enrichment labeling method to identify apoptotic proteolytic peptides and recent data reveals thousands of cleavage events. 
As mice are commonly used to study cancer and drug efficacy, I will generate a mouse dataset and analyze peptides based on evolution, kinetics and drug- and cell-type patterns to reveal important apoptotic subsets with biomarker potential. I will then use the detected peptides as a basis to create a biomarker panel for treatment efficacy in mouse models, specifically focused on multiple myeloma. This study intends to discover and validate murine apoptotic related peptides, to classify them, and to create an accurate biomarker panel to predict treatment response. A.16 Extracellular Matrix Quantification of Tissue Engineered Scaffolds: Analysis of Rat Lung using the QconCAT Method Ryan Hill1, Elizabeth A. Calle2, Laura E. Niklason2, Kirk C. Hansen1 1University of Colorado-Denver, Aurora, CO, USA; 2Yale University, New Haven, CT, USA The Extracellular Matrix (ECM) is a complex milieu of macromolecules whose structure and function play an integral role in tissue and organ morphogenesis. Changes in ECM composition and architecture have been shown to mediate cell proliferation, differentiation, and growth, serving as a critical component of organ scaffolds for tissue engineering efforts. Efficient re-seeding of decellularized scaffolds has been shown to be dependent on retaining native ECM structural integrity and elasticity. A critical goal of the decellularization procedure is to preserve the microarchitecture of the organ and retain tissue-specific molecular cues that guide successful recellularization. Local variations in expression of abundant proteins in the ECM scaffolding (Collagens, Laminins, Fibronectin) have been correlated to variance in cell repopulation and subsequent proliferation. It is thought that retaining ECM morphology allows cells to be directed back to a tissue specific niche during reseeding, and that small changes in abundance of these molecular cues can drastically affect the recellularization process. 
However, current methods used to characterize the protein composition of native and acellular tissues fail to accurately quantify ECM proteins. Thus, we developed a more complete and accurate method for protein characterization by (1) increasing proteome coverage by analyzing both the chaotrope soluble and insoluble fractions and (2) accurately quantifying protein levels using the QconCAT method. Using this method, we have absolutely quantified 210 peptides representing 102 ECM-targeted and cellular proteins. Initial results indicate that up to 70% of fibrillar protein, the main structural component of a decellularized scaffold, is being discarded using commonly accepted proteomic methods. In addition, varied decellularization procedures result in distinct profiles of ECM proteins in the lung. Our approach allows for more accurate quantification of protein levels in lung tissues used for organ engineering experiments. The accurate characterization of ECM proteins from lung samples should help advance tissue engineering efforts by yielding a readout that can be correlated with functional outcome to drive further development. A.17 Proteomic Analysis of the Role of the Translation Initiation Factor eIF4E in C. Elegans Juan Oses-Prieto1, Maria Quimis Ponce2, Qin Dong2, A. L. Burlingame1, Robert E. Rhoads2 1University of California, San Francisco, San Francisco, CA, USA; 2Louisiana State University Health Sciences Center in Shreveport, LA, USA Translation initiation is a multistep process in which a series of initiation factor polypeptides bind sequentially to the mRNA to build up complexes of increasing size. For most eukaryotic mRNAs, initiation is dependent on the 7-methylguanosine (m7G)-containing 5′-terminal cap structure. The cap inserts into a narrow pocket in eukaryotic initiation factor 4E (eIF4E). 
When eIF4E is active in translation, eIF4G is bound to the opposite (“dorsal”) side of eIF4E, and eIF4G is bound, in turn, to the RNA helicase eIF4A, the translational poly(A)-binding protein, eIF3, and other initiation factors. When eIF4E is inactive in translation, the eIF4G-binding site is blocked by other eIF4E-binding proteins. Different eIF4E-binding proteins have been shown to regulate activity of this initiation factor in various biological systems. Five eIF4E-family members, termed IFE-1 to IFE-5, are expressed in Caenorhabditis elegans, but the physiological rationale for multiple family members is unclear. Disruption of the ife-1 gene shows that IFE-1 is required for spermatogenesis in both hermaphrodites and males, causing spermatocytes to fail in cytokinesis and producing a moderate defect in oocyte development as well. To gain insight into the molecular mechanism for this, we used affinity chromatography on m7GTP-Sepharose to purify proteins that co-purify with eIF4E and compared them between wild-type and ife-1 knockout strains, reasoning that proteins uniquely present in IFE-1-containing complexes would be depleted in the mutants. Proteins from wild-type and mutant strains were compared quantitatively with iTRAQ technology coupled to mass spectrometry. We identified 7500 peptides coming from 900 proteins, with a false discovery rate of 0.3%. Of these, 439 proteins were quantified using at least 3 peptides. The iTRAQ data indicated that IFE-1 levels were reduced at least 8-fold in mutant extracts, but western blotting showed that IFE-1 was undetectable. The underestimation by iTRAQ is due to the well characterized compression of the quantitative ratio. All other IFE family members were unchanged between wild-type and mutant. Three proteins were depleted to the same degree as IFE-1 in mutant extracts: P granule abnormality protein 1 (PGL-1), P granule abnormality protein 2 (PGL-2), and Cid homolog 1 (CID-1), a terminal uridylate transferase (TUTase). 
An additional 150 proteins were changed to a lesser degree. TUTases add U residues to a variety of RNA species to initiate their degradation, the best characterized being histone mRNAs and miRNAs. Thus, CID-1 may alter the levels of mRNAs or miRNAs to either promote the germline program or suppress the somatic program during gametogenesis. For all proteins showing significant changes between the cap-binding complexes from mutant and wild-type worms, we also measured their levels in total cell extracts to determine whether these changes were due to differences in total cellular levels. For instance, IFE-1 could conceivably be involved in synthesis of some of these proteins. Since antibodies suitable for western blotting were not available for CID-1 and most of the other proteins, we used MS-based label-free quantitation by Single Reaction Monitoring (SRM) to quantify their relative levels. CID levels were not different in total protein extracts. This work was supported by NIH grants NIH NIGMS 8P41GM103481, 1S10RR026662 and R01 GM020818.",2014-08-01 +33828771,"Eye and head movements while looking at rotated scenes in VR. Session ""Beyond the screen's edge"" at the 20th European Conference on Eye Movement Research (ECEM) in Alicante, 19.8.2019. ","We examined the extent to which image shape (square vs. circle), image rotation, and image content (landscapes vs. fractal images) influenced eye and head movements. Both the eyes and head were tracked while observers looked at natural scenes in a virtual reality (VR) environment. In line with previous work, we found a horizontal bias in saccade directions, but this was affected by both the image shape and its content. Interestingly, when viewing landscapes (but not fractals), observers rotated their head in line with the image rotation, presumably to make saccades in cardinal, rather than oblique, directions. 
We discuss our findings in relation to current theories on eye movement control, and how insights from VR might inform traditional eyetracking studies. - Part 2: Observers looked at panoramic, 360 degree scenes using VR goggles while eye and head movements were tracked. Fixations were determined using IDT (Salvucci & Goldberg, 2000) adapted to a spherical coordinate system. We then analyzed a) the spatial distribution of fixations and the distribution of saccade directions, b) the spatial distribution of head positions and the distribution of head movements, and c) the relation between gaze and head movements. We found that, for landscape scenes, gaze and head best fit the allocentric frame defined by the scene horizon, especially when taking head tilt (i.e., head rotation around the view axis) into account. For fractal scenes, which are isotropic on average, the bias toward a body-centric frame gaze is weak for gaze and strong for the head. Furthermore, our data show that eye and head movements are closely linked in space and time in stereotypical ways, with volitional eye movements predominantly leading the head. We discuss our results in terms of models of visual exploratory behavior in panoramic scenes, both in virtual and real environments. Video stream: https://vimeo.com/356859979 Production and publication of the video stream was sponsored by SCIANS Ltd http://www.scians.ch/.",2019-11-25 +24444495,Igloo-Plot: a tool for visualization of multidimensional datasets.,"Advances in science and technology have resulted in an exponential growth of multivariate (or multi-dimensional) datasets which are being generated from various research areas especially in the domain of biological sciences. Visualization and analysis of such data (with the objective of uncovering the hidden patterns therein) is an important and challenging task. We present a tool, called Igloo-Plot, for efficient visualization of multidimensional datasets. 
The tool addresses some of the key limitations of contemporary multivariate visualization and analysis tools. The visualization layout, not only facilitates an easy identification of clusters of data-points having similar feature compositions, but also the 'marker features' specific to each of these clusters. The applicability of the various functionalities implemented herein is demonstrated using several well studied multi-dimensional datasets. Igloo-Plot is expected to be a valuable resource for researchers working in multivariate data mining studies. Igloo-Plot is available for download from: http://metagenomics.atc.tcs.com/IglooPlot/.",2014-01-17 +30923379,A cheminformatics approach to characterize metabolomes in stable-isotope-labeled organisms.,"We report a computational approach (implemented in MS-DIAL 3.0; http://prime.psc.riken.jp/) for metabolite structure characterization using fully 13C-labeled and non-labeled plants and LC-MS/MS. Our approach facilitates carbon number determination and metabolite classification for unknown molecules. Applying our method to 31 tissues from 12 plant species, we assigned 1,092 structures and 344 formulae to 3,604 carbon-determined metabolite ions, 69 of which were found to represent structures currently not listed in metabolome databases.",2019-03-28 +28967524,Vaccination strategies in pediatric inflammatory bowel disease.,"The incidence of pediatric inflammatory bowel disease (IBD) is rising, as is the use of immunomodulatory and biological drugs. IBD patients are vulnerable to infections owing to disease-related immunological alterations and drug-induced systemic immunosuppression. Although many infections are vaccine-preventable, vaccination coverage in IBD patients is insufficient. Current guidelines recommend that children with IBD follow the same routine immunization schedule as healthy children, avoiding live vaccines during immunosuppressive therapy. 
Immunization status should be checked at diagnosis, and patients should be immunized with the vaccines they need. Some studies have demonstrated a suboptimal immune response to vaccinations in IBD patients, but responsible mechanisms are poorly understood. In this manuscript, we provide a broad review of available data about vaccine coverage rates, immunogenicity and safety of both killed and live attenuated vaccinations in the pediatric IBD population; furthermore, we provide comprehensive information regarding current guidelines for immunization of children with IBD and their household contacts. A comprehensive search of published literature using the PubMed (http://www.ncbi.nlm.nih.gov/pubmed/) database was carried out to identify all articles published in English from 1998 to March 2017, using the following key terms: ""inflammatory bowel disease"", ""vaccination"", ""immunization"", ""immunogenicity"", ""safety"" and ""children"".",2017-09-28 +30830990,"Mutation update: TGFBI pathogenic and likely pathogenic variants in corneal dystrophies.","Human transforming growth factor β-induced (TGFBI), is a gene responsible for various corneal dystrophies. TGFBI produces a protein called TGFBI, which is involved in cell adhesion and serves as a recognition sequence for integrins. An alteration in cell surface interactions could be the underlying cause for the progressive accumulation of extracellular deposits in different layers of the cornea with the resulting changes of refractive index and transparency. To this date, 69 different pathogenic or likely pathogenic variants in TGFBI have been identified in a heterozygous or homozygous state in various corneal dystrophies, including a novel variant reported here. All disease-associated variants were inherited as autosomal-dominant traits but one; this latter was inherited as an autosomal recessive trait. Most corneal dystrophy-associated variants are located at amino acids Arg124 and Arg555. 
To keep the list of corneal dystrophy-associated variants current, we generated a locus-specific database for TGFBI (http://databases.lovd.nl/shared/variants/TGFBI) containing all pathogenic and likely pathogenic variants reported so far. Non-disease-associated variants are described in specific databases, like gnomAD and ExAC but are not listed here. This article presents the most recent up-to-date list of disease-associated variants.",2019-03-28 +30190516,"SeesawPred: A Web Application for Predicting Cell-fate Determinants in Cell Differentiation.","Cellular differentiation is a complex process where a less specialized cell evolves into a more specialized cell. Despite the increasing research effort, identification of cell-fate determinants (transcription factors (TFs) determining cell fates during differentiation) still remains a challenge, especially when closely related cell types from a common progenitor are considered. Here, we develop SeesawPred, a web application that, based on a gene regulatory network (GRN) model of cell differentiation, can computationally predict cell-fate determinants from transcriptomics data. Unlike previous approaches, it allows the user to upload gene expression data and does not rely on pre-compiled reference data sets, enabling its application to novel differentiation systems. SeesawPred correctly predicted known cell-fate determinants on various cell differentiation examples in both mouse and human, and also performed better compared to state-of-the-art methods. The application is freely available for academic, non-profit use at http://seesaw.lcsb.uni.lu.",2018-09-06 +28140407,"Evaluation and comparison of classical interatomic potentials through a user-friendly interactive web-interface.","Classical empirical potentials/force-fields (FF) provide atomistic insights into material phenomena through molecular dynamics and Monte Carlo simulations. 
Despite their wide applicability, a systematic evaluation of materials properties using such potentials and, especially, an easy-to-use user-interface for their comparison is still lacking. To address this deficiency, we computed energetics and elastic properties of a variety of materials such as metals and ceramics using a wide range of empirical potentials and compared them to density functional theory (DFT) as well as to experimental data, where available. The database currently consists of 3248 entries including energetics and elastic property calculations, and it is still increasing. We also include computational tools for convex-hull plots for DFT and FF calculations. The data covers 1471 materials and 116 force-fields. In addition, both the complete database and the software coding used in the process have been released for public use online (presently at http://www.ctcms.nist.gov/~knc6/periodic.html) in a user-friendly way designed to enable further material design and discovery.",2017-01-31 +27722873,"Bayesian pretest probability estimation for primary malignant bone tumors based on the Surveillance, Epidemiology and End Results Program (SEER) database.","

Purpose

In the diagnostic process of primary bone tumors, patient age, tumor localization and to a lesser extent sex affect the differential diagnosis. We therefore aim to develop a pretest probability calculator for primary malignant bone tumors based on population data taking these variables into account.

Methods

We access the SEER (Surveillance, Epidemiology and End Results Program of the National Cancer Institute, 2015 release) database and analyze data of all primary malignant bone tumors diagnosed between 1973 and 2012. We record age at diagnosis, tumor localization according to the International Classification of Diseases (ICD-O-3) and sex. We take relative probability of the single tumor entity as a surrogate parameter for unadjusted pretest probability. We build a probabilistic (naïve Bayes) classifier to calculate pretest probabilities adjusted for age, tumor localization and sex.

Results

We analyze data from 12,931 patients (647 chondroblastic osteosarcomas, 3659 chondrosarcomas, 1080 chordomas, 185 dedifferentiated chondrosarcomas, 2006 Ewing's sarcomas, 281 fibroblastic osteosarcomas, 129 fibrosarcomas, 291 fibrous malignant histiocytomas, 289 malignant giant cell tumors, 238 myxoid chondrosarcomas, 3730 osteosarcomas, 252 parosteal osteosarcomas, 144 telangiectatic osteosarcomas). We make our probability calculator accessible at http://ebm-radiology.com/bayesbone/index.html . We provide exhaustive tables for age and localization data. Results from tenfold cross-validation show that in 79.8 % of cases the pretest probability is correctly raised.

Conclusions

Our approach employs population data to calculate relative pretest probabilities for primary malignant bone tumors. The calculator is not diagnostic in nature. However, resulting probabilities might serve as an initial evaluation of probabilities of tumors on the differential diagnosis list.",2016-10-08 +21731755,PanSNPdb: the Pan-Asian SNP genotyping database.,"The HUGO Pan-Asian SNP consortium conducted the largest survey to date of human genetic diversity among Asians by sampling 1,719 unrelated individuals among 71 populations from China, India, Indonesia, Japan, Malaysia, the Philippines, Singapore, South Korea, Taiwan, and Thailand. We have constructed a database (PanSNPdb), which contains these data and various new analyses of them. PanSNPdb is a research resource in the analysis of the population structure of Asian peoples, including linkage disequilibrium patterns, haplotype distributions, and copy number variations. Furthermore, PanSNPdb provides an interactive comparison with other SNP and CNV databases, including HapMap3, JSNP, dbSNP and DGV and thus provides a comprehensive resource of human genetic diversity. The information is accessible via a widely accepted graphical interface used in many genetic variation databases. Unrestricted access to PanSNPdb and any associated files is available at: http://www4a.biotec.or.th/PASNP.",2011-06-23 +,"High Tenofovir Failure Rates in an Emerging, Non-B Subtype HIV Epidemic","Abstract

Background

The WHO-recommended regimen for antiretrovirals (ARVs) is tenofovir (TDF) + lamivudine/emtricitabine (3TC/FTC) + efavirenz (EFV), based on demonstrated superiority of TDF+FTC+EFV over zidovudine (AZT) +FTC+ EFV in clinical trials. However, there are reports of increasing TDF resistance in non-B subtypes. We have previously shown that HIV genotypes in the Philippines have shifted (https://idsa.confex.com/idsa/2014/webprogram/Paper45090.html) from B to CRF01_AE. We compared failure rates for ARVs during an acquired drug-resistance surveillance study.

Methods

We analyzed ARV data from a study with the Department of Health on treatment failure in Filipinos after one year of treatment. Institutional Board Review approval and informed consent were obtained.

Results

513 adult patients from 3 national treatment hubs (Philippine General Hospital, San Lazaro Hospital, Vicente Sotto Memorial Medical Center) were enrolled and analyzed. Treatment failure (viral load>1000 copies/mL) at one year for specific regimens are summarized in Table 1. No baseline genotyping was available. 53 (10.3%) patients failed treatment. Genotypes among these were CRF01_AE (87%), B (11%) and C (2%). TDF-containing regimens had significantly higher failure rates (43/303;14.2%) than AZT-containing regimens (10/209;4.5%) (P < 0.001). Failure rates for NVP-based regimens (13/85;15.3%) vs. EFV-based regimens (40/424; 9.4%) were not significantly different (P = 0.1064). The most durable regimen (with >3 patients) was AZT+3TC+EFV, and the worst regimen was TDF+3TC+NVP (P < 0.001). Failure rates for TDF+3TC+EFV were significantly higher than for AZT+3TC+EFV (P = 0.0029). There was no significant difference in adherence (P = 0.5531). 53% of unsuppressed patients had a TDF-resistance mutation, compared with 8% for AZT (P < 0.001).

Conclusion

TDF-containing regimens were associated with higher treatment failure rates in our CRF01_AE-predominant HIV epidemic. WHO recommendations for treatment may need to be revisited for non-B subtypes.

Table 1. Failure rates for ARV regimens.
Regimen | On Regimen (N = 513) | Unsuppressed | Failure (%) | Adherence (%)
TDF+3TC+EFV | 269 | 34 | 12.6 | 66.5
AZT+3TC+EFV | 155 | 6 | 3.9 | 61.9
TDF+3TC+NVP | 31 | 9 | 29.0 | 64.5
AZT+3TC+NVP | 54 | 4 | 7.4 | 72.2
TDF+3TC+LPV/r | 3 | 0 | 0 | 33.3
ABC+3TC+LPV/r | 1 | 0 | 0 | 100

Disclosures

E. M. Salvana, Merck: Scientific Advisor and Speaker’s Bureau, Consulting fee and Speaker honorarium",2017-01-01 +25725063,PreDREM: a database of predicted DNA regulatory motifs from 349 human cell and tissue samples. ,"PreDREM is a database of DNA regulatory motifs and motifs modules predicted from DNase I hypersensitive sites in 349 human cell and tissue samples. It contains 845-1325 predicted motifs in each sample, which result in a total of 2684 non-redundant motifs. In comparison with seven large collections of known motifs, more than 84% of the 2684 predicted motifs are similar to the known motifs, and 54-76% of the known motifs are similar to the predicted motifs. PreDREM also stores 43 663-20 13 288 motif modules in each sample, which provide the cofactor motifs of each predicted motif. Compared with motifs of known interacting transcription factor (TF) pairs in eight resources, on average, 84% of motif pairs corresponding to known interacting TF pairs are included in the predicted motif modules. Through its web interface, PreDREM allows users to browse motif information by tissues, datasets, individual non-redundant motifs, etc. Users can also search motifs, motif modules, instances of motifs and motif modules in given genomic regions, tissue or cell types a motif occurs, etc. PreDREM thus provides a useful resource for the understanding of cell- and tissue-specific gene regulation in the human genome. Database URL: http://server.cs.ucf.edu/predrem/.",2015-02-27 +30500879,CRISPR-Local: a local single-guide RNA (sgRNA) design tool for non-reference plant genomes.,"

Summary

CRISPR-Local is a high-throughput local tool for designing single-guide RNAs (sgRNAs) in plants and other organisms that factors in genetic variation and is optimized to generate genome-wide sgRNAs. CRISPR-Local outperforms other sgRNA design tools in the following respects: (i) designing sgRNAs suitable for non-reference varieties; (ii) screening for sgRNAs that are capable of simultaneously targeting multiple genes; (iii) saving computational resources by avoiding repeated calculations from multiple submissions and (iv) running offline, with both command-line and graphical user interface modes and the ability to export multiple formats for further batch analysis or visualization. We have applied CRISPR-Local to 71 public plant genomes, using both CRISPR/Cas9 and CRISPR/cpf1 systems.

Availability and implementation

CRISPR-Local can be freely downloaded from http://crispr.hzau.edu.cn/CRISPR-Local/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +31056021,"Publication pattern, study design, authors and countries involved in orthodontic RCTs - a bibliometric MEDLINE survey over the past 50 years.","

Objectives

The objective of this study was to investigate the number and proportions, study design, journal publications, authors and countries involved in orthodontic randomised controlled trials (RCTs) over the past 50 years.

Method

A MEDLINE database search (Entrez PubMed, http://www.ncbi.nlm.nih.gov ) was performed in November 2018 for RCT publications from 1 January 1968 to 31 December 2017. All orthodontic publications, clinical trials (CTs) or RCTs were selected. For each year from 1968 to 2017, the total number of orthodontic publications and total number of CTs and RCTs were computed. Following this, study design, journal publications, authors and countries involved in orthodontic RCTs were noted.

Results

The RCTs accounted for 0.1% of all orthodontic publications in the 1970s and 1980s, and, following this, the proportion of RCTs was increased and amounted to 2.7% over the past decade (2008-2017). The majority of RCTs were of single centre design (93.4%) and in 88.3% a parallel-arm design was performed. The United States and United Kingdom endorsed 16.9% and 15.5% of the RCTs, followed by Turkey (11.1%), Brazil (7.0%), Sweden (6.6%), China (5.6%), Italy (5.4%), and Germany (4.1%). Of all RCTs, 74.2% were published in 20 orthodontic journals and 25.8% in 81 non-orthodontic journals. The American Journal of Orthodontics and Dentofacial Orthopedics, Angle Orthodontist, European Journal of Orthodontics and Journal of Orthodontics contributed to 60.2% of all the RCTs.

Conclusion

The considerable increase in orthodontic RCTs over the past 50 years implies an increased scientific impact of orthodontic literature.",2019-03-27 +30955983,Vegetarian diet and risk of gout in two separate prospective cohort studies.,"

Backgrounds & aims

Plant-based diets may target multiple pathways in gout pathogenesis (uric acid reduction and anti-inflammation) while improving gout associated cardiometabolic comorbidities. We aim to prospectively examine the relationship between a vegetarian diet and gout, and to explore if this relationship is independent of hyperuricemia.

Methods

We followed 4903 participants in the Tzu Chi Health Study (Cohort1, recruited in 2007-2009) and 9032 participants in the Tzu Chi Vegetarian Study (Cohort2, recruited in 2005) until end of 2014. Baseline serum uric acid was measured in Cohort1. Vegetarian status was assessed through a diet questionnaire that includes dietary habits and a food frequency questionnaire. Incidence of gout was ascertained by linkage to the National Health Insurance Database. Hazard Ratio of gout in vegetarians versus nonvegetarians was assessed by Cox regression, adjusted for age, sex, lifestyle and metabolic risk factors. Hyperuricemia was additionally adjusted in Cohort1.

Results

In Cohort1, lacto-ovo vegetarians had the lowest uric acid concentration, followed by vegans, then nonvegetarians (men: 6.05, 6.19, 6.32 mg/dL, respectively; women: 4.92, 4.96, 5.11 mg/dL, respectively); 65 gout cases occurred in the 29,673 person-years of follow-up; vegetarians experienced a lower risk of gout (without adjustment for hyperuricemia: HR: 0.33; 95% CI: 0.14, 0.79; with adjustment for hyperuricemia: HR: 0.40; 95% CI: 0.17, 0.97). In Cohort2, 161 gout cases occurred in the 83,019 person-years follow-up, and vegetarians also experienced a lower risk of gout (HR: 0.61; 95% CI: 0.41, 0.88).

Conclusion

Taiwanese vegetarian diet is associated with lower risk of gout. This protective association may be independent of baseline hyperuricemia.

Study registered

URL: https://www.clinicaltrials.gov. Unique Identifier: NCT03470584.",2019-03-27 +,Clumpak: a program for identifying clustering modes and packaging population structure inferences across K,"The identification of the genetic structure of populations from multilocus genotype data has become a central component of modern population‐genetic data analysis. Application of model‐based clustering programs often entails a number of steps, in which the user considers different modelling assumptions, compares results across different predetermined values of the number of assumed clusters (a parameter typically denoted K), examines multiple independent runs for each fixed value of K, and distinguishes among runs belonging to substantially distinct clustering solutions. Here, we present Clumpak (Cluster Markov Packager Across K), a method that automates the postprocessing of results of model‐based population structure analyses. For analysing multiple independent runs at a single K value, Clumpak identifies sets of highly similar runs, separating distinct groups of runs that represent distinct modes in the space of possible solutions. This procedure, which generates a consensus solution for each distinct mode, is performed by the use of a Markov clustering algorithm that relies on a similarity matrix between replicate runs, as computed by the software Clumpp. Next, Clumpak identifies an optimal alignment of inferred clusters across different values of K, extending a similar approach implemented for a fixed K in Clumpp and simplifying the comparison of clustering results across different K values. Clumpak incorporates additional features, such as implementations of methods for choosing K and comparing solutions obtained by different programs, models, or data subsets. 
Clumpak, available at http://clumpak.tau.ac.il, simplifies the use of model‐based analyses of population structure in population genetics and molecular ecology.",2015-09-01 +31562339,Tissue-specific mouse mRNA isoform networks.,"Alternative Splicing produces multiple mRNA isoforms of genes which have important diverse roles such as regulation of gene expression, human heritable diseases, and response to environmental stresses. However, little has been done to assign functions at the mRNA isoform level. Functional networks, where the interactions are quantified by their probability of being involved in the same biological process are typically generated at the gene level. We use a diverse array of tissue-specific RNA-seq datasets and sequence information to train random forest models that predict the functional networks. Since there is no mRNA isoform-level gold standard, we use single isoform genes co-annotated to Gene Ontology biological process annotations, Kyoto Encyclopedia of Genes and Genomes pathways, BioCyc pathways and protein-protein interactions as functionally related (positive pair). To generate the non-functional pairs (negative pair), we use the Gene Ontology annotations tagged with ""NOT"" qualifier. We describe 17 Tissue-spEcific mrNa iSoform functIOnal Networks (TENSION) following a leave-one-tissue-out strategy in addition to an organism level reference functional network for mouse. We validate our predictions by comparing its performance with previous methods, randomized positive and negative class labels, updated Gene Ontology annotations, and by literature evidence. We demonstrate the ability of our networks to reveal tissue-specific functional differences of the isoforms of the same genes. 
All scripts and data from TENSION are available at: https://doi.org/10.25380/iastate.c.4275191 .",2019-09-27 +31814243,Illness Risk Representation beliefs underlying adolescents' cardiovascular disease risk appraisals and the preventative role of physical activity.,"OBJECTIVES:The primary aim was to explore adolescents' cardiovascular disease risk appraisals and establish whether they understood the preventative role of physical activity (PA). The secondary aim was to examine whether adolescents' cardiovascular disease risk appraisal fitted with the Illness Risk Representations (IRR) framework. DESIGN:Qualitative. METHODS:Thirty-one adolescents aged between 13 and 15 years participated in semi-structured interviews. Data were analysed using Framework Analysis. RESULTS:Knowledge of lifestyle behaviours contributing to cardiovascular disease was good. Participants reflected on their current (or expected future) patterns of these behaviours when making judgements about lifetime risk. They struggled however to explain how different health behaviours, including PA, affected the development of the disease. Cardiovascular disease was viewed as potentially fatal, but participants had only a superficial understanding of the consequences of, or treatments for, the disease. The IRR framework, as proposed by Cameron (2003, https://cancercontrol.cancer.gov/brp/research/theories_project/cameron.pdf), largely captured the way in which adolescents' made judgements about their risk of cardiovascular disease. CONCLUSIONS:The findings suggest that adolescents are underestimating their risk of cardiovascular disease due to unhelpful beliefs. 
Interventions should: provide clear and simple explanations of how different health behaviours contribute to cardiovascular risk, highlight discrepancies that exist between current levels of preventative behaviour and that required to confer a protective effect, expose the false belief that a lack of PA in early life can be compensated for in later adulthood, and aid understanding of the true impact that the disease and its treatment could have of health and quality of life outcomes. Statement of contribution What is already known on this subject? Physical activity (PA) throughout one's lifetime can reduce the risk of developing cardiovascular disease. The majority of adolescents' do not meet the recommended levels of PA. Changing beliefs about the risk of cardiovascular disease might be a useful strategy to motivate engagement in PA. What does this study add? An increased understanding of adolescents' knowledge of cardiovascular disease and the link with PA. Identification of strategies to change adolescents' risk perceptions of cardiovascular disease in ways that could motivate PA. Evidence to support the Illness Risk Representation framework.",2019-12-09 +30814982,Vgas: A Viral Genome Annotation System.,"The in-depth study of viral genomes is of great help in many aspects, especially in the treatment of human diseases caused by viral infections. With the rapid accumulation of viral sequencing data, improved, or alternative gene-finding systems have become necessary to process and mine these data. In this article, we present Vgas, a system combining an ab initio method and a similarity-based method to automatically find viral genes and perform gene function annotation. Vgas was compared with existing programs, such as Prodigal, GeneMarkS, and Glimmer. 
Through testing 5,705 virus genomes downloaded from RefSeq, Vgas demonstrated its superiority with the highest average precision and recall (both indexes were 1% higher or more than the other programs); particularly for small virus genomes (≤ 10 kb), it showed significantly improved performance (precision was 6% higher, and recall was 2% higher). Moreover, Vgas presents an annotation module to provide functional information for predicted genes based on BLASTp alignment. This characteristic may be specifically useful in some cases. When combining Vgas with GeneMarkS and Prodigal, better prediction results could be obtained than with each of the three individual programs, suggesting that collaborative prediction using several different software programs is an alternative for gene prediction. Vgas is freely available at http://cefg.uestc.cn/vgas/ or http://121.48.162.133/vgas/. We hope that Vgas could be an alternative virus gene finder to annotate new genomes or reannotate existing genome.",2019-02-13 +25388151,EVpedia: a community web portal for extracellular vesicles research.,"

Motivation

Extracellular vesicles (EVs) are spherical bilayered proteolipids, harboring various bioactive molecules. Due to the complexity of the vesicular nomenclatures and components, online searches for EV-related publications and vesicular components are currently challenging.

Results

We present an improved version of EVpedia, a public database for EVs research. This community web portal contains a database of publications and vesicular components, identification of orthologous vesicular components, bioinformatic tools and a personalized function. EVpedia includes 6879 publications, 172 080 vesicular components from 263 high-throughput datasets, and has been accessed more than 65 000 times from more than 750 cities. In addition, about 350 members from 73 international research groups have participated in developing EVpedia. This free web-based database might serve as a useful resource to stimulate the emerging field of EV research.

Availability and implementation

The web site was implemented in PHP, Java, MySQL and Apache, and is freely available at http://evpedia.info.",2014-11-10 +28759605,THPdb: Database of FDA-approved peptide and protein therapeutics.,"THPdb (http://crdd.osdd.net/raghava/thpdb/) is a manually curated repository of Food and Drug Administration (FDA) approved therapeutic peptides and proteins. The information in THPdb has been compiled from 985 research publications, 70 patents and other resources like DrugBank. The current version of the database holds a total of 852 entries, providing comprehensive information on 239 US-FDA approved therapeutic peptides and proteins and their 380 drug variants. The information on each peptide and protein includes their sequences, chemical properties, composition, disease area, mode of activity, physical appearance, category or pharmacological class, pharmacodynamics, route of administration, toxicity, target of activity, etc. In addition, we have annotated the structure of most of the protein and peptides. A number of user-friendly tools have been integrated to facilitate easy browsing and data analysis. To assist scientific community, a web interface and mobile App have also been developed.",2017-07-31 +26099468,dbEMT: an epithelial-mesenchymal transition associated gene resource.,"As a cellular process that changes epithelial cells to mesenchymal cells, Epithelial-mesenchymal transition (EMT) plays important roles in development and cancer metastasis. Recent studies on cancer metastasis have identified many new susceptibility genes that control this transition. However, there is no comprehensive resource for EMT by integrating various genetic studies and the relationship between EMT and the risk of complex diseases such as cancer are still unclear. To investigate the cellular complexity of EMT, we have constructed dbEMT (http://dbemt.bioinfo-minzhao.org/), the first literature-based gene resource for exploring EMT-related human genes. 
We manually curated 377 experimentally verified genes from literature. Functional analyses highlighted the prominent role of proteoglycans in tumor metastatic cascades. In addition, the disease enrichment analysis provides a clue for the potential transformation in affected tissues or cells in Alzheimer's disease and Type 2 Diabetes. Moreover, the global mutation pattern of EMT-related genes across multiple cancers may reveal common cancer metastasis mechanisms. Our further reconstruction of the EMT-related protein-protein interaction network uncovered a highly modular structure. These results illustrate the importance of dbEMT to our understanding of cell development and cancer metastasis, and also highlight the utility of dbEMT for elucidating the functions of EMT-related genes.",2015-06-23 +31757180,DNA damage responses in murine Pre-B cells with genetic deficiencies in damage response genes.,"DNA damage can be generated in multiple ways from genotoxic and physiologic sources. Genotoxic damage is known to disrupt cellular functions and is lethal if not repaired properly. We compare the transcriptional programs activated in response to genotoxic DNA damage induced by ionizing radiation (IR) in abl pre-B cells from mice deficient in DNA damage response (DDR) genes Atm, Mre11, Mdc1, H2ax, 53bp1, and DNA-PKcs. We identified a core IR-specific transcriptional response that occurs in abl pre-B cells from WT mice and compared the response of the other genotypes to the WT response. We also identified genotype specific responses and compared those to each other. The WT response includes many processes involved in lymphocyte development and immune response, as well as responses associated with the molecular mechanisms of cancer, such as TP53 signaling. 
As expected, there is a range of similarity in transcriptional profiles in comparison to WT cells, with Atm-/- cells being the most different from the core WT DDR and Mre11 hypomorph (Mre11A/A) cells also very dissimilar to WT and other genotypes. For example, NF-kB-related signaling and CD40 signaling are deficient in both Atm-/- and Mre11A/A cells, but present in all other genotypes. In contrast, IR-induced TP53 signaling is seen in the Mre11A/A cells, while these responses are not seen in the Atm-/- cells. By examining the similarities and differences in the signaling pathways in response to IR when specific genes are absent, our results further illustrate the contribution of each gene to the DDR. The microarray gene expression data discussed in this paper have been deposited in NCBI's Gene Expression Omnibus (GEO) (http://www.ncbi.nlm.nih.gov/geo/) and are accessible under accession number GSE116388.",2019-11-22 +30908487,"Searching algorithm for Type IV effector proteins (S4TE) 2.0: Improved tools for Type IV effector prediction, analysis and comparison in proteobacteria.","Bacterial pathogens have evolved numerous strategies to corrupt, hijack or mimic cellular processes in order to survive and proliferate. Among those strategies, Type IV effectors (T4Es) are proteins secreted by pathogenic bacteria to manipulate host cell processes during infection. They are delivered into eukaryotic cells in an ATP-dependent manner via the type IV secretion system, a specialized multiprotein complex. T4Es contain a wide spectrum of features including eukaryotic-like domains, localization signals or a C-terminal translocation signal. A combination of these features enables prediction of T4Es in a given bacterial genome. In this study, we developed a web-based comprehensive suite of tools with a user-friendly graphical interface. 
This version 2.0 of S4TE (Searching Algorithm for Type IV Effector Proteins; http://sate.cirad.fr) enables accurate prediction and comparison of T4Es. Search parameters and threshold can be customized by the user to work with any genome sequence, whether publicly available or not. Applications range from characterizing effector features and identifying potential T4Es to analyzing the effectors based on the genome G+C composition and local gene density. S4TE 2.0 allows the comparison of putative T4E repertoires of up to four bacterial strains at the same time. The software identifies T4E orthologs among strains and provides a Venn diagram and lists of genes for each intersection. New interactive features offer the best visualization of the location of candidate T4Es and hyperlinks to NCBI and Pfam databases. S4TE 2.0 is designed to evolve rapidly with the publication of new experimentally validated T4Es, which will reinforce the predictive power of the algorithm. The computational methodology can be used to identify a wide spectrum of candidate bacterial effectors that lack sequence conservation but have similar amino acid characteristics. This approach will provide very valuable information about bacterial host-specificity and virulence factors and help identify host targets for the development of new anti-bacterial molecules.",2019-03-25 +25180527,Every family has a north star: family healing and recovery.,

Topic

This contribution describes a personal recovery journey and the creation of an organization focused on rebuilding relationships between members of families living with a parent(s) with psychiatric disabilities.

Purpose

Adults living with serious mental illnesses have the same hopes and dreams of being successful and resourceful parents to their children and contributing family members as other parents. Specific suggestions highlight ways in which mental health and psychiatric rehabilitation practitioners can support and promote recovery for families.

Sources used

Personal data and resource information available on the Child and Family Connections website located at http://www.childfamilyconnections.org.

Conclusions and implications for practice

Practical guidelines are offered to engage and work with parents living with mental illnesses. Improving our understanding and capacity to better meet the needs of parents with psychiatric disabilities will more likely enhance their roles as parents.,2014-09-01 +25753708,Analyzing glycan structure synthesis with the Glycan Pathway Predictor (GPP) Tool.,"This chapter describes the Glycan Pathway Predictor Tool, which is available as a part of the RINGS (Resource for INformatics of Glycomes at Soka) website at http://www.rings.t.soka.ac.jp/. It implements the mathematical model of N-glycosylation previously described by Krambeck et al. (Glycobiology 19:1163-1175, 2009). This tool computes the glycans that can be potentially generated from a select set of glyco-enzymes, based on a mathematical model that characterizes substrate specificity. The resulting glycans are displayed as a pathway map, which can be evaluated in an interactive manner. Detailed results can also be obtained for each glycan that is theoretically generated, along with the corresponding enzymes that were found to be involved with the selected glycan. Thus, glycobiologists can take a particular starting glycan structure as input, select a set of glyco-enzymes, and instantly retrieve the set of potentially synthesized glycans as a pathway map or as a text file. Applications of this tool are numerous, including prediction of glycan profiles for a glyco-gene knockout experiment, comparison with mass spectrometric data, etc.",2015-01-01 +30914659,Development of model web-server for crop variety identification using throughput SNP genotyping data.,"Crop varieties or genotypes of a given species are pivotal for agricultural production and ownership, management and improvement of their germplasm is a great challenge. Its morphological identification requires time, cost and descriptors are often compromised statistically due to phenotypic plasticity. 
Development of DNA based signature of varieties can overcome these limitations. There is a global need to implement world trade organization (WTO) and intellectual property rights (IPR) guidelines of Plant Breeders Rights (PBR) where DUS (distinctness, uniformity and stability) testing can be supplemented by DNA profile. Universalization and minimization of SNP number without compromising identification accuracy is the major challenge in development of varietal profile by rapid genotype assay. Besides this, there is no server-based approach reducing computational skill with global accessibility of referral phenotypic and genotypic data. We report world's first model web server for crop variety identification using >350 Indian wheat varieties and Axiom 35 K SNP chip data. Standard filtering and linkage disequilibrium approach were used to develop varietal signature in Linux using HTML, Java, PHP and MySQL with provision of QR code generator to facilitate bar-coding. Phylogenetic tree constructed by selected SNPs confirms six major trait based clusters of varieties and their pedigree. Our user friendly server based tool, VISTa (Variety Identification System of Triticum aestivum) ( http://webtom.cabgrid.res.in/vista ) can be used in DUS testing having dispute resolution of sovereignty and access benefit sharing (ABS) issues. This model approach can be used in other crops with pan-global level management of crop germplasm in endeavour of crop productivity.",2019-03-26 +31170009,Tap Water Contributions to Plasma Concentrations of Poly- and Perfluoroalkyl Substances (PFAS) in a Nationwide Prospective Cohort of U.S. Women.,"

Background

Between 2013 and 2015, concentrations of poly- and perfluoroalkyl substances (PFAS) in public drinking water supplies serving at least six million individuals exceeded the level set forth in the health advisory established by the U.S. Environmental Protection Agency. Other than data reported for contaminated sites, no systematic or prospective data exist on the relative source contribution (RSC) of drinking water to human PFAS exposures.

Objectives

This study estimates the RSC of tap water to overall PFAS exposure among members of the general U.S. population.

Methods

We measured concentrations of 15 PFAS in home tap water samples collected in 1989-1990 from 225 participants in a nationwide prospective cohort of U.S. women: the Nurses' Health Study (NHS). We used a one-compartment toxicokinetic model to estimate plasma concentrations corresponding to tap water intake of PFAS. We compared modeled results with measured plasma PFAS concentrations among a subset of 110 NHS participants.

Results

Tap water perfluorooctanoic acid (PFOA) and perfluorononanoic acid (PFNA) were statistically significant predictors of plasma concentrations among individuals who consumed [Formula: see text] cups of tap water per day. Modeled median contributions of tap water to measured plasma concentrations were: PFOA 12% (95% probability interval 11%-14%), PFNA 13% (8.7%-21%), linear perfluorooctanesulfonic acid (nPFOS) 2.2% (2.0%-2.5%), branched perfluorooctanesulfonic acid (brPFOS) 3.0% (2.5%-3.2%), and perfluorohexanesulfonic acid (PFHxS) 34% (29%-39%). In five locations, comparisons of PFASs in community tap water collected in the period 2013-2016 with samples from 1989-1990 indicated increases in quantifiable PFAS and extractable organic fluorine (a proxy for unquantified PFAS).

Conclusions

Our results for 1989-1990 compare well with the default RSC of 20% used in risk assessments for legacy PFAS by many agencies. Future evaluation of drinking water exposures should incorporate emerging PFAS. https://doi.org/10.1289/EHP4093.",2019-06-06 +30707580,BCL::MolAlign: Three-Dimensional Small Molecule Alignment for Pharmacophore Mapping.,"Small molecule flexible alignment is a critical component of both ligand- and structure-based methods in computer-aided drug discovery. Despite its importance, the availability of high-quality flexible alignment software packages is limited. Here, we present BCL::MolAlign, a freely available property-based molecular alignment program. BCL::MolAlign accommodates ligand flexibility through a combination of pregenerated conformers and on-the-fly bond rotation. BCL::MolAlign converges on alignment poses by sampling the relative orientations of mutually matching atom pairs between molecules through Monte Carlo Metropolis sampling. Across six diverse ligand data sets, BCL::MolAlign flexible alignment outperforms MOE, ROCS, and FLEXS in recovering native ligand binding poses. Moreover, the BCL::MolAlign alignment score is more predictive of ligand activity than maximum common substructure similarity across 10 data sets. Finally, on a recently published benchmark set of 20 high quality congeneric ligand-protein complexes, BCL::MolAlign is able to recover a larger fraction of native binding poses than maximum common substructure-based alignment and RosettaLigand. BCL::MolAlign can be obtained as part of the Biology and Chemistry Library (BCL) software package freely with an academic license or can be accessed via Web server at http://meilerlab.org/index.php/servers/molalign .",2019-02-12 +27402679,FANTOM5 transcriptome catalog of cellular states based on Semantic MediaWiki. 
,"The Functional Annotation of the Mammalian Genome project (FANTOM5) mapped transcription start sites (TSSs) and measured their activities in a diverse range of biological samples. The FANTOM5 project generated a large data set; including detailed information about the profiled samples, the uncovered TSSs at high base-pair resolution on the genome, their transcriptional initiation activities, and further information of transcriptional regulation. Data sets to explore transcriptome in individual cellular states encoded in the mammalian genomes have been enriched by a series of additional analysis, based on the raw experimental data, along with the progress of the research activities. To make the heterogeneous data set accessible and useful for investigators, we developed a web-based database called Semantic catalog of Samples, Transcription initiation And Regulators (SSTAR). SSTAR utilizes the open source wiki software MediaWiki along with the Semantic MediaWiki (SMW) extension, which provides flexibility to model, store, and display a series of data sets produced during the course of the FANTOM5 project. Our use of SMW demonstrates the utility of the framework for dissemination of large-scale analysis results. SSTAR is a case study in handling biological data generated from a large-scale research project in terms of maintenance and growth alongside research activities.Database URL: http://fantom.gsc.riken.jp/5/sstar/.",2016-07-09 +22693105,Automatically detecting workflows in PubChem.,"Public databases that store the data from small-molecule screens are a rich and untapped resource of chemical and biological information. However, screening databases are unorganized, which makes interpreting their data difficult. We propose a method of inferring workflow graphs--which encode the relationships between assays in screening projects--directly from screening data and using these workflows to organize each project's data. 
On the basis of four heuristics regarding the organization of screening projects, we designed an algorithm that extracts a project's workflow graph from screening data. Where possible, the algorithm is evaluated by comparing each project's inferred workflow to its documentation. In the majority of cases, there are no discrepancies between the two. Most errors can be traced to points in the project where screeners chose additional molecules to test based on structural similarity to promising molecules, a case our algorithm is not yet capable of handling. Nonetheless, these workflows accurately organize most of the data and also provide a method of visualizing a screening project. This method is robust enough to build a workflow-oriented front-end to PubChem and is currently being used regularly by both our lab and our collaborators. A Python implementation of the algorithm is available online, and a searchable database of all PubChem workflows is available at http://swami.wustl.edu/flow.",2012-06-12 +30816813,Use of Medicare Administrative Claims to Identify a Population at High Risk for Adverse Drug Events and Hospital Use for Quality Improvement.,"

Background

A system using administrative claims to monitor medication use patterns and associated adverse events is not currently available. Establishment of a standardized method to identify Medicare beneficiaries at high risk for adverse events, by assessing Medicare Part D medication claim patterns and associated outcomes, including outpatient adverse drug events (ADEs) and hospital use, enhances prevention efforts and monitoring for quality improvement efforts.

Objectives

To (a) demonstrate that Medicare claims data can be used to identify a population of beneficiaries at high risk for adverse events for quality improvement and (b) define trends associated with adverse health outcomes in identified high-risk beneficiaries for quality improvement opportunities.

Methods

We used Medicare fee-for-service Part D claims data to identify a population at high risk for adverse events by evaluating medication use patterns. This population was taking at least 3 medications, 1 of which was an anticoagulant, an opioid, or an antidiabetic agent. Next, we used associated Part A claims to calculate rates of outpatient ADEs, looking for specific ICD-9-CM or ICD-10-CM codes in the principal diagnosis code position. Rates of hospital use (inpatient hospitalization, observation stays, emergency department visits, and 30-day rehospitalizations) were also evaluated for the identified high-risk population. The data were then shared for targeted quality improvement.

Results

We identified 8,178,753 beneficiaries at high risk for adverse events, or 20.7% of the total eligible fee-for-service population (time frame of October 2016-September 2017). The overall rate of outpatient ADEs for beneficiaries at high risk was 46.28 per 1,000, with anticoagulant users demonstrating the highest rate of ADEs (68.52/1,000), followed by opioid users (42.11/1,000) and diabetic medication users (20.72/1,000). As expected, the primary setting for beneficiaries at high risk to seek care for outpatient ADEs was the emergency department, followed by inpatient hospitalizations and observation stays.

Conclusions

Medicare claims are an accessible source of data, which can be used to establish for quality improvement a population at high risk for ADEs and increased hospital use. Using medication use patterns to attribute risk and associated outcomes, such as outpatient ADEs and hospital use, is a simple process that can be readily implemented. The described method has the potential to be further validated and used as a foundation to monitor population-based quality improvement efforts for medication safety.

Disclosures

This work was performed under contract HHSM-500-2014-QINNCC, Modification No. 000004, funded by Centers for Medicare & Medicaid Services (CMS), an agency of the U.S. Department of Health and Human Services. CMS did not have a role in the analysis. At the time of this analysis, Digmann, Peppercorn, Zhang, Irby, and Brock were employees of Telligen, which was awarded the National Coordinating Center-Quality Improvement Organization contract from CMS, which supported the work. Ryan was an employee at Qsource, which was awarded the Quality Innovation Network-Quality Improvement Organization contract from CMS, which supported the work. Thomas was employed by CMS. The content is solely the responsibility of the authors and does not necessarily represent the official views or policies of the CMS. This work is posted on the QIOprogram.org website, as recommended in the Common Rule ( https://www.hhs.gov/ohrp/regulations-and-policy/regulations/common-rule/index.html ).",2019-03-01 +27632579,MetaStorm: A Public Resource for Customizable Metagenomics Annotation.,"Metagenomics is a trending research area, calling for the need to analyze large quantities of data generated from next generation DNA sequencing technologies. The need to store, retrieve, analyze, share, and visualize such data challenges current online computational systems. Interpretation and annotation of specific information is especially a challenge for metagenomic data sets derived from environmental samples, because current annotation systems only offer broad classification of microbial diversity and function. Moreover, existing resources are not configured to readily address common questions relevant to environmental systems. Here we developed a new online user-friendly metagenomic analysis server called MetaStorm (http://bench.cs.vt.edu/MetaStorm/), which facilitates customization of computational analysis for metagenomic data sets. 
Users can upload their own reference databases to tailor the metagenomics annotation to focus on various taxonomic and functional gene markers of interest. MetaStorm offers two major analysis pipelines: an assembly-based annotation pipeline and the standard read annotation pipeline used by existing web servers. These pipelines can be selected individually or together. Overall, MetaStorm provides enhanced interactive visualization to allow researchers to explore and manipulate taxonomy and functional annotation at various levels of resolution.",2016-09-15 +29041922,Institute collection and analysis of Nanobodies (iCAN): a comprehensive database and analysis platform for nanobodies.,"

Background

Nanobodies are single-domain antibodies that contain the unique structural and functional properties of naturally-occurring heavy chain in camelidae. As a novel class of antibody, they show many advantages compared with traditional antibodies such as smaller size, higher stability, improved specificity, and easier expression in microorganisms. These unusual hallmarks make them promising tools in basic research and clinical practice. Although thousands of nanobodies are known to be published, no single database provides searchable, unified annotation and integrative analysis tools for these various nanobodies.

Results

Here, we present the database of Institute Collection and Analysis of Nanobodies (iCAN). It is built with the aim of addressing the above gap to expand and accelerate nanobody research. iCAN, as the first database of nanobodies, contains the most comprehensive information to date on nanobodies and related antigens. So far, iCAN incorporates 2391 entries which include 2131 from patents and 260 from publications and provides a simple user interface for researchers to retrieve and view the detailed information of nanobodies. In addition to the data collection, iCAN also provides online bioinformatic tools for sequence analysis and characteristic feature extraction.

Conclusions

In summary, iCAN enables researchers to analyze nanobody features and explore the applications of nanobodies more efficiently. iCAN is freely available at http://ican.ils.seu.edu.cn .",2017-10-17 +25817920,MorphoCol: An ontology-based knowledgebase for the characterisation of clinically significant bacterial colony morphologies.,"

Background

One of the major concerns of the biomedical community is the increasing prevalence of antimicrobial resistant microorganisms. Recent findings show that the diversification of colony morphology may be indicative of the expression of virulence factors and increased resistance to antibiotic therapeutics. To transform these findings, and upcoming results, into a valuable clinical decision making tool, colony morphology characterisation should be standardised. Notably, it is important to establish the minimum experimental information necessary to contextualise the environment that originated the colony morphology, and describe the main morphological features associated unambiguously.

Results

This paper presents MorphoCol, a new ontology-based tool for the standardised, consistent and machine-interpretable description of the morphology of colonies formed by human pathogenic bacteria. The Colony Morphology Ontology (CMO) is the first controlled vocabulary addressing the specificities of the morphology of clinically significant bacteria, whereas the MorphoCol publicly Web-accessible knowledgebase is an end-user means to search and compare CMO annotated colony morphotypes. Its ultimate aim is to help correlate the morphological alterations manifested by colony-forming bacteria during infection with their response to the antimicrobial treatments administered.

Conclusions

MorphoCol is the first tool to address bacterial colony morphotyping systematically and deliver a free of charge resource to the community. Hopefully, it may introduce interesting features of analysis on pathogenic behaviour and play a significant role in clinical decision making.

Database url

http://morphocol.org.",2015-03-25 +30287904,Pattern to Knowledge: Deep Knowledge-Directed Machine Learning for Residue-Residue Interaction Prediction.,"Residue-residue close contact (R2R-C) data procured from three-dimensional protein-protein interaction (PPI) experiments is currently used for predicting residue-residue interaction (R2R-I) in PPI. However, due to complex physiochemical environments, R2R-I incidences, facilitated by multiple factors, are usually entangled in the source environment and masked in the acquired data. Here we present a novel method, P2K (Pattern to Knowledge), to disentangle R2R-I patterns and render much succinct discriminative information expressed in different specific R2R-I statistical/functional spaces. Since such knowledge is not visible in the data acquired, we refer to it as deep knowledge. Leveraging the deep knowledge discovered to construct machine learning models for sequence-based R2R-I prediction, without trial-and-error combination of the features over external knowledge of sequences, our R2R-I predictor was validated for its effectiveness under stringent leave-one-complex-out-alone cross-validation in a benchmark dataset, and was surprisingly demonstrated to perform better than an existing sequence-based R2R-I predictor by 28% (p: 1.9E-08). P2K is accessible via our web server on https://p2k.uwaterloo.ca .",2018-10-04 +30838378,NFPws: a web server for delineating broadly neutralizing antibody specificities from serum HIV-1 neutralization data.,"MOTIVATION:A better understanding of antibody responses to HIV-1 infection in humans can provide novel insights for the development of an effective HIV-1 vaccine. Neutralization fingerprinting (NFP) is an efficient and accurate algorithm for delineating the epitope specificities found in polyclonal antibody responses to HIV-1 infection. Here, we report the development of NFPws, a web server implementation of the NFP algorithm. 
The server takes as input serum neutralization data for a set of diverse viral strains, and uses a mathematical model to identify similarities between the serum neutralization pattern and the patterns for known broadly neutralizing monoclonal antibodies (bNAbs), in order to predict the prevalence of bNAb epitope specificities in the given serum. In addition, NFPws also computes and displays a number of estimates related to prediction confidence, as well as the likelihood of presence of novel, previously uncharacterized, antibody specificities in a given serum. NFPws also implements a JSmol viewer for molecular structure visualization of the prediction results. Overall, the NFPws server will be an important tool for the identification and analysis of epitope specificities of bNAb responses against HIV-1. AVAILABILITY AND IMPLEMENTATION:NFPws is freely available to access at (http://iglab.accre.vanderbilt.edu/NFPws). The webserver is developed using html, CSS, javascript and perl CGI scripts. The NFP algorithm is implemented with scripts written in octave, linux shell and perl. JSmol is implemented to visualize the prediction results on a representative 3D structure of an HIV-1 antigen.",2019-09-01 +25172923,The Naked Mole Rat Genome Resource: facilitating analyses of cancer and longevity-related adaptations.,"

Motivation

The naked mole rat (Heterocephalus glaber) is an exceptionally long-lived and cancer-resistant rodent native to East Africa. Although its genome was previously sequenced, here we report a new assembly sequenced by us with substantially higher N50 values for scaffolds and contigs.

Results

We analyzed the annotation of this new improved assembly and identified candidate genomic adaptations which may have contributed to the evolution of the naked mole rat's extraordinary traits, including in regions of p53, and the hyaluronan receptors CD44 and HMMR (RHAMM). Furthermore, we developed a freely available web portal, the Naked Mole Rat Genome Resource (http://www.naked-mole-rat.org), featuring the data and results of our analysis, to assist researchers interested in the genome and genes of the naked mole rat, and also to facilitate further studies on this fascinating species.",2014-08-28 +30668824,OSF-Builder: A New Tool for Constructing and Representing Evolutionary Histories Involving Introgression.,"Introgression is an evolutionary process which provides an important source of innovation for evolution. Although various methods have been used to detect introgression, very few methods are currently available for constructing evolutionary histories involving introgression. In this article, we propose a new method for constructing such evolutionary histories whose starting point is a species forest (consisting of a collection of lineage trees, usually arising as a collection of clades or monophyletic groups in a species tree), and a gene tree for a specific allele of interest, or allele tree for short. Our method is based on representing introgression in terms of a certain ""overlay"" of the allele tree over the lineage trees, called an overlaid species forest (OSF). OSFs are similar to phylogenetic networks although a key difference is that they typically have multiple roots because each monophyletic group in the species tree has a different point of origin. Employing a new model for introgression, we derive an efficient algorithm for building OSFs called OSF-Builder that is guaranteed to return an optimal OSF in the sense that the number of potential introgression events is minimized. 
As well as using simulations to assess the performance of OSF-Builder, we illustrate its use on a butterfly data set in which introgression has been previously inferred. The OSF-Builder software is available for download from https://www.uea.ac.uk/computing/software/OSF-Builder.",2019-09-01 +30649190,AQUAPONY: visualization and interpretation of phylogeographic information on phylogenetic trees.,"

Motivation

The visualization and interpretation of evolutionary spatiotemporal scenarios is broadly and increasingly used in infectious disease research, ecology or agronomy. Using probabilistic frameworks, well-known tools can infer from molecular data ancestral traits for internal nodes in a phylogeny, and numerous phylogenetic rendering tools can display such evolutionary trees. However, visualizing such ancestral information and its uncertainty on the tree remains tedious. For instance, ancestral nodes can be associated to several geographical annotations with close probabilities and thus, several migration or transmission scenarios exist.

Results

We expose a web-based tool, named AQUAPONY, that facilitates such operations. Given an evolutionary tree with ancestral (e.g. geographical) annotations, the user can easily control the display of ancestral information on the entire tree or a subtree, and can view alternative phylogeographic scenarios along a branch according to a chosen uncertainty threshold. AQUAPONY interactively visualizes the tree and eases the objective interpretation of evolutionary scenarios. AQUAPONY's implementation makes it highly responsive to user interaction, and instantaneously updates the tree visualizations even for large trees (which can be exported as image files).

Availability and implementation

AQUAPONY is coded in JavaScript/HTML, available under Cecill license, and can be freely used at http://www.atgc-montpellier.fr/aquapony/.",2019-09-01 +31016009,scClustViz - Single-cell RNAseq cluster assessment and visualization. ,"Single-cell RNA sequencing (scRNAseq) represents a new kind of microscope that can measure the transcriptome profiles of thousands of individual cells from complex cellular mixtures, such as in a tissue, in a single experiment. This technology is particularly valuable for characterization of tissue heterogeneity because it can be used to identify and classify all cell types in a tissue. This is generally done by clustering the data, based on the assumption that cells of a particular type share similar transcriptomes, distinct from other cell types in the tissue. However, nearly all clustering algorithms have tunable parameters which affect the number of clusters they will identify in data. The R Shiny software tool described here, scClustViz, provides a simple interactive graphical user interface for exploring scRNAseq data and assessing the biological relevance of clustering results. Given that cell types are expected to have distinct gene expression patterns, scClustViz uses differential gene expression between clusters as a metric for assessing the fit of a clustering result to the data at multiple cluster resolution levels. This helps select a clustering parameter for further analysis. scClustViz also provides interactive visualisation of: cluster-specific distributions of technical factors, such as predicted cell cycle stage and other metadata; cluster-wise gene expression statistics to simplify annotation of cell types and identification of cell type specific marker genes; and gene expression distributions over all cells and cell types. 
scClustViz provides an interactive interface for visualisation, assessment, and biological interpretation of cell-type classifications in scRNAseq experiments that can be easily added to existing analysis pipelines, enabling customization by bioinformaticians while enabling biologists to explore their results without the need for computational expertise. It is available at https://baderlab.github.io/scClustViz/.",2018-09-21 +27694195,Combining multiple tools outperforms individual methods in gene set enrichment analyses.,"

Motivation

Gene set enrichment (GSE) analysis allows researchers to efficiently extract biological insight from long lists of differentially expressed genes by interrogating them at a systems level. In recent years, there has been a proliferation of GSE analysis methods and hence it has become increasingly difficult for researchers to select an optimal GSE tool based on their particular dataset. Moreover, the majority of GSE analysis methods do not allow researchers to simultaneously compare gene set level results between multiple experimental conditions.

Results

The ensemble of genes set enrichment analyses (EGSEA) is a method developed for RNA-sequencing data that combines results from twelve algorithms and calculates collective gene set scores to improve the biological relevance of the highest ranked gene sets. EGSEA's gene set database contains around 25 000 gene sets from sixteen collections. It has multiple visualization capabilities that allow researchers to view gene sets at various levels of granularity. EGSEA has been tested on simulated data and on a number of human and mouse datasets and, based on biologists' feedback, consistently outperforms the individual tools that have been combined. Our evaluation demonstrates the superiority of the ensemble approach for GSE analysis, and its utility to effectively and efficiently extrapolate biological functions and potential involvement in disease processes from lists of differentially regulated genes.

Availability and implementation

EGSEA is available as an R package at http://www.bioconductor.org/packages/EGSEA/ . The gene sets collections are available in the R package EGSEAdata from http://www.bioconductor.org/packages/EGSEAdata/ .

Contacts

monther.alhamdoosh@csl.com.au mritchie@wehi.edu.au.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +24079801,Bolbase: a comprehensive genomics database for Brassica oleracea.,"

Background

Brassica oleracea is a morphologically diverse species in the family Brassicaceae and contains a group of nutrition-rich vegetable crops, including common heading cabbage, cauliflower, broccoli, kohlrabi, kale, Brussels sprouts. This diversity along with its phylogenetic membership in a group of three diploid and three tetraploid species, and the recent availability of genome sequences within Brassica provide an unprecedented opportunity to study intra- and inter-species divergence and evolution in this species and its close relatives.

Description

We have developed a comprehensive database, Bolbase, which provides access to the B. oleracea genome data and comparative genomics information. The whole genome of B. oleracea is available, including nine fully assembled chromosomes and 1,848 scaffolds, with 45,758 predicted genes, 13,382 transposable elements, and 3,581 non-coding RNAs. Comparative genomics information is available, including syntenic regions among B. oleracea, Brassica rapa and Arabidopsis thaliana, synonymous (Ks) and non-synonymous (Ka) substitution rates between orthologous gene pairs, gene families or clusters, and differences in quantity, category, and distribution of transposable elements on chromosomes. Bolbase provides useful search and data mining tools, including a keyword search, a local BLAST server, and a customized GBrowse tool, which can be used to extract annotations of genome components, identify similar sequences and visualize syntenic regions among species. Users can download all genomic data and explore comparative genomics in a highly visual setting.

Conclusions

Bolbase is the first resource platform for the B. oleracea genome and for genomic comparisons with its relatives, and thus it will help the research community to better study the function and evolution of Brassica genomes as well as enhance molecular breeding research. This database will be updated regularly with new features, improvements to genome annotation, and new genomic sequences as they become available. Bolbase is freely available at http://ocri-genomics.org/bolbase.",2013-09-30 +29912392,GeneMANIA update 2018.,"GeneMANIA (http://genemania.org) is a flexible user-friendly web site for generating hypotheses about gene function, analyzing gene lists and prioritizing genes for functional assays. Given a query gene list, GeneMANIA finds functionally similar genes using a wealth of genomics and proteomics data. In this mode, it weights each functional genomic dataset according to its predictive value for the query. Another use of GeneMANIA is gene function prediction. Given a single query gene, GeneMANIA finds genes likely to share function with it based on their interactions with it. Enriched Gene Ontology categories among this set can point to the function of the gene. Nine organisms are currently supported (Arabidopsis thaliana, Caenorhabditis elegans, Danio rerio, Drosophila melanogaster, Escherichia coli, Homo sapiens, Mus musculus, Rattus norvegicus and Saccharomyces cerevisiae). Hundreds of data sets and hundreds of millions of interactions have been collected from GEO, BioGRID, IRefIndex and I2D, as well as organism-specific functional genomics data sets. Users can customize their search by selecting specific data sets to query and by uploading their own data sets to analyze. We have recently updated the user interface to GeneMANIA to make it more intuitive and make more efficient use of visual space. 
GeneMANIA can now be used effectively on a variety of devices.",2018-07-01 +31341211,Modeling the Amplification of Immunoglobulins through Machine Learning on Sequence-Specific Features.,"Successful primer design for polymerase chain reaction (PCR) hinges on the ability to identify primers that efficiently amplify template sequences. Here, we generated a novel Taq PCR data set that reports the amplification status for pairs of primers and templates from a reference set of 47 immunoglobulin heavy chain variable sequences and 20 primers. Using logistic regression, we developed TMM, a model for predicting whether a primer amplifies a template given their nucleotide sequences. The model suggests that the free energy of annealing, ΔG, is the key driver of amplification (p = 7.35e-12) and that 3' mismatches should be considered in dependence on ΔG and the mismatch closest to the 3' terminus (p = 1.67e-05). We validated TMM by comparing its estimates with those from the thermodynamic model of DECIPHER (DE) and a model based solely on the free energy of annealing (FE). TMM outperformed the other approaches in terms of the area under the receiver operating characteristic curve (TMM: 0.953, FE: 0.941, DE: 0.896). TMM can improve primer design and is freely available via openPrimeR ( http://openPrimeR.mpi-inf.mpg.de ).",2019-07-24 +30505911,Data on differentially expressed proteins in retinal emmetropization process in guinea pig using integrated SWATH-based and targeted-based proteomics.,"Myopia is generally regarded as a failure of normal emmetropization process, however, its underlying molecular mechanisms are unclear. Retinal protein profile changes using integrated SWATH and MRM-HR MS were studied in guinea pigs at 3- and 21-days of age, where the axial elongation was significantly detected. 
Differential proteins expressions were identified, and related to pathways which are important in postnatal development in retina, proliferation, breakdown of glycogen-energy and visual phototransduction. These results are significant as key retinal protein players and pathways that underlying emmetropization can be discovered. All raw data generated from IDA and SWATH acquisitions were accepted and published in the Peptide Atlas public repository (http://www.peptideatlas.org/) for general release (Data ID PASS00746). A more comprehensive analysis of this data can be obtained in the article ""Integrated SWATH-based and targeted-based proteomics provide insights into the retinal emmetropization process in guinea pig"" in Journal of Proteomics (Shan et al., 2018) [1].",2018-08-31 +27993519,Quo vadis G protein-coupled receptor ligands? A tool for analysis of the emergence of new groups of compounds over time.,"Exponential growth in the number of compounds with experimentally verified activity towards particular target has led to the emergence of various databases gathering data on biological activity. In this study, the ligands of family A of the G Protein-Coupled Receptors that are collected in the ChEMBL database were examined, and special attention was given to serotonin receptors. Sets of compounds were examined in terms of their appearance over time, they were mapped to the chemical space of drugs deposited in DrugBank, and the emergence of structurally new clusters of compounds was indicated. In addition, a tool for detailed analysis of the obtained visualizations was prepared and made available online at http://chem.gmum.net/vischem, which enables the investigation of chemical structures while referring to particular data points depicted in the figures and changes in compounds datasets over time.",2016-12-02 +23395672,OralCard: a bioinformatic tool for the study of oral proteome.,"

Objectives

The molecular complexity of the human oral cavity can only be clarified through identification of components that participate within it. However current proteomic techniques produce high volumes of information that are dispersed over several online databases. Collecting all of this data and using an integrative approach capable of identifying unknown associations is still an unsolved problem. This is the main motivation for this work.

Results

We present the online bioinformatic tool OralCard, which comprises results from 55 manually curated articles reflecting the oral molecular ecosystem (OralPhysiOme). It comprises experimental information available from the oral proteome both of human (OralOme) and microbial origin (MicroOralOme) structured in protein, disease and organism.

Conclusions

This tool is a key resource for researchers to understand the molecular foundations implicated in biology and disease mechanisms of the oral cavity. The usefulness of this tool is illustrated with the analysis of the oral proteome associated with diabetes mellitus type 2. OralCard is available at http://bioinformatics.ua.pt/oralcard.",2013-02-08 +29873706,SPRENO: a BioC module for identifying organism terms in figure captions. ,"Recent advances in biological research reveal that the majority of the experiments strive for comprehensive exploration of the biological system rather than targeting specific biological entities. The qualitative and quantitative findings of the investigations are often exclusively available in the form of figures in published papers. There is no denying that such findings have been instrumental in intensive understanding of biological processes and pathways. However, data as such is unacknowledged by machines as the descriptions in the figure captions comprise of sumptuous information in an ambiguous manner. The abbreviated term 'SIN' exemplifies such issue as it may stand for Sindbis virus or the sex-lethal interactor gene (Drosophila melanogaster). To overcome this ambiguity, entities should be identified by linking them to the respective entries in notable biological databases. Among all entity types, the task of identifying species plays a pivotal role in disambiguating related entities in the text. In this study, we present our species identification tool SPRENO (Species Recognition and Normalization), which is established for recognizing organism terms mentioned in figure captions and linking them to the NCBI taxonomy database by exploiting the contextual information from both the figure caption and the corresponding full text. To determine the ID of ambiguous organism mentions, two disambiguation methods have been developed. 
One is based on the majority rule to select the ID that has been successfully linked to previously mentioned organism terms. The other is a convolutional neural network (CNN) model trained by learning both the context and the distance information of the target organism mention. As a system based on the majority rule, SPRENO was one of the top-ranked systems in the BioCreative VI BioID track and achieved micro F-scores of 0.776 (entity recognition) and 0.755 (entity normalization) on the official test set, respectively. Additionally, the SPRENO-CNN exhibited better precisions with lower recalls and F-scores (0.720/0.711 for entity recognition/normalization). SPRENO is freely available at https://bigodatamining.github.io/software/201801/.Database URL: https://bigodatamining.github.io/software/201801/.",2018-01-01 +31678214,"PromPDD, a web-based tool for the prediction, deciphering and design of promiscuous peptides that bind to HLA class I molecules.","Promiscuous peptides that can be presented by multiple human leukocyte antigens (HLAs) have great potential for the development of vaccines with wide population coverage. However, the current available methods for the prediction of peptides that bind to major histocompatibility complex (MHC) are mainly aimed at the rapid or mass screening of potential T cell epitopes from pathogen antigens or proteomics. The current approaches do not allow deciphering the contribution of the residue at each peptide position to the promiscuous binding ability of the peptide or obtaining guidelines for the design of promiscuous peptides. In this study, we re-evaluated and characterized four matrix-based prediction models that have been extensively used for the prediction of HLA-binding peptides and found that the prediction models generated based on the average relative binding (ARB) matrix shared a consistent and conservative threshold for all well-studied HLA class I alleles. 
Evaluations performed using datasets of HLA supertype-specific peptides with various cross-binding abilities and peptide mutant analogues indicated that the ARB-based binding matrices could be used to decipher and design promiscuous peptides that bind to multiple HLA molecules. A web-based tool called PromPDD was developed using ARB matrix-based models, and this tool enables the prediction, deciphering and design of promiscuous peptides that bind to multiple HLA molecules within or across HLA supertypes in a simpler and more direct manner. Furthermore, we expanded the application of PromPDD to HLA class I alleles with limited experimentally verified data by generating pan-specific matrices using a derived modular method, and 2641 HLA molecules encoded by HLA-A and HLA-B genes are available in PromPDD. PromPDD, which is freely available at http://www.immunoinformatics.net/PromPDD/, is the first tool for the deciphering and design of promiscuous peptides that bind to HLA class I molecules.",2019-10-31 +28813437,"A systematic review of the relationship between internet use, self-harm and suicidal behaviour in young people: The good, the bad and the unknown.","

Background

Research exploring internet use and self-harm is rapidly expanding amidst concerns regarding influences of on-line activities on self-harm and suicide, especially in young people. We aimed to systematically review evidence regarding the potential influence of the internet on self-harm/suicidal behaviour in young people.

Methods

We conducted a systematic review based on an electronic search for articles published between 01/01/2011 and 26/01/2015 across databases including Medline, Cochrane and PsychInfo. Articles were included if: the study examined internet use by individuals who engaged in self-harm/ suicidal behaviour, or internet use clearly related to self-harm content; reported primary empirical data; participants were aged under 25 years. New studies were combined with those identified in a previous review and subject to data extraction, quality rating and narrative synthesis.

Results

Forty-six independent studies (51 articles) of varying quality were included. Perceived influences were: positive for 11 studies (38191 participants); negative for 18 studies (119524 participants); and mixed for 17 studies (35235 participants). In contrast to previous reviews on this topic studies focused on a wide range of internet mediums: general internet use; internet addiction; online intervention/treatment; social media; dedicated self-harm websites; forums; video/image sharing and blogs. A relationship between internet use and self-harm/suicidal behaviour was particularly associated with internet addiction, high levels of internet use, and websites with self-harm or suicide content. While there are negative aspects of internet use the potential for isolation reduction, outreach and as a source of help and therapy were also identified.

Conclusions

There is significant potential for harm from online behaviour (normalisation, triggering, competition, contagion) but also the potential to exploit its benefits (crisis support, reduction of social isolation, delivery of therapy, outreach). Young people appear to be increasingly using social media to communicate distress, particularly to peers. The focus should now be on how specific mediums (social media, video/image sharing) might be used in therapy and recovery. Clinicians working with young people who self-harm or have mental health issues should engage in discussion about internet use. This should be a standard item during assessment. A protocol for this review was registered with the PROSPERO systematic review protocol registry: (http://www.crd.york.ac.uk/prospero/display_record.asp?ID=CRD42015019518).",2017-08-16 +30938767,Assessing reproducibility of matrix factorization methods in independent transcriptomes.,"MOTIVATION:Matrix factorization (MF) methods are widely used in order to reduce dimensionality of transcriptomic datasets to the action of few hidden factors (metagenes). MF algorithms have never been compared based on the between-datasets reproducibility of their outputs in similar independent datasets. Lack of this knowledge might have a crucial impact when generalizing the predictions made in a study to others. RESULTS:We systematically test widely used MF methods on several transcriptomic datasets collected from the same cancer type (14 colorectal, 8 breast and 4 ovarian cancer transcriptomic datasets). Inspired by concepts of evolutionary bioinformatics, we design a novel framework based on Reciprocally Best Hit (RBH) graphs in order to benchmark the MF methods for their ability to produce generalizable components. We show that a particular protocol of application of independent component analysis (ICA), accompanied by a stabilization procedure, leads to a significant increase in the between-datasets reproducibility. 
Moreover, we show that the signals detected through this method are systematically more interpretable than those of other standard methods. We developed a user-friendly tool for performing the Stabilized ICA-based RBH meta-analysis. We apply this methodology to the study of colorectal cancer (CRC) for which 14 independent transcriptomic datasets can be collected. The resulting RBH graph maps the landscape of interconnected factors associated to biological processes or to technological artifacts. These factors can be used as clinical biomarkers or robust and tumor-type specific transcriptomic signatures of tumoral cells or tumoral microenvironment. Their intensities in different samples shed light on the mechanistic basis of CRC molecular subtyping. AVAILABILITY AND IMPLEMENTATION:The RBH construction tool is available from http://goo.gl/DzpwYp. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-11-01 +25348404,WDSPdb: a database for WD40-repeat proteins.,"WD40-repeat proteins, as one of the largest protein families, often serve as platforms to assemble functional complexes through the hotspot residues on their domain surfaces, and thus play vital roles in many biological processes. Consequently, it is highly required for researchers who study WD40 proteins and protein-protein interactions to obtain structural information of WD40 domains. Systematic identification of WD40-repeat proteins, including prediction of their secondary structures, tertiary structures and potential hotspot residues responsible for protein-protein interactions, may constitute a valuable resource upon this request. To achieve this goal, we developed a specialized database WDSPdb (http://wu.scbb.pkusz.edu.cn/wdsp/) to provide these details of WD40-repeat proteins based on our recently published method WDSP. The WDSPdb contains 63,211 WD40-repeat proteins identified from 3383 species, including most well-known model organisms. 
To better serve the community, we implemented a user-friendly interactive web interface to browse, search and download the secondary structures, 3D structure models and potential hotspot residues provided by WDSPdb.",2014-10-27 +31035918,"Software tool for internal standard based normalization of lipids, and effect of data-processing strategies on resulting values.","

Background

Lipidomics, the comprehensive measurement of lipids within a biological system or substrate, is an emerging field with significant potential for improving clinical diagnosis and our understanding of health and disease. While lipids' diverse biological roles contribute to their clinical utility, the diversity of lipid structure and concentrations proves to make lipidomics analytically challenging. Without internal standards to match each lipid species, researchers often apply individual internal standards to a broad range of related lipids. To aid in standardizing and automating this relative quantitation process, we developed LipidMatch Normalizer (LMN) http://secim.ufl.edu/secim-tools/ which can be used in most open source lipidomics workflows.

Results

LMN uses a ranking system (1-3) to assign lipid standards to target analytes. A ranking of 1 signifies that both the lipid class and adduct of the internal standard and target analyte match, while a ranking of 3 signifies that neither the adduct nor class match. If multiple internal standards are provided for a lipid class, standards with the closest retention time to the target analyte will be chosen. The user can also signify which lipid classes an internal standard represents, for example indicating that ether-linked phosphatidylcholine can be semi-quantified using phosphatidylcholine. LMN is designed to work with any lipid identification software and feature finding software, and in this study is used to quantify lipids in NIST SRM 1950 human plasma annotated using LipidMatch and MZmine.

Conclusions

LMN can be integrated into an open source workflow which completes all data processing steps including feature finding, annotation, and quantification for LC-MS/MS studies. Using LMN we determined that in certain cases the use of peak height versus peak area, certain adducts, and negative versus positive polarity data can have major effects on the final concentration obtained.",2019-04-29 +31667228,Data in brief on inter-row rainwater harvest and fertilizer application on yield of maize and pigeon-pea cropping systems in sub humid tropics.,"Soil moisture management and fertilizer micro-dosing on yield and land utilization efficiency of inter-cropping maize-pigeon-pea in sub humid Tanzania [1]. Farmers typically grow pigeon-pea as a mixed cropping system, the advances of these systems have been well studied, for example: increased productivity and rainfall infiltration. Much research has been done on cereal-pigeon pea intercropping on research stations, comparing yields in intercrops with sole maize. However, the role of inorganic fertilizers in sustainably intensifying intercropping systems has not been optimalised in all cases. For example in a recent study ""Sustainable Intensification with Cereal-Legume Intercropping in Eastern and Southern Africa"" published in Sustainability 2019, 11, 2891; https://doi.org/10.3390/su11102891, also the effect of inorganic fertilizers were studied. But usually these studies did not pay attention on the relation with water supply. Data in this article presents rainfall variability in the season and between seasons, yield of maize (Zea mays cv. TMV1) and pigeon-pea (Cajanus cajan cv. Babati White) under sole crop and intercropping. Yield of maize and pigeon-pea is analyzed under inter-row rainwater harvesting practices and fertilizer application in the field. Sole cropping and intercropping biological and/or economic yield are used to determine land use efficiency through land equivalent ratio. 
Comparisons between sites and seasons are done using a T-test.",2019-08-30 +22345505,ANAP: an integrated knowledge base for Arabidopsis protein interaction network analysis.,"Protein interactions are fundamental to the molecular processes occurring within an organism and can be utilized in network biology to help organize, simplify, and understand biological complexity. Currently, there are more than 10 publicly available Arabidopsis (Arabidopsis thaliana) protein interaction databases. However, there are limitations with these databases, including different types of interaction evidence, a lack of defined standards for protein identifiers, differing levels of information, and, critically, a lack of integration between them. In this paper, we present an interactive bioinformatics Web tool, ANAP (Arabidopsis Network Analysis Pipeline), which serves to effectively integrate the different data sets and maximize access to available data. ANAP has been developed for Arabidopsis protein interaction integration and network-based study to facilitate functional protein network analysis. ANAP integrates 11 Arabidopsis protein interaction databases, comprising 201,699 unique protein interaction pairs, 15,208 identifiers (including 11,931 The Arabidopsis Information Resource Arabidopsis Genome Initiative codes), 89 interaction detection methods, 73 species that interact with Arabidopsis, and 6,161 references. ANAP can be used as a knowledge base for constructing protein interaction networks based on user input and supports both direct and indirect interaction analysis. It has an intuitive graphical interface allowing easy network visualization and provides extensive detailed evidence for each interaction. 
In addition, ANAP displays the gene and protein annotation in the generated interactive network with links to The Arabidopsis Information Resource, the AtGenExpress Visualization Tool, the Arabidopsis 1,001 Genomes GBrowse, the Protein Knowledgebase, the Kyoto Encyclopedia of Genes and Genomes, and the Ensembl Genome Browser to significantly aid functional network analysis. The tool is available open access at http://gmdd.shgmo.org/Computational-Biology/ANAP.",2012-02-16 +27924046,New data and features for advanced data mining in Manteia.,"Manteia is an integrative database available online at http://manteia.igbmc.fr which provides a large array of OMICs data related to the development of the mouse, chicken, zebrafish and human. The system is designed to use different types of data together in order to perform advanced datamining, test hypotheses or provide candidate genes involved in biological processes or responsible for human diseases. In this new version of the database, Manteia has been enhanced with new expression data originating from microarray and next generation sequencing experiments. In addition, the system includes new statistics tools to analyze lists of genes in order to compare their functions and highlight their specific features. One of the main novelties of this release is the integration of a machine learning tool called Lookalike that we have developed to analyze the different datasets present in the system in order to identify new disease genes. This tool identifies the key features of known disease genes to provide and rank new candidates with similar properties from the genome. 
It is also designed to highlight and take into account the specificities of a disease in order to increase the accuracy of its predictions.",2016-10-24 +30763461,Taxane-based chemohormonal therapy for metastatic hormone-sensitive prostate cancer: a Cochrane Review.,"To provide a precis of the Cochrane Collaboration Review of taxane-based chemohormonal therapy for metastatic hormone-sensitive prostate cancer by Sathianathen NJ, Philippou YA, Kuntz GM et al. Cochrane Database of Systematic Reviews 2018, Issue 10. Art. No.: CD012816. https://doi.org/10.1002/14651858.cd012816.pub2.",2019-03-20 +24951797,UCbase 2.0: ultraconserved sequences database (2014 update). ,"UCbase 2.0 (http://ucbase.unimore.it) is an update, extension and evolution of UCbase, a Web tool dedicated to the analysis of ultraconserved sequences (UCRs). UCRs are 481 sequences >200 bases sharing 100% identity among human, mouse and rat genomes. They are frequently located in genomic regions known to be involved in cancer or differentially expressed in human leukemias and carcinomas. UCbase 2.0 is a platform-independent Web resource that includes the updated version of the human genome annotation (hg19), information linking disorders to chromosomal coordinates based on the Systematized Nomenclature of Medicine classification, a query tool to search for Single Nucleotide Polymorphisms (SNPs) and a new text box to directly interrogate the database using a MySQL interface. To facilitate the interactive visual interpretation of UCR chromosomal positioning, UCbase 2.0 now includes a graph visualization interface directly linked to UCSC genome browser. Database URL: http://ucbase.unimore.it.",2014-06-19 +32468465,Alzheimer's Disease Therapeutic Approaches.,"Alzheimer's disease (AD) was first described and diagnosed by Dr. Alois Alzheimer in 1906 (Hippius and Neundorfer, Dialogues Clin Neurosc 5:101-108, 2003). 
According to World Health Organization (WHO), AD is the most common cause of dementia, accounting for as many as 60-70% of senile dementia cases and affecting 47.5 million people worldwide (data from 2015) (Dementia Fact Sheet No 362. http://who.int/mediacentre/factsheets/fs362/en/ ). The median survival time after the onset of dementia ranges from 3.3 to 11.7 years (Todd et al. Int J Geriatr Psychiatry 28:1109-1124, 2013). AD is characterized as a severe, chronic, incurable, and progressive neurodegenerative disorder, associated with memory loss and cognition impairment accompanied by abnormal behavior and personality changes (Godyn et al. Pharmacol Rep 68:127-138, 2016). AD is characterized by neuronal death, which usually correlates with the appearance of key neuropathological changes, including acetylcholine deficiency, glutamate excitotoxicity, extracellular deposition of β-amyloid (Aβ plaques), intracellular neurofibrillary tangles by hyperphosphorylated tau protein deposits, neuroinflammation, and widespread neuronal loss (Godyn et al. Pharmacol Rep 68:127-138, 2016; Graham et al. Annu Rev. Med 68:413-430, 2017). The discovery of the degeneration of cholinergic neurons and the reduction of acetylcholine levels in postmortem studies of patients resulted in the use of drugs that leads to the increase of acetylcholine levels in brain (Dubois et al. Lacet Neurol 13:614-629, 2014). At present there is no preventative or curative treatment that interferes with the development of the disease. However, in recent years progress was made in the development of cholinergic drugs which have a positive effect on disease progression. Nowadays, specific drugs that can inhibit the enzyme that degrades acetylcholine are used. The development of new effective drugs involves a difficult and time-consuming process, accompanied by a very high failure rate. 
In the absence of effective therapies, the estimated number of people with dementia will reach 115 to 131, five million by 2050 (Dubois et al. Lacet Neurol 13:614-629, 2014; Cummings et al. Alzheimers Res Ther 6:37, 2014). Novel therapies and new targets required for developing more effective drugs for the treatment of AD patients are urgently needed.",2020-01-01 +21609966,Data mining using the Catalogue of Somatic Mutations in Cancer BioMart.,"Catalogue of Somatic Mutations in Cancer (COSMIC) (http://www.sanger.ac.uk/cosmic) is a publicly available resource providing information on somatic mutations implicated in human cancer. Release v51 (January 2011) includes data from just over 19,000 genes, 161,787 coding mutations and 5573 gene fusions, described in more than 577,000 tumour samples. COSMICMart (COSMIC BioMart) provides a flexible way to mine these data and combine somatic mutations with other biological relevant data sets. This article describes the data available in COSMIC along with examples of how to successfully mine and integrate data sets using COSMICMart. DATABASE URL: http://www.sanger.ac.uk/genetics/CGP/cosmic/biomart/martview/.",2011-05-23 +30972342,Effective Multidisciplinary Search Strategies for Assistance Animals: A Librarian's Perspective.,"Successful search strategies are based on good background knowledge and a focused clinical research question. Due to the multidisciplinary nature of research involving assistance animals means there is no one universal database to answer all research questions. The topic of assistance animals can yield better results when creating subheadings based on discipline focus. Subheadings have been divided into ethicolegal, sociocultural, psychobehavioral, and medical/veterinary. Each subheading, or discipline, has their own specific databases that will yield higher relevant content than others. 
Contacting local academic librarians and utilizing search guides created by those librarians can lead to successful search strategies. The goal of this article is to create a template for successful search strategies in assistance animals. Eighty-nine subject guides curated by academic librarians are reviewed to identify strong databases for each topic of ethicolegal, sociocultural, pscyhobehavioral, and medical/veterinary topics in relationship to assistance animals. A live subject guide has been created and maintained at https://www.library.ucdavis.edu/guide/assistance-animals/.",2019-03-19 +30124047,Deep Dive on the Proteome of Human Cerebrospinal Fluid: A Valuable Data Resource for Biomarker Discovery and Missing Protein Identification.,"Cerebrospinal fluid (CSF) is a body fluid of choice for biomarker studies of brain disorders but remains relatively under-studied compared with other biological fluids such as plasma, partly due to the more invasive means of its sample collection. The present study establishes an in-depth CSF proteome through the analysis of a unique CSF sample from a pool of donors. After immunoaffinity depletion, the CSF sample was fractionated using off-gel electrophoresis and analyzed with liquid chromatography tandem mass spectrometry (MS) using the latest generation of hybrid Orbitrap mass spectrometers. The shotgun proteomic analysis allowed the identification of 20 689 peptides mapping on 3379 proteins. To the best of our knowledge, the obtained data set constitutes the largest CSF proteome published so far. Among the CSF proteins identified, 34% correspond to genes whose transcripts are highly expressed in brain according to the Human Protein Atlas. The principal Alzheimer's disease biomarkers (e.g., tau protein, amyloid-β, apolipoprotein E, and neurogranin) were detected. 
Importantly, our data set significantly contributes to the Chromosome-centric Human Proteome Project (C-HPP), and 12 proteins considered as missing are proposed for validation in accordance with the HPP guidelines. Of these 12 proteins, 8 proteins are based on 2 to 6 uniquely mapping peptides from this CSF analysis, and 4 match a new peptide with a ""stranded"" single peptide in PeptideAtlas from previous CSF studies. The MS proteomic data are available to the ProteomeXchange Consortium ( http://www.proteomexchange.org/ ) with the data set identifier PXD009646.",2018-08-31 +30904539,Asian isolates of Anaplasma phagocytophilum: Multilocus sequence typing.,"Anaplasma phagocytophilum is the bacterial agent of granulocytic anaplasmosis in humans and animals; it is widely distributed in Eurasia and North America and transmitted mainly by Ixodes ticks. Several approaches have been used to study genetic diversity in A. phagocytophilum, multilocus sequence typing (MLST) currently being the most reliable and comparable. The MLST method based on seven housekeeping loci, 2877 bp total length, has been used to create and maintain the MLST database available worldwide (https://pubmlst.org/aphagocytophilum/). Before this study, the database contained 150 sequence types (STs) and 418 isolates, 397 of them originating from Europe and 21 from the USA, with none from Asia. We typed 25 A. phagocytophilum isolated from Ixodes ticks collected in the Asian part of Russia and compared the results with the conventional 16S rRNA typing. Substantial variability in the primer binding sites was found, so we had to modify the original primers for six out of seven loci. None of the sequences obtained matched those from the database; 15 new STs and 39 new alleles were revealed. Russian isolates belonged to two clusters, cluster 1 (19 isolates) and 3 (6 isolates), in both of which they formed separate clades. For the first time, we found A. phagocytophilum isolates from Ixodes persulcatus and I. 
pavlovskyi to belong to cluster 3, previously containing only the strains from voles and shrews. Further research is needed to estimate the prevalence of two MLST clusters of A. phagocytophilum in ticks and vertebrate hosts in Asia.",2019-03-18 +30886413,SIRIUS 4: a rapid tool for turning tandem mass spectra into metabolite structure information.,"Mass spectrometry is a predominant experimental technique in metabolomics and related fields, but metabolite structural elucidation remains highly challenging. We report SIRIUS 4 (https://bio.informatik.uni-jena.de/sirius/), which provides a fast computational approach for molecular structure identification. SIRIUS 4 integrates CSI:FingerID for searching in molecular structure databases. Using SIRIUS 4, we achieved identification rates of more than 70% on challenging metabolomics datasets.",2019-03-18 +30335217,Obtaining a Genetic Family History Using Computer-Based Tools.,"Family health history has long been known to be a powerful predictor of individual disease risk. It can be obtained prior to DNA sequencing in order to examine inheritance patterns, to be used as a proxy for genetic information, or as a tool to guide decision-making on the utility of diagnostic genetic testing. Increasingly, it is also being obtained retrospectively from sequenced individuals to examine familial disease penetrance and to identify at-risk relatives for cascade testing. The collection of adequate family history information to screen patients for disease risk and guide decision-making is a time-consuming process that is difficult to accomplish exclusively through discussion between patients and their providers. Engaging individuals and families in data collection and data entry has the potential to improve data accuracy through re-iterative review with family members and health care providers, and to empower patients in their healthcare. 
In addition, electronic datasets can be shared amongst relatives and stored in electronic health records or personal files, enabling portability of family history information. The U.S. Surgeon General, the Centers for Disease Control and Prevention (CDC), and others have developed tools for electronic family history collection to help families and providers obtain this useful information in an efficient manner. This unit describes the utility of the web-based My Family Health Portrait (https://familyhistory.hhs.gov) as the prototype for patient-entered family history. © 2018 by John Wiley & Sons, Inc.",2018-10-18 +29036533,gVolante for standardizing completeness assessment of genome and transcriptome assemblies.,"

Motivation

Along with the increasing accessibility to comprehensive sequence information, such as whole genomes and transcriptomes, the demand for assessing their quality has been multiplied. To this end, metrics based on sequence lengths, such as N50, have become a standard, but they only evaluate one aspect of assembly quality. Conversely, analyzing the coverage of pre-selected reference protein-coding genes provides essential content-based quality assessment, but the currently available pipelines for this purpose, CEGMA and BUSCO, do not have a user-friendly interface to serve as a uniform environment for assembly completeness assessment.

Results

Here, we introduce a brand-new web server, gVolante, which provides an online tool for (i) on-demand completeness assessment of sequence sets by means of the previously developed pipelines CEGMA and BUSCO and (ii) browsing pre-computed completeness scores for publicly available data in its database section. Completeness assessments performed on gVolante report scores based on not just the coverage of reference genes but also on sequence lengths (e.g. N50 scaffold length), allowing quality control in multiple aspects. Using gVolante, one can compare the quality of original assemblies between their multiple versions (obtained through program choice and parameter tweaking, for example) and evaluate them in comparison to the scores of public resources found in the database section.

Availability and implementation

gVolante is freely available at https://gvolante.riken.jp/.

Contact

shigehiro.kuraku@riken.jp.",2017-11-01 +29036425,Motif independent identification of potential RNA G-quadruplexes by G4RNA screener.,"

Motivation

G-quadruplex structures in RNA molecules are known to have regulatory impacts in cells but are difficult to locate in the genome. The minimal requirements for G-quadruplex folding in RNA (G≥3N1-7 G≥3N1-7 G≥3N1-7 G≥3) are being challenged by observations made on specific examples in recent years. The definition of potential G-quadruplex sequences has major repercussions on the observation of the structure since it introduces a bias. The canonical motif only describes a sub-population of the reported G-quadruplexes. To address these issues, we propose an RNA G-quadruplex prediction strategy that does not rely on a motif definition.

Results

We trained an artificial neural network with sequences of experimentally validated G-quadruplexes from the G4RNA database encoded using an abstract definition of their sequence. This artificial neural network, G4NN, evaluates the similarity of a given sequence to known G-quadruplexes and reports it as a score. G4NN has a predictive power comparable to the reported G richness and G/C skewness evaluations that are the current state-of-the-art for the identification of potential RNA G-quadruplexes. We combined these approaches in the G4RNA screener, a program designed to manage and evaluate the sequences to identify potential G-quadruplexes.

Availability and implementation

G4RNA screener is available for download at http://gitlabscottgroup.med.usherbrooke.ca/J-Michel/g4rna_screener.

Contact

jean-michel.garant@usherbrooke.ca or jean-pierre.perreault@usherbrooke.ca or michelle.scott@usherbrooke.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +23823062,The SPECIES and ORGANISMS Resources for Fast and Accurate Identification of Taxonomic Names in Text.,"The exponential growth of the biomedical literature is making the need for efficient, accurate text-mining tools increasingly clear. The identification of named biological entities in text is a central and difficult task. We have developed an efficient algorithm and implementation of a dictionary-based approach to named entity recognition, which we here use to identify names of species and other taxa in text. The tool, SPECIES, is more than an order of magnitude faster and as accurate as existing tools. The precision and recall was assessed both on an existing gold-standard corpus and on a new corpus of 800 abstracts, which were manually annotated after the development of the tool. The corpus comprises abstracts from journals selected to represent many taxonomic groups, which gives insights into which types of organism names are hard to detect and which are easy. Finally, we have tagged organism names in the entire Medline database and developed a web resource, ORGANISMS, that makes the results accessible to the broad community of biologists. The SPECIES software is open source and can be downloaded from http://species.jensenlab.org along with dictionary files and the manually annotated gold-standard corpus. The ORGANISMS web resource can be found at http://organisms.jensenlab.org.",2013-06-18 +29163505,IL17eScan: A Tool for the Identification of Peptides Inducing IL-17 Response.,"IL-17 cytokines are pro-inflammatory cytokines and are crucial in host defense against various microbes. Induction of these cytokines by microbial antigens has been investigated in the case of ischemic brain injury, gingivitis, candidiasis, autoimmune myocarditis, etc. In this study, we have investigated the ability of amino acid sequence of antigens to induce IL-17 response using machine-learning approaches. 
A total of 338 IL-17-inducing and 984 IL-17 non-inducing peptides were retrieved from Immune Epitope Database. 80% of the data were randomly selected as training dataset and rest 20% as validation dataset. To predict the IL-17-inducing ability of peptides/protein antigens, different sequence-based machine-learning models were developed. The performance of support vector machine (SVM) and random forest (RF) was compared with different parameters to predict IL-17-inducing epitopes (IIEs). The dipeptide composition-based SVM-model displayed an accuracy of 82.4% with Matthews correlation coefficient = 0.62 at polynomial (t = 1) kernel on 10-fold cross-validation and outperformed RF. Amino acid residues Leu, Ser, Arg, Asn, and Phe and dipeptides LL, SL, LK, IL, LI, NL, LR, FK, SF, and LE are abundant in IIEs. The present tool helps in the identification of IIEs using machine-learning approaches. The induction of IL-17 plays an important role in several inflammatory diseases, and identification of such epitopes would be of great help to the immunologists. It is freely available at http://metagenomics.iiserb.ac.in/IL17eScan/ and http://metabiosys.iiserb.ac.in/IL17eScan/.",2017-10-31 +27989944,Top-down protein identification using isotopic envelope fingerprinting.,"For top-down protein database search and identification from tandem mass spectra, our isotopic envelope fingerprinting search algorithm and ProteinGoggle search engine have demonstrated their strength of efficiently resolving heavily overlapping data as well separating non-ideal data with non-ideal isotopic envelopes from ideal ones with ideal isotopic envelopes. Here we report our updated ProteinGoggle 2.0 for intact protein database search with full-capacity. 
The indispensable updates include users' optional definition of dynamic post-translational modifications and static chemical labeling during database creation, comprehensive dissociation methods and ion series, as well as a Proteoform Score for each proteoform. ProteinGoggle has previously been benchmarked with both collision-based dissociation (CID, HCD) and electron-based dissociation (ETD) data of either intact proteins or intact proteomes. Here we report our further benchmarking of the new version of ProteinGoggle with publicly available photon-based dissociation (UVPD) data (http://hdl.handle.net/2022/17316) of intact E. coli ribosomal proteins.

Biological significance

Protein species (aka proteoforms) function at their molecular level, and diverse structures and biological roles of every proteoform come from often co-occurring proteolysis, amino acid variation and post-translational modifications. Complete and high-throughput capture of this combinatorial information of proteoforms has become possible in evolving top-down proteomics; yet, various methods and technologies, especially database search and bioinformatics identification tools, in the top-down pipeline are still in their infancy stages and demand intensive research and development.",2016-10-27 +29458937,Chinese herbal medicine Dengzhan Shengmai capsule as adjunctive treatment for ischemic stroke: A systematic review and meta-analysis of randomized clinical trials.,"

Objective

The existing eligible randomized controlled trials (RCTs) were critically appraised for the effectiveness and safety of Chinese herbal medicine Dengzhan Shengmai for ischemic stroke.

Design

Systematic review and meta-analysis (CRD42016042914, http://www.crd.york.ac.uk/PROSPERO).

Methods

Six electronic databases were searched from inception to May 2016. Risk ratio (RR) and mean difference (MD) with a 95% confidence interval (CI) were used as effect estimates using RevMan 5.3. Meta-analysis was performed where data were available. A summary of finding table was generated by the GRADEpro (version 3.6).

Results

We identified 14 RCTs involving 5206 participants. Majority of the included trials were of high risk of bias in methodological quality. For acute ischemic stroke, adding DZSM capsule to conventional therapy achieved higher Barthel Index scores (MD 22.37, 95% CI 21.34-23.40), lower neurological function deficit scores (MD - 3.73, 95% CI -5.27 to -2.19) and lower recurrence rate (RR 0.22, 95% CI 0.10, 0.46). For patients in their convalescence (or sequelae) stage of ischemic stroke, DZSM capsule was superior in improving quality of life (MD 28.8, 95% CI 7.10-50.50) and recurrence rate (RR 0.71, 95% CI 0.51-0.99) compared to placebo. No trials reported serious adverse events.

Conclusion

DZSM capsule appears to improve neurological function, quality of life, and reduce recurrence rate based on conventional therapy for ischemic stroke. DZSM capsule seems generally safe for clinical application. However, the findings of benefit are inconclusive due to generally weak evidence, and further large, rigorous trials are still warranted.",2017-12-09 +25332395,Updates in Rhea--a manually curated resource of biochemical reactions.,"Rhea (http://www.ebi.ac.uk/rhea) is a comprehensive and non-redundant resource of expert-curated biochemical reactions described using species from the ChEBI (Chemical Entities of Biological Interest) ontology of small molecules. Rhea has been designed for the functional annotation of enzymes and the description of genome-scale metabolic networks, providing stoichiometrically balanced enzyme-catalyzed reactions (covering the IUBMB Enzyme Nomenclature list and additional reactions), transport reactions and spontaneously occurring reactions. Rhea reactions are extensively curated with links to source literature and are mapped to other publicly available enzyme and pathway databases such as Reactome, BioCyc, KEGG and UniPathway, through manual curation and computational methods. Here we describe developments in Rhea since our last report in the 2012 database issue of Nucleic Acids Research. These include significant growth in the number of Rhea reactions and the inclusion of reactions involving complex macromolecules such as proteins, nucleic acids and other polymers that lie outside the scope of ChEBI. Together these developments will significantly increase the utility of Rhea as a tool for the description, analysis and reconciliation of genome-scale metabolic models.",2014-10-20 +23674824,Elucidating human phosphatase-substrate networks.,"Phosphatases are crucially involved in cellular processes by dephosphorylating cellular components. 
We describe a structure-based classification scheme for all active human phosphatases that reveals previously unrecognized relationships between them. By collating protein and nonprotein substrates and integrating colocalization and coexpression data, we generated a human phosphatase-substrate network. Analysis of the protein sequences surrounding sites of dephosphorylation suggested that common recognition mechanisms may apply to both kinases and a subset of phosphatases. Analysis of three-dimensional substrate recognition by protein phosphatases revealed preferred domains in the substrates. We identified phosphatases with highly specific substrates and those with less specificity by examining the relationship between phosphatases, kinases, and their shared substrates and showed how this analysis can be used to generate testable hypotheses about phosphatase biological function. DEPOD (human DEPhOsphorylation Database, version 1.0, http://www.DEPOD.org) is an online resource with information about active human phosphatases, their substrates, and the pathways in which they function. The database includes links to kinases and chemical modulators of phosphatase activity and contains a sequence similarity search function for identifying related proteins in other species.",2013-05-14 +29907681,"Case Studies of Gastric, Lung, and Oral Cancer Connect Etiologic Agent Prevalence to Cancer Incidence.","Obtaining detailed individual-level data on both exposure and cancer outcomes is challenging, and it is difficult to understand and characterize how temporal aspects of exposures translate into cancer risk. We show that, in lieu of individual-level information, population-level data on cancer incidence and etiologic agent prevalence can be leveraged to investigate cancer mechanisms and to better characterize and predict cancer trends. We use mechanistic carcinogenesis models [multistage clonal expansion (MSCE) models] and data on smoking, Helicobacter pylori (H. 
pylori), and HPV infection prevalence to investigate trends of lung, gastric, and HPV-related oropharyngeal cancers. MSCE models are based on the initiation-promotion-malignant conversion paradigm and allow for interpretation of trends in terms of general biological mechanisms. We assumed the rates of initiation depend on the prevalence of the corresponding risk factors. We performed two types of analysis, using the agent prevalence and cancer incidence data to estimate the model parameters and using cancer incidence data to infer the etiologic agent prevalence as well as the model parameters. By including risk factor prevalence, MSCE models with as few as three parameters closely reproduced 40 years of age-specific cancer incidence data. We recovered trends of H. pylori prevalence in the United States and demonstrated that cohort effects can explain the observed bimodal, age-specific pattern of oral HPV prevalence in men. Our results demonstrate the potential for joint analyses of population-level cancer and risk factor data through mechanistic modeling. This approach can be a first step in systematically testing relationships between exposures and cancer risk when individual-level data is lacking.Significance: Analysis of trends in risk-factor prevalence and cancer incidence can shed light on cancer mechanisms and the way that carcinogen exposure through time shapes the risk of cancer at different ages.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/12/3386/F1.large.jpg Cancer Res; 78(12); 3386-96. ©2018 AACR.",2018-06-01 +29522192,esATAC: an easy-to-use systematic pipeline for ATAC-seq data analysis.,"Summary:ATAC-seq is rapidly emerging as one of the major experimental approaches to probe chromatin accessibility genome-wide. Here, we present 'esATAC', a highly integrated easy-to-use R/Bioconductor package, for systematic ATAC-seq data analysis. 
It covers essential steps for full analyzing procedure, including raw data processing, quality control and downstream statistical analysis such as peak calling, enrichment analysis and transcription factor footprinting. esATAC supports one command line execution for preset pipelines and provides flexible interfaces for building customized pipelines. Availability and implementation:esATAC package is open source under the GPL-3.0 license. It is implemented in R and C++. Source code and binaries for Linux, MAC OS X and Windows are available through Bioconductor (https://www.bioconductor.org/packages/release/bioc/html/esATAC.html). Supplementary information:Supplementary data are available at Bioinformatics online.",2018-08-01 +,Session 3,"5.1 Using Nanoflow LC-MS/MS to Study Metabolic Changes in Low Grade Astrocytoma Thomas A. Neubert, Aram Modrek, Jingjing Deng, Guoan Zhang, Dimitris Placantonakis New York University School of Medicine, New York, NY, USA The presence of a common isocitrate dehydrogenase 1 (IDH1) mutation in two divergent tumor lineages, astrocytoma and oligodendroglioma, suggests that it is a driver mutation that occurs early in the cascade of mutations in a progenitor-like cell. To test the hypothesis that mutant IDH1 is a driver of gliomagenesis, we used human embryonic stem cell derived neural stem cells (NSCs) to overexpress mutant IDH1 protein systematically in combination with p53 and ATRX knockdown, thereby modeling the mutations found in astrocytomas. Mutant IDH1 alone paradoxically decreased proliferative capacity, increased apoptotic rates and blocked neuronal and astrocytic differentiation. The addition of p53 knockdown reversed all of these phenotypes. The third “oncogenic hit” of ATRX knockdown reverted the phenotype to a state characterized by normal proliferation and apoptotic rates, but maintained the block in differentiation. 
Analysis of the transcriptome and DNA methylome revealed that thousands of genes and methylation sites were also subject to a “switching” phenotype dependent on the oncogenic mutations present. We found that Sox2, a master stem-cell transcription factor, was strikingly down-regulated in NSCs with oncogenic mutations that were not differentiating. Preliminary evidence suggests that restoration of Sox2 expression rescues the differentiation block induced by mutant IDH1, suggesting a critical regulation of transcription factor networks by mutant IDH1 during early gliomagenesis. We used targeted and untargeted nanoflow LC-MS/MS workflows to characterize changes in small molecule metabolites as a result of the genetic manipulations in these cell lines. Expression of mutant IDH1 resulted in robust production of 2-hydroxyglutarate, among other changes. Some of the technical issues involved in the analysis of small molecules by mass spectrometry, such as chromatography and data analysis, will be discussed from the perspective of a lab that normally specializes in the analysis of proteins. 5.2 Degradomics analysis reveals a neuron specific death domain containing protein that is a caspase 3 cleavage target required for axon degeneration Nicholas T. Hertz1, R. A. Weber, C. Morgan, Mark M. Tešić, H. Molina, James A. Wells2, M. Tessier-Lavigne 1Stanford University, Stanford, CA, USA; 2University of California, San Francisco, CA, USA Caspase 3 (Casp3) activation potently regulates axon and neuron degeneration. All identified strong regulators of this death lie upstream of Casp3 activation (e.g. Puma, Bax, Casp9). Here we utilized degradomics to identify novel regulators of neuron cell death downstream of caspase activation. We identified a completely novel CARD or death domain containing protein that is neuron specific. This Casp3 substrate mediates Casp9 activation of Casp3 in a CARD domain specific manner and loss of function potently blocks axon and neuron death. 
In addition, the knockout mouse of this gene demonstrates a dramatic neuronal pruning phenotype. 5.3 Updates from the NO-Seq Zone: Translatome Proteomics in Nerve Axons. Michael Fainzilber Weizmann Institute of Science, Rehovot, Israel NO-Seq is a term proposed by Pedro Beltrao as “a way of making mass spectrometry more appealing to the genomics community” (https://twitter.com/pedrobeltrao/status/885925182602391552). The term also aptly captures the reality of studying the biology of the long axonal processes of neurons, since the long distances between axon tips and transcriptional events in the cell body limit the applicability of commonly used genomics approaches. I will present recent progress on deciphering the regulation and dynamics of local translation in axons, and the roles of these mechanisms in supporting neuronal growth, survival and regeneration. 5.4 Plenary Lecture Proteomics technologies for signaling, clinical applications Matthias Mann Max-Planck Institute of Biochemistry, Munich, Germany and Novo Nordisk Foundation Center for Protein Research, Copenhagen, Denmark, EU Mass spectrometry-based proteomics is now being employed in a wide variety of applications spanning the entire breadth of molecular biology (Aebersold and Mann, Nature, 2016). Arguably, cell signaling is one of the areas in which this approach has made the greatest and most unique contributions. Here, I will summarize the current status of the methodology and applications of the 'EasyPhos' method that we have developed recently (Humphrey et al. Nat. Biotech, 2015). This technology now enables studying complex signaling events in vivo and we have used it to uncover the long sought substrates of the Parkinson's kinase LRRK2 (Steger et al. eLife 2016 and unpublished). 
In the circadian rhythm it has revealed that a large percentage of the phospho-proteomics is coordinately regulated during the day and night cycle, and that many of the target sites appear to fine tune the metabolic machinery (Robles et al. Cell Metabolism, 2017). Currently, we are using EasyPhos to unravel signaling events downstream of opioid receptors in the brain in the context of analgesia and addiction. Body fluids have long been of great interest to researchers in proteomics because of their potential to directly 'phenotype' individuals with minimally invasive procedures. However, the high dynamic range – along with other challenges – have long stymied this promise. Along with other groups, we have recently revisited this area using the latest technological advances. The 'protein correlation profiling' approach allows us to study the plasma proteome rapidly in a wide range of conditions (Geyer et al. Cell Systems 2016). We have now increased the protein coverage several-fold using a novel scan mode termed BoxCar and applied our workflow to a number of clinical studies (Geyer et al. MSB 2016 and unpublished). These will be described in the talk together with a perspective of how plasma proteome profiling could be implemented in the clinic. 6.1 Revealing Nascent Proteomics in vivo in Signaling Pathways and Cell Differentiation Craig Forester1, Qian Zhao2, Nancy J. Phillips1, Robert J. Chalkley1, Juan A. Oses-Prieto1, Davide Ruggero1, A. L. Burlingame1 1University of California, San Francisco, CA, USA; 2Hong Kong Baptist University, Kowloon, Hong Kong Regulation of gene expression at the level of protein synthesis is a crucial element in driving how the genetic landscape is expressed. However, we are still limited in technologies that can quantitatively capture the immediate proteomic changes that allow cells to respond to specific stimuli. 
Here, we present a method to capture and identify the nascent proteome in vivo across different cell types without disturbing normal growth conditions utilizing O-propargyl puromycin (OPP). Cell permeable OPP rapidly labels nascent elongating peptides which are subsequently conjugated to biotin-azide using click chemistry and captured with streptavidin beads, followed by digestion and analysis using liquid chromatography-tandem mass spectrometry. Our technique of OPP-mediated Identification (OPP-ID) allows detection of widespread proteomic changes within a short two-hour pulse of OPP. We illustrate our technique by recapitulating alterations of proteomic networks induced by a potent mTOR inhibitor, MLN128. In addition, by employing OPP-ID, we identify over 2100 proteins and uncover distinct protein networks underlying early erythroid progenitor and differentiation states not amenable to alternative approaches such as amino acid analog labeling. We present OPP-ID as a method to quantitatively identify nascent proteomes across an array of biological contexts while preserving the subtleties directing signaling in the native cellular environment. 6.2 CRISPR/Cas9-APEX-mediated proximity labeling enables discovery of proteins associated with a predefined genomic locus Samuel A. Myers, Jason Wright, Feng Zhang, Steven A. Carr The Broad Institute of MIT and Harvard, Cambridge, MA, USA The activation or repression of a gene's expression is primarily controlled by changes in the proteins that occupy its regulatory elements. The most common method to identify proteins associated with genomic loci is chromatin immunoprecipitation (ChIP). While having greatly advanced our understanding of gene expression regulation, ChIP requires specific, high quality, IP-competent antibodies against nominated proteins, which can limit its utility and scope for discovery. 
Thus, a method able to discover and identify proteins associated with a particular genomic locus within the native cellular context would be extremely valuable. Here, we present a novel technology combining recent advances in chemical biology, genome targeting, and quantitative mass spectrometry to develop genomic locus proteomics, a method able to identify proteins which occupy a specific genomic locus. 6.3 Growth Factors, Cell Signaling and the Rise of Proteomics Ralph A. Bradshaw University of California, San Diego, CA, USA",2017-08-01 +30096152,scPipe: A flexible R/Bioconductor preprocessing pipeline for single-cell RNA-sequencing data.,"Single-cell RNA sequencing (scRNA-seq) technology allows researchers to profile the transcriptomes of thousands of cells simultaneously. Protocols that incorporate both designed and random barcodes have greatly increased the throughput of scRNA-seq, but give rise to a more complex data structure. There is a need for new tools that can handle the various barcoding strategies used by different protocols and exploit this information for quality assessment at the sample-level and provide effective visualization of these results in preparation for higher-level analyses. To this end, we developed scPipe, an R/Bioconductor package that integrates barcode demultiplexing, read alignment, UMI-aware gene-level quantification and quality control of raw sequencing data generated by multiple protocols that include CEL-seq, MARS-seq, Chromium 10X, Drop-seq and Smart-seq. scPipe produces a count matrix that is essential for downstream analysis along with an HTML report that summarises data quality. These results can be used as input for downstream analyses including normalization, visualization and statistical testing. 
scPipe performs this processing in a few simple R commands, promoting reproducible analysis of single-cell data that is compatible with the emerging suite of open-source scRNA-seq analysis tools available in R/Bioconductor and beyond. The scPipe R package is available for download from https://www.bioconductor.org/packages/scPipe.",2018-08-10 +30788914,Dual-grid mesh-based Monte Carlo algorithm for efficient photon transport simulations in complex three-dimensional media.,"The mesh-based Monte Carlo (MMC) method is an efficient algorithm to model light propagation inside tissues with complex boundaries, but choosing appropriate mesh density can be challenging. A fine mesh improves the spatial resolution of the output but requires more computation. We propose an improved MMC-dual-grid mesh-based Monte Carlo (DMMC)-to accelerate photon simulations using a coarsely tessellated tetrahedral mesh for ray-tracing computation and an independent voxelated grid for output data storage. The decoupling between ray-tracing and data storage grids allows us to simultaneously achieve faster simulations and improved output spatial accuracy. Furthermore, we developed an optimized ray-tracing technique to eliminate unnecessary ray-tetrahedron intersection tests in optically thick mesh elements. We validate the proposed algorithms using a complex heterogeneous domain and compare the solutions with those from MMC and voxel-based Monte Carlo. We found that DMMC with an unrefined constrained Delaunay tessellation of the boundary nodes yielded the highest speedup, ranging from 1.3  ×   to 2.9  ×   for various scattering settings, with nearly no loss in accuracy. In addition, the optimized ray-tracing technique offers excellent acceleration in high-scattering media, reducing the ray-tetrahedron test count by over 100-fold. 
Our DMMC software can be downloaded at http://mcx.space/mmc.",2019-02-01 +30284448,Sequential Fractionation Strategy Identifies Three Missing Proteins in the Mitochondrial Proteome of Commonly Used Cell Lines.,"Mitochondria are undeniably the cell powerhouse, directly affecting cell survival and fate. Growing evidence suggest that mitochondrial protein repertoire affects metabolic activity and plays an important role in determining cell proliferation/differentiation or quiescence shift. Consequently, the bioenergetic status of a cell is associated with the quality and abundance of the mitochondrial populations and proteomes. Mitochondrial morphology changes in the development of different cellular functions associated with metabolic switches. It is therefore reasonable to speculate that different cell lines do contain different mitochondrial-associated proteins, and the investigation of these pools may well represent a source for mining missing proteins (MPs). A very effective approach to increase the number of IDs through mass spectrometry consists of reducing the complexity of the biological samples by fractionation. The present study aims at investigating the mitochondrial proteome of five phenotypically different cell lines, possibly expressing some of the MPs, through an enrichment-fractionation approach at the organelle and protein level. We demonstrate a substantial increase in the proteome coverage, which, in turn, increases the likelihood of detecting low abundant proteins, often falling in the category of MPs, and resulting, for the present study, in the identification of METTL12, FAM163A, and RGS13. 
All MS data have been deposited to the MassIVE data repository ( https://massive.ucsd.edu ) with the data set identifier MSV000082409 and PXD010446.",2018-10-05 +27560970,hEIDI: An Intuitive Application Tool To Organize and Treat Large-Scale Proteomics Data.,"Advances in high-throughput proteomics have led to a rapid increase in the number, size, and complexity of the associated data sets. Managing and extracting reliable information from such large series of data sets require the use of dedicated software organized in a consistent pipeline to reduce, validate, exploit, and ultimately export data. The compilation of multiple mass-spectrometry-based identification and quantification results obtained in the context of a large-scale project represents a real challenge for developers of bioinformatics solutions. In response to this challenge, we developed a dedicated software suite called hEIDI to manage and combine both identifications and semiquantitative data related to multiple LC-MS/MS analyses. This paper describes how, through a user-friendly interface, hEIDI can be used to compile analyses and retrieve lists of nonredundant protein groups. Moreover, hEIDI allows direct comparison of series of analyses, on the basis of protein groups, while ensuring consistent protein inference and also computing spectral counts. hEIDI ensures that validated results are compliant with MIAPE guidelines as all information related to samples and results is stored in appropriate databases. Thanks to the database structure, validated results generated within hEIDI can be easily exported in the PRIDE XML format for subsequent publication. 
hEIDI can be downloaded from http://biodev.extra.cea.fr/docs/heidi .",2016-09-12 +31246107,Using Collaborative Cross Mouse Population to Fill Data Gaps in Risk Assessment: A Case Study of Population-Based Analysis of Toxicokinetics and Kidney Toxicodynamics of Tetrachloroethylene.,"BACKGROUND:Interindividual variability in susceptibility remains poorly characterized for environmental chemicals such as tetrachloroethylene (PERC). Development of population-based experimental models provide a potential approach to fill this critical need in human health risk assessment. OBJECTIVES:In this study, we aimed to better characterize the contribution of glutathione (GSH) conjugation to kidney toxicity of PERC and the degree of associated interindividual toxicokinetic (TK) and toxicodynamic (TD) variability by using the Collaborative Cross (CC) mouse population. METHODS:Male mice from 45 strains were intragastrically dosed with PERC ([Formula: see text]) or vehicle (5% Alkamuls EL-620 in saline), and time-course samples were collected for up to 24 h. Population variability in TK of S-(1,2,2-trichlorovinyl)GSH (TCVG), S-(1,2,2-trichlorovinyl)-L-cysteine (TCVC), and N-acetyl-S-(1,2,2-trichlorovinyl)-L-cysteine (NAcTCVC) was quantified in serum, liver, and kidney, and analyzed using a toxicokinetic model. Effects of PERC on kidney weight, fatty acid metabolism-associated genes [ Acot1 (Acyl-CoA thioesterase 1), Fabp1 (fatty acid-binding protein 1), and Ehhadh (enoyl-coenzyme A, hydratase/3-hydroxyacyl coenzyme A dehydrogenase)], and a marker of proximal tubular injury [KIM-1 (kidney injury molecule-1)/Hepatitis A virus cellular receptor 1 ( Havcr1)] were evaluated. Finally, quantitative data on interstrain variability in both formation of GSH conjugation metabolites of PERC and its kidney effects was used to calculate adjustment factors for the interindividual variability in both TK and TD. 
RESULTS:Mice treated with PERC had significantly lower kidney weight, higher kidney-to-body weight (BW) ratio, and higher expression of fatty acid metabolism-associated genes ( Acot1, Fabp1, and Ehhadh) and a marker of proximal tubular injury (KIM-1/ Havcr1). Liver levels of TCVG were significantly correlated with KIM-1/ Havcr1 in kidney, consistent with kidney injury being associated with GSH conjugation. We found that the default uncertainty factor for human variability may be marginally adequate to protect 95%, but not more, of the population for kidney toxicity mediated by PERC. DISCUSSION:Overall, this study demonstrates the utility of the CC mouse population in characterizing metabolism-toxicity interactions and quantifying interindividual variability. Further refinement of the characterization of interindividual variability can be accomplished by incorporating these data into in silico population models both for TK (such as a physiologically based pharmacokinetic model), as well as for toxicodynamic responses. https://doi.org/10.1289/EHP5105.",2019-06-27 +30329013,Thalia: semantic search engine for biomedical abstracts.,"

Summary

Although the publication rate of the biomedical literature has been growing steadily during the last decades, the accessibility of pertinent research publications for biologists and medical practitioners remains a challenge. This article describes Thalia, which is a semantic search engine that can recognize eight different types of concepts occurring in biomedical abstracts. Thalia is available via a web-based interface or a RESTful API. A key aspect of our search engine is that it is updated from PubMed on a daily basis. We describe here the main building blocks of our tool as well as an evaluation of the retrieval capabilities of Thalia in the context of a precision medicine dataset.

Availability and implementation

Thalia is available at http://nactem.ac.uk/Thalia_BI/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +25972521,AtmiRNET: a web-based resource for reconstructing regulatory networks of Arabidopsis microRNAs.,"Compared with animal microRNAs (miRNAs), our knowledge of how miRNAs are involved in significant biological processes in plants is still limited. AtmiRNET is a novel resource geared toward plant scientists for reconstructing regulatory networks of Arabidopsis miRNAs. By means of highlighted miRNA studies in target recognition, functional enrichment of target genes, promoter identification and detection of cis- and trans-elements, AtmiRNET allows users to explore mechanisms of transcriptional regulation and miRNA functions in Arabidopsis thaliana, which are rarely investigated so far. High-throughput next-generation sequencing datasets from transcriptional start sites (TSSs)-relevant experiments as well as five core promoter elements were collected to establish the support vector machine-based prediction model for Arabidopsis miRNA TSSs. Then, high-confidence transcription factors that participate in transcriptional regulation of Arabidopsis miRNAs are provided based on statistical approach. Furthermore, both experimentally verified and putative miRNA-target interactions, whose validity was supported by the correlations between the expression levels of miRNAs and their targets, are elucidated for functional enrichment analysis. The inferred regulatory networks give users an intuitive insight into the pivotal roles of Arabidopsis miRNAs through the crosstalk between miRNA transcriptional regulation (upstream) and miRNA-mediated (downstream) gene circuits. The valuable information that is visually oriented in AtmiRNET recruits the scant understanding of plant miRNAs and will be useful (e.g. ABA-miR167c-auxin signaling pathway) for further research. 
Database URL: http://AtmiRNET.itps.ncku.edu.tw/",2015-05-13 +31129958,"DecoyDeveloper: An On-Demand, De Novo Decoy Glycopeptide Generator.","Glycopeptide analysis is a growing field that is struggling to adopt effective, automated tools. Many creative workflows and software apps have emerged recently that offer promising capabilities for assigning glycopeptides to MS data in an automated fashion. The effectiveness of these tools is best measured and improved by determining how often they would select a glycopeptide decoy as a spectral match, instead of its correct assignment; yet generating the appropriate number and type of glycopeptide decoys can be challenging. To address this need, we have designed DecoyDeveloper, an on-demand decoy glycopeptide generator that can produce a high volume of decoys with low mass differences. DecoyDeveloper has a simple user interface and is capable of producing large sets of decoys containing complete, biologically relevant glycan and peptide sequences. We demonstrate the tool's efficiency by applying it to a set of 80 glycopeptide targets. This tool is freely available and can be found at http://glycopro.chem.ku.edu/J1.php .",2019-06-03 +29398924,"Implementing a geographical information system to assess endemic fluoride areas in Lamphun, Thailand.","

Introduction

Many studies have shown that fluoride can cross the placenta and that exposure to high fluoride during pregnancy may result in premature birth and/or a low birth weight. Lamphun is one of six provinces in Thailand where natural water fluoride (WF) concentrations >10.0 mg/L were found, and it was also found that >50% of households used water with high fluoride levels. Nevertheless, geographical information system (GIS) and maps of endemic fluoride areas are lacking. We aimed to measure the fluoride level of village water supplies to assess endemic fluoride areas and present GIS with maps in Google Maps.

Methods

A cross-sectional survey was conducted from July 2016 to January 2017. Purpose sampling was used to identify villages of districts with WF >10.0 mg/L in the Mueang Lamphun, Pasang, and Ban Thi districts. Water samples were collected with the geolocation measured by Smart System Info. Fluoride was analyzed with an ion-selective electrode instrument using a total ionic strength adjustment buffer. WF >0.70 mg/L was used to identify unsafe drinking water and areas with high endemic fluoride levels. Descriptive statistics were used to describe the findings, and MS Excel was used to create the GIS database. Maps were created in Google Earth and presented in Google Maps.

Results

We found that WF concentrations ranged between 0.10-13.60 mg/L. Forty-four percent (n=439) of samples were at unsafe levels (>0.70 mg/L), and 54% (n=303) of villages and 46% (n=79,807) of households used the unsafe drinking water. Fifty percent (n=26) of subdistricts were classified as being endemic fluoride areas. Five subdistricts were endemic fluoride areas, and in those, there were two subdistricts in which every household used unsafe drinking water.

Conclusion

These findings show the distribution of endemic fluoride areas and unsafe drinking water in Lamphun. This is useful for health policy authorities, local governments, and villagers and enables collaboration to resolve these issues. The GIS data are available at https://drive.google.com/open?id=1mi4Pvomf5xHZ1MQjK44pdp2xXFw&usp=sharing.",2018-01-25 +,Mapping global cropland and field size,"A new 1 km global IIASA‐IFPRI cropland percentage map for the baseline year 2005 has been developed which integrates a number of individual cropland maps at global to regional to national scales. The individual map products include existing global land cover maps such as GlobCover 2005 and MODIS v.5, regional maps such as AFRICOVER and national maps from mapping agencies and other organizations. The different products are ranked at the national level using crowdsourced data from Geo‐Wiki to create a map that reflects the likelihood of cropland. Calibration with national and subnational crop statistics was then undertaken to distribute the cropland within each country and subnational unit. The new IIASA‐IFPRI cropland product has been validated using very high‐resolution satellite imagery via Geo‐Wiki and has an overall accuracy of 82.4%. It has also been compared with the EarthStat cropland product and shows a lower root mean square error on an independent data set collected from Geo‐Wiki. The first ever global field size map was produced at the same resolution as the IIASA‐IFPRI cropland map based on interpolation of field size data collected via a Geo‐Wiki crowdsourcing campaign. A validation exercise of the global field size map revealed satisfactory agreement with control data, particularly given the relatively modest size of the field size data set used to create the map. 
Both are critical inputs to global agricultural monitoring in the frame of GEOGLAM and will serve the global land modelling and integrated assessment community, in particular for improving land use models that require baseline cropland information. These products are freely available for downloading from the http://cropland.geo-wiki.org website.",2015-05-01 +30877383,"Genetic diversity and phylogenetic analysis of 29 Y-STR loci in the Tibetan population from Sichuan Province, Southwest China.","Y-Chromosomal short tandem repeat polymorphisms (Y-STRs) are widely applied in human forensic cases and population genetic studies. There is a lack of information about the Sichuan Tibetan population in the Y-STR Haplotype Reference Database (YHRD, https://yhrd.org, release 59). In this study, 502 unrelated male individuals residing in the Sichuan Province were recruited and genotyped at 29 Y-STR loci. A total of 479 haplotypes were observed, 460 (96.03%) of which were unique. The haplotype diversity (HD) and discrimination capacity (DC) for the Sichuan Tibetan population were 0.9998 and 0.9542, respectively. To reveal the genetic diversities and relationships between the Chinese Sichuan Tibetan and 29 other previously reported populations, forensic parameter analysis, multi-dimensional scaling, and phylogenetic reconstruction were performed. The results showed that the Sichuan Tibetan population was relatively isolated from other populations, suggesting that genetic proximity is in line with geographical boundaries.",2019-03-14 +32626059,Application of data science in risk assessment and early warning.,"The currently applied approaches, procedures and tools used for the identification of emerging risks vary greatly among Member States of the EU. EFSA established a structured approach for emerging risk identification that mainly consists of systematically searching, collecting, collating and analysing information and data. 
In addition, EFSA concluded that new methodologies and tools are needed to facilitate efficient and transparent sharing of data, knowledge and methods in the field of emerging risk identification between Member States. As the result of an open call issued by EFSA, the 'Determination and metrics of emerging risks' (DEMETER) project was established in spring 2017 to support current and future procedures for identification of emerging risks. As the Bundesinstitut für Risikobewertung (BfR) hosting site is involved in the DEMETER project, as well as in several other software development activities in the area of quantitative microbiological risk assessment, the fellow had the opportunity to play an active role in the project work and development of the running DEMETER project. The training and close integration in the project team enabled the fellow to make significant contributions, e.g. with the creation of new open source data processing workflows and by contributing to the Emerging Risk Knowledge Exchange Platform (ERKEP) Framework Concept Note. Besides DEMETER, the fellow participated in other activities of the Unit for Food Technologies, Supply Chains and Food Defence, including testing and applying several BfR open source software tools which had been developed in previous projects and that are used in microbiological risk assessment (e.g. Predictive Microbial Modelling Lab (PMM-Lab)) or as automatic data retrieval systems (e.g. SiLeBAT NewsRadar) - see https://foodrisklabs.bfr.bund.de.",2018-08-27 +30131346,Inferring Population Structure and Admixture Proportions in Low-Depth NGS Data.,"We here present two methods for inferring population structure and admixture proportions in low-depth next-generation sequencing (NGS) data. Inference of population structure is essential in both population genetics and association studies, and is often performed using principal component analysis (PCA) or clustering-based approaches. 
NGS methods provide large amounts of genetic data but are associated with statistical uncertainty, especially for low-depth sequencing data. Models can account for this uncertainty by working directly on genotype likelihoods of the unobserved genotypes. We propose a method for inferring population structure through PCA in an iterative heuristic approach of estimating individual allele frequencies, where we demonstrate improved accuracy in samples with low and variable sequencing depth for both simulated and real datasets. We also use the estimated individual allele frequencies in a fast non-negative matrix factorization method to estimate admixture proportions. Both methods have been implemented in the PCAngsd framework available at http://www.popgen.dk/software/.",2018-08-21 +30467516,"BgeeDB, an R package for retrieval of curated expression datasets and for gene list expression localization enrichment tests.","BgeeDB is a collection of functions to import into R re-annotated, quality-controlled and re-processed expression data available in the Bgee database. This includes data from thousands of wild-type healthy samples of multiple animal species, generated with different gene expression technologies (RNA-seq, Affymetrix microarrays, expressed sequence tags, and in situ hybridizations). BgeeDB facilitates downstream analyses, such as gene expression analyses with other Bioconductor packages. Moreover, BgeeDB includes a new gene set enrichment test for preferred localization of expression of genes in anatomical structures (""TopAnat""). Along with the classical Gene Ontology enrichment test, this test provides a complementary way to interpret gene lists. 
Availability: https://www.bioconductor.org/packages/BgeeDB/.",2016-11-23 +30314426,The Information Bottleneck and Geometric Clustering.,"The information bottleneck (IB) approach to clustering takes a joint distribution P(X,Y) and maps the data X to cluster labels T , which retain maximal information about Y (Tishby, Pereira, & Bialek, 1999 ). This objective results in an algorithm that clusters data points based on the similarity of their conditional distributions P(YX) . This is in contrast to classic geometric clustering algorithms such as k -means and gaussian mixture models (GMMs), which take a set of observed data points {xi}i=1:N and cluster them based on their geometric (typically Euclidean) distance from one another. Here, we show how to use the deterministic information bottleneck (DIB) (Strouse & Schwab, 2017 ), a variant of IB, to perform geometric clustering by choosing cluster labels that preserve information about data point location on a smoothed data set. We also introduce a novel intuitive method to choose the number of clusters via kinks in the information curve. We apply this approach to a variety of simple clustering problems, showing that DIB with our model selection procedure recovers the generative cluster labels. We also show that, in particular limits of our model parameters, clustering with DIB and IB is equivalent to k -means and EM fitting of a GMM with hard and soft assignments, respectively. Thus, clustering with (D)IB generalizes and provides an information-theoretic perspective on these classic algorithms.",2018-10-12 +31721105,"Expression analysis of LTR-derived miR-1269a and target gene, KSR2 in Sebastes schlegelii.","

Background

Sebastes schlegelii are an important species of fish found in the coastal areas of the Korea with significant commercial importance. Most studies thus far have been primarily focused on environmental factors; behavioural patterns, aquaculture, diseases and limited genetic studies with little to none related to either microRNAs (miRNAs) or transposable elements (TE).

Objectives

In order to understand biological roles of TE-derived miR-1269a, we examined expression pattern for miR-1269a and its target gene, KSR2, in various tissues of Sebastes schlegelii. Also, we performed luciferase reporter assay in HINAE cells.

Methods

UCSC Genome Browser (https://genome.ucsc.edu/) was used to examine which TE is associated with miR-1269a. For the target genes for miR-1269a, the target genes associated with the miRNA were identified using miRDB (http://www.mirdb.org/) and TargetScan 7.1 (http://www.targetscan.org/vert_71/). A two-step miRNA kit, HB miR Multi Assay Kit™ System. I was used for the analysis of TE-derived miRNA expression patterns. The 3'UTR of KSR2 gene was cloned into the psiCHECK-2 vector. Subsequently co-transfected with miR-1269a mimics to HINAE cells for luciferase reporter assay.

Results

MiR-1269a was found to be derived from LTR retrotransposon, MLT2B. LTR-derived miR-1269a was highly expressed in the muscle, liver and gonad tissues of Sebastes schlegelii, but KSR2 revealed high expression in the brain. Co-transfection of KSR2 and miR-1269a mimic to HINAE cells showed high activity of miR-1269a in relation to KSR2.

Conclusion

LTR-derived miR-1269a showed enhancer activity with relation to KSR2 in Sebastes schlegelii. The data may be used as a foundation for further investigation regarding correlation of miRNA and target genes in addition to other functional studies of biological significance in Sebastes schlegelii.",2019-11-12 +30878565,Navigating the landscape of core outcome set development in dermatology.,"The development of core outcome sets (COSs; ie, a minimum set of core outcomes that should be measured and reported in all trials or in clinical practice for a specific condition) in dermatology is increasing in pace. A total of 44 dermatology-related COS projects have been registered in the online Core Outcome Measures in Effectiveness Trials database (http://www.comet-initiative.org/studies/search) and include studies on 26 different skin diseases. With the increasing number of COSs in dermatology, care is needed to ensure the delivery of high-quality COSs that meet quality standards when using state-of-the-art methods. In 2015, the Cochrane Skin-Core Outcome Set Initiative (CS-COUSIN) was established. CS-COUSIN is an international, multidisciplinary working group aiming to improve the development and implementation of COSs in dermatology. CS-COUSIN has developed guidance on how to develop high-quality COSs for skin diseases and supports dermatology-specific COS initiatives. Currently, 17 COS development groups are affiliated with CS-COUSIN and following standardized COS development processes. To ensure successful uptake of COSs in dermatology, researchers, clinicians, systematic reviewers, guideline developers, and other stakeholders should use existing COSs in their work.",2019-03-13 +28578993,A statistical view of protein chemical synthesis using NCL and extended methodologies.,"Native chemical ligation and extended methodologies are the most popular chemoselective reactions for protein chemical synthesis. 
Their combination with desulfurization techniques can give access to small or challenging proteins that are exploited in a large variety of research areas. In this report, we have conducted a statistical review of their use for protein chemical synthesis in order to provide a flavor of the recent trends and identify the most popular chemical tools used by protein chemists. To this end, a protein chemical synthesis (PCS) database (http://pcs-db.fr) was created by collecting a set of relevant data from more than 450 publications covering the period 1994-2017. A preliminary account of what this database tells us is presented in this report.",2017-05-24 +25643357,Figure-associated text summarization and evaluation.,"Biomedical literature incorporates millions of figures, which are a rich and important knowledge resource for biomedical researchers. Scientists need access to the figures and the knowledge they represent in order to validate research findings and to generate new hypotheses. By themselves, these figures are nearly always incomprehensible to both humans and machines and their associated texts are therefore essential for full comprehension. The associated text of a figure, however, is scattered throughout its full-text article and contains redundant information content. In this paper, we report the continued development and evaluation of several figure summarization systems, the FigSum+ systems, that automatically identify associated texts, remove redundant information, and generate a text summary for every figure in an article. Using a set of 94 annotated figures selected from 19 different journals, we conducted an intrinsic evaluation of FigSum+. We evaluate the performance by precision, recall, F1, and ROUGE scores. The best FigSum+ system is based on an unsupervised method, achieving F1 score of 0.66 and ROUGE-1 score of 0.97. 
The annotated data is available at figshare.com (http://figshare.com/articles/Figure_Associated_Text_Summarization_and_Evaluation/858903).",2015-02-02 +31441739,[Study of associations of polymorphism of matrix metalloproteinases genes with the development of arterial hypertension in men].,"The aim of research. To study the association of polymorphic loci of matrix metalloproteinases with the development of essential hypertension (EH) in men of the Central Chernozem Region of Russia. Materials and methods. A study of 564 men with EH and 257 control men was performed. Analysis of the polymorphic loci of metalloproteinases rs11568818 MMР7, rs1320632 MMР8, rs11225395 MMР8, rs1799750 MMР1, rs3025058 MMР3 was performed using real-time PCR. The study of associations of SNPs and their haplotypes with the development of arterial hypertension was carried out using logistic regression analysis in the PLINK software (v. 2.050). The regulatory potential of polymorphic loci was analyzed in the HaploReg software (v. 4.1) (http://archive.broadinstitute.org). The effect of SNP on gene expression was studied using the data of the Genotype-Tissue Expression project (http://www.gtexportal.org/). Results. Haplotype including rs11568818 MMP7, rs1320632 MMP8, rs11225395 MMP8 and rs1799750 MMP1 associated with a high risk of disease in men (OR=2,58, pperm=0,04). These polymorphisms located in region of promoter and enhancer histone marks and in the region of hypersensitivity to DNAse-1. They located in sites of proteins bound (TBP, CJUN, CFOS and GATA2) and they associated with the level of gene expression ММР7, ММР27 and RP11-817J15.3 (in peripheral blood, skeletal muscles, nervous tissue and other). Сonclusion. 
Haplotype G-A-C-1G for polymorphisms rs11568818 MMP7, rs1320632 MMP8, rs11225395 MMP8, rs1799750 MMP1 are associated with the development of essential hypertension in men in the Central Chernozem Region of Russia.",2019-08-23 +31185938,Development and validation of a model that includes two ultrasound parameters and the plasma D-dimer level for predicting malignancy in adnexal masses: an observational study.,"

Background

Pre-operative discrimination of malignant from benign adnexal masses is crucial for planning additional imaging, preparation, surgery and postoperative care. This study aimed to define key ultrasound and clinical variables and develop a predictive model for calculating preoperative ovarian tumor malignancy risk in a gynecologic oncology referral center. We compared our model to a subjective ultrasound assessment (SUA) method and previously described models.

Methods

This prospective, single-center observational study included consecutive patients. We collected systematic ultrasound and clinical data, including cancer antigen 125, D-dimer (DD) levels and platelet count. Histological examinations served as the reference standard. We performed univariate and multivariate regressions, and Bayesian information criterion (BIC) to assess the optimal model. Data were split into 2 subsets: training, for model development (190 observations) and testing, for model validation (n = 100).

Results

Among 290 patients, 52% had malignant disease, including epithelial ovarian cancer (72.8%), metastatic disease (14.5%), borderline tumors (6.6%), and non-epithelial malignancies (4.6%). Significant variables were included into a multivariate analysis. The optimal model, included three independent factors: solid areas, the color score, and the DD level. Malignant and benign lesions had mean DD values of 2.837 and 0.354 μg/ml, respectively. We transformed established formulae into a web-based calculator ( http://gin-onc-calculators.com/gynonc.php ) for calculating the adnexal mass malignancy risk. The areas under the curve (AUCs) for models compared in the testing set were: our model (0.977), Simple Rules risk calculation (0.976), Assessment of Different NEoplasias in the adneXa (ADNEX) (0.972), Logistic Regression 2 (LR2) (0.969), Risk of Malignancy Index (RMI) 4 (0.932), SUA (0.930), and RMI3 (0.912).

Conclusions

Two simple ultrasound predictors and the DD level (also included in a mathematical model), when used by gynecologist oncologist, discriminated malignant from benign ovarian lesions as well or better than other more complex models and the SUA method. These parameters (and the model) may be clinically useful for planning adequate management in the cancer center. The model needs substantial validation.",2019-06-11 +26133526,Visualization and dissemination of multidimensional proteomics data comparing protein abundance during Caenorhabditis elegans development.,"Regulation of protein abundance is a critical aspect of cellular function, organism development, and aging. Alternative splicing may give rise to multiple possible proteoforms of gene products where the abundance of each proteoform is independently regulated. Understanding how the abundances of these distinct gene products change is essential to understanding the underlying mechanisms of many biological processes. Bottom-up proteomics mass spectrometry techniques may be used to estimate protein abundance indirectly by sequencing and quantifying peptides that are later mapped to proteins based on sequence. However, quantifying the abundance of distinct gene products is routinely confounded by peptides that map to multiple possible proteoforms. In this work, we describe a technique that may be used to help mitigate the effects of confounding ambiguous peptides and multiple proteoforms when quantifying proteins. We have applied this technique to visualize the distribution of distinct gene products for the whole proteome across 11 developmental stages of the model organism Caenorhabditis elegans. The result is a large multidimensional dataset for which web-based tools were developed for visualizing how translated gene products change during development and identifying possible proteoforms. The underlying instrument raw files and tandem mass spectra may also be downloaded. 
The data resource is freely available on the web at http://www.yeastrc.org/wormpes/ . Graphical Abstract ᅟ.",2015-07-02 +31147700,CNIT: a fast and accurate web tool for identifying protein-coding and long non-coding transcripts based on intrinsic sequence composition.,"As more and more high-throughput data has been produced by next-generation sequencing, it is still a challenge to classify RNA transcripts into protein-coding or non-coding, especially for poorly annotated species. We upgraded our original coding potential calculator, CNCI (Coding-Non-Coding Index), to CNIT (Coding-Non-Coding Identifying Tool), which provides faster and more accurate evaluation of the coding ability of RNA transcripts. CNIT runs ∼200 times faster than CNCI and exhibits more accuracy compared with CNCI (0.98 versus 0.94 for human, 0.95 versus 0.93 for mouse, 0.93 versus 0.92 for zebrafish, 0.93 versus 0.92 for fruit fly, 0.92 versus 0.88 for worm, and 0.98 versus 0.85 for Arabidopsis transcripts). Moreover, the AUC values of 11 animal species and 27 plant species showed that CNIT was capable of obtaining relatively accurate identification results for almost all eukaryotic transcripts. In addition, a mobile-friendly web server is now freely available at http://cnit.noncode.org/CNIT.",2019-07-01 +30456262,Data supporting assessment for nitrous oxide emissions from soils under traditional cropland and apple orchard in the Loess Plateau of China.,"The data presented in this article relates to the research article entitled ""Nitrous oxide emissions from soils under traditional cropland and apple orchard in the semi-arid Loess Plateau of China"" (https://doi.org/10.1016/j.dib.2016.08.027) (Pang et al., 2019). The dataset includes soil N2O emissions for two land use types (wheat field and apple orchard) in the semi-arid Loess Plateau and related environmental factors, such as soil temperature and soil moisture. 
In addition, the estimated annual average and seasonal cumulative emissions of N2O are presented here. Nitrous oxide emissions were measured by static, closed chamber methods. The data provides evidence for the difference in N2O emissions among two dominant land uses on the Loess Plateau of China.",2018-11-01 +24167507,MASCP gator: an overview of the Arabidopsis proteomic aggregation portal.,"A key challenge in the area of bioinformatics in the coming decades is the ability to manage the wealth of information that is being generated from the variety of high throughput methodologies currently being undertaken in laboratories across the world. While these approaches have made available large volumes of data to the research community, less attention has been given to the problem of how to intuitively present the data to enable greater biological insights. Recently, an attempt was made to tackle this problem in the area of Arabidopsis proteomics. The model plant has been the target of countless proteomics surveys producing an exhaustive array of data and online repositories. The MASCP Gator is an aggregation portal for proteomic data currently being produced by the community and unites a large collection of specialized resources to a single portal (http://gator.masc-proteomics.org/). Here we describe the latest additions, upgrades and features to this resource further expanding its role into protein modifications and genome sequence variations.",2013-10-23 +30862804,Using Machine Learning to Measure Relatedness Between Genes: A Multi-Features Model.,"Measuring conditional relatedness between a pair of genes is a fundamental technique and still a significant challenge in computational biology. Such relatedness can be assessed by gene expression similarities while suffering high false discovery rates. Meanwhile, other types of features, e.g., prior-knowledge based similarities, is only viable for measuring global relatedness. 
In this paper, we propose a novel machine learning model, named Multi-Features Relatedness (MFR), for accurately measuring conditional relatedness between a pair of genes by incorporating expression similarities with prior-knowledge based similarities in an assessment criterion. MFR is used to predict gene-gene interactions extracted from the COXPRESdb, KEGG, HPRD, and TRRUST databases by the 10-fold cross validation and test verification, and to identify gene-gene interactions collected from the GeneFriends and DIP databases for further verification. The results show that MFR achieves the highest area under curve (AUC) values for identifying gene-gene interactions in the development, test, and DIP datasets. Specifically, it obtains an improvement of 1.1% on average of precision for detecting gene pairs with both high expression similarities and high prior-knowledge based similarities in all datasets, comparing to other linear models and coexpression analysis methods. Regarding cancer gene networks construction and gene function prediction, MFR also obtains the results with more biological significances and higher average prediction accuracy, than other compared models and methods. A website of the MFR model and relevant datasets can be accessed from http://bmbl.sdstate.edu/MFR .",2019-03-12 +29850768,Bio-knowledge-based filters improve residue-residue contact prediction accuracy.,"Motivation:Residue-residue contact prediction through direct coupling analysis has reached impressive accuracy, but yet higher accuracy will be needed to allow for routine modelling of protein structures. One way to improve the prediction accuracy is to filter predicted contacts using knowledge about the particular protein of interest or knowledge about protein structures in general. Results:We focus on the latter and discuss a set of filters that can be used to remove false positive contact predictions. 
Each filter depends on one or a few cut-off parameters for which the filter performance was investigated. Combining all filters while using default parameters resulted for a test set of 851 protein domains in the removal of 29% of the predictions of which 92% were indeed false positives. Availability and implementation:All data and scripts are available at http://comprec-lin.iiar.pwr.edu.pl/FPfilter/. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-11-01 +30620434,Evaluation and training of Executive Functions in genocide survivors. The case of Yazidi children.,"Executive Functions (EFs) development is critically affected by stress and trauma, as well as the socioeconomic context in which children grow up (Welsh, Nix, Blair, Bierman, & Nelson, 2010, Journal of Educational Psychology, 102, 43-53). Research in this field is surprisingly lacking in relation to war contexts. This study represents a first attempt at addressing this topic by evaluating EFs in Yazidi children. The Yazidi community is an ethnic and religious minority living in Iraq. From August 2014 onwards, the Yazidi community has been the target of several atrocities perpetrated by ISIS and described as genocide by the international community at large. The University of Trieste, thanks to a program financed by the Friuli Venezia Giulia Region, developed a study aimed at (a) evaluating hot and cool EFs in children living in a war context and (b) developing a specific training method to enhance hot and cool EFs in Yazidi children of preschool age (N = 53). Data related to this group of children were compared with a sample of typically developing Italian children randomly assigned to either an EFs training group (N = 55) or a passive control group (N = 51). Results indicate different baselines in EFs in Yazidi and Italian samples and a significant effect of the program on both trained groups, especially in tasks measuring hot EFs. 
Data are discussed in terms of hot and cool EFs in children growing in adverse environments, as well as the evaluation of educational and developmental opportunities to prevent children who survived genocide from becoming a 'lost generation'. A video abstract of this article can be viewed at https://youtu.be/7t_08TbxR_8.",2019-02-20 +30169739,Drug Gene Budger (DGB): an application for ranking drugs to modulate a specific gene based on transcriptomic signatures.,"

Summary

Mechanistic molecular studies in biomedical research often discover important genes that are aberrantly over- or under-expressed in disease. However, manipulating these genes in an attempt to improve the disease state is challenging. Herein, we reveal Drug Gene Budger (DGB), a web-based and mobile application developed to assist investigators in order to prioritize small molecules that are predicted to maximally influence the expression of their target gene of interest. With DGB, users can enter a gene symbol along with the wish to up-regulate or down-regulate its expression. The output of the application is a ranked list of small molecules that have been experimentally determined to produce the desired expression effect. The table includes log-transformed fold change, P-value and q-value for each small molecule, reporting the significance of differential expression as determined by the limma method. Relevant links are provided to further explore knowledge about the target gene, the small molecule and the source of evidence from which the relationship between the small molecule and the target gene was derived. The experimental data contained within DGB is compiled from signatures extracted from the LINCS L1000 dataset, the original Connectivity Map (CMap) dataset and the Gene Expression Omnibus (GEO). DGB also presents a specificity measure for a drug-gene connection based on the number of genes a drug modulates. DGB provides a useful preliminary technique for identifying small molecules that can target the expression of a single gene in human cells and tissues.

Availability and implementation

The application is freely available on the web at http://DGB.cloud and as a mobile phone application on iTunes https://itunes.apple.com/us/app/drug-gene-budger/id1243580241? mt=8 and Google Play https://play.google.com/store/apps/details? id=com.drgenebudger.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +21690186,Integrative analysis of environmental sequences using MEGAN4.,"A major challenge in the analysis of environmental sequences is data integration. The question is how to analyze different types of data in a unified approach, addressing both the taxonomic and functional aspects. To facilitate such analyses, we have substantially extended MEGAN, a widely used taxonomic analysis program. The new program, MEGAN4, provides an integrated approach to the taxonomic and functional analysis of metagenomic, metatranscriptomic, metaproteomic, and rRNA data. While taxonomic analysis is performed based on the NCBI taxonomy, functional analysis is performed using the SEED classification of subsystems and functional roles or the KEGG classification of pathways and enzymes. A number of examples illustrate how such analyses can be performed, and show that one can also import and compare classification results obtained using others' tools. MEGAN4 is freely available for academic purposes, and installers for all three major operating systems can be downloaded from www-ab.informatik.uni-tuebingen.de/software/megan.",2011-06-20 +31769300,Suicide and Ambient Temperature: A Multi-Country Multi-City Study.,"

Background

Previous literature suggests that higher ambient temperature may play a role in increasing the risk of suicide. However, no multi-country study has explored the shape of the association and the role of moderate and extreme heat across different locations.

Objectives

We examined the short-term temperature-suicide relationship using daily time-series data collected for 341 locations in 12 countries for periods ranging from 4 to 40 y.

Methods

We conducted a two-stage meta-analysis. First, we performed location-specific time-stratified case-crossover analyses to examine the temperature-suicide association for each location. Then, we used a multivariate meta-regression to combine the location-specific lag-cumulative nonlinear associations across all locations and by country.

Results

A total of 1,320,148 suicides were included in this study. Higher ambient temperature was associated with an increased risk of suicide in general, and we observed a nonlinear association (inverted J-shaped curve) with the highest risk at 27°C. The relative risk (RR) for the highest risk was 1.33 (95% CI: 1.30, 1.36) compared with the risk at the first percentile. Country-specific results showed that the nonlinear associations were more obvious in northeast Asia (Japan, South Korea, and Taiwan). The temperature with the highest risk of suicide ranged from the 87th to 88th percentiles in the northeast Asian countries, whereas this value was the 99th percentile in Western countries (Canada, Spain, Switzerland, the UK, and the United States) and South Africa, where nearly linear associations were estimated. The country-specific RRs ranged from 1.31 (95% CI: 1.19, 1.44) in the United States to 1.65 (95% CI: 1.40, 1.93) in Taiwan, excluding countries where the results were substantially uncertain.

Discussion

Our findings showed that the risk of suicide increased with increasing ambient temperature in many countries, but to varying extents and not necessarily linearly. This temperature-suicide association should be interpreted cautiously, and further evidence of the relationship and modifying factors is needed. https://doi.org/10.1289/EHP4898.",2019-11-26 +21418024,PlantPIs--an interactive web resource on plant protease inhibitors.,"PlantPIs is a web querying system for a database collection of plant protease inhibitors data. Protease inhibitors in plants are naturally occurring proteins that inhibit the function of endogenous and exogenous proteases. In this paper the design and development of a web framework providing a clear and very flexible way of querying plant protease inhibitors data is reported. The web resource is based on a relational database, containing data of plants protease inhibitors publicly accessible, and a graphical user interface providing all the necessary browsing tools, including a data exporting function. PlantPIs contains information extracted principally from MEROPS database, filtered, annotated and compared with data stored in other protein and gene public databases, using both automated techniques and domain expert evaluations. The data are organized to allow a flexible and easy way to access stored information. The database is accessible at http://www.plantpis.ba.itb.cnr.it/.",2011-08-01 +25075115,Retro: concept-based clustering of biomedical topical sets.,"

Motivation

Clustering methods can be useful for automatically grouping documents into meaningful clusters, improving human comprehension of a document collection. Although there are clustering algorithms that can achieve the goal for relatively large document collections, they do not always work well for small and homogenous datasets.

Methods

In this article, we present Retro-a novel clustering algorithm that extracts meaningful clusters along with concise and descriptive titles from small and homogenous document collections. Unlike common clustering approaches, our algorithm predicts cluster titles before clustering. It relies on the hypergeometric distribution model to discover key phrases, and generates candidate clusters by assigning documents to these phrases. Further, the statistical significance of candidate clusters is tested using supervised learning methods, and a multiple testing correction technique is used to control the overall quality of clustering.

Results

We test our system on five disease datasets from OMIM(®) and evaluate the results based on MeSH(®) term assignments. We further compare our method with several baseline and state-of-the-art methods, including K-means, expectation maximization, latent Dirichlet allocation-based clustering, Lingo, OPTIMSRC and adapted GK-means. The experimental results on the 20-Newsgroup and ODP-239 collections demonstrate that our method is successful at extracting significant clusters and is superior to existing methods in terms of quality of clusters. Finally, we apply our system to a collection of 6248 topical sets from the HomoloGene(®) database, a resource in PubMed(®). Empirical evaluation confirms the method is useful for small homogenous datasets in producing meaningful clusters with descriptive titles.

Availability and implementation

A web-based demonstration of the algorithm applied to a collection of sets from the HomoloGene database is available at http://www.ncbi.nlm.nih.gov/CBBresearch/Wilbur/IRET/CLUSTERING_HOMOLOGENE/index.html.

Contact

lana.yeganova@nih.gov

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-29 +29186294,Classifying next-generation sequencing data using a zero-inflated Poisson model.,"

Motivation

With the development of high-throughput techniques, RNA-sequencing (RNA-seq) is becoming increasingly popular as an alternative for gene expression analysis, such as RNAs profiling and classification. Identifying which type of diseases a new patient belongs to with RNA-seq data has been recognized as a vital problem in medical research. As RNA-seq data are discrete, statistical methods developed for classifying microarray data cannot be readily applied for RNA-seq data classification. Witten proposed a Poisson linear discriminant analysis (PLDA) to classify the RNA-seq data in 2011. Note, however, that the count datasets are frequently characterized by excess zeros in real RNA-seq or microRNA sequence data (i.e. when the sequence depth is not enough or small RNAs with the length of 18-30 nucleotides). Therefore, it is desired to develop a new model to analyze RNA-seq data with an excess of zeros.

Results

In this paper, we propose a Zero-Inflated Poisson Logistic Discriminant Analysis (ZIPLDA) for RNA-seq data with an excess of zeros. The new method assumes that the data are from a mixture of two distributions: one is a point mass at zero, and the other follows a Poisson distribution. We then consider a logistic relation between the probability of observing zeros and the mean of the genes and the sequencing depth in the model. Simulation studies show that the proposed method performs better than, or at least as well as, the existing methods in a wide range of settings. Two real datasets including a breast cancer RNA-seq dataset and a microRNA-seq dataset are also analyzed, and they coincide with the simulation results that our proposed method outperforms the existing competitors.

Availability and implementation

The software is available at http://www.math.hkbu.edu.hk/∼tongt.

Contact

xwan@comp.hkbu.edu.hk or tongt@hkbu.edu.hk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-04-01 +29953864,Community-Driven Data Analysis Training for Biology.,"The primary problem with the explosion of biomedical datasets is not the data, not computational resources, and not the required storage space, but the general lack of trained and skilled researchers to manipulate and analyze these data. Eliminating this problem requires development of comprehensive educational resources. Here we present a community-driven framework that enables modern, interactive teaching of data analytics in life sciences and facilitates the development of training materials. The key feature of our system is that it is not a static but a continuously improved collection of tutorials. By coupling tutorials with a web-based analysis framework, biomedical researchers can learn by performing computation themselves through a web browser without the need to install software or search for example datasets. Our ultimate goal is to expand the breadth of training materials to include fundamental statistical and data science topics and to precipitate a complete re-engineering of undergraduate and graduate curricula in life sciences. This project is accessible at https://training.galaxyproject.org.",2018-06-01 +30626441,An ontological foundation for ocular phenotypes and rare eye diseases.,"

Background

The optical accessibility of the eye and technological advances in ophthalmic diagnostics have put ophthalmology at the forefront of data-driven medicine. The focus of this study is rare eye disorders, a group of conditions whose clinical heterogeneity and geographic dispersion make data-driven, evidence-based practice particularly challenging. Inter-institutional collaboration and information sharing is crucial but the lack of standardised terminology poses an important barrier. Ontologies are computational tools that include sets of vocabulary terms arranged in hierarchical structures. They can be used to provide robust terminology standards and to enhance data interoperability. Here, we discuss the development of the ophthalmology-related component of two well-established biomedical ontologies, the Human Phenotype Ontology (HPO; includes signs, symptoms and investigation findings) and the Orphanet Rare Disease Ontology (ORDO; includes rare disease nomenclature/nosology).

Methods

A variety of approaches were used including automated matching to existing resources and extensive manual curation. To achieve the latter, a study group including clinicians, patient representatives and ontology developers from 17 countries was formed. A broad range of terms was discussed and validated during a dedicated workshop attended by 60 members of the group.

Results

A comprehensive, structured and well-defined set of terms has been agreed on including 1106 terms relating to ocular phenotypes (HPO) and 1202 terms relating to rare eye disease nomenclature (ORDO). These terms and their relevant annotations can be accessed in http://www.human-phenotype-ontology.org/ and http://www.orpha.net/ ; comments, corrections, suggestions and requests for new terms can be made through these websites. This is an ongoing, community-driven endeavour and both HPO and ORDO are regularly updated.

Conclusions

To our knowledge, this is the first effort of such scale to provide terminology standards for the rare eye disease community. We hope that this work will not only improve coding and standardise information exchange in clinical care and research, but also it will catalyse the transition to an evidence-based precision ophthalmology paradigm.",2019-01-09 +30235322,GC4S: A bioinformatics-oriented Java software library of reusable graphical user interface components.,"Modern bioinformatics and computational biology are fields of study driven by the availability of effective software required for conducting appropriate research tasks. Apart from providing reliable and fast implementations of different data analysis algorithms, these software applications should also be clear and easy to use through proper user interfaces, providing appropriate data management and visualization capabilities. In this regard, the user experience obtained by interacting with these applications via their Graphical User Interfaces (GUI) is a key factor for their final success and real utility for researchers. Despite the existence of different packages and applications focused on advanced data visualization, there is a lack of specific libraries providing pertinent GUI components able to help scientific bioinformatics software developers. To that end, this paper introduces GC4S, a bioinformatics-oriented collection of high-level, extensible, and reusable Java GUI elements specifically designed to speed up bioinformatics software development. Within GC4S, developers of new applications can focus on the specific GUI requirements of their projects, relying on GC4S for generalities and abstractions. 
GC4S is free software distributed under the terms of GNU Lesser General Public License and both source code and documentation are publicly available at http://www.sing-group.org/gc4s.",2018-09-20 +29211825,Bicycle: a bioinformatics pipeline to analyze bisulfite sequencing data.,"Summary:High-throughput sequencing of bisulfite-converted DNA is a technique used to measure DNA methylation levels. Although a considerable number of computational pipelines have been developed to analyze such data, none of them tackles all the peculiarities of the analysis together, revealing limitations that can force the user to manually perform additional steps needed for a complete processing of the data. This article presents bicycle, an integrated, flexible analysis pipeline for bisulfite sequencing data. Bicycle analyzes whole genome bisulfite sequencing data, targeted bisulfite sequencing data and hydroxymethylation data. To show how bicycle overtakes other available pipelines, we compared them on a defined number of features that are summarized in a table. We also tested bicycle with both simulated and real datasets, to show its level of performance, and compared it to different state-of-the-art methylation analysis pipelines. Availability and implementation:Bicycle is publicly available under GNU LGPL v3.0 license at http://www.sing-group.org/bicycle. Users can also download a customized Ubuntu LiveCD including bicycle and other bisulfite sequencing data pipelines compared here. In addition, a docker image with bicycle and its dependencies, which allows a straightforward use of bicycle in any platform (e.g. Linux, OS X or Windows), is also available. Contact:ograna@cnio.es or dgpena@uvigo.es. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +31634369,RFQAmodel: Random Forest Quality Assessment to identify a predicted protein structure in the correct fold.,"While template-free protein structure prediction protocols now produce good quality models for many targets, modelling failure remains common. For these methods to be useful it is important that users can both choose the best model from the hundreds to thousands of models that are commonly generated for a target, and determine whether this model is likely to be correct. We have developed Random Forest Quality Assessment (RFQAmodel), which assesses whether models produced by a protein structure prediction pipeline have the correct fold. RFQAmodel uses a combination of existing quality assessment scores with two predicted contact map alignment scores. These alignment scores are able to identify correct models for targets that are not otherwise captured. Our classifier was trained on a large set of protein domains that are structurally diverse and evenly balanced in terms of protein features known to have an effect on modelling success, and then tested on a second set of 244 protein domains with a similar spread of properties. When models for each target in this second set were ranked according to the RFQAmodel score, the highest-ranking model had a high-confidence RFQAmodel score for 67 modelling targets, of which 52 had the correct fold. At the other end of the scale RFQAmodel correctly predicted that for 59 targets the highest-ranked model was incorrect. In comparisons to other methods we found that RFQAmodel is better able to identify correct models for targets where only a few of the models are correct. We found that RFQAmodel achieved a similar performance on the model sets for CASP12 and CASP13 free-modelling targets. 
Finally, by iteratively generating models and running RFQAmodel until a model is produced that is predicted to be correct with high confidence, we demonstrate how such a protocol can be used to focus computational efforts on difficult modelling targets. RFQAmodel and the accompanying data can be downloaded from http://opig.stats.ox.ac.uk/resources.",2019-10-21 +30895421,Empirical content as a criterion for evaluating models.,"Hypotheses derived from models can be tested in an empirical study: If the model reliably fails to predict behavior, it can be dismissed or modified. Models can also be evaluated before data are collected: More useful models have a high level of empirical content (Popper in Logik der Forschung, Mohr Siebeck, Tübingen, 1934), i.e., they make precise predictions (degree of precision) for many events (level of universality). I apply these criteria to reflect on some critical aspects of Kirsch's (Cognit Process, 2019. https://doi.org/10.1007/s10339-019-00904-3 ) unifying computational model of decision making.",2019-03-20 +24136998,Soybean knowledge base (SoyKB): a web resource for integration of soybean translational genomics and molecular breeding.,"Soybean Knowledge Base (http://soykb.org) is a comprehensive web resource developed for bridging soybean translational genomics and molecular breeding research. It provides information for six entities including genes/proteins, microRNAs/sRNAs, metabolites, single nucleotide polymorphisms, plant introduction lines and traits. It also incorporates many multi-omics datasets including transcriptomics, proteomics, metabolomics and molecular breeding data, such as quantitative trait loci, traits and germplasm information. Soybean Knowledge Base has a new suite of tools such as In Silico Breeding Program for soybean breeding, which includes a graphical chromosome visualizer for ease of navigation. 
It integrates quantitative trait loci, traits and germplasm information along with genomic variation data, such as single nucleotide polymorphisms, insertions, deletions and genome-wide association studies data, from multiple soybean cultivars and Glycine soja.",2013-10-16 +24973796,Synthesis of 53 tissue and cell line expression QTL datasets reveals master eQTLs.,"

Background

Gene expression genetic studies in human tissues and cells identify cis- and trans-acting expression quantitative trait loci (eQTLs). These eQTLs provide insights into regulatory mechanisms underlying disease risk. However, few studies systematically characterized eQTL results across cell and tissues types. We synthesized eQTL results from >50 datasets, including new primary data from human brain, peripheral plaque and kidney samples, in order to discover features of human eQTLs.

Results

We find a substantial number of robust cis-eQTLs and far fewer trans-eQTLs consistent across tissues. Analysis of 45 full human GWAS scans indicates eQTLs are enriched overall, and above nSNPs, among positive statistical signals in genetic mapping studies, and account for a significant fraction of the strongest human trait effects. Expression QTLs are enriched for gene centricity, higher population allele frequencies, in housekeeping genes, and for coincidence with regulatory features, though there is little evidence of 5' or 3' positional bias. Several regulatory categories are not enriched including microRNAs and their predicted binding sites and long, intergenic non-coding RNAs. Among the most tissue-ubiquitous cis-eQTLs, there is enrichment for genes involved in xenobiotic metabolism and mitochondrial function, suggesting these eQTLs may have adaptive origins. Several strong eQTLs (CDK5RAP2, NBPFs) coincide with regions of reported human lineage selection. The intersection of new kidney and plaque eQTLs with related GWAS suggest possible gene prioritization. For example, butyrophilins are now linked to arterial pathogenesis via multiple genetic and expression studies. Expression QTL and GWAS results are made available as a community resource through the NHLBI GRASP database [http://apps.nhlbi.nih.gov/grasp/].

Conclusions

Expression QTLs inform the interpretation of human trait variability, and may account for a greater fraction of phenotypic variability than protein-coding variants. The synthesis of available tissue eQTL data highlights many strong cis-eQTLs that may have important biologic roles and could serve as positive controls in future studies. Our results indicate some strong tissue-ubiquitous eQTLs may have adaptive origins in humans. Efforts to expand the genetic, splicing and tissue coverage of known eQTLs will provide further insights into human gene regulation.",2014-06-27 +,Phosphorus forms in forest soil colloids as revealed by liquid‐state 31P‐NMR,"Nanoparticles and colloids affect the storage and hence the availability of P in forest ecosystems. We investigated the fine colloids present in forest soils and their association with inorganic and organic P. To differentiate between the different P forms, we performed liquid‐state ³¹P‐nuclear magnetic resonance (³¹P‐NMR) measurements on forest bulk soil extracts, on colloid extracts and on the electrolyte phase of their soil suspensions. The ³¹P‐NMR spectra indicated that soil nanoparticles and colloids were more enriched with organic than with inorganic P forms compared to the electrolyte phase. The P concentration was enriched in the colloidal fraction in comparison to the bulk soil and the phosphate diesters were more dominant in the colloidal fraction when compared to the bulk soil. The colloidal P‐diester to P‐monoester ratios were 2 to 3 times higher in the colloidal fraction than in the bulk soil. In contrast, relatively large percentages of inorganic P were found in the electrolyte phase. 
Supplementary (not shown) Data are available at the JuSER Server (juser.fz‐juelich.de, reference number: FZJ‐2016‐01739) https://juser.fz‐juelich.de/record/283057.",2016-04-01 +30270947,Randomization-Based Statistical Inference: A Resampling and Simulation Infrastructure.,"Statistical inference involves drawing scientifically-based conclusions describing natural processes or observable phenomena from datasets with intrinsic random variation. There are parametric and non-parametric approaches for studying the data or sampling distributions, yet few resources are available to provide integrated views of data (observed or simulated), theoretical concepts, computational mechanisms and hands-on utilization via flexible graphical user interfaces. We designed, implemented and validated a new portable randomization-based statistical inference infrastructure (http://socr.umich.edu/HTML5/Resampling_Webapp) that blends research-driven data analytics and interactive learning, and provides a backend computational library for managing large amounts of simulated or user-provided data. The core of this framework is a modern randomization webapp, which may be invoked on any device supporting a JavaScript-enabled web-browser. We demonstrate the use of these resources to analyze proportion, mean, and other statistics using simulated (virtual experiments) and observed (e.g., Acute Myocardial Infarction, Job Rankings) data. Finally, we draw parallels between parametric inference methods and their distribution-free alternatives. The Randomization and Resampling webapp can be used for data analytics, as well as for formal, in-class and informal, out-of-the-classroom learning and teaching of different scientific concepts. Such concepts include sampling, random variation, computational statistical inference and data-driven analytics. 
The entire scientific community may utilize, test, expand, modify or embed these resources (data, source-code, learning activity, webapp) without any restrictions.",2018-04-11 +30745781,Selfie identification app as a forensic tool for missing and unidentified persons.,"Social media applications can be valuable investigative tools in the search for missing and unidentified persons. As yet, no forensic App exists with the aim of assisting the human identification process, through the search of antemortem data to be used as adjunct data in the comparison with postmortem data collected. The aim of this article is to introduce a new application for Smartphones called ""Selfie Forensic ID"" App which will employ selfie and face photographs as an archive of dental data and dental features of the front teeth of missing persons sharing with Instagram, Tumblr, and Twitter Social Networks (available for free download from both Android and Apple store at http://onelink.to/selfieforensic). Features such as diastema rotated or wrongly positioned teeth, lip anomalies, recognizable fixed prosthetics, dental crown discolorations, dental or cutis piercing could represent strong identifiers in the comparison of AM and PM data. The increased number of terrorist attacks and natural disasters which result in the premature death of innocent people underlines the importance of storing personal identification data to avoid bodies remaining unidentified. The authors believe there will be an increased public willingness to share personal ID information through understanding of the ethical and administrative consequences to the families of deceased persons should bodies remain unidentified.",2018-05-01 +25725058,FR database 1.0: a resource focused on fruit development and ripening. ,"Fruits form unique growing period in the life cycle of higher plants. They provide essential nutrients and have beneficial effects on human health. 
Characterizing the genes involved in fruit development and ripening is fundamental to understanding the biological process and improving horticultural crops. Although, numerous genes that have been characterized are participated in regulating fruit development and ripening at different stages, no dedicated bioinformatic resource for fruit development and ripening is available. In this study, we have developed such a database, FR database 1.0, using manual curation from 38 423 articles published before 1 April 2014, and integrating protein interactomes and several transcriptome datasets. It provides detailed information for 904 genes derived from 53 organisms reported to participate in fleshy fruit development and ripening. Genes from climacteric and non-climacteric fruits are also annotated, with several interesting Gene Ontology (GO) terms being enriched for these two gene sets and seven ethylene-related GO terms found only in the climacteric fruit group. Furthermore, protein-protein interaction analysis by integrating information from FR database presents the possible function network that affects fleshy fruit size formation. Collectively, FR database will be a valuable platform for comprehensive understanding and future experiments in fruit biology. Database URL: http://www.fruitech.org/",2015-02-27 +30016933,HGT-ID: an efficient and sensitive workflow to detect human-viral insertion sites using next-generation sequencing data.,"

Background

Transfer of genetic material from microbes or viruses into the host genome is known as horizontal gene transfer (HGT). The integration of viruses into the human genome is associated with multiple cancers, and these can now be detected using next-generation sequencing methods such as whole genome sequencing and RNA-sequencing.

Results

We designed a novel computational workflow, HGT-ID, to identify the integration of viruses into the human genome using the sequencing data. The HGT-ID workflow primarily follows a four-step procedure: i) pre-processing of unaligned reads, ii) virus detection using subtraction approach, iii) identification of virus integration site using discordant and soft-clipped reads and iv) HGT candidates prioritization through a scoring function. Annotation and visualization of the events, as well as primer design for experimental validation, are also provided in the final report. We evaluated the tool performance with the well-understood cervical cancer samples. The HGT-ID workflow accurately detected known human papillomavirus (HPV) integration sites with high sensitivity and specificity compared to previous HGT methods. We applied HGT-ID to The Cancer Genome Atlas (TCGA) whole-genome sequencing data (WGS) from liver tumor-normal pairs. Multiple hepatitis B virus (HBV) integration sites were identified in TCGA liver samples and confirmed by HGT-ID using the RNA-Seq data from the matched liver pairs. This shows the applicability of the method in both the data types and cross-validation of the HGT events in liver samples. We also processed 220 breast tumor WGS data through the workflow; however, there were no HGT events detected in those samples.

Conclusions

HGT-ID is a novel computational workflow to detect the integration of viruses in the human genome using the sequencing data. It is fast and accurate with functions such as prioritization, annotation, visualization and primer design for future validation of HGTs. The HGT-ID workflow is released under the MIT License and available at http://kalarikrlab.org/Software/HGT-ID.html .",2018-07-17 +29556758,"PyBioMed: a python library for various molecular representations of chemicals, proteins and DNAs and their interactions.","BACKGROUND:With the increasing development of biotechnology and informatics technology, publicly available data in chemistry and biology are undergoing explosive growth. Such wealthy information in these data needs to be extracted and transformed to useful knowledge by various data mining methods. Considering the amazing rate at which data are accumulated in chemistry and biology fields, new tools that process and interpret large and complex interaction data are increasingly important. So far, there are no suitable toolkits that can effectively link the chemical and biological space in view of molecular representation. To further explore these complex data, an integrated toolkit for various molecular representation is urgently needed which could be easily integrated with data mining algorithms to start a full data analysis pipeline. RESULTS:Herein, the python library PyBioMed is presented, which comprises functionalities for online download for various molecular objects by providing different IDs, the pretreatment of molecular structures, the computation of various molecular descriptors for chemicals, proteins, DNAs and their interactions. PyBioMed is a feature-rich and highly customized python library used for the characterization of various complex chemical and biological molecules and interaction samples. 
The current version of PyBioMed could calculate 775 chemical descriptors and 19 kinds of chemical fingerprints, 9920 protein descriptors based on protein sequences, more than 6000 DNA descriptors from nucleotide sequences, and interaction descriptors from pairwise samples using three different combining strategies. Several examples and five real-life applications were provided to clearly guide the users how to use PyBioMed as an integral part of data analysis projects. By using PyBioMed, users are able to start a full pipelining from getting molecular data, pretreating molecules, molecular representation to constructing machine learning models conveniently. CONCLUSION:PyBioMed provides various user-friendly and highly customized APIs to calculate various features of biological molecules and complex interaction samples conveniently, which aims at building integrated analysis pipelines from data acquisition, data checking, and descriptor calculation to modeling. PyBioMed is freely available at http://projects.scbdd.com/pybiomed.html .",2018-03-20 +31568041,Prediction of Thiopurine Metabolite Levels Based on Haematological and Biochemical Parameters.,"OBJECTIVES:Therapeutic drug monitoring of thiopurine erythrocyte levels is not available in all centers and it usually requires quite a long time to obtain the results. The aims of this study were to build a model predicting low levels of 6-thioguanine and 6-methylmercaptopurine in pediatric inflammatory bowel disease (IBD) patients and to build a model to predict nonadherence in patients treated with azathioprine (AZA). METHODS:The study consisted of 332 observations in 88 pediatric IBD patients. Low AZA dosing was defined as 6-thioguanine levels <125 pmol/8 × 10 erythrocytes and 6-methylmercaptopurine levels <5700 pmol/8 × 10 erythrocytes. Nonadherence was defined as undetectable levels of 6-thioguanine and 6-methylmercaptopurine <240 pmol/8 × 10 erythrocytes. Data were divided into training and testing part. 
To construct the model predicting low 6-thioguanine levels, nonadherence, and the level of 6-thioguanine, the modification of random forest method with cross-validation and resampling was used. RESULTS:The final models predicting low 6-thioguanine levels and nonadherence had area under the curve, 0.87 and 0.94; sensitivity, 0.81 and 0.82; specificity, 0.80 and 0.86; and distance, 0.31 and 0.21, respectively, when applied on the testing part of the dataset. When the final model for prediction of 6-thioguanine values was applied on testing dataset, a root-mean-square error of 110 was obtained. CONCLUSIONS:Using easily obtained laboratory parameters, we constructed a model with sufficient accuracy to predict patients with low 6-thioguanine levels and a model for prediction of AZA treatment nonadherence (web applications: https://hradskyo.shinyapps.io/6TG_prediction/ and https://hradskyo.shinyapps.io/Non_adherence/).",2019-10-01 +29860481,DEXTER: Disease-Expression Relation Extraction from Text. ,"Gene expression levels affect biological processes and play a key role in many diseases. Characterizing expression profiles is useful for clinical research, and diagnostics and prognostics of diseases. There are currently several high-quality databases that capture gene expression information, obtained mostly from large-scale studies, such as microarray and next-generation sequencing technologies, in the context of disease. The scientific literature is another rich source of information on gene expression-disease relationships that not only have been captured from large-scale studies but have also been observed in thousands of small-scale studies. Expression information obtained from literature through manual curation can extend expression databases. While many of the existing databases include information from literature, they are limited by the time-consuming nature of manual curation and have difficulty keeping up with the explosion of publications in the biomedical field. 
In this work, we describe an automated text-mining tool, Disease-Expression Relation Extraction from Text (DEXTER) to extract information from literature on gene and microRNA expression in the context of disease. One of the motivations in developing DEXTER was to extend the BioXpress database, a cancer-focused gene expression database that includes data derived from large-scale experiments and manual curation of publications. The literature-based portion of BioXpress lags behind significantly compared to expression information obtained from large-scale studies and can benefit from our text-mined results. We have conducted two different evaluations to measure the accuracy of our text-mining tool and achieved average F-scores of 88.51 and 81.81% for the two evaluations, respectively. Also, to demonstrate the ability to extract rich expression information in different disease-related scenarios, we used DEXTER to extract information on differential expression information for 2024 genes in lung cancer, 115 glycosyltransferases in 62 cancers and 826 microRNA in 171 cancers. All extractions using DEXTER are integrated in the literature-based portion of BioXpress.Database URL: http://biotm.cis.udel.edu/DEXTER.",2018-01-01 +27663502,LD Hub: a centralized database and web interface to perform LD score regression that maximizes the potential of summary level GWAS data for SNP heritability and genetic correlation analysis.,"

Motivation

LD score regression is a reliable and efficient method of using genome-wide association study (GWAS) summary-level results data to estimate the SNP heritability of complex traits and diseases, partition this heritability into functional categories, and estimate the genetic correlation between different phenotypes. Because the method relies on summary level results data, LD score regression is computationally tractable even for very large sample sizes. However, publicly available GWAS summary-level data are typically stored in different databases and have different formats, making it difficult to apply LD score regression to estimate genetic correlations across many different traits simultaneously.

Results

In this manuscript, we describe LD Hub - a centralized database of summary-level GWAS results for 173 diseases/traits from different publicly available resources/consortia and a web interface that automates the LD score regression analysis pipeline. To demonstrate functionality and validate our software, we replicated previously reported LD score regression analyses of 49 traits/diseases using LD Hub; and estimated SNP heritability and the genetic correlation across the different phenotypes. We also present new results obtained by uploading a recent atopic dermatitis GWAS meta-analysis to examine the genetic correlation between the condition and other potentially related traits. In response to the growing availability of publicly accessible GWAS summary-level results data, our database and the accompanying web interface will ensure maximal uptake of the LD score regression methodology, provide a useful database for the public dissemination of GWAS results, and provide a method for easily screening hundreds of traits for overlapping genetic aetiologies.

Availability and implementation

The web interface and instructions for using LD Hub are available at http://ldsc.broadinstitute.org/ CONTACT: jie.zheng@bristol.ac.ukSupplementary information: Supplementary data are available at Bioinformatics online.",2016-09-22 +23600727,Identification and biosynthesis of acylphloroglucinols in Hypericum gentianoides.,"Species of the genus Hypericum contain a rich array of unusual polyketides, however, only a small proportion of the over 450 Hypericum species, other than the popular medicinal supplement St. John's Wort (Hypericum perforatum), have even been chemically characterized. Hypericum gentianoides, a small annual used medicinally by Cherokee Americans, contains bioactive acylphloroglucinols. Here, we identify acylphloroglucinol constituents of H. gentianoides and determine a potential pathway to their synthesis. Liquid chromatography/electrospray ionization-mass spectrometry (LC/ESI-MS) and HPLC-UV indicate that the level of accumulation and profile of acylphloroglucinols in H. gentianoides vary little seasonally when grown in a greenhouse, but do vary with development and are highly dependent on the accession, highlighting the importance of the selection of plant material for study. We identify the chemical structures of the nine prevalent polyketides, based on LC/ESI-MS and hybrid quadrupole orthogonal time-of-flight (Q-TOF) mass spectrometry; these metabolites include one monomeric phlorisobutyrophenone (PIB) derivative and eight dimeric acylphloroglucinols. Q-TOF spectrometry was used to identify eight additional PIB derivatives that were not detected by LC/ESI-MS. These data lead us to propose that diacylphloroglucinols are synthesized via modification of PIB to yield diverse phloroglucinol and filicinic acids moieties, followed by dimerization of a phloroglucinol and a filicinic acid monomer to yield the observed complement of diacylphloroglucinols. The metabolomics data from H. 
gentianoides are accessible in plant metabolomics resource (PMR) (http://www.metnetdb.org/pmr), a public metabolomics database with analysis software for plants and microbial organisms.",2013-05-24 +31760132,Effect of omalizumab on lung function and eosinophil levels in adolescents with moderate-to-severe allergic asthma.,"

Background

Omalizumab improves clinical outcomes in patients with asthma. Several studies have shown lung function improvements with omalizumab; however, this has not been examined exclusively in adolescents.

Objective

To assess the effect of omalizumab on lung function and eosinophil counts in adolescents with uncontrolled moderate-to-severe allergic asthma.

Methods

In this post hoc analysis, data from adolescents aged 12 to 17 years from 8 randomized trials of omalizumab were pooled (studies 008, 009, and 011, and SOLAR, INNOVATE, ALTO, ETOPA, and EXTRA). Changes from baseline to end of study in forced expiratory volume in 1 second (FEV1), percent predicted FEV1 (ppFEV1), forced vital capacity (FVC), and blood eosinophil counts were assessed by fitting an analysis of covariance model and calculating least squares mean (LSM) difference for omalizumab vs placebo.

Results

A total of 340 adolescents were identified (omalizumab, n = 203 [59.7%]; placebo, n = 137 [40.3%]). Omalizumab increased all baseline lung function variables more than placebo by end of study: LSM treatment differences (95% confidence interval) were 3.0% (0.2%-5.7%; P = .035), 120.9 mL (30.6-211.2 mL; P = .009), and 101.5 mL (8.3-194.6 mL; P = .033) for ppFEV1, absolute FEV1, and FVC, respectively. The LSM difference demonstrated a greater reduction in eosinophil counts for omalizumab vs placebo: -85.9 cells/μL (-137.1 to -34.6 cells/μL; P = .001).

Conclusion

Omalizumab was associated with lung function improvements and circulating eosinophil counts reductions in adolescents with moderate-to-severe uncontrolled asthma. Findings emphasize the effect of omalizumab in young patients and the need to optimize treatment early in the disease course. https://clinicaltrials.gov/: NCT00314574, NCT00046748, NCT00401596.",2019-11-22 +30846812,Development of species specific putative miRNA and its target prediction tool in wheat (Triticum aestivum L.).,"MicroRNA are 20-24 nt, non-coding, single stranded molecule regulating traits and stress response. Tissue and time specific expression limits its detection, thus is major challenge in their discovery. Wheat has limited 119 miRNAs in MiRBase due to limitation of conservation based methodology where old and new miRNA genes gets excluded. This is due to origin of hexaploid wheat by three successive hybridization, older AA, BB and younger DD subgenome. Species specific miRNA prediction (SMIRP concept) based on 152 thermodynamic features of training dataset using support vector machine learning approach has improved prediction accuracy to 97.7%. This has been implemented in TamiRPred ( http://webtom.cabgrid.res.in/tamirpred ). We also report highest number of putative miRNA genes (4464) of wheat from whole genome sequence populated in database developed in PHP and MySQL. TamiRPred has predicted 2092 (>45.10%) additional miRNA which was not predicted by miRLocator. Predicted miRNAs have been validated by miRBase, small RNA libraries, secondary structure, degradome dataset, star miRNA and binding sites in wheat coding region. This tool can accelerate miRNA polymorphism discovery to be used in wheat trait improvement. Since it predicts chromosome-wise miRNA genes with their respective physical location thus can be transferred using linked SSR markers. 
This prediction approach can be used as model even in other polyploid crops.",2019-03-07 +30851277,EvoDesign: Designing Protein-Protein Binding Interactions Using Evolutionary Interface Profiles in Conjunction with an Optimized Physical Energy Function.,"EvoDesign (https://zhanglab.ccmb.med.umich.edu/EvoDesign) is an online server system for protein design. The method uses evolutionary profiles to guide the sequence search simulation and demonstrated significant advantages over physics-based approaches in terms of more accurately designing proteins that adopt desired target folds. Despite the success, the previous EvoDesign program focused only on monomer protein design, which limited its ability and usefulness in terms of designing functional proteins. In this work, we propose a new EvoDesign server, which extends the principles of evolution-based design to design protein-protein interactions. Starting from a two-chain complex structure, structurally similar interfaces are identified from known protein-protein interaction databases. An interface evolutionary profile is then constructed from a multiple sequence alignment of the interface analogies, which is combined with a newly developed, atomic-level physical energy function to guide the replica-exchange Monte Carlo simulation search. The purpose of the server is to redesign the specified complex chain to increase its stability and binding affinity for the other chain in the complex. With the improved scope and accuracy of the methodology, the new EvoDesign pipeline should become a useful online tool for functional protein design and drug discovery studies.",2019-03-07 +30844149,PubChem and ChEMBL beyond Lipinski.,"Seven million of the currently 94 million entries in the PubChem database break at least one of the four Lipinski constraints for oral bioavailability, 183,185 of which are also found in the ChEMBL database. 
These non-Lipinski PubChem (NLP) and ChEMBL (NLC) subsets are interesting because they contain new modalities that can display biological properties not accessible to small molecule drugs. Unfortunately, the current search tools in PubChem and ChEMBL are designed for small molecules and are not well suited to explore these subsets, which therefore remain poorly appreciated. Herein we report MXFP (macromolecule extended atom-pair fingerprint), a 217-D fingerprint tailored to analyze large molecules in terms of molecular shape and pharmacophores. We implement MXFP in two web-based applications, the first one to visualize NLP and NLC interactively using Faerun (http://faerun.gdb.tools/), the second one to perform MXFP nearest neighbor searches in NLP and NLC (http://similaritysearch.gdb.tools/). We show that these tools provide a meaningful insight into the diversity of large molecules in NLP and NLC. The interactive tools presented here are publicly available at http://gdb.unibe.ch and can be used freely to explore and better understand the diversity of non-Lipinski molecules in PubChem and ChEMBL.",2019-03-07 +28475710,An efficient graph kernel method for non-coding RNA functional prediction.,"

Motivation

The importance of RNA protein-coding gene regulation is by now well appreciated. Non-coding RNAs (ncRNAs) are known to regulate gene expression at practically every stage, ranging from chromatin packaging to mRNA translation. However the functional characterization of specific instances remains a challenging task in genome scale settings. For this reason, automatic annotation approaches are of interest. Existing computational methods are either efficient but non-accurate or they offer increased precision, but present scalability problems.

Results

In this article, we present a predictive system based on kernel methods, a type of machine learning algorithm grounded in statistical learning theory. We employ a flexible graph encoding to preserve multiple structural hypotheses and exploit recent advances in representation and model induction to scale to large data volumes. Experimental results on tens of thousands of ncRNA sequences available from the Rfam database indicate that we can not only improve upon state-of-the-art predictors, but also achieve speedups of several orders of magnitude.

Availability and implementation

The code is available from http://www.bioinf.uni-freiburg.de/~costa/EDeN.tgz .

Contact

f.costa@exeter.ac.uk.",2017-09-01 +29191519,Opisthorchis felineus infection prevalence in Western Siberia: A review of Russian literature.,"In this study we reviewed Russian scientific literature (scientific publications, book chapters, monographs) published between 1 January 1979 and 31 August 2015 from two sources: Main database of the Russian Scientific Electronic Library (eLIBRARY, http://elibrary.ru/), and the Scientific Medical Library of Siberian State Medical University (http://medlib.tomsk.ru/). Specifically, the review details the infection prevalence of Opisthorchis felineus (O. felineus) in Western Siberia, Russian Federation. From the primary key words screening, 1591 records were identified from which 32 Russian-language publications were relevant. The lowest O. felineus infection rate of 0.4% was reported in Tatarstan Republic, and the highest reached 83.9% in the Khanty-Mansiysk Autonomous Okrug. The infection prevalence was lower in children than in adults and increased with age. O. felineus infection was detected more often in indigenous population than in migrants. Infection intensity in western regions (Permskaya, Bryanskaya Oblast) was low and varied from 15 to 336 eggs per gram stool (epg), while in endemic regions it reached more than 2000 epg. In some settlements the mean intensity infection was 5234 epg. The high rates of intensity were registered in regions with a high prevalence of infection. Based on obtained data, a map of O. felineus infection prevalence in Western Siberia was developed. After mapping the results, the highest prevalence was detected in Tyumenskaya Oblast with over 60%, while the Tomskaya Oblast had the lowest prevalence at fewer than 19.0%. Khanty-Mansiysk Autonomus Okrug, Altaiskii Krai, Novosibirskaya Oblast and Omskaya Oblast had an average level of O. felineus infection of 20-39%. 
According to the results of the review, Western Siberia must be considered as highly endemic region for opisthorchiasis in the Russian Federation. The development of a control program specific for the Russian community is warranted.",2017-11-27 +30465642,PANINI: Pangenome Neighbour Identification for Bacterial Populations. ,"The standard workhorse for genomic analysis of the evolution of bacterial populations is phylogenetic modelling of mutations in the core genome. However, a notable amount of information about evolutionary and transmission processes in diverse populations can be lost unless the accessory genome is also taken into consideration. Here, we introduce panini (Pangenome Neighbour Identification for Bacterial Populations), a computationally scalable method for identifying the neighbours for each isolate in a data set using unsupervised machine learning with stochastic neighbour embedding based on the t-SNE (t-distributed stochastic neighbour embedding) algorithm. panini is browser-based and integrates with the Microreact platform for rapid online visualization and exploration of both core and accessory genome evolutionary signals, together with relevant epidemiological, geographical, temporal and other metadata. Several case studies with single- and multi-clone pneumococcal populations are presented to demonstrate the ability to identify biologically important signals from gene content data. panini is available at http://panini.pathogen.watch and code at http://gitlab.com/cgps/panini.",2018-11-22 +31528534,"A cross-sectional description of open access publication costs, policies and impact in emergency medicine and critical care journals.","

Introduction

Finding journal open access information alongside its global impact requires access to multiple databases. We describe a single, searchable database of all emergency medicine and critical care journals that include their open access policies, publication costs, and impact metrics.

Methods

A list of emergency medicine and critical care journals (including citation metrics) was created using Scopus (Citescore) and the Web of Science (Impact Factor). Cost of gold/hybrid open access and article process charges (open access fees) were collected from journal websites. Self-archiving policies were collected from the Sherpa/RoMEO database. Relative cost of access in different regions were calculated using the World Bank Purchasing Power Parity index for authors from the United States, Germany, Turkey, China, Brazil, South Africa and Australia.

Results

We identified 78 emergency medicine and 82 critical care journals. Median Citescore for emergency medicine was 0.73 (interquartile range, IQR 0.32-1.27). Median impact factor was 1.68 (IQR 1.00-2.39). Median Citescore for critical care was 0.95 (IQR 0.25-2.06). Median impact factor was 2.18 (IQR 1.73-3.50). Mean article process charge for emergency medicine was $2243.04, SD = $1136.16 and for critical care $2201.64, SD = $1174.38. Article process charges were 2.24, 1.75, 2.28 and 1.56 times more expensive for South African, Chinese, Turkish and Brazilian authors respectively than United States authors, but neutral for German and Australian authors (1.02 and 0.81 respectively). The database can be accessed here: http://www.emct.info/publication-search.html.

Conclusions

We present a single database that captures emergency medicine and critical care journal impact rankings alongside its respective open access cost and green open access policies.",2019-03-06 +23613709,Text mining effectively scores and ranks the literature for improving chemical-gene-disease curation at the comparative toxicogenomics database.,"The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is a public resource that curates interactions between environmental chemicals and gene products, and their relationships to diseases, as a means of understanding the effects of environmental chemicals on human health. CTD provides a triad of core information in the form of chemical-gene, chemical-disease, and gene-disease interactions that are manually curated from scientific articles. To increase the efficiency, productivity, and data coverage of manual curation, we have leveraged text mining to help rank and prioritize the triaged literature. Here, we describe our text-mining process that computes and assigns each article a document relevancy score (DRS), wherein a high DRS suggests that an article is more likely to be relevant for curation at CTD. We evaluated our process by first text mining a corpus of 14,904 articles triaged for seven heavy metals (cadmium, cobalt, copper, lead, manganese, mercury, and nickel). Based upon initial analysis, a representative subset corpus of 3,583 articles was then selected from the 14,094 articles and sent to five CTD biocurators for review. The resulting curation of these 3,583 articles was analyzed for a variety of parameters, including article relevancy, novel data content, interaction yield rate, mean average precision, and biological and toxicological interpretability. We show that for all measured parameters, the DRS is an effective indicator for scoring and improving the ranking of literature for the curation of chemical-gene-disease information at CTD. 
Here, we demonstrate how fully incorporating text mining-based DRS scoring into our curation pipeline enhances manual curation by prioritizing more relevant articles, thereby increasing data content, productivity, and efficiency.",2013-04-17 +32086151,Management of epileptic seizures in school-age children: Educational project dedicated to school staff.,"

Objective

The objective of this study was to educate the school staff for a correct management of epileptic seizures in order to increase the safety of young people at school and promoting the administration of rescue drugs and in order to improve care and reduce improper calls to the health emergency number.

Methods

This project started in January 2016, and it is still ongoing at the Department of Neuroscience of Bambino Gesù Children's Hospital in Rome, Italy. There has been a data cut-off evaluation in November 2018. Two-hour training meetings with the school staff have been organized. The major topics of the training activities were as follows: report what epilepsy is, how to manage students with epileptic seizures, and how to administer rescue medications. During the meetings, the following two questionnaires were administered: one pretest in order to collect personal information and information on awareness of epilepsy, willingness to administer rescue medications, and anxiety in facing a seizure; and one posttest in order to check the knowledge acquired after the training sessions. Statistical analysis was performed using R version 3.2.3 (R Foundation for Statistical Computing, http://www.R-project.org/). Demographics (sex and age) and teaching experience were summarized with descriptive statistics for each variable. Demographics, teaching experience, awareness of disability, and knowledge of epilepsy were correlated to the management of seizures occurring in the classroom before the course; results are reported as odds ratios [OR] and 95% confidence interval (95% CI).

Results

Nine hundred school staff members (95% school staff and 5% social workers) entered in the project between January 2016 and November 2018. Seven hundred and forty (82%) returned the questionnaires fulfilled, and not all of them were completely filled. Ninety-eight percent of school staff (676/691) were aware about epilepsy; however, only in 16% (110) the awareness of epilepsy came from medical staff, scientific brochures, or participation in conventions. Thirty-five percent of school staff (248/707) believed that epilepsy reduces learning abilities, and 58% (409/703) believed that children with epilepsy need school support. After the training, 68% of school staff (496/734) correctly filled in the questionnaire related to the management of acute seizures versus 8% of them (57/718) in the prequestionnaire. After the training, 89% of school staff (601/675) were ready to administer rescue medications versus 54% (384/712) before the training. The majority of participants reported that the level of anxiety related to the management of seizures after the training significantly reduced.

Conclusions

Results of this project documented an increase in knowledge of epilepsy, a better knowledge on management of acute seizures in the school settings, a reduction in anxiety, and an increase in willingness to administer rescue medications. Further studies should be planned in order to document the changes in the real-world management of seizures, to evaluate if a reduction in hospital admittances might be reached, and to extend the project by assessing, through a questionnaire, the stigma and prejudices against the children affected by epilepsy by their classmates.",2020-02-18 +25931458,NeuroPep: a comprehensive resource of neuropeptides.,"Neuropeptides play a variety of roles in many physiological processes and serve as potential therapeutic targets for the treatment of some nervous-system disorders. In recent years, there has been a tremendous increase in the number of identified neuropeptides. Therefore, we have developed NeuroPep, a comprehensive resource of neuropeptides, which holds 5949 non-redundant neuropeptide entries originating from 493 organisms belonging to 65 neuropeptide families. In NeuroPep, the number of neuropeptides in invertebrates and vertebrates is 3455 and 2406, respectively. It is currently the most complete neuropeptide database. We extracted entries deposited in UniProt, the database (www.neuropeptides.nl) and NeuroPedia, and used text mining methods to retrieve entries from the MEDLINE abstracts and full text articles. All the entries in NeuroPep have been manually checked. 2069 of the 5949 (35%) neuropeptide sequences were collected from the scientific literature. Moreover, NeuroPep contains detailed annotations for each entry, including source organisms, tissue specificity, families, names, post-translational modifications, 3D structures (if available) and literature references. 
Information derived from these peptide sequences such as amino acid compositions, isoelectric points, molecular weight and other physicochemical properties of peptides are also provided. A quick search feature allows users to search the database with keywords such as sequence, name, family, etc., and an advanced search page helps users to combine queries with logical operators like AND/OR. In addition, user-friendly web tools like browsing, sequence alignment and mapping are also integrated into the NeuroPep database. Database URL: http://isyslab.info/NeuroPep",2015-04-29 +30296561,Probabilistic TFCE: A generalized combination of cluster size and voxel intensity to increase statistical power.,"The threshold-free cluster enhancement (TFCE) approach integrates cluster information into voxel-wise statistical inference to enhance detectability of neuroimaging signal. Despite the significantly increased sensitivity, the application of TFCE is limited by several factors: (i) generalisation to data structures, like brain network connectivity data is not trivial, (ii) TFCE values are in an arbitrary unit, therefore, P-values can only be obtained by a computationally demanding permutation-test. Here, we introduce a probabilistic approach for TFCE (pTFCE), that gives a simple general framework for topology-based belief boosting. The core of pTFCE is a conditional probability, calculated based on Bayes' rule, from the probability of voxel intensity and the threshold-wise likelihood function of the measured cluster size. In this paper, we provide an estimation of these distributions based on Gaussian Random Field theory. The conditional probabilities are then aggregated across cluster-forming thresholds by a novel incremental aggregation method. pTFCE is validated on simulated and real fMRI data. 
The results suggest that pTFCE is more robust to various ground truth shapes and provides a stricter control over cluster ""leaking"" than TFCE and, in many realistic cases, further improves its sensitivity. Correction for multiple comparisons can be trivially performed on the enhanced P-values, without the need for permutation testing, thus pTFCE is well-suitable for the improvement of statistical inference in any neuroimaging workflow. Implementation of pTFCE is available at https://spisakt.github.io/pTFCE.",2018-10-06 +27069559,MD-CTS: An integrated terminology reference of clinical and translational medicine.,"New vocabularies are rapidly evolving in the literature relative to the practice of clinical medicine and translational research. To provide integrated access to new terms, we developed a mobile and desktop online reference-Marshfield Dictionary of Clinical and Translational Science (MD-CTS). It is the first public resource that comprehensively integrates Wiktionary (word definition), BioPortal (ontology), Wiki (image reference), and Medline abstract (word usage) information. MD-CTS is accessible at http://spellchecker.mfldclin.edu/. The website provides a broadened capacity for the wider clinical and translational science community to keep pace with newly emerging scientific vocabulary. An initial evaluation using 63 randomly selected biomedical words suggests that online references generally provided better coverage (73%-95%) than paper-based dictionaries (57-71%).",2016-03-02 +28077563,The BioC-BioGRID corpus: full text articles annotated for curation of protein-protein and genetic interactions. ,"A great deal of information on the molecular genetics and biochemistry of model organisms has been reported in the scientific literature. However, this data is typically described in free text form and is not readily amenable to computational analyses. 
To this end, the BioGRID database systematically curates the biomedical literature for genetic and protein interaction data. This data is provided in a standardized computationally tractable format and includes structured annotation of experimental evidence. BioGRID curation necessarily involves substantial human effort by expert curators who must read each publication to extract the relevant information. Computational text-mining methods offer the potential to augment and accelerate manual curation. To facilitate the development of practical text-mining strategies, a new challenge was organized in BioCreative V for the BioC task, the collaborative Biocurator Assistant Task. This was a non-competitive, cooperative task in which the participants worked together to build BioC-compatible modules into an integrated pipeline to assist BioGRID curators. As an integral part of this task, a test collection of full text articles was developed that contained both biological entity annotations (gene/protein and organism/species) and molecular interaction annotations (protein-protein and genetic interactions (PPIs and GIs)). This collection, which we call the BioC-BioGRID corpus, was annotated by four BioGRID curators over three rounds of annotation and contains 120 full text articles curated in a dataset representing two major model organisms, namely budding yeast and human. The BioC-BioGRID corpus contains annotations for 6409 mentions of genes and their Entrez Gene IDs, 186 mentions of organism names and their NCBI Taxonomy IDs, 1867 mentions of PPIs and 701 annotations of PPI experimental evidence statements, 856 mentions of GIs and 399 annotations of GI evidence statements. 
The purpose, characteristics and possible future uses of the BioC-BioGRID corpus are detailed in this report. Database URL: http://bioc.sourceforge.net/BioC-BioGRID.html.",2017-01-10 +27752469,Genome sequences of six Phytophthora species threatening forest ecosystems.,"The Phytophthora genus comprises of some of the most destructive plant pathogens and attack a wide range of hosts including economically valuable tree species, both angiosperm and gymnosperm. Many known species of Phytophthora are invasive and have been introduced through nursery and agricultural trade. As part of a larger project aimed at utilizing genomic data for forest disease diagnostics, pathogen detection and monitoring (The TAIGA project: Tree Aggressors Identification using Genomic Approaches; http://taigaforesthealth.com/), we sequenced the genomes of six important Phytophthora species that are important invasive pathogens of trees and a serious threat to the international trade of forest products. This genomic data was used to develop highly sensitive and specific detection assays and for genome comparisons and to make evolutionary inferences and will be useful to the broader plant and tree health community. These WGS data have been deposited in the International Nucleotide Sequence Database Collaboration (DDBJ/ENA/GenBank) under the accession numbers AUPN01000000, AUVH01000000, AUWJ02000000, AUUF02000000, AWVV02000000 and AWVW02000000.",2016-10-03 +31162981,Ambient Temperature and Markers of Fetal Growth: A Retrospective Observational Study of 29 Million U.S. Singleton Births.,"

Background

Emerging studies suggest that ambient temperature during pregnancy may be associated with fetal growth, but the existing evidence is limited and inconsistent.

Objectives

We aimed to evaluate the association of trimester-specific temperature with risk of being born small for gestational age (SGA) and birth weight—markers of fetal growth—among term births in the contiguous United States.

Methods

We included data on 29,597,735 live singleton births between 1989 and 2002 across 403 U.S. counties. We estimated daily county-level population-weighted mean temperature using a spatially refined gridded climate data set. We used logistic regression to estimate the association between trimester-specific temperature and risk of SGA and linear regression to evaluate the association between trimester-specific temperature and term birth weight z-score, adjusting for parity, maternal demographics, smoking or drinking during pregnancy, chronic hypertension, and year and month of conception. We then pooled results overall and by geographic regions and climate zones.

Results

High ambient temperatures ([Formula: see text] percentile) during the entire pregnancy were associated with higher risk of term SGA {odds ratio [OR] = 1.041 [95% confidence interval (CI): 1.029, 1.054]} and lower term birth weight [standardized to [Formula: see text] (95% CI: [Formula: see text], [Formula: see text]) reduction in birth weight for infants born at 40 weeks of gestation]. Low temperatures ([Formula: see text] percentile) during the entire pregnancy were not associated with SGA [OR = 1.003 (95% CI: 0.991, 1.015)] but were associated with a small decrement in term birth weight [standardized to [Formula: see text] (95% CI: [Formula: see text], [Formula: see text])]. Risks of term SGA and birth weight were more strongly associated with temperature averaged across the second and third trimesters, in areas of the Northeast, and in areas with cold or very cold climates.

Conclusions

Above-average temperatures during pregnancy were associated with lower fetal growth. Our findings provide evidence that temperature may be a novel risk factor for reduced fetal growth. https://doi.org/10.1289/EHP4648.",2019-06-04 +28602763,Combined versus single application of tranexamic acid in total knee and hip arthroplasty: A meta-analysis of randomized controlled trials.,"

Objective

To compare the efficacy and safety of the combined application of both intravenous and topical tranexamic acid versus the single use of either application in patients with total knee and hip arthroplasty.

Methods

Potentially relevant studies were identified from electronic databases including Medline, PubMed, Embase, ScienceDirect and the Cochrane Library. Patients undergoing primary total knee and hip arthroplasty were included in our studies, with an experimental group that received combined intravenous and topical application of tranexamic acid and a control group that received a single application of tranexamic acid or normal saline. The primary outcomes were total blood loss, hemoglobin decline and transfusion requirements. The secondary outcomes were length of stay, operation time and tranexamic acid-related adverse effects, such as superficial infection, deep vein thrombosis or pulmonary embolism. Modified Jadad scores were used to assess the quality of the included randomized controlled trials (RCTs). The data was pooled using RevMan 5.3. After testing for heterogeneity across studies, the data were aggregated using random-effects modeling when appropriate. We have registered the trial at http://www.researchregistry.com.

Results

Six RCTs that included 704 patients met the inclusion criteria. The present meta-analysis indicated significant differences existed in the total blood loss (MD = -134.65, 95% CI: -191.66 to -77.64, P < 0.0001), postoperative hemoglobin level (MD = 0.74, 95% CI: 0.39 to 1.10, P < 0.0001), drainage volume (MD = -40.19, 95% CI: -55.95 to -24.43, P < 0.00001) and transfusion rate (RD = -0.07, 95% CI: -0.11 to -0.03, P = 0.0004) between groups.

Conclusion

Combined administration of tranexamic acid in total knee and hip arthroplasty was associated with significantly reduced total blood loss, postoperative hemoglobin decline, drainage volume, and transfusion requirements. Based on the limitations of current meta-analysis, well-designed, high-quality RCTs with long-term follow-up are still required.",2017-06-10 +30425123,Gene2vec: gene subsequence embedding for prediction of mammalian N 6-methyladenosine sites from mRNA.,"N 6-Methyladenosine (m6A) refers to methylation modification of the adenosine nucleotide acid at the nitrogen-6 position. Many conventional computational methods for identifying N 6-methyladenosine sites are limited by the small amount of data available. Taking advantage of the thousands of m6A sites detected by high-throughput sequencing, it is now possible to discover the characteristics of m6A sequences using deep learning techniques. To the best of our knowledge, our work is the first attempt to use word embedding and deep neural networks for m6A prediction from mRNA sequences. Using four deep neural networks, we developed a model inferred from a larger sequence shifting window that can predict m6A accurately and robustly. Four prediction schemes were built with various RNA sequence representations and optimized convolutional neural networks. The soft voting results from the four deep networks were shown to outperform all of the state-of-the-art methods. We evaluated these predictors mentioned above on a rigorous independent test data set and proved that our proposed method outperforms the state-of-the-art predictors. The training, independent, and cross-species testing data sets are much larger than in previous studies, which could help to avoid the problem of overfitting. 
Furthermore, an online prediction web server implementing the four proposed predictors has been built and is available at http://server.malab.cn/Gene2vec/.",2018-11-13 +30458531,Examining Factors Influencing the Viability of Automatic Acoustic Analysis of Child Speech.,"

Purpose

Heterogeneous child speech was force-aligned to investigate whether (a) manipulating specific parameters could improve alignment accuracy and (b) forced alignment could be used to replicate published results on acoustic characteristics of /s/ production by children.

Method

In Part 1, child speech from 2 corpora was force-aligned with a trainable aligner (Prosodylab-Aligner) under different conditions that systematically manipulated input training data and the type of transcription used. Alignment accuracy was determined by comparing hand and automatic alignments as to how often they overlapped (%-Match) and absolute differences in duration and boundary placements. Using mixed-effects regression, accuracy was modeled as a function of alignment conditions, as well as segment and child age. In Part 2, forced alignments derived from a subset of the alignment conditions in Part 1 were used to extract spectral center of gravity of /s/ productions from young children. These findings were compared to published results that used manual alignments of the same data.

Results

Overall, the results of Part 1 demonstrated that using training data more similar to the data to be aligned as well as phonetic transcription led to improvements in alignment accuracy. Speech from older children was aligned more accurately than younger children. In Part 2, /s/ center of gravity extracted from force-aligned segments was found to diverge in the speech of male and female children, replicating the pattern found in previous work using manually aligned segments. This was true even for the least accurate forced alignment method.

Conclusions

Alignment accuracy of child speech can be improved by using more specific training and transcription. However, poor alignment accuracy was not found to impede acoustic analysis of /s/ produced by even very young children. Thus, forced alignment presents a useful tool for the analysis of child speech.

Supplemental material

https://doi.org/10.23641/asha.7070105.",2018-10-01 +30272184,Data and systems for medication-related text classification and concept normalization from Twitter: insights from the Social Media Mining for Health (SMM4H)-2017 shared task.,"

Objective

We executed the Social Media Mining for Health (SMM4H) 2017 shared tasks to enable the community-driven development and large-scale evaluation of automatic text processing methods for the classification and normalization of health-related text from social media. An additional objective was to publicly release manually annotated data.

Materials and methods

We organized 3 independent subtasks: automatic classification of self-reports of 1) adverse drug reactions (ADRs) and 2) medication consumption, from medication-mentioning tweets, and 3) normalization of ADR expressions. Training data consisted of 15 717 annotated tweets for (1), 10 260 for (2), and 6650 ADR phrases and identifiers for (3); and exhibited typical properties of social-media-based health-related texts. Systems were evaluated using 9961, 7513, and 2500 instances for the 3 subtasks, respectively. We evaluated performances of classes of methods and ensembles of system combinations following the shared tasks.

Results

Among 55 system runs, the best system scores for the 3 subtasks were 0.435 (ADR class F1-score) for subtask-1, 0.693 (micro-averaged F1-score over two classes) for subtask-2, and 88.5% (accuracy) for subtask-3. Ensembles of system combinations obtained best scores of 0.476, 0.702, and 88.7%, outperforming individual systems.

Discussion

Among individual systems, support vector machines and convolutional neural networks showed high performance. Performance gains achieved by ensembles of system combinations suggest that such strategies may be suitable for operational systems relying on difficult text classification tasks (eg, subtask-1).

Conclusions

Data imbalance and lack of context remain challenges for natural language processing of social media text. Annotated data from the shared task have been made available as reference standards for future studies (http://dx.doi.org/10.17632/rxwfb3tysd.1).",2018-10-01 +,"CartograTree: connecting tree genomes, phenotypes and environment","Today, researchers spend a tremendous amount of time gathering, formatting, filtering and visualizing data collected from disparate sources. Under the umbrella of forest tree biology, we seek to provide a platform and leverage modern technologies to connect biotic and abiotic data. Our goal is to provide an integrated web‐based workspace that connects environmental, genomic and phenotypic data via geo‐referenced coordinates. Here, we connect the genomic query web‐based workspace, DiversiTree and a novel geographical interface called CartograTree to data housed on the TreeGenes database. To accomplish this goal, we implemented Simple Semantic Web Architecture and Protocol to enable the primary genomics database, TreeGenes, to communicate with semantic web services regardless of platform or back‐end technologies. The novelty of CartograTree lies in the interactive workspace that allows for geographical visualization and engagement of high performance computing (HPC) resources. The application provides a unique tool set to facilitate research on the ecology, physiology and evolution of forest tree species. CartograTree can be accessed at: http://dendrome.ucdavis.edu/cartogratree.",2013-05-01 +28018331,Construction of a Pan-Genome Allele Database of Salmonella enterica Serovar Enteritidis for Molecular Subtyping and Disease Cluster Identification.,"We built a pan-genome allele database with 395 genomes of Salmonella enterica serovar Enteritidis and developed computer tools for analysis of whole genome sequencing (WGS) data of bacterial isolates for disease cluster identification. 
A web server (http://wgmlst.imst.nsysu.edu.tw) was set up with the database and the tools, allowing users to upload WGS data to generate whole genome multilocus sequence typing (wgMLST) profiles and to perform cluster analysis of wgMLST profiles. The usefulness of the database in disease cluster identification was demonstrated by analyzing a panel of genomes from 55 epidemiologically well-defined S. Enteritidis isolates provided by the Minnesota Department of Health. The wgMLST-based cluster analysis revealed distinct clades that were concordant with the epidemiologically defined outbreaks. Thus, using a common pan-genome allele database, wgMLST can be a promising WGS-based subtyping approach for disease surveillance and outbreak investigation across laboratories.",2016-12-15 +31568781,"Nalbuphine, a kappa opioid receptor agonist and mu opioid receptor antagonist attenuates pruritus, decreases IL-31, and increases IL-10 in mice with contact dermatitis.","Chronic itch is one of the disturbing symptoms of inflammatory skin diseases. Kappa opioid receptor agonists are effective in suppressing scratching in mice against different pruritogens. Nalbuphine, a nonscheduled kappa opioid receptor agonist and mu opioid receptor antagonist, has been in clinical use for post-operative pain management since the 1980s and recently has been in clinical trials for chronic itch of prurigo nodularis (https://www.trevitherapeutics.com/nalbuphine). We studied whether nalbuphine is effective against chronic scratching induced by rostral neck application of 1-fluoro-2,4-dinitrobenzene (DNFB), an accepted mouse model of contact dermatitis to study pruritoceptive itch. Mice were treated once a week with either saline or nalbuphine 20 min before the third, fifth, seventh, and ninth sensitizations with DNFB and the number of scratching bouts was counted for 30 min. 
Skin samples from the neck of mice at week 4 were used to measure protein levels and mRNA expressions of chemokines and cytokines. Different sets of mice were used to study sedation and anhedonic-like behavior of nalbuphine. We found that nalbuphine (a) antagonized scratching in a dose- and time-dependent manner without affecting locomotion, (b) decreased IL-31 and increased anti-inflammatory IL-10, and (c) induced more elevations in the levels of CCL2, CCL3, CCL12, CXCL1, CXCL2, CXCL9, CXCL10, IL-1β, IL-16, TIMP-1, M-CSF, TREM-1 and M1-type macrophages compared to saline. Increases in chemokines and cytokines and M1 macrophages by nalbuphine suggest an inflammatory phase of healing in damaged skin due to scratching. Our data indicate that nalbuphine is an effective antipruritic in a murine model of pruritoceptive itch.",2019-09-27 +31720321,WiseNET: An indoor multi-camera multi-space dataset with contextual information and annotations for people detection and tracking.,"Nowadays, camera networks are part of our every-day life environments, consequently, they represent a massive source of information for monitoring human activities and to propose new services to the building users. To perform human activity monitoring, people must be detected and the analysis has to be done according to the information relative to the environment and the context. Available multi-camera datasets furnish videos with little (or no) information of the environment where the network was deployed. The proposed dataset provides multi-camera multi-space video sets along with the complete contextual information of the environment. The dataset regroups 11 video sets (composed of 62 single videos) recorded using 6 indoor cameras deployed on multiple spaces. The video sets represent more than 1 h of video footage, include 77 people tracks and captured different human actions such as walking around, standing/sitting, motionless, entering/leaving a space and group merging/splitting. 
Moreover, each video has been manually and automatically annotated to include people detection and tracking meta-information. The automatic people detection annotations were obtained by using different complexity and robustness detectors, from machine learning to state-of-the-art deep Convolutional Neural Network (CNN) models. Concerning the contextual information, the Industry Foundation Classes (IFC) file that represents the environment's Building Information Modeling (BIM) data is also provided. The BIM/IFC file describes the complete structure of the environment, its topology and the elements contained in it. To our knowledge, the WiseNET dataset is the first to provide a set of videos along with the complete information of the environment. The WiseNET dataset is publicly available at https://doi.org/10.4121/uuid:c1fb5962-e939-4c51-bfd5-eac6f2935d44, as well as at the project's website http://wisenet.checksem.fr/#/dataset.",2019-10-16 +27980519,ContaMiner and ContaBase: a webserver and database for early identification of unwantedly crystallized protein contaminants.,"Solving the phase problem in protein X-ray crystallography relies heavily on the identity of the crystallized protein, especially when molecular replacement (MR) methods are used. Yet, it is not uncommon that a contaminant crystallizes instead of the protein of interest. Such contaminants may be proteins from the expression host organism, protein fusion tags or proteins added during the purification steps. Many contaminants co-purify easily, crystallize and give good diffraction data. Identification of contaminant crystals may take time, since the presence of the contaminant is unexpected and its identity unknown. A webserver (ContaMiner) and a contaminant database (ContaBase) have been established, to allow fast MR-based screening of crystallographic data against currently 62 known contaminants. 
The web-based ContaMiner (available at http://strube.cbrc.kaust.edu.sa/contaminer/) currently produces results in 5 min to 4 h. The program is also available in a github repository and can be installed locally. ContaMiner enables screening of novel crystals at synchrotron beamlines, and it would be valuable as a routine safety check for 'crystallization and preliminary X-ray analysis' publications. Thus, in addition to potentially saving X-ray crystallographers much time and effort, ContaMiner might considerably lower the risk of publishing erroneous data.",2016-11-02 +28430858,Mapping genes for calcium signaling and their associated human genetic disorders.,"

Motivation

Signal transduction via calcium ions (Ca2+) represents a fundamental signaling pathway in all eukaryotic cells. A large portion of the human genome encodes proteins used to assemble signaling systems that can transduce signals with diverse spatial and temporal dynamics.

Results

Here, we provide a map of all of the genes involved in Ca2+ signaling and link these genes to human genetic disorders. Using Gene Ontology terms and genome databases, 1805 genes were identified as regulators or targets of intracellular Ca2+ signals. Associating these 1805 genes with human genetic disorders uncovered 1470 diseases with mutated 'Ca2+ genes'. A network with scale-free properties appeared when the Ca2+ genes were mapped to their associated genetic disorders.

Availability and implementation

The Ca2+ genome database is freely available at http://cagedb.uhlenlab.org and will foster studies of gene functions and genetic disorders associated with Ca2+ signaling.

Contact

per.uhlen@ki.se.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +,Turning the page,"This year will see some fundamental changes at the Annals – changes that are aimed at both enhancing and maintaining its appeal across the broad range of surgical specialties. In this respect, there have already been some important refinements to the structure of the Editorial board. The Annals will now have representatives on its panel from the diapason of subspecialties. It is hoped that this will ensure an equitable apportionment of high-quality original research, review articles and case reports from across the surgical spectrum. It represents the single most tangible adjustment to the structure of the Annals in a generation and one of which I am sure Sir Cecil Wakeley would have approved. I would like to take this opportunity to formally welcome our new board members and invite them to join with our other long-term stalwarts into what is a uniquely collegiate editorial team. As many of our fellows and members will already be aware, there has been a significant shift made over the last few months in the handling of research contributions to the Annals. In recent weeks we have completed the transition to our new submissions portal and it is encouraging that reviewers and authors alike have commended it in equal measure. While we are sadly not in a position to accept much of the material submitted to the journal (we currently accept only one tenth of all the articles subjected to peer review) we can at least aim to improve and enhance the experience for all those involved. In many ways this digital migration is a precursor to a number of innovations that will fundamentally transform the way in which we produce the Annals, the most significant of which is the launch of our new digital platform this month. These innovations signal a gradual move away from the printed version as the principal conduit by which the Annals is distributed. 
Inevitably, there will be those who will lament the passing of this hitherto more familiar and tactile media and so measures are in hand to allow for a more limited production of a paper version of the Annals for RCS fellows and members who continue to elect to receive their Annals in the traditional format. Medical colleges around the world are currently undergoing similar deliberations and for some a digital version may represent the only opportunity to maintain editorial independence – unhindered by the implications of a commercial publishing partner. It is however hoped that for the vast majority of fellows and members, the new and enhanced digital platform will offer significant advantages such that the digital version becomes the de facto medium of choice. Matt Whitaker and the team at the Annals should be congratulated for their sterling efforts in making this transition. The new site, now live at http://publishing.rcseng.ac.uk, will enhance the experience of finding, accessing, reading, citing, sharing and saving articles from the Annals, Bulletin and FDJ. Sign-on will be much easier; page load times quicker and the search engine more powerful and intuitive. The new platform boasts improved functionality, full in-page article text and multi-media, citation tracking, reference generators and advanced social media integration. We are simultaneously launching a new video library where we will be hosting our technical videos. It will, I am certain, become a huge resource for our surgical fraternity. Our new platform will be followed later this year by the inevitable and ubiquitous app, which will allow readers to download issues of the Annals and read them offline and at leisure on whatever their tablet of choice might be. It is my belief that these and forthcoming changes herald the transformation of the Annals into a truly modern journal with all the digital services that authors and readers now rightly expect from their RCS publication. 
Tim Lane Editor-in-Chief, rcsannalseditor@gmail.com",2015-07-01 +,"Sibling species in the Chrysis ignita complex: molecular, morphological and trophic differentiation of Baltic species, with a description of two new cryptic species (Hymenoptera: Chrysididae)","Cryptic species complexes cause major challenges for taxonomists and alter understanding of species diversity. In Northern Europe, the Chrysis ignita species group is one such complex with numerous sympatric sibling species. The objective of this paper is to assess the taxonomy of 15 species from this group using three different approaches: molecular, morphological and trophic differentiation. The analysed set of molecular markers included a 7400‐bp‐long sequence of the mitochondrial genome covering complete sequences of CO1, CO2, ATP8, ATP6, CO3, ND3, 16S and 12S rRNA, nine tRNAs and a partial sequence of CytB, as well as a 3880‐bp‐long sequence of the nuclear DNA covering a part of 18S rRNA, the ITS1, 5.8S rRNA, ITS2 and a part of 28S rRNA. Discrete diagnostic characters of each species sequence were retrieved using the Characteristic Attribute Organisation System algorithm and a molecular identification key was compiled. The study revealed a higher evolutionary rate of the genes ATP8, ATP6, CO3, ND3 and CytB compared to that of CO1, CO2 and 16S; the studied nuclear markers demonstrated a lower evolutionary rate than the mitochondrial markers. A consensus tree compiled based on the combined mtDNA and nuclear markers with a strongly supported topology resolved the position of the C. schencki – C. parietis sp.n. clade as sister to the C. ignita – C. impressa clade and supported the monophyly of the C. angustula – C. longula clade. We compiled a morphometric species identification key applying linear discriminant equations. 
The trophic differentiation was assessed using data on host preferences of ten Chrysis species reared from trap‐nests; the analysis demonstrated that most of them are specialists exploiting a single or a few taxonomically related host species. In most cases, all three approaches supported the distinct status of the included species. Moreover, two previously undescribed species were consistently supported by the molecular methods. Therefore, we describe these as new, namely C. horridula sp.n. and C. parietis sp.n. Only C. mediata and C. solida were not clearly distinguished using the molecular phylogeny reconstruction methods. However, based on distinctive niche divergence, the presence of molecular characters and morphometric differences, we consider them as phylogenetically young but distinct species. In view of the weak morphological and molecular differentiation, the widely overlapping distribution areas and often similar habitat preferences and the trophic specialization, the C. ignita complex presents a possible model for studies of sympatric cryptic speciation. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:1EBAF0E1‐5FB7‐4CF4‐A595‐C11982448360.",2016-10-01 +31850801,"Air Pollution (Particulate Matter) Exposure and Associations with Depression, Anxiety, Bipolar, Psychosis and Suicide Risk: A Systematic Review and Meta-Analysis.","

Background

Particulate air pollution's physical health effects are well known, but associations between particulate matter (PM) exposure and mental illness have not yet been established. However, there is increasing interest in emerging evidence supporting a possible etiological link.

Objectives

This systematic review aims to provide a comprehensive overview and synthesis of the epidemiological literature to date by investigating quantitative associations between PM and multiple adverse mental health outcomes (depression, anxiety, bipolar disorder, psychosis, or suicide).

Methods

We undertook a systematic review and meta-analysis. We searched Medline, PsycINFO, and EMBASE from January 1974 to September 2017 for English-language human observational studies reporting quantitative associations between exposure to PM <1.0μm in aerodynamic diameter (ultrafine particles) and PM <2.5 and <10μm in aerodynamic diameter (PM2.5 and PM10, respectively) and the above psychiatric outcomes. We extracted data, appraised study quality using a published quality assessment tool, summarized methodological approaches, and conducted meta-analyses where appropriate.

Results

Of 1,826 citations identified, 22 met our overall inclusion criteria, and we included 9 in our primary meta-analyses. In our meta-analysis of associations between long-term (>6 months) PM2.5 exposure and depression (n=5 studies), the pooled odds ratio was 1.102 per 10-μg/m3 PM2.5 increase (95% CI: 1.023, 1.189; I2=0.00%). Two of the included studies investigating associations between long-term PM2.5 exposure and anxiety also reported statistically significant positive associations, and we found a statistically significant association between short-term PM10 exposure and suicide in meta-analysis at a 0-2 d cumulative exposure lag.

Discussion

Our findings support the hypothesis of an association between long-term PM2.5 exposure and depression, as well as supporting hypotheses of possible associations between long-term PM2.5 exposure and anxiety and between short-term PM10 exposure and suicide. The limited literature and methodological challenges in this field, including heterogeneous outcome definitions, exposure assessment, and residual confounding, suggest further high-quality studies are warranted to investigate potentially causal associations between air pollution and poor mental health. https://doi.org/10.1289/EHP4595.",2019-12-18 +31391659,"First record of the family Ameronothridae (Acari: Oribatida) from Japan - new species, juvenile morphology, ecology and biogeographic remarks.","The Ameronothridae are recorded for the first time from Japanese coasts with the new species Ameronothrus yoichi sp. n. from Hokkaido. The report of this species represents the most southern occurrence of an Ameronothrus species in the Asian Pacific region. Ameronothrus yoichi sp. n. can be easily distinguished from its congeners by the conspicuously pusticulate body surface and the loss of dorsal companion setae d on all genua in the adult stage. Based on adult and juvenile morphology, a close relation to Ameronothrus maculatus and A. schneideri is suggested. Ameronothrus yoichi sp. n. is classified as a lichenivorous inhabitant of sediment-free rocky coastal substrates. Due to a lack of genetic sequence data of nearly all ameronothrid species a molecular genetic comparison is yet unfeasible, but a Bayesian inference tree based on the 18S rRNA gene shows a paraphyletic clustering of the ameronothrid A. yoichi sp. n. and Paraquanothrus grahami. http://www.zoobank.org/urn:lsid:zoobank.org:pub:5B772E2C-7D5E-4C86-9955-AB84A84C50DA.",2019-06-23 +28230042,Meta-analysis of association between mobile phone use and glioma risk.,"

Objective

The purpose of this study was to evaluate the association between mobile phone use and glioma risk through pooling the published data.

Methods

By searching Medline, EMBASE, and CNKI databases, we screened the open published case-control or cohort studies about mobile phone use and glioma risk using a systematic search strategy. The pooled odds of mobile use in glioma patients versus healthy controls were calculated by meta-analysis method. The statistical analysis was done with Stata 12.0 software (http://www.stata.com).

Results

After searching the Medline, EMBASE, and CNKI databases, we ultimately included 11 studies ranging from 2001 to 2008. For the ≥1 year group, the data were pooled by random effects model. The combined data showed that there was no association between mobile phone use and glioma, odds ratio (OR) = 1.08 (95% confidence interval [CI]: 0.91-1.25, P > 0.05). However, a significant association was found between mobile phone use for more than 5 years and glioma risk, OR = 1.35 (95% CI: 1.09-1.62, P < 0.05). The publication bias of this study was evaluated by funnel plot and linear regression test. The funnel plot and linear regression test (t = 0.25, P = 0.81) did not indicate any publication bias.

Conclusion

Long-term mobile phone use may increase the risk of developing glioma according to this meta-analysis.",2016-12-01 +28525607,RNA-MoIP: prediction of RNA secondary structure and local 3D motifs from sequence data.,"RNA structures are hierarchically organized. The secondary structure is articulated around sophisticated local three-dimensional (3D) motifs shaping the full 3D architecture of the molecule. Recent contributions have identified and organized recurrent local 3D motifs, but applications of this knowledge for predictive purposes is still in its infancy. We recently developed a computational framework, named RNA-MoIP, to reconcile RNA secondary structure and local 3D motif information available in databases. In this paper, we introduce a web service using our software for predicting RNA hybrid 2D-3D structures from sequence data only. Optionally, it can be used for (i) local 3D motif prediction or (ii) the refinement of user-defined secondary structures. Importantly, our web server automatically generates a script for the MC-Sym software, which can be immediately used to quickly predict all-atom RNA 3D models. The web server is available at http://rnamoip.cs.mcgill.ca.",2017-07-01 +27644243,"Transcription factor HIF1A: downstream targets, associated pathways, polymorphic hypoxia response element (HRE) sites, and initiative for standardization of reporting in scientific literature.","Hypoxia-inducible factor-1α (HIF-1α) has crucial role in adapting cells to hypoxia through expression regulation of many genes. Identification of HIF-1α target genes (HIF-1α-TGs) is important for understanding the adapting mechanism. The aim of the present study was to collect known HIF-1α-TGs and identify their associated pathways. 
Targets and associated genomics data were retrieved using PubMed, WoS ( http://apps.webofknowledge.com/ ), HGNC ( http://www.genenames.org/ ), NCBI ( http://www.ncbi.nlm.nih.gov/ ), Ensembl v.84 ( http://www.ensembl.org/index.html ), DAVID Bioinformatics Resources ( https://david.ncifcrf.gov/ ), and Disease Ontology database ( http://disease-ontology.org/ ). From 51 papers, we collected 98 HIF-1α TGs found to be associated with 20 pathways, including metabolism of carbohydrates and pathways in cancer. Reanalysis of genomic coordinates of published HREs (hypoxia response elements) revealed six polymorphisms within HRE sites (HRE-SNPs): ABCG2, ACE, CA9, and CP. Due to large heterogeneity of results presentation in scientific literature, we also propose a first step towards reporting standardization of HIF-1α-target interactions consisting of ten relevant data types. Suggested minimal checklist for reporting will enable faster development of a complete catalog of HIF-1α-TGs, data sharing, bioinformatics analyses, and setting novel more targeted hypotheses. The proposed format for data standardization is not yet complete but presents a baseline for further optimization of the protocol with additional details, for example, regarding the experimental validation.",2016-09-19 +31853733,Utilization of biologic disease-modifying anti-rheumatic drugs in patients with rheumatoid arthritis and cancer.,"

Introduction

Biologic disease-modifying anti-rheumatic drugs (bDMARDs) interfere with the immune system and could theoretically increase risk of malignancies. However, recent evidence has not substantiated such concerns and physicians are less reluctant in treating patients with underlying cancer with such bDMARDs. We aimed to understand the current utilization patterns of bDMARDs for the treatment of rheumatoid arthritis (RA) in cancer patients.

Methods

We performed a retrospective cohort study of patients with prevalent RA and cancer initially seen at MD Anderson Cancer Center between 2002 and 2014. A cohort of cancer patients was identified from the tumor registry, and patients with RA were identified through ICD-9 codes, followed by review of electronic medical records. We included patients 18 years and older, with a cancer diagnosis, and a diagnosis of RA by a rheumatologist. Patients were followed until 2016.

Results

We identified 431 patients with RA and cancer that met our inclusion criteria. Overall, 111 (26%) received bDMARDs after their cancer diagnosis; of these, 60 (54%) had received bDMARDs prior to their cancer diagnosis and continued to receive this therapy following their diagnosis. Thirteen (22%) switched to a different bDMARD, and the rest continued to receive the same agent after their cancer diagnosis. Of all patients on a bDMARD, 91 (82%) received tumor necrosis factor inhibitors (TNFi).

Conclusions

The treatment landscape of patients with a history of cancer and RA is changing. Future studies evaluating the safety of bDMARDs in patients with a recent history of cancer or with active cancer are needed. Part of the data of this project was presented as a poster at the 2016 American College of Rheumatology annual meeting. Zamora NV, Siddhanamatha H, Barbo A, Tayar J, Lin H, Suarez-Almazor M. Utilization of Biologic Therapy in Patients with Rheumatoid Arthritis and Cancer [abstract].Arthritis Rheumatol. 2016; 68 (suppl 10). https://acrabstracts.org/abstract/utilization-of-biologic-therapy-in-patients-with-rheumatoid-arthritis-and-cancer/. Accessed September 30, 2019. Key Points • One in four patients with RA and concomitant cancer received bDMARDs, including TNFi, after their cancer diagnosis, at our institution. • Half of the patients with RA and cancer who received bDMARDs had initiated therapy prior to the cancer diagnosis, continuing thereafter.",2019-12-18 +30351377,Computational analysis and prediction of lysine malonylation sites by exploiting informative features in an integrative machine-learning framework.,"As a newly discovered post-translational modification (PTM), lysine malonylation (Kmal) regulates a myriad of cellular processes from prokaryotes to eukaryotes and has important implications in human diseases. Despite its functional significance, computational methods to accurately identify malonylation sites are still lacking and urgently needed. In particular, there is currently no comprehensive analysis and assessment of different features and machine learning (ML) methods that are required for constructing the necessary prediction models. Here, we review, analyze and compare 11 different feature encoding methods, with the goal of extracting key patterns and characteristics from residue sequences of Kmal sites. 
We identify optimized feature sets, with which four commonly used ML methods (random forest, support vector machines, K-nearest neighbor and logistic regression) and one recently proposed [Light Gradient Boosting Machine (LightGBM)] are trained on data from three species, namely, Escherichia coli, Mus musculus and Homo sapiens, and compared using randomized 10-fold cross-validation tests. We show that integration of the single method-based models through ensemble learning further improves the prediction performance and model robustness on the independent test. When compared to the existing state-of-the-art predictor, MaloPred, the optimal ensemble models were more accurate for all three species (AUC: 0.930, 0.923 and 0.944 for E. coli, M. musculus and H. sapiens, respectively). Using the ensemble models, we developed an accessible online predictor, kmal-sp, available at http://kmalsp.erc.monash.edu/. We hope that this comprehensive survey and the proposed strategy for building more accurate models can serve as a useful guide for inspiring future developments of computational methods for PTM site prediction, expedite the discovery of new malonylation and other PTM types and facilitate hypothesis-driven experimental validation of novel malonylated substrates and malonylation sites.",2019-11-01 +,Transformation dynamics of the natural cover in the Dry Chaco ecoregion: A plot level geo-database from 1976 to 2012,"The aim of this work was to characterize the spatial and temporal dynamics of the transformation of the natural cover in the Dry Chaco ecoregion from 1976 to 2012. Dry forests in this region have one of the highest deforestation rates in the world. We analyzed 44 Landsat scenes, including part of Argentina, Paraguay and Bolivia. The analysis was based on tracking individual transformed plots of the entire Dry Chaco region for over more than three decades using the same protocol. 
Until the end of 2012 15.8 million ha of the original habitats of the Chaco were transformed into croplands or pastures. Our study showed that the greater annual rates of transformation were observed in Paraguay, where deforestation increased dramatically in the last decade, reaching values higher than 4.0% in 2010, the highest historical value in the entire region. The size of the transformed plots increased significantly through the studied period both in Argentina and Paraguay, while in Bolivia decreased. At the landscape level, the use of several fragmentation indices showed the disruption of the continuity and connectivity of the original vegetation. The spatially explicit description of the dynamics of transformed areas is an indispensable tool for natural resources management, territorial planning and deforestation impacts assessment. The developed geo-database is available online at http://monitoreodesmonte.com.ar/ for further analyses and use.",2015-12-01 +,Are cultural values linked to genetics in Europe?,"Our study aims at a broader understanding of interconnections between socio-cultural and biological diversity in the human populations of Europe based on two major datasets. The European subset of Population Reference Sample (Nelson et al. in Am J Hum Genet 83:347–358, 2008) serves as our source of biological observations while the social and cultural implications of our study stem from the World Values Survey (http://www.worldvaluessurvey.org/) database. Inglehart and Welzel drew the cultural map of the world that showed that in spite of their geographical proximity, there is a striking divergence among the value-sets of European countries compared to other regions of the world (Inglehart and Welzel in Perspect Polit 8:551–567, 2010). 
A recent large-scale genetic study of 3000 individuals from 36 European countries revealed that despite having lower genomic diversity than other regions, Europe’s geographic map is reflected in its genetic composition with an astounding precision (Novembre et al. in Nature 456:98–101, 2008). Here we report a synthesis of the above approaches employing a statistical method, principal component analysis, to identify which cultural values position countries similarly to the genetic and geographical map of Europe, and which reflect other influences. We find an astonishing distinction between the performance of private and public sphere values, as the latter are less robust to short-term changes in environmental influences, such as political systems. Therefore our interdisciplinary study serves to bridge research of cultural and biological diversities of human populations in Europe and start a discussion on the role of genetics, geography and history in the formation of cultural values.",2015-12-01 +29947737,GeneCT: a generalizable cancerous status and tissue origin classifier for pan-cancer biopsies.,"

Motivation

Tissue biopsy is commonly used in cancer diagnosis and molecular studies. However, advanced skills are required for determining cancerous status of biopsies and tissue origin of tumor for cancerous ones. Correct classification is essential for downstream experiment design and result interpretation, especially in molecular cancer studies. Methods for accurate classification of cancerous status and tissue origin for pan-cancer biopsies are thus urgently needed.

Results

We developed a deep learning-based classifier, named GeneCT, for predicting cancerous status and tissue origin of pan-cancer biopsies. GeneCT showed high performance on pan-cancer datasets from various sources and outperformed existing tools. We believe that GeneCT can potentially facilitate cancer diagnosis, tumor origin determination and molecular cancer studies.

Availability and implementation

GeneCT is implemented in Perl/R and supported on GNU/Linux platforms. Source code, testing data and webserver are freely available at http://sunlab.cpy.cuhk.edu.hk/GeneCT/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +27899612,YMDB 2.0: a significantly expanded version of the yeast metabolome database.,"YMDB or the Yeast Metabolome Database (http://www.ymdb.ca/) is a comprehensive database containing extensive information on the genome and metabolome of Saccharomyces cerevisiae Initially released in 2012, the YMDB has gone through a significant expansion and a number of improvements over the past 4 years. This manuscript describes the most recent version of YMDB (YMDB 2.0). More specifically, it provides an updated description of the database that was previously described in the 2012 NAR Database Issue and it details many of the additions and improvements made to the YMDB over that time. Some of the most important changes include a 7-fold increase in the number of compounds in the database (from 2007 to 16 042), a 430-fold increase in the number of metabolic and signaling pathway diagrams (from 66 to 28 734), a 16-fold increase in the number of compounds linked to pathways (from 742 to 12 733), a 17-fold increase in the numbers of compounds with nuclear magnetic resonance or MS spectra (from 783 to 13 173) and an increase in both the number of data fields and the number of links to external databases. In addition to these database expansions, a number of improvements to YMDB's web interface and its data visualization tools have been made. These additions and improvements should greatly improve the ease, the speed and the quantity of data that can be extracted, searched or viewed within YMDB. Overall, we believe these improvements should not only improve the understanding of the metabolism of S. 
cerevisiae, but also allow more in-depth exploration of its extensive metabolic networks, signaling pathways and biochemistry.",2016-11-28 +27669338,ChemDataExtractor: A Toolkit for Automated Extraction of Chemical Information from the Scientific Literature.,"The emergence of ""big data"" initiatives has led to the need for tools that can automatically extract valuable chemical information from large volumes of unstructured data, such as the scientific literature. Since chemical information can be present in figures, tables, and textual paragraphs, successful information extraction often depends on the ability to interpret all of these domains simultaneously. We present a complete toolkit for the automated extraction of chemical entities and their associated properties, measurements, and relationships from scientific documents that can be used to populate structured chemical databases. Our system provides an extensible, chemistry-aware, natural language processing pipeline for tokenization, part-of-speech tagging, named entity recognition, and phrase parsing. Within this scope, we report improved performance for chemical named entity recognition through the use of unsupervised word clustering based on a massive corpus of chemistry articles. For phrase parsing and information extraction, we present the novel use of multiple rule-based grammars that are tailored for interpreting specific document domains such as textual paragraphs, captions, and tables. We also describe document-level processing to resolve data interdependencies and show that this is particularly necessary for the autogeneration of chemical databases since captions and tables commonly contain chemical identifiers and references that are defined elsewhere in the text. 
The performance of the toolkit to correctly extract various types of data was evaluated, affording an F-score of 93.4%, 86.8%, and 91.5% for extracting chemical identifiers, spectroscopic attributes, and chemical property attributes, respectively; set against the CHEMDNER chemical name extraction challenge, ChemDataExtractor yields a competitive F-score of 87.8%. All tools have been released under the MIT license and are available to download from http://www.chemdataextractor.org .",2016-10-06 +31036081,"Disparities, variations, inequalities or inequities: whatever you call them, we need data to monitor them.","Health inequalities are a problem in high, middle and low income countries. Most are unfair ('inequities') and could be minimised but primarily through policies outside the health service.In the US, the Center for Diseases Control has used high quality, nationally-available data to monitor conditions and determinants of health among different groups (by sex, disability, race, ethnicity, and language) to motivate action to reduce inequalities. In the UK, the 10 top level 'health' indicators in London at the turn of the millennium included unemployment, education, housing quality, crime, air pollution, road travel injuries, as well as traditional health measures. Most of these affect mental and physical health through social determinants or adverse environmental exposures. Current inequalities monitoring in England includes a Local Basket of Inequalities Indicators focusing on a wide range of determinants of health as well as traditional health metrics.Israel, like the US, has above average socio-economic inequalities but has universal healthcare. Health inequalities in Israel occur within different Jewish groups and by income, education, ethnicity, and religion, with disadvantages often clustering. Current monitoring in Israel focuses on health outcomes and 'midstream' healthcare-related provision. 
I agree with Abu-Saad and her colleagues that including monitoring of social determinants of health is crucial to identify and tackle health inequalities in Israel.National, 'upstream', interventions are the most effective ways to reduce inequalities and improve the population's health. High-level political support is crucial for this. While a 'Health in all Policies' approach combined with political will to 'leave no one behind' can lead to great improvements, regular monitoring is essential, to: identify the inequities; plan appropriate and effective, targeted interventions; implement and evaluate them; and change them where needed. All of this requires adequate and timely data on health and its determinants, including information about undiagnosed and poorly controlled disease, obtained from the general population not just those attending for healthcare, analysed for each population sub-group at risk of experiencing inequalities.This is a commentary on https://doi.org/10.1186/s13584-018-0208-1.",2019-04-29 +30689869,"High-resolution array-CGH analysis on 46,XX patients affected by early onset primary ovarian insufficiency discloses new genes involved in ovarian function.","STUDY QUESTION:Can high resolution array-CGH analysis on a cohort of women showing a primary ovarian insufficiency (POI) phenotype in young age identify copy number variants (CNVs) with a deleterious effect on ovarian function? SUMMARY ANSWER:This approach has proved effective to clarify the role of CNVs in POI pathogenesis and to better unveil both novel candidate genes and pathogenic mechanisms. WHAT IS KNOWN ALREADY:POI describes the progression toward the cessation of ovarian function before the age of 40 years. Genetic causes are highly heterogeneous and despite several genes being associated with ovarian failure, most of genetic basis of POI still needs to be elucidated. 
STUDY DESIGN, SIZE, DURATION:The current study included 67 46,XX patients with early onset POI (<19 years) and 134 control females recruited between 2012 and 2016 at the Medical Cytogenetics and Molecular Genetics Lab, IRCCS Istituto Auxologico Italiano. PARTICIPANTS/MATERIALS, SETTING, METHODS:High resolution array-CGH analysis was carried out on POI patients' DNA. Results of patients and female controls were analyzed to search for rare CNVs. All variants were validated and subjected to a gene content analysis and disease gene prioritization based on the present literature to find out new ovary candidate genes. Case-control study with statistical analysis was carried out to validate our approach and evaluate any ovary CNVs/gene enrichment. Characterization of particular CNVs with molecular and functional studies was performed to assess their pathogenic involvement in POI. MAIN RESULTS AND THE ROLE OF CHANCE:We identified 37 ovary-related CNVs involving 44 genes with a role in ovary in 32 patients. All except one of the selected CNVs were not observed in the control group. Possible involvement of the CNVs in POI pathogenesis was further corroborated by a case-control analysis that showed a significant enrichment of ovary-related CNVs/genes in patients (P = 0.0132; P = 0.0126). Disease gene prioritization identified both previously reported POI genes (e.g. BMP15, DIAPH2, CPEB1, BNC1) and new candidates supported by transcript and functional studies, such as TP63 with a role in oocyte genomic integrity and VLDLR which is involved in steroidogenesis. LARGE SCALE DATA:ClinVar database (http://www.ncbi.nlm.nih.gov/clinvar/); accession numbers SCV000787656 to SCV000787743. LIMITATIONS, REASONS FOR CAUTION:This is a descriptive analysis for almost all of the CNVs identified. Inheritance studies of CNVs in some non-familial sporadic cases was not performed as the parents' DNA samples were not available. 
Additionally, RT-qPCR analyses were carried out in few cases as RNA samples were not always available and the genes were not expressed in blood. WIDER IMPLICATIONS OF THE FINDINGS:Our array-CGH screening turned out to be efficient in identifying different CNVs possibly implicated in disease onset, thus supporting the extremely wide genetic heterogeneity of POI. Since almost 50% of cases are negative for rare ovary-related CNVs, array-CGH together with next generation sequencing might represent the most suitable approach to obtain a comprehensive genetic characterization of POI patients. STUDY FUNDING/COMPETING INTEREST(S):Supported by Italian Ministry of Health grants 'Ricerca Corrente' (08C203_2012) and 'Ricerca Finalizzata' (GR-2011-02351636, BIOEFFECT) to IRCCS Istituto Auxologico Italiano.",2019-03-01 +30896618,Effect of chronic kidney disease in ischemic cardiomyopathy: Long-term follow-up - REVISION-DM2 trial.,"A strong association exists between chronic kidney disease (CKD) and coronary artery disease (CAD). The role of CKD in the long-term prognosis of CAD patients with versus those without CKD is unknown. This study investigated whether CKD affects ventricular function. From January 2009 to January 2010, 918 consecutive patients were selected from an outpatient database. Patients had undergone percutaneous, surgical, or clinical treatment and were followed until May 2015. In patients with preserved renal function (n = 405), 73 events (18%) occurred, but 108 events (21.1%) occurred among those with CKD (n = 513) (P < .001). Regarding left ventricular ejection fraction (LVEF) <50%, we found 84 events (21.5%) in CKD patients and 12 (11.8%) in those with preserved renal function (P < .001). The presence of LVEF <50% brought about a modification effect. Death occurred in 22 (5.4%) patients with preserved renal function and in 73 (14.2%) with CKD (P < .001). 
In subjects with LVEF <50%, 66 deaths (16.9%) occurred in CKD patients and 7 (6.9%) in those with preserved renal function (P = .001). No differences were found in CKD strata regarding events or overall death among those with preserved LVEF. In a multivariate model, creatinine clearance remained an independent predictor of death (P < .001). We found no deleterious effects of CKD in patients with CAD when ventricular function was preserved. However, there was a worse prognosis in patients with CKD and ventricular dysfunction. Registry number is ISRCTN17786790 at https://doi.org/10.1186/ISRCTN17786790.",2019-03-01 +30837356,MusatransSSRDB (a transcriptome derived SSR database) - An advanced tool for banana improvement. ,"Availability of transcriptome datasets for use in accelerated molecular-based breeding in Musa species is limited. Illumina Hiseq technology was employed to determine differential gene expression between the contrasting cultivars for three different stresses (Eumusae leaf spot -Mycosphaerella eumusae, root lesion nematode - Pratylenchus coffeae and moisture deficit stress) under challenged and unchallenged conditions. An average of 34.72 million of reads was assembled into ~47629 contigs, and ~5,466 simple sequence repeats (SSR) from each library were identified. GO annotation and KEGG pathway analysis were carried for all the transcripts and the SSR, SNPs were also detected. Based on this information, a MusatransSSRDB has been developed. Currently, the database consists of 32,800 SSRs with the unique information like putative function of the SSR-containing genes and their metabolic pathway and expression profiling under various stress conditions. This database provides information on in silico polymorphic SSRs (2830 SSRs) between the contrasting cultivars for each stress and within stress. Information on in silico polymorphic SSRs specific to differentially expressed genes under challenged condition for each stress can also be accessed. 
This database facilitates the retrieval of results by navigating the tabs for cultivars, stress and polymorphism. This database was developed using HTML, Java and PHP; datasets are stored in MySQL database and accessible in the public domain (http://bioinfnrcb.byethost7.com/nrcbbio/). This unique information facilitates the banana breeder to select the SSR primers based on specific objectives. MusatransSSRDB along with other genomics databases will facilitate the genetic dissection and breeding for complex traits in banana. Thus, this database is a step forward in economizing cost, time, manpower and other resources. Keywords.",2019-03-01 +30802134,"""The face of STEM: Racial phenotypic stereotypicality predicts stem persistence by-and ability attributions about-students of color"": Correction to Williams, George-Jones, and Hebl (2018).","Reports an error in ""The face of STEM: Racial phenotypic stereotypicality predicts STEM persistence by-and ability attributions about-students of color"" by Melissa J. Williams, Julia George-Jones and Mikki Hebl (Journal of Personality and Social Psychology, Advanced Online Publication, Oct 15, 2018, np). In the article ""The Face of STEM: Racial Phenotypic Stereotypicality Predicts STEM Persistence by-and Ability Attributions About-Students of Color"" by Melissa J. Williams, Julia George-Jones, and Mikki Hebl (Journal of Personality and Social Psychology. Advance online publication. October 15, 2018. http://dx.doi.org/10.1037/pspi0000153), the Editor's Note acknowledging Toni Schmader as the action editor for this article was omitted. All versions of this article have been corrected. (The following abstract of the original article appeared in record 2018-51203-001.) Despite strong initial interest, college students-especially those from underrepresented minority (URM) backgrounds-leave STEM majors at high rates. 
Here, we explore the role of racial phenotypic stereotypicality, or how typical one's physical appearance is of one's racial group, in STEM persistence. In a longitudinal study, URM students were especially likely to leave STEM to the extent that they looked more stereotypical of their group; Asian American students were especially likely to leave STEM to the extent that they looked less stereotypical. Three experiments documented a possible mechanism; participants (Studies 2-4), including college advisors (Study 3), attributed greater STEM ability to more-stereotypical Asian Americans and to less-stereotypical Black women (not men), than to same-race peers. Study 4 showed that prejudice concerns, activated in interactions with Black men (not women), account for this gender difference; more-stereotypical Black men (like women) were negatively evaluated when prejudice concerns were not salient. This work has important implications for ongoing efforts to achieve diversity in STEM. (PsycINFO Database Record (c) 2019 APA, all rights reserved).",2019-03-01 +30920259,Why and how to use patient-oriented research to promote translational research.,"As we discussed in our first editorial in the December 2018 issue (Polaha & Sunderji, 2018), an emerging science of knowledge translation (also known as implementation and dissemination science) aims to bridge the disconnect between evidence and practice. Researchers are increasingly engaging with knowledge users and other stakeholders as a key strategy to promote uptake. This may include policymakers, payers, and-the focus of this editorial-patients. 
Patient-oriented research is featured in national research agendas around the world including in Canada (Canadian Institutes of Health Research, 2018) and the United States (Patient-Centered Outcomes Research Institute, see https://www.pcori.org/), in part as it may contribute one solution to the ""bench to bedside"" gap (Greenhalgh, Jackson, Shaw, & Janamian, 2016; Jull, Giles, & Graham, 2017; McGavin, 2017). In this editorial, we provide a general introduction to research, its potential, and its realized value. We also suggest strategies for conducting patient-oriented research effectively, including a description of common barriers and how they can be dealt with. We hope this background will inspire you to get started with patient-oriented research and to learn more, as well as to share your patient-oriented research through Families, Systems, & Health. (PsycINFO Database Record (c) 2019 APA, all rights reserved).",2019-03-01 +30838062,How to validate a diagnosis recorded in electronic health records.,Systematic measurement errors in electronic health record databases can lead to large inferential errors. Validation techniques can help determine the degree of these errors and therefore aid in the interpretation of findings. http://ow.ly/iHQ630np4xU.,2019-03-01 +30802133,"""Is overconfidence a social liability? The effect of verbal versus nonverbal Expressions of confidence"": Correction to Tenney et al. (2018).","Reports an error in ""Is overconfidence a social liability? The effect of verbal versus nonverbal expressions of confidence"" by Elizabeth R. Tenney, Nathan L. Meikle, David Hunsaker, Don A. Moore and Cameron Anderson (Journal of Personality and Social Psychology, Advanced Online Publication, Oct 11, 2018, np). In the article ""Is Overconfidence a Social Liability? The Effect of Verbal Versus Nonverbal Expressions of Confidence"" by Elizabeth R. Tenney, Nathan L. Meikle, David Hunsaker, Don A. 
Moore, and Cameron Anderson (Journal of Personality and Social Psychology. Advance online publication. October 11, 2018. http://dx.doi.org/10.1037/pspi0000150), the Editor's Note acknowledging David Dunning as the action editor for this article was omitted. All versions of this article have been corrected. (The following abstract of the original article appeared in record 2018-50999-001.) What are the reputational consequences of being overconfident? We propose that the channel of confidence expression is one key moderator-that is, whether confidence is expressed verbally or nonverbally. In a series of experiments, participants assessed target individuals (potential collaborators or advisors) who were either overconfident or cautious. Targets expressed confidence, or a lack thereof, verbally or nonverbally. Participants then learned targets' actual performance. Across studies, overconfidence was advantageous initially-regardless of whether targets expressed confidence verbally or nonverbally. After performance was revealed, overconfident targets who had expressed confidence verbally were viewed more negatively than cautious targets; however, overconfident targets who had expressed confidence nonverbally were still viewed more positively than cautious ones. The one condition wherein nonverbal overconfidence was detrimental was when confidence was clearly tied to a falsifiable claim. Results suggest that, compared with verbal statements, nonverbal overconfidence reaps reputational benefits because of its plausible deniability. (PsycINFO Database Record (c) 2019 APA, all rights reserved).",2019-03-01 +27200217,GEE: An Informatics Tool for Gene Expression Data Explore.,"

Objectives

Major public high-throughput functional genomic data repositories, including the Gene Expression Omnibus (GEO) and ArrayExpress have rapidly expanded. As a result, a large number of diverse high-throughput functional genomic data retrieval systems have been developed. However, high-throughput functional genomic data retrieval remains challenging.

Methods

We developed Gene Expression data Explore (GEE), the first powerful, flexible web and mobile search application for searching whole-genome epigenetic data and microarray data in public databases, such as GEO and ArrayExpress.

Results

GEE provides an elaborate, convenient interface of query generation competences not available via various high-throughput functional genomic data retrieval systems, including GEO, ArrayExpress, and Atlas. In particular, GEE provides a suitable query generator using eVOC, the Experimental Factor Ontology (EFO), which is well represented with a variety of high-throughput functional genomic data experimental conditions. In addition, GEE provides an experimental design query constructor (EDQC), which provides elaborate retrieval filter conditions when the user designs real experiments.

Conclusions

The web version of GEE is available at http://www.snubi.org/software/gee, and its app version is available from the Apple App Store.",2016-04-30 +27995664,AraQTL - workbench and archive for systems genetics in Arabidopsis thaliana.,"Genetical genomics studies uncover genome-wide genetic interactions between genes and their transcriptional regulators. High-throughput measurement of gene expression in recombinant inbred line populations has enabled investigation of the genetic architecture of variation in gene expression. This has the potential to enrich our understanding of the molecular mechanisms affected by and underlying natural variation. Moreover, it contributes to the systems biology of natural variation, as a substantial number of experiments have resulted in a valuable amount of interconnectable phenotypic, molecular and genotypic data. A number of genetical genomics studies have been published for Arabidopsis thaliana, uncovering many expression quantitative trait loci (eQTLs). However, these complex data are not easily accessible to the plant research community, leaving most of the valuable genetic interactions unexplored as cross-analysis of these studies is a major effort. We address this problem with AraQTL (http://www.bioinformatics.nl/AraQTL/), an easily accessible workbench and database for comparative analysis and meta-analysis of all published Arabidopsis eQTL datasets. AraQTL provides a workbench for comparing, re-using and extending upon the results of these experiments. 
For example, one can easily screen a physical region for specific local eQTLs that could harbour candidate genes for phenotypic QTLs, or detect gene-by-environment interactions by comparing eQTLs under different conditions.",2017-02-13 +29186292,SATORI: a system for ontology-guided visual exploration of biomedical data repositories.,"Motivation:The ever-increasing number of biomedical datasets provides tremendous opportunities for re-use but current data repositories provide limited means of exploration apart from text-based search. Ontological metadata annotations provide context by semantically relating datasets. Visualizing this rich network of relationships can improve the explorability of large data repositories and help researchers find datasets of interest. Results:We developed SATORI-an integrative search and visual exploration interface for the exploration of biomedical data repositories. The design is informed by a requirements analysis through a series of semi-structured interviews. We evaluated the implementation of SATORI in a field study on a real-world data collection. SATORI enables researchers to seamlessly search, browse and semantically query data repositories via two visualizations that are highly interconnected with a powerful search interface. Availability and implementation:SATORI is an open-source web application, which is freely available at http://satori.refinery-platform.org and integrated into the Refinery Platform. Contact:nils@hms.harvard.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +31552505,Relationships between recovery experiences and well-being among younger and older teachers.,"

Purpose

The study had three aims. We investigated, first, how six recovery experiences (i.e., detachment, relaxation, control, mastery, meaning, and affiliation) during off-job time suggested by the DRAMMA model (Newman et al. in J Happiness Stud 15(3):555-578. https://doi.org/10.1007/s10902-013-9435-x, 2014) are related to well-being (i.e., vitality, life satisfaction, and work ability). Second, we examined how age related to these outcomes, and third, we investigated whether age moderated the relationships between recovery experiences and well-being outcomes.

Methods

A sample of 909 Finnish teachers responded to an electronic questionnaire (78% women, average age 51 years). The data were analyzed with moderated hierarchical regression analyses.

Results

Detachment from work, relaxation, control, and mastery were associated with higher vitality. Detachment, relaxation, meaning, and affiliation were related to higher life satisfaction. Older age was related to lower work ability, but not to vitality or life satisfaction. Older teachers benefited more from control and mastery during off-job time than did younger teachers in terms of vitality, whereas younger teachers benefited more from relaxation in terms of all well-being outcomes.

Conclusions

Detachment, relaxation, control, mastery, meaning, and affiliation during off-job time were related to higher well-being, supporting the DRAMMA model. Age moderated the relationships between control, mastery, and relaxation and vitality and life satisfaction. The role of aging in recovery from work needs further research.",2019-09-24 +33816800,"The Modern Research Data Portal: a design pattern for networked, data-intensive science.","We describe best practices for providing convenient, high-speed, secure access to large data via research data portals. We capture these best practices in a new design pattern, the Modern Research Data Portal, that disaggregates the traditional monolithic web-based data portal to achieve orders-of-magnitude increases in data transfer performance, support new deployment architectures that decouple control logic from data storage, and reduce development and operations costs. We introduce the design pattern; explain how it leverages high-performance data enclaves and cloud-based data management services; review representative examples at research laboratories and universities, including both experimental facilities and supercomputer sites; describe how to leverage Python APIs for authentication, authorization, data transfer, and data sharing; and use coding examples to demonstrate how these APIs can be used to implement a range of research data portal capabilities. Sample code at a companion web site, https://docs.globus.org/mrdp, provides application skeletons that readers can adapt to realize their own research data portals.",2018-01-15 +30816123,ImmtorLig_DB: repertoire of virtually screened small molecules against immune receptors to bolster host immunity.,"Host directed therapies to boost immunity against infection are gaining considerable impetus following the observation that use of antibiotics has become a continuous source for the emergence of drug resistant strains of pathogens. 
Receptors expressed by the cells of immune system play a cardinal role in initiating sequence of events necessary to ameliorate many morbid conditions. Although, ligands for the immune receptors are available; but their use is limited due to complex structure, synthesis and cost-effectiveness. Virtual screening (VS) is an integral part of chemoinformatics and computer-aided drug design (CADD) and aims to streamline the process of drug discovery. ImmtorLig_DB is a repertoire of 5000 novel small molecules, screened from ZINC database and ranked using structure based virtual screening (SBVS) against 25 immune receptors which play a pivotal role in defending and initiating the activation of immune system. Consequently, in the current study, small molecules were screened by docking on the essential domains present on the receptors expressed by cells of immune system. The screened molecules exhibited efficacious binding to immune receptors, and indicated a possibility of discovering novel small molecules. Other features of ImmtorLig_DB include information about availability, clustering analysis, and estimation of absorption, distribution, metabolism, and excretion (ADME) properties of the screened small molecules. Structural comparisons indicate that predicted small molecules may be considered novel. Further, this repertoire is available via a searchable graphical user interface (GUI) through http://bioinfo.imtech.res.in/bvs/immtor/ .",2019-02-28 +31267738,MsDBP: Exploring DNA-Binding Proteins by Integrating Multiscale Sequence Information via Chou's Five-Step Rule.,"DNA-binding proteins are crucial to alternative splicing, methylation, and the structural composition of the DNA. The existing experimental methods for identifying DNA-binding proteins are expensive and time-consuming; thus, it is necessary to develop a fast and accurate computational method to address the problem. 
In this Article, we report a novel predictor MsDBP, a DNA-binding protein prediction method that combines the multiscale sequence feature into a deep neural network. First of all, instead of developing a narrow-application structured-based method, we are committed to a sequenced-based predictor. Second, instead of characterizing the whole protein directly, we divide the protein into subsequences with different lengths and then encode them into a vector based on composition information. In this way, the multiscale sequence feature can be obtained. Finally, a branch of dense layers is applied for learning multilevel abstract features to discriminate DNA-binding proteins. When MsDBP is tested on the independent data set PDB2272, it achieves an overall accuracy of 66.99% with the SE of 70.69%. In addition, we also perform extensive experiments to compare the proposed method with other existing methods. The results indicate that MsDBP would be a useful tool for the identification of DNA-binding proteins. MsDBP is freely available at a web server on http://47.100.203.218/MsDBP/ .",2019-07-17 +23093603,The Online Protein Processing Resource (TOPPR): a database and analysis platform for protein processing events.,"We here present The Online Protein Processing Resource (TOPPR; http://iomics.ugent.be/toppr/), an online database that contains thousands of published proteolytically processed sites in human and mouse proteins. These cleavage events were identified with COmbined FRActional DIagonal Chromatography proteomics technologies, and the resulting database is provided with full data provenance. Indeed, TOPPR provides an interactive visual display of the actual fragmentation mass spectrum that led to each identification of a reported processed site, complete with fragment ion annotations and search engine scores. 
Apart from warehousing and disseminating these data in an intuitive manner, TOPPR also provides an online analysis platform, including methods to analyze protease specificity and substrate-centric analyses. Concretely, TOPPR supports three ways to retrieve data: (i) the retrieval of all substrates for one or more cellular stimuli or assays; (ii) a substrate search by UniProtKB/Swiss-Prot accession number, entry name or description; and (iii) a motif search that retrieves substrates matching a user-defined protease specificity profile. The analysis of the substrates is supported through the presence of a variety of annotations, including predicted secondary structure, known domains and experimentally obtained 3D structure where available. Across substrates, substrate orthologs and conserved sequence stretches can also be shown, with iceLogo visualization provided for the latter.",2012-10-23 +27843753,Bioinformatic curation and alignment of genotyped hepatitis B virus (HBV) sequence data from the GenBank public database.,"

Background

Hepatitis B virus (HBV) DNA sequence data from thousands of samples are present in the public sequence databases. No publicly available, up-to-date, multiple sequence alignments, containing full-length and subgenomic fragments per genotype, are available. Such alignments are useful in many analysis applications, including data-mining and phylogenetic analyses.

Results

By issuing a query, all HBV sequence data from the GenBank public database was downloaded (67,893 sequences). Full-length and subgenomic sequences, which were genotyped by the submitters (30,852 sequences), were placed into a multiple sequence alignment, for each genotype (genotype A: 5868 sequences, B: 4630, C: 7820, D: 8300, E: 2043, F: 985, G: 189, H: 108, I: 23), according to the results of offline BLAST searches against a custom reference library of full-length sequences. Further curation was performed to improve the alignment.

Conclusions

The algorithm described in this paper generates, for each of the nine HBV genotypes, multiple sequence alignments, which contain full-length and subgenomic fragments. The alignments can be updated as new sequences become available in the online public sequence databases. The alignments are available at http://hvdr.bioinf.wits.ac.za/alignments.",2016-10-28 +23920640,"Identifying unproven cancer treatments on the health web: addressing accuracy, generalizability and scalability.","Building machine learning models that identify unproven cancer treatments on the Health Web is a promising approach for dealing with the dissemination of false and dangerous information to vulnerable health consumers. Aside from the obvious requirement of accuracy, two issues are of practical importance in deploying these models in real world applications. (a) Generalizability: The models must generalize to all treatments (not just the ones used in the training of the models). (b) Scalability: The models can be applied efficiently to billions of documents on the Health Web. First, we provide methods and related empirical data demonstrating strong accuracy and generalizability. Second, by combining the MapReduce distributed architecture and high dimensionality compression via Markov Boundary feature selection, we show how to scale the application of the models to WWW-scale corpora. 
The present work provides evidence that (a) a very small subset of unproven cancer treatments is sufficient to build a model to identify unproven treatments on the web; (b) unproven treatments use distinct language to market their claims and this language is learnable; (c) through distributed parallelization and state of the art feature selection, it is possible to prepare the corpora and build and apply models with large scalability.",2013-01-01 +31962460,Computational nanoplasmonics in the quasistatic limit for biosensing applications.,"The phenomenon of localized surface plasmon resonance (LSPR) provides high sensitivity in detecting biomolecules through shifts in resonance frequency when a target is present. Computational studies in this field have used the full Maxwell equations with simplified models of a sensor-analyte system, or they neglected the analyte altogether. In the long-wavelength limit, one can simplify the theory via an electrostatics approximation while adding geometrical detail in the sensor and analytes (at moderate computational cost). This work uses the latter approach, expanding the open-source PyGBe code to compute the extinction cross section of metallic nanoparticles in the presence of any target for sensing. The target molecule is represented by a surface mesh, based on its crystal structure. PyGBe is research software for continuum electrostatics, written in python with computationally expensive parts accelerated on GPU hardware, via PyCUDA. It is also accelerated algorithmically via a treecode that offers O(NlogN) computational complexity. These features allow PyGBe to handle problems with half a million boundary elements or more. In this work, we demonstrate the suitability of PyGBe, extended to compute LSPR response in the electrostatic limit, for biosensing applications. 
Using a model problem consisting of an isolated silver nanosphere in an electric field, our results show grid convergence as 1/N, and accurate computation of the extinction cross section as a function of wavelength (compared with an analytical solution). For a model of a sensor-analyte system, consisting of a spherical silver nanoparticle and a set of bovine serum albumin (BSA) proteins, our results again obtain grid convergence as 1/N (with respect to the Richardson extrapolated value). Computing the LSPR response as a function of wavelength in the presence of BSA proteins captures a redshift of 0.5 nm in the resonance frequency due to the presence of the analytes at 1-nm distance. The final result is a sensitivity study of the biosensor model, obtaining the shift in resonance frequency for various distances between the proteins and the nanoparticle. All results in this paper are fully reproducible, and we have deposited in archival data repositories all the materials needed to run the computations again and recreate the figures. PyGBe is open source under a permissive license and openly developed. Documentation is available at http://pygbe.github.io/pygbe/docs/.",2019-12-01 +30165572,Phylo-PFP: improved automated protein function prediction using phylogenetic distance of distantly related sequences.,"

Motivation

Function annotation of proteins is fundamental in contemporary biology across fields including genomics, molecular biology, biochemistry, systems biology and bioinformatics. Function prediction is indispensable in providing clues for interpreting omics-scale data as well as in assisting biologists to build hypotheses for designing experiments. As sequencing genomes is now routine due to the rapid advancement of sequencing technologies, computational protein function prediction methods have become increasingly important. A conventional method of annotating a protein sequence is to transfer functions from top hits of a homology search; however, this approach has substantial shortcomings including a low coverage in genome annotation.

Results

Here we have developed Phylo-PFP, a new sequence-based protein function prediction method, which mines functional information from a broad range of similar sequences, including those with a low sequence similarity identified by a PSI-BLAST search. To evaluate functional similarity between identified sequences and the query protein more accurately, Phylo-PFP reranks retrieved sequences by considering their phylogenetic distance. Compared to the Phylo-PFP's predecessor, PFP, which was among the top ranked methods in the second round of the Critical Assessment of Functional Annotation (CAFA2), Phylo-PFP demonstrated substantial improvement in prediction accuracy. Phylo-PFP was further shown to outperform prediction programs to date that were ranked top in CAFA2.

Availability and implementation

Phylo-PFP web server is available at http://kiharalab.org/phylo_pfp.php.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +30165467,Variable selection and validation in multivariate modelling.,"

Motivation

Validation of variable selection and predictive performance is crucial in construction of robust multivariate models that generalize well, minimize overfitting and facilitate interpretation of results. Inappropriate variable selection leads instead to selection bias, thereby increasing the risk of model overfitting and false positive discoveries. Although several algorithms exist to identify a minimal set of most informative variables (i.e. the minimal-optimal problem), few can select all variables related to the research question (i.e. the all-relevant problem). Robust algorithms combining identification of both minimal-optimal and all-relevant variables with proper cross-validation are urgently needed.

Results

We developed the MUVR algorithm to improve predictive performance and minimize overfitting and false positives in multivariate analysis. In the MUVR algorithm, minimal variable selection is achieved by performing recursive variable elimination in a repeated double cross-validation (rdCV) procedure. The algorithm supports partial least squares and random forest modelling, and simultaneously identifies minimal-optimal and all-relevant variable sets for regression, classification and multilevel analyses. Using three authentic omics datasets, MUVR yielded parsimonious models with minimal overfitting and improved model performance compared with state-of-the-art rdCV. Moreover, MUVR showed advantages over other variable selection algorithms, i.e. Boruta and VSURF, including simultaneous variable selection and validation scheme and wider applicability.

Availability and implementation

Algorithms, data, scripts and tutorial are open source and available as an R package ('MUVR') at https://gitlab.com/CarlBrunius/MUVR.git.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +30698657,Calculation of accurate interatomic contact surface areas for the quantitative analysis of non-bonded molecular interactions.,"SUMMARY:Intra- and intermolecular contact surfaces are routinely calculated for a large array of applications in bioinformatics but are typically approximated from differential solvent accessible surface area calculations and not calculated directly. These approximations do not properly take the effects of neighboring atoms into account and tend to deviate considerably from the true contact surface. We implemented an extension of the original Shrake-Rupley algorithm to accurately estimate interatomic contact surface areas of molecular structures and complexes. Our extended algorithm is able to calculate the contact area of an atom to all nearby atoms by directly calculating overlapping surface patches, taking into account the possible shielding effects of neighboring atoms. Here, we present a versatile software tool and web server for the calculation of contact surface areas, as well as buried surface areas and solvent accessible surface areas (SASA) for different types of biomolecules, such as proteins, nucleic acids and small organic molecules. Detailed results are provided in tab-separated values format for analysis and Protein Databank files for visualization. Direct contact surface area calculation resulted in improved accuracy in a benchmark with a non-redundant set of 245 protein-DNA complexes. SASA-based approximations underestimated protein-DNA contact surfaces on average by 40%. This software tool may be useful for surface-based intra- and intermolecular interaction analyses and scoring function development. AVAILABILITY AND IMPLEMENTATION:A web server, stand-alone binaries for Linux, MacOS and Windows and C++ source code are freely available from http://schuellerlab.org/dr_sasa/. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-09-01 +29790900,GSCALite: a web server for gene set cancer analysis.,"Summary:The availability of cancer genomic data makes it possible to analyze genes related to cancer. Cancer is usually the result of a set of genes and the signal of a single gene could be covered by background noise. Here, we present a web server named Gene Set Cancer Analysis (GSCALite) to analyze a set of genes in cancers with the following functional modules. (i) Differential expression in tumor versus normal, and the survival analysis; (ii) Genomic variations and their survival analysis; (iii) Gene expression associated cancer pathway activity; (iv) miRNA regulatory network for genes; (v) Drug sensitivity for genes; (vi) Normal tissue expression and eQTL for genes. GSCALite is a user-friendly web server for dynamic analysis and visualization of gene set in cancer and drug sensitivity correlation, which will be of broad utilities to cancer researchers. Availability and implementation:GSCALite is available on http://bioinfo.life.hust.edu.cn/web/GSCALite/. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-11-01 +30629218,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines for the Treatment of Adults With Metastatic Brain Tumors: Executive Summary.,"

Background

The Congress of Neurological Surgeons systematic review and evidence-based clinical practice parameter guidelines for the treatment of adults with metastatic brain tumors was first published in 2010. Because of the time elapsed since that publication, an update of this set of guidelines based on literature published since is now indicated.

Objective

To establish the best evidence-based management of metastatic brain tumors over all commonly used diagnostic and treatment modalities in regularly encountered clinical situations.

Methods

Literature searches regarding management of metastatic brain tumors with whole brain radiation therapy, surgery, stereotactic radiosurgery, chemotherapy, prophylactic anticonvulsants, steroids, instances of multiple brain metastases, and emerging and investigational therapies were carried out to answer questions designed by consensus of a multidisciplinary writing group.

Results

Recommendations were created and their strength linked to the quality of the literature data available thus creating an evidence-based guideline. Importantly, shortcomings and biases to the literature data are brought out so as to provide guidance for future investigation and improvements in the management of patients with metastatic brain tumors.

Conclusion

This series of guidelines was constructed to assess the most current and clinically relevant evidence for management of metastatic brain tumors. They set a benchmark regarding the current evidence base for this management while also highlighting important key areas for future basic and clinical research, particularly on those topics for which no recommendations could be formulated. The full guideline can be found at: https://www.cns.org/guidelines-treatment-adults-metastatic-brain-tumors/chapter_1.",2019-03-01 +29924895,PlantSEED enables automated annotation and reconstruction of plant primary metabolism with improved compartmentalization and comparative consistency.,"Genome-scale metabolic reconstructions help us to understand and engineer metabolism. Next-generation sequencing technologies are delivering genomes and transcriptomes for an ever-widening range of plants. While such omic data can, in principle, be used to compare metabolic reconstructions in different species, organs and environmental conditions, these comparisons require a standardized framework for the reconstruction of metabolic networks from transcript data. We previously introduced PlantSEED as a framework covering primary metabolism for 10 species. We have now expanded PlantSEED to include 39 species and provide tools that enable automated annotation and metabolic reconstruction from transcriptome data. The algorithm for automated annotation in PlantSEED propagates annotations using a set of signature k-mers (short amino acid sequences characteristic of particular proteins) that identify metabolic enzymes with an accuracy of about 97%. PlantSEED reconstructions are built from a curated template that includes consistent compartmentalization for more than 100 primary metabolic subsystems. Together, the annotation and reconstruction algorithms produce reconstructions without gaps and with more accurate compartmentalization than existing resources. 
These tools are available via the PlantSEED web interface at http://modelseed.org, which enables users to upload, annotate and reconstruct from private transcript data and simulate metabolic activity under various conditions using flux balance analysis. We demonstrate the ability to compare these metabolic reconstructions with a case study involving growth on several nitrogen sources in roots of four species.",2018-08-09 +30885906,Can treating vitamin D deficiency reduce exacerbations of chronic obstructive pulmonary disease?,"The studyJolliffe D, Greenberg L, Hooper R, Mathyssen C, Rafiq R, de Jongh R, Camargo C, Griffiths C, Janssens W, Martineau A. Vitamin D to prevent exacerbations of COPD: systematic review and meta-analysis of individual participant data from randomised controlled trials. Thorax 2019. doi:10.1136/thoraxjnl-2018-212092This project was funded by the National Institute for Health Research Health Technology Appraisal programme (project number 13/03/25).To read the full NIHR Signal, go to: https://discover.dc.nihr.ac.uk/content/signal-000737/treating-vitamin-d-deficiency-may-reduce-exacerbations-of-copd.",2019-03-18 +31113766,Prosthetic valve endocarditis after transcatheter or surgical aortic valve replacement with a bioprosthesis: results from the FinnValve Registry.,"AIMS:The aim of this study was to compare the risk of prosthetic valve endocarditis (PVE) in patients with transcatheter aortic valve replacement (TAVR) or surgical aortic valve replacement (SAVR). METHODS AND RESULTS:The FinnValve registry included data from 6,463 consecutive patients who underwent TAVR (n=2,130) or SAVR (n=4,333) with a bioprosthesis from 2008 to 2017. PVE was defined according to the modified Duke criteria. In this study, the incidence of PVE was 3.4/1,000 person-years after TAVR, and 2.9/1,000 person-years after SAVR. 
In competing risk analysis there was no significant difference in the risk of PVE between patients with TAVR and SAVR over an eight-year observational period. Male gender (HR 1.73, 95% CI: 1.04-2.89) and deep sternal wound infection or vascular access-site infection (HR 5.45, 95% CI: 2.24-13.2) were positively associated with PVE, but not type of procedure (HR 1.09, 95% CI: 0.59-2.01) in multivariate analysis. The mortality rate was 37.7% at one month and increased to 52.5% at one year. Surgical treatment was independently associated with decreased in-hospital mortality (HR 0.34, 95% CI: 0.21-0.61). CONCLUSIONS:PVE is rare, and its risk is similar after TAVR and SAVR. ClinicalTrials.gov Identifier: NCT03385915. https://clinicaltrials.gov/ct2/show/NCT03385915.",2019-08-09 +27664130,"The environment ontology in 2016: bridging domains with increased scope, semantic density, and interoperation.","

Background

The Environment Ontology (ENVO; http://www.environmentontology.org/ ), first described in 2013, is a resource and research target for the semantically controlled description of environmental entities. The ontology's initial aim was the representation of the biomes, environmental features, and environmental materials pertinent to genomic and microbiome-related investigations. However, the need for environmental semantics is common to a multitude of fields, and ENVO's use has steadily grown since its initial description. We have thus expanded, enhanced, and generalised the ontology to support its increasingly diverse applications.

Methods

We have updated our development suite to promote expressivity, consistency, and speed: we now develop ENVO in the Web Ontology Language (OWL) and employ templating methods to accelerate class creation. We have also taken steps to better align ENVO with the Open Biological and Biomedical Ontologies (OBO) Foundry principles and interoperate with existing OBO ontologies. Further, we applied text-mining approaches to extract habitat information from the Encyclopedia of Life and automatically create experimental habitat classes within ENVO.

Results

Relative to its state in 2013, ENVO's content, scope, and implementation have been enhanced and much of its existing content revised for improved semantic representation. ENVO now offers representations of habitats, environmental processes, anthropogenic environments, and entities relevant to environmental health initiatives and the global Sustainable Development Agenda for 2030. Several branches of ENVO have been used to incubate and seed new ontologies in previously unrepresented domains such as food and agronomy. The current release version of the ontology, in OWL format, is available at http://purl.obolibrary.org/obo/envo.owl .

Conclusions

ENVO has been shaped into an ontology which bridges multiple domains including biomedicine, natural and anthropogenic ecology, 'omics, and socioeconomic development. Through continued interactions with our users and partners, particularly those performing data archiving and synthesis, we anticipate that ENVO's growth will accelerate in 2017. As always, we invite further contributions and collaboration to advance the semantic representation of the environment, ranging from geographic features and environmental materials, across habitats and ecosystems, to everyday objects in household settings.",2016-09-23 +31008149,The GEDI Simulator: A Large-Footprint Waveform Lidar Simulator for Calibration and Validation of Spaceborne Missions.,"NASA's Global Ecosystem Dynamics Investigation (GEDI) is a spaceborne lidar mission which will produce near global (51.6°S to 51.6°N) maps of forest structure and above-ground biomass density during its 2-year mission. GEDI uses a waveform simulator for calibration of algorithms and assessing mission accuracy. This paper implements a waveform simulator, using the method proposed in Blair and Hofton (1999; https://doi.org/10.1029/1999GL010484), and builds upon that work by adding instrument noise and by validating simulated waveforms across a range of forest types, airborne laser scanning (ALS) instruments, and survey configurations. The simulator was validated by comparing waveform metrics derived from simulated waveforms against those derived from observed large-footprint, full-waveform lidar data from NASA's airborne Land, Vegetation, and Ice Sensor (LVIS). The simulator was found to produce waveform metrics with a mean bias of less than 0.22 m and a root-mean-square error of less than 5.7 m, as long as the ALS data had sufficient pulse density. The minimum pulse density required depended upon the instrument. 
Measurement errors due to instrument noise predicted by the simulator were within 1.5 m of those from observed waveforms and 70-85% of variance in measurement error was explained. Changing the ALS survey configuration had no significant impact on simulated metrics, suggesting that the ALS pulse density is a sufficient metric of simulator accuracy across the range of conditions and instruments tested. These results give confidence in the use of the simulator for the pre-launch calibration and performance assessment of the GEDI mission.",2019-02-27 +30649179,MULTiPly: a novel multi-layer predictor for discovering general and specific types of promoters.,"

Motivation

Promoters are short DNA consensus sequences that are localized proximal to the transcription start sites of genes, allowing transcription initiation of particular genes. However, the precise prediction of promoters remains a challenging task because individual promoters often differ from the consensus at one or more positions.

Results

In this study, we present a new multi-layer computational approach, called MULTiPly, for recognizing promoters and their specific types. MULTiPly took into account the sequences themselves, including both local information such as k-tuple nucleotide composition, dinucleotide-based auto covariance and global information of the entire samples based on bi-profile Bayes and k-nearest neighbour feature encodings. Specifically, the F-score feature selection method was applied to identify the best unique type of feature prediction results, in combination with other types of features that were subsequently added to further improve the prediction performance of MULTiPly. Benchmarking experiments on the benchmark dataset and comparisons with five state-of-the-art tools show that MULTiPly can achieve a better prediction performance on 5-fold cross-validation and jackknife tests. Moreover, the superiority of MULTiPly was also validated on a newly constructed independent test dataset. MULTiPly is expected to be used as a useful tool that will facilitate the discovery of both general and specific types of promoters in the post-genomic era.

Availability and implementation

The MULTiPly webserver and curated datasets are freely available at http://flagshipnt.erc.monash.edu/MULTiPly/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +31171447,"SynGO: An Evidence-Based, Expert-Curated Knowledge Base for the Synapse.","Synapses are fundamental information-processing units of the brain, and synaptic dysregulation is central to many brain disorders (""synaptopathies""). However, systematic annotation of synaptic genes and ontology of synaptic processes are currently lacking. We established SynGO, an interactive knowledge base that accumulates available research about synapse biology using Gene Ontology (GO) annotations to novel ontology terms: 87 synaptic locations and 179 synaptic processes. SynGO annotations are exclusively based on published, expert-curated evidence. Using 2,922 annotations for 1,112 genes, we show that synaptic genes are exceptionally well conserved and less tolerant to mutations than other genes. Many SynGO terms are significantly overrepresented among gene variations associated with intelligence, educational attainment, ADHD, autism, and bipolar disorder and among de novo variants associated with neurodevelopmental disorders, including schizophrenia. SynGO is a public, universal reference for synapse research and an online analysis platform for interpretation of large-scale -omics data (https://syngoportal.org and http://geneontology.org).",2019-06-03 +,Phylogenetic relationships of nonbiting midges in the subfamily Tanypodinae (Diptera: Chironomidae) inferred from morphology,"The nonbiting midge subfamily Tanypodinae represents one of the most diverse lineages of Chironomidae. Despite the wide distribution and high diversity of tanypodine chironomids, the evolutionary history of the subfamily remains poorly understood. Here, we present the first phylogenetic analysis of the subfamily Tanypodinae based on morphological data. 
Cladistic analyses were conducted using 86 morphological characters from 115 species belonging to 54 tanypodine genera, including the eight currently recognised tribes: Anatopyniini, Clinotanypodini, Coelopyniini, Macropelopiini, Natarsiini, Pentaneurini, Procladiini and Tanypodini. We use characters from fourth‐instar larvae, pupae and adults of both sexes. We examine the effects of implied weighting by reanalysing the data with varying values of concavity constant (k). Our analysis supports the monophyly of Tanypodinae with Podonominae as its sister group. All previously proposed tribes are recovered as monophyletic assemblages under a wide range of weighting factors. Under these conditions, the genus Fittkauimyia is the sister group of the remaining Macropelopiini and is erected as a new monobasic tribe, Fittkauimyiini trib.n. The tribe Pentaneurini is recovered as monophyletic with some internal relationships resolved. The genus Paramerina, recovered as sister of Reomyia + Zavrelimyia, is formally synonymised with Zavrelimyia syn.n., based on morphological similarity in all three life stages and treated as a subgenus of the latter. Finally, the recently suggested synonymies of Gressittius and Guassutanypus with Alotanypus and the establishment of the subgenera Conchapelopia (Helopelopia), Macropelopia (Bethbilbeckia), Monopelopia (Cantopelopia), Thienemannimyia (Hayesomyia) and Zavrelimyia (Reomyia and Schineriella) are investigated. Our results support all proposed changes, except for the subgenus‐level status of Helopelopia and Cantopelopia. We suggest re‐establishment of Helopelopia as a genus, but refrain from promoting genus‐level status of Cantopelopia at present because the apparent sister‐relationship between Monopelopia + Nilotanypus likely is due to wing vein reduction caused by miniaturisation. 
This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:DF012C17‐AFB3‐4904‐83DC‐30DD94D0B376.",2016-01-01 +30714194,The Short-chain Dehydrogenase/Reductase Engineering Database (SDRED): A classification and analysis system for a highly diverse enzyme family.,"The Short-chain Dehydrogenases/Reductases Engineering Database (SDRED) covers one of the largest known protein families (168 150 proteins). Assignment to the superfamilies of Classical and Extended SDRs was achieved by global sequence similarity and by identification of family-specific sequence motifs. Two standard numbering schemes were established for Classical and Extended SDRs that allow for the determination of conserved amino acid residues, such as cofactor specificity determining positions or superfamily specific sequence motifs. The comprehensive sequence dataset of the SDRED facilitates the refinement of family-specific sequence motifs. The glycine-rich motifs for Classical and Extended SDRs were refined to improve the precision of superfamily classification. In each superfamily, the majority of sequences formed a tightly connected sequence network and belonged to a large homologous family. Despite their different sequence motifs and their different sequence length, the two sequence networks of Classical and Extended SDRs are not separate, but connected by edges at a threshold of 40% sequence similarity, indicating that all SDRs belong to a large, connected network. The SDRED is accessible at https://sdred.biocatnet.de/.",2019-02-25 +27387194,TBC2health: a database of experimentally validated health-beneficial effects of tea bioactive compounds.,"Tea is one of the most consumed beverages in the world. Considerable studies show the exceptional health benefits (e.g. antioxidation, cancer prevention) of tea owing to its various bioactive components. However, data from these extensively published papers had not been made available in a central database. 
To lay a foundation in improving the understanding of healthy tea functions, we established a TBC2health database that currently documents 1338 relationships between 497 tea bioactive compounds and 206 diseases (or phenotypes) manually culled from over 300 published articles. Each entry in TBC2health contains comprehensive information about a bioactive relationship that can be accessed in three aspects: (i) compound information, (ii) disease (or phenotype) information and (iii) evidence and reference. Using the curated bioactive relationships, a bipartite network was reconstructed and the corresponding network (or sub-network) visualization and topological analyses are provided for users. This database has a user-friendly interface for entry browse, search and download. In addition, TBC2health provides a submission page and several useful tools (e.g. BLAST, molecular docking) to facilitate use of the database. Consequently, TBC2health can serve as a valuable bioinformatics platform for the exploration of beneficial effects of tea on human health. TBC2health is freely available at http://camellia.ahau.edu.cn/TBC2health.",2017-09-01 +30474566,Quantitative characterization of the urine and serum metabolomes of children is essential for 'omics' studies.,"Understanding the long-term health impacts of the early-life exposome requires the characterization and assimilation of multi 'omics' data to ultimately link molecular changes to exposures. In this way, markers associated with negative health outcomes, such as increased disease risk, can be ascertained. However, determining the extent and direction of metabolic perturbations relies on comparisons to existing metabolomic reference profiles. While such resources are increasingly available for adult populations, analogous tools for children are decidedly lacking. Lau et al. have compiled robust, translatable quantitative metabolomics data on urine and serum samples for European children across six study locations. 
Metabolites were associated with body mass index, diet and demographics, and correlated within and between biofluids. As a result, a novel association between urinary 4-deoxyerythronic acid and body mass index was uncovered. This work serves as a crucial reference for future studies in exposomics, and - more broadly - represents a significant step forward for metabolomics by creating the foundation for a comprehensive reference metabolome for children. Please see related article: https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-018-1190-8.",2018-11-26 +27635320,"CardioTF, a database of deconstructing transcriptional circuits in the heart system.","

Background

Information on cardiovascular gene transcription is fragmented and far behind the present requirements of the systems biology field. To create a comprehensive source of data for cardiovascular gene regulation and to facilitate a deeper understanding of genomic data, the CardioTF database was constructed. The purpose of this database is to collate information on cardiovascular transcription factors (TFs), position weight matrices (PWMs), and enhancer sequences discovered using the ChIP-seq method.

Methods

The Naïve-Bayes algorithm was used to classify literature and identify all PubMed abstracts on cardiovascular development. The natural language learning tool GNAT was then used to identify corresponding gene names embedded within these abstracts. Local Perl scripts were used to integrate and dump data from public databases into the MariaDB management system (MySQL). In-house R scripts were written to analyze and visualize the results.

Results

Known cardiovascular TFs from humans and human homologs from fly, Ciona, zebrafish, frog, chicken, and mouse were identified and deposited in the database. PWMs from Jaspar, hPDI, and UniPROBE databases were deposited in the database and can be retrieved using their corresponding TF names. Gene enhancer regions from various sources of ChIP-seq data were deposited into the database and were able to be visualized by graphical output. Besides biocuration, mouse homologs of the 81 core cardiac TFs were selected using a Naïve-Bayes approach and then by intersecting four independent data sources: RNA profiling, expert annotation, PubMed abstracts and phenotype.

Discussion

The CardioTF database can be used as a portal to construct transcriptional network of cardiac development.

Availability and implementation

Database URL: http://www.cardiosignal.org/database/cardiotf.html.",2016-08-23 +28053165,proGenomes: a resource for consistent functional and taxonomic annotations of prokaryotic genomes.,"The availability of microbial genomes has opened many new avenues of research within microbiology. This has been driven primarily by comparative genomics approaches, which rely on accurate and consistent characterization of genomic sequences. It is nevertheless difficult to obtain consistent taxonomic and integrated functional annotations for defined prokaryotic clades. Thus, we developed proGenomes, a resource that provides user-friendly access to currently 25 038 high-quality genomes whose sequences and consistent annotations can be retrieved individually or by taxonomic clade. These genomes are assigned to 5306 consistent and accurate taxonomic species clusters based on previously established methodology. proGenomes also contains functional information for almost 80 million protein-coding genes, including a comprehensive set of general annotations and more focused annotations for carbohydrate-active enzymes and antibiotic resistance genes. Additionally, broad habitat information is provided for many genomes. All genomes and associated information can be downloaded by user-selected clade or multiple habitat-specific sets of representative genomes. We expect that the availability of high-quality genomes with comprehensive functional annotations will promote advances in clinical microbial genomics, functional evolution and other subfields of microbiology. proGenomes is available at http://progenomes.embl.de.",2016-10-24 +31290545,A curated collection of transcriptome datasets to investigate the molecular mechanisms of immunoglobulin E-mediated atopic diseases. ,"Prevalence of allergies has reached ~20% of population in developed countries and sensitization rate to one or more allergens among school age children are approaching 50%. 
However, the combination of the complexity of atopic allergy susceptibility/development and environmental factors has made identification of gene biomarkers challenging. The amount of publicly accessible transcriptomic data presents an unprecedented opportunity for mechanistic discoveries and validation of complex disease signatures across studies. However, this necessitates structured methodologies and visual tools for the interpretation of results. Here, we present a curated collection of transcriptomic datasets relevant to immunoglobulin E-mediated atopic diseases (ranging from allergies to primary immunodeficiencies). Thirty-three datasets from the Gene Expression Omnibus, encompassing 1860 transcriptome profiles, were made available on the Gene Expression Browser (GXB), an online and open-source web application that allows for the query, visualization and annotation of metadata. The thematic compositions, disease categories, sample number and platforms of the collection are described. Ranked gene lists and sample grouping are used to facilitate data visualization/interpretation and are available online via GXB (http://ige.gxbsidra.org/dm3/geneBrowser/list). Dataset validation using associated publications showed good concordance in GXB gene expression trend and fold-change.",2019-01-01 +27242038,GESDB: a platform of simulation resources for genetic epidemiology studies. ,"Computer simulations are routinely conducted to evaluate new statistical methods, to compare the properties among different methods, and to mimic the observed data in genetic epidemiology studies. Conducting simulation studies can become a complicated task as several challenges can occur, such as the selection of an appropriate simulation tool and the specification of parameters in the simulation model. Although abundant simulated data have been generated for human genetic research, currently there is no public database designed specifically as a repository for these simulated data. 
With the lack of such a database, for similar studies, similar simulations may have been repeated, which resulted in redundant work. Thus, we created an online platform, the Genetic Epidemiology Simulation Database (GESDB), for simulation data sharing and discussion of simulation techniques for genetic epidemiology studies. GESDB consists of a database for storing simulation scripts, simulated data and documentation from published articles as well as a discussion forum, which provides a platform for discussion of the simulated data and exchanging simulation ideas. Moreover, summary statistics such as the simulation tools that are most commonly used and datasets that are most frequently downloaded are provided. The statistics will be informative for researchers to choose an appropriate simulation tool or select a common dataset for method comparisons. GESDB can be accessed at http://gesdb.nhri.org.tw. Database URL: http://gesdb.nhri.org.tw.",2016-05-30 +30295728,DIGREM: an integrated web-based platform for detecting effective multi-drug combinations.,"

Motivation

Synergistic drug combinations are a promising approach to achieve a desirable therapeutic effect in complex diseases through the multi-target mechanism. However, in vivo screening of all possible multi-drug combinations remains cost-prohibitive. An effective and robust computational model to predict drug synergy in silico will greatly facilitate this process.

Results

We developed DIGREM (Drug-Induced Genomic Response models for identification of Effective Multi-drug combinations), an online tool kit that can effectively predict drug synergy. DIGREM integrates DIGRE, IUPUI_CCBB, gene set-based and correlation-based models for users to predict synergistic drug combinations with dose-response information and drug-treated gene expression profiles.

Availability and implementation

http://lce.biohpc.swmed.edu/drugcombination.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +22140110,PubChem's BioAssay Database.,"PubChem (http://pubchem.ncbi.nlm.nih.gov) is a public repository for biological activity data of small molecules and RNAi reagents. The mission of PubChem is to deliver free and easy access to all deposited data, and to provide intuitive data analysis tools. The PubChem BioAssay database currently contains 500,000 descriptions of assay protocols, covering 5000 protein targets, 30,000 gene targets and providing over 130 million bioactivity outcomes. PubChem's bioassay data are integrated into the NCBI Entrez information retrieval system, thus making PubChem data searchable and accessible by Entrez queries. Also, as a repository, PubChem constantly optimizes and develops its deposition system answering many demands of both high- and low-volume depositors. The PubChem information platform allows users to search, review and download bioassay description and data. The PubChem platform also enables researchers to collect, compare and analyze biological test results through web-based and programmatic tools. In this work, we provide an update for the PubChem BioAssay resource, including information content growth, data model extension and new developments of data submission, retrieval, analysis and download tools.",2011-12-02 +30373486,A Trauma-Informed Approach to Building College Students' Resilience.,"

Purpose

This paper describes the development of a new psychoeducational universal prevention resilience program ( https://strong.fsu.edu ) designed to complement existing mental health services at a large public university. The first set of descriptive data (n = 229) from the project's student surveys is discussed.

Methods

A voluntary and anonymous online questionnaire was used to determine student attitudes toward the new program.

Results

A large majority of participants (more than 80%) perceived the website to be credible, rating it as believable, trustworthy, and accurate. 90% believe the university resources included in the project would help themselves and others overcome struggles and challenges. Other results are also discussed.

Discussion

This data from a unique project shows the promise of using an online, integrative tool for a campus resilience initiative. The project is dynamic; analysis of student responses will inform ongoing revisions and refinements.",2018-10-29 +27787818,Predicting Protein Secondary Structure Using Consensus Data Mining (CDM) Based on Empirical Statistics and Evolutionary Information.,"Predicting the secondary structure of a protein from its sequence still remains a challenging problem. The prediction accuracies remain around 80 %, and for very diverse methods. Using evolutionary information and machine learning algorithms in particular has had the most impact. In this chapter, we will first define secondary structures, then we will review the Consensus Data Mining (CDM) technique based on the robust GOR algorithm and Fragment Database Mining (FDM) approach. GOR V is an empirical method utilizing a sliding window approach to model the secondary structural elements of a protein by making use of generalized evolutionary information. FDM uses data mining from experimental structure fragments, and is able to successfully predict the secondary structure of a protein by combining experimentally determined structural fragments based on sequence similarities of the fragments. The CDM method combines predictions from GOR V and FDM in a hierarchical manner to produce consensus predictions for secondary structure. In other words, if sequence fragments are not available, then it uses GOR V to make the secondary structure prediction. The online server of CDM is available at http://gor.bb.iastate.edu/cdm/ .",2017-01-01 +30545485,ProvCaRe: Characterizing scientific reproducibility of biomedical research studies using semantic provenance metadata.,"OBJECTIVE:Reproducibility of research studies is key to advancing biomedical science by building on sound results and reducing inconsistencies between published results and study data. 
We propose that the available data from research studies combined with provenance metadata provide a framework for evaluating scientific reproducibility. We developed the ProvCaRe platform to model, extract, and query semantic provenance information from 435,248 published articles. METHODS:The ProvCaRe platform consists of: (1) the S3 model and a formal ontology; (2) a provenance-focused text processing workflow to generate provenance triples consisting of subject, predicate, and object using metadata extracted from articles; and (3) the ProvCaRe knowledge repository that supports ""provenance-aware"" hypothesis-driven search queries. A new provenance-based ranking algorithm is used to rank the articles in the search query results. RESULTS:The ProvCaRe knowledge repository contains 48.9 million provenance triples. Seven research hypotheses were used as search queries for evaluation and the resulting provenance triples were analyzed using five categories of provenance terms. The highest number of terms (34%) described provenance related to population cohort followed by 29% of terms describing statistical data analysis methods, and only 5% of the terms described the measurement instruments used in a study. In addition, the analysis showed that some articles included a higher number of provenance terms across multiple provenance categories suggesting a higher potential for reproducibility of these research studies. CONCLUSION:The ProvCaRe knowledge repository (https://provcare.case.edu/) is one of the largest provenance resources for biomedical research studies that combines intuitive search functionality with a new provenance-based ranking feature to list articles related to a search query.",2018-11-03 +29070035,SaVanT: a web-based tool for the sample-level visualization of molecular signatures in gene expression profiles.,"

Background

Molecular signatures are collections of genes characteristic of a particular cell type, tissue, disease, or perturbation. Signatures can also be used to interpret expression profiles generated from heterogeneous samples. Large collections of gene signatures have been previously developed and catalogued in the MSigDB database. In addition, several consortia and large-scale projects have systematically profiled broad collections of purified primary cells, molecular perturbations of cell types, and tissues from specific diseases, and the specificity and breadth of these datasets can be leveraged to create additional molecular signatures. However, to date there are few tools that allow the visualization of individual signatures across large numbers of expression profiles. Signature visualization of individual samples allows, for example, the identification of patient subcategories a priori on the basis of well-defined molecular signatures.

Result

Here, we generate and compile 10,985 signatures (636 newly-generated and 10,349 previously available from MSigDB) and provide a web-based Signature Visualization Tool (SaVanT; http://newpathways.mcdb.ucla.edu/savant ), to visualize these signatures in user-generated expression data. We show that using SaVanT, immune activation signatures can distinguish patients with different types of acute infections (influenza A and bacterial pneumonia). Furthermore, SaVanT is able to identify the prominent signatures within each patient group, and identify the primary cell types underlying different leukemias (acute myeloid and acute lymphoblastic) and skin disorders.

Conclusions

The development of SaVanT facilitates large-scale analysis of gene expression profiles on a patient-level basis to identify patient subphenotypes, or potential therapeutic target pathways.",2017-10-25 +,Tracking the blue: A MLST approach to characterise the Pseudomonas fluorescens group,"The Pseudomonas fluorescens group comprises several closely related species that are involved in food contamination and spoilage. Specifically, the interest in P. fluorescens as a spoiler of dairy products increased after the cases of “blue mozzarella” that occurred in Italy in 2010.A Multilocus Sequence Typing (MLST) scheme was developed and applied to characterise 136 isolates (reference strains and food borne isolates) at strain level, to reveal the genetic relationships among them and to disclose any possible genetic clustering of phenotypic markers involved in food spoilage (protease, lipase, lecithinase activities and pigmented or fluorescent molecule production). The production of dark blue diffusible pigment was evaluated on several bacterial culture media and directly on mozzarella cheese.The MLST scheme provided precise genotyping at the strain level, and the population analyses of the concatenated sequences allowed major taxa to be defined. This approach was revealed to be suitable for tracking the strains according to their origin, such as dairy plants or food matrices. The genetic analysis revealed the presence of a connection between the blue pigment production and a specific phylogenetic cluster. The development of the online database specific to the P. fluorescens group (http://pubmlst.org/pfluorescens) will facilitate the application of the scheme and the sharing of the data.",2014-05-01 +26956673,CoeViz: a web-based tool for coevolution analysis of protein residues.,"

Background

Proteins generally perform their function in a folded state. Residues forming an active site, whether it is a catalytic center or interaction interface, are frequently distant in a protein sequence. Hence, traditional sequence-based prediction methods focusing on a single residue (or a short window of residues) at a time may have difficulties in identifying and clustering the residues constituting a functional site, especially when a protein has multiple functions. Evolutionary information encoded in multiple sequence alignments is known to greatly improve sequence-based predictions. Identification of coevolving residues further advances the protein structure and function annotation by revealing cooperative pairs and higher order groupings of residues.

Results

We present a new web-based tool (CoeViz) that provides a versatile analysis and visualization of pairwise coevolution of amino acid residues. The tool computes three covariance metrics: mutual information, chi-square statistic, Pearson correlation, and one conservation metric: joint Shannon entropy. Implemented adjustments of covariance scores include phylogeny correction, corrections for sequence dissimilarity and alignment gaps, and the average product correction. Visualization of residue relationships is enhanced by hierarchical cluster trees, heat maps, circular diagrams, and the residue highlighting in protein sequence and 3D structure. Unlike other existing tools, CoeViz is not limited to analyzing conserved domains or protein families and can process long, unstructured and multi-domain proteins thousands of residues long. Two examples are provided to illustrate the use of the tool for identification of residues (1) involved in enzymatic function, (2) forming short linear functional motifs, and (3) constituting a structural domain.

Conclusions

CoeViz represents a practical resource for a quick sequence-based protein annotation for molecular biologists, e.g., for identifying putative functional clusters of residues and structural domains. CoeViz also can serve computational biologists as a resource of coevolution matrices, e.g., for developing machine learning-based prediction models. The presented tool is integrated in the POLYVIEW-2D server (http://polyview.cchmc.org/) and available from resulting pages of POLYVIEW-2D.",2016-03-08 +30069446,"Alternative Splicing Detection Tool-a novel PERL algorithm for sensitive detection of splicing events, based on next-generation sequencing data analysis.","Next-generation sequencing (NGS) can provide researchers with high impact information regarding alternative splice variants or transcript identifications. However, the enormous amount of data acquired from NGS platforms make the analysis of alternative splicing events hard to accomplish. For this reason, we designed the ""Alternative Splicing Detection Tool"" (ASDT), an algorithm that is capable of identifying alternative splicing events, including novel ones from high-throughput NGS data. ASDT is available as a PERL script at http://aias.biol.uoa.gr/~mtheo and can be executed on any system with PERL installed. In addition to the detection of annotated and novel alternative splicing events from high-throughput NGS data, ASDT can also analyze the intronic regions of genes, thus enabling the detection of novel cryptic exons residing in annotated introns, extensions of previously annotated exons, or even intron retentions. Consequently, ASDT demonstrates many innovative and unique features that can efficiently contribute to alternative splicing analysis of NGS data.",2018-06-01 +24501395,ANIA: ANnotation and Integrated Analysis of the 14-3-3 interactome.,"The dimeric 14-3-3 proteins dock onto pairs of phosphorylated Ser and Thr residues on hundreds of proteins, and thereby regulate many events in mammalian cells. 
To facilitate global analyses of these interactions, we developed a web resource named ANIA: ANnotation and Integrated Analysis of the 14-3-3 interactome, which integrates multiple data sets on 14-3-3-binding phosphoproteins. ANIA also pinpoints candidate 14-3-3-binding phosphosites using predictor algorithms, assisted by our recent discovery that the human 14-3-3-interactome is highly enriched in 2R-ohnologues. 2R-ohnologues are proteins in families of two to four, generated by two rounds of whole genome duplication at the origin of the vertebrate animals. ANIA identifies candidate 'lynchpins', which are 14-3-3-binding phosphosites that are conserved across members of a given 2R-ohnologue protein family. Other features of ANIA include a link to the catalogue of somatic mutations in cancer database to find cancer polymorphisms that map to 14-3-3-binding phosphosites, which would be expected to interfere with 14-3-3 interactions. We used ANIA to map known and candidate 14-3-3-binding enzymes within the 2R-ohnologue complement of the human kinome. Our projections indicate that 14-3-3s dock onto many more human kinases than has been realized. Guided by ANIA, PAK4, 6 and 7 (p21-activated kinases 4, 6 and 7) were experimentally validated as a 2R-ohnologue family of 14-3-3-binding phosphoproteins. PAK4 binding to 14-3-3 is stimulated by phorbol ester, and involves the 'lynchpin' site phosphoSer99 and a major contribution from Ser181. In contrast, PAK6 and PAK7 display strong phorbol ester-independent binding to 14-3-3, with Ser113 critical for the interaction with PAK6. These data point to differential 14-3-3 regulation of PAKs in control of cell morphology. Database URL: https://ania-1433.lifesci.dundee.ac.uk/prediction/webserver/index.py.",2014-02-05 +23794918,FORMIDABEL: The Belgian Ants Database.,"

Unlabelled

FORMIDABEL is a database of Belgian Ants containing more than 27.000 occurrence records. These records originate from collections, field sampling and literature. The database gives information on 76 native and 9 introduced ant species found in Belgium. The collection records originated mainly from the ants collection in Royal Belgian Institute of Natural Sciences (RBINS), the 'Gaspar' Ants collection in Gembloux and the zoological collection of the University of Liège (ULG). The oldest occurrences date back from May 1866, the most recent refer to August 2012. FORMIDABEL is a work in progress and the database is updated twice a year. THE LATEST VERSION OF THE DATASET IS PUBLICLY AND FREELY ACCESSIBLE THROUGH THIS URL: http://ipt.biodiversity.be/resource.do?r=formidabel. The dataset is also retrievable via the GBIF data portal through this link: http://data.gbif.org/datasets/resource/14697 A dedicated geo-portal, developed by the Belgian Biodiversity Platform is accessible at: http://www.formicidae-atlas.be

Purpose

FORMIDABEL is a joint cooperation of the Flemish ants working group ""Polyergus"" (http://formicidae.be) and the Wallonian ants working group ""FourmisWalBru"" (http://fourmiswalbru.be). The original database was created in 2002 in the context of the preliminary red data book of Flemish Ants (Dekoninck et al. 2003). Later, in 2005, data from the Southern part of Belgium; Wallonia and Brussels were added. In 2012 this dataset was again updated for the creation of the first Belgian Ants Atlas (Figure 1) (Dekoninck et al. 2012). The main purpose of this atlas was to generate maps for all outdoor-living ant species in Belgium using an overlay of the standard Belgian ecoregions. By using this overlay for most species, we can discern a clear and often restricted distribution pattern in Belgium, mainly based on vegetation and soil types.",2013-06-03 +31296220,Pathway analysis of rare variants for the clustered phenotypes by using hierarchical structured components analysis.,"

Backgrounds

Recent large-scale genetic studies often involve clustered phenotypes such as repeated measurements. Compared to a series of univariate analyses of single phenotypes, an analysis of clustered phenotypes can be useful for substantially increasing statistical power to detect more genetic associations. Moreover, for the analysis of rare variants, incorporation of biological information can boost weak effects of the rare variants.

Results

Through simulation studies, we showed that the proposed method outperforms other methods currently available for pathway-level analysis of clustered phenotypes. Moreover, a real data analysis using a large-scale whole exome sequencing dataset of 995 samples with metabolic syndrome-related phenotypes successfully identified the glyoxylate and dicarboxylate metabolism pathway that could not be identified by the univariate analyses of single phenotypes and other existing methods.

Conclusion

In this paper, we introduced a novel pathway-level association test by combining hierarchical structured components analysis and penalized generalized estimating equations. The proposed method analyzes all pathways in a single unified model while considering their correlations. C/C++ implementation of PHARAOH-GEE is publicly available at http://statgen.snu.ac.kr/software/pharaoh-gee/ .",2019-07-11 +28155707,Prioritizing biological pathways by recognizing context in time-series gene expression data.,"

Background

The primary goal of pathway analysis using transcriptome data is to find significantly perturbed pathways. However, pathway analysis is not always successful in identifying pathways that are truly relevant to the context under study. A major reason for this difficulty is that a single gene is involved in multiple pathways. In the KEGG pathway database, there are 146 genes, each of which is involved in more than 20 pathways. Thus activation of even a single gene will result in activation of many pathways. This complex relationship often makes the pathway analysis very difficult. While we need much more powerful pathway analysis methods, a readily available alternative way is to incorporate the literature information.

Results

In this study, we propose a novel approach for prioritizing pathways by combining results from both pathway analysis tools and literature information. The basic idea is as follows. Whenever there are enough articles that provide evidence on which pathways are relevant to the context, we can be assured that the pathways are indeed related to the context, which is termed as relevance in this paper. However, if there are few or no articles reported, then we should rely on the results from the pathway analysis tools, which is termed as significance in this paper. We realized this concept as an algorithm by introducing Context Score and Impact Score and then combining the two into a single score. Our method ranked truly relevant pathways significantly higher than existing pathway analysis tools in experiments with two data sets.

Conclusions

Our novel framework was implemented as ContextTRAP by utilizing two existing tools, TRAP and BEST. ContextTRAP will be a useful tool for the pathway based analysis of gene expression data since the user can specify the context of the biological experiment in a set of keywords. The web version of ContextTRAP is available at http://biohealth.snu.ac.kr/software/contextTRAP .",2016-12-23 +31294886,A deep dense inception network for protein beta-turn prediction.,"Beta-turn prediction is useful in protein function studies and experimental design. Although recent approaches using machine-learning techniques such as support vector machine (SVM), neural networks, and K nearest neighbor have achieved good results for beta-turn prediction, there is still significant room for improvement. As previous predictors utilized features in a sliding window of 4-20 residues to capture interactions among sequentially neighboring residues, such feature engineering may result in incomplete or biased features and neglect interactions among long-range residues. Deep neural networks provide a new opportunity to address these issues. Here, we proposed a deep dense inception network (DeepDIN) for beta-turn prediction, which takes advantage of the state-of-the-art deep neural network design of dense networks and inception networks. A test on a recent BT6376 benchmark data set shows that DeepDIN outperformed the previous best tool BetaTPred3 significantly in both the overall prediction accuracy and the nine-type beta-turn classification accuracy. A tool, called MUFold-BetaTurn, was developed, which is the first beta-turn prediction tool utilizing deep neural networks. The tool can be downloaded at http://dslsrv8.cs.missouri.edu/~cf797/MUFoldBetaTurn/download.html.",2019-07-23 +26076414,[Guidelines 2.0: systematic development of a comprehensive checklist for a successful guideline enterprise].,"

Introduction

Guideline developers worldwide are struggling with the lack of guidance for the practical steps in the guideline enterprise. Our objective was to systematically compile a comprehensive checklist of items linked to relevant resources and tools that guideline developers would consider for development and support of implementation.

Methods

Data sources included manuals of international guideline developers, literature on guidelines for guidelines with a focus on international and national guideline agencies, professional societies, and recent systematic guidance articles. We reviewed these sources in duplicate, extracted items using a sensitive approach and developed overarching topics that are relevant to guidelines. In an iterative process, we reviewed items for duplication and omissions and involved experts in guideline development for revisions.

Results

We developed a checklist with 18 topics and 146 items and a webpage to facilitate its use by guideline developers (http://cebgrade.mcmaster.ca/guidecheck.html). The topics and items included cover all stages of the guideline enterprise, from planning to formulating recommendations, to dissemination and evaluation. The final itemized guideline development checklist (GDC) includes links to training material and resources for methodology.

Conclusions

The GDC will serve as a resource for those involved in guideline development and we will use crowdsourcing to keep the checklist up to date and enhance it.",2015-06-01 +26296678,Full-length de novo assembly of RNA-seq data in pea (Pisum sativum L.) provides a gene expression atlas and gives insights into root nodulation in this species.,"Next-generation sequencing technologies allow an almost exhaustive survey of the transcriptome, even in species with no available genome sequence. To produce a Unigene set representing most of the expressed genes of pea, 20 cDNA libraries produced from various plant tissues harvested at various developmental stages from plants grown under contrasting nitrogen conditions were sequenced. Around one billion reads and 100 Gb of sequence were de novo assembled. Following several steps of redundancy reduction, 46 099 contigs with N50 length of 1667 nt were identified. These constitute the 'Caméor' Unigene set. The high depth of sequencing allowed identification of rare transcripts and detected expression for approximately 80% of contigs in each library. The Unigene set is now available online (http://bios.dijon.inra.fr/FATAL/cgi/pscam.cgi), allowing (i) searches for pea orthologs of candidate genes based on gene sequences from other species, or based on annotation, (ii) determination of transcript expression patterns using various metrics, (iii) identification of uncharacterized genes with interesting patterns of expression, and (iv) comparison of gene ontology pathways between tissues. This resource has allowed identification of the pea orthologs of major nodulation genes characterized in recent years in model species, as a major step towards deciphering unresolved pea nodulation phenotypes. In addition to a remarkable conservation of the early transcriptome nodulation apparatus between pea and Medicago truncatula, some specific features were highlighted. 
The resource provides a reference for the pea exome, and will facilitate transcriptome and proteome approaches as well as SNP discovery in pea.",2015-10-01 +25889572,mBISON: Finding miRNA target over-representation in gene lists from ChIP-sequencing data.,"

Background

Over-representation of predicted miRNA targets in sets of genes regulated by a given transcription factor (e.g. as defined by ChIP-sequencing experiments) helps to identify biologically relevant miRNA targets and is useful to get insight into post-transcriptional regulation.

Findings

To facilitate the application of this approach we have created the mBISON web-application. mBISON calculates the significance of over-representation of miRNA targets in a given non-ranked gene set. The gene set can be specified either by a list of genes or by one or more ChIP-seq datasets followed by a user-defined peak-gene association procedure. mBISON is based on predictions from TargetScan and uses a randomization step to calculate False-Discovery-Rates for each miRNA, including a correction for gene set specific properties such as 3'UTR length. The tool can be accessed from the following web-resource: http://cbdm.mdc-berlin.de/~mgebhardt/cgi-bin/mbison/home .

Conclusion

mBISON is a web-application that helps to extract functional information about miRNAs from gene lists, which is in contrast to comparable applications easy to use by everyone and can be applied on ChIP-seq data directly.",2015-04-16 +30730766,Decision Support Systems in Oncology.,"Precision medicine is the future of health care: please watch the animation at https://vimeo.com/241154708 . As a technology-intensive and -dependent medical discipline, oncology will be at the vanguard of this impending change. However, to bring about precision medicine, a fundamental conundrum must be solved: Human cognitive capacity, typically constrained to five variables for decision making in the context of the increasing number of available biomarkers and therapeutic options, is a limiting factor to the realization of precision medicine. Given this level of complexity and the restriction of human decision making, current methods are untenable. A solution to this challenge is multifactorial decision support systems (DSSs), continuously learning artificial intelligence platforms that integrate all available data-clinical, imaging, biologic, genetic, cost-to produce validated predictive models. DSSs compare the personalized probable outcomes-toxicity, tumor control, quality of life, cost effectiveness-of various care pathway decisions to ensure optimal efficacy and economy. DSSs can be integrated into the workflows both strategically (at the multidisciplinary tumor board level to support treatment choice, eg, surgery or radiotherapy) and tactically (at the specialist level to support treatment technique, eg, prostate spacer or not). In some countries, the reimbursement of certain treatments, such as proton therapy, is already conditional on the basis that a DSS is used. DSSs have many stakeholders-clinicians, medical directors, medical insurers, patient advocacy groups-and are a natural consequence of big data in health care. 
Here, we provide an overview of DSSs, their challenges, opportunities, and capacity to improve clinical decision making, with an emphasis on the utility in oncology.",2019-02-01 +25326239,DEOP: a database on osmoprotectants and associated pathways. ,"Microorganisms are known to counteract salt stress through salt influx or by the accumulation of osmoprotectants (also called compatible solutes). Understanding the pathways that synthesize and/or breakdown these osmoprotectants is of interest to studies of crops halotolerance and to biotechnology applications that use microbes as cell factories for production of biomass or commercial chemicals. To facilitate the exploration of osmoprotectants, we have developed the first online resource, 'Dragon Explorer of Osmoprotection associated Pathways' (DEOP) that gathers and presents curated information about osmoprotectants, complemented by information about reactions and pathways that use or affect them. A combined total of 141 compounds were confirmed osmoprotectants, which were matched to 1883 reactions and 834 pathways. DEOP can also be used to map genes or microbial genomes to potential osmoprotection-associated pathways, and thus link genes and genomes to other associated osmoprotection information. Moreover, DEOP provides a text-mining utility to search deeper into the scientific literature for supporting evidence or for new associations of osmoprotectants to pathways, reactions, enzymes, genes or organisms. Two case studies are provided to demonstrate the usefulness of DEOP. The system can be accessed at. Database URL: http://www.cbrc.kaust.edu.sa/deop/",2014-10-17 +26467875,Utilizing the PCICS Nursing Guidelines in Managing the CICU Patient.,"The Pediatric Cardiac Intensive Care Society (PCICS) Nursing Guidelines were developed to provide an evidence-based resource for bedside cardiac intensive care unit nursing care. 
Guideline topics include postoperative care, hemodynamic monitoring, arrhythmia management, and nutrition. These evidence-based care guidelines were presented at the 10th International Meeting of PCICS and have been utilized in the preparation of this article. They can be accessed at http://www.pcics.org/resources/pediatric-neonatal/. Utilization of these guidelines in practice is illustrated for single ventricle stage 1 palliation, Fontan operation, truncus arteriosus, and atrioventricular septal defect.",2015-10-01 +26339475,SwissPalm: Protein Palmitoylation database.,"Protein S-palmitoylation is a reversible post-translational modification that regulates many key biological processes, although the full extent and functions of protein S-palmitoylation remain largely unexplored. Recent developments of new chemical methods have allowed the establishment of palmitoyl-proteomes of a variety of cell lines and tissues from different species.  As the amount of information generated by these high-throughput studies is increasing, the field requires centralization and comparison of this information. Here we present SwissPalm ( http://swisspalm.epfl.ch), our open, comprehensive, manually curated resource to study protein S-palmitoylation. It currently encompasses more than 5000 S-palmitoylated protein hits from seven species, and contains more than 500 specific sites of S-palmitoylation. SwissPalm also provides curated information and filters that increase the confidence in true positive hits, and integrates predictions of S-palmitoylated cysteine scores, orthologs and isoform multiple alignments. Systems analysis of the palmitoyl-proteome screens indicate that 10% or more of the human proteome is susceptible to S-palmitoylation. Moreover, ontology and pathway analyses of the human palmitoyl-proteome reveal that key biological functions involve this reversible lipid modification. 
Comparative analysis finally shows a strong crosstalk between S-palmitoylation and other post-translational modifications. Through the compilation of data and continuous updates, SwissPalm will provide a powerful tool to unravel the global importance of protein S-palmitoylation.",2015-07-16 +25740460,PhytoREF: a reference database of the plastidial 16S rRNA gene of photosynthetic eukaryotes with curated taxonomy.,"Photosynthetic eukaryotes have a critical role as the main producers in most ecosystems of the biosphere. The ongoing environmental metabarcoding revolution opens the perspective for holistic ecosystems biological studies of these organisms, in particular the unicellular microalgae that often lack distinctive morphological characters and have complex life cycles. To interpret environmental sequences, metabarcoding necessarily relies on taxonomically curated databases containing reference sequences of the targeted gene (or barcode) from identified organisms. To date, no such reference framework exists for photosynthetic eukaryotes. In this study, we built the PhytoREF database that contains 6490 plastidial 16S rDNA reference sequences that originate from a large diversity of eukaryotes representing all known major photosynthetic lineages. We compiled 3333 amplicon sequences available from public databases and 879 sequences extracted from plastidial genomes, and generated 411 novel sequences from cultured marine microalgal strains belonging to different eukaryotic lineages. A total of 1867 environmental Sanger 16S rDNA sequences were also included in the database. Stringent quality filtering and a phylogeny-based taxonomic classification were applied for each 16S rDNA sequence. The database mainly focuses on marine microalgae, but sequences from land plants (representing half of the PhytoREF sequences) and freshwater taxa were also included to broaden the applicability of PhytoREF to different aquatic and terrestrial habitats. 
PhytoREF, accessible via a web interface (http://phytoref.fr), is a new resource in molecular ecology to foster the discovery, assessment and monitoring of the diversity of photosynthetic eukaryotes using high-throughput sequencing.",2015-04-06 +30827665,"Cloning, expression and characterization of a novel chitosanase from Streptomyces albolongus ATCC 27414.","A gene encoding chitosanase from Streptomyces albolongus was cloned, sequenced and expressed in Escherichia coli. The novel recombinant enzyme (Csn21c) was purified by Ni-NTA Superflow Column and showed a molecular mass of 29.6 kDa by SDS-PAGE. The enzyme Csn21c showed the optimal activity in 50 mmol/L Tris-HCl buffer, pH 8.0, and 50 °C and it was strongly activated (2-fold) by Mn2+. It belonged to glycoside hydrolase 46 family according to NCBI database (http://www.ncbi.nlm.nih.gov/) and displayed an exo-type cleavage pattern, hydrolyzing chitosan mainly into d-glucosamine (GlcN) and chitobiose ((GlcN)2) as confirmed by TLC and MS analysis. This study demonstrated that Csn21c can be an effective tool to produce abundant glucosamine and chitooligosaccharides (COS) from chitosan.",2019-02-20 +29506200,Atlas of the normal intracranial electroencephalogram: neurophysiological awake activity in different cortical areas.,"In contrast to scalp EEG, our knowledge of the normal physiological intracranial EEG activity is scarce. This multicentre study provides an atlas of normal intracranial EEG of the human brain during wakefulness. Here we present the results of power spectra analysis during wakefulness. Intracranial electrodes are placed in or on the brain of epilepsy patients when candidates for surgical treatment and non-invasive approaches failed to sufficiently localize the epileptic focus. Electrode contacts are usually in cortical regions showing epileptic activity, but some are placed in normal regions, at distance from the epileptogenic zone or lesion. 
Intracranial EEG channels defined using strict criteria as very likely to be in healthy brain regions were selected from three tertiary epilepsy centres. All contacts were localized in a common stereotactic space allowing the accumulation and superposition of results from many subjects. Sixty-second artefact-free sections during wakefulness were selected. Power spectra were calculated for 38 brain regions, and compared to a set of channels with no spectral peaks in order to identify significant peaks in the different regions. A total of 1785 channels with normal brain activity from 106 patients were identified. There were on average 2.7 channels per cm3 of cortical grey matter. The number of contacts per brain region averaged 47 (range 6-178). We found significant differences in the spectral density distributions across the different brain lobes, with beta activity in the frontal lobe (20-24 Hz), a clear alpha peak in the occipital lobe (9.25-10.25 Hz), intermediate alpha (8.25-9.25 Hz) and beta (17-20 Hz) frequencies in the parietal lobe, and lower alpha (7.75-8.25 Hz) and delta (0.75-2.25 Hz) peaks in the temporal lobe. Some cortical regions showed a specific electrophysiological signature: peaks present in >60% of channels were found in the precentral gyrus (lateral: peak frequency range, 20-24 Hz; mesial: 24-30 Hz), opercular part of the inferior frontal gyrus (20-24 Hz), cuneus (7.75-8.75 Hz), and hippocampus (0.75-1.25 Hz). Eight per cent of all analysed channels had more than one spectral peak; these channels were mostly recording from sensory and motor regions. Alpha activity was not present throughout the occipital lobe, and some cortical regions showed peaks in delta activity during wakefulness. This is the first atlas of normal intracranial EEG activity; it includes dense coverage of all cortical regions in a common stereotactic space, enabling direct comparisons of EEG across subjects. 
This atlas provides a normative baseline against which clinical EEGs and experimental results can be compared. It is provided as an open web resource (https://mni-open-ieegatlas.research.mcgill.ca).",2018-04-01 +22718786,ChromoHub: a data hub for navigators of chromatin-mediated signalling.,"

Unlabelled

The rapidly increasing research activity focused on chromatin-mediated regulation of epigenetic mechanisms is generating waves of data on writers, readers and erasers of the histone code, such as protein methyltransferases, bromodomains or histone deacetylases. To make these data easily accessible to communities of research scientists coming from diverse horizons, we have created ChromoHub, an online resource where users can map on phylogenetic trees disease associations, protein structures, chemical inhibitors, histone substrates, chromosomal aberrations and other types of data extracted from public repositories and the published literature. The interface can be used to define the structural or chemical coverage of a protein family, highlight domain architectures, interrogate disease relevance or zoom in on specific genes for more detailed information. This open-access resource should serve as a hub for cell biologists, medicinal chemists, structural biologists and other navigators that explore the biology of chromatin signalling.

Availability

http://www.thesgc.org/chromohub/.",2012-06-19 +30873204,Natural Selection Equally Supports the Human Tendencies in Subordination and Domination: A Genome-Wide Study With in silico Confirmation and in vivo Validation in Mice.,"We proposed the following heuristic decision-making rule: ""IF {an excess of a protein relating to the nervous system is an experimentally known physiological marker of low pain sensitivity, fast postinjury recovery, or aggressive, risk/novelty-seeking, anesthetic-like, or similar agonistic-intolerant behavior} AND IF {a single nucleotide polymorphism (SNP) causes overexpression of the gene encoding this protein} THEN {this SNP can be a SNP marker of the tendency in dominance} WHILE {underexpression corresponds to subordination} AND vice versa."" Using this decision-making rule, we analyzed 231 human genes of neuropeptidergic, non-neuropeptidergic, and neurotrophinergic systems that encode neurotrophic and growth factors, interleukins, neurotransmitters, receptors, transporters, and enzymes. These proteins are known as key factors of human social behavior. We analyzed all the 5,052 SNPs within the 70 bp promoter region upstream of the position where the protein-coding transcript starts, which were retrieved from databases Ensembl and dbSNP using our previously created public Web service SNP_TATA_Comparator (http://beehive.bionet.nsc.ru/cgi-bin/mgs/tatascan/start.pl). This definition of the promoter region includes all TATA-binding protein (TBP)-binding sites. A total of 556 and 552 candidate SNP markers contributing to the dominance and the subordination, respectively, were uncovered. On this basis, we determined that 231 human genes under study are subject to natural selection against underexpression (significance p < 0.0005), which equally supports the human tendencies in domination and subordination such as the norm of a reaction (plasticity) of the human social hierarchy. 
These findings explain vertical transmission of domination and subordination traits previously observed in rodent models. Thus, the results of this study equally support both sides of the century-old unsettled scientific debate on whether both aggressiveness and the social hierarchy among humans are inherited (as suggested by Freud and Lorenz) or are due to non-genetic social education, when the children are influenced by older individuals across generations (as proposed by Berkowitz and Fromm).",2019-02-20 +27801297,Path2enet: generation of human pathway-derived networks in an expression specific context.,"

Background

Biological pathways are subsets of the complex biomolecular wiring that occur in living cells. They are usually rationalized and depicted in cartoon maps or charts to show them in a friendly visible way. Despite these efforts to present biological pathways, the current progress of bioinformatics indicates that translation of pathways in networks can be a very useful approach to achieve a computer-based view of the complex processes and interactions that occur in a living system.

Results

We have developed a bioinformatic tool called Path2enet that provides a translation of biological pathways in protein networks integrating several layers of information about the biomolecular nodes in a multiplex view. Path2enet is an R package that reads the relations and links between proteins stored in a comprehensive database of biological pathways, KEGG (Kyoto Encyclopedia of Genes and Genomes, http://www.genome.jp/kegg/ ), and integrates them with expression data from various resources and with data on protein-protein physical interactions. Path2enet tool uses the expression data to determine if a given protein in a network (i.e., a node) is active (ON) or inactive (OFF) in a specific cellular context or sample type. In this way, Path2enet reduces the complexity of the networks and reveals the proteins that are active (expressed) under specific conditions. As a proof of concept, this work presents a practical ""case of use"" generating the pathway-expression-networks corresponding to the NOTCH Signaling Pathway in human B- and T-lymphocytes. This case is produced by the analysis and integration in Path2enet of an experimental dataset of genome-wide expression microarrays produced with these cell types (i.e., B cells and T cells).

Conclusions

Path2enet is an open source and open access tool that allows the construction of pathway-expression-networks, reading and integrating the information from biological pathways, protein interactions and gene expression cell specific data. The development of this type of tools aims to provide a more integrative and global view of the links and associations that exist between the proteins working in specific cellular systems.",2016-10-25 +28472422,PharmMapper 2017 update: a web server for potential drug target identification with a comprehensive target pharmacophore database.,"The PharmMapper online tool is a web server for potential drug target identification by reversed pharmacophore matching the query compound against an in-house pharmacophore model database. The original version of PharmMapper includes more than 7000 target pharmacophores derived from complex crystal structures with corresponding protein target annotations. In this article, we present a new version of the PharmMapper web server, of which the backend pharmacophore database is six times larger than the earlier one, with a total of 23 236 proteins covering 16 159 druggable pharmacophore models and 51 431 ligandable pharmacophore models. The expanded target data cover 450 indications and 4800 molecular functions compared to 110 indications and 349 molecular functions in our last update. In addition, the new web server is united with the statistically meaningful ranking of the identified drug targets, which is achieved through the use of standard scores. It also features an improved user interface. The proposed web server is freely available at http://lilab.ecust.edu.cn/pharmmapper/.",2017-07-01 +30782615,Stand by me(mory): Chronic infection diminishes memory pool via IL-6/STAT1.,"Despite great efforts to eradicate chronic viral infections, they still remain a global health problem. In this issue, Barnstorf et al. (2019. J. Exp. Med. 
https://doi.org/10.1084/jem.20181589) show that virus-unspecific bystander memory T cells are highly affected during chronic viral infection via IL-6/STAT1. Bystander memory T cells are strongly decimated in numbers and change in phenotype and function during chronic viral infection. These data provide new explanations for immune-mediated problems during chronic virus infections.",2019-02-19 +29126286,Deep learning for tumor classification in imaging mass spectrometry.,"

Motivation

Tumor classification using imaging mass spectrometry (IMS) data has a high potential for future applications in pathology. Due to the complexity and size of the data, automated feature extraction and classification steps are required to fully process the data. Since mass spectra exhibit certain structural similarities to image data, deep learning may offer a promising strategy for classification of IMS data as it has been successfully applied to image classification.

Results

Methodologically, we propose an adapted architecture based on deep convolutional networks to handle the characteristics of mass spectrometry data, as well as a strategy to interpret the learned model in the spectral domain based on a sensitivity analysis. The proposed methods are evaluated on two algorithmically challenging tumor classification tasks and compared to a baseline approach. Competitiveness of the proposed methods is shown on both tasks by studying the performance via cross-validation. Moreover, the learned models are analyzed by the proposed sensitivity analysis revealing biologically plausible effects as well as confounding factors of the considered tasks. Thus, this study may serve as a starting point for further development of deep learning approaches in IMS classification tasks.

Availability and implementation

https://gitlab.informatik.uni-bremen.de/digipath/Deep_Learning_for_Tumor_Classification_in_IMS.

Contact

jbehrmann@uni-bremen.de or christianetmann@uni-bremen.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-04-01 +30463894,Predicting Antimicrobial Resistance and Associated Genomic Features from Whole-Genome Sequencing. ,"Thanks to the genomics revolution, thousands of strain-specific whole-genome sequences are now accessible for a wide range of pathogenic bacteria. This availability enables big data informatics approaches to be used to study the spread and acquisition of antimicrobial resistance (AMR). In this issue of the Journal of Clinical Microbiology, Nguyen et al. (M. Nguyen, S. W. Long, P. F. McDermott, R. J. Olsen, R. Olson, R. L. Stevens, G. H. Tyson, S. Zhao, and J. J. Davis, J Clin Microbiol 57:e01260-18, 2019, https://doi.org/10.1128/JCM.01260-18) report the results obtained with their machine learning models based on whole-genome sequencing data to predict the MICs of antibiotics for 5,728 nontyphoidal Salmonella genomes collected over 15 years in the United States. Their major finding demonstrates that MICs can be predicted with an average accuracy of 95% within ±1 2-fold dilution step (confidence interval, 95% to 95%), an average very major error rate of 2.7%, and an average major error rate of 0.1%. Importantly, these models predict MICs with no a priori information about the underlying gene content or resistance phenotypes of the strains, enabling the possibility to identify AMR determinants and rapidly diagnose and prioritize antibiotic use directly from the organism sequence. Employing such tools to diagnose and limit the spread of resistance-conferring mechanisms could help ameliorate the looming antibiotic resistance crisis.",2019-01-30 +21300622,FunSecKB: the Fungal Secretome KnowledgeBase.,"The Fungal Secretome KnowledgeBase (FunSecKB) provides a resource of secreted fungal proteins, i.e. secretomes, identified from all available fungal protein data in the NCBI RefSeq database. 
The secreted proteins were identified using a well evaluated computational protocol which includes SignalP, WolfPsort and Phobius for signal peptide or subcellular location prediction, TMHMM for identifying membrane proteins, and PS-Scan for identifying endoplasmic reticulum (ER) target proteins. The entries were mapped to the UniProt database and any annotations of subcellular locations that were either manually curated or computationally predicted were included in FunSecKB. Using a web-based user interface, the database is searchable, browsable and downloadable by using NCBI's RefSeq accession or gi number, UniProt accession number, keyword or by species. A BLAST utility was integrated to allow users to query the database by sequence similarity. A user submission tool was implemented to support community annotation of subcellular locations of fungal proteins. With the complete fungal data from RefSeq and associated web-based tools, FunSecKB will be a valuable resource for exploring the potential applications of fungal secreted proteins. Database URL: http://proteomics.ysu.edu/secretomes/fungi.php.",2011-02-06 +28212856,"ProBiS tools (algorithm, database, and web servers) for predicting and modeling of biologically interesting proteins.","ProBiS (Protein Binding Sites) Tools consist of algorithm, database, and web servers for prediction of binding sites and protein ligands based on the detection of structurally similar binding sites in the Protein Data Bank. In this article, we review the operations that ProBiS Tools perform, provide comments on the evolution of the tools, and give some implementation details. We review some of its applications to biologically interesting proteins. 
ProBiS Tools are freely available at http://probis.cmm.ki.si and http://probis.nih.gov.",2017-02-15 +31828184,Dietary acrylamide exposure in F344 rats and colon tumor-bearing nude nu/nu mice: Dataset of gene expression of cancer pathway targets and methylation status of tumor suppressor genes in colon mucosae and tumors.,"Dietary acrylamide, a thermally induced food contaminant, at a level (2 mg/kg diet) typifying higher occurrence in certain food products - is neither an independent carcinogen nor a tumor promoter in the colon. This is evidenced by our previous studies using the medium-term azoxymethane (AOM)-induced colon tumorigenesis assay in F344 rats and the human colon tumor xenograft model in athymic nude (nu/nu) mice (https://doi.org/10.1371/journal.pone.0073916) [1]. In addition, we found that acrylamide may act as a colon co-carcinogen in association with a known carcinogen (AOM) in F344 rats. Furthermore, exposure to acrylamide at 2 mg/kg in the diet was not associated with any toxicologically relevant changes in clinical biochemistry, hematology, and apical endpoints in healthy rats (exposed only to saline injections) (https://doi.org/10.1016/j.toxrep.2016.08.010) [2]. Here we report data from our previous investigation [1] on gene expression of cancer pathway targets as well as the methylation status of select tumor suppressor genes. Briefly, mRNA and DNA were extracted from (a) colon mucosae and tumors from F344 rats exposed to AOM or saline and (b) athymic nude (nu/nu) mice bearing human colon tumor xenografts, both exposed to dietary acrylamide at concentrations of 0 or 2 mg/kg diet for 20 and 4 weeks, respectively. RT2 Profiler PCR Cancer PathwayFinder Arrays (Qiagen) and EpiTect Methyl II DNA Restriction kits and PCR Assays (Qiagen) were used to detect cancer-relevant gene expression (84 genes representing 9 pathways) and the methylation status of the CpG islands associated with 22 tumor suppressor genes in colon mucosae, tumors and xenografts. 
Additionally, RT2 Profiler PCR Arrays (Qiagen) for cell cycle regulation, growth factors, inflammatory cytokines and receptors, and inflammatory response and autoimmunity were used to investigate the gene expression (84 genes in each array) of targets involved in these select cellular pathways in the colon mucosae from AOM-treated F344 rats.",2019-11-07 +27698587,Flora-On: Occurrence data of the vascular flora of mainland Portugal.,"The Flora-On dataset currently includes 253,310 occurrence records for the class Embryopsidae (vascular plants), comprising data collated via the platform http://flora-on.pt/ relating to observation records of vascular plants across mainland Portugal. Observations are uploaded directly to the database primarily by experienced botanists and naturalists, typically on a weekly basis, and consist of geo-referenced data points for species (or infraspecific taxa) along with their date of observation and phenological state. The Flora-On project aims to compile and make publicly accessible chorological, ecological, morphological and photographic information for the entire vascular flora of Portugal. The project's website offers powerful query and visualization capabilities, of which we highlight the probabilistic bioclimatic and phenological queries which operate based on the empirical density distributions of species in those variables. Flora-On was created and continues to be maintained by volunteers who are Associate members of Sociedade Portuguesa de Botânica (Botanical Society of Portugal). 
Given its focus on research-grade and current data, the Flora-On project represents a significant contribution to the knowledge of the present distribution and status of the Portuguese flora.",2016-09-09 +31617466,Inferring disease and pathway associations of long non-coding RNAs using heterogeneous information network model.,"Recent findings from biological experiments demonstrate that long non-coding RNAs (lncRNAs) are actively involved in critical cellular processes and are associated with innumerable diseases. Computational prediction of lncRNA-disease association draws tremendous research attention nowadays. This paper proposes a machine learning model that predicts lncRNA-disease associations using Heterogeneous Information Network (HIN) of lncRNAs and diseases. A Support Vector Machine classifier is developed using the feature set extracted from a meta-path-based parameter, Association Index derived from the HIN. Performance of the model is validated using standard statistical metrics and it generated an AUC value of 0.87, which is better than the existing methods in the literature. Results are further validated using the recent literature and many of the predicted lncRNA-disease associations are identified as actually existing. This paper also proposes an HIN-based methodology to associate lncRNAs with pathways in which they may have biological influence. A case study on the pathway associations of four well-known lncRNAs (HOTAIR, TUG1, NEAT1, and MALAT1) has been conducted. It has been observed that many times the same lncRNA is associated with more than one biologically related pathways. Further exploration is needed to substantiate whether such lncRNAs have any role in determining the pathway interplay. 
The script and sample data for the model construction is freely available at http://bdbl.nitc.ac.in/LncDisPath/index.html.",2019-08-01 +30652603,Optimizing Outcome Prediction in Diffuse Large B-Cell Lymphoma by Use of Machine Learning and Nationwide Lymphoma Registries: A Nordic Lymphoma Group Study.,"

Purpose

Prognostic models for diffuse large B-cell lymphoma (DLBCL), such as the International Prognostic Index (IPI) are widely used in clinical practice. The models are typically developed with simplicity in mind and thus do not exploit the full potential of detailed clinical data. This study investigated whether nationwide lymphoma registries containing clinical data and machine learning techniques could prove to be useful for building modern prognostic tools.

Patients and methods

This study was based on nationwide lymphoma registries from Denmark and Sweden, which include large amounts of clinicopathologic data. Using the Danish DLBCL cohort, a stacking approach was used to build a new prognostic model that leverages the strengths of different survival models. To compare the performance of the stacking approach with established prognostic models, cross-validation was used to estimate the concordance index (C-index), time-varying area under the curve, and integrated Brier score. Finally, the generalizability was tested by applying the new model to the Swedish cohort.

Results

In total, 2,759 and 2,414 patients were included from the Danish and Swedish cohorts, respectively. In the Danish cohort, the stacking approach led to the lowest integrated Brier score, indicating that the survival curves obtained from the stacking model fitted the observed survival the best. The C-index and time-varying area under the curve indicated that the stacked model (C-index: Denmark [DK], 0.756; Sweden [SE], 0.744) had good discriminative capabilities compared with the other considered prognostic models (IPI: DK, 0.662; SE, 0.661; and National Comprehensive Cancer Network-IPI: DK, 0.681; SE, 0.681). Furthermore, these results were reproducible in the independent Swedish cohort.

Conclusion

A new prognostic model based on machine learning techniques was developed and was shown to significantly outperform established prognostic indices for DLBCL. The model is available at https://lymphomapredictor.org .",2018-12-01 +30657726,Evidence for heterogeneous subsarcolemmal Na+ levels in rat ventricular myocytes.,"The intracellular Na+ concentration ([Na+]) regulates cardiac contractility. Previous studies have suggested that subsarcolemmal [Na+] is higher than cytosolic [Na+] in cardiac myocytes, but this concept remains controversial. Here, we used electrophysiological experiments and mathematical modeling to test whether there are subsarcolemmal pools with different [Na+] and dynamics compared with the bulk cytosol in rat ventricular myocytes. A Na+ dependency curve for Na+-K+-ATPase (NKA) current was recorded with symmetrical Na+ solutions, i.e., the same [Na+] in the superfusate and internal solution. This curve was used to estimate [Na+] sensed by NKA in other experiments. Three experimental observations suggested that [Na+] is higher near NKA than in the bulk cytosol: 1) when extracellular [Na+] was high, [Na+] sensed by NKA was ~6 mM higher than the internal solution in quiescent cells; 2) long trains of Na+ channel activation almost doubled this gradient; compared with an even intracellular distribution of Na+, the increase of [Na+] sensed by NKA was 10 times higher than expected, suggesting a local Na+ domain; and 3) accumulation of Na+ near NKA after trains of Na+ channel activation dissipated very slowly. Finally, mathematical models assuming heterogeneity of [Na+] between NKA and the Na+ channel better reproduced experimental data than the homogeneous model. In conclusion, our data suggest that NKA-sensed [Na+] is higher than [Na+] in the bulk cytosol and that there are differential Na+ pools in the subsarcolemmal space, which could be important for cardiac contractility and arrhythmogenesis. 
NEW & NOTEWORTHY Our data suggest that the Na+-K+-ATPase-sensed Na+ concentration is higher than the Na+ concentration in the bulk cytosol and that there are differential Na+ pools in the subsarcolemmal space, which could be important for cardiac contractility and arrhythmogenesis. Listen to this article's corresponding podcast at https://ajpheart.podbean.com/e/heterogeneous-sodium-in-ventricular-myocytes/ .",2019-01-18 +25432969,Triticeae resources in Ensembl Plants.,"Recent developments in DNA sequencing have enabled the large and complex genomes of many crop species to be determined for the first time, even those previously intractable due to their polyploid nature. Indeed, over the course of the last 2 years, the genome sequences of several commercially important cereals, notably barley and bread wheat, have become available, as well as those of related wild species. While still incomplete, comparison with other, more completely assembled species suggests that coverage of genic regions is likely to be high. Ensembl Plants (http://plants.ensembl.org) is an integrative resource organizing, analyzing and visualizing genome-scale information for important crop and model plants. Available data include reference genome sequence, variant loci, gene models and functional annotation. For variant loci, individual and population genotypes, linkage information and, where available, phenotypic information are shown. Comparative analyses are performed on DNA and protein sequence alignments. The resulting genome alignments and gene trees, representing the implied evolutionary history of the gene family, are made available for visualization and analysis. Driven by the case of bread wheat, specific extensions to the analysis pipelines and web interface have recently been developed to support polyploid genomes. 
Data in Ensembl Plants is accessible through a genome browser incorporating various specialist interfaces for different data types, and through a variety of additional methods for programmatic access and data mining. These interfaces are consistent with those offered through the Ensembl interface for the genomes of non-plant species, including those of plant pathogens, pests and pollinators, facilitating the study of the plant in its environment.",2014-11-27 +30746643,The Macroscope: A tool for examining the historical structure of language.,"The recent rise in digitized historical text has made it possible to quantitatively study our psychological past. This involves understanding changes in what words meant, how words were used, and how these changes may have responded to changes in the environment, such as in healthcare, wealth disparity, and war. Here we make available a tool, the Macroscope, for studying historical changes in language over the last two centuries. The Macroscope uses over 155 billion words of historical text, which will grow as we include new historical corpora, and derives word properties from frequency-of-usage and co-occurrence patterns over time. Using co-occurrence patterns, the Macroscope can track changes in semantics, allowing researchers to identify semantically stable and unstable words in historical text and providing quantitative information about changes in a word's valence, arousal, and concreteness, as well as information about new properties, such as semantic drift. The Macroscope provides information about both the local and global properties of words, as well as information about how these properties change over time, allowing researchers to visualize and download data in order to make inferences about historical psychology. 
Although quantitative historical psychology represents a largely new field of study, we see this work as complementing a wealth of other historical investigations, offering new insights and new approaches to understanding existing theory. The Macroscope is available online at http://www.macroscope.tech .",2019-08-01 +29329368,Discovering personalized driver mutation profiles of single samples in cancer by network control strategy.,"Motivation:It is a challenging task to discover personalized driver genes that provide crucial information on disease risk and drug sensitivity for individual patients. However, few methods have been proposed to identify the personalized-sample driver genes from the cancer omics data due to the lack of samples for each individual. To circumvent this problem, here we present a novel single-sample controller strategy (SCS) to identify personalized driver mutation profiles from network controllability perspective. Results:SCS integrates mutation data and expression data into a reference molecular network for each patient to obtain the driver mutation profiles in a personalized-sample manner. This is the first such a computational framework, to bridge the personalized driver mutation discovery problem and the structural network controllability problem. The key idea of SCS is to detect those mutated genes which can achieve the transition from the normal state to the disease state based on each individual omics data from network controllability perspective. We widely validate the driver mutation profiles of our SCS from three aspects: (i) the improved precision for the predicted driver genes in the population compared with other driver-focus methods; (ii) the effectiveness for discovering the personalized driver genes and (iii) the application to the risk assessment through the integration of the driver mutation signature and expression data, respectively, across the five distinct benchmarks from The Cancer Genome Atlas. 
In conclusion, our SCS makes efficient and robust personalized driver mutation profiles predictions, opening new avenues in personalized medicine and targeted cancer therapy. Availability and implementation:The MATLAB-package for our SCS is freely available from http://sysbio.sibcb.ac.cn/cb/chenlab/software.htm. Contact:zhangsw@nwpu.edu.cn or zengtao@sibs.ac.cn or lnchen@sibs.ac.cn. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-06-01 +23203887,Zinc Finger Database (ZiFDB) v2.0: a comprehensive database of C₂H₂ zinc fingers and engineered zinc finger arrays.,"ZiFDB (Zinc Finger Database, http://zifdb.msi.umn.edu) is a web-accessible database that houses information on individual C(2)H(2) zinc fingers (ZFs) and engineered zinc finger arrays (ZFAs). ZiFDB serves as a resource for biologists interested in engineering ZFAs for use as sequence-specific DNA-binding reagents. Here, we describe four new features of ZiFDB: (i) the database allows users to input new ZFs and ZFAs; (ii) a shadow database temporarily stores user-submitted data, pending approval by the database curator and subsequent loading into the persistent database; (iii) ZiFDB contains 181 Context-Dependent Assembly (CoDA) ZFAs, which were generated by this newly described ZFA engineering platform; and (iv) the database also now contains 319 F1F2 CoDA units and 334 F2F3 CoDA units that can be used to construct CoDA arrays. In total, the new release of ZiFDB contains 1226 ZFs and 1123 ZFAs.",2012-11-29 +29949952,PartsGenie: an integrated tool for optimizing and sharing synthetic biology parts.,"

Motivation

Synthetic biology is typified by developing novel genetic constructs from the assembly of reusable synthetic DNA parts, which contain one or more features such as promoters, ribosome binding sites, coding sequences and terminators. PartsGenie is introduced to facilitate the computational design of such synthetic biology parts, bridging the gap between optimization tools for the design of novel parts, the representation of such parts in community-developed data standards such as Synthetic Biology Open Language, and their sharing in journal-recommended data repositories. Consisting of a drag-and-drop web interface, a number of DNA optimization algorithms, and an interface to the well-used data repository JBEI ICE, PartsGenie facilitates the design, optimization and dissemination of reusable synthetic biology parts through an integrated application.

Availability and implementation

PartsGenie is freely available at https://parts.synbiochem.co.uk.",2018-07-01 +30590445,Systematic selection of chemical fingerprint features improves the Gibbs energy prediction of biochemical reactions.,"

Motivation

Accurate and wide-ranging prediction of thermodynamic parameters for biochemical reactions can facilitate deeper insights into the workings and the design of metabolic systems.

Results

Here, we introduce a machine learning method with chemical fingerprint-based features for the prediction of the Gibbs free energy of biochemical reactions. From a large pool of 2D fingerprint-based features, this method systematically selects a small number of relevant ones and uses them to construct a regularized linear model. Since a manual selection of 2D structure-based features can be a tedious and time-consuming task, requiring expert knowledge about the structure-activity relationship of chemical compounds, the systematic feature selection step in our method offers a convenient means to identify relevant 2D fingerprint-based features. By comparing our method with state-of-the-art linear regression-based methods for the standard Gibbs free energy prediction, we demonstrated that its prediction accuracy and prediction coverage are most favorable. Our results show direct evidence that a number of 2D fingerprints collectively provide useful information about the Gibbs free energy of biochemical reactions and that our systematic feature selection procedure provides a convenient way to identify them.

Availability and implementation

Our software is freely available for download at http://sfb.kaust.edu.sa/Pages/Software.aspx.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +29228285,Matrix factorization-based data fusion for the prediction of lncRNA-disease associations.,"Motivation:Long non-coding RNAs (lncRNAs) play crucial roles in complex disease diagnosis, prognosis, prevention and treatment, but only a small portion of lncRNA-disease associations have been experimentally verified. Various computational models have been proposed to identify lncRNA-disease associations by integrating heterogeneous data sources. However, existing models generally ignore the intrinsic structure of data sources or treat them as equally relevant, while they may not be. Results:To accurately identify lncRNA-disease associations, we propose a Matrix Factorization based LncRNA-Disease Association prediction model (MFLDA in short). MFLDA decomposes data matrices of heterogeneous data sources into low-rank matrices via matrix tri-factorization to explore and exploit their intrinsic and shared structure. MFLDA can select and integrate the data sources by assigning different weights to them. An iterative solution is further introduced to simultaneously optimize the weights and low-rank matrices. Next, MFLDA uses the optimized low-rank matrices to reconstruct the lncRNA-disease association matrix and thus to identify potential associations. In 5-fold cross validation experiments to identify verified lncRNA-disease associations, MFLDA achieves an area under the receiver operating characteristic curve (AUC) of 0.7408, at least 3% higher than those given by state-of-the-art data fusion based computational models. An empirical study on identifying masked lncRNA-disease associations again shows that MFLDA can identify potential associations more accurately than competing models. 
A case study on identifying lncRNAs associated with breast, lung and stomach cancers show that 38 out of 45 (84%) associations predicted by MFLDA are supported by recent biomedical literature and further proves the capability of MFLDA in identifying novel lncRNA-disease associations. MFLDA is a general data fusion framework, and as such it can be adopted to predict associations between other biological entities. Availability and implementation:The source code for MFLDA is available at: http://mlda.swu.edu.cn/codes.php? name = MFLDA. Contact:gxyu@swu.edu.cn. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +25057899,"Design, synthesis and pharmacological evaluation of novel vanadium-containing complexes as antidiabetic agents.","Based on the data about structure and antidiabetic activity of twenty seven vanadium and zinc coordination complexes collected from literature we developed QSAR models using the GUSAR program. These QSAR models were applied to 10 novel vanadium coordination complexes designed in silico in order to predict their hypoglycemic action. The five most promising substances with predicted potent hypoglycemic action were selected for chemical synthesis and pharmacological evaluation. The selected coordination vanadium complexes were synthesized and tested in vitro and in vivo for their hypoglycemic activities and acute rat toxicity. Estimation of acute rat toxicity of these five vanadium complexes was performed using a freely available web-resource (http://way2drug.com/GUSAR/acutoxpredict.html). It has shown that the selected compounds belong to the class of moderate toxic pharmaceutical agents, according to the scale of Hodge and Sterner. Comparison with the predicted data has demonstrated a reasonable correspondence between the experimental and predicted values of hypoglycemic activity and toxicity. 
Bis{tert-butyl[amino(imino)methyl]carbamato}oxovanadium (IV) and sodium(2,2'-Bipyridyl)oxo-diperoxovanadate(V) octahydrate were identified as the most potent hypoglycemic agents among the synthesized compounds.",2014-07-24 +25707511,LncRNA2Function: a comprehensive resource for functional investigation of human lncRNAs based on RNA-seq data.,"

Background

The GENCODE project has collected over 10,000 human long non-coding RNA (lncRNA) genes. However, the vast majority of them remain to be functionally characterized. Computational investigation of potential functions of human lncRNA genes is helpful to guide further experimental studies on lncRNAs.

Results

In this study, based on expression correlation between lncRNAs and protein-coding genes across 19 human normal tissues, we used the hypergeometric test to functionally annotate a single lncRNA or a set of lncRNAs with significantly enriched functional terms among the protein-coding genes that are significantly co-expressed with the lncRNA(s). The functional terms include all nodes in the Gene Ontology (GO) and 4,380 human biological pathways collected from 12 pathway databases. We successfully mapped 9,625 human lncRNA genes to GO terms and biological pathways, and then developed the first ontology-driven user-friendly web interface named lncRNA2Function, which enables researchers to browse the lncRNAs associated with a specific functional term, the functional terms associated with a specific lncRNA, or to assign functional terms to a set of human lncRNA genes, such as a cluster of co-expressed lncRNAs. The lncRNA2Function is freely available at http://mlg.hit.edu.cn/lncrna2function.

Conclusions

The LncRNA2Function is an important resource for further investigating the functions of a single human lncRNA, or functionally annotating a set of human lncRNAs of interest.",2015-01-29 +29668996,BRepertoire: a user-friendly web server for analysing antibody repertoire data.,"Antibody repertoire analysis by high throughput sequencing is now widely used, but a persisting challenge is enabling immunologists to explore their data to discover discriminating repertoire features for their own particular investigations. Computational methods are necessary for large-scale evaluation of antibody properties. We have developed BRepertoire, a suite of user-friendly web-based software tools for large-scale statistical analyses of repertoire data. The software is able to use data preprocessed by IMGT, and performs statistical and comparative analyses with versatile plotting options. BRepertoire has been designed to operate in various modes, for example analysing sequence-specific V(D)J gene usage, discerning physico-chemical properties of the CDR regions and clustering of clonotypes. Those analyses are performed on the fly by a number of R packages and are deployed by a shiny web platform. The user can download the analysed data in different table formats and save the generated plots as image files ready for publication. We believe BRepertoire to be a versatile analytical tool that complements experimental studies of immune repertoires. To illustrate the server's functionality, we show use cases including differential gene usage in a vaccination dataset and analysis of CDR3H properties in old and young individuals. The server is accessible under http://mabra.biomed.kcl.ac.uk/BRepertoire.",2018-07-01 +27926382,Prediction of anti-cancer drug response by kernelized multi-task learning.,"

Motivation

Chemotherapy or targeted therapy are two of the main treatment options for many types of cancer. Due to the heterogeneous nature of cancer, the success of the therapeutic agents differs among patients. In this sense, determination of chemotherapeutic response of the malign cells is essential for establishing a personalized treatment protocol and designing new drugs. With the recent technological advances in producing large amounts of pharmacogenomic data, in silico methods have become important tools to achieve this aim.

Objective

Data produced by using cancer cell lines provide a test bed for machine learning algorithms that try to predict the response of cancer cells to different agents. The potential use of these algorithms in drug discovery/repositioning and personalized treatments motivated us in this study to work on predicting drug response by exploiting the recent pharmacogenomic databases. We aim to improve the prediction of drug response of cancer cell lines.

Methods

We propose to use a method that employs multi-task learning to improve learning by transfer, and kernels to extract non-linear relationships to predict drug response.

Results

The method outperforms three state-of-the-art algorithms on three anti-cancer drug screen datasets. We achieved a mean squared error of 3.305 and 0.501 on two different large scale screen data sets. On a recent challenge dataset, we obtained an error of 0.556. We report the methodological comparison results as well as the performance of the proposed algorithm on each single drug.

Conclusion

The results show that the proposed method is a strong candidate to predict drug response of cancer cell lines in silico for pre-clinical studies. The source code of the algorithm and data used can be obtained from http://mtan.etu.edu.tr/Supplementary/kMTrace/.",2016-10-03 +25300481,"SMART: recent updates, new developments and status in 2015.","SMART (Simple Modular Architecture Research Tool) is a web resource (http://smart.embl.de/) providing simple identification and extensive annotation of protein domains and the exploration of protein domain architectures. In the current version, SMART contains manually curated models for more than 1200 protein domains, with ∼ 200 new models since our last update article. The underlying protein databases were synchronized with UniProt, Ensembl and STRING, bringing the total number of annotated domains and other protein features above 100 million. SMART's 'Genomic' mode, which annotates proteins from completely sequenced genomes was greatly expanded and now includes 2031 species, compared to 1133 in the previous release. SMART analysis results pages have been completely redesigned and include links to several new information sources. A new, vector-based display engine has been developed for protein schematics in SMART, which can also be exported as high-resolution bitmap images for easy inclusion into other documents. Taxonomic tree displays in SMART have been significantly improved, and can be easily navigated using the integrated search engine.",2014-10-09 +26980513,Principles of metadata organization at the ENCODE data coordination center. ,"The Encyclopedia of DNA Elements (ENCODE) Data Coordinating Center (DCC) is responsible for organizing, describing and providing access to the diverse data generated by the ENCODE project. 
The description of these data, known as metadata, includes the biological sample used as input, the protocols and assays performed on these samples, the data files generated from the results and the computational methods used to analyze the data. Here, we outline the principles and philosophy used to define the ENCODE metadata in order to create a metadata standard that can be applied to diverse assays and multiple genomic projects. In addition, we present how the data are validated and used by the ENCODE DCC in creating the ENCODE Portal (https://www.encodeproject.org/). Database URL: www.encodeproject.org.",2016-03-15 +31193989,Data on MECOM rearrangement-driven chromosomal aberrations in myeloid malignancies.,"Data in this article presents the results of conventional cytogenetics and fluorescence in situ hybridization (FISH) analyses in 129 patients with confirmed MECOM rearrangement (https://doi.org/10.1016/j.cancergen.2019.03.002) [1]. Generally, the MECOM rearrangement has arisen through translocation, inversion, and insertion and/or unknown mechanism. In addition to the typical chromosomal aberrations, inv(3)(q21q26.2) and t(3; 3)(q21; q26.6) [2-4], over 50% of cases presented here exhibit a wide spectrum of MECOM rearrangement-driven, atypical chromosomal aberrations, including inv(3) with breakpoint other than 3q21; t(1; 3); t(2; 3); t(3; 6); t(3; 8); t(3; 12); t(3; 17); t(3; 21) as well as an insertion of 3q26.2 into different chromosomes. These cases are thoroughly characterized by karyotyping, interphase-, metaphase-, map-back FISH and whole chromosomal painting (WCP) analyses.",2019-05-23 +23203989,The BioGRID interaction database: 2013 update.,"The Biological General Repository for Interaction Datasets (BioGRID: http//thebiogrid.org) is an open access archive of genetic and protein interactions that are curated from the primary biomedical literature for all major model organism species. 
As of September 2012, BioGRID houses more than 500 000 manually annotated interactions from more than 30 model organisms. BioGRID maintains complete curation coverage of the literature for the budding yeast Saccharomyces cerevisiae, the fission yeast Schizosaccharomyces pombe and the model plant Arabidopsis thaliana. A number of themed curation projects in areas of biomedical importance are also supported. BioGRID has established collaborations and/or shares data records for the annotation of interactions and phenotypes with most major model organism databases, including Saccharomyces Genome Database, PomBase, WormBase, FlyBase and The Arabidopsis Information Resource. BioGRID also actively engages with the text-mining community to benchmark and deploy automated tools to expedite curation workflows. BioGRID data are freely accessible through both a user-defined interactive interface and in batch downloads in a wide variety of formats, including PSI-MI2.5 and tab-delimited files. BioGRID records can also be interrogated and analyzed with a series of new bioinformatics tools, which include a post-translational modification viewer, a graphical viewer, a REST service and a Cytoscape plugin.",2012-11-30 +30813912,An update to database TraVA: organ-specific cold stress response in Arabidopsis thaliana.,"

Background

Transcriptome map is a powerful tool for a variety of biological studies; transcriptome maps that include different organs, tissues, cells and stages of development are currently available for at least 30 plants. Some of them include samples treated by environmental or biotic stresses. However, most studies explore only limited set of organs and developmental stages (leaves or seedlings). In order to provide broader view of organ-specific strategies of cold stress response we studied expression changes that follow exposure to cold (+ 4 °C) in different aerial parts of plant: cotyledons, hypocotyl, leaves, young flowers, mature flowers and seeds using RNA-seq.

Results

The results on differential expression in leaves are congruent with current knowledge on stress response pathways, in particular, the role of CBF genes. In other organs, both essence and dynamics of gene expression changes are different. We show the involvement of genes that are confined to narrow expression patterns in non-stress conditions into stress response. In particular, the genes that control cell wall modification in pollen, are activated in leaves. In seeds, predominant pattern is the change of lipid metabolism.

Conclusions

Stress response is highly organ-specific; different pathways are involved in this process in each type of organs. The results were integrated with previously published transcriptome map of Arabidopsis thaliana and used for an update of a public database TraVa: http://travadb.org/browse/Species=AthStress .",2019-02-15 +28511181,1-CMDb: A Curated Database of Genomic Variations of the One-Carbon Metabolism Pathway.,"

Background

The one-carbon metabolism pathway is vital in maintaining tissue homeostasis by driving the critical reactions of folate and methionine cycles. A myriad of genetic and epigenetic events mark the rate of reactions in a tissue-specific manner. Integration of these to predict and provide personalized health management requires robust computational tools that can process multiomics data. The DNA sequences that may determine the chain of biological events and the endpoint reactions within one-carbon metabolism genes remain to be comprehensively recorded. Hence, we designed the one-carbon metabolism database (1-CMDb) as a platform to interrogate its association with a host of human disorders.

Methods

DNA sequence and network information of a total of 48 genes were extracted from a literature survey and KEGG pathway that are involved in the one-carbon folate-mediated pathway. The information generated, collected, and compiled for all these genes from the UCSC genome browser included the single nucleotide polymorphisms (SNPs), CpGs, copy number variations (CNVs), and miRNAs, and a comprehensive database was created. Furthermore, a significant correlation analysis was performed for SNPs in the pathway genes.

Results

Detailed data of SNPs, CNVs, CpG islands, and miRNAs for 48 folate pathway genes were compiled. The SNPs in CNVs (9670), CpGs (984), and miRNAs (14) were also compiled for all pathway genes. The SIFT score, the prediction and PolyPhen score, as well as the prediction for each of the SNPs were tabulated and represented for folate pathway genes. Also included in the database for folate pathway genes were the links to 124 various phenotypes and disease associations as reported in the literature and from publicly available information.

Conclusion

A comprehensive database was generated consisting of genomic elements within and among SNPs, CNVs, CpGs, and miRNAs of one-carbon metabolism pathways to facilitate (a) single source of information and (b) integration into large-genome scale network analysis to be developed in the future by the scientific community. The database can be accessed at http://slsdb.manipal.edu/ocm/.",2017-05-17 +30388198,Bastion3: a two-layer ensemble predictor of type III secreted effectors.,"

Motivation

Type III secreted effectors (T3SEs) can be injected into host cell cytoplasm via type III secretion systems (T3SSs) to modulate interactions between Gram-negative bacterial pathogens and their hosts. Due to their relevance in pathogen-host interactions, significant computational efforts have been put toward identification of T3SEs and these in turn have stimulated new T3SE discoveries. However, as T3SEs with new characteristics are discovered, these existing computational tools reveal important limitations: (i) most of the trained machine learning models are based on the N-terminus (or incorporating also the C-terminus) instead of the proteins' complete sequences, and (ii) the underlying models (trained with classic algorithms) employed only few features, most of which were extracted based on sequence-information alone. To achieve better T3SE prediction, we must identify more powerful, informative features and investigate how to effectively integrate these into a comprehensive model.

Results

In this work, we present Bastion3, a two-layer ensemble predictor developed to accurately identify type III secreted effectors from protein sequence data. In contrast with existing methods that employ single models with few features, Bastion3 explores a wide range of features, from various types, trains single models based on these features and finally integrates these models through ensemble learning. We trained the models using a new gradient boosting machine, LightGBM and further boosted the models' performances through a novel genetic algorithm (GA) based two-step parameter optimization strategy. Our benchmark test demonstrates that Bastion3 achieves a much better performance compared to commonly used methods, with an ACC value of 0.959, F-value of 0.958, MCC value of 0.917 and AUC value of 0.956, which comprehensively outperformed all other toolkits by more than 5.6% in ACC value, 5.7% in F-value, 12.4% in MCC value and 5.8% in AUC value. Based on our proposed two-layer ensemble model, we further developed a user-friendly online toolkit, maximizing convenience for experimental scientists toward T3SE prediction. With its design to ease future discoveries of novel T3SEs and improved performance, Bastion3 is poised to become a widely used, state-of-the-art toolkit for T3SE prediction.

Availability and implementation

http://bastion3.erc.monash.edu/.

Contact

selkrig@embl.de or wyztli@163.com or or trevor.lithgow@monash.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-06-01 +30596133,Spatial distribution of marker gene activity in the mouse lung during alveolarization.,"This data is a curated collection of visual images of gene expression patterns from the pre- and post-natal mouse lung, accompanied by associated mRNA probe sequences and RNA-Seq expression profiles. Mammalian lungs undergo significant growth and cellular differentiation before and after the transition to breathing air. Documenting normal lung development is an important step in understanding abnormal lung development, as well as the challenges faced during a preterm birth. Images in this dataset indicate the spatial distribution of mRNA transcripts for over 500 different genes that are active during lung development, as initially determined via RNA-Seq. Images were systematically acquired using high-throughput in situ hybridization with non-radioactive digoxigenin-labeled mRNA probes across mouse lungs from developmental time points E16.5, E18.5, P7, and P28. The dataset was produced as part of The Molecular Atlas of Lung Development Program (LungMAP) and is hosted at https://lungmap.net. This manuscript describes the nature of the data and the protocols for generating the dataset.",2018-11-03 +30110138,"flowIO: Flow cytometry standard conformance testing, editing, and export tool.","The Flow Cytometry Standard (FCS) format is a widely accepted norm for storing Flow Cytometry (FCM) data. Its goal as a standard is to allow FCM data sharing and re-analysis. Over more than three decades of its existence FCS has evolved into a well-defined, flexible file format reflecting technical changes in the FCM field. Its flexibility as well as rising numbers of instrument vendors leads to suboptimal implementations of FCS in some cases. Such situations compromise the primary goal of the standard and hinder the ability to reproduce FCM analyses. 
It is further underlined by rapid rise of advanced FCM analyses, often carried out outside traditional software tools and heavily relying on standard data storage and presentation. We have developed flowIO, an R package which tests FCS file conformance with the standard as defined by International Society for Advancement of Cytometry (ISAC) normative. Along with the package we provide a web based application (also at http://bioinformin.cesnet.cz/flowIO/) allowing user friendly access to the conformance testing as well as FCS file editing and export for further analysis.",2018-08-15 +30773498,Parkinson's Disease Is Associated with Risk of Sexual Dysfunction in Men but Not in Women: A Systematic Review and Meta-Analysis.,"

Background

Mounting evidence has emerged suggesting that patients with Parkinson's disease (PD) are susceptible to sexual dysfunction (SD).

Aim

To better clarify the relationship between PD and SD.

Methods

PubMed, Embase, Cochrane Library database, and PsychINFO database were systematically searched for pertinent studies evaluating sexual function in the patients with PD. This systematic review and meta-analysis have been registered on PROSPERO (ID: CRD42018108714; http://www.crd.york.ac.uk/PROSPERO).

Outcomes

The association between PD and SD was assessed using relative risk (RR) with 95% CI. The quality of evidence was ranked by the GRADE profiler.

Results

11 observational studies met the predefined criteria for inclusion, enrolling 30,150 subjects from both the PD group and healthy control group (mean age 54.6-75.1 years). Synthesis results revealed that PD was associated with an elevated risk of SD in males (7 studies; 1.79; 95% CI = 1.26-2.54, P = .001; heterogeneity: I2 = 73.2%, P < .001). However, when restricted to female subjects, the combined RR from 3 eligible studies suggested a lack of significant association between PD and SD (RR = 1.3, 95% CI = 0.64-2.61, P = .469; heterogeneity: I2 = 80.0%, P = .007). The GRADE profiler indicated the overall quality of the evidence was low in studies including males and very low in studies including females.

Clinical implications

The current meta-analysis indicated that men with PD were more likely to experience SD than those without PD. In female subjects, however, PD seemed to not be associated with a high prevalence of SD compared with healthy controls. Based on these findings, patients with PD should be routinely assessed for sexual functioning, especially males.

Strengths & limitations

This is the first systematic review and meta-analysis of the association between PD and the risks of SD in both males and females. However, substantial heterogeneities were detected across the included studies.

Conclusion

A hazardous effect of PD for developing SD was detected in men but not in women. As a result, sexual function assessment and appropriate therapy are recommended for men with PD in clinical practice. Zhao S, Wang J, Xie Q, et al. Parkinson's Disease Is Associated with Risk of Sexual Dysfunction in Men but Not in Women: A Systematic Review and Meta-Analysis J Sex Med 2019;16:434-446.",2019-02-14 +30804701,"The PPEAO experimental fishing dataset: Fish from West African estuaries, lagoons and reservoirs.","

Background

This paper describes a dataset of fish, crustacean and mollusc occurrences extracted from the ""Experimental Fishing"" section of the IRD's PPEAO information system. PPEAO stands for ""Fish communities and artisanal fisheries of West African estuarine, lagoon and freshwater ecosystems"". This database contains information collected using two different methods: experimental fishing and surveys of the artisanal fisheries that exploit these ecosystems. The database is accessible at http://ppeao.ird.fr.

New information

The current dataset is available on GBIF.org at 10.15468/ra4voa. It comprises the occurrences of 314 fish, crustacean and mollusc taxa collected in experimental sampling surveys of different aquatic ecosystems in West Africa between 1979 and 2013. Different types of fishing gear were used including purse seines, gill nets and fyke nets. The taxa were identified by IRD scientists or by scientific partners well trained in systematics. Most taxa were identified at species level (97% of cases). This dataset is the result of 213 fishing surveys, 5,362 fishing hauls and 31,709 occurrences (28,428 of fish taxa and 3,281 of crustaceans and molluscs). The number of individuals per species and per haul is included and 80% of occurrences are geolocated.",2019-02-14 +30767086,Dimorphite-DL: an open-source program for enumerating the ionization states of drug-like small molecules.,"Small-molecule protonation can promote or discourage protein binding by altering hydrogen-bond, electrostatic, and van-der-Waals interactions. To improve virtual-screen pose and affinity predictions, researchers must account for all major small-molecule ionization states. But existing programs for calculating these states have notable limitations such as high cost, restrictive licenses, slow execution times, and poor modularity. Here, we present dimorphite-DL 1.0, a fast, accurate, accessible, and modular open-source program for enumerating small-molecule ionization states. Dimorphite-DL uses a straightforward empirical algorithm that leverages substructure searching and draws on a database of experimentally characterized ionizable molecules. We have tested dimorphite-DL using several versions of Python and RDKit on all major operating systems. We release it under the terms of the Apache License, Version 2.0. A copy is available free of charge from http://durrantlab.com/dimorphite-dl/ .",2019-02-14 +30764761,MultiDomainBenchmark: a multi-domain query and subject database suite.,"

Background

Genetic sequence database retrieval benchmarks play an essential role in evaluating the performance of sequence searching tools. To date, all phylogenetically diverse benchmarks known to the authors include only query sequences with single protein domains. Domains are the primary building blocks of protein structure and function. Independently, each domain can fulfill a single function, but most proteins (>80% in Metazoa) exist as multi-domain proteins. Multiple domain units combine in various arrangements or architectures to create different functions and are often under evolutionary pressures to yield new ones. Thus, it is crucial to create gold standards reflecting the multi-domain complexity of real proteins to more accurately evaluate sequence searching tools.

Description

This work introduces MultiDomainBenchmark (MDB), a database suite of 412 curated multi-domain queries and 227,512 target sequences, representing at least 5108 species and 1123 phylogenetically divergent protein families, their relevancy annotation, and domain location. Here, we use the benchmark to evaluate the performance of two commonly used sequence searching tools, BLAST/PSI-BLAST and HMMER. Additionally, we introduce a novel classification technique for multi-domain proteins to evaluate how well an algorithm recovers a domain architecture.

Conclusion

MDB is publicly available at http://csc.columbusstate.edu/carroll/MDB/ .",2019-02-14 +31143528,Towards a distributed connectionist account of cognates and interlingual homographs: evidence from semantic relatedness tasks.,"

Background

Current models of how bilinguals process cognates (e.g., ""wolf"", which has the same meaning in Dutch and English) and interlingual homographs (e.g., ""angel"", meaning ""insect's sting"" in Dutch) are based primarily on data from lexical decision tasks. A major drawback of such tasks is that it is difficult-if not impossible-to separate processes that occur during decision making (e.g., response competition) from processes that take place in the lexicon (e.g., lateral inhibition). Instead, we conducted two English semantic relatedness judgement experiments.

Methods

In Experiment 1, highly proficient Dutch-English bilinguals (N = 29) and English monolinguals (N = 30) judged the semantic relatedness of word pairs that included a cognate (e.g., ""wolf""-""howl""; n = 50), an interlingual homograph (e.g., ""angel""-""heaven""; n = 50) or an English control word (e.g., ""carrot""-""vegetable""; n = 50). In Experiment 2, another group of highly proficient Dutch-English bilinguals (N = 101) read sentences in Dutch that contained one of those cognates, interlingual homographs or the Dutch translation of one of the English control words (e.g., ""wortel"" for ""carrot"") approximately 15 minutes prior to completing the English semantic relatedness task.

Results

In Experiment 1, there was an interlingual homograph inhibition effect of 39 ms only for the bilinguals, but no evidence for a cognate facilitation effect. Experiment 2 replicated these findings and also revealed that cross-lingual long-term priming had an opposite effect on the cognates and interlingual homographs: recent experience with a cognate in Dutch speeded processing of those items 15 minutes later in English but slowed processing of interlingual homographs. However, these priming effects were smaller than previously observed using a lexical decision task.

Conclusion

After comparing our results to studies in both the bilingual and monolingual domain, we argue that bilinguals appear to process cognates and interlingual homographs as monolinguals process polysemes and homonyms, respectively. In the monolingual domain, processing of such words is best modelled using distributed connectionist frameworks. We conclude that it is necessary to explore the viability of such a model for the bilingual case.

Data scripts materials and pre-registrations

Experiment 1: http://www.osf.io/ndb7p; Experiment 2: http://www.osf.io/2at49.",2019-05-16 +31353949,Making Open Science Work for Science and Society.,"

Background

The open science movement is transforming scientific practice with the goal of enhancing the transparency, productivity, and reproducibility of research. Nevertheless, transparency is a complex concept, and efforts to promote some forms of transparency may do relatively little to advance other important forms of transparency.

Objectives

Drawing from the literature in history, philosophy, and sociology of science, we aim to distinguish between different forms of scientific transparency. Our goal is to identify strategies for achieving forms of transparency that are relevant not only to scientists but also to decision makers and members of the public.

Discussion

We draw a distinction between ""scientifically relevant transparency"" and ""socially relevant transparency."" Most of the prominent strategies associated with the open science movement (e.g., making data publicly available and registering studies) are designed primarily to promote scientifically relevant transparency. To achieve socially relevant transparency, which is particularly important in fields like environmental health, further steps are needed to provide scientific information in ways that are relevant to decision makers and members of the public.

Conclusions

Promoting socially relevant transparency will require a range of activities by many different individuals and institutions. We propose an array of strategies that can be pursued by scientists and other scholars, journals, universities, funders, government agencies, and members of the public. https://doi.org/10.1289/EHP4808.",2019-07-29 +31564248,Identifying Crohn's disease signal from variome analysis.,"

Background

After years of concentrated research efforts, the exact cause of Crohn's disease (CD) remains unknown. Its accurate diagnosis, however, helps in management and preventing the onset of disease. Genome-wide association studies have identified 241 CD loci, but these carry small log odds ratios and are thus diagnostically uninformative.

Methods

Here, we describe a machine learning method-AVA,Dx (Analysis of Variation for Association with Disease)-that uses exonic variants from whole exome or genome sequencing data to extract CD signal and predict CD status. Using the person-specific coding variation in genes from a panel of only 111 individuals, we built disease-prediction models informative of previously undiscovered disease genes. By additionally accounting for batch effects, we were able to accurately predict CD status for thousands of previously unseen individuals from other panels.

Results

AVA,Dx highlighted known CD genes including NOD2 and new potential CD genes. AVA,Dx identified 16% (at strict cutoff) of CD patients at 99% precision and 58% of the patients (at default cutoff) with 82% precision in over 3000 individuals from separately sequenced panels.

Conclusions

Larger training panels and additional features, including other types of genetic variants and environmental factors, e.g., human-associated microbiota, may improve model performance. However, the results presented here already position AVA,Dx as both an effective method for revealing pathogenesis pathways and as a CD risk analysis tool, which can improve clinical diagnostic time and accuracy. Links to the AVA,Dx Docker image and the BitBucket source code are at https://bromberglab.org/project/avadx/ .",2019-09-30 +27573070,gFinder: A Web-Based Bioinformatics Tool for the Analysis of N-Glycopeptides.,"Glycoproteins influence numerous indispensable biological functions, and changes in protein glycosylation have been observed in various diseases. The identification and characterization of glycoprotein and glycosylation sites by mass spectrometry (MS) remain challenging tasks, and great efforts have been devoted to the development of proteome informatics tools that facilitate the MS analysis of glycans and glycopeptides. Here we report on the development of gFinder, a web-based bioinformatics tool that analyzes mixtures of native N-glycopeptides that have been profiled by tandem MS. gFinder not only enables the simultaneous integration of collision-induced dissociation (CID) and high-energy collisional dissociation (HCD) fragmentation but also merges the spectra for high-throughput analysis. These merged spectra expedite the identification of both glycans and N-glycopeptide backbones in tandem MS data using the glycan database and a proteomic search tool (e.g., Mascot). These data can be used to simultaneously characterize peptide backbone sequences and possible N-glycan structures using assigned scores. gFinder also provides many convenient functions that make it easy to perform manual calculations while viewing the spectrum on-screen. 
We used gFinder to detect an additional protein (Q8N9B8) that was missed from the previously published data set containing N-linked glycosylation. For N-glycan analysis, we used the GlycomeDB glycan structure database, which integrates the structural and taxonomic data from all of the major carbohydrate databases available in the public domain. Thus, gFinder is a convenient, high-throughput analytical tool for interpreting the tandem mass spectra of N-glycopeptides, which can then be used for identification of potential missing proteins having glycans. gFinder is available publicly at http://gFinder.proteomix.org/ .",2016-09-14 +24444128,Database for exchangeable gene trap clones: pathway and gene ontology analysis of exchangeable gene trap clone mouse lines.,"Gene trapping in embryonic stem (ES) cells is a proven method for large-scale random insertional mutagenesis in the mouse genome. We have established an exchangeable gene trap system, in which a reporter gene can be exchanged for any other DNA of interest through Cre/mutant lox-mediated recombination. We isolated trap clones, analyzed trapped genes, and constructed the database for Exchangeable Gene Trap Clones (EGTC) [http://egtc.jp]. The number of registered ES cell lines was 1162 on 31 August 2013. We also established 454 mouse lines from trap ES clones and deposited them in the mouse embryo bank at the Center for Animal Resources and Development, Kumamoto University, Japan. The EGTC database is the most extensive academic resource for gene-trap mouse lines. Because we used a promoter-trap strategy, all trapped genes were expressed in ES cells. To understand the general characteristics of the trapped genes in the EGTC library, we used Kyoto Encyclopedia of Genes and Genomes (KEGG) for pathway analysis and found that the EGTC ES clones covered a broad range of pathways. 
We also used Gene Ontology (GO) classification data provided by Mouse Genome Informatics (MGI) to compare the functional distribution of genes in each GO term between trapped genes in the EGTC mouse lines and total genes annotated in MGI. We found the functional distributions for the trapped genes in the EGTC mouse lines and for the RefSeq genes for the whole mouse genome were similar, indicating that the EGTC mouse lines had trapped a wide range of mouse genes.",2014-01-20 +30509617,Automatic data analysis workflow for ultra-high performance liquid chromatography-high resolution mass spectrometry-based metabolomics.,"Data analysis for ultra-performance liquid chromatography high-resolution mass spectrometry-based metabolomics is a challenging task. The present work provides an automatic data analysis workflow (AntDAS2) by developing three novel algorithms, as follows: (i) a density-based ion clustering algorithm is designed for extracted-ion chromatogram extraction from high-resolution mass spectrometry; (ii) a new maximal value-based peak detection method is proposed with the aid of automatic baseline correction and instrumental noise estimation; and (iii) the strategy that clusters high-resolution m/z peaks to simultaneously align multiple components by a modified dynamic programing is designed to efficiently correct time-shift problem across samples. Standard compounds and complex datasets are used to study the performance of AntDAS2. AntDAS2 is better than several state-of-the-art methods, namely, XCMS Online, Mzmine2, and MS-DIAL, to identify underlying components and improve pattern recognition capability. Meanwhile, AntDAS2 is more efficient than XCMS Online and Mzmine2. 
A MATLAB GUI of AntDAS2 is designed for convenient analysis and is available at the following webpage: http://software.tobaccodb.org/software/antdas2.",2018-11-26 +30815000,Analysis of Predicted Host-Parasite Interactomes Reveals Commonalities and Specificities Related to Parasitic Lifestyle and Tissues Tropism.,"The study of molecular host-parasite interactions is essential to understand parasitic infection and adaptation within the host system. As well, prevention and treatment of infectious diseases require a clear understanding of the molecular crosstalk between parasites and their hosts. Yet, large-scale experimental identification of host-parasite molecular interactions remains challenging, and the use of computational predictions becomes then necessary. Here, we propose a computational integrative approach to predict host-parasite protein-protein interaction (PPI) networks resulting from the human infection by 15 different eukaryotic parasites. We used an orthology-based approach to transfer high-confidence intraspecies interactions obtained from the STRING database to the corresponding interspecies homolog protein pairs in the host-parasite system. Our approach uses either the parasites predicted secretome and membrane proteins, or only the secretome, depending on whether they are uni- or multi-cellular, respectively, to reduce the number of false predictions. Moreover, the host proteome is filtered for proteins expressed in selected cellular localizations and tissues supporting the parasite growth. We evaluated the inferred interactions by analyzing the enriched biological processes and pathways in the predicted networks and their association with known parasitic invasion and evasion mechanisms. The resulting PPI networks were compared across parasites to identify common mechanisms that may define a global pathogenic hallmark. We also provided a study case focusing on a closer examination of the human-S. 
mansoni predicted interactome, detecting central proteins that have relevant roles in the human-S. mansoni network, and identifying tissue-specific interactions with key roles in the life cycle of the parasite. The predicted PPI networks can be visualized and downloaded at http://orthohpi.jensenlab.org.",2019-02-13 +31726390,RNAm5CPred: Prediction of RNA 5-Methylcytosine Sites Based on Three Different Kinds of Nucleotide Composition.,"5-methylcytosine (m5C) is one of the most common and abundant post-transcriptional modifications (PTCMs) in RNA. Recent studies showed that m5C plays important roles in many biological functions such as RNA metabolism and cell fate decision. Because most experimental methods that determine m5C sites across the transcriptome are time-consuming and expensive, it is urgent to develop accurate computational methods to identify m5C sites effectively. A benchmark dataset is important for developing and evaluating computational methods. In this work, we constructed four different datasets according to the data redundancy and imbalance. Based on these datasets, we generated three different kinds of features, i.e., KNFs (K-nucleotide frequencies), KSNPFs (K-spaced nucleotide pair frequencies), and pseDNC (pseudo-dinucleotide composition), and then used a support vector machine (SVM) to build our models. Based on the imbalanced and nonredundant dataset, Met935, we extensively studied the three kinds of features and determined an optimal combination of the features. Based on the feature combination, we built models on the three different datasets and compared them with state-of-the-art models. According to the predictive results of the stringent jackknife test, the models based on the three features, 4NF, 1SNPF, and pseDNC, are superior or comparable to other methods. 
To determine the best model between the models based on the imbalanced dataset Met935 and the balanced dataset Met240, we further evaluated the two models on an independent test set Test1157. Our results demonstrate that the model based on the balanced dataset Met240 achieved the highest recall (68.79%) and the highest Matthews correlation coefficient (MCC) (0.154). In addition, the model is also superior to other state-of-the-art methods according to the integrated parameter MCC on the independent test set. Thus, we selected the model based on Met240 as our final model, which was named RNAm5CPred. In addition, a web server for RNAm5CPred (http://zhulab.ahu.edu.cn/RNAm5CPred/) has been provided to facilitate experimental research.",2019-10-18 +27293150,Integrated Genomic Analysis of Diverse Induced Pluripotent Stem Cells from the Progenitor Cell Biology Consortium.,"The rigorous characterization of distinct induced pluripotent stem cells (iPSC) derived from multiple reprogramming technologies, somatic sources, and donors is required to understand potential sources of variability and downstream potential. To achieve this goal, the Progenitor Cell Biology Consortium performed comprehensive experimental and genomic analyses of 58 iPSC from ten laboratories generated using a variety of reprogramming genes, vectors, and cells. Associated global molecular characterization studies identified functionally informative correlations in gene expression, DNA methylation, and/or copy-number variation among key developmental and oncogenic regulators as a result of donor, sex, line stability, reprogramming technology, and cell of origin. Furthermore, X-chromosome inactivation in PSC produced highly correlated differences in teratoma-lineage staining and regulator expression upon differentiation. 
All experimental results, and raw, processed, and metadata from these analyses, including powerful tools, are interactively accessible from a new online portal at https://www.synapse.org to serve as a reusable resource for the stem cell community.",2016-06-09 +27328409,"A database of marine phytoplankton abundance, biomass and species composition in Australian waters.","There have been many individual phytoplankton datasets collected across Australia since the mid 1900s, but most are unavailable to the research community. We have searched archives, contacted researchers, and scanned the primary and grey literature to collate 3,621,847 records of marine phytoplankton species from Australian waters from 1844 to the present. Many of these are small datasets collected for local questions, but combined they provide over 170 years of data on phytoplankton communities in Australian waters. Units and taxonomy have been standardised, obviously erroneous data removed, and all metadata included. We have lodged this dataset with the Australian Ocean Data Network (http://portal.aodn.org.au/) allowing public access. The Australian Phytoplankton Database will be invaluable for global change studies, as it allows analysis of ecological indicators of climate change and eutrophication (e.g., changes in distribution; diatom:dinoflagellate ratios). In addition, the standardised conversion of abundance records to biomass provides modellers with quantifiable data to initialise and validate ecosystem models of lower marine trophic levels.",2016-06-21 +30759968,CDRgator: An Integrative Navigator of Cancer Drug Resistance Gene Signatures.,"Understanding the mechanisms of cancer drug resistance is a critical challenge in cancer therapy. For many cancer drugs, various resistance mechanisms have been identified such as target alteration, alternative signaling pathways, epithelial-mesenchymal transition, and epigenetic modulation. 
Resistance may arise via multiple mechanisms even for a single drug, making it necessary to investigate multiple independent models for comprehensive understanding and therapeutic application. In particular, we hypothesize that different resistance processes result in distinct gene expression changes. Here, we present a web-based database, CDRgator (Cancer Drug Resistance navigator) for comparative analysis of gene expression signatures of cancer drug resistance. Resistance signatures were extracted from two different types of datasets. First, resistance signatures were extracted from transcriptomic profiles of cancer cells or patient samples and their resistance-induced counterparts for >30 cancer drugs. Second, drug resistance group signatures were also extracted from two large-scale drug sensitivity datasets representing ~1,000 cancer cell lines. All the datasets are available for download, and are conveniently accessible based on drug class and cancer type, along with analytic features such as clustering analysis, multidimensional scaling, and pathway analysis. CDRgator allows meta-analysis of independent resistance models for more comprehensive understanding of drug-resistance mechanisms that is difficult to accomplish with individual datasets alone (database URL: http://cdrgator.ewha.ac.kr).",2019-02-12 +30364564,Two-year field data on neonicotinoid concentrations in guttation drops of seed treated maize (Zea mays).,"We present neonicotinoid concentrations in guttation drops of commonly used maize (Zea mays) cultivars, germinated from seeds coated with active substances (a.s.): i) imidacloprid (IMD), ii) clothianidin (CTN) and iii) thiamethoxam (THM) over two growing seasons. In one variant clothianidin was applied as seed granule. The trial took place at the experimental fields of the Julius Kühn-Institut in Berlin in 2010 and 2011. 
Data from 2010 are related to a presentation of ""Pesticides in guttation droplets following seed treatment - field studies"" (Schenke et al., 2011) [1] presented at the SETAC North America conference and only some figures were used in the ""Scientific opinion on the science behind the development of a risk assessement of plant protection products on bees (Apis mellifera, Bombus spp. and solitary bees)"" (EFSA, 2012) [2]. Only parts of the data from 2011 was presented in relation to the ""Exposure of Coccinellidae to guttation droplets on maize seedlings with seed or granule treatment of neonicotinoids"" (Schenke and Heimbach, 2014) [3]. The article describes the study sites, the variants of treated maize seeds, sample collection and the analytical methods used to quantify the neonicotinoids and relevant metabolites of IMD (5-OH-IMD and IMD-olefine) and of THM (CTN) in guttation drop samples. The complete field data set is publicly available at the OpenAgrar repository under https://doi.org/10.5073/20180907-142020 (Schenke et al., 2018) [4].",2018-10-04 +30591009,Reconstructing high-resolution chromosome three-dimensional structures by Hi-C complex networks.,"

Background

Hi-C data have been widely used to reconstruct chromosomal three-dimensional (3D) structures. One of the key limitations of Hi-C is the unclear relationship between spatial distance and the number of Hi-C contacts. Many methods used a fixed parameter when converting the number of Hi-C contacts to wish distances. However, a single parameter cannot properly explain the relationship between wish distances and genomic distances or the  locations of topologically associating domains (TADs).

Results

We have addressed one of the key issues of using Hi-C data, that is, the unclear relationship between spatial distances and the number of Hi-C contacts, which is crucial to understand significant biological functions, such as the enhancer-promoter interactions. Specifically, we developed a new method to infer this converting parameter and pairwise Euclidean distances based on the topology of the Hi-C complex network (HiCNet). The inferred distances were modeled by clustering coefficient and multiple other types of constraints. We found that our inferred distances between bead-pairs within the same TAD were apparently smaller than those distances between bead-pairs from different TADs. Our inferred distances had a higher correlation with fluorescence in situ hybridization (FISH) data, fitted the localization patterns of Xist transcripts on DNA, and better matched 156 pairs of protein-enabled long-range chromatin interactions detected by ChIA-PET. Using the inferred distances and another round of optimization, we further reconstructed 40 kb high-resolution 3D chromosomal structures of mouse male ES cells. The high-resolution structures successfully illustrate TADs and DNA loops (peaks in Hi-C contact heatmaps) that usually indicate enhancer-promoter interactions.

Conclusions

We developed a novel method to infer the wish distances between DNA bead-pairs from Hi-C contacts. High-resolution 3D structures of chromosomes were built based on the newly-inferred wish distances. This whole process has been implemented as a tool named HiCNet, which is publicly available at http://dna.cs.miami.edu/HiCNet/ .",2018-12-28 +28053162,POSTAR: a platform for exploring post-transcriptional regulation coordinated by RNA-binding proteins.,"We present POSTAR (http://POSTAR.ncrnalab.org), a resource of POST-trAnscriptional Regulation coordinated by RNA-binding proteins (RBPs). Precise characterization of post-transcriptional regulatory maps has accelerated dramatically in the past few years. Based on new studies and resources, POSTAR supplies the largest collection of experimentally probed (∼23 million) and computationally predicted (approximately 117 million) RBP binding sites in the human and mouse transcriptomes. POSTAR annotates every transcript and its RBP binding sites using extensive information regarding various molecular regulatory events (e.g., splicing, editing, and modification), RNA secondary structures, disease-associated variants, and gene expression and function. Moreover, POSTAR provides a friendly, multi-mode, integrated search interface, which helps users to connect multiple RBP binding sites with post-transcriptional regulatory events, phenotypes, and diseases. Based on our platform, we were able to obtain novel insights into post-transcriptional regulation, such as the putative association between CPSF6 binding, RNA structural domains, and Li-Fraumeni syndrome SNPs. 
In summary, POSTAR represents an early effort to systematically annotate post-transcriptional regulatory maps and explore the putative roles of RBPs in human diseases.",2016-10-05 +27582018,ANGIOGENES: knowledge database for protein-coding and noncoding RNA genes in endothelial cells.,"Increasing evidence indicates the presence of long noncoding RNAs (lncRNAs) is specific to various cell types. Although lncRNAs are speculated to be more numerous than protein-coding genes, the annotations of lncRNAs remain primitive due to the lack of well-structured schemes for their identification and description. Here, we introduce a new knowledge database ""ANGIOGENES"" (http://angiogenes.uni-frankfurt.de) to allow for in silico screening of protein-coding genes and lncRNAs expressed in various types of endothelial cells, which are present in all tissues. Using the latest annotations of protein-coding genes and lncRNAs, publicly-available RNA-seq data was analyzed to identify transcripts that are expressed in endothelial cells of human, mouse and zebrafish. The analyzed data were incorporated into ANGIOGENES to provide a one-stop-shop for transcriptomics data to facilitate further biological validation. ANGIOGENES is an intuitive and easy-to-use database to allow in silico screening of expressed, enriched and/or specific endothelial transcripts under various conditions. We anticipate that ANGIOGENES serves as a starting point for functional studies to elucidate the roles of protein-coding genes and lncRNAs in angiogenesis.",2016-09-01 +28755519,Hierarchical role for transcription factors and chromatin structure in genome organization along adipogenesis.,"The three dimensional folding of mammalian genomes is cell type specific and difficult to alter suggesting that it is an important component of gene regulation. 
However, given the multitude of chromatin-associating factors, the mechanisms driving the colocalization of active chromosomal domains and the role of this organization in regulating the transcription program in adipocytes are not clear. Analysis of genome-wide chromosomal associations revealed cell type-specific spatial clustering of adipogenic genes in 3T3-L1 cells. Time course analysis demonstrated that the adipogenic 'hub', sampled by PPARγ and Lpin1, undergoes orchestrated reorganization during adipogenesis. Coupling the dynamics of genome architecture with multiple chromatin datasets indicated that among all the transcription factors (TFs) tested, RXR is central to genome reorganization at the beginning of adipogenesis. Interestingly, at the end of differentiation, the adipogenic hub was shifted to an H3K27me3-repressive environment in conjunction with attenuation of gene transcription. We propose a stage-specific hierarchy for the activity of TFs contributing to the establishment of an adipogenic genome architecture that brings together the adipogenic genetic program. In addition, the repositioning of this network in a H3K27me3-rich environment at the end of differentiation may contribute to the stabilization of gene transcription levels and reduce the developmental plasticity of these specialized cells.

Database

All sequence data reported in this paper have been deposited at GEO (http://www.ncbi.nlm.nih.gov/geo/) (GSE92475).",2017-08-16 +28846680,A curated catalog of canine and equine keratin genes.,"Keratins represent a large protein family with essential structural and functional roles in epithelial cells of skin, hair follicles, and other organs. During evolution the genes encoding keratins have undergone multiple rounds of duplication and humans have two clusters with a total of 55 functional keratin genes in their genomes. Due to the high similarity between different keratin paralogs and species-specific differences in gene content, the currently available keratin gene annotation in species with draft genome assemblies such as dog and horse is still imperfect. We compared the National Center for Biotechnology Information (NCBI) (dog annotation release 103, horse annotation release 101) and Ensembl (release 87) gene predictions for the canine and equine keratin gene clusters to RNA-seq data that were generated from adult skin of five dogs and two horses and from adult hair follicle tissue of one dog. Taking into consideration the knowledge on the conserved exon/intron structure of keratin genes, we annotated 61 putatively functional keratin genes in both the dog and horse, respectively. Subsequently, curators in the RefSeq group at NCBI reviewed their annotation of keratin genes in the dog and horse genomes (Annotation Release 104 and Annotation Release 102, respectively) and updated annotation and gene nomenclature of several keratin genes. The updates are now available in the NCBI Gene database (https://www.ncbi.nlm.nih.gov/gene).",2017-08-28 +29608179,"Integrating single-cell transcriptomic data across different conditions, technologies, and species.","Computational single-cell RNA-seq (scRNA-seq) methods have been successfully applied to experiments representing a single condition, technology, or species to discover and define cellular phenotypes. 
However, identifying subpopulations of cells that are present across multiple data sets remains challenging. Here, we introduce an analytical strategy for integrating scRNA-seq data sets based on common sources of variation, enabling the identification of shared populations across data sets and downstream comparative analysis. We apply this approach, implemented in our R toolkit Seurat (http://satijalab.org/seurat/), to align scRNA-seq data sets of peripheral blood mononuclear cells under resting and stimulated conditions, hematopoietic progenitors sequenced using two profiling technologies, and pancreatic cell 'atlases' generated from human and mouse islets. In each case, we learn distinct or transitional cell states jointly across data sets, while boosting statistical power through integrated analysis. Our approach facilitates general comparisons of scRNA-seq data sets, potentially deepening our understanding of how distinct cell states respond to perturbation, disease, and evolution.",2018-04-02 +30225310,Differential expression analysis of transcriptome data of Trypanosoma brucei RBP6 induction in procyclics leading to infectious metacyclics and bloodstream forms in vitro.,"We used an in vitro system based on the inducible expression of the RNA binding protein 6 (RBP6) to monitor transcriptome changes during the differentiation of Trypanosoma brucei from non-infectious procyclics to infectious metacyclics and from metacyclics to bloodstream forms. This data file describes the bioinformatics analysis of 20 distinct RNA-Seq samples, with four biological replicates each, highlighting differential transcript abundance. Additional functional annotation analysis using Gene Ontology is also presented. 
Complete raw data files were deposited at the NCBI Sequence Read Archive - SRA at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi with accession numbers: SRP153824, SRP153562, and SRP152737.",2018-09-01 +31001635,GalaxyRefine2: simultaneous refinement of inaccurate local regions and overall protein structure.,"The 3D structure of a protein can be predicted from its amino acid sequence with high accuracy for a large fraction of cases because of the availability of large quantities of experimental data and the advance of computational algorithms. Recently, deep learning methods exploiting the coevolution information obtained by comparing related protein sequences have been successfully used to generate highly accurate model structures even in the absence of template structure information. However, structures predicted based on either template structures or related sequences require further improvement in regions for which information is missing. Refining a predicted protein structure with insufficient information on certain regions is critical because these regions may be connected to functional specificity that is not conserved among related proteins. The GalaxyRefine2 web server, freely available via http://galaxy.seoklab.org/refine2, is an upgraded version of the GalaxyRefine protein structure refinement server and reflects recent developments successfully tested through CASP blind prediction experiments. This method adopts an iterative optimization approach involving various structure move sets to refine both local and global structures. The estimation of local error and hybridization of available homolog structures are also employed for effective conformation search.",2019-07-01 +30871603,RnBeads 2.0: comprehensive analysis of DNA methylation data.,"DNA methylation is a widely investigated epigenetic mark with important roles in development and disease. High-throughput assays enable genome-scale DNA methylation analysis in large numbers of samples. 
Here, we describe a new version of our RnBeads software - an R/Bioconductor package that implements start-to-finish analysis workflows for Infinium microarrays and various types of bisulfite sequencing. RnBeads 2.0 ( https://rnbeads.org/ ) provides additional data types and analysis methods, new functionality for interpreting DNA methylation differences, improved usability with a novel graphical user interface, and better use of computational resources. We demonstrate RnBeads 2.0 in four re-runnable use cases focusing on cell differentiation and cancer.",2019-03-14 +31146118,HIVCoR: A sequence-based tool for predicting HIV-1 CRF01_AE coreceptor usage.,"Determination of HIV-1 coreceptor usage is strongly recommended before starting the coreceptor-specific inhibitors for HIV treatment. Currently, the genotypic assays are the most interesting tools due to they are more feasible than phenotypic assays. However, most of prediction models were developed and validated by data set of HIV-1 subtype B and C. The present study aims to develop a powerful and reliable model to accurately predict HIV-1 coreceptor usage for CRF01_AE subtype called HIVCoR. HIVCoR utilized random forest and support vector machine as the prediction model, together with amino acid compositions, pseudo amino acid compositions and relative synonymous codon usage frequencies as the input feature. The overall success rate of 93.79% was achieved from the external validation test on the objective benchmark dataset. Comparison results indicated that HIVCoR was superior to other bioinformatics tools and genotypic predictors. For the convenience of experimental scientists, a user-friendly webserver has been established at http://codes.bio/hivcor/.",2019-05-20 +27504011,MODEM: multi-omics data envelopment and mining in maize. 
,"MODEM is a comprehensive database of maize multidimensional omics data, including genomic, transcriptomic, metabolic and phenotypic information from the cellular to individual plant level. This initial release contains approximately 1.06 M high quality SNPs for 508 diverse inbred lines obtained by combining variations from RNA sequencing on whole kernels (15 days after pollination) of 368 lines and a 50 K array for all 508 individuals. As all of these data were derived from the same diverse panel of lines, the database also allows various types of genetic mapping (including characterization of phenotypic QTLs, pQTLs; expression QTLs, eQTLs and metabolic QTLs, mQTLs). MODEM is thus designed to promote a better understanding of maize genetic architecture and deep functional annotation of the complex maize genome (and potentially those of other crop plants) and to explore the genotype-phenotype relationships and regulation of maize kernel development at multiple scales, which is also comprehensive for developing novel methods. MODEM is additionally designed to link with other databases to make full use of current resources, and it provides visualization tools for easy browsing. All of the original data and the related mapping results are freely available for easy query and download. This platform also provides helpful tools for general analyses and will be continually updated with additional materials, features and public data related to maize genetics or regulation as they become available.Database URL: (http://modem.hzau.edu.cn).",2016-08-07 +31345217,Antenatal iron supplementation and birth weight in conditions of high exposure to infectious diseases.,"

Background

A recent cohort study among Papua New Guinean women surprisingly showed iron deficiency during pregnancy to be associated with increased birth weight. These findings seemingly contradict previous trial evidence that iron supplementation leads to increased birth weight, particularly in iron-deficient women, and hence require explanation.

Main text

We have re-analysed data from a previous trial in Kenya and demonstrated that, because women who were initially iron deficient respond better to iron supplementation, they show an increase in birthweight. There is evidence that this benefit is decreased in iron-replete women, possibly due to the adverse effects of haemoconcentration that can impair oxygen and nutrient transfer across the placenta. The Papua New Guinean results might be explained by a similar differential response to the iron supplements that they all received.

Conclusions

Antenatal iron supplementation should ideally be administered in conjunction with measures to prevent, diagnose and treat malaria given the propensity of pathogenic microorganisms to proliferate in iron-supplemented individuals. However, even where services to prevent and treat malaria are poor, current evidence supports the conclusion that the benefits of universal iron supplementation outweigh its risks. Please see related article: https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-018-1146-z. Please see related article: https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-019-1376-8.",2019-07-26 +27738135,IMG/M: integrated genome and metagenome comparative data analysis system.,"The Integrated Microbial Genomes with Microbiome Samples (IMG/M: https://img.jgi.doe.gov/m/) system contains annotated DNA and RNA sequence data of (i) archaeal, bacterial, eukaryotic and viral genomes from cultured organisms, (ii) single cell genomes (SCG) and genomes from metagenomes (GFM) from uncultured archaea, bacteria and viruses and (iii) metagenomes from environmental, host associated and engineered microbiome samples. Sequence data are generated by DOE's Joint Genome Institute (JGI), submitted by individual scientists, or collected from public sequence data archives. Structural and functional annotation is carried out by JGI's genome and metagenome annotation pipelines. A variety of analytical and visualization tools provide support for examining and comparing IMG/M's datasets. IMG/M allows open access interactive analysis of publicly available datasets, while manual curation, submission and access to private datasets and computationally intensive workspace-based analysis require login/password access to its expert review (ER) companion system (IMG/M ER: https://img.jgi.doe.gov/mer/). 
Since the last report published in the 2014 NAR Database Issue, IMG/M's dataset content has tripled in terms of number of datasets and overall protein coding genes, while its analysis tools have been extended to cope with the rapid growth in the number and size of datasets handled by the system.",2016-10-13 +31388939,"In-Hospital Healthcare Utilization, Outcomes, and Costs in Pre-Hospital-Adjudicated Low-Risk Chest-Pain Patients.","

Background

There is increasing evidence that in patients presenting with acute chest pain, pre-hospital triage can accurately identify low-risk patients. It is, however, still unclear which diagnostics are performed in pre-hospital-adjudicated low-risk patients and what the contribution is of those diagnostic results in the healthcare process.

Objectives

The aim of this study was to quantify healthcare utilization, costs, and outcomes in pre-hospital-adjudicated low-risk chest-pain patients, and to extrapolate to total costs in the Netherlands.

Methods

This was a prospective cohort study including 700 patients with suspected non-ST-elevation acute coronary syndrome in which pre-hospital risk stratification using the HEART score was performed by paramedics. Low risk was defined as a pre-hospital HEART score ≤ 3. Data on (results of) hospital diagnostics, costs, and discharge diagnosis were collected.

Results

A total of 172 (25%) patients were considered as low risk. Of these low-risk patients, the mean age was 54 years, 52% were male, and 84% of patients were discharged within 12 h. Repeated electrocardiography and routine laboratory measurements, including cardiac markers, were performed in all patients. Chest X-ray was performed in 61% and echocardiography in 11% of patients. After additional diagnostics, two patients (1.2%) were diagnosed as non-ST-elevation myocardial infarction and two patients (1.2%) as unstable angina. Other diagnoses were atrial fibrillation (n = 1) and acute pancreatitis/cholecystitis (n = 2); all other patients had non-specific/non-acute discharge diagnoses. Mean in-hospital costs per patient were €1580. The estimated yearly acute healthcare cost in low-risk chest-pain patients in the Netherlands is €30,438,700.

Conclusion

In low-risk chest-pain patients according to pre-hospital risk assessment, acute healthcare utilization and costs are high, with limited added value. Possibly, if a complete risk assessment can be performed by ambulance paramedics, acute hospitalization of the majority of low-risk patients is not necessary, which can lead to substantial cost reduction.

Trial id

Dutch Trial Register [http://www.trialregister.nl]: trial number 4205.",2019-12-01 +31106342,MutationDistiller: user-driven identification of pathogenic DNA variants.,"MutationDistiller is a freely available online tool for user-driven analyses of Whole Exome Sequencing data. It offers a user-friendly interface aimed at clinicians and researchers, who are not necessarily bioinformaticians. MutationDistiller combines MutationTaster's pathogenicity predictions with a phenotype-based approach. Phenotypic information is not limited to symptoms included in the Human Phenotype Ontology (HPO), but may also comprise clinical diagnoses and the suspected mode of inheritance. The search can be restricted to lists of candidate genes (e.g. virtual gene panels) and by tissue-specific gene expression. The inclusion of GeneOntology (GO) and metabolic pathways facilitates the discovery of hitherto unknown disease genes. In a novel approach, we trained MutationDistiller's HPO-based prioritization on authentic genotype-phenotype sets obtained from ClinVar and found it to match or outcompete current prioritization tools in terms of accuracy. In the output, the program provides a list of potential disease mutations ordered by the likelihood of the affected genes to cause the phenotype. MutationDistiller provides links to gene-related information from various resources. It has been extensively tested by clinicians and their suggestions have been valued in many iterative cycles of revisions. 
The tool, a comprehensive documentation and examples are freely available at https://www.mutationdistiller.org/.",2019-07-01 +,Web Application to Access and Visualize US Forest Inventory and Analysis Program Down Woody Materials Data,"Accurate assessments of down woody materials (DWM)-soil duff and litter, dead and downed fine and coarse woody detritus, and live and dead understory vegetation-are important because of their roles in forest floor carbon storage, organism habitat, nutrient cycling, and fuel loading. The USDA Forest Service's Forest Inventory and Analysis Program (FIA) subsamples DWM in its extensive annual survey of US forestland and reports raw data measurements in its database. We combined FIA plot measurements and auxiliary specific gravity and other data to estimate the DWM component mass on FIA plots. The resulting database, designed for use with key stand structure variables and geographic subsets (state and/or ecoregion), can be downloaded at http://web.gis.vt.edu/forestry/dwm. A companion interactive map allows viewing of the DWM data set via a color-coded display of user-selected DWM component mass, thus allowing users to quickly access and choose DWM data for downloading. Research scientists, managers, and policymakers can use a valuable national-scale DWM baseline data set via this online tool.",2013-03-01 +22110025,"The DNA Data Bank of Japan launches a new resource, the DDBJ Omics Archive of functional genomics experiments.","The DNA Data Bank of Japan (DDBJ; http://www.ddbj.nig.ac.jp) maintains and provides archival, retrieval and analytical resources for biological information. The central DDBJ resource consists of public, open-access nucleotide sequence databases including raw sequence reads, assembly information and functional annotation. Database content is exchanged with EBI and NCBI within the framework of the International Nucleotide Sequence Database Collaboration (INSDC). 
In 2011, DDBJ launched two new resources: the 'DDBJ Omics Archive' (DOR; http://trace.ddbj.nig.ac.jp/dor) and BioProject (http://trace.ddbj.nig.ac.jp/bioproject). DOR is an archival database of functional genomics data generated by microarray and highly parallel new generation sequencers. Data are exchanged between the ArrayExpress at EBI and DOR in the common MAGE-TAB format. BioProject provides an organizational framework to access metadata about research projects and the data from the projects that are deposited into different databases. In this article, we describe major changes and improvements introduced to the DDBJ services, and the launch of two new resources: DOR and BioProject.",2011-11-22 +31103852,Early Cancer Detection from Multianalyte Blood Test Results.,"The early detection of cancers has the potential to save many lives. A recent attempt has been demonstrated successful. However, we note several critical limitations. Given the central importance and broad impact of early cancer detection, we aspire to address those limitations. We explore different supervised learning approaches for multiple cancer type detection and observe significant improvements; for instance, one of our approaches (i.e., CancerA1DE) can double the existing sensitivity from 38% to 77% for the earliest cancer detection (i.e., Stage I) at the 99% specificity level. For Stage II, it can even reach up to about 90% across multiple cancer types. In addition, CancerA1DE can also double the existing sensitivity from 30% to 70% for detecting breast cancers at the 99% specificity level. Data and model analysis are conducted to reveal the underlying reasons. 
A website is built at http://cancer.cs.cityu.edu.hk/.",2019-05-04 +31263275,CB-Dock: a web server for cavity detection-guided protein-ligand blind docking.,"As the number of elucidated protein structures is rapidly increasing, the growing data call for methods to efficiently exploit the structural information for biological and pharmaceutical purposes. Given the three-dimensional (3D) structure of a protein and a ligand, predicting their binding sites and affinity are a key task for computer-aided drug discovery. To address this task, a variety of docking tools have been developed. Most of them focus on docking in the preset binding sites given by users. To automatically predict binding modes without information about binding sites, we developed a user-friendly blind docking web server, named CB-Dock, which predicts binding sites of a given protein and calculates the centers and sizes with a novel curvature-based cavity detection approach, and performs docking with a popular docking program, Autodock Vina. This method was carefully optimized and achieved ~70% success rate for the top-ranking poses whose root mean square deviation (RMSD) were within 2 Å from the X-ray pose, which outperformed the state-of-the-art blind docking tools in our benchmark tests. CB-Dock offers an interactive 3D visualization of results, and is freely available at http://cao.labshare.cn/cb-dock/.",2019-07-01 +31106382,RegulationSpotter: annotation and interpretation of extratranscriptic DNA variants.,"RegulationSpotter is a web-based tool for the user-friendly annotation and interpretation of DNA variants located outside of protein-coding transcripts (extratranscriptic variants). It is designed for clinicians and researchers who wish to assess the potential impact of the considerable number of non-coding variants found in Whole Genome Sequencing runs. It annotates individual variants with underlying regulatory features in an intuitive way by assessing over 100 genome-wide annotations. 
Additionally, it calculates a score, which reflects the regulatory potential of the variant region. Its dichotomous classifications, 'functional' or 'non-functional', and a human-readable presentation of the underlying evidence allow a biologically meaningful interpretation of the score. The output shows key aspects of every variant and allows rapid access to more detailed information about its possible role in gene regulation. RegulationSpotter can either analyse single variants or complete VCF files. Variants located within protein-coding transcripts are automatically assessed by MutationTaster as well as by RegulationSpotter to account for possible intragenic regulatory effects. RegulationSpotter offers the possibility of using phenotypic data to focus on known disease genes or genomic elements interacting with them. RegulationSpotter is freely available at https://www.regulationspotter.org.",2019-07-01 +28702715,"[The EU Portal: Implementation, importance, and features].","The European Medicines Agency (EMA) is developing a web-based EU portal with a database ""at Union level as a single entry point for the submission of data and information relating to clinical trials in accordance with"" the new EU regulation No. 536/2014. The specifications are mostly published, but some documents are still missing. Because the project is integrated and has dependencies on other projects, this could result in other specification upgrades. The IT solution is under ongoing development until project completion in quarter III of 2019. The EU Portal and the database will be audited. If the audit is successful, the new regulation will come into force in October 2018. The use of the EU Portal will then be mandatory with some transition rules. The software development of the portal is restricted to the regulation and the derived requirements. It is not possible to implement any national requirements. 
We describe in this paper the current key functionalities of the portal and try to derive requirements for a national IT system. On 16.06.2017 the EMA Management Board announced that the development of the new portal has been delayed and it is foreseen that the new regulation can come into effect in 2019 at the earliest. The press release can be found here: http://www.ema.europa.eu/ema/index.jsp?curl=pages/news_and_events/news/2017/06/news_detail_002764.jsp%26mid=WC0b01ac058004d5c1 (accessed: 12.07.2017).",2017-08-01 +31734332,Risk factors for wheezing in primary health care settings in the tropics.,"

Background

The International Study of Wheezing in Infants (EISL) is a cross-sectional, population-based study, based on ISAAC (http://www.isaac.auckland.ac.nz). It uses a validated questionnaire on early wheezing and risk/protective factors.

Objective

To apply the EISL questionnaire regarding wheezing events in 0- to 12-month-old infants with or without atopic background searching for risk factors in the tropics.

Methods

The population was toddlers coming in for a checkup or 12-months' vaccination in primary health care clinics of a tropical city. Apart from child factors (eg, daycare attendance), we evaluated home factors (eg, air conditioning, bathroom, carpet, >6 persons, pollution) and mothers' factors (eg, education level, employment, cellphone). Data analysis was descriptive and case-control, with as cases atopic (AW) or non-atopic (NAW) wheezing children vs healthy controls. Wheezing-associated factors were evaluated using multivariate analysis, adjusted for the relation of AW/NAW with factors that were significant in prior univariate analysis.

Results

The study included 999 toddlers. Any wheeze: 31.3%, recurrent wheeze (≥3 episodes): 12.1%. Major risk factors for AW (OR; 95%CI) included smoking (11.39; 2.36-54.99), common cold before 3 months of life (3.72; 2.59-5.36), mold (3.48; 2.28-5.30), kitchen indoors (2.40; 1.27-4.54), and pets (1.69; 1.09-2.62); breastfeeding was almost protective. For NAW, common cold and pets were risk factors, but cesarean section (0.44; 0.23-0.82), more than 1 sibling (0.33; 0.18-0.61), and breastfeeding for longer than 3 months (0.50; 0.28-0.91) were protective.

Conclusion

Wheezing is a health care burden. We found potential new risk factors for AW, some possibly unique for tropical climates. We suggest testing several hypotheses: could early AW be reduced in the tropics by attacking mold growth? Enhancing cooking place ventilation? Keeping pets outside? Or by postponing daycare attendance until after 4 months of age and avoiding (passive) smoking during pregnancy?",2019-11-15 +23193290,DbPTM 3.0: an informative resource for investigating substrate site specificity and functional association of protein post-translational modifications.,"Protein modification is an extremely important post-translational regulation that adjusts the physical and chemical properties, conformation, stability and activity of a protein; thus altering protein function. Due to the high throughput of mass spectrometry (MS)-based methods in identifying site-specific post-translational modifications (PTMs), dbPTM (http://dbPTM.mbc.nctu.edu.tw/) is updated to integrate experimental PTMs obtained from public resources as well as manually curated MS/MS peptides associated with PTMs from research articles. Version 3.0 of dbPTM aims to be an informative resource for investigating the substrate specificity of PTM sites and functional association of PTMs between substrates and their interacting proteins. In order to investigate the substrate specificity for modification sites, a newly developed statistical method has been applied to identify the significant substrate motifs for each type of PTMs containing sufficient experimental data. According to the data statistics in dbPTM, >60% of PTM sites are located in the functional domains of proteins. It is known that most PTMs can create binding sites for specific protein-interaction domains that work together for cellular function. Thus, this update integrates protein-protein interaction and domain-domain interaction to determine the functional association of PTM sites located in protein-interacting domains. 
Additionally, the information of structural topologies on transmembrane (TM) proteins is integrated in dbPTM in order to delineate the structural correlation between the reported PTM sites and TM topologies. To facilitate the investigation of PTMs on TM proteins, the PTM substrate sites and the structural topology are graphically represented. Also, literature information related to PTMs, orthologous conservations and substrate motifs of PTMs are also provided in the resource. Finally, this version features an improved web interface to facilitate convenient access to the resource.",2012-11-27 +28012137,VaProS: a database-integration approach for protein/genome information retrieval.,"Life science research now heavily relies on all sorts of databases for genome sequences, transcription, protein three-dimensional (3D) structures, protein-protein interactions, phenotypes and so forth. The knowledge accumulated by all the omics research is so vast that a computer-aided search of data is now a prerequisite for starting a new study. In addition, a combinatory search throughout these databases has a chance to extract new ideas and new hypotheses that can be examined by wet-lab experiments. By virtually integrating the related databases on the Internet, we have built a new web application that facilitates life science researchers for retrieving experts' knowledge stored in the databases and for building a new hypothesis of the research target. This web application, named VaProS, puts stress on the interconnection between the functional information of genome sequences and protein 3D structures, such as structural effect of the gene mutation. In this manuscript, we present the notion of VaProS, the databases and tools that can be accessed without any knowledge of database locations and data formats, and the power of search exemplified in quest of the molecular mechanisms of lysosomal storage disease. 
VaProS can be freely accessed at http://p4d-info.nig.ac.jp/vapros/ .",2016-12-23 +,Phylogeny and historical biogeography of leafhopper subfamily Iassinae (Hemiptera: Cicadellidae) with a revised tribal classification based on morphological and molecular data,"Phylogenetic relationships among major lineages of the leafhopper subfamily Iassinae were explored by analysing a dataset of 91 discrete morphological characters and DNA sequence data from nuclear 28S rDNA and histone H3 genes and mitochondrial 12S rDNA. Bayesian, maximum‐likelihood and maximum parsimony analyses yielded similar tree topologies that were well resolved with strong branch support except at the base of the tree, resulting in equivocal support for inclusion of Bythoniini as a tribe of Iassinae but strong support for the monophyly of Iassinae (excluding Bythoniini) and most previously recognized iassine tribes. Divergence times for recovered nodes were estimated using a Bayesian relaxed clock method with two fossil calibration points. The results suggest that the deepest divergences coincided with Gondwanan vicariant events but that more recent divergences resulted from long‐range dispersal and colonization. Biogeographical analyses suggest that the group most likely has a Neotropical origin. The following changes to the taxonomic classification are proposed: establishment of three new tribes, Batracomorphini trib.n. (based on type genus Batracomorphus Lewis), Hoplojassini trib.n. (based on type genus Hoplojassus Dietrich and including one other South American genus), Lipokrisnini trib.n. 
(based on type genus Lipokrisna Freytag and including two other endemic Caribbean genera); Krisnini is redefined to include only the Old World genera Krisna and Gessius; Iassini is redefined to include only the type genus and four endemic Afrotropical genera; Bascarrhinus Fowler and Platyhynna Berg, recently treated as genera incertae sedis, are placed in Hyalojassini; Thalattoscopus Kirkaldy is added to the previously monobasic tribe Trocnadini. Iassinae now includes 12 tribes, all of which appear to be monophyletic. Revised morphological diagnoses of the subfamily and each of the included tribes are provided and a key to tribes is also given. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:41295B68‐2DAB‐4C4F‐B260‐F7C054922173.",2016-07-01 +28577791,PlanTEnrichment: A tool for enrichment analysis of transposable elements in plants.,"Transposable elements (TEs) are mobile DNA sequences that play roles in gene regulation, and have a potential to influence the expression of nearby genes by functioning as cis-regulatory sequences. However, bioinformatics tools facilitating analysis of the associations between TEs and nearby genes in plants are still lacking. We therefore reanalyzed the comprehensive annotation data of gene models and TEs of 11 plant species available in Ensembl Plants database, and built an up-to-date, unique tool called PlanTEnrichment, enabling enrichment analysis of TEs located within the upstream regions of a given gene list. PlanTEnrichment takes, for example, a group of differentially expressed genes under a particular biological condition as input and returns the list of TEs associated with those genes, along with their calculated enrichment scores and statistical significances. 
PlanTEnrichment is freely available at http://tools.ibg.deu.edu.tr/plantenrichment/ and is likely to substantially enhance our understanding of the role of TEs in diverse biological processes.",2017-06-01 +31028392,Referee: Reference Assembly Quality Scores.,"Genome assemblies from next-generation sequencing technologies are now an integral part of biological research, but many sequencing and assembly processes are still error-prone. Unfortunately, these errors can propagate to downstream analyses and wreak havoc on results and conclusions. Although such errors are recognized when dealing with diploid genotype data, modern reference assemblies (which are represented as haploid sequences) lack any type of succinct quality assessment for every position. Here we present Referee, a program that uses diploid genotype quality information in order to annotate a haploid assembly with a quality score for every position. Referee aims to provide an assembly with concise quality information on a Phred-like scale in FASTQ format for easy filtering of low-quality sites. Referee also provides output of quality scores in BED format that can be easily visualized as tracks on most genome browsers. Referee is freely available at https://gwct.github.io/referee/.",2019-05-01 +31018934,Proteomic Analysis of Urinary Microvesicles and Exosomes in Medullary Sponge Kidney Disease and Autosomal Dominant Polycystic Kidney Disease.,"

Background and objectives

Microvesicles and exosomes are involved in the pathogenesis of autosomal dominant polycystic kidney disease. However, it is unclear whether they also contribute to medullary sponge kidney, a sporadic kidney malformation featuring cysts, nephrocalcinosis, and recurrent kidney stones. We addressed this knowledge gap by comparative proteomic analysis.

Design, setting, participants, & measurements

The protein content of microvesicles and exosomes isolated from the urine of 15 patients with medullary sponge kidney and 15 patients with autosomal dominant polycystic kidney disease was determined by mass spectrometry followed by weighted gene coexpression network analysis, support vector machine learning, and partial least squares discriminant analysis to compare the profiles and select the most discriminative proteins. The proteomic data were verified by ELISA.

Results

A total of 2950 proteins were isolated from microvesicles and exosomes, including 1579 (54%) identified in all samples but only 178 (6%) and 88 (3%) specific for medullary sponge kidney microvesicles and exosomes, and 183 (6%) and 98 (3%) specific for autosomal dominant polycystic kidney disease microvesicles and exosomes, respectively. The weighted gene coexpression network analysis revealed ten modules comprising proteins with similar expression profiles. Support vector machine learning and partial least squares discriminant analysis identified 34 proteins that were highly discriminative between the diseases. Among these, CD133 was upregulated in exosomes from autosomal dominant polycystic kidney disease and validated by ELISA.

Conclusions

Our data indicate a different proteomic profile of urinary microvesicles and exosomes in patients with medullary sponge kidney compared with patients with autosomal dominant polycystic kidney disease. The urine proteomic profile of patients with autosomal dominant polycystic kidney disease was enriched of proteins involved in cell proliferation and matrix remodeling. Instead, proteins identified in patients with medullary sponge kidney were associated with parenchymal calcium deposition/nephrolithiasis and systemic metabolic derangements associated with stones formation and bone mineralization defects.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2019_04_24_CJASNPodcast_19_06_.mp3.",2019-04-24 +26216192,Functional Networks of Highest-Connected Splice Isoforms: From The Chromosome 17 Human Proteome Project.,"Alternative splicing allows a single gene to produce multiple transcript-level splice isoforms from which the translated proteins may show differences in their expression and function. Identifying the major functional or canonical isoform is important for understanding gene and protein functions. Identification and characterization of splice isoforms is a stated goal of the HUPO Human Proteome Project and of neXtProt. Multiple efforts have catalogued splice isoforms as ""dominant"", ""principal"", or ""major"" isoforms based on expression or evolutionary traits. In contrast, we recently proposed highest connected isoforms (HCIs) as a new class of canonical isoforms that have the strongest interactions in a functional network and revealed their significantly higher (differential) transcript-level expression compared to nonhighest connected isoforms (NCIs) regardless of tissues/cell lines in the mouse. HCIs and their expression behavior in the human remain unexplored. Here we identified HCIs for 6157 multi-isoform genes using a human isoform network that we constructed by integrating a large compendium of heterogeneous genomic data. We present examples for pairs of transcript isoforms of ABCC3, RBM34, ERBB2, and ANXA7. We found that functional networks of isoforms of the same gene can show large differences. Interestingly, differential expression between HCIs and NCIs was also observed in the human on an independent set of 940 RNA-seq samples across multiple tissues, including heart, kidney, and liver. Using proteomic data from normal human retina and placenta, we showed that HCIs are a promising indicator of expressed protein isoforms exemplified by NUDFB6 and M6PR. 
Furthermore, we found that a significant percentage (20%, p = 0.0003) of human and mouse HCIs are homologues, suggesting their conservation between species. Our identified HCIs expand the repertoire of canonical isoforms and are expected to facilitate studying main protein products, understanding gene regulation, and possibly evolution. The network is available through our web server as a rich resource for investigating isoform functional relationships (http://guanlab.ccmb.med.umich.edu/hisonet). All MS/MS data were available at ProteomeXchange Web site (http://www.proteomexchange.org) through their identifiers (retina: PXD001242, placenta: PXD000754).",2015-08-11 +31728565,Plasma ascorbic acid and the risk of islet autoimmunity and type 1 diabetes: the TEDDY study.,"

Aims/hypothesis

We studied the association of plasma ascorbic acid with the risk of developing islet autoimmunity and type 1 diabetes and examined whether SNPs in vitamin C transport genes modify these associations. Furthermore, we aimed to determine whether the SNPs themselves are associated with the risk of islet autoimmunity or type 1 diabetes.

Methods

We used a risk set sampled nested case-control design within an ongoing international multicentre observational study: The Environmental Determinants of Diabetes in the Young (TEDDY). The TEDDY study followed children with increased genetic risk from birth to endpoints of islet autoantibodies (350 cases, 974 controls) and type 1 diabetes (102 cases, 282 controls) in six clinical centres. Control participants were matched for family history of type 1 diabetes, clinical centre and sex. Plasma ascorbic acid concentration was measured at ages 6 and 12 months and then annually up to age 6 years. SNPs in vitamin C transport genes were genotyped using the ImmunoChip custom microarray. Comparisons were adjusted for HLA genotypes and for background population stratification.

Results

Childhood plasma ascorbic acid (mean ± SD 10.76 ± 3.54 mg/l in controls) was inversely associated with islet autoimmunity risk (adjusted OR 0.96 [95% CI 0.92, 0.99] per +1 mg/l), particularly islet autoimmunity, starting with insulin autoantibodies (OR 0.94 [95% CI 0.88, 0.99]), but not with type 1 diabetes risk (OR 0.93 [95% CI 0.86, 1.02]). The SLC2A2 rs5400 SNP was associated with increased risk of type 1 diabetes (OR 1.77 [95% CI 1.12, 2.80]), independent of plasma ascorbic acid (OR 0.92 [95% CI 0.84, 1.00]).

Conclusions/interpretation

Higher plasma ascorbic acid levels may protect against islet autoimmunity in children genetically at risk for type 1 diabetes. Further studies are warranted to confirm these findings.

Data availability

The datasets generated and analysed during the current study will be made available in the NIDDK Central Repository at https://www.niddkrepository.org/studies/teddy.",2019-11-14 +30733462,PFDB: A standardized protein folding database with temperature correction.,"We constructed a standardized protein folding kinetics database (PFDB) in which the logarithmic rate constants of all listed proteins are calculated at the standard temperature (25 °C). A temperature correction based on the Eyring-Kramers equation was introduced for proteins whose folding kinetics were originally measured at temperatures other than 25 °C. We verified the temperature correction by comparing the logarithmic rate constants predicted and experimentally observed at 25 °C for 14 different proteins, and the results demonstrated improvement of the quality of the database. PFDB consists of 141 (89 two-state and 52 non-two-state) single-domain globular proteins, which has the largest number among the currently available databases of protein folding kinetics. PFDB is thus intended to be used as a standard for developing and testing future predictive and theoretical studies of protein folding. PFDB can be accessed from the following link: http://lee.kias.re.kr/~bala/PFDB .",2019-02-07 +30729351,Efficacy of low-level laser for treatment of cancer oral mucositis: a systematic review and meta-analysis.,"Review effectiveness of low-level laser therapy (LLLT) in the curative treatment of oral mucositis (OM) in patients receiving cancer therapy. A systematic review with meta-analysis was performed using Medline, Embase, and Cochrane Library databases according to PRISMA guidelines, to identify randomized controlled trials (RCT) on OM in patients during and/or after cancer therapy and in which the therapeutic approach was LLLT, with wavelengths between 632 and 970 nm. 
We considered grade of OM as a dichotomous variable (such as an improvement or not in severe OM on the seventh day of therapy), with the analysis of subgroups of adult patients or children and adolescents and as a continuous variable with determination of the time for the complete resolution and the subgroup analysis occurred with the strata of the samples by treatment only with chemotherapy or chemotherapy and radiotherapy. This paper's protocol was registered a priori at https://www.crd.york.ac.uk/PROSPERO . We found five RCT (total of 315 patients) with adequate methodology. LLLT was effective, presenting a 62% risk reduction of severe mucositis on the seventh day of evaluation (RR = 0.38 [95% CI, 0.19-0.75]). When we analyzed subgroups, RR was 0.28 (95% CI 0.17-0.46) in the adult studies and 0.90 (95% CI, 0.46-1.78) in the studies with children and adolescents. We demonstrated a mean reduction of 4.21 days in the time of complete resolution of OM (CI - 5.65 to - 2.76) in favor of LLLT. There is moderate evidence that LLLT is effective in resolving OM lesions in adult patients undergoing cancer therapy. LLLT demonstrates potential for decreasing the resolution time of OM lesions by approximately 4.21 days.",2019-02-07 +29901776,EasyFRAP-web: a web-based tool for the analysis of fluorescence recovery after photobleaching data.,"Understanding protein dynamics is crucial in order to elucidate protein function and interactions. Advances in modern microscopy facilitate the exploration of the mobility of fluorescently tagged proteins within living cells. Fluorescence recovery after photobleaching (FRAP) is an increasingly popular functional live-cell imaging technique which enables the study of the dynamic properties of proteins at a single-cell level. As an increasing number of labs generate FRAP datasets, there is a need for fast, interactive and user-friendly applications that analyze the resulting data. 
Here we present easyFRAP-web, a web application that simplifies the qualitative and quantitative analysis of FRAP datasets. EasyFRAP-web permits quick analysis of FRAP datasets through an intuitive web interface with interconnected analysis steps (experimental data assessment, different types of normalization and estimation of curve-derived quantitative parameters). In addition, easyFRAP-web provides dynamic and interactive data visualization and data and figure export for further analysis after every step. We test easyFRAP-web by analyzing FRAP datasets capturing the mobility of the cell cycle regulator Cdt2 in the presence and absence of DNA damage in cultured cells. We show that easyFRAP-web yields results consistent with previous studies and highlights cell-to-cell heterogeneity in the estimated kinetic parameters. EasyFRAP-web is platform-independent and is freely accessible at: https://easyfrap.vmnet.upatras.gr/.",2018-07-01 +28820125,Fungal Genomes and Insights into the Evolution of the Kingdom. ,"The kingdom Fungi comprises species that inhabit nearly all ecosystems. Fungi exist as both free-living and symbiotic unicellular and multicellular organisms with diverse morphologies. The genomes of fungi encode genes that enable them to thrive in diverse environments, invade plant and animal cells, and participate in nutrient cycling in terrestrial and aquatic ecosystems. The continuously expanding databases of fungal genome sequences have been generated by individual and large-scale efforts such as Génolevures, Broad Institute's Fungal Genome Initiative, and the 1000 Fungal Genomes Project (http://1000.fungalgenomes.org). These efforts have produced a catalog of fungal genes and genomic organization. The genomic datasets can be utilized to better understand how fungi have adapted to their lifestyles and ecological niches. 
Large datasets of fungal genomic and transcriptomic data have enabled the use of novel methodologies and improved the study of fungal evolution from a molecular sequence perspective. Combined with microscopes, petri dishes, and woodland forays, genome sequencing supports bioinformatics and comparative genomics approaches as important tools in the study of the biology and evolution of fungi.",2017-07-01 +31490686,TMB Library of Nucleosome Simulations.,"Nucleosomes are the fundamental building blocks of chromatin, the biomaterial that houses the genome in all higher organisms. A nucleosome consists of 145-147 base pairs of DNA wrapped 1.7 times around eight histones. Given a four-letter code (A, C, G, T), there are approximately $4^{147}$ or $10^{88}$ oligonucleotides that can form a nucleosome. Comparative, rather than comprehensive, studies are required. Here we introduce the TMB Library of nucleosome simulations and present a meta-analysis of over 20 μs of all atom molecular dynamics simulations representing 518 different realizations of the nucleosome. The TMB Library serves as a reference for future comparative, on-demand simulations of nucleosomes and a demonstration of iBIOMES Lite as a tool for managing a laboratory's simulation library. For every simulation, dewatered trajectories, RMSD, and DNA helical parameter data are provided through iBIOMES Lite in a Web browser and a file browser format. A novel view of nucleosomal DNA emerges from our meta-analysis of the TMB Library. DNA conformation is restricted to a specific left-handed superhelix, but the range of conformations observed for individual bases and base pairs is not more restricted nor more highly deformed than DNA free in solution. With the exception of Roll, mean DNA helical parameter values obtained from simulations of nucleosomes are largely within the range of thermal motion of DNA free in solution. 
The library provides evidence of DNA kinking in the nucleosome and clearly demonstrates the effects of DNA sequence on the gross structure and dynamics of nucleosomes. These effects and mispositioning of the 601 super strong nucleosome positioning sequence can be detected in short simulations (10 ns). Collectively, the results provide a basis for comparative simulation studies of nucleosomes and extend our understanding of the binding of proteins and drugs to nucleosomal DNA. The TMB Library can be found at http://dna.engr.latech.edu/~tmbshare/ .",2019-09-24 +30228995,A comprehensive annotation for the root-knot nematode Meloidogyne incognita proteome data.,"Root-knot nematodes are devastating pathogens of crop plants. The draft genome of southern root-knot nematode Meloidogyne incognita was published in 2008 and additional genome and transcriptome data became available later on. However, lack of a publically available annotation for M. incognita genome and transcriptome(s) limits the use of this data for functional and comparative genomics by the interested researchers. Here we present a comprehensive annotation for the M. incognita proteome data available at INRA Meloidogyne Genomic Resources page (https://meloidogyne.inra.fr/Downloads/Meloidogyne-incognita-V2-2017) and European Nucleotide Archive (ENA) (accession number: ERP009887) using a multi-pronged approach.",2018-05-26 +27352221,Recommendations of the DNA Commission of the International Society for Forensic Genetics (ISFG) on quality control of autosomal Short Tandem Repeat allele frequency databasing (STRidER).,"The statistical evaluation of autosomal Short Tandem Repeat (STR) genotypes is based on allele frequencies. These are empirically determined from sets of randomly selected human samples, compiled into STR databases that have been established in the course of population genetic studies. 
There is currently no agreed procedure of performing quality control of STR allele frequency databases, and the reliability and accuracy of the data are largely based on the responsibility of the individual contributing research groups. It has been demonstrated with databases of haploid markers (EMPOP for mitochondrial mtDNA, and YHRD for Y-chromosomal loci) that centralized quality control and data curation is essential to minimize error. The concepts employed for quality control involve software-aided likelihood-of-genotype, phylogenetic, and population genetic checks that allow the researchers to compare novel data to established datasets and, thus, maintain the high quality required in forensic genetics. Here, we present STRidER (http://strider.online), a publicly available, centrally curated online allele frequency database and quality control platform for autosomal STRs. STRidER expands on the previously established ENFSI DNA WG STRbASE and applies standard concepts established for haploid and autosomal markers as well as novel tools to reduce error and increase the quality of autosomal STR data. The platform constitutes a significant improvement and innovation for the scientific community, offering autosomal STR data quality control and reliable STR genotype estimates.",2016-06-14 +30118434,Pollution and Global Health – An Agenda for Prevention.,"SUMMARY:Pollution is a major, overlooked, global health threat that was responsible in 2015 for an estimated 9 million deaths and great economic losses. To end neglect of pollution and advance prevention of pollution-related disease, we formed the Lancet Commission on Pollution and Health. Despite recent gains in understanding of pollution and its health effects, this Commission noted that large gaps in knowledge remain. To close these gaps and guide prevention, the Commission made research recommendations and proposed creation of a Global Observatory on Pollution and Health. 
We posit that successful pollution research will be translational and based on transdisciplinary collaborations among exposure science, epidemiology, data science, engineering, health policy, and economics. We envision that the Global Observatory on Pollution and Health will be a multinational consortium based at Boston College and the Harvard T.H. Chan School of Public Health that will aggregate, geocode, and archive data on pollution and pollution-related disease; analyze these data to discern trends, geographic patterns, and opportunities for intervention; and make its findings available to policymakers, the media, and the global public to catalyze research, inform policy, and assist cities and countries to target pollution, track progress, and save lives. https://doi.org/10.1289/EHP3141.",2018-08-06 +30565464,PPIExp: A Web-Based Platform for Integration and Visualization of Protein-Protein Interaction Data and Spatiotemporal Proteomics Data.,"Integrating spatiotemporal proteomics data with protein-protein interaction (PPI) data can help researchers make an in-depth exploration of their proteins of interest in a dynamic manner. However, there is still a lack of proper tools for the biologists who usually have few programming skills to construct a PPI network for a protein list, visualize active PPI subnetworks, and then select key nodes for further study. We propose a web-based platform named PPIExp that can automatically construct a PPI network, perform clustering analysis according to protein abundances, and perform functional enrichment analysis. More importantly, it provides multiple effective visualization interfaces, such as the interface to display the PPI network map, the interface to display a dendrogram and heatmap for the clustering result, and the interface to display the expression pattern of a selected protein. 
To visualize the active PPI subnetworks in specific space or time, it provides buttons to highlight the differentially expressed proteins under each condition on the network map. Additionally, to help researchers determine which proteins are worth further attention, PPIExp provides extensive one-click interactive operations to map node centrality measures to node size on the network and highlight three types of proteins, that is, the proteins in an enriched functional term, the coexpressed proteins selected from the dendrogram and heatmap, and the proteins input by users. PPIExp is available at http://www.fgvis.com/expressvis/PPIExp .",2018-12-26 +31656094,"Genetic Susceptibility, Dietary Protein Intake, and Changes of Blood Pressure: The POUNDS Lost Trial.","High blood pressure (BP) is closely related to obesity, and weight loss lowers BP. Evidence has shown considerable interpersonal variation of changes in BP among people experiencing weight loss, and such variation might be partly determined by genetic factors. We assessed the changes in systolic and diastolic BP (SBP/DBP) among 692 participants randomly assigned to 1 of 4 diets varying in macronutrient content for 2 years. Two separate polygenic scores (SBP/DBP-PGS derived from 52/50 single nucleotide polymorphisms) were built for each participant based on 66 BP-associated single nucleotide polymorphisms. During a 2-year intervention, participants in the bottom versus upper tertile of SBP/DBP-PGS had a greater decrease in SBP (△SBP at 6, 12, and 24 months: -3.84 versus -1.61, -4.76 versus -2.75, -2.49 versus -1.63; P=0.001) or in DBP (△DBP at 6, 12, and 24 months: -3.09 versus -1.34, -2.69 versus -1.44, -1.82 versus -0.53; P<0.001). We also found gene-diet interaction on changes in SBP from baseline to 24 months (Pinteraction=0.009). 
Among participants assigned to a high-protein diet, those with a lower SBP-polygenic scores had greater decreases in SBP at months 6 (P=0.018), months 12 (P=0.007), and months 24 (P=0.089); while no significant difference was observed across the SBP-polygenic scores tertile groups among those assigned to an average-protein diet (all P values >0.05). Our data indicate that genetic susceptibility may affect BP changes in response to weight-loss diet interventions, and protein intake may modify the genetic associations with changes in BP. This trial was registered at URL: http://www.clinicaltrials.gov. Unique identifier: NCT00072995.",2019-10-28 +27587660,Causality modeling for directed disease network.,"

Motivation

Causality between two diseases is valuable information as subsidiary information for medicine which is intended for prevention, diagnostics and treatment. Conventional cohort-centric researches are able to obtain very objective results; however, they demand costly experimental expense and a long period of time. Recently, data sources to clarify causality have been diversified: available information includes gene, protein, metabolic pathway and clinical information. By taking full advantage of those pieces of diverse information, we may extract causalities between diseases, alternatively to cohort-centric researches.

Method

In this article, we propose a new approach to define causality between diseases. In order to find causality, three different networks were constructed step by step. Each step has different data sources and different analytical methods, and the prior step sifts causality information to the next step. In the first step, a network defines association between diseases by utilizing disease-gene relations. And then, potential causalities of disease pairs are defined as a network by using prevalence and comorbidity information from clinical results. Finally, disease causalities are confirmed by a network defined from metabolic pathways.

Results

The proposed method is applied to data which is collected from databases such as MeSH, OMIM, HuDiNe, KEGG and PubMed. The experimental results indicated that disease causality that we found is 19 times higher than that of random guessing. The resulting pairs of causal-effected diseases are validated on the medical literature.

Availability and implementation

http://www.alphaminers.net

Contact

shin@ajou.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +27518140,The Genomic Basis of Noise-induced Hearing Loss: A Literature Review Organized by Cellular Pathways.,"

Objective

Using Reactome, a curated Internet database, noise-induced hearing loss studies were aggregated into cellular pathways for organization of the emerging genomic and epigenetic data in the literature.

Data sources

PubMed and Reactome.org, a relational data base program systematizing biological processes into interactive pathways and subpathways based on ontology, cellular constituents, gene expression, and molecular components.

Study selection

Peer-reviewed population and laboratory studies for the previous 15 years relating genomics and noise and hearing loss were identified in PubMed. Criteria included p values <0.05 with correction for multiple genes, a fold change of >1.5, or duplicated studies.

Data extraction and synthesis

One-hundred fifty-eight unique HGNC identifiers from 77 articles met the selection criteria, and were uploaded into the analysis program at http://reactome.org. These genes participated in a total of 621 cellular interactions in 21 of 23 pathways. Cellular response to stress with its attenuation phase, particularly in response to heat stress, detoxification of ROS, and specific areas of the immune system are predominant pathways identified as significantly 'overrepresented' (p values <0.1e-5 and false discovery rates <0.01).

Conclusion

Twenty-one of 23 of the designated pathways in Reactome have significant influence on noise-induced hearing loss, signifying a confluence of molecular pathways in reaction to acoustic trauma; however, cellular response to stress, including heat shock response, and other small areas of immune response were highly overrepresented. Yet-to-be-explored genomics areas include miRNA, lncRNA, copy number variations, RNA sequencing, and human genome-wide association study.",2016-09-01 +23794735,The CARLSBAD database: a confederated database of chemical bioactivities.,"Many bioactivity databases offer information regarding the biological activity of small molecules on protein targets. Information in these databases is often hard to resolve with certainty because of subsetting different data in a variety of formats; use of different bioactivity metrics; use of different identifiers for chemicals and proteins; and having to access different query interfaces, respectively. Given the multitude of data sources, interfaces and standards, it is challenging to gather relevant facts and make appropriate connections and decisions regarding chemical-protein associations. The CARLSBAD database has been developed as an integrated resource, focused on high-quality subsets from several bioactivity databases, which are aggregated and presented in a uniform manner, suitable for the study of the relationships between small molecules and targets. In contrast to data collection resources, CARLSBAD provides a single normalized activity value of a given type for each unique chemical-protein target pair. Two types of scaffold perception methods have been implemented and are available for datamining: HierS (hierarchical scaffolds) and MCES (maximum common edge subgraph). The 2012 release of CARLSBAD contains 439 985 unique chemical structures, mapped onto 1,420 889 unique bioactivities, and annotated with 277 140 HierS scaffolds and 54 135 MCES chemical patterns, respectively. 
Of the 890 323 unique structure-target pairs curated in CARLSBAD, 13.95% are aggregated from multiple structure-target values: 94 975 are aggregated from two bioactivities, 14 544 from three, 7 930 from four and 2214 have five bioactivities, respectively. CARLSBAD captures bioactivities and tags for 1435 unique chemical structures of active pharmaceutical ingredients (i.e. 'drugs'). CARLSBAD processing resulted in a net 17.3% data reduction for chemicals, 34.3% reduction for bioactivities, 23% reduction for HierS and 25% reduction for MCES, respectively. The CARLSBAD database supports a knowledge mining system that provides non-specialists with novel integrative ways of exploring chemical biology space to facilitate knowledge mining in drug discovery and repurposing. Database URL: http://carlsbad.health.unm.edu/carlsbad/.",2013-06-21 +22517761,NGS catalog: A database of next generation sequencing studies in humans.,"Next generation sequencing (NGS) technologies have been rapidly applied in biomedical and biological research since its advent only a few years ago, and they are expected to advance at an unprecedented pace in the following years. To provide the research community with a comprehensive NGS resource, we have developed the database Next Generation Sequencing Catalog (NGS Catalog, http://bioinfo.mc.vanderbilt.edu/NGS/index.html), a continually updated database that collects, curates and manages available human NGS data obtained from published literature. NGS Catalog deposits publication information of NGS studies and their mutation characteristics (SNVs, small insertions/deletions, copy number variations, and structural variants), as well as mutated genes and gene fusions detected by NGS. Other functions include user data upload, NGS general analysis pipelines, and NGS software. NGS Catalog is particularly useful for investigators who are new to NGS but would like to take advantage of these powerful technologies for their own research. 
Finally, based on the data deposited in NGS Catalog, we summarized features and findings from whole exome sequencing, whole genome sequencing, and transcriptome sequencing studies for human diseases or traits.",2012-04-19 +30827356,Proteomic identification of predictive biomarkers for malignant transformation in complete hydatidiform moles.,"INTRODUCTION:Protein expression in cells are associated with oncogenesis. This study aims to explore proteomic profiles and discover potential biomarkers that can predict malignant transformation of hydatidiform mole. METHODS:Retrospective analysis was done in 14 cases of remission hydatidiform mole and 14 cases of hydatidiform mole who later developed malignancy (GTN group). Molar tissues were retrieved from -70 °C frozen tissue. Subsequently, a large-scale proteomic analysis was performed to identify proteins and compare their abundance levels in the preserved molar tissues from these two groups using a dimethyl-labeling technique coupled with liquid chromatography-tandem mass spectrometry (LC-MS/MS). RESULTS:A total of 2,153 proteins were identified from all samples. 22 and 10 proteins were significantly up-regulated and down-regulated, respectively, in the GTN group compared with the mole group. These altered proteins were found in several biological groups such as cell-cell adhesion, secreted proteins, and ribonucleoproteins. Several hormone-related proteins were among the most up-regulated proteins in the GTN group including choriogonadotropin subunit beta (β-hCG) and alpha (α-hCG), growth/differentiation factor 15, as well as both pregnancy-specific beta-1-glycoproteins 2 and 3. In contrast, protein S100-A11 and l-lactate dehydrogenase A chain, were down-regulated in molar tissue from most patients in the GTN group. DISCUSSION:This study identified a set of differentially expressed proteins in molar tissues that could potentially be further examined as predictive biomarkers for the malignant transformation of CHMs. 
A molar proteome database was constructed and can be accessible online at http://sysbio.chula.ac.th/Database/GTD_DB/Supplementary_Data.xlsx.",2019-02-05 +30936375,Ethanol Stimulates Trehalose Production through a SpoT-DksA-AlgU-Dependent Pathway in Pseudomonas aeruginosa. ,"Pseudomonas aeruginosa frequently resides among ethanol-producing microbes, making its response to the microbially produced concentrations of ethanol relevant to understanding its biology. Our transcriptome analysis found that genes involved in trehalose metabolism were induced by low concentrations of ethanol, and biochemical assays showed that levels of intracellular trehalose increased significantly upon growth with ethanol. The increase in trehalose was dependent on the TreYZ pathway but not other trehalose-metabolic enzymes (TreS or TreA). The sigma factor AlgU (AlgT), a homolog of RpoE in other species, was required for increased expression of the treZ gene and trehalose levels, but induction was not controlled by the well-characterized proteolysis of its anti-sigma factor, MucA. Growth with ethanol led to increased SpoT-dependent (p)ppGpp accumulation, which stimulates AlgU-dependent transcription of treZ and other AlgU-regulated genes through DksA, a (p)ppGpp and RNA polymerase binding protein. Ethanol stimulation of trehalose also required acylhomoserine lactone (AHL)-mediated quorum sensing (QS), as induction was not observed in a ΔlasR ΔrhlR strain. A network analysis using a model, eADAGE, built from publicly available P. aeruginosa transcriptome data sets (J. Tan, G. Doing, K. A. Lewis, C. E. Price, et al., Cell Syst 5:63-71, 2017, https://doi.org/10.1016/j.cels.2017.06.003) provided strong support for our model in which treZ and coregulated genes are controlled by both AlgU- and AHL-mediated QS. 
Consistent with (p)ppGpp- and AHL-mediated quorum-sensing regulation, ethanol, even when added at the time of culture inoculation, stimulated treZ transcript levels and trehalose production in cells from post-exponential-phase cultures but not in cells from exponential-phase cultures. These data highlight the integration of growth and cell density cues in the P. aeruginosa transcriptional response to ethanol. IMPORTANCE Pseudomonas aeruginosa is often found with bacteria and fungi that produce fermentation products, including ethanol. At concentrations similar to those produced by environmental microbes, we found that ethanol stimulated expression of trehalose-biosynthetic genes and cellular levels of trehalose, a disaccharide that protects against environmental stresses. The induction of trehalose by ethanol required the alternative sigma factor AlgU through DksA- and SpoT-dependent (p)ppGpp. Trehalose accumulation also required AHL quorum sensing and occurred only in post-exponential-phase cultures. This work highlights how cells integrate cell density and growth cues in their responses to products made by other microbes and reveals a new role for (p)ppGpp in the regulation of AlgU activity.",2019-05-22 +30374457,Metapopulation Structure of CRISPR-Cas Immunity in Pseudomonas aeruginosa and Its Viruses. ,"Viruses that infect the widespread opportunistic pathogen Pseudomonas aeruginosa have been shown to influence physiology and critical clinical outcomes in cystic fibrosis (CF) patients. To understand how CRISPR-Cas immune interactions may contribute to the distribution and coevolution of P. aeruginosa and its viruses, we reconstructed CRISPR arrays from a highly sampled longitudinal data set from CF patients attending the Copenhagen Cystic Fibrosis Clinic in Copenhagen, Denmark (R. L. Marvig, L. M. Sommer, S. Molin, and H. K. Johansen, Nat Genet 47:57-64, 2015, https://doi.org/10.1038/ng.3148). 
We show that new spacers are not added to or deleted from CRISPR arrays over time within a single patient but do vary among patients in this data set. We compared assembled CRISPR arrays from this data set to CRISPR arrays extracted from 726 additional publicly available P. aeruginosa sequences to show that local diversity in this population encompasses global diversity and that there is no evidence for population structure associated with location or environment sampled. We compare over 3,000 spacers from our global data set to 98 lytic and temperate viruses and proviruses and find a subset of related temperate virus clusters frequently targeted by CRISPR spacers. Highly targeted viruses are matched by different spacers in different arrays, resulting in a pattern of distributed immunity within the global population. Understanding the multiple immune contexts that P. aeruginosa viruses face can be applied to study of P. aeruginosa gene transfer, the spread of epidemic strains in cystic fibrosis patients, and viral control of P. aeruginosa infection. IMPORTANCE Pseudomonas aeruginosa is a widespread opportunistic pathogen and a major cause of morbidity and mortality in cystic fibrosis patients. Microbe-virus interactions play a critical role in shaping microbial populations, as viral infections can kill microbial populations or contribute to gene flow among microbes. Investigating how P. aeruginosa uses its CRISPR immune system to evade viral infection aids our understanding of how this organism spreads and evolves alongside its viruses in humans and the environment. Here, we identify patterns of CRISPR targeting and immunity that indicate P. 
aeruginosa and its viruses evolve in both a broad global population and in isolated human ""islands."" These data set the stage for exploring metapopulation dynamics occurring within and between isolated ""island"" populations associated with CF patients, an essential step to inform future work predicting the specificity and efficacy of virus therapy and the spread of invasive viral elements and pathogenic epidemic bacterial strains.",2018-09-01 +29718355,MetExplore: collaborative edition and exploration of metabolic networks.,"Metabolism of an organism is composed of hundreds to thousands of interconnected biochemical reactions responding to environmental or genetic constraints. This metabolic network provides a rich knowledge to contextualize omics data and to elaborate hypotheses on metabolic modulations. Nevertheless, performing this kind of integrative analysis is challenging for end users with not sufficiently advanced computer skills since it requires the use of various tools and web servers. MetExplore offers an all-in-one online solution composed of interactive tools for metabolic network curation, network exploration and omics data analysis. In particular, it is possible to curate and annotate metabolic networks in a collaborative environment. The network exploration is also facilitated in MetExplore by a system of interactive tables connected to a powerful network visualization module. Finally, the contextualization of metabolic elements in the network and the calculation of over-representation statistics make it possible to interpret any kind of omics data. MetExplore is a sustainable project maintained since 2010 freely available at https://metexplore.toulouse.inra.fr/metexplore2/.",2018-07-01 +25070993,BC4GO: a full-text corpus for the BioCreative IV GO task. ,"Gene function curation via Gene Ontology (GO) annotation is a common task among Model Organism Database groups. 
Owing to its manual nature, this task is considered one of the bottlenecks in literature curation. There have been many previous attempts at automatic identification of GO terms and supporting information from full text. However, few systems have delivered an accuracy that is comparable with humans. One recognized challenge in developing such systems is the lack of marked sentence-level evidence text that provides the basis for making GO annotations. We aim to create a corpus that includes the GO evidence text along with the three core elements of GO annotations: (i) a gene or gene product, (ii) a GO term and (iii) a GO evidence code. To ensure our results are consistent with real-life GO data, we recruited eight professional GO curators and asked them to follow their routine GO annotation protocols. Our annotators marked up more than 5000 text passages in 200 articles for 1356 distinct GO terms. For evidence sentence selection, the inter-annotator agreement (IAA) results are 9.3% (strict) and 42.7% (relaxed) in F1-measures. For GO term selection, the IAAs are 47% (strict) and 62.9% (hierarchical). Our corpus analysis further shows that abstracts contain ∼ 10% of relevant evidence sentences and 30% distinct GO terms, while the Results/Experiment section has nearly 60% relevant sentences and >70% GO terms. Further, of those evidence sentences found in abstracts, less than one-third contain enough experimental detail to fulfill the three core criteria of a GO annotation. This result demonstrates the need of using full-text articles for text mining GO annotations. Through its use at the BioCreative IV GO (BC4GO) task, we expect our corpus to become a valuable resource for the BioNLP research community. 
Database URL: http://www.biocreative.org/resources/corpora/bc-iv-go-task-corpus/.",2014-07-28 +22135289,MMDB: 3D structures and macromolecular interactions.,"Close to 60% of protein sequences tracked in comprehensive databases can be mapped to a known three-dimensional (3D) structure by standard sequence similarity searches. Potentially, a great deal can be learned about proteins or protein families of interest from considering 3D structure, and to this day 3D structure data may remain an underutilized resource. Here we present enhancements in the Molecular Modeling Database (MMDB) and its data presentation, specifically pertaining to biologically relevant complexes and molecular interactions. MMDB is tightly integrated with NCBI's Entrez search and retrieval system, and mirrors the contents of the Protein Data Bank. It links protein 3D structure data with sequence data, sequence classification resources and PubChem, a repository of small-molecule chemical structures and their biological activities, facilitating access to 3D structure data not only for structural biologists, but also for molecular biologists and chemists. MMDB provides a complete set of detailed and pre-computed structural alignments obtained with the VAST algorithm, and provides visualization tools for 3D structure and structure/sequence alignment via the molecular graphics viewer Cn3D. MMDB can be accessed at http://www.ncbi.nlm.nih.gov/structure.",2011-12-01 +27924039,FlyRNAi.org-the database of the Drosophila RNAi screening center and transgenic RNAi project: 2017 update.,"The FlyRNAi database of the Drosophila RNAi Screening Center (DRSC) and Transgenic RNAi Project (TRiP) at Harvard Medical School and associated DRSC/TRiP Functional Genomics Resources website (http://fgr.hms.harvard.edu) serve as a reagent production tracking system, screen data repository, and portal to the community. 
Through this portal, we make available protocols, online tools, and other resources useful to researchers at all stages of high-throughput functional genomics screening, from assay design and reagent identification to data analysis and interpretation. In this update, we describe recent changes and additions to our website, database and suite of online tools. Recent changes reflect a shift in our focus from a single technology (RNAi) and model species (Drosophila) to the application of additional technologies (e.g. CRISPR) and support of integrated, cross-species approaches to uncovering gene function using functional genomics and other approaches.",2016-10-23 +29900309,A comprehensive metabolomic data set of date palm fruit.,"This article provides detailed information on the phenotypes and the metabolic profiles of 196 date fruits from 123 unique date fruit varieties. These date fruits are extensively diverse in their country of origin, variety and post harvesting conditions. We used a non-targeted mass-spectrometry based metabolomics approach to metabolically characterize date fruits, and measured 427 metabolites from a wide range of metabolic pathways. The metabolomics data for all the date fruit samples are available at the NIH Common Fund's Data Repository and Coordinating Center (supported by NIH grant, U01-DK097430) website, http://www.metabolomicsworkbench.org), under Metabolomics Workbench StudyID: ST000867. The data are directly accessible at http://www.metabolomicsworkbench.org/data/DRCCMetadata.php?Mode=Study&StudyID=ST000867&StudyType=MS&ResultType=1.",2018-04-10 +30717647,Characterization and identification of lysine glutarylation based on intrinsic interdependence between positions in the substrate sites.,"BACKGROUND:Glutarylation, the addition of a glutaryl group (five carbons) to a lysine residue of a protein molecule, is an important post-translational modification and plays a regulatory role in a variety of physiological and biological processes. 
As the number of experimentally identified glutarylated peptides increases, it becomes imperative to investigate substrate motifs to enhance the study of protein glutarylation. We carried out a bioinformatics investigation of glutarylation sites based on amino acid composition using a public database containing information on 430 non-homologous glutarylation sites. RESULTS:The TwoSampleLogo analysis indicates that positively charged and polar amino acids surrounding glutarylated sites may be associated with the specificity in substrate site of protein glutarylation. Additionally, the chi-squared test was utilized to explore the intrinsic interdependence between two positions around glutarylation sites. Further, maximal dependence decomposition (MDD), which consists of partitioning a large-scale dataset into subgroups with statistically significant amino acid conservation, was used to capture motif signatures of glutarylation sites. We considered single features, such as amino acid composition (AAC), amino acid pair composition (AAPC), and composition of k-spaced amino acid pairs (CKSAAP), as well as the effectiveness of incorporating MDD-identified substrate motifs into an integrated prediction model. Evaluation by five-fold cross-validation showed that AAC was most effective in discriminating between glutarylation and non-glutarylation sites, according to support vector machine (SVM). CONCLUSIONS:The SVM model integrating MDD-identified substrate motifs performed well, with a sensitivity of 0.677, a specificity of 0.619, an accuracy of 0.638, and a Matthews Correlation Coefficient (MCC) value of 0.28. Using an independent testing dataset (46 glutarylated and 92 non-glutarylated sites) obtained from the literature, we demonstrated that the integrated SVM model could improve the predictive performance effectively, yielding a balanced sensitivity and specificity of 0.652 and 0.739, respectively. 
This integrated SVM model has been implemented as a web-based system (MDDGlutar), which is now freely available at http://csb.cse.yzu.edu.tw/MDDGlutar/ .",2019-02-04 +23203886,MIPS PlantsDB: a database framework for comparative plant genome research.,"The rapidly increasing amount of plant genome (sequence) data enables powerful comparative analyses and integrative approaches and also requires structured and comprehensive information resources. Databases are needed for both model and crop plant organisms and both intuitive search/browse views and comparative genomics tools should communicate the data to researchers and help them interpret it. MIPS PlantsDB (http://mips.helmholtz-muenchen.de/plant/genomes.jsp) was initially described in NAR in 2007 [Spannagl,M., Noubibou,O., Haase,D., Yang,L., Gundlach,H., Hindemitt, T., Klee,K., Haberer,G., Schoof,H. and Mayer,K.F. (2007) MIPSPlantsDB-plant database resource for integrative and comparative plant genome research. Nucleic Acids Res., 35, D834-D840] and was set up from the start to provide data and information resources for individual plant species as well as a framework for integrative and comparative plant genome research. PlantsDB comprises database instances for tomato, Medicago, Arabidopsis, Brachypodium, Sorghum, maize, rice, barley and wheat. Building up on that, state-of-the-art comparative genomics tools such as CrowsNest are integrated to visualize and investigate syntenic relationships between monocot genomes. Results from novel genome analysis strategies targeting the complex and repetitive genomes of triticeae species (wheat and barley) are provided and cross-linked with model species. The MIPS Repeat Element Database (mips-REdat) and Catalog (mips-REcat) as well as tight connections to other databases, e.g. via web services, are further important components of PlantsDB.",2012-11-29 +28633399,The interfacial character of antibody paratopes: analysis of antibody-antigen structures.,"

Summary

In this study, computational methods are applied to investigate the general properties of antigen engaging residues of a paratope from a non-redundant dataset of 403 antibody-antigen complexes to dissect the contribution of hydrogen bonds, hydrophobic, van der Waals contacts and ionic interactions, as well as role of water molecules in the antigen-antibody interface. Consistent with previous reports using smaller datasets, we found that Tyr, Trp, Ser, Asn, Asp, Thr, Arg, Gly, His contribute substantially to the interactions between antibody and antigen. Furthermore, antibody-antigen interactions can be mediated by interfacial waters. However, there is no reported comprehensive analysis for a large number of structured waters that engage in higher ordered structures at the antibody-antigen interface. From our dataset, we have found the presence of interfacial waters in 242 complexes. We present evidence that suggests a compelling role of these interfacial waters in interactions of antibodies with a range of antigens differing in shape complementarity. Finally, we carry out 296 835 pairwise 3D structure comparisons of 771 structures of contact residues of antibodies with their interfacial water molecules from our dataset using CLICK method. A heuristic clustering algorithm is used to obtain unique structural similarities, and found to separate into 368 different clusters. These clusters are used to identify structural motifs of contact residues of antibodies for epitope binding.

Availability and implementation

This clustering database of contact residues is freely accessible at http://mspc.bii.a-star.edu.sg/minhn/pclick.html.

Contact

minhn@bii.a-star.edu.sg, chandra@bii.a-star.edu.sg or zhong_pingyu@immunol.a-star.edu.sg.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +27150362,Whole-Genome Sequencing for Routine Pathogen Surveillance in Public Health: a Population Snapshot of Invasive Staphylococcus aureus in Europe.,"

Unlabelled

The implementation of routine whole-genome sequencing (WGS) promises to transform our ability to monitor the emergence and spread of bacterial pathogens. Here we combined WGS data from 308 invasive Staphylococcus aureus isolates corresponding to a pan-European population snapshot, with epidemiological and resistance data. Geospatial visualization of the data is made possible by a generic software tool designed for public health purposes that is available at the project URL (http://www.microreact.org/project/EkUvg9uY?tt=rc). Our analysis demonstrates that high-risk clones can be identified on the basis of population level properties such as clonal relatedness, abundance, and spatial structuring and by inferring virulence and resistance properties on the basis of gene content. We also show that in silico predictions of antibiotic resistance profiles are at least as reliable as phenotypic testing. We argue that this work provides a comprehensive road map illustrating the three vital components for future molecular epidemiological surveillance: (i) large-scale structured surveys, (ii) WGS, and (iii) community-oriented database infrastructure and analysis tools.

Importance

The spread of antibiotic-resistant bacteria is a public health emergency of global concern, threatening medical intervention at every level of health care delivery. Several recent studies have demonstrated the promise of routine whole-genome sequencing (WGS) of bacterial pathogens for epidemiological surveillance, outbreak detection, and infection control. However, as this technology becomes more widely adopted, the key challenges of generating representative national and international data sets and the development of bioinformatic tools to manage and interpret the data become increasingly pertinent. This study provides a road map for the integration of WGS data into routine pathogen surveillance. We emphasize the importance of large-scale routine surveys to provide the population context for more targeted or localized investigation and the development of open-access bioinformatic tools to provide the means to combine and compare independently generated data with publicly available data sets.",2016-05-05 +31231133,The Incontinentia Pigmenti Genetic Biobank: study design and cohort profile to facilitate research into a rare disease worldwide.,"Incontinentia pigmenti (IP; OMIM#308300) is a rare genetic disease resulting in neuroectodermal defects, which can lead to disability. At present, there is neither definitive cure available nor are there any sufficiently reliable insights to predict the severity of the disease. We launched the Incontinentia Pigmenti Genetic Biobank (IPGB) project ( http://www.igb.cnr.it/ipgb ) in 2015 to establish a large-scale deposit of biological samples, to provide detailed clinical information about children diagnosed with IP and to facilitate research. We have built a cohort comprising samples of 381 clinically confirmed patients with IP and 633 healthy individuals recruited through IP patients' associations. 
The collection includes 269 trios, 83 duos, and 95 families with at least two affected members and represents an extensive dataset (200 cooperative medical institutes, 139 in Italy and 61 worldwide) that enables a comprehensive phenotyping. Joining the IPGB guarantees all participants access to the results including the genetic testing of IP and the long-term storage of the samples. The IPGB is the largest IP sample collection and one of the largest rare-disease-oriented collections in the world and will be open to requests for access to data by the national and international scientific community.",2019-06-23 +28490127,Milk bioactive peptide database: A comprehensive database of milk protein-derived bioactive peptides and novel visualization.,"During processing and digestion, milk proteins are disassembled into peptides with an array of biological functions, including antimicrobial, angiotensin-converting enzyme inhibition, antioxidant, opioid, and immunomodulation. These functions are summarized in numerous reviews, yet information on which peptides have which functions remains scattered across hundreds of research articles. We systematically searched the literature for all instances of bioactive peptides derived from milk proteins from any mammalian source. The data were compiled into a comprehensive database, which can be used to search for specific functions, peptides, or proteins (http://mbpdb.nws.oregonstate.edu). To review this large dataset, the bioactive peptides reported in the literature were visually mapped on the parent protein sequences, providing information on sites with highest abundance of bioactive peptides.",2017-04-12 +28155670,ESAP plus: a web-based server for EST-SSR marker development.,"

Background

Simple sequence repeats (SSRs) have become widely used as molecular markers in plant genetic studies due to their abundance, high allelic variation at each locus and simplicity to analyze using conventional PCR amplification. To study plants with unknown genome sequence, SSR markers from Expressed Sequence Tags (ESTs), which can be obtained from the plant mRNA (converted to cDNA), must be utilized. With the advent of high-throughput sequencing technology, huge EST sequence data have been generated and are now accessible from many public databases. However, SSR marker identification from a large in-house or public EST collection requires a computational pipeline that makes use of several standard bioinformatic tools to design high quality EST-SSR primers. Some of these computational tools are not users friendly and must be tightly integrated with reference genomic databases.

Results

A web-based bioinformatic pipeline, called EST Analysis Pipeline Plus (ESAP Plus), was constructed for assisting researchers to develop SSR markers from a large EST collection. ESAP Plus incorporates several bioinformatic scripts and some useful standard software tools necessary for the four main procedures of EST-SSR marker development, namely 1) pre-processing, 2) clustering and assembly, 3) SSR mining and 4) SSR primer design. The proposed pipeline also provides two alternative steps for reducing EST redundancy and identifying SSR loci. Using public sugarcane ESTs, ESAP Plus automatically executed the aforementioned computational pipeline via a simple web user interface, which was implemented using standard PHP, HTML, CSS and Java scripts. With ESAP Plus, users can upload raw EST data and choose various filtering options and parameters to analyze each of the four main procedures through this web interface. All input EST data and their predicted SSR results will be stored in the ESAP Plus MySQL database. Users will be notified via e-mail when the automatic process is completed and they can download all the results through the web interface.

Conclusions

ESAP Plus is a comprehensive and convenient web-based bioinformatic tool for SSR marker development. ESAP Plus offers all necessary EST-SSR development processes with various adjustable options that users can easily use to identify SSR markers from a large EST collection. With familiar web interface, users can upload the raw EST using the data submission page and visualize/download the corresponding EST-SSR information from within ESAP Plus. ESAP Plus can handle considerably large EST datasets. This EST-SSR discovery tool can be accessed directly from: http://gbp.kku.ac.th/esap_plus/ .",2016-12-22 +30645593,Adding function to the genome of African Salmonella Typhimurium ST313 strain D23580.,"Salmonella Typhimurium sequence type (ST) 313 causes invasive nontyphoidal Salmonella (iNTS) disease in sub-Saharan Africa, targeting susceptible HIV+, malarial, or malnourished individuals. An in-depth genomic comparison between the ST313 isolate D23580 and the well-characterized ST19 isolate 4/74 that causes gastroenteritis across the globe revealed extensive synteny. To understand how the 856 nucleotide variations generated phenotypic differences, we devised a large-scale experimental approach that involved the global gene expression analysis of strains D23580 and 4/74 grown in 16 infection-relevant growth conditions. Comparison of transcriptional patterns identified virulence and metabolic genes that were differentially expressed between D23580 versus 4/74, many of which were validated by proteomics. We also uncovered the S. Typhimurium D23580 and 4/74 genes that showed expression differences during infection of murine macrophages. Our comparative transcriptomic data are presented in a new enhanced version of the Salmonella expression compendium, SalComD23580: http://bioinf.gen.tcd.ie/cgi-bin/salcom_v2.pl. 
We discovered that the ablation of melibiose utilization was caused by three independent SNP mutations in D23580 that are shared across ST313 lineage 2, suggesting that the ability to catabolize this carbon source has been negatively selected during ST313 evolution. The data revealed a novel, to our knowledge, plasmid maintenance system involving a plasmid-encoded CysS cysteinyl-tRNA synthetase, highlighting the power of large-scale comparative multicondition analyses to pinpoint key phenotypic differences between bacterial pathovariants.",2019-01-15 +30452543,Bioinformatics methodologies for coeliac disease and its comorbidities. ,"Coeliac disease (CD) is a complex, multifactorial pathology caused by different factors, such as nutrition, immunological response and genetic factors. Many autoimmune diseases are comorbidities for CD, and a comprehensive and integrated analysis with bioinformatics approaches can help in evaluating the interconnections among all the selected pathologies. We first performed a detailed survey of gene expression data available in public repositories on CD and less commonly considered comorbidities. Then we developed an innovative pipeline that integrates gene expression, cell-type data and online resources (e.g. a list of comorbidities from the literature), using bioinformatics methods such as gene set enrichment analysis and semantic similarity. Our pipeline is written in R language, available at the following link: http://bioinformatica.isa.cnr.it/COELIAC_DISEASE/SCRIPTS/. We found a list of common differential expressed genes, gene ontology terms and pathways among CD and comorbidities and the closeness among the selected pathologies by means of disease ontology terms. 
Physicians and other researchers, such as molecular biologists, systems biologists and pharmacologists can use it to analyze pathology in detail, from differential expressed genes to ontologies, performing a comparison with the pathology comorbidities or with other diseases.",2018-11-16 +25768839,"A Commentary on: ""A History of the United States Department of Energy (DOE) Low Dose Radiation Research Program: 1998-2008"".","This commentary provides a very brief overview of the book ""A History of the United States Department of Energy (DOE) Low Dose Radiation Research Program: 1998-2008"" ( http://lowdose.energy.gov ). The book summarizes and evaluates the research progress, publications and impact of the U.S. Department of Energy Low Dose Radiation Research Program over its first 10 years. The purpose of this book was to summarize the impact of the program's research on the current thinking and low-dose paradigms associated with the radiation biology field and to help stimulate research on the potential adverse and/or protective health effects of low doses of ionizing radiation. In addition, this book provides a summary of the data generated in the low dose program and a scientific background for anyone interested in conducting future research on the effects of low-dose or low-dose-rate radiation exposure. This book's exhaustive list of publications coupled with discussions of major observations should provide a significant resource for future research in the low-dose and dose-rate region. However, because of space limitations, only a limited number of critical references are mentioned. 
Finally, this history book provides a list of major advancements that were accomplished by the program in the field of radiation biology, and these bulleted highlights can be found in last part of chapters 4-10.",2015-03-13 +31652105,Prenatal Exposure to Phthalates and Neurodevelopment in the CHAMACOS Cohort.,"BACKGROUND:Previous studies suggest that prenatal exposure to phthalates, ubiquitous synthetic chemicals, may adversely affect neurodevelopment. However, data are limited on how phthalates affect cognition, executive function, and behavioral function into adolescence. OBJECTIVE:We aimed to investigate associations of prenatal phthalate exposure with neurodevelopment in childhood and adolescence in the Center for the Health Assessment of Mothers and Children of Salinas (CHAMACOS) study. METHODS:We examined associations between maternal urinary phthalate metabolite concentrations measured twice during pregnancy and a range of neurodevelopmental outcomes from ages 7 through 16 y in the CHAMACOS birth cohort (n=334). We used age-specific linear regression models and generalized estimating equation models to assess longitudinal effects and examined differences by sex. RESULTS:Phthalate metabolites were detected in 88%-100% of samples, depending on the metabolite. Associations of phthalates with neurodevelopmental outcomes were largely null with some noteworthy patterns. Higher prenatal concentrations of metabolites of low-molecular weight phthalates (ΣLMW) were associated with more self-reported hyperactivity [β=0.8, 95% confidence interval (CI): 0.1, 1.4 per 2-fold increase in ΣLMW phthalates], attention problems (β=1.5, 95% CI: 0.7, 2.2), and anxiety (β=0.9, 95% CI: 0.0, 1.8) at age 16. 
We observed sex-specific differences for the sums of high-molecular-weight and di(2-ethylhexyl) metabolites and cognitive outcomes (e.g., β for Full-Scale IQ for boys=-1.9, 95% CI: -4.1, 0.3 and -1.7, 95% CI: -3.8, 0.3, respectively; β for girls=1.8, 95% CI: 0.1, 3.4 and 1.6, 95% CI: 0.0, 3.2, respectively; p-int=0.01 for both). CONCLUSION:We found predominantly null associations of prenatal phthalates with neurodevelopment in CHAMACOS, and weak associations of ΣLMW phthalates with internalizing and externalizing behaviors in adolescence. No previous studies have examined associations of prenatal phthalate exposure with neurodevelopment into adolescence, an important time for manifestations of effects. https://doi.org/10.1289/EHP5165.",2019-10-25 +24680097,Statistical parsing of varieties of clinical Finnish.,"

Objectives

In this paper, we study the development and domain-adaptation of statistical syntactic parsers for three different clinical domains in Finnish.

Methods and materials

The materials include text from daily nursing notes written by nurses in an intensive care unit, physicians' notes from cardiology patients' health records, and daily nursing notes from cardiology patients' health records. The parsing is performed with the statistical parser of Bohnet (http://code.google.com/p/mate-tools/, accessed: 22 November 2013).

Results

A parser trained only on general language performs poorly in all clinical subdomains, the labelled attachment score (LAS) ranging from 59.4% to 71.4%, whereas domain data combined with general language gives better results, the LAS varying between 67.2% and 81.7%. However, even a small amount of clinical domain data quickly outperforms this and also clinical data from other domains is more beneficial (LAS 71.3-80.0%) than general language only. The best results (LAS 77.4-84.6%) are achieved by using as training data the combination of all the clinical treebanks.

Conclusions

In order to develop a good syntactic parser for clinical language variants, a general language resource is not mandatory, while data from clinical fields is. However, in addition to the exact same clinical domain, also data from other clinical domains is useful.",2014-03-05 +27241320,Analysis of the dynamic co-expression network of heart regeneration in the zebrafish.,"The zebrafish has the capacity to regenerate its heart after severe injury. While the function of a few genes during this process has been studied, we are far from fully understanding how genes interact to coordinate heart regeneration. To enable systematic insights into this phenomenon, we generated and integrated a dynamic co-expression network of heart regeneration in the zebrafish and linked systems-level properties to the underlying molecular events. Across multiple post-injury time points, the network displays topological attributes of biological relevance. We show that regeneration steps are mediated by modules of transcriptionally coordinated genes, and by genes acting as network hubs. We also established direct associations between hubs and validated drivers of heart regeneration with murine and human orthologs. The resulting models and interactive analysis tools are available at http://infused.vital-it.ch. Using a worked example, we demonstrate the usefulness of this unique open resource for hypothesis generation and in silico screening for genes involved in heart regeneration.",2016-05-31 +29047407,A web-based collection of genotype-phenotype associations in hereditary recurrent fevers from the Eurofever registry.,"

Background

Hereditary recurrent fevers (HRF) are a group of rare monogenic diseases leading to recurrent inflammatory flares. A large number of variants has been described for the four genes associated with the best known HRF, namely MEFV, NLRP3, MVK, TNFRSF1A. The Infevers database ( http://fmf.igh.cnrs.fr/ISSAID/infevers ) is a large international registry collecting variants reported in these genes. However, no genotype-phenotype associations are provided, but only the clinical phenotype of the first patient(s) described for each mutation. The aim of this study is to develop a registry of genotype-phenotype associations observed in patients with HRF, enrolled and validated in the Eurofever registry.

Results

Genotype-phenotype associations observed in all the patients with HRF enrolled in the Eurofever registry were retrospectively analyzed. For autosomal dominant diseases (CAPS and TRAPS), all mutations were individually analyzed. For autosomal recessive diseases (FMF and MKD), homozygous and heterozygous combinations were described. Mean age of onset, disease course (recurrent or chronic), mean duration of fever episodes, clinical manifestations associated with fever episodes, atypical manifestations, complications and response to treatment were also studied. Data observed in 751 patients (346 FMF, 133 CAPS, 114 MKD, 158 TRAPS) included in the Eurofever registry and validated by experts were summarized in Tables. A total of 149 variants were described: 46 TNFRSF1A and 27 NLRP3 variants, as well as various combinations of 48 MVK and 28 MEFV variants were available.

Conclusions

We provide a potentially useful tool for physicians dealing with HRF, namely a registry of genotype-phenotype associations for patients enrolled in the Eurofever registry. This tool is complementary to the Infevers database and will be available at the Eurofever and Infevers websites.",2017-10-18 +30343417,Artificial Intelligence and the Future of the Drug Safety Professional.,"The healthcare industry, and specifically the pharmacovigilance industry, recognizes the need to support the increasing amount of data received from individual case safety reports (ICSRs). To cope with this increase, more healthcare and qualified professionals are required to capture and evaluate the data. To address the evolving landscape, it will be necessary to embrace assistive technologies such as artificial intelligence (AI) at scale. AI in the field of pharmacovigilance will possibly result in the transformation of the drug safety (DS) professional's daily work life and their career development. Celgene's Global Drug Safety and Risk Management (GDSRM) function has established a series of work activities to drive innovation across the pharmacovigilance value chain (Celgene Chrysalis Fact Sheet. https://www.celgene.com/newsroom/media-library/chrysalis-fact-sheet/, 2018). The development of AI in pharmacovigilance raises questions about the possible changes in DS professionals' lives, who may find themselves curious about their future roles in a workplace assisted by AI. We discuss the current state of pharmacovigilance and the DS professional, AI in pharmacovigilance and the potential skillsets a DS professional may require when working with AI. We also describe the results of research conducted at Celgene GDSRM. The objective of the research was to understand the thoughts of pharmacovigilance professionals about their jobs. 
These results are provided in the form of aggregated responses to interview questions based on a 12-part questionnaire [see the Electronic Supplementary Material (ESM)]. A sample of six DS professionals representing various areas of pharmacovigilance operations were asked a range of questions about their backgrounds, current roles and future expectations. The DS professionals interviewed were, overall, enthusiastic about their job roles potentially changing with AI enhancements. Interviewees suggested that AI would allow for pharmacovigilance resources, time, and skills to shift the work from a volume-based to a value-based focus. The results suggest that pharmacovigilance professionals wish to use their qualifications, skillsets and experience in work that provides more value for their efforts. Machine learning algorithms have the potential to enhance DS professionals' decision-making processes and support more efficient and accurate case processing.",2019-04-01 +23193260,Clone DB: an integrated NCBI resource for clone-associated data.,"The National Center for Biotechnology Information (NCBI) Clone DB (http://www.ncbi.nlm.nih.gov/clone/) is an integrated resource providing information about and facilitating access to clones, which serve as valuable research reagents in many fields, including genome sequencing and variation analysis. Clone DB represents an expansion and replacement of the former NCBI Clone Registry and has records for genomic and cell-based libraries and clones representing more than 100 different eukaryotic taxa. Records provide details of library construction, associated sequences, map positions and information about resource distribution. Clone DB is indexed in the NCBI Entrez system and can be queried by fields that include organism, clone name, gene name and sequence identifier. Whenever possible, genomic clones are mapped to reference assemblies and their map positions provided in clone records. 
Clones mapping to specific genomic regions can also be searched for using the NCBI Clone Finder tool, which accepts queries based on sequence coordinates or features such as gene or transcript names. Clone DB makes reports of library, clone and placement data on its FTP site available for download. With Clone DB, users now have available to them a centralized resource that provides them with the tools they will need to make use of these important research reagents.",2012-11-27 +31542453,An Individualized Prediction Model for Long-term Lung Function Trajectory and Risk of COPD in the General Population.,"

Background

Prediction of future lung function will enable the identification of individuals at high risk of developing COPD, but the trajectory of lung function decline varies greatly among individuals. This study involved the development and validation of an individualized prediction model of lung function trajectory and risk of airflow limitation in the general population.

Methods

Data were obtained from the Framingham Offspring Cohort, which included 4,167 participants ≥ 20 years of age and who had ≥ 2 valid spirometry assessments. The primary outcome was prebronchodilator FEV1; the secondary outcome was the risk of airflow limitation (defined as FEV1/FVC less than the lower limit of normal). Mixed effects regression models were developed for individualized prediction, and a machine learning algorithm was used to determine essential predictors. The model was validated in two large, independent multicenter cohorts (N = 2,075 and 12,913, respectively).

Results

With 20 common predictors, the model explained 79% of the variation in FEV1 decline in the derivation cohort. In two validation datasets, the model had low error in predicting FEV1 decline (root mean square error range, 0.18-0.22 L) and high discriminative power in predicting risk of airflow limitation (C-statistic range, 0.86-0.87). This model was implemented in a freely accessible website-based application, which allows prediction based on flexible sets of predictors (http://resp.core.ubc.ca/ipress/FraminghamFEV1).

Conclusions

The individualized predictor is an accurate tool to predict long-term lung function trajectories and risk of airflow limitation in the general population. This model enables identifying individuals at higher risk of COPD, who can then be targeted for preventive therapies.",2019-09-19 +29483591,"GourdBase: a genome-centered multi-omics database for the bottle gourd (Lagenaria siceraria), an economically important cucurbit crop.","GourdBase is an integrative data platform for the bottle gourd to examine its multifarious intuitive morphology and annotated genome. GourdBase consists of six main modules that store and interlink multi-omic data: the genome (with transcriptomic data integrated) module, the phenome module, the markers/QTLs module, the maps (genetic, physical and comparative) module, the cultivars module, and the publications module. These modules provide access to various type of data including the annotated reference genome sequence, gene models, transcriptomic data from various tissues, physical and comparative genome maps, molecular markers in different types, phenotypic data for featuring traits including fruit shape and umami taste, and quantitative trait loci (QTLs) that underlie these traits. GourdBase is intuitive, user-friendly and interlinked and is designed to allow researchers, breeders and trained farmers to browse, search and fetch information on interests and assist in genomics-driven studies and breeding. The knowledge base and web interface can be accessed at http://www.gourdbase.cn/ .",2018-02-26 +24886360,WebChem Viewer: a tool for the easy dissemination of chemical and structural data sets.,"

Background

Sharing sets of chemical data (e.g., chemical properties, docking scores, etc.) among collaborators with diverse skill sets is a common task in computer-aided drug design and medicinal chemistry. The ability to associate this data with images of the relevant molecular structures greatly facilitates scientific communication. There is a need for a simple, free, open-source program that can automatically export aggregated reports of entire chemical data sets to files viewable on any computer, regardless of the operating system and without requiring the installation of additional software.

Results

We here present a program called WebChem Viewer that automatically generates these types of highly portable reports. Furthermore, in designing WebChem Viewer we have also created a useful online web application for remotely generating molecular structures from SMILES strings. We encourage the direct use of this online application as well as its incorporation into other software packages.

Conclusions

With these features, WebChem Viewer enables interdisciplinary collaborations that require the sharing and visualization of small molecule structures and associated sets of heterogeneous chemical data. The program is released under the FreeBSD license and can be downloaded from http://nbcr.ucsd.edu/WebChemViewer. The associated web application (called ""Smiley2png 1.0"") can be accessed through freely available web services provided by the National Biomedical Computation Resource at http://nbcr.ucsd.edu.",2014-05-23 +30730193,"""Parameter uncertainty in structural equation models: Confidence sets and fungible estimates"": Correction to Pek and Wu (2018).","Reports an error in ""Parameter uncertainty in structural equation models: Confidence sets and fungible estimates"" by Jolynn Pek and Hao Wu (Psychological Methods, 2018[Dec], Vol 23[4], 635-653). In the article ""Parameter Uncertainty in Structural Equation Models: Confidence Sets and Fungible Estimates,"" by Jolynn Pek and Hao Wu (Psychological Methods, 2018, Vol. 23, No. 4, pp. 635-653. http://dx.doi.org/10.1037/met0000163), the copyright attribution was incorrect. The copyright should not have been ""In the public domain."" The online version of this article has been corrected. (The following abstract of the original article appeared in record 2018-00186-001.) Current concerns regarding the dependability of psychological findings call for methodological developments to provide additional evidence in support of scientific conclusions. This article highlights the value and importance of two distinct kinds of parameter uncertainty, which are quantified by confidence sets (CSs) and fungible parameter estimates (FPEs; Lee, MacCallum, & Browne, 2017); both provide essential information regarding the defensibility of scientific findings. 
Using the structural equation model, we introduce a general perturbation framework based on the likelihood function that unifies CSs and FPEs and sheds new light on the conceptual distinctions between them. A targeted illustration is then presented to demonstrate the factors which differentially influence CSs and FPEs, further highlighting their theoretical differences. With 3 empirical examples on initiating a conversation with a stranger (Bagozzi & Warshaw, 1988), posttraumatic growth of caregivers in the context of pediatric palliative care (Cadell et al., 2014), and the direct and indirect effects of spirituality on thriving among youth (Dowling, Gestsdottir, Anderson, von Eye, & Lerner, 2004), we illustrate how CSs and FPEs provide unique information which lead to better informed scientific conclusions. Finally, we discuss the importance of considering information afforded by CSs and FPEs in strengthening the basis of interpreting statistical results in substantive research, conclude with future research directions, and provide example OpenMx code for the computation of CSs and FPEs. (PsycINFO Database Record (c) 2019 APA, all rights reserved).",2019-02-01 +30672158,Implementation of Korean Clinical Imaging Guidelines: A Mobile App-Based Decision Support System.,"

Objective

The aims of this study were to develop a mobile app-based clinical decision support system (CDSS) for implementation of Korean clinical imaging guidelines (K-CIGs) and to assess future developments therein.

Materials and methods

K-CIGs were implemented in the form of a web-based application (http://cdss.or.kr/). The app containing K-CIGs consists of 53 information databases, including 10 medical subspecialties and 119 guidelines, developed by the Korean Society of Radiology (KSR) between 2015 and 2017. An email survey consisting of 18 questions on the implementation of K-CIGs and the mobile app-based CDSS was distributed to 43 members of the guideline working group (expert members of the KSR and Korean Academy of Oral and Maxillofacial Radiology) and 23 members of the consultant group (clinical experts belonging to related medical societies) to gauge opinion on the future developmental direction of K-CIGs.

Results

The web-based mobile app can be downloaded from the Google Play Store. Detailed information on the grade of recommendation, evidence level, and radiation dose for each imaging modality in the K-CIGs can be accessed via the home page and side menus. In total, 32 of the 66 experts contacted completed the survey (response rate, 45%). Twenty-four of the 32 respondents were from the working group and eight were from the consulting group. Most (93.8%) of the respondents agreed on the need for ongoing development and implementation of K-CIGs.

Conclusion

This study describes the mobile app-based CDSS designed for implementation of K-CIGs in Korea. The results will allow physicians to have easy access to the K-CIGs and encourage appropriate use of imaging modalities.",2019-02-01 +30717315,"Potential Applications of DNA, RNA and Protein Biomarkers in Diagnosis, Therapy and Prognosis for Colorectal Cancer: A Study from Databases to AI-Assisted Verification. ","In order to find out the most valuable biomarkers and pathways for diagnosis, therapy and prognosis in colorectal cancer (CRC) we have collected the published CRC biomarkers and established a CRC biomarker database (CBD: http://sysbio.suda.edu.cn/CBD/index.html). In this study, we analysed the single and multiple DNA, RNA and protein biomarkers as well as their positions in cancer related pathways and protein-protein interaction (PPI) networks to describe their potential applications in diagnosis, therapy and prognosis. CRC biomarkers were collected from the CBD. The RNA and protein biomarkers were matched to their corresponding DNAs by the miRDB database and the PubMed Gene database, respectively. The PPI networks were used to investigate the relationships between protein biomarkers and further detect the multiple biomarkers. The Kyoto Encyclopaedia of Genes and Genomes (KEGG) pathway enrichment analysis and Gene Ontology (GO) annotation were used to analyse biological functions of the biomarkers. AI classification techniques were utilized to further verify the significances of the multiple biomarkers in diagnosis and prognosis for CRC. We showed that a large number of the DNA, RNA and protein biomarkers were associated with the diagnosis, therapy and prognosis in various degrees in the CRC biomarker networks. The CRC biomarkers were closely related to the CRC initiation and progression. Moreover, the biomarkers played critical roles in cellular proliferation, apoptosis and angiogenesis and they were involved in Ras, p53 and PI3K pathways. 
There were overlaps among the DNA, RNA and protein biomarkers. AI classification verifications showed that the combined multiple protein biomarkers played important roles to accurate early diagnosis and predict outcome for CRC. There were several single and multiple CRC protein biomarkers which were associated with diagnosis, therapy and prognosis in CRC. Further, AI-assisted analysis revealed that multiple biomarkers had potential applications for diagnosis and prognosis in CRC.",2019-02-01 +30423079,Liquid-chromatography retention order prediction for metabolite identification.,"

Motivation

Liquid Chromatography (LC) followed by tandem Mass Spectrometry (MS/MS) is one of the predominant methods for metabolite identification. In recent years, machine learning has started to transform the analysis of tandem mass spectra and the identification of small molecules. In contrast, LC data is rarely used to improve metabolite identification, despite numerous published methods for retention time prediction using machine learning.

Results

We present a machine learning method for predicting the retention order of molecules; that is, the order in which molecules elute from the LC column. Our method has important advantages over previous approaches: We show that retention order is much better conserved between instruments than retention time. To this end, our method can be trained using retention time measurements from different LC systems and configurations without tedious pre-processing, significantly increasing the amount of available training data. Our experiments demonstrate that retention order prediction is an effective way to learn retention behaviour of molecules from heterogeneous retention time data. Finally, we demonstrate how retention order prediction and MS/MS-based scores can be combined for more accurate metabolite identifications when analyzing a complete LC-MS/MS run.

Availability and implementation

Implementation of the method is available at https://version.aalto.fi/gitlab/bache1/retention_order_prediction.git.",2018-09-01 +28111161,CORAL: Binary classifications (active/inactive) for drug-induced liver injury.,"

Introduction

The data on human hepatotoxicity (drug-induced liver injury) is extremely important information from point of view of drug discovery. Experimental clinical data on this endpoint is scarce. Experimental way to extend databases on this endpoint is extremely difficult. Quantitative structure - activity relationships (QSAR) is attractive alternative of the experimental approach.

Methods

Predictive models for human hepatotoxicity (drug-induced liver injury) have been built up by the Monte Carlo method with using of the CORAL software (http://www.insilico.eu/coral). These models are the binary classifications into active class and inactive class. These models are calculated with so-called ""semi correlations"" described in this work. The Matthews correlation coefficient of these models for external validation sets ranged from 0.52 to 0.62.

Results discussion

The approach has been checked up with a group of random splits into the training and validation sets. These stochastic experiments have shown the stability of results: predictability of the models for various splits. Thus, the attempt to build up the classification QSAR model by means of the Monte Carlo technique, based on representation of the molecular structure via simplified molecular input line entry systems (SMILES) and hydrogen suppressed graph (HSG) using the CORAL software (http://www.insilico.eu/coral) has shown ability of this approach to provide quite good prediction of the examined endpoint (drug-induced liver injury).",2017-01-19 +23221299,Targeted journal curation as a method to improve data currency at the Comparative Toxicogenomics Database.,"The Comparative Toxicogenomics Database (CTD) is a public resource that promotes understanding about the effects of environmental chemicals on human health. CTD biocurators read the scientific literature and manually curate a triad of chemical-gene, chemical-disease and gene-disease interactions. Typically, articles for CTD are selected using a chemical-centric approach by querying PubMed to retrieve a corpus containing the chemical of interest. Although this technique ensures adequate coverage of knowledge about the chemical (i.e. data completeness), it does not necessarily reflect the most current state of all toxicological research in the community at large (i.e. data currency). Keeping databases current with the most recent scientific results, as well as providing a rich historical background from legacy articles, is a challenging process. To address this issue of data currency, CTD designed and tested a journal-centric approach of curation to complement our chemical-centric method. We first identified priority journals based on defined criteria. Next, over 7 weeks, three biocurators reviewed 2425 articles from three consecutive years (2009-2011) of three targeted journals. 
From this corpus, 1252 articles contained relevant data for CTD and 52 752 interactions were manually curated. Here, we describe our journal selection process, two methods of document delivery for the biocurators and the analysis of the resulting curation metrics, including data currency, and both intra-journal and inter-journal comparisons of research topics. Based on our results, we expect that curation by select journals can (i) be easily incorporated into the curation pipeline to complement our chemical-centric approach; (ii) build content more evenly for chemicals, genes and diseases in CTD (rather than biasing data by chemicals-of-interest); (iii) reflect developing areas in environmental health and (iv) improve overall data currency for chemicals, genes and diseases. Database URL: http://ctdbase.org/",2012-12-06 +30032213,StackDPPred: a stacking based prediction of DNA-binding protein from sequence.,"

Motivation

Identification of DNA-binding proteins from only sequence information is one of the most challenging problems in the field of genome annotation. DNA-binding proteins play an important role in various biological processes such as DNA replication, repair, transcription and splicing. Existing experimental techniques for identifying DNA-binding proteins are time-consuming and expensive. Thus, prediction of DNA-binding proteins from sequences alone using computational methods can be useful to quickly annotate and guide the experimental process. Most of the methods developed for predicting DNA-binding proteins use the information from the evolutionary profile, called the position-specific scoring matrix (PSSM) profile, alone and the accuracies of such methods have been limited. Here, we propose a method, called StackDPPred, which utilizes features extracted from PSSM and residue specific contact-energy to help train a stacking based machine learning method for the effective prediction of DNA-binding proteins.

Results

Based on benchmark sequences of 1063 (518 DNA-binding and 545 non DNA-binding) proteins and using jackknife validation, StackDPPred achieved an ACC of 89.96%, MCC of 0.799 and AUC of 94.50%. This outcome outperforms several state-of-the-art approaches. Furthermore, when tested on recently designed two independent test datasets, StackDPPred outperforms existing approaches consistently. The proposed StackDPPred can be used for effective prediction of DNA-binding proteins from sequence alone.

Availability and implementation

Online server is at http://bmll.cs.uno.edu/add and code-data is at http://cs.uno.edu/~tamjid/Software/StackDPPred/code_data.zip.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +29186324,Robust inference of population structure from next-generation sequencing data with systematic differences in sequencing.,"Motivation:Inferring population structure is important for both population genetics and genetic epidemiology. Principal components analysis (PCA) has been effective in ascertaining population structure with array genotype data but can be difficult to use with sequencing data, especially when low depth leads to uncertainty in called genotypes. Because PCA is sensitive to differences in variability, PCA using sequencing data can result in components that correspond to differences in sequencing quality (read depth and error rate), rather than differences in population structure. We demonstrate that even existing methods for PCA specifically designed for sequencing data can still yield biased conclusions when used with data having sequencing properties that are systematically different across different groups of samples (i.e. sequencing groups). This situation can arise in population genetics when combining sequencing data from different studies, or in genetic epidemiology when using historical controls such as samples from the 1000 Genomes Project. Results:To allow inference on population structure using PCA in these situations, we provide an approach that is based on using sequencing reads directly without calling genotypes. Our approach is to adjust the data from different sequencing groups to have the same read depth and error rate so that PCA does not generate spurious components representing sequencing quality. To accomplish this, we have developed a subsampling procedure to match the depth distributions in different sequencing groups, and a read-flipping procedure to match the error rates. We average over subsamples and read flips to minimize loss of information. 
We demonstrate the utility of our approach using two datasets from 1000 Genomes, and further evaluate it using simulation studies. Availability and implementation:TASER-PC software is publicly available at http://web1.sph.emory.edu/users/yhu30/software.html. Contact:yijuan.hu@emory.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +31638837,Examining the Shape of the Association between Low Levels of Fine Particulate Matter and Mortality across Three Cycles of the Canadian Census Health and Environment Cohort.,"BACKGROUND:Ambient fine particulate air pollution with aerodynamic diameter ≤2.5 μm (PM2.5) is an important contributor to the global burden of disease. Information on the shape of the concentration-response relationship at low concentrations is critical for estimating this burden, setting air quality standards, and in benefits assessments. OBJECTIVES:We examined the concentration-response relationship between PM2.5 and nonaccidental mortality in three Canadian Census Health and Environment Cohorts (CanCHECs) based on the 1991, 1996, and 2001 census cycles linked to mobility and mortality data. METHODS:Census respondents were linked with death records through 2016, resulting in 8.5 million adults, 150 million years of follow-up, and 1.5 million deaths. Using annual mailing address, we assigned time-varying contextual variables and 3-y moving-average ambient PM2.5 at a 1×1 km spatial resolution from 1988 to 2015. We ran Cox proportional hazards models for PM2.5 adjusted for eight subject-level indicators of socioeconomic status, seven contextual covariates, ozone, nitrogen dioxide, and combined oxidative potential. We used three statistical methods to examine the shape of the concentration-response relationship between PM2.5 and nonaccidental mortality. RESULTS:The mean 3-y annual average estimate of PM2.5 exposure ranged from 6.7 to 8.0 μg/m3 over the three cohorts. 
We estimated a hazard ratio (HR) of 1.053 [95% confidence interval (CI): 1.041, 1.065] per 10-μg/m3 change in PM2.5 after pooling the three cohort-specific hazard ratios, with some variation between cohorts (1.041 for the 1991 and 1996 cohorts and 1.084 for the 2001 cohort). We observed a supralinear association in all three cohorts. The lower bound of the 95% CIs exceeded unity for all concentrations in the 1991 cohort, for concentrations above 2 μg/m3 in the 1996 cohort, and above 5 μg/m3 in the 2001 cohort. DISCUSSION:In a very large population-based cohort with up to 25 y of follow-up, PM2.5 was associated with nonaccidental mortality at concentrations as low as 5 μg/m3. https://doi.org/10.1289/EHP5204.",2019-10-22 +23272654,PROGmiR: a tool for identifying prognostic miRNA biomarkers in multiple cancers using publicly available data.,"

Unlabelled

Background

Identification of prognostic biomarkers is hallmark of cancer genomics. Since miRNAs regulate expression of multiple genes, they act as potent biomarkers in several cancers. Identification of miRNAs that are prognostically important has been done sporadically, but no resource is available till date that allows users to study prognostics of miRNAs of interest, utilizing the wealth of available data, in major cancer types.

Description

In this paper, we present a web based tool that allows users to study prognostic properties of miRNAs in several cancer types, using publicly available data. We have compiled data from Gene Expression Omnibus (GEO), and recently developed ""The Cancer Genome Atlas (TCGA)"", to create this tool. The tool is called ""PROGmiR"" and it is available at http://www.compbio.iupui.edu/progmir. Currently, our tool can be used to study overall survival implications for approximately 1050 human miRNAs in 16 major cancer types.

Conclusions

We believe this resource, as a hypothesis generation tool, will be helpful for researchers to link miRNA expression with cancer outcome and to design mechanistic studies. We studied performance of our tool using identified miRNA biomarkers from published studies. The prognostic plots created using our tool for specific miRNAs in specific cancer types corroborated with the findings in the studies.",2012-12-28 +22707908,A dataset from bottom trawl survey around Taiwan.,"Bottom trawl fishery is one of the most important coastal fisheries in Taiwan both in production and economic values. However, its annual production started to decline due to overfishing since the 1980s. Its bycatch problem also damages the fishery resource seriously. Thus, the government banned the bottom fishery within 3 nautical miles along the shoreline in 1989. To evaluate the effectiveness of this policy, a four year survey was conducted from 2000-2003, in the waters around Taiwan and Penghu (Pescadore) Islands, one region each year respectively. All fish specimens collected from trawling were brought back to lab for identification, individual number count and body weight measurement. These raw data have been integrated and established in Taiwan Fish Database (http://fishdb.sinica.edu.tw). They have also been published through TaiBIF (http://taibif.tw), FishBase and GBIF (website see below). This dataset contains 631 fish species and 3,529 records, making it the most complete demersal fish fauna and their temporal and spatial distributional data on the soft marine habitat in Taiwan.",2012-05-30 +30740494,Draft genome sequence data of Lactobacillus paracasei strain DTA83 isolated from infant stools.,"Here the draft genome sequence of Lactobacillus paracasei strain DTA83, isolated from stools of healthy infants in Rio de Janeiro (Brazil), is reported. The 2.8-Mb genome possesses 2825 protein-coding sequences distributed on 330 SEED subsystems. 
This strain belongs to a set of potentially probiotic Lactobacillus spp. strains used to study genetic factors related to antibiotic resistance after stress conditions, such as simulated gastrointestinal conditions. The complete genome data have been deposited in GenBank under the accession number QRBH00000000, https://www.ncbi.nlm.nih.gov/nuccore/QRBH00000000.",2019-01-19 +30101275,KEDDY: a knowledge-based statistical gene set test method to detect differential functional protein-protein interactions.,"

Motivation

Identifying differential patterns between conditions is a popular approach to understanding the discrepancy between different biological contexts. Although many statistical tests were proposed for identifying gene sets with differential patterns based on different definitions of differentiality, few methods were suggested to identify gene sets with differential functional protein networks due to computational complexity.

Results

We propose a method of Knowledge-based Evaluation of Dependency DifferentialitY (KEDDY), which is a statistical test for differential functional protein networks of a set of genes between two conditions with utilizing known functional protein-protein interaction information. Unlike other approaches focused on differential expressions of individual genes or differentiality of individual interactions, KEDDY compares two conditions by evaluating the probability distributions of functional protein networks based on known functional protein-protein interactions. The method has been evaluated and compared with previous methods through simulation studies, where KEDDY achieves significantly improved performance in accuracy and speed than the previous method that does not use prior knowledge and better performance in identifying gene sets with differential interactions than other methods evaluating changes in gene expressions. Applications to cancer data sets show that KEDDY identifies alternative cancer subtype-related differential gene sets compared to other differential expression-based methods, and the results also provide detailed gene regulatory information that drives the differentiality of the gene sets.

Availability and implementation

The Java implementation of KEDDY is freely available to non-commercial users at https://sites.google.com/site/sjunggsm/keddy.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +25324307,Platinum: a database of experimentally measured effects of mutations on structurally defined protein-ligand complexes.,"Drug resistance is a major challenge for the treatment of many diseases and a significant concern throughout the drug development process. The ability to understand and predict the effects of mutations on protein-ligand affinities and their roles in the emergence of resistance would significantly aid treatment and drug design strategies. In order to study and understand the impacts of missense mutations on the interaction of ligands with the proteome, we have developed Platinum (http://structure.bioc.cam.ac.uk/platinum). This manually curated, literature-derived database, comprising over 1000 mutations, associates for the first time experimental information on changes in affinity with three-dimensional structures of protein-ligand complexes. To minimize differences arising from experimental techniques and to directly compare binding affinities, Platinum considers only changes measured by the same group and with the same amino-acid sequence used for structure determination, providing a direct link between protein structure, how a ligand binds and how mutations alter the affinity of the ligand of the protein. We believe Platinum will be an invaluable resource for understanding the effects of mutations that give rise to drug resistance, a major problem emerging in pandemics including those caused by the influenza virus, in infectious diseases such as tuberculosis, in cancer and in many other life-threatening illnesses.",2014-10-16 +27899675,"JET2 Viewer: a database of predicted multiple, possibly overlapping, protein-protein interaction sites for PDB structures.","The database JET2 Viewer, openly accessible at http://www.jet2viewer.upmc.fr/, reports putative protein binding sites for all three-dimensional (3D) structures available in the Protein Data Bank (PDB). 
This knowledge base was generated by applying the computational method JET2 at large-scale on more than 20 000 chains. JET2 strategy yields very precise predictions of interacting surfaces and unravels their evolutionary process and complexity. JET2 Viewer provides an online intelligent display, including interactive 3D visualization of the binding sites mapped onto PDB structures and suitable files recording JET2 analyses. Predictions were evaluated on more than 15 000 experimentally characterized protein interfaces. This is, to our knowledge, the largest evaluation of a protein binding site prediction method. The overall performance of JET2 on all interfaces are: Sen = 52.52, PPV = 51.24, Spe = 80.05, Acc = 75.89. The data can be used to foster new strategies for protein-protein interactions modulation and interaction surface redesign.",2016-11-28 +24165880,EBI metagenomics--a new resource for the analysis and archiving of metagenomic data.,"Metagenomics is a relatively recently established but rapidly expanding field that uses high-throughput next-generation sequencing technologies to characterize the microbial communities inhabiting different ecosystems (including oceans, lakes, soil, tundra, plants and body sites). Metagenomics brings with it a number of challenges, including the management, analysis, storage and sharing of data. In response to these challenges, we have developed a new metagenomics resource (http://www.ebi.ac.uk/metagenomics/) that allows users to easily submit raw nucleotide reads for functional and taxonomic analysis by a state-of-the-art pipeline, and have them automatically stored (together with descriptive, standards-compliant metadata) in the European Nucleotide Archive.",2013-10-27 +,Land surface temperature retrieval over circumpolar Arctic using SSM/I–SSMIS and MODIS data,"Remote sensing instruments are key players to map land surface temperature (LST) at large temporal and spatial scales. 
In this paper, we present how we combine passive microwave and thermal infrared data to estimate LST during summer snow-free periods over northern high latitudes. The methodology is based on the SSM/I–SSMIS 37GHz measurements at both vertical and horizontal polarizations on a 25km×25km grid size. LST is retrieved from brightness temperatures introducing an empirical linear relationship between emissivities at both polarizations as described in Royer and Poirier (2010). This relationship is calibrated at pixel scale, using cloud-free independent LST data from MODIS instruments. The SSM/I–SSMIS and MODIS data are synchronized by fitting a diurnal cycle model built on skin temperature reanalysis provided by the European Centre for Medium-Range Weather Forecasts (ECMWF). The resulting temperature dataset is provided at 25km scale and at an hourly time step during the ten-year analysis period (2000–2011). This new product was locally evaluated at five experimental sites of the EU-PAGE21 project against air temperature measurements and meteorological model reanalysis, and compared to the MODIS LST product at both local and circumpolar scale. The results giving a mean RMSE of the order of 2.2K demonstrate the usefulness of the microwave product, which is unaffected by clouds as opposed to thermal infrared products and offers a better resolution compared to model reanalysis. The dataset can be downloaded from the PANGAEA website: http://doi.pangaea.de/10.1594/PANGAEA.833409.",2015-06-01 +24792048,A protocol for visual analysis of alternative splicing in RNA-Seq data using integrated genome browser.,"Ultrahigh-throughput sequencing of cDNA (RNA-Seq) is an invaluable resource for investigating alternative splicing in an organism. Alternative splicing is a form of posttranscriptional regulation in which primary RNA transcripts from a single gene can be spliced in multiple ways leading to different RNA and protein products. 
In plants and other species, it has been shown that many genes involved in circadian regulation are alternatively spliced. As new RNA-Seq data sets become available, these data will lead to new insights into links between regulation RNA splicing and the circadian system. Analyzing RNA-Seq data sets requires software tools that can display RNA-Seq read alignments alongside gene models, enabling assessment of how treatments or developmental stages affect splicing patterns and production of novel variants. The Integrated Genome Browser (IGB) software program is a free and flexible desktop tool that enables discovery and quantification of alternative splicing. In this protocol, we use IGB and a cold-stress RNA-Seq data set to examine alternative splicing of Arabidopsis thaliana LHY, a circadian clock regulator. IGB is freely available from http://www.bioviz.org .",2014-01-01 +30588712,Population-neuroscience study of the Tokyo TEEN Cohort (pn-TTC): Cohort longitudinal study to explore the neurobiological substrates of adolescent psychological and behavioral development.,"

Aim

Adolescence is a crucial stage of psychological development and is critically vulnerable to the onset of psychopathology. Our understanding of how the maturation of endocrine, epigenetics, and brain circuit may underlie psychological development in adolescence, however, has not been integrated. Here, we introduce our research project, the population-neuroscience study of the Tokyo TEEN Cohort (pn-TTC), a longitudinal study to explore the neurobiological substrates of development during adolescence.

Methods

Participants in the first wave of the pn-TTC (pn-TTC-1) study were recruited from those of the TTC study, a large-scale epidemiological survey in which 3171 parent-adolescent pairs were recruited from the general population. Participants underwent psychological, cognitive, sociological, and physical assessment. Moreover, adolescents and their parents underwent magnetic resonance imaging (MRI; structural MRI, resting-state functional MRI, and magnetic resonance spectroscopy), and adolescents provided saliva samples for hormone analysis and for DNA analysis including epigenetics. Furthermore, the second wave (pn-TTC-2) followed similar methods as in the first wave.

Results

A total of 301 parent-adolescent pairs participated in the pn-TTC-1 study. Moreover, 281 adolescents participated in the pn-TTC-2 study, 238 of whom were recruited from the pn-TTC-1 sample. The instruction for data request is available at: http://value.umin.jp/data-resource.html.

Conclusion

The pn-TTC project is a large-scale and population-neuroscience-based survey with a plan of longitudinal biennial follow up. Through this approach we seek to elucidate adolescent developmental mechanisms according to biopsychosocial models. This current biomarker research project, using minimally biased samples recruited from the general population, has the potential to expand the new research field of population neuroscience.",2019-02-19 +30200994,TAP: a targeted clinical genomics pipeline for detecting transcript variants using RNA-seq data.,"

Background

RNA-seq is a powerful and cost-effective technology for molecular diagnostics of cancer and other diseases, and it can reach its full potential when coupled with validated clinical-grade informatics tools. Despite recent advances in long-read sequencing, transcriptome assembly of short reads remains a useful and cost-effective methodology for unveiling transcript-level rearrangements and novel isoforms. One of the major concerns for adopting the proven de novo assembly approach for RNA-seq data in clinical settings has been the analysis turnaround time. To address this concern, we have developed a targeted approach to expedite assembly and analysis of RNA-seq data.

Results

Here we present our Targeted Assembly Pipeline (TAP), which consists of four stages: 1) alignment-free gene-level classification of RNA-seq reads using BioBloomTools, 2) de novo assembly of individual targets using Trans-ABySS, 3) alignment of assembled contigs to the reference genome and transcriptome with GMAP and BWA and 4) structural and splicing variant detection using PAVFinder. We show that PAVFinder is a robust gene fusion detection tool when compared to established methods such as Tophat-Fusion and deFuse on simulated data of 448 events. Using the Leucegene acute myeloid leukemia (AML) RNA-seq data and a set of 580 COSMIC target genes, TAP identified a wide range of hallmark molecular anomalies including gene fusions, tandem duplications, insertions and deletions in agreement with published literature results. Moreover, also in this dataset, TAP captured AML-specific splicing variants such as skipped exons and novel splice sites reported in studies elsewhere. Running time of TAP on 100-150 million read pairs and a 580-gene set is one to 2 hours on a 48-core machine.

Conclusions

We demonstrated that TAP is a fast and robust RNA-seq variant detection pipeline that is potentially amenable to clinical applications. TAP is available at http://www.bcgsc.ca/platform/bioinfo/software/pavfinder.",2018-09-10 +28701418,Laboratory Workflow Analysis of Culture of Periprosthetic Tissues in Blood Culture Bottles.,"Culture of periprosthetic tissue specimens in blood culture bottles is more sensitive than conventional techniques, but the impact on laboratory workflow has yet to be addressed. Herein, we examined the impact of culture of periprosthetic tissues in blood culture bottles on laboratory workflow and cost. The workflow was process mapped, decision tree models were constructed using probabilities of positive and negative cultures drawn from our published study (T. N. Peel, B. L. Dylla, J. G. Hughes, D. T. Lynch, K. E. Greenwood-Quaintance, A. C. Cheng, J. N. Mandrekar, and R. Patel, mBio 7:e01776-15, 2016, https://doi.org/10.1128/mBio.01776-15), and the processing times and resource costs from the laboratory staff time viewpoint were used to compare periprosthetic tissues culture processes using conventional techniques with culture in blood culture bottles. Sensitivity analysis was performed using various rates of positive cultures. Annualized labor savings were estimated based on salary costs from the U.S. Labor Bureau for Laboratory staff. The model demonstrated a 60.1% reduction in mean total staff time with the adoption of tissue inoculation into blood culture bottles compared to conventional techniques (mean ± standard deviation, 30.7 ± 27.6 versus 77.0 ± 35.3 h per month, respectively; P < 0.001). The estimated annualized labor cost savings of culture using blood culture bottles was $10,876.83 (±$337.16). Sensitivity analysis was performed using various rates of culture positivity (5 to 50%). Culture in blood culture bottles was cost-effective, based on the estimated labor cost savings of $2,132.71 for each percent increase in test accuracy. 
In conclusion, culture of periprosthetic tissue in blood culture bottles is not only more accurate than but is also cost-saving compared to conventional culture methods.",2017-07-12 +27511743,VHLdb: A database of von Hippel-Lindau protein interactors and mutations.,"Mutations in von Hippel-Lindau tumor suppressor protein (pVHL) predispose to develop tumors affecting specific target organs, such as the retina, epididymis, adrenal glands, pancreas and kidneys. Currently, more than 400 pVHL interacting proteins are either described in the literature or predicted in public databases. This data is scattered among several different sources, slowing down the comprehension of pVHL's biological role. Here we present VHLdb, a novel database collecting available interaction and mutation data on pVHL to provide novel integrated annotations. In VHLdb, pVHL interactors are organized according to two annotation levels, manual and automatic. Mutation data are easily accessible and a novel visualization tool has been implemented. A user-friendly feedback function to improve database content through community-driven curation is also provided. VHLdb presently contains 478 interactors, of which 117 have been manually curated, and 1,074 mutations. This makes it the largest available database for pVHL-related information. VHLdb is available from URL: http://vhldb.bio.unipd.it/.",2016-08-11 +31662803,"The Open-source Data Inventory for Anthropogenic Carbon dioxide (CO2), version 2016 (ODIAC2016): A global, monthly fossil-fuel CO2 gridded emission data product for tracer transport simulations and surface flux inversions.","The Open-source Data Inventory for Anthropogenic CO2 (ODIAC) is a global high-spatial resolution gridded emission data product that distributes carbon dioxide (CO2) emissions from fossil fuel combustion. 
The emission spatial distributions are estimated at a 1×1 km spatial resolution over land using power plant profiles (emission intensity and geographical location) and satellite-observed nighttime lights. This paper describes the year 2016 version of the ODIAC emission data product (ODIAC2016) and presents analyses that help guiding data users, especially for atmospheric CO2 tracer transport simulations and flux inversion analysis. Since the original publication in 2011, we have made modifications to our emission modeling framework in order to deliver a comprehensive global gridded emission data product. Major changes from the 2011 publication are 1) the use of emissions estimates made by the Carbon Dioxide Information Analysis Center (CDIAC) at the Oak Ridge National Laboratory (ORNL) by fuel type (solid, liquid, gas, cement manufacturing, gas flaring and international aviation and marine bunkers), 2) the use of multiple spatial emission proxies by fuel type such as nightlight data specific to gas flaring and ship/aircraft fleet tracks and 3) the inclusion of emission temporal variations. Using global fuel consumption data, we extrapolated the CDIAC emissions estimates for the recent years and produced the ODIAC2016 emission data product that covers 2000-2015. Our emission data can be viewed as an extended version of CDIAC gridded emission data product, which should allow data users to impose global fossil fuel emissions in more comprehensive manner than original CDIAC product. Our new emission modeling framework allows us to produce future versions of ODIAC emission data product with a timely update. Such capability has become more significant given the CDIAC/ORNL's shutdown. 
ODIAC data product could play an important role to support carbon cycle science, especially modeling studies with space-based CO2 data collected near real time by ongoing carbon observing missions such as Japanese Greenhouse Observing SATellite (GOSAT), NASA's Orbiting Carbon Observatory 2 (OCO-2) and upcoming future missions. The ODIAC emission data product including the latest version of the ODIAC emission data (ODIAC2017, 2000-2016), is distributed from http://db.cger.nies.go.jp/dataset/ODIAC/ with a DOI.",2018-01-18 +31114870,QBiC-Pred: quantitative predictions of transcription factor binding changes due to sequence variants.,"Non-coding genetic variants/mutations can play functional roles in the cell by disrupting regulatory interactions between transcription factors (TFs) and their genomic target sites. For most human TFs, a myriad of DNA-binding models are available and could be used to predict the effects of DNA mutations on TF binding. However, information on the quality of these models is scarce, making it hard to evaluate the statistical significance of predicted binding changes. Here, we present QBiC-Pred, a web server for predicting quantitative TF binding changes due to nucleotide variants. QBiC-Pred uses regression models of TF binding specificity trained on high-throughput in vitro data. The training is done using ordinary least squares (OLS), and we leverage distributional results associated with OLS estimation to compute, for each predicted change in TF binding, a P-value reflecting our confidence in the predicted effect. We show that OLS models are accurate in predicting the effects of mutations on TF binding in vitro and in vivo, outperforming widely-used PWM models as well as recently developed deep learning models of specificity. QBiC-Pred takes as input mutation datasets in several formats, and it allows post-processing of the results through a user-friendly web interface. 
QBiC-Pred is freely available at http://qbic.genome.duke.edu.",2019-07-01 +30317875,The index of ideality of correlation: improvement of models for toxicity to algae.,"Toxicity to algae is important characteristic of substances from ecologic point of view. The CORAL software ( http://www.insilico.eu/coral ) gives possibility to build up model of toxicity to algae using data on the molecular architecture and experimental toxicity, without additional data on physicochemical and/or biochemical parameters. Considerable improvement of the model is observed in the case of using the index of ideality of correlation (IIC) in the role of additional criterion of predictive potential. The IIC is calculated with using of the correlation coefficient between experimental and calculated values of endpoint for the calibration set, with taking into account the positive and negative dispersions between experimental and calculated values. The best model calculated with use the IIC is characterized (the validation set) by n = 50, r2 = 0.947, RMSE = 0.401 whereas, model calculated without use the IIC is characterized by n = 50, r2 = 0.805, and RMSE = 0.539. The suggested models are built up in accordance to five OECD principles.",2018-10-15 +28472356,BepiPred-2.0: improving sequence-based B-cell epitope prediction using conformational epitopes.,"Antibodies have become an indispensable tool for many biotechnological and clinical applications. They bind their molecular target (antigen) by recognizing a portion of its structure (epitope) in a highly specific manner. The ability to predict epitopes from antigen sequences alone is a complex task. Despite substantial effort, limited advancement has been achieved over the last decade in the accuracy of epitope prediction methods, especially for those that rely on the sequence of the antigen only. Here, we present BepiPred-2.0 (http://www.cbs.dtu.dk/services/BepiPred/), a web server for predicting B-cell epitopes from antigen sequences. 
BepiPred-2.0 is based on a random forest algorithm trained on epitopes annotated from antibody-antigen protein structures. This new method was found to outperform other available tools for sequence-based epitope prediction both on epitope data derived from solved 3D structures, and on a large collection of linear epitopes downloaded from the IEDB database. The method displays results in a user-friendly and informative way, both for computer-savvy and non-expert users. We believe that BepiPred-2.0 will be a valuable tool for the bioinformatics and immunology community.",2017-07-01 +31510688,A divide-and-conquer method for scalable phylogenetic network inference from multilocus data.,"

Motivation

Reticulate evolutionary histories, such as those arising in the presence of hybridization, are best modeled as phylogenetic networks. Recently developed methods allow for statistical inference of phylogenetic networks while also accounting for other processes, such as incomplete lineage sorting. However, these methods can only handle a small number of loci from a handful of genomes.

Results

In this article, we introduce a novel two-step method for scalable inference of phylogenetic networks from the sequence alignments of multiple, unlinked loci. The method infers networks on subproblems and then merges them into a network on the full set of taxa. To reduce the number of trinets to infer, we formulate a Hitting Set version of the problem of finding a small number of subsets, and implement a simple heuristic to solve it. We studied their performance, in terms of both running time and accuracy, on simulated as well as on biological datasets. The two-step method accurately infers phylogenetic networks at a scale that is infeasible with existing methods. The results are a significant and promising step towards accurate, large-scale phylogenetic network inference.

Availability and implementation

We implemented the algorithms in the publicly available software package PhyloNet (https://bioinfocs.rice.edu/PhyloNet).

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +31106361,NetGO: improving large-scale protein function prediction with massive network information.,"Automated function prediction (AFP) of proteins is of great significance in biology. AFP can be regarded as a problem of the large-scale multi-label classification where a protein can be associated with multiple gene ontology terms as its labels. Based on our GOLabeler-a state-of-the-art method for the third critical assessment of functional annotation (CAFA3), in this paper we propose NetGO, a web server that is able to further improve the performance of the large-scale AFP by incorporating massive protein-protein network information. Specifically, the advantages of NetGO are threefold in using network information: (i) NetGO relies on a powerful learning to rank framework from machine learning to effectively integrate both sequence and network information of proteins; (ii) NetGO uses the massive network information of all species (>2000) in STRING (other than only some specific species) and (iii) NetGO still can use network information to annotate a protein by homology transfer, even if it is not contained in STRING. Separating training and testing data with the same time-delayed settings of CAFA, we comprehensively examined the performance of NetGO. Experimental results have clearly demonstrated that NetGO significantly outperforms GOLabeler and other competing methods. The NetGO web server is freely available at http://issubmission.sjtu.edu.cn/netgo/.",2019-07-01 +31028399,NGPhylogeny.fr: new generation phylogenetic services for non-specialists.,"Phylogeny.fr, created in 2008, has been designed to facilitate the execution of phylogenetic workflows, and is nowadays widely used. However, since its development, user needs have evolved, new tools and workflows have been published, and the number of jobs has increased dramatically, thus promoting new practices, which motivated its refactoring. 
We developed NGPhylogeny.fr to be more flexible in terms of tools and workflows, easily installable, and more scalable. It integrates numerous tools in their latest version (e.g. TNT, FastME, MrBayes, etc.) as well as new ones designed in the last ten years (e.g. PhyML, SMS, FastTree, trimAl, BOOSTER, etc.). These tools cover a large range of usage (sequence searching, multiple sequence alignment, model selection, tree inference and tree drawing) and a large panel of standard methods (distance, parsimony, maximum likelihood and Bayesian). They are integrated in workflows, which have been already configured ('One click'), can be customized ('Advanced'), or are built from scratch ('A la carte'). Workflows are managed and run by an underlying Galaxy workflow system, which makes workflows more scalable in terms of number of jobs and size of data. NGPhylogeny.fr is deployable on any server or personal computer, and is freely accessible at https://ngphylogeny.fr.",2019-07-01 +31533899,PLANET-SNP pipeline: PLants based ANnotation and Establishment of True SNP pipeline.,"Acute prediction of SNPs (Single Nucleotide Polymorphisms) from high throughput sequencing data is a challenging problem, having potential to explore possible variation within plants species. For the extraction of profitable information from bulk of data, machine learning (ML) could lead to development of accurate model based on the learning of prior information. We performed state of art, in-depth learning on six different plant species. Comparative evaluation of five different algorithms showed that Random Forest substantially outperformed in selection of potential SNPs, with markedly improved prediction accuracy via 10-fold cross validation technique and integrated in system known as PLANET-SNP. We present the accurate method to extract the potential SNPs with user specific customizable parameters. It will facilitate the identification of efficient and functional SNPs in most easy and intuitive way. 
PLANET-SNP pipeline is very flexible in terms of data input and output formats. PLANET-SNP Pipeline is available at http://www.ncgd.nbri.res.in/PLANET-SNP-Pipeline.aspx.",2018-07-03 +22857741,e-MIR2: a public online inventory of medical informatics resources.,"

Background

Over the past years, the number of available informatics resources in medicine has grown exponentially. While specific inventories of such resources have already begun to be developed for Bioinformatics (BI), comparable inventories are as yet not available for the Medical Informatics (MI) field, so that locating and accessing them currently remains a difficult and time-consuming task.

Description

We have created a repository of MI resources from the scientific literature, providing free access to its contents through a web-based service. We define informatics resources as all those elements that constitute, serve to define or are used by informatics systems, ranging from architectures or development methodologies to terminologies, vocabularies, databases or tools. Relevant information describing the resources is automatically extracted from manuscripts published in top-ranked MI journals. We used a pattern matching approach to detect the resources' names and their main features. Detected resources are classified according to three different criteria: functionality, resource type and domain. To facilitate these tasks, we have built three different classification schemas by following a novel approach based on folksonomies and social tagging. We adopted the terminology most frequently used by MI researchers in their publications to create the concepts and hierarchical relationships belonging to the classification schemas. The classification algorithm identifies the categories associated with resources and annotates them accordingly. The database is then populated with this data after manual curation and validation.

Conclusions

We have created an online repository of MI resources to assist researchers in locating and accessing the most suitable resources to perform specific tasks. The database contains 609 resources at the time of writing and is available at http://www.gib.fi.upm.es/eMIR2. We are continuing to expand the number of available resources by taking into account further publications as well as suggestions from users and resource developers.",2012-08-02 +31088927,Bordetella pertussis Can Be Motile and Express Flagellum-Like Structures.,"Bordetella bronchiseptica encodes and expresses a flagellar apparatus. In contrast, Bordetella pertussis, the causative agent of whooping cough, has historically been described as a nonmotile and nonflagellated organism. The previous statements that B. pertussis was a nonmotile organism were consistent with a stop codon located in the flagellar biosynthesis gene, flhA, discovered when the B. pertussis Tohama I genome was sequenced and analyzed by Parkhill et al. in 2003 (J. Parkhill, M. Sebaihia, A. Preston, L. D. Murphy, et al., Nat Genet, 35:32-40, 2003, https://doi.org/10.1038/ng1227). The stop codon has subsequently been found in all annotated genomes. Parkhill et al. also showed, however, that B. pertussis contains all genetic material required for flagellar synthesis and function. We and others have determined by various transcriptomic analyses that these flagellar genes are differentially regulated under a variety of B. pertussis growth conditions. In light of these data, we tested for B. pertussis motility and found that both laboratory-adapted strains and clinical isolates can be motile. Upon isolation of motile B. pertussis, we discovered flagellum-like structures on the surface of the bacteria. B. pertussis motility appears to occur primarily in the Bvg(-) phase, consistent with regulation present in B. bronchiseptica Motility can also be induced by the presence of fetal bovine serum. These observations demonstrate that B. 
pertussis can express flagellum-like structures, and although it remains to be determined if B. pertussis expresses flagella during infection or if motility and/or flagella play roles during the cycle of infection and transmission, it is clear that these data warrant further investigation.IMPORTANCE This report provides evidence for motility and expression of flagella by B. pertussis, a bacterium that has been reported as nonmotile since it was first isolated and studied. As with B. bronchiseptica, B. pertussis cells can express and assemble a flagellum-like structure on their surface, which in other organisms has been implicated in several important processes that occur in vivo The discovery that B. pertussis is motile raises many questions, including those regarding the mechanisms of regulation for flagellar gene and protein expression and, importantly, the role of flagella during infection. This novel observation provides a foundation for further study of Bordetella flagella and motility in the contexts of infection and transmission.",2019-05-14 +31021009,Usability testing of MySkinSelfie: a mobile phone application for skin self-monitoring.,"Teledermatology generally involves doctors taking images of patients; however, patients increasingly want to own or have easy access to their health data. MySkinSelfie ( http://myskinselfie.com) is a mobile phone application (app) designed to improve the quality, consistency and accessibility of patient-held photos, and was developed to give patients the ability to generate and hold their own skin images to help guide their skin care. This study assessed the usability of this app in a cohort of patients attending a National Health Service Dermatology clinic. Patients were asked to use the app but were not given specific tasks to achieve. Of the 102 patients recruited, 32 downloaded the app and registered an account, 21 took at least one photo (median 5, range 1-103) and 19 completed the usability questionnaire. 
The majority of questionnaire respondents found the app easy to use but were more neutral on whether it really helped them to manage their skin problem. MySkinSelfie has been shown to be easy to use. Self-monitoring of skin problems may be useful for a subset of patients, and this is likely to depend on diagnosis, age and other patient factors.",2019-05-21 +30932916,Correlation Between Neurologic Impairment Grade and Ambulation Status in the Adult Spina Bifida Population.,"

Objective

The aim of the study was to identify which neurologic impairment scales correlate with ambulation status in adults with spina bifida.

Design

A retrospective chart review was performed on patients seen at the University of Pittsburgh Medical Center Adult Spina Bifida Clinic. Findings were graded using several neurologic impairment scales: two versions of the National Spina Bifida Patient Registry classification, the International Standards for Neurological Classification of Spinal Cord Injury motor level, and the Broughton Neurologic Impairment Scale. Ambulation ability was ranked using the Hoffer classification system.

Results

Data collected from 409 patient records showed significant correlations between Hoffer ambulation status and all neurologic impairment scales evaluated. The strongest correlation was noted with the Broughton classification (rs = -0.771, P < 0.001). High correlations were also noted with both versions of the National Spina Bifida Patient Registry: strength 3/5 or greater (rs = -0.763, P < 0.001), and strength 1/5 or greater (rs = -0.716, P < 0.001). For the International Standards for Neurological Classification of Spinal Cord Injury motor level, only a moderate correlation was observed (rs = -0.565, P < 0.001).

Conclusions

Multiple grading scales can be used to measure motor function in adult spina bifida patients. Although the Broughton classification seems to be the most highly correlated with ambulation status, the less complex National Spina Bifida Patient Registry scale is also highly correlated and may be easier to administer in busy clinic settings.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) Explain the clinical significance of identifying ambulation status and maximizing ambulation potential in adults with spina bifida; (2) Describe each of the neurologic grading scales examined in this study, identifying potential shortcomings in applying them to the adult spina bifida population; and (3) Administer the National Spina Bifida Patient Registry (NSBPR) impairment scale motor assessment in a standard adult spina bifida outpatient clinic visit.

Level

Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2019-12-01 +23696878,PEpiD: a prostate epigenetic database in mammals.,"Epigenetic mechanisms play key roles in initiation and progression of prostate cancer by changing gene expression. The Prostate Epigenetic Database (PEpiD: http://wukong.tongji.edu.cn/pepid) archives the three extensively characterized epigenetic mechanisms DNA methylation, histone modification, and microRNA implicated in prostate cancer of human, mouse, and rat. PEpiD uses a distinct color scheme to present the three types of epigenetic data and provides a user-friendly interface for flexible query. The retrieved information includes Refseq ID, gene symbol, gene alias, genomic loci of epigenetic changes, tissue source, experimental method, and supportive references. The change of histone modification (hyper or hypo) and the corresponding gene expression change (up or down) are also indicated. A graphic view of DNA methylation with exon-intron structure and predicted CpG islands is provided as well. Moreover, the prostate-related ENCODE tracks (DNA methylation, histone modifications, chromatin remodelers), and other key transcription factors with reported roles in prostate are displayed in the browser as well. The reversibility of epigenetic aberrations has made them potential markers for diagnosis and prognosis, and targets for treatment of cancers. 
This curated information will improve our understanding of epigenetic mechanisms of gene regulation in prostate cancer, and serve as an important resource for epigenetic research in prostate cancer.",2013-05-16 +25911153,ProtoBug: functional families from the complete proteomes of insects.,"ProtoBug (http://www.protobug.cs.huji.ac.il) is a database and resource of protein families in Arthropod genomes. ProtoBug platform presents the relatedness of complete proteomes from 17 insects as well as a proteome of the crustacean, Daphnia pulex. The represented proteomes from insects include louse, bee, beetle, ants, flies and mosquitoes. Based on an unsupervised clustering method, protein sequences were clustered into a hierarchical tree, called ProtoBug. ProtoBug covers about 300,000 sequences that are partitioned to families. At the default setting, all sequences are partitioned to ∼20,000 families (excluding singletons). From the species perspective, each of the 18 analysed proteomes is composed of 5000-8000 families. In the regime of the advanced operational mode, the ProtoBug provides rich navigation capabilities for touring the hierarchy of the families at any selected resolution. A proteome viewer shows the composition of sequences from any of the 18 analysed proteomes. Using functional annotation from an expert system (Pfam) we assigned domains, families and repeats by 4400 keywords that cover 73% of the sequences. A strict inference protocol is applied for expanding the functional knowledge. Consequently, secured annotations were associated with 81% of the proteins, and with 70% of the families (≥10 proteins each). ProtoBug is a database and webtool with rich visualization and navigation tools. The properties of each family in relation to other families in the ProtoBug tree, and in view of the taxonomy composition are reported. Furthermore, the user can paste its own sequences to find relatedness to any of the ProtoBug families. 
The database and the navigation tools are the basis for functional discoveries that span 350 million years of evolution of Arthropods. ProtoBug is available with no restriction at: www.protobug.cs.huji.ac.il. Database URL: www.protobug.cs.huji.ac.il",2015-04-24 +22080511,ADHDgene: a genetic database for attention deficit hyperactivity disorder.,"With a worldwide prevalence of ~5%, attention deficit hyperactivity disorder (ADHD) has become one of the most common psychiatric disorders. The polygenetic nature of ADHD indicates that multiple genes jointly contribute to the development of this complex disease. Studies aiming to explore genetic susceptibility of ADHD have been increasing in recent years. There is a growing need to integrate the genetic data from various genetic studies to provide a comprehensive data set and uniform access for convenience of in-depth data mining. So far, there has been no such effort for ADHD. To address the genetic complexity of ADHD, we developed the ADHDgene database by integrating ADHD-related genetic factors by profound literature reading. Based on the data from the literature, extended functional analysis, including linkage disequilibrium analysis, pathway-based analysis and gene mapping were performed to provide new insights into genetic causes of ADHD. Moreover, powerful search tools and a graphical browser were developed to facilitate the navigation of the data and data connections. As the first genetic database for ADHD, ADHDgene aims to provide researchers with a central genetic resource and analysis platform for ADHD and is freely available at http://adhd.psych.ac.cn/.",2011-11-10 +25053277,First report on the antibody verification of MICA epitopes recorded in the HLA epitope registry.,The International Registry of HLA Epitopes (http://epregistry.com.br) has been recently established as a tool to understand antibody responses to HLA mismatches. 
These epitopes are defined structurally by three-dimensional molecular modelling and amino acid sequence differences between HLA antigens. A major goal was to identify HLA epitopes that have been verified experimentally with informative antibodies. This report addresses the identification of MICA epitopes. Our analysis included published information about MICA antibody reactivity in sera from sensitized patients as well as data from our own laboratories. This report describes twenty-one MICA epitopes verified with antibodies which have primarily been tested in Luminex assays with single alleles. The epitopes correspond to distinct eplets that are often defined by single residues. The Registry is still a work-in-progress and will become a useful resource for HLA professionals interested in histocompatibility testing at the epitope level and investigating antibody responses to HLA mismatches in transplant patients.,2014-07-22 +29504905,Closha: bioinformatics workflow system for the analysis of massive sequencing data.,"BACKGROUND:While next-generation sequencing (NGS) costs have fallen in recent years, the cost and complexity of computation remain substantial obstacles to the use of NGS in bio-medical care and genomic research. The rapidly increasing amounts of data available from the new high-throughput methods have made data processing infeasible without automated pipelines. The integration of data and analytic resources into workflow systems provides a solution to the problem by simplifying the task of data analysis. RESULTS:To address this challenge, we developed a cloud-based workflow management system, Closha, to provide fast and cost-effective analysis of massive genomic data. We implemented complex workflows making optimal use of high-performance computing clusters. Closha allows users to create multi-step analyses using drag and drop functionality and to modify the parameters of pipeline tools. Users can also import the Galaxy pipelines into Closha. 
Closha is a hybrid system that enables users to use both analysis programs providing traditional tools and MapReduce-based big data analysis programs simultaneously in a single pipeline. Thus, the execution of analytics algorithms can be parallelized, speeding up the whole process. We also developed a high-speed data transmission solution, KoDS, to transmit a large amount of data at a fast rate. KoDS has a file transfer speed of up to 10 times that of normal FTP and HTTP. The computer hardware for Closha is 660 CPU cores and 800 TB of disk storage, enabling 500 jobs to run at the same time. CONCLUSIONS:Closha is a scalable, cost-effective, and publicly available web service for large-scale genomic data analysis. Closha supports the reliable and highly scalable execution of sequencing analysis workflows in a fully automated manner. Closha provides a user-friendly interface to all genomic scientists to try to derive accurate results from NGS platform data. The Closha cloud server is freely available for use from http://closha.kobic.re.kr/ .",2018-02-19 +30416012,Spatiotemporal m(i)RNA Architecture and 3' UTR Regulation in the C. elegans Germline.,"In animal germlines, regulation of cell proliferation and differentiation is particularly important but poorly understood. Here, using a cryo-cut approach, we mapped RNA expression along the Caenorhabditis elegans germline and, using mutants, dissected gene regulatory mechanisms that control spatiotemporal expression. We detected, at near single-cell resolution, >10,000 mRNAs, >300 miRNAs, and numerous unannotated miRNAs. Most RNAs were organized in distinct spatial patterns. Germline-specific miRNAs and their targets were co-localized. Moreover, we observed differential 3' UTR isoform usage for hundreds of mRNAs. In tumorous gld-2 gld-1 mutants, gene expression was strongly perturbed. In particular, differential 3' UTR usage was significantly impaired. 
We propose that PIE-1, a transcriptional repressor, functions to maintain spatial gene expression. Our data also suggest that cpsf-4 and fipp-1 control differential 3' UTR usage for hundreds of genes. Finally, we constructed a ""virtual gonad"" enabling ""virtual in situ hybridizations"" and access to all data (https://shiny.mdc-berlin.de/spacegerm/).",2018-11-08 +30627219,Application of an interpretable classification model on Early Folding Residues during protein folding.,"

Background

Machine learning strategies are prominent tools for data analysis. Especially in life sciences, they have become increasingly important to handle the growing datasets collected by the scientific community. Meanwhile, algorithms improve in performance, but also gain complexity, and tend to neglect interpretability and comprehensiveness of the resulting models.

Results

Generalized Matrix Learning Vector Quantization (GMLVQ) is a supervised, prototype-based machine learning method and provides comprehensive visualization capabilities not present in other classifiers which allow for a fine-grained interpretation of the data. In contrast to commonly used machine learning strategies, GMLVQ is well-suited for imbalanced classification problems which are frequent in life sciences. We present a Weka plug-in implementing GMLVQ. The feasibility of GMLVQ is demonstrated on a dataset of Early Folding Residues (EFR) that have been shown to initiate and guide the protein folding process. Using 27 features, an area under the receiver operating characteristic of 76.6% was achieved which is comparable to other state-of-the-art classifiers. The obtained model is accessible at https://biosciences.hs-mittweida.de/efpred/.

Conclusions

The application on EFR prediction demonstrates how an easy interpretation of classification models can promote the comprehension of biological mechanisms. The results shed light on the special features of EFR which were reported as most influential for the classification: EFR are embedded in ordered secondary structure elements and they participate in networks of hydrophobic residues. Visualization capabilities of GMLVQ are presented as we demonstrate how to interpret the results.",2019-01-05 +27389461,"Fate, behaviour and weathering of priority HNS in the marine environment: An online tool.","Literature data and data obtained with modelling tools were compiled to derive the physicochemical behaviour of 24 priority Hazardous and Noxious Substances (HNS), as a proxy to improve environmental, public health and political issues in relation to HNS spills. Parameters that rule the HNS behaviour in water and those that determine their distribution and persistence in the environment, such as fugacity, physicochemical degradation, biodegradation, bioaccumulation/biotransformation and aquatic toxicity, were selected. Data systematized and produced in the frame of the Arcopol Platform project was made available through a public database (http://www.ciimar.up.pt/hns/substances.php). This tool is expected to assist stakeholders involved in HNS spills preparedness and response, policy makers and legislators, as well as to contribute to a current picture of the scientific knowledge on the fate, behaviour, weathering and toxicity of priority HNS, being essential to support future improvements in maritime safety and coastal pollution response before, during and after spill incidents.",2016-07-04 +31570101,"Development of the Australian Cancer Atlas: spatial modelling, visualisation, and reporting of estimates.","BACKGROUND:It is well known that the burden caused by cancer can vary geographically, which may relate to differences in health, economics or lifestyle. 
However, to date, there was no comprehensive picture of how the cancer burden, measured by cancer incidence and survival, varied by small geographical area across Australia. METHODS:The Atlas consists of 2148 Statistical Areas level 2 across Australia defined by the Australian Statistical Geography Standard which provide the best compromise between small population and small area. Cancer burden was estimated for males, females, and persons separately, with 50 unique sex-specific (males, females, all persons) cancer types analysed. Incidence and relative survival were modelled with Bayesian spatial models using the Leroux prior which was carefully selected to provide adequate spatial smoothing while reflecting genuine geographic variation. Markov Chain Monte Carlo estimation was used because it facilitates quantifying the uncertainty of the posterior estimates numerically and visually. RESULTS:The results of the statistical model and visualisation development were published through the release of the Australian Cancer Atlas ( https://atlas.cancer.org.au ) in September, 2018. The Australian Cancer Atlas provides the first freely available, digital, interactive picture of cancer incidence and survival at the small geographical level across Australia with a focus on incorporating uncertainty, while also providing the tools necessary for accurate estimation and appropriate interpretation and decision making. CONCLUSIONS:The success of the Atlas will be measured by how widely it is used by key stakeholders to guide research and inform decision making. It is hoped that the Atlas and the methodology behind it motivates new research opportunities that lead to improvements in our understanding of the geographical patterns of cancer burden, possible causes or risk factors, and the reasons for differences in variation between cancer types, both within Australia and globally. 
Future versions of the Atlas are planned to include new data sources to include indicators such as cancer screening and treatment, and extensions to the statistical methods to incorporate changes in geographical patterns over time.",2019-10-01 +24753414,VAP: a versatile aggregate profiler for efficient genome-wide data representation and discovery.,"The analysis of genomic data such as ChIP-Seq usually involves representing the signal intensity level over genes or other genetic features. This is often illustrated as a curve (representing the aggregate profile of a group of genes) or as a heatmap (representing individual genes). However, no specific resource dedicated to easily generating such profiles is currently available. We therefore built the versatile aggregate profiler (VAP), designed to be used by experimental and computational biologists to generate profiles of genomic datasets over groups of regions of interest, using either an absolute or a relative method. Graphical representation of the results is automatically generated, and subgrouping can be performed easily, based on the orientation of the flanking annotations. The outputs include statistical measures to facilitate comparisons between groups or datasets. We show that, through its intuitive design and flexibility, VAP can help avoid misinterpretations of genomics data. VAP is highly efficient and designed to run on laptop computers by using a memory footprint control, but can also be easily compiled and run on servers. VAP is accessible at http://lab-jacques.recherche.usherbrooke.ca/vap/.",2014-04-21 +29764365,Towards pan-genome read alignment to improve variation calling.,"BACKGROUND:Typical human genome differs from the reference genome at 4-5 million sites. This diversity is increasingly catalogued in repositories such as ExAC/gnomAD, consisting of >15,000 whole-genomes and >126,000 exome sequences from different individuals. 
Despite this enormous diversity, resequencing data workflows are still based on a single human reference genome. Identification and genotyping of genetic variants is typically carried out on short-read data aligned to a single reference, disregarding the underlying variation. RESULTS:We propose a new unified framework for variant calling with short-read data utilizing a representation of human genetic variation - a pan-genomic reference. We provide a modular pipeline that can be seamlessly incorporated into existing sequencing data analysis workflows. Our tool is open source and available online: https://gitlab.com/dvalenzu/PanVC . CONCLUSIONS:Our experiments show that by replacing a standard human reference with a pan-genomic one we achieve an improvement in single-nucleotide variant calling accuracy and in short indel calling accuracy over the widely adopted Genome Analysis Toolkit (GATK) in difficult genomic regions.",2018-05-09 +29912458,CircadiOmics: circadian omic web portal.,"Circadian rhythms play a fundamental role at all levels of biological organization. Understanding the mechanisms and implications of circadian oscillations continues to be the focus of intense research. However, there has been no comprehensive and integrated way for accessing and mining all circadian omic datasets. The latest release of CircadiOmics (http://circadiomics.ics.uci.edu) fills this gap for providing the most comprehensive web server for studying circadian data. The newly updated version contains high-throughput 227 omic datasets corresponding to over 74 million measurements sampled over 24 h cycles. Users can visualize and compare oscillatory trajectories across species, tissues and conditions. Periodicity statistics (e.g. period, amplitude, phase, P-value, q-value etc.) obtained from BIO_CYCLE and other methods are provided for all samples in the repository and can easily be downloaded in the form of publication-ready figures and tables. 
New features and substantial improvements in performance and data volume make CircadiOmics a powerful web portal for integrated analysis of circadian omic data.",2018-07-01 +27980099,The BioGRID interaction database: 2017 update.,"The Biological General Repository for Interaction Datasets (BioGRID: https://thebiogrid.org) is an open access database dedicated to the annotation and archival of protein, genetic and chemical interactions for all major model organism species and humans. As of September 2016 (build 3.4.140), the BioGRID contains 1 072 173 genetic and protein interactions, and 38 559 post-translational modifications, as manually annotated from 48 114 publications. This dataset represents interaction records for 66 model organisms and represents a 30% increase compared to the previous 2015 BioGRID update. BioGRID curates the biomedical literature for major model organism species, including humans, with a recent emphasis on central biological processes and specific human diseases. To facilitate network-based approaches to drug discovery, BioGRID now incorporates 27 501 chemical-protein interactions for human drug targets, as drawn from the DrugBank database. A new dynamic interaction network viewer allows the easy navigation and filtering of all genetic and protein interaction data, as well as for bioactive compounds and their established targets. BioGRID data are directly downloadable without restriction in a variety of standardized formats and are freely distributed through partner model organism databases and meta-databases.",2016-12-14 +31437145,ProtFus: A Comprehensive Method Characterizing Protein-Protein Interactions of Fusion Proteins.,"Tailored therapy aims to cure cancer patients effectively and safely, based on the complex interactions between patients' genomic features, disease pathology and drug metabolism. 
Thus, the continual increase in scientific literature drives the need for efficient methods of data mining to improve the extraction of useful information from texts based on patients' genomic features. An important application of text mining to tailored therapy in cancer encompasses the use of mutations and cancer fusion genes as moieties that change patients' cellular networks to develop cancer, and also affect drug metabolism. Fusion proteins, which are derived from the slippage of two parental genes, are produced in cancer by chromosomal aberrations and trans-splicing. Given that the two parental proteins for predicted fusion proteins are known, we used our previously developed method for identifying chimeric protein-protein interactions (ChiPPIs) associated with the fusion proteins. Here, we present a validation approach that receives fusion proteins of interest, predicts their cellular network alterations by ChiPPI and validates them by our new method, ProtFus, using an online literature search. This process resulted in a set of 358 fusion proteins and their corresponding protein interactions, as a training set for a Naïve Bayes classifier, to identify predicted fusion proteins that have reliable evidence in the literature and that were confirmed experimentally. Next, for a test group of 1817 fusion proteins, we were able to identify from the literature 2908 PPIs in total, across 18 cancer types. The described method, ProtFus, can be used for screening the literature to identify unique cases of fusion proteins and their PPIs, as means of studying alterations of protein networks in cancers. Availability: http://protfus.md.biu.ac.il/.",2019-08-22 +23193286,SpermatogenesisOnline 1.0: a resource for spermatogenesis based on manual literature curation and genome-wide data mining.,"Human infertility affects 10-15% of couples, half of which is attributed to the male partner. Abnormal spermatogenesis is a major cause of male infertility. 
Characterizing the genes involved in spermatogenesis is fundamental to understand the mechanisms underlying this biological process and in developing treatments for male infertility. Although many genes have been implicated in spermatogenesis, no dedicated bioinformatic resource for spermatogenesis is available. We have developed such a database, SpermatogenesisOnline 1.0 (http://mcg.ustc.edu.cn/sdap1/spermgenes/), using manual curation from 30 233 articles published before 1 May 2012. It provides detailed information for 1666 genes reported to participate in spermatogenesis in 37 organisms. Based on the analysis of these genes, we developed an algorithm, Greed AUC Stepwise (GAS) model, which predicted 762 genes to participate in spermatogenesis (GAS probability >0.5) based on genome-wide transcriptional data in Mus musculus testis from the ArrayExpress database. These predicted and experimentally verified genes were annotated, with several identical spermatogenesis-related GO terms being enriched for both classes. Furthermore, protein-protein interaction analysis indicates direct interactions of predicted genes with the experimentally verified ones, which supports the reliability of GAS. The strategy (manual curation and data mining) used to develop SpermatogenesisOnline 1.0 can be easily extended to other biological processes.",2012-11-28 +31245515,Outlining the Grb2 interactome data and its interacting partners in HEK293 cells in absence and presence of epidermal growth factor.,"Growth factor receptor-bound protein 2 (Grb2) is an adaptor protein involved in the signal transduction pathways. This dataset enlists proteins which interact with Grb2 in the presence and absence of a mitogenic stimulus. Grb2 expressing HEK293 cells were cultured in light and heavy labeled SILAC media. Normal lysine and arginine were incorporated as light labels while 8 and 10 Da heavier labels of respective isotopes were used for heavy labeling. 
While light labeled cells were used to enrich basal Grb2 interactome, the heavy labeled cells were stimulated in presence of epidermal growth factor (EGF) to investigate the altered Grb2 interactome dynamics. Equal number of EGF stimulated and non-stimulated cells was pooled, lysed and subjected to affinity purification coupled to mass spectrometry (AP-MS). The variety of Grb2 protein partners changed as a consequence of EGF stimulation. Additionally, SILAC labeling helped in quantitative estimation of altered association of a few interactors with the bait protein. Data are available via PRIDE repository with the dataset identifier PXD012957 (https://www.ebi.ac.uk/pride/archive/projects/PXD012957).",2019-05-30 +30670618,Going Viral: a Novel Role for Bacteriophage in Colorectal Cancer. ,"Microbiome-based signatures of disease have focused primarily on the bacterial component of the microbiome for numerous reasons, including ease of sample preparation and depth of the curated bacterial database. However, even more numerous than bacteria are the bacteriophages of the viral portion of the microbiome, which have emerged with identifiable disease signatures in other diseases, such as inflammatory bowel diseases. Here, G. D. Hannigan, M. B. Duhaime, M. T. Ruffin, IV, C. C. Koumpouras, and P. D. Schloss (mBio 9:e02248-18, https://doi.org/10.1128/mBio.02248-18) present a study that explores the potential bacteriophage signatures in patients with colorectal cancer (CRC) and the associated changes in bacterial signatures. 
Sampling from a cross section of 60 patients at different stages of CRC in addition to 30 healthy controls, this study highlights the need for greater exploration into the virome, including the ""dark matter"" of diverse forms that viruses assume in the gastrointestinal tract.",2019-01-22 +29522196,Optimality and identification of dynamic models in systems biology: an inverse optimal control framework.,"Motivation:Optimality principles have been used to explain many biological processes and systems. However, the functions being optimized are in general unknown a priori. Here we present an inverse optimal control framework for modeling dynamics in systems biology. The objective is to identify the underlying optimality principle from observed time-series data and simultaneously estimate unmeasured time-dependent inputs and time-invariant model parameters. As a special case, we also consider the problem of optimal simultaneous estimation of inputs and parameters from noisy data. After presenting a general statement of the inverse optimal control problem, and discussing special cases of interest, we outline numerical strategies which are scalable and robust. Results:We discuss the existence, relevance and implications of identifiability issues in the above problems. We present a robust computational approach based on regularized cost functions and the use of suitable direct numerical methods based on the control-vector parameterization approach. To avoid convergence to local solutions, we make use of hybrid global-local methods. We illustrate the performance and capabilities of this approach with several challenging case studies, including simulated and real data. We pay particular attention to the computational scalability of our approach (with the objective of considering large numbers of inputs and states). We provide a software implementation of both the methods and the case studies. 
Availability and implementation:The code used to obtain the results reported here is available at https://zenodo.org/record/1009541. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +30131590,From homogeneous to heterogeneous network alignment via colored graphlets.,"Network alignment (NA) compares networks with the goal of finding a node mapping that uncovers highly similar (conserved) network regions. Existing NA methods are homogeneous, i.e., they can deal only with networks containing nodes and edges of one type. Due to increasing amounts of heterogeneous network data with nodes or edges of different types, we extend three recent state-of-the-art homogeneous NA methods, WAVE, MAGNA++, and SANA, to allow for heterogeneous NA for the first time. We introduce several algorithmic novelties. Namely, these existing methods compute homogeneous graphlet-based node similarities and then find high-scoring alignments with respect to these similarities, while simultaneously maximizing the amount of conserved edges. Instead, we extend homogeneous graphlets to their heterogeneous counterparts, which we then use to develop a new measure of heterogeneous node similarity. Also, we extend S3, a state-of-the-art measure of edge conservation for homogeneous NA, to its heterogeneous counterpart. Then, we find high-scoring alignments with respect to our heterogeneous node similarity and edge conservation measures. In evaluations on synthetic and real-world biological networks, our proposed heterogeneous NA methods lead to higher-quality alignments and better robustness to noise in the data than their homogeneous counterparts. The software and data from this work is available at https://nd.edu/~cone/colored_graphlets/.",2018-08-21 +31663775,Exposure to Bisphenol A and Bisphenol S and Incident Type 2 Diabetes: A Case-Cohort Study in the French Cohort D.E.S.I.R.,"

Background

The question of whether exposure to bisphenol A (BPA) contributes to the development of type 2 diabetes is still unresolved. Most epidemiological evidence on the association between BPA and diabetes is from cross-sectional studies or longitudinal studies with single urinary measurements. No prospective study has examined exposure to BPA analogs such as bisphenol S (BPS) in relation to incident type 2 diabetes.

Objectives

We aimed to investigate whether exposure to BPA and BPS, assessed at up to two time points, was associated with the incidence of type 2 diabetes.

Methods

We performed a case-cohort study on 755 participants without diabetes at baseline and followed-up over 9 y as part of the French prospective cohort Data from an Epidemiological Study on the Insulin Resistance Syndrome (D.E.S.I.R.). BPA-glucuronide (BPA-G) and BPS-glucuronide (BPS-G) were assessed in fasting spot urine samples collected during the health examinations at baseline and 3 y later. Associations with incident diabetes were examined using Prentice-weighted Cox regression models adjusted for potential confounders.

Results

A total of 201 incident cases of type 2 diabetes were diagnosed over the follow-up, including 30 in the subcohort. Compared with participants with the lowest average BPA exposure (below the first quartile), participants in the second, third, and fourth quartile groups of exposure had a near doubling of the risk of type 2 diabetes, with a hazard ratio (HR) = 2.56 (95% CI: 1.16, 5.65), 2.35 (95% CI: 1.07, 5.15), and 1.56 (95% CI: 0.68, 3.55), respectively. The detection of BPS-G in urine at one or both time points was associated with incident diabetes, with an HR = 2.81 (95% CI: 1.74, 4.53).

Discussion

This study shows positive associations between exposure to BPA and BPS and the incidence of type 2 diabetes, independent of traditional diabetes risk factors. Our results should be confirmed by recent, population-based observational studies in different populations and settings. Overall, these findings raise concerns about using BPS as a BPA substitute. Further research on BPA analogs is warranted. https://doi.org/10.1289/EHP5159.",2019-10-30 +30832626,Integrating research and system-wide practice in public health: lessons learnt from Better Start Bradford.,"Many interventions that are delivered within public health services have little evidence of effect. Evaluating interventions that are being delivered as a part of usual practice offers opportunities to improve the evidence base of public health. However, such evaluation is challenging and requires the integration of research into system-wide practice. The Born in Bradford's Better Start experimental birth cohort offers an opportunity to efficiently evaluate multiple complex community interventions to improve the health, wellbeing and development of children aged 0-3 years. Based on the learning from this programme, this paper offers a pragmatic and practical guide to researchers, public health commissioners and service providers to enable them to integrate research into their everyday practice, thus enabling relevant and robust evaluations within a complex and changing system. Using the principles of co-production the key challenges of integrating research and practice were identified, and appropriate strategies to overcome these, developed across five key stages: 1) Community and stakeholder engagement; 2) Intervention design; 3) Optimising routinely collected data; 4) Monitoring implementation; and 5) Evaluation. 
As a result of our learning we have developed comprehensive toolkits ( https://borninbradford.nhs.uk/what-we-do/pregnancy-early-years/toolkit/ ) including: an operational guide through the service design process; an implementation and monitoring guide; and an evaluation framework. The evaluation framework incorporates implementation evaluations to enable understanding of intervention performance in practice, and quasi experimental approaches to infer causal effects in a timely manner. We also offer strategies to harness routinely collected data to enhance the efficiency and affordability of evaluations that are directly relevant to policy and practice. These strategies and tools will help researchers, commissioners and service providers to work together to evaluate interventions delivered in real-life settings. More importantly, however, we hope that they will support the development of a connected system that empowers practitioners and commissioners to embed innovation and improvement into their own practice, thus enabling them to learn, evaluate and improve their own services.",2019-03-04 +25874014,Genetic Variability of MicroRNA Genes in 15 Animal Species.,"MicroRNAs (miRNA) are a class of non-coding RNAs important in posttranscriptional regulation of target genes. Previous studies have proven that genetic variability of miRNA genes (miR-SNP) has an impact on phenotypic variation and disease susceptibility in human, mice and some livestock species. MicroRNA gene polymorphisms could therefore represent biomarkers for phenotypic traits also in other animal species. We upgraded our previously developed tool miRNA SNiPer to the version 4.0 which enables the search of miRNA genetic variability in 15 animal genomes: http://www.integratomics-time.com/miRNA-SNiPer. 
Genome-wide in silico screening (GWISS) of 15 genomes revealed that based on the current database releases, miRNA genes are most polymorphic in cattle, followed by human, fruitfly, mouse, chicken, pig, horse, and sheep. The difference in the number of miRNA gene polymorphisms between species is most probably not due to a biological reason and lack of genetic variability in some species, but to different stage of sequencing projects and differences in development of genomic resource databases in different species. Genome screening revealed several interesting genomic hotspots. For instance, several multiple nucleotide polymorphisms (MNPs) are present within mature seed region in cattle. Among miR-SNPs 46 are present on commercial whole-genome SNP chips: 16 in cattle, 26 in chicken, two in sheep and two in pig. The update of the miRNA SNiPer tool and the generated catalogs will serve researchers as a starting point in designing projects dealing with the effects of genetic variability of miRNA genes.",2015-02-15 +22110040,ELM--the database of eukaryotic linear motifs.,"Linear motifs are short, evolutionarily plastic components of regulatory proteins and provide low-affinity interaction interfaces. These compact modules play central roles in mediating every aspect of the regulatory functionality of the cell. They are particularly prominent in mediating cell signaling, controlling protein turnover and directing protein localization. Given their importance, our understanding of motifs is surprisingly limited, largely as a result of the difficulty of discovery, both experimentally and computationally. The Eukaryotic Linear Motif (ELM) resource at http://elm.eu.org provides the biological community with a comprehensive database of known experimentally validated motifs, and an exploratory tool to discover putative linear motifs in user-submitted protein sequences. 
The current update of the ELM database comprises 1800 annotated motif instances representing 170 distinct functional classes, including approximately 500 novel instances and 24 novel classes. Several older motif class entries have been also revisited, improving annotation and adding novel instances. Furthermore, addition of full-text search capabilities, an enhanced interface and simplified batch download has improved the overall accessibility of the ELM data. The motif discovery portion of the ELM resource has added conservation, and structural attributes have been incorporated to aid users to discriminate biologically relevant motifs from stochastically occurring non-functional instances.",2011-11-21 +33828716,Automating Areas of Interest Analysis in Mobile Eye Tracking Experiments based on Machine Learning. ,"For an in-depth, AOI-based analysis of mobile eye tracking data, a preceding gaze assignment step is inevitable. Current solutions such as manual gaze mapping or marker-based approaches are tedious and not suitable for applications manipulating tangible objects. This makes mobile eye tracking studies with several hours of recording difficult to analyse quantitatively. We introduce a new machine learning-based algorithm, the computational Gaze-Object Mapping (cGOM), that automatically maps gaze data onto respective AOIs. cGOM extends state-of-the-art object detection and segmentation by mask R-CNN with a gaze mapping feature. The new algorithm's performance is validated against a manual fixation-by-fixation mapping, which is considered as ground truth, in terms of true positive rate (TPR), true negative rate (TNR) and efficiency. Using only 72 training images with 264 labelled object representations, cGOM is able to reach a TPR of approx. 80% and a TNR of 85% compared to the manual mapping. The break-even point is reached at 2 hours of eye tracking recording for the total procedure, respectively 1 hour considering human working time only. 
Together with a real-time capability of the mapping process after completed training, even hours of eye tracking recording can be evaluated efficiently. (Code and video examples have been made available at: https://gitlab.ethz.ch/pdz/cgom.git).",2018-12-10 +27899556,dbDEMC 2.0: updated database of differentially expressed miRNAs in human cancers.,"MicroRNAs (miRNAs) are often deregulated in cancer and are thought to play an important role in cancer development. Large amount of differentially expressed miRNAs have been identified in various cancers by using high-throughput methods. It is therefore quite important to make a comprehensive collection of these miRNAs and to decipher their roles in oncogenesis and tumor progression. In 2010, we presented the first release of dbDEMC, representing a database for collection of differentially expressed miRNAs in human cancers obtained from microarray data. Here we describe an update of the database. dbDEMC 2.0 documents 209 expression profiling data sets across 36 cancer types and 73 subtypes, and a total of 2224 differentially expressed miRNAs were identified. An easy-to-use web interface was constructed that allows users to make a quick search of the differentially expressed miRNAs in certain cancer types. In addition, a new function of 'meta-profiling' was added to view differential expression events according to user-defined miRNAs and cancer types. We expect this database to continue to serve as a valuable source for cancer investigation and potential clinical application related to miRNAs. dbDEMC 2.0 is freely available at http://www.picb.ac.cn/dbDEMC.",2016-11-28 +30482846,Integrated Identification and Quantification Error Probabilities for Shotgun Proteomics.,"Protein quantification by label-free shotgun proteomics experiments is plagued by a multitude of error sources. Typical pipelines for identifying differential proteins use intermediate filters to control the error rate. 
However, they often ignore certain error sources and, moreover, regard filtered lists as completely correct in subsequent steps. These two indiscretions can easily lead to a loss of control of the false discovery rate (FDR). We propose a probabilistic graphical model, Triqler, that propagates error information through all steps, employing distributions in favor of point estimates, most notably for missing value imputation. The model outputs posterior probabilities for fold changes between treatment groups, highlighting uncertainty rather than hiding it. We analyzed 3 engineered data sets and achieved FDR control and high sensitivity, even for truly absent proteins. In a bladder cancer clinical data set we discovered 35 proteins at 5% FDR, whereas the original study discovered 1 and MaxQuant/Perseus 4 proteins at this threshold. Compellingly, these 35 proteins showed enrichment for functional annotation terms, whereas the top ranked proteins reported by MaxQuant/Perseus showed no enrichment. The model executes in minutes and is freely available at https://pypi.org/project/triqler/.",2018-11-27 +,MorphoTools: a set of R functions for morphometric analysis,"A set of R functions for the convenient handling of morphometric analysis is provided. No previous knowledge of R is required. The functions include data import from Excel or tab-delimited text files, descriptive statistics for populations and taxa, histograms of characters, correlation matrices of characters, cluster analysis, principal component analysis, linear discriminant analysis with permutation tests, classificatory discriminant analysis and k-nearest neighbour classification. The use of the functions is demonstrated on a sample data set. Detailed descriptions of the functions and examples of the scripts for producing graphics are included as an electronic appendix. 
Documentation and function definitions can be downloaded from http://www.prf.jcu.cz/systematics/morphotools.html .",2015-04-01 +30010792,AuTom-dualx: a toolkit for fully automatic fiducial marker-based alignment of dual-axis tilt series with simultaneous reconstruction.,"

Motivation

Dual-axis electron tomography is an important 3D macromolecular structure reconstruction technology, which can reduce artifacts and suppress the effect of the missing wedge. However, the fully automatic data process for dual-axis electron tomography still remains a challenge due to three difficulties: (i) how to track the mass of fiducial markers automatically; (ii) how to integrate the information from the two different tilt series; and (iii) how to cope with the inconsistency between the two different tilt series.

Results

Here we develop a toolkit for fully automatic alignment of dual-axis electron tomography, with a simultaneous reconstruction procedure. The proposed toolkit and its workflow carry out the following solutions: (i) fully automatic detection and tracking of fiducial markers under large-field datasets; (ii) automatic combination of two different tilt series and global calibration of projection parameters; and (iii) inconsistency correction based on distortion correction parameters and the consequent simultaneous reconstruction. With all of these features, the presented toolkit can achieve accurate alignment and reconstruction simultaneously and conveniently under a single global coordinate system.

Availability and implementation

The toolkit AuTom-dualx (alignment module dualxmauto and reconstruction module volrec_mltm) is accessible for general application at http://ear.ict.ac.cn, and the key source code is freely available upon request.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +30784433,Measure clinical drug-drug similarity using Electronic Medical Records.,"

Objective

Quantitative measurement of clinical drug-drug similarity has many potential applications in assessing medication therapy similarity and patient similarity. Currently, most of the methods to measure drug-drug similarity were not directly obtained from clinical data and cannot cover clinical drugs. We sought to propose a computational approach to measure clinical drug-drug similarity based on the Electronic Medical Record (EMR) system.

Materials and methods

We used the Bonferroni-corrected hypergeometric P value to generate statistically significant associations between drugs and diagnoses in an EMR dataset which contained 812 554 medication records and 339 269 discharge diagnosis codes. Then the Jaccard similarity coefficient was used to measure the distances between drugs. A k-means based bootstrapping method was proposed to generate drug clusters.

Results

A similarity matrix covering a total of 1210 clinical drugs used in the hospital was calculated. The clinical drug-drug similarity shows significant correlation with the chemical similarity of drugs and literature-based drug-drug similarity but with unique features. Based on this drug-drug similarity, 36 clinical drug clusters, most of which were related to specific clinical conditions, were generated. Details of these drug clusters are available at http://kb4md.org:4000/drugcluster.

Discussion

This method provided a whole new view of the relationship among clinical drugs. Furthermore, it has the potential to evaluate the effectiveness of drug knowledge translation and provide quantitative knowledge resources for many applications such as treatment comparisons and patient similarity.

Conclusion

We proposed a clinical drug-drug similarity measurement that generated from clinical practice data and covers all clinical drugs.",2019-02-11 +22080554,Disease Ontology: a backbone for disease semantic integration.,"The Disease Ontology (DO) database (http://disease-ontology.org) represents a comprehensive knowledge base of 8043 inherited, developmental and acquired human diseases (DO version 3, revision 2510). The DO web browser has been designed for speed, efficiency and robustness through the use of a graph database. Full-text contextual searching functionality using Lucene allows the querying of name, synonym, definition, DOID and cross-reference (xrefs) with complex Boolean search strings. The DO semantically integrates disease and medical vocabularies through extensive cross mapping and integration of MeSH, ICD, NCI's thesaurus, SNOMED CT and OMIM disease-specific terms and identifiers. The DO is utilized for disease annotation by major biomedical databases (e.g. Array Express, NIF, IEDB), as a standard representation of human disease in biomedical ontologies (e.g. IDO, Cell line ontology, NIFSTD ontology, Experimental Factor Ontology, Influenza Ontology), and as an ontological cross mappings resource between DO, MeSH and OMIM (e.g. GeneWiki). The DO project (http://diseaseontology.sf.net) has been incorporated into open source tools (e.g. Gene Answers, FunDO) to connect gene and disease biomedical data through the lens of human disease. The next iteration of the DO web browser will integrate DO's extended relations and logical definition representation along with these biomedical resource cross-mappings.",2011-11-12 +28013277,Enhancing the GABI-Kat Arabidopsis thaliana T-DNA Insertion Mutant Database by Incorporating Araport11 Annotation.,"SimpleSearch provides access to a database containing information about T-DNA insertion lines of the GABI-Kat collection of Arabidopsis thaliana mutants. 
These mutants are an important tool for reverse genetics, and GABI-Kat is the second largest collection of such T-DNA insertion mutants. Insertion sites were deduced from flanking sequence tags (FSTs), and the database contains information about mutant plant lines as well as insertion alleles. Here, we describe improvements within the interface (available at http://www.gabi-kat.de/db/genehits.php) and with regard to the database content that have been realized in the last five years. These improvements include the integration of the Araport11 genome sequence annotation data containing the recently updated A. thaliana structural gene descriptions, an updated visualization component that displays groups of insertions with very similar insertion positions, mapped confirmation sequences, and primers. The visualization component provides a quick way to identify insertions of interest, and access to improved data about the exact structure of confirmed insertion alleles. In addition, the database content has been extended by incorporating additional insertion alleles that were detected during the confirmation process, as well as by adding new FSTs that have been produced during continued efforts to complement gaps in FST availability. Finally, the current database content regarding predicted and confirmed insertion alleles as well as primer sequences has been made available as downloadable flat files.",2017-01-01 +28878802,Development of Highly Informative Genome-Wide Single Sequence Repeat Markers for Breeding Applications in Sesame and Construction of a Web Resource: SisatBase.,"The sequencing of the full nuclear genome of sesame (Sesamum indicum L.) provides the platform for functional analyses of genome components and their application in breeding programs. 
Although the importance of microsatellites markers or simple sequence repeats (SSR) in crop genotyping, genetics, and breeding applications is well established, only a little information exist concerning SSRs at the whole genome level in sesame. In addition, SSRs represent a suitable marker type for sesame molecular breeding in developing countries where it is mainly grown. In this study, we identified 138,194 genome-wide SSRs of which 76.5% were physically mapped onto the 13 pseudo-chromosomes. Among these SSRs, up to three primers pairs were supplied for 101,930 SSRs and used to in silico amplify the reference genome together with two newly sequenced sesame accessions. A total of 79,957 SSRs (78%) were polymorphic between the three genomes thereby suggesting their promising use in different genomics-assisted breeding applications. From these polymorphic SSRs, 23 were selected and validated to have high polymorphic potential in 48 sesame accessions from different growing areas of Africa. Furthermore, we have developed an online user-friendly database, SisatBase (http://www.sesame-bioinfo.org/SisatBase/), which provides free access to SSRs data as well as an integrated platform for functional analyses. Altogether, the reference SSR and SisatBase would serve as useful resources for genetic assessment, genomic studies, and breeding advancement in sesame, especially in developing countries.",2017-08-22 +28111364,TOMATOMICS: A Web Database for Integrated Omics Information in Tomato.,"Solanum lycopersicum (tomato) is an important agronomic crop and a major model fruit-producing plant. To facilitate basic and applied research, comprehensive experimental resources and omics information on tomato are available following their development. Mutant lines and cDNA clones from a dwarf cultivar, Micro-Tom, are two of these genetic resources. Large-scale sequencing data for ESTs and full-length cDNAs from Micro-Tom continue to be gathered. 
In conjunction with information on the reference genome sequence of another cultivar, Heinz 1706, the Micro-Tom experimental resources have facilitated comprehensive functional analyses. To enhance the efficiency of acquiring omics information for tomato biology, we have integrated the information on the Micro-Tom experimental resources and the Heinz 1706 genome sequence. We have also inferred gene structure by comparison of sequences between the genome of Heinz 1706 and the transcriptome, which are comprised of Micro-Tom full-length cDNAs and Heinz 1706 RNA-seq data stored in the KaFTom and Sequence Read Archive databases. In order to provide large-scale omics information with streamlined connectivity we have developed and maintain a web database TOMATOMICS (http://bioinf.mind.meiji.ac.jp/tomatomics/). In TOMATOMICS, access to the information on the cDNA clone resources, full-length mRNA sequences, gene structures, expression profiles and functional annotations of genes is available through search functions and the genome browser, which has an intuitive graphical interface.",2017-01-01 +29868852,LiveKraken--real-time metagenomic classification of illumina data.,"

Motivation

In metagenomics, Kraken is one of the most widely used tools due to its robustness and speed. Yet, the overall turnaround time of metagenomic analysis is hampered by the sequential paradigm of wet and dry lab. In urgent experiments, it can be crucial to gain a timely insight into a dataset.

Results

Here, we present LiveKraken, a real-time read classification tool based on the core algorithm of Kraken. LiveKraken uses streams of raw data from Illumina sequencers to classify reads taxonomically. This way, we are able to produce results identical to those of Kraken the moment the sequencer finishes. We are furthermore able to provide comparable results in early stages of a sequencing run, allowing saving up to a week of sequencing time on an Illumina HiSeq in High Throughput Mode. While the number of classified reads grows over time, false classifications appear in negligible numbers and proportions of identified taxa are only affected to a minor extent.

Availability and implementation

LiveKraken is available at https://gitlab.com/rki_bioinformatics/LiveKraken.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-11-01 +24029424,Proteome-wide measurement of protein half-lives and translation rates in vasopressin-sensitive collecting duct cells.,"Vasopressin regulates water excretion, in part, by controlling the abundances of the water channel aquaporin-2 (AQP2) protein and regulatory proteins in the renal collecting duct. To determine whether vasopressin-induced alterations in protein abundance result from modulation of protein production, protein degradation, or both, we used protein mass spectrometry with dynamic stable isotope labeling in cell culture to achieve a proteome-wide determination of protein half-lives and relative translation rates in mpkCCD cells. Measurements were made at steady state in the absence or presence of the vasopressin analog, desmopressin (dDAVP). Desmopressin altered the translation rate rather than the stability of most responding proteins, but it significantly increased both the translation rate and the half-life of AQP2. In addition, proteins associated with vasopressin action, including Mal2, Akap12, gelsolin, myosin light chain kinase, annexin-2, and Hsp70, manifested altered translation rates. Interestingly, desmopressin increased the translation of seven glutathione S-transferase proteins and enhanced protein S-glutathionylation, uncovering a previously unexplored vasopressin-induced post-translational modification. Additional bioinformatic analysis of the mpkCCD proteome indicated a correlation between protein function and protein half-life. In particular, processes that are rapidly regulated, such as transcription, endocytosis, cell cycle regulation, and ubiquitylation are associated with proteins with especially short half-lives. 
These data extend our understanding of the mechanisms underlying vasopressin signaling and provide a broad resource for additional investigation of collecting duct function (http://helixweb.nih.gov/ESBL/Database/ProteinHalfLives/index.html).",2013-09-12 +30668479,TargetDBP: Accurate DNA-Binding Protein Prediction Via Sequence-Based Multi-View Feature Learning.,"Accurately identifying DNA-binding proteins (DBPs) from protein sequence information is an important but challenging task for protein function annotations. In this paper, we establish a novel computational method, named TargetDBP, for accurately targeting DBPs from primary sequences. In TargetDBP, four single-view features, i.e., AAC (Amino Acid Composition), PsePSSM (Pseudo Position-Specific Scoring Matrix), PsePRSA (Pseudo Predicted Relative Solvent Accessibility), and PsePPDBS (Pseudo Predicted Probabilities of DNA-Binding Sites), are first extracted to represent different base features, respectively. Second, differential evolution algorithm is employed to learn the weights of four base features. Using the learned weights, we weightedly combine these base features to form the original super feature. An excellent subset of the super feature is then selected by using a suitable feature selection algorithm SVM-REF+CBR (Support Vector Machine Recursive Feature Elimination with Correlation Bias Reduction). Finally, the prediction model is learned via using support vector machine on the selected feature subset. We also construct a new gold-standard and non-redundant benchmark dataset from PDB database to evaluate and compare the proposed TargetDBP with other existing predictors. On this new dataset, TargetDBP can achieve higher performance than other state-of-the-art predictors. 
The TargetDBP web server and datasets are freely available at http://csbio.njust.edu.cn/bioinf/targetdbp/ for academic use.",2019-01-18 +30665783,How to prescribe a genetic test for the diagnosis of autoinflammatory diseases?,"The systemic autoinflammatory disorders (SAIDs) are associated with dysregulation of the innate immune system, affecting pro-inflammatory cytokines and apoptosis pathways. The spectrum of SAIDs continues to grow with over 30 different disorders identified to date. The main indication for genetic referral is when a patient presents with clinical symptoms consistent with one or more of the SAIDs. Thus, in making a referral for DNA screening, clinical information that supports the choice for screening of one or more SAIDs genes is required. Many of the SAIDs can display overlapping, partial or atypical symptoms, which makes the differential diagnosis extremely difficult and thus heavily dependent on genetic testing. Various attempts have been aimed at improving the efficiency of SAIDs diagnosis by proposing a set of clinical criteria to guide the genetic analysis of the SAIDs. In the last decade, due to application of the next-generation sequencing (NGS) the genetic diagnosis in patients with SAIDs have greatly improved; novel diseases and disease-associated genes have been identified and remarkable progress has been made in the genetic characterization of the undiagnosed patients and the sporadic cases. To date more than 800 variants have been recorded on the Infevers database, an online repository for DNA changes in genes associated with SAIDs (http://fmf.igh.cnrs.fr/ISSAID/infevers/). 
Recently, it has been updated with the new guidelines for classification of genetic variants pathogenicity in the in four most recognised SAIDs genes: MEFV, TNFRSF1A, NLRP3 and MVK.",2019-01-18 +22645317,dbCAN: a web resource for automated carbohydrate-active enzyme annotation.,"Carbohydrate-active enzymes (CAZymes) are very important to the biotech industry, particularly the emerging biofuel industry because CAZymes are responsible for the synthesis, degradation and modification of all the carbohydrates on Earth. We have developed a web resource, dbCAN (http://csbl.bmb.uga.edu/dbCAN/annotate.php), to provide a capability for automated CAZyme signature domain-based annotation for any given protein data set (e.g. proteins from a newly sequenced genome) submitted to our server. To accomplish this, we have explicitly defined a signature domain for every CAZyme family, derived based on the CDD (conserved domain database) search and literature curation. We have also constructed a hidden Markov model to represent the signature domain of each CAZyme family. These CAZyme family-specific HMMs are our key contribution and the foundation for the automated CAZyme annotation.",2012-05-29 +30658684,Development of a cardiac-centered frailty ontology.,"

Background

A Cardiac-centered Frailty Ontology can be an important foundation for using NLP to assess patient frailty. Frailty is an important consideration when making patient treatment decisions, particularly in older adults, those with a cardiac diagnosis, or when major surgery is a consideration. Clinicians often report patient's frailty in progress notes and other documentation. Frailty is recorded in many different ways in patient records and many different validated frailty-measuring instruments are available, with little consistency across instruments. We specifically explored concepts relevant to decisions regarding cardiac interventions. We based our work on text found in a large corpus of clinical notes from the Department of Veterans Affairs (VA) national Electronic Health Record (EHR) database.

Results

The full ontology has 156 concepts, with 246 terms. It includes 86 concepts we expect to find in clinical documents, with 12 qualifier values. The remaining 58 concepts represent hierarchical groups (e.g., physical function findings). Our top-level class is clinical finding, which has children clinical history finding, instrument finding, and physical examination finding, reflecting the OGMS definition of clinical finding. Instrument finding is any score found for the existing frailty instruments. Within our ontology, we used SNOMED-CT concepts where possible. Some of the 86 concepts we expect to find in clinical documents are associated with the properties like ability interpretation. The concept ability to walk can either be able, assisted or unable. Each concept-property level pairing gets a different frailty score. Each scored concept received three scores: a frailty score, a relevance to cardiac decisions score, and a likelihood of resolving after the recommended intervention score. The ontology includes the relationship between scores from ten frailty instruments and frailty as assessed using ontology concepts. It also included rules for mapping ontology elements to instrument items for three common frailty assessment instruments. Ontology elements are used in two clinical NLP systems.

Conclusions

We developed and validated a Cardiac-centered Frailty Ontology, which is a machine-interoperable description of frailty that reflects all the areas that clinicians consider when deciding which cardiac intervention will best serve the patient as well as frailty indications generally relevant to medical decisions. The ontology owl file is available on Bioportal at http://bioportal.bioontology.org/ontologies/CCFO .",2019-01-18 +21435384,HOCTAR database: a unique resource for microRNA target prediction.,"microRNAs (miRNAs) are the most abundant class of small RNAs in mammals. They play an important role in regulation of gene expression by inducing mRNA cleavage or translational inhibition. Each miRNA targets an average of 100-200 genes by binding, preferentially, to their 3' UTRs by means of partial sequence complementarity. Most miRNAs are localized within transcriptional units, termed host genes, and show similar expression behavior with respect to their corresponding host genes. Considering the impact of miRNA in the regulation of gene expression and their involvement in a growing number of human disorders, it is vital to develop sensitive computational approaches able to identify miRNA target genes. The HOCTAR database (db) is a publicly available resource collecting ranked list of predicted target genes for 290 intragenic miRNAs annotated in human. HOCTARdb is a unique resource that integrates miRNA target prediction genes and transcriptomic data to score putative miRNA targets looking at the expression behavior of their host genes. We demonstrated, by testing 135 known validated target genes (either at the translational or transcriptional level) for different miRNAs, that the miRNA target prediction lists present in HOCTARdb are highly reliable. Moreover, HOCTARdb associates biological roles to each miRNA-controlled transcriptional network by means of Gene Ontology analysis. This information is easily accessible through a user-friendly query page. 
The HOCTARdb is available at http://hoctar.tigem.it/. We believe that a detailed relationship between miRNAs and their target genes and a constant update of the information contained in HOCTARdb will provide an extremely valuable resource to assist the researcher in the discovery of miRNA target genes.",2011-03-22 +25957785,"Transdisciplinary synthesis for ecosystem science, policy and management: The Australian experience.","Mitigating the environmental effects of global population growth, climatic change and increasing socio-ecological complexity is a daunting challenge. To tackle this requires synthesis: the integration of disparate information to generate novel insights from heterogeneous, complex situations where there are diverse perspectives. Since 1995, a structured approach to inter-, multi- and trans-disciplinary(1) collaboration around big science questions has been supported through synthesis centres around the world. These centres are finding an expanding role due to ever-accumulating data and the need for more and better opportunities to develop transdisciplinary and holistic approaches to solve real-world problems. The Australian Centre for Ecological Analysis and Synthesis (ACEAS ) has been the pioneering ecosystem science synthesis centre in the Southern Hemisphere. Such centres provide analysis and synthesis opportunities for time-pressed scientists, policy-makers and managers. They provide the scientific and organisational environs for virtual and face-to-face engagement, impetus for integration, data and methodological support, and innovative ways to deliver synthesis products. We detail the contribution, role and value of synthesis using ACEAS to exemplify the capacity for synthesis centres to facilitate trans-organisational, transdisciplinary synthesis. We compare ACEAS to other international synthesis centres, and describe how it facilitated project teams and its objective of linking natural resource science to policy to management. 
Scientists and managers were brought together to actively collaborate in multi-institutional, cross-sectoral and transdisciplinary research on contemporary ecological problems. The teams analysed, integrated and synthesised existing data to co-develop solution-oriented publications and management recommendations that might otherwise not have been produced. We identify key outcomes of some ACEAS working groups which used synthesis to tackle important ecosystem challenges. We also examine the barriers and enablers to synthesis, so that risks can be minimised and successful outcomes maximised. We argue that synthesis centres have a crucial role in developing, communicating and using synthetic transdisciplinary research.",2015-05-06 +24030781,IQdb: an intelligence quotient score-associated gene resource for human intelligence.,"Intelligence quotient (IQ) is the most widely used phenotype to characterize human cognitive abilities. Recent advances in studies on human intelligence have identified many new susceptibility genes. However, the genetic mechanisms involved in IQ score and the relationship between IQ score and the risk of mental disorders have won little attention. To address the genetic complexity of IQ score, we have developed IQdb (http://IQdb.cbi.pku.edu.cn), a publicly available database for exploring IQ-associated human genes. In total, we collected 158 experimental verified genes from literature as a core dataset in IQdb. In addition, 46 genomic regions related to IQ score have been curated from literature. Based on the core dataset and 46 confirmed linked genomic regions, more than 6932 potential IQ-related genes are expanded using data of protein-protein interactions. A systematic gene ranking approach was applied to all the collected and expanded genes to represent the relative importance of all the 7090 genes in IQdb. 
Our further systematic pathway analysis reveals that IQ-associated genes are significantly enriched in multiple signal events, especially related to cognitive systems. Of the 158 genes in the core dataset, 81 are involved in various psychotic and mental disorders. This comprehensive gene resource illustrates the importance of IQdb to our understanding on human intelligence, and highlights the utility of IQdb for elucidating the functions of IQ-associated genes and the cross-talk mechanisms among cognition-related pathways in some mental disorders for community. Database URL: http://IQdb.cbi.pku.edu.cn.",2013-09-11 +23180788,SomamiR: a database for somatic mutations impacting microRNA function in cancer.,"Whole-genome sequencing of cancers has begun to identify thousands of somatic mutations that distinguish the genomes of normal tissues from cancers. While many germline mutations within microRNAs (miRNAs) and their targets have been shown to alter miRNA function in cancers and have been associated with cancer risk, the impact of somatic mutations on miRNA function has received relatively little attention. Here, we have created the SomamiR database (http://compbio.uthsc.edu/SomamiR/) to provide a comprehensive resource that integrates several types of data for use in investigating the impact of somatic and germline mutations on miRNA function in cancer. The database contains somatic mutations that may create or disrupt miRNA target sites and integrates these somatic mutations with germline mutations within the same target sites, genome-wide and candidate gene association studies of cancer and functional annotations that link genes containing mutations with cancer. 
Additionally, the database contains a collection of germline and somatic mutations in miRNAs and their targets that have been experimentally shown to impact miRNA function and have been associated with cancer.",2012-11-24 +28383703,mCSM-NA: predicting the effects of mutations on protein-nucleic acids interactions.,"Over the past two decades, several computational methods have been proposed to predict how missense mutations can affect protein structure and function, either by altering protein stability or interactions with its partners, shedding light into potential molecular mechanisms giving rise to different phenotypes. Effectively and efficiently predicting consequences of mutations on protein-nucleic acid interactions, however, remained until recently a great and unmet challenge. Here we report an updated webserver for mCSM-NA, the only scalable method we are aware of capable of quantitatively predicting the effects of mutations in protein coding regions on nucleic acid binding affinities. We have significantly enhanced the original method by including a pharmacophore modelling and information of nucleic acid properties into our graph-based signatures, considering the reverse mutation and by using a refined, more reliable data set, based on a new release of the ProNIT database, which has significantly improved the reliability and applicability of the methodology. Our new predictive model was capable of achieving a correlation coefficient of up to 0.70 on cross-validation and 0.68 on blind-tests, outperforming its previous version. The server is freely available via a user-friendly web interface at: http://structure.bioc.cam.ac.uk/mcsm_na.",2017-07-01 +22135296,VectorBase: improvements to a bioinformatics resource for invertebrate vector genomics.,"VectorBase (http://www.vectorbase.org) is a NIAID-supported bioinformatics resource for invertebrate vectors of human pathogens. 
It hosts data for nine genomes: mosquitoes (three Anopheles gambiae genomes, Aedes aegypti and Culex quinquefasciatus), tick (Ixodes scapularis), body louse (Pediculus humanus), kissing bug (Rhodnius prolixus) and tsetse fly (Glossina morsitans). Hosted data range from genomic features and expression data to population genetics and ontologies. We describe improvements and integration of new data that expand our taxonomic coverage. Releases are bi-monthly and include the delivery of preliminary data for emerging genomes. Frequent updates of the genome browser provide VectorBase users with increasing options for visualizing their own high-throughput data. One major development is a new population biology resource for storing genomic variations, insecticide resistance data and their associated metadata. It takes advantage of improved ontologies and controlled vocabularies. Combined, these new features ensure timely release of multiple types of data in the public domain while helping overcome the bottlenecks of bioinformatics and annotation by engaging with our user community.",2011-12-01 +27377064,Retrovirus Integration Database (RID): a public database for retroviral insertion sites into host genomes.,"

Unlabelled

The NCI Retrovirus Integration Database is a MySQL-based relational database created for storing and retrieving comprehensive information about retroviral integration sites, primarily, but not exclusively, HIV-1. The database is accessible to the public for submission or extraction of data originating from experiments aimed at collecting information related to retroviral integration sites including: the site of integration into the host genome, the virus family and subtype, the origin of the sample, gene exons/introns associated with integration, and proviral orientation. Information about the references from which the data were collected is also stored in the database. Tools are built into the website that can be used to map the integration sites to the UCSC genome browser, to plot the integration site patterns on a chromosome, and to display provirus LTRs in their inserted genome sequence. The website is robust, user friendly, and allows users to query the database and analyze the data dynamically.

Availability

https://rid.ncifcrf.gov ; or http://home.ncifcrf.gov/hivdrp/resources.htm .",2016-07-04 +30863512,Natural hazard mitigation strategies review: Actor-network theory and the eco-based approach understanding in Zimbabwe.,"This paper presents the literature reviewed on the evolution of the natural hazard mitigation perspective and an overview of its progression to date. The article uses information taken from diverse sources such as a globally accepted scientific databases Google Scholar (http://www.scholar.google.co.in), Scopus (http://www.scopus.com), Science Direct (http://www.sciencedirect.com), SpringerLink (http://www.springer.co.in) and Wiley (http://www.onlinelibrary.wiley.com); conference proceedings; theses; abstracts; and impact and non-indexed journals. It demonstrates how the actor-network theory (ANT) theoretical framework can be applicable to Muzarabani in Zimbabwe as a tool for analysing and elaborating hazard mitigation strategies. Actor-network theory is gradually becoming influential but is still a bone of contention, mainly because of its radical approach. Actor-network theory treats humans and non-humans as equal actors. In spite of its limitations, studies have shown that an ANT-grounded approach is useful in providing a framework for the comprehension of the complexities of daily life during natural hazard episodes and the dynamic role of Ziziphus mauritiana in the network in Muzarabani, Zimbabwe. The theory can demonstrate its importance in respect of how social results are produced as a result of linkages among diverse actors (human and non-human) in a network. 
The article argues that if ANT is used logically it is useful in examining eco-based natural hazard mitigation and resilience approaches in semi-arid regions.",2019-01-16 +22064863,EcoliWiki: a wiki-based community resource for Escherichia coli.,"EcoliWiki is the community annotation component of the PortEco (http://porteco.org; formerly EcoliHub) project, an online data resource that integrates information on laboratory strains of Escherichia coli, its phages, plasmids and mobile genetic elements. As one of the early adopters of the wiki approach to model organism databases, EcoliWiki was designed to not only facilitate community-driven sharing of biological knowledge about E. coli as a model organism, but also to be interoperable with other data resources. EcoliWiki content currently covers genes from five laboratory E. coli strains, 21 bacteriophage genomes, F plasmid and eight transposons. EcoliWiki integrates the Mediawiki wiki platform with other open-source software tools and in-house software development to extend how wikis can be used for model organism databases. EcoliWiki can be accessed online at http://ecoliwiki.net.",2011-11-07 +28260517,CAPi: Computational Model for Apicoplast Inhibitors Prediction Against Plasmodium Parasite.,"

Background

Discovery of apicoplast as a drug target offers a new direction in the development of novel anti-malarial compounds, especially against the drug-resistant strains. Drugs such as azithromycin were reported to block the apicoplast development that leads to unusual phenotypes affecting the parasite. This phenomenon suggests that identification of new apicoplast inhibitors will aid in the anti-malarial drug discovery. Therefore, in this study, we developed a computational model to predict apicoplast inhibitors by applying state-of-the-art machine learning techniques.

Methods

We used two high-throughput chemical screening datasets (AID-504850, AID-504848) from the PubChem BioAssay database and applied machine learning techniques. The performance of the models was assessed on various types of binary fingerprints.

Results

In this study, we developed a robust computational algorithm for the prediction of apicoplast inhibition. We observed 73.7% sensitivity and 84% specificity along with 81.4% accuracy rate only on 41 PubChem fingerprints on 48 hrs dataset. Similarly, an accuracy rate of 75.8% was observed for 96 hrs dataset. Additionally, we observed that our model has ~70% positive prediction rate on the independent dataset obtained from ChEMBL-NTD database. Furthermore, the fingerprint analysis suggested that compounds with at least one heteroatom containing hexagonal ring would most likely belong to the antimalarial category as compared to simple aliphatic compounds. We also observed that aromatic compounds with oxygen and chlorine atoms were preferred in inhibitors class as compared to sulphur. Additionally, the compounds with average molecular weight >380Da and XlogP>4 were most likely to belong to the inhibitor category.

Conclusion

This study highlighted the significance of simple interpretable molecular properties along with some preferred substructure in designing the novel anti-malarial compounds. In addition to that, robustness and accuracy of models developed in the present work could be utilized to screen a large chemical library. Based on this study, we developed freely available software at http://deepaklab.com/capi. This study would provide the best alternative for searching the novel apicoplast inhibitors against Plasmodium.",2017-11-01 +22140101,MethylomeDB: a database of DNA methylation profiles of the brain.,"MethylomeDB (http://epigenomics.columbia.edu/methylomedb/index.html) is a new database containing genome-wide brain DNA methylation profiles. DNA methylation is an important epigenetic mark in the mammalian brain. In human studies, aberrant DNA methylation alterations have been associated with various neurodevelopmental and neuropsychiatric disorders such as schizophrenia, and depression. In this database, we present methylation profiles of carefully selected non-psychiatric control, schizophrenia, and depression samples. We also include data on one mouse forebrain sample specimen to allow for cross-species comparisons. In addition to our DNA methylation data generated in-house, we have and will continue to include published DNA methylation data from other research groups with the focus on brain development and function. Users can view the methylation data at single-CpG resolution with the option of wiggle and microarray formats. They can also download methylation data for individual samples. MethylomeDB offers an important resource for research into brain function and behavior. 
It provides the first source of comprehensive brain methylome data, encompassing whole-genome DNA methylation profiles of human and mouse brain specimens that facilitate cross-species comparative epigenomic investigations, as well as investigations of schizophrenia and depression methylomes.",2011-12-02 +27388620,"The Minho Word Pool: Norms for imageability, concreteness, and subjective frequency for 3,800 Portuguese words.","Words are widely used as stimuli in cognitive research. Because of their complexity, using words requires strict control of their objective (lexical and sublexical) and subjective properties. In this work, we present the Minho Word Pool (MWP), a dataset that provides normative values of imageability, concreteness, and subjective frequency for 3,800 (European) Portuguese words-three subjective measures that, in spite of being used extensively in research, have been scarce for Portuguese. Data were collected with 2,357 college students who were native speakers of European Portuguese. The participants rated 100 words drawn randomly from the full set for each of the three subjective indices, using a Web survey procedure (via a URL link). Analyses comparing the MWP ratings with those obtained for the same words from other national and international databases showed that the MWP norms are reliable and valid, thus providing researchers with a useful tool to support research in all neuroscientific areas using verbal stimuli. The MWP norms can be downloaded along with this article or from http://p-pal.di.uminho.pt/about/databases .",2017-06-01 +28701780,High-throughput Identification and Characterization of Two-dimensional Materials using Density functional theory.,"We introduce a simple criterion to identify two-dimensional (2D) materials based on the comparison between experimental lattice constants and lattice constants mainly obtained from Materials-Project (MP) density functional theory (DFT) calculation repository. 
Specifically, if the relative difference between the two lattice constants for a specific material is greater than or equal to 5%, we predict them to be good candidates for 2D materials. We have predicted at least 1356 such 2D materials. For all the systems satisfying our criterion, we manually create single layer systems and calculate their energetics, structural, electronic, and elastic properties for both the bulk and the single layer cases. Currently the database consists of 1012 bulk and 430 single layer materials, of which 371 systems are common to bulk and single layer. The rest of calculations are underway. To validate our criterion, we calculated the exfoliation energy of the suggested layered materials, and we found that in 88.9% of the cases the currently accepted criterion for exfoliation was satisfied. Also, using molybdenum telluride as a test case, we performed X-ray diffraction and Raman scattering experiments to benchmark our calculations and understand their applicability and limitations. The data is publicly available at the website http://www.ctcms.nist.gov/~knc6/JVASP.html.",2017-07-12 +31337332,Soybean-VCF2Genomes: a database to identify the closest accession in soybean germplasm collection.,"

Background

The development of next generation sequencers (NGS) and the associated analytical methods has allowed researchers to profile their samples more precisely and more easily than before. Especially for agriculture, the certification of the genomic background of their plant materials would be important for the reliability of the seed market and stable yield as well as for quarantine procedures. However, the analysis of NGS data is still difficult for non-computational researchers or breeders to verify their samples because the majority of current software for NGS analysis requires users to access an unfamiliar Linux environment.

Main body

Here, we developed a web-application, ""Soybean-VCF2Genomes"", http://pgl.gnu.ac.kr/soy_vcf2genome/ to map single sample variant call format (VCF) file against known soybean germplasm collection for identification of the closest soybean accession. Based on principal component analysis (PCA), we simplified genotype matrix for lowering computational burden while maintaining accurate clustering. With our web-application, users can simply upload single sample VCF file created by more than 10x resequencing strategy to find the closest samples along with linkage dendrogram of the reference genotype matrix.

Conclusion

The information of the closest soybean cultivar will allow breeders to estimate relative germplasmic position of their query sample to determine soybean breeding strategies. Moreover, our VCF2Genomes scheme can be extended to other plant species where the whole genome sequences of core collection are publicly available.",2019-07-24 +26989153,High-performance integrated virtual environment (HIVE): a robust infrastructure for next-generation sequence data analysis. ,"The High-performance Integrated Virtual Environment (HIVE) is a distributed storage and compute environment designed primarily to handle next-generation sequencing (NGS) data. This multicomponent cloud infrastructure provides secure web access for authorized users to deposit, retrieve, annotate and compute on NGS data, and to analyse the outcomes using web interface visual environments appropriately built in collaboration with research and regulatory scientists and other end users. Unlike many massively parallel computing environments, HIVE uses a cloud control server which virtualizes services, not processes. It is both very robust and flexible due to the abstraction layer introduced between computational requests and operating system processes. The novel paradigm of moving computations to the data, instead of moving data to computational nodes, has proven to be significantly less taxing for both hardware and network infrastructure.The honeycomb data model developed for HIVE integrates metadata into an object-oriented model. Its distinction from other object-oriented databases is in the additional implementation of a unified application program interface to search, view and manipulate data of all types. This model simplifies the introduction of new data types, thereby minimizing the need for database restructuring and streamlining the development of new integrated information systems. 
The honeycomb model employs a highly secure hierarchical access control and permission system, allowing determination of data access privileges in a finely granular manner without flooding the security subsystem with a multiplicity of rules. HIVE infrastructure will allow engineers and scientists to perform NGS analysis in a manner that is both efficient and secure. HIVE is actively supported in public and private domains, and project collaborations are welcomed. Database URL: https://hive.biochemistry.gwu.edu.",2016-03-17 +30997504,AutoMLST: an automated web server for generating multi-locus species trees highlighting natural product potential.,"Understanding the evolutionary background of a bacterial isolate has applications for a wide range of research. However generating an accurate species phylogeny remains challenging. Reliance on 16S rDNA for species identification currently remains popular. Unfortunately, this widespread method suffers from low resolution at the species level due to high sequence conservation. Currently, there is now a wealth of genomic data that can be used to yield more accurate species designations via modern phylogenetic methods and multiple genetic loci. However, these often require extensive expertise and time. The Automated Multi-Locus Species Tree (autoMLST) was thus developed to provide a rapid 'one-click' pipeline to simplify this workflow at: https://automlst.ziemertlab.com. This server utilizes Multi-Locus Sequence Analysis (MLSA) to produce high-resolution species trees; this does not preform multi-locus sequence typing (MLST), a related classification method. The resulting phylogenetic tree also includes helpful annotations, such as species clade designations and secondary metabolite counts to aid natural product prospecting. Distinct from currently available web-interfaces, autoMLST can automate selection of reference genomes and out-group organisms based on one or more query genomes. 
This enables a wide range of researchers to perform rigorous phylogenetic analyses more rapidly compared to manual MLSA workflows.",2019-07-01 +31260503,GAIL: An interactive webserver for inference and dynamic visualization of gene-gene associations based on gene ontology guided mining of biomedical literature.,"In systems biology, inference of functional associations among genes is compelling because the construction of functional association networks facilitates biomarker discovery. Specifically, such gene associations in human can help identify putative biomarkers that can be used as diagnostic tools in treating patients. Although biomedical literature is considered a valuable data source for this task, currently only a limited number of webservers are available for mining gene-gene associations from the vast amount of biomedical literature using text mining techniques. Moreover, these webservers often have limited coverage of biomedical literature and also lack efficient and user-friendly tools to interpret and visualize mined relationships among genes. To address these limitations, we developed GAIL (Gene-gene Association Inference based on biomedical Literature), an interactive webserver that infers human gene-gene associations from Gene Ontology (GO) guided biomedical literature mining and provides dynamic visualization of the resulting association networks and various gene set enrichment analysis tools. We evaluate the utility and performance of GAIL with applications to gene signatures associated with systemic lupus erythematosus and breast cancer. Results show that GAIL allows effective interrogation and visualization of gene-gene networks and their subnetworks, which facilitates biological understanding of gene-gene associations. 
GAIL is available at http://chunglab.io/GAIL/.",2019-07-01 +30418649,Exploring and Visualizing Spaces of Tree Reconciliations.,"Tree reconciliation is the mathematical tool that is used to investigate the coevolution of organisms, such as hosts and parasites. A common approach to tree reconciliation involves specifying a model that assigns costs to certain events, such as cospeciation, and then tries to find a mapping between two specified phylogenetic trees which minimizes the total cost of the implied events. For such models, it has been shown that there may be a huge number of optimal solutions, or at least solutions that are close to optimal. It is therefore of interest to be able to systematically compare and visualize whole collections of reconciliations between a specified pair of trees. In this article, we consider various metrics on the set of all possible reconciliations between a pair of trees, some that have been defined before but also new metrics that we shall propose. We show that the diameter for the resulting spaces of reconciliations can in some cases be determined theoretically, information that we use to normalize and compare properties of the metrics. We also implement the metrics and compare their behavior on several host parasite data sets, including the shapes of their distributions. In addition, we show that in combination with multidimensional scaling, the metrics can be useful for visualizing large collections of reconciliations, much in the same way as phylogenetic tree metrics can be used to explore collections of phylogenetic trees. 
Implementations of the metrics can be downloaded from: https://team.inria.fr/erable/en/team-members/blerina-sinaimeri/reconciliation-distances/.",2019-07-01 +26527719,CEGA--a catalog of conserved elements from genomic alignments.,"By identifying genomic sequence regions conserved among several species, comparative genomics offers opportunities to discover putatively functional elements without any prior knowledge of what these functions might be. Comparative analyses across mammals estimated 4-5% of the human genome to be functionally constrained, a much larger fraction than the 1-2% occupied by annotated protein-coding or RNA genes. Such functionally constrained yet unannotated regions have been referred to as conserved non-coding sequences (CNCs) or ultra-conserved elements (UCEs), which remain largely uncharacterized but probably form a highly heterogeneous group of elements including enhancers, promoters, motifs, and others. To facilitate the study of such CNCs/UCEs, we present our resource of Conserved Elements from Genomic Alignments (CEGA), accessible from http://cega.ezlab.org. Harnessing the power of multiple species comparisons to detect genomic elements under purifying selection, CEGA provides a comprehensive set of CNCs identified at different radiations along the vertebrate lineage. Evolutionary constraint is identified using threshold-free phylogenetic modeling of unbiased and sensitive global alignments of genomic synteny blocks identified using protein orthology. We identified CNCs independently for five vertebrate clades, each referring to a different last common ancestor and therefore to an overlapping but varying set of CNCs with 24 488 in vertebrates, 241 575 in amniotes, 709 743 in Eutheria, 642 701 in Boreoeutheria and 612 364 in Euarchontoglires, spanning from 6 Mbp in vertebrates to 119 Mbp in Euarchontoglires. 
The dynamic CEGA web interface displays alignments, genomic locations, as well as biologically relevant data to help prioritize and select CNCs of interest for further functional investigations.",2015-11-02 +29739837,NvERTx: a gene expression database to compare embryogenesis and regeneration in the sea anemone Nematostella vectensis. ,"For over a century, researchers have been comparing embryogenesis and regeneration hoping that lessons learned from embryonic development will unlock hidden regenerative potential. This problem has historically been a difficult one to investigate because the best regenerative model systems are poor embryonic models and vice versa. Recently, however, there has been renewed interest in this question, as emerging models have allowed researchers to investigate these processes in the same organism. This interest has been further fueled by the advent of high-throughput transcriptomic analyses that provide virtual mountains of data. Here, we present Nematostella vectensis Embryogenesis and Regeneration Transcriptomics (NvERTx), a platform for comparing gene expression during embryogenesis and regeneration. NvERTx consists of close to 50 transcriptomic data sets spanning embryogenesis and regeneration in Nematostella These data were used to perform a robust de novo transcriptome assembly, with which users can search, conduct BLAST analyses, and plot the expression of multiple genes during these two developmental processes. The site is also home to the results of gene clustering analyses, to further mine the data and identify groups of co-expressed genes. The site can be accessed at http://nvertx.kahikai.org.",2018-05-17 +31045209,SeqTailor: a user-friendly webserver for the extraction of DNA or protein sequences from next-generation sequencing data.,"Human whole-genome-sequencing reveals about 4 000 000 genomic variants per individual. These data are mostly stored as VCF-format files. 
Although many variant analysis methods accept VCF as input, many other tools require DNA or protein sequences, particularly for splicing prediction, sequence alignment, phylogenetic analysis, and structure prediction. However, there is no existing webserver capable of extracting DNA/protein sequences for genomic variants from VCF files in a user-friendly and efficient manner. We developed the SeqTailor webserver to bridge this gap, by enabling rapid extraction of (i) DNA sequences around genomic variants, with customizable window sizes and options to annotate the splice sites closest to the variants and to consider the neighboring variants within the window; and (ii) protein sequences encoded by the DNA sequences around genomic variants, with built-in SnpEff annotator and customizable window sizes. SeqTailor supports 11 species, including: human (GRCh37/GRCh38), chimpanzee, mouse, rat, cow, chicken, lizard, zebrafish, fruitfly, Arabidopsis and rice. Standalone programs are provided for command-line-based needs. SeqTailor streamlines the sequence extraction process, and accelerates the analysis of genomic variants with software requiring DNA/protein sequences. It will facilitate the study of genomic variation, by increasing the feasibility of sequence-based analysis and prediction. The SeqTailor webserver is freely available at http://shiva.rockefeller.edu/SeqTailor/.",2019-07-01 +27226753,"Capturing biodiversity: linking a cyanobacteria culture collection to the ""scratchpads"" virtual research environment enhances biodiversity knowledge.","

Background

Currently, cyanobacterial diversity is examined using a polyphasic approach by assessing morphological and molecular data (Komárek 2015). However, the comparison of morphological and genetic data is sometimes hindered by the lack of cultures of several cyanobacterial morphospecies and inadequate morphological data of sequenced strains (Rajaniemi et al. 2005). Furthermore, in order to evaluate the phenotypic plasticity within defined taxa, the variability observed in cultures has to be compared to the range in natural variation (Komárek and Mareš 2012). Thus, new tools are needed to aggregate, link and process data in a meaningful way, in order to properly study and understand cyanodiversity.

New information

An online database on cyanobacteria has been created, namely the Cyanobacteria culture collection (CCC) (http://cyanobacteria.myspecies.info/) using as case studies cyanobacterial strains isolated from lakes of Greece, which are part of the AUTH culture collection (School of Biology, Aristotle University of Thessaloniki). The database hosts, for the first time, information and data such as morphology/morphometry, biogeography, phylogeny, microphotographs, distribution maps, toxicology and biochemical traits of the strains. All these data are structured, managed, and presented online and are publicly accessible with a recently developed tool, namely ""Scratchpads"", a taxon-centric virtual research environment allowing browsing the taxonomic classification and retrieving various kinds of relevant information for each taxon.",2016-04-06 +25435546,Plant-PrAS: a database of physicochemical and structural properties and novel functional regions in plant proteomes.,"Arabidopsis thaliana is an important model species for studies of plant gene functions. Research on Arabidopsis has resulted in the generation of high-quality genome sequences, annotations and related post-genomic studies. The amount of annotation, such as gene-coding regions and structures, is steadily growing in the field of plant research. In contrast to the genomics resource of animals and microorganisms, there are still some difficulties with characterization of some gene functions in plant genomics studies. The acquisition of information on protein structure can help elucidate the corresponding gene function because proteins encoded in the genome possess highly specific structures and functions. 
In this study, we calculated multiple physicochemical and secondary structural parameters of protein sequences, including length, hydrophobicity, the amount of secondary structure, the number of intrinsically disordered regions (IDRs) and the predicted presence of transmembrane helices and signal peptides, using a total of 208,333 protein sequences from the genomes of six representative plant species, Arabidopsis thaliana, Glycine max (soybean), Populus trichocarpa (poplar), Oryza sativa (rice), Physcomitrella patens (moss) and Cyanidioschyzon merolae (alga). Using the PASS tool and the Rosetta Stone method, we annotated the presence of novel functional regions in 1,732 protein sequences that included unannotated sequences from the Arabidopsis and rice proteomes. These results were organized into the Plant Protein Annotation Suite database (Plant-PrAS), which can be freely accessed online at http://plant-pras.riken.jp/.",2014-11-29 +22095872,Prediction of protein secondary structure from circular dichroism using theoretically derived spectra.,"Circular dichroism (CD) is a spectroscopic technique commonly used to investigate the structure of proteins. Major secondary structure types, alpha-helices and beta-strands, produce distinctive CD spectra. Thus, by comparing the CD spectrum of a protein of interest to a reference set consisting of CD spectra of proteins of known structure, predictive methods can estimate the secondary structure of the protein. Currently available methods, including K2D2, use such experimental CD reference sets, which are very small in size when compared to the number of tertiary structures available in the Protein Data Bank (PDB). Conversely, given a PDB structure, it is possible to predict a theoretical CD spectrum from it. The methodological framework for this calculation was established long ago but only recently a convenient implementation called DichroCalc has been developed. 
In this study, we set out to determine whether theoretically derived spectra could be used as a reference set for accurate CD based predictions of secondary structure. We used DichroCalc to calculate the theoretical CD spectra of a nonredundant set of structures representing most proteins in the PDB, and applied a straightforward approach for predicting protein secondary structure content using these theoretical CD spectra as a reference set. We show that this method improves the predictions, particularly for the wavelength interval between 200 and 240 nm and for beta-strand content. We have implemented this method, called K2D3, in a publicly accessible web server at http://www.ogic.ca/projects/k2d3.",2011-11-17 +31647026,Tumor immune microenvironment and genomic evolution in a patient with metastatic triple negative breast cancer and a complete response to atezolizumab.,"BACKGROUND:Metastatic TNBC (mTNBC) has a poor prognosis and few treatment options. The anti-PD-L1 antibody atezolizumab demonstrated clinical activity in mTNBC patients with PD-L1-positive tumor-infiltrating immune cells. The current study describes the tumor immune microenvironment (TiME) and genomic evolution across sequential therapies in a patient with a 31-year history of TNBC and a complete response (CR) to atezolizumab monotherapy. MATERIALS AND METHODS:In 1986, the patient had surgery and radiotherapy (XRT) for newly diagnosed TNBC, followed by surgery and adjuvant chemotherapy for two locoregional recurrences. She developed mTNBC in 2009 and was sequentially treated with capecitabine, gemcitabine-carboplatin-iniparib (GCI), XRT and an experimental vaccine. She experienced disease progression (PD) to all these therapies. In 2013, she had a PD-L1 positive tumor and enrolled in a phase 1 atezolizumab monotherapy study (PCD4989g; NCT01375842). She received atezolizumab for 1 year with initial pseudo-progression followed by a partial response. 
After 1 year without treatment she experienced PD, reinitiated atezolizumab and subsequently achieved CR. Tumor specimens were collected at numerous times between 2008 and 2015 and assessed by immunohistochemistry, RNA-seq and DNA-seq. RESULTS:TiME biomarkers, including CD8, ICs and PD-L1 on IC, increased after capecitabine and remained high after GCI, XRT and through pseudo-progression on atezolizumab. At PD post-atezolizumab exposure, TiME biomarkers decreased but PD-L1 status remained positive. Immune-related RNA signatures confirmed these findings. TNBC subtyping revealed evolution from luminal androgen receptor (LAR) to basal-like immune activated (BLIA). Genomic profiling showed truncal alterations in RB1 and TP53, while the presence of other genomic alterations varied over time. Tumor mutational burden peaked after XRT and declined after atezolizumab exposure. CONCLUSIONS:This case report describes the evolution of TiME and TNBC molecular subtypes/genomics over time with sequential therapies in a TNBC patient with a CR to atezolizumab monotherapy. These data suggest the TiME is pliable and may be manipulated to maximize response to immunotherapy (NCT01375842, https://clinicaltrials.gov/ct2/show/NCT01375842?term=NCT01375842&rank=1 ).",2019-10-23 +31095432,The Association between Long-Term Air Pollution and Urinary Catecholamines: Evidence from the Multi-Ethnic Study of Atherosclerosis.,"

Background

Autonomic nervous system effects have been hypothesized as a mechanism of air pollutant health effects, though scant prior epidemiologic research has examined the association between air pollutants and catecholamines.

Objectives

To examine the association of long-term air pollutants with three urinary catecholamines: dopamine (DA), epinephrine (EPI), and norepinephrine (NE). As a secondary aim, we also examined the association between short-term (or acute) exposure to fine particulate matter [particulate matter with aerodynamic diameter ≤2.5 μm (PM2.5)] and those catecholamines.

Methods

We used data from the Multi-Ethnic Study of Atherosclerosis (MESA) and two of its ancillary studies, the MESA Air Pollution Study and the MESA Stress Study, to provide exposure and outcome data. DA, EPI, and NE from urine samples were collected from 2004 to 2006 from 1,002 participants in the New York, New York, and Los Angeles, California, study sites. Spatiotemporal models incorporated cohort-specific monitoring and estimated annual average pollutant concentrations ([Formula: see text], [Formula: see text], [Formula: see text] and black carbon) at participants' homes the year prior to urine collection. Secondarily, short-term [Formula: see text] was evaluated (day of, day prior, and 2- to 5-d lags prior to urine collection). Several covariates were considered confounders (age, race, sex, site, socioeconomic status, cardiovascular disease risk factors, psychosocial stressors, and medication use) in linear regression models.

Results

A [Formula: see text] higher annual [Formula: see text] concentration was associated with 6.3% higher mean EPI level [95% confidence interval (CI): 0.3%, 12.6%]. A 2-[Formula: see text] higher annual ambient [Formula: see text] concentration was associated with 9.1% higher mean EPI (95% CI: 3.2%, 15.3%) and 4.4% higher DA level (95% CI: 1%, 7.9%). [Formula: see text], black carbon, and short-term [Formula: see text] exposures were not significantly associated with any of the catecholamines.

Conclusions

We found an association between EPI and long-term concentrations of [Formula: see text] and [Formula: see text] and an association between DA and long-term ambient [Formula: see text]. These novel findings provide modest support for the hypothesis that air pollutant exposures are related to sympathetic nervous system activation. https://doi.org/10.1289/EHP3286.",2019-05-01 +25435547,MPIC: a mitochondrial protein import components database for plant and non-plant species.,"In the 2 billion years since the endosymbiotic event that gave rise to mitochondria, variations in mitochondrial protein import have evolved across different species. With the genomes of an increasing number of plant species sequenced, it is possible to gain novel insights into mitochondrial protein import pathways. We have generated the Mitochondrial Protein Import Components (MPIC) Database (DB; http://www.plantenergy.uwa.edu.au/applications/mpic) providing searchable information on the protein import apparatus of plant and non-plant mitochondria. An in silico analysis was carried out, comparing the mitochondrial protein import apparatus from 24 species representing various lineages from Saccharomyces cerevisiae (yeast) and algae to Homo sapiens (human) and higher plants, including Arabidopsis thaliana (Arabidopsis), Oryza sativa (rice) and other more recently sequenced plant species. Each of these species was extensively searched and manually assembled for analysis in the MPIC DB. The database presents an interactive diagram in a user-friendly manner, allowing users to select their import component of interest. The MPIC DB presents an extensive resource facilitating detailed investigation of the mitochondrial protein import machinery and allowing patterns of conservation and divergence to be recognized that would otherwise have been missed. 
To demonstrate the usefulness of the MPIC DB, we present a comparative analysis of the mitochondrial protein import machinery in plants and non-plant species, revealing plant-specific features that have evolved.",2014-11-29 +29342241,iPat: intelligent prediction and association tool for genomic research.,"Summary:The ultimate goal of genomic research is to effectively predict phenotypes from genotypes so that medical management can improve human health and molecular breeding can increase agricultural production. Genomic prediction or selection (GS) plays a complementary role to genome-wide association studies (GWAS), which is the primary method to identify genes underlying phenotypes. Unfortunately, most computing tools cannot perform data analyses for both GWAS and GS. Furthermore, the majority of these tools are executed through a command-line interface (CLI), which requires programming skills. Non-programmers struggle to use them efficiently because of the steep learning curves and zero tolerance for data formats and mistakes when inputting keywords and parameters. To address these problems, this study developed a software package, named the Intelligent Prediction and Association Tool (iPat), with a user-friendly graphical user interface. With iPat, GWAS or GS can be performed using a pointing device to simply drag and/or click on graphical elements to specify input data files, choose input parameters and select analytical models. Models available to users include those implemented in third party CLI packages such as GAPIT, PLINK, FarmCPU, BLINK, rrBLUP and BGLR. Users can choose any data format and conduct analyses with any of these packages. File conversions are automatically conducted for specified input data and selected packages. A GWAS-assisted genomic prediction method was implemented to perform genomic prediction using any GWAS method such as FarmCPU. iPat was written in Java for adaptation to multiple operating systems including Windows, Mac and Linux. 
Availability and implementation:The iPat executable file, user manual, tutorials and example datasets are freely available at http://zzlab.net/iPat. Contact:zhiwu.zhang@wsu.edu.",2018-06-01 +28302154,The Israeli National Genetic database: a 10-year experience.,"

Background

The Israeli National and Ethnic Mutation database ( http://server.goldenhelix.org/israeli ) was launched in September 2006 on the ETHNOS software to include clinically relevant genomic variants reported among Jewish and Arab Israeli patients. In 2016, the database was reviewed and corrected according to ClinVar ( https://www.ncbi.nlm.nih.gov/clinvar ) and ExAC ( http://exac.broadinstitute.org ) database entries. The present article summarizes some key aspects from the development and continuous update of the database over a 10-year period, which could serve as a paradigm of successful database curation for other similar resources.

Results

In September 2016, there were 2444 entries in the database, 890 among Jews, 1376 among Israeli Arabs, and 178 entries among Palestinian Arabs, corresponding to an ~4× data content increase compared to when originally launched. While the Israeli Arab population is much smaller than the Jewish population, the number of pathogenic variants causing recessive disorders reported in the database is higher among Arabs (934) than among Jews (648). Nevertheless, the number of pathogenic variants classified as founder mutations in the database is smaller among Arabs (175) than among Jews (192). In 2016, the entire database content was compared to that of other databases such as ClinVar and ExAC. We show that a significant difference in the percentage of pathogenic variants from the Israeli genetic database that were present in ExAC was observed between the Jewish population (31.8%) and the Israeli Arab population (20.6%).

Conclusions

The Israeli genetic database was launched in 2006 on the ETHNOS software and is available online ever since. It allows querying the database according to the disorder and the ethnicity; however, many other features are not available, in particular the possibility to search according to the name of the gene. In addition, due to the technical limitations of the previous ETHNOS software, new features and data are not included in the present online version of the database and upgrade is currently ongoing.",2017-03-16 +30170591,SNOMED CT standard ontology based on the ontology for general medical science.,"

Background

Systematized Nomenclature of Medicine-Clinical Terms (SNOMED CT, hereafter abbreviated SCT) is a comprehensive medical terminology used for standardizing the storage, retrieval, and exchange of electronic health data. Some efforts have been made to capture the contents of SCT as Web Ontology Language (OWL), but these efforts have been hampered by the size and complexity of SCT.

Method

Our proposal here is to develop an upper-level ontology and to use it as the basis for defining the terms in SCT in a way that will support quality assurance of SCT, for example, by allowing consistency checks of definitions and the identification and elimination of redundancies in the SCT vocabulary. Our proposed upper-level SCT ontology (SCTO) is based on the Ontology for General Medical Science (OGMS).

Results

The SCTO is implemented in OWL 2, to support automatic inference and consistency checking. The approach will allow integration of SCT data with data annotated using Open Biomedical Ontologies (OBO) Foundry ontologies, since the use of OGMS will ensure consistency with the Basic Formal Ontology, which is the top-level ontology of the OBO Foundry. Currently, the SCTO contains 304 classes, 28 properties, 2400 axioms, and 1555 annotations. It is publicly available through the bioportal at http://bioportal.bioontology.org/ontologies/SCTO/ .

Conclusion

The resulting ontology can enhance the semantics of clinical decision support systems and semantic interoperability among distributed electronic health records. In addition, the populated ontology can be used for the automation of mobile health applications.",2018-08-31 +27489569,Mango: combining and analyzing heterogeneous biological networks.,"

Background

Heterogeneous biological data such as sequence matches, gene expression correlations, protein-protein interactions, and biochemical pathways can be merged and analyzed via graphs, or networks. Existing software for network analysis has limited scalability to large data sets or is only accessible to software developers as libraries. In addition, the polymorphic nature of the data sets requires a more standardized method for integration and exploration.

Results

Mango facilitates large network analyses with its Graph Exploration Language, automatic graph attribute handling, and real-time 3-dimensional visualization. On a personal computer Mango can load, merge, and analyze networks with millions of links and can connect to online databases to fetch and merge biological pathways.

Conclusions

Mango is written in C++ and runs on Mac OS, Windows, and Linux. The stand-alone distributions, including the Graph Exploration Language integrated development environment, are freely available for download from http://www.complex.iastate.edu/download/Mango. The Mango User Guide listing all features can be found at http://www.gitbook.com/book/j23414/mango-user-guide.",2016-08-02 +31329318,CHARMM-GUI DEER facilitator for spin-pair distance distribution calculations and preparation of restrained-ensemble molecular dynamics simulations.,"The double electron-electron resonance (DEER) is a powerful structural biology technique to obtain distance information in the range of 18 to 80 Å by measuring the dipolar coupling between two unpaired electron spins. The distance distributions obtained from the experiment provide valuable structural information about the protein in its native environment that can be exploited using restrained ensemble molecular dynamics (reMD) simulations. We present a new tool DEER Facilitator in CHARMM-GUI that consists of two modules Spin-Pair Distributor and reMD Prepper to set up simulations that utilize information from DEER experiments. Spin-Pair Distributor provides a web-based interface to calculate the spin-pair distance distribution of labeled sites in a protein using MD simulations. The calculated distribution can be used to guide the selection of the labeling sites in experiments as well as validate different protein structure models. reMD Prepper facilitates the setup of reMD simulations using different types of spin labels in four different environments including vacuum, solution, micelle, and bilayer. The applications of these two modules are demonstrated with several test cases. Spin-Pair Distributor and reMD Prepper are available at http://www.charmm-gui.org/input/deer and http://www.charmm-gui.org/input/deerre. 
DEER Facilitator is expected to facilitate advanced biomolecular modeling and simulation, thereby leading to an improved understanding of the structure and dynamics of complex biomolecular systems based on experimental DEER data. © 2019 Wiley Periodicals, Inc.",2019-07-22 +29385401,Accurity: accurate tumor purity and ploidy inference from tumor-normal WGS data by jointly modelling somatic copy number alterations and heterozygous germline single-nucleotide-variants.,"Motivation:Tumor purity and ploidy have a substantial impact on next-gen sequence analyses of tumor samples and may alter the biological and clinical interpretation of results. Despite the existence of several computational methods that are dedicated to estimate tumor purity and/or ploidy from The Cancer Genome Atlas (TCGA) tumor-normal whole-genome-sequencing (WGS) data, an accurate, fast and fully-automated method that works in a wide range of sequencing coverage, level of tumor purity and level of intra-tumor heterogeneity, is still missing. Results:We describe a computational method called Accurity that infers tumor purity, tumor cell ploidy and absolute allelic copy numbers for somatic copy number alterations (SCNAs) from tumor-normal WGS data by jointly modelling SCNAs and heterozygous germline single-nucleotide-variants (HGSNVs). Results from both in silico and real sequencing data demonstrated that Accurity is highly accurate and robust, even in low-purity, high-ploidy and low-coverage settings in which several existing methods perform poorly. Accounting for tumor purity and ploidy, Accurity significantly increased signal/noise gaps between different copy numbers. We are hopeful that Accurity is of clinical use for identifying cancer diagnostic biomarkers. Availability and implementation:Accurity is implemented in C++/Rust, available at http://www.yfish.org/software/. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-06-01 +27899625,YM500v3: a database for small RNA sequencing in human cancer research.,"We previously presented the YM500 database, which contains >8000 small RNA sequencing (smRNA-seq) data sets and integrated analysis results for various cancer miRNome studies. In the updated YM500v3 database (http://ngs.ym.edu.tw/ym500/) presented herein, we not only focus on miRNAs but also on other functional small non-coding RNAs (sncRNAs), such as PIWI-interacting RNAs (piRNAs), tRNA-derived fragments (tRFs), small nuclear RNAs (snRNAs) and small nucleolar RNAs (snoRNAs). There is growing knowledge of the role of sncRNAs in gene regulation and tumorigenesis. We have also incorporated >10 000 cancer-related RNA-seq and >3000 more smRNA-seq data sets into the YM500v3 database. Furthermore, there are two main new sections, 'Survival' and 'Cancer', in this updated version. The 'Survival' section provides the survival analysis results in all cancer types or in a user-defined group of samples for a specific sncRNA. The 'Cancer' section provides the results of differential expression analyses, miRNA-gene interactions and cancer miRNA-related pathways. In the 'Expression' section, sncRNA expression profiles across cancer and sample types are newly provided. Cancer-related sncRNAs hold potential for both biotech applications and basic research.",2016-11-29 +30649194,Characterizing and ranking computed metabolic engineering strategies.,"

Motivation

The computer-aided design of metabolic intervention strategies has become a key component of an integrated metabolic engineering approach and a broad range of methods and algorithms has been developed for this task. Many of these algorithms enforce coupling of growth with product synthesis and may return thousands of possible intervention strategies from which the most suitable strategy must then be selected.

Results

This work focuses on how to evaluate and rank, in a meaningful way, a given pool of computed metabolic engineering strategies for growth-coupled product synthesis. Apart from straightforward criteria, such as a preferably small number of necessary interventions, a reasonable growth rate and a high product yield, we present several new criteria useful to pick the most suitable intervention strategy. Among others, we investigate the robustness of the intervention strategies by searching for metabolites that may disrupt growth coupling when accumulated or secreted and by checking whether the interventions interrupt pathways at their origin (preferable) or at downstream steps. We also assess thermodynamic properties of the pathway(s) favored by the intervention strategy. Furthermore, strategies that have a significant overlap with alternative solutions are ranked higher because they provide flexibility in implementation. We also introduce the notion of equivalence classes for grouping intervention strategies with identical solution spaces. Our ranking procedure involves in total ten criteria and we demonstrate its applicability by assessing knockout-based intervention strategies computed in a genome-scale model of E.coli for the growth-coupled synthesis of l-methionine and of the heterologous product 1,4-butanediol.

Availability and implementation

The MATLAB scripts that were used to characterize and rank the example intervention strategies are available at http://www2.mpi-magdeburg.mpg.de/projects/cna/etcdownloads.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +30863010,Neural network and logistic regression diagnostic prediction models for giant cell arteritis: development and validation.,"

Purpose

To develop and validate neural network (NN) vs logistic regression (LR) diagnostic prediction models in patients with suspected giant cell arteritis (GCA). Design: Multicenter retrospective chart review.

Methods

An audit of consecutive patients undergoing temporal artery biopsy (TABx) for suspected GCA was conducted at 14 international medical centers. The outcome variable was biopsy-proven GCA. The predictor variables were age, gender, headache, clinical temporal artery abnormality, jaw claudication, vision loss, diplopia, erythrocyte sedimentation rate, C-reactive protein, and platelet level. The data were divided into three groups to train, validate, and test the models. The NN model with the lowest false-negative rate was chosen. Internal and external validations were performed.

Results

Of 1,833 patients who underwent TABx, there was complete information on 1,201 patients, 300 (25%) of whom had a positive TABx. On multivariable LR, age, platelets, jaw claudication, vision loss, log C-reactive protein, log erythrocyte sedimentation rate, headache, and clinical temporal artery abnormality were statistically significant predictors of a positive TABx (P≤0.05). The area under the receiver operating characteristic curve/Hosmer-Lemeshow P for LR was 0.867 (95% CI, 0.794, 0.917)/0.119 vs NN 0.860 (95% CI, 0.786, 0.911)/0.805, with no statistically significant difference of the area under the curves (P=0.316). The misclassification rate/false-negative rate of LR was 20.6%/47.5% vs 18.1%/30.5% for NN. Missing data analysis did not change the results.

Conclusion

Statistical models can aid in the triage of patients with suspected GCA. Misclassification remains a concern, but cutoff values for 95% and 99% sensitivities are provided (https://goo.gl/THCnuU).",2019-02-21 +25656224,A RESTful interface to pseudonymization services in modern web applications.,"

Background

Medical research networks rely on record linkage and pseudonymization to determine which records from different sources relate to the same patient. To establish informational separation of powers, the required identifying data are redirected to a trusted third party that has, in turn, no access to medical data. This pseudonymization service receives identifying data, compares them with a list of already reported patient records and replies with a (new or existing) pseudonym. We found existing solutions to be technically outdated, complex to implement or not suitable for internet-based research infrastructures. In this article, we propose a new RESTful pseudonymization interface tailored for use in web applications accessed by modern web browsers.

Methods

The interface is modelled as a resource-oriented architecture, which is based on the representational state transfer (REST) architectural style. We translated typical use-cases into resources to be manipulated with well-known HTTP verbs. Patients can be re-identified in real-time by authorized users' web browsers using temporary identifiers. We encourage the use of PID strings for pseudonyms and the EpiLink algorithm for record linkage. As a proof of concept, we developed a Java Servlet as reference implementation.

Results

The following resources have been identified: Sessions allow data associated with a client to be stored beyond a single request while still maintaining statelessness. Tokens authorize for a specified action and thus allow the delegation of authentication. Patients are identified by one or more pseudonyms and carry identifying fields. Relying on HTTP calls alone, the interface is firewall-friendly. The reference implementation has proven to be production stable.

Conclusion

The RESTful pseudonymization interface fits the requirements of web-based scenarios and allows building applications that make pseudonymization transparent to the user using ordinary web technology. The open-source reference implementation implements the web interface as well as a scientifically grounded algorithm to generate non-speaking pseudonyms.",2015-02-07 +30658878,Prevention and treatment of pressure ulcers/injuries: The protocol for the second update of the international Clinical Practice Guideline 2019.,"

Aim

The European Pressure Ulcer Advisory Panel, the Pan Pacific Pressure Injury Alliance, and the National Pressure Ulcer Advisory Panel are updating the 'Prevention and Treatment of Pressure Ulcers: Clinical Practice Guideline' (CPG) in 2019. The aim of this contribution is to summarize and to discuss the guideline development protocol for the 2019 update.

Methods

A guideline governance group determines and monitors all steps of the CPG development. An international survey of consumers will be undertaken to establish consumer needs and interests. Systematic evidence searches in relevant electronic databases cover the period from July 2013 through August 2018. Risk of bias of included studies will be assessed by two reviewers using established checklists and an overall strength of evidence assigned to the cumulative body of evidence. Small working groups review the evidence available for each topic, review and/or draft the guideline chapters and recommendations and/or good practice statements. Finally, strength of recommendation grades are assigned. The recommendations are rated based on their importance and their potential to improve individual patient outcomes using an international formal consensus process.

Discussion

Major methodological advantages of the current revision are a clear distinction between evidence-based recommendations and good practice statements and strong consumer involvement.

Conclusion

The 2019 guideline update builds on the previous 2014 version to ensure consistency and comparability. Methodology changes will improve the guideline quality to increase clarity and to enhance implementation and compliance. The full guideline development protocol can be accessed from the guideline website (http://www.internationalguideline.com/).",2019-01-11 +28073746,miRDis: a Web tool for endogenous and exogenous microRNA discovery based on deep-sequencing data analysis.,"Small RNA sequencing is the most widely used tool for microRNA (miRNA) discovery, and shows great potential for the efficient study of miRNA cross-species transport, i.e., by detecting the presence of exogenous miRNA sequences in the host species. Because of the increased appreciation of dietary miRNAs and their far-reaching implication in human health, research interests are currently growing with regard to exogenous miRNAs bioavailability, mechanisms of cross-species transport and miRNA function in cellular biological processes. In this article, we present microRNA Discovery (miRDis), a new small RNA sequencing data analysis pipeline for both endogenous and exogenous miRNA detection. Specifically, we developed and deployed a Web service that supports the annotation and expression profiling data of known host miRNAs and the detection of novel miRNAs, other noncoding RNAs, and the exogenous miRNAs from dietary species. As a proof-of-concept, we analyzed a set of human plasma sequencing data from a milk-feeding study where 225 human miRNAs were detected in the plasma samples and 44 show elevated expression after milk intake. By examining the bovine-specific sequences, data indicate that three bovine miRNAs (bta-miR-378, -181* and -150) are present in human plasma possibly because of the dietary uptake. 
Further evaluation based on different sets of public data demonstrates that miRDis outperforms other state-of-the-art tools in both detection and quantification of miRNA from either animal or plant sources. The miRDis Web server is available at: http://sbbi.unl.edu/miRDis/index.php.",2018-05-01 +28440791,A curated database of cyanobacterial strains relevant for modern taxonomy and phylogenetic studies.,"The dataset herein described lays the groundwork for an online database of relevant cyanobacterial strains, named CyanoType (http://lege.ciimar.up.pt/cyanotype). It is a database that includes categorized cyanobacterial strains useful for taxonomic, phylogenetic or genomic purposes, with associated information obtained by means of a literature-based curation. The dataset lists 371 strains and represents the first version of the database (CyanoType v.1). Information for each strain includes strain synonymy and/or co-identity, strain categorization, habitat, accession numbers for molecular data, taxonomy and nomenclature notes according to three different classification schemes, hierarchical automatic classification, phylogenetic placement according to a selection of relevant studies (including this), and important bibliographic references. The database will be updated periodically, namely by adding new strains meeting the criteria for inclusion and by revising and adding up-to-date metadata for strains already listed. A global 16S rDNA-based phylogeny is provided in order to assist users when choosing the appropriate strains for their studies.",2017-04-25 +28800607,RNA-sequencing-based transcriptome and biochemical analyses of steroidal saponin pathway in a complete set of Allium fistulosum-A. cepa monosomic addition lines.,"The genus Allium is a rich source of steroidal saponins, and its medicinal properties have been attributed to these bioactive compounds. The saponin compounds with diverse structures play a pivotal role in Allium's defense mechanism. 
Despite numerous studies on the occurrence and chemical structure of steroidal saponins, their biosynthetic pathway in Allium species is poorly understood. The monosomic addition lines (MALs) of the Japanese bunching onion (A. fistulosum, FF) with an extra chromosome from the shallot (A. cepa Aggregatum group, AA) are powerful genetic resources that enable us to understand many physiological traits of Allium. In the present study, we were able to isolate and identify Alliospiroside A saponin compound in A. fistulosum with extra chromosome 2A from shallot (FF2A) and its role in the defense mechanism against Fusarium pathogens. Furthermore, to gain molecular insight into the Allium saponin biosynthesis pathway, high-throughput RNA-Seq of the root, bulb, and leaf of AA, MALs, and FF was carried out using Illumina's HiSeq 2500 platform. An open access Allium Transcript Database (Allium TDB, http://alliumtdb.kazusa.or.jp) was generated based on RNA-Seq data. The resulting assembled transcripts were functionally annotated, revealing 50 unigenes involved in saponin biosynthesis. Differential gene expression (DGE) analyses of AA and MALs as compared with FF (as a control) revealed a strong up-regulation of the saponin downstream pathway, including cytochrome P450, glycosyltransferase, and beta-glucosidase in chromosome 2A. An understanding of the saponin compounds and biosynthesis-related genes would facilitate the development of plants with unique saponin content and, subsequently, improved disease resistance.",2017-08-11 +30202962,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Evaluation and Treatment of Patients With Thoracolumbar Spine Trauma: Pharmacological Treatment.,"

Question

Does the administration of a specific pharmacologic agent (eg, methylprednisolone) improve clinical outcomes in patients with thoracic and lumbar fractures and spinal cord injury?

Recommendation

There is insufficient evidence to make a recommendation; however, the task force concluded, in light of previously published data and guidelines, that the complication profile should be carefully considered when deciding on the administration of methylprednisolone. Strength of recommendation: Grade Insufficient The full version of the guideline can be reviewed at: https://www.cns.org/guideline-chapters/congress-neurological-surgeons-systematic-review-evidence-based-guidelines/chapter_5.",2019-01-01 +30639977,Association between epilepsy and risk of sexual dysfunction: A meta-analysis.,"

Purpose

Sexual functioning is an important factor influencing quality of life. Mounting evidence suggests that both male and female patients with epilepsy (PWE) have an increased risk of developing sexual dysfunction (SD). The aim of this meta-analysis was to quantify the association between epilepsy and the risk of SD.

Methods

The PubMed, Embase, and Cochrane Library databases were systematically searched to identify the pertinent studies focusing on the association between epilepsy and SD. Relative risk (RR) for SD with 95% confidence interval (CI) was calculated. The overall quality of the evidence was generated by applying the GRADE-profiler. This meta-analysis was registered on PROSPERO (ID: CRD42018103572, http://www.crd.york.ac.uk/PROSPERO).

Results

Nine studies (3 cross-sectional, 5 case-control, and 1 cohort) were included in this meta-analysis, for a total of 1556 subjects and 599 cases of epilepsy. Synthetic results demonstrated that epilepsy was associated with an increased risk of female SD (6 studies, pooled RR = 2.69, 95%CI: 1.48-4.89, P =  0.001; heterogeneity: I2 = 88.9%, P <  0.001) as well as male SD (3 studies, pooled RR = 4.85, 95%CI: 2.01-11.7, P <  0.001; heterogeneity: I2 = 74.2%, P =  0.021). The GRADE-profiler showed that the rate of events of SD on average in the PWE and the controls were 383/659 (58.1%) and 168/1017 (16.5%), respectively. The quality of evidence across outcomes was MODERATE.

Conclusions

Epilepsy is significantly associated with an increased risk of SD in both sexes. These findings suggest that both clinicians and patients should recognize that epilepsy has a potential hazardous effect on sexual functioning.",2019-01-10 +31212016,Breast cancer histopathological image classification using a hybrid deep neural network.,"Even with the rapid advances in medical sciences, histopathological diagnosis is still considered the gold standard in diagnosing cancer. However, the complexity of histopathological images and the dramatic increase in workload make this task time consuming, and the results may be subject to pathologist subjectivity. Therefore, the development of automatic and precise histopathological image analysis methods is essential for the field. In this paper, we propose a new hybrid convolutional and recurrent deep neural network for breast cancer histopathological image classification. Based on the richer multilevel feature representation of the histopathological image patches, our method integrates the advantages of convolutional and recurrent neural networks, and the short-term and long-term spatial correlations between patches are preserved. The experimental results show that our method outperforms the state-of-the-art method with an obtained average accuracy of 91.3% for the 4-class classification task. We also release a dataset with 3771 breast cancer histopathological images to the scientific community that is now publicly available at http://ear.ict.ac.cn/?page_id=1616. Our dataset is not only the largest publicly released dataset for breast cancer histopathological image classification, but it covers as many different subclasses spanning different age groups as possible, thus providing enough data diversity to alleviate the problem of relatively low classification accuracy of benign images.",2019-06-15 +23594715,TUMIR: an experimentally supported database of microRNA deregulation in various cancers.,"

Background

MicroRNAs have been found to play an important role in cancers, and several publications describe the relationship between microRNAs and cancer, but their expression patterns are still only faintly understood. There is a need for a comprehensive collection and summary of the experimentally supported interactions.

Description

TUMIR (http://www.ncrnalab.com/TUMIR/), a manually extracted database of experimentally supported microRNA-cancer relationships, aims at providing a large, high-quality, validated comprehensive resource of microRNA deregulation in various cancers. The current version is based on a systematic literature search of the PubMed database up to May 1, 2012, and contains data extracted from 205 publications and 1163 entries describing regulatory interactions between human microRNAs and cancers. Each entry in the database contains the details of microRNA name, the disease name, case number, control number, p value, the experimentally validated targets, sample type, and a brief description of patients' clinicopathologic parameters mentioned in the same paper. The website has several extensive external links to the related websites and any requests can be made by emailing tumir_pumc@163.com.

Conclusion

TUMIR is an open access website and will be a reliable reference for researchers who are interested in better understanding the relationship between miRNAs and cancer.",2013-04-17 +31199676,Proposed Key Characteristics of Male Reproductive Toxicants as an Approach for Organizing and Evaluating Mechanistic Evidence in Human Health Hazard Assessments.,"

Background

Assessing chemicals for their potential to cause male reproductive toxicity involves the evaluation of evidence obtained from experimental, epidemiological, and mechanistic studies. Although mechanistic evidence plays an important role in hazard identification and evidence integration, the process of identifying, screening and analyzing mechanistic studies and outcomes is a challenging exercise due to the diversity of research models and methods and the variety of known and proposed pathways for chemical-induced toxicity. Ten key characteristics of carcinogens provide a valuable tool for organizing and assessing chemical-specific data by potential mechanisms for cancer-causing agents. However, such an approach has not yet been developed for noncancer adverse outcomes.

Objectives

The objective in this study was to identify a set of key characteristics that are frequently exhibited by exogenous agents that cause male reproductive toxicity and that could be applied for identifying, organizing, and summarizing mechanistic evidence related to this outcome.

Discussion

The identification of eight key characteristics of male reproductive toxicants was based on a survey of known male reproductive toxicants and established mechanisms and pathways of toxicity. The eight key characteristics can provide a basis for the systematic, transparent, and objective organization of mechanistic evidence relevant to chemical-induced effects on the male reproductive system. https://doi.org/10.1289/EHP5045.",2019-06-14 +31331071,IoTDS: A One-Class Classification Approach to Detect Botnets in Internet of Things Devices. ,"Internet of Things (IoT) devices have become increasingly widespread. Despite their potential of improving multiple application domains, these devices have poor security, which can be explored by attackers to build large-scale botnets. In this work, we propose a host-based approach to detect botnets in IoT devices, named IoTDS (Internet of Things Detection System). It relies on one-class classifiers, which model only the legitimate device behaviour for further detection of deviations, avoiding the manual labelling process. The proposed solution is underpinned by a novel agent-manager architecture based on HTTPS, which prevents the IoT device from being overloaded by the training activities. To analyse the device's behaviour, the approach extracts features from the device's CPU utilisation and temperature, memory consumption, and number of running tasks, meaning that it does not make use of network traffic data. To test our approach, we used an experimental IoT setup containing a device compromised by bot malware. Multiple scenarios were made, including three different IoT device profiles and seven botnets. Four one-class algorithms (Elliptic Envelope, Isolation Forest, Local Outlier Factor, and One-class Support Vector Machine) were evaluated. The results show the proposed system has a good predictive performance for different botnets, achieving a mean F1-score of 94% for the best performing algorithm, the Local Outlier Factor. 
The system also presented a low impact on the device's energy consumption, and CPU and memory utilisation.",2019-07-19 +31297314,Lingual mucosal graft urethroplasty 12 years later: Systematic review and meta-analysis.,"

Objective

To evaluate the functional results and complications of the lingual mucosal graft (LMG) urethroplasty and to sum up the current state of the art of this surgical technique.

Methods

A systematic search of PubMed and Scopus electronic databases was performed, according to the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) statement. Studies involving male patients treated with LMG urethroplasty for urethral stricture were included. Complete protocol is available at http://www.crd.york.ac.uk/PROSPERO/display_record.php?ID=CRD42017080121. A meta-analysis comparing functional and long-term oral complication outcomes of LMG and buccal mucosal graft (BMG) was performed, calculating the odds ratio (OR) and 95% confidence interval (CI).

Results

Twenty original articles were included in the qualitative analysis. Strictures of 1.5-16.5 cm have been treated with LMG urethroplasty, due to the improvement of harvesting technique and very low rate of long-term oral complications. Very good functional results have been reported by different authors for LMG urethroplasty, with lower rate of oral complications than BMG. The meta-analysis included six comparative studies involving 187 and 178 patients treated with LMG and BMG urethroplasty, respectively. An OR of 1.65 (95% CI [0.95-2.87], I 2 = 0%) and 0.18 (95% CI [0.03-1.26], I 2 = 68%) were found for LMG vs. BMG urethroplasty, in terms of success and oral complication rate, respectively.

Conclusion

LMG urethroplasty can be reasonably considered a first choice technique for urethral stricture with very good results. Oral complications are temporary and minimally disabling, basically less than those for BMG, and depend mainly on the graft extent.",2019-01-09 +30626363,Effect of surfactant administration on outcomes of adult patients in acute respiratory distress syndrome: a meta-analysis of randomized controlled trials.,"

Introduction

Surfactant is usually deficient in adult acute respiratory distress syndrome (ARDS) patients and surfactant administration may be a useful therapy. The aim of this study was to perform a meta-analysis of the effect of surfactant administration on outcomes of adult patients with acute respiratory distress syndrome.

Methods

PubMed, EMBASE, Medline, Cochrane database, Elsevier, Web of Science and http://clinicaltrials.gov were searched and investigated until December 2017. Randomized controlled trials (RCTs) comparing surfactant administration with general therapy in adult patients with ARDS were enrolled. The primary outcome was mortality (7-10-day, 28-30-day and 90-180-day). Secondary outcome included oxygenation (PaO2/FiO2 ratio). Demographic variables, surfactant administration, and outcomes were retrieved. Sensitivity analyses were used to evaluate the impact of study quality issues on the overall effect. Funnel plot inspection, Egger's and Begg's tests were applied to investigate the publication bias. Internal validity was assessed with the risk of bias tool. Random errors were evaluated with trial sequential analysis (TSA). Quality levels were assessed by Grading of Recommendations Assessment, Development, and Evaluation methodology (GRADE).

Results

Eleven RCTs with 3038 patients were identified. Surfactant administration could not improve mortality of adult patients [Risk ratio (RR) (95%CI) = 1.02(0.93-1.12), p = 0.65]. Subgroup analysis revealed no difference of 7-10-day mortality [RR(95%CI) = 0.89(0.54-1.49), p = 0.66], 28-30-day mortality [RR(95%CI) = 1.00(0.89-1.12), p = 0.98] and 90-180-day mortality [RR(95%CI) = 1.11(0.94-1.32), p = 0.22] between surfactant group and control group. The change of the PaO2/FiO2 ratio in adult ARDS patients had no difference [MD(95%CI) = 0.06(-0.12-0.24), p = 0.5] after surfactant administration. Finally, TSA and GRADE indicated lack of firm evidence for a beneficial effect.

Conclusions

Surfactant administration has not been shown to improve mortality and improve oxygenation for adult ARDS patients. Large rigorous randomized trials are needed to explore the effect of surfactant to adult ARDS patients.",2019-01-09 +30420468,CRISPR/Cas9 Methodology for the Generation of Knockout Deletions in Caenorhabditis elegans.,"The Caenorhabditis elegans Gene Knockout Consortium is tasked with obtaining null mutations in each of the more than 20,000 open reading frames (ORFs) of this organism. To date, approximately 15,000 ORFs have associated putative null alleles. As there has been substantial success in using CRISPR/Cas9 in C. elegans, this appears to be the most promising technique to complete the task. To enhance the efficiency of using CRISPR/Cas9 to generate gene deletions in C. elegans we provide a web-based interface to access our database of guide RNAs (http://genome.sfu.ca/crispr). When coupled with previously developed selection vectors, optimization for homology arm length, and the use of purified Cas9 protein, we demonstrate a robust and effective protocol for generating deletions for this large-scale project. Debate and speculation in the larger scientific community concerning off-target effects due to non-specific Cas9 cutting has prompted us to investigate through whole genome sequencing the occurrence of single nucleotide variants and indels accompanying targeted deletions. We did not detect any off-site variants above the natural spontaneous mutation rate and therefore conclude that this modified protocol does not generate off-target events to any significant degree in C. 
elegans We did, however, observe a number of non-specific alterations at the target site itself following the Cas9-induced double-strand break and offer a protocol for best practice quality control for such events.",2019-01-09 +30474027,Preventing Translational Scientists From Extinction: The Long-Term Impact of a Personalized Training Program in Translational Medicine on the Careers of Translational Scientists.,"Far too much biomedical research is wasted and ends in the so called ""Valley of Death"": the gap that exists between biomedical research and its clinical application. While the translational process requires collaboration between many disciplines, current translational medicine focuses on single disciplines. Therefore, educational pathways that integrate clinical and research skills in interdisciplinary and interprofessional contexts are needed. The Eureka institute (http://www.eurekainstitute.org/) was founded to address these issues. The institute organizes an annual 1-week international certificate course to educate professionals in the domains of translational medicine. Study design: This study set out to investigate the impact of the Eureka certificate course on the alumni, focusing on their ability to engage in translational activities and thus become more proficient translational professionals. An explanatory, mixed-methods study was executed. Data collection: A questionnaire was distributed to collect quantitative data on the number of alumni who were able to apply what they learned during the Eureka course and engage in translational activities. Questionnaire data were also used to inform the semi-structured interviews that were conducted subsequently. Results: Fifty-one percent of the alumni reported that participating in the Eureka course played a role in their decision to change to a different job or in the way they were accomplishing their everyday work. 
Ten conditions for change that either hampered or supported the Eureka alumni's engagement in translational research activities were identified. Further, the learning outcomes of the Eureka course that impacted the alumni's professional activities were explored using Personal Professional Theory (PPT). The insight that alumni gained in the full translational spectrum and stakeholders involved stimulated reflection on their own role within that pathway. Further, according to the alumni, the course provided them with the skills and confidence to pursue a career as translational professional. These learning outcomes, in combination with conditions that supported alumni's engagement in translational activities, such as supportive professional partners, opportunities to network or collaborate, and a translational work environment, contributed to the large number of alumni that were able to engage in translational activities.",2018-11-09 +23334680,Getting the word out about treating borderline personality disorder: an online information resource.,"This column reviews the need for greater awareness among mental health clinicians regarding evidence-based treatments for borderline personality disorder (BPD) and describes an online resource that has been developed to partially address this need. This resource is the Borderline Personality Disorder Clinician Resource Centre, which can be accessed at http://www.treatingBPD.ca.",2013-01-01 +29753646,Network Visualization and Analysis of Spatially Aware Gene Expression Data with InsituNet.,"In situ sequencing methods generate spatially resolved RNA localization and expression data at an almost single-cell resolution. Few methods, however, currently exist to analyze and visualize the complex data that is produced, which can encode the localization and expression of a million or more individual transcripts in a tissue section. 
Here, we present InsituNet, an application that converts in situ sequencing data into interactive network-based visualizations, where each unique transcript is a node in the network and edges represent the spatial co-expression relationships between transcripts. InsituNet is available as an app for the Cytoscape platform at http://apps.cytoscape.org/apps/insitunet. InsituNet enables the analysis of the relationships that exist between these transcripts and can uncover how spatial co-expression profiles change in different regions of the tissue or across different tissue sections.",2018-05-09 +25276335,Structuring research methods and data with the research object model: genomics workflows as a case study.,"

Background

One of the main challenges for biomedical research lies in the computer-assisted integrative study of large and increasingly complex combinations of data in order to understand molecular mechanisms. The preservation of the materials and methods of such computational experiments with clear annotations is essential for understanding an experiment, and this is increasingly recognized in the bioinformatics community. Our assumption is that offering means of digital, structured aggregation and annotation of the objects of an experiment will provide necessary meta-data for a scientist to understand and recreate the results of an experiment. To support this we explored a model for the semantic description of a workflow-centric Research Object (RO), where an RO is defined as a resource that aggregates other resources, e.g., datasets, software, spreadsheets, text, etc. We applied this model to a case study where we analysed human metabolite variation by workflows.

Results

We present the application of the workflow-centric RO model for our bioinformatics case study. Three workflows were produced following recently defined Best Practices for workflow design. By modelling the experiment as an RO, we were able to automatically query the experiment and answer questions such as ""which particular data was input to a particular workflow to test a particular hypothesis?"", and ""which particular conclusions were drawn from a particular workflow?"".

Conclusions

Applying a workflow-centric RO model to aggregate and annotate the resources used in a bioinformatics experiment, allowed us to retrieve the conclusions of the experiment in the context of the driving hypothesis, the executed workflows and their input data. The RO model is an extendable reference model that can be used by other systems as well.

Availability

The Research Object is available at http://www.myexperiment.org/packs/428 The Wf4Ever Research Object Model is available at http://wf4ever.github.io/ro.",2014-09-18 +28105966,A Novel Approach for Pathway Analysis of GWAS Data Highlights Role of BMP Signaling and Muscle Cell Differentiation in Colorectal Cancer Susceptibility.,"Genome-wide association studies (GWAS) have revolutionized the field of gene mapping. As the GWAS field matures, it is becoming clear that for many complex traits, a proportion of the missing heritability is attributable to common variants of individually small effect. Detecting these small effects individually can be difficult, and statistical power would be increased if relevant variants could be grouped together for testing. Here, we propose a VEGAS2Pathway approach that aggregates association strength of individual markers into pre-specified biological pathways. It accounts for gene size and linkage disequilibrium between markers using simulations from the multivariate normal distribution. Pathway size is taken into account via a resampling approach. Importantly, since the approach only requires summary data, the method can easily be applied in all GWASs, including meta-analysis, singleton-based, family-based, and DNA-pooling-based designs. This approach is implemented in a user-friendly web page https://vegas2.qimrberghofer.edu.au and a command line tool. The web implementation uses gene-sets from the gene ontology (GO), curated gene-sets from MSigDB (containing canonical pathways and gene-sets from BIOCARTA, REACTOME, KEGG databases), PANTHER, and pathway commons databases, enabling analysis of a wide range of complex traits. We applied this method on a colorectal cancer GWAS meta-analysis data set (10,934 cases, 12,328 controls) from the Genetics and Epidemiology of Colorectal Cancer Consortium (GECCO). 
We report statistically significant enrichment of association signal for the 'BMP signaling' and 'muscle cell differentiation' pathways, suggesting a possible role for these pathways onto the risk of colorectal cancer.",2017-02-01 +30132174,Suicide and community justice.,"BACKGROUND:There has long been concern about the number of people who die in custody in England and Wales, particularly in prisons or police stations. The concern is obviously heightened when people die either at their own hand, or at the hands of others. Yet there has been selective critical gaze, and people who die whilst under probation or community supervision have been neglected (Phillips, J, Gelsthorpe, L, Padfield, N., Criminology & Criminal Justice, https://doi.org/10.1177/1748895817745939 , 2017). Given that there is evidence to suggest that contact with the criminal justice system in non-custodial settings is associated with higher mortality rates than those found in the general population, such neglect is concerning. METHODS:This article explores data which has been published since 2016 by Her Majesty's Prison and Probation Service (HMPPS) on the deaths of offenders whilst under supervision. We draw on data which is collected by probation providers and collated by HMPPS to present original analyses, with particular focus on deaths by suicide. We calculate rates of self-inflicted deaths and rate ratios with the general population and the prison population. RESULTS:The suicide rates for all groups within the sample are higher than the general population. CONCLUSIONS:We explore the utility of the data in helping us to understand the trends regarding people dying whilst under probation supervision with a particular focus on suicide, and highlight areas where the dataset is deficient. 
We conclude that whilst the dataset can be used to calculate headline rates of suicide it raises many questions in terms of the extant risks that people on probation face, and we explore ways in which the data can be used more fully to understand this important social and public health issue. We consider ways in which the dataset could be matched with other datasets in future research so that health issues might be brought into the analysis, and reflect on other research methodologies which would add depth to our understanding of why the mortality rate amongst people in contact with the criminal justice system is higher than in the general population.",2018-08-21 +28730433,Computational Prediction of the Immunomodulatory Potential of RNA Sequences.,"Advances in the knowledge of various roles played by non-coding RNAs have stimulated the application of RNA molecules as therapeutics. Among these molecules, miRNA, siRNA, and CRISPR-Cas9 associated gRNA have been identified as the most potent RNA molecule classes with diverse therapeutic applications. One of the major limitations of RNA-based therapeutics is immunotoxicity of RNA molecules as it may induce the innate immune system. In contrast, RNA molecules that are potent immunostimulators are strong candidates for use in vaccine adjuvants. Thus, it is important to understand the immunotoxic or immunostimulatory potential of these RNA molecules. The experimental techniques for determining immunostimulatory potential of siRNAs are time- and resource-consuming. To overcome this limitation, recently our group has developed a web-based server ""imRNA"" for predicting the immunomodulatory potential of RNA sequences. This server integrates a number of modules that allow users to perform various tasks including (1) generation of RNA analogs with reduced immunotoxicity, (2) identification of highly immunostimulatory regions in RNA sequence, and (3) virtual screening. 
This server may also assist users in the identification of minimum mutations required in a given RNA sequence to minimize its immunomodulatory potential that is required for designing RNA-based therapeutics. Besides, the server can be used for designing RNA-based vaccine adjuvants as it may assist users in the identification of mutations required for increasing immunomodulatory potential of a given RNA sequence. In summary, this chapter describes major applications of the ""imRNA"" server in designing RNA-based therapeutics and vaccine adjuvants (http://www.imtech.res.in/raghava/imrna/).",2017-01-01 +23584836,hLGDB: a database of human lysosomal genes and their regulation.,"Lysosomes are cytoplasmic organelles present in almost all eukaryotic cells, which play a fundamental role in key aspects of cellular homeostasis such as membrane repair, autophagy, endocytosis and protein metabolism. The characterization of the genes and enzymes constituting the lysosome represents a central issue to be addressed toward a better understanding of the biology of this organelle. In humans, mutations that cause lysosomal enzyme deficiencies result in >50 different disorders and severe pathologies. So far, many experimental efforts using different methodologies have been carried out to identify lysosomal genes. The Human Lysosome Gene Database (hLGDB) is the first resource that provides a comprehensive and accessible census of the human genes belonging to the lysosomal system. This database was developed by collecting and annotating gene lists from many different sources. References to the studies that have identified each gene are provided together with cross databases gene related information. Special attention has been given to the regulation of the genes through microRNAs and the transcription factor EB. 
The hLGDB can be easily queried to retrieve, combine and analyze information on different lists of lysosomal genes and their regulation by microRNA (binding sites predicted by five different algorithms). The hLGDB is an open access dynamic project that will permit in the future to collapse in a unique publicly accessible resource all the available biological information about lysosome genes and their regulation. Database URL: http://lysosome.unipg.it/.",2013-04-12 +29843602,"Combining RNA-seq data and homology-based gene prediction for plants, animals and fungi.","

Background

Genome annotation is of key importance in many research questions. The identification of protein-coding genes is often based on transcriptome sequencing data, ab-initio or homology-based prediction. Recently, it was demonstrated that intron position conservation improves homology-based gene prediction, and that experimental data improves ab-initio gene prediction.

Results

Here, we present an extension of the gene prediction program GeMoMa that utilizes amino acid sequence conservation, intron position conservation and optionally RNA-seq data for homology-based gene prediction. We show on published benchmark data for plants, animals and fungi that GeMoMa performs better than the gene prediction programs BRAKER1, MAKER2, and CodingQuarry, and purely RNA-seq-based pipelines for transcript identification. In addition, we demonstrate that using multiple reference organisms may help to further improve the performance of GeMoMa. Finally, we apply GeMoMa to four nematode species and to the recently published barley reference genome indicating that current annotations of protein-coding genes may be refined using GeMoMa predictions.

Conclusions

GeMoMa might be of great utility for annotating newly sequenced genomes but also for finding homologs of a specific gene or gene family. GeMoMa has been published under GNU GPL3 and is freely available at http://www.jstacs.de/index.php/GeMoMa .",2018-05-30 +22345019,Prediction of autism susceptibility genes based on association rules.,"Autism is a complex neuropsychiatric disorder with high heritability and an unclear etiology. The identification of key genes related to autism may elucidate its etiology. The current study provides an approach to predicting autism susceptibility genes. Genes are first extracted from the biomedical literature, and some autism susceptibility genes are then recognized as seeds by the prior knowledge. As candidates, the remaining genes are predicted by creating association rules between the seeds and candidates. In an evaluated data set, 27 autism susceptibility genes (type ""Y"") are extracted and 43 possible autism susceptibility genes (type ""P"") are predicted. The sum of ""Y"" and ""P"" genes accounts for 93.3% of the data set that are not contained in the typical database of autism susceptibility genes. Our approach can effectively extract and predict autism susceptibility genes from the biomedical literature. These predicted results complement the typical database of autism susceptibility genes. The web portal for the predicted results, which is freely available at http://biolab.hyit.edu.cn/ar, can be a valuable resource in studies of diseases related to genes.",2012-02-16 +30627607,Peptidome profiling dataset of ovarian cancer and non-cancer proximal fluids: Ascites and blood sera.,"Despite a large number of proteomic studies of biological fluids from ovarian cancer patients, there is a lack of sensitive screening methods in clinical practice (Kim et al., 2016) (DOI:https://doi.org/10.1111/cas.12987[1]). 
Low molecular weight endogenous peptides more easily diffuse across endothelial barriers than proteins and can be more relevant biomarker candidates (Meo et al., 2016) (DOI:https://doi.org/10.18632/oncotarget.8931[2], (Bery et al., 2014) DOI:https://doi.org/10.1186/1559-0275-11-13[3], (Huang et al., 2018) DOI:https://doi.org/10.1097/IGC.0000000000001166[4]). Detailed peptidomic analysis of 26 ovarian cancer and 15 non-cancer samples of biological fluids (ascites and sera) were performed using TripleTOF 5600+ mass-spectrometer. Prior to LC-MS/MS analysis, peptides were extracted from biological fluids using anion exchange sorbent with subsequent peptide desorption from the surface of highly abundant proteins. In total, we identified 4874 peptides; 3123 peptides were specific for the ovarian cancer samples. The mass-spectrometry peptidomics data presented in this data article have been deposited to the ProteomeXchange Consortium (Deutsch et al., 2017) (DOI:https://doi.org/10.1093/nar/gkw936[5]) via the PRIDE partner repository with the dataset identifier PXD009382 and https://doi.org/10.6019/PXD009382, http://www.ebi.ac.uk/pride/archive/projects/PXD009382.",2018-12-19 +31677033,Cultivating resiliency in patients with neurofibromatosis 2 who are deafened or have severe hearing loss: a live‑video randomized control trial.,"

Introduction

Patients with NF2 who are deaf or have significant hearing loss face numerous and unique challenges which lead to poor quality of life, and thus may benefit from resiliency programs.

Methods

We performed secondary data analyses on a single blind, randomized controlled trial of an 8 week mind-body resiliency program (the Relaxation Response and Resiliency program for Deaf NF2; d3RP-NF2) versus a health education control (Health Enhancement Program for Deaf NF2;dHEP-NF2) which showed improvement in quality of life (Funes in JAMA 2019, https://doi.org/10.1007/s11060-019-03182-3). Here we report on improvements in resiliency factors (i.e. optimism, gratitude, perceived social support, mindfulness, and perceived coping abilities) assessed at baseline, post-test and 6-month follow-up. Both programs were delivered via Skype using Communication Access Real-Time Translation.

Results

Patients who were randomized to the d3RP-NF2 program exhibited significant improvements from baseline to post-program in gratitude (Mdifference = 4.04, 95% CI 1.58-6.50; p = 0.002), perceived social support (Mdifference = 16.36, 95% CI 9.20-23.51; p < 0.001), mindfulness (Mdifference = 4.02, 95% CI 1.10-6.94; p = 0.008), perceived coping (Mdifference = 15.25, 95% CI 10.21-20.28; p < 0.001), and a non-significant trend of improvement in optimism (Mdifference = 1.15, 95% CI -0.14-12.44; p = 0.079). These improvements were all maintained through the 6-month follow up. Improvements in perceived coping (Mdifference = 12.34, 95% CI 4.75-19.93; p = 0.002), social support (Mdifference = 13.11, 95% CI 2.19-24.03; p = 0.02), and gratitude (Mdifference = 4.59, 95% CI 0.83-8.36; p = 0.018) were over and above the changes observed in those randomized to dHEP-NF2.

Conclusion

The d3RP-NF2 sustainably improves multiple dimensions of resiliency. Promoting resiliency may be of utmost importance for this underserved population.",2019-11-01 +31112078,Consensus QSPR modelling for the prediction of cellular response and fibrinogen adsorption to the surface of polymeric biomaterials.,"In the current study, we have developed predictive quantitative structure-activity relationship (QSAR) models for cellular response (foetal rat lung fibroblast proliferation) and protein adsorption (fibrinogen adsorption (FA)) on the surface of tyrosine-derived biodegradable polymers designed for tissue engineering purpose using a dataset of 66 and 40 biodegradable polymers, respectively, employing two-dimensional molecular descriptors. Best four individual models have been selected for each of the endpoints. These models are developed using partial least squares regression with a unique combination of six and four descriptors for cellular response and protein adsorption, respectively. The generated models were strictly validated using internal and external metrics to determine the predictive ability and robustness of proposed models. Subsequently, the validated individual models for each response endpoints were used for the generation of 'intelligent' consensus models ( http://teqip.jdvu.ac.in/QSAR_Tools/DTCLab/ ) to improve the quality of predictions for the external data set. These models may help in prediction of virtual polymer libraries for rational design/optimization for properties relevant to biomedical applications prior to their synthesis.",2019-05-01 +30304379,PrecursorFinder: a customized biosynthetic precursor explorer.,"

Summary

Synthetic biology has a great potential to produce high value pharmaceuticals, commodities or bulk chemicals. However, many biosynthetic target molecules have no defined or predicted biosynthetic pathways. Biosynthetic precursors are crucial to create biosynthetic pathways. Thus computer-assisted tools for precursor identification are urgently needed to develop novel metabolic pathways. To this end, we present PrecursorFinder, a computational tool that explores biosynthetic precursors for the query target molecules using chemical structure, similarity as well as MCS (maximum common substructure). This platform comprises more than 60 000 compounds biosynthesized for being promising precursors, which are extracted from >500 000 scientific literatures and manually curated by more than 100 people over the past 8 years. The PrecursorFinder could speed up the process of biosynthesis research and make synthetic biology or metabolic engineering more efficient.

Availability and implementation

PrecursorFinder is available at: http://www.rxnfinder.org/precursorfinder/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +29220515,BUSCO Applications from Quality Assessments to Gene Prediction and Phylogenomics.,"Genomics promises comprehensive surveying of genomes and metagenomes, but rapidly changing technologies and expanding data volumes make evaluation of completeness a challenging task. Technical sequencing quality metrics can be complemented by quantifying completeness of genomic data sets in terms of the expected gene content of Benchmarking Universal Single-Copy Orthologs (BUSCO, http://busco.ezlab.org). The latest software release implements a complete refactoring of the code to make it more flexible and extendable to facilitate high-throughput assessments. The original six lineage assessment data sets have been updated with improved species sampling, 34 new subsets have been built for vertebrates, arthropods, fungi, and prokaryotes that greatly enhance resolution, and data sets are now also available for nematodes, protists, and plants. Here, we present BUSCO v3 with example analyses that highlight the wide-ranging utility of BUSCO assessments, which extend beyond quality control of genomics data sets to applications in comparative genomics analyses, gene predictor training, metagenomics, and phylogenomics.",2018-03-01 +30341374,SUMOgo: Prediction of sumoylation sites on lysines by motif screening models and the effects of various post-translational modifications.,"Most modern tools used to predict sites of small ubiquitin-like modifier (SUMO) binding (referred to as SUMOylation) use algorithms, chemical features of the protein, and consensus motifs. However, these tools rarely consider the influence of post-translational modification (PTM) information for other sites within the same protein on the accuracy of prediction results. 
This study applied the Random Forest machine learning method, as well as motif screening models and a feature selection combination mechanism, to develop a SUMOylation prediction system, referred to as SUMOgo. With regard to prediction method, PTM sites were coded as new functional features in addition to structural features, such as sequence-based binary coding, encoded chemical features of proteins, and encoded secondary structure information that is important for PTM. Twenty cycles of prediction were conducted with a 1:1 combination of positive test data and random negative data. Matthew's correlation coefficient of SUMOgo reached 0.511, which is higher than that of current commonly used tools. This study further verified the important role of PTM in SUMOgo and includes a case study on CREB binding protein (CREBBP). The website for the final tool is http://predictor.nchu.edu.tw/SUMOgo .",2018-10-19 +29496338,"Googling your hand hygiene data: Using Google Forms, Google Sheets, and R to collect and automate analysis of hand hygiene compliance monitoring.","BACKGROUND:Hand hygiene is one of the most important interventions in the quest to eliminate healthcare-associated infections, and rates in healthcare facilities are markedly low. Since hand hygiene observation and feedback are critical to improve adherence, we created an easy-to-use, platform-independent hand hygiene data collection process and an automated, on-demand reporting engine. METHODS:A 3-step approach was used for this project: 1) creation of a data collection form using Google Forms, 2) transfer of data from the form to a spreadsheet using Google Spreadsheets, and 3) creation of an automated, cloud-based analytics platform for report generation using R and RStudio Shiny software. RESULTS:A video tutorial of all steps in the creation and use of this free tool can be found on our YouTube channel: https://www.youtube.com/watch?v=uFatMR1rXqU&t. 
The on-demand reporting tool can be accessed at: https://crsp.louisville.edu/shiny/handhygiene. CONCLUSIONS:This data collection and automated analytics engine provides an easy-to-use environment for evaluating hand hygiene data; it also provides rapid feedback to healthcare workers. By reducing some of the data management workload required of the infection preventionist, more focused interventions may be instituted to increase global hand hygiene rates and reduce infection.",2018-02-26 +28472330,DynOmics: dynamics of structural proteome and beyond.,"DynOmics (dynomics.pitt.edu) is a portal developed to leverage rapidly growing structural proteomics data by efficiently and accurately evaluating the dynamics of structurally resolved systems, from individual molecules to large complexes and assemblies, in the context of their physiological environment. At the core of the portal is a newly developed server, ENM 1.0, which permits users to efficiently generate information on the collective dynamics of any structure in PDB format, user-uploaded or database-retrieved. ENM 1.0 integrates two widely used elastic network models (ENMs)-the Gaussian Network Model (GNM) and the Anisotropic Network Model (ANM), extended to take account of molecular environment. It enables users to assess potentially functional sites, signal transduction or allosteric communication mechanisms, and protein-protein and protein-DNA interaction poses, in addition to delivering ensembles of accessible conformers reconstructed at atomic details based on the global modes of motions predicted by the ANM. The 'environment' is defined in a flexible manner, from lipid bilayer and crystal contacts, to substrate or ligands bound to a protein, or surrounding subunits in a multimeric structure or assembly. User-friendly interactive features permit users to easily visualize how the environment alter the intrinsic dynamics of the query systems. 
ENM 1.0 can be accessed at http://enm.pitt.edu/ or http://dyn.life.nthu.edu.tw/oENM/.",2017-07-01 +25324303,P2CS: updates of the prokaryotic two-component systems database.,"The P2CS database (http://www.p2cs.org/) is a comprehensive resource for the analysis of Prokaryotic Two-Component Systems (TCSs). TCSs are comprised of a receptor histidine kinase (HK) and a partner response regulator (RR) and control important prokaryotic behaviors. The latest incarnation of P2CS includes 164,651 TCS proteins, from 2758 sequenced prokaryotic genomes. Several important new features have been added to P2CS since it was last described. Users can search P2CS via BLAST, adding hits to their cart, and homologous proteins can be aligned using MUSCLE and viewed using Jalview within P2CS. P2CS also provides phylogenetic trees based on the conserved signaling domains of the RRs and HKs from entire genomes. HK and RR trees are annotated with gene organization and domain architecture, providing insights into the evolutionary origin of the contemporary gene set. The majority of TCSs are encoded by adjacent HK and RR genes, however, 'orphan' unpaired TCS genes are also abundant and identifying their partner proteins is challenging. P2CS now provides paired HK and RR trees with proteins from the same genetic locus indicated. This allows the appraisal of evolutionary relationships across entire TCSs and in some cases the identification of candidate partners for orphan TCS proteins.",2014-10-16 +28977460,GWIPS-viz: 2018 update.,"The GWIPS-viz browser (http://gwips.ucc.ie/) is an on-line genome browser which is tailored for exploring ribosome profiling (Ribo-seq) data. Since its publication in 2014, GWIPS-viz provides Ribo-seq data for an additional 14 genomes bringing the current total to 23. The integration of new Ribo-seq data has been automated thereby increasing the number of available tracks to 1792, a 10-fold increase in the last three years. 
The increase is particularly substantial for data derived from human sources. Following user requests, we added the functionality to download these tracks in bigWig format. We also incorporated new types of data (e.g. TCP-seq) as well as auxiliary tracks from other sources that help with the interpretation of Ribo-seq data. Improvements in the visualization of the data have been carried out particularly for bacterial genomes where the Ribo-seq data are now shown in a strand specific manner. For higher eukaryotic datasets, we provide characteristics of individual datasets using the RUST program which includes the triplet periodicity, sequencing biases and relative inferred A-site dwell times. This information can be used for assessing the quality of Ribo-seq datasets. To improve the power of the signal, we aggregate Ribo-seq data from several studies into Global aggregate tracks for each genome.",2018-01-01 +23970545,Automated classification of RNA 3D motifs and the RNA 3D Motif Atlas.,"The analysis of atomic-resolution RNA three-dimensional (3D) structures reveals that many internal and hairpin loops are modular, recurrent, and structured by conserved non-Watson-Crick base pairs. Structurally similar loops define RNA 3D motifs that are conserved in homologous RNA molecules, but can also occur at nonhomologous sites in diverse RNAs, and which often vary in sequence. To further our understanding of RNA motif structure and sequence variability and to provide a useful resource for structure modeling and prediction, we present a new method for automated classification of internal and hairpin loop RNA 3D motifs and a new online database called the RNA 3D Motif Atlas. To classify the motif instances, a representative set of internal and hairpin loops is automatically extracted from a nonredundant list of RNA-containing PDB files. Their structures are compared geometrically, all-against-all, using the FR3D program suite. 
The loops are clustered into motif groups, taking into account geometric similarity and structural annotations and making allowance for a variable number of bulged bases. The automated procedure that we have implemented identifies all hairpin and internal loop motifs previously described in the literature. All motif instances and motif groups are assigned unique and stable identifiers and are made available in the RNA 3D Motif Atlas (http://rna.bgsu.edu/motifs), which is automatically updated every four weeks. The RNA 3D Motif Atlas provides an interactive user interface for exploring motif diversity and tools for programmatic data access.",2013-08-22 +31523495,"The RIPper, a web-based tool for genome-wide quantification of Repeat-Induced Point (RIP) mutations.","

Background

The RIPper (http://theripper.hawk.rocks) is a set of web-based tools designed for analyses of Repeat-Induced Point (RIP) mutations in the genome sequences of Ascomycota. The RIP pathway is a fungal genome defense mechanism that is aimed at identifying repeated and duplicated motifs, into which it then introduces cytosine to thymine transition mutations. RIP thus serves to deactivate and counteract the deleterious consequences of selfish or mobile DNA elements in fungal genomes. The occurrence, genetic context and frequency of RIP mutations are widely used to assess the activity of this pathway in genomic regions of interest. Here, we present a bioinformatics tool that is specifically fashioned to automate the investigation of changes in RIP product and substrate nucleotide frequencies in fungal genomes.

Results

We demonstrated the ability of The RIPper to detect the occurrence and extent of RIP mutations in known RIP affected sequences. Specifically, a sliding window approach was used to perform genome-wide RIP analysis on the genome assembly of Neurospora crassa. Additionally, fine-scale analysis with The RIPper showed that gene regions and transposable element sequences, previously determined to be affected by RIP, were indeed characterized by high frequencies of RIP mutations. Data generated using this software further showed that large proportions of the N. crassa genome constitutes RIP mutations with extensively affected regions displaying reduced GC content. The RIPper was further useful for investigating and visualizing changes in RIP mutations across the length of sequences of interest, allowing for fine-scale analyses.

Conclusion

This software identified RIP targeted genomic regions and provided RIP statistics for an entire genome assembly, including the genomic proportion affected by RIP. Here, we present The RIPper as an efficient tool for genome-wide RIP analyses.",2019-08-26 +30381355,Novel Observations Concerning Differentiation of Bloodstream-Form Trypanosomes to the Form That Is Adapted for Growth in Tsetse Flies. ,"Salivarian trypanosomes grow in mammals, where they depend on glucose, and as procyclic forms in tsetse flies, where they metabolize proline. Differentiation of bloodstream forms to nongrowing stumpy forms, and to procyclic forms, has been studied extensively, but reconciling the results is tricky because investigators have used parasites with various differentiation competences and different media for procyclic-form culture. Standard protocols include lowering the temperature to 27°C, adding a tricarboxylic acid, and transferring the parasites to high-proline medium, often including glucose. A 20°C cold shock enhanced efficiency. Y. Qiu, J. E. Milanes, J. A. Jones, R. E. Noorai, et al. (mSphere 3:e00366-18, 2018, https://doi.org/10.1128/mSphere.00366-18) studied this systematically, and their results call long-established protocols into question. Importantly, highly efficient differentiation was observed after cold shock and transfer to no-glucose medium without tricarboxylic acid; in contrast, glucose made differentiation tricarboxylic acid dependent and inhibited procyclic growth. 
New transcriptome data for stumpy and procyclic forms will enable informative comparisons with biochemical observations and with other RNA and protein data sets.",2018-10-31 +22067452,"WormBase 2012: more genomes, more data, new website.","Since its release in 2000, WormBase (http://www.wormbase.org) has grown from a small resource focusing on a single species and serving a dedicated research community, to one now spanning 15 species essential to the broader biomedical and agricultural research fields. To enhance the rate of curation, we have automated the identification of key data in the scientific literature and use similar methodology for data extraction. To ease access to the data, we are collaborating with journals to link entities in research publications to their report pages at WormBase. To facilitate discovery, we have added new views of the data, integrated large-scale datasets and expanded descriptions of models for human disease. Finally, we have introduced a dramatic overhaul of the WormBase website for public beta testing. Designed to balance complexity and usability, the new site is species-agnostic, highly customizable, and interactive. Casual users and developers alike will be able to leverage the public RESTful application programming interface (API) to generate custom data mining solutions and extensions to the site. We report on the growth of our database and on our work in keeping pace with the growing demand for data, efforts to anticipate the requirements of users and new collaborations with the larger science community.",2011-11-08 +30832735,Ultrasound Elastography supplement assessing nodal status of magnetic resonance imaging staged cervical N0 patients with nasopharyngeal carcinoma.,"

Background

To determine whether ultrasound elastography can distinguish reactive or metastatic small lymph nodes (sLN) of magnetic resonance imaging (MRI) staged cervical N0 patients with nasopharyngeal carcinoma (NPC).

Methods

A pilot study was performed involving the diagnostic performances of conventional high-frequency ultrasound (CHFU) and/or shear wave elastography (SWE) for predicting metastases in sLN of MRI-staged N0 NPC patients with reference to the histologically-proven ultrasound guided core needle biopsy (US-CNB). The diagnosis of CHFU was based on the superficial lymph node ultrasonic criteria with the five-point-scale (FPS). The mean (Emean), minimum (Emin) and maximum (Emax) of the elasticity indices were measured by SWE at the stiffest part of the sLN in kilopascal. Diagnostic performances were analyzed using a receiver operating curve (ROC) on a per-node basis. The authenticity of this article has been validated by uploading the key raw data onto the Research Data Deposit public platform ( http://www.researchdata.org.cn ), with the approval RDD number as RDDA2017000447.

Results

All 113 cervical sLN of 49 MRI-staged cervical N0 NPC patients underwent evaluation of CHFU and SWE; 38 sLN (FPS < 2) were regarded as benign and were excluded from subsequent analysis because they were not biopsy-proven. The remaining 75 indeterminate sLN (FPS ≥ 2) were referred to US-CNB, which revealed 15 (20%) metastases. All SWE elastic indices were significantly higher in malignant sLNs than in benign sLNs (p < 0.05). Moreover, Emax exhibited the highest diagnostic value (AUC:0.733 ± 0.067, p = 0.005) with excellent measurement reproducibility (ICC: 0.786; 95%CI: 0.684, 0.864). CHFU plus SWE was superior to CHFU or SWE alone for predicting metastases in sLN of MRI-staged N0 patients with NPC (p < 0.001).

Conclusions

CHFU plus SWE is an optional non-invasive modality to supplement MRI in assessing cervical nodal status of patients with NPC.",2019-03-04 +29462247,A new approach for interpreting Random Forest models and its application to the biology of ageing.,"Motivation:This work uses the Random Forest (RF) classification algorithm to predict if a gene is over-expressed, under-expressed or has no change in expression with age in the brain. RFs have high predictive power, and RF models can be interpreted using a feature (variable) importance measure. However, current feature importance measures evaluate a feature as a whole (all feature values). We show that, for a popular type of biological data (Gene Ontology-based), usually only one value of a feature is particularly important for classification and the interpretation of the RF model. Hence, we propose a new algorithm for identifying the most important and most informative feature values in an RF model. Results:The new feature importance measure identified highly relevant Gene Ontology terms for the aforementioned gene classification task, producing a feature ranking that is much more informative to biologists than an alternative, state-of-the-art feature importance measure. Availability and implementation:The dataset and source codes used in this paper are available as 'Supplementary Material' and the description of the data can be found at: https://fabiofabris.github.io/bioinfo2018/web/. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +31586768,Identification of miRNAs and associated pathways regulated by Leukemia Inhibitory Factor in trophoblastic cell lines.,"

Introduction

Leukemia Inhibitory Factor (LIF) regulates behavior of trophoblast cells and their interaction with immune and endothelial cells. In vitro, trophoblast cell response to LIF may vary depending on the cell model. Reported differences in the miRNA profile of trophoblastic cells may be responsible for these observations. Therefore, miRNA expression was investigated in four trophoblastic cell lines under LIF stimulation followed by in silico analysis of altered miRNAs and their associated pathways.

Methods

Low density TaqMan miRNA assays were used to quantify levels of 762 mature miRNAs under LIF stimulation in three choriocarcinoma-derived (JEG-3, ACH-3P and AC1-M59) and a trophoblast immortalized (HTR-8/SVneo) cell lines. Expression of selected miRNAs was confirmed in primary trophoblast cells and cell lines by qPCR. Targets and associated pathways of the differentially expressed miRNAs were inferred from the miRTarBase followed by a KEGG Pathway Enrichment Analysis. HTR-8/SVneo and JEG-3 cells were transfected with miR-21-mimics and expression of miR-21 targets was assessed by qPCR.

Results

A similar number of miRNAs changed in each tested cell line upon LIF stimulation, however, low coincidence of individual miRNA species was observed and occurred more often among choriocarcinoma-derived cells (complete data set at http://www.ncbi.nlm.nih.gov/geo/ under GEO accession number GSE130489). Altered miRNAs were categorized into pathways involved in human diseases, cellular processes and signal transduction. Six cascades were identified as significantly enriched, including JAK/STAT and TGFB-SMAD. Upregulation of miR-21-3p was validated in all cell lines and primary cells and STAT3 was confirmed as its target.

Discussion

Dissimilar miRNA responses may be involved in differences of LIF effects on trophoblastic cell lines.",2019-09-12 +28782718,Merging in-silico and in vitro salivary protein complex partners using the STRING database: A tutorial.,"Protein-protein interaction is a common physiological mechanism for protection and actions of proteins in an organism. The identification and characterization of protein-protein interactions in different organisms is necessary to better understand their physiology and to determine their efficacy. In a previous in vitro study using mass spectrometry, we identified 43 proteins that interact with histatin 1. Six previously documented interactors were confirmed and 37 novel partners were identified. In this tutorial, we aimed to demonstrate the usefulness of the STRING database for studying protein-protein interactions. We used an in-silico approach along with the STRING database (http://string-db.org/) and successfully performed a fast simulation of a novel constructed histatin 1 protein-protein network, including both the previously known and the predicted interactors, along with our newly identified interactors. Our study highlights the advantages and importance of applying bioinformatics tools to merge in-silico tactics with experimental in vitro findings for rapid advancement of our knowledge about protein-protein interactions. Our findings also indicate that bioinformatics tools such as the STRING protein network database can help predict potential interactions between proteins and thus serve as a guide for future steps in our exploration of the Human Interactome.

Significance

Our study highlights the usefulness of the STRING protein database for studying protein-protein interactions. The STRING database can collect and integrate data about known and predicted protein-protein associations from many organisms, including both direct (physical) and indirect (functional) interactions, in an easy-to-use interface.",2017-08-03 +30622655,Systematic survey of plant LTR-retrotransposons elucidates phylogenetic relationships of their polyprotein domains and provides a reference for element classification.,"

Background

Plant LTR-retrotransposons are classified into two superfamilies, Ty1/copia and Ty3/gypsy. They are further divided into an enormous number of families which are, due to the high diversity of their nucleotide sequences, usually specific to a single species or a group of closely related species. Previous attempts to group these families into broader categories reflecting their phylogenetic relationships were limited either to analyzing a narrow range of plant species or to analyzing a small number of elements. Furthermore, there is no reference database that allows for similarity-based classification of LTR-retrotransposons.

Results

We have assembled a database of retrotransposon encoded polyprotein domains sequences extracted from 5410 Ty1/copia elements and 8453 Ty3/gypsy elements sampled from 80 species representing major groups of green plants (Viridiplantae). Phylogenetic analysis of the three most conserved polyprotein domains (RT, RH and INT) led to dividing Ty1/copia and Ty3/gypsy retrotransposons into 16 and 14 lineages respectively. We also characterized various features of LTR-retrotransposon sequences including additional polyprotein domains, extra open reading frames and primer binding sites, and found that the occurrence and/or type of these features correlates with phylogenies inferred from the three protein domains.

Conclusions

We have established an improved classification system applicable to LTR-retrotransposons from a wide range of plant species. This system reflects phylogenetic relationships as well as distinct sequence and structural features of the elements. A comprehensive database of retrotransposon protein domains (REXdb) that reflects this classification provides a reference for efficient and unified annotation of LTR-retrotransposons in plant genomes. Access to REXdb related tools is implemented in the RepeatExplorer web server (https://repeatexplorer-elixir.cerit-sc.cz/) or using a standalone version of REXdb that can be downloaded separately from the RepeatExplorer web page (http://repeatexplorer.org/).",2019-01-03 +28265493,Transcriptome analysis of Polygonum minus reveals candidate genes involved in important secondary metabolic pathways of phenylpropanoids and flavonoids.,"

Background

Polygonum minus is an herbal plant in the Polygonaceae family which is rich in ethnomedicinal plants. The chemical composition and characteristic pungent fragrance of Polygonum minus have been extensively studied due to its culinary and medicinal properties. There are only a few transcriptome sequences available for species from this important family of medicinal plants. The limited genetic information from the public expressed sequence tag (EST) library hinders further study on molecular mechanisms underlying secondary metabolite production.

Methods

In this study, we performed a hybrid assembly of 454 and Illumina sequencing reads from Polygonum minus root and leaf tissues, respectively, to generate a combined transcriptome library as a reference.

Results

A total of 34.37 million filtered and normalized reads were assembled into 188,735 transcripts with a total length of 136.67 Mbp. We performed a similarity search against all the publicly available genome sequences and found similarity matches for 163,200 (86.5%) of Polygonum minus transcripts, largely from Arabidopsis thaliana (58.9%). Transcript abundance in the leaf and root tissues were estimated and validated through RT-qPCR of seven selected transcripts involved in the biosynthesis of phenylpropanoids and flavonoids. All the transcripts were annotated against KEGG pathways to profile transcripts related to the biosynthesis of secondary metabolites.

Discussion

This comprehensive transcriptome profile will serve as a useful sequence resource for molecular genetics and evolutionary research on secondary metabolite biosynthesis in Polygonaceae family. Transcriptome assembly of Polygonum minus can be accessed at http://prims.researchfrontier.org/index.php/dataset/transcriptome.",2017-02-28 +25392421,"MiCroKiTS 4.0: a database of midbody, centrosome, kinetochore, telomere and spindle.","We reported an updated database of MiCroKiTS 4.0 (http://microkit.biocuckoo.org) for proteins temporally and spatially localized in distinct subcellular positions including midbody, centrosome, kinetochore, telomere and mitotic spindle during cell division/mitosis. The database was updated from our previously developed database of MiCroKit 3.0, which contained 1489 proteins mostly forming super-complexes at midbody, centrosome and kinetochore from seven eukaryotes. Since the telomere and spindle apparatus are critical for cell division, the proteins localized at the two positions were also integrated. From the scientific literature, we curated 1872 experimentally identified proteins which at least locate in one of the five positions from eight species. Then the ortholog detection was performed to identify potential MiCroKiTS proteins from 144 eukaryotic organisms, which contains 66, 45 and 33 species of animals, fungi and plants, respectively. In total, 87,983 unique proteins with corresponding localization information were integrated into the database. The primary references of experimentally identified localizations were provided and the fluorescence microscope figures for the localizations of human proteins were shown. The orthologous relations between predicted and experimental localizations were also present. 
Taken together, we anticipate the database can serve as a useful resource for further analyzing the molecular mechanisms during cell division.",2014-11-11 +29547986,NMRNet: a deep learning approach to automated peak picking of protein NMR spectra.,"Motivation:Automated selection of signals in protein NMR spectra, known as peak picking, has been studied for over 20 years, nevertheless existing peak picking methods are still largely deficient. Accurate and precise automated peak picking would accelerate the structure calculation, and analysis of dynamics and interactions of macromolecules. Recent advancement in handling big data, together with an outburst of machine learning techniques, offer an opportunity to tackle the peak picking problem substantially faster than manual picking and on par with human accuracy. In particular, deep learning has proven to systematically achieve human-level performance in various recognition tasks, and thus emerges as an ideal tool to address automated identification of NMR signals. Results:We have applied a convolutional neural network for visual analysis of multidimensional NMR spectra. A comprehensive test on 31 manually annotated spectra has demonstrated top-tier average precision (AP) of 0.9596, 0.9058 and 0.8271 for backbone, side-chain and NOESY spectra, respectively. Furthermore, a combination of extracted peak lists with automated assignment routine, FLYA, outperformed other methods, including the manual one, and led to correct resonance assignment at the levels of 90.40%, 89.90% and 90.20% for three benchmark proteins. Availability and implementation:The proposed model is a part of a Dumpling software (platform for protein NMR data analysis), and is available at https://dumpling.bio/. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-08-01 +23585830,MitoLSDB: a comprehensive resource to study genotype to phenotype correlations in human mitochondrial DNA variations.,"Human mitochondrial DNA (mtDNA) encodes a set of 37 genes which are essential structural and functional components of the electron transport chain. Variations in these genes have been implicated in a broad spectrum of diseases and are extensively reported in literature and various databases. In this study, we describe MitoLSDB, an integrated platform to catalogue disease association studies on mtDNA (http://mitolsdb.igib.res.in). The main goal of MitoLSDB is to provide a central platform for direct submissions of novel variants that can be curated by the Mitochondrial Research Community. MitoLSDB provides access to standardized and annotated data from literature and databases encompassing information from 5231 individuals, 675 populations and 27 phenotypes. This platform is developed using the Leiden Open (source) Variation Database (LOVD) software. MitoLSDB houses information on all 37 genes in each population amounting to 132397 variants, 5147 unique variants. For each variant its genomic location as per the Revised Cambridge Reference Sequence, codon and amino acid change for variations in protein-coding regions, frequency, disease/phenotype, population, reference and remarks are also listed. MitoLSDB curators have also reported errors documented in literature which includes 94 phantom mutations, 10 NUMTs, six documentation errors and one artefactual recombination. MitoLSDB is the largest repository of mtDNA variants systematically standardized and presented using the LOVD platform. 
We believe that this is a good starting resource to curate mtDNA variants and will facilitate direct submissions enhancing data coverage, annotation in context of pathogenesis and quality control by ensuring non-redundancy in reporting novel disease associated variants.",2013-04-09 +,Genomic-associated Markers and comparative Genome Maps of Xanthomonas oryzae pv. oryzae and X. oryzae pv. oryzicola,"Xanthomonas oryzae pv. oryzae (Xoo) and X. oryzae pv. oryzicola (Xoc) cause two major seed quarantine diseases in rice, bacterial blight and bacterial leaf streak, respectively. Xoo and Xoc share high similarity in genomic sequence, which results in hard differentiation of the two pathogens. Genomic-associated Markers and comparative Genome Maps database (GMGM) is an integrated database providing comprehensive information including compared genome maps and full genomic-coverage molecular makers of Xoo and Xoc. This database was established based on bioinformatic analysis of complete sequenced genomes of several X. oryzae pathovars of which the similarity of the genomes was up to 91.39 %. The program was designed with a series of specific PCR primers, including 286 pairs of Xoo dominant markers, 288 pairs of Xoc dominant markers, and 288 pairs of Xoo and Xoc co-dominant markers, which were predicted to distinguish two pathovars. Test on a total of 40 donor pathogen strains using randomly selected 120 pairs of primers demonstrated that over 52.5 % of the primers were efficacious. 
The GMGM web portal (http://biodb.sdau.edu.cn/gmgm/) will be a powerful tool that can present highly specific diagnostic markers, and it also provides information about comparative genome maps of the two pathogens for future evolution study.",2015-09-01 +27515999,PoplarGene: poplar gene network and resource for mining functional information for genes from woody plants.,"Poplar is not only an important resource for the production of paper, timber and other wood-based products, but it has also emerged as an ideal model system for studying woody plants. To better understand the biological processes underlying various traits in poplar, e.g., wood development, a comprehensive functional gene interaction network is highly needed. Here, we constructed a genome-wide functional gene network for poplar (covering ~70% of the 41,335 poplar genes) and created the network web service PoplarGene, offering comprehensive functional interactions and extensive poplar gene functional annotations. PoplarGene incorporates two network-based gene prioritization algorithms, neighborhood-based prioritization and context-based prioritization, which can be used to perform gene prioritization in a complementary manner. Furthermore, the co-functional information in PoplarGene can be applied to other woody plant proteomes with high efficiency via orthology transfer. In addition to poplar gene sequences, the webserver also accepts Arabidopsis reference gene as input to guide the search for novel candidate functional genes in PoplarGene. We believe that PoplarGene (http://bioinformatics.caf.ac.cn/PoplarGene and http://124.127.201.25/PoplarGene) will greatly benefit the research community, facilitating studies of poplar and other woody plants.",2016-08-12 +28460117,Fast Genome-Wide Functional Annotation through Orthology Assignment by eggNOG-Mapper.,"Orthology assignment is ideally suited for functional inference. 
However, because predicting orthology is computationally intensive at large scale, and most pipelines are relatively inaccessible (e.g., new assignments only available through database updates), less precise homology-based functional transfer is still the default for (meta-)genome annotation. We, therefore, developed eggNOG-mapper, a tool for functional annotation of large sets of sequences based on fast orthology assignments using precomputed clusters and phylogenies from the eggNOG database. To validate our method, we benchmarked Gene Ontology (GO) predictions against two widely used homology-based approaches: BLAST and InterProScan. Orthology filters applied to BLAST results reduced the rate of false positive assignments by 11%, and increased the ratio of experimentally validated terms recovered over all terms assigned per protein by 15%. Compared with InterProScan, eggNOG-mapper achieved similar proteome coverage and precision while predicting, on average, 41 more terms per protein and increasing the rate of experimentally validated terms recovered over total term assignments per protein by 35%. EggNOG-mapper predictions scored within the top-5 methods in the three GO categories using the CAFA2 NK-partial benchmark. Finally, we evaluated eggNOG-mapper for functional annotation of metagenomics data, yielding better performance than interProScan. eggNOG-mapper runs ∼15× faster than BLAST and at least 2.5× faster than InterProScan. The tool is available standalone and as an online service at http://eggnog-mapper.embl.de.",2017-08-01 +31512371,Phyllosticta citricarpa and sister species of global importance to Citrus.,"Several Phyllosticta species are known as pathogens of Citrus spp., and are responsible for various disease symptoms including leaf and fruit spots. One of the most important species is P. citricarpa, which causes a foliar and fruit disease called citrus black spot. 
The Phyllosticta species occurring on citrus can most effectively be distinguished from P. citricarpa by means of multilocus DNA sequence data. Recent studies also demonstrated P. citricarpa to be heterothallic, and reported successful mating in the laboratory. Since the domestication of citrus, different clones of P. citricarpa have escaped Asia to other continents via trade routes, with obvious disease management consequences. This pathogen profile represents a comprehensive literature review of this pathogen and allied taxa associated with citrus, focusing on identification, distribution, genomics, epidemiology and disease management. This review also considers the knowledge emerging from seven genomes of Phyllosticta spp., demonstrating unknown aspects of these species, including their mating behaviour.

Taxonomy

Phyllosticta citricarpa (McAlpine) Aa, 1973. Kingdom Fungi, Phylum Ascomycota, Class Dothideomycetes, Order Botryosphaeriales, Family Phyllostictaceae, Genus Phyllosticta, Species citricarpa.

Host range

Confirmed on more than 12 Citrus species, Phyllosticta citricarpa has only been found on plant species in the Rutaceae.

Disease symptoms

P. citricarpa causes diverse symptoms such as hard spot, virulent spot, false melanose and freckle spot on fruit, and necrotic lesions on leaves and twigs.

Useful websites

DOE Joint Genome Institute MycoCosm portals for the Phyllosticta capitalensis (https://genome.jgi.doe.gov/Phycap1), P. citriasiana (https://genome.jgi.doe.gov/Phycit1), P. citribraziliensis (https://genome.jgi.doe.gov/Phcit1), P. citrichinaensis (https://genome.jgi.doe.gov/Phcitr1), P. citricarpa (https://genome.jgi.doe.gov/Phycitr1, https://genome.jgi.doe.gov/Phycpc1), P. paracitricarpa (https://genome.jgi.doe.gov/Phy27169) genomes. All available Phyllosticta genomes on MycoCosm can be viewed at https://genome.jgi.doe.gov/Phyllosticta.",2019-09-11 +28964253,RiceMetaSys for salt and drought stress responsive genes in rice: a web interface for crop improvement.,"

Background

Genome-wide microarray has enabled development of robust databases for functional genomics studies in rice. However, such databases do not directly cater to the needs of breeders. Here, we have attempted to develop a web interface which combines the information from functional genomic studies across different genetic backgrounds with DNA markers so that they can be readily deployed in crop improvement. In the current version of the database, we have included drought and salinity stress studies since these two are the major abiotic stresses in rice.

Results

RiceMetaSys, a user-friendly and freely available web interface provides comprehensive information on salt responsive genes (SRGs) and drought responsive genes (DRGs) across genotypes, crop development stages and tissues, identified from multiple microarray datasets. 'Physical position search' is an attractive tool for those using QTL based approach for dissecting tolerance to salt and drought stress since it can provide the list of SRGs and DRGs in any physical interval. To identify robust candidate genes for use in crop improvement, the 'common genes across varieties' search tool is useful. Graphical visualization of expression profiles across genes and rice genotypes has been enabled to facilitate the user and to make the comparisons more impactful. Simple Sequence Repeat (SSR) search in the SRGs and DRGs is a valuable tool for fine mapping and marker assisted selection since it provides primers for survey of polymorphism. An external link to intron specific markers is also provided for this purpose. Bulk retrieval of data without any limit has been enabled in case of locus and SSR search.

Conclusions

The aim of this database is to facilitate users with a simple and straight-forward search options for identification of robust candidate genes from among thousands of SRGs and DRGs so as to facilitate linking variation in expression profiles to variation in phenotype. Database URL: http://14.139.229.201.",2017-09-30 +27936097,PeTMbase: A Database of Plant Endogenous Target Mimics (eTMs).,"MicroRNAs (miRNA) are small endogenous RNA molecules, which regulate target gene expression at post-transcriptional level. Besides, miRNA activity can be controlled by a newly discovered regulatory mechanism called endogenous target mimicry (eTM). In target mimicry, eTMs bind to the corresponding miRNAs to block the binding of specific transcript leading to increase mRNA expression. Thus, miRNA-eTM-target-mRNA regulation modules involving a wide range of biological processes; an increasing need for a comprehensive eTM database arose. Except miRSponge with limited number of Arabidopsis eTM data no available database and/or repository was developed and released for plant eTMs yet. Here, we present an online plant eTM database, called PeTMbase (http://petmbase.org), with a highly efficient search tool. To establish the repository a number of identified eTMs was obtained utilizing from high-throughput RNA-sequencing data of 11 plant species. Each transcriptome libraries is first mapped to corresponding plant genome, then long non-coding RNA (lncRNA) transcripts are characterized. Furthermore, additional lncRNAs retrieved from GREENC and PNRD were incorporated into the lncRNA catalog. Then, utilizing the lncRNA and miRNA sources a total of 2,728 eTMs were successfully predicted. 
Our regularly updated database, PeTMbase, provides high quality information regarding miRNA:eTM modules and will aid functional genomics studies particularly, on miRNA regulatory networks.",2016-12-09 +28013273,CATchUP: A Web Database for Spatiotemporally Regulated Genes.,"For proper control of biological activity, some key genes are highly expressed in a particular spatiotemporal domain. Mining of such spatiotemporally expressed genes using large-scale gene expression data derived from a broad range of experimental sources facilitates our understanding of genome-scale functional gene networks. However, comprehensive information on spatiotemporally expressed genes is lacking in plants. To collect such information, we devised a new index, Δdmax, which is the maximum difference in relative gene expression levels between sample runs which are neighboring when sorted by the levels. Employing this index, we comprehensively evaluated transcripts using large-scale RNA sequencing (RNA-Seq) data stored in the Sequence Read Archive for eight plant species: Arabidopsis thaliana (Arabidopsis), Solanum lycopersicum (tomato), Solanum tuberosum (potato), Oryza sativa (rice), Sorghum bicolor (sorghum), Vitis vinifera (grape), Medicago truncatula (Medicago), and Glycine max (soybean). Based on the frequency distribution of the Δdmax values, approximately 70,000 transcripts showing 0.3 or larger Δdmax values were extracted for the eight species. Information on these genes including the Δdmax values, functional annotations, conservation among species, and experimental conditions where the genes show high expression levels is provided in a new database, CATchUP (http://plantomics.mind.meiji.ac.jp/CATchUP). 
The CATchUP database assists in identifying genes specifically expressed under particular conditions with powerful search functions and an intuitive graphical user interface.",2017-01-01 +29692469,2016 Guidelines of the American Society of Mammalogists for the use of wild mammals in research and education.,"Guidelines for use of wild mammal species in research are updated from Sikes et al. (2011) . These guidelines cover current professional techniques and regulations involving the use of mammals in research and teaching; they also incorporate new resources, procedural summaries, and reporting requirements. Included are details on capturing, marking, housing, and humanely killing wild mammals. It is recommended that Institutional Animal Care and Use Committees (IACUCs), regulatory agencies, and investigators use these guidelines as a resource for protocols involving wild mammals, whether studied in the field or in captivity. These guidelines were prepared and approved by the American Society of Mammalogists (ASM), in consultation with professional veterinarians experienced in wildlife research and IACUCs, whose collective expertise provides a broad and comprehensive understanding of the biology of nondomesticated mammals. The current version of these guidelines and any subsequent modifications are available online on the Animal Care and Use Committee page of the ASM website ( http://mammalogy.org/uploads/committee_files/CurrentGuidelines.pdf ). Additional resources pertaining to the use of wild animals in research are available at: http://www.mammalsociety.org/committees/animal-care-and-use#tab3 .",2016-05-28 +29220466,Improving taxonomic accuracy for fungi in public sequence databases: applying 'one name one species' in well-defined genera with Trichoderma/Hypocrea as a test case. 
,"The ITS (nuclear ribosomal internal transcribed spacer) RefSeq database at the National Center for Biotechnology Information (NCBI) is dedicated to the clear association between name, specimen and sequence data. This database is focused on sequences obtained from type material stored in public collections. While the initial ITS sequence curation effort together with numerous fungal taxonomy experts attempted to cover as many orders as possible, we extended our latest focus to the family and genus ranks. We focused on Trichoderma for several reasons, mainly because the asexual and sexual synonyms were well documented, and a list of proposed names and type material were recently proposed and published. In this case study the recent taxonomic information was applied to do a complete taxonomic audit for the genus Trichoderma in the NCBI Taxonomy database. A name status report is available here: https://www.ncbi.nlm.nih.gov/Taxonomy/TaxIdentifier/tax_identifier.cgi. As a result, the ITS RefSeq Targeted Loci database at NCBI has been augmented with more sequences from type and verified material from Trichoderma species. Additionally, to aid in the cross referencing of data from single loci and genomes we have collected a list of quality records of the RPB2 gene obtained from type material in GenBank that could help validate future submissions. During the process of curation misidentified genomes were discovered, and sequence records from type material were found hidden under previous classifications. Source metadata curation, although more cumbersome, proved to be useful as confirmation of the type material designation. 
Database URL:http://www.ncbi.nlm.nih.gov/bioproject/PRJNA177353",2017-01-01 +29218357,Response to letter to the editor from Dr Rahman Shiri: The challenging topic of suicide across occupational groups.,"We thank Dr Rahman Shiri (1) for his careful reading of our systematic review and meta-analysis on suicide among agricultural, forestry, and fishery workers (2). Our paper had the objective of providing a pooled effect size of suicide for this occupational group. Suicide is a crucial issue in public and occupational health. Suicide has a multifactorial etiology and recent systematic reviews and meta-analyses have pointed out the role of occupational exposures, mainly psychosocial work stressors, as risk factors for suicide (3, 4). Suicide is a very rare event in the general population and still more seldom in the working population. Indeed, unemployed and economically inactive people have a higher risk of suicide compared to employed people (5, 6). However, the total number of suicides is greater in the employed population than among the economically inactive or unemployed (6). Shiri's letter (1) questioned several aspects of our review and meta-analysis. One comment related to the single reference database used in our review and a suggestion that our review could not be considered to be systematic. The review was based on Medline because our main interest was in quantitative epidemiologic studies. This is the largest database for biomedical literature and we would argue the most pertinent. Furthermore, we checked the reference lists of the most recent papers and literature reviews, and Shiri did not report any paper that was missing. No review, whether searching one or more databases, can expect to be totally exhaustive. There may always be missing studies, especially if we consider grey literature. Thus we assert that our review was systematic, while acknowledging that it may not be perfectly comprehensive. 
Shiri suggested an absence of quality assessment of the studies included in our meta-analysis. First, quality was considered in the context of our comments in the discussion section. Second, as suggested by Rothman et al (7), quality assessment was replaced by regression analyses of the effect of each quality item (study characteristics, ie, study design, effect measure, reference group, and adjustment). Third, because most studies included in this review were based on objective data (census, administrative, or register data), they were free of many of the sources of bias that exist in studies where information on exposure and outcome must be collected from participants. Consequently, many of the items related to quality were not pertinent, such as response and follow-up rates, coverage and representativeness of the sample, selection, etc. Contrary to what Shiri suggested, all study designs can be informative in this topic because all of them are able to provide an unbiased estimate of the effect size. In addition, the prospective and case-control studies may have shortcomings. For example, we excluded five studies including three prospective and case-control studies in the sensibility analysis because the group of interest was defined on the basis of the exposure to chemicals (pesticides) rather than job title. Our choice to retain the least adjusted models was justified because aggregated data were used for the meta-analysis. Therefore, unless all included studies adjusted for the same covariates measured in the same way, adjusted estimates cannot be meaningfully provided in an aggregate data meta-analysis. In addition, as the objective was above all descriptive and not etiological or explanatory, and as it is the norm in the exploration of social inequalities in health (8), the results from the least (gender- and age-) adjusted models were in line with the objective. 
Indeed, including more adjustment variables could lead to overadjustment as they may be intermediate variables on the causal pathways between occupation and suicide. Our strategy was in line with previous meta-analyses on similar topics (9-11). Consequently, we would argue that our results are not likely to be largely due to confounding, contrary to the comment by Shiri. Indeed, the study of the contribution of underlying factors in explaining social inequalities in health outcomes is a fully-fledged topic of research (12-15), but this is relevant research to conduct after demonstrating that inequalities exist between social or occupational groups. Several of Shiri's comments were about statistical aspects of our analyses. First, it was suggested that we did not correctly extract the confidence intervals for the estimates of several studies. We disagree. We used the STATA metan suite of commands using log-transformed effect sizes and standard errors. Our figure 1 and the values of effect sizes and confidence intervals were provided by STATA, this explains why there may be small differences in these values compared with the results published in some studies. Using log-transformed effect sizes and confidence intervals, the analysis provided the same results. Second, our subgroup comparison was based on subsamples that were independent. As not all studies provided information for these subgroups, each subgroup was treated as a unit of analysis. This strategy allows the use of all relevant subgroups and comparisons between them (16). Third, we were also criticized for the use of random-effects models. Random-effects models are generally more plausible for meta-analysis based on studies from the published literature, because the fixed-effect model assumed that the entire corpus of literature has been obtained, ie, that every study has been or ever will be written on the topic has been included, which is an implausible assumption. 
We also assumed differences in effect size between studies and between subgroups, and the use of random-effects models was consistent with such an assumption. However, random-effects models produce wider confidence intervals compared to fixed-effect models (16). These models are thus more conservative, making our results all the more robust. One of Shiri's comments related to the reference group used in the studies for the comparison of agricultural, forestry, and fishery workers. Although we reported that the studies using a specific occupational group as reference group provided a higher effect size than the studies using other reference groups, we did not explicitly recognize and state in the paper that the results for Japan were based on two studies using a specific occupational group as reference; we concede that this may explain why we found a much more elevated effect size for Japan. Shiri's results (1) allow to conclude that the difference between Japan and the other geographic areas could be explained by the choice of reference group-we are grateful to him for raising this point. However, we would note that the effect size of suicide was still elevated and significant for agricultural, forestry, and fishery workers even after this change in the reference group for these two studies. Nevertheless, the choice of the optimal reference group is not obvious. If we consider the general population as the reference group, as unemployed people and economically inactive people (including people who may not be working due to illness or disability) are part of it and have a higher risk of suicide than employed people, the effect size provided by the nine studies using the general population as the reference is likely to be underestimated, which may contribute to an underestimation of the observed effect size of suicide among agricultural, forestry, and fishery workers in our study. 
The comparison was made in our paper with the other occupational groups (ie, the working population except the group of interest) as the reference, which was used by nine other studies, but this did not allow to determine the exact rank of the group of interest in the occupational hierarchy. Another relevant choice would have been to retain the group with the lowest suicide risk (for example, the high-skilled occupational group) as the reference, which would have led to a much higher effect size of suicide for agricultural, forestry, and fishery workers. To conclude, as statistical power in detecting differences between subgroups may be low in subgroup analyses and meta-regression, the absence of significant results according to subgroups found in our results cannot be interpreted as evidence that the effect size is the same across subgroups. Consequently, our meta-analysis reporting a significant excess of risk of suicide among agricultural, forestry, and fishery workers may also be a good incentive for more research among this group of workers to (i) confirm this observed excess of risk using differing methodological approaches to meta-analysis and (ii) explore the potential differences within this group and the underlying factors that may explain this excess of risk. References 1. Shiri R. Suicide among agricultural, forestry, and fishery workers. Scand J Work Environ Health - online first. https://doi.org/10.5271/sjweh.3697 2. Klingelschmidt J, Milner A, Khireddine-Medouni I, Witt K, Alexopoulos EC, Toivanen S, et al. Suicide among agricultural, forestry, and fishery workers: a systematic literature review and meta-analysis. Scand J Work Environ Health - online first. https://doi.org/10.5271/sjweh.3682 3. Milner A, Witt K, LaMontagne AD, Niedhammer I. Psychosocial job stressors and suicidality: a meta-analysis and systematic review. Occup Environ Med - online first. https://doi.org/10.1136/oemed-2017-104531 4. Leach LS, Poyser C, Butterworth P. 
Workplace bullying and the association with suicidal ideation/thoughts and behaviour: a systematic review. Occup Environ Med. 2017;74(1):72-9. https://doi.org/10.1136/oemed-2016-103726 5. Milner A, Page A, LaMontagne AD. Long-term unemployment and suicide: a systematic review and meta-analysis. PLoS One. 2013;8(1):e51333. https://doi.org/10.1371/journal.pone.0051333 6. Milner A, Morrell S, Lamontagne AD. Economically inactive, unemployed and employed suicides in Australia by age and sex over a 10-year period: what was the impact of the 2007 economic recession? Int J Epidemiol. 2014;43(5):1500-7. https://doi.org/10.1093/ije/dyu148 7. Rothman KJ, Greenland S, Lash TL. Modern Epidemiology - Third Edition. Philadelphia: Wolters Kluwer Health - Lippincott Williams & Wilkins; 2008. 8. Lundberg I, Hemmingsson T, Hogstedt C. Work and social inequalities in health in Europe. Brussels: P.I.E. Peter Lang SA; 2007. 9. Milner A, Spittal MJ, Pirkis J, Lamontagne AD. Suicide by occupation: systematic review and meta-analysis. Br J Psychiatry. 2013;203(6):409-16. https://doi.org/10.1192/bjp.bp.113.128405 10. Lorant V, Deliege D, Eaton W, Robert A, Philippot P, Ansseau M. Socioeconomic inequalities in depression: a meta-analysis. Am J Epidemiol. 2003;157(2):98-112. https://doi.org/10.1093/aje/kwf182 11. Grittner U, Kuntsche S, Gmel G, Bloomfield K. Alcohol consumption and social inequality at the individual and country levels--results from an international study. Eur J Public Health. 2013;23(2):332-9. https://doi.org/10.1093/eurpub/cks044 12. Niedhammer I, Bourgkard E, Chau N. Occupational and behavioural factors in the explanation of social inequalities in premature and total mortality: a 12.5-year follow-up in the Lorhandicap study. Eur J Epidemiol. 2011;26(1):1-12. https://doi.org/10.1007/s10654-010-9506-9 13. Niedhammer I, Chastang JF, David S, Kelleher C. The contribution of occupational factors to social inequalities in health: findings from the national French SUMER survey. 
Soc Sci Med. 2008;67(11):1870-81. https://doi.org/10.1016/j.socscimed.2008.09.007 14. Chazelle E, Lemogne C, Morgan K, Kelleher CC, Chastang JF, Niedhammer I. Explanations of educational differences in major depression and generalised anxiety disorder in the Irish population. J Affect Disord. 2011;134(1-3):304-14. https://doi.org/10.1016/j.jad.2011.05.049 15. Niedhammer I, Lesuffleur T, Coutrot T, Chastang JF. Contribution of working conditions to occupational inequalities in depressive symptoms: results from the national French SUMER survey. Int Arch Occup Environ Health. 2016;89(6):1025-37. https://doi.org/10.1007/s00420-016-1142-6 16. Borenstein M, Hedges LV, Higgins JPT, Rothstein HR. Introduction to meta-analysis: John Wiley & Sons, Ltd. ISBN: 978-0-470-05724-7; 2009. https://doi.org/10.1002/9780470743386.",2017-12-08 +28124611,ImmunemiR - A Database of Prioritized Immune miRNA Disease Associations and its Interactome.,"

Background

MicroRNAs are the key regulators of gene expression and their abnormal expression in the immune system may be associated with several human diseases such as inflammation, cancer and autoimmune diseases. Elucidation of miRNA disease association through the interactome will deepen the understanding of its disease mechanisms. A specialized database for immune miRNAs is highly desirable to demonstrate the immune miRNA disease associations in the interactome.

Methods

miRNAs specific to immune related diseases were retrieved from curated databases such as HMDD, miR2disease and PubMed literature based on MeSH classification of immune system diseases. The additional data such as miRNA target genes, genes coding protein-protein interaction information were compiled from related resources. Further, miRNAs were prioritized to specific immune diseases using random walk ranking algorithm.

Results

In total 245 immune miRNAs associated with 92 OMIM disease categories were identified from external databases. The resultant data were compiled as ImmunemiR, a database of prioritized immune miRNA disease associations. This database provides both text based annotation information and network visualization of its interactome.

Conclusion

To our knowledge, ImmunemiR is the first available database to provide a comprehensive repository of human immune disease associated miRNAs with network visualization options of its target genes, protein-protein interactions (PPI) and its disease associations. It is freely available at http://www.biominingbu.org/immunemir/.",2017-01-01 +32038710,Quantifying Gene Essentiality Based on the Context of Cellular Components.,"Different genes have their protein products localized in various subcellular compartments. The diversity in protein localization may serve as a gene characteristic, revealing gene essentiality from a subcellular perspective. To measure this diversity, we introduced a Subcellular Diversity Index (SDI) based on the Gene Ontology-Cellular Component Ontology (GO-CCO) and a semantic similarity measure of GO terms. Analyses revealed that SDI of human genes was well correlated with some known measures of gene essentiality, including protein-protein interaction (PPI) network topology measurements, dN/dS ratio, homologous gene number, expression level and tissue specificity. In addition, SDI had a good performance in predicting human essential genes (AUC = 0.702) and drug target genes (AUC = 0.704), and drug targets with higher SDI scores tended to cause more side-effects. The results suggest that SDI could be used to identify novel drug targets and to guide the filtering of drug targets with fewer potential side effects. Finally, we developed a user-friendly online database for querying SDI score for genes across eight species, and the predicted probabilities of human drug target based on SDI. 
The online database of SDI is available at: http://www.cuilab.cn/sdi.",2019-01-01 +31798767,Genomic alterations of Tenascin C in highly aggressive prostate cancer: a meta-analysis.,"Tenascin C (TNC), an extra-cellular matrix (ECM) family gene, is expressed in several cancer tissues of breast, lung, colon, and gastrointestinal tract leading to proliferation, migration, invasion, angiogenesis and metastasis, but its role in tumorigenesis of prostate cancer is poorly understood. We took a meta-analysis approach to characterize the alterations of TNC gene in prostate cancer using publicly available databases (cBioportal Version 2.2.0, http://www.cBioportal.org/index.do). The analysis identified TNC alterations (gene amplification) significantly in the neuroendocrine prostate cancer dataset (Trento/Broad/Cornell, N = 114), which was further validated in other prostate cancer datasets, including The Cancer Genome Atlas (TCGA) prostate cancer (2015). In the TCGA prostate cancer dataset (N = 498), high TNC (alteration frequency, 36%) revealed a strong association with high diagnostic Gleason score. Genomic alterations of TNC was also significantly associated (P < 0.05) with expression level of genes from NOTCH, SOX and WNT family, implicating a link between TNC and poorly differentiated aggressive phenotype in NEPC. TCGA prostate adenocarcinoma cases with TNC alteration also demonstrated prominent decrease in disease-free survival (P = 0.0637). These findings indicate a possible association of TNC to the aggressive subtype of prostate cancer and warrant further functional studies to evident the involvement of TNC in prostate cancer progression.",2019-01-01 +30963491,Functional Analysis of Genetic Variants and Somatic Mutations Impacting MicroRNA-Target Recognition: Bioinformatics Resources.,"MicroRNAs are small noncoding RNA molecules with great importance in regulating a large number of diverse biological processes in health and disease. 
MicroRNAs can bind to both coding and noncoding RNAs and regulate their stability and expression. Genetic variants and somatic mutations may alter microRNA sequences and their target sites and therefore impact microRNA-target recognition. Aberrant microRNA-target interactions have been associated with many diseases. In recent years, computational resources have been developed for retrieving, annotating, and analyzing the impact of mutations on microRNA-target recognition. In this chapter, we provide an overview on the computational analysis of mutations impacting microRNA target recognition, followed by a detailed tutorial on how to use three major Web-based bioinformatics resources: PolymiRTS ( http://compbio.uthsc.edu/miRSNP ), a database of genetic variants impacting microRNA target recognition; SomamiR ( http://compbio.uthsc.edu/SomamiR ), a database of somatic mutations affecting the interactions between microRNAs and their targets in mRNAs and noncoding RNAs; and miR2GO ( http://compbio.uthsc.edu/miR2GO ), a computational tool for knowledge-based functional analysis of genetic variants and somatic mutations in microRNA seed regions.",2019-01-01 +30963486,Sfold Tools for MicroRNA Target Prediction.,"Computational prediction of miRNA binding sites on target mRNAs facilitates experimental investigation of miRNA functions. In this chapter, we describe STarMir and STarMirDB, two application modules of the Sfold RNA package. STarMir is a Web server for performing miRNA binding site predictions for mRNA and target sequences submitted by users. STarMirDB is a database of precomputed transcriptome-scale predictions. Both STarMir and STarMirDB provide comprehensive sequence, thermodynamic, and target structure features, a logistic probability as a measure of confidence for each predicted site, and a publication-quality diagram of the predicted miRNA-target hybrid. 
In addition, STarMir now offers a new quantitative score to address combined regulatory effects of multiple seed and seedless sites. This score provides a quantitative measure of the overall regulatory effects of both seed and seedless sites on the target. STarMir and STarMirDB are freely available to all through the Sfold Web application server at http://sfold.wadsworth.org .",2019-01-01 +30820574,MSGP: the first database of the protein components of the mammalian stress granules. ,"In response to different stress stimuli, cells transiently form stress granules (SGs) in order to protect themselves and re-establish homeostasis. Besides these important cellular functions, SGs are now being implicated in different human diseases, such as neurodegenerative disorders and cancer. SGs are ribonucleoprotein granules, constituted by a variety of different types of proteins, RNAs, factors involved in translation and signaling molecules, being capable of regulating mRNA translation to facilitate stress response. However, until now a complete list of the SG components has not been available. Therefore, we aimed at identifying and listing in an open access database all the proteins described so far as components of SGs. The identification was made through an exhaustive search of studies listed in PubMed and double checked. Moreover, for each identified protein several details were also gathered from public databases, such as the molecular function, the cell types in which they were detected, the type of stress stimuli used to induce SG formation and the reference of the study describing the recruitment of the component to SGs. Expression levels in the context of different neurodegenerative diseases were also obtained and are also described in the database. The Mammalian Stress Granules Proteome is available at https://msgp.pt/, being a new and unique open access online database, the first to list all the protein components of the SGs identified so far. 
The database constitutes an important and valuable tool for researchers in this research area of growing interest.",2019-01-01 +30321422,RetroRules: a database of reaction rules for engineering biology.,"RetroRules is a database of reaction rules for metabolic engineering (https://retrorules.org). Reaction rules are generic descriptions of chemical reactions that can be used in retrosynthesis workflows in order to enumerate all possible biosynthetic routes connecting a target molecule to its precursors. The use of such rules is becoming increasingly important in the context of synthetic biology applied to de novo pathway discovery and in systems biology to discover underground metabolism due to enzyme promiscuity. Here, we provide for the first time a complete set containing >400 000 stereochemistry-aware reaction rules extracted from public databases and expressed in the community-standard SMARTS (SMIRKS) format, augmented by a rule representation at different levels of specificity (the atomic environment around the reaction center). Such numerous representations of reactions expand natural chemical diversity by predicting de novo reactions of promiscuous enzymes.",2019-01-01 +30202989,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Evaluation and Treatment of Patients With Thoracolumbar Spine Trauma: Radiological Evaluation.,"

Background

Radiological evaluation of traumatic thoracolumbar fractures is used to classify the injury and determine the optimal treatment plan. Currently, there remains a lack of consensus regarding appropriate radiological protocol. Most clinicians use a combination of plain radiographs, 3-dimensional computed tomography with reconstructions, and magnetic resonance imaging (MRI).

Objective

To determine, through evidence-based guidelines review: (1) whether the use of MRI to identify ligamentous integrity predicted the need for surgical intervention; and (2) if there are any radiological findings that can assist in predicting clinical outcomes.

Methods

A systematic review of the literature was performed using the National Library of Medicine/PubMed database and the Cochrane Library for studies relevant to thoracolumbar trauma. Clinical studies specifically addressing the radiological evaluation of thoracolumbar spine trauma were selected for review.

Results

Two of 2278 studies met inclusion criteria for review. One retrospective review (Level III) and 1 prospective cohort (Level III) provided evidence that the addition of an MRI scan in acute thoracic and thoracolumbar trauma can predict the need for surgical intervention. There was insufficient evidence that MRI can help predict clinical outcomes in patients with acute traumatic thoracic and thoracolumbar spine injuries.

Conclusion

This evidence-based guideline provides a Grade B recommendation that radiological findings in patients with acute thoracic or thoracolumbar spine trauma can predict the need for surgical intervention. This evidence-based guideline provides a grade insufficient recommendation that there is insufficient evidence to determine if radiographic findings can assist in predicting clinical outcomes in patients with acute thoracic and thoracolumbar spine injuries.

Question 1

Are there radiographic findings in patients with traumatic thoracolumbar fractures that can predict the need for surgical intervention?

Recommendation 1

Because MRI has been shown to influence the management of up to 25% of patients with thoracolumbar fractures, providers may use MRI to assess posterior ligamentous complex integrity, when determining the need for surgery. Strength of Recommendation: Grade B.

Question 2

Are there radiographic findings in patients with traumatic thoracolumbar fractures that can assist in predicting clinical outcomes?

Recommendation 2

Due to a paucity of published studies, there is insufficient evidence that radiographic findings can be used as predictors of clinical outcomes in thoracolumbar fractures. Strength of Recommendation: Grade Insufficient The full version of the guideline can be reviewed at: https://www.cns.org/guideline-chapters/congress-neurological-surgeons-systematic-review-evidence-based-guidelines/chapter_3.",2019-01-01 +31256127,Quantitative Brain Amyloid Measures Predict Time-to-Progression from Amnestic Mild Cognitive Impairment to Alzheimer's Disease.,"BACKGROUND:This study was designed to investigate factors that predict progression from amnestic mild cognitive impairment (aMCI) to probable Alzheimer's disease (AD). OBJECTIVE:We studied the usefulness of quantitative assessment of amyloid burden measured by Florbetapir PET scan. METHODS:The study cohort consisted of aMCI participants older than 65 and those with available Florbetapir PET scan at diagnosis from the ADNI database (http://adni.loni.usc.edu). To assess the prognostic impact of amyloid burden, a staging system based on the global SUVr of the PET scan was applied. We defined the stages as: stage I, negative amyloid scan; stage II, positive amyloid in 1st tertile; stage III, positive amyloid in 2nd tertile; and stage IV, positive amyloid in 3rd tertile. RESULTS:Of 250 eligible aMCI subjects (age 74.1±5.4, female n = 105), 71 (28.4%) were diagnosed with probable AD within 3 years. Higher amyloid stages showed faster cognitive decline by Kaplan-Meier analysis. In multivariate Cox analysis, with stage I as a reference, the hazard ratio (HR) increased as the stage increased: stage II (HR, 4.509; p = 0.015), stage III (HR, 7.616; p = 0.001), and stage IV (HR, 9.421; p < 0.001). 
Along with amyloid stage, ApoE ɛ4 (HR, 1.943; p = 0.031), score of CDR-SB (HR, 1.845; p < 0.001) and ADAS 11 (HR, 1.144; p < 0.001), and hippocampal volume (HR, 0.002; p = 0.005) were also identified as predictors of dementia progression in aMCI subjects. CONCLUSIONS:Large amyloid burden measured from amyloid PET scan could be a predictor of faster cognitive decline in aMCI patients.",2019-01-01 +31020562,AnABlast: Re-searching for Protein-Coding Sequences in Genomic Regions.,"AnABlast is a computational tool that highlights protein-coding regions within intergenic and intronic DNA sequences which escape detection by standard gene prediction algorithms. DNA sequences with small protein-coding genes or exons, complex intron-containing genes, or degenerated DNA fragments are efficiently targeted by AnABlast. Furthermore, this algorithm is particularly useful in detecting protein-coding sequences with nonsignificant homologs to sequences in databases. AnABlast can be executed online at http://www.bioinfocabd.upo.es/anablast/ .",2019-01-01 +30945200,A Walkthrough to the Use of GreeNC: The Plant lncRNA Database.,"Experimentally validated plant lncRNAs have been shown to regulate important agronomic traits such as phosphate starvation response, flowering time, and interaction with symbiotic organisms, making them of great interest in plant biology and in breeding. We developed a pipeline to annotate lncRNAs and applied it to 37 plant species and 6 algae, resulting in the annotation of more than 120,000 lncRNAs. To facilitate the study of lncRNAs for the plant research community, the information gathered is organized in the Green Non-Coding Database (GreeNC, http://greenc.sciencedesigners.com/) . 
This chapter contains a detailed explanation of the content of GreeNC and how to access both programmatically and with a web browser.",2019-01-01 +30445555,The SUPERFAMILY 2.0 database: a significant proteome update and a new webserver.,"Here, we present a major update to the SUPERFAMILY database and the webserver. We describe the addition of new SUPERFAMILY 2.0 profile HMM library containing a total of 27 623 HMMs. The database now includes Superfamily domain annotations for millions of protein sequences taken from the Universal Protein Recourse Knowledgebase (UniProtKB) and the National Center for Biotechnology Information (NCBI). This addition constitutes about 51 and 45 million distinct protein sequences obtained from UniProtKB and NCBI respectively. Currently, the database contains annotations for 63 244 and 102 151 complete genomes taken from UniProtKB and NCBI respectively. The current sequence collection and genome update is the biggest so far in the history of SUPERFAMILY updates. In order to the deal with the massive wealth of information, here we introduce a new SUPERFAMILY 2.0 webserver (http://supfam.org). Currently, the webserver mainly focuses on the search, retrieval and display of Superfamily annotation for the entire sequence and genome collection in the database.",2019-01-01 +30407591,"IID 2018 update: context-specific physical protein-protein interactions in human, model organisms and domesticated species.","Knowing the set of physical protein-protein interactions (PPIs) that occur in a particular context-a tissue, disease, or other condition-can provide valuable insights into key research questions. However, while the number of identified human PPIs is expanding rapidly, context information remains limited, and for most non-human species context-specific networks are completely unavailable. 
The Integrated Interactions Database (IID) provides one of the most comprehensive sets of context-specific human PPI networks, including networks for 133 tissues, 91 disease conditions, and many other contexts. Importantly, it also provides context-specific networks for 17 non-human species including model organisms and domesticated animals. These species are vitally important for drug discovery and agriculture. IID integrates interactions from multiple databases and datasets. It comprises over 4.8 million PPIs annotated with several types of context: tissues, subcellular localizations, diseases, and druggability information (the latter three are new annotations not available in the previous version). This update increases the number of species from 6 to 18, the number of PPIs from ∼1.5 million to ∼4.8 million, and the number of tissues from 30 to 133. IID also now supports topology and enrichment analyses of returned networks. IID is available at http://ophid.utoronto.ca/iid.",2019-01-01 +30380071,DSMNC: a database of somatic mutations in normal cells.,"Numerous non-inherited somatic mutations, distinct from those of germ-line origin, occur in somatic cells during DNA replication per cell-division. The somatic mutations, recording the unique genetic cell-lineage 'history' of each proliferating normal cell, are important but remain to be investigated because of their ultra-low frequency hidden in the genetic background of heterogeneous cells. Luckily, the recent development of single-cell genomics biotechnologies enables the screening and collection of the somatic mutations, especial single nucleotide variations (SNVs), occurring in normal cells. Here, we established DSMNC: a database of somatic mutations in normal cells (http://dsmnc.big.ac.cn/), which provides most comprehensive catalogue of somatic SNVs in single cells from various normal tissues. 
In the current version, the database collected ∼0.8 million SNVs accumulated in ∼600 single normal cells (579 human cells and 39 mouse cells). The database interface supports the user-friendly capability of browsing and searching the SNVs and their annotation information. DSMNC, which serves as a timely and valuable collection of somatic mutations in individual normal cells, has made it possible to analyze the burdens and signatures of somatic mutations in various types of heterogeneous normal cells. Therefore, DSMNC will significantly improve our understanding of the characteristics of somatic mutations in normal cells.",2019-01-01 +30364956,HMDD v3.0: a database for experimentally supported human microRNA-disease associations.,"Comprehensive databases of microRNA-disease associations are continuously demanded in biomedical researches. The recently launched version 3.0 of Human MicroRNA Disease Database (HMDD v3.0) manually collects a significant number of miRNA-disease association entries from literature. Comparing to HMDD v2.0, this new version contains 2-fold more entries. Besides, the associations have been more accurately classified based on literature-derived evidence code, which results in six generalized categories (genetics, epigenetics, target, circulation, tissue and other) covering 20 types of detailed evidence code. Furthermore, we added new functionalities like network visualization on the web interface. To exemplify the utility of the database, we compared the disease spectrum width of miRNAs (DSW) and the miRNA spectrum width of human diseases (MSW) between version 3.0 and 2.0 of HMDD. HMDD is freely accessible at http://www.cuilab.cn/hmdd. 
With accumulating evidence of miRNA-disease associations, HMDD database will keep on growing in the future.",2019-01-01 +30202990,ChIPprimersDB: a public repository of verified qPCR primers for chromatin immunoprecipitation (ChIP).,"Chromatin immunoprecipitation (ChIP) has ushered in a new era of scientific discovery by allowing new insights into DNA-protein interactions. ChIP is used to quantify enriched genomic regions using qPCR, and more recently is combined with next generation sequencing (ChIP-seq) to obtain a genome wide profile of protein binding sites. Nevertheless, ChIP-qPCR remains an integral component of this technology for quality control purposes, before the library preparation and sequencing steps. In addition, ChIP-qPCR remains more time- and cost-effective for many focused projects in which the DNA regions of interest are already known. However, the DNA oligonucleotide primers needed for ChIP-qPCR are more challenging to design than for other qPCR projects. Here, we present the first public repository for ChIP oligonucleotides that have been verified to perform well in ChIP-qPCR experiments. ChIPprimersDB was developed by manual screening of publications to ensure primer quality and provide additional specific information on the ChIP experiments where the primers have been used. In addition to the primer sequences, the database includes information about the antibody, cells and tissues used in the experiment, information on the experimental design, and a direct link to the original publication. The database is linked at https://umiamihealth.org/bascom-palmer-eye-institute/research/clinical-and-laboratory-research/ocular-oncology-laboratory/chip-primers and hosted at https://www.chipprimers.com/.",2019-01-01 +30202985,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Evaluation and Treatment of Patients with Thoracolumbar Spine Trauma: Executive Summary.,"

Background

The thoracic and lumbar (""thoracolumbar"") spine is the most commonly injured region of the spine in blunt trauma. Trauma of the thoracolumbar spine is frequently associated with spinal cord injury and other visceral and bony injuries. Prolonged pain and disability after thoracolumbar trauma present a significant burden on patients and society.

Objective

To formulate evidence-based clinical practice recommendations for the care of patients with injuries to the thoracolumbar spine.

Methods

A systematic review of the literature was performed using the National Library of Medicine PubMed database and the Cochrane Library for studies relevant to thoracolumbar spinal injuries based on specific clinically oriented questions. Relevant publications were selected for review.

Results

For all of the questions posed, the literature search yielded a total of 6561 abstracts. The task force selected 804 articles for full text review, and 78 were selected for inclusion in this overall systematic review.

Conclusion

The available evidence for the evaluation and treatment of patients with thoracolumbar spine injuries demonstrates considerable heterogeneity and highly variable degrees of quality. However, the workgroup was able to formulate a number of key recommendations to guide clinical practice. Further research is needed to counter the relative paucity of evidence that specifically pertains to patients with only thoracolumbar spine injuries. The full version of the guideline can be reviewed at: https://www.cns.org/guideline-chapters/congress-neurological-surgeons-systematic-review-evidence-based-guidelines/chapter_1.",2019-01-01 +31582619,[Systematic Review of Case Reports on the Adverse Events Due to Health Food Intake by Cancer Patients].,"Cancer patients use health foods (HFs) as complementary and alternative medicine, although the details of their adverse events (AEs) are unclear. We searched three databases [PubMed, ""Igaku Chuo Zasshi"", and Information System on Safety and Effectiveness for Health Foods website (https://hfnet.nibiohn.go.jp/)] for case reports on AEs related to HF intake in cancer patients published before October 2018. Of the matched reports, 76 studies and 92 patients (31 in Japan, 61 overseas) that met the selection criteria were included in this review. Thus, the severity of AEs and outcomes were not related to either the concomitant use of HF with cancer chemotherapy or cancer stages of patients. AEs caused by HF intake itself accounted for 87%, while drug-HF interaction accounted for 11%. According to the Common Terminology Criteria for Adverse Events (CTCAE) classification, 70% of patients whose grades were identified had severe cases (grades 3 to 5). In Japanese patients, hepatic and respiratory disorders accounted for 52% of the severe cases. Cases were predominantly developed as a result of an allergic mechanism, and mushroom products were mostly used. 
Overseas, serious cases were induced by products that were already indicated for safety problems. Moreover, notable AEs were recognized, such as hypercalcemia, which were caused by intake of HF containing calcium, vitamin D, and shark cartilage, and bacterial infection caused by probiotic products. Analyzing the details of AEs related to HF intake can help health professionals and cancer patients prevent health hazards.",2019-01-01 +30893420,AYbRAH: a curated ortholog database for yeasts and fungi spanning 600 million years of evolution. ,"Budding yeasts inhabit a range of environments by exploiting various metabolic traits. The genetic bases for these traits are mostly unknown, preventing their addition or removal in a chassis organism for metabolic engineering. Insight into the evolution of orthologs, paralogs and xenologs in the yeast pan-genome can help bridge these genotypes; however, existing phylogenomic databases do not span diverse yeasts, and sometimes cannot distinguish between these homologs. To help understand the molecular evolution of these traits in yeasts, we created Analyzing Yeasts by Reconstructing Ancestry of Homologs (AYbRAH), an open-source database of predicted and manually curated ortholog groups for 33 diverse fungi and yeasts in Dikarya, spanning 600 million years of evolution. OrthoMCL and OrthoDB were used to cluster protein sequence into ortholog and homolog groups, respectively; MAFFT and PhyML reconstructed the phylogeny of all homolog groups. Ortholog assignments for enzymes and small metabolite transporters were compared to their phylogenetic reconstruction, and curated to resolve any discrepancies. Information on homolog and ortholog groups can be viewed in the AYbRAH web portal (https://lmse.github.io/aybrah/), including functional annotations, predictions for mitochondrial localization and transmembrane domains, literature references and phylogenetic reconstructions. 
Ortholog assignments in AYbRAH were compared to HOGENOM, KEGG Orthology, OMA, eggNOG and PANTHER. PANTHER and OMA had the most congruent ortholog groups with AYbRAH, while the other phylogenomic databases had greater amounts of under-clustering, over-clustering or no ortholog annotations for proteins. Future plans are discussed for AYbRAH, and recommendations are made for other research communities seeking to create curated ortholog databases.",2019-01-01 +30670912,Biomarker identification and trans-regulatory network analyses in esophageal adenocarcinoma and Barrett's esophagus.,"

Background

Esophageal adenocarcinoma (EAC) is an aggressive disease with high mortality and an overall 5-year survival rate of less than 20%. Barrett's esophagus (BE) is the only known precursor of EAC, and patients with BE have a persistent and excessive risk of EAC over time. Individuals with BE are up to 30-125 times more likely to develop EAC than the general population. Thus, early detection of EAC and BE could significantly improve the 5-year survival rate of EAC. Due to the limitations of endoscopic surveillance and the lack of clinical risk stratification strategies, molecular biomarkers should be considered and thoroughly investigated.

Aim

To explore the transcriptome changes in the progression from normal esophagus (NE) to BE and EAC.

Methods

Two datasets from the Gene Expression Omnibus (GEO) in NCBI Database (https://www.ncbi.nlm.nih.gov/geo/) were retrieved and used as a training and a test dataset separately, since NE, BE, and EAC samples were included and the sample sizes were adequate. This study identified differentially expressed genes (DEGs) using the R/Bioconductor project and constructed trans-regulatory networks based on the Transcriptional Regulatory Element Database and Cytoscape software. Enrichment of Kyoto Encyclopedia of Genes and Genomes (KEGG) and Gene Ontology (GO) terms was identified using the Database for Annotation, Visualization, and Integrated Discovery (DAVID) Bioinformatics Resources. The diagnostic potential of certain DEGs was assessed in both datasets.

Results

In the GSE1420 dataset, the number of up-regulated DEGs was larger than that of down-regulated DEGs when comparing EAC vs NE and BE vs NE. Among these DEGs, five differentially expressed transcription factors (DETFs) displayed the same trend in expression across all the comparison groups. Of these five DETFs, E2F3, FOXA2, and HOXB7 were up-regulated, while PAX9 and TFAP2C were down-regulated. Additionally, the majority of the DEGs in trans-regulatory networks were up-regulated. The intersection of these potential DEGs displayed the same direction of changes in expression when comparing the DEGs in the GSE26886 dataset to the DEGs in trans-regulatory networks above. The receiver operating characteristic curve analysis was performed for both datasets and found that TIMP1 and COL1A1 could discriminate EAC from NE tissue, while REG1A, MMP1, and CA2 could distinguish BE from NE tissue. DAVID annotation indicated that COL1A1 and MMP1 could be potent biomarkers for EAC and BE, respectively, since they participate in the majority of the enriched KEGG and GO terms that are important for inflammation and cancer.

Conclusion

After the construction and analyses of the trans-regulatory networks in EAC and BE, the results indicate that COL1A1 and MMP1 could be potential biomarkers for EAC and BE, respectively.",2019-01-01 +30500919,SCOPe: classification of large macromolecular structures in the structural classification of proteins-extended database.,"The SCOPe (Structural Classification of Proteins-extended, https://scop.berkeley.edu) database hierarchically classifies domains from the majority of proteins of known structure according to their structural and evolutionary relationships. SCOPe also incorporates and updates the ASTRAL compendium, which provides multiple databases and tools to aid in the analysis of the sequences and structures of proteins classified in SCOPe. Protein structures are classified using a combination of manual curation and highly precise automated methods. In the current release of SCOPe, 2.07, we have focused our manual curation efforts on larger protein structures, including the spliceosome, proteasome and RNA polymerase I, as well as many other Pfam families that had not previously been classified. Domains from these large protein complexes are distinctive in several ways: novel non-globular folds are more common, and domains from previously observed protein families often have N- or C-terminal extensions that were disordered or not present in previous structures. The current monthly release update, SCOPe 2.07-2018-10-18, classifies 90 992 PDB entries (about two thirds of PDB entries).",2019-01-01 +30476229,ENPD - A Database of Eukaryotic Nucleic Acid Binding Proteins: Linking Gene Regulations to Proteins.,"Eukaryotic nucleic acid binding protein database (ENPD, http://qinlab.sls.cuhk.edu.hk/ENPD/) is a library of nucleic acid binding proteins (NBPs) and their functional information. 
NBPs such as DNA binding proteins (DBPs), RNA binding proteins (RBPs), and DNA and RNA binding proteins (DRBPs) are involved in every stage of gene regulation through their interactions with DNA and RNA. Due to the importance of NBPs, the database was constructed based on manual curation and a newly developed pipeline utilizing both sequenced transcriptomes and genomes. In total the database has recorded 2.8 million of NBPs and their binding motifs from 662 NBP families and 2423 species, constituting the largest NBP database. ENPD covers evolutionarily important lineages which have never been included in the previous NBP databases, while lineage-specific NBP family expansions were also found. ENPD also focuses on the involvements of DBPs, RBPs and DRBPs in non-coding RNA (ncRNA) mediated gene regulation. The predicted and experimentally validated targets of NBPs have both been recorded and manually curated in ENPD, linking the interactions between ncRNAs, DNA regulatory elements and NBPs in gene regulation. This database provides key resources for the scientific community, laying a solid foundation for future gene regulatory studies from both functional and evolutionary perspectives.",2019-01-01 +30407568,ICEberg 2.0: an updated database of bacterial integrative and conjugative elements.,"ICEberg 2.0 (http://db-mml.sjtu.edu.cn/ICEberg/) is an updated database that provides comprehensive information about bacterial integrative and conjugative elements (ICEs). Compared with the previous version, three major improvements were made. First, with the aid of text mining and manual curation, it now recorded the details of 1032 ICEs, including 270 with experimental supports and 762 from bioinformatics prediction. 
Second, as increasing evidence has shown that ICEs frequently mobilize the so-called 'hitchhikers', such as integrative and mobilizable elements (IMEs) and cis-mobilizable elements (CIMEs), 83 known transfer interactions between 49 IMEs and 7 CIMEs with 19 ICEs taken from the literature were included and illustrated with visually intuitive directed graphs. An expanded collection of 260 chromosome-borne IMEs and 235 CIMEs was also added. At last, ICEberg 2.0 provides an online tool ICEfinder to predict ICEs or IMEs in bacterial genome sequences. It combines a similarity search for the integrase, relaxase and/or type IV secretion system and the co-localization of these corresponding homologous genes. With the recent updates, ICEberg 2.0 might provide better support for understanding the biological traits of ICEs, especially as their interaction with cognate mobilizable elements may further promote horizontal gene flow.",2019-01-01 +30357342,CoevDB: a database of intramolecular coevolution among protein-coding genes of the bony vertebrates.,"The study of molecular coevolution, due to its potential to identify gene regions under functional or structural constraints, has recently been subject to numerous scientific inquiries. Particular efforts have been conducted to develop methods predicting the presence of coevolution in molecular sequences. Among these methods, a few aim to model the underlying evolutionary process of coevolution, which enable to differentiate the shared history of genes to coevolution and thus improve their accuracy. However, the usage of such methods remains sparse due to their expensive computational cost and the lack of resources alleviating this issue. Here we present CoevDB (http://phylodb.unil.ch/CoevDB), a database containing the result of a large-scale analysis of intramolecular coevolution of 8201 protein-coding genes of bony vertebrates. 
The web interface of CoevDB gives access to the results of 800 million statistical tests corresponding to all the pairs of sites analyzed. Several types of queries enable users to explore the database by either targeting specific genes or by discovering genes having promising estimations of coevolution.",2019-01-01 +30335169,PopHumanScan: the online catalog of human genome adaptation.,"Since the migrations that led humans to colonize Earth, our species has faced frequent adaptive challenges that have left signatures in the landscape of genetic variation and that we can identify in our today's genomes. Here, we (i) perform an outlier approach on eight different population genetic statistics for 22 non-admixed human populations of the Phase III of the 1000 Genomes Project to detect selective sweeps at different historical ages, as well as events of recurrent positive selection in the human lineage; and (ii) create PopHumanScan, an online catalog that compiles and annotates all candidate regions under selection to facilitate their validation and thorough analysis. Well-known examples of human genetic adaptation published elsewhere are included in the catalog, as well as hundreds of other attractive candidates that will require further investigation. Designed as a collaborative database, PopHumanScan aims to become a central repository to share information, guide future studies and help advance our understanding of how selection has modeled our genomes as a response to changes in the environment or lifestyle of human populations. PopHumanScan is open and freely available at https://pophumanscan.uab.cat.",2019-01-01 +30299485,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Evaluation and Treatment of Patients With Thoracolumbar Spine Trauma: Novel Surgical Strategies.,"

Background

Treatment of thoracolumbar burst fractures has traditionally involved spinal instrumentation with fusion performed with standard open surgical techniques. Novel surgical strategies, including instrumentation without fusion and percutaneous instrumentation alone, have been considered less invasive and more efficient treatments.

Objective

To review the current literature and determine the role of fusion in instrumented fixation, as well as the role of percutaneous instrumentation, in the treatment of patients with thoracolumbar burst fractures.

Methods

The task force members identified search terms/parameters and a medical librarian implemented the literature search, consistent with the literature search protocol (see Appendix I), using the National Library of Medicine PubMed database and the Cochrane Library for the period from January 1, 1946 to March 31, 2015.

Results

A total of 906 articles were identified and 38 were selected for full-text review. Of these articles, 12 articles met criteria for inclusion in this systematic review.

Conclusion

There is grade A evidence for the omission of fusion in instrumented fixation for thoracolumbar burst fractures. There is grade B evidence that percutaneous instrumentation is as effective as open instrumentation for thoracolumbar burst fractures.

Question

Does the addition of arthrodesis to instrumented fixation improve outcomes in patients with thoracic and lumbar burst fractures?

Recommendation

It is recommended that in the surgical treatment of patients with thoracolumbar burst fractures, surgeons should understand that the addition of arthrodesis to instrumented stabilization has not been shown to impact clinical or radiological outcomes, and adds to increased blood loss and operative time. Strength of Recommendation: Grade A.

Question

How does the use of minimally invasive techniques (including percutaneous instrumentation) affect outcomes in patients undergoing surgery for thoracic and lumbar fractures compared to conventional open techniques?

Recommendation

Stabilization using both open and percutaneous pedicle screws may be considered in the treatment of thoracolumbar burst fractures as the evidence suggests equivalent clinical outcomes. Strength of Recommendation: Grade B The full version of the guideline can be reviewed at: https://www.cns.org/guideline-chapters/congress-neurological-surgeons-systematic-review-evidence-based-guidelines/chapter_12.",2019-01-01 +30945202,Experimentally Validated Plant lncRNAs in EVLncRNAs Database.,"Plant long noncoding RNAs (lncRNAs) play important functional roles in various biological processes. Most databases deposit all plant lncRNA candidates produced by high-throughput experimental and/or computational techniques. There are several databases for experimentally validated lncRNAs. However, these databases are small in scale (with a few hundreds of lncRNAs only) and specific in their focuses (plants, diseases, or interactions). Thus, we established EVLncRNAs by curating lncRNAs validated by low-throughput experiments (up to May 1, 2016) and integrating specific databases (lncRNAdb, LncRANDisease, Lnc2Cancer, and PLNIncRBase) with additional functional and disease-specific information not covered previously. The current version of EVLncRNAs contains 1543 lncRNAs from 77 species, including 428 plant lncRNAs from 44 plant species. Compared to PLNIncRBase, our dataset does not contain any lncRNAs from microarray and deep sequencing. Moreover, 40% of entries contain new information (interaction and additional information from NCBI and Ensembl). The database allows users to browse, search, and download as well as to submit experimentally validated lncRNAs. The database is available at http://biophy.dzu.edu.cn/EVLncRNAs .",2019-01-01 +30626175,The 26th annual Nucleic Acids Research database issue and Molecular Biology Database Collection.,"The 2019 Nucleic Acids Research (NAR) Database Issue contains 168 papers spanning molecular biology. 
Among them, 64 are new and another 92 are updates describing resources that appeared in the Issue previously. The remaining 12 are updates on databases most recently published elsewhere. This Issue contains two Breakthrough articles, on the Virtual Metabolic Human (VMH) database which links human and gut microbiota metabolism with diet and disease, and Vibrism DB, a database of mouse brain anatomy and gene (co-)expression with sophisticated visualization and session sharing. Major returning nucleic acid databases include RNAcentral, miRBase and LncRNA2Target. Protein sequence databases include UniProtKB, InterPro and Pfam, while wwPDB and RCSB cover protein structure. STRING and KEGG update in the section on metabolism and pathways. Microbial genomes are covered by IMG/M and resources for human and model organism genomics include Ensembl, UCSC Genome Browser, GENCODE and Flybase. Genomic variation and disease are well-covered by GWAS Catalog, PopHumanScan, OMIM and COSMIC, CADD being another major newcomer. Major new proteomics resources reporting here include iProX and jPOSTdb. The entire database issue is freely available online on the NAR website (https://academic.oup.com/nar). The NAR online Molecular Biology Database Collection has been updated, reviewing 506 entries, adding 66 new resources and eliminating 147 discontinued URLs, bringing the current total to 1613 databases. It is available at http://www.oxfordjournals.org/nar/database/c.",2019-01-01 +30445427,"15 years of PhosphoSitePlus®: integrating post-translationally modified sites, disease variants and isoforms.","For 15 years the mission of PhosphoSitePlus® (PSP, https://www.phosphosite.org) has been to provide comprehensive information and tools for the study of mammalian post-translational modifications (PTMs). The number of unique PTMs in PSP is now more than 450 000 from over 22 000 articles and thousands of MS datasets. 
The most important areas of growth in PSP are in disease and isoform informatics. Germline mutations associated with inherited diseases and somatic cancer mutations have been added to the database and can now be viewed along with PTMs and associated quantitative information on novel 'lollipop' plots. These plots enable researchers to interactively visualize the overlap between disease variants and PTMs, and to identify mutations that may alter phenotypes by rewiring signaling networks. We are expanding the sequence space to include over 30 000 human and mouse isoforms to enable researchers to explore the important but understudied biology of isoforms. This represents a necessary expansion of sequence space to accommodate the growing precision and depth of coverage enabled by ongoing advances in mass spectrometry. Isoforms are aligned using a new algorithm. Exploring the worlds of PTMs and disease mutations in the entire isoform space will hopefully lead to new biomarkers, therapeutic targets, and insights into isoform biology.",2019-01-01 +30423142,miRBase: from microRNA sequences to function.,"miRBase catalogs, names and distributes microRNA gene sequences. The latest release of miRBase (v22) contains microRNA sequences from 271 organisms: 38 589 hairpin precursors and 48 860 mature microRNAs. We describe improvements to the database and website to provide more information about the quality of microRNA gene annotations, and the cellular functions of their products. We have collected 1493 small RNA deep sequencing datasets and mapped a total of 5.5 billion reads to microRNA sequences. The read mapping patterns provide strong support for the validity of between 20% and 65% of microRNA annotations in different well-studied animal genomes, and evidence for the removal of >200 sequences from the database. To improve the availability of microRNA functional information, we are disseminating Gene Ontology terms annotated against miRBase sequences. 
We have also used a text-mining approach to search for microRNA gene names in the full-text of open access articles. Over 500 000 sentences from 18 542 papers contain microRNA names. We score these sentences for functional information and link them with 12 519 microRNA entries. The sentences themselves, and word clouds built from them, provide effective summaries of the functional information about specific microRNAs. miRBase is publicly and freely available at http://mirbase.org/.",2019-01-01 +30380113,MethMotif: an integrative cell specific database of transcription factor binding motifs coupled with DNA methylation profiles.,"Several recent studies have portrayed DNA methylation as a new player in the recruitment of transcription factors (TF) within chromatin, highlighting a need to connect TF binding sites (TFBS) with their respective DNA methylation profiles. However, current TFBS databases are restricted to DNA binding motif sequences. Here, we present MethMotif, a two-dimensional TFBS database that records TFBS position weight matrices along with cell type specific CpG methylation information computed from a combination of ChIP-seq and whole genome bisulfite sequencing datasets. Integrating TFBS motifs with TFBS DNA methylation better portrays the features of DNA loci recognised by TFs. In particular, we found that DNA methylation patterns within TFBS can be cell specific (e.g. MAFF). Furthermore, for a given TF, different DNA methylation profiles are associated with different DNA binding motifs (e.g. REST). To date, MethMotif database records over 500 TFBSs computed from over 2000 ChIP-seq datasets in 11 different cell types. 
MethMotif portal is accessible through an open source web interface (https://bioinfo-csi.nus.edu.sg/methmotif) that allows users to intuitively explore the entire dataset and perform both single, and batch queries.",2019-01-01 +30371849,LNCipedia 5: towards a reference set of human long non-coding RNAs.,"While long non-coding RNA (lncRNA) research in the past has primarily focused on the discovery of novel genes, today it has shifted towards functional annotation of this large class of genes. With thousands of lncRNA studies published every year, the current challenge lies in keeping track of which lncRNAs are functionally described. This is further complicated by the fact that lncRNA nomenclature is not straightforward and lncRNA annotation is scattered across different resources with their own quality metrics and definition of a lncRNA. To overcome this issue, large scale curation and annotation is needed. Here, we present the fifth release of the human lncRNA database LNCipedia (https://lncipedia.org). The most notable improvements include manual literature curation of 2482 lncRNA articles and the use of official gene symbols when available. In addition, an improved filtering pipeline results in a higher quality reference lncRNA gene set.",2019-01-01 +30371819,MoonDB 2.0: an updated database of extreme multifunctional and moonlighting proteins.,"MoonDB 2.0 (http://moondb.hb.univ-amu.fr/) is a database of predicted and manually curated extreme multifunctional (EMF) and moonlighting proteins, i.e. proteins that perform multiple unrelated functions. We have previously shown that such proteins can be predicted through the analysis of their molecular interaction subnetworks, their functional annotations and their association to distinct groups of proteins that are involved in unrelated functions. In MoonDB 2.0, we updated the set of human EMF proteins (238 proteins), using the latest functional annotations and protein-protein interaction networks. 
Furthermore, for the first time, we applied our method to four additional model organisms - mouse, fly, worm and yeast - and identified 54 novel EMF proteins in these species. In addition to novel predictions, this update contains 63 human and yeast proteins that were manually curated from literature, including descriptions of moonlighting functions and associated references. Importantly, MoonDB's interface was fully redesigned and improved, and its entries are now cross-referenced in the UniProt Knowledgebase (UniProtKB). MoonDB will be updated once a year with the novel EMF candidates calculated from the latest available protein interactions and functional annotations.",2019-01-01 +30357356,CMAUP: a database of collective molecular activities of useful plants.,"The beneficial effects of functionally useful plants (e.g. medicinal and food plants) arise from the multi-target activities of multiple ingredients of these plants. The knowledge of the collective molecular activities of these plants facilitates mechanistic studies and expanded applications. A number of databases provide information about the effects and targets of various plants and ingredients. More comprehensive information is needed for broader classes of plants and for the landscapes of individual plant's multiple targets, collective activities and regulated biological pathways, processes and diseases. We therefore developed a new database, Collective Molecular Activities of Useful Plants (CMAUP), to provide the collective landscapes of multiple targets (ChEMBL target classes) and activity levels (in 2D target-ingredient heatmap), and regulated gene ontologies (GO categories), biological pathways (KEGG categories) and diseases (ICD blocks) for 5645 plants (2567 medicinal, 170 food, 1567 edible, 3 agricultural and 119 garden plants) collected from or traditionally used in 153 countries and regions. 
These landscapes were derived from 47 645 plant ingredients active against 646 targets in 234 KEGG pathways associated with 2473 gene ontologies and 656 diseases. CMAUP (http://bidd2.nus.edu.sg/CMAUP/) is freely accessible and searchable by keywords, plant usage classes, species families, targets, KEGG pathways, gene ontologies, diseases (ICD code) and geographical locations.",2019-01-01 +30285109,LncRNADisease 2.0: an updated database of long non-coding RNA-associated diseases.,"Mounting evidence suggested that dysfunction of long non-coding RNAs (lncRNAs) is involved in a wide variety of diseases. A knowledgebase with systematic collection and curation of lncRNA-disease associations is critically important for further examining their underlying molecular mechanisms. In 2013, we presented the first release of LncRNADisease, representing a database for collection of experimental supported lncRNA-disease associations. Here, we describe an update of the database. The new developments in LncRNADisease 2.0 include (i) an over 40-fold lncRNA-disease association enhancement compared with the previous version; (ii) providing the transcriptional regulatory relationships among lncRNA, mRNA and miRNA; (iii) providing a confidence score for each lncRNA-disease association; (iv) integrating experimentally supported circular RNA disease associations. LncRNADisease 2.0 documents more than 200 000 lncRNA-disease associations. We expect that this database will continue to serve as a valuable source for potential clinical application related to lncRNAs. LncRNADisease 2.0 is freely available at http://www.rnanut.net/lncrnadisease/.",2019-01-01 +30247677,AmtDB: a database of ancient human mitochondrial genomes.,"Ancient mitochondrial DNA is used for tracing human past demographic events due to its population-level variability. 
The number of published ancient mitochondrial genomes has increased in recent years, alongside with the development of high-throughput sequencing and capture enrichment methods. Here, we present AmtDB, the first database of ancient human mitochondrial genomes. Release version contains 1107 hand-curated ancient samples, freely accessible for download, together with the individual descriptors, including geographic location, radiocarbon dating, and archaeological culture affiliation. The database also features an interactive map for sample location visualization. AmtDB is a key platform for ancient population genetic studies and is available at https://amtdb.org.",2019-01-01 +31802128,RAACBook: a web server of reduced amino acid alphabet for sequence-dependent inference by using Chou's five-step rule. ,"By reducing amino acid alphabet, the protein complexity can be significantly simplified, which could improve computational efficiency, decrease information redundancy and reduce chance of overfitting. Although some reduced alphabets have been proposed, different classification rules could produce distinctive results for protein sequence analysis. Thus, it is urgent to construct a systematical frame for reduced alphabets. In this work, we constructed a comprehensive web server called RAACBook for protein sequence analysis and machine learning application by integrating reduction alphabets. The web server contains three parts: (i) 74 types of reduced amino acid alphabet were manually extracted to generate 673 reduced amino acid clusters (RAACs) for dealing with unique protein problems. It is easy for users to select desired RAACs from a multilayer browser tool. (ii) An online tool was developed to analyze primary sequence of protein. The tool could produce K-tuple reduced amino acid composition by defining three correlation parameters (K-tuple, g-gap, λ-correlation). 
The results are visualized as sequence alignment, mergence of RAA composition, feature distribution and logo of reduced sequence. (iii) The machine learning server is provided to train the model of protein classification based on K-tuple RAAC. The optimal model could be selected according to the evaluation indexes (ROC, AUC, MCC, etc.). In conclusion, RAACBook presents a powerful and user-friendly service in protein sequence analysis and computational proteomics. RAACBook can be freely available at http://bioinfor.imu.edu.cn/raacbook. Database URL: http://bioinfor.imu.edu.cn/raacbook.",2019-01-01 +31617938,Design and evolution of the Seafarer's Health Passport for supporting (tele)-medical assistance to seafarers.,"BACKGROUND:Seafarers undergo periodic medical examination for their employment. This information in most cases is not effectively used when requesting for medical assistance during service on board ships. The medical history of an individual is important for provision of medical care and can be critical to the outcome. There is growing adoption of digital applications and electronic health records that are adding great value to the care provided. The Seafarer's Health Passport (SHP) is an application specifically designed for improving the quality of medical assistance provided to seafarers both through telemedicine or classic medical check-ups in ports/hospitals worldwide. The SHP provides a secure and unique way to archive and retrieve the seafarer's medical history in an electronic support. MATERIALS AND METHODS:The SHP that we have developed is a product with specific hardware and software specifications. The basic features of this software are Linux operating system Debian/Ubuntu, Apache Web server 2.x, Server database MySQL/Maria DB PHP programming language 5.6.xx, and secure connection in https. 
RESULTS:The SHP represents a helpful hint to physicians providing medical advices to seafarers enabling them to make more decisions that are informed and curtailing possible complications due to misdiagnosis. CONCLUSIONS:Provision of high quality medical assistance requires knowledge of patient's medical history. The availability of an easy access and friendly use system of own medical history is useful for a population of travellers, such as seafarers to guarantee a reasonable level of medical care at any time.",2019-01-01 +30380087,SymMap: an integrative database of traditional Chinese medicine enhanced by symptom mapping.,"Recently, the pharmaceutical industry has heavily emphasized phenotypic drug discovery (PDD), which relies primarily on knowledge about phenotype changes associated with diseases. Traditional Chinese medicine (TCM) provides a massive amount of information on natural products and the clinical symptoms they are used to treat, which are the observable disease phenotypes that are crucial for clinical diagnosis and treatment. Curating knowledge of TCM symptoms and their relationships to herbs and diseases will provide both candidate leads and screening directions for evidence-based PDD programs. Therefore, we present SymMap, an integrative database of traditional Chinese medicine enhanced by symptom mapping. We manually curated 1717 TCM symptoms and related them to 499 herbs and 961 symptoms used in modern medicine based on a committee of 17 leading experts practicing TCM. Next, we collected 5235 diseases associated with these symptoms, 19 595 herbal constituents (ingredients) and 4302 target genes, and built a large heterogeneous network containing all of these components. Thus, SymMap integrates TCM with modern medicine in common aspects at both the phenotypic and molecular levels. 
Furthermore, we inferred all pairwise relationships among SymMap components using statistical tests to give pharmaceutical scientists the ability to rank and filter promising results to guide drug discovery. The SymMap database can be accessed at http://www.symmap.org/ and https://www.bioinfo.org/symmap.",2019-01-01 +30357370,liqDB: a small-RNAseq knowledge discovery database for liquid biopsy studies.,"MiRNAs are important regulators of gene expression and are frequently deregulated under pathologic conditions. They are highly stable in bodily fluids which makes them feasible candidates to become minimally invasive biomarkers. In fact, several studies already proposed circulating miRNA-based biomarkers for different types of neoplastic, cardiovascular and degenerative diseases. However, many of these studies rely on small RNA sequencing experiments that are based on different RNA extraction and processing protocols, rendering results incomparable. We generated liqDB, a database for liquid biopsy small RNA sequencing profiles that provides users with meaningful information to guide their small RNA liquid biopsy research and to overcome technical and conceptual problems. By means of a user-friendly web interface, miRNA expression profiles from 1607 manually annotated samples can be queried and explored at different levels. Result pages include downloadable expression matrices, differential expression analysis, most stably expressed miRNAs, cluster analysis and relevant visualizations by means of boxplots and heatmaps. We anticipate that liqDB will be a useful tool in liquid biopsy research as it provides a consistently annotated large compilation of experiments together with tools for reproducible analysis, comparison and hypothesis generation. 
LiqDB is available at http://bioinfo5.ugr.es/liqdb.",2019-01-01 +30215764,AWESOME: a database of SNPs that affect protein post-translational modifications.,"Protein post-translational modifications (PTMs), including phosphorylation, ubiquitination, methylation, acetylation, glycosylation et al, are very important biological processes. PTM changes in some critical genes, which may be induced by base-pair substitution, are shown to affect the risk of diseases. Recently, large-scale exome-wide association studies found that missense single nucleotide polymorphisms (SNPs) play an important role in the susceptibility for complex diseases or traits. One of the functional mechanisms of missense SNPs is that they may affect PTMs and leads to a protein dysfunction and its downstream signaling pathway disorder. Here, we constructed a database named AWESOME (A Website Exhibits SNP On Modification Event, http://www.awesome-hust.com), which is an interactive web-based analysis tool that systematically evaluates the role of SNPs on nearly all kinds of PTMs based on 20 available tools. We also provided a well-designed scoring system to compare the performance of different PTM prediction tools and help users to get a better interpretation of results. Users can search SNPs, genes or position of interest, filter with specific modifications or prediction methods, to get a comprehensive PTM change induced by SNPs. In summary, our database provides a convenient way to detect PTM-related SNPs, which may potentially be pathogenic factors or therapeutic targets.",2019-01-01 +30203096,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Evaluation and Treatment of Patients With Thoracolumbar Spine Trauma: Nonoperative Care.,"

Background

Thoracic and lumbar burst fractures in neurologically intact patients are considered to be inherently stable, and responsive to nonsurgical management. There is a lack of consensus regarding the optimal conservative treatment modality. The question remains whether external bracing is necessary vs mobilization without a brace after these injuries.

Objective

To determine if the use of external bracing improves outcomes compared to no brace for neurologically intact patients with thoracic or lumbar burst fractures.

Methods

A systematic review of the literature was performed using the National Library of Medicine PubMed database and the Cochrane Library for studies relevant to thoracolumbar trauma. Clinical studies specifically comparing external bracing to no brace for neurologically intact patients with thoracic or lumbar burst fractures were selected for review.

Results

Three studies out of 1137 met inclusion criteria for review. One randomized controlled trial (level I) and an additional randomized controlled pilot study (level II) provided evidence that both external bracing and no brace equally improve pain and disability in neurologically intact patients with burst fractures. There was no difference in final clinical and radiographic outcomes between patients treated with an external brace vs no brace. One additional level IV retrospective study demonstrated equivalent clinical outcomes for external bracing vs no brace.

Conclusion

This evidence-based guideline provides a grade B recommendation that management either with or without an external brace is an option given equivalent improvement in outcomes for neurologically intact patients with thoracic and lumbar burst fractures. The decision to use an external brace is at the discretion of the treating physician, as bracing is not associated with increased adverse events compared to no brace.

Question

Does the use of external bracing improve outcomes in the nonoperative treatment of neurologically intact patients with thoracic and lumbar burst fractures?

Recommendation

The decision to use an external brace is at the discretion of the treating physician, as the nonoperative management of neurologically intact patients with thoracic and lumbar burst fractures either with or without an external brace produces equivalent improvement in outcomes. Bracing is not associated with increased adverse events compared to not bracing. Strength of Recommendation: Grade B The full version of the guideline can be reviewed at: https://www.cns.org/guideline-chapters/congress-neurological-surgeons-systematic-review-evidence-based-guidelines/chapter_8.",2019-01-01 +30361181,"A web-based diagnostic reference centre for the European Reference Network ""EpiCare"": recommendations of the eNeuropathology working group.","Epilepsy surgery is a valuable treatment strategy for a selected group of patients with drug-resistant focal epilepsy. While reliable disease classification is essential for the optimal management of patients in general and crucial for the development of more personalized therapies in the future, arriving at a precise diagnosis often poses considerable difficulties due to the broad and variant-rich spectrum of epilepsy-associated brain lesions. Given the scarcity of European institutions diagnostically focusing on the histopathology of epilepsy surgery cases, the provision of subspecialty expertise as well as training opportunities remains logistically and financially challenging. To improve this situation, the European Reference Network's (ERN) epilepsy care program (EpiCare, http://epi-care.eu) has set out to develop a web-based microscopy referral and teaching framework. This paper reviews the aspects of digital microscopy, data storage, and image analysis technology relevant to the practice of neuropathology. 
Cognizant of the European data security requirements and regulations, we propose a collaborative, diagnostic network initiative (the eNeuropathology reference centre) and delineate a roadmap for its implementation favouring open-source, vendor-independent browser platforms.",2018-10-01 +31428122,PredPRBA: Prediction of Protein-RNA Binding Affinity Using Gradient Boosted Regression Trees.,"Protein-RNA interactions play essential roles in many biological aspects. Quantifying the binding affinity of protein-RNA complexes is helpful to the understanding of protein-RNA recognition mechanisms and identification of strong binding partners. Due to experimentally measured protein-RNA binding affinity data available is still limited to date, there is a pressing demand for accurate and reliable computational approaches. In this paper, we propose a computational approach, PredPRBA, which can effectively predict protein-RNA binding affinity using gradient boosted regression trees. We build a dataset of protein-RNA binding affinity that includes 103 protein-RNA complex structures manually collected from related literature. Then, we generate 37 kinds of sequence and structural features and explore the relationship between the features and protein-RNA binding affinity. We find that the binding affinity mainly depends on the structure of RNA molecules. According to the type of RNA associated with proteins composed of the protein-RNA complex, we split the 103 protein-RNA complexes into six categories. For each category, we build a gradient boosted regression tree (GBRT) model based on the generated features. We perform a comprehensive evaluation for the proposed method on the binding affinity dataset using leave-one-out cross-validation. We show that PredPRBA achieves correlations ranging from 0.723 to 0.897 among six categories, which is significantly better than other typical regression methods and the pioneer protein-RNA binding affinity predictor SPOT-Seq-RNA. 
In addition, a user-friendly web server has been developed to predict the binding affinity of protein-RNA complexes. The PredPRBA webserver is freely available at http://PredPRBA.denglab.org/.",2019-08-02 +30169674,ANNOgesic: a Swiss army knife for the RNA-seq based annotation of bacterial/archaeal genomes. ,"To understand the gene regulation of an organism of interest, a comprehensive genome annotation is essential. While some features, such as coding sequences, can be computationally predicted with high accuracy based purely on the genomic sequence, others, such as promoter elements or noncoding RNAs, are harder to detect. RNA sequencing (RNA-seq) has proven to be an efficient method to identify these genomic features and to improve genome annotations. However, processing and integrating RNA-seq data in order to generate high-resolution annotations is challenging, time consuming, and requires numerous steps. We have constructed a powerful and modular tool called ANNOgesic that provides the required analyses and simplifies RNA-seq-based bacterial and archaeal genome annotation. It can integrate data from conventional RNA-seq and differential RNA-seq and predicts and annotates numerous features, including small noncoding RNAs, with high precision. The software is available under an open source license (ISCL) at https://pypi.org/project/ANNOgesic/.",2018-09-01 +31439548,Meflin-Positive Cancer-Associated Fibroblasts Inhibit Pancreatic Carcinogenesis.,"Cancer-associated fibroblasts (CAF) constitute a major component of the tumor microenvironment. Recent observations in genetically engineered mouse models and clinical studies have suggested that there may exist at least two functionally different populations of CAFs, that is, cancer-promoting CAFs (pCAF) and cancer-restraining CAFs (rCAF). Although various pCAF markers have been identified, the identity of rCAFs remains unknown because of the lack of rCAF-specific marker(s). 
In this study, we found that Meflin, a glycosylphosphatidylinositol-anchored protein that is a marker of mesenchymal stromal/stem cells and maintains their undifferentiated state, is expressed by pancreatic stellate cells that are a source of CAFs in pancreatic ductal adenocarcinoma (PDAC). In situ hybridization analysis of 71 human PDAC tissues revealed that the infiltration of Meflin-positive CAFs correlated with favorable patient outcome. Consistent herewith, Meflin deficiency led to significant tumor progression with poorly differentiated histology in a PDAC mouse model. Similarly, genetic ablation of Meflin-positive CAFs resulted in poor differentiation of tumors in a syngeneic transplantation model. Conversely, delivery of a Meflin-expressing lentivirus into the tumor stroma or overexpression of Meflin in CAFs suppressed the growth of xenograft tumors. Lineage tracing revealed that Meflin-positive cells gave rise to α-smooth muscle actin-positive CAFs that are positive or negative for Meflin, suggesting a mechanism for generating CAF heterogeneity. Meflin deficiency or low expression resulted in straightened stromal collagen fibers, which represent a signature for aggressive tumors, in mouse or human PDAC tissues, respectively. Together, the data suggest that Meflin is a marker of rCAFs that suppress PDAC progression. SIGNIFICANCE: Meflin marks and functionally contributes to a subset of cancer-associated fibroblasts that exert antitumoral effects.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/79/20/5367/F1.large.jpg.",2019-08-22 +27379351,Willingness of Rhode Island Dentists to Provide Limited Preventive Primary Care.,"In response to the shortage of primary care physicians and the need for greater intercollaboration among health professionals, dentists with sufficient medical and surgical training are an untapped resource to provide limited preventive primary care (LPPC), such as chairside screening for chronic diseases. 
The objective of this study was to determine attitudes of Rhode Island dentists toward becoming more involved in the overall health of their patients. Using a 5-point scale (1 being highest), a pretested survey was administered to 92 respondent RI dentists who were asked to indicate their willingness to become more involved in patients' overall health, and undergo additional training to provide LPPC. Their moderate level of willingness was offset by great concern for liability, with older dentists being significantly more willing to assume these additional responsibilities than younger dentists (p< .05). Rank order of designation of oral health providers among dentist, dental physician, oral physician, odontologist, stomatologist, and stomiatrist was still dentist first, but with no significant difference between the mean ranks of dentist and oral physician.[Full article available at http://rimed.org/rimedicaljournal-2016-07.asp, free with no login].",2016-07-01 +30598096,Predicting protein-protein interactions using high-quality non-interacting pairs.,"

Background

Identifying protein-protein interactions (PPIs) is of paramount importance for understanding cellular processes. Machine learning-based approaches have been developed to predict PPIs, but the effectiveness of these approaches is unsatisfactory. One major reason is that they randomly choose non-interacting protein pairs (negative samples) or heuristically select non-interacting pairs with low quality.

Results

To boost the effectiveness of predicting PPIs, we propose two novel approaches (NIP-SS and NIP-RW) to generate high quality non-interacting pairs based on sequence similarity and random walk, respectively. Specifically, the known PPIs collected from public databases are used to generate the positive samples. NIP-SS then selects the top-m dissimilar protein pairs as negative examples and controls the degree distribution of selected proteins to construct the negative dataset. NIP-RW performs random walk on the PPI network to update the adjacency matrix of the network, and then selects protein pairs not connected in the updated network as negative samples. Next, we use auto covariance (AC) descriptor to encode the feature information of amino acid sequences. After that, we employ deep neural networks (DNNs) to predict PPIs based on extracted features, positive and negative examples. Extensive experiments show that NIP-SS and NIP-RW can generate negative samples with higher quality than existing strategies and thus enable more accurate prediction.

Conclusions

The experimental results prove that negative datasets constructed by NIP-SS and NIP-RW can reduce the bias and have good generalization ability. NIP-SS and NIP-RW can be used as a plugin to boost the effectiveness of PPIs prediction. Codes and datasets are available at http://mlda.swu.edu.cn/codes.php?name=NIP .",2018-12-31 +30598077,Constructing a database for the relations between CNV and human genetic diseases via systematic text mining.,"

Background

The detection and interpretation of CNVs are of clinical importance in genetic testing. Several databases and web services are already being used by clinical geneticists to interpret the medical relevance of identified CNVs in patients. However, geneticists or physicians would like to obtain the original literature context for more detailed information, especially for rare CNVs that were not included in databases.

Results

The resulting CNVdigest database includes 440,485 sentences for CNV-disease relationship. A total number of 1582 CNVs and 2425 diseases are involved. Sentences describing CNV-disease correlations are indexed in CNVdigest, with CNV mentions and disease mentions annotated.

Conclusions

In this paper, we use a systematic text mining method to construct a database for the relationship between CNVs and diseases. Based on that, we also developed a concise front-end to facilitate the analysis of CNV/disease association, providing a user-friendly web interface for convenient queries. The resulting system is publically available at http://cnv.gtxlab.com /.",2018-12-31 +29875422,Stargazer: a software tool for calling star alleles from next-generation sequencing data using CYP2D6 as a model.,"

Purpose

Genotyping CYP2D6 is important for precision drug therapy because the enzyme it encodes metabolizes approximately 25% of drugs, and its activity varies considerably among individuals. Genotype analysis of CYP2D6 is challenging due to its highly polymorphic nature. Over 100 haplotypes (star alleles) have been defined for CYP2D6, some involving a gene conversion with its nearby nonfunctional but highly homologous paralog CYP2D7. We present Stargazer, a new bioinformatics tool that uses next-generation sequencing (NGS) data to call star alleles for CYP2D6 ( https://stargazer.gs.washington.edu/stargazerweb/ ). Stargazer is currently being extended for other pharmacogenes.

Methods

Stargazer identifies star alleles from NGS data by detecting single nucleotide variants, insertion-deletion variants, and structural variants. Stargazer detects structural variation, including gene deletions, duplications, and conversions, by calculating paralog-specific copy numbers from read depths.

Results

We applied Stargazer to the NGS data of 32 ethnically diverse HapMap trios that were genotyped by TaqMan assays, long-range polymerase chain reaction, quantitative multiplex polymerase chain reaction, high-resolution melting analysis, and/or Sanger sequencing. CYP2D6 genotyping by Stargazer was 99.0% concordant with the data obtained by these methods, and showed that 28.1% of the samples had structural variation including CYP2D6/CYP2D7 hybrids.

Conclusion

Accurate genotyping of pharmacogenes with NGS and subsequent allele calling with Stargazer will aid the implementation of precision drug therapy.",2018-06-06 +29939207,A descriptive marker gene approach to single-cell pseudotime inference.,"

Motivation

Pseudotime estimation from single-cell gene expression data allows the recovery of temporal information from otherwise static profiles of individual cells. Conventional pseudotime inference methods emphasize an unsupervised transcriptome-wide approach and use retrospective analysis to evaluate the behaviour of individual genes. However, the resulting trajectories can only be understood in terms of abstract geometric structures and not in terms of interpretable models of gene behaviour.

Results

Here we introduce an orthogonal Bayesian approach termed 'Ouija' that learns pseudotimes from a small set of marker genes that might ordinarily be used to retrospectively confirm the accuracy of unsupervised pseudotime algorithms. Crucially, we model these genes in terms of switch-like or transient behaviour along the trajectory, allowing us to understand why the pseudotimes have been inferred and learn informative parameters about the behaviour of each gene. Since each gene is associated with a switch or peak time the genes are effectively ordered along with the cells, allowing each part of the trajectory to be understood in terms of the behaviour of certain genes. We demonstrate that this small panel of marker genes can recover pseudotimes that are consistent with those obtained using the entire transcriptome. Furthermore, we show that our method can detect differences in the regulation timings between two genes and identify 'metastable' states-discrete cell types along the continuous trajectories-that recapitulate known cell types.

Availability and implementation

An open source implementation is available as an R package at http://www.github.com/kieranrcampbell/ouija and as a Python/TensorFlow package at http://www.github.com/kieranrcampbell/ouijaflow.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +29112706,NanoStringNormCNV: pre-processing of NanoString CNV data.,"Summary:The NanoString System is a well-established technology for measuring RNA and DNA abundance. Although it can estimate copy number variation, relatively few tools support analysis of these data. To address this gap, we created NanoStringNormCNV, an R package for pre-processing and copy number variant calling from NanoString data. This package implements algorithms for pre-processing, quality-control, normalization and copy number variation detection. A series of reporting and data visualization methods support exploratory analyses. To demonstrate its utility, we apply it to a new dataset of 96 genes profiled on 41 prostate tumour and 24 matched normal samples. Availability and implementation:NanoStringNormCNV is implemented in R and is freely available at http://labs.oicr.on.ca/boutros-lab/software/nanostringnormcnv. Contact:paul.boutros@oicr.on.ca. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-03-01 +30681830,Autonomous METLIN-Guided In-source Fragment Annotation for Untargeted Metabolomics.,"Computational metabolite annotation in untargeted profiling aims at uncovering neutral molecular masses of underlying metabolites and assign those with putative identities. Existing annotation strategies rely on the observation and annotation of adducts to determine metabolite neutral masses. However, a significant fraction of features usually detected in untargeted experiments remains unannotated, which limits our ability to determine neutral molecular masses. Despite the availability of tools to annotate, relatively few of them benefit from the inherent presence of in-source fragments in liquid chromatography-electrospray ionization-mass spectrometry. 
In this study, we introduce a strategy to annotate in-source fragments in untargeted data using low-energy tandem mass spectrometry (MS) spectra from the METLIN library. Our algorithm, MISA (METLIN-guided in-source annotation), compares detected features against low-energy fragments from MS/MS spectra, enabling robust annotation and putative identification of metabolic features based on low-energy spectral matching. The algorithm was evaluated through an annotation analysis of a total of 140 metabolites across three different sets of biological samples analyzed with liquid chromatography-mass spectrometry. Results showed that, in cases where adducts were not formed or detected, MISA was able to uncover neutral molecular masses by in-source fragment matching. MISA was also able to provide putative metabolite identities via two annotation scores. These scores take into account the number of in-source fragments matched and the relative intensity similarity between the experimental data and the reference low-energy MS/MS spectra. Overall, results showed that in-source fragmentation is a highly frequent phenomena that should be considered for comprehensive feature annotation. Thus, combined with adduct annotation, this strategy adds a complementary annotation layer, enabling in-source fragments to be annotated and increasing putative identification confidence. The algorithm is integrated into the XCMS Online platform and is freely available at http://xcmsonline.scripps.edu .",2019-02-11 +28339265,Topology-Scaling Identification of Layered Solids and Stable Exfoliated 2D Materials.,"The Materials Project crystal structure database has been searched for materials possessing layered motifs in their crystal structures using a topology-scaling algorithm. The algorithm identifies and measures the sizes of bonded atomic clusters in a structure's unit cell, and determines their scaling with cell size. 
The search yielded 826 stable layered materials that are considered as candidates for the formation of two-dimensional monolayers via exfoliation. Density-functional theory was used to calculate the exfoliation energy of each material and 680 monolayers emerge with exfoliation energies below those of already-existent two-dimensional materials. The crystal structures of these two-dimensional materials provide templates for future theoretical searches of stable two-dimensional materials. The optimized structures and other calculated data for all 826 monolayers are provided at our database (https://materialsweb.org).",2017-03-07 +27222801,Genome-wide functional annotation of Phomopsis longicolla isolate MSPL 10-6.,"Phomopsis seed decay of soybean is caused primarily by the seed-borne fungal pathogen Phomopsis longicolla (syn. Diaporthe longicolla). This disease severely decreases soybean seed quality, reduces seedling vigor and stand establishment, and suppresses yield. It is one of the most economically important soybean diseases. In this study we annotated the entire genome of P. longicolla isolate MSPL 10-6, which was isolated from field-grown soybean seed in Mississippi, USA. This study represents the first reported genome-wide functional annotation of a seed borne fungal pathogen in the Diaporthe-Phomopsis complex. The P. longicolla genome annotation will enable research into the genetic basis of fungal infection of soybean seed and provide information for the study of soybean-fungal interactions. The genome annotation will also be a valuable resource for the research and agricultural communities. It will aid in the development of new control strategies for this pathogen. The annotations can be found from: http://bioinformatics.towson.edu/phomopsis_longicolla/download.html. 
NCBI accession number is: AYRD00000000.",2016-04-06 +28502701,FastPCR: An in silico tool for fast primer and probe design and advanced sequence analysis.,"Polymerase chain reaction (PCR) is one of the most important laboratory techniques used in molecular biology, genetics and molecular diagnostics. The success of a PCR-based method largely depends on the correct nucleic acid sequence analysis in silico prior to a wet-bench experiment. Here, we report the development of an online Java-based software for virtual PCR on linear or circular DNA templates and multiple primer or probe search from large or small databases. Primer or probe sensitivity and specificity are predicted by searching a database to find sequences with an optimal number of mismatches, similarity and stability. The software determines primer location, orientation, efficiency of binding and calculates primer melting temperatures for standard and degenerate oligonucleotides. The software is suitable for batch file processing, which is essential for automation when working with large amounts of data. The online Java software is available for download at http://primerdigital.com/tools/pcr.html. Accession numbers for the sequences resulting from this study: EU140956 EU177767 EU867815 EU882730 FJ975775-FJ975780 HM481419 HM481420 KC686837-KC686839 KM262797.",2017-05-12 +22135287,miRNEST database: an integrative approach in microRNA search and annotation.,"Despite accumulating data on animal and plant microRNAs and their functions, existing public miRNA resources usually collect miRNAs from a very limited number of species. A lot of microRNAs, including those from model organisms, remain undiscovered. As a result there is a continuous need to search for new microRNAs. We present miRNEST (http://mirnest.amu.edu.pl), a comprehensive database of animal, plant and virus microRNAs. 
The core part of the database is built from our miRNA predictions conducted on Expressed Sequence Tags of 225 animal and 202 plant species. The miRNA search was performed based on sequence similarity and as many as 10,004 miRNA candidates in 221 animal and 199 plant species were discovered. Out of them only 299 have already been deposited in miRBase. Additionally, miRNEST has been integrated with external miRNA data from literature and 13 databases, which includes miRNA sequences, small RNA sequencing data, expression, polymorphisms and targets data as well as links to external miRNA resources, whenever applicable. All this makes miRNEST a considerable miRNA resource in a sense of number of species (544) that integrates a scattered miRNA data into a uniform format with a user-friendly web interface.",2011-12-01 +29485625,"Datasets2Tools, repository and search engine for bioinformatics datasets, tools and canned analyses.","Biomedical data repositories such as the Gene Expression Omnibus (GEO) enable the search and discovery of relevant biomedical digital data objects. Similarly, resources such as OMICtools, index bioinformatics tools that can extract knowledge from these digital data objects. However, systematic access to pre-generated 'canned' analyses applied by bioinformatics tools to biomedical digital data objects is currently not available. Datasets2Tools is a repository indexing 31,473 canned bioinformatics analyses applied to 6,431 datasets. The Datasets2Tools repository also contains the indexing of 4,901 published bioinformatics software tools, and all the analyzed datasets. Datasets2Tools enables users to rapidly find datasets, tools, and canned analyses through an intuitive web interface, a Google Chrome extension, and an API. 
Furthermore, Datasets2Tools provides a platform for contributing canned analyses, datasets, and tools, as well as evaluating these digital objects according to their compliance with the findable, accessible, interoperable, and reusable (FAIR) principles. By incorporating community engagement, Datasets2Tools promotes sharing of digital resources to stimulate the extraction of knowledge from biomedical research data. Datasets2Tools is freely available from: http://amp.pharm.mssm.edu/datasets2tools.",2018-02-27 +25420108,Synergy: a web resource for exploring gene regulation in Synechocystis sp. PCC6803.,"Despite being a highly studied model organism, most genes of the cyanobacterium Synechocystis sp. PCC 6803 encode proteins with completely unknown function. To facilitate studies of gene regulation in Synechocystis, we have developed Synergy (http://synergy.plantgenie.org), a web application integrating co-expression networks and regulatory motif analysis. Co-expression networks were inferred from publicly available microarray experiments, while regulatory motifs were identified using a phylogenetic footprinting approach. Automatically discovered motifs were shown to be enriched in the network neighborhoods of regulatory proteins much more often than in the neighborhoods of non-regulatory genes, showing that the data provide a sound starting point for studying gene regulation in Synechocystis. Concordantly, we provide several case studies demonstrating that Synergy can be used to find biologically relevant regulatory mechanisms in Synechocystis. Synergy can be used to interactively perform analyses such as gene/motif search, network visualization and motif/function enrichment. 
Considering the importance of Synechocystis for photosynthesis and biofuel research, we believe that Synergy will become a valuable resource to the research community.",2014-11-24 +26571275,"CentiServer: A Comprehensive Resource, Web-Based Application and R Package for Centrality Analysis.","Various disciplines are trying to solve one of the most noteworthy queries and broadly used concepts in biology, essentiality. Centrality is a primary index and a promising method for identifying essential nodes, particularly in biological networks. The newly created CentiServer is a comprehensive online resource that provides over 110 definitions of different centrality indices, their computational methods, and algorithms in the form of an encyclopedia. In addition, CentiServer allows users to calculate 55 centralities with the help of an interactive web-based application tool and provides a numerical result as a comma separated value (csv) file format or a mapped graphical format as a graph modeling language (GML) file. The standalone version of this application has been developed in the form of an R package. The web-based application (CentiServer) and R package (centiserve) are freely available at http://www.centiserver.org/.",2015-11-16 +30179954,Can Machine-learning Techniques Be Used for 5-year Survival Prediction of Patients With Chondrosarcoma?,"

Background

Several studies have identified prognostic factors for patients with chondrosarcoma, but there are few studies investigating the accuracy of computationally intensive methods such as machine learning. Machine learning is a type of artificial intelligence that enables computers to learn from data. Studies using machine learning are potentially appealing, because of its possibility to explore complex patterns in data and to improve its models over time.

Questions/purposes

The purposes of this study were (1) to develop machine-learning algorithms for the prediction of 5-year survival in patients with chondrosarcoma; and (2) to deploy the best algorithm as an accessible web-based app for clinical use.

Methods

All patients with a microscopically confirmed diagnosis of conventional or dedifferentiated chondrosarcoma were extracted from the Surveillance, Epidemiology, and End Results (SEER) Registry from 2000 to 2010. SEER covers approximately 30% of the US population and consists of demographic, tumor characteristic, treatment, and outcome data. In total, 1554 patients met the inclusion criteria. Mean age at diagnosis was 52 years (SD 17), ranging from 7 to 102 years; 813 of the 1554 patients were men (55%); and mean tumor size was 8 cm (SD 6), ranging from 0.1 cm to 50 cm. Exact size was missing in 340 of 1544 patients (22%), grade in 88 of 1544 (6%), tumor extension in 41 of 1544 (3%), and race in 16 of 1544 (1%). Data for 1-, 3-, 5-, and 10-year overall survival were available for 1533 (99%), 1512 (98%), 1487 (96%), and 977 (63%) patients, respectively. One-year survival was 92%, 3-year survival was 82%, 5-year survival was 76%, and 10-year survival was 54%. Missing data were imputed using the nonparametric missForest method. Boosted decision tree, support vector machine, Bayes point machine, and neural network models were developed for 5-year survival. These models were chosen as a result of their capability of predicting two outcomes based on prior work on machine-learning models for binary classification. The models were assessed by discrimination, calibration, and overall performance. The c-statistic is a measure of discrimination. It ranges from 0.5 to 1.0 with 1.0 being perfect discrimination and 0.5 that the model is no better than chance at making a prediction. The Brier score measures the squared difference between the predicted probability and the actual outcome. A Brier score of 0 indicates perfect prediction, whereas a Brier score of 1 indicates the poorest prediction. The Brier scores of the models are compared with the null model, which is calculated by assigning each patient a probability equal to the prevalence of the outcome.

Results

Four models for 5-year survival were developed with c-statistics ranging from 0.846 to 0.868 and Brier scores ranging from 0.117 to 0.135 with a null model Brier score of 0.182. The Bayes point machine was incorporated into a freely available web-based application. This application can be accessed through https://sorg-apps.shinyapps.io/chondrosarcoma/.

Conclusions

Although caution is warranted, because the prediction model has not been validated yet, healthcare providers could use the online prediction tool in daily practice when survival prediction of patients with chondrosarcoma is desired. Future studies should seek to validate the developed prediction model.

Level of evidence

Level III, prognostic study.",2018-10-01 +29186325,glactools: a command-line toolset for the management of genotype likelihoods and allele counts.,"Motivation:Research projects involving population genomics routinely need to store genotyping information, population allele counts, combine files from different samples, query the data and export it to various formats. This is often done using bespoke in-house scripts, which cannot be easily adapted to new projects and seldom constitute reproducible workflows. Results:We introduce glactools, a set of command-line utilities that can import data from genotypes or population-wide allele counts into an intermediate representation, compute various operations on it and export the data to several file formats used by population genetics software. This intermediate format can take two forms, one to store per-individual genotype likelihoods and a second for allele counts from one or more individuals. glactools allows users to perform operations such as intersecting datasets, merging individuals into populations, creating subsets, perform queries (e.g. return sites where a given population does not share an allele with a second one) and compute summary statistics to answer biologically relevant questions. Availability and implementation:glactools is freely available for use under the GPL. It requires a C ++ compiler and the htslib library. The source code and the instructions about how to download test data are available on the website (https://grenaud.github.io/glactools/). Contact:gabriel.reno@gmail.com. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +29676732,Donated chemical probes for open science. ,"Potent, selective and broadly characterized small molecule modulators of protein function (chemical probes) are powerful research reagents. The pharmaceutical industry has generated many high-quality chemical probes and several of these have been made available to academia. 
However, probe-associated data and control compounds, such as inactive structurally related molecules and their associated data, are generally not accessible. The lack of data and guidance makes it difficult for researchers to decide which chemical tools to choose. Several pharmaceutical companies (AbbVie, Bayer, Boehringer Ingelheim, Janssen, MSD, Pfizer, and Takeda) have therefore entered into a pre-competitive collaboration to make available a large number of innovative high-quality probes, including all probe-associated data, control compounds and recommendations on use (https://openscienceprobes.sgc-frankfurt.de/). Here we describe the chemical tools and target-related knowledge that have been made available, and encourage others to join the project.",2018-04-20 +26935103,FlyBase portals to human disease research using Drosophila models.,"The use of Drosophila melanogaster as a model for studying human disease is well established, reflected by the steady increase in both the number and proportion of fly papers describing human disease models in recent years. In this article, we highlight recent efforts to improve the availability and accessibility of the disease model information in FlyBase (http://flybase.org), the model organism database for Drosophila. FlyBase has recently introduced Human Disease Model Reports, each of which presents background information on a specific disease, a tabulation of related disease subtypes, and summaries of experimental data and results using fruit flies. Integrated presentations of relevant data and reagents described in other sections of FlyBase are incorporated into these reports, which are specifically designed to be accessible to non-fly researchers in order to promote collaboration across model organism communities working in translational science. 
Another key component of disease model information in FlyBase is that data are collected in a consistent format --- using the evolving Disease Ontology (an open-source standardized ontology for human-disease-associated biomedical data) - to allow robust and intuitive searches. To facilitate this, FlyBase has developed a dedicated tool for querying and navigating relevant data, which include mutations that model a disease and any associated interacting modifiers. In this article, we describe how data related to fly models of human disease are presented in individual Gene Reports and in the Human Disease Model Reports. Finally, we discuss search strategies and new query tools that are available to access the disease model data in FlyBase.",2016-03-01 +29900925,Burden of dental diseases in India as compared to South Asia: An insight.,"

Introduction

In the recent past, the level of prevalence and incidence of dental caries and periodontal diseases in India and its comparison with South-Asian neighbors have not been reported. The manuscript estimates the same using the global disease burden 2016 approach.

Materials and methods

Secondary data analysis of primary data presented by Vos et al., 2016, was used for this study. Data from the global burden of disease, data at https://vizhub.healthdata.org/gbd-compare/ and http://ghdx.healthdata.org/gbd-results-tool, and data for permanent dental caries, periodontal diseases, and overall dental disorders for both genders (age standardized) at prevalence, incidence, and disability-adjusted life years (DALYs) were obtained for India and South Asian region and compared qualitatively.

Results

: For every 100,000 Indians, as compared to South Asian males (30,903 in every 100,000), Indian males (31,489) had prevalence of dental caries. Among females, the same was 33,926 for South Asians and 34,426 for Indians. Similarly, the 2016 incidence of dental caries was higher among Indians as compared to South Asians and more females suffered from dental caries than males, whereas a reversal of gender trend was observed with periodontal disease. Overall dental disorder burden from 1990 to 2016 is presented.

Conclusion

There is a difference between genders in the prevalence, incidence, and DALYs of caries of permanent dentition and that of periodontal diseases. As compared to South Asia, India has more burden of dental diseases. Indian dental workforce and oral health policy need to be realigned to counter the burden of oral disorders.",2018-05-01 +,New fossils from China elucidating the phylogeny of Praesiricidae (Insecta: Hymenoptera),"A new subfamily of Praesiricidae (Pamphilioidea), Decorisiricinae subfam.n., is erected based on three new genera: Decorisiricius gen.n., Limbisiricius gen.n. and Brevisiricius gen.n. Two new species – Decorisiricius patulus gen. et sp.n. and D. longus sp.n. – from the Lower Cretaceous Yixian Formation and three species –Limbisiricius aequalis gen. et sp.n., Limbisiricius complanatus sp.n. and Brevisiricius partialis gen. et sp.n. – from the Middle Jurassic Jiulongshan Formation, are described. Based on these well‐preserved new fossil specimens and previously published data, the nonmonophyly of Praesiricidae is confirmed and the phylogenetic relationships of species of Praesiricidae are analysed for the first time. Two main clades within Praesiricidae are recognized from the cladistic analysis: Decorisiricinae subfam.n. forms a monophyletic lineage, with the remaining members of Praesiricidae plus Megalodontes (Megalodontesidae) forming its sister group. The two subfamilies Archoxyelydinae and Praesiricinae are discarded with no strong supported synapomorphic characters based on phylogenetic research. A key to all genera of Praesiricidae is provided. 
This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:38D703ED‐127A‐4DB0‐8153‐8D78AF4AC212.",2016-01-01 +27794040,Genomes OnLine Database (GOLD) v.6: data updates and feature enhancements.,"The Genomes Online Database (GOLD) (https://gold.jgi.doe.gov) is a manually curated data management system that catalogs sequencing projects with associated metadata from around the world. In the current version of GOLD (v.6), all projects are organized based on a four level classification system in the form of a Study, Organism (for isolates) or Biosample (for environmental samples), Sequencing Project and Analysis Project. Currently, GOLD provides information for 26 117 Studies, 239 100 Organisms, 15 887 Biosamples, 97 212 Sequencing Projects and 78 579 Analysis Projects. These are integrated with over 312 metadata fields from which 58 are controlled vocabularies with 2067 terms. The web interface facilitates submission of a diverse range of Sequencing Projects (such as isolate genome, single-cell genome, metagenome, metatranscriptome) and complex Analysis Projects (such as genome from metagenome, or combined assembly from multiple Sequencing Projects). GOLD provides a seamless interface with the Integrated Microbial Genomes (IMG) system and supports and promotes the Genomic Standards Consortium (GSC) Minimum Information standards. This paper describes the data updates and additional features added during the last two years.",2016-10-27 +30478325,Interpretation of an individual functional genomics experiment guided by massive public data.,"A key unmet challenge in interpreting omics experiments is inferring biological meaning in the context of public functional genomics data. We developed a computational framework, Your Evidence Tailored Integration (YETI; http://yeti.princeton.edu/ ), which creates specialized functional interaction maps from large public datasets relevant to an individual omics experiment. 
Using this tailored integration, we predicted and experimentally confirmed an unexpected divergence in viral replication after seasonal or pandemic human influenza virus infection.",2018-11-26 +23667450,LipidHome: a database of theoretical lipids optimized for high throughput mass spectrometry lipidomics.,"Protein sequence databases are the pillar upon which modern proteomics is supported, representing a stable reference space of predicted and validated proteins. One example of such resources is UniProt, enriched with both expertly curated and automatic annotations. Taken largely for granted, similar mature resources such as UniProt are not available yet in some other ""omics"" fields, lipidomics being one of them. While having a seasoned community of wet lab scientists, lipidomics lies significantly behind proteomics in the adoption of data standards and other core bioinformatics concepts. This work aims to reduce the gap by developing an equivalent resource to UniProt called 'LipidHome', providing theoretically generated lipid molecules and useful metadata. Using the 'FASTLipid' Java library, a database was populated with theoretical lipids, generated from a set of community agreed upon chemical bounds. In parallel, a web application was developed to present the information and provide computational access via a web service. Designed specifically to accommodate high throughput mass spectrometry based approaches, lipids are organised into a hierarchy that reflects the variety in the structural resolution of lipid identifications. Additionally, cross-references to other lipid related resources and papers that cite specific lipids were used to annotate lipid records. The web application encompasses a browser for viewing lipid records and a 'tools' section where an MS1 search engine is currently implemented. 
LipidHome can be accessed at http://www.ebi.ac.uk/apweiler-srv/lipidhome.",2013-05-07 +30602089,STADIUM: Species-Specific tRNA Adaptive Index Compendium.,"Due to the increasing interest in synonymous codons, several codon bias-related terms were introduced. As one measure of them, the tRNA adaptation index (tAI) was invented about a decade ago. The tAI is a measure of translational efficiency for a gene and is calculated based on the abundance of intracellular tRNA and the binding strength between a codon and a tRNA. The index has been widely used in various fields of molecular evolution, genetics, and pharmacology. Afterwards, an improved version of the index, named specific tRNA adaptation index (stAI), was developed by adapting tRNA copy numbers in species. Although a subsequently developed webserver (stAIcalc) provided tools that calculated stAI values, it was not available to access pre-calculated values. In addition to about 100 species in stAIcalc, we calculated stAI values for whole coding sequences in 148 species. To enable easy access to this index, we constructed a novel web database, named STADIUM (Species-specific tRNA adaptive index compendium). STADIUM provides not only the stAI value of each gene but also statistics based on pathway-based classification. The database is expected to help researchers who have interests in codon optimality and the role of synonymous codons. 
STADIUM is freely available at http://stadium.pmrc.re.kr.",2018-12-28 +31287756,"Current Methods of Evaluating the Language Abilities of Multilingual Preschoolers: A Scoping Review Using the International Classification of Functioning, Disability and Health-Children and Youth Version.","Purpose The purpose of this scoping review was to identify current measures used to evaluate the language abilities of multilingual preschoolers within the framework of the International Classification of Functioning, Disability and Health-Children and Youth Version (ICF-CY; World Health Organization, 2007 ). Method This review adhered to established models for conducting a comprehensive, iterative scoping review outlined by Arksey and O'Malley (2005) and Levac, Colquhoun, and O'Brien (2010) and included the following phases: (a) articulating the research question; (b) identifying relevant studies; (c) selecting studies; (d) charting the data; and (e) collating, summarizing, and reporting the results. The ICF-CY was used to frame the identified measures ( World Health Organization, 2007 ). Results Three hundred twenty-five peer-reviewed publications were identified and included in this review. The majority of publications used measures that evaluated the activity component of multilingual preschoolers' language (70%), with few evaluating participation (9%). Most identified measures (73%) assessed children's semantic language skills. We also observed that 88% of studies explicitly measured children's language input to interpret assessment results. Conclusions A variety of measures are currently used that address the activity component of the ICF-CY with a particular emphasis on semantics. There is, however, a dearth of measures examining language abilities for participation. 
The authors strongly recommend an increased focus on the development, use, and evaluation of measures that explicitly assess multilingual preschoolers' language participation, particularly in school-based settings. Supplemental Material https://doi.org/10.23641/asha.8637206.",2019-07-09 +24828056,First report on the antibody verification of HLA-ABC epitopes recorded in the website-based HLA Epitope Registry.,"The International Registry of Antibody-Defined HLA Epitopes ( http://www.epregistry.com.br) has been recently established as a tool to understand humoral responses to human leukocyte antigen (HLA) mismatches. These epitopes are defined structurally by three-dimensional molecular modeling and amino acid sequence differences between HLA antigens. So-called eplets represent essential components of HLA epitopes and they are defined by polymorphic residues. A major goal is to identify HLA epitopes that have been verified experimentally with informative antibodies. Our analysis has also included data in many publications. As of 1 November 2013, 95 HLA-ABC antibody-verified epitopes have been recorded, 62 correspond to eplets and 33 are defined by eplets paired with other residue configurations. The Registry is still a work-in-progress and will become a useful resource for HLA professionals interested in histocompatibility testing at the epitope level and investigating antibody responses to HLA mismatches in transplant patients.",2014-06-01 +31596611,Dietary Habits Related to Food Packaging and Population Exposure to PFASs.,"

Background

Per- and polyfluoroalkyl substances (PFASs) are common industrial and consumer product chemicals with widespread human exposures that have been linked to adverse health effects. PFASs are commonly detected in foods and food-contact materials (FCMs), including fast food packaging and microwave popcorn bags.

Objectives

Our goal was to investigate associations between serum PFASs and consumption of restaurant food and popcorn in a representative sample of Americans.

Methods

We analyzed 2003-2014 serum PFAS and dietary recall data from the National Health and Nutrition Examination Survey (NHANES). We used multivariable linear regressions to investigate relationships between consumption of fast food, restaurant food, food eaten at home, and microwave popcorn and serum levels of perfluorooctanoic acid (PFOA), perfluorononanoic acid (PFNA), perfluorodecanoic acid (PFDA), perfluorohexanesulfonic acid (PFHxS), and perfluorooctanesulfonic acid (PFOS).

Results

Calories of food eaten at home in the past 24 h had significant inverse associations with serum levels of all five PFASs; these associations were stronger in women. Consumption of meals from fast food/pizza restaurants and other restaurants was generally associated with higher serum PFAS concentrations, based on 24-h and 7-d recall, with limited statistical significance. Consumption of popcorn was associated with significantly higher serum levels of PFOA, PFNA, PFDA, and PFOS, based on 24-h and 12-month recall, up to a 63% (95% CI: 34, 99) increase in PFDA among those who ate popcorn daily over the last 12 months.

Conclusions

Associations between serum PFAS and popcorn consumption may be a consequence of PFAS migration from microwave popcorn bags. Inverse associations between serum PFAS and food eaten at home-primarily from grocery stores-is consistent with less contact between home-prepared food and FCMs, some of which contain PFASs. The potential for FCMs to contribute to PFAS exposure, coupled with concerns about toxicity and persistence, support the use of alternatives to PFASs in FCMs. https://doi.org/10.1289/EHP4092.",2019-10-09 +31276053,Shoulder Reduction Bench Project: improving care for patients with shoulder dislocations.,"This paper presents the background, methodology and results of a quality improvement project undertaken at a district general hospital. The project was launched in response to the concerning results from audit data which showed significant delays in the treatment of patients with shoulder dislocations and a high percentage of patients receiving procedural sedation. Using 'Plan-Do-Study-Act' cycles involving training sessions, written protocols and an online video, we were able to train a large cohort of nurse practitioners in the use of the Shoulder Reduction Bench. This is a relatively novel, evidence-based technique for reducing shoulder dislocations without the need for sedation. The new shoulder dislocation protocol was successful in reducing the average time from presentation to shoulder relocation by 31 min and the average time from presentation to discharge by 52 min. It also resulted in a 68% reduction in the number of patients receiving procedural sedation over a 6-month period. This project inspired the practitioners, most of whom had never reduced a shoulder dislocation before. The success of the new shoulder reduction bench protocol prompted interest from the trust's innovation department and has been publicised both within the trust and regionally. 
This publicity and the satisfaction gained by the staff from this effective new skill have helped to anchor the change in departmental culture. Link to training video: https://www.youtube.com/watch?v=40aCqhfQXD4&feature=youtu.be.",2019-06-14 +30047895,Disruption of Protein Complexes from Weighted Complex Networks.,"Essential proteins are indispensable units for living organisms. Removing those leads to disruption of protein complexes and causing lethality. Recently, theoretical methods have been presented to detect essential proteins in protein interaction network. In these methods, an essential protein is predicted as a high-degree vertex of protein interaction network. However, interaction data are usually incomplete and an essential protein cannot have high-connection due to data deficiency. Then, it is critical to design informative networks from other biological data sources. In this paper, we defined a minimal set of proteins to disrupt the maximum number of protein complexes. We constructed a weighted graph using a set of given complexes. We proposed a more appropriate method based on betweenness values to diagnose a minimal set of proteins whose removal would generate the disruption of protein complexes. The effectiveness of the proposed method was benchmarked using given dataset of complexes. The results of our method were compared to the results of other methods in terms of the number of disrupted complexes. Also, results indicated significant superiority of the minimal set of proteins in the massive disruption of complexes. Finally, we investigated the performance of our method for yeast and human datasets and analyzed biological properties of the selected proteins. Our algorithm and some example are freely available from http://bs.ipm.ac.ir/softwares/DPC/DPC.zip.",2018-07-25 +22439011,microPIR: an integrated database of microRNA target sites within human promoter sequences.,"

Background

microRNAs are generally understood to regulate gene expression through binding to target sequences within 3'-UTRs of mRNAs. Therefore, computational prediction of target sites is usually restricted to these gene regions. Recent experimental studies though have suggested that microRNAs may alternatively modulate gene expression by interacting with promoters. A database of potential microRNA target sites in promoters would stimulate research in this field leading to more understanding of complex microRNA regulatory mechanism.

Methodology

We developed a database hosting predicted microRNA target sites located within human promoter sequences and their associated genomic features, called microPIR (microRNA-Promoter Interaction Resource). microRNA seed sequences were used to identify perfect complementary matching sequences in the human promoters and the potential target sites were predicted using the RNAhybrid program. >15 million target sites were identified which are located within 5000 bp upstream of all human genes, on both sense and antisense strands. The experimentally confirmed argonaute (AGO) binding sites and EST expression data including the sequence conservation across vertebrate species of each predicted target are presented for researchers to appraise the quality of predicted target sites. The microPIR database integrates various annotated genomic sequence databases, e.g. repetitive elements, transcription factor binding sites, CpG islands, and SNPs, offering users the facility to extensively explore relationships among target sites and other genomic features. Furthermore, functional information of target genes including gene ontologies, KEGG pathways, and OMIM associations are provided. The built-in genome browser of microPIR provides a comprehensive view of multidimensional genomic data. Finally, microPIR incorporates a PCR primer design module to facilitate experimental validation.

Conclusions

The proposed microPIR database is a useful integrated resource of microRNA-promoter target interactions for experimental microRNA researchers and computational biologists to study the microRNA regulation through gene promoter. The database can be freely accessed from: http://www4a.biotec.or.th/micropir.",2012-03-16 +23758844,Monitoring the antigenic evolution of human influenza A viruses to understand how and when viruses escape from existing immunity.,"

Background

The World Health Organization (WHO) organizes consultations in February and September of each year, spearheaded by an advisory group of experts to analyze influenza surveillance data generated by the WHO Global Influenza Surveillance and Response System (GISRS). The purpose of these consultations is to recommend the composition on influenza virus vaccines for the northern and southern hemispheres, respectively. The latest news of influenza viruses is made available to the public and updated on the WHO website. Although WHO discloses the manner in which it has made the recommendation, usually by considering epidemiological and clinical information to analyze the antigenic and genetic characteristics of seasonal influenza viruses, most individuals do not possess an understanding of antigenic drift and when it occurs.

Findings

We have constructed a web server, named Fluctrl, and implemented a pipeline whereby HA sequence data is downloaded from the Influenza Virus Resource at NCBI along with their isolation information including isolation year and location, which are parsed and managed in MySQL database. By analyzing the frequency of each amino acid residue of the HA1 domain expressed by the viruses on annual basis, users are able to obtain evolutionary dynamics of human influenza viruses corresponding with epidemics. Users are able to upload and analyze their HA1 sequences for generating evolutionary dynamics. In addition, a distribution of amino acid residues at a particular site is represented geographically to trace the location where antigenic variants are seeded.

Conclusions

Fluctrl is constructed for monitoring the antigenic evolution of human influenza A viruses. This tool is intended to inform the general public how and when influenza viruses evade the human body's immunity. Furthermore, leveraging the geographic information, the original locations of emerging influenza viruses can be traced. Fluctrl is freely accessible at http://sb.nhri.org.tw/fluctrl.",2013-06-11 +31217121,The KIMORE Dataset: KInematic Assessment of MOvement and Clinical Scores for Remote Monitoring of Physical REhabilitation.,"This paper proposes a free dataset, available at the following link,1named KIMORE, regarding different rehabilitation exercises collected by a RGB-D sensor. Three data inputs including RGB, depth videos, and skeleton joint positions were recorded during five physical exercises, specific for low back pain and accurately selected by physicians. For each exercise, the dataset also provides a set of features, specifically defined by the physicians, and relevant to describe its scope. These features, validated with respect to a stereophotogrammetric system, can be analyzed to compute a score for the subject's performance. The dataset also contains an evaluation of the same performance provided by the clinicians, through a clinical questionnaire. The impact of KIMORE has been analyzed by comparing the output obtained by an example of rule and template-based approaches and the clinical score. The dataset presented is intended to be used as a benchmark for human movement assessment in a rehabilitation scenario in order to test the effectiveness and the reliability of different computational approaches. Unlike other existing datasets, the KIMORE merges a large heterogeneous population of 78 subjects, divided into 2 groups with 44 healthy subjects and 34 with motor dysfunctions. 
It provides the most clinically-relevant features and the clinical score for each exercise.1https://univpm-my.sharepoint.com/:f:/g/personal/p008099_staff_univpm_it/EiwbKIzk6N9NoJQx4J8aubIBx0o7tIa1XwclWp1NmRkA-w?e=F3jtBk.",2019-06-14 +31487205,Evaluating Chemicals for Thyroid Disruption: Opportunities and Challenges with in Vitro Testing and Adverse Outcome Pathway Approaches.,"

Background

Extensive clinical and experimental research documents the potential for chemical disruption of thyroid hormone (TH) signaling through multiple molecular targets. Perturbation of TH signaling can lead to abnormal brain development, cognitive impairments, and other adverse outcomes in humans and wildlife. To increase chemical safety screening efficiency and reduce vertebrate animal testing, in vitro assays that identify chemical interactions with molecular targets of the thyroid system have been developed and implemented.

Objectives

We present an adverse outcome pathway (AOP) network to link data derived from in vitro assays that measure chemical interactions with thyroid molecular targets to downstream events and adverse outcomes traditionally derived from in vivo testing. We examine the role of new in vitro technologies, in the context of the AOP network, in facilitating consideration of several important regulatory and biological challenges in characterizing chemicals that exert effects through a thyroid mechanism.

Discussion

There is a substantial body of knowledge describing chemical effects on molecular and physiological regulation of TH signaling and associated adverse outcomes. Until recently, few alternative nonanimal assays were available to interrogate chemical effects on TH signaling. With the development of these new tools, screening large libraries of chemicals for interactions with molecular targets of the thyroid is now possible. Measuring early chemical interactions with targets in the thyroid pathway provides a means of linking adverse outcomes, which may be influenced by many biological processes, to a thyroid mechanism. However, the use of in vitro assays beyond chemical screening is complicated by continuing limits in our knowledge of TH signaling in important life stages and tissues, such as during fetal brain development. Nonetheless, the thyroid AOP network provides an ideal tool for defining causal linkages of a chemical exerting thyroid-dependent effects and identifying research needs to quantify these effects in support of regulatory decision making. https://doi.org/10.1289/EHP5297.",2019-09-05 +26177815,Critical evaluation of in silico methods for prediction of coiled-coil domains in proteins.,"Coiled-coils refer to a bundle of helices coiled together like strands of a rope. It has been estimated that nearly 3% of protein-encoding regions of genes harbour coiled-coil domains (CCDs). Experimental studies have confirmed that CCDs play a fundamental role in subcellular infrastructure and controlling trafficking of eukaryotic cells. Given the importance of coiled-coils, multiple bioinformatics tools have been developed to facilitate the systematic and high-throughput prediction of CCDs in proteins. In this article, we review and compare 12 sequence-based bioinformatics approaches and tools for coiled-coil prediction. These approaches can be categorized into two classes: coiled-coil detection and coiled-coil oligomeric state prediction. 
We evaluated and compared these methods in terms of their input/output, algorithm, prediction performance, validation methods and software utility. All the independent testing data sets are available at http://lightning.med.monash.edu/coiledcoil/. In addition, we conducted a case study of nine human polyglutamine (PolyQ) disease-related proteins and predicted CCDs and oligomeric states using various predictors. Prediction results for CCDs were highly variable among different predictors. Only two peptides from two proteins were confirmed to be CCDs by majority voting. Both domains were predicted to form dimeric coiled-coils using oligomeric state prediction. We anticipate that this comprehensive analysis will be an insightful resource for structural biologists with limited prior experience in bioinformatics tools, and for bioinformaticians who are interested in designing novel approaches for coiled-coil and its oligomeric state prediction.",2015-07-15 +30590004,KIF11 Functions as an Oncogene and Is Associated with Poor Outcomes from Breast Cancer.,"

Purpose

The study aimed to search and identify genes that were differentially expressed in breast cancer, and their roles in cancer growth and progression.

Materials and methods

The Gene Expression Omnibus (Oncomine) and The Cancer Genome Atlas databases (https://cancergenome.nih.gov/) were screened for genes that were expressed differentially in breast cancer and were closely related to a poor prognosis. Gene expressions were verified by quantitative real-time polymerase chain reaction, and genes were knocked down by a lentivirus-based system. Cell growth and motility were evaluated and in vivo nude mice were used to confirm the in vitro roles of genes. Markers of epithelial-to-mesenchymal transition and the associations of KIF11 with the classical cancer signaling pathways were detected by Western blot.

Results

A series of genes expressed differentially in patients with breast cancer. The prognosis associated with high KIF11 expression was poor, and the expression of KIF11 increased significantly in high stage and malignant tumor cells. Inhibiting KIF11 expression in lentivirus-suppressed cells revealed that KIF11 inhibition significantly reduced cell viability and colony formation, inhibited migration and invasion, but promoted apoptosis. The sizes and weights of KIF11-inhibited tumors in nude mice were significantly lower than in the negative controls. Western blot showed that E-cadherin in breast cancer was significantly upregulated in KIF-inhibited cells and tumor tissues, whereas N-cadherin and vimentin were significantly downregulated. BT549 and MDA231 cells with KIF11 knockdown exhibited decreased ERK, AMPK, AKT, and CREB phosphorylation.

Conclusion

KIF11 acts as a potential oncogene that regulates the development and progression of breast cancer.",2018-12-27 +31201162,Identification of Novel RAS Signaling Therapeutic Vulnerabilities in Diffuse Intrinsic Pontine Gliomas.,"Diffuse intrinsic pontine gliomas (DIPG) are incurable brain tumors with an aggressive onset. Apart from irradiation, there are currently no effective therapies available for patients with DIPG, who have a median survival time of less than one year. Most DIPG cells harbor mutations in genes encoding histone H3 (H3K27M) proteins, resulting in a global reduction of H3K27 trimethylation and activation of oncogenic signaling pathways. Here we show that the H3K27M mutations contribute to RAS pathway signaling, which is augmented by additional RAS activators including PDGFRA. H3K27M mutation led to increased expression of receptor tyrosine kinases (RTK). A RAS pathway functional screen identified ERK5, but not ERK1/2, as a RAS pathway effector important for DIPG growth. Suppression of ERK5 decreased DIPG cell proliferation and induced apoptosis in vitro and in vivo. In addition, depletion or inhibition of ERK5 significantly increased survival of mice intracranially engrafted with DIPG cells. Mechanistically, ERK5 directly stabilized the proto-oncogene MYC at the protein level. Collectively, our data demonstrate an underappreciated role of H3K27M in RAS activation and reveal novel therapeutic targets for treating DIPG tumors. SIGNIFICANCE: These findings identify the H3K27M mutation as an enhancer of RAS activation in DIPG and ERK5 as a novel, immediately actionable molecular target. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/79/16/4026/F1.large.jpg.",2019-06-14 +27549386,A high resolution map of the Arabidopsis thaliana developmental transcriptome based on RNA-seq profiling.,"Arabidopsis thaliana is a long established model species for plant molecular biology, genetics and genomics, and studies of A. 
thaliana gene function provide the basis for formulating hypotheses and designing experiments involving other plants, including economically important species. A comprehensive understanding of the A. thaliana genome and a detailed and accurate understanding of the expression of its associated genes is therefore of great importance for both fundamental research and practical applications. Such goal is reliant on the development of new genetic and genomic resources, involving new methods of data acquisition and analysis. We present here the genome-wide analysis of A. thaliana gene expression profiles across different organs and developmental stages using high-throughput transcriptome sequencing. The expression of 25 706 protein-coding genes, as well as their stability and their spatiotemporal specificity, was assessed in 79 organs and developmental stages. A search for alternative splicing events identified 37 873 previously unreported splice junctions, approximately 30% of them occurred in intergenic regions. These potentially represent novel spliced genes that are not included in the TAIR10 database. These data are housed in an open-access web-based database, TraVA (Transcriptome Variation Analysis, http://travadb.org/), which allows visualization and analysis of gene expression profiles and differential gene expression between organs and developmental stages.",2016-11-19 +28764323,LipidCCS: Prediction of Collision Cross-Section Values for Lipids with High Precision To Support Ion Mobility-Mass Spectrometry-Based Lipidomics.,"The use of collision cross-section (CCS) values derived from ion mobility-mass spectrometry (IM-MS) has been proven to facilitate lipid identifications. Its utility is restricted by the limited availability of CCS values. Recently, the machine-learning algorithm-based prediction (e.g., MetCCS) is reported to generate CCS values in a large-scale. 
However, the prediction precision is not sufficient to differentiate lipids due to their high structural similarities and subtle differences on CCS values. To address this challenge, we developed a new approach, namely, LipidCCS, to precisely predict lipid CCS values. In LipidCCS, a set of molecular descriptors were optimized using bioinformatic approaches to comprehensively describe the subtle structure differences for lipids. The use of optimized molecular descriptors together with a large set of standard CCS values for lipids (458 in total) to build the prediction model significantly improved the precision. The prediction precision of LipidCCS was externally validated with median relative errors (MRE) of ∼1% using independent data sets across different instruments (Agilent DTIM-MS and Waters TWIM-MS) and laboratories. We also demonstrated that the improved precision in the predicted LipidCCS database (15 646 lipids and 63 434 CCS values in total) could effectively reduce false-positive identifications of lipids. Common users can freely access our LipidCCS web server for the following: (1) the prediction of lipid CCS values directly from SMILES structure; (2) database search; and (3) lipid match and identification. We believe LipidCCS will be a valuable tool to support IM-MS-based lipidomics. The web server is freely available on the Internet ( http://www.metabolomics-shanghai.org/LipidCCS/ ).",2017-08-15 +,P06.19 TERT promoter mutation is an independent prognostic factor in 1p/19q co-deleted oligodendrogliomas: a POLA network study,"Abstract

Background:

Overall, gliomas harboring 1p19q co-deletion (oligodendrogliomas in the future WHO classification) have better prognosis and better response to treatments compared to their non 1p/19q co-deleted counterparts. However, some of these tumors have disconcerting poor outcome. We have recently shown that chromosome arm 9p loss of heterozygosity (9pLOH) participates to identify some but not all oligodendrogliomas with an aggressive clinical behavior. In parallel, TERT promoter (TERTp) mutations have been recently described as the most frequent mutations in oligodendrogliomas.

Objectives:

We sought to study the potential clinical significance of TERTp mutational status in oligodendrogliomas.

Material and methods:

224 anaplastic oligodendrogliomas (AO) from the POLA network, were included in the present study. TERT mutational status was obtained using Sanger sequencing and SNP-array. 121 lower grade gliomas exhibiting 1p/19q co-deletion, from the TCGA dataset, were used as the validation cohort. In this latter series, level of TERT expression, obtained by RNA-sequencing, was used as a surrogate marker of TERTp mutation. 1p/19q co-deletion was defined as loss of whole-chromosome arms 1p and 19q using allelic-specific copy number analysis. Overall survivals (OS) were compared using log-rank test in univariate analysis. All covariates with p-value < 0.2 were included in a Cox’s proportional hazard ratio model for multivariate analysis. Statistical tests were two-sided and p-values < 0.05 were interpreted as statistically significant.

Results:

TERTp was wild-type (wt) in 14/224 AO (6.25%) in POLA cohort and in 8/121 (6.6%) in TCGA dataset. TERTp wt is associated with poor OS in univariate analysis in POLA and TCGA cohorts (p=0.032 and p=0.035, respectively). Interestingly, after adjusting for allelic 9p LOH, age (<50 vs. ≥ 50 years), treatment (chemotherapy and radiotherapy versus other treatments) and WHO grade (II vs III), TERTp mutation was an independent favorable prognostic factor in the POLA cohort (p=0.037, HR=3.7 (1.1–13)) and in the TCGA dataset (p=0.01, HR=9.8 (1.6–60)).

Conclusion:

This study identifies TERTp mutation as a novel independent prognostic biomarker in oligodendrogliomas toward a better stratification of this tumor type. Acknowledgements: The results shown here are in whole or part based upon data generated by the TCGA Research Network: http://cancergenome.nih.gov/. La Ligue Nationale Contre La Cancer. The Institut Universitaire de Cancérologie (IUC). The program Investissements d’avenir” ANR-10-IAIHU-06. POLA network is supported by Institut National du Cancer.",2016-09-21 +30593925,"Macro-geographical specificities of the prevailing tuberculosis epidemic as seen through SITVIT2, an updated version of the Mycobacterium tuberculosis genotyping database.","In order to provide a global overview of genotypic, epidemiologic, demographic, phylogeographical, and drug resistance characteristics related to the prevailing tuberculosis (TB) epidemic, we hereby report an update of the 6th version of the international genotyping database SITVIT2. We also make all the available information accessible through a dedicated website (available at http://www.pasteur-guadeloupe.fr:8081/SITVIT2). Thanks to the public release of SITVIT2 which is currently the largest international multimarker genotyping database with a compilation of 111,635 clinical isolates from 169 countries of patient origin (131 countries of isolation, representing 1032 cities), our major aim is to highlight macro- and micro-geographical cleavages and phylogeographical specificities of circulating Mycobacterium tuberculosis complex (MTBC) clones worldwide. For this purpose, we retained strains typed by the most commonly used PCR-based methodology for TB genotyping, i.e., spoligotyping based on the polymorphism of the direct repeat (DR) locus, 5-loci Exact Tandem Repeats (ETRs), and MIRU-VNTR minisatellites used in 12-, 15-, or 24-loci formats. 
We describe the SITVIT2 database and integrated online applications that permit to interrogate the database using easy drop-down menus to draw maps, graphics and tables versus a long list of parameters and variables available for individual clinical isolates (year and place of isolation, origin, sex, and age of patient, drug-resistance, etc.). Available tools further allow to generate phylogenetical snapshot of circulating strains as Lineage-specific WebLogos, as well as minimum spanning trees of their genotypes in conjunction with their geographical distribution, drug-resistance, demographic, and epidemiologic characteristics instantaneously; whereas online statistical analyses let a user to pinpoint phylogeographical specificities of circulating MTBC lineages and conclude on actual demographic trends. Available associated information on gender (n = 18,944), age (n = 16,968), drug resistance (n = 19,606), and HIV serology (n = 2673), allowed to draw some important conclusions on TB geo-epidemiology; e.g. a positive correlation exists between certain Mycobacterium tuberculosis lineages (such as CAS and Beijing) and drug resistance (p-value<.001), while other lineages (such as LAM, X, and BOV) are more frequently associated with HIV-positive serology (p-value<.001). Besides, availability of information on the year of isolation of strains (range 1759-2012), also allowed to make tentative correlations between drug resistance information and lineages - portraying probable evolution trends over time and space. 
To conclude, the present approach of geographical mapping of predominant clinical isolates of tubercle bacilli causing the bulk of the disease both at country and regional level in conjunction with epidemiologic and demographic characteristics allows to shed new light on TB geo-epidemiology in relation with the continued waves of peopling and human migration.",2018-12-26 +,P06.18 DNA methylation distance score in lower-grade gliomas has prognostic value: a POLA network study,"Abstract

Background:

CpG Island Methylator Phenotype (CIMP) is strongly associated with IDH1/2 mutations that are frequently found in lower-grade gliomas (LGG: WHO grade II and WHO grade III). In addition, LGG have been recently divided in three histomolecular subgroups with clinical relevance: (i) IDH wild type, (ii) IDH mutated without 1p/19q co-deletion and (iii) IDH mutated with 1p/19q co-deletion. However, the study of intra-tumor methylation heterogeneity in LGG has not been fully elucidated, so far.

Objectives:

We sought to study the intra-tumor methylation heterogeneity in LGG.

Material and methods:

108 LGG from the POLA network, as the discovery cohort, were included in the present study. 273 lower grade gliomas from the TCGA dataset were used as the validation cohort. Methylation analysis was performed using Infinium Human Methylation 450 Illumina BeadChip arrays in both cohorts. We also used a publicly available methylation dataset of adult normal brain (GSE36278). A phylogenetic methylation score was calculated comparing normal brain to the different molecular subgroups of LGG. Overall survivals (OS) were compared using log-rank test in univariate analysis. All covariates with p-value < 0.2 were included in a Cox’s proportional hazard ratio model for multivariate analysis. Statistical tests were two-sided and p-values < 0.05 were interpreted as statistically significant.

Results:

Mean promoter-level methylation was strongly correlated with the different molecular subgroups of LGG. Interestingly, we have identified a subgroup of 1p/19q co-deleted LGG harboring a significantly higher level of CIMP (hCIMP). In parallel, this particular subgroup of hCIMP exhibits the highest intra-tumor methylation level heterogeneity. Finally, using a phylogenetic clustering approach, we have identified a methylation heterogeneity score derived from distances of the methylation pattern of each tumor that is associated with poor prognosis in Cox’s proportional hazard model after adjusting by age, WHO grade, IDH1/2 mutation and 1p/19q co-deletion. These findings were validated in the TCGA dataset.

Conclusion:

We have identified a subgroup of 1p/19q co-deleted LGG with hCIMP and higher intra-tumor methylation heterogeneity. In addition, a phylogenetic methylation score is a potential new prognostic biomarker in LGG. Acknowledgements: The results shown here are in whole or part based upon data generated by the TCGA Research Network: http://cancergenome.nih.gov/. La Ligue Nationale Contre Le Cancer. The Institut Universitaire de Cancérologie (IUC). The program “Investissements d’avenir” ANR-10-IAIHU-06. POLA network is supported by Institut National du Cancer.",2016-09-21 +22700702,RegPrecise web services interface: programmatic access to the transcriptional regulatory interactions in bacteria reconstructed by comparative genomics.,"Web services application programming interface (API) was developed to provide a programmatic access to the regulatory interactions accumulated in the RegPrecise database (http://regprecise.lbl.gov), a core resource on transcriptional regulation for the microbial domain of the Department of Energy (DOE) Systems Biology Knowledgebase. RegPrecise captures and visualizes regulogs, sets of genes controlled by orthologous regulators in several closely related bacterial genomes, that were reconstructed by comparative genomics. The current release of RegPrecise 2.0 includes >1400 regulogs controlled either by protein transcription factors or by conserved ribonucleic acid regulatory motifs in >250 genomes from 24 taxonomic groups of bacteria. The reference regulons accumulated in RegPrecise can serve as a basis for automatic annotation of regulatory interactions in newly sequenced genomes. The developed API provides an efficient access to the RegPrecise data by a comprehensive set of 14 web service resources. 
The RegPrecise web services API is freely accessible at http://regprecise.lbl.gov/RegPrecise/services.jsp with no login requirements.",2012-06-14 +30956147,Cheminformatics Tools for Analyzing and Designing Optimized Small-Molecule Collections and Libraries.,"Libraries of well-annotated small molecules have many uses in chemical genetics, drug discovery, and therapeutic repurposing. Multiple libraries are available, but few data-driven approaches exist to compare them and design new libraries. We describe an approach to scoring and creating libraries based on binding selectivity, target coverage, and induced cellular phenotypes as well as chemical structure, stage of clinical development, and user preference. The approach, available via the online tool http://www.smallmoleculesuite.org, assembles sets of compounds with the lowest possible off-target overlap. Analysis of six kinase inhibitor libraries using our approach reveals dramatic differences among them and led us to design a new LSP-OptimalKinase library that outperforms existing collections in target coverage and compact size. We also describe a mechanism of action library that optimally covers 1,852 targets in the liganded genome. Our tools facilitate creation, analysis, and updates of both private and public compound collections.",2019-04-04 +27987169,"Using TropGeneDB: A Database Containing Data on Molecular Markers, QTLs, Maps, Genotypes, and Phenotypes for Tropical Crops.","TropGeneDB ( http://tropgenedb.cirad.fr ) is a web database that manages genomic, genetic, and phenotypic information on tropical crops. It is organized on a crop basis with currently nine public modules: banana, cocoa, coconut, coffee, cotton, oil palm, rice, rubber tree, and sugarcane. TropGeneDB contains data on molecular markers, quantitative trait loci (QTLs), genetic and physical maps, genotyping and phenotyping studies, and information on genetic resources (geographic origin, parentage, collection). 
Crop-specific web interfaces have been designed to allow quick consultations as well as personalized complex queries.",2017-01-01 +26977448,SILAC based protein profiling data of MKK3 knockout mouse embryonic fibroblasts.,"This data article reports changes in the phospho and total proteome of MKK3 knock out (MKK3(-) (/) (-)) mouse embryonic fibroblasts (MEFs). The dataset generated highlights the changes at protein level which can be helpful for understanding targets of the MAP kinase signaling pathway. Data was collected after TiO2-based phosphopeptide enrichment of whole cell lysate at baseline condition with bottom-up SILAC-based LC MS/MS quantitative mass spectrometry. We report all the proteins and peptides identified and quantified in MKK3(-/-) and WT MEFs. The altered pathways in MKK3(-/-) MEFs were analyzed by Database for Annotation, Visualization and Integrated Discovery (DAVID, v6.7) and Ingenuity Pathway Analysis (IPA) and are presented as a table and graph, respectively. The data reported here is related to the published work [1]. All the associated mass spectrometry data has been deposited in the Yale Protein Expression Database (YPED) with the web-link to the data: http://yped.med.yale.edu/repository/ViewSeriesMenu.do;jsessionid=6A5CB07543D8B529FAE8C3FCFE29471D?series_id=5044&series_name=MMK3+Deletion+in+MEFs.",2016-03-02 +30474154,Estimating cross-population genetic correlations of causal effect sizes.,"Recent studies have examined the genetic correlations of single-nucleotide polymorphism (SNP) effect sizes across pairs of populations to better understand the genetic architectures of complex traits. These studies have estimated ρ g , the cross-population correlation of joint-fit effect sizes at genotyped SNPs. However, the value of ρ g depends both on the cross-population correlation of true causal effect sizes ( ρ b ) and on the similarity in linkage disequilibrium (LD) patterns in the two populations, which drive tagging effects. 
Here, we derive the value of the ratio ρ g / ρ b as a function of LD in each population. By applying existing methods to obtain estimates of ρ g , we can use this ratio to estimate ρ b . Our estimates of ρ b were equal to 0.55 ( SE = 0.14) between Europeans and East Asians averaged across nine traits in the Genetic Epidemiology Research on Adult Health and Aging data set, 0.54 ( SE = 0.18) between Europeans and South Asians averaged across 13 traits in the UK Biobank data set, and 0.48 ( SE = 0.06) and 0.65 ( SE = 0.09) between Europeans and East Asians in summary statistic data sets for type 2 diabetes and rheumatoid arthritis, respectively. These results implicate substantially different causal genetic architectures across continental populations.",2018-11-25 +24773765,A customized Web portal for the genome of the ctenophore Mnemiopsis leidyi.,"

Background

Mnemiopsis leidyi is a ctenophore native to the coastal waters of the western Atlantic Ocean. A number of studies on Mnemiopsis have led to a better understanding of many key biological processes, and these studies have contributed to the emergence of Mnemiopsis as an important model for evolutionary and developmental studies. Recently, we sequenced, assembled, annotated, and performed a preliminary analysis on the 150-megabase genome of the ctenophore, Mnemiopsis. This sequencing effort has produced the first set of whole-genome sequencing data on any ctenophore species and is amongst the first wave of projects to sequence an animal genome de novo solely using next-generation sequencing technologies.

Description

The Mnemiopsis Genome Project Portal (http://research.nhgri.nih.gov/mnemiopsis/) is intended both as a resource for obtaining genomic information on Mnemiopsis through an intuitive and easy-to-use interface and as a model for developing customized Web portals that enable access to genomic data. The scope of data available through this Portal goes well beyond the sequence data available through GenBank, providing key biological information not available elsewhere, such as pathway and protein domain analyses; it also features a customized genome browser for data visualization.

Conclusions

We expect that the availability of these data will allow investigators to advance their own research projects aimed at understanding phylogenetic diversity and the evolution of proteins that play a fundamental role in metazoan development. The overall approach taken in the development of this Web site can serve as a viable model for disseminating data from whole-genome sequencing projects, framed in a way that best-serves the specific needs of the scientific community.",2014-04-28 +30040660,GaMRed-Adaptive Filtering of High-Throughput Biological Data.,"Data filtering based on removing non-informative features, with unchanged signal between compared experimental conditions, can significantly increase sensitivity of methods used to detect differentially expressed genes or other molecular components measured in high-throughput biological experiments. Criteria for data filtering can be stated on the basis of averages or variances of signal levels across samples. The crucial parts of feature filtering are selection of filter type and cut-off threshold, which are specific to the particular dataset. In this paper, we present an algorithm and a stand-alone application, GaMRed, for adaptive filtering insignificant features in high-throughput data, based on Gaussian mixture decomposition. We have tested the performance of our algorithm using datasets from three different high-throughput biological experiments. We estimated the number of differentially expressed features after applying multiple testing correction and performed functional analysis of obtained features using Gene Ontology terms. Also, we checked if the control of false discovery rate and family-wise error rate after applying feature filtering remains at appropriate level. GaMRed is fast, automatic, and does not require expert knowledge in parameter tuning. The algorithm increases sensitivity of methods used to find differentially expressed features and biological validity of the findings. 
The program can be downloaded from: http://zaed.aei.polsl.pl/index.php/pl/oprogramowanie-zaed.",2018-07-23 +29490021,ShinyKGode: an interactive application for ODE parameter inference using gradient matching.,"Motivation:Mathematical modelling based on ordinary differential equations (ODEs) is widely used to describe the dynamics of biological systems, particularly in systems and pathway biology. Often the kinetic parameters of these ODE systems are unknown and have to be inferred from the data. Approximate parameter inference methods based on gradient matching (which do not require performing computationally expensive numerical integration of the ODEs) have been getting popular in recent years, but many implementations are difficult to run without expert knowledge. Here, we introduce ShinyKGode, an interactive web application to perform fast parameter inference on ODEs using gradient matching. Results:ShinyKGode can be used to infer ODE parameters on simulated and observed data using gradient matching. Users can easily load their own models in Systems Biology Markup Language format, and a set of pre-defined ODE benchmark models are provided in the application. Inferred parameters are visualized alongside diagnostic plots to assess convergence. Availability and implementation:The R package for ShinyKGode can be installed through the Comprehensive R Archive Network (CRAN). Installation instructions, as well as tutorial videos and source code are available at https://joewandy.github.io/shinyKGode. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +30806479,Longitudinal fan-beam computed tomography dataset for head-and-neck squamous cell carcinoma patients.,"

Purpose

To describe in detail a dataset consisting of longitudinal fan-beam computed tomography (CT) imaging to visualize anatomical changes in head-and-neck squamous cell carcinoma (HNSCC) patients throughout radiotherapy (RT) treatment course.

Acquisition and validation methods

This dataset consists of CT images from 31 HNSCC patients who underwent volumetric modulated arc therapy (VMAT). Patients had three CT scans acquired throughout the duration of the radiation treatment course: a pretreatment planning CT at a median of 13 days before treatment (range: 2-27), a mid-treatment CT at 22 days after start of treatment (range: 13-38), and a post-treatment CT at 65 days after start of treatment (range: 35-192). Patients received RT treatment to a total dose of 58-70 Gy, using daily fractions of 2.0-2.20 Gy for 30-35 fractions. The fan-beam CT images were acquired using a Siemens 16-slice CT scanner head protocol with 120 kV and current of 400 mAs. A helical scan with 1 rotation per second was used with a slice thickness of 2 mm and table increment of 1.2 mm. In addition to the imaging data, contours of anatomical structures for RT, demographic, and outcome measurements are provided.

Data format and usage notes

The dataset with DICOM files including images, RTSTRUCT files, and RTDOSE files can be found and publicly accessed in the Cancer Imaging Archive (TCIA, http://www.cancerimagingarchive.net/) as collection Head-and-neck squamous cell carcinoma patients with CT taken during pretreatment, mid-treatment, and post-treatment (HNSCC-3DCT-RT).

Discussion

This is the first dataset to date in TCIA which provides a collection of multiple CT imaging studies (pretreatment, mid-treatment, and post-treatment) throughout the treatment course. The dataset can serve a wide array of research projects including (but not limited to): quantitative imaging assessment, investigation on anatomical changes with treatment progress, dosimetry of target volumes and/or normal structures due to anatomical changes occurring during treatment, investigation of RT toxicity, and concurrent chemotherapy and RT effects on head-and-neck patients.",2019-03-12 +30567473,Cloud-BS: A MapReduce-based bisulfite sequencing aligner on cloud.,"In recent years, there have been many studies utilizing DNA methylome data to answer fundamental biological questions. Bisulfite sequencing (BS-seq) has enabled measurement of a genome-wide absolute level of DNA methylation at single-nucleotide resolution. However, due to the ambiguity introduced by bisulfite-treatment, the aligning process especially in large-scale epigenetic research is still considered a huge burden. We present Cloud-BS, an efficient BS-seq aligner designed for parallel execution on a distributed environment. Utilizing Apache Hadoop framework, Cloud-BS splits sequencing reads into multiple blocks and transfers them to distributed nodes. By designing each aligning procedure into separate map and reducing tasks while an internal key-value structure is optimized based on the MapReduce programming model, the algorithm significantly improves alignment performance without sacrificing mapping accuracy. In addition, Cloud-BS minimizes the innate burden of configuring a distributed environment by providing a pre-configured cloud image. Cloud-BS shows significantly improved bisulfite alignment performance compared to other existing BS-seq aligners. We believe our algorithm facilitates large-scale methylome data analysis. 
The algorithm is freely available at https://paryoja.github.io/Cloud-BS/ .",2018-10-30 +27893735,Computational Discovery of Putative Leads for Drug Repositioning through Drug-Target Interaction Prediction.,"De novo experimental drug discovery is an expensive and time-consuming task. It requires the identification of drug-target interactions (DTIs) towards targets of biological interest, either to inhibit or enhance a specific molecular function. Dedicated computational models for protein simulation and DTI prediction are crucial for speed and to reduce the costs associated with DTI identification. In this paper we present a computational pipeline that enables the discovery of putative leads for drug repositioning that can be applied to any microbial proteome, as long as the interactome of interest is at least partially known. Network metrics calculated for the interactome of the bacterial organism of interest were used to identify putative drug-targets. Then, a random forest classification model for DTI prediction was constructed using known DTI data from publicly available databases, resulting in an area under the ROC curve of 0.91 for classification of out-of-sampling data. A drug-target network was created by combining 3,081 unique ligands and the expected ten best drug targets. This network was used to predict new DTIs and to calculate the probability of the positive class, allowing the scoring of the predicted instances. Molecular docking experiments were performed on the best scoring DTI pairs and the results were compared with those of the same ligands with their original targets. The results obtained suggest that the proposed pipeline can be used in the identification of new leads for drug repositioning. 
The proposed classification model is available at http://bioinformatics.ua.pt/software/dtipred/.",2016-11-28 +23280235,Characterization of transcriptomes from sexual and asexual lineages of a New Zealand snail (Potamopyrgus antipodarum).,"Understanding the evolution and maintenance of sexual reproduction is one of the central challenges of evolutionary biology, yet we know very little about how sex influences molecular evolution. The New Zealand freshwater snail Potamopyrgus antipodarum is ideally suited to address this knowledge gap because obligately sexual individuals often coexist with multiple independently derived obligately asexual lineages. This unusual situation allows direct comparisons both between sexual and asexual P. antipodarum and across populations that differ in the relative frequency of sexual individuals. As such, P. antipodarum has received a great deal of attention as a model system for the maintenance of sex in nature and is also used as a model for environmental toxicology and biological invasions. Molecular genetic resources for P. antipodarum will thus be useful to investigators in a variety of biological fields. We used 454 sequencing of cDNA libraries to generate transcriptomes from two sexual and two asexual P. antipodarum lineages. A de novo assembly of 116.7 Mb of sequence reads produced 41 396 contigs, and sequence similarity-based Gene Ontology annotations were obtained for 3740 contigs. We detected 408 315 SNP loci and 7315 microsatellite loci, which together represent the first genome-scale resource available for P. antipodarum. Raw 454 read sequences, contig sequences, annotation data and polymorphism data are publicly available in a searchable online database and for download at http://www.biology.uiowa.edu/neiman/transcriptome.php.",2012-12-27 +30518975,A proposed mechanism influencing structural patterns in X-linked retinoschisis and stellate nonhereditary idiopathic foveomacular retinoschisis.,"

Objective

To explore the structural differences between X-linked retinoschisis (XLR) and stellate nonhereditary idiopathic foveomacular retinoschisis (SNIFR) using swept-source optical coherence tomography angiography (SS-OCTA).

Methods

A case series of two patients, a 9-year-old male with XLR and a 58-year-old woman with SNIFR, were imaged with swept-source optical coherence tomography angiography (SS-OCTA; PLEX Elite 900, Carl Zeiss Meditec, Inc, Dublin, CA). Automated segmentation was manually adjusted to include the areas of retinoschisis within en face flow and structural slabs. The flow data were binarized using ImageJ 1.51s (Wayne Rasband, National Institutes of Health, USA, http://imagej.nih.gov/ij ) and superimposed onto the structural slab.

Results

In the eye with XLR, OCTA flow data superimposed on the structural slab demonstrated flow signal within numerous bridging structures connecting the inner and outer plexiform layers containing the intermediate (ICP) and deep (DCP) capillary plexuses. In contrast, the same technique applied to the eye with SNIFR demonstrated an absence of flow signal in the cystic retinal spaces within Henle's fiber layer.

Conclusions

The vascular pattern of bridging vessels between the ICP and DCP is closely related to the structural ""retinoschisis"" pattern of XLR and appears to be structurally different from that seen in SNIFR. Moreover, the connecting vessels appear to be highly represented and regularly distributed, thereby supporting a serial arrangement of the retinal capillary plexuses within the perifoveal macula.",2018-12-05 +23203988,PDBTM: Protein Data Bank of transmembrane proteins after 8 years.,"The PDBTM database (available at http://pdbtm.enzim.hu), the first comprehensive and up-to-date transmembrane protein selection of the Protein Data Bank, was launched in 2004. The database was created and has been continuously updated by the TMDET algorithm that is able to distinguish between transmembrane and non-transmembrane proteins using their 3D atomic coordinates only. The TMDET algorithm can locate the spatial positions of transmembrane proteins in lipid bilayer as well. During the last 8 years not only the size of the PDBTM database has been steadily growing from ∼400 to 1700 entries but also new structural elements have been identified, in addition to the well-known α-helical bundle and β-barrel structures. Numerous 'exotic' transmembrane protein structures have been solved since the first release, which has made it necessary to define these new structural elements, such as membrane loops or interfacial helices in the database. This article reports the new features of the PDBTM database that have been added since its first release, and our current efforts to keep the database up-to-date and easy to use so that it may continue to serve as a fundamental resource for the scientific community.",2012-11-30 +29297297,Divisive hierarchical maximum likelihood clustering.,"BACKGROUND:Biological data comprises various topologies or a mixture of forms, which makes its analysis extremely complicated. 
With this data increasing on a daily basis, the design and development of efficient and accurate statistical methods has become absolutely necessary. Specific analyses, such as those related to genome-wide association studies and multi-omics information, are often aimed at clustering sub-conditions of cancers and other diseases. Hierarchical clustering methods, which can be categorized into agglomerative and divisive, have been widely used in such situations. However, unlike agglomerative methods, divisive clustering approaches have consistently proved to be computationally expensive. RESULTS:The proposed clustering algorithm (DRAGON) was verified on mutation and microarray data, and was gauged against standard clustering methods in the literature. Its validation included synthetic and significant biological data. When validated on mixed-lineage leukemia data, DRAGON achieved the highest clustering accuracy with data of four different dimensions. Consequently, DRAGON outperformed previous methods with 3-, 4- and 5-dimensional acute leukemia data. When tested on mutation data, DRAGON achieved the best performance with 2-dimensional information. CONCLUSIONS:This work proposes a computationally efficient divisive hierarchical clustering method, which can compete equally with agglomerative approaches. The proposed method turned out to correctly cluster data with distinct topologies. A MATLAB implementation can be extracted from http://www.riken.jp/en/research/labs/ims/med_sci_math/ or http://www.alok-ai-lab.com.",2017-12-28 +27199606,A database on the distribution of butterflies (Lepidoptera) in northern Belgium (Flanders and the Brussels Capital Region).,"In this data paper, we describe two datasets derived from two sources, which collectively represent the most complete overview of butterflies in Flanders and the Brussels Capital Region (northern Belgium). 
The first dataset (further referred to as the INBO dataset - http://doi.org/10.15468/njgbmh) contains 761,660 records of 70 species and is compiled by the Research Institute for Nature and Forest (INBO) in cooperation with the Butterfly working group of Natuurpunt (Vlinderwerkgroep). It is derived from the database Vlinderdatabank at the INBO, which consists of (historical) collection and literature data (1830-2001), for which all butterfly specimens in institutional and available personal collections were digitized and all entomological and other relevant publications were checked for butterfly distribution data. It also contains observations and monitoring data for the period 1991-2014. The latter type were collected by a (small) butterfly monitoring network where butterflies were recorded using a standardized protocol. The second dataset (further referred to as the Natuurpunt dataset - http://doi.org/10.15468/ezfbee) contains 612,934 records of 63 species and is derived from the database http://waarnemingen.be, hosted at the nature conservation NGO Natuurpunt in collaboration with Stichting Natuurinformatie. This dataset contains butterfly observations by volunteers (citizen scientists), mainly since 2008. Together, these datasets currently contain a total of 1,374,594 records, which are georeferenced using the centroid of their respective 5 × 5 km² Universal Transverse Mercator (UTM) grid cell. Both datasets are published as open data and are available through the Global Biodiversity Information Facility (GBIF).",2016-04-26 +30644065,A Model of [Formula: see text] Dynamics in an Accurate Reconstruction of Parotid Acinar Cells.,"We have constructed a spatiotemporal model of [Formula: see text] dynamics in parotid acinar cells, based on new data about the distribution of inositol trisphophate receptors (IPR). The model is solved numerically on a mesh reconstructed from images of a cluster of parotid acinar cells. In contrast to our earlier model (Sneyd et al. 
in J Theor Biol 419:383-393. https://doi.org/10.1016/j.jtbi.2016.04.030 , 2017b), which cannot generate realistic [Formula: see text] oscillations with the new data on IPR distribution, our new model reproduces the [Formula: see text] dynamics observed in parotid acinar cells. This model is then coupled with a fluid secretion model described in detail in a companion paper: A mathematical model of fluid transport in an accurate reconstruction of a parotid acinar cell (Vera-Sigüenza et al. in Bull Math Biol. https://doi.org/10.1007/s11538-018-0534-z , 2018b). Based on the new measurements of IPR distribution, we show that Class I models (where [Formula: see text] oscillations can occur at constant [[Formula: see text]]) can produce [Formula: see text] oscillations in parotid acinar cells, whereas Class II models (where [[Formula: see text]] needs to oscillate in order to produce [Formula: see text] oscillations) are unlikely to do so. In addition, we demonstrate that coupling fluid flow secretion with the [Formula: see text] signalling model changes the dynamics of the [Formula: see text] oscillations significantly, which indicates that [Formula: see text] dynamics and fluid flow cannot be accurately modelled independently. Further, we determine that an active propagation mechanism based on calcium-induced calcium release channels is needed to propagate the [Formula: see text] wave from the apical region to the basal region of the acinar cell.",2019-01-14 +31458051,QSPR Modeling of the Refractive Index for Diverse Polymers Using 2D Descriptors.,"In the present work, predictive quantitative structure-property relationship models have been developed to predict refractive indices (RIs) of a set of 221 diverse organic polymers using theoretical two-dimensional descriptors generated on the basis of the structures of polymers' monomer units. 
Four models have been developed by applying partial least squares (PLS) regression with a different combination of six descriptors obtained via double cross-validation approaches. The predictive ability and robustness of the proposed models were checked using multiple validation strategies. Subsequently, the validated models were used for the generation of ""intelligent"" consensus models (http://teqip.jdvu.ac.in/QSAR_Tools/DTCLab/) to improve the quality of predictions for the external data set. The selected consensus models were used for the prediction of refractive index values of various classes of polymers. The final selected model was used to predict the refractive index of four small virtual libraries of monomers recently reported. We also used a true external data set of 98 diverse monomer units with the experimental RI values of the corresponding polymers. The obtained models showed a good predictive ability as evidenced from a very good external predicted variance.",2018-10-17 +30759180,refineD: improved protein structure refinement using machine learning based restrained relaxation.,"

Motivation

Protein structure refinement aims to bring moderately accurate template-based protein models closer to the native state through conformational sampling. However, guiding the sampling towards the native state by effectively using restraints remains a major issue in structure refinement.

Results

Here, we develop a machine learning based restrained relaxation protocol that uses deep discriminative learning based binary classifiers to predict multi-resolution probabilistic restraints from the starting structure and subsequently converts these restraints to be integrated into Rosetta all-atom energy function as additional scoring terms during structure refinement. We use four restraint resolutions as adopted in GDT-HA (0.5, 1, 2 and 4 Å), centered on the Cα atom of each residue that are predicted by ensemble of four deep discriminative classifiers trained using combinations of sequence and structure-derived features as well as several energy terms from Rosetta centroid scoring function. The proposed method, refineD, has been found to produce consistent and substantial structural refinement through the use of cumulative and non-cumulative restraints on 150 benchmarking targets. refineD outperforms unrestrained relaxation strategy or relaxation that is restrained to starting structures using the FastRelax application of Rosetta or atomic-level energy minimization based ModRefiner method as well as molecular dynamics (MD) simulation based FG-MD protocol. Furthermore, by adjusting restraint resolutions, the method addresses the tradeoff that exists between degree and consistency of refinement. These results demonstrate a promising new avenue for improving accuracy of template-based protein models by effectively guiding conformational sampling during structure refinement through the use of machine learning based restraints.

Availability and implementation

http://watson.cse.eng.auburn.edu/refineD/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-09-01 +24293649,"The SILVA and ""All-species Living Tree Project (LTP)"" taxonomic frameworks.","SILVA (from Latin silva, forest, http://www.arb-silva.de) is a comprehensive resource for up-to-date quality-controlled databases of aligned ribosomal RNA (rRNA) gene sequences from the Bacteria, Archaea and Eukaryota domains and supplementary online services. SILVA provides a manually curated taxonomy for all three domains of life, based on representative phylogenetic trees for the small- and large-subunit rRNA genes. This article describes the improvements the SILVA taxonomy has undergone in the last 3 years. Specifically we are focusing on the curation process, the various resources used for curation and the comparison of the SILVA taxonomy with Greengenes and RDP-II taxonomies. Our comparisons not only revealed a reasonable overlap between the taxa names, but also points to significant differences in both names and numbers of taxa between the three resources.",2013-11-28 +24653643,INsPeCT: INtegrative Platform for Cancer Transcriptomics.,"The emergence of transcriptomics, fuelled by high-throughput sequencing technologies, has changed the nature of cancer research and resulted in a massive accumulation of data. Computational analysis, integration, and data visualization are now major bottlenecks in cancer biology and translational research. Although many tools have been brought to bear on these problems, their use remains unnecessarily restricted to computational biologists, as many tools require scripting skills, data infrastructure, and powerful computational facilities. New user-friendly, integrative, and automated analytical approaches are required to make computational methods more generally useful to the research community. 
Here we present INsPeCT (INtegrative Platform for Cancer Transcriptomics), which allows users with basic computer skills to perform comprehensive in-silico analyses of microarray, ChIP-seq, and RNA-seq data. INsPeCT supports the selection of interesting genes for advanced functional analysis. Included in its automated workflows are (i) a novel analytical framework, RMaNI (regulatory module network inference), which supports the inference of cancer subtype-specific transcriptional module networks and the analysis of modules; and (ii) WGCNA (weighted gene co-expression network analysis), which infers modules of highly correlated genes across microarray samples, associated with sample traits, eg survival time. INsPeCT is available free of cost from Bioinformatics Resource Australia-EMBL and can be accessed at http://inspect.braembl.org.au.",2014-03-12 +23193254,UCNEbase--a database of ultraconserved non-coding elements and genomic regulatory blocks.,"UCNEbase (http://ccg.vital-it.ch/UCNEbase) is a free, web-accessible information resource on the evolution and genomic organization of ultra-conserved non-coding elements (UCNEs). It currently covers 4351 such elements in 18 different species. The majority of UCNEs are supposed to be transcriptional regulators of key developmental genes. As most of them occur as clusters near potential target genes, the database is organized along two hierarchical levels: individual UCNEs and ultra-conserved genomic regulatory blocks (UGRBs). UCNEbase introduces a coherent nomenclature for UCNEs reflecting their respective associations with likely target genes. Orthologous and paralogous UCNEs share components of their names and are systematically cross-linked. Detailed synteny maps between the human and other genomes are provided for all UGRBs. UCNEbase is managed by a relational database system and can be accessed by a variety of web-based query pages. 
As it relies on the UCSC genome browser as visualization platform, a large part of its data content is also available as browser viewable custom track files. UCNEbase is potentially useful to any computational, experimental or evolutionary biologist interested in conserved non-coding DNA elements in vertebrates.",2012-11-27 +30169777,"NanoShaper-VMD interface: computing and visualizing surfaces, pockets and channels in molecular systems.","

Summary

NanoShaper is a program specifically aimed at the construction and analysis of the molecular surface of nanoscopic systems. It uses ray-casting for parallelism and it performs analytical computations whenever possible to maximize robustness and accuracy of the approach. Among the other features, NanoShaper provides volume, surface area, including that of internal cavities, for any considered molecular system. It identifies pockets via a very intuitive definition based on the concept of probe radius, intrinsic to the definition of the solvent excluded surface. We show here that, with a suitable choice of the parameters, the same approach can also permit the visualisation of molecular channels. NanoShaper has now been interfaced with the widely used molecular visualization software VMD, further enriching its already well furnished toolset.

Availability and implementation

VMD is available at http://www.ks.uiuc.edu/Research/vmd/. NanoShaper, its documentation, tutorials and supporting programs are available at http://concept.iit.it/downloads.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +31725664,Impact of physical exercise on children with attention deficit hyperactivity disorders: Evidence through a meta-analysis.,"

Background

Attention deficit hyperactivity disorder (ADHD) which is characterized by developmentally inappropriate levels of attention, hyperactivity and impulsivity, is considered as the most common neurodevelopmental disorder in childhood. Physical exercise has shown to have several benefits in the improvement of children with ADHD. In this meta-analysis, we aimed to systematically show, with evidence, the impact of physical exercise on children with ADHD.

Methods

Web of Science, MEDLINE, EMBASE, Google Scholar, Cochrane Central and http://www.ClinicalTrials.gov were the searched sources for studies which were based on the impact of physical exercise on children with ADHD. Relevant endpoints were assessed. This evidence based meta-analysis was carried out by the most relevant RevMan 5.3 software. Due to the involvement of continuous data (mean and standard deviation), weighted mean differences (WMD) with 95% confidence intervals (CI) were used to represent the final analysis. A significance level of P ≤ .05 was set and a fixed-effect statistical model was used throughout the analysis.

Results

Fourteen studies with a total number of 574 participants with ADHD were included in this evidence-based meta-analysis. Two hundred and seventy six (276) participants were assigned to the physical activity group whereas 298 participants were assigned to the control group. Results of this analysis showed that anxiety and depression were significantly improved with physical activity in these children with ADHD (WMD: -1.84; 95% CI: [-2.65 - (-1.03)], P = .00001). Hyperactive/impulsive symptoms (WMD: -0.01; 95% CI: [-0.32 - 0.29], P = .93) and inattention symptoms (WMD: -0.22; 95% CI: [-0.51 - 0.08], P = .15) were also improved with physical exercise but the results were not statistically significant. This evidence based analysis showed thought problems (WMD: -3.49; 95% CI: [-5.51 - (-1.47)], P = .0007), social problems (WMD: -5.08; 95% CI: [-7.34 - (-2.82)], P = .0001), and aggressive behaviors (WMD: -3.90; 95% CI: [-7.10 - (-0.70)], P = .02) to have significantly been improved in participants with ADHD who were assigned to the physical activity group.

Conclusions

This current meta-analysis showed with evidence, that physical exercise has a major contribution owing to significant improvement in anxiety and depression, aggressive behaviors, thought and social problems among children suffering from ADHD. Therefore, physical exercise should be incorporated in the daily life of children with ADHD. Further future research should be able to confirm this hypothesis.",2019-11-01 +29186510,The European Bioinformatics Institute in 2017: data coordination and integration.,"The European Bioinformatics Institute (EMBL-EBI) supports life-science research throughout the world by providing open data, open-source software and analytical tools, and technical infrastructure (https://www.ebi.ac.uk). We accommodate an increasingly diverse range of data types and integrate them, so that biologists in all disciplines can explore life in ever-increasing detail. We maintain over 40 data resources, many of which are run collaboratively with partners in 16 countries (https://www.ebi.ac.uk/services). Submissions continue to increase exponentially: our data storage has doubled in less than two years to 120 petabytes. Recent advances in cellular imaging and single-cell sequencing techniques are generating a vast amount of high-dimensional data, bringing to light new cell types and new perspectives on anatomy. Accordingly, one of our main focus areas is integrating high-quality information from bioimaging, biobanking and other types of molecular data. This is reflected in our deep involvement in Open Targets, stewarding of plant phenotyping standards (MIAPPE) and partnership in the Human Cell Atlas data coordination platform, as well as the 2017 launch of the Omics Discovery Index. 
This update gives a birds-eye view of EMBL-EBI's approach to data integration and service development as genomics begins to enter the clinic.",2018-01-01 +29309662,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on Pathological Methods and Prognostic Factors in Vestibular Schwannomas.,TARGET POPULATION:Adults diagnosed with vestibular schwannomas. QUESTION 1:What is the prognostic significance of Antoni A vs B histologic patterns in vestibular schwannomas? RECOMMENDATION:No recommendations can be made due to a lack of adequate data. QUESTION 2:What is the prognostic significance of mitotic figures seen in vestibular schwannoma specimens? RECOMMENDATION:No recommendations can be made due to a lack of adequate data. QUESTION 3:Are there other light microscopic features that predict clinical behavior of vestibular schwannomas? RECOMMENDATION:No recommendations can be made due to a lack of adequate data. QUESTION 4:Does the KI-67 labeling index predict clinical behavior of vestibular schwannomas? RECOMMENDATION:No recommendations can be made due to a lack of adequate data. QUESTION 5:Does the proliferating cell nuclear antigen labeling index predict clinical behavior of vestibular schwannomas? RECOMMENDATION:No recommendations can be made due to a lack of adequate data. QUESTION 6:Does degree of vascular endothelial growth factor expression predict clinical behavior of vestibular schwannomas? RECOMMENDATION:No recommendations can be made due to a lack of adequate data.  The full guideline can be found at: https://www.cns.org/guidelines/guidelines-management-patients-vestibular-schwannoma/chapter_6.,2018-02-01 +31409293,Guide for library design and bias correction for large-scale transcriptome studies using highly multiplexed RNAseq methods.,"

Background

Standard RNAseq methods using bulk RNA and recent single-cell RNAseq methods use DNA barcodes to identify samples and cells, and the barcoded cDNAs are pooled into a library pool before high throughput sequencing. In cases of single-cell and low-input RNAseq methods, the library is further amplified by PCR after the pooling. Preparation of hundreds or more samples for a large study often requires multiple library pools. However, sometimes correlation between expression profiles among the libraries is low and batch effect biases make integration of data between library pools difficult.

Results

We investigated 166 technical replicates in 14 RNAseq libraries made using the STRT method. The patterns of the library biases differed by genes, and uneven library yields were associated with library biases. The former bias was corrected using the NBGLM-LBC algorithm, which we present in the current study. The latter bias could not be corrected directly, but could be solved by omitting libraries with particularly low yields. A simulation experiment suggested that the library bias correction using NBGLM-LBC requires a consistent sample layout. The NBGLM-LBC correction method was applied to an expression profile for a cohort study of childhood acute respiratory illness, and the library biases were resolved.

Conclusions

The R source code for the library bias correction named NBGLM-LBC is available at https://shka.github.io/NBGLM-LBC and https://shka.bitbucket.io/NBGLM-LBC . This method is applicable to correct the library biases in various studies that use highly multiplexed sequencing-based profiling methods with a consistent sample layout with samples to be compared (e.g., ""cases"" and ""controls"") equally distributed in each library.",2019-08-13 +29214320,"Suicide among agricultural, forestry, and fishery workers.","In their meta-analysis, Klingelschmidt and her associates (1) found that agricultural, forestry, and fishery workers are at 48% higher risk of suicide than the working-age population. Moreover, they found that the excess risk is even greater among Japanese agricultural workers than workers from other high-income countries. There are several concerns regarding this meta-analysis. It appears that the excess risk has been overestimated for these workers. Furthermore, the excess risk in Japan is not different than other high-income countries. First, in a systematic review, a literature search is comprehensive. A search of a single database is unlikely to identify most of relevant studies, and these types of reviews are not therefore considered as systematic reviews (2). In this review, a specialized database (-PsycINFO) or a European database (EMBASE or -Scopus) was not searched. Second, following the PRISMA guidelines, the critical appraisal of included studies (quality assessment) is a requirement for a systematic review. In a meta-analysis of observational studies, selection bias and confounding should be ruled out. Third, the reviewers did not correctly extract confidence intervals (CI) for the estimates of several studies such as Hassler 2004, Fleming 1999, and Fragar 2011. Moreover, some studies reported both the least- and maximally adjusted risk estimates. The reviewers, however, extracted age- or the least-adjusted risk estimate. 
A confounder-adjusted estimate is a more appropriate estimate of the true association. In some studies [eg, Kposowa (3) Agerbo (4)], the excess risk dropped by 52-71% after adjustment for confounders. As a sensitivity analysis, the reviewers could limit their meta-analysis to a subgroup of studies controlled for confounders. Fourth, the reviewers did not estimate an overall risk estimate for each study. They included the estimates of 2-6 subgroups for 22 studies in forest and funnel plots. A fixed-effect meta-analysis is a more appropriate model to combine the subgroups of a single study. Moreover, for the assessment of publication bias, it is not appropriate to include several subsamples of a single study in a funnel plot. Using estimates of subgroups can change a large study into several smaller studies. Fifth, some of the included studies compared agricultural, forestry, and fishery workers with a specific occupational group. The reviewers could calculate a risk estimate using all other occupational groups as a comparison group and exclude those studies that did not provide sufficient data for estimating such a risk estimate. In some studies, the excess risk for agricultural, forestry, and fishery workers disappears after comparing with other occupational groups [eg, adjusted risk ratios (RR) for Kposowa (3) = 1.02, 95% CI 0.41-2.54]. This is a main reason for observed higher excess risk in Japanese workers. Wada et al (5) compared Japanese agricultural workers with sales workers and Suzuki et al (6) compared Japanese agricultural, forestry, and fishery workers with production process and related workers. Using all other occupational groups as a reference group, age-adjusted RR dropped from 3.53 (95% CI 2.84-4.38) to 2.61 (95% CI 2.10-3.25) for Wada et al (5) and from 3.24 (CI 2.95-3.57, both sexes combined) to 1.31 (CI 1.27-1.35 age-adjusted OR after excluding unemployed people) for Suzuki et al (6). 
The pooled estimate of these two register-based studies was 1.33 (95% CI 1.29-1.37) using a fixed model and 1.83 (95% CI 0.93-3.60) using a random model. Sixth, most of the included studies used register data, which had little information on the background characteristics of the participants. A majority of these studies controlled the estimates for age and sex only. Moreover, in this review, prospective cohort studies did not support the observed association. A meta-analysis of 11 case-control and prospective cohort studies shows no significant excess risk of suicide for agricultural, forestry, and fishery workers (pooled estimate = 1.02, 95% CI 0.71-1.47 for 6 cohort studies and 1.13, 95% CI 0.92-1.39, I2 = 91% for 11 case control and cohort studies, combining maximally adjusted risk estimates and comparing agricultural, forestry, and fishery workers with all other occupational groups where possible). The excess risk found in this review (1) can thus largely be due to confounding. References 1. Klingelschmidt J, Milner A, Khireddine-Medouni I, Witt K, Alexopoulos EC, Toivanen S, LaMontagne AD, Chastang JF, Niedhammer I. Suicide among agricultural, forestry, and fishery workers: a systematic literature review and meta-analysis. Scand J Work Environ Health. 2018;44(1):3-15. https://doi.org/10.5271/sjweh.3682.  2. Puljak L. If there is only one author or only one database was searched, a study should not be called a systematic review. J Clin Epidemiol. 2017;91:4-5. https://doi.org/10.1016/j.jclinepi.2017.08.002.  3. Kposowa AJ. Suicide mortality in the United States: differentials by industrial and occupational groups. Am J Ind Med. 1999;36:645-52. https://doi.org/10.1002/(SICI)1097-0274(199912)36:63.0.CO;2-T. 4. Agerbo E, Gunnell D, Bonde JP, Mortensen PB, Nordentoft M. Suicide and occupation: the impact of socio-economic, demographic and psychiatric differences. Psychol Med. 2007;37:1131-40. https://doi.org/10.1017/S0033291707000487.  5. Wada K, Gilmour S. 
Inequality in mortality by occupation related to economic crisis from 1980 to 2010 among working-age Japanese males. Sci Rep. 2016;6:22255. https://doi.org/10.1038/srep22255. 6. Suzuki E, Kashima S, Kawachi I, Subramanian SV. Social and geographical inequalities in suicide in Japan from 1975 through 2005: a census-based longitudinal analysis. PLoS One. 2013;8:e63443. https://doi.org/10.1371/journal.pone.0063443.",2017-12-07 +23180799,PGDD: a database of gene and genome duplication in plants.,"Genome duplication (GD) has permanently shaped the architecture and function of many higher eukaryotic genomes. The angiosperms (flowering plants) are outstanding models in which to elucidate consequences of GD for higher eukaryotes, owing to their propensity for chromosomal duplication or even triplication in a few cases. Duplicated genome structures often require both intra- and inter-genome alignments to unravel their evolutionary history, also providing the means to deduce both obvious and otherwise-cryptic orthology, paralogy and other relationships among genes. The burgeoning sets of angiosperm genome sequences provide the foundation for a host of investigations into the functional and evolutionary consequences of gene and GD. To provide genome alignments from a single resource based on uniform standards that have been validated by empirical studies, we built the Plant Genome Duplication Database (PGDD; freely available at http://chibba.agtec.uga.edu/duplication/), a web service providing synteny information in terms of colinearity between chromosomes. At present, PGDD contains data for 26 plants including bryophytes and chlorophyta, as well as angiosperms with draft genome sequences. 
In addition to the inclusion of new genomes as they become available, we are preparing new functions to enhance PGDD.",2012-11-24 +27799471,Genenames.org: the HGNC and VGNC resources in 2017.,"The HUGO Gene Nomenclature Committee (HGNC) based at the European Bioinformatics Institute (EMBL-EBI) assigns unique symbols and names to human genes. Currently the HGNC database contains almost 40 000 approved gene symbols, over 19 000 of which represent protein-coding genes. In addition to naming genomic loci we manually curate genes into family sets based on shared characteristics such as homology, function or phenotype. We have recently updated our gene family resources and introduced new improved visualizations which can be seen alongside our gene symbol reports on our primary website http://www.genenames.org In 2016 we expanded our remit and formed the Vertebrate Gene Nomenclature Committee (VGNC) which is responsible for assigning names to vertebrate species lacking a dedicated nomenclature group. Using the chimpanzee genome as a pilot project we have approved symbols and names for over 14 500 protein-coding genes in chimpanzee, and have developed a new website http://vertebrate.genenames.org to distribute these data. Here, we review our online data and resources, focusing particularly on the improvements and new developments made during the last two years.",2016-10-30 +30759982,DeepDDG: Predicting the Stability Change of Protein Point Mutations Using Neural Networks.,"Accurately predicting changes in protein stability due to mutations is important for protein engineering and for understanding the functional consequences of missense mutations in proteins. We have developed DeepDDG, a neural network-based method, for use in the prediction of changes in the stability of proteins due to point mutations. 
The neural network was trained on more than 5700 manually curated experimental data points and was able to obtain a Pearson correlation coefficient of 0.48-0.56 for three independent test sets, which outperformed 11 other methods. Detailed analysis of the input features shows that the solvent accessible surface area of the mutated residue is the most important feature, which suggests that the buried hydrophobic area is the major determinant of protein stability. We expect this method to be useful for large-scale design and engineering of protein stability. The neural network is freely available to academic users at http://protein.org.cn/ddg.html .",2019-02-25 +31176362,Facilitating validation of prediction models: a comparison of manual and semi-automated validation using registry-based data of breast cancer patients in the Netherlands.,"

Background

Clinical prediction models are not routinely validated. To facilitate validation procedures, the online Evidencio platform ( https://www.evidencio.com ) has developed a tool partly automating this process. This study aims to determine whether semi-automated validation can reliably substitute manual validation.

Methods

Four different models used in breast cancer care were selected: CancerMath, INFLUENCE, Predicted Probability of Axillary Metastasis, and PREDICT v.2.0. Data were obtained from the Netherlands Cancer Registry according to the inclusion criteria of the original development population. Calibration (intercepts and slopes) and discrimination (area under the curve (AUC)) were compared between semi-automated and manual validation.

Results

Differences between intercepts and slopes of all models using semi-automated validation ranged from 0 to 0.03 from manual validation, which was not clinically relevant. AUCs were identical for both validation methods.

Conclusions

This easy to use semi-automated validation option is a good substitute for manual validation and might increase the number of validations of prediction models used in clinical practice. In addition, the validation tool was considered to be user-friendly and to save a lot of time compared to manual validation. Semi-automated validation will contribute to more accurate outcome predictions and treatment recommendations in the target population.",2019-06-08 +22110030,The genome portal of the Department of Energy Joint Genome Institute.,"The Department of Energy (DOE) Joint Genome Institute (JGI) is a national user facility with massive-scale DNA sequencing and analysis capabilities dedicated to advancing genomics for bioenergy and environmental applications. Beyond generating tens of trillions of DNA bases annually, the Institute develops and maintains data management systems and specialized analytical capabilities to manage and interpret complex genomic data sets, and to enable an expanding community of users around the world to analyze these data in different contexts over the web. The JGI Genome Portal (http://genome.jgi.doe.gov) provides a unified access point to all JGI genomic databases and analytical tools. A user can find all DOE JGI sequencing projects and their status, search for and download assemblies and annotations of sequenced genomes, and interactively explore those genomes and compare them with other sequenced microbes, fungi, plants or metagenomes using specialized systems tailored to each particular class of organisms. We describe here the general organization of the Genome Portal and the most recent addition, MycoCosm (http://jgi.doe.gov/fungi), a new integrated fungal genomics resource.",2011-11-22 +26421146,High-coverage sequencing and annotated assembly of the genome of the Australian dragon lizard Pogona vitticeps.,"

Background

The lizards of the family Agamidae are one of the most prominent elements of the Australian reptile fauna. Here, we present a genomic resource built on the basis of a wild-caught male ZZ central bearded dragon Pogona vitticeps.

Findings

The genomic sequence for P. vitticeps, generated on the Illumina HiSeq 2000 platform, comprised 317 Gbp (179X raw read depth) from 13 insert libraries ranging from 250 bp to 40 kbp. After filtering for low-quality and duplicated reads, 146 Gbp of data (83X) was available for assembly. Exceptionally high levels of heterozygosity (0.85 % of single nucleotide polymorphisms plus sequence insertions or deletions) complicated assembly; nevertheless, 96.4 % of reads mapped back to the assembled scaffolds, indicating that the assembly included most of the sequenced genome. Length of the assembly was 1.8 Gbp in 545,310 scaffolds (69,852 longer than 300 bp), the longest being 14.68 Mbp. N50 was 2.29 Mbp. Genes were annotated on the basis of de novo prediction, similarity to the green anole Anolis carolinensis, Gallus gallus and Homo sapiens proteins, and P. vitticeps transcriptome sequence assemblies, to yield 19,406 protein-coding genes in the assembly, 63 % of which had intact open reading frames. Our assembly captured 99 % (246 of 248) of core CEGMA genes, with 93 % (231) being complete.

Conclusions

The quality of the P. vitticeps assembly is comparable or superior to that of other published squamate genomes, and the annotated P. vitticeps genome can be accessed through a genome browser available at https://genomics.canberra.edu.au.",2015-09-28 +26650466,Text Mining for Protein Docking.,"The rapidly growing amount of publicly available information from biomedical research is readily accessible on the Internet, providing a powerful resource for predictive biomolecular modeling. The accumulated data on experimentally determined structures transformed structure prediction of proteins and protein complexes. Instead of exploring the enormous search space, predictive tools can simply proceed to the solution based on similarity to the existing, previously determined structures. A similar major paradigm shift is emerging due to the rapidly expanding amount of information, other than experimentally determined structures, which still can be used as constraints in biomolecular structure prediction. Automated text mining has been widely used in recreating protein interaction networks, as well as in detecting small ligand binding sites on protein structures. Combining and expanding these two well-developed areas of research, we applied the text mining to structural modeling of protein-protein complexes (protein docking). Protein docking can be significantly improved when constraints on the docking mode are available. We developed a procedure that retrieves published abstracts on a specific protein-protein interaction and extracts information relevant to docking. The procedure was assessed on protein complexes from Dockground (http://dockground.compbio.ku.edu). The results show that correct information on binding residues can be extracted for about half of the complexes. The amount of irrelevant information was reduced by conceptual analysis of a subset of the retrieved abstracts, based on the bag-of-words (features) approach. 
Support Vector Machine models were trained and validated on the subset. The remaining abstracts were filtered by the best-performing models, which decreased the irrelevant information for ~ 25% complexes in the dataset. The extracted constraints were incorporated in the docking protocol and tested on the Dockground unbound benchmark set, significantly increasing the docking success rate.",2015-12-09 +26411868,MDD-SOH: exploiting maximal dependence decomposition to identify S-sulfenylation sites with substrate motifs.,"

Unlabelled

S-sulfenylation (S-sulphenylation, or sulfenic acid), the covalent attachment of S-hydroxyl (-SOH) to cysteine thiol, plays a significant role in redox regulation of protein functions. Although sulfenic acid is transient and labile, most of its physiological activities occur under control of S-hydroxylation. Therefore, discriminating the substrate site of S-sulfenylated proteins is an essential task in computational biology for the furtherance of protein structures and functions. Research into S-sulfenylated protein is currently very limited, and no dedicated tools are available for the computational identification of SOH sites. Given a total of 1096 experimentally verified S-sulfenylated proteins from humans, this study carries out a bioinformatics investigation on SOH sites based on amino acid composition and solvent-accessible surface area. A TwoSampleLogo indicates that the positively and negatively charged amino acids flanking the SOH sites may impact the formulation of S-sulfenylation in closed three-dimensional environments. In addition, the substrate motifs of SOH sites are studied using the maximal dependence decomposition (MDD). Based on the concept of binary classification between SOH and non-SOH sites, Support vector machine (SVM) is applied to learn the predictive model from MDD-identified substrate motifs. According to the evaluation results of 5-fold cross-validation, the integrated SVM model learned from substrate motifs yields an average accuracy of 0.87, significantly improving the prediction of SOH sites. Furthermore, the integrated SVM model also effectively improves the predictive performance in an independent testing set. Finally, the integrated SVM model is applied to implement an effective web resource, named MDD-SOH, to identify SOH sites with their corresponding substrate motifs.

Availability and implementation

The MDD-SOH is now freely available to all interested users at http://csb.cse.yzu.edu.tw/MDDSOH/. All of the data sets used in this work are also available for download from the website.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

francis@saturn.yzu.edu.tw.",2015-09-26 +27105848,miRNet - dissecting miRNA-target interactions and functional associations through network-based visual analysis.,"MicroRNAs (miRNAs) can regulate nearly all biological processes and their dysregulation is implicated in various complex diseases and pathological conditions. Recent years have seen a growing number of functional studies of miRNAs using high-throughput experimental technologies, which have produced a large amount of high-quality data regarding miRNA target genes and their interactions with small molecules, long non-coding RNAs, epigenetic modifiers, disease associations, etc These rich sets of information have enabled the creation of comprehensive networks linking miRNAs with various biologically important entities to shed light on their collective functions and regulatory mechanisms. Here, we introduce miRNet, an easy-to-use web-based tool that offers statistical, visual and network-based approaches to help researchers understand miRNAs functions and regulatory mechanisms. The key features of miRNet include: (i) a comprehensive knowledge base integrating high-quality miRNA-target interaction data from 11 databases; (ii) support for differential expression analysis of data from microarray, RNA-seq and quantitative PCR; (iii) implementation of a flexible interface for data filtering, refinement and customization during network creation; (iv) a powerful fully featured network visualization system coupled with enrichment analysis. miRNet offers a comprehensive tool suite to enable statistical analysis and functional interpretation of various data generated from current miRNA studies. miRNet is freely available at http://www.mirnet.ca.",2016-04-21 +27539197,A long journey to short abbreviations: developing an open-source framework for clinical abbreviation recognition and disambiguation (CARD).,"

Objective

The goal of this study was to develop a practical framework for recognizing and disambiguating clinical abbreviations, thereby improving current clinical natural language processing (NLP) systems' capability to handle abbreviations in clinical narratives.

Methods

We developed an open-source framework for clinical abbreviation recognition and disambiguation (CARD) that leverages our previously developed methods, including: (1) machine learning based approaches to recognize abbreviations from a clinical corpus, (2) clustering-based semiautomated methods to generate possible senses of abbreviations, and (3) profile-based word sense disambiguation methods for clinical abbreviations. We applied CARD to clinical corpora from Vanderbilt University Medical Center (VUMC) and generated 2 comprehensive sense inventories for abbreviations in discharge summaries and clinic visit notes. Furthermore, we developed a wrapper that integrates CARD with MetaMap, a widely used general clinical NLP system.

Results and conclusion

CARD detected 27 317 and 107 303 distinct abbreviations from discharge summaries and clinic visit notes, respectively. Two sense inventories were constructed for the 1000 most frequent abbreviations in these 2 corpora. Using the sense inventories created from discharge summaries, CARD achieved an F1 score of 0.755 for identifying and disambiguating all abbreviations in a corpus from the VUMC discharge summaries, which is superior to MetaMap and Apache's clinical Text Analysis Knowledge Extraction System (cTAKES). Using additional external corpora, we also demonstrated that the MetaMap-CARD wrapper improved MetaMap's performance in recognizing disorder entities in clinical notes. The CARD framework, 2 sense inventories, and the wrapper for MetaMap are publicly available at https://sbmi.uth.edu/ccb/resources/abbreviation.htm . We believe the CARD framework can be a valuable resource for improving abbreviation identification in clinical NLP systems.",2017-04-01 +22067443,PINA v2.0: mining interactome modules.,"The Protein Interaction Network Analysis (PINA) platform is a comprehensive web resource, which includes a database of unified protein-protein interaction data integrated from six manually curated public databases, and a set of built-in tools for network construction, filtering, analysis and visualization. The second version of PINA enhances its utility for studies of protein interactions at a network level, by including multiple collections of interaction modules identified by different clustering approaches from the whole network of protein interactions ('interactome') for six model organisms. All identified modules are fully annotated by enriched Gene Ontology terms, KEGG pathways, Pfam domains and the chemical and genetic perturbations collection from MSigDB. Moreover, a new tool is provided for module enrichment analysis in addition to simple query function. The interactome data are also available on the web site for further bioinformatics analysis. 
PINA is freely accessible at http://cbg.garvan.unsw.edu.au/pina/.",2011-11-08 +29267884,GlycoDomainViewer: a bioinformatics tool for contextual exploration of glycoproteomes.,"The GlycoDomainViewer is a bioinformatic tool to aid in the mining of glycoproteomic datasets from different sources and facilitate incorporation of glycosylation into studies of protein structure and function. We present a version 2.0 of GlycoDomainViewer incorporating a number of advanced features, which enhances visibility and accessibility of the wealth of glycoproteomic data being generated. The GlycoDomainViewer enables visual exploration of glycoproteomic data, incorporating information from recent N- and O-glycoproteome studies on human and animal cell lines and some organs and body fluids. The initial data comprises sites of glycosylation for N-linked, O-GalNAc, O-Fucose, O-Xyl, O-Mannose (in both human and yeast) and cytosolic O-GlcNAc type. The data made available via this tool will be regularly updated to improve the coverage of known glycosylation sites and datasets, reflecting the advances currently being made in characterization of glycoproteomes. The tool is available at https://glycodomain.glycomics.ku.dk.",2018-03-01 +30578748,"Eco-epidemiology of the Venezuelan equine encephalitis virus in bats of Córdoba and Sucre, Colombia.","Alphavirus infection associated encephalitis is an emerging infectious disease with a high impact on public health in Latin America.

Objective

To study the eco-epidemiology of alphaviruses in bats of departments of Córdoba and Sucre, Colombia.

Methodology

A prospective, descriptive, cross-sectional study with non-probabilistic sampling was carried out in 12 localities of Córdoba and Sucre. Specimens were captured using mist nets. The sample comprised 286 bats, and each captured specimen was taxonomically classified. The bats were immobilized with anesthetic and analgesic treatment according to the ethics committee of the University of Córdoba; morphometric measurements and blood samples were taken, and the bats were later necropsied in the field to obtain a collection of tissues, which were preserved in liquid N2 at -190 °C. The averages of the climatic conditions of the sampling sites were extracted from the WorldClim database (http://www.worldclim.org/). The open source software QGIS (Quantum GIS Development Team, 2015) was used to map and visualize bioclimatic regions of Córdoba. We used descriptive and retrospective information about the equine population and reports of foci of equine encephalitis.

Results

In Córdoba and Sucre, 286 bats were captured and 23 species were classified; Artibeus and Phyllostomus discolor were the most frequently captured taxa. The geographic ranges of the captured species were variable; some had a wide distribution and others were restricted to some areas. Venezuelan equine encephalitis virus RNA was detected in Artibeus planirostris and Sturnira lilium (2/286 = 0.70%) from Córdoba, Colombia. The univariate descriptive analysis showed no significant association for any of the analyzed climatic variables.

Conclusions

Frugivorous bats from the Caribbean area of Colombia may be involved in the Venezuelan equine encephalitis virus enzootic cycle.",2018-12-19 +30392403,"Estimates of the Global Burden of Ambient [Formula: see text], Ozone, and [Formula: see text] on Asthma Incidence and Emergency Room Visits.","

Background

Asthma is the most prevalent chronic respiratory disease worldwide, affecting 358 million people in 2015. Ambient air pollution exacerbates asthma among populations around the world and may also contribute to new-onset asthma.

Objectives

We aimed to estimate the number of asthma emergency room visits and new onset asthma cases globally attributable to fine particulate matter ([Formula: see text]), ozone, and nitrogen dioxide ([Formula: see text]) concentrations.

Methods

We used epidemiological health impact functions combined with data describing population, baseline asthma incidence and prevalence, and pollutant concentrations. We constructed a new dataset of national and regional emergency room visit rates among people with asthma using published survey data.

Results

We estimated that 9–23 million and 5–10 million annual asthma emergency room visits globally in 2015 could be attributable to ozone and [Formula: see text], respectively, representing 8–20% and 4–9% of the annual number of global visits, respectively. The range reflects the application of central risk estimates from different epidemiological meta-analyses. Anthropogenic emissions were responsible for [Formula: see text] and 73% of ozone and [Formula: see text] impacts, respectively. Remaining impacts were attributable to naturally occurring ozone precursor emissions (e.g., from vegetation, lightning) and [Formula: see text] (e.g., dust, sea salt), though several of these sources are also influenced by humans. The largest impacts were estimated in China and India.

Conclusions

These findings estimate the magnitude of the global asthma burden that could be avoided by reducing ambient air pollution. We also identified key uncertainties and data limitations to be addressed to enable refined estimation. https://doi.org/10.1289/EHP3766.",2018-10-01 +21712250,Integration and visualization of host-pathogen data related to infectious diseases.,"

Motivation

Infectious disease research is generating an increasing amount of disparate data on pathogenic systems. There is a growing need for resources that effectively integrate, analyze, deliver and visualize these data, both to improve our understanding of infectious diseases and to facilitate the development of strategies for disease control and prevention.

Results

We have developed Disease View, an online host-pathogen resource that enables infectious disease-centric access, analysis and visualization of host-pathogen interactions. In this resource, we associate infectious diseases with corresponding pathogens, provide information on pathogens, pathogen virulence genes and the genetic and chemical evidences for the human genes that are associated with the diseases. We also deliver the relationships between pathogens, genes and diseases in an interactive graph and provide the geolocation reports of associated diseases around the globe in real time. Unlike many other resources, we have applied an iterative, user-centered design process to the entire resource development, including data acquisition, analysis and visualization.

Availability and implementation

Freely available at http://www.patricbrc.org; all major web browsers supported.

Contact

cmao@vbi.vt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-27 +26981387,Mapping transcriptome profiles of in vitro iPSC-derived cardiac differentiation to in utero heart development.,"The dataset includes microarray data (Affymetrix Mouse Genome 430 2.0 Array) from WT and Nos3(-/-) mouse embryonic heart ventricular tissues at 14.5 days post coitum (E14.5), induced pluripotent stem cells (iPSCs) derived from WT and Nos3(-/-) mouse tail tip fibroblasts, iPSC-differentiated cardiomyocytes at Day 11, and mouse embryonic stem cells (mESCs) and differentiated cardiomyocytes as positive controls for mouse iPSC differentiation. Both in utero (using embryonic heart tissues) and in vitro (using iPSCs and differentiated cells) microarray datasets were deposited to the NCBI Gene Expression Omnibus (GEO) database. The deposited data in GEO include raw microarray data, metadata for sample source information, experimental design, sample and data processing, and gene expression matrix. The data are available under GEO Access Number GSE69317 (GSE69315 for tissue sample microarray data, GSE69316 for iPSCs microarray data, http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc= GSE69317).",2015-12-30 +30841845,Positive-unlabelled learning of glycosylation sites in the human proteome.,"

Background

As an important type of post-translational modification (PTM), protein glycosylation plays a crucial role in protein stability and protein function. The abundance and ubiquity of protein glycosylation across three domains of life involving Eukarya, Bacteria and Archaea demonstrate its roles in regulating a variety of signalling and metabolic pathways. Mutations on and in the proximity of glycosylation sites are highly associated with human diseases. Accordingly, accurate prediction of glycosylation can complement laboratory-based methods and greatly benefit experimental efforts for characterization and understanding of functional roles of glycosylation. For this purpose, a number of supervised-learning approaches have been proposed to identify glycosylation sites, demonstrating a promising predictive performance. To train a conventional supervised-learning model, both reliable positive and negative samples are required. However, in practice, a large portion of negative samples (i.e. non-glycosylation sites) are mislabelled due to the limitation of current experimental technologies. Moreover, supervised algorithms often fail to take advantage of large volumes of unlabelled data, which can aid in model learning in conjunction with positive samples (i.e. experimentally verified glycosylation sites).

Results

In this study, we propose a positive unlabelled (PU) learning-based method, PA2DE (V2.0), based on the AlphaMax algorithm for protein glycosylation site prediction. The predictive performance of this proposed method was evaluated by a range of glycosylation data collected over a ten-year period based on an interval of three years. Experiments using both benchmarking and independent tests show that our method outperformed the representative supervised-learning algorithms (including support vector machines and random forests) and one-class learners, as well as currently available prediction methods in terms of F1 score, accuracy and AUC measures. In addition, we developed an online web server as an implementation of the optimized model (available at http://glycomine.erc.monash.edu/Lab/GlycoMine_PU/ ) to facilitate community-wide efforts for accurate prediction of protein glycosylation sites.

Conclusion

The proposed PU learning approach achieved a competitive predictive performance compared with currently available methods. This PU learning schema may also be effectively employed and applied to address the prediction problems of other important types of protein PTM site and functional sites.",2019-03-06 +30619195,Anti-flavi: A Web Platform to Predict Inhibitors of Flaviviruses Using QSAR and Peptidomimetic Approaches.,"Flaviviruses are arboviruses, which comprises more than 70 viruses, covering broad geographic ranges, and responsible for significant mortality and morbidity globally. Due to the lack of efficient inhibitors targeting flaviviruses, the designing of novel and efficient anti-flavi agents is an important problem. Therefore, in the current study, we have developed a dedicated prediction algorithm anti-flavi, to identify inhibition ability of chemicals and peptides against flaviviruses through quantitative structure-activity relationship based method. We extracted the non-redundant 2168 chemicals and 117 peptides from ChEMBL and AVPpred databases, respectively, with reported IC50 values. The regression based model developed on training/testing datasets of 1952 chemicals and 105 peptides displayed the Pearson's correlation coefficient (PCC) of 0.87, 0.84, and 0.87, 0.83 using support vector machine and random forest techniques correspondingly. We also explored the peptidomimetics approach, in which the most contributing descriptors of peptides were used to identify chemicals having anti-flavi potential. Conversely, the selected descriptors of chemicals performed well to predict anti-flavi peptides. Moreover, the developed model proved to be highly robust while checked through various approaches like independent validation and decoy datasets. We hope that our web server would prove a useful tool to predict and design the efficient anti-flavi agents. 
The anti-flavi webserver is freely available at URL http://bioinfo.imtech.res.in/manojk/antiflavi.",2018-12-18 +23061807,Development of a resource modelling tool to support decision makers in pandemic influenza preparedness: The AsiaFluCap Simulator.,"

Background

Health care planning for pandemic influenza is a challenging task which requires predictive models by which the impact of different response strategies can be evaluated. However, current preparedness plans and simulations exercises, as well as freely available simulation models previously made for policy makers, do not explicitly address the availability of health care resources or determine the impact of shortages on public health. Nevertheless, the feasibility of health systems to implement response measures or interventions described in plans and trained in exercises depends on the available resource capacity. As part of the AsiaFluCap project, we developed a comprehensive and flexible resource modelling tool to support public health officials in understanding and preparing for surges in resource demand during future pandemics.

Results

The AsiaFluCap Simulator is a combination of a resource model containing 28 health care resources and an epidemiological model. The tool was built in MS Excel© and contains a user-friendly interface which allows users to select mild or severe pandemic scenarios, change resource parameters and run simulations for one or multiple regions. Besides epidemiological estimations, the simulator provides indications on resource gaps or surpluses, and the impact of shortages on public health for each selected region. It allows for a comparative analysis of the effects of resource availability and consequences of different strategies of resource use, which can provide guidance on resource prioritising and/or mobilisation. Simulation results are displayed in various tables and graphs, and can also be easily exported to GIS software to create maps for geographical analysis of the distribution of resources.

Conclusions

The AsiaFluCap Simulator is freely available software (http://www.cdprg.org) which can be used by policy makers, policy advisors, donors and other stakeholders involved in preparedness for providing evidence based and illustrative information on health care resource capacities during future pandemics. The tool can inform both preparedness plans and simulation exercises and can help increase the general understanding of dynamics in resource capacities during a pandemic. The combination of a mathematical model with multiple resources and the linkage to GIS for creating maps makes the tool unique compared to other available software.",2012-10-12 +30081193,Modeling topographic regularity in structural brain connectivity with application to tractogram filtering.,"Topographic regularity is an important biological principle in brain connections that has been observed in various anatomical studies. However, there has been limited research on mathematically characterizing this property and applying it in the analysis of in vivo connectome imaging data. In this work, we propose a general mathematical model of topographic regularity for white matter fiber bundles based on previous neuroanatomical understanding. Our model is based on a novel group spectral graph analysis (GSGA) framework motivated by spectral graph theory and tensor decomposition. The GSGA provides a common set of eigenvectors for the graphs formed by topographic proximity of nearby tracts, which gives rises to the group graph spectral distance, or G2SD, for measuring the topographic regularity of each fiber tract in a tractogram. Based on this novel model of topographic regularity in fiber tracts, we then develop a tract filtering algorithm that can generally be applied to remove outliers in tractograms generated by any tractography algorithm. 
In the experimental results, we show that our novel algorithm outperforms existing methods in both simulation data from ISMRM 2015 Tractography Challenge and real data from the Human Connectome Project (HCP). On a large-scale dataset from 215 HCP subjects, we quantitatively show our method can significantly improve the retinotopy in the reconstruction of the optic radiation bundle. The software for the tract filtering algorithm developed in this work has also been publicly released on NITRC (https://www.nitrc.org/projects/connectopytool).",2018-08-04 +29900005,Mapping biological process relationships and disease perturbations within a pathway network.,"Molecular interaction networks are routinely used to map the organization of cellular function. Edges represent interactions between genes, proteins, or metabolites. However, in living cells, molecular interactions are dynamic, necessitating context-dependent models. Contextual information can be integrated into molecular interaction networks through the inclusion of additional molecular data, but there are concerns about completeness and relevance of this data. We developed an approach for representing the organization of human cellular processes using pathways as the nodes in a network. Pathways represent spatial and temporal sets of context-dependent interactions, generating a high-level network when linked together, which incorporates contextual information without the need for molecular interaction data. Analysis of the pathway network revealed linked communities representing functional relationships, comparable to those found in molecular networks, including metabolism, signaling, immunity, and the cell cycle. We mapped a range of diseases onto this network and find that pathways associated with diseases tend to be functionally connected, highlighting the perturbed functions that result in disease phenotypes. We demonstrated that disease pathways cluster within the network. 
We then examined the distribution of cancer pathways and showed that cancer pathways tend to localize within the signaling, DNA processes and immune modules, although some cancer-associated nodes are found in other network regions. Altogether, we generated a high-confidence functional network, which avoids some of the shortcomings faced by conventional molecular models. Our representation provides an intuitive functional interpretation of cellular organization, which relies only on high-quality pathway and Gene Ontology data. The network is available at https://data.mendeley.com/datasets/3pbwkxjxg9/1.",2018-06-11 +29344883,"Discovering Altered Regulation and Signaling Through Network-based Integration of Transcriptomic, Epigenomic, and Proteomic Tumor Data.","With the extraordinary rise in available biological data, biologists and clinicians need unbiased tools for data integration in order to reach accurate, succinct conclusions. Network biology provides one such method for high-throughput data integration, but comes with its own set of algorithmic problems and needed expertise. We provide a step-by-step guide for using Omics Integrator, a software package designed for the integration of transcriptomic, epigenomic, and proteomic data. Omics Integrator can be found at http://fraenkel.mit.edu/omicsintegrator .",2018-01-01 +29635310,3DClusterViSu: 3D clustering analysis of super-resolution microscopy data by 3D Voronoi tessellations.,"Motivation:Single-molecule localization microscopy (SMLM) can play an important role in integrated structural biology approaches to identify, localize and determine the 3D structure of cellular structures. While many tools exist for the 3D analysis and visualization of crystal or cryo-EM structures little exists for 3D SMLM data, which can provide unique insights but are particularly challenging to analyze in three dimensions especially in a dense cellular context. 
Results:We developed 3DClusterViSu, a method based on 3D Voronoi tessellations that allows local density estimation, segmentation and quantification of 3D SMLM data and visualization of protein clusters within a 3D tool. We show its robust performance on microtubules and histone proteins H2B and CENP-A with distinct spatial distributions. 3DClusterViSu will favor multi-scale and multi-resolution synergies to allow integrating molecular and cellular levels in the analysis of macromolecular complexes. Availability and implementation:3DClusterViSu is available under http://cbi-dev.igbmc.fr/cbi/voronoi3D. Supplementary information:Supplementary figures are available at Bioinformatics online.",2018-09-01 +23550210,Integrative analysis of complex cancer genomics and clinical profiles using the cBioPortal.,"The cBioPortal for Cancer Genomics (http://cbioportal.org) provides a Web resource for exploring, visualizing, and analyzing multidimensional cancer genomics data. The portal reduces molecular profiling data from cancer tissues and cell lines into readily understandable genetic, epigenetic, gene expression, and proteomic events. The query interface combined with customized data storage enables researchers to interactively explore genetic alterations across samples, genes, and pathways and, when available in the underlying data, to link these to clinical outcomes. The portal provides graphical summaries of gene-level data from multiple platforms, network visualization and analysis, survival analysis, patient-centric queries, and software programmatic access. The intuitive Web interface of the portal makes complex cancer genomics profiles accessible to researchers and clinicians without requiring bioinformatics expertise, thus facilitating biological discoveries. 
Here, we provide a practical guide to the analysis and visualization features of the cBioPortal for Cancer Genomics.",2013-04-02 +25055920,miRWalk database for miRNA-target interactions.,"miRWalk (http://mirwalk.uni-hd.de/) is a publicly available comprehensive resource, hosting the predicted as well as the experimentally validated microRNA (miRNA)-target interaction pairs. This database allows obtaining the possible miRNA-binding site predictions within the complete sequence of all known genes of three genomes (human, mouse, and rat). Moreover, it also integrates many novel features such as a comparative platform of miRNA-binding sites resulting from ten different prediction datasets, a holistic view of genetic networks of miRNA-gene pathway, and miRNA-gene-Online Mendelian Inheritance in Man disorder interactions, and unique experimentally validated information (e.g., cell lines, diseases, miRNA processing proteins). In this chapter, we describe a schematic workflow on how one can access the stored information from miRWalk and subsequently summarize its applications.",2014-01-01 +30562930,miRNAmotif-A Tool for the Prediction of Pre-miRNA⁻Protein Interactions. ,"MicroRNAs (miRNAs) are short, non-coding post-transcriptional gene regulators. In mammalian cells, mature miRNAs are produced from primary precursors (pri-miRNAs) using canonical protein machinery, which includes Drosha/DGCR8 and Dicer, or the non-canonical mirtron pathway. In plant cells, mature miRNAs are excised from pri-miRNAs by the DICER-LIKE1 (DCL1) protein complex. The involvement of multiple regulatory proteins that bind directly to distinct miRNA precursors in a sequence- or structure-dependent manner adds to the complexity of the miRNA maturation process. Here, we present a web server that enables searches for miRNA precursors that can be recognized by diverse RNA-binding proteins based on known sequence motifs to facilitate the identification of other proteins involved in miRNA biogenesis. 
The database used by the web server contains known human, murine, and Arabidopsis thaliana pre-miRNAs. The web server can also be used to predict new RNA-binding protein motifs based on a list of user-provided sequences. We show examples of miRNAmotif applications, presenting precursors that contain motifs recognized by Lin28, MCPIP1, and DGCR8 and predicting motifs within pre-miRNA precursors that are recognized by two DEAD-box helicases-DDX1 and DDX17. miRNAmotif is released as an open-source software under the MIT License. The code is available at GitHub (www.github.com/martynaut/mirnamotif). The webserver is freely available at http://mirnamotif.ibch.poznan.pl.",2018-12-17 +25048133,Customized predictions of peptide-MHC binding and T-cell epitopes using EPIMHC.,"Peptide binding to major histocompatibility complex (MHC) molecules is the most selective requisite for T-cell recognition. Therefore, prediction of peptide-MHC binding is the main basis for anticipating T-cell epitopes. A very popular and accurate method to predict peptide-MHC binding is based on motif-profiles and here we show how to make them using EPIMHC (http://imed.med.ucm.es/epimhc/). EPIMHC is a database of T-cell epitopes and MHC-binding peptides that unlike any related resource provides a framework for computational vaccinology. In this chapter, we describe how to derive peptide-MHC binding motif-profiles in EPIMHC and use them to predict peptide-MHC binding and T-cell epitopes. Moreover, we show evidence that customization of peptide-MHC binding predictors can lead to enhanced epitope predictions.",2014-01-01 +27924041,Exposome-Explorer: a manually-curated database on biomarkers of exposure to dietary and environmental factors.,"Exposome-Explorer (http://exposome-explorer.iarc.fr) is the first database dedicated to biomarkers of exposure to environmental risk factors. 
It contains detailed information on the nature of biomarkers, their concentrations in various human biospecimens, the study population where measured and the analytical techniques used for measurement. It also contains correlations with external exposure measurements and data on biological reproducibility over time. The data in Exposome-Explorer was manually collected from peer-reviewed publications and organized to make it easily accessible through a web interface for in-depth analyses. The database and the web interface were developed using the Ruby on Rails framework. A total of 480 publications were analyzed and 10 510 concentration values in blood, urine and other biospecimens for 692 dietary and pollutant biomarkers were collected. Over 8000 correlation values between dietary biomarker levels and food intake as well as 536 values of biological reproducibility over time were also compiled. Exposome-Explorer makes it easy to compare the performance between biomarkers and their fields of application. It should be particularly useful for epidemiologists and clinicians wishing to select panels of biomarkers that can be used in biomonitoring studies or in exposome-wide association studies, thereby allowing them to better understand the etiology of chronic diseases.",2016-10-24 +31725861,PCOSBase: a manually curated database of polycystic ovarian syndrome. ,"Polycystic ovarian syndrome (PCOS) is one of the main causes of infertility and affects 5-20% women of reproductive age. Despite the increased prevalence of PCOS, the mechanisms involved in its pathogenesis and pathophysiology remains unclear. The expansion of omics on studying the mechanisms of PCOS has lead into vast amounts of proteins related to PCOS resulting to a challenge in collating and depositing this deluge of data into one place. A knowledge-based repository named as PCOSBase was developed to systematically store all proteins related to PCOS. 
These proteins were compiled from various online databases and published expression studies. Rigorous criteria were developed to identify those that were highly related to PCOS. They were manually curated and analysed to provide additional information on gene ontologies, pathways, domains, tissue localizations and diseases that associate with PCOS. Other proteins that might interact with PCOS-related proteins identified from this study were also included. Currently, 8185 PCOS-related proteins were identified and assigned to 13 237 gene ontology vocabulary, 1004 pathways, 7936 domains, 29 disease classes, 1928 diseases, 91 tissues and 320 472 interactions. All publications related to PCOS are also indexed in PCOSBase. Data entries are searchable in the main page, search, browse and datasets tabs. Protein advanced search is provided to search for specific proteins. To date, PCOSBase has the largest collection of PCOS-related proteins. PCOSBase aims to become a self-contained database that can be used to further understand the PCOS pathogenesis and towards the identification of potential PCOS biomarkers. Database URL: http://pcosbase.org.",2017-01-01 +24135264,Frequent glycan structure mining of influenza virus data revealed a sulfated glycan motif that increased viral infection.,"

Motivation

It is well known influenza viruses recognize and bind terminal sialic acid (SA) on glycans that are found on the cell surface. In this work, we used a data mining technique to analyze the glycan array data of influenza viruses to find novel glycan structures other than SA that may be involved in viral infection.

Results

In addition to SA structures noted previously, we noted the sulfated structures in the mining results. For verification, we overexpressed the sulfotransferase that is involved in synthesizing these structures, and we performed a viral infection experiment to assess changes in infection in these cells. In our results, we found that there is a 70-fold increase in these cells compared with the control. Thus, we have found a novel pattern in glycan structures that may be involved in viral infection.

Availability and implementation

The Glycan Miner Tool is available from the RINGS resource at http://www.rings.t.soka.ac.jp.",2013-10-17 +23904744,ASRDb: A comprehensive resource for archaeal stress response genes.,"An organism's survival strategy under the constantly changing environment depends on its ability to sense and respond to changes in its environment. Archaea, being capable to grow under various extreme environmental conditions, provide valuable model for exploring how single-celled organisms respond to environmental stresses. However, no such approach has ever been made to make an integrated classification of various archaeal stress responses. Archaeal Stress Response Database (ASRDb) is a web accessible (http://121.241.218.70/ASRDb) database that represents the first online available resource providing a comprehensive overview of stress response genes of 66 archaeal genomes. This database currently contains almost 6000 stress specific genes of 66 archaeal genomes. All the stress specific genes are grouped into 17 different stress categories. A user-friendly interface has been designed to examine data using query tools. This database provides an efficient search engine for random and advanced database search operations. We have incorporated BLAST search options to the resulting sequences retrieved from database search operations. A site map page representing the schematic diagram will enable user to understand the logic behind the construction of the database. We have also provided a very rich and informative help page to make user familiar with the database. 
We sincerely believe that ASRDb will be of particular interest to the life science community and facilitates the biologists to unravel the role of stress specific genes in the adaptation of microorganisms under various extreme environmental conditions.",2013-07-12 +31579808,Renal Outcomes of Liver Transplantation Recipients Receiving Standard Immunosuppression and Early Renal Sparing Immunosuppression: A Retrospective Single Center Study.,"New-onset stage 4-5 chronic kidney disease (CKD) after liver transplantation (LT) is associated with high morbidity, mortality, and economic burden. In 2010, we instituted an early renal sparing immunosuppression (RSI) protocol for LT recipients with severe renal dysfunction (pre-LT dialysis/estimated glomerular filtration rate (eGFR)<30mL/min/1.73 m2 or post-LT acute kidney injury) consisting of 2 doses of basiliximab for induction and delaying tacrolimus to post-LT day 4-7. We examined the effect of early RSI on post-LT renal outcomes.

Methods

Data on all adults who had LT between January 1, 2010, and December 12, 2014 were collected. We calculated the renal risk index (RRI) score for each LT recipient (https://rri.med.umich.edu). Primary outcome was new-onset post-LT stage 4-5 CKD.

Results

Of 214 LT recipients, 121 (57%) received early RSI and 93 (43%) received standard immunosuppression. Cumulative incidence of new-onset stage 4-5 CKD was higher in early RSI compared with standard immunosuppression (P = 0.03). Female sex and RRI score were the significant risk factors for development of post-LT stage CKD in the entire study cohort as well as the LT recipients with RRI ≥ sixth decile (high-risk group).

Conclusions

Delaying tacrolimus initiation combined with basiliximab induction did not have a durable effect on long-term renal outcomes in high-risk LT recipients. Further studies are needed to identify the effective strategies to preserve renal function by targeting patients at high risk for CKD progression.",2019-08-08 +27131380,The MPI bioinformatics Toolkit as an integrative platform for advanced protein sequence and structure analysis.,"The MPI Bioinformatics Toolkit (http://toolkit.tuebingen.mpg.de) is an open, interactive web service for comprehensive and collaborative protein bioinformatic analysis. It offers a wide array of interconnected, state-of-the-art bioinformatics tools to experts and non-experts alike, developed both externally (e.g. BLAST+, HMMER3, MUSCLE) and internally (e.g. HHpred, HHblits, PCOILS). While a beta version of the Toolkit was released 10 years ago, the current production-level release has been available since 2008 and has serviced more than 1.6 million external user queries. The usage of the Toolkit has continued to increase linearly over the years, reaching more than 400 000 queries in 2015. In fact, through the breadth of its tools and their tight interconnection, the Toolkit has become an excellent platform for experimental scientists as well as a useful resource for teaching bioinformatic inquiry to students in the life sciences. In this article, we report on the evolution of the Toolkit over the last ten years, focusing on the expansion of the tool repertoire (e.g. CS-BLAST, HHblits) and on infrastructural work needed to remain operative in a changing web environment.",2016-04-29 +32159523,"The Role of Historical Context in Understanding Past Climate, Pollution and Health Data in Trans-disciplinary Studies: Reply to Comments on More et al., 2017.","Understanding the context from which evidence emerges is of paramount importance in reaching robust conclusions in scientific inquiries. This is as true of the present as it is of the past. 
In a trans-disciplinary study such as More et al. (2017, https://doi.org/10.1002/2017GH000064) and many others appearing in this and similar journals, a proper analysis of context demands the use of historical evidence. This includes demographic, epidemiological, and socio-economic data-common in many studies of the impact of anthropogenic pollution on human health-and, as in this specific case, also geoarchaeological evidence. These records anchor climate and pollution data in the geographic and human circumstances of history, without which we lose a fundamental understanding of the data itself. This article addresses Hinkley (2018, https://doi.org/10.1002/2017GH000105) by highlighting the importance of context, focusing on the historical and archaeological evidence, and then discussing atmospheric deposition and circulation in the specific region of our study. Since many of the assertions in Bindler (2018, https://doi.org/10.1002/2018GH000135) are congruent with our findings and directly contradict Hinkley (2018), this reply refers to Bindler (2018), whenever appropriate, and indicates where our evidence diverges.",2018-05-31 +29506019,Parallelization of MAFFT for large-scale multiple sequence alignments.,"Summary:We report an update for the MAFFT multiple sequence alignment program to enable parallel calculation of large numbers of sequences. The G-INS-1 option of MAFFT was recently reported to have higher accuracy than other methods for large data, but this method has been impractical for most large-scale analyses, due to the requirement of large computational resources. We introduce a scalable variant, G-large-INS-1, which has equivalent accuracy to G-INS-1 and is applicable to 50 000 or more sequences. Availability and implementation:This feature is available in MAFFT versions 7.355 or later at https://mafft.cbrc.jp/alignment/software/mpi.html. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +29512175,Microbial Diversity in the Eukaryotic SAR Clade: Illuminating the Darkness Between Morphology and Molecular Data.,"Despite their diversity and ecological importance, many areas of the SAR-Stramenopila, Alveolata, and Rhizaria-clade are poorly understood as the majority (90%) of SAR species lack molecular data and only 5% of species are from well-sampled families. Here, we review and summarize the state of knowledge about the three major clades of SAR, describing the diversity within each clade and identifying synapomorphies when possible. We also assess the ""dark area"" of SAR: the morphologically described species that are missing molecular data. The majority of molecular data for SAR lineages are characterized from marine samples and vertebrate hosts, highlighting the need for additional research effort in areas such as freshwater and terrestrial habitats and ""non-vertebrate"" hosts. We also describe the paucity of data on the biogeography of SAR species, and point to opportunities to illuminate diversity in this major eukaryotic clade. See also the video abstract here: https://youtu.be/_VUXqaX19Rw.",2018-03-07 +30914507,Encephalomyocarditis Virus Entry Unveiled.,"Picornaviruses are a widespread group of pathogens that can cause diverse pathologies. Pathogenesis is thought to be driven by the tissue-specific tropisms displayed by these viruses. For example, many picornaviruses can infect the heart and cause viral myocarditis. Encephalomyocarditis virus (EMCV) is a rodent pathogen that causes myocarditis in rodent models and has been used to model this biology. However, the receptor and entry requirements for this virus are poorly understood. L. E. Bazzone, M. King, C. R. MacKay, P. P. Kyawe, et al. 
(mBio 10:e02734-18, 2019, https://doi.org/10.1128/mBio.02734-18) tackled this problem using CRISPR knockout screening in human cells that are susceptible to EMCV and identified ADAM9 as an essential entry factor for EMCV in mouse and human cells. Since the extracellular domain but not the enzymatic activity or intracellular domain is required for infection, the data suggest that ADAM9 acts as an entry receptor or at an early step in the process, shedding light on the biology of EMCV infection and pathogenesis.",2019-03-26 +21785142,Ensembl BioMarts: a hub for data retrieval across taxonomic space.,"For a number of years the BioMart data warehousing system has proven to be a valuable resource for scientists seeking a fast and versatile means of accessing the growing volume of genomic data provided by the Ensembl project. The launch of the Ensembl Genomes project in 2009 complemented the Ensembl project by utilizing the same visualization, interactive and programming tools to provide users with a means for accessing genome data from a further five domains: protists, bacteria, metazoa, plants and fungi. The Ensembl and Ensembl Genomes BioMarts provide a point of access to the high-quality gene annotation, variation data, functional and regulatory annotation and evolutionary relationships from genomes spanning the taxonomic space. This article aims to give a comprehensive overview of the Ensembl and Ensembl Genomes BioMarts as well as some useful examples and a description of current data content and future objectives. Database URLs: http://www.ensembl.org/biomart/martview/; http://metazoa.ensembl.org/biomart/martview/; http://plants.ensembl.org/biomart/martview/; http://protists.ensembl.org/biomart/martview/; http://fungi.ensembl.org/biomart/martview/; http://bacteria.ensembl.org/biomart/martview/.",2011-07-23 +26861660,"Value, but high costs in post-deposition data curation. 
","Discoverability of sequence data in primary data archives is proportional to the richness of contextual information associated with the data. Here, we describe an exercise in the improvement of contextual information surrounding sample records associated with metagenomics sequence reads available in the European Nucleotide Archive. We outline the annotation process and summarize findings of this effort aimed at increasing usability of publicly available environmental data. Furthermore, we emphasize the benefits of such an exercise and detail its costs. We conclude that such a third party annotation approach is expensive and has value as an element of curation, but should form only part of a more sustainable submitter-driven approach. Database URL: http://www.ebi.ac.uk/ena.",2016-02-09 +30010567,Low Rank Matrix Recovery via Robust Outlier Estimation. ,"In practice, high-dimensional data are typically sampled from low-dimensional subspaces, but with intrusion of outliers and/or noises. Recovering the underlying structure and the pollution from the observations is of utmost importance to understanding the data. Besides properly modeling the subspace structure, how to handle the pollution is a core question regarding the recovery quality, the main origins of which include small dense noises and gross sparse outliers. Compared with the small noises, the outliers more likely ruin the recovery, as their arbitrary magnitudes can dominate the fidelity, and thus lead to misleading/erroneous results. Concerning the above, this paper concentrates on robust outlier estimate for low rank matrix recovery, termed as ROUTE. The principle is to classify each entry as an outlier or an inlier (with confidence). We formulate the outlier screening and the recovery into a unified framework. 
To seek the optimal solution to the problem, we first introduce a block coordinate descent based optimizer (ROUTE-BCD), then customize an alternating direction method of multipliers based one (ROUTE-ADMM). Through analyzing theoretical properties and practical behaviors, ROUTE-ADMM shows its superiority over ROUTE-BCD in terms of computational complexity, initialization insensitivity and recovery accuracy. Extensive experiments on both synthetic and real data are conducted to show the efficacy of our strategy and reveal its significant improvement over other state-of-the-art alternatives. Our code is publicly available at https://sites.google.com/view/xjguo/route.",2018-07-12 +30329090,CLIPick: a sensitive peak caller for expression-based deconvolution of HITS-CLIP signals.,"High-throughput sequencing of RNAs isolated by crosslinking immunoprecipitation (HITS-CLIP, also called CLIP-Seq) has been used to map global RNA-protein interactions. However, a critical caveat of HITS-CLIP results is that they contain non-linear background noise-different extent of non-specific interactions caused by individual transcript abundance-that has been inconsiderately normalized, resulting in sacrifice of sensitivity. To properly deconvolute RNA-protein interactions, we have implemented CLIPick, a flexible peak calling pipeline for analyzing HITS-CLIP data, which statistically determines the signal-to-noise ratio for each transcript based on the expression-dependent background simulation. Comprising of streamlined Python modules with an easy-to-use standalone graphical user interface, CLIPick robustly identifies significant peaks and quantitatively defines footprint regions within which RNA-protein interactions were occurred. CLIPick outperforms other peak callers in accuracy and sensitivity, selecting the largest number of peaks particularly in lowly expressed transcripts where such marginal signals are hard to discriminate. 
Specifically, the application of CLIPick to Argonaute (Ago) HITS-CLIP data were sensitive enough to uncover extended features of microRNA target sites, and these sites were experimentally validated. CLIPick enables to resolve critical interactions in a wide spectrum of transcript levels and extends the scope of HITS-CLIP analysis. CLIPick is available at: http://clip.korea.ac.kr/clipick/.",2018-11-01 +26578564,Hymenoptera Genome Database: integrating genome annotations in HymenopteraMine.,"We report an update of the Hymenoptera Genome Database (HGD) (http://HymenopteraGenome.org), a model organism database for insect species of the order Hymenoptera (ants, bees and wasps). HGD maintains genomic data for 9 bee species, 10 ant species and 1 wasp, including the versions of genome and annotation data sets published by the genome sequencing consortiums and those provided by NCBI. A new data-mining warehouse, HymenopteraMine, based on the InterMine data warehousing system, integrates the genome data with data from external sources and facilitates cross-species analyses based on orthology. New genome browsers and annotation tools based on JBrowse/WebApollo provide easy genome navigation, and viewing of high throughput sequence data sets and can be used for collaborative genome annotation. All of the genomes and annotation data sets are combined into a single BLAST server that allows users to select and combine sequence data sets to search.",2015-11-17 +31435950,ALTIS: A fast and automatic lung and trachea CT-image segmentation method.,"

Purpose

The automated segmentation of each lung and trachea in CT scans is commonly taken as a solved problem. Indeed, existing approaches may easily fail in the presence of some abnormalities caused by a disease, trauma, or previous surgery. For robustness, we present ALTIS (implementation is available at http://lids.ic.unicamp.br/downloads) - a fast automatic lung and trachea CT-image segmentation method that relies on image features and relative shape- and intensity-based characteristics less affected by most appearance variations of abnormal lungs and trachea.

Methods

ALTIS consists of a sequence of image foresting transforms (IFTs) organized in three main steps: (a) lung-and-trachea extraction, (b) seed estimation inside background, trachea, left lung, and right lung, and (c) their delineation such that each object is defined by an optimum-path forest rooted at its internal seeds. We compare ALTIS with two methods based on shape models (SOSM-S and MALF), and one algorithm based on seeded region growing (PTK).

Results

The experiments involve the highest number of scans found in literature - 1255 scans, from multiple public data sets containing many anomalous cases, being only 50 normal scans used for training and 1205 scans used for testing the methods. Quantitative experiments are based on two metrics, DICE and ASSD. Furthermore, we also demonstrate the robustness of ALTIS in seed estimation. Considering the test set, the proposed method achieves an average DICE of 0.987 for both lungs and 0.898 for the trachea, whereas an average ASSD of 0.938 for the right lung, 0.856 for the left lung, and 1.316 for the trachea. These results indicate that ALTIS is statistically more accurate and considerably faster than the compared methods, being able to complete segmentation in a few seconds on modern PCs.

Conclusion

ALTIS is the most effective and efficient choice among the compared methods to segment left lung, right lung, and trachea in anomalous CT scans for subsequent detection, segmentation, and quantitative analysis of abnormal structures in the lung parenchyma and pleural space.",2019-09-11 +25190456,The Candidate Cancer Gene Database: a database of cancer driver genes from forward genetic screens in mice.,"Identification of cancer driver gene mutations is crucial for advancing cancer therapeutics. Due to the overwhelming number of passenger mutations in the human tumor genome, it is difficult to pinpoint causative driver genes. Using transposon mutagenesis in mice many laboratories have conducted forward genetic screens and identified thousands of candidate driver genes that are highly relevant to human cancer. Unfortunately, this information is difficult to access and utilize because it is scattered across multiple publications using different mouse genome builds and strength metrics. To improve access to these findings and facilitate meta-analyses, we developed the Candidate Cancer Gene Database (CCGD, http://ccgd-starrlab.oit.umn.edu/). The CCGD is a manually curated database containing a unified description of all identified candidate driver genes and the genomic location of transposon common insertion sites (CISs) from all currently published transposon-based screens. To demonstrate relevance to human cancer, we performed a modified gene set enrichment analysis using KEGG pathways and show that human cancer pathways are highly enriched in the database. We also used hierarchical clustering to identify pathways enriched in blood cancers compared to solid cancers. 
The CCGD is a novel resource available to scientists interested in the identification of genetic drivers of cancer.",2014-09-04 +30627609,Dataset from de novo transcriptome assembly of Nephelium lappaceum aril.,"Nephelium lappaceum (Rambutan), is one of tropical fruit in which - cultivated widely in Indonesia and has good taste and aroma. However, the transcriptomic study of rambutan has limited. In this study, we performed transcriptome assembly using paired-end Illumina technology. The assembled transcriptome was constructed using Trinity and after filtering and removal sequences redundancy produced 36,303 contigs. The contig ranged 201-11,770 bp and N50 has 1327 bp. The contig was annotated with several databases such as SwissProt, TrEMBL, and nr/nt of NCBI databases. The raw reads are deposited in the DDBJ with DRA accession number, DRA007359: https://www.ncbi.nlm.nih.gov/sra/?term=DRA007359. The assembled contigs of transcriptome are deposited in the DDBJ TSA repository with accession number IADQ01000001-IADQ01036303: ftp://ftp.ddbj.nig.ac.jp/ddbj_database/tsa/IADQ.gz and also can be accessed at http://rujakbase.id.",2018-12-14 +31652106,Environmentally Relevant Perinatal Exposures to Bisphenol A Disrupt Postnatal Kiss1/NKB Neuronal Maturation and Puberty Onset in Female Mice.,"Background: The timing of puberty is highly sensitive to environmental factors, including endocrine disruptors. Among them, bisphenol A (BPA) has been previously analyzed as potential modifier of puberty. Yet, disparate results have been reported, with BPA advancing, delaying, or being neutral in its effects on puberty onset. Likewise, mechanistic analyses addressing the central and peripheral actions/targets of BPA at puberty remain incomplete and conflictive.

Objective: We aimed to provide a comprehensive characterization of the impact of early BPA exposures, especially at low, real-life doses, on the postnatal development of hypothalamic Kiss1/NKB neurons, and its functional consequences on female pubertal maturation.

Methods: Pregnant CD1 female mice were orally administered BPA at 5, 10, or 40μg/kg body weight (BW)/d from gestational day 11 to postnatal day 8 (PND8). Vaginal opening, as an external marker of puberty onset, was monitored daily from PND19 to PND30 in the female offspring. Blood and brain samples were collected at PND12, 15, 18, 21, and 30 for measuring circulating levels of gonadotropins and analyzing the hypothalamic expression of Kiss1/kisspeptin and NKB.

Results: Perinatal exposure to BPA, in a range of doses largely below the no observed adverse effect level (NOAEL; 5mg/kg BW/d, according to the FDA), was associated with pubertal differences in the female progeny compared with those exposed to vehicle alone, with an earlier age of vaginal opening but consistently lower levels of circulating luteinizing hormone. Mice treated with BPA exhibited a persistent, but divergent, impairment of Kiss1 neuronal maturation, with more kisspeptin cells in the rostral (RP3V) hypothalamus but consistently fewer kisspeptin neurons in the arcuate nucleus (ARC). Detailed quantitative analysis of the ARC population, essential for pubertal development, revealed that mice treated with BPA had persistently lower Kiss1 expression during (pre)pubertal maturation, which was associated with lower Tac2 (encoding NKB) levels, even at low doses (5μg/kg BW/d), in the range of the tolerable daily intake (TDI), recently updated by the European Food Safety Authority.

Conclusions: Our data attest to the consistent, but divergent, effects of gestational exposures to low concentrations of BPA, via the oral route, on phenotypic and neuroendocrine markers of puberty in female mice, with an unambiguous impact on the developmental maturation not only of Kiss1, but also of the NKB system, both essential regulators of puberty onset. https://doi.org/10.1289/EHP5570.",2019-10-25 +27587678,Pathway-based approach using hierarchical components of collapsed rare variants.,"

Motivation

To address 'missing heritability' issue, many statistical methods for pathway-based analyses using rare variants have been proposed to analyze pathways individually. However, neglecting correlations between multiple pathways can result in misleading solutions, and pathway-based analyses of large-scale genetic datasets require massive computational burden. We propose a Pathway-based approach using HierArchical components of collapsed RAre variants Of High-throughput sequencing data (PHARAOH) for the analysis of rare variants by constructing a single hierarchical model that consists of collapsed gene-level summaries and pathways and analyzes entire pathways simultaneously by imposing ridge-type penalties on both gene and pathway coefficient estimates; hence our method considers the correlation of pathways without constraint by a multiple testing problem.

Results

Through simulation studies, the proposed method was shown to have higher statistical power than the existing pathway-based methods. In addition, our method was applied to the large-scale whole-exome sequencing data with levels of a liver enzyme using two well-known pathway databases Biocarta and KEGG. This application demonstrated that our method not only identified associated pathways but also successfully detected biologically plausible pathways for a phenotype of interest. These findings were successfully replicated by an independent large-scale exome chip study.

Availability and implementation

An implementation of PHARAOH is available at http://statgen.snu.ac.kr/software/pharaoh/

Contact

tspark@stats.snu.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +29920488,"The Novel PrisMax Continuous Renal Replacement Therapy System in a Multinational, Multicentre Pilot Setting.","

Background/aims

We assessed how the novel PrisMax continuous renal replacement therapy (CRRT) system performed in a prospective international multicentre setting. We compared this device to its predecessor, the Prismaflex, with regards to multiple treatment parameters. Additionally, we performed a survey, aiming to measure user satisfaction.

Methods

Data was prospectively collected from 7 intensive care units (ICU) in 6 countries. The PrisMax device data logs constituted the raw material. Clinical parameters like treatment time, filter life span, downtime, delivered dose and number and type of alarms were recorded. A user questionnaire was sent out to 3 of the participating ICUs.

Results

Filter life, downtime, blood pump stops, bag changing time and number of malfunction alarms showed significantly improved values compared to the historic Prismaflex data. The survey showed high scores with regards to user friendliness.

Conclusion

The PrisMax CRRT device is safe and outperformed its' previous generation counterpart in virtually all aspects. Video Journal Club ""Cappuccino with Claudio Ronco"" at http://www.karger.com/?doi=489213.",2018-06-19 +26649067,"SensorDB: a virtual laboratory for the integration, visualization and analysis of varied biological sensor data.","

Background

To our knowledge, there is no software or database solution that supports large volumes of biological time series sensor data efficiently and enables data visualization and analysis in real time. Existing solutions for managing data typically use unstructured file systems or relational databases. These systems are not designed to provide instantaneous response to user queries. Furthermore, they do not support rapid data analysis and visualization to enable interactive experiments. In large scale experiments, this behaviour slows research discovery, discourages the widespread sharing and reuse of data that could otherwise inform critical decisions in a timely manner and encourage effective collaboration between groups.

Results

In this paper we present SensorDB, a web based virtual laboratory that can manage large volumes of biological time series sensor data while supporting rapid data queries and real-time user interaction. SensorDB is sensor agnostic and uses web-based, state-of-the-art cloud and storage technologies to efficiently gather, analyse and visualize data.

Conclusions

Collaboration and data sharing between different agencies and groups is thereby facilitated. SensorDB is available online at http://sensordb.csiro.au.",2015-12-08 +31053897,[Multimodal treatment of sarcomas: standards and new aspects in pharmacological and radio-oncological treatment].,"

Background

Patients with localized high-risk soft tissue sarcoma are at high risk for both local recurrence and distant metastases despite optimal surgical treatment.

Objective

Importance of preoperative or postoperative chemotherapy and hyperthermia.

Methods

Evaluation and overview of published study results.

Results

Preoperative or postoperative radiotherapy is considered as standard for patients with localized high-risk soft tissue sarcoma. The results of two randomized studies on neoadjuvant chemotherapy showed a survival benefit. As both studies did not have a control arm without chemotherapy but in one case the superiority of anthracycline/ifosfamide-based chemotherapy in combination with hyperthermia over chemotherapy alone and in the other case the superiority of anthracycline/ifosfamide-based chemotherapy over histology-specific chemotherapy were shown, the formal proof of the superiority of this treatment is still missing. Stratifying the patients treated in the so far largest randomized adjuvant chemotherapy trial according to current risk criteria ( http://www.sarculator.com ) revealed a significant survival benefit for patients at high risk of recurrence.

Conclusion

For high-risk soft tissue sarcomas, multimodal treatment strategies involving perioperative chemotherapy, radiotherapy and, if possible, hyperthermia should be considered in addition to tumor resection. Preoperative chemotherapy should be given preference over postoperative chemotherapy based on available data.",2019-06-01 +25011454,"Cohort Profile: Footprints in Time, the Australian Longitudinal Study of Indigenous Children.","Indigenous Australians experience profound levels of disadvantage in health, living standards, life expectancy, education and employment, particularly in comparison with non-Indigenous Australians. Very little information is available about the healthy development of Australian Indigenous children; the Longitudinal Study of Indigenous Children (LSIC) is designed to fill this knowledge gap.This dataset provides an opportunity to follow the development of up to 1759 Indigenous children. LSIC conducts annual face-to-face interviews with children (aged 0.5-2 and 3.5-5 years at baseline in 2008) and their caregivers. This represents between 5% and 10% of the total population of Indigenous children in these age groups, including families of varied socioeconomic and cultural backgrounds. Study topics include: the physical, social and emotional well-being of children and their caregivers; language; culture; parenting; and early childhood education.LSIC is a shared resource, formed in partnership with communities; its data are readily accessible through the Australian Government Department of Social Services (see http://dss.gov.au/lsic for data and access arrangements). As one of very few longitudinal studies of Indigenous children, and the only national one, LSIC will enable an understanding of Indigenous children from a wide range of environments and cultures. 
Findings from LSIC form part of a growing infrastructure from which to understand Indigenous child health.",2014-07-09 +29968567,Elucidating Gene-by-Environment Interactions Associated with Differential Susceptibility to Chemical Exposure.,"

Background

Modern societies are exposed to vast numbers of potentially hazardous chemicals. Despite demonstrated linkages between chemical exposure and severe health effects, there are limited, often conflicting, data on how adverse health effects of exposure differ across individuals.

Objectives

We tested the hypothesis that population variability in response to certain chemicals could elucidate a role for gene-environment interactions (GxE) in differential susceptibility.

Methods

High-throughput screening (HTS) data on thousands of chemicals in genetically heterogeneous zebrafish were leveraged to identify a candidate chemical (Abamectin) with response patterns indicative of population susceptibility differences. We tested the prediction by generating genome-wide sequence data for 276 individual zebrafish displaying susceptible (Affected) vs. resistant (Unaffected) phenotypes following identical chemical exposure.

Results

We found GxE associated with differential susceptibility in the sox7 promoter region and then confirmed gene expression differences between phenotypic response classes.

Conclusions

The results for Abamectin in zebrafish demonstrate that GxE associated with naturally occurring, population genetic variation play a significant role in mediating individual response to chemical exposure. https://doi.org/10.1289/EHP2662.",2018-06-28 +29604342,GPS: Identification of disease genes by rank aggregation of multi-genomic scoring schemes.,"In solving the gene prioritization problem, ranking candidate genes from most to least promising is attempted before further experimental validation. Integrating the results of various data sources and methods tends to result in a better performance when solving the gene prioritization problem. Therefore, a wide range of datasets and algorithms was investigated; these included topological features of protein networks, physicochemical characteristics and blast similarity scores of protein sequences, gene ontology, biological pathways, and tissue-based data sources. The novelty of this study lies in how the best-performing methods and reliable multi-genomic data sources were applied in an efficient two-step approach. In the first step, various multi-genomic data sources and algorithms were evaluated and seven best-performing rankers were then applied to prioritize candidate genes in different ways. In the second step, global prioritization was obtained by aggregating several scoring schemes. The results showed that protein networks, functional linkage networks, gene ontology, and biological pathway data sources have a significant impact on the quality of the gene prioritization approach. The findings also demonstrated a direct relationship between the degree of genes and the ranking quality of the evaluated tools. This approach outperformed previously published algorithms (e.g., DIR, GPEC, GeneDistiller, and Endeavour) in all evaluation metrices and led to the development of GPS software. Its user-friendly interface and accuracy makes GPS a powerful tool for the identification of human disease genes. 
GPS is available at http://gpsranker.com and http://LBB.ut.ac.ir.",2018-03-28 +30969138,The Association between Mandated Preseason Heat Acclimatization Guidelines and Exertional Heat Illness during Preseason High School American Football Practices.,"

Background

The risk of heat-related illness and death may continue to increase in many locations as a consequence of climate change, but information on the effectiveness of policies to protect populations from the adverse effects of excessive heat is limited. In 2009, the National Athletic Trainers' Association Inter-Association Task Force (NATA-IATF) released guidelines to reduce exertional heat illness (EHI) among U.S. high school athletes participating in preseason sports activities, including preseason practice sessions for American football. A subset of state high school athletic associations have implemented state-mandated guidelines consistent with the 2009 NATA-IATF recommendations, but their effectiveness for reducing preseason EHI is unknown.

Objectives

This study examines the association between the enactment of state high school athletic association-mandated NATA-IATF guidelines and the rate of EHI among high school students during preseason American football practice sessions.

Methods

We performed a quasi-experimental interrupted time-series study of EHI during high school American football practices in the 2005/2006-2016/2017 school years. We estimated state-level EHI rates using High School Reporting Information Online injury and athlete-exposure data, and used generalized estimating equations Poisson regression models to estimate incidence rate ratios (IRRs) and 95% confidence intervals (CIs) comparing state-years with and without mandated NATA-IATF guidelines. State-level covariates included state-year-specific average August temperatures, yearly deviations from each state's August average temperature across the study period, and school year.

Results

Data were available for 455 state-years from 48 states, including 32 state-years (7.0%) from 8 states when mandated guidelines consistent with the NATA-IATF recommendations were implemented. During an estimated 2,697,089 athlete-exposures, 190 EHIs were reported. Estimated preseason EHI rates were lower during state-years with versus without mandated guidelines (adjusted [Formula: see text], 95% CI: 0.23, 0.87).

Conclusions

Our findings suggest that high school athletes would benefit from enactment of the 2009 NATA-IATF guidelines. Similar analyses of the effectiveness of other public health policies to reduce adverse health effects from ambient heat are warranted. https://doi.org/10.1289/EHP4163.",2019-04-01 +29506467,"ToxPi Graphical User Interface 2.0: Dynamic exploration, visualization, and sharing of integrated data models.","

Background

Drawing integrated conclusions from diverse source data requires synthesis across multiple types of information. The ToxPi (Toxicological Prioritization Index) is an analytical framework that was developed to enable integration of multiple sources of evidence by transforming data into integrated, visual profiles. Methodological improvements have advanced ToxPi and expanded its applicability, necessitating a new, consolidated software platform to provide functionality, while preserving flexibility for future updates.

Results

We detail the implementation of a new graphical user interface for ToxPi (Toxicological Prioritization Index) that provides interactive visualization, analysis, reporting, and portability. The interface is deployed as a stand-alone, platform-independent Java application, with a modular design to accommodate inclusion of future analytics. The new ToxPi interface introduces several features, from flexible data import formats (including legacy formats that permit backward compatibility) to similarity-based clustering to options for high-resolution graphical output.

Conclusions

We present the new ToxPi interface for dynamic exploration, visualization, and sharing of integrated data models. The ToxPi interface is freely-available as a single compressed download that includes the main Java executable, all libraries, example data files, and a complete user manual from http://toxpi.org .",2018-03-05 +31119031,Epidemic curves made easy using the R package incidence.,"The epidemiological curve (epicurve) is one of the simplest yet most useful tools used by field epidemiologists, modellers, and decision makers for assessing the dynamics of infectious disease epidemics. Here, we present the free, open-source package incidence for the R programming language, which allows users to easily compute, handle, and visualise epicurves from unaggregated linelist data. This package was built in accordance with the development guidelines of the R Epidemics Consortium (RECON), which aim to ensure robustness and reliability through extensive automated testing, documentation, and good coding practices. As such, it fills an important gap in the toolbox for outbreak analytics using the R software, and provides a solid building block for further developments in infectious disease modelling. incidence is available from https://www.repidemicsconsortium.org/incidence.",2019-01-31 +26484093,Gene expression profiling to define the cell intrinsic role of the SKI proto-oncogene in hematopoiesis and myeloid neoplasms.,"The proto-oncogene SKI is highly expressed in human myeloid leukemia and also in murine hematopoietic stem cells. However, its operative relevance in these cells remains elusive. We have over-expressed SKI to define its intrinsic role in hematopoiesis and myeloid neoplasms, which resulted in a robust competitive advantage upon transplantation, a complete dominance of the stem and progenitor compartments, and a marked enhancement of myeloid differentiation at the expense of other lineages. 
Accordingly, enforced expression of SKI induced gene signatures associated with hematopoietic stem cells and myeloid differentiation. Here we provide detailed experimental methods and analysis for the gene expression profiling described in our recently published study of Singbrant et al. (2014) in Haematologica. Our data sets (available at http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE39457) provide a resource for exploring the underlying molecular mechanisms of the involvement of the proto-oncogene SKI in hematopoietic stem cell function and development of myeloid neoplasms.",2014-07-04 +30810766,No evidence of a causal association of type 2 diabetes and glucose metabolism with atrial fibrillation.,"

Aims/hypothesis

Several epidemiological studies have shown an increased risk of atrial fibrillation in individuals with type 2 diabetes or milder forms of dysglycaemia. We aimed to assess whether this relation is causal using a Mendelian randomisation approach.

Methods

Two-sample Mendelian randomisation was used to obtain estimates of the influence of type 2 diabetes, fasting blood glucose (FBG), and HbA1c on the risk of atrial fibrillation. Instrumental variables were constructed using available summary statistics from meta-analyses of genome-wide association studies (GWAS) for type 2 diabetes and associated phenotypes. Pleiotropic SNPs were excluded from the analyses. The most recent GWAS meta-analysis summary statistics for atrial fibrillation, which included over 1 million individuals (approximately 60,000 individuals with atrial fibrillation) was used for outcome analysis.

Results

Neither type 2 diabetes (OR 1.01 [95% CI 0.98, 1.03]; p = 0.37), nor FBG (OR 0.95 [95% CI 0.82, 1.09] per mmol/l; p = 0.49) or HbA1c (OR 1.01 [95% CI, 0.85, 1.17] per mmol/mol [%]; p = 0.88) were associated with atrial fibrillation in Mendelian randomisation analyses. We had >80% statistical power to detect ORs of 1.08, 1.06 and 1.09 or larger for type 2 diabetes, FBG and HbA1c, respectively, for associations with atrial fibrillation.

Conclusions/interpretation

This Mendelian randomisation analysis does not support a causal role of clinical significance between genetically programmed type 2 diabetes, FBG or HbA1c and development of atrial fibrillation. These data suggest that drug treatment to reduce dysglycaemia is unlikely to be an effective strategy for atrial fibrillation prevention.

Data availability

The datasets analysed during the current study are available from the following repository: Nielsen JB, Thorolfsdottir RB, Fritsche LG, et al (2018) GWAS summary statistics for AF (N=60,620 AF cases and 970,216 controls). Center for Statistical Genetics: http://csg.sph.umich.edu/willer/public/afib2018/nielsen-thorolfsdottir-willer-NG2018-AFib-gwas-summary-statistics.tbl.gz.",2019-02-27 +22086958,DBTSS: DataBase of Transcriptional Start Sites progress report in 2012.,"To support transcriptional regulation studies, we have constructed DBTSS (DataBase of Transcriptional Start Sites), which contains exact positions of transcriptional start sites (TSSs), determined with our own technique named TSS-seq, in the genomes of various species. In its latest version, DBTSS covers the data of the majority of human adult and embryonic tissues: it now contains 418 million TSS tag sequences from 28 tissues/cell cultures. Moreover, we integrated a series of our own transcriptomic data, such as the RNA-seq data of subcellular-fractionated RNAs as well as the ChIP-seq data of histone modifications and the binding of RNA polymerase II/several transcription factors in cultured cell lines into our original TSS information. We also included several external epigenomic data, such as the chromatin map of the ENCODE project. We further associated our TSS information with public or original single-nucleotide variation (SNV) data, in order to identify SNVs in the regulatory regions. These data can be browsed in our new viewer, which supports versatile search conditions of users. We believe that our new DBTSS will be an invaluable resource for interpreting the differential uses of TSSs and for identifying human genetic variations that are associated with disordered transcriptional regulation. 
DBTSS can be accessed at http://dbtss.hgc.jp.",2011-11-15 +23609542,BLAST: a more efficient report with usability improvements.,"The Basic Local Alignment Search Tool (BLAST) website at the National Center for Biotechnology (NCBI) is an important resource for searching and aligning sequences. A new BLAST report allows faster loading of alignments, adds navigation aids, allows easy downloading of subject sequences and reports and has improved usability. Here, we describe these improvements to the BLAST report, discuss design decisions, describe other improvements to the search page and database documentation and outline plans for future development. The NCBI BLAST URL is http://blast.ncbi.nlm.nih.gov.",2013-04-22 +22541598,OpenTox predictive toxicology framework: toxicological ontology and semantic media wiki-based OpenToxipedia.,"

Background

The OpenTox Framework, developed by the partners in the OpenTox project (http://www.opentox.org), aims at providing a unified access to toxicity data, predictive models and validation procedures. Interoperability of resources is achieved using a common information model, based on the OpenTox ontologies, describing predictive algorithms, models and toxicity data. As toxicological data may come from different, heterogeneous sources, a deployed ontology, unifying the terminology and the resources, is critical for the rational and reliable organization of the data, and its automatic processing.

Results

The following related ontologies have been developed for OpenTox: a) Toxicological ontology - listing the toxicological endpoints; b) Organs system and Effects ontology - addressing organs, targets/examinations and effects observed in in vivo studies; c) ToxML ontology - representing semi-automatic conversion of the ToxML schema; d) OpenTox ontology - representation of OpenTox framework components: chemical compounds, datasets, types of algorithms, models and validation web services; e) ToxLink-ToxCast assays ontology and f) OpenToxipedia community knowledge resource on toxicology terminology. OpenTox components are made available through standardized REST web services, where every compound, data set, and predictive method has a unique resolvable address (URI), used to retrieve its Resource Description Framework (RDF) representation, or to initiate the associated calculations and generate new RDF-based resources. The services support the integration of toxicity and chemical data from various sources, the generation and validation of computer models for toxic effects, seamless integration of new algorithms and scientifically sound validation routines and provide a flexible framework, which allows building arbitrary number of applications, tailored to solving different problems by end users (e.g. toxicologists).

Availability

The OpenTox toxicological ontology projects may be accessed via the OpenTox ontology development page http://www.opentox.org/dev/ontology; the OpenTox ontology is available as OWL at http://opentox.org/api/1.1/opentox.owl, the ToxML - OWL conversion utility is an open source resource available at http://ambit.svn.sourceforge.net/viewvc/ambit/branches/toxml-utils/",2012-04-24 +27503227,SRinversion: a tool for detecting short inversions by splitting and re-aligning poorly mapped and unmapped sequencing reads.,"

Motivation

Rapid development in sequencing technologies has dramatically improved our ability to detect genetic variants in human genome. However, current methods have variable sensitivities in detecting different types of genetic variants. One type of such genetic variants that is especially hard to detect is inversions. Analysis of public databases showed that few short inversions have been reported so far. Unlike reads that contain small insertions or deletions, which will be considered through gap alignment, reads carrying short inversions often have poor mapping quality or are unmapped, thus are often not further considered. As a result, the majority of short inversions might have been overlooked and require special algorithms for their detection.

Results

Here, we introduce SRinversion, a framework to analyze poorly mapped or unmapped reads by splitting and re-aligning them for the purpose of inversion detection. SRinversion is very sensitive to small inversions and can detect those less than 10 bp in size. We applied SRinversion to both simulated data and high-coverage sequencing data from the 1000 Genomes Project and compared the results with those from Pindel, BreakDancer, DELLY, Gustaf and MID. A better performance of SRinversion was achieved for both datasets for the detection of small inversions.

Availability and implementation

SRinversion is implemented in Perl and is publicly available at http://paed.hku.hk/genome/software/SRinversion/index.html CONTACT: yangwl@hku.hk Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-08 +27651457,The Comparative Toxicogenomics Database: update 2017.,"The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) provides information about interactions between chemicals and gene products, and their relationships to diseases. Core CTD content (chemical-gene, chemical-disease and gene-disease interactions manually curated from the literature) are integrated with each other as well as with select external datasets to generate expanded networks and predict novel associations. Today, core CTD includes more than 30.5 million toxicogenomic connections relating chemicals/drugs, genes/proteins, diseases, taxa, Gene Ontology (GO) annotations, pathways, and gene interaction modules. In this update, we report a 33% increase in our core data content since 2015, describe our new exposure module (that harmonizes exposure science information with core toxicogenomic data) and introduce a novel dataset of GO-disease inferences (that identify common molecular underpinnings for seemingly unrelated pathologies). These advancements centralize and contextualize real-world chemical exposures with molecular pathways to help scientists generate testable hypotheses in an effort to understand the etiology and mechanisms underlying environmentally influenced diseases.",2016-09-19 +31339576,Transferrin and H-ferritin involvement in brain iron acquisition during postnatal development: impact of sex and genotype.,"Iron delivery to the developing brain is essential for energy and metabolic support needed for processes such as myelination and neuronal development. Iron deficiency, especially in the developing brain, can result in a number of long-term neurological deficits that persist into adulthood. 
There is considerable debate that excess access to iron during development may result in iron overload in the brain and subsequently predispose individuals to age-related neurodegenerative diseases. There is a significant gap in knowledge regarding how the brain acquires iron during development and how biological variables such as development, genetics, and sex impact brain iron status. In this study, we used a mouse model expressing a mutant form of the iron homeostatic regulator protein HFE, (Hfe H63D), the most common gene variant in Caucasians, to determine impact of the mutation on brain iron uptake. Iron uptake was assessed using 59 Fe bound to either transferrin or H-ferritin as the iron carrier proteins. We demonstrate that at postnatal day 22, mutant mice brains take up greater amounts of iron compared with wildtype. Moreover, we introduce H-ferritin as a key protein in brain iron transport during development and identify a sex and genotype effect demonstrating female mutant mice take up more iron by transferrin, whereas male mutant mice take up more iron from H-ferritin at PND22. Furthermore, we begin to elucidate the mechanism for uptake using immunohistochemistry to profile the regional distribution and temporal expression of transferrin receptor and T-cell immunoglobulin and mucin domain 2, the latter is the receptor for H-ferritin. These data demonstrate that sex and genotype have significant effects on iron uptake and that regional receptor expression may play a large role in the uptake patterns during development. 
Open Science: This manuscript was awarded with the Open Materials Badge. For more information see: https://cos.io/our-services/open-science-badges/ . Cover Image for this issue: doi: 10.1111/jnc.14731.",2019-08-22 +30631162,Publisher Correction: Assessing the efficiency of changes in land use for mitigating climate change.,"In this Letter, the PANGAEA repository was referred to incorrectly in the 'Code availability' and 'Data availability' sections of Methods: the link should be https://doi.org/10.1594/PANGAEA.893761 instead of https://doi.org/10.1594/PANGAEA.877266 . In addition, the sentence, ""However, the more commonly used system 2 (75 kg ha-1 yr-1) generates roughly the same benefits as system 1…"" should read, ""However, the more commonly used system 2 (75 kg ha-1 yr-1) generates roughly the same benefits as sugarcane ethanol…"" These errors have been corrected in the online versions of the Letter.",2019-01-01 +29069297,SeqBox: RNAseq/ChIPseq reproducible analysis on a consumer game computer.,"

Summary

Short reads sequencing technology has been used for more than a decade now. However, the analysis of RNAseq and ChIPseq data is still computational demanding and the simple access to raw data does not guarantee results reproducibility between laboratories. To address these two aspects, we developed SeqBox, a cheap, efficient and reproducible RNAseq/ChIPseq hardware/software solution based on NUC6I7KYK mini-PC (an Intel consumer game computer with a fast processor and a high performance SSD disk), and Docker container platform. In SeqBox the analysis of RNAseq and ChIPseq data is supported by a friendly GUI. This allows access to fast and reproducible analysis also to scientists with/without scripting experience.

Availability and implementation

Docker container images, docker4seq package and the GUI are available at http://www.bioinformatica.unito.it/reproducibile.bioinformatics.html.

Contact

beccuti@di.unito.it.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-03-01 +27307138,A comprehensive view of the web-resources related to sericulture. ,"Recent progress in the field of sequencing and analysis has led to a tremendous spike in data and the development of data science tools. One of the outcomes of this scientific progress is development of numerous databases which are gaining popularity in all disciplines of biology including sericulture. As economically important organism, silkworms are studied extensively for their numerous applications in the field of textiles, biomaterials, biomimetics, etc. Similarly, host plants, pests, pathogens, etc. are also being probed to understand the seri-resources more efficiently. These studies have led to the generation of numerous seri-related databases which are extremely helpful for the scientific community. In this article, we have reviewed all the available online resources on silkworm and its related organisms, including databases as well as informative websites. We have studied their basic features and impact on research through citation count analysis, finally discussing the role of emerging sequencing and analysis technologies in the field of seri-data science. As an outcome of this review, a web portal named SeriPort, has been created which will act as an index for the various sericulture-related databases and web resources available in cyberspace. Database URL: http://www.seriport.in/.",2016-06-15 +31141524,Empirical ways to identify novel Bedaquiline resistance mutations in AtpE.,"Clinical resistance against Bedaquiline, the first new anti-tuberculosis compound with a novel mechanism of action in over 40 years, has already been detected in Mycobacterium tuberculosis. As a new drug, however, there is currently insufficient clinical data to facilitate reliable and timely identification of genomic determinants of resistance. Here we investigate the structural basis for M. 
tuberculosis associated bedaquiline resistance in the drug target, AtpE. Together with the 9 previously identified resistance-associated variants in AtpE, 54 non-resistance-associated mutations were identified through comparisons of bedaquiline susceptibility across 23 different mycobacterial species. Computational analysis of the structural and functional consequences of these variants revealed that resistance associated variants were mainly localized at the drug binding site, disrupting key interactions with bedaquiline leading to reduced binding affinity. This was used to train a supervised predictive algorithm, which accurately identified likely resistance mutations (93.3% accuracy). Application of this model to circulating variants present in the Asia-Pacific region suggests that current circulating variants are likely to be susceptible to bedaquiline. We have made this model freely available through a user-friendly web interface called SUSPECT-BDQ, StrUctural Susceptibility PrEdiCTion for bedaquiline (http://biosig.unimelb.edu.au/suspect_bdq/). This tool could be useful for the rapid characterization of novel clinical variants, to help guide the effective use of bedaquiline, and to minimize the spread of clinical resistance.",2019-05-29 +28981573,OPATs: Omnibus P-value association tests.,"Combining statistical significances (P-values) from a set of single-locus association tests in genome-wide association studies is a proof-of-principle method for identifying disease-associated genomic segments, functional genes and biological pathways. We review P-value combinations for genome-wide association studies and introduce an integrated analysis tool, Omnibus P-value Association Tests (OPATs), which provides popular analysis methods of P-value combinations. The software OPATs programmed in R and R graphical user interface features a user-friendly interface. 
In addition to analysis modules for data quality control and single-locus association tests, OPATs provides three types of set-based association test: window-, gene- and biopathway-based association tests. P-value combinations with or without threshold and rank truncation are provided. The significance of a set-based association test is evaluated by using resampling procedures. Performance of the set-based association tests in OPATs has been evaluated by simulation studies and real data analyses. These set-based association tests help boost the statistical power, alleviate the multiple-testing problem, reduce the impact of genetic heterogeneity, increase the replication efficiency of association tests and facilitate the interpretation of association signals by streamlining the testing procedures and integrating the genetic effects of multiple variants in genomic regions of biological relevance. In summary, P-value combinations facilitate the identification of marker sets associated with disease susceptibility and uncover missing heritability in association studies, thereby establishing a foundation for the genetic dissection of complex diseases and traits. OPATs provides an easy-to-use and statistically powerful analysis tool for P-value combinations. OPATs, examples, and user guide can be downloaded from http://www.stat.sinica.edu.tw/hsinchou/genetics/association/OPATs.htm.",2019-01-01 +27402185,Lessons Learned From Dissemination of Evidence-Based Interventions for HIV Prevention.,"In 1999, IOM issued a report that recommended that the Centers for Disease Control and Prevention should disseminate evidence-based HIV prevention interventions (EBIs) to be implemented by health departments, community-based organizations, drug treatment centers, and clinics. Based on these recommendations, the Diffusion of Effective Behavioral Interventions Project was initiated in 2000 and began disseminating interventions into public health practice. 
For 15 years, the Centers for Disease Control and Prevention has disseminated 29 EBIs to more than 11,300 agencies. Lessons were identified during the 15 years of implementation regarding successful methods of dissemination of EBIs. Lessons around selecting interventions for dissemination, developing a dissemination infrastructure including a resource website (https://effectiveinterventions.cdc.gov), and engagement with stakeholders are discussed. A continuous development approach ensured that intervention implementation materials, instructions, and technical assistance were all tailored to the needs of end users, focus populations, and agency capacities. Six follow-up studies demonstrated that adopters of EBIs were able to obtain comparable outcomes to those of the original efficacy research. The Diffusion of Effective Behavioral Interventions Project may offer guidance for other large, national, evidence-based public health dissemination projects.",2016-07-09 +27797764,RedNemo: topology-based PPI network reconstruction via repeated diffusion with neighborhood modifications.,"

Motivation

Analysis of protein-protein interaction (PPI) networks provides invaluable insight into several systems biology problems. High-throughput experimental techniques together with computational methods provide large-scale PPI networks. However, a major issue with these networks is their erroneous nature; they contain false-positive interactions and usually many more false-negatives. Recently, several computational methods have been proposed for network reconstruction based on topology, where given an input PPI network the goal is to reconstruct the network by identifying false-positives/-negatives as correctly as possible.

Results

We observe that the existing topology-based network reconstruction algorithms suffer several shortcomings. An important issue is regarding the scalability of their computational requirements, especially in terms of execution times, with the network sizes. They have only been tested on small-scale networks thus far and when applied on large-scale networks of popular PPI databases, the executions require unreasonable amounts of time, or may even crash without producing any output for some instances even after several months of execution. We provide an algorithm, RedNemo, for the topology-based network reconstruction problem. It provides more accurate networks than the alternatives as far as biological qualities measured in terms of most metrics based on gene ontology annotations. The recovery of a high-confidence network modified via random edge removals and rewirings is also better with RedNemo than with the alternatives under most of the experimented removal/rewiring ratios. Furthermore, through extensive tests on databases of varying sizes, we show that RedNemo achieves these results with much better running time performances.

Availability and implementation

Supplementary material including source code, useful scripts, experimental data and the results are available at http://webprs.khas.edu.tr/~cesim/RedNemo.tar.gz.

Contact

cesim@khas.edu.tr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +23911837,PTP-central: a comprehensive resource of protein tyrosine phosphatases in eukaryotic genomes.,"Reversible tyrosine phosphorylation is a fundamental signaling mechanism controlling a diversity of cellular processes. Whereas protein tyrosine kinases have long been implicated in many diseases, aberrant protein tyrosine phosphatase (PTP) activity is also increasingly being associated with a wide spectrum of conditions. PTPs are now regarded as key regulators of biochemical processes instead of simple ""off"" switches operating in tyrosine kinase signaling pathways. Despite the central importance that PTPs play in the cell's biochemistry, the tyrosine phosphatomes of most species remain uncharted. Here we present a highly sensitive and specific sequence-based method for the automatic classification of PTPs. As proof of principle we re-annotated the human tyrosine phosphatome, and discovered four new PTP genes that had not been reported before. Our method and the predicted tyrosine phosphatomes of 65 eukaryotic genomes are accessible online through the user-friendly PTP-central resource (http://www.PTP-central.org/), where users can also submit their own sequences for prediction. PTP-central is a comprehensive and continually developing resource that currently integrates the predicted tyrosine phosphatomes with structural data and genetic association disease studies, as well as homology relationships. PTP-central thus fills an important void for the systematic study of PTPs, both in model organisms and from an evolutionary perspective.",2013-07-31 +30498767,"Australia and New Zealand Islets and Pancreas Transplant Registry Annual Report 2018-Pancreas Waiting List, Recipients, and Donors.","

Background

This is a synopsis of the registry report from the Australia and New Zealand islet and pancreas transplant registry. The full report is available at http://anziptr.org/reports/.

Methods

We report data for all solid organ pancreas transplant activity from inception in 1984 to end 2017. Islet-cell transplantation activity is reported elsewhere. Data analysis was performed using Stata software version 14 (StataCorp, College Station, TX).

Results

From 1984 to 2017 a total of 809 solid organ pancreas transplants have been performed in Australia and New Zealand, in 790 individuals. In 2017, 52 people received a pancreas transplant. By center, this was; Auckland (4), Monash (17), and Westmead (31). In 2017, 51 transplants were simultaneous pancreas kidney, whereas 1 was pancreas after kidney, and none were pancreas transplant alone.

Conclusions

The number of pancreas transplants performed in Australia and New Zealand was slightly lower in 2017 but continues to increase over time.",2018-09-07 +30590384,Smooth orientation-dependent scoring function for coarse-grained protein quality assessment.,"

Motivation

Protein quality assessment (QA) is a crucial element of protein structure prediction, a fundamental and yet open problem in structural bioinformatics. QA aims at ranking predicted protein models to select the best candidates. The assessment can be performed based either on a single model or on a consensus derived from an ensemble of models. The latter strategy can yield very high performance but substantially depends on the pool of available candidate models, which limits its applicability. Hence, single-model QA methods remain an important research target, also because they can assist the sampling of candidate models.

Results

We present a novel single-model QA method called SBROD. The SBROD (Smooth Backbone-Reliant Orientation-Dependent) method uses only the backbone protein conformation, and hence it can be applied to scoring coarse-grained protein models. The proposed method deduces its scoring function from a training set of protein models. The SBROD scoring function is composed of four terms related to different structural features: residue-residue orientations, contacts between backbone atoms, hydrogen bonding and solvent-solute interactions. It is smooth with respect to atomic coordinates and thus is potentially applicable to continuous gradient-based optimization of protein conformations. Furthermore, it can also be used for coarse-grained protein modeling and computational protein design. SBROD proved to achieve similar performance to state-of-the-art single-model QA methods on diverse datasets (CASP11, CASP12 and MOULDER).

Availability and implementation

The standalone application implemented in C++ and Python is freely available at https://gitlab.inria.fr/grudinin/sbrod and supported on Linux, MacOS and Windows.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +31493238,Impact of sustained virological response with DAAs on gastroesophageal varices and Baveno criteria in HCV-cirrhotic patients.,"

Background

Direct-acting antivirals (DAAs) show high efficacy and safety in HCV-cirrhotic patients, but most maintain clinically significant portal hypertension after sustained virological response (SVR). Non-invasive Baveno and expanded-Baveno criteria can identify patients without high-risk gastroesophageal varices (GEV) who have no need for endoscopic surveillance. However, data after SVR are scarce. We performed a multicenter study to evaluate SVR effects over GEV and diagnostic accuracy of non-invasive criteria after SVR.

Methods

HCV-cirrhotic patients receiving DAAs and baseline endoscopic evaluation were included (November 2014-October 2015). GEV were classified as low risk (LR-GEV) (< 5 mm) or high risk (HR-GEV) (≥ 5 mm or with risk signs). Transient elastography (TE) and endoscopy were performed during follow-up.

Results

SVR was achieved in 230 (93.1%) of 247 included patients, 151 (65.7%) with endoscopic follow-up. Among 64/151 (42.4%) patients without baseline GEV, 8 (12.5%) developed GEV after SVR. Among 50/151 (33.1%) with baseline LR-GEV, 12 (24%) developed HR-GEV. Patients with GEV progression showed TE ≥ 25 kPa before treatment (64.7%) or ≥ 20 kPa after SVR (66.7%). Only 6% of patients without GEV and LSM < 25 kPa before treatment, and 10% of those with baseline LSM < 25 kPa and LSM < 20 kPa after SVR showed GEV progression after 36 months. The negative predictive value of Baveno and expanded-Baveno criteria to exclude HR-GEV was maintained after SVR (100% and 90.7%, respectively).

Conclusions

HCV-cirrhotic patients can develop HR-GEV after SVR. Surveillance is especially recommended in those with GEV before antiviral treatment. Baveno and expanded-Baveno criteria can be safely applied after SVR. https://clinicaltrials.gov: NCT02758509.",2019-09-06 +27220974,BMPOS: a Flexible and User-Friendly Tool Sets for Microbiome Studies.,"Recent advances in science and technology are leading to a revision and re-orientation of methodologies, addressing old and current issues under a new perspective. Advances in next generation sequencing (NGS) are allowing comparative analysis of the abundance and diversity of whole microbial communities, generating a large amount of data and findings at a systems level. The current limitation for biologists has been the increasing demand for computational power and training required for processing of NGS data. Here, we describe the deployment of the Brazilian Microbiome Project Operating System (BMPOS), a flexible and user-friendly Linux distribution dedicated to microbiome studies. The Brazilian Microbiome Project (BMP) has developed data analyses pipelines for metagenomic studies (phylogenetic marker genes), conducted using the two main high-throughput sequencing platforms (Ion Torrent and Illumina MiSeq). The BMPOS is freely available and possesses the entire requirement of bioinformatics packages and databases to perform all the pipelines suggested by the BMP team. The BMPOS may be used as a bootable live USB stick or installed in any computer with at least 1 GHz CPU and 512 MB RAM, independent of the operating system previously installed. The BMPOS has proved to be effective for sequences processing, sequences clustering, alignment, taxonomic annotation, statistical analysis, and plotting of metagenomic data. The BMPOS has been used during several metagenomic analyses courses, being valuable as a tool for training, and an excellent starting point to anyone interested in performing metagenomic studies. 
The BMPOS and its documentation are available at http://www.brmicrobiome.org .",2016-05-24 +30488523,Morphometric analysis of peripheral myelinated nerve fibers through deep learning.,"Irrespective of initial causes of neurological diseases, these disorders usually exhibit two key pathological changes-axonal loss or demyelination or a mixture of the two. Therefore, vigorous quantification of myelin and axons is essential in studying these diseases. However, the process of quantification has been labor intensive and time-consuming because of the requisite manual segmentation of myelin and axons from microscopic nerve images. As a part of AI development, deep learning has been utilized to automate certain tasks, such as image analysis. This study describes the development of a convolutional neural network (CNN)-based approach to segment images of mouse nerve cross sections. We adapted the U-Net architecture and used manually-produced segmentation data accumulated over many years in our lab for training. These images ranged from normal nerves to those afflicted by severe myelin and axon pathologies; thus, maximizing the trained model's ability to recognize atypical myelin structures. Morphometric data produced by applying the trained model to additional images were then compared to manually obtained morphometrics. The former effectively shortened the time consumption in the morphometric analysis with excellent accuracy in axonal density and g-ratio. However, we were not able to completely eliminate manual refinement of the segmentation product. We also observed small variations in axon diameter and myelin thickness within 9.5%. Nevertheless, we learned alternative ways to improve accuracy through the study. Overall, greatly increased efficiency in the CNN-based approach out-weighs minor limitations that will be addressed in future studies, thus justifying our confidence in its prospects. 
Note: All the relevant code is freely available at https://neurology.med.wayne.edu/drli-datashairing.",2018-12-11 +28845458,PaperBLAST: Text Mining Papers for Information about Homologs. ,"Large-scale genome sequencing has identified millions of protein-coding genes whose function is unknown. Many of these proteins are similar to characterized proteins from other organisms, but much of this information is missing from annotation databases and is hidden in the scientific literature. To make this information accessible, PaperBLAST uses EuropePMC to search the full text of scientific articles for references to genes. PaperBLAST also takes advantage of curated resources (Swiss-Prot, GeneRIF, and EcoCyc) that link protein sequences to scientific articles. PaperBLAST's database includes over 700,000 scientific articles that mention over 400,000 different proteins. Given a protein of interest, PaperBLAST quickly finds similar proteins that are discussed in the literature and presents snippets of text from relevant articles or from the curators. PaperBLAST is available at http://papers.genomics.lbl.gov/. IMPORTANCE With the recent explosion of genome sequencing data, there are now millions of uncharacterized proteins. If a scientist becomes interested in one of these proteins, it can be very difficult to find information as to its likely function. Often a protein whose sequence is similar, and which is likely to have a similar function, has been studied already, but this information is not available in any database. To help find articles about similar proteins, PaperBLAST searches the full text of scientific articles for protein identifiers or gene identifiers, and it links these articles to protein sequences. Then, given a protein of interest, it can quickly find similar proteins in its database by using standard software (BLAST), and it can show snippets of text from relevant papers. 
We hope that PaperBLAST will make it easier for biologists to predict proteins' functions.",2017-07-01 +28591841,RSAT matrix-clustering: dynamic exploration and redundancy reduction of transcription factor binding motif collections.,"Transcription factor (TF) databases contain multitudes of binding motifs (TFBMs) from various sources, from which non-redundant collections are derived by manual curation. The advent of high-throughput methods stimulated the production of novel collections with increasing numbers of motifs. Meta-databases, built by merging these collections, contain redundant versions, because available tools are not suited to automatically identify and explore biologically relevant clusters among thousands of motifs. Motif discovery from genome-scale data sets (e.g. ChIP-seq) also produces redundant motifs, hampering the interpretation of results. We present matrix-clustering, a versatile tool that clusters similar TFBMs into multiple trees, and automatically creates non-redundant TFBM collections. A feature unique to matrix-clustering is its dynamic visualisation of aligned TFBMs, and its capability to simultaneously treat multiple collections from various sources. We demonstrate that matrix-clustering considerably simplifies the interpretation of combined results from multiple motif discovery tools, and highlights biologically relevant variations of similar motifs. We also ran a large-scale application to cluster ∼11 000 motifs from 24 entire databases, showing that matrix-clustering correctly groups motifs belonging to the same TF families, and drastically reduced motif redundancy. 
matrix-clustering is integrated within the RSAT suite (http://rsat.eu/), accessible through a user-friendly web interface or command-line for its integration in pipelines.",2017-07-01 +25480117,"De novo transcriptome assembly of a fern, Lygodium japonicum, and a web resource database, Ljtrans DB.","During plant evolution, ferns originally evolved as a major vascular plant with a distinctive life cycle in which the haploid and diploid generations are completely separated. However, the low level of genetic resources has limited studies of their physiological events, as well as hindering research on the evolutionary history of land plants. In this study, to identify a comprehensive catalog of transcripts and characterize their expression traits in the fern Lygodium japonicum, nine different RNA samples isolated from prothalli, trophophylls, rhizomes and sporophylls were sequenced using Roche 454 GS-FLX and Illumina HiSeq sequencers. The hybrid assembly of the high-quality 454 GS-FLX and Illumina HiSeq reads generated a set of 37,830 isoforms with an average length of 1,444 bp. Using four open reading frame (ORF) predictors, 38,142 representative ORFs were identified from a total of 37,830 transcript isoforms and 95 contigs, which were annotated by searching against several public databases. Furthermore, an orthoMCL analysis using the protein sequences of L. japonicum and five model plants revealed various sets of lineage-specific genes, including those detected among land plant lineages and those detected in only L. japonicum. We have also examined the expression patterns of all contigs/isoforms, along with the life cycle of L. japonicum, and identified the tissue-specific transcripts using statistical expression analyses. Finally, we developed a public web resource, the L. 
japonicum transcriptome database at http://bioinf.mind.meiji.ac.jp/kanikusa/, which provides important opportunities to accelerate molecular research in ferns.",2014-12-04 +29506198,anexVis: visual analytics framework for analysis of RNA expression.,"

Summary

Although RNA expression data are accumulating at a remarkable speed, gaining insights from them still requires laborious analyses, which hinder many biological and biomedical researchers. This report introduces a visual analytics framework that applies several well-known visualization techniques to leverage understanding of an RNA expression dataset. Our analyses on glycosaminoglycan-related genes have demonstrated the broad application of this tool, anexVis (analysis of RNA expression), to advance the understanding of tissue-specific glycosaminoglycan regulation and functions, and potentially other biological pathways.

Availability and implementation

The application is accessible at https://anexvis.chpc.utah.edu/, source codes deposited on GitHub.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +30590410,mAHTPred: a sequence-based meta-predictor for improving the prediction of anti-hypertensive peptides using effective feature representation.,"

Motivation

Cardiovascular disease is the primary cause of death globally accounting for approximately 17.7 million deaths per year. One of the stakes linked with cardiovascular diseases and other complications is hypertension. Naturally derived bioactive peptides with antihypertensive activities serve as promising alternatives to pharmaceutical drugs. So far, there is no comprehensive analysis, assessment of diverse features and implementation of various machine-learning (ML) algorithms applied for antihypertensive peptide (AHTP) model construction.

Results

In this study, we utilized six different ML algorithms, namely, Adaboost, extremely randomized tree (ERT), gradient boosting (GB), k-nearest neighbor, random forest (RF) and support vector machine (SVM) using 51 feature descriptors derived from eight different feature encodings for the prediction of AHTPs. While ERT-based trained models performed consistently better than other algorithms regardless of various feature descriptors, we treated them as baseline predictors, whose predicted probability of AHTPs was further used as input features separately for four different ML-algorithms (ERT, GB, RF and SVM) and developed their corresponding meta-predictors using a two-step feature selection protocol. Subsequently, the integration of four meta-predictors through an ensemble learning approach improved the balanced prediction performance and model robustness on the independent dataset. Upon comparison with existing methods, mAHTPred showed superior performance with an overall improvement of approximately 6-7% in both benchmarking and independent datasets.

Availability and implementation

The user-friendly online prediction tool, mAHTPred is freely accessible at http://thegleelab.org/mAHTPred.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-08-01 +28917378,Chinese herbal medicine Dengzhan Xixin injection for acute ischemic stroke: A systematic review and meta-analysis of randomised controlled trials.,"

Objective

To evaluate the effectiveness and safety of Chinese herbal medicine Dengzhan Xixin (Erigeron breviscapus) injection for acute ischemic stroke.

Design

Systematic review and meta-analysis (CRD42016038413, http://www.crd.york.ac.uk/PROSPERO).

Methods

Six electronic databases were searched from inception to March 2016 for randomised controlled trials (RCTs) of Dengzhan Xixin (DZXX) injection for acute ischemic stroke. The methodological quality of RCTs was assessed by the Cochrane risk of bias tool.

Data synthesis

was performed using RevMan 5.3 and was presented with mean difference (MD) or relative risk (RR) and their 95% confidence interval (CI). A summary of finding table was generated by GRADEpro (version 3.6).

Results

Twenty-five RCTs with 2498 participants were included and all trials adopted conventional therapy (CT) in both arms. Most of the studies had high risk of bias. The addition of DZXX to CT showed no significant benefit on death (RR 0.27, 95% CI 0.05-1.63) within the treatment period (14-35 d), but showed higher Barthel index score (MD 10.20, 95% CI 8.16-12.25), lower neurological function deficit score (MD -3.99, 95% CI -5.68 to -2.30, by NFDS; MD -1.67, 95% CI -2.59 to -0.76, by NIHSS), and lower treatment failure (RR 0.40, 95% CI 0.31-0.52). Thirteen trials (52%) reported the outcome of adverse events, but no serious adverse events were reported.

Conclusion

Low quality evidence implied that DZXX injection appeared to improve neurological function in patients with acute ischemic stroke. However, this potential benefit should be further studied in large, rigorous trials.",2017-08-11 +28330888,Defining a Core Genome Multilocus Sequence Typing Scheme for the Global Epidemiology of Vibrio parahaemolyticus.,"Vibrio parahaemolyticus is an important human foodborne pathogen whose transmission is associated with the consumption of contaminated seafood, with a growing number of infections reported over recent years worldwide. A multilocus sequence typing (MLST) database for V. parahaemolyticus was created in 2008, and a large number of clones have been identified, causing severe outbreaks worldwide (sequence type 3 [ST3]), recurrent outbreaks in certain regions (e.g., ST36), or spreading to other regions where they are nonendemic (e.g., ST88 or ST189). The current MLST scheme uses sequences of 7 genes to generate an ST, which results in a powerful tool for inferring the population structure of this pathogen, although with limited resolution, especially compared to pulsed-field gel electrophoresis (PFGE). The application of whole-genome sequencing (WGS) has become routine for trace back investigations, with core genome MLST (cgMLST) analysis as one of the most straightforward ways to explore complex genomic data in an epidemiological context. Therefore, there is a need to generate a new, portable, standardized, and more advanced system that provides higher resolution and discriminatory power among V. parahaemolyticus strains using WGS data. We sequenced 92 V. parahaemolyticus genomes and used the genome of strain RIMD 2210633 as a reference (with a total of 4,832 genes) to determine which genes were suitable for establishing a V. parahaemolyticus cgMLST scheme. This analysis resulted in the identification of 2,254 suitable core genes for use in the cgMLST scheme. 
To evaluate the performance of this scheme, we performed a cgMLST analysis of 92 newly sequenced genomes, plus an additional 142 strains with genomes available at NCBI. cgMLST analysis was able to distinguish related and unrelated strains, including those with the same ST, clearly showing its enhanced resolution over conventional MLST analysis. It also distinguished outbreak-related from non-outbreak-related strains within the same ST. The sequences obtained from this work were deposited and are available in the public database (http://pubmlst.org/vparahaemolyticus). The application of this cgMLST scheme to the characterization of V. parahaemolyticus strains provided by different laboratories from around the world will reveal the global picture of the epidemiology, spread, and evolution of this pathogen and will become a powerful tool for outbreak investigations, allowing for the unambiguous comparison of strains with global coverage.",2017-03-22 +23815231,WEP: a high-performance analysis pipeline for whole-exome data.,"

Background

The advent of massively parallel sequencing technologies (Next Generation Sequencing, NGS) profoundly modified the landscape of human genetics. In particular, Whole Exome Sequencing (WES) is the NGS branch that focuses on the exonic regions of the eukaryotic genomes; exomes are ideal to help us understand high-penetrance allelic variation and its relationship to phenotype. A complete WES analysis involves several steps which need to be suitably designed and arranged into an efficient pipeline. Managing an NGS analysis pipeline and its huge amount of produced data requires non-trivial IT skills and computational power.

Results

Our web resource WEP (Whole-Exome sequencing Pipeline web tool) performs a complete WES pipeline and provides easy access through interface to intermediate and final results. The WEP pipeline is composed of several steps: 1) verification of input integrity and quality checks, read trimming and filtering; 2) gapped alignment; 3) BAM conversion, sorting and indexing; 4) duplicates removal; 5) alignment optimization around insertion/deletion (indel) positions; 6) recalibration of quality scores; 7) single nucleotide and deletion/insertion polymorphism (SNP and DIP) variant calling; 8) variant annotation; 9) result storage into custom databases to allow cross-linking and intersections, statistics and much more. In order to overcome the challenge of managing large amount of data and maximize the biological information extracted from them, our tool restricts the number of final results filtering data by customizable thresholds, facilitating the identification of functionally significant variants. Default threshold values are also provided at the analysis computation completion, tuned with the most common literature work published in recent years.

Conclusions

Through our tool a user can perform the whole analysis without knowing the underlying hardware and software architecture, dealing with both paired and single end data. The interface provides an easy and intuitive access for data submission and a user-friendly web interface for annotated variant visualization.Non-IT mastered users can access through WEP to the most updated and tested WES algorithms, tuned to maximize the quality of called variants while minimizing artifacts and false positives.The web tool is available at the following web address: http://www.caspur.it/wep.",2013-04-22 +27699187,Transcriptome data and gene ontology analysis in human macrophages ingesting modified lipoproteins in the presence or absence of complement protein C1q.,"We characterized the transcriptional effects of complement opsonization on foam cell formation in human monocyte-derived macrophages (HMDM). RNA-sequencing was used to identify the pathways modulated by complement protein C1q during HMDM ingestion of the atherogenic lipoproteins oxidized low density lipoprotein (oxLDL) and acetylated low density lipoprotein (acLDL). All raw data were submitted to the MIAME-compliant database Gene Expression Omnibus (accession number GEO: GSE80442; http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE80442). Data presented here include Venn diagram overviews of up- and down-regulated genes for each condition tested, gene ontology analyses of biological processes, molecular functions and cellular components and KEGG pathway analysis. Further investigation of the pathways modulated by C1q in HMDM during ingestion of atherogenic lipoproteins and their functional relevance are described in ""Macrophage molecular signaling and inflammatory responses during ingestion of atherogenic lipoproteins are modulated by complement protein C1q"" (M.M. Ho, A. Manughian-Peter, W.R. Spivia, A. Taylor, D.A. 
Fraser, 2016) [1].",2016-09-14 +30257747,Where do Personalized Health Technologies stand today?,"In June 2018, experts met at ETH Zurich to discuss technological challenges in advanced cell systems, variant interpretation, novel therapeutics and the integration of clinical data. The translation of these technologies into innovative clinical approaches in oncology, immunology, infectious diseases, neurology and cardiology will be a challenge for the future. Detailed information at http://www.personalizedhealth.nexus.ethz.ch/.",2018-09-01 +30187772,Environmental Chemicals in Breast Milk and Formula: Exposure and Risk Assessment Implications.,"

Background

Human health risk assessment methods have advanced in recent years to more accurately estimate risks associated with exposure during childhood. However, predicting risks related to infant exposures to environmental chemicals in breast milk and formula remains challenging.

Objectives

Our goal was to compile available information on infant exposures to environmental chemicals in breast milk and formula, describe methods to characterize infant exposure and potential for health risk in the context of a risk assessment, and identify research needed to improve risk analyses based on this type of exposure and health risk information.

Methods

We reviewed recent literature on levels of environmental chemicals in breast milk and formula, with a focus on data from the United States. We then selected three example publications that quantified infant exposure using breast milk or formula chemical concentrations and estimated breast milk or formula intake. The potential for health risk from these dietary exposures was then characterized by comparison with available health risk benchmarks. We identified areas of this approach in need of improvement to better characterize the potential for infant health risk from this critical exposure pathway.

Discussion

Measurements of chemicals in breast milk and formula are integral to the evaluation of risk from early life dietary exposures to environmental chemicals. Risk assessments may also be informed by research investigating the impact of chemical exposure on developmental processes known to be active, and subject to disruption, during infancy, and by analysis of exposure-response data specific to the infant life stage. Critical data gaps exist in all of these areas.

Conclusions

Better-designed studies are needed to characterize infant exposures to environmental chemicals in breast milk and infant formula as well as to improve risk assessments of chemicals found in both foods. https://doi.org/10.1289/EHP1953.",2018-09-01 +29950014,Haplotype phasing in single-cell DNA-sequencing data.,"

Motivation

Current technologies for single-cell DNA sequencing require whole-genome amplification (WGA), as a single cell contains too little DNA for direct sequencing. Unfortunately, WGA introduces biases in the resulting sequencing data, including non-uniformity in genome coverage and high rates of allele dropout. These biases complicate many downstream analyses, including the detection of genomic variants.

Results

We show that amplification biases have a potential upside: long-range correlations in rates of allele dropout provide a signal for phasing haplotypes at the lengths of amplicons from WGA, lengths which are generally longer than individual sequence reads. We describe a statistical test to measure concurrent allele dropout between single-nucleotide polymorphisms (SNPs) across multiple sequenced single cells. We use results of this test to perform haplotype assembly across a collection of single cells. We demonstrate that the algorithm predicts phasing between pairs of SNPs with higher accuracy than phasing from reads alone. Using whole-genome sequencing data from only seven neural cells, we obtain haplotype blocks that are orders of magnitude longer than with sequence reads alone (median length 10.2 kb versus 312 bp), with error rates <2%. We demonstrate similar advantages on whole-exome data from 16 cells, where we obtain haplotype blocks with median length 9.2 kb-comparable to typical gene lengths-compared with median lengths of 41 bp with sequence reads alone, with error rates <4%. Our algorithm will be useful for haplotyping of rare alleles and studies of allele-specific somatic aberrations.

Availability and implementation

Source code is available at https://www.github.com/raphael-group.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +30425802,iGHBP: Computational identification of growth hormone binding proteins from sequences using extremely randomised tree.,"A soluble carrier growth hormone binding protein (GHBP) that can selectively and non-covalently interact with growth hormone, thereby acting as a modulator or inhibitor of growth hormone signalling. Accurate identification of the GHBP from a given protein sequence also provides important clues for understanding cell growth and cellular mechanisms. In the postgenomic era, there has been an abundance of protein sequence data garnered, hence it is crucial to develop an automated computational method which enables fast and accurate identification of putative GHBPs within a vast number of candidate proteins. In this study, we describe a novel machine-learning-based predictor called iGHBP for the identification of GHBP. In order to predict GHBP from a given protein sequence, we trained an extremely randomised tree with an optimal feature set that was obtained from a combination of dipeptide composition and amino acid index values by applying a two-step feature selection protocol. During cross-validation analysis, iGHBP achieved an accuracy of 84.9%, which was ~7% higher than the control extremely randomised tree predictor trained with all features, thus demonstrating the effectiveness of our feature selection protocol. Furthermore, when objectively evaluated on an independent data set, our proposed iGHBP method displayed superior performance compared to the existing method. 
Additionally, a user-friendly web server that implements the proposed iGHBP has been established and is available at http://thegleelab.org/iGHBP.",2018-10-24 +31039322,A Scoping Review of the Involvement of Children's Communication Partners in Aided Augmentative and Alternative Communication Modeling Interventions.,"Purpose The purpose of this study was to inform practice and research by identifying and synthesizing research on interventions in which natural communication partners implemented aided augmentative and alternative communication (AAC) modeling strategies. Method A scoping review yielded 29 studies. Data were charted related to participant characteristics, intervention characteristics, partner instruction and assessment, and partner perspectives of social validity. Results More than 157 peer and 100 adult communication partners (e.g., parents, special educators, paraprofessionals) implemented aided AAC modeling strategies within included studies. To teach communication partners intervention strategies, researchers frequently reported using (a) oral instruction, (b) modeling, and (c) practice or application opportunities with performance feedback. Partner instruction frequently involved both training and concurrent support (e.g., coaching, facilitation, consultation, follow-up support). Conclusion Findings from this review inform the design and delivery of aided AAC modeling interventions by children's natural communication partners. Findings also highlight important avenues for enhancing the rigor of future research on interventions involving aided AAC modeling, including the quality of reporting and application of principles from implementation science. 
Supplemental Material https://doi.org/10.23641/asha.8038505.",2019-04-30 +29484673,The DHS Program's Modeled Surfaces Spatial Datasets.,"Spatially interpolated map surface datasets for key development indicators are being produced and publicly shared using population-based surveys from the USAID-funded Demographic and Health Survey (DHS) Program. Each modeled surface is produced with standardized geostatistical modeling methods. For each indicator, a package is available that includes spatial raster grids of 5 × 5 km pixels for the point estimate surface and an uncertainty surface, along with validation statistics and other model diagnostic data. The maps are publicly available for download on the DHS Program Spatial Data Repository at http://spatialdata.dhsprogram.com/. The modeled surfaces are produced with publicly available geo-referenced data on each indicator as collected by the DHS Program, augmented with other relevant spatial data sources that act as covariates. A Bayesian model-based geostatistical (MBG) approach is used to generate the modeled surfaces. Spatially modeled surfaces can be used to support and improve decision-making at multiple levels within many development programs including health, population, family planning, nutrition, and water and sanitation. The modeled surfaces can be used in their original 5 × 5 km pixel format, operationalized to other geographic areas as relevant for the program, or linked to DHS or other survey data for additional analysis.",2018-02-27 +28742036,Study of Temporal Effects on Subjective Video Quality of Experience.,"HTTP adaptive streaming is being increasingly deployed by network content providers, such as Netflix and YouTube. By dividing video content into data chunks encoded at different bitrates, a client is able to request the appropriate bitrate for the segment to be played next based on the estimated network conditions. 
However, this can introduce a number of impairments, including compression artifacts and rebuffering events, which can severely impact an end-user's quality of experience (QoE). We have recently created a new video quality database, which simulates a typical video streaming application, using long video sequences and interesting Netflix content. Going beyond previous efforts, the new database contains highly diverse and contemporary content, and it includes the subjective opinions of a sizable number of human subjects regarding the effects on QoE of both rebuffering and compression distortions. We observed that rebuffering is always obvious and unpleasant to subjects, while bitrate changes may be less obvious due to content-related dependencies. Transient bitrate drops were preferable over rebuffering only on low complexity video content, while consistently low bitrates were poorly tolerated. We evaluated different objective video quality assessment algorithms on our database and found that objective video quality models are unreliable for QoE prediction on videos suffering from both rebuffering events and bitrate changes. This implies the need for more general QoE models that take into account objective quality models, rebuffering-aware information, and memory. The publicly available video content as well as metadata for all of the videos in the new database can be found at http://live.ece.utexas.edu/research/LIVE_NFLXStudy/nflx_index.html.",2017-07-20 +31246036,Refined Empirical Force Field to Model Protein-Self-Assembled Monolayer Interactions Based on AMBER14 and GAFF.,"Understanding protein interaction with material surfaces is important for the development of nanotechnological devices. The structures and dynamics of proteins can be studied via molecular dynamics (MD) if the protein-surface interactions can be accurately modeled. 
To answer this question, we computed the adsorption free energies of peptides (representing eleven different amino acids) on a hydrophobic self-assembled monolayer (CH3-SAM) and compared them to the benchmark experimental data set. Our result revealed that existing biomolecular force fields, GAFF and AMBER ff14sb, cannot reproduce the experimental peptide adsorption free energies by Wei and Latour (Langmuir, 2009, 25, 5637-5646). To obtain the improved force fields, we systematically tuned the Lennard-Jones parameters of selected amino acid sidechains and the functional group of SAM with repeated metadynamics and umbrella sampling simulations. The final parameter set has yielded a significant improvement in the free energy values with R = 0.83 and MSE = 0.65 kcal/mol. We applied the refined force field to predict the initial adsorption orientation of lysozyme on CH3-SAM. Two major orientations-face-down and face-up-were predicted. Our analysis on the protein structure, solvent accessible surface area, and binding of native ligand NAG3 suggested that lysozyme in the face-up orientation can remain active after initial adsorption. However, because of its weaker affinity (ΔΔG = 7.86 kcal/mol) for the ligand, the bioactivity of the protein is expected to reduce. Our work facilitates the use of MD for the study of protein-SAM systems. The refined force field compatible with GROMACS is available at https://cbbio.cis.um.edu.mo/software/SAMFF .",2019-07-10 +28349240,"The Human Gene Mutation Database: towards a comprehensive repository of inherited mutation data for medical research, genetic diagnosis and next-generation sequencing studies.","The Human Gene Mutation Database (HGMD®) constitutes a comprehensive collection of published germline mutations in nuclear genes that underlie, or are closely associated with human inherited disease. 
At the time of writing (March 2017), the database contained in excess of 203,000 different gene lesions identified in over 8000 genes manually curated from over 2600 journals. With new mutation entries currently accumulating at a rate exceeding 17,000 per annum, HGMD represents de facto the central unified gene/disease-oriented repository of heritable mutations causing human genetic disease used worldwide by researchers, clinicians, diagnostic laboratories and genetic counsellors, and is an essential tool for the annotation of next-generation sequencing data. The public version of HGMD ( http://www.hgmd.org ) is freely available to registered users from academic institutions and non-profit organisations whilst the subscription version (HGMD Professional) is available to academic, clinical and commercial users under license via QIAGEN Inc.",2017-03-27 +30131967,Superposition of artificial experimental error onto calculated time series: Construction of in-silico data sets.,"The data and complementary information presented here are related to the research in the article of ""https://doi.org/10.1016/j.cej.2018.01.027; Chem. Eng. J., 342, 41-51 (2018)"", where sets of in-silico data are constructed to show a novel method for parameter estimation in biodiesel production from triglycerides (Heynderickx et al., 2018) [1]. In this paper, the method for the used error superposition is explained and in order to ensure a ready reproduction by the reader, this work presents the basic steps for superposition of a normally distributed error via a simple Excel® datasheet file.",2018-05-18 +27490513,"Prevalence of Amyotrophic Lateral Sclerosis - United States, 2012-2013.","

Problem/condition

Amyotrophic lateral sclerosis (ALS), commonly known as Lou Gehrig's disease, is a progressive and fatal neuromuscular disease for which no cure or viable treatment has been identified. ALS, like most noncommunicable diseases, is not a nationally notifiable disease in the United States. The prevalence of ALS in the United States during 2010-2011 was estimated to be 3.9 cases per 100,000 persons in the general population. Updated prevalence estimates are needed to help monitor disease status, better understand etiology, and identify risk factors for ALS.

Period covered

2012-2013.

Description of system

The National ALS Registry, established in 2009, collects data on ALS patients in the United States to better describe the incidence and prevalence of ALS, examine risk factors such as environmental and occupational exposures, and characterize the demographics of those living with ALS. To identify prevalent cases of ALS, data are compiled from four national administrative databases (maintained by the Centers for Medicare and Medicaid Services, the Veterans Health Administration, and the Veterans Benefits Administration). To identify cases not included in these databases and to better understand risk-factors associated with ALS and disease progression, the Registry also includes data that are collected from patients who voluntarily enroll and complete online surveys.

Results

During 2012 and 2013, the Registry identified 14,713 and 15,908 persons, respectively, who met the surveillance case definition of ALS. The estimated ALS prevalence rate was 4.7 cases per 100,000 U.S. population for 2012 and 5.0 per 100,000 for 2013. Due to revisions to the algorithm and use of death data from the National Death Index, an updated prevalence estimate has been calculated retrospectively for October 19, 2010-December 31, 2011. This updated estimate showed a prevalence rate of 4.3 per 100,000 population and a total of 13,282 cases. Since the inception of the Registry, the pattern of characteristics (e.g., age, sex, and race/ethnicity) among persons with ALS has remained unchanged. Overall, ALS was more common among whites, males, and persons aged 60-69 years. The age groups with the lowest number of ALS cases were persons aged 18-39 years and those aged ≥80 years. Males had a higher prevalence rate of ALS than females overall and across all data sources. These findings remained consistent during October 2010-December 2013.

Interpretation

The Registry is the only available data source that can be used to estimate the national prevalence for ALS in the United States. Use of both administrative national databases and self-report from patients enables a comprehensive approach to estimate ALS prevalence. The overall increase in the prevalence rate from 4.3 per 100,000 persons (revised) during 2010-2011 to 4.7 and 5.0 per 100,000 persons, respectively, during 2012-2013 likely is not an actual increase in the number of ALS cases. Rather, this increase might be attributed to improved case ascertainment due to the refinement of the algorithm used to identify definite ALS cases, along with an increased public awareness of the Registry. Registry estimates of ALS prevalence are consistent with findings from long-established ALS registries in Europe and from smaller-scale epidemiologic studies previously conducted in the United States.

Public health actions

Data collected by the National ALS Registry are being used to better describe the epidemiology of ALS in the United States and to help facilitate research. The combined approach of using national administrative databases and a self-enrollment web portal to collect data is novel and potentially could be used for other non-notifiable diseases such as Parkinson's disease or multiple sclerosis. Increased public awareness of the Registry might lead to more ALS cases being identified from the secure web portal (https://www.cdc.gov/als), which can ascertain cases apart from the national administrative databases. For example, in 2014, the ALS Ice Bucket Challenge, a social media-centered campaign, received extensive public visibility and created increased awareness of ALS. The Agency for Toxic Substances and Disease Registry (ATSDR) works closely with ALS advocacy and support groups, researchers, health care professionals, and others to promote the National ALS Registry and to identify all cases of ALS in the United States. In addition to estimating the prevalence of ALS, the Registry is being used to collect specimens from patient enrollees through a new biorepository, connect patient enrollees with new clinical trials and epidemiologic studies, and fund studies to help learn more about the etiology of ALS. Additional information about the National ALS Registry is available at http://www.cdc.gov/als or by calling toll-free at 1-877-442-9719.",2016-08-05 +32158378,Silver surfers from a European perspective: technology communication usage among European seniors.,"Filling a gap in our understanding of how senior citizens use information and communication technologies (ICTs), we identified several profiles of technology communication use among European seniors (aged 65+). These profiles include: Digitally immersed communicators, Asynchronous communicators and Phone enjoyers. 
We outline the importance of a broader distinction, one that surpasses the non-user and user dichotomy, and explores the singularities of the seniors who overcome the challenge of adopting and using ICT. We consider the digital divide concept as a starting point for the theoretical background that we reviewed in order to explain the process through which senior citizens accept and adopt this technology. Analysing data gathered within the Eurobarometer (Standard Eurobarometer 84 Autumn 2015-media use in the European Union. https://dbk.gesis.org/dbksearch/sdesc2.asp?no=6642, 2015), we applied K-Means Cluster analysis and discriminant analysis in order to identify three types of older Internet users. We ran the analysis on a sample of 4404 respondents aged between 65 and 99 years. Our results help with increasing the adequacy of Digital Single Market policies for European seniors, as well as with more suitably targeting seniors for social care and medical care programmes in the digital environment. Providing suggestions for further research, we argue for an in-depth classification of ICT users, based on characteristics such as gender, education, ethnicity or social class.",2019-06-18 +30343393,Trajectories of childhood BMI and adult diabetes: the Bogalusa Heart Study.,"

Aims/hypothesis

The aim of this study was to characterise longitudinal profiles of BMI from childhood and to examine the impact of level-independent childhood BMI trajectories on adult type 2 diabetes.

Methods

The longitudinal cohort consisted of 2449 adults (1613 white and 836 black) who had their BMI measured between four and 15 times from childhood (4-19 years) to adulthood (20-51 years) and fasting glucose measured in adulthood. Model-estimated levels and linear slopes of BMI at childhood age points were calculated in 1-year intervals using growth-curve parameters and their first derivatives, respectively.

Results

BMI from childhood to adulthood fit cubic growth curves; linear and non-linear curve parameters differed significantly between race-sex groups. BMI showed race and sex differences from 15 years onwards. Individuals with hyperglycaemia had higher long-term BMI levels than those who were normoglycaemic in race-sex groups. Linear and non-linear slope parameters of BMI differed consistently and significantly between adult hyperglycaemia groups. The OR of childhood BMI levels for ages 4-19 years was 1.45-1.83 (p < 0.001 for all) for adult hyperglycaemia after adjustment for confounders. Level-adjusted linear slopes of BMI at ages 10-19 years showed significantly positive associations with adult hyperglycaemia (OR 1.17-1.50, p < 0.01 for all). The associations of childhood BMI linear slopes with adult hyperglycaemia were not significant during the age period 5-9 years. The trends in these associations were consistent across race-sex groups.

Conclusions/interpretation

These observations indicate that childhood BMI trajectories have a significant impact on adult diabetes, independent of BMI levels. The adolescence age period is a crucial window for the development of diabetes in later life, which has implications for early-life prevention.

Data availability

All data and materials are publicly available at the National Heart, Lung, and Blood Institute (NHLBI) Biologic Specimen and Data Repository and can be accessed at https://biolincc.nhlbi.nih.gov/studies/bhs .",2018-10-20 +26653323,iTAP: integrated transcriptomics and phenotype database for stress response of Escherichia coli and Saccharomyces cerevisiae.,"

Background

Organisms are subject to various stress conditions, which affect both the organism's gene expression and phenotype. It is critical to understand microbial responses to stress conditions and uncover the underlying molecular mechanisms. To this end, it is necessary to build a database that collects transcriptomics and phenotypic data of microbes growing under various stress factors for in-depth systems biology analysis. Despite numerous databases that collect gene expression profiles, to the best of our knowledge, there are few, if any, databases that collect both transcriptomics and phenotype data simultaneously. In light of this, we have developed an open source, web-based database, namely integrated transcriptomics and phenotype (iTAP) database, that records and links the transcriptomics and phenotype data for two model microorganisms, Escherichia coli and Saccharomyces cerevisiae in response to exposure of various stress conditions.

Results

To collect the data, we chose relevant research papers from the PubMed database containing all the necessary information for data curation including experimental conditions, transcriptomics data, and phenotype data. The transcriptomics data, including the p value and fold change, were obtained through the comparison of test strains against control strains using Gene Expression Omnibus's GEO2R analyzer. The phenotype data, including the cell growth rate and the productivity, volumetric rate, and mass-based yield of byproducts, were calculated independently from charts or graphs within the reference papers. Since the phenotype data was never reported in a standardized format, the curation of correlated transcriptomics-phenotype datasets became extremely tedious and time-consuming. Despite the challenges, till now, we successfully correlated 57 and 143 datasets of transcriptomics and phenotype for E. coli and S. cerevisiae, respectively, and applied a regression model within the iTAP database to accurately predict over 93 and 73 % of the growth rates of E. coli and S. cerevisiae, respectively, directly from the transcriptomics data.

Conclusion

This is the first time that transcriptomics and phenotype data are categorized and correlated in an open-source database. This allows biologists to access the database and utilize it to predict the phenotype of microorganisms from their transcriptomics data. The iTAP database is freely available at https://sites.google.com/a/vt.edu/biomolecular-engineering-lab/software .",2015-12-12 +30584360,"Distinct prognosis of mRNA expression of the five RecQ DNA-helicase family members - RECQL, BLM, WRN, RECQL4, and RECQL5 - in patients with breast cancer.","

Background

Five RecQ helicase family members have a role in maintaining genome stability. However, their prognostic roles in breast cancer remain unknown. We aimed to investigate the prognostic values of the RecQ family and clinical outcomes in breast cancer.

Methods

We used the Kaplan-Meier Plotter database (http://kmplot.com/analysis) to analyze prognostic values of RecQ-family mRNA expression in all breast cancers and in different intrinsic subtypes and clinicopathological characteristics. Protein-expression levels of WRN and RECQL4 were confirmed by immunohistochemistry (IHC) in breast cancer tissues.

Results

Increased expression of RECQL mRNA was significantly associated with reduced relapse-free survival (RFS) and postprogression survival (PPS) in all breast cancers, and improved overall survival (OS) in patients with basal-like breast cancer and in mutant-p53-type breast cancer patients. Increased expression of BLM mRNA was correlated with reduced distant metastasis-free survival (DMFS) in all patients. Increased expression of WRN mRNA was associated with improved OS and RFS in breast cancer patients. Increased expression of RECQL4 mRNA was associated with reduced OS, DMFS, and RFS in all breast cancers, and with reduced OS in patients with luminal A, HER2-positive, ER-positive, and PR-positive breast cancer. Increased expression of RECQL5 mRNA was associated with improved RFS in all patients, and with improved OS in patients with lymph-node-negative breast cancer, but with reduced OS in patients with HER2-positive breast cancer. IHC staining confirmed that high expression of WRN was correlated with increased OS and high expression of RECQL4 associated with reduced OS at protein levels.

Conclusion

mRNA-expression levels of RecQ members were significantly correlated with prognosis in breast cancer patients. These preliminary findings require further study to determine whether RecQ-targeting reagents might be developed for clinical application in breast cancer.",2018-12-05 +30030804,Implementing a Transcription Factor Interaction Prediction System Using the GenoMetric Query Language.,"Novel technologies and growing interest have resulted in a large increase in the amount of data available for genomics and transcriptomics studies, both in terms of volume and contents. Biology is relying more and more on computational methods to process, investigate, and extract knowledge from this huge amount of data. In this work, we present the TICA web server (available at http://www.gmql.eu/tica/ ), a fast and compact tool developed to support data-driven knowledge discovery in the realm of transcription factor interaction prediction. TICA leverages both the GenoMetric Query Language, a novel query tool (based on the Apache Hadoop and Spark technologies) specialized in the integration and management of heterogeneous, large genomic datasets, and a statistical method for robust detection of co-locations across interval-based data, in order to infer physically interacting transcription factors. Notably, TICA allows investigators to upload and analyze their own ChIP-seq experiments datasets, comparing them both against ENCODE data or between themselves, achieving computation time which increases linearly with respect to dataset size and density. 
Using ENCODE data from three well-studied cell lines as reference, we show that TICA predictions are supported by existing biological knowledge, making the web server a reliable and efficient tool for interaction screening and data-driven hypothesis generation.",2018-01-01 +,Ecology and spatial patterns of large-scale vegetation units within the central Namib Desert,"This article offers a review of published knowledge and a new state-of-the-art analysis regarding the floristic composition, the functional composition and the plant communities found in the central Namib Desert. At the same time, this paper contributes to the understanding of the relationship between the plant species composition of the central Namib Desert and the prevailing environmental gradients, with an emphasis on diversity and ecology in space and time. This article builds on three thematic foci. The first focus (1) lies on the present knowledge of the composition and the characteristics of the flora. A comprehensive floristic database has been compiled based on all available sources. A second focus (2) lies on the characterization and spatial distribution of the vegetation units. Therefore, we created a new vegetation classification based on a unique vegetation-plot database (http://www.givd.info/ID/AF-00-007) and additional data summing up to 2000 relevés, resulting in 21 large-scale vegetation classes. Using a supervised classification approach based on the vegetation classification, remote sensing and environmental data, we were able to produce a new vegetation map of the Central Namib. This was updated using expert knowledge, field visits and through manual preprocessing. 
With the third focus (3) we explore the spatial patterns of the previous foci and discuss their relation to environmental parameters and gradients.",2013-06-01 +29771388,GWAS4D: multidimensional analysis of context-specific regulatory variant for human complex diseases and traits.,"Genome-wide association studies have generated over thousands of susceptibility loci for many human complex traits, and yet for most of these associations the true causal variants remain unknown. Tissue/cell type-specific prediction and prioritization of non-coding regulatory variants will facilitate the identification of causal variants and underlying pathogenic mechanisms for particular complex diseases and traits. By leveraging recent large-scale functional genomics/epigenomics data, we develop an intuitive web server, GWAS4D (http://mulinlab.tmu.edu.cn/gwas4d or http://mulinlab.org/gwas4d), that systematically evaluates GWAS signals and identifies context-specific regulatory variants. The updated web server includes six major features: (i) updates the regulatory variant prioritization method with our new algorithm; (ii) incorporates 127 tissue/cell type-specific epigenomes data; (iii) integrates motifs of 1480 transcriptional regulators from 13 public resources; (iv) uniformly processes Hi-C data and generates significant interactions at 5 kb resolution across 60 tissues/cell types; (v) adds comprehensive non-coding variant functional annotations; (vi) equips a highly interactive visualization function for SNP-target interaction. Using a GWAS fine-mapped set for 161 coronary artery disease risk loci, we demonstrate that GWAS4D is able to efficiently prioritize disease-causal regulatory variants.",2018-07-01 +29498022,Correction to: Interior Immigration Enforcement and Political Participation of U.S. Citizens in Mixed-Status Households.,"Ruggles, S., Genadek, K., Goeken, R., Grover, J., & and Sobek, M. (2017). Integrated Public Use Microdata Series: Version 7.0 [Data set]. 
Minneapolis: University of Minnesota. https://doi.org/10.18128/D010.V7.0.",2018-08-01 +27188311,A comprehensive database of high-throughput sequencing-based RNA secondary structure probing data (Structure Surfer).,"

Background

RNA molecules fold into complex three-dimensional shapes, guided by the pattern of hydrogen bonding between nucleotides. This pattern of base pairing, known as RNA secondary structure, is critical to their cellular function. Recently several diverse methods have been developed to assay RNA secondary structure on a transcriptome-wide scale using high-throughput sequencing. Each approach has its own strengths and caveats, however there is no widely available tool for visualizing and comparing the results from these varied methods.

Methods

To address this, we have developed Structure Surfer, a database and visualization tool for inspecting RNA secondary structure in six transcriptome-wide data sets from human and mouse ( http://tesla.pcbi.upenn.edu/strucuturesurfer/ ). The data sets were generated using four different high-throughput sequencing based methods. Each one was analyzed with a scoring pipeline specific to its experimental design. Users of Structure Surfer have the ability to query individual loci as well as detect trends across multiple sites.

Results

Here, we describe the included data sets and their differences. We illustrate the database's function by examining known structural elements and we explore example use cases in which combined data is used to detect structural trends.

Conclusions

In total, Structure Surfer provides an easy-to-use database and visualization interface for allowing users to interrogate the currently available transcriptome-wide RNA secondary structure information for mammals.",2016-05-17 +28492865,The Relationship Between Iron Deficiency Anemia and Sensorineural Hearing Loss in the Pediatric and Adolescent Population.,"

Purpose

A correlation between iron deficiency anemia (IDA) and sudden sensorineural hearing loss (SNHL) was described in adults. In this study, we examined if there is a relationship between IDA and hearing loss in the pediatric population.

Method

This was a retrospective cohort study of data collected from the Informatics for Integrating Biology and the Bedside database from 2011 to 2016. Children and adolescents 4-21 years old seen at Penn State Milton S. Hershey Medical Center, Hershey, PA, were examined for hearing loss and IDA status. Hearing loss was determined by International Classification of Disease-9 and -10 codes, and IDA was determined by both low hemoglobin and serum ferritin levels for age and sex.

Results

We identified 20,113 patients. Prevalence of hearing loss and IDA was 1.7% and 2.3%, respectively. The prevalence of all hearing loss was 3.0% in the IDA cohort and 1.7% in those without IDA. Children and adolescents with IDA are at increased odds of developing SNHL (adjusted odds ratio: 3.67, 95% CI [1.60-7.30]).

Conclusions

Children with IDA demonstrate increased likelihood of SNHL. Although correction of IDA in those with hearing loss has yet to be linked to improvements in hearing outcomes, screening for and correcting IDA among pediatric patients will positively affect overall health status. Supplemental Material: https://doi.org/10.23641/asha.5087071.",2017-06-01 +23934791,Protein models: the Grand Challenge of protein docking.,"Characterization of life processes at the molecular level requires structural details of protein-protein interactions (PPIs). The number of experimentally determined protein structures accounts only for a fraction of known proteins. This gap has to be bridged by modeling, typically using experimentally determined structures as templates to model related proteins. The fraction of experimentally determined PPI structures is even smaller than that for the individual proteins, due to a larger number of interactions than the number of individual proteins, and a greater difficulty of crystallizing protein-protein complexes. The approaches to structural modeling of PPI (docking) often have to rely on modeled structures of the interactors, especially in the case of large PPI networks. Structures of modeled proteins are typically less accurate than the ones determined by X-ray crystallography or nuclear magnetic resonance. Thus the utility of approaches to dock these structures should be assessed by thorough benchmarking, specifically designed for protein models. To be credible, such benchmarking has to be based on carefully curated sets of structures with levels of distortion typical for modeled proteins. This article presents such a suite of models built for the benchmark set of the X-ray structures from the Dockground resource (http://dockground.bioinformatics.ku.edu) by a combination of homology modeling and Nudged Elastic Band method. 
For each monomer, six models were generated with predefined C(α) root mean square deviation from the native structure (1, 2, …, 6 Å). The sets and the accompanying data provide a comprehensive resource for the development of docking methodology for modeled proteins.",2013-10-17 +22139932,"HmtDB, a genomic resource for mitochondrion-based human variability studies.","HmtDB (http://www.hmtdb.uniba.it:8080/hmdb) is a open resource created to support population genetics and mitochondrial disease studies. The database hosts human mitochondrial genome sequences annotated with population and variability data, the latter being estimated through the application of the SiteVar software based on site-specific nucleotide and amino acid variability calculations. The annotations are manually curated thus adding value to the quality of the information provided to the end-user. Classifier tools implemented in HmtDB allow the prediction of the haplogroup for any human mitochondrial genome currently stored in HmtDB or externally submitted de novo by an end-user. Haplogroup definition is based on the Phylotree system. End-users accessing HmtDB are hence allowed to (i) browse the database through the use of a multi-criterion 'query' system; (ii) analyze their own human mitochondrial sequences via the 'classify' tool (for complete genomes) or by downloading the 'fragment-classifier' tool (for partial sequences); (iii) download multi-alignments with reference genomes as well as variability data.",2011-12-01 +29996917,A new bioinformatics tool to help assess the significance of BRCA1 variants.,"

Background

Germline pathogenic variants in the breast cancer type 1 susceptibility gene BRCA1 are associated with a 60% lifetime risk for breast and ovarian cancer. This overall risk estimate is for all BRCA1 variants; obviously, not all variants confer the same risk of developing a disease. In cancer patients, loss of BRCA1 function in tumor tissue has been associated with an increased sensitivity to platinum agents and to poly-(ADP-ribose) polymerase (PARP) inhibitors. For clinical management of both at-risk individuals and cancer patients, it would be important that each identified genetic variant be associated with clinical significance. Unfortunately for the vast majority of variants, the clinical impact is unknown. The availability of results from studies assessing the impact of variants on protein function may provide insight of crucial importance.

Results and conclusion

We have collected, curated, and structured the molecular and cellular phenotypic impact of 3654 distinct BRCA1 variants. The data was modeled in triple format, using the variant as a subject, the studied function as the object, and a predicate describing the relation between the two. Each annotation is supported by a fully traceable evidence. The data was captured using standard ontologies to ensure consistency, and enhance searchability and interoperability. We have assessed the extent to which functional defects at the molecular and cellular levels correlate with the clinical interpretation of variants by ClinVar submitters. Approximately 30% of the ClinVar BRCA1 missense variants have some molecular or cellular assay available in the literature. Pathogenic variants (as assigned by ClinVar) have at least some significant functional defect in 94% of testable cases. For benign variants, 77% of ClinVar benign variants, for which neXtProt Cancer variant portal has data, shows either no or mild experimental functional defects. While this does not provide evidence for clinical interpretation of variants, it may provide some guidance for variants of unknown significance, in the absence of more reliable data. The neXtProt Cancer variant portal ( https://www.nextprot.org/portals/breast-cancer ) contains over 6300 observations at the molecular and/or cellular level for BRCA1 variants.",2018-07-11 +30709337,Probing transcription factor combinatorics in different promoter classes and in enhancers.,"

Background

In eukaryotic cells, transcription factors (TFs) are thought to act in a combinatorial way, by competing and collaborating to regulate common target genes. However, several questions remain regarding the conservation of these combinations among different gene classes, regulatory regions and cell types.

Results

We propose a new approach named TFcoop to infer the TF combinations involved in the binding of a target TF in a particular cell type. TFcoop aims to predict the binding sites of the target TF upon the nucleotide content of the sequences and of the binding affinity of all identified cooperating TFs. The set of cooperating TFs and model parameters are learned from ChIP-seq data of the target TF. We used TFcoop to investigate the TF combinations involved in the binding of 106 TFs on 41 cell types and in four regulatory regions: promoters of mRNAs, lncRNAs and pri-miRNAs, and enhancers. We first assess that TFcoop is accurate and outperforms simple PWM methods for predicting TF binding sites. Next, analysis of the learned models sheds light on important properties of TF combinations in different promoter classes and in enhancers. First, we show that combinations governing TF binding on enhancers are more cell-type specific than those governing binding in promoters. Second, for a given TF and cell type, we observe that TF combinations are different between promoters and enhancers, but similar for promoters of mRNAs, lncRNAs and pri-miRNAs. Analysis of the TFs cooperating with the different targets shows over-representation of pioneer TFs and a clear preference for TFs with binding motif composition similar to that of the target. Lastly, our models accurately distinguish promoters associated with specific biological processes.

Conclusions

TFcoop appears as an accurate approach for studying TF combinations. Its use on ENCODE and FANTOM data allowed us to discover important properties of human TF combinations in different promoter classes and in enhancers. The R code for learning a TFcoop model and for reproducing the main experiments described in the paper is available in an R Markdown file at address https://gite.lirmm.fr/brehelin/TFcoop .",2019-02-01 +28451979,Exploring Protein Function Using the Saccharomyces Genome Database.,"Elucidating the function of individual proteins will help to create a comprehensive picture of cell biology, as well as shed light on human disease mechanisms, possible treatments, and cures. Due to its compact genome, and extensive history of experimentation and annotation, the budding yeast Saccharomyces cerevisiae is an ideal model organism in which to determine protein function. This information can then be leveraged to infer functions of human homologs. Despite the large amount of research and biological data about S. cerevisiae, many proteins' functions remain unknown. Here, we explore ways to use the Saccharomyces Genome Database (SGD; http://www.yeastgenome.org ) to predict the function of proteins and gain insight into their roles in various cellular processes.",2017-01-01 +27352859,SorghumFDB: sorghum functional genomics database with multidimensional network analysis. ,"Sorghum (Sorghum bicolor [L.] Moench) has excellent agronomic traits and biological properties, such as heat and drought-tolerance. It is a C4 grass and potential bioenergy-producing plant, which makes it an important crop worldwide. With the sorghum genome sequence released, it is essential to establish a sorghum functional genomics data mining platform. We collected genomic data and some functional annotations to construct a sorghum functional genomics database (SorghumFDB). 
SorghumFDB integrated knowledge of sorghum gene family classifications (transcription regulators/factors, carbohydrate-active enzymes, protein kinases, ubiquitins, cytochrome P450, monolignol biosynthesis related enzymes, R-genes and organelle-genes), detailed gene annotations, miRNA and target gene information, orthologous pairs in the model plants Arabidopsis, rice and maize, gene loci conversions and a genome browser. We further constructed a dynamic network of multidimensional biological relationships, comprised of the co-expression data, protein-protein interactions and miRNA-target pairs. We took effective measures to combine the network, gene set enrichment and motif analyses to determine the key regulators that participate in related metabolic pathways, such as the lignin pathway, which is a major biological process in bioenergy-producing plants. Database URL: http://structuralbiology.cau.edu.cn/sorghum/index.html.",2016-06-26 +28522849,CarcinoPred-EL: Novel models for predicting the carcinogenicity of chemicals using molecular fingerprints and ensemble learning methods.,"Carcinogenicity refers to a highly toxic end point of certain chemicals, and has become an important issue in the drug development process. In this study, three novel ensemble classification models, namely Ensemble SVM, Ensemble RF, and Ensemble XGBoost, were developed to predict carcinogenicity of chemicals using seven types of molecular fingerprints and three machine learning methods based on a dataset containing 1003 diverse compounds with rat carcinogenicity. Among these three models, Ensemble XGBoost is found to be the best, giving an average accuracy of 70.1 ± 2.9%, sensitivity of 67.0 ± 5.0%, and specificity of 73.1 ± 4.4% in five-fold cross-validation and an accuracy of 70.0%, sensitivity of 65.2%, and specificity of 76.5% in external validation. 
In comparison with some recent methods, the ensemble models outperform some machine learning-based approaches and yield equal accuracy and higher specificity but lower sensitivity than rule-based expert systems. It is also found that the ensemble models could be further improved if more data were available. As an application, the ensemble models are employed to discover potential carcinogens in the DrugBank database. The results indicate that the proposed models are helpful in predicting the carcinogenicity of chemicals. A web server called CarcinoPred-EL has been built for these models ( http://ccsipb.lnu.edu.cn/toxicity/CarcinoPred-EL/ ).",2017-05-18 +31026367,eDiVA-Classification and prioritization of pathogenic variants for clinical diagnostics.,"Mendelian diseases have been shown to be an efficient model for connecting genotypes to phenotypes and for elucidating the function of genes. Whole-exome sequencing (WES) accelerated the study of rare Mendelian diseases in families, allowing for directly pinpointing rare causal mutations in genic regions without the need for linkage analysis. However, the low diagnostic rates of 20-30% reported for multiple WES disease studies point to the need for improved variant pathogenicity classification and causal variant prioritization methods. Here, we present the exome Disease Variant Analysis (eDiVA; http://ediva.crg.eu), an automated computational framework for identification of causal genetic variants (coding/splicing single-nucleotide variants and small insertions and deletions) for rare diseases using WES of families or parent-child trios. eDiVA combines next-generation sequencing data analysis, comprehensive functional annotation, and causal variant prioritization optimized for familial genetic disease studies. eDiVA features a machine learning-based variant pathogenicity predictor combining various genomic and evolutionary signatures. 
Clinical information, such as disease phenotype or mode of inheritance, is incorporated to improve the precision of the prioritization algorithm. Benchmarking against state-of-the-art competitors demonstrates that eDiVA consistently performed as well as or better than existing approaches in terms of detection rate and precision. Moreover, we applied eDiVA to several familial disease cases to demonstrate its clinical applicability.",2019-05-21 +22140109,The Arabidopsis Information Resource (TAIR): improved gene annotation and new tools.,"The Arabidopsis Information Resource (TAIR, http://arabidopsis.org) is a genome database for Arabidopsis thaliana, an important reference organism for many fundamental aspects of biology as well as basic and applied plant biology research. TAIR serves as a central access point for Arabidopsis data, annotates gene function and expression patterns using controlled vocabulary terms, and maintains and updates the A. thaliana genome assembly and annotation. TAIR also provides researchers with an extensive set of visualization and analysis tools. Recent developments include several new genome releases (TAIR8, TAIR9 and TAIR10) in which the A. thaliana assembly was updated, pseudogenes and transposon genes were re-annotated, and new data from proteomics and next generation transcriptome sequencing were incorporated into gene models and splice variants. Other highlights include progress on functional annotation of the genome and the release of several new tools including Textpresso for Arabidopsis which provides the capability to carry out full text searches on a large body of research literature.",2011-12-02 +28070014,Lung Gene Expression Analysis (LGEA): an integrative web portal for comprehensive gene expression data analysis in lung development.,"'LungGENS', our previously developed web tool for mapping single-cell gene expression in the developing lung, has been well received by the pulmonary research community. 
With continued support from the 'LungMAP' consortium, we extended the scope of the LungGENS database to accommodate transcriptomics data from pulmonary tissues and cells from human and mouse at different stages of lung development. Lung Gene Expression Analysis (LGEA) web portal is an extended version of LungGENS useful for the analysis, display and interpretation of gene expression patterns obtained from single cells, sorted cell populations and whole lung tissues. The LGEA web portal is freely available at http://research.cchmc.org/pbge/lunggens/mainportal.html.",2017-01-09 +28605768,Surveying the Maize community for their diversity and pedigree visualization needs to prioritize tool development and curation. ,"The Maize Genetics and Genomics Database (MaizeGDB) team prepared a survey to identify breeders’ needs for visualizing pedigrees, diversity data and haplotypes in order to prioritize tool development and curation efforts at MaizeGDB. The survey was distributed to the maize research community on behalf of the Maize Genetics Executive Committee in Summer 2015. The survey garnered 48 responses from maize researchers, of which more than half were self-identified as breeders. The survey showed that the maize researchers considered their top priorities for visualization as: (i) displaying single nucleotide polymorphisms in a given region for a given list of lines, (ii) showing haplotypes for a given list of lines and (iii) presenting pedigree relationships visually. The survey also asked which populations would be most useful to display. The following two populations were on top of the list: (i) 3000 publicly available maize inbred lines used in Romay et al. (Comprehensive genotyping of the USA national maize inbred seed bank. Genome Biol, 2013;14:R55) and (ii) maize lines with expired Plant Variety Protection Act (ex-PVP) certificates. 
Driven by this strong stakeholder input, MaizeGDB staff are currently working in four areas to improve its interface and web-based tools: (i) presenting immediate progenies of currently available stocks at the MaizeGDB Stock pages, (ii) displaying the most recent ex-PVP lines described in the Germplasm Resources Information Network (GRIN) on the MaizeGDB Stock pages, (iii) developing network views of pedigree relationships and (iv) visualizing genotypes from SNP-based diversity datasets. These survey results can help other biological databases to direct their efforts according to user preferences as they serve similar types of data sets for their communities. https://www.maizegdb.org.",2017-01-01 +29608647,BART: a transcription factor prediction tool with query gene sets or epigenomic profiles.,"

Summary

Identification of functional transcription factors that regulate a given gene set is an important problem in gene regulation studies. Conventional approaches for identifying transcription factors, such as DNA sequence motif analysis, are unable to predict functional binding of specific factors and not sensitive enough to detect factors binding at distal enhancers. Here, we present binding analysis for regulation of transcription (BART), a novel computational method and software package for predicting functional transcription factors that regulate a query gene set or associate with a query genomic profile, based on more than 6000 existing ChIP-seq datasets for over 400 factors in human or mouse. This method demonstrates the advantage of utilizing publicly available data for functional genomics research.

Availability and implementation

BART is implemented in Python and available at http://faculty.virginia.edu/zanglab/bart.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-08-01 +29795799,Crowdsourcing Our National Gut. ,"The microbes of the human intestinal tract play a profound role in our health. The complex interactions between our gut microbial communities and the external environment, and the resulting functional consequences, can be difficult to disentangle. To address this problem, McDonald et al. (mSystems 3:e00031-18, 2018, https://doi.org/10.1128/mSystems.00031-18) present the first set of results from the American Gut Project, a citizen science-based data set currently comprised of over 10,000 gut microbiome samples and associated life history data. By combining this extensive data set with other published studies, the authors uncover novel relationships between gut microbiome structure and function. For example, they found that dietary plant diversity and recent antibiotic use predict both microbial and metabolomic diversity. McDonald et al. also demonstrate that there is high diversity across human gut microbiomes, even compared to the diversity of environmental microbiomes. The results from this study illuminate the potential of the citizen science approach to further our knowledge of host-associated microbial communities.",2018-05-15 +25428892,Genome-wide development of transposable elements-based markers in foxtail millet and construction of an integrated database.,"Transposable elements (TEs) are major components of plant genome and are reported to play significant roles in functional genome diversity and phenotypic variations. Several TEs are highly polymorphic for insert location in the genome and this facilitates development of TE-based markers for various genotyping purposes. Considering this, a genome-wide analysis was performed in the model plant foxtail millet. 
A total of 30,706 TEs were identified and classified as DNA transposons (24,386), full-length Copia type (1,038), partial or solo Copia type (10,118), full-length Gypsy type (1,570), partial or solo Gypsy type (23,293) and Long- and Short-Interspersed Nuclear Elements (3,659 and 53, respectively). Further, 20,278 TE-based markers were developed, namely Retrotransposon-Based Insertion Polymorphisms (4,801, ∼24%), Inter-Retrotransposon Amplified Polymorphisms (3,239, ∼16%), Repeat Junction Markers (4,451, ∼22%), Repeat Junction-Junction Markers (329, ∼2%), Insertion-Site-Based Polymorphisms (7,401, ∼36%) and Retrotransposon-Microsatellite Amplified Polymorphisms (57, 0.2%). A total of 134 Repeat Junction Markers were screened in 96 accessions of Setaria italica and 3 wild Setaria accessions of which 30 showed polymorphism. Moreover, an open access database for these developed resources was constructed (Foxtail millet Transposable Elements-based Marker Database; http://59.163.192.83/ltrdb/index.html). Taken together, this study would serve as a valuable resource for large-scale genotyping applications in foxtail millet and related grass species.",2014-11-26 +29971659,"Emotional Processing in Autism Spectrum Disorders: Effects of Age, Emotional Valence, and Social Engagement on Emotional Language Use.","Children with autism spectrum disorders (ASD) show deficits in reporting others' emotions (Lartseva et al. in Front Hum Neurosci 8:991, 2015) and in deriving meaning in social contexts (Klin et al. in Handbook of autism and pervasive developmental disorders, Wiley, Hoboken, 2005). However, researchers often use stimuli that conflate salient emotional and social information. 
Using a matched-pairs design, the impact of emotional and social information on emotional language in pre-school and school-age children, with and without ASD, was assessed with a picture description task comprising rated stimuli from the Pictures with Social Contexts and Emotional Scenes database (Teh et al. in Behav Res Methods, https://doi.org/10.3758/s13428-017-0947-x , 2017). Results showed both groups with ASD produced fewer emotional terms than typically developing children, but the effects were moderated by valence, social engagement, and age. Implications for theory and clinical practice are discussed.",2018-12-01 +29218589,StimulStat: A lexical database for Russian.,"In this article, we present StimulStat - a lexical database for the Russian language in the form of a web application. The database contains more than 52,000 of the most frequent Russian lemmas and more than 1.7 million word forms derived from them. These lemmas and forms are characterized according to more than 70 properties that were demonstrated to be relevant for psycholinguistic research, including frequency, length, phonological and grammatical properties, orthographic and phonological neighborhood frequency and size, grammatical ambiguity, homonymy and polysemy. Some properties were retrieved from various dictionaries and are presented collectively in a searchable form for the first time, the others were computed specifically for the database. The database can be accessed freely at http://stimul.cognitivestudies.ru .",2018-12-01 +26257768,Remote homology and the functions of metagenomic dark matter.,"Predicted open reading frames (ORFs) that lack detectable homology to known proteins are termed ORFans. Despite their prevalence in metagenomes, the extent to which ORFans encode real proteins, the degree to which they can be annotated, and their functional contributions, remain unclear. 
To gain insights into these questions, we applied sensitive remote-homology detection methods to functionally analyze ORFans from soil, marine, and human gut metagenome collections. ORFans were identified, clustered into sequence families, and annotated through profile-profile comparison to proteins of known structure. We found that a considerable number of metagenomic ORFans (73,896 of 484,121, 15.3%) exhibit significant remote homology to structurally characterized proteins, providing a means for ORFan functional profiling. The extent of detected remote homology far exceeds that obtained for artificial protein families (1.4%). As expected for real genes, the predicted functions of ORFans are significantly similar to the functions of their gene neighbors (p < 0.001). Compared to the functional profiles predicted through standard homology searches, ORFans show biologically intriguing differences. Many ORFan-enriched functions are virus-related and tend to reflect biological processes associated with extreme sequence diversity. Each environment also possesses a large number of unique ORFan families and functions, including some known to play important community roles such as gut microbial polysaccharide digestion. Lastly, ORFans are a valuable resource for finding novel enzymes of interest, as we demonstrate through the identification of hundreds of novel ORFan metalloproteases that all possess a signature catalytic motif despite a general lack of similarity to known proteins. Our ORFan functional predictions are a valuable resource for discovering novel protein families and exploring the boundaries of protein sequence space. All remote homology predictions are available at http://doxey.uwaterloo.ca/ORFans.",2015-07-21 +29717215,DNAp: A Pipeline for DNA-seq Data Analysis.,"Next-generation sequencing is empowering genetic disease research. However, it also brings significant challenges for efficient and effective sequencing data analysis. 
We built a pipeline, called DNAp, for analyzing whole exome sequencing (WES) and whole genome sequencing (WGS) data, to detect mutations from disease samples. The pipeline is containerized, convenient to use and can run under any system, since it is a fully automatic process in Docker container form. It is also open, and can be easily customized with user intervention points, such as for updating reference files and different software or versions. The pipeline has been tested with both human and mouse sequencing datasets, and it has generated mutations results, comparable to published results from these datasets, and reproducible across heterogeneous hardware platforms. The pipeline DNAp, funded by the US Food and Drug Administration (FDA), was developed for analyzing DNA sequencing data of FDA. Here we make DNAp an open source, with the software and documentation available to the public at http://bioinformatics.astate.edu/dna-pipeline/ .",2018-05-01 +26657633,The International Nucleotide Sequence Database Collaboration.,"The International Nucleotide Sequence Database Collaboration (INSDC; http://www.insdc.org) comprises three global partners committed to capturing, preserving and providing comprehensive public-domain nucleotide sequence information. The INSDC establishes standards, formats and protocols for data and metadata to make it easier for individuals and organisations to submit their nucleotide data reliably to public archives. This work enables the continuous, global exchange of information about living things. Here we present an update of the INSDC in 2015, including data growth and diversification, new standards and requirements by publishers for authors to submit their data to the public archives. The INSDC serves as a model for data sharing in the life sciences.",2015-12-10 +31278539,Core curriculum online lecture series in musculoskeletal imaging: initial results.,"

Objective

To augment the educational resources available to training programs and trainees in musculoskeletal (MSK) radiology by creating a comprehensive series of Web-based open-access core curriculum lectures.

Materials and methods

Speakers with recognized content and lecturing expertise in MSK radiology were invited to create digitally recorded lecture presentations across a series of 42 core curriculum topics in MSK imaging. Resultant presentation recordings, organized under curriculum subject headings, were archived as open-access video file recordings for online viewing on a dedicated Web page (http://radiologycorelectures.org/msk/). Information regarding the online core curriculum lecture series was distributed to members of the International Skeletal Society, Society of Skeletal Radiology, Society of Chairs of Academic Radiology Departments, and the Association of Program Directors in Radiology. Web page and online lecture utilization data were collected using Google Analytics (Alphabet, Mountain View, CA, USA).

Results

Forty-two lectures, by 38 speakers, were recorded, edited and hosted online. Lectures spanned ACGME curriculum categories of musculoskeletal trauma, arthritis, metabolic diseases, marrow, infection, tumors, imaging of internal derangement of joints, congenital disorders, and orthopedic imaging. Online access to the core curriculum lectures was opened on March 4, 2018. As of January 20, 2019, the core curriculum lectures have had 77,573 page views from 34,977 sessions.

Conclusions

To date, the MSK core curriculum lecture series lectures have been widely accessed and viewed. It is envisioned that the initial success of the project will serve to promote ongoing content renewal and expansion to the lecture materials over time.",2019-07-05 +31278492,Automated feature engineering improves prediction of protein-protein interactions.,"Over the last decade, various machine learning (ML) and statistical approaches for protein-protein interaction (PPI) predictions have been developed to help annotating functional interactions among proteins, essential for our system-level understanding of life. Efficient ML approaches require informative and non-redundant features. In this paper, we introduce novel types of expert-crafted sequence, evolutionary and graph features and apply automatic feature engineering to further expand feature space to improve predictive modeling. The two-step automatic feature-engineering process encompasses the hybrid method for feature generation and unsupervised feature selection, followed by supervised feature selection through a genetic algorithm (GA). The optimization of both steps allows the feature-engineering procedure to operate on a large transformed feature space with no considerable computational cost and to efficiently provide newly engineered features. Based on GA and correlation filtering, we developed a stacking algorithm GA-STACK for automatic ensembling of different ML algorithms to improve prediction performance. We introduced a unified method, HP-GAS, for the prediction of human PPIs, which incorporates GA-STACK and rests on both expert-crafted and 40% of newly engineered features. The extensive cross validation and comparison with the state-of-the-art methods showed that HP-GAS represents currently the most efficient method for proteome-wide forecasting of protein interactions, with prediction efficacy of 0.93 AUC and 0.85 accuracy. 
We implemented the HP-GAS method as a free standalone application which is a time-efficient and easy-to-use tool. HP-GAS software with supplementary data can be downloaded from: http://www.vinca.rs/180/tools/HP-GAS.php .",2019-07-05 +30239879,Trips-Viz: a transcriptome browser for exploring Ribo-Seq data.,"Ribosome profiling (Ribo-Seq) is a technique that allows for the isolation and sequencing of mRNA fragments protected from nuclease digestion by actively translating ribosomes. Mapping these ribosome footprints to a genome or transcriptome generates quantitative information on translated regions. To provide access to publicly available ribosome profiling data in the context of transcriptomes we developed Trips-Viz (transcriptome-wide information on protein synthesis-visualized). Trips-Viz provides a large range of graphical tools for exploring global properties of translatomes and of individual transcripts. It enables analysis of aligned footprints to evaluate datasets quality, differential gene expression detection, visual identification of upstream ORFs and alternative proteoforms. Trips-Viz is available at https://trips.ucc.ie.",2019-01-01 +27789699,Proteome-pI: proteome isoelectric point database.,"Proteome-pI is an online database containing information about predicted isoelectric points for 5029 proteomes calculated using 18 methods. The isoelectric point, the pH at which a particular molecule carries no net electrical charge, is an important parameter for many analytical biochemistry and proteomics techniques, especially for 2D gel electrophoresis (2D-PAGE), capillary isoelectric focusing, liquid chromatography-mass spectrometry and X-ray protein crystallography. The database, available at http://isoelectricpointdb.org allows the retrieval of virtual 2D-PAGE plots and the development of customised fractions of proteome based on isoelectric point and molecular weight. 
Moreover, Proteome-pI facilitates statistical comparisons of the various prediction methods as well as biological investigation of protein isoelectric point space in all kingdoms of life. For instance, using Proteome-pI data, it is clear that Eukaryotes, which evolved tight control of homeostasis, encode proteins with pI values near the cell pH. In contrast, Archaea living frequently in extreme environments can possess proteins with a wide range of isoelectric points. The database includes various statistics and tools for interactive browsing, searching and sorting. Apart from data for individual proteomes, datasets corresponding to major protein databases such as UniProtKB/TrEMBL and the NCBI non-redundant (nr) database have also been precalculated and made available in CSV format.",2016-10-26 +31404111,Association mapping for agronomic traits in six-rowed spring barley from the USA harvested in Kazakhstan.,"In barley, six-rowed barley is advantageous over two-rowed barley for feed due to the larger number of seeds per spike and the higher seed protein content. The growth of six-rowed barley is potentially important for breeding in agriculturally oriented countries, such as Kazakhstan. Nevertheless, until recently, very little attention was given to six-rowed barley in breeding projects in Kazakhstan, one of the largest countries in the world. In this study, phenotyping and single nucleotide polymorphism (SNP) genotyping data were generated from 275 accessions originating from six different breeding organizations in the USA as well as 9 accessions from Kazakhstan in field trials at six breeding institutions. The USA six-rowed barley was tested in comparison to local accessions over three years (2009-2011) based on analyses of key agronomic traits. It was determined that the average yield in the USA accessions in comparison to local lines showed heavier yield in all six tested sites. 
Principal Coordinate Analysis based on 1618 polymorphic SNP markers separated Kazakh lines from six USA barley origin groups based on PC1 (77.9%), and Montana lines from the remaining five USA groups based on PC2 (15.1%). A genome-wide association study based on eighteen field trials allowed the identification of 47 stable marker-trait associations (MTA) for ten agronomic traits, including key yield related characters such as yield per square meter, thousand grain weight, number of kernels per spike, and productive tillers. The comparison of chromosomal positions of identified MTA with positions of known genes and quantitative trait loci suggests that 25 out of those 47 MTAs are presumably novel. The analysis of 42 SNPs associated with 47 MTAs in the Ensemble genome annotation system (http://ensemblgenomes.org) suggested that 40 SNPs were in genic positions of the genome, as their sequences successfully aligned with corresponding Gen ID.",2019-08-12 +30327564,A prediction model for underestimation of invasive breast cancer after a biopsy diagnosis of ductal carcinoma in situ: based on 2892 biopsies and 589 invasive cancers.,"

Background

Patients with a biopsy diagnosis of ductal carcinoma in situ (DCIS) might be diagnosed with invasive breast cancer at excision, a phenomenon known as underestimation. Patients with DCIS are treated based on the risk of underestimation or progression to invasive cancer. The aim of our study was to expand the knowledge on underestimation and to develop a prediction model.

Methods

Population-based data were retrieved from the Dutch Pathology Registry and the Netherlands Cancer Registry for DCIS between January 2011 and June 2012.

Results

Of 2892 DCIS biopsies, 21% were underestimated invasive breast cancers. In multivariable analysis, risk factors were high-grade DCIS (odds ratio (OR) 1.43, 95% confidence interval (CI): 1.05-1.95), a palpable tumour (OR 2.22, 95% CI: 1.76-2.81), a BI-RADS (Breast Imaging Reporting and Data System) score 5 (OR 2.36, 95% CI: 1.80-3.09) and a suspected invasive component at biopsy (OR 3.84, 95% CI: 2.69-5.46). The predicted risk for underestimation ranged from 9.5 to 80.2%, with a median of 14.7%. Of the 596 invasive cancers, 39% had unfavourable features.

Conclusions

The risk for an underestimated diagnosis of invasive breast cancer after a biopsy diagnosis of DCIS is considerable. With our prediction model, the individual risk of underestimation can be calculated based on routinely available preoperatively known risk factors ( https://www.evidencio.com/models/show/1074 ).",2018-10-17 +25428369,Gene Ontology Consortium: going forward.,"The Gene Ontology (GO; http://www.geneontology.org) is a community-based bioinformatics resource that supplies information about gene product function using ontologies to represent biological knowledge. Here we describe improvements and expansions to several branches of the ontology, as well as updates that have allowed us to more efficiently disseminate the GO and capture feedback from the research community. The Gene Ontology Consortium (GOC) has expanded areas of the ontology such as cilia-related terms, cell-cycle terms and multicellular organism processes. We have also implemented new tools for generating ontology terms based on a set of logical rules making use of templates, and we have made efforts to increase our use of logical definitions. The GOC has a new and improved web site summarizing new developments and documentation, serving as a portal to GO data. Users can perform GO enrichment analysis, and search the GO for terms, annotations to gene products, and associated metadata across multiple species using the all-new AmiGO 2 browser. We encourage and welcome the input of the research community in all biological areas in our continued effort to improve the Gene Ontology.",2014-11-26 +28453683,MobiDB-lite: fast and highly specific consensus prediction of intrinsic disorder in proteins.,"

Motivation

Intrinsic disorder (ID) is established as an important feature of protein sequences. Its use in proteome annotation is however hampered by the availability of many methods with similar performance at the single residue level, which have mostly not been optimized to predict long ID regions of size comparable to domains.

Results

Here, we have focused on providing a single consensus-based prediction, MobiDB-lite, optimized for highly specific (i.e. few false positive) predictions of long disorder. The method uses eight different predictors to derive a consensus which is then filtered for spurious short predictions. Consensus prediction is shown to outperform the single methods when annotating long ID regions. MobiDB-lite can be useful in large-scale annotation scenarios and has indeed already been integrated in the MobiDB, DisProt and InterPro databases.

Availability and implementation

MobiDB-lite is available as part of the MobiDB database from URL: http://mobidb.bio.unipd.it/. An executable can be downloaded from URL: http://protein.bio.unipd.it/mobidblite/.

Contact

silvio.tosatto@unipd.it.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +27924024,GTRD: a database of transcription factor binding sites identified by ChIP-seq experiments.,"GTRD-Gene Transcription Regulation Database (http://gtrd.biouml.org)-is a database of transcription factor binding sites (TFBSs) identified by ChIP-seq experiments for human and mouse. Raw ChIP-seq data were obtained from ENCODE and SRA and uniformly processed: (i) reads were aligned using Bowtie2; (ii) ChIP-seq peaks were called using peak callers MACS, SISSRs, GEM and PICS; (iii) peaks for the same factor and peak callers, but different experiment conditions (cell line, treatment, etc.), were merged into clusters; (iv) such clusters for different peak callers were merged into metaclusters that were considered as non-redundant sets of TFBSs. In addition to information on location in genome, the sets contain structured information about cell lines and experimental conditions extracted from descriptions of corresponding ChIP-seq experiments. A web interface to access GTRD was developed using the BioUML platform. It provides: (i) browsing and displaying information; (ii) advanced search possibilities, e.g. search of TFBSs near the specified gene or search of all genes potentially regulated by a specified transcription factor; (iii) integrated genome browser that provides visualization of the GTRD data: read alignments, peaks, clusters, metaclusters and information about gene structures from the Ensembl database and binding sites predicted using position weight matrices from the HOCOMOCO database.",2016-10-24 +30453333,Sensitivity to Morphosyntactic Information in Preschool Children With and Without Developmental Language Disorder: A Follow-Up Study.,"

Purpose

This study tested children's sensitivity to tense/agreement information in fronted auxiliaries during online comprehension of questions (e.g., Are the nice little dogs running?). Data from children with developmental language disorder (DLD) were compared to previously published data from typically developing (TD) children matched according to sentence comprehension test scores.

Method

Fifteen 5-year-old children with DLD and fifteen 3-year-old TD children participated in a looking-while-listening task. Children viewed pairs of pictures, 1 with a single agent and 1 with multiple agents, accompanied by a sentence with a fronted auxiliary (is + single agent or are + two agents) or a control sentence. Proportion looking to the target was measured.

Results

Children with DLD did not show anticipatory looking based on the number information contained in the auxiliary (is or are) as the younger TD children had. Both groups showed significant increases in looking to the target upon hearing the subject noun (e.g., dogs).

Conclusions

Despite the groups' similar sentence comprehension abilities and ability to accurately respond to the information provided by the subject noun, children with DLD did not show sensitivity to number information on the fronted auxiliary. This insensitivity is considered in light of these children's weaker command of tense/agreement forms in their speech. Specifically, we consider the possibility that failure to grasp the relation between the subject-verb sequence (e.g., dogs running) and preceding information (e.g., are) in questions in the input contributes to the protracted inconsistency in producing auxiliary forms in obligatory contexts by children with DLD.

Supplemental material

https://doi.org/10.23641/asha.7283459.",2018-12-01 +29272352,Missing value imputation for LC-MS metabolomics data by incorporating metabolic network and adduct ion relations.,"Motivation:Metabolomics data generated from liquid chromatography-mass spectrometry platforms often contain missing values. Existing imputation methods do not consider underlying feature relations and the metabolic network information. As a result, the imputation results may not be optimal. Results:We proposed an imputation algorithm that incorporates the existing metabolic network, adduct ion relations even for unknown compounds, as well as linear and nonlinear associations between feature intensities to build a feature-level network. The algorithm uses support vector regression for missing value imputation based on features in the neighborhood on the network. We compared our proposed method with methods being widely used. As judged by the normalized root mean squared error in real data-based simulations, our proposed methods can achieve better accuracy. Availability and implementation:The R package is available at http://web1.sph.emory.edu/users/tyu8/MINMA. Contact:jiankang@umich.edu or tianwei.yu@emory.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +23174015,Bioinformatics Resource Manager v2.3: an integrated software environment for systems biology with microRNA and cross-species analysis tools.,"

Background

MicroRNAs (miRNAs) are noncoding RNAs that direct post-transcriptional regulation of protein coding genes. Recent studies have shown miRNAs are important for controlling many biological processes, including nervous system development, and are highly conserved across species. Given their importance, computational tools are necessary for analysis, interpretation and integration of high-throughput (HTP) miRNA data in an increasing number of model species. The Bioinformatics Resource Manager (BRM) v2.3 is a software environment for data management, mining, integration and functional annotation of HTP biological data. In this study, we report recent updates to BRM for miRNA data analysis and cross-species comparisons across datasets.

Results

BRM v2.3 has the capability to query predicted miRNA targets from multiple databases, retrieve potential regulatory miRNAs for known genes, integrate experimentally derived miRNA and mRNA datasets, perform ortholog mapping across species, and retrieve annotation and cross-reference identifiers for an expanded number of species. Here we use BRM to show that developmental exposure of zebrafish to 30 uM nicotine from 6-48 hours post fertilization (hpf) results in behavioral hyperactivity in larval zebrafish and alteration of putative miRNA gene targets in whole embryos at developmental stages that encompass early neurogenesis. We show typical workflows for using BRM to integrate experimental zebrafish miRNA and mRNA microarray datasets with example retrievals for zebrafish, including pathway annotation and mapping to human ortholog. Functional analysis of differentially regulated (p<0.05) gene targets in BRM indicates that nicotine exposure disrupts genes involved in neurogenesis, possibly through misregulation of nicotine-sensitive miRNAs.

Conclusions

BRM provides the ability to mine complex data for identification of candidate miRNAs or pathways that drive phenotypic outcome and, therefore, is a useful hypothesis generation tool for systems biology. The miRNA workflow in BRM allows for efficient processing of multiple miRNA and mRNA datasets in a single software environment with the added capability to interact with public data sources and visual analytic tools for HTP data analysis at a systems level. BRM is developed using Java™ and other open-source technologies for free distribution (http://www.sysbio.org/dataresources/brm.stm).",2012-11-23 +30663905,Developing adverse outcome pathways on silver nanoparticle-induced reproductive toxicity via oxidative stress in the nematode Caenorhabditis elegans using a Bayesian network model.,"An adverse outcome pathway (AOP) is a framework that organizes the mechanistic or predictive relationships between molecular initiating events (MIEs), key events (KEs), and adverse outcomes (AOs). Previously, we intensively investigated the molecular mechanism that underlies toxicity caused by AgNPs in the nematode Caenorhabditis elegans. Using transcriptomics, functional genetics, and various molecular/biochemical tools, we identified oxidative stress as the major mechanism underlying toxicity and reproduction failure as the outcome. With this information, here we conducted a case study of building an AOP to link oxidative stress with reproductive toxicity. To validate this AOP, we filled the gaps by conducting further experiments on its elements, such as NADPH oxidase, ROS formation, PMK-1 P38 MAPK activation, HIF-1 activation, mitochondrial damage, DNA damage, and apoptosis. The establishment of a causal link between the MIE and AO is critical for the construction of an AOP. Therefore, causal relationships between each KE and AO were verified by using functional genetic mutants of each KE. 
By combining these experimental data with our previously published results, we established causal relationships between the MIE, KEs, and AO using a Bayesian network (BN) model, culminating in an AOP entitled 'NADPH oxidase and P38 MAPK activation leading to reproductive failure in C. elegans ( https://aopwiki.org/aops/207)' . Overall, our approach shows that an AOP can be developed using existing data and further experiments can be conducted to fill the gaps between the MIE, KEs, and the AO. This study also shows that BN modeling has the potential to identify causal relationships in an AOP.",2018-12-01 +27285615,Northeast India Helminth Parasite Information Database (NEIHPID): Knowledge Base for Helminth Parasites.,"Most metazoan parasites that invade vertebrate hosts belong to three phyla: Platyhelminthes, Nematoda and Acanthocephala. Many of the parasitic members of these phyla are collectively known as helminths and are causative agents of many debilitating, deforming and lethal diseases of humans and animals. The North-East India Helminth Parasite Information Database (NEIHPID) project aimed to document and characterise the spectrum of helminth parasites in the north-eastern region of India, providing host, geographical distribution, diagnostic characters and image data. The morphology-based taxonomic data are supplemented with information on DNA sequences of nuclear, ribosomal and mitochondrial gene marker regions that aid in parasite identification. In addition, the database contains raw next generation sequencing (NGS) data for 3 foodborne trematode parasites, with more to follow. The database will also provide study material for students interested in parasite biology. Users can search the database at various taxonomic levels (phylum, class, order, superfamily, family, genus, and species), or by host, habitat and geographical location. 
Specimen collection locations are noted as co-ordinates in a MySQL database and can be viewed on Google maps, using Google Maps JavaScript API v3. The NEIHPID database has been made freely available at http://nepiac.nehu.ac.in/index.php.",2016-06-10 +28693620,Development of an in silico method for the identification of subcomplexes involved in the biogenesis of multiprotein complexes in Saccharomyces cerevisiae.,"

Background

Large sets of protein-protein interaction data coming either from biological experiments or predictive methods are available and can be combined to construct networks from which information about various cell processes can be extracted. We have developed an in silico approach based on these information to model the biogenesis of multiprotein complexes in the yeast Saccharomyces cerevisiae.

Results

Firstly, we have built three protein interaction networks by collecting the protein-protein interactions, which involved the subunits of three complexes, from different databases. The protein-protein interactions come from different kinds of biological experiments or are predicted. We have chosen the elongator and the mediator head complexes that are soluble and exhibit an architecture with subcomplexes that could be functional modules, and the mitochondrial bc 1 complex, which is an integral membrane complex and for which a late assembly subcomplex has been described. Secondly, by applying a clustering strategy to these networks, we were able to identify subcomplexes involved in the biogenesis of the complexes as well as the proteins interacting with each subcomplex. Thirdly, in order to validate our in silico results for the cytochrome bc1 complex we have analysed the physical interactions existing between three subunits by performing immunoprecipitation experiments in several genetic context.

Conclusions

For the two soluble complexes (the elongator and mediator head), our model shows a strong clustering of subunits that belong to a known subcomplex or module. For the membrane bc 1 complex, our approach has suggested new interactions between subunits in the early steps of the assembly pathway that were experimentally confirmed. Scripts can be downloaded from the site: http://bim.igmors.u-psud.fr/isips .",2017-07-11 +30502511,Exosomal proteins constitute an essential part of the human adipose tissue secretome.,"Adipose tissue is an endocrine organ, secreting various adipokines, either directly or via extracellular vesicles, including exosomes. Exosomes are vesicles of 40-150 nm size that represent a novel concept of biomolecule release. We purified exosomes from isolated primary human preadipocytes differentiated to mature adipocytes. The analyses of these exosomal preparations by LC-MS identified 884 proteins, so called exoadipokines. The comparison of exoadipokines with previously identified human exosome-associated proteins in ExoCarta database show an overlap of 817 proteins, but also revealed 67 proteins not assigned to human exosomes, yet. We further compared all exoadipokines to our previously reported reference secretome of human adipose tissue (http://diabesityprot.org/), finding 212 common proteins, whereas 672 proteins were specific for the exosomal fraction. Bioinformatic analyses revealed that the 212 common proteins can be assigned to all major functions of adipose tissue secreted proteins e.g. molecules involved in fibrotic processes or inflammation. In contrast, the exosome-specific proteins were rather assigned to signaling pathways and membrane-mediated processes. 
In conclusion, the isolation of exosomes allows to further specify the functionality of adipokines and exoadipokines as part of the adipocyte secretome in signaling and interorgan crosstalk.",2018-11-28 +30485709,Using the sORFs.Org Database.,"Ribosome profiling involves sequencing of approximately 30-base-long stretches of ribosome-protected mRNA. The technique enables genome-wide mapping of RNA undergoing active translation. Numerous small open reading frames have been identified by using ribosome profiling, leading researchers to question the assumed non-functional character of sORFs and to the identification of various important sORF translation products. sORFs.org (https://www.sorfs.org) is a public repository of small open reading frames identified by ribosome profiling in a database of over 3 million sORFs across 78 datasets from six species. sORFs.org is a multi-omics endeavor providing tools and metrics to assess the coding potential of the delineated sORFs. A pipeline is also in place to systematically rescan public mass spectrometry datasets to acquire new experimental evidence for sORF-encoded polypeptides. sORFs.org provides two distinct query interfaces, export functionality, and various visualization tools to enable inspection of the available information. © 2018 by John Wiley & Sons, Inc.",2018-11-28 +30486838,CHESS: a new human gene catalog curated from thousands of large-scale RNA sequencing experiments reveals extensive transcriptional noise.,"We assembled the sequences from deep RNA sequencing experiments by the Genotype-Tissue Expression (GTEx) project, to create a new catalog of human genes and transcripts, called CHESS. The new database contains 42,611 genes, of which 20,352 are potentially protein-coding and 22,259 are noncoding, and a total of 323,258 transcripts. These include 224 novel protein-coding genes and 116,156 novel transcripts. 
We detected over 30 million additional transcripts at more than 650,000 genomic loci, nearly all of which are likely nonfunctional, revealing a heretofore unappreciated amount of transcriptional noise in human cells. The CHESS database is available at http://ccb.jhu.edu/chess .",2018-11-28 +28057683,ORCAN-a web-based meta-server for real-time detection and functional annotation of orthologs.,"

Summary

ORCAN (ORtholog sCANner) is a web-based meta-server for one-click evolutionary and functional annotation of protein sequences. The server combines information from the most popular orthology-prediction resources, including four tools and four online databases. Functional annotation utilizes five additional comparisons between the query and identified homologs, including: sequence similarity, protein domain architectures, functional motifs, Gene Ontology term assignments and a list of associated articles. Furthermore, the server uses a plurality-based rating system to evaluate the orthology relationships and to rank the reference proteins by their evolutionary and functional relevance to the query. Using a dataset of ∼1 million true yeast orthologs as a sample reference set, we show that combining multiple orthology-prediction tools in ORCAN increases the sensitivity and precision by 1-2 percent points.

Availability and implementation

The service is available for free at http://www.combio.pl/orcan/ .

Contact

wmk@amu.edu.pl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +,metaxa2: improved identification and taxonomic classification of small and large subunit rRNA in metagenomic data,"The ribosomal rRNA genes are widely used as genetic markers for taxonomic identification of microbes. Particularly the small subunit (SSU; 16S/18S) rRNA gene is frequently used for species‐ or genus‐level identification, but also the large subunit (LSU; 23S/28S) rRNA gene is employed in taxonomic assignment. The metaxa software tool is a popular utility for extracting partial rRNA sequences from large sequencing data sets and assigning them to an archaeal, bacterial, nuclear eukaryote, mitochondrial or chloroplast origin. This study describes a comprehensive update to metaxa – metaxa2 – that extends the capabilities of the tool, introducing support for the LSU rRNA gene, a greatly improved classifier allowing classification down to genus or species level, as well as enhanced support for short‐read (100 bp) and paired‐end sequences, among other changes. The performance of metaxa2 was compared to other commonly used taxonomic classifiers, showing that metaxa2 often outperforms previous methods in terms of making correct predictions while maintaining a low misclassification rate. metaxa2 is freely available from http://microbiology.se/software/metaxa2/.",2015-11-01 +30367585,Arena-Idb: a platform to build human non-coding RNA interaction networks.,"

Background

High throughput technologies have provided the scientific community an unprecedented opportunity for large-scale analysis of genomes. Non-coding RNAs (ncRNAs), for a long time believed to be non-functional, are emerging as one of the most important and large family of gene regulators and key elements for genome maintenance. Functional studies have been able to assign to ncRNAs a wide spectrum of functions in primary biological processes, and for this reason they are assuming a growing importance as a potential new family of cancer therapeutic targets. Nevertheless, the number of functionally characterized ncRNAs is still too poor if compared to the number of new discovered ncRNAs. Thus platforms able to merge information from available resources addressing data integration issues are necessary and still insufficient to elucidate ncRNAs biological roles.

Results

In this paper, we describe a platform called Arena-Idb for the retrieval of comprehensive and non-redundant annotated ncRNAs interactions. Arena-Idb provides a framework for network reconstruction of ncRNA heterogeneous interactions (i.e., with other type of molecules) and relationships with human diseases which guide the integration of data, extracted from different sources, via mapping of entities and minimization of ambiguity.

Conclusions

Arena-Idb provides a schema and a visualization system to integrate ncRNA interactions that assists in discovering ncRNA functions through the extraction of heterogeneous interaction networks. The Arena-Idb is available at http://arenaidb.ba.itb.cnr.it.",2018-10-15 +22397686,SpiroESTdb: a transcriptome database and online tool for sparganum expressed sequences tags.,"

Background

Sparganum (plerocercoid of Spirometra erinacei) is a parasite that possesses the remarkable ability to survive by successfully modifying its physiology and morphology to suit various hosts and can be found in various tissues, even the nervous system. However, surprisingly little is known about the molecular function of genes that are expressed during the course of the parasite life cycle. To begin to decipher the molecular processes underlying gene function, we constructed a database of expressed sequence tags (ESTs) generated from sparganum.

Findings

SpiroESTdb is a web-based information resource that is built upon the annotation and curation of 5,655 ESTs data. SpiroESTdb provides an integrated platform for expressed sequence data, expression dynamics, functional genes, genetic markers including single nucleotide polymorphisms and tandem repeats, gene ontology and KEGG pathway information. Moreover, SpiroESTdb supports easy access to gene pages, such as (i) curation and query forms, (ii) in silico expression profiling and (iii) BLAST search tools. Comprehensive descriptions of the sparganum content of all sequenced data are available, including summary reports. The contents of SpiroESTdb can be viewed and downloaded from the web (http://pathod.cdc.go.kr/spiroestdb).

Conclusions

This integrative web-based database of sequence data, functional annotations and expression profiling data will serve as a useful tool to help understand and expand the characterization of parasitic infections. It can also be used to identify potential industrial drug targets and vaccine candidate genes.",2012-03-08 +29280996,CRNET: an efficient sampling approach to infer functional regulatory networks by integrating large-scale ChIP-seq and time-course RNA-seq data.,"Motivation:NGS techniques have been widely applied in genetic and epigenetic studies. Multiple ChIP-seq and RNA-seq profiles can now be jointly used to infer functional regulatory networks (FRNs). However, existing methods suffer from either oversimplified assumption on transcription factor (TF) regulation or slow convergence of sampling for FRN inference from large-scale ChIP-seq and time-course RNA-seq data. Results:We developed an efficient Bayesian integration method (CRNET) for FRN inference using a two-stage Gibbs sampler to estimate iteratively hidden TF activities and the posterior probabilities of binding events. A novel statistic measure that jointly considers regulation strength and regression error enables the sampling process of CRNET to converge quickly, thus making CRNET very efficient for large-scale FRN inference. Experiments on synthetic and benchmark data showed a significantly improved performance of CRNET when compared with existing methods. CRNET was applied to breast cancer data to identify FRNs functional at promoter or enhancer regions in breast cancer MCF-7 cells. Transcription factor MYC is predicted as a key functional factor in both promoter and enhancer FRNs. We experimentally validated the regulation effects of MYC on CRNET-predicted target genes using appropriate RNAi approaches in MCF-7 cells. Availability and implementation:R scripts of CRNET are available at http://www.cbil.ece.vt.edu/software.htm. Contact:xuan@vt.edu. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +30075009,DiTeX: Disease-related topic extraction system through internet-based sources.,"This paper describes the web-based automated disease-related topic extraction system, called to DiTeX, which monitors important disease-related topics and provides associated information. National disease surveillance systems require a considerable amount of time to inform people of recent outbreaks of diseases. To solve this problem, many studies have used Internet-based sources such as news and Social Network Service (SNS). However, these sources contain many intentional elements that disturb extracting important topics. To address this challenge, we employ Natural Language Processing and an effective ranking algorithm, and develop DiTeX that provides important disease-related topics. This report describes the web front-end and back-end architecture, implementation, performance of the ranking algorithm, and captured topics of DiTeX. We describe processes for collecting Internet-based data and extracting disease-related topics based on search keywords. Our system then applies a ranking algorithm to evaluate the importance of disease-related topics extracted from these data. Finally, we conduct analysis based on real-world incidents to evaluate the performance and the effectiveness of DiTeX. To evaluate DiTeX, we analyze the ranking of well-known disease-related incidents for various ranking algorithms. The topic extraction rate of our ranking algorithm is superior to those of others. We demonstrate the validity of DiTeX by summarizing the disease-related topics of each day extracted by our system. To our knowledge, DiTeX is the world's first automated web-based real-time service system that extracts and presents disease-related topics, trends and related data through web-based sources. 
DiTeX is now available on the web through http://epidemic.co.kr/media/topics.",2018-08-03 +23203890,OrysPSSP: a comparative platform for small secreted proteins from rice and other plants.,"Plants have large diverse families of small secreted proteins (SSPs) that play critical roles in the processes of development, differentiation, defense, flowering, stress response, symbiosis, etc. Oryza sativa is one of the major crops worldwide and an excellent model for monocotyledonous plants. However, there had not been any effort to systematically analyze rice SSPs. Here, we constructed a comparative platform, OrysPSSP (http://www.genoportal.org/PSSP/index.do), involving >100 000 SSPs from rice and 25 plant species. OrysPSSP is composed of a core SSP database and a dynamic web interface that integrates a variety of user tools and resources. The current release (v0530) of core SSP database contains a total of 101 048 predicted SSPs, which were generated through a rigid computation/curation pipeline. The web interface consists of eight different modules, providing users with rich resources/functions, e.g. browsing SSP by chromosome, searching and filtering SSP, validating SSP with omics data, comparing SSP among multiple species and querying core SSP database with BLAST. Some cases of application are discussed to demonstrate the utility of OrysPSSP. OrysPSSP serves as a comprehensive resource to explore SSP on the genome scale and across the phylogeny of plant species.",2012-11-29 +30419167,"ZINClick v.18: Expanding Chemical Space of 1,2,3-Triazoles.","In the last years, we have investigated the click-chemical space covered by molecules containing the triazole ring and generated a database of 1,2,3-triazoles called ZINClick, starting from literature-reported alkynes and azides synthesizable in no more than three synthetic steps from commercially available products. 
This combinatorial database contains millions of 1,4-disubstituted 1,2,3-triazoles that are easily synthesizable. The library is regularly updated and can be freely downloaded from http://www.ZINClick.org . In this communication, the new implementation of ZINClick will be discussed as well as our new strategy for clustering the chemical space covered by 1,4-disubstituted 1,2,3-triazoles around their availability: from direct purchase to different degrees of synthetic feasibility of the compounds.",2018-11-27 +31871706,Teaching accelerated nursing students' self-care: A pilot project.,"

Aim

A benchmark of 4 has been determined for the reduction of self-reported stress by nursing students' status post 5 weeks of holistic educational activities and interventions provided by a nurse educator.

Design

Provision 5 in the American Nurses Association Code of Ethics for Nurses with Interpretive Statements emphasizes the duty of the nurse to not only promote the health and safety of others, but to self as well (ANA, 2015, Code of ethics with interpretive statements, http://Nursebooks.org). A self-care for nurses' pilot project was trialled with 25 accelerated nursing students over the course of 5 weeks. Holistic education programmes were facilitated by a nurse educator uninvolved in providing clinical or classroom education to the students.

Methods

The Standards for Quality Improvement Reporting Excellence (SQUIRE) guidelines are used in this pilot project as a framework to explore standardization of education of nursing students about self-care in nursing programmes and to promote positive health behaviours and student nurses' insight into how nurses' self-care can have an impact on patient outcomes. The self-care pilot project introduced the importance of self-care for the pre-licensure nursing student by teaching healthy eating, physical exercise, the value of sleep, use of positive affirmations and aromatherapy to a cohort of accelerated nursing students over the course of 5 weeks. The Star Model of Knowledge Transformation was the theoretical framework for the pilot study. Two questionnaires were used by the principal investigator to obtain participant data, the Project Participant Questionnaire and the Final-Year Group Questionnaire.

Results

On completion of the self-care for nurses' pilot, the nursing students reported a reduction in stress and an increased ability to cope with stress after exposure to different holistic stress reduction strategies. An average benchmark of 4.36 was achieved indicating that the nursing students' self-care had improved status post the interactive teaching intervention. Self-care taught to pre-licensure nursing students by nurse educators can enhance their self-awareness of the importance of stress reduction and care of themselves while enduring the academic rigour and simultaneous clinical practicum experiences in nursing programmes. Applying self-care behaviours to reduction of stress for nursing students may be of benefit to students as they transition from the pre-licensure to graduate nurse roles. Hence, teaching health behaviours that are self-protective and contribute to maintaining safe clinical environments for nurses and the patients in their care.",2019-09-27 +26537223,The Distant Siblings-A Phylogenomic Roadmap Illuminates the Origins of Extant Diversity in Fungal Aromatic Polyketide Biosynthesis.,"In recent years, the influx of newly sequenced fungal genomes has enabled sampling of secondary metabolite biosynthesis on an unprecedented scale. However, explanations of extant diversity which take into account both large-scale phylogeny reconstructions and knowledge gained from multiple genome projects are still lacking. We analyzed the evolutionary sources of genetic diversity in aromatic polyketide biosynthesis in over 100 model fungal genomes. By reconciling the history of over 400 nonreducing polyketide synthases (NR-PKSs) with corresponding species history, we demonstrate that extant fungal NR-PKSs are clades of distant siblings, originating from a burst of duplications in early Pezizomycotina and thinned by extensive losses. 
The capability of higher fungi to biosynthesize the simplest precursor molecule (orsellinic acid) is highlighted as an ancestral trait underlying biosynthesis of aromatic compounds. This base activity was modified during early evolution of filamentous fungi, toward divergent reaction schemes associated with biosynthesis of, for example, aflatoxins and fusarubins (C4-C9 cyclization) or various anthraquinone derivatives (C6-C11 cyclization). The functional plasticity is further shown to have been supplemented by modularization of domain architecture into discrete pieces (conserved splice junctions within product template domain), as well as tight linkage of key accessory enzyme families and divergence in employed transcriptional factors. Although the majority of discord between species and gene history is explained by ancient duplications, this landscape has been altered by more recent duplications, as well as multiple horizontal gene transfers. The 25 detected transfers include previously undescribed events leading to emergence of, for example, fusarubin biosynthesis in Fusarium genus. Both the underlying data and the results of present analysis (including alternative scenarios revealed by sampling multiple reconciliation optima) are maintained as a freely available web-based resource: http://cropnet.pl/metasites/sekmet/nrpks_2014.",2015-11-03 +30903174,Proteomic and Bioinformatic Analyses for the Identification of Proteins With Low Allergenic Potential for Hazard Assessment.,"Use of botanicals and natural substances in consumer products has increased in recent years. Such extracts can contain protein that may theoretically represent a potential risk of IgE-mediated allergy. No method has yet been generally accepted or validated for assessment of the allergenic potential of proteins. For development of suitable methods datasets of allergenic and nonallergenic (or low allergenic) proteins are required that can serve, respectively, as positive and negative controls. 
However, data are unavailable on proteins that lack or have low allergenic potential. Here, low allergenic potential proteins are identified based on the assumption that proteins with established human exposure, but with a lack of an association with allergy, possess low allergenic potential. Proteins were extracted from sources considered to have less allergenic potential (corn, potato, spinach, rice, and tomato) as well as higher allergenic potential (wheat) regarding common allergenic foods. Proteins were identified and semi-quantified by label-free proteomic analysis conducted using mass spectrometry. Predicted allergenicity was determined using AllerCatPro (https://allercatpro.bii.a-star.edu.sg/). In summary, 9077 proteins were identified and semi-quantified from 6 protein sources. Within the top 10% of the most abundant proteins identified, 178 characterized proteins were found to have no evidence for allergenicity predicted by AllerCatPro and were considered to have low allergenic potential. This panel of low allergenic potential proteins provides a pragmatic approach to aid the development of alternative methods for robust testing strategies to distinguish between proteins of high and low allergenic potential to assess the risk of proteins from natural or botanical sources.",2019-07-01 +30901174,Robotic partial nephrectomy after pazopanib treatment in a solitary kidney with segmental vein thrombosis.,"

Objective

To demonstrate our surgical technique of robotic partial nephrectomy (RPN) in a patient with a solitary kidney who received neoadjuvant Pazopanib, highlighting the multidisciplinary approach.

Materials and methods

In our video, we present the case of 77-year-old male, Caucasian with 6.6cm left renal neoplasm in a solitary kidney. An initial percutaneous biopsy from the mass revealed clear cell RCC ISUP 2. After multidisciplinary tumor board meeting, Pazopanib (800mg once daily) was administered for 8 weeks with repeat imaging at completion of therapy. Post-TKI image study was compared with the pre-TKI CT using the Morphology, Attenuation, Size, and Structure criteria showing a favorable response to the treatment. Thereafter, a RPN was planned. Perioperative surgical outcomes are presented.

Results

Operative time was 224 minutes with a cold ischemia time of 53 minutes. Estimated blood loss was 800ml and the length of hospital stay was 4 days. Pathology demonstrated a specimen of 7.6cm with a tumor size of 6.5cm consistent with clear cell renal carcinoma ISUP 3 with a TNM staging pT1b Nx. Postoperative GFR was maintained at 24 ml / min compared to the preoperative value of 33ml / min.

Conclusions

A multidisciplinary approach is effective for patients in whom nephron preservation is critical, providing an opportunity to select those that may benefit from TKI therapy. Pazopanib may allow for PN in a highly selective subgroup of patients who would otherwise require radical nephrectomy. Prospective data will be necessary before this strategy can be disseminated into clinical practice. Available at: http://www.intbrazjurol.com.br/video-section/20180240_Garisto_et_al.",2019-07-01 +29486270,Mechanism-informed read-across assessment of skin sensitizers based on SkinSensDB.,"Integrative testing strategies using adverse outcome pathway (AOP)-based alternative assays for assessing skin sensitizers show the potential for replacing animal testing. However, the application of alternative assays for a large number of chemicals is still time-consuming and expensive. In order to facilitate the assessment of skin sensitizers based on integrative testing strategies, a mechanism-informed read-across assessment method was proposed and evaluated using data from SkinSensDB. First, the prediction performance of two integrated testing strategy models was evaluated giving the highest area under the receiver operating characteristic curve (AUC) values of 0.928 and 0.837 for predicting human and LLNA data, respectively. The proposed read-across prediction method achieves AUC values of 0.957 and 0.802 for predicting human and LLNA data, respectively, with interpretable activation statuses of AOP events. As data grows, a better prediction performance is expected. 
A user-friendly tool has been constructed and integrated into SkinSensDB that is publicly accessible at http://cwtung.kmu.edu.tw/skinsensdb.",2018-02-24 +23230006,The Chloroplast Function Database II: a comprehensive collection of homozygous mutants and their phenotypic/genotypic traits for nuclear-encoded chloroplast proteins.,"The Chloroplast Function Database has so far offered phenotype information on mutants of the nuclear-encoded chloroplast proteins in Arabidopsis that pertains to >200 phenotypic data sets that were obtained from 1,722 transposon- or T-DNA-tagged lines. Here, we present the development of the second version of the database, which is named the Chloroplast Function Database II and was redesigned to increase the number of mutant characters and new user-friendly tools for data mining and integration. The upgraded database offers information on genome-wide mutant screens for any visible phenotype against 2,495 tagged lines to create a comprehensive homozygous mutant collection. The collection consists of 147 lines with seedling phenotypes and 185 lines for which we could not obtain homozygotes, as well as 1,740 homozygotes with wild-type phenotypes. Besides providing basic information about primer lists that were used for the PCR genotyping of T-DNA-tagged lines and explanations about the preparation of homozygous mutants and phenotype screening, the database includes access to a link between the gene locus and existing publicly available databases. This gives users access to a combined pool of data, enabling them to gain valuable insights into biological processes. In addition, high-resolution images of plastid morphologies of mutants with seedling-specific chloroplast defects as observed with transmission electron microscopy (TEM) are available in the current database. 
This database is used to compare the phenotypes of visually identifiable mutants with their plastid ultrastructures and to evaluate their potential significance from characteristic patterns of plastid morphology in vivo. Thus, the Chloroplast Function Database II is a useful and comprehensive information resource that can help researchers to connect individual Arabidopsis genes to plastid functions on the basis of phenotype analysis of our tagged mutant collection. It can be freely accessed at http://rarge.psc.riken.jp/chloroplast/.",2012-12-10 +23972281,EMAP/EMAPA ontology of mouse developmental anatomy: 2013 update.,"

Background

The Edinburgh Mouse Atlas Project (EMAP) ontology of mouse developmental anatomy provides a standard nomenclature for describing normal and mutant mouse embryo anatomy. The ontology forms the core of the EMAP atlas and is used for annotating gene expression data by the mouse Gene Expression Database (GXD), Edinburgh Mouse Atlas of Gene Expression (EMAGE) and other database resources.

Findings

The original EMAP ontology listed anatomical entities for each developmental stage separately, presented as uniparental graphs organized as a strict partonomy. An ""abstract"" (i.e. non-stage-specific) representation of mouse developmental anatomy has since been developed. In this version (EMAPA) all instances for a given anatomical entity are presented as a single term, together with the first and last stage at which it is considered to be present. Timed-component anatomies are now derived using staging information in the ""primary"" non-timed version. Anatomical entities are presented as a directed acyclic graph enabling multiple parental relationships. Subsumption classification as well as partonomic and other types of relationships can now be represented. Most concept names are unique, with compound names constructed using standardized nomenclature conventions, and alternative names associated as synonyms.

Conclusions

The ontology has been extended and refined in a collaborative effort between EMAP and GXD, with additional input from others. Efforts are also underway to improve the revision process with regards to updating and editorial control. The revised EMAPA ontology is freely available from the OBO Foundry resource, with descriptive information and other documentation presented in associated Wiki pages (http://www.obofoundry.org/wiki/index.php/EMAPA:Main_Page).",2013-08-26 +29041978,Linc-RoR promotes MAPK/ERK signaling and confers estrogen-independent growth of breast cancer.,"

Background

The conversion from estrogen-dependent to estrogen-independent state of ER+ breast cancer cells is the key step to promote resistance to endocrine therapies. Although the crucial role of MAPK/ERK signaling pathway in estrogen-independent breast cancer cell growth is well established, the underlying mechanism is not fully understood.

Methods

In this study, we profiled lncRNA expression against a focused group of lncRNAs selected from lncRNA database. CRISPR/Cas9 was employed to knockout (KO) linc-RoR in MCF-7 cells, while rescue experiments were carried out to re-express linc-RoR in KO cells. Colony formation and MTT assays were used to examine the role of linc-RoR in estrogen-independent growth and tamoxifen resistance. Western blot and qRT-PCR were used to determine the change of protein and lncRNA levels, respectively. The expression of DUSP7 in clinical specimens was downloaded from Oncomine ( www.oncomine.org ) and the dataset from Kaplan-Meier Plotter ( http://kmplot.com ) was used to analyze the clinical outcomes in relation to DUSP7.

Results

We identified that linc-RoR functions as an onco-lncRNA to promote estrogen-independent growth of ER+ breast cancer. Under estrogen deprivation, linc-RoR causes the upregulation of phosphorylated MAPK/ERK pathway which in turn activates ER signaling. Knockout of linc-RoR abrogates estrogen deprivation-induced ERK activation as well as ER phosphorylation, whereas re-expression of linc-RoR restores all above phenotypes. Moreover, we show that the ERK-specific phosphatase Dual Specificity Phosphatase 7 (DUSP7), also known as MKP-X, is involved in linc-RoR KO-induced repression of MAPK/ERK signaling. Interestingly, linc-RoR KO increases the protein stability of DUSP7, resulting in repression of ERK phosphorylation. Clinical data analysis reveals that DUSP7 expression is lower in ER+ breast cancer samples than that in ER- breast cancer. Moreover, downregulation of DUSP7 expression is associated with poor patient survival.

Conclusion

Taken together, these results suggest that linc-RoR promotes estrogen-independent growth and activation of MAPK/ERK pathway of breast cancer cells by regulating the ERK-specific phosphatase DUSP7. Thus, this study might help not only in establishing a role for linc-RoR in estrogen-independent and tamoxifen resistance of ER+ breast cancer, but also suggesting a link between linc-RoR and MAPK/ERK pathway.",2017-10-17 +21996254,A comprehensive curated resource for follicle stimulating hormone signaling.,"

Background

Follicle stimulating hormone (FSH) is an important hormone responsible for growth, maturation and function of the human reproductive system. FSH regulates the synthesis of steroid hormones such as estrogen and progesterone, proliferation and maturation of follicles in the ovary and spermatogenesis in the testes. FSH is a glycoprotein heterodimer that binds and acts through the FSH receptor, a G-protein coupled receptor. Although online pathway repositories provide information about G-protein coupled receptor mediated signal transduction, the signaling events initiated specifically by FSH are not cataloged in any public database in a detailed fashion.

Findings

We performed comprehensive curation of the published literature to identify the components of FSH signaling pathway and the molecular interactions that occur upon FSH receptor activation. Our effort yielded 64 reactions comprising 35 enzyme-substrate reactions, 11 molecular association events, 11 activation events and 7 protein translocation events that occur in response to FSH receptor activation. We also cataloged 265 genes, which were differentially expressed upon FSH stimulation in normal human reproductive tissues.

Conclusions

We anticipate that the information provided in this resource will provide better insights into the physiological role of FSH in reproductive biology, its signaling mediators and aid in further research in this area. The curated FSH pathway data is freely available through NetPath (http://www.netpath.org), a pathway resource developed previously by our group.",2011-10-13 +21769196,VPDB: Viral Protein Structural Database.,"

Unlabelled

Viral Protein Database is an interactive database for three dimensional viral proteins. Our aim is to provide a comprehensive resource to the community of structural virology, with an emphasis on the description of derived data from structural biology. Currently, VPDB includes ~1,670 viral protein structures from >277 viruses with more than 465 virus strains. The whole database can be easily accessed through the user convenience text search. Interactivity has been enhanced by using Jmol, WebMol and Strap to visualize the viral protein molecular structure.

Availability

The database is available for free at http://www.vpdb.bicpu.edu.in.",2011-07-06 +29783941,A transposable element annotation pipeline and expression analysis reveal potentially active elements in the microalga Tisochrysis lutea.,"

Background

Transposable elements (TEs) are mobile DNA sequences known as drivers of genome evolution. Their impacts have been widely studied in animals, plants and insects, but little is known about them in microalgae. In a previous study, we compared the genetic polymorphisms between strains of the haptophyte microalga Tisochrysis lutea and suggested the involvement of active autonomous TEs in their genome evolution.

Results

To identify potentially autonomous TEs, we designed a pipeline named PiRATE (Pipeline to Retrieve and Annotate Transposable Elements, download: https://doi.org/10.17882/51795 ), and conducted an accurate TE annotation on a new genome assembly of T. lutea. PiRATE is composed of detection, classification and annotation steps. Its detection step combines multiple, existing analysis packages representing all major approaches for TE detection and its classification step was optimized for microalgal genomes. The efficiency of the detection and classification steps was evaluated with data on the model species Arabidopsis thaliana. PiRATE detected 81% of the TE families of A. thaliana and correctly classified 75% of them. We applied PiRATE to T. lutea genomic data and established that its genome contains 15.89% Class I and 4.95% Class II TEs. In these, 3.79 and 17.05% correspond to potentially autonomous and non-autonomous TEs, respectively. Annotation data was combined with transcriptomic and proteomic data to identify potentially active autonomous TEs. We identified 17 expressed TE families and, among these, a TIR/Mariner and a TIR/hAT family were able to synthesize their transposase. Both these TE families were among the three highest expressed genes in a previous transcriptomic study and are composed of highly similar copies throughout the genome of T. lutea. This sum of evidence reveals that both these TE families could be capable of transposing or triggering the transposition of potential related MITE elements.

Conclusion

This manuscript provides an example of a de novo transposable element annotation of a non-model organism characterized by a fragmented genome assembly and belonging to a poorly studied phylum at genomic level. Integration of multi-omics data enabled the discovery of potential mobile TEs and opens the way for new discoveries on the role of these repeated elements in genomic evolution of microalgae.",2018-05-22 +31620266,"The 2nd Baltic Osseointegration Academy and Lithuanian University of Health Sciences Consensus Conference 2019. Summary and Consensus Statements: Group I - Biological Aspects of Tooth Extraction, Socket Healing and Indications for Socket Preservation.","

Introduction

The task of Group I was to review and update the existing data concerning the physiologic process of socket healing, in the absence or presence of grafting materials or platelet concentrates, addressing the associated molecular and cellular events that culminate in the restoration of the lost tissue architecture and functionality. The second task was to review current literature concerning extraction socket classification immediately following tooth extraction and the rationales for socket preservation/augmentation procedures and with reference to it suggest novel clinical decision tree for extraction socket preservation/augmentation in aesthetic and non-aesthetic area.

Material and methods

The main areas indicated by this group were as follows: socket healing process, including haemostasis and coagulation, inflammatory phase, proliferative phase, bone tissue modelling and remodelling; socket healing with graft materials and autologous platelet concentrates; extraction socket classifications; indications and reasons for extraction socket preservation/augmentation. The systematic reviews and/or meta-analyses were registered in PROSPERO, an international prospective register of systematic reviews: http://www.crd.york.ac.uk/PROSPERO/. The literature in the corresponding areas of interest was screened and reported following the PRISMA (Preferred Reporting Item for Systematic Review and Meta-Analysis) Statement: http://www.prisma-statement.org/. Method of preparation of the systematic reviews, based on comprehensive search strategies, was discussed and standardized. The summary of the materials and methods employed by the authors in preparing the systematic reviews and/or meta-analyses is presented in Preface chapter.

Results

The results and conclusions of the review process are presented in the respective papers. One theoretical review-analysis and one systematic review were performed. The group's general commentaries, consensus statements, clinical recommendations and implications for research are presented in this article.",2019-07-01 +30476188,Development of Machine Learning Algorithms for Prediction of 30-Day Mortality After Surgery for Spinal Metastasis.,"

Background

Preoperative prognostication of short-term postoperative mortality in patients with spinal metastatic disease can improve shared decision making around end-of-life care.

Objective

To (1) develop machine learning algorithms for prediction of short-term mortality and (2) deploy these models in an open access web application.

Methods

The American College of Surgeons, National Surgical Quality Improvement Program was used to identify patients that underwent operative intervention for metastatic disease. Four machine learning algorithms were developed, and the algorithm with the best performance across discrimination, calibration, and overall performance was integrated into an open access web application.

Results

The 30-d mortality for the 1790 patients undergoing surgery for spinal metastatic disease was 8.49%. Preoperative factors used for prognostication were albumin, functional status, white blood cell count, hematocrit, alkaline phosphatase, spinal location (cervical, thoracic, lumbosacral), and severity of comorbid systemic disease (American Society of Anesthesiologist Class). In this population, machine learning algorithms developed to predict 30-d mortality performed well on discrimination (c-statistic), calibration (assessed by calibration slope and intercept), Brier score, and decision analysis. An open access web application was developed for the best performing model and this web application can be found here: https://sorg-apps.shinyapps.io/spinemets/.

Conclusion

Machine learning algorithms are promising for prediction of postoperative outcomes in spinal oncology and these algorithms can be integrated into clinically useful decision tools. As the volume of data in oncology continues to grow, creation of learning systems and deployment of these systems as accessible tools may significantly enhance prognostication and management.",2019-07-01 +26658470,Integrated analysis of shotgun proteomic data with PatternLab for proteomics 4.0.,"PatternLab for proteomics is an integrated computational environment that unifies several previously published modules for the analysis of shotgun proteomic data. The contained modules allow for formatting of sequence databases, peptide spectrum matching, statistical filtering and data organization, extracting quantitative information from label-free and chemically labeled data, and analyzing statistics for differential proteomics. PatternLab also has modules to perform similarity-driven studies with de novo sequencing data, to evaluate time-course experiments and to highlight the biological significance of data with regard to the Gene Ontology database. The PatternLab for proteomics 4.0 package brings together all of these modules in a self-contained software environment, which allows for complete proteomic data analysis and the display of results in a variety of graphical formats. All updates to PatternLab, including new features, have been previously tested on millions of mass spectra. PatternLab is easy to install, and it is freely available from http://patternlabforproteomics.org.",2015-12-10 +30101342,PrimedRPA: primer design for recombinase polymerase amplification assays.,"

Summary

Recombinase polymerase amplification (RPA), an isothermal nucleic acid amplification method, is enhancing our ability to detect a diverse array of pathogens, thereby assisting the diagnosis of infectious diseases and the detection of microorganisms in food and water. However, new bioinformatics tools are needed to automate and improve the design of the primers and probes sets to be used in RPA, particularly to account for the high genetic diversity of circulating pathogens and cross detection of genetically similar organisms. PrimedRPA is a python-based package that automates the creation and filtering of RPA primers and probe sets. It aligns several sequences to identify conserved targets, and filters regions that cross react with possible background organisms.

Availability and implementation

PrimedRPA was implemented in Python 3 and supported on Linux and MacOS and is freely available from http://pathogenseq.lshtm.ac.uk/PrimedRPA.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +30961883,Linear-linear piecewise growth mixture models with unknown random knots: A primer for school psychology.,"Studying change over time requires rigorous and sometimes novel statistical methods that can support increasingly complex applied research questions. In this article, we provide an overview of the potential of piecewise growth mixture models. This type of longitudinal model can be used to advance our understanding of group and individual growth that may follow a segmented, or disjointed, pattern of change, and where the data come from a mixture of two or more latent classes. We then demonstrate the practical utility of piecewise growth mixture models by applying it to a subsample of students from the Early Childhood Longitudinal Study - Kindergarten Cohort of 1998 (ECLS-K) to ascertain whether mathematics achievement is characterized by one or two latent classes akin to students with and without mathematics difficulties. We discuss the applicability for school psychological research and provide supplementary online files that include an instructional sample dataset and corresponding R routine with explanatory annotations to assist in understanding the R routine before applying this approach in novel applications (https://doi.org/10.1016/j.jsp.2019.03.004).",2019-03-21 +24271386,OnTheFly: a database of Drosophila melanogaster transcription factors and their binding sites.,"We present OnTheFly (http://bhapp.c2b2.columbia.edu/OnTheFly/index.php), a database comprising a systematic collection of transcription factors (TFs) of Drosophila melanogaster and their DNA-binding sites. TFs predicted in the Drosophila melanogaster genome are annotated and classified and their structures, obtained via experiment or homology models, are provided. All known preferred TF DNA-binding sites obtained from the B1H, DNase I and SELEX methodologies are presented. 
DNA shape parameters predicted for these sites are obtained from a high throughput server or from crystal structures of protein-DNA complexes where available. An important feature of the database is that all DNA-binding domains and their binding sites are fully annotated in a eukaryote using structural criteria and evolutionary homology. OnTheFly thus provides a comprehensive view of TFs and their binding sites that will be a valuable resource for deciphering non-coding regulatory DNA.",2013-11-22 +22140103,Identifiers.org and MIRIAM Registry: community resources to provide persistent identification.,"The Minimum Information Required in the Annotation of Models Registry (http://www.ebi.ac.uk/miriam) provides unique, perennial and location-independent identifiers for data used in the biomedical domain. At its core is a shared catalogue of data collections, for each of which an individual namespace is created, and extensive metadata recorded. This namespace allows the generation of Uniform Resource Identifiers (URIs) to uniquely identify any record in a collection. Moreover, various services are provided to facilitate the creation and resolution of the identifiers. Since its launch in 2005, the system has evolved in terms of the structure of the identifiers provided, the software infrastructure, the number of data collections recorded, as well as the scope of the Registry itself. We describe here the new parallel identification scheme and the updated supporting software infrastructure. We also introduce the new Identifiers.org service (http://identifiers.org) that is built upon the information stored in the Registry and which provides directly resolvable identifiers, in the form of Uniform Resource Locators (URLs). 
The flexibility of the identification scheme and resolving system allows its use in many different fields, where unambiguous and perennial identification of data entities are necessary.",2011-12-02 +28053167,PceRBase: a database of plant competing endogenous RNA.,"Competition for microRNA (miRNA) binding between RNA molecules has emerged as a novel mechanism for the regulation of eukaryotic gene expression. Competing endogenous RNA (ceRNA) can act as decoys for miRNA binding, thereby forming a ceRNA network by regulating the abundance of other RNA transcripts which share the same or similar microRNA response elements. Although this type of RNA cross talk was first described in Arabidopsis, and was subsequently shown to be active in animal models, there is no database collecting potential ceRNA data for plants. We have developed a Plant ceRNA database (PceRBase, http://bis.zju.edu.cn/pcernadb/index.jsp) which contains potential ceRNA target-target, and ceRNA target-mimic pairs from 26 plant species. For example, in Arabidopsis lyrata, 311 candidate ceRNAs are identified which could affect 2646 target-miRNA-target interactions. Predicted pairing structure between miRNAs and their target mRNA transcripts, expression levels of ceRNA pairs and associated GO annotations are also stored in the database. A web interface provides convenient browsing and searching for specific genes of interest. Tools are available for the visualization and enrichment analysis of genes in the ceRNA networks. Moreover, users can use PceRBase to predict novel competing mimic-target and target-target interactions from their own data.",2016-10-07 +29186322,SAPP: functional genome annotation and analysis through a semantic framework using FAIR principles.,"Summary:To unlock the full potential of genome data and to enhance data interoperability and reusability of genome annotations we have developed SAPP, a Semantic Annotation Platform with Provenance. 
SAPP is designed as an infrastructure supporting FAIR de novo computational genomics but can also be used to process and analyze existing genome annotations. SAPP automatically predicts, tracks and stores structural and functional annotations and associated dataset- and element-wise provenance in a Linked Data format, thereby enabling information mining and retrieval with Semantic Web technologies. This greatly reduces the administrative burden of handling multiple analysis tools and versions thereof and facilitates multi-level large scale comparative analysis. Availability and implementation:SAPP is written in JAVA and freely available at https://gitlab.com/sapp and runs on Unix-like operating systems. The documentation, examples and a tutorial are available at https://sapp.gitlab.io. Contact:jasperkoehorst@gmail.com or peter.schaap@wur.nl.",2018-04-01 +29136180,BCNTB bioinformatics: the next evolutionary step in the bioinformatics of breast cancer tissue banking.,"Here, we present an update of Breast Cancer Now Tissue Bank bioinformatics, a rich platform for the sharing, mining, integration and analysis of breast cancer data. Its modalities provide researchers with access to a centralised information gateway from which they can access a network of bioinformatic resources to query findings from publicly available, in-house and experimental data generated using samples supplied from the Breast Cancer Now Tissue Bank. This in silico environment aims to help researchers use breast cancer data to their full potential, irrespective of any bioinformatics barriers. For this new release, a complete overhaul of the IT and bioinformatic infrastructure underlying the portal has been conducted and a host of novel analytical modules established. 
We developed and adopted an automated data selection and prioritisation system, expanded the data content and included tissue and cell line data generated from The Cancer Genome Atlas and the Cancer Cell Line Encyclopedia, designed a host of novel analytical modalities and enhanced the query building process. Furthermore, the results are presented in an interactive format, providing researchers with greater control over the information on which they want to focus. Breast Cancer Now Tissue Bank bioinformatics can be accessed at http://bioinformatics.breastcancertissuebank.org/.",2018-01-01 +26602686,Developmental progress and current status of the Animal QTLdb.,"The Animal QTL Database (QTLdb; http://www.animalgenome.org/QTLdb) has undergone dramatic growth in recent years in terms of new data curated, data downloads and new functions and tools. We have focused our development efforts to cope with challenges arising from rapid growth of newly published data and end users' data demands, and to optimize data retrieval and analysis to facilitate users' research. Evidenced by the 27 releases in the past 11 years, the growth of the QTLdb has been phenomenal. Here we report our recent progress which is highlighted by addition of one new species, four new data types, four new user tools, a new API tool set, numerous new functions and capabilities added to the curator tool set, expansion of our data alliance partners and more than 20 other improvements. In this paper we present a summary of our progress to date and an outlook regarding future directions.",2015-11-23 +30546860,YaTCM: Yet another Traditional Chinese Medicine Database for Drug Discovery.,"Traditional Chinese Medicine (TCM) has a long history of widespread clinical applications, especially in East Asia, and is becoming frequently used in Western countries. However, owing to extreme complicacy in both chemical ingredients and mechanism of action, a deep understanding of TCM is still difficult. 
To accelerate the modernization and popularization of TCM, a single comprehensive database is required, containing a wealth of TCM-related information and equipped with complete analytical tools. Here we present YaTCM (Yet another Traditional Chinese Medicine database), a free web-based toolkit, which provides comprehensive TCM information and is furnished with analysis tools. YaTCM allows a user to (1) identify the potential ingredients that are crucial to TCM herbs through similarity search and substructure search, (2) investigate the mechanism of action for TCM or prescription through pathway analysis and network pharmacology analysis, (3) predict potential targets for TCM molecules by multi-voting chemical similarity ensemble approach, and (4) explore functionally similar herb pairs. All these functions can lead to one systematic network for visualization of TCM recipes, herbs, ingredients, definite or putative protein targets, pathways, and diseases. This web service would help in uncovering the mechanism of action of TCM, revealing the essence of TCM theory and then promoting the drug discovery process. YaTCM is freely available at http://cadd.pharmacy.nankai.edu.cn/yatcm/home.",2018-11-23 +24561221,MitoSatPlant: mitochondrial microsatellites database of viridiplantae.,"Microsatellites also known as simple sequence repeats (SSRs) consist of 1-6 nucleotide long repeating units. The importance of mitochondrial SSRs (mtSSRs) in fields like population genetics, plant phylogenetics and genome mapping motivated us to develop MitoSatPlant, a repository of plant mtSSRs. It contains information for perfect, imperfect and compound SSRs mined from 92 mitochondrial genomes of green plants, available at NCBI (as of 1 Feb 2014). A total of 72,798 SSRs were found, of which PCR primers were designed for 72,495 SSRs. Among all sequences, tetranucleotide repeats (26,802) were found to be most abundant whereas hexanucleotide repeats (2751) were detected with least frequency. 
MitoSatPlant was developed using SQL server 2008 and can be accessed through a front end designed in ASP.Net. It is an easy to use, user-friendly database and will prove to be a useful resource for plant scientists. To the best of our knowledge MitoSatPlant is the only database available for plant mtSSRs and can be freely accessed at http://compubio.in/mitosatplant/.",2014-02-19 +22222540,Visual data mining of coexpression data to set research priorities in cardiac development research.,"Over the past decade, an immense amount of biomedical data have become available in the public domain due to the development of ever-more efficient screening tools such as expression microarrays. To fully leverage this important new resource, it has become imperative to develop new methodologies for mining and visualizing data to make inferences beyond the scope of the original experiments. This need motivated the development of a new freely available web-based application called StarNet ( http://vanburenlab.medicine.tamhsc.edu/starnet2.html ). Here we describe the use of StarNet, which functions primarily as a query tool that draws correlation networks centered about a gene of interest. To support inferences and the development of new hypotheses using the resulting correlation network, StarNet queries all genes in the correlation network against a database of known interactions and displays the results in a second graph and provides a statistical test of Gene Ontology term enrichment (keyword enrichment) to provide tentative summary functional annotations for the correlation network. 
Finally, StarNet provides additional tools for comparing networks drawn from two different selected data sets, thus providing methods for making inferences and developing new hypotheses about differential wiring for different regulatory domains.",2012-01-01 +31170349,"Read, Understand, Learn, & Excel: Development and Testing of an Automated Reading Strategy Detection Algorithm for Postsecondary Students.","Purpose An important predictor of postsecondary academic success is an individual's reading comprehension skills. Postsecondary readers apply a wide range of behavioral strategies to process text for learning purposes. Currently, no tools exist to detect a reader's use of strategies. The primary aim of this study was to develop Read, Understand, Learn, & Excel, an automated tool designed to detect reading strategy use and explore its accuracy in detecting strategies when students read digital, expository text. Method An iterative design was used to develop the computer algorithm for detecting 9 reading strategies. Twelve undergraduate students read 2 expository texts that were equated for length and complexity. A human observer documented the strategies employed by each reader, whereas the computer used digital sequences to detect the same strategies. Data were then coded and analyzed to determine agreement between the 2 sources of strategy detection (i.e., the computer and the observer). Results Agreement between the computer- and human-coded strategies was 75% or higher for 6 out of the 9 strategies. Only 3 out of the 9 strategies-previewing content, evaluating amount of remaining text, and periodic review and/or iterative summarizing-had less than 60% agreement. Conclusion Read, Understand, Learn, & Excel provides proof of concept that a reader's approach to engaging with academic text can be objectively and automatically captured. Clinical implications and suggestions to improve the sensitivity of the code are discussed. 
Supplemental Material https://doi.org/10.23641/asha.8204786.",2019-06-06 +27586009,InteGO2: a web tool for measuring and visualizing gene semantic similarities using Gene Ontology.,"

Background

The Gene Ontology (GO) has been used in high-throughput omics research as a major bioinformatics resource. The hierarchical structure of GO provides users a convenient platform for biological information abstraction and hypothesis testing. Computational methods have been developed to identify functionally similar genes. However, none of the existing measurements take into account all the rich information in GO. Similarly, using these existing methods, web-based applications have been constructed to compute gene functional similarities, and to provide pure text-based outputs. Without a graphical visualization interface, it is difficult for result interpretation.

Results

We present InteGO2, a web tool that allows researchers to calculate the GO-based gene semantic similarities using seven widely used GO-based similarity measurements. Also, we provide an integrative measurement that synergistically integrates all the individual measurements to improve the overall performance. Using HTML5 and cytoscape.js, we provide a graphical interface in InteGO2 to visualize the resulting gene functional association networks.

Conclusions

InteGO2 is an easy-to-use HTML5 based web tool. With it, researchers can measure gene or gene product functional similarity conveniently, and visualize the network of functional interactions in a graphical interface. InteGO2 can be accessed via http://mlg.hit.edu.cn:8089/ .",2016-08-31 +24194605,WormBase 2014: new views of curated biology.,"WormBase (http://www.wormbase.org/) is a highly curated resource dedicated to supporting research using the model organism Caenorhabditis elegans. With an electronic history predating the World Wide Web, WormBase contains information ranging from the sequence and phenotype of individual alleles to genome-wide studies generated using next-generation sequencing technologies. In recent years, we have expanded the contents to include data on additional nematodes of agricultural and medical significance, bringing the knowledge of C. elegans to bear on these systems and providing support for underserved research communities. Manual curation of the primary literature remains a central focus of the WormBase project, providing users with reliable, up-to-date and highly cross-linked information. In this update, we describe efforts to organize the original atomized and highly contextualized curated data into integrated syntheses of discrete biological topics. Next, we discuss our experiences coping with the vast increase in available genome sequences made possible through next-generation sequencing platforms. 
Finally, we describe some of the features and tools of the new WormBase Web site that help users better find and explore data of interest.",2013-11-04 +23172288,UUCD: a family-based database of ubiquitin and ubiquitin-like conjugation.,"In this work, we developed a family-based database of UUCD (http://uucd.biocuckoo.org) for ubiquitin and ubiquitin-like conjugation, which is one of the most important post-translational modifications responsible for regulating a variety of cellular processes, through a similar E1 (ubiquitin-activating enzyme)-E2 (ubiquitin-conjugating enzyme)-E3 (ubiquitin-protein ligase) enzyme thioester cascade. Although extensive experimental efforts have been taken, an integrative data resource is still not available. From the scientific literature, 26 E1s, 105 E2s, 1003 E3s and 148 deubiquitination enzymes (DUBs) were collected and classified into 1, 3, 19 and 7 families, respectively. To computationally characterize potential enzymes in eukaryotes, we constructed 1, 1, 15 and 6 hidden Markov model (HMM) profiles for E1s, E2s, E3s and DUBs at the family level, separately. Moreover, the ortholog searches were conducted for E3 and DUB families without HMM profiles. Then the UUCD database was developed with 738 E1s, 2937 E2s, 46 631 E3s and 6647 DUBs of 70 eukaryotic species. The detailed annotations and classifications were also provided. The online service of UUCD was implemented in PHP + MySQL + JavaScript + Perl.",2012-11-20 +31135347,Health Care in the Age of Interoperability Part 5: The Personal Health Record.,"About this series This is the fifth in a series of articles on the dramatic transformation taking place in health informatics in large part because of the new Health Level 7 (HL7) Fast Healthcare Interoperability Resources (FHIR) standard. 
The first article provided background on health care, electronic health record systems for physicians, and the challenges they both face along with the potential of interoperability to help overcome them. The second introduced the basics of the FHIR standard and some suggested resources for those who are interested in its further exploration. The third introduced SMART on FHIR which, based on its wide adoption, has become the default standard FHIR app platform. The fourth looked at clinical decision support, arguably the single most important provider-facing use case for FHIR. This article introduces the personal health record and tools that can utilize the data stored in it as an important use case for FHIR in support of patients. The articles in this series are intended to introduce researchers from other fields to this one and assume no prior knowledge of health care or health informatics. They are abstracted from the author's recently published book, Health Informatics on FHIR: How HL7's New API is Transforming Healthcare (Springer International Publishing: https://www.springer.com/us/book/9783319934136).",2019-05-01 +30321300,Enhanced prediction of RNA solvent accessibility with long short-term memory neural networks and improved sequence profiles.,"

Motivation

The de novo prediction of RNA tertiary structure remains a grand challenge. Predicted RNA solvent accessibility provides an opportunity to address this challenge. To the best of our knowledge, there is only one method (RNAsnap) available for RNA solvent accessibility prediction. However, its performance is unsatisfactory for protein-free RNAs.

Results

We developed RNAsol, a new algorithm to predict RNA solvent accessibility. RNAsol was built based on improved sequence profiles from the covariance models and trained with the long short-term memory (LSTM) neural networks. Independent tests on the same datasets from RNAsnap show that RNAsol achieves the mean Pearson's correlation coefficient (PCC) of 0.43/0.26 for the protein-bound/protein-free RNA molecules, which is 26.5%/136.4% higher than that of RNAsnap. When the training set is enlarged to include both types of RNAs, the PCCs increase to 0.49 and 0.46 for protein-bound and protein-free RNAs, respectively. The success of RNAsol is attributed to two aspects, including the improved sequence profiles constructed by the sequence-profile alignment and the enhanced training by the LSTM neural networks.

Availability and implementation

http://yanglab.nankai.edu.cn/RNAsol/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +30606768,The Tumor Suppressor FBW7 and the Vitamin D Receptor Are Mutual Cofactors in Protein Turnover and Transcriptional Regulation.,"The E3 ligase and tumor suppressor FBW7 targets drivers of cell-cycle progression such as the oncogenic transcription factor c-MYC, for proteasomal degradation. Vitamin D signaling regulates c-MYC expression and turnover in vitro and in vivo, which is highly significant as epidemiologic data link vitamin D deficiency to increased cancer incidence. We hypothesized that FBW7 and the vitamin D receptor (VDR) controlled each other's function as regulators of protein turnover and gene transcription, respectively. We found that hormonal 1,25-dihydroxyvitamin D3 (1,25D) rapidly enhanced the interaction of FBW7 with VDR and with c-MYC, whereas it blocked FBW7 binding to c-MYC antagonist MXD1. 1,25D stimulated the recruitment of FBW7, SCF complex subunits, and ubiquitin to DNA-bound c-MYC, consistent with 1,25D-regulated c-MYC degradation on DNA. 1,25D also accelerated the turnover of other FBW7 target proteins such as Cyclin E, c-JUN, MCL1, and AIB1, and, importantly, FBW7 depletion attenuated the 1,25D-induced cell-cycle arrest. Although the VDR contains a consensus FBW7 recognition motif in a VDR-specific insertion domain, its mutation did not affect FBW7-VDR interactions, and FBW7 ablation did not stabilize the VDR. Remarkably, however, FBW7 is essential for optimal VDR gene expression. In addition, the FBW7 and SCF complex subunits are recruited to 1,25D-induced genes and FBW7 depletion inhibited the 1,25D-dependent transactivation. Collectively, these data show that the VDR and FBW7 are mutual cofactors, and provide a mechanistic basis for the cancer-preventive actions of vitamin D. 
IMPLICATIONS: The key findings show that the VDR and the E3 ligase FBW7 regulate each other's functions in transcriptional regulation and control of protein turnover, respectively, and provide a molecular basis for cancer-preventive actions of vitamin D.Visual Overview: http://mcr.aacrjournals.org/content/17/3/709/F1.large.jpg.",2019-01-03 +30466398,PCRdrive: the largest qPCR assay archive to date and endless potential for lab workflow revitalization.,"

Background

Primer design is a crucial step in establishing specific and sensitive qPCR assays. Even though numerous tools for primer design exist, the majority of resulting assays still requires extensive testing and optimisation or does not allow for high quality target amplification. We developed a workflow for designing qPCR assays. Unlike other tools, we compute a PCR assay including primer design, concentrations and the optimal PCR program.

Results

Gene expression assays were already generated in a total of 283,226 genes from three species and are continued for all genes of the major model species. The results are available online at https://pcrdrive.com/lab#/assay-database . The workflow involves filtering Primer3-generated primers by considering diverse parameters including specificity, single-nucleotide polymorphisms (SNPs), secondary structure as well as compatibility with standard qPCR assay conditions. The resulting assays consist of transcript-specific primer sequences, a reagents protocol as well as instrument settings which are provided in a web-based tool called PCRdrive. PCRdrive was designed to support PCR users in their PCR-related tasks and is equipped with handy functions, components of an electronic lab notebook (ELN) as well as teamworking opportunities.

Conclusion

High quality ready to use qPCR assays for gene expression analysis are provided within the online platform PCRdrive. A built-in primer designer enables easy generation of assays which is not supported by any other tool. The wet lab optimisation of new assays can be transparently documented and shared within the team. PCRdrive also contains an archive of public PCRs which is updated regularly. Users may use the archive to publish their PCR to the community which makes it easy for other researchers worldwide to reproduce and validate the PCR. PCRdrive is a growing network of PCR users, simplifying and streamlining research through its useful existing features and continuous developments from the active development team.",2018-11-22 +30465691,Database establishment for the secondary fungal DNA barcode translational elongation factor 1α (TEF1α) 1.,"With new or emerging fungal infections, human and animal fungal pathogens are a growing threat worldwide. Current diagnostic tools are slow, non-specific at the species and subspecies levels, and require specific morphological expertise to accurately identify pathogens from pure cultures. DNA barcodes are easily amplified, universal, short species-specific DNA sequences, which enable rapid identification by comparison with a well-curated reference sequence collection. The primary fungal DNA barcode, ITS region, was introduced in 2012 and is now routinely used in diagnostic laboratories. However, the ITS region only accurately identifies around 75% of all medically relevant fungal species, which has prompted the development of a secondary barcode to increase the resolution power and suitability of DNA barcoding for fungal disease diagnostics. The translational elongation factor 1α (TEF1α) was selected in 2015 as a secondary fungal DNA barcode, but it has not been implemented into practice, due to the absence of a reference database. 
Here, we have established a quality-controlled reference database for the secondary barcode that together with the ISHAM-ITS database, forms the ISHAM barcode database, available online at http://its.mycologylab.org/ . We encourage the mycology community for active contributions.",2018-11-22 +27820856,Piphillin: Improved Prediction of Metagenomic Content by Direct Inference from Human Microbiomes.,"Functional analysis of a clinical microbiome facilitates the elucidation of mechanisms by which microbiome perturbation can cause a phenotypic change in the patient. The direct approach for the analysis of the functional capacity of the microbiome is via shotgun metagenomics. An inexpensive method to estimate the functional capacity of a microbial community is through collecting 16S rRNA gene profiles then indirectly inferring the abundance of functional genes. This inference approach has been implemented in the PICRUSt and Tax4Fun software tools. However, those tools have important limitations since they rely on outdated functional databases and uncertain phylogenetic trees and require very specific data pre-processing protocols. Here we introduce Piphillin, a straightforward algorithm independent of any proposed phylogenetic tree, leveraging contemporary functional databases and not obliged to any singular data pre-processing protocol. When all three inference tools were evaluated against actual shotgun metagenomics, Piphillin was superior in predicting gene composition in human clinical samples compared to both PICRUSt and Tax4Fun (p<0.01 and p<0.001, respectively) and Piphillin's ability to predict disease associations with specific gene orthologs exhibited a 15% increase in balanced accuracy compared to PICRUSt. From laboratory animal samples, no performance advantage was observed for any one of the tools over the others and for environmental samples all produced unsatisfactory predictions. 
Our results demonstrate that functional inference using the direct method implemented in Piphillin is preferable for clinical biospecimens. Piphillin is publicly available for academic use at http://secondgenome.com/Piphillin.",2016-11-07 +29993994,7-Point Checklist and Skin Lesion Classification using Multi-Task Multi-Modal Neural Nets. ,"We propose a multi-task deep convolutional neural network, trained on multi-modal data (clinical and dermoscopic images, and patient meta-data), to classify the 7-point melanoma checklist criteria and perform skin lesion diagnosis. Our neural network is trained using several multi-task loss functions, where each loss considers different combinations of the input modalities, which allows our model to be robust to missing data at inference time. Our final model classifies the 7-point checklist and skin condition diagnosis, produces multi-modal feature vectors suitable for image retrieval, and localizes clinically discriminant regions. We benchmark our approach using 1011 lesion cases, and report comprehensive results over all 7-point criteria and diagnosis. We also make our dataset (images and metadata) publicly available online at http://derm.cs.sfu.ca.",2018-04-09 +30302823,Overlapping clustering of gene expression data using penalized weighted normalized cut.,"Clustering has been widely conducted in the analysis of gene expression data. For complex diseases, it has played an important role in identifying unknown functions of genes, serving as the basis of other analysis, and others. A common limitation of most existing clustering approaches is to assume that genes are separated into disjoint clusters. As genes often have multiple functions and thus can belong to more than one functional cluster, the disjoint clustering results can be unsatisfactory. 
In addition, due to the small sample sizes of genetic profiling studies and other factors, there may not be sufficient evidence to confirm the specific functions of some genes and cluster them definitively into disjoint clusters. In this study, we develop an effective overlapping clustering approach, which takes account into the multiplicity of gene functions and lack of certainty in practical analysis. A penalized weighted normalized cut (PWNCut) criterion is proposed based on the NCut technique and an L 2 norm constraint. It outperforms multiple competitors in simulation. The analysis of the cancer genome atlas (TCGA) data on breast cancer and cervical cancer leads to biologically sensible findings which differ from those using the alternatives. To facilitate implementation, we develop the function pwncut in the R package NCutYX.",2018-10-09 +23433959,Identification of candidate transcription factor binding sites in the cattle genome.,"A resource that provides candidate transcription factor binding sites (TFBSs) does not currently exist for cattle. Such data is necessary, as predicted sites may serve as excellent starting locations for future omics studies to develop transcriptional regulation hypotheses. In order to generate this resource, we employed a phylogenetic footprinting approach-using sequence conservation across cattle, human and dog-and position-specific scoring matrices to identify 379,333 putative TFBSs upstream of nearly 8000 Mammalian Gene Collection (MGC) annotated genes within the cattle genome. Comparisons of our predictions to known binding site loci within the PCK1, ACTA1 and G6PC promoter regions revealed 75% sensitivity for our method of discovery. Additionally, we intersected our predictions with known cattle SNP variants in dbSNP and on the Illumina BovineHD 770k and Bos 1 SNP chips, finding 7534, 444 and 346 overlaps, respectively. 
Due to our stringent filtering criteria, these results represent high quality predictions of putative TFBSs within the cattle genome. All binding site predictions are freely available at http://bfgl.anri.barc.usda.gov/BovineTFBS/ or http://199.133.54.77/BovineTFBS.",2013-02-01 +29186290,scHiCNorm: a software package to eliminate systematic biases in single-cell Hi-C data.,"Summary:We build a software package scHiCNorm that uses zero-inflated and hurdle models to remove biases from single-cell Hi-C data. Our evaluations prove that our models can effectively eliminate systematic biases for single-cell Hi-C data, which better reveal cell-to-cell variances in terms of chromosomal structures. Availability and implementation:scHiCNorm is available at http://dna.cs.miami.edu/scHiCNorm/. Perl scripts are provided that can generate bias features. Pre-built bias features for human (hg19 and hg38) and mouse (mm9 and mm10) are available to download. R scripts can be downloaded to remove biases. Contact:zheng.wang@miami.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-03-01 +22199232,E3Net: a system for exploring E3-mediated regulatory networks of cellular functions.,"Ubiquitin-protein ligase (E3) is a key enzyme targeting specific substrates in diverse cellular processes for ubiquitination and degradation. The existing findings of substrate specificity of E3 are, however, scattered over a number of resources, making it difficult to study them together with an integrative view. Here we present E3Net, a web-based system that provides a comprehensive collection of available E3-substrate specificities and a systematic framework for the analysis of E3-mediated regulatory networks of diverse cellular functions. 
Currently, E3Net contains 2201 E3s and 4896 substrates in 427 organisms and 1671 E3-substrate specific relations between 493 E3s and 1277 substrates in 42 organisms, extracted mainly from MEDLINE abstracts and UniProt comments with an automatic text mining method and additional manual inspection and partly from high throughput experiment data and public ubiquitination databases. The significant functions and pathways of the extracted E3-specific substrate groups were identified from a functional enrichment analysis with 12 functional category resources for molecular functions, protein families, protein complexes, pathways, cellular processes, cellular localization, and diseases. E3Net includes interactive analysis and navigation tools that make it possible to build an integrative view of E3-substrate networks and their correlated functions with graphical illustrations and summarized descriptions. As a result, E3Net provides a comprehensive resource of E3s, substrates, and their functional implications summarized from the regulatory network structures of E3-specific substrate groups and their correlated functions. This resource will facilitate further in-depth investigation of ubiquitination-dependent regulatory mechanisms. E3Net is freely available online at http://pnet.kaist.ac.kr/e3net.",2011-12-22 +25992265,Sequential pattern mining for discovering gene interactions and their contextual information from biomedical texts.,"

Background

Discovering gene interactions and their characterizations from biological text collections is a crucial issue in bioinformatics. Indeed, text collections are large and it is very difficult for biologists to fully take benefit from this amount of knowledge. Natural Language Processing (NLP) methods have been applied to extract background knowledge from biomedical texts. Some of existing NLP approaches are based on handcrafted rules and thus are time consuming and often devoted to a specific corpus. Machine learning based NLP methods, give good results but generate outcomes that are not really understandable by a user.

Results

We take advantage of an hybridization of data mining and natural language processing to propose an original symbolic method to automatically produce patterns conveying gene interactions and their characterizations. Therefore, our method not only allows gene interactions but also semantics information on the extracted interactions (e.g., modalities, biological contexts, interaction types) to be detected. Only limited resource is required: the text collection that is used as a training corpus. Our approach gives results comparable to the results given by state-of-the-art methods and is even better for the gene interaction detection in AIMed.

Conclusions

Experiments show how our approach enables to discover interactions and their characterizations. To the best of our knowledge, there is few methods that automatically extract the interactions and also associated semantics information. The extracted gene interactions from PubMed are available through a simple web interface at https://bingotexte.greyc.fr/. The software is available at https://bingo2.greyc.fr/?q=node/22.",2015-05-18 +30026666,"Photo images, 3D models and CT scanned data of loaches (Botiidae, Cobitidae and Nemacheilidae) of Japan.","

Background

Loach is one of the major cypriniform fishes in freshwater habitats of Japan; 35 taxa/clades have, until now, been recognised. Parallel to genetic studies, morphological examinations are needed for further development of loach study, eventually ichthyology and fish biology. Digital archiving, concerning taxonomy, ecology, ethology etc., is one of the progressive challenges for the open science of biology. This paper aimed to online publish photo images, 3D models and CT scanned data of all the known clades of loaches inhabiting Japan (103 individuals in total with several type specimens), contributing to ichthyology and public interest of biodiversity/biology.

New information

Photo images, 3D models and CT scanned data of all the known 35 taxa/clades of loaches inhabiting in Japan were online published at http://ffish.asia/loachesOfJapan and http://ffish.asia/loachesOfJapan3D.",2018-07-06 +,The MORPH‐R web server and software tool for predicting missing genes in biological pathways,"A biological pathway is the set of molecular entities involved in a given biological process and the interrelations among them. Even though biological pathways have been studied extensively, discovering missing genes in pathways remains a fundamental challenge. Here, we present an easy‐to‐use tool that allows users to run MORPH (MOdule‐guided Ranking of candidate PatHway genes), an algorithm for revealing missing genes in biological pathways, and demonstrate its capabilities. MORPH supports the analysis in tomato, Arabidopsis and the two new species: rice and the newly sequenced potato genome. The new tool, called MORPH‐R, is available both as a web server (at http://bioinformatics.psb.ugent.be/webtools/morph/) and as standalone software that can be used locally. In the standalone version, the user can apply the tool to new organisms using any proprietary and public data sources.",2015-09-01 +30950747,Functional Logistic Mixed-Effects Models for Learning Curves From Longitudinal Binary Data.,"Purpose We present functional logistic mixed-effects models (FLMEMs) for estimating population and individual-level learning curves in longitudinal experiments. Method Using functional analysis tools in a Bayesian hierarchical framework, the FLMEM captures nonlinear, smoothly varying learning curves, appropriately accommodating uncertainty in various aspects of the analysis while also borrowing information across different model layers. An R package implementing our method is available as part of the Supplemental Materials . 
Results Application to speech learning data from Reetzke, Xie, Llanos, and Chandrasekaran (2018) and a simulation study demonstrate the utility of FLMEM and its many advantages over linear and logistic mixed-effects models. Conclusion The FLMEM is highly flexible and efficient in improving upon the practical limitations of linear models and logistic linear mixed-effects models. We expect the FLMEM to be a useful addition to the speech, language, and hearing scientist's toolkit. Supplemental Material https://doi.org/10.23641/asha.7822568.",2019-03-01 +30985072,Personality-obesity associations are driven by narrow traits: A meta-analysis.,"Obesity has inconsistent associations with broad personality domains, possibly because the links pertain to only some facets of these domains. Collating published and unpublished studies (N = 14 848), we meta-analysed the associations between body mass index (BMI) and Five-Factor Model personality domains as well as 30 Five-Factor Model personality facets. At the domain level, BMI had a positive association with Neuroticism and a negative association with Conscientiousness domains. At the facet level, we found associations between BMI and 15 facets from all five personality domains, with only some Neuroticism and Conscientiousness facets among them. Certain personality-BMI associations were moderated by sample properties, such as proportions of women or participants with obesity; these moderation effects were replicated in the individual-level analysis. Finally, facet-based personality ""risk"" scores accounted for 2.3% of variance in BMI in a separate sample of individuals (N = 3569), 409% more than domain-based scores. Taken together, personality-BMI associations are facet specific, and delineating them may help to explain obesity-related behaviours and inform intervention designs. 
Preprint and data are available at https://psyarxiv.com/z35vn/.",2019-04-15 +22346341,Construction of a pig physical interactome using sequence homology and a comprehensive reference human interactome.,"The analysis of interaction networks is crucial for understanding molecular function and has an essential impact for genomewide studies. However, the interactomes of most species are largely incomplete and computational strategies that take into account sequence homology can help compensating for this lack of information using cross-species analysis. In this work we report the construction of a porcine interactome resource. We applied sequence homology matching and carried out bi-directional BLASTp searches for the currently available protein sequence collections of human and pig. Using this homology we were able to recover, on average, 71% of the proteins annotated for human pathways for the pig. Porcine protein-protein interactions were deduced from homologous proteins with known interactions in human. The result of this work is a resource comprising 204,699 predicted porcine interactions that can be used in genome analyses in order to enhance functional interpretation of data. The data can be visualized and downloaded from http://cpdb.molgen.mpg.de/pig.",2012-01-24 +29065857,CGDV: a webtool for circular visualization of genomics and transcriptomics data.,"

Background

Interpretation of large-scale data is very challenging and currently there is scarcity of web tools which support automated visualization of a variety of high throughput genomics and transcriptomics data and for a wide variety of model organisms along with user defined karyotypes. Circular plot provides holistic visualization of high throughput large scale data but it is very complex and challenging to generate as most of the available tools need informatics expertise to install and run them.

Result

We have developed CGDV (Circos for Genomics and Transcriptomics Data Visualization), a webtool based on Circos, for seamless and automated visualization of a variety of large scale genomics and transcriptomics data. CGDV takes output of analyzed genomics or transcriptomics data of different formats, such as vcf, bed, xls, tab limited matrix text file, CNVnator raw output and Gene fusion raw output, to plot circular view of the sample data. CGDV take cares of generating intermediate files required for circos. CGDV is freely available at https://cgdv-upload.persistent.co.in/cgdv/ .

Conclusion

The circular plot for each data type is tailored to gain best biological insights into the data. The inter-relationship between data points, homologous sequences, genes involved in fusion events, differential expression pattern, sequencing depth, types and size of variations and enrichment of DNA binding proteins can be seen using CGDV. CGDV thus helps biologists and bioinformaticians to visualize a variety of genomics and transcriptomics data seamlessly.",2017-10-24 +25220766,LabeledIn: cataloging labeled indications for human drugs.,"Drug-disease treatment relationships, i.e., which drug(s) are indicated to treat which disease(s), are among the most frequently sought information in PubMed®. Such information is useful for feeding the Google Knowledge Graph, designing computational methods to predict novel drug indications, and validating clinical information in EMRs. Given the importance and utility of this information, there have been several efforts to create repositories of drugs and their indications. However, existing resources are incomplete. Furthermore, they neither label indications in a structured way nor differentiate them by drug-specific properties such as dosage form, and thus do not support computer processing or semantic interoperability. More recently, several studies have proposed automatic methods to extract structured indications from drug descriptions; however, their performance is limited by natural language challenges in disease named entity recognition and indication selection. In response, we report LabeledIn: a human-reviewed, machine-readable and source-linked catalog of labeled indications for human drugs. More specifically, we describe our semi-automatic approach to derive LabeledIn from drug descriptions through human annotations with aids from automatic methods. As the data source, we use the drug labels (or package inserts) submitted to the FDA by drug manufacturers and made available in DailyMed. 
Our machine-assisted human annotation workflow comprises: (i) a grouping method to remove redundancy and identify representative drug labels to be used for human annotation, (ii) an automatic method to recognize and normalize mentions of diseases in drug labels as candidate indications, and (iii) a two-round annotation workflow for human experts to judge the pre-computed candidates and deliver the final gold standard. In this study, we focused on 250 highly accessed drugs in PubMed Health, a newly developed public web resource for consumers and clinicians on prevention and treatment of diseases. These 250 drugs corresponded to more than 8000 drug labels (500 unique) in DailyMed in which 2950 candidate indications were pre-tagged by an automatic tool. After being reviewed independently by two experts, 1618 indications were selected, and additional 97 (missed by computer) were manually added, with an inter-annotator agreement of 88.35% as measured by the Kappa coefficient. Our final annotation results in LabeledIn consist of 7805 drug-disease treatment relationships where drugs are represented as a triplet of ingredient, dose form, and strength. A systematic comparison of LabeledIn with an existing computer-derived resource revealed significant discrepancies, confirming the need to involve humans in the creation of such a resource. In addition, LabeledIn is unique in that it contains detailed textual context of the selected indications in drug labels, making it suitable for the development of advanced computational methods for the automatic extraction of indications from free text. Finally, motivated by the studies on drug nomenclature and medication errors in EMRs, we adopted a fine-grained drug representation scheme, which enables the automatic identification of drugs with indications specific to certain dose forms or strengths. Future work includes expanding our coverage to more drugs and integration with other resources. 
The LabeledIn dataset and the annotation guidelines are available at http://ftp.ncbi.nlm.nih.gov/pub/lu/LabeledIn/.",2014-08-23 +30124779,StructureProfiler: an all-in-one tool for 3D protein structure profiling.,"MOTIVATION:Three-dimensional protein structures are important starting points for elucidating protein function and applications like drug design. Computational methods in this area rely on high quality validation datasets which are usually manually assembled. Due to the increase in published structures as well as the increasing demand for specially tailored validation datasets, automatic procedures should be adopted. RESULTS:StructureProfiler is a new tool for automatic, objective and customizable profiling of X-ray protein structures based on the most frequently applied selection criteria currently in use to assemble benchmark datasets. As examples, four dataset configurations (Astex, Iridium, Platinum, combined), all results of the combined tests and the list of all PDB Ids passing the combined criteria set are attached in the Supplementary Material. AVAILABILITY AND IMPLEMENTATION:StructureProfiler is available as part of the ProteinsPlus web service http://proteins.plus and as standalone tool in the NAOMI ChemBio Suite. Dataset updates together with the tool can be found on http://www.zbh.uni-hamburg.de/structureprofiler. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2019-03-01 +27375595,DGV: Dengue Genographic Viewer.,"Dengue viruses (DENVs) and their vectors are widely distributed throughout the tropical and subtropical regions of the world. An autochthonous case of DENV was reported in Tokyo, Japan, in 2014, for the first time in 70 years. A comprehensive database of DENV sequences containing both serotype and genotype data and epidemiological data is crucial to trace DENV outbreak isolates and promptly respond to outbreaks. 
We constructed a DENV database containing the serotype, genotype, year and country/region of collection by collecting all publically available DENV sequence information from the National Center for Biotechnology Information (NCBI) and assigning genotype information. We also implemented the web service Dengue Genographic Viewer (DGV), which shows the geographical distribution of each DENV genotype in a user-specified time span. DGV also assigns the serotype and genotype to a user-specified sequence by performing a homology search against the curated DENV database, and shows its homologous sequences with the geographical position and year of collection. DGV also shows the distribution of DENV-infected entrants to Japan by plotting epidemiological data from the Infectious Agents Surveillance Report (IASR), Japan. This overview of the DENV genotype distribution may aid in planning for the control of DENV infections. DGV is freely available online at: (https://gph.niid.go.jp/geograph/dengue/content/genomemap).",2016-06-07 +28672236,Developing and applying metamodels of high resolution process-based simulations for high throughput exposure assessment of organic chemicals in riverine ecosystems.,"As defined by Wikipedia (https://en.wikipedia.org/wiki/Metamodeling), ""(a) metamodel or surrogate model is a model of a model, and metamodeling is the process of generating such metamodels."" The goals of metamodeling include, but are not limited to (1) developing functional or statistical relationships between a model's input and output variables for model analysis, interpretation, or information consumption by users' clients; (2) quantifying a model's sensitivity to alternative or uncertain forcing functions, initial conditions, or parameters; and (3) characterizing the model's response or state space. 
Using five models developed by the US Environmental Protection Agency, we generate a metamodeling database of the expected environmental and biological concentrations of 644 organic chemicals released into nine US rivers from wastewater treatment works (WTWs) assuming multiple loading rates and sizes of populations serviced. The chemicals of interest have log n-octanol/water partition coefficients (logKOW) ranging from 3 to 14, and the rivers of concern have mean annual discharges ranging from 1.09 to 3240m3/s. Log-linear regression models are derived to predict mean annual dissolved and total water concentrations and total sediment concentrations of chemicals of concern based on their logKOW, Henry's Law Constant, and WTW loading rate and on the mean annual discharges of the receiving rivers. Metamodels are also derived to predict mean annual chemical concentrations in fish, invertebrates, and periphyton. We corroborate a subset of these metamodels using field studies focused on brominated flame retardants and discuss their application for high throughput screening of exposures to human and ecological populations and for analysis and interpretation of field data.",2017-06-30 +29746461,Porcupine: A visual pipeline tool for neuroimaging analysis.,"The field of neuroimaging is rapidly adopting a more reproducible approach to data acquisition and analysis. Data structures and formats are being standardised and data analyses are getting more automated. However, as data analysis becomes more complicated, researchers often have to write longer analysis scripts, spanning different tools across multiple programming languages. This makes it more difficult to share or recreate code, reducing the reproducibility of the analysis. We present a tool, Porcupine, that constructs one's analysis visually and automatically produces analysis code. 
The graphical representation improves understanding of the performed analysis, while retaining the flexibility of modifying the produced code manually to custom needs. Not only does Porcupine produce the analysis code, it also creates a shareable environment for running the code in the form of a Docker image. Together, this forms a reproducible way of constructing, visualising and sharing one's analysis. Currently, Porcupine links to Nipype functionalities, which in turn accesses most standard neuroimaging analysis tools. Our goal is to release researchers from the constraints of specific implementation details, thereby freeing them to think about novel and creative ways to solve a given problem. Porcupine improves the overview researchers have of their processing pipelines, and facilitates both the development and communication of their work. This will reduce the threshold at which less expert users can generate reusable pipelines. With Porcupine, we bridge the gap between a conceptual and an implementational level of analysis and make it easier for researchers to create reproducible and shareable science. We provide a wide range of examples and documentation, as well as installer files for all platforms on our website: https://timvanmourik.github.io/Porcupine. Porcupine is free, open source, and released under the GNU General Public License v3.0.",2018-05-10 +26998997,miTALOS v2: Analyzing Tissue Specific microRNA Function.,"MicroRNAs are involved in almost all biological processes and have emerged as regulators of signaling pathways. We show that miRNA target genes and pathway genes are not uniformly expressed across human tissues. To capture tissue specific effects, we developed a novel methodology for tissue specific pathway analysis of miRNAs. 
We incorporated the most recent and highest quality miRNA targeting data (TargetScan and StarBase), RNA-seq based gene expression data (EBI Expression Atlas) and multiple new pathway data sources to increase the biological relevance of the predicted miRNA-pathway associations. We identified new potential roles of miR-199a-3p, miR-199b-3p and the miR-200 family in hepatocellular carcinoma, involving the regulation of metastasis through MAPK and Wnt signaling. Also, an association of miR-571 and Notch signaling in liver fibrosis was proposed. To facilitate data update and future extensions of our tool, we developed a flexible database backend using the graph database neo4j. The new backend as well as the novel methodology were included in the updated miTALOS v2, a tool that provides insights into tissue specific miRNA regulation of biological pathways. miTALOS v2 is available at http://mips.helmholtz-muenchen.de/mitalos.",2016-03-21 +23203883,Facing growth in the European Nucleotide Archive.,"The European Nucleotide Archive (ENA; http://www.ebi.ac.uk/ena/) collects, maintains and presents comprehensive nucleic acid sequence and related information as part of the permanent public scientific record. Here, we provide brief updates on ENA content developments and major service enhancements in 2012 and describe in more detail two important areas of development and policy that are driven by ongoing growth in sequencing technologies. First, we describe the ENA data warehouse, a resource for which we provide a programmatic entry point to integrated content across the breadth of ENA. 
Second, we detail our plans for the deployment of CRAM data compression technology in ENA.",2012-11-29 +31124809,Using the Core Competencies for New Physician Assistant Graduates to Prioritize Admission Criteria for PA Practice in 2025.,"In a fast-changing medical and educational environment, it is incumbent upon the physician assistant (PA) education community to periodically consider what the future practice environment might look like for our graduates. Changes in technology, regulation, reimbursement, health system economics, and health care delivery are among the many forces shaping the practice environment of the future. The 2018 Physician Assistant Education Association (PAEA) Presidents Commission reflected on what PA practice might look like in 2025 and used the Association's Core Competencies for New PA Graduates to consider what characteristics might therefore be required of the PA graduates who will practice in this future. We postulate that the future PA practice environment will require enhanced skills in such areas as interpreting technology-driven clinical data for patients and practices, consulting effectively with increasingly specialized members of health care teams, understanding population health and predictive analytics, and knowing how to access and critically assess new medical information. Working backward, we identify certain noncognitive attributes that will likely need to be prioritized in our admission processes and suggest some tools that can be used to assess them. These attributes include ethical responsibility, communication, critical thinking, situational judgment, and professionalism. As with all Presidents Commission articles, this piece is intended primarily to stimulate thought, dialogue, and future research. 
We encourage all faculty to participate in this dialogue, through the new PAEA Digital Learning Hub (https://paealearning.org/learn/digital-learning-hub/) and other channels.",2019-06-01 +30335143,Disease gene identification based on generic and disease-specific genome networks.,"

Summary

Immune diseases have a strong genetic component with Mendelian patterns of inheritance. While the tight association has been a major understanding in the underlying pathophysiology for the category of immune diseases, the common features of these diseases remain unclear. Based on the potential commonality among immune genes, we design Gene Ranker for key gene identification. Gene Ranker is a network-based gene scoring algorithm that initially constructs a backbone network based on protein interactions. Patient gene expression networks are added into the network. An add-on process screens the networks of weighted gene co-expression network analysis (WGCNA) on the samples of immune patients. Gene Ranker is disease-specific; however, any WGCNA network that passes the screening procedure can be added on. With the constructed network, it employs the semi-supervised learning for gene scoring.

Results

The proposed method was applied to immune diseases. Based on the resulting scores, Gene Ranker identified potential key genes in immune diseases. In scoring validation, an average area under the receiver operating characteristic curve of 0.82 was achieved, which is a significant increase from the reference average of 0.76. Highly ranked genes were verified through retrieval and review of 27 million PubMed literatures. As a typical case, 20 potential key genes in rheumatoid arthritis were identified: 10 were de facto genes and the remaining were novel.

Availability and implementation

Gene Ranker is available at http://www.alphaminers.net/GeneRanker/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-06-01 +23161692,SchistoDB: an updated genome resource for the three key schistosomes of humans.,"The new release of SchistoDB (http://SchistoDB.net) provides a rich resource of genomic data for key blood flukes (genus Schistosoma) which cause disease in hundreds of millions of people worldwide. SchistoDB integrates whole-genome sequence and annotation of three species of the genus and provides enhanced bioinformatics analyses and data-mining tools. A simple, yet comprehensive web interface provided through the Strategies Web Development Kit is available for the mining and visualization of the data. Genomic scale data can be queried based on BLAST searches, annotation keywords and gene ID searches, gene ontology terms, sequence motifs, protein characteristics and phylogenetic relationships. Search strategies can be saved within a user's profile for future retrieval and may also be shared with other researchers using a unique web address.",2012-11-17 +27465131,PvTFDB: a Phaseolus vulgaris transcription factors database for expediting functional genomics in legumes. ,"The common bean [Phaseolus vulgaris (L.)] is one of the essential proteinaceous vegetables grown in developing countries. However, its production is challenged by low yields caused by numerous biotic and abiotic stress conditions. Regulatory transcription factors (TFs) symbolize a key component of the genome and are the most significant targets for producing stress tolerant crop and hence functional genomic studies of these TFs are important. Therefore, here we have constructed a web-accessible TFs database for P. vulgaris, called PvTFDB, which contains 2370 putative TF gene models in 49 TF families. 
This database provides a comprehensive information for each of the identified TF that includes sequence data, functional annotation, SSRs with their primer sets, protein physical properties, chromosomal location, phylogeny, tissue-specific gene expression data, orthologues, cis-regulatory elements and gene ontology (GO) assignment. Altogether, this information would be used in expediting the functional genomic studies of a specific TF(s) of interest. The objectives of this database are to understand functional genomics study of common bean TFs and recognize the regulatory mechanisms underlying various stress responses to ease breeding strategy for variety production through a couple of search interfaces including gene ID, functional annotation and browsing interfaces including by family and by chromosome. This database will also serve as a promising central repository for researchers as well as breeders who are working towards crop improvement of legume crops. In addition, this database provide the user unrestricted public access and the user can download entire data present in the database freely.Database URL: http://www.multiomics.in/PvTFDB/.",2016-07-27 +26740527,UCSC Data Integrator and Variant Annotation Integrator.,"

Unlabelled

Two new tools on the UCSC Genome Browser web site provide improved ways of combining information from multiple datasets, optionally including the user's own custom track data and/or data from track hubs. The Data Integrator combines columns from multiple data tracks, showing all items from the first track along with overlapping items from the other tracks. The Variant Annotation Integrator is tailored to adding functional annotations to variant calls; it offers a more restricted set of underlying data tracks but adds predictions of each variant's consequences for any overlapping or nearby gene transcript. When available, it optionally adds additional annotations including effect prediction scores from dbNSFP for missense mutations, ENCODE regulatory summary tracks and conservation scores.

Availability and implementation

The web tools are freely available at http://genome.ucsc.edu/ and the underlying database is available for download at http://hgdownload.cse.ucsc.edu/ The software (written in C and Javascript) is available from https://genome-store.ucsc.edu/ and is freely available for academic and non-profit usage; commercial users must obtain a license.

Contact

angie@soe.ucsc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-06 +,"Development of a global hybrid forest mask through the synergy of remote sensing, crowdsourcing and FAO statistics","A number of global and regional maps of forest extent are available, but when compared spatially, there are large areas of disagreement. Moreover, there is currently no global forest map that is consistent with forest statistics from FAO (Food and Agriculture Organization of the United Nations). By combining these diverse data sources into a single forest cover product, it is possible to produce a global forest map that is more accurate than the individual input layers and to produce a map that is consistent with FAO statistics. In this paper we applied geographically weighted regression (GWR) to integrate eight different forest products into three global hybrid forest cover maps at a 1km resolution for the reference year 2000. Input products included global land cover and forest maps at varying resolutions from 30m to 1km, mosaics of regional land use/land cover products where available, and the MODIS Vegetation Continuous Fields product. The GWR was trained using crowdsourced data collected via the Geo-Wiki platform and the hybrid maps were then validated using an independent dataset collected via the same system. Three different hybrid maps were produced: two consistent with FAO statistics, one at the country and one at the regional level, and a “best guess” forest cover map that is independent of FAO. Independent validation showed that the “best guess” hybrid product had the best overall accuracy of 93% when compared with the individual input datasets. The global hybrid forest cover maps are available at http://biomass.geo-wiki.org.",2015-06-01 +29697366,Fuzzy set-based generalized multifactor dimensionality reduction analysis of gene-gene interactions.,"BACKGROUND:Gene-gene interactions (GGIs) are a known cause of missing heritability. 
Multifactor dimensionality reduction (MDR) is one of most commonly used methods for GGI detection. The generalized multifactor dimensionality reduction (GMDR) method is an extension of MDR method that is applicable to various types of traits, and allows covariate adjustments. Our previous Fuzzy MDR (FMDR) is another extension for overcoming simple binary classification. FMDR uses continuous member-ship values instead of binary membership values 0 and 1, improving power for detecting causal SNPs and more intuitive interpretations in real data analysis. Here, we propose the fuzzy generalized multifactor dimensionality reduction (FGMDR) method, as a combined analysis of fuzzy set-based analysis and GMDR method, to detect GGIs associated with diseases using fuzzy set theory. RESULTS:Through simulation studies for different types of traits, the proposed FGMDR showed a higher detection ratio of causal SNPs, compared to GMDR. We then applied FGMDR to two real data: Crohn's disease (CD) data from the Wellcome Trust Case Control Consortium (WTCCC) with a binary phenotype and the Homeostasis Model Assessment of Insulin Resistance (HOMA-IR) data from Korean population with a continuous phenotype. The interactions derived by our method include the pre-reported interactions associated with phenotypes. CONCLUSIONS:The proposed FGMDR performs well for GGI detection with covariate adjustments. 
The program written in R for FGMDR is available at http://statgen.snu.ac.kr/software/FGMDR .",2018-04-20 +27043825,"CPAD, Curated Protein Aggregation Database: A Repository of Manually Curated Experimental Data on Protein and Peptide Aggregation.","Accurate distinction between peptide sequences that can form amyloid-fibrils or amorphous β-aggregates, identification of potential aggregation prone regions in proteins, and prediction of change in aggregation rate of a protein upon mutation(s) are critical to research on protein misfolding diseases, such as Alzheimer's and Parkinson's, as well as biotechnological production of protein based therapeutics. We have developed a Curated Protein Aggregation Database (CPAD), which has collected results from experimental studies performed by scientific community aimed at understanding protein/peptide aggregation. CPAD contains more than 2300 experimentally observed aggregation rates upon mutations in known amyloidogenic proteins. Each entry includes numerical values for the following parameters: change in rate of aggregation as measured by fluorescence intensity or turbidity, name and source of the protein, Uniprot and Protein Data Bank codes, single point as well as multiple mutations, and literature citation. The data in CPAD has been supplemented with five different types of additional information: (i) Amyloid fibril forming hexa-peptides, (ii) Amorphous β-aggregating hexa-peptides, (iii) Amyloid fibril forming peptides of different lengths, (iv) Amyloid fibril forming hexa-peptides whose crystal structures are available in the Protein Data Bank (PDB) and (v) Experimentally validated aggregation prone regions found in amyloidogenic proteins. Furthermore, CPAD is linked to other related databases and resources, such as Uniprot, Protein Data Bank, PUBMED, GAP, TANGO, WALTZ etc. We have set up a web interface with different search and display options so that users have the ability to get the data in multiple ways. 
CPAD is freely available at http://www.iitm.ac.in/bioinfo/CPAD/. The potential applications of CPAD have also been discussed.",2016-04-04 +24885229,A comprehensive assessment of the transcriptome of cork oak (Quercus suber) through EST sequencing.,"

Background

Cork oak (Quercus suber) is one of the rare trees with the ability to produce cork, a material widely used to make wine bottle stoppers, flooring and insulation materials, among many other uses. The molecular mechanisms of cork formation are still poorly understood, in great part due to the difficulty in studying a species with a long life-cycle and for which there is scarce molecular/genomic information. Cork oak forests are of great ecological importance and represent a major economic and social resource in Southern Europe and Northern Africa. However, global warming is threatening the cork oak forests by imposing thermal, hydric and many types of novel biotic stresses. Despite the economic and social value of the Q. suber species, few genomic resources have been developed, useful for biotechnological applications and improved forest management.

Results

We generated in excess of 7 million sequence reads, by pyrosequencing 21 normalized cDNA libraries derived from multiple Q. suber tissues and organs, developmental stages and physiological conditions. We deployed a stringent sequence processing and assembly pipeline that resulted in the identification of ~159,000 unigenes. These were annotated according to their similarity to known plant genes, to known Interpro domains, GO classes and E.C. numbers. The phylogenetic extent of this ESTs set was investigated, and we found that cork oak revealed a significant new gene space that is not covered by other model species or EST sequencing projects. The raw data, as well as the full annotated assembly, are now available to the community in a dedicated web portal at http://www.corkoakdb.org.

Conclusions

This genomic resource represents the first trancriptome study in a cork producing species. It can be explored to develop new tools and approaches to understand stress responses and developmental processes in forest trees, as well as the molecular cascades underlying cork differentiation and disease response.",2014-05-15 +26888907,Ensembl regulation resources. ,"New experimental techniques in epigenomics allow researchers to assay a diversity of highly dynamic features such as histone marks, DNA modifications or chromatin structure. The study of their fluctuations should provide insights into gene expression regulation, cell differentiation and disease. The Ensembl project collects and maintains the Ensembl regulation data resources on epigenetic marks, transcription factor binding and DNA methylation for human and mouse, as well as microarray probe mappings and annotations for a variety of chordate genomes. From this data, we produce a functional annotation of the regulatory elements along the human and mouse genomes with plans to expand to other species as data becomes available. Starting from well-studied cell lines, we will progressively expand our library of measurements to a greater variety of samples. Ensembl's regulation resources provide a central and easy-to-query repository for reference epigenomes. As with all Ensembl data, it is freely available at http://www.ensembl.org, from the Perl and REST APIs and from the public Ensembl MySQL database server at ensembldb.ensembl.org. Database URL: http://www.ensembl.org.",2016-02-17 +25378301,miRDB: an online resource for microRNA target prediction and functional annotations.,"MicroRNAs (miRNAs) are small non-coding RNAs that are extensively involved in many physiological and disease processes. One major challenge in miRNA studies is the identification of genes regulated by miRNAs. To this end, we have developed an online resource, miRDB (http://mirdb.org), for miRNA target prediction and functional annotations. 
Here, we describe recently updated features of miRDB, including 2.1 million predicted gene targets regulated by 6709 miRNAs. In addition to presenting precompiled prediction data, a new feature is the web server interface that allows submission of user-provided sequences for miRNA target prediction. In this way, users have the flexibility to study any custom miRNAs or target genes of interest. Another major update of miRDB is related to functional miRNA annotations. Although thousands of miRNAs have been identified, many of the reported miRNAs are not likely to play active functional roles or may even have been falsely identified as miRNAs from high-throughput studies. To address this issue, we have performed combined computational analyses and literature mining, and identified 568 and 452 functional miRNAs in humans and mice, respectively. These miRNAs, as well as associated functional annotations, are presented in the FuncMir Collection in miRDB.",2014-11-05 +27570672,Using a Novel Ontology to Inform the Discovery of Therapeutic Peptides from Animal Venoms.,"Venoms and venom-derived compounds constitute a rich and largely unexplored source of potentially therapeutic compounds. To facilitate biomedical research, it is necessary to design a robust informatics infrastructure that will allow semantic computation of venom concepts in a standardized, consistent manner. We have designed an ontology of venom-related concepts - named Venom Ontology - that reuses an existing public data source: UniProt's Tox-Prot database. In addition to describing the ontology and its construction, we have performed three separate case studies demonstrating its utility: (1) An exploration of venom peptide similarity networks within specific genera; (2) A broad overview of the distribution of available data among common taxonomic groups spanning the known tree of life; and (3) An analysis of the distribution of venom complexity across those same taxonomic groups. 
Venom Ontology is publicly available on BioPortal at http://bioportal.bioontology.org/ontologies/CU-VO.",2016-07-20 +25032988,Horizontal integration of OMIM across the medical school preclinical curriculum for early reinforcement of clinical genetics principles.,"

Purpose

With the relentless expansion of genetics into every field of medicine, stronger preclinical and clinical medical student education in genetics is needed. The explosion of genetic information cannot be addressed by simply adding content hours. We proposed that students be provided a tool to access accurate clinical information on genetic conditions and, through this tool, build life-long learning habits to carry them through their medical careers.

Methods

Surveys conducted at the Johns Hopkins University School of Medicine revealed that medical students in all years lacked confidence when approaching genetic conditions and lacked a reliable resource for accurate genetic information. In response, the school created a horizontal thread that stretches across the first-year curriculum and is devoted to teaching students how to use Online Mendelian Inheritance in Man (OMIM) (http://omim.org) and the databases to which it links as a starting point for approaching genetic conditions.

Results

The thread improved the first-year students' confidence in clinical genetics concepts and encouraged use of OMIM as a primary source for genetic information. Most students showed confidence in OMIM as a learning tool and wanted to see the thread repeated in subsequent years.

Conclusion

Incorporating OMIM into the preclinical curriculum improved students' confidence in clinical genetics concepts.",2014-07-17 +29718427,iPath3.0: interactive pathways explorer v3.,"iPath3.0 (http://pathways.embl.de) is a web-application for the visualization and analysis of cellular pathways. It is freely available and open to everyone. Currently it is based on four KEGG global maps, which summarize up to 158 traditional KEGG pathway maps, 192 KEGG modules and other metabolic elements into one connected and manually curated metabolic network. Users can fully customize these networks and interactively explore them through its redesigned, fast and lightweight interface, which highlights general metabolic trends in multi-omics data. It also offers navigation at various levels of details to help users further investigate those trends and ultimately uncover novel biological insights. Support for multiple experimental conditions and time-series datasets, tools for generation of customization data, programmatic access, and a free user accounts system were introduced in this version to further streamline its workflow.",2018-07-01 +27797540,Child behavior and sibling relationship quality: A cross-lagged analysis.,"Bidirectional associations between sibling relationships and children's problem behaviors are robust, and links with prosocial behavior have also been reported. Using cross-lagged models, we were able to conservatively test temporal directions of links between positive and negative aspects of sibling relationships and children's prosocial behavior and conduct problems across a 3-year time span in middle childhood. The Avon Longitudinal Study of Parents and Children (ALSPAC; http://www.bristol.ac.uk/alspac/researchers/data-access/data-dictionary/) is an ongoing population-based study designed to investigate the effects of a wide range of factors on children's health and development. 
For the purposes of the current analyses, we included 2,043 ALSPAC families who had just 1 older sibling as well as the target child, with an age gap of no more than 5 years. Mothers reported about the quality of the sibling relationship and both children's prosocial behavior and conduct problems when the target child was 4 years of age and again when the target child was 7 years old. Confirming our hypothesis, individual child behavior was predictive of sibling relationship quality, and sibling relationship quality was predictive of later child behavior, providing robust evidence of bidirectionality for both prosocial behavior and conduct problems. It would be consistent to expect that an improvement in either sibling relationship quality or individual children's behavior could have a positive spill over effect. We also found evidence of older sibling dominance in the domain of prosocial behavior and the positive aspects of sibling interaction. (PsycINFO Database Record",2016-10-31 +28053168,ccNET: Database of co-expression networks with functional modules for diploid and polyploid Gossypium.,"Plant genera with both diploid and polyploid species are a common evolutionary occurrence. Polyploids, especially allopolyploids such as cotton and wheat, are a great model system for heterosis research. Here, we have integrated genome sequences and transcriptome data of Gossypium species to construct co-expression networks and identified functional modules from different cotton species, including 1155 and 1884 modules in G. arboreum and G. hirsutum, respectively. We overlayed the gene expression results onto the co-expression network. We further provided network comparison analysis for orthologous genes across the diploid and allotetraploid Gossypium We also constructed miRNA-target networks and predicted PPI networks for both cotton species. 
Furthermore, we integrated in-house ChIP-seq data of histone modification (H3K4me3) together with cis-element analysis and gene sets enrichment analysis tools for studying possible gene regulatory mechanism in Gossypium species. Finally, we have constructed an online ccNET database (http://structuralbiology.cau.edu.cn/gossypium) for comparative gene functional analyses at a multi-dimensional network and epigenomic level across diploid and polyploid Gossypium species. The ccNET database will be beneficial for community to yield novel insights into gene/module functions during cotton development and stress response, and might be useful for studying conservation and diversity in other polyploid plants, such as T. aestivum and Brassica napus.",2016-10-07 +30923135,Diploid Genome Assembly of the Wine Grape Carménère.,"In this genome report, we describe the sequencing and annotation of the genome of the wine grape Carménère (clone 02, VCR-702). Long considered extinct, this old French wine grape variety is now cultivated mostly in Chile where it was imported in the 1850s just before the European phylloxera epidemic. Genomic DNA was sequenced using Single Molecule Real Time technology and assembled with FALCON-Unzip, a diploid-aware assembly pipeline. To optimize the contiguity and completeness of the assembly, we tested about a thousand combinations of assembly parameters, sequencing coverage, error correction and repeat masking methods. The final scaffolds provide a complete and phased representation of the diploid genome of this wine grape. Comparison of the two haplotypes revealed numerous heterozygous variants, including loss-of-function ones, some of which in genes associated with polyphenol biosynthesis. Comparisons with other publicly available grape genomes and transcriptomes showed the impact of structural variation on gene content differences between Carménère and other wine grape cultivars. 
Among the putative cultivar-specific genes, we identified genes potentially involved in aroma production and stress responses. The genome assembly of Carménère expands the representation of the genomic variability in grapes and will enable studies that aim to understand its distinctive organoleptic and agronomical features and assess its still elusive extant genetic variability. A genome browser for Carménère, its annotation, and an associated blast tool are available at http://cantulab.github.io/data.",2019-05-07 +,"Characterization of the genome and transcriptome of the blue tit Cyanistes caeruleus: polymorphisms, sex‐biased expression and selection signals","Decoding genomic sequences and determining their variation within populations has potential to reveal adaptive processes and unravel the genetic basis of ecologically relevant trait variation within a species. The blue tit Cyanistes caeruleus – a long‐time ecological model species – has been used to investigate fitness consequences of variation in mating and reproductive behaviour. However, very little is known about the underlying genetic changes due to natural and sexual selection in the genome of this songbird. As a step to bridge this gap, we assembled the first draft genome of a single blue tit, mapped the transcriptome of five females and five males to this reference, identified genomewide variants and performed sex‐differential expression analysis in the gonads, brain and other tissues. In the gonads, we found a high number of sex‐biased genes, and of those, a similar proportion were sex‐limited (genes only expressed in one sex) in males and females. However, in the brain, the proportion of female‐limited genes within the female‐biased gene category (82%) was substantially higher than the proportion of male‐limited genes within the male‐biased category (6%). This suggests a predominant on‐off switching mechanism for the female‐limited genes. 
In addition, most male‐biased genes were located on the Z‐chromosome, indicating incomplete dosage compensation for the male‐biased genes. We called more than 500 000 SNPs from the RNA‐seq data. Heterozygote detection in the single reference individual was highly congruent between DNA‐seq and RNA‐seq calling. Using information from these polymorphisms, we identified potential selection signals in the genome. We list candidate genes which can be used for further sequencing and detailed selection studies, including genes potentially related to meiotic drive evolution. A public genome browser of the blue tit with the described information is available at http://public-genomes-ngs.molgen.mpg.de.",2016-03-01 +29949959,COSSMO: predicting competitive alternative splice site selection using deep learning.,"

Motivation

Alternative splice site selection is inherently competitive and the probability of a given splice site to be used also depends on the strength of neighboring sites. Here, we present a new model named the competitive splice site model (COSSMO), which explicitly accounts for these competitive effects and predicts the percent selected index (PSI) distribution over any number of putative splice sites. We model an alternative splicing event as the choice of a 3' acceptor site conditional on a fixed upstream 5' donor site or the choice of a 5' donor site conditional on a fixed 3' acceptor site. We build four different architectures that use convolutional layers, communication layers, long short-term memory and residual networks, respectively, to learn relevant motifs from sequence alone. We also construct a new dataset from genome annotations and RNA-Seq read data that we use to train our model.

Results

COSSMO is able to predict the most frequently used splice site with an accuracy of 70% on unseen test data, and achieve an R2 of 0.6 in modeling the PSI distribution. We visualize the motifs that COSSMO learns from sequence and show that COSSMO recognizes the consensus splice site sequences and many known splicing factors with high specificity.

Availability and implementation

Model predictions, our training dataset, and code are available from http://cossmo.genes.toronto.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +28789374,Integrated analysis of differential expression and alternative splicing of non-small cell lung cancer based on RNA sequencing.,"Non-small cell lung cancer (NSCLC) is the most common type of lung cancer, with high morbidity and mortality rates. Numerous diagnosis and treatment methods have been proposed, and the prognosis of NSCLC has improved to a certain extent. However, the mechanisms of NSCLC remain largely unknown, and additional studies are required. In the present study, the RNA sequencing dataset of NSCLC was downloaded from the Gene Expression Omnibus (http://www.ncbi.nlm.nih.gov/geo/). The clean reads obtained from the raw data were mapped to the University of California Santa Cruz human genome (hg19), based on TopHat, and were assembled into transcripts via Cufflink. The differential expression (DE) and differential alternative splicing (DAS) genes were screened out through Cuffdiff and rMATS, respectively. The significantly enriched gene ontology (GO) terms and Kyoto Encyclopedia of Genes and Genomes pathways were obtained through the Database of Annotation, Visualization and Integrated Discovery (DAVID). Different numbers of DE and DAS genes were identified in different types of NSCLC samples, but a number of common functions and pathways were obtained, including biological processes associated with abnormal immune and cell activity. GO terms and pathways associated with substance metabolism, including the insulin signaling pathway and oxidative phosphorylation, were enriched in DAS genes rather than DE genes. 
Integrated analysis of differential expression and alternative splicing may be helpful in understanding the mechanisms of NSCLC, in addition to its early diagnosis and treatment.",2017-06-02 +27907889,denovo-db: a compendium of human de novo variants.,"Whole-exome and whole-genome sequencing have facilitated the large-scale discovery of de novo variants in human disease. To date, most de novo discovery through next-generation sequencing focused on congenital heart disease and neurodevelopmental disorders (NDDs). Currently, de novo variants are one of the most significant risk factors for NDDs with a substantial overlap of genes involved in more than one NDD. To facilitate better usage of published data, provide standardization of annotation, and improve accessibility, we created denovo-db (http://denovo-db.gs.washington.edu), a database for human de novo variants. As of July 2016, denovo-db contained 40 different studies and 32,991 de novo variants from 23,098 trios. Database features include basic variant information (chromosome location, change, type); detailed annotation at the transcript and protein levels; severity scores; frequency; validation status; and, most importantly, the phenotype of the individual with the variant. We included a feature on our browsable website to download any query result, including a downloadable file of the full database with additional variant details. denovo-db provides necessary information for researchers to compare their data to other individuals with the same phenotype and also to controls allowing for a better understanding of the biology of de novo variants and their contribution to disease.",2016-10-05 +30445438,Characteristics of plant circular RNAs. ,"Circular RNA (circRNA) is a kind of covalently closed single-stranded RNA molecules that have been proved to play important roles in transcriptional regulation of genes in diverse species. 
With the rapid development of bioinformatics tools, a huge number (95143) of circRNAs have been identified from different plant species, providing an opportunity for uncovering the overall characteristics of plant circRNAs. Here, based on publicly available circRNAs, we comprehensively analyzed characteristics of plant circRNAs with the help of various bioinformatics tools as well as in-house scripts and workflows, including the percentage of coding genes generating circRNAs, the frequency of alternative splicing events of circRNAs, the non-canonical splicing signals of circRNAs and the networks involving circRNAs, miRNAs and mRNAs. All this information has been integrated into an upgraded online database, PlantcircBase 3.0 (http://ibi.zju.edu.cn/plantcircbase/). In this database, we provided browse, search and visualization tools as well as a web-based blast tool, BLASTcirc, for prediction of circRNAs from query sequences based on searching against plant genomes and transcriptomes.",2018-11-15 +29950003,Driver gene mutations based clustering of tumors: methods and applications.,"

Motivation

Somatic mutations in proto-oncogenes and tumor suppressor genes constitute a major category of causal genetic abnormalities in tumor cells. The mutation spectra of thousands of tumors have been generated by The Cancer Genome Atlas (TCGA) and other whole genome (exome) sequencing projects. A promising approach to utilizing these resources for precision medicine is to identify genetic similarity-based sub-types within a cancer type and relate the pinpointed sub-types to the clinical outcomes and pathologic characteristics of patients.

Results

We propose two novel methods, ccpwModel and xGeneModel, for mutation-based clustering of tumors. In the former, binary variables indicating the status of cancer driver genes in tumors and the genes' involvement in the core cancer pathways are treated as the features in the clustering process. In the latter, the functional similarities of putative cancer driver genes and their confidence scores as the 'true' driver genes are integrated with the mutation spectra to calculate the genetic distances between tumors. We apply both methods to the TCGA data of 16 cancer types. Promising results are obtained when these methods are compared to state-of-the-art approaches as to the associations between the determined tumor clusters and patient race (or survival time). We further extend the analysis to detect mutation-characterized transcriptomic prognostic signatures, which are directly relevant to the etiology of carcinogenesis.

Availability and implementation

R codes and example data for ccpwModel and xGeneModel can be obtained from http://webusers.xula.edu/kzhang/ISMB2018/ccpw_xGene_software.zip.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +29726965,PBRpredict-Suite: a suite of models to predict peptide-recognition domain residues from protein sequence.,"Motivation:Machine learning plays a substantial role in bioscience owing to the explosive growth in sequence data and the challenging application of computational methods. Peptide-recognition domains (PRDs) are critical as they promote coupled-binding with short peptide-motifs of functional importance through transient interactions. It is challenging to build a reliable predictor of peptide-binding residue in proteins with diverse types of PRDs from protein sequence alone. On the other hand, it is vital to cope up with the sequencing speed and to broaden the scope of study. Results:In this paper, we propose a machine-learning-based tool, named PBRpredict, to predict residues in peptide-binding domains from protein sequence alone. To develop a generic predictor, we train the models on peptide-binding residues of diverse types of domains. As inputs to the models, we use a high-dimensional feature set of chemical, structural and evolutionary information extracted from protein sequence. We carefully investigate six different state-of-the-art classification algorithms for this application. Finally, we use the stacked generalization approach to non-linearly combine a set of complementary base-level learners using a meta-level learner which outperformed the winner-takes-all approach. The proposed predictor is found competitive based on statistical evaluation. Availability and implementation:PBRpredict-Suite software: http://cs.uno.edu/~tamjid/Software/PBRpredict/pbrpredict-suite.zip. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-10-01 +29669011,AnnotSV: an integrated tool for structural variations annotation.,"Summary:Structural Variations (SV) are a major source of variability in the human genome that shaped its actual structure during evolution. Moreover, many human diseases are caused by SV, highlighting the need to accurately detect those genomic events but also to annotate them and assist their biological interpretation. Therefore, we developed AnnotSV that compiles functionally, regulatory and clinically relevant information and aims at providing annotations useful to (i) interpret SV potential pathogenicity and (ii) filter out SV potential false positive. In particular, AnnotSV reports heterozygous and homozygous counts of single nucleotide variations (SNVs) and small insertions/deletions called within each SV for the analyzed patients, this genomic information being extremely useful to support or question the existence of an SV. We also report the computed allelic frequency relative to overlapping variants from DGV (MacDonald et al., 2014), that is especially powerful to filter out common SV. To delineate the strength of AnnotSV, we annotated the 4751 SV from one sample of the 1000 Genomes Project, integrating the sample information of four million of SNV/indel, in less than 60 s. Availability and implementation:AnnotSV is implemented in Tcl and runs in command line on all platforms. The source code is available under the GNU GPL license. Source code, README and Supplementary data are available at http://lbgi.fr/AnnotSV/. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-10-01 +27888793,InverPep: A database of invertebrate antimicrobial peptides.,"

Objectives

The aim of this work was to construct InverPep, a database specialised in experimentally validated antimicrobial peptides (AMPs) from invertebrates.

Methods

AMP data contained in InverPep were manually curated from other databases and the scientific literature. MySQL was integrated with the development platform Laravel; this framework allows to integrate programming in PHP with HTML and was used to design the InverPep web page's interface. InverPep contains 18 separated fields, including InverPep code, phylum and species source, peptide name, sequence, peptide length, secondary structure, molar mass, charge, isoelectric point, hydrophobicity, Boman index, aliphatic index and percentage of hydrophobic amino acids. CALCAMPI, an algorithm to calculate the physicochemical properties of multiple peptides simultaneously, was programmed in PERL language.

Results

To date, InverPep contains 702 experimentally validated AMPs from invertebrate species. All of the peptides contain information associated with their source, physicochemical properties, secondary structure, biological activity and links to external literature. Most AMPs in InverPep have a length between 10 and 50 amino acids, a positive charge, a Boman index between 0 and 2 kcal/mol, and 30-50% hydrophobic amino acids. InverPep includes 33 AMPs not reported in other databases. Besides, CALCAMPI and statistical analysis of InverPep data is presented. The InverPep database is available in English and Spanish.

Conclusions

InverPep is a useful database to study invertebrate AMPs and its information could be used for the design of new peptides. The user-friendly interface of InverPep and its information can be freely accessed via a web-based browser at http://ciencias.medellin.unal.edu.co/gruposdeinvestigacion/prospeccionydisenobiomoleculas/InverPep/public/home_en.",2016-11-19 +27099553,The Analysis Portal and the Swedish LifeWatch e-infrastructure for biodiversity research.,"

Background

During the last years, more and more online portals were generated and are now available for ecologists to run advanced models with extensive data sets. Some examples are the Biodiversity Virtual e-Laboratory (BioVel) Portal (https://portal.biovel.eu) for ecological niche modelling and the Mobyle SNAP Workbench (https://snap.hpc.ncsu.edu) for evolutionary and population genetics analysis. Such portals have the main goal to facilitate the run of advanced models, through access to large-capacity computers or servers. In this study, we present the Analysis Portal (www.analysisportal.se), which is a part of the Swedish LifeWatch e-infrastructure for biodiversity research that combines a variety of Swedish web services to perform different kinds of dataprocessing.

New information

For the first time, the Swedish Analysis Portal for integrated analysis of species occurrence data is described in detail. It was launched in 2013 and today, over 60 Million Swedish species observation records can be assessed, visualized and analyzed via the portal. Datasets can be assembled using sophisticated filtering tools, and combined with environmental and climatic data from a wide range of providers. Different validation tools, for example the official Swedish taxon concept database Dyntaxa, ensure high data quality. Results can be downloaded in different formats as maps, tables, diagrams and reports.",2016-03-23 +29456555,A Bootstrap Based Measure Robust to the Choice of Normalization Methods for Detecting Rhythmic Features in High Dimensional Data.,"Motivation: Gene-expression data obtained from high throughput technologies are subject to various sources of noise and accordingly the raw data are pre-processed before formally analyzed. Normalization of the data is a key pre-processing step, since it removes systematic variations across arrays. There are numerous normalization methods available in the literature. Based on our experience, in the context of oscillatory systems, such as cell-cycle, circadian clock, etc., the choice of the normalization method may substantially impact the determination of a gene to be rhythmic. Thus rhythmicity of a gene can purely be an artifact of how the data were normalized. Since the determination of rhythmic genes is an important component of modern toxicological and pharmacological studies, it is important to determine truly rhythmic genes that are robust to the choice of a normalization method. Results: In this paper we introduce a rhythmicity measure and a bootstrap methodology to detect rhythmic genes in an oscillatory system. 
Although the proposed methodology can be used for any high-throughput gene expression data, in this paper we illustrate the proposed methodology using several publicly available circadian clock microarray gene-expression datasets. We demonstrate that the choice of normalization method has very little effect on the proposed methodology. Specifically, for any pair of normalization methods considered in this paper, the resulting values of the rhythmicity measure are highly correlated. Thus it suggests that the proposed measure is robust to the choice of a normalization method. Consequently, the rhythmicity of a gene is potentially not a mere artifact of the normalization method used. Lastly, as demonstrated in the paper, the proposed bootstrap methodology can also be used for simulating data for genes participating in an oscillatory system using a reference dataset. Availability: A user friendly code implemented in R language can be downloaded from http://www.eio.uva.es/~miguel/robustdetectionprocedure.html.",2018-02-02 +26849207,Integration of Multiple Genomic and Phenotype Data to Infer Novel miRNA-Disease Associations.,"MicroRNAs (miRNAs) play an important role in the development and progression of human diseases. The identification of disease-associated miRNAs will be helpful for understanding the molecular mechanisms of diseases at the post-transcriptional level. Based on different types of genomic data sources, computational methods for miRNA-disease association prediction have been proposed. However, individual source of genomic data tends to be incomplete and noisy; therefore, the integration of various types of genomic data for inferring reliable miRNA-disease associations is urgently needed. 
In this study, we present a computational framework, CHNmiRD, for identifying miRNA-disease associations by integrating multiple genomic and phenotype data, including protein-protein interaction data, gene ontology data, experimentally verified miRNA-target relationships, disease phenotype information and known miRNA-disease connections. The performance of CHNmiRD was evaluated by experimentally verified miRNA-disease associations, which achieved an area under the ROC curve (AUC) of 0.834 for 5-fold cross-validation. In particular, CHNmiRD displayed excellent performance for diseases without any known related miRNAs. The results of case studies for three human diseases (glioblastoma, myocardial infarction and type 1 diabetes) showed that all of the top 10 ranked miRNAs having no known associations with these three diseases in existing miRNA-disease databases were directly or indirectly confirmed by our latest literature mining. All these results demonstrated the reliability and efficiency of CHNmiRD, and it is anticipated that CHNmiRD will serve as a powerful bioinformatics method for mining novel disease-related miRNAs and providing a new perspective into molecular mechanisms underlying human diseases at the post-transcriptional level. CHNmiRD is freely available at http://www.bio-bigdata.com/CHNmiRD.",2016-02-05 +29204470,Survey data on cost and benefits of climate smart agricultural technologies in western Kenya.,"This paper describes data that were collected in three counties of western Kenya, namely Siaya, Bungoma, and Kakamega. The main aim of collecting the data was to assess the climate smartness, profitability and returns of soil protection and rehabilitation measures. The data were collected from 88 households. The households were selected using simple random sampling technique from a primary sampling frame of 180 farm households provided by the ministry of agriculture through the counties agricultural officers. 
The surveys were administered by trained research assistants using a structured questionnaire that was designed in Census and Survey Processing System (CSPro). Later, the data was exported to STATA version 14.1 for cleaning and management purposes. The data are hosted in an open source dataverse to allow other researchers generate new insights from the data (http://dx.doi.org/10.7910/DVN/K6JQXC).",2017-11-11 +26527722,2016 update of the PRIDE database and its related tools.,"The PRoteomics IDEntifications (PRIDE) database is one of the world-leading data repositories of mass spectrometry (MS)-based proteomics data. Since the beginning of 2014, PRIDE Archive (http://www.ebi.ac.uk/pride/archive/) is the new PRIDE archival system, replacing the original PRIDE database. Here we summarize the developments in PRIDE resources and related tools since the previous update manuscript in the Database Issue in 2013. PRIDE Archive constitutes a complete redevelopment of the original PRIDE, comprising a new storage backend, data submission system and web interface, among other components. PRIDE Archive supports the most-widely used PSI (Proteomics Standards Initiative) data standard formats (mzML and mzIdentML) and implements the data requirements and guidelines of the ProteomeXchange Consortium. The wide adoption of ProteomeXchange within the community has triggered an unprecedented increase in the number of submitted data sets (around 150 data sets per month). We outline some statistics on the current PRIDE Archive data contents. We also report on the status of the PRIDE related stand-alone tools: PRIDE Inspector, PRIDE Converter 2 and the ProteomeXchange submission tool. 
Finally, we will give a brief update on the resources under development 'PRIDE Cluster' and 'PRIDE Proteomes', which provide a complementary view and quality-scored information of the peptide and protein identification data available in PRIDE Archive.",2015-11-02 +28891848,Adjuvant Chemotherapy After Preoperative Chemoradiation Improves Survival in Patients With Locally Advanced Rectal Cancer.,"

Background

Practice guidelines differ in their support of adjuvant chemotherapy use in patients who received preoperative chemoradiation for rectal cancer.

Objective

The purpose of this study was to evaluate the impact of adjuvant chemotherapy among patients with locally advanced rectal cancer who received neoadjuvant chemoradiation and surgery.

Design

This was a retrospective study. Multivariable Cox proportional hazard modeling was used to evaluate the adjusted survival differences.

Settings

Data were collected from the National Cancer Database.

Patients

Adults with pathologic stage II and III rectal adenocarcinoma who received neoadjuvant chemoradiation and surgery were included.

Main outcome measures

Overall survival was measured.

Results

Among 12,696 patients included, 4023 (32%) received adjuvant chemotherapy. The use of adjuvant chemotherapy increased over the study period from 23% to 36%. Although older age and black race were associated with a lower likelihood of receiving adjuvant chemotherapy, patients with higher education level and stage III disease were more likely to receive adjuvant chemotherapy (all p < 0.05). At 7 years, overall survival was improved among patients who received adjuvant chemotherapy (60% vs. 55%; p < 0.001). After risk adjustment, the use of adjuvant chemotherapy was associated with improved survival (HR = 0.81 (95% CI, 0.72-0.91); p < 0.001). In the subgroup of patients with stage II disease, survival was also improved among patients who received adjuvant chemotherapy (68% vs 58% at 7 y; p < 0.001; HR = 0.70 (95% CI, 0.57-0.87); p = 0.002). Among patients with stage III disease, the use of adjuvant chemotherapy was associated with a smaller but persistent survival benefit (56% vs 51% at 7 y; p = 0.017; HR = 0.85 (95% CI, 0.74-0.98); p = 0.026).

Limitations

The study was limited by its potential for selection bias and inability to compare specific chemotherapy regimens.

Conclusions

The use of adjuvant chemotherapy among patients with rectal cancer who received preoperative chemoradiation conferred a survival benefit. This study emphasizes the importance of adjuvant chemotherapy in the management of rectal cancer and advocates for its increased use in the setting of neoadjuvant therapy. See Video Abstract at http://link.lww.com/DCR/A428.",2017-10-01 +30202931,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Evaluation and Treatment of Patients With Thoracolumbar Spine Trauma: Hemodynamic Management.,"

Question

Does the active maintenance of arterial blood pressure after injury affect clinical outcomes in patients with thoracic and lumbar fractures?

Recommendations

There is insufficient evidence to recommend for or against the use of active maintenance of arterial blood pressure after thoracolumbar spinal cord injury. Grade of Recommendation: Grade Insufficient However, in light of published data from pooled (cervical and thoracolumbar) spinal cord injury patient populations, clinicians may choose to maintain mean arterial blood pressures >85 mm Hg in an attempt to improve neurological outcomes. Consensus Statement by the Workgroup The full version of the guideline can be reviewed at: https://www.cns.org/guideline-chapters/congress-neurological-surgeons-systematic-review-evidence-based-guidelines/chapter_6.",2019-01-01 +23577215,Molecular Optical Simulation Environment (MOSE): a platform for the simulation of light propagation in turbid media.,"The study of light propagation in turbid media has attracted extensive attention in the field of biomedical optical molecular imaging. In this paper, we present a software platform for the simulation of light propagation in turbid media named the ""Molecular Optical Simulation Environment (MOSE)"". Based on the gold standard of the Monte Carlo method, MOSE simulates light propagation both in tissues with complicated structures and through free-space. In particular, MOSE synthesizes realistic data for bioluminescence tomography (BLT), fluorescence molecular tomography (FMT), and diffuse optical tomography (DOT). The user-friendly interface and powerful visualization tools facilitate data analysis and system evaluation. 
As a major measure for resource sharing and reproducible research, MOSE aims to provide freeware for research and educational institutions, which can be downloaded at http://www.mosetm.net.",2013-04-08 +27577934,Tiered Human Integrated Sequence Search Databases for Shotgun Proteomics.,"The results of analysis of shotgun proteomics mass spectrometry data can be greatly affected by the selection of the reference protein sequence database against which the spectra are matched. For many species there are multiple sources from which somewhat different sequence sets can be obtained. This can lead to confusion about which database is best in which circumstances-a problem especially acute in human sample analysis. All sequence databases are genome-based, with sequences for the predicted gene and their protein translation products compiled. Our goal is to create a set of primary sequence databases that comprise the union of sequences from many of the different available sources and make the result easily available to the community. We have compiled a set of four sequence databases of varying sizes, from a small database consisting of only the ∼20,000 primary isoforms plus contaminants to a very large database that includes almost all nonredundant protein sequences from several sources. This set of tiered, increasingly complete human protein sequence databases suitable for mass spectrometry proteomics sequence database searching is called the Tiered Human Integrated Search Proteome set. In order to evaluate the utility of these databases, we have analyzed two different data sets, one from the HeLa cell line and the other from normal human liver tissue, with each of the four tiers of database complexity. The result is that approximately 0.8%, 1.1%, and 1.5% additional peptides can be identified for Tiers 2, 3, and 4, respectively, as compared with the Tier 1 database, at substantially increasing computational cost. 
This increase in computational cost may be worth bearing if the identification of sequence variants or the discovery of sequences that are not present in the reviewed knowledge base entries is an important goal of the study. We find that it is useful to search a data set against a simpler database, and then check the uniqueness of the discovered peptides against a more complex database. We have set up an automated system that downloads all the source databases on the first of each month and automatically generates a new set of search databases and makes them available for download at http://www.peptideatlas.org/thisp/ .",2016-09-12 +31342429,Obtaining Soft Matter Models of Proteins and their Phase Behavior.,"Globular proteins are roughly spherical biomolecules with attractive and highly directional interactions. This microscopic observation motivates describing these proteins as patchy particles: hard spheres with attractive surface patches. Mapping a biomolecule to a patchy model requires simplifying effective protein-protein interactions, which in turn provides a microscopic understanding of the protein solution behavior. The patchy model can indeed be fully analyzed, including its phase diagram. In this chapter, we detail the methodology of mapping a given protein to a patchy model and of determining the phase diagram of the latter. We also briefly describe the theory upon which the methodology is based, provide practical information, and discuss potential pitfalls. Data and scripts relevant to this work have been archived and can be accessed at https://doi.org/10.7924/r4ww7bs1p .",2019-01-01 +30419809,WebNetCoffee: a web-based application to identify functionally conserved proteins from Multiple PPI networks.,"

Background

The discovery of functionally conserved proteins is a tough and important task in system biology. Global network alignment provides a systematic framework to search for these proteins from multiple protein-protein interaction (PPI) networks. Although there exist many web servers for network alignment, no one allows to perform global multiple network alignment tasks on users' test datasets.

Results

Here, we developed a web server WebNetcoffee based on the algorithm of NetCoffee to search for a global network alignment from multiple networks. To build a series of online test datasets, we manually collected 218,339 proteins, 4,009,541 interactions and many other associated protein annotations from several public databases. All these datasets and alignment results are available for download, which can support users to perform algorithm comparison and downstream analyses.

Conclusion

WebNetCoffee provides a versatile, interactive and user-friendly interface for easily running alignment tasks on both online datasets and users' test datasets, managing submitted jobs and visualizing the alignment results through a web browser. Additionally, our web server also facilitates graphical visualization of induced subnetworks for a given protein and its neighborhood. To the best of our knowledge, it is the first web server that facilitates the performing of global alignment for multiple PPI networks.

Availability

http://www.nwpu-bioinformatics.com/WebNetCoffee.",2018-11-12 +30367742,The Molybdenum Storage Protein: A soluble ATP hydrolysis-dependent molybdate pump.,"A continuous FeMo cofactor supply for nitrogenase maturation is ensured in Azotobacter vinelandii by developing a cage-like molybdenum storage protein (MoSto) capable to store ca. 120 molybdate molecules ( MoO 4 2 - ) as discrete polyoxometalate (POM) clusters. To gain mechanistic insight into this process, MoSto was characterized by Mo and ATP/ADP content, structural, and kinetic analysis. We defined three functionally relevant states specified by the presence of both ATP/ADP and POM clusters (MoStofunct ), of only ATP/ADP (MoStobasal ) and of neither ATP/ADP nor POM clusters (MoStozero ), respectively. POM clusters are only produced when ATP is hydrolyzed to ADP and phosphate. Vmax was ca. 13 μmolphosphate ·min-1 ·mg-1 and Km for molybdate and ATP/Mg2+ in the low micromolar range. ATP hydrolysis presumably proceeds at subunit α, inferred from a highly occupied α-ATP/Mg2+ and a weaker occupied β-ATP/no Mg2+ -binding site found in the MoStofunct structure. Several findings indicate that POM cluster storage is separated into a rapid ATP hydrolysis-dependent molybdate transport across the protein cage wall and a slow molybdate assembly induced by combined auto-catalytic and protein-driven processes. The cage interior, the location of the POM cluster depot, is locked in all three states and thus not rapidly accessible for molybdate from the outside. Based on Vmax , the entire Mo storage process should be completed in less than 10 s but requires, according to the molybdate content analysis, ca. 15 min. Long-time incubation of MoStobasal with nonphysiological high molybdate amounts implicates an equilibrium in and outside the cage and POM cluster self-formation without ATP hydrolysis. 
DATABASES: The crystal structures MoSto in the MoSto-F6, MoSto-F7, MoStobasal , MoStozero , and MoSto-F1vitro states were deposited to PDB under the accession numbers PDB 6GU5, 6GUJ, 6GWB, 6GWV, and 6GX4.",2018-11-12 +31179178,Detection of condition-specific marker genes from RNA-seq data with MGFR.,"The identification of condition-specific genes is key to advancing our understanding of cell fate decisions and disease development. Differential gene expression analysis (DGEA) has been the standard tool for this task. However, the amount of samples that modern transcriptomic technologies allow us to study, makes DGEA a daunting task. On the other hand, experiments with low numbers of replicates lack the statistical power to detect differentially expressed genes. We have previously developed MGFM, a tool for marker gene detection from microarrays, that is particularly useful in the latter case. Here, we have adapted the algorithm behind MGFM to detect markers in RNA-seq data. MGFR groups samples with similar gene expression levels and flags potential markers of a sample type if their highest expression values represent all replicates of this type. We have benchmarked MGFR against other methods and found that its proposed markers accurately characterize the functional identity of different tissues and cell types in standard and single cell RNA-seq datasets. Then, we performed a more detailed analysis for three of these datasets, which profile the transcriptomes of different human tissues, immune and human blastocyst cell types, respectively. MGFR's predicted markers were compared to gold-standard lists for these datasets and outperformed the other marker detectors. Finally, we suggest novel candidate marker genes for the examined tissues and cell types. 
MGFR is implemented as a freely available Bioconductor package (https://doi.org/doi:10.18129/B9.bioc.MGFR), which facilitates its use and integration with bioinformatics pipelines.",2019-05-27 +31039056,Associations between Maternal Tobacco Smoke Exposure and the Cord Blood [Formula: see text] DNA Methylome.,"

Background

Maternal tobacco smoke exposure has been associated with altered DNA methylation. However, previous studies largely used methylation arrays, which cover a small fraction of CpGs, and focused on whole cord blood.

Objectives

The current study examined the impact of in utero exposure to maternal tobacco smoke on the cord blood [Formula: see text] DNA methylome.

Methods

The methylomes of 20 Hispanic white newborns ([Formula: see text] exposed to any maternal tobacco smoke in pregnancy; [Formula: see text] unexposed) from the Maternal and Child Health Study (MACHS) were profiled by whole-genome bisulfite sequencing (median coverage: [Formula: see text]). Statistical analyses were conducted using the Regression Analysis of Differential Methylation (RADMeth) program because it performs well on low-coverage data (minimizes false positives and negatives).

Results

We found that 10,381 CpGs were differentially methylated by tobacco smoke exposure [neighbor-adjusted p-values that are additionally corrected for multiple testing based on the Benjamini-Hochberg method for controlling the false discovery rate (FDR) [Formula: see text]]. From these CpGs, RADMeth identified 557 differentially methylated regions (DMRs) that were overrepresented ([Formula: see text]) in important regulatory regions, including enhancers. Of nine DMRs that could be queried in a reduced representation bisulfite sequencing (RRBS) study of adult [Formula: see text] cells ([Formula: see text] smokers; [Formula: see text] nonsmokers), four replicated ([Formula: see text]). Additionally, a CpG in the promoter of SLC7A8 (percent methylation difference: [Formula: see text] comparing exposed to unexposed) replicated ([Formula: see text]) in an EPIC (Illumina) array study of cord blood [Formula: see text] cells ([Formula: see text] exposed to sustained maternal tobacco smoke; [Formula: see text] unexposed) and in a study of adult [Formula: see text] cells across two platforms (EPIC: [Formula: see text] smokers; [Formula: see text] nonsmokers; 450K: [Formula: see text] smokers; [Formula: see text] nonsmokers).

Conclusions

Maternal tobacco smoke exposure in pregnancy is associated with cord blood [Formula: see text] DNA methylation in key regulatory regions, including enhancers. While we used a method that performs well on low-coverage data, we cannot exclude the possibility that some results may be false positives. However, we identified a differentially methylated CpG in amino acid transporter SLC7A8 that is highly reproducible, which may be sensitive to cigarette smoke in both cord blood and adult [Formula: see text] cells. https://doi.org/10.1289/EHP3398.",2019-04-01 +30522270,Acoustic interactions for robot audition: A corpus of real auditory scenes.,"The Acoustic Interactions for Robot Audition corpus is introduced for research on sound source localization and separation, and for multi-user speech recognition. Its aim is to evaluate and train Robot Audition techniques, as well as Auditory Scene Analysis in general. It was recorded in six real-life environments with different noise presence and reverberation time, using two array configurations: an equilateral triangle, and a three-dimensional 16-microphone array set over a hollow plastic body. It includes clean speech data for static sources and tracking information for mobile sources. It is freely available at https://aira.iimas.unam.mx/.",2018-11-01 +30407534,The UCSC Genome Browser database: 2019 update.,"The UCSC Genome Browser (https://genome.ucsc.edu) is a graphical viewer for exploring genome annotations. For almost two decades, the Browser has provided visualization tools for genetics and molecular biology and continues to add new data and features. This year, we added a new tool that lets users interactively arrange existing graphing tracks into new groups. Other software additions include new formats for chromosome interactions, a ChIP-Seq peak display for track hubs and improved support for HGVS. 
On the annotation side, we have added gnomAD, TCGA expression, RefSeq Functional elements, GTEx eQTLs, CRISPR Guides, SNPpedia and created a 30-way primate alignment on the human genome. Nine assemblies now have RefSeq-mapped gene models.",2019-01-01 +31694759,"Medium-chain triglycerides improved cognition and lipid metabolomics in mild to moderate Alzheimer's disease patients with APOE4-/-: A double-blind, randomized, placebo-controlled crossover trial.","

Background

Previous clinical and animal studies suggested that medium-chain triglycerides (MCT) might be an alternative energy substrate for the brain and might benefit patients with Alzheimer's disease (AD), but the clinical evidence is not substantial or totally convincing.

Objective

To investigate the effects of MCT on cognitive ability in patients with mild to moderate AD and explore the changes in peripheral blood metabolomics.

Methods

A double-blind, randomized, placebo-controlled crossover study was undertaken in 53 mild to moderate AD patients. Participants were randomized between two sequences (placebo followed by MCT or MCT followed by placebo) and took MCT jelly or placebo jelly (canola oil) by mouth three times daily (total daily fat dose: 17.3 g MCT, or 19.7 g canola oil) for 30 days per phase. The primary outcome was cognition as measured by the Alzheimer's Disease Assessment Scale-Cognitive Subscale, Chinese version (ADAS-Cog-C). The secondary outcome was self-care as measured by the activities of daily living scale (ADL) and changes in plasma metabolites.

Results

This study showed a significant (p < 0.01) reduction in ADAS-Cog-C scores between the MCT (2.62 points below baseline) and placebo interventions (2.57 points above baseline). Data from 46 (86.8%) APOE4-/- subjects who completed the entire study were analyzed. Changes in ADL scores were not significantly different between the MCT and placebo interventions (p > 0.05). The concentrations of TC, HDL-C, β-hydroxybutyrate and acetoacetate were significantly higher in the MCT group than in the placebo group (p < 0.05). Lysophosphatidylcholine 16:0 (LysoPC (16:0)), LysoPC (P-18:0), LysoPC (P-18:1(9Z)), LysoPC (20:2(11Z,14Z)), and LysoPC (22:5(4Z,7Z,10Z,13Z,16Z)) were significantly increased after MCT intervention, and the concentrations of LysoPC (18:0), palmitic acid, linoleic acid, oleic acid, and 7,12-dimethylbenz[a]anthracene were significantly decreased (p < 0.05), whereas no significant changes appeared after the placebo intervention. Androstenedione concentration increased after placebo intervention. Furthermore, a significant negative correlation was observed between changes in LysoPC (P-18:1(9Z)) and ADAS-Cog-C scores after MCT intervention (r = -0.1472, p < 0.05).

Conclusions

MCT had positive effects on cognitive ability in mild to moderate AD patients with APOE4-/-. These effects of MCT might be related to the metabolism of LysoPC, oleic acid, linoleic acid and palmitic acid, in addition to the ketogenic effect.

Study id number

ChiCTR-IOR-16009737.

Registry website

WHO ICTRP Search Portal - http://apps.who.int/trialsearch/Default.aspx.",2019-10-22 +30229106,Interaction analysis data of simulation gaming events using the serious game Aqua Republica.,"The data presented in this article is related to the research article entitled 'Serious games as a catalyst for boundary crossing, collaboration and knowledge co-creation in a watershed governance context' (Jean et al., In press) [1]. Understanding the team dynamics related to serious game simulations is critical for understanding the potential uses and functions of these simulations for knowledge co-creation (Medema et al., 2016) [2]. The data was obtained from four independent serious game simulation events and consists of n = 40 participants. Participants were divided into small teams and were then recorded playing the serious game Aqua Republica (http://aquarepublica.com/). Interactions were tallied and interaction maps created using the visualization software GEPHI (https://gephi.org/). The interaction maps allow for a visual representation of the progression of interactions over the course of four subsequent phases of gameplay (Jordan and Henderson, 1995) [3].",2018-06-27 +26519466,Information Commons for Rice (IC4R).,"Rice is the most important staple food for a large part of the world's human population and also a key model organism for plant research. Here, we present Information Commons for Rice (IC4R; http://ic4r.org), a rice knowledgebase featuring adoption of an extensible and sustainable architecture that integrates multiple omics data through community-contributed modules. Each module is developed and maintained by different committed groups, deals with data collection, processing and visualization, and delivers data on-demand via web services. 
In the current version, IC4R incorporates a variety of rice data through multiple committed modules, including genome-wide expression profiles derived entirely from RNA-Seq data, resequencing-based genomic variations obtained from re-sequencing data of thousands of rice varieties, plant homologous genes covering multiple diverse plant species, post-translational modifications, rice-related literatures and gene annotations contributed by the rice research community. Unlike extant related databases, IC4R is designed for scalability and sustainability and thus also features collaborative integration of rice data and low costs for database update and maintenance. Future directions of IC4R include incorporation of other omics data and association of multiple omics data with agronomically important traits, dedicating to build IC4R into a valuable knowledgebase for both basic and translational researches in rice.",2015-10-30 +29850773,TomoEED: fast edge-enhancing denoising of tomographic volumes.,"Summary:TomoEED is an optimized software tool for fast feature-preserving noise filtering of large 3D tomographic volumes on CPUs and GPUs. The tool is based on the anisotropic nonlinear diffusion method. It has been developed with special emphasis in the reduction of the computational demands by using different strategies, from the algorithmic to the high performance computing perspectives. TomoEED manages to filter large volumes in a matter of minutes in standard computers. Availability and implementation:TomoEED has been developed in C. It is available for Linux platforms at http://www.cnb.csic.es/%7ejjfernandez/tomoeed. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-11-01 +30423837,3DAirSig: A Framework for Enabling In-Air Signatures Using a Multi-Modal Depth Sensor. ,"In-air signature is a new modality which is essential for user authentication and access control in noncontact mode and has been actively studied in recent years. 
However, it has been treated as a conventional online signature, which is essentially a 2D spatial representation. Notably, this modality bears a lot more potential due to an important hidden depth feature. Existing methods for in-air signature verification neither capture this unique depth feature explicitly nor fully explore its potential in verification. Moreover, these methods are based on heuristic approaches for fingertip or hand palm center detection, which are not feasible in practice. Inspired by the great progress in deep-learning-based hand pose estimation, we propose a real-time in-air signature acquisition method which estimates hand joint positions in 3D using a single depth image. The predicted 3D position of fingertip is recorded for each frame. We present four different implementations of a verification module, which are based on the extracted depth and spatial features. An ablation study was performed to explore the impact of the depth feature in particular. For matching, we employed the most commonly used multidimensional dynamic time warping (MD-DTW) algorithm. We created a new database which contains 600 signatures recorded from 15 different subjects. Extensive evaluations were performed on our database. Our method, called 3DAirSig, achieved an equal error rate (EER) of 0 . 46 %. Experiments showed that depth itself is an important feature, which is sufficient for in-air signature verification. The dataset will be publicly available (https://goo.gl/yFdfdL).",2018-11-10 +28365730,Biocuration in the structure-function linkage database: the anatomy of a superfamily. ,"With ever-increasing amounts of sequence data available in both the primary literature and sequence repositories, there is a bottleneck in annotating molecular function to a sequence. This article describes the biocuration process and methods used in the structure-function linkage database (SFLD) to help address some of the challenges. 
We discuss how the hierarchy within the SFLD allows us to infer detailed functional properties for functionally diverse enzyme superfamilies in which all members are homologous, conserve an aspect of their chemical function and have associated conserved structural features that enable the chemistry. Also presented is the Enzyme Structure-Function Ontology (ESFO), which has been designed to capture the relationships between enzyme sequence, structure and function that underlie the SFLD and is used to guide the biocuration processes within the SFLD. http://sfld.rbvi.ucsf.edu/.",2017-01-01 +23104376,SwissSidechain: a molecular and structural database of non-natural sidechains.,"Amino acids form the building blocks of all proteins. Naturally occurring amino acids are restricted to a few tens of sidechains, even when considering post-translational modifications and rare amino acids such as selenocysteine and pyrrolysine. However, the potential chemical diversity of amino acid sidechains is nearly infinite. Exploiting this diversity by using non-natural sidechains to expand the building blocks of proteins and peptides has recently found widespread applications in biochemistry, protein engineering and drug design. Despite these applications, there is currently no unified online bioinformatics resource for non-natural sidechains. With the SwissSidechain database (http://www.swisssidechain.ch), we offer a central and curated platform about non-natural sidechains for researchers in biochemistry, medicinal chemistry, protein engineering and molecular modeling. SwissSidechain provides biophysical, structural and molecular data for hundreds of commercially available non-natural amino acid sidechains, both in l- and d-configurations. The database can be easily browsed by sidechain names, families or physico-chemical properties. 
We also provide plugins to seamlessly insert non-natural sidechains into peptides and proteins using molecular visualization software, as well as topologies and parameters compatible with molecular mechanics software.",2012-10-26 +27987179,Plant Promoter Database (PPDB).,"ppdb ( http://ppdb.agr.gifu-u.ac.jp ) is a web-based plant promoter database that provides promoter information of each gene in genomes of Arabidopsis, rice, poplar, and Physcomitrella patens. In this database, recognition of a promoter structure is achieved by annotating genome sequences with our sequence lists of bioinformatically identified octamers for core promoter structure (TATA boxes, Initiators, Y Patches, GA and CA Elements) and regulatory element groups (REGs), together with information of transcription start sites (TSSs) that have been experimentally identified. Our promoter elements are octamer sequences that show strongly biased localization profiles in the promoter region, extracted by the local distribution of short sequence (LDSS) analysis. In addition, REGs are linked with the information of the PLACE database and also with their physiological roles that are predicted using large-scale gene expression data.",2017-01-01 +27603574,Palindrome analyser - A new web-based server for predicting and evaluating inverted repeats in nucleotide sequences.,"DNA cruciform structures play an important role in the regulation of natural processes including gene replication and expression, as well as nucleosome structure and recombination. They have also been implicated in the evolution and development of diseases such as cancer and neurodegenerative disorders. Cruciform structures are formed by inverted repeats, and their stability is enhanced by DNA supercoiling and protein binding. They have received broad attention because of their important roles in biology. Computational approaches to study inverted repeats have allowed detailed analysis of genomes. 
However, currently there are no easily accessible and user-friendly tools that can analyse inverted repeats, especially among long nucleotide sequences. We have developed a web-based server, Palindrome analyser, which is a user-friendly application for analysing inverted repeats in various DNA (or RNA) sequences including genome sequences and oligonucleotides. It allows users to search and retrieve desired gene/nucleotide sequence entries from the NCBI databases, and provides data on length, sequence, locations and energy required for cruciform formation. Palindrome analyser also features an interactive graphical data representation of the distribution of the inverted repeats, with options for sorting according to the length of inverted repeat, length of loop, and number of mismatches. Palindrome analyser can be accessed at http://bioinformatics.ibp.cz.",2016-09-04 +24214962,The eukaryotic linear motif resource ELM: 10 years and counting.,"The eukaryotic linear motif (ELM http://elm.eu.org) resource is a hub for collecting, classifying and curating information about short linear motifs (SLiMs). For >10 years, this resource has provided the scientific community with a freely accessible guide to the biology and function of linear motifs. The current version of ELM contains ∼200 different motif classes with over 2400 experimentally validated instances manually curated from >2000 scientific publications. Furthermore, detailed information about motif-mediated interactions has been annotated and made available in standard exchange formats. Where appropriate, links are provided to resources such as switches.elm.eu.org and KEGG pathways.",2013-11-07 +21989406,"ExoCarta 2012: database of exosomal proteins, RNA and lipids.","Exosomes are membraneous nanovesicles of endocytic origin released by most cell types from diverse organisms; they play a critical role in cell-cell communication. 
ExoCarta (http://www.exocarta.org) is a manually curated database of exosomal proteins, RNA and lipids. The database catalogs information from both published and unpublished exosomal studies. The mode of exosomal purification and characterization, the biophysical and molecular properties are listed in the database aiding biomedical scientists in assessing the quality of the exosomal preparation and the corresponding data obtained. Currently, ExoCarta (Version 3.1) contains information on 11,261 protein entries, 2375 mRNA entries and 764 miRNA entries that were obtained from 134 exosomal studies. In addition to the data update, as a new feature, lipids identified in exosomes are added to ExoCarta. We believe that this free web-based community resource will aid researchers in identifying molecular signatures (proteins/RNA/lipids) that are specific to certain tissue/cell type derived exosomes and trigger new exosomal studies.",2011-10-11 +29931149,SpliceRover: interpretable convolutional neural networks for improved splice site prediction.,"

Motivation

During the last decade, improvements in high-throughput sequencing have generated a wealth of genomic data. Functionally interpreting these sequences and finding the biological signals that are hallmarks of gene function and regulation is currently mostly done using automated genome annotation platforms, which mainly rely on integrated machine learning frameworks to identify different functional sites of interest, including splice sites. Splicing is an essential step in the gene regulation process, and the correct identification of splice sites is a major cornerstone in a genome annotation system.

Results

In this paper, we present SpliceRover, a predictive deep learning approach that outperforms the state-of-the-art in splice site prediction. SpliceRover uses convolutional neural networks (CNNs), which have been shown to obtain cutting edge performance on a wide variety of prediction tasks. We adapted this approach to deal with genomic sequence inputs, and show it consistently outperforms already existing approaches, with relative improvements in prediction effectiveness of up to 80.9% when measured in terms of false discovery rate. However, a major criticism of CNNs concerns their 'black box' nature, as mechanisms to obtain insight into their reasoning processes are limited. To facilitate interpretability of the SpliceRover models, we introduce an approach to visualize the biologically relevant information learnt. We show that our visualization approach is able to recover features known to be important for splice site prediction (binding motifs around the splice site, presence of polypyrimidine tracts and branch points), as well as reveal new features (e.g. several types of exclusion patterns near splice sites).

Availability and implementation

SpliceRover is available as a web service. The prediction tool and instructions can be found at http://bioit2.irc.ugent.be/splicerover/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +29267877,GIGI-Quick: a fast approach to impute missing genotypes in genome-wide association family data.,"Summary:Genome-wide association studies have become common over the last ten years, with a shift towards targeting rare variants, especially in pedigree-data. Despite lower costs, sequencing for rare variants still remains expensive. To have a relatively large sample with acceptable cost, imputation approaches may be used, such as GIGI for pedigree data. GIGI is an imputation method that handles large pedigrees and is particularly good for rare variant imputation. GIGI requires a subset of individuals in a pedigree to be fully sequenced, while other individuals are sequenced only at relevant markers. The imputation will infer the missing genotypes at untyped markers. Running GIGI on large pedigrees for large numbers of markers can be very time consuming. We present GIGI-Quick as a method to efficiently split GIGI's input, run GIGI in parallel and efficiently merge the output to reduce the runtime with the number of cores. This allows obtaining imputation results faster, and therefore all subsequent association analyses. Availability and implementation:GIGI-Quick is open source and publicly available via: https://cse-git.qcri.org/Imputation/GIGI-Quick. Contact:msaad@hbku.edu.qa. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +30851278,"GUILDify v2.0: A Tool to Identify Molecular Networks Underlying Human Diseases, Their Comorbidities and Their Druggable Targets.","The genetic basis of complex diseases involves alterations on multiple genes. Unraveling the interplay between these genetic factors is key to the discovery of new biomarkers and treatments. 
In 2014, we introduced GUILDify, a web server that searches for genes associated to diseases, finds novel disease genes applying various network-based prioritization algorithms and proposes candidate drugs. Here, we present GUILDify v2.0, a major update and improvement of the original method, where we have included protein interaction data for seven species and 22 human tissues and incorporated the disease-gene associations from DisGeNET. To infer potential disease relationships associated with multi-morbidities, we introduced a novel feature for estimating the genetic and functional overlap of two diseases using the top-ranking genes and the associated enrichment of biological functions and pathways (as defined by GO and Reactome). The analysis of this overlap helps to identify the mechanistic role of genes and protein-protein interactions in comorbidities. Finally, we provided an R package, guildifyR, to facilitate programmatic access to GUILDify v2.0 (http://sbi.upf.edu/guildify2).",2019-03-07 +30936940,IPCAPS: an R package for iterative pruning to capture population structure.,"

Background

Resolving population genetic structure is challenging, especially when dealing with closely related or geographically confined populations. Although Principal Component Analysis (PCA)-based methods and genomic variation with single nucleotide polymorphisms (SNPs) are widely used to describe shared genetic ancestry, improvements can be made especially when fine-scale population structure is the target.

Results

This work presents an R package called IPCAPS, which uses SNP information for resolving possibly fine-scale population structure. The IPCAPS routines are built on the iterative pruning Principal Component Analysis (ipPCA) framework that systematically assigns individuals to genetically similar subgroups. In each iteration, our tool is able to detect and eliminate outliers, hereby avoiding severe misclassification errors.

Conclusions

IPCAPS supports different measurement scales for variables used to identify substructure. Hence, panels of gene expression and methylation data can be accommodated as well. The tool can also be applied in patient sub-phenotyping contexts. IPCAPS is developed in R and is freely available from http://bio3.giga.ulg.ac.be/ipcaps.",2019-03-20 +30217992,Predicting proteome dynamics using gene expression data.,"While protein concentrations are physiologically most relevant, measuring them globally is challenging. mRNA levels are easier to measure genome-wide and hence are typically used to infer the corresponding protein abundances. The steady-state condition (assumption that protein levels remain constant) has typically been used to calculate protein concentrations, as it is mathematically convenient, even though it is often not satisfied. Here, we propose a method to estimate genome-wide protein abundances without this assumption. Instead, we assume that the system returns to its baseline at the end of the experiment, which is true for cyclic phenomena (e.g. cell cycle) and many time-course experiments. Our approach only requires availability of gene expression and protein half-life data. As proof-of-concept, we predicted proteome dynamics associated with the budding yeast cell cycle, the results are available for browsing online at http://dynprot.cent.uw.edu.pl/ . The approach was validated experimentally by verifying that the predicted protein concentration changes were consistent with measurements for all proteins tested. Additionally, if proteomic data are available as well, we can also infer changes in protein half-lives in response to posttranslational regulation, as we did for Clb2, a post-translationally regulated protein. The predicted changes in Clb2 abundance are consistent with earlier observations.",2018-09-14 +27623959,"BmncRNAdb: a comprehensive database of non-coding RNAs in the silkworm, Bombyx mori.","

Background

Long non-coding RNAs (lncRNAs) may play critical roles in a wide range of developmental processes of higher organisms. Recently, lncRNAs have been widely identified across eukaryotes and many databases of lncRNAs have been developed for human, mouse, fruit fly, etc. However, there is rare information about them in the only completely domesticated insect, silkworm (Bombyx mori).

Description

In this study, we systematically scanned lncRNAs using the available silkworm RNA-seq data and public unigenes. Finally, we identified and collected 6281 lncRNAs in the silkworm. Besides, we also collected 1986 microRNAs (miRNAs) from previous studies. Then, we organized them into a comprehensive and web-based database, BmncRNAdb. This database offers a user-friendly interface for data browse and online analysis as well as the three online tools for users to predict the target genes of lncRNA or miRNA.

Conclusions

We have systematically identified and collected the silkworm lncRNAs and constructed a comprehensive database of the silkworm lncRNAs and miRNAs. This work gives a glimpse into lncRNAs of the silkworm and lays foundations for the ncRNAs study of the silkworm and other insects in the future. The BmncRNAdb is freely available at http://gene.cqu.edu.cn/BmncRNAdb/index.php .",2016-09-13 +31081335,Proteomics Standards Initiative Extended FASTA Format.,"Mass-spectrometry-based proteomics enables the high-throughput identification and quantification of proteins, including sequence variants and post-translational modifications (PTMs) in biological samples. However, most workflows require that such variations be included in the search space used to analyze the data, and doing so remains challenging with most analysis tools. In order to facilitate the search for known sequence variants and PTMs, the Proteomics Standards Initiative (PSI) has designed and implemented the PSI extended FASTA format (PEFF). PEFF is based on the very popular FASTA format but adds a uniform mechanism for encoding substantially more metadata about the sequence collection as well as individual entries, including support for encoding known sequence variants, PTMs, and proteoforms. The format is very nearly backward compatible, and as such, existing FASTA parsers will require little or no changes to be able to read PEFF files as FASTA files, although without supporting any of the extra capabilities of PEFF. PEFF is defined by a full specification document, controlled vocabulary terms, a set of example files, software libraries, and a file validator. Popular software and resources are starting to support PEFF, including the sequence search engine Comet and the knowledge bases neXtProt and UniProtKB. 
Widespread implementation of PEFF is expected to further enable proteogenomics and top-down proteomics applications by providing a standardized mechanism for encoding protein sequences and their known variations. All the related documentation, including the detailed file format specification and example files, are available at http://www.psidev.info/peff .",2019-05-23 +28453681,Sphinx: merging knowledge-based and ab initio approaches to improve protein loop prediction.,"

Motivation

Loops are often vital for protein function, however, their irregular structures make them difficult to model accurately. Current loop modelling algorithms can mostly be divided into two categories: knowledge-based, where databases of fragments are searched to find suitable conformations and ab initio, where conformations are generated computationally. Existing knowledge-based methods only use fragments that are the same length as the target, even though loops of slightly different lengths may adopt similar conformations. Here, we present a novel method, Sphinx, which combines ab initio techniques with the potential extra structural information contained within loops of a different length to improve structure prediction.

Results

We show that Sphinx is able to generate high-accuracy predictions and decoy sets enriched with near-native loop conformations, performing better than the ab initio algorithm on which it is based. In addition, it is able to provide predictions for every target, unlike some knowledge-based methods. Sphinx can be used successfully for the difficult problem of antibody H3 prediction, outperforming RosettaAntibody, one of the leading H3-specific ab initio methods, both in accuracy and speed.

Availability and implementation

Sphinx is available at http://opig.stats.ox.ac.uk/webapps/sphinx.

Contact

deane@stats.ox.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +30835730,Sensitivity analysis of agent-based simulation utilizing massively parallel computation and interactive data visualization.,"An essential step in the analysis of agent-based simulation is sensitivity analysis, which namely examines the dependency of parameter values on simulation results. Although a number of approaches have been proposed for sensitivity analysis, they still have limitations in exhaustivity and interpretability. In this study, we propose a novel methodology for sensitivity analysis of agent-based simulation, MASSIVE (Massively parallel Agent-based Simulations and Subsequent Interactive Visualization-based Exploration). MASSIVE takes a unique paradigm, which is completely different from those of sensitivity analysis methods developed so far. By combining massively parallel computation and interactive data visualization, MASSIVE enables us to inspect a broad parameter space intuitively. We demonstrated the utility of MASSIVE by its application to cancer evolution simulation, which successfully identified conditions that generate heterogeneous tumors. We believe that our approach would be a de facto standard for sensitivity analysis of agent-based simulation in an era of evergrowing computational technology. All the results from our MASSIVE analysis are available at https://www.hgc.jp/~niiyan/massive.",2019-03-05 +29386051,The proBAM and proBed standard formats: enabling a seamless integration of genomics and proteomics data.,"On behalf of The Human Proteome Organization (HUPO) Proteomics Standards Initiative, we introduce here two novel standard data formats, proBAM and proBed, that have been developed to address the current challenges of integrating mass spectrometry-based proteomics data with genomics and transcriptomics information in proteogenomics studies. 
proBAM and proBed are adaptations of the well-defined, widely used file formats SAM/BAM and BED, respectively, and both have been extended to meet the specific requirements entailed by proteomics data. Therefore, existing popular genomics tools such as SAMtools and Bedtools, and several widely used genome browsers, can already be used to manipulate and visualize these formats ""out-of-the-box."" We also highlight that a number of specific additional software tools, properly supporting the proteomics information available in these formats, are now available providing functionalities such as file generation, file conversion, and data analysis. All the related documentation, including the detailed file format specifications and example files, are accessible at http://www.psidev.info/probam and at http://www.psidev.info/probed .",2018-01-31 +22064862,The Candida genome database incorporates multiple Candida species: multispecies search and analysis tools with curated gene and protein information for Candida albicans and Candida glabrata.,"The Candida Genome Database (CGD, http://www.candidagenome.org/) is an internet-based resource that provides centralized access to genomic sequence data and manually curated functional information about genes and proteins of the fungal pathogen Candida albicans and other Candida species. As the scope of Candida research, and the number of sequenced strains and related species, has grown in recent years, the need for expanded genomic resources has also grown. To answer this need, CGD has expanded beyond storing data solely for C. albicans, now integrating data from multiple species. Herein we describe the incorporation of this multispecies information, which includes curated gene information and the reference sequence for C. glabrata, as well as orthology relationships that interconnect Locus Summary pages, allowing easy navigation between genes of C. albicans and C. glabrata. 
These orthology relationships are also used to predict GO annotations of their products. We have also added protein information pages that display domains, structural information and physicochemical properties; bibliographic pages highlighting important topic areas in Candida biology; and a laboratory strain lineage page that describes the lineage of commonly used laboratory strains. All of these data are freely available at http://www.candidagenome.org/. We welcome feedback from the research community at candida-curator@lists.stanford.edu.",2011-11-07 +27242034,"Abasy Atlas: a comprehensive inventory of systems, global network properties and systems-level elements across bacteria. ","The availability of databases electronically encoding curated regulatory networks and of high-throughput technologies and methods to discover regulatory interactions provides an invaluable source of data to understand the principles underpinning the organization and evolution of these networks responsible for cellular regulation. Nevertheless, data on these sources never goes beyond the regulon level despite the fact that regulatory networks are complex hierarchical-modular structures still challenging our understanding. This brings the necessity for an inventory of systems across a large range of organisms, a key step to rendering feasible comparative systems biology approaches. In this work, we take the first step towards a global understanding of the regulatory networks organization by making a cartography of the functional architectures of diverse bacteria. 
Abasy (Across-bacteria systems) Atlas provides a comprehensive inventory of annotated functional systems, global network properties and systems-level elements (global regulators, modular genes shaping functional systems, basal machinery genes and intermodular genes) predicted by the natural decomposition approach for reconstructed and meta-curated regulatory networks across a large range of bacteria, including pathogenically and biotechnologically relevant organisms. The meta-curation of regulatory datasets provides the most complete and reliable set of regulatory interactions currently available, which can even be projected into subsets by considering the force or weight of evidence supporting them or the systems that they belong to. Besides, Abasy Atlas provides data enabling large-scale comparative systems biology studies aimed at understanding the common principles and particular lifestyle adaptions of systems across bacteria. Abasy Atlas contains systems and system-level elements for 50 regulatory networks comprising 78 649 regulatory interactions covering 42 bacteria in nine taxa, containing 3708 regulons and 1776 systems. All this brings together a large corpus of data that will surely inspire studies to generate hypotheses regarding the principles governing the evolution and organization of systems and the functional architectures controlling them. Database URL: http://abasy.ccg.unam.mx.",2016-05-30 +30034265,"A dataset of molluscan fauna sampled in river estuaries of medium and small size river in Kyushu island, Japan.","

Background

Many studies have evaluated the ecological integrity of large-scale estuaries of continental rivers using biotic indicators such as fish, phytoplankton and benthic communities. However, few studies have focused on the river estuaries of small and medium rivers. Molluscan fauna data in large estuaries or in the estuaries of large rivers have been collected by the National Census on River Environments (conducted by the Ministry of Land, Infrastructure, Transport and Tourism) or National Survey on the Natural Environment (conducted by the Ministry of Environment). On the other hand, molluscan fauna of small and medium rivers are managed by local governments and have rarely been investigated.

New information

This paper provides basic information on the molluscan fauna of 70 rivers in Kyushu, Japan, collected with the aim of conserving estuaries of small and medium rivers. In total, 37 families, 82 species and 21,827 individuals were collected. The data are all accessible from the document ""A dataset of shellfish fauna sampled in estuaries of medium and small rivers in Kyushu, Japan (http://ipt.pensoft.net/resource.do?r=shellfishes_in_kyushu)"". According to the Red Data Book published by the Japanese Ministry of Environment in 2018, 3 species were determined as Critically endangered and Endangered, 6 species were determined as Vulnerable and 13 species were determined as Near Threatened. The proportions of individuals classified as Critically endangered and Endangered from the total number of individuals were extremely low, but the proportions of Near Threatened individuals were high. Our results indicate that the risk of molluscan extinction in small- and medium-sized river estuaries in Kyushu is high and that immediate conservation is necessary.",2018-07-11 +30403770,HITS-PR-HHblits: protein remote homology detection by combining PageRank and Hyperlink-Induced Topic Search. ,"As one of the most important fundamental problems in protein sequence analysis, protein remote homology detection is critical for both theoretical research (protein structure and function studies) and real world applications (drug design). Although several computational predictors have been proposed, their detection performance is still limited. In this study, we treat protein remote homology detection as a document retrieval task, where the proteins are considered as documents and its aim is to find the highly related documents with the query documents in a database. 
A protein similarity network was constructed based on the true labels of proteins in the database, and the query proteins were then connected into the network based on the similarity scores calculated by three ranking methods, including PSI-BLAST, Hmmer and HHblits. The PageRank algorithm and Hyperlink-Induced Topic Search (HITS) algorithm were respectively performed on this network to move the homologous proteins of query proteins to the neighbors of the query proteins in the network. Finally, PageRank and HITS algorithms were combined, and a predictor called HITS-PR-HHblits was proposed to further improve the predictive performance. Tested on the SCOP and SCOPe benchmark datasets, the experimental results showed that the proposed protocols outperformed other state-of-the-art methods. For the convenience of the most experimental scientists, a web server for HITS-PR-HHblits was established at http://bioinformatics.hitsz.edu.cn/HITS-PR-HHblits, by which the users can easily get the results without the need to go through the mathematical details. The HITS-PR-HHblits predictor is a protocol for protein remote homology detection using different sets of programs, which will become a very useful computational tool for proteome analysis.",2018-11-07 +29281278,toxFlow: A Web-Based Application for Read-Across Toxicity Prediction Using Omics and Physicochemical Data.,"We present toxFlow, a web application developed for enrichment analysis of omics data coupled with read-across toxicity prediction. A sequential analysis workflow is suggested where users can filter omics data using enrichment scores and incorporate their findings into a correlation-based read-across technique for predicting the toxicity of a substance based on its analogs. Either embedded or in-house gene signature libraries can be used for enrichment analysis. 
The suggested approach can be used for toxicity prediction of diverse chemical entities; however, this article focuses on the multiperspective characterization of nanoparticles and selects their neighbors based on both physicochemical and biological similarity criteria. In addition, visualization options are offered to interactively explore correlation patterns in the data, whereas results can be exported for further analysis. toxFlow is accessible at http://147.102.86.129:3838/toxflow .",2018-02-26 +23422340,IRootLab: a free and open-source MATLAB toolbox for vibrational biospectroscopy data analysis.,"

Summary

IRootLab is a free and open-source MATLAB toolbox for vibrational biospectroscopy (VBS) data analysis. It offers an object-oriented programming class library, graphical user interfaces (GUIs) and automatic MATLAB code generation. The class library contains a large number of methods, concepts and visualizations for VBS data analysis, some of which are introduced in the toolbox. The GUIs provide an interface to the class library, including a module to merge several spectral files into a dataset. Automatic code allows developers to quickly write VBS data analysis scripts and is a unique resource among tools for VBS. Documentation includes a manual, tutorials, Doxygen-generated reference and a demonstration showcase. IRootLab can handle some of the most popular file formats used in VBS. License: GNU-LGPL.

Availability

Official website: http://irootlab.googlecode.com/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-19 +23865691,Genome-wide in silico screening for microRNA genetic variability in livestock species.,"MicroRNAs are a class of non-coding RNAs that post-transcriptionally regulate target gene expression. Previous studies have shown that microRNA gene variability can interfere with its function, resulting in phenotypic variation. Polymorphisms within microRNA genes present a source of novel biomarkers for phenotypic traits in animal breeding. However, little is known about microRNA genetic variability in livestock species, which is also due to incomplete data in genomic resource databases. Therefore, the aim of this study was to perform a genome-wide in silico screening of genomic sources and determine the genetic variability of microRNA genes in livestock species using mirna sniper 3.0 (http://www.integratomics-time.com/miRNA-SNiPer/), a new version of our previously developed tool. By examining Ensembl and miRBase genome builds, it was possible to design a tool-based generated search of 16 genomes including four livestock species: pig, horse, cattle and chicken. The analysis revealed 65 polymorphisms located within mature microRNA regions in these four species, including 28% within the seed region in cattle and chicken. Polymorphic microRNA genes in cattle and chicken were further examined for mapping to quantitative trait loci regions associated with production and health traits. The developed bioinformatics tool enables the analysis of polymorphic microRNA genes and prioritization of potential regulatory polymorphisms and therefore contributes to the development of microRNA-based biomarkers in livestock species. 
The assembled catalog and the developed tool can serve the animal science community to efficiently select microRNA SNPs for further quantitative and molecular genetic evaluations of their phenotypic effects and causal associations with livestock production traits.",2013-07-19 +27208273,Genome-Wide Inference of Protein-Protein Interaction Networks Identifies Crosstalk in Abscisic Acid Signaling.,"Protein-protein interactions (PPIs) are essential to almost all cellular processes. To better understand the relationships of proteins in Arabidopsis (Arabidopsis thaliana), we have developed a genome-wide protein interaction network (AraPPINet) that is inferred from both three-dimensional structures and functional evidence and that encompasses 316,747 high-confidence interactions among 12,574 proteins. AraPPINet exhibited high predictive power for discovering protein interactions at a 50% true positive rate and for discriminating positive interactions from similar protein pairs at a 70% true positive rate. Experimental evaluation of a set of predicted PPIs demonstrated the ability of AraPPINet to identify novel protein interactions involved in a specific process at an approximately 100-fold greater accuracy than random protein-protein pairs in a test case of abscisic acid (ABA) signaling. Genetic analysis of an experimentally validated, predicted interaction between ARR1 and PYL1 uncovered cross talk between ABA and cytokinin signaling in the control of root growth. Therefore, we demonstrate the power of AraPPINet (http://netbio.sjtu.edu.cn/arappinet/) as a resource for discovering gene function in converging signaling pathways and complex traits in plants.",2016-04-18 +29990017,Fast and Accurate Detection of Complex Imaging Genetics Associations Based on Greedy Projected Distance Correlation.,"Recent advances in imaging genetics produce large amounts of data including functional MRI images, single nucleotide polymorphisms (SNPs), and cognitive assessments. 
Understanding the complex interactions among these heterogeneous and complementary data has the potential to help with diagnosis and prevention of mental disorders. However, limited efforts have been made due to the high dimensionality, group structure, and mixed type of these data. In this paper we present a novel method to detect conditional associations between imaging genetics data. We use projected distance correlation to build a conditional dependency graph among high-dimensional mixed data, then use multiple testing to detect significant group level associations (e.g., ROI-gene). In addition, we introduce a scalable algorithm based on orthogonal greedy algorithm, yielding the greedy projected distance correlation (G-PDC). This can reduce the computational cost, which is critical for analyzing large-volume of imaging genomics data. The results from our simulations demonstrate a higher degree of accuracy with GPDC than distance correlation, Pearson's correlation and partial correlation, especially when the correlation is nonlinear. Finally, we apply our method to the Philadelphia Neurodevelopmental data cohort with 866 samples including fMRI images and SNP profiles. The results uncover several statistically significant and biologically interesting interactions, which are further validated with many existing studies. The Matlab code is available at https://sites.google.com/site/jianfang86/gPDC.",2017-12-13 +22992189,"Evolution, substrate specificity and subfamily classification of glycoside hydrolase family 5 (GH5).","

Background

The large Glycoside Hydrolase family 5 (GH5) groups together a wide range of enzymes acting on β-linked oligo- and polysaccharides, and glycoconjugates from a large spectrum of organisms. The long and complex evolution of this family of enzymes and its broad sequence diversity limits functional prediction. With the objective of improving the differentiation of enzyme specificities in a knowledge-based context, and to obtain new evolutionary insights, we present here a new, robust subfamily classification of family GH5.

Results

About 80% of the current sequences were assigned into 51 subfamilies in a global analysis of all publicly available GH5 sequences and associated biochemical data. Examination of subfamilies with catalytically-active members revealed that one third are monospecific (containing a single enzyme activity), although new functions may be discovered with biochemical characterization in the future. Furthermore, twenty subfamilies presently have no characterization whatsoever and many others have only limited structural and biochemical data. Mapping of functional knowledge onto the GH5 phylogenetic tree revealed that the sequence space of this historical and industrially important family is far from well dispersed, highlighting targets in need of further study. The analysis also uncovered a number of GH5 proteins which have lost their catalytic machinery, indicating evolution towards novel functions.

Conclusion

Overall, the subfamily division of GH5 provides an actively curated resource for large-scale protein sequence annotation for glycogenomics; the subfamily assignments are openly accessible via the Carbohydrate-Active Enzyme database at http://www.cazy.org/GH5.html.",2012-09-20 +27485446,HiPub: translating PubMed and PMC texts to networks for knowledge discovery.,"

Unlabelled

We introduce HiPub, a seamless Chrome browser plug-in that automatically recognizes, annotates and translates biomedical entities from texts into networks for knowledge discovery. Using a combination of two different named-entity recognition resources, HiPub can recognize genes, proteins, diseases, drugs, mutations and cell lines in texts, and achieve high precision and recall. HiPub extracts biomedical entity-relationships from texts to construct context-specific networks, and integrates existing network data from external databases for knowledge discovery. It allows users to add additional entities from related articles, as well as user-defined entities for discovering new and unexpected entity-relationships. HiPub provides functional enrichment analysis on the biomedical entity network, and link-outs to external resources to assist users in learning new entities and relations.

Availability and implementation

HiPub and detailed user guide are available at http://hipub.korea.ac.kr

Contact

kangj@korea.ac.kr, aikchoon.tan@ucdenver.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-08-02 +24870542,A draft map of the human proteome.,"The availability of human genome sequence has transformed biomedical research over the past decade. However, an equivalent map for the human proteome with direct measurements of proteins and peptides does not exist yet. Here we present a draft map of the human proteome using high-resolution Fourier-transform mass spectrometry. In-depth proteomic profiling of 30 histologically normal human samples, including 17 adult tissues, 7 fetal tissues and 6 purified primary haematopoietic cells, resulted in identification of proteins encoded by 17,294 genes accounting for approximately 84% of the total annotated protein-coding genes in humans. A unique and comprehensive strategy for proteogenomic analysis enabled us to discover a number of novel protein-coding regions, which includes translated pseudogenes, non-coding RNAs and upstream open reading frames. This large human proteome catalogue (available as an interactive web-based resource at http://www.humanproteomemap.org) will complement available human genome and transcriptome data to accelerate biomedical research in health and disease.",2014-05-01 +27402678,TMC-SNPdb: an Indian germline variant database derived from whole exome sequences. ,"Cancer is predominantly a somatic disease. A mutant allele present in a cancer cell genome is considered somatic when it's absent in the paired normal genome along with public SNP databases. The current build of dbSNP, the most comprehensive public SNP database, however inadequately represents several non-European Caucasian populations, posing a limitation in cancer genomic analyses of data from these populations. 
We present the Tata Memorial Centre-SNP DataBase (TMC-SNPdb), as the first open source, flexible, upgradable, and freely available SNP database (accessible through dbSNP build 149 and ANNOVAR)-representing 114 309 unique germline variants-generated from whole exome data of 62 normal samples derived from cancer patients of Indian origin. The TMC-SNPdb is presented with a companion subtraction tool that can be executed with command line option or using an easy-to-use graphical user interface with the ability to deplete additional Indian population specific SNPs over and above dbSNP and 1000 Genomes databases. Using an institutional generated whole exome data set of 132 samples of Indian origin, we demonstrate that TMC-SNPdb could deplete 42, 33 and 28% false positive somatic events post dbSNP depletion in Indian origin tongue, gallbladder, and cervical cancer samples, respectively. Beyond cancer somatic analyses, we anticipate utility of the TMC-SNPdb in several Mendelian germline diseases. In addition to dbSNP build 149 and ANNOVAR, the TMC-SNPdb along with the subtraction tool is available for download in the public domain at the following: Database URL: http://www.actrec.gov.in/pi-webpages/AmitDutt/TMCSNP/TMCSNPdp.html.",2016-07-09 +29092929,WebMeV: A Cloud Platform for Analyzing and Visualizing Cancer Genomic Data.,"Although large, complex genomic datasets are increasingly easy to generate, and the number of publicly available datasets in cancer and other diseases is rapidly growing, the lack of intuitive, easy-to-use analysis tools has remained a barrier to the effective use of such data. WebMeV (http://mev.tm4.org) is an open-source, web-based tool that gives users access to sophisticated tools for analysis of RNA-Seq and other data in an interface designed to democratize data access. 
WebMeV combines cloud-based technologies with a simple user interface to allow users to access large public datasets, such as that from The Cancer Genome Atlas or to upload their own. The interface allows users to visualize data and to apply advanced data mining analysis methods to explore the data and draw biologically meaningful conclusions. We provide an overview of WebMeV and demonstrate two simple use cases that illustrate the value of putting data analysis in the hands of those looking to explore the underlying biology of the systems being studied. Cancer Res; 77(21); e11-14. ©2017 AACR.",2017-11-01 +26937539,"Update of the Mental Health Gap Action Programme (mhGAP) Guidelines for Mental, Neurological and Substance Use Disorders, 2015","Mental, neurological, and substance use (MNS) disorders are prevalent in all regions of the world and are major contributors to morbidity and premature mortality. In 2008, the World Health Organization (WHO) developed the Mental Health Gap Action Programme (mhGAP), to facilitate scaling up of care for MNS disorders. A key part of mhGAP is the evidence-based guideline, published in 2010 and available through the mhGAP Evidence Resource Centre (http://www.who.int/mental_health/mhgap/evidence/en/). 
The objectives of the guideline are: To provide up-to-date WHO guidance to facilitate delivery of interventions by non-specialist health care providers in low- and middle-income countries (LAMICs); To assist with the scale up of care for MNS disorders identified as conditions of high priority in LAMICs, specifically: depression, psychosis (including schizophrenia and bipolar disorders), epilepsy, child mental disorders, dementia, alcohol use disorders, drug use disorders and self-harm/suicide; To provide up-to-date WHO guidance that will facilitate the implementation of the WHO Comprehensive Mental Health Action Plan 2013-2020 by health care planners and programme managers in LAMICs.",2016-03-04 +30888625,Summary of Torsades de Pointes (TdP) Reports Associated with Intravenous Drug Formulations Containing the Preservative Chlorobutanol.,"INTRODUCTION:Drug-induced torsades de pointes (TdP) is a potentially lethal ventricular arrhythmia that is associated with drugs that prolong the QT interval on the electrocardiogram (ECG) due to their interference with the cardiac potassium current, IKR. Intravenous (IV) formulations of methadone have been associated with TdP and contain the preservative chlorobutanol, which, like methadone, blocks IKR. The combinations of chlorobutanol with methadone or terfenadine, another IKR blocker, produce synergistic IKR block. OBJECTIVE:The aim of this study was to examine and summarize the evidence available to address the question: what other IV drug formulations contain chlorobutanol and are they associated with TdP? METHODS:IV drug products containing the preservative chlorobutanol were identified by searching the websites DailyMed ( https://dailymed.nlm.nih.gov/dailymed/index.cfm ) and Drugs@FDA ( https://www.accessdata.fda.gov/scripts/cder/daf/ ). 
For each drug identified, PubMed and the FDA's Adverse Event Reporting System (FAERS) were searched for reports of TdP and/or QT prolongation and FAERS data were analyzed for disproportionality of reports. RESULTS:The search found nine drugs (methadone, epinephrine, papaverine, oxytocin, vasopressin, testosterone, estradiol, isoniazid, and desmopressin) that contain chlorobutanol 2.5 (n = 1) or 5.0 mg/mL. All nine drugs had reports of QT prolongation or TdP reported in FAERS and all but estradiol, testosterone, desmopressin, and isoniazid had reports of QT prolongation or TdP in PubMed. Two of the nine drugs (epinephrine and methadone) had positive signals (by disproportionality analysis) for TdP in FAERS (EB05 2.88 and 23.81, respectively) and four (methadone, epinephrine, papaverine, and vasopressin) were reported in published articles as the suspect drugs in cases of TdP. CONCLUSION:The pharmacologic profile of chlorobutanol (synergistic IKR block) and its association with reports of TdP and QT prolongation suggest the need for a full evaluation of its cardiac safety when used as a preservative in IV drug and vitamin formulations.",2019-07-01 +29448075,Improving mass-univariate analysis of neuroimaging data by modelling important unknown covariates: Application to Epigenome-Wide Association Studies.,"Statistical inference on neuroimaging data is often conducted using a mass-univariate model, equivalent to fitting a linear model at every voxel with a known set of covariates. Due to the large number of linear models, it is challenging to check if the selection of covariates is appropriate and to modify this selection adequately. The use of standard diagnostics, such as residual plotting, is clearly not practical for neuroimaging data. However, the selection of covariates is crucial for linear regression to ensure valid statistical inference. In particular, the mean model of regression needs to be reasonably well specified. 
Unfortunately, this issue is often overlooked in the field of neuroimaging. This study aims to adopt the existing Confounder Adjusted Testing and Estimation (CATE) approach and to extend it for use with neuroimaging data. We propose a modification of CATE that can yield valid statistical inferences using Principal Component Analysis (PCA) estimators instead of Maximum Likelihood (ML) estimators. We then propose a non-parametric hypothesis testing procedure that can improve upon parametric testing. Monte Carlo simulations show that the modification of CATE allows for more accurate modelling of neuroimaging data and can in turn yield a better control of False Positive Rate (FPR) and Family-Wise Error Rate (FWER). We demonstrate its application to an Epigenome-Wide Association Study (EWAS) on neonatal brain imaging and umbilical cord DNA methylation data obtained as part of a longitudinal cohort study. Software for this CATE study is freely available at http://www.bioeng.nus.edu.sg/cfa/Imaging_Genetics2.html.",2018-02-12 +27733633,Molecular Assay Validation Using Genomic Sequence Databases.,"Whole-genome sequence databases offer new in silico approaches for designing and validating PCR assays in the clinical microbiology laboratory. An article in this issue of the Journal of Clinical Microbiology (M. J. Jansen van Rensburg, C. Swift, A. J. Cody, C. Jenkins, and M. C. J. Maiden, J Clin Microbiol, 54:2882-2890, 2016, http://dx.doi.org/10.1128/JCM.01522-16) demonstrates the use of publicly available genomic sequence data to evaluate a PCR assay for distinguishing Campylobacter species.",2016-10-12 +31490960,Classification of early and late stage liver hepatocellular carcinoma patients from their genomics and epigenomics profiles.,"BACKGROUND:Liver Hepatocellular Carcinoma (LIHC) is one of the major cancers worldwide, responsible for millions of premature deaths every year. 
Prediction of clinical staging is vital to implement optimal therapeutic strategy and prognostic prediction in cancer patients. However, to date, no method has been developed for predicting the stage of LIHC from the genomic profile of samples. METHODS:The Cancer Genome Atlas (TCGA) dataset of 173 early stage (stage-I), 177 late stage (stage-II, Stage-III and stage-IV) and 50 adjacent normal tissue samples for 60,483 RNA transcripts and 485,577 methylation CpG sites, was extensively analyzed to identify the key transcriptomic expression and methylation-based features using different feature selection techniques. Further, different classification models were developed based on selected key features to categorize different classes of samples implementing different machine learning algorithms. RESULTS:In the current study, in silico models have been developed for classifying LIHC patients in the early vs. late stage and cancerous vs. normal samples using RNA expression and DNA methylation data. TCGA datasets were extensively analyzed to identify differentially expressed RNA transcripts and methylated CpG sites that can discriminate early vs. late stages and cancer vs. normal samples of LIHC with high precision. Naive Bayes model developed using 51 features that combine 21 CpG methylation sites and 30 RNA transcripts achieved maximum MCC (Matthew's correlation coefficient) 0.58 with an accuracy of 78.87% on the validation dataset in discrimination of early and late stage. Additionally, the prediction models developed based on 5 RNA transcripts and 5 CpG sites classify LIHC and normal samples with an accuracy of 96-98% and AUC (Area Under the Receiver Operating Characteristic curve) 0.99. Besides, multiclass models also developed for classifying samples in the normal, early and late stage of cancer and achieved an accuracy of 76.54% and AUC of 0.86. 
CONCLUSION:Our study reveals stage prediction of LIHC samples with high accuracy based on the genomics and epigenomics profiling is a challenging task in comparison to the classification of cancerous and normal samples. Comprehensive analysis, differentially expressed RNA transcripts, methylated CpG sites in LIHC samples and prediction models are available from CancerLSP (http://webs.iiitd.edu.in/raghava/cancerlsp/).",2019-09-06 +22121212,"NCBI Reference Sequences (RefSeq): current status, new features and genome annotation policy.","The National Center for Biotechnology Information (NCBI) Reference Sequence (RefSeq) database is a collection of genomic, transcript and protein sequence records. These records are selected and curated from public sequence archives and represent a significant reduction in redundancy compared to the volume of data archived by the International Nucleotide Sequence Database Collaboration. The database includes over 16,000 organisms, 2.4 × 10(6) genomic records, 13 × 10(6) proteins and 2 × 10(6) RNA records spanning prokaryotes, eukaryotes and viruses (RefSeq release 49, September 2011). The RefSeq database is maintained by a combined approach of automated analyses, collaboration and manual curation to generate an up-to-date representation of the sequence, its features, names and cross-links to related sources of information. We report here on recent growth, the status of curating the human RefSeq data set, more extensive feature annotation and current policy for eukaryotic genome annotation via the NCBI annotation pipeline. More information about the resource is available online (see http://www.ncbi.nlm.nih.gov/RefSeq/).",2011-11-24 +31433236,"Per- and Polyfluoroalkyl Substance Plasma Concentrations and Bone Mineral Density in Midchildhood: A Cross-Sectional Study (Project Viva, United States).","

Background

Identifying factors that impair bone accrual during childhood is a critical step toward osteoporosis prevention. Exposure to per- and polyfluoroalkyl substances (PFASs) has been associated with lower bone mineral density, but data are limited, particularly in children.

Methods

We studied 576 children in Project Viva, a Boston-area cohort of mother/child pairs recruited prenatally from 1999 to 2002. We quantified plasma concentrations of several PFASs and measured areal bone mineral density (aBMD) by dual-energy X-ray absorptiometry (DXA) in midchildhood. We used linear regression to examine associations between plasma concentrations of individual PFASs and aBMD z-score. We used weighted quantile sum (WQS) regression to examine the association of the PFAS mixture with aBMD z-score. All models were adjusted for maternal age, education, annual household income, census tract median household income, and child age, sex, race/ethnicity, dairy intake, physical activity, and year of blood draw.

Results

Children were [[Formula: see text]] [Formula: see text] of age. The highest PFAS plasma concentrations were of perfluorooctanesulfonic acid (PFOS) {median [interquartile range (IQR)]: 6.4 (5.6) ng/mL} and perfluorooctanoic acid (PFOA) [median (IQR): 4.4 (3.2) ng/mL]. Using linear regression, children with higher plasma concentrations of PFOA, PFOS, and perfluorodecanoate (PFDA) had lower aBMD z-scores [e.g., [Formula: see text]: [Formula: see text]; 95% confidence interval (CI): [Formula: see text], [Formula: see text] per doubling of PFOA]. The PFAS mixture was negatively associated with aBMD z-score ([Formula: see text]: [Formula: see text]; 95% CI: [Formula: see text], [Formula: see text] per IQR increment of the mixture index).

Conclusions

PFAS exposure may impair bone accrual in childhood and peak bone mass, an important determinant of lifelong skeletal health. https://doi.org/10.1289/EHP4918.",2019-08-21 +31434735,Long Noncoding RNA Lnc-MxA Inhibits Beta Interferon Transcription by Forming RNA-DNA Triplexes at Its Promoter. ,"Previously, we identified a set of long noncoding RNAs (lncRNAs) that were differentially expressed in influenza A virus (IAV)-infected cells. In this study, we focused on lnc-MxA, which is upregulated during IAV infection. We found that the overexpression of lnc-MxA facilitates the replication of IAV, while the knockdown of lnc-MxA inhibits viral replication. Further studies demonstrated that lnc-MxA is an interferon-stimulated gene. However, lnc-MxA inhibits the Sendai virus (SeV)- and IAV-induced activation of beta interferon (IFN-β). A luciferase assay indicated that lnc-MxA inhibits the activation of the IFN-β reporter upon stimulation with RIG-I, MAVS, TBK1, or active IRF3 (IRF3-5D). These data indicated that lnc-MxA negatively regulates the RIG-I-mediated antiviral immune response. A chromatin immunoprecipitation (ChIP) assay showed that the enrichment of IRF3 and p65 at the IFN-β promoter in lnc-MxA-overexpressing cells was significantly lower than that in control cells, indicating that lnc-MxA interfered with the binding of IRF3 and p65 to the IFN-β promoter. Chromatin isolation by RNA purification (ChIRP), triplex pulldown, and biolayer interferometry assays indicated that lnc-MxA can bind to the IFN-β promoter. Furthermore, an electrophoretic mobility shift assay (EMSA) showed that lnc-MxA can form complexes with the IFN-β promoter fragment. These results demonstrated that lnc-MxA can form a triplex with the IFN-β promoter to interfere with the activation of IFN-β transcription. 
Using a vesicular stomatitis virus (VSV) infection assay, we confirmed that lnc-MxA can repress the RIG-I-like receptor (RLR)-mediated antiviral immune response and influence the antiviral status of cells. In conclusion, we revealed that lnc-MxA is an interferon-stimulated gene (ISG) that negatively regulates the transcription of IFN-β by forming an RNA-DNA triplex.IMPORTANCE IAV can be recognized as a nonself molecular pattern by host immune systems and can cause immune responses. However, the intense immune response induced by influenza virus, known as a ""cytokine storm,"" can also cause widespread tissue damage (X. Z. J. Guo and P. G. Thomas, Semin Immunopathol 39:541-550, 2017, https://doi.org/10.1007/s00281-017-0636-y; S. Yokota, Nihon Rinsho 61:1953-1958, 2003; I. A. Clark, Immunol Cell Biol 85:271-273, 2007). Meanwhile, the detailed mechanisms involved in the balancing of immune responses in host cells are not well understood. Our studies reveal that, as an IFN-inducible gene, lnc-MxA functions as a negative regulator of the antiviral immune response. We uncovered the mechanism by which lnc-MxA inhibits the activation of IFN-β transcription. Our findings demonstrate that, as an ISG, lnc-MxA plays an important role in the negative-feedback loop involved in maintaining immune homeostasis.",2019-10-15 +30219490,Changing labour market conditions during the 'great recession' and mental health in Scotland 2007-2011: an example using the Scottish Longitudinal Study and data for local areas in Scotland.,"This paper reports research exploring how trends in local labour market conditions during the period 2007-2011 (early stages of the 'great recession') relate to reported mental illness for individuals. It contributes to research on spatio-temporal variation in the wider determinants of health, exploring how the lifecourse of places relates to socio-geographical inequalities in health outcomes for individuals. 
This study also contributes to the renewed research focus on the links between labour market trends and population health, prompted by the recent global economic recession. We report research using the Scottish Longitudinal Study (SLS), a 5.3% representative sample of the Scottish population, derived from census data (https://sls.lscs.ac.uk/). In Scotland, (2011) census data include self-reported mental health. SLS data were combined with non-disclosive information from other sources, including spatio-temporal trends in labour market conditions (calculated using trajectory modelling) in the 32 local authority areas in Scotland. We show that, for groups of local authorities in Scotland over the period 2007-2011, trends in employment varied. These geographically variable trends in employment rates were associated with inequalities in self-reported mental health across the country, after controlling for a number of other individual and neighbourhood risk factors. For residents of regions that had experienced relatively high and stable levels of employment the odds ratio for reporting a mental illness was significantly lower than for the 'reference group', living in areas with persistently low employment rates. In areas where employment declined markedly from higher levels, the odds ratio was similar to the reference group. The findings emphasise how changes in local economic conditions may influence people's health and wellbeing independently of their own employment status. We conclude that, during the recent recession, the economic life course of places across Scotland has been associated with individual mental health outcomes.",2018-08-13 +27489373,Get the Diagnosis: an evidence-based medicine collaborative Wiki for diagnostic test accuracy.,"

Background

Despite widespread calls for its use, there are challenges to the implementation of evidence-based medicine (EBM) in clinical practice.

Methods

In response to the challenges of finding timely, pertinent information on diagnostic test accuracy, we developed an online, crowd-sourced Wiki on diagnostic test accuracy called Get the Diagnosis (GTD, http://www.getthediagnosis.org).

Results

Since its launch in November 2008 till October 2015, GTD has accumulated information on 300 diagnoses, with 1617 total diagnostic entries. There are a total of 1097 unique diagnostic tests with a mean of 5.4 tests (range 0-38) per diagnosis. 73% of entries (1182 of 1617) have an associated sensitivity and specificity and 89% of entries (1432 of 1617) have associated peer-reviewed literature citations. Altogether, GTD contains 474 unique literature citations. For a sample of three diagnoses, the search precision (percentage of relevant results in the first 30 entries) in GTD was 100% as compared with a range of 13.3%-63.3% for PubMed and between 6.7% and 76.7% for Google Scholar.

Conclusion

GTD offers a fast, precise and efficient way to look up diagnostic test accuracy. On three selected examples, GTD had a greater precision rate compared with PubMed and Google Scholar in identifying diagnostic test information. GTD is a free resource that complements other currently available resources.",2016-08-03 +26875062,An Evaluation of Emergency Medicine Core Content Covered by Free Open Access Medical Education Resources.,"

Study objective

Emergency physicians are using free open access medical education (FOAM) resources at an increasing rate. The extent to which FOAM resources cover the breadth of emergency medicine core content is unknown. We hypothesize that the content of FOAM resources does not provide comprehensive or balanced coverage of the scope of knowledge necessary for emergency medicine providers. Our objective is to quantify emergency medicine core content covered by FOAM resources and identify the predominant FOAM topics.

Methods

This is an institutional review board-approved, retrospective review of all English-language FOAM posts between July 1, 2013, and June 30, 2014, as aggregated on http://FOAMem.com. The topics of FOAM posts were compared with those of the emergency medicine core content, as defined by the American Board of Emergency Medicine's Model of the Clinical Practice of Emergency Medicine (MCPEM). Each FOAM post could cover more than 1 topic. Repeated posts and summaries were excluded.

Results

Review of the MCPEM yielded 915 total emergency medicine topics grouped into 20 sections. Review of 6,424 FOAM posts yielded 7,279 total topics and 654 unique topics, representing 71.5% coverage of the 915 topics outlined by the MCPEM. The procedures section was covered most often, representing 2,285 (31.4%) FOAM topics. The 4 sections with the least coverage were cutaneous disorders, hematologic disorders, nontraumatic musculoskeletal disorders, and obstetric and gynecologic disorders, each representing 0.6% of FOAM topics. Airway techniques; ECG interpretation; research, evidence-based medicine, and interpretation of the literature; resuscitation; and ultrasonography were the most overrepresented subsections, equaling 1,674 (23.0%) FOAM topics when combined.

Conclusion

The data suggest an imbalanced and incomplete coverage of emergency medicine core content in FOAM. The study is limited by its retrospective design and use of a single referral Web site to obtain available FOAM resources. More comprehensive and balanced coverage of emergency medicine core content is needed if FOAM is to serve as a primary educational resource.",2016-02-11 +30391708,What do we need to know about drone brood homogenate and what is known.,"

Ethnopharmacological relevance

In Polish folk customs, bees are surrounded by the nimbus of holiness, which is reflected in a series of proverbs and phrases in colloquial speech (Markiewicowa, 1992). It was believed that products derived from the beehive, resulting from the effort of insects are endowed with special healing and nutritive properties. As such, bee products have been used in natural medicine for centuries (Markiewicowa, 1992). Nowadays, these properties have been confirmed by systematic scientific assessment. The largest number of scientific reports are focused on the nutritive properties and therapeutic action of propolis, royal jelly, honey, bee venom and pollen. Less information can be found about another product of beekeeping which is drone brood. Drones are responsible for the fertilization of a queen bee, thereby prolonging bee species. In addition to reproduction, they do not perform any others important functions in the bee community, except draining food resources collected by worker bees. For this reason, the excess of the drone brood is removed from the hive by the beekeepers. Before the winter bees themselves banish the adult drones from the hive. The removal of drone brood has a function in the prevention and treatment of varroosis, bee parasitic disease caused by the Varroa destructor mites. Beekeepers and scientists have noticed that this parasite accumulates in wax cells in which young drones develop.

Aim of the study

The purpose of this work was to assess the current state of knowledge on the nutritional and biological properties of the drone homogenate (DBH). Information about biological or pharmacological effects of DBH are limited and research results are published in very local scientific journals. The authors tried to gather available information of the chemical composition, methods of storing and preserving the brood, as well as on biological activity and application in nutrition and medicine. The collected facts prove that this product is wrongly regarded by majority of Western beekeepers as waste. Studies carried out on animal models show that the homogenate exhibits androgenic effect and led to improve animals productive capacity. DBH is able to stimulate the immune system (stimulating the production of antibodies by the spleen and the immune response of T lymphocytes) as well as reduction the parameters of oxidative stress and the risk of death due to cardiovascular episode (Bogdanov, 2012).

Materials and methods

In searching for information on drone brood, generally available publishing databases such as Scopus, Google Scholar, and PubMed were used. Search words were: ""drone homogenate"", ""drone brood"", ""bee brood"", ""drone larvae"", ""drone milk"". Due to the limited number of publications available in English, information on the drone homogenate was also searched in Russian. Patent studies of agents containing drone homogenate were searched at http://patents.google.com.

Results

This work gathers information on the chemical composition, methods of storage and preservation as well as the biological action of the drone homogenate. In addition, information on the effect of the drone homogenate on animal organisms and the use of homogenate in various disease entities in humans has been provided. The manuscript also contains information on the use of the drone homogenate as a dietary and food supplement. A critical discussion of the available results is provided.

Conclusions

This paper presents the most important information on the use of drone brood in folk medicine. The studies carried out with the use of animals and humans have shown that the drone brood has an adjuvant effect that improves the efficiency of the organism. Due to its high content of amino acids and proteins, it is used as a tonic and adaptogenic agent. The presence of sex hormones in the homogenate allows its use as a potency-raising agent and for equalizing the hormonal system in people of both sexes. Based on the facts quoted above, it can be concluded that DBH is a promising nutritional product, an unjustly neglected source of valuable substances not only such as proteins, fatty acids but also vitamins, hormones and antioxidants.",2018-11-02 +29410079,The Marburg-Münster Affective Disorders Cohort Study (MACS): A quality assurance protocol for MR neuroimaging data.,"Large, longitudinal, multi-center MR neuroimaging studies require comprehensive quality assurance (QA) protocols for assessing the general quality of the compiled data, indicating potential malfunctions in the scanning equipment, and evaluating inter-site differences that need to be accounted for in subsequent analyses. We describe the implementation of a QA protocol for functional magnetic resonance imaging (fMRI) data based on the regular measurement of an MRI phantom and an extensive variety of currently published QA statistics. The protocol is implemented in the MACS (Marburg-Münster Affective Disorders Cohort Study, http://for2107.de/), a two-center research consortium studying the neurobiological foundations of affective disorders. Between February 2015 and October 2016, 1214 phantom measurements have been acquired using a standard fMRI protocol. 
Using 444 healthy control subjects which have been measured between 2014 and 2016 in the cohort, we investigate the extent of between-site differences in contrast to the dependence on subject-specific covariates (age and sex) for structural MRI, fMRI, and diffusion tensor imaging (DTI) data. We show that most of the presented QA statistics differ severely not only between the two scanners used for the cohort but also between experimental settings (e.g. hardware and software changes), demonstrate that some of these statistics depend on external variables (e.g. time of day, temperature), highlight their strong dependence on proper handling of the MRI phantom, and show how the use of a phantom holder may balance this dependence. Site effects, however, do not only exist for the phantom data, but also for human MRI data. Using T1-weighted structural images, we show that total intracranial (TIV), grey matter (GMV), and white matter (WMV) volumes significantly differ between the MR scanners, showing large effect sizes. Voxel-based morphometry (VBM) analyses show that these structural differences observed between scanners are most pronounced in the bilateral basal ganglia, thalamus, and posterior regions. Using DTI data, we also show that fractional anisotropy (FA) differs between sites in almost all regions assessed. When pooling data from multiple centers, our data show that it is a necessity to account not only for inter-site differences but also for hardware and software changes of the scanning equipment. 
Also, the strong dependence of the QA statistics on the reliable placement of the MRI phantom shows that the use of a phantom holder is recommended to reduce the variance of the QA statistics and thus to increase the probability of detecting potential scanner malfunctions.",2018-02-01 +30695125,Infrared Spectra of Deprotonated Dicarboxylic Acids: IRMPD Spectroscopy and Empirical Valence-Bond Modeling.,"Experimental infrared multiple-photon dissociation (IRMPD) spectra recorded for a series of deprotonated dicarboxylic acids, HO2 (CH2 )n CO 2 - (n=2-4), are interpreted using a variety of computational methods. The broad bands centered near 1600 cm-1 can be reproduced neither by static vibrational calculations based on quantum chemistry nor by a dynamical description of individual structures using the many-body polarizable AMOEBA force field, strongly suggesting that these molecules experience dynamical proton sharing between the two carboxylic ends. To confirm this assumption, AMOEBA was combined with a two-state empirical valence-bond (EVB) model to allow for proton transfer in classical molecular dynamics simulations. Upon suitable parametrization based on ab initio reference data, the EVB-AMOEBA model satisfactorily reproduces the experimental infrared spectra, and the finite temperature dynamics reveals a significant amount of proton sharing in such systems.",2019-02-27 +29876374,Data set on the bioprecipitation of sulfate and trivalent arsenic by acidophilic non-traditional sulfur reducing bacteria.,"Data presented here are related to the original paper ""Simultaneous removal of sulfate and arsenic using immobilized non-traditional sulfate reducing bacteria (SRB) mixed culture and alternative low-cost carbon sources"" published by same authors (Matos et al., 2018) [1]. The data set here presented aims to facilitate this paper comprehension by giving readers some additional information. 
Data set includes a brief description of experimental conditions and the results obtained during both batch and semi-continuous reactors experiments. Data confirmed arsenic and sulfate were simultaneously removed under acidic pH by using a biological treatment based on the activity of a non-traditional sulfur reducing bacteria consortium. This microbial consortium was able to utilize glycerol, powdered chicken feathers as carbon donors, and proved to be resistant to arsenite up to 8.0 mg L-1. Data related to sulfate and arsenic removal efficiencies, residual arsenite and sulfate contents, pH and Eh measurements obtained under different experimental conditions were depicted in graphical format. Refers to https://doi.org/10.1016/j.cej.2017.11.035.",2018-01-02 +30974444,Extracorporeal Blood Purification Therapies for Sepsis.,"Extracorporeal blood purification is proposed as an adjuvant therapy for sepsis, aiming at controlling the associated dysregulation of the immune system, which is known to induce organ dysfunctions. Different therapies have been developed to address certain steps of the immune dysregulation. Most of the available blood purification devices focus on a single target, such as the endotoxin that triggers the immune cascade, or the cytokine storm that causes organ damages. However, the highly adsorptive membrane named oXiris® is a unique 4-in-1 device that combines cytokine and endotoxin removal properties, renal replacement function, and antithrombogenic properties. More recently, promising treatments that focus on the pathogen itself or the immune cells have been developed and are currently under investigation. In this review, we aim to summarize, according to their target, the different extracorporeal blood purification techniques that are already available for use. We will also briefly introduce the most recent techniques that are still under development. 
Because of its unique ability to remove both endotoxins and cytokines, we will particularly discuss the highly adsorptive preheparinized oXiris® membrane. We will present its properties, advantages, pitfalls, as well as therapeutic perspectives based on experimental and clinical data. Video Journal Club ""Cappuccino with Claudio Ronco"" at  https://www.karger.com/Journal/ArticleNews/223997?sponsor=52.",2019-04-11 +31320401,Genomic Profiling of Blood-Derived Circulating Tumor DNA from Patients with Colorectal Cancer: Implications for Response and Resistance to Targeted Therapeutics.,"Molecular profiling of circulating tumor DNA (ctDNA) is a promising noninvasive tool. Here, next-generation sequencing (NGS) of blood-derived ctDNA was performed in patients with advanced colorectal cancer. We investigated ctDNA-derived genomic alterations, including potential actionability, concordance with tissue NGS, and serial dynamics in 78 patients with colorectal cancer using a clinical-grade NGS assay that detects single nucleotide variants (54-73 genes) and selected copy-number variants, fusions, and indels. Overall, 63 patients [80.8% (63/78)] harbored ctDNA alterations; 59 [75.6% (59/78)], ≥1 characterized alteration (variants of unknown significance excluded). All 59 patients had actionable alterations potentially targetable with FDA-approved drugs [on-label and/or off-label (N = 54) or with experimental drugs in clinical trials (additional five patients); University of California San Diego Molecular Tumor Board assessment]: 45, by OncoKB (http://oncokb.org/#/). The tissue and blood concordance rates for common specific alterations ranged from 62.3% to 86.9% (median = 5 months between tests). In serial samples from patients on anti-EGFR therapy, multiple emerging alterations in genes known to be involved in therapeutic resistance, including KRAS, NRAS, BRAF, EGFR, ERBB2, and MET were detected. 
In conclusion, over 80% of patients with stage IV colorectal cancer had detectable ctDNA, and the majority had potentially actionable alterations. Concordance between tissue and blood was between 62% and 87%, despite a median of 5 months between tests. Resistance alterations emerged on anti-EGFR therapy. Therefore, biopsy-free, noninvasive ctDNA analysis provides data relevant to the clinical setting. Importantly, sequential ctDNA analysis detects patterns of emerging resistance allowing for precision planning of future therapy.",2019-07-18 +22588877,The cBio cancer genomics portal: an open platform for exploring multidimensional cancer genomics data.,"The cBio Cancer Genomics Portal (http://cbioportal.org) is an open-access resource for interactive exploration of multidimensional cancer genomics data sets, currently providing access to data from more than 5,000 tumor samples from 20 cancer studies. The cBio Cancer Genomics Portal significantly lowers the barriers between complex genomic data and cancer researchers who want rapid, intuitive, and high-quality access to molecular profiles and clinical attributes from large-scale cancer genomics projects and empowers researchers to translate these rich data sets into biologic insights and clinical applications.",2012-05-01 +25708359,ViralmiR: a support-vector-machine-based method for predicting viral microRNA precursors.,"

Background

microRNAs (miRNAs) play a vital role in development, oncogenesis, and apoptosis by binding to mRNAs to regulate the posttranscriptional level of coding genes in mammals, plants, and insects. Recent studies have demonstrated that the expression of viral miRNAs is associated with the ability of the virus to infect a host. Identifying potential viral miRNAs from experimental sequence data is valuable for deciphering virus-host interactions. Thus far, a specific predictive model for viral miRNA identification has yet to be developed.

Methods and results

Here, we present ViralmiR for identifying viral miRNA precursors on the basis of sequencing and structural information. We collected 263 experimentally validated miRNA precursors (pre-miRNAs) from 26 virus species and generated sequencing fragments from virus and human genomes as the negative dataset. Support vector machine and random forest models were established using 54 features from RNA sequences and secondary structural information. The results show that ViralmiR achieved a balanced accuracy higher than 83%, which is superior to that of previously developed tools for identifying pre-miRNAs.

Conclusions

The easy-to-use ViralmiR web interface has been provided as a helpful resource for researchers to use in analyzing and deciphering virus-host interactions. The web interface of ViralmiR can be accessed at http://csb.cse.yzu.edu.tw/viralmir/.",2015-01-21 +27510400,Membranome: a database for proteome-wide analysis of single-pass membrane proteins.,"The Membranome database was developed to assist analysis and computational modeling of single-pass (bitopic) transmembrane (TM) proteins and their complexes by providing structural information about these proteins on a genomic scale. The database currently collects data on >6000 bitopic proteins from Homo sapiens, Arabidopsis thaliana, Dictyostelium discoideum, Saccharomyces cerevisiae, Escherichia coli and Methanocaldococcus jannaschii It presents the following data: (i) hierarchical classification of bitopic proteins into 15 functional classes, 689 structural superfamilies and 1404 families; (ii) 446 complexes of bitopic proteins with known three-dimensional (3D) structures classified into 129 families; (iii) computationally generated three-dimensional models of TM α-helices positioned in membranes; (iv) amino acid sequences, domain architecture, functional annotation and available experimental structures of bitopic proteins; (v) TM topology and intracellular localization, (vi) physical interactions between proteins from the database along with links to other resources. The database is freely accessible at http://membranome.org There is a variety of options for browsing, sorting, searching and retrieval of the content, including downloadable coordinate files of TM domains with calculated membrane boundaries.",2016-08-10 +24569102,The European Cancer Observatory: A new data resource.,"Population-based cancer registries provide indispensable information on cancer incidence and survival, which cannot be obtained by any other means. 
It is clear that complete and effective use of these data is essential for cancer control, but sharing this information in a uniform, timely and user-friendly manner has been somewhat limited up to now. The European Cancer Observatory (ECO, http://eco.iarc.fr) has been developed in the framework of the EUROCOURSE project (EUROpe against Cancer: Optimisation of Use of Registries for Scientific Excellence in Research) as a comprehensive resource combining all the information currently available in Europe on cancer incidence, mortality, survival and prevalence. The website provides analytical and presentation tools to examine national estimates for 2012 in 40 European countries (EUCAN), data for 130 national or sub-national areas covered by cancer registries for up to 60 years, until 2011 (EUREG) and a planned mechanism for data download (European Cancer Incidence and Mortality (EUROCIM)). The generated statistics outline the considerable variability across Europe in the rates of all major cancer types and help identify key concerns that need to be addressed by public health policies e.g. the unprecedented rise of lung cancer incidence in women with its full impact expected within a decade or so. The support, maintenance and further development of the ECO website should be a high priority for European cancer policymakers, to continue providing this unique information to health professionals, researchers and the general public in Europe and beyond.",2014-02-22 +29734235,Effects of Repetitive Transcranial Magnetic Stimulation on Walking and Balance Function after Stroke: A Systematic Review and Meta-Analysis.,"OBJECTIVE:The aim of this study was to investigate the effects of repetitive transcranial magnetic stimulation (rTMS) on walking and balance function in patients with stroke. 
DESIGN:MEDLINE, EMBASE, CINAHL, PsycINFO, Web of Science, CENTRAL, and the Physiotherapy Evidence Database were comprehensively searched for randomized controlled trials published through March 2017 that investigated the effects of rTMS on lower limb function. Main outcomes included walking speed, balance function, motor function, and cortical excitability. RESULTS:Nine studies were included. The meta-analysis revealed a significant effect of rTMS on walking speed (standardized mean difference, 0.64; 95% confidence interval [CI], 0.32-0.95), particularly ipsilesional stimulation (standardized mean difference, 0.80; 95% CI, 0.36-1.24). No significant effects were found for balance function (standardized mean difference, 0.10; 95% CI, -0.26 to 0.45), motor function (mean difference, 0.50, 95% CI: -0.68 to 1.68), or cortical excitability (motor-evoked potentials of the affected hemisphere: mean difference, 0.21 mV; 95% CI, -0.11 to 0.54; motor-evoked potentials of the unaffected hemisphere: mean difference, 0.09 mV; 95% CI, -0.16 to -0.02). CONCLUSION:These results suggest that rTMS, particularly ipsilesional stimulation, significantly improves walking speed. Future studies with larger sample sizes and an adequate follow-up period are required to further understand the effects of rTMS on lower limb function and its relationship with changes in cortical excitability with the help of functional neuroimaging techniques. TO CLAIM CME CREDITS:Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: 1) Understand the potential neurophysiologic effects of rTMS; 2) Appreciate the potential benefits of rTMS on stroke recovery; and 3) Identify indications for including rTMS in a stroke rehabilitation program. 
LEVEL:Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2018-11-01 +29790904,Exploring drug space with ChemMaps.com.,"Motivation:Easily navigating chemical space has become more important due to the increasing size and diversity of publicly-accessible databases such as DrugBank, ChEMBL or Tox21. To do so, modelers typically rely on complex projection techniques using molecular descriptors computed for all the chemicals to be visualized. However, the multiple cheminformatics steps required to prepare, characterize, compute and explore those molecules, are technical, typically necessitate scripting skills, and thus represent a real obstacle for non-specialists. Results:We developed the ChemMaps.com webserver to easily browse, navigate and mine chemical space. The first version of ChemMaps.com features more than 8000 approved, in development, and rejected drugs, as well as over 47 000 environmental chemicals. Availability and implementation:The webserver is freely available at http://www.chemmaps.com.",2018-11-01 +,"Re‐evolution of a morphological precursor of crypsis investment in the newly revised horned praying mantises (Insecta, Mantodea, Vatinae)","The Neotropical praying mantis tribe Vatini Stål is revised using total evidence phylogenetic analysis based on molecular and coded morphological data. The subfamily Vatinae is redefined to only include Neotropical taxa with the removal of distantly related African and Asian lineages. A new tribe is erected under Vatinae (Heterovatini trib.n.) 
for two unique genera with historically unstable taxonomic placement (Heterovates Saussure and Chopardiella Giglio‐Tos). Phylogenetic results and morphology support the synonymy of three genera (Lobovates Deeleman‐Reinhold, Phyllovates Kirby, and Hagiotata Saussure & Zehntner) and the validity of Chopardiella Giglio‐Tos, Heterovates Saussure, Callivates Roy, Pseudovates Saussure, Vates Burmeister, and Zoolea Audinet Serville. A new genus (Alangularis gen.n.) is created for a former species of Vates with unique morphology and separate phylogenetic placement. All genera are redescribed based on external morphology and the male genital complex. A key to genera for Vatinae is provided with dorsal habitus images of representatives for each genus. A distinct pattern of correlated evolution of morphological characters linked to crypsis was uncovered. Cuticular leg lobes within single leg segments are evolving as sets, and serially homologous lobes appear simultaneously or in close succession. The posteroventral lobes in the apical position on thoracic femora appear to be the precursors to multiple positive rate shifts in the evolutionary accumulation of cryptic features. One shift occurred early in the evolution of Vatinae while the second occurred much later, after the loss and re‐evolution of the posteroventral lobes in the apical position on thoracic femora, a violation of Dollo's law. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:724C16AF-069A-46A1-B66C-007D8DE18C68.",2016-01-01 +30041657,Differential gene expression analysis tools exhibit substandard performance for long non-coding RNA-sequencing data.,"

Background

Long non-coding RNAs (lncRNAs) are typically expressed at low levels and are inherently highly variable. This is a fundamental challenge for differential expression (DE) analysis. In this study, the performance of 25 pipelines for testing DE in RNA-seq data is comprehensively evaluated, with a particular focus on lncRNAs and low-abundance mRNAs. Fifteen performance metrics are used to evaluate DE tools and normalization methods using simulations and analyses of six diverse RNA-seq datasets.

Results

Gene expression data are simulated using non-parametric procedures in such a way that realistic levels of expression and variability are preserved in the simulated data. Throughout the assessment, results for mRNA and lncRNA were tracked separately. All the pipelines exhibit inferior performance for lncRNAs compared to mRNAs across all simulated scenarios and benchmark RNA-seq datasets. The substandard performance of DE tools for lncRNAs applies also to low-abundance mRNAs. No single tool uniformly outperformed the others. Variability, number of samples, and fraction of DE genes markedly influenced DE tool performance.

Conclusions

Overall, linear modeling with empirical Bayes moderation (limma) and a non-parametric approach (SAMSeq) showed good control of the false discovery rate and reasonable sensitivity. Of note, for achieving a sensitivity of at least 50%, more than 80 samples are required when studying expression levels in realistic settings such as in clinical cancer research. About half of the methods showed a substantial excess of false discoveries, making these methods unreliable for DE analysis and jeopardizing reproducible science. The detailed results of our study can be consulted through a user-friendly web application, giving guidance on selection of the optimal DE tool ( http://statapps.ugent.be/tools/AppDGE/ ).",2018-07-24 +26546318,ATTED-II in 2016: A Plant Coexpression Database Towards Lineage-Specific Coexpression.,"ATTED-II (http://atted.jp) is a coexpression database for plant species with parallel views of multiple coexpression data sets and network analysis tools. The user can efficiently find functional gene relationships and design experiments to identify gene functions by reverse genetics and general molecular biology techniques. Here, we report updates to ATTED-II (version 8.0), including new and updated coexpression data and analysis tools. ATTED-II now includes eight microarray- and six RNA sequencing-based coexpression data sets for seven dicot species (Arabidopsis, field mustard, soybean, barrel medick, poplar, tomato and grape) and two monocot species (rice and maize). Stand-alone coexpression analyses tend to have low reliability. Therefore, examining evolutionarily conserved coexpression is a more effective approach from the viewpoints of reliability and evolutionary importance. In contrast, the reliability of species-specific coexpression data remains poor. Our assessment scores for individual coexpression data sets indicated that the quality of the new coexpression data sets in ATTED-II is higher than for any previous coexpression data set. 
In addition, five species (Arabidopsis, soybean, tomato, rice and maize) in ATTED-II are now supported by both microarray- and RNA sequencing-based coexpression data, which has increased the reliability. Consequently, ATTED-II can now provide lineage-specific coexpression information. As an example of the use of ATTED-II to explore lineage-specific coexpression, we demonstrate monocot- and dicot-specific coexpression of cell wall genes. With the expanded coexpression data for multilevel evaluation, ATTED-II provides new opportunities to investigate lineage-specific evolution in plants.",2015-11-06 +30384427,PWCDA: Path Weighted Method for Predicting circRNA-Disease Associations. ,"CircRNAs have particular biological structure and have proven to play important roles in diseases. It is time-consuming and costly to identify circRNA-disease associations by biological experiments. Therefore, it is appealing to develop computational methods for predicting circRNA-disease associations. In this study, we propose a new computational path weighted method for predicting circRNA-disease associations. Firstly, we calculate the functional similarity scores of diseases based on disease-related gene annotations and the semantic similarity scores of circRNAs based on circRNA-related gene ontology, respectively. To address missing similarity scores of diseases and circRNAs, we calculate the Gaussian Interaction Profile (GIP) kernel similarity scores for diseases and circRNAs, respectively, based on the circRNA-disease associations downloaded from circR2Disease database (http://bioinfo.snnu.edu.cn/CircR2Disease/). Then, we integrate disease functional similarity scores and circRNA semantic similarity scores with their related GIP kernel similarity scores to construct a heterogeneous network made up of three sub-networks: disease similarity network, circRNA similarity network and circRNA-disease association network. 
Finally, we compute an association score for each circRNA-disease pair based on paths connecting them in the heterogeneous network to determine whether this circRNA-disease pair is associated. We adopt leave one out cross validation (LOOCV) and five-fold cross validations to evaluate the performance of our proposed method. In addition, three common diseases, Breast Cancer, Gastric Cancer and Colorectal Cancer, are used for case studies. Experimental results illustrate the reliability and usefulness of our computational method in terms of different validation measures, which indicates PWCDA can effectively predict potential circRNA-disease associations.",2018-10-31 +31097671,"Simultaneous Improvement in the Precision, Accuracy, and Robustness of Label-free Proteome Quantification by Optimizing Data Manipulation Chains.","The label-free proteome quantification (LFQ) is multistep workflow collectively defined by quantification tools and subsequent data manipulation methods that has been extensively applied in current biomedical, agricultural, and environmental studies. Despite recent advances, in-depth and high-quality quantification remains extremely challenging and requires the optimization of LFQs by comparatively evaluating their performance. However, the evaluation results using different criteria (precision, accuracy, and robustness) vary greatly, and the huge number of potential LFQs becomes one of the bottlenecks in comprehensively optimizing proteome quantification. In this study, a novel strategy, enabling the discovery of the LFQs of simultaneously enhanced performance from thousands of workflows (integrating 18 quantification tools with 3,128 manipulation chains), was therefore proposed. First, the feasibility of achieving simultaneous improvement in the precision, accuracy, and robustness of LFQ was systematically assessed by collectively optimizing its multistep manipulation chains. 
Second, based on a variety of benchmark datasets acquired by various quantification measurements of different modes of acquisition, this novel strategy successfully identified a number of manipulation chains that simultaneously improved the performance across multiple criteria. Finally, to further enhance proteome quantification and discover the LFQs of optimal performance, an online tool (https://idrblab.org/anpela/) enabling collective performance assessment (from multiple perspectives) of the entire LFQ workflow was developed. This study confirmed the feasibility of achieving simultaneous improvement in precision, accuracy, and robustness. The novel strategy proposed and validated in this study together with the online tool might provide useful guidance for the research field requiring the mass-spectrometry-based LFQ technique.",2019-05-16 +30806762,Transfer learning for predicting human skin sensitizers.,"Computational prioritization of chemicals for potential skin sensitization risks plays essential roles in the risk assessment of environmental chemicals and drug development. Given the huge number of chemicals for testing, computational methods enable the fast identification of high-risk chemicals for experimental validation and design of safer alternatives. However, the development of robust prediction model requires a large dataset of tested chemicals that is usually not available for most toxicological endpoints, especially for human data. A small training dataset makes the development of effective models difficult with insufficient coverage and accuracy. In this study, an ensemble tree-based multitask learning method was developed incorporating three relevant tasks in the well-defined adverse outcome pathway (AOP) of skin sensitization to transfer shared knowledge to the major task of human sensitizers. The results show both largely improved coverage and accuracy compared with three state-of-the-art methods. 
A user-friendly prediction server was available at https://cwtung.kmu.edu.tw/skinsensdb/predict . As AOPs for various toxicity endpoints are being actively developed, the proposed method can be applied to develop prediction models for other endpoints.",2019-02-26 +28974579,"Data visualization, bar naked: A free tool for creating interactive graphics.","Although bar graphs are designed for categorical data, they are routinely used to present continuous data in studies that have small sample sizes. This presentation is problematic, as many data distributions can lead to the same bar graph, and the actual data may suggest different conclusions from the summary statistics. To address this problem, many journals have implemented new policies that require authors to show the data distribution. This paper introduces a free, web-based tool for creating an interactive alternative to the bar graph (http://statistika.mfub.bg.ac.rs/interactive-dotplot/). This tool allows authors with no programming expertise to create customized interactive graphics, including univariate scatterplots, box plots, and violin plots, for comparing values of a continuous variable across different study groups. Individual data points may be overlaid on the graphs. Additional features facilitate visualization of subgroups or clusters of non-independent data. A second tool enables authors to create interactive graphics from data obtained with repeated independent experiments (http://statistika.mfub.bg.ac.rs/interactive-repeated-experiments-dotplot/). 
These tools are designed to encourage exploration and critical evaluation of the data behind the summary statistics and may be valuable for promoting transparency, reproducibility, and open science in basic biomedical research.",2017-10-03 +28461042,Forensic characteristics and phylogenetic analysis of Hubei Han population in central China using 17 Y-STR loci.,"Currently, the largest national database within the Y chromosome haplotype reference database (YHRD, https://yhrd.org, release 53) is China, which has approximately 38000 Y chromosomal 17-marker (Yfiler) haplotypes. These haplotype profiles derived from the vast majority of Chinese administrative divisions, but no haplotype data was available for Hubei province, which is located in the Central China region. Herein, 429 unrelated male Chinese Han individuals residing in Hubei province were recruited and genotyped with 17 Y-STR loci. 115 alleles were identified with corresponding allele frequencies spanned from 0.0023 to 0.7506. The gene diversity (GD) values ranged from 0.3988 at DYS438 to 0.9573 at DYS385a/b. A total of 410 distinct haplotypes were obtained with the overall haplotype diversity (HD) and discrimination capacity (DC) was 0.9995 and 0.9557, respectively. Additionally, genetic relationships along administrative (Han Chinese from different provinces) and ethnic divisions (minority ethnic groups) were analyzed using analysis of molecular variance (AMOVA) tests and visualized by multidimensional scaling plots (MDS). The Han ethnicity including the Hubei Han shows a high genetic homogeneity all across China and significant genetic differences existed between the Hubei Han and some ethnic groups, most prominently for the Kazakhs and the Tibetans.",2017-04-20 +30225281,Identification of potential biomarkers of head and neck squamous cell carcinoma using iTRAQ based quantitative proteomic approach.,"Head and neck squamous cell carcinoma (HNSCC) is one of the most common cancers in India. 
Despite improvements in treatment strategy, the survival rates of HNSCC patients remain poor. Thus, it is necessary to identify biomarkers that can be used for early detection of disease. In this study, we employed iTRAQ-based quantitative mass spectrometry analysis to identify dysregulated proteins from a panel of head and neck squamous cell carcinoma (HNSCC) cell lines. We identified 2468 proteins, of which 496 proteins were found to be dysregulated in at least two out of three HNSCC cell lines compared to immortalized normal oral keratinocytes. We detected increased expression of replication protein A1 (RPA1) and heat shock protein family H (Hsp110) member 1 (HSPH1), in HNSCC cell lines compared to control. The differentially expressed proteins were further validated using parallel reaction monitoring (PRM) and western blot analysis in HNSCC cell lines. Immunohistochemistry-based validation using HNSCC tissue microarrays revealed overexpression of RPA1 and HSPH1 in 15.7% and 32.2% of the tested cases, respectively. Our study illustrates quantitative proteomics as a robust approach for identification of potential HNSCC biomarkers. The proteomic data has been submitted to ProteomeXchange Consortium (http://www.proteomecentral.proteomexchange.org) via the PRIDE public data repository accessible using the data identifier - PXD009241.",2018-05-24 +28503844,TypeLoader: A fast and efficient automated workflow for the annotation and submission of novel full-length HLA alleles.,"Recent years have seen a rapid increase in the discovery of novel allelic variants of the human leukocyte antigen (HLA) genes. Commonly, only the exons encoding the peptide binding domains of novel HLA alleles are submitted. As a result, the IPD-IMGT/HLA Database lacks sequence information outside those regions for the majority of known alleles. This has implications for the application of the new sequencing technologies, which deliver sequence data often covering the complete gene. 
As these technologies simplify the characterization of the complete gene regions, it is desirable for novel alleles to be submitted as full-length sequences to the database. However, the manual annotation of full-length alleles and the generation of specific formats required by the sequence repositories is prone to error and time consuming. We have developed TypeLoader to address both these facets. With only the full-length sequence as a starting point, Typeloader performs automatic sequence annotation and subsequently handles all steps involved in preparing the specific formats for submission with very little manual intervention. TypeLoader is routinely used at the DKMS Life Science Lab and has aided in the successful submission of more than 900 novel HLA alleles as full-length sequences to the European Nucleotide Archive repository and the IPD-IMGT/HLA Database with a 95% reduction in the time spent on annotation and submission when compared with handling these processes manually. TypeLoader is implemented as a web application and can be easily installed and used on a standalone Linux desktop system or within a Linux client/server architecture. TypeLoader is downloadable from http://www.github.com/DKMS-LSL/typeloader.",2017-05-14 +,Global oceanic DMS data inter-comparability,"The global surface seawater dimethylsulphide (DMS) database ( http://saga.pmel.noaa.gov/dms/ ) contains >50,000 data points and is the second largest trace gas database after carbon dioxide. However, there has been relatively little quality control on the data that have been collated to date. Furthermore, the recent development of technologies capable of high frequency (>1 Hz) DMS measurements will have a disproportionate effect on the database in future years. At this juncture, the comparability of analytical techniques, sample handling methodologies and standards are pressing issues that the DMS community needs to address. 
In October 2010, during the Fifth International Symposium on Biological and Environmental Chemistry of DMS(P) and Related Compounds held in Goa, India, attendees participated in a discussion concerning the current DMS database and its future development. We develop some of the ideas from that session and combine them with available data. From the few inter-comparison exercises that have been conducted we show that variability between existing measurements within the DMS database is likely to be ≤25%. Tests comparing different DMSP·HCl standards demonstrate that a reference calibration standard would be beneficial for the DMS community. Confidence in future data collation would be substantially improved with a comprehensive inter-comparison experiment between new analytical techniques and sampling methodologies (e.g., mass spectrometers with equilibrators attached to a continuous flow of seawater) and more established methods (i.e., filtered samples analysed with purge and trap gas chromatography). We conclude with recommendations for the future expansion of the DMS database and its data quality control.",2012-09-01 +25428349,"OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders.","Online Mendelian Inheritance in Man, OMIM(®), is a comprehensive, authoritative and timely research resource of curated descriptions of human genes and phenotypes and the relationships between them. The new official website for OMIM, OMIM.org (http://omim.org), was launched in January 2011. OMIM is based on the published peer-reviewed biomedical literature and is used by overlapping and diverse communities of clinicians, molecular biologists and genome scientists, as well as by students and teachers of these disciplines. Genes and phenotypes are described in separate entries and are given unique, stable six-digit identifiers (MIM numbers). 
OMIM entries have a structured free-text format that provides the flexibility necessary to describe the complex and nuanced relationships between genes and genetic phenotypes in an efficient manner. OMIM also has a derivative table of genes and genetic phenotypes, the Morbid Map. OMIM.org has enhanced search capabilities such as genome coordinate searching and thesaurus-enhanced search term options. Phenotypic series have been created to facilitate viewing genetic heterogeneity of phenotypes. Clinical synopsis features are enhanced with UMLS, Human Phenotype Ontology and Elements of Morphology terms and image links. All OMIM data are available for FTP download and through an API. MIMmatch is a novel outreach feature to disseminate updates and encourage collaboration.",2014-11-26 +29536443,iSeq: Web-Based RNA-seq Data Analysis and Visualization.,"Transcriptome sequencing (RNA-seq) is becoming a standard experimental methodology for genome-wide characterization and quantification of transcripts at single base-pair resolution. However, downstream analysis of massive amount of sequencing data can be prohibitively technical for wet-lab researchers. A functionally integrated and user-friendly platform is required to meet this demand. Here, we present iSeq, an R-based Web server, for RNA-seq data analysis and visualization. iSeq is a streamlined Web-based R application under the Shiny framework, featuring a simple user interface and multiple data analysis modules. Users without programming and statistical skills can analyze their RNA-seq data and construct publication-level graphs through a standardized yet customizable analytical pipeline. iSeq is accessible via Web browsers on any operating system at http://iseq.cbi.pku.edu.cn .",2018-01-01 +28410104,Low-Rank Embedding for Robust Image Feature Extraction.,"Robustness to noises, outliers, and corruptions is an important issue in linear dimensionality reduction. 
Since the sample-specific corruptions and outliers exist, the class-special structure or the local geometric structure is destroyed, and thus, many existing methods, including the popular manifold learning- based linear dimensionality methods, fail to achieve good performance in recognition tasks. In this paper, we focus on the unsupervised robust linear dimensionality reduction on corrupted data by introducing the robust low-rank representation (LRR). Thus, a robust linear dimensionality reduction technique termed low-rank embedding (LRE) is proposed in this paper, which provides a robust image representation to uncover the potential relationship among the images to reduce the negative influence from the occlusion and corruption so as to enhance the algorithm's robustness in image feature extraction. LRE searches the optimal LRR and optimal subspace simultaneously. The model of LRE can be solved by alternatively iterating the argument Lagrangian multiplier method and the eigendecomposition. The theoretical analysis, including convergence analysis and computational complexity, of the algorithms is presented. Experiments on some well-known databases with different corruptions show that LRE is superior to the previous methods of feature extraction, and therefore, it indicates the robustness of the proposed method. The code of this paper can be downloaded from http://www.scholat.com/laizhihui.",2017-04-06 +29312824,Biotea: semantics for Pubmed Central.,"A significant portion of biomedical literature is represented in a manner that makes it difficult for consumers to find or aggregate content through a computational query. One approach to facilitate reuse of the scientific literature is to structure this information as linked data using standardized web technologies. 
In this paper we present the second version of Biotea, a semantic, linked data version of the open-access subset of PubMed Central that has been enhanced with specialized annotation pipelines that uses existing infrastructure from the National Center for Biomedical Ontology. We expose our models, services, software and datasets. Our infrastructure enables manual and semi-automatic annotation, resulting data are represented as RDF-based linked data and can be readily queried using the SPARQL query language. We illustrate the utility of our system with several use cases. Our datasets, methods and techniques are available at http://biotea.github.io.",2018-01-02 +30372091,"""What item response theory can tell us about the complex span tasks"": Correction to Draheim et al. (2018).","Reports an error in ""What item response theory can tell us about the complex span tasks"" by Christopher Draheim, Tyler L. Harrison, Susan E. Embretson and Randall W. Engle (Psychological Assessment, 2018[Jan], Vol 30[1], 116-129). In the article ""What Item Response Theory Can Tell Us About the Complex Span Tasks,"" by Christopher Draheim, Tyler L. Harrison, Susan E. Embretson, and Randall W. Engle (Psychological Assessment, 2018, Vol. 30, No. 1, pp. 116-129, http://dx.doi.org/10.1037/pas0000444). In the article, a programming error in the operation span task in Study 2 resulted in set size 8 being administered instead of set size 9. Set sizes 3-7 were administered as intended, but set size 8 was administered twice in each block instead of one instance of set size 8 and one instance of set size 9 per block. As such, all references to set size 9 should be interpreted as an additional administration of set size 8. 
This error has some minor implications for the results and conclusions of Study 2 whereby it can no longer be confidently asserted that an operation span task with set sizes 8 and 9 added would be any less suitable for higher ability subjects than the rotation and symmetry span tasks. However, the error has no bearing on the argument that the standard administration of the operation span (set sizes 3-7) is lacking and that the addition of larger set sizes to the operation span vastly improves its utility for testing higher ability individuals. (The following abstract of the original article appeared in record 2017-10875-001.) Working memory capacity is an important construct in psychology because of its relationship with many higher-order cognitive abilities and psychopathologies. Working memory capacity is often measured using a type of paradigm known as complex span. Some recent work has focused on shortening the administration time of the complex span tasks, resulting in different versions of these tasks being used (Foster et al., 2015; Oswald, McAbee, Redick, & Hambrick, 2015). Variations in the complex span tasks, such as the number of set sizes, can lead to varying power to discriminate individuals at different ability levels. Thus, research findings may be inconsistent across populations due to differing appropriateness for the ability levels. The present study uses a combination of item response theory and correlational analyses to better understand the psychometric properties of the operation span, symmetry span, and rotation span. The findings show that the typical administration of these tasks, particularly the operation span, is not suitable for above average ability samples (Study 1; n = 573). When larger set sizes are added to the tasks (Study 2; n = 351), predictive validity and discriminability is improved for all complex span tasks, however the operation span is still inferior to the spatial tasks. 
The authors make several conclusions about which tasks and set sizes should be used depending on the intended population, and further suggest avoiding the standard-length operation span for average or higher ability populations. (PsycINFO Database Record (c) 2018 APA, all rights reserved).",2018-10-29 +24053356,An expression atlas of human primary cells: inference of gene function from coexpression networks.,"

Background

The specialisation of mammalian cells in time and space requires genes associated with specific pathways and functions to be co-ordinately expressed. Here we have combined a large number of publicly available microarray datasets derived from human primary cells and analysed large correlation graphs of these data.

Results

Using the network analysis tool BioLayout Express3D we identify robust co-associations of genes expressed in a wide variety of cell lineages. We discuss the biological significance of a number of these associations, in particular the coexpression of key transcription factors with the genes that they are likely to control.

Conclusions

We consider the regulation of genes in human primary cells and specifically in the human mononuclear phagocyte system. Of particular note is the fact that these data do not support the identity of putative markers of antigen-presenting dendritic cells, nor classification of M1 and M2 activation states, a current subject of debate within immunological field. We have provided this data resource on the BioGPS web site (http://biogps.org/dataset/2429/primary-cell-atlas/) and on macrophages.com (http://www.macrophages.com/hu-cell-atlas).",2013-09-20 +26896847,Ensembl comparative genomics resources. ,"Evolution provides the unifying framework with which to understand biology. The coherent investigation of genic and genomic data often requires comparative genomics analyses based on whole-genome alignments, sets of homologous genes and other relevant datasets in order to evaluate and answer evolutionary-related questions. However, the complexity and computational requirements of producing such data are substantial: this has led to only a small number of reference resources that are used for most comparative analyses. The Ensembl comparative genomics resources are one such reference set that facilitates comprehensive and reproducible analysis of chordate genome data. Ensembl computes pairwise and multiple whole-genome alignments from which large-scale synteny, per-base conservation scores and constrained elements are obtained. Gene alignments are used to define Ensembl Protein Families, GeneTrees and homologies for both protein-coding and non-coding RNA genes. These resources are updated frequently and have a consistent informatics infrastructure and data presentation across all supported species. Specialized web-based visualizations are also available including synteny displays, collapsible gene tree plots, a gene family locator and different alignment views. 
The Ensembl comparative genomics infrastructure is extensively reused for the analysis of non-vertebrate species by other projects including Ensembl Genomes and Gramene and much of the information here is relevant to these projects. The consistency of the annotation across species and the focus on vertebrates makes Ensembl an ideal system to perform and support vertebrate comparative genomic analyses. We use robust software and pipelines to produce reference comparative data and make it freely available. Database URL: http://www.ensembl.org.",2016-02-20 +27487245,CLIMP: Clustering Motifs via Maximal Cliques with Parallel Computing Design.,"A set of conserved binding sites recognized by a transcription factor is called a motif, which can be found by many applications of comparative genomics for identifying over-represented segments. Moreover, when numerous putative motifs are predicted from a collection of genome-wide data, their similarity data can be represented as a large graph, where these motifs are connected to one another. However, an efficient clustering algorithm is desired for clustering the motifs that belong to the same groups and separating the motifs that belong to different groups, or even deleting an amount of spurious ones. In this work, a new motif clustering algorithm, CLIMP, is proposed by using maximal cliques and sped up by parallelizing its program. When a synthetic motif dataset from the database JASPAR, a set of putative motifs from a phylogenetic foot-printing dataset, and a set of putative motifs from a ChIP dataset are used to compare the performances of CLIMP and two other high-performance algorithms, the results demonstrate that CLIMP mostly outperforms the two algorithms on the three datasets for motif clustering, so that it can be a useful complement of the clustering procedures in some genome-wide motif prediction pipelines. CLIMP is available at http://sqzhang.cn/climp.html.",2016-08-03 +30144646,Sequence-based U.S. 
population data for 27 autosomal STR loci.,"This manuscript reports Short Tandem Repeat (STR) sequence-based allele frequencies for 1036 samples across 27 autosomal STR loci: D1S1656, TPOX, D2S441, D2S1338, D3S1358, D4S2408, FGA, D5S818, CSF1PO, D6S1043, D7S820, D8S1179, D9S1122, D10S1248, TH01, vWA, D12S391, D13S317, Penta E, D16S539, D17S1301, D18S51, D19S433, D20S482, D21S11, Penta D, and D22S1045. Sequence data were analyzed by two bioinformatic pipelines and all samples have been evaluated for concordance with alleles derived from CE-based analysis at all loci. Each reported sequence includes high-quality flanking sequence and is properly formatted according to the most recent guidance of the International Society for Forensic Genetics. In addition, GenBank accession numbers are reported for each sequence, and associated records are available in the STRSeq BioProject (https://www.ncbi.nlm.nih.gov/bioproject/380127). The D3S1358 locus demonstrates the greatest average increase in heterozygosity across populations (approximately 10 percentage points). Loci demonstrating average increase in heterozygosity from 10 to 5 percentage points include (in descending order) D9S1122, D13S317, D8S1179, D21S11, D5S818, D12S391, and D2S441. The remaining 19 loci each demonstrate less than 5 percentage point increase in average heterozygosity. Discussion includes the utility of this data in understanding traditional CE results, such as informing stutter models and understanding migration challenges, and considerations for population sampling strategies in light of the marked increase in rare alleles for several of the sequence-based STR loci. This NIST 1036 data set is expected to support the implementation of STR sequencing forensic casework by providing high-confidence sequence-based allele frequencies for the same sample set which are already the basis for population statistics in many U.S. 
forensic laboratories.",2018-07-19 +30510527,Cyber Teaming and Role Specialization in a Cyber Security Defense Competition.,"A critical requirement for developing a cyber capable workforce is to understand how to challenge, assess, and rapidly develop human cyber skill-sets in realistic cyber operational environments. Fortunately, cyber team competitions make use of simulated operational environments with scoring criteria of task performance that objectively define overall team effectiveness, thus providing the means and context for observation and analysis of cyber teaming. Such competitions allow researchers to address the key determinants that make a cyber defense team more or less effective in responding to and mitigating cyber attacks. For this purpose, we analyzed data collected at the 12th annual Mid-Atlantic Collegiate Cyber Defense Competition (MACCDC, http://www.maccdc.org), where eight teams were evaluated along four independent scoring dimensions: maintaining services, incident response, scenario injects, and thwarting adversarial activities. Data collected from the 13-point OAT (Observational Assessment of Teamwork) instrument by embedded observers and a cyber teamwork survey completed by all participants were used to assess teamwork and leadership behaviors and team composition and work processes, respectively. The scores from the competition were used as an outcome measure in our analysis to extract key features of team process, structure, leadership, and skill-sets in relation to effective cyber defense. We used Bayesian regression to relate scored performance during the competition to team skill composition, team experience level, and an observational construct of team collaboration. 
Our results indicate that effective collaboration, experience, and functional role-specialization within the teams are important factors that determine the success of these teams in the competition and are important observational predictors of the timely detection and effective mitigation of ongoing cyber attacks. These results support theories of team maturation and the development of functional team cognition applied to mastering cybersecurity.",2018-11-19 +26578559,VFDB 2016: hierarchical and refined dataset for big data analysis--10 years on.,"The virulence factor database (VFDB, http://www.mgc.ac.cn/VFs/) is dedicated to providing up-to-date knowledge of virulence factors (VFs) of various bacterial pathogens. Since its inception the VFDB has served as a comprehensive repository of bacterial VFs for over a decade. The exponential growth in the amount of biological data is challenging to the current database in regard to big data analysis. We recently improved two aspects of the infrastructural dataset of VFDB: (i) removed the redundancy introduced by previous releases and generated two hierarchical datasets--one core dataset of experimentally verified VFs only and another full dataset including all known and predicted VFs and (ii) refined the gene annotation of the core dataset with controlled vocabularies. Our efforts enhanced the data quality of the VFDB and promoted the usability of the database in the big data era for the bioinformatic mining of the explosively growing data regarding bacterial VFs.",2015-11-17 +31245633,Stage 2 Registered Report: There is no appreciable relationship between strength of hand preference and language ability in 6- to 7-year-old children.,"Background: Weak or inconsistent hand preference has been postulated to be a risk factor for developmental language delay. Following on from our Registered Stage 1 report this study assessed the extent to which variations in language skills are associated with the strength of hand preference. 
Methods: Data are drawn from a large sample ( N = 569) of 6- to 7-year-old children unselected for ability, assessed at two time points, 6 months apart. Hand preference was assessed using the Quantitative Hand Preference (QHP) task and five uni-manual motor tasks. Language skills (expressive and receptive vocabulary, receptive grammar, and morphological awareness) were assessed with standardized measures. Results: We found QHP scores did not distinguish children with weaker language skills from those with stronger language skills and the correlation between QHP scores and language ability was negligible in this study. Hand preference on the QHP task was significantly stronger among right-handed than left-handed children and left-handed children were typically inconsistent in the hand used across different tasks.  Conclusions: The findings presented here fail to provide any support for the theory that weak cerebral lateralisation (as assessed here by the QHP task) places children at risk of language difficulties . Stage 1 report:  https://doi.org/10.12688/wellcomeopenres.15077.1.",2019-05-13 +29028887,Identification of cancer driver genes in focal genomic aberrations from whole-exome sequencing data.,"Summary:Whole-exome sequencing (WES) data have been used for identifying copy number aberrations in cancer cells. Nonetheless, the use of WES is still challenging for identification of focal aberrant regions in multiple samples that may contain cancer driver genes. In this study, we developed a wavelet-based method for identifying focal genomic aberrant regions in the WES data from cancer cells (WIFA-X). When we applied WIFA-X to glioblastoma multiforme and lung adenocarcinoma datasets, WIFA-X outperformed other approaches on identifying cancer driver genes. Availability and implementation:R source code is available at http://gcancer.org/wifax. Contact:hyunjulee@gist.ac.kr. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-02-01 +26678566,YAGM: a web tool for mining associated genes in yeast based on diverse biological associations.,"

Background

Investigating association between genes can be used in understanding the relations of genes in biological processes. STRING and GeneMANIA are two well-known web tools which can provide a list of associated genes of a query gene based on diverse biological associations such as co-expression, co-localization, co-citation and so on. However, the transcriptional regulation association and mutant phenotype association have not been used in these two web tools. Since the comprehensive transcription factor (TF)-gene binding data, TF-gene regulation data and mutant phenotype data are available in yeast, we developed a web tool called YAGM (Yeast Associated Genes Miner) which constructed the transcriptional regulation association, mutant phenotype association and five commonly used biological associations to mine a list of associated genes of a query yeast gene.

Description

In YAGM, we collected seven kinds of datasets including TF-gene binding (TFB) data, TF-gene regulation (TFR) data, mutant phenotype (MP) data, functional annotation (FA) data, physical interaction (PI) data, genetic interaction (GI) data, and literature evidence (LE) data. Then by using the hypergeometric test to calculate the association scores of all gene pairs in yeast, we constructed seven biological associations including two transcriptional regulation associations (TFB association and TFR association), MP association, FA association, PI association, GI association, and LE association. Moreover, the expression profile association from SPELL database was also included in YAGM. When using YAGM, users can input a query gene and choose any possible subsets of the eight biological associations, then a list of associated genes of the query gene will be returned based on the chosen biological associations.

Conclusions

In this study, we presented the YAGM which provides eight biological associations for mining associated genes of a query gene in yeast. Among the eight biological associations constructed in YAGM, three (TFB association, TFR association, and MP association) are novel ones. By comparing the query results of two well-known web tools (STRING and GeneMANIA), we found that YAGM can find out distinct associated genes of a query gene. That is, YAGM can provide alternative candidates of associated genes for biologists to do further experimental investigation. We believe that YAGM will be a useful web tool for yeast biologists. YAGM is available online at http://cosbi3.ee.ncku.edu.tw/yagm/.",2015-12-09 +27485441,NET-GE: a web-server for NETwork-based human gene enrichment.,"

Motivation

Gene enrichment is a requisite for the interpretation of biological complexity related to specific molecular pathways and biological processes. Furthermore, when interpreting NGS data and human variations, including those related to pathologies, gene enrichment allows the inclusion of other genes that in the human interactome space may also play important key roles in the emergence of the phenotype. Here, we describe NET-GE, a web server for associating biological processes and pathways to sets of human proteins involved in the same phenotype. RESULTS: NET-GE is based on protein-protein interaction networks, following the notion that for a set of proteins, the context of their specific interactions can better define their function and the processes they can be related to in the biological complexity of the cell. Our method is suited to extract statistically validated enriched terms from Gene Ontology, KEGG and REACTOME annotation databases. Furthermore, NET-GE is effective even when the number of input proteins is small.

Availability and implementation

NET-GE web server is publicly available and accessible at http://net-ge.biocomp.unibo.it/enrich CONTACT: gigi@biocomp.unibo.it. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-02 +28575171,karyoploteR: an R/Bioconductor package to plot customizable genomes displaying arbitrary data.,"

Motivation

Data visualization is a crucial tool for data exploration, analysis and interpretation. For the visualization of genomic data, a tool to create customizable non-circular plots of whole genomes from any species is lacking.

Results

We have developed karyoploteR, an R/Bioconductor package to create linear chromosomal representations of any genome with genomic annotations and experimental data plotted along them. The plot creation process is inspired by R base graphics, with a main function creating karyoplots with no data and multiple additional functions, including custom functions written by the end-user, adding data and other graphical elements. This approach allows the creation of highly customizable plots from arbitrary data with complete freedom in data positioning and representation.

Availability and implementation

karyoploteR is released under Artistic-2.0 License. Source code and documentation are freely available through Bioconductor (http://www.bioconductor.org/packages/karyoploteR) and at the examples and tutorial page at https://bernatgel.github.io/karyoploter_tutorial.

Contact

bgel@igtp.cat.",2017-10-01 +22070195,"PESCADOR, a web-based tool to assist text-mining of biointeractions extracted from PubMed queries.","

Background

Biological function is greatly dependent on the interactions of proteins with other proteins and genes. Abstracts from the biomedical literature stored in the NCBI's PubMed database can be used for the derivation of interactions between genes and proteins by identifying the co-occurrences of their terms. Often, the amount of interactions obtained through such an approach is large and may mix processes occurring in different contexts. Current tools do not allow studying these data with a focus on concepts of relevance to a user, for example, interactions related to a disease or to a biological mechanism such as protein aggregation.

Results

To help the concept-oriented exploration of such data we developed PESCADOR, a web tool that extracts a network of interactions from a set of PubMed abstracts given by a user, and allows filtering the interaction network according to user-defined concepts. We illustrate its use in exploring protein aggregation in neurodegenerative disease and in the expansion of pathways associated to colon cancer.

Conclusions

PESCADOR is a platform independent web resource available at: http://cbdm.mdc-berlin.de/tools/pescador/",2011-11-09 +30961834,Comparison of All-Cause Mortality Following VTE Treatment Between Propensity Score-Adjusted Observational Studies and Matched Randomized Controlled Trials: Meta-Epidemiologic Study.,"

Background

It is unknown whether propensity score-adjusted observational studies produce results comparable to those of randomized controlled trials (RCTs) that address similar VTE treatment issues.

Methods

The PubMed and Web of Science databases were systematically searched for propensity score-adjusted observational studies, RCTs, and meta-analyses of RCTs that estimated all-cause mortality following VTE treatment. After identifying distinct clinical treatment issues evaluated in the eligible observational studies, a standardized algorithm was used to identify and match at least one RCT or RCT meta-analysis publication for paired study design analyses. Meta-analyses were used to summarize groups of studies. Treatment efficacy statistics (relative ORs) were compared between the paired observational and RCT studies, and the summary relative ORs for all study design pairs were also calculated.

Results

The observational and RCT study pairs assessed seven clinical treatment issues. Overall, the observational study-RCT pairs did not exhibit significantly different mortality estimates (summary relative OR, 0.89; 95% CI, 0.32-1.46; I2 = 23%). However, two of the seven treatment issue study pairs (thrombolysis vs anticoagulation for pulmonary embolism; once- vs twice-daily enoxaparin for VTE) exhibited a significantly different treatment effect direction, and there was a substantial (nonsignificant) difference in the magnitude of the effect in another two of the study pairs (rivaroxaban vs vitamin K antagonists for VTE; home treatment vs hospitalization for DVT).

Conclusions

This systematic comparison across seven VTE treatment topics suggests that propensity score-adjusted observational studies and RCTs often exhibit similar all-cause mortality, although differences in the direction or the magnitude of estimated treatment effects may occasionally occur.

Trial registry

PROSPERO; CRD42018087819; URL: http://www.crd.york.ac.uk/PROSPERO.",2018-10-25 +30945216,Information-Theoretic Inference of an Optimal Dictionary of Protein Supersecondary Structures.,"We recently developed an unsupervised Bayesian inference methodology to automatically infer a dictionary of protein supersecondary structures (Subramanian et al., IEEE data compression conference proceedings (DCC), 340-349, 2017). Specifically, this methodology uses the information-theoretic framework of minimum message length (MML) criterion for hypothesis selection (Wallace, Statistical and inductive inference by minimum message length, Springer Science & Business Media, New York, 2005). The best dictionary of supersecondary structures is the one that yields the most (lossless) compression on the source collection of folding patterns represented as tableaux (matrix representations that capture the essence of protein folding patterns (Lesk, J Mol Graph. 13:159-164, 1995). This book chapter outlines our MML methodology for inferring the supersecondary structure dictionary. The inferred dictionary is available at http://lcb.infotech.monash.edu.au/proteinConcepts/scop100/dictionary.html .",2019-01-01 +29789168,"quanTLC, an online open-source solution for videodensitometric quantification.","The image is the key feature of planar chromatography. Videodensitometry by digital image conversion is the fastest way of its evaluation. Instead of scanning single sample tracks one after the other, only few clicks are needed to convert all tracks at one go. A minimalistic software was newly developed, termed quanTLC, that allowed the quantitative evaluation of samples in few minutes. quanTLC includes important assets such as open-source, online, free of charge, intuitive to use and tailored to planar chromatography, as none of the nine existent software for image evaluation covered these aspects altogether. quanTLC supports common image file formats for chromatogram upload. 
All necessary steps were included, i.e., videodensitogram extraction, preprocessing, automatic peak integration, calibration, statistical data analysis, reporting and data export. The default options for each step are suitable for most analyses while still being tunable, if needed. A one-minute video was recorded to serve as user manual. The software capabilities are shown on the example of a lipophilic dye mixture separation. The quantitative results were verified by comparison with those obtained by commercial videodensitometry software and opto-mechanical slit-scanning densitometry. The data can be exported at each step to be processed in further software, if required. The code was released open-source to be exploited even further. The software itself is online useable without installation and directly accessible at http://shinyapps.ernaehrung.uni-giessen.de/quanTLC.",2018-05-19 +24038354,"Manteia, a predictive data mining system for vertebrate genes and its applications to human genetic diseases.","The function of genes is often evolutionarily conserved, and comparing the annotation of ortholog genes in different model organisms has proved to be a powerful predictive tool to identify the function of human genes. Here, we describe Manteia, a resource available online at http://manteia.igbmc.fr. Manteia allows the comparison of embryological, expression, molecular and etiological data from human, mouse, chicken and zebrafish simultaneously to identify new functional and structural correlations and gene-disease associations. Manteia is particularly useful for the analysis of gene lists produced by high-throughput techniques such as microarrays or proteomics. Data can be easily analyzed statistically to characterize the function of groups of genes and to correlate the different aspects of their annotation. 
Sophisticated querying tools provide unlimited ways to merge the information contained in Manteia along with the possibility of introducing custom user-designed biological questions into the system. This allows for example to connect all the animal experimental results and annotations to the human genome, and take advantage of data not available for human to look for candidate genes responsible for genetic disorders. Here, we demonstrate the predictive and analytical power of the system to predict candidate genes responsible for human genetic diseases.",2013-09-12 +22135293,The Genomes OnLine Database (GOLD) v.4: status of genomic and metagenomic projects and their associated metadata.,"The Genomes OnLine Database (GOLD, http://www.genomesonline.org/) is a comprehensive resource for centralized monitoring of genome and metagenome projects worldwide. Both complete and ongoing projects, along with their associated metadata, can be accessed in GOLD through precomputed tables and a search page. As of September 2011, GOLD, now on version 4.0, contains information for 11,472 sequencing projects, of which 2907 have been completed and their sequence data has been deposited in a public repository. Out of these complete projects, 1918 are finished and 989 are permanent drafts. Moreover, GOLD contains information for 340 metagenome studies associated with 1927 metagenome samples. GOLD continues to expand, moving toward the goal of providing the most comprehensive repository of metadata information related to the projects and their organisms/environments in accordance with the Minimum Information about any (x) Sequence specification and beyond.",2011-12-01 +23585031,HORDE: comprehensive resource for olfactory receptor genomics.,"Olfactory receptors (ORs) constitute the largest gene family in the mammalian genome. The existence of these proteins underlies the nature of, and variability in, odorant perception. 
The Human Olfactory Receptor Data Explorer (HORDE, http://genome.weizmann.ac.il/horde/ ) is a free online resource, which presents a complete compendium of all OR genes and pseudogenes in the genome of human and four other vertebrates. HORDE includes three parts: (1) an automated pipeline, which mines OR gene and pseudogene sequences out of complete genomes, and generates gene symbols based on sequence similarity; (2) a card generator that obtains and displays annotative information on individual ORs retrieved from external databases and relevant studies; and (3) a search engine that allows user retrieval of OR information. For human ORs, HORDE specifically addresses the universe of interindividual variation, as obtained from several sources, including whole genome sequences made possible by next-generation sequencing. This encompasses single nucleotide polymorphisms (SNP) and copy number variation (CNV), including deleterious mutational events. HORDE also hosts a number of tools designed specifically to assist in the study of OR evolution and function. In this chapter, we describe the status of HORDE (build #43). We also discuss plans for future enhancements and a road map for HORDE to become a better community-based bioinformatics tool. We highlight HORDE's role as a major research tool in the study of an expanding cohort of OR repertoires.",2013-01-01 +30458929,Intellectual Property in the Field of Regenerative Medicine in Japan.,"

Purpose

Although most pharmaceutical companies and bio-ventures have not yet created a practical business model or a credible exit thus far, most are expecting potential new business to be derived from regenerative medicine. This article discusses and proposes some ideas regarding a patent strategy for regenerative medicine.

Methods

PubMed literature searches were conducted to identify recent reports relevant to regenerative medicine. Information regarding patents and patent applications were obtained from the database service of the Japan Platform for Patent Information on the website of the National Center for Industrial Property Information and Training at https://www.j-platpat.inpit.go.jp/web/all/top/BTmTopEnglishPage and the Japan Patent Office at https://www.jpo.go.jp/index.htm.

Findings

The infrastructure of regenerative medicine in Japan is still maturing. Patent protection is important in regenerative medicine considering its special characteristics.

Implications

Based on the understanding of the special characteristics of regenerative medicine, this article discusses and proposes some ideas regarding a patent strategy for the field, which is different from that of a typical patent strategy as in the case of a small-molecule drug.",2018-10-24 +26784691,iMet-Q: A User-Friendly Tool for Label-Free Metabolomics Quantitation Using Dynamic Peak-Width Determination.,"Efficient and accurate quantitation of metabolites from LC-MS data has become an important topic. Here we present an automated tool, called iMet-Q (intelligent Metabolomic Quantitation), for label-free metabolomics quantitation from high-throughput MS1 data. By performing peak detection and peak alignment, iMet-Q provides a summary of quantitation results and reports ion abundance at both replicate level and sample level. Furthermore, it gives the charge states and isotope ratios of detected metabolite peaks to facilitate metabolite identification. An in-house standard mixture and a public Arabidopsis metabolome data set were analyzed by iMet-Q. Three public quantitation tools, including XCMS, MetAlign, and MZmine 2, were used for performance comparison. From the mixture data set, seven standard metabolites were detected by the four quantitation tools, for which iMet-Q had a smaller quantitation error of 12% in both profile and centroid data sets. Our tool also correctly determined the charge states of seven standard metabolites. By searching the mass values for those standard metabolites against Human Metabolome Database, we obtained a total of 183 metabolite candidates. With the isotope ratios calculated by iMet-Q, 49% (89 out of 183) metabolite candidates were filtered out. From the public Arabidopsis data set reported with two internal standards and 167 elucidated metabolites, iMet-Q detected all of the peaks corresponding to the internal standards and 167 metabolites. 
Meanwhile, our tool had small abundance variation (≤ 0.19) when quantifying the two internal standards and had higher abundance correlation (≥ 0.92) when quantifying the 167 metabolites. iMet-Q provides user-friendly interfaces and is publicly available for download at http://ms.iis.sinica.edu.tw/comics/Software_iMet-Q.html.",2016-01-19 +28358052,"HIVed, a knowledgebase for differentially expressed human genes and proteins during HIV infection, replication and latency.","Measuring the altered gene expression level and identifying differentially expressed genes/proteins during HIV infection, replication and latency is fundamental for broadening our understanding of the mechanisms of HIV infection and T-cell dysfunction. Such studies are crucial for developing effective strategies for virus eradication from the body. Inspired by the availability and enrichment of gene expression data during HIV infection, replication and latency, in this study, we proposed a novel compendium termed HIVed (HIV expression database; http://hivlatency.erc.monash.edu/) that harbours comprehensive functional annotations of proteins, whose genes have been shown to be dysregulated during HIV infection, replication and latency using different experimental designs and measurements. We manually curated a variety of third-party databases for structural and functional annotations of the protein entries in HIVed. With the goal of benefiting HIV related research, we collected a number of biological annotations for all the entries in HIVed besides their expression profile, including basic protein information, Gene Ontology terms, secondary structure, HIV-1 interaction and pathway information. 
We hope this comprehensive protein-centric knowledgebase can bridge the gap between the understanding of differentially expressed genes and the functions of their protein products, facilitating the generation of novel hypotheses and treatment strategies to fight against the HIV pandemic.",2017-03-30 +30364576,RNA sequencing dataset describing transcriptional changes in cervical dorsal root ganglia after bilateral pyramidotomy and forelimb intramuscular gene therapy with an adeno-associated viral vector encoding human neurotrophin-3.,"Unilateral or bilateral corticospinal tract injury in the medullary pyramids in adult rats causes anatomical and physiological changes in proprioceptive neurons projecting to the cervical spinal cord accompanied by hyperreflexia and abnormal behavioural movements including spasms. In a previous publication, we showed that ""Intramuscular Neurotrophin-3 normalizes low threshold spinal reflexes, reduces spasms and improves mobility after bilateral corticospinal tract injury in rats"" (Kathe et al., 2016) [1]. We hypothesize that neurotrophin-3 induces these changes by modifying gene expression in affected cervical dorsal root ganglia (DRG). Therefore in this data article, we analyzed the transcriptomes of cervical DRGs obtained during that previous study from naïve rats and from rats after bilateral pyramidotomy (bPYX) with unilateral intramuscular injections of either AAV1-CMV-NT3 or AAV1-CMV-EGFP applied 24 h after injury (Kathe et al., 2016) [1]. A bioinformatic analysis enabled us to identify genes that are likely to be expressed in TrkC+ neurons after injury and which were regulated by neurotrophin-3 in the direction expected from other datasets involving knockout or overexpression of neurotrophin-3. This dataset will help us and others identify genes in sensory neurons whose expression levels are regulated by neurotrophin-3 treatment. 
This may help identify novel therapeutic targets to improve sensation and movement after neurological injury. Data has been deposited in the Gene Expression Omnibus (GSE82197), http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?token=avgpicgcjhknzyv&acc=GSE82197.",2018-10-03 +31206169,Plasma kallikrein-kinin system contributes to peripheral inflammation in temporal lobe epilepsy.,"Temporal lobe epilepsy (TLE) is a chronic disease, characterized by severe and refractory seizures, triggered in the hippocampus and/or amygdala, disrupting the blood-brain barrier. This disruption can sustain, or aggravate, the epileptic condition. The aim of this study was to evaluate the activation of the kallikrein-kinin system in patients with TLE, as it relates to the maintenance of blood-brain barrier. Human hippocampal sclerotic tissues removed after surgery for seizure control, plasma, and serum were used in the following assays: immunostaining for white blood cells in the TLE hippocampus, C-reactive protein in serum, quantification of plasma kallikrein (PKal) and cathepsin B (CatB) activity in serum and plasma, quantification of C1-inhibitor, analysis of high-molecular-weight kininogen (H-kininogen) fragments, and activation of plasma prekallikrein for comparison with healthy controls. Infiltration of white blood cells in the sclerotic hippocampus and a significant increase in the neutrophil/lymphocyte ratio in the blood of TLE patients were observed. High levels of C-reactive protein (TLE = 1.4 ± 0.3 µg/mL), PKal (TLE = 5.4 ± 0.4 U/mL), and CatB (TLE = 4.9 ± 0.4 U/mL) were also evident in the serum of TLE patients comparing to controls. A strong linear correlation was observed between active CatB and PKal in the serum of TLE patients (r = 0.88). High levels of cleaved H-kininogen and free PKal, and low levels of C1-inhibitor (TLE = 188 ± 12 µg/mL) were observed in the serum of TLE patients. 
Our data demonstrated that the plasma kallikrein-kinin system is activated in patients with TLE. OPEN SCIENCE BADGES: This article has received a badge for *Open Materials* because it provided all relevant information to reproduce the study in the manuscript. The complete Open Science Disclosure form for this article can be found at the end of the article. More information about the Open Practices badges can be found at https://cos.io/our-services/open-science-badges/.",2019-07-10 +30403855,ezCADD: A Rapid 2D/3D Visualization-Enabled Web Modeling Environment for Democratizing Computer-Aided Drug Design.,"As abundant and user-friendly as computer-aided drug design (CADD) software may seem, there is still a large underserved population of biomedical researchers around the world, particularly those with no computational training and limited research funding. To address this important need and help scientists overcome barriers that impede them from leveraging CADD in their drug discovery work, we have developed ezCADD, a web-based CADD modeling environment that manifests four simple design concepts: easy, quick, user-friendly, and 2D/3D visualization-enabled. In this paper, we describe the features of three fundamental applications that have been implemented in ezCADD: small-molecule docking, protein-protein docking, and binding pocket detection, and their applications in drug design against a pathogenic microbial enzyme as an example. To assess user experience and the effectiveness of our implementation, we introduced ezCADD to first-year pharmacy students as an active learning exercise in the Principles of Drug Action course. The web service robustly handled 95 simultaneous molecular docking jobs. 
Our survey data showed that among the 95 participating students, 97% completed the molecular docking experiment on their own at least partially without extensive training; 88% considered ezCADD easy and user-friendly; 99-100% agreed that ezCADD enhanced the understanding of drug-receptor structures and recognition; and the student experience in molecular modeling and visualization was significantly improved from zero to a higher level. The student feedback represents the baseline data of user experience from noncomputational researchers. It is demonstrated that in addition to supporting drug discovery research, ezCADD is also an effective tool for promoting science, technology, engineering, and mathematics (STEM) education. More advanced CADD applications are being developed and added to ezCADD, available at http://dxulab.org/software .",2018-11-16 +29124092,"Dataset of the livability performance of the city of Birmingham, UK, as measured by its citizen wellbeing, resource security, resource efficiency and carbon emissions.","This data article presents the UK City LIFE1 data set for the city of Birmingham, UK. UK City LIFE1 is a new, comprehensive and holistic method for measuring the livable sustainability performance of UK cities. The Birmingham data set comprises 346 indicators structured simultaneously (1) within a four-tier, outcome-based framework in order to aid in their interpretation (e.g., promote healthy living and healthy long lives, minimize energy use, uncouple economic vitality from CO2 emissions) and (2) thematically in order to complement government and disciplinary siloes (e.g., health, energy, economy, climate change). Birmingham data for the indicators are presented within an Excel spreadsheet with their type, units, geographic area, year, source, link to secondary data files, data collection method, data availability and any relevant calculations and notes. 
This paper provides a detailed description of UK city LIFE1 in order to enable comparable data sets to be produced for other UK cities. The Birmingham data set is made publically available at http://epapers.bham.ac.uk/3040/ to facilitate this and to enable further analyses. The UK City LIFE1 Birmingham data set has been used to understand what is known and what is not known about the livable sustainability performance of the city and to inform how Birmingham City Council can take action now to improve its understanding and its performance into the future (see ""Improving city-scale measures of livable sustainability: A study of urban measurement and assessment through application to the city of Birmingham, UK"" Leach et al. [2]).",2017-10-13 +29020642,cBiT: A transcriptomics database for innovative biomaterial engineering.,"Creating biomaterials that are suited for clinical application is still hampered by a lack of understanding of the interaction between a cell and the biomaterial surface it grows on. This surface communication can strongly impact cellular behavior, which in turn affects the chances of a successful interaction between a material and the host tissue. Transcriptomics data have previously been linked to measurements of biomaterial properties in order to explain the biological mechanisms underlying these cell-biomaterial interactions. However, such multi-assay data are highly complex and therefore require careful and unambiguous characterization and storage. Failure to do so may result in loss of valuable data or erroneous data analysis. In order to start a new initiative that tackles these issues and offers a platform for innovative biomaterial development, we have created a publically accessible repository called The Compendium for Biomaterial Transcriptomics (cBiT, https://cbit.maastrichtuniversity.nl). cBiT is a data warehouse that gives users the opportunity to search through biomaterial-based transcriptomics data sets using a web interface. 
Data of interest can be selected and downloaded, together with associated measurements of material properties. Researchers are also invited to add their data to cBiT in order to further enhance its scientific value. We aim to make cBiT the hub for biomaterial-associated data, thereby enabling major contributions to a more efficient development of new materials with improved body integration. Here, we describe the structure of cBiT and provide a use case with clinically applied materials to demonstrate how cBiT can be used to correlate data across transcriptomics studies.",2017-10-03 +30368849,MDockPeP: An ab-initio protein-peptide docking server.,"Protein-peptide interactions play a crucial role in a variety of cellular processes. The protein-peptide complex structure is a key to understand the mechanisms underlying protein-peptide interactions and is critical for peptide therapeutic development. We present a user-friendly protein-peptide docking server, MDockPeP. Starting from a peptide sequence and a protein receptor structure, the MDockPeP Server globally docks the all-atom, flexible peptide to the protein receptor. The produced modes are then evaluated with a statistical potential-based scoring function, ITScorePeP. This method was systematically validated using the peptiDB benchmarking database. At least one near-native peptide binding mode was ranked among top 10 (or top 500) in 59% (85%) of the bound cases, and in 40.6% (71.9%) of the challenging unbound cases. The server can be used for both protein-peptide complex structure prediction and initial-stage sampling of the protein-peptide binding modes for other docking or simulation methods. MDockPeP Server is freely available at http://zougrouptoolkit.missouri.edu/mdockpep. 
© 2018 Wiley Periodicals, Inc.",2018-10-23 +29900215,"Data on three-year pesticide monitoring in ditches of the apple orchard region of Altes Land, Germany.","The data presented in this article are related to the research article 'Chemical and biological monitoring of the load of plant protection products and of zoocoenoses in ditches of the orchard region Altes Land' (Süß et al., 2006) [1], which is only available in the German language. The pesticide data presented here were acquired from four ditches (three ditches were located in apple orchards, and one ditch was located in a grassland region) between 2001 and 2003 (Lorenz et al., 2018) [2]. Two different monitoring strategies were applied: event-driven sampling after pesticide applications and weekly integrated sampling using automatic water samplers. A total of 70 active substances were monitored while farmers applied 25 active substances. This article describes the study sites and the analytical methods used to quantify the pesticides in the water samples. The field data set is publicly available at the OpenAgrar repository under https://doi.org/10.5073/20180213-144359 (Lorenz et al., 2018) [2].",2018-03-21 +30911975,Recommendations on the Use of Mobile Applications for the Collection and Communication of Pharmaceutical Product Safety Information: Lessons from IMI WEB-RADR.,"Over a period of 3 years, the European Union's Innovative Medicines Initiative WEB-RADR (Recognising Adverse Drug Reactions; https://web-radr.eu/ ) project explored the value of two digital tools for pharmacovigilance (PV): mobile applications (apps) for reporting the adverse effects of drugs and social media data for its contribution to safety signalling. The ultimate intent of WEB-RADR was to provide policy, technical and ethical recommendations on how to develop and implement such digital tools to enhance patient safety. Recommendations relating to the use of mobile apps for PV are summarised in this paper. 
There is a presumption amongst at least some patients and healthcare professionals that information ought to be accessed and reported from any setting, including mobile apps. WEB-RADR has focused on the use of such technology for reporting suspected adverse drug reactions and for broadcasting safety information to its users, i.e. two-way risk communication. Three apps were developed and publicly launched within Europe as part of the WEB-RADR project and subsequently assessed by a range of stakeholders to determine their value as effective tools for improving patient safety; a fourth generic app was later piloted in two African countries. The recommendations from the development and evaluation of the European apps are presented here with supporting considerations, rationales and caveats as well as suggested areas for further research.",2019-04-01 +30192921,Prediction of protein group function by iterative classification on functional relevance network.,"

Motivation

Biological experiments including proteomics and transcriptomics approaches often reveal sets of proteins that are most likely to be involved in a disease/disorder. To understand the functional nature of a set of proteins, it is important to capture the function of the proteins as a group, even in cases where function of individual proteins is not known. In this work, we propose a model that takes groups of proteins found to work together in a certain biological context, integrates them into functional relevance networks, and subsequently employs an iterative inference on graphical models to identify group functions of the proteins, which are then extended to predict function of individual proteins.

Results

The proposed algorithm, iterative group function prediction (iGFP), depicts proteins as a graph that represents functional relevance of proteins considering their known functional, proteomics and transcriptional features. Proteins in the graph will be clustered into groups by their mutual functional relevance, which is iteratively updated using a probabilistic graphical model, the conditional random field. iGFP showed robust accuracy even when a substantial number of GO annotations were missing. The perspective of 'group' function annotation opens up novel approaches for understanding functional nature of proteins in biological systems. Availability and implementation: http://kiharalab.org/iGFP/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +31149770,Do Women Know Their Prepregnancy Weight?,"

Objective

Prepregnancy weight may not always be known to women. A model was developed to estimate prepregnancy weight from measured pregnancy weight.

Methods

The model was developed and validated using participants from two studies (Project Viva, n = 301, model development; and Fit for Delivery [FFD], n = 401, model validation). Data from the third study (Programming Research in Obesity, Growth, Environment and Social Stressors [PROGRESS]), which included women from Mexico City, were used to demonstrate the utility of the newly developed model to objectively quantify prepregnancy weight.

Results

The model developed from the Project Viva study validated well with low bias (R2  = 0.95; y = 1.02x - 0.69; bias = 0.68 kg; 95% CI: -4.86 to 6.21). Predictions in women from FFD demonstrated good agreement (R2  = 0.96; y = 0.96x + 4.35; bias = 1.60 kg; 95% CI: -4.40 to 7.54; error range = -11.25 kg to 14.73 kg). High deviations from model predictions were observed in the Programming Research in PROGRESS (R2  = 0.81; y = 0.89x + 9.61; bias = 2.83 kg; 95% CI: -7.70 to 12.31; error range = -39.17 kg to 25.73 kg). The model was programmed into software (https://www.pbrc.edu/research-and-faculty/calculators/prepregnancy/).

Conclusions

The developed model provides an alternative to determine prepregnancy weight in populations receiving routine health care that may not have accurate knowledge of prepregnancy weight. The software can identify misreporting and classification into incorrect gestational weight gain categories.",2019-05-31 +23794737,Analysis of disease-associated objects at the Rat Genome Database.,"The Rat Genome Database (RGD) is the premier resource for genetic, genomic and phenotype data for the laboratory rat, Rattus norvegicus. In addition to organizing biological data from rats, the RGD team focuses on manual curation of gene-disease associations for rat, human and mouse. In this work, we have analyzed disease-associated strains, quantitative trait loci (QTL) and genes from rats. These disease objects form the basis for seven disease portals. Among disease portals, the cardiovascular disease and obesity/metabolic syndrome portals have the highest number of rat strains and QTL. These two portals share 398 rat QTL, and these shared QTL are highly concentrated on rat chromosomes 1 and 2. For disease-associated genes, we performed gene ontology (GO) enrichment analysis across portals using RatMine enrichment widgets. Fifteen GO terms, five from each GO aspect, were selected to profile enrichment patterns of each portal. Of the selected biological process (BP) terms, 'regulation of programmed cell death' was the top enriched term across all disease portals except in the obesity/metabolic syndrome portal where 'lipid metabolic process' was the most enriched term. 'Cytosol' and 'nucleus' were common cellular component (CC) annotations for disease genes, but only the cancer portal genes were highly enriched with 'nucleus' annotations. Similar enrichment patterns were observed in a parallel analysis using the DAVID functional annotation tool. 
The relationship between the preselected 15 GO terms and disease terms was examined reciprocally by retrieving rat genes annotated with these preselected terms. The individual GO term-annotated gene list showed enrichment in physiologically related diseases. For example, the 'regulation of blood pressure' genes were enriched with cardiovascular disease annotations, and the 'lipid metabolic process' genes with obesity annotations. Furthermore, we were able to enhance enrichment of neurological diseases by combining 'G-protein coupled receptor binding' annotated genes with 'protein kinase binding' annotated genes. Database URL: http://rgd.mcw.edu",2013-06-21 +28025337,"On-the-fly selection of cell-specific enhancers, genes, miRNAs and proteins across the human body using SlideBase. ","Genomics consortia have produced large datasets profiling the expression of genes, micro-RNAs, enhancers and more across human tissues or cells. There is a need for intuitive tools to select subsets of such data that is the most relevant for specific studies. To this end, we present SlideBase, a web tool which offers a new way of selecting genes, promoters, enhancers and microRNAs that are preferentially expressed/used in a specified set of cells/tissues, based on the use of interactive sliders. With the help of sliders, SlideBase enables users to define custom expression thresholds for individual cell types/tissues, producing sets of genes, enhancers etc. which satisfy these constraints. Changes in slider settings result in simultaneous changes in the selected sets, updated in real time. 
SlideBase is linked to major databases from genomics consortia, including FANTOM, GTEx, The Human Protein Atlas and BioGPS.Database URL: http://slidebase.binf.ku.dk.",2016-12-26 +30392397,Software Tools to Facilitate Systematic Review Used for Cancer Hazard Identification.,"Objective and systematic methods to search, review, and synthesize published studies are a fundamental aspect of carcinogen hazard classification. Systematic review is a historical strength of the International Agency for Research on Cancer (IARC) Monographs Program and the United States National Toxicology Program (NTP) Office of the Report on Carcinogens (RoC). Both organizations are tasked with evaluating peer-reviewed, published evidence to determine whether specific substances, exposure scenarios, or mixtures pose a cancer hazard to humans. This evidence synthesis is based on objective, transparent, published methods that call for extracting and interpreting data in a systematic manner from multiple domains, including a) human exposure, b) epidemiological evidence, c) evidence from experimental animals, and d) mechanistic evidence. The process involves multiple collaborators and requires an extensive literature search, review, and synthesis of the evidence. Several online tools have been implemented to facilitate these collaborative systematic review processes. Specifically, Health Assessment Workplace Collaborative (HAWC) and Table Builder are custom solutions designed to record and share the results of the systematic literature search, data extraction, and analyses. In addition, a content management system for web-based project management and document submission has been adopted to enable access to submitted drafts simultaneously by multiple co-authors and to facilitate their peer review and revision. These advancements in cancer hazard classification have applicability in multiple systematic review efforts. 
https://doi.org/10.1289/EHP4224.",2018-10-01 +28195585,The French Muséum national d'histoire naturelle vascular plant herbarium collection dataset.,"We provide a quantitative description of the French national herbarium vascular plants collection dataset. Held at the Muséum national d'histoire naturelle, Paris, it currently comprises records for 5,400,000 specimens, representing 90% of the estimated total of specimens. Ninety nine percent of the specimen entries are linked to one or more images and 16% have field-collecting information available. This major botanical collection represents the results of over three centuries of exploration and study. The sources of the collection are global, with a strong representation for France, including overseas territories, and former French colonies. The compilation of this dataset was made possible through numerous national and international projects, the most important of which was linked to the renovation of the herbarium building. The vascular plant collection is actively expanding today, hence the continuous growth exhibited by the dataset, which can be fully accessed through the GBIF portal or the MNHN database portal (available at: https://science.mnhn.fr/institution/mnhn/collection/p/item/search/form). This dataset is a major source of data for systematics, global plants macroecological studies or conservation assessments.",2017-02-14 +30828598,Data supporting polymerization of anti-fouling polymer brushes polymerized on the pore walls of porous aluminium and titanium oxides.,"The data presented in this article affords insight into the fabrication and ensuing microstructure of the supported porous anodic aluminum oxide (AAO) and TiO2-nanotubes (NT) films that are used for the subsequent grafting of antifouling poly(oligo ethyleneglycol) methylether methacrylate (POEGMA) and poly acrylamide (PAAm) brushes. 
The experimental procedure for the grafting of POEGMA and PAAm via atom transfer radical polymerization (ATRP) is described in Wassel et al. (2019) https://doi.org/10.1016/j.matdes.2018.107542 [1]. The FTIR spectra of the porous oxides before and after attachment of (3-Aminopropyl)trimethoxysilane (APTMS) are presented. Microscopic images of thick POEGMA films and PAAm on AAO are displayed, and an FTIR spectrum of AAO/PAAm is shown. An EDX mapping of carbon is shown on an AAO/POEGMA sample. The adsorption behavior of Fluorescein isothiocyanate (FITC) marked bovine serum albumin (BSA) on patterned porous TiO2-NT films is documented. Finally microscopic images are presented to compare the scratch resistance behavior of pristine porous films with those functionalized with POEGMA.",2019-02-02 +30357519,Challenging the heterogeneity of disease presentation in malignant melanoma-impact on patient treatment.,"There is an increasing global interest to support research areas that can assist in understanding disease and improving patient care. The National Cancer Institute (NIH) has identified precision medicine-based approaches as key research strategies to expedite advances in cancer research. The Cancer Moonshot program ( https://www.cancer.gov/research/key-initiatives/moonshot-cancer-initiative ) is the largest cancer program of all time, and has been launched to accelerate cancer research that aims to increase the availability of therapies to more patients and, ultimately, to eradicate cancer. Mass spectrometry-based proteomics has been extensively used to study the molecular mechanisms of cancer, to define molecular subtypes of tumors, to map cancer-associated protein interaction networks and post-translational modifications, and to aid in the development of new therapeutics and new diagnostic and prognostic tests. To establish the basis for our melanoma studies, we have established the Southern Sweden Malignant Melanoma Biobank. 
Tissues collected over many years have been accurately characterized with respect to the tumor and patient information. The extreme variability displayed in the protein profiles and the detection of missense mutations has confirmed the complexity and heterogeneity of the disease. It is envisaged that the combined analysis of clinical, histological, and proteomic data will provide patients with a more personalized medical treatment. With respect to disease presentation, targeted treatment and medical mass spectrometry analysis and imaging, this overview report will outline and summarize the current achievements and status within malignant melanoma. We present data generated by our cancer research center in Lund, Sweden, where we have built extensive capabilities in biobanking, proteogenomics, and patient treatments over an extensive time period.",2018-10-24 +22649282,Florabank1: a grid-based database on vascular plant distribution in the northern part of Belgium (Flanders and the Brussels Capital region).,"Florabank1 is a database that contains distributional data on the wild flora (indigenous species, archeophytes and naturalised aliens) of Flanders and the Brussels Capital Region. It holds about 3 million records of vascular plants, dating from 1800 till present. Furthermore, it includes ecological data on vascular plant species, redlist category information, Ellenberg values, legal status, global distribution, seed bank etc. The database is an initiative of ""Flo.Wer"" (www.plantenwerkgroep.be), the Research Institute for Nature and Forest (INBO: www.inbo.be) and the National Botanic Garden of Belgium (www.br.fgov.be). Florabank aims at centralizing botanical distribution data gathered by both professional and amateur botanists and to make these data available to the benefit of nature conservation, policy and scientific research.The occurrence data contained in Florabank1 are extracted from checklists, literature and herbarium specimen information. 
Of survey lists, the locality name (verbatimLocality), species name, observation date and IFBL square code, the grid system used for plant mapping in Belgium (Van Rompaey 1943), is recorded. For records dating from the period 1972-2004 all pertinent botanical journals dealing with Belgian flora were systematically screened. Analysis of herbarium specimens in the collection of the National Botanic Garden of Belgium, the University of Ghent and the University of Liège provided interesting distribution knowledge concerning rare species, this information is also included in Florabank1. The data recorded before 1972 is available through the Belgian GBIF node (http://data.gbif.org/datasets/resource/10969/), not through FLORABANK1, to avoid duplication of information. A dedicated portal providing access to all published Belgian IFBL records at this moment is available at: http://projects.biodiversity.be/ifbl All data in Florabank1 is georeferenced. Every record holds the decimal centroid coordinates of the IFBL square containing the observation. The uncertainty radius is the smallest circle possible covering the whole IFBL square, which can measure 1 Km² or 4 Km². Florabank is a work in progress and new occurrences are added as they become available; the dataset will be updated through GBIF on a regular basis.",2012-05-16 +30349118,An atlas of genetic associations in UK Biobank.,"Genome-wide association studies (GWAS) have identified many loci contributing to variation in complex traits, yet the majority of loci that contribute to the heritability of complex traits remain elusive. Large study populations with sufficient statistical power are required to detect the small effect sizes of the yet unidentified genetic variants. However, the analysis of huge cohorts, like UK Biobank, is challenging. Here, we present an atlas of genetic associations for 118 non-binary and 660 binary traits of 452,264 UK Biobank participants of European ancestry. 
Results are compiled in a publicly accessible database that allows querying genome-wide association results for 9,113,133 genetic variants, as well as downloading GWAS summary statistics for over 30 million imputed genetic variants (>23 billion phenotype-genotype pairs). Our atlas of associations (GeneATLAS, http://geneatlas.roslin.ed.ac.uk ) will help researchers to query UK Biobank results in an easy and uniform way without the need to incur high computational costs.",2018-10-22 +29124090,Practice variation amongst preventive child healthcare professionals in the prevention of child maltreatment in the Netherlands: Qualitative and quantitative data.,"This article provides both qualitative and quantitative data on practice variation amongst preventive child healthcare professionals in the prevention of child maltreatment in the Netherlands. Qualitative data consist of topics identified during interviews with 11 experts (with quotes), resulting in an online survey. The quantitative data are survey responses from 1104 doctors and nurses working in 29 preventive child healthcare organizations. Additionally, the interview topic list, the qualitative data analysis methodology, the survey (in English and Dutch) and anonymized raw survey data (http://hdl.handle.net/10411/5LJOGH) are provided as well. This data-in-brief article accompanies the paper ""Variation in prevention of child maltreatment by Dutch child healthcare professionals"" by Simeon Visscher and Henk van Stel [1].",2017-10-02 +29400473,phpMs: A PHP-Based Mass Spectrometry Utilities Library.,"The recent establishment of cloud computing, high-throughput networking, and more versatile web standards and browsers has led to a renewed interest in web-based applications. 
While traditionally big data has been the domain of optimized desktop and server applications, it is now possible to store vast amounts of data and perform the necessary calculations offsite in cloud storage and computing providers, with the results visualized in a high-quality cross-platform interface via a web browser. There are number of emerging platforms for cloud-based mass spectrometry data analysis; however, there is limited pre-existing code accessible to web developers, especially for those that are constrained to a shared hosting environment where Java and C applications are often forbidden from use by the hosting provider. To remedy this, we provide an open-source mass spectrometry library for one of the most commonly used web development languages, PHP. Our new library, phpMs, provides objects for storing and manipulating spectra and identification data as well as utilities for file reading, file writing, calculations, peptide fragmentation, and protein digestion as well as a software interface for controlling search engines. We provide a working demonstration of some of the capabilities at http://pgb.liv.ac.uk/phpMs .",2018-02-13 +30239781,Identifying core biological processes distinguishing human eye tissues with precise systems-level gene expression analyses and weighted correlation networks.,"The human eye is built from several specialized tissues which direct, capture and pre-process information to provide vision. The gene expression of the different eye tissues has been extensively profiled with RNA-seq across numerous studies. Large consortium projects have also used RNA-seq to study gene expression patterning across many different human tissues, minus the eye. There has not been an integrated study of expression patterns from multiple eye tissues compared with other human body tissues. We have collated all publicly available healthy human eye RNA-seq datasets as well as dozens of other tissues. 
We use this fully integrated dataset to probe the biological processes and pan expression relationships between the cornea, retina, retinal pigment epithelium (RPE)-choroid complex, and the rest of the human tissues with differential expression, clustering and gene ontology term enrichment tools. We also leverage our large collection of retina and RPE-choroid tissues to build the first human weighted gene correlation networks and use them to highlight known biological pathways and eye gene disease enrichment. We also have integrated publicly available single-cell RNA-seq data from mouse retina into our framework for validation and discovery. Finally, we make all these data, analyses and visualizations available via a powerful interactive web application (https://eyeintegration.nei.nih.gov/).",2018-10-01 +28731045,Renal cell tumors with clear cell histology and intact VHL and chromosome 3p: a histological review of tumors from the Cancer Genome Atlas database.,"Clear cell renal cell carcinoma is by far the most common form of kidney cancer; however, a number of histologically similar tumors are now recognized and considered distinct entities. The Cancer Genome Atlas published data set was queried (http://cbioportal.org) for clear cell renal cell carcinoma tumors lacking VHL gene mutation and chromosome 3p loss, for which whole-slide images were reviewed. Of the 418 tumors in the published Cancer Genome Atlas clear cell renal cell carcinoma database, 387 had VHL mutation, copy number loss for chromosome 3p, or both (93%). Of the remaining, 27/31 had whole-slide images for review. One had 3p loss based on karyotype but not sequencing, and three demonstrated VHL promoter hypermethylation. Nine could be reclassified as distinct or emerging entities: translocation renal cell carcinoma (n=3), TCEB1 mutant renal cell carcinoma (n=3), papillary renal cell carcinoma (n=2), and clear cell papillary renal cell carcinoma (n=1). 
Of the remaining, 6 had other clear cell renal cell carcinoma-associated gene alterations (PBRM1, SMARCA4, BAP1, SETD2), leaving 11 specimens, including 2 high-grade or sarcomatoid renal cell carcinomas and 2 with prominent fibromuscular stroma (not TCEB1 mutant). One of the remaining tumors exhibited gain of chromosome 7 but lacked histological features of papillary renal cell carcinoma. Two tumors previously reported to harbor TFE3 gene fusions also exhibited VHL mutation, chromosome 3p loss, and morphology indistinguishable from clear cell renal cell carcinoma, the significance of which is uncertain. In summary, almost all clear cell renal cell carcinomas harbor VHL mutation, 3p copy number loss, or both. Of tumors with clear cell histology that lack these alterations, a subset can now be reclassified as other entities. Further study will determine whether additional entities exist, based on distinct genetic pathways that may have implications for treatment.",2017-07-21 +21913285,DRUMS: a human disease related unique gene mutation search engine.,"With the completion of the human genome project and the development of new methods for gene variant detection, the integration of mutation data and its phenotypic consequences has become more important than ever. Among all available resources, locus-specific databases (LSDBs) curate one or more specific genes' mutation data along with high-quality phenotypes. Although some genotype-phenotype data from LSDB have been integrated into central databases little effort has been made to integrate all these data by a search engine approach. In this work, we have developed disease related unique gene mutation search engine (DRUMS), a search engine for human disease related unique gene mutation as a convenient tool for biologists or physicians to retrieve gene variant and related phenotype information. Gene variant and phenotype information were stored in a gene-centred relational database. 
Moreover, the relationships between mutations and diseases were indexed by the uniform resource identifier from LSDB, or another central database. By querying DRUMS, users can access the most popular mutation databases under one interface. DRUMS could be treated as a domain specific search engine. By using web crawling, indexing, and searching technologies, it provides a competitively efficient interface for searching and retrieving mutation data and their relationships to diseases. The present system is freely accessible at http://www.scbit.org/glif/new/drums/index.html.",2011-10-01 +28533016,DRodVir: A resource for exploring the virome diversity in rodents.,"Emerging zoonotic diseases have received tremendous interests in recent years, as they pose a significant threat to human health, animal welfare, and economic stability. A high proportion of zoonoses originate from wildlife reservoirs. Rodents are the most numerous, widespread, and diverse group of mammals on the earth and are reservoirs for many zoonotic viruses responsible for significant morbidity and mortality. A better understanding of virome diversity in rodents would be of importance for researchers and professionals in the field. Therefore, we developed the DRodVir database (http://www.mgc.ac.cn/DRodVir/), a comprehensive, up-to-date, and well-curated repository of rodent-associated animal viruses. The database currently covers 7690 sequences from 5491 rodent-associated mammal viruses of 26 viral families detected from 194 rodent species in 93 countries worldwide. In addition to virus sequences, the database provides detailed information on related samples and host rodents, as well as a set of online analytical tools for text query, BLAST search and phylogenetic reconstruction. The DRodVir database will help virologists better understand the virome diversity of rodents. 
Moreover, it will be a valuable tool for epidemiologists and zoologists for easy monitoring and tracking of the current and future zoonotic diseases. As a data application example, we further compared the current status of rodent-associated viruses with bat-associated viruses to highlight the necessity for including additional host species and geographic regions in future investigations, which will help us achieve a better understanding of the virome diversities in the two major reservoirs of emerging zoonotic infectious diseases.",2017-05-03 +30367579,An online tool for measuring and visualizing phenotype similarities using HPO.,"

Background

The Human Phenotype Ontology (HPO) is one of the most popular bioinformatics resources. Recently, HPO-based phenotype semantic similarity has been effectively applied to model patient phenotype data. However, the existing tools are revised based on the Gene Ontology (GO)-based term similarity. The design of the models are not optimized for the unique features of HPO. In addition, existing tools only allow HPO terms as input and only provide pure text-based outputs.

Results

We present PhenoSimWeb, a web application that allows researchers to measure HPO-based phenotype semantic similarities using four approaches borrowed from GO-based similarity measurements. Besides, we provide an approach considering the unique properties of HPO. And, PhenoSimWeb allows text that describes phenotypes as input, since clinical phenotype data is always in text. PhenoSimWeb also provides a graphic visualization interface to visualize the resulting phenotype network.

Conclusions

PhenoSimWeb is an easy-to-use and functional online application. Researchers can use it to calculate phenotype similarity conveniently, predict phenotype associated genes or diseases, and visualize the network of phenotype interactions. PhenoSimWeb is available at http://120.77.47.2:8080.",2018-08-13 +29896482,A new SWATH ion library for mouse adult hippocampal neural stem cells.,"Over the last years, the SWATH data-independent acquisition protocol (Sequential Window acquisition of All THeoretical mass spectra) has become a cornerstone for the worldwide proteomics community (Collins et al., 2017) [1]. In this approach, a high-resolution quadrupole-ToF mass spectrometer acquires thousands of MS/MS data by selecting not just a single precursor at a time, but by allowing a broad m/z range to be fragmented. This acquisition window is then sequentially moved from the lowest to the highest mass selection range. This technique enables the acquisition of thousands of high-resolution MS/MS spectra per minute in a standard LC-MS run. In the subsequent data analysis phase, the corresponding dataset is searched in a ""triple quadrupole-like"" mode, thus not considering the whole MS/MS scan spectrum, but by searching for several precursor to fragment transitions that identify and quantify the corresponding peptide. This search is made possible with the use of an ion library, previously acquired in a classical data dependent, full-spectrum mode (Fabre et al., 2017; Wu et al., 2017) [2], [3]. The SWATH protocol, combining the protein identification power of high-resolution MS/MS spectra with the robustness and accuracy in analyte quantification of triple-quad targeted workflows, has become very popular in proteomics research. The major drawback lies in the ion library itself, which is normally demanding and time-consuming to build. 
Conversely, through the realignment of chromatographic retention times, an ion library of a given proteome can relatively easily be tailored upon ""any"" proteomics experiment done on the same proteome. We are thus hereby sharing with the worldwide proteomics community our newly acquired ion library of mouse adult hippocampal neural stem cells. Given the growing effort in neuroscience research involving proteomics experiments (Pons-Espinal et al., 2017; Sarnyai and Guest, 2017; Sethi et al., 2015; Bramini et al., 2016) [4], [5], [6], [7], we believe that this data might be of great help for the neuroscience community. All the here reported data (RAW files, results and ion library) can be freely downloaded from the SWATHATLAS (Deutsch et al., 2008) [8] website (http://www.peptideatlas.org/PASS/PASS01110).",2018-02-27 +27503118,ARN: analysis and prediction by adipogenic professional database.,"Adipogenesis is the process of cell differentiation by which mesenchymal stem cells become adipocytes. Extensive research is ongoing to identify genes, their protein products, and microRNAs that correlate with fat cell development. The existing databases have focused on certain types of regulatory factors and interactions. However, there is no relationship between the results of the experimental studies on adipogenesis and these databases because of the lack of an information center. This information fragmentation hampers the identification of key regulatory genes and pathways. Thus, it is necessary to provide an information center that is quickly and easily accessible to researchers in this field. We selected and integrated data from eight external databases based on the results of text-mining, and constructed a publicly available database and web interface (URL: http://210.27.80.93/arn/ ), which contained 30873 records related to adipogenic differentiation. 
Then, we designed an online analysis tool to analyze the experimental data or form a scientific hypothesis about adipogenesis through Swanson's literature-based discovery process. Furthermore, we calculated the ""Impact Factor"" (""IF"") value that reflects the importance of each node by counting the numbers of relation records, expression records, and prediction records for each node. This platform can support ongoing adipogenesis research and contribute to the discovery of key regulatory genes and pathways.",2016-08-08 +31171992,"ReLayer: a Free, Online Tool for Extracting Retinal Thickness From Cross-Platform OCT Images.","

Purpose

To describe and evaluate a free, online tool for automatically segmenting optical coherence tomography (OCT) images from different devices and computing summary measures such as retinal thickness.

Methods

ReLayer (https://relayer.online) is an online platform to which OCT scan images can be uploaded and analyzed. Results can be downloaded as plaintext (.csv) files. The segmentation method includes a novel, one-dimensional active contour model, designed to locate the inner limiting membrane, inner/outer segment, and retinal pigment epithelium. The method, designed for B-scans from Heidelberg Engineering Spectralis, was adapted for Topcon 3D OCT-2000 and OptoVue AngioVue. The method was applied to scans from healthy and pathological eyes, and was validated against segmentation by the manufacturers, the IOWA Reference Algorithms, and manual segmentation.

Results

Segmentation of a B-scan took ≤1 second. In healthy eyes, mean difference in retinal thickness from ReLayer and the reference standard was below the resolution of the Spectralis and 3D OCT-2000, and slightly above the resolution of the AngioVue. In pathological eyes, ReLayer performed similarly to IOWA (P = 0.97) and better than Spectralis (P < 0.001).

Conclusions

A free online platform (ReLayer) is capable of segmenting OCT scans with similar speed, accuracy, and reliability as the other tested algorithms, but offers greater accessibility. ReLayer could represent a valuable tool for researchers requiring the full segmentation, often not made available by commercial software.

Translational relevance

A free online platform (ReLayer) provides free, accessible segmentation of OCT images: data often not available via existing commercial software.",2019-05-29 +31071345,NIMH MonkeyLogic: Behavioral control and data acquisition in MATLAB.,"

Background

Computerized control of behavioral paradigms is an essential element of neurobehavioral studies, especially physiological recording studies that require sub-millisecond precision. Few software solutions provide a simple, flexible environment to create and run these applications. MonkeyLogic, a MATLAB-based package, was developed to meet these needs, but faces a performance crisis and obsolescence due to changes in MATLAB itself.

New method

Here we report a complete redesign and rewrite of MonkeyLogic, now NIMH MonkeyLogic, that natively supports the latest 64-bit MATLAB on the Windows platform. Major layers of the underlying real-time hardware control were removed and replaced by custom toolboxes: NIMH DAQ Toolbox and MonkeyLogic Graphics Library. The redesign resolves undesirable delays in data transfers and limitations in graphics capabilities.

Results

NIMH MonkeyLogic is essentially a new product. It provides a powerful new scripting framework, has dramatic speed enhancements and provides major new graphics abilities.

Comparison with existing method

NIMH MonkeyLogic is fully backward compatible with earlier task scripts, but with better temporal precision. It provides more input device options, superior graphics and a new real-time closed-loop programming model. Because NIMH MonkeyLogic requires no commercial toolbox and has a reduced hardware requirement, implementation costs are substantially reduced.

Conclusion

NIMH MonkeyLogic is a versatile, powerful, up-to-date tool for controlling a wide range of experiments. It is freely available from https://monkeylogic.nimh.nih.gov/.",2019-05-06 +30979697,Visualizing Patterns in Pediatric and Adult Hospital Care.,"

Objectives

We aimed to design a graphical tool for understanding and effectively communicating the complex differences between pediatric and adult hospital care systems.

Patients and methods

We analyzed the most recent hospital administrative data sets for inpatient admission and emergency department visits from 7 US states (2014: Arkansas, Florida, Kentucky, Maryland, Massachusetts, and New York; 2011: California). Probabilities of care completion (Pcc) were calculated for pediatric (<18 years old) and adult conditions in all acute-care hospitals in each state. Using the Pcc, we constructed interactive heatmap visualizations for direct comparison of pediatric and adult hospital care systems.

Results

On average, across the 7 states, 70.6% of all hospitals had Pcc >0.5 for more than half of all adult conditions, whereas <14.9% of hospitals had Pcc >0.1 for half of pediatric conditions. Visualizations revealed wide variation among states with clearly apparent institutional dependencies and condition-specific gaps (full interactive versions are available at https://goo.gl/5t8vAw).

Conclusions

The functional disparities between pediatric and adult hospital care systems are substantial, and condition-specific differences should be considered in reimbursement strategies, disaster planning, network adequacy determinations, and public health planning.",2019-04-12 +25601067,"Male combat veterans' narratives of PTSD, masculinity, and health.","This article uniquely examines the ways a group of male combat veterans talk about masculinity and how, following post-traumatic stress disorder (PTSD), they performed masculinities in the context of a surfing group, and what effects this had upon their health and wellbeing. Participant observations and life history interviews were conducted with a group of combat veterans who belonged to a surfing charity for veterans experiencing PTSD. Data were rigorously explored via narrative analysis. Our findings revealed the ways in which veterans enacted masculinities in accordance with the values that were cultivated during military service. These masculine performances in the surfing group had important effects both on and for the veterans' wellbeing. Significantly, the study highlights how masculine performances can be seen alternately as a danger and as a resource for health and wellbeing in relation to PTSD. The article advances knowledge on combat veterans and mental health with critical implications for the promotion of male veterans' mental health. These include the original suggestion that health-promoting masculine performances might be recognised and supported in PTSD treatment settings. Rather than automatically viewing masculinity as problematic, this article moves the field forward by highlighting how hegemonic masculinities can be reconstructed in positive ways which might improve veterans' health and wellbeing. 
A video abstract of this article can be found at: https://www.youtube.com/watch?v=BaYzaOP1kAY.",2015-01-01 +29912209,Immune-centric network of cytokines and cells in disease context identified by computational mining of PubMed.,"Cytokines are signaling molecules secreted and sensed by immune and other cell types, enabling dynamic intercellular communication. Although a vast amount of data on these interactions exists, this information is not compiled, integrated or easily searchable. Here we report immuneXpresso, a text-mining engine that structures and standardizes knowledge of immune intercellular communication. We applied immuneXpresso to PubMed to identify relationships between 340 cell types and 140 cytokines across thousands of diseases. The method is able to distinguish between incoming and outgoing interactions, and it includes the effect of the interaction and the cellular function involved. These factors are assigned a confidence score and linked to the disease. By leveraging the breadth of this network, we predicted and experimentally verified previously unappreciated cell-cytokine interactions. We also built a global immune-centric view of diseases and used it to predict cytokine-disease associations. This standardized knowledgebase (http://www.immunexpresso.org) opens up new directions for interpretation of immune data and model-driven systems immunology.",2018-06-18 +27964698,Development of the crop residue and rangeland burning in the 2014 National Emissions Inventory using information from multiple sources.,"Biomass burning has been identified as an important contributor to the degradation of air quality because of its impact on ozone and particulate matter. One component of the biomass burning inventory, crop residue burning, has been poorly characterized in the National Emissions Inventory (NEI). In the 2011 NEI, wildland fires, prescribed fires, and crop residue burning collectively were the largest source of PM2.5. 
This paper summarizes our 2014 NEI method to estimate crop residue burning emissions and grass/pasture burning emissions using remote sensing data and field information and literature-based, crop-specific emission factors. We focus on both the postharvest and pre-harvest burning that takes place with bluegrass, corn, cotton, rice, soybeans, sugarcane and wheat. Estimates for 2014 indicate that over the continental United States (CONUS), crop residue burning excluding all areas identified as Pasture/Grass, Grassland Herbaceous, and Pasture/Hay occurred over approximately 1.5 million acres of land and produced 19,600 short tons of PM2.5. For areas identified as Pasture/Grass, Grassland Herbaceous, and Pasture/Hay, biomass burning emissions occurred over approximately 1.6 million acres of land and produced 30,000 short tons of PM2.5. This estimate compares with the 2011 NEI and 2008 NEI as follows: 2008: 49,650 short tons and 2011: 141,180 short tons. Note that in the previous two NEIs rangeland burning was not well defined and so the comparison is not exact. The remote sensing data also provided verification of our existing diurnal profile for crop residue burning emissions used in chemical transport modeling. In addition, the entire database used to estimate this sector of emissions is available on EPA's Clearinghouse for Inventories and Emission Factors (CHIEF, http://www3.epa.gov/ttn/chief/index.html ).

Implications

Estimates of crop residue burning and rangeland burning emissions can be improved by using satellite detections. Local information is helpful in distinguishing crop residue and rangeland burning from all other types of fires.",2016-12-14 +22139919,Nematode.net update 2011: addition of data sets and tools featuring next-generation sequencing data.,"Nematode.net (http://nematode.net) has been a publicly available resource for studying nematodes for over a decade. In the past 3 years, we reorganized Nematode.net to provide more user-friendly navigation through the site, a necessity due to the explosion of data from next-generation sequencing platforms. Organism-centric portals containing dynamically generated data are available for over 56 different nematode species. Next-generation data has been added to the various data-mining portals hosted, including NemaBLAST and NemaBrowse. The NemaPath metabolic pathway viewer builds associations using KOs, rather than ECs to provide more accurate and fine-grained descriptions of proteins. Two new features for data analysis and comparative genomics have been added to the site. NemaSNP enables the user to perform population genetics studies in various nematode populations using next-generation sequencing data. HelmCoP (Helminth Control and Prevention) as an independent component of Nematode.net provides an integrated resource for storage, annotation and comparative genomics of helminth genomes to aid in learning more about nematode genomes, as well as drug, pesticide, vaccine and drug target discovery. 
With this update, Nematode.net will continue to realize its original goal to disseminate diverse bioinformatic data sets and provide analysis tools to the broad scientific community in a useful and user-friendly manner.",2011-12-01 +26418012,Dynamic Data Visualization with Weave and Brain Choropleths.,"This article introduces the neuroimaging community to the dynamic visualization workbench, Weave (https://www.oicweave.org/), and a set of enhancements to allow the visualization of brain maps. The enhancements comprise a set of brain choropleths and the ability to display these as stacked slices, accessible with a slider. For the first time, this allows the neuroimaging community to take advantage of the advanced tools already available for exploring geographic data. Our brain choropleths are modeled after widely used geographic maps but this mashup of brain choropleths with extant visualization software fills an important neuroinformatic niche. To date, most neuroinformatic tools have provided online databases and atlases of the brain, but not good ways to display the related data (e.g., behavioral, genetic, medical, etc). The extension of the choropleth to brain maps allows us to leverage general-purpose visualization tools for concurrent exploration of brain images and related data. Related data can be represented as a variety of tables, charts and graphs that are dynamically linked to each other and to the brain choropleths. We demonstrate that the simplified region-based analyses that underlay choropleths can provide insights into neuroimaging data comparable to those achieved by using more conventional methods. In addition, the interactive interface facilitates additional insights by allowing the user to filter, compare, and drill down into the visual representations of the data. 
This enhanced data visualization capability is useful during the initial phases of data analysis and the resulting visualizations provide a compelling way to publish data as an online supplement to journal articles.",2015-09-29 +30165607,miES: predicting the essentiality of miRNAs with machine learning and sequence features.,"

Motivation

MicroRNAs (miRNAs) are one class of small noncoding RNA molecules, which regulate gene expression at the post-transcriptional level and play important roles in health and disease. To dissect the critical miRNAs in the miRNAome, it is necessary to predict the essentiality of miRNAs; however, bioinformatics methods for this purpose are limited.

Results

Here we propose miES, a novel algorithm, for the prioritization of miRNA essentiality. miES implements a machine learning strategy based on learning from positive and unlabeled samples. miES uses sequence features of known essential miRNAs and performs miRNAome-wide searching for new essential miRNAs. miES achieves an AUC of 0.9 for 5-fold cross validation. Moreover, experiments further show that the miES score is significantly correlated with some established biological metrics for miRNA importance, such as miRNA conservation, miRNA disease spectrum width (DSW) and expression level.

Availability and implementation

The R source code is available at the download page of the web server, http://www.cuilab.cn/mies.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +26992731,Publication Trends Over 55 Years of Behavioral Genetic Research.,"We document the growth in published papers on behavioral genetics for 5-year intervals from 1960 through 2014. We used 1861 papers published in Behavior Genetics to train our search strategy which, when applied to Ovid PsychINFO, selected more than 45,000 publications. Five trends stand out: (1) the number of behavioral genetic publications has grown enormously; nearly 20,000 papers were published in 2010-2014. (2) The number of human quantitative genetic (QG) publications (e.g., twin and adoption studies) has steadily increased with more than 3000 papers published in 2010-2014. (3) The number of human molecular genetic (MG) publications increased substantially from about 2000 in 2000-2004 to 5000 in 2005-2009 to 9000 in 2010-2014. (4) Nonhuman publications yielded similar trends. (5) Although there has been exponential growth in MG publications, both human and nonhuman QG publications continue to grow. A searchable resource of this corpus of behavioral genetic papers is freely available online at http://www.teds.ac.uk/public_datasets.html and will be updated annually.",2016-03-18 +30335862,miRWalk: An online resource for prediction of microRNA binding sites.,"miRWalk is an open-source platform providing an intuitive interface that generates predicted and validated miRNA-binding sites of known genes of human, mouse, rat, dog and cow. The core of miRWalk is the miRNA target site prediction with the random-forest-based approach software TarPmiR searching the complete transcript sequence including the 5'-UTR, CDS and 3'-UTR. Moreover, it integrates results other databases with predicted and validated miRNA-target interactions. The focus is set on a modular design and extensibility as well as a fast update cycle. 
The database is available using Python, MySQL and HTML/Javascript. Database URL: http://mirwalk.umm.uni-heidelberg.de.",2018-10-18 +26072486,Improving compound-protein interaction prediction by building up highly credible negative samples.,"

Motivation

Computational prediction of compound-protein interactions (CPIs) is of great importance for drug design and development, as genome-scale experimental validation of CPIs is not only time-consuming but also prohibitively expensive. With the availability of an increasing number of validated interactions, the performance of computational prediction approaches is severely impeded by the lack of reliable negative CPI samples. A systematic method of screening reliable negative samples becomes critical to improving the performance of in silico prediction methods.

Results

This article aims at building up a set of highly credible negative samples of CPIs via an in silico screening method. As most existing computational models assume that similar compounds are likely to interact with similar target proteins and achieve remarkable performance, it is rational to identify potential negative samples based on the converse negative proposition that the proteins dissimilar to every known/predicted target of a compound are unlikely to be targeted by the compound and vice versa. We integrated various resources, including chemical structures, chemical expression profiles and side effects of compounds, amino acid sequences, protein-protein interaction network and functional annotations of proteins, into a systematic screening framework. We first tested the screened negative samples on six classical classifiers, and all these classifiers achieved remarkably higher performance on our negative samples than on randomly generated negative samples for both human and Caenorhabditis elegans. We then verified the negative samples on three existing prediction models, including bipartite local model, Gaussian kernel profile and Bayesian matrix factorization, and found that the performances of these models are also significantly improved on the screened negative samples. Moreover, we validated the screened negative samples on a drug bioactivity dataset. Finally, we derived two sets of new interactions by training a support vector machine classifier on the positive interactions annotated in DrugBank and our screened negative interactions. The screened negative samples and the predicted interactions provide the research community with a useful resource for identifying new drug targets and a helpful supplement to the current curated compound-protein databases.

Availability

Supplementary files are available at: http://admis.fudan.edu.cn/negative-cpi/.",2015-06-01 +22753370,Human Variome Project country nodes: documenting genetic information within a country.,"The Human Variome Project (http://www.humanvariomeproject.org) is an international effort aiming to systematically collect and share information on all human genetic variation. The two main pillars of this effort are gene/disease-specific databases and a network of Human Variome Project Country Nodes. The latter are nationwide efforts to document the genomic variation reported within a specific population. The development and successful operation of the Human Variome Project Country Nodes are of utmost importance to the success of Human Variome Project's aims and goals because they not only allow the genetic burden of disease to be quantified in different countries, but also provide diagnosticians and researchers access to an up-to-date resource that will assist them in their daily clinical practice and biomedical research, respectively. Here, we report the discussions and recommendations that resulted from the inaugural meeting of the International Confederation of Countries Advisory Council, held on 12th December 2011, during the 2011 Human Variome Project Beijing Meeting. We discuss the steps necessary to maximize the impact of the Country Node effort for developing regional and country-specific clinical genetics resources and summarize a few well-coordinated genetic data collection initiatives that would serve as paradigms for similar projects.",2012-07-18 +30203992,Fine and Coarse Particulate Matter Exposures and Associations with Acute Cardiac Events among Participants in a Telemedicine Service: A Case-Crossover Study.,"

Background

Subclinical cardiovascular changes have been associated with ambient particulate matter (PM) exposures within hours. Although the U.S. Environmental Protection Agency continues to look for additional evidence of effects associated with sub-daily PM exposure, this information is still limited because most studies of clinical events have lacked data on the onset time of symptoms to assess rapid increased risk.

Objective

Our objective was to investigate associations between sub-daily exposures to PM and acute cardiac events using telemedicine data.

Methods

We conducted a case-crossover study among telemedicine participants [Formula: see text] of age who called a service center for cardiac-related symptoms and were transferred to a hospital in Tel Aviv and Haifa, Israel (2002-2013). Ambient [Formula: see text] and [Formula: see text] measured by monitors located in each city during the hours before the patient called with symptoms were compared with matched control periods. We investigated the sensitivity of these associations to more accurate symptom onset time and greater certainty of diagnosis.

Results

We captured 12,661 calls from 7,617 subscribers experiencing ischemic (19%), arrhythmic (31%), or nonspecific (49%) cardiac events. PM concentrations were associated with small increases in the odds of cardiac events. For example, odds ratios for any cardiac event in association with a [Formula: see text] increase in 6-h and 24-h average [Formula: see text] were 1.008 [95% confidence interval (CI): 0.998, 1.018] and 1.006 (95% CI: 0.995, 1.018), respectively, and for [Formula: see text] were 1.003 (95% CI: 1.001, 1.006) and 1.003 (95% CI: 1.000, 1.007), respectively. Associations were stronger when using exposures matched to the call time rather than calendar date and for events with higher certainty of the diagnosis.

Conclusions

Our analysis of telemedicine data suggests that risks of cardiac events in telemedicine participants [Formula: see text] of age may increase within hours of PM exposures. https://doi.org/10.1289/EHP2596.",2018-09-01 +,Development of best practices for ex situ conservation of radish germplasm in the context of the crop genebank knowledge base,"Information about crop-specific best practices for ex situ conservation of plant genetic resources has been difficult to find until recently. The CGIAR, together with national and regional partners, started to fill that gap by publishing best practices on the crop genebank knowledge base (CGKB - http://cropgenebank.sgrp.cgiar.org/ ), a website specifically developed and officially launched in 2010 to provide easy access to knowledge about all aspects of ex situ conservation of specific crops to genebank managers and ex situ conservation researchers. A collaborative study, undertaken by Bioversity International with eight national and international genebanks, utilized the framework provided by the CGKB to develop and publish radish conservation best practices. This paper focuses on two aspects of this study: (1) Differences in procedures and practices in radish conservation currently applied in five key genebank activities, namely, acquisition of germplasm, viability testing and monitoring, seed drying, seed storage, and regeneration. While in a few cases genebanks agreed on a specific best practice to recommend, in others it was not desirable to identify one practice as superior to another, therefore a range of existing practices is described as a variety of equivalent options. The results highlight the importance of proactive genebank management aimed at meeting the standards within the specific context in which a genebank operates. 
(2) The framework and template provided by the CGKB in guiding the development of genebank best practices, and the CGKB as an excellent resource to widely and freely share best practices with the global community to support the effective management of crop genebanks.",2013-04-01 +29360928,APAtrap: identification and quantification of alternative polyadenylation sites from RNA-seq data.,"Motivation:Alternative polyadenylation (APA) has been increasingly recognized as a crucial mechanism that contributes to transcriptome diversity and gene expression regulation. As RNA-seq has become a routine protocol for transcriptome analysis, it is of great interest to leverage such unprecedented collection of RNA-seq data by new computational methods to extract and quantify APA dynamics in these transcriptomes. However, research progress in this area has been relatively limited. Conventional methods rely on either transcript assembly to determine transcript 3' ends or annotated poly(A) sites. Moreover, they can neither identify more than two poly(A) sites in a gene nor detect dynamic APA site usage considering more than two poly(A) sites. Results:We developed an approach called APAtrap based on the mean squared error model to identify and quantify APA sites from RNA-seq data. APAtrap is capable of identifying novel 3' UTRs and 3' UTR extensions, which contributes to locating potential poly(A) sites in previously overlooked regions and improving genome annotations. APAtrap also aims to tally all potential poly(A) sites and detect genes with differential APA site usages between conditions. Extensive comparisons of APAtrap with two other latest methods, ChangePoint and DaPars, using various RNA-seq datasets from simulation studies, human and Arabidopsis demonstrate the efficacy and flexibility of APAtrap for any organisms with an annotated genome. Availability and implementation:Freely available for download at https://apatrap.sourceforge.io. 
Contact:liqq@xmu.edu.cn or xhuister@xmu.edu.cn. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-06-01 +27388930,"CDC's Response to the 2014-2016 Ebola Epidemic - Guinea, Liberia, and Sierra Leone.","CDC's response to the 2014-2016 Ebola virus disease (Ebola) epidemic in West Africa was the largest in the agency's history and occurred in a geographic area where CDC had little operational presence. Approximately 1,450 CDC responders were deployed to Guinea, Liberia, and Sierra Leone since the start of the response in July 2014 to the end of the response at the end of March 2016, including 455 persons with repeat deployments. The responses undertaken in each country shared some similarities but also required unique strategies specific to individual country needs. The size and duration of the response challenged CDC in several ways, particularly with regard to staffing. The lessons learned from this epidemic will strengthen CDC's ability to respond to future public health emergencies. These lessons include the importance of ongoing partnerships with ministries of health in resource-limited countries and regions, a cadre of trained CDC staff who are ready to be deployed, and development of ongoing working relationships with U.S. government agencies and other multilateral and nongovernment organizations that deploy for international public health emergencies. CDC's establishment of a Global Rapid Response Team in June 2015 is anticipated to meet some of these challenges. The activities summarized in this report would not have been possible without collaboration with many U.S. and international partners (http://www.cdc.gov/vhf/ebola/outbreaks/2014-west-africa/partners.html).",2016-07-08 +28096084,TimesVector: a vectorized clustering approach to the analysis of time series transcriptome data from multiple phenotypes.,"

Motivation

Identifying biologically meaningful gene expression patterns from time series gene expression data is important to understand the underlying biological mechanisms. To identify significantly perturbed gene sets between different phenotypes, analysis of time series transcriptome data requires consideration of time and sample dimensions. Thus, the analysis of such time series data seeks to search gene sets that exhibit similar or different expression patterns between two or more sample conditions, constituting the three-dimensional data, i.e. gene-time-condition. Computational complexity for analyzing such data is very high, compared to the already difficult NP-hard two dimensional biclustering algorithms. Because of this challenge, traditional time series clustering algorithms are designed to capture co-expressed genes with similar expression pattern in two sample conditions.

Results

We present a triclustering algorithm, TimesVector, specifically designed for clustering three-dimensional time series data to capture distinctively similar or different gene expression patterns between two or more sample conditions. TimesVector identifies clusters with distinctive expression patterns in three steps: (i) dimension reduction and clustering of time-condition concatenated vectors, (ii) post-processing clusters for detecting similar and distinct expression patterns and (iii) rescuing genes from unclassified clusters. Using four sets of time series gene expression data, generated by both microarray and high throughput sequencing platforms, we demonstrated that TimesVector successfully detected biologically meaningful clusters of high quality. TimesVector improved the clustering quality compared to existing triclustering tools and only TimesVector detected clusters with differential expression patterns across conditions successfully.

Availability and implementation

The TimesVector software is available at http://biohealth.snu.ac.kr/software/TimesVector/.

Contact

sunkim.bioinfo@snu.ac.kr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +30333551,Web-Based Model for Predicting Time to Surgery in Young Patients with Familial Adenomatous Polyposis: An Internally Validated Study.,"

Introduction

The timing of prophylactic colorectal surgery in patients with familial adenomatous polyposis (FAP) is based on the immediacy of the colorectal cancer risk. The ability to predict the need for surgery may help patients and their families plan in the context of life events and CRC risk. We created a model to predict the likelihood of surgery within 2 and 5 years of first colonoscopy at our institution.

Methods

A single institution hereditary colorectal syndrome (Cologene™) database was interrogated for all patients with FAP having a deleterious APC mutation. Patients with first colonoscopy after age 30 and before year 2000 were excluded. Cox regression analysis was done to assess multiple factors associated with surgery, followed by stepwise Cox regression analysis to select an optimal model. Receiver operator curve (ROC) analysis was performed to assess the model.

Results

A total of 211 (53% female) patients were included. Forty-five percent underwent surgery after an average of 3.8 years of surveillance. The final model was created based on initial clinical characteristics (age, gender, BMI, family history of desmoids, genotype-phenotype correlation), initial colonoscopic characteristics (number of polyps, polyp size, presence of high-grade dysplasia); and on clinical events (chemoprevention and polypectomy). AUC was 0.87 and 0.84 to predict surgery within 2 and 5 years, respectively. The final model can be accessed at this website: http://app.calculoid.com/#/calculator/29638 .

Conclusion

This web-based tool allows clinicians to stratify patients' likelihood of colorectal surgery within 2 and 5 years of their initial examination, based on clinical and endoscopic features, and using the philosophy of care guiding practice at this institution.",2018-10-17 +26590259,The UCSC Genome Browser database: 2016 update.,"For the past 15 years, the UCSC Genome Browser (http://genome.ucsc.edu/) has served the international research community by offering an integrated platform for viewing and analyzing information from a large database of genome assemblies and their associated annotations. The UCSC Genome Browser has been under continuous development since its inception with new data sets and software features added frequently. Some release highlights of this year include new and updated genome browsers for various assemblies, including bonobo and zebrafish; new gene annotation sets; improvements to track and assembly hub support; and a new interactive tool, the ""Data Integrator"", for intersecting data from multiple tracks. We have greatly expanded the data sets available on the most recent human assembly, hg38/GRCh38, to include updated gene prediction sets from GENCODE, more phenotype- and disease-associated variants from ClinVar and ClinGen, more genomic regulatory data, and a new multiple genome alignment.",2015-11-20 +22009731,WheatGenome.info: an integrated database and portal for wheat genome information.,"Bread wheat (Triticum aestivum) is one of the most important crop plants, globally providing staple food for a large proportion of the human population. However, improvement of this crop has been limited due to its large and complex genome. Advances in genomics are supporting wheat crop improvement. We provide a variety of web-based systems hosting wheat genome and genomic data to support wheat research and crop improvement. WheatGenome.info is an integrated database resource which includes multiple web-based applications. 
These include a GBrowse2-based wheat genome viewer with BLAST search portal, TAGdb for searching wheat second-generation genome sequence data, wheat autoSNPdb, links to wheat genetic maps using CMap and CMap3D, and a wheat genome Wiki to allow interaction between diverse wheat genome sequencing activities. This system includes links to a variety of wheat genome resources hosted at other research organizations. This integrated database aims to accelerate wheat genome research and is freely accessible via the web interface at http://www.wheatgenome.info/.",2011-10-18 +29673390,Enabling multiplexed testing of pooled donor cells through whole-genome sequencing.,"We describe a method that enables the multiplex screening of a pool of many different donor cell lines. Our method accurately predicts each donor proportion from the pool without requiring the use of unique DNA barcodes as markers of donor identity. Instead, we take advantage of common single nucleotide polymorphisms, whole-genome sequencing, and an algorithm to calculate the proportions from the sequencing data. By testing using simulated and real data, we showed that our method robustly predicts the individual proportions from a mixed-pool of numerous donors, thus enabling the multiplexed testing of diverse donor cells en masse. More information is available at https://pgpresearch.med.harvard.edu/poolseq/.",2018-04-19 +30409338,SCREEN-DR: Collaborative platform for diabetic retinopathy.,"

Background and objective

Diabetic retinopathy (DR) is the most prevalent microvascular complication of diabetes mellitus and can lead to irreversible visual loss. Screening programs, based on retinal imaging techniques, are fundamental to detect the disease since the initial stages are asymptomatic. Most of these examinations reflect negative cases and many have poor image quality, representing an important inefficiency factor. The SCREEN-DR project aims to tackle this limitation, by researching and developing computer-aided methods for diabetic retinopathy detection. This article presents a multidisciplinary collaborative platform that was created to meet the needs of physicians and researchers, aiming at the creation of machine learning algorithms to facilitate the screening process.

Methods

Our proposal is a collaborative platform for textual and visual annotation of image datasets. The architecture and layout were optimized for annotating DR images by gathering feedback from several physicians during the design and conceptualization of the platform. It allows the aggregation and indexing of imagiology studies from diverse sources, and supports the creation and annotation of phenotype-specific datasets to feed artificial intelligence algorithms. The platform makes use of an anonymization pipeline and role-based access control for securing personal data.

Results

The SCREEN-DR platform has been deployed in the production environment of the SCREEN-DR project at http://demo.dicoogle.com/screen-dr, and the source code of the project is publicly available. We provide a description of the platform's interface and use cases it supports. At the time of publication, four physicians have created a total of 1826 annotations for 701 distinct images, and the annotated data has been used for training classification models.",2018-10-18 +22401035,Improving integrative searching of systems chemical biology data using semantic annotation.,"

Background

Systems chemical biology and chemogenomics are considered critical, integrative disciplines in modern biomedical research, but require data mining of large, integrated, heterogeneous datasets from chemistry and biology. We previously developed an RDF-based resource called Chem2Bio2RDF that enabled querying of such data using the SPARQL query language. Whilst this work has proved useful in its own right as one of the first major resources in these disciplines, its utility could be greatly improved by the application of an ontology for annotation of the nodes and edges in the RDF graph, enabling a much richer range of semantic queries to be issued.

Results

We developed a generalized chemogenomics and systems chemical biology OWL ontology called Chem2Bio2OWL that describes the semantics of chemical compounds, drugs, protein targets, pathways, genes, diseases and side-effects, and the relationships between them. The ontology also includes data provenance. We used it to annotate our Chem2Bio2RDF dataset, making it a rich semantic resource. Through a series of scientific case studies we demonstrate how this (i) simplifies the process of building SPARQL queries, (ii) enables useful new kinds of queries on the data and (iii) makes possible intelligent reasoning and semantic graph mining in chemogenomics and systems chemical biology.

Availability

Chem2Bio2OWL is available at http://chem2bio2rdf.org/owl. The document is available at http://chem2bio2owl.wikispaces.com.",2012-03-08 +28903802,Can National Healthcare-Associated Infections (HAIs) Data Differentiate Hospitals in the United States?,"OBJECTIVE To determine whether patients using the Centers for Medicare and Medicaid Services (CMS) Hospital Compare website (http://medicare.gov/hospitalcompare) can use nationally reported healthcare-associated infection (HAI) data to differentiate hospitals. DESIGN Secondary analysis of publicly available HAI data for calendar year 2013. METHODS We assessed the availability of HAI data for geographically proximate hospitals (ie, hospitals within the same referral region) and then analyzed these data to determine whether they are useful to differentiate hospitals. We assessed data for the 6 HAIs reported by hospitals to the Centers for Disease Control and Prevention (CDC). RESULTS Data were analyzed for 4,561 hospitals representing 88% of registered community and federal government hospitals in the United States. Healthcare-associated infection data are only useful for comparing hospitals if they are available for multiple hospitals within a geographic region. We found that data availability differed by HAI. Clostridium difficile infections (CDI) data were most available, with 82% of geographic regions (ie, hospital referral regions) having >50% of hospitals reporting them. In contrast, 4% of geographic regions had >50% of member hospitals reporting surgical site infections (SSI) for hysterectomies, which had the lowest availability. The ability of HAI data to differentiate hospitals differed by HAI: 72% of hospital referral regions had at least 1 pair of hospitals with statistically different risk-adjusted CDI rates (SIRs), compared to 9% for SSI (hysterectomy). 
CONCLUSIONS HAI data generally are reported by enough hospitals to meet minimal criteria for useful comparisons in many geographic locations, though this varies by type of HAI. CDI and catheter-associated urinary tract infection (CAUTI) are more likely to differentiate hospitals than the other publicly reported HAIs. Infect Control Hosp Epidemiol 2017;38:1167-1171.",2017-10-01 +,"Phylogeny, divergence times and biogeography of window flies (Scenopinidae) and the therevoid clade (Diptera: Asiloidea)","The evolution of the ‘therevoid’ clade, with an emphasis on window flies (Scenopinidae), is presented by combining DNA sequence data with morphological characters for living and fossil species. The therevoid clade represents a group of four families (Apsilocephalidae, Evocoidae, Scenopinidae and Therevidae) of lower brachyceran Diptera in the superfamily Asiloidea. A comprehensive phylogenetic analysis using parsimony and likelihood methods was undertaken using extensive taxon sampling from all families and subfamilies, and compared with outgroup taxa sampled from the related families Asilidae, Mydidae, Apioceridae and Empididae. Fifty‐nine morphological characters (adult, larval and pupal) were combined with 6.4 kb of DNA sequences for two ribosomal genes (16S and 18S ribosomal DNA) and three protein‐encoding genes [cytochrome oxidase I (COI), triose phosphate isomerase (TPI) and the CPSase region of carbamoyl‐phosphate synthase‐aspartate transcarbamoylase‐dihydroorotase (CAD)]. Results from combined analyses of morphological and molecular data for 78 taxa representing all families of the therevoid clade are presented. Specific hypotheses of the relationship between respective families and subfamilies were tested statistically using four‐cluster likelihood mapping. The therevoid clade is a well‐supported monophyletic group within Asiloidea, with Evocoidae sister to Apsilocephalidae and Therevidae sister to Scenopinidae. 
Temporal and zoogeographical aspects of therevoid clade evolution were investigated using Bayesian divergence time estimates and Lagrange ancestral range scenarios. The effect of inclusion of fossils as terminal taxa on phylogenetic and divergence time estimation was investigated, with morphological scoring for fossil representatives included in the analyses rather than used simply as minimum age constraints. In each analysis there was either improvement in estimation, or only marginal and localized loss in tree resolution, and with younger estimates of divergence time across the tree. The historical biogeography of the therevoid clade was examined with multiple trans‐Antarctic vicariance events between Australasia and South America evident during the Late Cretaceous to early Palaeogene. Scenopininae is newly subdivided into two tribes, Metatrichini trib.n. and Scenopinini Fallén stat.r. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:4974EBF8‐3117‐4189‐B6DE‐7D5BF9B23E53.",2015-07-01 +21177656,The Gene Expression Barcode: leveraging public data repositories to begin cataloging the human and murine transcriptomes.,"Various databases have harnessed the wealth of publicly available microarray data to address biological questions ranging from across-tissue differential expression to homologous gene expression. Despite their practical value, these databases rely on relative measures of expression and are unable to address the most fundamental question--which genes are expressed in a given cell type. The Gene Expression Barcode is the first database to provide reliable absolute measures of expression for most annotated genes for 131 human and 89 mouse tissue types, including diseased tissue. 
This is made possible by a novel algorithm that leverages information from the GEO and ArrayExpress public repositories to build statistical models that permit converting data from a single microarray into expressed/unexpressed calls for each gene. For selected platforms, users may upload data and obtain results in a matter of seconds. The raw data, curated annotation, and code used to create our resource are also available at http://rafalab.jhsph.edu/barcode.",2011-01-01 +29425745,Tumor suppressive ZBTB4 inhibits cell growth by regulating cell cycle progression and apoptosis in Ewing sarcoma.,"Increasing studies identify that zinc finger and BTB domain containing 4 (ZBTB4) functions as a tumor suppressor in human cancer. Underexpression of ZBTB4 is correlated with poor survival of breast cancer patients. However, the expression of ZBTB4 and its possible function remain unknown in Ewing sarcoma (ES). To clarify these issues, we investigated the expression difference between ES and normal tissues based on Gene Expression Omnibus (GEO) data from R2: Genomics Analysis and Visualization Platform (http://r2.amc.nl). GEO data (GSE68776) indicated that the expression of ZBTB4 in ES tissues was prominently lower compared with normal tissues. Our data further confirmed the underexpression of ZBTB4 in ES tissues. GEO data (GSE63157 and GSE17679) demonstrated that ZBTB4 underexpression predicted an obviously shorter overall survival and event-free survival of ES patients. Interestingly, the expression of ZBTB4 was inversely correlated with proliferation markers Ki-67 and proliferating cell nuclear antigen (PCNA) in ES tissues. In vitro, ZBTB4 overexpression inhibited cell proliferation, and induced cell cycle arrest at G1 phase and apoptosis in SK-ES-1 and RD-ES cells. Moreover, ZBTB4 restoration suppressed the tumor growth of ES in mice. An inverse correlation between ZBTB4 and Survivin expression was observed in ES tissues.
ZBTB4 overexpression reduced Survivin abundance in ES cells. Notably, Survivin restoration reversed the regulatory effect of ZBTB4 on ES cell proliferation, cell cycle progression and apoptosis. To conclude, our data indicated that ZBTB4 exhibited a tumor suppressive role in ES possibly by reducing Survivin expression. ZBTB4/Survivin axis might serve as a therapeutic target for ES.",2018-02-07 +30304369,RiboProP: a probabilistic ribosome positioning algorithm for ribosome profiling.,"

Motivation

Ribosome profiling has been widely used to study translation in a genome-wide fashion. It requires deep sequencing of ribosome protected mRNA fragments followed by mapping of fragments to the reference genome. For applications such as identification of ribosome pausing sites, it is not enough to map a fragment to a given gene, but the exact position of the ribosome represented by the fragment must be identified for each mRNA fragment. The assignment of the correct ribosome position is complicated by the broad length distribution of the ribosome protected fragments caused by the known sequence bias of micrococcal nuclease (MNase), the most widely used nuclease for digesting mRNAs in bacteria. Available mapping algorithms suffer from either MNase bias or low accuracy in characterizing the ribosome pausing kinetics.

Results

In this paper, we introduce a new computational method for mapping the ribosome protected fragments to ribosome locations. We first develop a mathematical model of the interplay between MNase digestion and ribosome protection of the mRNAs. We then use the model to reconstruct the ribosome occupancy profile on a per gene level. We demonstrate that our method has the capability of mitigating the sequence bias introduced by MNase and accurately locating ribosome pausing sites at codon resolution. We believe that our method can be broadly applied to ribosome profiling studies on bacteria where codon resolution is necessary.

Availability and implementation

Source code implementing our approach can be downloaded under GPL3 license at http://bioserv.mps.ohio-state.edu/RiboProP.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +26907748,Introducing the Open Affective Standardized Image Set (OASIS).,"We introduce the Open Affective Standardized Image Set (OASIS), an open-access online stimulus set containing 900 color images depicting a broad spectrum of themes, including humans, animals, objects, and scenes, along with normative ratings on two affective dimensions-valence (i.e., the degree of positive or negative affective response that the image evokes) and arousal (i.e., the intensity of the affective response that the image evokes). The OASIS images were collected from online sources, and valence and arousal ratings were obtained in an online study (total N = 822). The valence and arousal ratings covered much of the circumplex space and were highly reliable and consistent across gender groups. OASIS has four advantages: (a) the stimulus set contains a large number of images in four categories; (b) the data were collected in 2015, and thus OASIS features more current images and reflects more current ratings of valence and arousal than do existing stimulus sets; (c) the OASIS database affords users the ability to interactively explore images by category and ratings; and, most critically, (d) OASIS allows for free use of the images in online and offline research studies, as they are not subject to the copyright restrictions that apply to the International Affective Picture System. The OASIS images, along with normative valence and arousal ratings, are available for download from www.benedekkurdi.com/#oasis or https://db.tt/yYTZYCga .",2017-04-01 +29182599,Open University Learning Analytics dataset.,"Learning Analytics focuses on the collection and analysis of learners' data to improve their learning experience by providing informed guidance and to optimise learning materials. To support the research in this area we have developed a dataset, containing data from courses presented at the Open University (OU). 
What makes the dataset unique is the fact that it contains demographic data together with aggregated clickstream data of students' interactions in the Virtual Learning Environment (VLE). This enables the analysis of student behaviour, represented by their actions. The dataset contains the information about 22 courses, 32,593 students, their assessment results, and logs of their interactions with the VLE represented by daily summaries of student clicks (10,655,280 entries). The dataset is freely available at https://analyse.kmi.open.ac.uk/open_dataset under a CC-BY 4.0 license.",2017-11-28 +30761873,External validation of anastomotic leakage risk analysis system in patients who underwent colorectal resection,"

Background/aim

One of the most feared complications after colon resection for carcinoma is anastomotic leakage. Prediction of anastomotic leakage can alter pre- and perioperative management of patients. This study validates an anastomotic leakage prediction system.

Materials and methods

Ninety-five patients who underwent colonic resection between 1 January 2016 and 30 January 2017 were included in the study. Patient records and electronic charting system data were used to calculate anastomotic leakage risk on the http://www.anastomoticleak.com/ website.

Results

Fifty-six (58.9%) patients were male and thirty-nine (41.1%) were female. The mean age was 61.7 (min: 33, max: 90). Six (6.3%) patients had anastomotic leakage. According to the ROC analysis, the area under curve for the prediction system was 0.767.

Conclusion

The prediction system for anastomotic leakage produced significant results for our patient population. It can be effectively utilized in preoperative and perioperative measures to prevent anastomotic leakage.",2019-02-11 +27824930,Identification of Novel Sequence Types among Staphylococcus haemolyticus Isolated from Variety of Infections in India.,"The aim of this study was to determine sequence types of 34 S. haemolyticus strains isolated from a variety of infections between 2013 and 2016 in India by MLST. The MEGA5.2 software was used to align and compare the nucleotide sequences. The advanced cluster analysis was performed to define the clonal complexes. MLST analysis showed 24 new sequence types (ST) among S. haemolyticus isolates, irrespective of sources and place of isolation. The findings of this study allowed an MLST database to be set up on the PubMLST.org website using BIGSdb software and made available at http://pubmlst.org/shaemolyticus/. The data of this study thus suggest that MLST can be used to study population structure and diversity among S. haemolyticus isolates.",2016-11-08 +30318630,MB-Isoster: A software for bioisosterism simulation.,"Bioisosterism is a technique used in medicinal chemistry to optimize lead compounds in drug research. One can replace a substituent group in the original molecule with another that has similar physicochemical properties and then test how this replacement affects biological activity. To help researchers choose bioisosteric replacements, computational resources such as programs and databases have been developed. This article presents MB-Isoster, software that draws bioisosteric molecules. Starting from an input molecule, the user selects a molecular subregion formed by connected atoms to be replaced, and MB-Isoster queries an internal library to find bioisosteric substituents for the selected subregion and builds the bioisosteres.
Another functionality is receptor-ligand PDB complex reading, in which nonbonded interactions are computed between receptor and ligand in a PDB file, which helps in selecting atoms/subregions for bioisosteric replacement. Computation of physicochemical properties and virtual screening evaluation are also available. MB-Isoster is freely available at http://molmod-cs.unifal-mg.edu.br/tools.html. © 2018 Wiley Periodicals, Inc.",2018-10-14 +26516349,BioDB extractor: customized data extraction system for commonly used bioinformatics databases.,"

Background

Diverse types of biological data, primary as well as derived, are available in various formats and are stored in heterogeneous resources. Database-specific as well as integrated search engines are available for carrying out efficient searches of databases. These search engines however, do not support extraction of subsets of data with the same level of granularity that exists in typical database entries. In order to extract fine grained subsets of data, users are required to download complete or partial database entries and write scripts for parsing and extraction.

Results

BioDBExtractor (BDE) has been developed to provide 26 customized data extraction utilities for some of the commonly used databases such as ENA (EMBL-Bank), UniprotKB, PDB, and KEGG. BDE eliminates the need for downloading entries and writing scripts. BDE has a simple web interface that enables input of query in the form of accession numbers/ID codes, choice of utilities and selection of fields/subfields of data by the users.

Conclusions

BDE thus provides a common data extraction platform for multiple databases and is useful to both novice and expert users. BDE, however, is not a substitute for basic keyword-based database searches. Desired subsets of data compiled using BDE can be subsequently used for downstream processing, analyses and knowledge discovery.

Availability

BDE can be accessed from http://bioinfo.net.in/BioDB/Home.html.",2015-10-28 +31009074,α-synuclein oligomers enhance astrocyte-induced synapse formation through TGF-β1 signaling in a Parkinson's disease model.,"Parkinson's disease (PD) is characterized by selective death of dopaminergic neurons in the substantia nigra, degeneration of the nigrostriatal pathway, increases in glutamatergic synapses in the striatum and aggregation of α-synuclein. Evidence suggests that oligomeric species of α-synuclein (αSO) are the genuine neurotoxins of PD. Although several studies have supported the direct neurotoxic effects of αSO on neurons, their effects on astrocytes have not been directly addressed. Astrocytes are essential to several steps of synapse formation and function, including secretion of synaptogenic factors, control of synaptic elimination and stabilization, secretion of neural/glial modulators, and modulation of extracellular ions, and neurotransmitter levels in the synaptic cleft. Here, we show that αSO induced the astrocyte reactivity and enhanced the synaptogenic capacity of human and murine astrocytes by increasing the levels of the known synaptogenic molecule transforming growth factor beta 1 (TGF-β1). Moreover, intracerebroventricular injection of αSO in mice increased the number of astrocytes, the density of excitatory synapses, and the levels of TGF-β1 in the striatum of injected animals. Inhibition of TGF-β1 signaling impaired the effect of the astrocyte-conditioned medium on glutamatergic synapse formation in vitro and on striatal synapse formation in vivo, whereas addition of TGF-β1 protected mesencephalic neurons against synapse loss triggered by αSO. Together, our data suggest that αSO have important effects on astrocytic functions and describe TGF-β1 as a new endogenous astrocyte-derived molecule involved in the increase in striatal glutamatergic synaptic density present in early stages of PD. 
OPEN SCIENCE BADGES: This article has received a badge for *Open Materials* because it provided all relevant information to reproduce the study in the manuscript. The complete Open Science Disclosure form for this article can be found at the end of the article. More information about the Open Practices badges can be found at https://cos.io/our-services/open-science-badges/. Cover Image for this issue: doi: 10.1111/jnc.14514.",2019-07-01 +30295699,sRNAPrimerDB: comprehensive primer design and search web service for small non-coding RNAs.,"

Motivation

Small non-coding RNAs (ncRNAs), especially microRNAs (miRNAs) and piwi-interacting RNAs (piRNAs), play key roles in many biological processes. However, only a few tools can be used to develop the optimal primer or probe design for the expression profile of small ncRNAs. Here, we developed sRNAPrimerDB, the first automated primer designing and query web service for small ncRNAs.

Results

The primer online designing module of sRNAPrimerDB is composed of primer design algorithms and quality evaluation of the polymerase chain reaction (PCR) primer. Five types of primers, namely, generic or specific reverse transcription primers, specific PCR primers pairs, TaqMan probe, double-hairpin probe and hybridization probe for different small ncRNA detection methods, can be designed and searched using this service. The quality of PCR primers is further evaluated using melting temperature, primer dimer, hairpin structure and specificity. Moreover, the sequence and size of each amplicon are also provided for the subsequent experiment verification. At present, 531 306 and 2 941 669 primer pairs exist across 223 species for miRNAs and piRNAs, respectively, according to sRNAPrimerDB. Several primers designed by sRNAPrimerDB are further successfully validated by subsequent experiments.

Availability and implementation

sRNAPrimerDB is a valuable platform that can be used to detect small ncRNAs. This module is publicly accessible at http://www.srnaprimerdb.com or http://123.57.239.141.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +28365718,VerSeDa: vertebrate secretome database. ,"Based on the current tools, de novo secretome (full set of proteins secreted by an organism) prediction is a time consuming bioinformatic task that requires a multifactorial analysis in order to obtain reliable in silico predictions. Hence, to accelerate this process and offer researchers a reliable repository where secretome information can be obtained for vertebrates and model organisms, we have developed VerSeDa (Vertebrate Secretome Database). This freely available database stores information about proteins that are predicted to be secreted through the classical and non-classical mechanisms, for the wide range of vertebrate species deposited at the NCBI, UCSC and ENSEMBL sites. To our knowledge, VerSeDa is the only state-of-the-art database designed to store secretome data from multiple vertebrate genomes, thus, saving an important amount of time spent in the prediction of protein features that can be retrieved from this repository directly. VerSeDa is freely available at http://genomics.cicbiogune.es/VerSeDa/index.php.",2017-01-01 +31510679,SCRIBER: accurate and partner type-specific prediction of protein-binding residues from proteins sequences.,"

Motivation

Accurate prediction of protein-binding residues (PBRs) enhances understanding of molecular-level rules governing protein-protein interactions, helps protein-protein docking and facilitates annotation of protein functions. Recent studies show that current sequence-based predictors of PBRs severely cross-predict residues that interact with other types of protein partners (e.g. RNA and DNA) as PBRs. Moreover, these methods are relatively slow, prohibiting genome-scale use.

Results

We propose a novel, accurate and fast sequence-based predictor of PBRs that minimizes the cross-predictions. Our SCRIBER (SeleCtive pRoteIn-Binding rEsidue pRedictor) method takes advantage of three innovations: a comprehensive dataset that covers multiple types of binding residues, novel types of inputs that are relevant to the prediction of PBRs, and an architecture that is tailored to reduce the cross-predictions. The dataset includes complete protein chains and offers improved coverage of binding annotations that are transferred from multiple protein-protein complexes. We utilize an innovative two-layer architecture where the first layer generates a prediction of protein-binding, RNA-binding, DNA-binding and small ligand-binding residues. The second layer re-predicts PBRs by reducing overlap between PBRs and the other types of binding residues produced in the first layer. Empirical tests on an independent test dataset reveal that SCRIBER significantly outperforms current predictors and that all three innovations contribute to its high predictive performance. SCRIBER reduces cross-predictions by between 41% and 69% and our conservative estimates show that it is at least 3 times faster. We provide putative PBRs produced by SCRIBER for the entire human proteome and use these results to hypothesize that about 14% of currently known human protein domains bind proteins.

Availability and implementation

SCRIBER webserver is available at http://biomine.cs.vcu.edu/servers/SCRIBER/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-07-01 +30616521,GeNeCK: a web server for gene network construction and visualization.,"

Background

Reverse engineering approaches to infer gene regulatory networks using computational methods are of great importance to annotate gene functionality and identify hub genes. Although various statistical algorithms have been proposed, development of computational tools to integrate results from different methods and user-friendly online tools is still lagging.

Results

We developed a web server that efficiently constructs gene networks from expression data. It allows the user to use ten different network construction methods (such as partial correlation-, likelihood-, Bayesian- and mutual information-based methods) and integrates the resulting networks from multiple methods. Hub gene information, if available, can be incorporated to enhance performance.

Conclusions

GeNeCK is an efficient and easy-to-use web application for gene regulatory network construction. It can be accessed at http://lce.biohpc.swmed.edu/geneck .",2019-01-07 +29343101,MACON: a web tool for computing DNA methylation data obtained by the Illumina Infinium Human DNA methylation BeadArray.,"AIM:Bioinformatics analysis for Illumina Infinium Human DNA methylation BeadArray is essential, but still remains difficult task for many experimental researchers. We here aimed to develop a browser-accessible bioinformatics tool for analyzing the BeadArray data. MATERIALS & METHODS:The tool was established as an analytical pipeline using R, Perl and Python programming languages. RESULTS:We introduced a method that groups neighboring probes into a genomic block, which facilitated efficient identification of densely methylated/unmethylated regions. The tool, MACON, provided probe filtering, β-mixture quantile normalization, grouping into genomic blocks, annotation and production of a data subset. CONCLUSION:MACON allows researchers to analyze the BeadArray data using a web browser ( http://epigenome.ncc.go.jp/macon ).",2018-01-18 +31257983,Bringing EEG Back to the Future: Use of cEEG in Neurocritical Care.,"Continuous EEG Is Associated With Favorable Hospitalization Outcomes for Critically Ill Patients. Hill CE, Blank LJ, Thibault D, et al. Willis Neurology. 2018. doi: https://doi.org/10.1212/WNL.0000000000006689 Objective: To characterize continuous electroencephalography (cEEG) use patterns in the critically ill and to determine the association with hospitalization outcomes for specific diagnoses. METHODS:We performed a retrospective cross-sectional study with National Inpatient Sample data from 2004 to 2013. We sampled hospitalized adult patients who received intensive care and then compared patients who underwent cEEG to those who did not. 
We considered diagnostic subgroups of seizure/status epilepticus, subarachnoid or intracerebral hemorrhage, and altered consciousness. Outcomes were in-hospital mortality, hospitalization cost, and length of stay. RESULTS:In total, 7 102 399 critically ill patients were identified, of whom 22 728 received cEEG. From 2004 to 2013, the proportion of patients who received cEEG increased from 0.06% (95% confidence interval [CI]: 0.03%-0.09%) to 0.80% (95% CI: 0.62%-0.98%). While the cEEG cohort appeared more ill, cEEG use was associated with reduced in-hospital mortality after adjustment for patient and hospital characteristics (odds ratio [OR]: 0.83, 95% CI: 0.75-0.93, P < .001). This finding held for the diagnoses of subarachnoid or intracerebral hemorrhage and for altered consciousness, but not for the seizure/status epilepticus subgroup. Cost and length of hospitalization were increased for the cEEG cohort (OR: 1.17 and 1.11, respectively, P < .001). CONCLUSIONS:There was a >10-fold increase in cEEG use from 2004 to 2013. However, this procedure may still be underused; cEEG was associated with lower in-hospital mortality but used for only 0.3% of the critically ill population. While administrative claims analysis supports the utility of cEEG for critically ill patients, our findings suggest variable benefit by diagnosis, and investigation with greater clinical detail is warranted.",2019-06-30 +26586237,An affinity-structure database of helix-turn-helix: DNA complexes with a universal coordinate system.,"

Background

Molecular interactions between proteins and DNA molecules underlie many cellular processes, including transcriptional regulation, chromosome replication, and nucleosome positioning. Computational analyses of protein-DNA interactions rely on experimental data characterizing known protein-DNA interactions structurally and biochemically. While many databases exist that contain either structural or biochemical data, few integrate these two data sources in a unified fashion. Such integration is becoming increasingly critical with the rapid growth of structural and biochemical data, and the emergence of algorithms that rely on the synthesis of multiple data types to derive computational models of molecular interactions.

Description

We have developed an integrated affinity-structure database in which the experimental and quantitative DNA binding affinities of helix-turn-helix proteins are mapped onto the crystal structures of the corresponding protein-DNA complexes. This database provides access to: (i) protein-DNA structures, (ii) quantitative summaries of protein-DNA binding affinities using position weight matrices, and (iii) raw experimental data of protein-DNA binding instances. Critically, this database establishes a correspondence between experimental structural data and quantitative binding affinity data at the single basepair level. Furthermore, we present a novel alignment algorithm that structurally aligns the protein-DNA complexes in the database and creates a unified residue-level coordinate system for comparing the physico-chemical environments at the interface between complexes. Using this unified coordinate system, we compute the statistics of atomic interactions at the protein-DNA interface of helix-turn-helix proteins. We provide an interactive website for visualization, querying, and analyzing this database, and a downloadable version to facilitate programmatic analysis.

Conclusions

This database will facilitate the analysis of protein-DNA interactions and the development of programmatic computational methods that capitalize on integration of structural and biochemical datasets. The database can be accessed at http://ProteinDNA.hms.harvard.edu.",2015-11-19 +25028489,A system-level model for the microbial regulatory genome.,"Microbes can tailor transcriptional responses to diverse environmental challenges despite having streamlined genomes and a limited number of regulators. Here, we present data-driven models that capture the dynamic interplay of the environment and genome-encoded regulatory programs of two types of prokaryotes: Escherichia coli (a bacterium) and Halobacterium salinarum (an archaeon). The models reveal how the genome-wide distributions of cis-acting gene regulatory elements and the conditional influences of transcription factors at each of those elements encode programs for eliciting a wide array of environment-specific responses. We demonstrate how these programs partition transcriptional regulation of genes within regulons and operons to re-organize gene-gene functional associations in each environment. The models capture fitness-relevant co-regulation by different transcriptional control mechanisms acting across the entire genome, to define a generalized, system-level organizing principle for prokaryotic gene regulatory networks that goes well beyond existing paradigms of gene regulation. An online resource (http://egrin2.systemsbiology.net) has been developed to facilitate multiscale exploration of conditional gene regulation in the two prokaryotes.",2014-07-15 +30342418,Creation of an empiric tool to predict ECMO deployment in pediatric respiratory or cardiac failure.,"

Purpose

To create a real-time prediction tool to predict probability of ECMO deployment in children with cardiac or pulmonary failure.

Materials and methods

Patients ≤18 years old admitted to an ICU that participated in the Virtual Pediatric Systems database (2009-2015) were included. Logistic regression models using adaptive lasso methodology were used to identify independent factors associated with ECMO use.

Results

A total of 538,202 ICU patients from 140 ICUs qualified for inclusion. ECMO was deployed in 3484 patients (0.6%) with a mortality of 1450 patients (41.6%). The factors associated with increased probability of ECMO use included: younger age, pulmonary hypertension, congenital heart disease, high-complexity cardiac surgery, cardiomyopathy, acute lung injury, shock, renal failure, cardiac arrest, use of nitric oxide, use of either conventional mechanical ventilation or high frequency oscillatory ventilation, and higher annual ECMO center volume. The area under the receiver operating curve for this model was 0.90 (95% CI: 0.85-0.93). This tool can be accessed at https://soipredictiontool.shinyapps.io/ECMORisk/.

Conclusions

Here, we present a tool to predict ECMO deployment among critically ill children; this tool will help create real-time risk stratification among critically ill children, and it will help with benchmarking, family counseling, and research.",2018-10-12 +30674925,"Smooth Muscle Transcriptome Browser: offering genome-wide references and expression profiles of transcripts expressed in intestinal SMC, ICC, and PDGFRα+ cells.","Transcriptome data on the quantitative numbers of transcriptional variants expressed in primary cells offer essential clues into specific cellular functions and biological processes. We have previously collected transcriptomes from primary smooth muscle cells (SMC), interstitial cells of Cajal (ICC), and PDGFRα+ cells (fibroblast-like cells) isolated from murine jejunal and colonic smooth muscle and/or mucosal tissues as well as transcriptomes from the associated tissues (jejunal smooth muscle, colonic smooth muscle, and colonic mucosa). In this study, we have built the Smooth Muscle Transcriptome Browser (SMTB), https://med.unr.edu/physio/transcriptome , a web-based, graphical user interface that offers genetic references and expression profiles of all transcripts expressed at both the cellular (SMC, ICC, and PDGFRα+ cells) and tissue level (smooth muscle and mucosal tissue). This browser brings new insights into the cellular and biological functions of the cell types in gastrointestinal smooth muscle biology.",2019-01-23 +28961740,EMHP: an accurate automated hole masking algorithm for single-particle cryo-EM image processing.,"

Summary

The Electron Microscopy Hole Punch (EMHP) is a streamlined suite of tools for quick assessment, sorting and hole masking of electron micrographs. With recent advances in single-particle electron cryo-microscopy (cryo-EM) data processing allowing for the rapid determination of protein structures using a smaller computational footprint, we saw the need for a fast and simple tool for data pre-processing that could run independent of existing high-performance computing (HPC) infrastructures. EMHP provides a data preprocessing platform in a small package that requires minimal python dependencies to function.

Availability and implementation

https://www.bitbucket.org/chazbot/emhp Apache 2.0 License.

Contact

bowman@scripps.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +29194469,bioBakery: a meta'omic analysis environment.,"

Summary

bioBakery is a meta'omic analysis environment and collection of individual software tools with the capacity to process raw shotgun sequencing data into actionable microbial community feature profiles, summary reports, and publication-ready figures. It includes a collection of pre-configured analysis modules also joined into workflows for reproducibility.

Availability and implementation

bioBakery (http://huttenhower.sph.harvard.edu/biobakery) is publicly available for local installation as individual modules and as a virtual machine image. Each individual module has been developed to perform a particular task (e.g. quantitative taxonomic profiling or statistical analysis), and they are provided with source code, tutorials, demonstration data, and validation results; the bioBakery virtual image includes the entire suite of modules and their dependencies pre-installed. Images are available for both Amazon EC2 and Google Compute Engine. All software is open source under the MIT license. bioBakery is actively maintained with a support group at biobakery-users@googlegroups.com and new tools being added upon their release.

Contact

chuttenh@hsph.harvard.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-04-01 +29284660,The ModERN Resource: Genome-Wide Binding Profiles for Hundreds of Drosophila and Caenorhabditis elegans Transcription Factors.,"To develop a catalog of regulatory sites in two major model organisms, Drosophila melanogaster and Caenorhabditis elegans, the modERN (model organism Encyclopedia of Regulatory Networks) consortium has systematically assayed the binding sites of transcription factors (TFs). Combined with data produced by our predecessor, modENCODE (Model Organism ENCyclopedia Of DNA Elements), we now have data for 262 TFs identifying 1.23 M sites in the fly genome and 217 TFs identifying 0.67 M sites in the worm genome. Because sites from different TFs are often overlapping and tightly clustered, they fall into 91,011 and 59,150 regions in the fly and worm, respectively, and these binding sites span as little as 8.7 and 5.8 Mb in the two organisms. Clusters with large numbers of sites (so-called high occupancy target, or HOT regions) predominantly associate with broadly expressed genes, whereas clusters containing sites from just a few factors are associated with genes expressed in tissue-specific patterns. All of the strains expressing GFP-tagged TFs are available at the stock centers, and the chromatin immunoprecipitation sequencing data are available through the ENCODE Data Coordinating Center and also through a simple interface (http://epic.gs.washington.edu/modERN/) that facilitates rapid accessibility of processed data sets. 
These data will facilitate a vast number of scientific inquiries into the function of individual TFs in key developmental, metabolic, and defense and homeostatic regulatory pathways, as well as provide a broader perspective on how individual TFs work together in local networks and globally across the life spans of these two key model organisms.",2017-12-28 +27586241,"Association of Dietary Intake and Biomarker Levels of Arsenic, Cadmium, Lead, and Mercury among Asian Populations in the United States: NHANES 2011-2012.","

Background

We have recently shown that biomarker levels of selected metals are higher in Asians than in other U.S. ethnic groups, with important differences within selected Asian subgroups. Much of this difference may be dietary in origin; however, this is not well established.

Objective

We evaluated dietary intake of toxic metals as a source of increased biomarker levels of metals among U.S. Asians.

Methods

We estimated daily food consumption and dietary intake of arsenic, cadmium, lead, and mercury by combining 24-hr dietary intake recall data from the 2011-2012 National Health and Nutrition Examination Survey (NHANES) with data from the USDA Food Composition Intake Database and FDA Total Dietary Study. We analyzed associations between dietary metal intake and biomarker levels of the metals using linear regression. Further, estimated food consumption and metal intake levels were compared between Asians and other racial/ethnic groups (white, black, Mexican American, and other Hispanic) and within three Asian subgroups (Chinese, Indian Asian, and other Asians).

Results

Significant associations (p < 0.05) were found between biomarker levels and estimated dietary metal intake for total and inorganic arsenic and mercury among Asians. Asians had the highest daily fish and rice consumption across the racial/ethnic groups. Fish was the major contributor to dietary mercury and total arsenic intake, whereas rice was the major contributor to inorganic arsenic dietary intake. Fish consumption across the Asian subgroups varied, with Asian Indians having lower fish consumption than the other Asian subgroups. Rice consumption was similar across the Asian subgroups.

Conclusions

We confirmed that estimated dietary intake of arsenic (total and inorganic) and mercury is significantly associated with their corresponding biomarkers in U.S. Asians, using nationally representative data. In contrast, estimated dietary intake of cadmium and lead were not significantly associated with their corresponding biomarker levels in U.S. Asians. Citation: Awata H, Linder S, Mitchell LE, Delclos GL. 2017. Association of dietary intake and biomarker levels of arsenic, cadmium, lead, and mercury among Asian populations in the United States: NHANES 2011-2012. Environ Health Perspect 125:314-323; http://dx.doi.org/10.1289/EHP28.",2016-09-02 +,Disease Models for Event Prediction,"

Objective

The objective of this manuscript is to present a systematic review of biosurveillance models that operate on select agents and can forecast the occurrence of a disease event.

Introduction

One of the primary goals of this research was to characterize the viability of biosurveillance models to provide operationally relevant information to decision makers, in order to identify areas for future research. Two critical characteristics differentiate this work from other infectious disease modeling reviews [1,2]. First, we reviewed models that attempted to predict the disease event, not merely its transmission dynamics. Second, we considered models involving pathogens of concern as determined by the US National Select Agent Registry. Background: A rich and diverse field of infectious disease modeling has emerged over the past 60 years and has advanced our understanding of population- and individual-level disease transmission dynamics, including risk factors, virulence and spatio-temporal patterns of disease spread. Recent modeling advances include biostatistical methods, and massive agent-based population, biophysical, ordinary differential equation, and ecological-niche models. Diverse data sources are being integrated into these models as well, such as demographics, remotely-sensed measurements and imaging, environmental measurements, and surrogate data such as news alerts and social media. Yet, there remains a gap in the sensitivity and specificity of these models not only in tracking infectious disease events but also predicting their occurrence.

Methods

We searched dozens of commercial and government databases and harvested Google search results for eligible models utilizing terms and phrases provided by public health analysts relating to biosurveillance, remote sensing, risk assessments, spatial epidemiology, and ecological niche-modeling. This returned 13,767 webpages and 12,152 citations. After de-duplication and removal of extraneous material, a core collection of 6,503 items was established; these publications and their abstracts are presented in a semantic wiki at http://BioCat.pnnl.gov. Next, PNNL’s IN-SPIRE visual analytics software was used to cross-correlate these publications with the definition for a biosurveillance model. As a result, we systematically reviewed 44 papers, and the results are presented in this analysis.

Results

The models were classified as one or more of the following types: event forecast (9%), spatial (59%), ecological niche (64%), diagnostic or clinical (14%), spread or response (20%), and reviews (7%). The distribution of transmission modes in the models was: direct contact (55%), vector-borne (34%), water- or soil-borne (16%), and non-specific (7%). The parameters (e.g., etiology, cultural) and data sources (e.g., remote sensing, NGO, epidemiological) for each model were recorded. A highlight of this review is the analysis of verification and validation procedures employed by (and reported for) each model, if any. All models were classified as either a) Verified or Validated (89%), or b) Not Verified or Validated (11%; which for the purposes of this review was considered a standalone category).

Conclusions

The verification and validation (V&V) of these models is discussed in detail. The vast majority of models studied were verified or validated in some form or another, which was a surprising observation made from this portion of the study. We subsequently focused on those models which were not verified or validated in an attempt to identify why this information was missing. One reason may be that the V&V was simply not reported upon within the paper reviewed for those models. A positive observation was the significant use of real epidemiological data to validate the models. Even though ‘Validation using Spatially and Temporally Independent Data’ was one of the smallest classification groups, validation through the use of actual data versus predicted data represented approximately 33% of these models. We close with initial recommended operational readiness level guidelines, based on established Technology Readiness Level definitions.",2013-01-01 +29850785,GateFinder: projection-based gating strategy optimization for flow and mass cytometry.,"

Motivation

High-parameter single-cell technologies can reveal novel cell populations of interest, but studying or validating these populations using lower-parameter methods remains challenging.

Results

Here, we present GateFinder, an algorithm that enriches high-dimensional cell types with simple, stepwise polygon gates requiring only two markers at a time. A series of case studies of complex cell types illustrates how simplified enrichment strategies can enable more efficient assays, reveal novel biomarkers and clarify underlying biology.

Availability and implementation

The GateFinder algorithm is implemented as a free and open-source package for BioConductor: https://nalab.stanford.edu/gatefinder.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +31311505,miRFA: an automated pipeline for microRNA functional analysis with correlation support from TCGA and TCPA expression data in pancreatic cancer.,"

Background

MicroRNAs (miRNAs) are small RNAs that regulate gene expression at a post-transcriptional level and are emerging as potentially important biomarkers for various disease states, including pancreatic cancer. In silico-based functional analysis of miRNAs usually consists of miRNA target prediction and functional enrichment analysis of miRNA targets. Since miRNA target prediction methods generate a large number of false positive target genes, further validation to narrow down interesting candidate miRNA targets is needed. One commonly used method correlates miRNA and mRNA expression to assess the regulatory effect of a particular miRNA. The aim of this study was to build a bioinformatics pipeline in R for miRNA functional analysis including correlation analyses between miRNA expression levels and its targets on mRNA and protein expression levels available from the cancer genome atlas (TCGA) and the cancer proteome atlas (TCPA). TCGA-derived expression data of specific mature miRNA isoforms from pancreatic cancer tissue was used.

Results

Fifteen circulating miRNAs with significantly altered expression levels detected in pancreatic cancer patients were queried separately in the pipeline. The pipeline generated predicted miRNA target genes, enriched gene ontology (GO) terms and Kyoto encyclopedia of genes and genomes (KEGG) pathways. Predicted miRNA targets were evaluated by correlation analyses between each miRNA and its predicted targets. MiRNA functional analysis in combination with Kaplan-Meier survival analysis suggest that hsa-miR-885-5p could act as a tumor suppressor and should be validated as a potential prognostic biomarker in pancreatic cancer.

Conclusions

Our miRNA functional analysis (miRFA) pipeline can serve as a valuable tool in biomarker discovery involving mature miRNAs associated with pancreatic cancer and could be developed to cover additional cancer types. Results for all mature miRNAs in TCGA pancreatic adenocarcinoma dataset can be studied and downloaded through a shiny web application at https://emmbor.shinyapps.io/mirfa/ .",2019-07-16 +32082769,Machine learning techniques in cardiac risk assessment.,"

Background

The objective of this study was to predict the mortality risk of patients during or shortly after cardiac surgery by using machine learning techniques and their learning abilities from collected data.

Methods

The dataset was obtained from Acıbadem Maslak Hospital. Risk factors of the European System for Cardiac Operative Risk Evaluation (EuroSCORE) were used to predict mortality risk. First, Standard EuroSCORE scores of patients were calculated and risk groups were determined, because 30-day follow-up information of patients was not available in the dataset. Models were created with five different machine learning algorithms and two different datasets, in which age, serum creatinine, left ventricular dysfunction, and pulmonary hypertension were numeric in Dataset 1 and categorical in Dataset 2. Model performance evaluation was performed with 10-fold cross-validation.

Results

Data analysis and performance evaluation were performed with R, RStudio and Shiny. C4.5 was selected as the best algorithm for risk prediction (accuracy= 0.989) in Dataset 1. This model indicated that pulmonary hypertension, recent myocardial infarct, surgery on thoracic aorta are the primary three risk factors that affect the mortality risk of patients during or shortly after cardiac surgery. Also, this model is used to develop a dynamic web application which is also accessible from mobile devices (https://elifkartal.shinyapps.io/euSCR/).

Conclusion

The C4.5 decision tree model was identified as having the highest performance in Dataset 1 in predicting the mortality risk of patients. Using the numerical values of the risk factors can be useful in increasing the performance of machine learning models. Development of hospital-specific local assessment systems using hospital data, such as the application in this study, would be beneficial for both patients and doctors.",2018-07-03 +30953700,"""CodonWizard"" - An intuitive software tool with graphical user interface for customizable codon optimization in protein expression efforts.","Optimization of coding sequences to maximize protein expression yield is often outsourced to external service providers during commercial gene synthesis and thus unfortunately remains a black box for many researchers. The presented software program ""CodonWizard"" offers scientists a powerful but easy-to-use tool for customizable codon optimization: The intuitive graphical user interface empowers even scientists inexperienced in the art to straightforward design, modify, test and save complex codon optimization strategies and to publicly share successful optimization strategies among the scientific community. ""Codon Wizard"" provides highly flexible features for sequence analysis and completely customizable modification/optimization of codon usage of any given input sequence data (DNA/RNA/peptide) using freely combinable algorithms, allowing for implementation of contemporary, well-established optimization strategies as well as novel, proprietary ones alike. Contrary to comparable tools, ""Codon Wizard"" thus finally opens up ways for an empirical approach to codon optimization and may also be used completely offline to protect resulting intellectual property. As a benchmark, the reliability, intuitiveness and utility of the application could be demonstrated by increasing the yield of recombinant TEV-protease expressed in E. 
coli by several orders of magnitude after codon optimization using ""CodonWizard"" - Permanently available for download on the web at http://schwalbe.org.chemie.uni-frankfurt.de/node/3324.",2019-04-04 +27508329,Connection Map for Compounds (CMC): A Server for Combinatorial Drug Toxicity and Efficacy Analysis.,"Drug discovery and development is a costly and time-consuming process with a high risk for failure resulting primarily from a drug's associated clinical safety and efficacy potential. Identifying and eliminating inapt candidate drugs as early as possible is an effective way for reducing unnecessary costs, but limited analytical tools are currently available for this purpose. Recent growth in the area of toxicogenomics and pharmacogenomics has provided with a vast amount of drug expression microarray data. Web servers such as CMap and LTMap have used this information to evaluate drug toxicity and mechanisms of action independently; however, their wider applicability has been limited by the lack of a combinatorial drug-safety type of analysis. Using available genome-wide drug transcriptional expression profiles, we developed the first web server for combinatorial evaluation of toxicity and efficacy of candidate drugs named ""Connection Map for Compounds"" (CMC). Using CMC, researchers can initially compare their query drug gene signatures with prebuilt gene profiles generated from two large-scale toxicogenomics databases, and subsequently perform a drug efficacy analysis for identification of known mechanisms of drug action or generation of new predictions. CMC provides a novel approach for drug repositioning and early evaluation in drug discovery with its unique combination of toxicity and efficacy analyses, expansibility of data and algorithms, and customization of reference gene profiles. 
CMC can be freely accessed at http://cadd.tongji.edu.cn/webserver/CMCbp.jsp .",2016-08-19 +30509450,Manifesto for a European research network into Problematic Usage of the Internet.,"The Internet is now all-pervasive across much of the globe. While it has positive uses (e.g. prompt access to information, rapid news dissemination), many individuals develop Problematic Use of the Internet (PUI), an umbrella term incorporating a range of repetitive impairing behaviours. The Internet can act as a conduit for, and may contribute to, functionally impairing behaviours including excessive and compulsive video gaming, compulsive sexual behaviour, buying, gambling, streaming or social networks use. There is growing public and National health authority concern about the health and societal costs of PUI across the lifespan. Gaming Disorder is being considered for inclusion as a mental disorder in diagnostic classification systems, and was listed in the ICD-11 version released for consideration by Member States (http://www.who.int/classifications/icd/revision/timeline/en/). More research is needed into disorder definitions, validation of clinical tools, prevalence, clinical parameters, brain-based biology, socio-health-economic impact, and empirically validated intervention and policy approaches. Potential cultural differences in the magnitudes and natures of types and patterns of PUI need to be better understood, to inform optimal health policy and service development. To this end, the EU under Horizon 2020 has launched a new four-year European Cooperation in Science and Technology (COST) Action Programme (CA 16207), bringing together scientists and clinicians from across the fields of impulsive, compulsive, and addictive disorders, to advance networked interdisciplinary research into PUI across Europe and beyond, ultimately seeking to inform regulatory policies and clinical practice. 
This paper describes nine critical and achievable research priorities identified by the Network, needed in order to advance understanding of PUI, with a view towards identifying vulnerable individuals for early intervention. The network shall enable collaborative research networks, shared multinational databases, multicentre studies and joint publications.",2018-10-10 +30305317,Structure of the Recombinant Neisseria gonorrhoeae Adhesin Complex Protein (rNg-ACP) and Generation of Murine Antibodies with Bactericidal Activity against Gonococci.,"Neisseria gonorrhoeae (gonococcus [Ng]) is the causative organism of the sexually transmitted disease gonorrhoea, and no effective vaccine exists currently. In this study, the structure, biological properties, and vaccine potential of the Ng-adhesin complex protein (Ng-ACP) are presented. The crystal structure of recombinant Ng-ACP (rNg-ACP) protein was solved at 1.65 Å. Diversity and conservation of Ng-ACP were examined in different Neisseria species and gonococcal isolates (https://pubmlst.org/neisseria/ database) in silico, and protein expression among 50 gonococcal strains in the Centers for Disease Control and Prevention/Food and Drug Administration (CDCP/FDA) AR Isolate Bank was examined by Western blotting. Murine antisera were raised to allele 10 (strain P9-17)-encoded rNg-ACP protein with different adjuvants and examined by enzyme-linked immunosorbent assay (ELISA), Western blotting, and a human serum bactericidal assay. Rabbit antiserum to rNg-ACP was tested for its ability to prevent Ng-ACP from inhibiting human lysozyme activity in vitro. Ng-ACP is structurally homologous to Neisseria meningitidis ACP and MliC/PliC lysozyme inhibitors. Gonococci expressed predominantly allele 10- and allele 6-encoded Ng-ACP (81% and 15% of isolates, respectively). Murine antisera were bactericidal (titers of 64 to 512, P < 0.05) for the homologous P9-17 strain and heterologous (allele 6) FA1090 strain. 
Rabbit anti-rNg-ACP serum prevented Ng-ACP from inhibiting human lysozyme with ∼100% efficiency. Ng-ACP protein was expressed by all 50 gonococcal isolates examined with minor differences in the relative levels of expression. rNg-ACP is a potential vaccine candidate that induces antibodies that (i) are bactericidal and (ii) prevent the gonococcus from inhibiting the lytic activity of an innate defense molecule. IMPORTANCE: Neisseria gonorrhoeae (gonococcus [Ng]) is the causative organism of the sexually transmitted disease gonorrhoea, and the organism is listed by the World Health Organization as a high-priority pathogen for research and development of new control measures, including vaccines. In this study, we demonstrated that the N. gonorrhoeae adhesin complex protein (Ng-ACP) was conserved and expressed by 50 gonococcal strains and that recombinant proteins induced antibodies in mice that killed the bacteria in vitro. We determined the structure of Ng-ACP by X-ray crystallography and investigated structural conservation with Neisseria meningitidis ACP and MliC/PliC proteins from other bacteria which act as inhibitors of the human innate defense molecule lysozyme. These findings are important and suggest that Ng-ACP could provide a potential dual target for tackling gonococcal infections.",2018-10-10 +27347102,Identification of genes associated with renal cell carcinoma using gene expression profiling analysis.,"Renal cell carcinoma (RCC) is the most common type of kidney cancer in adults and accounts for ~80% of all kidney cancer cases. However, the pathogenesis of RCC has not yet been fully elucidated. To interpret the pathogenesis of RCC at the molecular level, gene expression data and bio-informatics methods were used to identify RCC associated genes. Gene expression data was downloaded from Gene Expression Omnibus (GEO) database and identified differentially coexpressed genes (DCGs) and dysfunctional pathways in RCC patients compared with controls. 
In addition, a regulatory network was constructed using the known regulatory data between transcription factors (TFs) and target genes in the University of California Santa Cruz (UCSC) Genome Browser (http://genome.ucsc.edu) and the regulatory impact factor of each TF was calculated. A total of 258,0427 pairs of DCGs were identified. The regulatory network contained 1,525 pairs of regulatory associations between 126 TFs and 1,259 target genes and these genes were mainly enriched in cancer pathways, ErbB and MAPK. In the regulatory network, the 10 most strongly associated TFs were FOXC1, GATA3, ESR1, FOXL1, PATZ1, MYB, STAT5A, EGR2, EGR3 and PELP1. GATA3, ERG and MYB serve important roles in RCC while FOXC1, ESR1, FOXL1, PATZ1, STAT5A and PELP1 may be potential genes associated with RCC. In conclusion, the present study constructed a regulatory network and screened out several TFs that may be used as molecular biomarkers of RCC. However, future studies are needed to confirm the findings of the present study.",2016-05-16 +30305653,GOGO: An improved algorithm to measure the semantic similarity between gene ontology terms.,"Measuring the semantic similarity between Gene Ontology (GO) terms is an essential step in functional bioinformatics research. We implemented a software named GOGO for calculating the semantic similarity between GO terms. GOGO has the advantages of both information-content-based and hybrid methods, such as Resnik's and Wang's methods. Moreover, GOGO is relatively fast and does not need to calculate information content (IC) from a large gene annotation corpus but still has the advantage of using IC. This is achieved by considering the number of children nodes in the GO directed acyclic graphs when calculating the semantic contribution of an ancestor node giving to its descendent nodes. GOGO can calculate functional similarities between genes and then cluster genes based on their functional similarities. 
Evaluations performed on multiple pathways retrieved from the saccharomyces genome database (SGD) show that GOGO can accurately and robustly cluster genes based on functional similarities. We release GOGO as a web server and also as a stand-alone tool, which allows convenient execution of the tool for a small number of GO terms or integration of the tool into bioinformatics pipelines for large-scale calculations. GOGO can be freely accessed or downloaded from http://dna.cs.miami.edu/GOGO/ .",2018-10-10 +22434833,MEDIC: a practical disease vocabulary used at the Comparative Toxicogenomics Database.,"The Comparative Toxicogenomics Database (CTD) is a public resource that promotes understanding about the effects of environmental chemicals on human health. CTD biocurators manually curate a triad of chemical-gene, chemical-disease and gene-disease relationships from the scientific literature. The CTD curation paradigm uses controlled vocabularies for chemicals, genes and diseases. To curate disease information, CTD first had to identify a source of controlled terms. Two resources seemed to be good candidates: the Online Mendelian Inheritance in Man (OMIM) and the 'Diseases' branch of the National Library of Medicine's Medical Subject Headers (MeSH). To maximize the advantages of both, CTD biocurators undertook a novel initiative to map the flat list of OMIM disease terms into the hierarchical nature of the MeSH vocabulary. The result is CTD's 'merged disease vocabulary' (MEDIC), a unique resource that integrates OMIM terms, synonyms and identifiers with MeSH terms, synonyms, definitions, identifiers and hierarchical relationships. MEDIC is both a deep and broad vocabulary, composed of 9700 unique diseases described by more than 67 000 terms (including synonyms). It is freely available to download in various formats from CTD. 
While neither a true ontology nor a perfect solution, this vocabulary has nonetheless proved to be extremely successful and practical for our biocurators in generating over 2.5 million disease-associated toxicogenomic relationships in CTD. Other external databases have also begun to adopt MEDIC for their disease vocabulary. Here, we describe the construction, implementation, maintenance and use of MEDIC to raise awareness of this resource and to offer it as a putative scaffold in the formal construction of an official disease ontology. DATABASE URL: http://ctd.mdibl.org/voc.go?type=disease.",2012-03-20 +27053566,Vaxar: A Web-Based Database of Laboratory Animal Responses to Vaccinations and Its Application in the Meta-Analysis of Different Animal Responses to Tuberculosis Vaccinations.,"Animal models are indispensable for vaccine research and development. However, choosing which species to use and designing a vaccine study that is optimized for that species is often challenging. Vaxar (http://www.violinet.org/vaxar/) is a web-based database and analysis system that stores manually curated data regarding vaccine-induced responses in animals. To date, Vaxar encompasses models from 35 animal species including rodents, rabbits, ferrets, primates, and birds. These 35 species have been used to study more than 1300 experimentally tested vaccines for 164 pathogens and diseases significant to humans and domestic animals. The responses to vaccines by animals in more than 1500 experimental studies are recorded in Vaxar; these data can be used for systematic meta-analysis of various animal responses to a particular vaccine. For example, several variables, including animal strain, animal age, and the dose or route of either vaccination or challenge, might affect host response outcomes. Vaxar can also be used to identify variables that affect responses to different vaccines in a specific animal model. 
All data stored in Vaxar are publically available for web-based queries and analyses. Overall Vaxar provides a unique systematic approach for understanding vaccine-induced host immunity.",2016-04-01 +31477308,Marker discovery and associations with β-carotene content in Indian dairy cattle and buffalo breeds.,"Vitamin A is essential for human health, but current intake levels in many developing countries such as India are too low due to malnutrition. According to the World Health Organization, an estimated 250 million preschool children are vitamin A deficient globally. This number excludes pregnant women and nursing mothers, who are particularly vulnerable. Efforts to improve access to vitamin A are key because supplementation can reduce mortality rates in young children in developing countries by around 23%. Three key genes, BCMO1, BCO2, and SCARB1, have been shown to be associated with the amount of β-carotene (BC) in milk. Whole-genome sequencing reads from the coordinates of these 3 genes in 202 non-Indian cattle (141 Bos taurus, 61 Bos indicus) and 35 non-Indian buffalo (Bubalus bubalis) animals from several breeds were collected from data repositories. The number of SNP detected in the coding regions of these 3 genes ranged from 16 to 26 in the 3 species, with 5 overlapping SNP between B. taurus and B. indicus. All these SNP together with 2 SNP in the upstream part of the gene but already present in dbSNP (https://www.ncbi.nlm.nih.gov/projects/SNP/) were used to build a custom Sequenom array. Blood for DNA and milk samples for BC were obtained from 2,291 Indian cows of 5 different breeds (Gir, Holstein cross, Jersey Cross, Tharparkar, and Sahiwal) and 2,242 Indian buffaloes (Jafarabadi, Murrah, Pandharpuri, and Surti breeds). The DNA was extracted and genotyped with the Sequenom array. 
For each individual breed and the combined breeds, SNP with an association that had a P-value <0.3 in the first round of linear analysis were included in a second step of regression analyses to determine allele substitution effects to increase the content of BC in milk. Additionally, an F-test for all SNP within gene was performed with the objective of determining if overall the gene had a significant effect on the content of BC in milk. The analyses were repeated using a Bayesian approach to compare and validate the previous frequentist results. Multiple significant SNP were found using both methodologies with allele substitution effects ranging from 6.21 (3.13) to 9.10 (5.43) µg of BC per 100 mL of milk. Total gene effects exceeded the mean BC value for all breeds with both analysis approaches. The custom panel designed for genes related to BC production demonstrated applicability in genotyping of cattle and buffalo in India and may be used for cattle or buffalo from other developing countries. Moreover, the recommendation of selection for significant specific alleles of some gene markers provides a route to effectively increase the BC content in milk in the Indian cattle and buffalo populations.",2019-08-30 +30565316,"System for Quality-Assured Data Analysis: Flexible, reproducible scientific workflows.","The reproducibility of scientific processes is one of the paramount problems of bioinformatics, an engineering problem that must be addressed to perform good research. The System for Quality-Assured Data Analysis (SyQADA), described here, seeks to address reproducibility by managing many of the details of procedural bookkeeping in bioinformatics in as simple and transparent a manner as possible. 
SyQADA has been used by persons with backgrounds ranging from expert programmer to Unix novice, to perform and repeat dozens of diverse bioinformatics workflows on tens of thousands of samples, consuming over 80 CPU-months of computing on over 300,000 individual tasks of scores of projects on laptops, computer servers, and computing clusters. SyQADA is especially well-suited for paired-sample analyses found in cancer tumor-normal studies. SyQADA executable source code, documentation, tutorial examples, and workflows used in our lab is available from http://scheet.org/software.html.",2018-12-18 +30610412,Sequence-based classification of type II polyketide synthase biosynthetic gene clusters for antiSMASH.,"The software antiSMASH examines microbial genome data to identify and analyze biosynthetic gene clusters for a wide range of natural products. So far, type II polyketide synthase (PKS) gene clusters could only be identified, but no detailed predictions for type II PKS gene clusters could be provided. In this study, an antiSMASH module for analyzing type II PKS gene clusters has been developed. The module detects genes/proteins in the type II PKS gene cluster involved with polyketide biosynthesis and is able to make predictions about the aromatic polyketide product. Predictions include the putative starter unit, the number of malonyl elongations during polyketide biosynthesis, the putative class and the molecular weight of the product. Furthermore, putative cyclization patterns are predicted. The accuracy of the predictions generated with the new PKSII antiSMASH module was evaluated using a leave-one-out cross validation. 
The prediction module is available in antiSMASH version 5 at https://antismash.secondarymetabolites.org .",2019-01-04 +24274931,Deciphering the human brain proteome: characterization of the anterior temporal lobe and corpus callosum as part of the Chromosome 15-centric Human Proteome Project.,"Defining the proteomes encoded by each chromosome and characterizing proteins related to human illnesses are among the goals of the Chromosome-centric Human Proteome Project (C-HPP) and the Biology and Disease-driven HPP. Following these objectives, we investigated the proteomes of the human anterior temporal lobe (ATL) and corpus callosum (CC) collected post-mortem from eight subjects. Using a label-free GeLC-MS/MS approach, we identified 2454 proteins in the ATL and 1887 in the CC through roughly 7500 and 5500 peptides, respectively. Considering that the ATL is a gray-matter region while the CC is a white-matter region, they presented proteomes specific to their functions. Besides, 38 proteins were found to be differentially expressed between the two regions. Furthermore, the proteome data sets were classified according to their chromosomal origin, and five proteins were evidenced at the MS level for the first time. We identified 70 proteins of the chromosome 15 - one of them for the first time by MS - which were submitted to an in silico pathway analysis. These revealed branch point proteins associated with Prader-Willi and Angelman syndromes and dyskeratosis congenita, which are chromosome-15-associated diseases. Data presented here can be a useful for brain disorder studies as well as for contributing to the C-HPP initiative. Our data are publicly available as resource data to C-HPP participant groups at http://yoda.iq.ufrj.br/Daniel/chpp2013. 
Additionally, the mass spectrometry proteomics data have been deposited to the ProteomeXchange with identifier PXD000547 for the corpus callosum and PXD000548 for the anterior temporal lobe.",2013-12-03 +30251842,ADMETopt: A Web Server for ADMET Optimization in Drug Design via Scaffold Hopping.,"Drug-likeness, comprising absorption, distribution, metabolism, excretion, and toxicity (ADMET) properties, plays a significant role in early drug discovery. However, as for current strategies of lead optimization, in vitro potency is still the focus, which may cause ""molecular obesity"" (poor ADMET properties). Therefore, optimization of ADMET properties would be a preferable complement for drug discovery. In this paper, we present a web server, ADMETopt, that applies scaffold hopping and ADMET screening for lead optimization. More than 50 000 unique scaffolds were extracted by fragmenting chemicals deposited in the ChEMBL and Enamine databases. Up to 15 ADMET properties can be predicted to screen the potential molecules, including seven physicochemical properties and eight biological properties. All of the models were built in terms of our previous studies and are available in our web server admetSAR. For the plausibility measurement of the modified molecules, synthetic accessibility and quantitative evaluation of drug-likeness were then implemented. As a case study, a scaffold similarity network was constructed for compounds that have bioactivities on estrogen receptors. The results demonstrated that the feasibility and practicability of our web server are acceptable. 
The web server is publicly accessible at http://lmmd.ecust.edu.cn/admetsar2/admetopt/ .",2018-10-09 +24316575,The Vertebrate Genome Annotation browser 10 years on.,"The Vertebrate Genome Annotation (VEGA) database (http://vega.sanger.ac.uk), initially designed as a community resource for browsing manual annotation of the human genome project, now contains five reference genomes (human, mouse, zebrafish, pig and rat). Its introduction pages have been redesigned to enable the user to easily navigate between whole genomes and smaller multi-species haplotypic regions of interest such as the major histocompatibility complex. The VEGA browser is unique in that annotation is updated via the Human And Vertebrate Analysis aNd Annotation (HAVANA) update track every 2 weeks, allowing single gene updates to be made publicly available to the research community quickly. The user can now access different haplotypic subregions more easily, such as those from the non-obese diabetic mouse, and display them in a more intuitive way using the comparative tools. We also highlight how the user can browse manually annotated updated patches from the Genome Reference Consortium (GRC).",2013-12-06 +27173524,"DPTEdb, an integrative database of transposable elements in dioecious plants. ","Dioecious plants usually harbor 'young' sex chromosomes, providing an opportunity to study the early stages of sex chromosome evolution. Transposable elements (TEs) are mobile DNA elements frequently found in plants and are suggested to play important roles in plant sex chromosome evolution. The genomes of several dioecious plants have been sequenced, offering an opportunity to annotate and mine the TE data. However, comprehensive and unified annotation of TEs in these dioecious plants is still lacking. In this study, we constructed a dioecious plant transposable element database (DPTEdb). DPTEdb is a specific, comprehensive and unified relational database and web interface. 
We used a combination of de novo, structure-based and homology-based approaches to identify TEs from the genome assemblies of previously published data, as well as our own. The database currently integrates eight dioecious plant species and a total of 31 340 TEs along with classification information. DPTEdb provides user-friendly web interfaces to browse, search and download the TE sequences in the database. Users can also use tools, including BLAST, GetORF, HMMER, Cut sequence and JBrowse, to analyze TE data. Given the role of TEs in plant sex chromosome evolution, the database will contribute to the investigation of TEs in structural, functional and evolutionary dynamics of the genome of dioecious plants. In addition, the database will supplement the research of sex diversification and sex chromosome evolution of dioecious plants.Database URL: http://genedenovoweb.ticp.net:81/DPTEdb/index.php.",2016-05-12 +27391016,"D-PLACE: A Global Database of Cultural, Linguistic and Environmental Diversity.","From the foods we eat and the houses we construct, to our religious practices and political organization, to who we can marry and the types of games we teach our children, the diversity of cultural practices in the world is astounding. Yet, our ability to visualize and understand this diversity is limited by the ways it has been documented and shared: on a culture-by-culture basis, in locally-told stories or difficult-to-access repositories. In this paper we introduce D-PLACE, the Database of Places, Language, Culture, and Environment. This expandable and open-access database (accessible at https://d-place.org) brings together a dispersed corpus of information on the geography, language, culture, and environment of over 1400 human societies. 
We aim to enable researchers to investigate the extent to which patterns in cultural diversity are shaped by different forces, including shared history, demographics, migration/diffusion, cultural innovations, and environmental and ecological conditions. We detail how D-PLACE helps to overcome four common barriers to understanding these forces: i) location of relevant cultural data, (ii) linking data from distinct sources using diverse ethnonyms, (iii) variable time and place foci for data, and (iv) spatial and historical dependencies among cultural groups that present challenges for analysis. D-PLACE facilitates the visualisation of relationships among cultural groups and between people and their environments, with results downloadable as tables, on a map, or on a linguistic tree. We also describe how D-PLACE can be used for exploratory, predictive, and evolutionary analyses of cultural diversity by a range of users, from members of the worldwide public interested in contrasting their own cultural practices with those of other societies, to researchers using large-scale computational phylogenetic analyses to study cultural evolution. In summary, we hope that D-PLACE will enable new lines of investigation into the major drivers of cultural change and global patterns of cultural diversity.",2016-07-08 +29415010,shinyGISPA: A web application for characterizing phenotype by gene sets using multiple omics data combinations.,"While many methods exist for integrating multi-omics data or defining gene sets, there is no one single tool that defines gene sets based on merging of multiple omics data sets. We present shinyGISPA, an open-source application with a user-friendly web-based interface to define genes according to their similarity in several molecular changes that are driving a disease phenotype. 
This tool was developed to help facilitate the usability of a previously published method, Gene Integrated Set Profile Analysis (GISPA), among researchers with limited computer-programming skills. The GISPA method allows the identification of multiple gene sets that may play a role in the characterization, clinical application, or functional relevance of a disease phenotype. The tool provides an automated workflow that is highly scalable and adaptable to applications that go beyond genomic data merging analysis. It is available at http://shinygispa.winship.emory.edu/shinyGISPA/.",2018-02-07 +30296227,Fine-Grained Quality Assessment for Compressed Images.,"Image quality assessment (IQA) has attracted more and more attention due to the urgent demand in image services. The perceptual-based image compression is one of the most prominent applications that require IQA metrics to be highly correlated with human vision. To explore IQA algorithms that are more consistent with human vision, several calibrated databases have been constructed. However, the distorted images in the existing databases are usually generated by corrupting the pristine images with various distortions in coarse levels, such that the IQA algorithms validated on them may be inefficient to optimize the perceptual-based image compression with fine-grained quality differences. In this paper, we construct a large-scale image database which can be used for fine-grained quality assessment of compressed images. In the proposed database, reference images are compressed at constant bitrate levels by JPEG encoders with different optimization methods. To distinguish subtle differences, the pair-wise comparison method is utilized to rank them in subjective experiments. We select 100 reference images for the proposed database, and each image is compressed into three target bitrates by four different JPEG optimization methods, such that 1200 distorted images are generated in total. 
Sixteen well-known IQA algorithms are evaluated and analyzed on the proposed database. With the devised fine-grained IQA database, we expect to further promote image quality assessment by shifting it from a coarse-grained stage to a fine-grained stage. The database is available at: https://sites.google.com/site/zhangxinf07/fg-iqa.",2018-10-08 +31772415,Health Status and Transitions in Cohabiting Relationships of American Young Adults.,"

Objective

This study examines whether individual health predicts cohabitors' union transitions to marriage in American young adults.

Background

Associations between health and subsequent marital transitions are well documented, but less is known about how health influences transitions of cohabiting relationships. As cohabitation has become a common relationship experience, understanding how health may influence cohabitors' union transitions is an important component of how health shapes relationship exposures more broadly.

Method

Data were taken from Waves III and IV of the National Longitudinal Study of Adolescent to Adult Health (http://www.cpc.unc.edu/projects/addhealth), including the supplemental collection of relationship partners conducted during Wave III. Competing risk regressions for the transition of cohabiting unions to marriage were estimated in two samples: one of individuals and a smaller one of cohabiting couples with information from both partners.

Results

Healthier cohabiters are more likely to marry than are their less healthy counterparts, but only women's health is significantly associated with the transition to marriage. In the dyadic sample with information from both partners, the significant association between the female partner's health and the transition to marriage is robust to male partner characteristics, including health.

Conclusion

Health is an important predictor of cohabitation transitions in early adulthood, but these transitions may only be sensitive to the female partner's health.",2019-04-01 +22281013,KAREBrowser: SNP database of Korea Association REsource Project.,"The International HapMap Project and the Human Genome Diversity Project (HGDP) provide plentiful resources on human genome information to the public. However, this kind of information is limited because of the small sample size in both databases. A Genome-Wide Association Study has been conducted with 8,842 Korean subjects as a part of the Korea Association Resource (KARE) project. In an effort to build a publicly available browsing system for genome data resulted from large scale KARE GWAS, we developed the KARE browser. This browser provides users with a large amount of single nucleotide polymorphisms (SNPs) information comprising 1.5 million SNPs from population-based cohorts of 8,842 samples. KAREBrowser was based on the generic genome browser (GBrowse), a webbased application tool developed for users to navigate and visualize the genomic features and annotations in an interactive manner. All SNP information and related functions are available at the web site http://ksnp.cdc. go.kr/karebrowser/.",2012-01-01 +22045659,Genome-wide identification of SNPs in microRNA genes and the SNP effects on microRNA target binding and biogenesis.,"MicroRNAs (miRNAs) are studied as key regulators of gene expression involved in different diseases. Several single nucleotide polymorphisms (SNPs) in miRNA genes or target sites (miRNA-related SNPs) have been proved to be associated with human diseases by affecting the miRNA-mediated regulatory function. To systematically analyze miRNA-related SNPs and their effects, we performed a genome-wide scan for SNPs in human pre-miRNAs, miRNA flanking regions, target sites, and designed a pipeline to predict the effects of them on miRNA-target interaction. 
As a result, we identified 48 SNPs in human miRNA seed regions and thousands of SNPs in 3' untranslated regions with the potential to either disturb or create miRNA-target interactions. Furthermore, we experimentally confirmed seven loss-of-function SNPs and one gain-of-function SNP by luciferase assay. This is the first case of experimental validation of an SNP in an miRNA creating a novel miRNA target binding. All useful data were complied into miRNASNP, a user-friendly free online database (http://www.bioguo.org/miRNASNP/). These data will be a useful resource for studying miRNA function, identifying disease-associated miRNAs, and further personalized medicine.",2011-11-23 +22116064,TDR Targets: a chemogenomics resource for neglected diseases.,"The TDR Targets Database (http://tdrtargets.org) has been designed and developed as an online resource to facilitate the rapid identification and prioritization of molecular targets for drug development, focusing on pathogens responsible for neglected human diseases. The database integrates pathogen specific genomic information with functional data (e.g. expression, phylogeny, essentiality) for genes collected from various sources, including literature curation. This information can be browsed and queried using an extensive web interface with functionalities for combining, saving, exporting and sharing the query results. Target genes can be ranked and prioritized using numerical weights assigned to the criteria used for querying. In this report we describe recent updates to the TDR Targets database, including the addition of new genomes (specifically helminths), and integration of chemical structure, property and bioactivity information for biological ligands, drugs and inhibitors and cheminformatic tools for querying and visualizing these chemical data. 
These changes greatly facilitate exploration of linkages (both known and predicted) between genes and small molecules, yielding insight into whether particular proteins may be druggable, effectively allowing the navigation of chemical space in a genomics context.",2011-11-23 +24715219,The quail anatomy portal.,"The Japanese quail is a widely used model organism for the study of embryonic development; however, anatomical resources are lacking. The Quail Anatomy Portal (QAP) provides 22 detailed three-dimensional (3D) models of quail embryos during development from embryonic day (E)1 to E15 generated using optical projection tomography. The 3D models provided can be virtually sectioned to investigate anatomy. Furthermore, using the 3D nature of the models, we have generated a tool to assist in the staging of quail samples. Volume renderings of each stage are provided and can be rotated to allow visualization from multiple angles allowing easy comparison of features both between stages in the database and between images or samples in the laboratory. The use of JavaScript, PHP and HTML ensure the database is accessible to users across different operating systems, including mobile devices, facilitating its use in the laboratory.The QAP provides a unique resource for researchers using the quail model. The ability to virtually section anatomical models throughout development provides the opportunity for researchers to virtually dissect the quail and also provides a valuable tool for the education of students and researchers new to the field. DATABASE URL: http://quail.anatomyportal.org (For review username: demo, password: quail123).",2014-04-07 +29946431,META-pipe Authorization service. ,"We describe the design, implementation, and use of the META-pipe Authorization service. META-pipe is a complete workflow for the analysis of marine metagenomics data. We will provide META-pipe as a web based data analysis service for ELIXIR users. 
We have integrated our Authorization service with the ELIXIR Authorization and Authentication Infrastructure (AAI) that allows single sign-on to services across the ELIXIR infrastructure. We use the Authorization service to authorize access to data on the META-pipe storage system and jobs in the META-pipe job queue. Our Authorization server was among the first SAML2 service providers  that integrated with ELIXIR AAI. The code is open source at: https://gitlab.com/uit-sfb/AuthService2.",2018-01-09 +31998690,Vienna LiverTox Workspace-A Set of Machine Learning Models for Prediction of Interactions Profiles of Small Molecules With Transporters Relevant for Regulatory Agencies.,"Transporters expressed in the liver play a major role in drug pharmacokinetics and are a key component of the physiological bile flow. Inhibition of these transporters may lead to drug-drug interactions or even drug-induced liver injury. Therefore, predicting the interaction profile of small molecules with transporters expressed in the liver may help medicinal chemists and toxicologists to prioritize compounds in an early phase of the drug development process. Based on a comprehensive analysis of the data available in the public domain, we developed a set of classification models which allow to predict-for a small molecule-the inhibition of and transport by a set of liver transporters considered to be relevant by FDA, EMA, and the Japanese regulatory agency. The models were validated by cross-validation and external test sets and comprise cross validated balanced accuracies in the range of 0.64-0.88. Finally, models were implemented as an easy to use web-service which is freely available at https://livertox.univie.ac.at.",2019-01-01 +30032301,"SonicParanoid: fast, accurate and easy orthology inference.","

Motivation

Orthology inference constitutes a common base of many genome-based studies, as a pre-requisite for annotating new genomes, finding target genes for biotechnological applications and revealing the evolutionary history of life. Although its importance keeps rising with the ever-growing number of sequenced genomes, existing tools are computationally demanding and difficult to employ.

Results

Here, we present SonicParanoid, which is faster than, but comparably accurate to, the well-established tools with a balanced precision-recall trade-off. Furthermore, SonicParanoid substantially relieves the difficulties of orthology inference for those who need to construct and maintain their own genomic datasets.

Availability and implementation

SonicParanoid is available with a GNU GPLv3 license on the Python Package Index and BitBucket. Documentation is available at http://iwasakilab.bs.s.u-tokyo.ac.jp/sonicparanoid.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +30282789,Blood Culture Results Reporting: How Fast Is Your Laboratory and Is Faster Better? ,"Blood cultures are one of the most common and most important tests performed in clinical microbiology laboratories. Variables and technology that improve and speed the recovery of blood stream pathogens have been published in the Journal of Clinical Microbiology since its inception in 1975. Despite the importance of blood cultures, little research has focused on the turnaround time of blood culture reports. In this issue of the Journal of Clinical Microbiology, Y. P. Tabak et al. (J Clin Microbiol 56:e00500-18, 2018, https://doi.org/10.1128/JCM.00500-18) report the results of an investigation of Gram stain, organism identification, and susceptibility report turnaround times for 165,593 blood cultures from 13 laboratories. These data provide a starting point for clinical laboratories to establish targets for blood culture result reporting.",2018-11-27 +32308920,Implementing the VMC Specification to Reduce Ambiguity in Genomic Variant Representation.,"Current methods used for representing biological sequence variants allow flexibility, which has created redundancy within variant archives and discordance among variant representation tools. While research methodologies have been able to adapt to this ambiguity, strict clinical standards make it difficult to use this data in what would otherwise be useful clinical interventions. We implemented a specification developed by the GA4GH Variant Modeling Collaboration (VMC), which details a new approach to unambiguous representation of variants at the allelic level, as a haplotype, or as a genotype. Our implementation, called the VMC Test Suite (http://vcfclin.org), offers web tools to generate and insert VMC identifiers into a VCF file and to generate a VMC bundle JSON representation of a VCF file or HGVS expression. 
A command line tool with similar functionality is also introduced. These tools facilitate use of this standard-an important step toward reliable querying of variants and their associated annotations.",2019-01-01 +30635895,Tools for Understanding miRNA-mRNA Interactions for Reproducible RNA Analysis.,"MicroRNAs (miRNAs) are an integral part of gene regulation at the post-transcriptional level. The use of RNA data in gene expression analysis has become increasingly important to gain insights into the regulatory mechanisms behind miRNA-mRNA interactions. As a result, we are confronted with a growing landscape of tools, while standards for reproducibility and benchmarking lag behind. This work identifies the challenges for reproducible RNA analysis, and highlights best practices on the processing and dissemination of scientific results. We found that the success of a tool does not solely depend on its performances: equally important is how a tool is received, and then supported within a community. This leads us to a detailed presentation of the RNA workbench, a community effort for sharing workflows and processing tools, built on top of the Galaxy framework. Here, we follow the community guidelines to extend its portfolio of RNA tools with the integration of the TriplexRNA ( https://triplexrna.org ). Our findings provide the basis for the development of a recommendation system, to guide users in the choice of tools and workflows.",2019-01-01 +30008982,EMDB Web Resources.,"The Electron Microscopy Data Bank (EMDB; http://emdb-empiar.org) is a global openly-accessible archive of biomolecular and cellular 3D reconstructions derived from electron microscopy (EM) data. EMBL-EBI develops web-based resources to facilitate the reuse of EMDB data. Here we provide protocols for how these resources can be used for searching EMDB, visualising EMDB structures, statistically analysing EMDB content and checking the validity of EMDB structures. 
Protocols for searching include quick link categories from the main page, links to latest entries released during the weekly cycle, filtered browsing of the entire archive and a form-based search. For visualisation, the 'Volume Slicer' enables slices of EMDB entries to be visualised interactively and in three orthogonal directions. The EMstats web service (https://emdb-empiar.org/emstats) provides up-to-date interactive statistical charts analysing EMDB. All EMDB entries have 'visual analysis' pages that provide basic validation information for the entry.",2018-03-01 +27102089,Gene-set activity toolbox (GAT): A platform for microarray-based cancer diagnosis using an integrative gene-set analysis approach.,"Cancer is a complex disease that cannot be diagnosed reliably using only single gene expression analysis. Using gene-set analysis on high throughput gene expression profiling controlled by various environmental factors is a commonly adopted technique used by the cancer research community. This work develops a comprehensive gene expression analysis tool (gene-set activity toolbox: (GAT)) that is implemented with data retriever, traditional data pre-processing, several gene-set analysis methods, network visualization and data mining tools. The gene-set analysis methods are used to identify subsets of phenotype-relevant genes that will be used to build a classification model. To evaluate GAT performance, we performed a cross-dataset validation study on three common cancers namely colorectal, breast and lung cancers. The results show that GAT can be used to build a reasonable disease diagnostic model and the predicted markers have biological relevance. GAT can be accessed from http://gat.sit.kmutt.ac.th where GAT's java library for gene-set analysis, simple classification and a database with three cancer benchmark datasets can be downloaded.",2016-03-15 +30052762,WAVES: a web application for versatile enhanced bioinformatic services.,"

Summary

WAVES is a web application dedicated to bioinformatic tool integration. It provides an efficient way to implement a service for any bioinformatic software. Such services are automatically made available in three ways: web pages, web forms to include in remote websites and a RESTful web services single application programing interface to access remotely from applications. In order to fulfill the service's computational needs, WAVES can perform computation on various resources and environments, such as Galaxy instances.

Availability and implementation

WAVES was developed with Django, a Python-based web framework. It was designed as a reusable web application. It is fully portable, as only a Python installation is required to run Django. It is licensed under GNU General Public License. Source code, documentation with examples and demo are available from http://www.atgc-montpellier.fr/waves/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +29774499,"Factors Influencing L2 Self-repair Behavior: The Role of L2 Proficiency, Attentional Control and L1 Self-repair Behavior.","Self-repairs, that is revisions of speech that speakers themselves initiate and complete (Salonen and Laakso in J Child Lang 36:859, 2009. https://doi.org/10.1017/s0305000908009240 ), are frequently used to observe the cognitive and linguistic processes underlying second language (L2) speech production. Previous research has shown that factors such as L2 proficiency, attentional control and native language (L1) self-repair behavior interact with L2 self-repair behavior. To our knowledge, however, no research has examined how these three factors interact within a cohort of L2 speakers. As such, the present study examined the proficiency scores, attentional control scores and L2 and L1 self-repair frequency data of 58 adult L2 English speakers of various proficiency levels. Regression results showed that while proficiency was not a significant predictor of L2 self-repair behavior, attentional control and L1 repair frequency together explained 40% of the variance. Results suggest that L2 self-repair behavior may be more closely linked to stable cognitive and personality traits than to L2 proficiency.",2019-02-01 +31026604,Prediction of cross-species infection propensities of viruses with receptor similarity.,"Studies of host factors that affect susceptibility to viral infections have led to the possibility of determining the risk of emerging infections in potential host organisms. In this study, we constructed a computational framework to estimate the probability of virus transmission between potential hosts based on the hypothesis that the major barrier to virus infection is differences in cell-receptor sequences among species. 
Information regarding host susceptibility to virus infection was collected to classify the cross-species infection propensity between hosts. Evolutionary divergence matrices and a sequence similarity scoring program were used to determine the distance and similarity of receptor sequences. The discriminant analysis was validated with cross-validation methods. The results showed that the primary structure of the receptor protein influences host susceptibility to cross-species viral infections. Pair-wise distance, relative distance, and sequence similarity showed the best accuracy in identifying the susceptible group. Based on the results of the discriminant analysis, we constructed ViCIPR (http://lcbb3.snu.ac.kr/ViCIPR/home.jsp), a server-based tool to enable users to easily extract the cross-species infection propensities of specific viruses using a simple two-step procedure. Our sequence-based approach suggests that it may be possible to identify virus transmission between hosts without requiring complex structural analysis. Due to a lack of available data, this method is limited to viruses whose receptor use has been determined. However, the significant accuracy of predictive variables that positively and negatively influence virus transmission suggests that this approach could be improved with further analysis of receptor sequences.",2019-04-23 +30687361,PolyMorphPredict: A Universal Web-Tool for Rapid Polymorphic Microsatellite Marker Discovery From Whole Genome and Transcriptome Data.,"Microsatellites are ubiquitously distributed, polymorphic repeat sequence valuable for association, selection, population structure and identification. They can be mined by genomic library, probe hybridization and sequencing of selected clones. Such approach has many limitations like biased hybridization and selection of larger repeats. In silico mining of polymorphic markers using data of various genotypes can be rapid and economical. 
Available tools lack in some or other aspects like: targeted user defined primer generation, polymorphism discovery using multiple sequence, size and number limits of input sequence, no option for primer generation and e-PCR evaluation, transferability, lack of complete automation and user-friendliness. They also lack the provision to evaluate published primers in e-PCR mode to generate additional allelic data using re-sequenced data of various genotypes for judicious utilization of previously generated data. We developed the tool (PolyMorphPredict) using Perl, R, Java and launched at Apache which is available at http://webtom.cabgrid.res.in/polypred/. It mines microsatellite loci and computes primers from genome/transcriptome data of any species. It can perform e-PCR using published primers for polymorphism discovery and across species transferability of microsatellite loci. Present tool has been evaluated using five species of different genome size having 21 genotypes. Though server is equipped with genomic data of three species for test run with gel simulation, but can be used for any species. Further, polymorphism predictability has been validated using in silico and in vitro PCR of four rice genotypes. This tool can accelerate the in silico microsatellite polymorphism discovery in re-sequencing projects of any species of plant and animal for their diversity estimation along with variety/breed identification, population structure, MAS, QTL and gene discovery, traceability, parentage testing, fungal diagnostics and genome finishing.",2018-01-01 +29564830,Chromothripsis Detection and Characterization Using the CTLPScanner Web Server.,"Accurate detection of chromothripsis event is important to study the mechanisms underlying this phenomenon. CTLPScanner ( http://cgma.scu.edu.cn/CTLPScanner/ ) is a web-based tool for identification and annotation of chromothripsis-like pattern (CTLP) in genomic array data. 
In this chapter, we illustrate the utility of CTLPScanner for screening chromosome pulverization regions and give interpretation of the results. The web interface offers a set of parameters and thresholds for customized screening. We also provide practical recommendations for effective chromothripsis detection. In addition to the user data processing module, CTLPScanner contains more than 50,000 preprocessed oncogenomic arrays, which allow users to explore the presence of chromothripsis signatures from public data resources.",2018-01-01 +30016513,Kinome-wide identification of phosphorylation networks in eukaryotic proteomes.,"

Motivation

Signaling and metabolic pathways are finely regulated by a network of protein phosphorylation events. Unraveling the nature of this intricate network, composed of kinases, target proteins and their interactions, is therefore of crucial importance. Although thousands of kinase-specific phosphorylations (KsP) have been annotated in model organisms their kinase-target network is far from being complete, with less studied organisms lagging behind.

Results

In this work, we achieved an automated and accurate identification of kinase domains, inferring the residues that most likely contribute to peptide specificity. We integrated this information with the target peptides of known human KsP to predict kinase-specific interactions in other eukaryotes through a deep neural network, outperforming similar methods. We analyzed the differential conservation of kinase specificity among eukaryotes revealing the high conservation of the specificity of tyrosine kinases. With this approach we discovered 1590 novel KsP of potential clinical relevance in the human proteome.

Availability and implementation

http://akid.bio.uniroma2.it.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +29369293,CardioClassifier: disease- and gene-specific computational decision support for clinical genome interpretation.,"

Purpose

Internationally adopted variant interpretation guidelines from the American College of Medical Genetics and Genomics (ACMG) are generic and require disease-specific refinement. Here we developed CardioClassifier ( http://www.cardioclassifier.org ), a semiautomated decision-support tool for inherited cardiac conditions (ICCs).

Methods

CardioClassifier integrates data retrieved from multiple sources with user-input case-specific information, through an interactive interface, to support variant interpretation. Combining disease- and gene-specific knowledge with variant observations in large cohorts of cases and controls, we refined 14 computational ACMG criteria and created three ICC-specific rules.

Results

We benchmarked CardioClassifier on 57 expertly curated variants and show full retrieval of all computational data, concordantly activating 87.3% of rules. A generic annotation tool identified fewer than half as many clinically actionable variants (64/219 vs. 156/219, Fisher's P = 1.1 × 10^-18), with important false positives, illustrating the critical importance of disease and gene-specific annotations. CardioClassifier identified putatively disease-causing variants in 33.7% of 327 cardiomyopathy cases, comparable with leading ICC laboratories. Through addition of manually curated data, variants found in over 40% of cardiomyopathy cases are fully annotated, without requiring additional user-input data.

Conclusion

CardioClassifier is an ICC-specific decision-support tool that integrates expertly curated computational annotations with case-specific data to generate fast, reproducible, and interactive variant pathogenicity reports, according to best practice guidelines.",2018-01-25 +29573027,BRAFwild papillary thyroid carcinoma has two distinct mRNA expression patterns with different clinical behaviors.,"BACKGROUND:Using a large set of genomic data from The Cancer Genome Atlas (TCGA), we classified BRAFwild papillary thyroid carcinomas (PTCs) into 2 subtypes with distinct molecular patterns and different clinical behaviors. We also suggested gene signatures (RAS-score) to predict molecular subtypes and clinical behaviors of BRAFwild PTC. METHOD:Integrated genomic analysis was done using all genomic data of PTC in TCGA data portal (https://tcga-data.nci.nih.gov) and cancer browser (https://genome-cancer.ucsc.edu). Using Gene Ontology and a logistic regression test, we selected gene signatures (RAS-score) and applied this prediction model to the validation cohort (GSE60542). RESULT:When we performed multiplatform genomic analysis, BRAFwild PTCs were divided into 2 molecular subtypes. Each subtype showed distinct molecular patterns and clinical behaviors. Gene signatures successfully predicted molecular subtype in another validation cohort. CONCLUSION:We found that BRAFwild PTCs were divided into 2 molecular subtypes and each subtype showed distinct molecular patterns, different activated pathways, and different clinical behaviors.",2018-03-23 +30673520,"Estimate, a New iPad Application for Assessment of Plant Disease Severity Using Photographic Standard Area Diagrams.","Assessment of disease severity is a foundational component of plant pathology and essential for robust disease management. Researchers often estimate disease severity using standard area diagrams (SADs) that are reference images representing disease severity in percentage increments. 
SADs provide assessments of disease severity that are more accurate, precise, and reliable than other methods. Although specific SADs have been constructed for many plant diseases, they often depict severity in unrealistic black-and-white or grayscale illustrations. SADs are also usually printed, static references that can burden data collection in the field and require data to be transferred manually to a computer spreadsheet for manipulation. This data entry process and verification are prone to errors and require additional inputs of time and labor. We developed a new iPad application (app) called Estimate for researchers and crop managers for their use on a mobile device at the field-level for assessing plant disease severity in order to collect data or aid in treatment decisions. The app is a repository for digital, photographic SADs and offers savings in time for data collection and processing. Estimate allows users to select a disease from a prepopulated list and specify the reference disease images in either logarithmic or linear intervals. Data may be collected as the midpoint of an interval (ordinal) or as 1% increments (continuous). Users then select among photographic images by touching those that best match the observed disease severity on successive samples. Estimate allows data entry at the plant and leaf hierarchical levels within plots and subplots. Alternatively, data may be collected on single sampling units with an undefined experimental design (i.e., 1 to x leaves). The user may inspect and e-mail the final data in comma-separated values format for analysis using conventional spreadsheet software. Estimate was released with SADs for assessing the severity of Cercospora leaf spot in red and yellow table beet cultivars. A list of collaborators and up-to-date list of SADs included in Estimate is available at http://evade.pppmb.cals.cornell.edu/estimate/ . SADs for other diseases will be added to Estimate as they become available. 
Estimate is available for free download from iTunes ( https://itunes.apple.com/WebObjects/MZStore.woa/wa/viewSoftware?id=1193605571&mt=8 ) and is compatible with an iPad Air 2 or equivalent using iOS 9.0 or greater.",2017-11-16 +26452388,ViPAR: a software platform for the Virtual Pooling and Analysis of Research Data.,"

Background

Research studies exploring the determinants of disease require sufficient statistical power to detect meaningful effects. Sample size is often increased through centralized pooling of disparately located datasets, though ethical, privacy and data ownership issues can often hamper this process. Methods that facilitate the sharing of research data that are sympathetic with these issues and which allow flexible and detailed statistical analyses are therefore in critical need. We have created a software platform for the Virtual Pooling and Analysis of Research data (ViPAR), which employs free and open source methods to provide researchers with a web-based platform to analyse datasets housed in disparate locations.

Methods

Database federation permits controlled access to remotely located datasets from a central location. The Secure Shell protocol allows data to be securely exchanged between devices over an insecure network. ViPAR combines these free technologies into a solution that facilitates 'virtual pooling' where data can be temporarily pooled into computer memory and made available for analysis without the need for permanent central storage.

Results

Within the ViPAR infrastructure, remote sites manage their own harmonized research dataset in a database hosted at their site, while a central server hosts the data federation component and a secure analysis portal. When an analysis is initiated, requested data are retrieved from each remote site and virtually pooled at the central site. The data are then analysed by statistical software and, on completion, results of the analysis are returned to the user and the virtually pooled data are removed from memory.

Conclusions

ViPAR is a secure, flexible and powerful analysis platform built on open source technology that is currently in use by large international consortia, and is made publicly available at [http://bioinformatics.childhealthresearch.org.au/software/vipar/].",2015-10-08 +30283995,Can urinary biomarkers replace cystoscopy?,"

Purpose

Diagnosis and follow-up in patients with non-muscle invasive bladder cancer (NMIBC) rely on cystoscopy and urine cytology. The aim of this review paper is to give an update on urinary biomarkers and their diagnosis and surveillance potential. Besides FDA-approved markers, recent approaches like DNA methylation assays, mRNA gene expression assays and cell-free DNA (cfDNA) are evaluated to assess whether replacing cystoscopy with urine markers is a potential scenario for the future.

Methods

We performed a non-systematic review of current literature without time period restriction using the National Library of Medicine database ( http://www.pubmed.gov ). The search included the following key words in different combinations: ""urothelial carcinoma"", ""urinary marker"", ""hematuria"", ""cytology"" and ""bladder cancer"". Further, references were extracted from identified articles. The results were evaluated regarding their clinical relevance and study quality.

Results

Currently, replacing cystoscopy with available urine markers is not recommended by international guidelines. For FDA-approved markers, prospective randomized trials are lacking. Newer approaches focusing on molecular, genomic and transcriptomic aberrations are promising with good accuracies. Furthermore, these assays may provide additional molecular information to guide individualized surveillance strategies and therapy. Currently ongoing prospective trials will determine if cystoscopy reduction is feasible.

Conclusion

Urinary markers represent a non-invasive approach for molecular characterization of the disease. Although fully replacing cystoscopy seems unrealistic in the near future, enhancing the current gold standard by additional molecular information is feasible. A reliable classification and differentiation between aggressive and nonaggressive tumors by applying DNA, mRNA, and cfDNA assays may change surveillance to help reduce cystoscopies.",2018-10-03 +24428872,GuavaH: a compendium of host genomic data in HIV biology and disease.,"

Background

There is an ever-increasing volume of data on host genes that are modulated during HIV infection, influence disease susceptibility or carry genetic variants that impact HIV infection. We created GuavaH (Genomic Utility for Association and Viral Analyses in HIV, http://www.GuavaH.org), a public resource that supports multipurpose analysis of genome-wide genetic variation and gene expression profile across multiple phenotypes relevant to HIV biology.

Findings

We included original data from 8 genome and transcriptome studies addressing viral and host responses in and ex vivo. These studies cover phenotypes such as HIV acquisition, plasma viral load, disease progression, viral replication cycle, latency and viral-host genome interaction. This represents genome-wide association data from more than 4,000 individuals, exome sequencing data from 392 individuals, in vivo transcriptome microarray data from 127 patients/conditions, and 60 sets of RNA-seq data. Additionally, GuavaH allows visualization of protein variation in ~8,000 individuals from the general population. The publicly available GuavaH framework supports queries on (i) unique single nucleotide polymorphism across different HIV related phenotypes, (ii) gene structure and variation, (iii) in vivo gene expression in the setting of human infection (CD4+ T cells), and (iv) in vitro gene expression data in models of permissive infection, latency and reactivation.

Conclusions

The complexity of the analysis of host genetic influences on HIV biology and pathogenesis calls for comprehensive motors of research on curated data. The tool developed here allows queries and supports validation of the rapidly growing body of host genomic information pertinent to HIV research.",2014-01-15 +28968724,"Meta-server for automatic analysis, scoring and ranking of docking models.","

Motivation

Modelling with multiple servers that use different algorithms for docking results in more reliable predictions of interaction sites. However, the scoring and comparison of all models by an expert is time-consuming and is not feasible for large volumes of data generated by such modelling.

Results

Quality ASsessment of DOcking Models (QASDOM) Server is a simple and efficient tool for real-time simultaneous analysis, scoring and ranking of data sets of receptor-ligand complexes built by a range of docking techniques. This meta-server is designed to analyse large data sets of docking models and rank them by scoring criteria developed in this study. It produces two types of output showing the likelihood of specific residues and clusters of residues to be involved in receptor-ligand interactions and the ranking of models. The server also allows visualizing residues that form interaction sites in the receptor and ligand sequence and displays 3D model structures of the receptor-ligand complexes.

Availability

http://qasdom.eimb.ru.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +21883742,Easy-to-use phylogenetic analysis system for hepatitis B virus infection.,"AIM:  The molecular phylogenetic analysis has been broadly applied to clinical and virological study. However, the appropriate settings and application of calculation parameters are difficult for non-specialists of molecular genetics. In the present study, the phylogenetic analysis tool was developed for the easy determination of genotypes and transmission route. METHODS:  A total of 23 patients of 10 families infected with hepatitis B virus (HBV) were enrolled and expected to undergo intrafamilial transmission. The extracted HBV DNA were amplified and sequenced in a region of the S gene. RESULTS:  The software to automatically classify query sequence was constructed and installed on the Hepatitis Virus Database (HVDB). Reference sequences were retrieved from HVDB, which contained major genotypes from A to H. Multiple-alignments using CLUSTAL W were performed before the genetic distance matrix was calculated with the six-parameter method. The phylogenetic tree was output by the neighbor-joining method. User interface using WWW-browser was also developed for intuitive control. This system was named as the easy-to-use phylogenetic analysis system (E-PAS). Twenty-three sera of 10 families were analyzed to evaluate E-PAS. The queries obtained from nine families were genotype C and were located in one cluster per family. However, one patient of a family was classified into the cluster different from her family, suggesting that E-PAS detected the sample distinct from that of her family on the transmission route. CONCLUSIONS:  The E-PAS to output phylogenetic tree was developed since requisite material was sequence data only. 
E-PAS could expand to determine HBV genotypes as well as transmission routes.",2011-08-26 +29324744,QCloud: A cloud-based quality control system for mass spectrometry-based proteomics laboratories.,"The increasing number of biomedical and translational applications in mass spectrometry-based proteomics poses new analytical challenges and raises the need for automated quality control systems. Despite previous efforts to set standard file formats, data processing workflows and key evaluation parameters for quality control, automated quality control systems are not yet widespread among proteomics laboratories, which limits the acquisition of high-quality results, inter-laboratory comparisons and the assessment of variability of instrumental platforms. Here we present QCloud, a cloud-based system to support proteomics laboratories in daily quality assessment using a user-friendly interface, easy setup, automated data processing and archiving, and unbiased instrument evaluation. QCloud supports the most common targeted and untargeted proteomics workflows, it accepts data formats from different vendors and it enables the annotation of acquired data and reporting incidences. A complete version of the QCloud system has successfully been developed and it is now open to the proteomics community (http://qcloud.crg.eu). QCloud system is an open source project, publicly available under a Creative Commons License Attribution-ShareAlike 4.0.",2018-01-11 +22711790,MolClass: a web portal to interrogate diverse small molecule screen datasets with different computational models.,"

Unlabelled

The MolClass toolkit and data portal generate computational models from user-defined small molecule datasets based on structural features identified in hit and non-hit molecules in different screens. Each new model is applied to all datasets in the database to classify compound specificity. MolClass thus defines a likelihood value for each compound entry and creates an activity fingerprint across diverse sets of screens. MolClass uses a variety of machine-learning methods to find molecular patterns and can therefore also assign a priori predictions of bioactivities for previously untested molecules. The power of the MolClass resource will grow as a function of the number of screens deposited in the database.

Availability and implementation

The MolClass webportal, software package and source code are freely available for non-commercial use at http://tyerslab.bio.ed.ac.uk/molclass. A MolClass tutorial and a guide on how to build models from datasets can also be found on the web site. MolClass uses the chemistry development kit (CDK), WEKA and MySQL for its core functionality. A REST service is available at http://tyerslab.bio.ed.ac.uk/molclass/api based on the OpenTox API 1.2.",2012-06-17 +30276831,LncRNAnet: a comprehensive Sus scrofa lncRNA database.,"Long noncoding RNAs (lncRNAs) constitute a large class of functional non-coding RNAs that play important roles in many biological processes. Thousands of lncRNAs have been identified in mammals. Pig is an important farm animal and biomedical model. It is essential to create a Sus scrofa lncRNA database to enable further study of the function and evolution of lncRNAs. In this study, we built a systematic S. scrofa lncRNA database named lncRNAnet that contains 53 468 S. scrofa lncRNAs with their sequence characteristics, genomic locations, conservation, overlapping SNPs and QTLs, and transcript abundance across nine tissues in pigs. We also integrated 212 922 human and mouse lncRNAs sequences into lncRNAnet. This database will provide for a systematic S. scrofa lncRNA classification and help investigators browse, search for and analyze lncRNAs as well as do blast searches among human, mouse and pig lncRNAs. Thus, lncRNAnet should improve the understanding of the biological functions of lncRNA. The database is freely accessible at http://lnc.rnanet.org/.",2018-10-02 +24917120,Structured RNAs and synteny regions in the pig genome.,"

Background

Annotating mammalian genomes for noncoding RNAs (ncRNAs) is nontrivial since far from all ncRNAs are known and the computational models are resource demanding. Currently, the human genome holds the best mammalian ncRNA annotation, a result of numerous efforts by several groups. However, a more direct strategy is desired for the increasing number of sequenced mammalian genomes of which some, such as the pig, are relevant as disease models and production animals.

Results

We present a comprehensive annotation of structured RNAs in the pig genome. Combining sequence and structure similarity search as well as class specific methods, we obtained a conservative set with a total of 3,391 structured RNA loci of which 1,011 and 2,314, respectively, hold strong sequence and structure similarity to structured RNAs in existing databases. The RNA loci cover 139 cis-regulatory element loci, 58 lncRNA loci, 11 conflicts of annotation, and 3,183 ncRNA genes. The ncRNA genes comprise 359 miRNAs, 8 ribozymes, 185 rRNAs, 638 snoRNAs, 1,030 snRNAs, 810 tRNAs and 153 ncRNA genes not belonging to the aforementioned classes. When running the pipeline on a local shuffled version of the genome, we obtained no matches at the highest confidence level. Additional analysis of RNA-seq data from a pooled library from 10 different pig tissues added another 165 miRNA loci, yielding an overall annotation of 3,556 structured RNA loci. This annotation represents our best effort at making an automated annotation. To further enhance the reliability, 571 of the 3,556 structured RNAs were manually curated by methods depending on the RNA class while 1,581 were declared as pseudogenes. We further created a multiple alignment of pig against 20 representative vertebrates, from which RNAz predicted 83,859 de novo RNA loci with conserved RNA structures. 528 of the RNAz predictions overlapped with the homology based annotation or novel miRNAs. We further present a substantial synteny analysis which includes 1,004 lineage specific de novo RNA loci and 4 ncRNA loci in the known annotation specific for Laurasiatheria (pig, cow, dolphin, horse, cat, dog, hedgehog).

Conclusions

We have obtained one of the most comprehensive annotations for structured ncRNAs of a mammalian genome, which is likely to play central roles in both health modelling and production. The core annotation is available in Ensembl 70 and the complete annotation is available at http://rth.dk/resources/rnannotator/susscr102/version1.02.",2014-06-10 +29029172,DnaSP 6: DNA Sequence Polymorphism Analysis of Large Data Sets.,"We present version 6 of the DNA Sequence Polymorphism (DnaSP) software, a new version of the popular tool for performing exhaustive population genetic analyses on multiple sequence alignments. This major upgrade incorporates novel functionalities to analyze large data sets, such as those generated by high-throughput sequencing technologies. Among other features, DnaSP 6 implements: 1) modules for reading and analyzing data from genomic partitioning methods, such as RADseq or hybrid enrichment approaches, 2) faster methods scalable for high-throughput sequencing data, and 3) summary statistics for the analysis of multi-locus population genetics data. Furthermore, DnaSP 6 includes novel modules to perform single- and multi-locus coalescent simulations under a wide range of demographic scenarios. The DnaSP 6 program, with extensive documentation, is freely available at http://www.ub.edu/dnasp.",2017-12-01 +24194668,Herbarium of the university of malaga (Spain): vascular plants collection.,"The herbarium of University of Málaga (MGC Herbarium) is formed by four biological collections. The vascular plants collection (MGC-Cormof) is the main collection of the herbarium. MGC-Cormof dataset aims to digitize and publish data associated with over 76.000 specimens deposited in the collection, of which 97.2% of the specimens are identified at species level. Since 2011, the University of Malaga's Central Research Service (SCAI) has been responsible for maintaining the herbariums and the dataset. 
The collection is growing continuously, with an annual intake of about 1.500 specimens. Nearly 96% of the collection is digitized, by Herbar v3.7.1 software (F. Pando et al. 1996-2011), making over 73.000 specimens accessible through the GBIF network (http://data.gbif.org/datasets/resource/8105/). At present, 247 families and 8.110 taxa, distributed in angiosperms (93.97%), ferns and fern allies (4.89%) and gymnosperms (1.14%), constitute the MGC-Cormof collection. The families and genera best represented in the collection are Compositae, Leguminosae, Gramineae, Labiatae, Caryophyllaceae, Teucrium, Silene, Asplenium, Linaria and Quercus. Most of the specimens are from the Western Mediterranean Region, fundamentally Southern Spain (Andalusia: 82% of specimens) and Northern Morocco (2.17%). Approximately, 63% of the specimens are georeferenced. The identification of the specimens in the collection has been carried out by the plant biology department at the University of Malaga and plus 40% of the specimens has been reviewed by experts. The MGC-Cormof dataset has been revised by DarwinTest v3.2 tool (Ortega-Maqueda and Pando 2008) before being published in GBIF. The data included in this database are important for conservation works, taxonomy, flora, cartography, phenology, palynology, among others. El Herbario de la Universidad de Málaga (Herbario MGC) está constituido por cuatro colecciones biológicas. La colección de plantas vasculares (MGC Cormof) es la colección principal del herbario. La base de datos MGC-Cormof tiene como objetivo la digitalización y publicación de los datos asociados con los más de 76.000 ejemplares depositados en la colección, de los cuales el 97,2% de las muestras se encuentran identificadas a nivel de especie. Desde 2011, los Servicios Centrales de Investigación (SCAI) de la Universidad de Málaga son responsables de mantener el herbario y sus respectivas bases de datos. 
Esta colección está en continuo crecimiento, con una incorporación anual de unos 1.500 ejemplares. Casi el 96% de la colección está digitalizada, a través del programa Herbar v3.7.1 (F. Pando et al. 1996-2011) por lo que más de 73.000 especímenes son accesibles a través de la red de GBIF (http://data.gbif.org/datasets/resource/8105/). Actualmente, la colección MGC-Cormof está constituida por 247 familias y 8.110 taxones, distribuidos en angiospermas (93,97%), helechos y plantas afines (4,89%) y gimnospermas (1,14%). Las familias y géneros mejor representados en la colección son Compositae, Leguminosae, Gramineae, Labiatae, Caryophyllaceae, Teucrium, Silene, Asplenium, Linaria y Quercus. La mayoría de los especímenes provienen de la región del Mediterráneo Occidental, fundamentalmente del sur de España (Andalucía: 82% de las muestras) y del norte de Marruecos (2,17%). Aproximadamente, el 63% de las muestras se encuentran georreferenciadas. La identificación de los ejemplares de la colección ha sido realizada por personal del departamento de biología vegetal de la Universidad de Málaga y además un 40% de los ejemplares ha sido revisado por especialistas. La base de datos MGC-Cormof ha sido revisada mediante la herramienta DarwinTest v3.2 (Ortega-Maqueda and Pando 2008) antes de ser publicada en GBIF. Los datos incluidos en esta base de datos son importantes para trabajos de conservación, taxonomía, flora, cartografía, fenología, palinología, entre otros.",2013-09-27 +26705106,WheatExp: an RNA-seq expression database for polyploid wheat.,"

Background

For functional genomics studies, it is important to understand the dynamic expression profiles of transcribed genes in different tissues, stages of development and in response to environmental stimuli. The proliferation in the use of next-generation sequencing technologies by the plant research community has led to the accumulation of large volumes of expression data. However, analysis of these datasets is complicated by the frequent occurrence of polyploidy among economically-important crop species. In addition, processing and analyzing such large volumes of sequence data is a technical and time-consuming task, limiting their application in functional genomics studies, particularly for smaller laboratories which lack access to high-powered computing infrastructure. Wheat is a good example of a young polyploid species with three similar genomes (97 % identical among homoeologous genes), rapidly accumulating RNA-seq datasets and a large research community.

Description

We present WheatExp, an expression database and visualization tool to analyze and compare homoeologue-specific transcript profiles across a broad range of tissues from different developmental stages in polyploid wheat. Beginning with publicly-available RNA-seq datasets, we developed a pipeline to distinguish between homoeologous transcripts from annotated genes in tetraploid and hexaploid wheat. Data from multiple studies is processed and compiled into a database which can be queried either by BLAST or by searching for a known gene of interest by name or functional domain. Expression data of multiple genes can be displayed side-by-side across all expression datasets providing immediate access to a comprehensive panel of expression data for specific subsets of wheat genes.

Conclusions

The development of a publicly accessible expression database hosted on the GrainGenes website - http://wheat.pw.usda.gov/WheatExp/ - coupled with a simple and readily-comparable visualization tool will empower the wheat research community to use RNA-seq data and to perform functional analyses of target genes. The presented expression data is homoeologue-specific allowing for the analysis of relative contributions from each genome to the overall expression of a gene, a critical consideration for breeding applications. Our approach can be expanded to other polyploid species by adjusting sequence mapping parameters according to the specific divergence of their genomes.",2015-12-24 +31221798,Prostate Cancer in World Trade Center Responders Demonstrates Evidence of an Inflammatory Cascade.,"An excess incidence of prostate cancer has been identified among World Trade Center (WTC) responders. In this study, we hypothesized that WTC dust, which contained carcinogens and tumor-promoting agents, could facilitate prostate cancer development by inducing DNA damage, promoting cell proliferation, and causing chronic inflammation. We compared expression of immunologic and inflammatory genes using a NanoString assay on archived prostate tumors from WTC Health Program (WTCHP) patients and non-WTC patients with prostate cancer. Furthermore, to assess immediate and delayed responses of prostate tissue to acute WTC dust exposure via intratracheal inhalation, we performed RNA-seq on the prostate of normal rats that were exposed to moderate to high doses of WTC dust. WTC prostate cancer cases showed significant upregulation of genes involved in DNA damage and G2-M arrest. Cell-type enrichment analysis showed that Th17 cells, a subset of proinflammatory Th cells, were specifically upregulated in WTC patients. 
In rats exposed to WTC dust, we observed upregulation of gene transcripts of cell types involved in both adaptive immune response (dendritic cells and B cells) and inflammatory response (Th17 cells) in the prostate. Unexpectedly, genes in the cholesterol biosynthesis pathway were also significantly upregulated 30 days after acute dust exposure. Our results suggest that respiratory exposure to WTC dust can induce inflammatory and immune responses in prostate tissue. IMPLICATIONS: WTC-related prostate cancer displayed a distinct gene expression pattern that could be the result of exposure to specific carcinogens. Our data warrant further epidemiologic and cellular mechanistic studies to better understand the consequences of WTC dust exposure.Visual Overview: http://mcr.aacrjournals.org/content/molcanres/17/8/1605/F1.large.jpg.",2019-06-20 +29697361,Mut2Vec: distributed representation of cancerous mutations.,"

Background

Embedding techniques for converting high-dimensional sparse data into low-dimensional distributed representations have been gaining popularity in various fields of research. In deep learning models, embedding is commonly used and proven to be more effective than naive binary representation. However, yet no attempt has been made to embed highly sparse mutation profiles into densely distributed representations. Since binary representation does not capture biological context, its use is limited in many applications such as discovering novel driver mutations. Additionally, training distributed representations of mutations is challenging due to a relatively small amount of available biological data compared with the large amount of text corpus data in text mining fields.

Methods

We introduce Mut2Vec, a novel computational pipeline that can be used to create a distributed representation of cancerous mutations. Mut2Vec is trained on cancer profiles using Skip-Gram since cancer can be characterized by a series of co-occurring mutations. We also augmented our pipeline with existing information in the biomedical literature and protein-protein interaction networks to compensate for the data insufficiency.

Results

To evaluate our models, we conducted two experiments that involved the following tasks: a) visualizing driver and passenger mutations, b) identifying novel driver mutations using a clustering method. Our visualization showed a clear distinction between passenger mutations and driver mutations. We also found driver mutation candidates and proved that these were true driver mutations based on our literature survey. The pre-trained mutation vectors and the candidate driver mutations are publicly available at http://infos.korea.ac.kr/mut2vec .

Conclusions

We introduce Mut2Vec that can be utilized to generate distributed representations of mutations and experimentally validate the efficacy of the generated mutation representations. Mut2Vec can be used in various deep learning applications such as cancer classification and drug sensitivity prediction.",2018-04-20 +23042674,LNCipedia: a database for annotated human lncRNA transcript sequences and structures.,"Here, we present LNCipedia (http://www.lncipedia.org), a novel database for human long non-coding RNA (lncRNA) transcripts and genes. LncRNAs constitute a large and diverse class of non-coding RNA genes. Although several lncRNAs have been functionally annotated, the majority remains to be characterized. Different high-throughput methods to identify new lncRNAs (including RNA sequencing and annotation of chromatin-state maps) have been applied in various studies resulting in multiple unrelated lncRNA data sets. LNCipedia offers 21 488 annotated human lncRNA transcripts obtained from different sources. In addition to basic transcript information and gene structure, several statistics are determined for each entry in the database, such as secondary structure information, protein coding potential and microRNA binding sites. Our analyses suggest that, much like microRNAs, many lncRNAs have a significant secondary structure, in-line with their presumed association with proteins or protein complexes. Available literature on specific lncRNAs is linked, and users or authors can submit articles through a web interface. Protein coding potential is assessed by two different prediction algorithms: Coding Potential Calculator and HMMER. In addition, a novel strategy has been integrated for detecting potentially coding lncRNAs by automatically re-analysing the large body of publicly available mass spectrometry data in the PRIDE database. LNCipedia is publicly available and allows users to query and download lncRNA sequences and structures based on different search criteria. 
The database may serve as a resource to initiate small- and large-scale lncRNA studies. As an example, the LNCipedia content was used to develop a custom microarray for expression profiling of all available lncRNAs.",2012-10-05 +30321271,Assessment of dietary nitrate intake in humans: a systematic review.,"

Background

The nitrate content of foods and water is highly variable, which has implications for the compilation of food-composition databases and assessment of dietary nitrate intake.

Objective

A systematic review was conducted to ascertain the dietary assessment methods used and to provide estimates of daily nitrate intake in humans.

Design

Relevant articles were identified by a systematic search of 3 electronic databases (PubMed, Web of Science, and Embase) from inception until February 2018. Observational studies conducted in adult populations and reporting information on dietary assessment methods and daily nitrate intake were included. Ecological analyses were conducted to explore the association of nitrate intake with indexes of economic development [Gross Domestic Product (GDP) and KOF Index of Globalization].

Results

A total of 55 articles were included. Forty-two studies investigated associations between nitrate intake and disease risk; 36 (87%) of these studies examined the association between nitrate intake and cancer risk, whereas only 6 studies explored the association of nitrate intake with the risk of diabetes, glaucoma, kidney failure, hypertension, and atherosclerotic vascular disease. The majority of studies used food-frequency questionnaires to assess nitrate intake (n = 43). The median daily nitrate intakes in healthy and patient populations were 108 and 110 mg/d, respectively. We found a significant inverse correlation of nitrate intake with GDP (r = -0.46, P < 0.001) and KOF index (r = -0.31, P = 0.002).

Conclusions

The median estimated daily nitrate intakes by healthy and patient populations were similar, and these values were below the safe upper intake of daily intake (3.7 mg nitrate ion/kg body weight). However, there is considerable heterogeneity in the application of food-composition tables, which may have implications for the accuracy of estimated daily nitrate intake. The association between nitrate intake and risk of cardiometabolic diseases needs further investigation. The protocol for this systematic review has been registered in the PROSPERO database (https://www.crd.york.ac.uk/prospero; CRD number: 42017060354).",2018-10-01 +28451970,An Agile Functional Analysis of Metagenomic Data Using SUPER-FOCUS.,"One of the main goals in metagenomics is to identify the functional profile of a microbial community from unannotated shotgun sequencing reads. Functional annotation is important in biological research because it enables researchers to identify the abundance of functional genes of the organisms present in the sample, answering the question, ""What can the organisms in the sample do?"" Most currently available approaches do not scale with increasing data volumes, which is important because both the number and lengths of the reads provided by sequencing platforms keep increasing. Here, we present SUPER-FOCUS, SUbsystems Profile by databasE Reduction using FOCUS, an agile homology-based approach using a reduced reference database to report the subsystems present in metagenomic datasets and profile their abundances. SUPER-FOCUS was tested with real metagenomes, and the results show that it accurately predicts the subsystems present in the profiled microbial communities, is computationally efficient, and up to 1000 times faster than other tools. 
SUPER-FOCUS is freely available at http://edwards.sdsu.edu/SUPERFOCUS .",2017-01-01 +26010234,SEGEL: A Web Server for Visualization of Smoking Effects on Human Lung Gene Expression.,"Cigarette smoking is a major cause of death worldwide resulting in over six million deaths per year. Cigarette smoke contains complex mixtures of chemicals that are harmful to nearly all organs of the human body, especially the lungs. Cigarette smoking is considered the major risk factor for many lung diseases, particularly chronic obstructive pulmonary diseases (COPD) and lung cancer. However, the underlying molecular mechanisms of smoking-induced lung injury associated with these lung diseases still remain largely unknown. Expression microarray techniques have been widely applied to detect the effects of smoking on gene expression in different human cells in the lungs. These projects have provided a lot of useful information for researchers to understand the potential molecular mechanism(s) of smoke-induced pathogenesis. However, a user-friendly web server that would allow scientists to fast query these data sets and compare the smoking effects on gene expression across different cells had not yet been established. For that reason, we have integrated eight public expression microarray data sets from trachea epithelial cells, large airway epithelial cells, small airway epithelial cells, and alveolar macrophage into an online web server called SEGEL (Smoking Effects on Gene Expression of Lung). Users can query gene expression patterns across these cells from smokers and nonsmokers by gene symbols, and find the effects of smoking on the gene expression of lungs from this web server. Sex difference in response to smoking is also shown. The relationship between the gene expression and cigarette smoking consumption were calculated and are shown in the server. 
The current version of SEGEL web server contains 42,400 annotated gene probe sets represented on the Affymetrix Human Genome U133 Plus 2.0 platform. SEGEL will be an invaluable resource for researchers interested in the effects of smoking on gene expression in the lungs. The server also provides useful information for drug development against smoking-related diseases. The SEGEL web server is available online at http://www.chengfeng.info/smoking_database.html.",2015-05-26 +30350712,A qualitative study of the feasibility and acceptability of a smoking cessation program for people living with HIV and emotional dysregulation.,"Despite high rates of co-occurring tobacco use and anxiety among persons living with HIV, evidence-based interventions for these individuals are limited. An existing cognitive-behavioral treatment protocol for smoking cessation and anxiety (Norton, P. J., & Barrera, T. L. (2012). Transdiagnostic versus diagnosis-specific CBT for anxiety disorders: A preliminary randomized controlled noninferiority trial. Depression and Anxiety, 29(10), 874-882. https://doi.org/10.1002/da.21974) was modified to address transdiagnostic constructs, such as anxiety sensitivity, distress tolerance, and depressive symptomatology (Labbe, A. K., Wilner, J. G., Kosiba, J. D., Gonzalez, A., Smits, J. A., Zvolensky, M. J., … O'Cleirigh, C. (2017). Demonstration of an Integrated Treatment for Smoking Cessation and Anxiety Symptoms in People with HIV: A Clinical Case Study. Cognitive and Behavioral Practice, 24(2), 200-214. https://doi.org/10.1016/j.cbpra.2016.03.009). This study examines the feasibility and acceptability of the intervention as determined from qualitative data from structured exit interviews from 10 participants who completed treatment. Results demonstrated that participants were very motivated to quit smoking and enrolled in the program for health-related reasons and to be able to quit. 
Participants found nearly all the treatment components to be useful for reaching their smoking cessation goal and in managing emotional dysregulation. Last, all participants stated that they would strongly recommend the treatment program. This qualitative study provides initial evidence for the feasibility and acceptability of a modified smoking cessation treatment protocol for HIV+ individuals with anxiety and emotional dysregulation. Future research will focus on evaluating the efficacy of the protocol in a full-scale randomized controlled trial, as well as working to collect qualitative data from participants who discontinue treatment to better understand reasons for treatment attrition.",2018-10-23 +28321144,Summaries of Safety Labeling Changes Approved by the FDA: Boxed Warnings Highlights October - December 2016.,"The FDA's MedWatch program safety labeling changes for boxed warnings are compiled quarterly for drugs and therapeutic biologics where important changes have been made to the safety information. Search of Drug Safety Labeling Changes (SLC) database was conducted on December 31, 2016 for date range ""10/1/2016-12/31/2016"", labeling section ""Boxed Warning"". These and other label changes are searchable in the Drug Safety Labeling Changes (SLC) database, where data are available to the public in downloadable and searchable formats. 
(Drug Safety Labeling Changes are available at: http://www.accessdata.fda.gov/scripts/cder/safetylabelingchanges/?source=govdelivery&utm_medium=email&utm_source=govdelivery) Boxed warnings are ordinarily used to highlight either: adverse reactions so serious in proportion to the potential benefit from the drug that it is essential that it be considered in assessing the risks and benefits of using the drug; OR serious adverse reactions that can be prevented/reduced in frequency or severity by appropriate use of the drug; OR FDA approved the drug with restrictions to ensure safe use because FDA concluded that the drug can be safely used only if distribution or use is restricted.",2017-02-01 +30212165,"Health Policy Analysis: Applications of Extended Cost-Effectiveness Analysis Methodology in Disease Control Priorities, Third Edition","Multiple criteria are involved in making decisions and prioritizing health policies (Baltussen and Niessen 2006). Potential trade-offs between efficiency and equity are among these criteria and have long been emphasized in the treatment and prevention of human immunodeficiency virus/acquired immune deficiency syndrome (HIV/AIDS) (for example, Cleary 2010; Kaplan and Merson 2002; Verguet 2013). Notably, several mathematical frameworks, including mathematical programming, have proposed incorporating equity into resource allocation decisions in the public sector (Birch and Gafni 1992; Bleichrodt, Diecidue, and Quiggin 2004; Epstein and others 2007; Segall 1989; Stinnett and Paltiel 1996). The worldwide application of benefit-cost analysis provided for “distributional weights” as early as the 1970s. Protection from financial risks associated with health care expenses is emerging as a critical component of national health strategies in many low- and middle-income countries (LMICs). 
The World Health Organization’s World Health Reports of 1999 and 2000 included the provision of financial risk protection (FRP) as one criterion of good performance for health systems (WHO 1999, 2000). Reducing these financial risks is one objective of health policy instruments such as universal public finance (UPF), that is, full public finance irrespective of whether services are provided privately or publicly. Indeed, out-of-pocket (OOP) medical payments can lead to impoverishment in many countries, with households choosing from among many coping strategies (borrowing from friends and relatives, selling assets) to manage health-related expenses (Kruk, Goldmann, and Galea 2009; van Doorslaer and others 2006; Xu and others 2003). Absent other financing mechanisms, household medical expenditures can often be catastrophic (Wagstaff 2010; Wagstaff and van Doorslaer 2003), defined as exceeding a certain fraction of total household expenditures. A large literature documents the significance of medical impoverishment, but far less is known about the medical conditions responsible for it. Essue and others (2017), in chapter 6 of this volume, review and extend that literature, and Verguet, Memirie, and Norheim (2016) provide a framework for assessing the global burden of medical impoverishment by cause, applying it to a case study of a systematic categorization by disease in Ethiopia. In the literature on medical impoverishment, attenuating such impoverishment is considered a significant objective of health policy, but surprisingly little analysis has been performed of efficient ways to address the problem. The method of Extended cost-effectiveness analysis (ECEA) was initially developed for DCP3 by Verguet, Laxminarayan, and Jamison (2015). 
Traditionally, economic evaluations of health interventions (cost-effectiveness analyses [CEAs]) have focused on improvements in health and estimated an intervention cost per health gain in dollar per death averted or dollar per disability-adjusted life year (DALY) averted (Jamison and others 2006). However, arguments have been developed for some time that CEA in health should be extended to explicitly consider the multiple dimensions of outcome. Jamison (2009), for example, argued that CEAs can be extended to include FRP on the outcome side and use of scarce health system capacity on the cost side (figure 8.1). Specific methods for advancing this agenda were first proposed and applied in assessments of the consequences of two alternative policies—public finance and improved access to credit—for extending coverage of tuberculosis treatment in India (Verguet, Laxminarayan, and Jamison 2015). That study and other early ECEAs (Verguet 2013; Verguet, Gauvreau, and others 2015; Verguet, Olson, and others 2015) supplemented traditional economic evaluation with evaluation of nonhealth benefits (such as FRP and equity), with the broad objective of providing valuable guidance in the design of health policies. ECEA in this respect builds on the existing frameworks of cost-benefit analysis and cost-consequence analysis that tabulate disaggregated results (Mauskopf and others 1998) and on analytical frameworks that incorporate equity and FRP concerns into economic evaluations (Asaria and others 2015; Brown and Finkelstein 2008; Cookson, Drummond, and Weatherly 2009; Finkelstein and McKnight 2008; Fleurbaey and others 2013; McClellan and Skinner 2006; Sassi, Archard, and Le Grand 2001; Smith 2007, 2013). 
It enables the design of benefits packages that quantify both health and nonhealth benefits for a given expenditure on specific health policies, based on the quantitative inclusion of how much nonhealth benefits are being bought as well as how much health benefits are being bought with a given investment in an intervention or policy. In this respect, ECEA can answer some of the policy questions raised by the World Health Reports for 2010 and 2013 (WHO 2010, 2013) regarding how to select and sequence the health services to be provided on the path toward universal health coverage. This chapter first describes the ECEA approach and then summarizes findings of ECEAs undertaken in the context of the third edition of Disease Control Priorities (DCP3; http://www.dcp-3.org).",2018-09-14 +28245064,A global Fine-Root Ecology Database to address below-ground challenges in plant ecology.,"Variation and tradeoffs within and among plant traits are increasingly being harnessed by empiricists and modelers to understand and predict ecosystem processes under changing environmental conditions. While fine roots play an important role in ecosystem functioning, fine-root traits are underrepresented in global trait databases. This has hindered efforts to analyze fine-root trait variation and link it with plant function and environmental conditions at a global scale. This Viewpoint addresses the need for a centralized fine-root trait database, and introduces the Fine-Root Ecology Database (FRED, http://roots.ornl.gov) which so far includes > 70 000 observations encompassing a broad range of root traits and also includes associated environmental data. FRED represents a critical step toward improving our understanding of below-ground plant ecology. 
For example, FRED facilitates the quantification of variation in fine-root traits across root orders, species, biomes, and environmental gradients while also providing a platform for assessments of covariation among root, leaf, and wood traits, the role of fine roots in ecosystem functioning, and the representation of fine roots in terrestrial biosphere models. Continued input of observations into FRED to fill gaps in trait coverage will improve our understanding of changes in fine-root traits across space and time.",2017-02-28 +29342232,Informational and linguistic analysis of large genomic sequence collections via efficient Hadoop cluster algorithms.,"Motivation:Information theoretic and compositional/linguistic analysis of genomes have a central role in bioinformatics, even more so since the associated methodologies are becoming very valuable also for epigenomic and meta-genomic studies. The kernel of those methods is based on the collection of k-mer statistics, i.e. how many times each k-mer in {A,C,G,T}k occurs in a DNA sequence. Although this problem is computationally very simple and efficiently solvable on a conventional computer, the sheer amount of data available now in applications demands to resort to parallel and distributed computing. Indeed, those type of algorithms have been developed to collect k-mer statistics in the realm of genome assembly. However, they are so specialized to this domain that they do not extend easily to the computation of informational and linguistic indices, concurrently on sets of genomes. Results:Following the well-established approach in many disciplines, and with a growing success also in bioinformatics, to resort to MapReduce and Hadoop to deal with 'Big Data' problems, we present KCH, the first set of MapReduce algorithms able to perform concurrently informational and linguistic analysis of large collections of genomic sequences on a Hadoop cluster. 
The benchmarking of KCH that we provide indicates that it is quite effective and versatile. It is also competitive with respect to the parallel and distributed algorithms highly specialized to k-mer statistics collection for genome assembly problems. In conclusion, KCH is a much needed addition to the growing number of algorithms and tools that use MapReduce for bioinformatics core applications. Availability and implementation:The software, including instructions for running it over Amazon AWS, as well as the datasets are available at http://www.di-srv.unisa.it/KCH. Contact:umberto.ferraro@uniroma1.it. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-06-01 +30026881,A Panel of MicroRNA Signature as a Tool for Predicting Survival of Patients with Urothelial Carcinoma of the Bladder.,"

Introduction and objectives

MicroRNA (miRNA) expression is altered in urologic malignancies, including urothelial carcinoma of the bladder (UCB). Individual miRNAs have been shown to modulate multiple signaling pathways that contribute to BC. To identify a panel of miRNA signature that can predict aggressive phenotype from normal nonaggressive counterpart using miRNA expression levels and to assess the prognostic value of these specific miRNA markers in patients with UCB.

Methods

To determine candidate miRNAs as prognostic biomarkers for dividing aggressive type of UCB, miRNA expression was profiled in patients' samples with an aggressive phenotype or nonaggressive phenotype using 3D-Gene miRNA labeling kit (Toray, Japan). To create a prognostic index model, we used the panel of 9-miRNA signature based on Cancer Genome Atlas (TCGA) data portal (TCGA Data Portal (https://tcgadata.nci.nih.gov/tcga/tcgaHome2.jsp)). miRNA expression data and corresponding clinical data, including outcome and staging information of 84 UCB patients, were obtained. The Kaplan-Meier and log-rank test were performed to quantify the survival functions in two groups.

Results

Deregulation of nine miRNAs (hsa-miR-99a-5p, hsa-miR-100-5p, hsa-miR-125b-5p, hsa-miR-145-5p, hsa-miR-4324, hsa-miR-34b-5p, hsa-miR-29c-3p, hsa-miR-135a-3p, and hsa-miR-33b-3p) was determined in UCB patients with aggressive phenotype compared with nonaggressive subject. To validate the prognostic power of the nine-signature miRNAs using the TCGA dataset of bladder cancer, the survival status and tumor miRNA expression of all 84 TCGA UCB patients were ranked according to the prognostic score values. Of nine miRNAs, six were associated with high risk (hsa-miR-99a-5p, hsa-miR-100-5p, hsa-miR-125b-5p, hsa-miR-4324, hsa-miR-34b-5p, and hsa-miR-135a-3p) and three were shown to be protective (hsa-miR-145-5p, hsa-miR-29c-3p, and hsa-miR-33b-3p). Patients with the high-risk miRNA signature exhibited poorer OS than patients expressing the low-risk miRNA profile (HR = 7.05, p < 0.001).

Conclusions

The miRNA array identified nine dysregulated miRNAs from clinical samples. This panel of nine-miRNA signature provides predictive and prognostic value of patients with UCB.",2018-06-20 +29877995,Considering Spine Surgery: A Web-Based Calculator for Communicating Estimates of Personalized Treatment Outcomes.,"

Study design

Prospective evaluation of an informational web-based calculator for communicating estimates of personalized treatment outcomes.

Objective

To evaluate the usability, effectiveness in communicating benefits and risks, and impact on decision quality of a calculator tool for patients with intervertebral disc herniations, spinal stenosis, and degenerative spondylolisthesis who are deciding between surgical and nonsurgical treatments.

Summary of background data

The decision to have back surgery is preference-sensitive and warrants shared decision making. However, more patient-specific, individualized tools for presenting clinical evidence on treatment outcomes are needed.

Methods

Using Spine Patient Outcomes Research Trial data, prediction models were designed and integrated into a web-based calculator tool: http://spinesurgerycalc.dartmouth.edu/calc/. Consumer Reports subscribers with back-related pain were invited to use the calculator via email, and patient participants were recruited to use the calculator in a prospective manner following an initial appointment at participating spine centers. Participants completed questionnaires before and after using the calculator. We randomly assigned previously validated questions that tested knowledge about the treatment options to be asked either before or after viewing the calculator.

Results

A total of 1256 Consumer Reports subscribers and 68 patient participants completed the calculator and questionnaires. Knowledge scores were higher in the postcalculator group compared to the precalculator group, indicating that calculator usage successfully informed users. Decisional conflict was lower when measured following calculator use, suggesting the calculator was beneficial in the decision-making process. Participants generally found the tool helpful and easy to use.

Conclusion

Although the calculator is not a comprehensive decision aid, it does focus on communicating individualized risks and benefits for treatment options. Moreover, it appears to be helpful in achieving the goals of more traditional shared decision-making tools. It not only improved knowledge scores but also improved other aspects of decision quality.

Level of evidence

2.",2018-12-01 +26733451,VIRALpro: a tool to identify viral capsid and tail sequences.,"

Motivation

Not only sequence data continue to outpace annotation information, but also the problem is further exacerbated when organisms are underrepresented in the annotation databases. This is the case with non-human-pathogenic viruses which occur frequently in metagenomic projects. Thus, there is a need for tools capable of detecting and classifying viral sequences.

Results

We describe VIRALpro, a new effective tool for identifying capsid and tail protein sequences, which are the cornerstones toward viral sequence annotation and viral genome classification.

Availability and implementation

The data, software and corresponding web server are available from http://scratch.proteomics.ics.uci.edu as part of the SCRATCH suite.

Contact

clovis.galiez@inria.fr or pfbaldi@uci.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-05 +29228504,Dynamic borrowing through empirical power priors that control type I error.,"In order for historical data to be considered for inclusion in the design and analysis of clinical trials, prospective rules are essential. Incorporation of historical data may be of particular interest in the case of small populations where available data is scarce and heterogeneity is not as well understood, and thus conventional methods for evidence synthesis might fall short. The concept of power priors can be particularly useful for borrowing evidence from a single historical study. Power priors employ a parameter γ ∈ [0, 1] that quantifies the heterogeneity between the historical study and the new study. However, the possibility of borrowing data from a historical trial will usually be associated with an inflation of the type I error. We suggest a new, simple method of estimating the power parameter suitable for the case when only one historical dataset is available. The method is based on predictive distributions and parameterized in such a way that the type I error can be controlled by calibrating to the degree of similarity between the new and historical data. The method is demonstrated for normal responses in a one or two group setting. Generalization to other models is straightforward.",2017-12-11 +28263070,Param-Medic: A Tool for Improving MS/MS Database Search Yield by Optimizing Parameter Settings.,"In shotgun proteomics analysis, user-specified parameters are critical to database search performance and therefore to the yield of confident peptide-spectrum matches (PSMs). Two of the most important parameters are related to the accuracy of the mass spectrometer. Precursor mass tolerance defines the peptide candidates considered for each spectrum. Fragment mass tolerance or bin size determines how close observed and theoretical fragments must be to be considered a match. 
For either of these two parameters, too wide a setting yields randomly high-scoring false PSMs, whereas too narrow a setting erroneously excludes true PSMs, in both cases, lowering the yield of peptides detected at a given false discovery rate. We describe a strategy for inferring optimal search parameters by assembling and analyzing pairs of spectra that are likely to have been generated by the same peptide ion to infer precursor and fragment mass error. This strategy does not rely on a database search, making it usable in a wide variety of settings. In our experiments on data from a variety of instruments including Orbitrap and Q-TOF acquisitions, this strategy yields more high-confidence PSMs than using settings based on instrument defaults or determined by experts. Param-Medic is open-source and cross-platform. It is available as a standalone tool ( http://noble.gs.washington.edu/proj/param-medic/ ) and has been integrated into the Crux proteomics toolkit ( http://crux.ms ), providing automatic parameter selection for the Comet and Tide search engines.",2017-03-13 +,P16.03UNIFYING CLINICAL ROUTINE BRAIN TUMOR MR-SPECTROSCOPY AND MR-IMAGE ANALYSIS: NOVEL JMRUI PLUG-INS FOR BRAIN TUMOR ANALYSIS,"INTRODUCTION: Magnetic resonance imaging and spectroscopy are the neuroradiological methods of first choice for the diagnostics of de novo brain tumors, the evaluation of tumor response to therapy and the tumor progression. Whereas (i.) T2 and post contrast T1-weighted MRI gives information on the brain tumors' anatomy and integrity of the blood brain barrier, (ii.) perfusion weighted MRI on the perfusion state (important to study processes like neo-angiogenesis, apoptosis and necrosis), (iii.) diffusion weighted MRI (evaluation for cellular density, and white matter tract integrity), information on the tumors' metabolism is obtained by MR-spectroscopy. Lactate gives information on the ischemic state, choline/ mobile lipids on the membrane turnover and necrosis/apoptosis. 
The lactate, as present in many high grade glioma, is an indicator of an ischemic condition which can be regarded as an indicator for tumors' resistance to radiation therapy. However, despite the fact that MRS gives valuable additional information to MRI, the available software for clinical routine analysis of MRS data together with MRI data is far from ideal. This abstract reports on the software, which is currently being developed within an EU-funded Marie Curie Initial Training Network (ITN) (http://www.transact-itn.eu/) named TRANSACT which stands for “Transforming Magnetic Resonance Spectroscopy into a Clinical Tool”. METHODS: Basis for the developed software within the TRANSACT-project is the software package jMRUI, which was developed during former EU-funded projects, and targeted mainly on scientific users of MR-spectroscopy; the current TRANSACT project however focuses on developing software for clinicians. The novel plug-ins for jMRUI were entirely developed in JAVA. RESULTS: The following important clinical work flow related requirements were incorporated: (a.) full support of DICOM image and spectroscopy data format; (b.) a fully featured integrated patient/study/series examination browser; (c.) automatic projection of spectral voxels within automatically loaded reference image stacks; (d.) DICOM transfer using the standard DICOM network data transfer protocol; (e.) DICOM reporting of spectroscopy results, and possibility to transfer these results into PACS systems; (f.) absolute quantification of single voxel spectra; (g.) advanced display of spectroscopic SVS and MRSI data; (h.) automated fast metabolite image generation; (i.) easy correlation of numeric image data and parametric spectral data. CONCLUSION: An intuitive software has been developed for a clinical setting that allows the simultaneous study of MRI/MRS brain tumor data.",2014-09-01 +27866912,Investigation of candidate genes for osteoarthritis based on gene expression profiles.,"

Objective

To explore the mechanism of osteoarthritis (OA) and provide valid biological information for further investigation.

Methods

Gene expression profile of GSE46750 was downloaded from Gene Expression Omnibus database. The Linear Models for Microarray Data (limma) package (Bioconductor project, http://www.bioconductor.org/packages/release/bioc/html/limma.html) was used to identify differentially expressed genes (DEGs) in inflamed OA samples. Gene Ontology function enrichment analysis and Kyoto Encyclopedia of Genes and Genomes (KEGG) pathways enrichment analysis of DEGs were performed based on Database for Annotation, Visualization and Integrated Discovery data, and protein-protein interaction (PPI) network was constructed based on the Search Tool for the Retrieval of Interacting Genes/Proteins database. Regulatory network was screened based on Encyclopedia of DNA Elements. Molecular Complex Detection was used for sub-network screening. Two sub-networks with highest node degree were integrated with transcriptional regulatory network and KEGG functional enrichment analysis was processed for 2 modules.

Results

In total, 401 up- and 196 down-regulated DEGs were obtained. Up-regulated DEGs were involved in inflammatory response, while down-regulated DEGs were involved in cell cycle. PPI network with 2392 protein interactions was constructed. Moreover, 10 genes including Interleukin 6 (IL6) and Aurora B kinase (AURKB) were found to be outstanding in PPI network. There are 214 up- and 8 down-regulated transcription factor (TF)-target pairs in the TF regulatory network. Module 1 had TFs including SPI1, PRDM1, and FOS, while module 2 contained FOSL1. The nodes in module 1 were enriched in chemokine signaling pathway, while the nodes in module 2 were mainly enriched in cell cycle.

Conclusion

The screened DEGs including IL6, AGT, and AURKB might be potential biomarkers for gene therapy for OA by being regulated by TFs such as FOS and SPI1, and participating in the cell cycle and cytokine-cytokine receptor interaction pathway.",2016-11-18 +22792232,IMG/M-HMP: a metagenome comparative analysis system for the Human Microbiome Project.,"The Integrated Microbial Genomes and Metagenomes (IMG/M) resource is a data management system that supports the analysis of sequence data from microbial communities in the integrated context of all publicly available draft and complete genomes from the three domains of life as well as a large number of plasmids and viruses. IMG/M currently contains thousands of genomes and metagenome samples with billions of genes. IMG/M-HMP is an IMG/M data mart serving the US National Institutes of Health (NIH) Human Microbiome Project (HMP), focussed on HMP generated metagenome datasets, and is one of the central resources provided from the HMP Data Analysis and Coordination Center (DACC). IMG/M-HMP is available at http://www.hmpdacc-resources.org/imgm_hmp/.",2012-07-05 +31213465,Chromosome 12p Amplification in Triple-Negative/BRCA1-Mutated Breast Cancer Associates with Emergence of Docetaxel Resistance and Carboplatin Sensitivity.,"Taxanes are the mainstay of treatment in triple-negative breast cancer (TNBC), with de novo and acquired resistance limiting patient's survival. To investigate the genetic basis of docetaxel resistance in TNBC, exome sequencing was performed on matched TNBC patient-derived xenografts (PDX) sensitive to docetaxel and their counterparts that developed resistance in vivo upon continuous drug exposure. Most mutations, small insertions/deletions, and copy number alterations detected in the initial TNBC human metastatic samples were maintained after serial passages in mice and emergence of resistance. 
We identified a chromosomal amplification of chr12p in a human BRCA1-mutated metastatic sample and the derived chemoresistant PDX, but not in the matched docetaxel-sensitive PDX tumor. Chr12p amplification was validated in a second pair of docetaxel-sensitive/resistant BRCA1-mutated PDXs and after short-term docetaxel treatment in several TNBC/BRCA1-mutated PDXs and cell lines, as well as during metastatic recurrence in a patient with BRCA1-mutated breast cancer who had progressed on docetaxel treatment. Analysis of clinical data indicates an association between chr12p amplification and patients with TNBC/basal-like breast cancer, a BRCA1 mutational signature, and poor survival after chemotherapy. Detection of chr12p amplification in a cohort of TNBC PDX models was associated with an improved response to carboplatin. Our findings reveal tumor clonal dynamics during chemotherapy treatments and suggest that a preexisting population harboring chr12p amplification is associated with the emergence of docetaxel resistance and carboplatin responsiveness in TNBC/BRCA1-mutated tumors. SIGNIFICANCE: Chr12p copy number gains indicate rapid emergence of resistance to docetaxel and increased sensitivity to carboplatin, therefore sequential docetaxel/carboplatin treatment could improve survival in TNBC/BRCA1 patients. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/79/16/4258/F1.large.jpg.",2019-06-18 +,Cholesterol and vitamin D content of eggs in the U.S. retail market,"Nationwide sampling in the U.S. of whole large eggs, to update values in the United States Department of Agriculture (USDA) National Nutrient Database for Standard Reference (SR) (http://www.ars.usda.gov/nutrientdata), was conducted in 2000–2001 and in 2010. Retail cartons of large eggs were obtained from 12 supermarket locations using statistical sampling plans based on market share and census data. 
Cholesterol was analyzed at three laboratories using standard methods involving gas chromatography of the saponified total lipid extract. Vitamin D3 and 25-OH-vitamin D3 (2010 samples only) were analyzed by HPLC and UHPLC–MS/MS. Quality control materials were included to validate the accuracy and precision of measurements. The mean cholesterol content decreased 51mg/100g (12%; p<0.0001), from 423mg/100g in 2000–2001 to 372 (range 344–405) in 2010. Over the same period, average vitamin D3 increased by 60%, to 2.05μg [80IU]/100g (range 0.97–12.1). Samples from 2010 contained 0.65μg 25-OH-D3/100g (range 0.43–1.32). The disparate vitamin D (and cholesterol) content of eggs sampled from different locations may reflect industry efforts to modify poultry feed or supplements to affect the nutrient profile of eggs. Cholesterol and vitamin D3 data from this work were included in SR release 23, and support food consumption surveys, food and nutrition policy, and consumer education.",2013-03-01 +29847831,"Cognitive-Behavioral Analysis System of Psychotherapy, Drug, or Their Combination for Persistent Depressive Disorder: Personalizing the Treatment Choice Using Individual Participant Data Network Metaregression.","

Background

Persistent depressive disorder is prevalent, disabling, and often difficult to treat. The cognitive-behavioral analysis system of psychotherapy (CBASP) is the only psychotherapy specifically developed for its treatment. However, we do not know which of CBASP, antidepressant pharmacotherapy, or their combination is the most efficacious and for which types of patients. This study aims to present personalized prediction models to facilitate shared decision-making in treatment choices to match patients' characteristics and preferences based on individual participant data network metaregression.

Methods

We conducted a comprehensive search for randomized controlled trials comparing any two of CBASP, pharmacotherapy, or their combination and sought individual participant data from identified trials. The primary outcomes were reduction in depressive symptom severity for efficacy and dropouts due to any reason for treatment acceptability.

Results

All 3 identified studies (1,036 participants) were included in the present analyses. On average, the combination therapy showed significant superiority over both monotherapies in terms of efficacy and acceptability, while the latter 2 treatments showed essentially similar results. Baseline depression, anxiety, prior pharmacotherapy, age, and depression subtypes moderated their relative efficacy, which indicated that for certain subgroups of patients either drug therapy or CBASP alone was a recommendable treatment option that is less costly, may have fewer adverse effects and match an individual patient's preferences. An interactive web app (https://kokoro.med.kyoto-u.ac.jp/CBASP/prediction/) shows the predicted disease course for all possible combinations of patient characteristics.

Conclusions

Individual participant data network metaregression enables treatment recommendations based on individual patient characteristics.",2018-05-30 +29628320,Tumor Microenvironment Properties are Associated With Low CD68-positive Cell Infiltration and Favorable Disease-free Survival in EGFR-mutant Lung Adenocarcinoma.,"BACKGROUND:The benefits of immune checkpoint inhibitors for first-line treatment in patients with lung adenocarcinoma harboring EGFR mutations are unclear. The effects of ICIs depend on the tumor microenvironment (TME). Differences in TME properties between mutant and wild-type EGFR have not been fully characterized. PATIENTS AND METHODS:We collected 105 surgically resected (50 EGFR mutated and 55 EGFR wild-type), treatment-naïve lung adenocarcinoma tissues with clinical data to investigate the landscape and compartmentalization of tumor-infiltrating immune cells with respect to EGFR status by immunohistochemistry. The normalized FPKM values of data for 531 patients were obtained from The Cancer Genome Atlas (TCGA) Data Portal (https://portal.gdc.cancer.gov/). RESULTS:CD68-positive cells within the tumor niche exhibited more intensive infiltration in wild-type EGFR than in mutations, and was related to lymph node invasion. In the RNA-Seq analysis, MMP9 and VEGFA showed higher levels in wild-type EGFR than in mutant cases. The EGFR mutation independently predicted a favorable disease-free survival. CONCLUSION:The CD68-positive cells play a crucial role in discriminating the TME between different EGFR statuses.",2018-03-17 +31144100,The potential of cardiac rehabilitation as a method of suppressing abdominal aortic aneurysm expansion: a pilot study.,"This study is a prospective evaluation of the effectiveness of cardiac rehabilitation (CR) in terms of clinical outcomes for small abdominal aortic aneurysms (AAA) that were previously reported in a retrospective cohort study. 
We conducted a prospective non-randomized trial on patients with small AAA (N = 40; mean age 75.0 ± 6.6 years). Patients were enrolled into one of two groups, rehabilitation (CR) or non-rehabilitation (non-CR) group. Only CR group participated in a supervised-CR program including bicycle ergometer for 150 days. The AAA expansion rate and the risk of AAA repair were compared between two groups. We also researched the relationship between AAA expansion rate and body composition, blood IL-6 and TGFβ1 levels. The CR (N = 15) and non-CR groups (N = 25) were comparable in terms of their baseline data. The CR group had a significantly smaller change in the maximal AAA size (- 1.3 ± 2.4 mm/years) compared to the non-CR group (2.0 ± 3.6 mm/years) (p < 0.01). The IL-6, and TGFβ1 levels were unrelated to the changes in AAA size. There was mild positive correlation between the change in systolic blood pressure from rest to exercise and the AAA expansion rate (p = 0.06). The risk of AAA repair after 12 months was lower in the CR group compared to the non-CR group (0% vs. 28%, respectively). CR in patients with small AAA significantly suppressed AAA expansion and resulted in a lowered risk of AAA repair. Clinical trial Trial name: The study of the profitability and protective effect of cardiac rehabilitation on abdominal aortic aneurysm. Number: UMIN000028237. URL: https://upload.umin.ac.jp/cgi-open-bin/ctr_e/ctr_view.cgi?recptno=R0000323.",2019-05-29 +30259154,Hubei poty-like virus 1 is likely an interspecies recombinant of sugarcane mosaic virus and putative bean yellow mosaic virus.,"In 2016, Shi et al. (Nature 540:539-543. https://doi.org/10.1038/nature20167 2016) reported Hubei poty-like virus 1 (HuPLV1) to be a new member of the family Potyviridae. At that time, its polyprotein shared the highest sequence identity (80%) with sugarcane mosaic virus (SCMV). A year later, two isolates of SCMV from Canna sp. 
whose sequences were submitted to the GenBank database showed 91% identity to the HuPLV1 polyprotein sequence. Based on the species demarcation criteria for the family Potyviridae, HuPLV1 should possibly be considered an isolate of SCMV. To resolve this taxonomical inconsistency, we carried out a phylogenetic and recombination analysis and found that HuPLV1 is an interspecies recombinant of SCMV and bean yellow mosaic virus (BYMV).",2018-09-27 +30990466,Single cell transcriptomics based-MacSpectrum reveals novel macrophage activation signatures in diseases. ,"Adipose tissue macrophages (ATM) are crucial for maintaining adipose tissue homeostasis and mediating obesity-induced metabolic abnormalities, including prediabetic conditions and type 2 diabetes mellitus. Despite their key functions in regulating adipose tissue metabolic and immunologic homeostasis under normal and obese conditions, a high-resolution transcriptome annotation system that can capture ATM multifaceted activation profiles has not yet been developed. This is primarily attributed to the complexity of their differentiation/activation process in adipose tissue and their diverse activation profiles in response to microenvironmental cues. Although the concept of multifaceted macrophage action is well-accepted, no current model precisely depicts their dynamically regulated in vivo features. To address this knowledge gap, we generated single-cell transcriptome data from primary bone marrow-derived macrophages under polarizing and non-polarizing conditions to develop new high-resolution algorithms. The outcome was creation of a two-index platform, MacSpectrum (https://macspectrum.uconn.edu), that enables comprehensive high-resolution mapping of macrophage activation states from diverse mixed cell populations. MacSpectrum captured dynamic transitions of macrophage subpopulations under both in vitro and in vivo conditions. 
Importantly, MacSpectrum revealed unique ""signature"" gene sets in ATMs and circulating monocytes that displayed significant correlation with BMI and homeostasis model assessment of insulin resistance (HOMA-IR) in obese human patients. Thus, MacSpectrum provides unprecedented resolution to decode macrophage heterogeneity and will open new areas of clinical translation.",2019-04-16 +30734933,Targeted Proteomics Comes to the Benchside and the Bedside: Is it Ready for Us?,"While mass spectrometry (MS)-based quantification of small molecules has been successfully used for decades, targeted MS has only recently been used by the proteomics community to investigate clinical questions such as biomarker verification and validation. Targeted MS holds the promise of a paradigm shift in the quantitative determination of proteins. Nevertheless, targeted quantitative proteomics requires improvisation in making sample processing, instruments, and data analysis more accessible. In the backdrop of the genomic era reaching its zenith, certain questions arise: is the proteomic era about to come? If we are at the beginning of a new future for protein quantification, are we prepared to incorporate targeted proteomics at the benchside for basic research and at the bedside for the good of patients? Here, an overview of the knowledge required to perform targeted proteomics as well as its applications is provided. A special emphasis is placed on upcoming areas such as peptidomics, proteoform research, and mass spectrometry imaging, where the utilization of targeted proteomics is expected to bring forth new avenues. The limitations associated with the acceptance of this technique for mainstream usage are also highlighted. Also see the video abstract here https://youtu.be/mieB47B8gZw.",2019-02-08 +26838676,A visual and curatorial approach to clinical variant prioritization and disease gene discovery in genome-wide diagnostics.,"

Background

Genome-wide data are increasingly important in the clinical evaluation of human disease. However, the large number of variants observed in individual patients challenges the efficiency and accuracy of diagnostic review. Recent work has shown that systematic integration of clinical phenotype data with genotype information can improve diagnostic workflows and prioritization of filtered rare variants. We have developed visually interactive, analytically transparent analysis software that leverages existing disease catalogs, such as the Online Mendelian Inheritance in Man database (OMIM) and the Human Phenotype Ontology (HPO), to integrate patient phenotype and variant data into ranked diagnostic alternatives.

Methods

Our tool, ""OMIM Explorer"" ( http://www.omimexplorer.com ), extends the biomedical application of semantic similarity methods beyond those reported in previous studies. The tool also provides a simple interface for translating free-text clinical notes into HPO terms, enabling clinical providers and geneticists to contribute phenotypes to the diagnostic process. The visual approach uses semantic similarity with multidimensional scaling to collapse high-dimensional phenotype and genotype data from an individual into a graphical format that contextualizes the patient within a low-dimensional disease map. The map proposes a differential diagnosis and algorithmically suggests potential alternatives for phenotype queries--in essence, generating a computationally assisted differential diagnosis informed by the individual's personal genome. Visual interactivity allows the user to filter and update variant rankings by interacting with intermediate results. The tool also implements an adaptive approach for disease gene discovery based on patient phenotypes.

Results

We retrospectively analyzed pilot cohort data from the Baylor Miraca Genetics Laboratory, demonstrating performance of the tool and workflow in the re-analysis of clinical exomes. Our tool assigned to clinically reported variants a median rank of 2, placing causal variants in the top 1 % of filtered candidates across the 47 cohort cases with reported molecular diagnoses of exome variants in OMIM Morbidmap genes. Our tool outperformed Phen-Gen, eXtasy, PhenIX, PHIVE, and hiPHIVE in the prioritization of these clinically reported variants.

Conclusions

Our integrative paradigm can improve efficiency and, potentially, the quality of genomic medicine by more effectively utilizing available phenotype information, catalog data, and genomic knowledge.",2016-02-02 +30467523,gganatogram: An R package for modular visualisation of anatograms and tissues based on ggplot2.,"Displaying data onto anatomical structures is a convenient technique to quickly observe tissue related information. However, drawing tissues is a complex task that requires both expertise in anatomy and the arts. While web based applications exist for displaying gene expression on anatograms, other non-genetic disciplines lack similar tools. Moreover, web based tools often lack the modularity associated with packages in programming languages, such as R. Here I present gganatogram, an R package used to plot modular species anatograms based on a combination of the graphical grammar of ggplot2 and the publicly available anatograms from the Expression Atlas. This combination allows for quick and easy, modular, and reproducible generation of anatograms. Using only one command and a data frame with tissue name, group, colour, and value, this tool enables the user to visualise specific human and mouse tissues with desired colours, grouped by a variable, or displaying a desired value, such as gene-expression, pharmacokinetics, or bacterial load across selected tissues. gganatogram consists of 5 highly annotated organisms, male/female human/mouse, and a cell anatogram. It further consists of 24 other less annotated organisms from the animal and plant kingdom. I hope that this tool will be useful to the wider community in biological sciences. Community members are welcome to submit additional anatograms, which can be incorporated into the package. A stable version of gganatogram has been deposited to neuroconductor, and a development version can be found on github/jespermaag/gganatogram. 
An interactive shiny app of gganatogram can be found on  https://jespermaag.shinyapps.io/gganatogram/, which allows for non-R users to create anatograms.",2018-09-28 +29390973,MPBoot: fast phylogenetic maximum parsimony tree inference and bootstrap approximation.,"

Background

The nonparametric bootstrap is widely used to measure the branch support of phylogenetic trees. However, bootstrapping is computationally expensive and remains a bottleneck in phylogenetic analyses. Recently, an ultrafast bootstrap approximation (UFBoot) approach was proposed for maximum likelihood analyses. However, such an approach is still missing for maximum parsimony.

Results

To close this gap we present MPBoot, an adaptation and extension of UFBoot to compute branch supports under the maximum parsimony principle. MPBoot works for both uniform and non-uniform cost matrices. Our analyses on biological DNA and protein showed that under uniform cost matrices, MPBoot runs on average 4.7 (DNA) to 7 times (protein data) (range: 1.2-20.7) faster than the standard parsimony bootstrap implemented in PAUP*; but 1.6 (DNA) to 4.1 times (protein data) slower than the standard bootstrap with a fast search routine in TNT (fast-TNT). However, for non-uniform cost matrices MPBoot is 5 (DNA) to 13 times (protein data) (range:0.3-63.9) faster than fast-TNT. We note that MPBoot achieves better scores more frequently than PAUP* and fast-TNT. However, this effect is less pronounced if an intensive but slower search in TNT is invoked. Moreover, experiments on large-scale simulated data show that while both PAUP* and TNT bootstrap estimates are too conservative, MPBoot bootstrap estimates appear more unbiased.

Conclusions

MPBoot provides an efficient alternative to the standard maximum parsimony bootstrap procedure. It shows favorable performance in terms of run time, the capability of finding a maximum parsimony tree, and high bootstrap accuracy on simulated as well as empirical data sets. MPBoot is easy-to-use, open-source and available at http://www.cibiv.at/software/mpboot .",2018-02-02 +22616108,ApiNATOMY: a novel toolkit for visualizing multiscale anatomy schematics with phenotype-related information.,"A significant proportion of biomedical resources carries information that cross references to anatomical structures across multiple scales. To improve the visualization of such resources in their anatomical context, we developed an automated methodology that produces anatomy schematics in a consistent manner,and provides for the overlay of anatomy-related resource information onto the same diagram. This methodology, called ApiNATOMY, draws upon the topology of ontology graphs to automatically lay out treemaps representing body parts as well as semantic metadata linking to such ontologies. More generally, ApiNATOMY treemaps provide an efficient and manageable way to visualize large biomedical ontologies in a meaningful and consistent manner. In the anatomy domain, such treemaps will allow epidemiologists, clinicians, and biomedical scientists to review, and interact with, anatomically aggregated heterogeneous data and model resources. Such an approach supports the visual identification of functional relations between anatomically colocalized resources that may not be immediately amenable to automation by ontology-based inferencing. We also describe the application of ApiNATOMY schematics to integrate, and add value to, human phenotype-related information—results are found at http://apinatomy.org. 
The long-term goal for the ApiNATOMY toolkit is to support clinical and scientific graphical user interfaces and dashboards for biomedical resource management and data analytics.",2012-05-01 +29990104,A Robust 3D-2D Interactive Tool for Scene Segmentation and Annotation.,"Recent advances of 3D acquisition devices have enabled large-scale acquisition of 3D scene data. Such data, if completely and well annotated, can serve as useful ingredients for a wide spectrum of computer vision and graphics works such as data-driven modeling and scene understanding, object detection and recognition. However, annotating a vast amount of 3D scene data remains challenging due to the lack of an effective tool and/or the complexity of 3D scenes (e.g. clutter, varying illumination conditions). This paper aims to build a robust annotation tool that effectively and conveniently enables the segmentation and annotation of massive 3D data. Our tool works by coupling 2D and 3D information via an interactive framework, through which users can provide high-level semantic annotation for objects. We have experimented our tool and found that a typical indoor scene could be well segmented and annotated in less than 30 minutes by using the tool, as opposed to a few hours if done manually. Along with the tool, we created a dataset of over a hundred 3D scenes associated with complete annotations using our tool. Both the tool and dataset will be available at http://scenenn.net.",2017-11-20 +23270511,A genomic scale map of genetic diversity in Trypanosoma cruzi.,"

Background

Trypanosoma cruzi, the causal agent of Chagas Disease, affects more than 16 million people in Latin America. The clinical outcome of the disease results from a complex interplay between environmental factors and the genetic background of both the human host and the parasite. However, knowledge of the genetic diversity of the parasite is currently limited to a number of highly studied loci. The availability of a number of genomes from different evolutionary lineages of T. cruzi provides an unprecedented opportunity to look at the genetic diversity of the parasite at a genomic scale.

Results

Using a bioinformatic strategy, we have clustered T. cruzi sequence data available in the public domain and obtained multiple sequence alignments in which one or two alleles from the reference CL-Brener were included. These data cover 4 major evolutionary lineages (DTUs): TcI, TcII, TcIII, and the hybrid TcVI. Using this set of alignments we have identified 288,957 high quality single nucleotide polymorphisms and 1,480 indels. In a reduced re-sequencing study we were able to validate ~ 97% of high-quality SNPs identified in 47 loci. Analysis of how these changes affect encoded protein products showed a 0.77 ratio of synonymous to non-synonymous changes in the T. cruzi genome. We observed 113 changes that introduce or remove a stop codon, some causing significant functional changes, and a number of tri-allelic and tetra-allelic SNPs that could be exploited in strain typing assays. Based on an analysis of the observed nucleotide diversity we show that the T. cruzi genome contains a core set of genes that are under apparent purifying selection. Interestingly, orthologs of known druggable targets show statistically significantly lower nucleotide diversity values.

Conclusions

This study provides the first look at the genetic diversity of T. cruzi at a genomic scale. The analysis covers an estimated ~ 60% of the genetic diversity present in the population, providing an essential resource for future studies on the development of new drugs and diagnostics for Chagas Disease. These data are available through the TcSNP database (http://snps.tcruzi.org).",2012-12-27 +28011765,RADER: a RApid DEcoy Retriever to facilitate decoy based assessment of virtual screening.,"

Summary

Evaluation of the capacity for separating actives from challenging decoys is a crucial metric of performance related to molecular docking or a virtual screening workflow. The Directory of Useful Decoys (DUD) and its enhanced version (DUD-E) provide a benchmark for molecular docking, although they only contain a limited set of decoys for limited targets. DecoyFinder was released to compensate for the limitations of DUD or DUD-E for building target-specific decoy sets. However, desirable query template design, generation of multiple decoy sets of similar quality, and computational speed remain bottlenecks, particularly when the numbers of queried actives and retrieved decoys increase to hundreds or more. Here, we developed a program suite called RApid DEcoy Retriever (RADER) to facilitate the decoy-based assessment of virtual screening. This program adopts a novel database-management regime that supports rapid and large-scale retrieval of decoys, enables high portability of databases, and provides multifaceted options for designing initial query templates from a large number of active ligands and generating subtle decoy sets. RADER provides two operational modes: as a command-line tool and on a web server. Validation of the performance and efficiency of RADER was also conducted and is described.

Availability and implementation

RADER web server and a local version are freely available at http://rcidm.org/rader/ .

Contact

lingwang@scut.edu.cn or went@scut.edu.cn .

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +27436239,GlycoGAIT: A web database to browse glycogenes and lectins under gastric inflammatory diseases.,"The perplexing nature of dynamic glycosylation modification plays imperative role in determining the regulatory role of key glycoconjugates involved in immune system. Systematic analysis of change in expression pattern of glycogenes and lectins can bring in a comprehensive understanding of genetic basis of the glycobiological changes occurring in pathological condition. Advancement in the field of glycobiology has capacitated the process of linking gene expression changes of glycogenes with its biological function. This instigated us to systematically analyze changes in expression patterns focusing on glycome genomics under diverse gastrointestinal immune dysfunction background. To necessitate this, as a pilot project, we carefully integrated several publically available databases to construct a glycosylation process associated gene set as well as public expression microarray data associated with gastrointestinal infections into an online database called Glycosylation and Gut Associated Immune Tolerance (GlycoGAIT). Currently the database comprises of 548 well characterized genes belonging to glycogenes and lectins along with gene expression data obtained from human biopsy samples under both H. pylori infection and inflammatory bowel disease (IBD) condition. The user-friendly interface enables the users to quickly compare and interpret changes in expression patterns of glycome genomics under different gut associated inflammatory conditions. The database is available online at: https://apps.connexios.com/glycogait/.",2016-07-18 +29106446,FunImageJ: a Lisp framework for scientific image processing.,"

Summary

FunImageJ is a Lisp framework for scientific image processing built upon the ImageJ software ecosystem. The framework provides a natural functional-style for programming, while accounting for the performance requirements necessary in big data processing commonly encountered in biological image analysis.

Availability and implementation

Freely available plugin to Fiji (http://fiji.sc/#download). Installation and use instructions available at http://imagej.net/FunImageJ.

Contact

kharrington@uidaho.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-03-01 +30182778,A nanoinformatics decision support tool for the virtual screening of gold nanoparticle cellular association using protein corona fingerprints.,"The increasing use of nanoparticles (NPs) in a wide range of consumer and industrial applications has necessitated significant effort to address the challenge of characterizing and quantifying the underlying nanostructure - biological response relationships to ensure that these novel materials can be exploited responsibly and safely. Such efforts demand reliable experimental data not only in terms of the biological dose-response, but also regarding the physicochemical properties of the NPs and their interaction with the biological environment. The latter has not been extensively studied, as a large surface to bind biological macromolecules is a unique feature of NPs that is not relevant for chemicals or pharmaceuticals, and thus only limited data have been reported in the literature quantifying the protein corona formed when NPs interact with a biological medium and linking this with NP cellular association/uptake. In this work we report the development of a predictive model for the assessment of the biological response (cellular association, which can include both internalized NPs and those attached to the cell surface) of surface-modified gold NPs, based on their physicochemical properties and protein corona fingerprints, utilizing a dataset of 105 unique NPs. Cellular association was chosen as the end-point for the original experimental study due to its relevance to inflammatory responses, biodistribution, and toxicity in vivo. The validated predictive model is freely available online through the Enalos Cloud Platform ( http://enalos.insilicotox.com/NanoProteinCorona/ ) to be used as part of a regulatory or NP safe-by-design decision support system. 
This online tool will allow the virtual screening of NPs, based on a list of the significant NP descriptors, identifying those NPs that would warrant further toxicity testing on the basis of predicted NP cellular association.",2018-09-05 +23584831,Towards the collaborative curation of the registry underlying Identifiers.org.,"The MIRIAM Registry (http://www.ebi.ac.uk/miriam/) records information about collections of data in the life sciences, as well as where it can be obtained. This information is used, in combination with the resolving infrastructure of Identifiers.org (http://identifiers.org/), to generate globally unique identifiers, in the form of Uniform Resource Identifier. These identifiers are now widely used to provide perennial cross-references and annotations. The growing demand for these identifiers results in a significant increase in curational efforts to maintain the underlying registry. This requires the design and implementation of an economically viable and sustainable solution able to cope with such expansion. We briefly describe the Registry, the current curation duties entailed, and our plans to extend and distribute this workload through collaborative and community efforts.",2013-04-12 +26912952,NABIC Microarray: an integrated database of high throughput data for gene expression profiles.,"

Unlabelled

The National Agricultural Biotechnology Information Center (NABIC) in Korea constructed a web-based database to provide information about gene expression profiles identified in microorganisms, plants, and animals. The deposited archive of the NABIC microarray database consists of a metadata spreadsheet, a matrix spreadsheet, and raw data files. It provides three major functions: microarray search, a viewer, and a download option for raw data. An information table of five fields (i.e., ownership, basic, series, samples, and protocols) shows the specific description of data for the selected DNA microarray.

Availability

The database is available online for free at http://nabic.rda.go.kr/DNAchip.",2015-11-30 +21933848,The curation paradigm and application tool used for manual curation of the scientific literature at the Comparative Toxicogenomics Database.,"The Comparative Toxicogenomics Database (CTD) is a public resource that promotes understanding about the effects of environmental chemicals on human health. CTD biocurators read the scientific literature and convert free-text information into a structured format using official nomenclature, integrating third party controlled vocabularies for chemicals, genes, diseases and organisms, and a novel controlled vocabulary for molecular interactions. Manual curation produces a robust, richly annotated dataset of highly accurate and detailed information. Currently, CTD describes over 349,000 molecular interactions between 6800 chemicals, 20,900 genes (for 330 organisms) and 4300 diseases that have been manually curated from over 25,400 peer-reviewed articles. This manually curated data are further integrated with other third party data (e.g. Gene Ontology, KEGG and Reactome annotations) to generate a wealth of toxicogenomic relationships. Here, we describe our approach to manual curation that uses a powerful and efficient paradigm involving mnemonic codes. This strategy allows biocurators to quickly capture detailed information from articles by generating simple statements using codes to represent the relationships between data types. The paradigm is versatile, expandable, and able to accommodate new data challenges that arise. We have incorporated this strategy into a web-based curation tool to further increase efficiency and productivity, implement quality control in real-time and accommodate biocurators working remotely. 
Database URL: http://ctd.mdibl.org.",2011-09-20 +28913866,Conversion of array-based single nucleotide polymorphic markers for use in targeted genotyping by sequencing in hexaploid wheat (Triticum aestivum).,"Wheat breeders and academics alike use single nucleotide polymorphisms (SNPs) as molecular markers to characterize regions of interest within the hexaploid wheat genome. A number of SNP-based genotyping platforms are available, and their utility depends upon factors such as the available technologies, number of data points required, budgets and the technical expertise required. Unfortunately, markers can rarely be exchanged between existing and newly developed platforms, meaning that previously generated data cannot be compared, or combined, with more recently generated data sets. We predict that genotyping by sequencing will become the predominant genotyping technology within the next 5-10 years. With this in mind, to ensure that data generated from current genotyping platforms continues to be of use, we have designed and utilized SNP-based capture probes from several thousand existing and publicly available probes from Axiom® and KASP™ genotyping platforms. We have validated our capture probes in a targeted genotyping by sequencing protocol using 31 previously genotyped UK elite hexaploid wheat accessions. Data comparisons between targeted genotyping by sequencing, Axiom® array genotyping and KASP™ genotyping assays, identified a set of 3256 probes which reliably bring together targeted genotyping by sequencing data with the previously available marker data set. As such, these probes are likely to be of considerable value to the wheat community. 
The probe details, full probe sequences and a custom built analysis pipeline may be freely downloaded from the CerealsDB website (http://www.cerealsdb.uk.net/cerealgenomics/CerealsDB/sequence_capture.php).",2017-10-23 +26586805,COLOMBOS v3.0: leveraging gene expression compendia for cross-species analyses.,"COLOMBOS is a database that integrates publicly available transcriptomics data for several prokaryotic model organisms. Compared to the previous version it has more than doubled in size, both in terms of species and data available. The manually curated condition annotation has been overhauled as well, giving more complete information about samples' experimental conditions and their differences. Functionality-wise cross-species analyses now enable users to analyse expression data for all species simultaneously, and identify candidate genes with evolutionary conserved expression behaviour. All the expression-based query tools have undergone a substantial improvement, overcoming the limit of enforced co-expression data retrieval and instead enabling the return of more complex patterns of expression behaviour. COLOMBOS is freely available through a web application at http://colombos.net/. The complete database is also accessible via REST API or downloadable as tab-delimited text files.",2015-11-19 +29605850,The Pancancer DNA Methylation Trackhub: A Window to The Cancer Genome Atlas Epigenomics Data.,"The Cancer Genome Atlas (TCGA) epigenome data includes the DNA methylation status of tumor and normal tissues of large cohorts for dozens of cancer types. Due to the moderately large data sizes, retrieving and analyzing them requires basic programming skills. Simple data browsing (e.g., candidate gene search) is hampered by the scarcity of easy-to-use data browsers addressed to the broad community of biomedical researchers. 
We propose a new visualization method depicting the overall DNA methylation status at each TCGA cohort while emphasizing its heterogeneity, thus facilitating the evaluation of the cohort variability and the normal versus tumor differences. Implemented as a trackhub integrated to the University of California Santa Cruz (UCSC) genome browser, it can be easily added to any genome-wide annotation layer.To exemplify the trackhub usage we evaluate local DNA methylation boundaries, the aberrant DNA methylation of a CpG island located at the estrogen receptor 1 (ESR1) in breast and colon cancer, and the hypermethylation of the Homeobox HOXA gene cluster and the EN1 gene in multiple cancer types. The DNA methylation pancancer trackhub is freely available at http://maplab.cat/tcga_450k_trackhub .",2018-01-01 +31506309,Genetic Similarity of Gonococcal Homologs to Meningococcal Outer Membrane Proteins of Serogroup B Vaccine. ,"The human pathogens Neisseria gonorrhoeae and Neisseria meningitidis share high genome identity. Retrospective analysis of surveillance data from New Zealand indicates the potential cross-protective effect of outer membrane vesicle (OMV) meningococcal serogroup B vaccine (MeNZB) against N. gonorrhoeae A licensed OMV-based MenB vaccine, MenB-4C, consists of a recombinant FHbp, NhbA, NadA, and the MeNZB OMV. Previous work has identified several abundantly expressed outer membrane proteins (OMPs) as major components of the MenB-4C OMV with high sequence similarity between N. gonorrhoeae and N. meningitidis, suggesting a mechanism for cross-protection. To build off these findings, we performed comparative genomic analysis on 970 recent N. gonorrhoeae isolates collected through a U.S surveillance system against N. meningitidis serogroup B (NmB) reference sequences. We identified 1,525 proteins that were common to both Neisseria species, of which 57 proteins were predicted to be OMPs using in silico methods. 
Among the MenB-4C antigens, NhbA showed moderate sequence identity (73%) to the respective gonococcal homolog, was highly conserved within N. gonorrhoeae, and was predicted to be surface expressed. In contrast, the gonococcal FHbp was predicted not to be surface expressed, while NadA was absent in all N. gonorrhoeae isolates. Our work confirmed recent observations (E. A. Semchenko, A. Tan, R. Borrow, and K. L. Seib, Clin Infect Dis, 2018, https://doi.org/10.1093/cid/ciy1061) and describes homologous OMPs from a large panel of epidemiologically relevant N. gonorrhoeae strains in the United States against NmB reference strains. Based on our results, we report a set of OMPs that may contribute to the previously observed cross-protection and provide potential antigen targets to guide the next steps in gonorrhea vaccine development.IMPORTANCE Gonorrhea, a sexually transmitted disease, causes substantial global morbidity and economic burden. New prevention and control measures for this disease are urgently needed, as strains resistant to almost all classes of antibiotics available for treatment have emerged. Previous reports demonstrate that cross-protection from gonococcal infections may be conferred by meningococcal serogroup B (MenB) outer membrane vesicle (OMV)-based vaccines. Among 1,525 common proteins shared across the genomes of both N. gonorrhoeae and N. meningitidis, 57 proteins were predicted to be surface expressed (outer membrane proteins [OMPs]) and thus preferred targets for vaccine development. The majority of these OMPs showed high sequence identity between the 2 bacterial species. 
Our results provide valuable insight into the meningococcal antigens present in the current OMV-containing MenB-4C vaccine that may contribute to cross-protection against gonorrhea and may inform next steps in gonorrhea vaccine development.",2019-09-10 +28221024,iMet: A Network-Based Computational Tool To Assist in the Annotation of Metabolites from Tandem Mass Spectra.,"Structural annotation of metabolites relies mainly on tandem mass spectrometry (MS/MS) analysis. However, approximately 90% of the known metabolites reported in metabolomic databases do not have annotated spectral data from standards. This situation has fostered the development of computational tools that predict fragmentation patterns in silico and compare these to experimental MS/MS spectra. However, because such methods require the molecular structure of the detected compound to be available for the algorithm, the identification of novel metabolites in organisms relevant for biotechnological and medical applications remains a challenge. Here, we present iMet, a computational tool that facilitates structural annotation of metabolites not described in databases. iMet uses MS/MS spectra and the exact mass of an unknown metabolite to identify metabolites in a reference database that are structurally similar to the unknown metabolite. The algorithm also suggests the chemical transformation that converts the known metabolites into the unknown one. As a proxy for the structural annotation of novel metabolites, we tested 148 metabolites following a leave-one-out cross-validation procedure or by using MS/MS spectra experimentally obtained in our laboratory. We show that for 89% of the 148 metabolites at least one of the top four matches identified by iMet enables the proper annotation of the unknown metabolites. To further validate iMet, we tested 31 metabolites proposed in the 2012-16 CASMI challenges. 
iMet is freely available at http://imet.seeslab.net .",2017-03-03 +29905875,MISTIC2: comprehensive server to study coevolution in protein families.,"Correlated mutations between residue pairs in evolutionarily related proteins arise from constraints needed to maintain a functional and stable protein. Identifying these inter-related positions narrows down the search for structurally or functionally important sites. MISTIC is a server designed to assist users to calculate covariation in protein families and provide them with an interactive tool to visualize the results. Here, we present MISTIC2, an update to the previous server, that allows to calculate four covariation methods (MIp, mfDCA, plmDCA and gaussianDCA). The results visualization framework has been reworked for improved performance, compatibility and user experience. It includes a circos representation of the information contained in the alignment, an interactive covariation network, a 3D structure viewer and a sequence logo. Others components provide additional information such as residue annotations, a roc curve for assessing contact prediction, data tables and different ways of filtering the data and exporting figures. Comparison of different methods is easily done and scores combination is also possible. A newly implemented web service allows users to access MISTIC2 programmatically using an API to calculate covariation and retrieve results. MISTIC2 is available at: https://mistic2.leloir.org.ar.",2018-07-01 +29432522,WDL-RF: predicting bioactivities of ligand molecules acting with G protein-coupled receptors by combining weighted deep learning and random forest.,"Motivation:Precise assessment of ligand bioactivities (including IC50, EC50, Ki, Kd, etc.) is essential for virtual screening and lead compound identification. However, not all ligands have experimentally determined activities. 
In particular, many G protein-coupled receptors (GPCRs), which are the largest integral membrane protein family and represent targets of nearly 40% drugs on the market, lack published experimental data about ligand interactions. Computational methods with the ability to accurately predict the bioactivity of ligands can help efficiently address this problem. Results:We proposed a new method, WDL-RF, using weighted deep learning and random forest, to model the bioactivity of GPCR-associated ligand molecules. The pipeline of our algorithm consists of two consecutive stages: (i) molecular fingerprint generation through a new weighted deep learning method, and (ii) bioactivity calculations with a random forest model; where one uniqueness of the approach is that the model allows end-to-end learning of prediction pipelines with input ligands being of arbitrary size. The method was tested on a set of twenty-six non-redundant GPCRs that have a high number of active ligands, each with 200-4000 ligand associations. The results from our benchmark show that WDL-RF can generate bioactivity predictions with an average root-mean square error 1.33 and correlation coefficient (r2) 0.80 compared to the experimental measurements, which are significantly more accurate than the control predictors with different molecular fingerprints and descriptors. In particular, data-driven molecular fingerprint features, as extracted from the weighted deep learning models, can help solve deficiencies stemming from the use of traditional hand-crafted features and significantly increase the efficiency of short molecular fingerprints in virtual screening. Availability and implementation:The WDL-RF web server, as well as source codes and datasets of WDL-RF, is freely available at https://zhanglab.ccmb.med.umich.edu/WDL-RF/ for academic purposes. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +27534855,Distribution of PCR ribotypes among recent Clostridium difficile isolates collected in two districts of Hungary using capillary gel electrophoresis and review of changes in the circulating ribotypes over time.,"Following the first description of a Clostridium difficile case caused by ribotype 027 in Hungary in 2007, the rapid spread of C. difficile infection in different hospitals within the country was observed. The aim of this pilot study was to investigate the distribution of different PCR ribotypes among inpatient and outpatient isolates obtained in two geographically different parts of Hungary. One hundred and ninety-two toxigenic C. difficile isolates collected between 1 October and 1 December 2014 were PCR ribotyped using capillary gel electrophoresis and the database of WEBRIBO (http://webribo.ages.at), which allows the automatic analysis and comparison of capillary-sequencer-based PCR ribotyping data. Altogether, 31 different known ribotypes were found, and 16 isolates showed a novel banding pattern, not included in the current library. Besides the dominance of 027 (33.3 %) among all isolates, there were differences in its presence among isolates obtained from the two regions (45.8 % in the central region and 20.8 % in the south-east region, respectively), whereas the second most prevalent ribotype 036 (19.8 %) was more frequently found among isolates obtained in the south-east region compared with the central region of Hungary (29.1 versus 10.4 %). Similar differences in the spread of different ribotypes, in particular 027, which were found during earlier studies in Hungary may be due to the existing order for admissions of patients to hospitals. We also summarized the changing pattern of PCR ribotypes of Hungarian C. 
difficile isolates over time, based on earlier published data.",2016-08-17 +24279809,The Rice Oligonucleotide Array Database: an atlas of rice gene expression.,"

Background

Microarray technologies facilitate high-throughput gene expression analysis. However, the diversity of platforms for rice gene expression analysis hinders efficient analysis. Tools to broadly integrate microarray data from different platforms are needed.

Results

In this study, we developed the Rice Oligonucleotide Array Database (ROAD, http://www.ricearray.org) to explore gene expression across 1,867 publicly available rice microarray hybridizations. The ROAD's user-friendly web interface and variety of visualization tools facilitate the extraction of gene expression profiles using gene and microarray element identifications. The ROAD supports meta-analysis of genes expressed in different tissues and at developmental stages. Co-expression analysis tool provides information on co-regulation between genes under general, abiotic and biotic stress conditions. Additionally, functional analysis tools, such as Gene Ontology and KEGG (Kyoto Encyclopedia of Genes and Genomes) Orthology, are embedded in the ROAD. These tools facilitate the identification of meaningful biological patterns in a list of query genes.

Conclusions

The Rice Oligonucleotide Array Database provides comprehensive gene expression profiles for all rice genes, and will be a useful resource for researchers of rice and other grass species.",2012-07-19 +29420675,GCPred: a web tool for guanylyl cyclase functional centre prediction from amino acid sequence.,"Summary:GCPred is a webserver for the prediction of guanylyl cyclase (GC) functional centres from amino acid sequence. GCs are enzymes that generate the signalling molecule cyclic guanosine 3', 5'-monophosphate from guanosine-5'-triphosphate. A novel class of GC centres (GCCs) has been identified in complex plant proteins. Using currently available experimental data, GCPred is created to automate and facilitate the identification of similar GCCs. The server features GCC values that consider in its calculation, the physicochemical properties of amino acids constituting the GCC and the conserved amino acids within the centre. From user input amino acid sequence, the server returns a table of GCC values and graphs depicting deviations from mean values. The utility of this server is demonstrated using plant proteins and the human interleukin-1 receptor-associated kinase family of proteins as example. Availability and implementation:The GCPred server is available at http://gcpred.com. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-06-01 +30378733,How can we make better graphs? An initiative to increase the graphical expertise and productivity of quantitative scientists.,"Graphics are at the core of exploring and understanding data, communicating results and conclusions, and supporting decision-making. Increasing our graphical expertise can significantly strengthen our impact as professional statisticians and quantitative scientists. In this article, we present a concerted effort to improve the way we create graphics at Novartis. We provide our vision and guiding principles, before describing seven work packages in more detail. 
The actions, principles, and experiences laid out in this paper are applicable generally, also beyond drug development, which is our field of work. The purpose of this article is to share our experiences and help foster the use of good graphs in pharmaceutical statistics and beyond. A Graphics Principles ""Cheat Sheet"" is available online at https://graphicsprinciples.github.io/.",2018-10-31 +29512488,S2P: A software tool to quickly carry out reproducible biomedical research projects involving 2D-gel and MALDI-TOF MS protein data.,"

Background and objective

2D-gel electrophoresis is widely used in combination with MALDI-TOF mass spectrometry in order to analyze the proteome of biological samples. For instance, it can be used to discover proteins that are differentially expressed between two groups (e.g. two disease conditions, case vs. control, etc.) thus obtaining a set of potential biomarkers. This procedure requires a great deal of data processing in order to prepare data for analysis or to merge and integrate data from different sources. This kind of work is usually done manually (e.g. copying and pasting data into spreadsheet files), which is highly time consuming and distracts the researcher from other important, core tasks. Moreover, engaging in a repetitive process in a non-automated, handling-based manner is prone to error, thus threatening reliability and reproducibility. The objective of this paper is to present S2P, an open source software to overcome these drawbacks.

Methods

S2P is implemented in Java on top of the AIBench framework, and relies on well-established open source libraries to accomplish different tasks.

Results

S2P is an AIBench based desktop multiplatform application, specifically aimed to process 2D-gel and MALDI-mass spectrometry protein identification-based data in a computer-aided, reproducible manner. Different case studies are presented in order to show the usefulness of S2P.

Conclusions

S2P is open source and free to all users at http://www.sing-group.org/s2p. Through its user-friendly GUI interface, S2P dramatically reduces the time that researchers need to invest in order to prepare data for analysis.",2017-12-02 +29983488,"LSAT: Liliaceae Simple Sequences Analysis Tool, a web server.","LSAT is a web-based microsatellite SSR marker designer tool specific for the Liliaceae family. It is developed using HTML, CSS, PHP, Perl and Java scripts. It works without extra add-ons on standard browsers. LSAT provides SSR primer designing service using the web interface. It helps in SSR mining and primer design. LSAT is user friendly with customizable search parameters producing visual output having download options. The current version of LSAT is backed by two data sets, namely, lily EST (Expressed Sequence Tag) from NCBI and lily nr (non redundant) with 4,099 and 216,768 unigenes, respectively. LSAT will be updated regularly upon availability of additional data (either EST and/or transcriptome) on Liliaceae.

Availability

LSAT is available for free at http://210.110.86.160/Lsat/Lsat.html.",2018-04-30 +30329008,Quality assessment for the putative intrinsic disorder in proteins.,"

Motivation

While putative intrinsic disorder is widely used, none of the predictors provides quality assessment (QA) scores. QA scores estimate the likelihood that predictions are correct at a residue level and have been applied in other bioinformatics areas. We recently reported that QA scores derived from putative disorder propensities perform relatively poorly for native disordered residues. Here we design and validate a general approach to construct QA predictors for disorder predictions.

Results

The QUARTER (QUality Assessment for pRotein inTrinsic disordEr pRedictions) toolbox of methods accommodates a diverse set of ten disorder predictors. It builds upon several innovative design elements including use and scaling of selected physicochemical properties of the input sequence, post-processing of disorder propensity scores, and a feature selection that optimizes the predictive models to a specific disorder predictor. We empirically establish that each one of these elements contributes to the overall predictive performance of our tool and that QUARTER's outputs significantly outperform QA scores derived from the outputs generated the disorder predictors. The best performing QA scores for a single disorder predictor identify 13% of residues that are predicted with 98% precision. QA scores computed by combining results of the ten disorder predictors cover 40% of residues with 95% precision. Case studies are used to show how to interpret the QA scores. QA scores based on the high precision combined predictions are applied to analyze disorder in the human proteome.

Availability and implementation

http://biomine.cs.vcu.edu/servers/QUARTER/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +27405533,Knodle: A Support Vector Machines-Based Automatic Perception of Organic Molecules from 3D Coordinates.,"Here we address the problem of the assignment of atom types and bond orders in low molecular weight compounds. For this purpose, we have developed a prediction model based on nonlinear Support Vector Machines (SVM), implemented in a KNOwledge-Driven Ligand Extractor called Knodle, a software library for the recognition of atomic types, hybridization states, and bond orders in the structures of small molecules. We trained the model using an excessive amount of structural data collected from the PDBbindCN database. Accuracy of the results and the running time of our method is comparable with other popular methods, such as NAOMI, fconv, and I-interpret. On the popular Labute's benchmark set consisting of 179 protein-ligand complexes, Knodle makes five to six perception errors, NAOMI makes seven errors, I-interpret makes nine errors, and fconv makes 13 errors. On a larger set of 3,000 protein-ligand structures collected from the PDBBindCN general data set (v2014), Knodle and NAOMI have a comparable accuracy of approximately 3.9% and 4.7% of errors, I-interpret made 6.0% of errors, while fconv produced approximately 12.8% of errors. On a more general set of 332,974 entries collected from the Ligand Expo database, Knodle made 4.5% of errors. Overall, our study demonstrates the efficiency and robustness of nonlinear SVM in structure perception tasks. Knodle is available at https://team.inria.fr/nano-d/software/Knodle .",2016-07-21 +30024383,Evaluation of a Physiologically Based Pharmacokinetic (PBPK) Model for Inorganic Arsenic Exposure Using Data from Two Diverse Human Populations.,"

Background

Multiple epidemiological studies exist for some of the well-studied health endpoints associated with inorganic arsenic (iAs) exposure; however, results are usually expressed in terms of different exposure/dose metrics. Physiologically based pharmacokinetic (PBPK) models may be used to obtain a common exposure metric for application in dose-response meta-analysis.

Objective

A previously published PBPK model for inorganic arsenic (iAs) was evaluated using data sets for arsenic-exposed populations from Bangladesh and the United States.

Methods

The first data set was provided by the Health Effects of Arsenic Longitudinal Study cohort in Bangladesh. The second data set was provided by a study conducted in Churchill County, Nevada, USA. The PBPK model consisted of submodels describing the absorption, distribution, metabolism and excretion (ADME) of iAs and its metabolites monomethylarsenic (MMA) and dimethylarsenic (DMA) acids. The model was used to estimate total arsenic levels in urine in response to oral ingestion of iAs. To compare predictions of the PBPK model against observations, urinary arsenic concentration and creatinine-adjusted urinary arsenic concentration were simulated. As part of the evaluation, both water and dietary intakes of arsenic were estimated and used to generate the associated urine concentrations of the chemical in exposed populations.

Results

When arsenic intake from water alone was considered, the results of the PBPK model underpredicted urinary arsenic concentrations for individuals with low levels of arsenic in drinking water and slightly overpredicted urinary arsenic concentrations in individuals with higher levels of arsenic in drinking water. When population-specific estimates of dietary intakes of iAs were included in exposures, the predictive value of the PBPK model was markedly improved, particularly at lower levels of arsenic intake.

Conclusions

Evaluations of this PBPK model illustrate its adequacy and usefulness for oral exposure reconstructions in human health risk assessment, particularly in individuals who are exposed to relatively low levels of arsenic in water or food. https://doi.org/10.1289/EHP3096.",2018-07-16 +26205660,The gastrin and cholecystokinin receptors mediated signaling network: a scaffold for data analysis and new hypotheses on regulatory mechanisms.,"

Background

The gastrointestinal peptide hormones cholecystokinin and gastrin exert their biological functions via cholecystokinin receptors CCK1R and CCK2R respectively. Gastrin, a central regulator of gastric acid secretion, is involved in growth and differentiation of gastric and colonic mucosa, and there is evidence that it is pro-carcinogenic. Cholecystokinin is implicated in digestion, appetite control and body weight regulation, and may play a role in several digestive disorders.

Results

We performed a detailed analysis of the literature reporting experimental evidence on signaling pathways triggered by CCK1R and CCK2R, in order to create a comprehensive map of gastrin and cholecystokinin-mediated intracellular signaling cascades. The resulting signaling map captures 413 reactions involving 530 molecular species, and incorporates the currently available knowledge into one integrated signaling network. The decomposition of the signaling map into sub-networks revealed 18 modules that represent higher-level structures of the signaling map. These modules allow a more compact mapping of intracellular signaling reactions to known cell behavioral outcomes such as proliferation, migration and apoptosis. The integration of large-scale protein-protein interaction data to this literature-based signaling map in combination with topological analyses allowed us to identify 70 proteins able to increase the compactness of the map. These proteins represent experimentally testable hypotheses for gaining new knowledge on gastrin- and cholecystokinin receptor signaling. The CCKR map is freely available both in a downloadable, machine-readable SBML-compatible format and as a web resource through PAYAO ( http://sblab.celldesigner.org:18080/Payao11/bin/).

Conclusion

We have demonstrated how a literature-based CCKR signaling map together with its protein interaction extensions can be analyzed to generate new hypotheses on molecular mechanisms involved in gastrin- and cholecystokinin-mediated regulation of cellular processes.",2015-07-24 +29569225,A statistical model for helices with applications.,"Motivated by a cutting edge problem related to the shape of α -helices in proteins, we formulate a parametric statistical model, which incorporates the cylindrical nature of the helix. Our focus is to detect a ""kink,"" which is a drastic change in the axial direction of the helix. We propose a statistical model for the straight α -helix and derive the maximum likelihood estimation procedure. The cylinder is an accepted geometric model for α -helices, but our statistical formulation, for the first time, quantifies the uncertainty in atom positions around the cylinder. We propose a change point technique ""Kink-Detector"" to detect a kink location along the helix. Unlike classical change point problems, the change in direction of a helix depends on a simultaneous shift of multiple data points rather than a single data point, and is less straightforward. Our biological building block is crowdsourced data on straight and kinked helices; which has set a gold standard. We use this data to identify salient features to construct Kink-detector, test its performance and gain some insights. We find the performance of Kink-detector comparable to its computational competitor called ""Kink-Finder."" We highlight that identification of kinks by visual assessment can have limitations and Kink-detector may help in such cases. 
Further, an analysis of crowdsourced curved α -helices finds that Kink-detector is also effective in detecting moderate changes in axial directions.",2018-03-22 +31409597,The Acute Dialysis Orders Objective Structured Clinical Examination (OSCE): Fellow Performance on a Formative Assessment of Acute Kidney Replacement Therapy Competence.,"

Background and objectives

Acute kidney replacement therapy (KRT) prescription is a critical nephrology skill. We administered a formative objective structured clinical examination (OSCE) to nephrology fellows to assess acute KRT medical knowledge, patient care, and systems-based practice competencies.

Design, setting, participants, & measurements

Prospective cohort study of an educational test using the unified model of construct validity. We tested 117 fellows: 25 (four programs) in 2016 and 92 (15 programs) in 2017; 51 first-year and 66 second-year fellows. Using institutional protocols and order sets, fellows wrote orders and answered open-ended questions on a three-scenario OSCE, previously validated by board-certified, practicing clinical nephrologists. Outcomes were overall and scenario pass percentage and score; percent correctly answering predetermined, evidence-based questions; second-year score correlation with in-training examination score; and satisfaction survey.

Results

A total of 76% passed scenario 1 (acute continuous RRT): 92% prescribed a ≥20 ml/kg per hour effluent dose; 63% estimated clearance as effluent volume. Forty-two percent passed scenario 2 (maintenance dialysis initiation); 75% correctly prescribed 3-4 mEq/L K+ dialysate and 12% identified the two absolute, urgent indications for maintenance dialysis initiation (uremic encephalopathy and pericarditis). Six percent passed scenario 3 (acute life-threatening hyperkalemia); 20% checked for rebound hyperkalemia with two separate blood draws. Eighty-three percent correctly withheld intravenous sodium bicarbonate for acute hyperkalemia in a nonacidotic, volume-overloaded patient on maintenance dialysis, and 32% passed overall. Second-year versus first-year fellow overall score was 44.4±4 versus 42.7±5 (one-tailed P=0.02), with 39% versus 24% passing (P=0.08). Second-year in-training examination and OSCE scores were not significantly correlated (r=0.15; P=0.26). Seventy-seven percent of fellows agreed the OSCE was useful in assessing ""proficiency in ordering"" acute KRT. Limitations include lack of a validated criterion test, and unfamiliarity with open-ended question format.

Conclusions

The OSCE can provide quantitative data for formative Accreditation Council for Graduate Medical Education competency assessments and identify opportunities for dialysis curriculum development.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2019_08_08_CJASNPodcast_19_09_.mp3.",2019-08-13 +26861659,ScaleNet: a literature-based model of scale insect biology and systematics. ,"Scale insects (Hemiptera: Coccoidea) are small herbivorous insects found on all continents except Antarctica. They are extremely invasive, and many species are serious agricultural pests. They are also emerging models for studies of the evolution of genetic systems, endosymbiosis and plant-insect interactions. ScaleNet was launched in 1995 to provide insect identifiers, pest managers, insect systematists, evolutionary biologists and ecologists efficient access to information about scale insect biological diversity. It provides comprehensive information on scale insects taken directly from the primary literature. Currently, it draws from 23,477 articles and describes the systematics and biology of 8194 valid species. For 20 years, ScaleNet ran on the same software platform. That platform is no longer viable. Here, we present a new, open-source implementation of ScaleNet. We have normalized the data model, begun the process of correcting invalid data, upgraded the user interface, and added online administrative tools. These improvements make ScaleNet easier to use and maintain and make the ScaleNet data more accurate and extendable. Database URL: http://scalenet.info.",2016-02-09 +30415600,Motif discovery in biological network using expansion tree.,"Networks are powerful representation of topological features in biological systems like protein interaction and gene regulation. In order to understand the design principles of such complex networks, the concept of network motifs emerged. Network motifs are recurrent patterns with statistical significance that can be seen as basic building blocks of complex networks. 
Identification of network motifs leads to many important applications, such as understanding the modularity and the large-scale structure of biological networks, classification of networks into super-families, protein function annotation, etc. However, identification of network motifs is challenging as it involves graph isomorphism which is computationally hard. Though this problem has been studied extensively in the literature using different computational approaches, we are far from satisfactory results. Motivated by the challenges involved in this field, an efficient and scalable network Motif Discovery algorithm based on Expansion Tree (MODET) is proposed. Pattern growth approach is used in this proposed motif-centric algorithm. Each node of the expansion tree represents a non-isomorphic pattern. The embeddings corresponding to a child node of the expansion tree are obtained from the embeddings of the parent node through vertex addition and edge addition. Further, the proposed algorithm does not involve any graph isomorphism check and the time complexities of these processes are O(n) and O(1) , respectively. The proposed algorithm has been tested on Protein-Protein Interaction (PPI) network obtained from the MINT database. The computational efficiency of the proposed algorithm outperforms most of the existing network motif discovery algorithms.",2018-09-19 +30239015,sCARy! Risk Perceptions in Autonomous Driving: The Influence of Experience on Perceived Benefits and Barriers.,"The increasing development of autonomous vehicles (AVs) influences the future of transportation. Beyond the potential benefits in terms of safety, efficiency, and comfort, also potential risks of novel driving technologies need to be addressed. In this article, we explore risk perceptions toward connected and autonomous driving in comparison to conventional driving. In order to gain a deeper understanding of individual risk perceptions, we adopted a two-step empirical procedure. 
First, focus groups ( N = 17 ) were carried out to identify relevant risk factors for autonomous and connected driving. Further, a questionnaire was developed, which was answered by 516 German participants. In the questionnaire, three driving technologies (connected, autonomous, conventional) were evaluated via semantic differential (rating scale to identify connotative meaning of technologies). Second, participants rated perceived risk levels (for data, traffic environment, vehicle, and passenger) and perceived benefits and barriers of connected/autonomous driving. Since previous experience with automated functions of driver assistance systems can have an impact on the evaluation, three experience groups have been formed. The effect of experience on benefits and barrier perceptions was also analyzed. Risk perceptions were significantly smaller for conventional driving compared to connected/autonomous driving. With increasing experience, risk perception decreases for novel driving technologies with one exception: the perceived risk in handling data is not influenced by experience. The findings contribute to an understanding of risk perception in autonomous driving, which helps to foster a successful implementation of AVs on the market and to develop public information strategies.",2018-09-21 +25283306,IUTA: a tool for effectively detecting differential isoform usage from RNA-Seq data.,"

Background

Most genes in mammals generate several transcript isoforms that differ in stability and translational efficiency through alternative splicing. Such alternative splicing can be tissue- and developmental stage-specific, and such specificity is sometimes associated with disease. Thus, detecting differential isoform usage for a gene between tissues or cell lines/types (differences in the fraction of total expression of a gene represented by the expression of each of its isoforms) is potentially important for cell and developmental biology.

Results

We present a new method IUTA that is designed to test each gene in the genome for differential isoform usage between two groups of samples. IUTA also estimates isoform usage for each gene in each sample as well as averaged across samples within each group. IUTA is the first method to formulate the testing problem as testing for equal means of two probability distributions under the Aitchison geometry, which is widely recognized as the most appropriate geometry for compositional data (vectors that contain the relative amount of each component comprising the whole). Evaluation using simulated data showed that IUTA was able to provide test results for many more genes than was Cuffdiff2 (version 2.2.0, released in Mar. 2014), and IUTA performed better than Cuffdiff2 for the limited number of genes that Cuffdiff2 did analyze. When applied to actual mouse RNA-Seq datasets from six tissues, IUTA identified 2,073 significant genes with clear patterns of differential isoform usage between a pair of tissues. IUTA is implemented as an R package and is available at http://www.niehs.nih.gov/research/resources/software/biostatistics/iuta/index.cfm.

Conclusions

Both simulation and real-data results suggest that IUTA accurately detects differential isoform usage. We believe that our analysis of RNA-seq data from six mouse tissues represents the first comprehensive characterization of isoform usage in these tissues. IUTA will be a valuable resource for those who study the roles of alternative transcripts in cell development and disease.",2014-10-06 +30070661,Understanding global infrared opacity and hot bands of greenhouse molecules with low vibrational modes from first-principles calculations: the case of CF4.,"Fluorine containing molecules have a particularly long atmospheric lifetime and their very big estimated global warming potentials are expected to rapidly increase in the future. This work is focused on the global theoretical prediction of infrared spectra of the tetrafluoromethane molecule that is considered as a potentially powerful greenhouse gas having the largest estimated lifetime of over 50 000 years in the atmosphere. The presence of relatively low vibrational frequencies makes the Boltzmann population of the excited levels important. Consequently, the ""hot bands"" corresponding to transitions among excited rovibrational states contribute significantly to the CF4 opacity in the infrared even at room temperature conditions but the existing laboratory data analyses are not sufficiently complete. In this work, we construct the first accurate and complete ab initio based line lists for CF4 in the range 0-4000 cm-1, containing rovibrational bands that are the most active in absorption. An efficient basis set compression method was applied to predict more than 700 new bands and subbands via variational nuclear motion calculations. We show that already at room temperature a quasi-continuum of overlapping weak lines appears in the CF4 infrared spectra due to the increasing density of bands and transitions. 
In order to converge the infrared opacity at room temperature, it was necessary to include a high rotational quantum number up to J = 80 resulting in 2 billion rovibrational transitions. In order to make the cross-section simulation faster, we have partitioned our data into two parts: (a) strong & medium line lists with lower energy levels for calculation of selective absorption features that can be used at various temperatures and (b) compressed ""super-line"" libraries of very weak transitions contributing to the quasi-continuum modelling. Comparisons with raw previously unassigned experimental spectra showed a very good accuracy for integrated absorbance in the entire range of the reported spectra predictions. The data obtained in this work will be made available through the TheoReTS information system (http://theorets.univ-reims.fr, http://theorets.tsu.ru) that contains ab initio born line lists and provides a user-friendly graphical interface for a fast simulation of the CF4 absorption cross-sections and radiance under various temperature conditions from 80 K to 400 K.",2018-08-01 +30693701,Fast and precise image generation of blood vessels embedded in skin.,"A software for fast rendering the visual appearance of a blood vessel located in human skin was developed based on a numerical solution of the radiative transfer equation. The user can specify geometrical properties, such as the depth and the diameter of the vessel, and physiological properties, such as the oxygen saturation of the vessel or the blood concentration in the skin. From these data, the spatially and spectrally resolved reflectance from the skin containing the blood vessel is calculated via Monte Carlo simulations, by which a two-dimensional image is generated. The short calculation time of about a second is achieved by precalculating and storing the spatially resolved reflectance for a variety of combinations of the optical and geometrical properties. 
This concept gives the user the opportunity to rapidly explore the influence of the physiological and geometrical properties of the investigated blood vessel on its visual appearance. The correctness of the lookup table was validated by comparison with independent Monte Carlo simulations. Rendering examples of different blood vessels in human skins are given. The current version of the software can be downloaded at https://www.ilm-ulm.de/software.",2019-01-01 +27678244,Time-resolved dual RNA-seq reveals extensive rewiring of lung epithelial and pneumococcal transcriptomes during early infection.,"

Background

Streptococcus pneumoniae, the pneumococcus, is the main etiological agent of pneumonia. Pneumococcal infection is initiated by bacterial adherence to lung epithelial cells. The exact transcriptional changes occurring in both host and microbe during infection are unknown. Here, we developed a time-resolved infection model of human lung alveolar epithelial cells by S. pneumoniae and assess the resulting transcriptome changes in both organisms simultaneously by using dual RNA-seq.

Results

Functional analysis of the time-resolved dual RNA-seq data identifies several features of pneumococcal infection. For instance, we show that the glutathione-dependent reactive oxygen detoxification pathway in epithelial cells is activated by reactive oxygen species produced by S. pneumoniae. Addition of the antioxidant resveratrol during infection abates this response. At the same time, pneumococci activate the competence regulon during co-incubation with lung epithelial cells. By comparing transcriptional changes between wild-type encapsulated and mutant unencapsulated pneumococci, we demonstrate that adherent pneumococci, but not free-floating bacteria, repress innate immune responses in epithelial cells including expression of the chemokine IL-8 and the production of antimicrobial peptides. We also show that pneumococci activate several sugar transporters in response to adherence to epithelial cells and demonstrate that this activation depends on host-derived mucins.

Conclusions

We provide a dual-transcriptomics overview of early pneumococcal infection in a time-resolved manner, providing new insights into host-microbe interactions. To allow easy access to the data by the community, a web-based platform was developed ( http://dualrnaseq.molgenrug.nl ). Further database exploration may expand our understanding of epithelial-pneumococcal interaction, leading to novel antimicrobial strategies.",2016-09-27 +30558418,Polypharmacology Browser PPB2: Target Prediction Combining Nearest Neighbors with Machine Learning.,"Here we report PPB2 as a target prediction tool assigning targets to a query molecule based on ChEMBL data. PPB2 computes ligand similarities using molecular fingerprints encoding composition (MQN), molecular shape and pharmacophores (Xfp), and substructures (ECfp4) and features an unprecedented combination of nearest neighbor (NN) searches and Naı̈ve Bayes (NB) machine learning, together with simple NN searches, NB and Deep Neural Network (DNN) machine learning models as further options. Although NN(ECfp4) gives the best results in terms of recall in a 10-fold cross-validation study, combining NN searches with NB machine learning provides superior precision statistics, as well as better results in a case study predicting off-targets of a recently reported TRPV6 calcium channel inhibitor, illustrating the value of this combined approach. PPB2 is available to assess possible off-targets of small molecule drug-like compounds by public access at http://gdb.unibe.ch .",2018-12-31 +30052961,Recommendations from the international evidence-based guideline for the assessment and management of polycystic ovary syndrome.,"

Study question

What is the recommended assessment and management of women with polycystic ovary syndrome (PCOS), based on the best available evidence, clinical expertise and consumer preference?

Summary answer

International evidence-based guidelines, including 166 recommendations and practice points, addressed prioritized questions to promote consistent, evidence-based care and improve the experience and health outcomes of women with PCOS.

What is known already

Previous guidelines either lacked rigorous evidence-based processes, did not engage consumer and international multidisciplinary perspectives, or were outdated. Diagnosis of PCOS remains controversial, and assessment and management are inconsistent. The needs of women with PCOS are not being adequately met and evidence practice gaps persist.

Study design, size, duration

International evidence-based guideline development engaged professional societies and consumer organizations with multidisciplinary experts and women with PCOS directly involved at all stages. Appraisal of Guidelines for Research and Evaluation (AGREE) II-compliant processes were followed, with extensive evidence synthesis. The Grading of Recommendations, Assessment, Development and Evaluation (GRADE) framework was applied across evidence quality, feasibility, acceptability, cost, implementation and ultimately recommendation strength.

Participants/materials, setting, methods

Governance included a six continent international advisory and a project board, five guideline development groups, and consumer and translation committees. Extensive health professional and consumer engagement informed guideline scope and priorities. Engaged international society-nominated panels included pediatrics, endocrinology, gynecology, primary care, reproductive endocrinology, obstetrics, psychiatry, psychology, dietetics, exercise physiology, public health and other experts, alongside consumers, project management, evidence synthesis and translation experts. In total, 37 societies and organizations covering 71 countries engaged in the process. Twenty face-to-face meetings over 15 months addressed 60 prioritized clinical questions involving 40 systematic and 20 narrative reviews. Evidence-based recommendations were developed and approved via consensus voting within the five guideline panels, modified based on international feedback and peer review, with final recommendations approved across all panels.

Main results and the role of chance

The evidence in the assessment and management of PCOS is generally of low to moderate quality. The guideline provides 31 evidence based recommendations, 59 clinical consensus recommendations and 76 clinical practice points all related to assessment and management of PCOS. Key changes in this guideline include: (i) considerable refinement of individual diagnostic criteria with a focus on improving accuracy of diagnosis; (ii) reducing unnecessary testing; (iii) increasing focus on education, lifestyle modification, emotional wellbeing and quality of life; and (iv) emphasizing evidence based medical therapy and cheaper and safer fertility management.

Limitations, reasons for caution

Overall evidence is generally low to moderate quality, requiring significantly greater research in this neglected, yet common condition, especially around refining specific diagnostic features in PCOS. Regional health system variation is acknowledged and a process for guideline and translation resource adaptation is provided.

Wider implications of the findings

The international guideline for the assessment and management of PCOS provides clinicians with clear advice on best practice based on the best available evidence, expert multidisciplinary input and consumer preferences. Research recommendations have been generated and a comprehensive multifaceted dissemination and translation program supports the guideline with an integrated evaluation program.

Study funding/competing interest(s)

The guideline was primarily funded by the Australian National Health and Medical Research Council of Australia (NHMRC) supported by a partnership with ESHRE and the American Society for Reproductive Medicine. Guideline development group members did not receive payment. Travel expenses were covered by the sponsoring organizations. Disclosures of conflicts of interest were declared at the outset and updated throughout the guideline process, aligned with NHMRC guideline processes. Full details of conflicts declared across the guideline development groups are available at https://www.monash.edu/medicine/sphpm/mchri/pcos/guideline in the Register of disclosures of interest. Of named authors, Dr Costello has declared shares in Virtus Health and past sponsorship from Merck Serono for conference presentations. Prof. Laven declared grants from Ferring, Euroscreen and personal fees from Ferring, Euroscreen, Danone and Titus Healthcare. Prof. Norman has declared a minor shareholder interest in an IVF unit. The remaining authors have no conflicts of interest to declare. The guideline was peer reviewed by special interest groups across our partner and collaborating societies and consumer organizations, was independently assessed against AGREE-II criteria, and underwent methodological review. This guideline was approved by all members of the guideline development groups and was submitted for final approval by the NHMRC.",2018-09-01 +31499131,Development of a nomogram to predict outcome after liver resection for hepatocellular carcinoma in Child-Pugh B cirrhosis.,"

Background & aims

Treatment allocation in patients with hepatocellular carcinoma (HCC) on a background of Child-Pugh B (CP-B) cirrhosis is controversial. Liver resection has been proposed in small series with acceptable outcomes, but data are limited. The aim of this study was to evaluate the outcomes of patients undergoing liver resection for HCC in CP-B cirrhosis, focusing on the surgical risks and survival.

Methods

Patients were retrospectively pooled from 14 international referral centers from 2002 to 2017. Postoperative and oncological outcomes were investigated. Prediction models for surgical risks, disease-free survival and overall survival were constructed.

Results

A total of 253 patients were included, of whom 57.3% of patients had a preoperative platelet count <100,000/mm3, 43.5% had preoperative ascites, and 56.9% had portal hypertension. A minor hepatectomy was most commonly performed (84.6%) and 122 (48.2%) were operated on by minimally invasive surgery (MIS). Ninety-day mortality was 4.3% with 6 patients (2.3%) dying from liver failure. One hundred and eight patients (42.7%) experienced complications, of which the most common was ascites (37.5%). Patients undergoing major hepatectomies had higher 90-day mortality (10.3% vs. 3.3%; p = 0.04) and morbidity rates (69.2% vs. 37.9%; p <0.001). Patients undergoing an open hepatectomy had higher morbidity (52.7% vs. 31.9%; p = 0.001) than those undergoing MIS. A prediction model for surgical risk was constructed (https://childb.shinyapps.io/morbidity/). The 5-year overall survival rate was 47%, and 56.9% of patients experienced recurrence. Prediction models for overall survival (https://childb.shinyapps.io/survival/) and disease-free survival (https://childb.shinyapps.io/DFsurvival/) were constructed.

Conclusions

Liver resection should be considered for patients with HCC and CP-B cirrhosis after careful selection according to patient characteristics, tumor pattern and liver function, while aiming to minimize surgical stress. An estimation of the surgical risk and survival advantage may be helpful in treatment allocation, eventually improving postoperative morbidity and achieving safe oncological outcomes.

Lay summary

Liver resection for hepatocellular carcinoma in advanced cirrhosis (Child-Pugh B score) is associated with a high rate of postoperative complications. However, due to the limited therapeutic alternatives in this setting, recent studies have shown promising results after accurate patient selection. In our international multicenter study, we provide 3 clinical models to predict postoperative surgical risks and long-term survival following liver resection, with the aim of improving treatment allocation and eventually clinical outcomes.",2019-09-06 +22102589,UniPathway: a resource for the exploration and annotation of metabolic pathways.,"UniPathway (http://www.unipathway.org) is a fully manually curated resource for the representation and annotation of metabolic pathways. UniPathway provides explicit representations of enzyme-catalyzed and spontaneous chemical reactions, as well as a hierarchical representation of metabolic pathways. This hierarchy uses linear subpathways as the basic building block for the assembly of larger and more complex pathways, including species-specific pathway variants. All of the pathway data in UniPathway has been extensively cross-linked to existing pathway resources such as KEGG and MetaCyc, as well as sequence resources such as the UniProt KnowledgeBase (UniProtKB), for which UniPathway provides a controlled vocabulary for pathway annotation. We introduce here the basic concepts underlying the UniPathway resource, with the aim of allowing users to fully exploit the information provided by UniPathway.",2011-11-18 +27899668,CyanoBase: a large-scale update on its 20th anniversary.,"The first ever cyanobacterial genome sequence was determined two decades ago and CyanoBase (http://genome.microbedb.jp/cyanobase), the first database for cyanobacteria was simultaneously developed to allow this genomic information to be used more efficiently. Since then, CyanoBase has constantly been extended and has received several updates. 
Here, we describe a new large-scale update of the database, which coincides with its 20th anniversary. We have expanded the number of cyanobacterial genomic sequences from 39 to 376 species, which consists of 86 complete and 290 draft genomes. We have also optimized the user interface for large genomic data to include the use of semantic web technologies and JBrowse and have extended community-based reannotation resources through the re-annotation of Synechocystis sp. PCC 6803 by the cyanobacterial research community. These updates have markedly improved CyanoBase, providing cyanobacterial genome annotations as references for cyanobacterial research.",2016-11-29 +21896882,Functional network construction in Arabidopsis using rule-based machine learning on large-scale data sets.,"The meta-analysis of large-scale postgenomics data sets within public databases promises to provide important novel biological knowledge. Statistical approaches including correlation analyses in coexpression studies of gene expression have emerged as tools to elucidate gene function using these data sets. Here, we present a powerful and novel alternative methodology to computationally identify functional relationships between genes from microarray data sets using rule-based machine learning. This approach, termed ""coprediction,"" is based on the collective ability of groups of genes co-occurring within rules to accurately predict the developmental outcome of a biological system. We demonstrate the utility of coprediction as a powerful analytical tool using publicly available microarray data generated exclusively from Arabidopsis thaliana seeds to compute a functional gene interaction network, termed Seed Co-Prediction Network (SCoPNet). SCoPNet predicts functional associations between genes acting in the same developmental and signal transduction pathways irrespective of the similarity in their respective gene expression patterns. 
Using SCoPNet, we identified four novel regulators of seed germination (ALTERED SEED GERMINATION5, 6, 7, and 8), and predicted interactions at the level of transcript abundance between these novel and previously described factors influencing Arabidopsis seed germination. An online Web tool to query SCoPNet has been developed as a community resource to dissect seed biology and is available at http://www.vseed.nottingham.ac.uk/.",2011-09-06 +30768790,PPI-Detect: A support vector machine model for sequence-based prediction of protein-protein interactions.,"The prediction of peptide-protein or protein-protein interactions (PPI) is a challenging task, especially if amino acid sequences are the only information available. Machine learning methods allow us to exploit the information content in PPI datasets. However, the numerical codification of these datasets often influences the performance of data mining approaches. Here, we introduce a procedure for the general-purpose numerical codification of polypeptides. This procedure transforms pairs of amino acid sequences into a machine learning-friendly vector, whose elements represent numerical descriptors of residues in proteins. We used this numerical encoding procedure for the development of a support vector machine model (PPI-Detect), which allows predicting whether two proteins will interact or not. PPI-Detect (https://ppi-detect.zmb.uni-due.de/) outperforms state of the art sequence-based predictors of PPI. We employed PPI-Detect for the analysis of derivatives of EPI-X4, an endogenous peptide inhibitor of CXCR4, a G-protein-coupled receptor. There, we identified with high accuracy those peptides which bind better than EPI-X4 to the receptor. Also using PPI-Detect, we designed a novel peptide and then experimentally established its anti-CXCR4 activity. 
© 2019 Wiley Periodicals, Inc.",2019-02-15 +27899676,TransportDB 2.0: a database for exploring membrane transporters in sequenced genomes from all domains of life.,"All cellular life contains an extensive array of membrane transport proteins. The vast majority of these transporters have not been experimentally characterized. We have developed a bioinformatic pipeline to identify and annotate complete sets of transporters in any sequenced genome. This pipeline is now fully automated enabling it to better keep pace with the accelerating rate of genome sequencing. This manuscript describes TransportDB 2.0 (http://www.membranetransport.org/transportDB2/), a completely updated version of TransportDB, which provides access to the large volumes of data generated by our automated transporter annotation pipeline. The TransportDB 2.0 web portal has been rebuilt to utilize contemporary JavaScript libraries, providing a highly interactive interface to the annotation information, and incorporates analysis tools that enable users to query the database on a number of levels. For example, TransportDB 2.0 includes tools that allow users to select annotated genomes of interest from the thousands of species held in the database and compare their complete transporter complements.",2016-11-28 +23369106,SNPranker 2.0: a gene-centric data mining tool for diseases associated SNP prioritization in GWAS.,"

Background

The capability of correlating specific genotypes with human diseases is a complex issue in spite of all advantages arisen from high-throughput technologies, such as Genome Wide Association Studies (GWAS). New tools for genetic variants interpretation and for Single Nucleotide Polymorphisms (SNPs) prioritization are actually needed. Given a list of the most relevant SNPs statistically associated to a specific pathology as result of a genotype study, a critical issue is the identification of genes that are effectively related to the disease by re-scoring the importance of the identified genetic variations. Vice versa, given a list of genes, it can be of great importance to predict which SNPs can be involved in the onset of a particular disease, in order to focus the research on their effects.

Results

We propose a new bioinformatics approach to support biological data mining in the analysis and interpretation of SNPs associated to pathologies. This system can be employed to design custom genotyping chips for disease-oriented studies and to re-score GWAS results. The proposed method relies (1) on the data integration of public resources using a gene-centric database design, (2) on the evaluation of a set of static biomolecular annotations, defined as features, and (3) on the SNP scoring function, which computes SNP scores using parameters and weights set by users. We employed a machine learning classifier to set default feature weights and an ontological annotation layer to enable the enrichment of the input gene set. We implemented our method as a web tool called SNPranker 2.0 (http://www.itb.cnr.it/snpranker), improving our first published release of this system. A user-friendly interface allows the input of a list of genes, SNPs or a biological process, and to customize the features set with relative weights. As result, SNPranker 2.0 returns a list of SNPs, localized within input and ontologically enriched genes, combined with their prioritization scores.

Conclusions

Different databases and resources are already available for SNPs annotation, but they do not prioritize or re-score SNPs relying on a-priori biomolecular knowledge. SNPranker 2.0 attempts to fill this gap through a user-friendly integrated web resource. End users, such as researchers in medical genetics and epidemiology, may find in SNPranker 2.0 a new tool for data mining and interpretation able to support SNPs analysis. Possible scenarios are GWAS data re-scoring, SNPs selection for custom genotyping arrays and SNPs/diseases association studies.",2013-01-14 +30761339,Draft genome sequence data and analysis of Brachybacterium sp. strain EE-P12 isolated from a laboratory-scale anaerobic reactor.,"The species of the genus Brachybacterium belonging to the family Dermabacteraceae within the phylum Actinobacteria are gram-positive, facultatively anaerobic or aerobic, nonmotile and nonsporeforming bacteria. Cells of Brachybacterium spp. vary in shape from coccoid forms (stationary phase) to rods (exponential phase). Brachybacterium species can be isolated from numerous sources such as poultry deep litter, human gut, soil, food products. Here we describe the draft genome sequence of Brachybacterium sp. EE-P12 that was isolated from a laboratory-scale anaerobic digester. The genome sequencing generated 3,964,988 bp, with a G+C content of 72.2%. This draft genome data has been deposited at DDBJ/ENA/GenBank under the accession number QXCP00000000 (https://www.ncbi.nlm.nih.gov/nuccore/QXCP00000000).",2018-11-26 +22140100,"HGPD: Human Gene and Protein Database, 2012 update.","The Human Gene and Protein Database (HGPD; http://www.HGPD.jp/) is a unique database that stores information on a set of human Gateway entry clones in addition to protein expression and protein synthesis data. 
The HGPD was launched in November 2008, and 33,275 human Gateway entry clones have been constructed from the open reading frames (ORFs) of full-length cDNA, thus representing the largest collection in the world. Recently, research objectives have focused on the development of new medicines and the establishment of novel diagnostic methods and medical treatments. And, studies using proteins and protein information, which are closely related to gene function, have been undertaken. For this update, we constructed an additional 9974 human Gateway entry clones, giving a total of 43,249. This set of human Gateway entry clones was named the Human Proteome Expression Resource, known as the 'HuPEX'. In addition, we also classified the clones into 10 groups according to protein function. Moreover, in vivo cellular localization data of proteins for 32,651 human Gateway entry clones were included for retrieval from the HGPD. In 'Information Overview', which presents the search results, the ORF region of each cDNA is now displayed allowing the Gateway entry clones to be searched more easily.",2011-12-02 +27141850,Development of Antimicrobial Peptide Prediction Tool for Aquaculture Industries.,"Microbial diseases in fish, plant, animal and human are rising constantly; thus, discovery of their antidote is imperative. The use of antibiotic in aquaculture further compounds the problem by development of resistance and consequent consumer health risk by bio-magnification. Antimicrobial peptides (AMPs) have been highly promising as natural alternative to chemical antibiotics. Though AMPs are molecules of innate immune defense of all advance eukaryotic organisms, fish being heavily dependent on their innate immune defense has been a good source of AMPs with much wider applicability. Machine learning-based prediction method using wet laboratory-validated fish AMP can accelerate the AMP discovery using available fish genomic and proteomic data. 
Earlier AMP prediction servers are based on multi-phyla/species data, and we report here the world's first AMP prediction server in fishes. It is freely accessible at http://webapp.cabgrid.res.in/fishamp/ . A total of 151 AMPs related to fish collected from various databases and published literature were taken for this study. For model development and prediction, N-terminus residues, C-terminus residues and full sequences were considered. Best models were with kernels polynomial-2, linear and radial basis function with accuracy of 97, 99 and 97 %, respectively. We found that performance of support vector machine-based models is superior to artificial neural network. This in silico approach can drastically reduce the time and cost of AMP discovery. This accelerated discovery of lead AMP molecules having potential wider applications in diverse area like fish and human health as substitute of antibiotics, immunomodulator, antitumor, vaccine adjuvant and inactivator, and also for packaged food can be of much importance for industries.",2016-09-01 +29547902,SMMB: a stochastic Markov blanket framework strategy for epistasis detection in GWAS.,"Motivation: Large scale genome-wide association studies (GWAS) are tools of choice for discovering associations between genotypes and phenotypes. To date, many studies rely on univariate statistical tests for association between the phenotype and each assayed single nucleotide polymorphism (SNP). However, interaction between SNPs, namely epistasis, must be considered when tackling the complexity of underlying biological mechanisms. Epistasis analysis at large scale entails a prohibitive computational burden when addressing the detection of more than two interacting SNPs. In this paper, we introduce a stochastic causal graph-based method, SMMB, to analyze epistatic patterns in GWAS data. 
Results: We present Stochastic Multiple Markov Blanket algorithm (SMMB), which combines both ensemble stochastic strategy inspired from random forests and Bayesian Markov blanket-based methods. We compared SMMB with three other recent algorithms using both simulated and real datasets. Our method outperforms the other compared methods for a majority of simulated cases of 2-way and 3-way epistasis patterns (especially in scenarii where minor allele frequencies of causal SNPs are low). Our approach performs similarly as two other compared methods for large real datasets, in terms of power, and runs faster. Availability and implementation: Parallel version available on https://ls2n.fr/listelogicielsequipe/DUKe/128/. Supplementary information: Supplementary data are available at Bioinformatics online.",2018-08-01 +22139927,MetaBase--the wiki-database of biological databases.,"Biology is generating more data than ever. As a result, there is an ever increasing number of publicly available databases that analyse, integrate and summarize the available data, providing an invaluable resource for the biological community. As this trend continues, there is a pressing need to organize, catalogue and rate these resources, so that the information they contain can be most effectively exploited. MetaBase (MB) (http://MetaDatabase.Org) is a community-curated database containing more than 2000 commonly used biological databases. Each entry is structured using templates and can carry various user comments and annotations. Entries can be searched, listed, browsed or queried. The database was created using the same MediaWiki technology that powers Wikipedia, allowing users to contribute on many different levels. The initial release of MB was derived from the content of the 2007 Nucleic Acids Research (NAR) Database Issue. Since then, approximately 100 databases have been manually collected from the literature, and users have added information for over 240 databases. 
MB is synchronized annually with the static Molecular Biology Database Collection provided by NAR. To date, there have been 19 significant contributors to the project; each one is listed as an author here to highlight the community aspect of the project.",2011-12-01 +30950516,Diaphysator: An online application for the exhaustive cartography and user-friendly statistical analysis of long bone diaphyses.,"The cross-sectional geometry (CSG) of long bone diaphyses is used in bioanthropology to evaluate their resistance to biomechanical constraints and to infer life-history-related patterns such as mobility, activity specialization or intensity, sexual dimorphism, body mass and proportions. First limited by technical analytical constraints to the analysis of one or two cross sections per bone, it has evolved into the analysis of cross sections of the full length of the diaphyseal part of long bones. More recently, researchers have developed analytical tools to map the cortical thickness of entire diaphyses to evaluate locomotor signatures. However, none of these analytical tools are easy to use for scientists who are not familiar with computer programming, and some statistical procedures-such as mapping the correlation coefficients of the diaphyseal thickness with various parameters have yet to be made available. Therefore, we developed an automated and open-source application that renders those analyses (both CSG and cortical thickness) in a semiautomated and user friendly manner. This application, called ""Diaphysator"", is associated with another free software (""Extractor"", presented in Dupej et al. (2017). American Journal of Physical Anthropology, 164, 868-876). Diaphysator can be used as an online application (https://diaphysator.shinyapps.io/maps) or as a package for R statistical software. 
Along with the mean maps of cortical thickness and mean CSG parameter graphs, the users can evaluate the correlations and partial correlations of both CSG parameters at every cross section along the diaphyseal length, and cortical thickness data points of the entire diaphysis, with any factor such as age, sex, stature, and body mass.",2019-04-05 +29566144,EWAS: epigenome-wide association study software 2.0.,"

Motivation

With the development of biotechnology, DNA methylation data showed exponential growth. Epigenome-wide association study (EWAS) provide a systematic approach to uncovering epigenetic variants underlying common diseases/phenotypes. But the EWAS software has lagged behind compared with genome-wide association study (GWAS). To meet the requirements of users, we developed a convenient and useful software, EWAS2.0.

Results

EWAS2.0 can analyze EWAS data and identify the association between epigenetic variations and disease/phenotype. On the basis of EWAS1.0, we have added more distinctive features. EWAS2.0 software was developed based on our 'population epigenetic framework' and can perform: (i) epigenome-wide single marker association study; (ii) epigenome-wide methylation haplotype (meplotype) association study and (iii) epigenome-wide association meta-analysis. Users can use EWAS2.0 to execute chi-square test, t-test, linear regression analysis, logistic regression analysis, identify the association between epi-alleles, identify the methylation disequilibrium (MD) blocks, calculate the MD coefficient, the frequency of meplotype and Pearson's correlation coefficients and carry out meta-analysis and so on. Finally, we expect EWAS2.0 to become a popular software and be widely used in epigenome-wide associated studies in the future.

Availability and implementation

The EWAS software is freely available at http://www.ewas.org.cn or http://www.bioapp.org/ewas.",2018-08-01 +22080561,GABI-Kat SimpleSearch: new features of the Arabidopsis thaliana T-DNA mutant database.,"T-DNA insertion mutants are very valuable for reverse genetics in Arabidopsis thaliana. Several projects have generated large sequence-indexed collections of T-DNA insertion lines, of which GABI-Kat is the second largest resource worldwide. User access to the collection and its Flanking Sequence Tags (FSTs) is provided by the front end SimpleSearch (http://www.GABI-Kat.de). Several significant improvements have been implemented recently. The database now relies on the TAIRv10 genome sequence and annotation dataset. All FSTs have been newly mapped using an optimized procedure that leads to improved accuracy of insertion site predictions. A fraction of the collection with weak FST yield was re-analysed by generating new FSTs. Along with newly found predictions for older sequences about 20,000 new FSTs were included in the database. Information about groups of FSTs pointing to the same insertion site that is found in several lines but is real only in a single line are included, and many problematic FST-to-line links have been corrected using new wet-lab data. SimpleSearch currently contains data from ~71,000 lines with predicted insertions covering 62.5% of the 27,206 nuclear protein coding genes, and offers insertion allele-specific data from 9545 confirmed lines that are available from the Nottingham Arabidopsis Stock Centre.",2011-11-12 +30470762,RNAm5Cfinder: A Web-server for Predicting RNA 5-methylcytosine (m5C) Sites Based on Random Forest.,"5-methylcytosine (m5C) is a common nucleobase modification, and recent investigations have indicated its prevalence in cellular RNAs including mRNA, tRNA and rRNA. With the rapid accumulation of m5C sites data, it becomes not only feasible but also important to build an accurate model to predict m5C sites in silico. 
For this purpose, here, we developed a web-server named RNAm5Cfinder based on RNA sequence features and machine learning method to predict RNA m5C sites in eight tissue/cell types from mouse and human. We confirmed the accuracy and usefulness of RNAm5Cfinder by independent tests, and the results show that the comprehensive and cell-specific predictors could pinpoint the generic or tissue-specific m5C sites with the Area Under Curve (AUC) no less than 0.77 and 0.87, respectively. RNAm5Cfinder web-server is freely available at http://www.rnanut.net/rnam5cfinder .",2018-11-23 +30216700,HToPred: A Tool for Human Topoisomerase II Inhibitor Prediction.,"The enzyme human topoisomerase IIα (hTopoIIα) is an important anticancer drug target. Due to the availability of multiple inhibitor-binding sites in this enzyme, the anti-hTopoII agents possess high chemical diversity. Chemoinformatics methods can be used to identify lead compounds from large databases for hTopoII inhibitory activity and classify them. In this work, we report the use of machine learning methods to develop classification models for the identification of possible anti-hTopoIIα agents and to classify them as catalytic inhibitors vs. poisons. Initially, an extensive dataset of small molecules which are reported to be evaluated towards hTopoIIα inhibition was collected from ChEMBL database and literature. Using this dataset, predictive models for classifying small molecules into hTopoIIα inhibitors and non-inhibitors were developed. Additionally, the model development was taken up for the prediction of the type of hTopoIIα inactivation. Several molecular fingerprints and physicochemical descriptors of the molecules in the dataset were calculated using the chemoinformatics tool RDKit. Various classifiers were evaluated to establish suitable protocol. Further, ensemble models were developed by bagging of homogenous classifier and selective fusion of heterogeneous classifiers. 
The models were thoroughly validated with 5-fold cross validation and external validation. The best performing models were incorporated into a tool christened as Human Topoisomerase IIα Inhibitor Prediction (HToPred, http://14.139.57.41/HToPred). A molecular docking based validation for the successful application of HToPred in predicting the mode of enzyme inhibition was performed, which further established the acceptability of this tool. This tool can serve as an important platform to prescreen compounds for anti-hTopoIIα potential.",2018-09-14 +26911352,"Human genetic variation database, a reference database of genetic variations in the Japanese population.","Whole-genome and -exome resequencing using next-generation sequencers is a powerful approach for identifying genomic variations that are associated with diseases. However, systematic strategies for prioritizing causative variants from many candidates to explain the disease phenotype are still far from being established, because the population-specific frequency spectrum of genetic variation has not been characterized. Here, we have collected exomic genetic variation from 1208 Japanese individuals through a collaborative effort, and aggregated the data into a prevailing catalog. In total, we identified 156 622 previously unreported variants. The allele frequencies for the majority (88.8%) were lower than 0.5% in allele frequency and predicted to be functionally deleterious. In addition, we have constructed a Japanese-specific major allele reference genome by which the number of unique mapping of the short reads in our data has increased 0.045% on average. Our results illustrate the importance of constructing an ethnicity-specific reference genome for identifying rare variants. All the collected data were centralized to a newly developed database to serve as useful resources for exploring pathogenic variations. 
Public access to the database is available at http://www.genome.med.kyoto-u.ac.jp/SnpDB/.",2016-02-25 +28025336,Construction of antimicrobial peptide-drug combination networks from scientific literature based on a semi-automated curation workflow. ,"Considerable research efforts are being invested in the development of novel antimicrobial therapies effective against the growing number of multi-drug resistant pathogens. Notably, the combination of different agents is increasingly explored as means to exploit and improve individual agent actions while minimizing microorganism resistance. Although there are several databases on antimicrobial agents, scientific literature is the primary source of information on experimental antimicrobial combination testing. This work presents a semi-automated database curation workflow that supports the mining of scientific literature and enables the reconstruction of recently documented antimicrobial combinations. Currently, the database contains data on antimicrobial combinations that have been experimentally tested against Pseudomonas aeruginosa, Staphylococcus aureus, Escherichia coli, Listeria monocytogenes and Candida albicans, which are prominent pathogenic organisms and are well-known for their wide and growing resistance to conventional antimicrobials. Researchers are able to explore the experimental results for a single organism or across organisms. Likewise, researchers may look into indirect network associations and identify new potential combinations to be tested. The database is available without charges.Database URL: http://sing.ei.uvigo.es/antimicrobialCombination/.",2016-12-26 +30977537,2018 International Consensus Meeting on Musculoskeletal Infection: Research Priorities from the General Assembly Questions.,"Musculoskeletal infections (MSKI) remain the bane of orthopedic surgery, and result in grievous illness and inordinate costs that threaten healthcare systems. 
As prevention, diagnosis, and treatment has remained largely unchanged over the last 50 years, a 2nd International Consensus Meeting on Musculoskeletal Infection (ICM 2018, https://icmphilly.com) was completed. Questions pertaining to all areas of MSKI were extensively researched to prepare recommendations, which were discussed and voted on by the delegates using the Delphi methodology. The questions, including the General Assembly (GA) results, have been published (GA questions). However, as critical outcomes include: (i) incidence and cost data that substantiate the problems, and (ii) establishment of research priorities; an ICM 2018 research workgroup (RW) was assembled to accomplish these tasks. Here, we present the result of the RW consensus on the current and projected incidence of infection, and the costs per patient, for all orthopedic subspecialties, which range from 0.1% to 30%, and $17,000 to $150,000. The RW also identified the most important research questions. The Delphi methodology was utilized to initially derive four objective criteria to define a subset of the 164 GA questions that are high priority for future research. Thirty-eight questions (23% of all GA questions) achieved the requisite > 70% agreement vote, and are highlighted in this Consensus article within six thematic categories: acute versus chronic infection, host immunity, antibiotics, diagnosis, research caveats, and modifiable factors. Finally, the RW emphasizes that without appropriate funding to address these high priority research questions, a 3rd ICM on MSKI to address similar issues at greater cost is inevitable.",2019-04-25 +31001324,BayesPI-BAR2: A New Python Package for Predicting Functional Non-coding Mutations in Cancer Patient Cohorts.,"Most of somatic mutations in cancer occur outside of gene coding regions. These mutations may disrupt the gene regulation by affecting protein-DNA interaction. A study of these disruptions is important in understanding tumorigenesis. 
However, current computational tools process DNA sequence variants individually, when predicting the effect on protein-DNA binding. Thus, it is a daunting task to identify functional regulatory disturbances among thousands of mutations in a patient. Previously, we have reported and validated a pipeline for identifying functional non-coding somatic mutations in cancer patient cohorts, by integrating diverse information such as gene expression, spatial distribution of the mutations, and a biophysical model for estimating protein binding affinity. Here, we present a new user-friendly Python package BayesPI-BAR2 based on the proposed pipeline for integrative whole-genome sequence analysis. This may be the first prediction package that considers information from both multiple mutations and multiple patients. It is evaluated in follicular lymphoma and skin cancer patients, by focusing on sequence variants in gene promoter regions. BayesPI-BAR2 is a useful tool for predicting functional non-coding mutations in whole genome sequencing data: it allows identification of novel transcription factors (TFs) whose binding is altered by non-coding mutations in cancer. BayesPI-BAR2 program can analyze multiple datasets of genome-wide mutations at once and generate concise, easily interpretable reports for potentially affected gene regulatory sites. The package is freely available at http://folk.uio.no/junbaiw/BayesPI-BAR2/.",2019-04-02 +22121217,The Stem Cell Discovery Engine: an integrated repository and analysis system for cancer stem cell comparisons.,"Mounting evidence suggests that malignant tumors are initiated and maintained by a subpopulation of cancerous cells with biological properties similar to those of normal stem cells. However, descriptions of stem-like gene and pathway signatures in cancers are inconsistent across experimental systems. 
Driven by a need to improve our understanding of molecular processes that are common and unique across cancer stem cells (CSCs), we have developed the Stem Cell Discovery Engine (SCDE)-an online database of curated CSC experiments coupled to the Galaxy analytical framework. The SCDE allows users to consistently describe, share and compare CSC data at the gene and pathway level. Our initial focus has been on carefully curating tissue and cancer stem cell-related experiments from blood, intestine and brain to create a high quality resource containing 53 public studies and 1098 assays. The experimental information is captured and stored in the multi-omics Investigation/Study/Assay (ISA-Tab) format and can be queried in the data repository. A linked Galaxy framework provides a comprehensive, flexible environment populated with novel tools for gene list comparisons against molecular signatures in GeneSigDB and MSigDB, curated experiments in the SCDE and pathways in WikiPathways. The SCDE is available at http://discovery.hsci.harvard.edu.",2011-11-24 +31057068,Fast and memory efficient approach for mapping NGS reads to a reference genome.,"New generation sequencing machines: Illumina and Solexa can generate millions of short reads from a given genome sequence on a single run. Alignment of these reads to a reference genome is a core step in Next-generation sequencing data analysis such as genetic variation and genome re-sequencing etc. Therefore there is a need of a new approach, efficient with respect to memory as well as time to align these enormous reads with the reference genome. Existing techniques such as MAQ, Bowtie, BWA, BWBBLE, Subread, Kart, and Minimap2 require huge memory for whole reference genome indexing and reads alignment. Gapped alignment versions of these techniques are also 20-40% slower than their respective normal versions. 
In this paper, an efficient approach: WIT for reference genome indexing and reads alignment using Burrows-Wheeler Transform (BWT) and Wavelet Tree (WT) is proposed. Both exact and approximate alignments are possible by it. Experimental work shows that the proposed approach WIT performs the best in case of protein sequence indexing. For indexing, the reference genome space required by WIT is 0.6 N (N is the size of reference genome) whereas existing techniques BWA, Subread, Kart, and Minimap2 require space in between 1.25 N to 5 N. Experimentally, it is also observed that even using such small index size alignment time of proposed approach is comparable in comparison to BWA, Subread, Kart, and Minimap2. Other alignment parameters accuracy and confidentiality are also experimentally shown to be better than Minimap2. The source code of the proposed approach WIT is available at http://www.algorithm-skg.com/wit/home.html .",2019-04-01 +32127833,Impact of the community healthcare plan and the free maternity services programme on maternal and child healthcare utilisation in rural Kenya: a dairy farmer population-based study.,"

Background

Access to and utilisation of quality maternal and child healthcare services is generally recognized as the best way to reduce maternal and child mortality.

Objectives

We evaluated whether the introduction of a voluntary family health insurance programme, combined with quality improvement of healthcare facilities [The Community Health Plan (TCHP)], and the introduction of free access to delivery services in all public facilities [Free Maternity Services programme (FMS)] increased antenatal care utilisation and use of facility deliveries among pregnant women in rural Kenya.

Methods

TCHP was introduced in 2011, whilst the FMS programme was launched in 2013. To measure the impact of TCHP, percentage points (PP) changes in antenatal care utilisation and facility deliveries from the pre-TCHP to the post-TCHP period between the TCHP programme area and a control area were compared in multivariable difference-in-differences analysis. To measure the impact of the FMS programme, PP changes in antenatal care utilisation and facility deliveries from the pre-FMS to the post-FMS period in the pooled TCHP programme and control areas were assessed in multivariable logistic regression analysis. Data was collected through household surveys in 2011 and 2014. Households (n=549) were randomly selected from the member lists of 2 dairy companies, and all full-term pregnancies in the 3.5 years preceding the baseline and follow-up survey among women aged 15-49 at the time of pregnancy were eligible for this study (n=295).

Results

Because only 4.1% of eligible women were insured through TCHP during pregnancy, any increase in utilisation attributable to the TCHP programme could only have come about as a result of the quality improvements in TCHP facilities. Antenatal care utilisation significantly increased after TCHP was introduced (14.4 PP; 95% CI: 4.5-24.3; P=0.004), whereas no effect was observed of the programme on facility deliveries (8.8 PP; 95% CI: -14.1 to +31.7; P=0.450). Facility deliveries significantly increased after the introduction of the FMS programme (27.9 PP; 95% CI: 11.8-44.1; P=0.001), but antenatal care utilisation did not change significantly (4.0 PP; 95% CI: -0.6 to +8.5; P=0.088).

Conclusion

Access to the FMS programme increased facility deliveries substantially and may contribute to improved maternal and new-born health and survival if the quality of delivery services is sustained or further improved. Despite low up-take, TCHP had a positive effect on antenatal care utilisation among uninsured women by improving the quality of existing healthcare facilities. An alignment of the two programmes could potentially lead to optimal results.

Funding

The study was funded by the Health Insurance Fund (http://www.hifund.org/), through a grant from the Dutch Ministry of Foreign Affairs.",2019-09-01 +29490085,Vitamin D supplementation for improvement of chronic low-grade inflammation in patients with type 2 diabetes: a systematic review and meta-analysis of randomized controlled trials.,"Background:Vitamin D has been proposed to have anti-inflammatory properties; however, the effect of vitamin D supplementation on inflammation in type 2 diabetes has not been established. Objective:The aim of this systematic review and meta-analysis was to examine the effect of vitamin D supplementation on inflammatory markers in patients with type 2 diabetes and to identify relevant gaps in knowledge. Data sources:MEDLINE, CINAHL, Embase, and EBM Reviews were searched systematically from inception to January 25, 2017. Study selection:Randomized controlled trials (RCTs) investigating the effects of vitamin D supplementation (any form, route, and duration, and with any cosupplementation) compared with placebo or usual care on inflammatory markers in patients with type 2 diabetes were selected. Data extraction:Study and sample characteristics and aggregate outcome data were extracted, risk of bias was determined, and quality of evidence was assessed using the Grading of Recommendations, Assessment, Development, and Evaluation (GRADE) approach. Results:Twenty-eight RCTs were included, 20 of which had data available for pooling. In meta-analyses of 20 RCTs (n = 1270 participants), vitamin D-supplemented groups had lower levels of C-reactive protein (standardized mean difference [SMD] -0.23; 95%CI, -0.37 to -0.09; P = 0.002) and tumor necrosis factor α (SMD -0.49; 95%CI, -0.84 to -0.15; P = 0.005), a lower erythrocyte sedimentation rate (SMD -0.47; 95%CI, -0.89 to -0.05; P = 0.03), and higher levels of leptin (SMD 0.42; 95%CI, 0.04-0.81; P = 0.03) compared with control groups. 
No differences were observed for adiponectin, interleukin 6, or E-selectin (all P > 0.05). In meta-regression and subgroup analyses, age, sex, body mass index, duration of diabetes, baseline vitamin D status, and dose and duration of supplementation did not alter the results. Conclusions:This meta-analysis provides level 1 evidence that vitamin D supplementation may reduce chronic low-grade inflammation in patients with type 2 diabetes. Systematic Review Registration:PROSPERO CRD42016047755. Available at: https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=47755 (9/15/2016).",2018-05-01 +29790928,Using meshes for MeSH term enrichment and semantic analyses.,"Summary:Medical Subject Headings (MeSH) is the NLM controlled vocabulary used to manually index articles for MEDLINE/PubMed. MeSH provides unique and comprehensive annotations for life science. The meshes package implements measurement of the semantic similarity of MeSH terms and gene products to help using MeSH vocabulary in knowledge mining. Enrichment analysis to extract the biological meanings from gene list, expression profile and genomic regions is also provided using MeSH annotation. Meshes supports more than 70 species and provides high quality visualization methods to help interpreting analysis results. Availability and implementation:meshes is released under Artistic-2.0 License. The source code and documents are freely available through Bioconductor (https://www.bioconductor.org/packages/meshes). Supplementary information:Supplementary data are available at Bioinformatics online.",2018-11-01 +31394288,Significance of Conducting 2 Types of Fecal Tests in Patients With Ulcerative Colitis.,"

Background & aims

We compared the diagnostic accuracy of the fecal calprotectin (FCP) test vs the fecal immunochemical blood test (FIT) in determining the endoscopic severity and predicting outcomes of patients with ulcerative colitis (UC).

Methods

We performed a nationwide study of 879 patients with UC, enrolled at medical centers across Japan, from March 2015 to March 2017. We collected data on fecal biomarkers, endoscopic severities, and other clinical indices from Cohort 1 (n = 427) and assessed the diagnostic accuracy of FCP measurement and FIT results in determining clinical severity, based on Mayo score, and endoscopic remission, based on Mayo endoscopic sub-score (MES) or UC endoscopic index of severity. We also followed 452 patients in clinical remission from UC (Cohort 2) for 12 months and evaluated the associations of FCP levels and FIT results with clinical recurrence.

Results

The levels of FCP and FIT each correlated with the MES and UC endoscopic index of severity. There were no significant differences in the areas under the curve of FCP vs FIT in distinguishing patients with MES≤1 from those with MES≥2 (P = .394) or in distinguishing patients with MES=0 from those with MES≥1 (P = .178). Among 405 patients in clinical remission at baseline, 38 (9.4%) had UC recurrences within 3 months and 90 (22.2%) had recurrences within 12 months. FCP≥146 mg/kg (hazard ratio [HR], 4.83; 95% confidence interval [CI], 2.80-8.33) and FIT≥77 ng/mL (HR, 2.92; 95% CI, 1.76-4.83) were independently associated with clinical recurrence within 12 months. UC recurred within 12 months in 69% of patients with levels of FCP≥146 mg/kg and FIT ≥77 ng/mL; this value was significantly higher than the rate of recurrence in patients with levels of FCP≥146 mg/kg and FIT <77 ng/mL (31.5%, P < .001) or patients with levels of FCP<146 mg/kg and FIT ≥77 ng/mL (30.0%, P < .001).

Conclusion

In a nationwide study of patients with UC in Japan, we found that the level of FCP and FIT could each identify patients with endoscopic markers of disease severity (MES≥2). The combination of FCP and FIT results can identify patients in remission who are at risk for disease recurrence. Clinical Trials Registry no: UMIN000017650 (http://www.umin.ac.jp/ctr/).",2019-08-05 +27855088,Does the UKCAT predict performance on exit from medical school? A national cohort study.,"

Objectives

Most UK medical programmes use aptitude tests during student selection, but large-scale studies of predictive validity are rare. This study assesses the UK Clinical Aptitude Test (UKCAT: http://www.ukcat.ac.uk), and 4 of its subscales, along with individual and contextual socioeconomic background factors, as predictors of performance during, and on exit from, medical school.

Methods

This was an observational study of 6294 medical students from 30 UK medical programmes who took the UKCAT from 2006 to 2008, for whom selection data from the UK Foundation Programme (UKFPO), the next stage of UK medical education training, were available in 2013. We included candidate demographics, UKCAT (cognitive domains; total scores), UKFPO Educational Performance Measure (EPM) and national exit situational judgement test (SJT). Multilevel modelling was used to assess relationships between variables, adjusting for confounders.

Results

The UKCAT—as a total score and in terms of the subtest scores—has significant predictive validity for performance on the UKFPO EPM and SJT. UKFPO performance was also affected positively by female gender, maturity, white ethnicity and coming from a higher social class area at the time of application to medical school. An inverse pattern was seen for a contextual measure of school, with those attending fee-paying schools performing significantly more weakly on the EPM decile, the EPM total and the total UKFPO score, but not the SJT, than those attending other types of school.

Conclusions

This large-scale study, the first to link 2 national databases-UKCAT and UKFPO, has shown that UKCAT is a predictor of medical school outcome. The data provide modest supportive evidence for the UKCAT's role in student selection. The conflicting relationships of socioeconomic contextual measures (area and school) with outcome adds to wider debates about the limitations of these measures, and indicates the need for further research.",2016-10-07 +26508757,LOLA: enrichment analysis for genomic region sets and regulatory elements in R and Bioconductor.,"

Unlabelled

Genomic datasets are often interpreted in the context of large-scale reference databases. One approach is to identify significantly overlapping gene sets, which works well for gene-centric data. However, many types of high-throughput data are based on genomic regions. Locus Overlap Analysis (LOLA) provides easy and automatable enrichment analysis for genomic region sets, thus facilitating the interpretation of functional genomics and epigenomics data.

Availability and implementation

R package available in Bioconductor and on the following website: http://lola.computational-epigenetics.org.",2015-10-27 +24905498,The plant glycosyltransferase clone collection for functional genomics.,"The glycosyltransferases (GTs) are an important and functionally diverse family of enzymes involved in glycan and glycoside biosynthesis. Plants have evolved large families of GTs which undertake the array of glycosylation reactions that occur during plant development and growth. Based on the Carbohydrate-Active enZymes (CAZy) database, the genome of the reference plant Arabidopsis thaliana codes for over 450 GTs, while the rice genome (Oryza sativa) contains over 600 members. Collectively, GTs from these reference plants can be classified into over 40 distinct GT families. Although these enzymes are involved in many important plant specific processes such as cell-wall and secondary metabolite biosynthesis, few have been functionally characterized. We have sought to develop a plant GTs clone resource that will enable functional genomic approaches to be undertaken by the plant research community. In total, 403 (88%) of CAZy defined Arabidopsis GTs have been cloned, while 96 (15%) of the GTs coded by rice have been cloned. The collection resulted in the update of a number of Arabidopsis GT gene models. The clones represent full-length coding sequences without termination codons and are Gateway® compatible. To demonstrate the utility of this JBEI GT Collection, a set of efficient particle bombardment plasmids (pBullet) was also constructed with markers for the endomembrane. The utility of the pBullet collection was demonstrated by localizing all members of the Arabidopsis GT14 family to the Golgi apparatus or the endoplasmic reticulum (ER). 
Updates to these resources are available at the JBEI GT Collection website http://www.addgene.org/.",2014-07-09 +29994190,Taste Recognition in E-Tongue Using Local Discriminant Preservation Projection.,"Electronic tongue (E-Tongue), as a novel taste analysis tool, shows a promising perspective for taste recognition. In this paper, we constructed a voltammetric E-Tongue system and measured 13 different kinds of liquid samples, such as tea, wine, beverage, functional materials, etc. Owing to the noise of system and a variety of environmental conditions, the acquired E-Tongue data shows inseparable patterns. To this end, from the viewpoint of algorithm, we propose a local discriminant preservation projection (LDPP) model, an under-studied subspace learning algorithm, that concerns the local discrimination and neighborhood structure preservation. In contrast with other conventional subspace projection methods, LDPP has two merits. On one hand, with local discrimination it has a higher tolerance to abnormal data or outliers. On the other hand, it can project the data to a more separable space with local structure preservation. Further, support vector machine, extreme learning machine (ELM), and kernelized ELM (KELM) have been used as classifiers for taste recognition in E-Tongue. Experimental results demonstrate that the proposed E-Tongue is effective for multiple tastes recognition in both efficiency and effectiveness. Particularly, the proposed LDPP-based KELM classifier model achieves the best taste recognition performance of 98%. 
The developed benchmark data sets and codes will be released and downloaded in http://www.leizhang.tk/ tempcode.html.",2018-01-17 +22325770,Outcome of the first electron microscopy validation task force meeting.,"This Meeting Review describes the proceedings and conclusions from the inaugural meeting of the Electron Microscopy Validation Task Force organized by the Unified Data Resource for 3DEM (http://www.emdatabank.org) and held at Rutgers University in New Brunswick, NJ on September 28 and 29, 2010. At the workshop, a group of scientists involved in collecting electron microscopy data, using the data to determine three-dimensional electron microscopy (3DEM) density maps, and building molecular models into the maps explored how to assess maps, models, and other data that are deposited into the Electron Microscopy Data Bank and Protein Data Bank public data archives. The specific recommendations resulting from the workshop aim to increase the impact of 3DEM in biology and medicine.",2012-02-01 +27242032,URS DataBase: universe of RNA structures and their motifs. ,"The Universe of RNA Structures DataBase (URSDB) stores information obtained from all RNA-containing PDB entries (2935 entries in October 2015). The content of the database is updated regularly. The database consists of 51 tables containing indexed data on various elements of the RNA structures. The database provides a web interface allowing user to select a subset of structures with desired features and to obtain various statistical data for a selected subset of structures or for all structures. In particular, one can easily obtain statistics on geometric parameters of base pairs, on structural motifs (stems, loops, etc.) or on different types of pseudoknots. The user can also view and get information on an individual structure or its selected parts, e.g. RNA-protein hydrogen bonds. URSDB employs a new original definition of loops in RNA structures. 
That definition fits both pseudoknot-free and pseudoknotted secondary structures and coincides with the classical definition in case of pseudoknot-free structures. To our knowledge, URSDB is the first database supporting searches based on topological classification of pseudoknots and on extended loop classification.Database URL: http://server3.lpm.org.ru/urs/.",2016-05-30 +30628845,Exposure to Perfluoroalkyl Substances during Fetal Life and Pubertal Development in Boys and Girls from the Danish National Birth Cohort.,"

Background

It remains unsettled whether prenatal exposure to perfluoroalkyl substances (PFASs) affects human reproductive health through potential endocrine disruption.

Objectives

We aimed to explore the associations between prenatal exposure to several PFASs and various aspects of pubertal development in boys and girls.

Methods

We studied two samples ([Formula: see text] and 445) from the Puberty Cohort, nested within the Danish National Birth Cohort (DNBC), measuring PFAS in maternal plasma from early gestation. Data on pubertal development were collected biannually from the age of 11 y until full maturation, using web-based questionnaires. Outcomes were age at menarche, voice break, first ejaculation, and Tanner stages 2 to 5 for pubic hair, breast, genital development, and a combined puberty indicator. A regression model for censored data was used to estimate mean difference (months) in age at achieving the pubertal outcomes across tertiles of PFAS concentrations and with a doubling of PFAS concentrations (continuous). For perfluorooctanoic acid (PFOA) and perfluorooctanesulfonic acid (PFOS), a meta-analysis was used to provide a weighted average of the point estimates from samples 1 and 2.

Results

Overall, prenatal exposure to PFOS, perfluorohexane sulfonate (PFHxS), perfluoroheptane sulfonate (PFHpS), perfluorononanoic acid (PFNA), and perfluorodecanoic acid (PFDA) (girls) and PFHxS and PFHpS (boys) was associated with lower mean age at puberty marker onset. PFDA and PFNA exposure was associated with higher mean age at onset of puberty in boys. Nonmonotonic associations in girls (PFOS, PFHpS, PFDA) and boys (PFDA, PFNA) were observed, showing larger mean age differences for the combined puberty indicator in the middle tertile [girls: PFOS: [Formula: see text] mo, 95% confidence interval (CI): [Formula: see text], [Formula: see text]; PFHpS: [Formula: see text] mo, 95% CI: [Formula: see text], 1.85; PFDA: [Formula: see text] mo, 95% CI: [Formula: see text], 1.83; and boys: PFNA: 4.45 mo, 95% CI: [Formula: see text], 10.21; PFDA: 4.59 mo, 95% CI: [Formula: see text], 10.11] than in the highest tertile with the lowest as reference.

Conclusions

Our population-based cohort study suggests sex-specific associations of altered pubertal development with prenatal exposure to PFASs. These findings are novel, and replication is needed. https://doi.org/10.1289/EHP3567.",2019-01-01 +29893907,BeStSel: a web server for accurate protein secondary structure prediction and fold recognition from the circular dichroism spectra.,"Circular dichroism (CD) spectroscopy is a widely used method to study the protein secondary structure. However, for decades, the general opinion was that the correct estimation of β-sheet content is challenging because of the large spectral and structural diversity of β-sheets. Recently, we showed that the orientation and twisting of β-sheets account for the observed spectral diversity, and developed a new method to estimate accurately the secondary structure (PNAS, 112, E3095). BeStSel web server provides the Beta Structure Selection method to analyze the CD spectra recorded by conventional or synchrotron radiation CD equipment. Both normalized and measured data can be uploaded to the server either as a single spectrum or series of spectra. The originality of BeStSel is that it carries out a detailed secondary structure analysis providing information on eight secondary structure components including parallel-β structure and antiparallel β-sheets with three different groups of twist. Based on these, it predicts the protein fold down to the topology/homology level of the CATH protein fold classification. The server also provides a module to analyze the structures deposited in the PDB for BeStSel secondary structure contents in relation to Dictionary of Secondary Structure of Proteins data. The BeStSel server is freely accessible at http://bestsel.elte.hu.",2018-07-01 +30241075,Next generation database search algorithm for forensic mitogenome analyses.,"Mitochondrial DNA (mtDNA) variation is being reported relative to the corrected version of the first sequenced human mitochondrial genome. 
A review of the existing literature across disciplines that employ mtDNA demonstrates that insertions and deletions are not reported in a standardized way. This may lead to false exclusions of identical sequences, unidentified matches in missing persons mtDNA databases, biased mtDNA database frequency estimates and overestimation of the genetic evidence. Seven years ago we introduced alignment-free database search software (SAM) and implemented it into the mtDNA database EMPOP (https://empop.online) to produce reliable and conservative frequency estimates that are required in the forensic context. However, ambiguity remained in how laboratories have been reporting mitotypes, as often more than one single alignment of a given mtDNA sequence was feasible. In order to overcome this limitation we here describe a concept and provide software for producing stable, harmonized phylogenetic alignment of mtDNA sequences for database searches. The new software SAM 2 will be made available via EMPOP and provide the user with the already established conservative frequency estimates. In addition, SAM 2 offers the rCRS-coded haplotype of a given mtDNA sequence following the established and widely accepted phylogenetic alignment. This provides the user with feedback on how mitotypes are stored in EMPOP and how they should be reported in order to harmonize nomenclature. Finally, this approach does not only permit reliable mtDNA nomenclature in forensics but invites related disciplines to take advantage of a standardized way of reporting mtDNA variation, thus closing the ranks between different genetic fields and supporting dialogue and collaboration between mtDNA scholars from various disciplines.",2018-09-09 +27602200,"Disease, Models, Variants and Altered Pathways-Journeying RGD Through the Magnifying Glass.","Understanding the pathogenesis of disease is instrumental in delineating its progression mechanisms and for envisioning ways to counteract it. 
In the process, animal models represent invaluable tools for identifying disease-related loci and their genetic components. Amongst them, the laboratory rat is used extensively in the study of many conditions and disorders. The Rat Genome Database (RGD-http://rgd.mcw.edu) has been established to house rat genetic, genomic and phenotypic data. Since its inception, it has continually expanded the depth and breadth of its content. Currently, in addition to rat genes, QTLs and strains, RGD houses mouse and human genes and QTLs and offers pertinent associated data, acquired through manual literature curation and imported via pipelines. A collection of controlled vocabularies and ontologies is employed for the standardized extraction and provision of biological data. The vocabularies/ontologies allow the capture of disease and phenotype associations of rat strains and QTLs, as well as disease and pathway associations of rat, human and mouse genes. A suite of tools enables the retrieval, manipulation, viewing and analysis of data. Genes associated with particular conditions or with altered networks underlying disease pathways can be retrieved. Genetic variants in humans or in sequenced rat strains can be searched and compared. Lists of rat strains and species-specific genes and QTLs can be generated for selected ontology terms and then analyzed, downloaded or sent to other tools. From many entry points, data can be accessed and results retrieved. To illustrate, diabetes is used as a case study to initiate and embark upon an exploratory journey.",2015-11-26 +31528018,"Sequence analysis of the cytochrome c oxidase subunit 1 gene of Sarcoptes scabiei isolated from goats and rabbits in East Java, Indonesia.","

Aim

This study aimed to sequence the Cytochrome c oxidase (COX-1) gene sequence from mitochondrial DNA of Sarcoptes scabiei isolated from Lamongan goats and Mojokerto rabbits, align it with DNA isolated from Zi'gong rabbit (GenBank accession No. EU256389.1), and produce a phylogenetic analysis of S. scabiei COX-1 gene.

Materials and methods

S. scabiei mites were obtained from goats and rabbits, and DNA was extracted using QIAamp DNA Mini Kit. The forward and reverse primer sequences were designed based on the DNA sequence of an S. scabiei COX-1 gene isolated from the Zi'gong rabbit (5'-TCTTAGGGGCTGGATTTAGTATG-3' and 5'-AGTTCCTCTACCAGTTCCAC-3', respectively). To confirm sequencing output, the sequence resulting from the reverse primer was inverted and aligned to the sequence from the forward primer using Clone Manager Professional Version 9 for Windows (Scientific & Educational Software; http://www.scied.com). This alignment was subsequently used to build a phylogenetic tree, using the Neighbor-Joining method, in the MEGA6 program (https://www.megasoftware.net/).

Results

Polymerase chain reaction (PCR) products from S. scabiei isolates from Lamongan goats and Mojokerto rabbits produced bands of around 290 bp with 2% agarose gel electrophoresis. Comparing the DNA sequences of the S. scabiei COX-1 gene with those isolated from Lamongan goats and Mojokerto rabbits showed 99% homology.

Conclusion

PCR products of the S. scabiei COX-1 gene isolated from Lamongan goats and Mojokerto rabbits were around 290 bp long. The sequences had more than 99% homology. The sequences of the COX-1 gene of S. scabiei from Lamongan goats and Mojokerto rabbits were relatively close to the sequence of the gene in S. scabiei obtained from various hosts according to National Center for Biotechnology Information data.",2019-07-05 +28961742,"BSviewer: a genotype-preserving, nucleotide-level visualizer for bisulfite sequencing data.","

Motivation

The bisulfite sequencing technology has been widely used to study the DNA methylation profile in many species. However, most of the current visualization tools for bisulfite sequencing data only provide high-level views (i.e. overall methylation densities) while miss the methylation dynamics at nucleotide level. Meanwhile, they also focus on CpG sites while omit other information (such as genotypes on SNP sites) which could be helpful for interpreting the methylation pattern of the data. A bioinformatics tool that visualizes the methylation statuses at nucleotide level and preserves the most essential information of the sequencing data is thus valuable and needed.

Results

We have developed BSviewer, a lightweight nucleotide-level visualization tool for bisulfite sequencing data. Using an imprinting gene as an example, we show that BSviewer could be specifically helpful for interpreting the data with allele-specific DNA methylation pattern.

Availability and implementation

BSviewer is implemented in Perl and runs on most GNU/Linux platforms. Source code and testing dataset are freely available at http://sunlab.cpy.cuhk.edu.hk/BSviewer/.

Contact

haosun@cuhk.edu.hk.",2017-11-01 +29635150,Model-based approach for cyber-physical attack detection in water distribution systems.,"Modern Water Distribution Systems (WDSs) are often controlled by Supervisory Control and Data Acquisition (SCADA) systems and Programmable Logic Controllers (PLCs) which manage their operation and maintain a reliable water supply. As such, and with the cyber layer becoming a central component of WDS operations, these systems are at a greater risk of being subjected to cyberattacks. This paper offers a model-based methodology based on a detailed hydraulic understanding of WDSs combined with an anomaly detection algorithm for the identification of complex cyberattacks that cannot be fully identified by hydraulically based rules alone. The results show that the proposed algorithm is capable of achieving the best-known performance when tested on the data published in the BATtle of the Attack Detection ALgorithms (BATADAL) competition (http://www.batadal.net).",2018-03-17 +26577058,DBEndo: a web-based endodontic case management tool.,"

Background

The success of endodontic treatment depends-among many other factors-on good documentation. Paper-based records are often difficult to read or incomplete and commercially available tools focus on billing. An electronic record captures the state of treatment at all times. Databases are a common tool in everyday life.

Results

Here, we present a database created for the Charité-Universitätsmedizin Berlin, Germany. Through consistent digital documentation, data analytics of patients, root canal anatomies, instrumentation techniques, efficacy of chemical disinfection, root filling techniques, and corresponding recall success rates, which needed extensive research before, are now easy to perform. Tables and even graphics and data analytics are only one click away and can be exported to other programs.

Conclusions

DBEndo is a database to store and visualise internally, as well as to share endodontic cases online. For academic use we provide the database including all forms and some anonymous data for free at: http://dbendo.charite.de . Through easy import and export of the data, the system is open and flexible.",2015-11-17 +28193156,ECDomainMiner: discovering hidden associations between enzyme commission numbers and Pfam domains.,"

Background

Many entries in the protein data bank (PDB) are annotated to show their component protein domains according to the Pfam classification, as well as their biological function through the enzyme commission (EC) numbering scheme. However, despite the fact that the biological activity of many proteins often arises from specific domain-domain and domain-ligand interactions, current on-line resources rarely provide a direct mapping from structure to function at the domain level. Since the PDB now contains many tens of thousands of protein chains, and since protein sequence databases can dwarf such numbers by orders of magnitude, there is a pressing need to develop automatic structure-function annotation tools which can operate at the domain level.

Results

This article presents ECDomainMiner, a novel content-based filtering approach to automatically infer associations between EC numbers and Pfam domains. ECDomainMiner finds a total of 20,728 non-redundant EC-Pfam associations with a F-measure of 0.95 with respect to a ""Gold Standard"" test set extracted from InterPro. Compared to the 1515 manually curated EC-Pfam associations in InterPro, ECDomainMiner infers a 13-fold increase in the number of EC-Pfam associations.

Conclusion

These EC-Pfam associations could be used to annotate some 58,722 protein chains in the PDB which currently lack any EC annotation. The ECDomainMiner database is publicly available at http://ecdm.loria.fr/ .",2017-02-13 +29084032,DosOpt: A Tool for Personalized Bayesian Dose Adjustment of Vancomycin in Neonates.,"Our main aim has been to design a framework to improve vancomycin dosing in neonates. This required the development and verification of a computerized dose adjustment application, DosOpt, to guide the selection.Model fitting in DosOpt uses Bayesian methods for deriving individual pharmacokinetic (PK) estimates from population priors and patient therapeutic drug monitoring measurements. These are used to simulate concentration-time curves and target-constrained dose optimization. DosOpt was verified by assessing bias and precision through several error metrics and normalized prediction distribution errors on samples simulated from the Anderson et al PK model. The performance of DosOpt was also evaluated using retrospective clinical data. Achieved probabilities of target concentration attainment were benchmarked against corresponding attainments in our clinical retrospective data set.Simulations showed no systemic forecast biases. Normalized prediction distribution error values of the base model were distributed by standardized Gaussian (P = 0.1), showing good model suitability. A retrospective test data set included 149 treatment episodes with 1-10 vancomycin concentration measurements per patient (median 2). Individual concentrations in PK estimation improved probability of target attainment and decreased the variance of the estimation. 
Including 3 individual concentrations in the kinetics estimation increased the probability of Ctrough attainment within 10-15 mg/L from 16% obtained with no individual data (95% confidence interval, 11%-24%) to 43% (21%-47%). DosOpt uses individual concentration data to estimate kinetics and find optimal doses that increase the probability of achieving desired trough concentrations. Its performance started to exceed target levels attained in retrospective clinical data sets with the inclusion of a single individual input concentration. This tool is freely available at http://www.biit.cs.ut.ee/DosOpt.",2017-12-01 +31318742,Comparing Assessments of Physical Functional Independence in Older Adults With Mobility Limitations.,"

Objectives

The aims of the study were (1) to assess the agreement and correlation between self-reported functional independence and observations of family caregivers in a heterogeneous population of community-dwelling older adults with disabilities and (2) to determine how self-reports and caregiver reports correlate with evaluator rated functional independence over time.

Design

Data were drawn from a larger, randomized controlled trial examining the effects of a caregiver-inclusive intervention on outcomes of care recipients and their family caregivers. Functional independence measures were obtained using a self-report version of the Functional Independence Measure (care recipient self-reported Functional Independence Measure, caregiver self-reported Functional Independence Measure) and the Functional Autonomy Measurement System (evaluator perspective). They were administered at baseline (preintervention) and after the intervention at 6, 22, and 58 wks.

Results

Bivariate correlation analyses of 90 dyads consisting of older care recipients and their family caregivers reported moderate to very strong correlations between the three functional independence measures across all time points (rS = 0.45-0.91, P < 0.01). Bland-Altman analyses revealed a small systematic bias between care recipient and caregiver assessments of functional independence, with participants reporting higher scores across all time points (mean difference = 2.00-2.97).

Conclusions

There is substantial consistency among the self-assessed, caregiver-assessed, and evaluator assessed functional independence of older adults. Caregivers may be used as proxies for community-dwelling older adults without severe cognitive impairments with functional limitations.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: After reviewing this article, readers will be able to: (1) Describe the strength of association between self-reported functional independence and observations of family caregivers in a heterogeneous population of community-dwelling older adults with disabilities over time; (2) Describe the level of agreement between self-reported functional independence and observations of family caregivers over time; and (3) Describe the associations among self-reported, caregiver reported and evaluator rated functional independence over time.

Level

Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2019-08-01 +30261098,High-affinity heterotetramer formation between the large myelin-associated glycoprotein and the dynein light chain DYNLL1.,"The close association of myelinated axons and their myelin sheaths involves numerous intercellular molecular interactions. For example, myelin-associated glycoprotein (MAG) mediates myelin-to-axon adhesion and signalling via molecules on the axonal surface. However, knowledge about intracellular binding partners of myelin proteins, including MAG, has remained limited. The two splice isoforms of MAG, S- and L-MAG, display distinct cytoplasmic domains and spatiotemporal expression profiles. We used yeast two-hybrid screening to identify interaction partners of L-MAG and found the dynein light chain DYNLL1 (also termed dynein light chain 8). DYNLL1 homodimers are known to facilitate dimerization of target proteins. L-MAG and DYNLL1 associate with high affinity, as confirmed with recombinant proteins in vitro. Structural analyses of the purified complex indicate that the DYNLL1-binding segment is localized close to the L-MAG C terminus, next to the Fyn kinase Tyr phosphorylation site. The crystal structure of the complex between DYNLL1 and its binding segment on L-MAG shows 2 : 2 binding in a parallel arrangement, indicating a heterotetrameric complex. The homology between L-MAG and previously characterized DYNLL1-ligands is limited, and some details of binding site interactions are unique for L-MAG. 
The structure of the complex between the entire L-MAG cytoplasmic domain and DYNLL1, as well as that of the extracellular domain of MAG, were modelled based on small-angle X-ray scattering data, allowing structural insights into L-MAG interactions on both membrane surfaces. Our data imply that DYNLL1 dimerizes L-MAG, but not S-MAG, through the formation of a specific 2 : 2 heterotetramer. This arrangement is likely to affect, in an isoform-specific manner, the functions of MAG in adhesion and myelin-to-axon signalling. OPEN SCIENCE BADGES: This article has received a badge for *Open Materials* because it provided all relevant information to reproduce the study in the manuscript. The complete Open Science Disclosure form for this article can be found at the end of the article. More information about the Open Practices badges can be found at https://cos.io/our-services/open-science-badges/. Read the Editorial Highlight for this article on page 712.",2018-11-26 +26673694,GtRNAdb 2.0: an expanded database of transfer RNA genes identified in complete and draft genomes.,"Transfer RNAs represent the largest, most ubiquitous class of non-protein coding RNA genes found in all living organisms. The tRNAscan-SE search tool has become the de facto standard for annotating tRNA genes in genomes, and the Genomic tRNA Database (GtRNAdb) was created as a portal for interactive exploration of these gene predictions. Since its published description in 2009, the GtRNAdb has steadily grown in content, and remains the most commonly cited web-based source of tRNA gene information. In this update, we describe not only a major increase in the number of tRNA predictions (>367000) and genomes analyzed (>4370), but more importantly, the integration of new analytic and functional data to improve the quality and biological context of tRNA gene predictions. 
New information drawn from other sources includes tRNA modification data, epigenetic data, single nucleotide polymorphisms, gene expression and evolutionary conservation. A richer set of analytic data is also presented, including better tRNA functional prediction, non-canonical features, predicted structural impacts from sequence variants and minimum free energy structural predictions. Views of tRNA genes in genomic context are provided via direct links to the UCSC genome browsers. The database can be searched by sequence or gene features, and is available at http://gtrnadb.ucsc.edu/.",2015-12-15 +31257402,TBC1D24-TLDc-related epilepsy exercise-induced dystonia: rescue by antioxidants in a disease model.,"Genetic mutations in TBC1D24 have been associated with multiple phenotypes, with epilepsy being the main clinical manifestation. The TBC1D24 protein consists of the unique association of a Tre2/Bub2/Cdc16 (TBC) domain and a TBC/lysin motif domain/catalytic (TLDc) domain. More than 50 missense and loss-of-function mutations have been described and are spread over the entire protein. Through whole genome/exome sequencing we identified compound heterozygous mutations, R360H and G501R, within the TLDc domain, in an index family with a Rolandic epilepsy exercise-induced dystonia phenotype (http://omim.org/entry/608105). A 20-year long clinical follow-up revealed that epilepsy was self-limited in all three affected patients, but exercise-induced dystonia persisted into adulthood in two. Furthermore, we identified three additional sporadic paediatric patients with a remarkably similar phenotype, two of whom had compound heterozygous mutations consisting of an in-frame deletion I81_K84 and an A500V mutation, and the third carried T182M and G511R missense mutations, overall revealing that all six patients harbour a missense mutation in the subdomain of TLDc between residues 500 and 511. We solved the crystal structure of the conserved Drosophila TLDc domain. 
This allowed us to predict destabilizing effects of the G501R and G511R mutations and, to a lesser degree, of R360H and potentially A500V. Next, we characterized the functional consequences of a strong and a weak TLDc mutation (TBC1D24G501R and TBC1D24R360H) using Drosophila, where TBC1D24/Skywalker regulates synaptic vesicle trafficking. In a Drosophila model neuronally expressing human TBC1D24, we demonstrated that the TBC1D24G501R TLDc mutation causes activity-induced locomotion and synaptic vesicle trafficking defects, while TBC1D24R360H is benign. The neuronal phenotypes of the TBC1D24G501R mutation are consistent with exacerbated oxidative stress sensitivity, which is rescued by treating TBC1D24G501R mutant animals with antioxidants N-acetylcysteine amide or α-tocopherol as indicated by restored synaptic vesicle trafficking levels and sustained behavioural activity. Our data thus show that mutations in the TLDc domain of TBC1D24 cause Rolandic-type focal motor epilepsy and exercise-induced dystonia. The humanized TBC1D24G501R fly model exhibits sustained activity and vesicle transport defects. We propose that the TBC1D24/Sky TLDc domain is a reactive oxygen species sensor mediating synaptic vesicle trafficking rates that, when dysfunctional, causes a movement disorder in patients and flies. The TLDc and TBC domain mutations' response to antioxidant treatment we observed in the animal model suggests a potential for combining antioxidant-based therapeutic approaches to TBC1D24-associated disorders with previously described lipid-altering strategies for TBC domain mutations.",2019-08-01 +29387738,Ocean currents and acoustic backscatter data from shipboard ADCP measurements at three North Atlantic seamounts between 2004 and 2015.,"Seamounts are amongst the most common physiographic structures of the deep-ocean landscape, but remoteness and geographic complexity have limited the systematic collection of integrated and multidisciplinary data in the past. 
Consequently, important aspects of seamount ecology and dynamics remain poorly studied. We present a data collection of ocean currents and raw acoustic backscatter from shipboard Acoustic Doppler Current Profiler (ADCP) measurements during six cruises between 2004 and 2015 in the tropical and subtropical Northeast Atlantic to narrow this gap. Measurements were conducted at seamount locations between the island of Madeira and the Portuguese mainland (Ampère, Seine Seamount), as well as east of the Cape Verde archipelago (Senghor Seamount). The dataset includes two-minute ensemble averaged continuous velocity and backscatter profiles, supplemented by spatially gridded maps for each velocity component, error velocity and local bathymetry. The dataset is freely available from the digital data library PANGAEA at https://doi.pangaea.de/10.1594/PANGAEA.883193.",2018-01-28 +29994265,Bioinformatic workflow extraction from scientific texts based on word sense disambiguation and relation extraction. ,"This paper introduces a method for automatic workflow extraction from texts using Process-Oriented Case-Based Reasoning (POCBR). While the current workflow management systems implement mostly different complicated graphical tasks based on advanced distributed solutions (e.g. cloud computing and grid computation), workflow knowledge acquisition from texts using case-based reasoning represents more expressive and semantic cases representations. We propose in this context, an ontology-based workflow extraction framework to acquire processual knowledge from texts. Our methodology extends classic NLP techniques to extract and disambiguate tasks and relations in texts. Using a graph-based representation of workflows and a domain ontology, our extraction process uses a context-aware approach to recognize workflow components: data and control flows. We applied our framework in a technical domain in bioinformatics: i.e. phylogenetic analyses. 
An evaluation based on workflow semantic similarities on a gold standard proves that our approach provides promising results in the process extraction domain. Both data and implementation of our framework are available in: http://labo.bioinfo.uqam.ca/tgrowler.",2018-06-14 +29478376,"RPPAware: A software suite to preprocess, analyze and visualize reverse phase protein array data.","Reverse Phase Protein Arrays (RPPA) is a high-throughput technology used to profile levels of protein expression. Handling the large datasets generated by RPPA can be facilitated by appropriate software tools. Here, we describe RPPAware, a free and intuitive software suite that was developed specifically for analysis and visualization of RPPA data. RPPAware is a portable tool that requires no installation and was built using Java. Many modules of the tool invoke R to utilize the statistical features. To demonstrate the utility of RPPAware, data generated from screening brain regions of a mouse model of Down syndrome with 62 antibodies were used as a case study. The ease of use and efficiency of RPPAware can accelerate data analysis to facilitate biological discovery. RPPAware 1.0 is freely available under GNU General Public License from the project website at http://downsyndrome.ucdenver.edu/iddrc/rppaware/home.htm along with a full documentation of the tool.",2018-01-15 +27614350,Assessment of cancer and virus antigens for cross-reactivity in human tissues.,"

Motivation

Cross-reactivity (CR) or invocation of autoimmune side effects in various tissues has important safety implications in adoptive immunotherapy directed against selected antigens. The ability to predict CR (on-target and off-target toxicities) may help in the early selection of safer therapeutically relevant target antigens.

Results

We developed a methodology for the calculation of quantitative CR for any defined peptide epitope. Using this approach, we performed assessment of 4 groups of 283 currently known human MHC-class-I epitopes including differentiation antigens, overexpressed proteins, cancer-testis antigens and mutations displayed by tumor cells. In addition, 89 epitopes originating from viral sources were investigated. The natural occurrence of these epitopes in human tissues was assessed based on proteomics abundance data, while the probability of their presentation by MHC-class-I molecules was modelled by the method of Keşmir et al. which combines proteasomal cleavage, TAP affinity and MHC-binding predictions. The results of these analyses for many previously defined peptides are presented as CR indices and tissue profiles. The methodology thus allows for quantitative comparisons of epitopes and is suggested to be suited for the assessment of epitopes of candidate antigens in an early stage of development of adoptive immunotherapy.

Availability and implementation

Our method is implemented as a Java program, with curated datasets stored in a MySQL database. It predicts all naturally possible self-antigens for a given sequence of a therapeutic antigen (or epitope) and after filtering for predicted immunogenicity outputs results as an index and profile of CR to the self-antigens in 22 human tissues. The program is implemented as part of the iCrossR webserver, which is publicly available at http://webclu.bio.wzw.tum.de/icrossr/ CONTACT: d.frishman@wzw.tum.de Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-10 +23200141,miRT: a database of validated transcription start sites of human microRNAs.,"MicroRNAs (miRNAs) are small endogenous non-coding RNAs of about 22 nt in length that take crucial roles in many biological processes. These short RNAs regulate the expression of mRNAs by binding to their 3'-UTRs or by translational repression. Many of the current studies focus on how mature miRNAs regulate mRNAs, however, very limited knowledge is available regarding their transcriptional loci. It is known that primary miRNAs (pri-miRs) are first transcribed from the DNA, followed by the formation of precursor miRNAs (pre-miRs) by endonuclease activity, which finally produces the mature miRNAs. Till date, many of the pre-miRs and mature miRNAs have been experimentally verified. But unfortunately, identification of the loci of pri-miRs, promoters and associated transcription start sites (TSSs) are still in progress. TSSs of only about 40% of the known mature miRNAs in human have been reported. This information, albeit limited, may be useful for further study of the regulation of miRNAs. In this paper, we provide a novel database of validated miRNA TSSs, miRT, by collecting data from several experimental studies that validate miRNA TSSs and are available for full download. We present miRT as a web server and it is also possible to convert the TSS loci between different genome built. 
miRT might be a valuable resource for advanced research on miRNA regulation, which is freely accessible at: http://www.isical.ac.in/~bioinfo_miu/miRT/miRT.php.",2012-09-29 +22080510,KEGG for integration and interpretation of large-scale molecular data sets.,"Kyoto Encyclopedia of Genes and Genomes (KEGG, http://www.genome.jp/kegg/ or http://www.kegg.jp/) is a database resource that integrates genomic, chemical and systemic functional information. In particular, gene catalogs from completely sequenced genomes are linked to higher-level systemic functions of the cell, the organism and the ecosystem. Major efforts have been undertaken to manually create a knowledge base for such systemic functions by capturing and organizing experimental knowledge in computable forms; namely, in the forms of KEGG pathway maps, BRITE functional hierarchies and KEGG modules. Continuous efforts have also been made to develop and improve the cross-species annotation procedure for linking genomes to the molecular networks through the KEGG Orthology system. Here we report KEGG Mapper, a collection of tools for KEGG PATHWAY, BRITE and MODULE mapping, enabling integration and interpretation of large-scale data sets. We also report a variant of the KEGG mapping procedure to extend the knowledge base, where different types of data and knowledge, such as disease genes and drug targets, are integrated as part of the KEGG molecular networks. Finally, we describe recent enhancements to the KEGG content, especially the incorporation of disease and drug information used in practice and in society, to support translational bioinformatics.",2011-11-10 +29036307,Irys Extract.,"

Summary

Irys Extract is a software tool for generating genomic information from data collected by the BioNano Genomics Irys platform. The tool allows the user easy access to the raw data in the form of cropped images and genetically aligned intensity profiles. The latter are also made compatible with the BED format for using with popular genomic browsers such as the UCSC Genome Browser.

Availability and implementation

Irys Extract has been developed in Matlab R2015a, it was tested to work with IrysView 2.4.0.15879 and AutoDetect 2.1.4.9159, and it currently runs under Microsoft Windows operating systems (7-10). Irys Extract can be downloaded alongside its manual and a demo dataset at http://www.nanobiophotonix.com and https://sites.google.com/site/raniarielly/.

Contact

uv@post.tau.ac.il.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +29900271,Dataset of pollen morphological traits of 56 dominant species among desert vegetation in the eastern arid central Asia.,"The data presented in this article are related to the research article entitled ""Pollen spectrum, a cornerstone for tracing the evolution of the eastern central Asian desert"" (JQSR 5260) (Lu et al., 2018) [1] In this paper, we supply a dataset, which provides a descriptive and general summary of pollen characteristic of desert dominant species in the eastern arid central Asia (ACA). The other important component is the illustration on pollen grains traits under light microscopy (LM) and scanning electron microscopy (SEM). Pollen grains of 56 species are extracted from voucher specimens from the PE herbarium at the Institute of Botany. It is worth noting that these species own special distribution patterns in China. The distribution maps are plotted using the Google Maps and the species distribution data at the county level supplied by the Chinese Virtual Herbarium (http://www.cvh.ac.cn/).",2018-03-31 +28426660,Exhaustive search of linear information encoding protein-peptide recognition.,"High-throughput in vitro methods have been extensively applied to identify linear information that encodes peptide recognition. However, these methods are limited in number of peptides, sequence variation, and length of peptides that can be explored, and often produce solutions that are not found in the cell. Despite the large number of methods developed to attempt addressing these issues, the exhaustive search of linear information encoding protein-peptide recognition has been so far physically unfeasible. Here, we describe a strategy, called DALEL, for the exhaustive search of linear sequence information encoded in proteins that bind to a common partner. We applied DALEL to explore binding specificity of SH3 domains in the budding yeast Saccharomyces cerevisiae. 
Using only the polypeptide sequences of SH3 domain binding proteins, we succeeded in identifying the majority of known SH3 binding sites previously discovered either in vitro or in vivo. Moreover, we discovered a number of sites with both non-canonical sequences and distinct properties that may serve ancillary roles in peptide recognition. We compared DALEL to a variety of state-of-the-art algorithms in the blind identification of known binding sites of the human Grb2 SH3 domain. We also benchmarked DALEL on curated biological motifs derived from the ELM database to evaluate the effect of increasing/decreasing the enrichment of the motifs. Our strategy can be applied in conjunction with experimental data of proteins interacting with a common partner to identify binding sites among them. Yet, our strategy can also be applied to any group of proteins of interest to identify enriched linear motifs or to exhaustively explore the space of linear information encoded in a polypeptide sequence. Finally, we have developed a webserver located at http://michnick.bcm.umontreal.ca/dalel, offering user-friendly interface and providing different scenarios utilizing DALEL.",2017-04-20 +29698914,Error quantification of osteometric data in forensic anthropology.,"This study evaluates the reliability of osteometric data commonly used in forensic case analyses, with specific reference to the measurements in Data Collection Procedures 2.0 (DCP 2.0). Four observers took a set of 99 measurements four times on a sample of 50 skeletons (each measurement was taken 200 times by each observer). Two-way mixed ANOVAs and repeated measures ANOVAs with pairwise comparisons were used to examine interobserver (between-subjects) and intraobserver (within-subjects) variability. Relative technical error of measurement (TEM) was calculated for measurements with significant ANOVA results to examine the error among a single observer repeating a measurement multiple times (e.g. 
repeatability or intraobserver error), as well as the variability between multiple observers (interobserver error). Two general trends emerged from these analyses: (1) maximum lengths and breadths have the lowest error across the board (TEM<0.5), and (2) maximum and minimum diameters at midshaft are more reliable than their positionally-dependent counterparts (i.e. sagittal, vertical, transverse, dorso-volar). Therefore, maxima and minima are specified for all midshaft measurements in DCP 2.0. Twenty-two measurements were flagged for excessive variability (either interobserver, intraobserver, or both); 15 of these measurements were part of the standard set of measurements in Data Collection Procedures for Forensic Skeletal Material, 3rd edition. Each measurement was examined carefully to determine the likely source of the error (e.g. data input, instrumentation, observer's method, or measurement definition). For several measurements (e.g. anterior sacral breadth, distal epiphyseal breadth of the tibia) only one observer differed significantly from the remaining observers, indicating a likely problem with the measurement definition as interpreted by that observer; these definitions were clarified in DCP 2.0 to eliminate this confusion. Other measurements were taken from landmarks that are difficult to locate consistently (e.g. pubis length, ischium length); these measurements were omitted from DCP 2.0. This manual is available for free download online (https://fac.utk.edu/wp-content/uploads/2016/03/DCP20_webversion.pdf), along with an accompanying instructional video (https://www.youtube.com/watch?v=BtkLFl3vim4).",2018-04-10 +24558125,Mantra 2.0: an online collaborative resource for drug mode of action and repurposing by network analysis.,"

Summary

Elucidation of molecular targets of a compound [mode of action (MoA)] and its off-targets is a crucial step in drug development. We developed an online collaborative resource (MANTRA 2.0) that supports this process by exploiting similarities between drug-induced transcriptional profiles. Drugs are organized in a network of nodes (drugs) and edges (similarities) highlighting 'communities' of drugs sharing a similar MoA. A user can upload gene expression profiles before and after drug treatment in one or multiple cell types. An automated processing pipeline transforms the gene expression profiles into a unique drug 'node' embedded in the drug-network. Visual inspection of the neighbouring drugs and communities helps in revealing its MoA and to suggest new applications of known drugs (drug repurposing). MANTRA 2.0 allows storing and sharing user-generated network nodes, thus making MANTRA 2.0 a collaborative ever-growing resource.

Availability and implementation

The web tool is freely available for academic use at http://mantra.tigem.it.",2014-02-20 +29895857,Novel phenotype-disease matching tool for rare genetic diseases.,"

Purpose

To improve the accuracy of matching rare genetic diseases based on patient's phenotypes.

Methods

We introduce new methods to prioritize diagnosis of genetic diseases based on integrated semantic similarity (method 1) and ontological overlap (method 2) between the phenotypes expressed by a patient and phenotypes annotated to known diseases.

Results

We evaluated the performance of our methods by two sets of simulated data and one set of patient's data derived from electronic health records. We demonstrated that the two methods achieved significantly improved performance compared with previous methods in correctly prioritizing candidate diseases in all of the three sets. Our methods are freely available as a web application ( https://gddp.

Research

cchmc.org/ ) to aid diagnosis of genetic diseases.

Conclusion

Our methods can capture the diagnostic information embedded in the phenotype ontology, consider all phenotypes exhibited by a patient, and are more robust than the existing methods when phenotypes are incorrectly or imprecisely specified. These methods can assist the diagnosis of rare genetic diseases and help the interpretation of the results of DNA tests.",2018-06-12 +29322528,MR and CT data with multiobserver delineations of organs in the pelvic area-Part of the Gold Atlas project.,"

Purpose

We describe a public dataset with MR and CT images of patients performed in the same position with both multiobserver and expert consensus delineations of relevant organs in the male pelvic region. The purpose was to provide means for training and validation of segmentation algorithms and methods to convert MR to CT like data, i.e., so called synthetic CT (sCT).

Acquisition and validation methods

T1- and T2-weighted MR images as well as CT data were collected for 19 patients at three different departments. Five experts delineated nine organs for each patient based on the T2-weighted MR images. An automatic method was used to fuse the delineations. Starting from each fused delineation, a consensus delineation was agreed upon by the five experts for each organ and patient. Segmentation overlap between user delineations with respect to the consensus delineations was measured to describe the spread of the collected data. Finally, an open-source software was used to create deformation vector fields describing the relation between MR and CT images to further increase the usability of the dataset.

Data format and usage notes

The dataset has been made publically available to be used for academic purposes, and can be accessed from https://zenodo.org/record/583096.

Potential applications

The dataset provides a useful source for training and validation of segmentation algorithms as well as methods to convert MR to CT-like data (sCT). To give some examples: The T2-weighted MR images with their consensus delineations can directly be used as a template in an existing atlas-based segmentation engine; the expert delineations are useful to validate the performance of a segmentation algorithm as they provide a way to measure variability among users which can be compared with the result of an automatic segmentation; and the pairwise deformably registered MR and CT images can be a source for an atlas-based sCT algorithm or for validation of sCT algorithm.",2018-01-24 +30505900,Morphological and molecular datasets for Kaempferia species.,"This study compared morphological and molecular data for identification of Kaempferia species. Each species was deposited in Institute of Bioscience (IBS), Universiti Putra Malaysia (UPM) as voucher specimens and ITS sequences of each species deposited in NCBI (https://www.ncbi.nlm.nih.gov/) as GenBank accessions. DNA was extracted using a modified CTAB method and PCR amplification was completed using Internal Transcribed Spacer (ITS4 and ITS5) markers. PCR amplification of products were viewed under gel electrophoresis. Sequencing was performed and sequence characteristics of ITS rDNA in Kaempferia is shown. Qualitative and qualitative scoring of morphological characters and measuring techniques for Kaempferia species are included. In addition, a brief review of molecular markers used in phylogenetic studies of Zingiberaceae is included in this dataset.",2018-10-27 +29281985,Deep convolutional neural networks for pan-specific peptide-MHC class I binding prediction.,"

Background

Computational scanning of peptide candidates that bind to a specific major histocompatibility complex (MHC) can speed up the peptide-based vaccine development process and therefore various methods are being actively developed. Recently, machine-learning-based methods have generated successful results by training large amounts of experimental data. However, many machine learning-based methods are generally less sensitive in recognizing locally-clustered interactions, which can synergistically stabilize peptide binding. Deep convolutional neural network (DCNN) is a deep learning method inspired by visual recognition process of animal brain and it is known to be able to capture meaningful local patterns from 2D images. Once the peptide-MHC interactions can be encoded into image-like array(ILA) data, DCNN can be employed to build a predictive model for peptide-MHC binding prediction. In this study, we demonstrated that DCNN is able to not only reliably predict peptide-MHC binding, but also sensitively detect locally-clustered interactions.

Results

Nonapeptide-HLA-A and -B binding data were encoded into ILA data. A DCNN, as a pan-specific prediction model, was trained on the ILA data. The DCNN showed higher performance than other prediction tools for the latest benchmark datasets, which consist of 43 datasets for 15 HLA-A alleles and 25 datasets for 10 HLA-B alleles. In particular, the DCNN outperformed other tools for alleles belonging to the HLA-A3 supertype. The F1 scores of the DCNN were 0.86, 0.94, and 0.67 for HLA-A*31:01, HLA-A*03:01, and HLA-A*68:01 alleles, respectively, which were significantly higher than those of other tools. We found that the DCNN was able to recognize locally-clustered interactions that could synergistically stabilize peptide binding. We developed ConvMHC, a web server to provide user-friendly web interfaces for peptide-MHC class I binding predictions using the DCNN. ConvMHC web server can be accessible via http://jumong.kaist.ac.kr:8080/convmhc .

Conclusions

We developed a novel method for peptide-HLA-I binding predictions using DCNN trained on ILA data that encode peptide binding data and demonstrated the reliable performance of the DCNN in nonapeptide binding predictions through the independent evaluation on the latest IEDB benchmark datasets. Our approaches can be applied to characterize locally-clustered patterns in molecular interactions, such as protein/DNA, protein/RNA, and drug/protein interactions.",2017-12-28 +28976309,"Simple, Transparent, and Flexible Automated Quality Assessment Procedures for Ambulatory Electrodermal Activity Data.","

Objective

Electrodermal activity (EDA) is a noninvasive measure of sympathetic activation often used to study emotions, decision making, and health. The use of ""ambulatory"" EDA in everyday life presents novel challenges-frequent artifacts and long recordings-with inconsistent methods available for efficiently and accurately assessing data quality. We developed and validated a simple, transparent, flexible, and automated quality assessment procedure for ambulatory EDA data.

Methods

A total of 20 individuals with autism (5 females, 5-13 years) provided a combined 181 h of EDA data in their home using the Affectiva Q Sensor across 8 weeks. Our procedure identified invalid data using four rules: First, EDA out of range; second, EDA changes too quickly; third, temperature suggests the sensor is not being worn; and fourth, transitional data surrounding segments identified as invalid via the preceding rules. We identified invalid portions of a pseudorandom subset of our data (32.8 h, 18%) using our automated procedure and independent visual inspection by five EDA experts.

Results

Our automated procedure identified 420 min (21%) of invalid data. The five experts agreed strongly with each other (agreement: 98%, Cohen's κ: 0.87) and, thus, were averaged into a ""consensus"" rating. Our procedure exhibited excellent agreement with the consensus rating (sensitivity: 91%, specificity: 99%, accuracy: 92%, κ: 0.739 [95% CI = 0.738, 0.740]).

Conclusion

We developed a simple, transparent, flexible, and automated quality assessment procedure for ambulatory EDA data.

Significance

Our procedure can be used beyond this study to enhance efficiency, transparency, and reproducibility of EDA analyses, with free software available at http://www.cbslab.org/EDAQA.",2017-10-02 +25347823,"MitProNet: A knowledgebase and analysis platform of proteome, interactome and diseases for mammalian mitochondria.","Mitochondrion plays a central role in diverse biological processes in most eukaryotes, and its dysfunctions are critically involved in a large number of diseases and the aging process. A systematic identification of mitochondrial proteomes and characterization of functional linkages among mitochondrial proteins are fundamental in understanding the mechanisms underlying biological functions and human diseases associated with mitochondria. Here we present a database MitProNet which provides a comprehensive knowledgebase for mitochondrial proteome, interactome and human diseases. First an inventory of mammalian mitochondrial proteins was compiled by widely collecting proteomic datasets, and the proteins were classified by machine learning to achieve a high-confidence list of mitochondrial proteins. The current version of MitProNet covers 1124 high-confidence proteins, and the remainders were further classified as middle- or low-confidence. An organelle-specific network of functional linkages among mitochondrial proteins was then generated by integrating genomic features encoded by a wide range of datasets including genomic context, gene expression profiles, protein-protein interactions, functional similarity and metabolic pathways. The functional-linkage network should be a valuable resource for the study of biological functions of mitochondrial proteins and human mitochondrial diseases. Furthermore, we utilized the network to predict candidate genes for mitochondrial diseases using prioritization algorithms. 
All proteins, functional linkages and disease candidate genes in MitProNet were annotated according to the information collected from their original sources including GO, GEO, OMIM, KEGG, MIPS, HPRD and so on. MitProNet features a user-friendly graphic visualization interface to present functional analysis of linkage networks. As an up-to-date database and analysis platform, MitProNet should be particularly helpful in comprehensive studies of complicated biological mechanisms underlying mitochondrial functions and human mitochondrial diseases. MitProNet is freely accessible at http://bio.scu.edu.cn:8085/MitProNet.",2014-10-27 +30835141,Climate Change and Physical Activity: Estimated Impacts of Ambient Temperatures on Bikeshare Usage in New York City.,"BACKGROUND:Physical activity is one of the best disease prevention strategies, and it is influenced by environmental factors such as temperature. OBJECTIVES:We aimed to illuminate the relation between ambient temperature and bikeshare usage and to project how climate change-induced increasing ambient temperatures may influence active transportation in New York City. METHODS:The analysis leverages Citi Bike® bikeshare data to estimate participation in outdoor bicycling in New York City. Exposure-response functions are estimated for the relation between daily temperature and bike usage from 2013 to 2017. The estimated exposure-response relation is combined with temperature outputs from 21 climate models (run with emissions scenarios RCP4.5 and RCP8.5) to explore how climate change may influence future bike utilization. RESULTS:Estimated daily hours and distance ridden significantly increased as temperatures increased, but then declined at temperatures above 26-28°C. Bike usage may increase by up to 3.1% by 2070 due to climate change. Future ridership increases during the winter, spring, and fall may more than offset future declines in summer ridership. 
DISCUSSION:Evidence suggesting nonlinear impacts of rising temperatures on health-promoting bicycle ridership demonstrates how challenging it is to anticipate the health consequences of climate change. We project increases in bicycling by mid-century in NYC, but this trend may reverse as temperatures continue to rise further into the future. https://doi.org/10.1289/EHP4039.",2019-03-01 +26826717,Library of binding protein scaffolds (LibBP): a computational platform for selection of binding protein scaffolds.,"

Motivation

Developments in biotechnology have enabled the in vitro evolution of binding proteins. The emerging limitations of antibodies in binding protein engineering have led to suggestions for other proteins as alternative binding protein scaffolds. Most of these proteins were selected based on human intuition rather than systematic analysis of the available data. To improve this strategy, we developed a computational framework for finding desirable binding protein scaffolds by utilizing protein structure and sequence information.

Results

For each protein, its structure and the sequences of evolutionarily-related proteins were analyzed, and spatially contiguous regions composed of highly variable residues were identified. A large number of proteins have these regions, but leucine rich repeats (LRRs), histidine kinase domains and immunoglobulin domains are predominant among them. The candidates suggested as new binding protein scaffolds include histidine kinase, LRR, titin and pentapeptide repeat protein.

Availability and implementation

The database and web-service are accessible via http://bcbl.kaist.ac.kr/LibBP CONTACT: kds@kaist.ac.krSupplementary data: Supplementary data are available at Bioinformatics online.",2016-01-30 +22711793,Identifying aberrant pathways through integrated analysis of knowledge in pharmacogenomics.,"

Motivation

Many complex diseases are the result of abnormal pathway functions instead of single abnormalities. Disease diagnosis and intervention strategies must target these pathways while minimizing the interference with normal physiological processes. Large-scale identification of disease pathways and chemicals that may be used to perturb them requires the integration of information about drugs, genes, diseases and pathways. This information is currently distributed over several pharmacogenomics databases. An integrated analysis of the information in these databases can reveal disease pathways and facilitate novel biomedical analyses.

Results

We demonstrate how to integrate pharmacogenomics databases through integration of the biomedical ontologies that are used as meta-data in these databases. The additional background knowledge in these ontologies can then be used to enable novel analyses. We identify disease pathways using a novel multi-ontology enrichment analysis over the Human Disease Ontology, and we identify significant associations between chemicals and pathways using an enrichment analysis over a chemical ontology. The drug-pathway and disease-pathway associations are a valuable resource for research in disease and drug mechanisms and can be used to improve computational drug repurposing.

Availability

http://pharmgkb-owl.googlecode.com",2012-06-17 +30176795,Genome-wide identification of MADS-box family genes in moso bamboo (Phyllostachys edulis) and a functional analysis of PeMADS5 in flowering.,"

Background

MADS-box genes encode a large family of transcription factors that play significant roles in plant growth and development. Bamboo is an important non-timber forest product worldwide, but previous studies on the moso bamboo (Phyllostachys edulis) MADS-box gene family were not accurate nor sufficiently detailed.

Results

Here, a complete genome-wide identification and characterization of the MADS-box genes in moso bamboo was conducted. There was an unusual lack of type-I MADS-box genes in the bamboo genome database ( http://202.127.18.221/bamboo/index.php ), and some of the PeMADS sequences are fragmented and/or inaccurate. We performed several bioinformatics techniques to obtain more precise sequences using transcriptome assembly. In total, 42 MADS-box genes, including six new type-I MADS-box genes, were identified in bamboo, and their structures, phylogenetic relationships, predicted conserved motifs and promoter cis-elements were systematically investigated. An expression analysis of the bamboo MADS-box genes in floral organs and leaves revealed that several key members are involved in bamboo inflorescence development, like their orthologous genes in Oryza. The ectopic overexpression of one MADS-box gene, PeMADS5, in Arabidopsis triggered an earlier flowering time and the development of an aberrant flower phenotype, suggesting that PeMADS5 acts as a floral activator and is involved in bamboo flowering.

Conclusion

We produced the most comprehensive information on MADS-box genes in moso bamboo. Additionally, a critical PeMADS gene (PeMADS5) responsible for the transition from vegetative to reproductive growth was identified and shown to be related to bamboo floral development.",2018-09-03 +30990809,Genetic mutational status of genes regulating epigenetics: Role of the histone methyltransferase KMT2D in triple negative breast tumors.,"

Purpose

Epigenetic regulating proteins like histone methyltransferases produce variations in several functions, some of them associated with the generation of oncogenic processes. Mutations of genes involved in these functions have been recently associated with cancer, and strategies to modulate their activity are currently in clinical development.

Methods

By using data extracted from the METABRIC study, we searched for mutated genes linked with detrimental outcome in invasive breast carcinoma (n = 772). Then, we used downstream signatures for each mutated gene to associate that signature with clinical prognosis using the online tool ""Genotype-2-Outcome"" (http://www.g-2-o.com). Next, we performed functional annotation analyses to classify genes by functions, and focused on those associated with the epigenetic machinery.

Results

We identified KMT2D, SETD1A and SETD2, included in the lysine methyltransferase activity function, as linked with poor prognosis in invasive breast cancer. KMT2D which codes for a histone methyltransferase that acts as a transcriptional regulator was mutated in 6% of triple negative breast tumors and found to be linked to poor survival. Genes regulated by KMT2D included RAC3, KRT23, or KRT14, among others, which are involved in cell communication and signal transduction. Finally, low expression of KMT2D at the transcriptomic level, which mirror what happens when KMT2D is mutated and functionally inactive, confirmed its prognostic value.

Conclusion

In the present work, we describe epigenetic modulating genes which are found to be mutated in breast cancer. We identify the histone methyltransferase KMT2D, which is mutated in 6% of triple negative tumors and linked with poor survival.",2019-04-16 +31062040,Gambling involvement and problem gambling correlates among European adolescents: results from the European Network for Addictive Behavior study.,"

Purpose

Worldwide, concern has grown over the expansion of gambling among adolescents, who have an increased likelihood of developing risk-taking behaviors. This study aimed to increase knowledge of problem gambling among adolescents in seven European countries and to assess the effect of demographic and lifestyle factors recorded in the European Network for Addictive Behavior survey (https://www.eunetadb.eu).

Methods

A cross-sectional school-based study (n = 13,284) was conducted in Germany, Greece, Iceland, The Netherlands, Poland, Romania and Spain. Anonymous self-completed questionnaires included socio-demographic data, internet usage characteristics, school achievement, parental control, the Internet Addiction Test, the South Oaks Gambling Screen-Revised for Adolescents Test and Achenbach's Youth Self-Report.

Results

12.5% of the participants reported last year gambling activities either online or offline. 3.6% of the study participants and 28.1% of gamblers (either online or offline) were at risk or had a gambling problem. The study results showed that a higher proportion of adolescents was either at risk or had a gambling problem among males, in the older age group, when the parental educational level was lower/middle, and in the absence of siblings. Furthermore, being at risk or having a gambling problem was associated with lower age at first use of the internet, lower school grades, using the internet 6-7 days per week, and problematic internet use. At risk or problem gamblers had higher scores on all scales of problem behavior and lower scores (lower competence) on activities and academic performance.

Conclusions

Our findings underline the need for better gambling legislation and suggest the importance of developing social responsibility tools that may help diminish adolescent gambling involvement, with special attention to males.",2019-05-06 +29552608,Evaluation of 0 ≤ M ≤ 8 earthquake data sets in African - Asian region during 1966-2015.,"This article evaluates the occurrence of 0 [Formula: see text]M[Formula: see text] 8 earthquake data sets for the period of 50 years (that is, January 1, 1966 to December 31, 2015) in African and Western Asia region. It is bounded by latitude 40° S to 40° N and longitude 30° W to 60° E with the focal depth of 0-700 km. Seventy seven thousand, six hundred and ninety-six data points were presented for the analysis. The data used were extracted from earthquake catalog of Advanced National Seismic system via http://quake.geo.berkeley.edu/cnss/, an official website of the Northern California Earthquake Data Centre, USA. Each datum comprised the earthquake occurrence date, time of the earthquake occurrence, epicenter's coordinates, focal depth and magnitude. The Gutenberg-Richter's relationship being the longest observed empirical relationship in seismology, analysis of variance and time series were used to analyze the seismicity of the study area. Annual distributions of earthquake occurrence based on magnitude variations with the limit 0 [Formula: see text]M[Formula: see text] 8 were presented. The two constants a and b in the Gutenberg-Richter's equation, magnitude of completeness (MC) adjusted R-Square and F-value for the period of 1966-1975, 1976-1985, 1986-1995, 1996-2005, 2006-2015, and the entire period of investigation ranging from 1966 to 2015 were determined so as to investigate the variations of these parameters on earthquake occurrence over time. 
The histograms of earthquake occurrence against magnitude of earthquakes for the selected years (1966-1975, 1976-1985, 1986-1995, 1996-2005, 2006-2015, and 1966-2015), and the decadal frequency distributions of earthquake occurrence were also plotted. The focal depth occurrence for each magnitude bins (0-0.9, 1-1.9, 2-2.9, 3-3.9, 4-4.9, 5-5.9, 6-6.9, 7-7.9, 8-8.9) were grouped into shallow, intermediate, and deep depths ranging from 0 to 70, 71 to 300, and 301 to 700 km as being used in seismology. The neural network analysis was also applied to the magnitude of the earthquake. The network uses a time series magnitude data as input with the output being the magnitude of the following day. If the nature of the earthquakes time series is stochastic, modeling and prediction is possible. The earthquake data sets presented in this article can further be adopted in the study of seismicity pattern, b-value using series of models, earthquake prediction and variations of earthquake parameters on African and/or Arabian plates. When this approach is integrated with other technique(s), it can provide insights to stability of African lithospehric plates especially the coastal region of Africa.",2018-01-31 +28981421,ERDS-exome: a Hybrid Approach for Copy Number Variant Detection from Whole-exome Sequencing Data. ,"Copy number variants (CNVs) play important roles in human disease and evolution. With the rapid development of next-generation sequencing technologies, many tools have been developed for inferring CNVs based on whole-exome sequencing (WES) data. However, as a result of the sparse distribution of exons in the genome, the limitations of the WES technique, and the nature of high-level signal noises in WES data, the efficacy of these variants remains less than desirable. Thus, there is need for the development of an effective tool to achieve a considerable power in WES CNVs discovery. 
In the present study, we describe a novel method, Estimation by Read Depth (RD) with Single-nucleotide variants from exome sequencing data (ERDS-exome). ERDS-exome employs a hybrid normalization approach to normalize WES data and to incorporate RD and single-nucleotide variation information together as a hybrid signal into a paired hidden Markov model to infer CNVs from WES data. Based on systematic evaluations of real data from the 1000 Genomes Project using other state-of-the-art tools, we observed that ERDS-exome demonstrates higher sensitivity and provides comparable or even better specificity than other tools. ERDS-exome is publicly available at: https://erds-exome.github.io.",2017-10-04 +23193282,Allen Brain Atlas: an integrated spatio-temporal portal for exploring the central nervous system.,"The Allen Brain Atlas (http://www.brain-map.org) provides a unique online public resource integrating extensive gene expression data, connectivity data and neuroanatomical information with powerful search and viewing tools for the adult and developing brain in mouse, human and non-human primate. Here, we review the resources available at the Allen Brain Atlas, describing each product and data type [such as in situ hybridization (ISH) and supporting histology, microarray, RNA sequencing, reference atlases, projection mapping and magnetic resonance imaging]. In addition, standardized and unique features in the web applications are described that enable users to search and mine the various data sets. Features include both simple and sophisticated methods for gene searches, colorimetric and fluorescent ISH image viewers, graphical displays of ISH, microarray and RNA sequencing data, Brain Explorer software for 3D navigation of anatomy and gene expression, and an interactive reference atlas viewer. In addition, cross data set searches enable users to query multiple Allen Brain Atlas data sets simultaneously. 
All of the Allen Brain Atlas resources can be accessed through the Allen Brain Atlas data portal.",2012-11-28 +26697911,The Biobank Economic Modeling Tool (BEMT): Online Financial Planning to Facilitate Biobank Sustainability.,"

Background

Biospecimens are essential resources for advancing basic and translational research. However, there are little data available regarding the costs associated with operating a biobank, and few resources to enable their long-term sustainability. To support the research community in this effort, the National Institutes of Health, National Cancer Institute's Biorepositories and Biospecimen Research Branch has developed the Biobank Economic Modeling Tool (BEMT). The tool is accessible at http://biospecimens.cancer.gov/resources/bemt.asp.

Methods

To obtain market-based cost information and to inform the development of the tool, a survey was designed and sent to 423 biobank managers and directors across the world. The survey contained questions regarding infrastructure investments, salary costs, funding options, types of biospecimen resources and services offered, as well as biospecimen pricing and service-related costs.

Results

A total of 106 responses were received. The data were anonymized, aggregated, and used to create a comprehensive database of cost and pricing information that was integrated into the web-based tool, the BEMT. The BEMT was built to allow the user to input cost and pricing data through a seven-step process to build a cost profile for their biobank, define direct and indirect costs, determine cost recovery fees, perform financial forecasting, and query the anonymized survey data from comparable biobanks.

Conclusion

A survey was conducted to obtain a greater understanding of the costs involved in operating a biobank. The anonymized survey data was then used to develop the BEMT, a cost modeling tool for biobanks. Users of the tool will be able to create a cost profile for their biobanks' specimens, products and services, establish pricing, and allocate costs for biospecimens based on percent cost recovered, and perform project-specific cost analyses and financial forecasting.",2015-12-01 +30808676,MiR-644a Disrupts Oncogenic Transformation and Warburg Effect by Direct Modulation of Multiple Genes of Tumor-Promoting Pathways.,"Castration-resistant prostate cancer (CRPC) is defined by tumor microenvironment heterogeneity affecting intrinsic cellular mechanisms including dysregulated androgen signaling, aerobic glycolysis (Warburg effect), and aberrant activation of transcription factors including androgen receptor (AR) and c-Myc. Using in vitro, in vivo, and animal models, we find a direct correlation between miR-644a downregulation and dysregulation of essential cellular processes. MiR-644a downregulated expression of diverse tumor microenvironment drivers including c-Myc, AR coregulators, and antiapoptosis factors Bcl-xl and Bcl2. Moreover, miR-644a modulates epithelial-mesenchymal transition (EMT) by directly targeting EMT-promoting factors ZEB1, cdk6, and Snail. Finally, miR-644a expression suppresses the Warburg effect by direct targeting of c-Myc, Akt, IGF1R, and GAPDH expression. RNA sequencing analysis revealed an analogous downregulation of these factors in animal tumor xenografts. These data demonstrate miR-644a mediated fine-tuning of oncogenesis, stimulating pathways and resultant potentiation of enzalutamide therapy in CRPC patients. 
SIGNIFICANCE: This study demonstrates that miR-644a therapeutically influences the CRPC tumor microenvironment by suppressing androgen signaling and additional genes involved in metabolism, proliferation, Warburg effect, and EMT, to potentiate the enzalutamide therapy.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/79/8/1844/F1.large.jpg.",2019-02-26 +30320215,Soil Viruses Are Underexplored Players in Ecosystem Carbon Processing. ,"Rapidly thawing permafrost harbors ∼30 to 50% of global soil carbon, and the fate of this carbon remains unknown. Microorganisms will play a central role in its fate, and their viruses could modulate that impact via induced mortality and metabolic controls. Because of the challenges of recovering viruses from soils, little is known about soil viruses or their role(s) in microbial biogeochemical cycling. Here, we describe 53 viral populations (viral operational taxonomic units [vOTUs]) recovered from seven quantitatively derived (i.e., not multiple-displacement-amplified) viral-particle metagenomes (viromes) along a permafrost thaw gradient at the Stordalen Mire field site in northern Sweden. Only 15% of these vOTUs had genetic similarity to publicly available viruses in the RefSeq database, and ∼30% of the genes could be annotated, supporting the concept of soils as reservoirs of substantial undescribed viral genetic diversity. The vOTUs exhibited distinct ecology, with different distributions along the thaw gradient habitats, and a shift from soil-virus-like assemblages in the dry palsas to aquatic-virus-like assemblages in the inundated fen. Seventeen vOTUs were linked to microbial hosts (in silico), implicating viruses in infecting abundant microbial lineages from Acidobacteria, Verrucomicrobia, and Deltaproteobacteria, including those encoding key biogeochemical functions such as organic matter degradation. 
Thirty auxiliary metabolic genes (AMGs) were identified and suggested virus-mediated modulation of central carbon metabolism, soil organic matter degradation, polysaccharide binding, and regulation of sporulation. Together, these findings suggest that these soil viruses have distinct ecology, impact host-mediated biogeochemistry, and likely impact ecosystem function in the rapidly changing Arctic. IMPORTANCE This work is part of a 10-year project to examine thawing permafrost peatlands and is the first virome-particle-based approach to characterize viruses in these systems. This method yielded >2-fold-more viral populations (vOTUs) per gigabase of metagenome than vOTUs derived from bulk-soil metagenomes from the same site (J. B. Emerson, S. Roux, J. R. Brum, B. Bolduc, et al., Nat Microbiol 3:870-880, 2018, https://doi.org/10.1038/s41564-018-0190-y). We compared the ecology of the recovered vOTUs along a permafrost thaw gradient and found (i) habitat specificity, (ii) a shift in viral community identity from soil-like to aquatic-like viruses, (iii) infection of dominant microbial hosts, and (iv) carriage of host metabolic genes. These vOTUs can impact ecosystem carbon processing via top-down (inferred from lysing dominant microbial hosts) and bottom-up (inferred from carriage of auxiliary metabolic genes) controls. This work serves as a foundation which future studies can build upon to increase our understanding of the soil virosphere and how viruses affect soil ecosystem services.",2018-09-01 +29897876,A Gabor Feature-Based Quality Assessment Model for the Screen Content Images.,"In this paper, an accurate and efficient full-reference image quality assessment (IQA) model using the extracted Gabor features, called Gabor feature-based model (GFM), is proposed for conducting objective evaluation of screen content images (SCIs). 
It is well-known that the Gabor filters are highly consistent with the response of the human visual system (HVS), and the HVS is highly sensitive to the edge information. Based on these facts, the imaginary part of the Gabor filter that has odd symmetry and yields edge detection is exploited to the luminance of the reference and distorted SCI for extracting their Gabor features, respectively. The local similarities of the extracted Gabor features and two chrominance components, recorded in the LMN color space, are then measured independently. Finally, the Gabor-feature pooling strategy is employed to combine these measurements and generate the final evaluation score. Experimental simulation results obtained from two large SCI databases have shown that the proposed GFM model not only yields a higher consistency with the human perception on the assessment of SCIs but also requires a lower computational complexity, compared with that of classical and state-of-the-art IQA models. The source code for the proposed GFM will be available at http://smartviplab.org/pubilcations/GFM.html.",2018-09-01 +29557811,Efficacy of Extracorporeal Shock Wave Therapy for Lower-Limb Tendinopathy: A Meta-analysis of Randomized Controlled Trials.,"

Objective

Extracorporeal shock wave therapy, including radial shock wave and focused shock wave types, is widely used for managing tendinopathies. The difference in efficacy between the 2 shock wave characteristics with different dosage levels remains controversial, and the purpose of this meta-analysis was to examine it for patients with lower-limb tendinopathy.

Design

A comprehensive search of online databases and search engines was performed. This study included randomized controlled trials reporting the efficacy of extracorporeal shock wave therapy in treating lower-limb tendinopathy. The included randomized controlled trials were subjected to a meta-analysis and risk of bias assessment.

Results

In total, 29 randomized controlled trials were included, all of which had a good methodological quality, with a PEDro score of ≥6/10. General extracorporeal shock wave therapy showed significant effects at the immediate follow-up [pain score: standardized mean difference = -1.41, 95% confidence interval = -2.01 to -0.82, P < 0.00001; function: standardized mean difference = 2.59, 95% confidence interval = 1.54 to 3.64, P < 0.00001] as well as at 3, 6, and ≥12 months. In sequence, high-dosage focused shock wave, high-dosage radial shock wave, and low-dosage radial shock wave had superior pooled effects on overall clinical outcomes.

Conclusions

Extracorporeal shock wave therapy exerted a positive overall effect on pain and function for lower-limb tendinopathy. Shock wave types and dosage levels may have different contributions to treatment efficacy.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) Describe benefits of extracorporeal shock wave therapy for individuals with lower-limb tendinopathy; (2) Understand the impact of dosing and type of extracorporeal shock wave therapy has on treatment efficacy; and (3) Identify appropriate indications for incorporating extracorporeal shock wave therapy into the treatment plan for patients with lower-limb tendinopathy.

Level

Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2018-09-01 +30278585,Identification of key microRNAs and their targets in exosomes of pancreatic cancer using bioinformatics analysis.,"Pancreatic cancer (PC) is one of the most lethal tumors, due to late diagnosis and limited surgical strategies. It has been reported that serum exosomal microRNAs (S-Exo-miRNAs) play a pivotal role as signaling molecules and serve as noninvasive diagnosis methods for PC. The combination of S-Exo-miRNAs with the corresponding target also plays an important role in the tumor microenvironment.Here we investigated S-Exo-miRNAs involved in PC. The gene expression profile was downloaded from the Gene Expression Omnibus (GEO) database. The analysis was carried out using GEO2R. The targets of differentially expressed serum exosomal miRNAs (DE-S-Exo-miRNAs) were predicted by 4 bioinformatic algorithms (miRanda, miRDB, miRWalk, and Targetscan). Further analysis with gene ontology (GO) and Kyoto Encyclopedia of Genomes pathway (KEGG) enrichment analyses were performed with Cytoscape software version 3.4.0. Subsequently, the interaction regulatory network of target genes was performed with the Search Tool for the Retrieval of Interacting Genes (STRING) database (http://www.string-db.org/) and visualized using Cytoscape software.We downloaded the gene expression profile GSE50632, which was based on an Agilent microarray GPL17660 platform containing 4 eligible samples. In total 467 DE-S-Exo-miRNAs were obtained, including 7 overexpressed miRNAs (1.50%), and 460 remaining underexpressed miRNAs (98.50%). 
The databases miRWalk, miRDB, miRanda, and TargetScan were used to predict their potential targets, which were subsequently submitted to Cytoscape software version 3.4.0 (www.cytoscape.org). Next the functional and pathway enrichment analysis were used for the KEGG pathway and GO categories analysis. The enrichment analysis identified the genes involved in such processes as developmental and negative regulation of multicellular organismal processes, regulation of anatomical structure morphogenesis, regulation of cell death, apoptotic processes and mitogen-activated protein kinase (MAPK) signaling pathway, transforming growth factor - beta (TGF -β) signaling pathway, cyclic adenosine monophosphate (cAMP) signaling pathway, and the phosphatidylinositol-3 kinases/Akt (PI3K-Akt) signaling pathway. Subsequently according to the protein-protein interaction (PPI) network, the top 10 genes were obtained. The enrichment analyses of the genes involved in a significant module revealed that these genes were related to the TGF-β signaling pathway. After reviewing the literature, we identified the apoptosis genes, and their corresponding miRNAs that have a relationship with apoptosis of the tumor.This analysis provides a comprehensive understanding of the roles of S-Exo-miRNAs and the related targets in the development of PC. Additionally, the present study provides promising candidate targets for early diagnosis and therapeutic intervention. However, these predictions require further experimental validation in future studies.",2018-09-01 +28025349,viruSITE-integrated database for viral genomics. ,"Viruses are the most abundant biological entities and the reservoir of most of the genetic diversity in the Earth's biosphere. Viral genomes are very diverse, generally short in length and compared to other organisms carry only few genes. viruSITE is a novel database which brings together high-value information compiled from various resources. 
viruSITE covers the whole universe of viruses and focuses on viral genomes, genes and proteins. The database contains information on virus taxonomy, host range, genome features, sequential relatedness as well as the properties and functions of viral genes and proteins. All entries in the database are linked to numerous information resources. The above-mentioned features make viruSITE a comprehensive knowledge hub in the field of viral genomics.The web interface of the database was designed so as to offer an easy-to-navigate, intuitive and user-friendly environment. It provides sophisticated text searching and a taxonomy-based browsing system. viruSITE also allows for an alternative approach based on sequence search. A proprietary genome browser generates a graphical representation of viral genomes. In addition to retrieving and visualising data, users can perform comparative genomics analyses using a variety of tools.Database URL: http://www.virusite.org/.",2016-12-26 +29669403,Epigenetic Applications in Adverse Outcome Pathways and Environmental Risk Evaluation.,"BACKGROUND:The epigenome may be an important interface between environmental chemical exposures and human health. However, the links between epigenetic modifications and health outcomes are often correlative and do not distinguish between cause and effect or common-cause relationships. The Adverse Outcome Pathway (AOP) framework has the potential to demonstrate, by way of an inference- and science-based analysis, the causal relationship between chemical exposures, epigenome, and adverse health outcomes. OBJECTIVE:The objective of this work is to discuss the epigenome as a modifier of exposure effects and risk, perspectives for integrating toxicoepigenetic data into an AOP framework, tools for the exploration of epigenetic toxicity, and integration of AOP-guided epigenetic information into science and risk-assessment processes. 
DISCUSSION:Organizing epigenetic information into the topology of a qualitative AOP network may help describe how a system will respond to epigenetic modifications caused by environmental chemical exposures. However, understanding the biological plausibility, linking epigenetic effects to short- and long-term health outcomes, and including epigenetic studies in the risk assessment process is met by substantive challenges. These obstacles include understanding the complex range of epigenetic modifications and their combinatorial effects, the large number of environmental chemicals to be tested, and the lack of data that quantitatively evaluate the epigenetic effects of environmental exposure. CONCLUSION:We anticipate that epigenetic information organized into AOP frameworks can be consistently used to support biological plausibility and to identify data gaps that will accelerate the pace at which epigenetic information is applied in chemical evaluation and risk-assessment paradigms. https://doi.org/10.1289/EHP2322.",2018-04-12 +31459527,Function Prediction for G Protein-Coupled Receptors through Text Mining and Induction Matrix Completion.,"G protein-coupled receptors (GPCRs) constitute the key component of cellular signal transduction. Accurately annotating the biological functions of GPCR proteins is vital to the understanding of the physiological processes they involve in. With the rapid development of text mining technologies and the exponential growth of biomedical literature, it becomes urgent to explore biological functional information from various literature for systematically and reliably annotating these known GPCRs. We design a novel three-stage approach, TM-IMC, using text mining and inductive matrix completion, for automated prediction of the gene ontology (GO) terms of the GPCR proteins. 
Large-scale benchmark tests show that inductive matrix completion models contribute to GPCR-GO association prediction for both molecular function and biological process aspects. Moreover, our detailed data analysis shows that information extracted from GPCR-associated literature indeed contributes to the prediction of GPCR-GO associations. The study demonstrated a new avenue to enhance the accuracy of GPCR function annotation through the combination of text mining and induction matrix completion over baseline methods in critical assessment of protein function annotation algorithms and literature-based GO annotation methods. Source codes of TM-IMC and the involved datasets can be freely downloaded from https://zhanglab.ccmb.med.umich.edu/TM-IMC for academic purposes.",2019-02-12 +30233390,Identification of Biologically Essential Nodes via Determinative Power in Logical Models of Cellular Processes.,"A variety of biological networks can be modeled as logical or Boolean networks. However, a simplification of the reality to binary states of the nodes does not ease the difficulty of analyzing the dynamics of large, complex networks, such as signal transduction networks, due to the exponential dependence of the state space on the number of nodes. This paper considers a recently introduced method for finding a fairly small subnetwork, representing a collection of nodes that determine the states of most other nodes with a reasonable level of entropy. The subnetwork contains the most determinative nodes that yield the highest information gain. One of the goals of this paper is to propose an algorithm for finding a suitable subnetwork size. The information gain is quantified by the so-called determinative power of the nodes, which is obtained via the mutual information, a concept originating in information theory. We find the most determinative nodes for 36 network models available in the online database Cell Collective (http://cellcollective.org). 
We provide statistical information that indicates a weak correlation between the subnetwork size and other variables, such as network size, or maximum and average determinative power of nodes. We observe that the proportion represented by the subnetwork in comparison to the whole network shows a weak tendency to decrease for larger networks. The determinative power of nodes is weakly correlated to the number of outputs of a node, and it appears to be independent of other topological measures such as closeness or betweenness centrality. Once the subnetwork of the most determinative nodes is identified, we generate a biological function analysis of its nodes for some of the 36 networks. The analysis shows that a large fraction of the most determinative nodes are essential and involved in crucial biological functions. The biological pathway analysis of the most determinative nodes shows that they are involved in important disease pathways.",2018-08-31 +28520848,Pattern fusion analysis by adaptive alignment of multiple heterogeneous omics data.,"

Motivation

Integrating different omics profiles is a challenging task, which provides a comprehensive way to understand complex diseases in a multi-view manner. One key for such an integration is to extract intrinsic patterns in concordance with data structures, so as to discover consistent information across various data types even with noise pollution. Thus, we proposed a novel framework called 'pattern fusion analysis' (PFA), which performs automated information alignment and bias correction, to fuse local sample-patterns (e.g. from each data type) into a global sample-pattern corresponding to phenotypes (e.g. across most data types). In particular, PFA can identify significant sample-patterns from different omics profiles by optimally adjusting the effects of each data type to the patterns, thereby alleviating the problems to process different platforms and different reliability levels of heterogeneous data.

Results

To validate the effectiveness of our method, we first tested PFA on various synthetic datasets, and found that PFA can not only capture the intrinsic sample clustering structures from the multi-omics data in contrast to the state-of-the-art methods, such as iClusterPlus, SNF and moCluster, but also provide an automatic weight-scheme to measure the corresponding contributions by data types or even samples. In addition, the computational results show that PFA can reveal shared and complementary sample-patterns across data types with distinct signal-to-noise ratios in Cancer Cell Line Encyclopedia (CCLE) datasets, and outperforms over other works at identifying clinically distinct cancer subtypes in The Cancer Genome Atlas (TCGA) datasets.

Availability and implementation

PFA has been implemented as a Matlab package, which is available at http://www.sysbio.ac.cn/cb/chenlab/images/PFApackage_0.1.rar .

Contact

lnchen@sibs.ac.cn , liujuan@whu.edu.cn or zengtao@sibs.ac.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +27515739,LCA*: an entropy-based measure for taxonomic assignment within assembled metagenomes.,"

Motivation

A perennial problem in the analysis of environmental sequence information is the assignment of reads or assembled sequences, e.g. contigs or scaffolds, to discrete taxonomic bins. In the absence of reference genomes for most environmental microorganisms, the use of intrinsic nucleotide patterns and phylogenetic anchors can improve assembly-dependent binning needed for more accurate taxonomic and functional annotation in communities of microorganisms, and assist in identifying mobile genetic elements or lateral gene transfer events.

Results

Here, we present a statistic called LCA* inspired by Information and Voting theories that uses the NCBI Taxonomic Database hierarchy to assign taxonomy to contigs assembled from environmental sequence information. The LCA* algorithm identifies a sufficiently strong majority on the hierarchy while minimizing entropy changes to the observed taxonomic distribution resulting in improved statistical properties. Moreover, we apply results from the order-statistic literature to formulate a likelihood-ratio hypothesis test and P-value for testing the supremacy of the assigned LCA* taxonomy. Using simulated and real-world datasets, we empirically demonstrate that voting-based methods, majority vote and LCA*, in the presence of known reference annotations, are consistently more accurate in identifying contig taxonomy than the lowest common ancestor algorithm popularized by MEGAN, and that LCA* taxonomy strikes a balance between specificity and confidence to provide an estimate appropriate to the available information in the data.

Availability and implementation

The LCA* has been implemented as a stand-alone Python library compatible with the MetaPathways pipeline; both of which are available on GitHub with installation instructions and use-cases (http://www.github.com/hallamlab/LCAStar/).

Contact

shallam@mail.ubc.caSupplementary information: Supplementary data are available at Bioinformatics online.",2016-08-11 +31181171,"Volubility, Consonant Emergence, and Syllabic Structure in Infants and Toddlers Later Diagnosed With Childhood Apraxia of Speech, Speech Sound Disorder, and Typical Development: A Retrospective Video Analysis.","Purpose Studies of infants' early vocalizations have proven helpful in describing the developmental characteristics of various communication disorders. However, few studies have addressed the early vocalizations of infants and toddlers who were later diagnosed, as older children, with childhood apraxia of speech (CAS). We refer to these infants and toddlers as LCAS. Extant studies also often lack a comparison group of infants and toddlers who were later diagnosed, as older children, with a speech sound disorder (SSD). We refer to these infants and toddlers as LSSD. We aimed to compare the volubility, consonant emergence, and syllabic structure from birth to age of 2 years, as observed in home videos, among 3 groups of infants and toddlers: LCAS, LSSD, and typically developing (TD). Method We assessed the speech-language skills of 17 children (3.5-8.8 years old; 7 with CAS, 5 with SSD, and 5 TD) and transcribed home videos (obtained from parents) of these same children from birth to age of 2 years. Early vocalizations were coded as nonresonant or resonant. Nonresonant vocalizations could not be transcribed with the International Phonetic Alphabet. Resonant (speechlike) vocalizations were broadly transcribed, and resonant consonants were categorized by place, manner, and voicing. Results Effect size comparisons revealed LCAS infants and toddlers were less voluble, used fewer resonant consonants, had a less diverse phonetic repertoire, and acquired resonant consonants later than either the LSSD or TD participants. 
For LSSD infants and toddlers, means for these dependent variables were lower than the means demonstrated by the TD group, but effect size were not strong due to LSSD variability. Conclusions Findings imply there might be clinical ""red flags"" that could assist the identification of infants and toddlers at risk for later diagnosis of CAS. Data did not support red flags for identifying infants and toddlers at risk for later diagnosis of SSD. Because of significant study limitations, results obtained should be considered preliminary. Supplemental Material https://doi.org/10.23641/asha.8233334.",2019-06-10 +27587677,Characterizing leader sequences of CRISPR loci.,"

Motivation

The CRISPR-Cas system is an adaptive immune system in many archaea and bacteria, which provides resistance against invading genetic elements. The first phase of CRISPR-Cas immunity is called adaptation, in which small DNA fragments are excised from genetic elements and are inserted into a CRISPR array generally adjacent to its so called leader sequence at one end of the array. It has been shown that transcription initiation and adaptation signals of the CRISPR array are located within the leader. However, apart from promoters, there is very little knowledge of sequence or structural motifs or their possible functions. Leader properties have mainly been characterized through transcriptional initiation data from single organisms but large-scale characterization of leaders has remained challenging due to their low level of sequence conservation.

Results

We developed a method to successfully detect leader sequences by focusing on the consensus repeat of the adjacent CRISPR array and weak upstream conservation signals. We applied our tool to the analysis of a comprehensive genomic database and identified several characteristic properties of leader sequences specific to archaea and bacteria, ranging from distinctive sizes to preferential indel localization. CRISPRleader provides a full annotation of the CRISPR array, its strand orientation as well as conserved core leader boundaries that can be uploaded to any genome browser. In addition, it outputs reader-friendly HTML pages for conserved leader clusters from our database.

Availability and implementation

CRISPRleader and multiple sequence alignments for all 195 leader clusters are available at http://www.bioinf.uni-freiburg.de/Software/CRISPRleader/

Contact

costa@informatik.uni-freiburg.de or backofen@informatik.uni-freiburg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +30167882,"""MS-Ready"" structures for non-targeted high-resolution mass spectrometry screening studies.","Chemical database searching has become a fixture in many non-targeted identification workflows based on high-resolution mass spectrometry (HRMS). However, the form of a chemical structure observed in HRMS does not always match the form stored in a database (e.g., the neutral form versus a salt; one component of a mixture rather than the mixture form used in a consumer product). Linking the form of a structure observed via HRMS to its related form(s) within a database will enable the return of all relevant variants of a structure, as well as the related metadata, in a single query. A Konstanz Information Miner (KNIME) workflow has been developed to produce structural representations observed using HRMS (""MS-Ready structures"") and links them to those stored in a database. These MS-Ready structures, and associated mappings to the full chemical representations, are surfaced via the US EPA's Chemistry Dashboard ( https://comptox.epa.gov/dashboard/ ). This article describes the workflow for the generation and linking of ~ 700,000 MS-Ready structures (derived from ~ 760,000 original structures) as well as download, search and export capabilities to serve structure identification using HRMS. The importance of this form of structural representation for HRMS is demonstrated with several examples, including integration with the in silico fragmentation software application MetFrag. 
The structures, search, download and export functionality are all available through the CompTox Chemistry Dashboard, while the MetFrag implementation can be viewed at https://msbi.ipb-halle.de/MetFragBeta/ .",2018-08-30 +30574747,"One-year follow-up showing effects of single intra-articular injection of hyaluronic acid (1,500-2,000 kDa) in symptomatic knee osteoarthritis.","Clinical evidence on knee osteoarthritis suggests that intra-articular administration of hyaluronic acid may be useful in the management of patients with persistent pain. This study assesses the duration of effectiveness of a single intra-articular hyaluronic acid injection in a large population of patients with knee osteoarthritis. This retrospective post-marketing cohort study collected data from the ANTIAGE Registry (http://www.antiagefbf.it/registro), selecting patients of age ≥ 40 years, with symptomatic knee osteoarthritis (Kellgren-Lawrence grade I-III) of ≥ 12 months duration, and ≥12 months of follow-up. Patients had received a single intra-articular injection of high molecular weight hyaluronic acid (1,500-2,000 kDa) at baseline. WOMAC Osteoarthritis Index total scores measured using the LK 3.1 scale and 10 cm VAS pain scores were evaluated before IA Injection and at 6, 9, 10, 11 and 12 months. Blood cell counts, uricemia, erythrocyte sedimentation rates and levels of C-reactive protein were measured at baseline and 12 months. Time from initial treatment to second injection up to 12 months was recorded to assess event-free survival. Included patients (n=187) were 53.5% female and had a mean (±SD) age at baseline of 62 (±16.6) years and mean (±SD) body mass index of 26.2 (±2.5) kg/m2. Mean (±SD) WOMAC index total score and VAS pain scores were 60.9 (±7.1) and 5.9 cm (±1.8), respectively. 
There were statistically significant reductions compared to baseline in mean WOMAC index total score and VAS pain score at all time points (p less than 0.01 at 6 and 9 months; p less than 0.05 at 10, 11 and 12 months for both parameters). These results support the clinical effectiveness and safety of hyaluronic acid for up to 12 months for pain relief and function improvement in patients with knee osteoarthritis, confirming previous data on intra-articular administration of hyaluronic acid as chronic therapy in the management of knee osteoarthritis.",2018-11-01 +29994224,Robust Inductive Matrix Completion Strategy to Explore Associations Between LincRNAs and Human Disease Phenotypes.,"Over the past few years, it has been established that a number of long intergenic non-coding RNAs (lincRNAs) are linked to a wide variety of human diseases. The relationship among many other lincRNAs still remains as puzzle. Validation of such link between the two entities through biological experiments is expensive. However, piles of information about the two are becoming available, thanks to the High Throughput Sequencing (HTS) platforms, Genome Wide Association Studies (GWAS), etc., thereby opening opportunity for cutting-edge machine learning and data mining approaches. However, there are only a few in silico lincRNA-disease association inference tools available to date, and none of these utilizes side information of both the entities. The recently developed Inductive Matrix Completion (IMC) technique provides a recommendation platform among two entities considering respective side information. But, the formulation of IMC is incapable of handling noise and outliers that may present in the dataset, while data sparsity consideration is another issue with the standard IMC method. Thus, a robust version of IMC is needed that can solve these two issues. 
As a remedy, in this paper, we propose Robust Inductive Matrix Completion (RIMC) using l2,1 norm loss function as well as l2,1 norm based regularization. We applied RIMC to the available association data between human lincRNAs and OMIM disease phenotypes as well as a diverse set of side information about the lincRNAs and the diseases. Our method performs better than the state-of-the-art methods in terms of precision@k and recall@k at the top- k disease prioritization to the subject lincRNAs. We also demonstrate that RIMC is equally effective for querying about novel lincRNAs, as well as predicting rank of a newly known disease for a set of well-characterized lincRNAs. Availability: All the supporting datasets are available at the publicly accessible URL located at http://biomecis.uta.edu/~ashis/res/RIMC/.",2018-06-07 +27556884,Bottled SAFT: A Web App Providing SAFT-γ Mie Force Field Parameters for Thousands of Molecular Fluids.,"Coarse-grained molecular simulation has become a popular tool for modeling simple and complex fluids alike. The defining aspects of a coarse grained model are the force field parameters, which must be determined for each particular fluid. Because the number of molecular fluids of interest in nature and in engineering processes is immense, constructing force field parameter tables by individually fitting to experimental data is a futile task. A step toward solving this challenge was taken recently by Mejía et al., who proposed a correlation that provides SAFT-γ Mie force field parameters for a fluid provided one knows the critical temperature, the acentric factor and a liquid density, all relatively accessible properties. Building on this, we have applied the correlation to more than 6000 fluids, and constructed a web application, called ""Bottled SAFT"", which makes this data set easily searchable by CAS number, name or chemical formula. 
Alternatively, the application allows the user to calculate parameters for components not present in the database. Once the intermolecular potential has been found through Bottled SAFT, code snippets are provided for simulating the desired substance using the ""raaSAFT"" framework, which leverages established molecular dynamics codes to run the simulations. The code underlying the web application is written in Python using the Flask microframework; this allows us to provide a modern high-performance web app while also making use of the scientific libraries available in Python. Bottled SAFT aims at taking the complexity out of obtaining force field parameters for a wide range of molecular fluids, and facilitates setting up and running coarse-grained molecular simulations. The web application is freely available at http://www.bottledsaft.org . The underlying source code is available on Bitbucket under a permissive license.",2016-08-31 +26883486,Beyond accuracy: creating interoperable and scalable text-mining web services.,"

Unlabelled

The biomedical literature is a knowledge-rich resource and an important foundation for future research. With over 24 million articles in PubMed and an increasing growth rate, research in automated text processing is becoming increasingly important. We report here our recently developed web-based text mining services for biomedical concept recognition and normalization. Unlike most text-mining software tools, our web services integrate several state-of-the-art entity tagging systems (DNorm, GNormPlus, SR4GN, tmChem and tmVar) and offer a batch-processing mode able to process arbitrary text input (e.g. scholarly publications, patents and medical records) in multiple formats (e.g. BioC). We support multiple standards to make our service interoperable and allow simpler integration with other text-processing pipelines. To maximize scalability, we have preprocessed all PubMed articles, and use a computer cluster for processing large requests of arbitrary text.

Availability and implementation

Our text-mining web service is freely available at http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmTools/#curl

Contact

: Zhiyong.Lu@nih.gov.",2016-02-16 +28060965,The Clinical Research Landscape in Rhode Island.,"

Objectives

To present an overview of clinical research activity and the state of medical research funding in Rhode Island.

Methods

We utilized the clinicaltrials.gov registry to profile clinical studies from 2011 to 2016. NIH RePORT and other federal databases were used to extract information on levels of federal funding. Previously published hospital financial reports were reviewed for data on hospital-specific total external research funding.

Results

During 2011-2016, 1651 clinical studies were registered in clinicaltrials.gov. Nearly a third of all clinical studies were in oncology (21%) and cardiovascular diseases (10%). Alzheimer's dementia, breast cancer, HIV, and hepatitis C accounted for nearly 17% of all clinical trials. Seventy-five percent (75%) of clinical trials in RI were conducted in hospitals affiliated with Lifespan or Care New England. Financial support for clinical trials largely came from industry (60%) with 23% being supported by the National Institutes of Health (NIH). The rest are funded by nonprofit organizations, charitable foundations, educational institutions, and unlisted concerns. [Full article available at http://rimed.org/rimedicaljournal-2017-01.asp].",2017-01-06 +30172046,CIRCpedia v2: An Updated Database for Comprehensive Circular RNA Annotation and Expression Comparison.,"Circular RNAs (circRNAs) from back-splicing of exon(s) have been recently identified to be broadly expressed in eukaryotes, in tissue- and species-specific manners. Although functions of most circRNAs remain elusive, some circRNAs are shown to be functional in gene expression regulation and potentially relate to diseases. Due to their stability, circRNAs can also be used as biomarkers for diagnosis. Profiling circRNAs by integrating their expression among different samples thus provides molecular basis for further functional study of circRNAs and their potential application in clinic. Here, we report CIRCpedia v2, an updated database for comprehensive circRNA annotation from over 180 RNA-seq datasets across six different species. This atlas allows users to search, browse, and download circRNAs with expression features in various cell types/tissues, including disease samples. In addition, the updated database incorporates conservation analysis of circRNAs between humans and mice. Finally, the web interface also contains computational tools to compare circRNA expression among samples. 
CIRCpedia v2 is accessible at http://www.picb.ac.cn/rnomics/circpedia.",2018-08-29 +31080346,Sequence-based Prediction of Protein-Protein Interactions Using Gray Wolf Optimizer-Based Relevance Vector Machine.,"Protein-protein interactions (PPIs) are essential to a number of biological processes. The PPIs generated by biological experiment are both time-consuming and expensive. Therefore, many computational methods have been proposed to identify PPIs. However, most of these methods are limited as they are difficult to compute and rely on a large number of homologous proteins. Accordingly, it is urgent to develop effective computational methods to detect PPIs using only protein sequence information. The kernel parameter of relevance vector machine (RVM) is set by experience, which may not obtain the optimal solution, affecting the prediction performance of RVM. In this work, we presented a novel computational approach called GWORVM-BIG, which used Bi-gram (BIG) to represent protein sequences on a position-specific scoring matrix (PSSM) and GWORVM classifier to perform classification for predicting PPIs. More specifically, the proposed GWORVM model can obtain the optimum solution of kernel parameters using gray wolf optimizer approach, which has the advantages of less control parameters, strong global optimization ability, and ease of implementation compared with other optimization algorithms. The experimental results on yeast and human data sets demonstrated the good accuracy and efficiency of the proposed GWORVM-BIG method. The results showed that the proposed GWORVM classifier can significantly improve the prediction performance compared with the RVM model using other optimizer algorithms including grid search (GS), genetic algorithm (GA), and particle swarm optimization (PSO). 
In addition, the proposed method is also compared with other existing algorithms, and the experimental results further indicated that the proposed GWORVM-BIG model yields excellent prediction performance. For facilitating extensive studies for future proteomics research, the GWORVMBIG server is freely available for academic use at http://219.219.62.123:8888/GWORVMBIG.",2019-05-02 +30802494,"High-quality genome assembly of the silkworm, Bombyx mori.","In 2008, the genome assembly and gene models for the domestic silkworm, Bombyx mori, were published by a Japanese and Chinese collaboration group. However, the genome assembly contains a non-negligible number of misassembled and gap regions due to the presence of many repetitive sequences within the silkworm genome. The erroneous genome assembly occasionally causes incorrect gene prediction. Here we performed hybrid assembly based on 140 × deep sequencing of long (PacBio) and short (Illumina) reads. The remaining gaps in the initial genome assembly were closed using BAC and Fosmid sequences, giving a new total length of 460.3 Mb, with 30 gap regions and an N50 comprising 16.8 Mb in scaffolds and 12.2 Mb in contigs. More RNA-seq and piRNA-seq reads were mapped on the new genome assembly compared with the previous version, indicating that the new genome assembly covers more transcribed regions, including repetitive elements. We performed gene prediction based on the new genome assembly using available mRNA and protein sequence data. The number of gene models was 16,880 with an N50 of 2154 bp. The new gene models reflected more accurate coding sequences and gene sets than old ones. The proportion of repetitive elements was also reestimated using the new genome assembly, and was calculated to be 46.8% in the silkworm genome. 
The new genome assembly and gene models are provided in SilkBase (http://silkbase.ab.a.u-tokyo.ac.jp).",2019-02-23 +31111198,TAAR1 levels and sub-cellular distribution are cell line but not breast cancer subtype specific.,"Trace amine-associated receptors are G protein-coupled receptors of which TAAR1 is the most well-studied. Recently, Vattai et al. (J Cancer Res Clin Oncol 143:1637-1647 https://doi.org/10.1007/s00432-017-2420-8 , 2017) reported that expression of TAAR1 may be a marker of breast cancer (BC) survival, with a positive correlation also suggested between TAAR1 expression and HER2 positivity. Neither a role for TAAR1 in breast tissue, nor in cancer, had previously been suspected. We, therefore, sought to provide independent validation and to further examine these putative relationships. First, a bioinformatic analysis on 58 total samples including normal breast tissue, BC-related cell lines, and tumour samples representing different BC sub-types found no clear correlation between TAAR1 mRNA levels and any BC subtype, including HER2 + . We next confirmed the bioinformatics data correlated to protein expression using a well validated anti-human TAAR1 antibody. TAAR1 mRNA levels correlated with the relative intensity of immunofluorescence staining in six BC cell lines (MCF-7, T47D, MDA-MB-231, SKBR3, MDA-MB-468, BT-474), but not in the MCF-10A immortalized mammary gland line, which had high mRNA but low protein levels. As expected, TAAR1 protein was intracellular in all cell lines. Surprisingly MCF-7, SKBR3, and MDA-MB-468 showed pronounced nuclear localization. The relative protein expression in MCF-7, MDA-MB-231, and MCF-10A lines was further confirmed by semi-quantitative flow cytometry. Finally, we demonstrate that the commercially available anti-TAAR1 antibody has poor selectivity, which likely explains the lack of correlation with the previous study. 
Therefore, while we clearly demonstrate variable expression and sub-cellular localization of TAAR1 across BC cell lines, we find no evidence for association with BC subtype.",2019-05-21 +28666356,modPhEA: model organism Phenotype Enrichment Analysis of eukaryotic gene sets.,"

Motivation

Genome-scale phenotypic data are available for many model organisms, yet existing tools to functionally interpret gene sets from these phenotypic data are largely based on mutagenesis-derived phenotypes observed in mouse or human.

Results

Data from both mutagenesis and knockdown experiments are incorporated into modPhEA to allow users to perform enrichment analyses based on phenotypes observed in budding yeast (Saccharomyces cerevisiae), roundworm (Caenorhabditis elegans), fruit fly (Drosophila melanogaster), zebrafish (Danio rerio), mouse (Mus musculus) and humans (Homo sapiens). The phenotypes analysed can be customized to investigate complex traits and gene sets from any fully sequenced animal or fungal genome are also supported by modPhEA.

Availability and implementation

Freely available on the web at http://evol.nhri.org.tw/modPhEA/.

Contact

liaoby@nhri.org.tw.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +29860484,VarAFT: a variant annotation and filtration system for human next generation sequencing data.,"With the rapidly developing high-throughput sequencing technologies known as next generation sequencing or NGS, our approach to gene hunting and diagnosis has drastically changed. In <10 years, these technologies have moved from gene panel to whole genome sequencing and from an exclusively research context to clinical practice. Today, the limit is not the sequencing of one, many or all genes but rather the data analysis. Consequently, the challenge is to rapidly and efficiently identify disease-causing mutations within millions of variants. To do so, we developed the VarAFT software to annotate and pinpoint human disease-causing mutations through access to multiple layers of information. VarAFT was designed both for research and clinical contexts and is accessible to all scientists, regardless of bioinformatics training. Data from multiple samples may be combined to address all Mendelian inheritance modes, cancers or population genetics. Optimized filtration parameters can be stored and re-applied to large datasets. In addition to classical annotations from dbNSFP, VarAFT contains unique features at the disease (OMIM), phenotypic (HPO), gene (Gene Ontology, pathways) and variation levels (predictions from UMD-Predictor and Human Splicing Finder) that can be combined to optimally select candidate pathogenic mutations. VarAFT is freely available at: http://varaft.eu.",2018-07-01 +23967085,Aureolib - a proteome signature library: towards an understanding of staphylococcus aureus pathophysiology.,"Gel-based proteomics is a powerful approach to study the physiology of Staphylococcus aureus under various growth restricting conditions. We analyzed 679 protein spots from a reference 2-dimensional gel of cytosolic proteins of S. aureus COL by mass spectrometry resulting in 521 different proteins. 
4,692 time dependent protein synthesis profiles were generated by exposing S. aureus to nine infection-related stress and starvation stimuli (H2O2, diamide, paraquat, NO, fermentation, nitrate respiration, heat shock, puromycin, mupirocin). These expression profiles are stored in an online resource called Aureolib (http://www.aureolib.de). Moreover, information on target genes of 75 regulators and regulatory elements were included in the database. Cross-comparisons of this extensive data collection of protein synthesis profiles using the tools implemented in Aureolib lead to the identification of stress and starvation specific marker proteins. Altogether, 226 protein synthesis profiles showed induction ratios of 2.5-fold or higher under at least one of the tested conditions with 157 protein synthesis profiles specifically induced in response to a single stimulus. The respective proteins might serve as marker proteins for the corresponding stimulus. By contrast, proteins whose synthesis was increased or repressed in response to more than four stimuli are rather exceptional. The only protein that was induced by six stimuli is the universal stress protein SACOL1759. Most strikingly, cluster analyses of synthesis profiles of proteins differentially synthesized under at least one condition revealed only in rare cases a grouping that correlated with known regulon structures. The most prominent examples are the GapR, Rex, and CtsR regulon. In contrast, protein synthesis profiles of proteins belonging to the CodY and σ(B) regulon are widely distributed. In summary, Aureolib is by far the most comprehensive protein expression database for S. aureus and provides an essential tool to decipher more complex adaptation processes in S. 
aureus during host pathogen interaction.",2013-08-13 +30088146,Health-related quality of life associated with different symptoms in women and in men who suffer from schizophrenia.,"Health-related quality of life (HRQoL) in patients with schizophrenia is related to the severity of psychiatric symptoms. The objective of this study is to analyze whether the symptoms that influence HRQoL are similar in women and men. Data were part of the Pattern study, an international observational investigation which collected data from 1379 outpatients with schizophrenia. Patients were evaluated with the Mini International Neuropsychiatric Inventory, the Clinical Global Impression-Schizophrenia, and the Positive and Negative Syndrome Scale (PANSS), and reported their quality of life using the Schizophrenia Quality of Life Scale (SQLS), the Short Form-36 (SF-36), and the EuroQol-5 Dimension (EQ-5D). Men reported higher HRQoL on all scales. PANSS total score was 80.6 (SD 23.6) for women and 77.9 (SD 22.1) for men. In women, a higher PANSS negative score and a higher PANSS affective score were associated with a lower SQLS score. In men, a higher PANSS positive score and a higher PANSS affective score were associated with a lower SQLS score. The same pattern appeared with EQ-VAS and EQ-5D tariff. In women, greater age and higher PANSS affective score were associated with a lower SF-36 mental component score. In men, higher PANSS affective, positive, and cognitive scores were associated with a lower SF-36 mental component score. This study shows that HRQoL is influenced by different psychiatric symptoms in women and men. 
This may have significant implications when deciding the main treatment target in patients with schizophrenia. ClinicalTrials.gov Identifier: https://clinicaltrials.gov/ct2/show/NCT01634542.",2018-08-07 +31458826,MCMap-A Computational Tool for Mapping Energy Landscapes of Transient Protein-Protein Interactions.,"MCMap is a tool particularly well-suited for analyzing energy landscapes of transient macromolecular complexes. The program applies a Monte Carlo strategy, where the ligand moves randomly in the electrostatic field of the receptor. By applying importance sampling, the major interaction sites are mapped, resulting in a global distribution of ligand-receptor complexes. This approach displays the dynamic character of transiently interacting protein complexes where not a single complex but an ensemble of complexes better describes the protein interactions. The software provides a broad range of analysis options which allow for relating the simulations to experimental data and for interpreting them on a structural level. The application of MCMap is exemplified by the electron-transfer complex of cytochrome c peroxidase and cytochrome c from baker's yeast. The functionality of MCMap and the visualization of simulation data are in particular demonstrated by studying the dependence of the association on ionic strength and on the oxidation state of the binding partner. Furthermore, microscopically, a repulsion of a second ligand can be seen in the ternary complex upon the change of the oxidation state of the bound cytochrome c. 
The software is made available as open source software together with the example and can be downloaded free of charge from http://www.bisb.uni-bayreuth.de/index.php?page=downloads.",2018-06-18 +31084509,Vowel Formants in Normal and Loud Speech.,"Purpose This study evaluated how 1st and 2nd vowel formant frequencies (F1, F2) differ between normal and loud speech in multiple speaking tasks to assess claims that loudness leads to exaggerated vowel articulation. Method Eleven healthy German-speaking women produced normal and loud speech in 3 tasks that varied in the degree of spontaneity: reading sentences that contained isolated /i: a: u:/, responding to questions that included target words with controlled consonantal contexts but varying vowel qualities, and a recipe recall task. Loudness variation was elicited naturalistically by changing interlocutor distance. First and 2nd formant frequencies and average sound pressure level were obtained from the stressed vowels in the target words, and vowel space area was calculated from /i: a: u:/. Results Comparisons across many vowels indicated that high, tense vowels showed limited formant variation as a function of loudness. Analysis of /i: a: u:/ across speech tasks revealed vowel space reduction in the recipe retell task compared to the other 2. Loudness changes for F1 were consistent in direction but variable in extent, with few significant results for high tense vowels. Results for F2 were quite varied and frequently not significant. Speakers differed in how loudness and task affected formant values. Finally, correlations between sound pressure level and F1 were generally positive but varied in magnitude across vowels, with the high tense vowels showing very flat slopes. 
Discussion These data indicate that naturalistically elicited loud speech in typical speakers does not always lead to changes in vowel formant frequencies and call into question the notion that increasing loudness is necessarily an automatic method of expanding the vowel space. Supplemental Material https://doi.org/10.23641/asha.8061740.",2019-05-01 +28137933,Genomic analysis of urogenital and rectal Neisseria meningitidis isolates reveals encapsulated hyperinvasive meningococci and coincident multidrug-resistant gonococci.,"

Objective

Invasive meningococcal disease (IMD) outbreaks in men who have sex with men (MSM) have been associated with meningococcal colonisation of the urethra and rectum, but little is known about this colonisation or co-colonisation with the closely related gonococcus. Whole genome sequencing (WGS) was employed to explore these phenomena.

Methods

Meningococci isolated from the urogenital tract and rectum (n=23) and coincident gonococci (n=14) were analysed by WGS along with contemporary meningococci from IMD (n=11). All isolates were obtained from hospital admissions in Brighton, UK, 2011-2013. Assembled WGS were deposited in the PubMLST/neisseria database (http://pubmlst.org/neisseria) and compared at genomic loci common to gonococci or meningococci.

Results

As expected, most meningococci from IMD were encapsulated and belonged to hyperinvasive lineages. So too were meningococci found in the urogenital tract and rectum, contrasting to those asymptomatically carried in the nasopharynx where such meningococci are rare. Five hyperinvasive meningococcal lineages and four distinct gonococcal genotypes were recovered, including multiresistant ST-1901 (NG MAST-1407) gonococci.

Conclusions

These data were consistent with a predisposition for potentially virulent encapsulated hyperinvasive meningococci to colonise the urethra and rectum, which suggests their involvement in MSM IMD outbreaks. The coincidence of multiresistant gonococci raises wider public health concerns.",2017-01-30 +30511150,Bioinformatics Protocols for Quickly Obtaining Large-Scale Data Sets for Phylogenetic Inferences.,"Useful insight into the evolution of genes and gene families can be provided by the analysis of all available genome datasets rather than just a few, which are usually those of model species. Handling and transforming such datasets into the desired format for downstream analyses is, however, often a difficult and time-consuming task for researchers without a background in informatics. Therefore, we present two simple and fast protocols for data preparation, using an easy-to-install, open-source, cross-platform software application with user-friendly, rich graphical user interface (SEDA; http://www.sing-group.org/seda/index.html ). The first protocol is a substantial improvement over one recently published (López-Fernández et al. Practical applications of computational biology and bioinformatics, 12th International conference. Springer, Cham, pp 88-96 (2019)[1]), which was used to study the evolution of GULO, a gene that encodes the enzyme responsible for the last step of vitamin C synthesis. In this paper, we show how the sequence data file used for the phylogenetic analyses can now be obtained much faster by changing the way coding sequence isoforms are removed, using the newly implemented SEDA operation ""Remove isoforms"". This protocol can be used to easily show that putative functional GULO genes are present in several Protostomian groups such as Molluscs, Priapulida and Arachnida. Such findings could have been easily missed if only a few Protostomian model species had been used. 
The second protocol allowed us to identify positively selected amino acid sites in a set of 19 primate HLA immunity genes. Interestingly, the proteins encoded by MHC class II genes can show just as many positively selected amino acid sites as those encoded by classical MHC class I genes. Although a significant percentage of codons, which can be as high as 14.8%, are evolving under positive selection, the main mode of evolution of HLA immunity genes is purifying selection. Using a large number of primate species, the probability of missing the identification of positively selected amino acid sites is lower. Both projects were performed in less than one week, and most of the time was spent running the analyses rather than preparing the files. Such protocols can be easily adapted to answer many other questions using a phylogenetic approach.",2018-12-03 +20672376,pfSNP: An integrated potentially functional SNP resource that facilitates hypotheses generation through knowledge syntheses.,"Currently, >14,000,000 single nucleotide polymorphisms (SNPs) are reported. Identifying phenotype-affecting SNPs among these many SNPs pose significant challenges. Although several Web resources are available that can inform about the functionality of SNPs, these resources are mainly annotation databases and are not very comprehensive. In this article, we present a comprehensive, well-annotated, integrated pfSNP (potentially functional SNPs) Web resource (http://pfs.nus.edu.sg/), which is aimed to facilitate better hypothesis generation through knowledge syntheses mediated by better data integration and a user-friendly Web interface. pfSNP integrates >40 different algorithms/resources to interrogate >14,000,000 SNPs from the dbSNP database for SNPs of potential functional significance based on previous published reports, inferred potential functionality from genetic approaches as well as predicted potential functionality from sequence motifs. 
Its query interface has the user-friendly ""auto-complete, prompt-as-you-type"" feature and is highly customizable, facilitating different combination of queries using Boolean-logic. Additionally, to facilitate better understanding of the results and aid in hypotheses generation, gene/pathway-level information with text clouds highlighting enriched tissues/pathways as well as detailed-related information are also provided on the results page. Hence, the pfSNP resource will be of great interest to scientists focusing on association studies as well as those interested to experimentally address the functionality of SNPs.",2011-01-01 +30329012,farPPI: a webserver for accurate prediction of protein-ligand binding structures for small-molecule PPI inhibitors by MM/PB(GB)SA methods.,"

Summary

Protein-protein interactions (PPIs) have been regarded as an attractive emerging class of therapeutic targets for the development of new treatments. Computational approaches, especially molecular docking, have been extensively employed to predict the binding structures of PPI-inhibitors or discover novel small molecule PPI inhibitors. However, due to the relatively 'undruggable' features of PPI interfaces, accurate predictions of the binding structures for ligands towards PPI targets are quite challenging for most docking algorithms. Here, we constructed a non-redundant pose ranking benchmark dataset for small-molecule PPI inhibitors, which contains 900 binding poses for 184 protein-ligand complexes. Then, we evaluated the performance of MM/PB(GB)SA approaches to identify the correct binding poses for PPI inhibitors, including two Prime MM/GBSA procedures from the Schrödinger suite and seven different MM/PB(GB)SA procedures from the Amber package. Our results showed that MM/PBSA outperformed the Glide SP scoring function (success rate of 58.6%) and MM/GBSA in most cases, especially the PB3 procedure which could achieve an overall success rate of ∼74%. Moreover, the GB6 procedure (success rate of 68.9%) performed much better than the other MM/GBSA procedures, highlighting the excellent potential of the GBNSR6 implicit solvation model for pose ranking. Finally, we developed the webserver of Fast Amber Rescoring for PPI Inhibitors (farPPI), which offers a freely available service to rescore the docking poses for PPI inhibitors by using the MM/PB(GB)SA methods.

Availability and implementation

farPPI web server is freely available at http://cadd.zju.edu.cn/farppi/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-05-01 +28125523,Machine Learning-Based Classification of 38 Years of Spine-Related Literature Into 100 Research Topics.,"

Study design

Retrospective review.

Objective

To identify the top 100 spine research topics.

Summary of background data

Recent advances in ""machine learning,"" or computers learning without explicit instructions, have yielded broad technological advances. Topic modeling algorithms can be applied to large volumes of text to discover quantifiable themes and trends.

Methods

Abstracts were extracted from the National Library of Medicine PubMed database from five prominent peer-reviewed spine journals (European Spine Journal [ESJ], The Spine Journal [SpineJ], Spine, Journal of Spinal Disorders and Techniques [JSDT], Journal of Neurosurgery: Spine [JNS]). Each abstract was entered into a latent Dirichlet allocation model specified to discover 100 topics, resulting in each abstract being assigned a probability of belonging in a topic. Topics were named using the five most frequently appearing terms within that topic. Significance of increasing (""hot"") or decreasing (""cold"") topic popularity over time was evaluated with simple linear regression.

Results

From 1978 to 2015, 25,805 spine-related research articles were extracted and classified into 100 topics. Top two most published topics included ""clinical, surgeons, guidelines, information, care"" (n = 496 articles) and ""pain, back, low, treatment, chronic"" (424). Top two hot trends included ""disc, cervical, replacement, level, arthroplasty"" (+0.05%/yr, P < 0.001), and ""minimally, invasive, approach, technique"" (+0.05%/yr, P < 0.001). By journal, the most published topics were ESJ-""operative, surgery, postoperative, underwent, preoperative""; SpineJ-""clinical, surgeons, guidelines, information, care""; Spine-""pain, back, low, treatment, chronic""; JNS- ""tumor, lesions, rare, present, diagnosis""; JSDT-""cervical, anterior, plate, fusion, ACDF.""

Conclusion

Topics discovered through latent Dirichlet allocation modeling represent unbiased meaningful themes relevant to spine care. Topic dynamics can provide historical context and direction for future research for aspiring investigators and trainees interested in spine careers. Please explore https://singdc.shinyapps.io/spinetopics.

Level of evidence

N/A.",2017-06-01 +30630411,CoMetGeNe: mining conserved neighborhood patterns in metabolic and genomic contexts.,"

Background

In systems biology, there is an acute need for integrative approaches in heterogeneous network mining in order to exploit the continuous flux of genomic data. Simultaneous analysis of the metabolic pathways and genomic context of a given species leads to the identification of patterns consisting in reaction chains catalyzed by products of neighboring genes. Similar such patterns across several species can reveal their mode of conservation throughout the tree of life.

Results

We present CoMetGeNe (COnserved METabolic and GEnomic NEighborhoods), a novel method that identifies metabolic and genomic patterns consisting in maximal trails of reactions being catalyzed by products of neighboring genes. Patterns determined by CoMetGeNe in one species are subsequently employed in order to reflect their degree of conservation across multiple prokaryotic species. These interspecies comparisons help to improve genome annotation and can reveal putative alternative metabolic routes as well as unexpected gene ordering occurrences.

Conclusions

CoMetGeNe is an exploratory tool at both the genomic and the metabolic levels, leading to insights into the conservation of functionally related clusters of neighboring enzyme-coding genes. The open-source CoMetGeNe pipeline is freely available at https://cometgene.lri.fr .",2019-01-10 +30567465,"Large-scale validation of miRNAs by disease association, evolutionary conservation and pathway activity.","The validation of microRNAs (miRNAs) identified by next generation sequencing involves amplification-free and hybridization-based detection of transcripts as criteria for confirming valid miRNAs. Since respective validation is frequently not performed, miRNA repositories likely still contain a substantial fraction of false positive candidates while true miRNAs are not stored in the repositories yet. Especially if downstream analyses are performed with these candidates (e.g. target or pathway prediction), the results may be misleading. In the present study, we evaluated 558 mature miRNAs from miRBase and 1,709 miRNA candidates from next generation sequencing experiments by amplification-free hybridization and investigated their distributions in patients with various disease conditions. Notably, the most significant miRNAs in diseases are often not contained in the miRBase. However, these candidates are evolutionary highly conserved. From the expression patterns, target gene and pathway analyses and evolutionary conservation analyses, we were able to shed light on the complexity of miRNAs in humans. Our data also highlight that a more thorough validation of miRNAs identified by next generation sequencing is required. 
The results are available in miRCarta ( https://mircarta.cs.uni-saarland.de ).",2018-12-26 +26657895,CANTATAdb: A Collection of Plant Long Non-Coding RNAs.,"Long non-coding RNAs (lncRNAs) represent a class of potent regulators of gene expression that are found in a wide array of eukaryotes; however, our knowledge about these molecules in plants is still very limited. In particular, a number of model plant species still lack comprehensive data sets of lncRNAs and their annotations, and very little is known about their biological roles. To meet these shortcomings, we created an online database of lncRNAs in 10 model plant species. The lncRNAs were identified computationally using dozens of publicly available RNA sequencing (RNA-Seq) libraries. Expression values, coding potential, sequence alignments as well as other types of data provide annotation for the identified lncRNAs. In order to better characterize them, we investigated their potential roles in splicing modulation and deregulation of microRNA functions. The data are freely available for searching, browsing and downloading from an online database called CANTATAdb (http://cantata.amu.edu.pl, http://yeti.amu.edu.pl/CANTATA/).",2015-12-12 +27910954,Mal-Lys: prediction of lysine malonylation sites in proteins integrated sequence-based features with mRMR feature selection.,"Lysine malonylation is an important post-translational modification (PTM) in proteins, and has been characterized to be associated with diseases. However, identifying malonyllysine sites still remains to be a great challenge due to the labor-intensive and time-consuming experiments. In view of this situation, the establishment of a useful computational method and the development of an efficient predictor are highly desired. In this study, a predictor Mal-Lys which incorporated residue sequence order information, position-specific amino acid propensity and physicochemical properties was proposed. 
A feature selection method of minimum Redundancy Maximum Relevance (mRMR) was used to select optimal ones from the whole features. With the leave-one-out validation, the value of the area under the curve (AUC) was calculated as 0.8143, whereas 6-, 8- and 10-fold cross-validations had similar AUC values which showed the robustness of the predictor Mal-Lys. The predictor also showed satisfying performance in the experimental data from the UniProt database. Meanwhile, a user-friendly web-server for Mal-Lys is accessible at http://app.aporc.org/Mal-Lys/.",2016-12-02 +27412128,Improved multilocus sequence typing of Burkholderia pseudomallei and closely related species.,"The Burkholderia pseudomallei multilocus sequence typing (MLST) database (http://pubmlst.org/bpseudomallei/) contains the largest global sequence repository for B. pseudomallei and its closest genetic relatives. Using conventional MLST and in silico MLST data derived from publicly available whole-genome sequences, we first defined the phylogenetic relatedness of B. pseudomallei and its nearest neighbours. Based on this analysis, we propose that the recently described B. pseudomallei complex (Bpc) should be expanded to encompass B. pseudomallei, Burkholderia humptydooensis (proposed), Burkholderia mallei, Burkholderia oklahomensis, Burkholderia thailandensis and three unassigned Burkholderia Clades A, B and C (represented by type strains BDU 5, BDU 8 and MSMB0265, respectively). Of note, the MLST narK locus is present in all Bpc species but is missing in all other Burkholderia spp., including all Burkholderia cepacia complex species, with the exception of most Burkholderia ubonensis strains, which contain narK but encode genetically distinct sequences. The presence of narK is thus indicative of a Bpc strain. 
Next, we revisited in silico the performance of the existing MLST primers, which prompted redesign of primers targeting the gmhD, lepA, lipA, narK and ndh loci to encompass genetic diversity among Bpc strains and to address amplification/sequencing issues. We show in silico and in vitro that the redesigned primers yield good-quality amplification and sequencing results for the gmhD, lepA, lipA, narK and ndh loci in Bpc species. These primers provide an alternative for amplification and sequencing of MLST loci in Bpc species in cases when poor-quality amplification or sequencing data are obtained using the original MLST primers.",2016-07-13 +26578571,DNA data bank of Japan (DDBJ) progress report.,"The DNA Data Bank of Japan Center (DDBJ Center; http://www.ddbj.nig.ac.jp) maintains and provides public archival, retrieval and analytical services for biological information. The contents of the DDBJ databases are shared with the US National Center for Biotechnology Information (NCBI) and the European Bioinformatics Institute (EBI) within the framework of the International Nucleotide Sequence Database Collaboration (INSDC). Since 2013, the DDBJ Center has been operating the Japanese Genotype-phenotype Archive (JGA) in collaboration with the National Bioscience Database Center (NBDC) in Japan. In addition, the DDBJ Center develops semantic web technologies for data integration and sharing in collaboration with the Database Center for Life Science (DBCLS) in Japan. This paper briefly reports on the activities of the DDBJ Center over the past year including submissions to databases and improvements in our services for data retrieval, analysis, and integration.",2015-11-17 +31725865,PTS: a pharmaceutical target seeker. ,"Identifying protein targets for a bioactive compound is critical in drug discovery. Molecular similarity is a main approach to fish drug targets, and is based upon an axiom that similar compounds may have the same targets. 
The molecular structural similarity of a compound and the ligand of a known target can be gauged in topological (2D), steric (3D) or static (pharmacophoric) metric. The topologic metric is fast, but unable to represent steric and static profile of a bioactive compound. Steric and static metrics reflect the shape properties of a compound if its structure were experimentally obtained, and could be unreliable if they were based upon the putative conformation data. In this paper, we report a pharmaceutical target seeker (PTS), which searches protein targets for a bioactive compound based upon the static and steric shape comparison by comparing a compound structure against the experimental ligand structure. Especially, the crystal structures of active compounds were taken into similarity calculation and the predicted targets can be filtered according to multi activity thresholds. PTS has a pharmaceutical target database that contains approximately 250 000 ligands annotated with about 2300 protein targets. A visualization tool is provided for a user to examine the result. Database URL: http://www.rcdd.org.cn/PTS.",2017-01-01 +21936816,Collation and data-mining of literature bioactivity data for drug discovery.,"The challenge of translating the huge amount of genomic and biochemical data into new drugs is a costly and challenging task. Historically, there has been comparatively little focus on linking the biochemical and chemical worlds. To address this need, we have developed ChEMBL, an online resource of small-molecule SAR (structure-activity relationship) data, which can be used to support chemical biology, lead discovery and target selection in drug discovery. The database contains the abstracted structures, properties and biological activities for over 700000 distinct compounds and in excess of more than 3 million bioactivity records abstracted from over 40000 publications. Additional public domain resources can be readily integrated into the same data model (e.g. 
PubChem BioAssay data). The compounds in ChEMBL are largely extracted from the primary medicinal chemistry literature, and are therefore usually 'drug-like' or 'lead-like' small molecules with full experimental context. The data cover a significant fraction of the discovery of modern drugs, and are useful in a wide range of drug design and discovery tasks. In addition to the compound data, ChEMBL also contains information for over 8000 protein, cell line and whole-organism 'targets', with over 4000 of those being proteins linked to their underlying genes. The database is searchable both chemically, using an interactive compound sketch tool, protein sequences, family hierarchies, SMILES strings, compound research codes and key words, and biologically, using a variety of gene identifiers, protein sequence similarity and protein families. The information retrieved can then be readily filtered and downloaded into various formats. ChEMBL can be accessed online at https://www.ebi.ac.uk/chembldb.",2011-10-01 +26660198,CB Database: A change blindness database for objects in natural indoor scenes.,"Change blindness has been a topic of interest in cognitive sciences for decades. Change detection experiments are frequently used for studying various research topics such as attention and perception. However, creating change detection stimuli is tedious and there is no open repository of such stimuli using natural scenes. We introduce the Change Blindness (CB) Database with object changes in 130 colored images of natural indoor scenes. The size and eccentricity are provided for all the changes as well as reaction time data from a baseline experiment. In addition, we have two specialized satellite databases that are subsets of the 130 images. In one set, changes are seen in rooms or in mirrors in those rooms (Mirror Change Database). In the other, changes occur in a room or out a window (Window Change Database). 
Both the sets have controlled background, change size, and eccentricity. The CB Database is intended to provide researchers with a stimulus set of natural scenes with defined stimulus parameters that can be used for a wide range of experiments. The CB Database can be found at http://search.bwh.harvard.edu/new/CBDatabase.html .",2016-12-01 +27914894,SCOPe: Manual Curation and Artifact Removal in the Structural Classification of Proteins - extended Database.,"SCOPe (Structural Classification of Proteins-extended, http://scop.berkeley.edu) is a database of relationships between protein structures that extends the Structural Classification of Proteins (SCOP) database. SCOP is an expert-curated ordering of domains from the majority of proteins of known structure in a hierarchy according to structural and evolutionary relationships. SCOPe classifies the majority of protein structures released since SCOP development concluded in 2009, using a combination of manual curation and highly precise automated tools, aiming to have the same accuracy as fully hand-curated SCOP releases. SCOPe also incorporates and updates the ASTRAL compendium, which provides several databases and tools to aid in the analysis of the sequences and structures of proteins classified in SCOPe. SCOPe continues high-quality manual classification of new superfamilies, a key feature of SCOP. Artifacts such as expression tags are now separated into their own class, in order to distinguish them from the homology-based annotations in the remainder of the SCOPe hierarchy. SCOPe 2.06 contains 77,439 Protein Data Bank entries, double the 38,221 structures classified in SCOP.",2016-11-30 +28919965,The Cerrado (Brazil) plant cytogenetics database.,"Cerrado is a biodiversity hotspot that has lost ca. 50% of its original vegetation cover and hosts ca. 11,000 species belonging to 1,423 genera of phanerogams. 
For a fraction of those species some cytogenetic characteristics like chromosome numbers and C-value were available in databases, while other valuable information such as karyotype formula and banding patterns are missing. In order to integrate and share all cytogenetic information published for Cerrado species, including frequency of cytogenetic attributes and scientometrics aspects, Cerrado plant species were searched in bibliographic sources, including the 50 richest genera (with more than 45 taxa) and 273 genera with only one species in Cerrado. Determination of frequencies and the database website (http://cyto.shinyapps.io/cerrado) were developed in R. Studies were pooled by employed technique and decade, showing a rise in non-conventional cytogenetics since 2000. However, C-value estimation, heterochromatin staining and molecular cytogenetics are still not common for any family. For the richest and best sampled families, the following modal 2n counts were observed: Oxalidaceae 2n = 12, Lythraceae 2n = 30, Sapindaceae 2n = 24, Solanaceae 2n = 24, Cyperaceae 2n = 10, Poaceae 2n = 20, Asteraceae 2n = 18 and Fabaceae 2n = 26. Chromosome number information is available for only 16.1% of species, while there are genome size data for only 1.25%, being lower than the global percentages. In general, genome sizes were small, ranging from 2C = ca. 1.5 to ca. 3.5 pg. Intra-specific 2n number variation and higher 2n counts were mainly related to polyploidy, which relates to the prevalence of even haploid numbers above the mode of 2n in most major plant clades. Several orphan genera with almost no cytogenetic studies for Cerrado were identified. 
This effort represents a complete diagnosis for cytogenetic attributes of plants of Cerrado.",2017-04-25 +30650795,Comparison of MTF measurements using edge method: towards reference data set.,"A sensor's spatial resolution has traditionally been a difficult concept to define, but all would agree that it is inextricably linked to the Ground Sampling Distance (GSD) and Instantaneous Field of View (IFOV) of an imaging sensor system. As a measure of the geospatial quality of imagery, the Modulation Transfer Function (MTF) of the system is often used along with the signal-to-noise ratio (SNR). However, their calculation is not fully standardized. Further, consistent measurements and comparisons are often hard to obtain. Therefore, in the Infrared and Visible Optical Sensors (IVOS) subgroup of the Working Group on Calibration Validation (WGCV) of the Committee for Earth Observation Satellites (CEOS), a team from various countries and professional entities who are involved in MTF measurement was established to address the issue of on-orbit MTF measurements and comparisons. As a first step, a blind comparison of MTF measurements based on the slanted edge approach has been undertaken. A set of both artificial and actual satellite edge images was developed and a first comparison of processing results was generated. In all, seven organizations contributed to the experiment and several significant results were generated in 2016. No single participant produced the best results for all test images as measured by either the closest to the mean result, or closest to the truth for the synthetic test images. In addition, close estimates of the MTF value at Nyquist did not ensure the accuracy of other MTF values at other spatial frequencies. Some algorithm results showed that the accuracy of their estimates depended upon the type of MTF curve that was being analyzed. 
After the initial analysis, participants were allowed to modify their methodology and reprocess the test images since, in several cases, the results contained errors. Results from the second iteration, in 2017, verified that the anomalies in the experiment's first iteration were due to errors in either coding or methodology, or both. One organization implemented a third trial to fix software errors. This emphasizes the importance of fully understanding both methodology and implementation, in order to ensure accurate and repeatable results. To extend this comparison study, a reference data set, which is composed of edge images and corresponding MTF curves, will be built. A broader audience will be able to access the edge images through the CEOS CalVal Portal (http://calvalportal.ceos.org/). This paper, which is associated with the reference data set, can serve as a new tool to either implement or check, or both, the MTF measurement that relies on the slanted edge method.",2018-12-01 +29761459,Mouse Genome Informatics (MGI) Is the International Resource for Information on the Laboratory Mouse.,"Mouse Genome Informatics (MGI, http://www.informatics.jax.org/ ) web resources provide free access to meticulously curated information about the laboratory mouse. MGI's primary goal is to help researchers investigate the genetic foundations of human diseases by translating information from mouse phenotypes and disease models studies to human systems. MGI provides comprehensive phenotypes for over 50,000 mutant alleles in mice and provides experimental model descriptions for over 1500 human diseases. Curated data from scientific publications are integrated with those from high-throughput phenotyping and gene expression centers. Data are standardized using defined, hierarchical vocabularies such as the Mammalian Phenotype (MP) Ontology, Mouse Developmental Anatomy and the Gene Ontologies (GO). 
This chapter introduces you to Gene and Allele Detail pages and provides step-by-step instructions for simple searches and those that take advantage of the breadth of MGI data integration.",2018-01-01 +31297411,"On-street parking availability data in San Francisco, from stationary sensors and high-mileage probe vehicles.","This dataset contains records of the measured on-street parking availability in San Francisco, obtained from the public API of the SFpark project. In 2011, the San Francisco Municipal Transportation Agency (SFMTA) started a project on smart parking, called SFpark, whose goal was the improvement of on-street parking management in San Francisco, mostly by means of demand-responsive price adjustments [1]. One of the key points of the project was the collection of information about on-street parking availability. To this aim, about 8,000 parking spaces were equipped with specific sensors in the asphalt, periodically broadcasting availability information. The SFpark project made available a public REST API, returning the number of free parking spaces and total number of provided parking spaces per road segment, for 5,314 parking spaces on 579 road segments in the pilot area. We collected parking availability data from 2013/06/13 until 2013/07/24, by querying this API at approximately 5-min intervals. As a result, we obtained in total about 7 million observations of parking availability on the road segments. These observations represent the first dataset we are providing. In addition, we simulated the achievable sensing coverage of on-street parking availability that could be achieved by a fleet of taxis, if they were equipped with sensors able to detect free parking spaces, like side-scanning ultrasonic sensors [3], or windshield-mounted cameras [4]. In particular, by exploiting real taxi trajectories in San Francisco from the Cabspotting project [5], we first computed the frequencies of taxi visits for each road segment covered by the SFpark sensors. 
Then, we downsampled the first dataset, in order to have a parking availability information for a road segment at a given time only in presence of a transit of a taxi on that segment at that time. This step was replicated for 5 different sizes of taxi fleets, namely 100, 200, 300, 400, and 486. Consequently, in total six datasets are available for further research in the field of on-street parking dynamics. All these datasets can be downloaded at: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/YLWCSU.",2019-06-04 +29065900,GRcalculator: an online tool for calculating and mining dose-response data.,"BACKGROUND:Quantifying the response of cell lines to drugs or other perturbagens is the cornerstone of pre-clinical drug development and pharmacogenomics as well as a means to study factors that contribute to sensitivity and resistance. In dividing cells, traditional metrics derived from dose-response curves such as IC50, AUC, and Emax, are confounded by the number of cell divisions taking place during the assay, which varies widely for biological and experimental reasons. Hafner et al. (Nat Meth 13:521-627, 2016) recently proposed an alternative way to quantify drug response, normalized growth rate (GR) inhibition, that is robust to such confounders. Adoption of the GR method is expected to improve the reproducibility of dose-response assays and the reliability of pharmacogenomic associations (Hafner et al. 500-502, 2017). RESULTS:We describe here an interactive website ( www.grcalculator.org ) for calculation, analysis, and visualization of dose-response data using the GR approach and for comparison of GR and traditional metrics. Data can be user-supplied or derived from published datasets. The web tools are implemented in the form of three integrated Shiny applications (grcalculator, grbrowser, and grtutorial) deployed through a Shiny server. 
Intuitive graphical user interfaces (GUIs) allow for interactive analysis and visualization of data. The Shiny applications make use of two R packages (shinyLi and GRmetrics) specifically developed for this purpose. The GRmetrics R package is also available via Bioconductor and can be used for offline data analysis and visualization. Source code for the Shiny applications and associated packages (shinyLi and GRmetrics) can be accessed at www.github.com/uc-bd2k/grcalculator and www.github.com/datarail/gr_metrics . CONCLUSIONS:GRcalculator is a powerful, user-friendly, and free tool to facilitate analysis of dose-response data. It generates publication-ready figures and provides a unified platform for investigators to analyze dose-response data across diverse cell types and perturbagens (including drugs, biological ligands, RNAi, etc.). GRcalculator also provides access to data collected by the NIH LINCS Program ( http://www.lincsproject.org/ ) and other public domain datasets. The GRmetrics Bioconductor package provides computationally trained users with a platform for offline analysis of dose-response data and facilitates inclusion of GR metrics calculations within existing R analysis pipelines. These tools are therefore well suited to users in academia as well as industry.",2017-10-24 +24909981,Montreal Archive of Sleep Studies: an open-access resource for instrument benchmarking and exploratory research.,"Manual processing of sleep recordings is extremely time-consuming. Efforts to automate this process have shown promising results, but automatic systems are generally evaluated on private databases, not allowing accurate cross-validation with other systems. In lacking a common benchmark, the relative performances of different systems are not compared easily and advances are compromised. To address this fundamental methodological impediment to sleep study, we propose an open-access database of polysomnographic biosignals. 
To build this database, whole-night recordings from 200 participants [97 males (aged 42.9 ± 19.8 years) and 103 females (aged 38.3 ± 18.9 years); age range: 18-76 years] were pooled from eight different research protocols performed in three different hospital-based sleep laboratories. All recordings feature a sampling frequency of 256 Hz and an electroencephalography (EEG) montage of 4-20 channels plus standard electro-oculography (EOG), electromyography (EMG), electrocardiography (ECG) and respiratory signals. Access to the database can be obtained through the Montreal Archive of Sleep Studies (MASS) website (http://www.ceams-carsm.ca/en/MASS), and requires only affiliation with a research institution and prior approval by the applicant's local ethical review board. Providing the research community with access to this free and open sleep database is expected to facilitate the development and cross-validation of sleep analysis automation systems. It is also expected that such a shared resource will be a catalyst for cross-centre collaborations on difficult topics such as improving inter-rater agreement on sleep stage scoring.",2014-06-09 +26519406,MaizeGDB: The Maize Genetics and Genomics Database.,"MaizeGDB is the community database for biological information about the crop plant Zea mays. Genomic, genetic, sequence, gene product, functional characterization, literature reference, and person/organization contact information are among the datatypes stored at MaizeGDB. At the project's website ( http://www.maizegdb.org ) are custom interfaces enabling researchers to browse data and to seek out specific information matching explicit search criteria. 
In addition, pre-compiled reports are made available for particular types of data and bulletin boards are provided to facilitate communication and coordination among members of the community of maize geneticists.",2016-01-01 +29155427,PDB-wide identification of biological assemblies from conserved quaternary structure geometry.,"Protein structures are key to understanding biomolecular mechanisms and diseases, yet their interpretation is hampered by limited knowledge of their biologically relevant quaternary structure (QS). A critical challenge in inferring QS information from crystallographic data is distinguishing biological interfaces from fortuitous crystal-packing contacts. Here, we tackled this problem by developing strategies for aligning and comparing QS states across both homologs and data repositories. QS conservation across homologs proved remarkably strong at predicting biological relevance and is implemented in two methods, QSalign and anti-QSalign, for annotating homo-oligomers and monomers, respectively. QS conservation across repositories is implemented in QSbio (http://www.QSbio.org), which approaches the accuracy of manual curation and allowed us to predict >100,000 QS states across the Protein Data Bank. Based on this high-quality data set, we analyzed pairs of structurally conserved interfaces, and this analysis revealed a striking plasticity whereby evolutionary distant interfaces maintain similar interaction geometries through widely divergent chemical properties.",2017-11-20 +29402227,CoVaCS: a consensus variant calling system.,"

Background

The advent and ongoing development of next generation sequencing technologies (NGS) has led to a rapid increase in the rate of human genome re-sequencing data, paving the way for personalized genomics and precision medicine. The body of genome resequencing data is progressively increasing underlining the need for accurate and time-effective bioinformatics systems for genotyping - a crucial prerequisite for identification of candidate causal mutations in diagnostic screens.

Results

Here we present CoVaCS, a fully automated, highly accurate system with a web based graphical interface for genotyping and variant annotation. Extensive tests on a gold standard benchmark data-set -the NA12878 Illumina platinum genome- confirm that call-sets based on our consensus strategy are completely in line with those attained by similar command line based approaches, and far more accurate than call-sets from any individual tool. Importantly our system exhibits better sensitivity and higher specificity than equivalent commercial software.

Conclusions

CoVaCS offers optimized pipelines integrating state of the art tools for variant calling and annotation for whole genome sequencing (WGS), whole-exome sequencing (WES) and target-gene sequencing (TGS) data. The system is currently hosted at Cineca, and offers the speed of a HPC computing facility, a crucial consideration when large numbers of samples must be analysed. Importantly, all the analyses are performed automatically allowing high reproducibility of the results. As such, we believe that CoVaCS can be a valuable tool for the analysis of human genome resequencing studies. CoVaCS is available at: https://bioinformatics.cineca.it/covacs .",2018-02-05 +27235557,The utility of QSARs in predicting acute fish toxicity of pesticide metabolites: A retrospective validation approach.,"The European Plant Protection Products Regulation 1107/2009 requires that registrants establish whether pesticide metabolites pose a risk to the environment. Fish acute toxicity assessments may be carried out to this end. Considering the total number of pesticide (re-) registrations, the number of metabolites can be considerable, and therefore this testing could use many vertebrates. EFSA's recent ""Guidance on tiered risk assessment for plant protection products for aquatic organisms in edge-of-field surface waters"" outlines opportunities to apply non-testing methods, such as Quantitative Structure Activity Relationship (QSAR) models. However, a scientific evidence base is necessary to support the use of QSARs in predicting acute fish toxicity of pesticide metabolites. Widespread application and subsequent regulatory acceptance of such an approach would reduce the numbers of animals used. The work presented here intends to provide this evidence base, by means of retrospective data analysis. Experimental fish LC50 values for 150 metabolites were extracted from the Pesticide Properties Database (http://sitem.herts.ac.uk/aeru/ppdb/en/atoz.htm). 
QSAR calculations were performed to predict fish acute toxicity values for these metabolites using the US EPA's ECOSAR software. The most conservative predicted LC50 values generated by ECOSAR were compared with experimental LC50 values. There was a significant correlation between predicted and experimental fish LC50 values (Spearman rs = 0.6304, p < 0.0001). For 62% of metabolites assessed, the QSAR predicted values are equal to or lower than their respective experimental values. Refined analysis, taking into account data quality and experimental variation considerations increases the proportion of sufficiently predictive estimates to 91%. For eight of the nine outliers, there are plausible explanation(s) for the disparity between measured and predicted LC50 values. Following detailed consideration of the robustness of this non-testing approach, it can be concluded there is a strong data driven rationale for the applicability of QSAR models in the metabolite assessment scheme recommended by EFSA. As such there is value in further refining this approach, to improve the method and enable its future incorporation into regulatory guidance and practice.",2016-05-25 +29042480,Prediction of Protein Complexes in Trypanosoma brucei by Protein Correlation Profiling Mass Spectrometry and Machine Learning.,"A disproportionate number of predicted proteins from the genome sequence of the protozoan parasite Trypanosoma brucei, an important human and animal pathogen, are hypothetical proteins of unknown function. This paper describes a protein correlation profiling mass spectrometry approach, using two size exclusion and one ion exchange chromatography systems, to derive sets of predicted protein complexes in this organism by hierarchical clustering and machine learning methods. These hypothesis-generating proteomic data are provided in an open access online data visualization environment (http://134.36.66.166:8083/complex_explorer). 
The data can be searched conveniently via a user friendly, custom graphical interface. We provide examples of both potential new subunits of known protein complexes and of novel trypanosome complexes of suggested function, contributing to improving the functional annotation of the trypanosome proteome. Data are available via ProteomeXchange with identifier PXD005968.",2017-10-17 +24174544,PlantTFDB 3.0: a portal for the functional and evolutionary study of plant transcription factors.,"With the aim to provide a resource for functional and evolutionary study of plant transcription factors (TFs), we updated the plant TF database PlantTFDB to version 3.0 (http://planttfdb.cbi.pku.edu.cn). After refining the TF classification pipeline, we systematically identified 129 288 TFs from 83 species, of which 67 species have genome sequences, covering main lineages of green plants. Besides the abundant annotation provided in the previous version, we generated more annotations for identified TFs, including expression, regulation, interaction, conserved elements, phenotype information, expert-curated descriptions derived from UniProt, TAIR and NCBI GeneRIF, as well as references to provide clues for functional studies of TFs. To help identify evolutionary relationship among identified TFs, we assigned 69 450 TFs into 3924 orthologous groups, and constructed 9217 phylogenetic trees for TFs within the same families or same orthologous groups, respectively. 
In addition, we set up a TF prediction server in this version for users to identify TFs from their own sequences.",2013-10-29 +30143029,HiGlass: web-based visual exploration and analysis of genome interaction maps.,"We present HiGlass, an open source visualization tool built on web technologies that provides a rich interface for rapid, multiplex, and multiscale navigation of 2D genomic maps alongside 1D genomic tracks, allowing users to combine various data types, synchronize multiple visualization modalities, and share fully customizable views with others. We demonstrate its utility in exploring different experimental conditions, comparing the results of analyses, and creating interactive snapshots to share with collaborators and the broader public. HiGlass is accessible online at http://higlass.io and is also available as a containerized application that can be run on any platform.",2018-08-24 +24637013,PolyTB: a genomic variation map for Mycobacterium tuberculosis.,"Tuberculosis (TB) caused by Mycobacterium tuberculosis (Mtb) is the second major cause of death from an infectious disease worldwide. Recent advances in DNA sequencing are leading to the ability to generate whole genome information in clinical isolates of M. tuberculosis complex (MTBC). The identification of informative genetic variants such as phylogenetic markers and those associated with drug resistance or virulence will help barcode Mtb in the context of epidemiological, diagnostic and clinical studies. Mtb genomic datasets are increasingly available as raw sequences, which are potentially difficult and computer intensive to process, and compare across studies. Here we have processed the raw sequence data (>1500 isolates, eight studies) to compile a catalogue of SNPs (n = 74,039, 63% non-synonymous, 51.1% in more than one isolate, i.e. non-private), small indels (n = 4810) and larger structural variants (n = 800). 
We have developed the PolyTB web-based tool (http://pathogenseq.lshtm.ac.uk/polytb) to visualise the resulting variation and important meta-data (e.g. in silico inferred strain-types, location) within geographical map and phylogenetic views. This resource will allow researchers to identify polymorphisms within candidate genes of interest, as well as examine the genomic diversity and distribution of strains. PolyTB source code is freely available to researchers wishing to develop similar tools for their pathogen of interest.",2014-02-15 +26607492,RNA-Enrich: a cut-off free functional enrichment testing method for RNA-seq with improved detection power.,"

Unlabelled

Tests for differential gene expression with RNA-seq data have a tendency to identify certain types of transcripts as significant, e.g. longer and highly-expressed transcripts. This tendency has been shown to bias gene set enrichment (GSE) testing, which is used to find over- or under-represented biological functions in the data. Yet, there remains a surprising lack of tools for GSE testing specific for RNA-seq. We present a new GSE method for RNA-seq data, RNA-Enrich, that accounts for the above tendency empirically by adjusting for average read count per gene. RNA-Enrich is a quick, flexible method and web-based tool, with 16 available gene annotation databases. It does not require a P-value cut-off to define differential expression, and works well even with small sample-sized experiments. We show that adjusting for read counts per gene improves both the type I error rate and detection power of the test.

Availability and implementation

RNA-Enrich is available at http://lrpath.ncibi.org or from supplemental material as R code.

Contact

sartorma@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-25 +27158917,"Interactive Exploration, Analysis, and Visualization of Complex Phenome-Genome Datasets with ASPIREdb.","Identifying variants causal for complex genetic disorders is challenging. With the advent of whole-exome and whole-genome sequencing, computational tools are needed to explore and analyze the list of variants for further validation. Correlating genetic variants with subject phenotype is crucial for the interpretation of the disease-causing mutations. Often such work is done by teams of researchers who need to share information and coordinate activities. To this end, we have developed a powerful, easy to use Web application, ASPIREdb, which allows researchers to search, organize, analyze, and visualize variants and phenotypes associated with a set of human subjects. Investigators can annotate variants using publicly available reference databases and build powerful queries to identify subjects or variants of interest. Functional information and phenotypic associations of these genes are made accessible as well. Burden analysis and additional reporting tools allow investigation of variant properties and phenotype characteristics. Projects can be shared, allowing researchers to work collaboratively to build queries and annotate the data. We demonstrate ASPIREdb's functionality using publicly available data sets, showing how the software can be used to accomplish goals that might otherwise require specialized bioinformatics expertise. ASPIREdb is available at http://aspiredb.chibi.ubc.ca.",2016-05-20 +30138346,Identification of biomarkers for Barcelona Clinic Liver Cancer staging and overall survival of patients with hepatocellular carcinoma.,"The aim of the current study was to identify biomarkers that correlate with the Barcelona Clinic Liver Cancer (BCLC) staging system and prognosis of patients with hepatocellular carcinoma (HCC). 
We downloaded 4 gene expression datasets from the Gene Expression Omnibus database (http://www.ncbi.nlm.nih.gov/geo), and screened for genes that were differentially expressed between HCC and normal liver tissues, using significance analysis of the microarray algorithm. We used a weighted gene co-expression network analysis (WGCNA) to identify hub genes that correlate with BCLC staging, functional enrichment analysis to associate hub genes with their functions, protein-protein interaction network analysis to identify interactions among hub genes, UALCAN analysis to assess gene expression levels based on tumour stage, and survival analyses to clarify the effects of hub genes on patients' overall survival (OS). We identified 50 relevant hub genes using WGCNA; among them, 13 genes (including TIGD5, C8ORF33, NUDCD1, INSB8, and STIP1) correlated with OS and BCLC staging. Significantly enriched gene ontology biological process terms included RNA processing, non-coding RNA processing and phosphodiester bond hydrolysis, and 6 genes were found to interact with 10 or more hub genes. We identified several candidate biomarkers that correlate with BCLC staging and OS of HCC. These genes might be used for prognostic assessment and selection of HCC patients for surgery, especially those with intermediate or advanced disease.",2018-08-23 +31112286,Urinary tract infection prevention after midurethral slings in pelvic floor reconstructive surgery: A systematic review and meta-analysis.,"INTRODUCTION:Synthetic midurethral slings are the most common procedures currently performed for stress urinary incontinence in women. Infection is a frequent complication of urogynecologic surgery. 
We performed a systematic review and meta-analysis to identify interventions that successfully prevent infections, including urinary tract infection (UTI) and/or bacteriuria, compared with no intervention, in women undergoing midurethral sling surgery with or without concomitant pelvic reconstructive procedures for prolapse. MATERIAL AND METHODS:The primary outcome was the development of any infection post-midurethral sling placement in women. MEDLINE, Embase, CINAHL and the Cochrane Library were searched for comparative studies from inception to July 2017, with no language restrictions. We used search terms related to midurethral sling, infections and infection-reduction interventions. Two independent reviewers abstracted data and assessed study quality. Pooled effect size estimates were calculated. We conducted meta-analysis of eligible studies. A protocol for this review has been registered and can be accessed online (http://hdl.handle.net/2429/64731). RESULTS:We identified seven eligible studies of infection risk-reducing interventions; all focused on UTIs. Only one study assessed preoperative antibiotics with midurethral sling alone and was halted early because of low UTI rates. All other studies (three randomized control trials and three observational studies) examined whether postoperative antibiotics decrease UTI/bacteriuria rates after midurethral sling with or without reconstructive procedures for pelvic organ prolapse and using bladder catheterization postoperatively. Due to considerable clinical heterogeneity, we only combined four studies for meta-analysis. Postoperative oral prophylactic nitrofurantoin showed no significant benefit in reducing UTI/bacteriuria in women post-midurethral sling with or without concomitant reconstructive pelvic surgery and the need for bladder catheterization, when compared with the reference group (pooled relative risk 0.73, 95% confidence interval [CI] 0.42-1.25). 
CONCLUSIONS:Based on the best available evidence, postoperative oral nitrofurantoin is not effective at reducing UTI/bacteriuria rates in catheterized women after midurethral sling with or without concomitant pelvic reconstructive surgery for prolapse. For midurethral sling alone, preoperative antibiotic prophylaxis may not be needed for UTI prevention.",2019-06-20 +21609965,Analysis and visualization of Arabidopsis thaliana GWAS using web 2.0 technologies.,"With large-scale genomic data becoming the norm in biological studies, the storing, integrating, viewing and searching of such data have become a major challenge. In this article, we describe the development of an Arabidopsis thaliana database that hosts the geographic information and genetic polymorphism data for over 6000 accessions and genome-wide association study (GWAS) results for 107 phenotypes representing the largest collection of Arabidopsis polymorphism data and GWAS results to date. Taking advantage of a series of the latest web 2.0 technologies, such as Ajax (Asynchronous JavaScript and XML), GWT (Google-Web-Toolkit), MVC (Model-View-Controller) web framework and Object Relationship Mapper, we have created a web-based application (web app) for the database, that offers an integrated and dynamic view of geographic information, genetic polymorphism and GWAS results. Essential search functionalities are incorporated into the web app to aid reverse genetics research. The database and its web app have proven to be a valuable resource to the Arabidopsis community. The whole framework serves as an example of how biological data, especially GWAS, can be presented and accessed through the web. In the end, we illustrate the potential to gain new insights through the web app by two examples, showcasing how it can be used to facilitate forward and reverse genetics research. 
Database URL: http://arabidopsis.usc.edu/",2011-05-23 +22697250,Normal and compound poisson approximations for pattern occurrences in NGS reads.,"Next generation sequencing (NGS) technologies are now widely used in many biological studies. In NGS, sequence reads are randomly sampled from the genome sequence of interest. Most computational approaches for NGS data first map the reads to the genome and then analyze the data based on the mapped reads. Since many organisms have unknown genome sequences and many reads cannot be uniquely mapped to the genomes even if the genome sequences are known, alternative analytical methods are needed for the study of NGS data. Here we suggest using word patterns to analyze NGS data. Word pattern counting (the study of the probabilistic distribution of the number of occurrences of word patterns in one or multiple long sequences) has played an important role in molecular sequence analysis. However, no studies are available on the distribution of the number of occurrences of word patterns in NGS reads. In this article, we build probabilistic models for the background sequence and the sampling process of the sequence reads from the genome. Based on the models, we provide normal and compound Poisson approximations for the number of occurrences of word patterns from the sequence reads, with bounds on the approximation error. The main challenge is to consider the randomness in generating the long background sequence, as well as in the sampling of the reads using NGS. We show the accuracy of these approximations under a variety of conditions for different patterns with various characteristics. Under realistic assumptions, the compound Poisson approximation seems to outperform the normal approximation in most situations. These approximate distributions can be used to evaluate the statistical significance of the occurrence of patterns from NGS data. 
The theory and the computational algorithm for calculating the approximate distributions are then used to analyze ChIP-Seq data using transcription factor GABP. Software is available online (www-rcf.usc.edu/∼fsun/Programs/NGS_motif_power/NGS_motif_power.html). In addition, Supplementary Material can be found online (www.liebertonline.com/cmb).",2012-06-01 +32391235,DOVE: An Infectious Disease Outbreak Statistics Visualization System.,"Humans are susceptible to various infectious diseases. However, humanity still has limited responses to emergent and recurrent infectious diseases. Recent developments in medical technology have led to various vaccines being developed, but these vaccines typically require a considerable amount of time to counter infectious diseases. Therefore, one of the best methods to prevent infectious diseases is to continuously update our knowledge with useful information from infectious disease information systems and taking active steps to safeguard ourselves against infectious diseases. Some existing infectious disease information systems simply present infectious disease information in the form of text or transmit it via e-mail. Other systems provide data in the form of files or maps. Most existing systems display text-centric information regarding infectious disease outbreaks. Therefore, understanding infectious disease outbreak information at a glance is difficult for users. In this paper, we propose the infectious disease outbreak statistics visualization system, called to DOVE, which collects infectious disease outbreak statistics from the Korea Centers for Disease Control & Prevention and provides statistical charts with district, time, infectious disease, gender, and age data. Users can easily identify infectious disease outbreak statistics at a glance by simply entering the district, time, and name of an infectious disease into our system. 
Additionally, each statistical chart allows users to recognize the characteristics of an infectious disease and predict outbreaks by investigating the outbreak trends of that disease. We believe that our system provides effective information to help prevent infectious disease outbreaks. Our system is currently available on the web at http://www.epidemic.co.kr/statistics.",2018-08-24 +31160012,ONCOhabitats: A system for glioblastoma heterogeneity assessment through MRI.,"

Background

Neuroimaging analysis is currently crucial for an early assessment of glioblastoma, to help improving treatment and tumor follow-up. To this end, multiple functional and morphological MRI sequences are usually employed, requiring the development of automated tools capable to extract the relevant information from these sources. In this work we present ONCOhabitats (https://www.oncohabitats.upv.es): an online open access system for glioblastoma analysis based on MRI data.

Methods

ONCOhabitats provides two main services for untreated glioblastomas: (1) malignant tissue segmentation, and (2) vascular heterogeneity assessment of the tumor. The segmentation service implements a deep patch-wise 3D Convolutional Neural Network with residual connections. The vascular heterogeneity assessment service implements the Hemodynamic Tissue Signature (HTS) method patented in P201431289, which aims to identify habitats within the tumor with early prognostic capabilities.

Results

The segmentation service was validated against the BRATS 2017 reference dataset, showing comparable results with current state-of-the-art methods (whole tumor Dice segmentation: 0.89). The vascular heterogeneity assessment service was validated in a retrospective cohort of 50 patients, in a study focused on predicting patient overall survival based on the HTS habitats. Cox proportional hazard regression analysis and Kaplan-Meier survival study showed significant positive correlations (p-value <.05) between the HTS habitats and patient overall survival. ONCOhabitats system also generates radiological reports for each service, including volumetries and perfusion measurements of the different regions of the lesion.

Conclusion

ONCOhabitats system provides open-access services for glioblastoma heterogeneity assessment, implementing consolidated state-of-the-art techniques for medical image analysis. Additionally, we also give access to the scientific community to our computational resources, offering a computational capacity of about 300 cases per day.",2019-05-16 +22120661,SalmonDB: a bioinformatics resource for Salmo salar and Oncorhynchus mykiss.,"SalmonDB is a new multiorganism database containing EST sequences from Salmo salar, Oncorhynchus mykiss and the whole genome sequence of Danio rerio, Gasterosteus aculeatus, Tetraodon nigroviridis, Oryzias latipes and Takifugu rubripes, built with core components from GMOD project, GOPArc system and the BioMart project. The information provided by this resource includes Gene Ontology terms, metabolic pathways, SNP prediction, CDS prediction, orthologs prediction, several precalculated BLAST searches and domains. It also provides a BLAST server for matching user-provided sequences to any of the databases and an advanced query tool (BioMart) that allows easy browsing of EST databases with user-defined criteria. These tools make SalmonDB database a valuable resource for researchers searching for transcripts and genomic information regarding S. salar and other salmonid species. The database is expected to grow in the near feature, particularly with the S. salar genome sequencing project. Database URL: http://genomicasalmones.dim.uchile.cl/",2011-11-26 +28425058,Finding the traces of behavioral and cognitive processes in big data and naturally occurring datasets.,"Today, people generate and store more data than ever before as they interact with both real and virtual environments. These digital traces of behavior and cognition offer cognitive scientists and psychologists an unprecedented opportunity to test theories outside the laboratory. 
Despite general excitement about big data and naturally occurring datasets among researchers, three ""gaps"" stand in the way of their wider adoption in theory-driven research: the imagination gap, the skills gap, and the culture gap. We outline an approach to bridging these three gaps while respecting our responsibilities to the public as participants in and consumers of the resulting research. To that end, we introduce Data on the Mind ( http://www.dataonthemind.org ), a community-focused initiative aimed at meeting the unprecedented challenges and opportunities of theory-driven research with big data and naturally occurring datasets. We argue that big data and naturally occurring datasets are most powerfully used to supplement-not supplant-traditional experimental paradigms in order to understand human behavior and cognition, and we highlight emerging ethical issues related to the collection, sharing, and use of these powerful datasets.",2017-10-01 +31026256,Genus-wide Leptospira core genome multilocus sequence typing for strain taxonomy and global surveillance.,"Leptospira is a highly heterogeneous bacterial genus that can be divided into three evolutionary lineages and >300 serovars. The causative agents of leptospirosis are responsible of an emerging zoonotic disease worldwide. To advance our understanding of the biodiversity of Leptospira strains at the global level, we evaluated the performance of whole-genome sequencing (WGS) as a genus-wide strain classification and genotyping tool. Herein we propose a set of 545 highly conserved loci as a core genome MLST (cgMLST) genotyping scheme applicable to the entire Leptospira genus, including non-pathogenic species. Evaluation of cgMLST genotyping was undertaken with 509 genomes, including 327 newly sequenced genomes, from diverse species, sources and geographical locations. 
Phylogenetic analysis showed that cgMLST defines species, clades, subclades, clonal groups and cgMLST sequence types (cgST), with high precision and robustness to missing data. Novel Leptospira species, including a novel subclade named S2 (saprophytes 2), were identified. We defined clonal groups (CG) optimally using a single-linkage clustering threshold of 40 allelic mismatches. While some CGs such as L. interrogans CG6 (serogroup Icterohaemorrhagiae) are globally distributed, others are geographically restricted. cgMLST was congruent with classical MLST schemes, but had greatly improved resolution and broader applicability. Single nucleotide polymorphisms within single cgST groups was limited to <30 SNPs, underlining a potential role for cgMLST in epidemiological surveillance. Finally, cgMLST allowed identification of serogroups and closely related serovars. In conclusion, the proposed cgMLST strategy allows high-resolution genotyping of Leptospira isolates across the phylogenetic breadth of the genus. The unified genomic taxonomy of Leptospira strains, available publicly at http://bigsdb.pasteur.fr/leptospira, will facilitate global harmonization of Leptospira genotyping, strain emergence follow-up and novel collaborative studies of the epidemiology and evolution of this emerging pathogen.",2019-04-26 +29718447,Posterior Summarization in Bayesian Phylogenetics Using Tracer 1.7.,"Bayesian inference of phylogeny using Markov chain Monte Carlo (MCMC) plays a central role in understanding evolutionary history from molecular sequence data. Visualizing and analyzing the MCMC-generated samples from the posterior distribution is a key step in any non-trivial Bayesian inference. We present the software package Tracer (version 1.7) for visualizing and analyzing the MCMC trace files generated through Bayesian phylogenetic inference. 
Tracer provides kernel density estimation, multivariate visualization, demographic trajectory reconstruction, conditional posterior distribution summary, and more. Tracer is open-source and available at http://beast.community/tracer.",2018-09-01 +28025342,RiceATM: a platform for identifying the association between rice agronomic traits and miRNA expression. ,"MicroRNAs (miRNAs) are known to play critical roles in plant development and stress-response regulation, and they frequently display multi-targeting characteristics. The control of defined rice phenotypes occurs through multiple genes; however, evidence demonstrating the relationship between agronomic traits and miRNA expression profiles is lacking. In this study, we investigated eight yield-related traits in 187 local rice cultivars and profiled the expression levels of 193 miRNAs in these cultivars using microarray analyses. By integrating the miRBase database, the rice annotation project database, and the miRanda and psRNATarget web servers, we constructed a database (RiceATM) that can be employed to investigate the association between rice agronomic traits and miRNA expression. The functions of this platform include phenotype selection, sample grouping, microarray data pretreatment, statistical analysis and target gene predictions. To demonstrate the utility of RiceATM, we used the database to identify four miRNAs associated with the heading date and validated their expression trends in the cultivars with early or late heading date by real-time PCR. RiceATM is a useful tool for researchers seeking to characterize the role of certain miRNAs for a specific phenotype and discover potential biomarkers for breeding or functional studies.Database URL: http://syslab3.nchu.edu.tw/rice/.",2016-12-26 +22543366,DAVID-WS: a stateful web service to facilitate gene/protein list analysis.,"

Summary

The database for annotation, visualization and integrated discovery (DAVID), which can be freely accessed at http://david.abcc.ncifcrf.gov/, is a web-based online bioinformatics resource that aims to provide tools for the functional interpretation of large lists of genes/proteins. It has been used by researchers from more than 5000 institutes worldwide, with a daily submission rate of ∼1200 gene lists from ∼400 unique researchers, and has been cited by more than 6000 scientific publications. However, the current web interface does not support programmatic access to DAVID, and the uniform resource locator (URL)-based application programming interface (API) has a limit on URL size and is stateless in nature as it uses URL request and response messages to communicate with the server, without keeping any state-related details. DAVID-WS (web service) has been developed to automate user tasks by providing stateful web services to access DAVID programmatically without the need for human interactions.

Availability

The web service and sample clients (written in Java, Perl, Python and Matlab) are made freely available under the DAVID License at http://david.abcc.ncifcrf.gov/content.jsp?file=WS.html.",2012-04-27 +29516096,Development and Validation of a Prediction Model for Pain and Functional Outcomes After Lumbar Spine Surgery.,"

Importance

Functional impairment and pain are common indications for the initiation of lumbar spine surgery, but information about expected improvement in these patient-reported outcome (PRO) domains is not readily available to most patients and clinicians considering this type of surgery.

Objective

To assess population-level PRO response after lumbar spine surgery, and develop/validate a prediction tool for PRO improvement.

Design, setting, and participants

This statewide multicenter cohort was based at 15 Washington state hospitals representing approximately 75% of the state's spine fusion procedures. The Spine Surgical Care and Outcomes Assessment Program and the survey center at the Comparative Effectiveness Translational Network prospectively collected clinical and PRO data from adult candidates for lumbar surgery, preoperatively and postoperatively, between 2012 and 2016. Prediction models were derived for PRO improvement 1 year after lumbar fusion surgeries on a random sample of 85% of the data and were validated in the remaining 15%. Surgical candidates from 2012 through 2015 were included; follow-up surveying continued until December 31, 2016, and data analysis was completed from July 2016 to April 2017.

Main outcomes and measures

Functional improvement, defined as a reduction in Oswestry Disability Index score of 15 points or more; and back pain and leg pain improvement, defined as a reduction in Numeric Rating Scale score of 2 points or more.

Results

A total of 1965 adult lumbar surgical candidates (mean [SD] age, 61.3 [12.5] years; 944 [59.6%] female) completed baseline surveys before surgery and at least 1 postoperative follow-up survey within 3 years. Of these, 1583 (80.6%) underwent elective lumbar fusion procedures; 1223 (77.3%) had stenosis, and 1033 (65.3%) had spondylolisthesis. Twelve-month follow-up participation rates for each outcome were between 66% and 70%. Improvements were reported in function, back pain, and leg pain at 12 months by 306 of 528 surgical patients (58.0%), 616 of 899 patients (68.5%), and 355 of 464 patients (76.5%), respectively, whose baseline scores indicated moderate to severe symptoms. Among nonoperative patients, 35 (43.8%), 47 (53.4%), and 53 (63.9%) reported improvements in function, back pain, and leg pain, respectively. Demographic and clinical characteristics included in the final prediction models were age, sex, race, insurance status, American Society of Anesthesiologists score, smoking status, diagnoses, prior surgery, prescription opioid use, asthma, and baseline PRO scores. The models had good predictive performance in the validation cohort (concordance statistic, 0.66-0.79) and were incorporated into a patient-facing, web-based interactive tool (https://becertain.shinyapps.io/lumbar_fusion_calculator).

Conclusions and relevance

The PRO response prediction tool, informed by population-level data, explained most of the variability in pain reduction and functional improvement after surgery. Giving patients accurate information about their likelihood of outcomes may be a helpful component in surgery decision making.",2018-07-01 +31517182,Further Tests of the Utility of Integrated Speed-Accuracy Measures in Task Switching.,"Speed and accuracy of performance are central to many theoretical accounts of cognitive processing. In recent years, several integrated performance measures have been proposed. A comparative study of the available measures [Vandierendonck, A. (2017). A comparison of methods to combine speed and accuracy measures of performance: A rejoinder on the binning procedure. Behavior Research Methods, 49, 653-673. DOI: https://doi.org/10.3758/s13428-016-0721-5] concluded that three of the measures, namely inverse efficiency score, rate correct score, and linear integrated speed-accuracy score achieved a balanced integration of speed and accuracy. As a follow-up on that study, these three measures were examined in data analyses from 13 (published and unpublished) experiments in the context of task switching. The correlations of the effect sizes in these integrated scores with the effect sizes obtained in latency and accuracy were high, but varied across the three integrated measures. The efficiency to detect effects supported by the speed and accuracy data was examined by means of signal detection analyses. The three measures efficiently detected effects present in either speed or accuracy, but the rate correct score was less efficient than the other two measures and it signalled a larger number of strong effects unsupported by the speed and accuracy data. 
It is concluded that while the rate correct score is better avoided, and the usage of the inverse efficiency score should be restricted to data with low overall error rates, the linear integrated speed-accuracy score proves to be valid.",2018-01-12 +28729308,The effect of combined resistance exercise training and vitamin D3 supplementation on musculoskeletal health and function in older adults: a systematic review and meta-analysis.,"

Objectives

In older adults, there is a blunted responsiveness to resistance training and reduced muscle hypertrophy compared with younger adults. There is evidence that both exercise training and vitamin D supplementation may benefit musculoskeletal health in older adults, and it is plausible that in combination their effects may be additive. The aim of this systematic review was to evaluate the effectiveness of combined resistance exercise training and vitamin D3 supplementation on musculoskeletal health in older adults.

Data sources

A comprehensive search of electronic databases, including Science Direct, Medline, PubMed, Google Scholar and Cochrane Central Register of Controlled Trials (Cochrane CENTRAL accessed by Wiley Science) was conducted. Eligible studies were randomised controlled trials including men and women (aged ≥65 years or mean age ≥65 years); enlisting resistance exercise training and vitamin D3 supplementation; including outcomes of muscle strength, function, muscle power, body composition, serum vitamin D/calcium status or quality of life comparing results with a control group. The review was informed by a preregistered protocol (http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42015020157).

Results

Seven studies including a total of 792 participants were identified. Studies were categorised into two groups; group 1 compared vitamin D3 supplementation and exercise training versus exercise alone (describing the additive effect of vitamin D3 supplementation when combined with resistance exercise training) and group 2 compared vitamin D3 supplementation and exercise training versus vitamin D3 supplementation alone (describing the additive effect of resistance exercise training when combined with vitamin D3 supplementation). Meta-analyses for group 1 found muscle strength of the lower limb to be significantly improved within the intervention group (0.98, 95% CI 0.73 to 1.24, p<0.001); all other outcomes showed small but non-significant positive effects for the intervention group. The short physical performance battery (SPPB), timed up and go (TUG), muscle strength of the lower limb and femoral neck bone mineral density showed significantly greater improvements in the intervention group for group 2 comparisons.

Conclusions

This review provides tentative support for the additive effect of resistance exercise and vitamin D3 supplementation for the improvement of muscle strength in older adults. For other functional variables, such as SPPB and TUG, no additional benefit beyond exercise was shown. Further evidence is required to draw firm conclusions or make explicit recommendations regarding combined exercise and vitamin D3 supplementation.",2017-07-20 +29701747,"Cluster Locator, online analysis and visualization of gene clustering.","

Summary

Genes sharing functions, expression patterns or quantitative traits are not randomly distributed along eukaryotic genomes. In order to study the distribution of genes that share a given feature, we present Cluster Locator, an online analysis and visualization tool. Cluster Locator determines the number, size and position of all the clusters formed by the protein-coding genes on a list according to a given maximum gap, the percentage of gene clustering of the list and its statistical significance. The output includes a visual representation of the distribution of genes and gene clusters along the reference genome.

Availability and implementation

Cluster Locator is freely available at http://clusterlocator.bnd.edu.uy/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-10-01 +23736533,EcoGene-RefSeq: EcoGene tools applied to the RefSeq prokaryotic genomes.,"

Summary

EcoGene.org is a genome database and website dedicated to Escherichia coli K-12 substrain MG1655 that is revised daily using information derived from the biomedical literature and in-house analysis. EcoGene is a major source of annotation updates for the MG1655 Genbank record, one of only a few Genbank genome records that are updated by a community effort. The Reference Sequence (RefSeq) database, built by The National Center for Biotechnology Information, comprises a set of duplicate Genbank genome records that can be modified by the NCBI staff annotators. EcoGene-RefSeq is being developed as a stand-alone internet resource to facilitate the usage of EcoGene-based tools on any of the >2400 completed prokaryotic genome records that are currently available at the RefSeq database.

Availability

The web interface of EcoGene-RefSeq is available at http://www.ecogene.org/refseq.

Contact

krudd@med.miami.edu or j.zhou1@miami.edu.",2013-06-04 +29140475,The European Nucleotide Archive in 2017.,"For 35 years the European Nucleotide Archive (ENA; https://www.ebi.ac.uk/ena) has been responsible for making the world's public sequencing data available to the scientific community. Advances in sequencing technology have driven exponential growth in the volume of data to be processed and stored and a substantial broadening of the user community. Here, we outline ENA services and content in 2017 and provide insight into a selection of current key areas of development in ENA driven by challenges arising from the above growth.",2018-01-01 +30128804,"Ambit-SMIRKS: a software module for reaction representation, reaction search and structure transformation.","Ambit-SMIRKS is an open source software, enabling structure transformation via the SMIRKS language and implemented as an extension of Ambit-SMARTS. As part of the Ambit project it builds on top of The Chemistry Development Kit (The CDK). Ambit-SMIRKS provides the following functionalities: parsing of SMIRKS linear notations into internal reaction (transformation) representations based on The CDK objects, application of the stored reactions against target (reactant) molecules for actual transformation of the target chemical objects, reaction searching, stereo information handling, product post-processing, etc. The transformations can be applied on various sites of the reactant molecule in several modes: single, non-overlapping, non-identical, non-homomorphic or externally specified list of sites utilizing efficient substructure searching algorithm. Ambit-SMIRKS handles the molecules stereo information and supports basic chemical stereo elements implemented in The CDK library. The full SMARTS logical expressions syntax for reactions specification is supported, including recursive SMARTS expressions as well as additional syntax extensions. 
Since its initial development for the purpose of metabolite generation within Toxtree, the Ambit-SMIRKS module was used in various chemoinformatics projects, both developed by the authors of the package and by external teams. We show several use cases of the Ambit-SMIRKS software including standardization of large chemical databases and pathway transformation database and prediction. Ambit-SMIRKS is distributed as a Java library under LGPL license. More information on use cases and applications, including download links is available at http://ambit.sourceforge.net/smirks .",2018-08-20 +31356093,"Tabari Cohort Profile and Preliminary Results in Urban Areas and Mountainous Regions of Mazandaran, Iran.","

Background

The Tabari cohort study (TCS), part of the Prospective Epidemiological Research Studies in IrAN (PERSIAN), is a large longitudinal prospective cohort designed to better understand the risk factors associated with major non-communicable diseases (NCDs) across two urban and mountainous regions in north of Iran.

Methods

The enrollment phase of TCS started in June 2015 and ended in November 2017. During this phase, individuals aged 35-70 years from urban and mountainous regions of Sari township (Mazandaran province) were invited to the cohort center by health volunteers (urban regions) and Behvarz (mountainous areas) using census information. Data was collected based on the PERSIAN cohort study protocols. Hypertension was defined as systolic blood pressure ≥140 mm Hg or a diastolic blood pressure ≥90 mm Hg or history of diagnosis with hypertension or taking antihypertensive medications among participants free from cardiovascular diseases. Diabetes was defined as fasting blood sugar ≥126 mg/dL or a history of diagnosis or taking glucose-lowering medications among all participants.

Results

A total of 10,255 participants were enrolled in TCS, 59.5% of whom were female. Among the total population, 7,012 participants were urban residents (68.4%). The prevalence of daily smoking in the total population was 9.1%. Body mass index in 75.9% of participants was ≥25 kg/m2. The prevalence of hypertension, diabetes, and thyroid disorders were 22.2%, 17.2%, and 10.5%, respectively.

Conclusion

The Tabari cohort is different from other cohorts in terms of levels of risk factors associated with NCDs. This study has certain important strengths including its population-based design and large sample size that provides a valid platform for conducting future investigations and trials. A biobank that has been designed to store blood, nail, hair and urine samples for future research is another strength of this study. Researchers who are interested in using the information can refer to the following web page: http://persiancohort.com.",2019-06-01 +30356876,A Web Resource for Designing Subunit Vaccine Against Major Pathogenic Species of Bacteria.,"Evolution has led to the expansion of survival strategies in pathogens including bacteria and emergence of drug resistant strains proved to be a major global threat. Vaccination is a promising strategy to protect human population. Reverse vaccinology is a more robust vaccine development approach especially with the availability of large-scale sequencing data and rapidly dropping cost of the techniques for acquiring such data from various organisms. The present study implements an immunoinformatic approach for screening the possible antigenic proteins among various pathogenic bacteria to systemically arrive at epitope-based vaccine candidates against 14 pathogenic bacteria. Thousand four hundred and fifty nine virulence factors and Five hundred and forty six products of essential genes were appraised as target proteins to predict potential epitopes with potential to stimulate different arms of the immune system. To address the self-tolerance, self-epitopes were identified by mapping on 1000 human proteome and were removed. Our analysis revealed that 21proteins from 5 bacterial species were found as virulent as well as essential to their survival, proved to be most suitable vaccine target against these species. 
In addition to the prediction of MHC-II binders, B cell and T cell epitopes as well as adjuvants individually from proteins of all 14 bacterial species, a stringent criteria lead us to identify 252 unique epitopes, which are predicted to be T-cell epitopes, B-cell epitopes, MHC II binders and Vaccine Adjuvants. In order to provide service to scientific community, we developed a web server VacTarBac for designing of vaccines against above species of bacteria. This platform integrates a number of tools that includes visualization tools to present antigenicity/epitopes density on an antigenic sequence. These tools will help users to identify most promiscuous vaccine candidates in a pathogenic antigen. This server VacTarBac is available from URL (http://webs.iiitd.edu.in/raghava/vactarbac/).",2018-10-02 +27209279,DNetDB: The human disease network database based on dysfunctional regulation mechanism.,"Disease similarity study provides new insights into disease taxonomy, pathogenesis, which plays a guiding role in diagnosis and treatment. The early studies were limited to estimate disease similarities based on clinical manifestations, disease-related genes, medical vocabulary concepts or registry data, which were inevitably biased to well-studied diseases and offered small chance of discovering novel findings in disease relationships. In other words, genome-scale expression data give us another angle to address this problem since simultaneous measurement of the expression of thousands of genes allows for the exploration of gene transcriptional regulation, which is believed to be crucial to biological functions. Although differential expression analysis based methods have the potential to explore new disease relationships, it is difficult to unravel the upstream dysregulation mechanisms of diseases. 
We therefore estimated disease similarities based on gene expression data by using differential coexpression analysis, a recently emerging method, which has been proved to be more potential to capture dysfunctional regulation mechanisms than differential expression analysis. A total of 1,326 disease relationships among 108 diseases were identified, and the relevant information constituted the human disease network database (DNetDB). Benefiting from the use of differential coexpression analysis, the potential common dysfunctional regulation mechanisms shared by disease pairs (i.e. disease relationships) were extracted and presented. Statistical indicators, common disease-related genes and drugs shared by disease pairs were also included in DNetDB. In total, 1,326 disease relationships among 108 diseases, 5,598 pathways, 7,357 disease-related genes and 342 disease drugs are recorded in DNetDB, among which 3,762 genes and 148 drugs are shared by at least two diseases. DNetDB is the first database focusing on disease similarity from the viewpoint of gene regulation mechanism. It provides an easy-to-use web interface to search and browse the disease relationships and thus helps to systematically investigate etiology and pathogenesis, perform drug repositioning, and design novel therapeutic interventions.Database URL: http://app.scbit.org/DNetDB/ #.",2016-05-21 +30523161,Human diaphragm atrophy in amyotrophic lateral sclerosis is not predicted by routine respiratory measures. ,"Amyotrophic lateral sclerosis (ALS) patients show progressive respiratory muscle weakness leading to death from respiratory failure. However, there are no data on diaphragm histological changes in ALS patients and how they correlate with routine respiratory measurements.We collected 39 diaphragm biopsies concomitantly with laparoscopic insertion of intradiaphragmatic electrodes during a randomised controlled trial evaluating early diaphragm pacing in ALS (https://clinicaltrials.gov; NCT01583088). 
Myofibre type, size and distribution were evaluated by immunofluorescence microscopy and correlated with spirometry, respiratory muscle strength and phrenic nerve conduction parameters. The relationship between these variables and diaphragm atrophy was assessed using multivariate regression models.All patients exhibited significant slow- and fast-twitch diaphragmatic atrophy. Vital capacity (VC), maximal inspiratory pressure, sniff nasal inspiratory pressure (SNIP) and twitch transdiaphragmatic pressure did not correlate with the severity of diaphragm atrophy. Inspiratory capacity (IC) correlated modestly with slow-twitch myofibre atrophy. Supine fall in VC correlated weakly with fast-twitch myofibre atrophy. Multivariate analysis showed that IC, SNIP and functional residual capacity were independent predictors of slow-twitch diaphragmatic atrophy, but not fast-twitch atrophy.Routine respiratory tests are poor predictors of diaphragm structural changes. Improved detection of diaphragm atrophy is essential for clinical practice and for management of trials specifically targeting diaphragm muscle function.",2019-02-14 +29323246,ModuleDiscoverer: Identification of regulatory modules in protein-protein interaction networks.,"The identification of disease-associated modules based on protein-protein interaction networks (PPINs) and gene expression data has provided new insights into the mechanistic nature of diverse diseases. However, their identification is hampered by the detection of protein communities within large-scale, whole-genome PPINs. A presented successful strategy detects a PPIN's community structure based on the maximal clique enumeration problem (MCE), which is a non-deterministic polynomial time-hard problem. This renders the approach computationally challenging for large PPINs implying the need for new strategies. We present ModuleDiscoverer, a novel approach for the identification of regulatory modules from PPINs and gene expression data. 
Following the MCE-based approach, ModuleDiscoverer uses a randomization heuristic-based approximation of the community structure. Given a PPIN of Rattus norvegicus and public gene expression data, we identify the regulatory module underlying a rodent model of non-alcoholic steatohepatitis (NASH), a severe form of non-alcoholic fatty liver disease (NAFLD). The module is validated using single-nucleotide polymorphism (SNP) data from independent genome-wide association studies and gene enrichment tests. Based on gene enrichment tests, we find that ModuleDiscoverer performs comparably to three existing module-detecting algorithms. However, only our NASH-module is significantly enriched with genes linked to NAFLD-associated SNPs. ModuleDiscoverer is available at http://www.hki-jena.de/index.php/0/2/490 (Others/ModuleDiscoverer).",2018-01-11 +21793391,[Survey on clinical evidence of acupuncture therapy for fibromyalgia syndrome].,"

Objective

To evaluate the clinical effect of acupuncture therapy for fibromyalgia syndrome (FMS) by analyzing the available studies so as to provide clinical decision-making reference.

Methods

The published papers on clinical trials for acupuncture treatment of FMS were widely retrieved from Chinese Biomedical Databases (1979 - 2010), www. cnki. net (1979-2010), VIP China Scientific Journal Database (1989- 2010), Digital Periodicals on Wanfang Data (1998 - 2010), PubMed (1966-2010), etc. and by using key words of fibromyalgia syndrome and acupuncture. According to criterion of evidence-based medicine, the evidence from high to low quality levels was selected to answer corresponding clinical questions, and software RevMan 5.0 was used to analyze the final results.

Results

There has been not enough clinical evidence showing definite efficacy of acupuncture for FMS. However, a Level-A study (being in line with conditions of large sample, multi-centers, randomized controlled trials) and a level-C study (having control group, but without distinct randomizing method) showed respectively that acupuncture might be superior to Amitriptyline and Brufen in relieving FMS. Moreover, a piece of evidence that acupuncture combined with western medicine was superior to western medicine alone was allocated to a level-B (having correct randomizing method and control group). Finally, only a level-C evidence proved that laser irradiation on acupoint might be superior to traditional acupuncture in improving FMS.

Conclusion

Acupuncture for FMS has a positive effect, and acupuncture combined with western medicine can strengthen the curative effect. However this conclusion should be proved further by randomized controlled double blind clinical trials with large samples.",2011-06-01 +30509258,Spatiotemporal analysis of malaria for new sustainable control strategies.,"Malaria transmission is highly heterogeneous through time and space, and mapping of this heterogeneity is necessary to better understand local dynamics. New targeted policies are needed as numerous countries have placed malaria elimination on their public health agenda for 2030. In this context, developing national health information systems and collecting information at sufficiently precise scales (at least at the 'week' and 'village' scales), is of strategic importance. In a recent study, Macharia et al. relied on extensive prevalence survey data to develop malaria risk maps for Kenya, including uncertainty assessments specifically designed to support decision-making by the National Malaria Control Program. Targeting local persistent transmission or epidemiologic changes is necessary to maintain efficient control, but also to deploy sustainable elimination strategies against identified transmission bottlenecks such as the reservoir of subpatent infections. Such decision-making tools are paramount to allocate resources based on sound scientific evidence and public health priorities.Please see related article: https://malariajournal.biomedcentral.com/articles/10.1186/s12936-018-2489-9 .",2018-12-04 +24731198,On finding bicliques in bipartite graphs: a novel algorithm and its application to the integration of diverse biological data types.,"

Background

Integrating and analyzing heterogeneous genome-scale data is a huge algorithmic challenge for modern systems biology. Bipartite graphs can be useful for representing relationships across pairs of disparate data types, with the interpretation of these relationships accomplished through an enumeration of maximal bicliques. Most previously-known techniques are generally ill-suited to this foundational task, because they are relatively inefficient and without effective scaling. In this paper, a powerful new algorithm is described that produces all maximal bicliques in a bipartite graph. Unlike most previous approaches, the new method neither places undue restrictions on its input nor inflates the problem size. Efficiency is achieved through an innovative exploitation of bipartite graph structure, and through computational reductions that rapidly eliminate non-maximal candidates from the search space. An iterative selection of vertices for consideration based on non-decreasing common neighborhood sizes boosts efficiency and leads to more balanced recursion trees.

Results

The new technique is implemented and compared to previously published approaches from graph theory and data mining. Formal time and space bounds are derived. Experiments are performed on both random graphs and graphs constructed from functional genomics data. It is shown that the new method substantially outperforms the best previous alternatives.

Conclusions

The new method is streamlined, efficient, and particularly well-suited to the study of huge and diverse biological data. A robust implementation has been incorporated into GeneWeaver, an online tool for integrating and analyzing functional genomics experiments, available at http://geneweaver.org. The enormous increase in scalability it provides empowers users to study complex and previously unassailable gene-set associations between genes and their biological functions in a hierarchical fashion and on a genome-wide scale. This practical computational resource is adaptable to almost any applications environment in which bipartite graphs can be used to model relationships between pairs of heterogeneous entities.",2014-04-15 +30773671,"All ecological models are wrong, but some are useful.","In Focus: Curtsdotter, A., Banks, H. T., Banks, J. E., Jonsson, M., Jonsson, T., Laubmeier, A. N., … Bommarco, R. (2019). Ecosystem function in predator-prey food webs-Confronting dynamic models with empirical data. Journal of Animal Ecology, 88, https://doi.org/10.1111/1365-2656.12892 Species' population dynamics are influenced by a variety of abiotic and biotic factors. Curtsdotter et al. (2019) used a food web model to investigate the role of predator-prey interactions in the population dynamics of the bird cherry-oat aphid Rhopalosiphum padi. Their analysis hinged on linking the observed population dynamics to a mathematical description of the multi-species system via inverse methods-an approach less utilized in ecology but that allows one to search a wide space of possible parameterizations and identify best-fit model parameters. By scrutinizing the fit of this model to observed aphid population dynamics in 10 separate barley fields, they identified fields in which predation was the key driving force; in others, they found that accurate predictions depended on the existence of an unpredictable and unidentified extrinsic driver of aphid mortality. 
By scrutinizing areas where the model gave poor or biologically counterintuitive fits, their study provides a path forward to better link ecological theory to ecosystem function.",2019-02-01 +26527191,Shared bioinformatics databases within the Unipro UGENE platform.,"Unipro UGENE is an open-source bioinformatics toolkit that integrates popular tools along with original instruments for molecular biologists within a unified user interface. Nowadays, most bioinformatics desktop applications, including UGENE, make use of a local data model while processing different types of data. Such an approach causes an inconvenience for scientists working cooperatively and relying on the same data. This refers to the need of making multiple copies of certain files for every workplace and maintaining synchronization between them in case of modifications. Therefore, we focused on delivering a collaborative work into the UGENE user experience. Currently, several UGENE installations can be connected to a designated shared database and users can interact with it simultaneously. Such databases can be created by UGENE users and be used at their discretion. Objects of each data type, supported by UGENE such as sequences, annotations, multiple alignments, etc., can now be easily imported from or exported to a remote storage. One of the main advantages of this system, compared to existing ones, is the almost simultaneous access of client applications to shared data regardless of their volume. Moreover, the system is capable of storing millions of objects. The storage itself is a regular database server so even an inexpert user is able to deploy it. Thus, UGENE may provide access to shared data for users located, for example, in the same laboratory or institution. UGENE is available at: http://ugene.net/download.html.",2015-09-03 +30479885,beadplexr: reproducible and automated analysis of multiplex bead assays.,"Multiplex bead assays are an extension of the commonly used sandwich ELISA. 
The advantage over ELISA is that they make simultaneous evaluation of several analytes possible. Several commercial assay systems, where the beads are acquired on a standard flow cytometer, exist. These assay systems come with their own software tool for analysis and evaluation of the concentration of the analyzed analytes. However, these tools are either tied to particular commercial software or impose other limitations to their licenses, such as the number of events which can be analyzed. In addition, all these solutions are 'point and click' which potentially obscures the steps taken in the analysis. Here we present beadplexer, an open-source R-package for the reproducible analysis of multiplex bead assay data. The package makes it possible to automatically identify bead clusters, and provides functionality to easily fit a standard curve and calculate the concentrations of the analyzed analytes. beadplexer is available from CRAN and from https://gitlab.com/ustervbo/beadplexr.",2018-11-16 +27534850,A machine learning strategy for predicting localization of post-translational modification sites in protein-protein interacting regions.,"

Background

One very important functional domain of proteins is the protein-protein interacting region (PPIR), which forms the binding interface between interacting polypeptide chains. Post-translational modifications (PTMs) that occur in the PPIR can either interfere with or facilitate the interaction between proteins. The ability to predict whether sites of protein modifications are inside or outside of PPIRs would be useful in further elucidating the regulatory mechanisms by which modifications of specific proteins regulate their cellular functions.

Results

Using two of the comprehensive databases for protein-protein interaction and protein modification site data (PDB and PhosphoSitePlus, respectively), we created new databases that map PTMs to their locations inside or outside of PPIRs. The mapped PTMs represented only 5 % of all known PTMs. Thus, in order to predict localization within or outside of PPIRs for the vast majority of PTMs, a machine learning strategy was used to generate predictive models from these mapped databases. For the three mapped PTM databases which had sufficient numbers of modification sites for generating models (acetylation, phosphorylation, and ubiquitylation), the resulting models yielded high overall predictive performance as judged by a combined performance score (CPS). Among the multiple properties of amino acids that were used in the classification tasks, hydrophobicity was found to contribute substantially to the performance of the final predictive models. Compared to the other classifiers we also evaluated, the SVM provided the best performance overall.

Conclusions

These models are the first to predict whether PTMs are located inside or outside of PPIRs, as demonstrated by their high predictive performance. The models and data presented here should be useful in prioritizing both known and newly identified PTMs for further studies to determine the functional relationship between specific PTMs and protein-protein interactions. The implemented R package is available online ( http://sysbio.chula.ac.th/PtmPPIR ).",2016-08-17 +26578600,Mouse genome database 2016.,"The Mouse Genome Database (MGD; http://www.informatics.jax.org) is the primary community model organism database for the laboratory mouse and serves as the source for key biological reference data related to mouse genes, gene functions, phenotypes and disease models with a strong emphasis on the relationship of these data to human biology and disease. As the cost of genome-scale sequencing continues to decrease and new technologies for genome editing become widely adopted, the laboratory mouse is more important than ever as a model system for understanding the biological significance of human genetic variation and for advancing the basic research needed to support the emergence of genome-guided precision medicine. Recent enhancements to MGD include new graphical summaries of biological annotations for mouse genes, support for mobile access to the database, tools to support the annotation and analysis of sets of genes, and expanded support for comparative biology through the expansion of homology data.",2015-11-17 +29800845,Compilation and analysis of global surface water concentrations for individual insecticide compounds.,"The decades-long agricultural use of insecticides resulted in frequent contamination of surface waters globally regularly posing high risks for the aquatic biodiversity. 
However, the concentration levels of individual insecticide compounds have by now not been compiled and reported using global scale data, hampering our knowledge on the insecticide exposure of aquatic ecosystems. Here, we specify measured insecticide concentrations (MICs, comprising in total 11,300 water and sediment concentrations taken from a previous publication) for 28 important insecticide compounds covering four major insecticide classes. Results show that organochlorine and organophosphate insecticides, which dominated the global insecticide market for decades, have been detected most often and at highest concentration levels in surface waters globally. In comparison, MICs of the more recent pyrethroids and neonicotinoids were less often reported and generally at lower concentrations as a result of their later market introduction and lower application rates. An online insecticide classification calculator (ICC; available at: https://static.magic.eco/icc/v1) is provided in order to enable the comparison and classification of prospective MICs with available global insecticide concentrations. Spatial analyses of existing data show that most MICs were reported for surface waters in North America, Asia and Europe, whereas highest concentration levels were detected in Africa, Asia and South America. An evaluation of water and sediment MICs showed that theoretical organic carbon-water partition coefficients (KOC) determined in the laboratory overestimated KOC values based on actual field concentrations by up to a factor of more than 20, with highest deviations found for highly sorptive pyrethroids. 
Overall, the comprehensive compilation of insecticide field concentrations presented here is a valuable tool for the classification of future surface water monitoring results and serves as important input data for more field relevant toxicity testing approaches and pesticide exposure and risk assessment schemes.",2018-05-26 +27694208,"ORDB, HORDE, ODORactor and other on-line knowledge resources of olfactory receptor-odorant interactions. ","We present here an exploration of the evolution of three well-established, web-based resources dedicated to the dissemination of information related to olfactory receptors (ORs) and their functional ligands, odorants. These resources are: the Olfactory Receptor Database (ORDB), the Human Olfactory Data Explorer (HORDE) and ODORactor. ORDB is a repository of genomic and proteomic information related to ORs and other chemosensory receptors, such as taste and pheromone receptors. Three companion databases closely integrated with ORDB are OdorDB, ORModelDB and OdorMapDB; these resources are part of the SenseLab suite of databases (http://senselab.med.yale.edu). HORDE (http://genome.weizmann.ac.il/horde/) is a semi-automatically populated database of the OR repertoires of human and several mammals. ODORactor (http://mdl.shsmu.edu.cn/ODORactor/) provides information related to OR-odorant interactions from the perspective of the odorant. All three resources are connected to each other via web-links.Database URL: http://senselab.med.yale.edu; http://genome.weizmann.ac.il/horde/; http://mdl.shsmu.edu.cn/ODORactor/.",2016-10-02 +26724815,Probabilistic validation of protein NMR chemical shift assignments.,"Data validation plays an important role in ensuring the reliability and reproducibility of studies. NMR investigations of the functional properties, dynamics, chemical kinetics, and structures of proteins depend critically on the correctness of chemical shift assignments. 
We present a novel probabilistic method named ARECA for validating chemical shift assignments that relies on the nuclear Overhauser effect data . ARECA has been evaluated through its application to 26 case studies and has been shown to be complementary to, and usually more reliable than, approaches based on chemical shift databases. ARECA is available online at http://areca.nmrfam.wisc.edu/.",2016-01-02 +30290933,"Confronting the Challenge of COPD: What Is New in the Approaches to Diagnosis, Treatment, and Patient Outcomes.","As seen in this CME online activity (available at http://courses.elseviercme.com/730), COPD is the third leading cause of death in the United States among people 65 years of age and older and the fourth leading cause of death among people 45 to 65 years of age. A recent survey reported that about 12 to 15 million people in the United States have physician-diagnosed COPD. However, COPD is significantly underdiagnosed, and data suggest as many as 12 million people in the United States have undiagnosed COPD. In addition to being underdiagnosed, COPD is significantly undertreated, with numerous literature sources indicating that patients with COPD are not receiving guideline concordant pharmacotherapy. Consistent with these sources, an analysis of claims data for > 50,000 patients with COPD indicated a high degree of undertreatment in these patients. For patients using an inhaler, adherence to inhaler medications and correct inhaler device technique are crucial to successful COPD management. However, the literature indicates > 50% of patients with COPD demonstrate poor medication adherence and rates of incorrect inhalation technique range from 35% to 85%. Studies have shown that regular training by a physician improved inhaler techniques among patients with COPD. Further, studies have shown that repeated instruction on inhalation techniques improved both adherence and quality of life outcomes. 
This CME-certified webcast provides insights into new approaches to identifying patients with undiagnosed COPD, the importance of early initiation of pharmacologic treatment in a guideline-congruent manner, and the importance of repeated patient training in correct inhaler techniques to improve adherence and patient outcomes. Additionally, the online program is part of a larger learning platform (available at https://copd.elsevierresource.com/) that provides free access to the latest clinical information related to the diagnosis and management of COPD. The COPD Learning Center is a freely accessible platform that aims to increase clinical knowledge by providing CME activities, research articles, and resources for health-care providers who treat patients with COPD.",2018-10-01 +30239627,Exploring sequence-based features for the improved prediction of DNA N4-methylcytosine sites in multiple species.,"

Motivation

As one of the important epigenetic modifications, DNA N4-methylcytosine (4mC) has recently been shown to play crucial roles in restriction-modification systems. For better understanding of their functional mechanisms, it is fundamentally important to identify 4mC modifications. Machine learning methods have recently emerged as an effective and efficient approach for the high-throughput identification of 4mC sites, although high predictive error rates are still challenging for existing methods. Therefore, it is highly desirable to develop a computational method to more accurately identify 4mC sites.

Results

In this study, we propose a machine learning based predictor, namely 4mcPred-SVM, for the genome-wide detection of DNA 4mC sites. In this predictor, we present a new feature representation algorithm that sufficiently exploits sequence-based information. To improve the feature representation ability, we use a two-step feature optimization strategy, thereby obtaining the most representative features. Using the resulting features and Support Vector Machine (SVM), we adaptively train the optimal models for different species. Comparative results on benchmark datasets from six species indicate that our predictor is able to achieve generally better performance in predicting 4mC sites as compared to the state-of-the-art predictors. Importantly, the sequence-based features can reliably and robustly predict 4mC sites, facilitating the discovery of potentially important sequence characteristics for the prediction of 4mC sites.

Availability and implementation

The user-friendly webserver that implements the proposed 4mcPred-SVM is well established, and is freely accessible at http://server.malab.cn/4mcPred-SVM.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +30854411,Global Seismic Nowcasting With Shannon Information Entropy.,"Seismic nowcasting uses counts of small earthquakes as proxy data to estimate the current dynamical state of an earthquake fault system. The result is an earthquake potential score that characterizes the current state of progress of a defined geographic region through its nominal earthquake ""cycle."" The count of small earthquakes since the last large earthquake is the natural time that has elapsed since the last large earthquake (Varotsos et al., 2006, https://doi.org/10.1103/PhysRevE.74.021123). In addition to natural time, earthquake sequences can also be analyzed using Shannon information entropy (""information""), an idea that was pioneered by Shannon (1948, https://doi.org/10.1002/j.1538-7305.1948.tb01338.x). As a first step to add seismic information entropy into the nowcasting method, we incorporate magnitude information into the natural time counts by using event self-information. We find in this first application of seismic information entropy that the earthquake potential score values are similar to the values using only natural time. However, other characteristics of earthquake sequences, including the interevent time intervals, or the departure of higher magnitude events from the magnitude-frequency scaling line, may contain additional information.",2019-01-16 +30049792,PausePred and Rfeet: webtools for inferring ribosome pauses and visualizing footprint density from ribosome profiling data.,"The process of translation is characterized by irregularities in the local decoding rates of specific mRNA codons. This includes the occurrences of long pauses that can take place when ribosomes decode certain peptide sequences, encounter strong RNA secondary structures, or decode ""hungry"" codons. Examples are known where such pausing or stalling is used for regulating protein synthesis. 
This can be achieved at the level of translation via direct alteration of ribosome progression through mRNA or by altering mRNA stability via NoGo decay. Ribosome pausing has also been implicated in the cotranslational folding of proteins. Ribosome profiling data often are used for inferring the locations of ribosome pauses. However, no dedicated online software is available for this purpose. Here we present PausePred (https://pausepred.ucc.ie/), which can be used to infer ribosome pauses from ribosome profiling (Ribo-seq) data. Peaks of ribosome footprint density are scored based on their magnitude relative to the background density within the surrounding area. The scoring allows the comparison of peaks across the transcriptome or genome. In addition to the score, PausePred reports the coordinates of the pause, the footprint density at the pause site, and the surrounding nucleotide sequence. The pauses can be visualized in the context of Ribo-seq and RNA-seq density plots generated for specific transcripts or genomic regions with the Rfeet tool. PausePred does not require input on the location of protein coding ORFs (although gene annotations can be optionally supplied). As a result, it can be used universally and its output does not depend on ever evolving annotations.",2018-07-26 +29024340,Multiple marker abundance profiling: combining selected reaction monitoring and data-dependent acquisition for rapid estimation of organelle abundance in subcellular samples.,"Measuring changes in protein or organelle abundance in the cell is an essential, but challenging aspect of cell biology. Frequently-used methods for determining organelle abundance typically rely on detection of a very few marker proteins, so are unsatisfactory. In silico estimates of protein abundances from publicly available protein spectra can provide useful standard abundance values but contain only data from tissue proteomes, and are not coupled to organelle localization data. 
A new protein abundance score, the normalized protein abundance scale (NPAS), expands on the number of scored proteins and the scoring accuracy of lower-abundance proteins in Arabidopsis. NPAS was combined with subcellular protein localization data, facilitating quantitative estimations of organelle abundance during routine experimental procedures. A suite of targeted proteomics markers for subcellular compartment markers was developed, enabling independent verification of in silico estimates for relative organelle abundance. Estimation of relative organelle abundance was found to be reproducible and consistent over a range of tissues and growth conditions. In silico abundance estimations and localization data have been combined into an online tool, multiple marker abundance profiling, available in the SUBA4 toolbox (http://suba.live).",2017-11-20 +22110038,GeneSigDB: a manually curated database and resource for analysis of gene expression signatures.,"GeneSigDB (http://www.genesigdb.org or http://compbio.dfci.harvard.edu/genesigdb/) is a database of gene signatures that have been extracted and manually curated from the published literature. It provides a standardized resource of published prognostic, diagnostic and other gene signatures of cancer and related disease to the community so they can compare the predictive power of gene signatures or use these in gene set enrichment analysis. Since GeneSigDB release 1.0, we have expanded from 575 to 3515 gene signatures, which were collected and transcribed from 1604 published articles largely focused on gene expression in cancer, stem cells, immune cells, development and lung disease. We have made substantial upgrades to the GeneSigDB website to improve accessibility and usability, including adding a tag cloud browse function, facetted navigation and a 'basket' feature to store genes or gene signatures of interest. 
Users can analyze GeneSigDB gene signatures, or upload their own gene list, to identify gene signatures with significant gene overlap and results can be viewed on a dynamic editable heatmap that can be downloaded as a publication quality image. All data in GeneSigDB can be downloaded in numerous formats including .gmt file format for gene set enrichment analysis or as a R/Bioconductor data file. GeneSigDB is available from http://www.genesigdb.org.",2011-11-21 +31432473,Open Source Tools for Biological Image Analysis.,"Visiting the Bio Imaging Search Engine (BISE) (Bio, BISE, Engine, http://biii.eu/, Imaging, Search) website at the time of writing this article, almost 1200 open source assets (components, workflows, collections) were found. This overwhelming range of offerings makes it difficult to make a reasonable choice, especially for newcomers. In the following chapter, we briefly sketch the advantages of the open source software (OSS) particularly used for image analysis in the field of life sciences. We introduce both the general OSS idea as well as some programs used for image analysis. Even more, we outline the history of ImageJ as it has served as a role model for the development of more recent software packages. We focus on the programs that are, to our knowledge, the most relevant and widely used in the field of light microscopy, as well as the most commonly used within our facility. In addition, we briefly discuss recent efforts and approaches aimed to share and compare algorithms and introduce software and data sharing good practices as a promising strategy to facilitate reproducibility, software understanding, and optimal software choice for a given scientific problem in the future.",2019-01-01 +30239574,A clustering linear combination approach to jointly analyze multiple phenotypes for GWAS.,"

Summary

There is an increasing interest in joint analysis of multiple phenotypes for genome-wide association studies (GWASs) for the following reasons. First, cohorts usually collect multiple phenotypes and complex diseases are usually measured by multiple correlated intermediate phenotypes. Second, jointly analyzing multiple phenotypes may increase statistical power for detecting genetic variants associated with complex diseases. Third, there is increasing evidence showing that pleiotropy is a widespread phenomenon in complex diseases. In this paper, we develop a clustering linear combination (CLC) method to jointly analyze multiple phenotypes for GWASs. In the CLC method, we first cluster individual statistics into positively correlated clusters and then, combine the individual statistics linearly within each cluster and combine the between-cluster terms in a quadratic form. CLC is not only robust to different signs of the means of individual statistics, but also reduces the degrees of freedom of the test statistic. We also theoretically prove that if we can cluster the individual statistics correctly, CLC is the most powerful test among all tests with certain quadratic forms. Our simulation results show that CLC is either the most powerful test or has similar power to the most powerful test among the tests we compared, and CLC is much more powerful than other tests when effect sizes align with inferred clusters. We also evaluate the performance of CLC through a real case study.

Availability and implementation

R code for implementing our method is available at http://www.math.mtu.edu/∼shuzhang/software.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-04-01 +29757393,SNPnexus: assessing the functional relevance of genetic variation to facilitate the promise of precision medicine.,"Broader functional annotation of genetic variation is a valuable means for prioritising phenotypically-important variants in further disease studies and large-scale genotyping projects. We developed SNPnexus to meet this need by assessing the potential significance of known and novel SNPs on the major transcriptome, proteome, regulatory and structural variation models. Since its previous release in 2012, we have made significant improvements to the annotation categories and updated the query and data viewing systems. The most notable changes include broader functional annotation of noncoding variants and expanding annotations to the most recent human genome assembly GRCh38/hg38. SNPnexus has now integrated rich resources from ENCODE and Roadmap Epigenomics Consortium to map and annotate the noncoding variants onto different classes of regulatory regions and noncoding RNAs as well as providing their predicted functional impact from eight popular non-coding variant scoring algorithms and computational methods. A novel functionality offered now is the support for neo-epitope predictions from leading tools to facilitate its use in immunotherapeutic applications. These updates to SNPnexus are in preparation for its future expansion towards a fully comprehensive computational workflow for disease-associated variant prioritization from sequencing data, placing its users at the forefront of translational research. 
SNPnexus is freely available at http://www.snp-nexus.org.",2018-07-01 +29718426,geno2pheno[ngs-freq]: a genotypic interpretation system for identifying viral drug resistance using next-generation sequencing data.,"Identifying resistance to antiretroviral drugs is crucial for ensuring the successful treatment of patients infected with viruses such as human immunodeficiency virus (HIV) or hepatitis C virus (HCV). In contrast to Sanger sequencing, next-generation sequencing (NGS) can detect resistance mutations in minority populations. Thus, genotypic resistance testing based on NGS data can offer novel, treatment-relevant insights. Since existing web services for analyzing resistance in NGS samples are subject to long processing times and follow strictly rules-based approaches, we developed geno2pheno[ngs-freq], a web service for rapidly identifying drug resistance in HIV-1 and HCV samples. By relying on frequency files that provide the read counts of nucleotides or codons along a viral genome, the time-intensive step of processing raw NGS data is eliminated. Once a frequency file has been uploaded, consensus sequences are generated for a set of user-defined prevalence cutoffs, such that the constructed sequences contain only those nucleotides whose codon prevalence exceeds a given cutoff. After locally aligning the sequences to a set of references, resistance is predicted using the well-established approaches of geno2pheno[resistance] and geno2pheno[hcv]. 
geno2pheno[ngs-freq] can assist clinical decision making by enabling users to explore resistance in viral populations with different abundances and is freely available at http://ngs.geno2pheno.org.",2018-07-01 +22250003,The Biofuel Feedstock Genomics Resource: a web-based portal and database to enable functional genomics of plant biofuel feedstock species.,"Major feedstock sources for future biofuel production are likely to be high biomass producing plant species such as poplar, pine, switchgrass, sorghum and maize. One active area of research in these species is genome-enabled improvement of lignocellulosic biofuel feedstock quality and yield. To facilitate genomic-based investigations in these species, we developed the Biofuel Feedstock Genomic Resource (BFGR), a database and web-portal that provides high-quality, uniform and integrated functional annotation of gene and transcript assembly sequences from species of interest to lignocellulosic biofuel feedstock researchers. The BFGR includes sequence data from 54 species and permits researchers to view, analyze and obtain annotation at the gene, transcript, protein and genome level. Annotation of biochemical pathways permits the identification of key genes and transcripts central to the improvement of lignocellulosic properties in these species. The integrated nature of the BFGR in terms of annotation methods, orthologous/paralogous relationships and linkage to seven species with complete genome sequences allows comparative analyses for biofuel feedstock species with limited sequence resources. Database URL: http://bfgr.plantbiology.msu.edu.",2012-01-15 +22399675,AntigenMap 3D: an online antigenic cartography resource.,"

Summary

Antigenic cartography is a useful technique to visualize and minimize errors in immunological data by projecting antigens to 2D or 3D cartography. However, a 2D cartography may not be sufficient to capture the antigenic relationship from high-dimensional immunological data. AntigenMap 3D presents an online, interactive, and robust 3D antigenic cartography construction and visualization resource. AntigenMap 3D can be applied to identify antigenic variants and vaccine strain candidates for pathogens with rapid antigenic variations, such as influenza A virus.

Availability and implementation

http://sysbio.cvm.msstate.edu/AntigenMap3D",2012-03-06 +31516335,CORAL: Building up QSAR models for the chromosome aberration test.,"A high level of chromosomal aberrations in peripheral blood lymphocytes may be an early marker of cancer risk, but data on risk of specific cancers and types of chromosomal aberrations are limited. Consequently, the development of predictive models for chromosomal aberrations test is important task. Majority of models for chromosomal aberrations test are so-called knowledge-based rules system. The CORAL software (http://www.insilico.eu/coral, abbreviation of ""CORrelation And Logic"") is an alternative for knowledge-based rules system. In contrast to knowledge-based rules system, the CORAL software gives possibility to estimate the influence upon the predictive potential of a model of different molecular alerts as well as different splits into the training set and validation set. This possibility is not available for the approaches based on the knowledge-based rules system. Quantitative Structure-Activity Relationships (QSAR) for chromosome aberration test are established for five random splits into the training, calibration, and validation sets. The QSAR approach is based on representation of the molecular structure by simplified molecular input-line entry system (SMILES) without data on physicochemical and/or biochemical parameters. In spite of this limitation, the statistical quality of these models is quite good.",2018-05-09 +29956198,Prognostic Impact of Extracapsular Lymph Node Invasion on Survival in Non-small-Cell Lung Cancer: A Systematic Review and Meta-analysis.,"The extracapsular tumor extension (ECE) of nodal metastasis is an important prognostic factor in different types of malignancies. However, there is a lack of recent data in patients with non-small-cell lung cancer (NSCLC). In addition, the TNM staging system does not include ECE status as a prognostic factor. 
This systematic review and meta-analysis has been conducted to summarize and pool existing data to determine the prognostic role of ECE in patients with lymph node-positive NSCLC. Two authors performed an independent search in PubMed using a predefined keyword list, without language restrictions with publication date since 1990. Prospective or retrospective studies reporting data on prognostic parameters in subjects with NSCLC with positive ECE or with only intracapsular lymph node metastasis were retrieved. Data were summarized using risk ratios (RR) for the survival with 95% confidence intervals (CI). The data were analyzed using Mix 2 (ref: Bax L: MIX 2.0 - Professional software for meta-analysis in Excel. Version 2.015. BiostatXL, 2016. https://www.meta-analysis-made-easy.com ). A total of 2,105 studies were reviewed. Five studies covering a total of 828 subjects met the inclusion criteria and were included in the meta-analysis. Two hundred and ninety-eight (35.9%) patients were categorized as ECE+, of whom 54 (18.1%) survived at the end of follow-up. In the ECE-negative group, 257 patients (48.4%) survived by the end of follow-up. Thus, ECE status is associated with a significantly decreased survival rate: pooled RR 0.45 (95% CI 0.35-0.59), Q (4) = 4.06, P value = 0.39, and I 2 = 68.00% (95% CI 0.00-79.55%). In conclusion, ECE has a significant impact on survival in NSCLC patients and should be considered in diagnostic and therapeutic decisions in addition to the current TNM staging. Postoperative radiotherapy may be an option in ECE-positive pN1 NSCLC patients.",2018-01-01 +27990826,X!TandemPipeline: A Tool to Manage Sequence Redundancy for Protein Inference and Phosphosite Identification.,"X!TandemPipeline is a software designed to perform protein inference and to manage redundancy in the results of phosphosite identification by database search. 
It provides the minimal list of proteins or phosphosites that are present in a set of samples using grouping algorithms based on the principle of parsimony. Regarding proteins, a two-level classification is performed, where groups gather proteins sharing at least one peptide and subgroups gather proteins that are not distinguishable according to the identified peptides. Regarding phosphosites, an innovative approach based on the concept of phosphoisland is used to gather overlapping phosphopeptides. The graphical interface of X!TandemPipeline allows the users to launch X!tandem identification, to inspect spectra and to manually validate their assignment to peptides, to launch the grouping program, and to visualize elementary data as well as grouping and redundancy information. Identification results obtained from other search engines can also be processed. X!TandemPipeline results can be exported as ready-to-use tabulated files or as XML files that can be directly used by the PROTICdb database or by the MassChroQ quantification software. X!TandemPipeline runs fast, is easy to use, and can process hundreds of samples simultaneously. It is freely available under the GNU General Public License v3.0 at http://pappso.inra.fr/bioinfo/xtandempipeline/ .",2016-12-19 +27761493,Proteome data from a host-pathogen interaction study with Staphylococcus aureus and human lung epithelial cells.,"To simultaneously obtain proteome data of host and pathogen from an internalization experiment, human alveolar epithelial A549 cells were infected with Staphylococcus aureus HG001 which carried a plasmid (pMV158GFP) encoding a continuously expressed green fluorescent protein (GFP). Samples were taken hourly between 1.5 h and 6.5 h post infection. By fluorescence activated cell sorting GFP-expressing bacteria could be enriched from host cell debris, but also infected host cells could be separated from those which did not carry bacteria after contact (exposed). 
Additionally, proteome data of A549 cells which were not exposed to S. aureus but underwent the same sample processing steps are provided as a control. Time-resolved changes in bacterial protein abundance were quantified in a label-free approach. Proteome adaptations of host cells were monitored by comparative analysis to a stable isotope labeled cell culture (SILAC) standard. Proteins were extracted from the cells, digested proteolytically, measured by nanoLC-MS/MS, and subsequently identified by database search and then quantified. The data presented here are related to a previously published research article describing the interplay of S. aureus HG001 and human epithelial cells (Surmann et al., 2015 [1]). They have been deposited to the ProteomeXchange platform with the identifiers PRIDE: http://www.ebi.ac.uk/pride/archive/projects/PXD002384 for the S. aureus HG001 proteome dataset and PRIDE: http://www.ebi.ac.uk/pride/archive/projects/PXD002388 for the A549 proteome dataset.",2016-03-19 +29979827,MCRiceRepGP: a framework for the identification of genes associated with sexual reproduction in rice.,"Rice is an important cereal crop, being a staple food for over half of the world's population, and sexual reproduction resulting in grain formation underpins global food security. However, despite considerable research efforts, many of the genes, especially long intergenic non-coding RNA (lincRNA) genes, involved in sexual reproduction in rice remain uncharacterized. With an increasing number of public resources becoming available, information from different sources can be combined to perform gene functional annotation. We report the development of MCRiceRepGP, a machine learning framework which integrates heterogeneous evidence and employs multicriteria decision analysis and machine learning to predict coding and lincRNA genes involved in sexual reproduction in rice. 
The rice genome was reannotated using deep-sequencing transcriptomic data from reproduction-associated tissue/cell types identifying previously unannotated putative protein-coding genes and lincRNAs. MCRiceRepGP was used for genome-wide discovery of sexual reproduction associated coding and lincRNA genes. The protein-coding and lincRNA genes identified have distinct expression profiles, with a large proportion of lincRNAs reaching maximum expression levels in the sperm cells. Some of the genes are potentially linked to male- and female-specific fertility and heat stress tolerance during the reproductive stage. MCRiceRepGP can be used in combination with other genome-wide studies, such as genome-wide association studies, giving greater confidence that the genes identified are associated with the biological process of interest. As more data, especially about mutant plant phenotypes, become available, the power of MCRiceRepGP will grow, providing researchers with a tool to identify candidate genes for future experiments. MCRiceRepGP is available as a web application (http://mcgplannotator.com/MCRiceRepGP/).",2018-08-16 +30380400,FTO mRNA expression in the lower quartile is associated with bad prognosis in clear cell renal cell carcinoma based on TCGA data mining.,"Fat mass and obesity associated (FTO) is a protein-coding gene, also known as the obesity gene. It has been reported previously to be associated with a variety of malignant cancers, such as breast, thyroid and acute myeloid leukemia. The aim of the present study was to investigate the FTO mRNA expression in human clear cell renal cell carcinoma and its clinical value. FTO mRNA expression and its prognostic value were investigated by bioinformatic analysis of the data from The Cancer Genome Atlas (TCGA, https://cancergenome.nih.gov/). 
The Kaplan-Meier analysis showed that FTO mRNA expression in the lower quartile is significantly associated with poor survival in clear cell renal cell carcinoma patients (P < 0.0001). This study indicated that higher FTO mRNA expression may have a protective role and it may be a vital molecular marker in the prognosis of clear cell renal cell carcinoma patients.",2018-10-25 +29718424,psRNATarget: a plant small RNA target analysis server (2017 release).,"Plant regulatory small RNAs (sRNAs), which include most microRNAs (miRNAs) and a subset of small interfering RNAs (siRNAs), such as the phased siRNAs (phasiRNAs), play important roles in regulating gene expression. Although generated from genetically distinct biogenesis pathways, these regulatory sRNAs share the same mechanisms for post-translational gene silencing and translational inhibition. psRNATarget was developed to identify plant sRNA targets by (i) analyzing complementary matching between the sRNA sequence and target mRNA sequence using a predefined scoring schema and (ii) by evaluating target site accessibility. This update enhances its analytical performance by developing a new scoring schema that is capable of discovering miRNA-mRNA interactions at higher 'recall rates' without significantly increasing total prediction output. The scoring procedure is customizable for the users to search both canonical and non-canonical targets. This update also enables transmitting and analyzing 'big' data empowered by (a) the implementation of multi-threading chunked file uploading, which can be paused and resumed, using HTML5 APIs and (b) the allocation of significantly more computing nodes to its back-end Linux cluster. The updated psRNATarget server has clear, compelling and user-friendly interfaces that enhance user experiences and present data clearly and concisely. 
The psRNATarget is freely available at http://plantgrn.noble.org/psRNATarget/.",2018-07-01 +29501166,A comparison of three liquid chromatography (LC) retention time prediction models.,"High-resolution mass spectrometry (HRMS) data has revolutionized the identification of environmental contaminants through non-targeted analysis (NTA). However, chemical identification remains challenging due to the vast number of unknown molecular features typically observed in environmental samples. Advanced data processing techniques are required to improve chemical identification workflows. The ideal workflow brings together a variety of data and tools to increase the certainty of identification. One such tool is chromatographic retention time (RT) prediction, which can be used to reduce the number of possible suspect chemicals within an observed RT window. This paper compares the relative predictive ability and applicability to NTA workflows of three RT prediction models: (1) a logP (octanol-water partition coefficient)-based model using EPI Suite™ logP predictions; (2) a commercially available ACD/ChromGenius model; and, (3) a newly developed Quantitative Structure Retention Relationship model called OPERA-RT. Models were developed using the same training set of 78 compounds with experimental RT data and evaluated for external predictivity on an identical test set of 19 compounds. Both the ACD/ChromGenius and OPERA-RT models outperformed the EPI Suite™ logP-based RT model (R2 = 0.81-0.92, 0.86-0.83, 0.66-0.69 for training-test sets, respectively). Further, both OPERA-RT and ACD/ChromGenius predicted 95% of RTs within a ± 15% chromatographic time window of experimental RTs. Based on these results, we simulated an NTA workflow with a ten-fold larger list of candidate structures generated for formulae of the known test set chemicals using the U.S. 
EPA's CompTox Chemistry Dashboard (https://comptox.epa.gov/dashboard), RTs for all candidates were predicted using both ACD/ChromGenius and OPERA-RT, and RT screening windows were assessed for their ability to filter out unlikely candidate chemicals and enhance potential identification. Compared to ACD/ChromGenius, OPERA-RT screened out a greater percentage of candidate structures within a 3-min RT window (60% vs. 40%) but retained fewer of the known chemicals (42% vs. 83%). By several metrics, the OPERA-RT model, generated as a proof-of-concept using a limited set of open source data, performed as well as the commercial tool ACD/ChromGenius when constrained to the same small training and test sets. As the availability of RT data increases, we expect the OPERA-RT model's predictive ability will increase.",2018-01-11 +30147338,PRMT7 contributes to the metastasis phenotype in human non-small-cell lung cancer cells possibly through the interaction with HSPA5 and EEF2.,"

Background

Non-small-cell lung cancer (NSCLC) constitutes the leading cause of cancer death in humans. Previous studies revealed the essential role of the protein arginine methyltransferase 7 (PRMT7) in promoting metastasis in breast cancer. However, its function and potential mechanism in NSCLC remain unclear.

Materials and methods

The gene expression of PRMT7 between lung cancer tissues and normal tissues was studied with an online database (http://medicalgenome.kribb.re.kr/GENT/). NSCLC cell lines with specific gene overexpression were constructed with lentivirus transduction. Matrigel invasion and colony formation assays were performed to evaluate the invasion and colony formation abilities. Co-immunoprecipitation coupled with mass spectrometry analysis was performed to explore the potential interaction proteins of PRMT7. Bioinformatic analysis was performed with Gene Ontology and Kyoto Encyclopedia of Genes and Genomes databases.

Results

Online analysis of gene expression patterns revealed the relatively high expression of PRMT7 in lung cancer tissues. PRMT7 overexpression was able to promote the invasion and colony formation of A549 and SPC-A1 cells. A total of 19 in-common proteins shared by both NSCLC cell lines were identified to be interacting with PRMT7 and found to participate in a wide variety of pathways and protein-protein interactions according to bioinformatic analysis. Among them, HSPA5 and EEF2 were further investigated for their essential roles in PRMT7-promoted NSCLC cell invasion.

Conclusion

Our results suggested PRMT7 overexpression was able to promote metastasis in NSCLC possibly through the interaction with HSPA5 and EEF2, which provides the potential mechanism of oncogenesis in lung cancer.",2018-08-14 +30106511,Computational Prediction of Carbohydrate-Binding Proteins and Binding Sites.,"Protein-carbohydrate interaction is essential for biological systems, and carbohydrate-binding proteins (CBPs) are important targets when designing antiviral and anticancer drugs. Due to the high cost and difficulty associated with experimental approaches, many computational methods have been developed as complementary approaches to predict CBPs or carbohydrate-binding sites. However, most of these computational methods are not publicly available. Here, we provide a comprehensive review of related studies and demonstrate our two recently developed bioinformatics methods. The method SPOT-CBP is a template-based method for detecting CBPs based on structure through structural homology search combined with a knowledge-based scoring function. This method can yield model complex structure in addition to accurate prediction of CBPs. Furthermore, it has been observed that similarly accurate predictions can be made using structures from homology modeling, which has significantly expanded its applicability. The other method, SPRINT-CBH, is a de novo approach that predicts binding residues directly from protein sequences by using sequence information and predicted structural properties. This approach does not need structurally similar templates and thus is not limited by the current database of known protein-carbohydrate complex structures. These two complementary methods are available at https://sparks-lab.org. © 2018 by John Wiley & Sons, Inc.",2018-08-14 +29955080,Reply to 'Increased food supply mitigates ocean acidification effects on calcification but exacerbates effects on growth'.,In the Brown et al. 
study 'Increased food supply mitigates ocean acidification effects on calcification but exacerbates effects on growth' they show disagreement with the tested hypothesis and data analysis methodology used in our 2016 study. We acknowledge careful criticism and a constructive dialogue are necessary to progress science and address these issues in this reply. Replying to: Brown et al. Sci. Rep. 8 (2018); https://doi.org/10.1038/s41598-018-28012-w .,2018-06-28 +29322451,Discovering circRNA-microRNA Interactions from CLIP-Seq Data.,"Circular RNAs (circRNAs) represent an abundant group of noncoding RNAs in eukaryotes and are emerging as important regulatory molecules in physiological and pathological processes. However, the precise mechanisms and functions of most of circRNAs remain largely unknown. In this chapter, we describe how to identify circRNA-microRNA interactions from Argonaute (AGO) cross-linking and immunoprecipitation followed by sequencing (CLIP-Seq) and RNA-Seq data using starBase platform and software. We developed three stand-alone computational software, including circSeeker, circAnno, and clipSearch, to identify and annotate circRNAs and their interactions with microRNAs (miRNAs). In addition, we developed interactive Web applications to evaluate circRNA-miRNA interactions identified from CLIP-Seq data and discover the miRNA-sponge circRNAs. starBase platform provides a genome browser to comparatively analyze these interactions at multiple levels. As a means of comprehensively integrating CLIP-Seq and RNA-Seq data, starBase platform is expected to reveal the regulatory networks involving miRNAs and circRNAs. The software and platform are available at http://starbase.sysu.edu.cn/circTools.php.",2018-01-01 +28619013,"MLST genotypes of Campylobacter jejuni isolated from broiler products, dairy cattle and human campylobacteriosis cases in Lithuania.","

Background

Campylobacter (C.) jejuni is the leading cause of human campylobacteriosis worldwide. We performed a molecular epidemiological study to investigate the genetic relationship among C. jejuni strains isolated from human diarrhoeal patients, broiler products and dairy cattle in Lithuania.

Methods

The C. jejuni isolates from human clinical cases, dairy cattle and broiler products were genotyped using multilocus sequence typing (MLST). Allele numbers for each housekeeping gene, sequence type (ST), and clonal complex (CC) were assigned by submitting the DNA sequences to the C. jejuni MLST database ( http://pubmlst.org/campylobacter ). Based on the obtained sequence data of the housekeeping genes a phylogenetic analysis of the strains was performed and a minimum spanning tree (MST) was calculated.

Results

Among the 262 C. jejuni strains (consisting of 43 strains isolated from dairy cattle, 102 strains isolated from broiler products and 117 clinical human C. jejuni strains), 82 different MLST sequence types and 22 clonal complexes were identified. Clonal complexes CC21 and CC353 predominated among the C. jejuni strains. On ST-level, five sequence types (ST-5, ST-21, ST-50, ST-464 and ST-6410) were dominating and these five STs accounted for 35.9% (n = 94) of our isolates. In addition, 51 (19.5%) C. jejuni strains representing 27 (32.9%) STs were reported for the first time in the PubMLST database ( http://pubmlst.org/campylobacter ). The highest Czekanowski index or proportional similarity index (PSI) was calculated for C. jejuni strains isolated from human campylobacteriosis cases and broiler products (PSI = 0.32) suggesting a strong link between broiler strains and human cases. The PSI of dairy cattle and human samples was lower (PSI = 0.11), suggesting a weaker link between bovine strains and human cases. The calculated Simpson's index of all C. jejuni isolates showed a high genetic diversity (D = 0.96).

Conclusion

Our results suggest that broiler products are the most important source of human campylobacteriosis in Lithuania. The study provides information on MLST type distribution and genetic relatedness of C. jejuni strains from humans, broiler products and dairy cattle in Lithuania for the first time, enabling a better understanding of the transmission pathways of C. jejuni in this country.",2017-06-15 +25974630,FuncTree: Functional Analysis and Visualization for Large-Scale Omics Data.,"Exponential growth of high-throughput data and the increasing complexity of omics information have been making processing and interpreting biological data an extremely difficult and daunting task. Here we developed FuncTree (http://bioviz.tokyo/functree), a web-based application for analyzing and visualizing large-scale omics data, including but not limited to genomic, metagenomic, and transcriptomic data. FuncTree allows user to map their omics data onto the ""Functional Tree map"", a predefined circular dendrogram, which represents the hierarchical relationship of all known biological functions defined in the KEGG database. This novel visualization method allows user to overview the broad functionality of their data, thus allowing a more accurate and comprehensive understanding of the omics information. FuncTree provides extensive customization and calculation methods to not only allow user to directly map their omics data to identify the functionality of their data, but also to compute statistically enriched functions by comparing it to other predefined omics data. We have validated FuncTree's analysis and visualization capability by mapping pan-genomic data of three different types of bacterial genera, metagenomic data of the human gut, and transcriptomic data of two different types of human cell expression. All three mapping strongly confirms FuncTree's capability to analyze and visually represent key functional feature of the omics data. 
We believe that FuncTree's capability to conduct various functional calculations and visualizing the result into a holistic overview of biological function, would make it an integral analysis/visualization tool for extensive omics base research.",2015-05-14 +26393351,"The NIDDK Information Network: A Community Portal for Finding Data, Materials, and Tools for Researchers Studying Diabetes, Digestive, and Kidney Diseases.","The NIDDK Information Network (dkNET; http://dknet.org) was launched to serve the needs of basic and clinical investigators in metabolic, digestive and kidney disease by facilitating access to research resources that advance the mission of the National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK). By research resources, we mean the multitude of data, software tools, materials, services, projects and organizations available to researchers in the public domain. Most of these are accessed via web-accessible databases or web portals, each developed, designed and maintained by numerous different projects, organizations and individuals. While many of the large government funded databases, maintained by agencies such as European Bioinformatics Institute and the National Center for Biotechnology Information, are well known to researchers, many more that have been developed by and for the biomedical research community are unknown or underutilized. At least part of the problem is the nature of dynamic databases, which are considered part of the ""hidden"" web, that is, content that is not easily accessed by search engines. dkNET was created specifically to address the challenge of connecting researchers to research resources via these types of community databases and web portals. dkNET functions as a ""search engine for data"", searching across millions of database records contained in hundreds of biomedical databases developed and maintained by independent projects around the world. 
A primary focus of dkNET are centers and projects specifically created to provide high quality data and resources to NIDDK researchers. Through the novel data ingest process used in dkNET, additional data sources can easily be incorporated, allowing it to scale with the growth of digital data and the needs of the dkNET community. Here, we provide an overview of the dkNET portal and its functions. We show how dkNET can be used to address a variety of use cases that involve searching for research resources.",2015-09-22 +,"The Garlic and Shallot Core Collection image database of IPK presenting two vegetatively maintained crops in the Federal ex situ genebank for agricultural and horticultural crops at Gatersleben, Germany","Garlic and shallot are important vegetable and spice plants, garlic is also a medicinal crop widely used throughout the world. Both belong to the genus Allium, which has been a main target of the taxonomical research at the IPK for many years. Therefore, the IPK’s living collection of Allium is one of the world’s largest special collections of this genus with garlic and shallot being essential parts of this collection. Furthermore, they are subject to special activities to preserve the material in vitro and in cryopreservation. Both methods help protecting valuable germplasm from abiotic and biotic threats and reduce the maintenance costs in the long term. At present, the garlic collection maintained at the IPK incorporates germplasm of 509 accessions in total including all safety duplicates not offered for exchange. Of shallot, 114 accessions are present as well. Passport data as well as phenotypic data of 159 accessions of garlic, one of great headed garlic and 16 of shallot are included in the Garlic and Shallot Core Collection Database (GSCC). An additional part of the database is a comprehensive collection of images illustrating morphological characteristics of the accessions. 
Exploration of the information is supported by a web-based application. The GSCC is available at http://www.ipk-gatersleben.de/databases/gscc .",2012-10-01 +28057955,Summaries of Safety Labeling Changes Approved by the FDA: Boxed Warnings Highlights July-September 2016.,"The FDA's MedWatch program safety labeling changes for boxed warnings are compiled quarterly for drugs and therapeutic biologics where important changes have been made to the safety information. Search of Drug Safety Labeling Changes (SLC) database was conducted on October 10, 2016 for date range ""7/1/2016-9/30/2016"", labeling section ""Boxed Warning"". These and other label changes are searchable in the Drug Safety Labeling Changes (SLC) database, where data are available to the public in downloadable and searchable formats. (Drug Safety Labeling Changes are available at: http://www.accessdata.fda.gov/scripts/cder/safetylabelingchanges/?source=govdelivery&utm_medium=email&utm_source=govdelivery.) Boxed warnings are ordinarily used to highlight either: adverse reactions so serious in proportion to the potential benefit from the drug that it is essential that it be considered in assessing the risks and benefits of using the drug; OR serious adverse reactions that can be prevented/reduced in frequency or severity by appropriate use of the drug; OR FDA approved the drug with restrictions to ensure safe use because FDA concluded that the drug can be safely used only if distribution or use is restricted.",2016-12-01 +29297298,2D-EM clustering approach for high-dimensional data through folding feature vectors.,"BACKGROUND:Clustering methods are becoming widely utilized in biomedical research where the volume and complexity of data is rapidly increasing. Unsupervised clustering of patient information can reveal distinct phenotype groups with different underlying mechanism, risk prognosis and treatment response. 
However, biological datasets are usually characterized by a combination of low sample number and very high dimensionality, something that is not adequately addressed by current algorithms. While the performance of the methods is satisfactory for low dimensional data, increasing number of features results in either deterioration of accuracy or inability to cluster. To tackle these challenges, new methodologies designed specifically for such data are needed. RESULTS:We present 2D-EM, a clustering algorithm approach designed for small sample size and high-dimensional datasets. To employ information corresponding to data distribution and facilitate visualization, the sample is folded into its two-dimension (2D) matrix form (or feature matrix). The maximum likelihood estimate is then estimated using a modified expectation-maximization (EM) algorithm. The 2D-EM methodology was benchmarked against several existing clustering methods using 6 medically-relevant transcriptome datasets. The percentage improvement of Rand score and adjusted Rand index compared to the best performing alternative method is up to 21.9% and 155.6%, respectively. To present the general utility of the 2D-EM method we also employed 2 methylome datasets, again showing superior performance relative to established methods. CONCLUSIONS:The 2D-EM algorithm was able to reproduce the groups in transcriptome and methylome data with high accuracy. This build confidence in the methods ability to uncover novel disease subtypes in new datasets. The design of 2D-EM algorithm enables it to handle a diverse set of challenging biomedical dataset and cluster with higher accuracy than established methods. MATLAB implementation of the tool can be freely accessed online ( http://www.riken.jp/en/research/labs/ims/med_sci_math or http://www.alok-ai-lab.com /).",2017-12-28 +27242037,ArthropodaCyc: a CycADS powered collection of BioCyc databases to analyse and compare metabolism of arthropods. 
,"Arthropods interact with humans at different levels with highly beneficial roles (e.g. as pollinators), as well as with a negative impact for example as vectors of human or animal diseases, or as agricultural pests. Several arthropod genomes are available at present and many others will be sequenced in the near future in the context of the i5K initiative, offering opportunities for reconstructing, modelling and comparing their metabolic networks. In-depth analysis of these genomic data through metabolism reconstruction is expected to contribute to a better understanding of the biology of arthropods, thereby allowing the development of new strategies to control harmful species. In this context, we present here ArthropodaCyc, a dedicated BioCyc collection of databases using the Cyc annotation database system (CycADS), allowing researchers to perform reliable metabolism comparisons of fully sequenced arthropods genomes. Since the annotation quality is a key factor when performing such global genome comparisons, all proteins from the genomes included in the ArthropodaCyc database were re-annotated using several annotation tools and orthology information. All functional/domain annotation results and their sources were integrated in the databases for user access. Currently, ArthropodaCyc offers a centralized repository of metabolic pathways, protein sequence domains, Gene Ontology annotations as well as evolutionary information for 28 arthropod species. Such database collection allows metabolism analysis both with integrated tools and through extraction of data in formats suitable for systems biology studies.Database URL: http://arthropodacyc.cycadsys.org/.",2016-05-30 +24891832,The Encyclopedia of Life v2: Providing Global Access to Knowledge About Life on Earth.,"The Encyclopedia of Life (EOL, http://eol.org) aims to provide unprecedented global access to a broad range of information about life on Earth. 
It currently contains 3.5 million distinct pages for taxa and provides content for 1.3 million of those pages. The content is primarily contributed by EOL content partners (providers) that have a more limited geographic, taxonomic or topical scope. EOL aggregates these data and automatically integrates them based on associated scientific names and other classification information. EOL also provides interfaces for curation and direct content addition. All materials in EOL are either in the public domain or licensed under a Creative Commons license. In addition to the web interface, EOL is also accessible through an Application Programming Interface. In this paper, we review recent developments added for Version 2 of the web site and subsequent releases through Version 2.2, which have made EOL more engaging, personal, accessible and internationalizable. We outline the core features and technical architecture of the system. We summarize milestones achieved so far by EOL to present results of the current system implementation and establish benchmarks upon which to judge future improvements. We have shown that it is possible to successfully integrate large amounts of descriptive biodiversity data from diverse sources into a robust, standards-based, dynamic, and scalable infrastructure. Increasing global participation and the emergence of EOL-powered applications demonstrate that EOL is becoming a significant resource for anyone interested in biological diversity.",2014-04-29 +26582918,ClinVar: public archive of interpretations of clinically relevant variants.,"ClinVar (https://www.ncbi.nlm.nih.gov/clinvar/) at the National Center for Biotechnology Information (NCBI) is a freely available archive for interpretations of clinical significance of variants for reported conditions. The database includes germline and somatic variants of any size, type or genomic location. 
Interpretations are submitted by clinical testing laboratories, research laboratories, locus-specific databases, OMIM®, GeneReviews™, UniProt, expert panels and practice guidelines. In NCBI's Variation submission portal, submitters upload batch submissions or use the Submission Wizard for single submissions. Each submitted interpretation is assigned an accession number prefixed with SCV. ClinVar staff review validation reports with data types such as HGVS (Human Genome Variation Society) expressions; however, clinical significance is reported directly from submitters. Interpretations are aggregated by variant-condition combination and assigned an accession number prefixed with RCV. Clinical significance is calculated for the aggregate record, indicating consensus or conflict in the submitted interpretations. ClinVar uses data standards, such as HGVS nomenclature for variants and MedGen identifiers for conditions. The data are available on the web as variant-specific views; the entire data set can be downloaded via ftp. Programmatic access for ClinVar records is available through NCBI's E-utilities. Future development includes providing a variant-centric XML archive and a web page for details of SCV submissions.",2015-11-17 +31288850,SLCO4C1 promoter methylation is a potential biomarker for prognosis associated with biochemical recurrence-free survival after radical prostatectomy.,"

Background

Prostate cancer (PC) is a commonly diagnosed malignancy in males, especially in the western hemisphere. The extensive use of multiple biomarkers plays an important role in the diagnosis and prognosis of PC. However, the accuracy of biomarkers for PC prognosis needs to be urgently improved. This study aimed to identify a novel prognostic biomarker for PC.

Materials and methods

Differentially methylated CpG sites were identified from the GSE76938 dataset ( https://www.ncbi.nlm.nih.gov/geo/ ) using R software version 3.1.4. Four significant CpG sites on the SLCO4C1 gene were found to be closely associated with prognosis in PC. Data downloaded from The Cancer Genome Atlas (TCGA) were used for validation. Co-expression and functional enrichment analyses were used to explore the roles of SLCO4C1 in molecular functions, biological processes and cellular components. Total RNA extraction and qRT-PCR were used to reveal the difference in SLCO4C1 expression between tumour and normal tissues. Bisulfite amplicon sequencing (BSAS) was used to identify methylation levels at the CpG sites.

Results

In the GSE76938 cohort, 10,206 CpG sites were identified to be differentially methylated in tumour versus normal prostate tissues. Among the CpG sites, four sites (cg06480736, cg19774478, cg19788741 and cg22149516) located in the promoter region (TSS200-1500) of SLCO4C1 were found to be significantly hypermethylated in tumour tissues. The results were validated in an independent dataset (TCGA PRAD cohort). In the cohort from TCGA, SLCO4C1 expression was negatively correlated with methylation levels at the four sites. The results of qRT-PCR validated that tumour tissues had a relatively lower expression of SLCO4C1. Bisulfite amplicon sequencing (BSAS) further confirmed a higher methylation level at the SLCO4C1 promoter in tumour tissues. SLCO4C1 (cg06480736, cg19774478, cg19788741 and cg22149516) was identified as a significant promising biomarker for biochemical recurrence-free survival in Kaplan-Meier analysis (P < 0.01) and univariate Cox proportional hazards analysis: cg06480736 (HR 15.914, P < 0.001), cg19774478 (HR 9.001, P < 0.001), cg19788741 (HR 10.759, P = 0.003) and cg22149516 (HR 17.144, P = 0.006). However, three sites, namely, cg06480736 (HR 1.809, P = 0.049), cg19774478 (HR 1.903, P = 0.041) and cg22149516 (HR 2.316, P = 0.008), were confirmed in multivariate analysis.

Conclusions

SLCO4C1 promoter methylation, including that at three CpG sites, namely, cg06480736, cg19774478 and cg22149516, is a potential biomarker for risk stratification and might offer significantly relevant prognostic information for PC patients after radical prostatectomy.",2019-07-09 +28334349,DeepBlueR: large-scale epigenomic analysis in R.,"

Motivation

While large amounts of epigenomic data are publicly available, their retrieval in a form suitable for downstream analysis is a bottleneck in current research. The DeepBlue Epigenomic Data Server provides a powerful interface and API for filtering, transforming, aggregating and downloading data from several epigenomic consortia.

Results

To make public epigenomic data conveniently available for analysis in R, we developed an R/Bioconductor package that connects to the DeepBlue Epigenomic Data Server, enabling users to quickly gather and transform epigenomic data from selected experiments for analysis in the Bioconductor ecosystem.

Availability and implementation

http://deepblue.mpi-inf.mpg.de/R .

Requirements

R 3.3, Bioconductor 3.4.

Contact

felipe.albrecht@mpi-inf.mpg.de or markus.list@mpi-inf.mpg.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +26590263,Lynx: a knowledge base and an analytical workbench for integrative medicine.,"Lynx (http://lynx.ci.uchicago.edu) is a web-based database and a knowledge extraction engine. It supports annotation and analysis of high-throughput experimental data and generation of weighted hypotheses regarding genes and molecular mechanisms contributing to human phenotypes or conditions of interest. Since the last release, the Lynx knowledge base (LynxKB) has been periodically updated with the latest versions of the existing databases and supplemented with additional information from public databases. These additions have enriched the data annotations provided by Lynx and improved the performance of Lynx analytical tools. Moreover, the Lynx analytical workbench has been supplemented with new tools for reconstruction of co-expression networks and feature-and-network-based prioritization of genetic factors and molecular mechanisms. These developments facilitate the extraction of meaningful knowledge from experimental data and LynxKB. The Service Oriented Architecture provides public access to LynxKB and its analytical tools via user-friendly web services and interfaces.",2015-11-20 +29762646,Correcting mistakes in predicting distributions.,"Motivation:Many applications monitor predictions of a whole range of features for biological datasets, e.g. the fraction of secreted human proteins in the human proteome. Results and error estimates are typically derived from publications. Results:Here, we present a simple, alternative approximation that uses performance estimates of methods to error-correct the predicted distributions. This approximation uses the confusion matrix (TP true positives, TN true negatives, FP false positives and FN false negatives) describing the performance of the prediction tool for correction. 
As proof-of-principle, the correction was applied to a two-class (membrane/not) and to a seven-class (localization) prediction. Availability and implementation:Datasets and a simple JavaScript tool available freely for all users at http://www.rostlab.org/services/distributions. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-10-01 +26573482,"WIDDE: a Web-Interfaced next generation database for genetic diversity exploration, with a first application in cattle.","

Background

The advent and democratization of next generation sequencing and genotyping technologies have led to a huge amount of data for the characterization of population genetic diversity in model and non-model species. However, efficient storage, management, cross-analyzing and exploration of such dense genotyping datasets remain challenging. This is particularly true for the bovine species where many SNP datasets have been generated in various cattle populations with different genotyping tools.

Description

We developed WIDDE, a Web-Interfaced Next Generation Database that stands as a generic tool applicable to a wide range of species and marker types ( http://widde.toulouse.inra.fr). As a first illustration, we hereby describe its first version dedicated to cattle biodiversity, which includes a large and evolving cattle genotyping dataset for over 750,000 SNPs available on 129 (89 public) different cattle populations representative of the world-wide bovine genetic diversity and on 7 outgroup bovid species. This version proposes an optional marker and individual filtering step, an export of genotyping data in different popular formats, and an exploration of genetic diversity through a principal component analysis. Users can also explore their own genotyping data together with data from WIDDE, assign their samples to WIDDE populations based on distance assignment method and supervised clustering, and estimate their ancestry composition relative to the populations represented in the database.

Conclusion

The cattle version of WIDDE represents to our knowledge the first database dedicated to cattle biodiversity and SNP genotyping data that will be very useful for researchers interested in this field. As a generic tool applicable to a wide range of marker types, WIDDE is overall intended to the genetic diversity exploration of any species and will be extended to other species shortly. The structure makes it easy to include additional output formats and new tools dedicated to genetic diversity exploration.",2015-11-14 +25233092,LncRBase: an enriched resource for lncRNA information.,"Long noncoding RNAs (lncRNAs) are noncoding transcripts longer than 200 nucleotides, which show evidence of pervasive transcription and participate in a plethora of cellular regulatory processes. Although several noncoding transcripts have been functionally annotated as lncRNAs within the genome, not all have been proven to fulfill the criteria for a functional regulator and further analyses have to be done in order to include them in a functional cohort. LncRNAs are being classified and reclassified in an ongoing annotation process, and the challenge is fraught with ambiguity, as newer evidences of their biogenesis and functional implication come into light. In our effort to understand the complexity of this still enigmatic biomolecule, we have developed a new database entitled ""LncRBase"" where we have classified and characterized lncRNAs in human and mouse. It is an extensive resource of human and mouse lncRNA transcripts belonging to fourteen distinct subtypes, with a total of 83,201 entries for mouse and 133,361 entries for human: among these, we have newly annotated 8,507 mouse and 14,813 human non coding RNA transcripts (from UCSC and H-InvDB 8.0) as lncRNAs. We have especially considered protein coding gene loci which act as hosts for non coding transcripts. LncRBase includes different lncRNA transcript variants of protein coding genes within LncRBase. 
LncRBase provides information about the genomic context of different lncRNA subtypes, their interaction with small non coding RNAs (ncRNAs) viz. piwi interacting RNAs (piRNAs) and microRNAs (miRNAs) and their mode of regulation, via association with diverse other genomic elements. Adequate knowledge about genomic origin and molecular features of lncRNAs is essential to understand their functional and behavioral complexities. Overall, LncRBase provides a thorough study on various aspects of lncRNA origin and function and a user-friendly interface to search for lncRNA information. LncRBase is available at http://bicresources.jcbose.ac.in/zhumur/lncrbase.",2014-09-18 +30097004,miREM: an expectation-maximization approach for prioritizing miRNAs associated with gene-set.,"

Background

The knowledge of miRNAs regulating the expression of sets of mRNAs has led to novel insights into numerous and diverse cellular mechanisms. While a single miRNA may regulate many genes, one gene can be regulated by multiple miRNAs, presenting a complex relationship to model for accurate predictions.

Results

Here, we introduce miREM, a program that couples an expectation-maximization (EM) algorithm to the common approach of hypergeometric probability (HP), which improves the prediction and prioritization of miRNAs from gene-sets of interest. miREM has been made available through a web-server ( https://bioinfo-csi.nus.edu.sg/mirem2/ ) that can be accessed through an intuitive graphical user interface. The program incorporates a large compendium of human/mouse miRNA-target prediction databases to enhance prediction. Users may upload their genes of interest in various formats as an input and select whether to consider non-conserved miRNAs, amongst filtering options. Results are reported in a rich graphical interface that allows users to: (i) prioritize predicted miRNAs through a scatterplot of HP p-values and EM scores; (ii) visualize the predicted miRNAs and corresponding genes through a heatmap; and (iii) identify and filter homologous or duplicated predictions by clustering them according to their seed sequences.

Conclusion

We tested miREM using RNAseq datasets from two single ""spiked"" knock-in miRNA experiments and two double knock-out miRNA experiments. miREM predicted these manipulated miRNAs as having high EM scores from the gene set signatures (i.e. top predictions for single knock-in and double knock-out miRNA experiments). Finally, we have demonstrated that miREM predictions are either similar or better than results provided by existing programs.",2018-08-10 +30097542,Accelerated RNA secondary structure design using preselected sequences for helices and loops.,"Nucleic acids can be designed to be nano-machines, pharmaceuticals, or probes. RNA secondary structures can form the basis of self-assembling nanostructures. There are only four natural RNA bases, therefore it can be difficult to design sequences that fold to a single, specified structure because many other structures are often possible for a given sequence. One approach taken by state-of-the-art sequence design methods is to select sequences that fold to the specified structure using stochastic, iterative refinement. The goal of this work is to accelerate design. Many existing iterative methods select and refine sequences one base pair and one unpaired nucleotide at a time. Here, the hypothesis that sequences can be preselected in order to accelerate design was tested. To this aim, a database was built of helix sequences that demonstrate thermodynamic features found in natural sequences and that also have little tendency to cross-hybridize. Additionally, a database was assembled of RNA loop sequences with low helix-formation propensity and little tendency to cross-hybridize with either the helices or other loops. These databases of preselected sequences accelerate the selection of sequences that fold with minimal ensemble defect by replacing some of the trial and error of current refinement approaches. 
When using the database of preselected sequences as compared to randomly chosen sequences, sequences for natural structures are designed 36 times faster, and random structures are designed six times faster. The sequences selected with the aid of the database have similar ensemble defect as those sequences selected at random. The sequence database is part of RNAstructure package at http://rna.urmc.rochester.edu/RNAstructure.html.",2018-08-10 +30055037,LAMA2 gene mutation update: Toward a more comprehensive picture of the laminin-α2 variome and its related phenotypes.,"Congenital muscular dystrophy type 1A (MDC1A) is one of the main subtypes of early-onset muscle disease, caused by disease-associated variants in the laminin-α2 (LAMA2) gene. MDC1A usually presents as a severe neonatal hypotonia and failure to thrive. Muscle weakness compromises normal motor development, leading to the inability to sit unsupported or to walk independently. The phenotype associated with LAMA2 defects has been expanded to include milder and atypical cases, being now collectively known as LAMA2-related muscular dystrophies (LAMA2-MD). Through an international multicenter collaborative effort, 61 new LAMA2 disease-associated variants were identified in 86 patients, representing the largest number of patients and new disease-causing variants in a single report. The collaborative variant collection was supported by the LOVD-powered LAMA2 gene variant database (https://www.LOVD.nl/LAMA2), updated as part of this work. As of December 2017, the database contains 486 unique LAMA2 variants (309 disease-associated), obtained from direct submissions and literature reports. Database content was systematically reviewed and further insights concerning LAMA2-MD are presented. We focus on the impact of missense changes, especially the c.2461A > C (p.Thr821Pro) variant and its association with late-onset LAMA2-MD. 
Finally, we report diagnostically challenging cases, highlighting the relevance of modern genetic analysis in the characterization of clinically heterogeneous muscle diseases.",2018-08-10 +27029637,An integrated analysis tool for analyzing hybridization intensities and genotypes using new-generation population-optimized human arrays.,"

Background

Affymetrix Axiom single nucleotide polymorphism (SNP) arrays provide a cost-effective, high-density, and high-throughput genotyping solution for population-optimized analyses. However, no public software is available for the integrated genomic analysis of hybridization intensities and genotypes for this new-generation population-optimized genotyping platform.

Results

A set of statistical methods was developed for an integrated analysis of allele frequency (AF), allelic imbalance (AI), loss of heterozygosity (LOH), long contiguous stretch of homozygosity (LCSH), and copy number variation or alteration (CNV/CNA) on the basis of SNP probe hybridization intensities and genotypes. This study analyzed 3,236 samples that were genotyped using different SNP platforms. The proposed AF adjustment method considerably increased the accuracy of AF estimation. The proposed quick circular binary segmentation algorithm for segmenting copy number reduced the computation time of the original segmentation method by 30-67 %. The proposed CNV/CNA detection, which integrates AI and LOH/LCSH detection, had a promising true positive rate and well-controlled false positive rate in simulation studies. Moreover, our real-time quantitative polymerase chain reaction experiments successfully validated the CNVs/CNAs that were identified in the Axiom data analyses using the proposed methods; some of the validated CNVs/CNAs were not detected in the Affymetrix Array 6.0 data analysis using the Affymetrix Genotyping Console. All the analysis functions are packaged into the ALICE (AF/LOH/LCSH/AI/CNV/CNA Enterprise) software.

Conclusions

ALICE and the used genomic reference databases, which can be downloaded from http://hcyang.stat.sinica.edu.tw/software/ALICE.html , are useful resources for analyzing genomic data from the Axiom and other SNP arrays.",2016-03-31 +28985876,The readability of psychosocial wellness patient resources: improving surgical outcomes.,"

Background

Patient education is increasingly accessed with online resources and is essential for patient satisfaction and clinical outcomes. The average American adult reads at a seventh grade level, and the National Institute of Health (NIH) and the American Medical Association (AMA) recommend that information be written at a sixth-grade reading level. Health literacy plays an important role in the disease course and outcomes of all patients, including those with depression and likely other psychiatric disorders, although this is an area in need of further study. The purpose of this study was to collect and analyze written, online mental health resources on the Veterans Health Administration (VA) website, and other websites, using readability assessment instruments.

Methods

An internet search was performed to identify written patient education information regarding mental health from the VA (the VA Mental Health Website) and top-rated psychiatric hospitals. Seven mental health topics were included in the analysis: generalized anxiety disorder, bipolar, major depressive disorder, posttraumatic stress disorder, schizophrenia, substance abuse, and suicide. Readability analyses were performed using the Gunning Fog Index, the Flesch-Kincaid Grade Level, the Coleman-Liau Index, the SMOG Readability Formula, and the Automated Readability Index. These scores were then combined into a Readability Consensus score. A two-tailed t-test was used to compare the mean values, and statistical significance was set at P < 0.05.

Results

Twelve of the best hospitals for psychiatry 2016-2017 were identified. Nine had educational material. Six of the nine cited the same resource, The StayWell Company, LLC (StayWell Company, LLC; Yardley, PA), for at least one of the mental health topics analyzed. The VA mental health website (http://www.mentalhealth.va.gov) had a significantly higher readability consensus than six of the top psychiatric hospitals (P < 0.05, P = 0.0067, P = 0.019, P = 0.041, P = 0.0093, P = 0.0054, and P = 0.0093). The overall average readability consensus for mental health information on all websites analyzed was 9.52.

Conclusions

Online resources for mental health disorders are more complex than recommended by the NIH and AMA. Efforts to improve readability of mental health and psychosocial wellness resources could benefit patient understanding and outcomes, especially in patients with lower literacy. Surgical outcomes are correlated with patient mental health and psychosocial wellness and thus can be improved with more appropriate levels of readability of psychosocial wellness resources.",2017-06-10 +30269551,nTMS guidance of awake surgery for highly eloquent gliomas.,"Navigated transcranial magnetic stimulation (nTMS) allows for preoperative mapping for eloquent gliomas. Besides surgical planning, it also guides intraoperative stimulation mapping. The authors' routine includes preoperative nTMS plus nTMS-based tractography for motor and language to consult patients, plan surgery, craniotomy, and guide cortical and subcortical stimulation. Here, the authors present this routine in a 48-year-old woman with a glioma of the left middle and superior frontal gyrus reaching the precentral gyrus and superior longitudinal fascicle. Gross-total resection via awake surgery was achieved without deficit. The nTMS data and nTMS-based tractography augment eloquent glioma management far beyond its current application. The video can be found here: https://youtu.be/h4ldgMXL1ys .",2018-10-01 +23046413,PAGED: a pathway and gene-set enrichment database to enable molecular phenotype discoveries.,"

Background

Over the past decade, pathway and gene-set enrichment analysis has evolved into the study of high-throughput functional genomics. Owing to poorly annotated and incomplete pathway data, researchers have begun to combine pathway and gene-set enrichment analysis as well as network module-based approaches to identify crucial relationships between different molecular mechanisms.

Methods

To meet the new challenge of molecular phenotype discovery, in this work, we have developed an integrated online database, the Pathway And Gene Enrichment Database (PAGED), to enable comprehensive searches for disease-specific pathways, gene signatures, microRNA targets, and network modules by integrating gene-set-based prior knowledge as molecular patterns from multiple levels: the genome, transcriptome, post-transcriptome, and proteome.

Results

The online database we developed, PAGED http://bio.informatics.iupui.edu/PAGED is by far the most comprehensive public compilation of gene sets. In its current release, PAGED contains a total of 25,242 gene sets, 61,413 genes, 20 organisms, and 1,275,560 records from five major categories. Beyond its size, the advantage of PAGED lies in the explorations of relationships between gene sets as gene-set association networks (GSANs). Using colorectal cancer expression data analysis as a case study, we demonstrate how to query this database resource to discover crucial pathways, gene signatures, and gene network modules specific to colorectal cancer functional genomics.

Conclusions

This integrated online database lays a foundation for developing tools beyond third-generation pathway analysis approaches for discovering molecular phenotypes, especially for disease-associated pathway/gene-set enrichment analysis.",2012-09-11 +30169574,Improving the prediction of protein-nucleic acids binding residues via multiple sequence profiles and the consensus of complementary methods.,"

Motivation

The interactions between protein and nucleic acids play a key role in various biological processes. Accurate recognition of the residues that bind nucleic acids can facilitate the study of uncharacterized protein-nucleic acids interactions. The accuracy of existing nucleic acids-binding residues prediction methods is relatively low.

Results

In this work, we introduce NucBind, a novel method for the prediction of nucleic acids-binding residues. NucBind combines the predictions from a support vector machine-based ab-initio method SVMnuc and a template-based method COACH-D. SVMnuc was trained with features from three complementary sequence profiles. COACH-D predicts the binding residues based on homologous templates identified from a nucleic acids-binding library. The proposed methods were assessed and compared with other peer methods on three benchmark datasets. Experimental results show that NucBind consistently outperforms other state-of-the-art methods. Though with higher accuracy, similar to many other ab-initio methods, cross prediction between DNA and RNA-binding residues was also observed in SVMnuc and NucBind. We attribute the success of NucBind to two factors. The first is the utilization of improved features extracted from three complementary sequence profiles in SVMnuc. The second is the combination of two complementary methods: the ab-initio method SVMnuc and the template-based method COACH-D.

Availability and implementation

http://yanglab.nankai.edu.cn/NucBind.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-03-01 +27867804,DFAST and DAGA: web-based integrated genome annotation tools and resources.,"Quality assurance and correct taxonomic affiliation of data submitted to public sequence databases have been an everlasting problem. The DDBJ Fast Annotation and Submission Tool (DFAST) is a newly developed genome annotation pipeline with quality and taxonomy assessment tools. To enable annotation of ready-to-submit quality, we also constructed curated reference protein databases tailored for lactic acid bacteria. DFAST was developed so that all the procedures required for DDBJ submission could be done seamlessly online. The online workspace would be especially useful for users not familiar with bioinformatics skills. In addition, we have developed a genome repository, DFAST Archive of Genome Annotation (DAGA), which currently includes 1,421 genomes covering 179 species and 18 subspecies of two genera, Lactobacillus and Pediococcus, obtained from both DDBJ/ENA/GenBank and Sequence Read Archive (SRA). All the genomes deposited in DAGA were annotated consistently and assessed using DFAST. To assess the taxonomic position based on genomic sequence information, we used the average nucleotide identity (ANI), which showed high discriminative power to determine whether two given genomes belong to the same species. We corrected mislabeled or misidentified genomes in the public database and deposited the curated information in DAGA. The repository will improve the accessibility and reusability of genome resources for lactic acid bacteria. By exploiting the data deposited in DAGA, we found intraspecific subgroups in Lactobacillus gasseri and Lactobacillus jensenii, whose variation between subgroups is larger than the well-accepted ANI threshold of 95% to differentiate species. 
DFAST and DAGA are freely accessible at https://dfast.nig.ac.jp.",2016-07-14 +24608173,NCG 4.0: the network of cancer genes in the era of massive mutational screenings of cancer genomes.,"NCG 4.0 is the latest update of the Network of Cancer Genes, a web-based repository of systems-level properties of cancer genes. In its current version, the database collects information on 537 known (i.e. experimentally supported) and 1463 candidate (i.e. inferred using statistical methods) cancer genes. Candidate cancer genes derive from the manual revision of 67 original publications describing the mutational screening of 3460 human exomes and genomes in 23 different cancer types. For all 2000 cancer genes, duplicability, evolutionary origin, expression, functional annotation, interaction network with other human proteins and with microRNAs are reported. In addition to providing a substantial update of cancer-related information, NCG 4.0 also introduces two new features. The first is the annotation of possible false-positive cancer drivers, defined as candidate cancer genes inferred from large-scale screenings whose association with cancer is likely to be spurious. The second is the description of the systems-level properties of 64 human microRNAs that are causally involved in cancer progression (oncomiRs). Owing to the manual revision of all information, NCG 4.0 constitutes a complete and reliable resource on human coding and non-coding genes whose deregulation drives cancer onset and/or progression. NCG 4.0 can also be downloaded as a free application for Android smart phones. Database URL: http://bio.ieo.eu/ncg/.",2014-03-07 +28862395,Phelan-McDermid syndrome data network: Integrating patient reported outcomes with clinical notes and curated genetic reports.,"The heterogeneity of patient phenotype data are an impediment to the research into the origins and progression of neuropsychiatric disorders. 
This difficulty is compounded in the case of rare disorders such as Phelan-McDermid Syndrome (PMS) by the paucity of patient clinical data. PMS is a rare syndromic genetic cause of autism and intellectual deficiency. In this paper, we describe the Phelan-McDermid Syndrome Data Network (PMS_DN), a platform that facilitates research into phenotype-genotype correlation and progression of PMS by: a) integrating knowledge of patient phenotypes extracted from Patient Reported Outcomes (PRO) data and clinical notes-two heterogeneous, underutilized sources of knowledge about patient phenotypes-with curated genetic information from the same patient cohort and b) making this integrated knowledge, along with a suite of statistical tools, available free of charge to authorized investigators on a Web portal https://pmsdn.hms.harvard.edu. PMS_DN is a Patient Centric Outcomes Research Initiative (PCORI) where patients and their families are involved in all aspects of the management of patient data in driving research into PMS. To foster collaborative research, PMS_DN also makes patient aggregates from this knowledge available to authorized investigators using distributed research networks such as the PCORnet PopMedNet. PMS_DN is hosted on a scalable cloud based environment and complies with all patient data privacy regulations. As of October 31, 2016, PMS_DN integrates high-quality knowledge extracted from the clinical notes of 112 patients and curated genetic reports of 176 patients with preprocessed PRO data from 415 patients.",2017-09-01 +26567549,GEneSTATION 1.0: a synthetic resource of diverse evolutionary and functional genomic data for studying the evolution of pregnancy-associated tissues and phenotypes.,"Mammalian gestation and pregnancy are fast evolving processes that involve the interaction of the fetal, maternal and paternal genomes. 
Version 1.0 of the GEneSTATION database (http://genestation.org) integrates diverse types of omics data across mammals to advance understanding of the genetic basis of gestation and pregnancy-associated phenotypes and to accelerate the translation of discoveries from model organisms to humans. GEneSTATION is built using tools from the Generic Model Organism Database project, including the biology-aware database CHADO, new tools for rapid data integration, and algorithms that streamline synthesis and user access. GEneSTATION contains curated life history information on pregnancy and reproduction from 23 high-quality mammalian genomes. For every human gene, GEneSTATION contains diverse evolutionary (e.g. gene age, population genetic and molecular evolutionary statistics), organismal (e.g. tissue-specific gene and protein expression, differential gene expression, disease phenotype), and molecular data types (e.g. Gene Ontology Annotation, protein interactions), as well as links to many general (e.g. Entrez, PubMed) and pregnancy disease-specific (e.g. PTBgene, dbPTB) databases. By facilitating the synthesis of diverse functional and evolutionary data in pregnancy-associated tissues and phenotypes and enabling their quick, intuitive, accurate and customized meta-analysis, GEneSTATION provides a novel platform for comprehensive investigation of the function and evolution of mammalian pregnancy.",2015-11-14 +28334390,SeqArray-a storage-efficient high-performance data format for WGS variant calls.,"

Motivation

Whole-genome sequencing (WGS) data are being generated at an unprecedented rate. Analysis of WGS data requires a flexible data format to store the different types of DNA variation. Variant call format (VCF) is a general text-based format developed to store variant genotypes and their annotations. However, VCF files are large and data retrieval is relatively slow. Here we introduce a new WGS variant data format implemented in the R/Bioconductor package 'SeqArray' for storing variant calls in an array-oriented manner which provides the same capabilities as VCF, but with multiple high compression options and data access using high-performance parallel computing.

Results

Benchmarks using 1000 Genomes Phase 3 data show file sizes are 14.0 Gb (VCF), 12.3 Gb (BCF, binary VCF), 3.5 Gb (BGT) and 2.6 Gb (SeqArray) respectively. Reading genotypes in the SeqArray package is two to three times faster compared with the htslib C library using BCF files. For the allele frequency calculation, the implementation in the SeqArray package is over 5 times faster than PLINK v1.9 with VCF and BCF files, and over 16 times faster than vcftools. When used in conjunction with R/Bioconductor packages, the SeqArray package provides users a flexible, feature-rich, high-performance programming environment for analysis of WGS variant data.

Availability and implementation

http://www.bioconductor.org/packages/SeqArray.

Contact

zhengx@u.washington.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +30096498,Development of Machine Learning Algorithms for Prediction of 5-Year Spinal Chordoma Survival.,"

Background

Chordomas are locally invasive slow-growing tumors that are difficult to study because of the rarity of the tumors and the lack of significant volumes of patients with longitudinal follow-up. As such, there are currently no machine learning studies in the chordoma literature. The purpose of this study was to develop machine learning models for survival prediction and deploy them as open access web applications as a proof of concept for machine learning in rare nervous system lesions.

Methods

The National Cancer Institute's Surveillance, Epidemiology, and End Results program database was used to identify adult patients diagnosed with spinal chordoma between 1995 and 2010. Four machine learning models were used to predict 5-year survival for spinal chordoma and assessed by discrimination, calibration, and overall performance.

Results

The 5-year overall survival for 265 patients with spinal chordoma was 67.5%. Variables used for prediction were age at diagnosis, tumor size, tumor location, extent of tumor invasion, and extent of surgery. For 5-year survival prediction, the Bayes Point Machine achieved the best performance with a c statistic of 0.80, calibration slope of 1.01, calibration intercept of 0.03, and Brier score of 0.16. This model for 5-year mortality prediction was incorporated into an open access application and can be found online (https://sorg-apps.shinyapps.io/chordoma/).

Conclusions

This analysis of patients with spinal chordoma demonstrated that machine learning models can be developed for survival prediction in rare pathologies and have the potential to serve as the basis for creation of decision support tools in the future.",2018-08-08 +30592774,Novel dual-action prodrug triggers apoptosis in glioblastoma cells by releasing a glutathione quencher and lysine-specific histone demethylase 1A inhibitor.,"Targeting epigenetic mechanisms has shown promise against several cancers but has so far been unsuccessful against glioblastoma (GBM). Altered histone 3 lysine 4 methylation and increased lysine-specific histone demethylase 1A (LSD1) expression in GBM tumours nonetheless suggest that epigenetic mechanisms are involved in GBM. We engineered a dual-action prodrug, which is activated by the high hydrogen peroxide levels associated with GBM cells. This quinone methide phenylaminecyclopropane prodrug releases the LSD1 inhibitor 2-phenylcyclopropylamine with the glutathione scavenger para-quinone methide to trigger apoptosis in GBM cells. Quinone methide phenylaminocyclopropane impaired GBM cell behaviours in two-dimensional and three-dimensional assays, and triggered cell apoptosis in several primary and immortal GBM cell cultures. These results support our double-hit hypothesis of potentially targeting LSD1 and quenching glutathione, in order to impair and kill GBM cells but not healthy astrocytes. Our data suggest this strategy is effective at selectively targeting GBM and potentially other types of cancers. OPEN SCIENCE BADGES: This article has received a badge for *Open Materials* because it provided all relevant information to reproduce the study in the manuscript. The complete Open Science Disclosure form for this article can be found at the end of the article. 
More information about the Open Practices badges can be found at https://cos.io/our-services/open-science-badges/.",2019-02-03 +30423061,Higher-order molecular organization as a source of biological function.,"

Motivation

Molecular interactions have widely been modelled as networks. The local wiring patterns around molecules in molecular networks are linked with their biological functions. However, networks model only pairwise interactions between molecules and cannot explicitly and directly capture the higher-order molecular organization, such as protein complexes and pathways. Hence, we ask if hypergraphs (hypernetworks), that directly capture entire complexes and pathways along with protein-protein interactions (PPIs), carry additional functional information beyond what can be uncovered from networks of pairwise molecular interactions. The mathematical formalism of a hypergraph has long been known, but not often used in studying molecular networks due to the lack of sophisticated algorithms for mining the underlying biological information hidden in the wiring patterns of molecular systems modelled as hypernetworks.

Results

We propose a new, multi-scale, protein interaction hypernetwork model that utilizes hypergraphs to capture different scales of protein organization, including PPIs, protein complexes and pathways. In analogy to graphlets, we introduce hypergraphlets, small, connected, non-isomorphic, induced sub-hypergraphs of a hypergraph, to quantify the local wiring patterns of these multi-scale molecular hypergraphs and to mine them for new biological information. We apply them to model the multi-scale protein networks of bakers yeast and human and show that the higher-order molecular organization captured by these hypergraphs is strongly related to the underlying biology. Importantly, we demonstrate that our new models and data mining tools reveal different, but complementary biological information compared with classical PPI networks. We apply our hypergraphlets to successfully predict biological functions of uncharacterized proteins.

Availability and implementation

Code and data are available online at http://www0.cs.ucl.ac.uk/staff/natasa/hypergraphlets.",2018-09-01 +28783153,PECAN: library-free peptide detection for data-independent acquisition tandem mass spectrometry data.,"Data-independent acquisition (DIA) is an emerging mass spectrometry (MS)-based technique for unbiased and reproducible measurement of protein mixtures. DIA tandem mass spectrometry spectra are often highly multiplexed, containing product ions from multiple cofragmenting precursors. Detecting peptides directly from DIA data is therefore challenging; most DIA data analyses require spectral libraries. Here we present PECAN (http://pecan.maccosslab.org), a library-free, peptide-centric tool that robustly and accurately detects peptides directly from DIA data. PECAN reports evidence of detection based on product ion scoring, which enables detection of low-abundance analytes with poor precursor ion signal. We demonstrate the chromatographic peak picking accuracy and peptide detection capability of PECAN, and we further validate its detection with data-dependent acquisition and targeted analyses. Lastly, we used PECAN to build a plasma proteome library from DIA data and to query known sequence variants.",2017-08-07 +27478368,PIMADb: A Database of Protein-Protein Interactions in Huge Macromolecular Assemblies.,"Protein-protein interactions play a very important role in the process of cellular functionality. Intricate details about the interactions between the proteins in a macromolecular assembly are important to understand the function and significance of protein complexes. We are reporting about a database of protein-protein interactions in huge macromolecular assemblies (PIMADb) that records the intrinsic details of 189,532 interchain interactions in 40,049 complexes from the Protein Data Bank. These details include the results of the quantification and analysis of all the interactions in the complex. 
The availability of interprotomer interaction networks can enable the design of point mutation experiments. PIMADb can be accessed from the URL: http://caps.ncbs.res.in/pimadb.",2016-07-19 +30519231,BAC-BROWSER: The Tool for Visualization and Analysis of Prokaryotic Genomes.,"Prokaryotes are actively studied objects in the scope of genomic regulation. Microbiologists need special tools for complex analysis of data to study and identification of regulatory mechanism in bacteria and archaea. We developed a tool BAC-BROWSER, specifically for visualization and analysis of small prokaryotic genomes. BAC-BROWSER provides tools for different types of analysis to study a wide set of regulatory mechanisms of prokaryotes: -transcriptional regulation by transcription factors (TFs), analysis of TFs, their targets, and binding sites.-other regulatory motifs, promoters, terminators and ribosome binding sites-transcriptional regulation by variation of operon structure, alternative starts or ends of transcription.-non-coding RNAs, antisense RNAs-RNA secondary structure, riboswitches-GC content, GC skew, codon usage BAC-browser incorporated free programs accelerating the verification of obtained results: primer design and oligocalculator, vector visualization, the tool for synthetic gene construction. The program is designed for Windows operating system and freely available for download in http://smdb.rcpcm.org/tools/index.html.",2018-11-21 +21375730,"MIR@NT@N: a framework integrating transcription factors, microRNAs and their targets to identify sub-network motifs in a meta-regulation network model.","

Background

To understand biological processes and diseases, it is crucial to unravel the concerted interplay of transcription factors (TFs), microRNAs (miRNAs) and their targets within regulatory networks and fundamental sub-networks. An integrative computational resource generating a comprehensive view of these regulatory molecular interactions at a genome-wide scale would be of great interest to biologists, but is not available to date.

Results

To identify and analyze molecular interaction networks, we developed MIR@NT@N, an integrative approach based on a meta-regulation network model and a large-scale database. MIR@NT@N uses a graph-based approach to predict novel molecular actors across multiple regulatory processes (i.e. TFs acting on protein-coding or miRNA genes, or miRNAs acting on messenger RNAs). Exploiting these predictions, the user can generate networks and further analyze them to identify sub-networks, including motifs such as feedback and feedforward loops (FBL and FFL). In addition, networks can be built from lists of molecular actors with an a priori role in a given biological process to predict novel and unanticipated interactions. Analyses can be contextualized and filtered by integrating additional information such as microarray expression data. All results, including generated graphs, can be visualized, saved and exported into various formats. MIR@NT@N performances have been evaluated using published data and then applied to the regulatory program underlying epithelium to mesenchyme transition (EMT), an evolutionary-conserved process which is implicated in embryonic development and disease.

Conclusions

MIR@NT@N is an effective computational approach to identify novel molecular regulations and to predict gene regulatory networks and sub-networks including conserved motifs within a given biological context. Taking advantage of the M@IA environment, MIR@NT@N is a user-friendly web resource freely available at http://mironton.uni.lu which will be updated on a regular basis.",2011-03-04 +23435068,Genetic Simulation Resources: a website for the registration and discovery of genetic data simulators.,"

Summary

Many simulation methods and programs have been developed to simulate genetic data of the human genome. These data have been widely used, for example, to predict properties of populations retrospectively or prospectively according to mathematically intractable genetic models, and to assist the validation, statistical inference and power analysis of a variety of statistical models. However, owing to the differences in type of genetic data of interest, simulation methods, evolutionary features, input and output formats, terminologies and assumptions for different applications, choosing the right tool for a particular study can be a resource-intensive process that usually involves searching, downloading and testing many different simulation programs. Genetic Simulation Resources (GSR) is a website provided by the National Cancer Institute (NCI) that aims to help researchers compare and choose the appropriate simulation tools for their studies. This website allows authors of simulation software to register their applications and describe them with well-defined attributes, thus allowing site users to search and compare simulators according to specified features.

Availability

http://popmodels.cancercontrol.cancer.gov/gsr.",2013-02-23 +30920607,Protein-quality evaluation of complementary foods in Indian children.,"

Background

The types of food in complementary feeding of infants and young children are important for growth and development. Food protein quality, as measured by the Digestible Indispensable Amino Acid Score (DIAAS), requires the determination of true ileal digestibility of indispensable amino acids (IAAs) in children.

Objectives

First, the aim of this study was to measure the true ileal IAA digestibility of 4 (rice, finger millet, mung bean, and hen egg) commonly consumed complementary foods in children aged <2 y using the dual-isotope tracer method. Second, we calculated the DIAAS of complementary feeding diets and their relation to stunting in a representative Indian rural population.

Design

Rice, finger millet, and mung bean were intrinsically labeled with deuterium oxide (2H2O), whereas egg was labeled through oral dosing of hens with a uniformly 2H-labeled amino acid mixture. True ileal IAA digestibility was determined by the dual-isotope tracer technique. The DIAAS of complementary food protein was calculated in children aged 1-3 y from a nationally representative survey to evaluate its relation with stunting.

Results

True ileal IAA digestibility was lowest in mung bean (65.2% ± 7.1%), followed by finger millet (68.4 %± 5.3%) and rice (78.5% ± 3.5%), and was highest for egg (87.4% ± 4.0%). There was a significant inverse correlation of complementary food DIAAS with stunting in survey data (r = -0.66, P = 0.044). The addition of egg or milk to nationally representative complementary diets theoretically improved the DIAAS from 80 to 100.

Conclusions

The true ileal IAA digestibility of 4 foods commonly consumed in complementary diets showed that the DIAAS was associated with stunting and reinforces the importance of including animal source food (ASF) in diets to improve growth. This trial was registered at http://ctri.nic.in/clinicaltrials/login.php as CTRI/2017/02/007921.",2019-05-01 +30715532,Response to Conservative Treatment for Thumb Carpometacarpal Osteoarthritis Is Associated With Conversion to Surgery: A Prospective Cohort Study.,"BACKGROUND:The current guidelines for treatment of carpometacarpal osteoarthritis recommend starting with conservative treatment before a surgical procedure is considered. OBJECTIVE:The objective was to investigate how response to conservative treatment, in terms of pain and hand function, influences the hazard that patients convert to surgical treatment. DESIGN:This was a multicenter, prospective cohort study. METHODS:Participants comprised 701 patients who received 3 months of hand therapy and an orthosis. Pain and function were measured with the Michigan Hand Questionnaire (MHQ) at baseline and at 6 weeks and 3 months follow-up. Conversion to surgical treatment was recorded from clinical records. Joint modeling (a statistical method of combining prediction models) was used to perform the analysis and to calculate hazard ratios (HRs). RESULTS:The joint analytical model showed that both MHQ pain score at a certain point (HR = 0.93; 95% confidence interval [CI] = 0.92-0.94) and change in MHQ pain score (HR = 1.07; 95% CI = 1.06-1.09) during conservative treatment was significantly associated with conversion to surgical treatment. The joint analytical model between functional outcome and conversion to surgical treatment showed only a significant association between MHQ function at a certain point (HR = 0.97; 95% CI = 0.95-0.99), and no significant association between the change in MHQ score for function (HR = 1.0; 95% CI = 1.0-1.0) and conversion to surgical treatment. 
LIMITATIONS:Missing data might have resulted in biased estimates. CONCLUSIONS:Self-reported pain and function, as well as change in self-reported pain during treatment, were associated with the hazard of conversion to surgical treatment, whereas change in self-reported functioning was not associated with conversion. Because a reduction in pain during conservative treatment appears to decrease the rate of conversion to surgical treatment, it is advised to structurally monitor pain levels during treatment. Listen to the author interview at https://academic.oup.com/ptj/pages/podcasts.",2019-05-01 +28983246,"Sleep: An Open-Source Python Software for Visualization, Analysis, and Staging of Sleep Data.","We introduce Sleep, a new Python open-source graphical user interface (GUI) dedicated to visualization, scoring and analyses of sleep data. Among its most prominent features are: (1) Dynamic display of polysomnographic data, spectrogram, hypnogram and topographic maps with several customizable parameters, (2) Implementation of several automatic detection of sleep features such as spindles, K-complexes, slow waves, and rapid eye movements (REM), (3) Implementation of practical signal processing tools such as re-referencing or filtering, and (4) Display of main descriptive statistics including publication-ready tables and figures. The software package supports loading and reading raw EEG data from standard file formats such as European Data Format, in addition to a range of commercial data formats. Most importantly, Sleep is built on top of the VisPy library, which provides GPU-based fast and high-level visualization. As a result, it is capable of efficiently handling and displaying large sleep datasets. Sleep is freely available (http://visbrain.org/sleep) and comes with sample datasets and an extensive documentation. 
Novel functionalities will continue to be added and open-science community efforts are expected to enhance the capacities of this module.",2017-09-21 +30624727,Pan-cancer transcriptomic analysis dissects immune and proliferative functions of APOBEC3 cytidine deaminases.,"APOBEC3 cytidine deaminases are largely known for their innate immune protection from viral infections. Recently, members of the family have been associated with a distinct mutational activity in some cancer types. We report a pan-tissue, pan-cancer analysis of RNA-seq data specific to the APOBEC3 genes in 8,951 tumours, 786 cancer cell lines and 6,119 normal tissues. By deconvolution of levels of different cell types in tumour admixtures, we demonstrate that APOBEC3B (A3B), the primary candidate as a cancer mutagen, shows little association with immune cell types compared to its paralogues. We present a pipeline called RESPECTEx (REconstituting SPecific Cell-Type Expression) and use it to deconvolute cell-type specific expression levels in a given cohort of tumour samples. We functionally annotate APOBEC3 co-expressing genes, and create an interactive visualization tool which 'barcodes' the functional enrichment (http://fraternalilab.kcl.ac.uk/apobec-barcodes/). These analyses reveal that A3B expression correlates with cell cycle and DNA repair genes, whereas the other APOBEC3 members display specificity for immune processes and immune cell populations. 
We offer molecular insights into the functions of individual APOBEC3 proteins in antiviral and proliferative contexts, and demonstrate the diversification this family of enzymes displays at the transcriptomic level, despite their high similarity in protein sequences and structures.",2019-02-01 +26519470,Deciphering the mechanisms of developmental disorders: phenotype analysis of embryos from mutant mouse lines.,"The Deciphering the Mechanisms of Developmental Disorders (DMDD) consortium is a research programme set up to identify genes in the mouse, which if mutated (or knocked-out) result in embryonic lethality when homozygous, and initiate the study of why disruption of their function has such profound effects on embryo development and survival. The project uses a combination of comprehensive high resolution 3D imaging and tissue histology to identify abnormalities in embryo and placental structures of embryonic lethal lines. The image data we have collected and the phenotypes scored are freely available through the project website (http://dmdd.org.uk). In this article we describe the web interface to the images that allows the embryo data to be viewed at full resolution in different planes, discuss how to search the database for a phenotype, and our approach to organising the data for an embryo and a mutant line so it is easy to comprehend and intuitive to navigate.",2015-10-30 +29900293,Transcriptome analyses of sex differential gene expression in brains of rare minnow (Gobiocypris rarus) and effects of tributyltin exposure.,"RNA-sequencing was used to identify sex-biased gene expression in brains of rare minnow (Gobiocypris rarus) by comparing transcriptomic profiles between females and males. Furthermore, transcriptomic responses to 10 ng/L tributyltin (TBT) in both male and female brains were also investigated to understand whether TBT affects the identified sex-biased genes. Differentially expressed genes (DEGs) were identified using the IDEG6 web tool. 
In this article, we presented male- and female-biased DEGs, and up-regulated and down-regulated DEGs after TBT exposure. The raw reads data supporting the present analyses has been deposited in NCBI Sequence Read Archive (SRA, http://www.ncbi.nlm.nih.gov/Traces/sra) with accession number PRJNA376634. The data presented in this article are related to the research article entitled ""Transcriptomic analyses of sexual dimorphism of rare minnow (G. rarus) brains and effects of tributyltin exposure"" (doi: 10.1016/j.ecoenv.2018.02.049).",2018-03-29 +28617026,"""The next Big Five Inventory (BFI-2): Developing and assessing a hierarchical model with 15 facets to enhance bandwidth, fidelity, and predictive power"": Correction to Soto and John (2016).","Reports an error in ""The Next Big Five Inventory (BFI-2): Developing and Assessing a Hierarchical Model With 15 Facets to Enhance Bandwidth, Fidelity, and Predictive Power"" by Christopher J. Soto and Oliver P. John (Journal of Personality and Social Psychology, Advanced Online Publication, Apr 7, 2016, np). In the article, all citations to McCrae and Costa (2008), except for the instance in which it appears in the first paragraph of the introduction, should instead appear as McCrae and Costa (2010). The complete citation should read as follows: McCrae, R. R., & Costa, P. T. (2010). NEO Inventories professional manual. Lutz, FL: Psychological Assessment Resources. The attribution to the BFI-2 items that appears in the Table 6 note should read as follows: BFI-2 items adapted from ""Conceptualization, Development, and Initial Validation of the Big Five Inventory-2,"" by C. J. Soto and O. P. John, 2015, Paper presented at the biennial meeting of the Association for Research in Personality. Copyright 2015 by Oliver P. John and Christopher J. Soto. The complete citation in the References list should appear as follows: Soto, C. J., & John, O. P. (2015, June). 
Conceptualization, development, and initial validation of the Big Five Inventory-2. Paper presented at the biennial meeting of the Association for Research in Personality, St. Louis, MO. Available from http://www.colby.edu/psych/personality-lab/ All versions of this article have been corrected. (The following abstract of the original article appeared in record 2016-17156-001.) Three studies were conducted to develop and validate the Big Five Inventory-2 (BFI-2), a major revision of the Big Five Inventory (BFI). Study 1 specified a hierarchical model of personality structure with 15 facet traits nested within the Big Five domains, and developed a preliminary item pool to measure this structure. Study 2 used conceptual and empirical criteria to construct the BFI-2 domain and facet scales from the preliminary item pool. Study 3 used data from 2 validation samples to evaluate the BFI-2's measurement properties and substantive relations with self-reported and peer-reported criteria. The results of these studies indicate that the BFI-2 is a reliable and valid personality measure, and an important advance over the original BFI. Specifically, the BFI-2 introduces a robust hierarchical structure, controls for individual differences in acquiescent responding, and provides greater bandwidth, fidelity, and predictive power than the original BFI, while still retaining the original measure's conceptual focus, brevity, and ease of understanding. The BFI-2 therefore offers valuable new opportunities for research examining the structure, assessment, development, and life outcomes of personality traits. 
(PsycINFO Database Record (c) 2017 APA, all rights reserved).",2017-07-01 +24214991,EKPD: a hierarchical database of eukaryotic protein kinases and protein phosphatases.,"We present here EKPD (http://ekpd.biocuckoo.org), a hierarchical database of eukaryotic protein kinases (PKs) and protein phosphatases (PPs), the key molecules responsible for the reversible phosphorylation of proteins that are involved in almost all aspects of biological processes. As extensive experimental and computational efforts have been carried out to identify PKs and PPs, an integrative resource with detailed classification and annotation information would be of great value for both experimentalists and computational biologists. In this work, we first collected 1855 PKs and 347 PPs from the scientific literature and various public databases. Based on previously established rationales, we classified all of the known PKs and PPs into a hierarchical structure with three levels, i.e. group, family and individual PK/PP. There are 10 groups with 149 families for the PKs and 10 groups with 33 families for the PPs. We constructed 139 and 27 Hidden Markov Model profiles for PK and PP families, respectively. Then we systematically characterized ∼50,000 PKs and >10,000 PPs in eukaryotes. In addition, >500 PKs and >400 PPs were computationally identified by ortholog search. Finally, the online service of the EKPD database was implemented in PHP + MySQL + JavaScript.",2013-11-08 +30052767,4mCPred: machine learning methods for DNA N4-methylcytosine sites prediction.,"

Motivation

N4-methylcytosine (4mC), an important epigenetic modification formed by the action of specific methyltransferases, plays an essential role in DNA repair, expression and replication. The accurate identification of 4mC sites aids in-depth research to biological functions and mechanisms. Because, experimental identification of 4mC sites is time-consuming and costly, especially given the rapid accumulation of gene sequences. Supplementation with efficient computational methods is urgently needed.

Results

In this study, we developed a new tool, 4mCPred, for predicting 4mC sites in Caenorhabditis elegans, Drosophila melanogaster, Arabidopsis thaliana, Escherichia coli, Geoalkalibacter subterraneus and Geobacter pickeringii. 4mCPred consists of two independent models, 4mCPred_I and 4mCPred_II, for each species. The predictive results of independent and cross-species tests demonstrated that the performance of 4mCPred_I is a useful tool. To identify position-specific trinucleotide propensity (PSTNP) and electron-ion interaction potential features, we used the F-score method to construct predictive models and to compare their PSTNP features. Compared with other existing predictors, 4mCPred achieved much higher accuracies in rigorous jackknife and independent tests. We also analyzed the importance of different features in detail.

Availability and implementation

The web-server 4mCPred is accessible at http://server.malab.cn/4mCPred/index.jsp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +30949660,Can changing the position of online menu items increase selection of fruit and vegetable snacks? A cluster randomized trial within an online canteen ordering system in Australian primary schools.,"

Background

Manipulating the position of food items within the physical food environment has consistently been found to influence item selection. However, the extent to which this strategy is effective in an online food environment is unknown.

Objective

This study investigated whether an intervention to position fruit and vegetable snack items as the first and last menu items in an online school canteen ordering system increased the selection of those items. It was hypothesized that at follow-up, a higher proportion of online lunch orders in intervention schools would contain the target items (fruit and vegetable snacks) in comparison to control schools.

Design

Six primary schools in New South Wales, Australia, were recruited to a clustered randomized controlled trial conducted over an 8-wk period. Intervention schools received a redesigned menu where the target items were positioned first and last on the online menu. Control schools received no change to their online menu.

Results

During the baseline period 1938 students (1203 intervention, 735 control) placed at least one online lunch order and were included in the study, with 16,109 orders placed throughout the study. There was no significant difference between groups over time in the proportion of orders that contained a ""Fruit and Veggie Snack"" item (OR = 1.136 [95% CI: 0.791, 1.632] P = 0.490).

Conclusions

Evidence from this large trial with robust study design and objectively collected data suggests that positioning fruit and vegetable snack items first and last within an online canteen menu does not increase the selection of these items. Further research is warranted to confirm this finding with other target menu items (e.g., treats) and across other purchasing contexts and online food ordering platforms. This trial was registered at the Australian New Zealand Clinical Trials Registry, http://www.anzctr.org.au/ as ACTRN12616001520426.",2019-05-01 +29474530,MUGAN: Multi-GPU accelerated AmpliconNoise server for rapid microbial diversity assessment. ,"Metagenomic sequencing has become a crucial tool for obtaining a gene catalogue of operational taxonomic units (OTUs) in a microbial community. A typical metagenomic sequencing produces a large amount of data (often in the order of terabytes or more), and computational tools are indispensable for efficient processing. In particular, error correction in metagenomics is crucial for accurate and robust genetic cataloging of microbial communities. However, many existing error-correction tools take a prohibitively long time and often bottleneck the whole analysis pipeline. To overcome this computational hurdle, we analyzed and exploited the data-level parallelism that exists in the error-correction procedure and proposed a tool named MUGAN that exploits both multi-core central processing units (CPUs) and multiple graphics processing units (GPUs) for co-processing. According to the experimental results, our approach reduced not only the time demand for denoising amplicons from approximately 59 hours to only 46 minutes, but also the overestimation of the number of OTUs, estimating 6.7 times less species-level OTUs than the baseline. In addition, our approach provides web-based intuitive visualization of results. 
Given its efficiency and convenience, we anticipate that our approach would greatly facilitate denoising efforts in metagenomics studies. http://data.snu.ac.kr/pub/mugan. sryoon@snu.ac.kr. Supplementary data are available at Bioinformatics online.",2018-02-20 +27604408,ExonImpact: Prioritizing Pathogenic Alternative Splicing Events.,"Alternative splicing (AS) is a closely regulated process that allows a single gene to encode multiple protein isoforms, thereby contributing to the diversity of the proteome. Dysregulation of the splicing process has been found to be associated with many inherited diseases. However, among the pathogenic AS events, there are numerous ""passenger"" events whose inclusion or exclusion does not lead to significant changes with respect to protein function. In this study, we evaluate the secondary and tertiary structural features of proteins associated with disease-causing and neutral AS events, and show that several structural features are strongly associated with the pathological impact of exon inclusion. We further develop a machine-learning-based computational model, ExonImpact, for prioritizing and evaluating the functional consequences of hitherto uncharacterized AS events. We evaluated our model using several strategies including cross-validation, and data from the Gene-Tissue Expression (GTEx) and ClinVar databases. ExonImpact is freely available at http://watson.compbio.iupui.edu/ExonImpact.",2016-10-03 +30253747,Valection: design optimization for validation and verification studies.,"

Background

Platform-specific error profiles necessitate confirmatory studies where predictions made on data generated using one technology are additionally verified by processing the same samples on an orthogonal technology. However, verifying all predictions can be costly and redundant, and testing a subset of findings is often used to estimate the true error profile.

Results

To determine how to create subsets of predictions for validation that maximize accuracy of global error profile inference, we developed Valection, a software program that implements multiple strategies for the selection of verification candidates. We evaluated these selection strategies on one simulated and two experimental datasets.

Conclusions

Valection is implemented in multiple programming languages, available at: http://labs.oicr.on.ca/boutros-lab/software/valection.",2018-09-25 +26385205,SORTA: a system for ontology-based re-coding and technical annotation of biomedical phenotype data. ,"There is an urgent need to standardize the semantics of biomedical data values, such as phenotypes, to enable comparative and integrative analyses. However, it is unlikely that all studies will use the same data collection protocols. As a result, retrospective standardization is often required, which involves matching of original (unstructured or locally coded) data to widely used coding or ontology systems such as SNOMED CT (clinical terms), ICD-10 (International Classification of Disease) and HPO (Human Phenotype Ontology). This data curation process is usually a time-consuming process performed by a human expert. To help mechanize this process, we have developed SORTA, a computer-aided system for rapidly encoding free text or locally coded values to a formal coding system or ontology. SORTA matches original data values (uploaded in semicolon delimited format) to a target coding system (uploaded in Excel spreadsheet, OWL ontology web language or OBO open biomedical ontologies format). It then semi- automatically shortlists candidate codes for each data value using Lucene and n-gram based matching algorithms, and can also learn from matches chosen by human experts. We evaluated SORTA's applicability in two use cases. For the LifeLines biobank, we used SORTA to recode 90 000 free text values (including 5211 unique values) about physical exercise to MET (Metabolic Equivalent of Task) codes. For the CINEAS clinical symptom coding system, we used SORTA to map to HPO, enriching HPO when necessary (315 terms matched so far). Out of the shortlists at rank 1, we found a precision/recall of 0.97/0.98 in LifeLines and of 0.58/0.45 in CINEAS. 
More importantly, users found the tool both a major time saver and a quality improvement because SORTA reduced the chances of human mistakes. Thus, SORTA can dramatically ease data (re)coding tasks and we believe it will prove useful for many more projects. Database URL: http://molgenis.org/sorta or as an open source download from http://www.molgenis.org/wiki/SORTA.",2015-09-18 +28053160,The 24th annual Nucleic Acids Research database issue: a look back and upcoming changes.,"This year's Database Issue of Nucleic Acids Research contains 152 papers that include descriptions of 54 new databases and update papers on 98 databases, of which 16 have not been previously featured in NAR As always, these databases cover a broad range of molecular biology subjects, including genome structure, gene expression and its regulation, proteins, protein domains, and protein-protein interactions. Following the recent trend, an increasing number of new and established databases deal with the issues of human health, from cancer-causing mutations to drugs and drug targets. In accordance with this trend, three recently compiled databases that have been selected by NAR reviewers and editors as 'breakthrough' contributions, denovo-db, the Monarch Initiative, and Open Targets, cover human de novo gene variants, disease-related phenotypes in model organisms, and a bioinformatics platform for therapeutic target identification and validation, respectively. We expect these databases to attract the attention of numerous researchers working in various areas of genetics and genomics. Looking back at the past 12 years, we present here the 'golden set' of databases that have consistently served as authoritative, comprehensive, and convenient data resources widely used by the entire community and offer some lessons on what makes a successful database. The Database Issue is freely available online at the https://academic.oup.com/nar web site. 
An updated version of the NAR Molecular Biology Database Collection is available at http://www.oxfordjournals.org/nar/database/a/.",2017-01-01 +28365738,ABCMdb reloaded: updates on mutations in ATP binding cassette proteins. ,"ABC (ATP-Binding Cassette) proteins with altered function are responsible for numerous human diseases. To aid the selection of positions and amino acids for ABC structure/function studies we have generated a database, ABCMdb (Gyimesi et al. , ABCMdb: a database for the comparative analysis of protein mutations in ABC transporters, and a potential framework for a general application. Hum Mutat 2012; 33:1547-1556.), with interactive tools. The database has been populated with mentions of mutations extracted from full text papers, alignments and structural models. In the new version of the database we aimed to collect the effect of mutations from databases including ClinVar. Because of the low number of available data, even in the case of the widely studied disease-causing ABC proteins, we also included the possible effects of mutations based on SNAP2 and PROVEAN predictions. To aid the interpretation of variations in non-coding regions, the database was supplemented with related DNA level information. Our results emphasize the importance of in silico predictions because of the sparse information available on variants and suggest that mutations at analogous positions in homologous ABC proteins have a strong predictive power for the effects of mutations. Our improved ABCMdb advances the design of both experimental studies and meta-analyses in order to understand drug interactions of ABC proteins and the effects of mutations on functional expression. 
http://abcm2.hegelab.org.",2017-01-01 +25053252,The Maize TFome--development of a transcription factor open reading frame collection for functional genomics.,"Establishing the architecture of the gene regulatory networks (GRNs) responsible for controlling the transcription of all genes in an organism is a natural development that follows elucidation of the genome sequence. Reconstruction of the GRN requires the availability of a series of molecular tools and resources that so far have been limited to a few model organisms. One such resource consists of collections of transcription factor (TF) open reading frames (ORFs) cloned into vectors that facilitate easy expression in plants or microorganisms. In this study, we describe the development of a publicly available maize TF ORF collection (TFome) of 2034 clones corresponding to 2017 unique gene models in recombination-ready vectors that make possible the facile mobilization of the TF sequences into a number of different expression vectors. The collection also includes several hundred co-regulators (CoREGs), which we classified into well-defined families, and for which we propose here a standard nomenclature, as we have previously done for TFs. We describe the strategies employed to overcome the limitations associated with cloning ORFs from a genome that remains incompletely annotated, with a partial full-length cDNA set available, and with many TF/CoREG genes lacking experimental support. In many instances this required the combination of genome-wide expression data with gene synthesis approaches. The strategies developed will be valuable for developing similar resources for other agriculturally important plants. Information on all the clones generated is available through the GRASSIUS knowledgebase (http://grassius.org/).",2014-08-26 +31513597,"Mother's dietary quality during pregnancy and offspring's dietary quality in adolescence: Follow-up from a national birth cohort study of 19,582 mother-offspring pairs.","

Background

The Developmental Origins of Health and Disease (DOHaD) hypothesis postulates that exposures during early life, such as maternal dietary intake during pregnancy, may have a lifelong impact on the individual's susceptibility to diseases. The individual's own lifestyle habits are obviously an additional factor, but we have only limited knowledge regarding how it may interact with prenatal exposures in determining later disease. To gain further insight into these potentially complex relationships, we examined the longitudinal association between maternal diet quality during pregnancy and diet quality in early adolescence in a contemporary cohort.

Methods and findings

From 1996 to 2003, the Danish National Birth Cohort (DNBC) was established. Women from across the country were enrolled, and dietary intake in midpregnancy was assessed concurrently with a 360-item food frequency questionnaire (FFQ) (https://www.dnbc.dk/-/media/arkiv/projekt-sites/dnbc/kodeboeger/dnbc-food-frequency-questionnaire/dnbc-food-frequency-questionnaire-pdf.pdf?la=en). During 2013-2018, dietary intake was assessed at age 14 years with a 150-item FFQ (https://www.dnbc.dk/-/media/arkiv/projekt-sites/dnbc/kodeboeger/ffq-14/dnbc-ffq-14-english-translation.pdf?la=en) in the DNBC children. Among the 19,582 mother-offspring pairs included in the analyses, the mean age (±standard deviation [SD]) was 30.7 (±4.1) years and 14.0 (±0.0) years for mothers and offspring, respectively. The majority of both mothers (67%) and offspring (76%) were classified as normal weight. For both questionnaires, a Healthy Eating Index (HEI) was developed as an indicator for diet quality based on current Danish Food-Based Dietary Guidelines (FBDG) including eight components: fruits and vegetables, fish, dietary fibres, red meat, saturated fatty acids (SFAs), sodium, sugar-sweetened beverages (SSBs), and added sugar. The HEI score was divided into quartiles; individuals in the highest quartile represented those with the most optimal diet. The maternal HEI score was correlated positively with offspring HEI score (Pearson r = 0.22, p < 0.001). A log-linear binomial model was used to estimate the relative risk of the offspring being in the highest quartile of HEI at age 14 years if the mother was ranked in quartile 4 during pregnancy. Results showed that offspring born to mothers who were in the highest HEI quartile during pregnancy were more likely themselves to be located in the highest HEI quartile at age 14 years (risk ratio [RR]: 2.1, 95% confidence interval [CI]: 2.0, 2.3, p < 0.001). 
Adjusting for maternal prepregnancy body mass index (BMI), parity, education, alcohol intake, physical activity, smoking, and breastfeeding, as well as offspring total energy intake and sex, did not influence the effect estimates. The limitations of our study include that some attrition bias towards more healthy participants was observed when comparing participants with nonparticipants. Bias in the FFQ method may also have resulted in underrepresentation of adolescents with poorer diet quality.

Conclusions

In this study using data from a large national birth cohort, we observed that maternal diet quality during pregnancy was associated with diet quality of the offspring at age 14 years. These findings indicate the importance of separating early dietary exposures from later dietary exposures when studying dietary aetiologies of diseases postulated to have developmental origins such as, for instance, obesity or asthma in observational settings.",2019-09-12 +26450962,"MEPD: medaka expression pattern database, genes and more.","The Medaka Expression Pattern Database (MEPD; http://mepd.cos.uni-heidelberg.de/) is designed as a repository of medaka expression data for the scientific community. In this update we present two main improvements. First, we have changed the previous clone-centric view for in situ data to a gene-centric view. This is possible because now we have linked all the data present in MEPD to the medaka gene annotation in ENSEMBL. In addition, we have also connected the medaka genes in MEPD to their corresponding orthologous gene in zebrafish, again using the ENSEMBL database. Based on this, we provide a link to the Zebrafish Model Organism Database (ZFIN) to allow researches to compare expression data between these two fish model organisms. As a second major improvement, we have modified the design of the database to enable it to host regulatory elements, promoters or enhancers, expression patterns in addition to gene expression. The combination of gene expression, by traditional in situ, and regulatory element expression, typically by fluorescence reporter gene, within the same platform assures consistency in terms of annotation. 
In our opinion, this will allow researchers to uncover new insights between the expression domain of genes and their regulatory landscape.",2015-10-07 +29757353,Development and evaluation of a deep learning model for protein-ligand binding affinity prediction.,"Motivation:Structure based ligand discovery is one of the most successful approaches for augmenting the drug discovery process. Currently, there is a notable shift towards machine learning (ML) methodologies to aid such procedures. Deep learning has recently gained considerable attention as it allows the model to 'learn' to extract features that are relevant for the task at hand. Results:We have developed a novel deep neural network estimating the binding affinity of ligand-receptor complexes. The complex is represented with a 3D grid, and the model utilizes a 3D convolution to produce a feature map of this representation, treating the atoms of both proteins and ligands in the same manner. Our network was tested on the CASF-2013 'scoring power' benchmark and Astex Diverse Set and outperformed classical scoring functions. Availability and implementation:The model, together with usage instructions and examples, is available as a git repository at http://gitlab.com/cheminfIBB/pafnucy. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-11-01 +21890895,OPM database and PPM web server: resources for positioning of proteins in membranes.,"The Orientations of Proteins in Membranes (OPM) database is a curated web resource that provides spatial positions of membrane-bound peptides and proteins of known three-dimensional structure in the lipid bilayer, together with their structural classification, topology and intracellular localization. OPM currently contains more than 1200 transmembrane and peripheral proteins and peptides from approximately 350 organisms that represent approximately 3800 Protein Data Bank entries. 
Proteins are classified into classes, superfamilies and families and assigned to 21 distinct membrane types. Spatial positions of proteins with respect to the lipid bilayer are optimized by the PPM 2.0 method that accounts for the hydrophobic, hydrogen bonding and electrostatic interactions of the proteins with the anisotropic water-lipid environment described by the dielectric constant and hydrogen-bonding profiles. The OPM database is freely accessible at http://opm.phar.umich.edu. Data can be sorted, searched or retrieved using the hierarchical classification, source organism, localization in different types of membranes. The database offers downloadable coordinates of proteins and peptides with membrane boundaries. A gallery of protein images and several visualization tools are provided. The database is supplemented by the PPM server (http://opm.phar.umich.edu/server.php) which can be used for calculating spatial positions in membranes of newly determined proteins structures or theoretical models.",2011-09-02 +30033227,Recommendations from the international evidence-based guideline for the assessment and management of polycystic ovary syndrome.,"

Study question

What is the recommended assessment and management of women with polycystic ovary syndrome (PCOS), based on the best available evidence, clinical expertise, and consumer preference?

Summary answer

International evidence-based guidelines including 166 recommendations and practice points, addressed prioritized questions to promote consistent, evidence-based care and improve the experience and health outcomes of women with PCOS.

What is known already

Previous guidelines either lacked rigorous evidence-based processes, did not engage consumer and international multidisciplinary perspectives, or were outdated. Diagnosis of PCOS remains controversial and assessment and management are inconsistent. The needs of women with PCOS are not being adequately met and evidence practice gaps persist.

Study design, size, duration

International evidence-based guideline development engaged professional societies and consumer organizations with multidisciplinary experts and women with PCOS directly involved at all stages. Appraisal of Guidelines for Research and Evaluation (AGREE) II-compliant processes were followed, with extensive evidence synthesis. The Grading of Recommendations, Assessment, Development, and Evaluation (GRADE) framework was applied across evidence quality, feasibility, acceptability, cost, implementation and ultimately recommendation strength.

Participants/materials, setting, methods

Governance included a six continent international advisory and a project board, five guideline development groups, and consumer and translation committees. Extensive health professional and consumer engagement informed guideline scope and priorities. Engaged international society-nominated panels included pediatrics, endocrinology, gynecology, primary care, reproductive endocrinology, obstetrics, psychiatry, psychology, dietetics, exercise physiology, public health and other experts, alongside consumers, project management, evidence synthesis, and translation experts. Thirty-seven societies and organizations covering 71 countries engaged in the process. Twenty face-to-face meetings over 15 months addressed 60 prioritized clinical questions involving 40 systematic and 20 narrative reviews. Evidence-based recommendations were developed and approved via consensus voting within the five guideline panels, modified based on international feedback and peer review, with final recommendations approved across all panels.

Main results and the role of chance

The evidence in the assessment and management of PCOS is generally of low to moderate quality. The guideline provides 31 evidence based recommendations, 59 clinical consensus recommendations and 76 clinical practice points all related to assessment and management of PCOS. Key changes in this guideline include: i) considerable refinement of individual diagnostic criteria with a focus on improving accuracy of diagnosis; ii) reducing unnecessary testing; iii) increasing focus on education, lifestyle modification, emotional wellbeing and quality of life; and iv) emphasizing evidence based medical therapy and cheaper and safer fertility management.

Limitations, reasons for caution

Overall evidence is generally low to moderate quality, requiring significantly greater research in this neglected, yet common condition, especially around refining specific diagnostic features in PCOS. Regional health system variation is acknowledged and a process for guideline and translation resource adaptation is provided.

Wider implications of the findings

The international guideline for the assessment and management of PCOS provides clinicians with clear advice on best practice based on the best available evidence, expert multidisciplinary input and consumer preferences. Research recommendations have been generated and a comprehensive multifaceted dissemination and translation program supports the guideline with an integrated evaluation program.

Study funding/competing interest(s)

The guideline was primarily funded by the Australian National Health and Medical Research Council of Australia (NHMRC) supported by a partnership with ESHRE and the American Society for Reproductive Medicine. Guideline development group members did not receive payment. Travel expenses were covered by the sponsoring organizations. Disclosures of conflicts of interest were declared at the outset and updated throughout the guideline process, aligned with NHMRC guideline processes. Full details of conflicts declared across the guideline development groups are available at https://www.monash.edu/medicine/sphpm/mchri/pcos/guideline in the Register of disclosures of interest. Of named authors, Dr Costello has declared shares in Virtus Health and past sponsorship from Merck Serono for conference presentations. Prof. Laven declared grants from Ferring, Euroscreen and personal fees from Ferring, Euroscreen, Danone and Titus Healthcare. Prof. Norman has declared a minor shareholder interest in an IVF unit. The remaining authors have no conflicts of interest to declare. The guideline was peer reviewed by special interest groups across our partner and collaborating societies and consumer organizations, was independently assessed against AGREEII criteria and underwent methodological review. This guideline was approved by all members of the guideline development groups and was submitted for final approval by the NHMRC.",2018-07-19 +26077692,Differential activation of immune/inflammatory response-related co-expression modules in the hippocampus across the major psychiatric disorders.,"The Stanley Neuropathology Consortium Integrative Database (SNCID, http://sncid.stanleyresearch.org) is a data-mining tool that includes 379 neuropathology data sets from hippocampus, as well as RNA-Seq data measured in 15 well-matched cases in each of four groups: schizophrenia, bipolar disorder (BPD), major depression (MD) and unaffected controls. 
We analyzed the neuropathology data from the hippocampus to identify those abnormalities that are shared between psychiatric disorders and those that are specific to each disorder. Of the 379 data sets, 20 of them showed a significant abnormality in at least one disorder as compared with unaffected controls. GABAergic markers and synaptic proteins were mainly abnormal in schizophrenia and the two mood disorders, respectively. Two immune/inflammation-related co-expression modules built from RNA-seq data from both schizophrenia and controls combined were associated with disease status, as well as negatively correlated with the GABAergic markers. The correlation between immune-related modules and schizophrenia was replicated using microarray data from an independent tissue collection. Immune/inflammation-related co-expression modules were also built from RNA-seq data from BPD cases or from MD cases but were not preserved when using data from control cases. Moreover, there was no overlap in the genes that comprise the immune/inflammation response-related modules across the different disorders. Thus, there appears to be differential activation of the immune/inflammatory response, as determined by co-expression of genes, which is associated with the major psychiatric disorders and which is also associated with the abnormal neuropathology in the disorders.",2015-06-16 +26553804,"Reference sequence (RefSeq) database at NCBI: current status, taxonomic expansion, and functional annotation.","The RefSeq project at the National Center for Biotechnology Information (NCBI) maintains and curates a publicly available database of annotated genomic, transcript, and protein sequence records (http://www.ncbi.nlm.nih.gov/refseq/). 
The RefSeq project leverages the data submitted to the International Nucleotide Sequence Database Collaboration (INSDC) against a combination of computation, manual curation, and collaboration to produce a standard set of stable, non-redundant reference sequences. The RefSeq project augments these reference sequences with current knowledge including publications, functional features and informative nomenclature. The database currently represents sequences from more than 55,000 organisms (>4800 viruses, >40,000 prokaryotes and >10,000 eukaryotes; RefSeq release 71), ranging from a single record to complete genomes. This paper summarizes the current status of the viral, prokaryotic, and eukaryotic branches of the RefSeq project, reports on improvements to data access and details efforts to further expand the taxonomic representation of the collection. We also highlight diverse functional curation initiatives that support multiple uses of RefSeq data including taxonomic validation, genome annotation, comparative genomics, and clinical testing. We summarize our approach to utilizing available RNA-Seq and other data types in our manual curation process for vertebrate, plant, and other species, and describe a new direction for prokaryotic genomes and protein name management.",2015-11-08 +24465676,mUbiSiDa: a comprehensive database for protein ubiquitination sites in mammals.,"

Motivation

Protein ubiquitination is one of the important post-translational modifications by attaching ubiquitin to specific lysine (K) residues in target proteins, and plays important regulatory roles in many cell processes. Recent studies indicated that abnormal protein ubiquitination have been implicated in many diseases by degradation of many key regulatory proteins including tumor suppressor, oncoprotein, and cell cycle regulator. The detailed information of protein ubiquitination sites is useful for scientists to investigate the mechanism of many cell activities and related diseases.

Results

In this study we established mUbiSida for mammalian Ubiquitination Site Database, which provides a scientific community with a comprehensive, freely and high-quality accessible resource of mammalian protein ubiquitination sites. In mUbiSida, we deposited about 35,494 experimentally validated ubiquitinated proteins with 110,976 ubiquitination sites from five species. The mUbiSiDa can also provide blast function to predict novel protein ubiquitination sites in other species by blast the query sequence in the deposit sequences in mUbiSiDa. The mUbiSiDa was designed to be a widely used tool for biologists and biomedical researchers with a user-friendly interface, and facilitate the further research of protein ubiquitination, biological networks and functional proteomics. The mUbiSiDa database is freely available at http://reprod.njmu.edu.cn/mUbiSiDa.",2014-01-17 +30513992,Reoptimized UNRES Potential for Protein Model Quality Assessment. ,"Ranking protein structure models is an elusive problem in bioinformatics. These models are evaluated on both the degree of similarity to the native structure and the folding pathway. Here, we simulated the use of the coarse-grained UNited RESidue (UNRES) force field as a tool to choose the best protein structure models for a given protein sequence among a pool of candidate models, using server data from the CASP11 experiment. Because the original UNRES was optimized for Molecular Dynamics simulations, we reoptimized UNRES using a deep feed-forward neural network, and we show that introducing additional descriptive features can produce better results. Overall, we found that the reoptimized UNRES performs better in selecting the best structures and tracking protein unwinding from its native state. We also found a relatively poor correlation between UNRES values and the model's Template Modeling Score (TMS). This is remedied by reoptimization. We discuss some cases where our reoptimization procedure is useful. 
The reoptimized version of UNRES (OUNRES) is available at http://mamiris.com and http://www.unres.pl.",2018-12-03 +29241411,Prediction of zinc binding sites in proteins using sequence derived information.,"Zinc is one the most abundant catalytic cofactor and also an important structural component of a large number of metallo-proteins. Hence prediction of zinc metal binding sites in proteins can be a significant step in annotation of molecular function of a large number of proteins. Majority of existing methods for zinc-binding site predictions are based on a data-set of proteins, which has been compiled nearly a decade ago. Hence there is a need to develop zinc-binding site prediction system using the current updated data to include recently added proteins. Herein, we propose a support vector machine-based method, named as ZincBinder, for prediction of zinc metal-binding site in a protein using sequence profile information. The predictor was trained using fivefold cross validation approach and achieved 85.37% sensitivity with 86.20% specificity during training. Benchmarking on an independent non-redundant data-set, which was not used during training, showed better performance of ZincBinder vis-à-vis existing methods. Executable versions, source code, sample datasets, and usage instructions are available at http://proteininformatics.org/mkumar/znbinder/.",2018-01-15 +26487054,Communicative interactions in point-light displays: Choosing among multiple response alternatives.,"Vision scientists are increasingly relying on the point-light technique as a way to investigate the perception of human motion. Unfortunately, the lack of standardized stimulus sets has so far limited the use of this technique for studying social interaction. Here, we describe a new tool to study the interaction between two agents starting from point-light displays: the Communicative Interaction Database - 5AFC format (CID-5). 
The CID-5 consists of 14 communicative and seven non-communicative individual actions performed by two agents. Stimuli were constructed by combining motion capture techniques and 3-D animation software to provide precise control over the computer-generated actions. For each action stimulus, we provide coordinate files and movie files depicting the action as seen from four different perspectives. Furthermore, the archive contains a text file with a list of five alternative action descriptions to construct forced-choice paradigms. In order to validate the CID-5 format, we provide normative data collected to assess action identification within a 5AFC tasks. The CID-5 archive is freely downloadable from http://bsb-lab.org/research/ and from the supplementary materials of this article.",2016-12-01 +31749503,"Informal Networks of Low-Income Mothers: Support, Burden, and Change.","

Objective

The authors examined the support and burden of low-income, urban mothers' informal networks.

Background

Living or growing up in poverty strongly predicts barriers and instability across several life domains for mothers and their children. Informal networks can play a critical role in promoting maternal and child well-being particularly in the midst of poverty. Understanding informal support and the reciprocal burden it may create is especially relevant for low-income families living with a reduced public safety net in the post-welfare reform era. Therefore, study aims were to measure support and burden among low-income mothers and determine if support and burden change over time.

Method

Data were from the Welfare, Children, Families (WCF) project, a longitudinal study of 2,400 low-income, caregivers of children and adolescents living in Boston, Chicago, or San Antonio (http://web.jhu.edu/threecitystudy/index.html)). We applied latent class analyses to support and burden indicators in four domains-emotional, favor, child care, and financial.

Results

Results supported four profiles of informal networks - healthy, unhealthy, burden only, and support only. Although most mothers had healthy informal networks, approximately one-third experienced no support or support imbalance which related to network changes at later time points. Demographic characteristics largely were not predictive of support profile or profile change.

Conclusion

Although many mothers had healthy support and burden, the most vulnerable did not have consistently healthy informal networks. The identification of a sizable minority of low-income mothers who cannot consistently rely on informal support is significant in light of diminished formal supports available to children and families.",2019-04-08 +30603844,Predicting Functional Modules of Liver Cancer Based on Differential Network Analysis.,"Complex diseases are generally caused by disorders of biological networks or/and mutations in multiple genes. The efficient and systematic identification of functional modules can not only supply effective diagnosis and treatment in clinic, but also benefit in further in-depth analysis of the pathological mechanism of complex diseases. In this study, we applied the method of differential network to identify functional modules between control and disease samples, which are different from most of the current approaches that focus on differential expression. In particular, we applied our approach to analyze transcriptome data of liver cancer in The Cancer Genome Atlas (TCGA, https://cancergenome.nih.gov/), and we obtained two modules associated with liver cancer. One is a functional gene module that contains a set of liver cancer-related genes, and another is an lncRNA (long non-coding RNA) module that includes liver cancer-related lncRNAs. The results of survival analysis and classification show that the functional modules cannot only be used as effective modular biomarkers to identifying liver cancer, but also predict the prognosis of liver cancer. The method can identify functional modules in genes and lncRNA from liver cancer, and these modules can be used to do prognosis detection and further study in mechanism of liver cancer.",2019-01-02 +31070712,Docosahexaenoic acid supplementation of preterm infants and parent-reported symptoms of allergic disease at 7 years corrected age: follow-up of a randomized controlled trial.,"

Background

Docosahexaenoic acid (DHA, 22:6n-3) supplementation in the prenatal period is associated with a reduction in the incidence of some symptoms of allergic disease. Infants born preterm are at increased risk of allergic disease, but it is unknown if DHA supplementation reduces the risk of childhood allergies.

Objectives

The aim of this study was to determine if supplementation of infants born at <33 wk gestation with high-DHA compared with standard-DHA enteral feeds decreases the incidence and severity of parent-reported allergic disease symptoms at a corrected age (CA) of 7 y.

Methods

This study was a follow-up of an Australian multicenter randomized controlled trial. Infants were given high-DHA (∼1% total fatty acids) or standard-DHA (∼0.3% total fatty acids) enteral feeds from 2-4 d of postnatal age until 40 wk postmenstrual age. Parent-reported incidence of respiratory allergic disease symptoms including wheeze and rhinitis at 7 y CA were the main outcomes. Other outcomes included the incidence of eczema symptoms; severity of any symptoms; and the incidence of wheeze, rhinitis, rhinoconjunctivitis, and eczema from birth to 7 y CA.

Results

Data were available for 569 of 657 (87%) children originally randomized. Symptoms of wheeze or rhinitis at 7 y CA did not differ between high- and standard-DHA groups [wheeze: RR: 1.10; 95% CI: 0.73, 1.65; P = 0.66; rhinitis: RR: 1.09; 95% CI: 0.81, 1.46; P = 0.59]. There was no difference in other allergic disease symptoms at 7 y CA or in the severity of symptoms. Parent-reported symptoms of wheeze, rhinitis, rhinoconjunctivitis, or eczema from birth to 7 y CA did not differ between the groups.

Conclusions

High-dose DHA supplementation of infants born at <33 wk gestation did not alter allergic disease symptoms or severity at 7 y CA, or from birth to 7 y CA compared with standard-dose DHA. This trial was registered with the Australian New Zealand Clinical Trials Registry as ANZCTR 12606000327583 (http://www.anzctr.org.au).",2019-06-01 +28968636,MAJIQ-SPEL: web-tool to interrogate classical and complex splicing variations from RNA-Seq data.,"

Summary

Analysis of RNA sequencing (RNA-Seq) data have highlighted the fact that most genes undergo alternative splicing (AS) and that these patterns are tightly regulated. Many of these events are complex, resulting in numerous possible isoforms that quickly become difficult to visualize, interpret and experimentally validate. To address these challenges we developed MAJIQ-SPEL, a web-tool that takes as input local splicing variations (LSVs) quantified from RNA-Seq data and provides users with visualization and quantification of gene isoforms associated with those. Importantly, MAJIQ-SPEL is able to handle both classical (binary) and complex, non-binary, splicing variations. Using a matching primer design algorithm it also suggests to users possible primers for experimental validation by RT-PCR and displays those, along with the matching protein domains affected by the LSV, on UCSC Genome Browser for further downstream analysis.

Availability and implementation

Program and code will be available at http://majiq.biociphers.org/majiq-spel.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +29978613,Exploring the Key Genes and Pathways of Osteoarthritis in Knee Cartilage in a Rat Model Using Gene Expression Profiling.,"

Purpose

To compare differentially expressed genes (DEGs) mediating osteoarthritis (OA) in knee cartilage and in normal knee cartilage in a rat model of OA and to identify their impact on molecular pathways associated with OA.

Materials and methods

A gene expression profile was downloaded from the Gene Expression Omnibus database. Analysis of DEGs was carried out using GEO2R. Enrichment analyses were performed on the Gene Ontology (GO) and Kyoto Encyclopedia of Genes and Genomes pathway using the Search Tool for the Retrieval of Interacting Genes database (http://www.string-db.org/). Subsequently, the regulatory interaction network of OA-associated genes was visualized using Cytoscape software (version 3.4.0; www.cytoscape.org).

Results

In the gene expression profile GSE103416, a total of 99 DEGs were identified. Among them, 76 DEGs (76.77%) were overexpressed, and the remaining 23 DEGs (23.23%) were underexpressed. GO and pathway enrichment analyses of target genes were performed. Using gene-gene interaction network analysis, relevant core genes, including MET, UBB, GNAI3, and GNA13, were shown to hold a potential relationship with the development of OA in cartilage. Using quantitative real-time PCR, the Gna13/cGMP-PKG signaling pathway was identified as a potential research target for therapy and for further understanding the development of OA.

Conclusion

The results of the present study provide a comprehensive understanding of the roles of DEGs in knee cartilage in relation to the development of OA.",2018-08-01 +29912270,Development of a Coding and Crosswalk Tool for Occupations and Industries.,"

Introduction

Job coding into a standard occupation or industry classification is commonly performed in occupational epidemiology and occupational health. Sometimes, it is necessary to code jobs into multiple classifications or to convert job codes from one classification to another. We developed a generic tool, called CAPS-Canada (http://www.caps-canada.ca/), that combines a computer-assisted coding tool covering seven International, Canadian and US occupation and industry classifications and an assistant facilitating crosswalks from one classification to another. The objectives of this paper are to present the different functions of the CAPS-Canada tool and to assess their contribution through an inter-rater reliability study.

Method

The crosswalk assistant was built based on a database of >30,000 jobs coded during a previous project. We evaluated to what extent it would allow automatic translation between pairs of classifications. The influence of CAPS-Canada on agreement between coders was assessed through an inter-rater reliability study comparing three approaches: manual coding, coding with CAPS-Canada without the crosswalk assistant, and coding with the complete tool. The material for this trial consisted of a random sample of 1000 jobs extracted from a case-control study and divided into three subgroups of equivalent size.

Results

Across the classification systems, the crosswalk assistant would provide useful information for 83-99% of jobs (median 95%) in a population similar to ours. Eighteen to eighty-one percent of jobs (median 56%) could be entirely automatically recoded. Based on our sample of 1000 jobs, inter-rater reliability in occupation coding ranged from 35.7 to 66.5% (median 53.7%) depending on the combination of classification/resolution. Compared with manual coding, the use of CAPS-Canada substantially improved inter-rater reliability.

Conclusion

CAPS-Canada is an attractive alternative to manual coding and is particularly relevant for coding a job into multiple classifications or for recoding jobs into other classifications.",2018-08-01 +30202904,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Evaluation and Treatment of Patients With Thoracolumbar Spine Trauma: Classification of Injury.,"

Question 1

Are there classification systems for fractures of the thoracolumbar spine that have been shown to be internally valid and reliable (ie, do these instruments provide consistent information between different care providers)?

Recommendation 1

A classification scheme that uses readily available clinical data (eg, computed tomography scans with or without magnetic resonance imaging) to convey injury morphology, such as Thoracolumbar Injury Classification and Severity Scale or the AO Spine Thoracolumbar Spine Injury Classification System, should be used to improve characterization of traumatic thoracolumbar injuries and communication among treating physicians. Strength of Recommendation: Grade B.

Question 2

In treating patients with thoracolumbar fractures, does employing a formally tested classification system for treatment decision-making affect clinical outcomes?

Recommendation 2

There is insufficient evidence to recommend a universal classification system or severity score that will readily guide treatment of all injury types and thereby affect outcomes. Strength of Recommendation: Grade Insufficient The full version of the guideline can be reviewed at: https://www.cns.org/guideline-chapters/congress-neurological-surgeons-systematic-review-evidence-based-guidelines/chapter_2.",2019-01-01 +30357353,piRTarBase: a database of piRNA targeting sites and their roles in gene regulation.,"PIWI-interacting RNAs (piRNAs) are a class of small noncoding RNAs that guard animal genomes against mutation by silencing transposons. In addition, recent studies have reported that piRNAs silence various endogenous genes. Tens of thousands of distinct piRNAs made in animals do not pair well to transposons and currently the functions and targets of piRNAs are largely unexplored. piRTarBase provides a user-friendly interface to access both predicted and experimentally identified piRNA targeting sites in Caenorhabditis elegans. The user can input genes of interest and retrieve a list of piRNA targeting sites on the input genes. Alternatively, the user can input a piRNA and retrieve a list of its mRNA targets. Additionally, piRTarBase integrates published mRNA and small RNA sequencing data, which will help users identify biologically relevant targeting events. Importantly, our analyses suggest that the piRNA sites found by both predictive and experimental approaches are more likely to exhibit silencing effects on their targets than each method alone. Taken together, piRTarBase offers an integrative platform that will help users to identify functional piRNA target sites by evaluating various information. 
piRTarBase is freely available for academic use at http://cosbi6.ee.ncku.edu.tw/piRTarBase/.",2019-01-01 +26884809,Implementation of the Rank-Weighted Co-localization (RWC) algorithm in multiple image analysis platforms for quantitative analysis of microscopy images.,"

Background

Quantitative co-localization studies strengthen the analysis of fluorescence microscopy-based assays and are essential for illustrating and understanding many cellular processes and interactions. In our earlier study, we presented a rank-based intensity weighting scheme for the quantification of co-localization between structures in fluorescence microscopy images. This method, which uses a combined pixel co-occurrence and intensity correlation approach, is superior to conventional algorithms and provides a more accurate quantification of co-localization.

Findings

In this brief report we provide the source code and implementation of the rank-weighted co-localization (RWC) algorithm in three (two open source and one proprietary) image analysis platforms. The RWC algorithm has been implemented as a plugin for ImageJ, a module for CellProfiler and an Acapella script for Columbus image analysis software tools.

Conclusions

We have provided with a web resource from which users can download plugins and modules implementing the RWC algorithm in various commonly used image analysis platforms. The implementations have been designed for easy incorporation into existing tools in a 'ready-for-use' format. The resources can be accessed through the following web link: http://simpsonlab.pbworks.com/w/page/48541482/Bioinformatic_Tools.",2016-02-16 +26708986,LymPHOS 2.0: an update of a phosphosite database of primary human T cells. ,"LymPHOS is a web-oriented database containing peptide and protein sequences and spectrometric information on the phosphoproteome of primary human T-Lymphocytes. Current release 2.0 contains 15 566 phosphorylation sites from 8273 unique phosphopeptides and 4937 proteins, which correspond to a 45-fold increase over the original database description. It now includes quantitative data on phosphorylation changes after time-dependent treatment with activators of the TCR-mediated signal transduction pathway. Sequence data quality has also been improved with the use of multiple search engines for database searching. LymPHOS can be publicly accessed at http://www.lymphos.org. Database URL: http://www.lymphos.org.",2015-12-26 +23820029,Strengthening the organizational capacity of health professional associations: the FIGO LOGIC Toolkit.,"Health professional associations, including national associations of obstetrics and gynecology, can have a leading role in influencing and developing health policy and practice. However, in low- and middle-resource countries, the organizational capacity to facilitate this role is often insufficient. The International Federation of Gynecology and Obstetrics LOGIC (Leadership in Obstetrics and Gynaecology for Impact and Change) Initiative has been developing the capacity of national associations in Africa and Asia. 
Through this work, an electronic resource of materials (http://figo-toolkit.org/) has been brought together to support organizational capacity development, addressing domains such as culture, strategic planning, human resources, project and financial management, performance, external relations, membership services, and the development and revision of clinical guidelines.",2013-06-29 +27741327,New functionality of RNAComposer: an application to shape the axis of miR160 precursor structure.,"RNAComposer is a fully automated, web-interfaced system for RNA 3D structure prediction, freely available at http://rnacomposer.cs.put.poznan.pl/ and http://rnacomposer.ibch.poznan.pl/. Its main components are: manually curated database of RNA 3D structure elements, highly efficient computational engine and user-friendly web application. In this paper, we demonstrate how the latest additions to the system allow the user to significantly affect the process of 3D model composition on several computational levels. Although in general our method is based on the knowledge of secondary structure topology, currently the RNAComposer offers a choice of six incorporated programs for secondary structure prediction. It also allows to apply a conditional search in the database of 3D structure elements and introduce user-provided elements into the final 3D model. This new functionality contributes to a significant improvement of the predicted 3D model reliability and it facilitates a better model adjustment to the experimental data. This is exemplified based on the RNAComposer application for modelling of the 3D structures of precursors of the miR160 family members.",2016-10-14 +30142701,"On the Misleading Use of $Q^2_{F3}$ for QSAR Model Comparison.","Quantitative Structure - Activity Relationship (QSAR) models play a central role in medicinal chemistry, toxicology and computer-assisted molecular design, as well as a support for regulatory decisions and animal testing reduction. 
Thus, assessing their predictive ability becomes an essential step for any prospective application. Many metrics have been proposed to estimate the model predictive ability of QSARs, which have created confusion on how models should be evaluated and properly compared. Recently, we showed that the metric $Q^2_{F3}$ is particularly well-suited for comparing the external predictivity of different models developed on the same training dataset. However, when comparing models developed on different training data, this function becomes inadequate and only dispersion measures like the root-mean-square error (RMSE) should be used. The intent of this work is to provide clarity on the correct and incorrect uses of $Q^2_{F3}$, discussing its behavior towards the training data distribution and illustrating some cases in which $Q^2_{F3}$ estimates may be misleading. Hereby, we encourage the usage of measures of dispersions when models trained on different datasets have to be compared and evaluated.",2018-08-24 +27903896,IMG-ABC: new features for bacterial secondary metabolism analysis and targeted biosynthetic gene cluster discovery in thousands of microbial genomes.,"Secondary metabolites produced by microbes have diverse biological functions, which makes them a great potential source of biotechnologically relevant compounds with antimicrobial, anti-cancer and other activities. The proteins needed to synthesize these natural products are often encoded by clusters of co-located genes called biosynthetic gene clusters (BCs). In order to advance the exploration of microbial secondary metabolism, we developed the largest publically available database of experimentally verified and predicted BCs, the Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters (IMG-ABC) (https://img.jgi.doe.gov/abc/). 
Here, we describe an update of IMG-ABC, which includes ClusterScout, a tool for targeted identification of custom biosynthetic gene clusters across 40 000 isolate microbial genomes, and a new search capability to query more than 700 000 BCs from isolate genomes for clusters with similar Pfam composition. Additional features enable fast exploration and analysis of BCs through two new interactive visualization features, a BC function heatmap and a BC similarity network graph. These new tools and features add to the value of IMG-ABC's vast body of BC data, facilitating their in-depth analysis and accelerating secondary metabolite discovery.",2016-11-29 +29991528,PERK Regulates Glioblastoma Sensitivity to ER Stress Although Promoting Radiation Resistance.,"The aggressive nature and inherent therapeutic resistance of glioblastoma multiforme (GBM) has rendered the median survival of afflicted patients to 14 months. Therefore, it is imperative to understand the molecular biology of GBM to provide new treatment options to overcome this disease. It has been demonstrated that the protein kinase R-like endoplasmic reticulum kinase (PERK) pathway is an important regulator of the endoplasmic reticulum (ER) stress response. PERK signaling has been observed in other model systems after radiation; however, less is known in the context of GBM, which is frequently treated with radiation-based therapies. To investigate the significance of PERK, we studied activation of the PERK-eIF2α-ATF4 pathway in GBM after ionizing radiation (IR). By inhibiting PERK, it was determined that ionizing radiation (IR)-induced PERK activity led to eIF2α phosphorylation. IR enhanced the prodeath component of PERK signaling in cells treated with Sal003, an inhibitor of phospho-eIF2α phosphatase. Mechanistically, ATF4 mediated the prosurvival activity during the radiation response. 
The data support the notion that induction of ER stress signaling by radiation contributes to adaptive survival mechanisms during radiotherapy. The data also support a potential role for the PERK/eIF2α/ATF4 axis in modulating cell viability in irradiated GBM. Implications: The dual function of PERK as a mediator of survival and death may be exploited to enhance the efficacy of radiation therapy. Visual Overview: http://mcr.aacrjournals.org/content/16/10/1447/F1.large.jpg Mol Cancer Res; 16(10); 1447-53. ©2018 AACR.",2018-07-10 +30244783,Efficacy of intravaginal dehydroepiandrosterone (DHEA) for symptomatic women in the peri- or postmenopausal phase.,"

Objective

There is uncertainty whether treatment with dehydroepiandrosterone (DHEA) decreases menopausal symptoms for women in the peri- or postmenopausal phase. A previous systematic review considering this subject suggested that DHEA may slightly improve sexual function compared with placebo (CS. Scheffers, S. Armstrong, AEP. Cantineau, C. Farquhar, V. Jordan Dehydroepiandrosterone for women in the peri- or postmenopausal phase. Cochrane Database of Systematic Reviews 2015, Issue 1. Art. No.: CD011066. DOI: https://doi.org/10.1002/14651858.CD011066.pub2). The purpose of this article is to review recent research investigating whether the use of DHEA, and in particular intravaginal DHEA (Prasterone®), improves sexual function.

Methods

We conducted an online search using Medline OVID for recent articles related to DHEA and menopause. We found 48 relevant publications, out of which 14 papers were original research, all related to the development and licensing of intravaginal DHEA. We critically analysed these 14 articles in relation to sexual function.

Results

All the randomised controlled trials assessed the efficacy of vaginal DHEA in women with vulvovaginal atrophy and showed that sexual dysfunction improved with treatment regardless of the level of dyspareunia at baseline. Treatment with DHEA was found to be superior to placebo and at least as efficacious as vaginal oestrogens in improving symptoms.

Conclusion

Intravaginal DHEA appears to be a safe and effective treatment for menopausal vulvovaginal atrophy and dyspareunia in most women. Further studies are required before it can be recommended for women with a history of thrombosis, cardiovascular disease or hormone-sensitive neoplasms.",2018-07-31 +31354054,Multi-parametric analysis reveals metabolic and vascular effects driving differences in BOLD-based cerebrovascular reactivity associated with a history of sport concussion.,"Objective: Identify alterations in cerebrovascular reactivity (CVR) based on the history of sport-related concussion (SRC). Further explore possible mechanisms underlying differences in vascular physiology using hemodynamic parameters modeled using calibrated magnetic resonance imaging (MRI). Method: End-tidal targeting and dual-echo MRI were combined to probe hypercapnic and hyperoxic challenges in athletes with (n = 32) and without (n = 31) a history of SRC. Concurrent blood oxygenation level dependent (BOLD) and arterial spin labeling (ASL) data were used to compute BOLD-CVR, ASL-CVR, and other physiological parameters including resting oxygen extraction fraction (OEF0) and cerebral blood volume (CBV0). Multiple linear and logistic regressions were then used to identify dominant parameters driving group-differences in BOLD-CVR. Results: Robust evidence for elevated BOLD-CVR were found in athletes with SRC history spreading over parts of the cortical hemispheres. Follow-up analyses showed co-localized differences in ASL-CVR (representing modulation of cerebral blood flow) and hemodynamic factors representing static vascular (i.e., CBV0) and metabolic (i.e., OEF0) effects suggesting that group-based differences in BOLD-CVR may be driven by a mixed effect from factors with vascular and metabolic origins. 
Conclusion: These results emphasize that while BOLD-CVR offers promises as a surrogate non-specific biomarker for cerebrovascular health following SRC, multiple hemodynamic parameters can affect its relative measurements. Abbreviations: [dHb]: concentration of deoxyhemoglobin; AFNI: Analysis of Functional NeuroImages ( https://afni.nimh.nih.gov ); ASL: arterial spin labeling; BIG: position group: defensive and offensive linemen; BIG-SKILL: position group: full backs, linebackers, running backs, tight-ends; BOLD: blood oxygen level dependent; CBF: cerebral blood flow; CMRO2: cerebral metabolic rate of oxygen consumption; CTL: group of control subjects; CVR: cerebrovascular reactivity; fMRI: functional magnetic resonance imaging; FSL: FMRIB software library ( https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/ ); HC: hypercapnia; HO: hyperoxia; HX: group with history of concussion; M: maximal theoretical BOLD signal upon complete removal of venous dHb; pCASL: pseudo-continuous arterial spin labeling; PETCO2: end-tidal carbon dioxide; PETO2: end-tidal oxygen; SCAT: sport-concussion assessment tool; SKILL: position group: defensive backs, kickers, quarterbacks, safeties, wide-receivers; SRC: sport-related concussion.",2019-07-27 +24163098,SMMRNA: a database of small molecule modulators of RNA.,"We have developed SMMRNA, an interactive database, available at http://www.smmrna.org, with special focus on small molecule ligands targeting RNA. Currently, SMMRNA consists of ∼770 unique ligands along with structural images of RNA molecules. Each ligand in the SMMRNA contains information such as Kd, Ki, IC50, ΔTm, molecular weight (MW), hydrogen donor and acceptor count, XlogP, number of rotatable bonds, number of aromatic rings and 2D and 3D structures. These parameters can be explored using text search, advanced search, substructure and similarity-based analysis tools that are embedded in SMMRNA. A structure editor is provided for 3D visualization of ligands. 
Advance analysis can be performed using substructure and OpenBabel-based chemical similarity fingerprints. Upload facility for both RNA and ligands is also provided. The physicochemical properties of the ligands were further examined using OpenBabel descriptors, hierarchical clustering, binning partition and multidimensional scaling. We have also generated a 3D conformation database of ligands to support the structure and ligand-based screening. SMMRNA provides comprehensive resource for further design, development and refinement of small molecule modulators for selective targeting of RNA molecules.",2013-10-24 +27899667,"Rice SNP-seek database update: new SNPs, indels, and queries.","We describe updates to the Rice SNP-Seek Database since its first release. We ran a new SNP-calling pipeline followed by filtering that resulted in complete, base, filtered and core SNP datasets. Besides the Nipponbare reference genome, the pipeline was run on genome assemblies of IR 64, 93-11, DJ 123 and Kasalath. New genotype query and display features are added for reference assemblies, SNP datasets and indels. JBrowse now displays BAM, VCF and other annotation tracks, the additional genome assemblies and an embedded VISTA genome comparison viewer. Middleware is redesigned for improved performance by using a hybrid of HDF5 and RDMS for genotype storage. Query modules for genotypes, varieties and genes are improved to handle various constraints. An integrated list manager allows the user to pass query parameters for further analysis. The SNP Annotator adds traits, ontology terms, effects and interactions to markers in a list. Web-service calls were implemented to access most data. These features enable seamless querying of SNP-Seek across various biological entities, a step toward semi-automated gene-trait association discovery. URL: http://snp-seek.irri.org.",2016-11-29 +31984360,Annotating and detecting phenotypic information for chronic obstructive pulmonary disease.,"

Objectives

Chronic obstructive pulmonary disease (COPD) phenotypes cover a range of lung abnormalities. To allow text mining methods to identify pertinent and potentially complex information about these phenotypes from textual data, we have developed a novel annotated corpus, which we use to train a neural network-based named entity recognizer to detect fine-grained COPD phenotypic information.

Materials and methods

Since COPD phenotype descriptions often mention other concepts within them (proteins, treatments, etc.), our corpus annotations include both outermost phenotype descriptions and concepts nested within them. Our neural layered bidirectional long short-term memory conditional random field (BiLSTM-CRF) network firstly recognizes nested mentions, which are fed into subsequent BiLSTM-CRF layers, to help to recognize enclosing phenotype mentions.

Results

Our corpus of 30 full papers (available at: http://www.nactem.ac.uk/COPD) is annotated by experts with 27 030 phenotype-related concept mentions, most of which are automatically linked to UMLS Metathesaurus concepts. When trained using the corpus, our BiLSTM-CRF network outperforms other popular approaches in recognizing detailed phenotypic information.

Discussion

Information extracted by our method can facilitate efficient location and exploration of detailed information about phenotypes, for example, those specifically concerning reactions to treatments.

Conclusion

The importance of our corpus for developing methods to extract fine-grained information about COPD phenotypes is demonstrated through its successful use to train a layered BiLSTM-CRF network to extract phenotypic information at various levels of granularity. The minimal human intervention needed for training should permit ready adaption to extracting phenotypic information about other diseases.",2019-04-26 +29554210,SCRAM: a pipeline for fast index-free small RNA read alignment and visualization.,"Summary: Small RNAs play key roles in gene regulation, defense against viral pathogens and maintenance of genome stability, though many aspects of their biogenesis and function remain to be elucidated. SCRAM (Small Complementary RNA Mapper) is a novel, simple-to-use short read aligner and visualization suite that enhances exploration of small RNA datasets. Availability and implementation: The SCRAM pipeline is implemented in Go and Python, and is freely available under MIT license. Source code, multiplatform binaries and a Docker image can be accessed via https://sfletc.github.io/scram/. Supplementary information: Supplementary data are available at Bioinformatics online.",2018-08-01 +30071357,Towards frailty biomarkers: Candidates from genes and pathways regulated in aging and age-related diseases.,"

Objective

Use of the frailty index to measure an accumulation of deficits has been proven a valuable method for identifying elderly people at risk for increased vulnerability, disease, injury, and mortality. However, complementary molecular frailty biomarkers or ideally biomarker panels have not yet been identified. We conducted a systematic search to identify biomarker candidates for a frailty biomarker panel.

Methods

Gene expression databases were searched (http://genomics.senescence.info/genes including GenAge, AnAge, LongevityMap, CellAge, DrugAge, Digital Aging Atlas) to identify genes regulated in aging, longevity, and age-related diseases with a focus on secreted factors or molecules detectable in body fluids as potential frailty biomarkers. Factors broadly expressed, related to several ""hallmark of aging"" pathways as well as used or predicted as biomarkers in other disease settings, particularly age-related pathologies, were identified. This set of biomarkers was further expanded according to the expertise and experience of the authors. In the next step, biomarkers were assigned to six ""hallmark of aging"" pathways, namely (1) inflammation, (2) mitochondria and apoptosis, (3) calcium homeostasis, (4) fibrosis, (5) NMJ (neuromuscular junction) and neurons, (6) cytoskeleton and hormones, or (7) other principles and an extensive literature search was performed for each candidate to explore their potential and priority as frailty biomarkers.

Results

A total of 44 markers were evaluated in the seven categories listed above, and 19 were awarded a high priority score, 22 identified as medium priority and three were low priority. In each category high and medium priority markers were identified.

Conclusion

Biomarker panels for frailty would be of high value and better than single markers. Based on our search we would propose a core panel of frailty biomarkers consisting of (1) CXCL10 (C-X-C motif chemokine ligand 10), IL-6 (interleukin 6), CX3CL1 (C-X3-C motif chemokine ligand 1), (2) GDF15 (growth differentiation factor 15), FNDC5 (fibronectin type III domain containing 5), vimentin (VIM), (3) regucalcin (RGN/SMP30), calreticulin, (4) PLAU (plasminogen activator, urokinase), AGT (angiotensinogen), (5) BDNF (brain derived neurotrophic factor), progranulin (PGRN), (6) α-klotho (KL), FGF23 (fibroblast growth factor 23), FGF21, leptin (LEP), (7) miRNA (micro Ribonucleic acid) panel (to be further defined), AHCY (adenosylhomocysteinase) and KRT18 (keratin 18). An expanded panel would also include (1) pentraxin (PTX3), sVCAM/ICAM (soluble vascular cell adhesion molecule 1/Intercellular adhesion molecule 1), defensin α, (2) APP (amyloid beta precursor protein), LDH (lactate dehydrogenase), (3) S100B (S100 calcium binding protein B), (4) TGFβ (transforming growth factor beta), PAI-1 (plasminogen activator inhibitor 1), TGM2 (transglutaminase 2), (5) sRAGE (soluble receptor for advanced glycosylation end products), HMGB1 (high mobility group box 1), C3/C1Q (complement factor 3/1Q), ST2 (Interleukin 1 receptor like 1), agrin (AGRN), (6) IGF-1 (insulin-like growth factor 1), resistin (RETN), adiponectin (ADIPOQ), ghrelin (GHRL), growth hormone (GH), (7) microparticle panel (to be further defined), GpnmB (glycoprotein nonmetastatic melanoma protein B) and lactoferrin (LTF). We believe that these predicted panels need to be experimentally explored in animal models and frail cohorts in order to ascertain their diagnostic, prognostic and therapeutic potential.",2018-07-30 +30062812,A systematic review of the use of ketogenic diets in adult patients with cancer.,"

Background

A growing body of evidence indicates the importance of nutrition in cancer treatment. Ketogenic diets are one strategy that has been proposed to enhance traditional anticancer therapy. This review summarises the evidence concerning the effect of oral ketogenic diets on anthropometry, metabolism, quality of life (QoL) and tumour effects, at the same time as documenting adverse events and adherence in patients with cancer.

Methods

We searched electronic databases using medical subject headings (MeSH) and text words related to ketogenic diets and cancer. Adult patients following a ketogenic diet as a complementary therapy prior, alongside or after standard anticancer treatment for more than 7 days were included. Studies were assessed for quality using the Critical Appraisal Skills Programme tools (https://www.casp-uk.net).

Results

Eleven studies were included with 102 participants (age range 34-87 years) from early-phase trials, cohort studies and case reports. Studies included participants with brain, rectal or mixed cancer sites at an early or advanced disease stage. The duration of intervention ranged from 2.4 to 134.7 weeks (0.5-31 months). Evidence was inconclusive for nutritional status and adverse events. Mixed results were observed for blood parameters, tumour effects and QoL. Adherence to diet was low (50 out of 102; 49%) and ranged from 23.5% to 100%.

Conclusions

High-quality evidence on the effect of ketogenic diets on anthropometry, metabolism, QoL and tumour effects is currently lacking in oncology patients. Heterogeneity between studies and low adherence to diet affects the current evidence. There is an obvious gap in the evidence, highlighting the need for controlled trials to fully evaluate the intervention.",2018-07-30 +29520580,Spousal cardiometabolic risk factors and incidence of type 2 diabetes: a prospective analysis from the English Longitudinal Study of Ageing.,"AIMS/HYPOTHESIS:In the UK, more than one million people have undiagnosed diabetes and an additional five million are at high risk of developing the disease. Given that early identification of these people is key for both primary and secondary prevention, new screening approaches are needed. Since spouses resemble each other in cardiometabolic risk factors related to type 2 diabetes, we aimed to investigate whether diabetes and cardiometabolic risk factors in one spouse can be used as an indicator of incident type 2 diabetes in the other spouse. METHODS:We analysed data from 3649 men and 3478 women from the English Longitudinal Study of Ageing with information on their own and their spouse's diabetes status and cardiometabolic risk factors. We modelled incidence rates and incidence rate ratios with Poisson regression, using spousal diabetes status or cardiometabolic risk factors (i.e. BMI, waist circumference, systolic and diastolic BP, HDL- and LDL-cholesterol and triacylglycerols) as exposures and type 2 diabetes incidence in the index individual as the outcome. Models were adjusted for two nested sets of covariates. RESULTS:Spousal BMI and waist circumference were associated with incident type 2 diabetes, but with different patterns for men and women. A man's risk of type 2 diabetes increased more steeply with his wife's obesity level, and the association remained statistically significant even after adjustment for the man's own obesity level. 
Having a wife with a 5 kg/m2 higher BMI (30 kg/m2 vs 25 kg/m2) was associated with a 21% (95% CI 11%, 33%) increased risk of type 2 diabetes. In contrast, the association between incident type 2 diabetes in a woman and her husband's BMI was attenuated after adjusting for the woman's own obesity level. Findings for waist circumference were similar to those for BMI. Regarding other risk factors, we found a statistically significant association only between the risk of type 2 diabetes in women and their husbands' triacylglycerol levels. CONCLUSIONS/INTERPRETATION:The main finding of this study is the sex-specific effect of spousal obesity on the risk of type 2 diabetes. Having an obese spouse increases an individual's risk of type 2 diabetes over and above the effect of the individual's own obesity level among men, but not among women. Our results suggest that a couples-focused approach may be beneficial for the early detection of type 2 diabetes and individuals at high risk of developing type 2 diabetes, especially in men, who are less likely than women to attend health checks. DATA AVAILABILITY:Data were accessed via the UK Data Service under the data-sharing agreement no. 91400 ( https://discover.ukdataservice.ac.uk/catalogue/?sn=5050&type=Data%20catalogue ).",2018-03-08 +21984757,"miREnvironment database: providing a bridge for microRNAs, environmental factors and phenotypes.","

Unlabelled

The interaction between genetic factors and environmental factors has critical roles in determining the phenotype of an organism. In recent years, a number of studies have reported that the dysfunctions on microRNA (miRNAs), environmental factors and their interactions have strong effects on phenotypes and even may result in abnormal phenotypes and diseases, whereas there has been no database linking miRNAs, environmental factors and phenotypes. Such a resource platform is believed to be of great value in the understanding of miRNAs, environmental factors, especially drugs and diseases. In this study, we constructed the miREnvironment database, which contains a comprehensive collection and curation of experimentally supported interactions among miRNAs, environmental factors and phenotypes. The names of miRNAs, phenotypes, environmental factors, conditions of environmental factors, samples, species, evidence and references were further annotated. miREnvironment represents a biomedical resource for researches on miRNAs, environmental factors and diseases.

Availability

http://cmbi.bjmu.edu.cn/miren.

Contact

cuiqinghua@hsc.pku.edu.cn.",2011-10-07 +26496949,KLIFS: a structural kinase-ligand interaction database.,"Protein kinases play a crucial role in cell signaling and are important drug targets in several therapeutic areas. The KLIFS database contains detailed structural kinase-ligand interaction information derived from all (>2900) structures of catalytic domains of human and mouse protein kinases deposited in the Protein Data Bank in order to provide insights into the structural determinants of kinase-ligand binding and selectivity. The kinase structures have been processed in a consistent manner by systematically analyzing the structural features and molecular interaction fingerprints (IFPs) of a predefined set of 85 binding site residues with bound ligands. KLIFS has been completely rebuilt and extended (>65% more structures) since its first release as a data set, including: novel automated annotation methods for (i) the assessment of ligand-targeted subpockets and the analysis of (ii) DFG and (iii) αC-helix conformations; improved and automated protocols for (iv) the generation of sequence/structure alignments, (v) the curation of ligand atom and bond typing for accurate IFP analysis and (vi) weekly database updates. KLIFS is now accessible via a website (http://klifs.vu-compmedchem.nl) that provides a comprehensive visual presentation of different types of chemical, biological and structural chemogenomics data, and allows the user to easily access, compare, search and download the data.",2015-10-22 +27727438,COSMIC: High-Resolution Cancer Genetics Using the Catalogue of Somatic Mutations in Cancer.,"COSMIC (http://cancer.sanger.ac.uk) is an expert-curated database of somatic mutations in human cancer. Broad and comprehensive in scope, recent releases in 2016 describe over 4 million coding mutations across all human cancer disease types. Mutations are annotated across the entire genome, but expert curation is focused on over 400 key cancer genes. 
Now encompassing the majority of molecular mutation mechanisms in oncogenetics, COSMIC additionally describes 10 million non-coding mutations, 1 million copy-number aberrations, 9 million gene-expression variants, and almost 8 million differentially methylated CpGs. This information combines a consistent interpretation of the data from the major cancer genome consortia and cancer genome literature with exhaustive hand curation of over 22,000 gene-specific literature publications. This unit describes the graphical Web site in detail; alternative protocols overview other ways the entire database can be accessed, analyzed, and downloaded. © 2016 by John Wiley & Sons, Inc.",2016-10-11 +24352427,Inferring the choreography of parental genomes during fertilization from ultralarge-scale whole-transcriptome analysis.,"Fertilization precisely choreographs parental genomes by using gamete-derived cellular factors and activating genome regulatory programs. However, the mechanism remains elusive owing to the technical difficulties of preparing large numbers of high-quality preimplantation cells. Here, we collected >14 × 10(4) high-quality mouse metaphase II oocytes and used these to establish detailed transcriptional profiles for four early embryo stages and parthenogenetic development. By combining these profiles with other public resources, we found evidence that gene silencing appeared to be mediated in part by noncoding RNAs and that this was a prerequisite for post-fertilization development. Notably, we identified 817 genes that were differentially expressed in embryos after fertilization compared with parthenotes. The regulation of these genes was distinctly different from those expressed in parthenotes, suggesting functional specialization of particular transcription factors prior to first cell cleavage. We identified five transcription factors that were potentially necessary for developmental progression: Foxd1, Nkx2-5, Sox18, Myod1, and Runx1. 
Our very large-scale whole-transcriptome profile of early mouse embryos yielded a novel and valuable resource for studies in developmental biology and stem cell research. The database is available at http://dbtmee.hgc.jp.",2013-12-01 +21712246,CAMPAIGN: an open-source library of GPU-accelerated data clustering algorithms.,"

Motivation

Data clustering techniques are an essential component of a good data analysis toolbox. Many current bioinformatics applications are inherently compute-intense and work with very large datasets. Sequential algorithms are inadequate for providing the necessary performance. For this reason, we have created Clustering Algorithms for Massively Parallel Architectures, Including GPU Nodes (CAMPAIGN), a central resource for data clustering algorithms and tools that are implemented specifically for execution on massively parallel processing architectures.

Results

CAMPAIGN is a library of data clustering algorithms and tools, written in 'C for CUDA' for Nvidia GPUs. The library provides up to two orders of magnitude speed-up over respective CPU-based clustering algorithms and is intended as an open-source resource. New modules from the community will be accepted into the library and the layout of it is such that it can easily be extended to promising future platforms such as OpenCL.

Availability

Releases of the CAMPAIGN library are freely available for download under the LGPL from https://simtk.org/home/campaign. Source code can also be obtained through anonymous subversion access as described on https://simtk.org/scm/?group_id=453.

Contact

kjk33@cantab.net.",2011-06-27 +28787030,CAM: A quality control pipeline for MNase-seq data.,"Nucleosome organization affects the accessibility of cis-elements to trans-acting factors. Micrococcal nuclease digestion followed by high-throughput sequencing (MNase-seq) is the most popular technology used to profile nucleosome organization on a genome-wide scale. Evaluating the data quality of MNase-seq data remains challenging, especially in mammalian. There is a strong need for a convenient and comprehensive approach to obtain dedicated quality control (QC) for MNase-seq data analysis. Here we developed CAM, which is a comprehensive QC pipeline for MNase-seq data. The CAM pipeline provides multiple informative QC measurements and nucleosome organization profiles on different potentially functional regions for given MNase-seq data. CAM also includes 268 historical MNase-seq datasets from human and mouse as a reference atlas for unbiased assessment. CAM is freely available at: http://www.tongji.edu.cn/~zhanglab/CAM.",2017-08-07 +29314829,Quantitative Toxicity Prediction Using Topology Based Multitask Deep Neural Networks.,"The understanding of toxicity is of paramount importance to human health and environmental protection. Quantitative toxicity analysis has become a new standard in the field. This work introduces element specific persistent homology (ESPH), an algebraic topology approach, for quantitative toxicity prediction. ESPH retains crucial chemical information during the topological abstraction of geometric complexity and provides a representation of small molecules that cannot be obtained by any other method. To investigate the representability and predictive power of ESPH for small molecules, ancillary descriptors have also been developed based on physical models. 
Topological and physical descriptors are paired with advanced machine learning algorithms, such as the deep neural network (DNN), random forest (RF), and gradient boosting decision tree (GBDT), to facilitate their applications to quantitative toxicity predictions. A topology based multitask strategy is proposed to take the advantage of the availability of large data sets while dealing with small data sets. Four benchmark toxicity data sets that involve quantitative measurements are used to validate the proposed approaches. Extensive numerical studies indicate that the proposed topological learning methods are able to outperform the state-of-the-art methods in the literature for quantitative toxicity analysis. Our online server for computing element-specific topological descriptors (ESTDs) is available at http://weilab.math.msu.edu/TopTox/ .",2018-01-31 +30158740,Nonproliferative and Proliferative Lesions of the Rat and Mouse Endocrine System.,"The INHAND (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) Project (www.toxpath.org/inhand.asp) is a joint initiative among the Societies of Toxicological Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP) and North America (STP) to develop an internationally accepted nomenclature for proliferative and nonproliferative lesions in laboratory animals. The purpose of this publication is to provide a standardized nomenclature for classifying microscopic lesions observed in the endocrine organs (pituitary gland, pineal gland, thyroid gland, parathyroid glands, adrenal glands and pancreatic islets) of laboratory rats and mice, with color photomicrographs illustrating examples of the lesions. The standardized nomenclature presented in this document is also available electronically on the internet (http://www.goreni.org/). Sources of material included histopathology databases from government, academia, and industrial laboratories throughout the world. 
Content includes spontaneous and aging lesions as well as lesions induced by exposure to test materials. A widely accepted and utilized international harmonization of nomenclature for endocrine lesions in laboratory animals will decrease confusion among regulatory and scientific research organizations in different countries and provide a common language to increase and enrich international exchanges of information among toxicologists and pathologists.",2018-07-28 +26987082,"CDC Guideline for Prescribing Opioids for Chronic Pain - United States, 2016.","This guideline provides recommendations for primary care clinicians who are prescribing opioids for chronic pain outside of active cancer treatment, palliative care, and end-of-life care. The guideline addresses 1) when to initiate or continue opioids for chronic pain; 2) opioid selection, dosage, duration, follow-up, and discontinuation; and 3) assessing risk and addressing harms of opioid use. CDC developed the guideline using the Grading of Recommendations Assessment, Development, and Evaluation (GRADE) framework, and recommendations are made on the basis of a systematic review of the scientific evidence while considering benefits and harms, values and preferences, and resource allocation. CDC obtained input from experts, stakeholders, the public, peer reviewers, and a federally chartered advisory committee. It is important that patients receive appropriate pain treatment with careful consideration of the benefits and risks of treatment options. This guideline is intended to improve communication between clinicians and patients about the risks and benefits of opioid therapy for chronic pain, improve the safety and effectiveness of pain treatment, and reduce the risks associated with long-term opioid therapy, including opioid use disorder, overdose, and death. 
CDC has provided a checklist for prescribing opioids for chronic pain (http://stacks.cdc.gov/view/cdc/38025) as well as a website (http://www.cdc.gov/drugoverdose/prescribingresources.html) with additional tools to guide clinicians in implementing the recommendations.",2016-03-18 +30314257,A first preliminary study of the shallow water sponge fauna from Cyprus Island (Eastern Mediterranean).,"Currently, more than 8,500 valid sponge species are reported in the World Porifera Database (http://www.marinespecies.org/porifera/) (van Soest et al. 2018). The Mediterranean Sea sponge fauna, counting almost 700 species, is one of the best documented in the world (Pronzato 2003; Pansini et al. 2011; van Soest et al. 2018) but the eastern part of the basin is by far less studied, in comparison with other Mediterranean areas (Pansini et al. 2000; Voultsiadou Vafidis 2004; Topaloğlu Evcen 2014). A small number of species, mainly belonging to the cosmopolitan genus Spongia (Dictyoceratida), are commonly used as bath sponges. Aim of this work is to provide further information on Cyprus Island sponges in general and on species that had commercial importance in the past.",2018-07-27 +31231515,A curated transcriptome dataset collection to investigate the blood transcriptional response to viral respiratory tract infection and vaccination.,"The human immune defense mechanisms and factors associated with good versus poor health outcomes following viral respiratory tract infections (VRTI), as well as correlates of protection following vaccination against respiratory viruses, remain incompletely understood. To shed further light into these mechanisms, a number of systems-scale studies have been conducted to measure transcriptional changes in blood leukocytes of either naturally or experimentally infected individuals, or in individual's post-vaccination. 
Here we are making available a public repository, for research investigators for interpretation, a collection of transcriptome datasets obtained from human whole blood and peripheral blood mononuclear cells (PBMC) to investigate the transcriptional responses following viral respiratory tract infection or vaccination against respiratory viruses. In total, 31 datasets, associated to viral respiratory tract infections and their related vaccination studies, were identified and retrieved from the NCBI Gene Expression Omnibus (GEO) and loaded in a custom web application designed for interactive query and visualization of integrated large-scale data. Quality control checks, using relevant biological markers, were performed. Multiple sample groupings and rank lists were created to facilitate dataset query and interpretation. Via this interface, users can generate web links to customized graphical views, which may be subsequently inserted into manuscripts to report novel findings. The GXB tool enables browsing of a single gene across projects, providing new perspectives on the role of a given molecule across biological systems in the diagnostic and prognostic following VRTI but also in identifying new correlates of protection. This dataset collection is available at: http://vri1.gxbsidra.org/dm3/geneBrowser/list.",2019-03-13 +28423505,Identification and analysis of mutational hotspots in oncogenes and tumour suppressors.,"

Background

The key to interpreting the contribution of a disease-associated mutation in the development and progression of cancer is an understanding of the consequences of that mutation both on the function of the affected protein and on the pathways in which that protein is involved. Protein domains encapsulate function and position-specific domain based analysis of mutations have been shown to help elucidate their phenotypes.

Results

In this paper we examine the domain biases in oncogenes and tumour suppressors, and find that their domain compositions substantially differ. Using data from over 30 different cancers from whole-exome sequencing cancer genomic projects we mapped over one million mutations to their respective Pfam domains to identify which domains are enriched in any of three different classes of mutation; missense, indels or truncations. Next, we identified the mutational hotspots within domain families by mapping small mutations to equivalent positions in multiple sequence alignments of protein domains. We find that gain of function mutations from oncogenes and loss of function mutations from tumour suppressors are normally found in different domain families and when observed in the same domain families, hotspot mutations are located at different positions within the multiple sequence alignment of the domain.

Conclusions

By considering hotspots in tumour suppressors and oncogenes independently, we find that there are different specific positions within domain families that are particularly suited to accommodate either a loss or a gain of function mutation. The position is also dependent on the class of mutation. We find rare mutations co-located with well-known functional mutation hotspots, in members of homologous domain superfamilies, and we detect novel mutation hotspots in domain families previously unconnected with cancer. The results of this analysis can be accessed through the MOKCa database (http://strubiol.icr.ac.uk/extra/MOKCa).",2017-03-01 +22790981,GFam: a platform for automatic annotation of gene families.,"We have developed GFam, a platform for automatic annotation of gene/protein families. GFam provides a framework for genome initiatives and model organism resources to build domain-based families, derive meaningful functional labels and offers a seamless approach to propagate functional annotation across periodic genome updates. GFam is a hybrid approach that uses a greedy algorithm to chain component domains from InterPro annotation provided by its 12 member resources followed by a sequence-based connected component analysis of un-annotated sequence regions to derive consensus domain architecture for each sequence and subsequently generate families based on common architectures. Our integrated approach increases sequence coverage by 7.2 percentage points and residue coverage by 14.6 percentage points higher than the coverage relative to the best single-constituent database within InterPro for the proteome of Arabidopsis. The true power of GFam lies in maximizing annotation provided by the different InterPro data sources that offer resource-specific coverage for different regions of a sequence. GFam's capability to capture higher sequence and residue coverage can be useful for genome annotation, comparative genomics and functional studies. 
GFam is a general-purpose software and can be used for any collection of protein sequences. The software is open source and can be obtained from http://www.paccanarolab.org/software/gfam/.",2012-07-11 +28241745,Drug voyager: a computational platform for exploring unintended drug action.,"

Background

The dominant paradigm in understanding drug action focuses on the intended therapeutic effects and frequent adverse reactions. However, this approach may limit opportunities to grasp unintended drug actions, which can open up channels to repurpose existing drugs and identify rare adverse drug reactions. Advances in systems biology can be exploited to comprehensively understand pharmacodynamic actions, although proper frameworks to represent drug actions are still lacking.

Results

We suggest a novel platform to construct a drug-specific pathway in which a molecular-level mechanism of action is formulated based on pharmacologic, pharmacogenomic, transcriptomic, and phenotypic data related to drug response ( http://databio.gachon.ac.kr/tools/ ). In this platform, an adoption of three conceptual levels imitating drug perturbation allows these pathways to be realistically rendered in comparison to those of other models. Furthermore, we propose a new method that exploits functional features of the drug-specific pathways to predict new indications as well as adverse reactions. For therapeutic uses, our predictions significantly overlapped with clinical trials and an up-to-date drug-disease association database. Also, our method outperforms existing methods with regard to classification of active compounds for cancers. For adverse reactions, our predictions were significantly enriched in an independent database derived from the Food and Drug Administration (FDA) Adverse Event Reporting System and meaningfully cover an Adverse Reaction Database provided by Health Canada. Lastly, we discuss several predictions for both therapeutic indications and side-effects through the published literature.

Conclusions

Our study addresses how we can computationally represent drug-signaling pathways to understand unintended drug actions and to facilitate drug discovery and screening.",2017-02-28 +28431087,A method for learning a sparse classifier in the presence of missing data for high-dimensional biological datasets.,"

Motivation

This work addresses two common issues in building classification models for biological or medical studies: learning a sparse model, where only a subset of a large number of possible predictors is used, and training in the presence of missing data. This work focuses on supervised generative binary classification models, specifically linear discriminant analysis (LDA). The parameters are determined using an expectation maximization algorithm to both address missing data and introduce priors to promote sparsity. The proposed algorithm, expectation-maximization sparse discriminant analysis (EM-SDA), produces a sparse LDA model for datasets with and without missing data.

Results

EM-SDA is tested via simulations and case studies. In the simulations, EM-SDA is compared with nearest shrunken centroids (NSCs) and sparse discriminant analysis (SDA) with k-nearest neighbors for imputation for varying mechanism and amount of missing data. In three case studies using published biomedical data, the results are compared with NSC and SDA models with four different types of imputation, all of which are common approaches in the field. EM-SDA is more accurate and sparse than competing methods both with and without missing data in most of the experiments. Furthermore, the EM-SDA results are mostly consistent between the missing and full cases. Biological relevance of the resulting models, as quantified via a literature search, is also presented.

Availability and implementation

A Matlab implementation published under GNU GPL v.3 license is available at http://web.mit.edu/braatzgroup/links.html .

Contact

braatz@mit.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +22402613,A strategy for building neuroanatomy ontologies.,"

Motivation

Advancing our understanding of how nervous systems work will require the ability to store and annotate 3D anatomical datasets, recording morphology, partonomy and connectivity at multiple levels of granularity from subcellular to gross anatomy. It will also require the ability to integrate this data with other data-types including functional, genetic and electrophysiological data. The web ontology language OWL2 provides the means to solve many of these problems. Using it, one can rigorously define and relate classes of anatomical structure using multiple criteria. The resulting classes can be used to annotate datasets recording, for example, gene expression or electrophysiology. Reasoning software can be used to automate classification and error checking and to construct and answer sophisticated combinatorial queries. But for such queries to give consistent and biologically meaningful results, it is important that both classes and the terms (relations) used to relate them are carefully defined.

Results

We formally define a set of relations for recording the spatial and connectivity relationships of neuron classes and brain regions in a broad range of species, from vertebrates to arthropods. We illustrate the utility of our approach via its application in the ontology that drives the Virtual Fly Brain web resource.

Availability and implementation

The relations we define are available from http://purl.obolibrary.org/obo/ro.owl. They are used in the Drosophila anatomy ontology (http://purl.obolibrary.org/obo/fbbt/2011-09-06/), which drives the web resource http://www.virtualflybrain.org",2012-03-07 +22449400,SMS 2.0: an updated database to study the structural plasticity of short peptide fragments in non-redundant proteins.,"The function of a protein molecule is greatly influenced by its three-dimensional (3D) structure and therefore structure prediction will help identify its biological function. We have updated Sequence, Motif and Structure (SMS), the database of structurally rigid peptide fragments, by combining amino acid sequences and the corresponding 3D atomic coordinates of non-redundant (25%) and redundant (90%) protein chains available in the Protein Data Bank (PDB). SMS 2.0 provides information pertaining to the peptide fragments of length 5-14 residues. The entire dataset is divided into three categories, namely, same sequence motifs having similar, intermediate or dissimilar 3D structures. Further, options are provided to facilitate structural superposition using the program structural alignment of multiple proteins (STAMP) and the popular JAVA plug-in (Jmol) is deployed for visualization. In addition, functionalities are provided to search for the occurrences of the sequence motifs in other structural and sequence databases like PDB, Genome Database (GDB), Protein Information Resource (PIR) and Swiss-Prot. The updated database along with the search engine is available over the World Wide Web through the following URL http://cluster.physics.iisc.ernet.in/sms/.",2012-02-01 +29664587,Maternal Smoking during Pregnancy and Early Childhood and Development of Asthma and Rhinoconjunctivitis - a MeDALL Project.,"BACKGROUND:The role of tobacco smoke exposure in the development and persistence of asthma and rhinoconjunctivitis through childhood into adolescence is unclear. 
OBJECTIVES:We assessed the associations of parental smoking from fetal life through adolescence with asthma and rhinoconjunctivitis during childhood and adolescence. METHODS:We analyzed data for 10,860 participants of five European birth cohort studies from the Mechanisms of the Development of Allergy (MeDALL) consortium. Parental smoking habits and health outcomes (early transient, persistent, and adolescent-onset asthma and rhinoconjunctivitis) were based on questionnaires covering the period from pregnancy to 14-16 y of age. Data were combined and analyzed using a one-stage and two-stage individual participant data meta-analysis. RESULTS:Overall, any maternal smoking during pregnancy tended to be associated with an increased odds of prevalent asthma [adjusted odds ratio (aOR)=1.19 (95% CI: 0.98, 1.43)], but not prevalent rhinoconjunctivitis [aOR=1.05 (95% CI: 0.90, 1.22)], during childhood and adolescence. In analyses with phenotypes related to age of onset and persistence of disease, any maternal smoking during pregnancy was associated with early transient asthma [aOR=1.79 (95% CI: 1.14, 2.83)]. Maternal smoking of ≥10 cigarettes/day during pregnancy was associated with persistent asthma [aOR=1.66 (95% CI: 1.29, 2.15)] and persistent rhinoconjunctivitis [aOR=1.55 (95% CI, 1.09, 2.20)]. Tobacco smoke exposure during fetal life, infancy, childhood, and adolescence was not associated with adolescent-onset asthma or rhinoconjunctivitis. CONCLUSIONS:Findings from this combined analysis of five European birth cohorts strengthen evidence linking early exposure to tobacco smoke with asthma during childhood and adolescence. Children with high early-life exposure were more likely than unexposed children to have early transient and persistent asthma and persistent rhinoconjunctivitis. https://doi.org/10.1289/EHP2738.",2018-04-12 +28605402,Sparse redundancy analysis of high-dimensional genetic and genomic data.,"

Motivation

Recent technological developments have enabled the possibility of genetic and genomic integrated data analysis approaches, where multiple omics datasets from various biological levels are combined and used to describe (disease) phenotypic variations. The main goal is to explain and ultimately predict phenotypic variations by understanding their genetic basis and the interaction of the associated genetic factors. Therefore, understanding the underlying genetic mechanisms of phenotypic variations is an ever increasing research interest in biomedical sciences. In many situations, we have a set of variables that can be considered to be the outcome variables and a set that can be considered to be explanatory variables. Redundancy analysis (RDA) is an analytic method to deal with this type of directionality. Unfortunately, current implementations of RDA cannot deal optimally with the high dimensionality of omics data (p≫n). The existing theoretical framework, based on Ridge penalization, is suboptimal, since it includes all variables in the analysis. As a solution, we propose to use Elastic Net penalization in an iterative RDA framework to obtain a sparse solution.

Results

We proposed sparse redundancy analysis (sRDA) for high dimensional omics data analysis. We conducted simulation studies with our software implementation of sRDA to assess the reliability of sRDA. Both the analysis of simulated data, and the analysis of 485 512 methylation markers and 18,424 gene-expression values measured in a set of 55 patients with Marfan syndrome show that sRDA is able to deal with the usual high dimensionality of omics data.

Availability and implementation

http://uva.csala.me/rda.

Contact

a.csala@amc.uva.nl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +28472230,OMSim: a simulator for optical map data.,"

Motivation

The Bionano Genomics platform allows for the optical detection of short sequence patterns in very long DNA molecules (up to 2.5 Mbp). Molecules with overlapping patterns can be assembled to generate a consensus optical map of the entire genome. In turn, these optical maps can be used to validate or improve de novo genome assembly projects or to detect large-scale structural variation in genomes. Simulated optical map data can assist in the development and benchmarking of tools that operate on those data, such as alignment and assembly software. Additionally, it can help to optimize the experimental setup for a genome of interest. Such a simulator is currently not available.

Results

We have developed a simulator, OMSim, that produces synthetic optical map data that mimics real Bionano Genomics data. These simulated data have been tested for compatibility with the Bionano Genomics Irys software system and the Irys-scaffolding scripts. OMSim is capable of handling very large genomes (over 30 Gbp) with high throughput and low memory requirements.

Availability and implementation

The Python simulation tool and a cross-platform graphical user interface are available as open source software under the GNU GPL v2 license ( http://www.bioinformatics.intec.ugent.be/omsim ).

Contact

jan.fostier@ugent.be.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +35047676,"Protocol for a systematic review of guidelines for rigour in the design, conduct and analysis of biomedical experiments involving laboratory animals.","

Objective

Within the last years, there has been growing awareness of the negative repercussions of unstandardized planning, conduct and reporting of preclinical and biomedical research. Several initiatives have set the aim of increasing validity and reliability in reporting of studies and publications, and publishers have formed similar groups. Additionally, several groups of experts across the biomedical spectrum have published experience and opinion-based guidelines and guidance on potential standardized reporting. While all these guidelines cover reporting of experiments, an important step prior to this should be rigorous planning and conduct of studies. The aim of this systematic review is to identify and harmonize existing experimental design, conduct and analysis guidelines relating to internal validity and reproducibility of preclinical animal research. The review will also identify literature describing risks of bias pertaining to the design, conduct and analysis of preclinical biomedical research.

Search strategy

PubMed, Embase and Web of Science will be searched systematically to identify guidelines published in English language in peer-reviewed journals before January 2018 (box 1). All articles or systematic reviews in English language that describe or review guidelines on the internal validity and reproducibility of animal studies will be included. Google search for guidelines published on the websites of major funders and professional organisations can be found in (Box 2).

Screening and annotation

Unique references will be screened in two phases: screening for eligibility based on title and abstract, followed by screening for definitive inclusion based on full text. Screening will be performed in SyRF (http://syrf.org.uk). Each reference will be randomly presented to two independent reviewers. Disagreements between reviewers will be resolved by additional screening of the reference by a third, senior researcher.

Data management and reporting

All data, including extracted text and guidelines, will be stored in the SyRF platform. Elements of the included guidelines will be identified using a standardized extraction form. Reporting will follow the PRISMA guidelines as far as applicable.",2018-09-07 +26338694,ClinLabGeneticist: a tool for clinical management of genetic variants from whole exome sequencing in clinical genetic laboratories.,"Routine clinical application of whole exome sequencing remains challenging due to difficulties in variant interpretation, large dataset management, and workflow integration. We describe a tool named ClinLabGeneticist to implement a workflow in clinical laboratories for management of variant assessment in genetic testing and disease diagnosis. We established an extensive variant annotation data source for the identification of pathogenic variants. A dashboard was deployed to aid a multi-step, hierarchical review process leading to final clinical decisions on genetic variant assessment. In addition, a central database was built to archive all of the genetic testing data, notes, and comments throughout the review process, variant validation data by Sanger sequencing as well as the final clinical reports for future reference. The entire workflow including data entry, distribution of work assignments, variant evaluation and review, selection of variants for validation, report generation, and communications between various personnel is integrated into a single data management platform. Three case studies are presented to illustrate the utility of ClinLabGeneticist. ClinLabGeneticist is freely available to academia at http://rongchenlab.org/software/clinlabgeneticist .",2015-07-29 +30647783,"Weblog ""How Do I Breathe?""-Design and View Statistics.","Introduction  Breathing is one of the vital functions of the body and is essential for the maintenance of life. 
Preventive measures for respiratory disorders can be used by the population, as well as early self-diagnosis and immediate search for treatment based on knowledge of this subject. Objective  this study developed a weblog on the breathing function targeting young people and analyzed the statistical data of views until the present moment. Methods  The weblog was developed, and the stages of analysis, design, development and implementation were followed. All texts were evaluated by the Flesch Reading Index to verify the language, and the statistical data were analyzed by the number of views, countries with the highest number of views, search terms used, most viewed pages and number of comments on the blog. Results  Issues related to the breathing function and those most cited in the literature were selected. The blog was structured using pages with content and curiosities, with texts with minimum readability of 50%, and was made available on the internet by means of the Wordpress tool. The statistics showed an increase in the number of visits after August 2015; the countries with the highest number of views were Brazil, United States and Portugal; the search terms used were unknown or related to mouth breathing; the most viewed pages related to mouth breathing and the comments addressed questions on mouth breathing, reports and compliments. Conclusion  The blog ""How do I breathe?,"" aimed at young people and containing information about the breathing function, was developed and is available on the internet at the address: https://comoeurespiro.wordpress.com .",2018-07-05 +28165677,Structure of the sliding clamp from the fungal pathogen Aspergillus fumigatus (AfumPCNA) and interactions with Human p21.,"The fungal pathogen Aspergillus fumigatus has been implicated in a drastic increase in life-threatening infections over the past decade. However, compared to other microbial pathogens, little is known about the essential molecular processes of this organism. 
One such fundamental process is DNA replication. The protein responsible for ensuring processive DNA replication is PCNA (proliferating cell nuclear antigen, also known as the sliding clamp), which clamps the replicative polymerase to DNA. Here we present the first crystal structure of a sliding clamp from a pathogenic fungus (A. fumigatus), at 2.6 Å. Surprisingly, the structure bears more similarity to the human sliding clamp than other available fungal sliding clamps. Reflecting this, fluorescence polarization experiments demonstrated that AfumPCNA interacts with the PCNA-interacting protein (PIP-box) motif of human p21 with an affinity (Kd) of 3.1 μM. Molecular dynamics simulations were carried out to better understand how AfumPCNA interacts with human p21. These simulations revealed that the PIP-box bound to AfumPCNA forms a secondary structure similar to that observed in the human complex, with a central 3₁₀ helix contacting the hydrophobic surface pocket of AfumPCNA as well as a β-strand that forms an antiparallel sheet with the AfumPCNA surface. Differences in the 3₁₀ helix interaction with PCNA, attributed to residue Thr131 of AfumPCNA, and a less stable β-strand formation, attributed to residues Gln123 and His125 of AfumPCNA, are likely causes of the over 10-fold lower affinity of the p21 PIP-box for AfumPCNA as compared to hPCNA.

Database

The atomic coordinates and structure factors for the Aspergillus fumigatus sliding clamp can be found in the RCSB Protein Data Bank (http://www.rcsb.org) under the accession code 5TUP.",2017-02-27 +30364805,Draft genome sequence data of a tigecycline-resistant Enterobacter cloacae ST93 clinical strain isolated from bloodstream infection.,"Here we report data on the draft genome sequence of a tigecycline-resistant Enterobacter cloacae ST93 clinical isolate TREC1 producing KPC-2 carbapenemase from China. The draft genome sequence of E. cloacae TREC1 consisted of 74 contigs that comprised 5,322,835 bp, and the overall GC content of this strain amounted to 54.63%. In total, 57 tRNA genes, 5 rRNA operons and 5108 protein-coding sequences were identified in the genome. TREC1 belongs to sequence type ST93. Nineteen antimicrobial resistance genes were confirmed. Antimicrobial susceptibility testing revealed that besides colistin this isolate is resistant to all antibiotics including tigecycline. This Whole Genome Shotgun project has been deposited at DDBJ/EMBL/GenBank under the accession number PJZE00000000. (http://www.ncbi.nlm.nih.gov/nuccore/PJZE00000000).",2018-10-05 +30909729,"Stress, Resilience, and Cardiovascular Disease Risk Among Black Women.","BACKGROUND:Empirical data on the link between stress and cardiovascular disease (CVD) risk among black women is limited. We examined associations of stressful life events and social strain with incident CVD among black women and tested for effect modification by resilience. METHODS AND RESULTS:Our analysis included 10 785 black women enrolled in the Women's Health Initiative Observational Study and Clinical Trials cohort. Participants were followed for CVD for up to 23 years (mean, 12.5). Multivariable Cox regression was used to estimate hazard ratios and 95% CIs for associations between stress-related exposures and incident CVD. 
We included interactions between follow-up time (age) and stressful life events because of evidence of nonproportional hazards. Effect modification by resilience was examined in the sub-cohort of 2765 women with resilience and stressful life events measures. Higher stressful life events were associated with incident CVD at ages 55 (hazard ratio for highest versus lowest quartile=1.80; 95% CI, 1.27-2.54) and 65 (hazard ratio for highest versus lowest quartile=1.40; 95% CI, 1.16-1.68), but not at older ages. Adjustment for CVD risk factors attenuated these associations. Similar associations were observed for social strain. In the sub-cohort of women with updated stressful life events and resilience measures, higher stressful life events were associated with incident CVD in multivariable-adjusted models (hazard ratio=1.61; 95% CI, 1.04-2.51). Resilience did not modify this association nor was resilience independently associated with incident CVD. CONCLUSIONS:In this cohort of older black women, recent reports of stressful life events were related to incident CVD. Resilience was unrelated to incident CVD. CLINICAL TRIALS REGISTRATION:URL: https://www.clinicaltrials.gov . Unique identifier: NCT00000611.",2019-04-01 +29555702,Genotypic and Phenotypic Characterization of the O-Linked Protein Glycosylation System Reveals High Glycan Diversity in Paired Meningococcal Carriage Isolates. ,"Species within the genus Neisseria display significant glycan diversity associated with the O-linked protein glycosylation (pgl) systems due to phase variation and polymorphic genes and gene content. The aim of this study was to examine in detail the pgl genotype and glycosylation phenotype in meningococcal isolates and the changes occurring during short-term asymptomatic carriage. Paired meningococcal isolates derived from 50 asymptomatic meningococcal carriers, taken about 2 months apart, were analyzed with whole-genome sequencing. 
The O-linked protein glycosylation genes were characterized in detail using the Genome Comparator tool at the https://pubmlst.org/ database. Immunoblotting with glycan-specific antibodies (Abs) was used to investigate the protein glycosylation phenotype. All major pgl locus polymorphisms identified in Neisseria meningitidis to date were present in our isolate collection, with the variable presence of pglG and pglH, both in combination with either pglB or pglB2. We identified significant changes and diversity in the pgl genotype and/or glycan phenotype in 96% of the paired isolates. There was also a high degree of glycan microheterogeneity, in which different variants of glycan structures were found at a given glycoprotein. The main mechanism responsible for the observed differences was phase-variable expression of the involved glycosyltransferases and the O-acetyltransferase. To our knowledge, this is the first characterization of the pgl genotype and glycosylation phenotype in a larger strain collection. This report thus provides important insight into glycan diversity in N. meningitidis and into the phase variability changes that influence the expressed glycoform repertoire during meningococcal carriage. IMPORTANCE Bacterial meningitis is a serious global health problem, and one of the major causative organisms is Neisseria meningitidis, which is also a common commensal in the upper respiratory tract of healthy humans. In bacteria, numerous loci involved in biosynthesis of surface-exposed antigenic structures that are involved in the interaction between bacteria and host are frequently subjected to homologous recombination and phase variation. These mechanisms are well described in Neisseria, and phase variation provides the ability to change these structures reversibly in response to the environment. 
Protein glycosylation systems are becoming widely identified in bacteria, and yet little is known about the mechanisms and evolutionary forces influencing glycan composition during carriage and disease.",2018-07-25 +30933310,Release parameters during progressive degeneration of dopamine neurons in a mouse model reveal earlier impairment of spontaneous than forced behaviors.,"To determine the role of reduced dopaminergic transmission for declines of forced versus spontaneous behavior, we used a model of Parkinson's disease with progressive degeneration of dopamine (DA) neurons, the MitoPark mouse. Mice were subjected to rotarod tests of motor coordination, and open field and cylinder tests for spontaneous locomotor activity and postural axial support. To measure DA release in dorsal striatum and the shell of Nucleus Accumbens (NAc), we used ex vivo fast-scan cyclic voltammetry in 6- to 24-week-old mice. To determine decline of DA transporter function, we used 18FE-PE2I positron emission tomography. We show here that fast-scan cyclic voltammetry is a sensitive tool to detect evoked DA release dysfunction in MitoPark mice and that electrically evoked DA release is affected earlier in nigrostriatal than mesolimbic DA systems. DA reuptake was also affected more slowly in NAc shell. Positron emission tomography data showed DA uptake to be barely above detection levels in 16- and 20-week-old MitoPark mice. Rotarod performance was not impaired until mice were 16 weeks old, when evoked DA release in striatum had decreased to ≈ 40% of wild-type levels. In contrast, impairment of open field locomotion and rearing began at 10 weeks, in parallel with the initial modest decline of evoked DA release. 
We conclude that forced behaviors, such as motivation not to fall, can be partially maintained even when DA release is severely compromised, whereas spontaneous behaviors are much more sensitive to impaired DA release, and that presumed secondary non-dopaminergic system alterations do not markedly counteract or aggravate effects of severe impairment of DA release. OPEN SCIENCE BADGES: This article has received a badge for *Open Materials* because it provided all relevant information to reproduce the study in the manuscript. The complete Open Science Disclosure form for this article can be found at the end of the article. More information about the Open Practices badges can be found at https://cos.io/our-services/open-science-badges/.",2019-05-09 +31187346,Acuity VEP: improved with machine learning.,"

Purpose

Acuity-VEP approaches basically all use the information obtained across a number of check sizes (or spatial frequencies) to derive a measure of acuity. Amplitude is always used, sometimes combined with phase or a noise measure. In our approach, we employ steady-state brief-onset low-contrast checkerboard stimulation and obtain amplitude and significance for six different check sizes, yielding 12 numbers. The rule-based ""heuristic algorithm"" (Bach et al. in Br J Ophthalmol 92:396-403, 2008. https://doi.org/10.1136/bjo.2007.130245 ) is successful in over 95% with a limit of agreement (LoA) of ± 0.3LogMAR between behavioral and objective acuity for 109 cases. We here aimed to test whether machine learning techniques with this relatively small dataset could achieve a similar LoA.

Methods

Given recent advances in machine learning (ML), we applied a wide class of ML algorithms to this dataset. This was done within the ""caret"" framework of R using altogether 89 methods, of which rule-based and multiple regression approaches performed best. For cross-validation, using a jackknife (leave-one-out) approach, we predicted each case based on an ML model having been trained on all remaining 108 cases.

Results

The ML approach predicted visual acuity well across many different types of ML algorithms. Using amplitude values only (discarding the p values) improved the outcome. Nearly half of the tested ML algorithms achieved an LoA better than the heuristic algorithm; several ""Random Forest""- or ""multiple regression""-type algorithms achieved an LoA of below ± 0.3. In the cases where the heuristic approach failed, acuity was predicted successfully. We then applied the ML model trained with the Bach et al. [1] dataset to a new dataset from 2018 (78 cases) and found both for the heuristic algorithm and for the ML approach an LoA of ± 0.259, a nearly one-line improvement.

Conclusions

The ML approach appears to be a useful alternative to rule-based analysis of acuity-VEP data. The achieved accuracy is comparable or better (in no case the ML-based acuity differed more than ± 0.29 LogMAR from behavioral acuity), and testability is higher, nearly 100%. Possible pitfalls are examined.",2019-06-11 +29950328,Lowering the Barriers to Routine Whole-Genome Sequencing of Bacteria in the Clinical Microbiology Laboratory. ,"Whole-genome sequencing of bacterial isolates is increasingly being used to predict antibacterial susceptibility and resistance. Mason and coauthors describe the phenotypic susceptibility interpretations of more than 1,300 Staphylococcus aureus isolates tested against a dozen antistaphylococcal agents, and they compared these findings to susceptibility predictions made by analyzing whole-genome sequence data (J Clin Microbiol 56:e01815-17, 2018, https://doi.org/10.1128/JCM.01815-17). The genotype-phenotype susceptibility interpretations correlated in 96.3% (2,720/2,825) of resistant findings and 98.8% (11,504/11,639) of susceptible findings. This work by Mason and colleagues is helping to lower the barriers to using whole-genome sequencing of S. aureus in clinical microbiology practice.",2018-08-27 +29115832,Improved Prediction of Bovine Leucocyte Antigens (BoLA) Presented Ligands by Use of Mass-Spectrometry-Determined Ligand and in Vitro Binding Data.,"Peptide binding to MHC class I molecules is the single most selective step in antigen presentation and the strongest single correlate to peptide cellular immunogenicity. The cost of experimentally characterizing the rules of peptide presentation for a given MHC-I molecule is extensive, and predictors of peptide-MHC interactions constitute an attractive alternative. Recently, an increasing amount of MHC presented peptides identified by mass spectrometry (MS ligands) has been published. 
Handling and interpretation of MS ligand data is, in general, challenging due to the polyspecificity nature of the data. We here outline a general pipeline for dealing with this challenge and accurately annotate ligands to the relevant MHC-I molecule they were eluted from by use of GibbsClustering and binding motif information inferred from in silico models. We illustrate the approach here in the context of MHC-I molecules (BoLA) of cattle. Next, we demonstrate how such annotated BoLA MS ligand data can readily be integrated with in vitro binding affinity data in a prediction model with very high and unprecedented performance for identification of BoLA-I restricted T-cell epitopes. The prediction model is freely available at http://www.cbs.dtu.dk/services/NetMHCpan/NetBoLApan . The approach has here been applied to the BoLA-I system, but the pipeline is readily applicable to MHC systems in other species.",2017-11-14 +30371784,Fluid accumulation in the staged Fontan procedure: the impact of colloid osmotic pressures.,"

Objectives

Despite Fontan surgery showing improved results, fluid accumulation and oedema formation with pleural effusion are major challenges. Transcapillary fluid balance is dependent on hydrostatic and colloid osmotic pressure (COP) gradients; however, the COP values are not known for Fontan patients. The aim of this study was to evaluate the COP of plasma (COPp) and interstitial fluid (COPi) in children undergoing bidirectional cavopulmonary connection and total cavopulmonary connection.

Methods

This study was designed as a prospective, observational study. Thirty-nine children (age 3 months-4.9 years) undergoing either bidirectional cavopulmonary connection or total cavopulmonary connection procedures were included. Blood samples and interstitial fluid were obtained prior to, during and after the preoperative cardiac catheterization and surgery with the use of cardiopulmonary bypass (CPB). Interstitial fluid was harvested using the wick method when the patient was under general anaesthesia. Plasma and interstitial fluid were measured by a colloid osmometer. Baseline values were compared with data from healthy controls.

Results

Baseline COPp was 20.6 ± 2.8 and 22.0 ± 3.2 mmHg and COPi was 11.3 ± 2.6 and 12.5 ± 3.5 mmHg in the bidirectional cavopulmonary connection group and the total cavopulmonary connection group, respectively. These values were significantly lower than in healthy controls. The COPp was slightly reduced throughout both procedures and normalized after surgery. The COPi increased slightly during the use of CPB and significantly decreased after surgery, resulting in an increased COP gradient and was correlated to pleural effusion.

Conclusions

Fluid accumulation seen after Fontan surgery is associated with changes in COPs, determinants for fluid filtration and lymphatic flow.

Clinicaltrials.gov identifier

NCT 02306057: https://clinicaltrials.gov/ct2/results?cond=&term=NCT+02306057.",2019-04-01 +30044232,Associations between Living Near Water and Risk of Mortality among Urban Canadians.,"

Background

Increasing evidence suggests that residential exposures to natural environments, such as green spaces, are associated with many health benefits. Only a single study has examined the potential link between living near water and mortality.

Objective

We sought to examine whether residential proximity to large, natural water features (e.g., lakes, rivers, coasts, ""blue space"") was associated with cause-specific mortality.

Methods

Our study is based on a population-based cohort of nonimmigrant adults living in the 30 largest Canadian cities [i.e., the 2001 Canadian Census Health and Environment Cohort (CanCHEC)]. Subjects were drawn from the mandatory 2001 Statistics Canada long-form census, who were linked to the Canadian mortality database and to annual income-tax filings, through 2011. We estimated associations between living within 250 m of blue space and deaths from several common causes of death. We adjusted models for many personal and contextual covariates, as well as for exposures to residential greenness and ambient air pollution.

Results

Our cohort included approximately 1.3 million subjects at baseline, 106,180 of whom died from nonaccidental causes during follow-up. We found significant, reduced risks of mortality in the range of 12-17% associated with living within 250 m of water in comparison with living farther away, among all causes of death examined, except with external/accidental causes. Protective effects were found to be higher among women and all older adults than among other subjects, and protective effects were found to be highest against deaths from stroke and respiratory-related causes.

Conclusions

Our findings suggest that living near blue spaces in urban areas has important benefits to health, but further work is needed to better understand the drivers of this association. https://doi.org/10.1289/EHP3397.",2018-07-24 +26140937,A Summary of the American Society of Echocardiography Foundation Value-Based Healthcare: Summit 2014: The Role of Cardiovascular Ultrasound in the New Paradigm.,"Value-Based Healthcare: Summit 2014 clearly achieved the three goals set forth at the beginning of this document. First, the live event informed and educated attendees through a discussion of the evolving value-based healthcare environment, including a collaborative effort to define the important role of cardiovascular ultrasound in that environment. Second, publication of these Summit proceedings in the Journal of the American Society of Echocardiography will inform a wider audience of the important insights gathered. Third, moving forward, the ASE will continue to build a ‘‘living resource’’ on its website, http://www.asecho.org, for clinicians, researchers, and administrators to use in advocating for the value of cardiovascular ultrasound in the new value-based healthcare environment. The ASE looks forward to incorporating many of the Summit recommendations as it works with its members, legislators, payers, hospital administrators, and researchers to demonstrate and increase the value of cardiovascular ultrasound. All Summit attendees shared in the infectious enthusiasm generated by this proactive approach to ensuring cardiovascular ultrasound’s place as ‘‘The Value Choice’’ in cardiac imaging.",2015-07-01 +24271399,The Structure-Function Linkage Database.,"The Structure-Function Linkage Database (SFLD, http://sfld.rbvi.ucsf.edu/) is a manually curated classification resource describing structure-function relationships for functionally diverse enzyme superfamilies. 
Members of such superfamilies are diverse in their overall reactions yet share a common ancestor and some conserved active site features associated with conserved functional attributes such as a partial reaction. Thus, despite their different functions, members of these superfamilies 'look alike', making them easy to misannotate. To address this complexity and enable rational transfer of functional features to unknowns only for those members for which we have sufficient functional information, we subdivide superfamily members into subgroups using sequence information, and lastly into families, sets of enzymes known to catalyze the same reaction using the same mechanistic strategy. Browsing and searching options in the SFLD provide access to all of these levels. The SFLD offers manually curated as well as automatically classified superfamily sets, both accompanied by search and download options for all hierarchical levels. Additional information includes multiple sequence alignments, tab-separated files of functional and other attributes, and sequence similarity networks. The latter provide a new and intuitively powerful way to visualize functional trends mapped to the context of sequence similarity.",2013-11-23 +29926116,Identification of novel high-impact recessively inherited type 2 diabetes risk variants in the Greenlandic population.,"

Aims/hypothesis

In a recent study using a standard additive genetic model, we identified a TBC1D4 loss-of-function variant with a large recessive impact on risk of type 2 diabetes in Greenlanders. The aim of the current study was to identify additional genetic variation underlying type 2 diabetes using a recessive genetic model, thereby increasing the power to detect variants with recessive effects.

Methods

We investigated three cohorts of Greenlanders (B99, n = 1401; IHIT, n = 3115; and BBH, n = 547), which were genotyped using Illumina MetaboChip. Of the 4674 genotyped individuals passing quality control, 4648 had phenotype data available, and type 2 diabetes association analyses were performed for 317 individuals with type 2 diabetes and 2631 participants with normal glucose tolerance. Statistical association analyses were performed using a linear mixed model.

Results

Using a recessive genetic model, we identified two novel loci associated with type 2 diabetes in Greenlanders, namely rs870992 in ITGA1 on chromosome 5 (OR 2.79, p = 1.8 × 10⁻⁸), and rs16993330 upstream of LARGE1 on chromosome 22 (OR 3.52, p = 1.3 × 10⁻⁷). The LARGE1 variant did not reach the conventional threshold for genome-wide significance (p < 5 × 10⁻⁸) but did withstand a study-wide Bonferroni-corrected significance threshold. Both variants were common in Greenlanders, with minor allele frequencies of 23% and 16%, respectively, and were estimated to have large recessive effects on risk of type 2 diabetes in Greenlanders, compared with additively inherited variants previously observed in European populations.

Conclusions/interpretation

We demonstrate the value of using a recessive genetic model in a historically small and isolated population to identify genetic risk variants. Our findings give new insights into the genetic architecture of type 2 diabetes, and further support the existence of high-effect genetic risk factors of potential clinical relevance, particularly in isolated populations.

Data availability

The Greenlandic MetaboChip-genotype data are available at European Genome-Phenome Archive (EGA; https://ega-archive.org/ ) under the accession EGAS00001002641.",2018-06-20 +32625990,Updated pest categorisation of Xylella fastidiosa.,"Following a request from the European Commission, the EFSA Plant Health Panel updated its pest categorisation of Xylella fastidiosa, previously delivered as part of the pest risk assessment published in 2015. X. fastidiosa is a Gram-negative bacterium, responsible for various plant diseases, including Pierce's disease, phony peach disease, citrus variegated chlorosis, olive quick decline syndrome, almond leaf scorch and various other leaf scorch diseases. The pathogen is endemic in the Americas and is present in Iran. In the EU, it is reported in southern Apulia in Italy, on the island of Corsica and in the Provence-Alpes-Côte d'Azur region in France, as well as in the Autonomous region of Madrid, the province of Alicante and the Balearic Islands in Spain. The reported status is 'transient, under eradication', except for the Balearic Islands, Corsica and southern of Apulia, where the status is 'present with a restricted distribution, under containment'. The pathogen is regulated under Council Directive 2000/29/EC and through emergency measures under http://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32015D0789 (as amended http://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32017D2352). The pest could enter the EU via host plants for planting and via infectious insect vectors. The host range includes hundreds of host species listed in the EFSA host plant database. In the EU, host plants are widely distributed and climatic conditions are favourable for its establishment. X. fastidiosa can spread by movement of host plants for planting and infectious insect vectors. X. 
fastidiosa is known to cause severe direct damage to major crops including almonds, citrus, grapevines, olives, stone fruits and also forest trees, landscape and ornamental trees, with high impacts. The criteria assessed by the Panel for consideration as a potential Union quarantine pest are met (the pathogen is present in the EU, but it has a restricted distribution and is under official control). X. fastidiosa is not considered as a regulated non-quarantine pest (RNQP) as the pathogen may spread also via insect vector transmission.",2018-07-23 +24767140,Risks and benefits of transfusion for children with severe anemia in Africa.,"Severe anemia contributes significantly to child mortality in sub-Saharan Africa. Blood transfusion is used in emergencies but carries risks. In BMC Medicine, Olupot-Olupot and colleagues report the findings of a phase II trial in children with severe anemia in Eastern Uganda. They provide important early safety and efficacy data supporting large volume whole blood transfusion (30 ml/kg) compared with the World Health Organization recommendation of 20 ml/kg. Large volume transfusions result in more rapid and frequent correction of severe anemia; they can be expected to reduce the risk of transfusions, and help manage the scarce resource of donor blood. However, severe anemia arises from varying combinations of acute, sub-acute and chronic etiologies. The Fluid Expansion As Supportive Therapy study reminds us that the risks and benefits of even simple interventions are complex, and that rapid normalization of physiology may not always be the best strategy. There is no substitute for high quality evidence and to this end we strongly support Olupot-Oluput and colleagues' call for a definitive trial of large volume transfusions in severe anemia. 
Please see related research article http://www.biomedcentral.com/1741-7015/12/67/abstract.",2014-04-25 +27982098,ARN: Analysis and Visualization System for Adipogenic Regulation Network Information.,"Adipogenesis is the process of cell differentiation through which preadipocytes become adipocytes. Lots of research is currently ongoing to identify genes, including their gene products and microRNAs, that correlate with fat cell development. However, information fragmentation hampers the identification of key regulatory genes and pathways. Here, we present a database of literature-curated adipogenesis-related regulatory interactions, designated the Adipogenesis Regulation Network (ARN, http://210.27.80.93/arn/), which currently contains 3101 nodes (genes and microRNAs), 1863 regulatory interactions, and 33,969 expression records associated with adipogenesis, based on 1619 papers. A sentence-based text-mining approach was employed for efficient manual curation of regulatory interactions from approximately 37,000 PubMed abstracts. Additionally, we further determined 13,103 possible node relationships by searching miRGate, BioGRID, PAZAR and TRRUST. ARN also has several useful features: i) regulatory map information; ii) tests to examine the impact of a query node on adipogenesis; iii) tests for the interactions and modes of a query node; iv) prediction of interactions of a query node; and v) analysis of experimental data or the construction of hypotheses related to adipogenesis. In summary, ARN can store, retrieve and analyze adipogenesis-related information as well as support ongoing adipogenesis research and contribute to the discovery of key regulatory genes and pathways.",2016-12-16 +29363431,PGAP-X: extension on pan-genome analysis pipeline.,"

Background

Since PGAP (pan-genome analysis pipeline) was published in 2012, it has been widely employed in bacterial genomics research. Though PGAP has integrated several modules for pan-genomics analysis, how to properly and effectively interpret and visualize the results data is still a challenge.

Result

To well present bacterial genomic characteristics, a novel cross-platform software was developed, named PGAP-X. Four kinds of data analysis modules were developed and integrated: whole genome sequences alignment, orthologous genes clustering, pan-genome profile analysis, and genetic variants analysis. The results from these analyses can be directly visualized in PGAP-X. The modules for data visualization in PGAP-X include: comparison of genome structure, gene distribution by conservation, pan-genome profile curve and variation on genic and genomic region. Meanwhile, result data produced by other programs with similar function can be imported to be further analyzed and visualized in PGAP-X. To test the performance of PGAP-X, we comprehensively analyzed 14 Streptococcus pneumonia strains and 14 Chlamydia trachomatis. The results show that, S. pneumonia strains have higher diversity on genome structure and gene contents than C. trachomatis strains. In addition, S. pneumonia strains might have suffered many evolutionary events, such genomic rearrangements, frequent horizontal gene transfer, homologous recombination, and other evolutionary process.

Conclusion

Briefly, PGAP-X directly presents the characteristics of bacterial genomic diversity with different visualization methods, which could help us to intuitively understand dynamics and evolution in bacterial genomes. The source code and the pre-complied executable programs are freely available from http://pgapx.ybzhao.com .",2018-01-19 +,"Analysis and visualization of H7 influenza using genomic, evolutionary and geographic information in a modular web service","We have reported previously on use of a web‐based application, Supramap (http://supramap.org) for the study of biogeographic, genotypic, and phenotypic evolution. Using Supramap we have developed maps of the spread of drug‐resistant influenza and host shifts in H1N1 and H5N1 influenza and coronaviruses such as SARS. Here we report on another zoonotic pathogen, H7 influenza, and provide an update on the implementation of Supramap as a web service. We find that the emergence of pathogenic strains of H7 is labile with many transitions from high to low pathogenicity, and from low to high pathogenicity. We use Supramap to put these events in a temporal and geospatial context. We identify several lineages of H7 influenza with biomarkers of high pathogenicity in regions that have not been reported in the scientific literature. The original implementation of Supramap was built with tightly coupled client and server software. Now we have decoupled the components to provide a modular web service for POY (http://poyws.org) that can be consumed by a data provider to create a novel application. To demonstrate the web service, we have produced an application, Geogenes (http://geogenes.org). Unlike in Supramap, in which the user is required to create and upload data files, in Geogenes the user works from a graphical interface to query an underlying dataset. Geogenes demonstrates how the web service can provide underlying processing for any sequence and metadata database. 
© The Willi Hennig Society 2012.",2012-10-01 +28968714,FATHMM-XF: accurate prediction of pathogenic point mutations via extended features.,"

Summary

We present FATHMM-XF, a method for predicting pathogenic point mutations in the human genome. Drawing on an extensive feature set, FATHMM-XF outperforms competitors on benchmark tests, particularly in non-coding regions where the majority of pathogenic mutations are likely to be found.

Availability and implementation

The FATHMM-XF web server is available at http://fathmm.biocompute.org.uk/fathmm-xf/, and as tracks on the Genome Tolerance Browser: http://gtb.biocompute.org.uk. Predictions are provided for human genome version GRCh37/hg19. The data used for this project can be downloaded from: http://fathmm.biocompute.org.uk/fathmm-xf/.

Contact

mark.rogers@bristol.ac.uk or c.campbell@bristol.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-02-01 +24265221,ArchDB 2014: structural classification of loops in proteins.,"The function of a protein is determined by its three-dimensional structure, which is formed by regular (i.e. β-strands and α-helices) and non-periodic structural units such as loops. Compared to regular structural elements, non-periodic, non-repetitive conformational units enclose a much higher degree of variability--raising difficulties in the identification of regularities, and yet represent an important part of the structure of a protein. Indeed, loops often play a pivotal role in the function of a protein and different aspects of protein folding and dynamics. Therefore, the structural classification of protein loops is an important subject with clear applications in homology modelling, protein structure prediction, protein design (e.g. enzyme design and catalytic loops) and function prediction. ArchDB, the database presented here (freely available at http://sbi.imim.es/archdb), represents such a resource and has been an important asset for the scientific community throughout the years. In this article, we present a completely reworked and updated version of ArchDB. The new version of ArchDB features a novel, fast and user-friendly web-based interface, and a novel graph-based, computationally efficient, clustering algorithm. The current version of ArchDB classifies 149,134 loops in 5739 classes and 9608 subclasses.",2013-11-21 +29922297,seq-ImmuCC: Cell-Centric View of Tissue Transcriptome Measuring Cellular Compositions of Immune Microenvironment From Mouse RNA-Seq Data.,"The RNA sequencing approach has been broadly used to provide gene-, pathway-, and network-centric analyses for various cell and tissue samples. However, thus far, rich cellular information carried in tissue samples has not been thoroughly characterized from RNA-Seq data. 
Therefore, it would expand our horizons to better understand the biological processes of the body by incorporating a cell-centric view of tissue transcriptome. Here, a computational model named seq-ImmuCC was developed to infer the relative proportions of 10 major immune cells in mouse tissues from RNA-Seq data. The performance of seq-ImmuCC was evaluated among multiple computational algorithms, transcriptional platforms, and simulated and experimental datasets. The test results showed its stable performance and superb consistency with experimental observations under different conditions. With seq-ImmuCC, we generated the comprehensive landscape of immune cell compositions in 27 normal mouse tissues and extracted the distinct signatures of immune cell proportion among various tissue types. Furthermore, we quantitatively characterized and compared 18 different types of mouse tumor tissues of distinct cell origins with their immune cell compositions, which provided a comprehensive and informative measurement for the immune microenvironment inside tumor tissues. The online server of seq-ImmuCC are freely available at http://wap-lab.org:3200/immune/.",2018-06-05 +27506385,Predicting gene targets from integrative analyses of summary data from GWAS and eQTL studies for 28 human complex traits.,"Genome-wide association studies (GWAS) have identified hundreds of genetic variants associated with complex traits and diseases. However, elucidating the causal genes underlying GWAS hits remains challenging. We applied the summary data-based Mendelian randomization (SMR) method to 28 GWAS summary datasets to identify genes whose expression levels were associated with traits and diseases due to pleiotropy or causality (the expression level of a gene and the trait are affected by the same causal variant at a locus). We identified 71 genes, of which 17 are novel associations (no GWAS hit within 1 Mb distance of the genes). 
We integrated all the results in an online database ( http://www.cnsgenomics/shiny/SMRdb/ ), providing important resources to prioritize genes for further follow-up, for example in functional studies.",2016-08-09 +30444294,"Ultrafast 3D Bloch-Siegert B 1 + -mapping using variational modeling.","

Purpose

Highly accelerated B 1 + -mapping based on the Bloch-Siegert shift to allow 3D acquisitions even within a brief period of a single breath-hold.

Theory and methods

The B 1 + dependent Bloch-Siegert phase shift is measured within a highly subsampled 3D-volume and reconstructed using a two-step variational approach, exploiting the different spatial distribution of morphology and B 1 + -field. By appropriate variable substitution the basic non-convex optimization problem is transformed in a sequential solution of two convex optimization problems with a total generalized variation (TGV) regularization for the morphology part and a smoothness constraint for the B 1 + -field. The method is evaluated on 3D in vivo data with retro- and prospective subsampling. The reconstructed B 1 + -maps are compared to a zero-padded low resolution reconstruction and a fully sampled reference.

Results

The reconstructed B 1 + -field maps are in high accordance to the reference for all measurements with a mean error below 1% and a maximum of about 4% for acceleration factors up to 100. The minimal error for different sampling patterns was achieved by sampling a dense region in k-space center with acquisition times of around 10-12 s for 3D-acquistions.

Conclusions

The proposed variational approach enables highly accelerated 3D acquisitions of Bloch-Siegert data and thus full liver coverage in a single breath hold.",2018-10-12 +28011774,"Fast motif matching revisited: high-order PWMs, SNPs and indels.","

Motivation

While the position weight matrix (PWM) is the most popular model for sequence motifs, there is growing evidence of the usefulness of more advanced models such as first-order Markov representations, and such models are also becoming available in well-known motif databases. There has been lots of research of how to learn these models from training data but the problem of predicting putative sites of the learned motifs by matching the model against new sequences has been given less attention. Moreover, motif site analysis is often concerned about how different variants in the sequence affect the sites. So far, though, the corresponding efficient software tools for motif matching have been lacking.

Results

We develop fast motif matching algorithms for the aforementioned tasks. First, we formalize a framework based on high-order position weight matrices for generic representation of motif models with dinucleotide or general q -mer dependencies, and adapt fast PWM matching algorithms to the high-order PWM framework. Second, we show how to incorporate different types of sequence variants , such as SNPs and indels, and their combined effects into efficient PWM matching workflows. Benchmark results show that our algorithms perform well in practice on genome-sized sequence sets and are for multiple motif search much faster than the basic sliding window algorithm.

Availability and implementation

Implementations are available as a part of the MOODS software package under the GNU General Public License v3.0 and the Biopython license ( http://www.cs.helsinki.fi/group/pssmfind ).

Contact

janne.h.korhonen@gmail.com.",2017-02-01 +28832679,A comprehensive simulation study on classification of RNA-Seq data.,"RNA sequencing (RNA-Seq) is a powerful technique for the gene-expression profiling of organisms that uses the capabilities of next-generation sequencing technologies. Developing gene-expression-based classification algorithms is an emerging powerful method for diagnosis, disease classification and monitoring at molecular level, as well as providing potential markers of diseases. Most of the statistical methods proposed for the classification of gene-expression data are either based on a continuous scale (eg. microarray data) or require a normal distribution assumption. Hence, these methods cannot be directly applied to RNA-Seq data since they violate both data structure and distributional assumptions. However, it is possible to apply these algorithms with appropriate modifications to RNA-Seq data. One way is to develop count-based classifiers, such as Poisson linear discriminant analysis and negative binomial linear discriminant analysis. Another way is to bring the data closer to microarrays and apply microarray-based classifiers. In this study, we compared several classifiers including PLDA with and without power transformation, NBLDA, single SVM, bagging SVM (bagSVM), classification and regression trees (CART), and random forests (RF). We also examined the effect of several parameters such as overdispersion, sample size, number of genes, number of classes, differential-expression rate, and the transformation method on model performances. A comprehensive simulation study is conducted and the results are compared with the results of two miRNA and two mRNA experimental datasets. The results revealed that increasing the sample size, differential-expression rate and decreasing the dispersion parameter and number of groups lead to an increase in classification accuracy. 
Similar with differential-expression studies, the classification of RNA-Seq data requires careful attention when handling data overdispersion. We conclude that, as a count-based classifier, the power transformed PLDA and, as a microarray-based classifier, vst or rlog transformed RF and SVM classifiers may be a good choice for classification. An R/BIOCONDUCTOR package, MLSeq, is freely available at https://www.bioconductor.org/packages/release/bioc/html/MLSeq.html.",2017-08-23 +30194079,Expression of the Vesicular Monoamine Transporter Gene Solute Carrier Family 18 Member 1 (SLC18A1) in Lung Cancer.,"

Background

One aspect of smoking and lung cancer that has not been closely examined, is that regarding genes that may predispose to tobacco dependence. Smoking and mental illness are tightly linked, apparently the result of smokers using cigarettes to self-medicate for mental problems. The gene for solute carrier family 18 member A1 (vesicular monoamine transporter; SLC18A1) is of particular interest in this regard because of its association with schizophrenia, autism and bipolar illness as well as with cancer. In the current study, the relationship of SLC18A1 expression with smoking and lung cancer was analyzed.

Materials and methods

The association between smoking, SLC18A1 expression and overall survival in the lung cancer dataset in The Cancer Genome Atlas was evaluated using the Genomic Data Commons Data Portal (https://portal.gdc.cancer.gov), as well as CbioPortal for Cancer Genomics (http://www.cbioportal.org) and the University of California Santa Cruz Xena browser (https://xenabrowser.net).

Results

Increased expression of SLC18A1 was found to be associated with a significantly increased survival in patients with adenocarcinoma (p=0.0058), but not those with squamous carcinoma (p=0.96). Lifelong never-smokers had the highest SLC18A1 expression. In the Pan Cancer Atlas, increased expression of SLC18A1 places such a tumor in group C5, among immunologically-quiet tumors.

Conclusion

Most never-smokers with lung cancer do not respond to immune checkpoint inhibitors (ICIs). But for unknown reasons, a small proportion do show clinical benefit from the ICI pembrolizumab. Because of the good response of this group, it may be worthwhile assessing their SLC18A1 expression pre-treatment as a marker for potential clinical benefit. If SLC18A1 expression is low, a never-smoker may respond well to ICIs. High levels of expression would indicate a C5 tumor less likely to respond to ICIs. SLC18A1 might complement other biomarkers currently under study in relation to programmed cell death protein 1/programmed cell death protein ligand 1 inhibition.",2018-09-01 +30026590,Adult energy requirements predicted from doubly labeled water.,"

Background

Estimating energy requirements forms an integral part of developing diet and activity interventions. Current estimates often rely on a product of physical activity level (PAL) and a resting metabolic rate (RMR) prediction. PAL estimates, however, typically depend on subjective self-reported activity or a clinician's best guess. Energy-requirement models that do not depend on an input of PAL may provide an attractive alternative.

Methods

Total daily energy expenditure (TEE) measured by doubly labeled water (DLW) and a metabolic chamber from 119 subjects obtained from a database of pre-intervention measurements measured at Pennington Biomedical Research Center were used to develop a metabolic ward and free-living models that predict energy requirements. Graded models, including different combinations of input variables consisting of age, height, weight, waist circumference, body composition, and the resting metabolic rate were developed. The newly developed models were validated and compared to three independent databases.

Results

Sixty-four different linear and nonlinear regression models were developed. The adjusted R2 for models predicting free-living energy requirements ranged from 0.65 with covariates of age, height, and weight to 0.74 in models that included body composition and RMR. Independent validation R2 between actual and predicted TEE varied greatly across studies and between genders with higher coefficients of determination, lower bias, slopes closer to 1, and intercepts closer to zero, associated with inclusion of body composition and RMR covariates. The models were programmed into a user-friendly web-based app available at: http://www.pbrc.edu/research-and-faculty/calculators/energy-requirements/ (Video Demo for Reviewers at: https://www.youtube.com/watch?v=5UKjJeQdODQ ) CONCLUSIONS: Energy-requirement equations that do not require knowledge of activity levels and include all available input variables can provide more accurate baseline estimates. The models are clinically accessible through the web-based application.",2018-07-19 +31053086,"MDA19, a novel CB2 agonist, inhibits hepatocellular carcinoma partly through inactivation of AKT signaling pathway.","

Background

CB2 (cannabinoid receptor 2) agonists have been shown to exert anti-tumor activities in different tumor types. However, there is no study exploring the role of MDA19 (a novel CB2 agonist) in tumors. In this study we aimed to investigate the effects of MDA19 treatment on HCC cell lines, Hep3B and HepG2 and determine the relevant mechanisms.

Results

Cell proliferation analysis, including CCK8 and colony formation assays, indicated that MDA19 treatment inhibited HCC cell proliferation in a dose- and time-dependent manner. Flow cytometry suggested that MDA19 induced cell apoptosis and activation of mitochondrial apoptosis pathway. Transwell assay indicated that HCC cell migration and invasion were significantly inhibited by MDA19 treatment. Mechanism investigation suggested that MDA19 induced inactivation of AKT signaling pathway in HCC cells. In addition, we investigated the function of CB2receptor in HCC and its role in the anti-tumor activity of MDA19. By searching on Kaplan-Meier plotter ( http://kmplot.com/analysis/ ), we found that HCC patients with high CB2 expression had a better survival and CB2 expression was significantly associated with gender, clinical stages and race of HCC patients (P < 0.05). CB2 inhibited the progression of HCC cells and its knockdown could rescue the growth inhibition induced by MDA19 in HCC. Moreover, the inhibitory effect of MDA19 on AKT signaling pathway was also reversed by CB2 knockdown.

Conclusion

Our data suggest that MDA-19 exerts an anti-tumor activity at least partly through inactivation of AKT signaling pathway in HCC. CB2 functions as a tumor suppressor gene in HCC, and MDA19-induced growth inhibition of HCC cells depends on its binding to CB2 to activate it. MDA-19 treatment may be a promising strategy for HCC therapy.

Reviewer

This article was reviewed by Tito Cali, Mohamed Naguib and Bo Chen.",2019-05-03 +26155308,The Chemical Validation and Standardization Platform (CVSP): large-scale automated validation of chemical structure datasets.,"

Background

There are presently hundreds of online databases hosting millions of chemical compounds and associated data. As a result of the number of cheminformatics software tools that can be used to produce the data, subtle differences between the various cheminformatics platforms, as well as the naivety of the software users, there are a myriad of issues that can exist with chemical structure representations online. In order to help facilitate validation and standardization of chemical structure datasets from various sources we have delivered a freely available internet-based platform to the community for the processing of chemical compound datasets.

Results

The chemical validation and standardization platform (CVSP) both validates and standardizes chemical structure representations according to sets of systematic rules. The chemical validation algorithms detect issues with submitted molecular representations using pre-defined or user-defined dictionary-based molecular patterns that are chemically suspicious or potentially requiring manual review. Each identified issue is assigned one of three levels of severity - Information, Warning, and Error - in order to conveniently inform the user of the need to browse and review subsets of their data. The validation process includes validation of atoms and bonds (e.g., making aware of query atoms and bonds), valences, and stereo. The standard form of submission of collections of data, the SDF file, allows the user to map the data fields to predefined CVSP fields for the purpose of cross-validating associated SMILES and InChIs with the connection tables contained within the SDF file. This platform has been applied to the analysis of a large number of data sets prepared for deposition to our ChemSpider database and in preparation of data for the Open PHACTS project. In this work we review the results of the automated validation of the DrugBank dataset, a popular drug and drug target database utilized by the community, and ChEMBL 17 data set. CVSP web site is located at http://cvsp.chemspider.com/.

Conclusion

A platform for the validation and standardization of chemical structure representations of various formats has been developed and made available to the community to assist and encourage the processing of chemical structure files to produce more homogeneous compound representations for exchange and interchange between online databases. While the CVSP platform is designed with flexibility inherent to the rules that can be used for processing the data we have produced a recommended rule set based on our own experiences with the large data sets such as DrugBank, ChEMBL, and data sets from ChemSpider.",2015-06-19 +28606610,Data management and data enrichment for systems biology projects.,"Collecting, curating, interlinking, and sharing high quality data are central to de.NBI-SysBio, the systems biology data management service center within the de.NBI network (German Network for Bioinformatics Infrastructure). The work of the center is guided by the FAIR principles for scientific data management and stewardship. FAIR stands for the four foundational principles Findability, Accessibility, Interoperability, and Reusability which were established to enhance the ability of machines to automatically find, access, exchange and use data. Within this overview paper we describe three tools (SABIO-RK, Excemplify, SEEK) that exemplify the contribution of de.NBI-SysBio services to FAIR data, models, and experimental methods storage and exchange. The interconnectivity of the tools and the data workflow within systems biology projects will be explained. For many years we are the German partner in the FAIRDOM initiative (http://fair-dom.org) to establish a European data and model management service facility for systems biology.",2017-06-10 +26818131,siRNAmod: A database of experimentally validated chemically modified siRNAs.,"Small interfering RNA (siRNA) technology has vast potential for functional genomics and development of therapeutics. 
However, it faces many obstacles predominantly instability of siRNAs due to nuclease digestion and subsequently biologically short half-life. Chemical modifications in siRNAs provide means to overcome these shortcomings and improve their stability and potency. Despite enormous utility bioinformatics resource of these chemically modified siRNAs (cm-siRNAs) is lacking. Therefore, we have developed siRNAmod, a specialized databank for chemically modified siRNAs. Currently, our repository contains a total of 4894 chemically modified-siRNA sequences, comprising 128 unique chemical modifications on different positions with various permutations and combinations. It incorporates important information on siRNA sequence, chemical modification, their number and respective position, structure, simplified molecular input line entry system canonical (SMILES), efficacy of modified siRNA, target gene, cell line, experimental methods, reference etc. It is developed and hosted using Linux Apache MySQL PHP (LAMP) software bundle. Standard user-friendly browse, search facility and analysis tools are also integrated. It would assist in understanding the effect of chemical modifications and further development of stable and efficacious siRNAs for research as well as therapeutics. siRNAmod is freely available at: http://crdd.osdd.net/servers/sirnamod.",2016-01-28 +28808136,ePlant: Visualizing and Exploring Multiple Levels of Data for Hypothesis Generation in Plant Biology.,"A big challenge in current systems biology research arises when different types of data must be accessed from separate sources and visualized using separate tools. The high cognitive load required to navigate such a workflow is detrimental to hypothesis generation. Accordingly, there is a need for a robust research platform that incorporates all data and provides integrated search, analysis, and visualization features through a single portal. 
Here, we present ePlant (http://bar.utoronto.ca/eplant), a visual analytic tool for exploring multiple levels of Arabidopsis thaliana data through a zoomable user interface. ePlant connects to several publicly available web services to download genome, proteome, interactome, transcriptome, and 3D molecular structure data for one or more genes or gene products of interest. Data are displayed with a set of visualization tools that are presented using a conceptual hierarchy from big to small, and many of the tools combine information from more than one data type. We describe the development of ePlant in this article and present several examples illustrating its integrative features for hypothesis generation. We also describe the process of deploying ePlant as an ""app"" on Araport. Building on readily available web services, the code for ePlant is freely available for any other biological species research.",2017-08-14 +31555042,The thermodynamics of guest complexation to octa-acid and tetra-endo-methyl octa-acid: reference data for the sixth statistical assessment of modeling of proteins and ligands (SAMPL6).,"Although computer-aided drug design has greatly improved over time, its application in the pharmaceutical industry is still limited by the accuracy of association constant predictions. Towards improving this situation, the Statistical Assessment of the Modeling of Proteins and Ligands (SAMPL) is a series of community-wide blind challenges aimed to advance computational techniques as standard predictive tools in rational drug design (https://en.wikipedia.org/wiki/SAMPL_Challenge). As an empirical contribution to the sixth assessment (SAMPL6), we report here the association constant (Ka ) and thermodynamic parameters (∆G, ∆H, -T∆S) of eight guests (G0-G7) binding to two subtly different hosts (OA and TEMOA) using ITC. 
Both hosts contain a unique, well-defined binding pocket capable of storing guests with up to ten non-hydrogen atoms, whilst the selection of amphiphilic guests contain a range of saturated and unsaturated substituents from C6 to C10. The thermodynamic data from this study will allow the challenge participants of SAMPL6 to test the accuracy of their computational protocols for calculating host-guest affinities.",2018-11-18 +30020402,ACDtool: a web-server for the generic analysis of large data sets of counts.,"

Motivation

More than 20 years ago, our laboratory published an original statistical test [referred to as the Audic-Claverie (AC) test in the literature] to identify differentially expressed genes from the pairwise comparison of counts of 'expressed sequence tags' determined in different conditions. Despite its antiquity and the publications of more sophisticated packages, this original publication continued to gather more than 200 citations per year, indicating the persistent usefulness of the simple AC test for the community. This prompted us to propose a fully revamped version of the AC test with a user interface adapted to the diverse and much larger datasets produced by contemporary omics techniques.

Results

ACDtool is a freely accessible web-service proposing three types of analyses: (i) the pairwise comparison of individual counts, (ii) pairwise comparisons of arbitrary large lists of counts and (iii) the all-at-once pairwise comparisons of multiple datasets. Statistical computations are implemented using standard R functions and can accommodate all practical ranges of counts as generated by modern omic experiments. ACDtool is well suited for large datasets without replicates.

Availability and implementation

http://www.igs.cnrs-mrs.fr/acdtool/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +28174897,An efficient concordant integrative analysis of multiple large-scale two-sample expression data sets.,"

Motivation

We have proposed a mixture model based approach to the concordant integrative analysis of multiple large-scale two-sample expression datasets. Since the mixture model is based on the transformed differential expression test P-values (z-scores), it is generally applicable to the expression data generated by either microarray or RNA-seq platforms. The mixture model is simple with three normal distribution components for each dataset to represent down-regulation, up-regulation and no differential expression. However, when the number of datasets increases, the model parameter space increases exponentially due to the component combination from different datasets.

Results

In this study, motivated by the well-known generalized estimating equations (GEEs) for longitudinal data analysis, we focus on the concordant components and assume that the proportions of non-concordant components follow a special structure. We discuss the exchangeable, multiset coefficient and autoregressive structures for model reduction, and their related expectation-maximization (EM) algorithms. Then, the parameter space is linear with the number of datasets. In our previous study, we have applied the general mixture model to three microarray datasets for lung cancer studies. We show that more gene sets (or pathways) can be detected by the reduced mixture model with the exchangeable structure. Furthermore, we show that more genes can also be detected by the reduced model. The Cancer Genome Atlas (TCGA) data have been increasingly collected. The advantage of incorporating the concordance feature has also been clearly demonstrated based on TCGA RNA sequencing data for studying two closely related types of cancer.

Availability and implementation

Additional results are included in a supplemental file. Computer program R-functions are freely available at http://home.gwu.edu/∼ylai/research/Concordance.

Contact

ylai@gwu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +29850769,The size matters? A computational tool to design bivalent ligands.,"Motivation:Bivalent ligands are increasingly important such as for targeting G protein-coupled receptor (GPCR) dimers or proteolysis targeting chimeras (PROTACs). They contain two pharmacophoric units that simultaneously bind in their corresponding binding sites, connected with a spacer chain. Here, we report a molecular modelling tool that links the pharmacophore units via the shortest pathway along the receptors van der Waals surface and then scores the solutions providing prioritization for the design of new bivalent ligands. Results:Bivalent ligands of known dimers of GPCRs, PROTACs and a model bivalent antibody/antigen system were analysed. The tool could rapidly assess the preferred linker length for the different systems and recapitulated the best reported results. In the case of GPCR dimers the results suggest that in some cases these ligands might bind to a secondary binding site at the extracellular entrance (vestibule or allosteric site) instead of the orthosteric binding site. Availability and implementation:Freely accessible from the Molecular Operating Environment svl exchange server (https://svl.chemcomp.com/). Supplementary information:Supplementary data are available at Bioinformatics online.",2018-11-01 +30364969,EWAS Atlas: a curated knowledgebase of epigenome-wide association studies.,"Epigenome-Wide Association Study (EWAS) has become increasingly significant in identifying the associations between epigenetic variations and different biological traits. In this study, we develop EWAS Atlas (http://bigd.big.ac.cn/ewas), a curated knowledgebase of EWAS that provides a comprehensive collection of EWAS knowledge. Unlike extant data-oriented epigenetic resources, EWAS Atlas features manual curation of EWAS knowledge from extensive publications. 
In the current implementation, EWAS Atlas focuses on DNA methylation-one of the key epigenetic marks; it integrates a large number of 329 172 high-quality EWAS associations, involving 112 tissues/cell lines and covering 305 traits, 1830 cohorts and 390 ontology entities, which are completely based on manual curation from 649 studies reported in 401 publications. In addition, it is equipped with a powerful trait enrichment analysis tool, which is capable of profiling trait-trait and trait-epigenome relationships. Future developments include regular curation of recent EWAS publications, incorporation of more epigenetic marks and possible integration of EWAS with GWAS. Collectively, EWAS Atlas is dedicated to the curation, integration and standardization of EWAS knowledge and has the great potential to help researchers dissect molecular mechanisms of epigenetic modifications associated with biological traits.",2019-01-01 +27794553,"FuzDB: database of fuzzy complexes, a tool to develop stochastic structure-function relationships for protein complexes and higher-order assemblies.","FuzDB (http://protdyn-database.org) compiles experimentally observed fuzzy protein complexes, where intrinsic disorder (ID) is maintained upon interacting with a partner (protein, nucleic acid or small molecule) and directly impacts biological function. Entries in the database have both (i) structural evidence demonstrating the structural multiplicity or dynamic disorder of the ID region(s) in the partner bound form of the protein and (ii) in vitro or in vivo biological evidence that indicates the significance of the fuzzy region(s) in the formation, function or regulation of the assembly. Unlike the other intrinsically disordered or unfolded protein databases, FuzDB focuses on ID regions within a biological context, including higher-order assemblies and presents a detailed analysis of the structural and functional data. 
FuzDB also provides interpretation of experimental results to elucidate the molecular mechanisms by which fuzzy regions-classified on the basis of topology and mechanism-interfere with the structural ensembles and activity of protein assemblies. Regulatory sites generated by alternative splicing (AS) or post-translational modifications (PTMs) are also collected. By assembling all this information, FuzDB could be utilized to develop stochastic structure-function relationships for proteins and could contribute to the emergence of a new paradigm.",2016-10-28 +24498619,The CDC Hemophilia B mutation project mutation list: a new online resource.,"Hemophilia B (HB) is caused by mutations in the human gene F9. The mutation type plays a pivotal role in genetic counseling and prediction of inhibitor development. To help the HB community understand the molecular etiology of HB, we have developed a listing of all F9 mutations that are reported to cause HB based on the literature and existing databases. The Centers for Disease Control and Prevention (CDC) Hemophilia B Mutation Project (CHBMP) mutation list is compiled in an easily accessible format of Microsoft Excel and contains 1083 unique mutations that are reported to cause HB. Each mutation is identified using Human Genome Variation Society (HGVS) nomenclature standards. The mutation types and the predicted changes in amino acids, if applicable, are also provided. Related information including the location of mutation, severity of HB, the presence of inhibitor, and original publication reference are listed as well. Therefore, our mutation list provides an easily accessible resource for genetic counselors and HB researchers to predict inhibitors. 
The CHBMP mutation list is freely accessible at http://www.cdc.gov/hemophiliamutations.",2013-08-19 +30012671,A RIPK3-PGE2 Circuit Mediates Myeloid-Derived Suppressor Cell-Potentiated Colorectal Carcinogenesis.,"Receptor-interacting protein kinase 3 (RIPK3) is essential for mucosal repair in inflammatory bowel diseases (IBD) and colorectal cancer. However, its role in tumor immunity is unknown. Here, we report that decreased RIPK3 in colorectal cancer correlates with the accumulation of myeloid-derived suppressor cells (MDSC). Deficiency of RIPK3 boosted tumorigenesis via accumulation and immunosuppressive activity of MDSCs. Reduction of RIPK3 in MDSC and colorectal cancer cells elicited NFκB-transcribed COX-2, which catalyzed the synthesis of prostaglandin E2 (PGE2). PGE2 exacerbated the immunosuppressive activity of MDSCs and accelerated tumor growth. Moreover, PGE2 suppressed RIPK3 expression while enhancing expression of NFκB and COX-2 in MDSCs and colorectal cancer cells. Inhibition of COX-2 or PGE2 receptors reversed the immunosuppressive activity of MDSCs and dampened tumorigenesis. Patient databases also delineated the correlation of RIPK3 and COX-2 expression with colorectal cancer survival. Our findings demonstrate a novel signaling circuit by which RIPK3 and PGE2 regulate tumor immunity, providing potential ideas for immunotherapy against colorectal cancer.Significance: A novel signaling circuit involving RIPK3 and PGE2 enhances accumulation and immunosuppressive activity of MDSCs, implicating its potential as a therapeutic target in anticancer immunotherapy.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/19/5586/F1.large.jpg Cancer Res; 78(19); 5586-99. ©2018 AACR.",2018-07-16 +29860082,On testing for spatial correspondence between maps of human brain structure and function.,"A critical issue in many neuroimaging studies is the comparison between brain maps. 
Nonetheless, it remains unclear how one should test hypotheses focused on the overlap or spatial correspondence between two or more brain maps. This ""correspondence problem"" affects, for example, the interpretation of comparisons between task-based patterns of functional activation, resting-state networks or modules, and neuroanatomical landmarks. To date, this problem has been addressed with remarkable variability in terms of methodological approaches and statistical rigor. In this paper, we address the correspondence problem using a spatial permutation framework to generate null models of overlap by applying random rotations to spherical representations of the cortical surface, an approach for which we also provide a theoretical statistical foundation. We use this method to derive clusters of cognitive functions that are correlated in terms of their functional neuroatomical substrates. In addition, using publicly available data, we formally demonstrate the correspondence between maps of task-based functional activity, resting-state fMRI networks and gyral-based anatomical landmarks. We provide open-access code to implement the methods presented for two commonly-used tools for surface based cortical analysis (https://www.github.com/spin-test). This spatial permutation approach constitutes a useful advance over widely-used methods for the comparison of cortical maps, thereby opening new possibilities for the integration of diverse neuroimaging data.",2018-06-01 +28582503,HUGIn: Hi-C Unifying Genomic Interrogator.,"

Motivation

High throughput chromatin conformation capture (3C) technologies, such as Hi-C and ChIA-PET, have the potential to elucidate the functional roles of non-coding variants. However, most of published genome-wide unbiased chromatin organization studies have used cultured cell lines, limiting their generalizability.

Results

We developed a web browser, HUGIn, to visualize Hi-C data generated from 21 human primary tissues and cell lines. HUGIn enables assessment of chromatin contacts both constitutive across and specific to tissue(s) and/or cell line(s) at any genomic loci, including GWAS SNPs, eQTLs and cis-regulatory elements, facilitating the understanding of both GWAS and eQTL results and functional genomics data.

Availability and implementation

HUGIn is available at http://yunliweb.its.unc.edu/HUGIn.

Contact

yunli@med.unc.edu or hum@ccf.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +30409001,Isotopic and symmetry breaking effects on phosphine spectra under H → D substitutions from ab initio variational calculations.,"Variationally computed infrared spectra in the range [0-5000] cm-1 are reported for the deuterated PH2D and PHD2 molecules from accurate potential energy and dipole moment surfaces initially derived for the major isotopologue PH3( C 3 v ). Energy level and line intensity calculations were performed by using a normal-mode model combined with isotopic and symmetry transformations for the H → D substitutions. Theoretical spectra were computed at 296 K up to J max = 30 and will be made available through the TheoReTS information system (http://theorets.univ-reims.fr, http://theorets.tsu.ru). For the very first time, ab initio intensity predictions of PH2D/PHD2 are in good qualitative agreement with the literature. This work will be useful for spectral intensity analysis for which accurate spectral intensity data are still missing.",2018-11-01 +30124903,A case study evaluating the portability of an executable computable phenotype algorithm across multiple institutions and electronic health record environments.,"Electronic health record (EHR) algorithms for defining patient cohorts are commonly shared as free-text descriptions that require human intervention both to interpret and implement. We developed the Phenotype Execution and Modeling Architecture (PhEMA, http://projectphema.org) to author and execute standardized computable phenotype algorithms. With PhEMA, we converted an algorithm for benign prostatic hyperplasia, developed for the electronic Medical Records and Genomics network (eMERGE), into a standards-based computable format. Eight sites (7 within eMERGE) received the computable algorithm, and 6 successfully executed it against local data warehouses and/or i2b2 instances. 
Blinded random chart review of cases selected by the computable algorithm shows PPV ≥90%, and 3 out of 5 sites had >90% overlap of selected cases when comparing the computable algorithm to their original eMERGE implementation. This case study demonstrates potential use of PhEMA computable representations to automate phenotyping across different EHR systems, but also highlights some ongoing challenges.",2018-11-01 +25996789,Multiplexed peptide analysis using data-independent acquisition and Skyline.,"Here we describe the use of data-independent acquisition (DIA) on a Q-Exactive mass spectrometer for the detection and quantification of peptides in complex mixtures using the Skyline Targeted Proteomics Environment (freely available online at http://skyline.maccosslab.org). The systematic acquisition of mass spectrometry (MS) or tandem MS (MS/MS) spectra by DIA is in contrast to DDA, in which the acquired MS/MS spectra are only suitable for the identification of a stochastically sampled set of peptides. Similarly to selected reaction monitoring (SRM), peptides can be quantified from DIA data using targeted chromatogram extraction. Unlike SRM, data acquisition is not constrained to a predetermined set of target peptides. In this protocol, a spectral library is generated using data-dependent acquisition (DDA), and chromatograms are extracted from the DIA data for all peptides in the library. As in SRM, quantification using DIA data is based on the area under the curve of extracted MS/MS chromatograms. In addition, a quality control (QC) method suitable for DIA based on targeted MS/MS acquisition is detailed. Not including time spent acquiring data, and time for database searching, the procedure takes ∼1-2 h to complete. 
Typically, data acquisition requires roughly 1-4 h per sample, and a database search will take 0.5-2 h to complete.",2015-05-21 +26432833,iPPI-DB: an online database of modulators of protein-protein interactions.,"In order to boost the identification of low-molecular-weight drugs on protein-protein interactions (PPI), it is essential to properly collect and annotate experimental data about successful examples. This provides the scientific community with the necessary information to derive trends about privileged physicochemical properties and chemotypes that maximize the likelihood of promoting a given chemical probe to the most advanced stages of development. To this end we have developed iPPI-DB (freely accessible at http://www.ippidb.cdithem.fr), a database that contains the structure, some physicochemical characteristics, the pharmacological data and the profile of the PPI targets of several hundreds modulators of protein-protein interactions. iPPI-DB is accessible through a web application and can be queried according to two general approaches: using physicochemical/pharmacological criteria; or by chemical similarity to a user-defined structure input. In both cases the results are displayed as a sortable and exportable datasheet with links to external databases such as Uniprot, PubMed. Furthermore each compound in the table has a link to an individual ID card that contains its physicochemical and pharmacological profile derived from iPPI-DB data. This includes information about its binding data, ligand and lipophilic efficiencies, location in the PPI chemical space, and importantly similarity with known drugs, and links to external databases like PubChem, and ChEMBL.",2015-10-01 +31067133,"Impacts of Subchronic, High-Level Noise Exposure on Sleep and Metabolic Parameters: A Juvenile Rodent Model.","

Background

Noise is an environmental factor that has been associated with metabolic and sleep disorders. Sleep is a vital function, since it underpins physiologic processes and cognitive recovery and development. However, the effects of chronic noise exposure on the developing organism are still subject to debate.

Objective

The objective of the present study was to assess the effects of subchronic, high-level noise exposure on sleep, apnea, and homeostasis in juvenile rats.

Methods

Twenty-four 3-wk-old male Wistar rats were exposed to noise [[Formula: see text], [Formula: see text]] for 5 wk and 2 d during the 12-h rest period. Data on sleep stages, food and water intake, apnea, and body and organ weight were recorded.

Results

Five weeks of high-level noise exposure were associated with hyperphagia ([Formula: see text]), body weight gain ([Formula: see text]), a heavier thymus ([Formula: see text]), and heavier adrenal glands ([Formula: see text]). A sleep analysis highlighted microstructural differences in the active period: in particular, the mean daily amount of rapid eye movement (REM) sleep as a proportion of total sleep time (TST) was higher. The mean daily amount of non-REM (NREM) sleep was lower in the exposed group, meaning that the intergroup difference in the TST was not significant. During a 1-h, noise-free plethysmographic recording during the rest period, the mean total amount of active wakefulness (AW) was lower in the exposed group (by 9.1 min), whereas the mean duration of an episode of REM sleep was higher (by 1.8 min), and the TST was higher (by 10.7 min).

Discussion

Subchronic exposure of juvenile rats to high-intensity noise during the rest period was associated with some small but significant sleep disturbances, greater food and water intakes, greater body weight gain, and greater thymus and adrenal gland weights. The main effects of noise exposure on sleep were also observed in the 1-h plethysmography session after 5 wk of exposure. https://doi.org/10.1289/EHP4045.",2019-05-01 +30979354,Controlling Complexity of Cerebral Cortex Simulations-II: Streamlined Microcircuits.,"Recently, Markram et al. (2015) presented a model of the rat somatosensory microcircuit (Markram model). Their model is high in anatomical and physiological detail, and its simulation requires supercomputers. The lack of neuroinformatics and computing power is an obstacle for using a similar approach to build models of other cortical areas or larger cortical systems. Simplified neuron models offer an attractive alternative to high-fidelity Hodgkin-Huxley-type neuron models, but their validity in modeling cortical circuits is unclear. We simplified the Markram model to a network of exponential integrate-and-fire (EIF) neurons that runs on a single CPU core in reasonable time. We analyzed the electrophysiology and the morphology of the Markram model neurons with eFel and NeuroM tools, provided by the Blue Brain Project. We then constructed neurons with few compartments and averaged parameters from the reference model. We used the CxSystem simulation framework to explore the role of short-term plasticity and GABA B and NMDA synaptic conductances in replicating oscillatory phenomena in the Markram model. We show that having a slow inhibitory synaptic conductance (GABA B) allows replication of oscillatory behavior in the high-calcium state. Furthermore, we show that qualitatively similar dynamics are seen even with a reduced number of cell types (from 55 to 17 types). This reduction halved the computation time. 
Our results suggest that qualitative dynamics of cortical microcircuits can be studied using limited neuroinformatics and computing resources supporting parameter exploration and simulation of cortical systems. The simplification procedure can easily be adapted to studying other microcircuits for which sparse electrophysiological and morphological data are available.",2019-04-12 +30472497,Genetic algorithm for assigning weights to gene expressions using functional annotations.,"A method, named genetic algorithm for assigning weights to gene expressions using functional annotations (GAAWGEFA), is developed to assign proper weights to the gene expressions at each time point. The weights are estimated using functional annotations of the genes in a genetic algorithm framework. The method shows gene similarity in an improved manner as compared with other existing methods because it takes advantage of the existing functional annotations of the genes. The weight combination for the expressions at different time points is determined by maximizing the fitness function of GAAWGEFA in terms of the positive predictive value (PPV) for the top 10,000 gene pairs. The performance of the proposed method is primarily compared with Biweight mid correlation (BICOR) and original expression values for the six Saccharomyces cerevisiae datasets and one Bacillus subtilis dataset. The utility of GAAWGEFA is shown in predicting the functions of 48 unclassified genes (using p-value cutoff 10-13) from Saccharomyces cerevisiae microarray data where the expressions are weighted using GAAWGEFA and are clustered using k-medoids algorithm. The related code along with various parameters is available at http://sampa.droppages.com/GAAWGEFA.html.",2018-11-17 +28540697,"In Silico PCR Tools for a Fast Primer, Probe, and Advanced Searching.","The polymerase chain reaction (PCR) is fundamental to molecular biology and is the most important practical molecular technique for the research laboratory. 
The principle of this technique has been further used and applied in plenty of other simple or complex nucleic acid amplification technologies (NAAT). In parallel to laboratory ""wet bench"" experiments for nucleic acid amplification technologies, in silico or virtual (bioinformatics) approaches have been developed, among which in silico PCR analysis. In silico NAAT analysis is a useful and efficient complementary method to ensure the specificity of primers or probes for an extensive range of PCR applications from homology gene discovery, molecular diagnosis, DNA fingerprinting, and repeat searching. Predicting sensitivity and specificity of primers and probes requires a search to determine whether they match a database with an optimal number of mismatches, similarity, and stability. In the development of in silico bioinformatics tools for nucleic acid amplification technologies, the prospects for the development of new NAAT or similar approaches should be taken into account, including forward-looking and comprehensive analysis that is not limited to only one PCR technique variant. The software FastPCR and the online Java web tool are integrated tools for in silico PCR of linear and circular DNA, multiple primer or probe searches in large or small databases and for advanced search. These tools are suitable for processing of batch files that are essential for automation when working with large amounts of data. The FastPCR software is available for download at http://primerdigital.com/fastpcr.html and the online Java version at http://primerdigital.com/tools/pcr.html .",2017-01-01 +30596639,Statistical investigations of protein residue direct couplings.,"Protein Direct Coupling Analysis (DCA), which predicts residue-residue contacts based on covarying positions within a multiple sequence alignment, has been remarkably effective. 
This suggests that there is more to learn from sequence correlations than is generally assumed, and calls for deeper investigations into DCA and perhaps into other types of correlations. Here we describe an approach that enables such investigations by measuring, as an estimated p-value, the statistical significance of the association between residue-residue covariance and structural interactions, either internal or homodimeric. Its application to thirty protein superfamilies confirms that direct coupling (DC) scores correlate with 3D pairwise contacts with very high significance. This method also permits quantitative assessment of the relative performance of alternative DCA methods, and of the degree to which they detect direct versus indirect couplings. We illustrate its use to assess, for a given protein, the biological relevance of alternative conformational states, to investigate the possible mechanistic implications of differences between these states, and to characterize subtle aspects of direct couplings. Our analysis indicates that direct pairwise correlations may be largely distinct from correlated patterns associated with functional specialization, and that the joint analysis of both types of correlations can yield greater power. Data, programs, and source code are freely available at http://evaldca.igs.umaryland.edu.",2018-12-31 +31725857,Biomarker identification of hepatocellular carcinoma using a methodical literature mining strategy. ,"Hepatocellular carcinoma (HCC), one of the most common causes of cancer-related deaths, carries a 5-year survival rate of 18%, underscoring the need for robust biomarkers. In spite of the increased availability of HCC related literatures, many of the promising biomarkers reported have not been validated for clinical use. To narrow down the wide range of possible biomarkers for further clinical validation, bioinformaticians need to sort them out using information provided in published works. 
Biomedical text mining is an automated way to obtain information of interest within the massive collection of biomedical knowledge, thus enabling extraction of data for biomarkers associated with certain diseases. This method can significantly reduce both the time and effort spent on studying important maladies such as liver diseases. Herein, we report a text mining-aided curation pipeline to identify potential biomarkers for liver cancer. The curation pipeline integrates PubMed E-Utilities to collect abstracts from PubMed and recognize several types of named entities by machine learning-based and pattern-based methods. Genes/proteins from evidential sentences were classified as candidate biomarkers using a convolutional neural network. Lastly, extracted biomarkers were ranked depending on several criteria, such as the frequency of keywords and articles and the journal impact factor, and then integrated into a meaningful list for bioinformaticians. Based on the developed pipeline, we constructed MarkerHub, which contains 2128 candidate biomarkers extracted from PubMed publications from 2008 to 2017. Database URL: http://markerhub.iis.sinica.edu.tw.",2017-01-01 +27807048,snpGeneSets: An R Package for Genome-Wide Study Annotation.,"Genome-wide studies (GWS) of SNP associations and differential gene expressions have generated abundant results; next-generation sequencing technology has further boosted the number of variants and genes identified. Effective interpretation requires massive annotation and downstream analysis of these genome-wide results, a computationally challenging task. We developed the snpGeneSets package to simplify annotation and analysis of GWS results. Our package integrates local copies of knowledge bases for SNPs, genes, and gene sets, and implements wrapper functions in the R language to enable transparent access to low-level databases for efficient annotation of large genomic data. 
The package contains functions that execute three types of annotations: (1) genomic mapping annotation for SNPs and genes and functional annotation for gene sets; (2) bidirectional mapping between SNPs and genes, and genes and gene sets; and (3) calculation of gene effect measures from SNP associations and performance of gene set enrichment analyses to identify functional pathways. We applied snpGeneSets to type 2 diabetes (T2D) results from the NHGRI genome-wide association study (GWAS) catalog, a Finnish GWAS, and a genome-wide expression study (GWES). These studies demonstrate the usefulness of snpGeneSets for annotating and performing enrichment analysis of GWS results. The package is open-source, free, and can be downloaded at: https://www.umc.edu/biostats_software/.",2016-12-07 +31699686,Partnering For Pain: a Priority Setting Partnership to identify patient-oriented research priorities for pediatric chronic pain in Canada.,"

Background

Chronic pain affects 1-3 million Canadian children and adolescents and their families. The primary objective of the Partnering For Pain project was to collaboratively identify the top 10 research priorities in pediatric chronic pain.

Methods

Partnering For Pain took a patient-oriented research approach and followed a modified James Lind Alliance Priority Setting Partnership (PSP) to identify the top research priorities in pediatric chronic pain according to people with lived experience (patients), family members and health care providers (clinicians). The PSP was completed in 4 phases between May and December 2018: 1) national survey of stakeholders, including those with lived experience with pediatric chronic pain, family members and clinicians who treat children with chronic pain, to gather priorities, 2) data processing, 3) interim prioritization by invited patients, family members and clinicians (former research participants or identified through pediatric chronic pain programs, patient partner organizations and steering committee member networks) and 4) in-person priority-setting workshop involving patients, family members and clinicians identified via steering committee networks and partner organizations, with evaluation of patient engagement. The process was led by a national steering committee of patient and parent partners, researchers and clinicians engaged in codesign, analysis and translation of project findings.

Results

In phase 1, 215 Canadians (86 patients [40.0%], 56 family members [26.0%] and 73 clinicians [34.0%]) submitted 540 potential priorities that were developed into 112 unique research questions (phase 2). Of the 112 questions, 63 were rated for importance by 57 participants (19 patients [33%], 17 family members [30%] and 21 clinicians [37%]) in phase 3. In phase 4, 20 participants (6 patients [30%], 6 family members [30%] and 8 clinicians [40%]) discussed the 25 most highly rated questions and reached consensus on the final top 10.

Interpretation

The final priorities address pediatric chronic pain prevention, impact and treatment, as well as delivery, access and coordination of care. The priorities reflect a directed and collaborative call to action to improve existing pediatric pain research and care.

Plain language summary

Chronic pain affects 1 in 5 children and teens. This means that 1-3 million Canadian youth deal with pain lasting months to years. This pain gets in the way of being active, sleeping, going to school, and getting along with friends and family. Youth with chronic pain and their families are experts on what it's like to live with pain, but, until now, research has not asked what issues they care about most. The goal of the Partnering For Pain project was to develop a list of the 10 most important things we still need to learn about chronic pain during childhood according to people who live with it, their families and health care providers. We did this in 4 steps: 1) a survey with 215 people who shared 540 concerns they have about chronic pain in childhood, 2) turning those concerns into questions that can be answered by research, 3) a survey with 57 people who ranked how important each research question was and 4) an in-person discussion with 20 people who chose the top 10 research priorities. Each step included Canadians who have had chronic pain during childhood, their families and health care providers. The final top 10 list has questions about how to better prevent and care for children and teens with chronic pain. These priorities make sure that future research focuses on what is most important to people who will use it in their everyday lives. Project video: https://youtu.be/wA-RwrFiSPk. Project website: www.partneringforpain.com.",2019-10-01 +28541456,CRISPRcloud: a secure cloud-based pipeline for CRISPR pooled screen deconvolution.,"

Summary

We present a user-friendly, cloud-based, data analysis pipeline for the deconvolution of pooled screening data. This tool, CRISPRcloud, serves a dual purpose of extracting, clustering and analyzing raw next generation sequencing files derived from pooled screening experiments while at the same time presenting them in a user-friendly way on a secure web-based platform. Moreover, CRISPRcloud serves as a useful web-based analysis pipeline for reanalysis of pooled CRISPR screening datasets. Taken together, the framework described in this study is expected to accelerate development of web-based bioinformatics tools for handling all studies which include next generation sequencing data.

Availability and implementation

http://crispr.nrihub.org.

Contact

zhandong.liu@bcm.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +,State of the art on the initiatives and activities relevant to risk assessment and risk management of nanotechnologies in the food and agriculture sectors,"The Food and Agriculture Organization of the United Nations (FAO) and World Health Organization (WHO) conducted an international expert meeting on the potential food safety implications of the application of nanotechnologies in the food and agriculture sectors in June 2009. The present report reviews national, regional and international activities on the risk assessment and risk management of nanomaterials in the food and agriculture sectors that have been carried out between 2009 and 2012. The full report of the work is presented in a FAO/WHO paper available at http://www.fao.org/food/food-safety-quality/a-z-index/nano. Information and data have been collected on national and international approaches that identify and implement strategies to address potential hazards associated with the use of nanotechnology-related products or techniques. Selected activities by international governmental and nongovernmental organizations were reviewed and the significant achievements are noted. Meta-analysis of scientific reviews addressing risk assessment of nanotechnologies in the food and agriculture sectors was conducted and key principles for the safety assessment of nanomaterials were identified. It was concluded that although the concepts of potential use of nanomaterials in food and the implied benefits for stakeholders including consumers have not changed significantly since 2009, there are new products being developed and claimed to enter the market and national and international interests in considering the needs for applying regulations on engineered nanomaterials are increasing. 
The number of published risk assessment of products used in foods that are nanomaterials or contain particles that fall within applicable definitions is growing slowly. Several data gaps with respect to interaction between nanomaterials and food matrices, behaviours of nanomaterials in the human body, methods to determine such interactions and behaviours, and the relevance of such data for risk assessment continue to exist. The international collaboration in the area of nanomaterials and nanotechnology in food and agriculture must be strengthened. International efforts on risk assessment and risk communication may benefit from the experience gained at the national and regional levels. Should a sufficient number of case studies of risk assessment of commercial products become available with time, a review of approaches applied and results obtained could support the development of risk assessment procedures acceptable at the international level.",2014-10-01 +22080509,Database for bacterial group II introns.,"The Database for Bacterial Group II Introns (http://webapps2.ucalgary.ca/~groupii/index.html#) provides a catalogue of full-length, non-redundant group II introns present in bacterial DNA sequences in GenBank. The website is divided into three sections. The first section provides general information on group II intron properties, structures and classification. The second and main section lists information for individual introns, including insertion sites, DNA sequences, intron-encoded protein sequences and RNA secondary structure models. The final section provides tools for identification and analysis of intron sequences. These include a step-by-step guide to identify introns in genomic sequences, a local BLAST tool to identify closest intron relatives to a query sequence, and a boundary-finding tool that predicts 5' and 3' intron-exon junctions in an input DNA sequence. Finally, selected intron data can be downloaded in FASTA format. 
It is hoped that this database will be a useful resource not only to group II intron and RNA researchers, but also to microbiologists who encounter these unexpected introns in genomic sequences.",2011-11-10 +30576541,Peripheral Biomarkers in Schizophrenia: A Meta-Analysis of Microarray Gene Expression Datasets.,"BACKGROUND:Schizophrenia is a severe psychiatric disorder with a complex pathophysiology. Given its prevalence, high risk of mortality, early onset, and high levels of disability, researchers have attempted to develop early detection strategies for facilitating timely pharmacological and/or nonpharmacological interventions. Here, we performed a meta-analysis of publicly available gene expression datasets in peripheral tissues in schizophrenia and healthy controls to detect consistent patterns of illness-associated gene expression. We also tested whether our earlier finding of a downregulation of NPTX2 expression in the brain of schizophrenia patients replicated in peripheral tissues. METHODS:We conducted a systematic search in the Gene Expression Omnibus repository (https://www.ncbi.nlm.nih.gov/gds/) and identified 3 datasets matching our inclusion criteria: GSE62333, GSE18312, and GSE27383. After quality controls, the total sample size was: schizophrenia (n = 71) and healthy controls (n = 57) (schizophrenia range: n = 12-40; healthy controls range: n = 8-29). RESULTS:The results of the meta-analysis conducted with the GeneMeta package revealed 2 genes with a false discovery rate  < 0.05: atlastin GTPase 3 (ATL3) (upregulated) and arachidonate 15-lipoxygenase, type B (ALOX15B) (downregulated). The result for ATL3 was confirmed using the weighted Z test method, whereas we found a suggestive signal for ALOX15B (false discovery rate < 0.10). CONCLUSIONS:These data point to alterations of peripheral expression of ATL3 in schizophrenia, but did not confirm the significant association signal found for NPTX2 in postmortem brain samples. 
These findings await replication in newly recruited schizophrenia samples as well as complementary analysis of their encoded peptides in blood.",2019-03-01 +30920877,The Role of Dietary Phytoestrogens and the Nuclear Receptor PPARγ in Adipogenesis: An in Vitro Study.,"BACKGROUND:Phytoestrogens, naturally occurring plant chemicals, have long been thought to confer beneficial effects on human cardiovascular and metabolic health. However, recent epidemiological studies, have yielded conflicting outcomes, in which phytoestrogen consumption was both positively and negatively correlated with adiposity. Interestingly, several dietary phytoestrogens are known to stimulate or inhibit the activity of the peroxisome proliferator-activated receptor gamma (PPARγ), a key physiological regulator of adipogenesis. OBJECTIVE:The objective of this study was to test the hypothesis that the pro- or anti-adipogenic activity of phytoestrogen chemicals is related to the ability to activate PPARγ in adipocytes. METHODS:The effects of resveratrol and the soy isoflavones genistein and daidzein on adipogenesis were examined in cell-based assays using the 3T3-L1 cell model. In parallel, ligand-mediated alterations in PPARγ target gene expression were measured by quantitative polymerase chain reaction. The agonist/antagonist activities of phytoestrogens on PPARγ were further assessed by quantifying their ability to affect recruitment of transcriptional cofactors to the receptor. RESULTS:Resveratrol displayed significant anti-adipogenic activities as exhibited by the ability to antagonize PPARγ-dependent adipocyte differentiation, down-regulate genes involved in lipid metabolism, block cofactor recruitment to PPARγ, and antagonize the effects of the PPARγ agonist rosiglitazone. In contrast, genistein and daidzein functioned as PPARγ agonists while also displaying pro-adipogenic activities. 
CONCLUSIONS:These data provide biological evidence that the pro- or anti-obesity effects of phytoestrogens are related to their relative agonist/antagonist activity on PPARγ. Thus, PPARγ-activation assays may enable the screening of dietary components and identification of agents with adipogenic activities. https://doi.org/10.1289/EHP3444.",2019-03-01 +28449120,SigSeeker: a peak-calling ensemble approach for constructing epigenetic signatures.,"

Motivation

Epigenetic data are invaluable when determining the regulatory programs governing a cell. Based on use of next-generation sequencing data for characterizing epigenetic marks and transcription factor binding, numerous peak-calling approaches have been developed to determine sites of genomic significance in these data. Such analyses can produce a large number of false positive predictions, suggesting that sites supported by multiple algorithms provide a stronger foundation for inferring and characterizing regulatory programs associated with the epigenetic data. Few methodologies integrate epigenetic based predictions of multiple approaches when combining profiles generated by different tools.

Results

The SigSeeker peak-calling ensemble uses multiple tools to identify peaks, and with user-defined thresholds for peak overlap and signal strength it retains only those peaks that are concordant across multiple tools. Peaks predicted to be co-localized by only a very small number of tools, discovered to be only marginally overlapping, or found to represent significant outliers to the approximation model are removed from the results, providing concise and high quality epigenetic datasets. SigSeeker has been validated using established benchmarks for transcription factor binding and histone modification ChIP-Seq data. These comparisons indicate that the quality of our ensemble technique exceeds that of single tool approaches, enhances existing peak-calling ensembles, and results in epigenetic profiles of higher confidence.

Availability and implementation

http://sigseeker.org.

Contact

lichtenbergj@mail.nih.gov.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +26286194,"Carbohydrate structure database merged from bacterial, archaeal, plant and fungal parts.","The Carbohydrate Structure Databases (CSDBs, http://csdb.glycoscience.ru) store structural, bibliographic, taxonomic, NMR spectroscopic, and other data on natural carbohydrates and their derivatives published in the scientific literature. The CSDB project was launched in 2005 for bacterial saccharides (as BCSDB). Currently, it includes two parts, the Bacterial CSDB and the Plant&Fungal CSDB. In March 2015, these databases were merged to the single CSDB. The combined CSDB includes information on bacterial and archaeal glycans and derivatives (the coverage is close to complete), as well as on plant and fungal glycans and glycoconjugates (almost all structures published up to 1998). CSDB is regularly updated via manual expert annotation of original publications. Both newly annotated data and data imported from other databases are manually curated. The CSDB data are exportable in a number of modern formats, such as GlycoRDF. CSDB provides additional services for simulation of (1)H, (13)C and 2D NMR spectra of saccharides, NMR-based structure prediction, glycan-based taxon clustering and other.",2015-08-18 +29712733,Condensins under the microscope.,"Condensins are key players in mitotic chromosome condensation. Using an elegant combination of state-of-the-art imaging techniques, Walther et al. (2018. J. Cell Biol. 
https://doi.org/10.1083/jcb.201801048) counted the number of Condensins, examined their behaviors on human mitotic chromosomes, and integrated the quantitative data to propose a new mechanistic model for chromosome condensation.",2018-04-30 +30632664,Pathology of Birt-Hogg-Dubé syndrome: A special reference of pulmonary manifestations in a Japanese population with a comprehensive analysis and review.,"Birt-Hogg-Dubé (BHD) syndrome is a rare genetic disorder characterized by cutaneous fibrofolliculomas, pulmonary cysts and renal cell carcinomas. Affected individuals inherit germline mutations in the folliculin gene (FLCN). Approximately 150 pathogenic FLCN variants have been identified worldwide. Many Japanese probands of BHD syndrome were first identified by pulmonologists and/or radiologists during treatment of pneumothoraces. Lung specimens obtained through video-assisted thoracoscopic surgery (VATS) have characteristic features unique to BHD syndrome; however, pathologists often miss key findings and diagnose patients with ""bullae/blebs"". The pleural and subpleural cysts of BHD syndrome-associated lung diseases are often modified by tissue remodeling and can be difficult to distinguish from emphysematous bullae/blebs. Intraparenchymal unruptured cysts tend to retain distinctive features that are different from other cystic lung diseases. Here, we review the clinicopathological findings of BHD syndrome in a Japanese population based on data from 200 probands diagnosed by genetic testing and a total of 520 symptomatic family members identified through BHD-NET Japan (http://www.bhd-net.jp/). 
Detailed morphology of pulmonary cysts obtained from VATS and autopsied lung specimens are described, and pathological clues for differentiating miscellaneous cystic lung disorders are discussed.",2019-01-11 +25905099,Metabolonote: a wiki-based database for managing hierarchical metadata of metabolome analyses.,"Metabolomics - technology for comprehensive detection of small molecules in an organism - lags behind the other ""omics"" in terms of publication and dissemination of experimental data. Among the reasons for this are difficulty precisely recording information about complicated analytical experiments (metadata), existence of various databases with their own metadata descriptions, and low reusability of the published data, resulting in submitters (the researchers who generate the data) being insufficiently motivated. To tackle these issues, we developed Metabolonote, a Semantic MediaWiki-based database designed specifically for managing metabolomic metadata. We also defined a metadata and data description format, called ""Togo Metabolome Data"" (TogoMD), with an ID system that is required for unique access to each level of the tree-structured metadata such as study purpose, sample, analytical method, and data analysis. Separation of the management of metadata from that of data and permission to attach related information to the metadata provide advantages for submitters, readers, and database developers. The metadata are enriched with information such as links to comparable data, thereby functioning as a hub of related data resources. They also enhance not only readers' understanding and use of data but also submitters' motivation to publish the data. The metadata are computationally shared among other systems via APIs, which facilitate the construction of novel databases by database developers. A permission system that allows publication of immature metadata and feedback from readers also helps submitters to improve their metadata. 
Hence, this aspect of Metabolonote, as a metadata preparation tool, is complementary to high-quality and persistent data repositories such as MetaboLights. A total of 808 metadata for analyzed data obtained from 35 biological species are published currently. Metabolonote and related tools are available free of cost at http://metabolonote.kazusa.or.jp/.",2015-04-07 +28531269,Change in the Rate of Biological Aging in Response to Caloric Restriction: CALERIE Biobank Analysis.,"Biological aging measures have been proposed as proxies for extension of healthy life span in trials of geroprotective therapies that aim to slow aging. Several methods to measure biological aging show promise but it is not known if these methods are sensitive to changes caused by geroprotective therapy. We conducted analysis of two proposed methods to quantify biological aging using data from a recently concluded trial of an established geroprotector, caloric restriction. We obtained data from the National Institute on Aging CALERIE randomized trial through its public-access biobank (https://calerie.duke.edu/). The CALERIE trial randomized N = 220 nonobese adults to 25% caloric restriction (n = 145; 11.7% caloric restriction was achieved, on average) or to maintain current diet (n = 75) for 2 years. We analyzed biomarker data collected at baseline, 12-, and 24-month follow-up assessments. We applied published biomarker algorithms to these data to calculate two biological age measures, Klemera-Doubal Method Biological Age and homeostatic dysregulation. Intent-to-treat analysis using mixed-effects growth models of within-person change over time tested if caloric restriction slowed increase in measures of biological aging across follow-up. Analyses of both measures indicated caloric restriction slowed biological aging. Weight loss did not account for the observed effects. 
Results suggest future directions for testing of geroprotective therapies in humans.",2017-12-01 +30287666,"Socioeconomic position, symptoms of depression and subsequent mental healthcare treatment: a Danish register-based 6-month follow-up study on a population survey.","

Objective

Examine whether the severity of symptoms of depression was associated with the type of mental healthcare treatment (MHCT) received, independent of socioeconomic position (SEP).

Design

Register-based 6-month follow-up study on participants from the Danish General Suburban Population Study (GESUS) 2010-2013, who scored the Major Depression Inventory (MDI).

Participants

Nineteen thousand and eleven respondents from GESUS.

Interventions

The MHCT of the participants was tracked in national registers 4 months prior and 6 months after their MDI scores. MHCT was graduated in levels. SEP was defined by years of formal postsecondary education and income categorised into three levels. Data were analysed using logistic and Poisson regression analyses.

Outcomes

MHCT included number of contacts with: general practitioner (GP), GP mental health counselling, psychologist, psychiatrist, emergency contacts, admissions to psychiatric hospitals and prescriptions of antidepressants.

Results

For 547 respondents with moderate to severe symptoms of depression there was no difference across SEP in use of services, contact (y/n), frequency of contact or level of treatment, except respondents with low SEP had more frequent contact with their GP. However, of the 547 respondents, 10% had no treatment contacts at all, and 47% had no treatment beyond GP consultation. Among respondents with no/few symptoms of depression, postsecondary education ≥3 years was associated with more contact with specialised services (adjusted OR (aOR) 1.92; 95% CI 1.18 to 3.13); however, this difference did not apply for income; additionally, high SEP was associated with fewer prescriptions of antidepressants (education aOR 0.69; CI 0.50 to 0.95; income aOR 0.56, CI 0.39 to 0.80) compared with low SEP.

Conclusion

Participants with symptoms of depression were treated according to the severity of their symptoms, independent of SEP; however, more than half with moderate to severe symptoms received no treatment beyond GP consultation. People in low SEP and no/few symptoms of depression were more often treated with antidepressants. The study was approved by The Danish Data Protection Agency Journal number 2015-41-3984. Accessible at: https://www.datatilsynet.dk/fortegnelsen/soeg-i-fortegnelsen/.",2018-10-03 +29674364,"Health and Prevention Enhancement (H-PEACE): a retrospective, population-based cohort study conducted at the Seoul National University Hospital Gangnam Center, Korea.","PURPOSE:The Health and Prevention Enhancement (H-PEACE) study was designed to investigate the association of diagnostic imaging results, biomarkers and the predisease stage of non-communicable diseases (NCDs), such as malignancies and metabolic diseases, in an average-risk population in Korea. PARTICIPANTS:This study enrolled a large-scale retrospective cohort at the Healthcare System Gangnam Center, Seoul National University Hospital, from October 2003 to December 2014. FINDINGS TO DATE:The baseline and follow-up information collected in the predisease stage of NCDs allows for evaluation of an individual's potential NCD risk, which is necessary for establishing personalised prevention strategies. A total of 91 336 health examinees were included in the cohort, and we repeatedly measured and collected information for 50.9% (n=46 484) of the cohort members. All participants completed structured questionnaires (lifestyle, medical history, mini-dietary assessment index, sex-specific variables and psychiatric assessment), doctors' physical examinations, laboratory blood and urine tests and digital chest X-ray imaging. 
For participants with available data, we also obtained information on specific diagnostic variables using advanced diagnostic tests, including coronary CT for coronary calcium scores, colonoscopy and brain MRI. Furthermore, 17 455 of the participants who provided informed consent and donated blood samples were enrolled into the Gene-environmental interaction and phenotype study, a subcohort of the H-PEACE, from October 2013, and we analysed genome-wide single-nucleotide polymorphism array data for 6579 of these blood samples. FUTURE PLANS:The data obtained from this cohort will be used to facilitate advanced and accurate diagnostic techniques related to NCDs while considering various phenotypes. Potential collaborators can access the dataset after receiving approval from our institutional review board. Applications can be submitted on the study homepage (http://en-healthcare.snuh.org/HPEACEstudy).",2018-04-19 +30012108,CEDAR OnDemand: a browser extension to generate ontology-based scientific metadata.,"

Background

Public biomedical data repositories often provide web-based interfaces to collect experimental metadata. However, these interfaces typically reflect the ad hoc metadata specification practices of the associated repositories, leading to a lack of standardization in the collected metadata. This lack of standardization limits the ability of the source datasets to be broadly discovered, reused, and integrated with other datasets. To increase reuse, discoverability, and reproducibility of the described experiments, datasets should be appropriately annotated by using agreed-upon terms, ideally from ontologies or other controlled term sources.

Results

This work presents ""CEDAR OnDemand"", a browser extension powered by the NCBO (National Center for Biomedical Ontology) BioPortal that enables users to seamlessly enter ontology-based metadata through existing web forms native to individual repositories. CEDAR OnDemand analyzes the web page contents to identify the text input fields and associate them with relevant ontologies which are recommended automatically based upon input fields' labels (using the NCBO ontology recommender) and a pre-defined list of ontologies. These field-specific ontologies are used for controlling metadata entry. CEDAR OnDemand works for any web form designed in the HTML format. We demonstrate how CEDAR OnDemand works through the NCBI (National Center for Biotechnology Information) BioSample web-based metadata entry.

Conclusion

CEDAR OnDemand helps lower the barrier of incorporating ontologies into standardized metadata entry for public data repositories. CEDAR OnDemand is available freely on the Google Chrome store https://chrome.google.com/webstore/search/CEDAROnDemand.",2018-07-16 +29942004,Author Correction: Structural prediction of protein models using distance restraints derived from cross-linking mass spectrometry data.,"In the version of this article initially published online, the authors used incorrectly defined restraints for specifying the distance between residues when using the HADDOCK portal. Following the publication of a Correspondence by the developers of the HADDOCK portal (Nat. Protoc. https://dx.doi.org/10.1038/s41596-018-0017-6, 2018) and a Reply by the authors of the Protocol (Nat. Protoc. https://dx.doi.org/10.1038/s41596-018-0018-5, 2018), the syntax in step 21 has been corrected. In addition, the input files (available as Supplementary Data 5-7) have been replaced.",2018-07-01 +30007805,Genomic heritability and genome-wide association analysis of anti-Müllerian hormone in Holstein dairy heifers.,"Anti-Müllerian hormone (AMH) is an ovarian growth factor that plays an important role in regulation of ovarian follicle growth. The objectives of this study were to estimate the genomic heritability of AMH and identify genomic regions associated with AMH production in a genome-wide association (GWA) analysis. Concentrations of AMH were determined in 2,905 dairy Holstein heifers genotyped using the Zoetis medium density panel (Zoetis Inclusions, Kalamazoo, MI) with 54,519 single nucleotide polymorphism (SNP) markers remaining after standard genotype quality control edits. A linear mixed model was used to model the random effects of sampling day and genomics on the logarithm of AMH. The genomic heritability (± standard error of the mean) of AMH was estimated to be 0.36 ± 0.03. 
Our GWA analysis inferred significant associations between AMH and 11 SNP markers on chromosome 11 and 1 SNP marker on chromosome 20. Annotated genes with significant associations were identified using the Ensembl genome database (version 88) of the cow genome (version UMD 3.1; https://www.ensembl.org/biomart). Gene set enrichment analysis revealed that 2 gene ontology (GO) terms were significantly enriched in the list of candidate genes: G-protein coupled receptor signaling pathway (GO:0007186) and the detection of chemical stimulus involved in sensory perception (GO:0050907). The estimated high heritability and previously established associations between AMH and ovarian follicular reserve, fertility, longevity, and superovulatory response in cattle implies that AMH could be used as a biomarker for genetic improvement of reproductive potential.",2018-07-13 +30250964,Systematic review of mobile phone-based teledermatology.,"Teledermatology is an expanding field within dermatology that has grown and become more clinically accepted by both patients and doctors. With approximately 260 million mobile phone users in the US and 4-6 billion worldwide with access to mobile phones, teledermatology serves as a potentially useful tool for diagnosis and management (Samkange-Zeeb and Blettner in Emerg Health Threats J, https://doi.org/10.3134/ehtj.09.005 , 2009). In this review, we provide a detailed overview of mobile phone technology and the accumulating evidence for its incorporation into dermatology. Key questions addressed include accuracy and concordance between mobile teledermatology and face-to-face dermatology for the diagnosis of skin conditions. Similarly, accuracy and concordance were compared for the management of skin conditions. 
To track the development of mobile phone technology, we also assessed how data were captured, stored, and displayed in teledermatology studies.",2018-09-24 +30972771,What does the Australian public know about occupational therapy for older people? A population survey.,"

Introduction

Occupational therapy can delay functional decline and improve quality of life of older people. Yet people may not seek occupational therapy services as they may not be aware of their scope or benefits. The aim of this study was to ascertain what the general public in Australia knows about occupational therapy services for older people.

Methods

A cross-sectional cohort study was completed via a consumer panel provider, PureProfile (https://www.pureprofile.com/au/), a company specialising in online survey programming to registered participants of the general public ('panel'). For a fee, a client can include a question in a weekly survey, and receive approximately 1000 responses (including detail about the respondents' gender, age group and place of residence). We asked a free-text question about the participants' understanding of occupational therapy and its role in supporting older adults. We used descriptive statistics to summarise sociodemographic data. A thematic approach to analysis was used to explore themes from the free text responses. A chi-squared test for independence was used to explore association and/or differences between age group, gender, place of residence and understanding about occupational therapy for older people. Included were people aged ≥18 years living in Australia.

Results

We received 1004 responses; about half were female (50.9%) and one-fifth (20.1%) aged 65 or over. Of the 1004 respondents, approximately 10% could provide a good or advanced description of occupational therapy. Over half of the participants had some, but limited knowledge about the profession with references to general rehabilitation, physical therapies and return to work type interventions.

Conclusion

Knowledge about the role of occupational therapy in supporting older people is limited. There is a need to address misconceptions that occupational therapy is only concerned with workplace or physical health-related matters to enable better service engagement in the consumer-driven care model in Australia.",2019-04-10 +24194593,SelenoDB 2.0: annotation of selenoprotein genes in animals and their genetic diversity in humans.,"SelenoDB (http://www.selenodb.org) aims to provide high-quality annotations of selenoprotein genes, proteins and SECIS elements. Selenoproteins are proteins that contain the amino acid selenocysteine (Sec) and the first release of the database included annotations for eight species. Since the release of SelenoDB 1.0 many new animal genomes have been sequenced. The annotations of selenoproteins in new genomes usually contain many errors in major databases. For this reason, we have now fully annotated selenoprotein genes in 58 animal genomes. We provide manually curated annotations for human selenoproteins, whereas we use an automatic annotation pipeline to annotate selenoprotein genes in other animal genomes. In addition, we annotate the homologous genes containing cysteine (Cys) instead of Sec. Finally, we have surveyed genetic variation in the annotated genes in humans. We use exon capture and resequencing approaches to identify single-nucleotide polymorphisms in more than 50 human populations around the world. We thus present a detailed view of the genetic divergence of Sec- and Cys-containing genes in animals and their diversity in humans. The addition of these datasets into the second release of the database provides a valuable resource for addressing medical and evolutionary questions in selenium biology.",2013-11-04 +29166858,ADAGE signature analysis: differential expression analysis with data-defined gene sets.,"

Background

Gene set enrichment analysis and overrepresentation analyses are commonly used methods to determine the biological processes affected by a differential expression experiment. This approach requires biologically relevant gene sets, which are currently curated manually, limiting their availability and accuracy in many organisms without extensively curated resources. New feature learning approaches can now be paired with existing data collections to directly extract functional gene sets from big data.

Results

Here we introduce a method to identify perturbed processes. In contrast with methods that use curated gene sets, this approach uses signatures extracted from public expression data. We first extract expression signatures from public data using ADAGE, a neural network-based feature extraction approach. We next identify signatures that are differentially active under a given treatment. Our results demonstrate that these signatures represent biological processes that are perturbed by the experiment. Because these signatures are directly learned from data without supervision, they can identify uncurated or novel biological processes. We implemented ADAGE signature analysis for the bacterial pathogen Pseudomonas aeruginosa. For the convenience of different user groups, we implemented both an R package (ADAGEpath) and a web server ( http://adage.greenelab.com ) to run these analyses. Both are open-source to allow easy expansion to other organisms or signature generation methods. We applied ADAGE signature analysis to an example dataset in which wild-type and ∆anr mutant cells were grown as biofilms on the Cystic Fibrosis genotype bronchial epithelial cells. We mapped active signatures in the dataset to KEGG pathways and compared with pathways identified using GSEA. The two approaches generally return consistent results; however, ADAGE signature analysis also identified a signature that revealed the molecularly supported link between the MexT regulon and Anr.

Conclusions

We designed ADAGE signature analysis to perform gene set analysis using data-defined functional gene signatures. This approach addresses an important gap for biologists studying non-traditional model organisms and those without extensive curated resources available. We built both an R package and web server to provide ADAGE signature analysis to the community.",2017-11-22 +29241666,Identification of human circadian genes based on time course gene expression profiles by using a deep learning method.,"Circadian genes express periodically in an approximate 24-h period and the identification and study of these genes can provide deep understanding of the circadian control which plays significant roles in human health. Although many circadian gene identification algorithms have been developed, large numbers of false positives and low coverage are still major problems in this field. In this study we constructed a novel computational framework for circadian gene identification using deep neural networks (DNN) - a deep learning algorithm which can represent the raw form of data patterns without imposing assumptions on the expression distribution. Firstly, we transformed time-course gene expression data into categorical-state data to denote the changing trend of gene expression. Two distinct expression patterns emerged after clustering of the state data for circadian genes from our manually created learning dataset. DNN was then applied to discriminate the aperiodic genes and the two subtypes of periodic genes. In order to assess the performance of DNN, four commonly used machine learning methods including k-nearest neighbors, logistic regression, naïve Bayes, and support vector machines were used for comparison. The results show that the DNN model achieves the best balanced precision and recall. Next, we conducted large scale circadian gene detection using the trained DNN model for the remaining transcription profiles. 
Comparing with JTK_CYCLE and a study performed by Möller-Levet et al. (doi: https://doi.org/10.1073/pnas.1217154110), we identified 1132 novel periodic genes. Through the functional analysis of these novel circadian genes, we found that the GTPase superfamily exhibits distinct circadian expression patterns and may provide a molecular switch of circadian control of the functioning of the immune system in human blood. Our study provides novel insights into both the circadian gene identification field and the study of complex circadian-driven biological control. This article is part of a Special Issue entitled: Accelerating Precision Medicine through Genetic and Genomic Big Data Analysis edited by Yudong Cai & Tao Huang.",2017-12-12 +30010561,Quality-of-Experience for Adaptive Streaming Videos: An Expectation Confirmation Theory Motivated Approach. ,"The dynamic adaptive streaming over HTTP (DASH) provides an inter-operable solution to overcome volatile network conditions, but how the human visual quality-ofexperience (QoE) changes with time-varying video quality is not well-understood. Here, we build a large-scale video database of time-varying quality and design a series of subjective experiments to investigate how humans respond to compression level, spatial and temporal resolution adaptations. Our path-analytic results show that quality adaptations influence the QoE by modifying the perceived quality of subsequent video segments. Specifically, the quality deviation introduced by quality adaptations is asymmetric with respect to the adaptation direction, which is further influenced by other factors such as compression level and content. Furthermore, we propose an objective QoE model by integrating the empirical findings from our subjective experiments and the expectation confirmation theory (ECT). Experimental results show that the proposed ECT-QoE model is in close agreement with subjective opinions and significantly outperforms existing QoE models. 
The video database together with the code are available online at https://ece.uwaterloo.ca/~zduanmu/tip2018ectqoe/.",2018-07-12 +30446382,The Role of Survivorship Care for Patients with Glioma.,"

Objectives

To discuss the role of survivorship care and survivorship care plans, including a recently developed neuro-oncology-specific care plan (https://www.soc-neuro-onc.org/SNO/Resources/Survivorship_Care_Plan.aspx) for adult patients diagnosed with primary glial neoplasms and the necessary educational needs of oncology nurses.

Data sources

Published peer-reviewed literature and resources from cancer and neuro-oncology professional organizations and patient advocacy organizations.

Conclusion

The current Commission on Cancer mandates adult cancer patients treated with curative intent each receive an individualized survivorship care plan. Patients with glioma are likely to benefit from receiving survivorship care, including survivorship care plans aimed at addressing the complex and evolving needs of this unique patient population throughout their illness trajectory.

Implications for nursing practice

Nurse professionals are critical to the development and implementation of cancer survivorship care. This growing leadership role presents oncology nurses with specific and new educational needs regarding survivorship care.",2018-11-13 +30921601,Large aortic arch plaques correlate with CHADS2 and CHA2DS2-VASc scores in cryptogenic stroke.,"BACKGROUND AND AIMS:Current trends have suggested covert atrial fibrillation as a mechanism of cryptogenic stroke. However, etiological heterogeneity regarding the underlying embolic sources remains a critical issue in cryptogenic stroke. METHODS:CHALLENGE ESUS/CS (Mechanisms of Embolic Stroke Clarified by Transesophageal Echocardiography for Embolic Stroke of Undetermined Source/Cryptogenic Stroke) is a multicenter observational registry of cryptogenic stroke patients admitted to participating hospitals, who underwent transesophageal echocardiography between April 2014 and December 2016. We obtained baseline characteristics, radiological and laboratory data, and echocardiographic findings, especially for embolic sources demonstrated on transesophageal echocardiography, and conducted comparisons according to CHADS2 and CHA2DS2-VASc scores (0-1 vs. ≥2, respectively). This study was registered at http://www.umin.ac.jp/ctr/(UMIN000032957). RESULTS:The study comprised 677 patients (age, 68.7 ± 12.8 years; 455 males; median National Institutes of Health Stroke Scale score, 2) with cryptogenic stroke. On multiple logistic regression analysis, large aortic arch plaque ≥4 mm (odds ratio [OR], 2.25; 95% confidence interval [CI], 1.51-3.36; p < 0.001), with ulcerative or mobile components (OR, 2.37; 95%CI, 1.38-4.06; p = 0.002), was associated with CHADS2 score ≥2. Large aortic arch plaque ≥4 mm (OR, 3.88; 95%CI, 2.07-7.27; p < 0.001) and ulcerative or mobile components (OR, 3.25; 95%CI, 1.44-7.34; p = 0.005) were linked to CHA2DS2-VASc score ≥2. 
CONCLUSIONS:The CHALLENGE ESUS/CS registry is a large TEE registry, and clarifies potential embolic etiologies of cryptogenic stroke using TEE. Large aortic arch plaques were associated with high CHADS2 and CHA2DS2-VASc scores, and represented important embolic sources in cryptogenic stroke.",2019-03-20 +29092934,Variant Review with the Integrative Genomics Viewer.,"Manual review of aligned reads for confirmation and interpretation of variant calls is an important step in many variant calling pipelines for next-generation sequencing (NGS) data. Visual inspection can greatly increase the confidence in calls, reduce the risk of false positives, and help characterize complex events. The Integrative Genomics Viewer (IGV) was one of the first tools to provide NGS data visualization, and it currently provides a rich set of tools for inspection, validation, and interpretation of NGS datasets, as well as other types of genomic data. Here, we present a short overview of IGV's variant review features for both single-nucleotide variants and structural variants, with examples from both cancer and germline datasets. IGV is freely available at https://www.igv.org Cancer Res; 77(21); e31-34. ©2017 AACR.",2017-11-01 +22039152,ProGlycProt: a repository of experimentally characterized prokaryotic glycoproteins.,"ProGlycProt (http://www.proglycprot.org/) is an open access, manually curated, comprehensive repository of bacterial and archaeal glycoproteins with at least one experimentally validated glycosite (glycosylated residue). To facilitate maximum information at one point, the database is arranged under two sections: (i) ProCGP-the main data section consisting of 95 entries with experimentally characterized glycosites and (ii) ProUGP-a supplementary data section containing 245 entries with experimentally identified glycosylation but uncharacterized glycosites. 
Every entry in the database is fully cross-referenced and enriched with available published information about source organism, coding gene, protein, glycosites, glycosylation type, attached glycan, associated oligosaccharyl/glycosyl transferases (OSTs/GTs), supporting references, and applicable additional information. Interestingly, ProGlycProt contains as many as 174 entries for which information is unavailable or the characterized glycosites are unannotated in Swiss-Prot release 2011_07. The website supports a dedicated structure gallery of homology models and crystal structures of characterized glycoproteins in addition to two new tools developed in view of emerging information about prokaryotic sequons (conserved sequences of amino acids around glycosites) that are never or rarely seen in eukaryotic glycoproteins. ProGlycProt provides an extensive compilation of experimentally identified glycosites (334) and glycoproteins (340) of prokaryotes that could serve as an information resource for research and technology applications in glycobiology.",2011-10-28 +27199454,Comprehensive database of human E3 ubiquitin ligases: application to aquaporin-2 regulation.,"Aquaporin-2 (AQP2) is regulated in part via vasopressin-mediated changes in protein half-life that are in turn dependent on AQP2 ubiquitination. Here we addressed the question, ""What E3 ubiquitin ligase is most likely to be responsible for AQP2 ubiquitination?"" using large-scale data integration based on Bayes' rule. The first step was to bioinformatically identify all E3 ligase genes coded by the human genome. The 377 E3 ubiquitin ligases identified in the human genome, consisting predominant of HECT, RING, and U-box proteins, have been used to create a publically accessible and downloadable online database (https://hpcwebapps.cit.nih.gov/ESBL/Database/E3-ligases/). 
We also curated a second database of E3 ligase accessory proteins that included BTB domain proteins, cullins, SOCS-box proteins, and F-box proteins. Using Bayes' theorem to integrate information from multiple large-scale proteomic and transcriptomic datasets, we ranked these 377 E3 ligases with respect to their probability of interaction with AQP2. Application of Bayes' rule identified the E3 ligases most likely to interact with AQP2 as (in order of probability): NEDD4 and NEDD4L (tied for first), AMFR, STUB1, ITCH, ZFPL1. Significantly, the two E3 ligases tied for top rank have also been studied extensively in the reductionist literature as regulatory proteins in renal tubule epithelia. The concordance of conclusions from reductionist and systems-level data provides strong motivation for further studies of the roles of NEDD4 and NEDD4L in the regulation of AQP2 protein turnover.",2016-05-13 +26612862,BIGNASim: a NoSQL database structure and analysis portal for nucleic acids simulation data.,"Molecular dynamics simulation (MD) is, just behind genomics, the bioinformatics tool that generates the largest amounts of data, and that is using the largest amount of CPU time in supercomputing centres. MD trajectories are obtained after months of calculations, analysed in situ, and in practice forgotten. Several projects to generate stable trajectory databases have been developed for proteins, but no equivalence exists in the nucleic acids world. We present here a novel database system to store MD trajectories and analyses of nucleic acids. The initial data set available consists mainly of the benchmark of the new molecular dynamics force-field, parmBSC1. It contains 156 simulations, with over 120 μs of total simulation time. A deposition protocol is available to accept the submission of new trajectory data. The database is based on the combination of two NoSQL engines, Cassandra for storing trajectories and MongoDB to store analysis results and simulation metadata. 
The analyses available include backbone geometries, helical analysis, NMR observables and a variety of mechanical analyses. Individual trajectories and combined meta-trajectories can be downloaded from the portal. The system is accessible through http://mmb.irbbarcelona.org/BIGNASim/. Supplementary Material is also available on-line at http://mmb.irbbarcelona.org/BIGNASim/SuppMaterial/.",2015-11-26 +29671475,Integrated Social-Behavioral and Ecological Risk Maps to Prioritize Local Public Health Responses to Lyme Disease.,"

Background

The risk of contracting Lyme disease (LD) can vary spatially because of spatial heterogeneity in risk factors such as social-behavior and exposure to ecological risk factors. Integrating these risk factors to inform decision-making should therefore increase the effectiveness of mitigation interventions.

Objectives

The objective of this study was to develop an integrated social-behavioral and ecological risk-mapping approach to identify priority areas for LD interventions.

Methods

The study was conducted in the Montérégie region of Southern Quebec, Canada, where LD is a newly endemic disease. Spatial variation in LD knowledge, risk perceptions, and behaviors in the population were measured using web survey data collected in 2012. These data were used as a proxy for the social-behavioral component of risk. Tick vector population densities were measured in the environment during field surveillance from 2007 to 2012 to provide an index of the ecological component of risk. Social-behavioral and ecological components of risk were combined with human population density to create integrated risk maps. Map predictions were validated by testing the association between high-risk areas and the current spatial distribution of human LD cases.

Results

Social-behavioral and ecological components of LD risk had markedly different distributions within the study region, suggesting that both factors should be considered for locally adapted interventions. The occurrence of human LD cases in a municipality was positively associated with tick density (p<0.01) but was not significantly associated with social-behavioral risk.

Conclusion

This study is an applied demonstration of how integrated social-behavioral and ecological risk maps can be created to assist decision-making. Social survey data are a valuable but underutilized source of information for understanding regional variation in LD exposure, and integrating this information into risk maps provides a novel approach for prioritizing and adapting interventions to the local characteristics of target populations. https://doi.org/10.1289/EHP1943.",2018-04-18 +27423895,TopPIC: a software tool for top-down mass spectrometry-based proteoform identification and characterization.,"Top-down mass spectrometry enables the observation of whole complex proteoforms in biological samples and provides crucial information complementary to bottom-up mass spectrometry. Because of the complexity of top-down mass spectra and proteoforms, it is a challenging problem to efficiently interpret top-down tandem mass spectra in high-throughput proteome-level proteomics studies. We present TopPIC, a tool that efficiently identifies and characterizes complex proteoforms with unknown primary structure alterations, such as amino acid mutations and post-translational modifications, by searching top-down tandem mass spectra against a protein database.

Availability and implementation

http://proteomics.informatics.iupui.edu/software/toppic/ CONTACT: xwliu@iupui.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-16 +30586682,Selection of nonlinear interactions by a forward stepwise algorithm: Application to identifying environmental chemical mixtures affecting health outcomes.,"In this paper, we propose a stepwise forward selection algorithm for detecting the effects of a set of correlated exposures and their interactions on a health outcome of interest when the underlying relationship could potentially be nonlinear. Though the proposed method is very general, our application in this paper remains to be on analysis of multiple pollutants and their interactions. Simultaneous exposure to multiple environmental pollutants could affect human health in a multitude of complex ways. For understanding the health effects of multiple environmental exposures, it is often important to identify and estimate complex interactions among exposures. However, this issue becomes analytically challenging in the presence of potential nonlinearity in the outcome-exposure response surface and a set of correlated exposures. Through simulation studies and analyses of test datasets that were simulated as a part of a data challenge in multipollutant modeling organized by the National Institute of Environmental Health Sciences (http://www.niehs.nih.gov/about/events/pastmtg/2015/statistical/), we illustrate the advantages of our proposed method in comparison with existing alternative approaches. A particular strength of our method is that it demonstrates very low false positives across empirical studies. 
Our method is also used to analyze a dataset that was released from the Health Outcomes and Measurement of the Environment Study as a benchmark beta-tester dataset as a part of the same workshop.",2018-12-26 +22053084,SMART 7: recent updates to the protein domain annotation resource.,"SMART (Simple Modular Architecture Research Tool) is an online resource (http://smart.embl.de/) for the identification and annotation of protein domains and the analysis of protein domain architectures. SMART version 7 contains manually curated models for 1009 protein domains, 200 more than in the previous version. The current release introduces several novel features and a streamlined user interface resulting in a faster and more comfortable workflow. The underlying protein databases were greatly expanded, resulting in a 2-fold increase in number of annotated domains and features. The database of completely sequenced genomes now includes 1133 species, compared to 630 in the previous release. Domain architecture analysis results can now be exported and visualized through the iTOL phylogenetic tree viewer. 'metaSMART' was introduced as a novel subresource dedicated to the exploration and analysis of domain architectures in various metagenomics data sets. An advanced full text search engine was implemented, covering the complete annotations for SMART and Pfam domains, as well as the complete set of protein descriptions, allowing users to quickly find relevant information.",2011-11-03 +30247766,tsscds2018: A code for automated discovery of chemical reaction mechanisms and solving the kinetics.,"A new software, called tsscds2018, has been developed to discover reaction mechanisms and solve the kinetics in a fully automated fashion. The program employs algorithms based on Graph Theory to find transition state (TS) geometries from accelerated semiempirical dynamics simulations carried out with MOPAC2016. 
Then, the TSs are connected to the corresponding minima and the reaction network is obtained. Kinetic data like populations vs time or the abundancies of each product can also be obtained with our program thanks to a Kinetic Monte Carlo routine. Highly accurate ab initio potential energy diagrams and kinetics can also be obtained using an interface with Gaussian09. The source code is available on the following site: http://forge.cesga.es/wiki/g/tsscds/HomePage © 2018 Wiley Periodicals, Inc.",2018-09-01 +22058132,PlantNATsDB: a comprehensive database of plant natural antisense transcripts.,"Natural antisense transcripts (NATs), as one type of regulatory RNAs, occur prevalently in plant genomes and play significant roles in physiological and pathological processes. Although their important biological functions have been reported widely, a comprehensive database is lacking up to now. Consequently, we constructed a plant NAT database (PlantNATsDB) involving approximately 2 million NAT pairs in 69 plant species. GO annotation and high-throughput small RNA sequencing data currently available were integrated to investigate the biological function of NATs. PlantNATsDB provides various user-friendly web interfaces to facilitate the presentation of NATs and an integrated, graphical network browser to display the complex networks formed by different NATs. Moreover, a 'Gene Set Analysis' module based on GO annotation was designed to dig out the statistical significantly overrepresented GO categories from the specific NAT network. PlantNATsDB is currently the most comprehensive resource of NATs in the plant kingdom, which can serve as a reference database to investigate the regulatory function of NATs. The PlantNATsDB is freely available at http://bis.zju.edu.cn/pnatdb/.",2011-11-03 +28025197,A novel approach based on KATZ measure to predict associations of human microbiota with non-infectious diseases.,"

Motivation

Accumulating clinical observations have indicated that microbes living in the human body are closely associated with a wide range of human noninfectious diseases, which provides promising insights into understanding complex disease mechanisms. Predicting microbe-disease associations could not only boost human disease diagnosis and prognosis, but also improve new drug development. However, little effort has been made to understand and predict human microbe-disease associations on a large scale until now.

Results

In this work, we constructed a microbe-human disease association network and further developed a novel computational model of KATZ measure for Human Microbe-Disease Association prediction (KATZHMDA) based on the assumption that functionally similar microbes tend to have similar interaction and non-interaction patterns with noninfectious diseases, and vice versa. To our knowledge, KATZHMDA is the first tool for microbe-disease association prediction. The reliable prediction performance could be attributed to the use of KATZ measurement, and the introduction of Gaussian interaction profile kernel similarity for microbes and diseases. LOOCV and k-fold cross validation were implemented to evaluate the effectiveness of this novel computational model based on known microbe-disease associations obtained from HMDAD database. As a result, KATZHMDA achieved reliable performance with average AUCs of 0.8130 ± 0.0054, 0.8301 ± 0.0033 and 0.8382 in 2-fold and 5-fold cross validation and LOOCV framework, respectively. It is anticipated that KATZHMDA could be used to obtain more novel microbes associated with important noninfectious human diseases and therefore benefit drug discovery and human medical improvement.

Availability and implementation

Matlab codes and dataset explored in this work are available at http://dwz.cn/4oX5mS .

Contacts

xingchen@amss.ac.cn or zhuhongyou@gmail.com or wangxuesongcumt@163.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +29356929,Dealing with the Conflicting Results of Psycholinguistic Experiments: How to Resolve Them with the Help of Statistical Meta-analysis.,"This paper proposes the use of the tools of statistical meta-analysis as a method of conflict resolution with respect to experiments in cognitive linguistics. With the help of statistical meta-analysis, the effect size of similar experiments can be compared, a well-founded and robust synthesis of the experimental data can be achieved, and possible causes of any divergence(s) in the outcomes can be revealed. This application of statistical meta-analysis offers a novel method of how diverging evidence can be dealt with. The workability of this idea is exemplified by a case study dealing with a series of experiments conducted as non-exact replications of Thibodeau and Boroditsky (PLoS ONE 6(2):e16782, 2011. https://doi.org/10.1371/journal.pone.0016782 ).",2018-08-01 +,MulSatDB: a first online database for mulberry microsatellites,"KEY MESSAGE : Simple sequence repeat motifs were mined from the genome and EST sequences of Morus notabilis and archived in MulSatDB. Bioinformatics tools were integrated with the database for the analysis of genomic datasets. Mulberry is a crop of economic importance in sericulture, which shapes the lives of millions of rural people among different Eurasian and Latin American countries. Limited availability of genomic resources has constrained the molecular breeding efforts in mulberry, a poorly studied crop. Microsatellite or simple sequence repeat (SSR) has revolutionized the plant breeding and is used in linkage mapping, association studies, diversity, and parentage analysis, etc. Recent availability of mulberry whole genome assembly provided an opportunity for the development of mulberry-specific DNA markers. 
In this study, we mined a total of 217,312 microsatellites from whole genome and 961 microsatellites from EST sequences of Morus notabilis. Mono-repeats were predominant among both whole genome and EST sequences. The SSR containing EST sequences were functionally annotated, and SSRs mined from whole genome were mapped on chromosomes of the phylogenetically related genus—Fragaria vesca, to aid the selection of markers based on the function and location. All the mined markers were archived in the mulberry microsatellite database (MulSatDB), and the markers can be retrieved based on different criteria like marker location, repeat kind, motif type and size. Primer3plus and CMap tools are integrated with the database to design primers for PCR amplification and to visualize markers on F. vesca chromosomes, respectively. A blast tool is also integrated to collate new markers with the database. MulSatDB is the first and complete destination for mulberry researchers to browse SSR markers, design primers, and locate markers on strawberry chromosomes. MulSatDB is freely accessible at http://btismysore.in/mulsatdb .",2014-12-01 +23505298,pfsearchV3: a code acceleration and heuristic to search PROSITE profiles.,"

Summary

The PROSITE resource provides a rich and well annotated source of signatures in the form of generalized profiles that allow protein domain detection and functional annotation. One of the major limiting factors in the application of PROSITE in genome and metagenome annotation pipelines is the time required to search protein sequence databases for putative matches. We describe an improved and optimized implementation of the PROSITE search tool pfsearch that, combined with a newly developed heuristic, addresses this limitation. On a modern x86_64 hyper-threaded quad-core desktop computer, the new pfsearchV3 is two orders of magnitude faster than the original algorithm.

Availability and implementation

Source code and binaries of pfsearchV3 are freely available for download at http://web.expasy.org/pftools/#pfsearchV3, implemented in C and supported on Linux. PROSITE generalized profiles including the heuristic cut-off scores are available at the same address.",2013-03-16 +29186328,Meta-analytic principal component analysis in integrative omics application.,"Motivation:With the prevalent usage of microarray and massively parallel sequencing, numerous high-throughput omics datasets have become available in the public domain. Integrating abundant information among omics datasets is critical to elucidate biological mechanisms. Due to the high-dimensional nature of the data, methods such as principal component analysis (PCA) have been widely applied, aiming at effective dimension reduction and exploratory visualization. Results:In this article, we combine multiple omics datasets of identical or similar biological hypothesis and introduce two variations of meta-analytic framework of PCA, namely MetaPCA. Regularization is further incorporated to facilitate sparse feature selection in MetaPCA. We apply MetaPCA and sparse MetaPCA to simulations, three transcriptomic meta-analysis studies in yeast cell cycle, prostate cancer, mouse metabolism and a TCGA pan-cancer methylation study. The result shows improved accuracy, robustness and exploratory visualization of the proposed framework. Availability and implementation:An R package MetaPCA is available online. (http://tsenglab.biostat.pitt.edu/software.htm). Contact:ctseng@pitt.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +30456231,Digital templates and brain atlas dataset for the mouse lemur primate.,"We present a dataset made of 3D digital brain templates and of an atlas of the gray mouse lemur (Microcebus murinus), a small prosimian primate of growing interest for studies of primate biology and evolution. 
A template image was constructed from in vivo magnetic resonance imaging (MRI) data of 34 animals. This template was then manually segmented into 40 cortical, 74 subcortical and 6 cerebro-spinal fluid (CSF) regions. Additionally, the dataset contains probability maps of gray matter, white matter and CSF. The template, manual segmentation and probability maps can be downloaded in NIfTI-1 format at https://www.nitrc.org/projects/mouselemuratlas. Further construction and validation details are given in ""A 3D population-based brain atlas of the mouse lemur primate with examples of applications in aging studies and comparative anatomy"" (Nadkarni et al., 2018) [1], which also presents applications of the atlas such as automatic assessment of regional age-associated cerebral atrophy and comparative neuroanatomy studies.",2018-10-25 +29688310,"RANGER-DTL 2.0: rigorous reconstruction of gene-family evolution by duplication, transfer and loss.","Summary:RANGER-DTL 2.0 is a software program for inferring gene family evolution using Duplication-Transfer-Loss reconciliation. This new software is highly scalable and easy to use, and offers many new features not currently available in any other reconciliation program. RANGER-DTL 2.0 has a particular focus on reconciliation accuracy and can account for many sources of reconciliation uncertainty including uncertain gene tree rooting, gene tree topological uncertainty, multiple optimal reconciliations and alternative event cost assignments. RANGER-DTL 2.0 is open-source and written in C++ and Python. Availability and implementation:Pre-compiled executables, source code (open-source under GNU GPL) and a detailed manual are freely available from http://compbio.engr.uconn.edu/software/RANGER-DTL/. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-09-01 +30297807,Integration of gene expression and brain-wide connectivity reveals the multiscale organization of mouse hippocampal networks.,"Understanding the organization of the hippocampus is fundamental to understanding brain function related to learning, memory, emotions, and diseases such as Alzheimer's disease. Physiological studies in humans and rodents have suggested that there is both structural and functional heterogeneity along the longitudinal axis of the hippocampus. However, the recent discovery of discrete gene expression domains in the mouse hippocampus has provided the opportunity to re-evaluate hippocampal connectivity. To integrate mouse hippocampal gene expression and connectivity, we mapped the distribution of distinct gene expression patterns in mouse hippocampus and subiculum to create the Hippocampus Gene Expression Atlas (HGEA). Notably, previously unknown subiculum gene expression patterns revealed a hidden laminar organization. Guided by the HGEA, we constructed the most detailed hippocampal connectome available using Mouse Connectome Project ( http://www.mouseconnectome.org ) tract tracing data. Our results define the hippocampus' multiscale network organization and elucidate each subnetwork's unique brain-wide connectivity patterns.",2018-10-08 +23487186,The new modern era of yeast genomics: community sequencing and the resulting annotation of multiple Saccharomyces cerevisiae strains at the Saccharomyces Genome Database.,"The first completed eukaryotic genome sequence was that of the yeast Saccharomyces cerevisiae, and the Saccharomyces Genome Database (SGD; http://www.yeastgenome.org/) is the original model organism database. SGD remains the authoritative community resource for the S. cerevisiae reference genome sequence and its annotation, and continues to provide comprehensive biological information correlated with S. 
cerevisiae genes and their products. A diverse set of yeast strains have been sequenced to explore commercial and laboratory applications, and a brief history of those strains is provided. The publication of these new genomes has motivated the creation of new tools, and SGD will annotate and provide comparative analyses of these sequences, correlating changes with variations in strain phenotypes and protein function. We are entering a new era at SGD, as we incorporate these new sequences and make them accessible to the scientific community, all in an effort to continue in our mission of educating researchers and facilitating discovery.",2013-03-13 +24096364,Analysis of microRNA-target interactions across diverse cancer types.,"Little is known about the extent to which individual microRNAs (miRNAs) regulate common processes of tumor biology across diverse cancer types. Using molecular profiles of >3,000 tumors from 11 human cancer types in The Cancer Genome Atlas, we systematically analyzed expression of miRNAs and mRNAs across cancer types to infer recurrent cancer-associated miRNA-target relationships. As we expected, the inferred relationships were consistent with sequence-based predictions and published data from miRNA perturbation experiments. Notably, miRNAs with recurrent target relationships were frequently regulated by genetic and epigenetic alterations across the studied cancer types. We also identify new examples of miRNAs that coordinately regulate cancer pathways, including the miR-29 family, which recurrently regulates active DNA demethylation pathway members TET1 and TDG. The online resource http://cancerminer.org allows exploration and prioritization of miRNA-target interactions that potentially regulate tumorigenesis.",2013-10-06 +26264899,[Healthcare services research on pain in Germany. A survey].,"Within the last ten years healthcare services research has developed into an independent interdisciplinary field of research. 
A selective search of the literature was conducted in the database Google Scholar and the database on healthcare services research in Germany (http://versorgungsforschung-deutschland.de) for healthcare services research projects on pain in Germany. Healthcare services research projects were conducted by pharmaceutical companies, patient self-help organizations, scientific societies, statutory health insurance companies and university departments on acute and chronic pain. Valid data on the epidemiology, grading and treatment of chronic pain are available. There was an overuse of opioids and invasive procedures in patients with chronic low back pain, fibromyalgia syndrome and somatoform pain disorders. Databases for patients with chronic pain are currently constructed by pain societies. The fragmentation of data from health insurance companies, old age pension insurances, clinical institutions and population surveys and inconsistencies in diagnosing or encoding chronic pain impede the carrying out of significant longitudinal studies. Based on the data available, the needs of care for patients with chronic pain and the necessary care services cannot be derived. Important topics of future healthcare services research on pain are longitudinal studies on the cost efficacy and risks of inpatient and outpatient pain therapy based on routine data of health insurance companies, old age pension insurances and pain registries, longitudinal studies on ""patient careers"" (i.e. sequences of healthcare) and the identification of potential starting points for control of healthcare.",2015-10-01 +27563486,Pathology Informatics Essentials for Residents: A flexible informatics curriculum linked to Accreditation Council for Graduate Medical Education milestones.,"

Context

Recognition of the importance of informatics to the practice of pathology has surged. Training residents in pathology informatics have been a daunting task for most residency programs in the United States because faculty often lacks experience and training resources. Nevertheless, developing resident competence in informatics is essential for the future of pathology as a specialty.

Objective

The objective of the study is to develop and deliver a pathology informatics curriculum and instructional framework that guides pathology residency programs in training residents in critical pathology informatics knowledge and skills and meets Accreditation Council for Graduate Medical Education Informatics Milestones.

Design

The College of American Pathologists, Association of Pathology Chairs, and Association for Pathology Informatics formed a partnership and expert work group to identify critical pathology informatics training outcomes and to create a highly adaptable curriculum and instructional approach, supported by a multiyear change management strategy.

Results

Pathology Informatics Essentials for Residents (PIER) is a rigorous approach for educating all pathology residents in important pathology informatics knowledge and skills. PIER includes an instructional resource guide and toolkit for incorporating informatics training into residency programs that vary in needs, size, settings, and resources. PIER is available at http://www.apcprods.org/PIER (accessed April 6, 2016).

Conclusions

PIER is an important contribution to informatics training in pathology residency programs. PIER introduces pathology trainees to broadly useful informatics concepts and tools that are relevant to practice. PIER provides residency program directors with a means to implement a standardized informatics training curriculum, to adapt the approach to local program needs, and to evaluate resident performance and progress over time.",2016-07-06 +27899616,The TissueNet v.2 database: A quantitative view of protein-protein interactions across human tissues.,"Knowledge of the molecular interactions of human proteins within tissues is important for identifying their tissue-specific roles and for shedding light on tissue phenotypes. However, many protein-protein interactions (PPIs) have no tissue-contexts. The TissueNet database bridges this gap by associating experimentally-identified PPIs with human tissues that were shown to express both pair-mates. Users can select a protein and a tissue, and obtain a network view of the query protein and its tissue-associated PPIs. TissueNet v.2 is an updated version of the TissueNet database previously featured in NAR. It includes over 40 human tissues profiled via RNA-sequencing or protein-based assays. Users can select their preferred expression data source and interactively set the expression threshold for determining tissue-association. The output of TissueNet v.2 emphasizes qualitative and quantitative features of query proteins and their PPIs. The tissue-specificity view highlights tissue-specific and globally-expressed proteins, and the quantitative view highlights proteins that were differentially expressed in the selected tissue relative to all other tissues. Together, these views allow users to quickly assess the unique versus global functionality of query proteins. Thus, TissueNet v.2 offers an extensive, quantitative and user-friendly interface to study the roles of human proteins across tissues. 
TissueNet v.2 is available at http://netbio.bgu.ac.il/tissuenet.",2016-11-29 +26478614,National health accounts data from 1996 to 2010: a systematic review.,"

Objective

To collect, compile and evaluate publicly available national health accounts (NHA) reports produced worldwide between 1996 and 2010.

Methods

We downloaded country-generated NHA reports from the World Health Organization global health expenditure database and the Organisation for Economic Co-operation and Development (OECD) StatExtract website. We also obtained reports from Abt Associates, through contacts in individual countries and through an online search. We compiled data in the four main types used in these reports: (i) financing source; (ii) financing agent; (iii) health function; and (iv) health provider. We combined and adjusted data to conform with OECD's first edition of A system of health accounts manual, (2000).

Findings

We identified 872 NHA reports from 117 countries containing a total of 2936 matrices for the four data types. Most countries did not provide complete health expenditure data: only 252 of the 872 reports contained data in all four types. Thirty-eight countries reported an average not-specified-by-kind value greater than 20% for all data types and years. Some countries reported substantial year-on-year changes in both the level and composition of health expenditure that were probably produced by data-generation processes. All study data are publicly available at http://vizhub.healthdata.org/nha/.

Conclusion

Data from NHA reports on health expenditure are often incomplete and, in some cases, of questionable quality. Better data would help finance ministries allocate resources to health systems, assist health ministries in allocating capital within the health sector and enable researchers to make accurate comparisons between health systems.",2015-05-15 +27899600,Prediction of human miRNA target genes using computationally reconstructed ancestral mammalian sequences.,"MicroRNAs (miRNA) are short single-stranded RNA molecules derived from hairpin-forming precursors that play a crucial role as post-transcriptional regulators in eukaryotes and viruses. In the past years, many microRNA target genes (MTGs) have been identified experimentally. However, because of the high costs of experimental approaches, target genes databases remain incomplete. Although several target prediction programs have been developed in the recent years to identify MTGs in silico, their specificity and sensitivity remain low. Here, we propose a new approach called MirAncesTar, which uses ancestral genome reconstruction to boost the accuracy of existing MTGs prediction tools for human miRNAs. For each miRNA and each putative human target UTR, our algorithm makes uses of existing prediction tools to identify putative target sites in the human UTR, as well as in its mammalian orthologs and inferred ancestral sequences. It then evaluates evidence in support of selective pressure to maintain target site counts (rather than sequences), accounting for the possibility of target site turnover. It finally integrates this measure with several simpler ones using a logistic regression predictor. MirAncesTar improves the accuracy of existing MTG predictors by 26% to 157%. 
Source code and prediction results for human miRNAs, as well as supporting evolutionary data are available at http://cs.mcgill.ca/∼blanchem/mirancestar.",2016-11-29 +27749924,Matching the Diversity of Sulfated Biomolecules: Creation of a Classification Database for Sulfatases Reflecting Their Substrate Specificity.,"Sulfatases cleave sulfate groups from various molecules and constitute a biologically and industrially important group of enzymes. However, the number of sulfatases whose substrate has been characterized is limited in comparison to the huge diversity of sulfated compounds, yielding functional annotations of sulfatases particularly prone to flaws and misinterpretations. In the context of the explosion of genomic data, a classification system allowing a better prediction of substrate specificity and for setting the limit of functional annotations is urgently needed for sulfatases. Here, after an overview on the diversity of sulfated compounds and on the known sulfatases, we propose a classification database, SulfAtlas (http://abims.sb-roscoff.fr/sulfatlas/), based on sequence homology and composed of four families of sulfatases. The formylglycine-dependent sulfatases, which constitute the largest family, are also divided by phylogenetic approach into 73 subfamilies, each subfamily corresponding to either a known specificity or to an uncharacterized substrate. SulfAtlas summarizes information about the different families of sulfatases. Within a family a web page displays the list of its subfamilies (when they exist) and the list of EC numbers. The family or subfamily page shows some descriptors and a table with all the UniProt accession numbers linked to the databases UniProt, ExplorEnz, and PDB.",2016-10-17 +27507827,Glycomics for Microbes and Microbiologists. ,"The recent article ""Lectin-Glycan Interaction Network-Based Identification of Host Receptors of Microbial Pathogenic Adhesins"" by Ielasi et al. 
describes a new development in microbial carbohydrate analysis [Ielasi FS, Alioscha-Perez M, Donohue D, Claes S, Sahli H, Schols D, Willaert RG, mBio 7(4):e00584-16, 2016, http://dx.doi.org/10.1128/mbio.00584-16]. Specific carbohydrate ligands have been identified from the patterns of lectin binding to oligosaccharides printed on a chip. The new technique links the output to a comprehensive glycan database and offers a number of data visualization options. The graphs highlight the occurrence of potential ligands, organized by organism, tissue, and patterns of association with disease states. The analysis has successfully predicted novel glycoprotein ligands for microbial lectins, including an interaction of E. coli FimH with HIV gp120.",2016-08-09 +30952633,Long Noncoding RNA ELIT-1 Acts as a Smad3 Cofactor to Facilitate TGFβ/Smad Signaling and Promote Epithelial-Mesenchymal Transition.,"TGFβ is involved in various biological processes, including development, differentiation, growth regulation, and epithelial-mesenchymal transition (EMT). In TGFβ/Smad signaling, receptor-activated Smad complexes activate or repress their target gene promoters. Smad cofactors are a group of Smad-binding proteins that promote recruitment of Smad complexes to these promoters. Long noncoding RNAs (lncRNA), which behave as Smad cofactors, have thus far not been identified. Here, we characterize a novel lncRNA EMT-associated lncRNA induced by TGFβ1 (ELIT-1). ELIT-1 was induced by TGFβ stimulation via the TGFβ/Smad pathway in TGFβ-responsive cell lines. ELIT-1 depletion abrogated TGFβ-mediated EMT progression and expression of TGFβ target genes including Snail, a transcription factor critical for EMT. A positive correlation between high expression of ELIT-1 and poor prognosis in patients with lung adenocarcinoma and gastric cancer suggests that ELIT-1 may be useful as a prognostic and therapeutic target. RIP assays revealed that ELIT-1 bound to Smad3, but not Smad2. 
In conjunction with Smad3, ELIT-1 enhanced Smad-responsive promoter activities by recruiting Smad3 to the promoters of its target genes including Snail, other TGFβ target genes, and ELIT-1 itself. Collectively, these data show that ELIT-1 is a novel trans-acting lncRNA that forms a positive feedback loop to enhance TGFβ/Smad3 signaling and promote EMT progression. SIGNIFICANCE: This study identifies a novel lncRNA ELIT-1 and characterizes its role as a positive regulator of TGFβ/Smad3 signaling and EMT.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/79/11/2821/F1.large.jpg.",2019-04-05 +22965133,RhesusBase: a knowledgebase for the monkey research community.,"Although the rhesus macaque is a unique model for the translational study of human diseases, currently its use in biomedical research is still in its infant stage due to error-prone gene structures and limited annotations. Here, we present RhesusBase for the monkey research community (http://www.rhesusbase.org). We performed strand-specific RNA-Seq studies in 10 macaque tissues and generated 1.2 billion 90-bp paired-end reads, covering >97.4% of the putative exon in macaque transcripts annotated by Ensembl. We found that at least 28.7% of the macaque transcripts were previously mis-annotated, mainly due to incorrect exon-intron boundaries, incomplete untranslated regions (UTRs) and missed exons. Compared with the previous gene models, the revised transcripts show clearer sequence motifs near splicing junctions and the end of UTRs, as well as cleaner patterns of exon-intron distribution for expression tags and cross-species conservation scores. Strikingly, 1292 exon-intron boundary revisions between coding exons corrected the previously mis-annotated open reading frames. The revised gene models were experimentally verified in randomly selected cases. 
We further integrated functional genomics annotations from >60 categories of public and in-house resources and developed an online accessible database. User-friendly interfaces were developed to update, retrieve, visualize and download the RhesusBase meta-data, providing a 'one-stop' resource for the monkey research community.",2012-09-10 +29161291,DEclust: A statistical approach for obtaining differential expression profiles of multiple conditions.,"High-throughput RNA sequencing technology is widely used to comprehensively detect and quantify cellular gene expression. Thus, numerous analytical methods have been proposed for identifying differentially expressed genes (DEGs) between paired samples such as tumor and control specimens, but few studies have reported methods for analyzing differential expression under multiple conditions. We propose a novel method, DEclust, for differential expression analysis among more than two matched samples from distinct tissues or conditions. As compared to conventional clustering methods, DEclust more accurately extracts statistically significant gene clusters from multi-conditional transcriptome data, particularly when replicates of quantitative experiments are available. DEclust can be used for any multi-conditional transcriptome data, as well as for extending any DEG detection tool for paired samples to multiple samples. Accordingly, DEclust can be used for a wide range of applications for transcriptome data analysis. DEclust is freely available at http://www.dna.bio.keio.ac.jp/software/DEclust.",2017-11-21 +30224349,"A sequence-based, deep learning model accurately predicts RNA splicing branchpoints.","Experimental detection of RNA splicing branchpoints is difficult. To date, high-confidence experimental annotations exist for 18% of 3' splice sites in the human genome. We develop a deep-learning-based branchpoint predictor, LaBranchoR, which predicts a correct branchpoint for at least 75% of 3' splice sites genome-wide. 
Detailed analysis of cases in which our predicted branchpoint deviates from experimental data suggests a correct branchpoint is predicted in over 90% of cases. We use our predicted branchpoints to identify a novel sequence element upstream of branchpoints consistent with extended U2 snRNA base-pairing, show an association between weak branchpoints and alternative splicing, and explore the effects of genetic variants on branchpoints. We provide genome-wide branchpoint annotations and in silico mutagenesis scores at http://bejerano.stanford.edu/labranchor.",2018-09-17 +22363733,PrionHome: a database of prions and other sequences relevant to prion phenomena.,"Prions are units of propagation of an altered state of a protein or proteins; prions can propagate from organism to organism, through cooption of other protein copies. Prions contain no necessary nucleic acids, and are important both as both pathogenic agents, and as a potential force in epigenetic phenomena. The original prions were derived from a misfolded form of the mammalian Prion Protein PrP. Infection by these prions causes neurodegenerative diseases. Other prions cause non-Mendelian inheritance in budding yeast, and sometimes act as diseases of yeast. We report the bioinformatic construction of the PrionHome, a database of >2000 prion-related sequences. The data was collated from various public and private resources and filtered for redundancy. The data was then processed according to a transparent classification system of prionogenic sequences (i.e., sequences that can make prions), prionoids (i.e., proteins that propagate like prions between individual cells), and other prion-related phenomena. There are eight PrionHome classifications for sequences. The first four classifications are derived from experimental observations: prionogenic sequences, prionoids, other prion-related phenomena, and prion interactors. 
The second four classifications are derived from sequence analysis: orthologs, paralogs, pseudogenes, and candidate-prionogenic sequences. Database entries list: supporting information for PrionHome classifications, prion-determinant areas (where relevant), and disordered and compositionally-biased regions. Also included are literature references for the PrionHome classifications, transcripts and genomic coordinates, and structural data (including comparative models made for the PrionHome from manually curated alignments). We provide database usage examples for both vertebrate and fungal prion contexts. Using the database data, we have performed a detailed analysis of the compositional biases in known budding-yeast prionogenic sequences, showing that the only abundant bias pattern is for asparagine bias with subsidiary serine bias. We anticipate that this database will be a useful experimental aid and reference resource. It is freely available at: http://libaio.biol.mcgill.ca/prion.",2012-02-20 +21995777,"BRAD, the genetics and genomics database for Brassica plants.","

Background

Brassica species include both vegetable and oilseed crops, which are very important to the daily life of common human beings. Meanwhile, the Brassica species represent an excellent system for studying numerous aspects of plant biology, specifically for the analysis of genome evolution following polyploidy, so it is also very important for scientific research. Now, the genome of Brassica rapa has already been assembled, it is the time to do deep mining of the genome data.

Description

BRAD, the Brassica database, is a web-based resource focusing on genome scale genetic and genomic data for important Brassica crops. BRAD was built based on the first whole genome sequence and on further data analysis of the Brassica A genome species, Brassica rapa (Chiifu-401-42). It provides datasets, such as the complete genome sequence of B. rapa, which was de novo assembled from Illumina GA II short reads and from BAC clone sequences, predicted genes and associated annotations, non coding RNAs, transposable elements (TE), B. rapa genes' orthologous to those in A. thaliana, as well as genetic markers and linkage maps. BRAD offers useful searching and data mining tools, including search across annotation datasets, search for syntenic or non-syntenic orthologs, and to search the flanking regions of a certain target, as well as the tools of BLAST and Gbrowse. BRAD allows users to enter almost any kind of information, such as a B. rapa or A. thaliana gene ID, physical position or genetic marker.

Conclusion

BRAD, a new database which focuses on the genetics and genomics of the Brassica plants has been developed, it aims at helping scientists and breeders to fully and efficiently use the information of genome data of Brassica plants. BRAD will be continuously updated and can be accessed through http://brassicadb.org.",2011-10-13 +27383543,Pathology Informatics Essentials for Residents: A Flexible Informatics Curriculum Linked to Accreditation Council for Graduate Medical Education Milestones.,"

Context

-Recognition of the importance of informatics to the practice of pathology has surged. Training residents in pathology informatics has been a daunting task for most residency programs in the United States because faculty often lacks experience and training resources. Nevertheless, developing resident competence in informatics is essential for the future of pathology as a specialty.

Objective

-To develop and deliver a pathology informatics curriculum and instructional framework that guides pathology residency programs in training residents in critical pathology informatics knowledge and skills, and meets Accreditation Council for Graduate Medical Education Informatics Milestones.

Design

-The College of American Pathologists, Association of Pathology Chairs, and Association for Pathology Informatics formed a partnership and expert work group to identify critical pathology informatics training outcomes and to create a highly adaptable curriculum and instructional approach, supported by a multiyear change management strategy.

Results

-Pathology Informatics Essentials for Residents (PIER) is a rigorous approach for educating all pathology residents in important pathology informatics knowledge and skills. PIER includes an instructional resource guide and toolkit for incorporating informatics training into residency programs that vary in needs, size, settings, and resources. PIER is available at http://www.apcprods.org/PIER (accessed April 6, 2016).

Conclusions

-PIER is an important contribution to informatics training in pathology residency programs. PIER introduces pathology trainees to broadly useful informatics concepts and tools that are relevant to practice. PIER provides residency program directors with a means to implement a standardized informatics training curriculum, to adapt the approach to local program needs, and to evaluate resident performance and progress over time.",2016-07-06 +28185557,Reconstruction of ancestral RNA sequences under multiple structural constraints.,"

Background

Secondary structures form the scaffold of multiple sequence alignment of non-coding RNA (ncRNA) families. An accurate reconstruction of ancestral ncRNAs must use this structural signal. However, the inference of ancestors of a single ncRNA family with a single consensus structure may bias the results towards sequences with high affinity to this structure, which are far from the true ancestors.

Methods

In this paper, we introduce achARNement, a maximum parsimony approach that, given two alignments of homologous ncRNA families with consensus secondary structures and a phylogenetic tree, simultaneously calculates ancestral RNA sequences for these two families.

Results

We test our methodology on simulated data sets, and show that achARNement outperforms classical maximum parsimony approaches in terms of accuracy, but also reduces by several orders of magnitude the number of candidate sequences. To conclude this study, we apply our algorithms on the Glm clan and the FinP-traJ clan from the Rfam database.

Conclusions

Our results show that our methods reconstruct small sets of high-quality candidate ancestors with better agreement to the two target structures than with classical approaches. Our program is freely available at: http://csb.cs.mcgill.ca/acharnement .",2016-11-11 +28800158,Using KBase to Assemble and Annotate Prokaryotic Genomes.,"The DOE Systems Biology Knowledgebase (KBase, http://kbase.us/) is an open-access bioinformatics software and data platform for analyzing plants, microbes, and their communities. KBase enables scientists to create, execute, collaborate on, and share reproducible analyses of their biological data in the context of public data and private collaborator data. For microbiologists researching prokaryotes, KBase offers analysis tools for performing quality control and assessment of Next-Generation Sequencing reads, de novo assembly, genome annotation, and tools for analyzing structural and functional features of genomes. This unit demonstrates an example workflow for taking a comparative and iterative approach to assembly and annotation of prokaryotic genomes using KBase that can be used by microbiologists seeking to perform isolate analysis in a rapid and reproducible fashion. © 2017 by John Wiley & Sons, Inc.",2017-08-11 +27199843,The Experiment Factory: Standardizing Behavioral Experiments.,"The administration of behavioral and experimental paradigms for psychology research is hindered by lack of a coordinated effort to develop and deploy standardized paradigms. While several frameworks (Mason and Suri, 2011; McDonnell et al., 2012; de Leeuw, 2015; Lange et al., 2015) have provided infrastructure and methods for individual research groups to develop paradigms, missing is a coordinated effort to develop paradigms linked with a system to easily deploy them. 
This disorganization leads to redundancy in development, divergent implementations of conceptually identical tasks, disorganized and error-prone code lacking documentation, and difficulty in replication. The ongoing reproducibility crisis in psychology and neuroscience research (Baker, 2015; Open Science Collaboration, 2015) highlights the urgency of this challenge: reproducible research in behavioral psychology is conditional on deployment of equivalent experiments. A large, accessible repository of experiments for researchers to develop collaboratively is most efficiently accomplished through an open source framework. Here we present the Experiment Factory, an open source framework for the development and deployment of web-based experiments. The modular infrastructure includes experiments, virtual machines for local or cloud deployment, and an application to drive these components and provide developers with functions and tools for further extension. We release this infrastructure with a deployment (http://www.expfactory.org) that researchers are currently using to run a set of over 80 standardized web-based experiments on Amazon Mechanical Turk. By providing open source tools for both deployment and development, this novel infrastructure holds promise to bring reproducibility to the administration of experiments, and accelerate scientific progress by providing a shared community resource of psychological paradigms.",2016-04-26 +28369256,Entropy-based consensus clustering for patient stratification.,"

Motivation

Patient stratification or disease subtyping is crucial for precision medicine and personalized treatment of complex diseases. The increasing availability of high-throughput molecular data provides a great opportunity for patient stratification. Many clustering methods have been employed to tackle this problem in a purely data-driven manner. Yet, existing methods leveraging high-throughput molecular data often suffer from various limitations, e.g. noise, data heterogeneity, high dimensionality or poor interpretability.

Results

Here we introduced an Entropy-based Consensus Clustering (ECC) method that overcomes those limitations altogether. Our ECC method employs an entropy-based utility function to fuse many basic partitions into a consensus one that agrees with the basic ones as much as possible. Maximizing the utility function in ECC has a much more meaningful interpretation than that of any other consensus clustering method. Moreover, we exactly map the complex utility maximization problem to the classic K-means clustering problem, which can then be efficiently solved with linear time and space complexity. Our ECC method can also naturally integrate multiple molecular data types measured from the same set of subjects, and easily handle missing values without any imputation. We applied ECC to 110 synthetic and 48 real datasets, including 35 cancer gene expression benchmark datasets and 13 cancer types with four molecular data types from The Cancer Genome Atlas. We found that ECC shows superior performance against existing clustering methods. Our results clearly demonstrate the power of ECC in clinically relevant patient stratification.

Availability and implementation

The Matlab package is available at http://scholar.harvard.edu/yyl/ecc .

Contact

yunfu@ece.neu.edu or yyl@channing.harvard.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +25883200,Reliability of biological variation data available in an online database: need for improvement.,"

Background

Biological variation (BV) data enable assessment of the significance of changes in serial measurements observed within a subject and are used to set analytical quality specifications. This data is available in a database held on the Westgard website (http://www.westgard.com/biodatabase1.htm). Some limitations of this data, however, have been identified in recently published reviews. The aim of this paper is to show the reliability of the published BV data and to identify ongoing work to address some of its limitations.

Methods

The BV data currently hosted on the Westgard website was examined. Distribution of measurands stratified by the number of cited references upon which the database entry is based and the distribution of papers stratified by publication year, are shown. Moreover, BV data available in literature for glycated hemoglobin, C-reactive protein, glycated albumin, alanine aminotransferase, aspartate aminotransferase and γ-glutamyl transferase are evaluated.

Results

The results obtained show that most BV data come just from a few papers or only one paper and that a lot of publications are dated; therefore, this data is too obsolete to be used. Furthermore, critical review of the BV database highlights a number of factors that might impact on the reliability of the BV data entries and translation into current practice.

Conclusions

A number of issues clearly undermine the value of the current database. These issues are being considered by the European Federation of Clinical Chemistry and Laboratory Medicine, biological variation working group, in collaboration with a Spanish group responsible for the database updating.",2015-05-01 +26742798,Online Tool to Improve Stratification of Adverse Events in Stroke Clinical Trials.,"

Background and purpose

Knowing characteristic adverse events (AEs) and their incidence among patients participating in acute stroke trials may assist interpretation of future studies. We aimed to develop an online tool to inform stroke trial safety.

Methods

We identified relevant AEs from patients within the Virtual International Stroke Trials Archive (VISTA), using receiver operating characteristic principles. We modeled their incidence on patient age, baseline National Institutes of Health Stroke Scale, and comorbidities using binary logistic regression. Models with an R² >5% were deemed powerful enough to predict expected AE incidences and were included. The calculator was developed using R and Visual Studio.

Results

Forty-eight of the most common AEs were identified and incorporated into the IschAEmic Stroke Calculator. The calculator (publicly available at http://www.vistacollaboration.org) calculates the expected incidence of AEs or groups of AEs in a trial cohort and, where possible, compares them with the observed incidence.

Conclusions

The IschAEmic Stroke Calculator is an open access resource to support safety interpretation within acute stroke trials. Prediction of AEs with higher likelihood of occurrence may direct preventive clinical measures.",2016-01-07 +29932276,BCCIP binds to and activates its promoter in a YY1-dependent fashion in HCT116 cells.,"The restriction of Yin Yang 1 (YY1) at BRCA2 and CDKN1A/p21-interacting protein (BCCIP) transcriptional start site (TSS) proximal region in several human cancer cell lines was found by analyzation of ChIP-Seq database from UCSC Genome Browser (http://genome.ucsc.edu). However, whether the stabilization of YY1 by BCCIP impacts its recruitment in the BCCIP promoter region is unclear. Here, we present evidence that transcriptional regulation of YY1 on BCCIP is closely related to YY1 stability in HCT116 human colon cancer cells. YY1 stabilization was in turn regulated by BCCIP, suggesting the existence of a BCCIP-YY1 feedback loop in regulating BCCIP transcription by the YY1. Overexpression of BCCIP stabilized YY1 while knockdown of BCCIP reduced YY1 protein level. In addition, direct interaction between YY1 and BCCIP was confirmed by coimmunoprecipitation approach. Also, the N-terminus region of BCCIP, including the internal conserved domain (ICD), was responsible for binding with the amino acid 146-270 of YY1. More importantly, YY1 stability was related to the BCCIP/ICD domain-mediated YY1 ubiquitination pathway. Moreover, a limited BCCIP promoter region containing YY1 binding site (CCGCCATC) was tightly associated with the pGL4-BCCIP-Luc luciferase activity. In ChIP assays, shBCCIP lentiviral-mediated YY1 instability decreased recruitment of the YY1 at BCCIP TSS proximal region, which could not be restored by YY1 overexpression. 
Furthermore, knockdown of YY1 inhibited the binding of BCCIP itself at BCCIP promoter region proximal to TSS, demonstrating that transcriptional regulation of the YY1 on BCCIP can be modulated by BCCIP itself in a YY1-dependent fashion.",2018-07-04 +30828804,"Crosstalk between mitochondria, calcium channels and actin cytoskeleton modulates noradrenergic activity of locus coeruleus neurons.","Locus coeruleus (LC) is the name of a group of large sized neurons located at the brain stem, which provides the main source of noradrenaline to the central nervous system, virtually, innervating the whole brain. All noradrenergic signalling provided by this nucleus is dependent on an intrinsic pacemaker process. Our study aims to understand how noradrenergic neurons finely tune their pacemaker processes and regulate their activities. Here we present that mitochondrial perturbation in the LC from mice, inhibits spontaneous firing by a hyperpolarizing response that involves Ca2+ entry via L-type Ca2+ channels and the actin cytoskeleton. We found that pharmacological perturbation of mitochondria from LC neurons using the protonophore carbonyl cyanide m-chlorophenylhydrazone (CCCP), induced a dominant hyperpolarizing response when electrophysiological approaches were performed. Surprisingly, the CCCP-induced hyperpolarizing response was dependent on L-type Ca2+ channel-mediated Ca2+ entry, as it was inhibited by: the removal of extracellular Ca2+ ; the addition of Cd2+ ; nifedipine or nicardipine; but not by the intracellular dialysis with the Ca2+ chelator 1,2-Bis(2-Aminophenoxy)ethane-N,N,N',N'-tetraacetic acid, the latter indicating that the response was not because of a global change in [Ca2+ ]c but does not exclude action at intracellular microdomains. Further to this, the incubation of slices with cytochalasin D, an agent that depolymerises the actin cytoskeleton, inhibited the hyperpolarizing response indicating an involvement of the actin cytoskeleton. 
The data are consistent with the hypothesis that there is a crosstalk between mitochondria and L-type Ca2+ channels leading to modulation of noradrenergic neuronal activity mediated by the actin cytoskeleton. OPEN SCIENCE BADGES: This article has received a badge for *Open Materials* because it provided all relevant information to reproduce the study in the manuscript. The complete Open Science Disclosure form for this article can be found at the end of the article. More information about the Open Practices badges can be found at https://cos.io/our-services/open-science-badges/.",2019-04-02 +28950365,Modeling Site Heterogeneity with Posterior Mean Site Frequency Profiles Accelerates Accurate Phylogenomic Estimation.,"Proteins have distinct structural and functional constraints at different sites that lead to site-specific preferences for particular amino acid residues as the sequences evolve. Heterogeneity in the amino acid substitution process between sites is not modeled by commonly used empirical amino acid exchange matrices. Such model misspecification can lead to artefacts in phylogenetic estimation such as long-branch attraction. Although sophisticated site-heterogeneous mixture models have been developed to address this problem in both Bayesian and maximum likelihood (ML) frameworks, their formidable computational time and memory usage severely limits their use in large phylogenomic analyses. Here we propose a posterior mean site frequency (PMSF) method as a rapid and efficient approximation to full empirical profile mixture models for ML analysis. The PMSF approach assigns a conditional mean amino acid frequency profile to each site calculated based on a mixture model fitted to the data using a preliminary guide tree. These PMSF profiles can then be used for in-depth tree-searching in place of the full mixture model. 
Compared with widely used empirical mixture models with $k$ classes, our implementation of PMSF in IQ-TREE (http://www.iqtree.org) speeds up the computation by approximately $k$/1.5-fold and requires a small fraction of the RAM. Furthermore, this speedup allows, for the first time, full nonparametric bootstrap analyses to be conducted under complex site-heterogeneous models on large concatenated data matrices. Our simulations and empirical data analyses demonstrate that PMSF can effectively ameliorate long-branch attraction artefacts. In some empirical and simulation settings PMSF provided more accurate estimates of phylogenies than the mixture models from which they derive.",2018-03-01 +28222677,Molecular characterization of invasive capsule null Neisseria meningitidis in South Africa.,"

Background

The meningococcal capsule is an important virulence determinant. Unencapsulated meningococci lacking capsule biosynthesis genes and containing the capsule null locus (cnl) are predominantly non-pathogenic. Rare cases of invasive meningococcal disease caused by cnl isolates belonging to sequence types (ST) and clonal complexes (cc) ST-845 (cc845), ST-198 (cc198), ST-192 (cc192) and ST-53 (cc53) have been documented. The clinical significance of these isolates however remains unclear. We identified four invasive cnl meningococci through laboratory-based surveillance in South Africa from 2003 through 2013, which we aimed to characterize using whole genome data.

Results

One isolate [NG: P1.7-2,30: F1-2: ST-53 (cc53)] contained cnl allele 12, and caused empyema in an adult male with bronchiectasis from tuberculosis, diabetes mellitus and a smoking history. Three isolates were NG: P1.18-11,42-2: FΔ: ST-192 (cc192) and contained cnl allele 2. One patient was an adolescent male with meningitis. The remaining two isolates were from recurrent disease episodes (8 months apart) in a male child with deficiency of the sixth complement component, and with the exception of two single nucleotide polymorphisms, contained identical core genomes. The ST-53 (cc53) isolate possessed alleles for NHBA peptide 191 and fHbp variant 2; whilst the ST-192 (cc192) isolates contained NHBA peptide 704 and fHbp variant 3. All four isolates lacked nadA. Comparison of the South African genomes to 61 additional cnl genomes on the PubMLST Neisseria database ( http://pubmlst.org/neisseria/ ), determined that most putative virulence genes could be found in both invasive and carriage phenotypes.

Conclusions

Although rare, invasive disease by cnl meningococci may be associated with host immunodeficiency and such patients may benefit from protein-based meningococcal vaccines.",2017-02-21 +30130647,Reproducible evaluation of classification methods in Alzheimer's disease: Framework and application to MRI and PET data.,"A large number of papers have introduced novel machine learning and feature extraction methods for automatic classification of Alzheimer's disease (AD). However, while the vast majority of these works use the public dataset ADNI for evaluation, they are difficult to reproduce because different key components of the validation are often not readily available. These components include selected participants and input data, image preprocessing and cross-validation procedures. The performance of the different approaches is also difficult to compare objectively. In particular, it is often difficult to assess which part of the method (e.g. preprocessing, feature extraction or classification algorithms) provides a real improvement, if any. In the present paper, we propose a framework for reproducible and objective classification experiments in AD using three publicly available datasets (ADNI, AIBL and OASIS). The framework comprises: i) automatic conversion of the three datasets into a standard format (BIDS); ii) a modular set of preprocessing pipelines, feature extraction and classification methods, together with an evaluation framework, that provide a baseline for benchmarking the different components. We demonstrate the use of the framework for a large-scale evaluation on 1960 participants using T1 MRI and FDG PET data. In this evaluation, we assess the influence of different modalities, preprocessing, feature types (regional or voxel-based features), classifiers, training set sizes and datasets. Performances were in line with the state-of-the-art. FDG PET outperformed T1 MRI for all classification tasks. 
No difference in performance was found for the use of different atlases, image smoothing, partial volume correction of FDG PET images, or feature type. Linear SVM and L2-logistic regression resulted in similar performance and both outperformed random forests. The classification performance increased along with the number of subjects used for training. Classifiers trained on ADNI generalized well to AIBL and OASIS. All the code of the framework and the experiments is publicly available: general-purpose tools have been integrated into the Clinica software (www.clinica.run) and the paper-specific code is available at: https://gitlab.icm-institute.org/aramislab/AD-ML.",2018-08-18 +29913065,OptoBase: A Web Platform for Molecular Optogenetics.,"OptoBase is an online platform for molecular optogenetics. At its core is a hand-annotated and ontology-supported database that aims to cover all existing optogenetic switches and publications, which is further complemented with a collection of convenient optogenetics-related web tools. OptoBase is meant both for expert optogeneticists to easily keep track of the field, as well as for all researchers who find optogenetics inviting as a powerful tool to address their biological questions of interest. It is available at https://www.optobase.org . This work also presents OptoBase-based analysis of the trends in molecular optogenetics.",2018-07-03 +30038721,Second-line chemotherapy for the treatment of metastatic pancreatic cancer after first-line gemcitabine-based chemotherapy: a network meta-analysis.,"Guidelines for treatment of metastatic pancreatic cancer recommend a second line based on Fluoropyrimidine (FP) alone or in combination with Oxaliplatin (OXA) or Irinotecan (IRI) after a first line treatment based on Gemcitabine (GEM). 
We conducted a Bayesian network meta-analysis to compare currently available therapies to treat metastatic pancreatic cancer in the second line, considering as efficacy measures overall survival (OS) and progression free survival (PFS). Published randomized trials were identified using electronic databases (MEDLINE, PubMed, https://clinicaltrials.gov/ and American Society of clinical oncology). 8 studies met the inclusion criteria for a total of 1,587 patients and 7 different therapeutic schemes. The results suggested that the use of IRI-FP-Folinic Acid scheme in the second-line treatment of metastatic pancreatic cancer may offer a benefit in terms of OS and PFS for patients not previously treated with these drugs.",2018-07-03 +26989149,R-Syst::diatom: an open-access and curated barcode database for diatoms and freshwater monitoring. ,"Diatoms are micro-algal indicators of freshwater pollution. Current standardized methodologies are based on microscopic determinations, which is time consuming and prone to identification uncertainties. The use of DNA-barcoding has been proposed as a way to avoid these flaws. Combining barcoding with next-generation sequencing enables collection of a large quantity of barcodes from natural samples. These barcodes are identified as certain diatom taxa by comparing the sequences to a reference barcoding library using algorithms. Proof of concept was recently demonstrated for synthetic and natural communities and underlined the importance of the quality of this reference library. We present an open-access and curated reference barcoding database for diatoms, called R-Syst::diatom, developed in the framework of R-Syst, the network of systematic supported by INRA (French National Institute for Agricultural Research), see http://www.rsyst.inra.fr/en. R-Syst::diatom links DNA-barcodes to their taxonomical identifications, and is dedicated to identify barcodes from natural samples. 
The data come from two sources, a culture collection of freshwater algae maintained in INRA in which new strains are regularly deposited and barcoded and from the NCBI (National Center for Biotechnology Information) nucleotide database. Two kinds of barcodes were chosen to support the database: 18S (18S ribosomal RNA) and rbcL (Ribulose-1,5-bisphosphate carboxylase/oxygenase), because of their efficiency. Data are curated using innovative (Declic) and classical bioinformatic tools (Blast, classical phylogenies) and up-to-date taxonomy (Catalogues and peer reviewed papers). Every 6 months R-Syst::diatom is updated. The database is available through the R-Syst microalgae website (http://www.rsyst.inra.fr/) and a platform dedicated to next-generation sequencing data analysis, virtual_BiodiversityL@b (https://galaxy-pgtp.pierroton.inra.fr/). We present here the content of the library regarding the number of barcodes and diatom taxa. In addition to these information, morphological features (e.g. biovolumes, chloroplasts…), life-forms (mobility, colony-type) or ecological features (taxa preferenda to pollution) are indicated in R-Syst::diatom. Database URL: http://www.rsyst.inra.fr/.",2016-03-17 +31017859,Diagnostic Accuracy of the Sampling Utterances and Grammatical Analysis Revised (SUGAR) Measures for Identifying Children With Language Impairment.,"Purpose The purpose of this study was twofold: (a) to determine the diagnostic accuracy of the four Sampling Utterances and Grammatical Analysis Revised (SUGAR) metrics, including total number of words, mean length of utteranceSUGAR, words per sentence, and clauses per sentence in differentiating children with language impairment (LI) from those with typical language development, and (b) to compare the average time to collect, transcribe, and analyze 50-utterance language samples for children with LI to those with typical language development. 
Method Participants were 306 children (LI, 36; typical language development, 270) who ranged in age from 3;0 (years;months) to 7;11. Fifty-utterance conversational language samples were obtained using a conversational protocol. The four SUGAR metrics were calculated from the samples. Results Cut scores of -1 SD for mean length of utteranceSUGAR and -1.25 cut score for clauses per sentence resulted in sensitivity of 97.22%, specificity of 82.96%, a positive likelihood ratio of 5.71, and a negative likelihood ratio of 0.03. On average, it took a total time of 20:20 min ( SD = 4:37, range: 13:11-30:25) to collect, transcribe, and analyze language samples for children with LI. Children with LI took significantly less time to produce 50 utterances, when compared to their typically developing peers. There were no significant differences in the time to transcribe and analyze language samples of children with LI compared to their typically developing peers. Conclusions The SUGAR metrics, in combination with other data sources (e.g., standardized testing, dynamic assessment, observation), can be used to identify preschool- and early elementary-aged children with LI. Furthermore, for children with LI, language sampling and analysis using the SUGAR method can be completed in approximately 20 min. The results of this study indicated the SUGAR measures can effectively and efficiently help in identifying LI. Supplemental Material https://doi.org/10.23641/asha.7728638.",2019-04-01 +29771363,Multiple Sequence Alignment Averaging Improves Phylogeny Reconstruction.,"The classic methodology of inferring a phylogenetic tree from sequence data is composed of two steps. First, a multiple sequence alignment (MSA) is computed. Then, a tree is reconstructed assuming the MSA is correct. Yet, inferred MSAs were shown to be inaccurate and alignment errors reduce tree inference accuracy. 
It was previously proposed that filtering unreliable alignment regions can increase the accuracy of tree inference. However, it was also demonstrated that the benefit of this filtering is often obscured by the resulting loss of phylogenetic signal. In this work we explore an approach, in which instead of relying on a single MSA, we generate a large set of alternative MSAs and concatenate them into a single SuperMSA. By doing so, we account for phylogenetic signals contained in columns that are not present in the single MSA computed by alignment algorithms. Using simulations, we demonstrate that this approach results, on average, in more accurate trees compared to 1) using an unfiltered MSA and 2) using a single MSA with weights assigned to columns according to their reliability. Next, we explore in which regions of the MSA space our approach is expected to be beneficial. Finally, we provide a simple criterion for deciding whether or not the extra effort of computing a SuperMSA and inferring a tree from it is beneficial. Based on these assessments, we expect our methodology to be useful for many cases in which diverged sequences are analyzed. The option to generate such a SuperMSA is available at http://guidance.tau.ac.il.",2019-01-01 +29741575,REGGAE: a novel approach for the identification of key transcriptional regulators.,"Motivation:Transcriptional regulators play a major role in most biological processes. Alterations in their activities are associated with a variety of diseases and in particular with tumor development and progression. Hence, it is important to assess the effects of deregulated regulators on pathological processes. Results:Here, we present REGulator-Gene Association Enrichment (REGGAE), a novel method for the identification of key transcriptional regulators that have a significant effect on the expression of a given set of genes, e.g. genes that are differentially expressed between two sample groups. 
REGGAE uses a Kolmogorov-Smirnov-like test statistic that implicitly combines associations between regulators and their target genes with an enrichment approach to prioritize the influence of transcriptional regulators. We evaluated our method in two different application scenarios, which demonstrate that REGGAE is well suited for uncovering the influence of transcriptional regulators and is a valuable tool for the elucidation of complex regulatory mechanisms. Availability and implementation:REGGAE is freely available at https://regulatortrail.bioinf.uni-sb.de. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-10-01 +27010073,dbPAF: an integrative database of protein phosphorylation in animals and fungi.,"Protein phosphorylation is one of the most important post-translational modifications (PTMs) and regulates a broad spectrum of biological processes. Recent progresses in phosphoproteomic identifications have generated a flood of phosphorylation sites, while the integration of these sites is an urgent need. In this work, we developed a curated database of dbPAF, containing known phosphorylation sites in H. sapiens, M. musculus, R. norvegicus, D. melanogaster, C. elegans, S. pombe and S. cerevisiae. From the scientific literature and public databases, we totally collected and integrated 54,148 phosphoproteins with 483,001 phosphorylation sites. Multiple options were provided for accessing the data, while original references and other annotations were also present for each phosphoprotein. Based on the new data set, we computationally detected significantly over-represented sequence motifs around phosphorylation sites, predicted potential kinases that are responsible for the modification of collected phospho-sites, and evolutionarily analyzed phosphorylation conservation states across different species. Besides to be largely consistent with previous reports, our results also proposed new features of phospho-regulation. 
Taken together, our database can be useful for further analyses of protein phosphorylation in human and other model organisms. The dbPAF database was implemented in PHP + MySQL and freely available at http://dbpaf.biocuckoo.org.",2016-03-24 +30457571,Generation and quality control of lipidomics data for the alzheimer's disease neuroimaging initiative cohort.,"Alzheimer's disease (AD) is a major public health priority with a large socioeconomic burden and complex etiology. The Alzheimer Disease Metabolomics Consortium (ADMC) and the Alzheimer Disease Neuroimaging Initiative (ADNI) aim to gain new biological insights in the disease etiology. We report here an untargeted lipidomics of serum specimens of 806 subjects within the ADNI1 cohort (188 AD, 392 mild cognitive impairment and 226 cognitively normal subjects) along with 83 quality control samples. Lipids were detected and measured using an ultra-high-performance liquid chromatography quadruple/time-of-flight mass spectrometry (UHPLC-QTOF MS) instrument operated in both negative and positive electrospray ionization modes. The dataset includes a total 513 unique lipid species out of which 341 are known lipids. For over 95% of the detected lipids, a relative standard deviation of better than 20% was achieved in the quality control samples, indicating high technical reproducibility. Association modeling of this dataset and available clinical, metabolomics and drug-use data will provide novel insights into the AD etiology. These datasets are available at the ADNI repository at http://adni.loni.usc.edu/.",2018-11-20 +30539550,Decoding the Atlas of RNA Modifications from Epitranscriptome Sequencing Data.,"Over 100 types of chemical modifications have been identified in protein-coding and noncoding RNAs (ncRNAs). However, the prevalence, regulation, and function of diverse RNA modifications remain largely unknown. 
In this chapter, we describe how to annotate, visualize, and analyze the RNA modification sites from the high-throughput epitranscriptome sequencing data using RMBase platform and software. We developed two stand-alone computational software, modAnnotator and metaProfile, to annotate and visualize RNA modification sites and their prevalence in the gene body. In addition, we constructed interactive web implementations to decode the atlas of various RNA modifications, including the N6-methyladenosine (m6A) modification, pseudouridine (Ψ) modification, 5-methylcytosine (m5C) modification, and 2'-O-methylation (2'-O-Me) modification, as well as other types of modifications. We also developed web-based interfaces to analyze the associations between RNA modification sites with miRNA target sites and disease-related single-nucleotide polymorphisms (SNPs). Moreover, RMBase provides a genome browser and a web-based modTool to query, annotate, and visualize various RNA modifications. RMBase is expected to provide comprehensive interfaces and tools to facilitate the analysis and functional study of the massive RNA modification sites. The software and platform are available at http://rna.sysu.edu.cn/rmbase/modSoftware.php .",2019-01-01 +30010787,Comparative assessment of different familial aggregation methods in the context of large and unstructured pedigrees.,"

Motivation

Familial aggregation analysis is an important early step for characterizing the genetic determinants of phenotypes in epidemiological studies. To facilitate this analysis, a collection of methods to detect familial aggregation in large pedigrees has been made available recently. However, efficacy of these methods in real world scenarios remains largely unknown. Here, we assess the performance of five aggregation methods to identify individuals or groups of related individuals affected by a Mendelian trait within a large set of decoys. We investigate method performance under a representative set of combinations of causal variant penetrance, trait prevalence and number of affected generations in the pedigree. These methods are then applied to assess familial aggregation of familial hypercholesterolemia and stroke, in the context of the Cooperative Health Research in South Tyrol (CHRIS) study.

Results

We find that in some situations statistical hypothesis testing with a binomial null distribution achieves performance similar to methods that are based on kinship information, while kinship based methods perform better when information is available on fewer generations. Potential case families from the CHRIS study are reported and the results are discussed taking into account insights from the performance assessment.

Availability and implementation

The familial aggregation analysis package is freely available at the Bioconductor repository, http://www.bioconductor.org/packages/FamAgg.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +31166133,Long-Term PM10 Exposure and Cause-Specific Mortality in the Latium Region (Italy): A Difference-in-Differences Approach.,"

Background

The link between particulate matter (PM) exposure and adverse health outcomes has been widely evaluated using large cohort studies. However, the possibility of residual confounding and lack of information about the health effects of PM in rural and suburban areas are unsolved issues.

Objective

Our aim was to estimate the effect of annual PM≤10µm (PM10) exposure on cause-specific mortality in the Latium region (central Italy, of which Rome is the main city) during 2006-2012 using a difference-in-differences approach.

Methods

We estimated daily PM10 concentrations for each 1 km2 of the region from 2006 to 2012 by use of satellite data, land-use predictors, and meteorological parameters. For each of the 378 regional municipalities and each year, we averaged daily PM10 values to obtain annual mean PM10 exposures. We applied a variant of the difference-in-differences approach to estimate the association between PM10 and cause-specific mortality by focusing on within-municipality fluctuations of mortality rates and annual PM exposures around municipality means, therefore controlling by design for confounding from all spatial and temporal potential confounders. Analyses were also stratified by population size of the municipalities to obtain effect estimates in rural and suburban areas of the region.

Results

In the period 2006-2012, we observed deaths due to three causes: 347,699 nonaccidental; 92,787 cardiovascular; and 16,509 respiratory causes. The annual average (standard deviation, SD) PM10 concentration was 21.9 (±4.9) µg/m3 in Latium. For each 1-µg/m3 increase in annual PM10 we estimated increases of 0.8% (95% confidence intervals (CIs): 0.2%, 1.3%), 0.9% (0.0%, 1.8%), and 1.4% (-0.4%, 3.3%) in nonaccidental, cardiovascular, and respiratory mortality, respectively. Similar results were found when we excluded the metropolitan area of Rome from the analysis. Higher effects were estimated in the smaller municipalities, e.g., those with population < 5,000 inhabitants.

Conclusion

Our study suggests a significant association of annual PM10 exposure with nonaccidental and cardiorespiratory mortality in the Latium region, even outside Rome and in suburban and rural areas. https://doi.org/10.1289/EHP3759.",2019-06-05 +24285301,AVPdb: a database of experimentally validated antiviral peptides targeting medically important viruses.,"Antiviral peptides (AVPs) have exhibited huge potential in inhibiting viruses by targeting various stages of their life cycle. Therefore, we have developed AVPdb, available online at http://crdd.osdd.net/servers/avpdb, to provide a dedicated resource of experimentally verified AVPs targeting over 60 medically important viruses including Influenza, HCV, HSV, RSV, HBV, DENV, SARS, etc. However, we have separately provided HIV inhibiting peptides in 'HIPdb'. AVPdb contains detailed information of 2683 peptides, including 624 modified peptides experimentally tested for antiviral activity. In modified peptides a chemical moiety is attached for increasing their efficacy and stability. Detailed information include: peptide sequence, length, source, virus targeted, virus family, cell line used, efficacy (qualitative/quantitative), target step/protein, assay used in determining the efficacy and PubMed reference. The database also furnishes physicochemical properties and predicted structure for each peptide. We have provided user-friendly browsing and search facility along with other analysis tools to help the users. Entering of many synthetic peptide-based drugs in various stages of clinical trials reiterate the importance for the AVP resources. 
AVPdb is anticipated to cater to the needs of scientific community working for the development of antiviral therapeutics.",2013-11-26 +30790522,Logistic Classification Models for pH-Permeability Profile: Predicting Permeability Classes for the Biopharmaceutical Classification System.,"Permeability is used to describe and evaluate the absorption of drug substances in the human gastrointestinal tract (GIT). Permeability is largely dependent on fluctuating pH that causes the ionization of drug substances and also influences regional absorption in the GIT. Therefore, classification models that characterize permeability at wide ranges of pH were derived in the current study. For this, drug substances were described with six data series that were measured with a parallel artificial membrane permeability assay (PAMPA), including a permeability profile at four pH values (3, 5, 7.4, and 9), and the highest and intrinsic membrane permeability. Logistic regression classification models were developed and compared by using two distinct sets of descriptors: (1) a hydrophobicity descriptor, the logarithm of the octanol-water partition (logPow) or distribution (logD) coefficient and (2) theoretical molecular descriptors. In both cases, models have good classification and descriptive capabilities for the training set (accuracy: 0.76-0.91). Triple validation with three sets of drug substances shows good prediction capability for all models: validation set (accuracy: 0.73-0.91), external validation set (accuracy: 0.72-0.9), and the permeability classes of FDA reference drugs for the biopharmaceutical classification system (BCS) (accuracy: 0.72-0.88). The identification of BCS permeability classes was further improved with decision trees that consolidated predictions from models with each descriptor type. 
These decision trees have higher confidence and accuracy (0.91 for theoretical molecular descriptors and 0.81 for hydrophobicity descriptors) than the individual models in assigning drug substances into BCS permeability classes. A detailed analysis of classification models and related decision trees suggests that they are suitable for predicting classes of permeability for passively transported drug substances, including specifically within the BCS framework. All developed models are available at the QsarDB repository ( http://dx.doi.org/10.15152/QDB.206 ).",2019-03-11 +28891648,NAOMInova: Interactive Geometric Analysis of Noncovalent Interactions in Macromolecular Structures.,"Noncovalent interactions play an important role in macromolecular complexes. The assessment of molecular interactions is often based on knowledge derived from statistics on structural data. Within the last years, the available data in the Brookhaven Protein Data Bank has increased dramatically, quantitatively as well as qualitatively. This development allows the derivation of enhanced interaction models and motivates new ways of data analysis. Here, we present a method to facilitate the analysis of noncovalent interactions enabling detailed insights into the nature of molecular interactions. The method is integrated into a highly variable framework enabling the adaption to user-specific requirements. NAOMInova, the user interface for our method, allows the generation of specific statistics with respect to the chemical environment of substructures. The substructures as well as the analyzed set of protein structures can be chosen arbitrarily. Although NAOMInova was primarily made for data exploration in protein-ligand crystal structures, it can be used in combination with any structure collection, for example, analysis of a carbonyl in the neighborhood of an aromatic ring on a set of structures resulting from a MD simulation. 
Additionally, a filter for different atom attributes can be applied including the experimental support by electron density for single atoms. In this publication, we present the underlying algorithmic techniques of our method and show application examples that demonstrate NAOMInova's ability to support individual analysis of noncovalent interactions in protein structures. NAOMInova is available at http://www.zbh.uni-hamburg.de/naominova .",2017-09-11 +30561547,"Multiomics modeling of the immunome, transcriptome, microbiome, proteome and metabolome adaptations during human pregnancy.","

Motivation

Multiple biological clocks govern a healthy pregnancy. These biological mechanisms produce immunologic, metabolomic, proteomic, genomic and microbiomic adaptations during the course of pregnancy. Modeling the chronology of these adaptations during full-term pregnancy provides the frameworks for future studies examining deviations implicated in pregnancy-related pathologies including preterm birth and preeclampsia.

Results

We performed a multiomics analysis of 51 samples from 17 pregnant women, delivering at term. The datasets included measurements from the immunome, transcriptome, microbiome, proteome and metabolome of samples obtained simultaneously from the same patients. Multivariate predictive modeling using the Elastic Net (EN) algorithm was used to measure the ability of each dataset to predict gestational age. Using stacked generalization, these datasets were combined into a single model. This model not only significantly increased predictive power by combining all datasets, but also revealed novel interactions between different biological modalities. Future work includes expansion of the cohort to preterm-enriched populations and in vivo analysis of immune-modulating interventions based on the mechanisms identified.

Availability and implementation

Datasets and scripts for reproduction of results are available through: https://nalab.stanford.edu/multiomics-pregnancy/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +30782736,Electronic patient reported outcomes to support care of patients with traumatic brain injury: PRiORiTy study qualitative protocol.,"

Introduction

Traumatic brain injury (TBI) represents a major health and socioeconomic problem internationally. The expansive nature of injuries results in a heterogeneous population. The degree and type of long-term impacts following TBI and improvement following injury are highly variable. The use of electronic Patient Reported Outcomes Measures (ePROMs) could help identify residual impacts of TBI and support patient management and care. The Patient Reported Outcomes Research in Trauma study is a qualitative study exploring the long-term symptoms and impacts that are experienced by those with TBI and the potential utility of an ePROM platform to collect real-time information on patient symptoms and quality of life to inform treatment and identify support needs.

Methods and analysis

Semi-structured telephone and face-to-face interviews will be conducted with approximately 30-40 individuals recruited from five groups: (1) people with TBI; (2) carers and relatives of individuals with TBI; (3) TBI healthcare professionals; (4) researchers and (5) third sector staff members and volunteers working with those with TBI. Data will be analysed using directed thematic analysis employing an iterative coding frame that will be modified as analysis progresses. Intercoder triangulation will be employed to enhance credibility.

Ethics and dissemination

This study was approved by the West Midlands-Black Country Research Ethics Committee (Ref: 18/WM/0033). Findings will be disseminated via conference presentations, peer-reviewed journals, social media (@CPROR_UoB; http://www.birmingham.ac.uk/cpror) and the National Institute for Health Research Surgical Reconstruction and Microbiology Research Centre.",2019-01-25 +28109179,Quantitative and integrative analysis of paracrine hepatocyte activation by nonparenchymal cells upon lipopolysaccharide induction.,"Gut-derived bacterial lipopolysaccharides (LPS) stimulate the secretion of tumour necrosis factor (TNF) from liver macrophages (MCs), liver sinusoidal endothelial cells (LSECs) and hepatic stellate cells (HSCs), which control the acute phase response in hepatocytes through activation of the NF-κB pathway. The individual and cooperative impact of nonparenchymal cells on this clinically relevant response has not been analysed in detail due to technical limitations. To gain an integrative view on this complex inter- and intracellular communication, we combined a multiscale mathematical model with quantitative, time-resolved experimental data of different primary murine liver cell types. We established a computational model for TNF-induced NF-κB signalling in hepatocytes, accurately describing dose-responsiveness for physiologically relevant cytokine concentrations. TNF secretion profiles were quantitatively measured for all nonparenchymal cell types upon LPS stimulation. This novel approach allowed the analysis of individual and collective paracrine TNF-mediated NF-κB induction in hepatocytes, revealing strongest effects of MCs and LSECs on hepatocellular NF-κB signalling. Simulations suggest that both cell types act together to maximize the NF-κB pathway response induced by low LPS concentrations (0.1 and 1 ng/mL). Higher LPS concentrations (≥ 5 ng/mL) induced sufficient TNF levels from MCs or LSECs to induce a strong and nonadjustable pathway response. 
Importantly, these simulations also revealed that the initial cytokine secretion (1-2 h after stimulation) rather than final TNF level (10 h after stimulation) defines the hepatocellular NF-κB response. This raises the question whether the current experimental standard of single high-dose cytokine administration is suitable to mimic in vivo cytokine exposure.

Database

The computational models described in this manuscript are available in the JWS database via the following link: https://jjj.bio.vu.nl/database/beuke.",2017-02-17 +29762724,"AAI-profiler: fast proteome-wide exploratory analysis reveals taxonomic identity, misclassification and contamination.","We present AAI-profiler, a web server for exploratory analysis and quality control in comparative genomics. AAI-profiler summarizes proteome-wide sequence search results to identify novel species, assess the need for taxonomic reclassification and detect multi-isolate and contaminated samples. AAI-profiler visualises results using a scatterplot that shows the Average Amino-acid Identity (AAI) from the query proteome to all similar species in the sequence database. Taxonomic groups are indicated by colour and marker styles, making outliers easy to spot. AAI-profiler uses SANSparallel to perform high-performance homology searches, making proteome-wide analysis possible. We demonstrate the efficacy of AAI-profiler in the discovery of a close relationship between two bacterial symbionts of an omnivorous pirate bug (Orius) and a thrip (Frankliniella occidentalis), an important pest in agriculture. The symbionts represent novel species within the genus Rosenbergiella so far described only in floral nectar. AAI-profiler is easy to use, the analysis presented only required two mouse clicks and was completed in a few minutes. AAI-profiler is available at http://ekhidna2.biocenter.helsinki.fi/AAI.",2018-07-01 +29746660,INTERSPIA: a web application for exploring the dynamics of protein-protein interactions among multiple species.,"Proteins perform biological functions through cascading interactions with each other by forming protein complexes. As a result, interactions among proteins, called protein-protein interactions (PPIs) are not completely free from selection constraint during evolution. 
Therefore, the identification and analysis of PPI changes during evolution can give us new insight into the evolution of functions. Although many algorithms, databases and websites have been developed to help the study of PPIs, most of them are limited to visualize the structure and features of PPIs in a chosen single species with limited functions in the visualization perspective. This leads to difficulties in the identification of different patterns of PPIs in different species and their functional consequences. To resolve these issues, we developed a web application, called INTER-Species Protein Interaction Analysis (INTERSPIA). Given a set of proteins of user's interest, INTERSPIA first discovers additional proteins that are functionally associated with the input proteins and searches for different patterns of PPIs in multiple species through a server-side pipeline, and second visualizes the dynamics of PPIs in multiple species using an easy-to-use web interface. INTERSPIA is freely available at http://bioinfo.konkuk.ac.kr/INTERSPIA/.",2018-07-01 +29949966,DeepFam: deep learning based alignment-free method for protein family modeling and prediction.,"

Motivation

A large number of newly sequenced proteins are generated by the next-generation sequencing technologies and the biochemical function assignment of the proteins is an important task. However, biological experiments are too expensive to characterize such a large number of protein sequences, thus protein function prediction is primarily done by computational modeling methods, such as profile Hidden Markov Model (pHMM) and k-mer based methods. Nevertheless, existing methods have some limitations; k-mer based methods are not accurate enough to assign protein functions and pHMM is not fast enough to handle large number of protein sequences from numerous genome projects. Therefore, a more accurate and faster protein function prediction method is needed.

Results

In this paper, we introduce DeepFam, an alignment-free method that can extract functional information directly from sequences without the need of multiple sequence alignments. In extensive experiments using the Clusters of Orthologous Groups (COGs) and G protein-coupled receptor (GPCR) dataset, DeepFam achieved better performance in terms of accuracy and runtime for predicting functions of proteins compared to the state-of-the-art methods, both alignment-free and alignment-based methods. Additionally, we showed that DeepFam has a power of capturing conserved regions to model protein families. In fact, DeepFam was able to detect conserved regions documented in the Prosite database while predicting functions of proteins. Our deep learning method will be useful in characterizing functions of the ever increasing protein sequences.

Availability and implementation

Codes are available at https://bhi-kimlab.github.io/DeepFam.",2018-07-01 +29788317,FragFit: a web-application for interactive modeling of protein segments into cryo-EM density maps.,"Cryo-electron microscopy (cryo-EM) is a standard method to determine the three-dimensional structures of molecular complexes. However, easy to use tools for modeling of protein segments into cryo-EM maps are sparse. Here, we present the FragFit web-application, a web server for interactive modeling of segments of up to 35 amino acids length into cryo-EM density maps. The fragments are provided by a regularly updated database containing at the moment about 1 billion entries extracted from PDB structures and can be readily integrated into a protein structure. Fragments are selected based on geometric criteria, sequence similarity and fit into a given cryo-EM density map. Web-based molecular visualization with the NGL Viewer allows interactive selection of fragments. The FragFit web-application, accessible at http://proteinformatics.de/FragFit, is free and open to all users, without any login requirements.",2018-07-01 +29788177,GPCRM: a homology modeling web service with triple membrane-fitted quality assessment of GPCR models.,"Due to the involvement of G protein-coupled receptors (GPCRs) in most of the physiological and pathological processes in humans they have been attracting a lot of attention from pharmaceutical industry as well as from scientific community. Therefore, the need for new, high quality structures of GPCRs is enormous. The updated homology modeling service GPCRM (http://gpcrm.biomodellab.eu/) meets those expectations by greatly reducing the execution time of submissions (from days to hours/minutes) with nearly the same average quality of obtained models. 
Additionally, due to three different scoring functions (Rosetta, Rosetta-MP, BCL::Score) it is possible to select accurate models for the required purposes: the structure of the binding site, the transmembrane domain or the overall shape of the receptor. Currently, no other web service for GPCR modeling provides this possibility. GPCRM is continually upgraded in a semi-automatic way and the number of template structures has increased from 20 in 2013 to over 90 including structures the same receptor with different ligands which can influence the structure not only in the on/off manner. Two types of protein viewers can be used for visual inspection of obtained models. The extended sortable tables with available templates provide links to external databases and display ligand-receptor interactions in visual form.",2018-07-01 +29741647,InterEvDock2: an expanded server for protein docking using evolutionary and biological information from homology models and multimeric inputs.,"Computational protein docking is a powerful strategy to predict structures of protein-protein interactions and provides crucial insights for the functional characterization of macromolecular cross-talks. We previously developed InterEvDock, a server for ab initio protein docking based on rigid-body sampling followed by consensus scoring using physics-based and statistical potentials, including the InterEvScore function specifically developed to incorporate co-evolutionary information in docking. InterEvDock2 is a major evolution of InterEvDock which allows users to submit input sequences - not only structures - and multimeric inputs and to specify constraints for the pairwise docking process based on previous knowledge about the interaction. For this purpose, we added modules in InterEvDock2 for automatic template search and comparative modeling of the input proteins. 
The InterEvDock2 pipeline was benchmarked on 812 complexes for which unbound homology models of the two partners and co-evolutionary information are available in the PPI4DOCK database. InterEvDock2 identified a correct model among the top 10 consensus in 29% of these cases (compared to 15-24% for individual scoring functions) and at least one correct interface residue among 10 predicted in 91% of these cases. InterEvDock2 is thus a unique protein docking server, designed to be useful for the experimental biology community. The InterEvDock2 web interface is available at http://bioserv.rpbs.univ-paris-diderot.fr/services/InterEvDock2/.",2018-07-01 +29642096,Availability of a New Job-Exposure Matrix (CANJEM) for Epidemiologic and Occupational Medicine Purposes.,"

Objective

The aim of this study was to introduce the Canadian job-exposure matrix (CANJEM).

Methods

Four large case-control studies of cancer were conducted in Montreal, focused on assessing occupational exposures by means of detailed interviews followed by expert assessment of possible occupational exposures. Thirty-one thousand six hundred seventy-three jobs were assessed using a checklist of 258 agents (listed with prevalences at http://expostats.ca/chems). This large exposure database was configured as a JEM.

Results

CANJEM is available in four occupational classification systems. It provides estimates of probability of exposure among workers with a given occupation, and for those exposed, various metrics of exposure. CANJEM can be accessed online (www.canjem.ca) or in a batch version.

Conclusion

CANJEM is a large source of retrospective exposure information, covering most occupations and many agents. CANJEM can be used to support exposure assessment efforts in epidemiology and occupational health.",2018-07-01 +29950009,"SIMPLE: Sparse Interaction Model over Peaks of moLEcules for fast, interpretable metabolite identification from tandem mass spectra.","

Motivation

Recent success in metabolite identification from tandem mass spectra has been led by machine learning, which has two stages: mapping mass spectra to molecular fingerprint vectors and then retrieving candidate molecules from the database. In the first stage, i.e. fingerprint prediction, spectrum peaks are features and considering their interactions would be reasonable for more accurate identification of unknown metabolites. Existing approaches of fingerprint prediction are based on only individual peaks in the spectra, without explicitly considering the peak interactions. Also the current cutting-edge method is based on kernels, which are computationally heavy and difficult to interpret.

Results

We propose two learning models that allow to incorporate peak interactions for fingerprint prediction. First, we extend the state-of-the-art kernel learning method by developing kernels for peak interactions to combine with kernels for peaks through multiple kernel learning (MKL). Second, we formulate a sparse interaction model for metabolite peaks, which we call SIMPLE, which is computationally light and interpretable for fingerprint prediction. The formulation of SIMPLE is convex and guarantees global optimization, for which we develop an alternating direction method of multipliers (ADMM) algorithm. Experiments using the MassBank dataset show that both models achieved comparative prediction accuracy with the current top-performance kernel method. Furthermore SIMPLE clearly revealed individual peaks and peak interactions which contribute to enhancing the performance of fingerprint prediction.

Availability and implementation

The code will be accessed through http://mamitsukalab.org/tools/SIMPLE/.",2018-07-01 +29746699,RepeatsDB-lite: a web server for unit annotation of tandem repeat proteins.,"RepeatsDB-lite (http://protein.bio.unipd.it/repeatsdb-lite) is a web server for the prediction of repetitive structural elements and units in tandem repeat (TR) proteins. TRs are a widespread but poorly annotated class of non-globular proteins carrying heterogeneous functions. RepeatsDB-lite extends the prediction to all TR types and strongly improves the performance both in terms of computational time and accuracy over previous methods, with precision above 95% for solenoid structures. The algorithm exploits an improved TR unit library derived from the RepeatsDB database to perform an iterative structural search and assignment. The web interface provides tools for analyzing the evolutionary relationships between units and manually refine the prediction by changing unit positions and protein classification. An all-against-all structure-based sequence similarity matrix is calculated and visualized in real-time for every user edit. Reviewed predictions can be submitted to RepeatsDB for review and inclusion.",2018-07-01 +29905836,KnotGenome: a server to analyze entanglements of chromosomes.,"The KnotGenome server enables the topological analysis of chromosome model data using three-dimensional coordinate files of chromosomes as input. In particular, it detects prime and composite knots in single chromosomes, and links between chromosomes. The knotting complexity of the chromosome is presented in the form of a matrix diagram that reveals the knot type of the entire polynucleotide chain and of each of its subchains. Links are determined by means of the Gaussian linking integral and the HOMFLY-PT polynomial. Entangled chromosomes are presented graphically in an intuitive way. It is also possible to relax structure with short molecular dynamics runs before the analysis. 
KnotGenome is freely available at http://knotgenom.cent.uw.edu.pl/.",2018-07-01 +29961081,"Scientific basis of the OCRA method for risk assessment of biomechanical overload of upper limb, as preferred method in ISO standards on biomechanical risk factors.","We are writing in regards to Armstrong et al`s recent discussion paper (1), which addresses the scientific basis of ISO standards on biomechanical risk factors and more specifically the OCRA methodology. The paper comments on the ISO's working methods, but it will be up to the ISO to respond if it sees fit to do so. As the authors of the OCRA method, we wish to respond in a individual capacity. For several years, we have belonged to an ISO working group (ISO TC 159/SC3/WG4) advocating methods for the assessment of biomechanical overload risk; the members of the working group come from various countries and represent public authorities, social partners and researchers with particular expertise in this field. Our decision to send this letter to the editor was motivated by the following position put forth in Armstrong et al`s paper concerning the rigor of development of the ISO ergonomics standards: ""The production of the ISO ergonomics standards differed substantially from the writing of evidence-based practical guidelines. According to the limited information provided in the published documents, the ISO ergonomics standards were not based on a systematic search and appraisal of available literature. It is not clear why the ISO subcommittee preferred one method of risk assessment over others. 
For instance, the ISO 11228-3 identified three detailed risk assessment methods for repetitive hand exertions at high frequency: OCRA (a concise index for the assessment of exposure to repetitive movements of the upper limbs) (20), ACGIH hand activity level (HAL) (21), and the Strain Index (22), but preferred the OCRA methods without providing a scientific basis or comparison (eg, intra- and inter-observer reliability, strength of association with musculoskeletal disorders (MSD), etc.) even though such comparisons are available in the literature (13, 23). As a result, some statements in ISO 11228-3 appear to be based on personal opinions and are in contrast with scientific evidence from the literature. For instance, the ISO standard includes a statement ""in many epidemiological surveys it (OCRA) has shown itself to be well related with health effects (such as the occurrence of UL-WMSD [upper limb-work related MSD)]"" (13). This statement was not supported by a well-designed epidemiological study in 2007 when the ISO standard was published (19). Indeed, in 2010, Takala and colleagues noted the absence of longitudinal studies on the association between the OCRA index and the risk of MSD. They also pointed out the absence of studies on the repeatability of the OCRA method (13)"". (Note: the references in italic relate to the original paper). We would like to point out that the ISO standards in question (2) were actually developed by the working group, as mandated by ISO, over the period 2000‒2004.The years leading up to the publication of the standard (2005‒2007) were dedicated to the challenging task of democratically seeking the endorsement of the ISO member countries. During this time, no significant changes could be made to the basic text other than those arising from specific observations or comments from the countries. This needs to be taken into account, especially when debating the references underpinning the standard. 
More specifically, the standard in question (ISO 11228-3) (2) in Annex A, clearly states that the general reference model for assessing ""repetitive, high frequency, low load movements of the upper limbs"" is a Consensus Document, drafted and published in 2001 by the IEA-Technical Committee on Musculoskeletal Disorders, with the endorsement of the International Commission on Occupational Health (ICOH) (3). The study considered at least 14 different methods that have over time been suggested in the literature as briefly summarized in the same ISO standard (2). The recommendations set forth in this vital Consensus Document went on to become the basis for choosing the most appropriate methods to suggest to future users through the standard (OCRA; ACGIH Hand Activity Level (HAL); Strain Index), each with their respective merits and limits in compliance with the criteria set out in the Consensus Document and taking into account their applicability in the field and ability to interpret the results of the risk assessment. It is against this background, and in light of the rationale described in Annex A, that the entire group agreed that the OCRA method was to be considered as the ""preferred"" method, insofar as it was deemed to best match the recommendations laid out in the aforementioned Consensus Document. Furthermore the OCRA method was, at the time, the only risk assessment method supported by the results of several epidemiological, albeit cross-sectional studies, uniquely available in literature. The study was based on a very large number of cases (>5000 cases) with results both of risk evaluation of upper-limb biomechanical overload (using the OCRA method) and of musculo-skeletal clinical examination (assessing the corresponding diseases). Such studies were reported in a special issue of Ergonomics (4), in an updated paper ‒ first published in Italian (5) ‒ also in Ergonomics (6), in the books edited by Elsevier (7), and CRC Taylor & Francis (8). 
This risk/damage database enabled an estimation (within defined limits) of the risk of upper-limb work-related musculoskeletal disorders at a given OCRA index level. Starting from the established relation among risk indexes and percent of pathological subjects, it was possible to determine the risk limit values provided by the ISO standard (2). With reference to the alleged absence of studies on the repeatability of the OCRA method, we prefer to mention the most recent results obtained by other researchers, rather than our findings, acknowledging the good ""inter-rater reliability"" of the OCRA Checklist, and stating that ""the OCRA Checklist inter-rater reliability scores were among the highest reported in the literature for semi-quantitative physical exposure assessment tools of the upper extremity"" (9) As for the scientific base, we suggest Armstrong et al (1) could get more valuable information about the OCRA methodology looking not only to the 1996 special issue in Italian language (10) ‒ the only publication they mention dealing specifically with OCRA ‒ but to the many updated publications. Some of the most relevant publications in English (as suggested by the publisher) are mentioned in the references here below. Many other publications and manuals in English, Italian, Spanish and Portuguese are available but not reported here due to limitation of space. A complete list of our publications can be found on our website: www.epmresearch.org, where some of the articles are available for download. Simple tools (Excel spreadsheets) for carrying out risk assessments by OCRA can also be freely downloaded from the same website. The validity and usability of OCRA methodology can also be indirectly confirmed by its extensive use around the world. 
For example, a recent search on ScienceDirect (www.sciencedirect.com/science/journals/all/full-text-access) has recently shown that more than 477 works dealing with OCRA hae been published by different authors in indexed journals to date. In conclusion, we recommend the authors of the discussion paper (1) deepen their analysis of the OCRA methodology [beyond the only cited old 1996 paper (10)] before expressing definite conclusions about the scientific value of the OCRA methodology and about the entire ISO standard-setting system. Our team is always happy to engage with the scientific community and end users of studies on biomechanical overload, as we have also done within the ISO for many years now. ISO working groups arguably offer valuable opportunities to come together at the international level and table discussions between researchers and users. We are researchers who have devoted our life's work to prevention, and intend to continue striving towards that goal, with everyone's help and without bickering, bias, vested interests, or professional rivalry. The health and well-being of workers is all we have ever cared about. We have always been ready to cooperate with those who share this vital objective. References 1. Armstrong T J, Burdorf I A, Descatha A, Farioli A, Graf M, Horie S, Marras W S, Potvin J R, Rempel D, Spatari G, Takala E P, Verbeek J, Violante FS. Scientific basis of ISO standards on biomechanical risk factors. Scand J Work Environ Health ‒ online first. https://doi.org/10.5271/sjweh.3718 2. ISO. ISO 11228-3. Ergonomics - Manual handling - Handling of low loads at high frequency. ISO, 2007. Geneva, Switzerland. 3. Colombini D, Occhipinti E, Delleman D, Fallentin N, Kilbom A, Grieco A. Exposure assessment of upper limb repetitive movements: a consensus document in W. Karwowski International Encyclopaedia of Ergonomics and Human Factors, New York: Taylor & Francis, 2001. 4. Colombini D, Grieco A, Occhipinti E. 
Occupational musculoskeletal disorders of the upper limbs due to mechanical overload. Ergonomics. Special issue;1998:41(9). 5. Occhipinti, E., Colombini, D. Metodo OCRA: aggiornamento dei valori di riferimento e dei modelli di previsione dell'occorrenza di UL-WMSDs nelle popolazioni lavorative esposte a movimenti e sforzi ripetuti degli arti superiori. [The OCRA method: update of UL-WMSDs reference values and prediction models of occurrence in working populations exposed to repetitive movements and strains of the upper limbs]. La Medicina del Lavoro, 2004. 95;4:305-319 6. Occhipinti E, Colombini D. Updating reference values and predictive models of the OCRA method in the risk assessment of work-related musculoskeletal disorders of the upper limbs. Ergonomics; 2007,50(11):1727-1739. https://doi.org/10.1080/00140130701674331 7. Colombini D, Occhipinti E, Grieco A. Risk assessment and management of repetitive movements and exertions of upper limbs. Amsterdam: Elsevier Science, 2002. 8. Colombini D, Occhipinti E. Risk analysis and management of repetitive actions: a guide for applying the OCRA system (occupational repetitive actions). New York: CRC press, 2016. 9. Paulsen R, Gallu T, Gilkey D, Reiser R, Murgia L, Rosecrance J. The inter-rater reliability of Strain Index and OCRA Checklist task assessments in cheese processing. Applied Ergonomics. 2015;51,199-204. https://doi.org/10.1016/j.apergo.2015.04.019 10. Occhipinti E, Colombini D. Proposal of a concise index for the evaluation of the exposure to repetitive movements of the upper extremity (OCRA index)]. Med Lav. Special issue, 1996 Nov-Dec; 87(6): 526-548.",2018-07-01 +29946688,Teleaudiology Services for Rehabilitation With Hearing Aids in Adults: A Systematic Review.,"

Purpose

This review examined (a) the current evidence from studies on teleaudiology applications for rehabilitation of adults with hearing impairment with hearing aids and (b) whether it is sufficient to support the translation into routine clinical practice.

Method

A search strategy and eligibility criteria were utilized to include articles specifically related to hearing aid fitting and follow-up procedures that are involved in consultations for the rehabilitation of adults, where the service was provided by the clinician by teleaudiology. A search using key words and Medical Subject Headings (MeSH) was conducted on the main electronic databases that index health-related studies. The included studies were assessed using validated evaluation tools for methodological quality, level of evidence, and grade recommendations for application into practice.

Results

Fourteen studies were identified as being within the scope of this review. The evaluation tools showed that none of these studies demonstrated either a strong methodological quality or high level of evidence. Analysis of evidence identified 19 activities, which were classified into service outcomes categories of feasibility, barriers, efficiency, quality, and effectiveness. Recommendations could be made regarding the (a) feasibility, (b) barriers, and (c) efficiency of teleaudiology for the rehabilitation of hearing loss with hearing aids.

Conclusion

This review provides up-to-date evidence for teleaudiology hearing aid services in new and experienced hearing aid users in different practice settings. Findings direct future research priorities to strengthen evidence-based practice. There is a need for further studies of many aspects of teleaudiology services for rehabilitation with hearing aids to support their implementation into clinical practice.

Supplemental material

https://doi.org/10.23641/asha.6534473.",2018-07-01 +29762787,LitVar: a semantic search engine for linking genomic variant data in PubMed and PMC.,"The identification and interpretation of genomic variants play a key role in the diagnosis of genetic diseases and related research. These tasks increasingly rely on accessing relevant manually curated information from domain databases (e.g. SwissProt or ClinVar). However, due to the sheer volume of medical literature and high cost of expert curation, curated variant information in existing databases are often incomplete and out-of-date. In addition, the same genetic variant can be mentioned in publications with various names (e.g. 'A146T' versus 'c.436G>A' versus 'rs121913527'). A search in PubMed using only one name usually cannot retrieve all relevant articles for the variant of interest. Hence, to help scientists, healthcare professionals, and database curators find the most up-to-date published variant research, we have developed LitVar for the search and retrieval of standardized variant information. In addition, LitVar uses advanced text mining techniques to compute and extract relationships between variants and other associated entities such as diseases and chemicals/drugs. LitVar is publicly available at https://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/LitVar.",2018-07-01 +29733379,oriTfinder: a web-based tool for the identification of origin of transfers in DNA sequences of bacterial mobile genetic elements.,"oriTfinder is a web server that facilitates the rapid identification of the origin of transfer site (oriT) of a conjugative plasmid or chromosome-borne integrative and conjugative element. The utilized back-end database oriTDB was built upon more than one thousand known oriT regions of bacterial mobile genetic elements (MGEs) as well as the known MGE-encoding relaxases and type IV coupling proteins (T4CP). 
With a combination of similarity searches for the oriTDB-archived oriT nucleotide sequences and the co-localization of the flanking relaxase homologous genes, the oriTfinder can predict the oriT region with high accuracy in the DNA sequence of a bacterial plasmid or chromosome in minutes. The server also detects the other transfer-related modules, including the potential relaxase gene, T4CP gene and the type IV secretion system gene cluster, and the putative genes coding for virulence factors and acquired antibiotic resistance determinants. oriTfinder may contribute to meeting the increasing demands of re-annotations for bacterial conjugative, mobilizable or non-transferable elements and aid in the rapid risk accession of disease-relevant trait dissemination in pathogenic bacteria of interest. oriTfinder is freely available to all users without any login requirement at http://bioinfo-mml.sjtu.edu.cn/oriTfinder.",2018-07-01 +28077405,VRprofile: gene-cluster-detection-based profiling of virulence and antibiotic resistance traits encoded within genome sequences of pathogenic bacteria.,"VRprofile is a Web server that facilitates rapid investigation of virulence and antibiotic resistance genes, as well as extends these trait transfer-related genetic contexts, in newly sequenced pathogenic bacterial genomes. The used backend database MobilomeDB was firstly built on sets of known gene cluster loci of bacterial type III/IV/VI/VII secretion systems and mobile genetic elements, including integrative and conjugative elements, prophages, class I integrons, IS elements and pathogenicity/antibiotic resistance islands. VRprofile is thus able to co-localize the homologs of these conserved gene clusters using HMMer or BLASTp searches. With the integration of the homologous gene cluster search module with a sequence composition module, VRprofile has exhibited better performance for island-like region predictions than the other widely used methods. 
In addition, VRprofile also provides an integrated Web interface for aligning and visualizing identified gene clusters with MobilomeDB-archived gene clusters, or a variety set of bacterial genomes. VRprofile might contribute to meet the increasing demands of re-annotations of bacterial variable regions, and aid in the real-time definitions of disease-relevant gene clusters in pathogenic bacteria of interest. VRprofile is freely available at http://bioinfo-mml.sjtu.edu.cn/VRprofile.",2018-07-01 +29939067,"""A cross-language study of decontextualized vocabulary comprehension in toddlerhood and kindergarten readiness"": Correction to Friend et al. (2018).","Reports an error in ""A cross-language study of decontextualized vocabulary comprehension in toddlerhood and kindergarten readiness"" by Margaret Friend, Erin Smolak, Yushuang Liu, Diane Poulin-Dubois and Pascal Zesiger (Developmental Psychology, Advanced Online Publication, Apr 05, 2018, np). In the article, the reference for Legacy, Zesiger, Friend, & Poulin-Dubois (2016) should be Legacy, Zesiger, Friend, & Poulin-Dubois (2018). The correct reference for the article is listed below: Legacy, J., Zesiger, P., Friend, M., & Poulin-Dubois, D. (2018). Vocabulary size and speed of word recognition in very young French-English bilinguals: A longitudinal study. Bilingualism: Language and Cognition, 21, 137-149. https://doi.org/10.1017/S1366728916000833. All versions of this article have been corrected. (The following abstract of the original article appeared in record 2018-13949-001.) Recent studies demonstrate that emerging literacy depends on earlier language achievement. Importantly, most extant work focuses on parent-reported production prior to 30 months of age. Of interest is whether and how directly assessed vocabulary comprehension in the 2nd year of life supports vocabulary and kindergarten readiness in the 4th year. 
We first contrasted orthogonal indices of parent-reported production and directly assessed vocabulary comprehension and found that comprehension was a stronger predictor of child outcomes. We then assessed prediction from vocabulary comprehension controlling for maternal education, preschool attendance, and child sex. In 3 studies early, decontextualized vocabulary comprehension emerged as a significant predictor of 4th year language and kindergarten readiness accounting for unique variance above demographic control variables. Further we found that the effect of early vocabulary on 4th year kindergarten readiness was not mediated by 4th year vocabulary. This pattern of results emerged in English monolingual children (N = 48) and replicated in French monolingual (N = 58) and French-English bilingual children (N = 34). Our findings suggest that early, decontextualized vocabulary may provide a platform for the establishment of a conceptual system that supports both later vocabulary and kindergarten readiness, including the acquisition of a wide range of concepts including print and number. Differences between parent-reported and directly assessed vocabulary and the mechanisms by which decontextualized vocabulary may contribute to conceptual development are discussed. (PsycINFO Database Record",2018-07-01 +29741643,PANNZER2: a rapid functional annotation web server.,"The unprecedented growth of high-throughput sequencing has led to an ever-widening annotation gap in protein databases. While computational prediction methods are available to make up the shortfall, a majority of public web servers are hindered by practical limitations and poor performance. Here, we introduce PANNZER2 (Protein ANNotation with Z-scoRE), a fast functional annotation web server that provides both Gene Ontology (GO) annotations and free text description predictions. 
PANNZER2 uses SANSparallel to perform high-performance homology searches, making bulk annotation based on sequence similarity practical. PANNZER2 can output GO annotations from multiple scoring functions, enabling users to see which predictions are robust across predictors. Finally, PANNZER2 predictions scored within the top 10 methods for molecular function and biological process in the CAFA2 NK-full benchmark. The PANNZER2 web server is updated on a monthly schedule and is accessible at http://ekhidna2.biocenter.helsinki.fi/sanspanz/. The source code is available under the GNU Public Licence v3.",2018-07-01 +20949394,Best practices for establishing a biobank.,"A biobank may be defined as the long-term storage of biological samples for research or clinical purposes. In addition to storage facilities, a biobank may comprise a complete organization with biological samples, data, personnel, policies, and procedures for handling specimens and performing other services, such as the management of the database and the planning of scientific studies. This combination of facilities, policies, and processes may also be called a biological resource center (BRC) ( www.iarc.fr ). Research using specimens from biobanks is regulated by European Union (EU) recommendations (Recommendations on Research on Human Biological Materials. The draft recommendation on research on human biological materials was approved by CDBI at its plenary meeting on 20 October 2005) and by voluntary best practices from the U.S. National Cancer Institute (NCI) ( http://biospecimens.cancer.gov ) and other organizations. Best practices for the management of research biobanks vary according to the institution and differing international regulations and standards. 
However, there are many areas of agreement that have resulted in best practices that should be followed in order to establish a biobank for the custodianship of high-quality specimens and data.",2011-01-01 +28407145,GEPIA: a web server for cancer and normal gene expression profiling and interactive analyses.,"Tremendous amount of RNA sequencing data have been produced by large consortium projects such as TCGA and GTEx, creating new opportunities for data mining and deeper understanding of gene functions. While certain existing web servers are valuable and widely used, many expression analysis functions needed by experimental biologists are still not adequately addressed by these tools. We introduce GEPIA (Gene Expression Profiling Interactive Analysis), a web-based tool to deliver fast and customizable functionalities based on TCGA and GTEx data. GEPIA provides key interactive and customizable functions including differential expression analysis, profiling plotting, correlation analysis, patient survival analysis, similar gene detection and dimensionality reduction analysis. The comprehensive expression analyses with simple clicking through GEPIA greatly facilitate data mining in wide research areas, scientific discussion and the therapeutic discovery process. GEPIA fills in the gap between cancer genomics big data and the delivery of integrated information to end users, thus helping unleash the value of the current data resources. GEPIA is available at http://gepia.cancer-pku.cn/.",2017-07-01 +21253873,Tetrahymena Gene Expression Database (TGED): a resource of microarray data and co-expression analyses for Tetrahymena.,"Tetrahymena thermophila is a model eukaryotic organism. Functional genomic analyses in Tetrahymena present rich opportunities to address fundamental questions of cell and molecular biology. The Tetrahymena Gene Expression Database (TGED; available at http://tged.ihb.ac.cn) is the first expression database of a ciliated protozoan. 
It covers three major physiological and developmental states: growth, starvation, and conjugation, and can be accessed through a user-friendly web interface. The gene expression profiles and candidate co-expressed genes for each gene can be retrieved using Gene ID or Gene description searches. Descriptions of standardized methods of sample preparation and the opportunity to add new Tetrahymena microarray data will be of great interest to the Tetrahymena research community. TGED is intended to be a resource for all members of the scientific research community who are interested in Tetrahymena and other ciliates.",2011-01-21 +30017874,Elaboration of an instrument to evaluate the recognition of Brazilian melodies in children.,"

Introduction

There is evidence pointing to the importance of the evaluation of musical perception through objective and subjective instruments. In Brazil, there is a shortage of instruments that evaluates musical perception.

Objective

To develop an instrument to evaluate the recognition of traditional Brazilian melodies and investigate the performance of children with typical hearing.

Methods

The study was carried out after approval of the research ethics committee (1.198.607). The instrument was developed in software format with website access, using the languages PHP 5.5.12, Javascript, Cascade style sheets and ""HTML5""; database ""MYSQL 5.6.17"" on the ""Apache 2.4.9"" server. Fifteen melodies of Brazilian folk songs were recorded in piano synthesized timbre, with 12 seconds per melody reproduction and four second intervals between them. A total of 155 schooled children, aged eight to 11 years, of both sexes, with typical hearing participated in the study. The test was performed in a silent room with sound stimuli amplified by a sound box at 65dBNA, positioned at 0 azimuth, and at one meter from the participant, the notebook was used for children to play with on the screen on the title and illustration of the melody they recognized they were listening to. The responses were recorded on their own database.

Results

The instrument titled ""Evaluation of recognition of traditional melodies in children"" can be run on various devices (computers, notebooks, tablets, mobile phones) and operating systems (Windows, Macintosh, Android, Linux). Access: http://192.185.216.17/ivan/home/login.php by login and password. The most easily recognized melody was ""Cai, cai balão"" (89%) and the least recognized was ""Capelinha de melão"" (25.2%). The average time to perform the test was 3'15″.

Conclusion

The development and application of the software proved effective for the studied population. This instrument may contribute to the improvement of protocols for the evaluation of musical perception in children with hearing aid and/or cochlear implants users.",2018-06-30 +29667823,Luciferase Advisor: High-Accuracy Model To Flag False Positive Hits in Luciferase HTS Assays.,"Firefly luciferase is an enzyme that has found ubiquitous use in biological assays in high-throughput screening (HTS) campaigns. The inhibition of luciferase in such assays could lead to a false positive result. This issue has been known for a long time, and there have been significant efforts to identify luciferase inhibitors in order to enhance recognition of false positives in screening assays. However, although a large amount of publicly accessible luciferase counterscreen data is available, to date little effort has been devoted to building a chemoinformatic model that can identify such molecules in a given data set. In this study we developed models to identify these molecules using various methods, such as molecular docking, SMARTS screening, pharmacophores, and machine learning methods. Among the structure-based methods, the pharmacophore-based method showed promising results, with a balanced accuracy of 74.2%. However, machine-learning approaches using associative neural networks outperformed all of the other methods explored, producing a final model with a balanced accuracy of 89.7%. The high predictive accuracy of this model is expected to be useful for advising which compounds are potential luciferase inhibitors present in luciferase HTS assays. The models developed in this work are freely available at the OCHEM platform at http://ochem.eu .",2018-04-18 +29522123,eMolTox: prediction of molecular toxicity with confidence.,"Summary:In this work, we present eMolTox, a web server for the prediction of potential toxicity associated with a given molecule. 
A total of 174 toxicology-related in vitro/vivo experimental datasets were used for model construction and Mondrian conformal prediction was used to estimate the confidence of the resulting predictions. Toxic substructure analysis is also implemented in eMolTox. eMolTox predicts and displays a wealth of information of potential molecular toxicities for safety analysis in drug development. Availability and implementation:The eMolTox Server is freely available for use on the web at http://xundrug.cn/moltox. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +29437974,HAVCR1 (CD365) and Its Mouse Ortholog Are Functional Hepatitis A Virus (HAV) Cellular Receptors That Mediate HAV Infection. ,"The hepatitis A virus (HAV) cellular receptor 1 (HAVCR1), classified as CD365, was initially discovered as an HAV cellular receptor using an expression cloning strategy. Due to the lack of HAV receptor-negative replication-competent cells, it was not possible to fully prove that HAVCR1 was a functional HAV receptor. However, biochemistry, classical virology, and epidemiology studies further supported the functional role of HAVCR1 as an HAV receptor. Here, we show that an anti-HAVCR1 monoclonal antibody that protected African green monkey kidney (AGMK) cells against HAV infection only partially protected monkey Vero E6 cells and human hepatoma Huh7 cells, indicating that these two cell lines express alternative yet unidentified HAV receptors. Therefore, we focused our work on AGMK cells to further characterize the function of HAVCR1 as an HAV receptor. Advances in clustered regularly interspaced short palindromic repeat/Cas9 technology allowed us to knock out the monkey ortholog of HAVCR1 in AGMK cells. The resulting AGMK HAVCR1 knockout (KO) cells lost susceptibility to HAV infection, including HAV-free viral particles (vpHAV) and exosomes purified from HAV-infected cells (exo-HAV). 
Transfection of HAVCR1 cDNA into AGMK HAVCR1 KO cells restored susceptibility to vpHAV and exo-HAV infection. Furthermore, transfection of the mouse ortholog of HAVCR1, mHavcr1, also restored the susceptibility of AGMK HAVCR1 KO cells to HAV infection. Taken together, our data clearly show that HAVCR1 and mHavcr1 are functional HAV receptors that mediate HAV infection. This work paves the way for the identification of alternative HAV receptors to gain a complete understanding of their interplay with HAVCR1 in the cell entry and pathogenic processes of HAV.IMPORTANCE HAVCR1, an HAV receptor, is expressed in different cell types, including regulatory immune cells and antigen-presenting cells. How HAV evades the immune response during a long incubation period of up to 4 weeks and the mechanism by which the subsequent necroinflammatory process clears the infection remain a puzzle that most likely involves the HAV-HAVCR1 interaction. Based on negative data, a recent paper from the S. M. Lemon and W. Maury laboratories (A. Das, A. Hirai-Yuki, O. Gonzalez-Lopez, B. Rhein, S. Moller-Tank, R. Brouillette, L. Hensley, I. Misumi, W. Lovell, J. M. Cullen, J. K. Whitmire, W. Maury, and S. M. Lemon, mBio 8:e00969-17, 2017, https://doi.org/10.1128/mBio.00969-17) suggested that HAVCR1 is not a functional HAV receptor, nor it is it required for HAV infection. However, our data, based on regain of the HAV receptor function in HAVCR1 knockout cells transfected with HAVCR1 cDNA, disagree with their findings. Our positive data show conclusively that HAVCR1 is indeed a functional HAV receptor and lays the ground for the identification of alternative HAV receptors and how they interact with HAVCR1 in cell entry and the pathogenesis of HAV.",2018-04-13 +29559573,The Gathering Storm: Is Untreatable Typhoid Fever on the Way? ,"Klemm et al. 
(mBio 9:e00105-18, 2018, https://doi.org/10.1128/mBio.00105-18) present comprehensive antibiotic sensitivity patterns and genomic sequence data on Salmonella enterica serovar Typhi blood culture isolates from typhoid fever cases during an epidemic in Pakistan. Microbiologic and genomic data pinpoint the identities and locations of the antimicrobial resistance genes and the outbreak strain's lineage. They propose that Salmonella enterica serovar Typhi be added to the list of bacterial pathogens of public health importance that have become extensively drug resistant (XDR). This paper portends possible dire scenarios for typhoid fever control if XDR strains disseminate globally. Since the outbreak strain is of the H58 haplotype, known for its ability to spread worldwide and displace endemic S Typhi, this concern is well-founded. The report of Klemm et al. forewarns the global community to address control of typhoid fever more aggressively through prevention, should therapeutic options disappear. This Commentary frames the Klemm et al. findings within a historic perspective.",2018-03-20 +,"Genetic resources collections of leafy vegetables (lettuce, spinach, chicory, artichoke, asparagus, lamb’s lettuce, rhubarb and rocket salad): composition and gaps","Lettuce, spinach and chicory are generally considered the main leafy vegetables, while a fourth group denoted by ‘minor leafy vegetables’ includes, amongst others, rocket salad, lamb’s lettuce, asparagus, artichoke and rhubarb. Except in the case of lettuce, central crop databases of leafy vegetables were lacking until recently. Here we report on the update of the international Lactuca database and the development of three new central crop databases for each of the other leafy vegetable crop groups. Requests for passport data of accessions available to the user community were addressed to all known European collection holders and to the main collection holders located outside Europe. 
Altogether, passport data of 17,530 accessions from a total of 129 collections were collected. The four separate databases were made available on line via a common entry page accessible at http://documents.plant.wur.nl/cgn/pgr/LVintro/ . Based on a literature study, an analysis of the gene pool structure of the crops was performed and an inventory was made of the distribution areas of the species involved. The results of these surveys were related to the contents of the newly established databases in order to identify the main collection gaps. Priorities are presented for future germplasm acquisition aimed at improving the coverage of the crop gene pools in ex situ collections.",2012-08-01 +30186253,PGAweb: A Web Server for Bacterial Pan-Genome Analysis.,"An astronomical increase in microbial genome data in recent years has led to strong demand for bioinformatic tools for pan-genome analysis within and across species. Here, we present PGAweb, a user-friendly, web-based tool for bacterial pan-genome analysis, which is composed of two main pan-genome analysis modules, PGAP and PGAP-X. PGAweb provides key interactive and customizable functions that include orthologous clustering, pan-genome profiling, sequence variation and evolution analysis, and functional classification. PGAweb presents features of genomic structural dynamics and sequence diversity with different visualization methods that are helpful for intuitively understanding the dynamics and evolution of bacterial genomes. PGAweb has an intuitive interface with one-click setting of parameters and is freely available at http://PGAweb.vlcc.cn/.",2018-08-21 +26384444,Plasma uric acid concentrations are reduced by fenofibrate: A systematic review and meta-analysis of randomized placebo-controlled trials.,"

Background

Hyperuricaemia increases the risk of gout, but it is also a risk factor for cardiovascular diseases.

Purpose

To conduct a systematic review and meta-analysis of relevant randomized clinical trials to ascertain the effect size of fibrates in modulating plasma uric acid concentrations.

Data sources

Medline (http://www.ncbi.nlm.nih.gov/pubmed), SCOPUS, Web of Science and Google Scholar databases were searched.

Study selection

Studies were included if they met the following inclusion criteria: (i) being a randomized placebo-controlled trial with either parallel or cross-over design, (ii) investigating the impact of fibrate therapy on plasma uric acid concentrations, (iii) presentation of sufficient information on uric acid values at baseline and at the end of follow-up in each group or providing the net change values.

Data extraction

The following data were extracted: (1) first author's name; (2) year of publication; (3) study location; (4) study design; (5) number of participants in the fibrate and placebo groups; (6) type and dose of fibrate; (7) duration of treatment; (8) age, gender and body mass index (BMI) of study participants; (9) baseline levels of total cholesterol, low-density lipoprotein cholesterol (LDL-C), high-density lipoprotein cholesterol (HDL-C), triglycerides, high-sensitivity C-reactive protein (hs-CRP) and glucose; (10) systolic and diastolic blood pressure; and (11) data regarding baseline and follow-up uric acid.

Data synthesis

There was a significant reduction in plasma uric acid concentrations following fenofibrate therapy.

Limitations

Few eligible studies, and most had small population sizes.

Conclusions

Fenofibrate, but not bezafibrate, is effective in reducing serum uric acid levels.",2015-09-15 +22820202,SpliceSeq: a resource for analysis and visualization of RNA-Seq data on alternative splicing and its functional impacts.,"

Summary

SpliceSeq is a resource for RNA-Seq data that provides a clear view of alternative splicing and identifies potential functional changes that result from splice variation. It displays intuitive visualizations and prioritized lists of results that highlight splicing events and their biological consequences. SpliceSeq unambiguously aligns reads to gene splice graphs, facilitating accurate analysis of large, complex transcript variants that cannot be adequately represented in other formats.

Availability and implementation

SpliceSeq is freely available at http://bioinformatics.mdanderson.org/main/SpliceSeq:Overview. The application is a Java program that can be launched via a browser or installed locally. Local installation requires MySQL and Bowtie.

Contact

mryan@insilico.us.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-20 +29084633,Examining Lead Exposures in California through State-Issued Health Alerts for Food Contamination and an Exposure-Based Candy Testing Program.,"

Summary

In California, the annual number of children under age 6 y of age with blood lead levels (BLL) ≥10μg/dL is estimated at over 1,000 cases, and up to 10,000 cases when BLL between 4.5 and 9.5 μg/dL are included. State-issued health alerts for food contamination provide one strategy for tracking sources of food-related lead exposures. As well, California passed legislation in 2006 for the Food and Drug Branch (FDB) of the state health department to test and identify lead in candy. This report presents health alert data from California over a 14-y period, compares data before and after the candy testing program began, and examines country of origin, ZIP code data, and time from candy testing to release of health alerts for lead-contaminated candies for 2011-2012. After 2007, health alerts issued for lead in candy and food increased significantly. Analysis of candy-testing data indicated that multiple counties and ZIP codes were affected. Seventeen candies with high lead concentrations were identified, resulting in rapid dissemination (<2wk) of health alerts to local health departments and community clinicians and to the public. Surveillance of lead exposures from state-based food and candy testing programs provides an opportunity to identify and immediately act to remove nonpaint sources of lead affecting children. https://doi.org/10.1289/EHP2582.",2017-10-26 +30004104,Karyotypes of Brazilian non-volant small mammals (Didelphidae and Rodentia): An online tool for accessing the chromosomal diversity.,"We have created a database system named CIPEMAB (CItogenética dos PEquenos MAmíferos Brasileiros) to assemble images of the chromosomes of Brazilian small mammals (Rodents and Marsupials). It includes karyotype information, such as diploid number, karyotype features, idiograms, and sexual chromosomes characteristics. CIPEMAB facilitates quick sharing of information on chromosome research among cytogeneticists as well as researchers in other fields. 
The database contains more than 300 microscopic images, including karyotypic images obtained from 182 species of small mammals from the literature. Researchers can browse the contents of the database online (http://www.citogenetica.ufes.br). The system enables users to locate images of interest by taxa, and to display the document with detailed information on species names, authors, year of the species publication, and karyotypes pictures in different colorations. CIPEMAB has a wide range of applications, such as comparing various karyotypes of Brazilian species and identifying manuscripts of interest.",2018-06-28 +29330539,Missing Value Imputation Approach for Mass Spectrometry-based Metabolomics Data.,"Missing values exist widely in mass-spectrometry (MS) based metabolomics data. Various methods have been applied for handling missing values, but the selection can significantly affect following data analyses. Typically, there are three types of missing values, missing not at random (MNAR), missing at random (MAR), and missing completely at random (MCAR). Our study comprehensively compared eight imputation methods (zero, half minimum (HM), mean, median, random forest (RF), singular value decomposition (SVD), k-nearest neighbors (kNN), and quantile regression imputation of left-censored data (QRILC)) for different types of missing values using four metabolomics datasets. Normalized root mean squared error (NRMSE) and NRMSE-based sum of ranks (SOR) were applied to evaluate imputation accuracy. Principal component analysis (PCA)/partial least squares (PLS)-Procrustes analysis were used to evaluate the overall sample distribution. Student's t-test followed by correlation analysis was conducted to evaluate the effects on univariate statistics. Our findings demonstrated that RF performed the best for MCAR/MAR and QRILC was the favored one for left-censored MNAR. 
Finally, we proposed a comprehensive strategy and developed a public-accessible web-tool for the application of missing value imputation in metabolomics ( https://metabolomics.cc.hawaii.edu/software/MetImp/ ).",2018-01-12 +28664177,Data set for fabrication of conformal two-dimensional TiO2 by atomic layer deposition using tetrakis (dimethylamino) titanium (TDMAT) and H2O precursors.,"The data and complementary information presented hare are related to the research article of ""http://dx.doi.org/10.1016/j.matdes.2017.02.016; Materials and Design 120 (2017) 99-108"" [1]. The article provides data and information on the case of atomic layer deposition (ALD) of ultra-thin two-dimensional TiO2 film. The chemical structure of precursors, and the fabrication process were illustrated. The data of spectral ellipsometric measurements and the methods of calculations were presented. Data of root mean square roughness and the average roughness of the ADL TiO2 film are presented. The method of bandgap measurements and the bandgap calculation are also explained in the present data article.",2017-06-09 +29077904,UFBoot2: Improving the Ultrafast Bootstrap Approximation.,"The standard bootstrap (SBS), despite being computationally intensive, is widely used in maximum likelihood phylogenetic analyses. We recently proposed the ultrafast bootstrap approximation (UFBoot) to reduce computing time while achieving more unbiased branch supports than SBS under mild model violations. UFBoot has been steadily adopted as an efficient alternative to SBS and other bootstrap approaches. Here, we present UFBoot2, which substantially accelerates UFBoot and reduces the risk of overestimating branch supports due to polytomies or severe model violations. Additionally, UFBoot2 provides suitable bootstrap resampling strategies for phylogenomic data. UFBoot2 is 778 times (median) faster than SBS and 8.4 times (median) faster than RAxML rapid bootstrap on tested data sets. 
UFBoot2 is implemented in the IQ-TREE software package version 1.6 and freely available at http://www.iqtree.org.",2018-02-01 +30192161,Evaluation and Optimization of Pharmacokinetic Models for in Vitro to in Vivo Extrapolation of Estrogenic Activity for Environmental Chemicals.,"

Background

To effectively incorporate in vitro data into regulatory use, confidence must be established in the quantitative extrapolation of in vitro activity to relevant end points in animals or humans.

Objective

Our goal was to evaluate and optimize in vitro to in vivo extrapolation (IVIVE) approaches using in vitro estrogen receptor (ER) activity to predict estrogenic effects measured in rodent uterotrophic studies.

Methods

We evaluated three pharmacokinetic (PK) models with varying complexities to extrapolate in vitro to in vivo dosimetry for a group of 29 ER agonists, using data from validated in vitro [U.S. Environmental Protection Agency (U.S. EPA) ToxCast™ ER model] and in vivo (uterotrophic) methods. In vitro activity values were adjusted using mass-balance equations to estimate intracellular exposure via an enrichment factor (EF), and steady-state model calculations were adjusted using fraction of unbound chemical in the plasma ([Formula: see text]) to approximate bioavailability. Accuracy of each model-adjustment combination was assessed by comparing model predictions with lowest effect levels (LELs) from guideline uterotrophic studies.

Results

We found little difference in model predictive performance based on complexity or route-specific modifications. Simple adjustments, applied to account for in vitro intracellular exposure (EF) or chemical bioavailability ([Formula: see text]), resulted in significant improvements in the predictive performance of all models.

Conclusion

Computational IVIVE approaches accurately estimate chemical exposure levels that elicit positive responses in the rodent uterotrophic bioassay. The simplest model had the best overall performance for predicting both oral (PPK_EF) and injection (PPK_[Formula: see text]) LELs from guideline uterotrophic studies, is freely available, and can be parameterized entirely using freely available in silico tools. https://doi.org/10.1289/EHP1655.",2018-09-01 +23153078,P2TF: a comprehensive resource for analysis of prokaryotic transcription factors.,"

Background

Transcription factors (TFs) are DNA-binding proteins that regulate gene expression by activating or repressing transcription. Some have housekeeping roles, while others regulate the expression of specific genes in response to environmental change. The majority of TFs are multi-domain proteins, and they can be divided into families according to their domain organisation. There is a need for user-friendly, rigorous and consistent databases to allow researchers to overcome the inherent variability in annotation between genome sequences.

Description

P2TF (Predicted Prokaryotic Transcription Factors) is an integrated and comprehensive database relating to transcription factor proteins. The current version of the database contains 372,877 TFs from 1,987 completely sequenced prokaryotic genomes and 43 metagenomes. The database provides annotation, classification and visualisation of TF genes and their genetic context, providing researchers with a one-stop shop in which to investigate TFs. The P2TF database analyses TFs in both predicted proteomes and reconstituted ORFeomes, recovering approximately 3% more TF proteins than just screening predicted proteomes. Users are able to search the database with sequence or domain architecture queries, and resulting hits can be aligned to investigate evolutionary relationships and conservation of residues. To increase utility, all searches can be filtered by taxonomy, TF genes can be added to the P2TF cart, and gene lists can be exported for external analysis in a variety of formats.

Conclusions

P2TF is an open resource for biologists, allowing exploration of all TFs within prokaryotic genomes and metagenomes. The database enables a variety of analyses, and results are presented for user exploration as an interactive web interface, which provides different ways to access and download the data. The database is freely available at http://www.p2tf.org/.",2012-11-15 +26582915,piRNA cluster database: a web resource for piRNA producing loci.,"Piwi proteins and their guiding small RNAs, termed Piwi-interacting (pi-) RNAs, are essential for silencing of transposons in the germline of animals. A substantial fraction of piRNAs originates from genomic loci termed piRNA clusters and sequences encoded in these piRNA clusters determine putative targets for the Piwi/piRNA system. In the past decade, studies of piRNA transcriptomes in different species revealed additional roles for piRNAs beyond transposon silencing, reflecting the astonishing plasticity of the Piwi/piRNA system along different phylogenetic branches. Moreover, piRNA transcriptomes can change drastically during development and vary across different tissues.Since piRNA clusters crucially shape piRNA profiles, analysis of these loci is imperative for a thorough understanding of functional and evolutionary aspects of the piRNA pathway. But despite the ever-growing amount of available piRNA sequence data, we know little about the factors that determine differential regulation of piRNA clusters, nor the evolutionary events that cause their gain or loss.In order to facilitate addressing these subjects, we established a user-friendly piRNA cluster database (http://www.smallrnagroup-mainz.de/piRNAclusterDB.html) that provides comprehensive data on piRNA clusters in multiple species, tissues and developmental stages based on small RNA sequence data deposited at NCBI's Sequence Read Archive (SRA).",2015-11-17 +29960763,Impact of antihypertensive agents on arterial stiffness in hypertensive patients.,"

Aims

The present network meta-analysis was performed to comprehensively compare the ability of different types of antihypertensive agents to ameliorate arterial stiffness in hypertensive patients.

Methods and results

To conduct this network meta-analysis, we searched PubMed, the Embase database, and the https://clinicaltrials.gov/ website for all relevant articles concerning clinical trials on hypertension therapy. The last search date was 10 August 2017. As a result, 28 eligible articles were enrolled in our meta-analysis. According to the included studies, there was no significant difference in pulse wave velocity (PWV) between these treatments. The eight types of antihypertensive agents outperformed placebo in controlling systolic blood pressure (SBP). Angiotensin-converting enzyme inhibitor (ACEI) outperformed angiotensin II receptor blocker (ARB) in SBP; and angiotensin receptor-neprilysin inhibitor (ARNI) outperformed diuretic (D) in SBP.

Conclusions

This study found that the eight antihypertensive agents show an obvious effect on reducing SBP rather than arterial stiffness.",2018-06-27 +29945549,Comparative genome-wide characterization leading to simple sequence repeat marker development for Nicotiana.,"

Background

Simple sequence repeats (SSRs) are tandem repeats of DNA that have been used to develop robust genetic markers. These molecular markers are powerful tools for basic and applied studies such as molecular breeding. In the model plants in Nicotiana genus e.g. N. benthamiana, a comprehensive assessment of SSR content has become possible now because several Nicotiana genomes have been sequenced. We conducted a genome-wide SSR characterization and marker development across seven Nicotiana genomes.

Results

Here, we initially characterized 2,483,032 SSRs (repeat units of 1-10 bp) from seven genomic sequences of Nicotiana and developed SSR markers using the GMATA® software package. Of investigated repeat units, mono-, di- and tri-nucleotide SSRs account for 98% of all SSRs in Nicotiana. More complex SSR motifs, although rare, are highly variable between Nicotiana genomes. A total of 1,224,048 non-redundant Nicotiana (NIX) markers were developed, of which 99.98% are novel. An efficient and uniform genotyping protocol for NIX markers was developed and validated. We created a web-based database of NIX marker information including amplicon sizes of alleles in each genome for downloading and online analysis.

Conclusions

The present work constitutes the first deep characterization of SSRs in seven genomes of Nicotiana, and the development of NIX markers for these SSRs. Our online marker database and an efficient genotyping protocol facilitate the application of these markers. The NIX markers greatly expand Nicotiana marker resources, thus providing a useful tool for future research and breeding. We demonstrate a novel protocol for SSR marker development and utilization at the whole genome scale that can be applied to any lineage of organisms. The Tobacco Markers & Primers Database (TMPD) is available at http://biodb.sdau.edu.cn/tmpd/index.html.",2018-06-27 +29316788,SynBioHub: A Standards-Enabled Design Repository for Synthetic Biology.,"The SynBioHub repository ( https://synbiohub.org ) is an open-source software project that facilitates the sharing of information about engineered biological systems. SynBioHub provides computational access for software and data integration, and a graphical user interface that enables users to search for and share designs in a Web browser. By connecting to relevant repositories (e.g., the iGEM repository, JBEI ICE, and other instances of SynBioHub), the software allows users to browse, upload, and download data in various standard formats, regardless of their location or representation. SynBioHub also provides a central reference point for other resources to link to, delivering design information in a standardized format using the Synthetic Biology Open Language (SBOL). The adoption and use of SynBioHub, a community-driven effort, has the potential to overcome the reproducibility challenge across laboratories by helping to address the current lack of information about published designs.",2018-01-30 +26801957,ELASPIC web-server: proteome-wide structure-based prediction of mutation effects on protein stability and binding affinity.,"

Unlabelled

ELASPIC is a novel ensemble machine-learning approach that predicts the effects of mutations on protein folding and protein-protein interactions. Here, we present the ELASPIC webserver, which makes the ELASPIC pipeline available through a fast and intuitive interface. The webserver can be used to evaluate the effect of mutations on any protein in the Uniprot database, and allows all predicted results, including modeled wild-type and mutated structures, to be managed and viewed online and downloaded if needed. It is backed by a database which contains improved structural domain definitions, and a list of curated domain-domain interactions for all known proteins, as well as homology models of domains and domain-domain interactions for the human proteome. Homology models for proteins of other organisms are calculated on the fly, and mutations are evaluated within minutes once the homology model is available.

Availability and implementation

The ELASPIC webserver is available online at http://elaspic.kimlab.org

Contact

pm.kim@utoronto.ca or pi@kimlab.org. Supplementary data: Supplementary data are available at Bioinformatics online.",2016-01-21 +27153650,DisGeNET-RDF: harnessing the innovative power of the Semantic Web to explore the genetic basis of diseases.,

Motivation

DisGeNET-RDF makes available knowledge on the genetic basis of human diseases in the Semantic Web. Gene-disease associations (GDAs) and their provenance metadata are published as human-readable and machine-processable web resources. The information on GDAs included in DisGeNET-RDF is interlinked to other biomedical databases to support the development of bioinformatics approaches for translational research through evidence-based exploitation of a rich and fully interconnected linked open data.

Availability and implementation

http://rdf.disgenet.org/

Contact

support@disgenet.org.,2016-04-22 +30862380,Computational modeling of neuromuscular response to swing-phase robotic knee extension assistance in cerebral palsy.,"Predicting subject-specific responses to exoskeleton assistance may aid in maximizing functional gait outcomes, such as achieving full knee-extension at foot contact in individuals with crouch gait from cerebral palsy (CP). The purpose of this study was to investigate the role of volitional and non-volitional muscle activity in subject-specific responses to knee extension assistance during walking with an exoskeleton. We developed a simulation framework to predict responses to exoskeleton torque by applying a stretch-reflex spasticity model with muscle excitations computed during unassisted walking. The framework was validated with data collected from six individuals with CP. Framework-predicted knee angle at terminal swing was within 4 ± 4° (mean ± sd) of the knee angle measured experimentally without the addition of spasticity. Kinematic responses in two-thirds of the participants could be accurately modeled using only underlying muscle activity and the applied exoskeleton torque; incorporating hamstring spasticity was necessary to recreate the measured kinematics to within 1 ± 1° in the remaining participants. We observed strong positive linear relationships between knee extension and exoskeleton assistance, and strong negative quadratic relationships between knee extension and spasticity. We utilized our framework to identify optimal torque profiles necessary to achieve full knee-extension at foot contact. An angular impulse of 0.061 ± 0.025 Nm·s·kg-1·deg-1 with 0.013 ± 0.002 Nm·kg-1·deg-1 of peak torque and 4.1 ± 1.9 W·kg-1·deg-1 peak mechanical power was required to achieve full knee extension (values normalized by knee excursion). This framework may aid the prescription of exoskeleton control strategies in pathologies with muscle spasticity. 
https://simtk.org/projects/knee-exo-pred/.",2019-03-07 +25020248,Identification of novel dipeptidyl peptidase-IV and angiotensin-I-converting enzyme inhibitory peptides from meat proteins using in silico analysis.,"Angiotensin-I-converting enzyme (ACE-I, EC 3.4.15.1), renin (EC 3.4.23.15), and dipeptidyl peptidase-IV (DPP-IV, EC 3.4.14.5) play key roles in the control of hypertension and the development of type-2 diabetes and other diseases associated with metabolic syndrome. The aim of this work was to utilize known in silico methodologies, peptide databases and software including ProtParam (http://web.expasy.org/protparam/), Basic Local Alignment Tool (BLAST), ExPASy PeptideCutter (http://web.expasy.org/peptide_cutter/) and BIOPEP (http://www.uwm.edu.pl/biochemia/index.php/pl/biopep) to assess the release of potentially bioactive DPP-IV, renin and ACE-I inhibitory peptides from bovine and porcine meat proteins including hemoglobin, collagen and serum albumin. These proteins were chosen as they are found commonly in meat by-products such as bone, blood and low-value meat cuts. In addition, the bioactivities of identified peptides were confirmed using chemical synthesis and in vitro bioassays. The concentration of peptide required to inhibit the activity of ACE-I and DPP-IV by 50% was determined for selected, active peptides. Novel ACE-I and DPP-IV inhibitory peptides were identified in this study using both in silico analysis and a literature search to streamline enzyme selection for peptide production. These novel peptides included the ACE-I inhibitory tri-peptide Ile-Ile-Tyr and the DPP-IV inhibitory tri-peptide Pro-Pro-Leu corresponding to sequences f (182-184) and f (326-328) of both porcine and bovine serum albumin which can be released following hydrolysis with the enzymes papain and pepsin, respectively. 
This work demonstrates that meat proteins are a suitable resource for the generation of bioactive peptides and further demonstrates the usefulness of in silico methodologies to streamline identification and generation of bioactive peptides.",2014-07-12 +30446870,Hi-TOM: a platform for high-throughput tracking of mutations induced by CRISPR/Cas systems.,"The CRISPR/Cas system has been extensively applied to make precise genetic modifications in various organisms. Despite its importance and widespread use, large-scale mutation screening remains time-consuming, labour-intensive and costly. Here, we developed Hi-TOM (available at https://doi.org/www.hi-tom.net/hi-tom/ ), an online tool to track the mutations with precise percentage for multiple samples and multiple target sites. We also described a corresponding next-generation sequencing (NGS) library construction strategy by fixing the bridge sequences and barcoding primers. Analysis of the samples from rice, hexaploid wheat and human cells reveals that the Hi-TOM tool has high reliability and sensitivity in tracking various mutations, especially complex chimeric mutations frequently induced by genome editing. Hi-TOM does not require special design of barcode primers, cumbersome parameter configuration or additional data analysis. Thus, the streamlined NGS library construction and comprehensive result output make Hi-TOM particularly suitable for high-throughput identification of all types of mutations induced by CRISPR/Cas systems.",2018-11-13 +28365935,Selecting key genes associated with ovarian cancer based on differential expression network.,"

Purpose

The purpose in this study was to select key genes related to ovarian cancer.

Methods

The gene expression profiles of E-GEOD-6008, E-GEOD-26712, E-GEOD-27651, and E-GEOD-14001 were obtained from the ArrayExpress database (http://www.ebi.ac.uk/arrayexpress/). Following data recruitment and preprocessing, differentially expressed genes (DEGs) were characterized using Significance Analysis of Microarrays (SAM). Then, a differential expression network (DEN) was constructed using Cytoscape 2.1 software based on differential and non-differential interactions. Pathway analysis was performed based on the Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway database using Pathway Analysis with the nodes contained in the main DEN. Centrality analysis on the DEN was conducted to select HUB genes. Finally, western blot was performed on the selected genes in an independent sample set.

Results

A total of 370 samples (347 ovarian tumors and 23 controls) were selected. In all, 490 DEGs were obtained, which contained 59 upregulated and 431 downregulated genes. A DEN including 875 gene pairs (1028 nodes) was constructed. There were 7 pathways by analyzing the nodes contained in the main DEN. Five HUB genes were gained, and three (UBC, ELAVL1, SIRT1) were both HUB genes and disease genes. Meanwhile, SIRT1 and NEDD4 were downregulated genes. Verification experiments indicated that the expression of SIRT1 and ELAVL1 in the disease group and the normal group were significantly changed.

Conclusions

This study showed that SIRT1 could be chosen as a potential biomarker for promoting detection of ovarian cancer, so as to further understand the molecular pathogenesis of this disease.",2017-01-01 +26275895,Determining conserved metabolic biomarkers from a million database queries.,"

Motivation

Metabolite databases provide a unique window into metabolome research allowing the most commonly searched biomarkers to be catalogued. Omic scale metabolite profiling, or metabolomics, is finding increased utility in biomarker discovery largely driven by improvements in analytical technologies and the concurrent developments in bioinformatics. However, the successful translation of biomarkers into clinical or biologically relevant indicators is limited.

Results

With the aim of improving the discovery of translatable metabolite biomarkers, we present search analytics for over one million METLIN metabolite database queries. The most common metabolites found in METLIN were cross-correlated against XCMS Online, the widely used cloud-based data processing and pathway analysis platform. Analysis of the METLIN and XCMS common metabolite data has two primary implications: these metabolites might indicate a conserved metabolic response to stressors, and this data may be used to gauge the relative uniqueness of potential biomarkers.

Availability and implementation

METLIN can be accessed by logging on to: https://metlin.scripps.edu

Contact

siuzdak@scripps.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-13 +29945655,DeepCRISPR: optimized CRISPR guide RNA design by deep learning.,"A major challenge for effective application of CRISPR systems is to accurately predict the single guide RNA (sgRNA) on-target knockout efficacy and off-target profile, which would facilitate the optimized design of sgRNAs with high sensitivity and specificity. Here we present DeepCRISPR, a comprehensive computational platform to unify sgRNA on-target and off-target site prediction into one framework with deep learning, surpassing available state-of-the-art in silico tools. In addition, DeepCRISPR fully automates the identification of sequence and epigenetic features that may affect sgRNA knockout efficacy in a data-driven manner. DeepCRISPR is available at http://www.deepcrispr.net/ .",2018-06-26 +30587581,Optimizing model representation for integrative structure determination of macromolecular assemblies.,"Integrative structure determination of macromolecular assemblies requires specifying the representation of the modeled structure, a scoring function for ranking alternative models based on diverse types of data, and a sampling method for generating these models. Structures are often represented at atomic resolution, although ad hoc simplified representations based on generic guidelines and/or trial and error are also used. In contrast, we introduce here the concept of optimizing representation. To illustrate this concept, the optimal representation is selected from a set of candidate representations based on an objective criterion that depends on varying amounts of information available for different parts of the structure. Specifically, an optimal representation is defined as the highest-resolution representation for which sampling is exhaustive at a precision commensurate with the precision of the representation. Thus, the method does not require an input structure and is applicable to any input information. 
We consider a space of representations in which a representation is a set of nonoverlapping, variable-length segments (i.e., coarse-grained beads) for each component protein sequence. We also implement a method for efficiently finding an optimal representation in our open-source Integrative Modeling Platform (IMP) software (https://integrativemodeling.org/). The approach is illustrated by application to three complexes of two subunits and a large assembly of 10 subunits. The optimized representation facilitates exhaustive sampling and thus can produce a more accurate model and a more accurate estimate of its uncertainty for larger structures than were possible previously.",2018-12-26 +27799469,Plant Reactome: a resource for plant pathways and comparative analysis.,"Plant Reactome (http://plantreactome.gramene.org/) is a free, open-source, curated plant pathway database portal, provided as part of the Gramene project. The database provides intuitive bioinformatics tools for the visualization, analysis and interpretation of pathway knowledge to support genome annotation, genome analysis, modeling, systems biology, basic research and education. Plant Reactome employs the structural framework of a plant cell to show metabolic, transport, genetic, developmental and signaling pathways. We manually curate molecular details of pathways in these domains for reference species Oryza sativa (rice) supported by published literature and annotation of well-characterized genes. Two hundred twenty-two rice pathways, 1025 reactions associated with 1173 proteins, 907 small molecules and 256 literature references have been curated to date. These reference annotations were used to project pathways for 62 model, crop and evolutionarily significant plant species based on gene homology. 
Database users can search and browse various components of the database, visualize curated baseline expression of pathway-associated genes provided by the Expression Atlas and upload and analyze their Omics datasets. The database also offers data access via Application Programming Interfaces (APIs) and in various standardized pathway formats, such as SBML and BioPAX.",2016-10-30 +30263912,Whole transcriptome sequence data of 5-FU sensitive and 5-FU resistant tumors generated in a mouse model of de novo carcinogenesis.,"We have performed whole transcriptome sequencing of 5-FU resistant and 5-FU sensitive tumors generated in a mouse model of de novo carcinogenesis that closely recapitulates tumor initiation, progression and maintenance in vivo. Tumors were generated using the DMBA/TPA model of chemically induced carcinogenesis [1], tumor-bearing mice were subsequently treated with 5-FU, and tumor growth as well as response to treatment was monitored by measuring tumor volume twice a week. Based on these measurements, we selected two 5-FU resistant and two 5-FU sensitive tumors and performed whole transcriptome sequencing and in order to identify differentially expressed transcripts between the two sets. Data obtained is deposited and available through NCBI SRA (reference number SRP155180 - https://www.ncbi.nlm.nih.gov/sra/?term=SRP155180).",2018-09-07 +26165917,The NITAG Resource Centre (NRC): One-stop shop towards a collaborative platform.,"It has long been acknowledged that there is little interaction between National Immunization Technical Advisory Groups (NITAGs) in the North and even less between those in the North and those in the South. Three international meetings of NITAGs recommended establishing an international network of NITAGs centred on a core functional structure and platform to facilitate future exchanges. 
The SIVAC Initiative (as part of a WHO Collaborating Center) followed-up with this recommendation, and launched an interactive platform involving all NITAGs worldwide in an active network and open collaboration: the NITAG Resource Center (NRC), accessible at http://www.nitag-resource.org. The NRC offers NITAG members and secretariats a centralized access to NITAG recommendations from around the world, systematic reviews, scientific publications, technical reports, updates from partners, and upcoming immunization events. A dedicated network manager will proactively update all contents through a strong network of regional and national focal points. The NRC is a first step towards a more fruitful and global collaboration between NITAGs.",2015-07-10 +29538768,Easy quantification of template-directed CRISPR/Cas9 editing.,"Template-directed CRISPR/Cas9 editing is a powerful tool for introducing subtle mutations in genomes. However, the success rate of incorporation of the desired mutations at the target site is difficult to predict and therefore must be empirically determined. Here, we adapted the widely used TIDE method for quantification of templated editing events, including point mutations. The resulting TIDER method is a rapid, cheap and accessible tool for testing and optimization of template-directed genome editing strategies. A free web tool for TIDER data analysis is available at http://tide.nki.nl.",2018-06-01 +26148241,"Known structure, unknown function: An inquiry-based undergraduate biochemistry laboratory course.","Undergraduate biochemistry laboratory courses often do not provide students with an authentic research experience, particularly when the express purpose of the laboratory is purely instructional. However, an instructional laboratory course that is inquiry- and research-based could simultaneously impart scientific knowledge and foster a student's research expertise and confidence. 
We have developed a year-long undergraduate biochemistry laboratory curriculum wherein students determine, via experiment and computation, the function of a protein of known three-dimensional structure. The first half of the course is inquiry-based and modular in design; students learn general biochemical techniques while gaining preparation for research experiments in the second semester. Having learned standard biochemical methods in the first semester, students independently pursue their own (original) research projects in the second semester. This new curriculum has yielded an improvement in student performance and confidence as assessed by various metrics. To disseminate teaching resources to students and instructors alike, a freely accessible Biochemistry Laboratory Education resource is available at http://biochemlab.org.",2015-07-06 +21803786,Tutorial videos of bioinformatics resources: online distribution trial in Japan named TogoTV.,"In recent years, biological web resources such as databases and tools have become more complex because of the enormous amounts of data generated in the field of life sciences. Traditional methods of distributing tutorials include publishing textbooks and posting web documents, but these static contents cannot adequately describe recent dynamic web services. Due to improvements in computer technology, it is now possible to create dynamic content such as video with minimal effort and low cost on most modern computers. The ease of creating and distributing video tutorials instead of static content improves accessibility for researchers, annotators and curators. This article focuses on online video repositories for educational and tutorial videos provided by resource developers and users. It also describes a project in Japan named TogoTV (http://togotv.dbcls.jp/en/) and discusses the production and distribution of high-quality tutorial videos, which would be useful to viewer, with examples. 
This article intends to stimulate and encourage researchers who develop and use databases and tools to distribute how-to videos as a tool to enhance product usability.",2011-07-29 +24917149,Update on the genomics and basic biology of Brachypodium: International Brachypodium Initiative (IBI).,"The scientific presentations at the First International Brachypodium Conference (abstracts available at http://www.brachy2013.unimore.it) are evidence of the widespread adoption of Brachypodium distachyon as a model system. Furthermore, the wide range of topics presented (genome evolution, roots, abiotic and biotic stress, comparative genomics, natural diversity, and cell walls) demonstrates that the Brachypodium research community has achieved a critical mass of tools and has transitioned from resource development to addressing biological questions, particularly those unique to grasses.",2014-06-07 +27634949,"Verdant: automated annotation, alignment and phylogenetic analysis of whole chloroplast genomes.","

Motivation

Chloroplast genomes are now produced in the hundreds for angiosperm phylogenetics projects, but current methods for annotation, alignment and tree estimation still require some manual intervention reducing throughput and increasing analysis time for large chloroplast systematics projects.

Results

Verdant is a web-based software suite and database built to take advantage a novel annotation program, annoBTD. Using annoBTD, Verdant provides accurate annotation of chloroplast genomes without manual intervention. Subsequent alignment and tree estimation can incorporate newly annotated and publically available plastomes and can accommodate a large number of taxa. Verdant sharply reduces the time required for analysis of assembled chloroplast genomes and removes the need for pipelines and software on personal hardware.

Availability and implementation

Verdant is available at: http://verdant.iplantcollaborative.org/plastidDB/ It is implemented in PHP, Perl, MySQL, Javascript, HTML and CSS with all major browsers supported.

Contact

mrmckain@gmail.comSupplementary information: Supplementary data are available at Bioinformatics online.",2016-09-14 +26442528,Genomes to natural products PRediction Informatics for Secondary Metabolomes (PRISM).,"Microbial natural products are an invaluable source of evolved bioactive small molecules and pharmaceutical agents. Next-generation and metagenomic sequencing indicates untapped genomic potential, yet high rediscovery rates of known metabolites increasingly frustrate conventional natural product screening programs. New methods to connect biosynthetic gene clusters to novel chemical scaffolds are therefore critical to enable the targeted discovery of genetically encoded natural products. Here, we present PRISM, a computational resource for the identification of biosynthetic gene clusters, prediction of genetically encoded nonribosomal peptides and type I and II polyketides, and bio- and cheminformatic dereplication of known natural products. PRISM implements novel algorithms which render it uniquely capable of predicting type II polyketides, deoxygenated sugars, and starter units, making it a comprehensive genome-guided chemical structure prediction engine. A library of 57 tailoring reactions is leveraged for combinatorial scaffold library generation when multiple potential substrates are consistent with biosynthetic logic. We compare the accuracy of PRISM to existing genomic analysis platforms. PRISM is an open-source, user-friendly web application available at http://magarveylab.ca/prism/.",2015-10-05 +29190839,"Violence Against Women: Injuries and Deaths in Rhode Island Yongwen Jiang, PhD; Deborah Debare, MMHS; Lynne-Marie Shea, BA; Samara Viner-Brown, MS.","Violence against women is a public health issue. Monitoring assault-related injury and homicide death among women is imperative for understanding this public health issue. 
We used data from the 2014 Rhode Island emergency department (ED), hospital discharge (HD), and 2004-2014 Rhode Island violent death reporting system (RIVDRS) to provide a broad picture for violence against women injuries and deaths in Rhode Island. ED visit and HD data show that the majority of female assault injuries occurred among women aged 25-44, resided in the core cities, and had public insurance. RIVDRS data showed that over half of the homicides among women were aged 25-64; nearly two in five were non-Hispanic black or Hispanic. Precipitating circumstances include intimate partner violence, a preceding argument or a conflict, and precipitated by another crime. Evidence-informed interventions need to target high-risk populations and urban areas to effectively reduce violence against women. [Full article available at http://rimed.org/rimedicaljournal-2017-12.asp].",2017-12-01 +21216771,The CCPN Metabolomics Project: a fast protocol for metabolite identification by 2D-NMR.,"

Summary

We present here the freely available Metabolomics Project resource specifically designed to work under the CcpNmr Analysis program produced by CCPN (Collaborative Computing Project for NMR) (Vranken et al., 2005, The CCPN data model for NMR spectroscopy: development of a software pipeline. Proteins, 59, 687-696). The project consists of a database of assigned 1D and 2D spectra of many common metabolites. The project aims to help the user to analyze and assign 1D and 2D NMR spectra of unknown metabolite mixtures. Spectra of unknown mixtures can be easily superimposed and compared with the database spectra, thus facilitating their assignment and identification.

Availability

The CCPN Metabolomics Project, together with an annotated example dataset, is freely available via: http://www.ccpn.ac.uk/metabolomics/.",2011-01-06 +26481361,Bovine Genome Database: new tools for gleaning function from the Bos taurus genome.,"We report an update of the Bovine Genome Database (BGD) (http://BovineGenome.org). The goal of BGD is to support bovine genomics research by providing genome annotation and data mining tools. We have developed new genome and annotation browsers using JBrowse and WebApollo for two Bos taurus genome assemblies, the reference genome assembly (UMD3.1.1) and the alternate genome assembly (Btau_4.6.1). Annotation tools have been customized to highlight priority genes for annotation, and to aid annotators in selecting gene evidence tracks from 91 tissue specific RNAseq datasets. We have also developed BovineMine, based on the InterMine data warehousing system, to integrate the bovine genome, annotation, QTL, SNP and expression data with external sources of orthology, gene ontology, gene interaction and pathway information. BovineMine provides powerful query building tools, as well as customized query templates, and allows users to analyze and download genome-wide datasets. With BovineMine, bovine researchers can use orthology to leverage the curated gene pathways of model organisms, such as human, mouse and rat. BovineMine will be especially useful for gene ontology and pathway analyses in conjunction with GWAS and QTL studies.",2015-10-19 +29068748,Recommendation system for immunization coverage and monitoring.,"Immunization averts an expected 2 to 3 million deaths every year from diphtheria, tetanus, pertussis (whooping cough), and measles; however, an additional 1.5 million deaths could be avoided if vaccination coverage was improved worldwide. 
11 Data source for immunization records of 1.5 M: http://www.who.int/mediacentre/factsheets/fs378/en/ New vaccination technologies provide earlier diagnoses, personalized treatments and a wide range of other benefits for both patients and health care professionals. Childhood diseases that were commonplace less than a generation ago have become rare because of vaccines. However, 100% vaccination coverage is still the target to avoid further mortality. Governments have launched special campaigns to create an awareness of vaccination. In this paper, we have focused on data mining algorithms for big data using a collaborative approach for vaccination datasets to resolve problems with planning vaccinations in children, stocking vaccines, and tracking and monitoring non-vaccinated children appropriately. Geographical mapping of vaccination records helps to tackle red zone areas, where vaccination rates are poor, while green zone areas, where vaccination rates are good, can be monitored to enable health care staff to plan the administration of vaccines. Our recommendation algorithm assists in these processes by using deep data mining and by accessing records of other hospitals to highlight locations with lower rates of vaccination. The overall performance of the model is good. The model has been implemented in hospitals to control vaccination across the coverage area.",2017-11-10 +25982315,GraP: platform for functional genomics analysis of Gossypium raimondii.,"Cotton (Gossypium spp.) is one of the most important natural fiber and oil crops worldwide. Improvement of fiber yield and quality under changing environments attract much attention from cotton researchers; however, a functional analysis platform integrating omics data is still missing. The success of cotton genome sequencing and large amount of available transcriptome data allows the opportunity to establish a comprehensive analysis platform for integrating these data and related information. 
A comprehensive database, Platform of Functional Genomics Analysis in Gossypium raimondii (GraP), was constructed to provide multi-dimensional analysis, integration and visualization tools. GraP includes updated functional annotation, gene family classifications, protein-protein interaction networks, co-expression networks and microRNA-target pairs. Moreover, gene set enrichment analysis and cis-element significance analysis tools are also provided for gene batch analysis of high-throughput data sets. Based on these effective services, GraP may offer further information for subsequent studies of functional genes and in-depth analysis of high-throughput data. GraP is publically accessible at http://structuralbiology.cau.edu.cn/GraP/, with all data available for downloading.",2015-05-17 +25574136,POEAS: Automated Plant Phenomic Analysis Using Plant Ontology.,"Biological enrichment analysis using gene ontology (GO) provides a global overview of the functional role of genes or proteins identified from large-scale genomic or proteomic experiments. Phenomic enrichment analysis of gene lists can provide an important layer of information as well as cellular components, molecular functions, and biological processes associated with gene lists. Plant phenomic enrichment analysis will be useful for performing new experiments to better understand plant systems and for the interpretation of gene or proteins identified from high-throughput experiments. Plant ontology (PO) is a compendium of terms to define the diverse phenotypic characteristics of plant species, including plant anatomy, morphology, and development stages. Adoption of this highly useful ontology is limited, when compared to GO, because of the lack of user-friendly tools that enable the use of PO for statistical enrichment analysis. To address this challenge, we introduce Plant Ontology Enrichment Analysis Server (POEAS) in the public domain. 
POEAS uses a simple list of genes as input data and performs enrichment analysis using Ontologizer 2.0 to provide results in two levels, enrichment results and visualization utilities, to generate ontological graphs that are of publication quality. POEAS also offers interactive options to identify user-defined background population sets, various multiple-testing correction methods, different enrichment calculation methods, and resampling tests to improve statistical significance. The availability of such a tool to perform phenomic enrichment analyses using plant genes as a complementary resource will permit the adoption of PO-based phenomic analysis as part of analytical workflows. POEAS can be accessed using the URL http://caps.ncbs.res.in/poeas.",2014-12-21 +25256301,CMEIAS JFrad: a digital computing tool to discriminate the fractal geometry of landscape architectures and spatial patterns of individual cells in microbial biofilms.,"Image analysis of fractal geometry can be used to gain deeper insights into complex ecophysiological patterns and processes occurring within natural microbial biofilm landscapes, including the scale-dependent heterogeneities of their spatial architecture, biomass, and cell-cell interactions, all driven by the colonization behavior of optimal spatial positioning of organisms to maximize their efficiency in utilization of allocated nutrient resources. Here, we introduce CMEIAS JFrad, a new computing technology that analyzes the fractal geometry of complex biofilm architectures in digital landscape images. The software uniquely features a data-mining opportunity based on a comprehensive collection of 11 different mathematical methods to compute fractal dimension that are implemented into a wizard design to maximize ease-of-use for semi-automatic analysis of single images or fully automatic analysis of multiple images in a batch process. 
As examples of application, quantitative analyses of fractal dimension were used to optimize the important variable settings of brightness threshold and minimum object size in order to discriminate the complex architecture of freshwater microbial biofilms at multiple spatial scales, and also to differentiate the spatial patterns of individual bacterial cells that influence their cooperative interactions, resource use, and apportionment in situ. Version 1.0 of JFrad is implemented into a software package containing the program files, user manual, and tutorial images that will be freely available at http://cme.msu.edu/cmeias/. This improvement in computational image informatics will strengthen microscopy-based approaches to analyze the dynamic landscape ecology of microbial biofilm populations and communities in situ at spatial resolutions that range from single cells to microcolonies.",2014-09-26 +24214993,CPLM: a database of protein lysine modifications.,"We reported an integrated database of Compendium of Protein Lysine Modifications (CPLM; http://cplm.biocuckoo.org) for protein lysine modifications (PLMs), which occur at active ε-amino groups of specific lysine residues in proteins and are critical for orchestrating various biological processes. The CPLM database was updated from our previously developed database of Compendium of Protein Lysine Acetylation (CPLA), which contained 7151 lysine acetylation sites in 3311 proteins. Here, we manually collected experimentally identified substrates and sites for 12 types of PLMs, including acetylation, ubiquitination, sumoylation, methylation, butyrylation, crotonylation, glycation, malonylation, phosphoglycerylation, propionylation, succinylation and pupylation. In total, the CPLM database contained 203,972 modification events on 189,919 modified lysines in 45,748 proteins for 122 species. 
With the dataset, we totally identified 76 types of co-occurrences of various PLMs on the same lysine residues, and the most abundant PLM crosstalk is between acetylation and ubiquitination. Up to 53.5% of acetylation and 33.1% of ubiquitination events co-occur at 10 746 lysine sites. Thus, the various PLM crosstalks suggested that a considerable proportion of lysines were competitively and dynamically regulated in a complicated manner. Taken together, the CPLM database can serve as a useful resource for further research of PLMs.",2013-11-08 +29028265,StereoGene: rapid estimation of genome-wide correlation of continuous or interval feature data.,"

Motivation

Genomics features with similar genome-wide distributions are generally hypothesized to be functionally related, for example, colocalization of histones and transcription start sites indicate chromatin regulation of transcription factor activity. Therefore, statistical algorithms to perform spatial, genome-wide correlation among genomic features are required.

Results

Here, we propose a method, StereoGene, that rapidly estimates genome-wide correlation among pairs of genomic features. These features may represent high-throughput data mapped to reference genome or sets of genomic annotations in that reference genome. StereoGene enables correlation of continuous data directly, avoiding the data binarization and subsequent data loss. Correlations are computed among neighboring genomic positions using kernel correlation. Representing the correlation as a function of the genome position, StereoGene outputs the local correlation track as part of the analysis. StereoGene also accounts for confounders such as input DNA by partial correlation. We apply our method to numerous comparisons of ChIP-Seq datasets from the Human Epigenome Atlas and FANTOM CAGE to demonstrate its wide applicability. We observe the changes in the correlation between epigenomic features across developmental trajectories of several tissue types consistent with known biology and find a novel spatial correlation of CAGE clusters with donor splice sites and with poly(A) sites. These analyses provide examples for the broad applicability of StereoGene for regulatory genomics.

Availability and implementation

The StereoGene C ++ source code, program documentation, Galaxy integration scripts and examples are available from the project homepage http://stereogene.bioinf.fbb.msu.ru/.

Contact

favorov@sensi.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +26446136,The new protein topology graph library web server.,"

Summary

We present a new, extended version of the Protein Topology Graph Library web server. The Protein Topology Graph Library describes the protein topology on the super-secondary structure level. It allows to compute and visualize protein ligand graphs and search for protein structural motifs. The new server features additional information on ligand binding to secondary structure elements, increased usability and an application programming interface (API) to retrieve data, allowing for an automated analysis of protein topology.

Availability and implementation

The Protein Topology Graph Library server is freely available on the web at http://ptgl.uni-frankfurt.de. The website is implemented in PHP, JavaScript, PostgreSQL and Apache. It is supported by all major browsers. The VPLG software that was used to compute the protein ligand graphs and all other data in the database is available under the GNU public license 2.0 from http://vplg.sourceforge.net.

Contact

tim.schaefer@bioinformatik.uni-frankfurt.de; ina.koch@bioinformatik.uni-frankfurt.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-06 +29253653,A probabilistic atlas of human brainstem pathways based on connectome imaging data.,"The brainstem is a critical structure that regulates vital autonomic functions, houses the cranial nerves and their nuclei, relays motor and sensory information between the brain and spinal cord, and modulates cognition, mood, and emotions. As a primary relay center, the fiber pathways of the brainstem include efferent and afferent connections among the cerebral cortex, spinal cord, and cerebellum. While diffusion MRI has been successfully applied to map various brain pathways, its application for the in vivo imaging of the brainstem pathways has been limited due to inadequate resolution and large susceptibility-induced distortion artifacts. With the release of high-resolution data from the Human Connectome Project (HCP), there is increasing interest in mapping human brainstem pathways. Previous works relying on HCP data to study brainstem pathways, however, did not consider the prevalence (>80%) of large distortions in the brainstem even after the application of correction procedures from the HCP-Pipeline. They were also limited in the lack of adequate consideration of subject variability in either fiber pathways or region of interests (ROIs) used for bundle reconstruction. To overcome these limitations, we develop in this work a probabilistic atlas of 23 major brainstem bundles using high-quality HCP data passing rigorous quality control. For the large-scale data from the 500-Subject release of HCP, we conducted extensive quality controls to exclude subjects with severe distortions in the brainstem area. After that, we developed a systematic protocol to manually delineate 1300 ROIs on 20 HCP subjects (10 males; 10 females) for the reconstruction of fiber bundles using tractography techniques. 
Finally, we leveraged our novel connectome modeling techniques including high order fiber orientation distribution (FOD) reconstruction from multi-shell diffusion imaging and topography-preserving tract filtering algorithms to successfully reconstruct the 23 fiber bundles for each subject, which were then used to calculate the probabilistic atlases in the MNI152 space for public release. In our experimental results, we demonstrate that our method yielded anatomically faithful reconstruction of the brainstem pathways and achieved improved performance in comparison with an existing atlas of cerebellar peduncles based on HCP data. These atlases have been publicly released on NITRIC (https://www.nitrc.org/projects/brainstem_atlas/) and can be readily used by brain imaging researchers interested in studying brainstem pathways.",2017-12-16 +30540203,Phys-MAPS: a programmatic physiology assessment for introductory and advanced undergraduates.,"We describe the development of a new, freely available, online, programmatic-level assessment tool, Measuring Achievement and Progress in Science in Physiology, or Phys-MAPS ( http://cperl.lassp.cornell.edu/bio-maps ). Aligned with the conceptual frameworks of Core Principles of Physiology, and Vision and Change Core Concepts, Phys-MAPS can be used to evaluate student learning of core physiology concepts at multiple time points in an undergraduate physiology program, providing a valuable longitudinal tool to gain insight into student thinking and aid in the data-driven reform of physiology curricula. Phys-MAPS questions have a modified multiple true/false design and were developed using an iterative process, including student interviews and physiology expert review to verify scientific accuracy, appropriateness for physiology majors, and clarity. The final version of Phys-MAPS was tested with 2,600 students across 13 universities, has evidence of reliability, and has no significant statement biases. 
Over 90% of the physiology experts surveyed agreed that each Phys-MAPS statement was scientifically accurate and relevant to a physiology major. When testing each statement for bias, differential item functioning analysis demonstrated only a small effect size (<0.008) of any tested demographic variable. Regarding student performance, Phys-MAPS can also distinguish between lower and upper division students, both across different institutions (average overall scores increase with each level of class standing; two-way ANOVA, P < 0.001) and within each of three sample institutions (each ANOVA, P ≤ 0.001). Furthermore, at the level of individual concepts, only evolution and homeostasis do not demonstrate the typical increase across class standing, suggesting these concepts likely present consistent conceptual challenges for physiology students.",2019-03-01 +30902795,Elevated Heme Synthesis and Uptake Underpin Intensified Oxidative Metabolism and Tumorigenic Functions in Non-Small Cell Lung Cancer Cells.,"Tumors of human non-small cell lung cancer (NSCLC) are heterogeneous but exhibit elevated glycolysis and glucose oxidation relative to benign lung tissues. Heme is a central molecule for oxidative metabolism and ATP generation via mitochondrial oxidative phosphorylation (OXPHOS). Here, we showed that levels of heme synthesis and uptake, mitochondrial heme, oxygen-utilizing hemoproteins, oxygen consumption, ATP generation, and key mitochondrial biogenesis regulators were enhanced in NSCLC cells relative to nontumorigenic cells. Likewise, proteins and enzymes relating to heme and mitochondrial functions were upregulated in human NSCLC tissues relative to normal tissues. Engineered heme-sequestering peptides (HSP) reduced heme uptake, intracellular heme levels, and tumorigenic functions of NSCLC cells. Addition of heme largely reversed the effect of HSPs on tumorigenic functions. Furthermore, HSP2 significantly suppressed the growth of human NSCLC xenograft tumors in mice. 
HSP2-treated tumors exhibited reduced oxygen consumption rates (OCR) and ATP levels. To further verify the importance of heme in promoting tumorigenicity, we generated NSCLC cell lines with increased heme synthesis or uptake by overexpressing either the rate-limiting heme synthesis enzyme ALAS1 or uptake protein SLC48A1, respectively. These cells exhibited enhanced migration and invasion and accelerated tumor growth in mice. Notably, tumors formed by cells with increased heme synthesis or uptake also displayed elevated OCRs and ATP levels. These data show that elevated heme flux and function underlie enhanced OXPHOS and tumorigenicity of NSCLC cells. Targeting heme flux and function offers a potential strategy for developing therapies for lung cancer. SIGNIFICANCE: These findings show that elevated heme availability due to increased heme synthesis and uptake causes intensified oxygen consumption and ATP generation, promoting tumorigenic functions and tumor growth in NSCLC. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/79/10/2511/F1.large.jpg.",2019-03-22 +28086803,Compendium of TCDD-mediated transcriptomic response datasets in mammalian model systems.,"

Background

2,3,7,8-tetrachlorodibenzo-p-dioxin (TCDD) is the most potent congener of the dioxin class of environmental contaminants. Exposure to TCDD causes a wide range of toxic outcomes, ranging from chloracne to acute lethality. The severity of toxicity is highly dependent on the aryl hydrocarbon receptor (AHR). Binding of TCDD to the AHR leads to changes in transcription of numerous genes. Studies evaluating the transcriptional changes brought on by TCDD may provide valuable insight into the role of the AHR in human health and disease. We therefore compiled a collection of transcriptomic datasets that can be used to aid the scientific community in better understanding the transcriptional effects of ligand-activated AHR.

Results

Specifically, we have created a datasets package - TCDD.Transcriptomics - for the R statistical environment, consisting of 63 unique experiments comprising 377 samples, including various combinations of 3 species (human derived cell lines, mouse and rat), 4 tissue types (liver, kidney, white adipose tissue and hypothalamus) and a wide range of TCDD exposure times and doses. These datasets have been fully standardized using consistent preprocessing and annotation packages (available as of September 14, 2015). To demonstrate the utility of this R package, a subset of ""AHR-core"" genes were evaluated across the included datasets. Ahrr, Nqo1 and members of the Cyp family were significantly induced following exposure to TCDD across the studies as expected while Aldh3a1 was induced specifically in rat liver. Inmt was altered only in liver tissue and primarily by rat-AHR.

Conclusions

Analysis of the ""AHR-core"" genes demonstrates a continued need for studies surrounding the impact of AHR-activity on the transcriptome; genes believed to be consistently regulated by ligand-activated AHR show surprisingly little overlap across species and tissues. Until now, a comprehensive assessment of the transcriptome across these studies was challenging due to differences in array platforms, processing methods and annotation versions. We believe that this package, which is freely available for download ( http://labs.oicr.on.ca/boutros-lab/tcdd-transcriptomics ) will prove to be a highly beneficial resource to the scientific community evaluating the effects of TCDD exposure as well as the variety of functions of the AHR.",2017-01-13 +26323909,Aidi injection combined with radiation in the treatment of non-small cell lung cancer: A meta-analysis evaluation the efficacy and side effects.,"The purpose of this meta-analysis was to assess the clinical efficacy and side effects of Aidi injection combined with radiation in the treatment of non-small cell lung cancer (NSCLC).By searching PubMed, the Cochrane central register of controlled trials, EMBSE and CNKI databases, the efficacy and side effect data of Aidi injection combined with radiation in the treatment of NSCLC from the published clinical studies were collected. The data were pooled using Stata version 11.0 software (http://www.stata.com; Stata Corporation, College Station, TX).Ten clinical studies with 1084 subjects were included in this meta-analysis. The combined data showed the clinical efficacy in experiment group was higher than that of control group (risk ratio [RR] = 1.72, 95% confidence interval [CI]: 1.52-1.96, P = 0.00); four articles reported the life quality improvement. The pooled data showed that the use of Aidi can significantly improve the quality of life in the procedure of radiation (RR = 2.29, 95% CI: 1.76-2.98, P = 0.00); six studies reported the radiation toxicities. 
The pooled data showed that Aidi injection can significantly decrease the radiation pneumonia (OR = 0.46, 95% CI: 0.34-0.63), radiation esophagitis (OR = 0.53, 95% CI: 0.40-0.71), and marrow suppression (OR = 0.50, 95% CI: 0.42-0.59). Aidi injection can improve the clinical efficacy, quality of life, and decrease the radiation-related toxicities in NSCLC patients who received radiation.",2015-08-01 +30242341,Stealing Cookies in the Twenty-First Century: Measures of Spoken Narrative in Healthy Versus Speakers With Aphasia.,"Purpose Our goal was to evaluate an updated version of the ""Cookie Theft"" picture by obtaining norms based on picture descriptions by healthy controls for total content units (CUs), syllables per CU, and the ratio of left-right CUs. In addition, we aimed to compare these measures from healthy controls to picture descriptions obtained from individuals with poststroke aphasia and primary progressive aphasia (PPA) to assess whether these measures can capture impairments in content and efficiency of communication. Method Using an updated version of this picture, we analyzed descriptions from 50 healthy controls to develop norms for numbers of syllables, total CUs, syllables per CU, and left-right CU. We provide preliminary data from 44 individuals with aphasia (19 with poststroke aphasia and 25 with PPA). Results A total of 96 CUs were established based on the written transcriptions of spoken picture descriptions of the 50 control participants. There was a significant effect of group on total CUs, syllables, syllables per CU, and left-right CUs. The poststroke participants produced significantly fewer total CU and syllables than those with PPA. Each aphasic group produced significantly fewer total CUs, fewer syllables, more syllables per CU, and lower left-right CUs (indicating a right-sided bias) compared to controls. 
Conclusions Results show that the measures of numbers of syllables, total CUs, syllables per CU, and left-right CUs can distinguish language output of individuals with aphasia from controls and capture impairments in content and efficiency of communication. A limitation of this study is that we evaluated only 44 individuals with aphasia. In the future, we will evaluate other measures, such as CUs per minute, lexical variability, grammaticality, and ratio of nouns to verbs. Supplemental Material https://doi.org/10.23641/asha.7015223.",2019-03-01 +30651357,Acetylation of E2 by P300 Mediates Topoisomerase Entry at the Papillomavirus Replicon. ,"Human papillomavirus (HPV) E2 proteins are integral for the transcription of viral genes and the replication and maintenance of viral genomes in host cells. E2 recruits the viral DNA helicase E1 to the origin. A lysine (K111), highly conserved among almost all papillomavirus (PV) E2 proteins, is a target for P300 (EP300) acetylation and is critical for viral DNA replication (E. J. Quinlan, S. P. Culleton, S. Y. Wu, C. M. Chiang, et al., J Virol 87:1497-1507, 2013, https://doi.org/10.1128/JVI.02771-12; Y. Thomas and E. J. Androphy, J Virol 92:e01912-17, 2018, https://doi.org/10.1128/JVI.01912-17). Since the viral genome exists as a covalently closed circle of double-stranded DNA, topoisomerase 1 (Topo1) is thought to be required for progression of the replication forks. Due to the specific effect of K111 mutations on DNA unwinding (Y. Thomas and E. J. Androphy, J Virol 92:e01912-17, 2018, https://doi.org/10.1128/JVI.01912-17), we demonstrate that the E2 protein targets Topo1 to the viral origin, and this depends on acetylation of K111. The effect was corroborated by functional replication assays, in which higher levels of P300, but not its homolog CBP, caused enhanced replication with wild-type E2 but not the acetylation-defective K111 arginine mutant. 
These data reveal a novel role for lysine acetylation during viral DNA replication by regulating topoisomerase recruitment to the replication origin.IMPORTANCE Human papillomaviruses affect an estimated 75% of the sexually active adult population in the United States, with 5.5 million new cases emerging every year. More than 200 HPV genotypes have been identified; a subset of them are linked to the development of cancers from these epithelial infections. Specific antiviral medical treatments for infected individuals are not available. This project examines the mechanisms that control viral genome replication and may allow the development of novel therapeutics.",2019-03-21 +31072178,Conversation in Aphasia Across Communication Partners: Exploring Stability of Microlinguistic Measures and Communicative Success.,"Purpose The aim of this study was to determine if people with aphasia demonstrate differences in microlinguistic skills and communicative success in unstructured, nontherapeutic conversations with a home communication partner (Home-P) as compared to a speech-language pathologist communication partner (SLP-P). Method Eight persons with aphasia participated in 2 unstructured, nontherapeutic 15-minute conversations, 1 each with an unfamiliar SLP-P and a Home-P. Utterance-level analysis evaluated communicative success. Two narrow measures of lexical relevance and sentence frame were used to evaluate independent clauses. Two broad lexical and morphosyntactic measures were used to evaluate elliptical and dependent clauses and to evaluate independent clauses for errors beyond lexical relevance and sentence frame (such as phonological and morphosyntactic errors). Utterances were further evaluated for presence of behaviors indicating lexical retrieval difficulty (pauses, repetitions, and false starts) and for referential cohesion. 
Results No statistical differences occurred for communicative success or for any of the microlinguistic measures between the SLP-P and Home-P conversation conditions. Four measures (2 of lexical retrieval and 1 each of communicative success and grammaticality) showed high correlations across the 2 conversation samples. Individuals showed variation of no more than 10 percentage points between the 2 conversation conditions for 46 of 56 data points. Variation greater than 10 percentage points tended to occur for the measure of referential cohesion and primarily for 1 participant. Conclusions Preliminary findings suggest that these microlinguistic measures and communicative success have potential for reliable comparison across Home-P and SLP-P conversations, with the possible exception of referential cohesion. However, further research is needed with a larger, more diverse sample. These findings suggest future assessment and treatment implications for clinical and research needs. Supplemental Material https://doi.org/10.23641/asha.7616312.",2019-03-01 +29480213,NINDS Common Data Elements for Congenital Muscular Dystrophy Clinical Research: A National Institute for Neurological Disorders and Stroke Project.,"

Background

A Congenital Muscular Dystrophy (CMD) Working Group (WG) consisting of international experts reviewed common data elements (CDEs) previously developed for other neuromuscular diseases (NMDs) and made recommendations for all types of studies on CMD.

Objectives

To develop a comprehensive set of CDEs, data definitions, case report forms and guidelines for use in CMD clinical research to facilitate interoperability of data collection, as part of the CDE project at the National Institute of Neurological Disorders and Stroke (NINDS).

Methods

One working group composed of ten experts reviewed existing NINDS CDEs and outcome measures, evaluated the need for new elements, and provided recommendations for CMD clinical research. The recommendations were compiled, internally reviewed by the CMD working group, and posted online for external public comment. The CMD working group and the NIH CDE team reviewed the final version before release.

Results

The NINDS CMD CDEs and supporting documents are publicly available on the NINDS CDE website (https://www.commondataelements.ninds.nih.gov/CMD.aspx#tab=Data_Standards). Content areas include demographics, social status, health history, physical examination, diagnostic tests, and guidelines for a variety of specific outcomes and endpoints. The CMD CDE WG selected these documents from existing versions that were generated by other disease area working groups. Some documents were tailored to maximize their suitability for the CMD field.

Conclusions

Widespread use of CDEs can facilitate CMD clinical research and trial design, data sharing and retrospective analyses. The CDEs that are most relevant to CMD research are like those generated for other NMDs, and CDE documents tailored to CMD are now available to the public. The existence of a single source for these documents facilitates their use in research studies and offers a clear mechanism for the discussion and update of the information as knowledge is gained.",2018-01-01 +28968749,Omics AnalySIs System for PRecision Oncology (OASISPRO): a web-based omics analysis tool for clinical phenotype prediction.,"

Summary

Precision oncology is an approach that accounts for individual differences to guide cancer management. Omics signatures have been shown to predict clinical traits for cancer patients. However, the vast amount of omics information poses an informatics challenge in systematically identifying patterns associated with health outcomes, and no general purpose data mining tool exists for physicians, medical researchers and citizen scientists without significant training in programming and bioinformatics. To bridge this gap, we built the Omics AnalySIs System for PRecision Oncology (OASISPRO), a web-based system to mine the quantitative omics information from The Cancer Genome Atlas (TCGA). This system effectively visualizes patients' clinical profiles, executes machine-learning algorithms of choice on the omics data and evaluates the prediction performance using held-out test sets. With this tool, we successfully identified genes strongly associated with tumor stage, and accurately predicted patients' survival outcomes in many cancer types, including adrenocortical carcinoma. By identifying the links between omics and clinical phenotypes, this system will facilitate omics studies on precision cancer medicine and contribute to establishing personalized cancer treatment plans.

Availability and implementation

This web-based tool is available at http://tinyurl.com/oasispro; source codes are available at http://tinyurl.com/oasisproSourceCode.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +29410200,A web platform for the network analysis of high-throughput data in melanoma and its use to investigate mechanisms of resistance to anti-PD1 immunotherapy.,"Cellular phenotypes are established and controlled by complex and precisely orchestrated molecular networks. In cancer, mutations and dysregulations of multiple molecular factors perturb the regulation of these networks and lead to malignant transformation. High-throughput technologies are a valuable source of information to establish the complex molecular relationships behind the emergence of malignancy, but full exploitation of this massive amount of data requires bioinformatics tools that rely on network-based analyses. In this report we present the Virtual Melanoma Cell, an online tool developed to facilitate the mining and interpretation of high-throughput data on melanoma by biomedical researches. The platform is based on a comprehensive, manually generated and expert-validated regulatory map composed of signaling pathways important in malignant melanoma. The Virtual Melanoma Cell is a tool designed to accept, visualize and analyze user-generated datasets. It is available at: https://www.vcells.net/melanoma. To illustrate the utilization of the web platform and the regulatory map, we have analyzed a large publicly available dataset accounting for anti-PD1 immunotherapy treatment of malignant melanoma patients.",2018-02-23 +,Biology and Diseases of Rats,"The laboratory rat, Rattus norvegicus, is within the order Rodentia and family Muridae. The genus Rattus contains at least 56 species (retrieved January 28, 2014, from the Integrated Taxonomic Information System online database http://www.itis.gov); however, the Norway rat, R. norvegicus, and the black rat, R. rattus, are the two species most commonly associated with the genus. Rattus rattus preceded R. 
norvegicus in migration from Asia to Europe and the Americas by several hundred years. The former species reached Europe in the 12th century, and the Americas in the 16th century; whereas, R. norvegicus emerged in the 18th century in Europe and in the 19th century in the Western Hemisphere. Globally, the Norway rat has largely displaced the black rat, probably because of the Norway rat’s larger size and aggressiveness. The domestication and introduction of the albino R. norvegicus is rooted by its use in Europe and America in the 1800s as prey for a sport (rat baiting) in which individuals would wager on which terrier dog would most swiftly kill the largest number of rats confined to a pit. Because of the large numbers of rats needed for this sport, wild rats were purpose-bred, and albinos were selected out by some people as a hobby (Robinson, 1965; Mayhew, 1851).",2015-01-01 +21210251,"BTECH: a platform to integrate genomic, transcriptomic and epigenomic alterations in brain tumors.","The identification of molecular signatures predictive of clinical behavior and outcome in brain tumors has been the focus of many studies in the recent years. Despite the wealth of data that are available in the public domain on alterations in the genome, epigenome and transcriptome of brain tumors, the underlying molecular mechanisms leading to tumor initiation and progression remain largely unknown. Unfortunately, most of these data are scattered in multiple databases and supplementary materials of publications, thus making their retrieval, evaluation, comparison and visualization a rather arduous task. Here we report the development and implementation of an open access database (BTECH), a community resource for the deposition of a wide range of molecular data derived from brain tumor studies. 
This comprehensive database integrates multiple datasets, including transcript profiles, epigenomic CpG methylation data, DNA copy number alterations and structural chromosomal rearrangements, tumor-associated gene lists, SNPs, genomic features concerning Alu repeats and general genomic annotations. A genome browser has also been developed that allows for the simultaneous visualization of the different datasets and the various annotated features. Besides enabling an integrative view of diverse datasets through the genome browser, we also provide links to the original references for users to have a more accurate understanding of each specific dataset. This integrated platform will facilitate uncovering interactions among genetic and epigenetic factors associated with brain tumor development. BTECH is freely available at http://cmbteg.childrensmemorial.org/.",2011-03-01 +29914348,Predicting drug-disease associations by using similarity constrained matrix factorization.,"

Background

Drug-disease associations provide important information for the drug discovery. Wet experiments that identify drug-disease associations are time-consuming and expensive. However, many drug-disease associations are still unobserved or unknown. The development of computational methods for predicting unobserved drug-disease associations is an important and urgent task.

Results

In this paper, we proposed a similarity constrained matrix factorization method for the drug-disease association prediction (SCMFDD), which makes use of known drug-disease associations, drug features and disease semantic information. SCMFDD projects the drug-disease association relationship into two low-rank spaces, which uncover latent features for drugs and diseases, and then introduces drug feature-based similarities and disease semantic similarity as constraints for drugs and diseases in low-rank spaces. Different from the classic matrix factorization technique, SCMFDD takes the biological context of the problem into account. In computational experiments, the proposed method can produce high-accuracy performances on benchmark datasets, and outperform existing state-of-the-art prediction methods when evaluated by five-fold cross validation and independent testing.

Conclusion

We developed a user-friendly web server by using known associations collected from the CTD database, available at http://www.bioinfotech.cn/SCMFDD/ . The case studies show that the server can find out novel associations, which are not included in the CTD database.",2018-06-19 +28379348,BEESEM: estimation of binding energy models using HT-SELEX data.,"

Motivation

Characterizing the binding specificities of transcription factors (TFs) is crucial to the study of gene expression regulation. Recently developed high-throughput experimental methods, including protein binding microarrays (PBM) and high-throughput SELEX (HT-SELEX), have enabled rapid measurements of the specificities for hundreds of TFs. However, few studies have developed efficient algorithms for estimating binding motifs based on HT-SELEX data. Also the simple method of constructing a position weight matrix (PWM) by comparing the frequency of the preferred sequence with single-nucleotide variants has the risk of generating motifs with higher information content than the true binding specificity.

Results

We developed an algorithm called BEESEM that builds on a comprehensive biophysical model of protein-DNA interactions, which is trained using the expectation maximization method. BEESEM is capable of selecting the optimal motif length and calculating the confidence intervals of estimated parameters. By comparing BEESEM with the published motifs estimated using the same HT-SELEX data, we demonstrate that BEESEM provides significant improvements. We also evaluate several motif discovery algorithms on independent PBM and ChIP-seq data. BEESEM provides significantly better fits to in vitro data, but its performance is similar to some other methods on in vivo data under the criterion of the area under the receiver operating characteristic curve (AUROC). This highlights the limitations of the purely rank-based AUROC criterion. Using quantitative binding data to assess models, however, demonstrates that BEESEM improves on prior models.

Availability and implementation

Freely available on the web at http://stormo.wustl.edu/resources.html .

Contact

stormo@wustl.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +22102592,The Human OligoGenome Resource: a database of oligonucleotide capture probes for resequencing target regions across the human genome.,"Recent exponential growth in the throughput of next-generation DNA sequencing platforms has dramatically spurred the use of accessible and scalable targeted resequencing approaches. This includes candidate region diagnostic resequencing and novel variant validation from whole genome or exome sequencing analysis. We have previously demonstrated that selective genomic circularization is a robust in-solution approach for capturing and resequencing thousands of target human genome loci such as exons and regulatory sequences. To facilitate the design and production of customized capture assays for any given region in the human genome, we developed the Human OligoGenome Resource (http://oligogenome.stanford.edu/). This online database contains over 21 million capture oligonucleotide sequences. It enables one to create customized and highly multiplexed resequencing assays of target regions across the human genome and is not restricted to coding regions. In total, this resource provides 92.1% in silico coverage of the human genome. The online server allows researchers to download a complete repository of oligonucleotide probes and design customized capture assays to target multiple regions throughout the human genome. The website has query tools for selecting and evaluating capture oligonucleotides from specified genomic regions.",2011-11-18 +30266837,End-Stage Kidney Disease following Surgical Management of Kidney Cancer.,"

Background and objectives

We investigated the incidence of ESKD after surgical management of kidney cancer in the Australian state of Queensland, and described patterns in the initiation of kidney replacement therapy resulting from kidney cancer across Australia.

Design, setting, participants, & measurements

All newly diagnosed cases of kidney cancer in the Australian state of Queensland between January of 2009 and December of 2014 were ascertained through the Queensland Cancer Registry. There were 2739 patients included in our analysis. Patients who developed ESKD were identified using international classification of disease-10-coded hospital administrative data. Incidence rate and 3-year cumulative incidence were calculated, and multivariable Cox proportional hazards models were used to identify factors associated with ESKD. Additional descriptive analysis was undertaken of Australian population data.

Results

The incidence rate of ESKD in all patients was 4.9 (95% confidence interval [95% CI], 3.9 to 6.2) per 1000 patient-years. The 3-year cumulative incidence was 1.7%, 1.9%, and 1.0% for all patients, and patients managed with radical or partial nephrectomy, respectively. Apart from preoperative kidney disease, exposures associated with increased ESKD risk were age≥65 years (adjusted hazard ratio [aHR], 2.0; 95% CI, 1.2 to 3.2), male sex (aHR, 2.3; 95% CI, 1.3 to 4.3), preoperative diabetes (aHR, 1.8; 95% CI, 1.0 to 3.3), American Society of Anesthesiologists classification ≥3 (aHR, 4.0; 95% CI, 2.2 to 7.4), socioeconomic disadvantage (aHR, 1.6; 95% CI, 0.9 to 2.7), and postoperative length of hospitalization ≥6 days (aHR, 2.1; 95% CI, 1.4 to 3.0). Australia-wide trends indicate that the rate of kidney replacement therapy after oncologic nephrectomy doubled between 1995 and 2015, from 0.3 to 0.6 per 100,000 per year.

Conclusions

In Queensland between 2009 and 2014, one in 53 patients managed with radical nephrectomy and one in 100 patients managed with partial nephrectomy developed ESKD within 3 years of surgery.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2018_09_28_CJASNPodcast_18_1_.mp3.",2018-09-28 +29165610,Gramene 2018: unifying comparative genomics and pathway resources for plant research.,"Gramene (http://www.gramene.org) is a knowledgebase for comparative functional analysis in major crops and model plant species. The current release, #54, includes over 1.7 million genes from 44 reference genomes, most of which were organized into 62,367 gene families through orthologous and paralogous gene classification, whole-genome alignments, and synteny. Additional gene annotations include ontology-based protein structure and function; genetic, epigenetic, and phenotypic diversity; and pathway associations. Gramene's Plant Reactome provides a knowledgebase of cellular-level plant pathway networks. Specifically, it uses curated rice reference pathways to derive pathway projections for an additional 66 species based on gene orthology, and facilitates display of gene expression, gene-gene interactions, and user-defined omics data in the context of these pathways. As a community portal, Gramene integrates best-of-class software and infrastructure components including the Ensembl genome browser, Reactome pathway browser, and Expression Atlas widgets, and undergoes periodic data and software upgrades. Via powerful, intuitive search interfaces, users can easily query across various portals and interactively analyze search results by clicking on diverse features such as genomic context, highly augmented gene trees, gene expression anatomograms, associated pathways, and external informatics resources. All data in Gramene are accessible through both visual and programmatic interfaces.",2018-01-01 +26946289,dbPEC: a comprehensive literature-based database for preeclampsia related genes and phenotypes.,"Preeclampsia is one of the most common causes of fetal and maternal morbidity and mortality in the world. 
We built a Database for Preeclampsia (dbPEC) consisting of the clinical features, concurrent conditions, published literature and genes associated with Preeclampsia. We included gene sets associated with severity, concurrent conditions, tissue sources and networks. The published scientific literature is the primary repository for all information documenting human disease. We used semantic data mining to retrieve and extract the articles pertaining to preeclampsia-associated genes and performed manual curation. We deposited the articles, genes, preeclampsia phenotypes and other supporting information into the dbPEC. It is publicly available and freely accessible. Previously, we developed a database for preterm birth (dbPTB) using a similar approach. Using the gene sets in dbPTB, we were able to successfully analyze a genome-wide study of preterm birth including 4000 women and children. We identified important genes and pathways associated with preterm birth that were not otherwise demonstrable using genome-wide approaches. dbPEC serves not only as a resource for genes and articles associated with preeclampsia; it is also a robust source of gene sets to analyze a wide range of high-throughput data for gene set enrichment analysis. Database URL: http://ptbdb.cs.brown.edu/dbpec/.",2016-03-05 +29896523,Time-course proteomics dataset to monitor protein-bound methionine oxidation in Bacillus cereus ATCC 14579.,"Aerobic respiratory growth generates endogenous reactive oxygen species (ROS). ROS oxidize protein-bound methionine residues into methionine sulfoxide. Methionine sulfoxide reductases catalyze the reduction of methionine sulfoxide to methionine in proteins. Here, we use high-throughput nanoLC-MS/MS methodology to establish detailed maps of oxidized proteins from Bacillus cereus ATCC 14579 ΔpBClin15 and its mutant for which the methionine sulfoxide reductase AB gene (msrAB) has been inactivated (Madeira et al., 2017) [1]. 
Lists of oxidized peptides and proteins identified at early exponential, late exponential and stationary growth phases are supplied in this article as data files. Raw data are deposited at the ProteomeXchange Consortium via the PRIDE partner repository with the dataset identifiers, PXD006169 and PXD006205 (http://www.ebi.ac/uk). Given the importance of methionine oxidation in several key cellular processes and its impact in the field of medical and food microbiology, this paper should be useful for further insightful redox studies in B. cereus and its numerous relatives.",2018-03-10 +26578557,LegumeIP 2.0--a platform for the study of gene function and genome evolution in legumes.,"The LegumeIP 2.0 database hosts large-scale genomics and transcriptomics data and provides integrative bioinformatics tools for the study of gene function and evolution in legumes. Our recent updates in LegumeIP 2.0 include gene and protein sequences, gene models and annotations, syntenic regions, protein families and phylogenetic trees for six legume species: Medicago truncatula, Glycine max (soybean), Lotus japonicus, Phaseolus vulgaris (common bean), Cicer arietinum (chickpea) and Cajanus cajan (pigeon pea) and two outgroup reference species: Arabidopsis thaliana and Poplar trichocarpa. Moreover, the LegumeIP 2.0 features the following new data resources and bioinformatics tools: (i) an integrative gene expression atlas for four model legumes that include 550 array hybridizations from M. truncatula, 962 gene expression profiles of G. max, 276 array hybridizations from L. japonicus and 56 RNA-Seq-based gene expression profiles for C. arietinum. These datasets were manually curated and hierarchically organized based on Experimental Ontology and Plant Ontology so that users can browse, search, and retrieve data for their selected experiments. (ii) New functions/analytical tools to query, mine and visualize large-scale gene sequences, annotations and transcriptome profiles. 
Users may select a subset of expression experiments and visualize and compare expression profiles for multiple genes. The LegumeIP 2.0 database is freely available to the public at http://plantgrn.noble.org/LegumeIP/.",2015-11-17 +26044712,"CATNAP: a tool to compile, analyze and tally neutralizing antibody panels.","CATNAP (Compile, Analyze and Tally NAb Panels) is a new web server at Los Alamos HIV Database, created to respond to the newest advances in HIV neutralizing antibody research. It is a comprehensive platform focusing on neutralizing antibody potencies in conjunction with viral sequences. CATNAP integrates neutralization and sequence data from published studies, and allows users to analyze that data for each HIV Envelope protein sequence position and each antibody. The tool has multiple data retrieval and analysis options. As input, the user can pick specific antibodies and viruses, choose a panel from a published study, or supply their own data. The output superimposes neutralization panel data, virus epidemiological data, and viral protein sequence alignments on one page, and provides further information and analyses. The user can highlight alignment positions, or select antibody contact residues and view position-specific information from the HIV databases. The tool calculates tallies of amino acids and N-linked glycosylation motifs, counts of antibody-sensitive and -resistant viruses in conjunction with each amino acid or N-glycosylation motif, and performs Fisher's exact test to detect potential positive or negative amino acid associations for the selected antibody. Website name: CATNAP (Compile, Analyze and Tally NAb Panels). 
Website address: http://hiv.lanl.gov/catnap.",2015-06-04 +21976737,Protein Data Bank Japan (PDBj): maintaining a structural data archive and resource description framework format.,"The Protein Data Bank Japan (PDBj, http://pdbj.org) is a member of the worldwide Protein Data Bank (wwPDB) and accepts and processes the deposited data of experimentally determined macromolecular structures. While maintaining the archive in collaboration with other wwPDB partners, PDBj also provides a wide range of services and tools for analyzing structures and functions of proteins, which are summarized in this article. To enhance the interoperability of the PDB data, we have recently developed PDB/RDF, PDB data in the Resource Description Framework (RDF) format, along with its ontology in the Web Ontology Language (OWL) based on the PDB mmCIF Exchange Dictionary. Being in the standard format for the Semantic Web, the PDB/RDF data provide a means to integrate the PDB with other biological information resources.",2011-10-05 +28991892,SCENIC: single-cell regulatory network inference and clustering.,"We present SCENIC, a computational method for simultaneous gene regulatory network reconstruction and cell-state identification from single-cell RNA-seq data (http://scenic.aertslab.org). On a compendium of single-cell data from tumors and brain, we demonstrate that cis-regulatory analysis can be exploited to guide the identification of transcription factors and cell states. SCENIC provides critical biological insights into the mechanisms driving cellular heterogeneity.",2017-10-09 +29513196,Phylogeny analysis from gene-order data with massive duplications.,"

Background

Gene order changes, under rearrangements, insertions, deletions and duplications, have been used as a new type of data source for phylogenetic reconstruction. Because these changes are rare compared to sequence mutations, they allow the inference of phylogeny further back in evolutionary time. There exist many computational methods for the reconstruction of gene-order phylogenies, including widely used maximum parsimonious methods and maximum likelihood methods. However, both methods face challenges in handling large genomes with many duplicated genes, especially in the presence of whole genome duplication.

Methods

In this paper, we present three simple yet powerful methods based on maximum-likelihood (ML) approaches that encode multiplicities of both gene adjacency and gene content information for phylogenetic reconstruction.

Results

Extensive experiments on simulated data sets show that our new method achieves the most accurate phylogenies compared to existing approaches. We also evaluate our method on real whole-genome data from eleven mammals. The package is publicly accessible at http://www.geneorder.org .

Conclusions

Our new encoding schemes successfully incorporate the multiplicity information of gene adjacencies and gene content into an ML framework, and show promising results in reconstruct phylogenies for whole-genome data in the presence of massive duplications.",2017-10-16 +24323624,SABRE2: a database connecting plant EST/full-length cDNA clones with Arabidopsis information.,"The SABRE (Systematic consolidation of Arabidopsis and other Botanical REsources) database cross-searches plant genetic resources through publicly available Arabidopsis information. In SABRE, plant expressed sequence tag (EST)/cDNA clones are related to TAIR (The Arabidoposis Information Resource) gene models and their annotations through sequence similarity. By entering a keyword, SABRE searches and retrieves TAIR gene models and annotations, together with homologous gene clones from various plant species. SABRE thus facilitates using TAIR annotations of Arabidopsis genes for research on homologous genes from other model plants. To expand the application range of SABRE to crop breeding, we have recently upgraded SABRE to SABRE2 (http://sabre.epd.brc.riken.jp/SABRE2.html), by newly adding six model plants (including the major crops barley, soybean, tomato and wheat), and by improving the retrieval interface. The present version has integrated information on >1.5 million plant EST/cDNA clones from the National BioResource Project (NBRP) of Japan. All clones are actual experimental resources from 14 plant species (Arabidoposis, barley, cassava, Chinese cabbage, lotus, morning glory, poplar, Physcomitrella patens, Striga hermonthica, soybean, Thellungiella halophila, tobacco, tomato and wheat), and are available from the core facilities of the NBRP. 
SABRE2 is thus a useful tool that can contribute towards the improvement of important crop breeds by connecting basic research and crop breeding.",2013-12-09 +,"A molecular phylogeny for the oldest (nonditrysian) lineages of extant Lepidoptera, with implications for classification, comparative morphology and life‐history evolution","Within the insect order Lepidoptera (moths and butterflies), the so‐called nonditrysian superfamilies are mostly species‐poor but highly divergent, offering numerous synapomorphies and strong morphological evidence for deep divergences. Uncertainties remain, however, and tests of the widely accepted morphological framework using other evidence are desirable. The goal of this paper is to test previous hypotheses of nonditrysian phylogeny against a data set consisting of 61 nonditrysian species plus 20 representative Ditrysia and eight outgroups (Trichoptera), nearly all sequenced for 19 nuclear genes (up to 14 700 bp total). We compare our results in detail with those from previous studies of nonditrysians, and review the morphological evidence for and against each grouping The major conclusions are as follows. (i) There is very strong support for Lepidoptera minus Micropterigidae and Agathiphagidae, here termed Angiospermivora, but no definitive resolution of the position of Agathiphagidae, although support is strongest for alliance with Micropterigidae, consistent with another recent molecular study. (ii) There is very strong support for Glossata, which excludes Heterobathmiidae, but weak support for relationships among major homoneurous clades. Eriocraniidae diverge first, corroborating the morphological clade Coelolepida, but the morphological clades Myoglossata and Neolepidoptera are never monophyletic in the molecular trees; both are contradicted by strong support for Lophocoronoidea + Hepialoidea, the latter here including Mnesarchaeoidea syn.n. 
(iii) The surprising grouping of Acanthopteroctetidae + Neopseustidae, although weakly supported here, is consistent with another recent molecular study. (iv) Heteroneura is very strongly supported, as is a basal split of this clade into Nepticuloidea + Eulepidoptera. Relationships within Nepticuloidea accord closely with recent studies based on fewer genes but many more taxa. (v) Eulepidoptera are split into a very strongly supported clade consisting of Tischeriidae + Palaephatidae + Ditrysia, here termed Euheteroneura, and a moderately supported clade uniting Andesianidae with Adeloidea. (vi) Relationships within Adeloidea are strongly resolved and Tridentaformidae fam.n. is described for the heretofore problematic genus Tridentaforma Davis, which is strongly supported in an isolated position within the clade. (vii) Within Euheteroneura, the molecular evidence is conflicting with respect to the sister group to Ditrysia, but strongly supports paraphyly of Palaephatidae. We decline to change the classification, however, because of strong morphological evidence supporting palaephatid monophyly. (viii) We review the life histories and larval feeding habits of all nonditrysian families and assess the implications of our results for hypotheses about early lepidopteran phytophagy. The first host record for Neopseustidae, which needs confirmation, suggests that larvae of this family may be parasitoids. This published work has been registered in ZooBank: http://zoobank.org/urn:lsid:zoobank.org:pub:C17BB79B‐EF8F‐4925‐AFA0‐2FEF8AC32876.",2015-10-01 +26531826,JASPAR 2016: a major expansion and update of the open-access database of transcription factor binding profiles.,"JASPAR (http://jaspar.genereg.net) is an open-access database storing curated, non-redundant transcription factor (TF) binding profiles representing transcription factor binding preferences as position frequency matrices for multiple species in six taxonomic groups. 
For this 2016 release, we expanded the JASPAR CORE collection with 494 new TF binding profiles (315 in vertebrates, 11 in nematodes, 3 in insects, 1 in fungi and 164 in plants) and updated 59 profiles (58 in vertebrates and 1 in fungi). The introduced profiles represent an 83% expansion and 10% update when compared to the previous release. We updated the structural annotation of the TF DNA binding domains (DBDs) following a published hierarchical structural classification. In addition, we introduced 130 transcription factor flexible models trained on ChIP-seq data for vertebrates, which capture dinucleotide dependencies within TF binding sites. This new JASPAR release is accompanied by a new web tool to infer JASPAR TF binding profiles recognized by a given TF protein sequence. Moreover, we provide the users with a Ruby module complementing the JASPAR API to ease programmatic access and use of the JASPAR collection of profiles. Finally, we provide the JASPAR2016 R/Bioconductor data package with the data of this release.",2015-11-03 +30225301,Proteomics dataset containing proteins that obscure identification of TOPLESS interactors in Arabidopsis.,"Here we report proteins identified after conducting Tandem Affinity Purification (TAP) of the TOPLESS (TPL) corepressor from Arabidopsis. We generated transgenic plants harboring TPL fused to the GS-TAG, ""Boosting tandem affinity purification of plant protein complexes"" (Van Leene et al., 2008) [1]. Four independent biological replicates of a selected TPL-GS-TAG line were grown simultaneously, crosslinked with formaldehyde, and proteins were isolated from whole plant tissue via TAP. Purified proteins were treated with trypsin, and the peptides were analyzed via mass spectrometry. Datasets are hosted in the MassIVE public repository (reference number: MSV000082477, https://massive.ucsd.edu/ProteoSAFe/dataset.jsp?task=f16255fb7080426a9fe1926b4d3d5862). 
The data in this article has not been published elsewhere and is original to this work.",2018-08-29 +24128060,PT-Flax (phenotyping and TILLinG of flax): development of a flax (Linum usitatissimum L.) mutant population and TILLinG platform for forward and reverse genetics.,"

Background

Flax (Linum usitatissimum L.) is an economically important fiber and oil crop that has been grown for thousands of years. The genome has been recently sequenced and transcriptomics are providing information on candidate genes potentially related to agronomically-important traits. In order to accelerate functional characterization of these genes we have generated a flax EMS mutant population that can be used as a TILLinG (Targeting Induced Local Lesions in Genomes) platform for forward and reverse genetics.

Results

A population of 4,894 M2 mutant seed families was generated using 3 different EMS concentrations (0.3%, 0.6% and 0.75%) and used to produce M2 plants for subsequent phenotyping and DNA extraction. 10,839 viable M2 plants (4,033 families) were obtained and 1,552 families (38.5%) showed a visual developmental phenotype (stem size and diameter, plant architecture, flower-related). The majority of these families showed more than one phenotype. Mutant phenotype data are organised in a database and can be accessed and searched at UTILLdb (http://urgv.evry.inra.fr/UTILLdb). Preliminary screens were also performed for atypical fiber and seed phenotypes. Genomic DNA was extracted from 3,515 M2 families and eight-fold pooled for subsequent mutant detection by ENDO1 nuclease mis-match cleavage. In order to validate the collection for reverse genetics, DNA pools were screened for two genes coding enzymes of the lignin biosynthesis pathway: Coumarate-3-Hydroxylase (C3H) and Cinnamyl Alcohol Dehydrogenase (CAD). We identified 79 and 76 mutations in the C3H and CAD genes, respectively. The average mutation rate was calculated as 1/41 Kb giving rise to approximately 9,000 mutations per genome. Thirty-five out of the 52 flax cad mutant families containing missense or codon stop mutations showed the typical orange-brown xylem phenotype observed in CAD down-regulated/mutant plants in other species.

Conclusions

We have developed a flax mutant population that can be used as an efficient forward and reverse genetics tool. The collection has an extremely high mutation rate that enables the detection of large numbers of independent mutant families by screening a comparatively low number of M2 families. The population will prove to be a valuable resource for both fundamental research and the identification of agronomically-important genes for crop improvement in flax.",2013-10-15 +29293953,Ontological function annotation of long non-coding RNAs through hierarchical multi-label classification.,"

Motivation

Long non-coding RNAs (lncRNAs) are an enormous collection of functional non-coding RNAs. Over the past decades, a large number of novel lncRNA genes have been identified. However, most of the lncRNAs remain functionally uncharacterized at present. Computational approaches provide a new insight to understand the potential functional implications of lncRNAs.

Results

Considering that each lncRNA may have multiple functions and a function may be further specialized into sub-functions, here we describe NeuraNetL2GO, a computational ontological function prediction approach for lncRNAs using hierarchical multi-label classification strategy based on multiple neural networks. The neural networks are incrementally trained level by level, each performing the prediction of gene ontology (GO) terms belonging to a given level. In NeuraNetL2GO, we use topological features of the lncRNA similarity network as the input of the neural networks and employ the output results to annotate the lncRNAs. We show that NeuraNetL2GO achieves the best performance and the overall advantage in maximum F-measure and coverage on the manually annotated lncRNA2GO-55 dataset compared to other state-of-the-art methods.

Availability and implementation

The source code and data are available at http://denglab.org/NeuraNetL2GO/.

Contact

leideng@csu.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-05-01 +27683030,MicroRNA regulatory pathway analysis identifies miR-142-5p as a negative regulator of TGF-β pathway via targeting SMAD3.,"MicroRNAs (miRNAs) are non-coding RNAs with functions of posttranscriptional regulation. The abnormally expressed miRNAs have been shown to be crucial contributors and may serve as biomarkers in many diseases. However, determining the biological function of miRNAs is an ongoing challenge. By combining miRNA targets prediction, miRNA and mRNA expression profiles in TCGA cancers, and pathway data, we performed a miRNA-pathway regulation inference by Fisher's exact test for enrichment analysis. Then we constructed a database to show the cancer related miRNA-pathway regulatory network (http://bioinfo.life.hust.edu.cn/miR_path). As one of the miRNAs targeting many cancer related pathways, miR-142-5p potentially regulates the maximum number of genes in TGF-β signaling pathway. We experimentally confirmed that miR-142-5p directly targeted and suppressed SMAD3, a key component in TGF-β signaling. Ectopic overexpression of miR-142-5p significantly promoted tumor cell proliferation and inhibited apoptosis, while silencing of miR-142-5p inhibited the tumor cell proliferation and promoted apoptosis in vitro. These findings indicate that miR-142-5p plays as a negative regulator in TGF-β pathway by targeting SMAD3 and suppresses TGF-β-induced growth inhibition in cancer cells. Our study proved the feasibility of miRNA regulatory pathway analysis and shed light on combining bioinformatics with experiments in the research of complex diseases.",2016-11-01 +29618827,NIPTmer: rapid k-mer-based software package for detection of fetal aneuploidies.,"Non-invasive prenatal testing (NIPT) is a recent and rapidly evolving method for detecting genetic lesions, such as aneuploidies, of a fetus. 
However, there is a need for faster and cheaper laboratory and analysis methods to make NIPT more widely accessible. We have developed a novel software package for detection of fetal aneuploidies from next-generation low-coverage whole genome sequencing data. Our tool - NIPTmer - is based on counting pre-defined per-chromosome sets of unique k-mers from raw sequencing data, and applying linear regression model on the counts. Additionally, the filtering process used for k-mer list creation allows one to take into account the genetic variance in a specific sample, thus reducing the source of uncertainty. The processing time of one sample is less than 10 CPU-minutes on a high-end workstation. NIPTmer was validated on a cohort of 583 NIPT samples and it correctly predicted 37 non-mosaic fetal aneuploidies. NIPTmer has the potential to reduce significantly the time and complexity of NIPT post-sequencing analysis compared to mapping-based methods. For non-commercial users the software package is freely available at http://bioinfo.ut.ee/NIPTMer/ .",2018-04-04 +24378693,Executive summary: A Quick Reference Guide for Managing Fecal Incontinence (FI).,"This article is an executive summary of A Quick Reference Guide for Managing Fecal Incontinence (FI), which was published September 2013 by the WOCN Society's Continence Committee. The quick reference guide provides an overview of fecal incontinence and how it is commonly managed. The information has been compiled so that nurses can quickly access a wide array of information in a single resource to facilitate patient care and patient/staff education. The topics include a definition of FI, epidemiologic data, psychosocial impact, costs, a brief description of bowel physiology, causes of FI, skin complications, assessment, management, patient education, and recommendations for future research. The complete quick reference guide also includes 5 appendices. 
The appendices provide additional information about predisposing factors for FI, assessment tools, medications and foods that affect gastric or colonic motility, and examples of FI management systems/collectors. The complete quick reference guide is available in the Members-Only library at the WOCN Society's Web site (http://www.wocn.org), and it can also be purchased from the online bookstore at the Society's Web site. This article provides a synopsis of the key features contained in the complete quick reference guide.",2014-01-01 +28756161,ctPath: Demixing pathway crosstalk effect from transcriptomics data for differential pathway identification.,"Identifying differentially expressed pathways (DEPs) plays important roles in understanding tumor etiology and promoting clinical treatment of cancer or other diseases. By assuming gene expression to be a sparse non-negative linear combination of hidden pathway signals, we propose a pathway crosstalk-based transcriptomics data analysis method (ctPath) for identifying differentially expressed pathways. Biologically, pathways of different functions work in concert at the systematic level. The proposed method interrogates the crosstalks between pathways and discovers hidden pathway signals by mapping high-dimensional transcriptomics data into a low-dimensional pathway space. The resulted pathway signals reflect the activity level of pathways after removing pathway crosstalk effect and allow a robust identification of DEPs from inherently complex and noisy transcriptomics data. CtPath can also correct incomplete and inaccurate pathway annotations which frequently occur in public repositories. Experimental results on both simulation data and real-world cancer data demonstrate the superior performance of ctPath over other popular approaches. 
R code for ctPath is available for non-commercial use at the URL http://micblab.iim.ac.cn/Download/.",2017-07-27 +31319941,Evaluate Cutpoints: Adaptable continuous data distribution system for determining survival in Kaplan-Meier estimator.,"

Background and objective

Growing evidence of transcriptional and metabolomic differentiation induced many studies which analyze such differentiation in context of outcome of disease progression, treatment or influence of many different factors affecting cellular and tissue metabolism. Particularly, cancer researchers are looking for new biomarkers that can serve as a diagnostic/prognostic factor and its further corresponding relationship regarding clinical effects. As a result of the increasing interest in use of dichotomization of continuous variables involving clinical or epidemiological data (gene expression, biomarkers, biochemical parameters, etc.) there is a large demand for cutoff point determination tools with simultaneous lack of software offering stratification of patients based on continuous and binary variables. Therefore, we developed ""Evaluate Cutpoints"" application offering wide set of statistical and graphical methods for cutpoint optimization enabling stratification of population into two or three groups.

Methods

Application is based on R language including algorithms of packages such as survival, survMisc, OptimalCutpoints, maxstat, Rolr, ggplot2, GGally and plotly offering Kaplan-Meier plots and ROC curves with cutoff point determination.

Results

All capabilities of Evaluate Cutpoints were illustrated with example analysis of estrogen, progesterone and human epidermal growth factor 2 receptors in breast cancer cohort. Through ROC curve the cutoff points were established for expression of ESR1, PGR and ERBB2 in correlation with their immunohistochemical status (cutoff: 1301.253, 243.35, 11,434.438, respectively; sensitivity: 94%, 85%, 64%, respectively; specificity: 93%, 86%, 91%, respectively). Through disease-free survival analysis we divided patients into two and three groups regarding expression of ESR1, PGR and ERBB2. Example algorithm cutp showed that lowered expression of ESR1 and ERBB2 was more favorable (HR = 2.07, p = 0.0412; HR = 2.79, p = 0.0777, respectively), whereas heightened PGR expression was correlated with better prognosis (HR = 0.192, p = 0.0115).

Conclusions

This work presents application Evaluate Cutpoints that is freely available to download at http://wnbikp.umed.lodz.pl/Evaluate-Cutpoints/. Currently, many softwares are used to split continuous variables such as Cutoff Finder and X-Tile, which offer distinct algorithms. Unlike them, Evaluate Cutpoints allows not only dichotomization of populations into groups according to continuous variables and binary variables, but also stratification into three groups as well as manual selection of cutoff point thus preventing potential loss of information.",2019-05-23 +26851400,Software tools for simultaneous data visualization and T cell epitopes and disorder prediction in proteins.,"We have developed EpDis and MassPred, extendable open source software tools that support bioinformatic research and enable parallel use of different methods for the prediction of T cell epitopes, disorder and disordered binding regions and hydropathy calculation. These tools offer a semi-automated installation of chosen sets of external predictors and an interface allowing for easy application of the prediction methods, which can be applied either to individual proteins or to datasets of a large number of proteins. In addition to access to prediction methods, the tools also provide visualization of the obtained results, calculation of consensus from results of different methods, as well as import of experimental data and their comparison with results obtained with different predictors. The tools also offer a graphical user interface and the possibility to store data and the results obtained using all of the integrated methods in the relational database or flat file for further analysis. The MassPred part enables a massive parallel application of all integrated predictors to the set of proteins. Both tools can be downloaded from http://bioinfo.matf.bg.ac.rs/home/downloads.wafl?cat=Software. 
Appendix A includes the technical description of the created tools and a list of supported predictors.",2016-02-03 +21930248,HCVpro: hepatitis C virus protein interaction database.,"It is essential to catalog characterized hepatitis C virus (HCV) protein-protein interaction (PPI) data and the associated plethora of vital functional information to augment the search for therapies, vaccines and diagnostic biomarkers. In furtherance of these goals, we have developed the hepatitis C virus protein interaction database (HCVpro) by integrating manually verified hepatitis C virus-virus and virus-human protein interactions curated from literature and databases. HCVpro is a comprehensive and integrated HCV-specific knowledgebase housing consolidated information on PPIs, functional genomics and molecular data obtained from a variety of virus databases (VirHostNet, VirusMint, HCVdb and euHCVdb), and from BIND and other relevant biology repositories. HCVpro is further populated with information on hepatocellular carcinoma (HCC) related genes that are mapped onto their encoded cellular proteins. Incorporated proteins have been mapped onto Gene Ontologies, canonical pathways, Online Mendelian Inheritance in Man (OMIM) and extensively cross-referenced to other essential annotations. The database is enriched with exhaustive reviews on structure and functions of HCV proteins, current state of drug and vaccine development and links to recommended journal articles. Users can query the database using specific protein identifiers (IDs), chromosomal locations of a gene, interaction detection methods, indexed PubMed sources as well as HCVpro, BIND and VirusMint IDs. The use of HCVpro is free and the resource can be accessed via http://apps.sanbi.ac.za/hcvpro/ or http://cbrc.kaust.edu.sa/hcvpro/.",2011-09-09 +26381716,Y STR haplotype diversity in central Indian population.,"

Aims

Seventeen Y-STR loci (DYS19, DYS389I, DYS389II, DYS390, DYS391, DYS392, DYS393, DYS385a/b, DYS437, DYS438, DYS439, DYS448, DYS456, DYS458, DYS635 and Y-GATA-H4) were analysed in 173 males belonging to the central Indian population with the aim of studying genetic diversity and adding to the population database.

Methods

Multiplexed PCR amplifications of the 17 Y STR loci were performed using AmpFlSTR® Yfiler® Kit. Amplified products were genotyped using a multi capillary electrophoresis with POP-4 polymer in ABI Prism 3100 Genetic Analyzer. Population genetic diversity and allele frequencies were calculated. The haplotype data obtained in the study was compared with the Y-STR haplotypes reference database (YHRD, http://www.yhrd.org ) and with previously published population data using the AMOVA tool and visualised in two-dimensional multidimensional scaling (MDS) plots.

Results

A total of 147 haplotypes were observed, out of which 125 were unique. Haplotype diversity and discriminating capacity were found to be 0.9979 and 0.8497, respectively. The gene diversity at the loci ranged from 0.398 to 0.785. Genotype diversity at the locus DYS385a/b was found to be 0.869.

Conclusions

The population of central India was found to be significantly different (p < 0.05) when compared with populations from other parts of the Indian sub-continent and the population data of other countries. The population data generated in this study are useful for forensic, anthropological and demographic studies.",2015-09-18 +22397531,AtPAN: an integrated system for reconstructing transcriptional regulatory networks in Arabidopsis thaliana.,"

Background

Construction of transcriptional regulatory networks (TRNs) is of priority concern in systems biology. Numerous high-throughput approaches, including microarray and next-generation sequencing, are extensively adopted to examine transcriptional expression patterns on the whole-genome scale; those data are helpful in reconstructing TRNs. Identifying transcription factor binding sites (TFBSs) in a gene promoter is the initial step in elucidating the transcriptional regulation mechanism. Since transcription factors usually co-regulate a common group of genes by forming regulatory modules with similar TFBSs, the combinatorial interactions of transcription factors must be modeled to reconstruct the gene regulatory networks. Description For systems biology applications, this work develops a novel database called Arabidopsis thaliana Promoter Analysis Net (AtPAN), capable of detecting TFBSs and their corresponding transcription factors (TFs) in a promoter or a set of promoters in Arabidopsis. For further analysis, according to the microarray expression data and literature, the co-expressed TFs and their target genes can be retrieved from AtPAN. Additionally, proteins interacting with the co-expressed TFs are also incorporated to reconstruct co-expressed TRNs. Moreover, combinatorial TFs can be detected by the frequency of TFBSs co-occurrence in a group of gene promoters. In addition, TFBSs in the conserved regions between the two input sequences or homologous genes in Arabidopsis and rice are also provided in AtPAN. The output results also suggest conducting wet experiments in the future.

Conclusions

The AtPAN, which has a user-friendly input/output interface and provide graphical view of the TRNs. This novel and creative resource is freely available online at http://AtPAN.itps.ncku.edu.tw/.",2012-03-08 +30627315,Development and validation of Portable Automated Rapid Testing (PART) measures for auditory research. ,"The current state of consumer-grade electronics means that researchers, clinicians, students, and members of the general public across the globe can create high-quality auditory stimuli using tablet computers, built-in sound hardware, and calibrated consumer-grade headphones. Our laboratories have created a free application that supports this work: PART (Portable Automated Rapid Testing). PART has implemented a range of psychoacoustical tasks including: spatial release from speech-on-speech masking, binaural sensitivity, gap discrimination, temporal modulation, spectral modulation, and spectrotemporal modulation (STM). Here, data from the spatial release and STM tasks are presented. Data were collected across the globe on tablet computers using applications available for free download, built-in sound hardware, and calibrated consumer-grade headphones. Spatial release results were as good or better than those obtained with standard laboratory methods. Spectrotemporal modulation thresholds were obtained rapidly and, for younger normal hearing listeners, were also as good or better than those in the literature. For older hearing impaired listeners, rapid testing resulted in similar thresholds to those reported in the literature. Listeners at five different testing sites produced very similar STM thresholds, despite a variety of testing conditions and calibration routines. 
Download Spatial Release, PART, and Listen: An Auditory Training Experience for free at https://bgc.ucr.edu/games/.",2018-05-01 +28069635,Evaluating approaches to find exon chains based on long reads.,"Transcript prediction can be modeled as a graph problem where exons are modeled as nodes and reads spanning two or more exons are modeled as exon chains. Pacific Biosciences third-generation sequencing technology produces significantly longer reads than earlier second-generation sequencing technologies, which gives valuable information about longer exon chains in a graph. However, with the high error rates of third-generation sequencing, aligning long reads correctly around the splice sites is a challenging task. Incorrect alignments lead to spurious nodes and arcs in the graph, which in turn lead to incorrect transcript predictions. We survey several approaches to find the exon chains corresponding to long reads in a splicing graph, and experimentally study the performance of these methods using simulated data to allow for sensitivity/precision analysis. Our experiments show that short reads from second-generation sequencing can be used to significantly improve exon chain correctness either by error-correcting the long reads before splicing graph creation, or by using them to create a splicing graph on which the long-read alignments are then projected. We also study the memory and time consumption of various modules, and show that accurate exon chains lead to significantly increased transcript prediction accuracy.

Availability

The simulated data and in-house scripts used for this article are available at http://www.cs.helsinki.fi/group/gsa/exon-chains/exon-chains-bib.tar.bz2.",2018-05-01 +27378301,Recognizing metal and acid radical ion-binding sites by integrating ab initio modeling with template-based transferals.,"

Motivation

More than half of proteins require binding of metal and acid radical ions for their structure and function. Identification of the ion-binding locations is important for understanding the biological functions of proteins. Due to the small size and high versatility of the metal and acid radical ions, however, computational prediction of their binding sites remains difficult.

Results

We proposed a new ligand-specific approach devoted to the binding site prediction of 13 metal ions (Zn2+, Cu2+, Fe2+, Fe3+, Ca2+, Mg2+, Mn2+, Na+, K+) and acid radical ion ligands (CO32-, NO2-, SO42-, PO43-) that are most frequently seen in protein databases. A sequence-based ab initio model is first trained on sequence profiles, where a modified AdaBoost algorithm is extended to balance binding and non-binding residue samples. A composite method IonCom is then developed to combine the ab initio model with multiple threading alignments for further improving the robustness of the binding site predictions. The pipeline was tested using 5-fold cross validations on a comprehensive set of 2,100 non-redundant proteins bound with 3,075 small ion ligands. Significant advantage was demonstrated compared with the state of the art ligand-binding methods including COACH and TargetS for high-accuracy ion-binding site identification. Detailed data analyses show that the major advantage of IonCom lies at the integration of complementary ab initio and template-based components. Ion-specific feature design and binding library selection also contribute to the improvement of small ion ligand binding predictions.

Availability and implementation

http://zhanglab.ccmb.med.umich.edu/IonCom CONTACT: hxz@imut.edu.cn or zhng@umich.eduSupplementary information: Supplementary data are available at Bioinformatics online.",2016-07-04 +,Morphometric analysis and taxonomic revision of Anisopteromalus Ruschka (Hymenoptera: Chalcidoidea: Pteromalidae) – an integrative approach,"We use an integrative taxonomic approach to revise the genus Anisopteromalus. In particular, we apply multivariate ratio analysis (MRA), a rather new statistical method based on principal component analysis (PCA) and linear discriminant analysis (LDA), to numerous body measurements and combine the data with those from our molecular analysis of Cytb and ITS2 genetic markers (on a subset of species) and all available published data on morphology, karyology, behaviour, host associations and geographic distribution. We demonstrate that the analysis of quantitative characters using MRA plays a major role for the integration of name‐bearing types and thus for the association of taxa with names. Six species are recognized, of which two are new: A. cornis Baur sp.n. and A. quinarius Gokhman & Baur sp.n. For Anisopteromalus calandrae (Howard), a well‐known, cosmopolitan parasitoid of stored‐product pests, we have selected a neotype to foster continuity and stability in the application of this important name. The species was sometimes confused with the related A. quinarius sp.n., another cosmopolitan species that is frequently encountered in similar environments. We also show that several species originally described or later put under Anisopteromalus actually belong to different genera: Cyrtoptyx camerunus (Risbec) comb.n.; Meraporus glaber (Szelényi) comb.n.; Dinarmus schwenkei (Roomi, Khan & Khan) comb.n. Neocatolaccus indicus Ayyar & Mani is confirmed as a junior synonym of Oxysychus sphenopterae (Ferrière) syn.n. and Anisopteromalus calandrae brasiliensis (Domenichini) stat.rev. must be considered as a valid but doubtful taxon. 
This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:BDFE96D3‐D0F4‐4012‐90F5‐9A087F7F5864.",2014-10-01 +28633385,TIminer: NGS data mining pipeline for cancer immunology and immunotherapy.,"

Summary

Recently, a number of powerful computational tools for dissecting tumor-immune cell interactions from next-generation sequencing data have been developed. However, the assembly of analytical pipelines and execution of multi-step workflows are laborious and involve a large number of intermediate steps with many dependencies and parameter settings. Here we present TIminer, an easy-to-use computational pipeline for mining tumor-immune cell interactions from next-generation sequencing data. TIminer enables integrative immunogenomic analyses, including: human leukocyte antigens typing, neoantigen prediction, characterization of immune infiltrates and quantification of tumor immunogenicity.

Availability and implementation

TIminer is freely available at http://icbi.i-med.ac.at/software/timiner/timiner.shtml.

Contact

zlatko.trajanoski@i-med.ac.at.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +27380722,The unique peptidome: Taxon-specific tryptic peptides as biomarkers for targeted metaproteomics.,"The Unique Peptide Finder (http://unipept.ugent.be/peptidefinder) is an interactive web application to quickly hunt for tryptic peptides that are unique to a particular species, genus, or any other taxon. Biodiversity within the target taxon is represented by a set of proteomes selected from a monthly updated list of complete and nonredundant UniProt proteomes, supplemented with proprietary proteomes loaded into persistent local browser storage. The software computes and visualizes pan and core peptidomes as unions and intersections of tryptic peptides occurring in the selected proteomes. In addition, it also computes and displays unique peptidomes as the set of all tryptic peptides that occur in all selected proteomes but not in any UniProt record not assigned to the target taxon. As a result, the unique peptides can serve as robust biomarkers for the target taxon, for example, in targeted metaproteomics studies. Computations are extremely fast since they are underpinned by the Unipept database, the lowest common ancestor algorithm implemented in Unipept and modern web technologies that facilitate in-browser data storage and parallel processing.",2016-08-16 +29592851,Development of a provisional essential medicines list for children in Canada: consensus process.,"

Background

Worldwide, many countries have developed a list of essential medicines for children to improve prescribing. We aimed to create an essential medicines list for children in Canada.

Methods

We adapted the previously created preliminary list of essential medicines for adults in Canada and the WHO Model List of Essential Medicines for Children to create a provisional list of essential medicines for children in Canada. Canadian clinicians made suggestions for changes. Literature relevant to each suggestion was presented to clinician-scientists, who used a modified nominal group technique to make recommendations on the suggestions. Ontario Public Drug Programs prescription data were reviewed to identify commonly prescribed medications missing from the list. Literature relevant to these medications was shared with a clinician-scientist review panel to determine which should be added, and a revised list was developed.

Results

A total of 76 items were removed from the list of essential medicines for adults in Canada because they were not indicated for use in children or were not relevant in the Canadian health care context; 7 medications were added to the child list based on Ontario Public Drugs Programs prescribing data and clinician-scientist review. Suggestions to add, remove or substitute medications were made by peer-reviewers and resulted in removal of 1 medication and replacement of 1 medication. The process produced a provisional list of 67 essential medications for children.

Interpretation

A provisional list of 67 essential medicines for children was created through a peer-reviewed, multistep process based on current clinical evidence, Canadian clinical practice guidelines and historical prescribing data. It is publicly posted at http://cleanmeds.ca/. The list should be further developed based on wider input and should be continuously revised based on emerging evidence of the safety and effectiveness of these medicines in all pediatric age groups.",2018-03-01 +26519912,eHALOPH a Database of Salt-Tolerant Plants: Helping put Halophytes to Work.,"eHALOPH (http://www.sussex.ac.uk/affiliates/halophytes/) is a database of salt-tolerant plants-halophytes. Records of plant species tolerant of salt concentrations of around 80 mM sodium chloride or more have been collected, along with data on plant type, life form, ecotypes, maximum salinity tolerated, the presence or absence of salt glands, photosynthetic pathway, antioxidants, secondary metabolites, compatible solutes, habitat, economic use and whether there are publications on germination, microbial interactions and mycorrhizal status, bioremediation and of molecular data. The database eHALOPH can be used in the analysis of traits associated with tolerance and for informing choice of species that might be used for saline agriculture, bioremediation or ecological restoration and rehabilitation of degraded wetlands or other areas.",2015-10-31 +,"Habitat Selection, the Included Niche, and Coexistence in Plant-Specialist Frogs from Madagascar","Classic niche partitioning suggests that coexistence among asymmetric competitors is facilitated by differential resource use. Coexistence is also possible, however, when a species only has access to resources that are shared with a competitor, providing it is the superior competitor on that resource (the ‘included niche'). 
To test predictions of these two coexistence mechanisms, we studied habitat selection of two closely related sympatric plant-specialist frogs from Madagascar (Guibemantis bicalcaratus and Guibemantis punctatus). Both species live and breed only in the water-filled leaf axils of Pandanus plants, and previous experiments with their tadpoles demonstrated asymmetric competition. In a 3-yr field study, we: (1) monitored the biotic and abiotic conditions of 348 plants; (2) surveyed these plants for frogs; (3) undertook a mark-recapture study; and (4) conducted an experiment where we manipulated the amount of detritus in plants. We identified several differences in the conditions of the habitats selected by both species (e.g., plant height, canopy cover over the plant, abundance of heterospecifics). Co-occurrence was nevertheless common, and G. punctatus persisted almost exclusively in plants it shared with G. bicalcaratus. As predicted by theory, G. punctatus was the superior competitor, at least in the tadpole stage. The inferior competitor (G. bicalcaratus) had access to exclusive resources not available to G. punctatus, by virtue of a faster developmental rate that permitted reproduction in lower quality plants. While there was some evidence of weak niche partitioning, we conclude that coexistence in these plant-specialist frogs is primarily via an included niche mechanism. Abstract in French is available at http://www.blackwell-synergy.com/loi/btp.",2011-01-01 +30705122,p300 Mediates Muscle Wasting in Lewis Lung Carcinoma.,"C/EBPβ is a key mediator of cancer-induced skeletal muscle wasting. However, the signaling mechanisms that activate C/EBPβ in the cancer milieu are poorly defined. Here, we report cancer-induced muscle wasting requires the transcriptional cofactor p300, which is critical for the activation of C/EBPβ. 
Conditioned media from diverse types of tumor cells as well as recombinant HSP70 and HSP90 provoked rapid acetylation of C/EBPβ in myotubes, particularly at its Lys39 residue. Overexpression of C/EBPβ with mutated Lys39 impaired Lewis lung carcinoma (LLC)-induced activation of the C/EBPβ-dependent catabolic response, which included upregulation of E3 ligases UBR2 and atrogin1/MAFbx, increased LC3-II, and loss of muscle proteins both in myotubes and mouse muscle. Silencing p300 in myotubes or overexpressing a dominant negative p300 mutant lacking acetyltransferase activity in mouse muscle attenuated LLC tumor-induced muscle catabolism. Administration of pharmacologic p300 inhibitor C646, but not PCAF/GCN5 inhibitor CPTH6, spared LLC tumor-bearing mice from muscle wasting. Furthermore, mice with muscle-specific p300 knockout were resistant to LLC tumor-induced muscle wasting. These data suggest that p300 is a key mediator of LLC tumor-induced muscle wasting whose acetyltransferase activity may be targeted for therapeutic benefit in this disease. SIGNIFICANCE: These findings demonstrate that tumor-induced muscle wasting in mice is abrogated by knockout, mutation of Lys39 or Asp1399, and pharmacologic inhibition of p300.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/79/7/1331/F1.large.jpg.",2019-01-31 +30590725,Heterodimeric DNA motif synthesis and validations.,"Bound by transcription factors, DNA motifs (i.e. transcription factor binding sites) are prevalent and important for gene regulation in different tissues at different developmental stages of eukaryotes. Although considerable efforts have been made on elucidating monomeric DNA motif patterns, our knowledge on heterodimeric DNA motifs are still far from complete. Therefore, we propose to develop a computational approach to synthesize a heterodimeric DNA motif from two monomeric DNA motifs. The approach is sequentially divided into two components (Phases A and B). 
In Phase A, we propose to develop the inference models on how two DNA monomeric motifs can be oriented and overlapped with each other at nucleotide level. In Phase B, given the two monomeric DNA motifs oriented, we further propose to develop DNA-binding family-specific input-output hidden Markov models (IOHMMs) to synthesize a heterodimeric DNA motif. To validate the approach, we execute and cross-validate it with the experimentally verified 618 heterodimeric DNA motifs across 49 DNA-binding family combinations. We observe that our approach can even ""rescue"" the existing heterodimeric DNA motif pattern (i.e. HOXB2_EOMES) previously published on Nature. Lastly, we apply the proposed approach to infer previously uncharacterized heterodimeric motifs. Their motif instances are supported by DNase accessibility, gene ontology, protein-protein interactions, in vivo ChIP-seq peaks, and even structural data from PDB. A public web-server is built for open accessibility and scientific impact. Its address is listed as follows: http://motif.cs.cityu.edu.hk/custom/MotifKirin.",2019-02-01 +30084960,PredMP: a web server for de novo prediction and visualization of membrane proteins.,"

Motivation

PredMP is the first web service, to our knowledge, that aims at de novo prediction of the membrane protein (MP) 3D structure followed by the embedding of the MP into the lipid bilayer for visualization. Our approach is based on a high-throughput Deep Transfer Learning (DTL) method that first predicts MP contacts by learning from non-MPs and then predicts the 3D model of the MP using the predicted contacts as distance restraints. This algorithm is derived from our previous Deep Learning (DL) method originally developed for soluble protein contact prediction, which has been officially ranked No. 1 in CASP12. The DTL framework in our approach overcomes the challenge that there are only a limited number of solved MP structures for training the deep learning model. There are three modules in the PredMP server: (i) The DTL framework followed by the contact-assisted folding protocol has already been implemented in RaptorX-Contact, which serves as the key module for 3D model generation; (ii) The 1D annotation module, implemented in RaptorX-Property, is used to predict the secondary structure and disordered regions; and (iii) the visualization module to display the predicted MPs embedded in the lipid bilayer guided by the predicted transmembrane topology.

Results

Tested on 510 non-redundant MPs, our server predicts correct folds for ∼290 MPs, which significantly outperforms existing methods. Tested on a blind and live benchmark CAMEO from September 2016 to January 2018, PredMP can successfully model all 10 MPs belonging to the hard category.

Availability and implementation

PredMP is freely accessed on the web at http://www.predmp.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +27465544,IGDD: a database of intronless genes in dicots.,"

Background

Intronless genes are a significant characteristic of prokaryotes. Systematic identification and annotation are primary and crucial steps for determining the functions of intronless genes and understanding their occurrence in eukaryotes.

Description

In this paper, we describe the construction of the Intronless Genes Database in Dicots (IGDD; available at http://bio.njfu.edu.cn/igdd/ ), which contains data for five well-annotated plants including Arabidopsis thaliana, Carica papaya, Populus trichocarpa, Salix suchowensis and Vitis vinifera. Using highly visual settings, IGDD displays the structural and functional annotations, the homolog groups, the syntenic relationships, the expression patterns, and the statistical characteristics of intronless genes. In addition, useful tools such as an advanced search and local BLAST are available through a user-friendly and intuitive web interface.

Conclusion

In conclusion, the IGDD provides a comprehensive and up-to-date platform for researchers to assist the exploration of intronless genes in dicot plants.",2016-07-27 +27681445,MPD3: a useful medicinal plants database for drug designing.,"Medicinal plants are the main natural pools for the discovery and development of new drugs. In the modern era of computer-aided drug designing (CADD), there is need of prompt efforts to design and construct useful database management system that allows proper data storage, retrieval and management with user-friendly interface. An inclusive database having information about classification, activity and ready-to-dock library of medicinal plant's phytochemicals is therefore required to assist the researchers in the field of CADD. The present work was designed to merge activities of phytochemicals from medicinal plants, their targets and literature references into a single comprehensive database named as Medicinal Plants Database for Drug Designing (MPD3). The newly designed online and downloadable MPD3 contains information about more than 5000 phytochemicals from around 1000 medicinal plants with 80 different activities, more than 900 literature references and 200 plus targets. The designed database is deemed to be very useful for the researchers who are engaged in medicinal plants research, CADD and drug discovery/development with ease of operation and increased efficiency. The designed MPD3 is a comprehensive database which provides most of the information related to the medicinal plants at a single platform. MPD3 is freely available at: http://bioinform.info .",2016-09-28 +29362557,Aphyllophoroid fungi in insular woodlands of eastern Ukraine.,"

Background

Fungi play crucial roles in ecosystems and are among the species-richest organism groups on Earth. However, knowledge on their occurrence lags behind the data for animals and plants. Recent analyses of fungal occurrence data from Western, Central and Northern Europe provided important insights into response of fungi to global warming. The consequences of the global changes for biodiversity on a larger geographical scale are not yet understood. Landscapes of Eastern Europe and particularly of eastern Ukraine, with their specific geological history, vegetation and climate, can add substantially new information about fungal diversity in Europe.

New information

We describe the dataset and provide a checklist of aphyllophoroid fungi (non-gilled macroscopic Basidiomycota) from eastern Ukraine sampled in 16 areas between 2007 and 2011. The dataset was managed on the PlutoF biodiversity workbench (http://dx.doi.org/10.15156/BIO/587471) and can also be accessed via Global Biodiversity Information Facility (GBIF, parts of datasets https://doi.org/10.15468/kuspj6 and https://doi.org/10.15468/h7qtfd). This dataset includes 3418 occurrences, namely 2727 specimens and 691 observations of fructifications belonging to 349 species of fungi. With these data, the digitised CWU herbarium (V. N. Karazin Kharkiv National University, Ukraine) doubled in size. A most detailed description of the substrate's properties and habitat for each record is provided. The specimen records are supplemented by 26 nuclear ribosomal DNA ITS sequences and six 28S sequences. Additionally, 287 photographs depicting diagnostic macro- and microscopic features of fungal fruitbodies as well as studied habitats are linked to the dataset. Most of the specimens have at least one mention in literature and relevant references are displayed as associated with specimen data. In total, 16 publication references are linked to the dataset. The dataset sheds new light on the fungal diversity of Eastern Europe. It is expected to complement other public sources of fungal occurrence information on continental and global levels in addressing macroecological and biogeographical questions.",2017-12-22 +24523465,Identifying Neisseria species by use of the 50S ribosomal protein L6 (rplF) gene.,"The comparison of 16S rRNA gene sequences is widely used to differentiate bacteria; however, this gene can lack resolution among closely related but distinct members of the same genus. This is a problem in clinical situations in those genera, such as Neisseria, where some species are associated with disease while others are not. 
Here, we identified and validated an alternative genetic target common to all Neisseria species which can be readily sequenced to provide an assay that rapidly and accurately discriminates among members of the genus. Ribosomal multilocus sequence typing (rMLST) using ribosomal protein genes has been shown to unambiguously identify these bacteria. The PubMLST Neisseria database (http://pubmlst.org/neisseria/) was queried to extract the 53 ribosomal protein gene sequences from 44 genomes from diverse species. Phylogenies reconstructed from these genes were examined, and a single 413-bp fragment of the 50S ribosomal protein L6 (rplF) gene was identified which produced a phylogeny that was congruent with the phylogeny reconstructed from concatenated ribosomal protein genes. Primers that enabled the amplification and direct sequencing of the rplF gene fragment were designed to validate the assay in vitro and in silico. Allele sequences were defined for the gene fragment, associated with particular species names, and stored on the PubMLST Neisseria database, providing a curated electronic resource. This approach provides an alternative to 16S rRNA gene sequencing, which can be readily replicated for other organisms for which more resolution is required, and it has potential applications in high-resolution metagenomic studies.",2014-02-12 +27193597,Mapping adipose and muscle tissue expression quantitative trait loci in African Americans to identify genes for type 2 diabetes and obesity.,"Relative to European Americans, type 2 diabetes (T2D) is more prevalent in African Americans (AAs). Genetic variation may modulate transcript abundance in insulin-responsive tissues and contribute to risk; yet, published studies identifying expression quantitative trait loci (eQTLs) in African ancestry populations are restricted to blood cells. 
This study aims to develop a map of genetically regulated transcripts expressed in tissues important for glucose homeostasis in AAs, critical for identifying the genetic etiology of T2D and related traits. Quantitative measures of adipose and muscle gene expression, and genotypic data were integrated in 260 non-diabetic AAs to identify expression regulatory variants. Their roles in genetic susceptibility to T2D, and related metabolic phenotypes, were evaluated by mining GWAS datasets. eQTL analysis identified 1971 and 2078 cis-eGenes in adipose and muscle, respectively. Cis-eQTLs for 885 transcripts including top cis-eGenes CHURC1, USMG5, and ERAP2 were identified in both tissues. 62.1 % of top cis-eSNPs were within ±50 kb of transcription start sites and cis-eGenes were enriched for mitochondrial transcripts. Mining GWAS databases revealed association of cis-eSNPs for more than 50 genes with T2D (e.g. PIK3C2A, RBMS1, UFSP1), gluco-metabolic phenotypes (e.g. INPP5E, SNX17, ERAP2, FN3KRP), and obesity (e.g. POMC, CPEB4). Integration of GWAS meta-analysis data from AA cohorts revealed the most significant association for cis-eSNPs of ATP5SL and MCCC1 genes, with T2D and BMI, respectively. This study developed the first comprehensive map of adipose and muscle tissue eQTLs in AAs (publically accessible at https://mdsetaa.phs.wakehealth.edu ) and identified genetically regulated transcripts for delineating genetic causes of T2D, and related metabolic phenotypes.",2016-05-19 +29775639,Surgical Management of Lower Urinary Tract Symptoms Attributed to Benign Prostatic Hyperplasia: AUA Guideline.,"PURPOSE:Male lower urinary tract symptoms (LUTS) secondary to benign prostatic hyperplasia (BPH) is common in men and can have negative effects on quality of life (QoL). It is the hope that this Guideline becomes a reference on the effective evidence-based surgical management of LUTS/BPH. 
MATERIALS AND METHODS:The evidence team searched Ovid MEDLINE, the Cochrane Library, and the Agency for Healthcare Research and Quality (AHRQ) database to identify studies indexed between January 2007 and September 2017. When sufficient evidence existed, the body of evidence was assigned a strength rating of A (high), B (moderate), or C (low) for support of Strong, Moderate, or Conditional Recommendations. In the absence of sufficient evidence, additional information is provided as Clinical Principles and Expert Opinions (table 1 in supplementary unabridged guideline, http://jurology.com/). RESULTS:This Guideline provides updated, evidence-based recommendations regarding management of LUTS/BPH utilizing surgery and minimally invasive surgical therapies; additional statements are made regarding diagnostic and pre-operative tests. Clinical statements are made in comparison to what is generally accepted as the gold standard (i.e. transurethral resection of the prostate [TURP]-monopolar and/or bipolar). This guideline is designed to be used in conjunction with the associated treatment algorithm. CONCLUSIONS:The prevalence and the severity of LUTS increases as men age and is an important diagnosis in the healthcare of patients and the welfare of society. This document will undergo additional literature reviews and updating as the knowledge regarding current treatments and future surgical options continues to expand.",2018-06-11 +30387741,A Note on GRegNetSim: A Tool for the Discrete Simulation and Analysis of Genetic Regulatory Networks. ,"Discrete simulations of genetic regulatory networks were used to study subsystems of yeast successfully. However, implementations of existing models underlying these simulations do not support a graphic interface, and require computations necessary to analyze their results to be done manually. Furthermore, differences between existing models suggest that an enriched model, encompassing both existing models, is needed. 
We developed a software tool, GRegNetSim, that allows the end-user to describe genetic regulatory networks graphically. The user can specify various transition functions at different nodes of the network, supporting, for example, threshold and gradient effects, and then apply the network to a variety of inputs. GRegNetSim displays the relationship between the inputs and the mode of behavior of the network in a graphic form that is easy to interpret. Furthermore, it can automatically extract statistical data necessary to analyze the simulations. The discrete simulations performed by GRegNetSim can be used to elucidate and predict the behavior, structure and properties of genetic regulatory networks in a unified manner. GRegNetSim is implemented as a Cytoscape App. Installation files, examples and source code, along with a detailed user guide, are freely available at https://sites.google.com/site/gregnetsim/.",2018-10-30 +28808438,ATSAS 2.8: a comprehensive data analysis suite for small-angle scattering from macromolecular solutions.,"ATSAS is a comprehensive software suite for the analysis of small-angle scattering data from dilute solutions of biological macromolecules or nanoparticles. It contains applications for primary data processing and assessment, ab initio bead modelling, and model validation, as well as methods for the analysis of flexibility and mixtures. In addition, approaches are supported that utilize information from X-ray crystallography, nuclear magnetic resonance spectroscopy or atomistic homology modelling to construct hybrid models based on the scattering data. This article summarizes the progress made during the 2.5-2.8 ATSAS release series and highlights the latest developments. 
These include AMBIMETER, an assessment of the reconstruction ambiguity of experimental data; DATCLASS, a multiclass shape classification based on experimental data; SASRES, for estimating the resolution of ab initio model reconstructions; CHROMIXS, a convenient interface to analyse in-line size exclusion chromatography data; SHANUM, to evaluate the useful angular range in measured data; SREFLEX, to refine available high-resolution models using normal mode analysis; SUPALM for a rapid superposition of low- and high-resolution models; and SASPy, the ATSAS plugin for interactive modelling in PyMOL. All these features and other improvements are included in the ATSAS release 2.8, freely available for academic users from https://www.embl-hamburg.de/biosaxs/software.html.",2017-06-26 +,AB118. Validation of next generation sequencing by Sanger sequencing,"

Background and objective

Development of the next generation sequencing (NGS) platform was driven by the completion of the Human Genome Project in 2003. With the availability of NGS, the time taken for sequencing of humongous genomic regions was greatly reduced and data generated per unit DNA was also significantly increased. Though the cost to use NGS in a clinically setting is far from ideal, economically speaking, there is a significant decrease in the average cost per sequenced base. To validate findings of NGS on mutation detected for FBN1, TGFBR2, RAF1, RTEL1, LMNA, MID2, KCNK9, DMD, SMARCA2 and IQSEC2 by using gold standard, Sanger Sequencing.

Methods

The coordinate of the mutation identified by NGS was used to retrieve the adjacent genomic sequence in UCSC Genome Browser (Available from URL: https://genome.ucsc.edu/). Targeted primers were designed with Primer 3 software (Available from URL: http://primer3.ut.ee/) based on the genomic sequence obtained from UCSC. The following step involves the optimization of a Polymerase Chain Reaction (PCR) with the designed primers to amplify the desired DNA template for the targeted region. Upon optimization, the template is purified and subjected to dye terminator sequencing to generate multiple DNA fragments of varying sizes. Lastly, the DNA fragments will be purified and analysed with an automated sequencer. The sequencer separates the DNA fragments based on their size by carrying out capillary electrophoresis.

Results

A total of 28 cases were validated with Sanger sequencing. Of them, 25 (89.3%) cases concur with the findings from NGS and 3 (10.7%) cases were false-positive calls.

Conclusions

NGS shows promise in the future molecular diagnostic regime, however, at the present moment, it needs to be done concurrently with Sanger sequencing for clinical applications.",2015-09-01 +27725737,circRNADb: A comprehensive database for human circular RNAs with protein-coding annotations.,"It has been known that circular RNAs are widely expressed in human tissues and cells, and play important regulatory roles in physiological or pathological processes. However, there is lack of comprehensively annotated human circular RNAs database. In this study we established a circRNA database, named as circRNADb, containing 32,914 human exonic circRNAs carefully selected from diversified sources. The detailed information of the circRNA, including genomic information, exon splicing, genome sequence, internal ribosome entry site (IRES), open reading frame (ORF) and references were provided in circRNADb. In addition, circRNAs were found to be able to encode proteins, which have not been reported in any species. 16328 circRNAs were annotated to have ORF longer than 100 amino acids, of which 7170 have IRES elements. 46 circRNAs from 37 genes were found to have their corresponding proteins expressed according mass spectrometry. The database provides the function of data search, browse, download, submit and feedback for the user to study particular circular RNA of interest and update the database continually. circRNADb will be built to be a biological information platform for circRNA molecules and related biological functions in the future. The database can be freely available through the web server at http://reprod.njmu.edu.cn/circrnadb.",2016-10-11 +30484023,Three-dimensional descriptors for aminergic GPCRs: dependence on docking conformation and crystal structure.,"Three-dimensional descriptors are often used to search for new biologically active compounds, in both ligand- and structure-based approaches, capturing the spatial orientation of molecules. 
They frequently constitute an input for machine learning-based predictions of compound activity or quantitative structure-activity relationship modeling; however, the distribution of their values and the accuracy of depicting compound orientations might have an impact on the power of the obtained predictive models. In this study, we analyzed the distribution of three-dimensional descriptors calculated for docking poses of active and inactive compounds for all aminergic G protein-coupled receptors with available crystal structures, focusing on the variation in conformations for different receptors and crystals. We demonstrated that the consistency in compound orientation in the binding site is rather not correlated with the affinity itself, but is more influenced by other factors, such as the number of rotatable bonds and crystal structure used for docking studies. The visualizations of the descriptors distributions were prepared and made available online at http://chem.gmum.net/vischem_stability , which enables the investigation of chemical structures referring to particular data points depicted in the figures. Moreover, the performed analysis can assist in choosing crystal structure for docking studies, helping in selection of conditions providing the best discrimination between active and inactive compounds in machine learning-based experiments.",2018-11-27 +29095980,pedigreejs: a web-based graphical pedigree editor.,"

Motivation

The collection, management and visualization of clinical pedigree (family history) data is a core activity in clinical genetics centres. However, clinical pedigree datasets can be difficult to manage, as they are time consuming to capture, and can be difficult to build, manipulate and visualize graphically. Several standalone graphical pedigree editors and drawing applications exist but there are no freely available lightweight graphical pedigree editors that can be easily configured and incorporated into web applications.

Results

We developed 'pedigreejs', an interactive graphical pedigree editor written in JavaScript, which uses standard pedigree nomenclature. Pedigreejs provides an easily configurable, extensible and lightweight pedigree editor. It makes use of an open-source Javascript library to define a hierarchical layout and to produce images in scalable vector graphics (SVG) format that can be viewed and edited in web browsers.

Availability and implementation

The software is freely available under GPL licence (https://ccge-boadicea.github.io/pedigreejs/).

Contact

tjc29@cam.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-03-01 +27515742,EnhancerAtlas: a resource for enhancer annotation and analysis in 105 human cell/tissue types.,"

Motivation

Multiple high-throughput approaches have recently been developed and allowed the discovery of enhancers on a genome scale in a single experiment. However, the datasets generated from these approaches are not fully utilized by the research community due to technical challenges such as lack of consensus enhancer annotation and integrative analytic tools.

Results

We developed an interactive database, EnhancerAtlas, which contains an atlas of 2,534,123 enhancers for 105 cell/tissue types. A consensus enhancer annotation was obtained for each cell by summation of independent experimental datasets with the relative weights derived from a cross-validation approach. Moreover, EnhancerAtlas provides a set of useful analytic tools that allow users to query and compare enhancers in a particular genomic region or associated with a gene of interest, and assign enhancers and their target genes from a custom dataset.

Availability and implementation

The database with analytic tools is available at http://www.enhanceratlas.org/ CONTACT: jiang.qian@jhmi.edu or tank1@email.chop.edu. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-10 +25502406,Genetic counselors and health literacy: the role of genetic counselors in developing a web-based resource about the Affordable Care Act.,"The Western States Genetic Services Collaborative (WSGSC) recognized the need for clear and understandable information about the Affordable Care Act (ACA) for families throughout the life course. The genetic counselors working in the WSGSC developed, tested, and implemented a web resource ( http://www.westernstatesgenetics.org/ACA_home.htm ) to help families navigate information about the ACA tailored to their life situation. The training and experience of genetic counselors provide the skills needed to translate complicated information, like that of the ACA, into formats that the general public can comprehend. The website went public in October 2013, and it has been positively received. The development of this website is a good case study in how genetic counseling skills can be applied to public health education and improving health literacy.",2014-12-16 +30611208,Development and validation of a risk score to predict mortality during TB treatment in patients with TB-diabetes comorbidity.,"

Background

Making an accurate prognosis for mortality during tuberculosis (TB) treatment in TB-diabetes (TB-DM) comorbid patients remains a challenge for health professionals, especially in low TB prevalent populations, due to the lack of a standardized prognostic model.

Methods

Using de-identified data from TB-DM patients from Texas who received TB treatment, had a treatment outcome of completed treatment or died before completion, and were reported to the National TB Surveillance System from January 2010-December 2016, we developed and internally validated a mortality scoring system, based on the regression coefficients.

Results

Of 1227 included TB-DM patients, 112 (9.1%) died during treatment. The score used nine characteristics routinely collected by most TB programs. Patients were divided into three groups based on their score: low-risk (< 12 points), medium-risk (12-21 points) and high-risk (≥22 points). The model had good performance (with an area under the receiver operating characteristic (ROC) curve of 0.83 in development and 0.82 in validation), and good calibration. A practical mobile calculator app was also created ( https://oaa.app.link/Isqia5rN6K ).

Conclusion

Using demographic and clinical characteristics which are available from most TB programs at the patient's initial visits, our simple scoring system had good performance and may be a practical clinical tool for TB health professionals in identifying TB-DM comorbid patients with a high mortality risk.",2019-01-05 +22039163,CELLPEDIA: a repository for human cell information for cell studies and differentiation analyses.,"CELLPEDIA is a repository database for current knowledge about human cells. It contains various types of information, such as cell morphologies, gene expression and literature references. The major role of CELLPEDIA is to provide a digital dictionary of human cells for the biomedical field, including support for the characterization of artificially generated cells in regenerative medicine. CELLPEDIA features (i) its own cell classification scheme, in which whole human cells are classified by their physical locations in addition to conventional taxonomy; and (ii) cell differentiation pathways compiled from biomedical textbooks and journal papers. Currently, human differentiated cells and stem cells are classified into 2260 and 66 cell taxonomy keys, respectively, from which 934 parent-child relationships reported in cell differentiation or transdifferentiation pathways are retrievable. As far as we know, this is the first attempt to develop a digital cell bank to function as a public resource for the accumulation of current knowledge about human cells. The CELLPEDIA homepage is freely accessible except for the data submission pages that require authentication (please send a password request to cell-info@cbrc.jp). Database URL: http://cellpedia.cbrc.jp/",2011-10-29 +29297414,An automatic approach for constructing a knowledge base of symptoms in Chinese.,"BACKGROUND:While a large number of well-known knowledge bases (KBs) in life science have been published as Linked Open Data, there are few KBs in Chinese. 
However, KBs in Chinese are necessary when we want to automatically process and analyze electronic medical records (EMRs) in Chinese. Of all, the symptom KB in Chinese is the most seriously in need, since symptoms are the starting point of clinical diagnosis. RESULTS:We publish a public KB of symptoms in Chinese, including symptoms, departments, diseases, medicines, and examinations as well as relations between symptoms and the above related entities. To the best of our knowledge, there is no such KB focusing on symptoms in Chinese, and the KB is an important supplement to existing medical resources. Our KB is constructed by fusing data automatically extracted from eight mainstream healthcare websites, three Chinese encyclopedia sites, and symptoms extracted from a larger number of EMRs as supplements. METHODS:Firstly, we design data schema manually by reference to the Unified Medical Language System (UMLS). Secondly, we extract entities from eight mainstream healthcare websites, which are fed as seeds to train a multi-class classifier and classify entities from encyclopedia sites and train a Conditional Random Field (CRF) model to extract symptoms from EMRs. Thirdly, we fuse data to solve the large-scale duplication between different data sources according to entity type alignment, entity mapping, and attribute mapping. Finally, we link our KB to UMLS to investigate similarities and differences between symptoms in Chinese and English. CONCLUSIONS:As a result, the KB has more than 26,000 distinct symptoms in Chinese including 3968 symptoms in traditional Chinese medicine and 1029 synonym pairs for symptoms. The KB also includes concepts such as diseases and medicines as well as relations between symptoms and the above related entities. We also link our KB to the Unified Medical Language System and analyze the differences between symptoms in the two KBs. 
We released the KB as Linked Open Data and a demo at https://datahub.io/dataset/symptoms-in-chinese .",2017-09-20 +,Identification and mapping of a novel Turnip mosaic virus resistance gene TuRBCS01 in Chinese cabbage (Brassica rapa L.),"We aimed to identify Turnip mosaic virus (TuMV) resistance genes in Chinese cabbage by analysing the TuMV resistance of 43 P₁(resistant), 88 P₂(susceptible), 26 F₁, 104 B₁(F₁ × P₁), 108 B₂(F₁ × P₂) and 509 F₂individuals. All parents and progeny populations were mechanically inoculated with TuMV‐C4. Both F₁and B₁populations showed TuMV resistance. Resistant: susceptible ratios in the B₂and F₂populations were 1 : 1 and 3 : 1, respectively. TuMV resistance in P₁was controlled by a dominant gene, TuRBCS01. Bulked segregation analysis was performed to identify simple sequence repeat or insertion or deletion markers linked to TuRBCS01. Data from 108 B₂individuals with resistant or susceptible phenotypes were analysed using mapmaker/exp 3.0. Polymorphic marker sequences were blast searched on http://brassicadb.org/brad/. TuRBCS01 was found to be linked to eight markers: SAAS_mDN192117a_159 (3.3 cM), SAAS_mDN192117b_196 (4.0 cM), SAAS_mDN192403_148 (13.0 cM), SAAS_mGT084561_233 (6.8 cM), BrID10723 (3.3 cM), mBr4041 (3.3 cM), SAAS_mBr4055_194 (2.6 cM) and mBr4068 (4.0 cM). Further, TuRBCS01 was mapped to a 1.98‐Mb region on chromosome A04 between markers BrID10723 and SAAS_mBr4055_194.",2015-04-01 +30198375,Implementation and Evaluation of a Physical Activity and Dietary Program in Federal Incarcerated Females.,"The purpose of this 3-month quasi-experimental pilot study was to examine the effect of a physical activity and dietary education program on body mass index (BMI) and resilience. Participants were given data-storing pedometers to record their physical activity, attended classes on healthy eating, and used portion control tools from http://ChooseMyPlate.gov . MyPlate usage and commissary purchases were collected weekly. 
BMI and resilience scores were measured at baseline, 6 weeks, and 12 weeks. Twenty-nine female prisoners completed the study. There was a statistically significant reduction in BMI after 12 weeks (χ2 = 7.56, p = .023) and resilience levels increased but did not reach statistical significance (χ2 = 1.66, p = .437). A physical activity and dietary intervention delivered by a correctional health nurse practitioner was an efficacious approach to reducing BMI and improving resilience among female prisoners.",2018-09-09 +27376128,BcCluster: A Bladder Cancer Database at the Molecular Level.,"

Background

Bladder Cancer (BC) has two clearly distinct phenotypes. Non-muscle invasive BC has good prognosis and is treated with tumor resection and intravesical therapy whereas muscle invasive BC has poor prognosis and requires usually systemic cisplatin based chemotherapy either prior to or after radical cystectomy. Neoadjuvant chemotherapy is not often used for patients undergoing cystectomy. High-throughput analytical omics techniques are now available that allow the identification of individual molecular signatures to characterize the invasive phenotype. However, a large amount of data produced by omics experiments is not easily accessible since it is often scattered over many publications or stored in supplementary files.

Objective

To develop a novel open-source database, BcCluster (http://www.bccluster.org/), dedicated to the comprehensive molecular characterization of muscle invasive bladder carcinoma.

Materials

A database was created containing all reported molecular features significant in invasive BC. The query interface was developed in Ruby programming language (version 1.9.3) using the web-framework Rails (version 4.1.5) (http://rubyonrails.org/).

Results

BcCluster contains the data from 112 published references, providing 1,559 statistically significant features relative to BC invasion. The database also holds 435 protein-protein interaction data and 92 molecular pathways significant in BC invasion. The database can be used to retrieve binding partners and pathways for any protein of interest. We illustrate this possibility using survivin, a known BC biomarker.

Conclusions

BcCluster is an online database for retrieving molecular signatures relative to BC invasion. This application offers a comprehensive view of BC invasiveness at the molecular level and allows formulation of research hypotheses relevant to this phenotype.",2016-01-07 +29776329,CoNVaQ: a web tool for copy number variation-based association studies.,"BACKGROUND:Copy number variations (CNVs) are large segments of the genome that are duplicated or deleted. Structural variations in the genome have been linked to many complex diseases. Similar to how genome-wide association studies (GWAS) have helped discover single-nucleotide polymorphisms linked to disease phenotypes, the extension of GWAS to CNVs has aided the discovery of structural variants associated with human traits and diseases. RESULTS:We present CoNVaQ, an easy-to-use web-based tool for CNV-based association studies. The web service allows users to upload two sets of CNV segments and search for genomic regions where the occurrence of CNVs is significantly associated with the phenotype. CoNVaQ provides two models: a simple statistical model using Fisher's exact test and a novel query-based model matching regions to user-defined queries. For each region, the method computes a global q-value statistic by repeated permutation of samples among the populations. We demonstrate our platform by using it to analyze a data set of HPV-positive and HPV-negative penile cancer patients. CONCLUSIONS:CoNVaQ provides a simple workflow for performing CNV-based association studies. It is made available as a web platform in order to provide a user-friendly workflow for biologists and clinicians to carry out CNV data analysis without installing any software. Through the web interface, users are also able to analyze their results to find overrepresented GO terms and pathways. In addition, our method is also available as a package for the R programming language. 
CoNVaQ is available at https://convaq.compbio.sdu.dk .",2018-05-18 +29883471,Pathway based therapeutic targets identification and development of an interactive database CampyNIBase of Campylobacter jejuni RM1221 through non-redundant protein dataset.,"The bacterial species Campylobacter jejuni RM1221 (CjR) is the primary cause of campylobacteriosis which poses a global threat for human health. Over the years the efficacy of antibiotic treatment is becoming more fruitless due to the development of multiple drug resistant strains. Therefore, identification of new drug targets is a valuable tool for the development of new treatments for affected patients and can be obtained by targeting essential protein(s) of CjR. We conducted this in silico study in order to identify therapeutic targets by subtractive CjR proteome analysis. The most important proteins of the CjR proteome, which includes chokepoint enzymes, plasmid, virulence and antibiotic resistant proteins were annotated and subjected to subtractive analyses to filter out the CjR essential proteins from duplicate or human homologous proteins. Through the subtractive and characterization analysis we have identified 38 eligible therapeutic targets including 1 potential vaccine target. Also, 12 potential targets were found in interactive network, 5 targets to be dealt with FDA approved drugs and one pathway as potential pathway based drug target. In addition, a comprehensive database 'CampyNIBase' has also been developed. Besides the results of this study, the database is enriched with other information such as 3D models of the identified targets, experimental structures and Expressed Sequence Tag (EST) sequences. This study, including the database might be exploited for future research and the identification of effective therapeutics against campylobacteriosis. 
URL: (http://nib.portal.gov.bd/site/page/4516e965-8935-4129-8c3f-df95e754c562#Banner).",2018-06-08 +29990050,Simultaneous Clustering and Feature Weighting Using Multiobjective Optimization for Identifying Functionally Similar miRNAs.,"MicroRNAs (miRNAs) are a type of RNAs, which are responsible for monitoring the gene expression values. Recent research asserts that miRNAs form some clustering on chromosomes. The miRNAs belonging to a particular cluster are highly similar in terms of their activity and they are termed as ""coregulated"" miRNAs. The current paper presents an approach that simultaneously performs two tasks: i) clustering of miRNAs into different categories based on some similarity measures ii) identification of proper weight values for different time points with respect to which expression values are available. In general, a large number of expression values are available for a given miRNA data set. All these values may not be suitable to be used equally to measure the similarity between two miRNAs. In the current study, the problem of proper selection of weight values for different time points and then determining the proper partitioning from the given miRNA data set utilizing the similarity computed using the new set of weight values is formulated as an optimization problem where several cluster validity indices are optimized as the goodness measures. To that end, a multiobjective differential evolution based optimization technique is utilized. The supremacy of the proposed technique is tested on three miRNA data sets in comparison to some recent approaches in terms of some popular performance measures like Silhouette index and DB-index. The observations are further supported by statistical and biological significance tests. Supplementary information is available at https://www.iitp.ac.in/~sriparna/journals.html.",2017-12-19 +30371672,Selecting Multiple Biomarker Subsets with Similarly Effective Binary Classification Performances. 
,"Biomarker detection is one of the more important biomedical questions for high-throughput 'omics' researchers, and almost all existing biomarker detection algorithms generate one biomarker subset with the optimized performance measurement for a given dataset. However, a recent study demonstrated the existence of multiple biomarker subsets with similarly effective or even identical classification performances. This protocol presents a simple and straightforward methodology for detecting biomarker subsets with binary classification performances, better than a user-defined cutoff. The protocol consists of data preparation and loading, baseline information summarization, parameter tuning, biomarker screening, result visualization and interpretation, biomarker gene annotations, and result and visualization exportation at publication quality. The proposed biomarker screening strategy is intuitive and demonstrates a general rule for developing biomarker detection algorithms. A user-friendly graphical user interface (GUI) was developed using the programming language Python, allowing biomedical researchers to have direct access to their results. The source code and manual of kSolutionVis can be downloaded from http://www.healthinformaticslab.org/supp/resources.php.",2018-10-11 +28968643,CGmapTools improves the precision of heterozygous SNV calls and supports allele-specific methylation detection and visualization in bisulfite-sequencing data.,"

Motivation

DNA methylation is important for gene silencing and imprinting in both plants and animals. Recent advances in bisulfite sequencing allow detection of single nucleotide variations (SNVs) achieving high sensitivity, but accurately identifying heterozygous SNVs from partially C-to-T converted sequences remains challenging.

Results

We designed two methods, BayesWC and BinomWC, that substantially improved the precision of heterozygous SNV calls from ∼80% to 99% while retaining comparable recalls. With these SNV calls, we provided functions for allele-specific DNA methylation (ASM) analysis and visualizing the methylation status on reads. Applying ASM analysis to a previous dataset, we found that an average of 1.5% of investigated regions showed allelic methylation, which were significantly enriched in transposon elements and likely to be shared by the same cell-type. A dynamic fragment strategy was utilized for DMR analysis in low-coverage data and was able to find differentially methylated regions (DMRs) related to key genes involved in tumorigenesis using a public cancer dataset. Finally, we integrated 40 applications into the software package CGmapTools to analyze DNA methylomes. This package uses CGmap as the format interface, and designs binary formats to reduce the file size and support fast data retrieval, and can be applied for context-wise, gene-wise, bin-wise, region-wise and sample-wise analyses and visualizations.

Availability and implementation

The CGmapTools software is freely available at https://cgmaptools.github.io/.

Contact

guoweilong@cau.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-02-01 +30918073,"By Binding CD80 and CD86, the Vaccinia Virus M2 Protein Blocks Their Interactions with both CD28 and CTLA4 and Potentiates CD80 Binding to PD-L1. ","In this article we report that the M2 protein encoded by the vaccinia virus is secreted as a homo-oligomer by infected cells and binds two central costimulation molecules, CD80 (B7-1) and CD86 (B7-2). These interactions block the ligation of the two B7 proteins to both soluble CD28 and soluble cytotoxic T-lymphocyte associated protein 4 (CTLA4) but favor the binding of soluble PD-L1 to soluble CD80. M2L gene orthologues are found in several other poxviruses, and the B7-CD28/CTLA4 blocking activity has been identified for several culture supernatants of orthopoxvirus-infected cells and for a recombinant myxoma virus M2 protein homolog (i.e., Gp120-like protein, or Gp120LP). Overall, these data indicate that the M2 poxvirus family of proteins may be involved in immunosuppressive activities broader than the NF-κB inhibition already reported (R. Gedey, X. L. Jin, O. Hinthong, and J. L. Shisler, J Virol 80:8676-8685, 2006, https://doi.org/10.1128/JVI.00935-06). A Copenhagen vaccinia virus with a deletion of the nonessential M2L locus was generated and compared with its parental virus. This M2L-deleted vaccinia virus, unlike the parental virus, does not generate interference with the B7-CD28/CTLA4/PD-L1 interactions. Moreover, this deletion did not affect any key features of the virus (in vitro replication, oncolytic activities in vitro and in vivo, and intratumoral expression of a transgene in an immunocompetent murine model). 
Altogether, these first results suggest that the M2 protein has the potential to be used as a new immunosuppressive biotherapeutic and that the M2L-deleted vaccinia virus represents an attractive new oncolytic platform with an improved immunological profile. IMPORTANCE: The vaccinia virus harbors in its genome several genes dedicated to the inhibition of the host immune response. Among them, M2L was reported to inhibit the intracellular NF-κB pathway. We report here several new putative immunosuppressive activities of M2 protein. M2 protein is secreted and binds cornerstone costimulatory molecules (CD80/CD86). M2 binding to CD80/CD86 blocks their interaction with soluble CD28/CTLA4 but also favors the soluble PD-L1-CD80 association. These findings open the way for new investigations deciphering the immune system effects of soluble M2 protein. Moreover, a vaccinia virus with a deletion of its M2L has been generated and characterized as a new oncolytic platform. The replication and oncolytic activities of the M2L-deleted vaccinia virus are indistinguishable from those of the parental virus. More investigations are needed to characterize in detail the immune response triggered against both the tumor and the virus by this M2-defective vaccinia virus.",2019-05-15 +30159953,Establishing intra- and inter-vendor reproducibility of T1 relaxation time measurements with 3T MRI.,"

Purpose

Parametric imaging methods (e.g., T1 relaxation time mapping) have been shown to be more reproducible across time and vendors than weighted (e.g., T1 -weighted) images. The purpose of this work was to more extensively evaluate the validity of this assertion.

Methods

Seven volunteers underwent twice-repeated acquisitions of variable flip-angle T1 mapping, including B1 + calibration, on a 3T Philips Achieva and 3T Siemens Trio scanner. Intra-scanner and inter-vendor T1 variability were calculated. To determine T1 reproducibility levels in longitudinal settings, or after changing hardware or software, four additional data sets were acquired from two of the participants; one participant was scanned on a different 3T Siemens Trio scanner and another on the same 3T Philips Achieva scanner but after a software upgrade.

Results

Intra-scanner variability of voxel-wise T1 values was consistent between the two vendors, averaging 0.7/0.7/1.3/1.4% in white matter/cortical gray matter/subcortical gray matter/cerebellum, respectively. We observed, however, a systematic bias between the two vendors of 10.0/7.8/8.6/10.0%, respectively. The T1 bias across two scanners of the same model was greater than intra-scanner variability, although still only at 1.4/1.0/1.9/2.3%, respectively. A greater bias was identified for data sets acquired before/after software upgrade in white matter/cortical gray matter (3.6/2.7%) whereas variability in subcortical gray matter/cerebellum was comparable (1.7/1.9%).

Conclusion

We established intra- and inter-vendor reproducibility levels for a widely used T1 mapping protocol. We anticipate that these results will guide the design of multi-center studies, particularly those encompassing multiple vendors. Furthermore, this baseline level of reproducibility should be established or surpassed during the piloting phase of such studies.",2018-08-29 +29602835,Assessing the health effects associated with occupational radiation exposure in Korean radiation workers: protocol for a prospective cohort study.,"INTRODUCTION:The cancer risk of radiation exposure in the moderate-to-high dose range has been well established. However, the risk remains unclear at low-dose ranges with protracted low-dose rate exposure, which is typical of occupational exposure. Several epidemiological studies of Korean radiation workers have been conducted, but the data were analysed retrospectively in most cases. Moreover, groups with relatively high exposure, such as industrial radiographers, have been neglected. Therefore, we have launched a prospective cohort study of all Korean radiation workers to assess the health effects associated with occupational radiation exposure. METHODS AND ANALYSIS:Approximately 42 000 Korean radiation workers registered with the Nuclear Safety and Security Commission from 2016 to 2017 are the initial target population of this study. Cohort participants are to be enrolled through a nationwide self-administered questionnaire survey between 24 May 2016 and 30 June 2017. As of 31 March 2017, 22 982 workers are enrolled in the study corresponding to a response rate of 75%. This enrolment will be continued at 5-year intervals to update information on existing study participants and recruit newly hired workers. Survey data will be linked with the national dose registry, the national cancer registry, the national vital statistics registry and national health insurance data via personal identification numbers. 
Age-specific and sex-specific standardised incidence and mortality ratios will be calculated for overall comparisons of cancer risk. For dose-response assessment, excess relative risk (per Gy) and excess absolute risk (per Gy) will be estimated with adjustments for birth year and potential confounders, such as lifestyle factors and socioeconomic status. ETHICS AND DISSEMINATION:This study has received ethical approval from the institutional review board of the Korea Institute of Radiological and Medical Sciences (IRB No. K-1603-002-034). All participants provided written informed consent prior to enrolment. The findings of the study will be disseminated through scientific peer-reviewed journals and be provided to the public, including radiation workers, via the study website (http://www.rhs.kr/) and onsite radiation safety education.",2018-03-30 +27131383,LassoProt: server to analyze biopolymers with lassos.,"The LassoProt server, http://lassoprot.cent.uw.edu.pl/, enables analysis of biopolymers with entangled configurations called lassos. The server offers various ways of visualizing lasso configurations, as well as their time trajectories, with all the results and plots downloadable. Broad spectrum of applications makes LassoProt a useful tool for biologists, biophysicists, chemists, polymer physicists and mathematicians. The server and our methods have been validated on the whole PDB, and the results constitute the database of proteins with complex lassos, supported with basic biological data. This database can serve as a source of information about protein geometry and entanglement-function correlations, as a reference set in protein modeling, and for many other purposes.",2016-04-29 +22108457,Dynamics of the G protein-coupled vasopressin V2 receptor signaling network revealed by quantitative phosphoproteomics.,"G protein-coupled receptors (GPCRs) regulate diverse physiological processes, and many human diseases are due to defects in GPCR signaling. 
To identify the dynamic response of a signaling network downstream from a prototypical G(s)-coupled GPCR, the vasopressin V2 receptor, we have carried out multireplicate, quantitative phosphoproteomics with iTRAQ labeling at four time points following vasopressin exposure at a physiological concentration in cells isolated from rat kidney. A total of 12,167 phosphopeptides were identified from 2,783 proteins, with 273 changing significantly in abundance with vasopressin. Two-dimensional clustering of phosphopeptide time courses and Gene Ontology terms revealed that ligand binding to the V2 receptor affects more than simply the canonical cyclic adenosine monophosphate-protein kinase A and arrestin pathways under physiological conditions. The regulated proteins included key components of actin cytoskeleton remodeling, cell-cell adhesion, mitogen-activated protein kinase signaling, Wnt/β-catenin signaling, and apoptosis pathways. These data suggest that vasopressin can regulate an array of cellular functions well beyond its classical role in regulating water and solute transport. These results greatly expand the current view of GPCR signaling in a physiological context and shed new light on potential roles for this signaling network in disorders such as polycystic kidney disease. Finally, we provide an online resource of physiologically regulated phosphorylation sites with dynamic quantitative data (http://helixweb.nih.gov/ESBL/Database/TiPD/index.html).",2011-11-21 +29873307,Corrigendum: Potential role of the glycolytic oscillator in acute hypoxia in tumors (2015 Phys. Med. Biol. 60 9215). ,"At the time of publication, our group had performed short tandem repeat (STR) testing on the SCC22B cell line and believed that had been correctly identified. As part of a recent comprehensive process to confirm the identity of cell lines in use in our lab, we repeated STR testing on all cell lines. 
These results were compared to the ExPASy Cellosaurus database (http://web.expasy.org/cellosaurus/). One cell line used in this manuscript was a near perfect match for T24 (CVCL_0554), a bladder carcinoma cell line commonly found as a cellular contaminant. Although we are unable to test the exact cells used in this manuscript, we believe that the cells labeled as SCC22B are most likely to actually be T24. The authors believe that neither the results nor the conclusions have been significantly changed on the basis of the specific cell line utilized.",2018-06-06 +27773681,Topological language for RNA.,"In this paper we introduce a novel, context-free grammar, RNAFeatures*, capable of generating any RNA structure including pseudoknot structures (pk-structure). We represent pk-structures as orientable fatgraphs, which naturally leads to a filtration by their topological genus. Within this framework, RNA secondary structures correspond to pk-structures of genus zero. RNAFeatures* acts on formal, arc-labeled RNA secondary structures, called λ-structures. λ-structures correspond one-to-one to pk-structures together with some additional information. This information consists of the specific rearrangement of the backbone, by which a pk-structure can be made cross-free. RNAFeatures* is an extension of the grammar for secondary structures and employs an enhancement by labelings of the symbols as well as the production rules. We discuss how to use RNAFeatures* to obtain a stochastic context-free grammar for pk-structures, using data of RNA sequences and structures. The induced grammar facilitates fast Boltzmann sampling and statistical analysis. As a first application, we present an O(nlog (n)) runtime algorithm which samples pk-structures based on ninety tRNA sequences and structures from the Nucleic Acid Database (NDB).

Availability

the source code for simulation results is available at http://staff.vbi.vt.edu/fenixh/TPstructure.zip. The code is written in C and compiled by Xcode.",2016-10-20 +27245161,A rule-based model of insulin signalling pathway.,"

Background

The insulin signalling pathway (ISP) is an important biochemical pathway, which regulates some fundamental biological functions such as glucose and lipid metabolism, protein synthesis, cell proliferation, cell differentiation and apoptosis. In the last years, different mathematical models based on ordinary differential equations have been proposed in the literature to describe specific features of the ISP, thus providing a description of the behaviour of the system and its emerging properties. However, protein-protein interactions potentially generate a multiplicity of distinct chemical species, an issue referred to as ""combinatorial complexity"", which results in defining a high number of state variables equal to the number of possible protein modifications. This often leads to complex, error prone and difficult to handle model definitions.

Results

In this work, we present a comprehensive model of the ISP, which integrates three models previously available in the literature by using the rule-based modelling (RBM) approach. RBM allows for a simple description of a number of signalling pathway characteristics, such as the phosphorylation of signalling proteins at multiple sites with different effects, the simultaneous interaction of many molecules of the signalling pathways with several binding partners, and the information about subcellular localization where reactions take place. Thanks to its modularity, it also allows an easy integration of different pathways. After RBM specification, we simulated the dynamic behaviour of the ISP model and validated it using experimental data. We then examined the predicted profiles of all the active species and clustered them in four clusters according to their dynamic behaviour. Finally, we used parametric sensitivity analysis to show the role of negative feedback loops in controlling the robustness of the system.

Conclusions

The presented ISP model is a powerful tool for data simulation and can be used in combination with experimental approaches to guide the experimental design. The model is available at http://sysbiobig.dei.unipd.it/ and was submitted to Biomodels Database ( https://www.ebi.ac.uk/biomodels-main/ # MODEL 1604100005).",2016-06-01 +30225284,Fusarium graminearum1H NMR metabolomics.,"Raw 1H NMR spectra of Fusarium graminearum hyphae can be found at the website of the pesticide metabolomics group (PMG) of the Agricultural University of Athens at the address: http://www.aua.gr/pesticide-metabolomicsgroup/Resources/Fusarium_graminearum_NMR_spectra.html, accession number PMG-01-17. The data set support the research article ""Implication of Fusarium graminearum Primary Metabolism in its Resistance to Benzimidazole Fungicides as revealed by 1H NMR Metabolomics"" [1].",2018-05-01 +26511329,Novel gene sets improve set-level classification of prokaryotic gene expression data.,"

Background

Set-level classification of gene expression data has received significant attention recently. In this setting, high-dimensional vectors of features corresponding to genes are converted into lower-dimensional vectors of features corresponding to biologically interpretable gene sets. The dimensionality reduction brings the promise of a decreased risk of overfitting, potentially resulting in improved accuracy of the learned classifiers. However, recent empirical research has not confirmed this expectation. Here we hypothesize that the reported unfavorable classification results in the set-level framework were due to the adoption of unsuitable gene sets defined typically on the basis of the Gene ontology and the KEGG database of metabolic networks. We explore an alternative approach to defining gene sets, based on regulatory interactions, which we expect to collect genes with more correlated expression. We hypothesize that such more correlated gene sets will enable to learn more accurate classifiers.

Methods

We define two families of gene sets using information on regulatory interactions, and evaluate them on phenotype-classification tasks using public prokaryotic gene expression data sets. From each of the two gene-set families, we first select the best-performing subtype. The two selected subtypes are then evaluated on independent (testing) data sets against state-of-the-art gene sets and against the conventional gene-level approach.

Results

The novel gene sets are indeed more correlated than the conventional ones, and lead to significantly more accurate classifiers. The novel gene sets are indeed more correlated than the conventional ones, and lead to significantly more accurate classifiers.

Conclusion

Novel gene sets defined on the basis of regulatory interactions improve set-level classification of gene expression data. The experimental scripts and other material needed to reproduce the experiments are available at http://ida.felk.cvut.cz/novelgenesets.tar.gz.",2015-10-28 +26743511,Homology-driven assembly of NOn-redundant protEin sequence sets (NOmESS) for mass spectrometry.,"

Unlabelled

To enable mass spectrometry (MS)-based proteomic studies with poorly characterized organisms, we developed a computational workflow for the homology-driven assembly of a non-redundant reference sequence dataset. In the automated pipeline, translated DNA sequences (e.g. ESTs, RNA deep-sequencing data) are aligned to those of a closely related and fully sequenced organism. Representative sequences are derived from each cluster and joined, resulting in a non-redundant reference set representing the maximal available amino acid sequence information for each protein. We here applied NOmESS to assemble a reference database for the widely used model organism Xenopus laevis and demonstrate its use in proteomic applications.

Availability and implementation

NOmESS is written in C#. The source code as well as the executables can be downloaded from http://www.biochem.mpg.de/cox Execution of NOmESS requires BLASTp and cd-hit in addition.

Contact

cox@biochem.mpg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-06 +30169669,Antenatal Vitamin D Status Is Not Associated with Standard Neurodevelopmental Assessments at Age 5 Years in a Well-Characterized Prospective Maternal-Infant Cohort.,"

Background

Although animal studies show evidence for a role of vitamin D during brain development, data from human studies show conflicting signals.

Objective

We aimed to explore associations between maternal and neonatal vitamin D status with childhood neurodevelopmental outcomes.

Methods

Comprehensive clinical, demographic, and lifestyle data were collected prospectively in 734 maternal-infant dyads from the Cork BASELINE Birth Cohort Study. Serum 25-hydroxyvitamin D [25(OH)D] concentrations were quantified at 15 weeks of gestation and in umbilical cord sera at birth via a CDC-accredited liquid chromatography-tandem mass spectrometry method. Children were assessed at age 5 y through the use of the Kaufman Brief Intelligence Test (2nd Edition, KBIT-2) and the Child Behaviour Checklist (CBCL). Linear regression was used to explore associations between 25(OH)D and neurodevelopmental outcomes.

Results

25(OH)D concentrations were <30 nmol/L in 15% of maternal and 45% of umbilical cord sera and <50 nmol/L in 42% of mothers and 80% of cords. At age 5 y, the mean ± SD KBIT-2 intelligence quotient (IQ) composite score was 104.6 ± 8.6; scores were 107.2 ± 10.0 in verbal and 99.8 ± 8.8 in nonverbal tasks. Developmental delay (scores <85) was seen in <3% of children across all domains. The mean ± SD CBCL total problem score was 21.3 ± 17.5; scores in the abnormal/clinical range for internal, external, and total problem scales were present in 12%, 4%, and 6% of participants, respectively. KBIT-2 and CBCL subscale scores at 5 y were not different between children exposed to low antenatal vitamin D status, either at 30 or 50 nmol/L 25(OH)D thresholds. Neither maternal nor cord 25(OH)D (per 10 nmol/L) were associated with KBIT-2 IQ composite scores [adjusted β (95% CI): maternal -0.01 (-0.03, 0.02); cord 0.01 (-0.03, 0.04)] or CBCL total problem scores [maternal 0.01 (-0.04, 0.05); cord 0.01 (-0.07, 0.09)].

Conclusion

In this well-characterized prospective maternal-infant cohort, we found no evidence that antenatal 25(OH)D concentrations are associated with neurodevelopmental outcomes at 5 y. The BASELINE Study was registered at www.clinicaltrials.gov as NCT01498965; the SCOPE Study was registered at http://www.anzctr.org.au as ACTRN12607000551493.",2018-10-01 +30411495,Development and validation of an optimized prediction of mortality for candidates awaiting liver transplantation.,"Since 2002, the Model for End-Stage Liver Disease (MELD) has been used to rank liver transplant candidates. However, despite numerous revisions, MELD allocation still does not allow for equitable access to all waitlisted candidates. An optimized prediction of mortality (OPOM) was developed (http://www.opom.online) utilizing machine-learning optimal classification tree models trained to predict a candidate's 3-month waitlist mortality or removal utilizing the Standard Transplant Analysis and Research (STAR) dataset. The Liver Simulated Allocation Model (LSAM) was then used to compare OPOM to MELD-based allocation. Out-of-sample area under the curve (AUC) was also calculated for candidate groups of increasing disease severity. OPOM allocation, when compared to MELD, reduced mortality on average by 417.96 (406.8-428.4) deaths every year in LSAM analysis. Improved survival was noted across all candidate demographics, diagnoses, and geographic regions. OPOM delivered a substantially higher AUC across all disease severity groups. OPOM more accurately and objectively prioritizes candidates for liver transplantation based on disease severity, allowing for more equitable allocation of livers with a resultant significant number of additional lives saved every year. 
These data demonstrate the potential of machine learning technology to help guide clinical practice, and potentially guide national policy.",2018-12-06 +21089065,Recommendations for genetic variation data capture in developing countries to ensure a comprehensive worldwide data collection.,"Developing countries have significantly contributed to the elucidation of the genetic basis of both common and rare disorders, providing an invaluable resource of cases due to large family sizes, consanguinity, and potential founder effects. Moreover, the recognized depth of genomic variation in indigenous African populations, reflecting the ancient origins of humanity on the African continent, and the effect of selection pressures on the genome, will be valuable in understanding the range of both pathological and nonpathological variations. The involvement of these populations in accurately documenting the extant genetic heterogeneity is more than essential. Developing nations are regarded as key contributors to the Human Variome Project (HVP; http://www.humanvariomeproject.org), a major effort to systematically collect mutations that contribute to or cause human disease and create a cyber infrastructure to tie databases together. However, biomedical research has not been the primary focus in these countries even though such activities are likely to produce economic and health benefits for all. Here, we propose several recommendations and guidelines to facilitate participation of developing countries in genetic variation data documentation, ensuring an accurate and comprehensive worldwide data collection. We also summarize a few well-coordinated genetic data collection initiatives that would serve as paradigms for similar projects.",2011-01-01 +30542989,Cancer Gene Discovery by Network Analysis of Somatic Mutations Using the MUFFINN Server.,"Identifying genes that are capable of inducing tumorigenesis has been a major challenge in cancer research. 
In many cases, such genes frequently show somatic mutations in tumor samples; thus various computational methods for predicting cancer genes have been developed based on ""significantly mutated genes."" However, this approach is intrinsically limited by the fact that there are many cancer genes infrequently mutated in cancer genomes. Therefore, we recently developed MUFFINN (Mutations For Functional Impact on Network Neighbors), a method for cancer gene prediction based not only on mutation occurrences in each gene but also those of neighbors in functional networks. This enables the identification of cancer genes with infrequent mutation occurrence. We demonstrated that MUFFINN could retrieve known cancer genes more efficiently than gene-based methods and predicted cancer genes with low mutation occurrences in tumor samples. Users can freely access a web server ( http://www.inetbio.org/muffinn ) and run predictions with either public or private data of cancer somatic mutations. For given information of mutation occurrence profiles, the MUFFINN server returns lists of candidate cancer genes by four distinct predictions with different combinations between gene networks and scoring algorithms. Stand-alone software is also available, which allows MUFFINN to be run on local machines with a custom gene network. Here, we present an overall guideline for using the MUFFINN web server and stand-alone software for the discovery of novel cancer genes.",2019-01-01 +27164621,Prioritizing Chemicals for Risk Assessment Using Chemoinformatics: Examples from the IARC Monographs on Pesticides.,"

Background

Identifying cancer hazards is the first step towards cancer prevention. The International Agency for Research on Cancer (IARC) Monographs Programme, which has evaluated nearly 1,000 agents for their carcinogenic potential since 1971, typically selects agents for hazard identification on the basis of public nominations, expert advice, published data on carcinogenicity, and public health importance.

Objectives

Here, we present a novel and complementary strategy for identifying agents for hazard evaluation using chemoinformatics, database integration, and automated text mining.

Discussion

To inform selection among a broad range of pesticides nominated for evaluation, we identified and screened nearly 6,000 relevant chemical structures, after which we systematically compiled information on 980 pesticides, creating network maps that allowed cluster visualization by chemical similarity, pesticide class, and publicly available information concerning cancer epidemiology, cancer bioassays, and carcinogenic mechanisms. For the IARC Monograph meetings that took place in March and June 2015, this approach supported high-priority evaluation of glyphosate, malathion, parathion, tetrachlorvinphos, diazinon, p,p'-dichlorodiphenyltrichloroethane (DDT), lindane, and 2,4-dichlorophenoxyacetic acid (2,4-D).

Conclusions

This systematic approach, accounting for chemical similarity and overlaying multiple data sources, can be used by risk assessors as well as by researchers to systematize, inform, and increase efficiency in selecting and prioritizing agents for hazard identification, risk assessment, regulation, or further investigation. This approach could be extended to an array of outcomes and agents, including occupational carcinogens, drugs, and foods. Citation: Guha N, Guyton KZ, Loomis D, Barupal DK. 2016. Prioritizing chemicals for risk assessment using chemoinformatics: examples from the IARC Monographs on Pesticides. Environ Health Perspect 124:1823-1829; http://dx.doi.org/10.1289/EHP186.",2016-05-10 +30853262,Effects of macronutrient manipulation on postprandial metabolic responses in overweight males with high fasting lipids during simulated shift work: A randomized crossover trial.,"

Background & aims

Meals consumed out of synchronisation with normal circadian rhythms are associated with metabolic dysregulation. Changes in macronutrient composition of meals can improve metabolic responses during the day. Therefore, we aimed to investigate whether macronutrient manipulation of meals alters postprandial glucose and lipid responses and the expression of circadian genes during the night.

Methods

In a randomised crossover trial, 16 overweight males with high fasting lipids were fed isocaloric meals (2.7 MJ) at 0000 h. The meals differed primarily in total fat and total sugars content (control (8% total sugar, 5% saturated fat) vs test (16% total sugar, 26% saturated fat)). Postprandial blood samples were collected for glucose, insulin (3 h) and triglycerides (6 h) and analysed as incremental area under the curve (iAUC). RNA was extracted at 0 h, 2 h and 4 h and changes in expressions of the circadian genes clock and Per 1-3 analysed.

Results

Postprandial glucose (p = 0.04) and insulin iAUC (p = 0.02) were significantly higher after consumption of the test meal compared to the control meal. Postprandial triglyceride iAUC was not statistically different between the two meal types (p = 0.72). No change in circadian gene expression was observed after the two meals.

Conclusions

Our results showed that macronutrient composition affects postprandial metabolic response at night. It emphasizes the need to consider the role and effects of night time eating, when developing metabolic disease prevention strategies for shift workers.

Study id number

ACTRN12618001115224. WEBSITE OF TRIAL REGISTRY: http://www.anzctr.org.au/. Retrospectively registered after data collection.",2019-02-15 +30663772,The phenology of migration in an unpredictable world.,"In Focus: Freshwater, C., Trudel, M., Beacham, T. D., Gauthier, S., Johnson, S. C., Neville, C. & Juanes, F. (2016) Individual variation, population-specific migration behaviours and stochastic processes shape marine migration phenologies. Journal of Animal Ecology, 88, 67-78. https://doi.org/10.1111/1365-2656.12852 Pacific salmon undertake arduous and risky migrations from their freshwater nursery grounds to the coastal ocean, northwards to their feeding grounds, and then back to their freshwater natal habitats to spawn. Understanding the phenology of such migrations has largely been viewed through the lens of microevolution producing optimal strategies that reflect local selection pressures; less emphasis has been placed on quantifying how variation in migration patterns can spread the risks associated with life in variable and unpredictable ecosystems. In this issue, Freshwater et al. use the information contained in ear stones (otoliths) and DNA of migrating juvenile sockeye salmon from the Fraser River of western Canada to quantify variation in the timing of their marine migrations. Not only were there population-specific differences in migration phenology of fish from the same river, but there was substantial variation among individuals from specific populations. These patterns also varied from year to year. Data like these emphasize the risks involved in such migrations and suggest that variation in key migration traits are maintained because of the inherent unpredictability of ecosystems. 
Management and conservation efforts would be well-served to consider actions that maintain such ecological variation to facilitate meta-population persistence in a rapidly changing world.",2019-01-01 +30217929,"Whole-Genome Sequencing Reveals Elevated Tumor Mutational Burden and Initiating Driver Mutations in African Men with Treatment-Naïve, High-Risk Prostate Cancer.",": African-American men are more likely than any other racial group to die from prostate cancer. The contribution of acquired genomic variation to this racial disparity is largely unknown, as genomic from Africa is lacking. Here, we performed the first tumor-normal paired deep whole-genome sequencing for Africa. A direct study-matched comparison between African- and European-derived, treatment-naïve, high-risk prostate tumors for 15 cases allowed for further comparative analyses of existing data. Excluding a single hypermutated tumor with 55 mutations per megabase, we observed a 1.8-fold increase in small somatic variants in African- versus European-derived tumors (P = 1.02e-04), rising to 4-fold when compared with published tumor-matched data. Furthermore, we observed an increase in oncogenic driver mutations in African tumors (P = 2.92e-03); roughly 30% of impacted genes were novel to prostate cancer, and 79% of recurrent driver mutations appeared early in tumorigenesis. Although complex genomic rearrangements were less frequent in African tumors, we describe a uniquely hyperduplicated tumor affecting 149 transposable elements. Comparable with African Americans, ERG fusions and PIK3CA mutations were absent and PTEN loss less frequent. CCND1 and MYC were frequently gained, with somatic copy-number changes more likely to occur late in tumorigenesis. In addition to traditional prostate cancer gene pathways, genes regulating calcium ion-ATPase signal transduction were disrupted in African tumors. 
Although preliminary, our results suggest that further validation and investigation into the potential implications for elevated tumor mutational burden and tumor-initiating mutations in clinically unfavorable prostate cancer can improve patient outcomes in Africa. SIGNIFICANCE: The first whole-genome sequencing study for high-risk prostate cancer in African men allows a simultaneous comparison of ethnic differences relative to European populations and of the influences of the environment relative to African-American men. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/78/24/6736/F1.large.jpg.See related commentary by Huang, p. 6726.",2018-09-14 +29036526,DynaPho: a web platform for inferring the dynamics of time-series phosphoproteomics.,"Summary:Large-scale phosphoproteomics studies have improved our understanding of dynamic cellular signaling, but the downstream analysis of phosphoproteomics data is still a bottleneck. We develop DynaPho, a useful web-based tool providing comprehensive and in-depth analyses of time-course phosphoproteomics data, making analysis intuitive and accessible to non-bioinformatics experts. The tool currently implements five analytic modules, which reveal the transition of biological pathways, kinase activity, dynamics of interaction networks and the predicted kinase-substrate associations. These features can assist users in translating their larger-scale time-course phosphoproteomics data into valuable biological discoveries. Availability and implementation:DynaPho is freely available at http://dynapho.jhlab.tw/ . Contact:hsuancheng@ym.edu.tw or yukijuan@ntu.edu.tw . Supplementary information:Supplementary data are available at Bioinformatics online.",2017-07-07 +26818456,UbiSite: incorporating two-layered machine learning method with substrate motifs to predict ubiquitin-conjugation site on lysines.,"

Background

The conjugation of ubiquitin to a substrate protein (protein ubiquitylation), which involves a sequential process--E1 activation, E2 conjugation and E3 ligation, is crucial to the regulation of protein function and activity in eukaryotes. This ubiquitin-conjugation process typically binds the last amino acid of ubiquitin (glycine 76) to a lysine residue of a target protein. The high-throughput of mass spectrometry-based proteomics has stimulated a large-scale identification of ubiquitin-conjugated peptides. Hence, a new web resource, UbiSite, was developed to identify ubiquitin-conjugation site on lysines based on large-scale proteome dataset.

Results

Given a total of 37,647 ubiquitin-conjugated proteins, including 128,026 ubiquitylated peptides, obtained from various resources, this study carries out a large-scale investigation on ubiquitin-conjugation sites based on sequenced and structural characteristics. A TwoSampleLogo reveals that a significant depletion of histidine (H), arginine (R) and cysteine (C) residues around ubiquitylation sites may impact the conjugation of ubiquitins in closed three-dimensional environments. Based on the large-scale ubiquitylation dataset, a motif discovery tool, MDDLogo, has been adopted to characterize the potential substrate motifs for ubiquitin conjugation. Not only are single features such as amino acid composition (AAC), positional weighted matrix (PWM), position-specific scoring matrix (PSSM) and solvent-accessible surface area (SASA) considered, but also the effectiveness of incorporating MDDLogo-identified substrate motifs into a two-layered prediction model is taken into account. Evaluation by five-fold cross-validation showed that PSSM is the best feature in discriminating between ubiquitylation and non-ubiquitylation sites, based on support vector machine (SVM). Additionally, the two-layered SVM model integrating MDDLogo-identified substrate motifs could obtain a promising accuracy and the Matthews Correlation Coefficient (MCC) at 81.06% and 0.586, respectively. Furthermore, the independent testing showed that the two-layered SVM model could outperform other prediction tools, reaching at 85.10% sensitivity, 69.69% specificity, 73.69% accuracy and the 0.483 of MCC value.

Conclusion

The independent testing result indicated the effectiveness of incorporating MDDLogo-identified motifs into the prediction of ubiquitylation sites. In order to provide meaningful assistance to researchers interested in large-scale ubiquitinome data, the two-layered SVM model has been implemented onto a web-based system (UbiSite), which is freely available at http://csb.cse.yzu.edu.tw/UbiSite/ . Two cases given in the UbiSite provide a demonstration of effective identification of ubiquitylation sites with reference to substrate motifs.",2016-01-11 +22545773,MASiVEdb: the Sirevirus Plant Retrotransposon Database.,"

Background

Sireviruses are an ancient genus of the Copia superfamily of LTR retrotransposons, and the only one that has exclusively proliferated within plant genomes. Based on experimental data and phylogenetic analyses, Sireviruses have successfully infiltrated many branches of the plant kingdom, extensively colonizing the genomes of grass species. Notably, it was recently shown that they have been a major force in the make-up and evolution of the maize genome, where they currently occupy ~21% of the nuclear content and ~90% of the Copia population. It is highly likely, therefore, that their life dynamics have been fundamental in the genome composition and organization of a plethora of plant hosts. To assist studies into their impact on plant genome evolution and also facilitate accurate identification and annotation of transposable elements in sequencing projects, we developed MASiVEdb (Mapping and Analysis of SireVirus Elements Database), a collective and systematic resource of Sireviruses in plants.

Description

Taking advantage of the increasing availability of plant genomic sequences, and using an updated version of MASiVE, an algorithm specifically designed to identify Sireviruses based on their highly conserved genome structure, we populated MASiVEdb (http://bat.infspire.org/databases/masivedb/) with data on 16,243 intact Sireviruses (total length >158Mb) discovered in 11 fully-sequenced plant genomes. MASiVEdb is unlike any other transposable element database, providing a multitude of highly curated and detailed information on a specific genus across its hosts, such as complete set of coordinates, insertion age, and an analytical breakdown of the structure and gene complement of each element. All data are readily available through basic and advanced query interfaces, batch retrieval, and downloadable files. A purpose-built system is also offered for detecting and visualizing similarity between user sequences and Sireviruses, as well as for coding domain discovery and phylogenetic analysis.

Conclusion

MASiVEdb is currently the most comprehensive directory of Sireviruses, and as such complements other efforts in cataloguing plant transposable elements and elucidating their role in host genome evolution. Such insights will gradually deepen, as we plan to further improve MASiVEdb by phylogenetically mapping Sireviruses into families, by including data on fragments and solo LTRs, and by incorporating elements from newly-released genomes.",2012-04-30 +31875784,"Molecular Dynamics Mechanisms of the Inhibitory Effects of Abemaciclib, Hymenialdisine, and Indirubin on CDK-6.","BACKGROUND:Cyclin-Dependent Kinases-6 (CDK-6) is a serine/threonine protein kinase with regular activity in the cell cycle. Some inhibitors, such as abemaciclib, hymenialdisine, and indirubin, cause cell arrest by decreasing its activity. OBJECTIVES:The purpose of this study was to evaluate the Molecular Dynamic (MD) effects of abemaciclib, hymenialdisine, and indirubin on the structure of CDK-6. METHODS:The PDB file of CDK-6 was obtained from the Protein Data Bank (http://www.rcsb.org). After the simulation of CDK-6 in the Gromacs software, 200 stages of molecular docking were run on CDK-6 in the presence of the inhibitors using AutoDock 4.2. The simulation of CDK-6 in the presence of inhibitors was performed after docking. RESULTS:Abemaciclib showed the greatest tendency to bind CDK-6 via binding 16 residues in the binding site with hydrogen bonds and hydrophobic bonding. CDK-6 docked to hymenialdisine and indirubin increased the Total Energy (TE) and decreased the radius of gyration (Rg). CDK-6 docked to hymenialdisine significantly decreased the coil secondary structure. CONCLUSION:CDK-6 is inhibited via high binding affinity to abemaciclib, hymenialdisine, and indirubin inhibitors and induces variation in the secondary structure and Rg in the CDK-6 docked to the three inhibitors. 
It seems that developing a drug with a binding tendency to CDK6 that is similar to those of abemaciclib, indirubin, and hymenialdisine can change the secondary structure of CDK6, possibly more potently, and can be used to develop anticancer drugs. However, additional studies are needed to confirm this argument.",2019-01-01 +30763298,"Interim Estimates of 2018-19 Seasonal Influenza Vaccine Effectiveness - United States, February 2019.","In the United States, annual vaccination against seasonal influenza is recommended for all persons aged ≥6 months (https://www.cdc.gov/flu/protect/whoshouldvax.htm). Effectiveness of seasonal influenza vaccine varies by season. During each influenza season since 2004-05, CDC has estimated the effectiveness of seasonal influenza vaccine to prevent laboratory-confirmed influenza associated with medically attended acute respiratory illness (ARI). This interim report uses data from 3,254 children and adults enrolled in the U.S. Influenza Vaccine Effectiveness Network (U.S. Flu VE Network) during November 23, 2018-February 2, 2019. During this period, overall adjusted vaccine effectiveness against all influenza virus infection associated with medically attended ARI was 47% (95% confidence interval [CI] = 34%-57%). For children aged 6 months-17 years, overall vaccine effectiveness was 61% (44%-73%). Seventy-four percent of influenza A infections for which subtype information was available were caused by A(H1N1)pdm09 viruses. Vaccine effectiveness was estimated to be 46% (30%-58%) against illness caused by influenza A(H1N1)pdm09 viruses. CDC recommends that health care providers continue to administer influenza vaccine because influenza activity is ongoing and the vaccine can still prevent illness, hospitalization, and death associated with currently circulating influenza viruses, or other influenza viruses that might circulate later in the season. 
During the 2017-18 influenza season, in which influenza A(H3N2) predominated, vaccination was estimated to prevent 7.1 million illnesses, 3.7 million medical visits, 109,000 hospitalizations, and 8,000 deaths (1). Vaccination can also reduce the severity of influenza-associated illness (2). Persons aged ≥6 months who have not yet been vaccinated this season should be vaccinated.",2019-02-15 +29873019,Rapid Classification and Identification of Multiple Microorganisms with Accurate Statistical Significance via High-Resolution Tandem Mass Spectrometry.,"Rapid and accurate identification and classification of microorganisms is of paramount importance to public health and safety. With the advance of mass spectrometry (MS) technology, the speed of identification can be greatly improved. However, the increasing number of microbes sequenced is complicating correct microbial identification even in a simple sample due to the large number of candidates present. To properly untwine candidate microbes in samples containing one or more microbes, one needs to go beyond apparent morphology or simple ""fingerprinting""; to correctly prioritize the candidate microbes, one needs to have accurate statistical significance in microbial identification. We meet these challenges by using peptide-centric representations of microbes to better separate them and by augmenting our earlier analysis method that yields accurate statistical significance. Here, we present an updated analysis workflow that uses tandem MS (MS/MS) spectra for microbial identification or classification. We have demonstrated, using 226 MS/MS publicly available data files (each containing from 2500 to nearly 100,000 MS/MS spectra) and 4000 additional MS/MS data files, that the updated workflow can correctly identify multiple microbes at the genus and often the species level for samples containing more than one microbe. 
We have also shown that the proposed workflow computes accurate statistical significances, i.e., E values for identified peptides and unified E values for identified microbes. Our updated analysis workflow MiCId, a freely available software for Microorganism Classification and Identification, is available for download at https://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads.html .",2018-06-05 +27835690,Vidjil: A Web Platform for Analysis of High-Throughput Repertoire Sequencing.,"

Background

B and T lymphocytes are white blood cells that play a key role in adaptive immunity. A part of their DNA, called the V(D)J recombinations, is specific to each lymphocyte, and enables recognition of specific antigens. Today, with new sequencing techniques, one can get billions of DNA sequences from these regions. With dedicated Repertoire Sequencing (RepSeq) methods, it is now possible to picture populations of lymphocytes, and to monitor more accurately the immune response as well as pathologies such as leukemia.

Methods and results

Vidjil is an open-source platform for the interactive analysis of high-throughput sequencing data from lymphocyte recombinations. It contains an algorithm gathering reads into clonotypes according to their V(D)J junctions, a web application made of a sample, experiment and patient database and a visualization for the analysis of clonotypes along the time. Vidjil is implemented in C++, Python and Javascript and licensed under the GPLv3 open-source license. Source code, binaries and a public web server are available at http://www.vidjil.org and at http://bioinfo.lille.inria.fr/vidjil. Using the Vidjil web application consists of four steps: 1. uploading a raw sequence file (typically a FASTQ); 2. running RepSeq analysis software; 3. visualizing the results; 4. annotating the results and saving them for future use. For the end-user, the Vidjil web application needs no specific installation and just requires a connection and a modern web browser. Vidjil is used by labs in hematology or immunology for research and clinical applications.",2016-11-11 +27694207,3DFlu: database of sequence and structural variability of the influenza hemagglutinin at population scale. ,"The influenza virus type A (IVA) is an important pathogen which is able to cause annual epidemics and even pandemics. This fact is the consequence of the antigenic shifts and drifts capabilities of IVA, caused by the high mutation rate and the reassortment capabilities of the virus. The hemagglutinin (HA) protein constitutes the main IVA antigen and has a crucial role in the infection mechanism, being responsible for the recognition of host-specific sialic acid derivatives. Despite the relative abundance of HA sequence and serological studies, comparative structure-based analysis of HA are less investigated. The 3DFlu database contains well annotated HA representatives: 1192 models and 263 crystallographic structures. 
The relations between these proteins are defined using different metrics and are visualized as a network in the provided web interface. Moreover, structural and sequence comparison of the proteins can be explored. Metadata information (e.g. protein identifier, IVA strain, year and location of infection) can enhance the exploration of the presented data. With our database researchers gain a useful tool for the exploration of high quality HA models, viewing and comparing changes in the HA viral subtypes at several information levels (sequence, structure, ESP). The complete and integrated view of those relations might be useful to determine the efficiency of transmission, pathogenicity and for the investigation of evolutionary tendencies of the influenza virus. Database URL: http://nucleus3d.cent.uw.edu.pl/influenza.",2016-10-02 +28957500,fastNGSadmix: admixture proportions and principal component analysis of a single NGS sample.,"

Motivation

Estimation of admixture proportions and principal component analysis (PCA) are fundamental tools in population genetics. However, applying these methods to low- or mid-depth sequencing data without taking genotype uncertainty into account can introduce biases.

Results

Here we present fastNGSadmix, a tool to quickly and reliably estimate admixture proportions and perform PCA from next generation sequencing data of a single individual. The analyses are based on genotype likelihoods of the input sample and a set of predefined reference populations. The method has high accuracy, even at low sequencing depth, and corrects for the biases introduced by small reference populations.

Availability and implementation

The admixture estimation method is implemented in C++ and the PCA method is implemented in R. The code is freely available at http://www.popgen.dk/software/index.php/FastNGSadmix.

Contact

emil.jorsboe@bio.ku.dk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +28334301,Dynamix: dynamic visualization by automatic selection of informative tracks from hundreds of genomic datasets.,"

Motivation

Visualization of genomic data is fundamental for gaining insights into genome function. Yet, co-visualization of a large number of datasets remains a challenge in all popular genome browsers and the development of new visualization methods is needed to improve the usability and user experience of genome browsers.

Results

We present Dynamix, a JBrowse plugin that enables the parallel inspection of hundreds of genomic datasets. Dynamix takes advantage of a priori knowledge to automatically display data tracks with signal within a genomic region of interest. As the user navigates through the genome, Dynamix automatically updates data tracks and limits all manual operations otherwise needed to adjust the data visible on screen. Dynamix also introduces a new carousel view that optimizes screen utilization by enabling users to independently scroll through groups of tracks.

Availability and implementation

Dynamix is hosted at http://furlonglab.embl.de/Dynamix .

Contact

charles.girardot@embl.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +26994912,GO annotation in InterPro: why stability does not indicate accuracy in a sea of changing annotations. ,"The removal of annotation from biological databases is often perceived as an indicator of erroneous annotation. As a corollary, annotation stability is considered to be a measure of reliability. However, diverse data-driven events can affect the stability of annotations in both primary protein sequence databases and the protein family databases that are built upon the sequence databases and used to help annotate them. Here, we describe some of these events and their consequences for the InterPro database, and demonstrate that annotation removal or reassignment is not always linked to incorrect annotation by the curator. Database URL: http://www.ebi.ac.uk/interpro.",2016-03-19 +29684140,Ultra-fast global homology detection with Discrete Cosine Transform and Dynamic Time Warping.,"Motivation:Evolutionary information is crucial for the annotation of proteins in bioinformatics. The amount of retrieved homologs often correlates with the quality of predicted protein annotations related to structure or function. With a growing amount of sequences available, fast and reliable methods for homology detection are essential, as they have a direct impact on predicted protein annotations. Results:We developed a discriminative, alignment-free algorithm for homology detection with quasi-linear complexity, enabling theoretically much faster homology searches. To reach this goal, we convert the protein sequence into numeric biophysical representations. These are shrunk to a fixed length using a novel vector quantization method which uses a Discrete Cosine Transform compression. We then compute, for each compressed representation, similarity scores between proteins with the Dynamic Time Warping algorithm and we feed them into a Random Forest. 
The WARP performances are comparable with state of the art methods. Availability and implementation:The method is available at http://ibsquare.be/warp. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-09-01 +26578693,TENOR: Database for Comprehensive mRNA-Seq Experiments in Rice.,"Here we present TENOR (Transcriptome ENcyclopedia Of Rice, http://tenor.dna.affrc.go.jp), a database that encompasses large-scale mRNA sequencing (mRNA-Seq) data obtained from rice under a wide variety of conditions. Since the elucidation of the ability of plants to adapt to various growing conditions is a key issue in plant sciences, it is of great interest to understand the regulatory networks of genes responsible for environmental changes. We used mRNA-Seq and performed a time-course transcriptome analysis of rice, Oryza sativa L. (cv. Nipponbare), under 10 abiotic stress conditions (high salinity; high and low phosphate; high, low and extremely low cadmium; drought; osmotic; cold; and flood) and two plant hormone treatment conditions (ABA and jasmonic acid). A large number of genes that were responsive to abiotic stresses and plant hormones were detected by differential expression analysis. Furthermore, several responsive genes were found to encode transcription factors that could control the transcriptional network of stress responses, but the timing of the induction of these genes was not uniform across conditions. A significant number of cis-regulatory elements were enriched in the promoter regions of the responsive genes and were shared among conditions. These data suggest that some key components of gene regulation networks are shared between different stress signaling pathways. 
All the resources (novel genes identified from mRNA-Seq data, expression profiles, co-expressed genes and cis-regulatory elements) can be searched for and are available in TENOR.",2015-11-16 +29508535,Design and development of a phantom for tomosynthesis with potential for automated analysis via the cloud.,"This paper describes Development of a Phantom for Tomosynthesis with Potential for Automated Analysis via the Cloud. Several studies are underway to investigate the effectiveness of Tomosynthesis Mammographic Image Screening, including the large TMIST project as funded by the National Cancer Institute https://www.cancer.gov/about-cancer/treatment/clinical-trials/nci-supported/tmist. The development of the phantom described in this paper follows initiatives from the FDA, the AAPM TG245 task group, and European Reference Organization (EUREF) for Quality Assured Breast Screening and Diagnostic Services Committee report noting, that no formal endorsement nor recommendation for use has been sought, or granted by any of these groups. This paper reports on the possibility of using this newly developed Tomosynthesis Phantom for Quality Assurance, field testing of image performance, including remote monitoring of DBT system performance, e.g., via transmission over the cloud. The phantom includes tests for: phantom positioning and alignment (important for remote analysis), scan geometry (x and y), chest wall offset, scan slice width and Slice Sensitivity Profile (SSP(z)) slice geometry (slice width), scan slice incrementation (z), z axis geometry bead, low contrast detectability using low contrast spheres, spatial resolution via Point Spread Function (PSF), Image uniformity, Signal to Noise Ratio (SNR), and Contrast to Noise Ratio (CNR) via readings over an Aluminum square. The phantom is designed for use with automated analysis via transmission of images over the cloud and the analysis package includes test of positioning accuracy (roll, pitch, and yaw). 
Data are shown from several commercial Tomosynthesis Scanners including Fuji, GE, Hologic, IMS-Giotti, and Siemens; however, the focus of this paper is on phantom design, and not in general aimed at direct commercial comparisons, and wherever possible the identity of the data is anonymized. Results of automated analysis of the phantom are shown, and it is demonstrated that reliable analysis of such a phantom can be achieved remotely, including transmission of data through the cloud.",2018-03-06 +28637301,EDEN: evolutionary dynamics within environments.,"

Summary

Metagenomics revolutionized the field of microbial ecology, giving access to Gb-sized datasets of microbial communities under natural conditions. This enables fine-grained analyses of the functions of community members, studies of their association with phenotypes and environments, as well as of their microevolution and adaptation to changing environmental conditions. However, phylogenetic methods for studying adaptation and evolutionary dynamics are not able to cope with big data. EDEN is the first software for the rapid detection of protein families and regions under positive selection, as well as their associated biological processes, from meta- and pangenome data. It provides an interactive result visualization for detailed comparative analyses.

Availability and implementation

EDEN is available as a Docker installation under the GPL 3.0 license, allowing its use on common operating systems, at http://www.github.com/hzi-bifo/eden.

Contact

alice.mchardy@helmholtz-hzi.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +27139400,KATIS: An eHealth System for Complementary Medicine.,"

Background

Much of the information on complementary medicine is spread across the literature and the internet. However, the various literature and web resources each provide information on just one specialist field. In addition, these resources do not allow users to search for suitable therapies based on patient-specific indications.

Objectives

Aggregating knowledge about complementary medicine into one database makes the search more efficient.

Methods

Data integration is a promising method for providing well-based knowledge. Therefore, integrative methods were used to create the database ALTMEDA, which includes complementary and drug-related data.

Results

Based on this comprehensive database ALTMEDA, the new eHealth system KATIS and the corresponding app ALMEKO for mobile usage were implemented.

Conclusion

KATIS is a web-based system for complementary medicine. KATIS provides knowledge about ten different specialist fields, which enables users not only to look up a particular complementary therapy, but also to find suitable therapies for indications more efficiently. [http://www.komplementäre-medizin.de].",2016-01-01 +30273912,KInhibition: A Kinase Inhibitor Selection Portal.,"Protein kinases constitute a large class of signaling molecules frequently targeted in research and clinical uses. However, kinase inhibitors are notoriously non-specific, making it difficult to select an appropriate inhibitor for a given kinase. Available data from large-scale kinase inhibitor screens are often difficult to query. Here, we present KInhibition (https://kinhibition.fredhutch.org), an online portal that allows users to search publicly available datasets to find selective inhibitors for a chosen kinase or group of kinases. Compounds are sorted by a KInhibition Selectivity Score, calculated based on compounds' activity against the selected kinase(s) versus activity against all other kinases for which that compound has been profiled. The current version allows users to query four datasets, with a framework that can easily accommodate additional datasets. KInhibition represents a powerful platform through which researchers from broad areas of biology, chemistry, and pharmacology can easily interrogate large datasets to help guide their selection of kinase inhibitors.",2018-09-18 +27742822,miRPathDB: a new dictionary on microRNAs and target pathways.,"In the last decade, miRNAs and their regulatory mechanisms have been intensively studied and many tools for the analysis of miRNAs and their targets have been developed. We previously presented a dictionary on single miRNAs and their putative target pathways. Since then, the number of miRNAs has tripled and the knowledge on miRNAs and targets has grown substantially. 
This, along with changes in pathway resources such as KEGG, leads to an improved understanding of miRNAs, their target genes and related pathways. Here, we introduce the miRNA Pathway Dictionary Database (miRPathDB), freely accessible at https://mpd.bioinf.uni-sb.de/ With the database we aim to complement available target pathway web-servers by providing researchers easy access to the information which pathways are regulated by a miRNA, which miRNAs target a pathway and how specific these regulations are. The database contains a large number of miRNAs (2595 human miRNAs), different miRNA target sets (14 773 experimentally validated target genes as well as 19 281 predicted targets genes) and a broad selection of functional biochemical categories (KEGG-, WikiPathways-, BioCarta-, SMPDB-, PID-, Reactome pathways, functional categories from gene ontology (GO), protein families from Pfam and chromosomal locations totaling 12 875 categories). In addition to Homo sapiens, also Mus musculus data are stored and can be compared to human target pathways.",2016-10-13 +29718097,RBind: computational network method to predict RNA binding sites.,"Motivation:Non-coding RNA molecules play essential roles by interacting with other molecules to perform various biological functions. However, it is difficult to determine RNA structures due to their flexibility. At present, the number of experimentally solved RNA-ligand and RNA-protein structures is still insufficient. Therefore, binding sites prediction of non-coding RNA is required to understand their functions. Results:Current RNA binding site prediction algorithms produce many false positive nucleotides that are distance away from the binding sites. Here, we present a network approach, RBind, to predict the RNA binding sites. We benchmarked RBind in RNA-ligand and RNA-protein datasets. 
The average accuracy of 0.82 in RNA-ligand and 0.63 in RNA-protein testing showed that this network strategy has a reliable accuracy for binding sites prediction. Availability and implementation:The codes and datasets are available at https://zhaolab.com.cn/RBind. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-09-01 +29566239,TodoFirGene: Developing Transcriptome Resources for Genetic Analysis of Abies sachalinensis.,"Todo-matsu (Abies sachalinensis) is one of the most important forestry species in Hokkaido, Japan and is distributed from near sea level to the alpine zone. Due to its wide spatial distribution, the species adapts to its environment, displaying phenotypes of ecological relevance. In order to identify candidate genes under natural selection, we collected the transcriptome from the female and male flower, leaf and inner bark. De novo assembly with 34.7 Gb of sequencing reads produced 158,542 transcripts from 69,618 loci, whose estimated coverage reached 95.6% of conserved eukaryotic genes. Homology searches against publicly available databases identified 134,190 (84.6%) transcripts with at least one hit. In total, 28,944 simple sequence repeats (SSRs) and 80,758 single nucleotide variants (SNVs) were detected from 23,570 (14.9%) and 25,366 (16.0%) transcripts, which were valuable for use in genetic analysis of the species. All the annotations were included in a relational database, TodoFirGene, which provides an interface for various queries and homology search, and can be accessed at http://plantomics.mind.meiji.ac.jp/todomatsu/. This database hosts not only the A. 
sachalinensis transcriptome but also links to the proteomes of 13 other species, allowing a comparative genomic study of plant species.",2018-06-01 +26519402,The Plant Ontology: A Tool for Plant Genomics.,"The use of controlled, structured vocabularies (ontologies) has become a critical tool for scientists in the post-genomic era of massive datasets. Adoption and integration of common vocabularies and annotation practices enables cross-species comparative analyses and increases data sharing and reusability. The Plant Ontology (PO; http://www.plantontology.org/ ) describes plant anatomy, morphology, and the stages of plant development, and offers a database of plant genomics annotations associated to the PO terms. The scope of the PO has grown from its original design covering only rice, maize, and Arabidopsis, and now includes terms to describe all green plants from angiosperms to green algae.This chapter introduces how the PO and other related ontologies are constructed and organized, including languages and software used for ontology development, and provides an overview of the key features. Detailed instructions illustrate how to search and browse the PO database and access the associated annotation data. Users are encouraged to provide input on the ontology through the online term request form and contribute datasets for integration in the PO database.",2016-01-01 +27798100,"Population structure of Han Chinese in the modern Taiwanese population based on 10,000 participants in the Taiwan Biobank project.","The Taiwan Biobank (TWB) aims to build a nationwide research database that integrates genomic/epigenomic profiles, lifestyle patterns, dietary habits, environmental exposure history and long-term health outcomes of 300,000 residents of Taiwan. We describe here an investigation of the population structure of Han Chinese on this Pacific island using genotype data of 591,048 SNPs in an initial freeze of 10,801 unrelated TWB participants. 
In addition to the North-South cline reported in other Han Chinese populations, we find the Taiwanese Han Chinese clustered into three cline groups: 5% were of northern Han Chinese ancestry, 79.9% were of southern Han Chinese ancestry, and 14.5% belonged to a third (T) group. We also find that this T group is genetically distinct from neighbouring Southeast Asians and Austronesian tribes but similar to other southern Han Chinese. Interestingly, high degree of LD between HLA haplotype A*33:03-B*58:01, an MHC allele being of pathological relevance, and SNPs across the MHC region was observed in subjects with T origin, but not in other Han Chinese. This suggested the T group individuals may have experienced evolutionary events independent from the other southern Han Chinese. Based on the newly-discovered population structure, we detect different loci susceptible to type II diabetes in individuals with southern and northern Han Chinese ancestries. Finally, as one of the largest dataset currently available for the Chinese population, genome-wide statistics for the 10,810 subjects are made publicly accessible through Taiwan View (https://taiwanview.twbiobank.org.tw/index; date last accessed October 14, 2016) to encourage future genetic research and collaborations with the island Taiwan.",2016-12-01 +26054769,STITCHER: A web resource for high-throughput design of primers for overlapping PCR applications.,"Overlapping PCR is routinely used in a wide number of molecular applications. These include stitching PCR fragments together, generating fluorescent transcriptional and translational fusions, inserting mutations, making deletions, and PCR cloning. Overlapping PCR is also used for genotyping by traditional PCR techniques and in detection experiments using techniques such as loop-mediated isothermal amplification (LAMP). 
STITCHER is a web tool providing a central resource for researchers conducting all types of overlapping PCR experiments with an intuitive interface for automated primer design that's fast, easy to use, and freely available online (http://ohalloranlab.net/STITCHER.html). STITCHER can handle both single sequence and multi-sequence input, and specific features facilitate numerous other PCR applications, including assembly PCR, adapter PCR, and primer walking. Field PCR, and in particular, LAMP, offers promise as an on site tool for pathogen detection in underdeveloped areas, and STITCHER includes off-target detection features for pathogens commonly targeted using LAMP technology.",2015-06-01 +29856911,Global Estimate of Lung Cancer Mortality Attributable to Residential Radon.,"

Background

Radon is the second most important cause of lung cancer, ranked by the World Health Organization as the fifth leading cause of mortality in 2010. An updated database of national radon exposures for 66 countries allows the global burden of lung cancer mortality attributable to radon to be estimated.

Objective

Our goal was to estimate the global population attributable burden of lung cancer mortality in 2012 from residential radon.

Methods

Estimates of the population attributable risk (PAR) of lung cancer mortality from radon were determined using the attributable fraction approach, using three models for excess relative risk of lung cancer from radon.

Results

The estimates of the median PAR of lung cancer mortality from residential radon in 2012 for the 66 countries having representative national radon surveys were consistent, as 16.5%, 14.4%, and 13.6% for the exposure-age-concentration (EAC) model (BEIR VI), the Hunter model, and the Kreuzer model, respectively. The mean PAR using the EAC model ranged from 4.2% (95% CI: 0.9, 11.7) for Japan, to 29.3% (95% CI: 22.9, 35.7) for Armenia, with a median for the 66 countries of 16.5%. Radon-attributable lung cancer deaths for all 66 countries totaled 226,057 in 2012 and represent a median of 3.0% of total cancer deaths.

Conclusions

Consistent findings between the three models used to estimate excess relative risks of lung cancer from radon, and between the attributable fraction methodology and the life table analysis, confirm that residential radon is responsible for a substantial proportion of lung cancer mortality worldwide. https://doi.org/10.1289/EHP2503.",2018-05-31 +,"The relative and absolute frequencies of angiosperm sexual systems: Dioecy, monoecy, gynodioecy, and an updated online database","• Premise of the study: Separating sexual function between different individuals carries risks, especially for sedentary organisms. Nevertheless, many land plants have unisexual gametophytes or sporophytes. This study brings together data and theoretical insights from research over the past 20 yr on the occurrence and frequency of plant sexual systems, focusing on the flowering plants.• Methods: A list of genera with dioecious species, along with other information, is made available (http://www.umsl.edu/~renners/). Frequencies of other sexual systems are tabulated, and data on the genetic regulation, ecological context, and theoretical benefits of dioecy reviewed.• Key results: There are 15600 dioecious angiosperms in 987 genera and 175 families, or 5–6% of the total species (7% of genera, 43% of families), with somewhere between 871 to 5000 independent origins of dioecy. Some 43% of all dioecious angiosperms are in just 34 entirely dioecious clades, arguing against a consistent negative influence of dioecy on diversification. About 31.6% of the dioecious species are wind-pollinated, compared with 5.5–6.4% of nondioecious angiosperms. Also, 1.4% of all angiosperm genera contain dioecious and monoecious species, while 0.4% contain dioecious and gynodioecious species. All remaining angiosperm sexual systems are rare. Chromosomal sex determination is known from 40 species; environmentally modulated sex allocation is common. 
Few phylogenetic studies have focused on the evolution of dioecy.• Conclusions: The current focus is on the genetic mechanisms underlying unisexual flowers and individuals. Mixed strategies of sexual and vegetative dispersal, together with plants’ sedentary life style, may often favor polygamous systems in which sexually inconstant individuals can persist. Nevertheless, there are huge entirely dioecious clades of tropical woody plants.",2014-10-01 +27733507,PMDBase: a database for studying microsatellite DNA and marker development in plants.,"Microsatellite DNAs (or SSRs) are important genomic components involved in many important biological functions. SSRs have been extensively exploited as molecular markers for diverse applications including genetic diversity, linkage/association mapping of gene/QTL, marker-assisted selection, variety identification and evolution analysis. However, a comprehensive database or web service for studying microsatellite DNAs and marker development in plants is lacking. Here, we developed a database, PMDBase, which integrates large amounts of microsatellite DNAs from genome sequenced plant species and includes a web service for microsatellite DNAs identification. In PMDBase, 26 230 099 microsatellite DNAs were identified spanning 110 plant species. Up to three pairs of primers were supplied for every microsatellite DNA. For 81 species, genomic features of the microsatellite DNAs (genic or non-genic) were supplied with the corresponding genes or transcripts from public databases. Microsatellite DNAs can be explored through browsing and searching modules with a user-friendly web interface and customized software. Furthermore, we developed MISAweb and embedded Primer3web to help users to identify microsatellite DNAs and design corresponding primers in their own genomic sequences online. All datasets of microsatellite DNAs can be downloaded conveniently. 
PMDBase will be updated regularly with new available genome data and can be accessed freely via the address http://www.sesame-bioinfo.org/PMDBase.",2016-10-12 +23192552,Molecular network analysis of diseases and drugs in KEGG.,"KEGG (http://www.genome.jp/kegg/) is an integrated database resource for linking genomes or molecular datasets to molecular networks (pathways, etc.) representing higher-level systemic functions of the cell, the organism, and the ecosystem. Major efforts have been undertaken for capturing and representing experimental knowledge as manually drawn KEGG pathway maps and for genome-based generalization of experimental knowledge through the KEGG Orthology (KO) system. Current knowledge on diseases and drugs has also been integrated in the KEGG pathway maps, especially in terms of known disease genes and drug targets. Thus, KEGG can be used as a reference knowledge base for integration and interpretation of large-scale datasets generated by high-throughput experimental technologies, as well for finding their practical values. Here we give an introduction to the KEGG Mapper tools, especially for understanding disease mechanisms and adverse drug interactions.",2013-01-01 +,BiomassID: A biomass type identification system for mobile devices,"Biomass quality assessment is of great importance when one in the biomass industry needs to produce another energy product, such as biofuel or bioenergy, for instance. Usually, the biomass quality is determined using expensive devices, such as mass spectrometers, or complex chemical tests that may need several days to complete. Because of the high costs of such methods, people tend to forsake biomass quality assessment and move on to directly produce bioproducts from any kind of biomass. In this paper, a cheap and fast solution for biomass type identification is proposed and investigated. The quality of biomass can be inferred at a coarse level from the type of biomass. 
In the proposed approach, biomass type identification is treated as a texture classification problem. A texture classification system developed for mobile devices which is able to distinguish between four types of biomass texture images is presented in this paper. Several state of the art texture classification systems based on machine learning are evaluated in a series of experiments on a data set of biomass texture images. The experiments are conducted to determine the system that can offer the best trade-off between accuracy and speed, since the goal is to implement it on a mobile device with limited processing power and memory. In the end, the selected system can identify the type of biomass from pictures taken with a mobile device camera in a few seconds directly on the respective mobile device. The utility of the system is demonstrated through an iOS application that is freely available for download in the App Store at http://appstore.com/biomassid.",2015-04-01 +30282776,Quantifying Homologous Proteins and Proteoforms.,"Many proteoforms-arising from alternative splicing, post-translational modifications (PTM), or paralogous genes-have distinct biological functions, such as histone PTM proteoforms. However, their quantification by existing bottom-up mass-spectrometry (MS) methods is undermined by peptide-specific biases. To avoid these biases, we developed and implemented a first-principles model (HIquant) for quantifying proteoform stoichiometries. We characterized when MS data allow inferring proteoform stoichiometries by HIquant and derived an algorithm for optimal inference. We applied this algorithm to infer proteoform stoichiometries in two experimental systems that supported rigorous bench-marking: alkylated proteoforms spiked-in at known ratios and endogenous histone 3 PTM proteoforms quantified relative to internal heavy standards. 
When compared with the benchmarks, the proteoform stoichiometries inferred by HIquant without using external standards had relative error of 5-15% for simple proteoforms and 20-30% for complex proteoforms. A HIquant server is implemented at: https://web.northeastern.edu/slavov/2014HIquant/.",2018-10-03 +29706870,"Studying Axon-Astrocyte Functional Interactions by 3D Two-Photon Ca2+ Imaging: A Practical Guide to Experiments and ""Big Data"" Analysis.","Recent advances in fast volumetric imaging have enabled rapid generation of large amounts of multi-dimensional functional data. While many computer frameworks exist for data storage and analysis of the multi-gigabyte Ca2+ imaging experiments in neurons, they are less useful for analyzing Ca2+ dynamics in astrocytes, where transients do not follow a predictable spatio-temporal distribution pattern. In this manuscript, we provide a detailed protocol and commentary for recording and analyzing three-dimensional (3D) Ca2+ transients through time in GCaMP6f-expressing astrocytes of adult brain slices in response to axonal stimulation, using our recently developed tools to perform interactive exploration, filtering, and time-correlation analysis of the transients. In addition to the protocol, we release our in-house software tools and discuss parameters pertinent to conducting axonal stimulation/response experiments across various brain regions and conditions. Our software tools are available from the Volterra Lab webpage at https://wwwfbm.unil.ch/dnf/group/glia-an-active-synaptic-partner/member/volterra-andrea-volterra in the form of software plugins for Image J (NIH)-a de facto standard in scientific image analysis. 
Three programs are available: MultiROI_TZ_profiler for interactive graphing of several movable ROIs simultaneously, Gaussian_Filter5D for Gaussian filtering in several dimensions, and Correlation_Calculator for computing various cross-correlation parameters on voxel collections through time.",2018-04-13 +29859695,"Purification, characterization, and gene cloning of a new cold-adapted β-galactosidase from Erwinia sp. E602 isolated in northeast China.","β-Galactosidases are widely used in industry for elimination of lactose from milk products. A new β-galactosidase was obtained from bacterial strain Erwinia sp. E602, newly isolated in northeast China. The enzyme was purified with the methods of ammonium sulfate fractionation, ion exchange, and gel filtration chromatography for further study of the enzymatic characteristics. The purified enzyme had a molecular weight of near 110 kDa. The optimum reaction temperature and pH of this enzyme was determined to be 40°C and 7.0, respectively, indicating that this enzyme was a mesophilic neutral β-galactosidase. Furthermore, the enzyme retained near 10% of the activity at 0°C, which also suggested its cold-adapted property. Kinetics of the β-galactosidase was studied, and the Km (Michaelis constant) and Vmax (maximum enzymatic reaction rate) of this enzyme were 0.21 mmol/L and 263.16 µmol/mg per minute, respectively. The effects of metal ions on the enzymatic activity and the lactose hydrolysis efficiency in milk, as well as its trans-glycosylation activity, were studied in this work. The β-galactosidase coding gene was cloned to be a 3-kb length fragment, which shared at most 81% of identity with the published sequences in NCBI Blast database (https://blast.ncbi.nlm.nih.gov). 
Results in this work suggested it is a new β-galactosidase and it has potential to be used in dairy and food processing.",2018-05-30 +29775322,Prediction of Human Cytochrome P450 Inhibition Using a Multitask Deep Autoencoder Neural Network.,"Adverse side effects of drug-drug interactions induced by human cytochrome P450 (CYP450) inhibition is an important consideration in drug discovery. It is highly desirable to develop computational models that can predict the inhibitive effect of a compound against a specific CYP450 isoform. In this study, we developed a multitask model for concurrent inhibition prediction of five major CYP450 isoforms, namely, 1A2, 2C9, 2C19, 2D6, and 3A4. The model was built by training a multitask autoencoder deep neural network (DNN) on a large dataset containing more than 13 000 compounds, extracted from the PubChem BioAssay Database. We demonstrate that the multitask model gave better prediction results than that of single-task models, previous reported classifiers, and traditional machine learning methods on an average of five prediction tasks. Our multitask DNN model gave average prediction accuracies of 86.4% for the 10-fold cross-validation and 88.7% for the external test datasets. In addition, we built linear regression models to quantify how the other tasks contributed to the prediction difference of a given task between single-task and multitask models, and we explained under what conditions the multitask model will outperform the single-task model, which suggested how to use multitask DNN models more effectively. We applied sensitivity analysis to extract useful knowledge about CYP450 inhibition, which may shed light on the structural features of these isoforms and give hints about how to avoid side effects during drug development. 
Our models are freely available at http://repharma.pku.edu.cn/deepcyp/home.php or http://www.pkumdl.cn/deepcyp/home.php .",2018-05-30 +23761453,The FunFOLD2 server for the prediction of protein-ligand interactions.,"The FunFOLD2 server is a new independent server that integrates our novel protein-ligand binding site and quality assessment protocols for the prediction of protein function (FN) from sequence via structure. Our guiding principles were, first, to provide a simple unified resource to make our function prediction software easily accessible to all via a simple web interface and, second, to produce integrated output for predictions that can be easily interpreted. The server provides a clean web interface so that results can be viewed on a single page and interpreted by non-experts at a glance. The output for the prediction is an image of the top predicted tertiary structure annotated to indicate putative ligand-binding site residues. The results page also includes a list of the most likely binding site residues and the types of predicted ligands and their frequencies in similar structures. The protein-ligand interactions can also be interactively visualized in 3D using the Jmol plug-in. The raw machine readable data are provided for developers, which comply with the Critical Assessment of Techniques for Protein Structure Prediction data standards for FN predictions. The FunFOLD2 webserver is freely available to all at the following web site: http://www.reading.ac.uk/bioinf/FunFOLD/FunFOLD_form_2_0.html.",2013-06-12 +27688070,A supervised weighted similarity measure for gene expressions using biological knowledge.,"A supervised similarity measure for Saccharomyces cerevisiae gene expressions is developed which can capture the gene similarity when multiple types of experimental conditions like cell cycle, heat shock are available for all the genes. 
The measure is called Weighted Pearson correlation (WPC), where the weights are systematically determined for each type of experiment by maximizing the positive predictive value for gene pairs having Pearson correlation greater than 0.80. The positive predictive value is computed by using the annotation information available from yeast GO-Slim process annotations in Saccharomyces Genome Database (SGD). Genes are then clustered by k-medoid algorithm using the newly computed WPC, and functions of 135 unclassified genes are predicted with a p-value cutoff 10^{-5} using Munich Information for Protein Sequences (MIPS) annotations. Out of these genes, functional categories of 55 genes are predicted with p-value cutoff greater than 10^{-10} and reported in this investigation. The superiority of WPC as compared to some existing similarity measures like Pearson correlation and Euclidean distance is demonstrated using positive predictive values (PPV) of gene pairs for different Saccharomyces cerevisiae data sets. The related code is available at http://www.sampa.droppages.com/WPC.html.",2016-09-26 +29058722,Deciphering lipid structures based on platform-independent decision rules.,"We achieve automated and reliable annotation of lipid species and their molecular structures in high-throughput data from chromatography-coupled tandem mass spectrometry using decision rule sets embedded in Lipid Data Analyzer (LDA; http://genome.tugraz.at/lda2). Using various low- and high-resolution mass spectrometry instruments with several collision energies, we proved the method's platform independence. We propose that the software's reliability, flexibility, and ability to identify novel lipid molecular species may now render current state-of-the-art lipid libraries obsolete.",2017-10-23 +29931314,SCIP: a single-cell image processor toolbox.,"

Summary

Each cell is a phenotypically unique individual that is influenced by internal and external processes, operating in parallel. To characterize the dynamics of cellular processes one needs to observe many individual cells from multiple points of view and over time, so as to identify commonalities and variability. With this aim, we engineered a software, 'SCIP', to analyze multi-modal, multi-process, time-lapse microscopy morphological and functional images. SCIP is capable of automatic and/or manually corrected segmentation of cells and lineages, automatic alignment of different microscopy channels, as well as detect, count and characterize fluorescent spots (such as RNA tagged by MS2-GFP), nucleoids, Z rings, Min system, inclusion bodies, undefined structures, etc. The results can be exported into *mat files and all results can be jointly analyzed, to allow studying not only each feature and process individually, but also find potential relationships. While we exemplify its use on Escherichia coli, many of its functionalities are expected to be of use in analyzing other prokaryotes and eukaryotic cells as well. We expect SCIP to facilitate the finding of relationships between cellular processes, from small-scale (e.g. gene expression) to large-scale (e.g. cell division), in single cells and cell lineages.

Availability and implementation

http://www.ca3-uninova.org/project_scip.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +28584729,MitoSuite: a graphical tool for human mitochondrial genome profiling in massive parallel sequencing.,"Recent rapid advances in high-throughput, next-generation sequencing (NGS) technologies have promoted mitochondrial genome studies in the fields of human evolution, medical genetics, and forensic casework. However, scientists unfamiliar with computer programming often find it difficult to handle the massive volumes of data that are generated by NGS. To address this limitation, we developed MitoSuite, a user-friendly graphical tool for analysis of data from high-throughput sequencing of the human mitochondrial genome. MitoSuite generates a visual report on NGS data with simple mouse operations. Moreover, it analyzes high-coverage sequencing data but runs on a stand-alone computer, without the need for file upload. Therefore, MitoSuite offers outstanding usability for handling massive NGS data, and is ideal for evolutionary, clinical, and forensic studies on the human mitochondrial genome variations. It is freely available for download from the website https://mitosuite.com.",2017-05-30 +29931187,iLoc-lncRNA: predict the subcellular location of lncRNAs by incorporating octamer composition into general PseKNC.,"

Motivation

Long non-coding RNAs (lncRNAs) are a class of RNA molecules with more than 200 nucleotides. They have important functions in cell development and metabolism, such as genetic markers, genome rearrangements, chromatin modifications, cell cycle regulation, transcription and translation. Their functions are generally closely related to their localization in the cell. Therefore, knowledge about their subcellular locations can provide very useful clues or preliminary insight into their biological functions. Although biochemical experiments could determine the localization of lncRNAs in a cell, they are both time-consuming and expensive. Therefore, it is highly desirable to develop bioinformatics tools for fast and effective identification of their subcellular locations.

Results

We developed a sequence-based bioinformatics tool called 'iLoc-lncRNA' to predict the subcellular locations of LncRNAs by incorporating the 8-tuple nucleotide features into the general PseKNC (Pseudo K-tuple Nucleotide Composition) via the binomial distribution approach. Rigorous jackknife tests have shown that the overall accuracy achieved by the new predictor on a stringent benchmark dataset is 86.72%, which is over 20% higher than that by the existing state-of-the-art predictor evaluated on the same tests.

Availability and implementation

A user-friendly webserver has been established at http://lin-group.cn/server/iLoc-LncRNA, by which users can easily obtain their desired results.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +30447012,How families prepare their children for tooth extraction under general anaesthesia: Family and clinical predictors of non-compliance with a 'serious game'. ,"To explore family and clinical factors for usage of an online serious game designed to prepare children with ECC for dental treatment under general anaesthesia. Observational study. Secondary data of 60 children, aged 5-to-7, randomised to the intervention group in a phase-III randomised controlled trial [NIHR Portfolio 10006, ISRCTN: 18265148] testing the efficacy of the serious game http://www.scottga.org (available online). Usage was captured automatically, with each click, in real time. The total number of replays and total number of missing slides per game-run performed by the child, were recorded and used to monitor usage. Compliance outcomes were: total time running the game and number of completely missed slides. 57/60 played the game. Median age of parent/carer was 32. For 74% of the families, fathers resided at home and for 65% the parent/carer had A-levels-to-university education. At recruitment, 70% of the children were reported as anxious/highly-fearful and 37% as ""significantly psychologically disturbed"". Factors for non-compliance were absence of a father at home (P = 0.01) and higher child-anxiety (P = 0.01) and, to a lesser extent, a low parent/carer education level (P = 0.09). Interactive cartoons featuring dental assessment, oral health messages and modelling featured in the more popular slides.",2018-11-17 +30496481,ST131 fimH22 Escherichia coli isolate with a blaCMY-2/IncI1/ST12 plasmid obtained from a patient with bloodstream infection: highly similar to E. coli isolates of broiler origin.,"

Objectives

This study compares the genome of an ST131 CMY-2-producing Escherichia coli isolate from a Danish patient with other ST131 CMY-2-producing E. coli isolates of both human and animal origin.

Methods

In 2016, an ST131 CMY-2-producing E. coli isolate (ESBL20160056) was obtained from a patient with a bloodstream infection. The genome of the ESBL20160056 isolate was compared with genomes from six ST131 CMY-2-producing E. coli isolates obtained from broiler meat imported to Denmark, 15 ST131 CMY-2-producing E. coli isolates obtained from Enterobase (http://enterobase.warwick.ac.uk) and two ST131 CMY-2-producing E. coli from European collaborators. The plasmid from ESBL20160056 was sequenced using a MinION Mk1B (Oxford Nanopore Technologies).

Results

The E. coli isolate from the Danish patient clustered together with 13 other fimH22 ST131 CMY-2-producing E. coli isolates in a distinct clade. The clade consisted of genomes from six E. coli isolates from humans collected in Denmark, Spain, Cambodia and the USA, six E. coli isolates obtained from broiler meat samples imported to Denmark from France, the Netherlands and Germany, and two E. coli isolates obtained from broilers in Belgium and Luxembourg. The 101.5 kb plasmid with blaCMY-2 from ESBL20160056 had an IncI1 replicon and belonged to ST12 using the plasmid MLST scheme. In total, 10 of the 14 ST131 E. coli isolates belonging to the fimH22 clade carried an IncI1 ST12 plasmid with blaCMY-2.

Conclusions

From our data, it seems plausible that the ST131 fimH22 CMY-2-producing E. coli isolate obtained from the Danish patient could have a zoonotic broiler origin.",2019-03-01 +23044550,SemMedDB: a PubMed-scale repository of biomedical semantic predications.,"

Summary

Effective access to the vast biomedical knowledge present in the scientific literature is challenging. Semantic relations are increasingly used in knowledge management applications supporting biomedical research to help address this challenge. We describe SemMedDB, a repository of semantic predications (subject-predicate-object triples) extracted from the entire set of PubMed citations. We propose the repository as a knowledge resource that can assist in hypothesis generation and literature-based discovery in biomedicine as well as in clinical decision-making support.

Availability and implementation

The SemMedDB repository is available as a MySQL database for non-commercial use at http://skr3.nlm.nih.gov/SemMedDB. An UMLS Metathesaurus license is required.

Contact

kilicogluh@mail.nih.gov.",2012-10-08 +26123837,Predictors of Early Hospitalization After Deceased Donor Liver Transplantation.,"

Unlabelled

Hospitalizations after solid organ transplantation may affect patient outcomes. Identification of the factors attributed to them may decrease hospitalization rates, reduce overall cost, and improve post-transplant outcomes. We examined the risk factors for early hospitalization within 30 and 90 days after liver transplantation (LT).

Methods

Data on all deceased donor LT recipients (age ≥18 years) transplanted between 2/28/2002-2/27/2007 and discharged alive from the index hospitalization within 30 days of LT were collected (N = 267). Patients were followed up until December 31, 2013. Logistic regression was used to identify the predictors of 30-day hospitalization, and linear regression was used to identify the factors associated with number of days hospitalized during 30- and 90-day hospitalization after LT. Renal risk index (RRI), a recently developed and validated risk score that accurately predicts the post-LT ESRD based upon recipient factors at LT, was computed using RRI calculator ( http://rri.med.umich.edu ).

Results

One-third and more than half of the patients had at least one 30- and 90-day hospitalization, respectively. RRI decile (OR 1.12, P = 0.02) and serum sodium at LT (OR 0.90, P < 0.001) were independently associated with 30-day hospitalization after adjusting for MELD score. Serum creatinine at LT (β = 4.34, P = 0.001) and pre-LT admission days (β = 0.15, P = 0.027) affected the number of days hospitalized for 90-day hospitalization. RRI was also an independent predictor of post-LT mortality.

Conclusion

Early hospitalizations within 30 and 90 days after deceased donor LT are common. While all post-LT hospitalization cannot be prevented, efforts should be directed toward risk-based post-discharge care, and coordination of effective transitional care through ambulatory clinics. Implementation of such processes may attenuate early post-LT hospitalization and resource utilization and improve quality.",2015-06-30 +28575143,Seeing the wood for the trees: a forest of methods for optimization and omic-network integration in metabolic modelling.,"Metabolic modelling has entered a mature phase with dozens of methods and software implementations available to the practitioner and the theoretician. It is not easy for a modeller to be able to see the wood (or the forest) for the trees. Driven by this analogy, we here present a 'forest' of principal methods used for constraint-based modelling in systems biology. This provides a tree-based view of methods available to prospective modellers, also available in interactive version at http://modellingmetabolism.net, where it will be kept updated with new methods after the publication of the present manuscript. Our updated classification of existing methods and tools highlights the most promising in the different branches, with the aim to develop a vision of how existing methods could hybridize and become more complex. We then provide the first hands-on tutorial for multi-objective optimization of metabolic models in R. We finally discuss the implementation of multi-view machine learning approaches in poly-omic integration. Throughout this work, we demonstrate the optimization of trade-offs between multiple metabolic objectives, with a focus on omic data integration through machine learning. 
We anticipate that the combination of a survey, a perspective on multi-view machine learning and a step-by-step R tutorial should be of interest for both the beginner and the advanced user.",2018-11-01 +26980515,2P2Idb v2: update of a structural database dedicated to orthosteric modulation of protein-protein interactions. ,"2P2Idb is a hand-curated structural database dedicated to protein-protein interactions with known small molecule orthosteric modulators. It compiles the structural information related to orthosteric inhibitors and their target [i.e. related 3D structures available in the RCSB Protein Data Bank (PDB)] and provides links to other useful databases. 2P2Idb includes all interactions for which both the protein-protein and protein-inhibitor complexes have been structurally characterized. Since its first release in 2010, the database has grown constantly and the current version contains 27 protein-protein complexes and 274 protein-inhibitor complexes corresponding to 242 unique small molecule inhibitors which represent almost a 5-fold increase compared to the previous version. A number of new data have been added, including new protein-protein complexes, binding affinities, molecular descriptors, precalculated interface parameters and links to other webservers. A new query tool has been implemented to search for inhibitors within the database using standard molecular descriptors. A novel version of the 2P2I-inspector tool has been implemented to calculate a series of physical and chemical parameters of the protein interfaces. Several geometrical parameters including planarity, eccentricity and circularity have been added as well as customizable distance cutoffs. This tool has also been extended to protein-ligand interfaces. The 2P2I database thus represents a wealth of structural source of information for scientists interested in the properties of protein-protein interactions and the design of protein-protein interaction modulators. 
Database URL: http://2p2idb.cnrs-mrs.fr.",2016-03-15 +22140112,HIstome--a relational knowledgebase of human histone proteins and histone modifying enzymes.,"Histones are abundant nuclear proteins that are essential for the packaging of eukaryotic DNA into chromosomes. Different histone variants, in combination with their modification 'code', control regulation of gene expression in diverse cellular processes. Several enzymes that catalyze the addition and removal of multiple histone modifications have been discovered in the past decade, enabling investigations of their role(s) in normal cellular processes and diverse pathological conditions. This sudden influx of data, however, has resulted in need of an updated knowledgebase that compiles, organizes and presents curated scientific information to the user in an easily accessible format. Here, we present HIstome, a browsable, manually curated, relational database that provides information about human histone proteins, their sites of modifications, variants and modifying enzymes. HIstome is a knowledgebase of 55 human histone proteins, 106 distinct sites of their post-translational modifications (PTMs) and 152 histone-modifying enzymes. Entries have been grouped into 5 types of histones, 8 types of post-translational modifications and 14 types of enzymes that catalyze addition and removal of these modifications. The resource will be useful for epigeneticists, pharmacologists and clinicians. HIstome: The Histone Infobase is available online at http://www.iiserpune.ac.in/~coee/histome/ and http://www.actrec.gov.in/histome/.",2011-12-02 +30007275,Effects of low-carbohydrate- compared with low-fat-diet interventions on metabolic control in people with type 2 diabetes: a systematic review including GRADE assessments.,"

Background

It remains uncertain which diet is best for people with type 2 diabetes (T2D).

Objective

We compared the effects of dietary carbohydrate restriction with fat restriction on markers of metabolic syndrome and quality of life in people with T2D.

Design

This systematic review of randomized controlled trials (RCTs) and controlled clinical trials (CCTs) compares the effects of a low-carbohydrate [≤40% of energy (%)] diet with those of a low-fat (≤30%) diet over a period of ≥4 wk in patients with T2D. Two investigators independently selected studies, extracted data, and assessed risk of bias. The GRADE (Grading of Recommendations Assessment, Development, and Evaluation) approach was used to assess the certainty of evidence. Pooled mean differences (MDs) and 95% CIs were calculated with the use of a random-effects model.

Results

Thirty-three RCTs and 3 CCTs (n = 2161) were included. Glycated hemoglobin declined more in people who consumed low-carbohydrate food than in those who consumed low-fat food in the short term (MD: -1.38%; 95% CI: -2.64%, -0.11%; very-low-certainty evidence). At 1 y, the MD was reduced to -0.36% (95% CI: -0.58%, -0.14%; low-certainty evidence); at 2 y, the difference had disappeared. There is low to high (majority moderate) certainty for small improvements of unclear clinical importance in plasma glucose, triglycerides, and HDL concentrations favoring low-carbohydrate food at half of the prespecified time points. There was little to no difference in LDL concentration or any of the secondary outcomes (body weight, waist circumference, blood pressure, quality of life) in response to either of the diets (very-low- to high-certainty evidence).

Conclusions

Currently available data provide low- to moderate-certainty evidence that dietary carbohydrate restriction to a maximum of 40% yields slightly better metabolic control of uncertain clinical importance than reduction in fat to a maximum of 30% in people with T2D. This systematic review is registered at http://www.crd.york.ac.uk/PROSPERO/display_record.php?ID=CRD42017052467 as CRD42017052467.",2018-08-01 +29151207,Rapid Prediction of Multi-dimensional NMR Data Sets Using FANDAS.,"Solid-state NMR (ssNMR) can provide structural information at the most detailed level and, at the same time, is applicable in highly heterogeneous and complex molecular environments. In the last few years, ssNMR has made significant progress in uncovering structure and dynamics of proteins in their native cellular environments [1-4]. Additionally, ssNMR has proven to be useful in studying large biomolecular complexes as well as membrane proteins at the atomic level [5]. In such studies, innovative labeling schemes have become a powerful approach to tackle spectral crowding. In fact, selecting the appropriate isotope-labeling schemes and a careful choice of the ssNMR experiments to be conducted are critical for applications of ssNMR in complex biomolecular systems. Previously, we have introduced a software tool called FANDAS (Fast Analysis of multidimensional NMR DAta Sets) that supports such investigations from the early stages of sample preparation to the final data analysis [6]. Here, we present a new version of FANDAS, called FANDAS 2.0, with improved user interface and extended labeling scheme options allowing the user to rapidly predict and analyze ssNMR data sets for a given protein-based application. It provides flexible options for advanced users to customize the program for tailored applications. In addition, the list of ssNMR experiments that can be predicted now includes proton (1H) detected pulse sequences. 
FANDAS 2.0, written in Python, is freely available through a user-friendly web interface at http://milou.science.uu.nl/services/FANDAS .",2018-01-01 +29996888,Prioritization and functional assessment of noncoding variants associated with complex diseases.,"Unraveling functional noncoding variants associated with complex diseases is still a great challenge. We present a novel algorithm, Prioritization And Functional Assessment (PAFA), that prioritizes and assesses the functionality of genetic variants by introducing population differentiation measures and recalibrating training variants. Comprehensive evaluations demonstrate that PAFA exhibits much higher sensitivity and specificity in prioritizing noncoding risk variants than existing methods. PAFA achieves improved performance in distinguishing both common and rare recurrent variants from non-recurrent variants by integrating multiple annotations and metrics. An integrated platform was developed, providing comprehensive functional annotations for noncoding variants by integrating functional genomic data, which can be accessed at http://159.226.67.237:8080/pafa .",2018-07-11 +29142589,FLIM-FRET analyzer: open source software for automation of lifetime-based FRET analysis.,"

Background

Despite the broad use of FRET techniques, available methods for analyzing protein-protein interaction are subject to high labor and lack of systematic analysis. We propose an open source software allowing the quantitative analysis of fluorescence lifetime imaging (FLIM) while integrating the steady-state fluorescence intensity information for protein-protein interaction studies.

Findings

Our developed open source software is dedicated to fluorescence lifetime imaging microscopy (FLIM) data obtained from Becker & Hickl SPC-830. FLIM-FRET analyzer includes: a user-friendly interface enabling automated intensity-based segmentation into single cells, time-resolved fluorescence data fitting to lifetime value for each segmented objects, batch capability, and data representation with donor lifetime versus acceptor/donor intensity quantification as a measure of protein-protein interactions.

Conclusions

The FLIM-FRET analyzer software is a flexible application for lifetime-based FRET analysis. The application, the C#. NET source code, and detailed documentation are freely available at the following URL: http://FLIM-analyzer.ip-korea.org.",2017-11-03 +25544807,Prediction of Gene Activity in Early B Cell Development Based on an Integrative Multi-Omics Analysis. ,"An increasingly common method for predicting gene activity is genome-wide chromatin immuno-precipitation of 'active' chromatin modifications followed by massively parallel sequencing (ChIP-seq). In order to understand better the relationship between developmentally regulated chromatin landscapes and regulation of early B cell development, we determined how differentially active promoter regions were able to predict relative RNA and protein levels at the pre-pro-B and pro-B stages. Herein, we describe a novel ChIP-seq quantification method (cRPKM) to identify active promoters and a multi-omics approach that compares promoter chromatin status with ongoing active transcription (GRO-seq), steady state mRNA (RNA-seq), inferred mRNA stability, and relative proteome abundance measurements (iTRAQ). We demonstrate that active chromatin modifications at promoters are good indicators of transcription and steady state mRNA levels. Moreover, we found that promoters with active chromatin modifications exclusively in one of these cell states frequently predicted the differential abundance of proteins. However, we found that many genes whose promoters have non-differential but active chromatin modifications also displayed changes in abundance of their cognate proteins. As expected, this large class of developmentally and differentially regulated proteins that was uncoupled from chromatin status used mostly post-transcriptional mechanisms. 
Strikingly, the most differentially abundant protein in our B-cell development system, 2410004B18Rik, was regulated by a post-transcriptional mechanism, which further analyses indicated was mediated by a micro-RNA. These data highlight how this integrated multi-omics data set can be a useful resource in uncovering regulatory mechanisms. This data can be accessed at: https://usegalaxy.org/u/thereddylab/p/prediction-of-gene-activity-based-on-an-integrative-multi-omics-analysis.",2014-02-01 +29842948,GB2sequin - A file converter preparing custom GenBank files for database submission.,"The typical wet lab user often annotates smaller sequences in the GenBank format, but resulting files are not accepted for database submission by NCBI. This makes submission of such annotations a cumbersome task. Here we present ""GB2sequin"" an easy-to-use web application that converts custom annotations in the GenBank format into the NCBI direct submission format Sequin. Additionally, the program generates a ""five-column, tab-delimited feature table"" and a FASTA file. Those are required for submission through BankIt or the update of an existing GenBank entry. We specifically developed ""GB2sequin"" for the regular wet lab researcher with strong focus on user-friendliness and flexibility. The application is equipped with an intuitive graphical interface and a comprehensive documentation. It can be employed to prepare any GenBank file for database submission and is freely available online at https://chlorobox.mpimp-golm.mpg.de/GenBank2Sequin.html.",2018-05-26 +29753807,Genome-wide identification and characterization of lncRNAs and miRNAs in cluster bean (Cyamopsis tetragonoloba).,"Long non coding RNAs (lncRNAs) are a class of non-protein coding RNAs that play a crucial role in most of the biological activities like nodule metabolism, flowering time and male sterility. Quite often, the function of lncRNAs is species-specific in nature. 
Thus an attempt has been made in cluster bean (Cyamopsis tetragonoloba) for the first time to computationally identify lncRNAs based on a proposed index and study their targeted genes. Further, these targeted genes of lncRNAs were identified and characterized for their role in various biological processes like stress mechanisms, DNA damage repair, cell wall synthesis. Besides, lncRNAs and miRNAs bearing Simple Sequence Repeats (SSRs) were identified that contribute towards biogenesis of small non-coding RNAs. Moreover, five novel endogenous Target Mimic lncRNAs (eTMs) were identified that may disrupt the miRNA-mRNA regulations. For easy understanding and usability, a database CbLncRNAdb has been developed and made available at http://cabgrid.res.in/cblncrnadb.",2018-05-26 +31019489,Googling Boundaries for Operating Mobile Stroke Unit for Stroke Codes.,"Background: Mobile stroke units (MSU) have been proposed to expedite delivery of recombinant tissue plasminogen activator (tPA) and expedite endovascular clot retrieval (ECR). Unexplored questions in the use of MSU include: maximal distance from base, time limit with regards to the use CT imaging, CT Angiography, CT Perfusion, and Telemedicine. We developed a computational model as an app (https://gntem3.shinyapps.io/ambmc/), taking into account traveling time to explore this issue. The aim of this study was to define the operating parameters for an MSU in a large metropolitan city, based on the geography of Melbourne. Methods: There are 2 hospitals (Royal Melbourne Hospital/RMH, Monash Medical Center/MMC) designated to provide state-wide ECR services. In these spatial simulations, the MSU is based at RMH and delivers tPA at the patient's pick-up address and then takes the patient to the nearest ECR center. We extracted the geocode of suburbs in Melbourne and travel time to each hospital using ggmap, an interface to Google Map API. 
The app contains widgets for varying the processing time at the patient location (default = 30 min), performing CT angiography (default = 10 min), performing telemedicine consultation (default = 15 min). The data were compared against those for usual ambulance metrics (default traveling time = 15 min, processing time at patient's location = 20 min, door to tPA = 60 min, door to groin = 90 min). Varying the widgets allow the viewer to explore the trade-off between the variable of interest and time to therapy at a suburb level. Results: The MSU was superior for delivering tPA to all Melbourne suburbs (up to 76 min from RMH). If the CTA times or processing time at location increased by 20 min then it was superior for providing ECR to only 74.9% of suburbs if the return base was RMH. Addition of CT Perfusion or telemedicine consultation affect the ability of a single hospital to provide ECR but not tPA if these additions can be limited to 20 min. Conclusion: The app can help to define how best to deploy the MSU across Melbourne. This app can be modified and used to optimize operating characteristics of MSU in other centers around the world.",2019-04-04 +29738603,A powerful approach to the study of moderate effect modification in observational studies.,"Effect modification means the magnitude or stability of a treatment effect varies as a function of an observed covariate. Generally, larger and more stable treatment effects are insensitive to larger biases from unmeasured covariates, so a causal conclusion may be considerably firmer if this pattern is noted if it occurs. We propose a new strategy, called the submax-method, that combines exploratory, and confirmatory efforts to determine whether there is stronger evidence of causality-that is, greater insensitivity to unmeasured confounding-in some subgroups of individuals. It uses the joint distribution of test statistics that split the data in various ways based on certain observed covariates. 
For L binary covariates, the method splits the population L times into two subpopulations, perhaps first men and women, perhaps then smokers and nonsmokers, computing a test statistic from each subpopulation, and appends the test statistic for the whole population, making 2 L + 1 test statistics in total. Although L binary covariates define 2 L interaction groups, only 2 L + 1 tests are performed, and at least L + 1 of these tests use at least half of the data. The submax-method achieves the highest design sensitivity and the highest Bahadur efficiency of its component tests. Moreover, the form of the test is sufficiently tractable that its large sample power may be studied analytically. The simulation suggests that the submax method exhibits superior performance, in comparison with an approach using CART, when there is effect modification of moderate size. Using data from the NHANES I epidemiologic follow-up survey, an observational study of the effects of physical activity on survival is used to illustrate the method. The method is implemented in the R package submax which contains the NHANES example. An online Appendix provides simulation results and further analysis of the example.",2018-05-08 +29757329,Supporting Dynamic Quantization for High-Dimensional Data Analytics.,"Similarity searches are at the heart of exploratory data analysis tasks. Distance metrics are typically used to characterize the similarity between data objects represented as feature vectors. However, when the dimensionality of the data increases and the number of features is large, traditional distance metrics fail to distinguish between the closest and furthest data points. Localized distance functions have been proposed as an alternative to traditional distance metrics. These functions only consider dimensions close to query to compute the distance/similarity. Furthermore, in order to enable interactive explorations of high-dimensional data, indexing support for ad-hoc queries is needed. 
In this work we set up to investigate whether bit-sliced indices can be used for exploratory analytics such as similarity searches and data clustering for high-dimensional big-data. We also propose a novel dynamic quantization called Query dependent Equi-Depth (QED) quantization and show its effectiveness on characterizing high-dimensional similarity. When applying QED we observe improvements in kNN classification accuracy over traditional distance functions.

Acm reference format

Gheorghi Guzun and Guadalupe Canahuate. 2017. Supporting Dynamic Quantization for High-Dimensional Data Analytics. In Proceedings of Ex-ploreDB'17, Chicago, IL, USA, May 14-19, 2017, 6 pages. https://doi.org/http://dx.doi.org/10.1145/3077331.3077336.",2017-05-01 +27799474,mirDNMR: a gene-centered database of background de novo mutation rates in human.,"De novo germline mutations (DNMs) are the rarest genetic variants proven to cause a considerable number of sporadic genetic diseases, such as autism spectrum disorders, epileptic encephalopathy, schizophrenia, congenital heart disease, type 1 diabetes, and hearing loss. However, it is difficult to accurately assess the cause of DNMs and identify disease-causing genes from the considerable number of DNMs in probands. A common method to this problem is to identify genes that harbor significantly more DNMs than expected by chance, with accurate background DNM rate (DNMR) required. Therefore, in this study, we developed a novel database named mirDNMR for the collection of gene-centered background DNMRs obtained from different methods and population variation data. The database has the following functions: (i) browse and search the background DNMRs of each gene predicted by four different methods, including GC content (DNMR-GC), sequence context (DNMR-SC), multiple factors (DNMR-MF) and local DNA methylation level (DNMR-DM); (ii) search variant frequencies in publicly available databases, including ExAC, ESP6500, UK10K, 1000G and dbSNP and (iii) investigate the DNM burden to prioritize candidate genes based on the four background DNMRs using three statistical methods (TADA, Binomial and Poisson test). As a case study, we successfully employed our database in candidate gene prioritization for a sporadic complex disease: intellectual disability. 
In conclusion, mirDNMR (https://www.wzgenomics.cn/mirdnmr/) can be widely used to identify the genetic basis of sporadic genetic diseases.",2016-10-30 +21846734,A framework for analytical characterization of monoclonal antibodies based on reactivity profiles in different tissues.,"

Motivation

Monoclonal antibodies (mAbs) are among the most powerful and important tools in biology and medicine. MAb development is of great significance to many research and clinical applications. Therefore, objective mAb classification is essential for categorizing and comparing mAb panels based on their reactivity patterns in different cellular species. However, typical flow cytometric mAb profiles present unique modeling challenges with their non-Gaussian features and intersample variations. It makes accurate mAb classification difficult to do with the currently used kernel-based or hierarchical clustering techniques.

Results

To address these challenges, in the present study we developed a formal two-step framework called mAbprofiler for systematic, parametric characterization of mAb profiles. Further, we measured the reactivity of hundreds of new antibodies in diverse tissues using flow cytometry, which we successfully classified using mAbprofiler. First, mAbprofiler fits a mAb's flow cytometric histogram with a finite mixture model of skew t distributions that is robust against non-Gaussian features, and constructs a precise, smooth and mathematically rigorous profile. Then it performs novel curve clustering of the fitted mAb profiles using a skew t mixture of non-linear regression model that can handle intersample variation. Thus, mAbprofiler provides a new framework for identifying robust mAb classes, all well defined by distinct parametric templates, which can be used for classifying new mAb samples. We validated our classification results both computationally and empirically using mAb profiles of known classification.

Availability and implementation

A demonstration code in R is available at the journal website. The R code implementing the full framework is available from the author website - http://amath.nchu.edu.tw/www/teacher/tilin/software

Contact

saumyadipta_pyne@dfci.harvard.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-16 +28763186,Improving risk adjustment in the PRAiS (Partial Risk Adjustment in Surgery) model for mortality after paediatric cardiac surgery and improving public understanding of its use in monitoring outcomes,"

Background

In 2011, we developed a risk model for 30-day mortality after children’s heart surgery. The PRAiS (Partial Risk Adjustment in Surgery) model uses data on the procedure performed, diagnosis, age, weight and comorbidity. Our treatment of comorbidity was simplistic because of data quality. Software that implements PRAiS is used by the National Congenital Heart Disease Audit (NCHDA) in its audit work. The use of PRAiS triggered the temporary suspension of surgery at one unit in 2013. The public anger that surrounded this illustrated the need for public resources around outcomes monitoring.

Objectives

(1) To improve the PRAiS risk model by incorporating more information about comorbidities. (2) To develop online resources for the public to help them to understand published mortality data.

Design

Objective 1 The outcome measure was death within 30 days of the start of each surgical episode of care. The analysts worked with an expert panel of clinical and data management representatives. Model development followed an iterative process of clinical discussion of risk factors, development of regression models and assessment of model performance under cross-validation. Performance was measured using the area under the receiving operator characteristic (AUROC) curve and calibration in the cross-validation test sets. The final model was further assessed in a 2014–15 validation data set. Objective 2 We developed draft website material that we iteratively tested through four sets of two workshops (one workshop for parents of children who had undergone heart surgery and one workshop for other interested users). Each workshop recruited new participants. The academic psychologists ran two sets of three experiments to explore further understanding of the web content.

Data

We used pseudonymised NCHDA data from April 2009 to April 2014. We later unexpectedly received a further year of data (2014–15), which became a prospective validation set.

Results

Objective 1 The cleaned 2009–14 data comprised 21,838 30-day surgical episodes, with 539 deaths. The 2014–15 data contained 4207 episodes, with 97 deaths. The final regression model included four new comorbidity groupings. Under cross-validation, the model had a median AUROC curve of 0.83 (total range 0.82 to 0.83), a median calibration slope of 0.92 (total range 0.64 to 1.25) and a median intercept of –0.23 (range –1.08 to 0.85). In the validation set, the AUROC curve was 0.86 [95% confidence interval (CI) 0.83 to 0.89], and its calibration slope and intercept were 1.01 (95% CI 0.83 to 1.18) and 0.11 (95% CI –0.45 to 0.67), respectively. We recalibrated the final model on 2009–15 data and updated the PRAiS software. Objective 2 We coproduced a website (http://childrensheartsurgery.info/) that provides interactive exploration of the data, two animations and background information. It was launched in June 2016 and was very well received.

Limitations

We needed to use discharge status as a proxy for 30-day life status for the 14% of overseas patients without a NHS number. We did not have sufficient time or resources to extensively test the usability and take-up of the website following its launch.

Conclusions

The project successfully achieved its stated aims. A key theme throughout has been the importance of collaboration and coproduction. In particular for aim 2, we generated a great deal of generalisable learning about how to communicate complex clinical and mathematical information.

Further work

Extending our codevelopment approach to cover many other aspects of quality measurement across congenital heart disease and other specialised NHS services.

Funding

The National Institute for Health Research Health Services and Delivery Research programme.",2017-08-02 +27357693,VarElect: the phenotype-based variation prioritizer of the GeneCards Suite.,"

Background

Next generation sequencing (NGS) provides a key technology for deciphering the genetic underpinnings of human diseases. Typical NGS analyses of a patient depict tens of thousands non-reference coding variants, but only one or very few are expected to be significant for the relevant disorder. In a filtering stage, one employs family segregation, rarity in the population, predicted protein impact and evolutionary conservation as a means for shortening the variation list. However, narrowing down further towards culprit disease genes usually entails laborious seeking of gene-phenotype relationships, consulting numerous separate databases. Thus, a major challenge is to transition from the few hundred shortlisted genes to the most viable disease-causing candidates.

Results

We describe a novel tool, VarElect ( http://ve.genecards.org ), a comprehensive phenotype-dependent variant/gene prioritizer, based on the widely-used GeneCards, which helps rapidly identify causal mutations with extensive evidence. The GeneCards suite offers an effective and speedy alternative, whereby >120 gene-centric automatically-mined data sources are jointly available for the task. VarElect cashes on this wealth of information, as well as on GeneCards' powerful free-text Boolean search and scoring capabilities, proficiently matching variant-containing genes to submitted disease/symptom keywords. The tool also leverages the rich disease and pathway information of MalaCards, the human disease database, and PathCards, the unified pathway (SuperPaths) database, both within the GeneCards Suite. The VarElect algorithm infers direct as well as indirect links between genes and phenotypes, the latter benefitting from GeneCards' diverse gene-to-gene data links in GenesLikeMe. Finally, our tool offers an extensive gene-phenotype evidence portrayal (""MiniCards"") and hyperlinks to the parent databases.

Conclusions

We demonstrate that VarElect compares favorably with several often-used NGS phenotyping tools, thus providing a robust facility for ranking genes, pointing out their likelihood to be related to a patient's disease. VarElect's capacity to automatically process numerous NGS cases, either in stand-alone format or in VCF-analyzer mode (TGex and VarAnnot), is indispensable for emerging clinical projects that involve thousands of whole exome/genome NGS analyses.",2016-06-23 +27688016,Probiotics for the Prevention of Pediatric Antibiotic-Associated Diarrhea.,"Goldenberg JZ, Lytvyn L, Steurich J, Parkin P, Mahant S, Johnston BC. Probiotics for the prevention of pediatric antibiotic-associated diarrhea.Cochrane Database Syst Rev2015, Issue 12. Art. No.: CD004827. http://dx.doi.org/10.1002/14651858.CD004827.pub4.

Background

Antibiotics are frequently prescribed in children. They alter the microbial balance within the gastrointestinal tract, commonly resulting in antibiotic-associated diarrhea (AAD). Probiotics may prevent AAD via restoration of the gut microflora.

Objectives

The primary objectives were to assess the efficacy and safety of probiotics (any specified strain or dose) used for the prevention of AAD in children.

Search methods

MEDLINE, EMBASE, CENTRAL, CINAHL, AMED, and the Web of Science (inception to November 2014) were searched along with specialized registers including the Cochrane IBD/FBD review group, CISCOM (Centralized Information Service for Complementary Medicine), NHS Evidence, the International Bibliographic Information on Dietary Supplements, as well as trial registries. Letters were sent to authors of included trials, nutraceutical and pharmaceutical companies, and experts in the field requesting additional information on ongoing or unpublished trials. Conference proceedings, dissertation abstracts, and reference lists from included and relevant articles were also searched.

Selection criteria

Randomized, parallel, controlled trials in children (0-18 years) receiving antibiotics, that compare probiotics to placebo, active alternative prophylaxis, or no treatment and measure the incidence of diarrhea secondary to antibiotic use were considered for inclusion.

Data collection and analysis

Study selection, data extraction, and methodological quality assessment using the risk of bias instrument were conducted independently and in duplicate by two authors. Dichotomous data (incidence of diarrhea and adverse events) were combined using a pooled risk ratio (RR) or risk difference (RD), and continuous data (mean duration of diarrhea and mean daily stool frequency) as mean difference (MD), along with their corresponding 95% confidence interval (95% CI). For overall pooled results on the incidence of diarrhea, sensitivity analyses included available case versus extreme-plausible analyses and random- versus fixed-effect models. To explore possible explanations for heterogeneity, a priori subgroup analysis was conducted on probiotic strain, dose, definition of antibiotic-associated diarrhea, and risk of bias. We also conducted post hoc subgroup analyses by patient diagnosis, single versus multi-strain, industry sponsorship, and inpatient status. The overall quality of the evidence supporting the outcomes was evaluated using the GRADE criteria.

Main results

Overall, 23 studies (3938 participants) met the inclusion criteria. Trials included treatment with either Bacillus spp., Bifidobacterium spp., Clostridium butyricum, Lactobacilli spp., Lactococcus spp., Leuconostoc cremoris, Saccharomyces spp., or Streptococcus spp., alone or in combination. Eleven studies used a single-strain probiotic, four combined two probiotic strains, three combined three probiotic strains, one combined four probiotic strains, two combined seven probiotic strains, one included ten probiotic strains, and one study included two probiotic arms that used three and two strains, respectively. The risk of bias was determined to be high or unclear in 13 studies and low in 10 studies. Available case (patients who did not complete the studies were not included in the analysis) results from 22/23 trials reporting on the incidence of diarrhea show a precise benefit from probiotics compared to active, placebo, or no treatment control. The incidence of AAD in the probiotic group was 8% (163/1992) compared to 19% (364/1906) in the control group (RR = 0.46; 95% CI: 0.35-0.61; I2 = 55%, 3898 participants). A GRADE analysis indicated that the overall quality of the evidence for this outcome was moderate. This benefit remained statistically significant in an extreme-plausible (60% of children lost to follow-up in probiotic group and 20% lost to follow-up in the control group had diarrhea) sensitivity analysis, where the incidence of AAD in the probiotic group was 14% (330/2294) compared to 19% (426/2235) in the control group (RR = 0.69; 95% CI: 0.54-0.89; I2 = 63%, 4529 participants). None of the 16 trials (n = 2455) that reported on adverse events documented any serious adverse events attributable to probiotics. Meta-analysis excluded all but an extremely small non-significant difference in adverse events between treatment and control (RD = 0.00, 95% CI: -0.01 to 0.01). The majority of adverse events were in placebo, standard care, or no treatment group. 
Adverse events reported in the studies include rash, nausea, gas, flatulence, abdominal bloating, abdominal pain, vomiting, increased phlegm, chest pain, constipation, taste disturbance, and low appetite. AUTHORS׳ CONCLUSIONS: Moderate quality evidence suggests a protective effect of probiotics in preventing AAD. Our pooled estimate suggests a precise (RR 0.46; 95% CI: 0.35-0.61) probiotic effect with an NNT of 10. Among the various probiotics evaluated, Lactobacillus rhamnosus or Saccharomyces boulardii at 5-40 billion colony-forming units/day may be appropriate given the modest NNT and the likelihood that adverse events are very rare. It is premature to draw conclusions about the efficacy and safety of other probiotic agents for pediatric AAD. Although no serious adverse events were observed among otherwise healthy children, serious adverse events have been observed in severely debilitated or immunocompromised children with underlying risk factors including central venous catheter use and disorders associated with bacterial/fungal translocation. Until further research has been conducted, probiotic use should be avoided in pediatric populations at risk for adverse events. Future trials would benefit from a standard and valid outcomes to measure AAD.",2016-08-26 +21791039,"Comparative high-throughput transcriptome sequencing and development of SiESTa, the Silene EST annotation database.","

Background

The genus Silene is widely used as a model system for addressing ecological and evolutionary questions in plants, but advances in using the genus as a model system are impeded by the lack of available resources for studying its genome. Massively parallel sequencing cDNA has recently developed into an efficient method for characterizing the transcriptomes of non-model organisms, generating massive amounts of data that enable the study of multiple species in a comparative framework. The sequences generated provide an excellent resource for identifying expressed genes, characterizing functional variation and developing molecular markers, thereby laying the foundations for future studies on gene sequence and gene expression divergence. Here, we report the results of a comparative transcriptome sequencing study of eight individuals representing four Silene and one Dianthus species as outgroup. All sequences and annotations have been deposited in a newly developed and publicly available database called SiESTa, the Silene EST annotation database.

Results

A total of 1,041,122 EST reads were generated in two runs on a Roche GS-FLX 454 pyrosequencing platform. EST reads were analyzed separately for all eight individuals sequenced and were assembled into contigs using TGICL. These were annotated with results from BLASTX searches and Gene Ontology (GO) terms, and thousands of single-nucleotide polymorphisms (SNPs) were characterized. Unassembled reads were kept as singletons and together with the contigs contributed to the unigenes characterized in each individual. The high quality of unigenes is evidenced by the proportion (49%) that have significant hits in similarity searches with the A. thaliana proteome. The SiESTa database is accessible at http://www.siesta.ethz.ch.

Conclusion

The sequence collections established in the present study provide an important genomic resource for four Silene and one Dianthus species and will help to further develop Silene as a plant model system. The genes characterized will be useful for future research not only in the species included in the present study, but also in related species for which no genomic resources are yet available. Our results demonstrate the efficiency of massively parallel transcriptome sequencing in a comparative framework as an approach for developing genomic resources in diverse groups of non-model organisms.",2011-07-26 +29524510,WebPropagate: A Web Server for Network Propagation.,"Network propagation is a powerful tool for genetic analysis which is widely used to identify genes and genetic modules that underlie a process of interest. Here we provide a graphical, web-based platform (http://anat.cs.tau.ac.il/WebPropagate/) in which researchers can easily apply variants of this method to data sets of interest using up-to-date networks of protein-protein interactions in several organisms.",2018-03-07 +25887129,"dbVOR: a database system for importing pedigree, phenotype and genotype data and exporting selected subsets.","

Background

When studying the genetics of a human trait, we typically have to manage both genome-wide and targeted genotype data. There can be overlap of both people and markers from different genotyping experiments; the overlap can introduce several kinds of problems. Most times the overlapping genotypes are the same, but sometimes they are different. Occasionally, the lab will return genotypes using a different allele labeling scheme (for example 1/2 vs A/C). Sometimes, the genotype for a person/marker index is unreliable or missing. Further, over time some markers are merged and bad samples are re-run under a different sample name. We need a consistent picture of the subset of data we have chosen to work with even though there might possibly be conflicting measurements from multiple data sources.

Results

We have developed the dbVOR database, which is designed to hold data efficiently for both genome-wide and targeted experiments. The data are indexed for fast retrieval by person and marker. In addition, we store pedigree and phenotype data for our subjects. The dbVOR database allows us to select subsets of the data by several different criteria and to merge their results into a coherent and consistent whole. Data may be filtered by: family, person, trait value, markers, chromosomes, and chromosome ranges. The results can be presented in columnar, Mega2, or PLINK format.

Conclusions

dbVOR serves our needs well. It is freely available from https://watson.hgen.pitt.edu/register . Documentation for dbVOR can be found at https://watson.hgen.pitt.edu/register/docs/dbvor.html .",2015-03-18 +28334237,"DNA Compass: a secure, client-side site for navigating personal genetic information.","

Motivation

Millions of individuals have access to raw genomic data using direct-to-consumer companies. The advent of large-scale sequencing projects, such as the Precision Medicine Initiative, will further increase the number of individuals with access to their own genomic information. However, querying genomic data requires a computer terminal and computational skill to analyze the data—an impediment for the general public.

Results

DNA Compass is a website designed to empower the public by enabling simple navigation of personal genomic data. Users can query the status of their genomic variants for over 1658 markers or tens of millions of documented single nucleotide polymorphisms (SNPs). DNA Compass presents the relevant genotypes of the user side-by-side with explanatory scientific resources. The genotype data never leaves the user's computer, a feature that provides improved security and performance. More than 12 000 unique users, mainly from the general genetic genealogy community, have already used DNA Compass, demonstrating its utility.

Availability and implementation

DNA Compass is freely available on https://compass.dna.land .

Contact

yaniv@cs.columbia.edu.",2017-07-01 +26144527,Proteogenomics Dashboard for the Human Proteome Project.,"dasHPPboard is a novel proteomics-based dashboard that collects and reports the experiments produced by the Spanish Human Proteome Project consortium (SpHPP) and aims to help HPP to map the entire human proteome. We have followed the strategy of analog genomics projects like the Encyclopedia of DNA Elements (ENCODE), which provides a vast amount of data on human cell lines experiments. The dashboard includes results of shotgun and selected reaction monitoring proteomics experiments, post-translational modifications information, as well as proteogenomics studies. We have also processed the transcriptomics data from the ENCODE and Human Body Map (HBM) projects for the identification of specific gene expression patterns in different cell lines and tissues, taking special interest in those genes having little proteomic evidence available (missing proteins). Peptide databases have been built using single nucleotide variants and novel junctions derived from RNA-Seq data that can be used in search engines for sample-specific protein identifications on the same cell lines or tissues. The dasHPPboard has been designed as a tool that can be used to share and visualize a combination of proteomic and transcriptomic data, providing at the same time easy access to resources for proteogenomics analyses. The dasHPPboard can be freely accessed at: http://sphppdashboard.cnb.csic.es.",2015-07-16 +26655876,Length of hospital stay after hip fracture and risk of early mortality after discharge in New York state: retrospective cohort study.,"

Study question

Can the length of hospital stay for hip fracture affect a patient's risk of death 30 days after discharge?

Methods

In a retrospective cohort study, population based registry data from the New York Statewide Planning and Research Cooperative System (SPARCS) were used to investigate 188,208 patients admitted to hospital for hip fracture in New York state from 2000 to 2011. Patients were aged 50 years and older, and received surgical or non-surgical treatment. The main outcome measure was the mortality rate at 30 days after hospital discharge.

Study answer and limitations

Hospital stays of 11-14 days for hip fracture were associated with a 32% increased odds of death 30 days after discharge, compared with stays lasting one to five days (odds ratio 1.32 (95% confidence interval 1.19 to 1.47)). These odds increased to 103% for stays longer than 14 days (2.03 (1.84 to 2.24)). Other risk factors associated with early mortality included discharge to a hospice facility, older age, metastatic disease, and non-surgical management. The 30 day mortality rate after discharge was 4.5% for surgically treated patients and 10.7% for non-surgically treated patients. These findings might not be generalizable to populations in other US states or in other countries. The administrative claims data used could have been incomplete or include inaccurate coding of diagnoses and comorbid conditions. The database also did not include patient socioeconomic status, which could affect access to care to a greater extent in New York state than in European countries. Specific cause of death was not available because few autopsies are performed in this population.

What this study adds

By contrast with recent findings in Sweden, decreased length of hospital stay for hip fracture was associated with reduced rates of early mortality in a US cohort in New York state. This could reflect critical system differences in the treatment of hip fractures between Europe and the USA. Funding, competing interests, data sharing: University of Rochester grant from the Clinical Translational Science Institute for statistical analyses used in this work (National Institutes of Health (UL1 TR000042)) and the National Institutes of Health (K-08 AR060164-01A). No competing interests declared. Data may be obtained through SPARCS at https://www.health.ny.gov/statistics/sparcs/access/.",2015-12-10 +29786766,Bacterial DNA detected on pathologically changed heart valves using 16S rRNA gene amplification.,"Nowadays, dental diseases are one of the most common illnesses in the world. Some of them can lead to translocation of oral bacteria to the bloodstream causing intermittent bacteraemia. Therefore, a potential association between oral infection and cardiovascular diseases has been discussed in recent years as a result of adhesion of oral microbes to the heart valves. The aim of this study was to detect oral bacteria on pathologically changed heart valves not caused by infective endocarditis. In the study, patients with pathologically changed heart valves were involved. Samples of heart valves removed during heart valve replacement surgery were cut into two parts. One aliquot was cultivated aerobically and anaerobically. Bacterial DNA was extracted using Ultra-Deep Microbiome Prep (Molzym GmbH, Bremen, Germany) followed by a 16S rRNA gene PCR amplification using Mastermix 16S Complete kit (Molzym GmbH, Bremen, Germany). Positive PCR products were sequenced and the sequences were analyzed using BLAST database ( http://www.ncbi.nlm.nih/BLAST ). During the study period, 41 samples were processed. 
Bacterial DNA of the following bacteria was detected in 21 samples: Cutibacterium acnes (formerly Propionibacterium acnes) (n = 11; 52.38% of patients with positive bacterial DNA detection), Staphylococcus sp. (n = 9; 42.86%), Streptococcus sp. (n = 1; 4.76%), Streptococcus sanguinis (n = 4; 19.05%), Streptococcus oralis (n = 1; 4.76%), Carnobacterium sp. (n = 1; 4.76%), Bacillus sp. (n = 2; 9.52%), and Bergeyella sp. (n = 1; 4.76%). In nine samples, multiple bacteria were found. Our results showed significant appearance of bacteria on pathologically changed heart valves in patients with no symptoms of infective endocarditis.",2018-05-22 +26657893,"HRGRN: A Graph Search-Empowered Integrative Database of Arabidopsis Signaling Transduction, Metabolism and Gene Regulation Networks.","The biological networks controlling plant signal transduction, metabolism and gene regulation are composed of not only tens of thousands of genes, compounds, proteins and RNAs but also the complicated interactions and co-ordination among them. These networks play critical roles in many fundamental mechanisms, such as plant growth, development and environmental response. Although much is known about these complex interactions, the knowledge and data are currently scattered throughout the published literature, publicly available high-throughput data sets and third-party databases. Many 'unknown' yet important interactions among genes need to be mined and established through extensive computational analysis. However, exploring these complex biological interactions at the network level from existing heterogeneous resources remains challenging and time-consuming for biologists. Here, we introduce HRGRN, a graph search-empowered integrative database of Arabidopsis signal transduction, metabolism and gene regulatory networks. 
HRGRN utilizes Neo4j, which is a highly scalable graph database management system, to host large-scale biological interactions among genes, proteins, compounds and small RNAs that were either validated experimentally or predicted computationally. The associated biological pathway information was also specially marked for the interactions that are involved in the pathway to facilitate the investigation of cross-talk between pathways. Furthermore, HRGRN integrates a series of graph path search algorithms to discover novel relationships among genes, compounds, RNAs and even pathways from heterogeneous biological interaction data that could be missed by traditional SQL database search methods. Users can also build subnetworks based on known interactions. The outcomes are visualized with rich text, figures and interactive network graphs on web pages. The HRGRN database is freely available at http://plantgrn.noble.org/hrgrn/.",2015-12-12 +29710096,"Factors Contributing to Preschoolers' Communicative Participation Outcomes: Findings From a Population-Based Longitudinal Cohort Study in Ontario, Canada.","

Purpose

The aim of this study was to identify predictors of communicative participation outcomes for a large cohort of preschoolers with speech and language impairments.

Method

A secondary analysis of longitudinal program evaluation data from Ontario, Canada's Preschool Speech and Language Program was done. Data available for 46,872 children 18-67 months of age (M = 41.76 months, SD = 11.92; 68% boys, 32% girls) were previously used to predict children's communicative participation skill development in 5 levels of function. Demographic and intervention-based variables were added to the models to identify new predictors of growth.

Results

Three demographic and 3 intervention-based variables were statistically significant predictors of children's communicative participation outcomes. Clinically significant predictors included participation in an early learning environment, receipt of speech-language interventions, and the amount of time spent in intervention. These variables impacted predicted outcomes differently, depending on a child's level of communicative function.

Conclusions

This population-based study of preschoolers with speech and language impairments identified predictors of growth in communicative participation skills—an outcome important and meaningful to families but not often explored. A broad picture emerged of factors that may influence the development of communicative participation skills and may be used to predict outcomes for preschoolers. Given the large sample size, these robust findings may be used to predict outcomes outside the Preschool Speech and Language Program as well.

Supplemental material

https://doi.org/10.23641/asha.6024422.",2018-05-01 +29420462,"Advisory Committee on Immunization Practices Recommended Immunization Schedule for Adults Aged 19 Years or Older - United States, 2018.","In October 2017, the Advisory Committee on Immunization Practices (ACIP) voted to approve the Recommended Immunization Schedule for Adults Aged 19 Years or Older, United States, 2018. The 2018 adult immunization schedule summarizes ACIP recommendations in two figures and a table of contraindications and precautions for vaccines recommended for adults, and is intended to assist health care providers in implementing the current ACIP recommendations for vaccinating adults. The schedule can be found at https://www.cdc.gov/vaccines/schedules.* The full ACIP recommendations for each vaccine are available at https://www.cdc.gov/vaccines/hcp/acip-recs/index.html. The 2018 adult immunization schedule has also been approved by the American College of Physicians (https://www.acponline.org), the American Academy of Family Physicians (https://www.aafp.org), the American College of Obstetricians and Gynecologists (https://www.acog.org), and the American College of Nurse-Midwives (http://www.midwife.org). The ACIP-recommended use of each vaccine is developed after an in-depth review of vaccine-related data, including data on disease epidemiology, vaccine efficacy and effectiveness, vaccine safety, feasibility of program implementation, and economic aspects of immunization policy (1).",2018-02-09 +28521008,GeMSTONE: orchestrated prioritization of human germline mutations in the cloud.,"Integrative analysis of whole-genome/exome-sequencing data has been challenging, especially for the non-programming research community, as it requires simultaneously managing a large number of computational tools. Even computational biologists find it unexpectedly difficult to reproduce results from others or optimize their strategies in an end-to-end workflow. 
We introduce Germline Mutation Scoring Tool fOr Next-generation sEquencing data (GeMSTONE), a cloud-based variant prioritization tool with high-level customization and a comprehensive collection of bioinformatics tools and data libraries (http://gemstone.yulab.org/). GeMSTONE generates and readily accepts a shareable 'recipe' file for each run to either replicate previous results or analyze new data with identical parameters and provides a centralized workflow for prioritizing germline mutations in human disease within a streamlined workflow rather than a pool of program executions.",2017-07-01 +26981408,De novo transcriptome assembly of two contrasting pumpkin cultivars.,"Cucurbita pepo (squash, pumpkin, gourd), a worldwide-cultivated vegetable of American origin, is extremely variable in fruit characteristics. However, the information associated with genes and genetic markers for pumpkin is very limited. In order to identify new genes and to develop genetic markers, we performed a transcriptome analysis (RNA-Seq) of two contrasting pumpkin cultivars. Leaves and female flowers of cultivars, 'Big Moose' with large round fruits and 'Munchkin' with small round fruits, were harvested for total RNA extraction. We obtained a total of 6 GB (Big Moose; http://www.ncbi.nlm.nih.gov/Traces/sra/?run=SRR3056882) and 5 GB (Munchkin; http://www.ncbi.nlm.nih.gov/Traces/sra/?run=SRR3056883) sequence data (NCBI SRA database SRX1502732 and SRX1502735, respectively), which correspond to 18,055,786 and 14,824,292 150-base reads. After quality assessment, the clean sequences were 17,995,932 and 14,774,486 respectively. The numbers of total transcripts for 'Big Moose' and 'Munchkin' were 84,727 and 68,051, respectively. TransDecoder identified possible coding regions in assembled transcripts. 
This study provides transcriptome data for two contrasting pumpkin cultivars, which might be useful for genetic marker development and comparative transcriptome analyses.",2016-01-15 +29543360,MicroRNA expression data analysis to identify key miRNAs associated with Alzheimer's disease.,"BACKGROUND:MicroRNAs (miRNAs) have become increasingly prevalent as a result of the association of their deregulation with neurodegenerative disorders, especially Alzheimer's disease (AD). However, the association between miRNAs and AD remains unclear. METHODS:In the present study, nine representative miRNA datasets were selected for the identification of the critical miRNAs by analyzing the overlapping relationships among them. TargetScan software (http://www.targetscan.org) was used to predict the target genes of these miRNAs. In addition, the Database for Annotation Visualization and Integrated Discovery (DAVID; http://david.abcc.ncifcrf.gov) and TfactS (http://www.tfacts.org) datasets were used for combined analysis of functional enrichment and transcription factor (TF) analysis. RESULTS:Thirteen key miRNAs were identified, of which four were significantly up-regulated (hsa-miR-101, hsa-miR-155, hsa-miR-34a, hsa-miR-9) and eight were found to be significantly down-regulated (hsa-let-7d-5p, hsa-let-7g-5p, hsa-miR-15b, hsa-miR-191-5p, hsa-miR-125b, hsa-miR-26b-5p, hsa-miR-29b, hsa-miR-342-3p). The functional enrichment analysis indicated that up-regulated signature miRNA targets were associated with transcription from the RNA polymerase II promoter process and the chemical synaptic transmission process. Down-regulated signature miRNA targets were mostly enriched with respect to positive regulation of transcription from the RNA polymerase II promoter process, p53 signaling, and microRNAs in cancer pathways. TF analysis showed that 87 TFs were influenced by the up-regulated miRNAs, and 134 TFs were influenced by the down-regulated miRNAs. 
In total, 70 (45.5%) TFs were affected by both up-regulated and down-regulated miRNAs. CONCLUSIONS:In summary, 13 key miRNAs were found to have a vital function in the pathological progress of AD, as well as the target genes and TFs of these miRNAs. The potential functions of these miRNAs as diagnostic and therapeutic targets of the AD are revealed by the present study.",2018-05-21 +27472236,Practical Considerations in Breast Papillary Lesions: A Review of the Literature.,"

Context

-Diagnosis of papillary breast lesions, especially in core biopsies, is challenging for most pathologists, and these lesions pose problems for patient management. Distinction between benign, premalignant, and malignant components of papillary lesions is challenging, and the diagnosis of invasion is problematic in lesions that have circumscribed margins. Obtaining a balance between overtreatment and undertreatment of these lesions is also challenging.

Objectives

-To provide a classification and a description of the histologic and immunohistochemical features and the differential diagnosis of papillary breast lesions, to provide an update on the molecular pathology of papillary breast lesions, and to discuss the recommendations for further investigation and management of papillary breast lesions. This review provides a concise description of the histologic and immunohistochemical features of the different papillary lesions of the breast.

Data sources

-The standard pathology text books on breast pathology and literature on papillary breast lesions were reviewed with the assistance of the PubMed database ( http://www.ncbi.nlm.nih.gov/pubmed ).

Conclusions

-Knowledge of the clinical presentation, histology, immunoprofile, and behavior of papillary breast lesions will assist pathologists with the diagnosis and optimal management of patients with papillary breast lesions.",2016-08-01 +27153594,HLaffy: estimating peptide affinities for Class-1 HLA molecules by learning position-specific pair potentials.,"

Motivation

T-cell epitopes serve as molecular keys to initiate adaptive immune responses. Identification of T-cell epitopes is also a key step in rational vaccine design. Most available methods are driven by informatics and are critically dependent on experimentally obtained training data. Analysis of a training set from Immune Epitope Database (IEDB) for several alleles indicates that the sampling of the peptide space is extremely sparse covering a tiny fraction of the possible nonamer space, and also heavily skewed, thus restricting the range of epitope prediction.

Results

We present a new epitope prediction method that has four distinct computational modules: (i) structural modelling, estimating statistical pair-potentials and constraint derivation, (ii) implicit modelling and interaction profiling, (iii) feature representation and binding affinity prediction and (iv) use of graphical models to extract peptide sequence signatures to predict epitopes for HLA class I alleles.

Conclusions

HLaffy is a novel and efficient epitope prediction method that predicts epitopes for any Class-1 HLA allele, by estimating the binding strengths of peptide-HLA complexes which is achieved through learning pair-potentials important for peptide binding. It relies on the strength of the mechanistic understanding of peptide-HLA recognition and provides an estimate of the total ligand space for each allele. The performance of HLaffy is seen to be superior to the currently available methods.

Availability and implementation

The method is made accessible through a webserver http://proline.biochem.iisc.ernet.in/HLaffy

Contact

nchandra@biochem.iisc.ernet.in

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-29 +29703197,Phylogenomics resolves the evolutionary chronicle of our squirting closest relatives.,"A recent paper in BMC Biology has resolved the family relationships of sea squirts, one of our closest invertebrate relatives, by using a large phylogenomic data set derived from available genomes and newly generated transcriptomes. The work confirms previous ideas that ascidians (the sea squirts) are not monophyletic, as they include some pelagic jelly-like relatives, and proposes a chronogram for a group that has been difficult to resolve due to their accelerated genome evolution.See research article: https://bmcbiol.biomedcentral.com/articles/10.1186/s12915-018-0499-2.",2018-04-27 +29522418,The Systems Biology Markup Language (SBML): Language Specification for Level 3 Version 2 Core. ,"Computational models can help researchers to interpret data, understand biological functions, and make quantitative predictions. The Systems Biology Markup Language (SBML) is a file format for representing computational models in a declarative form that different software systems can exchange. SBML is oriented towards describing biological processes of the sort common in research on a number of topics, including metabolic pathways, cell signaling pathways, and many others. By supporting SBML as an input/output format, different tools can all operate on an identical representation of a model, removing opportunities for translation errors and assuring a common starting point for analyses and simulations. This document provides the specification for Version 2 of SBML Level 3 Core. The specification defines the data structures prescribed by SBML, their encoding in XML (the eXtensible Markup Language), validation rules that determine the validity of an SBML document, and examples of models in SBML form. 
The design of Version 2 differs from Version 1 principally in allowing new MathML constructs, making more child elements optional, and adding identifiers to all SBML elements instead of only selected elements. Other materials and software are available from the SBML project website at http://sbml.org/.",2018-03-09 +30300406,BlasterJS: A novel interactive JavaScript visualisation component for BLAST alignment results.,"

Background

The wide range of potential applications has made the Basic Local Alignment Search Tool (BLAST) a ubiquitous tool in the field of Molecular Biology. Within this context, it is increasingly appealing to embed BLAST services within larger Web applications.

Results

This work introduces BlasterJS viewer, a new JavaScript library for the lightweight development of Web-based applications supporting the visualisation of BLAST outputs. BlasterJS detaches from similar data viewers by focusing on the visual and interactive display of sequence similarity results and being completely independent of BLAST services. BlasterJS is compatible with the text outputs generated by the BLAST family of programs, namely BLASTp, BLASTn, BLASTx, tBLASTn, and tBLASTx, and works in all major Web browsers. Furthermore, BlasterJS is available through the EBI's BioJS registry 5, which extends its potential use to a wider scope of bioinformatics applications.

Conclusions

BlasterJS is a new JavaScript library that enables easy and seamless integration of visual and interactive representations of BLAST outputs in Web-based applications supporting sequence similarity search. BlasterJS is freely accessible at http://sing-group.org/blasterjs/.",2018-10-09 +29028987,In silico structural modeling of multiple epigenetic marks on DNA.,

Availability and implementation

The code together with examples and tutorials are available from http://www.cs.ox.ac.uk/mosaics.

Contact

peter.minary@cs.ox.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.,2018-01-01 +25342870,The Beginning Spanish Lexicon: A Web-based interface to calculate phonological similarity among Spanish words in adults learning Spanish as a foreign language.,"A number of resources provide psycholinguistic researchers with information about the words that the typical child or adult knows in a variety of languages. What is currently not available is a resource that provides information about the words that a typical adult learning a foreign language knows. We created such a resource for Spanish: The Beginning Spanish Lexicon. The present report describes the words contained in this web-accessible resource, and the information about those words provided by the interface. This information is freely accessible at: http://www.people.ku.edu/~mvitevit/BegSpanLex.html.",2012-01-01 +24150940,DECIPHER: database for the interpretation of phenotype-linked plausibly pathogenic sequence and copy-number variation.,"The DECIPHER database (https://decipher.sanger.ac.uk/) is an accessible online repository of genetic variation with associated phenotypes that facilitates the identification and interpretation of pathogenic genetic variation in patients with rare disorders. Contributing to DECIPHER is an international consortium of >200 academic clinical centres of genetic medicine and ≥1600 clinical geneticists and diagnostic laboratory scientists. Information integrated from a variety of bioinformatics resources, coupled with visualization tools, provides a comprehensive set of tools to identify other patients with similar genotype-phenotype characteristics and highlights potentially pathogenic genes. In a significant development, we have extended DECIPHER from a database of just copy-number variants to allow upload, annotation and analysis of sequence variants such as single nucleotide variants (SNVs) and InDels. 
Other notable developments in DECIPHER include a purpose-built, customizable and interactive genome browser to aid combined visualization and interpretation of sequence and copy-number variation against informative datasets of pathogenic and population variation. We have also introduced several new features to our deposition and analysis interface. This article provides an update to the DECIPHER database, an earlier instance of which has been described elsewhere [Swaminathan et al. (2012) DECIPHER: web-based, community resource for clinical interpretation of rare variants in developmental disorders. Hum. Mol. Genet., 21, R37-R44].",2013-10-22 +29029598,A method for named entity normalization in biomedical articles: application to diseases and plants.,"

Background

In biomedical articles, a named entity recognition (NER) technique that identifies entity names from texts is an important element for extracting biological knowledge from articles. After NER is applied to articles, the next step is to normalize the identified names into standard concepts (i.e., disease names are mapped to the National Library of Medicine's Medical Subject Headings disease terms). In biomedical articles, many entity normalization methods rely on domain-specific dictionaries for resolving synonyms and abbreviations. However, the dictionaries are not comprehensive except for some entities such as genes. In recent years, biomedical articles have accumulated rapidly, and neural network-based algorithms that incorporate a large amount of unlabeled data have shown considerable success in several natural language processing problems.

Results

In this study, we propose an approach for normalizing biological entities, such as disease names and plant names, by using word embeddings to represent semantic spaces. For diseases, training data from the National Center for Biotechnology Information (NCBI) disease corpus and unlabeled data from PubMed abstracts were used to construct word representations. For plants, a training corpus that we manually constructed and unlabeled PubMed abstracts were used to represent word vectors. We showed that the proposed approach performed better than the use of only the training corpus or only the unlabeled data and showed that the normalization accuracy was improved by using our model even when the dictionaries were not comprehensive. We obtained F-scores of 0.808 and 0.690 for normalizing the NCBI disease corpus and manually constructed plant corpus, respectively. We further evaluated our approach using a data set in the disease normalization task of the BioCreative V challenge. When only the disease corpus was used as a dictionary, our approach significantly outperformed the best system of the task.

Conclusions

The proposed approach shows robust performance for normalizing biological entities. The manually constructed plant corpus and the proposed model are available at http://gcancer.org/plant and http://gcancer.org/normalization , respectively.",2017-10-13 +21799808,Exploiting publicly available biological and biochemical information for the discovery of novel short linear motifs.,"The function of proteins is often mediated by short linear segments of their amino acid sequence, called Short Linear Motifs or SLiMs, the identification of which can provide important information about a protein function. However, the short length of the motifs and their variable degree of conservation makes their identification hard since it is difficult to correctly estimate the statistical significance of their occurrence. Consequently, only a small fraction of them have been discovered so far. We describe here an approach for the discovery of SLiMs based on their occurrence in evolutionarily unrelated proteins belonging to the same biological, signalling or metabolic pathway and give specific examples of its effectiveness in both rediscovering known motifs and in discovering novel ones. An automatic implementation of the procedure, available for download, allows significant motifs to be identified, automatically annotated with functional, evolutionary and structural information and organized in a database that can be inspected and queried. An instance of the database populated with pre-computed data on seven organisms is accessible through a publicly available server and we believe it constitutes by itself a useful resource for the life sciences (http://www.biocomputing.it/modipath).",2011-07-20 +29206457,ADVERPred-Web Service for Prediction of Adverse Effects of Drugs.,"Application of structure-activity relationships (SARs) for the prediction of adverse effects of drugs (ADEs) has been reported in many published studies. 
Training sets for the creation of SAR models are usually based on drug label information which allows for the generation of data sets for many hundreds of drugs. Since many ADEs may not be related to drug consumption, one of the main problems in such studies is the quality of data on drug-ADE pairs obtained from labels. The information on ADEs may be included in three sections of the drug labels: ""Boxed warning,"" ""Warnings and Precautions,"" and ""Adverse reactions."" The first two sections, especially Boxed warning, usually contain the most frequent and severe ADEs that have either known or probable relationships to drug consumption. Using this information, we have created manually curated data sets for the five most frequent and severe ADEs: myocardial infarction, arrhythmia, cardiac failure, severe hepatotoxicity, and nephrotoxicity, with more than 850 drugs on average for each effect. The corresponding SARs were built with PASS (Prediction of Activity Spectra for Substances) software and had balanced accuracy values of 0.74, 0.7, 0.77, 0.67, and 0.75, respectively. They were implemented in a freely available ADVERPred web service ( http://www.way2drug.com/adverpred/ ), which enables a user to predict five ADEs based on the structural formula of compound. This web service can be applied for estimation of the corresponding ADEs for hits and lead compounds at the early stages of drug discovery.",2017-12-22 +30301858,Host-Pathogen Interactions: What the EHEC Are We Learning from Host Genome-Wide Screens? ,"Several genome-wide screens have been conducted to identify host cell factors involved in the pathogenesis of bacterial pathogens whose virulence is dependent on type III secretion systems (T3SSs), nanomachines responsible for the translocation of proteins into host cells. In the most recent of these, Pacheco et al. 
(mBio 9:e01003-18, 2018, http://mbio.asm.org/content/9/3/e01003-18.full) screened a genome-wide CRISPR/Cas9 (clustered regularly interspaced short palindromic repeats with Cas9) knockout library for host proteins involved in the pathogenesis of enterohemorrhagic Escherichia coli (EHEC). Their study revealed an unrecognized link between EHEC's two major virulence determinants (its T3SS and Shiga toxins). We discuss these findings in light of data from three other genome-wide screens. Each of these studies uncovered multiple host cell determinants, which curiously share little to no overlap but primarily are involved in mediating early interactions between T3SSs and host cells. We therefore consider how each screen was performed, the advantages and disadvantages of each, and how follow-up studies might be designed to address these issues.",2018-10-09 +29554223,CMV: visualization for RNA and protein family models and their comparisons.,"Summary:A standard method for the identification of novel RNAs or proteins is homology search via probabilistic models. One approach relies on the definition of families, which can be encoded as covariance models (CMs) or Hidden Markov Models (HMMs). While being powerful tools, their complexity makes it tedious to investigate them in their (default) tabulated form. This specifically applies to the interpretation of comparisons between multiple models as in family clans. The Covariance model visualization tools (CMV) visualize CMs or HMMs to: I) Obtain an easily interpretable representation of HMMs and CMs; II) Put them in context with the structural sequence alignments they have been created from; III) Investigate results of model comparisons and highlight regions of interest. Availability and implementation:Source code (http://www.github.com/eggzilla/cmv), web-service (http://rna.informatik.uni-freiburg.de/CMVS). 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-08-01 +25516408,MitImpact: an exhaustive collection of pre-computed pathogenicity predictions of human mitochondrial non-synonymous variants.,"Mitochondrial DNA carries a tiny, but fundamental portion of the eukaryotic genetic code. As its nuclear counterpart, it is susceptible to point mutations. Their level of pathogenicity has been assessed for the newly discovered mutations only, leaving some degree of uncertainty on the potential impact of the unknown mutations. Here we present Mitochondrial mutation Impact (MitImpact), a queryable lightweight web interface to a reasoned collection of structurally and evolutionary annotated pathogenicity predictions, obtained by assembling pre-computed with on-the-fly-computed sets of pathogenicity estimations, for all the possible mitochondrial missense variants. It presents itself as a resource for fast and reliable evaluation of gene-specific susceptibility of unknown and verified amino acid changes. MitImpact is freely available at http://bioinformatics.css-mendel.it/ (tools section). ©2014 Wiley Periodicals, Inc.",2014-12-17 +30228932,TumGrowth: An open-access web tool for the statistical analysis of tumor growth curves.,"The analysis of tumor growth curves is standard practice in experimental oncology including tumor immunology. In experimental oncology, cancer cells are inoculated into rodents (mostly mice) and their growth is monitored by measuring tumor diameter, surface or volume over time as a function of distinct treatments. Then, different groups of tumors/treatments are compared among each other for their evolution and possible responses to treatment. The R package TumGrowth has been created as a software tool allowing to carry out a series of statistical comparisons across or between groups of tumor growth curves obtained in a standard laboratory, for experimenters with limited knowledge in statistics. 
TumGrowth is freely available online at https://kroemerlab.shinyapps.io/TumGrowth/ and can be downloaded into any computer. It offers an exhaustive panoply of tools to visualize and analyze complex data sets including longitudinal, cross-sectional and time-to-endpoint measurements.",2018-08-01 +30180606,Concept drift detection on social network data using cross-recurrence quantification analysis.,"This paper presents our efforts to detect Concept Drifts (changes in data generation processes), using the Cross-Recurrence Quantification Analysis, on time series produced by social network systems. Experiments were performed on the TSViz project (http://www.tsviz.com.br), which collects online tweets associated with predefined hashtags and processes them to generate different time series: one to measure the amount of information contained in textual short messages and another to quantify the positiveness and negativeness of users' sentiments, etc. In that context, this work proposed and evaluated a Concept Drift approach to point out when generating processes change along time, indicating the detection of relevant textual changes in terms of the amount of information and sentiments. As a main contribution, results show that our approach indicates when the most important social events happen, which were confirmed by official news.",2018-08-01 +30101339,A powerful conditional gene-based association approach implicated functionally important genes for schizophrenia.,"

Motivation

It remains challenging to unravel new susceptibility genes of complex diseases and the mechanisms in genome-wide association studies. There are at least two difficulties, isolation of the genuine susceptibility genes from many indirectly associated genes and functional validation of these genes.

Results

We first proposed a novel conditional gene-based association test which can use only summary statistics to isolate independently associated genes of a disease. Applying this method, we detected 185 genes of independent association with schizophrenia. We then designed an in-silico experiment based on expression/co-expression to systematically validate pathogenic potential of these genes. We found that genes of independent association with schizophrenia formed more co-expression pairs in normal post-natal but not pre-natal human brain regions than expected. Interestingly, no co-expression enrichment was found in the brain regions of schizophrenia patients. The genes with independent association also had more significant P-values for differential expression between schizophrenia patients and controls in the brain regions. In contrast, indirectly associated genes or associated genes by other widely-used gene-based tests had no such differential expression and co-expression patterns. In summary, this conditional gene-based association test is effective for isolating directly associated genes from indirectly associated genes, and the results insightfully suggest that common variants might contribute to schizophrenia largely by distorting expression and co-expression in post-natal brains.

Availability and implementation

The conditional gene-based association test has been implemented in a platform 'KGG' in Java and is publicly available at http://grass.cgs.hku.hk/limx/kgg/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +30704475,Computational identification of deleterious synonymous variants in human genomes using a feature-based approach.,"BACKGROUND:Although synonymous single nucleotide variants (sSNVs) do not alter the protein sequences, they have been shown to play an important role in human disease. Distinguishing pathogenic sSNVs from neutral ones is challenging because pathogenic sSNVs tend to have low prevalence. Although many methods have been developed for predicting the functional impact of single nucleotide variants, only a few have been specifically designed for identifying pathogenic sSNVs. RESULTS:In this work, we describe a computational model, IDSV (Identification of Deleterious Synonymous Variants), which uses random forest (RF) to detect deleterious sSNVs in human genomes. We systematically investigate a total of 74 multifaceted features across seven categories: splicing, conservation, codon usage, sequence, pre-mRNA folding energy, translation efficiency, and function regions annotation features. Then, to remove redundant and irrelevant features and improve the prediction performance, feature selection is employed using the sequential backward selection method. Based on the optimized 10 features, a RF classifier is developed to identify deleterious sSNVs. The results on benchmark datasets show that IDSV outperforms other state-of-the-art methods in identifying sSNVs that are pathogenic. CONCLUSIONS:We have developed an efficient feature-based prediction approach (IDSV) for deleterious sSNVs by using a wide variety of features. Among all the features, a compact and useful feature subset that has an important implication for identifying deleterious sSNVs is identified. Our results indicate that besides splicing and conservation features, a new translation efficiency feature is also an informative feature for identifying deleterious sSNVs. 
While the function regions annotation and sequence features are weakly informative, they may have the ability to discriminate deleterious sSNVs from benign ones when combined with other features. The data and source code are available on website http://bioinfo.ahu.edu.cn:8080/IDSV .",2019-01-31 +30869630,Modeling Variability in Populations of Cells Using Approximated Multivariate Distributions.,"We are interested in studying the evolution of large homogeneous populations of cells, where each cell is assumed to be composed of a group of biological players (species) whose dynamics is governed by a complex biological pathway, identical for all cells. Modeling the inherent variability of the species concentrations in different cells is crucial to understand the dynamics of the population. In this work, we focus on handling this variability by modeling each species by a random variable that evolves over time. This appealing approach runs into the curse of dimensionality since exactly representing a joint probability distribution involving a large set of random variables quickly becomes intractable as the number of variables grows. To make this approach amenable to biopathways, we explore different techniques to (i) approximate the exact joint distribution at a given time point, and (ii) to track its evolution as time elapses. We start with the problem of approximating the probability distribution of biological species in a population of cells at some given time point. Data come from different fine-grained models of biological pathways of increasing complexities, such as (perturbed) Ordinary Differential Equations (ODEs). Classical approximations rely on the strong and unrealistic assumption that variables/species are independent, or that they can be grouped into small independent clusters. We propose instead to use the Chow-Liu tree representation, based on overlapping clusters of two variables, which better captures correlations between variables. 
Our experiments show that the proposed approximation scheme is more accurate than existing ones to model probability distributions deriving from biopathways. Then we address the problem of tracking the dynamics of a population of cells, that is computing from an initial distribution the evolution of the (approximate) joint distribution of species over time, called the inference problem. We evaluate several approximate inference algorithms (e.g., [14] , [17] ) for coarse-grained abstractions [12], [16] of biological pathways. Using the Chow-Liu tree approximation, we develop a new inference algorithm which is very accurate according to the experiments we report, for a minimal computation overhead. Our implementation is available at https://codeocean.com/capsule/6491669/tree.",2019-03-11 +26365237,ASD v3.0: unraveling allosteric regulation with structural mechanisms and biological networks.,"Allosteric regulation, the most direct and efficient way of regulating protein function, is induced by the binding of a ligand at one site that is topographically distinct from an orthosteric site. Allosteric Database (ASD, available online at http://mdl.shsmu.edu.cn/ASD) has been developed to provide comprehensive information featuring allosteric regulation. With increasing data, fundamental questions pertaining to allostery are currently receiving more attention from the mechanism of allosteric changes in an individual protein to the entire effect of the changes in the interconnected network in the cell. 
Thus, the following novel features were added to this updated version: (i) structural mechanisms of more than 1600 allosteric actions were elucidated by a comparison of site structures before and after the binding of a modulator; (ii) 261 allosteric networks were identified to unveil how the allosteric action in a single protein would propagate to affect downstream proteins; (iii) two of the largest human allosteromes, protein kinases and GPCRs, were thoroughly constructed; and (iv) web interface and data organization were completely redesigned for efficient access. In addition, allosteric data have largely expanded in this update. These updates are useful for facilitating the investigation of allosteric mechanisms, dynamic networks and drug discoveries.",2015-09-13 +27625390,The PathoYeastract database: an information system for the analysis of gene and genomic transcription regulation in pathogenic yeasts.,"We present the PATHOgenic YEAst Search for Transcriptional Regulators And Consensus Tracking (PathoYeastract - http://pathoyeastract.org) database, a tool for the analysis and prediction of transcription regulatory associations at the gene and genomic levels in the pathogenic yeasts Candida albicans and C. glabrata. Upon data retrieval from hundreds of publications, followed by curation, the database currently includes 28 000 unique documented regulatory associations between transcription factors (TF) and target genes and 107 DNA binding sites, considering 134 TFs in both species. Following the structure used for the YEASTRACT database, PathoYeastract makes available bioinformatics tools that enable the user to exploit the existing information to predict the TFs involved in the regulation of a gene or genome-wide transcriptional response, while ranking those TFs in order of their relative importance. Each search can be filtered based on the selection of specific environmental conditions, experimental evidence or positive/negative regulatory effect. 
Promoter analysis tools and interactive visualization tools for the representation of TF regulatory networks are also provided. The PathoYeastract database further provides simple tools for the prediction of gene and genomic regulation based on orthologous regulatory associations described for other yeast species, a comparative genomics setup for the study of cross-species evolution of regulatory networks.",2016-09-12 +22773116,UBioLab: a web-laboratory for ubiquitous in-silico experiments.,"The huge and dynamic amount of bioinformatic resources (e.g., data and tools) available nowadays in Internet represents a big challenge for biologists –for what concerns their management and visualization– and for bioinformaticians –for what concerns the possibility of rapidly creating and executing in-silico experiments involving resources and activities spread over the WWW hyperspace. Any framework aiming at integrating such resources as in a physical laboratory has imperatively to tackle –and possibly to handle in a transparent and uniform way– aspects concerning physical distribution, semantic heterogeneity, co-existence of different computational paradigms and, as a consequence, of different invocation interfaces (i.e., OGSA for Grid nodes, SOAP for Web Services, Java RMI for Java objects, etc.). The framework UBioLab has been just designed and developed as a prototype following the above objective. Several architectural features –as those ones of being fully Web-based and of combining domain ontologies, Semantic Web and workflow techniques– give evidence of an effort in such a direction. 
The integration of a semantic knowledge management system for distributed (bioinformatic) resources, a semantic-driven graphic environment for defining and monitoring ubiquitous workflows and an intelligent agent-based technology for their distributed execution allows UBioLab to be a semantic guide for bioinformaticians and biologists providing (i) a flexible environment for visualizing, organizing and inferring any (semantics and computational) ""type"" of domain knowledge (e.g., resources and activities, expressed in a declarative form), (ii) a powerful engine for defining and storing semantic-driven ubiquitous in-silico experiments on the domain hyperspace, as well as (iii) a transparent, automatic and distributed environment for correct experiment executions.",2012-07-09 +28100627,Inflammatory Biomarkers Predict Heart Failure Severity and Prognosis in Patients With Heart Failure With Preserved Ejection Fraction: A Holistic Proteomic Approach. ,"Underlying mechanisms in heart failure (HF) with preserved ejection fraction remain unknown. We investigated cardiovascular plasma biomarkers in HF with preserved ejection fraction and their correlation to diastolic dysfunction, functional class, pathophysiological processes, and prognosis. In 86 stable patients with HF and EF ≥45% in the Karolinska Rennes (KaRen) biomarker substudy, biomarkers were quantified by a multiplex immunoassay. Orthogonal projection to latent structures by partial least square analysis was performed on 87 biomarkers and 240 clinical variables, ranking biomarkers associated with New York Heart Association (NYHA) Functional class and the composite outcome (all-cause mortality and HF hospitalization). Biomarkers significantly correlated with outcome were analyzed by multivariable Cox regression and correlations with echocardiographic measurements performed. 
The orthogonal partial least square outcome-predicting biomarker pattern was run against the Ingenuity Pathway Analysis (IPA) database, containing annotated data from the public domain. The orthogonal partial least square analyses identified 32 biomarkers correlated with NYHA class and 28 predicting outcomes. Among outcome-predicting biomarkers, growth/differentiation factor-15 was the strongest and an additional 7 were also significant in Cox regression analyses when adjusted for age, sex, and N-terminal probrain natriuretic peptide: adrenomedullin (hazard ratio per log increase 2.53), agouti-related protein; (1.48), chitinase-3-like protein 1 (1.35), C-C motif chemokine 20 (1.35), fatty acid-binding protein (1.33), tumor necrosis factor receptor 1 (2.29), and TNF-related apoptosis-inducing ligand (0.34). Twenty-three of them correlated with diastolic dysfunction (E/e') and 5 with left atrial volume index. The IPA suggested that increased inflammation, immune activation with decreased necrosis and apoptosis preceded poor outcome. In HF with preserved ejection fraction, novel biomarkers of inflammation predict HF severity and prognosis that may complement or even outperform traditional markers, such as N-terminal probrain natriuretic peptide. These findings lend support to a hypothesis implicating global systemic inflammation in HF with preserved ejection fraction. URL: http://www.clinicaltrials.gov; Unique identifier: NCT00774709.",2017-02-01 +22923294,The GlycomeAtlas tool for visualizing and querying glycome data.,"

Motivation

The development of glycomics technologies in recent years has produced a sufficient amount of data to begin analyzing the glycan structures present in various organisms and tissues. In particular, glycan profiling using mass spectrometry (MS) and tandem MS has generated a large amount of data that are waiting to be analyzed. The Consortium for Functional Glycomics (CFG) has provided a web resource for obtaining such glycan profiling data easily. Although an interactive spectrum viewer is provided on the website as a Java applet, it is not necessarily easy to search for particular glycans or to find commonalities between different tissues in a single organism, for example. Therefore, to allow users to better take advantage of the valuable glycome data that can be obtained from mass spectra and other leading technologies, we have developed a tool called Glycome Atlas which is pre-loaded with the data from the CFG and is also able to visualize local glycan profiling data for human and mouse.

Results

We have developed a tool to allow users to visualize and perform queries of glycome data. This tool, called GlycomeAtlas, is pre-loaded with glycome data as provided by the CFG. Moreover, users can load their own local glycome data into this tool to visualize and perform queries on their own data.

Availability

This tool is available at the following URL: http://www.rings.t.soka.ac.jp/GlycomeAtlas/GUI.html.",2012-08-24 +26408852,"Optimization of the cryopreservation of biological resources, Toxoplasma gondii tachyzoites, using flow cytometry.","The conservation of Toxoplasma gondii strains isolated from humans and animals is essential for conducting studies on Toxoplasma. Conservation is the main function of the French Biological Toxoplasma Resource Centre (BRC Toxoplasma, France, http://www.toxocrb.com/). In this study, we have determined the suitability of a standard cryopreservation methodology for different Toxoplasma strains using the viability of tachyzoites assayed by flow cytometry with dual fluorescent labelling (calcein acetoxymethyl ester and propidium iodide) of tachyzoites. This method provides a comparative quantitative assessment of viability after thawing. The results helped to define and refine quality criteria before tachyzoite cryopreservation and optimization of the cryopreservation parameters. The optimized cryopreservation method uses a volume of 1.0 mL containing 8 × 10(6) tachyzoites, in Iscove's Modified Dulbecco's Medium (IMDM) containing 10% foetal calf serum (FCS). The cryoprotectant additive is 10% v/v Me2SO without incubation. A cooling rate of ∼1 °C/min to -80 °C followed, after 48 h, by storage in liquid nitrogen. Thawing was performed using a 37 °C water bath that produced a warming rate of ∼100 °C/min, and samples were then diluted 1:5 in IMDM with 5% FCS, and centrifuged and resuspended for viability assessment.",2015-09-25 +28802325,SNOMED2HL7: A tool to normalize and bind SNOMED CT concepts to the HL7 Reference Information Model.,"

Background

Current clinical research and practice requires interoperability among systems in a complex and highly dynamic domain. There has been a significant effort in recent years to develop integrative common data models and domain terminologies. Such efforts have not completely solved the challenges associated with clinical data that are distributed among different and heterogeneous institutions with different systems to encode the information. Currently, when providing homogeneous interfaces to exploit clinical data, certain transformations still involve manual and time-consuming processes that could be automated.

Objectives

There is a lack of tools to support data experts adopting clinical standards. This absence is especially significant when links between data model and vocabulary are required. The objective of this work is to present SNOMED2HL7, a novel tool to automatically link biomedical concepts from widely used terminologies, and the corresponding clinical context, to the HL7 Reference Information Model (RIM).

Methods

Based on the recommendations of the International Health Terminology Standards Development Organisation (IHTSDO), the SNOMED Normal Form has been implemented within SNOMED2HL7 to decompose and provide a method to reduce the number of options to store the same information. The binding of clinical terminologies to HL7 RIM components is the core of SNOMED2HL7, where terminology concepts have been annotated with the corresponding options within the interoperability standard. A web-based tool has been developed to automatically provide information from the normalization mechanisms and the terminology binding.

Results

SNOMED2HL7 binding coverage includes the majority of the concepts used to annotate legacy systems. It follows HL7 recommendations to solve binding overlaps and provides the binding of the normalized version of the concepts. The first version of the tool, available at http://kandel.dia.fi.upm.es:8078, has been validated in EU funded projects to integrate real world data for clinical research with an 88.47% of accuracy.

Conclusions

This paper presents the first initiative to automatically retrieve concept-centered information required to transform legacy data into widely adopted interoperability standards. Although additional functionality will extend capabilities to automate data transformations, SNOMED2HL7 already provides the functionality required for the clinical interoperability community.",2017-07-05 +30257044,Fast quantitative MRI using controlled saturation magnetization transfer.,"

Purpose

This study demonstrates magnetization transfer (MT) effects directly affect relaxometry measurements and develops a framework that allows single-pool models to be valid in 2-pool MT systems.

Methods

A theoretical framework is developed in which a 2-pool MT system effectively behaves as a single-pool if the RMS RF magnetic field ($B_{1}^{\text{rms}}$) is kept fixed across all measurements. A practical method for achieving controlled saturation magnetization transfer (CSMT) using multiband RF pulses is proposed. Numerical, Phantom, and in vivo validations were performed directly comparing steady state (SS) estimation approaches that under correct single-pool assumptions would be expected to vary in precision but not accuracy.

Results

Numerical simulations predict single-pool estimates obtained from MT model generated data are not consistent for different SS estimation methods, and a systematic underestimation of T2 is expected. Neither effect occurs under the proposed CSMT approach. Both phantom and in vivo experiments corroborate the numerical predictions. Experimental data highlights that even when using the same relaxometry method, different estimates are obtained depending on which combination of flip angles (FAs) and TRs are used if the CSMT approach is not used. Using CSMT, stable measurements of both T1 and T2 are obtained. The measured T1 ($T_{1}^{\text{CSMT}}$) depends on $B_{1}^{\text{rms}}$, which is therefore an important parameter to specify.

Conclusion

This work demonstrates that conventional single pool relaxometry, which is highly efficient for human studies, results in unreliable parameter estimates in biological tissues because of MT effects. The proposed CSMT framework is shown to allow single-pool assumptions to be valid, enabling reliable and efficient quantitative imaging to be performed.",2018-09-14 +22712730,PeanutDB: an integrated bioinformatics web portal for Arachis hypogaea transcriptomics.,"

Background

The peanut (Arachis hypogaea) is an important crop cultivated worldwide for oil production and food sources. Its complex genetic architecture (e.g., the large and tetraploid genome possibly due to unique cross of wild diploid relatives and subsequent chromosome duplication: 2n = 4x = 40, AABB, 2800 Mb) presents a major challenge for its genome sequencing and makes it a less-studied crop. Without a doubt, transcriptome sequencing is the most effective way to harness the genome structure and gene expression dynamics of this non-model species that has a limited genomic resource.

Description

With the development of next generation sequencing technologies such as 454 pyro-sequencing and Illumina sequencing by synthesis, the transcriptomics data of peanut is rapidly accumulated in both the public databases and private sectors. Integrating 187,636 Sanger reads (103,685,419 bases), 1,165,168 Roche 454 reads (333,862,593 bases) and 57,135,995 Illumina reads (4,073,740,115 bases), we generated the first release of our peanut transcriptome assembly that contains 32,619 contigs. We provided EC, KEGG and GO functional annotations to these contigs and detected SSRs, SNPs and other genetic polymorphisms for each contig. Based on both open-source and our in-house tools, PeanutDB presents many seamlessly integrated web interfaces that allow users to search, filter, navigate and visualize easily the whole transcript assembly, its annotations and detected polymorphisms and simple sequence repeats. For each contig, sequence alignment is presented in both bird's-eye view and nucleotide level resolution, with colorfully highlighted regions of mismatches, indels and repeats that facilitate close examination of assembly quality, genetic polymorphisms, sequence repeats and/or sequencing errors.

Conclusion

As a public genomic database that integrates peanut transcriptome data from different sources, PeanutDB (http://bioinfolab.muohio.edu/txid3818v1) provides the Peanut research community with an easy-to-use web portal that will definitely facilitate genomics research and molecular breeding in this less-studied crop.",2012-06-19 +30536521,Heat acclimation does not affect maximal aerobic power in thermoneutral normoxic or hypoxic conditions.,"

New findings

What is the central question of this study? Controlled-hyperthermia heat-acclimation protocols induce an array of thermoregulatory and cardiovascular adaptations that facilitate exercise in hot conditions. We investigated whether this ergogenic potential can be transferred to thermoneutral normoxic or hypoxic exercise conditions. What is the main finding and its importance? We showed that heat acclimation did not affect maximal cardiac output or maximal aerobic power in thermoneutral normoxic or hypoxic conditions. Heat acclimation augmented the sweating response in thermoneutral normoxic conditions. The cross-adaptation theory, according to which heat acclimation could facilitate hypoxic exercise capacity, is not supported by our data.

Abstract

Heat acclimation (HA) mitigates heat-induced decrements in maximal aerobic power ($\dot{V}\text{O}_{2\text{peak}}$) and augments exercise thermoregulatory responses in the heat. Whether this beneficial effect of HA is observed in hypoxic or thermoneutral conditions remains unresolved. We explored the effects of HA on cardiorespiratory and thermoregulatory responses to exercise in normoxic, hypoxic and hot conditions. Twelve men [$\dot{V}\text{O}_{2\text{peak}}$ 54.7 (standard deviation 5.7) ml kg⁻¹ min⁻¹] participated in a HA protocol consisting of 10 daily 90-min controlled-hyperthermia (target rectal temperature, Tre = 38.5°C) exercise sessions. Before and after HA, we determined $\dot{V}\text{O}_{2\text{peak}}$ in thermoneutral normoxic (NOR), thermoneutral hypoxic (fractional inspired O2 = 13.5%; HYP) and hot (35°C, 50% relative humidity; HE) conditions in a randomized and counterbalanced order. Preceding each maximal cycling test, a 30-min steady-state exercise bout at 40% of the NOR peak power output was used to evaluate thermoregulatory responses. Heat acclimation induced the expected adaptations in HE: reduced Tre and submaximal heart rate, enhanced sweating response and expanded plasma volume. However, HA did not affect $\dot{V}\text{O}_{2\text{peak}}$ or maximal cardiac output (P = 0.61). The peak power output was increased post-HA in NOR (P < 0.001) and HE (P < 0.001) by 41 ± 21 and 26 ± 22 W, respectively, but not in HYP (P = 0.14). Gross mechanical efficiency was higher (P = 0.004), whereas resting Tre and sweating thresholds were lower (P < 0.01) post-HA across environments. Nevertheless, the gain of the sweating response decreased (P = 0.05) in HYP. In conclusion, our data do not support a beneficial cross-over effect of HA on $\dot{V}\text{O}_{2\text{peak}}$ in normoxic or hypoxic conditions.",2019-01-25 +30425150,Salmonella enterica Serovar Typhi in Bangladesh: Exploration of Genomic Diversity and Antimicrobial Resistance. 
,"Typhoid fever, caused by Salmonella enterica serovar Typhi, is a global public health concern due to increasing antimicrobial resistance (AMR). Characterization of S Typhi genomes for AMR and the evolution of different lineages, especially in countries where typhoid fever is endemic such as Bangladesh, will help public health professionals to better design and implement appropriate preventive measures. We studied whole-genome sequences (WGS) of 536 S Typhi isolates collected in Bangladesh during 1999 to 2013 and compared those sequences with data from a recent outbreak in Pakistan reported previously by E. J. Klemm, S. Shakoor, A. J. Page, F. N. Qamar, et al. (mBio 9:e00105-18, 2018, https://doi.org/10.1128/mBio.00105-18), and a laboratory surveillance in Nepal reported previously by C. D. Britto, Z. A. Dyson, S. Duchene, M. J. Carter, et al. [PLoS Negl. Trop. Dis. 12(4):e0006408, 2018, https://doi.org/10.1371/journal.pntd.0006408]. WGS had high sensitivity and specificity for prediction of ampicillin, chloramphenicol, co-trimoxazole, and ceftriaxone AMR phenotypes but needs further improvement for prediction of ciprofloxacin resistance. We detected a new local lineage of genotype 4.3.1 (named lineage Bd) which recently diverged into a sublineage (named Bdq) containing qnr genes associated with high-level ciprofloxacin resistance. We found a ceftriaxone-resistant isolate with the blaCTX-M-15 gene and a genotype distinct from the genotypes of extensively drug-resistant (XDR) isolates from Pakistan. This result suggests a different source and geographical origin of AMR. Genotype 4.3.1 was dominant in all three countries but formed country-specific clusters in the maximum likelihood phylogenetic tree. Thus, multiple independent genetic events leading to ciprofloxacin and ceftriaxone resistance took place in these neighboring regions of Pakistan, Nepal, and Bangladesh. 
These independent mutational events may enhance the risk of global spread of these highly resistant clones. A short-term global intervention plan is urgently needed.IMPORTANCE Typhoid fever, caused by Salmonella enterica serovar Typhi, is responsible for an estimated burden of approximately 17 million new episodes per year worldwide. Adequate and timely antimicrobial treatment invariably cures typhoid fever. The increasing antimicrobial resistance (AMR) of S Typhi severely limits the treatment options. We studied whole-genome sequences (WGS) of 536 S Typhi isolates collected in Bangladesh between 1999 and 2013 and compared those sequences with data from a recent outbreak in Pakistan and a laboratory surveillance in Nepal. The analysis suggests that multiple ancestral origins of resistance against ciprofloxacin and ceftriaxone are present in three countries. Such independent genetic events and subsequent dissemination could enhance the risk of a rapid global spread of these highly resistant clones. Given the current treatment challenges, vaccination seems to be the most appropriate short-term intervention to reduce the disease burden of typhoid fever at a time of increasing AMR.",2018-11-13 +21846260,"Factor Xa subsite mapping by proteome-derived peptide libraries improved using WebPICS, a resource for proteomic identification of cleavage sites.","Proteomic identification of protease cleavage site specificity (PICS) is a recent proteomic approach for the easy mapping of protease subsite preferences that determines both the prime- and non-prime side specificity concurrently. Here we greatly facilitate user access by providing an automated and simple web-based data-analysis resource termed WebPics (http://clipserve.clip.ubc.ca/pics/). We demonstrate the utility of WebPics analysis of PICS data by determining the substrate specificity of factor Xa from P6-P6', an important blood coagulation protease that proteolytically generates thrombin from prothrombin. 
PICS confirms existing data on non-prime site specificity and refines our knowledge of factor Xa prime-site selectivity.",2011-11-01 +27924012,"L1Base 2: more retrotransposition-active LINE-1s, more mammalian genomes.","LINE-1 (L1) insertions comprise as much as 17% of the human genome sequence, and similar proportions have been recorded for other mammalian species. Given the established role of L1 retrotransposons in shaping mammalian genomes, it becomes an important task to track and annotate the sources of this activity: full length elements, able to encode the cis and trans acting components of the retrotransposition machinery. The L1Base database (http://l1base.charite.de) contains annotated full-length sequences of LINE-1 transposons including putatively active L1s. For the new version of L1Base, a LINE-1 annotation tool, L1Xplorer, has been used to mine potentially active L1 retrotransposons from the reference genome sequences of 17 mammals. The current release of the human genome, GRCh38, contains 146 putatively active L1 elements or full length intact L1 elements (FLIs). The newest versions of the mouse, GRCm38 and the rat, Rnor_6.0, genomes contain 2811 and 492 FLIs, respectively. Most likely reflecting the current level of completeness of the genome project, the latest reference sequence of the common chimpanzee genome, PT 2.19, only contains 19 FLIs. Of note, the current assemblies of the dog, CF 3.1 and the sheep, OA 3.1, genomes contain 264 and 598 FLIs, respectively. Further developments in the new version of L1Base include an updated website with implementation of modern web server technologies, including a more responsive design for an improved user experience, as well as the addition of data sharing capabilities for L1Xplorer annotation.",2016-10-18 +28685272,Bayesian inference for psychology. Part II: Example applications with JASP.,"Bayesian hypothesis testing presents an attractive alternative to p value hypothesis testing. 
Part I of this series outlined several advantages of Bayesian hypothesis testing, including the ability to quantify evidence and the ability to monitor and update this evidence as data come in, without the need to know the intention with which the data were collected. Despite these and other practical advantages, Bayesian hypothesis tests are still reported relatively rarely. An important impediment to the widespread adoption of Bayesian tests is arguably the lack of user-friendly software for the run-of-the-mill statistical problems that confront psychologists for the analysis of almost every experiment: the t-test, ANOVA, correlation, regression, and contingency tables. In Part II of this series we introduce JASP ( http://www.jasp-stats.org ), an open-source, cross-platform, user-friendly graphical software package that allows users to carry out Bayesian hypothesis tests for standard statistical problems. JASP is based in part on the Bayesian analyses implemented in Morey and Rouder's BayesFactor package for R. Armed with JASP, the practical advantages of Bayesian hypothesis testing are only a mouse click away.",2018-02-01 +29508300,Single-Cell Transcriptome Analysis Using SINCERA Pipeline.,"Genome-scale single-cell biology has recently emerged as a powerful technology with important implications for both basic and medical research. There are urgent needs for the development of computational methods or analytic pipelines to facilitate large amounts of single-cell RNA-Seq data analysis. Here, we present a detailed protocol for SINCERA (SINgle CEll RNA-Seq profiling Analysis), a generally applicable analytic pipeline for processing single-cell data from a whole organ or sorted cells. The pipeline supports the analysis for the identification of major cell types, cell type-specific gene signatures, and driving forces of given cell types. 
In this chapter, we provide step-by-step instructions for the functions and features of SINCERA together with application examples to provide a practical guide for the research community. SINCERA is implemented in R, licensed under the GNU General Public License v3, and freely available from CCHMC PBGE website, https://research.cchmc.org/pbge/sincera.html .",2018-01-01 +26027393,[Molecular mechanisms of lung cancer development at its different stages in nuclear industry workers].,"

Objective

to assess mutational events in exons 5, 7, and 8 of the p53 gene and to reveal mutant p53 protein in verified cases of morphologically altered (proliferative and precancerous changes, lung cancer) and histologically unaltered, lung tissues in workers exposed to occupational radiation.

Material and methods

The investigation used formalin-fixed paraffin-embedded unaltered and altered lung tissue blocks (FFPBs) obtained from the human radiobiological tissue repository. The shelf-life of FFPBs was 5-31 years. An immunohistochemical technique using mouse antibodies against p53 protein (<>, Denmark), stained with diaminobenzidine (DAB) chromogen, was employed to determine p53 protein. DNA was isolated from lung tissue FFPBs with QIAmp DNA FFPE Tissue Kit, (<>, USA). Polymerase chain reaction (PCR) was performed to amplify the p53 gene exons 5, 7, and 8 selected for examination, by applying the sequences of genes and primers, the specificity of which was checked using the online resource (http://www.ncbi.nlm.nih.gov/blast). PCR products were detected by temporal temperature gradient gel-electrophoresis and the Sanger sequencing method. The obtained DNA fragments were analyzed on a sequencer ABI Prism 3100 Genetic Analizer (<>, USA). Computer-aided DNA analysis was made using the BLAST program. A package of applied Statistica 6.0 programs was employed for statistical data processing. Results. Immunohistochemical analysis showed that mutant p53 protein was absent in the cells of unaltered lung tissue and the number of cells with mutant p53 protein increased in all the patients with proliferative and precancerous changes and lung cancer, suggesting p53 protein dysfunction. The total number of p53 gene mutations in exons 5, 7, and 8, if there were proliferative and precancerous lung tissue changes and lung cancer, were 25, 20, and 40%, respectively. All the found mutations were transversions (the substitution of purine for pyrimidine or, conversely), indicating the action of exogenous mutagens.

Conclusion

The results of this investigation have confirmed other investigators' data showing that p53 gene mutations in lung cancer are observed in 40-70% of cases. The differences in the number of cases of altered lung tissue with mutations in the p53 gene (not more than 40%) and in those of p53 protein expression were found in 100%, suggesting the regulation of p53 gene function in the cell at multiple levels.",2015-03-01 +27231038,Interventions for treating oro-antral communications and fistulae due to dental procedures.,"

Background

An oro-antral communication is an unnatural opening between the oral cavity and maxillary sinus. When it fails to close spontaneously, it remains patent and is epithelialized to develop into an oro-antral fistula. Various surgical and non-surgical techniques have been used for treating the condition. Surgical procedures include flaps, grafts and other techniques like re-implantation of third molars. Non-surgical techniques include allogenic materials and xenografts.

Objectives

To assess the effectiveness and safety of various interventions for the treatment of oro-antral communications and fistulae due to dental procedures.

Search methods

We searched the Cochrane Oral Health Group's Trials Register (whole database, to 3 July 2015), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library, 2015, Issue 6), MEDLINE via OVID (1946 to 3 July 2015), EMBASE via OVID (1980 to 3 July 2015), US National Institutes of Health Trials Registry (http://clinicaltrials.gov) (whole database, to 3 July 2015) and the World Health Organization (WHO) International Clinical Trials Registry Platform (http://www.who.int/ictrp/en/) (whole database, to 3 July 2015). We also searched the reference lists of included and excluded trials for any randomised controlled trials (RCTs).

Selection criteria

We included RCTs evaluating any intervention for treating oro-antral communications or oro-antral fistulae due to dental procedures. We excluded quasi-RCTs and cross-over trials. We excluded studies on participants who had oro-antral communications, fistulae or both related to Caldwell-Luc procedure or surgical excision of tumours.

Data collection and analysis

Two review authors independently selected trials. Two review authors assessed trial risk of bias and extracted data independently. We estimated risk ratios (RR) for dichotomous data, with 95% confidence intervals (CI). We assessed the overall quality of the evidence using the GRADE approach.

Main results

We included only one study in this review, which compared two surgical interventions: pedicled buccal fat pad flap and buccal flap for the treatment of oro-antral communications. The study involved 20 participants. The risk of bias was unclear. The relevant outcome reported in this trial was successful (complete) closure of oro-antral communication. The quality of the evidence for the primary outcome was very low. The study did not find evidence of a difference between interventions for the successful (complete) closure of an oro-antral communication (RR 1.00, 95% CI 0.83 to 1.20) one month after the surgery. All oro-antral communications in both groups were successfully closed so there were no adverse effects due to treatment failure. We did not find trials evaluating any other intervention for treating oro-antral communications or fistulae due to dental procedures.

Authors' conclusions

We found very low quality evidence from a single small study that compared pedicled buccal fat pad and buccal flap. The evidence was insufficient to judge whether there is a difference in the effectiveness of these interventions as all oro-antral communications in the study were successfully closed by one month after surgery. Large, well-conducted RCTs investigating different interventions for the treatment of oro-antral communications and fistulae caused by dental procedures are needed to inform clinical practice.",2016-05-27 +30311749,"Teratosphaeria stem canker of Eucalyptus: two pathogens, one devastating disease.","

Background

Teratosphaeria gauchensis and T. zuluensis are closely related fungi that cause Teratosphaeria (previously Coniothyrium) stem canker disease on Eucalyptus species propagated in plantations for commercial purposes. This disease is present in many countries in which Eucalyptus trees are planted, and continues to spread with the international trade of infected plant germplasm.

Taxonomy

Fungi, Ascomycota, Pezizomycotina, Dothideomycetes, Dothideomycetidae, Capnodiales, Teratosphaeriaceae, Teratosphaeria.

Identification

The causal agents form dark masses of pycnidia that are visible on the surface of distinct stem cankers that typically form on young green stem tissues. Accurate diagnosis of the causal agents requires DNA sequence data.

Host range

Nine species of Eucalyptus are known to be affected. Of these, E. grandis and its hybrids, which include some of the most important planting stock globally, appear to be particularly vulnerable.

Disease symptoms

Small necrotic lesions develop on young green stem tissue. These lesions coalesce to form large cankers that exude gum. Epicormic shoots develop below the girdling canker and, in severe cases, trees die.

Useful websites

Mycobank, https://www.mycobank.org; Publications of the Forestry and Agricultural Biotechnology Institute (FABI), https://www.fabinet.up.ac.za/index.php/journals.",2018-11-03 +,"BioNetwork Bench: Database and Software for Storage, Query, and Analysis of Gene and Protein Networks","Gene and protein networks offer a powerful approach for integration of the disparate yet complementary types of data that result from high-throughput analyses. Although many tools and databases are currently available for accessing such data, they are left unutilized by bench scientists as they generally lack features for effective analysis and integration of both public and private datasets and do not offer an intuitive interface for use by scientists with limited computational expertise. We describe BioNetwork Bench, an open source, user-friendly suite of database and software tools for constructing, querying, and analyzing gene and protein network models. It enables biologists to analyze public as well as private gene expression; interactively query gene expression datasets; integrate data from multiple networks; store and selectively share the data and results. Finally, we describe an application of BioNetwork Bench to the assembly and iterative expansion of a gene network that controls the differentiation of retinal progenitor cells into rod photoreceptors. The tool is available from http://bionetworkbench.sourceforge.net/

Background

The emergence of high-throughput technologies has allowed many biological investigators to collect a great deal of information about the behavior of genes and gene products over time or during a particular disease state. Gene and protein networks offer a powerful approach for integration of the disparate yet complementary types of data that result from such high-throughput analyses. There are a growing number of public databases, as well as tools for visualization and analysis of networks. However, such databases and tools have yet to be widely utilized by bench scientists, as they generally lack features for effective analysis and integration of both public and private datasets and do not offer an intuitive interface for use by biological scientists with limited computational expertise.

Results

We describe BioNetwork Bench, an open source, user-friendly suite of database and software tools for constructing, querying, and analyzing gene and protein network models. BioNetwork Bench currently supports a broad class of gene and protein network models (eg, weighted and un-weighted, undirected graphs, multi-graphs). It enables biologists to analyze public as well as private gene expression, macromolecular interaction and annotation data; interactively query gene expression datasets; integrate data from multiple networks; query multiple networks for interactions of interest; store and selectively share the data as well as results of analyses. BioNetwork Bench is implemented as a plug-in for, and hence is fully interoperable with, Cytoscape, a popular open-source software suite for visualizing macromolecular interaction networks. Finally, we describe an application of BioNetwork Bench to the problem of assembly and iterative expansion of a gene network that controls the differentiation of retinal progenitor cells into rod photoreceptors.

Conclusions

BioNetwork Bench provides a suite of open source software for construction, querying, and selective sharing of gene and protein networks. Although initially aimed at a community of biologists interested in retinal development, the tool can be adapted easily to work with other biological systems simply by populating the associated database with the relevant datasets.",2012-01-01 +29770486,Direct estimation of $^{17}$O MR images (DIESIS) for quantification of oxygen metabolism in the human brain with partial volume correction.,"PURPOSE: To provide a data post-processing method that corrects for partial volume effects (PVE) and fast $T_2^*$ decay in dynamic $^{17}$O MRI for the mapping of cerebral metabolic rates of oxygen consumption (CMRO2). METHODS: CMRO2 is altered in neurodegenerative diseases and tumors and can be measured after $^{17}$O gas inhalation using dynamic $^{17}$O MRI. CMRO2 quantification is difficult because of PVE. To correct for PVE, a direct estimation of the MR images (DIESIS) method is proposed and used in 4 dynamic $^{17}$O MRI data sets of a healthy volunteer acquired on a 3T MRI system. With DIESIS, $^{17}$O MR signal time curves in selected regions were directly estimated based on parcellation of a coregistered $^{1}$H MPRAGE image. RESULTS: Profile likelihood analysis of the DIESIS method showed identifiability of CMRO2. In white matter (WM), DIESIS reduced CMRO2 from 0.97 ± 0.25 µmol/g$_{\text{tissue}}$/min with Kaiser-Bessel gridding reconstruction to 0.85 ± 0.21 µmol/g$_{\text{tissue}}$/min, whereas in gray matter (GM) it increased from 1.3 ± 0.31 µmol/g$_{\text{tissue}}$/min to 1.86 ± 0.36 µmol/g$_{\text{tissue}}$/min; both values are closer to the literature values from the $^{15}$O-PET studies. CONCLUSION: DIESIS provided an increased separation of CMRO2 values in GM and WM brain regions and corrected for partial volume effects in $^{17}$O-MRI inhalation experiments. 
DIESIS could also be applied to more heterogeneous tissues such as glioblastomas if subregions of the tumor can be represented as additional parcels.",2018-05-16 +,Meta-Analysis of the Effect of Nitrogen Fertilization on Annual Cereal–Legume Intercrop Production,"Numerous studies have been performed to study the effect of N fertilization on cereal–legume intercrops, and their results are sometimes conflicting. Our objective was to do a meta-analysis on cereal–legume intercrops testing the effects of N fertilization on land equivalent ratio (LER; partial and total LER), yield ratio, and proportion of legume in the mixture of crop grains. This analysis was based on 17 published studies reporting the results of experiments performed in 15 countries on six species of cereals and 10 species of legumes. Experiments were generally based on replacement (50–50, i.e., in the intercrop, each species is sown at half the sowing rate used for the sole crop) or full substitutive (100–100, i.e., the sowing rate for each crop in the intercrop is identical to that for sole crops) designs. Nitrogen fertilization rates ranged from 0 to 180 kg N ha⁻¹. The effect of N fertilization and its inter-study variability were analyzed with mixed-effect statistical models, including study as a random effect. Results showed that N fertilization had non-significant effects on average LER and average yield ratio but that the inter-study variability of these effects was large. Nitrogen fertilization was found to significantly decrease the grain proportion of the legume in the mixture and the partial LER of the legume in studies based on C₃ cereal intercrops. 
The database used for the meta-analysis is freely available (http://www6.versailles-grignon.inra.fr/agronomie/Meta-analysis-in-agronomy/Datasets/Dataset-Intercrop).",2014-09-01 +28922607,AntDAS: Automatic Data Analysis Strategy for UPLC-QTOF-Based Nontargeted Metabolic Profiling Analysis.,"High-quality data analysis methodology remains a bottleneck for metabolic profiling analysis based on ultraperformance liquid chromatography-quadrupole time-of-flight mass spectrometry. The present work aims to address this problem by proposing a novel data analysis strategy wherein (1) chromatographic peaks in the UPLC-QTOF data set are automatically extracted by using an advanced multiscale Gaussian smoothing-based peak extraction strategy; (2) a peak annotation stage is used to cluster fragment ions that belong to the same compound. With the aid of high-resolution mass spectrometer, (3) a time-shift correction across the samples is efficiently performed by a new peak alignment method; (4) components are registered by using a newly developed adaptive network searching algorithm; (5) statistical methods, such as analysis of variance and hierarchical cluster analysis, are then used to identify the underlying marker compounds; finally, (6) compound identification is performed by matching the extracted peak information, involving high-precision m/z and retention time, against our compound library containing more than 500 plant metabolites. A manually designed mixture of 18 compounds is used to evaluate the performance of the method, and all compounds are detected under various concentration levels. The developed method is comprehensively evaluated by an extremely complex plant data set containing more than 2000 components. Results indicate that the performance of the developed method is comparable with the XCMS. 
The MATLAB GUI code is available from http://software.tobaccodb.org/software/antdas .",2017-09-27 +26586799,NONCODE 2016: an informative and valuable data source of long non-coding RNAs.,"NONCODE (http://www.bioinfo.org/noncode/) is an interactive database that aims to present the most complete collection and annotation of non-coding RNAs, especially long non-coding RNAs (lncRNAs). The recently reduced cost of RNA sequencing has produced an explosion of newly identified data. Revolutionary third-generation sequencing methods have also contributed to more accurate annotations. Accumulative experimental data also provides more comprehensive knowledge of lncRNA functions. In this update, NONCODE has added six new species, bringing the total to 16 species altogether. The lncRNAs in NONCODE have increased from 210 831 to 527,336. For human and mouse, the lncRNA numbers are 167,150 and 130,558, respectively. NONCODE 2016 has also introduced three important new features: (i) conservation annotation; (ii) the relationships between lncRNAs and diseases; and (iii) an interface to choose high-quality datasets through predicted scores, literature support and long-read sequencing method support. NONCODE is also accessible through http://www.noncode.org/.",2015-11-19 +30486921,Removal of Cr(VI) from Aqueous Solutions Using Amino-Functionalized Carbon Nanospheres Adsorbents.," Carbon nanospheres were prepared and functionalized with carboxyl acid groups (CNS-CA), then reacted with 3-aminopropyltriethoxysilane to introduce amino groups onto the surface (CNS-NH₂) by post-synthesis grafting. CNS-NH₂ was acidified in order to convert the amino groups (-NH₂) into ammonium moieties (). Various techniques such as N₂ physisorption, X-ray diffraction, Fourier transform infrared spectroscopy, Raman spectroscopy, thermogravimetry, X-ray photoelectron spectroscopy, and transmission electron microscopy were used to characterize the nanospheres. 
The removal of chromium ions from aqueous solution using the acidified CNS-NH₂ adsorbent was investigated. Factors influencing the uptake of Cr(VI) ions such as solution pH, adsorbent dose, and initial Cr(VI) ion concentration were investigated. Equilibrium adsorption data fitted the Langmuir model very well. The adsorption maximum capacity of Cr(VI) was found to be 52.38 mg/g. The reusability results indicated that the adsorbent can be reused five times successfully without loss of adsorption capacity.",2018-11-01 +23155061,RNApathwaysDB--a database of RNA maturation and decay pathways.,"Many RNA molecules undergo complex maturation, involving e.g. excision from primary transcripts, removal of introns, post-transcriptional modification and polyadenylation. The level of mature, functional RNAs in the cell is controlled not only by the synthesis and maturation but also by degradation, which proceeds via many different routes. The systematization of data about RNA metabolic pathways and enzymes taking part in RNA maturation and degradation is essential for the full understanding of these processes. RNApathwaysDB, available online at http://iimcb.genesilico.pl/rnapathwaysdb, is an online resource about maturation and decay pathways involving RNA as the substrate. The current release presents information about reactions and enzymes that take part in the maturation and degradation of tRNA, rRNA and mRNA, and describes pathways in three model organisms: Escherichia coli, Saccharomyces cerevisiae and Homo sapiens. RNApathwaysDB can be queried with keywords, and sequences of protein enzymes involved in RNA processing can be searched with BLAST. Options for data presentation include pathway graphs and tables with enzymes and literature data. 
Structures of macromolecular complexes involving RNA and proteins that act on it are presented as 'potato models' using DrawBioPath-a new javascript tool.",2012-11-15 +29085014,POTAGE: A Visualisation Tool for Speeding up Gene Discovery in Wheat.,"POPSEQ Ordered Triticum aestivum Gene Expression (POTAGE) is a web application which accelerates the process of identifying candidate genes for quantitative trait loci (QTL) in hexaploid wheat. This is achieved by leveraging several of the most commonly used data sets in wheat research. These include the Chromosome Survey Sequences, their order along the chromosomes determined by the population sequencing (POPSEQ) approach, the gene predictions and RNA-Seq expression data. POTAGE aggregates those data sets and provides an intuitive interface for biologists to explore the expression of the predicted genes and their functional annotation in a chromosomal context. The interface accelerates some of the laborious and repetitive tasks commonly undertaken in the process of identifying and prioritising genes which may underlie QTL. We illustrate the utility of POTAGE by showing how a short-list of candidate genes can quickly be identified for a QTL linked to pre-harvest sprouting - a major cause of quality and yield loss in wheat production. The candidate genes identified using POTAGE included TaMKK3, which was recently reported as a causal gene for seed dormancy in wheat, and a mutation in its barley ortholog has been shown to reduce pre-harvest sprouting. POTAGE is available at http://crobiad.agwine.adelaide.edu.au/potage .",2017-10-30 +29084964,Common and cell-type specific responses to anti-cancer drugs revealed by high throughput transcript profiling.,"More effective use of targeted anti-cancer drugs depends on elucidating the connection between the molecular states induced by drug treatment and the cellular phenotypes controlled by these states, such as cytostasis and death. 
This is particularly true when mutation of a single gene is inadequate as a predictor of drug response. The current paper describes a data set of ~600 drug cell line pairs collected as part of the NIH LINCS Program ( http://www.lincsproject.org/ ) in which molecular data (reduced dimensionality transcript L1000 profiles) were recorded across dose and time in parallel with phenotypic data on cellular cytostasis and cytotoxicity. We report that transcriptional and phenotypic responses correlate with each other in general, but whereas inhibitors of chaperones and cell cycle kinases induce similar transcriptional changes across cell lines, changes induced by drugs that inhibit intra-cellular signaling kinases are cell-type specific. In some drug/cell line pairs significant changes in transcription are observed without a change in cell growth or survival; analysis of such pairs identifies drug equivalence classes and, in one case, synergistic drug interactions. In this case, synergy involves cell-type specific suppression of an adaptive drug response.",2017-10-30 +21897156,World Federation of Pediatric Intensive Care and Critical Care Societies: Global Sepsis Initiative.,"

Background

According to World Health Organization estimates, sepsis accounts for 60%-80% of lost lives per year in childhood. Measures appropriate for resource-scarce and resource-abundant settings alike can reduce sepsis deaths. In this regard, the World Federation of Pediatric Intensive Care and Critical Care Societies Board of Directors announces the Global Pediatric Sepsis Initiative, a quality improvement program designed to improve quality of care for children with sepsis.

Objectives

To announce the global sepsis initiative; to justify some of the bundles that are included; and to show some preliminary data and encourage participation.

Methods

The Global Pediatric Sepsis Initiative is developed as a Web-based education, demonstration, and pyramid bundles/checklist tool (http://www.pediatricsepsis.org or http://www.wfpiccs.org). Four health resource categories are included. Category A involves a nonindustrialized setting with mortality rate <5 yrs and >30 of 1,000 children. Category B involves a nonindustrialized setting with mortality rate <5 yrs and <30 of 1,000 children. Category C involves a developing industrialized nation. In category D, developed industrialized nation are determined and separate accompanying administrative and clinical parameters bundles or checklist quality improvement recommendations are provided, requiring greater resources and tasks as resource allocation increased from groups A to D, respectively.

Results

In the vanguard phase, data for 361 children (category A, n = 34; category B, n = 12; category C, n = 84; category D, n = 231) were successfully entered, and quality-assurance reports were sent to the 23 participating international centers. Analysis of bundles for categories C and D showed that reduction in mortality was associated with compliance with the resuscitation (odds ratio, 0.369; 95% confidence interval, 0.188-0.724; p < .0004) and intensive care unit management (odds ratio, 0.277; 95% confidence interval, 0.096-0.80) bundles.

Conclusions

The World Federation of Pediatric Intensive Care and Critical Care Societies Global Pediatric Sepsis Initiative is online. Success in reducing pediatric mortality and morbidity, evaluated yearly as a measure of global child health care quality improvement, requires ongoing active recruitment of international participant centers. Please join us at http://www.pediatricsepsis.org or http://www.wfpiccs.org.",2011-09-01 +20931385,Gramene database: a hub for comparative plant genomics.,"The rich collection of known genetic information and the recent completion of rice genome sequencing project provided the cereal plant researchers a useful tool to investigate the roles of genes and genomic organization that contribute to numerous agronomic traits. Gramene ( http://www.gramene.org ) is a unique database where users are allowed to query and explore the power of genomic colinearity and comparative genomics for genetic and genomic studies on plant genomes. Gramene presents a wholesome perspective by assimilating data from a broad range of publicly available data sources for cereals like rice, sorghum, maize, wild rice, wheat, oats, barley, and other agronomically important crop plants such as poplar and grape, and the model plant Arabidopsis. As part of the process, it preserves the original data, but also reanalyzes for integration into several knowledge domains of maps, markers, genes, proteins, pathways, phenotypes, including Quantitative Trait Loci (QTL) and genetic diversity/natural variation. This allows researchers to use this information resource to decipher the known and predicted interactions between the components of biological systems, and how these interactions regulate plant development. 
Using examples from rice, this article describes how the database can be helpful to researchers representing an array of knowledge domains ranging from plant biology, plant breeding, molecular biology, genomics, biochemistry, genetics, bioinformatics, and phylogenomics.",2011-01-01 +29751818,Harnessing the evolutionary information on oxygen binding proteins through Support Vector Machines based modules.,"

Objectives

The arrival of free oxygen on the globe, aerobic life is becoming possible. However, it has become very clear that the oxygen binding proteins are widespread in the biosphere and are found in all groups of organisms, including prokaryotes, eukaryotes as well as in fungi, plants, and animals. The exponential growth and availability of fresh annotated protein sequences in the databases motivated us to develop an improved version of ""Oxypred"" for identifying oxygen-binding proteins.

Results

In this study, we have proposed a method for identifying oxy-proteins with two different sequence similarity cutoffs 50 and 90%. A different amino acid composition based Support Vector Machines models was developed, including the evolutionary profiles in the form position-specific scoring matrix (PSSM). The fivefold cross-validation techniques were applied to evaluate the prediction performance. Also, we compared with existing methods, which shows nearly 97% recognition, but, our newly developed models were able to recognize almost 99.99 and 100% in both oxy-50 and 90% similarity models respectively. Our result shows that our approaches are faster and achieve a better prediction performance over the existing methods. The web-server Oxypred2 was developed for an alternative method for identifying oxy-proteins with more additional modules including PSSM, available at http://bioinfo.imtech.res.in/servers/muthu/oxypred2/home.html .",2018-05-11 +29864163,Investigation of protein quaternary structure via stoichiometry and symmetry information.,"The Protein Data Bank (PDB) is the single worldwide archive of experimentally-determined three-dimensional (3D) structures of proteins and nucleic acids. As of January 2017, the PDB housed more than 125,000 structures and was growing by more than 11,000 structures annually. Since the 3D structure of a protein is vital to understand the mechanisms of biological processes, diseases, and drug design, correct oligomeric assembly information is of critical importance. Unfortunately, the biologically relevant oligomeric form of a 3D structure is not directly obtainable by X-ray crystallography, whilst in solution methods (NMR or single particle EM) it is known from the experiment. 
Instead, this information may be provided by the PDB Depositor as metadata coming from additional experiments, be inferred by sequence-sequence comparisons with similar proteins of known oligomeric state, or predicted using software, such as PISA (Proteins, Interfaces, Structures and Assemblies) or EPPIC (Evolutionary Protein Protein Interface Classifier). Despite significant efforts by professional PDB Biocurators during data deposition, there remain a number of structures in the archive with incorrect quaternary structure descriptions (or annotations). Further investigation is, therefore, needed to evaluate the correctness of quaternary structure annotations. In this study, we aim to identify the most probable oligomeric states for proteins represented in the PDB. Our approach evaluated the performance of four independent prediction methods, including text mining of primary publications, inference from homologous protein structures, and two computational methods (PISA and EPPIC). Aggregating predictions to give consensus results outperformed all four of the independent prediction methods, yielding 83% correct, 9% wrong, and 8% inconclusive predictions, when tested with a well-curated benchmark dataset. We have developed a freely-available web-based tool to make this approach accessible to researchers and PDB Biocurators (http://quatstruct.rcsb.org/).",2018-06-04 +28352762,A meta-analysis of neuroprotective effect for traditional Chinese medicine (TCM) in the treatment of glaucoma.,"

Background

The aim of this study was to evaluate the neuroprotective effect of surgery combined with traditional Chinese medicine(TCM)in the treatment of glaucoma by meta-analysis based on clinical controlled trial.

Methods

All the prospective randomized controlled trialsof surgery combined with TCM in the treatment of glaucoma were searched in the databases of Medline (1960-2015.1), CENTRAL (the Cochrane central register of controlled trials 1989-2015.1, EMBASE (1980∼2015.1) and CNKI (1979-2015.1). Two reviewers independently assessed the quality of the included studies, extracted the relevant data and performed a cross-check. The pooled relative risk (RR) or standard mean difference (SMD) of surgery combined with TCM versus western medicine or surgery alone were calculated as the effect size by meta-analysis method. All the data was analyzed by stata11.0 software (http://www.stata.com; Stata Corporation, College Station, TX).

Results

Finally, eleven clinical controlledtrails with 843 subjects were included in this meta-analysis. The pooled results indicated that the surgery combined with TCM treatment procedure can significant improve the vision recovery rate compared to control group (RR=1.22, 95% CI:1.06∼1.40, P=0.005); And after treatment, the visual field in combined group was significantly improved compared to control group (SMD=0.26∼95% CI:0.09∼0.43, P=0.003).

Conclusion

Surgery combined with TCM can improve the vision recovery rate and the visual fieldin the treatment of glaucoma compared to surgery or western medicine alone.",2016-02-22 +26730351,General pathologist-helper: The new medical app about general pathology.,"

Introduction

Smartphone applications (apps) have become increasingly prevalent in medicine. Due to most pathologists, pathology trainees, technicians, and medical students use smartphones; apps can be a different way for general pathology education. ""General pathologist-helper (GP-HELPER)"" is a novel app developed as a reference tool in general pathology and especially for general pathologists, developed for Android and iOS platforms.

Materials and methods

""GP-HELPER,"" was created using Mobincube website platform. This tool also integrates ""FORUM GP-HELPER,"" an external website created using Miarroba website (http://forum-gp-helper.mboards.com) and ""COMMUNITY GP-HELPER"" a multichannel chat created using Chatango website platform.

Results

The application was released in July 2015, and it is been periodically updated since then. The app has permanent information (offline data) about different pathology protocols (TNM latest edition, protocols regarding management of tumors of unknown primary origin, and flowcharts for some of the most difficult tumors to diagnose) and a database with more than 5000 immunohistochemistry results from different tumors. Online data have links to more than 1100 reference pathology video lectures, 250 antibodies information, more than 70 pathology association websites, 46 pathology providers, and 78 outstanding pathology journal websites. Besides this information, the app has two interactive places such as ""FORUM GP-HELPER"" and ""COMMUNITY GP-HELPER"" that let users to stay in touch everywhere and every time. Expert consult section is also available.

Conclusions

""GP-HELPER"" pretends to integrate offline and online data about pathology with two interactive external places in order to represent a reference tool for general pathologists and associate members.",2015-11-27 +29610253,Deconstructing Pneumococcal Progression from Colonization to Disease. ,"Despite advances in treatment and prevention, the pneumococcus continues to be a dominant cause of severe pneumonia and sepsis and of otitis media, sinusitis, and nonbacteremic pneumonia. Lewnard and colleagues (Infect Immun 86:e00727-17, 2018, https://doi.org/10.1128/IAI.00727-17) used a unique data set of nasopharyngeal and middle ear fluid samples to provide further insight into the progression of nasopharyngeal pneumococcal colonization to disease. They report the comparative rate of progression from colonization to otitis media by serotype, providing insight into how conjugate vaccines that do not reduce the overall prevalence of pneumococci in the nasopharynx dramatically impact the incidence of acute and complex otitis media.",2018-05-22 +30473748,Distinct lung cancer subtypes associate to distinct drivers of tumor progression.,"The main non-small-cell lung cancer (NSCLC) histopathological subtypes are lung adenocarcinomas (LUAD) and lung squamous cell carcinomas (LUSC). To identify candidate progression determinants of NSCLC subtypes, we explored the transcriptomic signatures of LUAD versus LUSC. We then investigated the prognostic impact of the identified tumor-associated determinants. This was done utilizing DNA microarray data from 2,437 NSCLC patients. An independent analysis of a case series of 994 NSCLC was conducted by next-generation sequencing, together with gene expression profiling from GEO (https://www.ncbi.nlm.nih.gov/geo/). This work led us to identify 69 distinct tumor prognostic determinants, which impact on LUAD or LUSC clinical outcome. These included key drivers of tumor growth and cell cycle, transcription factors and metabolic determinants. 
Such disease determinants appeared vastly different in LUAD versus LUSC, and often had opposite impact on clinical outcome. These findings indicate that distinct tumor progression pathways are at work in the two NSCLC subtypes. Notably, most prognostic determinants would go inappropriately assessed or even undetected when globally investigating unselected NSCLC. Hence, differential consideration for NSCLC subtypes should be taken into account in current clinical evaluation procedures for lung cancer.",2018-10-30 +26154165,From Peer-Reviewed to Peer-Reproduced in Scholarly Publishing: The Complementary Roles of Data Models and Workflows in Bioinformatics.,"

Motivation

Reproducing the results from a scientific paper can be challenging due to the absence of data and the computational tools required for their analysis. In addition, details relating to the procedures used to obtain the published results can be difficult to discern due to the use of natural language when reporting how experiments have been performed. The Investigation/Study/Assay (ISA), Nanopublications (NP), and Research Objects (RO) models are conceptual data modelling frameworks that can structure such information from scientific papers. Computational workflow platforms can also be used to reproduce analyses of data in a principled manner. We assessed the extent by which ISA, NP, and RO models, together with the Galaxy workflow system, can capture the experimental processes and reproduce the findings of a previously published paper reporting on the development of SOAPdenovo2, a de novo genome assembler.

Results

Executable workflows were developed using Galaxy, which reproduced results that were consistent with the published findings. A structured representation of the information in the SOAPdenovo2 paper was produced by combining the use of ISA, NP, and RO models. By structuring the information in the published paper using these data and scientific workflow modelling frameworks, it was possible to explicitly declare elements of experimental design, variables, and findings. The models served as guides in the curation of scientific information and this led to the identification of inconsistencies in the original published paper, thereby allowing its authors to publish corrections in the form of an errata.

Availability

SOAPdenovo2 scripts, data, and results are available through the GigaScience Database: http://dx.doi.org/10.5524/100044; the workflows are available from GigaGalaxy: http://galaxy.cbiit.cuhk.edu.hk; and the representations using the ISA, NP, and RO models are available through the SOAPdenovo2 case study website http://isa-tools.github.io/soapdenovo2/.

Contact

philippe.rocca-serra@oerc.ox.ac.uk and susanna-assunta.sansone@oerc.ox.ac.uk.",2015-07-08 +30887928,VaxiJen Dataset of Bacterial Immunogens: An Update.,"BACKGROUND:Identifying immunogenic proteins is the first stage in vaccine design and development. VaxiJen is the most widely used and highly cited server for immunogenicity prediction. As the developers of VaxiJen, we are obliged to update and improve it regularly. Here, we present an updated dataset of bacterial immunogens containing 317 experimentally proven immunogenic proteins of bacterial origin, of which 60% have been reported during the last 10 years. METHODS:PubMed was searched for papers containing data for novel immunogenic proteins tested on humans till March 2017. Corresponding protein sequences were collected from NCBI and UniProtKB. The set was curated manually for multiple protein fragments, isoforms, and duplicates. RESULTS:The final curated dataset consists of 306 immunogenic proteins tested on humans derived from 47 bacterial microorganisms. Certain proteins have several isoforms. All were considered, and the total protein sequences in the set are 317. The updated set contains 206 new immunogens, compared to the previous VaxiJen bacterial dataset. The average number of immunogens per species is 6.7. The set also contains 12 fusion proteins and 41 peptide fragments and epitopes. The dataset includes the names of bacterial microorganisms, protein names, and protein sequences in FASTA format. CONCLUSION:Currently, the updated VaxiJen bacterial dataset is the best known manually-curated compilation of bacterial immunogens. It is freely available at http://www.ddg-pharmfac.net/vaxi jen/dataset. It can easily be downloaded, searched, and processed. 
When combined with an appropriate negative dataset, this update could also serve as a training set, allowing enhanced prediction of the potential immunogenicity of unknown protein sequences.",2019-01-01 +30617801,CalCleaveMKL: a Tool for Calpain Cleavage Prediction.,"Calpain, an intracellular Ca2+-dependent cysteine protease, is known to play a role in a wide range of metabolic pathways through limited proteolysis of its substrates. However, only a limited number of these substrates are currently known, with the exact mechanism of substrate recognition and cleavage by calpain still largely unknown.Current sequencing technologies have made it possible to compile large amounts of cleavage data and brought greater understanding of the underlying protein interactions. However, the practical impossibility of exhaustively retrieving substrate sequences through experimentation alone has created the need for efficient computational prediction methods. Such methods must be able to quickly mark substrate candidates and putative cleavage sites for further analysis. While many methods exist for both calpain and other types of proteolytic actions, the expected reliability of these methods depends heavily on the type and complexity of proteolytic action, as well as the availability of well-labeled experimental datasets, which both vary greatly across enzyme families.This chapter introduces CalCleaveMKL: a tool for calpain cleavage prediction based on multiple kernel learning, an extension to the classic support vector machine framework that is able to train complex models based on rich, heterogeneous feature sets, leading to significantly improved prediction quality. Along with its improved accuracy, the method used by CalCleaveMKL provided numerous insights on the respective importance of sequence-related features, such as solvent accessibility and secondary structure. 
It notably demonstrated there existed significant specificity differences across calpain subtypes, despite previous assumption to the contrary.An online implementation of this prediction tool is available at http://calpain.org .",2019-01-01 +26335531,Scientific workflow optimization for improved peptide and protein identification.,"

Background

Peptide-spectrum matching is a common step in most data processing workflows for mass spectrometry-based proteomics. Many algorithms and software packages, both free and commercial, have been developed to address this task. However, these algorithms typically require the user to select instrument- and sample-dependent parameters, such as mass measurement error tolerances and number of missed enzymatic cleavages. In order to select the best algorithm and parameter set for a particular dataset, in-depth knowledge about the data as well as the algorithms themselves is needed. Most researchers therefore tend to use default parameters, which are not necessarily optimal.

Results

We have applied a new optimization framework for the Taverna scientific workflow management system (http://ms-utils.org/Taverna_Optimization.pdf) to find the best combination of parameters for a given scientific workflow to perform peptide-spectrum matching. The optimizations themselves are non-trivial, as demonstrated by several phenomena that can be observed when allowing for larger mass measurement errors in sequence database searches. On-the-fly parameter optimization embedded in scientific workflow management systems enables experts and non-experts alike to extract the maximum amount of information from the data. The same workflows could be used for exploring the parameter space and compare algorithms, not only for peptide-spectrum matching, but also for other tasks, such as retention time prediction.

Conclusion

Using the optimization framework, we were able to learn about how the data was acquired as well as the explored algorithms. We observed a phenomenon identifying many ammonia-loss b-ion spectra as peptides with N-terminal pyroglutamate and a large precursor mass measurement error. These insights could only be gained with the extension of the common range for the mass measurement error tolerance parameters explored by the optimization framework.",2015-09-03 +26519469,JuncDB: an exon-exon junction database.,"Intron positions upon the mRNA transcript are sometimes remarkably conserved even across distantly related eukaryotic species. This has made the comparison of intron-exon architectures across orthologous transcripts a very useful tool for studying various evolutionary processes. Moreover, the wide range of functions associated with introns may confer biological meaning to evolutionary changes in gene architectures. Yet, there is currently no database that offers such comparative information. Here, we present JuncDB (http://juncdb.carmelab.huji.ac.il/), an exon-exon junction database dedicated to the comparison of architectures between orthologous transcripts. It covers nearly 40,000 sets of orthologous transcripts spanning 88 eukaryotic species. JuncDB offers a user-friendly interface, access to detailed information, instructive graphical displays of the comparative data and easy ways to download data to a local computer. In addition, JuncDB allows the analysis to be carried out either on specific genes, or at a genome-wide level for any selected group of species.",2015-10-30 +29992260,Accurate multiple alignment of distantly related genome sequences using filtered spaced word matches as anchor points.,"

Motivation

Most methods for pairwise and multiple genome alignment use fast local homology search tools to identify anchor points, i.e. high-scoring local alignments of the input sequences. Sequence segments between those anchor points are then aligned with slower, more sensitive methods. Finding suitable anchor points is therefore crucial for genome sequence comparison; speed and sensitivity of genome alignment depend on the underlying anchoring methods.

Results

In this article, we use filtered spaced word matches to generate anchor points for genome alignment. For a given binary pattern representing match and don't-care positions, we first search for spaced-word matches, i.e. ungapped local pairwise alignments with matching nucleotides at the match positions of the pattern and possible mismatches at the don't-care positions. Those spaced-word matches that have similarity scores above some threshold value are then extended using a standard X-drop algorithm; the resulting local alignments are used as anchor points. To evaluate this approach, we used the popular multiple-genome-alignment pipeline Mugsy and replaced the exact word matches that Mugsy uses as anchor points with our spaced-word-based anchor points. For closely related genome sequences, the two anchoring procedures lead to multiple alignments of similar quality. For distantly related genomes, however, alignments calculated with our filtered-spaced-word matches are superior to alignments produced with the original Mugsy program where exact word matches are used to find anchor points.

Availability and implementation

http://spacedanchor.gobics.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-01-01 +30203078,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Evaluation and Treatment of Patients With Thoracolumbar Spine Trauma: Prophylaxis and Treatment of Thromboembolic Events.,"

Question 1

Does routine screening for deep venous thrombosis prevent pulmonary embolism (or venous thromboembolism (VTE)-associated morbidity and mortality) in patients with thoracic and lumbar fractures?

Recommendation 1

There is insufficient evidence to recommend for or against routine screening for deep venous thrombosis in preventing pulmonary embolism (or VTE-associated morbidity and mortality) in patients with thoracic and lumbar fractures. Strength of Recommendation: Grade Insufficient.

Question 2

For patients with thoracic and lumbar fractures, is one regimen of VTE prophylaxis superior to others with respect to prevention of pulmonary embolism (or VTE-associated morbidity and mortality)?

Recommendation 2

There is insufficient evidence to recommend a specific regimen of VTE prophylaxis to prevent pulmonary embolism (or VTE-associated morbidity and mortality) in patients with thoracic and lumbar fractures. Strength of Recommendation: Grade Insufficient.

Question 3

Is there a specific treatment regimen for documented VTE that provides fewer complications than other treatments in patients with thoracic and lumbar fractures?

Recommendation 3

There is insufficient evidence to recommend for or against a specific treatment regimen for documented VTE that would provide fewer complications than other treatments in patients with thoracic and lumbar fractures. Strength of Recommendation: Grade Insufficient.

Recommendation 4

Based on published data from pooled (cervical and thoracolumbar) spinal cord injury populations, the use of thromboprophylaxis is recommended to reduce the risk of VTE events in patients with thoracic and lumbar fractures. Consensus Statement by the Workgroup The full version of the guideline can be reviewed at: https://www.cns.org/guideline-chapters/congress-neurological-surgeons-systematic-review-evidence-based-guidelines/chapter_7.",2019-01-01 +26276519,Relative meaning frequencies for 578 homonyms in two Spanish dialects: A cross-linguistic extension of the English eDom norms.,"Relative meaning frequency is a critical factor to consider in studies of semantic ambiguity. In this work, we examined how this measure may change across the European and Rioplatense dialects of Spanish, as well as how the overall distributional properties differ between Spanish and English, using a computer-assisted norming approach based on dictionary definitions (Armstrong, Tokowicz, & Plaut, 2012). The results showed that the two dialects differ considerably in terms of the relative meaning frequencies of their constituent homonyms, and that the overall distributions of relative frequencies vary considerably across languages, as well. These results highlight the need for localized norms to design powerful studies of semantic ambiguity and suggest that dialectal differences may be responsible for some discrepant effects related to homonymy. In quantifying the reliability of the norms, we also established that as few as seven ratings are needed to converge on a highly stable set of ratings. This approach is therefore a very practical means of acquiring essential data in studies of semantic ambiguity, relative to past approaches, such as those based on the classification of free associates. The norms also present new possibilities for studying semantic ambiguity effects within and between populations who speak one or more languages. 
The norms and associated software are available for download at http://edom.cnbc.cmu.edu/ or http://www.bcbl.eu/databases/edom/ .",2016-09-01 +27530203,"Carboxylic ester hydrolases: Classification and database derived from their primary, secondary, and tertiary structures.","We classified the carboxylic ester hydrolases (CEHs) into families and clans by use of multiple sequence alignments, secondary structure analysis, and tertiary structure superpositions. Our work for the first time has fully established their systematic structural classification. Family members have similar primary, secondary, and tertiary structures, and their active sites and reaction mechanisms are conserved. Families may be gathered into clans by their having similar secondary and tertiary structures, even though primary structures of members of different families are not similar. CEHs were gathered from public databases by use of Basic Local Alignment Search Tool (BLAST) and divided into 91 families, with 36 families being grouped into five clans. Members of one clan have standard α/β-hydrolase folds, while those of other two clans have similar folds but with different sequences of their β-strands. The other two clans have members with six-bladed β-propeller and three-α-helix bundle tertiary structures. Those families not in clans have a large variety of structures or have no members with known structures. At the time of writing, the 91 families contained 321,830 primary structures and 1378 tertiary structures. From these data, we constructed an accessible database: CASTLE (CArboxylic eSTer hydroLasEs, http://www.castle.cbe.iastate.edu).",2016-08-31 +24363285,Plant Genome DataBase Japan (PGDBj): a portal website for the integration of plant genome-related databases.,"The Plant Genome DataBase Japan (PGDBj, http://pgdbj.jp/?ln=en) is a portal website that aims to integrate plant genome-related information from databases (DBs) and the literature. 
The PGDBj is comprised of three component DBs and a cross-search engine, which provides a seamless search over the contents of the DBs. The three DBs are as follows. (i) The Ortholog DB, providing gene cluster information based on the amino acid sequence similarity. Over 500,000 amino acid sequences of 20 Viridiplantae species were subjected to reciprocal BLAST searches and clustered. Sequences from plant genome DBs (e.g. TAIR10 and RAP-DB) were also included in the cluster with a direct link to the original DB. (ii) The Plant Resource DB, integrating the SABRE DB, which provides cDNA and genome sequence resources accumulated and maintained in the RIKEN BioResource Center and National BioResource Projects. (iii) The DNA Marker DB, providing manually or automatically curated information of DNA markers, quantitative trait loci and related linkage maps, from the literature and external DBs. As the PGDBj targets various plant species, including model plants, algae, and crops important as food, fodder and biofuel, researchers in the field of basic biology as well as a wide range of agronomic fields are encouraged to perform searches using DNA sequences, gene names, traits and phenotypes of interest. The PGDBj will return the search results from the component DBs and various types of linked external DBs.",2013-12-19 +30537646,Transition of cancer in populations in India.,"

Background & objectives

An assessment of transition of cancer in India during the past 30 years, according to changes in demographic and epidemiologic risk factors was undertaken.

Materials & methods

Cancer registry data (http://www.ncdirindia.org), (population coverage <10%), was compared with transition in life-expectancy and prevalence on smoking, alcohol and obesity. We fitted linear regression to the natural logarithm of the estimated incidence rates of various cancer registries in India.

Results

Burden of cancer in India increased from 0.6 million in 1991 to 1.4 million in 2015. Among males, common cancers are lung (12.0%), mouth (11.4%), prostate (7.0%), and tongue (7.0%) and among females, they are breast (21.0%), cervix-uteri (12.1%), ovary (6.9%), and lung (4.9%) in 2012. Increased life-expectancy and population growth as well as increased use of alcohol and increased prevalence of overweight/obesity reflected an increase in all cancers in both genders except a reduction in infection-related cancers such as cervix-uteri and tobacco-related cancers such as pharynx (excludes nasopharynx) and oesophagus.

Interpretation & conclusion

Transition in demographics and epidemiologic risk factors, reflected an increase in all cancers in both genders except a reduction in a few cancers. The increasing incidence of cancer and its associated factors demands a planned approach to reduce its burden. The burden assessment needs to be strengthened by increasing the population coverage of cancer registries. Continued effort for tobacco prevention and public health efforts for reducing obesity and alcohol consumption are needed to reduce the cancer burden.",2018-12-08 +28525568,SCENERY: a web application for (causal) network reconstruction from cytometry data.,"Flow and mass cytometry technologies can probe proteins as biological markers in thousands of individual cells simultaneously, providing unprecedented opportunities for reconstructing networks of protein interactions through machine learning algorithms. The network reconstruction (NR) problem has been well-studied by the machine learning community. However, the potentials of available methods remain largely unknown to the cytometry community, mainly due to their intrinsic complexity and the lack of comprehensive, powerful and easy-to-use NR software implementations specific for cytometry data. To bridge this gap, we present Single CEll NEtwork Reconstruction sYstem (SCENERY), a web server featuring several standard and advanced cytometry data analysis methods coupled with NR algorithms in a user-friendly, on-line environment. In SCENERY, users may upload their data and set their own study design. The server offers several data analysis options categorized into three classes of methods: data (pre)processing, statistical analysis and NR. The server also provides interactive visualization and download of results as ready-to-publish images or multimedia reports. Its core is modular and based on the widely-used and robust R platform allowing power users to extend its functionalities by submitting their own NR methods. 
SCENERY is available at scenery.csd.uoc.gr or http://mensxmachina.org/en/software/.",2017-07-01 +21592376,Mapping randomized controlled trials of treatments for eczema--the GREAT database (the Global Resource of EczemA Trials: a collection of key data on randomized controlled trials of treatments for eczema from 2000 to 2010).,"

Background

Massive duplication of effort occurs when researchers all over the world undertake extensive searches for randomized controlled trials when preparing systematic reviews, when developing evidence-based guidelines and when applying for research funding for eczema treatments. Such duplication wastes valuable resources. Searching for randomized controlled trials of eczema is a laborious task involving scrutiny of thousands of individual references from diverse electronic databases in order to obtain a few papers of interest. Clinicians and patients who wish to find out more about a particular treatment are at risk of missing the relevant evidence if they are not trained in electronic bibliographic searching. Systematic reviews cannot be relied upon to comprehensively inform current optimal eczema treatments due to incomplete coverage and because many may be out of date. An international, publically available and comprehensive resource which brings together all randomized controlled trials on eczema treatment using a highly sensitive search has the potential to release more filtered knowledge about patient care to those who need it most and to significantly shorten the duration and costs of many clinical eczema research and guideline projects.

Description

The Global Resource of EczemA Trials brings together information on all randomized controlled trials of eczema treatments published from the beginning of 2000 up to the end of 2010 and will be updated every month. We searched the Cochrane Central Register of Controlled Trials in The Cochrane Library and the Cochrane Skin Group Specialised Register, MEDLINE, EMBASE, LILACS, AMED and CINHAL databases. We included 268 RCTs (24th March 2011) covering over 70 different treatment interventions. The structure of the Global Resource of Eczema Trials allows the user as much, or as little, specificity when retrieving information on trials as they wish, in an easy to use format. For each trial, the database gives the citation for the published report and also provides enough information to enable a user to decide whether the trial is worth further scrutiny.

Conclusions

The Global Resource of Eczema Trials has been created to facilitate knowledge mobilization into healthcare and to reduce wastage of research time through unnecessary duplication. The collective time saved by research groups around the world can now be used to make strides in optimising the treatment of eczema, in order to further benefit people with eczema. The database can be accessed free of charge at http://www.greatdatabase.org.uk.",2011-05-18 +30662564,Artificial intelligence-based decision-making for age-related macular degeneration.,"Artificial intelligence (AI) based on convolutional neural networks (CNNs) has a great potential to enhance medical workflow and improve health care quality. Of particular interest is practical implementation of such AI-based software as a cloud-based tool aimed for telemedicine, the practice of providing medical care from a distance using electronic interfaces. Methods: In this study, we used a dataset of labeled 35,900 optical coherence tomography (OCT) images obtained from age-related macular degeneration (AMD) patients and used them to train three types of CNNs to perform AMD diagnosis. Results: Here, we present an AI- and cloud-based telemedicine interaction tool for diagnosis and proposed treatment of AMD. Through deep learning process based on the analysis of preprocessed optical coherence tomography (OCT) imaging data, our AI-based system achieved the same image discrimination rate as that of retinal specialists in our hospital. The AI platform's detection accuracy was generally higher than 90% and was significantly superior (p < 0.001) to that of medical students (69.4% and 68.9%) and equal (p = 0.99) to that of retinal specialists (92.73% and 91.90%). Furthermore, it provided appropriate treatment recommendations comparable to those of retinal specialists. Conclusions: We therefore developed a website for realistic cloud computing based on this AI platform, available at https://www.ym.edu.tw/~AI-OCT/. 
Patients can upload their OCT images to the website to verify whether they have AMD and require treatment. Using an AI-based cloud service represents a real solution for medical imaging diagnostics and telemedicine.",2019-01-01 +30093489,Protein-RNA interactions: structural characteristics and hotspot amino acids.,"Structural information about protein-RNA complexes supports the understanding of crucial recognition processes in the cell, and it can allow the development of high affinity ligands to interfere with these processes. In this respect, the identification of amino acid hotspots is particularly important. In contrast to protein-protein interactions, in silico approaches for protein-RNA interactions lag behind in their development. Herein, we report an analysis of available protein-RNA structures. We assembled a data set of 322 crystal and NMR structures and analyzed them regarding interface properties. In addition, we describe a computational alanine-scanning approach which provides interaction scores for interface amino acids, allowing the identification of potential hotspots in protein-RNA interfaces. We have made the computational approach available as an online tool, which allows interaction scores to be calculated for any structure of a protein-RNA complex by uploading atomic coordinates to the PRI HotScore web server (https://pri-hotscore.labs.vu.nl).",2018-08-09 +24344970,CVDHD: a cardiovascular disease herbal database for drug discovery and network pharmacology.,"

Background

Cardiovascular disease (CVD) is the leading cause of death and associates with multiple risk factors. Herb medicines have been used to treat CVD long ago in China and several natural products or derivatives (e.g., aspirin and reserpine) are most common drugs all over the world. The objective of this work was to construct a systematic database for drug discovery based on natural products separated from CVD-related medicinal herbs and to research on action mechanism of herb medicines.

Description

The cardiovascular disease herbal database (CVDHD) was designed to be a comprehensive resource for virtual screening and drug discovery from natural products isolated from medicinal herbs for cardiovascular-related diseases. CVDHD comprises 35230 distinct molecules and their identification information (chemical name, CAS registry number, molecular formula, molecular weight, international chemical identifier (InChI) and SMILES), calculated molecular properties (AlogP, number of hydrogen bond acceptor and donors, etc.), docking results between all molecules and 2395 target proteins, cardiovascular-related diseases, pathways and clinical biomarkers. All 3D structures were optimized in the MMFF94 force field and can be freely accessed.

Conclusions

CVDHD integrated medicinal herbs, natural products, CVD-related target proteins, docking results, diseases and clinical biomarkers. By using the methods of virtual screening and network pharmacology, CVDHD will provide a platform to streamline drug/lead discovery from natural products and explore the action mechanism of medicinal herbs. CVDHD is freely available at http://pkuxxj.pku.edu.cn/CVDHD.",2013-12-18 +29751087,Population genetic and evolution analysis of controversial genus Edwardsiella by multilocus sequence typing.,"At present, the genus Edwardsiella compiles five species: E. tarda, E. hoshinae, E. ictaluri, E. piscicida and E. anguillarum. Some species of this genus such as E. ictaluri and E. piscicida are important pathogens of numerous fish species. With the description of the two latter species, the phylogeny of Edwardsiella became more complicated. With the aim to clarify the relationships among all species in the genus, a multilocus sequence typing (MLST) approach was developed and applied to characterize 56 isolates and 6 reference strains belonging to the five Edwardsiella species. Moreover, several analyses based on the MLST scheme were performed to investigate the evolution within the genus, as well as the influence of recombination and mutation in the speciation. Edwardsiella isolates presented a high genetic variability reflected in the fourteen sequence types (ST) represented by a single isolate out of eighteen total ST. Mutation events were considerably more frequent than recombination, although both approximately equal influenced the genetic diversification. However, the speciation among species occurred mostly by recombination. Edwardsiella genus displays a non-clonal population structure with some degree of geographical isolation followed by a population expansion of E. piscicida. 
A database from this study was created and hosted on pubmlst.org (http://pubmlst.org/edwardsiella/).",2018-05-08 +29745830,MEGADOCK-Web: an integrated database of high-throughput structure-based protein-protein interaction predictions.,"BACKGROUND:Protein-protein interactions (PPIs) play several roles in living cells, and computational PPI prediction is a major focus of many researchers. The three-dimensional (3D) structure and binding surface are important for the design of PPI inhibitors. Therefore, rigid body protein-protein docking calculations for two protein structures are expected to allow elucidation of PPIs different from known complexes in terms of 3D structures because known PPI information is not explicitly required. We have developed rapid PPI prediction software based on protein-protein docking, called MEGADOCK. In order to fully utilize the benefits of computational PPI predictions, it is necessary to construct a comprehensive database to gather prediction results and their predicted 3D complex structures and to make them easily accessible. Although several databases exist that provide predicted PPIs, the previous databases do not contain a sufficient number of entries for the purpose of discovering novel PPIs. RESULTS:In this study, we constructed an integrated database of MEGADOCK PPI predictions, named MEGADOCK-Web. MEGADOCK-Web provides more than 10 times the number of PPI predictions than previous databases and enables users to conduct PPI predictions that cannot be found in conventional PPI prediction databases. In MEGADOCK-Web, there are 7528 protein chains and 28,331,628 predicted PPIs from all possible combinations of those proteins. Each protein structure is annotated with PDB ID, chain ID, UniProt AC, related KEGG pathway IDs, and known PPI pairs. 
Additionally, MEGADOCK-Web provides four powerful functions: 1) searching precalculated PPI predictions, 2) providing annotations for each predicted protein pair with an experimentally known PPI, 3) visualizing candidates that may interact with the query protein on biochemical pathways, and 4) visualizing predicted complex structures through a 3D molecular viewer. CONCLUSION:MEGADOCK-Web provides a huge amount of comprehensive PPI predictions based on docking calculations with biochemical pathways and enables users to easily and quickly assess PPI feasibilities by archiving PPI predictions. MEGADOCK-Web also promotes the discovery of new PPIs and protein functions and is freely available for use at http://www.bi.cs.titech.ac.jp/megadock-web/ .",2018-05-08 +29752607,"Farseer-NMR: automatic treatment, analysis and plotting of large, multi-variable NMR data.","We present Farseer-NMR ( https://git.io/vAueU ), a software package to treat, evaluate and combine NMR spectroscopic data from sets of protein-derived peaklists covering a range of experimental conditions. The combined advances in NMR and molecular biology enable the study of complex biomolecular systems such as flexible proteins or large multibody complexes, which display a strong and functionally relevant response to their environmental conditions, e.g. the presence of ligands, site-directed mutations, post translational modifications, molecular crowders or the chemical composition of the solution. These advances have created a growing need to analyse those systems' responses to multiple variables. The combined analysis of NMR peaklists from large and multivariable datasets has become a new bottleneck in the NMR analysis pipeline, whereby information-rich NMR-derived parameters have to be manually generated, which can be tedious, repetitive and prone to human error, or even unfeasible for very large datasets. 
There is a persistent gap in the development and distribution of software focused on peaklist treatment, analysis and representation, and specifically able to handle large multivariable datasets, which are becoming more commonplace. In this regard, Farseer-NMR aims to close this longstanding gap in the automated NMR user pipeline and, altogether, reduce the time burden of analysis of large sets of peaklists from days/weeks to seconds/minutes. We have implemented some of the most common, as well as new, routines for calculation of NMR parameters and several publication-quality plotting templates to improve NMR data representation. Farseer-NMR has been written entirely in Python and its modular code base enables facile extension.",2018-05-11 +29738769,Neutrophils infiltrating pancreatic ductal adenocarcinoma indicate higher malignancy and worse prognosis.,"CD177 is considered to represent neutrophils. We analyzed mRNA expression level of CD177 and clinical follow-up survey of PDAC to estimate overall survival (OS) from Gene Expression Omnibus (GEO) dataset (GSE21501, containing samples from 102 PDAC patients) by R2 platform (http://r2.amc.nl). We also analyzed correlated genes of CD177 by Gene Ontology (GO) and Kyoto Encyclopedia of Genes and Genomes (KEGG) analysis to predict the potential relationship between neutrophils and prognosis of PDAC. We then performed hematoxylin and eosin (H&E) staining and immunohistochemical staining of surgical specimens to verify infiltration of neutrophils in PDAC tissues. After analyzing mRNA expression data and clinical follow-up survey provided in the GEO dataset (GSE21501, containing samples from 102 PDAC patients) and clinicopathological data of 23 PDAC patients, we demonstrated that CD177 was correlated with poor prognosis. The univariate Kaplan-Meier survival analysis revealed that OS was inversely associated with increased expression of CD177 (P = 0.012). 
Expression of phosphodiesterase (PDE)4D was positively related to CD177 in gene correlation analysis (R = 0.413, P < 0.001) by R2 platform. H&E staining and immunohistochemistry of CD177 in 23 PDAC surgical samples showed accumulation of neutrophils in the stroma and blood vessels around the cancer cells. In addition, immunohistochemical staining showed that CD177 was highly expressed in the stroma and blood vessels around tumor tissues of PDAC, which was similar to H&E staining. Expression of CD177 can be used to represent infiltration of neutrophils, which may have potential prognostic value in PDAC.",2018-05-09 +29370280,CLC-Pred: A freely available web-service for in silico prediction of human cell line cytotoxicity for drug-like compounds.,"In silico methods of phenotypic screening are necessary to reduce the time and cost of the experimental in vivo screening of anticancer agents through dozens of millions of natural and synthetic chemical compounds. We used the previously developed PASS (Prediction of Activity Spectra for Substances) algorithm to create and validate the classification SAR models for predicting the cytotoxicity of chemicals against different types of human cell lines using ChEMBL experimental data. A training set from 59,882 structures of compounds was created based on the experimental data (IG50, IC50, and % inhibition values) from ChEMBL. The average accuracy of prediction (AUC) calculated by leave-one-out and a 20-fold cross-validation procedure during the training was 0.930 and 0.927 for 278 cancer cell lines, respectively, and 0.948 and 0.947 for cytotoxicity prediction for 27 normal cell lines, respectively. 
Using the given SAR models, we developed a freely available web-service for cell-line cytotoxicity profile prediction (CLC-Pred: Cell-Line Cytotoxicity Predictor) based on the following structural formula: http://way2drug.com/Cell-line/.",2018-01-25 +28724534,"m6aViewer: software for the detection, analysis, and visualization of N6-methyladenosine peaks from m6A-seq/ME-RIP sequencing data.","Recent methods for transcriptome-wide N6-methyladenosine (m6A) profiling have facilitated investigations into the RNA methylome and established m6A as a dynamic modification that has critical regulatory roles in gene expression and may play a role in human disease. However, bioinformatics resources available for the analysis of m6A sequencing data are still limited. Here, we describe m6aViewer-a cross-platform application for analysis and visualization of m6A peaks from sequencing data. m6aViewer implements a novel m6A peak-calling algorithm that identifies high-confidence methylated residues with more precision than previously described approaches. The application enables data analysis through a graphical user interface, and thus, in contrast to other currently available tools, does not require the user to be skilled in computer programming. m6aViewer and test data can be downloaded here: http://dna2.leeds.ac.uk/m6a.",2017-07-19 +26559506,WormExp: a web-based application for a Caenorhabditis elegans-specific gene expression enrichment analysis.,"

Motivation

A particular challenge of the current omics age is to make sense of the inferred differential expression of genes and proteins. The most common approach is to perform a gene ontology (GO) enrichment analysis, thereby relying on a database that has been extracted from a variety of organisms and that can therefore only yield reliable information on evolutionary conserved functions.

Results

We here present a web-based application for a taxon-specific gene set exploration and enrichment analysis, which is expected to yield novel functional insights into newly determined gene sets. The approach is based on the complete collection of curated high-throughput gene expression data sets for the model nematode Caenorhabditis elegans, including 1786 gene sets from more than 350 studies.

Availability and implementation

WormExp is available at http://wormexp.zoologie.uni-kiel.de

Contacts

hschulenburg@zoologie.uni-kiel.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-11 +30073954,Correlates of the Built Environment and Active Travel: Evidence from 20 US Metropolitan Areas.,"

Background

Walking and bicycling are health-promoting and environmentally friendly alternatives to the automobile. Previous studies that explore correlates of active travel and the built environment are for a single metropolitan statistical area (MSA) and results often vary among MSAs.

Objectives

Our goal was to model the relationship between the built environment and active travel for 20 MSAs spanning the continental United States.

Methods

We sourced and processed pedestrian and bicycle traffic counts for 20 U.S. MSAs (n=4,593 count locations), with 1–17 y of data available for each count location and the earliest and latest years of data collection being 1999 and 2016, respectively. Then, we tabulated land use, transport, and sociodemographic variables at 12 buffer sizes (100–3,000 m) for each count location. We employed stepwise linear regression to develop predictive models for morning and afternoon peak-period bicycle and pedestrian traffic volumes.

Results

Built environment features were significant predictors of active travel across all models. Areas with easy access to water and green space, high concentration of jobs, and high rates of active commuting were associated with higher bicycle and pedestrian volumes. Bicycle facilities (e.g., bike lanes, shared lane markings, off-street trails) were correlated with higher bicycle volumes. All models demonstrated reasonable goodness-of-fit for both bicyclists (adj-R2: 0.46–0.61) and pedestrians (adj-R2: 0.42–0.72). Cross-validation results showed that the afternoon peak-period models were more reliable than morning models.

Conclusions

To our knowledge, this is the first study to model multi-city trends in bicycling and walking traffic volumes with the goal of developing generalized estimates of the impact of the built environment on active travel. Our models could be used for exposure assessment (e.g., crashes, air pollution) to inform design of health-promoting cities. https://doi.org/10.1289/EHP3389.",2018-07-30 +30291585,Inter-Laboratory Characterization of the Velocity Field in the FDA Blood Pump Model Using Particle Image Velocimetry (PIV).,"

Purpose

A credible computational fluid dynamics (CFD) model can play a meaningful role in evaluating the safety and performance of medical devices. A key step towards establishing model credibility is to first validate CFD models with benchmark experimental datasets to minimize model-form errors before applying the credibility assessment process to more complex medical devices. However, validation studies to establish benchmark datasets can be cost prohibitive and difficult to perform. The goal of this initiative sponsored by the U.S. Food and Drug Administration is to generate validation data for a simplified centrifugal pump that mimics blood flow characteristics commonly observed in ventricular assist devices.

Methods

The centrifugal blood pump model was made from clear acrylic and included an impeller, with four equally spaced, straight blades, supported by mechanical bearings. Particle Image Velocimetry (PIV) measurements were performed at several locations throughout the pump by three independent laboratories. A standard protocol was developed for the experiments to ensure that the flow conditions were comparable and to minimize systematic errors during PIV image acquisition and processing. Velocity fields were extracted at the pump entrance, blade passage area, back gap region, and at the outlet diffuser regions. A Newtonian blood analog fluid composed of sodium iodide, glycerin, and water was used as the working fluid. Velocity measurements were made for six different pump flow conditions, with the blood-equivalent flow rate ranging between 2.5 and 7 L/min for pump speeds of 2500 and 3500 rpm.

Results

Mean intra- and inter-laboratory variabilities in velocity were ~ 10% at the majority of the measurement locations inside the pump. However, the inter-laboratory variability increased to more than ~ 30% in the exit diffuser region. The variability between the three laboratories for the peak velocity magnitude in the diffuser region ranged from 5 to 25%. The bulk velocity field near the impeller changed proportionally with the rotational speed but was relatively unaffected by the pump flow rate. In contrast, flow in the exit diffuser region was sensitive to both the flow rate and the rotational speed. Specifically, at 3500 rpm, the exit jet tilted toward the inner wall of the diffuser at a flow rate of 2.5 L/min, but the jet tilted towards the outer wall when the flow rate was 7 L/min.

Conclusions

Inter-laboratory experimental mean velocity data (and the corresponding variance) were obtained for the FDA pump model and are available for download at https://nciphub.org/wiki/FDA_CFD . Experimental datasets from the inter-laboratory characterization of benchmark flow models, including the blood pump model presented herein and our previous nozzle model, can be used for validating future CFD studies and to collaboratively develop guidelines on best practices for verification, validation, uncertainty quantification, and credibility assessment of CFD simulations in the evaluation of medical devices (e.g. ASME V&V 40 standards working group).",2018-10-05 +29853795,Prevalence and Possible Role of Candida Species in Patients with Psoriasis: A Systematic Review and Meta-Analysis.,"Although fungal colonization is implicated in the pathogenesis of psoriasis, its prevalence remains unclear. The aim of this systematic review and meta-analysis was to provide an overview on the prevalence of Candida species in patients with psoriasis. We searched databases (MEDLINE, EMBASE, Cochrane Central Register of Controlled Trials, and http://clinicaltrials.gov) to identify studies involving subjects of any age with an established diagnosis of psoriasis and healthy controls, who were tested for carriage of Candida spp. on the skin or mucosal membranes (or saliva and stool), or presented with clinical candidiasis with microbiologically confirmed etiology. We identified nine cross-sectional studies including a total of 1038 subjects with psoriasis (psoriatics) and 669 controls. We found Candida species detection rates for psoriatics were significantly higher than those in the controls, especially in the oral mucosa milieux. These results suggest psoriasis may be one of the systemic diseases that predispose to oral Candida spp. carriage and infection.",2018-05-06 +27720629,[Living kidney donation].,"

Objectives

To review ethical, legal and technical aspects of living kidney donor surgery.

Material and methods

An exhaustive systematic review of the scientific literature was performed in the Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of the following keywords: Donor nephrectomy; Kidney paired donation; Kidney transplantation; Laparoscopic nephrectomy; Living donor; Organs trafficking; Robotic assisted nephrectomy; Vaginal extraction. French legal documents have been reviewed using the government portal (http://www.legifrance.gouv.fr). Articles were selected according to methods, language of publication and relevance. A total of 6421 articles were identified; after careful selection, 161 publications were considered of interest and were eligible for our review.

Results

The ethical debate focuses on organ shortage, financial incentive, organ trafficking and the recent data suggesting a small but significant increase risk for late renal disease in donor population. Legal decisions aim to increase the number of kidneys available for donation, such as kidney-paired donation that faces several obstacles in France. Laparoscopic approach became widely used, while robotic-assisted donor nephrectomy failed to demonstrate improved outcome as compared with other minimal invasive techniques.

Conclusion

Minimally invasive living donor nephrectomy aims to limit side effects in the donor without increasing the morbidity in this specific population of healthy persons; long term surveillance to prevent the onset of renal disease is mandatory.",2016-10-06 +29560830,QSurface: fast identification of surface expression markers in cancers.,"

Background

Cell surface proteins have provided useful targets and biomarkers for advanced cancer therapies. The recent clinical success of antibody-drug conjugates (ADCs) highlights the importance of finding selective surface antigens for given cancer subtypes. We thus attempted to develop stand-alone software for the analysis of the cell surface transcriptome of patient cancer samples and to prioritize lineage- and/or mutation-specific over-expression markers in cancer cells.

Results

A total of 519 genes were selected as surface proteins, and their expression was profiled in 14 cancer subtypes using patient sample transcriptome data. Lineage/mutation-oriented analysis was used to identify subtype-specific surface markers with statistical confidence. Experimental validation confirmed the unique over-expression of predicted surface markers (MUC4, MSLN, and SLC7A11) in lung cancer cells at the protein level. The differential cell surface gene expression of cell lines may differ from that of tissue samples due to the absence of the tumor microenvironment.

Conclusions

In the present study, advanced 3D models of lung cell lines successfully reproduced the predicted patterns, demonstrating the physiological relevance of cell line-based 3D models in validating surface markers from patient tumor data. Also QSurface software is freely available at http://compbio.sookmyung.ac.kr/~qsurface .",2018-03-19 +27490990,AVCpred: an integrated web server for prediction and design of antiviral compounds.,"Viral infections constantly jeopardize the global public health due to lack of effective antiviral therapeutics. Therefore, there is an imperative need to speed up the drug discovery process to identify novel and efficient drug candidates. In this study, we have developed quantitative structure-activity relationship (QSAR)-based models for predicting antiviral compounds (AVCs) against deadly viruses like human immunodeficiency virus (HIV), hepatitis C virus (HCV), hepatitis B virus (HBV), human herpesvirus (HHV) and 26 others using publicly available experimental data from the ChEMBL bioactivity database. Support vector machine (SVM) models achieved a maximum Pearson correlation coefficient of 0.72, 0.74, 0.66, 0.68, and 0.71 in regression mode and a maximum Matthew's correlation coefficient 0.91, 0.93, 0.70, 0.89, and 0.71, respectively, in classification mode during 10-fold cross-validation. Furthermore, similar performance was observed on the independent validation sets. We have integrated these models in the AVCpred web server, freely available at http://crdd.osdd.net/servers/avcpred. In addition, the datasets are provided in a searchable format. We hope this web server will assist researchers in the identification of potential antiviral agents. 
It would also save time and cost by prioritizing new drugs against viruses before their synthesis and experimental testing.",2016-09-09 +29018623,voomDDA: discovery of diagnostic biomarkers and classification of RNA-seq data.,"RNA-Seq is a recent and efficient technique that uses the capabilities of next-generation sequencing technology for characterizing and quantifying transcriptomes. One important task using gene-expression data is to identify a small subset of genes that can be used to build diagnostic classifiers particularly for cancer diseases. Microarray based classifiers are not directly applicable to RNA-Seq data due to its discrete nature. Overdispersion is another problem that requires careful modeling of mean and variance relationship of the RNA-Seq data. In this study, we present voomDDA classifiers: variance modeling at the observational level (voom) extensions of the nearest shrunken centroids (NSC) and the diagonal discriminant classifiers. VoomNSC is one of these classifiers and brings voom and NSC approaches together for the purpose of gene-expression based classification. For this purpose, we propose weighted statistics and put these weighted statistics into the NSC algorithm. The VoomNSC is a sparse classifier that models the mean-variance relationship using the voom method and incorporates voom's precision weights into the NSC classifier via weighted statistics. A comprehensive simulation study was designed and four real datasets are used for performance assessment. The overall results indicate that voomNSC performs as the sparsest classifier. It also provides the most accurate results together with power-transformed Poisson linear discriminant analysis, rlog transformed support vector machines and random forests algorithms. In addition to prediction purposes, the voomNSC classifier can be used to identify the potential diagnostic biomarkers for a condition of interest. 
Through this work, statistical learning methods proposed for microarrays can be reused for RNA-Seq data. An interactive web application is freely available at http://www.biosoft.hacettepe.edu.tr/voomDDA/.",2017-10-06 +29728050,ProLego: tool for extracting and visualizing topological modules in protein structures.,"

Background

In protein design, correct use of topology is among the initial and most critical features. Meticulous selection of backbone topology aids in drastically reducing the structure search space. With ProLego, we present a server application to explore the component aspect of protein structures and provide an intuitive and efficient way to scan the protein topology space.

Result

We have implemented an in-house developed ""topological representation"" in an automated pipeline to extract protein topology from a given protein structure. Using the topology string, ProLego compares topology against a non-redundant extensive topology database (ProLegoDB) as well as extracts constituent topological modules. The platform offers interactive topology visualization graphs.

Conclusion

ProLego provides an alternative but comprehensive way to scan and visualize protein topology along with an extensive database of protein topology. ProLego can be found at http://www.proteinlego.com.",2018-05-04 +26220682,SpirPro: A Spirulina proteome database and web-based tools for the analysis of protein-protein interactions at the metabolic level in Spirulina (Arthrospira) platensis C1.,"

Background

Spirulina (Arthrospira) platensis is the only cyanobacterium that in addition to being studied at the molecular level and subjected to gene manipulation, can also be mass cultivated in outdoor ponds for commercial use as a food supplement. Thus, encountering environmental changes, including temperature stresses, is common during the mass production of Spirulina. The use of cyanobacteria as an experimental platform, especially for photosynthetic gene manipulation in plants and bacteria, is becoming increasingly important. Understanding the mechanisms and protein-protein interaction networks that underlie low- and high-temperature responses is relevant to Spirulina mass production. To accomplish this goal, high-throughput techniques such as OMICs analyses are used. Thus, large datasets must be collected, managed and subjected to information extraction. Therefore, databases including (i) proteomic analysis and protein-protein interaction (PPI) data and (ii) domain/motif visualization tools are required for potential use in temperature response models for plant chloroplasts and photosynthetic bacteria.

Descriptions

A web-based repository was developed including an embedded database, SpirPro, and tools for network visualization. Proteome data were analyzed integrated with protein-protein interactions and/or metabolic pathways from KEGG. The repository provides various information, ranging from raw data (2D-gel images) to associated results, such as data from interaction and/or pathway analyses. This integration allows in silico analyses of protein-protein interactions affected at the metabolic level and, particularly, analyses of interactions between and within the affected metabolic pathways under temperature stresses for comparative proteomic analysis. The developed tool, which is coded in HTML with CSS/JavaScript and depicted in Scalable Vector Graphics (SVG), is designed for interactive analysis and exploration of the constructed network. SpirPro is publicly available on the web at http://spirpro.sbi.kmutt.ac.th .

Conclusions

SpirPro is an analysis platform containing an integrated proteome and PPI database that provides the most comprehensive data on this cyanobacterium at the systematic level. As an integrated database, SpirPro can be applied in various analyses, such as temperature stress response networking analysis in cyanobacterial models and interacting domain-domain analysis between proteins of interest.",2015-07-29 +26868054,CCSI: a database providing chromatin-chromatin spatial interaction information. ,"Distal regulatory elements have been shown to regulate gene transcription through spatial interactions, and single nucleotide polymorphisms (SNPs) are linked with distal gene expression by spatial proximity, which helps to explain the causal role of disease-associated SNPs in non-coding region. Therefore, studies on spatial interactions between chromatin have created a new avenue for elucidating the mechanism of transcriptional regulation in disease pathogenesis. Recently, a growing number of chromatin interactions have been revealed by means of 3C, 4C, 5C, ChIA-PET and Hi-C technologies. To interpret and utilize these interactions, we constructed chromatin-chromatin spatial interaction (CCSI) database by integrating and annotating 91 sets of chromatin interaction data derived from published literature, UCSC database and NCBI GEO database, resulting in a total of 3,017,962 pairwise interactions (false discovery rate < 0.05), covering human, mouse and yeast. A web interface has been designed to provide access to the chromatin interactions. The main features of CCSI are (i) showing chromatin interactions and corresponding genes, enhancers and SNPs within the regions in the search page; (ii) offering complete interaction datasets, enhancer and SNP information in the download page; and (iii) providing analysis pipeline for the annotation of interaction data. 
In conclusion, CCSI will facilitate exploring transcriptional regulatory mechanism in disease pathogenesis associated with spatial interactions among genes, regulatory regions and SNPs. Database URL: http://songyanglab.sysu.edu.cn/ccsi.",2016-02-11 +29297315,Utilizing random Forest QSAR models with optimized parameters for target identification and its application to target-fishing server.,"BACKGROUND:The identification of target molecules is important for understanding the mechanism of ""target deconvolution"" in phenotypic screening and ""polypharmacology"" of drugs. Because conventional methods of identifying targets require time and cost, in-silico target identification has been considered an alternative solution. One of the well-known in-silico methods of identifying targets involves structure activity relationships (SARs). SARs have advantages such as low computational cost and high feasibility; however, the data dependency in the SAR approach causes imbalance of active data and ambiguity of inactive data throughout targets. RESULTS:We developed a ligand-based virtual screening model comprising 1121 target SAR models built using a random forest algorithm. The performance of each target model was tested by employing the ROC curve and the mean score using an internal five-fold cross validation. Moreover, recall rates for top-k targets were calculated to assess the performance of target ranking. A benchmark model using an optimized sampling method and parameters was examined via external validation set. The result shows recall rates of 67.6% and 73.9% for top-11 (1% of the total targets) and top-33, respectively. We provide a website for users to search the top-k targets for query ligands available publicly at http://rfqsar.kaist.ac.kr . CONCLUSIONS:The target models that we built can be used for both predicting the activity of ligands toward each target and ranking candidate targets for a query ligand using a unified scoring scheme. 
The scores are additionally fitted to the probability so that users can estimate how likely a ligand-target interaction is active. The user interface of our web site is user friendly and intuitive, offering useful information and cross references.",2017-12-28 +27270715,Gene regulation knowledge commons: community action takes care of DNA binding transcription factors. ,"A large gap remains between the amount of knowledge in scientific literature and the fraction that gets curated into standardized databases, despite many curation initiatives. Yet the availability of comprehensive knowledge in databases is crucial for exploiting existing background knowledge, both for designing follow-up experiments and for interpreting new experimental data. Structured resources also underpin the computational integration and modeling of regulatory pathways, which further aids our understanding of regulatory dynamics. We argue how cooperation between the scientific community and professional curators can increase the capacity of capturing precise knowledge from literature. We demonstrate this with a project in which we mobilize biological domain experts who curate large amounts of DNA binding transcription factors, and show that they, although new to the field of curation, can make valuable contributions by harvesting reported knowledge from scientific papers. Such community curation can enhance the scientific epistemic process.Database URL: http://www.tfcheckpoint.org.",2016-06-05 +30065689,Utility of Alternative Effect Size Statistics and the Development of a Web-Based Calculator: Shiny-AESC.,"Alternative displays of effect size statistics can enhance the understandability and impact of validity evidence in a variety of applied settings. Arguably, the proliferation of alternative effect size statistics has been limited due to the lack of user-friendly tools to create them. 
Common statistical packages do not readily produce these alternative effect sizes and existing tools are outdated and inaccessible. In this paper, I introduce a free-to-use web-based calculator (https://dczhang.shinyapps.io/expectancyApp/) for generating alternative effect size displays from empirical data. This calculator requires no mathematical or programming expertise, and therefore, is ideal for academics and practitioners. I also present results from an empirical study that demonstrates the benefits of alternative effect size displays for enhancing lay people's perceived understandability of validity information and attitudes toward the use of standardized testing for college admissions.",2018-07-17 +29290427,Diagnosing pregnancy status using infrared spectra and milk composition in dairy cows.,"Data on Holstein (16,890), Brown Swiss (31,441), Simmental (25,845), and Alpine Grey (12,535) cows reared in northeastern Italy were used to assess the ability of milk components (fat, protein, casein, and lactose) and Fourier transform infrared (FTIR) spectral data to diagnose pregnancy. Pregnancy status was defined as whether a pregnancy was confirmed by a subsequent calving and no other subsequent inseminations within 90 d of the breeding of specific interest. Milk samples were analyzed for components and FTIR full-spectrum data using a MilkoScan FT+ 6000 (Foss Electric, Hillerød, Denmark). The spectrum covered 1,060 wavenumbers (wn) from 5,010 to 925 cm-1. Pregnancy status was predicted using generalized linear models with fat, protein, lactose, casein, and individual FTIR spectral bands or wavelengths as predictors. We also fitted a generalized linear model as a simultaneous function of all wavelengths (1,060 wn) with a Bayesian variable selection model using the BGLR R-package (https://r-forge.r-project.org/projects/bglr/). 
Prediction accuracy was determined using the area under a receiver operating characteristic curve based on a 10-fold cross-validation (CV-AUC) assessment based on sensitivities and specificities of phenotypic predictions. Overall, the best prediction accuracies were obtained for the model that included the complete FTIR spectral data. We observed similar patterns across breeds with small differences in prediction accuracy. The highest CV-AUC value was obtained for Alpine Grey cows (CV-AUC = 0.645), whereas Brown Swiss and Simmental cows had similar performance (CV-AUC = 0.630 and 0.628, respectively), followed by Holsteins (CV-AUC = 0.607). For single-wavelength analyses, important peaks were detected at wn 2,973 to 2,872 cm-1 where Fat-B (C-H stretch) is usually filtered, wn 1,773 cm-1 where Fat-A (C=O stretch) is filtered, wn 1,546 cm-1 where protein is filtered, wn 1,468 cm-1 associated with urea and fat, wn 1,399 and 1,245 cm-1 associated with acetone, and wn 1,025 to 1,013 cm-1 where lactose is filtered. In conclusion, this research provides new insight into alternative strategies for pregnancy screening of dairy cows.",2017-12-28 +29730132,"A billion cups: The diversity, traditional uses, safety issues and potential of Chinese herbal teas.","

Ethnopharmacological relevance

Herbal teas have long been consumed by Chinese people for preventive and/or therapeutic healthcare. Although herbal teas are widely consumed by many cultural groups in different regions of China, no thorough review has been undertaken to assess the diversity of the country's herbal tea usage. This literature review, complemented by a quantitative survey in an important tea market in Kunming, begins to fill this knowledge gap.

Aims of the study

The study aims to summarize the current knowledge of plant species used as herbal teas by different cultural groups in different regions of China, with a focus on the teas' perceived traditional healthcare functions, related phytochemical/pharmaceutical research, and safety issues.

Materials and methods

The study involved a comprehensive literature review and a market survey. The literature review was based on published ethnobotanical studies of herbal teas in China. We searched the Web of Science™, ELSEVIER, the China National Knowledge Infrastructure (CNKI) and the China Science and Technology Journal Database to locate relevant studies (including journal articles, Masters/PhD dissertations and books) that were published before March 2017. A species list was compiled based on the review and supplemented with information retrieved from the Scifinder database (https://scifinder.cas.org) and the Chinese Pharmacopoeia (2010). A Use Value Index was employed for ranking the most cited species. Based on the 29 most cited species, we discussed the current research status in relation to healthcare benefits and safety concerns of herbal teas in China. To better understand the current status of the herbal tea market in China, we also surveyed 136 tea vendors at the Xiongda Tea Market in Kunming. Information gathered from the survey included the species sold, the sale prices and the form of the herbal tea product.

Results

The literature identified 759 plant species used as herbal tea in China and the market survey identified an additional 23 species. Most of the species used were from the Leguminosae, Compositae and Lamiaceae families. Twenty two provinces and fourteen ethnic minority groups have records on the consumption of herbal teas. Southern China uses up to 82% of the total species, and 211 out of 759 species are used by minority groups. Thirty categories of traditional healthcare functions are linked with herbal teas, with clearing away heat, relieving toxicity and suppressing cough being the most important functions. There is phytochemical/pharmaceutical evidence to support the claimed healthcare benefits of some Chinese herbal teas. Although Chinese herbal teas are generally safe to consume, overdoses of some herbal teas and some unapproved mixtures of species may cause health risks. Based on our market survey, the prices of most herbal teas range between 100 and 200 RMB (US$15-30) per kg.

Conclusions

A rich array of herbal tea species with various traditional healthcare functions have long been used in China, and as such there is a huge market potential for Chinese herbal teas. More pharmaceutical/phytochemical research is needed to assess a wide range of perceived healthcare benefits of Chinese herbal teas. Our research highlights the need to study herbal teas through an ethnopharmacological perspective and by employing a holistic approach, which requires greater consideration of traditional knowledge in the pharmacological research design. Product safety and sustainability issues should also be considered, so the traditional applications of herbal teas can be transformed to efficient health boosting functional products.",2018-05-03 +30298698,Stress and quality of life in parents of children with phenylketonuria.,"

Background

Phenylketonuria is a hereditary disease caused by the lack or deficiency of phenylalanine hydroxylase enzyme activity. Parents of children with phenylketonuria undergo significant stress during their childcare years. They are also responsible for controlling their children's dietary treatment and this may affect their quality of life. The purpose of this study was to investigate the relationship between stress and quality of life in parents of children with phenylketonuria.

Methods

The present cross-sectional study is a correlation-analytical research performed on parents of children with phenylketonuria in Kerman province in Iran in 2017. In total, 124 parents were chosen by consensus method. Parents completed the perceived stress scale and quality of life (SF36). SPSS software version 18 (https://www.presidion.com/software/ibm-spss-trial-downloads/) was used to analyze the data.

Results

Total scores of stress and quality of life were 28.81 ± 8.74 and 45.97 ± 21.22. There was a significant negative correlation between quality of life and perceived stress (p < 0.001 and ρ = -0.58).

Conclusion

Parents of children with phenylketonuria have moderate quality of life and relatively high stress. The medical community and authorities should take steps to improve the quality of life and reduce stress experienced by parents of children with phenylketonuria.",2018-10-08 +30871477,Predicting protein residue-residue contacts using random forests and deep networks.,"

Background

The ability to predict which pairs of amino acid residues in a protein are in contact with each other offers many advantages for various areas of research that focus on proteins. For example, contact prediction can be used to reduce the computational complexity of predicting the structure of proteins and even to help identify functionally important regions of proteins. These predictions are becoming especially important given the relatively low number of experimentally determined protein structures compared to the amount of available protein sequence data.

Results

Here we have developed and benchmarked a set of machine learning methods for performing residue-residue contact prediction, including random forests, direct-coupling analysis, support vector machines, and deep networks (stacked denoising autoencoders). These methods are able to predict contacting residue pairs given only the amino acid sequence of a protein. According to our own evaluations performed at a resolution of +/- two residues, the predictors we trained with the random forest algorithm were our top performing methods with average top 10 prediction accuracy scores of 85.13% (short range), 74.49% (medium range), and 54.49% (long range). Our ensemble models (stacked denoising autoencoders combined with support vector machines) were our best performing deep network predictors and achieved top 10 prediction accuracy scores of 75.51% (short range), 60.26% (medium range), and 43.85% (long range) using the same evaluation. These tests were blindly performed on targets from the CASP11 dataset; and the results suggested that our models achieved comparable performance to contact predictors developed by groups that participated in CASP11.

Conclusions

Due to the challenging nature of contact prediction, it is beneficial to develop and benchmark a variety of different prediction methods. Our work has produced useful tools with a simple interface that can provide contact predictions to users without requiring a lengthy installation process. In addition to this, we have released our C++ implementation of the direct-coupling analysis method as a standalone software package. Both this tool and our RFcon web server are freely available to the public at http://dna.cs.miami.edu/RFcon /.",2019-03-14 +27120770,ToxCast EPA in Vitro to in Vivo Challenge: Insight into the Rank-I Model.,"The ToxCast EPA challenge was managed by TopCoder in Spring 2014. The goal of the challenge was to develop a model to predict the lowest effect level (LEL) concentration based on in vitro measurements and calculated in silico descriptors. This article summarizes the computational steps used to develop the Rank-I model, which calculated the lowest prediction error for the secret test data set of the challenge. The model was developed using the publicly available Online CHEmical database and Modeling environment (OCHEM), and it is freely available at http://ochem.eu/article/68104 . Surprisingly, this model does not use any in vitro measurements. The logic of the decision steps used to develop the model and the reason to skip inclusion of in vitro measurements is described. We also show that inclusion of in vitro assays would not improve the accuracy of the model.",2016-04-27 +26590260,miRTarBase 2016: updates to the experimentally validated miRNA-target interactions database.,"MicroRNAs (miRNAs) are small non-coding RNAs of approximately 22 nucleotides, which negatively regulate the gene expression at the post-transcriptional level. This study describes an update of the miRTarBase (http://miRTarBase.mbc.nctu.edu.tw/) that provides information about experimentally validated miRNA-target interactions (MTIs). 
The latest update of the miRTarBase expanded it to identify systematically Argonaute-miRNA-RNA interactions from 138 crosslinking and immunoprecipitation sequencing (CLIP-seq) data sets that were generated by 21 independent studies. The database contains 4966 articles, 7439 strongly validated MTIs (using reporter assays or western blots) and 348 007 MTIs from CLIP-seq. The number of MTIs in the miRTarBase has increased around 7-fold since the 2014 miRTarBase update. The miRNA and gene expression profiles from The Cancer Genome Atlas (TCGA) are integrated to provide an effective overview of this exponential growth in the miRNA experimental data. These improvements make the miRTarBase one of the more comprehensively annotated, experimentally validated miRNA-target interactions databases and motivate additional miRNA research efforts.",2015-11-20 +27384129,mCSM-lig: quantifying the effects of mutations on protein-small molecule affinity in genetic disease and emergence of drug resistance.,"The ability to predict how a mutation affects ligand binding is an essential step in understanding, anticipating and improving the design of new treatments for drug resistance, and in understanding genetic diseases. Here we present mCSM-lig, a structure-guided computational approach for quantifying the effects of single-point missense mutations on affinities of small molecules for proteins. mCSM-lig uses graph-based signatures to represent the wild-type environment of mutations, and small-molecule chemical features and changes in protein stability as evidence to train a predictive model using a representative set of protein-ligand complexes from the Platinum database. We show our method provides a very good correlation with experimental data (up to ρ = 0.67) and is effective in predicting a range of chemotherapeutic, antiviral and antibiotic resistance mutations, providing useful insights for genotypic screening and to guide drug development. 
mCSM-lig also provides insights into understanding Mendelian disease mutations and as a tool for guiding protein design. mCSM-lig is freely available as a web server at http://structure.bioc.cam.ac.uk/mcsm_lig.",2016-07-07 +30530227,BetaDL: A protein beta-sheet predictor utilizing a deep learning model and independent set solution.,"The sequence-based prediction of beta-residue contacts and beta-sheet structures contain key information for protein structure prediction. However, the determination of beta-sheet structures poses numerous challenges due to long-range beta-residue interactions and the huge number of possible beta-sheet structures. Recently gaining attention has been the prediction of residue contacts based on deep learning models whose results have led to improvement in protein structure prediction. In addition, to reduce the computational complexity of determining beta-sheet structures, it has been suggested that this problem be transformed into graph-based solutions. Consequently, the current work proposes BetaDL, a combination of a deep learning and a graph-based beta-sheet structure predictor. BetaDL adopts deep learning models to capture beta-residue contacts and improve beta-sheet structure predictions. In addition, a graph-based approach is presented to model the beta-sheets conformational space and a new score function is introduced to evaluate beta-sheets. Furthermore, the present study demonstrates that the beta-sheet structure can be predicted within an acceptable computational time by the utilization of a heuristic maximum weight independent set solution. When compared to state-of-the-art methods, experimental results from BetaSheet916 and BetaSheet1452 datasets indicate that BetaDL improves the accuracy of beta-residue contact and beta-sheet structure prediction. Using BetaDL, beta-sheet structures are predicted with a 4% and 6% improvement in the F1-score at the residue and strand levels, respectively. 
BetaDL's source code and data are available at http://kerg.um.ac.ir/index.php/datasets/#BetaDL.",2018-12-02 +26441671,Databases for multilevel biophysiology research available at Physiome.jp.,"Physiome.jp (http://physiome.jp) is a portal site inaugurated in 2007 to support model-based research in physiome and systems biology. At Physiome.jp, several tools and databases are available to support construction of physiological, multi-hierarchical, large-scale models. There are three databases in Physiome.jp, housing mathematical models, morphological data, and time-series data. In late 2013, the site was fully renovated, and in May 2015, new functions were implemented to provide information infrastructure to support collaborative activities for developing models and performing simulations within the database framework. This article describes updates to the databases implemented since 2013, including cooperation among the three databases, interactive model browsing, user management, version management of models, management of parameter sets, and interoperability with applications.",2015-09-09 +29947757,"BMC3C: binning metagenomic contigs using codon usage, sequence composition and read coverage.","

Motivation

Metagenomics investigates the DNA sequences directly recovered from environmental samples. It often starts with reads assembly, which leads to contigs rather than more complete genomes. Therefore, contig binning methods are subsequently used to bin contigs into genome bins. While some clustering-based binning methods have been developed, they generally suffer from problems related to stability and robustness.

Results

We introduce BMC3C, an ensemble clustering-based method, to accurately and robustly bin contigs by making use of DNA sequence Composition, Coverage across multiple samples and Codon usage. BMC3C begins by searching the proper number of clusters and repeatedly applying the k-means clustering with different initializations to cluster contigs. Next, a weight graph with each node representing a contig is derived from these clusters. If two contigs are frequently grouped into the same cluster, the weight between them is high, and otherwise low. BMC3C finally employs a graph partitioning technique to partition the weight graph into subgraphs, each corresponding to a genome bin. We conduct experiments on both simulated and real-world datasets to evaluate BMC3C, and compare it with the state-of-the-art binning tools. We show that BMC3C has an improved performance compared to these tools. To our knowledge, this is the first time that the codon usage features and ensemble clustering are used in metagenomic contig binning.

Availability and implementation

The codes of BMC3C are available at http://mlda.swu.edu.cn/codes.php?name=BMC3C.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +28874883,xSyn: A Software Tool for Identifying Sophisticated 3-Way Interactions From Cancer Expression Data.,"

Background

Constructing gene co-expression networks from cancer expression data is important for investigating the genetic mechanisms underlying cancer. However, correlation coefficients or linear regression models are not able to model sophisticated relationships among gene expression profiles. Here, we address the 3-way interaction that 2 genes' expression levels are clustered in different space locations under the control of a third gene's expression levels.

Results

We present xSyn, a software tool for identifying such 3-way interactions from cancer gene expression data based on an optimization procedure involving the usage of UPGMA (Unweighted Pair Group Method with Arithmetic Mean) and synergy. The effectiveness is demonstrated by application to 2 real gene expression data sets.

Conclusions

xSyn is a useful tool for decoding the complex relationships among gene expression profiles. xSyn is available at http://www.bdxconsult.com/xSyn.html.",2017-08-28 +30215656,Metabolic deregulation in prostate cancer.,"

Introduction

The prostate exhibits a unique metabolism that changes during initial neoplasia to aggressive prostate cancer (PCa) and metastasis. The study of PCa metabolism thus represents a new avenue for diagnostics, particularly early diagnosis of aggressive PCa cases.

Results

Here, by clustering tissue transcriptomics data from The Cancer Genome Atlas (498 PCa patients), we identified six metabolic subgroups (C1-C6) of PCa that show distinct disease-free survival (DFS) outcomes (p < 0.0001). In particular, we identified at least two subgroups (C5 & C3) that exhibit significant poor prognosis (∼70% and 30-40% relapse within the first 72 months; hazards ratios of 9.4 and 4.4, respectively, relative to the best prognosis cluster C4 that showed <20% relapse even by 120 months). We were able to reproduce the subgroups in several independent datasets (including B. S. Taylor et al. (2010) data; 215 patients; DFS p = 0.00088) using a multinomial regression classifier. The subgroups displayed distinct metabolic profiles vis-à-vis normal tissues, measured as 'deregulation' observed for 20 metabolic pathways (using Pathifier; Y. Drier and E. Domany, 2013). In particular, C5 and C3 showed considerable deregulation for pathways involved in synthesis and catabolism of complex forms of lipids and carbohydrates, and these were exhibited in parallel or in the face of glycolysis, a common form of energy production in cancer cells. The subgroups were significantly over-enriched for different sets of genetic alterations [BRCA1, MSH2, FOXA1, TP53 (C5), RB1 and STK11(C3); and AR (C1); p ≤ 8.6 × 10⁻⁴], suggesting distinct sets of alterations underpinning the PCa subgroups that 'push' the subgroups towards their unique metabolic profiles. Finally, applying the classifier to blood protein expression profiles from 42 active surveillance (AS) and 65 advanced castrate resistant PCa (ACRPC) patients (D. Olmos et al., 2012) assigned 70.77% ACRPC and interestingly reassigned 59.52% AS patients to at least one of the poor prognosis subgroups with 35.71% to the metabolically active poor-prognosis subgroup C3.

Conclusion

The identification of PCa subgroups displaying distinct clinical outcomes solely from metabolic expression profiles of PCa tumours reiterates the significant link between deregulated metabolism and PCa outcomes (E. Eidelman et al., 2017). On the other hand, the time to biochemical relapse (rise in PSA levels) was not indicative of early relapse seen for subgroups C3 and C5 (these show considerably late BCR compared to C4). Our study thus highlights specific processes (elevated lipid and carbohydrate metabolism pathways) that could be better indicators than PSA for early diagnosis of aggressive PCa.

Availability

https://maxwellplus.com/research/metabolic-deregulation-in-prostate-cancer/.",2018-10-01 +25990738,"A low-latency, big database system and browser for storage, querying and visualization of 3D genomic data.","Recent releases of genome three-dimensional (3D) structures have the potential to transform our understanding of genomes. Nonetheless, the storage technology and visualization tools need to evolve to offer to the scientific community fast and convenient access to these data. We introduce simultaneously a database system to store and query 3D genomic data (3DBG), and a 3D genome browser to visualize and explore 3D genome structures (3DGB). We benchmark 3DBG against state-of-the-art systems and demonstrate that it is faster than previous solutions, and importantly gracefully scales with the size of data. We also illustrate the usefulness of our 3D genome Web browser to explore human genome structures. The 3D genome browser is available at http://3dgb.cs.mcgill.ca/.",2015-05-18 +28482075,Pathview Web: user friendly pathway visualization and data integration.,"Pathway analysis is widely used in omics studies. Pathway-based data integration and visualization is a critical component of the analysis. To address this need, we recently developed a novel R package called Pathview. Pathview maps, integrates and renders a large variety of biological data onto molecular pathway graphs. Here we developed the Pathview Web server, as to make pathway visualization and data integration accessible to all scientists, including those without the special computing skills or resources. Pathview Web features an intuitive graphical web interface and a user centered design. The server not only expands the core functions of Pathview, but also provides many useful features not available in the offline R package. Importantly, the server presents a comprehensive workflow for both regular and integrated pathway analysis of multiple omics data. 
In addition, the server also provides a RESTful API for programmatic access and conveniently integration in third-party software or workflows. Pathview Web is openly and freely accessible at https://pathview.uncc.edu/.",2017-07-01 +26251998,PhenomeCentral: a portal for phenotypic and genotypic matchmaking of patients with rare genetic diseases.,"The discovery of disease-causing mutations typically requires confirmation of the variant or gene in multiple unrelated individuals, and a large number of rare genetic diseases remain unsolved due to difficulty identifying second families. To enable the secure sharing of case records by clinicians and rare disease scientists, we have developed the PhenomeCentral portal (https://phenomecentral.org). Each record includes a phenotypic description and relevant genetic information (exome or candidate genes). PhenomeCentral identifies similar patients in the database based on semantic similarity between clinical features, automatically prioritized genes from whole-exome data, and candidate genes entered by the users, enabling both hypothesis-free and hypothesis-driven matchmaking. Users can then contact other submitters to follow up on promising matches. PhenomeCentral incorporates data for over 1,000 patients with rare genetic diseases, contributed by the FORGE and Care4Rare Canada projects, the US NIH Undiagnosed Diseases Program, the EU Neuromics and ANDDIrare projects, as well as numerous independent clinicians and scientists. Though the majority of these records have associated exome data, most lack a molecular diagnosis. 
PhenomeCentral has already been used to identify causative mutations for several patients, and its ability to find matching patients and diagnose these diseases will grow with each additional patient that is entered.",2015-08-31 +30555515,PlantEAR: Functional Analysis Platform for Plant EAR Motif-Containing Proteins.,"The Ethylene-responsive element binding factor-associated Amphiphilic Repression (EAR) motifs, which were initially identified in members of the Arabidopsis ethylene response factor (ERF) family, are transcriptional repression motifs in plants and are defined by the consensus sequence patterns of either LxLxL or DLNxxP. EAR motif-containing proteins can function as transcription repressors, thus interacting with co-repressors, such as TOPLESS and AtSAP18, affecting the structure of chromatin by histone modifications and thereby repressing gene transcription. EAR motif-containing proteins are highly conserved across diverse plant species and play important roles in hormone signal transduction, stress responses and development, but they have not been identified in most plants. In this study, we identified 20,542 EAR motif-containing proteins from 71 plant species based on a Hidden Markov Model and orthologous gene search, and then we constructed a functional analysis platform for plant EAR motif-containing proteins (PlantEAR, http://structuralbiology.cau.edu.cn/plantEAR) by integrating a variety of functional annotations and processed data. Several tools were provided as functional support for EAR motif-containing proteins, such as browse, search, co-expression and protein-protein interaction (PPI) network analysis as well as cis-element analysis and gene set enrichment analysis (GSEA). 
In addition, basing on the identified EAR motif-containing proteins, we also explored their distribution in various species and found that the numbers of EAR motif-containing proteins showed an increasing trend in evolution from algae to angiosperms.",2018-11-30 +23826978,"pico-PLAZA, a genome database of microbial photosynthetic eukaryotes.","With the advent of next generation genome sequencing, the number of sequenced algal genomes and transcriptomes is rapidly growing. Although a few genome portals exist to browse individual genome sequences, exploring complete genome information from multiple species for the analysis of user-defined sequences or gene lists remains a major challenge. pico-PLAZA is a web-based resource (http://bioinformatics.psb.ugent.be/pico-plaza/) for algal genomics that combines different data types with intuitive tools to explore genomic diversity, perform integrative evolutionary sequence analysis and study gene functions. Apart from homologous gene families, multiple sequence alignments, phylogenetic trees, Gene Ontology, InterPro and text-mining functional annotations, different interactive viewers are available to study genome organization using gene collinearity and synteny information. Different search functions, documentation pages, export functions and an extensive glossary are available to guide non-expert scientists. To illustrate the versatility of the platform, different case studies are presented demonstrating how pico-PLAZA can be used to functionally characterize large-scale EST/RNA-Seq data sets and to perform environmental genomics. Functional enrichments analysis of 16 Phaeodactylum tricornutum transcriptome libraries offers a molecular view on diatom adaptation to different environments of ecological relevance. 
Furthermore, we show how complementary genomic data sources can easily be combined to identify marker genes to study the diversity and distribution of algal species, for example in metagenomes, or to quantify intraspecific diversity from environmental strains.",2013-07-04 +26832193,Red blood cell PK deficiency: An update of PK-LR gene mutation database.,"Pyruvate kinase (PK) deficiency is known as being the most common cause of chronic nonspherocytic hemolytic anemia (CNSHA). Clinical PK deficiency is transmitted as an autosomal recessive trait, that can segregate neither in homozygous or in a compound heterozygous modality, respectively. Two PK genes are present in mammals: the pyruvate kinase liver and red blood cells (PK-LR) and the pyruvate kinase muscle (PK-M), of which only the first encodes for the isoenzymes normally expressed in the red blood cells (R-type) and in the liver (L-type). Several reports have been published describing a large variety of genetic defects in PK-LR gene associated to CNSHA. Herein, we present a review of about 250 published mutations and six polymorphisms in PK-LR gene with the corresponding clinical and molecular data. We consulted the PubMed website for searching mutations and papers, along with two main databases: the Leiden Open Variation Database (LOVD, https://grenada.lumc.nl/LOVD2/mendelian_genes/home.php?select_db=PKLR) and Human Gene Mutation Database (HGMD, http://www.hgmd.cf.ac.uk/ac/gene.php?gene=PKLR) for selecting, reviewing and listing the annotated PK-LR gene mutations present in literature. This paper is aimed to provide useful information to clinicians and laboratory professionals regarding overall reported PK-LR gene mutations, also giving the opportunity to harmonize data regarding PK-deficient individuals.",2016-01-12 +29072135,Detection and quantification of mitochondrial DNA deletions from next-generation sequence data.,"

Background

Chromosomal deletions represent an important class of human genetic variation. Various methods have been developed to mine ""next-generation"" sequencing (NGS) data to detect deletions and quantify their clonal abundances. These methods have focused almost exclusively on the nuclear genome, ignoring the mitochondrial chromosome (mtDNA). Detecting mtDNA deletions requires special care. First, the chromosome's relatively small size (16,569 bp) necessitates the ability to detect extremely focal events. Second, the chromosome can be present at thousands of copies in a single cell (in contrast to two copies of nuclear chromosomes), and mtDNA deletions may be present on only a very small percentage of chromosomes. Here we present a method, termed MitoDel, to detect mtDNA deletions from NGS data.

Results

We validate the method on simulated and real data, and show that MitoDel can detect novel and previously-reported mtDNA deletions. We establish that MitoDel can find deletions such as the ""common deletion"" at heteroplasmy levels well below 1%.

Conclusions

MitoDel is a tool for detecting large mitochondrial deletions at low heteroplasmy levels. The tool can be downloaded at http://mendel.gene.cwru.edu/laframboiselab/ .",2017-10-16 +27153654,rnaQUAST: a quality assessment tool for de novo transcriptome assemblies.,"

Unlabelled

Ability to generate large RNA-Seq datasets created a demand for both de novo and reference-based transcriptome assemblers. However, while many transcriptome assemblers are now available, there is still no unified quality assessment tool for RNA-Seq assemblies. We present rnaQUAST—a tool for evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database. rnaQUAST calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts, and outputs them in a user-friendly report.

Availability and implementation

rnaQUAST is implemented in Python and is freely available at http://bioinf.spbau.ru/en/rnaquast

Contact

ap@bioinf.spbau.ru

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-23 +25566534,ncPred: ncRNA-Disease Association Prediction through Tripartite Network-Based Inference.,"

Motivation

Over the past few years, experimental evidence has highlighted the role of microRNAs to human diseases. miRNAs are critical for the regulation of cellular processes, and, therefore, their aberration can be among the triggering causes of pathological phenomena. They are just one member of the large class of non-coding RNAs, which include transcribed ultra-conserved regions (T-UCRs), small nucleolar RNAs (snoRNAs), PIWI-interacting RNAs (piRNAs), large intergenic non-coding RNAs (lincRNAs) and, the heterogeneous group of long non-coding RNAs (lncRNAs). Their associations with diseases are few in number, and their reliability is questionable. In literature, there is only one recent method proposed by Yang et al. (2014) to predict lncRNA-disease associations. This technique, however, lacks in prediction quality. All these elements entail the need to investigate new bioinformatics tools for the prediction of high quality ncRNA-disease associations. Here, we propose a method called ncPred for the inference of novel ncRNA-disease association based on recommendation technique. We represent our knowledge through a tripartite network, whose nodes are ncRNAs, targets, or diseases. Interactions in such a network associate each ncRNA with a disease through its targets. Our algorithm, starting from such a network, computes weights between each ncRNA-disease pair using a multi-level resource transfer technique that at each step takes into account the resource transferred in the previous one.

Results

The results of our experimental analysis show that our approach is able to predict more biologically significant associations with respect to those obtained by Yang et al. (2014), yielding an improvement in terms of the average area under the ROC curve (AUC). These results prove the ability of our approach to predict biologically significant associations, which could lead to a better understanding of the molecular processes involved in complex diseases.

Availability

All the ncPred predictions together with the datasets used for the analysis are available at the following url: http://alpha.dmi.unict.it/ncPred/",2014-12-12 +29688376,GEOMetaCuration: a web-based application for accurate manual curation of Gene Expression Omnibus metadata. ,"Metadata curation has become increasingly important for biological discovery and biomedical research because a large amount of heterogeneous biological data is currently freely available. To facilitate efficient metadata curation, we developed an easy-to-use web-based curation application, GEOMetaCuration, for curating the metadata of Gene Expression Omnibus datasets. It can eliminate mechanical operations that consume precious curation time and can help coordinate curation efforts among multiple curators. It improves the curation process by introducing various features that are critical to metadata curation, such as a back-end curation management system and a curator-friendly front-end. The application is based on a commonly used web development framework of Python/Django and is open-sourced under the GNU General Public License V3. GEOMetaCuration is expected to benefit the biocuration community and to contribute to computational generation of biological insights using large-scale biological data. An example use case can be found at the demo website: http://geometacuration.yubiolab.org.Database URL: https://bitbucket.com/yubiolab/GEOMetaCuration",2018-01-01 +31427232,Gene expression and immunohistochemical analyses identify SOX2 as major risk factor for overall survival and relapse in Ewing sarcoma patients.,"

Background

Up to 30-40% of Ewing sarcoma (EwS) patients with non-metastatic disease develop local or metastatic relapse within a time span of 2-10 years. This is in part caused by the absence of prognostic biomarkers that can identify high-risk patients and thus assign them to risk-adapted monitoring and treatment regimens. Since cancer stemness has been associated with tumour relapse and poor patient outcomes, we investigated in the current study the prognostic potential of SOX2 (sex determining region Y box 2) - a major transcription factor involved in development and stemness - which was previously described to contribute to the undifferentiated phenotype of EwS.

Methods

Two independent patient cohorts, one consisting of 189 retrospectively collected EwS tumours with corresponding mRNA expression data (test-cohort) and the other consisting of 141 prospectively collected formalin-fixed and paraffin-embedded resected tumours (validation-cohort), were employed to analyse SOX2 expression levels through DNA microarrays or immunohistochemistry, respectively, and to compare them with clinical parameters and patient outcomes. Two methods were employed to test the validity of the results at both the mRNA and protein levels.

Findings

Both cohorts showed that only a subset of EwS patients (16-20%) expressed high SOX2 mRNA or protein levels, which significantly correlated with poor overall survival. Multivariate analyses of our validation-cohort revealed that high SOX2 expression represents a major risk-factor for poor survival (HR = 3·19; 95%CI 1·74-5·84; p < 0·01) that is independent from metastasis and other known clinical risk-factors at the time of diagnosis. Univariate analyses demonstrated that SOX2-high expression was correlated with tumour relapse (p = 0·002). The median first relapse was at 14·7 months (range: 3·5-180·7).

Interpretation

High SOX2 expression constitutes an independent prognostic biomarker for EwS patients with poor outcomes. This may help to identify patients with localised disease who are at high risk for tumour relapse within the first two years after diagnosis.

Funding

The laboratory of T. G. P. Grünewald is supported by grants from the 'Verein zur Förderung von Wissenschaft und Forschung an der Medizinischen Fakultät der LMU München (WiFoMed)', by LMU Munich's Institutional Strategy LMUexcellent within the framework of the German Excellence Initiative, the 'Mehr LEBEN für krebskranke Kinder - Bettina-Bräu-Stiftung', the Walter Schulz Foundation, the Wilhelm Sander-Foundation (2016.167.1), the Friedrich-Baur foundation, the Matthias-Lackas foundation, the Barbara & Hubertus Trettner foundation, the Dr. Leopold & Carmen Ellinger foundation, the Gert & Susanna Mayer foundation, the Deutsche Forschungsgemeinschaft (DFG 391665916), and by the German Cancer Aid (DKH-111886 and DKH-70112257). J. Li was supported by a scholarship of the China Scholarship Council (CSC), J. Musa was supported by a scholarship of the Kind-Philipp foundation, and T. L. B. Hölting by a scholarship of the German Cancer Aid. M. F. Orth and M. M. L. Knott were supported by scholarships of the German National Academic Foundation. G. Sannino was supported by a scholarship from the Fritz-Thyssen Foundation (FTF-40.15.0.030MN). The work of U. Dirksen is supported by grants from the German Cancer Aid (DKH-108128, DKH-70112018, and DKH-70113419), the ERA-Net-TRANSCAN consortium (project number 01KT1310), and Euro Ewing Consortium (EEC, project number EU-FP7 602,856), both funded under the European Commission Seventh Framework Program FP7-HEALTH (http://cordis.europa.eu/), the Barbara & Hubertus Trettner foundation, and the Gert & Susanna Mayer foundation. G. Hardiman was supported by grants from the National Science Foundation (SC EPSCoR) and National Institutes of Health (U01-DA045300). The laboratory of J. 
Alonso was supported by Instituto de Salud Carlos III (PI12/00816; PI16CIII/00026); Asociación Pablo Ugarte (TPY-M 1149/13; TRPV 205/18), ASION (TVP 141/17), Fundación Sonrisa de Alex & Todos somos Iván (TVP 1324/15).",2019-08-16 +29240876,Machine learning for classifying tuberculosis drug-resistance from DNA sequencing data.,"

Motivation

Correct and rapid determination of Mycobacterium tuberculosis (MTB) resistance against available tuberculosis (TB) drugs is essential for the control and management of TB. Conventional molecular diagnostic test assumes that the presence of any well-studied single nucleotide polymorphisms is sufficient to cause resistance, which yields low sensitivity for resistance classification.

Summary

Given the availability of DNA sequencing data from MTB, we developed machine learning models for a cohort of 1839 UK bacterial isolates to classify MTB resistance against eight anti-TB drugs (isoniazid, rifampicin, ethambutol, pyrazinamide, ciprofloxacin, moxifloxacin, ofloxacin, streptomycin) and to classify multi-drug resistance.

Results

Compared to previous rules-based approach, the sensitivities from the best-performing models increased by 2-4% for isoniazid, rifampicin and ethambutol to 97% (P < 0.01), respectively; for ciprofloxacin and multi-drug resistant TB, they increased to 96%. For moxifloxacin and ofloxacin, sensitivities increased by 12 and 15% from 83 and 81% based on existing known resistance alleles to 95% and 96% (P < 0.01), respectively. Particularly, our models improved sensitivities compared to the previous rules-based approach by 15 and 24% to 84 and 87% for pyrazinamide and streptomycin (P < 0.01), respectively. The best-performing models increase the area-under-the-ROC curve by 10% for pyrazinamide and streptomycin (P < 0.01), and 4-8% for other drugs (P < 0.01).

Availability and implementation

The details of source code are provided at http://www.robots.ox.ac.uk/~davidc/code.php.

Contact

david.clifton@eng.ox.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-05-01 +28886604,The Imperial County Community Air Monitoring Network: A Model for Community-based Environmental Monitoring for Public Health Action.,"

Summary

The Imperial County Community Air Monitoring Network (the Network) is a collaborative group of community, academic, nongovernmental, and government partners designed to fill the need for more detailed data on particulate matter in an area that often exceeds air quality standards. The Network employs a community-based environmental monitoring process in which the community and researchers have specific, well-defined roles as part of an equitable partnership that also includes shared decision-making to determine study direction, plan research protocols, and conduct project activities. The Network is currently producing real-time particulate matter data from 40 low-cost sensors throughout Imperial County, one of the largest community-based air networks in the United States. Establishment of a community-led air network involves engaging community members to be citizen-scientists in the monitoring, siting, and data collection process. Attention to technical issues regarding instrument calibration and validation and electronic transfer and storage of data is also essential. Finally, continued community health improvements will be predicated on facilitating community ownership and sustainability of the network after research funds have been expended. https://doi.org/10.1289/EHP1772",2017-07-31 +30017358,CalR: A Web-Based Analysis Tool for Indirect Calorimetry Experiments.,"We report a web-based tool for analysis of experiments using indirect calorimetry to measure physiological energy balance. CalR simplifies the process to import raw data files, generate plots, and determine the most appropriate statistical tests for interpretation. Analysis using the generalized linear model (which includes ANOVA and ANCOVA) allows for flexibility in interpreting diverse experimental designs, including those of obesity and thermogenesis. Users also may produce standardized output files for an experiment that can be shared and subsequently re-evaluated using CalR. 
This framework will provide the transparency necessary to enhance consistency, rigor, and reproducibility. The CalR analysis software will greatly increase the speed and efficiency with which metabolic experiments can be organized, analyzed per accepted norms, and reproduced and will likely become a standard tool for the field. CalR is accessible at https://CalRapp.org/.",2018-07-12 +29036405,FOLD: a method to optimize power in meta-analysis of genetic association studies with overlapping subjects.,"

Motivation

In genetic association studies, meta-analyses are widely used to increase the statistical power by aggregating information from multiple studies. In meta-analyses, participating studies often share the same individuals due to the shared use of publicly available control data or accidental recruiting of the same subjects. As such overlapping can inflate false positive rate, overlapping subjects are traditionally split in the studies prior to meta-analysis, which requires access to genotype data and is not always possible. Fortunately, recently developed meta-analysis methods can systematically account for overlapping subjects at the summary statistics level.

Results

We identify and report a phenomenon that these methods for overlapping subjects can yield low power. For instance, in our simulation involving a meta-analysis of five studies that share 20% of individuals, whereas the traditional splitting method achieved 80% power, none of the new methods exceeded 32% power. We found that this low power resulted from the unaccounted differences between shared and unshared individuals in terms of their contributions towards the final statistic. Here, we propose an optimal summary-statistic-based method termed as FOLD that increases the power of meta-analysis involving studies with overlapping subjects.

Availability and implementation

Our method is available at http://software.buhmhan.com/FOLD.

Contact

mail: buhm.han@amc.seoul.kr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +30044230,A Prospective Cohort Study of Adolescents' Memory Performance and Individual Brain Dose of Microwave Radiation from Wireless Communication.,"

Background

The potential impact of microwave radiofrequency electromagnetic fields (RF-EMF) emitted by wireless communication devices on neurocognitive functions of adolescents is controversial. In a previous analysis, we found changes in figural memory scores associated with a higher cumulative RF-EMF brain dose in adolescents.

Objective

We aimed to follow-up our previous results using a new study population, dose estimation, and approach to controlling for confounding from media usage itself.

Methods

RF-EMF brain dose for each participant was modeled. Multivariable linear regression models were fitted on verbal and figural memory score changes over 1 y and on estimated cumulative brain dose and RF-EMF related and unrelated media usage (n=669-676). Because of the hemispheric lateralization of memory, we conducted a laterality analysis for phone call ear preference. To control for the confounding of media use behaviors, a stratified analysis for different media usage groups was also conducted.

Results

We found decreased figural memory scores in association with an interquartile range (IQR) increase in estimated cumulative RF-EMF brain dose scores: -0.22 (95% CI: -0.47, 0.03; IQR: 953 mJ/kg per day) in the whole sample, -0.39 (95% CI: -0.67, -0.10; IQR: 953 mJ/kg per day) in right-side users (n=532), and -0.26 (95% CI: -0.42, -0.10; IQR: 341 mJ/kg per day) when recorded network operator data were used for RF-EMF dose estimation (n=274). Media usage unrelated to RF-EMF did not show significant associations or consistent patterns, with the exception of consistent (nonsignificant) positive associations between data traffic duration and verbal memory.

Conclusions

Our findings for a cohort of Swiss adolescents require confirmation in other populations but suggest a potential adverse effect of RF-EMF brain dose on cognitive functions that involve brain regions mostly exposed during mobile phone use. https://doi.org/10.1289/EHP2427.",2018-07-23 +30053236,ChemDIS 2: an update of chemical-disease inference system.,"Computational inference of affected functions, pathways and diseases for chemicals could largely accelerate the evaluation of potential effects of chemical exposure on human beings. Previously, we have developed a ChemDIS system utilizing information of interacting targets for chemical-disease inference. With the target information, testable hypotheses can be generated for experimental validation. In this work, we present an update of ChemDIS 2 system featured with more updated datasets and several new functions, including (i) custom enrichment analysis function for single omics data; (ii) multi-omics analysis function for joint analysis of multi-omics data; (iii) mixture analysis function for the identification of interaction and overall effects; (iv) web application programming interface (API) for programmed access to ChemDIS 2. The updated ChemDIS 2 system capable of analyzing more than 430 000 chemicals is expected to be useful for both drug development and risk assessment of environmental chemicals.Database URL: ChemDIS 2 is freely accessible via https://cwtung.kmu.edu.tw/chemdis.",2018-01-01 +29878118,iEnhancer-EL: identifying enhancers and their strength with ensemble learning approach.,"

Motivation

Identification of enhancers and their strength is important because they play a critical role in controlling gene expression. Although some bioinformatics tools were developed, they are limited in discriminating enhancers from non-enhancers only. Recently, a two-layer predictor called 'iEnhancer-2L' was developed that can be used to predict the enhancer's strength as well. However, its prediction quality needs further improvement to enhance the practical application value.

Results

A new predictor called 'iEnhancer-EL' was proposed that contains two layer predictors: the first one (for identifying enhancers) is formed by fusing an array of six key individual classifiers, and the second one (for their strength) formed by fusing an array of ten key individual classifiers. All these key classifiers were selected from 171 elementary classifiers formed by SVM (Support Vector Machine) based on kmer, subsequence profile and PseKNC (Pseudo K-tuple Nucleotide Composition), respectively. Rigorous cross-validations have indicated that the proposed predictor is remarkably superior to the existing state-of-the-art one in this area.

Availability and implementation

A web server for the iEnhancer-EL has been established at http://bioinformatics.hitsz.edu.cn/iEnhancer-EL/, by which users can easily get their desired results without the need to go through the mathematical details.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-11-01 +29669107,DAMBE7: New and Improved Tools for Data Analysis in Molecular Biology and Evolution.,"DAMBE is a comprehensive software package for genomic and phylogenetic data analysis on Windows, Linux, and Macintosh computers. New functions include imputing missing distances and phylogeny simultaneously (paving the way to build large phage and transposon trees), new bootstrapping/jackknifing methods for PhyPA (phylogenetics from pairwise alignments), and an improved function for fast and accurate estimation of the shape parameter of the gamma distribution for fitting rate heterogeneity over sites. Previous method corrects multiple hits for each site independently. DAMBE's new method uses all sites simultaneously for correction. DAMBE, featuring a user-friendly graphic interface, is freely available from http://dambe.bio.uottawa.ca (last accessed, April 17, 2018).",2018-06-01 +27642623,Hepatic SILAC proteomic data from PANDER transgenic model.,"This article contains raw and processed data related to research published in ""Quantitative Proteomic Profiling Reveals Hepatic Lipogenesis and Liver X Receptor Activation in the PANDER Transgenic Model"" (M.G. Athanason, W.A. Ratliff, D. Chaput, C.B. MarElia, M.N. Kuehl, S.M., Jr. Stevens, B.R. Burkhardt (2016)) [1], and was generated by ""spike-in"" SILAC-based proteomic analysis of livers obtained from the PANcreatic-Derived factor (PANDER) transgenic mouse (PANTG) under various metabolic conditions [1]. The mass spectrometry output of the PANTG and wild-type B6SJLF mice liver tissue and resulting proteome search from MaxQuant 1.2.2.5 employing the Andromeda search algorithm against the UniprotKB reference database for Mus musculus has been deposited to the ProteomeXchange Consortium (http://www.proteomexchange.org) via the PRIDE partner repository with dataset identifiers PRIDE: PXD004171 and doi:10.6019/PXD004171. 
Protein ratio values representing PANTG/wild-type obtained by MaxQuant analysis were input into the Perseus processing suite to determine statistical significance using the Significance A outlier test (p<0.05). Differentially expressed proteins using this approach were input into Ingenuity Pathway Analysis to determined altered pathways and upstream regulators that were altered in PANTG mice.",2016-08-16 +30010789,pLoc_bal-mAnimal: predict subcellular localization of animal proteins by balancing training dataset and PseAAC.,"

Motivation

A cell contains numerous protein molecules. One of the fundamental goals in cell biology is to determine their subcellular locations, which can provide useful clues about their functions. Knowledge of protein subcellular localization is also indispensable for prioritizing and selecting the right targets for drug development. With the avalanche of protein sequences emerging in the post-genomic age, it is highly desired to develop computational tools for timely and effectively identifying their subcellular localization based on the sequence information alone. Recently, a predictor called 'pLoc-mAnimal' was developed for identifying the subcellular localization of animal proteins. Its performance is overwhelmingly better than that of the other predictors for the same purpose, particularly in dealing with the multi-label systems in which some proteins, called 'multiplex proteins', may simultaneously occur in two or more subcellular locations. Although it is indeed a very powerful predictor, more efforts are definitely needed to further improve it. This is because pLoc-mAnimal was trained by an extremely skewed dataset in which some subset (subcellular location) was about 128 times the size of the other subsets. Accordingly, such an uneven training dataset will inevitably cause a biased consequence.

Results

To alleviate such biased consequence, we have developed a new and bias-reducing predictor called pLoc_bal-mAnimal by quasi-balancing the training dataset. Cross-validation tests on exactly the same experiment-confirmed dataset have indicated that the proposed new predictor is remarkably superior to pLoc-mAnimal, the existing state-of-the-art predictor, in identifying the subcellular localization of animal proteins.

Availability and implementation

To maximize the convenience for the vast majority of experimental scientists, a user-friendly web-server for the new predictor has been established at http://www.jci-bioinfo.cn/pLoc_bal-mAnimal/, by which users can easily get their desired results without the need to go through the complicated mathematics.

Supplementary information

Supplementary data are available at Bioinformatics online.",2019-02-01 +28651363,Multimodal mechanistic signatures for neurodegenerative diseases (NeuroMMSig): a web server for mechanism enrichment.,"

Motivation

The concept of a 'mechanism-based taxonomy of human disease' is currently replacing the outdated paradigm of diseases classified by clinical appearance. We have tackled the paradigm of mechanism-based patient subgroup identification in the challenging area of research on neurodegenerative diseases.

Results

We have developed a knowledge base representing essential pathophysiology mechanisms of neurodegenerative diseases. Together with dedicated algorithms, this knowledge base forms the basis for a 'mechanism-enrichment server' that supports the mechanistic interpretation of multiscale, multimodal clinical data.

Availability and implementation

NeuroMMSig is available at http://neurommsig.scai.fraunhofer.de/.

Contact

martin.hofmann-apitius@scai.fraunhofer.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +27473391,TopoICSim: a new semantic similarity measure based on gene ontology.,"

Background

The Gene Ontology (GO) is a dynamic, controlled vocabulary that describes the cellular function of genes and proteins according to tree major categories: biological process, molecular function and cellular component. It has become widely used in many bioinformatics applications for annotating genes and measuring their semantic similarity, rather than their sequence similarity. Generally speaking, semantic similarity measures involve the GO tree topology, information content of GO terms, or a combination of both.

Results

Here we present a new semantic similarity measure called TopoICSim (Topological Information Content Similarity) which uses information on the specific paths between GO terms based on the topology of the GO tree, and the distribution of information content along these paths. The TopoICSim algorithm was evaluated on two human benchmark datasets based on KEGG pathways and Pfam domains grouped as clans, using GO terms from either the biological process or molecular function. The performance of the TopoICSim measure compared favorably to five existing methods. Furthermore, the TopoICSim similarity was also tested on gene/protein sets defined by correlated gene expression, using three human datasets, and showed improved performance compared to two previously published similarity measures. Finally we used an online benchmarking resource which evaluates any similarity measure against a set of 11 similarity measures in three tests, using gene/protein sets based on sequence similarity, Pfam domains, and enzyme classifications. The results for TopoICSim showed improved performance relative to most of the measures included in the benchmarking, and in particular a very robust performance throughout the different tests.

Conclusions

The TopoICSim similarity measure provides a competitive method with robust performance for quantification of semantic similarity between genes and proteins based on GO annotations. An R script for TopoICSim is available at http://bigr.medisin.ntnu.no/tools/TopoICSim.R .",2016-07-29 +29534977,MetaGO: Predicting Gene Ontology of Non-homologous Proteins Through Low-Resolution Protein Structure Prediction and Protein-Protein Network Mapping.,"Homology-based transferal remains the major approach to computational protein function annotations, but it becomes increasingly unreliable when the sequence identity between query and template decreases below 30%. We propose a novel pipeline, MetaGO, to deduce Gene Ontology attributes of proteins by combining sequence homology-based annotation with low-resolution structure prediction and comparison, and partner's homology-based protein-protein network mapping. The pipeline was tested on a large-scale set of 1000 non-redundant proteins from the CAFA3 experiment. Under the stringent benchmark conditions where templates with >30% sequence identity to the query are excluded, MetaGO achieves average F-measures of 0.487, 0.408, and 0.598, for Molecular Function, Biological Process, and Cellular Component, respectively, which are significantly higher than those achieved by other state-of-the-art function annotations methods. Detailed data analysis shows that the major advantage of the MetaGO lies in the new functional homolog detections from partner's homology-based network mapping and structure-based local and global structure alignments, the confidence scores of which can be optimally combined through logistic regression. These data demonstrate the power of using a hybrid model incorporating protein structure and interaction networks to deduce new functional insights beyond traditional sequence homology-based referrals, especially for proteins that lack homologous function templates. 
The MetaGO pipeline is available at http://zhanglab.ccmb.med.umich.edu/MetaGO/.",2018-03-10 +29771290,Single cell clustering based on cell-pair differentiability correlation and variance analysis.,"Motivation:The rapid advancement of single cell technologies has shed new light on the complex mechanisms of cellular heterogeneity. Identification of intercellular transcriptomic heterogeneity is one of the most critical tasks in single-cell RNA-sequencing studies. Results:We propose a new cell similarity measure based on cell-pair differentiability correlation, which is derived from gene differential pattern among all cell pairs. Through plugging into the framework of hierarchical clustering with this new measure, we further develop a variance analysis based clustering algorithm 'Corr' that can determine cluster number automatically and identify cell types accurately. The robustness and superiority of the proposed algorithm are compared with representative algorithms: shared nearest neighbor (SNN)-Cliq and several other state-of-the-art clustering methods, on many benchmark or real single cell RNA-sequencing datasets in terms of both internal criteria (clustering number and accuracy) and external criteria (purity, adjusted rand index, F1-measure). Moreover, differentiability vector with our new measure provides a new means in identifying potential biomarkers from cancer related single cell datasets even with strong noise. Prognosis analyses from independent datasets of cancers confirmed the effectiveness of our 'Corr' method. Availability and implementation:The source code (Matlab) is available at http://sysbio.sibcb.ac.cn/cb/chenlab/soft/Corr--SourceCodes.zip. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-11-01 +30704412,Load magnitude affects patellar tendon mechanical properties but not collagen or collagen cross-linking after long-term strength training in older adults.,"BACKGROUND:Regular loading of tendons may counteract the negative effects of aging. However, the influence of strength training loading magnitude on tendon mechanical properties and its relation to matrix collagen content and collagen cross-linking is sparsely described in older adults. The purpose of the present study was to compare the effects of moderate or high load resistance training on tendon matrix and its mechanical properties. METHODS:Seventeen women and 19 men, age 62-70 years, were recruited and randomly allocated to 12 months of heavy load resistance training (HRT), moderate load resistance training (MRT) or control (CON). Pre- and post-intervention testing comprised isometric quadriceps strength test (IsoMVC), ultrasound based testing of in vivo patellar tendon (PT) mechanical properties, MRI-based measurement of PT cross-sectional area (CSA), PT biopsies for assessment of fibril morphology, collagen content, enzymatic cross-links, and tendon fluorescence as a measure of advanced glycation end-products (AGEs). RESULTS:Thirty three participants completed the intervention and were included in the data analysis. IsoMVC increased more after HRT (+ 21%) than MRT (+ 8%) and CON (+ 7%) (p < 0.05). Tendon stiffness (p < 0.05) and Young's modulus (p = 0.05) were also differently affected by training load with a reduction in CON and MRT but not in HRT. PT-CSA increased equally after both MRT and HRT. Collagen content, fibril morphology, enzymatic cross-links, and tendon fluorescence were unaffected by training. CONCLUSION:Despite equal improvements in tendon size after moderate and heavy load resistance training, only heavy. load training seemed to maintain tendon mechanical properties in old age. 
The effect of load magnitude on tendon biomechanics was unrelated to changes of major load bearing matrix components in the tendon core. The study is a sub-study of the LISA study, which was registered at http://clinicaltrials.gov (NCT02123641) April 25th 2014.",2019-01-31 +27164438,BrucellaBase: Genome information resource.,"Brucella sp. causes a major zoonotic disease, brucellosis. Brucella belongs to the family Brucellaceae under the order Rhizobiales of Alphaproteobacteria. We present BrucellaBase, a web-based platform, providing features of a genome database together with unique analysis tools. We have developed a web version of the multilocus sequence typing (MLST) (Whatmore et al., 2007) and phylogenetic analysis of Brucella spp. BrucellaBase currently contains genome data of 510 Brucella strains along with the user interfaces for BLAST, VFDB, CARD, pairwise genome alignment and MLST typing. Availability of these tools will enable the researchers interested in Brucella to get meaningful information from Brucella genome sequences. BrucellaBase will regularly be updated with new genome sequences, new features along with improvements in genome annotations. BrucellaBase is available online at http://www.dbtbrucellosis.in/brucellabase.html or http://59.99.226.203/brucellabase/homepage.html.",2016-05-07 +26077899,Integrated web visualizations for protein-protein interaction databases.,"

Background

Understanding living systems is crucial for curing diseases. To achieve this task we have to understand biological networks based on protein-protein interactions. Bioinformatics has come up with a great amount of databases and tools that support analysts in exploring protein-protein interactions on an integrated level for knowledge discovery. They provide predictions and correlations, indicate possibilities for future experimental research and fill the gaps to complete the picture of biochemical processes. There are numerous and huge databases of protein-protein interactions used to gain insights into answering some of the many questions of systems biology. Many computational resources integrate interaction data with additional information on molecular background. However, the vast number of diverse Bioinformatics resources poses an obstacle to the goal of understanding. We present a survey of databases that enable the visual analysis of protein networks.

Results

We selected M=10 out of N=53 resources supporting visualization, and we tested against the following set of criteria: interoperability, data integration, quantity of possible interactions, data visualization quality and data coverage. The study reveals differences in usability, visualization features and quality as well as the quantity of interactions. StringDB is the recommended first choice. CPDB presents a comprehensive dataset and IntAct lets the user change the network layout. A comprehensive comparison table is available via web. The supplementary table can be accessed on http://tinyurl.com/PPI-DB-Comparison-2015.

Conclusions

Only some web resources featuring graph visualization can be successfully applied to interactive visual analysis of protein-protein interaction. Study results underline the necessity for further enhancements of visualization integration in biochemical analysis tools. Identified challenges are data comprehensiveness, confidence, interactive feature and visualization maturing.",2015-06-16 +30383239,Comparative analysis and prediction of quorum-sensing peptides using feature representation learning and machine learning algorithms. ,"Quorum-sensing peptides (QSPs) are the signal molecules that are closely associated with diverse cellular processes, such as cell-cell communication, and gene expression regulation in Gram-positive bacteria. It is therefore of great importance to identify QSPs for better understanding and in-depth revealing of their functional mechanisms in physiological processes. Machine learning algorithms have been developed for this purpose, showing the great potential for the reliable prediction of QSPs. In this study, several sequence-based feature descriptors for peptide representation and machine learning algorithms are comprehensively reviewed, evaluated and compared. To effectively use existing feature descriptors, we used a feature representation learning strategy that automatically learns the most discriminative features from existing feature descriptors in a supervised way. Our results demonstrate that this strategy is capable of effectively capturing the sequence determinants to represent the characteristics of QSPs, thereby contributing to the improved predictive performance. Furthermore, wrapping this feature representation learning strategy, we developed a powerful predictor named QSPred-FL for the detection of QSPs in large-scale proteomic data. Benchmarking results with 10-fold cross validation showed that QSPred-FL is able to achieve better performance as compared to the state-of-the-art predictors. 
In addition, we have established a user-friendly webserver that implements QSPred-FL, which is currently available at http://server.malab.cn/QSPred-FL. We expect that this tool will be useful for the high-throughput prediction of QSPs and the discovery of important functional mechanisms of QSPs.",2018-10-31 +30047114,Genetic characteristics of Y-chromosome short tandem repeat haplotypes from cigarette butt samples presumed to be smoked by North Korean men.,"Korea has been divided into South Korea and North Korea for over 70 years. DNA profiles of the North Korean population have never been reported in the Y-chromosome STR Haplotype Reference Database (YHRD; https://yhrd.org ). To investigate genetic features of Y-chromosome STR haplotypes of the North Korean population for the first time. Genomic DNA was isolated from 838 cigarette butts assumed to have been smoked by North Korean men and amplified with PowerPlex Y23 (PPY23) kit. Statistical parameters were calculated using Nei's formula and analysis of molecular variance (AMOVA). Multidimensional scaling (MDS) plot was constructed by the AMOVA tool and neighbor-joining (NJ) tree was constructed by MEGA 6.06. A total of 121 haplotypes were analyzed for PPY23 loci from a sample population. Haplotype diversity and discrimination capacity were 0.9992 and 0.9837, respectively. Genetic diversities ranged from 0.2981 to 0.9716. For the 16 Y-filer loci and eight minimal loci, respectively 90.9 and 82.6% of the matched haplotypes were estimated to belong to haplogroup O, representing the Southeast and East Asian type. The MDS plot and NJ tree indicated that the samples are most closely related to South Korean. In addition, p-value in the pairwise comparison to the South Korean was slightly above statistical significance (p = 0.0534). The Y-STR haplotypes of the samples were unique and highly genetically polymorphic. 
Despite the separation between North and South Korea for 70 years, they can still be considered a single genetic population, based on Y-STR haplotypes.",2018-04-25 +29755623,Main Considerations of Cardiogenic Shock and Its Predictors: Systematic Review.,"The mortality rate of post-infarction cardiogenic shock (CS) was 80.0-90.0%. Recent studies show a significant reduction of hospital mortality to approximately 50.0%. CS is defined as systemic tissue hypoperfusion resulting from systolic and/or diastolic heart dysfunction, the main cause of which is acute myocardial infarction (AMI). The main predictors are biological markers such as troponin, CKMB and lactate. A systematic literature review and meta-analysis is performed in order to present and correlate the main literary findings on CS and its evolution with possible changes in biomarkers such as troponin, lactate and CKMB. After criteria of literary search with the use of the mesh terms: cardiogenic shock; acute myocardial infarction; biomarkers; troponin; CKMB; lactate; clinical trials and use of the bouleanos ""and"" between the mesh terms and ""or"" among the historical findings. In the main databases such as Pubmed, Medline, Bireme, EBSCO, Scielo, etc., a total of 96 papers that were submitted to the eligibility analysis were collated and, after that, 41 studies were selected, following the rules of systematic review - PRISMA (Transparent reporting of systematic reviews and meta-analyzes-http://www.prisma-statement.org/). Some risk factors for its development in AMI are advanced age, female gender, anterior wall infarction, diabetes mellitus, systemic arterial hypertension, previous history of infarction and angina. The CS associated with AMI depends on its extent and its complications, being the main ones: mitral regurgitation, rupture of the interventricular septum and rupture of the free wall of the left ventricule. 
The diagnosis is based on the clinical manifestations, such as mental confusion, oliguria, hypotension, tachycardia, fine pulse, sweating, and cold extremities; in hemodynamic aspects: systolic blood pressure was < 90.0 mm Hg or 30 mm Hg below baseline, pulmonary capillary pressure was > 18.0 mm Hg and cardiac index was < 2.2 L/min/m2. Laboratory and imaging exams should be requested to evaluate the possible etiology of CS, its systemic repercussions and comorbidities. The treatment aims at the rapid reestablishment of the blood flow in the affected artery, to improve the patient's prognosis. The biomarkers dosage in the daily clinical practice of the different cardiological centers can facilitate the diagnosis and the conduction of the dubious cases and the best evaluation of the degree of myocardial suffering after CS.",2018-04-25 +26121101,Pilot Randomized Trials in Pediatric Critical Care: A Systematic Review.,"

Objectives

Pilot trials are smaller randomized controlled trials conducted to inform the design and assess the feasibility of a large-scale trials. The objectives of this systematic review were to describe pilot trials in pediatric critical care, their conclusions about the clinical implications of the results, and the need for future research and to determine the frequency of large follow-up trials.

Data sources

The Evidence in Pediatric Intensive Care database (http://epicc.mcmaster.ca), a comprehensive repository of published pediatric critical care randomized controlled trials and the World Health Organization's Clinical Trials Registry Platform.

Study selection

Randomized controlled trials described in the publication as ""pilot,"" ""feasibility,"" ""proof-of-concept,"" ""exploratory,"" ""phase 2,"" ""vanguard,"" or ""preliminary.""

Data extraction

Pairs of reviewers screened studies for eligibility and abstracted data independently.

Data synthesis

We found 32 pilot trials (12.2% of all pediatric critical care randomized controlled trials) published before July 2014, varying in size from 6 to 165 children. Pilot trials were significantly smaller than those not described as pilots, but other key characteristics were not significantly different. The authors of 16 publications (48.4%) included explicit and specific conclusions about the design or feasibility of larger trials based on the results of the pilot trial. In 20 publications (64.5%), the authors made conclusions about clinical efficacy based on results of the pilot trial. Four of the 32 pilot trials (12.9%) led to larger trials, two of which have been published.

Conclusions

Published pilot trials in pediatric critical care often focus on clinical outcomes. They uncommonly report explicit feasibility outcomes, criteria for success, or rationale for the pilot sample size. These pilot trials infrequently lead to larger trials. Understanding and addressing the reasons for this are key to the success of pediatric critical care research.",2015-09-01 +30374666,"Duration of adjuvant immunotherapy-biologic, clinical and economic considerations.","The financial impact of an extensive duration of adjuvant immunotherapy is severe. The clinical and biological rationale for this extensive duration is unclear. This study aims to understand the biologic and clinical rationale for the duration of treatment in designing adjuvant trials and to assess the economic impact of different treatment durations in adjuvant therapy. We searched http://www.clinicaltrials.gov for adjuvant immunotherapy clinical trials. Based on our inclusion and exclusion criteria, we identified 47 trials targeting PD-1, PD-L1, and CTLA-4. We examined the duration of these trials and performed a US based budget impact analysis of three representative trials based on various data sources. Most current adjuvant immunotherapy trials provide treatment for 1 year. Our budget impact analyses estimate that the cost per patient of 1 year treatment with nivolumab for melanoma is $165,000 while the cost of 3 years treatment with ipilimumab for melanoma is more than $1,850,000 assuming full duration of treatment. The annual cost for adjuvant treatment with nivolumab for melanoma is approximately $1.15 billion for the entire target population in the United States assuming full uptake. The necessary duration of adjuvant immunotherapy is unknown. The rationale for duration in current trials is not clear and may be longer than necessary. Non-inferiority trials testing shorter duration of therapies should be conducted. 
Appropriate mechanisms to fund such trials should be sought out by healthcare payers.",2018-10-29 +26179317,EV@LUTIL: An open access database on occupational exposures to asbestos and man-made mineral fibres.,"

Objectives

The aim of Evalutil is to document occupational exposure to asbestos and man-made mineral fibers.

Methods

These databases provide grouped descriptive and metrological data from observed situations of occupational exposure, collected through the analysis of scientific articles and technical reports by industrial hygienists.

Results

Over 5,000 measurements were collected. We describe the occupations, economic activities, fiber-containing products, and operations on them that have been documented most often. Graphical measurement syntheses of these data show that the situations presented for asbestos and RCF, except mineral wools, report fiber concentrations mainly above historical occupational exposure limits.

Conclusion

Free access to these data in French and in English on the Internet (https://ssl2.isped.u-bordeaux2.fr/eva_003/) helps public health and prevention professionals to identify and characterize occupational exposures to fibers. Extended recently to nanoscale particles, Evalutil continues to contribute to the improvement of knowledge about exposure to inhaled particles and the health risks associated with them.",2015-07-14 +29691962,Gastrointestinal symptoms in children: Primary care and specialist interface.,"AIMS:Gastrointestinal symptoms and diseases represent one of the major reasons for paediatricians' requests for specialist consultations and hospital admissions. One fourth of annual medical consultations for children younger than 6 years can be attributed to gastrointestinal symptoms. High-quality guidelines have been validated worldwide to provide clinical recommendations and support healthcare providers' practice. Nevertheless, overall compliance to standards of care is unsatisfactory, and children with gastrointestinal symptoms frequently undergo expensive, useless specialist consultations and laboratory evaluations. The aim of this study is to review the main epidemiological and clinical aspects, together with management strategies, of the most common gastrointestinal symptoms in children, pointing out pitfalls and practical tips in primary care management, and providing correct indications for specialist consultations. METHODS:For this review, articles published in English from 2000 to January 2018 were identified from the PubMed/Medline (http://www.ncbi.nlm.nih.gov/pubmed/) database and selected on the basis of quality, relevance to the illness and importance in illustrating current management pathways. The search used the following keywords: gastrointestinal symptoms, functional gastrointestinal symptoms, children, primary care, specialist consultations and management. Particular emphasis was placed on evidence-based guidelines and high-quality studies. 
RESULTS:Functional gastrointestinal symptoms have a high impact on the quality of life of children and families and on healthcare costs. A complete medical history and clinical examination are often sufficient to guide the primary care provider in the diagnosis, further workup or referral to a paediatric gastroenterologist. CONCLUSION:Paediatric gastroenterology outpatients' clinics are among the most crowded specialists, and functional gastrointestinal symptoms and disorders are the most frequent reason for counselling. The number of specialist consultations could be reduced if guidelines were applied in primary care settings.",2018-04-24 +29745839,Multi-target drug repositioning by bipartite block-wise sparse multi-task learning.,"BACKGROUND:Finding potential drug targets is a crucial step in drug discovery and development. Recently, resources such as the Library of Integrated Network-Based Cellular Signatures (LINCS) L1000 database provide gene expression profiles induced by various chemical and genetic perturbations and thereby make it possible to analyze the relationship between compounds and gene targets at a genome-wide scale. Current approaches for comparing the expression profiles are based on pairwise connectivity mapping analysis. However, this method makes the simple assumption that the effect of a drug treatment is similar to knocking down its single target gene. Since many compounds can bind multiple targets, the pairwise mapping ignores the combined effects of multiple targets, and therefore fails to detect many potential targets of the compounds. RESULTS:We propose an algorithm to find sets of gene knock-downs that induce gene expression changes similar to a drug treatment. 
Assuming that the effects of gene knock-downs are additive, we propose a novel bipartite block-wise sparse multi-task learning model with super-graph structure (BBSS-MTL) for multi-target drug repositioning that overcomes the restrictive assumptions of connectivity mapping analysis. CONCLUSIONS:The proposed method BBSS-MTL is more accurate for predicting potential drug targets than the simple pairwise connectivity mapping analysis on five datasets generated from different cancer cell lines. AVAILABILITY:The code can be obtained at http://gr.xjtu.edu.cn/web/liminli/codes .",2018-04-24 +27130811,Inhaler Technique in Children With Asthma: A Systematic Review.,"

Background

Pediatric asthma is an important public health problem worldwide. The primary methods of medication delivery are inhalation devices.

Objectives

This systematic review examined: 1) what is the prevalence of correct inhaler technique among children with asthma, 2) are educational interventions associated with improved rates of correct inhalation technique, and 3) is improved inhaler technique associated with improved asthma outcomes?

Data sources

We included experimental and observational studies through searches of PubMed, Cochrane Database of Systematic Reviews, Cochrane Central Register of Controlled Trials, CINAHL Complete, and clinicaltrials.gov.

Study eligibility criteria, participants, and interventions

Studies were eligible for this review if at least 1 outcome measure of the study included and reported results of child/adolescent inhaler technique.

Study appraisal and synthesis methods

The following information was extracted from each included study: study design (experimental vs observational), and outcomes data. The Downs and Black checklist was used to appraise study quality.

Results

Twenty-eight studies were eligible for inclusion. We found that inhaler technique is generally very poor among children, but is better when children use their metered-dose inhalers (MDIs) with spacers. Technique in using turbuhalers and diskus inhalers is better than in MDI, but still poor. Counseling children on correct inhaler technique was associated with improved technique among children in multiple studies.

Limitations

We examined articles published in English.

Conclusions and implications of key findings

Inhaler technique in children is generally poor. Physicians and other members of the health care team should instruct children and their caregivers on the proper use of their inhalation devices at every opportunity and correct mistakes when made to ensure effective medication delivery.

Registry

This systematic review was registered under the Centre for Reviews and Dissemination, PROSPERO CRD42015025070 (http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42015025070).",2016-04-26 +29986115,"The primary transcriptome, small RNAs and regulation of antimicrobial resistance in Acinetobacter baumannii ATCC 17978.","We present the first high-resolution determination of transcriptome architecture in the priority pathogen Acinetobacter baumannii. Pooled RNA from 16 laboratory conditions was used for differential RNA-seq (dRNA-seq) to identify 3731 transcriptional start sites (TSS) and 110 small RNAs, including the first identification in A. baumannii of sRNAs encoded at the 3' end of coding genes. Most sRNAs were conserved among sequenced A. baumannii genomes, but were only weakly conserved or absent in other Acinetobacter species. Single nucleotide mapping of TSS enabled prediction of -10 and -35 RNA polymerase binding sites and revealed an unprecedented base preference at position +2 that hints at an unrecognized transcriptional regulatory mechanism. To apply functional genomics to the problem of antimicrobial resistance, we dissected the transcriptional regulation of the drug efflux pump responsible for chloramphenicol resistance, craA. The two craA promoters were both down-regulated >1000-fold when cells were shifted to nutrient limited medium. This conditional down-regulation of craA expression renders cells sensitive to chloramphenicol, a highly effective antibiotic for the treatment of multidrug resistant infections. An online interface that facilitates open data access and visualization is provided as 'AcinetoCom' (http://bioinf.gen.tcd.ie/acinetocom/).",2018-10-01 +26644461,ALCOdb: Gene Coexpression Database for Microalgae.,"In the era of energy and food shortage, microalgae have gained much attention as promising sources of biofuels and food ingredients. However, only a small fraction of microalgal genes have been functionally characterized. 
Here, we have developed the Algae Gene Coexpression database (ALCOdb; http://alcodb.jp), which provides gene coexpression information to survey gene modules for a function of interest. ALCOdb currently supports two model algae: the green alga Chlamydomonas reinhardtii and the red alga Cyanidioschyzon merolae. Users can retrieve coexpression information for genes of interest through three unique data pages: (i) Coexpressed Gene List; (ii) Gene Information; and (iii) Coexpressed Gene Network. In addition to the basal coexpression information, ALCOdb also provides several advanced functionalities such as an expression profile viewer and a differentially expressed gene search tool. Using these user interfaces, we demonstrated that our gene coexpression data have the potential to detect functionally related genes and are useful in extrapolating the biological roles of uncharacterized genes. ALCOdb will facilitate molecular and biochemical studies of microalgal biological phenomena, such as lipid metabolism and organelle development, and promote the evolutionary understanding of plant cellular systems.",2015-12-07 +30321125,A Short Form of the Lichtenberg Financial Decision Rating Scale.,"Objectives: This article examines the convergent validity and clinical utility of the 34-item short form of the Lichtenberg Financial Decision Rating Scale (LFDRS-SF). A briefer scale can lead to enhanced and efficient use of a person-centered approach to the assessment of financial decision-making.Methods: Using data on 200 community-dwelling older adults from Lichtenberg and colleagues (2017a), convergent validity was examined with cognitive and financial management measures using a correlational and regression approach. 
Receiver operating curve analyses for predicting decision-making ability classification and suspected financial exploitation classification were used to evaluate clinical utility.Results: The LFDRS-SF total risk score was significantly correlated with both cognitive and financial management measures, and the regression analysis predicted 9% of the LFDRS-SF measure. These results demonstrate not only convergent validity, but also the conceptual and empirical uniqueness of financial decision-making.Conclusions: The LFDRS-SF is a valid tool to assess real-world financial decision-making abilities.Clinical Implications: The LFDRS-SF offers an efficient way to assess financial decision-making. Training on the tool and automatic scoring and recommendations for next steps can be found at https://olderadultnestegg.com.",2018-10-15 +31245720,Virus-induced gene silencing database for phenomics and functional genomics in Nicotiana benthamiana.,"Virus-induced gene silencing (VIGS) is an important forward and reverse genetics method for the study of gene function in many plant species, especially Nicotiana benthamiana. However, despite the widespread use of VIGS, a searchable database compiling the phenotypes observed with this method is lacking. Such a database would allow researchers to know the phenotype associated with the silencing of a large number of individual genes without experimentation. We have developed a VIGS phenomics and functional genomics database (VPGD) that has DNA sequence information derived from over 4,000 N. benthamiana VIGS clones along with the associated silencing phenotype for approximately 1,300 genes. The VPGD has a built-in BLAST search feature that provides silencing phenotype information of specific genes. In addition, a keyword-based search function could be used to find a specific phenotype of interest with the corresponding gene, including its Gene Ontology descriptions. 
Query gene sequences from other plant species that have not been used for VIGS can also be searched for their homologs and silencing phenotype in N. benthamiana. VPGD is useful for identifying gene function not only in N. benthamiana but also in related Solanaceae plants such as tomato and potato. The database is accessible at http://vigs.noble.org.",2018-04-23 +27575582,Genomic analyses of Neisseria gonorrhoeae reveal an association of the gonococcal genetic island with antimicrobial resistance.,"

Objectives

Antimicrobial resistance (AMR) threatens our ability to treat the sexually transmitted bacterial infection gonorrhoea. The increasing availability of whole genome sequence (WGS) data from Neisseria gonorrhoeae isolates, however, provides us with an opportunity in which WGS can be mined for AMR determinants.

Methods

Chromosomal and plasmid genes implicated in AMR were catalogued on the PubMLST Neisseria database (http://pubmlst.org/neisseria). AMR genotypes were identified in WGS from 289 gonococci for which MICs against several antimicrobial compounds had been determined. Whole genome comparisons were undertaken using whole genome MLST (wgMLST).

Results

Clusters of isolates with distinct AMR genotypes were apparent following wgMLST analysis consistent with the occurrence of genome wide genetic variation. This included the presence of the gonococcal genetic island (GGI), a type 4 secretion system shown to increase recombination and for which possession was significantly associated with AMR to multiple antimicrobials.

Conclusions

Evolution of the gonococcal genome occurs in response to antimicrobial selective pressure resulting in the formation of distinct N. gonorrhoeae populations evidenced by the wgMLST clusters seen here. Genomic islands offer selective advantages to host bacteria and possession of the GGI may, not only facilitate the spread of AMR in gonococcal populations, but may also confer fitness advantages.",2016-08-26 +25972520,Moving the mountain: analysis of the effort required to transform comparative anatomy into computable anatomy.,"The diverse phenotypes of living organisms have been described for centuries, and though they may be digitized, they are not readily available in a computable form. Using over 100 morphological studies, the Phenoscape project has demonstrated that by annotating characters with community ontology terms, links between novel species anatomy and the genes that may underlie them can be made. But given the enormity of the legacy literature, how can this largely unexploited wealth of descriptive data be rendered amenable to large-scale computation? To identify the bottlenecks, we quantified the time involved in the major aspects of phenotype curation as we annotated characters from the vertebrate phylogenetic systematics literature. This involves attaching fully computable logical expressions consisting of ontology terms to the descriptions in character-by-taxon matrices. The workflow consists of: (i) data preparation, (ii) phenotype annotation, (iii) ontology development and (iv) curation team discussions and software development feedback. Our results showed that the completion of this work required two person-years by a team of two post-docs, a lead data curator, and students. Manual data preparation required close to 13% of the effort. This part in particular could be reduced substantially with better community data practices, such as depositing fully populated matrices in public repositories. Phenotype annotation required ∼40% of the effort. 
We are working to make this more efficient with Natural Language Processing tools. Ontology development (40%), however, remains a highly manual task requiring domain (anatomical) expertise and use of specialized software. The large overhead required for data preparation and ontology development contributed to a low annotation rate of approximately two characters per hour, compared with 14 characters per hour when activity was restricted to character annotation. Unlocking the potential of the vast stores of morphological descriptions requires better tools for efficiently processing natural language, and better community practices towards a born-digital morphology. Database URL: http://kb.phenoscape.org",2015-05-13 +27048349,Genic insights from integrated human proteomics in GeneCards. ,"GeneCards is a one-stop shop for searchable human gene annotations (http://www.genecards.org/). Data are automatically mined from ∼120 sources and presented in an integrated web card for every human gene. We report the application of recent advances in proteomics to enhance gene annotation and classification in GeneCards. First, we constructed the Human Integrated Protein Expression Database (HIPED), a unified database of protein abundance in human tissues, based on the publically available mass spectrometry (MS)-based proteomics sources ProteomicsDB, Multi-Omics Profiling Expression Database, Protein Abundance Across Organisms and The MaxQuant DataBase. The integrated database, residing within GeneCards, compares favourably with its individual sources, covering nearly 90% of human protein-coding genes. For gene annotation and comparisons, we first defined a protein expression vector for each gene, based on normalized abundances in 69 normal human tissues. This vector is portrayed in the GeneCards expression section as a bar graph, allowing visual inspection and comparison. These data are juxtaposed with transcriptome bar graphs. 
Using the protein expression vectors, we further defined a pairwise metric that helps assess expression-based pairwise proximity. This new metric for finding functional partners complements eight others, including sharing of pathways, gene ontology (GO) terms and domains, implemented in the GeneCards Suite. In parallel, we calculated proteome-based differential expression, highlighting a subset of tissues that overexpress a gene and subserving gene classification. This textual annotation allows users of VarElect, the suite's next-generation phenotyper, to more effectively discover causative disease variants. Finally, we define the protein-RNA expression ratio and correlation as yet another attribute of every gene in each tissue, adding further annotative information. The results constitute a significant enhancement of several GeneCards sections and help promote and organize the genome-wide structural and functional knowledge of the human proteome. Database URL:http://www.genecards.org/.",2016-04-05 +30046160,"KampoDB, database of predicted targets and functional annotations of natural medicines.","Natural medicines (i.e., herbal medicines, traditional formulas) are useful for treatment of multifactorial and chronic diseases. Here, we present KampoDB ( http://wakanmoview.inm.u-toyama.ac.jp/kampo/ ), a novel platform for the analysis of natural medicines, which provides various useful scientific resources on Japanese traditional formulas Kampo medicines, constituent herbal drugs, constituent compounds, and target proteins of these constituent compounds. Potential target proteins of these constituent compounds were predicted by docking simulations and machine learning methods based on large-scale omics data (e.g., genome, proteome, metabolome, interactome). 
The current version of KampoDB contains 42 Kampo medicines, 54 crude drugs, 1230 constituent compounds, 460 known target proteins, and 1369 potential target proteins, and has functional annotations for biological pathways and molecular functions. KampoDB is useful for mode-of-action analysis of natural medicines and prediction of new indications for a wide range of diseases.",2018-07-25 +28848373,"Specimen records of spiders (Arachnida: Araneae) by monthly census for 3 years in forest areas of Yakushima Island, Japan.","

Background

Spiders (Arachnida: Araneae) are a classic indicator taxon for evaluating the health of natural environments. However, studies of spiders' responses to forest succession under natural and anthropogenic disturbance regimes are lacking. Yakushima Island in southwestern Japan has a unique forest ecosystem, and part of the island is designated as a world natural heritage site by UNESCO. Approximately 90% of Yakushima is covered by forest, including both plantations and natural forests.

New information

We made an inventory of spiders on Yakushima Island by collecting specimens in five forests (two plantations and three natural forests) with Malaise and window traps from 2006 to 2008 (a total of 637 traps). We collected 3487 specimens, representing 31 families and 165 species or morphotypes, including undescribed and unidentified species. All specimens were preserved in 70% ethanol, and all data were gathered into a Darwin Core Archives as sample event data. The data set is available from the GBIF network (http://www.gbif.org/dataset/f851fd75-32b2-4a23-8046-9c8ae7013a3c). Because there have been no spider inventories based on such a systematic trapping survey in Japan, this data set provides new insight into the biodiversity on Yakushima Island.",2017-07-25 +30307684,Multilocus sequence typing of Carnobacterium maltaromaticum strains associated with fish disease and dairy products.,"

Aims

Carnobacterium maltaromaticum is a lactic acid bacterium of technological interest in the field of dairy ripening and food bioprotection and is generally recognized as safe in the United States. As it is associated with fish infections, the European Food Safety Agency did not include this species in the qualified presumption safety list of micro-organisms. This implies that the risk assessment for the species has to be performed at the strain level.

Methods and results

Multilocus sequence typing (MLST) is a tool that (i) potentially allows to discriminate strains isolated from diseased fish from apathogenic strains and (ii) to assess the genetic relatedness between both groups of strains. In this study, we characterized by MLST 21 C. maltaromaticum strains including 16 strains isolated from diseased fish and 5 apathogenic dairy strains isolated from cheese. The resulting population structure was investigated by integrating these new data to the previously published population structure (available at http://pubmlst.org), which represents an overall of 71 strains.

Conclusions

This analysis revealed that none of the strains isolated from diseased fish is assigned to a clonal complex containing cheese isolates, and that 11 strains exhibit singleton genotypes suggesting that the population of diseased fish isolates is not clonal.

Significance and impact of the study

This study thus provides a population structure of C. maltaromaticum that could serve in the future as a reference that could contribute to the risk assessment of C. maltaromaticum strains intended to be used in the food chain.",2018-11-22 +23547943,TRACER: a resource to study the regulatory architecture of the mouse genome.,"

Background

Mammalian genes are regulated through the action of multiple regulatory elements, often distributed across large regions. The mechanisms that control the integration of these diverse inputs into specific gene expression patterns are still poorly understood. New approaches enabling the dissection of these mechanisms in vivo are needed.

Results

Here, we describe TRACER (http://tracerdatabase.embl.de), a resource that centralizes information from a large on-going functional exploration of the mouse genome with different transposon-associated regulatory sensors. Hundreds of insertions have been mapped to specific genomic positions, and their corresponding regulatory potential has been documented by analysis of the expression of the reporter sensor gene in mouse embryos. The data can be easily accessed and provides information on the regulatory activities present in a large number of genomic regions, notably in gene-poor intervals that have been associated with human diseases.

Conclusions

TRACER data enables comparisons with the expression pattern of neighbouring genes, activity of surrounding regulatory elements or with other genomic features, revealing the underlying regulatory architecture of these loci. TRACER mouse lines can also be requested for in vivo transposition and chromosomal engineering, to analyse further regions of interest.",2013-04-02 +24324765,INDIGO - INtegrated data warehouse of microbial genomes with examples from the red sea extremophiles.,"

Background

The next generation sequencing technologies substantially increased the throughput of microbial genome sequencing. To functionally annotate newly sequenced microbial genomes, a variety of experimental and computational methods are used. Integration of information from different sources is a powerful approach to enhance such annotation. Functional analysis of microbial genomes, necessary for downstream experiments, crucially depends on this annotation but it is hampered by the current lack of suitable information integration and exploration systems for microbial genomes.

Results

We developed a data warehouse system (INDIGO) that enables the integration of annotations for exploration and analysis of newly sequenced microbial genomes. INDIGO offers an opportunity to construct complex queries and combine annotations from multiple sources starting from genomic sequence to protein domain, gene ontology and pathway levels. This data warehouse is aimed at being populated with information from genomes of pure cultures and uncultured single cells of Red Sea bacteria and Archaea. Currently, INDIGO contains information from Salinisphaera shabanensis, Haloplasma contractile, and Halorhabdus tiamatea - extremophiles isolated from deep-sea anoxic brine lakes of the Red Sea. We provide examples of utilizing the system to gain new insights into specific aspects on the unique lifestyle and adaptations of these organisms to extreme environments.

Conclusions

We developed a data warehouse system, INDIGO, which enables comprehensive integration of information from various resources to be used for annotation, exploration and analysis of microbial genomes. It will be regularly updated and extended with new genomes. It is aimed to serve as a resource dedicated to the Red Sea microbes. In addition, through INDIGO, we provide our Automatic Annotation of Microbial Genomes (AAMG) pipeline. The INDIGO web server is freely available at http://www.cbrc.kaust.edu.sa/indigo.",2013-12-06 +28825706,chromVAR: inferring transcription-factor-associated accessibility from single-cell epigenomic data.,"Single-cell ATAC-seq (scATAC) yields sparse data that make conventional analysis challenging. We developed chromVAR (http://www.github.com/GreenleafLab/chromVAR), an R package for analyzing sparse chromatin-accessibility data by estimating gain or loss of accessibility within peaks sharing the same motif or annotation while controlling for technical biases. chromVAR enables accurate clustering of scATAC-seq profiles and characterization of known and de novo sequence motifs associated with variation in chromatin accessibility.",2017-08-21 +26717407,Prediction of Intra-Species Protein-Protein Interactions in Enteropathogens Facilitating Systems Biology Study.,"Protein-protein interactions in Escherichia coli (E. coli) has been studied extensively using high throughput methods such as tandem affinity purification followed by mass spectrometry and yeast two-hybrid method. This can in turn be used to understand the mechanisms of bacterial cellular processes. However, experimental characterization of such huge amount of interactions data is not available for other important enteropathogens. Here, we propose a support vector machine (SVM)-based prediction model using the known PPIs data of E. coli that can be used to predict PPIs in other enteropathogens, such as Vibrio cholerae, Salmonella Typhi, Shigella flexneri and Yersinia entrocolitica. 
Different features such as domain-domain association (DDA), network topology, and sequence information were used in developing the SVM model. The proposed model using DDA, degree and amino acid composition features has achieved an accuracy of 82% and 62% on 5-fold cross validation and blind E. coli datasets, respectively. The predicted interactions were validated by Gene Ontology (GO) semantic similarity measure and String PPIs database (experimental PPIs only). Finally, we have developed a user-friendly webserver named EnPPIpred to predict intra-species PPIs in enteropathogens, which will be of great help for the experimental biologists. The webserver EnPPIpred is freely available at http://bicresources.jcbose.ac.in/ssaha4/EnPPIpred/.",2015-12-30 +28369169,Trainable Weka Segmentation: a machine learning tool for microscopy pixel classification.,"

Summary

State-of-the-art light and electron microscopes are capable of acquiring large image datasets, but quantitatively evaluating the data often involves manually annotating structures of interest. This process is time-consuming and often a major bottleneck in the evaluation pipeline. To overcome this problem, we have introduced the Trainable Weka Segmentation (TWS), a machine learning tool that leverages a limited number of manual annotations in order to train a classifier and segment the remaining data automatically. In addition, TWS can provide unsupervised segmentation learning schemes (clustering) and can be customized to employ user-designed image features or classifiers.

Availability and implementation

TWS is distributed as open-source software as part of the Fiji image processing distribution of ImageJ at http://imagej.net/Trainable_Weka_Segmentation .

Contact

ignacio.arganda@ehu.eus.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +28436919,A machine learning method for fast and accurate characterization of depth-of-interaction gamma cameras.,"Measuring the depth-of-interaction (DOI) of gamma photons enables increasing the resolution of emission imaging systems. Several design variants of DOI-sensitive detectors have been recently introduced to improve the performance of scanners for positron emission tomography (PET). However, the accurate characterization of the response of DOI detectors, necessary to accurately measure the DOI, remains an unsolved problem. Numerical simulations are, at the state of the art, imprecise, while measuring directly the characteristics of DOI detectors experimentally is hindered by the impossibility to impose the depth-of-interaction in an experimental set-up. In this article we introduce a machine learning approach for extracting accurate forward models of gamma imaging devices from simple pencil-beam measurements, using a nonlinear dimensionality reduction technique in combination with a finite mixture model. The method is purely data-driven, not requiring simulations, and is applicable to a wide range of detector types. The proposed method was evaluated both in a simulation study and with data acquired using a monolithic gamma camera designed for PET (the cMiCE detector), demonstrating the accurate recovery of the DOI characteristics. The combination of the proposed calibration technique with maximum- a posteriori estimation of the coordinates of interaction provided a depth resolution of  ≈1.14 mm for the simulated PET detector and  ≈1.74 mm for the cMiCE detector. 
The software and experimental data are made available at http://occiput.mgh.harvard.edu/depthembedding/.",2017-10-19 +29036419,An accurate algorithm for the detection of DNA fragments from dilution pool sequencing experiments.,"Motivation:The short read lengths of current high-throughput sequencing technologies limit the ability to recover long-range haplotype information. Dilution pool methods for preparing DNA sequencing libraries from high molecular weight DNA fragments enable the recovery of long DNA fragments from short sequence reads. These approaches require computational methods for identifying the DNA fragments using aligned sequence reads and assembling the fragments into long haplotypes. Although a number of computational methods have been developed for haplotype assembly, the problem of identifying DNA fragments from dilution pool sequence data has not received much attention. Results:We formulate the problem of detecting DNA fragments from dilution pool sequencing experiments as a genome segmentation problem and develop an algorithm that uses dynamic programming to optimize a likelihood function derived from a generative model for the sequence reads. This algorithm uses an iterative approach to automatically infer the mean background read depth and the number of fragments in each pool. Using simulated data, we demonstrate that our method, FragmentCut, has 25-30% greater sensitivity compared with an HMM based method for fragment detection and can also detect overlapping fragments. On a whole-genome human fosmid pool dataset, the haplotypes assembled using the fragments identified by FragmentCut had greater N50 length, 16.2% lower switch error rate and 35.8% lower mismatch error rate compared with two existing methods. We further demonstrate the greater accuracy of our method using two additional dilution pool datasets. Availability and implementation:FragmentCut is available from https://bansal-lab.github.io/software/FragmentCut. 
Contact:vibansal@ucsd.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-01-01 +29681141,A Novel Method for the Development of Environmental Public Health Indicators and Benchmark Dose Estimation Using a Health-Based End Point for Chlorpyrifos.,"

Background

Organophosphorus (OP) compounds are the most widely used group of insecticides in the world. Risk assessments for these chemicals have focused primarily on 10% inhibition of acetylcholinesterase in the brain as the critical metric of effect. Aside from cholinergic effects resulting from acute exposure, many studies suggest a linkage between cognitive deficits and long-term OP exposure.

Objective

In this proof-of-concept study, we focused on one of the most widely used OP insecticides in the world, chlorpyrifos (CPF), and utilized an existing physiologically based pharmacokinetic (PBPK) model and a novel pharmacodynamic (PD) dose-response model to develop a point of departure benchmark dose estimate for cognitive deficits following long-term, low-dose exposure to this chemical in rodents.

Methods

Utilizing a validated PBPK/PD model for CPF, we generated a database of predicted biomarkers of exposure and internal dose metrics in both rat and human. Using simulated peak brain CPF concentrations, we developed a dose-response model to predict CPF-induced spatial memory deficits and correlated these changes to relevant biomarkers of exposure to derive a benchmark dose specific to neurobehavioral changes. We extended these cognitive deficit predictions to humans and simulated corresponding exposures using a model parameterized for humans.

Results

Results from this study indicate that the human-equivalent benchmark dose (BMD) based on a 15% cognitive deficit as an end point is lower than that using the present threshold for 10% brain AChE inhibition. This predicted human-equivalent subchronic BMD threshold compares to occupational exposure levels determined from biomarkers of exposure and corresponds to similar exposure conditions where deficits in cognition are observed.

Conclusions

Quantitative PD models based on neurobehavioral testing in animals offer an important addition to the methodologies used for establishing useful environmental public health indicators and BMDs, and predictions from such models could help inform the human health risk assessment for chlorpyrifos. https://doi.org/10.1289/EHP1743.",2018-04-20 +29678128,SpirPep: an in silico digestion-based platform to assist bioactive peptides discovery from a genome-wide database.,"

Background

Bioactive peptides, including biological sources-derived peptides with different biological activities, are protein fragments that influence the functions or conditions of organisms, in particular humans and animals. Conventional methods of identifying bioactive peptides are time-consuming and costly. To quicken the processes, several bioinformatics tools are recently used to facilitate screening of the potential peptides prior to their activity assessment in vitro and/or in vivo. In this study, we developed an efficient computational method, SpirPep, which offers many advantages over the currently available tools.

Results

The SpirPep web application tool is a one-stop analysis and visualization facility to assist bioactive peptide discovery. The tool is equipped with 15 customized enzymes and 1-3 miscleavage options, which allows in silico digestion of protein sequences encoded by protein-coding genes from single, multiple, or genome-wide scaling, and then directly classifies the peptides by bioactivity using an in-house database that contains bioactive peptides collected from 13 public databases. With this tool, the resulting peptides are categorized by each selected enzyme, and shown in a tabular format where the peptide sequences can be tracked back to their original proteins. The developed tool and webpages are coded in PHP and HTML with CSS/JavaScript. Moreover, the tool allows protein-peptide alignment visualization by Generic Genome Browser (GBrowse) to display the region and details of the proteins and peptides within each parameter, while considering digestion design for the desirable bioactivity. SpirPep is efficient; it takes less than 20 min to digest 3000 proteins (751,860 amino acids) with 15 enzymes and three miscleavages for each enzyme, and only a few seconds for single enzyme digestion. Obviously, the tool identified more bioactive peptides than that of the benchmarked tool; an example of validated pentapeptide (FLPIL) from LC-MS/MS was demonstrated. The web and database server are available at http://spirpepapp.sbi.kmutt.ac.th .

Conclusion

SpirPep, a web-based bioactive peptide discovery application, is an in silico-based tool with an overview of the results. The platform is a one-stop analysis and visualization facility; and offers advantages over the currently available tools. This tool may be useful for further bioactivity analysis and the quantitative discovery of desirable peptides.",2018-04-20 +25760677,PGTools: A Software Suite for Proteogenomic Data Analysis and Visualization.,"We describe PGTools, an open source software suite for analysis and visualization of proteogenomic data. PGTools comprises applications, libraries, customized databases, and visualization tools for analysis of mass-spectrometry data using combined proteomic and genomic backgrounds. A single command is sufficient to search databases, calculate false discovery rates, group and annotate proteins, generate peptide databases from RNA-Seq transcripts, identify altered proteins associated with cancer, and visualize genome scale peptide data sets using sophisticated visualization tools. We experimentally confirm a subset of proteogenomic peptides in human PANC-1 cells and demonstrate the utility of PGTools using a colorectal cancer data set that led to the identification of 203 novel protein coding regions missed by conventional proteomic approaches. PGTools should be equally useful for individual proteogenomic investigations as well as international initiatives such as chromosome-centric Human Proteome Project (C-HPP). PGTools is available at http://qcmg.org/bioinformatics/PGTools.",2015-04-17 +31063398,Long-Term Exposure to Ambient Ozone and Progression of Subclinical Arterial Disease: The Multi-Ethnic Study of Atherosclerosis and Air Pollution.,"

Background

Long-term ozone ([Formula: see text]) exposure is associated with cardiovascular mortality, but little is known about the associations between [Formula: see text] and subclinical arterial disease.

Objectives

We studied the longitudinal association of exposure to [Formula: see text] and progression of key subclinical arterial markers in adults: intima-media thickness of common carotid artery ([Formula: see text]), carotid plaque (CP) burden, and coronary artery calcification (CAC).

Methods

CAC was measured one to four times at baseline and at follow-up exams (1999–2012) by computed tomography (CT) in 6,619 healthy adults, recruited at age 45-84 y without cardiovascular disease (CVD), over a mean of 6.5 y (standard deviation: 3.5 y). [Formula: see text] and CP burden were quantified in 3,392 participants using carotid artery ultrasound imaging acquired over a mean of 9 y (1.7 y). Over 91% and 89% participants had at least one follow-up [Formula: see text] and CAC measurement, respectively. Residence-specific [Formula: see text] concentrations were estimated by a validated spatiotemporal model spanning from 1999 to 2012. This model relied on comprehensive monitoring data and geographical variables to predict individualized long-term average concentrations since baseline. Linear mixed models and logistic regression model were used to evaluate relationships of long-term average exposure to [Formula: see text] with longitudinal change in [Formula: see text], CAC, and CP formation, respectively.

Results

Mean progression rates of [Formula: see text] and CAC were [Formula: see text] and [Formula: see text]. CP formation was identified in 55% of the subjects. A [Formula: see text] increase in long-term average [Formula: see text] exposure was associated with a [Formula: see text] [95% confidence interval (CI): 1.4, 9.7] greater increase in [Formula: see text] over 10 y. A [Formula: see text] increase in [Formula: see text] was also associated with new CP formation [odds ratio (OR): 1.2 (95% CI: 1.1, 1.4)] but not CAC progression [[Formula: see text] (95% CI: [Formula: see text], 2)]. Associations were robust in the analysis with extended covariate adjustment, including copollutants, i.e., nitrogen oxides ([Formula: see text]) and particulate matter with diameter [Formula: see text] ([Formula: see text]).

Conclusion

Over almost a decade of follow-up, outdoor [Formula: see text] concentrations were associated with increased rate of carotid wall thickness progression and risk of new plaque formation, suggesting arterial injury in this cohort. https://doi.org/10.1289/EHP3325.",2019-05-01 +30760021,Timely recognition of palliative care needs of patients with advanced chronic heart failure: a pilot study of a Dutch translation of the Needs Assessment Tool: Progressive Disease - Heart Failure (NAT:PD-HF).,"

Background

The Needs Assessment Tool: Progressive Disease - Heart Failure (NAT:PD-HF) was developed to identify and triage palliative care needs in patients with chronic heart failure. A Dutch version is currently lacking.

Aims

The aim of this study was to investigate the feasibility and acceptability of a Dutch NAT:PD-HF in chronic heart failure outpatients; and to gain preliminary data regarding the effect of the NAT:PD-HF on palliative care referral, symptoms, health status, care dependency, caregiver burden and advance directives.

Methods

A mixed methods study including 23 outpatients with advanced chronic heart failure and 20 family caregivers was performed. Nurses conducted patient consultations using a Dutch translation of the NAT:PD-HF and rated acceptability. Before this visit and 4 months later, symptoms, health status, performance status, care dependency, caregiver burden and recorded advance directives were assessed. A focus group with participating nurses discussed barriers and facilitators towards palliative care needs assessment.

Results

Acceptability was rated as 7 (interquartile range 6-7 points) on a 10-point scale. All patients had palliative care needs. In 48% actions were taken, including two patients referred to palliative care. Symptoms, performance status, care dependency, caregiver burden and advance directives were unchanged at 4 months, while health status deteriorated in patients completing follow-up (n=17). Barriers towards palliative care needs assessment included feeling uncomfortable to initiate discussions and concerns about the ability to address palliative care needs.

Conclusions

The NAT:PD-HF identified palliative care needs in all participants, and triggered action to address these in half. However, training in palliative care communication skills as well as palliative care interventions should accompany the introduction of a palliative care needs assessment tool.

Netherlands national trial register (ntr)

5616. http://www.trialregister.nl/trialreg/admin/rctview.asp?TC=5616.",2019-02-13 +29928570,Bioinformatic screening for key miRNAs and genes associated with myocardial infarction.,"Despite significant advances in understanding of the causes of and treatment of myocardial infarction (MI) in recent years, morbidity and mortality is still high. The aim of this study was to identify miRNA and genes potentially associated with MI. mRNA and miRNA expression datasets were downloaded from the Gene Expression Omnibus database (http://www.ncbi.nlm.nih.gov/geo/). Interactions between miRNA and the expression and function of target genes were analyzed, and a protein-protein interaction network was constructed. The diagnostic value of identified miRNA and genes was assessed. Quantitative RT-PCR was applied to validate the results of the bioinformatics analysis. MiR-27a, miR-31*, miR-1291, miR-139-5p, miR-204, miR-375, and target genes including CX3CR1,HSPA6, and TPM3 had potential diagnostic value. The genes TFEB,IRS2,GRB2,FASLG,LIMS1,CX3CR1,HSPA6,TPM3,LAT2,CEBPD,AQP9, and MAPKAPK2 were associated with recovery from MI. In conclusion, the identified miRNA and genes might be associated with the pathology of MI.",2018-04-19 +29693178,Identifying genes as potential prognostic indicators in patients with serous ovarian cancer resistant to carboplatin using integrated bioinformatics analysis.,"Serous ovarian cancer (SOC) accounts for >50% of all epithelial ovarian cancers. However, patients with SOC present with various degrees of response to platinum‑based chemotherapy and, thus, their survival may differ. The present study aimed to identify the candidate genes involved in the carcinogenesis and drug resistance of SOC by analyzing the microarray datasets GDS1381 and GDS3592. GDS1381 and GDS3592 were downloaded from the Gene Expression Omnibus database (https://www.ncbi.nlm.nih.gov/gds/). A total of 219 differentially expressed genes (DEGs) were identified. 
Potential genes that may predict the response to carboplatin and, thus, the prognosis of SOC were analyzed. The enriched functions and pathways of DEGs included extracellular region, extracellular space and extracellular exosome, among others. Upon screening the upregulated and downregulated genes on the connectivity map, 10 small‑molecule drugs were identified that may be helpful in improving drug sensitivity in patients with ovarian cancer. A total of 30 hub genes were screened for further analysis after constructing the protein‑to‑protein interaction network. Through survival analysis, comparison of genes across numerous analyses, and immunohistochemistry, GNAI1, non‑structural maintenance of chromosomes (non‑SMC) condensin I complex subunit H (NCAPH), matrix metallopeptidase 9 (MMP9), aurora kinase A (AURKA) and enhancer of zeste 2 polycomb repressive complex 2 subunit (EZH2) were identified as the key molecules that may be involved in the carcinogenesis and carboplatin resistance of SOC. In conclusion, GNAI1, NCAPH, MMP9, AURKA and EZH2 should be examined in further studies for the possibility of their participation in the carcinogenesis and carboplatin response of SOC.",2018-04-19 +29575374,Gene therapy clinical trials worldwide to 2017: An update.,"To date, almost 2600 gene therapy clinical trials have been completed, are ongoing or have been approved worldwide. Our database brings together global information on gene therapy clinical activity from trial databases, official agency sources, published literature, conference presentations and posters kindly provided to us by individual investigators or trial sponsors. This review presents our analysis of clinical trials that, to the best of our knowledge, have been or are being performed worldwide. As of our November 2017 update, we have entries on 2597 trials undertaken in 38 countries. 
We have analysed the geographical distribution of trials, the disease indications (or other reasons) for trials, the proportions to which different vector types are used, and the genes that have been transferred. Details of the analyses presented, and our searchable database are available via The Journal of Gene Medicine Gene Therapy Clinical Trials Worldwide website at: http://www.wiley.co.uk/genmed/clinical. We also provide an overview of the progress being made in gene therapy clinical trials around the world, and discuss key trends since the previous review, namely the use of chimeric antigen receptor T cells for the treatment of cancer and advancements in genome editing technologies, which have the potential to transform the field moving forward.",2018-04-19 +28518075,Leveraging CyVerse Resources for De Novo Comparative Transcriptomics of Underserved (Non-model) Organisms. ,"This workflow allows novice researchers to leverage advanced computational resources such as cloud computing to carry out pairwise comparative transcriptomics. It also serves as a primer for biologists to develop data scientist computational skills, e.g. executing bash commands, visualization and management of large data sets. All command line code and further explanations of each command or step can be found on the wiki (https://wiki.cyverse.org/wiki/x/dgGtAQ). The Discovery Environment and Atmosphere platforms are connected together through the CyVerse Data Store. As such, once the initial raw sequencing data has been uploaded there is no more need to transfer large data files over an Internet connection, minimizing the amount of time needed to conduct analyses. This protocol is designed to analyze only two experimental treatments or conditions. Differential gene expression analysis is conducted through pairwise comparisons, and will not be suitable to test multiple factors. This workflow is also designed to be manual rather than automated. 
Each step must be executed and investigated by the user, yielding a better understanding of data and analytical outputs, and therefore better results for the user. Once complete, this protocol will yield de novo assembled transcriptome(s) for underserved (non-model) organisms without the need to map to previously assembled reference genomes (which are usually not available in underserved organism). These de novo transcriptomes are further used in pairwise differential gene expression analysis to investigate genes differing between two experimental conditions. Differentially expressed genes are then functionally annotated to understand the genetic response organisms have to experimental conditions. In total, the data derived from this protocol is used to test hypotheses about biological responses of underserved organisms.",2017-05-09 +,BARLEYMAP: physical and genetic mapping of nucleotide sequences and annotation of surrounding loci in barley,"The BARLEYMAP pipeline was designed to map both genomic sequences and transcripts against sequence-enriched genetic/physical frameworks, with plant breeders as the main target users. It reports the most probable genomic locations of queries after merging results from different resources so that diversity obtained from re-sequencing experiments can be exploited. In addition, the application lists surrounding annotated genes and markers, facilitating downstream analyses. Pre-computed marker datasets can also be created and browsed to facilitate searches and cross referencing. Performance is evaluated by mapping two sets of long transcripts and by locating the physical and genetic positions of four marker collections widely used for high-throughput genotyping of barley cultivars. In addition, genome positions retrieved by BARLEYMAP are compared to positions within a conventional genetic map for a population of recombinant inbred lines, yielding a gene-order accuracy of 96 %. 
These results reveal advantages and drawbacks of current in silico approaches for barley genomics. A web application to make use of barley data is available at http://floresta.eead.csic.es/barleymap . The pipeline can be set up for any species with similar sequence resources, for which a fully functional standalone version is available for download.",2015-01-01 +28334231,ProtVista: visualization of protein sequence annotations.,"

Summary

ProtVista is a comprehensive visualization tool for the graphical representation of protein sequence features in the UniProt Knowledgebase, experimental proteomics and variation public datasets. The complexity and relationships in this wealth of data pose a challenge in interpretation. Integrative visualization approaches such as provided by ProtVista are thus essential for researchers to understand the data and, for instance, discover patterns affecting function and disease associations.

Availability and implementation

ProtVista is a JavaScript component released as an open source project under the Apache 2 License. Documentation and source code are available at http://ebi-uniprot.github.io/ProtVista/ .

Contact

martin@ebi.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +22643034,3-D QSAutogrid/R: an alternative procedure to build 3-D QSAR models. Methodologies and applications.,"Since it first appeared in 1988 3-D QSAR has proved its potential in the field of drug design and activity prediction. Although thousands of citations now exist in 3-D QSAR, its development was rather slow with the majority of new 3-D QSAR applications just extensions of CoMFA. An alternative way to build 3-D QSAR models, based on an evolution of software, has been named 3-D QSAutogrid/R and has been developed to use only software freely available to academics. 3-D QSAutogrid/R covers all the main features of CoMFA and GRID/GOLPE with implementation by multiprobe/multiregion variable selection (MPGRS) that improves the simplification of interpretation of the 3-D QSAR map. The methodology is based on the integration of the molecular interaction fields as calculated by AutoGrid and the R statistical environment that can be easily coupled with many free graphical molecular interfaces such as UCSF-Chimera, AutoDock Tools, JMol, and others. The description of each R package is reported in detail, and, to assess its validity, 3-D QSAutogrid/R has been applied to three molecular data sets of which either CoMFA or GRID/GOLPE models were reported in order to compare the results. 3-D QSAutogrid/R has been used as the core engine to prepare more than 240 3-D QSAR models forming the very first 3-D QSAR server ( www.3d-qsar.com ) with its code freely available through R-Cran distribution.",2012-06-13 +30610108,"Cortactin Phosphorylation by Casein Kinase 2 Regulates Actin-Related Protein 2/3 Complex Activity, Invadopodia Function, and Tumor Cell Invasion.","Malregulation of the actin cytoskeleton enhances tumor cell motility and invasion. The actin-binding protein cortactin facilitates branched actin network formation through activation of the actin-related protein (Arp) 2/3 complex. 
Increased cortactin expression due to gene amplification is observed in head and neck squamous cell carcinoma (HNSCC) and other cancers, corresponding with elevated tumor progression and poor patient outcome. Arp2/3 complex activation is responsible for driving increased migration and extracellular matrix (ECM) degradation by governing invadopodia formation and activity. Although cortactin-mediated activation of Arp2/3 complex and invadopodia regulation has been well established, signaling pathways responsible for governing cortactin binding to Arp2/3 are unknown and potentially present a new avenue for anti-invasive therapeutic targeting. Here we identify casein kinase (CK) 2α phosphorylation of cortactin as a negative regulator of Arp2/3 binding. CK2α directly phosphorylates cortactin at a conserved threonine (T24) adjacent to the canonical Arp2/3 binding motif. Phosphorylation of cortactin T24 by CK2α impairs the ability of cortactin to bind Arp2/3 and activate actin nucleation. Decreased invadopodia activity is observed in HNSCC cells with expression of CK2α phosphorylation-null cortactin mutants, shRNA-mediated CK2α knockdown, and with the CK2α inhibitor Silmitasertib. Silmitasertib inhibits HNSCC collective invasion in tumor spheroids and orthotopic tongue tumors in mice. Collectively these data suggest that CK2α-mediated cortactin phosphorylation at T24 is critical in regulating cortactin binding to Arp2/3 complex and pro-invasive activity, identifying a potential targetable mechanism for impairing HNSCC invasion. IMPLICATIONS: This study identifies a new signaling pathway that contributes to enhancing cancer cell invasion.Visual Overview: http://mcr.aacrjournals.org/content/molcanres/17/4/987/F1.large.jpg.",2019-01-04 +27153620,"MOCAT2: a metagenomic assembly, annotation and profiling framework.","

Unlabelled

MOCAT2 is a software pipeline for metagenomic sequence assembly and gene prediction with novel features for taxonomic and functional abundance profiling. The automated generation and efficient annotation of non-redundant reference catalogs by propagating pre-computed assignments from 18 databases covering various functional categories allows for fast and comprehensive functional characterization of metagenomes.

Availability and implementation

MOCAT2 is implemented in Perl 5 and Python 2.7, designed for 64-bit UNIX systems and offers support for high-performance computer usage via LSF, PBS or SGE queuing systems; source code is freely available under the GPL3 license at http://mocat.embl.de

Contact

bork@embl.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-08 +30021827,The Tail Wagging the Dog (or the Challenges Faced When the Financing of Medicine Gets Ahead of the Science of Medicine). ,"In their article in this issue of the Journal of Clinical Microbiology, S. R. Dominguez et al. (J Clin Microbiol 56:e00632-18, 2018, https://doi.org/10.1128/JCM.00632-18) describe the performance of PCR detection of herpes simplex virus (HSV) DNA versus viral culture in skin and mucosal samples from 7 neonates with HSV disease. This is a significant contribution to our understanding of the optimal diagnostic approach in babies being evaluated for neonatal HSV disease. Many diagnostic laboratories already have made the change to molecular diagnostics for skin and mucosal swab testing, however, in large part due to the labor costs associated with viral cultures. Thus, important studies such as this one are being conducted to support a decision that has already been made in many locations on mostly economic grounds. This small case series supports the decision to use molecular testing for samples from skin and mucosal sites, but larger studies are needed to more fully define the performance characteristics of PCR in this population. Since a false-positive result would commit a baby to months of management that would be unnecessary and have potential harm, it is critical to base diagnostic decision making on data that support the use of a specific test.",2018-09-25 +29709380,Calorie intake and short-term survival of critically ill patients.,"BACKGROUND & AIMS:The association between calorie supply and outcome of critically ill patients is unclear. Results from observational studies contradict findings of randomized studies, and have been questioned because of unrecognized confounding by indication. 
The present study wanted to re-examine the associations between the daily amount of calorie intake and short-term survival of critically ill patients using several novel statistical approaches. METHODS:9661 critically ill patients from 451 ICUs were extracted from an international database. We examined associations between survival time and three pragmatic nutritional categories (I: <30% of target, II: 30-70%, III: >70%) reflecting different amounts of total daily calorie intake. We compared hazard ratios for the 30-day risk of dying estimated for different hypothetical nutrition support plans (different categories of daily calorie intake during the first 11 days after ICU admission). To minimize indication bias, we used a lag time between nutrition and outcome, we particularly considered daily amounts of calorie intake, and we adjusted results to the route of calorie supply (enteral, parenteral, oral). RESULTS:1974 patients (20.4%) died in hospital before day 30. Median of daily artificial calorie intake was 1.0 kcal/kg [IQR 0.0-4.1] in category I, 12.3 kcal/kg [9.4-15.4] in category II, and 23.5 kcal/kg [19.5-27.8] in category III. When compared to a plan providing daily minimal amounts of calories (category I), the adjusted minimal hazard ratios for a delayed (from day 5-11) or an early (from day 1-11) mildly hypocaloric nutrition (category II) were 0.71 (95% confidence interval [CI], 0.54 to 0.94) and 0.56 (95% CI, 0.38 to 0.82), respectively. No substantial hazard change could be detected, when a delayed or an early, near target calorie intake (category III) was compared to an early, mildly hypocaloric nutrition. CONCLUSIONS:Compared to a severely hypocaloric nutrition, a mildly hypocaloric nutrition is associated with a decreased risk of death. In unselected critically ill patients, this risk cannot be reduced further by providing amounts of calories close to the calculated target. 
STUDY REGISTRATION:ID number ISRCTN17829198, website http://www.isrctn.org.",2018-04-18 +29329361,pyHVis3D: visualising molecular simulation deduced H-bond networks in 3D: application to T-cell receptor interactions.,"

Motivation

Hydrogen bonds (H-bonds) play an essential role for many molecular interactions but are also often transient, making visualising them in a flexible system challenging.

Results

We provide pyHVis3D which allows for an easy to interpret 3D visualisation of H-bonds resulting from molecular simulations. We demonstrate the power of pyHVis3D by using it to explain the changes in experimentally measured binding affinities for three T-cell receptor/peptide/MHC complexes and mutants of each of these complexes.

Availability and implementation

pyHVis3D can be downloaded for free from http://opig.stats.ox.ac.uk/resources.

Contact

science.bernhard.knapp@gmail.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-06-01 +21388573,Mining semantic networks of bioinformatics e-resources from the literature.,"

Background

There have been a number of recent efforts (e.g. BioCatalogue, BioMoby) to systematically catalogue bioinformatics tools, services and datasets. These efforts rely on manual curation, making it difficult to cope with the huge influx of various electronic resources that have been provided by the bioinformatics community. We present a text mining approach that utilises the literature to automatically extract descriptions and semantically profile bioinformatics resources to make them available for resource discovery and exploration through semantic networks that contain related resources.

Results

The method identifies the mentions of resources in the literature and assigns a set of co-occurring terminological entities (descriptors) to represent them. We have processed 2,691 full-text bioinformatics articles and extracted profiles of 12,452 resources containing associated descriptors with binary and tf*idf weights. Since such representations are typically sparse (on average 13.77 features per resource), we used lexical kernel metrics to identify semantically related resources via descriptor smoothing. Resources are then clustered or linked into semantic networks, providing the users (bioinformaticians, curators and service/tool crawlers) with a possibility to explore algorithms, tools, services and datasets based on their relatedness. Manual exploration of links between a set of 18 well-known bioinformatics resources suggests that the method was able to identify and group semantically related entities.

Conclusions

The results have shown that the method can reconstruct interesting functional links between resources (e.g. linking data types and algorithms), in particular when tf*idf-like weights are used for profiling. This demonstrates the potential of combining literature mining and simple lexical kernel methods to model relatedness between resource descriptors in particular when there are few features, thus potentially improving the resource description, discovery and exploration process. The resource profiles are available at http://gnode1.mib.man.ac.uk/bioinf/semnets.html.",2011-03-07 +29452363,StructureMapper: a high-throughput algorithm for analyzing protein sequence locations in structural data.,"Motivation:StructureMapper is a high-throughput algorithm for automated mapping of protein primary amino sequence locations to existing three-dimensional protein structures. The algorithm is intended for facilitating easy and efficient utilization of structural information in protein characterization and proteomics. StructureMapper provides an analysis of the identified structural locations that includes surface accessibility, flexibility, protein-protein interfacing, intrinsic disorder prediction, secondary structure assignment, biological assembly information and sequence identity percentages, among other metrics. Results:We have showcased the use of the algorithm by estimating the coverage of structural information of the human proteome, identifying critical interface residues in DNA polymerase γ, profiling structurally protease cleavage sites and post-translational modification sites, and by identifying putative, novel phosphoswitches. Availability and implementation:The StructureMapper algorithm is available as an online service and standalone implementation at http://structuremapper.uta.fi. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-07-01 +24997126,piRNAQuest: searching the piRNAome for silencers.,"

Background

PIWI-interacting RNA (piRNA) is a novel and emerging class of small non-coding RNA (sncRNA). Ranging in length from 26-32 nucleotides, this sncRNA is a potent player in guiding the vital regulatory processes within a cellular system. In spite of having such a wide role within cellular systems, piRNAs are not well organized and classified, so that a researcher can pull out the biologically relevant information concerning this class.

Description

Here we present piRNAQuest- a unified and comprehensive database of 41749 human, 890078 mouse and 66758 rat piRNAs obtained from NCBI and different small RNA sequence experiments. This database provides piRNA annotation based on their localization in gene, intron, intergenic, CDS, 5'UTR, 3'UTR and repetitive regions which has not been done so far. We have also annotated piRNA clusters and have elucidated characteristic motifs within them. We have looked for the presence of piRNAs and piRNA clusters in pseudogenes, which are known to regulate the expression of protein coding transcripts by generating small RNAs. All these will help researchers progress towards solving the unanswered queries on piRNA biogenesis and their mode of action. Further, expression profile for piRNA in different tissues and from different developmental stages has been provided. In addition, we have provided several tools like 'homology search', 'dynamic cluster search' and 'pattern search'. Overall, piRNAQuest will serve as a useful resource for exploring human, mouse and rat piRNAome. The database is freely accessible and available at http://bicresources.jcbose.ac.in/zhumur/pirnaquest/.

Conclusion

piRNAs play a remarkable role in stem cell self-renewal and various vital processes of developmental biology. Although researchers are mining different features on piRNAs, the exact regulatory mechanism is still fuzzy. Thus, understanding the true potential of these small regulatory molecules with respect to their origin, localization and mode of biogenesis is crucial. piRNAQuest will provide us with a better insight on piRNA origin and function which will help to explore the true potential of these sncRNAs.",2014-07-04 +30822387,"Evidence for Urban-Rural Disparity in Temperature-Mortality Relationships in Zhejiang Province, China.","BACKGROUND:Temperature-related mortality risks have mostly been studied in urban areas, with limited evidence for urban-rural differences in the temperature impacts on health outcomes. OBJECTIVES:We investigated whether temperature-mortality relationships vary between urban and rural counties in China. METHODS:We collected daily data on 1 km gridded temperature and mortality in 89 counties of Zhejiang Province, China, for 2009 and 2015. We first performed a two-stage analysis to estimate the temperature effects on mortality in urban and rural counties. Second, we performed meta-regression to investigate the modifying effect of the urbanization level. Stratified analyses were performed by all-cause, nonaccidental (stratified by age and sex), cardiopulmonary, cardiovascular, and respiratory mortality. We also calculated the fraction of mortality and number of deaths attributable to nonoptimum temperatures associated with both cold and heat components. The potential sources of the urban-rural differences were explored using meta-regression with county-level characteristics. RESULTS:Increased mortality risks were associated with low and high temperatures in both rural and urban areas, but rural counties had higher relative risks (RRs), attributable fractions of mortality, and attributable death counts than urban counties. 
The urban-rural disparity was apparent for cold (first percentile relative to minimum mortality temperature), with an RR of 1.47 [95% confidence interval (CI): 1.32, 1.62] associated with all-cause mortality for urban counties, and 1.98 (95% CI: 1.87, 2.10) for rural counties. Among the potential sources of the urban-rural disparity are age structure, education, GDP, health care services, air conditioners, and occupation types. CONCLUSIONS:Rural residents are more sensitive to both cold and hot temperatures than urban residents in Zhejiang Province, China, particularly the elderly. The findings suggest past studies using exposure-response functions derived from urban areas may underestimate the mortality burden for the population as a whole. The public health agencies aimed at controlling temperature-related mortality should develop area-specific strategies, such as to reduce the urban-rural gaps in access to health care and awareness of risk prevention. Future projections on climate health impacts should consider the urban-rural disparity in mortality risks. https://doi.org/10.1289/EHP3556.",2019-03-01 +27261593,Quantitative methods in electroencephalography to access therapeutic response.,"Pharmacometrics or Quantitative Pharmacology aims to quantitatively analyze the interaction between drugs and patients whose tripod: pharmacokinetics, pharmacodynamics and disease monitoring to identify variability in drug response. Being the subject of central interest in the training of pharmacists, this work was out with a view to promoting this idea on methods to access the therapeutic response of drugs with central action. 
This paper discusses quantitative methods (Fast Fourier Transform, Magnitude Square Coherence, Conditional Entropy, Generalised Linear semi-canonical Correlation Analysis, Statistical Parametric Network and Mutual Information Function) used to evaluate the EEG signals obtained after administration regimen of drugs, the main findings and their clinical relevance, pointing it as a contribution to construction of different pharmaceutical practice. Peter Anderer et. al in 2000 showed the effect of 20mg of buspirone in 20 healthy subjects after 1, 2, 4, 6 and 8h after oral ingestion of the drug. The areas of increased power of the theta frequency occurred mainly in the temporo-occipital - parietal region. It has been shown by Sampaio et al., 2007 that the use of bromazepam, which allows the release of GABA (gamma amino butyric acid), an inhibitory neurotransmitter of the central nervous system could theoretically promote dissociation of cortical functional areas, a decrease of functional connectivity, a decrease of cognitive functions by means of smaller coherence (electrophysiological magnitude measured from the EEG by software) values. Ahmad Khodayari-Rostamabad et al. in 2015 talk that such a measure could be a useful clinical tool potentially to assess adverse effects of opioids and hence give rise to treatment guidelines. There was the relation between changes in pain intensity and brain sources (at maximum activity locations) during remifentanil infusion despite its potent analgesic effect. 
The statement of mathematical and computational aspects in the use of clinical data is frequent and elucidation of these aspects we use PhysioNet https://www.physionet.org/, Clinical Database online supported by the National Institutes of Health (National Institutes of Health of United States of America/NIH-USA) for the acquisition of EEG data and the Matlab program to do the simulations with the methods and thus create opportunities greater understanding.",2016-04-16 +28582591,"A tool for integrating genetic and mass spectrometry-based peptide data: Proteogenomics Viewer: PV: A genome browser-like tool, which includes MS data visualization and peptide identification parameters. ","In this manuscript we describe Proteogenomics Viewer, a web-based tool that collects MS peptide identification, indexes to genomic sequence and structure, assigns exon usage, reports the identified protein isoforms with genomic alignments and, most importantly, allows the inspection of MS2 information for proper peptide identification. It also provides all performed indexing to facilitate global analysis of the data. The relevance of such tool is that there has been an increase in the number of proteogenomic efforts to improve the annotation of both genomics and proteomics data, culminating with the release of the two human proteome drafts. It is now clear that mass spectrometry-based peptide identification of uncharacterized sequences, such as those resulting from unpredicted exon joints or non-coding regions, is still prone to a higher than expected false discovery rate. Therefore, proper visualization of the raw data and the corresponding genome alignments are fundamental for further data validation and interpretation. Also see the video abstract here: http://youtu.be/5NzyRvuk4Ac.",2017-06-05 +22232598,Channelpedia: an integrative and interactive database for ion channels.,"Ion channels are membrane proteins that selectively conduct ions across the cell membrane. 
The flux of ions through ion channels drives electrical and biochemical processes in cells and plays a critical role in shaping the electrical properties of neurons. During the past three decades, extensive research has been carried out to characterize the molecular, structural, and biophysical properties of ion channels. This research has begun to elucidate the role of ion channels in neuronal function and has subsequently led to the development of computational models of ion channel function. Although there have been substantial efforts to consolidate these findings into easily accessible and coherent online resources, a single comprehensive resource is still lacking. The success of these initiatives has been hindered by the sheer diversity of approaches and the variety in data formats. Here, we present ""Channelpedia"" (http://channelpedia.net), which is designed to store information related to ion channels and models and is characterized by an efficient information management framework. Composed of a combination of a database and a wiki-like discussion platform Channelpedia allows researchers to collaborate and synthesize ion channel information from literature. Equipped to automatically update references, Channelpedia integrates and highlights recent publications with relevant information in the database. It is web based, freely accessible and currently contains 187 annotated ion channels with 45 Hodgkin-Huxley models.",2011-12-30 +28582485,COMPASS: the COMPletely Arbitrary Sequence Simulator.,"

Summary

Simulated sequence alignments are frequently used to test bioinformatics tools, but current sequence simulators are limited to defined state spaces. Here, we present the COMPletely Arbitrary Sequence Simulator (COMPASS), which is able to simulate the evolution of absolutely any discrete state space along a tree, for any form of time-reversible model.

Availability and implementation

COMPASS is implemented in Python 2.7, and is freely available for all platforms with the Supplementary Information, as well as at http://labs.carleton.ca/eme/software-and-data.

Contact

alex_wong@carleton.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +29662024,"PKIDB: A Curated, Annotated and Updated Database of Protein Kinase Inhibitors in Clinical Trials. ","The number of protein kinase inhibitors (PKIs) approved worldwide continues to grow steadily, with 39 drugs approved in the period between 2001 and January 2018. PKIs on the market have been the subject of many reviews, and structure-property relationships specific to this class of drugs have been inferred. However, the large number of PKIs under development is often overlooked. In this paper, we present PKIDB (Protein Kinase Inhibitor Database), a monthly-updated database gathering approved PKIs as well as PKIs currently in clinical trials. The database compiles currently 180 inhibitors ranging from phase 0 to 4 clinical trials along with annotations extracted from seven public resources. The distribution and property ranges of standard physicochemical properties are presented. They can be used as filters to better prioritize compound selection for future screening campaigns. Interestingly, more than one-third of the kinase inhibitors violate at least one Lipinski's rule. A Principal Component Analysis (PCA) reveals that Type-II inhibitors are mapped to a distinct chemical space as compared to orally administrated drugs as well as to other types of kinase inhibitors. Using a Principal Moment of Inertia (PMI) analysis, we show that PKIs under development tend to explore new shape territories as compared to approved PKIs. In order to facilitate the analysis of the protein space, the kinome tree has been annotated with all protein kinases being targeted by PKIs. Finally, we analyzed the pipeline of the pharmaceutical companies having PKIs on the market or still under development. We hope that this work will assist researchers in the kinase field in identifying and designing the next generation of kinase inhibitors for still untargeted kinases. 
The PKIDB database is freely accessible from a website at http://www.icoa.fr/pkidb and can be easily browsed through a user-friendly spreadsheet-like interface.",2018-04-15 +27242033,gEVE: a genome-based endogenous viral element database provides comprehensive viral protein-coding sequences in mammalian genomes. ,"In mammals, approximately 10% of genome sequences correspond to endogenous viral elements (EVEs), which are derived from ancient viral infections of germ cells. Although most EVEs have been inactivated, some open reading frames (ORFs) of EVEs obtained functions in the hosts. However, EVE ORFs usually remain unannotated in the genomes, and no databases are available for EVE ORFs. To investigate the function and evolution of EVEs in mammalian genomes, we developed EVE ORF databases for 20 genomes of 19 mammalian species. A total of 736,771 non-overlapping EVE ORFs were identified and archived in a database named gEVE (http://geve.med.u-tokai.ac.jp). The gEVE database provides nucleotide and amino acid sequences, genomic loci and functional annotations of EVE ORFs for all 20 genomes. In analyzing RNA-seq data with the gEVE database, we successfully identified the expressed EVE genes, suggesting that the gEVE database facilitates studies of the genomic analyses of various mammalian species.Database URL: http://geve.med.u-tokai.ac.jp.",2016-05-30 +29541659,Metagenomic data of DNA viruses of poultry affected with respiratory tract infection.,"The incidence and severity of respiratory diseases in commercial broiler chicken flocks have increased recently in India because of intensification of the broiler industry. Viral population are predominant in respiratory tract infections and they pose continuous economic burden to poultry industry by causing severe economic losses through decreased productivity [1], [2]. 
To understand viral metagenome of poultry associated with respiratory infections, we performed DNA virome sequencing and data analysis of broilers from 8 districts of Gujarat State in India. We report high quality sequencing reads and highly abundant DNA viral population present in the infected broiler birds. The raw sequencing data used to perform metagenomic analysis is available in the Sequence Read Archive (SRA) under the BioProject No. PRJNA322592 and Accession No. MAUZ00000000, MAVA00000000, MAVB00000000, MAVC00000000, MAVD00000000, MAVE00000000, MAVF00000000, MAVG00000000 (https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA322592).",2017-11-13 +30719239,TEM ExosomeAnalyzer: a computer-assisted software tool for quantitative evaluation of extracellular vesicles in transmission electron microscopy images.,"Extracellular vesicles (EVs) function as important conveyers of information between cells and thus can be exploited as drug delivery systems or disease biomarkers. Transmission electron microscopy (TEM) remains the gold standard method for visualisation of EVs, however the analysis of individual EVs in TEM images is time-consuming if performed manually. Therefore, we present here a software tool for computer-assisted evaluation of EVs in TEM images. TEM ExosomeAnalyzer detects EVs based on their shape and edge contrast criteria and subsequently analyses their size and roundness. The software tool is compatible with common negative staining protocols and isolation methods used in the field of EV research; even with challenging TEM images (EVs both lighter and darker than the background, images containing artefacts or precipitated stain, etc.). If the fully-automatic analysis fails to produce correct results, users can promptly adjust the detected seeds of EVs as well as their boundaries manually. The performance of our tool was evaluated for three different modes with variable levels of human interaction, using two datasets with various heterogeneity. 
The semi-automatic mode analyses EVs with high success rate in the homogenous dataset (F1 score 0.9094, Jaccard coefficient 0.8218) as well as in the highly heterogeneous dataset containing EVs isolated from cell culture medium and patient samples (F1 score 0.7619, Jaccard coefficient 0.7553). Moreover, the extracted size distribution profiles of EVs isolated from malignant ascites of ovarian cancer patients overlap with those derived by cryo-EM and are comparable to NTA- and TRPS-derived data. In summary, TEM ExosomeAnalyzer is an easy-to-use software tool for evaluation of many types of vesicular microparticles and is available at http://cbia.fi.muni.cz/exosome-analyzer free of charge for non-commercial and research purposes. The web page contains also detailed description how to use the software tool including a video tutorial.",2019-01-21 +25964298,RNA-Redesign: a web server for fixed-backbone 3D design of RNA.,"RNA is rising in importance as a design medium for interrogating fundamental biology and for developing therapeutic and bioengineering applications. While there are several online servers for design of RNA secondary structure, there are no tools available for the rational design of 3D RNA structure. Here we present RNA-Redesign (http://rnaredesign.stanford.edu), an online 3D design tool for RNA. This resource utilizes fixed-backbone design to optimize the sequence identity and nucleobase conformations of an RNA to match a desired backbone, analogous to fundamental tools that underlie rational protein engineering. The resulting sequences suggest thermostabilizing mutations that can be experimentally verified. Further, sequence preferences that differ between natural and computationally designed sequences can suggest whether natural sequences possess functional constraints besides folding stability, such as cofactor binding or conformational switching. 
Finally, for biochemical studies, the designed sequences can suggest experimental tests of 3D models, including concomitant mutation of base triples. In addition to the designs generated, detailed graphical analysis is presented through an integrated and user-friendly environment.",2015-05-11 +26555596,Continuous Distributed Representation of Biological Sequences for Deep Proteomics and Genomics.,"We introduce a new representation and feature extraction method for biological sequences. Named bio-vectors (BioVec) to refer to biological sequences in general with protein-vectors (ProtVec) for proteins (amino-acid sequences) and gene-vectors (GeneVec) for gene sequences, this representation can be widely used in applications of deep learning in proteomics and genomics. In the present paper, we focus on protein-vectors that can be utilized in a wide array of bioinformatics investigations such as family classification, protein visualization, structure prediction, disordered protein identification, and protein-protein interaction prediction. In this method, we adopt artificial neural network approaches and represent a protein sequence with a single dense n-dimensional vector. To evaluate this method, we apply it in classification of 324,018 protein sequences obtained from Swiss-Prot belonging to 7,027 protein families, where an average family classification accuracy of 93%±0.06% is obtained, outperforming existing family classification methods. In addition, we use ProtVec representation to predict disordered proteins from structured proteins. Two databases of disordered sequences are used: the DisProt database as well as a database featuring the disordered regions of nucleoporins rich with phenylalanine-glycine repeats (FG-Nups). 
Using support vector machine classifiers, FG-Nup sequences are distinguished from structured protein sequences found in Protein Data Bank (PDB) with a 99.8% accuracy, and unstructured DisProt sequences are differentiated from structured DisProt sequences with 100.0% accuracy. These results indicate that by only providing sequence data for various proteins into this model, accurate information about protein structure can be determined. Importantly, this model needs to be trained only once and can then be applied to extract a comprehensive set of information regarding proteins of interest. Moreover, this representation can be considered as pre-training for various applications of deep learning in bioinformatics. The related data is available at Life Language Processing Website: http://llp.berkeley.edu and Harvard Dataverse: http://dx.doi.org/10.7910/DVN/JMFHTN.",2015-11-10 +29665371,"dbSWEET: An Integrated Resource for SWEET Superfamily to Understand, Analyze and Predict the Function of Sugar Transporters in Prokaryotes and Eukaryotes.","SWEET (Sweet Will Eventually be Exported Transporter) proteins have been recently discovered and form one of the three major families of sugar transporters. Homologs of SWEET are found in both prokaryotes and eukaryotes. Bacterial SWEET homologs have three transmembrane segments forming a triple-helical bundle and the functional form is dimers. Eukaryotic SWEETs have seven transmembrane helical segments forming two triple-helical bundles with a linker helix. Members of SWEET homologs have been shown to be involved in several important physiological processes in plants. However, not much is known regarding the biological significance of SWEET homologs in prokaryotes and in mammals. We have collected more than 2000 SWEET homologs from both prokaryotes and eukaryotes. For each homolog, we have modeled three different conformational states representing outward open, inward open and occluded states. 
We have provided details regarding substrate-interacting residues and residues forming the selectivity filter for each SWEET homolog. Several search and analysis options are available. The users can generate a phylogenetic tree and structure-based sequence alignment for selected set of sequences. With no metazoan SWEETs functionally characterized, the features observed in the selectivity filter residues can be used to predict the potential substrates that are likely to be transported across the metazoan SWEETs. We believe that this database will help the researchers to design mutational experiments and simulation studies that will aid to advance our understanding of the physiological role of SWEET homologs. This database is freely available to the scientific community at http://bioinfo.iitk.ac.in/bioinfo/dbSWEET/Home.",2018-04-14 +25559128,MUFOLD-DB: a processed protein structure database for protein structure prediction and analysis.,"

Background

Protein structure data in Protein Data Bank (PDB) are widely used in studies of protein function and evolution and in protein structure prediction. However, there are two main barriers in large-scale usage of PDB data: 1) PDB data are highly redundant in terms of sequence and structure similarity; and 2) many PDB files have issues due to inconsistency of data and standards as well as missing residues, so that automated retrieval and analysis are often difficult.

Description

To address these issues, we have created MUFOLD-DB http://mufold.org/mufolddb.php, a web-based database, to collect and process the weekly PDB files thereby providing users with non-redundant, cleaned and partially-predicted structure data. For each of the non-redundant sequences, we annotate the SCOP domain classification and predict structures of missing regions by loop modelling. In addition, evolutional information, secondary structure, disorder region, and processed three-dimensional structure are computed and visualized to help users better understand the protein.

Conclusions

MUFOLD-DB integrates processed PDB sequence and structure data and multiple computational results, provides a friendly interface for users to retrieve, browse and download these data, and offers several useful functionalities to facilitate users' data operation.",2014-12-16 +29165669,ClinVar: improving access to variant interpretations and supporting evidence.,"ClinVar (https://www.ncbi.nlm.nih.gov/clinvar/) is a freely available, public archive of human genetic variants and interpretations of their significance to disease, maintained at the National Institutes of Health. Interpretations of the clinical significance of variants are submitted by clinical testing laboratories, research laboratories, expert panels and other groups. ClinVar aggregates data by variant-disease pairs, and by variant (or set of variants). Data aggregated by variant are accessible on the website, in an improved set of variant call format files and as a new comprehensive XML report. ClinVar recently started accepting submissions that are focused primarily on providing phenotypic information for individuals who have had genetic testing. Submissions may come from clinical providers providing their own interpretation of the variant ('provider interpretation') or from groups such as patient registries that primarily provide phenotypic information from patients ('phenotyping only'). ClinVar continues to make improvements to its search and retrieval functions. Several new fields are now indexed for more precise searching, and filters allow the user to narrow down a large set of search results.",2018-01-01 +30154730,Abnormal Spontaneous Brain Activity in Early Parkinson's Disease With Mild Cognitive Impairment: A Resting-State fMRI Study.,"Mild cognitive impairment (MCI) is a common symptom at the baseline of early Parkinson's disease (PD) diagnosis, but the neural mechanism is unclear. 
To address the issue, the present study employed resting-state functional magnetic resonance imaging data of 19 drug-naïve PD patients with normal cognition (PD-NC), 10 PD patients with MCI (PD-MCI) and 13 age- and gender-matched healthy controls (HC) from the Parkinson's progression markers initiative (PPMI) (http://www.ppmi-info.org/), and examined abnormal spontaneous brain activities in the PD-MCI. The pattern of spontaneous brain activity was measured by examining the amplitude of low-frequency fluctuations (ALFF) of blood oxygen level dependent signal. Voxel-wise one-way analysis of covariance and post hoc analyses of ALFF were performed under non-parametric permutation tests in a general linear model among the three groups, with age, gender and data center as additional covariates. Statistical significances in the post hoc analysis were corrected by a small volume correction with a cluster-level threshold of p < 0.05 (n = 10000 permutations, FWE-corrected). Correlations of clinical and neuropsychological assessments [i.e., Unified Parkinson's Disease Rating Scale (UPDRS) total score, Montreal Cognitive Assessment (MoCA) and cognitive domains] with the regional ALFF were performed in the PD-MCI group. Compared with the HC, both PD groups exhibited reduced ALFF in the occipital area (Calcarine_R/Cuneus_R). Specially, the PD-MCI group additionally exhibited increased ALFF in the opercular part of right inferior frontal gyrus (Frontal_Inf_Oper_R). Comparing with the PD-NC, the PD-MCI group exhibited significantly higher ALFF in the Frontal_Inf_Oper_R and left fusiform gyus (ps < 0.05). The correlation analysis revealed that the ALFF in the Frontal_Inf_Oper_R was positively correlated with the UPDRS total score (p < 0.05), but marginally negatively correlated with the MoCA score. 
For cognitive domains, the ALFF in the region also showed a significantly negative correlation with the score of SF test (p < 0.01) and a marginally negative correlation with the score of Symbol-Digit Modalities Test. Together, we concluded hyperactivity in the right inferior frontal gyrus in early PD with MCI, suggesting a compensatory recruitment in response to cognitive decline, which may shed light on thought of dementia progression and potentially comprehensive treatment in PD.",2018-08-14 +30124430,Building a functional connectome of the Drosophila central complex. ,"The central complex is a highly conserved insect brain region composed of morphologically stereotyped neurons that arborize in distinctively shaped substructures. The region is implicated in a wide range of behaviors and several modeling studies have explored its circuit computations. Most studies have relied on assumptions about connectivity between neurons based on their overlap in light microscopy images. Here, we present an extensive functional connectome of Drosophila melanogaster's central complex at cell-type resolution. Using simultaneous optogenetic stimulation, calcium imaging and pharmacology, we tested the connectivity between 70 presynaptic-to-postsynaptic cell-type pairs. We identified numerous inputs to the central complex, but only a small number of output channels. Additionally, the connectivity of this highly recurrent circuit appears to be sparser than anticipated from light microscopy images. Finally, the connectivity matrix highlights the potentially critical role of a class of bottleneck interneurons. All data are provided for interactive exploration on a website.",2018-08-20 +23758809,"MITE Digger, an efficient and accurate algorithm for genome wide discovery of miniature inverted repeat transposable elements.","

Background

Miniature inverted repeat transposable elements (MITEs) are abundant non-autonomous elements, playing important roles in shaping gene and genome evolution. Their characteristic structural features are suitable for automated identification by computational approaches, however, de novo MITE discovery at genomic levels is still resource expensive. Efficient and accurate computational tools are desirable. Existing algorithms process every member of a MITE family, therefore a major portion of the computing task is redundant.

Results

In this study, redundant computing steps were analyzed and a novel algorithm emphasizing on the reduction of such redundant computing was implemented in MITE Digger. It completed processing the whole rice genome sequence database in ~15 hours and produced 332 MITE candidates with low false positive (1.8%) and false negative (0.9%) rates. MITE Digger was also tested for genome wide MITE discovery with four other genomes.

Conclusions

MITE Digger is efficient and accurate for genome wide retrieval of MITEs. Its user friendly interface further facilitates genome wide analyses of MITEs on a routine basis. The MITE Digger program is available at: http://labs.csb.utoronto.ca/yang/MITEDigger.",2013-06-07 +24265775,Using a web-based application to define the accuracy of diagnostic tests when the gold standard is imperfect.,"

Background

Estimates of the sensitivity and specificity for new diagnostic tests based on evaluation against a known gold standard are imprecise when the accuracy of the gold standard is imperfect. Bayesian latent class models (LCMs) can be helpful under these circumstances, but the necessary analysis requires expertise in computational programming. Here, we describe open-access web-based applications that allow non-experts to apply Bayesian LCMs to their own data sets via a user-friendly interface.

Methods/principal findings

Applications for Bayesian LCMs were constructed on a web server using R and WinBUGS programs. The models provided (http://mice.tropmedres.ac) include two Bayesian LCMs: the two-tests in two-population model (Hui and Walter model) and the three-tests in one-population model (Walter and Irwig model). Both models are available with simplified and advanced interfaces. In the former, all settings for Bayesian statistics are fixed as defaults. Users input their data set into a table provided on the webpage. Disease prevalence and accuracy of diagnostic tests are then estimated using the Bayesian LCM, and provided on the web page within a few minutes. With the advanced interfaces, experienced researchers can modify all settings in the models as needed. These settings include correlation among diagnostic test results and prior distributions for all unknown parameters. The web pages provide worked examples with both models using the original data sets presented by Hui and Walter in 1980, and by Walter and Irwig in 1988. We also illustrate the utility of the advanced interface using the Walter and Irwig model on a data set from a recent melioidosis study. The results obtained from the web-based applications were comparable to those published previously.

Conclusions

The newly developed web-based applications are open-access and provide an important new resource for researchers worldwide to evaluate new diagnostic tests.",2013-11-12 +28821228,SG-ADVISER mtDNA: a web server for mitochondrial DNA annotation with data from 200 samples of a healthy aging cohort.,"

Background

Whole genome and exome sequencing usually include reads containing mitochondrial DNA (mtDNA). Yet, state-of-the-art pipelines and services for human nuclear genome variant calling and annotation do not handle mitochondrial genome data appropriately. As a consequence, any researcher desiring to add mtDNA variant analysis to their investigations is forced to explore the literature for mtDNA pipelines, evaluate them, and implement their own instance of the desired tool. This task is far from trivial, and can be prohibitive for non-bioinformaticians.

Results

We have developed SG-ADVISER mtDNA, a web server to facilitate the analysis and interpretation of mtDNA genomic data coming from next generation sequencing (NGS) experiments. The server was built in the context of our SG-ADVISER framework and on top of the MtoolBox platform (Calabrese et al., Bioinformatics 30(21):3115-3117, 2014), and includes most of its functionalities (i.e., assembly of mitochondrial genomes, heteroplasmic fractions, haplogroup assignment, functional and prioritization analysis of mitochondrial variants) as well as a back-end and a front-end interface. The server has been tested with unpublished data from 200 individuals of a healthy aging cohort (Erikson et al., Cell 165(4):1002-1011, 2016) and their data is made publicly available here along with a preliminary analysis of the variants. We observed that individuals over ~90 years old carried low levels of heteroplasmic variants in their genomes.

Conclusions

SG-ADVISER mtDNA is a fast and functional tool that allows for variant calling and annotation of human mtDNA data coming from NGS experiments. The server was built with simplicity in mind, and builds on our own experience in interpreting mtDNA variants in the context of sudden death and rare diseases. Our objective is to provide an interface for non-bioinformaticians aiming to acquire (or contrast) mtDNA annotations via MToolBox. SG-ADVISER web server is freely available to all users at https://genomics.scripps.edu/mtdna .",2017-08-18 +24885079,fPoxDB: fungal peroxidase database for comparative genomics.,"

Background

Peroxidases are a group of oxidoreductases which mediate electron transfer from hydrogen peroxide (H2O2) and organic peroxide to various electron acceptors. They possess a broad spectrum of impact on industry and fungal biology. There are numerous industrial applications using peroxidases, such as to catalyse highly reactive pollutants and to break down lignin for recycling of carbon sources. Moreover, genes encoding peroxidases play important roles in fungal pathogenicity in both humans and plants. For better understanding of fungal peroxidases at the genome-level, a novel genomics platform is required. To this end, Fungal Peroxidase Database (fPoxDB; http://peroxidase.riceblast.snu.ac.kr/) has been developed to provide such a genomics platform for this important gene family.

Description

In order to identify and classify fungal peroxidases, 24 sequence profiles were built and applied on 331 genomes including 216 from fungi and Oomycetes. In addition, NoxR, which is known to regulate NADPH oxidases (NoxA and NoxB) in fungi, was also added to the pipeline. Collectively, 6,113 genes were predicted to encode 25 gene families, presenting well-separated distribution along the taxonomy. For instance, the genes encoding lignin peroxidase, manganese peroxidase, and versatile peroxidase were concentrated in the rot-causing basidiomycetes, reflecting their ligninolytic capability. As a genomics platform, fPoxDB provides diverse analysis resources, such as gene family predictions based on fungal sequence profiles, pre-computed results of eight bioinformatics programs, similarity search tools, a multiple sequence alignment tool, domain analysis functions, and taxonomic distribution summary, some of which are not available in the previously developed peroxidase resource. In addition, fPoxDB is interconnected with other family web systems, providing extended analysis opportunities.

Conclusions

fPoxDB is a fungi-oriented genomics platform for peroxidases. The sequence-based prediction and diverse analysis toolkits with easy-to-follow web interface offer a useful workbench to study comparative and evolutionary genomics of peroxidases in fungi.",2014-05-08 +30733194,Loss of FOXP3 and TSC1 Accelerates Prostate Cancer Progression through Synergistic Transcriptional and Posttranslational Regulation of c-MYC.,"Although c-MYC and mTOR are frequently activated proteins in prostate cancer, any interaction between the two is largely untested. Here, we characterize the functional cross-talk between FOXP3-c-MYC and TSC1-mTOR signaling during tumor progression. Deletion of Tsc1 in mouse embryonic fibroblasts (MEF) decreased phosphorylation of c-MYC at threonine 58 (pT58) and increased phosphorylation at serine 62 (pS62), an observation validated in prostate cancer cells. Conversely, inhibition of mTOR increased pT58 but decreased pS62. Loss of both FOXP3 and TSC1 in prostate cancer cells synergistically enhanced c-MYC expression via regulation of c-Myc transcription and protein phosphorylation. This crosstalk between FOXP3 and TSC1 appeared to be mediated by both the mTOR-4EBP1-c-MYC and FOXP3-c-MYC pathways. In mice, Tsc1 and Foxp3 double deletions in the prostate led to prostate carcinomas at an early age; this did not occur in these mice with an added c-Myc deletion. In addition, we observed synergistic antitumor effects of cotreating mice with inhibitors of mTOR and c-MYC in prostate cancer cells and in Foxp3 and Tsc1 double-mutant mice. In human prostate cancer, loss of nuclear FOXP3 is often accompanied by low expression of TSC1. Because loss of FOXP3 transcriptionally induces c-Myc expression and loss of TSC1 activates mTOR signaling, these data suggest cross-talk between FOXP3-c-MYC and TSC1-mTOR signaling that converges on c-MYC to regulate tumor progression. 
Coadministration of c-MYC and mTOR inhibitors may overcome the resistance to mTOR inhibition commonly observed in prostate cancer cells. SIGNIFICANCE: These results establish the principle of a synergistic action of TSC1 and FOXP3 during prostate cancer progression and provide new therapeutic targets for patients who have prostate cancer with two signaling defects. Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/79/7/1413/F1.large.jpg.",2019-02-07 +29644619,"Early (≤ 30 Days), Late (31-360 Days) and Very Late (> 360 Days) Stent Thrombosis in Patients with Insulin-Treated versus Non-Insulin-Treated Type 2 Diabetes Mellitus: A Meta-Analysis.","

Introduction

At this time in 2018, with type 2 diabetes mellitus (T2DM) and coronary artery disease (CAD) still on the rise, the post-percutaneous coronary interventional (PCI) outcomes observed in patients with diabetes mellitus who are on insulin therapy (ITDM) and those who do not require insulin (NITDM) are still controversial and require further investigation. Considering this idea to be of particular interest to the readers, as well as being an important issue in interventional cardiology, we aimed to systematically assess early (≤ 30 days), late (31-360 days) and very late (> 360 days) stent thrombosis in patients with ITDM and NITDM following drug-eluting stent (DES) implantation.

Methods

Well-known online databases (the Cochrane, EMBASE and MEDLINE databases and http://www.ClinicalTrials.gov ) were searched for relevant English publications based on ITDM and NITDM and stent thrombosis following PCI using specific terms. Early stent thrombosis, late stent thrombosis and very late stent thrombosis were the clinical outcomes. The main analysis was carried out using the latest version of the RevMan software (version 5.3) whereby odds ratios (OR), and 95% confidence intervals (CI) were generated.

Results

A total of 8524 participants with T2DM (2273 participants were on insulin therapy and 6251 participants were not) were included. Results of this analysis showed early stent thrombosis to be significantly higher in patients with ITDM (OR 1.81, 95% CI 1.04-3.15; P = 0.04), whereas late and very late stent thromboses were not significantly different following PCI with DES in diabetic patients with versus without insulin therapy (OR 1.44, 95% CI 0.73-2.84, P = 0.30 and OR 0.80, 95% CI 0.33-1.92, P = 0.62, respectively). Late stent thromboses associated exclusively with everolimus-eluting stents (EES) and paclitaxel-eluting stents (PES) were not significantly different in patients with ITDM and NITDM.

Conclusion

Following PCI with DES, early stent thrombosis was significantly higher in patients with ITDM. However, late and very late stent thromboses were not significantly different in patients with type 2 diabetes mellitus who were treated with or without insulin. Comparison with individual DES was not sufficiently powerful to reach a conclusion.",2018-04-11 +29696033,REDIdb 3.0: A Comprehensive Collection of RNA Editing Events in Plant Organellar Genomes.,"RNA editing is an important epigenetic mechanism by which genome-encoded transcripts are modified by substitutions, insertions and/or deletions. It was first discovered in kinetoplastid protozoa followed by its reporting in a wide range of organisms. In plants, RNA editing occurs mostly by cytidine (C) to uridine (U) conversion in translated regions of organelle mRNAs and tends to modify affected codons restoring evolutionary conserved aminoacid residues. RNA editing has also been described in non-protein coding regions such as group II introns and structural RNAs. Despite its impact on organellar transcriptome and proteome complexity, current primary databases still do not provide a specific field for RNA editing events. To overcome these limitations, we developed REDIdb a specialized database for RNA editing modifications in plant organelles. Hereafter we describe its third release containing more than 26,000 events in a completely novel web interface to accommodate RNA editing in its genomics, biological and evolutionary context through whole genome maps and multiple sequence alignments. 
REDIdb is freely available at http://srv00.recas.ba.infn.it/redidb/index.html.",2018-04-11 +29644931,An Evaluation of a Diabetes Self-Management Education (DSME) Intervention Delivered Using Avatar-Based Technology: Certified Diabetes Educators' Ratings and Perceptions.,"Purpose The purpose of this study was to evaluate the perceptions that certified diabetes educators (CDEs), of diverse health professions, have of a culturally appropriate e-health intervention that used avatar-based technology. Methods Cross-sectional, survey-based design using quantitative and qualitative paradigms. A logic model framed the study, which centered on the broad and general concepts leading to study outcomes. In total, 198 CDEs participated in the evaluation. Participants were mostly female and represented an age range of 26 to 76 years. The profession representative of the sample was registered nurses. Study setting and data collection occurred at https://www.surveymonkey.com/r/AvatarVideoSurvey-for-Certified_Diabetes_Educators . Study instruments used were the Basic Demographics Survey (BD-13), Educational Material Use and Rating of Quality Scale (EMU-ROQ-9), Marlowe-Crowne Social Desirability Survey (MS-SOC-DES-CDE-13), Quality of Avatar Video Rating Scale (QAVRS-7), Recommend Avatar to Patients Scale (RAVTPS-3), Recommend Avatar Video to Health Professionals Scale (RAVTHP-3), and Avatar Video Applications Scale (AVAPP-1). Statistical analysis used included t tests, Pearson product moment correlations, backward stepwise regression, and content/thematic analysis. Results Age, ethnicity, Arab/Middle Eastern, Asian, and white/European descents were significant predictors of a high-quality rating of the video. Thematic and content analysis of the data revealed an overall positive perception of the video. 
Conclusions An e-health intervention grounded in evidence-based health behavior theories has potential to increase access to diabetes self-management education as evidenced in the ratings and perceptions of the video by CDEs.",2018-04-12 +30833352,A Bifunctional UDP-Sugar 4-Epimerase Supports Biosynthesis of Multiple Cell Surface Polysaccharides in Sinorhizobium meliloti. ,"Sinorhizobium meliloti produces multiple extracellular glycans, including among others, lipopolysaccharides (LPS), and the exopolysaccharides (EPS) succinoglycan (SG) and galactoglucan (GG). These polysaccharides serve cell protective roles. Furthermore, SG and GG promote the interaction of S. meliloti with its host Medicago sativa in root nodule symbiosis. ExoB has been suggested to be the sole enzyme catalyzing synthesis of UDP-galactose in S. meliloti (A. M. Buendia, B. Enenkel, R. Köplin, K. Niehaus, et al. Mol Microbiol 5:1519-1530, 1991, https://doi.org/10.1111/j.1365-2958.1991.tb00799.x). Accordingly, exoB mutants were previously found to be affected in the synthesis of the galactose-containing glycans LPS, SG, and GG and consequently, in symbiosis. Here, we report that the S. meliloti Rm2011 uxs1-uxe-apsS-apsH1-apsE-apsH2 (SMb20458-63) gene cluster directs biosynthesis of an arabinose-containing polysaccharide (APS), which contributes to biofilm formation, and is solely or mainly composed of arabinose. Uxe has previously been identified as UDP-xylose 4-epimerase. Collectively, our data from mutational and overexpression analyses of the APS biosynthesis genes and in vitro enzymatic assays indicate that Uxe functions as UDP-xylose 4- and UDP-glucose 4-epimerase catalyzing UDP-xylose/UDP-arabinose and UDP-glucose/UDP-galactose interconversions, respectively. Overexpression of uxe suppressed the phenotypes of an exoB mutant, evidencing that Uxe can functionally replace ExoB. 
We suggest that under conditions stimulating expression of the APS biosynthesis operon, Uxe contributes to the synthesis of multiple glycans and thereby to cell protection, biofilm formation, and symbiosis. Furthermore, we show that the C2H2 zinc finger transcriptional regulator MucR counteracts the previously reported CuxR-c-di-GMP-mediated activation of the APS biosynthesis operon. This integrates the c-di-GMP-dependent control of APS production into the opposing regulation of EPS biosynthesis and swimming motility in S. meliloti. IMPORTANCE Bacterial extracellular polysaccharides serve important cell protective, structural, and signaling roles. They have particularly attracted attention as adhesives and matrix components promoting biofilm formation, which significantly contributes to resistance against antibiotics. In the root nodule symbiosis between rhizobia and leguminous plants, extracellular polysaccharides have a signaling function. UDP-sugar 4-epimerases are important enzymes in the synthesis of the activated sugar substrates, which are frequently shared between multiple polysaccharide biosynthesis pathways. Thus, these enzymes are potential targets to interfere with these pathways. 
Our finding of a bifunctional UDP-sugar 4-epimerase in Sinorhizobium meliloti generally advances the knowledge of substrate promiscuity of such enzymes and specifically of the biosynthesis of extracellular polysaccharides involved in biofilm formation and symbiosis in this alphaproteobacterium.",2019-04-24 +22481888,"Xylella fastidiosa comparative genomic database is an information resource to explore the annotation, genomic features, and biology of different strains.","The Xylella fastidiosa comparative genomic database is a scientific resource with the aim to provide a user-friendly interface for accessing high-quality manually curated genomic annotation and comparative sequence analysis, as well as for identifying and mapping prophage-like elements, a marked feature of Xylella genomes. Here we describe a database and tools for exploring the biology of this important plant pathogen. The hallmarks of this database are the high quality genomic annotation, the functional and comparative genomic analysis and the identification and mapping of prophage-like elements. It is available from web site http://www.xylella.lncc.br.",2012-01-01 +28552033,MicroTarget: MicroRNA target gene prediction approach with application to breast cancer.,"MicroRNAs are known to play an essential role in gene regulation in plants and animals. The standard method for understanding microRNA-gene interactions is randomized controlled perturbation experiments. These experiments are costly and time consuming. Therefore, use of computational methods is essential. Currently, several computational methods have been developed to discover microRNA target genes. However, these methods have limitations based on the features that are used for prediction. The commonly used features are complementarity to the seed region of the microRNA, site accessibility, and evolutionary conservation. 
Unfortunately, not all microRNA target sites are conserved or adhere to exact seed complementary, and relying on site accessibility does not guarantee that the interaction exists. Moreover, the study of regulatory interactions composed of the same tissue expression data for microRNAs and mRNAs is necessary to understand the specificity of regulation and function. We developed MicroTarget to predict a microRNA-gene regulatory network using heterogeneous data sources, especially gene and microRNA expression data. First, MicroTarget employs expression data to learn a candidate target set for each microRNA. Then, it uses sequence data to provide evidence of direct interactions. MicroTarget scores and ranks the predicted targets based on a set of features. The predicted targets overlap with many of the experimentally validated ones. Our results indicate that using expression data in target prediction is more accurate in terms of specificity and sensitivity. Available at: https://bioinformatics.cs.vt.edu/~htorkey/microTarget .",2017-05-02 +21216779,A Java API for working with PubChem datasets.,"

Unlabelled

PubChem is a public repository of chemical structures and associated biological activities. The PubChem BioAssay database contains assay descriptions, conditions and readouts and biological screening results that have been submitted by the biomedical research community. The PubChem web site and Power User Gateway (PUG) web service allow users to interact with the data and raw files are available via FTP. These resources are helpful to many but there can also be great benefit by using a software API to manipulate the data. Here, we describe a Java API with entity objects mapped to the PubChem Schema and with wrapper functions for calling the NCBI eUtilities and PubChem PUG web services. PubChem BioAssays and associated chemical compounds can then be queried and manipulated in a local relational database. Features include chemical structure searching and generation and display of curve fits from stored dose-response experiments, something that is not yet available within PubChem itself. The aim is to provide researchers with a fast, consistent, queryable local resource from which to manipulate PubChem BioAssays in a database agnostic manner. It is not intended as an end user tool but to provide a platform for further automation and tools development.

Availability

http://code.google.com/p/pubchemdb.",2011-01-06 +26635391,DriverDBv2: a database for human cancer driver gene research.,"We previously presented DriverDB, a database that incorporates ∼ 6000 cases of exome-seq data, in addition to annotation databases and published bioinformatics algorithms dedicated to driver gene/mutation identification. The database provides two points of view, 'Cancer' and 'Gene', to help researchers visualize the relationships between cancers and driver genes/mutations. In the updated DriverDBv2 database (http://ngs.ym.edu.tw/driverdb) presented herein, we incorporated >9500 cancer-related RNA-seq datasets and >7000 more exome-seq datasets from The Cancer Genome Atlas (TCGA), International Cancer Genome Consortium (ICGC), and published papers. Seven additional computational algorithms (meaning that the updated database contains 15 in total), which were developed for driver gene identification, are incorporated into our analysis pipeline, and the results are provided in the 'Cancer' section. Furthermore, there are two main new features, 'Expression' and 'Hotspot', in the 'Gene' section. 'Expression' displays two expression profiles of a gene in terms of sample types and mutation types, respectively. 'Hotspot' indicates the hotspot mutation regions of a gene according to the results provided by four bioinformatics tools. A new function, 'Gene Set', allows users to investigate the relationships among mutations, expression levels and clinical data for a set of genes, a specific dataset and clinical features.",2015-12-03 +27507885,ADPriboDB: The database of ADP-ribosylated proteins.,"ADP-ribosylation refers to the addition of one or more ADP-ribose units onto proteins post-translationally. This protein modification is often added by ADP-ribosyltransferases, commonly known as PARPs, but it can also be added by other enzymes, including sirtuins or bacterial toxins. 
While past literature has utilized a variety of methods to identify ADP-ribosylated proteins, recent proteomics studies bring the power of mass spectrometry to determine sites of the modification. To appreciate the diverse roles of ADP-ribosylation across the proteome, we have created ADPriboDB - a database of ADP-ribosylated proteins (http://ADPriboDB.leunglab.org). Each entry of ADPriboDB is annotated manually by at least two independent curators from the literature between January 1975 and July 2015. The current database includes over 12 400 protein entries from 459 publications, identifying 2389 unique proteins. Here, we describe the structure and the current state of ADPriboDB as well as the criteria for entry inclusion. Using this aggregate data, we identified a statistically significant enrichment of ADP-ribosylated proteins in non-membranous RNA granules. To our knowledge, ADPriboDB is the first publicly available database encapsulating ADP-ribosylated proteins identified from the past 40 years, with a hope to facilitate the research of both basic scientists and clinicians to better understand ADP-ribosylation at the molecular level.",2016-08-09 +29633684,"Portuguese Norms of Name Agreement, Concept Familiarity, Subjective Frequency and Visual Complexity for 150 Colored and Tridimensional Pictures.","Pictures are complex stimuli that require a careful control of several characteristics and attributes standardized for different languages. In this work we present for the first time European Portuguese (EP) norms for name agreement, concept familiarity, subjective frequency and visual complexity for a new set of 150 colored pictures. These pictures were selected to represent exemplars of the most used semantic categories in research and to depict objects which, though familiar to the participants, were rarely used in daily life, which makes them particularly prone to speech failures such as tip-of-the-tongue (TOT) states. 
Norms were collected from 640 EP native speakers that rated each picture in the four variables through a web-survey procedure. Results showed, as expected, that a large number of pictures in the dataset elicited a TOT response, and additionally that the ratings obtained in each of the dimensions are in line with those observed in other pictorial datasets. Norms can be freely downloaded at https://www.psi.uminho.pt/en/Research/Psycholinguistics/Pages/Databases.aspx.",2018-04-10 +24115039,Expanded classification of hepatitis C virus into 7 genotypes and 67 subtypes: updated criteria and genotype assignment web resource.,"

Unlabelled

The 2005 consensus proposal for the classification of hepatitis C virus (HCV) presented an agreed and uniform nomenclature for HCV variants and the criteria for their assignment into genotypes and subtypes. Since its publication, the available dataset of HCV sequences has vastly expanded through advancement in nucleotide sequencing technologies and an increasing focus on the role of HCV genetic variation in disease and treatment outcomes. The current study represents a major update to the previous consensus HCV classification, incorporating additional sequence information derived from over 1,300 (near-)complete genome sequences of HCV available on public databases in May 2013. Analysis resolved several nomenclature conflicts between genotype designations and using consensus criteria created a classification of HCV into seven confirmed genotypes and 67 subtypes. There are 21 additional complete coding region sequences of unassigned subtype. The study additionally describes the development of a Web resource hosted by the International Committee for Taxonomy of Viruses (ICTV) that maintains and regularly updates tables of reference isolates, accession numbers, and annotated alignments (http://talk.ictvonline.org/links/hcv/hcv-classification.htm). The Flaviviridae Study Group urges those who need to check or propose new genotypes or subtypes of HCV to contact the Study Group in advance of publication to avoid nomenclature conflicts appearing in the literature. While the criteria for assigning genotypes and subtypes remain unchanged from previous consensus proposals, changes are proposed in the assignment of provisional subtypes, subtype numbering beyond ""w,"" and the nomenclature of intergenotypic recombinant.

Conclusion

This study represents an important reference point for the consensus classification of HCV variants that will be of value to researchers working in clinical and basic science fields.",2014-01-01 +30591010,DLAD4U: deriving and prioritizing disease lists from PubMed literature.,"

Background

Due to recent technology advancements, disease related knowledge is growing rapidly. It becomes nontrivial to go through all published literature to identify associations between human diseases and genetic, environmental, and life style factors, disease symptoms, and treatment strategies. Here we report DLAD4U (Disease List Automatically Derived For You), an efficient, accurate and easy-to-use disease search engine based on PubMed literature.

Results

DLAD4U uses the eSearch and eFetch APIs from the National Center for Biotechnology Information (NCBI) to find publications related to a query and to identify diseases from the retrieved publications. The hypergeometric test was used to prioritize identified diseases for displaying to users. DLAD4U accepts any valid queries for PubMed, and the output results include a ranked disease list, information associated with each disease, chronologically-ordered supporting publications, a summary of the run, and links for file export. DLAD4U outperformed other disease search engines in our comparative evaluation using selected genes and drugs as query terms and manually curated data as ""gold standard"". For 100 genes that are associated with only one disease in the gold standard, the Mean Average Precision (MAP) measure from DLAD4U was 0.77, which clearly outperformed other tools. For 10 genes that are associated with multiple diseases in the gold standard, the mean precision, recall and F-measure scores from DLAD4U were always higher than those from other tools. The superior performance of DLAD4U was further confirmed using 100 drugs as queries, with an MAP of 0.90.

Conclusions

DLAD4U is a new, intuitive disease search engine that takes advantage of existing resources at NCBI to provide computational efficiency and uses statistical analyses to ensure accuracy. DLAD4U is publicly available at http://dlad4u.zhang-lab.org .",2018-12-28 +26538599,Tools and data services registry: a community effort to document bioinformatics resources.,"Life sciences are yielding huge data sets that underpin scientific discoveries fundamental to improvement in human health, agriculture and the environment. In support of these discoveries, a plethora of databases and tools are deployed, in technically complex and diverse implementations, across a spectrum of scientific disciplines. The corpus of documentation of these resources is fragmented across the Web, with much redundancy, and has lacked a common standard of information. The outcome is that scientists must often struggle to find, understand, compare and use the best resources for the task at hand. Here we present a community-driven curation effort, supported by ELIXIR-the European infrastructure for biological information-that aspires to a comprehensive and consistent registry of information about bioinformatics resources. The sustainable upkeep of this Tools and Data Services Registry is assured by a curation effort driven by and tailored to local needs, and shared amongst a network of engaged partners. As of November 2015, the registry includes 1785 resources, with depositions from 126 individual registrations including 52 institutional providers and 74 individuals. With community support, the registry can become a standard for dissemination of information about bioinformatics resources: we welcome everyone to join us in this common endeavour. 
The registry is freely available at https://bio.tools.",2015-11-03 +24165881,SIMAP--the database of all-against-all protein sequence similarities and annotations with new interfaces and increased coverage.,"The Similarity Matrix of Proteins (SIMAP, http://mips.gsf.de/simap/) database has been designed to massively accelerate computationally expensive protein sequence analysis tasks in bioinformatics. It provides pre-calculated sequence similarities interconnecting the entire known protein sequence universe, complemented by pre-calculated protein features and domains, similarity clusters and functional annotations. SIMAP covers all major public protein databases as well as many consistently re-annotated metagenomes from different repositories. As of September 2013, SIMAP contains >163 million proteins corresponding to ∼70 million non-redundant sequences. SIMAP uses the sensitive FASTA search heuristics, the Smith-Waterman alignment algorithm, the InterPro database of protein domain models and the BLAST2GO functional annotation algorithm. SIMAP assists biologists by facilitating the interactive exploration of the protein sequence universe. Web-Service and DAS interfaces allow connecting SIMAP with any other bioinformatic tool and resource. All-against-all protein sequence similarity matrices of project-specific protein collections are generated on request. Recent improvements allow SIMAP to cover the rapidly growing sequenced protein sequence universe. New Web-Service interfaces enhance the connectivity of SIMAP. Novel tools for interactive extraction of protein similarity networks have been added. Open access to SIMAP is provided through the web portal; the portal also contains instructions and links for software access and flat file downloads.",2013-10-27 +26474971,"A genome-wide approach to link genotype to clinical outcome by utilizing next generation sequencing and gene chip data of 6,697 breast cancer patients.","

Background

The use of somatic mutations for predicting clinical outcome is difficult because a mutation can indirectly influence the function of many genes, and also because clinical follow-up is sparse in the relatively young next generation sequencing (NGS) databanks. Here we approach this problem by linking sequence databanks to well annotated gene-chip datasets, using a multigene transcriptomic fingerprint as a link between gene mutations and gene expression in breast cancer patients.

Methods

The database consists of 763 NGS samples containing mutational status for 22,938 genes and RNA-seq data for 10,987 genes. The gene chip database contains 5,934 patients with 10,987 genes plus clinical characteristics. For the prediction, mutations present in a sample are first translated into a 'transcriptomic fingerprint' by running ROC analysis on mutation and RNA-seq data. Then correlation to survival is assessed by computing Cox regression for both up- and downregulated signatures.

Results

According to this approach, the top driver oncogenes having a mutation prevalence over 5 % included AKT1, TRANK1, TRAPPC10, RPGR, COL6A2, RAPGEF4, ATG2B, CNTRL, NAA38, OSBPL10, POTEF, SCLT1, SUN1, VWDE, MTUS2, and PIK3CA, and the top tumor suppressor genes included PHEX, TP53, GGA3, RGS22, PXDNL, ARFGEF1, BRCA2, CHD8, GCC2, and ARMC4. The system was validated by computing correlation between RNA-seq and microarray data (r(2) = 0.73, P < 1E-16). Cross-validation using 20 genes with a prevalence of approximately 5 % confirmed analysis reproducibility.

Conclusions

We established a pipeline enabling rapid clinical validation of a discovered mutation in a large breast cancer cohort. An online interface is available for evaluating any human gene mutation or combinations of maximum three such genes ( http://www.g-2-o.com ).",2015-10-16 +28710774,UCSF ChimeraX: Meeting modern challenges in visualization and analysis.,"UCSF ChimeraX is next-generation software for the visualization and analysis of molecular structures, density maps, 3D microscopy, and associated data. It addresses challenges in the size, scope, and disparate types of data attendant with cutting-edge experimental methods, while providing advanced options for high-quality rendering (interactive ambient occlusion, reliable molecular surface calculations, etc.) and professional approaches to software design and distribution. This article highlights some specific advances in the areas of visualization and usability, performance, and extensibility. ChimeraX is free for noncommercial use and is available from http://www.rbvi.ucsf.edu/chimerax/ for Windows, Mac, and Linux.",2017-09-06 +26415726,Computational probing protein-protein interactions targeting small molecules.,"

Motivation

With the booming of interactome studies, a lot of interactions can be measured in a high throughput way and large scale datasets are available. It is becoming apparent that many different types of interactions can be potential drug targets. Compared with inhibition of a single protein, inhibition of protein-protein interaction (PPI) is promising to improve the specificity with fewer adverse side-effects. Also it greatly broadens the drug target search space, which makes the drug target discovery difficult. Computational methods are highly desired to efficiently provide candidates for further experiments and hold the promise to greatly accelerate the discovery of novel drug targets.

Results

Here, we propose a machine learning method to predict PPI targets on a genome-wide scale. Specifically, we develop a computational method, named as PrePPItar, to Predict PPIs as drug targets by uncovering the potential associations between drugs and PPIs. First, we survey the databases and manually construct a gold-standard positive dataset for drug and PPI interactions. This effort leads to a dataset with 227 associations among 63 PPIs and 113 FDA-approved drugs and allows us to build models to learn the association rules from the data. Second, we characterize drugs by profiling in chemical structure, drug ATC-code annotation, and side-effect space and represent PPI similarity by a symmetrical S-kernel based on protein amino acid sequence. Then the drugs and PPIs are correlated by Kronecker product kernel. Finally, a support vector machine (SVM) is trained to predict novel associations between drugs and PPIs. We validate our PrePPItar method on the well-established gold-standard dataset by cross-validation. We find that all chemical structure, drug ATC-code, and side-effect information are predictive for PPI target. Moreover, we can increase the PPI target prediction coverage by integrating multiple data sources. Follow-up database search and pathway analysis indicate that our new predictions are worthy of future experimental validation.

Conclusion

In conclusion, PrePPItar can serve as a useful tool for PPI target discovery and provides a general heterogeneous data integrative framework.

Availability and implementation

PrePPItar is available at http://doc.aporc.org/wiki/PrePPItar.

Contact

ycwang@nwipb.cas.cn or ywang@amss.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-28 +29188446,iCHRCloud: Web & Mobile based Child Health Imprints for Smart Healthcare.,"Reducing child mortality with quality care is the prime-most concern of all nations. Thus in current IT era, our healthcare industry needs to focus on adapting information technology in healthcare services. Barring few preliminary attempts to digitalize basic hospital administrative and clinical functions, even today in India, child health and vaccination records are still maintained as paper-based records. Also, error in manually plotting the parameters in growth charts results in missed opportunities for early detection of growth disorders in children. To address these concerns, we present India's first hospital linked, affordable automated vaccination and real-time child's growth monitoring cloud based application- Integrated Child Health Record cloud (iCHRcloud). This application is based on HL7 protocol enabling integration with hospital's HIS/EMR system. It provides Java (Enterprise Service Bus and Hibernate) based web portal for doctors and mobile application for parents, enhancing doctor-parent engagement. It leverages highchart to automate chart preparation and provides access of data via Push Notification (GCM and APNS) to parents on iOS and Android mobile platforms. iCHRcloud has also been recognized as one of the best innovative solution in three nationwide challenges, 2016 in India. iCHRcloud offers a seamless, secure (256 bit HTTPS) and sustainable solution to reduce child mortality. Detail analysis on preliminary data of 16,490 child health records highlight the diversified need of various demographic regions. Thus, primary lesson would be to implement better validation strategies to fulfill the customize requisites of entire population. 
This paper presents first glimpse of data and power of the analytics in policy framework.",2017-11-29 +26353137,Multi-Orientation Scene Text Detection with Adaptive Clustering.,"Text detection in natural scene images is an important prerequisite for many content-based image analysis tasks, while most current research efforts only focus on horizontal or near horizontal scene text. In this paper, first we present a unified distance metric learning framework for adaptive hierarchical clustering, which can simultaneously learn similarity weights (to adaptively combine different feature similarities) and the clustering threshold (to automatically determine the number of clusters). Then, we propose an effective multi-orientation scene text detection system, which constructs text candidates by grouping characters based on this adaptive clustering. Our text candidates construction method consists of several sequential coarse-to-fine grouping steps: morphology-based grouping via single-link clustering, orientation-based grouping via divisive hierarchical clustering, and projection-based grouping also via divisive clustering. The effectiveness of our proposed system is evaluated on several public scene text databases, e.g., ICDAR Robust Reading Competition data sets (2011 and 2013), MSRA-TD500 and NEOCR. Specifically, on the multi-orientation text data set MSRA-TD500, the f measure of our system is 71 percent, much better than the state-of-the-art performance. We also construct and release a practical challenging multi-orientation scene text data set (USTB-SV1K), which is available at http://prir.ustb.edu.cn/TexStar/MOMV-text-detection/.",2015-09-01 +28506277,Quantifying the mapping precision of genome-wide association studies using whole-genome sequencing data.,"

Background

Understanding the mapping precision of genome-wide association studies (GWAS), that is the physical distances between the top associated single-nucleotide polymorphisms (SNPs) and the causal variants, is essential to design fine-mapping experiments for complex traits and diseases.

Results

Using simulations based on whole-genome sequencing (WGS) data from 3642 unrelated individuals of European descent, we show that the association signals at rare causal variants (minor allele frequency ≤ 0.01) are very unlikely to be mapped to common variants in GWAS using either WGS data or imputed data and vice versa. We predict that at least 80% of the common variants identified from published GWAS using imputed data are within 33.5 Kbp of the causal variants, a resolution that is comparable with that using WGS data. Mapping precision at these loci will improve with increasing sample sizes of GWAS in the future. For rare variants, the mapping precision of GWAS using WGS data is extremely high, suggesting WGS is an efficient strategy to detect and fine-map rare variants simultaneously. We further assess the mapping precision by linkage disequilibrium between GWAS hits and causal variants and develop an online tool (gwasMP) to query our results with different thresholds of physical distance and/or linkage disequilibrium ( http://cnsgenomics.com/shiny/gwasMP ).

Conclusions

Our findings provide a benchmark to inform future design and development of fine-mapping experiments and technologies to pinpoint the causal variants at GWAS loci.",2017-05-16 +25809845,Development of genome-wide insertion/deletion markers in rice based on graphic pipeline platform.,"DNA markers play important roles in plant breeding and genetics. The Insertion/Deletion (InDel) marker is one kind of co-dominant DNA markers widely used due to its low cost and high precision. However, the canonical way of searching for InDel markers is time-consuming and labor-intensive. We developed an end-to-end computational solution (InDel Markers Development Platform, IMDP) to identify genome-wide InDel markers under a graphic pipeline environment. IMDP constitutes assembled genome sequences alignment pipeline (AGA-pipe) and next-generation re-sequencing data mapping pipeline (NGS-pipe). With AGA-pipe we are able to identify 12,944 markers between the genome of rice cultivars Nipponbare and 93-11. Using NGS-pipe, we reported 34,794 InDels from re-sequencing data of rice cultivars Wu-Yun-Geng7 and Guang-Lu-Ai4. Combining AGA-pipe and NGS-pipe, we developed 205,659 InDels in eight japonica and nine indica cultivars and 2,681 InDels showed a subgroup-specific pattern. Polymerase chain reaction (PCR) analysis of subgroup-specific markers indicated that the precision reached 90% (86 of 95). Finally, to make them available to the public, we have integrated the InDels/markers information into a website (Rice InDel Marker Database, RIMD, http://202.120.45.71/). 
The application of IMDP in rice will facilitate efficiency for development of genome-wide InDel markers, in addition it can be used in other species with reference genome sequences and NGS data.",2015-09-01 +26513174,"MDP, a database linking drug response data to genomic information, identifies dasatinib and statins as a combinatorial strategy to inhibit YAP/TAZ in cancer cells.","Targeted anticancer therapies represent the most effective pharmacological strategies in terms of clinical responses. In this context, genetic alteration of several oncogenes represents an optimal predictor of response to targeted therapy. Integration of large-scale molecular and pharmacological data from cancer cell lines promises to be effective in the discovery of new genetic markers of drug sensitivity and of clinically relevant anticancer compounds. To define novel pharmacogenomic dependencies in cancer, we created the Mutations and Drugs Portal (MDP, http://mdp.unimore.it), a web accessible database that combines the cell-based NCI60 screening of more than 50,000 compounds with genomic data extracted from the Cancer Cell Line Encyclopedia and the NCI60 DTP projects. MDP can be queried for drugs active in cancer cell lines carrying mutations in specific cancer genes or for genetic markers associated to sensitivity or resistance to a given compound. As proof of performance, we interrogated MDP to identify both known and novel pharmacogenomics associations and unveiled an unpredicted combination of two FDA-approved compounds, namely statins and Dasatinib, as an effective strategy to potently inhibit YAP/TAZ in cancer cells.",2015-11-01 +26564970,pubmed.mineR: an R package with text-mining algorithms to analyse PubMed abstracts.,"The PubMed literature database is a valuable source of information for scientific research. It is rich in biomedical literature with more than 24 million citations. Data-mining of voluminous literature is a challenging task. 
Although several text-mining algorithms have been developed in recent years with focus on data visualization, they have limitations such as speed, are rigid and are not available in the open source. We have developed an R package, pubmed.mineR, wherein we have combined the advantages of existing algorithms, overcome their limitations, and offer user flexibility and link with other packages in Bioconductor and the Comprehensive R Archive Network (CRAN) in order to expand the user capabilities for executing multifaceted approaches. Three case studies are presented, namely, 'Evolving role of diabetes educators', 'Cancer risk assessment' and 'Dynamic concepts on disease and comorbidity' to illustrate the use of pubmed.mineR. The package generally runs fast with small elapsed times in regular workstations even on large corpus sizes and with compute intensive functions. The pubmed.mineR is available at http://cran.r-project.org/web/packages/pubmed.mineR.",2015-10-01 +26476450,PlantPAN 2.0: an update of plant promoter analysis navigator for reconstructing transcriptional regulatory networks in plants.,"Transcription factors (TFs) are sequence-specific DNA-binding proteins acting as critical regulators of gene expression. The Plant Promoter Analysis Navigator (PlantPAN; http://PlantPAN2.itps.ncku.edu.tw) provides an informative resource for detecting transcription factor binding sites (TFBSs), corresponding TFs, and other important regulatory elements (CpG islands and tandem repeats) in a promoter or a set of plant promoters. Additionally, TFBSs, CpG islands, and tandem repeats in the conserve regions between similar gene promoters are also identified. The current PlantPAN release (version 2.0) contains 16 960 TFs and 1143 TF binding site matrices among 76 plant species. 
In addition to updating of the annotation information, adding experimentally verified TF matrices, and making improvements in the visualization of transcriptional regulatory networks, several new features and functions are incorporated. These features include: (i) comprehensive curation of TF information (response conditions, target genes, and sequence logos of binding motifs, etc.), (ii) co-expression profiles of TFs and their target genes under various conditions, (iii) protein-protein interactions among TFs and their co-factors, (iv) TF-target networks, and (v) downstream promoter elements. Furthermore, a dynamic transcriptional regulatory network under various conditions is provided in PlantPAN 2.0. The PlantPAN 2.0 is a systematic platform for plant promoter analysis and reconstructing transcriptional regulatory networks.",2015-10-17 +23203867,NetwoRx: connecting drugs to networks and phenotypes in Saccharomyces cerevisiae.,"Drug modes of action are complex and still poorly understood. The set of known drug targets is widely acknowledged to be biased and incomplete, and so gives only limited insight into the system-wide effects of drugs. But a high-throughput assay unique to yeast-barcode-based chemogenomic screens-can measure the individual drug response of every yeast deletion mutant in parallel. NetwoRx (http://ophid.utoronto.ca/networx) is the first resource to store data from these extremely valuable yeast chemogenomics experiments. In total, NetwoRx stores data on 5924 genes and 466 drugs. In addition, we applied data-mining approaches to identify yeast pathways, functions and phenotypes that are targeted by particular drugs, compute measures of drug-drug similarity and construct drug-phenotype networks. These data are all available to search or download through NetwoRx; users can search by drug name, gene name or gene set identifier. 
We also set up automated analysis routines in NetwoRx; users can query new gene sets against the entire collection of drug profiles and retrieve the drugs that target them. We demonstrate with use case examples how NetwoRx can be applied to target specific phenotypes, repurpose drugs using mode of action analysis, investigate bipartite networks and predict new drugs that affect yeast aging.",2012-11-29 +26486520,An evidence-based knowledgebase of metastasis suppressors to identify key pathways relevant to cancer metastasis.,"Metastasis suppressor genes (MS genes) are genes that play important roles in inhibiting the process of cancer metastasis without preventing growth of the primary tumor. Identification of these genes and understanding their functions are critical for investigation of cancer metastasis. Recent studies on cancer metastasis have identified many new susceptibility MS genes. However, the comprehensive illustration of diverse cellular processes regulated by metastasis suppressors during the metastasis cascade is lacking. Thus, the relationship between MS genes and cancer risk is still unclear. To unveil the cellular complexity of MS genes, we have constructed MSGene (http://MSGene.bioinfo-minzhao.org/), the first literature-based gene resource for exploring human MS genes. In total, we manually curated 194 experimentally verified MS genes and mapped to 1448 homologous genes from 17 model species. Follow-up functional analyses associated 194 human MS genes with epithelium/tissue morphogenesis and epithelia cell proliferation. In addition, pathway analysis highlights the prominent role of MS genes in activation of platelets and coagulation system in tumor metastatic cascade. Moreover, global mutation pattern of MS genes across multiple cancers may reveal common cancer metastasis mechanisms. 
All these results illustrate the importance of MSGene to our understanding on cell development and cancer metastasis.",2015-10-21 +23774715,The autism brain imaging data exchange: towards a large-scale evaluation of the intrinsic brain architecture in autism.,"Autism spectrum disorders (ASDs) represent a formidable challenge for psychiatry and neuroscience because of their high prevalence, lifelong nature, complexity and substantial heterogeneity. Facing these obstacles requires large-scale multidisciplinary efforts. Although the field of genetics has pioneered data sharing for these reasons, neuroimaging had not kept pace. In response, we introduce the Autism Brain Imaging Data Exchange (ABIDE)-a grassroots consortium aggregating and openly sharing 1112 existing resting-state functional magnetic resonance imaging (R-fMRI) data sets with corresponding structural MRI and phenotypic information from 539 individuals with ASDs and 573 age-matched typical controls (TCs; 7-64 years) (http://fcon_1000.projects.nitrc.org/indi/abide/). Here, we present this resource and demonstrate its suitability for advancing knowledge of ASD neurobiology based on analyses of 360 male subjects with ASDs and 403 male age-matched TCs. We focused on whole-brain intrinsic functional connectivity and also survey a range of voxel-wise measures of intrinsic functional brain architecture. Whole-brain analyses reconciled seemingly disparate themes of both hypo- and hyperconnectivity in the ASD literature; both were detected, although hypoconnectivity dominated, particularly for corticocortical and interhemispheric functional connectivity. Exploratory analyses using an array of regional metrics of intrinsic brain function converged on common loci of dysfunction in ASDs (mid- and posterior insula and posterior cingulate cortex), and highlighted less commonly explored regions such as the thalamus. 
The survey of the ABIDE R-fMRI data sets provides unprecedented demonstrations of both replication and novel discovery. By pooling multiple international data sets, ABIDE is expected to accelerate the pace of discovery setting the stage for the next generation of ASD studies.",2013-06-18 +23251048,CellLineMiner: a knowledge portal for human cell lines.,"

Unlabelled

Experimental models of human tissues and disease phenotypes frequently rely upon immortalized cell lines, which are easily accessible and simple to use due to their infinite capability of cell division. For decades, cell lines have been used to investigate cellular mechanisms of disease and the efficacy of drugs, most prominently for human cancers. However, the large body of knowledge with respect to human cell lines exists primarily in an unstructured fashion, that is, as free text in the scientific literature. Here we present CellLineMiner, a novel text mining-based web database that provides a comprehensive view of human cell line knowledge. The application offers a simple search in all indexed cell lines, accompanied by a rapid display of all identified literature associations. The CellLineMiner is intended to serve as a knowledge resource companion to the cellular model systems used in biomedical research.

Availability

CellLineMiner is accessible at http://dev.pubgene.com/cellmine.",2012-11-13 +28542205,ROTS: An R package for reproducibility-optimized statistical testing.,"Differential expression analysis is one of the most common types of analyses performed on various biological data (e.g. RNA-seq or mass spectrometry proteomics). It is the process that detects features, such as genes or proteins, showing statistically significant differences between the sample groups under comparison. A major challenge in the analysis is the choice of an appropriate test statistic, as different statistics have been shown to perform well in different datasets. To this end, the reproducibility-optimized test statistic (ROTS) adjusts a modified t-statistic according to the inherent properties of the data and provides a ranking of the features based on their statistical evidence for differential expression between two groups. ROTS has already been successfully applied in a range of different studies from transcriptomics to proteomics, showing competitive performance against other state-of-the-art methods. To promote its widespread use, we introduce here a Bioconductor R package for performing ROTS analysis conveniently on different types of omics data. To illustrate the benefits of ROTS in various applications, we present three case studies, involving proteomics and RNA-seq data from public repositories, including both bulk and single cell data. The package is freely available from Bioconductor (https://www.bioconductor.org/packages/ROTS).",2017-05-25 +30590411,Folic acid supplementation enhances arsenic methylation: results from a folic acid and creatine supplementation randomized controlled trial in Bangladesh.,"

Background

Arsenic exposure through drinking water persists in many regions. Inorganic As (InAs) is methylated to monomethyl-arsenical species (MMAs) and dimethyl-arsenical species (DMAs), facilitating urinary excretion. Arsenic methylation is dependent on one-carbon metabolism, which is influenced by nutritional factors such as folate and creatine.

Objective

This study investigated the effects of folic acid (FA) and/or creatine supplementation on the proportion of As metabolites in urine.

Design

In a 24-wk randomized, double-blinded, placebo-controlled trial, 622 participants were assigned to receive FA (400 or 800 μg per day), 3 g creatine per day, 400 μg FA + 3 g creatine per day, or placebo. The majority of participants were folate sufficient; all received As-removal water filters. From wk 12-24, half of the participants receiving FA received placebo.

Results

Among groups receiving FA, the mean decrease in ln(%InAs) and %MMAs and increase in %DMAs exceeded those of the placebo group at wk 6 and 12 (P < 0.05). In the creatine group, the mean decrease in %MMAs exceeded that of the placebo group at wk 6 and 12 (P < 0.05); creatine supplementation did not affect change in %InAs or %DMAs. The decrease in %MMAs at wk 6 and 12 was larger in the 800 µg FA than in the 400 µg FA group (P = 0.034). There were no differences in treatment effects between the 400 µg FA and creatine + FA groups. Data suggest a rebound in As metabolite proportions after FA cessation; at wk 24, log(%InAs) and %DMAs were not significantly different than baseline levels among participants who discontinued FA supplementation.

Conclusions

The results of this study confirm that FA supplementation rapidly and significantly increases methylation of InAs to DMAs. Further research is needed to understand the strong cross-sectional associations between urinary creatinine and As methylation in previous studies. This trial was registered at https://clinicaltrials.gov as NCT01050556.",2019-02-01 +30277498,PiGx: reproducible genomics analysis pipelines with GNU Guix. ,"In bioinformatics, as well as other computationally intensive research fields, there is a need for workflows that can reliably produce consistent output, from known sources, independent of the software environment or configuration settings of the machine on which they are executed. Indeed, this is essential for controlled comparison between different observations and for the wider dissemination of workflows. However, providing this type of reproducibility and traceability is often complicated by the need to accommodate the myriad dependencies included in a larger body of software, each of which generally comes in various versions. Moreover, in many fields (bioinformatics being a prime example), these versions are subject to continual change due to rapidly evolving technologies, further complicating problems related to reproducibility. Here, we propose a principled approach for building analysis pipelines and managing their dependencies with GNU Guix. As a case study to demonstrate the utility of our approach, we present a set of highly reproducible pipelines called PiGx for the analysis of RNA sequencing, chromatin immunoprecipitation sequencing, bisulfite-treated DNA sequencing, and single-cell resolution RNA sequencing. All pipelines process raw experimental data and generate reports containing publication-ready plots and figures, with interactive report elements and standard observables. 
Users may install these highly reproducible packages and apply them to their own datasets without any special computational expertise beyond the use of the command line. We hope such a toolkit will provide immediate benefit to laboratory workers wishing to process their own datasets or bioinformaticians seeking to automate all, or parts of, their analyses. In the long term, we hope our approach to reproducibility will serve as a blueprint for reproducible workflows in other areas. Our pipelines, along with their corresponding documentation and sample reports, are available at http://bioinformatics.mdc-berlin.de/pigx.",2018-12-01 +29878078,AA9int: SNP interaction pattern search using non-hierarchical additive model set.,"

Motivation

The use of single nucleotide polymorphism (SNP) interactions to predict complex diseases has received increasing attention over the past decade, but related statistical methods are still immature. We previously proposed the SNP Interaction Pattern Identifier (SIPI) approach to evaluate 45 SNP interaction patterns. SIPI is statistically powerful but suffers from a large computation burden. For large-scale studies, it is necessary to use a powerful and computation-efficient method. The objective of this study is to develop an evidence-based mini-version of SIPI as the screening tool or solitary use and to evaluate the impact of inheritance mode and model structure on detecting SNP-SNP interactions.

Results

We tested two candidate approaches: the 'Five-Full' and 'AA9int' method. The Five-Full approach is composed of the five full interaction models considering three inheritance modes (additive, dominant and recessive). The AA9int approach is composed of nine interaction models by considering non-hierarchical model structure and the additive mode. Our simulation results show that AA9int has similar statistical power compared to SIPI and is superior to the Five-Full approach, and the impact of the non-hierarchical model structure is greater than that of the inheritance mode in detecting SNP-SNP interactions. In summary, it is recommended that AA9int is a powerful tool to be used either alone or as the screening stage of a two-stage approach (AA9int+SIPI) for detecting SNP-SNP interactions in large-scale studies.

Availability and implementation

The 'AA9int' and 'parAA9int' functions (standard and parallel computing version) are added in the SIPI R package, which is freely available at https://linhuiyi.github.io/LinHY_Software/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +30309340,Accurate prediction of protein-lncRNA interactions by diffusion and HeteSim features across heterogeneous network.,"

Background

Identifying the interactions between proteins and long non-coding RNAs (lncRNAs) is of great importance to decipher the functional mechanisms of lncRNAs. However, current experimental techniques for detection of lncRNA-protein interactions are limited and inefficient. Many methods have been proposed to predict protein-lncRNA interactions, but few studies make use of the topological information of heterogenous biological networks associated with the lncRNAs.

Results

In this work, we propose a novel approach, PLIPCOM, using two groups of network features to detect protein-lncRNA interactions. In particular, diffusion features and HeteSim features are extracted from protein-lncRNA heterogenous network, and then combined to build the prediction model using the Gradient Tree Boosting (GTB) algorithm. Our study highlights that the topological features of the heterogeneous network are crucial for predicting protein-lncRNA interactions. The cross-validation experiments on the benchmark dataset show that PLIPCOM method substantially outperformed previous state-of-the-art approaches in predicting protein-lncRNA interactions. We also prove the robustness of the proposed method on three unbalanced data sets. Moreover, our case studies demonstrate that our method is effective and reliable in predicting the interactions between lncRNAs and proteins.

Availability

The source code and supporting files are publicly available at: http://denglab.org/PLIPCOM/ .",2018-10-11 +25488330,Structural templates for comparative protein docking.,"Structural characterization of protein-protein interactions is important for understanding life processes. Because of the inherent limitations of experimental techniques, such characterization requires computational approaches. Along with the traditional protein-protein docking (free search for a match between two proteins), comparative (template-based) modeling of protein-protein complexes has been gaining popularity. Its development puts an emphasis on full and partial structural similarity between the target protein monomers and the protein-protein complexes previously determined by experimental techniques (templates). The template-based docking relies on the quality and diversity of the template set. We present a carefully curated, nonredundant library of templates containing 4950 full structures of binary complexes and 5936 protein-protein interfaces extracted from the full structures at 12 Å distance cut-off. Redundancy in the libraries was removed by clustering the PDB structures based on structural similarity. The value of the clustering threshold was determined from the analysis of the clusters and the docking performance on a benchmark set. High structural quality of the interfaces in the template and validation sets was achieved by automated procedures and manual curation. The library is included in the Dockground resource for molecular recognition studies at http://dockground.bioinformatics.ku.edu.",2015-06-13 +23161682,PRGdb 2.0: towards a community-based database model for the analysis of R-genes in plants.,"The Plant Resistance Genes database (PRGdb; http://prgdb.org) is a comprehensive resource on resistance genes (R-genes), a major class of genes in plant genomes that convey disease resistance against pathogens. 
Initiated in 2009, the database has grown more than 6-fold to recently include annotation derived from recent plant genome sequencing projects. Release 2.0 currently hosts useful biological information on a set of 112 known and 104 310 putative R-genes present in 233 plant species and conferring resistance to 122 different pathogens. Moreover, the website has been completely redesigned with the implementation of Semantic MediaWiki technologies, which makes our repository freely accessed and easily edited by any scientists. To this purpose, we encourage plant biologist experts to join our annotation effort and share their knowledge on resistance-gene biology with the rest of the scientific community.",2012-11-17 +,First Report of Recombinant Potato virus Y Strains Infecting Potato in Jordan,"Potato (Solanum tuberosum L.) is an important vegetable crop in Jordan, occupying second position after olives. In 2012, potatoes were planted on about 6,000 ha with a production of about 141,000 t (2). Potato virus Y (PVY) is a serious problem for potato production worldwide. Recombinant strains of the virus were reported to cause tuber necrotic ringspot disease (PTNRD) in many potato-growing regions of the world. In the last few years, a new recombinant PVYᴺᵀᴺ⁻ᴺᵂ that belongs to PVYZ (3) has been reported in the neighboring Syria. It included three recombination patterns, SYR-I, SYR-II, and SYR-III, and caused severe PTNRD (1). Since PVY is easily transmitted from one region to another by aphid vectors and infected potato seeds, this study was initiated to investigate the possible occurrence of PVY strains in Jordan. In October 2013, 33 leaf samples were collected from symptomatic potato plants cv. Spunta from Wadi Rum, Jordan (GPS coordinates 29°31′37.76″ N, 35°42′48.75″ E), the largest potato-producing area in Jordan. Sampled plants displayed leaf mottling and yellowing, symptoms similar to those caused by PVY. 
All samples were tested for PVY by DAS-ELISA using the ELISA kit (monoclonal cocktail) developed by BIOREBA (Reinach, Switzerland) to detect all PVY isolates. Twenty-nine samples were found positive for PVY by ELISA. To confirm virus infection, total RNA was extracted from all ELISA-positive samples and used as template in uniplex RT-PCR using strain-specific primers (1). The band pattern of PCR amplicons showed that 12 samples were infected with PVYᴺᵀᴺ⁻ᴺᵂ genotype SYR-III and produced bands of 1,085, 441, and 278 bp. One sample was infected with PVYᴺᵀᴺ (A) and produced bands of 1,307, 633, and 441 bp, and one other sample was infected with PVYᴺᵀᴺ⁻ᴺᵂ genotype SYR-II and produced bands of 1,085 and 441 bp. Mixed infection with PVYᴺᵀᴺ⁻ᴺᵂ genotype SYR-III and PVYᴺᵀᴺ (B) was also detected in one sample producing bands of 278, 441, 1,085, and 1,307 bp. To confirm infection with the recombinant strains, PCR fragments of 278 bp amplified from three samples and 1,085 bp obtained from another three samples were directly sequenced and sequences were deposited in GenBank under accession numbers KJ159968, KJ159969, and KJ159970 for the 278-bp fragment and KJ159974, KJ159975, and KJ159976 for the 1,085-bp fragment. Sequence comparison with other PVY strains available in the NCBI database showed that the 278-bp fragment had the highest nucleotide sequence identity (100%) with PVY isolates SYR-III-A26 (AB461467) and SYR-III-2-4 (AB461457) from Syria. BLAST searches also showed that the 1,085-bp fragment shared 99% nucleotide identities with PVY isolates SYR-II-L3 (AB461482) and SYR-II-Be4 (AB461474) from Aleppo, Syria. To our knowledge, this is the first report of PVY recombinants in Jordan, and the first report of PVYᴺᵀᴺ⁻ᴺᵂ recombinants infecting potato crop outside Syria. Since Europe is the main supplier of potato seeds for farmers in Jordan and Syria, the introduction of PVYᴺᵀᴺ⁻ᴺᵂ to the region could have happened through infected potato seeds. 
Results of this study create new challenges for potato growers in Jordan as well as other countries in the region.References: (1) M. Chikh Ali et al. J. Virol. Methods 165:15, 2010. (2) FAO. http://faostat.fao.org/ (3) A. V. Karasev and S. M. Gray. Ann. Rev. Phytopathol. 51:571, 2013.",2014-07-01 +27153697,FRAGSION: ultra-fast protein fragment library generation by IOHMM sampling.,"

Motivation

Speed, accuracy and robustness of building protein fragment library have important implications in de novo protein structure prediction since fragment-based methods are one of the most successful approaches in template-free modeling (FM). Majority of the existing fragment detection methods rely on database-driven search strategies to identify candidate fragments, which are inherently time-consuming and often hinder the possibility to locate longer fragments due to the limited sizes of databases. Also, it is difficult to alleviate the effect of noisy sequence-based predicted features such as secondary structures on the quality of fragment.

Results

Here, we present FRAGSION, a database-free method to efficiently generate protein fragment library by sampling from an Input-Output Hidden Markov Model. FRAGSION offers some unique features compared to existing approaches in that it (i) is lightning-fast, consuming only few seconds of CPU time to generate fragment library for a protein of typical length (300 residues); (ii) can generate dynamic-size fragments of any length (even for the whole protein sequence) and (iii) offers ways to handle noise in predicted secondary structure during fragment sampling. On a FM dataset from the most recent Critical Assessment of Structure Prediction, we demonstrate that FGRAGSION provides advantages over the state-of-the-art fragment picking protocol of ROSETTA suite by speeding up computation by several orders of magnitude while achieving comparable performance in fragment quality.

Availability and implementation

Source code and executable versions of FRAGSION for Linux and MacOS is freely available to non-commercial users at http://sysbio.rnet.missouri.edu/FRAGSION/ It is bundled with a manual and example data.

Contact

chengji@missouri.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-18 +28149326,Genome-wide methylation data mirror ancestry information.,"

Background

Genetic data are known to harbor information about human demographics, and genotyping data are commonly used for capturing ancestry information by leveraging genome-wide differences between populations. In contrast, it is not clear to what extent population structure is captured by whole-genome DNA methylation data.

Results

We demonstrate, using three large-cohort 450K methylation array data sets, that ancestry information signal is mirrored in genome-wide DNA methylation data and that it can be further isolated more effectively by leveraging the correlation structure of CpGs with cis-located SNPs. Based on these insights, we propose a method, EPISTRUCTURE, for the inference of ancestry from methylation data, without the need for genotype data.

Conclusions

EPISTRUCTURE can be used to infer ancestry information of individuals based on their methylation data in the absence of corresponding genetic data. Although genetic data are often collected in epigenetic studies of large cohorts, these are typically not made publicly available, making the application of EPISTRUCTURE especially useful for anyone working on public data. Implementation of EPISTRUCTURE is available in GLINT, our recently released toolset for DNA methylation analysis at: http://glint-epigenetics.readthedocs.io.",2017-01-03 +29106528,The First Mitochondrial Genomics and Evolution SMBE-Satellite Meeting: A New Scientific Symbiosis.,"The central role of the mitochondrion for cellular and organismal metabolism is well known, yet its functional role in evolution has rarely been featured in leading international conferences. Moreover, the contribution of mitochondrial genetics to complex disease phenotypes is particularly important, and although major advances have been made in the field of genomics, mitochondrial genomic data have in many cases been overlooked. Accumulating data and new knowledge support a major contribution of this maternally inherited genome, and its interactions with the nucleus, to both major evolutionary processes and diverse disease phenotypes. These advances encouraged us to assemble the first Mitochondrial Genomics and Evolution (MGE) meeting-an SMBE satellite and Israeli Science foundation international conference (Israel, September 2017). Here, we report the content and outcome of the MGE meeting (https://www.mge2017.com/; last accessed November 5, 2017).",2017-11-01 +30306392,Quasi-SMILES: quantitative structure-activity relationships to predict anticancer activity.,"Reliable prediction of anticancer potential of different substances for different cells using unambiguous algorithms is attractive alternative of experimental investigation of impacts of various anticancer agents to various cells. 
Quasi-SMILES is a sequence of symbols, which represents all available eclectic data, i.e. not only molecular structure, but also different conditions, which can have influence on examined endpoint (e.g. kinds of cells: human breast; human colon; human liver; human lung). In this work, quasi-SMILES have been used to establish predictive models for anticancer activity isoquinoline quinones related to different cells. Descriptor calculated with optimal correlation weights of different fragments of quasi-SMILES defined by the Monte Carlo technique is used to predict pIC50 as a mathematical function of molecular structure and kinds of cells. The using of the so-called index of ideality of correlation for optimization by the Monte Carlo method improves predictive potential of the model. The statistical quality of the models based on correlation weights of fragments of quasi-SMILES is good. The range of correlation coefficient between experimental and calculated pIC50 for external validation set is 0.76-0.89. The statistical stable promoters for increase and for decrease in pIC50 are established. These models can be used to improve quality of pharmaceutical agents. These computational experiments can be reproduced with available on the Internet software ( http://www.insilico.eu/coral ).",2018-10-10 +22619501,Identification of a gene expression signature common to distinct cancer pathways.,Mutations in cancer-causing genes induce changes in gene expression programs critical for malignant cell transformation. Publicly available gene expression profiles produced by modulating the expression of distinct cancer genes may therefore represent a rich resource for the identification of gene signatures common to seemingly unrelated cancer genes. We combined automatic retrieval with manual validation to obtain a data set of high-quality gene microarray profiles. 
This data set was used to create logical models of the signaling events underlying the observed expression changes produced by various cancer genes and allowed to uncover unknown and verifiable interactions. Data clustering revealed novel sets of gene expression profiles commonly regulated by distinct cancer genes. Our method allows retrieval of significant new information and testable hypotheses from a pool of deposited cancer gene expression experiments that are otherwise not apparent or appear insignificant from single measurements. The complete results are available through a web-application at http://biodata.ethz.ch/cgi-bin/geologic.,2012-05-08 +28545393,quantGenius: implementation of a decision support system for qPCR-based gene quantification.,"

Background

Quantitative molecular biology remains a challenge for researchers due to inconsistent approaches for control of errors in the final results. Due to several factors that can influence the final result, quantitative analysis and interpretation of qPCR data are still not trivial. Together with the development of high-throughput qPCR platforms, there is a need for a tool allowing for robust, reliable and fast nucleic acid quantification.

Results

We have developed ""quantGenius"" ( http://quantgenius.nib.si ), an open-access web application for a reliable qPCR-based quantification of nucleic acids. The quantGenius workflow interactively guides the user through data import, quality control (QC) and calculation steps. The input is machine- and chemistry-independent. Quantification is performed using the standard curve approach, with normalization to one or several reference genes. The special feature of the application is the implementation of user-guided QC-based decision support system, based on qPCR standards, that takes into account pipetting errors, assay amplification efficiencies, limits of detection and quantification of the assays as well as the control of PCR inhibition in individual samples. The intermediate calculations and final results are exportable in a data matrix suitable for further statistical analysis or visualization. We additionally compare the most important features of quantGenius with similar advanced software tools and illustrate the importance of proper QC system in the analysis of qPCR data in two use cases.

Conclusions

To our knowledge, quantGenius is the only qPCR data analysis tool that integrates QC-based decision support and will help scientists to obtain reliable results which are the basis for biologically meaningful data interpretation.",2017-05-25 +29621565,"P. aeruginosa blood stream infection isolates: A ""full house"" of virulence genes in isolates associated with rapid patient death and patient survival.","We have recently characterised the epidemiology of P. aeruginosa blood stream infection (BSI) in a large retrospective multicentre cohort study [1]. Utilising corresponding patient BSI isolates we aimed to characterise the genotypic virulence profile of the P. aeruginosa isolates that were associated with rapid death in the non-neutropenic host. Five P. aeruginosa BSI episodes were identified from a larger cohort of P. aeruginosa BSI episodes previously described by McCarthy et al. [1]. The genotypic profile of another 5 isolates from this cohort in whom the non-neutropenic host had survived one year post the BSI was also analysed for comparison. These isolates underwent Illumina whole genome sequencing, de novo assembly and annotation. A comprehensive suite of virulence genes was collated from the Pseudomonas Genome Database (http://www.pseudomonas.com/) and were searched by BLAST based analysis in assemblies of all BSI isolates [2]. There was extensive conservation of virulence genes across all of the BSI isolates studied. The exoU gene was found in two isolates from patients who died rapidly and in one isolate from a patient that survived one year post BSI. The higA and higB genes were detected in all isolates. The exlA gene was not detected in any of the isolates studied. These findings suggest that to cause a BSI that it is only the virulent P. aeruginosa isolate that succeeds. The virulence gene profile seen was independent of patient outcome. 
Further phenotypic correlation is required to determine if there is any difference in genotypic expression by the BSI isolates that were associated with rapid death of the host and those BSI isolates associated with host survival at one year.",2018-04-03 +29444641,Oasis 2: improved online analysis of small RNA-seq data.,"

Background

Small RNA molecules play important roles in many biological processes and their dysregulation or dysfunction can cause disease. The current method of choice for genome-wide sRNA expression profiling is deep sequencing.

Results

Here we present Oasis 2, which is a new main release of the Oasis web application for the detection, differential expression, and classification of small RNAs in deep sequencing data. Compared to its predecessor Oasis, Oasis 2 features a novel and speed-optimized sRNA detection module that supports the identification of small RNAs in any organism with higher accuracy. Next to the improved detection of small RNAs in a target organism, the software now also recognizes potential cross-species miRNAs and viral and bacterial sRNAs in infected samples. In addition, novel miRNAs can now be queried and visualized interactively, providing essential information for over 700 high-quality miRNA predictions across 14 organisms. Robust biomarker signatures can now be obtained using the novel enhanced classification module.

Conclusions

Oasis 2 enables biologists and medical researchers to rapidly analyze and query small RNA deep sequencing data with improved precision, recall, and speed, in an interactive and user-friendly environment.

Availability and implementation

Oasis 2 is implemented in Java, J2EE, mysql, Python, R, PHP and JavaScript. It is freely available at https://oasis.dzne.de.",2018-02-14 +29036404,The value of prior knowledge in machine learning of complex network systems.,"

Motivation

Our overall goal is to develop machine-learning approaches based on genomics and other relevant accessible information for use in predicting how a patient will respond to a given proposed drug or treatment. Given the complexity of this problem, we begin by developing, testing and analyzing learning methods using data from simulated systems, which allows us access to a known ground truth. We examine the benefits of using prior system knowledge and investigate how learning accuracy depends on various system parameters as well as the amount of training data available.

Results

The simulations are based on Boolean networks-directed graphs with 0/1 node states and logical node update rules-which are the simplest computational systems that can mimic the dynamic behavior of cellular systems. Boolean networks can be generated and simulated at scale, have complex yet cyclical dynamics and as such provide a useful framework for developing machine-learning algorithms for modular and hierarchical networks such as biological systems in general and cancer in particular. We demonstrate that utilizing prior knowledge (in the form of network connectivity information), without detailed state equations, greatly increases the power of machine-learning algorithms to predict network steady-state node values ('phenotypes') and perturbation responses ('drug effects').

Availability and implementation

Links to codes and datasets here: https://gray.mgh.harvard.edu/people-directory/71-david-craft-phd.

Contact

dcraft@broadinstitute.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +24565382,M-Finder: Uncovering functionally associated proteins from interactome data integrated with GO annotations.,"

Background

Protein-protein interactions (PPIs) play a key role in understanding the mechanisms of cellular processes. The availability of interactome data has catalyzed the development of computational approaches to elucidate functional behaviors of proteins on a system level. Gene Ontology (GO) and its annotations are a significant resource for functional characterization of proteins. Because of wide coverage, GO data have often been adopted as a benchmark for protein function prediction on the genomic scale.

Results

We propose a computational approach, called M-Finder, for functional association pattern mining. This method employs semantic analytics to integrate the genome-wide PPIs with GO data. We also introduce an interactive web application tool that visualizes a functional association network linked to a protein specified by a user. The proposed approach comprises two major components. First, the PPIs that have been generated by high-throughput methods are weighted in terms of their functional consistency using GO and its annotations. We assess two advanced semantic similarity metrics which quantify the functional association level of each interacting protein pair. We demonstrate that these measures outperform the other existing methods by evaluating their agreement to other biological features, such as sequence similarity, the presence of common Pfam domains, and core PPIs. Second, the information flow-based algorithm is employed to discover a set of proteins functionally associated with the protein in a query and their links efficiently. This algorithm reconstructs a functional association network of the query protein. The output network size can be flexibly determined by parameters.

Conclusions

M-Finder provides a useful framework to investigate functional association patterns with any protein. This software will also allow users to perform further systematic analysis of a set of proteins for any specific function. It is available online at http://bionet.ecs.baylor.edu/mfinder.",2013-11-07 +28888135,STRSeq: A catalog of sequence diversity at human identification Short Tandem Repeat loci.,"The STR Sequencing Project (STRSeq) was initiated to facilitate the description of sequence-based alleles at the Short Tandem Repeat (STR) loci targeted in human identification assays. This international collaborative effort, which has been endorsed by the ISFG DNA Commission, provides a framework for communication among laboratories. The initial data used to populate the project are the aggregate alleles observed in targeted sequencing studies across four laboratories: National Institute of Standards and Technology (N=1786), Kings College London (N=1043), University of North Texas Health Sciences Center (N=839), and University of Santiago de Compostela (N=944), for a total of 4612 individuals. STRSeq data are maintained as GenBank records at the U.S. National Center for Biotechnology Information (NCBI), which participates in a daily data exchange with the DNA DataBank of Japan (DDBJ) and the European Nucleotide Archive (ENA). Each GenBank record contains the observed sequence of a STR region, annotation (""bracketing"") of the repeat region and flanking region polymorphisms, information regarding the sequencing assay and data quality, and backward compatible length-based allele designation. STRSeq GenBank records are organized within a BioProject at NCBI (https://www.ncbi.nlm.nih.gov/bioproject/380127), which is sub-divided into: commonly used autosomal STRs, alternate autosomal STRs, Y-chromosomal STRs, and X-chromosomal STRs. Each of these categories is further divided into locus-specific BioProjects. 
The BioProject hierarchy facilitates access to the GenBank records by browsing, BLAST searching, or ftp download. Future plans include user interface tools at strseq.nist.gov, a pathway for submission of additional allele records by laboratories performing population sample sequencing and interaction with the STRidER web portal for quality control (http://strider.online).",2017-09-01 +24358361,Identification of cytotoxic T lymphocyte epitopes on swine viruses: multi-epitope design for universal T cell vaccine.,"Classical swine fever (CSF), foot-and-mouth disease (FMD) and porcine reproductive and respiratory syndrome (PRRS) are the primary diseases affecting the pig industry globally. Vaccine induced CD8(+) T cell-mediated immune response might be long-lived and cross-serotype and thus deserve further attention. Although large panels of synthetic overlapping peptides spanning the entire length of the polyproteins of a virus facilitate the detection of cytotoxic T lymphocyte (CTL) epitopes, it is an exceedingly costly and cumbersome approach. Alternatively, computational predictions have been proven to be of satisfactory accuracy and are easily performed. Such a method enables the systematic identification of genome-wide CTL epitopes by incorporating epitope prediction tools in analyzing large numbers of viral sequences. In this study, we have implemented an integrated bioinformatics pipeline for the identification of CTL epitopes of swine viruses including the CSF virus (CSFV), FMD virus (FMDV) and PRRS virus (PRRSV) and assembled these epitopes on a web resource to facilitate vaccine design. Identification of epitopes for cross protections to different subtypes of virus are also reported in this study and may be useful for the development of a universal vaccine against such viral infections among the swine population. 
The CTL epitopes identified in this study have been evaluated in silico and possibly provide more and wider protection in compared to traditional single-reference vaccine design. The web resource is free and open to all users through http://sb.nhri.org.tw/ICES.",2013-12-17 +28525573,NOREVA: normalization and evaluation of MS-based metabolomics data.,"Diverse forms of unwanted signal variations in mass spectrometry-based metabolomics data adversely affect the accuracies of metabolic profiling. A variety of normalization methods have been developed for addressing this problem. However, their performances vary greatly and depend heavily on the nature of the studied data. Moreover, given the complexity of the actual data, it is not feasible to assess the performance of methods by single criterion. We therefore developed NOREVA to enable performance evaluation of various normalization methods from multiple perspectives. NOREVA integrated five well-established criteria (each with a distinct underlying theory) to ensure more comprehensive evaluation than any single criterion. It provided the most complete set of the available normalization methods, with unique features of removing overall unwanted variations based on quality control metabolites and allowing quality control samples based correction sequentially followed by data normalization. The originality of NOREVA and the reliability of its algorithms were extensively validated by case studies on five benchmark datasets. In sum, NOREVA is distinguished for its capability of identifying the well performed normalization method by taking multiple criteria into consideration and can be an indispensable complement to other available tools. 
NOREVA can be freely accessed at http://server.idrb.cqu.edu.cn/noreva/.",2017-07-01 +28569763,Integrating macromolecular X-ray diffraction data with the graphical user interface iMosflm.,"X-ray crystallography is the predominant source of structural information for biological macromolecules, providing fundamental insights into biological function. The availability of robust and user-friendly software to process the collected X-ray diffraction images makes the technique accessible to a wider range of scientists. iMosflm/MOSFLM (http://www.mrc-lmb.cam.ac.uk/harry/imosflm) is a software package designed to achieve this goal. The graphical user interface (GUI) version of MOSFLM (called iMosflm) is designed to guide inexperienced users through the steps of data integration, while retaining powerful features for more experienced users. Images from almost all commercially available X-ray detectors can be handled using this software. Although the program uses only 2D profile fitting, it can readily integrate data collected in the 'fine phi-slicing' mode (in which the rotation angle per image is less than the crystal mosaic spread by a factor of at least 2), which is commonly used with modern very fast readout detectors. The GUI provides real-time feedback on the success of the indexing step and the progress of data processing. This feedback includes the ability to monitor detector and crystal parameter refinement and to display the average spot shape in different regions of the detector. Data scaling and merging tasks can be initiated directly from the interface. Using this protocol, a data set of 360 images with ∼2,000 reflections per image can be processed in ∼4 min.",2017-06-01 +27081555,iJGVD: an integrative Japanese genome variation database based on whole-genome sequencing.,"The integrative Japanese Genome Variation Database (iJGVD; http://ijgvd.megabank.tohoku.ac.jp/) provides genomic variation data detected by whole-genome sequencing (WGS) of Japanese individuals. 
Specifically, the database contains variants detected by WGS of 1,070 individuals who participated in a genome cohort study of the Tohoku Medical Megabank Project. In the first release, iJGVD includes >4,300,000 autosomal single nucleotide variants (SNVs) whose minor allele frequencies are >5.0%.",2015-11-26 +,P17.54NOVEL PROTEINS ASSOCIATED WITH INVADOPODIA STRUCTURES IN GLIOBLASTOMA MULTIFORME,"Glioblastoma multiforme (GBM) tumours are diffusely infiltrative making surgical resection virtually impossible. Invasion of brain parenchyma is facilitated by cell migration and degradation of the extracellular matrix (ECM). Invadopodia are actin-rich organelles that protrude from the ventral side of the plasma membrane in direct contact with the ECM and play an important role in mesenchymal cell invasion. We have characterized the ‘invasive potential’ of a panel of established GBM cell lines (n = 9) using QCM gelatin invadopodia assay (Millipore) and performed comparative, quantitative membrane mass spectrometry-based proteomic analyses of highly invasive vs. less-invasive cell lines. All GBM cells produced invadopodia, and there was a significant difference between the most invasive (U87MG) and least invasive (LN229) cells (65%, percentage of total cell area; p = 0.0001). Overall, 1667 quantifiable proteins were identified from duplicate analyses, of which 76% mapped to membrane structures using the David bioinformatics database (http://david.abcc.ncifcrf.gov). The differential abundance of 38 proteins significantly correlated with the degree of invasion (r2 > 0.45 or r2 < -0.45; n ≥ 5; p < 0.05) and are predominantly involved in cellular movement and cell-cell and interactions. Fluorescence microscopy demonstrates co-localisation of novel proteins to invadopodia structures and siRNA knockdown of a target protein confirmed its role in invadopodia-formation. 
Invadopodia-associated membrane proteins could be novel targets for anti-invasive GBM therapies.",2014-09-01 +29092938,P-MartCancer-Interactive Online Software to Enable Analysis of Shotgun Cancer Proteomic Datasets.,"P-MartCancer is an interactive web-based software environment that enables statistical analyses of peptide or protein data, quantitated from mass spectrometry-based global proteomics experiments, without requiring in-depth knowledge of statistical programming. P-MartCancer offers a series of statistical modules associated with quality assessment, peptide and protein statistics, protein quantification, and exploratory data analyses driven by the user via customized workflows and interactive visualization. Currently, P-MartCancer offers access and the capability to analyze multiple cancer proteomic datasets generated through the Clinical Proteomics Tumor Analysis Consortium at the peptide, gene, and protein levels. P-MartCancer is deployed as a web service (https://pmart.labworks.org/cptac.html), alternatively available via Docker Hub (https://hub.docker.com/r/pnnl/pmart-web/). Cancer Res; 77(21); e47-50. ©2017 AACR.",2017-11-01 +29373760,Automated annotation of mobile antibiotic resistance in Gram-negative bacteria: the Multiple Antibiotic Resistance Annotator (MARA) and database.,"

Background

Multiresistance in Gram-negative bacteria is often due to acquisition of several different antibiotic resistance genes, each associated with a different mobile genetic element, that tend to cluster together in complex conglomerations. Accurate, consistent annotation of resistance genes, the boundaries and fragments of mobile elements, and signatures of insertion, such as DR, facilitates comparative analysis of complex multiresistance regions and plasmids to better understand their evolution and how resistance genes spread.

Objectives

To extend the Repository of Antibiotic resistance Cassettes (RAC) web site, which includes a database of 'features', and the Attacca automatic DNA annotation system, to encompass additional resistance genes and all types of associated mobile elements.

Methods

Antibiotic resistance genes and mobile elements were added to RAC, from existing registries where possible. Attacca grammars were extended to accommodate the expanded database, to allow overlapping features to be annotated and to identify and annotate features such as composite transposons and DR.

Results

The Multiple Antibiotic Resistance Annotator (MARA) database includes antibiotic resistance genes and selected mobile elements from Gram-negative bacteria, distinguishing important variants. Sequences can be submitted to the MARA web site for annotation. A list of positions and orientations of annotated features, indicating those that are truncated, DR and potential composite transposons is provided for each sequence, as well as a diagram showing annotated features approximately to scale.

Conclusions

The MARA web site (http://mara.spokade.com) provides a comprehensive database for mobile antibiotic resistance in Gram-negative bacteria and accurately annotates resistance genes and associated mobile elements in submitted sequences to facilitate comparative analysis.",2018-04-01 +29603941,Hyperspectral database of fruits and vegetables.,"We have built a hyperspectral database of 42 fruits and vegetables. Both the outside (skin) and inside of the objects were imaged. We used a Specim VNIR HS-CL-30-V8E-OEM mirror-scanning hyperspectral camera and took pictures at a spatial resolution of ∼57  px/deg by 800 pixels at a wavelength resolution of ∼1.12  nm. A stable, broadband illuminant was used. Images and software are freely available on our webserver (http://www.allpsych.uni-giessen.de/GHIFVD; pronounced ""gift""). We performed two kinds of analyses on these images. First, when comparing the insides and outsides of the objects, we observed that the insides were lighter than the skins, and that the hues of the insides and skins were significantly correlated (circular correlation=0.638). Second, we compared the color distribution within each object to corresponding human color discrimination thresholds. We found a significant correlation (0.75) between the orientation of ellipses fit to the chromaticity distributions of our fruits and vegetables with the orientations of interpolated MacAdam discrimination ellipses. This indicates a close relationship between sensory processing and the characteristics of environmental objects.",2018-04-01 +29614602,[Molecular typing characterization of food-borne methicillin-resistant Staphylococcus aureus in China].,"Objective: To analyses the antimicrobial resistance and molecular characterization of 21 MRSA isolates cultured from retail foods from different provinces in China, and evaluate the molecular typing methods. 
Methods: Twenty-one MRSA isolates were obtained from national foodborne pathogen surveillance network in 2012 (Chinese salad, n=3; milk, n=1; cake, n=2; rice, n=1; cold noodle, n=1; spiced beef, n=1; dumpling, n=1; packed meal, n=1; salad, n=1; raw pork, n=9). The antimicrobial resistance of 21 strains to 12 antimicrobial agents was tested by broth dilution method. Polymerase chain reaction (PCR) and DNA sequencing were performed to obtain the genetic types of MLST (ST) and spa typing. The clonal complex (CC) was assigned by eBURST soft and the MLVA type (MT) and MLVA complex (MC) were identified via the database of the MLVA website (http://www.mlva.net). SmaI pulsed-field gel electrophoresis (SmaⅠ-PFGE) was also carried out to obtain the PFGE patterns of 21 strains. The genetic diversity and discriminatory power of typing were calculated by the Simpson's index of diversity (diversity index, DI) to find out the best genotyping method for MRSA. Results: All MRSA isolates showed multi-drug resistance(MDR), and were resistant to oxacillin, benzylpenicillin, clindamycin and erythromycin, and 71.4% (15/21), 47.6% (10/21), 42.9% (9/21) and 28.6% (6/21) of the MRSA isolates were resistant to tetracycline, ciprofloxacin, trimethoprim/sulfamethoxazole and gentamicin, respectively. Moreover, one strain was found to be resistant to all three antimicrobials of levofloxacin, moxifloxacin and rifampicin. Great diversity was found in these food-associated MRSA (6 STs, 7 spa types, and 9 MTs). PFGE patterns were more diverse than those of other three molecular typing methods (19 pulse types). The index of diversity (DI) of PFGE, MLVA, spa typing and MLST was 0.99, 0.80, 0.73, and 0.61, respectively. Among the MRSA isolates, CC9-ST9-t899-MT929-MC2236 (PFGE Cluster Ⅴ) was the most prevalent clone, which were all cultured from raw pork (9 isolates). Besides, two MRSA were identified as CC59-ST338-t437-MT621-MC621 (PFGE Cluster Ⅳ). 
Different clones had their own resistance spectrum profiles. Conclusion: The food-borne MRSA isolates were all MDR in this study. Different clones had their own resistance spectrum profiles. MLVA represented a promising tool for molecular epidemiology tracing of MRSA in foodborne disease events.",2018-04-01 +30480487,Autologous Blood Patch Injection versus Hydrogel Plug in CT-guided Lung Biopsy: A Prospective Randomized Trial.,"Purpose To compare the effect of autologous blood patch injection (ABPI) with that of a hydrogel plug on the rate of pneumothorax at CT-guided percutaneous lung biopsy. Materials and Methods In this prospective randomized controlled trial ( https://ClinicalTrials.gov , NCT02224924), a noninferiority design was used for ABPI, with a 10% noninferiority margin when compared with the hydrogel plug, with the primary outcome of pneumothorax rate within 2 hours of biopsy. A type I error rate of 0.05 and 90% power were specified with a target study population of 552 participants (276 in each arm). From October 2014 to February 2017, all potential study participants referred for CT-guided lung biopsy (n = 2052) were assessed for enrollment. Results The data safety monitoring board recommended the trial be closed to accrual after an interim analysis met prespecified criteria for early stopping based on noninferiority. The final study group consisted of 453 participants who were randomly assigned to the ABPI (n = 226) or hydrogel plug (n = 227) arms. Of these, 407 underwent lung biopsy. Pneumothorax rates within 2 hours of biopsy were 21% (42 of 199) and 29% (60 of 208); chest tube rates were 9% (18 of 199) and 13% (27 of 208); and delayed pneumothorax rates within 2 weeks after biopsy were 1.4% (three of 199) and 1.5% (three of 208) in the ABPI and hydrogel plug arms, respectively. Conclusion Autologous blood patch injection is noninferior to a hydrogel plug regarding the rate of pneumothorax after CT-guided percutaneous lung biopsy. 
© RSNA, 2018 Online supplemental material is available for this article.",2018-11-27 +29440686,araGWAB: Network-based boosting of genome-wide association studies in Arabidopsis thaliana.,"Genome-wide association studies (GWAS) have been applied for the genetic dissection of complex phenotypes in Arabidopsis thaliana. However, the significantly associated single-nucleotide polymorphisms (SNPs) could not explain all the phenotypic variations. A major reason for missing true phenotype-associated loci is the strict P-value threshold after adjustment for multiple hypothesis tests to reduce false positives. This statistical limitation can be partly overcome by increasing the sample size, but at a much higher cost. Alternatively, weak phenotype-association signals can be boosted by integrating other types of data. Here, we present a web application for network-based Arabidopsis genome-wide association boosting-araGWAB-which augments the likelihood of association with the given phenotype by integrating GWAS summary statistics (SNP P-values) and co-functional gene network information. The integration utilized the inherent values of SNPs with subthreshold significance, thus substantially increasing the information usage of GWAS data. We found that araGWAB could more effectively retrieve genes known to be associated with various phenotypes relevant to defense against bacterial pathogens, flowering time regulation, and organ development in A. thaliana. We also found that many of the network-boosted candidate genes for the phenotypes were supported by previous publications. The araGWAB is freely available at http://www.inetbio.org/aragwab/ .",2018-02-13 +28155708,PARRoT- a homology-based strategy to quantify and compare RNA-sequencing from non-model organisms.,"

Background

Next-generation sequencing promises the de novo genomic and transcriptomic analysis of samples of interests. However, there are only a few organisms having reference genomic sequences and even fewer having well-defined or curated annotations. For transcriptome studies focusing on organisms lacking proper reference genomes, the common strategy is de novo assembly followed by functional annotation. However, things become even more complicated when multiple transcriptomes are compared.

Results

Here, we propose a new analysis strategy and quantification methods for quantifying expression level which not only generate a virtual reference from sequencing data, but also provide comparisons between transcriptomes. First, all reads from the transcriptome datasets are pooled together for de novo assembly. The assembled contigs are searched against NCBI NR databases to find potential homolog sequences. Based on the searched result, a set of virtual transcripts are generated and served as a reference transcriptome. By using the same reference, normalized quantification values including RC (read counts), eRPKM (estimated RPKM) and eTPM (estimated TPM) can be obtained that are comparable across transcriptome datasets. In order to demonstrate the feasibility of our strategy, we implement it in the web service PARRoT. PARRoT stands for Pipeline for Analyzing RNA Reads of Transcriptomes. It analyzes gene expression profiles for two transcriptome sequencing datasets. For better understanding of the biological meaning from the comparison among transcriptomes, PARRoT further provides linkage between these virtual transcripts and their potential function through showing best hits in SwissProt, NR database, assigning GO terms. Our demo datasets showed that PARRoT can analyze two paired-end transcriptomic datasets of approximately 100 million reads within just three hours.

Conclusions

In this study, we proposed and implemented a strategy to analyze transcriptomes from non-reference organisms which offers the opportunity to quantify and compare transcriptome profiles through a homolog based virtual transcriptome reference. By using the homolog based reference, our strategy effectively avoids the problems that may arise from inconsistencies among transcriptomes. This strategy will shed light on the field of comparative genomics for non-model organisms. We have implemented PARRoT as a web service which is freely available at http://parrot.cgu.edu.tw .",2016-12-22 +26551401,The microtubule-associated molecular pathways may be genetically disrupted in patients with Bipolar Disorder. Insights from the molecular cascades.,"Bipolar Disorder is a severe disease characterized by pathological mood swings from major depressive episodes to manic ones and vice versa. The biological underpinnings of Bipolar Disorder have yet to be defined. As a consequence, pharmacological treatments are suboptimal. In the present paper we test the hypothesis that the molecular pathways involved with the direct targets of lithium, hold significantly more genetic variations associated with BD. A molecular pathway approach finds its rationale in the polygenic nature of the disease. The pathways were tested in a sample of ∼ 7,000 patients and controls. Data are available from the public NIMH database. The definition of the pathways was conducted according to the National Cancer Institute (http://pid.nci.nih.gov/). As a result, 3 out of the 18 tested pathways related to lithium action resisted the permutation analysis and were found to be associated with BD. These pathways were related to Reelin, Integrins and Aurora. A pool of genes selected from the ones linked with the above pathways was further investigated in order to identify the fine molecular mechanics shared by our significant pathways and also their link with lithium mechanism of action. 
The data obtained point out to a possible involvement of microtubule-related mechanics.",2015-10-23 +26834590,Cross-Species Integrative Functional Genomics in GeneWeaver Reveals a Role for Pafah1b1 in Altered Response to Alcohol.,"Identifying the biological substrates of complex neurobehavioral traits such as alcohol dependency pose a tremendous challenge given the diverse model systems and phenotypic assessments used. To address this problem we have developed a platform for integrated analysis of high-throughput or genome-wide functional genomics studies. A wealth of such data exists, but it is often found in disparate, non-computable forms. Our interactive web-based software system, Gene Weaver (http://www.geneweaver.org), couples curated results from genomic studies to graph-theoretical tools for combinatorial analysis. Using this system we identified a gene underlying multiple alcohol-related phenotypes in four species. A search of over 60,000 gene sets in GeneWeaver's database revealed alcohol-related experimental results including genes identified in mouse genetic mapping studies, alcohol selected Drosophila lines, Rattus differential expression, and human alcoholic brains. We identified highly connected genes and compared these to genes currently annotated to alcohol-related behaviors and processes. The most highly connected gene not annotated to alcohol was Pafah1b1. Experimental validation using a Pafah1b1 conditional knock-out mouse confirmed that this gene is associated with an increased preference for alcohol and an altered thermoregulatory response to alcohol. Although this gene has not been previously implicated in alcohol-related behaviors, its function in various neural mechanisms makes a role in alcohol-related phenomena plausible. 
By making diverse cross-species functional genomics data readily computable, we were able to identify and confirm a novel alcohol-related gene that may have implications for alcohol use disorders and other effects of alcohol.",2016-01-21 +30254725,Association of TCF7L2 mutation and atypical diabetes in a Uruguayan population.,"

Aim

To investigate if mutations in TCF7L2 are associated with ""atypical diabetes"" in the Uruguayan population.

Methods

Healthy, nondiabetic controls (n = 133) and patients with type 2 diabetes (n = 177) were selected from among the presenting population at level-3 referral healthcare centers in Uruguay. Patients with type 2 diabetes were subgrouped according to ""atypical diabetes"" (n = 92) and ""classical diabetes"" (n = 85). Genotyping for the rs12255372 and rs7903146 single nucleotide polymorphisms (SNPs) in the TCF7L2 gene was carried out with TaqMan® probes. Random samples were sequenced by Macrogen Ltd. (South Korea). Statistical analysis of the SNP data was carried out with the SNPStats online tool (http://bioinfo.iconcologia.net/SNPstats). The best inheritance model was chosen according to the lowest values of Akaike's information criterion and Bayesian information criterion. Differences between groups were determined by unpaired t-tests after checking the normal distribution or were converted to normalize the data. The association of SNPs was tested for matched case-control samples by using χ2 analysis and calculation of odds ratios (ORs) with 95% confidence intervals (CIs). All statistical tests were performed using SPSS v10.0 and EpiInfo7 statistical packages. Significant statistical differences were assumed in all cases showing adjusted P < 0.05.

Results

We genotyped two TCF7L2 SNPs (rs7903146 and rs12255372) in a population-based sample of 310 Uruguayan subjects, including 133 healthy control subjects and 177 clinically diagnosed with type 2 diabetes. For both SNPs analyzed, the best model was the dominant type: rs12255372 = G/G vs G/T+T/T, OR = 0.63, 95%CI: 0.40-0.98, P < 0.05 and rs7903146 = C/C vs C/T+T/T, OR = 0.79, 95%CI: 0.41-1.55, P = 0.3. The rs12255372 SNP showed high association with the type 2 diabetes cases (OR = 1.60, 95%CI: 1.20-2.51, P < 0.05). However, when the type 2 diabetics group was analyzed according to the atypical and classical subgroupings, the association with diabetes existed only for rs12255372 and the classical subgroup (vs controls: OR = 2.1, 95%CI: 1.21-3.75, P < 0.05); no significant differences were found for either SNP or atypical diabetes.

Conclusion

This is the first time SNPs_TCF7L2 were genotyped in a diabetic population stratified by genotype instead of phenotype. Classical and atypical patients showed statistical differences.",2018-09-01 +26719774,Accurate and efficient target prediction using a potency-sensitive influence-relevance voter.,"

Background

A number of algorithms have been proposed to predict the biological targets of diverse molecules. Some are structure-based, but the most common are ligand-based and use chemical fingerprints and the notion of chemical similarity. These methods tend to be computationally faster than others, making them particularly attractive tools as the amount of available data grows.

Results

Using a ChEMBL-derived database covering 490,760 molecule-protein interactions and 3236 protein targets, we conduct a large-scale assessment of the performance of several target-prediction algorithms at predicting drug-target activity. We assess algorithm performance using three validation procedures: standard tenfold cross-validation, tenfold cross-validation in a simulated screen that includes random inactive molecules, and validation on an external test set composed of molecules not present in our database.

Conclusions

We present two improvements over current practice. First, using a modified version of the influence-relevance voter (IRV), we show that using molecule potency data can improve target prediction. Second, we demonstrate that random inactive molecules added during training can boost the accuracy of several algorithms in realistic target-prediction experiments. Our potency-sensitive version of the IRV (PS-IRV) obtains the best results on large test sets in most of the experiments. Models and software are publicly accessible through the chemoinformatics portal at http://chemdb.ics.uci.edu/.",2015-12-29 +29868795,An automated method for detecting alternatively spliced protein domains.,"

Motivation

Alternative splicing (AS) has been demonstrated to play a role in shaping eukaryotic gene diversity at the transcriptional level. However, the impact of AS on the proteome is still controversial. Studies that seek to explore the effect of AS at the proteomic level are hampered by technical difficulties in the cumbersome process of casting forth and back between genome, transcriptome and proteome space coordinates, and the naïve prediction of protein domains in the presence of AS suffers many redundant sequence scans that emerge from constitutively spliced regions that are shared between alternative products of a gene.

Results

We developed the AstaFunk pipeline that computes for every generic transcriptome all domains that are altered by AS events in a systematic and efficient manner. In a nutshell, our method employs Viterbi dynamic programming, which guarantees to find all score-optimal hits of the domains under consideration, while complementary optimizations at different levels avoid redundant and other irrelevant computations. We evaluate AstaFunk qualitatively and quantitatively using RNAseq in well-studied genes with AS, and on large-scale employing entire transcriptomes. Our study confirms complementary reports that the effect of most AS events on the proteome seems to be rather limited, but our results also pinpoint several cases where AS could have a major impact on the function of a protein domain.

Availability and implementation

The JAVA implementation of AstaFunk is available as an open source project on http://astafunk.sammeth.net.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-11-01 +25477381,The DDBJ Japanese Genotype-phenotype Archive for genetic and phenotypic human data.,"The DNA Data Bank of Japan Center (DDBJ Center; http://www.ddbj.nig.ac.jp) maintains and provides public archival, retrieval and analytical services for biological information. Since October 2013, DDBJ Center has operated the Japanese Genotype-phenotype Archive (JGA) in collaboration with our partner institute, the National Bioscience Database Center (NBDC) of the Japan Science and Technology Agency. DDBJ Center provides the JGA database system which securely stores genotype and phenotype data collected from individuals whose consent agreements authorize data release only for specific research use. NBDC has established guidelines and policies for sharing human-derived data and reviews data submission and usage requests from researchers. In addition to the JGA project, DDBJ Center develops Semantic Web technologies for data integration and sharing in collaboration with the Database Center for Life Science. This paper describes the overview of the JGA project, updates to the DDBJ databases, and services for data retrieval, analysis and integration.",2014-12-03 +26873783,"3DBIONOTES: A unified, enriched and interactive view of macromolecular information.","With the advent of high throughput techniques like Next Generation Sequencing, the amount of biological information for genes and proteins is growing faster than ever. Structural information is also rapidly growing, especially in the cryo Electron Microscopy area. However, in many cases, the proteomic and genomic data are spread in multiple databases and with no simple connection to structural information. In this work we present a new web platform that integrates EMDB/PDB structures and UniProt sequences with different sources of protein annotations. 
The application provides an interactive interface linking sequence and structure, including EM maps, presenting the different sources of information at sequence and structural level. The web application is available at http://3dbionotes.cnb.csic.es.",2016-02-10 +28134551,"""Should I stop or should I go? The role of associations and expectancies"": Correction to Best et al. (2016).","Reports an error in ""Should I stop or should I go? The role of associations and expectancies"" by Maisy Best, Natalia S. Lawrence, Gordon D. Logan, Ian P. L. McLaren and Frederick Verbruggen (Journal of Experimental Psychology: Human Perception and Performance, 2016[Jan], Vol 42[1], 115-137). In the article, there is an error in Table 3 of the Results and third paragraph of the Results section labeled Test phase. In Experiment 4, the study performed an exploratory post-hoc test of the go reaction times in the training phase, contrasting stop-associated and go-associated items. Control items were excluded. Instead of reporting the results of the full analysis (with all three items types included), the authors incorrectly reported the results of this post-hoc analysis in Table 3 and in the main text. The correct analysis is presented below. Note that all other analyses reported in the tables and main text are correct. The R code shared via Open Research Exeter data repository (http://hdl.handle.net/10871/17735) is also correct. The interaction between image type and block is no longer significant when control items are included (p = .094; p = .037 for the post-hoc test). (The following abstract of the original article appeared in record 2015-40003-001.) Following exposure to consistent stimulus-stop mappings, response inhibition can become automatized with practice. What is learned is less clear, even though this has important theoretical and practical implications. A recent analysis indicates that stimuli can become associated with a stop signal or with a stop goal. 
Furthermore, expectancy may play an important role. Previous studies that have used stop or no-go signals to manipulate stimulus-stop learning cannot distinguish between stimulus-signal and stimulus-goal associations, and expectancy has not been measured properly. In the present study, participants performed a task that combined features of the go/no-go task and the stop-signal task in which the stop-signal rule changed at the beginning of each block. The go and stop signals were superimposed over 40 task-irrelevant images. Our results show that participants can learn direct associations between images and the stop goal without mediation via the stop signal. Exposure to the image-stop associations influenced task performance during training, and expectancies measured following task completion or measured within the task. But, despite this, we found an effect of stimulus-stop learning on test performance only when the task increased the task-relevance of the images. This could indicate that the influence of stimulus-stop learning on go performance is strongly influenced by attention to both task-relevant and task-irrelevant stimulus features. More generally, our findings suggest a strong interplay between automatic and controlled processes. (PsycINFO Database Record",2017-02-01 +30389715,A Prediction Model to Determine Childhood Epilepsy After 1 or More Paroxysmal Events. ,"The clinical profile of children who had possible seizures is heterogeneous, and accuracy of diagnostic testing is limited. We aimed to develop and validate a prediction model that determines the risk of childhood epilepsy by combining available information at first consultation. We retrospectively collected data of 451 children who visited our outpatient department for diagnostic workup related to 1 or more paroxysmal event(s). At least 1 year of follow-up was available for all children who were diagnosed with epilepsy or in whom diagnosis remained inconclusive. 
Clinical characteristics (sex, age of first seizure, event description, medical history) and EEG report were used as predictor variables for building a multivariate logistic regression model. Performance was validated in an external cohort (n = 187). Model discrimination was excellent, with an area under the receiver operating characteristic curve of 0.86 (95% confidence interval [CI]; 0.80-0.92), a positive predictive value of 0.93 (95% CI 0.83-0.97) and a negative predictive value of 0.76 (95% CI 0.70-0.80). Model discrimination in a selective subpopulation of children with uncertain diagnosis after initial clinical workup was good, with an area under the receiver operating characteristic curve of 0.73 (95% CI 0.58-0.87). This model may prove to be valuable because predictor variables together with a first interictal EEG can be available at first consultation. A Web application is provided (http://epilepsypredictiontools.info/first-consultation) to facilitate the diagnostic process for clinicians who are confronted with children with paroxysmal events, suspected of having an epileptic origin.",2018-11-02 +26582925,Start2Fold: a database of hydrogen/deuterium exchange data on protein folding and stability.,"Proteins fulfil a wide range of tasks in cells; understanding how they fold into complex three-dimensional (3D) structures and how these structures remain stable while retaining sufficient dynamics for functionality is essential for the interpretation of overall protein behaviour. Since the 1950's, solvent exchange-based methods have been the most powerful experimental means to obtain information on the folding and stability of proteins. Considerable expertise and care were required to obtain the resulting datasets, which, despite their importance and intrinsic value, have never been collected, curated and classified. 
Start2Fold is an openly accessible database (http://start2fold.eu) of carefully curated hydrogen/deuterium exchange (HDX) data extracted from the literature that is open for new submissions from the community. The database entries contain (i) information on the proteins investigated and the underlying experimental procedures and (ii) the classification of the residues based on their exchange protection levels, also allowing for the instant visualization of the relevant residue groups on the 3D structures of the corresponding proteins. By providing a clear hierarchical framework for the easy sharing, comparison and (re-)interpretation of HDX data, Start2Fold intends to promote a better understanding of how the protein sequence encodes folding and structure as well as the development of new computational methods predicting protein folding and stability.",2015-11-17 +30864814,Long-Term Exposure to Wind Turbine Noise and Risk for Myocardial Infarction and Stroke: A Nationwide Cohort Study.,"

Background

Noise from wind turbines (WTs) is reported as more annoying than traffic noise at similar levels, raising concerns as to whether WT noise (WTN) increases risk for cardiovascular disease, as observed for traffic noise.

Objectives

We aimed to investigate whether long-term exposure to WTN increases risk of myocardial infarction (MI) and stroke.

Methods

We identified all Danish dwellings within a radius 20 times the height of the closest WT and 25% of the dwellings within [Formula: see text] the height of the closest WT. Using data on WT type and simulated hourly wind at each WT, we estimated hourly outdoor and low frequency (LF) indoor WTN for each dwelling and derived 1-y and 5-y running nighttime averages. We used hospital and mortality registries to identify all incident cases of MI ([Formula: see text]) and stroke ([Formula: see text]) among all adults age 25-85 y ([Formula: see text]), who lived in one of these dwellings for [Formula: see text] over the period 1982-2013. We used Poisson regression to estimate incidence rate ratios (IRRs) adjusted for individual- and area-level covariates.

Results

IRRs for MI in association with 5-y nighttime outdoor WTN [Formula: see text] (vs. [Formula: see text]) dB(A) and indoor LF WTN [Formula: see text] (vs. [Formula: see text]) dB(A) were 1.21 [95% confidence interval (CI): 0.91, 1.62; 47 exposed cases] and 1.29 (95% CI: 0.73, 2.28; 12 exposed cases), respectively. IRRs for intermediate categories of outdoor WTN [24-30, 30-36, and [Formula: see text] vs. [Formula: see text]] were slightly above the null and of similar size: 1.08 (95% CI: 1.04, 1.12), 1.07 (95% CI: 1.00, 1.12), and 1.06 (95% CI: 0.93, 1.22), respectively. For stroke, IRRs for the second and third outdoor exposure groups were similar to those for MI, but near or below the null for higher exposures.

Conclusions

We did not find convincing evidence of associations between WTN and MI or stroke. https://doi.org/10.1289/EHP3340.",2019-03-01 +26586798,CPPsite 2.0: a repository of experimentally validated cell-penetrating peptides.,"CPPsite 2.0 (http://crdd.osdd.net/raghava/cppsite/) is an updated version of manually curated database (CPPsite) of cell-penetrating peptides (CPPs). The current version holds around 1850 peptide entries, which is nearly two times than the entries in the previous version. The updated data were curated from research papers and patents published in last three years. It was observed that most of the CPPs discovered/ tested, in last three years, have diverse chemical modifications (e.g. non-natural residues, linkers, lipid moieties, etc.). We have compiled this information on chemical modifications systematically in the updated version of the database. In order to understand the structure-function relationship of these peptides, we predicted tertiary structure of CPPs, possessing both modified and natural residues, using state-of-the-art techniques. CPPsite 2.0 also maintains information about model systems (in vitro/in vivo) used for CPP evaluation and different type of cargoes (e.g. nucleic acid, protein, nanoparticles, etc.) delivered by these peptides. In order to assist a wide range of users, we developed a user-friendly responsive website, with various tools, suitable for smartphone, tablet and desktop users. In conclusion, CPPsite 2.0 provides significant improvements over the previous version in terms of data content.",2015-11-19 +28881965,Deep learning-based subdivision approach for large scale macromolecules structure recovery from electron cryo tomograms.,"

Motivation

Cellular Electron CryoTomography (CECT) enables 3D visualization of cellular organization at near-native state and in sub-molecular resolution, making it a powerful tool for analyzing structures of macromolecular complexes and their spatial organizations inside single cells. However, high degree of structural complexity together with practical imaging limitations makes the systematic de novo discovery of structures within cells challenging. It would likely require averaging and classifying millions of subtomograms potentially containing hundreds of highly heterogeneous structural classes. Although it is no longer difficult to acquire CECT data containing such amount of subtomograms due to advances in data acquisition automation, existing computational approaches have very limited scalability or discrimination ability, making them incapable of processing such amount of data.

Results

To complement existing approaches, in this article we propose a new approach for subdividing subtomograms into smaller but relatively homogeneous subsets. The structures in these subsets can then be separately recovered using existing computation intensive methods. Our approach is based on supervised structural feature extraction using deep learning, in combination with unsupervised clustering and reference-free classification. Our experiments show that, compared with existing unsupervised rotation invariant feature and pose-normalization based approaches, our new approach achieves significant improvements in both discrimination ability and scalability. More importantly, our new approach is able to discover new structural classes and recover structures that do not exist in training data.

Availability and implementation

Source code freely available at http://www.cs.cmu.edu/~mxu1/software .

Contact

mxu1@cs.cmu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +30568724,"MuSeeQ, a novel supervised image analysis tool for the simultaneous phenotyping of the soluble mucilage and seed morphometric parameters.","

Background

The mucilage is a model to study the polysaccharide biosynthesis since it is produced in large amounts and composed of complex polymers. In addition, it is of great economic interest for its technical and nutritional value. A fast method for phenotyping the released mucilage and the seed morphometric parameters will be useful for fundamental, food, pharmaceutical and breeding researches. Current strategies to phenotype soluble mucilage are restricted to visual evaluations or are highly time-consuming.

Results

Here, we developed a high-throughput phenotyping method for the simultaneous measurement of the soluble mucilage content released on a gel and the seed morphometric parameters. Within this context, we combined a biochemical assay and an open-source computer-aided image analysis tool, MuSeeQ. The biochemical assay consists in sowing seeds on an agarose medium containing the dye toluidine blue O, which specifically stains the mucilage once it is released on the gel. The second part of MuSeeQ is a macro developed in ImageJ allowing to quickly extract and analyse 11 morphometric data of seeds and their respective released mucilages. As an example, MuSeeQ was applied on a flax recombinant inbred lines population (previously screened for fatty acids content) and revealed significant correlations between the soluble mucilage shape and the concentration of some fatty acids, e.g. C16:0 and C18:2. Other fatty acids were also found to correlate with the seed shape parameters, e.g. C18:0 and C18:2. MuSeeQ was then shown to be usable for the analysis of other myxospermous species, including Arabidopsis thaliana and Camelina sativa.

Conclusions

MuSeeQ is a low-cost and user-friendly method which may be used by breeders and researchers for phenotyping simultaneously seeds of specific cultivars, natural variants or mutants and their respective soluble mucilage area released on a gel. The script of MuSeeQ and video tutorials are freely available at http://MuSeeQ.free.fr.",2018-12-18 +29878050,Detection of multi-dimensional co-exclusion patterns in microbial communities.,"

Motivation

Identification of complex relationships among members of microbial communities is key to understanding and controlling the microbiota. Co-exclusion is arguably one of the most important patterns reflecting micro-organisms' intolerance to each other's presence. Knowing these relations opens an opportunity to manipulate microbiotas, personalize anti-microbial and probiotic treatments as well as guide microbiota transplantation. The co-exclusion pattern, however, cannot be appropriately described by a linear function, nor can its strength be estimated using covariance or (negative) Pearson and Spearman correlation coefficients. This manuscript proposes a way to quantify the strength and evaluate the statistical significance of co-exclusion patterns between two, three or more variables describing a microbiota and allows one to extend analysis beyond micro-organism abundance by including other microbiome associated measurements such as pH, temperature, etc., as well as estimate the expected numbers of false positive co-exclusion patterns in a co-exclusion network.

Results

The implemented computational pipeline (CoEx) tested against 2380 microbial profiles (samples) from The Human Microbiome Project resulted in body-site specific pairwise co-exclusion patterns.

Availability and implementation

C++ source code for calculation of the score and P-value for two, three and four dimensional co-exclusion patterns as well as source code and executable files for the CoEx pipeline are available at https://scsb.utmb.edu/labgroups/fofanov/co-exclusion_in_microbial_communities.asp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-11-01 +29130882,Simultaneous enumeration of cancer and immune cell types from bulk tumor gene expression data. ,"Immune cells infiltrating tumors can have important impact on tumor progression and response to therapy. We present an efficient algorithm to simultaneously estimate the fraction of cancer and immune cell types from bulk tumor gene expression data. Our method integrates novel gene expression profiles from each major non-malignant cell type found in tumors, renormalization based on cell-type-specific mRNA content, and the ability to consider uncharacterized and possibly highly variable cell types. Feasibility is demonstrated by validation with flow cytometry, immunohistochemistry and single-cell RNA-Seq analyses of human melanoma and colorectal tumor specimens. Altogether, our work not only improves accuracy but also broadens the scope of absolute cell fraction predictions from tumor gene expression data, and provides a unique novel experimental benchmark for immunogenomics analyses in cancer research (http://epic.gfellerlab.org).",2017-11-13 +30768500,Choosing Wisely in Critical Care: Results of a National Survey From the Critical Care Societies Collaborative.,"

Objectives

Over-utilization of tests, treatments, and procedures is common for hospitalized patients in ICU settings. American Board of Internal Medicine Foundation's Choosing Wisely campaign tasked professional societies to identify sources of overuse in specialty care practice. The purpose of this study was to assess how critical care clinicians were implementing the Critical Care Societies Collaborative Choosing Wisely recommendations in clinical practice.

Design

Descriptive survey methodology with use of Research Electronic Data Capture (https://projectredcap.org/) sent via email newsletter blast or to individual emails of the 150,000 total members of the organizations.

Setting

National survey.

Subjects

ICU physicians, nurses, advanced practice providers including nurse practitioners and physician assistants, and pharmacist members of four national critical care societies in the United States.

Interventions

None.

Measurements and main results

A six-question survey assessed what Choosing Wisely recommendations had been implemented in ICU settings and if the impact was assessed. A total of 2,520 responses were received from clinicians: nurses (61%; n = 1538), physicians (25.9%; n = 647), advanced practice providers (10.5%; n = 263), and pharmacists (2.1%; n = 52), reflecting a 1.6% response rate of the total membership of 150,000 clinicians. Overall, 1,273 respondents (50.6%) reported they were familiar with the Choosing Wisely campaign. Respondents reported that Choosing Wisely recommendations had been integrated in a number of ways including being implemented in clinical care (n = 817; 72.9%), through development of a specific clinical protocol or institutional guideline (n = 736; 65.7%), through development of electronic medical record orders (n = 626; 55.8%), or with integration of longitudinal tracking using an electronic dashboard (n = 213; 19.0%). Some respondents identified that a specific quality improvement initiative was developed related to the Choosing Wisely recommendations (n = 468; 41.7%), or that a research initiative had been conducted (n = 156; 13.9%).

Conclusions

The results provide information on the application of the Choosing Wisely recommendations to clinical practice from a small sample of critical care clinicians. However, as only half of the respondents report implementation, additional strategies are needed to promote the Choosing Wisely recommendations to make impactful change to improve care in ICU settings.",2019-03-01 +29348138,Development and validation of outcome prediction models for aneurysmal subarachnoid haemorrhage: the SAHIT multinational cohort study.,"

Objective

To develop and validate a set of practical prediction tools that reliably estimate the outcome of subarachnoid haemorrhage from ruptured intracranial aneurysms (SAH).

Design

Cohort study with logistic regression analysis to combine predictors and treatment modality.

Setting

Subarachnoid Haemorrhage International Trialists' (SAHIT) data repository, including randomised clinical trials, prospective observational studies, and hospital registries.

Participants

Researchers collaborated to pool datasets of prospective observational studies, hospital registries, and randomised clinical trials of SAH from multiple geographical regions to develop and validate clinical prediction models.

Main outcome measure

Predicted risk of mortality or functional outcome at three months according to score on the Glasgow outcome scale.

Results

Clinical prediction models were developed with individual patient data from 10 936 patients and validated with data from 3355 patients after development of the model. In the validation cohort, a core model including patient age, premorbid hypertension, and neurological grade on admission to predict risk of functional outcome had good discrimination, with an area under the receiver operator characteristics curve (AUC) of 0.80 (95% confidence interval 0.78 to 0.82). When the core model was extended to a ""neuroimaging model,"" with inclusion of clot volume, aneurysm size, and location, the AUC improved to 0.81 (0.79 to 0.84). A full model that extended the neuroimaging model by including treatment modality had AUC of 0.81 (0.79 to 0.83). Discrimination was lower for a similar set of models to predict risk of mortality (AUC for full model 0.76, 0.69 to 0.82). All models showed satisfactory calibration in the validation cohort.

Conclusion

The prediction models reliably estimate the outcome of patients who were managed in various settings for ruptured intracranial aneurysms that caused subarachnoid haemorrhage. The predictor items are readily derived at hospital admission. The web based SAHIT prognostic calculator (http://sahitscore.com) and the related app could be adjunctive tools to support management of patients.",2018-01-18 +29595363,"Emotion identification across adulthood using the Dynamic FACES database of emotional expressions in younger, middle aged, and older adults.","Facial stimuli are widely used in behavioural and brain science research to investigate emotional facial processing. However, some studies have demonstrated that dynamic expressions elicit stronger emotional responses compared to static images. To address the need for more ecologically valid and powerful facial emotional stimuli, we created Dynamic FACES, a database of morphed videos (n = 1026) from younger, middle-aged, and older adults displaying naturalistic emotional facial expressions (neutrality, sadness, disgust, fear, anger, happiness). To assess adult age differences in emotion identification of dynamic stimuli and to provide normative ratings for this modified set of stimuli, healthy adults (n = 1822, age range 18-86 years) categorised for each video the emotional expression displayed, rated the expression distinctiveness, estimated the age of the face model, and rated the naturalness of the expression. We found few age differences in emotion identification when using dynamic stimuli. Only for angry faces did older adults show lower levels of identification accuracy than younger adults. Further, older adults outperformed middle-aged adults' in identification of sadness. The use of dynamic facial emotional stimuli has previously been limited, but Dynamic FACES provides a large database of high-resolution naturalistic, dynamic expressions across adulthood. 
Information on using Dynamic FACES for research purposes can be found at http://faces.mpib-berlin.mpg.de .",2018-03-29 +24175918,RegPrecise 3.0--a resource for genome-scale exploration of transcriptional regulation in bacteria.,"

Background

Genome-scale prediction of gene regulation and reconstruction of transcriptional regulatory networks in prokaryotes is one of the critical tasks of modern genomics. Bacteria from different taxonomic groups, whose lifestyles and natural environments are substantially different, possess highly diverged transcriptional regulatory networks. The comparative genomics approaches are useful for in silico reconstruction of bacterial regulons and networks operated by both transcription factors (TFs) and RNA regulatory elements (riboswitches).

Description

RegPrecise (http://regprecise.lbl.gov) is a web resource for collection, visualization and analysis of transcriptional regulons reconstructed by comparative genomics. We significantly expanded a reference collection of manually curated regulons we introduced earlier. RegPrecise 3.0 provides access to inferred regulatory interactions organized by phylogenetic, structural and functional properties. Taxonomy-specific collections include 781 TF regulogs inferred in more than 160 genomes representing 14 taxonomic groups of Bacteria. TF-specific collections include regulogs for a selected subset of 40 TFs reconstructed across more than 30 taxonomic lineages. Novel collections of regulons operated by RNA regulatory elements (riboswitches) include nearly 400 regulogs inferred in 24 bacterial lineages. RegPrecise 3.0 provides four classifications of the reference regulons implemented as controlled vocabularies: 55 TF protein families; 43 RNA motif families; ~150 biological processes or metabolic pathways; and ~200 effectors or environmental signals. Genome-wide visualization of regulatory networks and metabolic pathways covered by the reference regulons is available for all studied genomes. A separate section of RegPrecise 3.0 contains draft regulatory networks in 640 genomes obtained by a conservative propagation of the reference regulons to closely related genomes.

Conclusions

RegPrecise 3.0 gives access to the transcriptional regulons reconstructed in bacterial genomes. Analytical capabilities include exploration of: regulon content, structure and function; TF binding site motifs; conservation and variations in genome-wide regulatory networks across all taxonomic groups of Bacteria. RegPrecise 3.0 was selected as a core resource on transcriptional regulation of the Department of Energy Systems Biology Knowledgebase, an emerging software and data environment designed to enable researchers to collaboratively generate, test and share new hypotheses about gene and protein functions, perform large-scale analyses, and model interactions in microbes, plants, and their communities.",2013-11-01 +28232859,Integration of EGA secure data access into Galaxy. ,"High-throughput molecular profiling techniques are routinely generating vast amounts of data for translational medicine studies. Secure access controlled systems are needed to manage, store, transfer and distribute these data due to its personally identifiable nature. The European Genome-phenome Archive (EGA) was created to facilitate access and management to long-term archival of bio-molecular data. Each data provider is responsible for ensuring a Data Access Committee is in place to grant access to data stored in the EGA. Moreover, the transfer of data during upload and download is encrypted. ELIXIR, a European research infrastructure for life-science data, initiated a project (2016 Human Data Implementation Study) to understand and document the ELIXIR requirements for secure management of controlled-access data. As part of this project, a full ecosystem was designed to connect archived raw experimental molecular profiling data with interpreted data and the computational workflows, using the CTMM Translational Research IT (CTMM-TraIT) infrastructure http://www.ctmm-trait.nl as an example. 
Here we present the first outcomes of this project, a framework to enable the download of EGA data to a Galaxy server in a secure way. Galaxy provides an intuitive user interface for molecular biologists and bioinformaticians to run and design data analysis workflows. More specifically, we developed a tool -- ega_download_streamer - that can download data securely from EGA into a Galaxy server, which can subsequently be further processed. This tool will allow a user within the browser to run an entire analysis containing sensitive data from EGA, and to make this analysis available for other researchers in a reproducible manner, as shown with a proof of concept study.  The tool ega_download_streamer is available in the Galaxy tool shed: https://toolshed.g2.bx.psu.edu/view/yhoogstrate/ega_download_streamer.",2016-12-12 +28291763,WONKA and OOMMPPAA: analysis of protein-ligand interaction data to direct structure-based drug design.,"In this work, two freely available web-based interactive computational tools that facilitate the analysis and interpretation of protein-ligand interaction data are described. Firstly, WONKA, which assists in uncovering interesting and unusual features (for example residue motions) within ensembles of protein-ligand structures and enables the facile sharing of observations between scientists. Secondly, OOMMPPAA, which incorporates protein-ligand activity data with protein-ligand structural data using three-dimensional matched molecular pairs. OOMMPPAA highlights nuanced structure-activity relationships (SAR) and summarizes available protein-ligand activity data in the protein context. In this paper, the background that led to the development of both tools is described. Their implementation is outlined and their utility using in-house Structural Genomics Consortium (SGC) data sets and openly available data from the PDB and ChEMBL is described. 
Both tools are freely available to use and download at http://wonka.sgc.ox.ac.uk/WONKA/ and http://oommppaa.sgc.ox.ac.uk/OOMMPPAA/.",2017-02-24 +28453675,CircosVCF: circos visualization of whole-genome sequence variations stored in VCF files.,"

Summary

Visualization of whole-genomic variations in a meaningful manner assists researchers in gaining new insights into the underlying data, especially when it comes in the context of whole genome comparisons. CircosVCF is a web based visualization tool for genome-wide variant data described in VCF files, using circos plots. The user friendly interface of CircosVCF supports an interactive design of the circles in the plot, and the integration of additional information such as experimental data or annotations. The provided visualization capabilities give a broad overview of the genomic relationships between genomes, and allow identification of specific meaningful SNPs regions.

Availability and implementation

CircosVCF was implemented in JavaScript and is available at http://www.ariel.ac.il/research/fbl/software.

Contact

malisa@ariel.ac.il.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +29716618,FOCS: a novel method for analyzing enhancer and gene activity patterns infers an extensive enhancer-promoter map.,"Recent sequencing technologies enable joint quantification of promoters and their enhancer regions, allowing inference of enhancer-promoter links. We show that current enhancer-promoter inference methods produce a high rate of false positive links. We introduce FOCS, a new inference method, and by benchmarking against ChIA-PET, HiChIP, and eQTL data show that it results in lower false discovery rates and at the same time higher inference power. By applying FOCS to 2630 samples taken from ENCODE, Roadmap Epigenomics, FANTOM5, and a new compendium of GRO-seq samples, we provide extensive enhancer-promotor maps ( http://acgt.cs.tau.ac.il/focs ). We illustrate the usability of our maps for deriving biological hypotheses.",2018-05-01 +29370409,Does the endometrial gene expression of fertile women vary within and between cycles?,"STUDY QUESTION:Does gene expression of putative endometrial implantation markers vary in expression between menstrual cycles? SUMMARY ANSWER:In fertile women the expression of certain genes exhibits a pattern of stable regulation.which is not affected even when sampled twice in one cycle. WHAT IS KNOWN ALREADY:Successful implantation occurs in a minority of IVF embryo transfers. In contrast to knowledge regarding the ovulatory process, there is a sparse understanding of endometrial genes critical to implantation. This lack of knowledge hinders progress in this field. STUDY DESIGN, SIZE, DURATION:Endometrial pipelle samples were collected based on blood endocrinological markers at 2 and 7 days post initial LH surge. Five samples were collected over four cycles where the interval between collections ranged from sequential months to three years. 
PARTICIPANTS/MATERIALS, SETTING, METHODS:Six fertile women attending an IVF clinic for male factor infertility, had samples collected. Global gene expression profiles were obtained from laser-microdissected, endometrial glands and stroma. Nineteen potential proliferation, cytokine and adhesion markers based on previous validated reports were studied. MAIN RESULTS AND THE ROLE OF CHANCE:There was a significant modification between LH+2 and LH+7 of expression for 23 genes-11 in 8 in glands and stroma, 4 in stroma only and 3 in glands only suggesting stable, controlled regulation. Nevertheless, genes exhibited individual characteristics, e.g MKI67 exhibited lower expression at LH+7 than LH+2 and CCL4 higher, whereas TRO expressed limited difference in both cell types. Stability between cycles was demonstrated for gene expression at both LH+2-more than 60% of genes had <25% variation and at LH+7-60% had <30% variation. Further, effects of prior collection of an LH+2 sample on gene expression at LH+7 were not detected. The range of mRNA expression suggested that a clinical/diagnostic sample at LH+2 and LH+7 is likely to be a better index of endometrial function than a single sample. The possibility of redundancy suggests a panel would be more informative than a single marker. LARGE SCALE DATA:Raw and normalized microarray data have been deposited with the EMBL's European Genome-Phenome Archive for collaborative analysis, reference ega-box-815 (Lappalainen I, Almeida-King J, Kumanduri V, Senf A, Spalding JD, Ur-Rehman S, Saunders G, Kandasamy J, Caccamo M, Leinonen R et al. The European Genome-phenome Archive of human data consented for biomedical research. Nat Genet 2015;47:692-695.) [https://www.ebi.ac.uk/ega/home]. LIMITATIONS, REASONS FOR CAUTION:This type of research has difficulties of recruitment of fertile women for multiple blood testing and repeat endometrial biopsies. Therefore, these data had decreased statistical power due to the overall participant numbers. 
However, the inclusion of four cycles for each participant permitted the aim of obtaining information on intercycle and intracycle variability to be achieved. WIDER IMPLICATIONS OF THE FINDINGS:Our results support the feasibility of a clinical means of identification of a functional receptive endometrium. The robustness of data from individual women suggests that samples from one cycle can generally be applied to subsequent cycles. STUDY FUNDING/COMPETING INTEREST(S):Funding was granted from the Tertiary Education Commission of New Zealand, Contract I.D.:UOOX06007. There are no competing interests.",2018-03-01 +28637337,Matrix completion with side information and its applications in predicting the antigenicity of influenza viruses.,"

Motivation

Low-rank matrix completion has been demonstrated to be powerful in predicting antigenic distances among influenza viruses and vaccines from partially revealed hemagglutination inhibition table. Meanwhile, influenza hemagglutinin (HA) protein sequences are also effective in inferring antigenic distances. Thus, it is natural to integrate HA protein sequence information into low-rank matrix completion model to help infer influenza antigenicity, which is critical to influenza vaccine development.

Results

We have proposed a novel algorithm called biological matrix completion with side information (BMCSI), which first measures HA protein sequence similarities among influenza viruses (especially on epitopes) and then integrates the similarity information into a low-rank matrix completion model to predict influenza antigenicity. This algorithm exploits both the correlations among viruses and vaccines in serological tests and the power of HA sequence in predicting influenza antigenicity. We applied this model to H3N2 seasonal influenza virus data. Compared with previous methods, we significantly reduced the prediction root-mean-square error in a 10-fold cross validation analysis. Based on the cartographies constructed from imputed data, we showed that the antigenic evolution of H3N2 seasonal influenza is generally S-shaped while the genetic evolution is half-circle shaped. We also showed that the Spearman correlation between genetic and antigenic distances (among antigenic clusters) is 0.83, demonstrating a globally high correspondence and some local discrepancies between influenza genetic and antigenic evolution. Finally, we showed that 4.4%±1.2% genetic variance (corresponding to 3.11 ± 1.08 antigenic distances) caused an antigenic drift event for H3N2 influenza viruses historically.

Availability and implementation

The software and data for this study are available at http://bi.sky.zstu.edu.cn/BMCSI/.

Contact

jialiang.yang@mssm.edu or pinganhe@zstu.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +29620239,A functional polymorphism at miR‑491‑5p binding site in the 3'UTR of MMP9 gene confers increased risk for pressure ulcers after hip fracture.,"The roles of matrix metalloproteinase (MMP)9 in the control of pressure ulcers (PU) after hip fracture as well as how the rs1056629 in MMP9 3'UTR compromises the interaction between MMP9 and miR‑491 were explored. Online miRNA database (http://www.bioguo.org) was utilized to explore gene polymorphism in MMP9 3'UTR that might break the interaction between MMP9 and miRNA. Luciferase assay was utilized to confirm the miRNA targeted MMP9. Real‑time PCR, western blot analysis and immunohistochemistry were carried out to understand the roles of MMP9 in PU as well as how rs1056629 in MMP9 3'UTR compromises the interaction between MMP9 and miR‑491. rs1056629 in MMP9 3'UTR that compromised the interaction between MMP9 and four miRNAs including miR‑194‑3p, miR‑491, miR‑1915‑3p and miR‑941, and only miR‑491 among miR‑194‑3p, miR‑491, miR‑1915‑3p and miR‑941 decreased luciferase activity of wild‑type MMP9 3'UTR, and luciferase activities of mutant‑3 and mutant‑4 MMP9 3'UTR in miR‑491 overexpressing cells was comparable with scramble control. miR‑194‑3p, miR‑491, miR‑1915‑3p and miR‑941 levels in PU group was comparable with healthy control, and miR‑194‑3p, miR‑491, miR‑1915‑3p and miR‑941 in subjects carrying AA genotype was similar with those in AC and CC groups. MMP9 mRNA and protein, and histology score in subjects with PU were much higher, and were also much higher in AA group. Only miR‑491 mimic among miR‑194‑3p, miR‑491, miR‑1915‑3p and miR‑941 mimics downregulated the MMP9 level, and only miR‑491 inhibitor among miR‑194‑3p, miR‑491, miR‑1915‑3p and miR‑941 inhibitors upregulated the MMP9 level. 
Our study indicated that rs1056629 polymorphism could be a novel biomarker for predicting the occurrence of PU after a hip fracture.",2018-03-28 +29424102,Complex care and contradictions of choice in the safety net.,"This article explores the complicated and often-contradictory notions of choice at play in complex care management (CCM) programmes in the US healthcare safety net. Drawing from longitudinal data collected over two years of ethnographic fieldwork at urban safety-net clinics, our study examines the CCM goal of transforming frequent emergency department (ED) utilisers into 'active' patients who will reduce their service utilisation and thereby contribute to a more rational, cost-effective healthcare system. By considering our data alongside philosopher Annemarie Mol's (2008) conceptualisation of the competing logics of choice and care, we argue that these premises often undermine CCM teams' efforts to support patients and provide the care they need - not only to prevent medical crises, but to overcome socio-economic barriers as well. We assert that while safety-net CCM programmes are held accountable for the degree to which their patients successfully transform into self-managing, cost-effective actors, much of the care CCM staff provide in fact involves attempts to intervene on structural obstacles that impinge on patient choice. CCM programmes thus struggle between an economic imperative to get patients to make better health choices and a moral imperative to provide care in the face of systemic societal neglect. (A virtual abstract of this paper can be viewed at: https://www.youtube.com/channel/UC_979cmCmR9rLrKuD7z0ycA).",2018-02-08 +26141515,mirEX 2.0 - an integrated environment for expression profiling of plant microRNAs.,"

Background

MicroRNAs are the key post-transcriptional regulators of gene expression in development and stress responses. Thus, precisely quantifying the level of each particular microRNA is of utmost importance when studying the biology of any organism.

Description

The mirEX 2.0 web portal ( http://www.combio.pl/mirex ) provides a comprehensive platform for the exploration of microRNA expression data based on quantitative Real Time PCR and NGS sequencing experiments, covering various developmental stages, from wild-type to mutant plants. The portal includes mature and pri-miRNA expression levels detected in three plant species (Arabidopsis thaliana, Hordeum vulgare and Pellia endiviifolia), and in A. thaliana miRNA biogenesis pathway mutants. In total, the database contains information about the expression of 461 miRNAs representing 268 families. The data can be explored through the use of advanced web tools, including (i) a graphical query builder system allowing a combination of any given species, developmental stages and tissues, (ii) a modular presentation of the results in the form of thematic windows, and (iii) a number of user-friendly utilities such as a community-building discussion system and extensive tutorial documentation (e.g., tooltips, exemplary videos and presentations). All data contained within the mirEX 2.0 database can be downloaded for use in further applications in a context-based way from the result windows or from a dedicated web page.

Conclusions

The mirEX 2.0 portal provides the plant research community with easily accessible data and powerful tools for application in multi-conditioned analyses of miRNA expression from important plant species in different biological and developmental backgrounds.",2015-06-16 +29789361,Breaching Pathogeographic Barriers by the Bat White-Nose Fungus. ,"Bat white-nose syndrome has become associated with unparalleled mortality in bat species across the United States since 2006. In a recent article, Drees and colleagues (mBio 8:e01941-17, 2017, https://doi.org/10.1128/mBio.01941-17) utilized both whole-genome sequencing and microsatellite data to explore the origin and spread of the causative agent of bat white-nose syndrome, Pseudogymnoascus destructans The research by Drees et al. supports the hypothesis that P. destructans was introduced into North America from Europe, with molecular dating suggesting a divergence from European isolates approximately 100 years ago. The approaches described in this study are an important contribution toward pinpointing the origins of this infection and underscore the need for more rigorous international biosecurity in order to stem the tide of emerging fungal pathogens.",2018-05-22 +26376976,KGCAK: a K-mer based database for genome-wide phylogeny and complexity evaluation.,"

Background

The K-mer approach, treating genomic sequences as simple characters and counting the relative abundance of each string upon a fixed K, has been extensively applied to phylogeny inference for genome assembly, annotation, and comparison.

Results

To meet increasing demands for comparing large genome sequences and to promote the use of the K-mer approach, we develop a versatile database, KGCAK ( http://kgcak.big.ac.cn/KGCAK/ ), containing ~8,000 genomes that include genome sequences of diverse life forms (viruses, prokaryotes, protists, animals, and plants) and cellular organelles of eukaryotic lineages. It builds phylogeny based on genomic elements in an alignment-free fashion and provides in-depth data processing enabling users to compare the complexity of genome sequences based on K-mer distribution.

Conclusion

We hope that KGCAK becomes a powerful tool for exploring relationship within and among groups of species in a tree of life based on genomic data.",2015-09-16 +27999049,"Improving feedback of surveillance data on antimicrobial consumption, resistance and stewardship in England: putting the data at your Fingertips.","The provision of better access to and use of surveillance data is a key component of the UK 5 Year Antimicrobial Resistance (AMR) Strategy. Since April 2016, PHE has made data on practice (infection prevention and control; antimicrobial stewardship) and outcome (prevalence of AMR, antibiotic use and healthcare-associated infections) available through Fingertips, a publicly accessible web tool (https://fingertips.phe.org.uk/profile/amr-local-indicators). Fingertips provides access to a wide range of public health data presented as thematic profiles, with the above data being available through the 'AMR local indicators' profile. Local data on a range of indicators can be viewed at the level of National Health Service acute trusts, Clinical Commissioning Groups or general practitioner practices, all of which can be compared with the corresponding aggregate values for England to allow benchmarking. The data can be viewed in a range of formats including an overview showing counts and rates, interactive maps, spine charts and graphs that show temporal trends over a range of time scales or allow correlations between pairs of indicators. The aim of the AMR local indicators profile on Fingertips is to support the development of local action plans to optimize antibiotic prescribing and reduce AMR and healthcare-associated infections. 
Provision of access to relevant information in an easy to use format will help local stakeholders, including healthcare staff, commissioners, Directors of Public Health, academics and the public, to benchmark relevant local AMR data and to monitor the impact of local initiatives to tackle AMR over time.",2017-04-01 +27744178,Computational identification of non-synonymous polymorphisms within regions corresponding to protein interaction sites.,"

Background

Protein-protein interactions (PPI) play an important role in function of all organisms and enable understanding of underlying metabolic processes. Computational predictions of PPIs are an important aspect in proteomics, as experimental methods may result in high degree of false positive results and are more expensive. Although there are many databases collecting predicted PPIs, exploration of genetics information underlying PPI interactions has not been investigated thoroughly. The aim of the present study was to identify genomic locations corresponding to regions involved in predicted PPIs and to collect non-synonymous polymorphisms (nsSNPs) located within those regions; which we termed PPI-SNPs.

Methods

Predicted PPIs were obtained from PiSITE database (http://pisite.hgc.jp). Non-synonymous SNPs mapped on protein structural data (PDBs) were obtained from the UCSC server. Polymorphism locations on protein structures were mapped to predicted PPI regions. DAVID tool was used for pathway enrichment and gene cluster analysis (https://david.ncifcrf.gov/).

Results

We collected 544 polymorphisms located within predicted PPI sites that map to 197 genes. We identified 9 SNPs, previously associated with diseases, but not yet associated with PPI sites. We also found examples in which polymorphisms located within predicted PPI regions are also occurring within previously experimentally validated PPIs and within experimentally determined functional domains.

Conclusions

Our study provides the first catalog of nsSNPs located within predicted PPIs. These prioritized SNPs present the basis for planning experimental validation of SNPs that cause gain or loss of PPIs. Our implementation is expandable, as datasets used are constantly updated.",2016-10-04 +27092463,"Crystal structure and identification of a key amino acid for glucose tolerance, substrate specificity, and transglycosylation activity of metagenomic β-glucosidase Td2F2.","

Unlabelled

β-Glucosidase Td2F2 isolated from a compost metagenome has high glucose tolerance and transglycosylation activity. In this study, we determined the high-resolution crystal structure of Td2F2. It has a unique structure at the -1 subsite that is important for substrate specificity but not for glucose tolerance. To elucidate the mechanism(s) of glucose tolerance, we isolated a glucose-sensitive Td2F2 mutant using random mutagenesis. In this mutant, Asn223 residue located between subsites +1 and +2 was mutated. The Asn223 mutation resulted in reduced glucose tolerance and transglycosylation activity, and drastically changed substrate specificity. These results indicate that the structure between subsites +1 and +2 is critical for the glucose tolerance and substrate specificity of Td2F2. Our findings shed light on the glucose tolerance and transglycosylation activity mechanisms of glycoside hydrolase family 1 β-glucosidases.

Database

The atomic coordinates and structure factors (codes 3WH5, 3WH6, 3WH8, 3WH7, 5AYB, and 5AYI) have been deposited in the Protein Data Bank (http://wwpdb.org/).",2016-05-06 +25948583,SELPHI: correlation-based identification of kinase-associated networks from global phospho-proteomics data sets.,"While phospho-proteomics studies have shed light on the dynamics of cellular signaling, they mainly describe global effects and rarely explore mechanistic details, such as kinase/substrate relationships. Tools and databases, such as NetworKIN and PhosphoSitePlus, provide valuable regulatory details on signaling networks but rely on prior knowledge. They therefore provide limited information on less studied kinases and fewer unexpected relationships given that better studied signaling events can mask condition- or cell-specific 'network wiring'. SELPHI is a web-based tool providing in-depth analysis of phospho-proteomics data that is intuitive and accessible to non-bioinformatics experts. It uses correlation analysis of phospho-sites to extract kinase/phosphatase and phospho-peptide associations, and highlights the potential flow of signaling in the system under study. We illustrate SELPHI via analysis of phospho-proteomics data acquired in the presence of erlotinib-a tyrosine kinase inhibitor (TKI)-in cancer cells expressing TKI-resistant and -sensitive variants of the Epidermal Growth Factor Receptor. In this data set, SELPHI revealed information overlooked by the reporting study, including the known role of MET and EPHA2 kinases in conferring resistance to erlotinib in TKI sensitive strains. SELPHI can significantly enhance the analysis of phospho-proteomics data contributing to improved understanding of sample-specific signaling networks. SELPHI is freely available via http://llama.mshri.on.ca/SELPHI.",2015-05-06 +28984185,STRScan: targeted profiling of short tandem repeats in whole-genome sequencing data.,"

Background

Short tandem repeats (STRs) are found in many prokaryotic and eukaryotic genomes, and are commonly used as genetic markers, in particular for identity and parental testing in DNA forensics. The unstable expansion of some STRs was associated with various genetic disorders (e.g., the Huntington disease), and thus was used in genetic testing for screening individuals at high risk. Traditional STR analyses were based on the PCR amplification of STR loci followed by gel electrophoresis. With the availability of massive whole genome sequencing data, it becomes practical to mine STR profiles in silico from genome sequences. Software tools such as lobSTR and STR-FM have been developed to address these demands, which are, however, built upon whole genome reads mapping tools, and thus may not be sensitive enough.

Results

In this paper, we present a standalone software tool STRScan that uses a greedy algorithm for targeted STR profiling in next-generation sequencing (NGS) data. STRScan was tested on the whole genome sequencing data from Venter genome sequencing and 1000 Genomes Project. The results showed that STRScan can profile 20% more STRs in the target set that are missed by lobSTR.

Conclusion

STRScan is particularly useful for the NGS-based targeted STR profiling, e.g., in genetic and human identity testing. STRScan is available as open-source software at http://darwin.informatics.indiana.edu/str/ .",2017-10-03 +27128449,"The Association between Dust Storms and Daily Non-Accidental Mortality in the United States, 1993-2005.","

Background

The impact of dust storms on human health has been studied in the context of Asian, Saharan, Arabian, and Australian storms, but there has been no recent population-level epidemiological research on the dust storms in North America. The relevance of dust storms to public health is likely to increase as extreme weather events are predicted to become more frequent with anticipated changes in climate through the 21st century.

Objectives

We examined the association between dust storms and county-level non-accidental mortality in the United States from 1993 through 2005.

Methods

Dust storm incidence data, including date and approximate location, are taken from the U.S. National Weather Service storm database. County-level mortality data for the years 1993-2005 were acquired from the National Center for Health Statistics. Distributed lag conditional logistic regression models under a time-stratified case-crossover design were used to study the relationship between dust storms and daily mortality counts over the whole United States and in Arizona and California specifically. End points included total non-accidental mortality and three mortality subgroups (cardiovascular, respiratory, and other non-accidental).

Results

We estimated that for the United States as a whole, total non-accidental mortality increased by 7.4% (95% CI: 1.6, 13.5; p = 0.011) and 6.7% (95% CI: 1.1, 12.6; p = 0.018) at 2- and 3-day lags, respectively, and by an average of 2.7% (95% CI: 0.4, 5.1; p = 0.023) over lags 0-5 compared with referent days. Significant associations with non-accidental mortality were estimated for California (lag 2 and 0-5 day) and Arizona (lag 3), for cardiovascular mortality in the United States (lag 2) and Arizona (lag 3), and for other non-accidental mortality in California (lags 1-3 and 0-5).

Conclusions

Dust storms are associated with increases in lagged non-accidental and cardiovascular mortality. Citation: Crooks JL, Cascio WE, Percy MS, Reyes J, Neas LM, Hilborn ED. 2016. The association between dust storms and daily non-accidental mortality in the United States, 1993-2005. Environ Health Perspect 124:1735-1743; http://dx.doi.org/10.1289/EHP216.",2016-04-29 +29868912,Grocery store interventions to change food purchasing behaviors: a systematic review of randomized controlled trials.,"

Background

Diet is an important determinant of health, and food purchasing is a key antecedent to consumption.

Objective

We set out to evaluate the effectiveness of grocery store interventions to change food purchasing, and to examine whether effectiveness varied based on intervention components, setting, or socioeconomic status.

Design

We conducted a systematic review of randomized controlled trials (search performed June 2017). Studies must have: aimed to change food purchasing; been implemented in grocery stores (real or simulated); reported purchasing; and had a minimal control or compared interventions fulfilling our criteria. Searching, screening, bias assessment, and data extraction followed Cochrane methods. We grouped studies by intervention type (economic, environmental, swaps, and/or education), synthesized results narratively, and conducted an exploratory qualitative comparative analysis.

Results

We included 35 studies representing 89 interventions, >20,000 participants, and >800 stores. Risk of bias was mixed. Economic interventions showed the most promise, with 8 of the 9 studies in real stores and all 6 in simulated environments detecting an effect on purchasing. Swap interventions appeared promising in the 2 studies based in real stores. Store environment interventions showed mixed effects. Education-only interventions appeared effective in simulated environments but not in real stores. Available data suggested that effects of economic interventions did not differ by socioeconomic status, whereas for other interventions impact was variable. In our qualitative comparative analysis, economic interventions (regardless of setting) and environmental and swap interventions in real stores were associated with statistically significant changes in purchasing in the desired direction for ≥1 of the foods targeted by the intervention, whereas education-only interventions in real stores were not.

Conclusions

Findings suggest that interventions implemented in grocery stores-particularly ones that manipulate price, suggest swaps, and perhaps manipulate item availability-have an impact on purchasing and could play a role in public health strategies to improve health. Review protocol registered at https://www.crd.york.ac.uk/PROSPERO/ as CRD42017068809.",2018-06-01 +29561704,"MiR-144 suppresses proliferation, invasion, and migration of breast cancer cells through inhibiting CEP55.","OBJECTIVE:The study aimed to investigate the molecular mechanism of miR-144 and CEP55 as well as the influence of their interaction on the cell proliferation, migration, invasion, cell cycle and cell apoptosis in breast cancer. METHODS:In this study, The Cancer Genome Atlas (TCGA, https://tcga-data.nci.nih.gov/ ) database was used for microarray analysis. The expressions of miR-144 and CEP55 in 40 adjacent tissues and 36 tumor tissues were examined by western blot, qRT-PCR and immunohistochemistry. The target relationship between miR-144 and CEP55 was predicted and confirmed by TargetScan and luciferase reporter assay. The cell proliferation, cell cycle and cell apoptosis in different groups were detected by MTT and flow cytometry assays, while wound healing and transwell assays were used for the cell migration and invasion tests. The regulatory effects of miR-144 and CEP55 on breast tumor were verified through nude mouse model in vivo experiment. RESULTS:MiR-144 was down-regulated in breast cancerous tissues and cells, whereas CEP55 expression was up-regulated in breast cancerous tissues. Moreover, there existed a target relationship between miR-144 and CEP55 and negative correlation on their expressions. MiR-144 could down-regulate CEP55 expression, thereby inhibiting proliferation, invasion, migration, retarding cell cycle and accelerating cell apoptosis. MiR-144 could inhibit cell progression through down-regulating CEP55 in vivo. 
CONCLUSION:MiR-144 suppressed cell proliferation, migration, invasion and induced cell cycle arrest and cell apoptosis by repressing CEP55. This might provide a promising therapy for clinical treatment.",2018-03-26 +30800727,Amino Acid Composition Determines Peptide Activity Spectrum and Hot-Spot-Based Design of Merecidin. ,"There is a great interest in developing the only human cathelicidin into therapeutic molecules. The major antimicrobial region of human LL-37 corresponds to residues 17-32. The resultant peptide GF-17 shows a broad spectrum of antimicrobial activity against both Gram-positive and negative bacteria. By reducing the hydrophobic content, we previously succeeded in converting the broad-spectrum GF-17 to two narrow-spectrum peptides (GF-17d3 and KR-12) with activity against Gram-negative bacteria. This study demonstrates that substitution of multiple basic amino acids by hydrophobic alanines makes a broad-spectrum peptide 17BIPHE2 (designed based on GF-17d3) active against Staphylococcal pathogens but not other bacteria tested. Taken together, our results reveal distinct charge and hydrophobic requirements for peptides to kill Gram-positive or Gram-negative bacteria. This finding is in line with the bioinformatics analysis of the peptides in the Antimicrobial Peptide Database (http://aps.unmc.edu/AP). In addition, a hot spot arginine is identified and used to design merecidin with reduced toxicity to human cells. Merecidin protects wax moth larvae (Galleria mellonella) from the infection of methicillin-resistant S. aureus USA300. These new selective peptides constitute interesting candidates for future development.",2018-03-26 +29021305,Predicting the Functional Impact of KCNQ1 Variants of Unknown Significance. ,"An emerging standard-of-care for long-QT syndrome uses clinical genetic testing to identify genetic variants of the KCNQ1 potassium channel. 
However, interpreting results from genetic testing is confounded by the presence of variants of unknown significance for which there is inadequate evidence of pathogenicity. In this study, we curated from the literature a high-quality set of 107 functionally characterized KCNQ1 variants. Based on this data set, we completed a detailed quantitative analysis on the sequence conservation patterns of subdomains of KCNQ1 and the distribution of pathogenic variants therein. We found that conserved subdomains generally are critical for channel function and are enriched with dysfunctional variants. Using this experimentally validated data set, we trained a neural network, designated Q1VarPred, specifically for predicting the functional impact of KCNQ1 variants of unknown significance. The estimated predictive performance of Q1VarPred in terms of Matthew's correlation coefficient and area under the receiver operating characteristic curve were 0.581 and 0.884, respectively, superior to the performance of 8 previous methods tested in parallel. Q1VarPred is publicly available as a web server at http://meilerlab.org/q1varpred. Although a plethora of tools are available for making pathogenicity predictions over a genome-wide scale, previous tools fail to perform in a robust manner when applied to KCNQ1. The contrasting and favorable results for Q1VarPred suggest a promising approach, where a machine-learning algorithm is tailored to a specific protein target and trained with a functionally validated data set to calibrate informatics tools.",2017-10-01 +27000288,Cancer-disease associations: A visualization and animation through medical big data.,"

Objective

Cancer is the primary disease responsible for death and disability worldwide. Currently, prevention and early detection represents the best hope for cure. Knowing the expected diseases that occur with a particular cancer in advance could lead to physicians being able to better tailor their treatment for cancer. The aim of this study was to build an animated visualization tool called as Cancer Associations Map Animation (CAMA), to chart the association of cancers with other disease over time.

Methods

The study population was collected from the Taiwan National Health Insurance Database during the period January 2000 to December 2002, 782 million outpatient visits were used to compute the associations of nine major cancers with other diseases. A motion chart was used to quantify and visualize the associations between diseases and cancers.

Results

The CAMA motion chart that was built successfully facilitated the observation of cancer-disease associations across ages and genders. The CAMA system can be accessed online at http://203.71.86.98/web/runq16.html.

Conclusion

The CAMA animation system is an animated medical data visualization tool which provides a dynamic, time-lapse, animated view of cancer-disease associations across different age groups and gender. Derived from a large, nationwide healthcare dataset, this exploratory data analysis tool can detect cancer comorbidities earlier than is possible by manual inspection. Taking into account the trajectory of cancer-specific comorbidity development may facilitate clinicians and healthcare researchers to more efficiently explore early stage hypotheses, develop new cancer treatment approaches, and identify potential effect modifiers or new risk factors associated with specific cancers.",2016-01-14 +26002883,pwOmics: an R package for pathway-based integration of time-series omics data using public database knowledge.,"

Unlabelled

Characterization of biological processes is progressively enabled with the increased generation of omics data on different signaling levels. Here we present a straightforward approach for the integrative analysis of data from different high-throughput technologies based on pathway and interaction models from public databases. pwOmics performs pathway-based level-specific data comparison of coupled human proteomic and genomic/transcriptomic datasets based on their log fold changes. Separate downstream and upstream analyses results on the functional levels of pathways, transcription factors and genes/transcripts are performed in the cross-platform consensus analysis. These provide a basis for the combined interpretation of regulatory effects over time. Via network reconstruction and inference methods (Steiner tree, dynamic Bayesian network inference) consensus graphical networks can be generated for further analyses and visualization.

Availability and implementation

The R package pwOmics is freely available on Bioconductor (http://www.bioconductor.org/).

Contact

astrid.wachter@med.uni-goettingen.de.",2015-05-21 +29741573,ProteomeVis: a web app for exploration of protein properties from structure to sequence evolution across organisms' proteomes.,"Motivation:Protein evolution spans time scales and its effects span the length of an organism. A web app named ProteomeVis is developed to provide a comprehensive view of protein evolution in the Saccharomyces cerevisiae and Escherichia coli proteomes. ProteomeVis interactively creates protein chain graphs, where edges between nodes represent structure and sequence similarities within user-defined ranges, to study the long time scale effects of protein structure evolution. The short time scale effects of protein sequence evolution are studied by sequence evolutionary rate (ER) correlation analyses with protein properties that span from the molecular to the organismal level. Results:We demonstrate the utility and versatility of ProteomeVis by investigating the distribution of edges per node in organismal protein chain universe graphs (oPCUGs) and putative ER determinants. S.cerevisiae and E.coli oPCUGs are scale-free with scaling constants of 1.79 and 1.56, respectively. Both scaling constants can be explained by a previously reported theoretical model describing protein structure evolution. Protein abundance most strongly correlates with ER among properties in ProteomeVis, with Spearman correlations of -0.49 (P-value < 10-10) and -0.46 (P-value < 10-10) for S.cerevisiae and E.coli, respectively. This result is consistent with previous reports that found protein expression to be the most important ER determinant. Availability and implementation:ProteomeVis is freely accessible at http://proteomevis.chem.harvard.edu. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-10-01 +25904633,Introducing the PRIDE Archive RESTful web services.,"The PRIDE (PRoteomics IDEntifications) database is one of the world-leading public repositories of mass spectrometry (MS)-based proteomics data and it is a founding member of the ProteomeXchange Consortium of proteomics resources. In the original PRIDE database system, users could access data programmatically by accessing the web services provided by the PRIDE BioMart interface. New REST (REpresentational State Transfer) web services have been developed to serve the most popular functionality provided by BioMart (now discontinued due to data scalability issues) and address the data access requirements of the newly developed PRIDE Archive. Using the API (Application Programming Interface) it is now possible to programmatically query for and retrieve peptide and protein identifications, project and assay metadata and the originally submitted files. Searching and filtering is also possible by metadata information, such as sample details (e.g. species and tissues), instrumentation (mass spectrometer), keywords and other provided annotations. The PRIDE Archive web services were first made available in April 2014. The API has already been adopted by a few applications and standalone tools such as PeptideShaker, PRIDE Inspector, the Unipept web application and the Python-based BioServices package. This application is free and open to all users with no login requirement and can be accessed at http://www.ebi.ac.uk/pride/ws/archive/.",2015-04-22 +26589281,Large-scale machine learning for metagenomics sequence classification.,"

Motivation

Metagenomics characterizes the taxonomic diversity of microbial communities by sequencing DNA directly from an environmental sample. One of the main challenges in metagenomics data analysis is the binning step, where each sequenced read is assigned to a taxonomic clade. Because of the large volume of metagenomics datasets, binning methods need fast and accurate algorithms that can operate with reasonable computing requirements. While standard alignment-based methods provide state-of-the-art performance, compositional approaches that assign a taxonomic class to a DNA read based on the k-mers it contains have the potential to provide faster solutions.

Results

We propose a new rank-flexible machine learning-based compositional approach for taxonomic assignment of metagenomics reads and show that it benefits from increasing the number of fragments sampled from reference genome to tune its parameters, up to a coverage of about 10, and from increasing the k-mer size to about 12. Tuning the method involves training machine learning models on about 10(8) samples in 10(7) dimensions, which is out of reach of standard softwares but can be done efficiently with modern implementations for large-scale machine learning. The resulting method is competitive in terms of accuracy with well-established alignment and composition-based tools for problems involving a small to moderate number of candidate species and for reasonable amounts of sequencing errors. We show, however, that machine learning-based compositional approaches are still limited in their ability to deal with problems involving a greater number of species and more sensitive to sequencing errors. We finally show that the new method outperforms the state-of-the-art in its ability to classify reads from species of lineage absent from the reference database and confirm that compositional approaches achieve faster prediction times, with a gain of 2-17 times with respect to the BWA-MEM short read mapper, depending on the number of candidate species and the level of sequencing noise.

Availability and implementation

Data and codes are available at http://cbio.ensmp.fr/largescalemetagenomics

Contact

pierre.mahe@biomerieux.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-20 +29575684,New mutations and an updated database for the patched-1 (PTCH1) gene.,"

Background

Basal cell nevus syndrome (BCNS) is an autosomal dominant disorder characterized by multiple basal cell carcinomas (BCCs), maxillary keratocysts, and cerebral calcifications. BCNS most commonly is caused by a germline mutation in the patched-1 (PTCH1) gene. PTCH1 mutations are also described in patients with holoprosencephaly.

Methods

We have established a locus-specific database for the PTCH1 gene using the Leiden Open Variation Database (LOVD). We included 117 new PTCH1 variations, in addition to 331 previously published unique PTCH1 mutations. These new mutations were found in 141 patients who had a positive PTCH1 mutation analysis in either the VU University Medical Centre (VUMC) or Maastricht University Medical Centre (MUMC) between 1995 and 2015.

Results

The database contains 331 previously published unique PTCH1 mutations and 117 new PTCH1 variations.

Conclusion

We have established a locus-specific database for the PTCH1 gene using the Leiden Open Variation Database (LOVD). The database provides an open collection for both clinicians and researchers and is accessible online at http://www.lovd.nl/PTCH1.",2018-03-25 +29718246,Covariate-adjusted heatmaps for visualizing biological data via correlation decomposition.,"Motivation:Heatmap is a popular visualization technique in biology and related fields. In this study, we extend heatmaps within the framework of matrix visualization (MV) by incorporating a covariate adjustment process through the estimation of conditional correlations. MV can explore the embedded information structure of high-dimensional large-scale datasets effectively without dimension reduction. The benefit of the proposed covariate-adjusted heatmap is in the exploration of conditional association structures among the subjects or variables that cannot be done with conventional MV. Results:For adjustment of a discrete covariate, the conditional correlation is estimated by the within and between analysis. This procedure decomposes a correlation matrix into the within- and between-component matrices. The contribution of the covariate effects can then be assessed through the relative structure of the between-component to the original correlation matrix while the within-component acts as a residual. When a covariate is of continuous nature, the conditional correlation is equivalent to the partial correlation under the assumption of a joint normal distribution. A test is then employed to identify the variable pairs which possess the most significant differences at varying levels of correlation before and after a covariate adjustment. In addition, a z-score significance map is constructed to visualize these results. A simulation and three biological datasets are employed to illustrate the power and versatility of our proposed method. 
Availability and implementation:GAP is available to readers and is free to non-commercial applications. The installation instructions, the user's manual, and the detailed tutorials can be found at http://gap.stat.sinica.edu.tw/Software/GAP. Supplementary information:Supplementary Data are available at Bioinformatics online.",2018-10-01 +29353341,Field of View Normalization in Multi-Site Brain MRI.,"Multi-site brain MRI analysis is needed in big data neuroimaging studies, but challenging. The challenges lie in almost every analysis step including skull stripping. The diversities in multi-site brain MR images make it difficult to tune parameters specific to subjects or imaging protocols. Alternatively, using constant parameter settings often leads to inaccurate, inconsistent and even failed skull stripping results. One reason is that images scanned at different sites, under different scanners or protocols, and/or by different technicians often have very different fields of view (FOVs). Normalizing FOV is currently done manually or using ad hoc pre-processing steps, which do not always generalize well to multi-site diverse images. In this paper, we show that (a) a generic FOV normalization approach is possible in multi-site diverse images; we show experiments on images acquired from Philips, GE, Siemens scanners, from 1.0T, 1.5T, 3.0T field of strengths, and from subjects 0-90 years of ages; and (b) generic FOV normalization improves skull stripping accuracy and consistency for multiple skull stripping algorithms; we show this effect for 5 skull stripping algorithms including FSL's BET, AFNI's 3dSkullStrip, FreeSurfer's HWA, BrainSuite's BSE, and MASS. 
We have released our FOV normalization software at http://www.nitrc.org/projects/normalizefov .",2018-10-01 +26516188,Integrated interactions database: tissue-specific view of the human and model organism interactomes.,"IID (Integrated Interactions Database) is the first database providing tissue-specific protein-protein interactions (PPIs) for model organisms and human. IID covers six species (S. cerevisiae (yeast), C. elegans (worm), D. melonogaster (fly), R. norvegicus (rat), M. musculus (mouse) and H. sapiens (human)) and up to 30 tissues per species. Users query IID by providing a set of proteins or PPIs from any of these organisms, and specifying species and tissues where IID should search for interactions. If query proteins are not from the selected species, IID enables searches across species and tissues automatically by using their orthologs; for example, retrieving interactions in a given tissue, conserved in human and mouse. Interaction data in IID comprises three types of PPI networks: experimentally detected PPIs from major databases, orthologous PPIs and high-confidence computationally predicted PPIs. Interactions are assigned to tissues where their proteins pairs or encoding genes are expressed. IID is a major replacement of the I2D interaction database, with larger PPI networks (a total of 1,566,043 PPIs among 68,831 proteins), tissue annotations for interactions, and new query, analysis and data visualization capabilities. IID is available at http://ophid.utoronto.ca/iid.",2015-10-29 +31105332,Is Two Too Many? Parity and Mothers' Labor Force Exit.,"

Objective

How do women's chances of labor force exit vary by the number of children they have?

Background

Conventional wisdom suggests there may be a tipping point at the second child when women are particularly likely to leave. Women who only ever have one child, by contrast, are thought to be uniquely unlikely to exit.

Method

Using data from the nationally representative 1979-2012 waves of the National Longitudinal Survey of Youth 1979 (https://www.nlsinfo.org/content/cohorts/nlsy79), event history methods estimate the likelihood of labor force exit as women progress across parity transitions.

Results

Results show no evidence for a tipping point around the birth of second children. Women are instead most likely to leave the labor force when they are pregnant with their first child and each subsequent child is associated with a smaller increase in the probability of exit. In addition, women who only ever have one child are less likely to leave the labor force than those who have more children and these differences arise as early as their pregnancies with their first children. College-educated women who only ever have one child are especially unlikely to exit.

Conclusion

Findings thus do not support the second child tipping point hypothesis, but they emphasize the importance of completed parity and the transition to motherhood for mothers' labor force behavior.",2018-10-01 +29855322,Core Hunter 3: flexible core subset selection.,"BACKGROUND:Core collections provide genebank curators and plant breeders a way to reduce size of their collections and populations, while minimizing impact on genetic diversity and allele frequency. Many methods have been proposed to generate core collections, often using distance metrics to quantify the similarity of two accessions, based on genetic marker data or phenotypic traits. Core Hunter is a multi-purpose core subset selection tool that uses local search algorithms to generate subsets relying on one or more metrics, including several distance metrics and allelic richness. RESULTS:In version 3 of Core Hunter (CH3) we have incorporated two new, improved methods for summarizing distances to quantify diversity or representativeness of the core collection. A comparison of CH3 and Core Hunter 2 (CH2) showed that these new metrics can be effectively optimized with less complex algorithms, as compared to those used in CH2. CH3 is more effective at maximizing the improved diversity metric than CH2, still ensures a high average and minimum distance, and is faster for large datasets. Using CH3, a simple stochastic hill-climber is able to find highly diverse core collections, and the more advanced parallel tempering algorithm further increases the quality of the core and further reduces variability across independent samples. We also evaluate the ability of CH3 to simultaneously maximize diversity, and either representativeness or allelic richness, and compare the results with those of the GDOpt and SimEli methods. 
CH3 can sample equally representative cores as GDOpt, which was specifically designed for this purpose, and is able to construct cores that are simultaneously more diverse, and either are more representative or have higher allelic richness, than those obtained by SimEli. CONCLUSIONS:In version 3, Core Hunter has been updated to include two new core subset selection metrics that construct cores for representativeness or diversity, with improved performance. It combines and outperforms the strengths of other methods, as it (simultaneously) optimizes a variety of metrics. In addition, CH3 is an improvement over CH2, with the option to use genetic marker data or phenotypic traits, or both, and improved speed. Core Hunter 3 is freely available on http://www.corehunter.org .",2018-05-31 +21715385,"The 2011 Bioinformatics Links Directory update: more resources, tools and databases and features to empower the bioinformatics community.","The Bioinformatics Links Directory continues its collaboration with Nucleic Acids Research to collaboratively publish and compile a freely accessible, online collection of tools, databases and resource materials for bioinformatics and molecular biology research. The July 2011 Web Server issue of Nucleic Acids Research adds an additional 78 web server tools and 14 updates to the directory at http://bioinformatics.ca/links_directory/.",2011-07-01 +30059222,Algorithmic Analysis of Cahn-Ingold-Prelog Rules of Stereochemistry: Proposals for Revised Rules and a Guide for Machine Implementation.,"The most recent version of the Cahn-Ingold-Prelog rules for the determination of stereodescriptors as described in Nomenclature of Organic Chemistry: IUPAC Recommendations and Preferred Names 2013 (the ""Blue Book""; Favre and Powell. Royal Society of Chemistry, 2014; http://dx.doi.org/10.1039/9781849733069 ) were analyzed by an international team of cheminformatics software developers. 
Algorithms for machine implementation were designed, tested, and cross-validated. Deficiencies in Sequence Rules 1b and 2 were found, and proposed language for their modification is presented. A concise definition of an additional rule (""Rule 6"", below) is proposed, which succinctly covers several cases only tangentially mentioned in the 2013 recommendations. Each rule is discussed from the perspective of machine implementation. The four resultant implementations are supported by a 300-compound validation suite in both 2D and 3D structure data file (SDF) format as well as SMILES ( https://cipvalidationsuite.github.io/ValidationSuite ). The validation suites include all significant examples in Chapter 9 of the Blue Book, as well as several additional structures that highlight more complex aspects of the rules not addressed or not clearly analyzed in that work. These additional structures support a case for the need for modifications to the Sequence Rules.",2018-08-17 +,An ontology of fungal subcellular traits,"•Premise of the study: The Fungal Subcellular Ontology used in the Assembling the Fungal Tree of Life project is a taxon-wide ontology (controlled vocabulary for attributes) designed to clarify and integrate the broad range of subcellular characters and character states used in higher-level fungal systematics. As in the algae, cellular characters are important phylogenetic markers in kingdom Fungi. The Fungal Subcellular Ontology has been developed primarily to help researchers, especially systematists, in their search for information on subcellular characters across the Fungi, and it complements existing biological ontologies, including the Gene Ontology. •METHODS: The character and character state data set used in the Assembling the Fungal Tree of Life Structural and Biochemical Database (http://aftol.umn.edu) is the source of terms for generating the ontology. 
After the terms were accessioned and defined, they were combined in OBO-Edit file format, and the ontology was edited using OBO-Edit, an open source Java tool supported by the Gene Ontology project. •Key results: The Fungal Subcellular Ontology covers both model and nonmodel fungi in great detail and is downloadable in OBO-Edit format at website http://aftol.umn.edu/ontology/fungal_subcellular.obo. •CONCLUSIONS: The ontology provides a controlled vocabulary of fungal subcellular terms and functions as an operating framework for the Assembling the Fungal Tree of Life Structural and Biochemical Database. An ontology-based design enhances reuse of data deposited in the Structural and Biochemical Database from other independent biological and genetic databases. Data integration approaches that advance access to data from the diversity of biological databases are imperative as interdisciplinary research gains importance. In this sense, the Fungal Subcellular Ontology becomes highly relevant to mycologists as well as nonmycologists because fungi interact actively as symbionts and parasites or passively with many other life forms.",2011-09-01 +28334276,MotifMap-RNA: a genome-wide map of RBP binding sites.,"

Motivation

RNA plays a critical role in gene expression and its regulation. RNA binding proteins (RBPs), in turn, are important regulators of RNA. Thanks to the availability of large scale data for RBP binding motifs and in vivo binding sites results in the form of eCLIP experiments, it is now possible to computationally predict RBP binding sites across the whole genome.

Results

We describe MotifMap-RNA, an extension of MotifMap which predicts binding sites for RBP motifs across human and mouse genomes and allows large scale querying of predicted binding sites.

Availability and implementation

The data and corresponding web server are available from: http://motifmap-rna.ics.uci.edu/ as part of the MotifMap web portal.

Contact

rspitale@uci.edu or pfbaldi@uci.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +26503254,"Digital development: a database of cell lineage differentiation in C. elegans with lineage phenotypes, cell-specific gene functions and a multiscale model.","Developmental systems biology is poised to exploit large-scale data from two approaches: genomics and live imaging. The combination of the two offers the opportunity to map gene functions and gene networks in vivo at single-cell resolution using cell tracking and quantification of cellular phenotypes. Here we present Digital Development (http://www.digital-development.org), a database of cell lineage differentiation with curated phenotypes, cell-specific gene functions and a multiscale model. The database stores data from recent systematic studies of cell lineage differentiation in the C. elegans embryo containing ∼ 200 conserved genes, 1400 perturbed cell lineages and 600,000 digitized single cells. Users can conveniently browse, search and download four categories of phenotypic and functional information from an intuitive web interface. This information includes lineage differentiation phenotypes, cell-specific gene functions, differentiation landscapes and fate choices, and a multiscale model of lineage differentiation. Digital Development provides a comprehensive, curated, multidimensional database for developmental biology. The scale, resolution and richness of biological information presented here facilitate exploration of gene-specific and systems-level mechanisms of lineage differentiation in Metazoans.",2015-10-25 +29495575,RPiRLS: Quantitative Predictions of RNA Interacting with Any Protein of Known Sequence. ,"RNA-protein interactions (RPIs) have critical roles in numerous fundamental biological processes, such as post-transcriptional gene regulation, viral assembly, cellular defence and protein synthesis. 
As the number of available RNA-protein binding experimental data has increased rapidly due to high-throughput sequencing methods, it is now possible to measure and understand RNA-protein interactions by computational methods. In this study, we integrate a sequence-based derived kernel with regularized least squares to perform prediction. The derived kernel exploits the contextual information around an amino acid or a nucleic acid as well as the repetitive conserved motif information. We propose a novel machine learning method, called RPiRLS to predict the interaction between any RNA and protein of known sequences. For the RPiRLS classifier, each protein sequence comprises up to 20 diverse amino acids but for the RPiRLS-7G classifier, each protein sequence is represented by using 7-letter reduced alphabets based on their physiochemical properties. We evaluated both methods on a number of benchmark data sets and compared their performances with two newly developed and state-of-the-art methods, RPI-Pred and IPMiner. On the non-redundant benchmark test sets extracted from the PRIDB, the RPiRLS method outperformed RPI-Pred and IPMiner in terms of accuracy, specificity and sensitivity. Further, RPiRLS achieved an accuracy of 92% on the prediction of lncRNA-protein interactions. The proposed method can also be extended to construct RNA-protein interaction networks. The RPiRLS web server is freely available at http://bmc.med.stu.edu.cn/RPiRLS.",2018-02-28 +28379298,Gracob: a novel graph-based constant-column biclustering method for mining growth phenotype data.,"

Motivation

Growth phenotype profiling of genome-wide gene-deletion strains over stress conditions can offer a clear picture that the essentiality of genes depends on environmental conditions. Systematically identifying groups of genes from such high-throughput data that share similar patterns of conditional essentiality and dispensability under various environmental conditions can elucidate how genetic interactions of the growth phenotype are regulated in response to the environment.

Results

We first demonstrate that detecting such 'co-fit' gene groups can be cast as a less well-studied problem in biclustering, i.e. constant-column biclustering. Despite significant advances in biclustering techniques, very few were designed for mining in growth phenotype data. Here, we propose Gracob, a novel, efficient graph-based method that casts and solves the constant-column biclustering problem as a maximal clique finding problem in a multipartite graph. We compared Gracob with a large collection of widely used biclustering methods that cover different types of algorithms designed to detect different types of biclusters. Gracob showed superior performance on finding co-fit genes over all the existing methods on both a variety of synthetic data sets with a wide range of settings, and three real growth phenotype datasets for E. coli, proteobacteria and yeast.

Availability and implementation

Our program is freely available for download at http://sfb.kaust.edu.sa/Pages/Software.aspx.

Contact

xin.gao@kaust.edu.sa.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +25941089,A database of age-appropriate average MRI templates.,"This article summarizes a life-span neurodevelopmental MRI database. The study of neurostructural development or neurofunctional development has been hampered by the lack of age-appropriate MRI reference volumes. This causes misspecification of segmented data, irregular registrations, and the absence of appropriate stereotaxic volumes. We have created the ""Neurodevelopmental MRI Database"" that provides age-specific reference data from 2 weeks through 89 years of age. The data are presented in fine-grained ages (e.g., 3 months intervals through 1 year; 6 months intervals through 19.5 years; 5 year intervals from 20 through 89 years). The base component of the database at each age is an age-specific average MRI template. The average MRI templates are accompanied by segmented partial volume estimates for segmenting priors, and a common stereotaxic atlas for infant, pediatric, and adult participants. The database is available online (http://jerlab.psych.sc.edu/NeurodevelopmentalMRIDatabase/).",2015-05-02 +23500449,"The DEER database: a bridge connecting drugs, environmental effects, and regulations.","Variability in patient drug responses is observed with increasing frequency, necessitating the establishment of causal associations between factors and drug response phenotypes. This individual variability can be caused by genetic factors and environmental factors (ENFs). Although pharmacogenetics has been instrumental in describing genetic variations, frameworks for understanding the association between ENFs (particularly chemical ENFs) and drug responses are lacking. In this study, we constructed a novel database, DEER, for interpretations of chemical ENF effects on drug responses. DEER includes computational predictions of the associations between chemical ENFs and drug responses. 
Putative regulatory intermediates such as transcription factors, cytochrome P450s (CYPs), drug targets, and transporters as well as chemical similarities are provided to support our predictions. DEER currently encompasses 579 drugs, 401 chemical ENFs, and 9247 predicted drug-ENF associations. The entire dataset can be easily queried through a search page. The results can be downloaded, and each drug-ENF association with intermediary factors can be displayed via a graphical viewer. DEER is available at http://bsb.kiz.ac.cn:90/DEER/. We expect this approach and resource to be valuable for personalized medicine and drug development.",2013-03-13 +26682918,High-Specificity Targeted Functional Profiling in Microbial Communities with ShortBRED.,"Profiling microbial community function from metagenomic sequencing data remains a computationally challenging problem. Mapping millions of DNA reads from such samples to reference protein databases requires long run-times, and short read lengths can result in spurious hits to unrelated proteins (loss of specificity). We developed ShortBRED (Short, Better Representative Extract Dataset) to address these challenges, facilitating fast, accurate functional profiling of metagenomic samples. ShortBRED consists of two components: (i) a method that reduces reference proteins of interest to short, highly representative amino acid sequences (""markers"") and (ii) a search step that maps reads to these markers to quantify the relative abundance of their associated proteins. After evaluating ShortBRED on synthetic data, we applied it to profile antibiotic resistance protein families in the gut microbiomes of individuals from the United States, China, Malawi, and Venezuela. Our results support antibiotic resistance as a core function in the human gut microbiome, with tetracycline-resistant ribosomal protection proteins and Class A beta-lactamases being the most widely distributed resistance mechanisms worldwide. 
ShortBRED markers are applicable to other homology-based search tasks, which we demonstrate here by identifying phylogenetic signatures of antibiotic resistance across more than 3,000 microbial isolate genomes. ShortBRED can be applied to profile a wide variety of protein families of interest; the software, source code, and documentation are available for download at http://huttenhower.sph.harvard.edu/shortbred.",2015-12-18 +26311606,Connected brains and minds--The UMCD repository for brain connectivity matrices.,"We describe the USC Multimodal Connectivity Database (http://umcd.humanconnectomeproject.org), an interactive web-based platform for brain connectivity matrix sharing and analysis. The site enables users to download connectivity matrices shared by other users, upload matrices from their own published studies, or select a specific matrix and perform a real-time graph theory-based analysis and visualization of network properties. The data shared on the site span a broad spectrum of functional and structural brain connectivity information from humans across the entire age range (fetal to age 89), representing an array of different neuropsychiatric and neurodegenerative disease populations (autism spectrum disorder, ADHD, and APOE-4 carriers). An analysis combining 7 different datasets shared on the site illustrates the diversity of the data and the potential for yielding deeper insight by assessing new connectivity matrices with respect to population-wide network properties represented in the UMCD.",2015-08-24 +29471580,Prevalence of early and late prematurity is similar among pediatric type 1 diabetes patients and the general population.,"

Background

The incidence of type 1 diabetes mellitus (T1DM) has increased in recent decades, as has the incidence of preterm births (<37 weeks). We aimed to evaluate and compare the prevalence of prematurity and early prematurity (<34 weeks) and birth season variability among T1DM and non-T1DM children.

Methods

A nationwide cross-sectional study was conducted, with linkage of data from 13 paediatric diabetes centers and Israeli National Registries, including T1DM patients and general non-T1DM population, born during 2000 to 2013. Gathered data included ethnicity, gender, birth week, weight, and season. The prevalence of prematurity and birth season were compared with the general population birth registry using Pearson Chi-square test.

Results

The study population included 1452 T1DM patients, 52.7% males, and 2 138 668 subjects in the general non-T1DM population, 51.2% males. The prevalence of late and early prematurity was similar between groups (6.1% and 2.2% in the T1DM group vs 5.6% and 2.0% in the general non-T1DM group, P = 0.25 and P = 0.38, respectively). OR for prematurity among T1DM patients was 1.15 (0.95-1.39), P = 0.16. No difference in birth season was demonstrated between preterm and term, in T1DM and general non-T1DM populations. Ethiopian descent was more prevalent among T1DM patients compared with the non-T1DM population, in both term and preterm born.

Conclusions

This is the largest population-based study, and the first in the Middle East geographical area, indicating that prematurity, including early prematurity, is not associated with T1DM during childhood. The study was registered at https://clinicaltrials.gov/: NCT02929953.",2018-03-24 +24946828,Leaf phenomics: a systematic reverse genetic screen for Arabidopsis leaf mutants.,"The study and eventual manipulation of leaf development in plants requires a thorough understanding of the genetic basis of leaf organogenesis. Forward genetic screens have identified hundreds of Arabidopsis mutants with altered leaf development, but the genome has not yet been saturated. To identify genes required for leaf development we are screening the Arabidopsis Salk Unimutant collection. We have identified 608 lines that exhibit a leaf phenotype with full penetrance and almost constant expressivity and 98 additional lines with segregating mutant phenotypes. To allow indexing and integration with other mutants, the mutant phenotypes were described using a custom leaf phenotype ontology. We found that the indexed mutation is present in the annotated locus for 78% of the 553 mutants genotyped, and that in half of these the annotated T-DNA is responsible for the phenotype. To quickly map non-annotated T-DNA insertions, we developed a reliable, cost-effective and easy method based on whole-genome sequencing. To enable comprehensive access to our data, we implemented a public web application named PhenoLeaf (http://genetics.umh.es/phenoleaf) that allows researchers to query the results of our screen, including text and visual phenotype information. We demonstrated how this new resource can facilitate gene function discovery by identifying and characterizing At1g77600, which we found to be required for proximal-distal cell cycle-driven leaf growth, and At3g62870, which encodes a ribosomal protein needed for cell proliferation and chloroplast function. 
This collection provides a valuable tool for the study of leaf development, characterization of biomass feedstocks and examination of other traits in this fundamental photosynthetic organ.",2014-07-31 +24312616,Structural and functional analysis of human SOD1 in amyotrophic lateral sclerosis.,"Amyotrophic lateral sclerosis (ALS) is a fatal neurodegenerative disease with familial inheritance (fALS) in 5% to 10% of cases; 25% of those are caused by mutations in the superoxide dismutase 1 (SOD1) protein. More than 100 mutations in the SOD1 gene have been associated with fALS, altering the geometry of the active site, protein folding and the interaction between monomers. We performed a functional analysis of non-synonymous single nucleotide polymorphisms (nsSNPs) in 124 fALS SOD1 mutants. Eleven different algorithms were used to estimate the functional impact of the replacement of one amino acid on protein structure: SNPs&GO, PolyPhen-2, SNAP, PMUT, Sift, PhD-SNP, nsSNPAnalyzer, TANGO, WALTZ, LIMBO and FoldX. For the structural analysis, theoretical models of 124 SNPs of SOD1 were created by comparative modeling using the MHOLline workflow, which includes Modeller and Procheck. Models were aligned with the native protein by the TM-align algorithm. A human-curated database was developed using the server side include in Java, JMOL. The results of this functional analysis indicate that the majority of the 124 natural mutants are harmful to the protein structure and thus corroborate the correlation between the reported mutations and fALS. In the structural analysis, all models showed conformational changes when compared to wild-type SOD1, and the degree of structural alignment varied between them. The SOD1 database converge structural and functional analyses of SOD1; it is a vast resource for the molecular analysis of amyotrophic lateral sclerosis, which allows the user to expand his knowledge on the molecular basis of the disease. 
The SOD1 database is available at http://bioinfogroup.com/database.",2013-12-02 +26173699,IMG-ABC: A Knowledge Base To Fuel Discovery of Biosynthetic Gene Clusters and Novel Secondary Metabolites.,"

Unlabelled

In the discovery of secondary metabolites, analysis of sequence data is a promising exploration path that remains largely underutilized due to the lack of computational platforms that enable such a systematic approach on a large scale. In this work, we present IMG-ABC (https://img.jgi.doe.gov/abc), an atlas of biosynthetic gene clusters within the Integrated Microbial Genomes (IMG) system, which is aimed at harnessing the power of ""big"" genomic data for discovering small molecules. IMG-ABC relies on IMG's comprehensive integrated structural and functional genomic data for the analysis of biosynthetic gene clusters (BCs) and associated secondary metabolites (SMs). SMs and BCs serve as the two main classes of objects in IMG-ABC, each with a rich collection of attributes. A unique feature of IMG-ABC is the incorporation of both experimentally validated and computationally predicted BCs in genomes as well as metagenomes, thus identifying BCs in uncultured populations and rare taxa. We demonstrate the strength of IMG-ABC's focused integrated analysis tools in enabling the exploration of microbial secondary metabolism on a global scale, through the discovery of phenazine-producing clusters for the first time in Alphaproteobacteria. IMG-ABC strives to fill the long-existent void of resources for computational exploration of the secondary metabolism universe; its underlying scalable framework enables traversal of uncovered phylogenetic and chemical structure space, serving as a doorway to a new era in the discovery of novel molecules.

Importance

IMG-ABC is the largest publicly available database of predicted and experimental biosynthetic gene clusters and the secondary metabolites they produce. The system also includes powerful search and analysis tools that are integrated with IMG's extensive genomic/metagenomic data and analysis tool kits. As new research on biosynthetic gene clusters and secondary metabolites is published and more genomes are sequenced, IMG-ABC will continue to expand, with the goal of becoming an essential component of any bioinformatic exploration of the secondary metabolism world.",2015-07-14 +28334267,ResistoMap-online visualization of human gut microbiota antibiotic resistome.,"

Abstract

We created ResistoMap—a Web-based interactive visualization of the presence of genetic determinants conferring resistance to antibiotics, biocides and heavy metals in human gut microbiota. ResistoMap displays the data on more than 1500 published gut metagenomes of world populations including both healthy subjects and patients. Multiparameter display filters allow visual assessment of the associations between the meta-data and proportions of resistome. The geographic map navigation layer allows users to state hypotheses regarding the global trends of antibiotic resistance and correlates the gut resistome variations with the national clinical guidelines on antibiotics application.

Availability and implementation

ResistoMap was implemented using AngularJS, CoffeeScript, D3.js and TopoJSON. The tool is publicly available at http://resistomap.rcpcm.org.

Contact

yarygin@phystech.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +28753663,metaSNV: A tool for metagenomic strain level analysis.,"We present metaSNV, a tool for single nucleotide variant (SNV) analysis in metagenomic samples, capable of comparing populations of thousands of bacterial and archaeal species. The tool uses as input nucleotide sequence alignments to reference genomes in standard SAM/BAM format, performs SNV calling for individual samples and across the whole data set, and generates various statistics for individual species including allele frequencies and nucleotide diversity per sample as well as distances and fixation indices across samples. Using published data from 676 metagenomic samples of different sites in the oral cavity, we show that the results of metaSNV are comparable to those of MIDAS, an alternative implementation for metagenomic SNV analysis, while data processing is faster and has a smaller storage footprint. Moreover, we implement a set of distance measures that allow the comparison of genomic variation across metagenomic samples and delineate sample-specific variants to enable the tracking of specific strain populations over time. The implementation of metaSNV is available at: http://metasnv.embl.de/.",2017-07-28 +28186259,MBV: a method to solve sample mislabeling and detect technical bias in large combined genotype and sequencing assay datasets.,"

Motivation

Large genomic datasets combining genotype and sequence data, such as for expression quantitative trait loci (eQTL) detection, require perfect matching between both data types.

Results

We described here MBV (Match BAM to VCF); a method to quickly solve sample mislabeling and detect cross-sample contamination and PCR amplification bias.

Availability and implementation

MBV is implemented in C++ as an independent component of the QTLtools software package; the binary and source code are freely available at https://qtltools.github.io/qtltools/ .

Contact

olivier.delaneau@unige.ch or emmanouil.dermitzakis@unige.ch.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +27285450,Interventions for treating post-extraction bleeding.,"

Background

Post-extraction bleeding (PEB) is a recognised, frequently encountered complication in dental practice, which is defined as bleeding that continues beyond 8 to 12 hours after dental extraction. The incidence of post-extraction bleeding varies from 0% to 26%. If post-extraction bleeding is not managed, complications can range from soft tissue haematomas to severe blood loss. Local causes of bleeding include soft tissue and bone bleeding. Systemic causes include platelet problems, coagulation disorders or excessive fibrinolysis, and inherited or acquired problems (medication induced). There is a wide array of techniques suggested for the treatment of post-extraction bleeding, which include interventions aimed at both local and systemic causes.

Objectives

To assess the effects of interventions for treating different types of post-extraction bleeding.

Search methods

We searched the following electronic databases: The Cochrane Oral Health Group Trials Register (to 22 March 2016); The Cochrane Central Register of Controlled Trials (CENTRAL; The Cochrane Library 2016, Issue 2); MEDLINE via OVID (1946 to 22 March 2016); CINAHL via EBSCO (1937 to 22 March 2016). Due to the ongoing Cochrane project to search EMBASE and add retrieved clinical trials to CENTRAL, we searched only the last 11 months of EMBASE via OVID (1 May 2015 to 22 March 2016). We placed no further restrictions on the language or date of publication. We searched the US National Institutes of Health Trials Register (http://clinicaltrials.gov), and the WHO Clinical Trials Registry Platform for ongoing trials (http://apps.who.int/trialsearch/default.aspx). We also checked the reference lists of excluded trials.

Selection criteria

We considered randomised controlled trials (RCTs) that evaluated any intervention for treating PEB, with male or female participants of any age, regardless of type of teeth (anterior or posterior, mandibular or maxillary). Trials could compare one type of intervention with another, with placebo, or with no treatment.

Data collection and analysis

Three pairs of review authors independently screened search records. We obtained full papers for potentially relevant trials. If data had been extracted, we would have followed the methods described in the Cochrane Handbook for Systematic Reviews of Interventions for the statistical analysis.

Main results

We did not find any randomised controlled trial suitable for inclusion in this review.

Authors' conclusions

We were unable to identify any reports of randomised controlled trials that evaluated the effects of different interventions for the treatment of post-extraction bleeding. In view of the lack of reliable evidence on this topic, clinicians must use their clinical experience to determine the most appropriate means of treating this condition, depending on patient-related factors. There is a need for well designed and appropriately conducted clinical trials on this topic, which conform to the CONSORT statement (www.consort-statement.org/).",2016-06-10 +22419780,SEQanswers: an open access community for collaboratively decoding genomes.,"

Summary

The affordability of high-throughput sequencing has created an unprecedented surge in the use of genomic data in basic, translational and clinical research. The rapid evolution of sequencing technology, coupled with its broad adoption across biology and medicine, necessitates fast, collaborative interdisciplinary discussion. SEQanswers provides a real-time knowledge-sharing resource to address this need, covering experimental and computational aspects of sequencing and sequence analysis. Developers of popular analysis tools are among the >4000 active members, and ~40 peer-reviewed publications have referenced SEQanswers.

Availability

The SEQanswers community is freely accessible at http://SEQanswers.com/",2012-03-13 +28430871,ClusPro PeptiDock: efficient global docking of peptide recognition motifs using FFT.,"

Summary

We present an approach for the efficient docking of peptide motifs to their free receptor structures. Using a motif based search, we can retrieve structural fragments from the Protein Data Bank (PDB) that are very similar to the peptide's final, bound conformation. We use a Fast Fourier Transform (FFT) based docking method to quickly perform global rigid body docking of these fragments to the receptor. According to CAPRI peptide docking criteria, an acceptable conformation can often be found among the top-ranking predictions.

Availability and implementation

The method is available as part of the protein-protein docking server ClusPro at https://peptidock.cluspro.org/nousername.php.

Contact

midas@laufercenter.org or oraf@ekmd.huji.ac.il.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +24027417,POPBAM: Tools for Evolutionary Analysis of Short Read Sequence Alignments.,"

Background

While many bioinformatics tools currently exist for assembling and discovering variants from next-generation sequence data, there are very few tools available for performing evolutionary analyses from these data. Evolutionary and population genomics studies hold great promise for providing valuable insights into natural selection, the effect of mutations on phenotypes, and the origin of species. Thus, there is a need for an extensible and flexible computational tool that can function into a growing number of evolutionary bioinformatics pipelines.

Results

This paper describes the POPBAM software, which is a comprehensive set of computational tools for evolutionary analysis of whole-genome alignments consisting of multiple individuals, from multiple populations or species. POPBAM works directly from BAM-formatted assembly files, calls variant sites, and calculates a variety of commonly used evolutionary sequence statistics. POPBAM is designed primarily to perform analyses in sliding windows across chromosomes or scaffolds. POPBAM accurately measures nucleotide diversity, population divergence, linkage disequilibrium, and the frequency spectrum of mutations from two or more populations. POPBAM can also produce phylogenetic trees of all samples in a BAM file. Finally, I demonstrate that the implementation of POPBAM is both fast and memory-efficient, and also can feasibly scale to the analysis of large BAM files with many individuals and populations. Software: The POPBAM program is written in C/C++ and is available from http://dgarriga.github.io/POPBAM. The program has few dependencies and can be built on a variety of Linux platforms. The program is open-source and users are encouraged to participate in the development of this resource.",2013-09-01 +30797148,Circular RNA circVAPA is up-regulated and exerts oncogenic properties by sponging miR-101 in colorectal cancer.,"Circular RNAs (circRNAs) are a novel class of non-coding RNAs with distinct properties and diverse physiological and pathological functions. However, the functions of circRNAs in colorectal cancer (CRC) remain elusive. This study aimed to investigate the functional roles of circVAPA in CRC. High-throughput RNA sequencing was performed in 4 paired CRC tissues, and circVAPA (hsa_circ_0006990), was identified as a potential functional circRNA. Using quantitative real-time polymerase chain reaction (qRT-PCR), circVAPA was found to be up-regulated in CRC patients' tissues and plasma. 
Furthermore, circVAPA level was associated with unfavorable clinicopathologic features in CRC. The area under curve (AUC) of ROC was 0.724, suggesting that plasma level of circVAPA could serve as a promising biomarker for CRC detection. Sanger sequencing confirmed the back-splice junction sequences of circVAPA. Actinomycin D and RNase R treatments suggested that circVAPA was highly stable compared with its linear counterpart, and qRT-PCR for the circVAPA level in nuclear and cytoplasmic fractions indicated that circVAPA was predominantly localized in the cytoplasm. Gain-of-function and loss-of-function studies in CRC cell lines indicated that circVAPA could promote CRC cell proliferation, migration, invasion, and inhibit apoptosis. miRanda software (v3.3a) was used to predict target miRNAs of circVAPA. Moreover, target miRNAs associated with the KEGG pathway of COLORECTAL CANCER (Entry: map05210; https://www.kegg.jp/) were screened using DIANA-miRPath v.3 platform (Reverse Search module; TarBase v7.0 method). The analyses by miRanda and miRPath suggested that circVAPA could potentially bind to hsa-miR-101-3p (miR-101) associated with the COLORECTAL CANCER pathway. Luciferase reporter assay confirmed a direct interaction between circVAPA and miR-101. Furthermore, circVAPA had no effect on the expression level of miR-101, and miR-101 over-expression had the similar tumor-suppressing effects as circVAPA silencing. The tumor-promoting effect of circVAPA over-expression could be reversed by the up-regulation of miR-101. These data demonstrated that circVAPA promoted CRC progression by sponging miR-101. In conclusion, we have verified that circVAPA is up-regulated in CRC patients' tissues and plasma, and exerts oncogenic properties by sponging miR-101 in CRC. CircVAPA could serve as a promising biomarker and a therapeutic target for CRC.",2019-02-21 +30594246,Parameter estimation of qualitative biological regulatory networks on high performance computing hardware.,"

Background

Biological Regulatory Networks (BRNs) are responsible for developmental and maintenance related functions in organisms. These functions are implemented by the dynamics of BRNs and are sensitive to regulations enforced by specific activators and inhibitors. The logical modeling formalism by René Thomas incorporates this sensitivity with a set of logical parameters modulated by available regulators, varying with time. With the increase in complexity of BRNs in terms of number of entities and their interactions, the task of parameters estimation becomes computationally expensive with existing sequential SMBioNET tool. We extend the existing sequential implementation of SMBioNET by using a data decomposition approach using a Java messaging library called MPJ Express. The approach divides the parameters space into different regions and each region is then explored in parallel on High Performance Computing (HPC) hardware.

Results

The performance of the parallel approach is evaluated on BRNs of different sizes, and experimental results on multicore and cluster computers showed almost linear speed-up. This parallel code can be executed on a wide range of concurrent hardware including laptops equipped with multicore processors, and specialized distributed memory computer systems. To demonstrate the application of parallel implementation, we selected a case study of Hexosamine Biosynthetic Pathway (HBP) in cancer progression to identify potential therapeutic targets against cancer. A set of logical parameters were computed for HBP model that directs the biological system to a state of recovery. Furthermore, the parameters also suggest a potential therapeutic intervention that restores homeostasis. Additionally, the performance of parallel application was also evaluated on a network (comprising of 23 entities) of Fibroblast Growth Factor Signalling in Drosophila melanogaster.

Conclusions

Qualitative modeling framework is widely used for investigating dynamics of biological regulatory networks. However, computation of model parameters in qualitative modeling is computationally intensive. In this work, we presented results of our Java based parallel implementation that provides almost linear speed-up on both multicore and cluster platforms. The parallel implementation is available at https://psmbionet.github.io .",2018-12-29 +30334310,Determining day-to-day human variation in indirect calorimetry using Bayesian decision theory.,"

New findings

What is the central question of this study? We sought to understand the day-to-day variability of human indirect calorimetry during rest and exercise. Previous work has been unable to separate human day-to-day variability from measurement error and within-trial human variability. We developed models accounting for different levels of human- and machine-level variance and compared the probability density functions using total variation distance. What is the main finding and its importance? After accounting for multiple levels of variance, the average human day-to-day variability of minute ventilation, CO2 output and O2 uptake is 4.0, 1.8 and 2.0%, respectively. This is a new method to understand human variability and directly enhances our understanding of human variance during indirect calorimetry.

Abstract

One of the challenges of precision medicine is understanding when serial measurements taken across days are quantifiably different from each other. Previous work examining gas exchange measured by indirect calorimetry has been unable to separate differential measurement error, within-trial human variance and day-to-day human variance effectively in order to ascertain how variable humans are across testing sessions. We used previously published reliability data to construct models of indirect calorimetry variance and compare these models with methods arising from Bayesian decision theory. These models are conditional on the data upon which they are derived and assume that errors conform to a truncated normal distribution. A serial analysis of modelled probability density functions demonstrated that the average human day-to-day variance in minute ventilation ($\dot{V}_{\mathrm{E}}$), carbon dioxide output ($\dot{V}\mathrm{CO}_2$) and oxygen uptake ($\dot{V}\mathrm{O}_2$) was 4.0, 1.8 and 2.0%, respectively. However, the average day-to-day variability masked a wide range of non-linear variance across flow rates, particularly for $\dot{V}_{\mathrm{E}}$. This is the first report isolating day-to-day human variability in indirect calorimetry gas exchange from other sources of variability. This method can be extended to other physiological tools, and an extension of this work facilitates a statistical tool to examine within-trial $\dot{V}\mathrm{O}_2$ differences, available in a graphical user interface.",2018-10-17 +23782618,Human interactome resource and gene set linkage analysis for the functional interpretation of biologically meaningful gene sets.,"

Motivation

A molecular interaction network can be viewed as a network in which genes with related functions are connected. Therefore, at a systems level, connections between individual genes in a molecular interaction network can be used to infer the collective functional linkages between biologically meaningful gene sets.

Results

We present the human interactome resource and the gene set linkage analysis (GSLA) tool for the functional interpretation of biologically meaningful gene sets observed in experiments. GSLA determines whether an observed gene set has significant functional linkages to established biological processes. When an observed gene set is not enriched by known biological processes, traditional enrichment-based interpretation methods cannot produce functional insights, but GSLA can still evaluate whether those genes work in concert to regulate specific biological processes, thereby suggesting the functional implications of the observed gene set. The quality of human interactome resource and the utility of GSLA are illustrated with multiple assessments.

Availability

http://www.cls.zju.edu.cn/hir/",2013-06-19 +28141874,"Metabox: A Toolbox for Metabolomic Data Analysis, Interpretation and Integrative Exploration.","Similar to genomic and proteomic platforms, metabolomic data acquisition and analysis is becoming a routine approach for investigating biological systems. However, computational approaches for metabolomic data analysis and integration are still maturing. Metabox is a bioinformatics toolbox for deep phenotyping analytics that combines data processing, statistical analysis, functional analysis and integrative exploration of metabolomic data within proteomic and transcriptomic contexts. With the number of options provided in each analysis module, it also supports data analysis of other 'omic' families. The toolbox is an R-based web application, and it is freely available at http://kwanjeeraw.github.io/metabox/ under the GPL-3 license.",2017-01-31 +27367363,DAPPLE 2: a Tool for the Homology-Based Prediction of Post-Translational Modification Sites.,"The post-translational modification of proteins is critical for regulating their function. Although many post-translational modification sites have been experimentally determined, particularly in certain model organisms, experimental knowledge of these sites is severely lacking for many species. Thus, it is important to be able to predict sites of post-translational modification in such species. Previously, we described DAPPLE, a tool that facilitates the homology-based prediction of one particular post-translational modification, phosphorylation, in an organism of interest using known phosphorylation sites from other organisms. Here, we describe DAPPLE 2, which expands and improves upon DAPPLE in three major ways. First, it predicts sites for many post-translational modifications (20 different types) using data from several sources (15 online databases). 
Second, it has the ability to make predictions approximately 2-7 times faster than DAPPLE depending on the database size and the organism of interest. Third, it simplifies and accelerates the process of selecting predicted sites of interest by categorizing them based on gene ontology terms, keywords, and signaling pathways. We show that DAPPLE 2 can successfully predict known human post-translational modification sites using, as input, known sites from species that are either closely (e.g., mouse) or distantly (e.g., yeast) related to humans. DAPPLE 2 can be accessed at http://saphire.usask.ca/saphire/dapple2 .",2016-07-13 +25607539,A toolkit for ARB to integrate custom databases and externally built phylogenies.,"

Unlabelled

Researchers are perpetually amassing biological sequence data. The computational approaches employed by ecologists for organizing this data (e.g. alignment, phylogeny, etc.) typically scale nonlinearly in execution time with the size of the dataset. This often serves as a bottleneck for processing experimental data since many molecular studies are characterized by massive datasets. To keep up with experimental data demands, ecologists are forced to choose between continually upgrading expensive in-house computer hardware or outsourcing the most demanding computations to the cloud. Outsourcing is attractive since it is the least expensive option, but does not necessarily allow direct user interaction with the data for exploratory analysis. Desktop analytical tools such as ARB are indispensable for this purpose, but they do not necessarily offer a convenient solution for the coordination and integration of datasets between local and outsourced destinations. Therefore, researchers are currently left with an undesirable tradeoff between computational throughput and analytical capability. To mitigate this tradeoff we introduce a software package to leverage the utility of the interactive exploratory tools offered by ARB with the computational throughput of cloud-based resources. Our pipeline serves as middleware between the desktop and the cloud allowing researchers to form local custom databases containing sequences and metadata from multiple resources and a method for linking data outsourced for computation back to the local database. A tutorial implementation of the toolkit is provided in the supporting information, S1 Tutorial.

Availability

http://www.ece.drexel.edu/gailr/EESI/tutorial.php.",2015-01-21 +29554099,The Pathway Coexpression Network: Revealing pathway relationships.,"A goal of genomics is to understand the relationships between biological processes. Pathways contribute to functional interplay within biological processes through complex but poorly understood interactions. However, limited functional references for global pathway relationships exist. Pathways from databases such as KEGG and Reactome provide discrete annotations of biological processes. Their relationships are currently either inferred from gene set enrichment within specific experiments, or by simple overlap, linking pathway annotations that have genes in common. Here, we provide a unifying interpretation of functional interaction between pathways by systematically quantifying coexpression between 1,330 canonical pathways from the Molecular Signatures Database (MSigDB) to establish the Pathway Coexpression Network (PCxN). We estimated the correlation between canonical pathways valid in a broad context using a curated collection of 3,207 microarrays from 72 normal human tissues. PCxN accounts for shared genes between annotations to estimate significant correlations between pathways with related functions rather than with similar annotations. We demonstrate that PCxN provides novel insight into mechanisms of complex diseases using an Alzheimer's Disease (AD) case study. PCxN retrieved pathways significantly correlated with an expert curated AD gene list. These pathways have known associations with AD and were significantly enriched for genes independently associated with AD. As a further step, we show how PCxN complements the results of gene set enrichment methods by revealing relationships between enriched pathways, and by identifying additional highly correlated pathways. PCxN revealed that correlated pathways from an AD expression profiling study include functional clusters involved in cell adhesion and oxidative stress. 
PCxN provides expanded connections to pathways from the extracellular matrix. PCxN provides a powerful new framework for interrogation of global pathway relationships. Comprehensive exploration of PCxN can be performed at http://pcxn.org/.",2018-03-19 +28961796,In silico identification of rescue sites by double force scanning.,"

Motivation

A deleterious amino acid change in a protein can be compensated by a second-site rescue mutation. These compensatory mechanisms can be mimicked by drugs. In particular, the location of rescue mutations can be used to identify protein regions that can be targeted by small molecules to reactivate a damaged mutant.

Results

We present the first general computational method to detect rescue sites. By mimicking the effect of mutations through the application of forces, the double force scanning (DFS) method identifies the second-site residues that make the protein structure most resilient to the effect of pathogenic mutations. We tested DFS predictions against two datasets containing experimentally validated and putative evolutionary-related rescue sites. A remarkably good agreement was found between predictions and experimental data. Indeed, almost half of the rescue sites in p53 were correctly predicted by DFS, with 65% of the remaining sites in contact with DFS predictions. Similar results were found for other proteins in the evolutionary dataset.

Availability and implementation

The DFS code is available under GPL at https://fornililab.github.io/dfs/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +24484385,uPEPperoni: an online tool for upstream open reading frame location and analysis of transcript conservation.,"

Background

Several small open reading frames located within the 5' untranslated regions of mRNAs have recently been shown to be translated. In humans, about 50% of mRNAs contain at least one upstream open reading frame representing a large resource of coding potential. We propose that some upstream open reading frames encode peptides that are functional and contribute to proteome complexity in humans and other organisms. We use the term uPEPs to describe peptides encoded by upstream open reading frames.

Results

We have developed an online tool, termed uPEPperoni, to facilitate the identification of putative bioactive peptides. uPEPperoni detects conserved upstream open reading frames in eukaryotic transcripts by comparing query nucleotide sequences against mRNA sequences within the NCBI RefSeq database. The algorithm first locates the main coding sequence and then searches for open reading frames 5' to the main start codon which are subsequently analysed for conservation. uPEPperoni also determines the substitution frequency for both the upstream open reading frames and the main coding sequence. In addition, the uPEPperoni tool produces sequence identity heatmaps which allow rapid visual inspection of conserved regions in paired mRNAs.

Conclusions

uPEPperoni features user-nominated settings including, nucleotide match/mismatch, gap penalties, Ka/Ks ratios and output mode. The heatmap output shows levels of identity between any two sequences and provides easy recognition of conserved regions. Furthermore, this web tool allows comparison of evolutionary pressures acting on the upstream open reading frame against other regions of the mRNA. Additionally, the heatmap web applet can also be used to visualise the degree of conservation in any pair of sequences. uPEPperoni is freely available on an interactive web server at http://upep-scmb.biosci.uq.edu.au.",2014-02-01 +25433698,Lipid-Pro: a computational lipid identification solution for untargeted lipidomics on data-independent acquisition tandem mass spectrometry platforms.,"

Unlabelled

A major challenge for mass spectrometric-based lipidomics, aiming at describing all lipid species in a biological sample, lies in the computational and bioinformatic processing of the large amount of data that arises after data acquisition. Lipid-Pro is a software tool that supports the identification of lipids by interpreting large datasets generated by liquid chromatography--tandem mass spectrometry (LC-MS/MS) using the advanced data-independent acquisition mode MS(E). In the MS(E) mode, the instrument fragments all molecular ions generated from a sample and records time-resolved molecular ion data as well as fragment ion data for every detectable molecular ion. Lipid-Pro matches the retention time-aligned mass-to-charge ratio data of molecular- and fragment ions with a lipid database and generates a report on all identified lipid species. For generation of the lipid database, Lipid-Pro provides a module for construction of lipid species and their fragments using a flexible building block approach. Hence, Lipid-Pro is an easy to use analysis tool to interpret complex MS(E) lipidomics data and also offers a module to generate a user-specific lipid database.

Availability and implementation

Lipid-Pro is freely available at: http://www.neurogenetics.biozentrum.uni-wuerzburg.de/en/project/services/lipidpro/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-29 +29040688,The online Tabloid Proteome: an annotated database of protein associations.,"A complete knowledge of the proteome can only be attained by determining the associations between proteins, along with the nature of these associations (e.g. physical contact in protein-protein interactions, participation in complex formation or different roles in the same pathway). Despite extensive efforts in elucidating direct protein interactions, our knowledge on the complete spectrum of protein associations remains limited. We therefore developed a new approach that detects protein associations from identifications obtained after re-processing of large-scale, public mass spectrometry-based proteomics data. Our approach infers protein association based on the co-occurrence of proteins across many different proteomics experiments, and provides information that is almost completely complementary to traditional direct protein interaction studies. We here present a web interface to query and explore the associations derived from this method, called the online Tabloid Proteome. The online Tabloid Proteome also integrates biological knowledge from several existing resources to annotate our derived protein associations. The online Tabloid Proteome is freely available through a user-friendly web interface, which provides intuitive navigation and data exploration options for the user at http://iomics.ugent.be/tabloidproteome.",2018-01-01 +28472220,The spike-and-slab lasso Cox model for survival prediction and associated genes detection.,"

Motivation

Large-scale molecular profiling data have offered extraordinary opportunities to improve survival prediction of cancers and other diseases and to detect disease associated genes. However, there are considerable challenges in analyzing large-scale molecular data.

Results

We propose new Bayesian hierarchical Cox proportional hazards models, called the spike-and-slab lasso Cox, for predicting survival outcomes and detecting associated genes. We also develop an efficient algorithm to fit the proposed models by incorporating Expectation-Maximization steps into the extremely fast cyclic coordinate descent algorithm. The performance of the proposed method is assessed via extensive simulations and compared with the lasso Cox regression. We demonstrate the proposed procedure on two cancer datasets with censored survival outcomes and thousands of molecular features. Our analyses suggest that the proposed procedure can generate powerful prognostic models for predicting cancer survival and can detect associated genes.

Availability and implementation

The methods have been implemented in a freely available R package BhGLM ( http://www.ssg.uab.edu/bhglm/ ).

Contact

nyi@uab.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +29633591,Targetome Analysis Revealed Involvement of MiR-126 in Neurotrophin Signaling Pathway: A Possible Role in Prevention of Glioma Development.,"OBJECTIVES:For the first time, we used molecular signaling pathway enrichment analysis to determine possible involvement of miR-126 and IRS-1 in neurotrophin pathway. MATERIALS AND METHODS:In this prospective study, validated and predicted targets (targetome) of miR-126 were collected following searching miRtarbase (http://mirtarbase.mbc.nctu.edu.tw/) and miRWalk 2.0 databases, respectively. Then, approximate expression of miR-126 targeting in Glioma tissue was examined using UniGene database (http://www.ncbi.nlm.nih.gov/unigene). In silico molecular pathway enrichment analysis was carried out by DAVID 6.7 database (http://david.abcc.ncifcrf.gov/) to explore which signaling pathway is related to miR-126 targeting and how miR-126 attributes to glioma development. RESULTS:MiR-126 exerts a variety of functions in cancer pathogenesis via suppression of expression of target gene including PI3K, KRAS, EGFL7, IRS-1 and VEGF. Our bioinformatic studies implementing DAVID database, showed the involvement of miR-126 target genes in several signaling pathways including cancer pathogenesis, neurotrophin functions, Glioma formation, insulin function, focal adhesion production, chemokine synthesis and secretion and regulation of the actin cytoskeleton. CONCLUSIONS:Taken together, we concluded that miR-126 enhances the formation of glioma cancer stem cell probably via down regulation of IRS-1 in neurotrophin signaling pathway.",2018-03-18 +29560826,GxGrare: gene-gene interaction analysis method for rare variants from high-throughput sequencing data.,"BACKGROUND:With the rapid advancement of array-based genotyping techniques, genome-wide association studies (GWAS) have successfully identified common genetic variants associated with common complex diseases. 
However, it has been shown that only a small proportion of the genetic etiology of complex diseases could be explained by the genetic factors identified from GWAS. This missing heritability could possibly be explained by gene-gene interaction (epistasis) and rare variants. There has been an exponential growth of gene-gene interaction analysis for common variants in terms of methodological developments and practical applications. Also, the recent advancement of high-throughput sequencing technologies makes it possible to conduct rare variant analysis. However, little progress has been made in gene-gene interaction analysis for rare variants. RESULTS:Here, we propose GxGrare which is a new gene-gene interaction method for the rare variants in the framework of the multifactor dimensionality reduction (MDR) analysis. The proposed method consists of three steps; 1) collapsing the rare variants, 2) MDR analysis for the collapsed rare variants, and 3) detect top candidate interaction pairs. GxGrare can be used for the detection of not only gene-gene interactions, but also interactions within a single gene. The proposed method is illustrated with 1080 whole exome sequencing data of the Korean population in order to identify causal gene-gene interaction for rare variants for type 2 diabetes. CONCLUSION:The proposed GxGrare performs well for gene-gene interaction detection with collapsing of rare variants. GxGrare is available at http://bibs.snu.ac.kr/software/gxgrare which contains simulation data and documentation. Supported operating systems include Linux and OS X.",2018-03-19 +27897013,HARNESSING BIG DATA FOR PRECISION MEDICINE: INFRASTRUCTURES AND APPLICATIONS.,"Precision medicine is a health management approach that accounts for individual differences in genetic backgrounds and environmental exposures. 
With the recent advancements in high-throughput omics profiling technologies, collections of large study cohorts, and the developments of data mining algorithms, big data in biomedicine is expected to provide novel insights into health and disease states, which can be translated into personalized disease prevention and treatment plans. However, petabytes of biomedical data generated by multiple measurement modalities poses a significant challenge for data analysis, integration, storage, and result interpretation. In addition, patient privacy preservation, coordination between participating medical centers and data analysis working groups, as well as discrepancies in data sharing policies remain important topics of discussion. In this workshop, we invite experts in omics integration, biobank research, and data management to share their perspectives on leveraging big data to enable precision medicine.Workshop website: http://tinyurl.com/PSB17BigData; HashTag: #PSB17BigData.",2017-01-01 +29280997,DeepSig: deep learning improves signal peptide detection in proteins.,"Motivation:The identification of signal peptides in protein sequences is an important step toward protein localization and function characterization. Results:Here, we present DeepSig, an improved approach for signal peptide detection and cleavage-site prediction based on deep learning methods. Comparative benchmarks performed on an updated independent dataset of proteins show that DeepSig is the current best performing method, scoring better than other available state-of-the-art approaches on both signal peptide detection and precise cleavage-site identification. Availability and implementation:DeepSig is available as both standalone program and web server at https://deepsig.biocomp.unibo.it. All datasets used in this study can be obtained from the same website. Contact:pierluigi.martelli@unibo.it. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +30645180,State-Space Representations of Deep Neural Networks.,"This letter deals with neural networks as dynamical systems governed by finite difference equations. It shows that the introduction of k -many skip connections into network architectures, such as residual networks and additive dense networks, defines k th order dynamical equations on the layer-wise transformations. Closed-form solutions for the state-space representations of general k th order additive dense networks, where the concatenation operation is replaced by addition, as well as k th order smooth networks, are found. The developed provision endows deep neural networks with an algebraic structure. Furthermore, it is shown that imposing k th order smoothness on network architectures with d -many nodes per layer increases the state-space dimension by a multiple of k , and so the effective embedding dimension of the data manifold by the neural network is k·d -many dimensions. It follows that network architectures of these types reduce the number of parameters needed to maintain the same embedding dimension by a factor of k2 when compared to an equivalent first-order, residual network. Numerical simulations and experiments on CIFAR10, SVHN, and MNIST have been conducted to help understand the developed theory and efficacy of the proposed concepts.",2019-01-15 +30568854,RIP-MD: a tool to study residue interaction networks in protein molecular dynamics.,"Protein structure is not static; residues undergo conformational rearrangements and, in doing so, create, stabilize or break non-covalent interactions. Molecular dynamics (MD) is a technique used to simulate these movements with atomic resolution. 
However, given the data-intensive nature of the technique, gathering relevant information from MD simulations is a complex and time consuming process requiring several computational tools to perform these analyses. Among different approaches, the study of residue interaction networks (RINs) has proven to facilitate the study of protein structures. In a RIN, nodes represent amino-acid residues and the connections between them depict non-covalent interactions. Here, we describe residue interaction networks in protein molecular dynamics (RIP-MD), a visual molecular dynamics (VMD) plugin to facilitate the study of RINs using trajectories obtained from MD simulations of proteins. Our software generates RINs from MD trajectory files. The non-covalent interactions defined by RIP-MD include H-bonds, salt bridges, VdWs, cation-π, π-π, Arginine-Arginine, and Coulomb interactions. In addition, RIP-MD also computes interactions based on distances between Cαs and disulfide bridges. The results of the analysis are shown in an user friendly interface. Moreover, the user can take advantage of the VMD visualization capacities, whereby through some effortless steps, it is possible to select and visualize interactions described for a single, several or all residues in a MD trajectory. Network and descriptive table files are also generated, allowing their further study in other specialized platforms. Our method was written in python in a parallelized fashion. This characteristic allows the analysis of large systems impossible to handle otherwise. RIP-MD is available at http://www.dlab.cl/ripmd.",2018-12-07 +23193263,PrePPI: a structure-informed database of protein-protein interactions.,"PrePPI (http://bhapp.c2b2.columbia.edu/PrePPI) is a database that combines predicted and experimentally determined protein-protein interactions (PPIs) using a Bayesian framework. 
Predicted interactions are assigned probabilities of being correct, which are derived from calculated likelihood ratios (LRs) by combining structural, functional, evolutionary and expression information, with the most important contribution coming from structure. Experimentally determined interactions are compiled from a set of public databases that manually collect PPIs from the literature and are also assigned LRs. A final probability is then assigned to every interaction by combining the LRs for both predicted and experimentally determined interactions. The current version of PrePPI contains ∼2 million PPIs that have a probability more than ∼0.1 of which ∼60 000 PPIs for yeast and ∼370 000 PPIs for human are considered high confidence (probability > 0.5). The PrePPI database constitutes an integrated resource that enables users to examine aggregate information on PPIs, including both known and potentially novel interactions, and that provides structural models for many of the PPIs.",2012-11-27 +30775931,Exposure to Residential Greenness as a Predictor of Cause-Specific Mortality and Stroke Incidence in the Rome Longitudinal Study.,"

Background

Living in areas with higher levels of surrounding greenness and access to urban green areas have been associated with beneficial health outcomes. Some studies suggested a beneficial influence on mortality, but the evidence is still controversial.

Objectives

We used longitudinal data from a large cohort to estimate associations of two measures of residential greenness exposure with cause-specific mortality and stroke incidence.

Methods

We studied a population-based cohort of 1,263,721 residents in Rome aged [Formula: see text], followed from 2001 to 2013. As greenness exposure, we utilized the leaf area index (LAI), which expresses the tree canopy as the leaf area per unit ground surface area, and the normalized difference vegetation index (NDVI) within 300- and [Formula: see text] buffers around home addresses. We estimated the association between the two measures of residential greenness and the outcomes using Cox models, after controlling for relevant individual covariates and contextual characteristics, and explored potential mediation by air pollution [fine particulate matter with aerodynamic diameter [Formula: see text] [Formula: see text] and [Formula: see text]] and road traffic noise.

Results

We observed 198,704 deaths from nonaccidental causes, 81,269 from cardiovascular diseases [CVDs; 29,654 from ischemic heart disease (IHD)], 18,090 from cerebrovascular diseases, and 29,033 incident cases of stroke. Residential greenness, expressed as interquartile range (IQR) increase in LAI within [Formula: see text], was inversely associated with stroke incidence {hazard ratio (HR) 0.977 [95% confidence interval (CI): 0.961, 0.994]} and mortality for nonaccidental [HR 0.988 (95% CI: 0.981, 0.994)], cardiovascular [HR 0.984 (95% CI: 0.974, 0.994)] and cerebrovascular diseases [HR 0.964 (95% CI: 0.943, 0.985)]. Similar results were obtained using NDVI with 300- or [Formula: see text] buffers.

Conclusions

Living in greener areas was associated with better health outcomes in our study, which could be partly due to reduced exposure to environmental hazards. Further research is required to understand the underlying mechanisms. https://doi.org/10.1289/EHP2854.",2019-02-01 +29993817,EBWS: Essential Bioinformatics Web Services for Sequence Analyses. ,"The Essential Bioinformatics Web Services (EBWS) are implemented on a new PHP-based server that provides useful tools for analyses of DNA, RNA, and protein sequences applying a user-friendly interface. Nine Web-based applets are currently available on the Web server. They include reverse complementary DNA and random DNA/RNA/peptide oligomer generators, a pattern sequence searcher, a DNA restriction cutter, a prokaryotic ORF finder, a random DNA/RNA mutation generator. It also includes calculators of melting temperature (TM) of DNA/DNA, RNA/RNA, and DNA/RNA hybrids, a guide RNA (gRNA) generator for the CRISPR/Cas9 system and an annealing temperature calculator for multiplex PCR. The pattern-searching applet has no limitations in the number of motif inputs and applies a toolbox of Regex quantifiers that can be used for defining complex sequence queries of RNA, DNA, and protein sequences. The DNA enzyme digestion program utilizes a large database of 1502 restriction enzymes. The gRNA generator has a database of 25 bacterial genomes searchable for gRNA target sequences and has an option for searching in any genome sequence given by the user. All programs are permanently available online at http://penchovsky.atwebpages.com/applications.php without any restrictions.",2018-03-16 +30065887,Imputing missing distances in molecular phylogenetics.,"Missing data are frequently encountered in molecular phylogenetics, but there has been no accurate distance imputation method available for distance-based phylogenetic reconstruction. 
The general framework for distance imputation is to explore tree space and distance values to find an optimal combination of output tree and imputed distances. Here I develop a least-square method coupled with multivariate optimization to impute multiple missing distance in a distance matrix or from a set of aligned sequences with missing genes so that some sequences share no homologous sites (whose distances therefore need to be imputed). I show that phylogenetic trees can be inferred from distance matrices with about 10% of distances missing, and the accuracy of the resulting phylogenetic tree is almost as good as the tree from full information. The new method has the advantage over a recently published one in that it does not assume a molecular clock and is more accurate (comparable to maximum likelihood method based on simulated sequences). I have implemented the function in DAMBE software, which is freely available at http://dambe.bio.uottawa.ca.",2018-07-24 +30364075,How to incorporate patient and public perspectives into the design and conduct of research.,"International government guidance recommends patient and public involvement (PPI) to improve the relevance and quality of research.  PPI is defined as research being carried out 'with' or 'by' patients and members of the public rather than 'to', 'about' or 'for' them ( http://www.invo.org.uk/). Patient involvement is different from collecting data from patients as participants.  Ethical considerations also differ.  PPI is about patients actively contributing through discussion to decisions about research design, acceptability, relevance, conduct and governance from study conception to dissemination.  Occasionally patients lead or do research.  The research methods of PPI range from informal discussions to partnership research approaches such as action research, co-production and co-learning. 
This article discusses how researchers can involve patients when they are applying for research funding and considers some opportunities and pitfalls.  It reviews research funder requirements, draws on the literature and our collective experiences as clinicians, patients, academics and members of UK funding panels.",2018-06-18 +29915334,Highly accurate model for prediction of lung nodule malignancy with CT scans.,"Computed tomography (CT) examinations are commonly used to predict lung nodule malignancy in patients, which are shown to improve noninvasive early diagnosis of lung cancer. It remains challenging for computational approaches to achieve performance comparable to experienced radiologists. Here we present NoduleX, a systematic approach to predict lung nodule malignancy from CT data, based on deep learning convolutional neural networks (CNN). For training and validation, we analyze >1000 lung nodules in images from the LIDC/IDRI cohort. All nodules were identified and classified by four experienced thoracic radiologists who participated in the LIDC project. NoduleX achieves high accuracy for nodule malignancy classification, with an AUC of ~0.99. This is commensurate with the analysis of the dataset by experienced radiologists. Our approach, NoduleX, provides an effective framework for highly accurate nodule malignancy prediction with the model trained on a large patient population. Our results are replicable with software available at http://bioinformatics.astate.edu/NoduleX .",2018-06-18 +28581496,Differential analysis of RNA-seq incorporating quantification uncertainty.,"We describe sleuth (http://pachterlab.github.io/sleuth), a method for the differential analysis of gene expression data that utilizes bootstrapping in conjunction with response error linear modeling to decouple biological variance from inferential variance. 
sleuth is implemented in an interactive shiny app that utilizes kallisto quantifications and bootstraps for fast and accurate analysis of data from RNA-seq experiments.",2017-06-05 +22856649,A resource for benchmarking the usefulness of protein structure models.,"

Background

Increasingly, biologists and biochemists use computational tools to design experiments to probe the function of proteins and/or to engineer them for a variety of different purposes. The most effective strategies rely on the knowledge of the three-dimensional structure of the protein of interest. However, it is often the case that an experimental structure is not available and that models of different quality are used instead. On the other hand, the relationship between the quality of a model and its appropriate use is not easy to derive in general, and so far it has been analyzed in detail only for specific applications.

Results

This paper describes a database and related software tools that allow testing of a given structure based method on models of a protein representing different levels of accuracy. The comparison of the results of a computational experiment on the experimental structure and on a set of its decoy models will allow developers and users to assess which is the specific threshold of accuracy required to perform the task effectively.

Conclusions

The ModelDB server automatically builds decoy models of different accuracy for a given protein of known structure and provides a set of useful tools for their analysis. Pre-computed data for a non-redundant set of deposited protein structures are available for analysis and download in the ModelDB database. IMPLEMENTATION, AVAILABILITY AND REQUIREMENTS: Project name: A resource for benchmarking the usefulness of protein structure models. Project home page: http://bl210.caspur.it/MODEL-DB/MODEL-DB_web/MODindex.php. Operating system(s): Platform independent. Programming language: Perl-BioPerl (program); mySQL, Perl DBI and DBD modules (database); php, JavaScript, Jmol scripting (web server). Other requirements: Java Runtime Environment v1.4 or later, Perl, BioPerl, CPAN modules, HHsearch, Modeller, LGA, NCBI Blast package, DSSP, Speedfill (Surfnet) and PSAIA. License: Free. Any restrictions to use by non-academics: No.",2012-08-02 +28334224,Computational modeling of in vivo and in vitro protein-DNA interactions by multiple instance learning.,"

Motivation

The study of transcriptional regulation is still difficult yet fundamental in molecular biology research. While the development of both in vivo and in vitro profiling techniques have significantly enhanced our knowledge of transcription factor (TF)-DNA interactions, computational models of TF-DNA interactions are relatively simple and may not reveal sufficient biological insight. In particular, supervised learning based models for TF-DNA interactions attempt to map sequence-level features ( k -mers) to binding event but usually ignore the location of k -mers, which can cause data fragmentation and consequently inferior model performance.

Results

Here, we propose a novel algorithm based on the so-called multiple-instance learning (MIL) paradigm. MIL breaks each DNA sequence into multiple overlapping subsequences and models each subsequence separately, therefore implicitly takes into consideration binding site locations, resulting in both higher accuracy and better interpretability of the models. The results from both in vivo and in vitro TF-DNA interaction data show that our approach significantly outperforms conventional single-instance learning based algorithms. Importantly, the models learned from in vitro data using our approach can predict in vivo binding with very good accuracy. In addition, the location information obtained by our method provides additional insight for motif finding results from ChIP-Seq data. Finally, our approach can be easily combined with other state-of-the-art TF-DNA interaction modeling methods.

Availability and implementation

http://www.cs.utsa.edu/~jruan/MIL/.

Contact

jianhua.ruan@utsa.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +29806693,Neuronal calcineurin transcriptional targets parallel changes observed in Alzheimer disease brain.,"Synaptic dysfunction and loss are core pathological features in Alzheimer disease (AD). In the vicinity of amyloid-β plaques in animal models, synaptic toxicity occurs and is associated with chronic activation of the phosphatase calcineurin (CN). Indeed, pharmacological inhibition of CN blocks amyloid-β synaptotoxicity. We therefore hypothesized that CN-mediated transcriptional changes may contribute to AD neuropathology and tested this by examining the impact of CN over-expression on neuronal gene expression in vivo. We found dramatic transcriptional down-regulation, especially of synaptic mRNAs, in neurons chronically exposed to CN activation. Importantly, the transcriptional profile parallels the changes in human AD tissue. Bioinformatics analyses suggest that both nuclear factor of activated T cells and numerous microRNAs may all be impacted by CN, and parallel findings are observed in AD. These data and analyses support the hypothesis that at least part of the synaptic failure characterizing AD may result from aberrant CN activation leading to down-regulation of synaptic genes, potentially via activation of specific transcription factors and expression of repressive microRNAs.

Open practices

Open Science: This manuscript was awarded with the Open Materials Badge. For more information see: https://cos.io/our-services/open-science-badges/ Read the Editorial Highlight for this article on page 8.",2018-09-07 +26661513,ReactomePA: an R/Bioconductor package for reactome pathway analysis and visualization.,"Reactome is a manually curated pathway annotation database for unveiling high-order biological pathways from high-throughput data. ReactomePA is an R/Bioconductor package providing enrichment analyses, including hypergeometric test and gene set enrichment analyses. A functional analysis can be applied to the genomic coordination obtained from a sequencing experiment to analyze the functional significance of genomic loci including cis-regulatory elements and non-coding regions. Comparison among different experiments is also supported. Moreover, ReactomePA provides several visualization functions to produce highly customizable, publication-quality figures. The source code and documents of ReactomePA are freely available through Bioconductor (http://www.bioconductor.org/packages/ReactomePA).",2016-02-01 +22112530,LIPABASE: a database for 'true' lipase family enzymes.,"Lipase enzymes play an important role in lipid metabolism and are produced by a variety of species. Compared with animal, bacterial and fungal, little is known about plant lipases. Although lipases belong to many different protein families, they have the same architecture, the α/β-hydrolase fold and a conserved active site signature, the Gly-Xaa-Ser-Xaa-Gly motif. Several studies on enzymatic activity and interfacial activation phenomenon of lipases confirm the presence of consensus sequence and a conserved domain. 
Lipases can be divided into two main groups: carboxylesterases (EC 3.1.1.1); 'true' lipases (EC 3.1.1.3), which differ in several biochemical features, which allow us to develop a database that regroups all 'true' lipase proprieties to establish relationship between structure and function. LIPABASE is a centralised resource database, which provides information about 'true' lipase from different species. It includes general, taxonomic, physicochemical and molecular data. Access to LIPABASE is free and available at http://www.lipabase-pfba-tun.org.",2011-01-01 +27278817,Mining clinical attributes of genomic variants through assisted literature curation in Egas. ,"The veritable deluge of biological data over recent years has led to the establishment of a considerable number of knowledge resources that compile curated information extracted from the literature and store it in structured form, facilitating its use and exploitation. In this article, we focus on the curation of inherited genetic variants and associated clinical attributes, such as zygosity, penetrance or inheritance mode, and describe the use of Egas for this task. Egas is a web-based platform for text-mining assisted literature curation that focuses on usability through modern design solutions and simple user interactions. Egas offers a flexible and customizable tool that allows defining the concept types and relations of interest for a given annotation task, as well as the ontologies used for normalizing each concept type. Further, annotations may be performed on raw documents or on the results of automated concept identification and relation extraction tools. Users can inspect, correct or remove automatic text-mining results, manually add new annotations, and export the results to standard formats. 
Egas is compatible with the most recent versions of Google Chrome, Mozilla Firefox, Internet Explorer and Safari and is available for use at https://demo.bmd-software.com/egas/. Database URL: https://demo.bmd-software.com/egas/.",2016-06-07 +25838129,Systematic imaging reveals features and changing localization of mRNAs in Drosophila development. ,"mRNA localization is critical for eukaryotic cells and affects numerous transcripts, yet how cells regulate distribution of many mRNAs to their subcellular destinations is still unknown. We combined transcriptomics and systematic imaging to determine the tissue-specific expression and subcellular distribution of 5862 mRNAs during Drosophila oogenesis. mRNA localization is widespread in the ovary and detectable in all of its cell types-the somatic epithelial, the nurse cells, and the oocyte. Genes defined by a common RNA localization share distinct gene features and differ in expression level, 3'UTR length and sequence conservation from unlocalized mRNAs. Comparison of mRNA localizations in different contexts revealed that localization of individual mRNAs changes over time in the oocyte and between ovarian and embryonic cell types. This genome scale image-based resource (Dresden Ovary Table, DOT, http://tomancak-srv1.mpi-cbg.de/DOT/main.html) enables the transition from mechanistic dissection of singular mRNA localization events towards global understanding of how mRNAs transcribed in the nucleus distribute in cells.",2015-04-02 +27904603,Prevalence of nontuberculous mycobacteria isolated from environmental samples in Iran: A meta-analysis.,"

Background

While the most nontuberculous mycobacteria (NTMs) species are considered as opportunistic pathogens, some of them are related to several human infections. It is believed that environment is the main source for these infections. Distribution and scattering pattern of NTMs has not been well studied in Iran and a few studies about this subject have been done, so the aim of this study was to determine prevalence of NTMs in environmental samples from Iran.

Materials and methods

Data about prevalence of NTMs in environmental samples from Iran were obtained by searching databases. The studies presenting cross-sectional or cohort and the papers with sample size ≥30 were included. Then, the meta-analysis was performed using Comprehensive Meta-Analysis software and Cochran's Q and I² tests. The search strategy was based on the PRISMA protocol, which is available online (PRISMA, http://www.prisma-statement.org).

Results

The results of this meta-analysis showed that overall combined prevalence of NTMs in environmental samples from Iran was 38.3%. The frequency of NTM was higher in the north of Iran (73.2%). The most prevalent rapid-growing mycobacterium was Mycobacterium fortuitum (19.8%), and the most dominant slow-growing mycobacterium was Mycobacterium flavescens (16.8%).

Conclusion

In regard to increasing incidence of disease in immunocompromised patients and existence of different types of mycobacteria species in environmental samples, efforts should be focused on measures that will specifically remove NTMs from habitats where susceptible individuals are exposed.",2016-08-01 +26504143,PDID: database of molecular-level putative protein-drug interactions in the structural human proteome.,"

Motivation

Many drugs interact with numerous proteins besides their intended therapeutic targets and a substantial portion of these interactions is yet to be elucidated. Protein-Drug Interaction Database (PDID) addresses incompleteness of these data by providing access to putative protein-drug interactions that cover the entire structural human proteome.

Results

PDID covers 9652 structures from 3746 proteins and houses 16 800 putative interactions generated from close to 1.1 million accurate, all-atom structure-based predictions for several dozens of popular drugs. The predictions were generated with three modern methods: ILbind, SMAP and eFindSite. They are accompanied by propensity scores that quantify likelihood of interactions and coordinates of the putative location of the binding drugs in the corresponding protein structures. PDID complements the current databases that focus on the curated interactions and the BioDrugScreen database that relies on docking to find putative interactions. Moreover, we also include experimentally curated interactions which are linked to their sources: DrugBank, BindingDB and Protein Data Bank. Our database can be used to facilitate studies related to polypharmacology of drugs including repurposing and explaining side effects of drugs.

Availability and implementation

PDID database is freely available at http://biomine.ece.ualberta.ca/PDID/.",2015-10-26 +30424888,"Dengue vaccine: WHO position paper, September 2018 - Recommendations.","This article presents the World Health Organization's (WHO) recommendations on the use of dengue vaccine excerpted from the WHO position paper on dengue vaccine - September 2018, published in the Weekly Epidemiological Record [1]. This position paper replaces the July 2016 WHO position paper concerning the first licensed dengue vaccine, CYD-TDV [2]. The position paper presents new evidence that became available in November 2017. A retrospective analysis of data from clinical trials, using a new serological assay classified trial participants according to their dengue serostatus prior to receipt of the first vaccine dose. The analysis revealed an excess risk of severe dengue in seronegative vaccine recipients compared to seronegative non-vaccinated individuals, while confirming long-term protection in seropositive individuals [3]. The paper provides revised guidance on dengue vaccination strategies from a population health perspective. Footnotes to this paper provide a number of core references including references to grading tables that assess the quality of the scientific evidence, and to the evidence-to-recommendation table. In accordance with its mandate to provide guidance to Member States on health policy matters, WHO issues a series of regularly updated position papers on vaccines and combinations of vaccines against diseases that have an international public health impact. These papers are concerned primarily with the use of vaccines in large-scale immunization programmes; they summarize essential background information on diseases and vaccines, and conclude with WHO's current position on the use of vaccines in the global context. 
Recommendations on the use of dengue vaccine CYD-TDV were discussed by SAGE in April 2018; evidence presented at the meeting can be accessed at: http://www.who.int/immunization/sage/meetings/2018/april/presentations_background_docs/en/.",2018-11-10 +30639647,Effectiveness and safety of rituximab for the treatment of refractory systemic sclerosis associated calcinosis: A case series and systematic review of the literature.,"

Objective

To analyze the effectiveness and safety of rituximab (RTX) for the treatment of refractory systemic sclerosis (SSc)-associated calcinosis.

Methods

We undertook an observational study of patients with this complication treated with 1 or more cycles of RTX (1 g × 2 weeks) and evaluated for at least 12 months after RTX treatment in a single center. The primary outcome measures of the study were the improvement of calcinosis symptoms (pain, signs of local inflammation, and new episodes of skin ulceration) and the radiologic evolution of the calcification(s).

Results

We treated 8 patients with refractory SSc-related calcinosis with RTX (off-label use). The main indications for RTX were complicated calcinosis unresponsive to previous therapies with concomitant arthritis in 2 patients and refractory arthritis or interstitial lung fibrosing disease in the remaining 6 patients. The mean number of RTX cycles administered was 3.12 ± 2.1 (range, 1-7), the median duration of RTX treatment was 9 months (interquartile range [IQR], 7.5-36 months), and the median follow-up after the first infusion of RTX dose was 19 months (IQR, http://catsalut.gencat.cat/web/.content/minisite/catsalut/proveidors_professionals/medicaments_farmacia/phf_mhda/informes_camse/esclerosi_sistemica/Dictamen-CAMS_-ES_-web.pdf (n.d.) 5-45 months). Four patients (50%) had a significant improvement in clinical symptoms (sustained improvement in the visual analog scale for pain of at least 50% and no new episodes of local inflammation or skin ulceration). Two of these patients (25%) also had a complete resolution or significant reduction in the size of the calcification(s) on X-ray, according with the radiographical scoring system for calcinosis developed by the Scleroderma Clinical Trials Consortium. In the remaining 4 patients (50%), RTX did not provide any significant clinical or radiologic benefit for calcinosis. The frequency of adverse effects was low, occurring in only 1 patient (12.5%), who developed upper respiratory tract infections not requiring hospitalization.

Conclusion

Our preliminary data suggest that RTX may be helpful as a rescue therapy in selected cases of severe and refractory SSc-related calcinosis.",2019-01-11 +30294271,Identification of Antioxidant Proteins With Deep Learning From Sequence Information.,"Antioxidant proteins have been found closely linked to disease control for its ability to eliminate excess free radicals. Because of its medicinal value, the study of identifying antioxidant proteins is on the upsurge. Many machine-learning classifiers have performed poorly owing to the nonlinear and unbalanced nature of biological data. Recently, deep learning techniques showed advantages over many state-of-the-art machine learning methods in various fields. In this study, a deep learning based classifier was proposed to identify antioxidant proteins based on mixed g-gap dipeptide composition feature vector. The classifier employed deep autoencoder to extract nonlinear representation from raw input. The t-Distributed Stochastic Neighbor Embedding (t-SNE) was used for dimensionality reduction. Support vector machine was finally performed for classification. The classifier achieved F 1 score of 0.8842 and MCC of 0.7409 in 10-fold cross validation. Experimental results show that our proposed method outperformed the traditional machine learning methods and could be a promising tool for antioxidant protein identification. For the convenience of others' scientific research, we have developed a user-friendly web server called IDAod for antioxidant protein identification, which can be accessed freely at http://bigroup.uestc.edu.cn/IDAod/.",2018-09-20 +27151197,PathwAX: a web server for network crosstalk based pathway annotation.,"Pathway annotation of gene lists is often used to functionally analyse biomolecular data such as gene expression in order to establish which processes are activated in a given experiment. 
Databases such as KEGG or GO represent collections of how genes are known to be organized in pathways, and the challenge is to compare a given gene list with the known pathways such that all true relations are identified. Most tools apply statistical measures to the gene overlap between the gene list and pathway. It is however problematic to avoid false negatives and false positives when only using the gene overlap. The pathwAX web server (http://pathwAX.sbc.su.se/) applies a different approach which is based on network crosstalk. It uses the comprehensive network FunCoup to analyse network crosstalk between a query gene list and KEGG pathways. PathwAX runs the BinoX algorithm, which employs Monte-Carlo sampling of randomized networks and estimates a binomial distribution, for estimating the statistical significance of the crosstalk. This results in substantially higher accuracy than gene overlap methods. The system was optimized for speed and allows interactive web usage. We illustrate the usage and output of pathwAX.",2016-05-05 +30134047,Predictive Models for Kinetic Parameters of Cycloaddition Reactions.,"This paper reports SVR (Support Vector Regression) and GTM (Generative Topographic Mapping) modeling of three kinetic properties of cycloaddition reactions: rate constant (logk), activation energy (Ea) and pre-exponential factor (logA). A data set of 1849 reactions, comprising (4+2), (3+2) and (2+2) cycloadditions (CA) were studied in different solvents and at different temperatures. The reactions were encoded by the ISIDA fragment descriptors generated for Condensed Graph of Reaction (CGR). For a given reaction, a CGR condenses structures of all the reactants and products into one single molecular graph, described both by conventional chemical bonds and ""dynamical"" bonds characterizing chemical transformations. Different scenarios of logk assessment were exploited: direct modeling, application of the Arrhenius equation and temperature-scaled GTM landscapes. 
The logk models with optimal cross-validated statistics (Q2 =0.78-0.94 RMSE=0.45-0.86) have been challenged to predict rates for the external test set of 200 reactions, comprising both reactions that were not present in the training set, and training set transformations performed under different reaction conditions. The models are freely available on our web-server: http://cimm.kpfu.ru/models.",2018-08-22 +,Brazilian Network of Food Data Systems and LATINFOODS Regional Technical Compilation Committee: Food composition activities (2006–2009),"The Brazilian Network of Food Data Systems (BRASILFOODS) has been keeping the Brazilian Food Composition Database-USP (TBCA-USP) (http://www.fcf.usp.br/tabela) since 1998. Besides the constant compilation, analysis and update work in the database, the network tries to innovate through the introduction of food information that may contribute to decrease the risk for non-transmissible chronic diseases, such as the profile of carbohydrates and flavonoids in foods. In 2008, data on carbohydrates, individually analyzed, of 112 foods, and 41 data related to the glycemic response produced by foods widely consumed in the country were included in the TBCA-USP. Data (773) about the different flavonoid subclasses of 197 Brazilian foods were compiled and the quality of each data was evaluated according to the USDAs data quality evaluation system. In 2007, BRASILFOODS/USP and INFOODS/FAO organized the 7th International Food Data Conference “Food Composition and Biodiversity”. This conference was a unique opportunity for interaction between renowned researchers and participants from several countries and it allowed the discussion of aspects that may improve the food composition area. 
During the period, the LATINFOODS Regional Technical Compilation Committee and BRASILFOODS disseminated to Latin America the Form and Manual for Data Compilation, version 2009, ministered a Food Composition Data Compilation course and developed many activities related to data production and compilation.",2011-01-01 +29531263,"IMPPAT: A curated database of Indian Medicinal Plants, Phytochemistry And Therapeutics.","Phytochemicals of medicinal plants encompass a diverse chemical space for drug discovery. India is rich with a flora of indigenous medicinal plants that have been used for centuries in traditional Indian medicine to treat human maladies. A comprehensive online database on the phytochemistry of Indian medicinal plants will enable computational approaches towards natural product based drug discovery. In this direction, we present, IMPPAT, a manually curated database of 1742 Indian Medicinal Plants, 9596 Phytochemicals, And 1124 Therapeutic uses spanning 27074 plant-phytochemical associations and 11514 plant-therapeutic associations. Notably, the curation effort led to a non-redundant in silico library of 9596 phytochemicals with standard chemical identifiers and structure information. Using cheminformatic approaches, we have computed the physicochemical, ADMET (absorption, distribution, metabolism, excretion, toxicity) and drug-likeliness properties of the IMPPAT phytochemicals. We show that the stereochemical complexity and shape complexity of IMPPAT phytochemicals differ from libraries of commercial compounds or diversity-oriented synthesis compounds while being similar to other libraries of natural products. Within IMPPAT, we have filtered a subset of 960 potential druggable phytochemicals, of which majority have no significant similarity to existing FDA approved drugs, and thus, rendering them as good candidates for prospective drugs. 
IMPPAT database is openly accessible at: https://cb.imsc.res.in/imppat .",2018-03-12 +27270714,ANItools web: a web tool for fast genome comparison within multiple bacterial strains. ,"Early classification of prokaryotes was based solely on phenotypic similarities, but modern prokaryote characterization has been strongly influenced by advances in genetic methods. With the fast development of the sequencing technology, the ever increasing number of genomic sequences per species offers the possibility for developing distance determinations based on whole-genome information. The average nucleotide identity (ANI), calculated from pair-wise comparisons of all sequences shared between two given strains, has been proposed as the new metrics for bacterial species definition and classification. In this study, we developed the web version of ANItools (http://ani.mypathogen.cn/), which helps users directly get ANI values from online sources. A database covering ANI values of any two strains in a genus was also included (2773 strains, 1487 species and 668 genera). Importantly, ANItools web can automatically run genome comparison between the input genomic sequence and data sequences (Genus and Species levels), and generate a graphical report for ANI calculation results. ANItools web is useful for defining the relationship between bacterial strains, further contributing to the classification and identification of bacterial species using genome data.Database URL: http://ani.mypathogen.cn/.",2016-06-05 +29968796,ChemDIS-Mixture: an online tool for analyzing potential interaction effects of chemical mixtures.,"The assessment of bioactivity and toxicity for mixtures remains a challenging work. Although several computational models have been developed to accelerate the evaluation of chemical-chemical interaction, a specific biological endpoint should be defined before applying the models that usually relies on clinical and experimental data. 
The development of computational methods is desirable for identifying potential biological endpoints of mixture interactions. To facilitate the identification of potential effects of mixture interactions, a novel online system named ChemDIS-Mixture is proposed to analyze the shared target proteins, and common enriched functions, pathways, and diseases affected by multiple chemicals. Venn diagram tools have been implemented for easy analysis and visualization of interaction targets and effects. Case studies have been provided to demonstrate the capability of ChemDIS-Mixture for identifying potential effects of mixture interactions in clinical studies. ChemDIS-Mixture provides useful functions for the identification of potential effects of coexposure to multiple chemicals. ChemDIS-Mixture is freely accessible at http://cwtung.kmu.edu.tw/chemdis/mixture .",2018-07-03 +28893774,External Evaluation of Two Fluconazole Infant Population Pharmacokinetic Models.,"Fluconazole is an antifungal agent used for the treatment of invasive candidiasis, a leading cause of morbidity and mortality in premature infants. Population pharmacokinetic (PK) models of fluconazole in infants have been previously published by Wade et al. (Antimicrob Agents Chemother 52:4043-4049, 2008, https://doi.org/10.1128/AAC.00569-08) and Momper et al. (Antimicrob Agents Chemother 60:5539-5545, 2016, https://doi.org/10.1128/AAC.00963-16). Here we report the results of the first external evaluation of the predictive performance of both models. We used patient-level data from both studies to externally evaluate both PK models. The predictive performance of each model was evaluated using the model prediction error (PE), mean prediction error (MPE), mean absolute prediction error (MAPE), prediction-corrected visual predictive check (pcVPC), and normalized prediction distribution errors (NPDE). The values of the parameters of each model were reestimated using both the external and merged data sets. 
When evaluated with the external data set, the model proposed by Wade et al. showed lower median PE, MPE, and MAPE (0.429 μg/ml, 41.9%, and 57.6%, respectively) than the model proposed by Momper et al. (2.45 μg/ml, 188%, and 195%, respectively). The values of the majority of reestimated parameters were within 20% of their respective original parameter values for all model evaluations. Our analysis determined that though both models are robust, the model proposed by Wade et al. had greater accuracy and precision than the model proposed by Momper et al., likely because it was derived from a patient population with a wider age range. This study highlights the importance of the external evaluation of infant population PK models.",2017-11-22 +26553799,DASHR: database of small human noncoding RNAs.,"Small non-coding RNAs (sncRNAs) are highly abundant RNAs, typically <100 nucleotides long, that act as key regulators of diverse cellular processes. Although thousands of sncRNA genes are known to exist in the human genome, no single database provides searchable, unified annotation, and expression information for full sncRNA transcripts and mature RNA products derived from these larger RNAs. Here, we present the Database of small human noncoding RNAs (DASHR). DASHR contains the most comprehensive information to date on human sncRNA genes and mature sncRNA products. DASHR provides a simple user interface for researchers to view sequence and secondary structure, compare expression levels, and evidence of specific processing across all sncRNA genes and mature sncRNA products in various human tissues. DASHR annotation and expression data covers all major classes of sncRNAs including microRNAs (miRNAs), Piwi-interacting (piRNAs), small nuclear, nucleolar, cytoplasmic (sn-, sno-, scRNAs, respectively), transfer (tRNAs), and ribosomal RNAs (rRNAs). 
Currently, DASHR (v1.0) integrates 187 smRNA high-throughput sequencing (smRNA-seq) datasets with over 2.5 billion reads and annotation data from multiple public sources. DASHR contains annotations for ∼ 48,000 human sncRNA genes and mature sncRNA products, 82% of which are expressed in one or more of the curated tissues. DASHR is available at http://lisanwanglab.org/DASHR.",2015-11-08 +30526975,Clinical Issues in Severe Asthma: Debates and Discussions About Personalizing Patient Management.,"An outsized proportion of asthma-related morbidity and mortality is borne by the 5% to 15% of affected patients who have severe forms of the disease. These patients experience poorly controlled symptoms and frequent exacerbations despite daily treatment with high-dose inhaled corticosteroids and other long-acting controller medications. Ongoing research has elucidated key pathophysiologic processes and other clinical parameters related to asthma severity and persistence. In many cases, the patient's medical history, clinical presentation, and results from biomarker testing can help classify severe asthma phenotypically. Increasingly, this approach can allow health-care providers to personalize maintenance regimens using targeted therapies for the identified endotypes; that is, asthma phenotypes linked to specific underlying disease mechanisms and proinflammatory signaling cascades. Several biologic medications are now available to treat certain cohorts with severe asthma, and a number of other targeted agents are in late-stage development. Pulmonologists and asthma specialists who manage patients with severe asthma need to stay current on the latest published trial data for newer targeted therapies, approvals from the US Food and Drug Administration, and actionable best-practice recommendations on evaluating and treating patients with severe asthma. 
During this web-based Clinical Issues program (available online at https://courses.elseviercme.com/asthma18/761e), a panel of expert faculty discuss a series of topics related to the pathophysiology and heterogeneity of severe asthma, including the following: characterizing severe asthma phenotypes and endotypes; identification of patients with severe asthma; and the role of biomarkers in asthma phenotyping. The faculty also highlight the identification and management of comorbid conditions commonly associated with asthma. An overview of new and emerging biologic therapies for severe asthma is provided, followed by a detailed discussion on personalizing treatment for patients with severe asthma.",2018-12-01 +30561225,"Best Practices for QSAR Model Reporting: Physical and Chemical Properties, Ecotoxicity, Environmental Fate, Human Health, and Toxicokinetics Endpoints.","

Background

Quantitative and qualitative structure–activity relationships (QSARs) have been used to understand chemical behavior for almost a century. The main source of QSAR models is the scientific literature, but the open question is how well these models are documented.

Objectives

The main aim of this study was to critically analyze the publication practices of QSARs with regard to transparency, potential reproducibility, and independent verification. The focus was on the level of technical completeness of the published QSARs.

Methods

A total of 1,533 QSAR articles reporting 79 individual endpoints, mostly in environmental and health science, were reviewed. The QSAR parameters required for technical completeness were grouped into five categories: chemical structures, experimental endpoint values, descriptor values, mathematical representation of the model, and predicted endpoint values. The data were summarized and discussed using Circos plots.

Results

Altogether, 42.5% of the reviewed articles were found to be potentially reproducible. The potential reproducibility for different endpoint groups varied; the respective rates were 39% for physical and chemical properties, 52% for ecotoxicity, 56% for environmental fate, 30% for human health, and 32% for toxicokinetics. The reproducibility of QSARs is discussed and placed in the context of the reproducibility of the experimental methods. Included are 65 references to open QSAR datasets as examples of models restored from scientific articles.

Discussion

Strikingly poor documentation of QSARs was observed, which reduces the transparency, availability, and consequently, the application of research results in scientific, industrial, and regulatory areas. A list of the components needed to ensure the best practices for QSAR reporting is provided, allowing long-term use and preservation of the models. This list also allows an assessment of the reproducibility of models by interested parties such as journal editors, reviewers, regulators, evaluators, and potential users. https://doi.org/10.1289/EHP3264.",2018-12-01 +30407660,Management of neurogenic bladder in patients with Parkinson's disease: A systematic review.,"

Aims

To assess the different treatment methods in management of neurogenic bladder (NGB) in patients with Parkinson's disease (PD).

Methods

A systematic search was performed in Cochrane library, EMBASE, Proquest, Clinicaltrial.gov, WHO, Google Scholar, MEDLINE via PubMed, Ovid, ongoing trials registers, and conference proceedings in November 11, 2017. All randomized controlled trials (RCTs) or quasi-RCTs comparing any treatment method for management of NGB in patients with PD were included. The titles and abstracts of all identified studies were evaluated independently by two investigators. Once all of the potential related articles were retrieved, each author separately evaluated the full text of each article and the quality of the methodology of the selected studies using the Cochrane appraisal risk of bias checklist and then the data about the patient's outcomes was extracted. We registered the title in Joanna Briggs Institute (JBI) that is available in http://joannabriggs.org/research/registered_titles.aspx.

Results

We included 41 RCTs or quasi-RCTs or three observational study with a total of 1063 patients that evaluated pharmacological, neurosurgical, botulinum toxin, electrical neuromodulation, and behavioral therapy effects on NGB. Among the included studies only solifenacin succinate double-blind, randomized, placebo-controlled study was assessed as low risk of bias, and treatment led to an improvement in urinary incontinence.

Conclusions

Although several interventions are available for treatment NGB in patients with PD, at present there is little or no evidence that treatment improves patient outcomes in this population. Additional large, well designed, randomized studies with improved methodology and reporting focused on patient-centered outcomes are needed.",2018-11-08 +28341700,asymptoticMK: A Web-Based Tool for the Asymptotic McDonald-Kreitman Test.,"The McDonald-Kreitman (MK) test is a widely used method for quantifying the role of positive selection in molecular evolution. One key shortcoming of this test lies in its sensitivity to the presence of slightly deleterious mutations, which can severely bias its estimates. An asymptotic version of the MK test was recently introduced that addresses this problem by evaluating polymorphism levels for different mutation frequencies separately, and then extrapolating a function fitted to that data. Here, we present asymptoticMK, a web-based implementation of this asymptotic MK test. Our web service provides a simple R-based interface into which the user can upload the required data (polymorphism and divergence data for the genomic test region and a neutrally evolving reference region). The web service then analyzes the data and provides plots of the test results. This service is free to use, open-source, and available at http://benhaller.com/messerlab/asymptoticMK.html We provide results from simulations to illustrate the performance and robustness of the asymptoticMK test under a wide range of model parameters.",2017-05-05 +29860391,CASTp 3.0: computed atlas of surface topography of proteins.,"Geometric and topological properties of protein structures, including surface pockets, interior cavities and cross channels, are of fundamental importance for proteins to carry out their functions. 
Computed Atlas of Surface Topography of proteins (CASTp) is a web server that provides online services for locating, delineating and measuring these geometric and topological properties of protein structures. It has been widely used since its inception in 2003. In this article, we present the latest version of the web server, CASTp 3.0. CASTp 3.0 continues to provide reliable and comprehensive identifications and quantifications of protein topography. In addition, it now provides: (i) imprints of the negative volumes of pockets, cavities and channels, (ii) topographic features of biological assemblies in the Protein Data Bank, (iii) improved visualization of protein structures and pockets, and (iv) more intuitive structural and annotated information, including information of secondary structure, functional sites, variant sites and other annotations of protein residues. The CASTp 3.0 web server is freely accessible at http://sts.bioe.uic.edu/castp/.",2018-07-01 +26454280,SUPER-FOCUS: a tool for agile functional analysis of shotgun metagenomic data.,"

Summary

Analyzing the functional profile of a microbial community from unannotated shotgun sequencing reads is one of the important goals in metagenomics. Functional profiling has valuable applications in biological research because it identifies the abundances of the functional genes of the organisms present in the original sample, answering the question what they can do. Currently, available tools do not scale well with increasing data volumes, which is important because both the number and lengths of the reads produced by sequencing platforms keep increasing. Here, we introduce SUPER-FOCUS, SUbsystems Profile by databasE Reduction using FOCUS, an agile homology-based approach using a reduced reference database to report the subsystems present in metagenomic datasets and profile their abundances. SUPER-FOCUS was tested with over 70 real metagenomes, the results showing that it accurately predicts the subsystems present in the profiled microbial communities, and is up to 1000 times faster than other tools.

Availability and implementation

SUPER-FOCUS was implemented in Python, and its source code and the tool website are freely available at https://edwards.sdsu.edu/SUPERFOCUS.

Contact

redwards@mail.sdsu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-09 +27895719,DRABAL: novel method to mine large high-throughput screening assays using Bayesian active learning.,"

Background

Mining high-throughput screening (HTS) assays is key for enhancing decisions in the area of drug repositioning and drug discovery. However, many challenges are encountered in the process of developing suitable and accurate methods for extracting useful information from these assays. Virtual screening and a wide variety of databases, methods and solutions proposed to-date, did not completely overcome these challenges. This study is based on a multi-label classification (MLC) technique for modeling correlations between several HTS assays, meaning that a single prediction represents a subset of assigned correlated labels instead of one label. Thus, the devised method provides an increased probability for more accurate predictions of compounds that were not tested in particular assays.

Results

Here we present DRABAL, a novel MLC solution that incorporates structure learning of a Bayesian network as a step to model dependency between the HTS assays. In this study, DRABAL was used to process more than 1.4 million interactions of over 400,000 compounds and analyze the existing relationships between five large HTS assays from the PubChem BioAssay Database. Compared to different MLC methods, DRABAL significantly improves the F1Score by about 22%, on average. We further illustrated usefulness and utility of DRABAL through screening FDA approved drugs and reported ones that have a high probability to interact with several targets, thus enabling drug-multi-target repositioning. Specifically DRABAL suggests the Thiabendazole drug as a common activator of the NCP1 and Rab-9A proteins, both of which are designed to identify treatment modalities for the Niemann-Pick type C disease.

Conclusion

We developed a novel MLC solution based on a Bayesian active learning framework to overcome the challenge of lacking fully labeled training data and exploit actual dependencies between the HTS assays. The solution is motivated by the need to model dependencies between existing experimental confirmatory HTS assays and improve prediction performance. We have pursued extensive experiments over several HTS assays and have shown the advantages of DRABAL. The datasets and programs can be downloaded from https://figshare.com/articles/DRABAL/3309562.Graphical abstract.",2016-11-10 +28486666,CSTEA: a webserver for the Cell State Transition Expression Atlas.,"Cell state transition is one of the fundamental events in the development of multicellular organisms, and the transition trajectory path has recently attracted much attention. With the accumulation of large amounts of ""-omics"" data, it is becoming possible to get insights into the molecule mechanisms underlying the transitions between cell states. Here, we present CSTEA (Cell State Transition Expression Atlas), a webserver that organizes, analyzes and visualizes the time-course gene expression data during cell differentiation, cellular reprogramming and trans-differentiation in human and mouse. In particular, CSTEA defines gene signatures for uncharacterized stages during cell state transitions, thereby enabling both experimental and computational biologists to better understand the mechanisms of cell fate determination in mammals. To our best knowledge, CSTEA is the first webserver dedicated to the analysis of time-series gene expression data during cell state transitions. 
CSTEA is freely available at http://comp-sysbio.org/cstea/.",2017-07-01 +27379044,From DNA to FBA: How to Build Your Own Genome-Scale Metabolic Model.,"Microbiological studies are increasingly relying on in silico methods to perform exploration and rapid analysis of genomic data, and functional genomics studies are supplemented by the new perspectives that genome-scale metabolic models offer. A mathematical model consisting of a microbe's entire metabolic map can be rapidly determined from whole-genome sequencing and annotating the genomic material encoded in its DNA. Flux-balance analysis (FBA), a linear programming technique that uses metabolic models to predict the phenotypic responses imposed by environmental elements and factors, is the leading method to simulate and manipulate cellular growth in silico. However, the process of creating an accurate model to use in FBA consists of a series of steps involving a multitude of connections between bioinformatics databases, enzyme resources, and metabolic pathways. We present the methodology and procedure to obtain a metabolic model using PyFBA, an extensible Python-based open-source software package aimed to provide a platform where functional annotations are used to build metabolic models (http://linsalrob.github.io/PyFBA). Backed by the Model SEED biochemistry database, PyFBA contains methods to reconstruct a microbe's metabolic map, run FBA upon different media conditions, and gap-fill its metabolism. The extensibility of PyFBA facilitates novel techniques in creating accurate genome-scale metabolic models.",2016-06-17 +25921073,FunRich: An open access standalone functional enrichment and interaction network analysis tool.,"As high-throughput techniques including proteomics become more accessible to individual laboratories, there is an urgent need for a user-friendly bioinformatics analysis system. Here, we describe FunRich, an open access, standalone functional enrichment and network analysis tool. 
FunRich is designed to be used by biologists with minimal or no support from computational and database experts. Using FunRich, users can perform functional enrichment analysis on background databases that are integrated from heterogeneous genomic and proteomic resources (>1.5 million annotations). Besides default human specific FunRich database, users can download data from the UniProt database, which currently supports 20 different taxonomies against which enrichment analysis can be performed. Moreover, the users can build their own custom databases and perform the enrichment analysis irrespective of organism. In addition to proteomics datasets, the custom database allows for the tool to be used for genomics, lipidomics and metabolomics datasets. Thus, FunRich allows for complete database customization and thereby permits for the tool to be exploited as a skeleton for enrichment analysis irrespective of the data type or organism used. FunRich (http://www.funrich.org) is user-friendly and provides graphical representation (Venn, pie charts, bar graphs, column, heatmap and doughnuts) of the data with customizable font, scale and color (publication quality).",2015-06-17 +26505644,Analysis of the ToxCast Chemical-Assay Space Using the Comparative Toxicogenomics Database.,"Many studies have attempted to predict in vivo hazards based on the ToxCast in vitro assay results with the goal of using these predictions to prioritize compounds for conventional toxicity testing. Most of these conventional studies rely on in vivo end points observed using preclinical species (e.g., mice and rats). Although the preclinical animal studies provide valuable insights, there can often be significant disconnects between these studies and safety concerns in humans. One way to address these concerns, for an admittedly more limited set of compounds, is to explore relationships between the in vitro data from human cell lines and observations from human related studies. 
The Comparative Toxicogenomics Database (CTD; http://ctdbase.org ) is a rich source of data linking chemicals to human diseases/adverse events and pathways. In this study we explored the relationships between ToxCast chemicals, their ToxCast in vitro test results, and their annotations of human disease/adverse event end points as captured in the CTD database. We mined these associations to identify potentially interesting, statistically significant in vitro assay and in vivo toxicity correlations. To the best of our knowledge, this is one of the first studies analyzing the relationships between the ToxCast in vitro assays results and the CTD disease/adverse event end point annotations. The in vitro profiles identified in this analysis may prove useful for prioritizing compounds for toxicity testing, suggesting mechanisms of toxicity, and forecasting potential in vivo human drug induced injury.",2015-11-04 +30433778,SimNano: A Trust Region Strategy for Large-Scale Molecular Systems Energy Minimization Based on Exact Second-Order Derivative Information.,"In this work, a new energy minimization strategy is presented that achieves better convergence properties than the standard algorithms employed in the field (fewer steps and usually a lower minimum) and is also computationally efficient; therefore, it becomes suitable for dealing with large-scale molecular systems. The proposed strategy is integrated into the SimNano energy minimization platform that is also described herein. SimNano relies on the analytical calculation of the molecular systems' gradient vectors and Hessian matrices using the computational modeling framework proposed by the authors ( Chatzieleftheriou , S. ; Adendorff , M. R. ; Lagaros , N. D. Generalized Potential Energy Finite Elements for Modeling Molecular Nanostructures . J. Chem. Inf. Model. 2016 , 56 ( 10 ), 1963 - 1978 ). 
The basis of the proposed minimization strategy is a trust region algorithm based on exact second-order derivative information. Taking advantage of the Hessian matrices' sparsity, a specialized treatment of the data structure is implemented. The latter is beneficial and often rather necessary, especially in the case of large-scale molecular systems, improving the speed and reducing the memory requirements. In order to demonstrate the efficiency of the proposed energy minimization strategy, several test examples are examined, and the results achieved are compared with those obtained by one of the most popular molecular simulation software packages, i.e., the Large-Scale Atomic/Molecular Massively Parallel Simulator (LAMMPS). The results indicate that the proposed minimization strategy exhibits superior convergence properties compared with the typical algorithms (i.e., nonlinear conjugate gradient algorithm, limited-memory Broyden-Fletcher-Goldfarb-Shanno (LBFGS) algorithm, etc.). The SimNano energy minimization platform can be downloaded from the site http://users.ntua.gr/nlagaros/simnano.html , enabling researchers in the field to build molecular systems and perform energy minimization runs using input files in LAMMPS format.",2018-11-29 +28975141,Integrated spectral photocurrent density and reproducibility analyses of excitonic ZnO/NiO heterojunction.,"In this data article, the excitonic ZnO/NiO heterojunction device (Patel et al., 2017) [1] was measured for the integrated photocurrent density and reproducibility. Photograph of the prepared devices of ZnO/NiO on the FTO/glass is presented. Integrated photocurrent density as a function of photon energy from the sunlight is presented. Quantum efficiency measurement system (McScienceK3100, Korea) compliance with International Measurement System was employed to measure ZnO/NIO devices. These data are shown for the 300-440 nm of segment of the sunlight (AM1.5G, http://rredc.nrel.gov/solar/spectra/am1.5/). 
Reproducibility measure of ZnO/NiO device was presented for nine devices with the estimated device performance parameters including the open circuit voltage, short circuit current density, fill factor and power conversion efficiency.",2017-09-12 +30081733,MiRNA-BD: an evidence-based bioinformatics model and software tool for microRNA biomarker discovery.,"MicroRNAs (miRNAs) are small non-coding RNAs with the potential as biomarkers for disease diagnosis, prognosis and therapy. In the era of big data and biomedical informatics, computer-aided biomarker discovery has become the current frontier. However, most of the computational models are highly dependent on specific prior knowledge and training-testing procedures, very few are mechanism-guided or evidence-based. To the best of our knowledge, until now no general rules have been uncovered and applied to miRNA biomarker screening. In this study, we manually collected literature-reported cancer miRNA biomarkers and analyzed their regulatory patterns, including the regulatory modes, biological functions and evolutionary characteristics of their targets in the human miRNA-mRNA network. Two evidences were statistically detected and used to distinguish biomarker miRNAs from others. Based on these observations, we developed a novel bioinformatics model and software tool for miRNA biomarker discovery ( http://sysbio.suda.edu.cn/MiRNA-BD/ ). In contrast to routine methods that focus on miRNA synergic functions, our method searches for vulnerable sites in the miRNA-mRNA network and considers the independent regulatory power of miRNAs, i.e., single-line regulations between miRNAs and mRNAs. 
The performance comparison demonstrates the generality and precision of our model, which identifies miRNA biomarkers for cancers as well as other complex diseases without training or specific prior knowledge.",2018-09-17 +27867326,Towards a Consistent and Scientifically Accurate Drug Ontology.,"Our use case for comparative effectiveness research requires an ontology of drugs that enables querying National Drug Codes (NDCs) by active ingredient, mechanism of action, physiological effect, and therapeutic class of the drug products they represent. We conducted an ontological analysis of drugs from the realist perspective, and evaluated existing drug terminology, ontology, and database artifacts from (1) the technical perspective, (2) the perspective of pharmacology and medical science (3) the perspective of description logic semantics (if they were available in Web Ontology Language or OWL), and (4) the perspective of our realism-based analysis of the domain. No existing resource was sufficient. Therefore, we built the Drug Ontology (DrOn) in OWL, which we populated with NDCs and other classes from RxNorm using only content created by the National Library of Medicine. We also built an application that uses DrOn to query for NDCs as outlined above, available at: http://ingarden.uams.edu/ingredients. The application uses an OWL-based description logic reasoner to execute end-user queries. DrOn is available at http://code.google.com/p/dr-on.",2013-01-01 +23735126,An optimized algorithm for detecting and annotating regional differential methylation.,"

Background

DNA methylation profiling reveals important differentially methylated regions (DMRs) of the genome that are altered during development or that are perturbed by disease. To date, few programs exist for regional analysis of enriched or whole-genome bisulfite conversion sequencing data, even though such data are increasingly common. Here, we describe an open-source, optimized method for determining empirically based DMRs (eDMR) from high-throughput sequence data that is applicable to enriched whole-genome methylation profiling datasets, as well as other globally enriched epigenetic modification data.

Results

Here we show that our bimodal distribution model and weighted cost function for optimized regional methylation analysis provides accurate boundaries of regions harboring significant epigenetic modifications. Our algorithm takes the spatial distribution of CpGs into account for the enrichment assay, allowing for optimization of the definition of empirical regions for differential methylation. Combined with the dependent adjustment for regional p-value combination and DMR annotation, we provide a method that may be applied to a variety of datasets for rapid DMR analysis. Our method classifies both the directionality of DMRs and their genome-wide distribution, and we have observed that it shows clinical relevance through correct stratification of two Acute Myeloid Leukemia (AML) tumor sub-types.

Conclusions

Our weighted optimization algorithm eDMR for calling DMRs extends an established DMR R pipeline (methylKit) and provides a needed resource in epigenomics. Our method enables an accurate and scalable way of finding DMRs in high-throughput methylation sequencing experiments. eDMR is available for download at http://code.google.com/p/edmr/.",2013-04-10 +28815771,"Molstack-Interactive visualization tool for presentation, interpretation, and validation of macromolecules and electron density maps.","Our understanding of the world of biomolecular structures is based upon the interpretation of macromolecular models, of which ∼90% are an interpretation of electron density maps. This structural information guides scientific progress and exploration in many biomedical disciplines. The Protein Data Bank's web portals have made these structures available for mass scientific consumption and greatly broaden the scope of information presented in scientific publications. The portals provide numerous quality metrics; however, the portion of the structure that is most vital for interpretation of the function may have the most difficult to interpret electron density and this ambiguity is not reflected by any single metric. The possible consequences of basing research on suboptimal models make it imperative to inspect the agreement of a model with its experimental evidence. Molstack, a web-based interactive publishing platform for structural data, allows users to present density maps and structural models by displaying a collection of maps and models, including different interpretation of one's own data, re-refinements, and corrections of existing structures. Molstack organizes the sharing and dissemination of these structural models along with their experimental evidence as an interactive session. 
Molstack was designed with three groups of users in mind; researchers can present the evidence of their interpretation, reviewers and readers can independently judge the experimental evidence of the authors' conclusions, and other researchers can present or even publish their new hypotheses in the context of prior results. The server is available at http://molstack.bioreproducibility.org.",2017-09-13 +27153630,TOPDOM: database of conservatively located domains and motifs in proteins.,"

Unlabelled

The TOPDOM database-originally created as a collection of domains and motifs located consistently on the same side of the membranes in α-helical transmembrane proteins-has been updated and extended by taking into consideration consistently localized domains and motifs in globular proteins, too. By taking advantage of the recently developed CCTOP algorithm to determine the type of a protein and predict topology in case of transmembrane proteins, and by applying a thorough search for domains and motifs as well as utilizing the most up-to-date version of all source databases, we managed to reach a 6-fold increase in the size of the whole database and a 2-fold increase in the number of transmembrane proteins.

Availability and implementation

TOPDOM database is available at http://topdom.enzim.hu The webpage utilizes the common Apache, PHP5 and MySQL software to provide the user interface for accessing and searching the database. The database itself is generated on a high performance computer.

Contact

tusnady.gabor@ttk.mta.hu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-12 +30227169,Amino acid profile in women with gestational diabetes mellitus treated with metformin or insulin.,"

Aims

We compared the effects of metformin and insulin treatments of gestational diabetes mellitus (GDM) on amino acid metabolism.

Methods

217 pregnant women diagnosed with GDM were randomized to receive either metformin or insulin. 1H nuclear magnetic spectroscopy was used to determine serum concentrations of alanine, glutamine, glycine, isoleucine, leucine, valine, histidine, phenylalanine, tyrosine, glucose and lactate at the time of diagnosis and at 36 gestational weeks (gw).

Results

The majority of the amino acid concentrations increased from 30 to 36 gw. The rise in alanine (16% vs. 8%, p < 0.0001), isoleucine (11% vs. 5%, p = 0.035) and lactate (29% vs. 14%, p = 0.015) was larger in the metformin group compared to the insulin group. Baseline alanine, glycine, isoleucine, leucine, valine and tyrosine were positively related to slightly earlier delivery. Alanine at 36 gw was positively associated with birth weight and glutamine with gestational hypertension or preeclampsia. Lactate at 36 gw was not associated with any adverse outcome.

Conclusions

Compared to insulin, metformin caused a greater increase in serum alanine, isoleucine and lactate concentrations. Although the observed differences in the metabolic variables were relatively small and not outright concerning, additional studies and follow-up data are required to ensure the safety of metformin use in pregnancy. The trial was registered in Clinicaltrials.gov, NCT01240785; http://clinicaltrials.gov/ct2/show/NCT01240785.",2018-09-15 +29745858,KF-finder: identification of key factors from host-microbial networks in cervical cancer.,"BACKGROUND:The human body is colonized by a vast number of microbes. Microbiota can benefit many normal life processes, but can also cause many diseases by interfering with the regular metabolism and immune system. Recent studies have demonstrated that the microbial community is closely associated with various types of cell carcinoma. The search for key factors, which also refer to cancer causing agents, can provide an important clue in understanding the regulatory mechanism of microbiota in uterine cervix cancer. RESULTS:In this paper, we investigated microbiota composition and gene expression data for 58 squamous and adenosquamous cell carcinoma. A host-microbial covariance network was constructed based on the 16s rRNA and gene expression data of the samples, which consists of 259 abundant microbes and 738 differentially expressed genes (DEGs). To search for risk factors from host-microbial networks, the method of bi-partite betweenness centrality (BpBC) was used to measure the risk of a given node to a certain biological process in hosts. A web-based tool KF-finder was developed, which can efficiently query and visualize the knowledge of microbiota and differentially expressed genes (DEGs) in the network. 
CONCLUSIONS:Our results suggest that prevotellaceae, tissierellaceae and fusobacteriaceae are the most abundant microbes in cervical carcinoma, and the microbial community in cervical cancer is less diverse than that of any other body sites in health. A set of key risk factors anaerococcus, hydrogenophilaceae, eubacterium, PSMB10, KCNIP1 and KRT13 have been identified, which are thought to be involved in the regulation of viral response, cell cycle and epithelial cell differentiation in cervical cancer. It can be concluded that permanent changes of microbiota composition could be a major force for chromosomal instability, which subsequently enables the effect of key risk factors in cancer. All our results described in this paper can be freely accessed from our website at http://www.nwpu-bioinformatics.com/KF-finder/ .",2018-04-24 +26527726,MouseNet v2: a database of gene networks for studying the laboratory mouse and eight other model vertebrates.,"Laboratory mouse, Mus musculus, is one of the most important animal tools in biomedical research. Functional characterization of the mouse genes, hence, has been a long-standing goal in mammalian and human genetics. Although large-scale knockout phenotyping is under progress by international collaborative efforts, a large portion of mouse genome is still poorly characterized for cellular functions and associations with disease phenotypes. A genome-scale functional network of mouse genes, MouseNet, was previously developed in context of MouseFunc competition, which allowed only limited input data for network inferences. Here, we present an improved mouse co-functional network, MouseNet v2 (available at http://www.inetbio.org/mousenet), which covers 17 714 genes (>88% of coding genome) with 788 080 links, along with a companion web server for network-assisted functional hypothesis generation. The network database has been substantially improved by large expansion of genomics data. 
For example, MouseNet v2 database contains 183 co-expression networks inferred from 8154 public microarray samples. We demonstrated that MouseNet v2 is predictive for mammalian phenotypes as well as human diseases, which suggests its usefulness in discovery of novel disease genes and dissection of disease pathways. Furthermore, MouseNet v2 database provides functional networks for eight other vertebrate models used in various research fields.",2015-11-02 +22067447,Ensembl Genomes: an integrative resource for genome-scale data from non-vertebrate species.,"Ensembl Genomes (http://www.ensemblgenomes.org) is an integrative resource for genome-scale data from non-vertebrate species. The project exploits and extends technology (for genome annotation, analysis and dissemination) developed in the context of the (vertebrate-focused) Ensembl project and provides a complementary set of resources for non-vertebrate species through a consistent set of programmatic and interactive interfaces. These provide access to data including reference sequence, gene models, transcriptional data, polymorphisms and comparative analysis. Since its launch in 2009, Ensembl Genomes has undergone rapid expansion, with the goal of providing coverage of all major experimental organisms, and additionally including taxonomic reference points to provide the evolutionary context in which genes can be understood. Against the backdrop of a continuing increase in genome sequencing activities in all parts of the tree of life, we seek to work, wherever possible, with the communities actively generating and using data, and are participants in a growing range of collaborations involved in the annotation and analysis of genomes.",2011-11-08 +28980447,RJSplot: Interactive Graphs with R. ,"Data visualization techniques provide new methods for the generation of interactive graphs. 
These graphs allow a better exploration and interpretation of data but their creation requires advanced knowledge of graphical libraries. Recent packages have enabled the integration of interactive graphs in R. However, R provides limited graphical packages that allow the generation of interactive graphs for computational biology applications. The present project has joined the analytical power of R with the interactive graphical features of JavaScript in a new R package (RJSplot). It enables the easy generation of interactive graphs in R, provides new visualization capabilities, and contributes to the advance of computational biology analytical methods. At present, 16 interactive graphics are available in RJSplot, such as the genome viewer, Manhattan plots, 3D plots, heatmaps, dendrograms, networks, and so on. The RJSplot package is freely available online at http://rjsplot.net.",2017-10-05 +29684124,iRO-3wPseKNC: identify DNA replication origins by three-window-based PseKNC.,"Motivation:DNA replication is the key of the genetic information transmission, and it is initiated from the replication origins. Identifying the replication origins is crucial for understanding the mechanism of DNA replication. Although several discriminative computational predictors were proposed to identify DNA replication origins of yeast species, they could only be used to identify very tiny parts (250 or 300 bp) of the replication origins. Besides, none of the existing predictors could successfully capture the 'GC asymmetry bias' of yeast species reported by experimental observations. Hence it would not be surprising why their power is so limited. To grasp the CG asymmetry feature and make the prediction able to cover the entire replication regions of yeast species, we develop a new predictor called 'iRO-3wPseKNC'. 
Results:Rigorous cross validations on the benchmark datasets from four yeast species (Saccharomyces cerevisiae, Schizosaccharomyces pombe, Kluyveromyces lactis and Pichia pastoris) have indicated that the proposed predictor is really very powerful for predicting the entire DNA duplication origins. Availability and implementation:The web-server for the iRO-3wPseKNC predictor is available at http://bioinformatics.hitsz.edu.cn/iRO-3wPseKNC/, by which users can easily get their desired results without the need to go through the mathematical details. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-09-01 +24225319,DNASU plasmid and PSI:Biology-Materials repositories: resources to accelerate biological research.,"The mission of the DNASU Plasmid Repository is to accelerate research by providing high-quality, annotated plasmid samples and online plasmid resources to the research community through the curated DNASU database, website and repository (http://dnasu.asu.edu or http://dnasu.org). The collection includes plasmids from grant-funded, high-throughput cloning projects performed in our laboratory, plasmids from external researchers, and large collections from consortia such as the ORFeome Collaboration and the NIGMS-funded Protein Structure Initiative: Biology (PSI:Biology). Through DNASU, researchers can search for and access detailed information about each plasmid such as the full length gene insert sequence, vector information, associated publications, and links to external resources that provide additional protein annotations and experimental protocols. Plasmids can be requested directly through the DNASU website. DNASU and the PSI:Biology-Materials Repositories were previously described in the 2010 NAR Database Issue (Cormier, C.Y., Mohr, S.E., Zuo, D., Hu, Y., Rolfs, A., Kramer, J., Taycher, E., Kelley, F., Fiacco, M., Turnbull, G. et al. 
(2010) Protein Structure Initiative Material Repository: an open shared public resource of structural genomics plasmids for the biological community. Nucleic Acids Res., 38, D743-D749.). In this update we will describe the plasmid collection and highlight the new features in the website redesign, including new browse/search options, plasmid annotations and a dynamic vector mapping feature that was developed in collaboration with LabGenius. Overall, these plasmid resources continue to enable research with the goal of elucidating the role of proteins in both normal biological processes and disease.",2013-11-12 +25864439,What to compare and how: Comparative transcriptomics for Evo-Devo.,"Evolutionary developmental biology has grown historically from the capacity to relate patterns of evolution in anatomy to patterns of evolution of expression of specific genes, whether between very distantly related species, or very closely related species or populations. Scaling up such studies by taking advantage of modern transcriptomics brings promising improvements, allowing us to estimate the overall impact and molecular mechanisms of convergence, constraint or innovation in anatomy and development. But it also presents major challenges, including the computational definitions of anatomical homology and of organ function, the criteria for the comparison of developmental stages, the annotation of transcriptomics data to proper anatomical and developmental terms, and the statistical methods to compare transcriptomic data between species to highlight significant conservation or changes. 
In this article, we review these challenges, and the ongoing efforts to address them, which are emerging from bioinformatics work on ontologies, evolutionary statistics, and data curation, with a focus on their implementation in the context of the development of our database Bgee (http://bgee.org).",2015-04-10 +30254538,Using Online Images to Teach Quantitative Skills via Comparative Neuroanatomy: Applying the Directives of Vision and Change.,"Vision and Change calls for increasing the quantitative skills of biology majors, which includes neuroscience majors. Accordingly, we have devised a module to give students practice at regression analyses, covariance, and ANOVA. This module consists of a quantitative comparative neuroanatomy lab in which students explore the size of the hippocampus relative to the brain in 62 different mammalian species-from an anteater to a zebu. We utilize a digital image library (with appropriate metadata) allowing students to quantify the size of the hippocampus as well as obtain an index of the size of the brain in these various species. Students then answer the following questions: (1) Do brains scale with body size? (2) Does the hippocampus scale with brain size? (3) If we control for body size, does the hippocampus still scale with brain size? (4) How does the hippocampus change as a proportion of brain size? (5) Is the proportional scaling of the hippocampus different among primates, carnivores, and other mammals? (6) Do the data provide evidence for mosaic or concerted evolution? Measures of the pedagogical efficacy showed clear and significant gains on a PreTest vs PostTest assessment of material related to the module. An open ended qualitative measure revealed students' perception of the purposes of the module, which were consistent with the learning goals. This module utilizes open access digital resources and can be performed at any institution. 
All the materials or links to online resources can be found at https://mdcune.psych.ucla.edu/modules/cna.",2018-09-15 +28180292,3D genome structure modeling by Lorentzian objective function.,"The 3D structure of the genome plays a vital role in biological processes such as gene interaction, gene regulation, DNA replication and genome methylation. Advanced chromosomal conformation capture techniques, such as Hi-C and tethered conformation capture, can generate chromosomal contact data that can be used to computationally reconstruct 3D structures of the genome. We developed a novel restraint-based method that is capable of reconstructing 3D genome structures utilizing both intra-and inter-chromosomal contact data. Our method was robust to noise and performed well in comparison with a panel of existing methods on a controlled simulated data set. On a real Hi-C data set of the human genome, our method produced chromosome and genome structures that are consistent with 3D FISH data and known knowledge about the human chromosome and genome, such as, chromosome territories and the cluster of small chromosomes in the nucleus center with the exception of the chromosome 18. The tool and experimental data are available at https://missouri.box.com/v/LorDG.",2017-02-01 +25592564,Cell Index Database (CELLX): a web tool for cancer precision medicine.,"The Cell Index Database, (CELLX) (http://cellx.sourceforge.net) provides a computational framework for integrating expression, copy number variation, mutation, compound activity, and meta data from cancer cells. CELLX provides the computational biologist a quick way to perform routine analyses as well as the means to rapidly integrate data for offline analysis. Data is accessible through a web interface which utilizes R to generate plots and perform clustering, correlations, and statistical tests for associations within and between data types for ~20,000 samples from TCGA, CCLE, Sanger, GSK, GEO, GTEx, and other public sources. 
We show how CELLX supports precision oncology through indications discovery, biomarker evaluation, and cell line screening analysis.",2015-01-01 +28968733,MetExploreViz: web component for interactive metabolic network visualization.,

Summary

MetExploreViz is an open source web component that can be easily embedded in any web site. It provides features dedicated to the visualization of metabolic networks and pathways and thus offers a flexible solution to analyse omics data in a biochemical context.

Availability and implementation

Documentation and link to GIT code repository (GPL 3.0 license) are available at this URL: http://metexplore.toulouse.inra.fr/metexploreViz/doc/.,2018-01-01 +31105333,The Qualities of Same-Sex and Different-Sex Couples in Young Adulthood.,"

Objective

Recognition of sexual minorities in social science research is growing and this study contributes to knowledge on this population by comparing the qualities of same-sex and different-sex relationships among young adults.

Background

The findings of studies on this topic cannot be generalized to young adults because they are limited to coresidential unions and based on convenience samples. This study extends prior research by examining multiple relationship qualities among a nationally-representative sample of males and females in ""dating"" and cohabiting relationships.

Method

The authors compare young adults in same-sex and different-sex relationships with respect to relationship quality (commitment, satisfaction, and emotional intimacy) and sexual behavior (sexual frequency and sexual exclusivity). Drawing on the fourth wave of data from the National Longitudinal Study of Adolescent to Adult Health (http://www.cpc.unc.edu/projects/addhealth), they use multiple regression to compare: male respondents with different-sex partners, male respondents with same-sex partners, female respondents with different-sex partners, and female respondents with same-sex partners.

Results

Consistent with previous research, the authors find that respondents in same-sex relationships experience similar levels of commitment, satisfaction, and emotional intimacy as their counterparts in different-sex relationships. They also corroborate the finding that male respondents in same-sex relationships are less likely than other groups of respondents to indicate their relationship is sexually exclusive.

Conclusion

This study provides an empirical basis for understanding the relationships of sexual minority young adults.",2018-10-10 +28617224,ENIGMA-Viewer: interactive visualization strategies for conveying effect sizes in meta-analysis.,"

Background

Global scale brain research collaborations such as the ENIGMA (Enhancing Neuro Imaging Genetics through Meta Analysis) consortium are beginning to collect data in large quantity and to conduct meta-analyses using uniformed protocols. It becomes strategically important that the results can be communicated among brain scientists effectively. Traditional graphs and charts failed to convey the complex shapes of brain structures which are essential to the understanding of the result statistics from the analyses. These problems could be addressed using interactive visualization strategies that can link those statistics with brain structures in order to provide a better interface to understand brain research results.

Results

We present ENIGMA-Viewer, an interactive web-based visualization tool for brain scientists to compare statistics such as effect sizes from meta-analysis results on standardized ROIs (regions-of-interest) across multiple studies. The tool incorporates visualization design principles such as focus+context and visual data fusion to enable users to better understand the statistics on brain structures. To demonstrate the usability of the tool, three examples using recent research data are discussed via case studies.

Conclusions

ENIGMA-Viewer supports presentations and communications of brain research results through effective visualization designs. By linking visualizations of both statistics and structures, users can gain more insights into the presented data that are otherwise difficult to obtain. ENIGMA-Viewer is an open-source tool, the source code and sample data are publicly accessible through the NITRC website ( http://www.nitrc.org/projects/enigmaviewer_20 ). The tool can also be directly accessed online ( http://enigma-viewer.org ).",2017-06-06 +28369277,GenoGAM: genome-wide generalized additive models for ChIP-Seq analysis.,"

Motivation

Chromatin immunoprecipitation followed by deep sequencing (ChIP-Seq) is a widely used approach to study protein-DNA interactions. Often, the quantities of interest are the differential occupancies relative to controls, between genetic backgrounds, treatments, or combinations thereof. Current methods for differential occupancy of ChIP-Seq data rely however on binning or sliding window techniques, for which the choice of the window and bin sizes are subjective.

Results

Here, we present GenoGAM (Genome-wide Generalized Additive Model), which brings the well-established and flexible generalized additive models framework to genomic applications using a data parallelism strategy. We model ChIP-Seq read count frequencies as products of smooth functions along chromosomes. Smoothing parameters are objectively estimated from the data by cross-validation, eliminating ad hoc binning and windowing needed by current approaches. GenoGAM provides base-level and region-level significance testing for full factorial designs. Application to a ChIP-Seq dataset in yeast showed increased sensitivity over existing differential occupancy methods while controlling for type I error rate. By analyzing a set of DNA methylation data and illustrating an extension to a peak caller, we further demonstrate the potential of GenoGAM as a generic statistical modeling tool for genome-wide assays.

Availability and implementation

Software is available from Bioconductor: https://www.bioconductor.org/packages/release/bioc/html/GenoGAM.html .

Contact

gagneur@in.tum.de.

Supplementary information

Supplementary information is available at Bioinformatics online.",2017-08-01 +28334355,A Zoom-Focus algorithm (ZFA) to locate the optimal testing region for rare variant association tests.,"

Motivation

Increasing amounts of whole exome or genome sequencing data present the challenge of analysing rare variants with extremely small minor allele frequencies. Various statistical tests have been proposed, which are specifically configured to increase power for rare variants by conducting the test within a certain bin, such as a gene or a pathway. However, a gene may contain from several to thousands of markers, and not all of them are related to the phenotype. Combining functional and non-functional variants in an arbitrary genomic region could impair the testing power.

Results

We propose a Zoom-Focus algorithm (ZFA) to locate the optimal testing region within a given genomic region. It can be applied as a wrapper function in existing rare variant association tests to increase testing power. The algorithm consists of two steps. In the first step, Zooming, a given genomic region is partitioned by an order of two, and the best partition is located. In the second step, Focusing, the boundaries of the zoomed region are refined. Simulation studies showed that ZFA substantially increased the statistical power of rare variants' tests, including the SKAT, SKAT-O, burden test and the W-test. The algorithm was applied on real exome sequencing data of hypertensive disorder, and identified biologically relevant genetic markers to metabolic disorders that were undetectable by a gene-based method. The proposed algorithm is an efficient and powerful tool to enhance the power of association study for whole exome or genome sequencing data.

Availability and implementation

The ZFA software is available at: http://www2.ccrb.cuhk.edu.hk/statgene/software.html.

Contact

maggiew@cuhk.edu.hk or bzee@cuhk.edu.hk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +22833564,NESdb: a database of NES-containing CRM1 cargoes.,"The leucine-rich nuclear export signal (NES) is the only known class of targeting signal that directs macromolecules out of the cell nucleus. NESs are short stretches of 8-15 amino acids with regularly spaced hydrophobic residues that bind the export karyopherin CRM1. NES-containing proteins are involved in numerous cellular and disease processes. We compiled a database named NESdb that contains 221 NES-containing CRM1 cargoes that were manually curated from the published literature. Each NESdb entry is annotated with information about sequence and structure of both the NES and the cargo protein, as well as information about experimental evidence of NES-mapping and CRM1-mediated nuclear export. NESdb will be updated regularly and will serve as an important resource for nuclear export signals. NESdb is freely available to nonprofit organizations at http://prodata.swmed.edu/LRNes.",2012-07-25 +28655167,Identification of transcript regulatory patterns in cell differentiation.,"

Motivation

Studying transcript regulatory patterns in cell differentiation is critical in understanding its complex nature of the formation and function of different cell types. This is done usually by measuring gene expression at different stages of the cell differentiation. However, if the gene expression data available are only from the mature cells, we have some challenges in identifying transcript regulatory patterns that govern the cell differentiation.

Results

We propose to exploit the information of the lineage of cell differentiation in terms of correlation structure between cell types. We assume that two different cell types that are close in the lineage will exhibit many common genes that are co-expressed relative to those that are far in the lineage. Current analysis methods tend to ignore this correlation by testing for differential expression assuming some sort of independence between cell types. We employ a Bayesian approach to estimate the posterior distribution of the mean of expression in each cell type, by taking into account the cell formation path in the lineage. This enables us to infer genes that are specific in each cell type, indicating the genes are involved in directing the cell differentiation to that particular cell type. We illustrate the method using gene expression data from a study of haematopoiesis.

Availability and implementation

R codes to perform the analysis are available in http://www1.maths.leeds.ac.uk/~arief/R/CellDiff/.

Contact

a.gusnanto@leeds.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +27230218,The Human Brainnetome Atlas: A New Brain Atlas Based on Connectional Architecture.,"The human brain atlases that allow correlating brain anatomy with psychological and cognitive functions are in transition from ex vivo histology-based printed atlases to digital brain maps providing multimodal in vivo information. Many current human brain atlases cover only specific structures, lack fine-grained parcellations, and fail to provide functionally important connectivity information. Using noninvasive multimodal neuroimaging techniques, we designed a connectivity-based parcellation framework that identifies the subdivisions of the entire human brain, revealing the in vivo connectivity architecture. The resulting human Brainnetome Atlas, with 210 cortical and 36 subcortical subregions, provides a fine-grained, cross-validated atlas and contains information on both anatomical and functional connections. Additionally, we further mapped the delineated structures to mental processes by reference to the BrainMap database. It thus provides an objective and stable starting point from which to explore the complex relationships between structure, connectivity, and function, and eventually improves understanding of how the human brain works. The human Brainnetome Atlas will be made freely available for download at http://atlas.brainnetome.org, so that whole brain parcellations, connections, and functional data will be readily available for researchers to use in their investigations into healthy and pathological states.",2016-05-26 +30154154,TIP: A Web Server for Resolving Tumor Immunophenotype Profiling.,": Systematically tracking the tumor immunophenotype is required to understand the mechanisms of cancer immunity and improve clinical benefit of cancer immunotherapy. 
However, progress in current research is hindered by the lack of comprehensive immune activity resources and easy-to-use tools for biologists, clinicians, and researchers to conveniently evaluate immune activity during the ""cancer-immunity cycle."" We developed a user-friendly one-stop shop web tool called TIP to comprehensively resolve tumor immunophenotype. TIP has the capability to rapidly analyze and intuitively visualize the activity of anticancer immunity and the extent of tumor-infiltrating immune cells across the seven-step cancer-immunity cycle. Also, we precalculated the pan-cancer immunophenotype for 11,373 samples from 33 The Cancer Genome Atlas human cancers that allow users to obtain and compare immunophenotype of pan-cancer samples. We expect TIP to be useful in a large number of emerging cancer immunity studies and development of effective immunotherapy biomarkers. TIP is freely available for use at http://biocc.hrbmu.edu.cn/TIP/. SIGNIFICANCE: TIP is a one-stop shop platform that can help biologists, clinicians, and researchers conveniently evaluate anticancer immune activity with their own gene expression data. See related commentary by Hirano, p. 6536.",2018-08-28 +29884956,KymoKnot: A web server and software package to identify and locate knots in trajectories of linear or circular polymers.,"The KymoKnot software package and web server identifies and locates physical knots or proper knots in a series of polymer conformations. It is mainly intended as an analysis tool for trajectories of linear or circular polymers, but it can be used on single instances too, e.g. protein structures in PDB format. A key element of the software package is the so-called minimally interfering chain closure algorithm that is used to detect physical knots in open chains and to locate the knotted region in both open and closed chains. 
The web server offers a user-friendly graphical interface that identifies the knot type and highlights the knotted region on each frame of the trajectory, which the user can visualize interactively from various viewpoints. The dynamical evolution of the knotted region along the chain contour is presented as a kymograph. All data can be downloaded in text format. The KymoKnot package is licensed under the BSD 3-Clause licence. The server is publicly available at http://kymoknot.sissa.it/kymoknot/interactive.php .",2018-06-07 +26673252,CARFMAP: A Curated Pathway Map of Cardiac Fibroblasts.,"The adult mammalian heart contains multiple cell types that work in unison under tightly regulated conditions to maintain homeostasis. Cardiac fibroblasts are a significant and unique population of non-muscle cells in the heart that have recently gained substantial interest in the cardiac biology community. To better understand this renaissance cell, it is essential to systematically survey what has been known in the literature about the cellular and molecular processes involved. We have built CARFMAP (http://visionet.erc.monash.edu.au/CARFMAP), an interactive cardiac fibroblast pathway map derived from the biomedical literature using a software-assisted manual data collection approach. CARFMAP is an information-rich interactive tool that enables cardiac biologists to explore the large body of literature in various creative ways. There is surprisingly little overlap between the cardiac fibroblast pathway map, a foreskin fibroblast pathway map, and a whole mouse organism signalling pathway map from the REACTOME database. Among the use cases of CARFMAP is a common task in our cardiac biology laboratory of identifying new genes that are (1) relevant to cardiac literature, and (2) differentially regulated in high-throughput assays. From the expression profiles of mouse cardiac and tail fibroblasts, we employed CARFMAP to characterise cardiac fibroblast pathways. 
Using CARFMAP in conjunction with transcriptomic data, we generated a stringent list of six genes that would not have been singled out using bioinformatics analyses alone. Experimental validation showed that five genes (Mmp3, Il6, Edn1, Pdgfc and Fgf10) are differentially regulated in the cardiac fibroblast. CARFMAP is a powerful tool for systems analyses of cardiac fibroblasts, facilitating systems-level cardiovascular research.",2015-12-16 +21216747,Rice TOGO Browser: A platform to retrieve integrated information on rice functional and applied genomics.,"The Rice TOGO Browser is an online public resource designed to facilitate integration and visualization of mapping data of bacterial artificial chromosome (BAC)/P1-derived artificial chromosome (PAC) clones, genes, restriction fragment length polymorphism (RFLP)/simple sequence repeat (SSR) markers and phenotype data represented as quantitative trait loci (QTLs) onto the genome sequence, and to provide a platform for more efficient utilization of genome information from the point of view of applied genomics as well as functional genomics. Three search options, namely keyword search, region search and trait search, generate various types of data in a user-friendly interface with three distinct viewers, a chromosome viewer, an integrated map viewer and a sequence viewer, thereby providing the opportunity to view the position of genes and/or QTLs at the chromosomal level and to retrieve any sequence information in a user-defined genome region. Furthermore, the gene list, marker list and genome sequence in a specified region delineated by RFLP/SSR markers and any sequences designed as primers can be viewed and downloaded to support forward genetics approaches. An additional feature of this database is the graphical viewer for BLAST search to reveal information not only for regions with significant sequence similarity but also for regions adjacent to those with similarity but with no hits between sequences. 
An easy to use and intuitive user interface can help a wide range of users in retrieving integrated mapping information including agronomically important traits on the rice genome sequence. The database can be accessed at http://agri-trait.dna.affrc.go.jp/.",2011-01-06 +26212482,"VaccineDA: Prediction, design and genome-wide screening of oligodeoxynucleotide-based vaccine adjuvants.","Immunomodulatory oligodeoxynucleotides (IMODNs) are the short DNA sequences that activate the innate immune system via toll-like receptor 9. These sequences predominantly contain unmethylated CpG motifs. In this work, we describe VaccineDA (Vaccine DNA adjuvants), a web-based resource developed to design IMODN-based vaccine adjuvants. We collected and analyzed 2193 experimentally validated IMODNs obtained from the literature. Certain types of nucleotides (e.g., T, GT, TC, TT, CGT, TCG, TTT) are dominant in IMODNs. Based on these observations, we developed support vector machine-based models to predict IMODNs using various compositions. The developed models achieved the maximum Matthews Correlation Coefficient (MCC) of 0.75 with an accuracy of 87.57% using the pentanucleotide composition. The integration of motif information further improved the performance of our model from the MCC of 0.75 to 0.77. Similarly, models were developed to predict palindromic IMODNs and attained a maximum MCC of 0.84 with the accuracy of 91.94%. These models were evaluated using a five-fold cross-validation technique as well as validated on an independent dataset. The models developed in this study were integrated into VaccineDA to provide a wide range of services that facilitate the design of DNA-based vaccine adjuvants (http://crdd.osdd.net/raghava/vaccineda/).",2015-07-27 +28957497,A multi-scenario genome-wide medical population genetics simulation framework.,"

Motivation

Recent technological advances in high-throughput sequencing and genotyping have facilitated an improved understanding of genomic structure and disease-associated genetic factors. In this context, simulation models can play a critical role in revealing various evolutionary and demographic effects on genomic variation, enabling researchers to assess existing and design novel analytical approaches. Although various simulation frameworks have been suggested, they do not account for natural selection in admixture processes. Most are tailored to a single chromosome or a genomic region, very few capture large-scale genomic data, and most are not accessible for genomic communities.

Results

Here we develop a multi-scenario genome-wide medical population genetics simulation framework called 'FractalSIM'. FractalSIM has the capability to accurately mimic and generate genome-wide data under various genetic models on genetic diversity, genomic variation affecting diseases and DNA sequence patterns of admixed and/or homogeneous populations. Moreover, the framework accounts for natural selection in both homogeneous and admixture processes. The outputs of FractalSIM have been assessed using popular tools, and the results demonstrated its capability to accurately mimic real scenarios. They can be used to evaluate the performance of a range of genomic tools from ancestry inference to genome-wide association studies.

Availability and implementation

The FractalSIM package is available at http://www.cbio.uct.ac.za/FractalSIM.

Contact

emile.chimusa@uct.ac.za.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +28971127,"Integrated dataset of anatomical, morphological, and architectural traits for plant species in Madagascar.","In this work, we present a dataset, which provides information on the structural diversity of some endemic tropical species in Madagascar. The data were from CIRAD xylotheque (since 1937), and were also collected during various fieldworks (since 1964). The field notes and photographs were provided by French botanists; particularly by Francis Hallé. The dataset covers 250 plant species with anatomical, morphological, and architectural traits indexed from digitized wood slides and fieldwork documents. The digitized wood slides were constituted by the transverse, tangential, and radial sections with three optical magnifications. The main specific anatomical traits can be found within the digitized area. Information on morphological and architectural traits were indexed from digitized field drawings including notes and photographs. The data are hosted in the website ArchiWood (http://archiwood.cirad.fr).",2017-09-12 +22419844,NeMedPlant: a database of therapeutic applications and chemical constituents of medicinal plants from north-east region of India.,"

Unlabelled

The North-East region of India is one of the twelve mega biodiversity region, containing many rare and endangered species. A curated database of medicinal and aromatic plants from the regions called NeMedPlant is developed. The database contains traditional, scientific and medicinal information about plants and their active constituents, obtained from scholarly literature and local sources. The database is cross-linked with major biochemical databases and analytical tools. The integrated database provides resource for investigations into hitherto unexplored medicinal plants and serves to speed up the discovery of natural products-based drugs.

Availability

The database is available for free at http://bif.uohyd.ac.in/nemedplant/ or http://202.41.85.11/nemedplant/",2012-02-28 +30001212,Heuristic Modeling and 3D Stereoscopic Visualization of a Chlamydomonas reinhardtii Cell. ,"The structural modeling and representation of cells is a complex task as different microscopic, spectroscopic and other information resources have to be combined to achieve a three-dimensional representation with high accuracy. Moreover, to provide an appropriate spatial representation of the cell, a stereoscopic 3D (S3D) visualization is favorable. In this work, a structural cell model is created by combining information from various light microscopic and electron microscopic images as well as from publication-related data. At the mesoscopic level each cell component is presented with special structural and visual properties; at the molecular level a cell membrane composition and the underlying modeling method are discussed; and structural information is correlated with those at the functional level (represented by simplified energy-producing metabolic pathways). The organism used as an example is the unicellular Chlamydomonas reinhardtii, which might be important in future alternative energy production processes. Based on the 3D model, an educative S3D animation was created which was shown at conferences. The complete workflow was accomplished by using the open source 3D modeling software Blender. The discussed project including the animation is available from: http://Cm5.CELLmicrocosmos.org.",2018-07-11 +25631804,Evaluation of portability and cost of a fluorescent PCR ribotyping protocol for Clostridium difficile epidemiology.,"Clostridium difficile is the most commonly identified pathogen among health care-associated infections in the United States. There is a need for accurate and low-cost typing tools that produce comparable data across studies (i.e., portable data) to help characterize isolates during epidemiologic investigations of C. 
difficile outbreaks and sporadic cases of disease. The most popular C. difficile-typing technique is PCR ribotyping, and we previously developed methods using fluorescent PCR primers and amplicon sizing on a Sanger-style sequencer to generate fluorescent PCR ribotyping data. This technique has been used to characterize tens of thousands of C. difficile isolates from cases of disease. Here, we present validation of a protocol for the cost-effective generation of fluorescent PCR ribotyping data. A key component of this protocol is the ability to accurately identify PCR ribotypes against an online database (http://walklab.rcg.montana.edu) at no cost. We present results from a blinded multicenter study to address data portability across four different laboratories and three different sequencing centers. Our standardized protocol and centralized database for typing of C. difficile pathogens will increase comparability between studies so that important epidemiologic linkages between cases of disease and patterns of emergence can be rapidly identified.",2015-01-28 +23410028,Pivotal role of the muscle-contraction pathway in cryptorchidism and evidence for genomic connections with cardiomyopathy pathways in RASopathies.,"

Background

Cryptorchidism is the most frequent congenital disorder in male children; however the genetic causes of cryptorchidism remain poorly investigated. Comparative integratomics combined with systems biology approach was employed to elucidate genetic factors and molecular pathways underlying testis descent.

Methods

Literature mining was performed to collect genomic loci associated with cryptorchidism in seven mammalian species. Information regarding the collected candidate genes was stored in MySQL relational database. Genomic view of the loci was presented using Flash GViewer web tool (http://gmod.org/wiki/Flashgviewer/). DAVID Bioinformatics Resources 6.7 was used for pathway enrichment analysis. Cytoscape plug-in PiNGO 1.11 was employed for protein-network-based prediction of novel candidate genes. Relevant protein-protein interactions were confirmed and visualized using the STRING database (version 9.0).

Results

The developed cryptorchidism gene atlas includes 217 candidate loci (genes, regions involved in chromosomal mutations, and copy number variations) identified at the genomic, transcriptomic, and proteomic level. Human orthologs of the collected candidate loci were presented using a genomic map viewer. The cryptorchidism gene atlas is freely available online: http://www.integratomics-time.com/cryptorchidism/. Pathway analysis suggested the presence of twelve enriched pathways associated with the list of 179 literature-derived candidate genes. Additionally, a list of 43 network-predicted novel candidate genes was significantly associated with four enriched pathways. Joint pathway analysis of the collected and predicted candidate genes revealed the pivotal importance of the muscle-contraction pathway in cryptorchidism and evidence for genomic associations with cardiomyopathy pathways in RASopathies.

Conclusions

The developed gene atlas represents an important resource for the scientific community researching genetics of cryptorchidism. The collected data will further facilitate development of novel genetic markers and could be of interest for functional studies in animals and human. The proposed network-based systems biology approach elucidates molecular mechanisms underlying co-presence of cryptorchidism and cardiomyopathy in RASopathies. Such approach could also aid in molecular explanation of co-presence of diverse and apparently unrelated clinical manifestations in other syndromes.",2013-02-14 +28961713,Genome-scale regression analysis reveals a linear relationship for promoters and enhancers after combinatorial drug treatment.,"

Motivation

Drug combination therapy for treatment of cancers and other multifactorial diseases has the potential of increasing the therapeutic effect, while reducing the likelihood of drug resistance. In order to reduce time and cost spent in comprehensive screens, methods are needed which can model additive effects of possible drug combinations.

Results

We here show that the transcriptional response to combinatorial drug treatment at promoters, as measured by single molecule CAGE technology, is accurately described by a linear combination of the responses of the individual drugs at a genome wide scale. We also find that the same linear relationship holds for transcription at enhancer elements. We conclude that the described approach is promising for eliciting the transcriptional response to multidrug treatment at promoters and enhancers in an unbiased genome wide way, which may minimize the need for exhaustive combinatorial screens.

Availability and implementation

The CAGE sequence data used in this study is available in the DDBJ Sequence Read Archive (http://trace.ddbj.nig.ac.jp/index_e.html), accession number DRP001113.

Contact

xin.gao@kaust.edu.sa or erik.arner@riken.jp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +26484275,Transcriptome analysis of TH2 CD4(+) T cells differentiated from wild-type and NLRP3KO mice.,"The Nod-like receptor NLRP3 is involved in the formation of NLRP3. Up to now, the immunological functions of NLRP3 independently of inflammasome is unclear. In this dataset containing 6 samples (TH0, TH2 cells at day 3 and day 6 in wild type or Nlrp3 deficient cells), we show that NLRP3 expression in CD4(+) T cells supports a T helper 2 (TH2) transcriptional program in a cell-intrinsic manner (raw and normalized data are accessible on Gene Expression Omnibus database under the number GSE54561, http://www.dtd.nlm.nih.gov/geo/query/acc.cgi?acc=GSE54561). Indeed, NLRP3 positively-regulated TH2 program independently of inflammasome formation. These data indicated that TH2 specific genes such as cMaf or Il4 were not induced in Nlrp3 deficient cells. These results demonstrate the capacity of NLRP3 to act as a key transcription factor in TH2 differentiation.",2015-07-09 +28260826,Hybrid Areal Interpolation of Census Counts from 2000 Blocks to 2010 Geographies.,"To measure population changes in areas where census unit boundaries do not align across time, a common approach is to interpolate data from one census's units to another's. This article presents a broad assessment of areal interpolation models for estimating counts of 2000 characteristics in 2010 census units throughout the United States. We interpolate from 2000 census block data using 4 types of ancillary data to guide interpolation: 2010 block densities, imperviousness data, road buffers, and water body polygons. We test 8 binary dasymetric (BD) models and 8 target-density weighting (TDW) models, each using a unique combination of the 4 ancillary data types, and derive 2 hybrid models that blend the best-performing BD and TDW models. 
The most accurate model is a hybrid that generally gives high weight to TDW (allocating 2000 data in proportion to 2010 densities) but gives increasing weight to a BD model (allocating data uniformly within developed land near roads) in proportion to the estimated 2000-2010 rate of change within each block. Although for most 2010 census units, this hybrid model's estimates differ little from the simplest model's estimates, there are still many areas where the estimates differ considerably. Estimates from the final model, along with lower and upper bounds for each estimate, are publicly available for over 1,000 population and housing characteristics at 10 geographic levels via the National Historical Geographic Information System (NHGIS - http://nhgis.org).",2017-03-01 +28922338,Does the β-receptor antagonist esmolol have analgesic effects?: A randomised placebo-controlled cross-over study on healthy volunteers undergoing the cold pressor test.,"BACKGROUND:Esmolol may attenuate the sympathetic response to pain and reduce postoperative opioid consumption. It is not clear whether esmolol has an analgesic effect per se. OBJECTIVES:The aim of this study was to evaluate the analgesic effect of esmolol in the absence of anaesthetics and opioids. We tested the hypothesis that esmolol would reduce the maximum pain intensity perceived during the cold pressor test (CPT) by 2 points on a 0 to 10 numeric pain rating scale (NRS) compared to placebo. DESIGN:Randomised, placebo-controlled cross-over study. SETTING:Postoperative recovery area, Örebro University Hospital. Study period, November 2013 to February 2014. PARTICIPANTS:Fourteen healthy volunteers. Exclusion criteria included ongoing medication, pregnancy and breastfeeding and participation in other medical trials. 
INTERVENTIONS:At separate study sessions, participants received interventions: esmolol (0.7 mg kg⁻¹ bolus over 1 min followed by infusion at 10 μg kg⁻¹ min⁻¹); 0.9% normal saline bolus then remifentanil infusion at 0.2 μg kg⁻¹ min⁻¹ and 0.9% normal saline bolus and infusion according to a random sequence. All infusions were administered over 30 min. MAIN OUTCOME MEASURES:Perceived maximum pain intensity score, pain tolerance and haemodynamic changes during CPT, and occurrence of side-effects to interventions compared to placebo, respectively. RESULTS:Esmolol did not reduce perceived pain intensity or pain tolerance during the CPT. The NRS-max score was similar for esmolol, 8.5 (±1.4) and placebo, 8.4 (±1.3). The mean difference was 0.1 [95% confidence interval (-1.2 to 1.4)], P value equal to 0.83. Remifentanil significantly reduced NRS-max scores, 5.4 (±2.1) compared to placebo, [mean difference -3.1 (95% confidence interval (-4.4 to -1.8)), P < 0.001]. Side-effects were seen with remifentanil but not with esmolol. CONCLUSION:No direct analgesic effect of esmolol could be demonstrated in the present study. The postoperative opioid-sparing effect demonstrated in previous studies, could therefore be secondary to other factors such as avoidance of opioid-induced hyperalgesia, synergy with coadministered opioids or altered pharmacokinetics of those drugs. TRIAL REGISTRATION:European clinical trials database, https://eudract.ema.europa.eu/, EudraCT no. 2011-005780-24.",2018-03-01 +30033070,European Association of Urology Guidelines on Renal Transplantation: Update 2018.,"

Context

The European Association of Urology (EAU) panel on renal transplantation (RT) has released an updated version of the RT guidelines.

Objective

To present the 2018 EAU guidelines on RT.

Evidence acquisition

A broad and comprehensive scoping exercise was performed, encompassing all areas of RT guidelines published between January 1, 2007, and May 31, 2016. Databases covered by the search included Medline, Embase, and the Cochrane Libraries. Previous guidelines were updated, and levels of evidence and grades of recommendation were assigned.

Evidence synthesis

It is strongly recommended to offer pure or hand-assisted laparoscopic/retroperitoneoscopic surgery as the preferential technique for living donor nephrectomy. Decisions on the acceptance of a donor organ should not be based on histological findings alone since this might lead to an unnecessarily high rate of discarded grafts. For ureterovesical anastomosis, a Lich-Gregoir-like extravesical technique protected by a ureteral stent is the preferred technique for minimisation of urinary tract complications. It is also strongly recommended to perform initial rejection prophylaxis with a combination therapy comprising a calcineurin inhibitor (preferably tacrolimus), mycophenolate, steroids, and an induction agent (either basiliximab or anti-thymocyte globulin). The long version of the guidelines is available at the EAU website (http://uroweb.org/guidelines).

Conclusions

These abridged EAU guidelines present updated information on the clinical and surgical management of RT for incorporation into clinical practice.

Patient summary

The European Association of Urology has released the renal transplantation guidelines. The implementation of minimally invasive surgery for organ retrieval and the latest evidence on transplant surgery as well as on immunosuppressive regimens are key factors for minimisation of rejection and achievement of long-term graft survival.",2018-03-01 +29497752,The Relationship Between Confrontation Naming and Story Gist Production in Aphasia.,"

Purpose

The purpose of this study was to examine the relationship between picture naming performance and the ability to communicate the gist, or essential elements, of a story. We also sought to determine if this relationship varied according to Western Aphasia Battery-Revised (WAB-R; Kertesz, 2007) aphasia subtype.

Method

Demographic information, test scores, and transcripts of 258 individuals with aphasia completing 3 narrative tasks were retrieved from the AphasiaBank database. Narratives were subjected to a main concept analysis to determine gist production. A correlation analysis was used to investigate the relationship between naming scores and main concept production for the whole group of persons with aphasia and for WAB-R subtypes separately.

Results

We found strong correlations between naming test scores and narrative gist production for the large sample of persons with aphasia. However, the strength of the correlations varied by WAB-R subtype.

Conclusions

Picture naming may accurately predict gist production for individuals with Broca's and Wernicke's aphasia, but not for other WAB-R subtypes. Given the current reprioritization of outcome measurement, picture naming may not be an appropriate surrogate measure for functional communication for all persons with aphasia.

Supplemental materials

https://doi.org/10.23641/asha.5851848.",2018-03-01 +28011754,Logic programming to infer complex RNA expression patterns from RNA-seq data.,"To meet the increasing demand in the field, numerous long noncoding RNA (lncRNA) databases are available. Given many lncRNAs are specifically expressed in certain cell types and/or time-dependent manners, most lncRNA databases fall short of providing such profiles. We developed a strategy using logic programming to handle the complex organization of organs, their tissues and cell types as well as gender and developmental time points. To showcase this strategy, we introduce 'RenalDB' (http://renaldb.uni-frankfurt.de), a database providing expression profiles of RNAs in major organs focusing on kidney tissues and cells. RenalDB uses logic programming to describe complex anatomy, sample metadata and logical relationships defining expression, enrichment or specificity. We validated the content of RenalDB with biological experiments and functionally characterized two long intergenic noncoding RNAs: LOC440173 is important for cell growth or cell survival, whereas PAXIP1-AS1 is a regulator of cell death. We anticipate RenalDB will be used as a first step toward functional studies of lncRNAs in the kidney.",2018-03-01 +29160179,The MiAge Calculator: a DNA methylation-based mitotic age calculator of human tissue types.,"Cell division is important in human aging and cancer. The estimation of the number of cell divisions (mitotic age) of a given tissue type in individuals is of great interest as it allows not only the study of biological aging (using a new molecular aging target) but also the stratification of prospective cancer risk. Here, we introduce the MiAge Calculator, a mitotic age calculator based on a novel statistical framework, the MiAge model. 
MiAge is designed to quantitatively estimate mitotic age (total number of lifetime cell divisions) of a tissue using the stochastic replication errors accumulated in the epigenetic inheritance process during cell divisions. With the MiAge model, the MiAge Calculator was built using the training data of DNA methylation measures of 4,020 tumor and adjacent normal tissue samples from eight TCGA cancer types and was tested using the testing data of DNA methylation measures of 2,221 tumor and adjacent normal tissue samples of five other TCGA cancer types. We showed that within each of the thirteen cancer types studied, the estimated mitotic age is universally accelerated in tumor tissues compared to adjacent normal tissues. Across the thirteen cancer types, we showed that worse cancer survivals are associated with more accelerated mitotic age in tumor tissues. Importantly, we demonstrated the utility of mitotic age by showing that the integration of mitotic age and clinical information leads to improved survival prediction in six out of the thirteen cancer types studied. The MiAge Calculator is available at http://www.columbia.edu/∼sw2206/softwares.htm .",2018-02-06 +26233958,MiR-17-92 cluster promotes hepatocarcinogenesis.,"MiR-17-92 cluster is an oncogenic miRNA cluster that is implicated in several cancers, although its role in hepatocarcinogenesis has not been clearly defined. In this study, we show that the miR-17-92 cluster is highly expressed in human hepatocellular carcinoma (HCC) tissues compared to the non-tumorous liver tissues by RT-PCR and in situ hybridization analyses. Increased miR-17-92 cluster expression in HCC tissues was further confirmed by analysis of the RNA-sequencing data of 319 patients available from the Cancer Genome Atlas (TCGA) Data Portal (https://tcga-data.nci.nih.gov/tcga/). 
To create an animal model that resembles enhanced miR-17-92 in the liver, we developed liver-specific miR-17-92 transgenic mice and the animals were treated with the hepatic carcinogen, diethylnitrosamine (DEN). We observed that the liver-specific miR-17-92 transgenic mice showed significantly increased hepatocellular cancer development compared to the matched wild-type control mice. Forced overexpression of the miR-17-92 cluster in cultured human hepatocellular cancer cells enhanced tumor cell proliferation, colony formation and invasiveness in vitro, whereas inhibition of the miR-17-92 cluster reduced tumor cell growth. By analyzing the miRNA and mRNA sequencing data from the 312 hepatocellular cancer patients available from the TCGA database, we observed that the expression levels of the miR-17-92 cluster members and host gene in the tumor tissues are negatively correlated with several target genes, including CREBL2, PRRG1, NTN4. Our findings demonstrate an important role of the miR-17-92 cluster in hepatocarcinogenesis and suggest the possibility of targeting this pivotal miRNA cluster for potential therapy.",2015-08-01 +22493538,Ssa miRNAs DB: Online repository of in silico predicted miRNAs in Salmo salar.,"

Unlabelled

The Atlantic salmon (Salmo salar) is a very valuable commercial salmonid species. As with other aquaculture species, intensive aquaculture of Atlantic salmon often faces disease problems especially in early life stages which can limit stable production of the species. 'Ssa miRNAs DB', a bioinformatics and manually curated database, aims at providing a comprehensive resource of microRNA in Atlantic salmon, with a user friendly interface for a convenient retrieval of each entry by microRNA ID or target gene. The current version of Ssa miRNAs DB involved the prediction of 41 and 266 homologous and novel microRNAs, respectively.

Availability

The database is available for free at http://www.molgenv.com/ssa_mirnas_db_home.php.",2012-03-31 +30774185,ASAP: A new global early warning system to detect anomaly hot spots of agricultural production for food security analysis.,"Monitoring crop and rangeland conditions is highly relevant for early warning and response planning in food insecure areas of the world. Satellite remote sensing can obtain relevant and timely information in such areas where ground data are scattered, non-homogenous, or frequently unavailable. Rainfall estimates provide an outlook of the drivers of vegetation growth, whereas time series of satellite-based biophysical indicators at high temporal resolution provide key information about vegetation status in near real-time and over large areas. The new early warning decision support system ASAP (Anomaly hot Spots of Agricultural Production) builds on the experience of the MARS crop monitoring activities for food insecure areas, that have started in the early 2000's and aims at providing timely information about possible crop production anomalies. The information made available on the website (https://mars.jrc.ec.europa.eu/asap/) directly supports multi-agency early warning initiatives such as for example the GEOGLAM Crop Monitor for Early Warning and provides inputs to more detailed food security assessments that are the basis for the annual Global Report on Food Crises. ASAP is a two-step analysis framework, with a first fully automated step classifying the first sub-national level administrative units into four agricultural production deficit warning categories. Warnings are based on rainfall and vegetation index anomalies computed over crop and rangeland areas and are updated every 10 days. They take into account the timing during the crop season at which they occur, using remote sensing derived phenology per-pixel. 
The second step involves the monthly analysis at country level by JRC crop monitoring experts of all the information available, including the automatic warnings, crop production and food security-tailored media analysis, high-resolution imagery (e.g. Landsat 8, Sentinel 1 and 2) processed in Google Earth Engine and ancillary maps, graphs and statistics derived from a set of indicators. Countries with potentially critical conditions are marked as minor or major hotspots and a global overview is provided together with short national level narratives.",2019-01-01 +30688513,Organophosphate Pesticide Metabolite Concentrations in Urine during Pregnancy and Offspring Nonverbal IQ at Age 6 Years.,"

Background

Susceptibility to organophosphate (OP) pesticide neurotoxicity may be greatest during the prenatal period; however, previous studies have produced mixed findings concerning in utero OP pesticide exposure and child cognition.

Objectives

Our objective was to determine whether maternal urinary concentrations of OP pesticide metabolites are inversely associated with child nonverbal IQ at 6 y of age and to examine potential effect measure modification by the PON1 gene.

Methods

Data came from 708 mother–child pairs participating in the Generation R Study. Maternal urine concentrations of six dialkylphosphates (DAPs), collected at [Formula: see text], 18–25, and [Formula: see text] of gestation, were determined. Child nonverbal IQ was measured at 6 y of age using the Mosaics and Categories subtests from the Snijders-Oomen Nonverbal Intelligence Test-Revised. PON1 was determined in cord blood for 474 infants. Multiple linear regression models were fit to estimate the DAP-IQ associations and PON1 interactions.

Results

Overall, associations between child nonverbal IQ and maternal DAP concentrations were small and imprecise, and these associations were inconsistent across urine sampling periods. However, for a 10-fold difference in total DAP concentration for the [Formula: see text] of gestation samples, adjusted child nonverbal IQ was 3.9 points lower (95% CI: [Formula: see text], [Formula: see text]). Heterogeneity in the DAP–IQ association by PON1 gene allele status was not observed ([Formula: see text]).

Conclusions

Consistent evidence of an association between higher maternal urinary DAP concentrations and lower child IQ scores at 6 y of age was not observed. There was some evidence for an inverse relation of child nonverbal IQ and late pregnancy urinary DAPs, but the estimated association was imprecise. https://doi.org/10.1289/EHP3024.",2019-01-01 +30620212,"Geographic, Demographic, and Temporal Variations in the Association between Heat Exposure and Hospitalization in Brazil: A Nationwide Study between 2000 and 2015.","

Background

Limited evidence is available regarding the association between heat exposure and morbidity in Brazil and how the effect of heat exposure on health outcomes may change over time.

Objectives

This study sought to quantify the geographic, demographic and temporal variations in the heat–hospitalization association in Brazil from 2000–2015.

Methods

Data on hospitalization and meteorological conditions were collected from 1,814 cities during the 2000–2015 hot seasons. Quasi-Poisson regression with constrained lag model was applied to examine city-specific estimates, which were then pooled at the regional and national levels using random-effect meta-analyses. Stratified analyses were performed by sex, 10 age groups, and 11 cause categories. Meta-regression was used to examine the temporal change in estimates of heat effect from 2000 to 2015.

Results

For every 5°C increase in daily mean temperature during the 2000–2015 hot seasons, the estimated risk of hospitalization over lag 0-7 d rose by 4.0% [95% confidence interval (CI): 3.7%, 4.3%] nationwide. Estimated 6.2% [95% empirical CI (eCI): 3.3%, 9.1%] of hospitalizations were attributable to heat exposure, equating to 132 cases (95% eCI: 69%, 192%) per 100,000 residents. The attributable rate was greatest in children [Formula: see text] and was highest for hospitalizations due to infectious and parasitic diseases. Women of reproductive age and those [Formula: see text] had higher heat burden than men. The attributable burden was greatest for cities in the central west and the inland of the northeast; lowest in the north and eastern coast. Over the 16-y period, the estimated heat effects declined insignificantly at the national level.

Conclusions

In Brazil's hot seasons, 6% of hospitalizations were estimated to be attributed to heat exposure. As there was no evidence indicating that thermal adaptation had occurred at the national level, the burden of hospitalization associated with heat exposure in Brazil is likely to increase in the context of global warming. https://doi.org/10.1289/EHP3889.",2019-01-01 +21695066,GENT: gene expression database of normal and tumor tissues.,"

Background

Some oncogenes such as ERBB2 and EGFR are over-expressed in only a subset of patients. Cancer outlier profile analysis is one of computational approaches to identify outliers in gene expression data. A database with a large sample size would be a great advantage when searching for genes over-expressed in only a subset of patients.

Description

GENT (Gene Expression database of Normal and Tumor tissues) is a web-accessible database that provides gene expression patterns across diverse human cancer and normal tissues. More than 40000 samples, profiled by Affymetrix U133A or U133plus2 platforms in many different laboratories across the world, were collected from public resources and combined into two large data sets, helping the identification of cancer outliers that are over-expressed in only a subset of patients. Gene expression patterns in nearly 1000 human cancer cell lines are also provided. In each tissue, users can retrieve gene expression patterns classified by more detailed clinical information.

Conclusions

The large samples size (>24300 for U133plus2 and >16400 for U133A) of GENT provides an advantage in identifying cancer outliers. A cancer cell line gene expression database is useful for target validation by in vitro experiment. We hope GENT will be a useful resource for cancer researchers in many stages from target discovery to target validation. GENT is available at http://medicalgenome.kribb.re.kr/GENT/ or http://genome.kobic.re.kr/GENT/.",2011-05-09 +26578594,SEA: a super-enhancer archive.,"Super-enhancers are large clusters of transcriptional enhancers regarded as having essential roles in driving the expression of genes that control cell identity during development and tumorigenesis. The construction of a genome-wide super-enhancer database is urgently needed to better understand super-enhancer-directed gene expression regulation for a given biology process. Here, we present a specifically designed web-accessible database, Super-Enhancer Archive (SEA, http://sea.edbc.org). SEA focuses on integrating super-enhancers in multiple species and annotating their potential roles in the regulation of cell identity gene expression. The current release of SEA incorporates 83 996 super-enhancers computationally or experimentally identified in 134 cell types/tissues/diseases, including human (75 439, three of which were experimentally identified), mouse (5879, five of which were experimentally identified), Drosophila melanogaster (1774) and Caenorhabditis elegans (904). To facilitate data extraction, SEA supports multiple search options, including species, genome location, gene name, cell type/tissue and super-enhancer name. The response provides detailed (epi)genetic information, incorporating cell type specificity, nearby genes, transcriptional factor binding sites, CRISPR/Cas9 target sites, evolutionary conservation, SNPs, H3K27ac, DNA methylation, gene expression and TF ChIP-seq data. 
Moreover, analytical tools and a genome browser were developed for users to explore super-enhancers and their roles in defining cell identity and disease processes in depth.",2015-11-17 +29497460,Ontology for Semantic Data Integration in the Domain of IT Benchmarking.,"A domain-specific ontology for IT benchmarking has been developed to bridge the gap between a systematic characterization of IT services and their data-based valuation. Since information is generally collected during a benchmark exercise using questionnaires on a broad range of topics, such as employee costs, software licensing costs, and quantities of hardware, it is commonly stored as natural language text; thus, this information is stored in an intrinsically unstructured form. Although these data form the basis for identifying potentials for IT cost reductions, neither a uniform description of any measured parameters nor the relationship between such parameters exists. Hence, this work proposes an ontology for the domain of IT benchmarking, available at https://w3id.org/bmontology. The design of this ontology is based on requirements mainly elicited from a domain analysis, which considers analyzing documents and interviews with representatives from Small- and Medium-Sized Enterprises and Information and Communications Technology companies over the last eight years. The development of the ontology and its main concepts is described in detail (i.e., the conceptualization of benchmarking events, questionnaires, IT services, indicators and their values) together with its alignment with the DOLCE-UltraLite foundational ontology.",2017-11-13 +26191084,BALL-SNP: combining genetic and structural information to identify candidate non-synonymous single nucleotide polymorphisms.,"

Background

High-throughput genetic testing is increasingly applied in clinics. Next-Generation Sequencing (NGS) data analysis however still remains a great challenge. The interpretation of pathogenicity of single variants or combinations of variants is crucial to provide accurate diagnostic information or guide therapies.

Methods

To facilitate the interpretation of variants and the selection of candidate non-synonymous polymorphisms (nsSNPs) for further clinical studies, we developed BALL-SNP. Starting from genetic variants in variant call format (VCF) files or tabular input, our tool, first, visualizes the three-dimensional (3D) structure of the respective proteins from the Protein Data Bank (PDB) and highlights mutated residues, automatically. Second, a hierarchical bottom up clustering on the nsSNPs within the 3D structure is performed to identify nsSNPs, which are close to each other. The modular and flexible implementation allows for straightforward integration of different databases for pathogenic and benign variants, but also enables the integration of pathogenicity prediction tools. The collected background information of all variants is presented below the 3D structure in an easily interpretable table format.

Results

First, we integrated different data resources into BALL-SNP, including databases containing information on genetic variants such as ClinVar or HUMSAVAR; third party tools that predict stability or pathogenicity in silico such as I-Mutant2.0; and additional information derived from the 3D structure such as a prediction of binding pockets. We then explored the applicability of BALL-SNP on the example of patients suffering from cardiomyopathies. Here, the analysis highlighted accumulation of variations in the genes JUP, VCL, and SMYD2.

Conclusion

Software solutions for analyzing high-throughput genomics data are important to support diagnosis and therapy selection. Our tool BALL-SNP, which is freely available at http://www.ccb.uni-saarland.de/BALL-SNP, combines genetic information with an easily interpretable and interactive, graphical representation of amino acid changes in proteins. Thereby relevant information from databases and computational tools is presented. Beyond this, proximity to functional sites or accumulations of mutations with a potential collective effect can be discovered.",2015-07-01 +29894480,Individual patient variability with the application of the kidney failure risk equation in advanced chronic kidney disease.,"The Kidney Failure Risk Equation (KFRE) predicts the need for dialysis or transplantation using age, sex, estimated glomerular filtration rate (eGFR), and urine albumin to creatinine ratio (ACR). The eGFR and ACR have known biological and analytical variability. We examined the effect of biological and analytical variability of eGFR and ACR on the 2-year KFRE predicted kidney failure probabilities using single measure and the average of repeat measures of simulated eGFR and ACR. Previously reported values for coefficient of variation (CV) for ACR and eGFR were used to calculate day to day variability. Variation was also examined with outpatient laboratory data from patients with an eGFR between 15 and 50 mL/min/1.72 m2. A web application was developed to calculate and model day to day variation in risk. The biological and analytical variability related to ACR and eGFR lead to variation in the predicted probability of kidney failure. A male patient age 50, ACR 30 mg/mmol and eGFR 25, had a day to day variation in risk of 7% (KFRE point estimate: 17%, variability range 14% to 21%). The addition of inter laboratory variation due to different instrumentation increased the variability to 9% (KFRE point estimate 17%, variability range 13% to 22%). 
Averaging of repeated measures of eGFR and ACR significantly decreased the variability (KFRE point estimate 17%, variability range 15% to 19%). These findings were consistent when using outpatient laboratory data which showed that most patients had a KFRE 2-year risk variability of ≤ 5% (79% of patients). Approximately 13% of patients had variability from 5-10% and 8% had variability > 10%. The mean age (SD) of this cohort was 64 (15) years, 36% were females, the mean (SD) eGFR was 32 (10) ml/min/1.73m2 and median (IQR) ACR was 22.7 (110). Biological and analytical variation intrinsic to the eGFR and ACR may lead to a substantial degree of variability that decreases with repeat measures. Use of a web application may help physicians and patients understand individual patient's risk variability and communicate risk (https://mccudden.shinyapps.io/kfre_app/). The web application allows the user to alter age, gender, eGFR, ACR, CV (for both eGFR and ACR) as well as units of measurements for ACR (g/mol versus mg/g).",2018-06-12 +23272737,"OPTIMAS-DW: a comprehensive transcriptomics, metabolomics, ionomics, proteomics and phenomics data resource for maize.","

Background

Maize is a major crop plant, grown for human and animal nutrition, as well as a renewable resource for bioenergy. When looking at the problems of limited fossil fuels, the growth of the world's population or the world's climate change, it is important to find ways to increase the yield and biomass of maize and to study how it reacts to specific abiotic and biotic stress situations. Within the OPTIMAS systems biology project maize plants were grown under a large set of controlled stress conditions, phenotypically characterised and plant material was harvested to analyse the effect of specific environmental conditions or developmental stages. Transcriptomic, metabolomic, ionomic and proteomic parameters were measured from the same plant material allowing the comparison of results across different omics domains. A data warehouse was developed to store experimental data as well as analysis results of the performed experiments.

Description

The OPTIMAS Data Warehouse (OPTIMAS-DW) is a comprehensive data collection for maize and integrates data from different data domains such as transcriptomics, metabolomics, ionomics, proteomics and phenomics. Within the OPTIMAS project, a 44K oligo chip was designed and annotated to describe the functions of the selected unigenes. Several treatment- and plant growth stage experiments were performed and measured data were filled into data templates and imported into the data warehouse by a Java based import tool. A web interface allows users to browse through all stored experiment data in OPTIMAS-DW including all data domains. Furthermore, the user can filter the data to extract information of particular interest. All data can be exported into different file formats for further data analysis and visualisation. The data analysis integrates data from different data domains and enables the user to find answers to different systems biology questions. Finally, maize specific pathway information is provided.

Conclusions

With OPTIMAS-DW a data warehouse for maize was established, which is able to handle different data domains, comprises several analysis results that will support researchers within their work and supports systems biological research in particular. The system is available at http://www.optimas-bioenergy.org/optimas_dw.",2012-12-29 +24970281,Computational prediction of disease microRNAs in domestic animals.,"

Background

The most important means of identifying diseases before symptoms appear is through the discovery of disease-associated biomarkers. Recently, microRNAs (miRNAs) have become highly useful biomarkers of infectious, genetic and metabolic diseases in human but they have not been well studied in domestic animals. It is probable that many of the animal homologs of human disease-associated miRNAs may be involved in domestic animal diseases. Here we describe a computational biology study in which human disease miRNAs were utilized to predict orthologous miRNAs in cow, chicken, pig, horse, and dog.

Results

We identified 287 human disease-associated miRNAs which had at least one 100% identical animal homolog. The 287 miRNAs were associated with 359 human diseases referenced in 2,863 Pubmed articles. Multiple sequence analysis indicated that over 60% of known horse mature miRNAs found perfect matches in human disease-associated miRNAs, followed by dog (50%). As expected, chicken had the least number of perfect matches (5%). Phylogenetic analysis of miRNA precursors indicated that 85% of human disease pre-miRNAs were highly conserved in animals, showing less than 5% nucleotide substitution rates over evolutionary time. As an example we demonstrated conservation of human hsa-miR-143-3p which is associated with type 2 diabetes and targets AKT1 gene which is highly conserved in pig, horse and dog. Functional analysis of AKT1 gene using Gene Ontology (GO) showed that it is involved in glucose homeostasis, positive regulation of glucose import, positive regulation of glycogen biosynthetic process, glucose transport and response to food.

Conclusions

This data provides the animal and veterinary research community with a resource to assist in generating hypothesis-driven research for discovering animal disease-related miRNA from their datasets and expedite development of prophylactic and disease-treatment strategies and also influence research efforts to identify novel disease models in large animals. Integrated data is available for download at http://agbase.hpc.msstate.edu/cgi-bin/animal_mirna.cgi.",2014-06-27 +30660516,Superior Risk Stratification With Coronary Computed Tomography Angiography Using a Comprehensive Atherosclerotic Risk Score.,"

Objectives

This study was designed to assess the prognostic value of a new comprehensive coronary computed tomography angiography (CTA) score compared with the stenosis severity component of the Coronary Artery Disease-Reporting and Data System (CAD-RADS).

Background

Current risk assessment with coronary CTA is mainly focused on maximal stenosis severity. Integration of plaque extent, location, and composition in a comprehensive model may improve risk stratification.

Methods

A total of 2,134 patients with suspected but without known CAD were included. The predictive value of the comprehensive CTA score (ranging from 0 to 42 and divided into 3 groups: 0 to 5, 6 to 20, and >20) was compared with the CAD-RADS combined into 3 groups (0% to 30%, 30% to 70% and ≥70% stenosis). Its predictive performance was internally and externally validated (using the 5-year follow-up dataset of the CONFIRM [Coronary CT Angiography Evaluation for Clinical Outcomes: An International Multicenter Registry], n = 1,971).

Results

The mean age of patients was 55 ± 13 years, mean follow-up 3.6 ± 2.8 years, and 130 events (myocardial infarction or death) occurred. The new, comprehensive CTA score showed strong and independent predictive value using the Cox proportional hazard analysis. A model including clinical variables plus comprehensive CTA score showed better discrimination of events compared with a model consisting of clinical variables plus CAD-RADS (0.768 vs. 0.742, p = 0.001). Also, the comprehensive CTA score correctly reclassified a significant proportion of patients compared with the CAD-RADS (net reclassification improvement 12.4%, p < 0.001). Good predictive accuracy was reproduced in the external validation cohort.

Conclusions

The new comprehensive CTA score provides better discrimination and reclassification of events compared with the CAD-RADS score based on stenosis severity only. The score retained similar prognostic accuracy when externally validated. Anatomic risk scores can be improved with the addition of extent, location, and compositional measures of atherosclerotic plaque. (Comprehensive CTA risk score calculator is available at: http://18.224.14.19/calcApp/).",2019-01-16 +29482513,Risk of chronic kidney disease in young adults with impaired glucose tolerance/impaired fasting glucose: a retrospective cohort study using electronic primary care records.,"

Background

The risk of chronic kidney disease (CKD) is known to be elevated in patients with diabetes mellitus but the risk of young adults aged 18 to 40 years with impaired glucose tolerance/impaired fasting glucose (IGT/IFG) developing CKD is not well characterised. Furthermore, progression of IGT/IFG to diabetes and subsequent CKD development is not well understood.

Methods

A retrospective cohort study was undertaken using The Health Improvement Network (THIN) database, a large dataset of electronic patient records. THIN database is jointly managed by IMS Health Real World Evidence Solution ( http://www.epic-uk.org/index.html ) and In Practice System (InPs). Cases were aged 18 to 40, with a diagnosis of IGT/IFG and registered at a practice contributing to THIN between 2000 and 2015. The study population consisted of 40,092 patients, including 21,454 (53.5%) female and 18,638 (46.5%) male. The median follow-up was approximately 2 years. The outcome was a diagnosis of CKD determined from either clinical coding or laboratory results. For the primary analysis the unadjusted and adjusted relative risk of CKD in IGT/IFG was compared to age, sex and practice matched controls with normoglycaemia. For the secondary analysis we compared the incidence of CKD before to after a diagnosis of type 2 diabetes (T2DM) in the IGT/IFG study cohort.

Results

The Incidence Rate Ratio (IRR) for CKD for IGT/IFG compared to normoglycaemia was 4.0 [95% confidence interval (CI), 3.2 to 5.1, P < 0.001]. The adjusted IRR was 2.6 [95% CI, 2.0 to 3.4, P < 0.001]. The unadjusted IRR was 8.8 [95% CI, 7.7 to 10.0, P < 0.001] after IGT/IFG patients had developed T2DM and the adjusted IRR was 6.3 [95% CI, 5.5 to 7.2, P < 0.001].

Conclusion

Our results show that young IGT/IFG subjects are also at higher risk of developing CKD. This risk is modulated by the degree of baseline renal function and glucose tolerance, being higher in those developing T2DM.",2018-02-26 +25599550,DIA-Umpire: comprehensive computational framework for data-independent acquisition proteomics.,"As a result of recent improvements in mass spectrometry (MS), there is increased interest in data-independent acquisition (DIA) strategies in which all peptides are systematically fragmented using wide mass-isolation windows ('multiplex fragmentation'). DIA-Umpire (http://diaumpire.sourceforge.net/), a comprehensive computational workflow and open-source software for DIA data, detects precursor and fragment chromatographic features and assembles them into pseudo-tandem MS spectra. These spectra can be identified with conventional database-searching and protein-inference tools, allowing sensitive, untargeted analysis of DIA data without the need for a spectral library. Quantification is done with both precursor- and fragment-ion intensities. Furthermore, DIA-Umpire enables targeted extraction of quantitative information based on peptides initially identified in only a subset of the samples, resulting in more consistent quantification across multiple samples. We demonstrated the performance of the method with control samples of varying complexity and publicly available glycoproteomics and affinity purification-MS data.",2015-01-19 +29490679,Developmental transcriptomics of the brittle star Amphiura filiformis reveals gene regulatory network rewiring in echinoderm larval skeleton evolution.,"

Background

Amongst the echinoderms the class Ophiuroidea is of particular interest for its phylogenetic position, ecological importance and developmental and regenerative biology. However, compared to other echinoderms, notably echinoids (sea urchins), relatively little is known about developmental changes in gene expression in ophiuroids. To address this issue, we have generated and assembled a large RNAseq data set of four key stages of development in the brittle star Amphiura filiformis and a de novo reference transcriptome of comparable quality to that of a model echinoderm-the sea urchin Strongylocentrotus purpuratus. Furthermore, we provide access to the new data via a web interface: http://www.echinonet.eu/shiny/Amphiura_filiformis/ .

Results

We have identified highly conserved genes associated with the development of a biomineralised skeleton. We also identify important class-specific characters, including the independent duplication of the msp130 class of genes in different echinoderm classes and the unique occurrence of spicule matrix (sm) genes in echinoids. Using a new quantification pipeline for our de novo transcriptome, validated with other methodologies, we find major differences between brittle stars and sea urchins in the temporal expression of many transcription factor genes. This divergence in developmental regulatory states is more evident in early stages of development when cell specification begins, rather than when cells initiate differentiation.

Conclusions

Our findings indicate that there has been a high degree of gene regulatory network rewiring and clade-specific gene duplication, supporting the hypothesis of a convergent evolution of larval skeleton development in echinoderms.",2018-02-28 +,Next-Generation Genomics Facility at C-CAMP: Accelerating Genomic Research in India,"Next-Generation Sequencing (NGS; http://www.genome.gov/12513162) is a recent life-sciences technological revolution that allows scientists to decode genomes or transcriptomes at a much faster rate with a lower cost. Genomic-based studies are in a relatively slow pace in India due to the non-availability of genomics experts, trained personnel and dedicated service providers. Using NGS there is a lot of potential to study India's national diversity (of all kinds). We at the Centre for Cellular and Molecular Platforms (C-CAMP) have launched the Next Generation Genomics Facility (NGGF) to provide genomics service to scientists, to train researchers and also work on national and international genomic projects. We have HiSeq1000 from Illumina and GS-FLX Plus from Roche454. The long reads from GS FLX Plus, and high sequence depth from HiSeq1000, are the best and ideal hybrid approaches for de novo and re-sequencing of genomes and transcriptomes. At our facility, we have sequenced around 70 different organisms comprising of more than 388 genomes and 615 transcriptomes – prokaryotes and eukaryotes (fungi, plants and animals). In addition we have optimized other unique applications such as small RNA (miRNA, siRNA etc), long Mate-pair sequencing (2 to 20 Kb), Coding sequences (Exome), Methylome (ChIP-Seq), Restriction Mapping (RAD-Seq), Human Leukocyte Antigen (HLA) typing, mixed genomes (metagenomes) and target amplicons, etc. Translating DNA sequence data from NGS sequencer into meaningful information is an important exercise. 
Under NGGF, we have bioinformatics experts and high-end computing resources to dissect NGS data such as genome assembly and annotation, gene expression, target enrichment, variant calling (SSR or SNP), comparative analysis etc. Our services (sequencing and bioinformatics) have been utilized by more than 45 organizations (academia and industry) both within India and outside, resulting several publications in peer-reviewed journals and several genomic/transcriptomic data is available at NCBI.",2014-05-01 +21789500,NeuroNames: an ontology for the BrainInfo portal to neuroscience on the web.,"BrainInfo ( http://braininfo.org ) is a growing portal to neuroscientific information on the Web. It is indexed by NeuroNames, an ontology designed to compensate for ambiguities in neuroanatomical nomenclature. The 20-year old ontology continues to evolve toward the ideal of recognizing all names of neuroanatomical entities and accommodating all structural concepts about which neuroscientists communicate, including multiple concepts of entities for which neuroanatomists have yet to determine the best or 'true' conceptualization. To make the definitions of structural concepts unambiguous and terminologically consistent we created a 'default vocabulary' of unique structure names selected from existing terminology. We selected standard names by criteria designed to maximize practicality for use in verbal communication as well as computerized knowledge management. The ontology of NeuroNames accommodates synonyms and homonyms of the standard terms in many languages. It defines complex structures as models composed of primary structures, which are defined in unambiguous operational terms. NeuroNames currently relates more than 16,000 names in eight languages to some 2,500 neuroanatomical concepts. The ontology is maintained in a relational database with three core tables: Names, Concepts and Models. 
BrainInfo uses NeuroNames to index information by structure, to interpret users' queries and to clarify terminology on remote web pages. NeuroNames is a resource vocabulary of the NLM's Unified Medical Language System (UMLS, 2011) and the basis for the brain regions component of NIFSTD (NeuroLex, 2011). The current version has been downloaded to hundreds of laboratories for indexing data and linking to BrainInfo, which attracts some 400 visitors/day, downloading 2,000 pages/day.",2012-01-01 +27727093,[Lower urinary tract symptoms and pelvic floor dysfunction in renal transplant candidates and recipients].,"

Objectives

To describe lower urinary tract symptoms (LUTS) and their management in renal transplant candidates and recipients.

Material and methods

Relevant publications were identified through Medline (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) database using the following keywords, alone or in association: ""transplantation"", ""prostate hyperplasia"", ""transurethral resection of prostate"", ""urinary incontinence"", ""LUTS"", ""pelvic floor dysfunction"". Articles were selected according to methods, language of publication and relevance. The reference lists were used to identify additional historical studies of interest. Both prospective and retrospective series, in French and English, as well as review articles and case reports were selected. In addition, French national transplant and health agencies (http://www.agence-biomedecine.fr and http://www.has-sante.fr) databases were screened using identical keywords. A total of 991 articles were analyzed and after careful selection, 47 publications were eligible for our review.

Results

Reduction of bladder capacity, due to dialysis and anuria, is correlated with dialysis duration. This reduction is reversible after renal transplantation and does not seem to put renal transplant recipients at risk for medical complications. The transplant procedure generally allows restoration of bladder maximal output, normal bladder capacity and compliance. Medical treatment of LUTS related to prostate hyperplasia (BPH) includes alpha-blockers and finasteride. Silodosin and dutasteride have not been evaluated in that setting. Antimuscarinics may be used with caution, with solifenacin at 5 mg per day being the preferred option. Surgical treatment of BPH requires preserved urine output; otherwise, it must be delayed until after transplantation, when it may be performed safely in the early postoperative course. Botulinum toxin injections and surgical treatment of stress incontinence and prolapse are barely reported in this population.

Conclusion

Precise assessment and optimal management of LUTS in renal transplant candidates and recipients are critical to improve quality of life and to preserve allotransplant function. Literature data lack evidence to propose robust recommendations. However, knowledge of reported specificities in this peculiar setting is mandatory for urologists to provide patients with finest options and optimal treatment timing.",2016-10-07 +21880703,AluHunter: a database of potentially polymorphic Alu insertions for use in primate phylogeny and population genetics.,

Summary

AluHunter is a database of taxon-specific primate Alu elements for use in phylogeny and population genetics. The software automatically isolates potentially polymorphic Alu insertions in sequences submitted to GenBank by screening the elements against reference genomes. The resultant database of variable markers is a valuable resource for researchers interested in characterizing Alu elements in their primate taxon of interest.

Availability and implementation

The AluHunter database can be accessed at http://www.aluhunter.com.

Contact

cmb433@nyu.edu.,2011-08-31 +30054217,Systematic identification and annotation of multiple-variant compound effects at transcription factor binding sites in human genome.,"Understanding the functional effects of genetic variants is crucial in modern genomics and genetics. Transcription factor binding sites (TFBSs) are one of the most important cis-regulatory elements. While multiple tools have been developed to assess functional effects of genetic variants at TFBSs, they usually assume that each variant works in isolation and neglect the potential ""interference"" among multiple variants within the same TFBS. In this study, we presented COPE-TFBS (Context-Oriented Predictor for variant Effect on Transcription Factor Binding Site), a novel method that considers sequence context to accurately predict variant effects on TFBSs. We systematically re-analyzed the sequencing data from both the 1000 Genomes Project and the Genotype-Tissue Expression (GTEx) Project via COPE-TFBS, and identified numbers of novel TFBSs, transformed TFBSs and discordantly annotated TFBSs resulting from multiple variants, further highlighting the necessity of sequence context in accurately annotating genetic variants. COPE-TFBS is freely available for academic use at http://cope.cbi.pku.edu.cn/.",2018-07-07 +30139738,"""Z4"" Complex Member Fusions in NUT Carcinoma: Implications for a Novel Oncogenic Mechanism.","Nuclear protein in testis (NUT) carcinoma (NC) is a rare, distinctly aggressive subtype of squamous carcinoma defined by the presence of NUT-fusion oncogenes resulting from chromosomal translocation. In most cases, the NUT gene (NUTM1) is fused to bromodomain containing 4 (BRD4) forming the BRD4-NUT oncogene. Here, a novel fusion partner to NUT was discovered using next-generation sequencing and FISH from a young patient with an undifferentiated malignant round cell tumor. 
Interestingly, the NUT fusion identified involved ZNF592, a zinc finger containing protein, which was previously identified as a component of the BRD4-NUT complex. In BRD4-NUT-expressing NC cells, wild-type ZNF592 and other associated ""Z4"" complex proteins, including ZNF532 and ZMYND8, colocalize with BRD4-NUT in characteristic nuclear foci. Furthermore, ectopic expression of BRD4-NUT in a non-NC cell line induces sequestration of Z4 factors to BRD4-NUT foci. Finally, the data demonstrate the specific dependency of NC cells on Z4 modules, ZNF532 and ZNF592. IMPLICATIONS: This study establishes the oncogenic role of Z4 factors in NC, offering potential new targeted therapeutic strategies in this incurable cancer.Visual Overview: http://mcr.aacrjournals.org/content/molcanres/16/12/1826/F1.large.jpg.",2018-08-23 +25990735,"MyProteinNet: build up-to-date protein interaction networks for organisms, tissues and user-defined contexts.","The identification of the molecular pathways active in specific contexts, such as disease states or drug responses, often requires an extensive view of the potential interactions between a subset of proteins. This view is not easily obtained: it requires the integration of context-specific protein list or expression data with up-to-date data of protein interactions that are typically spread across multiple databases. The MyProteinNet web server allows users to easily create such context-sensitive protein interaction networks. Users can automatically gather and consolidate data from up to 11 different databases to create a generic protein interaction network (interactome). They can score the interactions based on reliability and filter them by user-defined contexts including molecular expression and protein annotation. The output of MyProteinNet includes the generic and filtered interactome files, together with a summary of their network attributes. 
MyProteinNet is particularly geared toward building human tissue interactomes, by maintaining tissue expression profiles from multiple resources. The ability of MyProteinNet to facilitate the construction of up-to-date, context-specific interactomes and its applicability to 11 different organisms and to tens of human tissues, make it a powerful tool in meaningful analysis of protein networks. MyProteinNet is available at http://netbio.bgu.ac.il/myproteinnet.",2015-05-18 +29909982,A Single-Cell Transcriptome Atlas of the Aging Drosophila Brain.,"The diversity of cell types and regulatory states in the brain, and how these change during aging, remains largely unknown. We present a single-cell transcriptome atlas of the entire adult Drosophila melanogaster brain sampled across its lifespan. Cell clustering identified 87 initial cell clusters that are further subclustered and validated by targeted cell-sorting. Our data show high granularity and identify a wide range of cell types. Gene network analyses using SCENIC revealed regulatory heterogeneity linked to energy consumption. During aging, RNA content declines exponentially without affecting neuronal identity in old brains. This single-cell brain atlas covers nearly all cells in the normal brain and provides the tools to study cellular diversity alongside other Drosophila and mammalian single-cell datasets in our unique single-cell analysis platform: SCope (http://scope.aertslab.org). These results, together with SCope, allow comprehensive exploration of all transcriptional states of an entire aging brain.",2018-06-18 +29385402,G2S: a web-service for annotating genomic variants on 3D protein structures.,"Motivation:Accurately mapping and annotating genomic locations on 3D protein structures is a key step in structure-based analysis of genomic variants detected by recent large-scale sequencing efforts. 
There are several mapping resources currently available, but none of them provides a web API (Application Programming Interface) that supports programmatic access. Results:We present G2S, a real-time web API that provides automated mapping of genomic variants on 3D protein structures. G2S can align genomic locations of variants, protein locations, or protein sequences to protein structures and retrieve the mapped residues from structures. G2S API uses REST-inspired design and it can be used by various clients such as web browsers, command terminals, programming languages and other bioinformatics tools for bringing 3D structures into genomic variant analysis. Availability and implementation:The webserver and source codes are freely available at https://g2s.genomenexus.org. Contact:g2s@genomenexus.org. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-06-01 +27829360,Disease gene prioritization by integrating tissue-specific molecular networks using a robust multi-network model.,"

Background

Accurately prioritizing candidate disease genes is an important and challenging problem. Various network-based methods have been developed to predict potential disease genes by utilizing the disease similarity network and molecular networks such as protein interaction or gene co-expression networks. Although successful, a common limitation of the existing methods is that they assume all diseases share the same molecular network and a single generic molecular network is used to predict candidate genes for all diseases. However, different diseases tend to manifest in different tissues, and the molecular networks in different tissues are usually different. An ideal method should be able to incorporate tissue-specific molecular networks for different diseases.

Results

In this paper, we develop a robust and flexible method to integrate tissue-specific molecular networks for disease gene prioritization. Our method allows each disease to have its own tissue-specific network(s). We formulate the problem of candidate gene prioritization as an optimization problem based on network propagation. When there are multiple tissue-specific networks available for a disease, our method can automatically infer the relative importance of each tissue-specific network. Thus it is robust to the noisy and incomplete network data. To solve the optimization problem, we develop fast algorithms which have linear time complexities in the number of nodes in the molecular networks. We also provide rigorous theoretical foundations for our algorithms in terms of their optimality and convergence properties. Extensive experimental results show that our method can significantly improve the accuracy of candidate gene prioritization compared with the state-of-the-art methods.

Conclusions

In our experiments, we compare our methods with 7 popular network-based disease gene prioritization algorithms on diseases from Online Mendelian Inheritance in Man (OMIM) database. The experimental results demonstrate that our methods recover true associations more accurately than other methods in terms of AUC values, and the performance differences are significant (with paired t-test p-values less than 0.05). This validates the importance to integrate tissue-specific molecular networks for studying disease gene prioritization and show the superiority of our network models and ranking algorithms toward this purpose. The source code and datasets are available at http://nijingchao.github.io/CRstar/ .",2016-11-10 +29724079,Virtual Fly Brain - Using OWL to support the mapping and genetic dissection of the Drosophila brain.,"A massive effort is underway to map the structure of the Drosophila nervous system and to genetically dissect its function. Virtual Fly Brain (VFB; http://www.virtualflybrain.org) is a popular, OWL-based resource providing neuroinformatics support for this work. It provides: curated descriptions of brain regions and neurons; queries for neurons based on their relationship to gross neuroanatomy; and queries for reagents based on their expression patterns. Query results are enriched by OWL axiomatisation allowing basic mereological reasoning. To keep reasoning fast and scalable, VFB confines expressiveness to the EL profile of OWL. As a result, VFB does not provide queries involving negation, despite there being both demand and sufficient information to support them. Recent developments in reasoning technology may make more expressive queries practical. 
Here we present design patterns to support queries with negation that are compatible with the mereological reasoning used in VFB.",2014-10-01 +29726907,MICAN-SQ: a sequential protein structure alignment program that is applicable to monomers and all types of oligomers.,"Motivation:Protein structure alignment is a significant tool to understand evolutionary processes and physicochemical properties of proteins. Important targets of structure alignment are not only monomeric but also oligomeric proteins that sometimes include domain swapping or fusions. Although various protein structural alignment programs have been developed, no method is applicable to any protein pair regardless of the number of chain components and oligomeric states with retaining sequential restrictions: structurally equivalent regions must be aligned in the same order along protein sequences. Results:In this paper, we introduced a new sequential protein structural alignment algorithm MICAN-SQ, which is applicable to protein structures in all oligomeric states. In particular, MICAN-SQ allows the complicated structural alignments of proteins with domain swapping or fusion regions. To validate MICAN-SQ, alignment accuracies were evaluated using curated alignments of monomers and examples of domain swapping, and compared with those of pre-existing protein structural alignment programs. The results of this study show that MICAN-SQ has superior accuracy and robustness in comparison with previous programs and offers limited computational times. We also demonstrate that MICAN-SQ correctly aligns very large complexes and fused proteins. The present computations warrant the consideration of MICAN-SQ for studies of evolutionary and physicochemical properties of monomeric structures and all oligomer types. Availability and implementation:The MICAN program was implemented in C. The source code and executable file can be freely downloaded from http://www.tbp.cse.nagoya-u.ac.jp/MICAN/. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-10-01 +21619703,"OntoCAT--simple ontology search and integration in Java, R and REST/JavaScript.","

Background

Ontologies have become an essential asset in the bioinformatics toolbox and a number of ontology access resources are now available, for example, the EBI Ontology Lookup Service (OLS) and the NCBO BioPortal. However, these resources differ substantially in mode, ease of access, and ontology content. This makes it relatively difficult to access each ontology source separately, map their contents to research data, and much of this effort is being replicated across different research groups.

Results

OntoCAT provides a seamless programming interface to query heterogeneous ontology resources including OLS and BioPortal, as well as user-specified local OWL and OBO files. Each resource is wrapped behind easy to learn Java, Bioconductor/R and REST web service commands enabling reuse and integration of ontology software efforts despite variation in technologies. It is also available as a stand-alone MOLGENIS database and a Google App Engine application.

Conclusions

OntoCAT provides a robust, configurable solution for accessing ontology terms specified locally and from remote services, is available as a stand-alone tool and has been tested thoroughly in the ArrayExpress, MOLGENIS, EFO and Gen2Phen phenotype use cases.

Availability

http://www.ontocat.org.",2011-05-29 +30589519,"Can waist circumference provide a new ""third"" dimension to BMI when predicting percentage body fat in children? Insights using allometric modelling.","

Introduction

Body mass index (BMI) is often criticized for not being able to distinguish between lean and fat tissue. Waist circumference (WC), adjusted for stature, is proposed as an alternative weight status index, as it is more sensitive to changes in central adiposity.

Purpose

The purpose of the study is to combine the three dimensions of height, mass, and WC to provide a simple, meaningful, and more accurate index associated with percentage body fat (BF%).

Methods

We employed a four independent sample design. Sample 1 consisted of 551 children (320 boys) (mean ± SD of age = 7.2 ± 2.0 years), recruited from London, UK. Samples 2, 3, and 4 consisted of 5387 children (2649 boys) aged 7 to 17 years recruited from schools in Portugal. Allometric modelling was used to identify the most effective anthropometric index associated with BF%. The data from samples 2, 3, and 4 were used to confirm and cross-validate the model derived in sample 1.

Results

The allometric models from all four samples identified a positive mass exponent and a negative height exponent that was approximately twice that of the mass exponent and a waist circumference exponent that was approximately half the mass exponent. Consequently, the body shape index most strongly associated with BF% was BMI√WC. The WC component of the new index can simply be interpreted as a WC ""weighting"" of the traditional BMI.

Conclusions

Compared with using BMI and WC in isolation, BMI√WC could provide a more effective and equally noninvasive proxy for BF% in children that can be used in public and community health settings.",2018-12-27 +29028927,Tumor origin detection with tissue-specific miRNA and DNA methylation markers.,"Motivation:A clear identification of the primary site of tumor is of great importance to the next targeted site-specific treatments and could efficiently improve patient's overall survival. Even though many classifiers based on gene expression had been proposed to predict the tumor primary, only a few studies focus on using DNA methylation (DNAm) profiles to develop classifiers, and none of them compares the performance of classifiers based on different profiles. Results:We introduced novel selection strategies to identify highly tissue-specific CpG sites and then used the random forest approach to construct the classifiers to predict the origin of tumors. We also compared the prediction performance by applying similar strategy on miRNA expression profiles. Our analysis indicated that these classifiers had an accuracy of 96.05% (Maximum-Relevance-Maximum-Distance: 90.02-99.99%) or 95.31% (principal component analysis: 79.82-99.91%) on independent DNAm datasets, and an overall accuracy of 91.30% (range 79.33-98.74%) on independent miRNA test sets for predicting tumor origin. This suggests that our feature selection methods are very effective to identify tissue-specific biomarkers and the classifiers we developed can efficiently predict the origin of tumors. We also developed a user-friendly webserver that helps users to predict the tumor origin by uploading miRNA expression or DNAm profile of their interests. Availability and implementation:The webserver, and relative data, code are accessible at http://server.malab.cn/MMCOP/. Contact:zouquan@nclab.net or a.teschendorff@ucl.ac.uk. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-02-01 +28161499,Orthogonal self-guided similarity preserving projection for classification and clustering.,"A suitable feature representation can faithfully preserve the intrinsic structure of data. However, traditional dimensionality reduction (DR) methods commonly use the original input features to define the intrinsic structure, which makes the estimated intrinsic structure unreliable since redundant or noisy features may exist in the original input features. Thus a dilemma is that (1) one needs the most suitable feature representation to define the intrinsic structure of data and (2) one should use the proper intrinsic structure of data to perform feature extraction. To address the problem, in this paper we propose a unified learning framework to simultaneously obtain the optimal feature representation and intrinsic structure of data. The structure is learned from the results of feature learning, and the features are learned to preserve the refined structure of data. By leveraging the interactions between the process of determining the most suitable feature representation and intrinsic structure of data, we can capture accurate structure and obtain the optimal feature representation of data. Experimental results demonstrate that our method outperforms state-of-the-art methods in DR and subspace clustering. The code of the proposed method is available at ""http://www.yongxu.org/lunwen.html "".",2017-01-16 +24931982,GRASP: analysis of genotype-phenotype results from 1390 genome-wide association studies and corresponding open access database.,"

Summary

We created a deeply extracted and annotated database of genome-wide association studies (GWAS) results. GRASP v1.0 contains >6.2 million SNP-phenotype associations from among 1390 GWAS studies. We re-annotated GWAS results with 16 annotation sources including some rarely compared to GWAS results (e.g. RNA editing sites, lincRNAs, PTMs).

Motivation

To create a high-quality resource to facilitate further use and interpretation of human GWAS results in order to address important scientific questions.

Results

GWAS have grown exponentially, with increases in sample sizes and markers tested, and continuing bias toward European ancestry samples. GRASP contains >100 000 phenotypes, roughly: eQTLs (71.5%), metabolite QTLs (21.2%), methylation QTLs (4.4%) and diseases, biomarkers and other traits (2.8%). cis-eQTLs, meQTLs, mQTLs and MHC region SNPs are highly enriched among significant results. After removing these categories, GRASP still contains a greater proportion of studies and results than comparable GWAS catalogs. Cardiovascular disease and related risk factors predominate remaining GWAS results, followed by immunological, neurological and cancer traits. Significant results in GWAS display a highly gene-centric tendency. Sex chromosome X (OR = 0.18[0.16-0.20]) and Y (OR = 0.003[0.001-0.01]) genes are depleted for GWAS results. Gene length is correlated with GWAS results at nominal significance (P ≤ 0.05) levels. We show this gene-length correlation decays at increasingly more stringent P-value thresholds. Potential pleiotropic genes and SNPs enriched for multi-phenotype association in GWAS are identified. However, we note possible population stratification at some of these loci. Finally, via re-annotation we identify compelling functional hypotheses at GWAS loci, in some cases unrealized in studies to date.

Conclusion

Pooling summary-level GWAS results and re-annotating with bioinformatics predictions and molecular features provides a good platform for new insights.

Availability

The GRASP database is available at http://apps.nhlbi.nih.gov/grasp.",2014-06-01 +26478709,The Register of Antarctic Marine Species (RAMS): a ten-year appraisal.,"The Register of Antarctic Marine Species (RAMS) is a marine species database that manages an authoritative taxonomic list of species occurring in the Southern Ocean. RAMS links with several other initiatives managing biogeographic or genomics information. The current paper aims to briefly present RAMS and provides an updated snapshot of its contents, in the form of a DarwinCore checklist (available through http://ipt.biodiversity.aq/resource.do?r=rams) and illustrative barplots. Moreover, this article presents a ten year appraisal (since the creation of RAMS). This appraisal first focuses on RAMS bibliometrics. We observed that RAMS was cited (Google Scholar) in 50 distinct publications among which 32 were peer-reviewed in 18 different journals. Three journals (Antarctic Science, Polar Biology, ZooKeys) represent almost 40% of these peer-review publications. The second appraisal focuses on the evolution of new RAMS records. We observed an important decrease in data additions since 2011. As a case study, we focused on an original dataset for a specific group (Asteroidea, Echinodermata). It appears that around one hundred species of asteroids are lacking in RAMS despite the relatively high availability of these data. This suggests that the users' community (or collaborative projects such as AquaRES) could be helpful in order to maintain the RAMS database over the long term.",2015-09-30 +28536240,"Reply to ""Tolerogenic insulin peptide therapy precipitates type 1 diabetes"".","In this issue of JEM, Bergman et al. (https://doi.org/10.1084/jem.20160471) challenge the data published in our previous JEM paper on the preventive effect of tolerogenic vaccination with a strong agonist insulin mimetope in type 1 diabetes. 
Here, we provide a response to these data and suggest that appropriate subimmunogenic conditions are required to induce Foxp3+ regulatory T cell conversion.",2017-05-23 +28453678,Chainy: an universal tool for standardized relative quantification in real-time PCR.,"

Summary

Chainy is a cross-platform web tool providing systematic pipelines and steady criteria to process real-time PCR data, including the calculation of efficiencies from raw data by kinetic methods, evaluation of the suitability of multiple references, standardized normalization using one or more references, and group-wise relative quantification statistical testing. We illustrate the utility of Chainy for differential expression and chromatin immunoprecipitation enrichment (ChIP-QPCR) analysis.

Availability and implementation

Chainy is open source and freely available at http://maplab.cat/chainy.

Contact

imallona@igtp.cat.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +28459977,GSuite HyperBrowser: integrative analysis of dataset collections across the genome and epigenome.,"

Background

Recent large-scale undertakings such as ENCODE and Roadmap Epigenomics have generated experimental data mapped to the human reference genome (as genomic tracks) representing a variety of functional elements across a large number of cell types. Despite the high potential value of these publicly available data for a broad variety of investigations, little attention has been given to the analytical methodology necessary for their widespread utilisation.

Findings

We here present a first principled treatment of the analysis of collections of genomic tracks. We have developed novel computational and statistical methodology to permit comparative and confirmatory analyses across multiple and disparate data sources. We delineate a set of generic questions that are useful across a broad range of investigations and discuss the implications of choosing different statistical measures and null models. Examples include contrasting analyses across different tissues or diseases. The methodology has been implemented in a comprehensive open-source software system, the GSuite HyperBrowser. To make the functionality accessible to biologists, and to facilitate reproducible analysis, we have also developed a web-based interface providing an expertly guided and customizable way of utilizing the methodology. With this system, many novel biological questions can flexibly be posed and rapidly answered.

Conclusions

Through a combination of streamlined data acquisition, interoperable representation of dataset collections, and customizable statistical analysis with guided setup and interpretation, the GSuite HyperBrowser represents a first comprehensive solution for integrative analysis of track collections across the genome and epigenome. The software is available at: https://hyperbrowser.uio.no.",2017-07-01 +29472600,PANDA: Protein function prediction using domain architecture and affinity propagation.,"We developed PANDA (Propagation of Affinity and Domain Architecture) to predict protein functions in the format of Gene Ontology (GO) terms. PANDA at first executes profile-profile alignment algorithm to search against PfamA, KOG, COG, and SwissProt databases, and then launches PSI-BLAST against UniProt for homologue search. PANDA integrates a domain architecture inference algorithm based on the Bayesian statistics that calculates the probability of having a GO term. All the candidate GO terms are pooled and filtered based on Z-score. After that, the remaining GO terms are clustered using an affinity propagation algorithm based on the GO directed acyclic graph, followed by a second round of filtering on the clusters of GO terms. We benchmarked the performance of all the baseline predictors PANDA integrates and also for every pooling and filtering step of PANDA. It can be found that PANDA achieves better performances in terms of area under the curve for precision and recall compared to the baseline predictors. PANDA can be accessed from http://dna.cs.miami.edu/PANDA/ .",2018-02-22 +26229293,Computer aided gene mining for gingerol biosynthesis.,"Inspite of the large body of genomic data obtained from the transcriptome of Zingiber officinale, very few studies have focused on the identification and characterization of miRNAs in gingerol biosynthesis. Zingiber officinale transcriptome was analyzed using EST dataset (38169 total) deposited in public domains. 
In this paper computational functional annotation of the available ESTs and identification of genes which play a significant role in gingerol biosynthesis are described. Zingiber officinale transcriptome was analyzed using EST dataset (38169 total) from ncbi. ESTs were clustered and assembled, resulting in 8624 contigs and 8821 singletons. Assembled dataset was then submitted to the EST functional annotation workflow including blast, gene ontology (go) analysis, and pathway enrichment by kyoto encyclopedia of genes and genomes (kegg) and interproscan. The unigene datasets were further exploited to identify simple sequence repeats that enable linkage mapping. A total of 409 simple sequence repeats were identified from the contigs. Furthermore we examined the existence of novel miRNAs from the ESTs in rhizome, root and leaf tissues. EST analysis revealed the presence of single hypothetical miRNA in rhizome tissue. The hypothetical miRNA is warranted to play an important role in controlling genes involved in gingerol biosynthesis and hence demands experimental validation. The assembly and associated information of transcriptome data provides a comprehensive functional and evolutionary characterization of genomics of Zingiber officinale. As an effort to make the genomic and transcriptomic data widely available to the public domain, the results were integrated into a web-based Ginger EST database which is freely accessible at http://www.kaubic.in/gingerest/.",2015-06-30 +25414328,INFRAFRONTIER--providing mutant mouse resources as research tools for the international scientific community.,"The laboratory mouse is a key model organism to investigate mechanism and therapeutics of human disease. 
The number of targeted genetic mouse models of disease is growing rapidly due to high-throughput production strategies employed by the International Mouse Phenotyping Consortium (IMPC) and the development of new, more efficient genome engineering techniques such as CRISPR based systems. We have previously described the European Mouse Mutant Archive (EMMA) resource and how this international infrastructure provides archiving and distribution worldwide for mutant mouse strains. EMMA has since evolved into INFRAFRONTIER (http://www.infrafrontier.eu), the pan-European research infrastructure for the systemic phenotyping, archiving and distribution of mouse disease models. Here we describe new features including improved search for mouse strains, support for new embryonic stem cell resources, access to training materials via a comprehensive knowledgebase and the promotion of innovative analytical and diagnostic techniques.",2014-11-20 +29464735,molUP: A VMD plugin to handle QM and ONIOM calculations using the gaussian software.,"The notable advances obtained by computational (bio)chemistry provided its widespread use in many areas of science, in particular, in the study of reaction mechanisms. These studies involve a huge number of complex calculations, which are often carried out using the Gaussian suite of programs. The preparation of input files and the analysis of the output files are not easy tasks and often involve laborious and complex steps. Taking this into account, we developed molUP: a VMD plugin that offers a complete set of tools that enhance the preparation of QM and ONIOM (QM/MM, QM/QM, and QM/QM/MM) calculations. The starting structures for these calculations can be imported from different chemical formats. A set of tools is available to help the user to examine or modify any geometry parameter. 
This includes the definition of layers in ONIOM calculations, choosing fixed atoms during geometry optimizations, the recalculation or adjustment of the atomic charges, performing SCANs or IRC calculations, etc. molUP also extracts the geometries from the output files as well as the energies of each of them. All of these tasks are performed in an interactive GUI that is extremely helpful for the user. MolUP was developed to be easy to handle by inexperienced users, but simultaneously to be a fast and flexible graphical interface to allow the advanced users to take full advantage of this plugin. The program is available, free of charges, for macOS, Linux, and Windows at the PortoBioComp page https://www.fc.up.pt/PortoBioComp/database/doku.php?id=molup. © 2018 Wiley Periodicals, Inc.",2018-02-21 +27795738,SciData: a data model and ontology for semantic representation of scientific data.,"With the move toward global, Internet enabled science there is an inherent need to capture, store, aggregate and search scientific data across a large corpus of heterogeneous data silos. As a result, standards development is needed to create an infrastructure capable of representing the diverse nature of scientific data. This paper describes a fundamental data model for scientific data that can be applied to data currently stored in any format, and an associated ontology that affords semantic representation of the structure of scientific data (and its metadata), upon which discipline specific semantics can be applied. Application of this data model to experimental and computational chemistry data are presented, implemented using JavaScript Object Notation for Linked Data. Full examples are available at the project website (Chalk in SciData: a scientific data model. http://stuchalk.github.io/scidata/, 2016).",2016-10-14 +30071882,Exploring the OncoGenomic Landscape of cancer.,"

Background

The widespread incorporation of next-generation sequencing into clinical oncology has yielded an unprecedented amount of molecular data from thousands of patients. A main current challenge is to find out reliable ways to extrapolate results from one group of patients to another and to bring rationale to individual cases in the light of what is known from the cohorts.

Results

We present OncoGenomic Landscapes, a framework to analyze and display thousands of cancer genomic profiles in a 2D space. Our tool allows users to rapidly assess the heterogeneity of large cohorts, enabling the comparison to other groups of patients, and using driver genes as landmarks to aid in the interpretation of the landscapes. In our web-server, we also offer the possibility of mapping new samples and cohorts onto 22 predefined landscapes related to cancer cell line panels, organoids, patient-derived xenografts, and clinical tumor samples.

Conclusions

Contextualizing individual subjects in a more general landscape of human cancer is a valuable aid for basic researchers and clinical oncologists trying to identify treatment opportunities, maybe yet unapproved, for patients that ran out of standard therapeutic options. The web-server can be accessed at https://oglandscapes.irbbarcelona.org /.",2018-08-03 +29461053,Research Progress of Polycyclic Polyprenylated Acylphloroglucinols.,"Polycyclic polyprenylated acylphloroglucinols (PPAPs) are a class of hybrid natural products sharing the mevalonate/methylerythritol phosphate and polyketide biosynthetic pathways and showing considerable structure and bioactivity diversity. This review discusses the progress of research into the chemistry and biological activity of 421 natural PPAPs in the past 11 years as well as in-depth studies of biological activities and total synthesis of some PPAPs isolated before 2006. We created an online database of all PPAPs known to date at http://www.chem.uky.edu/research/grossman/PPAPs . Two subclasses of biosynthetically related metabolites, spirocyclic PPAPs with octahydrospiro[cyclohexan-1,5'-indene]-2,4,6-trione core and complicated PPAPs produced by intramolecular [4 + 2] cycloadditions of MPAPs, are brought into the PPAP family. Some PPAPs' relative or absolute configurations are reassigned or critically discussed, and the confusing trivial names in PPAPs investigations are clarified. Pharmacologic studies have revealed a new molecular mechanism whereby hyperforin and its derivatives regulate neurotransmitter levels by activating TRPC6 as well as the antitumor mechanism of garcinol and its analogues. The antineoplastic potential of some type B PPAPs such as oblongifolin C and guttiferone K has increased significantly. 
As a result of the recent appearances of innovative synthetic methods and strategies, the total syntheses of 22 natural PPAPs including hyperforin, garcinol, and plukenetione A have been accomplished.",2018-02-20 +25428362,The Sol Genomics Network (SGN)--from genotype to phenotype to breeding.,"The Sol Genomics Network (SGN, http://solgenomics.net) is a web portal with genomic and phenotypic data, and analysis tools for the Solanaceae family and close relatives. SGN hosts whole genome data for an increasing number of Solanaceae family members including tomato, potato, pepper, eggplant, tobacco and Nicotiana benthamiana. The database also stores loci and phenotype data, which researchers can upload and edit with user-friendly web interfaces. Tools such as BLAST, GBrowse and JBrowse for browsing genomes, expression and map data viewers, a locus community annotation system and a QTL analysis tools are available. A new tool was recently implemented to improve Virus-Induced Gene Silencing (VIGS) constructs called the SGN VIGS tool. With the growing genomic and phenotypic data in the database, SGN is now advancing to develop new web-based breeding tools and implement the code and database structure for other species or clade-specific databases.",2014-11-26 +29202807,Granatum: a graphical single-cell RNA-Seq analysis pipeline for genomics scientists.,"BACKGROUND:Single-cell RNA sequencing (scRNA-Seq) is an increasingly popular platform to study heterogeneity at the single-cell level. Computational methods to process scRNA-Seq data are not very accessible to bench scientists as they require a significant amount of bioinformatic skills. RESULTS:We have developed Granatum, a web-based scRNA-Seq analysis pipeline to make analysis more broadly accessible to researchers. Without a single line of programming code, users can click through the pipeline, setting parameters and visualizing results via the interactive graphical interface. 
Granatum conveniently walks users through various steps of scRNA-Seq analysis. It has a comprehensive list of modules, including plate merging and batch-effect removal, outlier-sample removal, gene-expression normalization, imputation, gene filtering, cell clustering, differential gene expression analysis, pathway/ontology enrichment analysis, protein network interaction visualization, and pseudo-time cell series construction. CONCLUSIONS:Granatum enables broad adoption of scRNA-Seq technology by empowering bench scientists with an easy-to-use graphical interface for scRNA-Seq data analysis. The package is freely available for research use at http://garmiregroup.org/granatum/app.",2017-12-05 +25505667,"Implementing the ""Best Template Searching"" tool into Adenosiland platform.","

Background

Adenosine receptors (ARs) belong to the G protein-coupled receptors (GCPRs) family. The recent release of X-ray structures of the human A2A AR (h A2A AR ) in complex with agonists and antagonists has increased the application of structure-based drug design approaches to this class of receptors. Among them, homology modeling represents the method of choice to gather structural information on the other receptor subtypes, namely A1, A2B, and A3 ARs. With the aim of helping users in the selection of either a template to build its own models or ARs homology models publicly available on our platform, we implemented our web-resource dedicated to ARs, Adenosiland, with the ""Best Template Searching"" facility. This tool is freely accessible at the following web address: http://mms.dsfarm.unipd.it/Adenosiland/ligand.php.

Findings

The template suggestions and homology models provided by the ""Best Template Searching"" tool are guided by the similarity of a query structure (putative or known ARs ligand) with all ligands co-crystallized with hA2A AR subtype. The tool computes several similarity indexes and sort the outcoming results according to the index selected by the user.

Conclusions

We have implemented our web-resource dedicated to ARs Adenosiland with the ""Best Template Searching"" facility, a tool to guide template and models selection for hARs modelling. The underlying idea of our new facility, that is the selection of a template (or models built upon a template) whose co-crystallized ligand shares the highest similarity with the query structure, can be easily extended to other GPCRs.",2013-12-20 +30427548,Spontaneous astrocytic Ca2+ activity abounds in electrically suppressed ischemic penumbra of aged mice.,"Experimental focal cortical ischemic lesions consist of an ischemic core and a potentially salvageable peri-ischemic region, the ischemic penumbra. The activity of neurons and astrocytes is assumed to be suppressed in the penumbra because the electrical function is interrupted, but this is incompletely elucidated. Most experimental stroke studies used young adult animals, whereas stroke is prevalent in the elderly population. Using two-photon imaging in vivo, we here demonstrate extensive but electrically silent, spontaneous Ca2+ activity in neurons and astrocytes in the ischemic penumbra of 18- to 24-month-old mice 2-4 hr after middle cerebral artery occlusion. In comparison, stroke reduced spontaneous Ca2+ activity in neurons and astrocytes in adult mice (3-4 months of age). In aged mice, stroke increased astrocytic spontaneous Ca2+ activity considerably while neuronal spontaneous Ca2+ activity was unchanged. Blockade of action potentials and of purinergic receptors strongly reduced spontaneous Ca2+ activity in both neurons and astrocytes in the penumbra of old stroke mice. This indicates that stroke had a direct influence on mechanisms in presynaptic terminals and on purinergic signaling. Thus, highly dynamic variations in spontaneous Ca2+ activity characterize the electrically compromised penumbra, with remarkable differences between adult and old mice. 
The data are consistent with the notion that aged neurons and astrocytes take on a different phenotype than young mice. The increased activity of the aged astrocyte phenotype may be harmful to neurons. We suggest that the abundant spontaneous Ca2+ activity in astrocytes in the ischemic penumbra of old mice may be a novel target for neuroprotection strategies. A video abstract of this article can be found at https://youtu.be/AKlwKFsz1qE.",2018-11-14 +25398900,MBGD update 2015: microbial genome database for flexible ortholog analysis utilizing a diverse set of genomic data.,"The microbial genome database for comparative analysis (MBGD) (available at http://mbgd.genome.ad.jp/) is a comprehensive ortholog database for flexible comparative analysis of microbial genomes, where the users are allowed to create an ortholog table among any specified set of organisms. Because of the rapid increase in microbial genome data owing to the next-generation sequencing technology, it becomes increasingly challenging to maintain high-quality orthology relationships while allowing the users to incorporate the latest genomic data available into an analysis. Because many of the recently accumulating genomic data are draft genome sequences for which some complete genome sequences of the same or closely related species are available, MBGD now stores draft genome data and allows the users to incorporate them into a user-specific ortholog database using the MyMBGD functionality. In this function, draft genome data are incorporated into an existing ortholog table created only from the complete genome data in an incremental manner to prevent low-quality draft data from affecting clustering results. 
In addition, to provide high-quality orthology relationships, the standard ortholog table containing all the representative genomes, which is first created by the rapid classification program DomClust, is now refined using DomRefine, a recently developed program for improving domain-level clustering using multiple sequence alignment information.",2014-11-14 +27276067,DREMECELS: A Curated Database for Base Excision and Mismatch Repair Mechanisms Associated Human Malignancies.,"DNA repair mechanisms act as a warrior combating various damaging processes that ensue critical malignancies. DREMECELS was designed considering the malignancies with frequent alterations in DNA repair pathways, that is, colorectal and endometrial cancers, associated with Lynch syndrome (also known as HNPCC). Since lynch syndrome carries high risk (~40-60%) for both cancers, therefore we decided to cover all three diseases in this portal. Although a large population is presently affected by these malignancies, many resources are available for various cancer types but no database archives information on the genes specifically for only these cancers and disorders. The database contains 156 genes and two repair mechanisms, base excision repair (BER) and mismatch repair (MMR). Other parameters include some of the regulatory processes that have roles in these disease progressions due to incompetent repair mechanisms, specifically BER and MMR. However, our unique database mainly provides qualitative and quantitative information on these cancer types along with methylation, drug sensitivity, miRNAs, copy number variation (CNV) and somatic mutations data. This database would serve the scientific community by providing integrated information on these disease types, thus sustaining diagnostic and therapeutic processes. This repository would serve as an excellent accompaniment for researchers and biomedical professionals and facilitate in understanding such critical diseases. 
DREMECELS is publicly available at http://www.bioinfoindia.org/dremecels.",2016-06-08 +30514290,Internet videos and colorectal cancer in mainland China: a content analysis.,"

Background

Colorectal cancer incidence and mortality have been increasing in China and as one of the most important health problems facing the nation. Adequate dissemination of correct information about colorectal cancer could help in reducing cancer-related morbidity and mortality. This study aims to assess the completeness and reliability of colorectal cancer-related information available on the video website of Youku in mainland China.

Methods

Youku ( https://www.youku.com /) was searched on September 15, 2016 for the search terms colorectal cancer. Only Chinese videos were included. Two reviewers independently evaluate the videos for characteristics, information source and usefulness. Content was analysed under six categories (aetiology, anatomy, symptoms, preventions, treatments and prognosis). Completeness was evaluated with a checklist developed by the researchers. Any discrepancies were resolved by consensuses. SPSS software was used to analyze data.

Results

There were 242 videos with relevant information about colorectal cancer. The type of source were as follows: independent users, 118 (49%); health information web sites, 60 (25%); medical doctors, 31 (13%); news network, 22 (9%); and hospital/university, 11 (4%). In all, 57% of videos had useful information about colorectal cancer, 21% were misleading. Videos posted by medical doctors (P = 0.021) and health information web sites (p = 0.039) were less incomplete than videos by independent users. Of the Traditional Chinese medicine (TCM) videos, 97 (76%) had information about treatments of colorectal cancer. 30% TCM videos contain misleading information, whose misleading rate was higher than total's (21%).

Conclusions

The colorectal cancer videos in mainland China represented by Youku varied base on ownership and content and information incompleteness were fairly high. It is necessary that professionals adapt to the advanced technology and think useful methods to solve the variable quality of information of internet video websites in mainland China.",2018-12-04 +30344197,"The LEAD (Lung, Heart, Social, Body) Study: Objectives, Methodology, and External Validity of the Population-Based Cohort Study.","

Background

The Lung, hEart, sociAl, boDy (LEAD) Study (ClinicalTrials.gov; NCT01727518; http://clinicaltrials.gov) is a longitudinal, observational, population-based Austrian cohort that aims to investigate the relationship between genetic, environmental, social, developmental and ageing factors influencing respiratory health and comorbidities through life. The general working hypothesis of LEAD is the interaction of these genetic, environmental and socioeconomic factors influences lung development and ageing, the risk of occurrence of several non-communicable diseases (respiratory, cardiovascular, metabolic and neurologic), as well as their phenotypic (ie, clinical) presentation.

Methods

LEAD invited from 2011-2016 a random sample (stratified by age, gender, residential area) of Vienna inhabitants (urban cohort) and all the inhabitants of six villages from Lower Austria (rural cohort). Participants will be followed-up every four years. A number of investigations and measurements were obtained in each of the four domains of the study (Lung, hEart, sociAl, boDy) including data to screen for lung, cardiovascular and metabolic diseases, osteoporosis, and cognitive function. Blood and urine samples are stored in a biobank for future investigations.

Results

A total of 11.423 males (47.6%) and females (52.4%), aged 6-80 years have been included in the cohort. Compared to governmental statistics, the external validity of LEAD with respect to age, gender, citizenship, and smoking status was high.

Conclusions

In conclusion, the LEAD cohort has been established following high quality standards; it is representative of the Austrian population and offers a platform to understand lung development and ageing as a key mechanism of human health both in early and late adulthood.",2018-10-20 +30741335,Optimising Radiation Therapy Dose to the Swallowing Organs at Risk: An In Silico Study of feasibility for Patients with Oropharyngeal Tumours.,"Recent evidence suggests that reducing radiotherapy dose delivered to specific anatomical swallowing structures [Swallowing Organs at Risk (SWOARs)] may improve swallowing outcomes post-treatment for patients with head and neck cancer. However, for those patients with tumours of the oropharynx, which typically directly overlap the SWOARs, reducing dose to these structures may be unachievable without compromising on the treatment of the disease. To assess the feasibility of dose reduction in this cohort, standard IMRT plans (ST-IMRT) and plans with reduced dose to the SWOARs (SW-IMRT) were generated for 25 oropharyngeal cancer patients (Brouwer et al. in Radiother Oncol 117(1):83-90, https://doi.org/10.1016/j.radonc.2015.07.041 , 2015; Christianen et al. in Radiother Oncol 101(3):394-402, https://doi.org/10.1016/j.radonc.2011.05.015 , 2011). ST-IMRT and SW-IMRT plans were compared for: mean dose to the SWOARs, volume of pharynx and larynx receiving 50 Gy and 60 Gy (V50 and V60 respectively) and overlap between the tumour volume and the SWOARs. Additionally, two different SWOARs delineation guidelines (Brouwer et al. in Radiother Oncol 117(1):83-90, https://doi.org/10.1016/j.radonc.2015.07.041 , 2015; Christianen et al. in Radiother Oncol 101(3):394-402, https://doi.org/10.1016/j.radonc.2011.05.015 , 2011) were used to highlight differences in calculated volumes between existing contouring guidelines. Agreement in SWOARs volumes between the two guidelines was calculated using a concordance index (CI). 
Despite a large overlap between the tumour and SWOARs, significant (p < 0.05) reductions in mean dose to 4 of the 5 SWOARs, and V50/V60 for the pharynx and larynx were achieved with SW-IMRT plans. Low CIs per structure (0.15-0.45) were found between the two guidelines highlighting issues comparing data between studies when different guidelines have been used (Hawkins et al. in Semin Radiat Oncol 28(1):46-52, https://doi.org/10.1016/j.semradonc.2017.08.002 , 2018; Brodin et al. in Int J Radiat Oncol Biol Phys 100(2):391-407, https://doi.org/10.1016/j.ijrobp.2017.09.041 , 2018). This study found reducing dose to the SWOARs is a feasible practice for patients with oropharyngeal cancer. However, future prospective research is needed to determine if the extent of dose reduction achieved equates to clinical benefits.",2019-02-11 +29140531,An update on sORFs.org: a repository of small ORFs identified by ribosome profiling.,"sORFs.org (http://www.sorfs.org) is a public repository of small open reading frames (sORFs) identified by ribosome profiling (RIBO-seq). This update elaborates on the major improvements implemented since its initial release. sORFs.org now additionally supports three more species (zebrafish, rat and Caenorhabditis elegans) and currently includes 78 RIBO-seq datasets, a vast increase compared to the three that were processed in the initial release. Therefore, a novel pipeline was constructed that also enables sORF detection in RIBO-seq datasets comprising solely elongating RIBO-seq data while previously, matching initiating RIBO-seq data was necessary to delineate the sORFs. Furthermore, a novel noise filtering algorithm was designed, able to distinguish sORFs with true ribosomal activity from simulated noise, consequently reducing the false positive identification rate. The inclusion of other species also led to the development of an inner BLAST pipeline, assessing sequence similarity between sORFs in the repository. 
Building on the proof of concept model in the initial release of sORFs.org, a full PRIDE-ReSpin pipeline was now released, reprocessing publicly available MS-based proteomics PRIDE datasets, reporting on true translation events. Next to reporting those identified peptides, sORFs.org allows visual inspection of the annotated spectra within the Lorikeet MS/MS viewer, thus enabling detailed manual inspection and interpretation.",2018-01-01 +26183165,The sequenced rat brain transcriptome--its use in identifying networks predisposing alcohol consumption.,"

Unlabelled

A quantitative genetic approach, which involves correlation of transcriptional networks with the phenotype in a recombinant inbred (RI) population and in selectively bred lines of rats, and determination of coinciding quantitative trait loci for gene expression and the trait of interest, has been applied in the present study. In this analysis, a novel approach was used that combined DNA-Seq data, data from brain exon array analysis of HXB/BXH RI rat strains and six pairs of rat lines selectively bred for high and low alcohol preference, and RNA-Seq data (including rat brain transcriptome reconstruction) to quantify transcript expression levels, generate co-expression modules and identify biological functions that contribute to the predisposition of consuming varying amounts of alcohol. A gene co-expression module was identified in the RI rat strains that contained both annotated and unannotated transcripts expressed in the brain, and was associated with alcohol consumption in the RI panel. This module was found to be enriched with differentially expressed genes from the selected lines of rats. The candidate genes within the module and differentially expressed genes between high and low drinking selected lines were associated with glia (microglia and astrocytes) and could be categorized as being related to immune function, energy metabolism and calcium homeostasis, as well as glial-neuronal communication. The results of the present study show that there are multiple combinations of genetic factors that can produce the same phenotypic outcome. Although no single gene accounts for predisposition to a particular level of alcohol consumption in every animal model, coordinated differential expression of subsets of genes in the identified pathways produce similar phenotypic outcomes.

Database

The datasets supporting the results of the present study are available at http://phenogen.ucdenver.edu.",2015-07-16 +25888240,An evidence-based approach to identify aging-related genes in Caenorhabditis elegans.,"

Background

Extensive studies have been carried out on Caenorhabditis elegans as a model organism to elucidate mechanisms of aging and the effects of perturbing known aging-related genes on lifespan and behavior. This research has generated large amounts of experimental data that is increasingly difficult to integrate and analyze with existing databases and domain knowledge. To address this challenge, we demonstrate a scalable and effective approach for automatic evidence gathering and evaluation that leverages existing experimental data and literature-curated facts to identify genes involved in aging and lifespan regulation in C. elegans.

Results

We developed a semantic knowledge base for aging by integrating data about C. elegans genes from WormBase with data about 2005 human and model organism genes from GenAge and 149 genes from GenDR, and with the Bio2RDF network of linked data for the life sciences. Using HyQue (a Semantic Web tool for hypothesis-based querying and evaluation) to interrogate this knowledge base, we examined 48,231 C. elegans genes for their role in modulating lifespan and aging. HyQue identified 24 novel but well-supported candidate aging-related genes for further experimental validation.

Conclusions

We use semantic technologies to discover candidate aging genes whose effects on lifespan are not yet well understood. Our customized HyQue system, the aging research knowledge base it operates over, and HyQue evaluations of all C. elegans genes are freely available at http://hyque.semanticscience.org .",2015-02-07 +25656309,BtoxDB: a comprehensive database of protein structural data on toxin-antitoxin systems.,"

Purpose

Toxin-antitoxin (TA) systems are diverse and abundant genetic modules in prokaryotic cells that are typically formed by two genes encoding a stable toxin and a labile antitoxin. Because TA systems are able to repress growth or kill cells and are considered to be important actors in cell persistence (multidrug resistance without genetic change), these modules are considered potential targets for alternative drug design. In this scenario, structural information for the proteins in these systems is highly valuable. In this report, we describe the development of a web-based system, named BtoxDB, that stores all protein structural data on TA systems.

Methods

The BtoxDB database was implemented as a MySQL relational database using PHP scripting language. Web interfaces were developed using HTML, CSS and JavaScript. The data were collected from the PDB, UniProt and Entrez databases. These data were appropriately filtered using specialized literature and our previous knowledge about toxin-antitoxin systems.

Results

The database provides three modules (""Search"", ""Browse"" and ""Statistics"") that enable searches, acquisition of contents and access to statistical data. Direct links to matching external databases are also available.

Conclusions

The compilation of all protein structural data on TA systems in one platform is highly useful for researchers interested in this content. BtoxDB is publicly available at http://www.gurupi.uft.edu.br/btoxdb.",2015-01-17 +30124010,"Why, When and How to Adjust Your P Values?","Currently, numerous papers are published reporting analysis of biological data at different omics levels by making statistical inferences. Of note, many studies, as those published in this Journal, report association of gene(s) at the genomic and transcriptomic levels by undertaking appropriate statistical tests. For instance, genotype, allele or haplotype frequencies at the genomic level or normalized expression levels at the transcriptomic level are compared between the case and control groups using the Chi-square/Fisher's exact test or independent (i.e. two-sampled) t-test respectively, with this culminating into a single numeric, namely the P value (or the degree of the false positive rate), which is used to make or break the outcome of the association test. This approach has flaws but nevertheless remains a standard and convenient approach in association studies. However, what becomes a critical issue is that the same cut-off is used when 'multiple' tests are undertaken on the same case-control (or any pairwise) comparison. Here, in brevity, we present what the P value represents, and why and when it should be adjusted. We also show, with worked examples, how to adjust P values for multiple testing in the R environment for statistical computing (http://www.R-project.org).",2018-08-01 +28327092,metaX: a flexible and comprehensive software for processing metabolomics data.,"

Background

Non-targeted metabolomics based on mass spectrometry enables high-throughput profiling of the metabolites in a biological sample. The large amount of data generated from mass spectrometry requires intensive computational processing for annotation of mass spectra and identification of metabolites. Computational analysis tools that are fully integrated with multiple functions and are easily operated by users who lack extensive knowledge in programing are needed in this research field.

Results

We herein developed an R package, metaX, that is capable of end-to-end metabolomics data analysis through a set of interchangeable modules. Specifically, metaX provides several functions, such as peak picking and annotation, data quality assessment, missing value imputation, data normalization, univariate and multivariate statistics, power analysis and sample size estimation, receiver operating characteristic analysis, biomarker selection, pathway annotation, correlation network analysis, and metabolite identification. In addition, metaX offers a web-based interface ( http://metax.genomics.cn ) for data quality assessment and normalization method evaluation, and it generates an HTML-based report with a visualized interface. The metaX utilities were demonstrated with a published metabolomics dataset on a large scale. The software is available for operation as either a web-based graphical user interface (GUI) or in the form of command line functions. The package and the example reports are available at http://metax.genomics.cn/ .

Conclusions

The pipeline of metaX is platform-independent and is easy to use for analysis of metabolomics data generated from mass spectrometry.",2017-03-21 +21622662,TimeTree2: species divergence times on the iPhone.,"

Summary

Scientists, educators and the general public often need to know times of divergence between species. But they rarely can locate that information because it is buried in the scientific literature, usually in a format that is inaccessible to text search engines. We have developed a public knowledgebase that enables data-driven access to the collection of peer-reviewed publications in molecular evolution and phylogenetics that have reported estimates of time of divergence between species. Users can query the TimeTree resource by providing two names of organisms (common or scientific) that can correspond to species or groups of species. The current TimeTree web resource (TimeTree2) contains timetrees reported from molecular clock analyses in 910 published studies and 17 341 species that span the diversity of life. TimeTree2 interprets complex and hierarchical data from these studies for each user query, which can be launched using an iPhone application, in addition to the website. Published time estimates are now readily accessible to the scientific community, K-12 and college educators, and the general public, without requiring knowledge of evolutionary nomenclature.

Availability

TimeTree2 is accessible from the URL http://www.timetree.org, with an iPhone app available from iTunes (http://itunes.apple.com/us/app/timetree/id372842500?mt=8) and a YouTube tutorial (http://www.youtube.com/watch?v=CxmshZQciwo).",2011-05-26 +26160459,CGMD: An integrated database of cancer genes and markers.,"Integrating cancer genes and markers with experimental evidence might provide valuable information for the further investigation of crosstalk between tumor genes and markers in cancer biology. To achieve this objective, we developed a database known as the Cancer Gene Marker Database (CGMD), which integrates data on tumor genes and markers based on experimental evidence. The major goal of CGMD is to provide the following: 1) current systematic treatment approaches and recent advances in different cancer treatments; 2) the aggregation of different genes and markers by their molecular characteristics and pathway associations; and 3) free access to the data compiled by CGMD at http://cgmd.in/. The database consists of 309 genes and 206 markers, as well as a list of 40 different human cancers, with detailed descriptions of all characterized markers. CGMD provides complete cancer annotations and molecular descriptions of cancer genes and markers such as CpG islands, promoters, exons, PDB structures, active sites and domains.",2015-07-10 +26779425,RNA sequencing of the nephron transcriptome: a technical note.,"To understand the functions of the kidney, the transcriptome of each part of the nephron needs to be profiled using a highly sensitive and unbiased tool. RNA sequencing (RNA-seq) has revolutionized transcriptomic research, enabling researchers to define transcription activity and functions of genomic elements with unprecedented sensitivity and precision. 
Recently, RNA-seq for polyadenylated messenger RNAs [poly(A)'-mRNAs] and classical microdissection were successfully combined to investigate the transcriptome of glomeruli and 14 different renal tubule segments. A rat kidney is perfused with and incubated in collagenase solution, and the digested kidney was manually dissected under a stereomicroscope. Individual glomeruli and renal tubule segments are identified by their anatomical and morphological characteristics and collected in phosphate-buffered saline. Poly(A)'-tailed mRNAs are released from cell lysate, captured by oligo-dT primers, and made into complementary DNAs (cDNAs) using a highly sensitive reverse transcription method. These cDNAs are sheared by sonication and prepared into adapter-ligated cDNA libraries for Illumina sequencing. Nucleotide sequences reported from the sequencing reaction are mapped to the rat reference genome for gene expression analysis. These RNA-seq transcriptomic data were highly consistent with prior knowledge of gene expression along the nephron. The gene expression data obtained in this work are available as a public Web page (https://helixweb.nih.gov/ESBL/Database/NephronRNAseq/) and can be used to explore the transcriptomic landscape of the nephron.",2015-10-08 +,"Systematics of the New Caledonian endemic genus Taophila Heller (Coleoptera: Chrysomelidae, Eumolpinae) combining morphological, molecular and ecological data, with description of two new species","The genus Taophila Heller, 1916 was considered a monotypic taxon until the recent addition of 11 new species. We examined the recent revision of this genus to evaluate these new taxonomic considerations in the light of molecular data for a sample of 85 individuals in 8 out of 12 Taophila species, and to provide characters for a phylogenetic systematic analysis of the genus. 
These data include two mitochondrial DNA markers (cox1 and rrnS) and one nuclear DNA marker (wingless) to infer the species phylogeny, and one gene from the chloroplastic DNA (psbA‐trnH) of putatively ingested plant tissue for diet inference. Molecular data support the monophyly of the species studied, except for T. subsericea Heller, 1916, which possibly represents an ancient genetic polymorphism. The molecular phylogeny and a reassessment of morphological characters are used to propose several taxonomic changes that will achieve stable systematics in Taophila, including: (i) description of two new species, T. aphrodita sp.n. and T. gaea sp.n., both related to T. mars Samuelson, 2010; (ii) removal of T. cancellata Samuelson, 2010 from Taophila and its new combination with Dematochroma Baly, 1864; (iii) proposal of two new subgenera in Taophila to reflect deep phylogenetic divergence and profound morphological discrepancies with the anatomy of the generic type and relatives, Jolivetiana subgen.n. (type: Taophila mantillerii Jolivet, Verma & Mille, 2007) and Lapita subgen.n. (type: Taophila mars Samuelson, 2010). The analysis of putative diet sequences retrieved from all the species available for analysis show widespread associations with ferns and legumes, which can be traced back to the hypothetical common ancestor of Taophila, and an apparent generalist diet for the species in Lapita subgen.n. This published work has been register in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:05F9C680‐3325‐409E‐A881‐C8A5AE1D136F.",2014-01-01 +28725772,Pathology Informatics Essentials for Residents: A Flexible Informatics Curriculum Linked to Accreditation Council for Graduate Medical Education Milestones (a secondary publication).,"

Context

Recognition of the importance of informatics to the practice of pathology has surged. Training residents in pathology informatics has been a daunting task for most residency programs in the United States because faculty often lacks experience and training resources. Nevertheless, developing resident competence in informatics is essential for the future of pathology as a specialty.

Objective

To develop and deliver a pathology informatics curriculum and instructional framework that guides pathology residency programs in training residents in critical pathology informatics knowledge and skills, and meets Accreditation Council for Graduate Medical Education Informatics Milestones.

Design

The College of American Pathologists, Association of Pathology Chairs, and Association for Pathology Informatics formed a partnership and expert work group to identify critical pathology informatics training outcomes and to create a highly adaptable curriculum and instructional approach, supported by a multiyear change management strategy.

Results

Pathology Informatics Essentials for Residents (PIER) is a rigorous approach for educating all pathology residents in important pathology informatics knowledge and skills. PIER includes an instructional resource guide and toolkit for incorporating informatics training into residency programs that vary in needs, size, settings, and resources. PIER is available at http://www.apcprods.org/PIER (accessed April 6, 2016).

Conclusions

PIER is an important contribution to informatics training in pathology residency programs. PIER introduces pathology trainees to broadly useful informatics concepts and tools that are relevant to practice. PIER provides residency program directors with a means to implement a standardized informatics training curriculum, to adapt the approach to local program needs, and to evaluate resident performance and progress over time.",2016-01-01 +29665417,Glaucoma Pred: Glaucoma prediction based on Myocilin genotype and phenotype information.,"Glaucoma is the second leading cause of blindness after cataract and is heterogeneous in nature. Employing a genetic approach for the detection of the diseased condition provides an advantage that the gene responsible for the disease can be identified by genetic test. The availability of predictive tests based on the published literature would provide a mechanism for early detection and treatment. The genotype and phenotype information could be a valuable source for predicting the risk of the disease. To this end, a web server has been developed, based on the genotype and phenotype of myocilin mutation, which were identified by familial linkage analysis and case studies. The proposed web server provides clinical data and severity index for a given mutation. The server has several useful options to help clinicians and researchers to identify individuals at a risk of developing the disease. Glaucoma Pred server is available at http://bioserver1.physics.iisc.ac.in/myocilin.",2018-04-14 +25392420,COXPRESdb in 2015: coexpression database for animal species by DNA-microarray and RNAseq-based expression data with multiple quality assessment systems.,"The COXPRESdb (http://coxpresdb.jp) provides gene coexpression relationships for animal species. Here, we report the updates of the database, mainly focusing on the following two points. 
For the first point, we added RNAseq-based gene coexpression data for three species (human, mouse and fly), and largely increased the number of microarray experiments to nine species. The increase of the number of expression data with multiple platforms could enhance the reliability of coexpression data. For the second point, we refined the data assessment procedures, for each coexpressed gene list and for the total performance of a platform. The assessment of coexpressed gene list now uses more reasonable P-values derived from platform-specific null distribution. These developments greatly reduced pseudo-predictions for directly associated genes, thus expanding the reliability of coexpression data to design new experiments and to discuss experimental results.",2014-11-11 +28013278,ThaleMine: A Warehouse for Arabidopsis Data Integration and Discovery.,"ThaleMine (https://apps.araport.org/thalemine/) is a comprehensive data warehouse that integrates a wide array of genomic information of the model plant Arabidopsis thaliana. The data collection currently includes the latest structural and functional annotation from the Araport11 update, the Col-0 genome sequence, RNA-seq and array expression, co-expression, protein interactions, homologs, pathways, publications, alleles, germplasm and phenotypes. The data are collected from a wide variety of public resources. Users can browse gene-specific data through Gene Report pages, identify and create gene lists based on experiments or indexed keywords, and run GO enrichment analysis to investigate the biological significance of selected gene sets. Developed by the Arabidopsis Information Portal project (Araport, https://www.araport.org/), ThaleMine uses the InterMine software framework, which builds well-structured data, and provides powerful data query and analysis functionality. The warehoused data can be accessed by users via graphical interfaces, as well as programmatically via web-services. 
Here we describe recent developments in ThaleMine including new features and extensions, and discuss future improvements. InterMine has been broadly adopted by the model organism research community including nematode, rat, mouse, zebrafish, budding yeast, the modENCODE project, as well as being used for human data. ThaleMine is the first InterMine developed for a plant model. As additional new plant InterMines are developed by the legume and other plant research communities, the potential of cross-organism integrative data analysis will be further enabled.",2017-01-01 +28130241,Classification of RNA structure change by 'gazing' at experimental data.,"

Motivation

Mutations (or Single Nucleotide Variants) in folded RiboNucleic Acid structures that cause local or global conformational change are riboSNitches. Predicting riboSNitches is challenging, as it requires making two, albeit related, structure predictions. The data most often used to experimentally validate riboSNitch predictions is Selective 2' Hydroxyl Acylation by Primer Extension, or SHAPE. Experimentally establishing a riboSNitch requires the quantitative comparison of two SHAPE traces: wild-type (WT) and mutant. Historically, SHAPE data was collected on electropherograms and change in structure was evaluated by 'gel gazing.' SHAPE data is now routinely collected with next generation sequencing and/or capillary sequencers. We aim to establish a classifier capable of simulating human 'gazing' by identifying features of the SHAPE profile that human experts agree 'looks' like a riboSNitch.

Results

We find strong quantitative agreement between experts when RNA scientists 'gaze' at SHAPE data and identify riboSNitches. We identify dynamic time warping and seven other features predictive of the human consensus. The classSNitch classifier reported here accurately reproduces human consensus for 167 mutant/WT comparisons with an Area Under the Curve (AUC) above 0.8. When we analyze 2019 mutant traces for 17 different RNAs, we find that features of the WT SHAPE reactivity allow us to improve thermodynamic structure predictions of riboSNitches. This is significant, as accurate RNA structural analysis and prediction is likely to become an important aspect of precision medicine.

Availability and implementation

The classSNitch R package is freely available at http://classsnitch.r-forge.r-project.org .

Contact

alain@email.unc.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +28637296,omiXcore: a web server for prediction of protein interactions with large RNA.,"

Summary

Here we introduce omiXcore, a server for calculations of protein binding to large RNAs (> 500 nucleotides). Our webserver allows (i) use of both protein and RNA sequences without size restriction, (ii) pre-compiled library for exploration of human long intergenic RNAs interactions and (iii) prediction of binding sites.

Results

omiXcore was trained and tested on enhanced UV Cross-Linking and ImmunoPrecipitation data. The method discriminates interacting and non-interacting protein-RNA pairs and identifies RNA binding sites with Areas under the ROC curve > 0.80, which suggests that the tool is particularly useful to prioritize candidates for further experimental validation.

Availability and implementation

omiXcore is freely accessed on the web at http://service.tartaglialab.com/grant_submission/omixcore.

Contact

gian.tartaglia@crg.es.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +23460887,In silico estimation of translation efficiency in human cell lines: potential evidence for widespread translational control.,"Recently large scale transcriptome and proteome datasets for human cells have become available. A striking finding from these studies is that the level of an mRNA typically predicts no more than 40% of the abundance of protein. This correlation represents the overall figure for all genes. We present here a bioinformatic analysis of translation efficiency - the rate at which mRNA is translated into protein. We have analysed those human datasets that include genome wide mRNA and protein levels determined in the same study. The analysis comprises five distinct human cell lines that together provide comparable data for 8,170 genes. For each gene we have used levels of mRNA and protein combined with protein stability data from the HeLa cell line to estimate translation efficiency. This was possible for 3,990 genes in one or more cell lines and 1,807 genes in all five cell lines. Interestingly, our analysis and modelling shows that for many genes this estimated translation efficiency has considerable consistency between cell lines. Some deviations from this consistency likely result from the regulation of protein degradation. Others are likely due to known translational control mechanisms. These findings suggest it will be possible to build improved models for the interpretation of mRNA expression data. The results we present here provide a view of translation efficiency for many genes. We provide an online resource allowing the exploration of translation efficiency in genes of interest within different cell lines (http://bioanalysis.otago.ac.nz/TranslationEfficiency).",2013-02-27 +29049600,Speech Perception in Complex Acoustic Environments: Developmental Effects.,"

Purpose

The ability to hear and understand speech in complex acoustic environments follows a prolonged time course of development. The purpose of this article is to provide a general overview of the literature describing age effects in susceptibility to auditory masking in the context of speech recognition, including a summary of findings related to the maturation of processes thought to facilitate segregation of target from competing speech.

Method

Data from published and ongoing studies are discussed, with a focus on synthesizing results from studies that address age-related changes in the ability to perceive speech in the presence of a small number of competing talkers.

Conclusions

This review provides a summary of the current state of knowledge that is valuable for researchers and clinicians. It highlights the importance of considering listener factors, such as age and hearing status, as well as stimulus factors, such as masker type, when interpreting masked speech recognition data.

Presentation video

http://cred.pubs.asha.org/article.aspx?articleid=2601620.",2017-10-01 +23584833,Annotating the biomedical literature for the human variome.,"This article introduces the Variome Annotation Schema, a schema that aims to capture the core concepts and relations relevant to cataloguing and interpreting human genetic variation and its relationship to disease, as described in the published literature. The schema was inspired by the needs of the database curators of the International Society for Gastrointestinal Hereditary Tumours (InSiGHT) database, but is intended to have application to genetic variation information in a range of diseases. The schema has been applied to a small corpus of full text journal publications on the subject of inherited colorectal cancer. We show that the inter-annotator agreement on annotation of this corpus ranges from 0.78 to 0.95 F-score across different entity types when exact matching is measured, and improves to a minimum F-score of 0.87 when boundary matching is relaxed. Relations show more variability in agreement, but several are reliable, with the highest, cohort-has-size, reaching 0.90 F-score. We also explore the relevance of the schema to the InSiGHT database curation process. The schema and the corpus represent an important new resource for the development of text mining solutions that address relationships among patient cohorts, disease and genetic variation, and therefore, we also discuss the role text mining might play in the curation of information related to the human variome. The corpus is available at http://opennicta.com/home/health/variome.",2013-04-12 +25600941,"Enabling systematic, harmonised and large-scale biofilms data computation: the Biofilms Experiment Workbench.","

Background and objective

Biofilms are receiving increasing attention from the biomedical community. Biofilm-like growth within human body is considered one of the key microbial strategies to augment resistance and persistence during infectious processes. The Biofilms Experiment Workbench is a novel software workbench for the operation and analysis of biofilms experimental data. The goal is to promote the interchange and comparison of data among laboratories, providing systematic, harmonised and large-scale data computation.

Methods

The workbench was developed with AIBench, an open-source Java desktop application framework for scientific software development in the domain of translational biomedicine. Implementation favours free and open-source third-parties, such as the R statistical package, and reaches for the Web services of the BiofOmics database to enable public experiment deposition.

Results

First, we summarise the novel, free, open, XML-based interchange format for encoding biofilms experimental data. Then, we describe the execution of common scenarios of operation with the new workbench, such as the creation of new experiments, the importation of data from Excel spreadsheets, the computation of analytical results, the on-demand and highly customised construction of Web publishable reports, and the comparison of results between laboratories.

Conclusions

A considerable and varied amount of biofilms data is being generated, and there is a critical need to develop bioinformatics tools that expedite the interchange and comparison of microbiological and clinical results among laboratories. We propose a simple, open-source software infrastructure which is effective, extensible and easy to understand. The workbench is freely available for non-commercial use at http://sing.ei.uvigo.es/bew under LGPL license.",2015-01-08 +29931279,Accurate prediction of protein contact maps by coupling residual two-dimensional bidirectional long short-term memory with convolutional neural networks.,"

Motivation

Accurate prediction of a protein contact map depends greatly on capturing as much contextual information as possible from surrounding residues for a target residue pair. Recently, ultra-deep residual convolutional networks were found to be state-of-the-art in the latest Critical Assessment of Structure Prediction techniques (CASP12) for protein contact map prediction by attempting to provide a protein-wide context at each residue pair. Recurrent neural networks have seen great success in recent protein residue classification problems due to their ability to propagate information through long protein sequences, especially Long Short-Term Memory (LSTM) cells. Here, we propose a novel protein contact map prediction method by stacking residual convolutional networks with two-dimensional residual bidirectional recurrent LSTM networks, and using both one-dimensional sequence-based and two-dimensional evolutionary coupling-based information.

Results

We show that the proposed method achieves a robust performance over validation and independent test sets with the Area Under the receiver operating characteristic Curve (AUC) > 0.95 in all tests. When compared to several state-of-the-art methods for independent testing of 228 proteins, the method yields an AUC value of 0.958, whereas the next-best method obtains an AUC of 0.909. More importantly, the improvement is over contacts at all sequence-position separations. Specifically, a 8.95%, 5.65% and 2.84% increase in precision were observed for the top L∕10 predictions over the next best for short, medium and long-range contacts, respectively. This confirms the usefulness of ResNets to congregate the short-range relations and 2D-BRLSTM to propagate the long-range dependencies throughout the entire protein contact map 'image'.

Availability and implementation

SPOT-Contact server url: http://sparks-lab.org/jack/server/SPOT-Contact/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-12-01 +24373374,TIPMaP: a web server to establish transcript isoform profiles from reliable microarray probes.,"

Background

Standard 3' Affymetrix gene expression arrays have contributed a significantly higher volume of existing gene expression data than other microarray platforms. These arrays were designed to identify differentially expressed genes, but not their alternatively spliced transcript forms. No resource can currently identify expression pattern of specific mRNA forms using these microarray data, even though it is possible to do this.

Results

We report a web server for expression profiling of alternatively spliced transcripts using microarray data sets from 31 standard 3' Affymetrix arrays for human, mouse and rat species. The tool has been experimentally validated for mRNAs transcribed or not-detected in a human disease condition (non-obstructive azoospermia, a male infertility condition). About 4000 gene expression datasets were downloaded from a public repository. 'Good probes' with complete coverage and identity to latest reference transcript sequences were first identified. Using them, 'Transcript specific probe-clusters' were derived for each platform and used to identify expression status of possible transcripts. The web server can lead the user to datasets corresponding to specific tissues, conditions via identifiers of the microarray studies or hybridizations, keywords, official gene symbols or reference transcript identifiers. It can identify, in the tissues and conditions of interest, about 40% of known transcripts as 'transcribed', 'not-detected' or 'differentially regulated'. Corresponding additional information for probes, genes, transcripts and proteins can be viewed too. We identified the expression of transcripts in a specific clinical condition and validated a few of these transcripts by experiments (using reverse transcription followed by polymerase chain reaction). The experimental observations indicated higher agreements with the web server results, than contradictions. The tool is accessible at http://resource.ibab.ac.in/TIPMaP.

Conclusion

The newly developed online tool forms a reliable means for identification of alternatively spliced transcript-isoforms that may be differentially expressed in various tissues, cell types or physiological conditions. Thus, by making better use of existing data, TIPMaP avoids the dependence on precious tissue-samples, in experiments with a goal to establish expression profiles of alternative splice forms--at least in some cases.",2013-12-27 +30675814,Native American Perspectives on Health and Traditional Ecological Knowledge.,"

Background

Traditional ecological knowledge (TEK) is a conceptual framework that highlights Indigenous knowledge (IK) systems. Although scientific literature has noted the relevance of TEK for environmental research since the 1980s, little attention has been given to how Native American (NA) scholars engage with it to shape tribal-based research on health, nor how non-Native scholars can coordinate their approaches with TEK. This coordination is of particular importance for environmental health sciences (EHS) research exploring interdisciplinary approaches and the integration of environmental and human health.

Objective

Our perspective on TEK arose from a series of Health and Culture Research Group (HCRG) workshops that identified gaps in existing EHS methodologies that are based on a reliance on Euro-American concepts for assessing environmental exposures in tribal communities. These prior methods neither take into account cultural behavior nor community responses to these. Our objective is to consider NA perspectives on TEK when analyzing relationships between health and the environment and to look at how these may be applied to address this gap.

Discussion

The authors—the majority of whom are NA scholars—highlight two research areas that consider health from a TEK perspective: food systems and knowledge of medicinal plants. This research has yielded data, methods, and knowledge that have helped Indigenous communities better define and reduce health risks and protect local natural food resources, and this TEK approach may prove of value to EHS research.

Conclusion

NA perspectives on TEK resulting from the HCRG workshops provide an opportunity for developing more accurate Indigenous health indicators (IHI) reflecting the conceptualizations of health maintained in these communities. This approach has the potential to bridge the scientific study of exposure with methods addressing a tribal perspective on the sociocultural determinants of health, identifying potential new areas of inquiry in EHS that afford nuanced evaluations of exposures and outcomes in tribal communities. https://doi.org/10.1289/EHP1944.",2018-12-01 +29121561,FLIP: An assisting software in structure based drug design using fingerprint of protein-ligand interaction profiles.,"With the growing number of labor-intensive data in the pharmaceutical industries and public domain for protein-ligand complexes, a significant challenge is still remaining in managing and leveraging this vast information. Here, a standalone application is presented for analysis, organization, and illustration of structural data and molecular interactions for exploiting 3D-structures into simple 1D fingerprints. The utility of the approach was shown in unraveling a feasible solution for post-processing of docking results in parallel with providing fruitful analysis for users in order to investigate molecular interactions. Remarkably, all interaction possibilities including (hydrogen bond, water-bridged, electrostatic, and hydrophobic as well as π-π and cation-π interactions) are supported both in the form of fingerprints and compelling reports. These investigations are mainly considered based on right orientation, location, and geometry of the interacting pairs rather than the acquisition of the energy terms. The reasonable efficiency of our application in different models was comparable to recent methods. It is clearly presented that FLIP provides a faster way to generate usable fingerprints for ligand and protein binding modes. 
FLIP is free for academic use and is available at: http://zistrayan.com/development/download/flip/package.zip.",2017-11-01 +28877583,Leaf LIMS: A Flexible Laboratory Information Management System with a Synthetic Biology Focus.,"This paper presents Leaf LIMS, a flexible laboratory information management system (LIMS) designed to address the complexity of synthetic biology workflows. At the project's inception there was a lack of a LIMS designed specifically to address synthetic biology processes, with most systems focused on either next generation sequencing or biobanks and clinical sample handling. Leaf LIMS implements integrated project, item, and laboratory stock tracking, offering complete sample and construct genealogy, materials and lot tracking, and modular assay data capture. Hence, it enables highly configurable task-based workflows and supports data capture from project inception to completion. As such, in addition to it supporting synthetic biology it is ideal for many laboratory environments with multiple projects and users. The system is deployed as a web application through Docker and is provided under a permissive MIT license. It is freely available for download at https://leaflims.github.io .",2017-09-13 +26481321,BubbleGUM: automatic extraction of phenotype molecular signatures and comprehensive visualization of multiple Gene Set Enrichment Analyses.,"

Background

Recent advances in the analysis of high-throughput expression data have led to the development of tools that scaled-up their focus from single-gene to gene set level. For example, the popular Gene Set Enrichment Analysis (GSEA) algorithm can detect moderate but coordinated expression changes of groups of presumably related genes between pairs of experimental conditions. This considerably improves extraction of information from high-throughput gene expression data. However, although many gene sets covering a large panel of biological fields are available in public databases, the ability to generate home-made gene sets relevant to one's biological question is crucial but remains a substantial challenge to most biologists lacking statistic or bioinformatic expertise. This is all the more the case when attempting to define a gene set specific of one condition compared to many other ones. Thus, there is a crucial need for an easy-to-use software for generation of relevant home-made gene sets from complex datasets, their use in GSEA, and the correction of the results when applied to multiple comparisons of many experimental conditions.

Result

We developed BubbleGUM (GSEA Unlimited Map), a tool that allows to automatically extract molecular signatures from transcriptomic data and perform exhaustive GSEA with multiple testing correction. One original feature of BubbleGUM notably resides in its capacity to integrate and compare numerous GSEA results into an easy-to-grasp graphical representation. We applied our method to generate transcriptomic fingerprints for murine cell types and to assess their enrichments in human cell types. This analysis allowed us to confirm homologies between mouse and human immunocytes.

Conclusions

BubbleGUM is an open-source software that allows to automatically generate molecular signatures out of complex expression datasets and to assess directly their enrichment by GSEA on independent datasets. Enrichments are displayed in a graphical output that helps interpreting the results. This innovative methodology has recently been used to answer important questions in functional genomics, such as the degree of similarities between microarray datasets from different laboratories or with different experimental models or clinical cohorts. BubbleGUM is executable through an intuitive interface so that both bioinformaticians and biologists can use it. It is available at http://www.ciml.univ-mrs.fr/applications/BubbleGUM/index.html .",2015-10-19 +29449653,CFTR mutation enhances Dishevelled degradation and results in impairment of Wnt-dependent hematopoiesis.,"Mutations of cystic fibrosis transmembrane conductance regulator (CFTR) cause cystic fibrosis (CF) with a multitude of clinical manifestations. Some CF patients develop clinically significant anemia, suggesting that CFTR may regulate hematopoiesis. Here, we report that cftr mutant zebrafish model exhibits primitive and definitive hematopoietic defects with impaired Wnt signaling. Cftr is found to interact, via its PDZ-binding domain (PDZBD), with Dishevelled (Dvl), a key component of Wnt signaling required for hematopoietic progenitor specification, thus protecting Dvl from Dapper1 (Dpr1)-induced lysosomal degradation. Defective hematopoiesis and impaired Wnt signaling in cftr mutant can be rescued by overexpression of wild-type or channel function-defective G551D mutant CFTR with an intact PDZBD, but not Cftr with mutations in the PDZBD. Analysis of human database ( http://r2.amc.nl ) shows that CFTR is positively correlated with DVL2 and Wnt-related hematopoietic factors in human blood system. 
The results reveal a previously unrecognized role of CFTR, which is independent of its channel function, in regulating DVL degradation and thus Wnt signaling required for hematopoiesis in both zebrafish and humans, providing an explanation for the anemic phenotype of CF patients.",2018-02-15 +22123736,The UniProt-GO Annotation database in 2011.,"The GO annotation dataset provided by the UniProt Consortium (GOA: http://www.ebi.ac.uk/GOA) is a comprehensive set of evidenced-based associations between terms from the Gene Ontology resource and UniProtKB proteins. Currently supplying over 100 million annotations to 11 million proteins in more than 360,000 taxa, this resource has increased 2-fold over the last 2 years and has benefited from a wealth of checks to improve annotation correctness and consistency as well as now supplying a greater information content enabled by GO Consortium annotation format developments. Detailed, manual GO annotations obtained from the curation of peer-reviewed papers are directly contributed by all UniProt curators and supplemented with manual and electronic annotations from 36 model organism and domain-focused scientific resources. The inclusion of high-quality, automatic annotation predictions ensures the UniProt GO annotation dataset supplies functional information to a wide range of proteins, including those from poorly characterized, non-model organism species. UniProt GO annotations are freely available in a range of formats accessible by both file downloads and web-based views. In addition, the introduction of a new, normalized file format in 2010 has made for easier handling of the complete UniProt-GOA data set.",2011-11-28 +29247873,The Ability of Different Imputation Methods to Preserve the Significant Genes and Pathways in Cancer.,"Deciphering important genes and pathways from incomplete gene expression data could facilitate a better understanding of cancer. 
Different imputation methods can be applied to estimate the missing values. In our study, we evaluated various imputation methods for their performance in preserving significant genes and pathways. In the first step, 5% genes are considered in random for two types of ignorable and non-ignorable missingness mechanisms with various missing rates. Next, 10 well-known imputation methods were applied to the complete datasets. The significance analysis of microarrays (SAM) method was applied to detect the significant genes in rectal and lung cancers to showcase the utility of imputation approaches in preserving significant genes. To determine the impact of different imputation methods on the identification of important genes, the chi-squared test was used to compare the proportions of overlaps between significant genes detected from original data and those detected from the imputed datasets. Additionally, the significant genes are tested for their enrichment in important pathways, using the ConsensusPathDB. Our results showed that almost all the significant genes and pathways of the original dataset can be detected in all imputed datasets, indicating that there is no significant difference in the performance of various imputation methods tested. The source code and selected datasets are available on http://profiles.bs.ipm.ir/softwares/imputation_methods/.",2017-12-13 +23940608,Genome annotation of Burkholderia sp. SJ98 with special focus on chemotaxis genes.,"Burkholderia sp. strain SJ98 has the chemotactic activity towards nitroaromatic and chloronitroaromatic compounds. Recently our group published draft genome of strain SJ98. In this study, we further sequence and annotate the genome of strain SJ98 to exploit the potential of this bacterium. We specifically annotate its chemotaxis genes and methyl accepting chemotaxis proteins. Genome of Burkholderia sp. SJ98 was annotated using PGAAP pipeline that predicts 7,268 CDSs, 52 tRNAs and 3 rRNAs. 
Our analysis based on phylogenetic and comparative genomics suggest that Burkholderia sp. YI23 is closest neighbor of the strain SJ98. The genes involved in the chemotaxis of strain SJ98 were compared with genes of closely related Burkholderia strains (i.e. YI23, CCGE 1001, CCGE 1002, CCGE 1003) and with well characterized bacterium E. coli K12. It was found that strain SJ98 has 37 che genes including 19 methyl accepting chemotaxis proteins that involved in sensing of different attractants. Chemotaxis genes have been found in a cluster along with the flagellar motor proteins. We also developed a web resource that provides comprehensive information on strain SJ98 that includes all analysis data (http://crdd.osdd.net/raghava/genomesrs/burkholderia/).",2013-08-05 +26329305,Fluoridated milk for preventing dental caries.,"BACKGROUND:Dental caries remains a major public health problem in most industrialised countries, affecting 60% to 90% of schoolchildren and the vast majority of adults. Milk may provide a relatively cost-effective vehicle for fluoride delivery in the prevention of dental caries. This is an update of a Cochrane review first published in 2005. OBJECTIVES:To assess the effects of milk fluoridation for preventing dental caries at a community level. SEARCH METHODS:We searched the Cochrane Oral Health Group Trials Register (inception to November 2014), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library, 2014, Issue 10), MEDLINE via OVID (1946 to November 2014) and EMBASE via OVID (1980 to November 2014). We also searched the U.S. National Institutes of Health Trials Register (https://clinicaltrials.gov) and the WHO International Clinical Trials Registry Platform (http://apps.who.int/trialsearch) for ongoing trials. We did not place any restrictions on the language or date of publication when searching the electronic databases. 
SELECTION CRITERIA:Randomised controlled trials (RCTs), with an intervention and follow-up period of at least two years, comparing fluoridated milk with non-fluoridated milk. DATA COLLECTION AND ANALYSIS:Two authors independently assessed trial risk of bias and extracted data. We used standard methodological procedures expected by The Cochrane Collaboration. MAIN RESULTS:We included one unpublished RCT, randomising 180 children aged three years at study commencement. The setting was nursery schools in an area with high prevalence of dental caries and a low level of fluoride in drinking water. Data from 166 participants were available for analysis. The study carried a high risk of bias. After three years, there was a reduction of caries in permanent teeth (mean difference (MD) -0.13, 95% confidence interval (CI) -0.24 to -0.02) and in primary teeth (MD -1.14, 95% CI -1.86 to -0.42), as measured by the decayed, missing and filled teeth index (DMFT for permanent teeth and dmft for primary teeth). For primary teeth, this is a substantial reduction, equivalent to a prevented fraction of 31%. For permanent teeth, the disease level was very low in the study, resulting in a small absolute effect size. The included study did not report any other outcomes of interest for this review (adverse events, dental pain, antibiotic use or requirement for general anaesthesia due to dental procedures). AUTHORS' CONCLUSIONS:There is low quality evidence to suggest fluoridated milk may be beneficial to schoolchildren, contributing to a substantial reduction in dental caries in primary teeth. Due to the low quality of the evidence, further research is likely to have an important impact on our confidence in the estimate of effect and is likely to change the estimate. There was only one relatively small study, which had important methodological limitations on the data for the effectiveness in reducing caries. 
Furthermore, there was no information about the potential harms of the intervention. Additional RCTs of high quality are needed before we can draw definitive conclusions about the benefits of milk fluoridation.",2015-08-31 +25378310,BRENDA in 2015: exciting developments in its 25th year of existence.,"The BRENDA enzyme information system (http://www.brenda-enzymes.org/) has developed into an elaborate system of enzyme and enzyme-ligand information obtained from different sources, combined with flexible query systems and evaluation tools. The information is obtained by manual extraction from primary literature, text and data mining, data integration, and prediction algorithms. Approximately 300 million data include enzyme function and molecular data from more than 30,000 organisms. The manually derived core contains 3 million data from 77,000 enzymes annotated from 135,000 literature references. Each entry is connected to the literature reference and the source organism. They are complemented by information on occurrence, enzyme/disease relationships from text mining, sequences and 3D structures from other databases, and predicted enzyme location and genome annotation. Functional and structural data of more than 190,000 enzyme ligands are stored in BRENDA. New features improving the functionality and analysis tools were implemented. The human anatomy atlas CAVEman is linked to the BRENDA Tissue Ontology terms providing a connection between anatomical and functional enzyme data. Word Maps for enzymes obtained from PubMed abstracts highlight application and scientific relevance of enzymes. The EnzymeDetector genome annotation tool and the reaction database BKM-react including reactions from BRENDA, KEGG and MetaCyc were improved. 
The website was redesigned providing new query options.",2014-11-05 +28008948,Lotus Base: An integrated information portal for the model legume Lotus japonicus.,"Lotus japonicus is a well-characterized model legume widely used in the study of plant-microbe interactions. However, datasets from various Lotus studies are poorly integrated and lack interoperability. We recognize the need for a comprehensive repository that allows comprehensive and dynamic exploration of Lotus genomic and transcriptomic data. Equally important are user-friendly in-browser tools designed for data visualization and interpretation. Here, we present Lotus Base, which opens to the research community a large, established LORE1 insertion mutant population containing an excess of 120,000 lines, and serves the end-user tightly integrated data from Lotus, such as the reference genome, annotated proteins, and expression profiling data. We report the integration of expression data from the L. japonicus gene expression atlas project, and the development of tools to cluster and export such data, allowing users to construct, visualize, and annotate co-expression gene networks. Lotus Base takes advantage of modern advances in browser technology to deliver powerful data interpretation for biologists. Its modular construction and publicly available application programming interface enable developers to tap into the wealth of integrated Lotus data. Lotus Base is freely accessible at: https://lotus.au.dk.",2016-12-23 +29392834,Structural mechanisms for the S-nitrosylation-derived protection of mouse galectin-2 from oxidation-induced inactivation revealed by NMR.,"Galectin-2 (Gal-2) is a lectin thought to play protective roles in the gastrointestinal tract. 
Oxidation of mouse Gal-2 (mGal-2) by hydrogen peroxide (H2O2) results in the loss of sugar-binding activity, whereas S-nitrosylation of mGal-2, which does not change its sugar-binding profile, has been shown to protect the protein from H2O2-induced inactivation. One of the two cysteine residues, C57, has been identified as being responsible for controlling H2O2-induced inactivation; however, the underlying molecular mechanism has not been elucidated. We performed structural analyses of mGal-2 using nuclear magnetic resonance (NMR) and found that residues near C57 experienced significant chemical shift changes following S-nitrosylation, and that S-nitrosylation slowed the H2O2-induced aggregation of mGal-2. We also revealed that S-nitrosylation improves the thermal stability of mGal-2 and that the solvent accessibility and/or local dynamics of residues near C57 and the local dynamics of the core-forming residues in mGal-2 are reduced by S-nitrosylation. Structural models of Gal-2 indicated that C57 is located in a hydrophobic pocket that can be plugged by S-nitrosylation, which was supported by the NMR experiments. Based on these results, we propose two structural mechanisms by which S-nitrosylation protects mGal-2 from H2O2-induced aggregation without changing its sugar-binding profile: (a) stabilization of the hydrophobic pocket around C57 that prevents oxidation-induced destabilization of the pocket, and (b) prevention of oxidation of C57 during the transiently unfolded state of the protein, in which the residue is exposed to H2O2. 
DATABASE:Nuclear magnetic resonance assignments for non-S-nitrosylated mGal-2 and S-nitrosylated mGal-2 have been deposited in the BioMagResBank (http://www.bmrb.wisc.edu/) under ID code 27237 for non-S-nitrosylated mGal-2 and ID code 27238 for S-nitrosylated mGal-2.",2018-02-14 +29434460,Relationship Between Comorbidities and Employment Among Veterans with Spinal Cord Injury.,"Objective: To determine the relationship between medical and mental health comorbidities in a large cohort of veterans with spinal cord injury (SCI). Methods: Data were collected from interviews and electronic medical records of veterans with SCI (N = 1,047) who received care at 7 geographically diverse SCI centers within the Department of Veterans Affairs across the country (https://clinicaltrials.gov/ct2/show/NCT01141647). Employment, medical, functional, and psychosocial data underwent cross-sectional analysis. Results: Lack of any documented mental health diagnosis correlated strongly with being employed at the time of enrollment. No single comorbidity was associated with employment at enrollment, but an increased number of medical and/or mental health comorbidities (""health burden"") were associated with a decreased likelihood of employment at the time of enrollment. Conclusion: Further investigation is needed to clarify whether comorbidity severity or combinations of specific comorbidities predict rehabilitation outcome, including employment.",2017-09-27 +30498085,TLR Signaling Is Activated in Lymph Node-Resident CLL Cells and Is Only Partially Inhibited by Ibrutinib.,"Chronic lymphocytic leukemia (CLL) is a malignancy of mature B cells driven by B-cell receptor (BCR) signaling and activated primarily in the lymph node. The Bruton's tyrosine kinase (BTK) inhibitor ibrutinib effectively inhibits BCR-dependent proliferation and survival signals and has emerged as a breakthrough therapy for CLL. 
However, complete remissions are uncommon and are achieved only after years of continuous therapy. We hypothesized that other signaling pathways that sustain CLL cell survival are only partially inhibited by ibrutinib. In normal B cells, Toll-like receptor (TLR) signaling cooperates with BCR signaling to activate prosurvival NF-κB. Here, we show that an experimentally validated gene signature of TLR activation is overexpressed in lymph node-resident CLL cells compared with cells in the blood. Consistent with TLR activation, we detected phosphorylation of NF-κB, STAT1, and STAT3 in lymph node-resident CLL cells and in cells stimulated with CpG oligonucleotides in vitro. CpG promoted IRAK1 degradation, secretion of IL10, and extended survival of CLL cells in culture. CpG-induced TLR signaling was significantly inhibited by both an IRAK1/4 inhibitor and ibrutinib. Although inhibition of TLR signaling was incomplete with either drug, the combination achieved superior results, including more effective inhibition of TLR-mediated survival signaling. Our data suggest an important role for TLR signaling in CLL pathogenesis and in sustaining the viability of CLL cells during ibrutinib therapy. The combination of ibrutinib with a TLR pathway inhibitor could provide superior antitumor activity and should be investigated in clinical studies. SIGNIFICANCE: CLL relies on the concomitant cooperation of B-cell receptor and Toll-like receptor signaling; inhibition of both pathways is superior to inhibition of either pathway alone. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/79/2/360/F1.large.jpg.",2018-11-29 +26744602,SorGSD: a sorghum genome SNP database.,"

Background

Sorghum (Sorghum bicolor) is one of the most important cereal crops globally and a potential energy plant for biofuel production. In order to explore genetic gain for a range of important quantitative traits, such as drought and heat tolerance, grain yield, stem sugar accumulation, and biomass production, via the use of molecular breeding and genomic selection strategies, knowledge of the available genetic variation and the underlying sequence polymorphisms, is required.

Results

Based on the assembled and annotated genome sequences of Sorghum bicolor (v2.1) and the recently published sorghum re-sequencing data, ~62.9 M SNPs were identified among 48 sorghum accessions and included in a newly developed sorghum genome SNP database SorGSD (http://sorgsd.big.ac.cn). The diverse panel of 48 sorghum lines can be classified into four groups, improved varieties, landraces, wild and weedy sorghums, and a wild relative Sorghum propinquum. SorGSD has a web-based query interface to search or browse SNPs from individual accessions, or to compare SNPs among several lines. The query results can be visualized as text format in tables, or rendered as graphics in a genome browser. Users may find useful annotation from query results including type of SNPs such as synonymous or non-synonymous SNPs, start, stop of splice variants, chromosome locations, and links to the annotation on Phytozome (www.phytozome.net) sorghum genome database. In addition, general information related to sorghum research such as online sorghum resources and literature references can also be found on the website. All the SNP data and annotations can be freely downloaded from the website.

Conclusions

SorGSD is a comprehensive web-portal providing a database of large-scale genome variation across all racial types of cultivated sorghum and wild relatives. It can serve as a bioinformatics platform for a range of genomics and molecular breeding activities for sorghum and for other C4 grasses.",2016-01-07 +24896259,Genome at juncture of early human migration: a systematic analysis of two whole genomes and thirteen exomes from Kuwaiti population subgroup of inferred Saudi Arabian tribe ancestry.,"Population of the State of Kuwait is composed of three genetic subgroups of inferred Persian, Saudi Arabian tribe and Bedouin ancestry. The Saudi Arabian tribe subgroup traces its origin to the Najd region of Saudi Arabia. By sequencing two whole genomes and thirteen exomes from this subgroup at high coverage (>40X), we identify 4,950,724 Single Nucleotide Polymorphisms (SNPs), 515,802 indels and 39,762 structural variations. Of the identified variants, 10,098 (8.3%) exomic SNPs, 139,923 (2.9%) non-exomic SNPs, 5,256 (54.3%) exomic indels, and 374,959 (74.08%) non-exomic indels are 'novel'. Up to 8,070 (79.9%) of the reported novel biallelic exomic SNPs are seen in low frequency (minor allele frequency <5%). We observe 5,462 known and 1,004 novel potentially deleterious nonsynonymous SNPs. Allele frequencies of common SNPs from the 15 exomes is significantly correlated with those from genotype data of a larger cohort of 48 individuals (Pearson correlation coefficient, 0.91; p <2.2×10^-16). A set of 2,485 SNPs show significantly different allele frequencies when compared to populations from other continents. 
Two notable variants having risk alleles in high frequencies in this subgroup are: a nonsynonymous deleterious SNP (rs2108622 [19:g.15990431C>T] from CYP4F2 gene [MIM:*604426]) associated with warfarin dosage levels [MIM:#122700] required to elicit normal anticoagulant response; and a 3' UTR SNP (rs6151429 [22:g.51063477T>C]) from ARSA gene [MIM:*607574]) associated with Metachromatic Leukodystrophy [MIM:#250100]. Hemoglobin Riyadh variant (identified for the first time in a Saudi Arabian woman) is observed in the exome data. The mitochondrial haplogroup profiles of the 15 individuals are consistent with the haplogroup diversity seen in Saudi Arabian natives, who are believed to have received substantial gene flow from Africa and eastern provenance. We present the first genome resource imperative for designing future genetic studies in Saudi Arabian tribe subgroup. The full-length genome sequences and the identified variants are available at ftp://dgr.dasmaninstitute.org and http://dgr.dasmaninstitute.org/DGR/gb.html.",2014-06-04 +29240889,GeoBoost: accelerating research involving the geospatial metadata of virus GenBank records.,"Summary:GeoBoost is a command-line software package developed to address sparse or incomplete metadata in GenBank sequence records that relate to the location of the infected host (LOIH) of viruses. Given a set of GenBank accession numbers corresponding to virus GenBank records, GeoBoost extracts, integrates and normalizes geographic information reflecting the LOIH of the viruses using integrated information from GenBank metadata and related full-text publications. In addition, to facilitate probabilistic geospatial modeling, GeoBoost assigns probability scores for each possible LOIH. Availability and implementation:Binaries and resources required for running GeoBoost are packed into a single zipped file and freely available for download at https://tinyurl.com/geoboost. 
A video tutorial is included to help users quickly and easily install and run the software. The software is implemented in Java 1.8, and supported on MS Windows and Linux platforms. Contact:gragon@upenn.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +26819632,SequenceCEROSENE: a computational method and web server to visualize spatial residue neighborhoods at the sequence level.,"

Background

To understand the molecular function of biopolymers, studying their structural characteristics is of central importance. Graphics programs are often utilized to conceive these properties, but with the increasing number of available structures in databases or structure models produced by automated modeling frameworks this process requires assistance from tools that allow automated structure visualization. In this paper a web server and its underlying method for generating graphical sequence representations of molecular structures is presented.

Results

The method, called SequenceCEROSENE (color encoding of residues obtained by spatial neighborhood embedding), retrieves the sequence of each amino acid or nucleotide chain in a given structure and produces a color coding for each residue based on three-dimensional structure information. From this, color-highlighted sequences are obtained, where residue coloring represent three-dimensional residue locations in the structure. This color encoding thus provides a one-dimensional representation, from which spatial interactions, proximity and relations between residues or entire chains can be deduced quickly and solely from color similarity. Furthermore, additional heteroatoms and chemical compounds bound to the structure, like ligands or coenzymes, are processed and reported as well. To provide free access to SequenceCEROSENE, a web server has been implemented that allows generating color codings for structures deposited in the Protein Data Bank or structure models uploaded by the user. Besides retrieving visualizations in popular graphic formats, underlying raw data can be downloaded as well. In addition, the server provides user interactivity with generated visualizations and the three-dimensional structure in question.

Conclusions

Color encoded sequences generated by SequenceCEROSENE can aid to quickly perceive the general characteristics of a structure of interest (or entire sets of complexes), thus supporting the researcher in the initial phase of structure-based studies. In this respect, the web server can be a valuable tool, as users are allowed to process multiple structures, quickly switch between results, and interact with generated visualizations in an intuitive manner. The SequenceCEROSENE web server is available at https://biosciences.hs-mittweida.de/seqcerosene.",2016-01-27 +30107069,Estimated burden of serious human fungal diseases in Turkey.,"The current number of fungal infections occurring each year in Turkey is unknown. We estimated the burden of serious human fungal diseases based on the population at risk, existing epidemiological data from 1920 to 2017 and modelling previously described by the LIFE program (http://www.LIFE-worldwide.org). Among the population of Turkey (80.8 million in 2017), approximately 1 785 811 (2.21%) people are estimated to suffer from a serious fungal infection each year. The model used predicts high prevalences of allergic fungal rhinosinusitis episodes (312 994 cases) (392/100 000), of severe asthma with fungal sensitisation (42 989 cases) (53.20 cases/100 000 adults per year), of allergic bronchopulmonary aspergillosis (32 594 cases) (40.33/100 000), of fungal keratitis (26 671 cases) (33/100 000) and of chronic pulmonary aspergillosis (5890 cases) (7.29/100 000). The estimated annual incidence for invasive aspergillosis is lower (3911 cases) (4.84/100 000 annually). Among about 22.5 million women aged 15-50 years, recurrent vulvovaginal candidiasis is estimated to occur in 1 350 371 (3342/100 000) females. The burden of three superficial fungal infections was also estimated: tinea pedis (1.79 million), tinea capitis (43 900) and onychomycosis (1.73 million). 
Given that the modelling estimates reported in the current study might be substantially under- or overestimated, formal epidemiological and comprehensive surveillance studies are required to validate or modify these estimates.",2018-09-21 +30131901,iMEC: Online Marker Efficiency Calculator.,"

Premise of the study

To accurately design plant genetic studies, the information content of utilized markers and primers must be calculated. Plant genotyping studies should take into account the efficiency of each marker system by calculating different parameters to find the optimal combination of primers. This can be problematic because there are currently no easily accessible applications that can be used to calculate multiple indices together.

Methods and results

The program Online Marker Efficiency Calculator (iMEC) was developed using R for the simple computation of seven polymorphism indices (heterozygosity index, polymorphism information content, discriminating power, effective multiplex ratio, marker index, arithmetic mean heterozygosity, and resolving power). These indices are based on dominant and codominant DNA fingerprinting markers, thus allowing comparison and selection of optimal genetic markers for a given data set.

Conclusions

iMEC simplifies the calculation of diverse indices for the marker of choice to better enable researchers to measure polymorphism information for individual markers. The program is available at https://irscope.shinyapps.io/iMEC/.",2018-06-24 +26092861,"IBiSS, a versatile and interactive tool for integrated sequence and 3D structure analysis of large macromolecular complexes.","

Motivation

In the past few years, an increasing number of crystal and cryo electron microscopy (cryo-EM) structures of large macromolecular complexes, such as the ribosome or the RNA polymerase, have become available from various species. These multi-subunit complexes can be difficult to analyze at the level of amino acid sequence in combination with the 3D structural organization of the complex. Therefore, novel tools for simultaneous analysis of structure and sequence information of complex assemblies are required to better understand the basis of molecular mechanisms and their functional implications.

Results

Here, we present a web-based tool, Integrative Biology of Sequences and Structures (IBiSS), which is designed for interactively displaying 3D structures and selected sequences of subunits from large macromolecular complexes thus allowing simultaneous structure-sequence analysis such as conserved residues involved in catalysis or protein-protein interfaces. This tool comprises a Graphic User Interface and uses a rapid-access internal database, containing the relevant pre-aligned multiple sequences across all species available and 3D structural information. These annotations are automatically retrieved and updated from UniProt and crystallographic and cryo-EM data available in the Protein Data Bank (PDB) and Electron Microscopy Data Bank (EMDB).

Availability and implementation

The database contains all currently available structures of ribosomes, RNA polymerases, nucleosomes, proteasome, photosystem I and II complexes. IBiSS is available at http://ibiss.igbmc.fr

Contact

klaholz@igbmc.fr.",2015-06-19 +25679783,Phylogenetic profiling: how much input data is enough?,"Phylogenetic profiling is a well-established approach for predicting gene function based on patterns of gene presence and absence across species. Much of the recent developments have focused on methodological improvements, but relatively little is known about the effect of input data size on the quality of predictions. In this work, we ask: how many genomes and functional annotations need to be considered for phylogenetic profiling to be effective? Phylogenetic profiling generally benefits from an increased amount of input data. However, by decomposing this improvement in predictive accuracy in terms of the contribution of additional genomes and of additional annotations, we observed diminishing returns in adding more than ∼ 100 genomes, whereas increasing the number of annotations remained strongly beneficial throughout. We also observed that maximising phylogenetic diversity within a clade of interest improves predictive accuracy, but the effect is small compared to changes in the number of genomes under comparison. Finally, we show that these findings are supported in light of the Open World Assumption, which posits that functional annotation databases are inherently incomplete. All the tools and data used in this work are available for reuse from http://lab.dessimoz.org/14_phylprof. Scripts used to analyse the data are available on request from the authors.",2015-02-13 +30050061,Application of partial least squares in exploring the genome selection signatures between populations.,"Natural and artificial selection have led to substantial variation in the phenotypic traits of different populations. Therefore, there is a need to develop methods that are based on cross-population comparisons to discover loci related to specific traits. 
Here, we suggested a strategy to detect the genome selection signatures between populations based on the partial least squares (PLS) theory. Using the binary population indicator as the response variable in the PLS analysis, alleles under selection between populations were identified from the first PLS component. We explored the theory behind the PLS analysis to reveal its usefulness in detecting the loci under selection. Through the simulation study, the results showed that the PLS method had a better performance than the FST and EigenGWAS methods. In addition, by using the real data hapmap3, we found that rs11150606 in PRSS53 gene and rs1800414 in OCA2 gene were under selection between East Asian populations and three other populations, including African, American, and European populations. We concluded that this strategy was easily carried out and might supplement for the deficiency of the EigenGWAS method in some cases. To facilitate the application of this method, we developed an R script that is freely accessible at http://klab.sjtu.edu.cn/PLS/ .",2018-07-26 +30197593,Neuroscience Information Toolbox: An Open Source Toolbox for EEG-fMRI Multimodal Fusion Analysis.,"Recently, scalp electroencephalography (EEG) and functional magnetic resonance imaging (fMRI) multimodal fusion has been pursued in an effort to study human brain function and dysfunction to obtain more comprehensive information on brain activity in which the spatial and temporal resolutions are both satisfactory. However, a more flexible and easy-to-use toolbox for EEG-fMRI multimodal fusion is still lacking. In this study, we therefore developed a freely available and open-source MATLAB graphical user interface toolbox, known as the Neuroscience Information Toolbox (NIT), for EEG-fMRI multimodal fusion analysis. 
The NIT consists of three modules: (1) the fMRI module, which has batch fMRI preprocessing, nuisance signal removal, bandpass filtering, and calculation of resting-state measures; (2) the EEG module, which includes artifact removal, extracting EEG features (event onset, power, and amplitude), and marking interesting events; and (3) the fusion module, in which fMRI-informed EEG analysis and EEG-informed fMRI analysis are included. The NIT was designed to provide a convenient and easy-to-use toolbox for researchers, especially for novice users. The NIT can be downloaded for free at http://www.neuro.uestc.edu.cn/NIT.html, and detailed information, including the introduction of NIT, user's manual and example data sets, can also be observed on this website. We hope that the NIT is a promising toolbox for exploring brain information in various EEG and fMRI studies.",2018-08-24 +28472232,Differential privacy-based evaporative cooling feature selection and classification with relief-F and random forests.,"

Motivation

Classification of individuals into disease or clinical categories from high-dimensional biological data with low prediction error is an important challenge of statistical learning in bioinformatics. Feature selection can improve classification accuracy but must be incorporated carefully into cross-validation to avoid overfitting. Recently, feature selection methods based on differential privacy, such as differentially private random forests and reusable holdout sets, have been proposed. However, for domains such as bioinformatics, where the number of features is much larger than the number of observations (p ≫ n), these differential privacy methods are susceptible to overfitting.

Methods

We introduce private Evaporative Cooling, a stochastic privacy-preserving machine learning algorithm that uses Relief-F for feature selection and random forest for privacy preserving classification that also prevents overfitting. We relate the privacy-preserving threshold mechanism to a thermodynamic Maxwell-Boltzmann distribution, where the temperature represents the privacy threshold. We use the thermal statistical physics concept of Evaporative Cooling of atomic gases to perform backward stepwise privacy-preserving feature selection.

Results

On simulated data with main effects and statistical interactions, we compare accuracies on holdout and validation sets for three privacy-preserving methods: the reusable holdout, reusable holdout with random forest, and private Evaporative Cooling, which uses Relief-F feature selection and random forest classification. In simulations where interactions exist between attributes, private Evaporative Cooling provides higher classification accuracy without overfitting based on an independent validation set. In simulations without interactions, thresholdout with random forest and private Evaporative Cooling give comparable accuracies. We also apply these privacy methods to human brain resting-state fMRI data from a study of major depressive disorder.

Availability and implementation

Code available at http://insilico.utulsa.edu/software/privateEC .

Contact

brett-mckinney@utulsa.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +30607661,Effects of Discontinue Rules on Psychometric Properties of Test Scores.,"This paper provides results on a form of adaptive testing that is used frequently in intelligence testing. In these tests, items are presented in order of increasing difficulty. The presentation of items is adaptive in the sense that a session is discontinued once a test taker produces a certain number of incorrect responses in sequence, with subsequent (not observed) responses commonly scored as wrong. The Stanford-Binet Intelligence Scales (SB5; Riverside Publishing Company, 2003) and the Kaufman Assessment Battery for Children (KABC-II; Kaufman and Kaufman, 2004), the Kaufman Adolescent and Adult Intelligence Test (Kaufman and Kaufman 2014) and the Universal Nonverbal Intelligence Test (2nd ed.) (Bracken and McCallum 2015) are some of the many examples using this rule. He and Wolfe (Educ Psychol Meas 72(5):808-826, 2012. https://doi.org/10.1177/0013164412441937 ) compared different ability estimation methods in a simulation study for this discontinue rule adaptation of test length. However, there has been no study, to our knowledge, of the underlying distributional properties based on analytic arguments drawing on probability theory, of what these authors call stochastic censoring of responses. The study results obtained by He and Wolfe (Educ Psychol Meas 72(5):808-826, 2012. https://doi.org/10.1177/0013164412441937 ) agree with results presented by DeAyala et al. (J Educ Meas 38:213-234, 2001) as well as Rose et al. (Modeling non-ignorable missing data with item response theory (IRT; ETS RR-10-11), Educational Testing Service, Princeton, 2010) and Rose et al. (Psychometrika 82:795-819, 2017. https://doi.org/10.1007/s11336-016-9544-7 ) in that ability estimates are biased most when scoring the not observed responses as wrong. 
This scoring is used operationally, so more research is needed in order to improve practice in this field. The paper extends existing research on adaptivity by discontinue rules in intelligence tests in multiple ways: First, an analytical study of the distributional properties of discontinue rule scored items is presented. Second, a simulation is presented that includes additional scoring rules and uses ability estimators that may be suitable to reduce bias for discontinue rule scored intelligence tests.",2019-01-03 +23895341,Dovetailing biology and chemistry: integrating the Gene Ontology with the ChEBI chemical ontology.,"

Background

The Gene Ontology (GO) facilitates the description of the action of gene products in a biological context. Many GO terms refer to chemical entities that participate in biological processes. To facilitate accurate and consistent systems-wide biological representation, it is necessary to integrate the chemical view of these entities with the biological view of GO functions and processes. We describe a collaborative effort between the GO and the Chemical Entities of Biological Interest (ChEBI) ontology developers to ensure that the representation of chemicals in the GO is both internally consistent and in alignment with the chemical expertise captured in ChEBI.

Results

We have examined and integrated the ChEBI structural hierarchy into the GO resource through computationally-assisted manual curation of both GO and ChEBI. Our work has resulted in the creation of computable definitions of GO terms that contain fully defined semantic relationships to corresponding chemical terms in ChEBI.

Conclusions

The set of logical definitions using both the GO and ChEBI has already been used to automate aspects of GO development and has the potential to allow the integration of data across the domains of biology and chemistry. These logical definitions are available as an extended version of the ontology from http://purl.obolibrary.org/obo/go/extensions/go-plus.owl.",2013-07-29 +28459556,Pred-Skin: A Fast and Reliable Web Application to Assess Skin Sensitization Effect of Chemicals.,"Chemically induced skin sensitization is a complex immunological disease with a profound impact on quality of life and working ability. Despite some progress in developing alternative methods for assessing the skin sensitization potential of chemical substances, there is no in vitro test that correlates well with human data. Computational QSAR models provide a rapid screening approach and contribute valuable information for the assessment of chemical toxicity. We describe the development of a freely accessible web-based and mobile application for the identification of potential skin sensitizers. The application is based on previously developed binary QSAR models of skin sensitization potential from human (109 compounds) and murine local lymph node assay (LLNA, 515 compounds) data with good external correct classification rate (0.70-0.81 and 0.72-0.84, respectively). We also included a multiclass skin sensitization potency model based on LLNA data (accuracy ranging between 0.73 and 0.76). When a user evaluates a compound in the web app, the outputs are (i) binary predictions of human and murine skin sensitization potential; (ii) multiclass prediction of murine skin sensitization; and (iii) probability maps illustrating the predicted contribution of chemical fragments. The app is the first tool available that incorporates quantitative structure-activity relationship (QSAR) models based on human data as well as multiclass models for LLNA. 
The Pred-Skin web app version 1.0 is freely available for the web, iOS, and Android (in development) at the LabMol web portal ( http://labmol.com.br/predskin/ ), in the Apple Store, and on Google Play, respectively. We will continuously update the app as new skin sensitization data and respective models become available.",2017-05-10 +28575147,Tissue-specific network-based genome wide study of amygdala imaging phenotypes to identify functional interaction modules.,"

Motivation

Network-based genome-wide association studies (GWAS) aim to identify functional modules from biological networks that are enriched by top GWAS findings. Although gene functions are relevant to tissue context, most existing methods analyze tissue-free networks without reflecting phenotypic specificity.

Results

We propose a novel module identification framework for imaging genetic studies using the tissue-specific functional interaction network. Our method includes three steps: (i) re-prioritize imaging GWAS findings by applying machine learning methods to incorporate network topological information and enhance the connectivity among top genes; (ii) detect densely connected modules based on interactions among top re-prioritized genes; and (iii) identify phenotype-relevant modules enriched by top GWAS findings. We demonstrate our method on the GWAS of [18F]FDG-PET measures in the amygdala region using the imaging genetic data from the Alzheimer's Disease Neuroimaging Initiative, and map the GWAS results onto the amygdala-specific functional interaction network. The proposed network-based GWAS method can effectively detect densely connected modules enriched by top GWAS findings. Tissue-specific functional network can provide precise context to help explore the collective effects of genes with biologically meaningful interactions specific to the studied phenotype.

Availability and implementation

The R code and sample data are freely available at http://www.iu.edu/shenlab/tools/gwasmodule/.

Contact

shenli@iu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +25392424,Expediting topology data gathering for the TOPDB database.,"The Topology Data Bank of Transmembrane Proteins (TOPDB, http://topdb.enzim.ttk.mta.hu) contains experimentally determined topology data of transmembrane proteins. Recently, we have updated TOPDB from several sources and utilized a newly developed topology prediction algorithm to determine the most reliable topology using the results of experiments as constraints. In addition to collecting the experimentally determined topology data published in the last couple of years, we gathered topographies defined by the TMDET algorithm using 3D structures from the PDBTM. Results of global topology analysis of various organisms as well as topology data generated by high throughput techniques, like the sequential positions of N- or O-glycosylations were incorporated into the TOPDB database. Moreover, a new algorithm was developed to integrate scattered topology data from various publicly available databases and a new method was introduced to measure the reliability of predicted topologies. We show that reliability values highly correlate with the per protein topology accuracy of the utilized prediction method. Altogether, more than 52,000 new topology data and more than 2600 new transmembrane proteins have been collected since the last public release of the TOPDB database.",2014-11-11 +25881043,GraphSAW: a web-based system for graphical analysis of drug interactions and side effects using pharmaceutical and molecular data.,"

Background

Adverse drug reactions are one of the most common causes of death in industrialized Western countries. Nowadays, empirical data from clinical studies for the approval and monitoring of drugs and molecular databases is available.

Methods

The integration of database information is a promising method for providing well-based knowledge to avoid adverse drug reactions. This paper presents our web-based decision support system GraphSAW which analyzes and evaluates drug interactions and side effects based on data from two commercial and two freely available molecular databases. The system is able to analyze single and combined drug-drug interactions, drug-molecule interactions as well as single and cumulative side effects. In addition, it allows exploring associative networks of drugs, molecules, metabolic pathways, and diseases in an intuitive way. The molecular medication analysis includes the capabilities of the upper features.

Results

A statistical evaluation of the integrated data and top 20 drugs concerning drug interactions and side effects is performed. The results of the data analysis give an overview of all theoretically possible drug interactions and side effects. The evaluation shows a mismatch between pharmaceutical and molecular databases. The concordance of drug interactions was about 12% and 9% of drug side effects. An application case with prescribed data of 11 patients is presented in order to demonstrate the functionality of the system under real conditions. For each patient at least two interactions occurred in every medication and about 8% of total diseases were possibly induced by drug therapy.

Conclusions

GraphSAW (http://tunicata.techfak.uni-bielefeld.de/graphsaw/) is meant to be a web-based system for health professionals and researchers. GraphSAW provides comprehensive drug-related knowledge and an improved medication analysis which may support efforts to reduce the risk of medication errors and numerous drastic side effects.",2015-02-28 +23426257,Planform: an application and database of graph-encoded planarian regenerative experiments.,"

Summary

Understanding the mechanisms governing the regeneration capabilities of many organisms is a fundamental interest in biology and medicine. An ever-increasing number of manipulation and molecular experiments are attempting to discover a comprehensive model for regeneration, with the planarian flatworm being one of the most important model species. Despite much effort, no comprehensive, constructive, mechanistic models exist yet, and it is now clear that computational tools are needed to mine this huge dataset. However, until now, there is no database of regenerative experiments, and the current genotype-phenotype ontologies and databases are based on textual descriptions, which are not understandable by computers. To overcome these difficulties, we present here Planform (Planarian formalization), a manually curated database and software tool for planarian regenerative experiments, based on a mathematical graph formalism. The database contains more than a thousand experiments from the main publications in the planarian literature. The software tool provides the user with a graphical interface to easily interact with and mine the database. The presented system is a valuable resource for the regeneration community and, more importantly, will pave the way for the application of novel artificial intelligence tools to extract knowledge from this dataset.

Availability

The database and software tool are freely available at http://planform.daniel-lobo.com.",2013-02-19 +27507702,"Transcriptome sequencing and de novo characterization of Korean endemic land snail, Koreanohadra kurodana for functional transcripts and SSR markers.","The Korean endemic land snail Koreanohadra kurodana (Gastropoda: Bradybaenidae) found in humid areas of broadleaf forests and shrubs have been considered vulnerable as the number of individuals are declining in recent years. The species is poorly characterized at the genomic level that limits the understanding of functions at the molecular and genetics level. In the present study, we performed de novo transcriptome sequencing to produce a comprehensive transcript dataset of visceral mass tissue of K. kurodana by the Illumina paired-end sequencing technology. Over 234 million quality reads were assembled to a total of 315,924 contigs and 191,071 unigenes, with an average and N50 length of 585.6 and 715 bp and 678 and 927 bp, respectively. Overall, 36.32 % of the unigenes found matches to known protein/nucleotide sequences in the public databases. The direction of the unigenes to functional categories was determined using COG, GO, KEGG, and InterProScan protein domain search. The GO analysis search resulted in 22,967 unigenes (12.02 %) being categorized into 40 functional groups. The KEGG annotation revealed that metabolism pathway genes were enriched. The most prominent protein motifs include the zinc finger, ribonuclease H, reverse transcriptase, and ankyrin repeat domains. The simple sequence repeats (SSRs) identified from >1 kb length of unigenes show a dominancy of dinucleotide repeat motifs followed with tri- and tetranucleotide motifs. A number of unigenes were putatively assessed to belong to adaptation and defense mechanisms including heat shock proteins 70, Toll-like receptor 4, AMP-activated protein kinase, aquaporin-2, etc. 
Our data provide a rich source for the identification and functional characterization of new genes and candidate polymorphic SSR markers in K. kurodana. The availability of transcriptome information ( http://bioinfo.sch.ac.kr/submission/ ) would promote the utilization of the resources for phylogenetics study and genetic diversity assessment.",2016-08-09 +29168754,A Bioinformatic Pipeline for Monitoring of the Mutational Stability of Viral Drug Targets with Deep-Sequencing Technology. ,"The efficient development of antiviral drugs, including efficient antiviral small interfering RNAs (siRNAs), requires continuous monitoring of the strict correspondence between a drug and the related highly variable viral DNA/RNA target(s). Deep sequencing is able to provide an assessment of both the general target conservation and the frequency of particular mutations in the different target sites. The aim of this study was to develop a reliable bioinformatic pipeline for the analysis of millions of short, deep sequencing reads corresponding to selected highly variable viral sequences that are drug target(s). The suggested bioinformatic pipeline combines the available programs and the ad hoc scripts based on an original algorithm of the search for the conserved targets in the deep sequencing data. We also present the statistical criteria for the threshold of reliable mutation detection and for the assessment of variations between corresponding data sets. These criteria are robust against the possible sequencing errors in the reads. As an example, the bioinformatic pipeline is applied to the study of the conservation of RNA interference (RNAi) targets in human immunodeficiency virus 1 (HIV-1) subtype A. The developed pipeline is freely available to download at the website http://virmut.eimb.ru/. 
Brief comments and comparisons between VirMut and other pipelines are also presented.",2017-11-23 +29270978,General single-index survival regression models for incident and prevalent covariate data and prevalent data without follow-up.,"This article mainly focuses on analyzing covariate data from incident and prevalent cohort studies and a prevalent sample with only baseline covariates of interest and truncation times. Our major task in both research streams is to identify the effects of covariates on a failure time through very general single-index survival regression models without observing survival outcomes. With a strict increase of the survival function in the linear predictor, the ratio of incident and prevalent covariate densities is shown to be a non-degenerate and monotonic function of the linear predictor under covariate-independent truncation. Without such a structural assumption, the conditional density of a truncation time in a prevalent cohort is ensured to be a non-degenerate function of the linear predictor. In light of these features, some innovative approaches, which are based on the maximum rank correlation estimation or the pseudo least integrated squares estimation, are developed to estimate the coefficients of covariates up to a scale factor. Existing theoretical results are further used to establish the √n-consistency and asymptotic normality of the proposed estimators. Moreover, extensive simulations are conducted to assess and compare the finite-sample performance of various estimators. To illustrate the methodological ideas, we also analyze data from the Worcester Heart Attack Study and the National Comorbidity Survey Replication.",2017-12-21 +30314484,Dynamic Optimization with Particle Swarms (DOPS): a meta-heuristic for parameter estimation in biochemical models.,"

Background

Mathematical modeling is a powerful tool to analyze, and ultimately design biochemical networks. However, the estimation of the parameters that appear in biochemical models is a significant challenge. Parameter estimation typically involves expensive function evaluations and noisy data, making it difficult to quickly obtain optimal solutions. Further, biochemical models often have many local extrema which further complicates parameter estimation. Toward these challenges, we developed Dynamic Optimization with Particle Swarms (DOPS), a novel hybrid meta-heuristic that combined multi-swarm particle swarm optimization with dynamically dimensioned search (DDS). DOPS uses a multi-swarm particle swarm optimization technique to generate candidate solution vectors, the best of which is then greedily updated using dynamically dimensioned search.

Results

We tested DOPS using classic optimization test functions, biochemical benchmark problems and real-world biochemical models. We performed [Formula: see text] = 25 trials with [Formula: see text] = 4000 function evaluations per trial, and compared the performance of DOPS with other commonly used meta-heuristics such as differential evolution (DE), simulated annealing (SA) and dynamically dimensioned search (DDS). On average, DOPS outperformed other common meta-heuristics on the optimization test functions, benchmark problems and a real-world model of the human coagulation cascade.

Conclusions

DOPS is a promising meta-heuristic approach for the estimation of biochemical model parameters in relatively few function evaluations. DOPS source code is available for download under a MIT license at http://www.varnerlab.org .",2018-10-12 +29906262,Exposure to Polycyclic Aromatic Hydrocarbons and Accelerated DNA Methylation Aging.,"

Background

Aging is related to an increased risk of morbidity and mortality and is affected by environmental factors. Exposure to polycyclic aromatic hydrocarbons (PAHs) is associated with adverse health outcomes; but the association of such exposure with DNA methylation aging, a novel aging marker, is unclear.

Objectives

Our aim was to investigate the association of PAH exposure with methylation aging.

Methods

We trained and validated a methylation age predictor suitable for Chinese populations using whole blood methylation data in 989 Chinese and 160 Caucasians. We defined two aging indicators: Δage, as methylation age minus chronological age; and aging rate, the ratio of methylation to chronological age. The association of PAH exposure with aging indicators was evaluated using linear regressions in three panels of healthy Chinese participants (N=539, among the aforementioned 989 Chinese participants) whose exposure levels were assessed by 10 urinary monohydroxy-PAH metabolites.

Results

We developed a methylation age predictor providing accurate predictions in both Chinese individuals and Caucasian persons (R=0.94-0.96, RMSE=3.8-4.3). Among the 10 urinary metabolites that we measured, 1-hydroxypyrene and 9-hydroxyphenanthrene were associated with methylation aging independently of other OH-PAHs and risk factors; 1-unit increase in 1-hydroxypyrene was associated with a 0.53-y increase in Δage [95% confidence interval (CI): 0.18, 0.88; false discovery rate (FDR) FDR=0.004] and 1.17% increase in aging rate (95% CI: 0.36, 1.98; FDR=0.02), whereas for 9-hydroxyphenanthrene, the increase was 0.54-y for Δage (95% CI: 0.17, 0.91; FDR=0.004), and 1.15% for aging rate (95% CI: 0.31, 1.99; FDR=0.02). The association direction was consistent across the three Chinese panels with the association magnitude correlating with the panels' exposure levels; the association was validated by methylation data of purified leukocytes. Several cytosine-phosphoguanines, including those located on FHL2 and ELOVL2, were found associated with both aging indicators and monohydroxy-PAH levels.

Conclusions

We developed a methylation age predictor specific for Chinese populations but also accurate for Caucasian populations. Our findings suggest that exposure to PAHs may be associated with an adverse impact on human aging and epigenetic alterations in Chinese populations. https://doi.org/10.1289/EHP2773.",2018-06-14 +29494924,Imidazolium ionic liquids as effective antiseptics and disinfectants against drug resistant S. aureus: In silico and in vitro studies.,"This paper describes Quantitative Structure-Activity Relationships (QSAR) studies, molecular docking and in vitro antibacterial activity of several potent imidazolium-based ionic liquids (ILs) against S. aureus ATCC 25923 and its clinical isolate. Small set of 131 ILs was collected from the literature and uploaded in the OCHEM database. QSAR methodologies used Associative Neural Networks and Random Forests (WEKA-RF) methods. The predictive ability of the models was tested through cross-validation, giving cross-validated coefficients q2 = 0.82-0.87 for regression models and overall prediction accuracies of 80-82.1% for classification models. The proposed QSAR models are freely available online on OCHEM server at https://ochem.eu/article/107364 and can be used for estimation of antibacterial activity of new imidazolium-based ILs. A series of synthesized 1,3-dialkylimidazolium ILs with predicted activity were evaluated in vitro. The high activity of 7 ILs against S. aureus strain and its clinical isolate was measured and thereafter analyzed by the molecular docking to prokaryotic homologue of a eukaryotic tubulin FtsZ.",2018-02-08 +26566288,DBGC: A Database of Human Gastric Cancer.,"The Database of Human Gastric Cancer (DBGC) is a comprehensive database that integrates various human gastric cancer-related data resources. Human gastric cancer-related transcriptomics projects, proteomics projects, mutations, biomarkers and drug-sensitive genes from different sources were collected and unified in this database. 
Moreover, epidemiological statistics of gastric cancer patients in China and clinicopathological information annotated with gastric cancer cases were also integrated into the DBGC. We believe that this database will greatly facilitate research regarding human gastric cancer in many fields. DBGC is freely available at http://bminfor.tongji.edu.cn/dbgc/index.do.",2015-11-13 +27153576,Pathway analysis by randomization incorporating structure-PARIS: an update.,"

Motivation

We present an update to the pathway enrichment analysis tool 'Pathway Analysis by Randomization Incorporating Structure (PARIS)' that determines aggregated association signals generated from genome-wide association study results. Pathway-based analyses highlight biological pathways associated with phenotypes. PARIS uses a unique permutation strategy to evaluate the genomic structure of interrogated pathways, through permutation testing of genomic features, thus eliminating many of the over-testing concerns arising with other pathway analysis approaches.

Results

We have updated PARIS to incorporate expanded pathway definitions through the incorporation of new expert knowledge from multiple database sources, through customized user provided pathways, and other improvements in user flexibility and functionality.

Availability and implementation

PARIS is freely available to all users at https://ritchielab.psu.edu/software/paris-download

Contact

jnc43@case.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-07 +22080506,A database of immunoglobulins with integrated tools: DIGIT.,"The DIGIT (Database of ImmunoGlobulins with Integrated Tools) database (http://biocomputing.it/digit) is an integrated resource storing sequences of annotated immunoglobulin variable domains and enriched with tools for searching and analyzing them. The annotations in the database include information on the type of antigen, the respective germline sequences and on pairing information between light and heavy chains. Other annotations, such as the identification of the complementarity determining regions, assignment of their structural class and identification of mutations with respect to the germline, are computed on the fly and can also be obtained for user-submitted sequences. The system allows customized BLAST searches and automatic building of 3D models of the domains to be performed.",2011-11-10 +29112189,Quantification of sensitivity and resistance of breast cancer cell lines to anti-cancer drugs using GR metrics.,"Traditional means for scoring the effects of anti-cancer drugs on the growth and survival of cell lines is based on relative cell number in drug-treated and control samples and is seriously confounded by unequal division rates arising from natural biological variation and differences in culture conditions. This problem can be overcome by computing drug sensitivity on a per-division basis. The normalized growth rate inhibition (GR) approach yields per-division metrics for drug potency (GR50) and efficacy (GRmax) that are analogous to the more familiar IC50 and Emax values. In this work, we report GR-based, proliferation-corrected, drug sensitivity metrics for ~4,700 pairs of breast cancer cell lines and perturbagens. Such data are broadly useful in understanding the molecular basis of therapeutic response and resistance. 
Here, we use them to investigate the relationship between different measures of drug sensitivity and conclude that drug potency and efficacy exhibit high variation that is only weakly correlated. To facilitate further use of these data, computed GR curves and metrics can be browsed interactively at http://www.GRbrowser.org/.",2017-11-07 +24218641,Structure-aided prediction of mammalian transcription factor complexes in conserved non-coding elements.,"Mapping the DNA-binding preferences of transcription factor (TF) complexes is critical for deciphering the functions of cis-regulatory elements. Here, we developed a computational method that compares co-occurring motif spacings in conserved versus unconserved regions of the human genome to detect evolutionarily constrained binding sites of rigid TF complexes. Structural data were used to estimate TF complex physical plausibility, explore overlapping motif arrangements seldom tackled by non-structure-aware methods, and generate and analyse three-dimensional models of the predicted complexes bound to DNA. Using this approach, we predicted 422 physically realistic TF complex motifs at 18% false discovery rate, the majority of which (326, 77%) contain some sequence overlap between binding sites. The set of mostly novel complexes is enriched in known composite motifs, predictive of binding site configurations in TF-TF-DNA crystal structures, and supported by ChIP-seq datasets. Structural modelling revealed three cooperativity mechanisms: direct protein-protein interactions, potentially indirect interactions and 'through-DNA' interactions. Indeed, 38% of the predicted complexes were found to contain four or more bases in which TF pairs appear to synergize through overlapping binding to the same DNA base pairs in opposite grooves or strands. 
Our TF complex and associated binding site predictions are available as a web resource at http://bejerano.stanford.edu/complex.",2013-11-11 +29425804,Systematic survey of non-retroviral virus-like elements in eukaryotic genomes.,"Endogenous viral elements (EVEs) are viral sequences that are endogenized in the host cell. Recently, several eukaryotic genomes have been shown to contain EVEs. To improve the understanding of EVEs in eukaryotes, we have developed a system for detecting EVE-like sequences in eukaryotes and conducted a large-scale nucleotide sequence similarity search using all available eukaryotic and viral genome assembly sequences (excluding those from retroviruses) stored in the National Center for Biotechnology Information genome database (as of August 14, 2017). We found that 3856 of 7007 viral genomes were similar to 4098 of 4102 eukaryotic genomes. For those EVE-like sequences, we constructed a database, Predicted Endogenous Viral Elements (pEVE, http://peve.med.u-tokai.ac.jp) which provides comprehensive search results summarized from an evolutionary viewpoint. A comparison of EVE-like sequences among closely related species may be useful to avoid false-positive hits. We believe that our search system and database will facilitate studies on EVEs.",2018-02-06 +30458509,Clinical Progression in Four Cases of Primary Progressive Apraxia of Speech.,"

Purpose

This case series details the clinical progression of patients with primary progressive apraxia of speech (PPAOS) to illustrate, using several methods and supplemental material examples, the changes that occur in speech and language functioning in this patient population.

Method

Four patients who presented with PPAOS were followed between 5 and 6 years. Two patients had predominant articulatory abnormalities (termed phonetic PPAOS), 1 had predominant prosodic abnormalities (prosodic PPAOS), and 1 had relatively equal articulatory and prosodic abnormalities (mixed PPAOS). Detailed speech (including acoustics), language, neurologic, and neuropsychological data were collected.

Results

At initial exam, the patients ranged from 60 to 77 years old, with presenting disease duration of 1.5-10 years. Although all patients presented with an isolated apraxia of speech, all developed varying degrees of aphasia and dysarthria. Patients with phonetic PPAOS developed relatively more severe aphasia than the other 2 patients. All patients eventually had severe functional communication limitations and required alternative or augmentative means of communication, although at varying times postonset of their initial speech problem. Two patients developed dysphagia, 3 showed mild-moderate Parkinsonism, and 2 developed depression. For all patients, simple temporal acoustic measurements documented slowed speech rate over time.

Conclusions

This case series demonstrates that patients who initially present with PPAOS may develop aphasia and dysarthria, cognitive and behavioral changes, and other neurologic signs. Whether these changes can be predicted by the perceptual characteristics of the apraxia of speech is yet to be determined. The detailed longitudinal profiles provide valuable clinical insight into the progression of disease in people with PPAOS.

Supplemental material

https://doi.org/10.23641/asha.7051616.",2018-11-01 +30177993,Children's Consonant Acquisition in 27 Languages: A Cross-Linguistic Review.,"

Purpose

The aim of this study was to provide a cross-linguistic review of acquisition of consonant phonemes to inform speech-language pathologists' expectations of children's developmental capacity by (a) identifying characteristics of studies of consonant acquisition, (b) describing general principles of consonant acquisition, and (c) providing case studies for English, Japanese, Korean, and Spanish.

Method

A cross-linguistic review was undertaken of 60 articles describing 64 studies of consonant acquisition by 26,007 children from 31 countries in 27 languages: Afrikaans, Arabic, Cantonese, Danish, Dutch, English, French, German, Greek, Haitian Creole, Hebrew, Hungarian, Icelandic, Italian, Jamaican Creole, Japanese, Korean, Malay, Maltese, Mandarin (Putonghua), Portuguese, Setswana (Tswana), Slovenian, Spanish, Swahili, Turkish, and Xhosa.

Results

Most studies were cross-sectional and examined single word production. Combining data from 27 languages, most of the world's consonants were acquired by 5;0 years;months old. By 5;0, children produced at least 93% of consonants correctly. Plosives, nasals, and nonpulmonic consonants (e.g., clicks) were acquired earlier than trills, flaps, fricatives, and affricates. Most labial, pharyngeal, and posterior lingual consonants were acquired earlier than consonants with anterior tongue placement. However, there was an interaction between place and manner where plosives and nasals produced with anterior tongue placement were acquired earlier than anterior trills, fricatives, and affricates.

Conclusions

Children across the world acquire consonants at a young age. Five-year-old children have acquired most consonants within their ambient language; however, individual variability should be considered.

Supplemental material

https://doi.org/10.23641/asha.6972857.",2018-11-01 +28225594,Mass Spectral Feature List Optimizer (MS-FLO): A Tool To Minimize False Positive Peak Reports in Untargeted Liquid Chromatography-Mass Spectroscopy (LC-MS) Data Processing.,"Untargeted metabolomics by liquid chromatography-mass spectrometry generates data-rich chromatograms in the form of m/z-retention time features. Managing such datasets is a bottleneck. Many popular data processing tools, including XCMS-online and MZmine2, yield numerous false-positive peak detections. Flagging and removing such false peaks manually is a time-consuming task and prone to human error. We present a web application, Mass Spectral Feature List Optimizer (MS-FLO), to improve the quality of feature lists after initial processing to expedite the process of data curation. The tool utilizes retention time alignments, accurate mass tolerances, Pearson's correlation analysis, and peak height similarity to identify ion adducts, duplicate peak reports, and isotopic features of the main monoisotopic metabolites. Removing such erroneous peaks reduces the overall number of metabolites in data reports and improves the quality of subsequent statistical investigations. To demonstrate the effectiveness of MS-FLO, we processed 28 biological studies and uploaded raw and results data to the Metabolomics Workbench website ( www.metabolomicsworkbench.org ), encompassing 1481 chromatograms produced by two different data processing programs used in-house (MZmine2 and later MS-DIAL). Post-processing of datasets with MS-FLO yielded a 7.8% automated reduction of total peak features and flagged an additional 7.9% of features, per dataset, for review by the user. When manually curated, 87% of these additional flagged features were verified false positives. 
MS-FLO is an open source web application that is freely available for use at http://msflo.fiehnlab.ucdavis.edu .",2017-03-06 +27797774,Web-igloo: a web based platform for multivariate data visualization.,"

Motivation

The majority of data generated routinely from various experiments are essentially multivariate, often categorized with multiple experimental metadata. Analyzing such results with interactive visualizations often yields interesting and intuitive results which otherwise remain undisclosed.

Results

In this paper, we present Web-Igloo, a GUI-based interactive 'feature decomposition independent' multivariate data visualization platform. Web-Igloo is likely to be a valuable contribution in the field of visual data mining, especially for researchers working with but not limited to multi-omics data. To demonstrate its utility, we have used a metagenomic dataset pertaining to the effect of multiple doses of antibiotic treatment on the human gut microbiome.

Availability and implementation

http://metagenomics.atc.tcs.com/webigloo and http://121.241.184.233/webigloo [Freely available for academic use].

Contact

sharmila@atc.tcs.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +21880546,MtbSD--a comprehensive structural database for Mycobacterium tuberculosis.,"The Mycobacterium tuberculosis Structural Database (MtbSD) (http://bmi.icmr.org.in/mtbsd/MtbSD.php) is a relational database for the study of protein structures of M. tuberculosis. It currently holds information on description, reaction catalyzed and domains involved, active sites, structural homologues and similarities between bound and cognate ligands, for all the 857 protein structures that are available for M. tb proteins. The database will be a valuable resource for TB researchers to select the appropriate protein-ligand complex of a given protein for molecular modelling, docking, virtual screening and structure-based drug designing.",2011-08-30 +30814279,Investigating the Biological Relevance of In Vitro-Identified Putative Packaging Signals at the 5' Terminus of Satellite Tobacco Necrosis Virus 1 Genomic RNA. ,"Satellite tobacco necrosis virus 1 (STNV-1) is a model system for in vitro RNA encapsidation studies (N. Patel, E. C. Dykeman, R. H. A. Coutts, G. P. Lomonossoff, et al., Proc Natl Acad Sci U S A 112:2227-2232, 2015, https://doi.org/10.1073/pnas.1420812112; N. Patel, E. Wroblewski, G. Leonov, S. E. V. Phillips, et al., Proc Natl Acad Sci U S A 114:12255-12260, 2017, https://doi.org/10.1073/pnas.1706951114), leading to the identification of degenerate packaging signals (PSs) proposed to be involved in the recognition of its genome by the capsid protein (CP). The aim of the present work was to investigate whether these putative PSs can confer selective packaging of STNV-1 RNA in vivo and to assess the prospects of using decoy RNAs in antiviral therapy. 
We have developed an in planta packaging assay based on the transient expression of STNV-1 CP and have assessed the ability of the resulting virus-like particles (VLPs) to encapsidate mutant STNV-1 RNAs expected to have different encapsidation potential based on in vitro studies. The results revealed that >90% of the encapsidated RNAs are host derived, although there is some selectivity of packaging for STNV-1 RNA and certain host RNAs. Comparison of the packaging efficiencies of mutant STNV-1 RNAs showed that they are encapsidated mainly according to their abundance within the cells, rather than the presence or absence of the putative PSs previously identified from in vitro studies. In contrast, subsequent infection experiments demonstrated that host RNAs represent only <1% of virion content. Although selective encapsidation of certain host RNAs was noted, no direct correlation could be made between this preference and the presence of potential PSs in the host RNA sequences. Overall, the data illustrate that the differences in RNA packaging efficiency identified through in vitro studies are insufficient to explain the specific packaging of STNV-1 RNA. IMPORTANCE Viruses preferentially encapsidate their own genomic RNA, sometimes as a result of the presence of clearly defined packaging signals (PSs) in their genome sequence. Recently, a novel form of short degenerate PSs has been proposed (N. Patel, E. C. Dykeman, R. H. A. Coutts, G. P. Lomonossoff, et al., Proc Natl Acad Sci U S A 112:2227-2232, 2015, https://doi.org/10.1073/pnas.1420812112; N. Patel, E. Wroblewski, G. Leonov, S. E. V. Phillips, et al., Proc Natl Acad Sci U S A 114:12255-12260, 2017, https://doi.org/10.1073/pnas.1706951114) using satellite tobacco necrosis virus 1 (STNV-1) as a model system for in vitro studies. It has been suggested that competing with these putative PSs may constitute a novel therapeutic approach against pathogenic single-stranded RNA viruses. 
Our work demonstrates that the previously identified PSs have no discernible significance for the selective packaging of STNV-1 in vivo in the presence and absence of competition or replication: viral sequences are encapsidated mostly on the basis of their abundance within the cell, while encapsidation of host RNAs also occurs. Nevertheless, the putative PSs identified in STNV-1 RNA may still have applications in bionanotechnology, such as the in vitro selective packaging of RNA molecules.",2019-04-17 +30064657,The PreCancer Atlas (PCA).,"Reproduced from https://visualsonline.cancer.gov/details.cfm?imageid=11474. Early detection offers a better chance of saving lives from cancer. The National Cancer Institute (NCI) supports research to improve cancer detection in its early stages, when it may be most treatable, and to accurately assess how likely it is for a precancerous growth to progress to life-threatening disease. The PreCancer Atlas (PCA) of the NCI envisages a histological and multi-omic mapping strategy in time and space to provide detailed molecular, cellular, and structural characterization of premalignant lesions and how they evolve to invasive cancers. The PCA will result in a paradigm shift in our knowledge of events initiating carcinogenesis, which may also be relevant to understanding pathogenesis related to exposure to carcinogens. It will also develop a greater understanding of the biological underpinnings of how premalignant lesions transition to invasive cancers, will help identify largely unknown molecular mechanisms operating in the clinically and microscopically occult phase of human carcinogenesis, and open unprecedented opportunities for the development of effective strategies for the early detection and prevention of cancers. 
Thus, the PCA represents more than an incremental advance in the field and will generate data that may change the standards of practice in oncology.",2018-07-03 +28649586,Data from salivary gland proteome analysis of female Aedes aegypti Linn.,"Salivary gland proteins from female Aedes aegypti mosquito were extracted and analyzed on high-resolution mass spectrometry. Proteomic data was analysed using two search algorithms SEQUEST and Mascot, which results in acquisition of 83,836 spectra which were assigned to 5417 peptides belonging to 1208 proteins. These proteins were then assigned molecular functions and further analysis revealed biological processes they are involved in using Gene Ontology annotations. Several immunity related pathways were found to be enriched in salivary gland. The data of this study are also related to the research article ""Mosquito-Borne Diseases and Omics: Salivary gland proteome of the female Aedes aegypti mosquito"" (Dhawan et al., 2017) [1]. These data are deposited in ProteomeXchange in the public dataset PXD002468. In addition, a scientific interpretation of this dataset by Dhawan et al. [1] is available at http://dx.doi.org/10.1089/journal.omi.2016.0160.",2017-05-25 +28069593,KNIME4NGS: a comprehensive toolbox for next generation sequencing analysis.,"

Summary

Analysis of Next Generation Sequencing (NGS) data requires the processing of large datasets by chaining various tools with complex input and output formats. In order to automate data analysis, we propose to standardize NGS tasks into modular workflows. This simplifies reliable handling and processing of NGS data, and corresponding solutions become substantially more reproducible and easier to maintain. Here, we present a documented, linux-based, toolbox of 42 processing modules that are combined to construct workflows facilitating a variety of tasks such as DNAseq and RNAseq analysis. We also describe important technical extensions. The high throughput executor (HTE) helps to increase the reliability and to reduce manual interventions when processing complex datasets. We also provide a dedicated binary manager that assists users in obtaining the modules' executables and keeping them up to date. As basis for this actively developed toolbox we use the workflow management software KNIME.

Availability and implementation

See http://ibisngs.github.io/knime4ngs for nodes and user manual (GPLv3 license).

Contact

robert.kueffner@helmholtz-muenchen.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +29399725,"POTENCI: prediction of temperature, neighbor and pH-corrected chemical shifts for intrinsically disordered proteins.","Chemical shifts contain important site-specific information on the structure and dynamics of proteins. Deviations from statistical average values, known as random coil chemical shifts (RCCSs), are extensively used to infer these relationships. Unfortunately, the use of imprecise reference RCCSs leads to biased inference and obstructs the detection of subtle structural features. Here we present a new method, POTENCI, for the prediction of RCCSs that outperforms the currently most authoritative methods. POTENCI is parametrized using a large curated database of chemical shifts for protein segments with validated disorder; it takes pH and temperature explicitly into account, and includes sequence-dependent nearest and next-nearest neighbor corrections as well as second-order corrections. RCCS predictions with POTENCI show root-mean-square values that are lower by 25-78%, with the largest improvements observed for 1Hα and 13C'. It is demonstrated how POTENCI can be applied to analyze subtle deviations from RCCSs to detect small populations of residual structure in intrinsically disordered proteins that were not discernible before. POTENCI source code is available for download, or can be deployed from the URL http://www.protein-nmr.org .",2018-02-05 +29310834,Genetic analysis and clinical description of Greek patients with Peutz-Jeghers syndrome: Creation of a National Registry.,"Peutz-Jeghers syndrome (PJS) is a rare autosomal dominant disorder caused by germline mutations in the STK11 tumor suppressor gene. PJS patients face a cumulative cancer risk as high as 93% for all sites combined. 
The present study reports the spectrum of STK11 mutations in eight families with clinical diagnosis of PJS, summarizes the clinical characteristics of sixteen mutation carriers and launches a National Registry for PJS in Greece. STK11 loss-of-function (LoF) mutations were detected in 87.5% of index patients. Carriers presented with their first manifestation at a median age of 24.9 years, while early-onset breast cancer was the most frequent malignancy observed, highlighting the need for breast surveillance. Out of the deleterious STK11 mutations identified, two were novel: c.375_376delGT and c.676_679dupAACG, with 57.2% of these potentially occurring de novo. Using all available clinical and genetic data, the National Registry for Greek PJS was established in an attempt to better characterize the syndrome and raise awareness among patients and clinicians (available at https://www.peutzjeghersgreece.org). This is the first comprehensive genetic analysis and clinical characterization of Greek PJS patients, where a high incidence of breast cancer was observed and the first attempt to centralize all data in a National Registry.",2017-11-20 +28334115,fastER: a user-friendly tool for ultrafast and robust cell segmentation in large-scale microscopy.,"

Motivation

Quantitative large-scale cell microscopy is widely used in biological and medical research. Such experiments produce huge amounts of image data and thus require automated analysis. However, automated detection of cell outlines (cell segmentation) is typically challenging due to, e.g. high cell densities, cell-to-cell variability and low signal-to-noise ratios.

Results

Here, we evaluate accuracy and speed of various state-of-the-art approaches for cell segmentation in light microscopy images using challenging real and synthetic image data. The results vary between datasets and show that the tested tools are either not robust enough or computationally expensive, thus limiting their application to large-scale experiments. We therefore developed fastER, a trainable tool that is orders of magnitude faster while producing state-of-the-art segmentation quality. It supports various cell types and image acquisition modalities, but is easy-to-use even for non-experts: it has no parameters and can be adapted to specific image sets by interactively labelling cells for training. As a proof of concept, we segment and count cells in over 200 000 brightfield images (1388 × 1040 pixels each) from a six day time-lapse microscopy experiment; identification of over 46 000 000 single cells requires only about two and a half hours on a desktop computer.

Availability and implementation

C++ code, binaries and data at https://www.bsse.ethz.ch/csd/software/faster.html .

Contact

oliver.hilsenbeck@bsse.ethz.ch or timm.schroeder@bsse.ethz.ch.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +30379928,Network analysis of the social and demographic influences on name choice within the UK (1838-2016).,"Chosen names reflect changes in societal values, personal tastes and cultural diversity. Vogues in name usage can be easily shown on a case by case basis, by plotting the rise and fall in their popularity over time. However, individual name choices are not made in isolation and trends in naming are better understood as group-level phenomena. Here we use network analysis to examine onomastic (name) datasets in order to explore the influences on name choices within the UK over the last 170 years. Using a large representative sample of approximately 22 million forenames from England and Wales given between 1838 and 2014, along with a complete population sample of births registered between 1996 and 2016, we demonstrate how trends in name usage can be visualised as network graphs. By exploring the structure of these graphs various patterns of name use become apparent, a consequence of external social forces, such as migration, operating in concert with internal mechanisms of change. In general, we show that the topology of network graphs can reveal naming vogues, and that naming vogues in part reflect social and demographic changes. Many name choices are consistent with a self-correcting feedback loop, whereby rarer names become common because there are virtues perceived in their rarity, yet with these perceived virtues lost upon increasing commonality. Towards the present day, we can speculate that the comparatively greater range of media, freedom of movement, and ability to maintain globally-distributed social networks increases the number of possible names, but also ensures they may more quickly be perceived as commonplace. Consequently, contemporary naming vogues are relatively short-lived with many name choices appearing a balance struck between recognisability and rarity. 
The data are available in multiple forms including via an easy-to-use web interface at http://demos.flourish.studio/namehistory.",2018-10-31 +27554092,Training and evaluation corpora for the extraction of causal relationships encoded in biological expression language (BEL). ,"Success in extracting biological relationships is mainly dependent on the complexity of the task as well as the availability of high-quality training data. Here, we describe the new corpora in the systems biology modeling language BEL for training and testing biological relationship extraction systems that we prepared for the BioCreative V BEL track. BEL was designed to capture relationships not only between proteins or chemicals, but also complex events such as biological processes or disease states. A BEL nanopub is the smallest unit of information and represents a biological relationship with its provenance. In BEL relationships (called BEL statements), the entities are normalized to defined namespaces mainly derived from public repositories, such as sequence databases, MeSH or publicly available ontologies. In the BEL nanopubs, the BEL statements are associated with citation information and supportive evidence such as a text excerpt. To enable the training of extraction tools, we prepared BEL resources and made them available to the community. We selected a subset of these resources focusing on a reduced set of namespaces, namely, human and mouse genes, ChEBI chemicals, MeSH diseases and GO biological processes, as well as relationship types 'increases' and 'decreases'. The published training corpus contains 11 000 BEL statements from over 6000 supportive text excerpts. For method evaluation, we selected and re-annotated two smaller subcorpora containing 100 text excerpts. For this re-annotation, the inter-annotator agreement was measured by the BEL track evaluation environment and resulted in a maximal F-score of 91.18% for full statement agreement. 
In addition, for a set of 100 BEL statements, we do not only provide the gold standard expert annotations, but also text excerpts pre-selected by two automated systems. Those text excerpts were evaluated and manually annotated as true or false supportive in the course of the BioCreative V BEL track task.Database URL: http://wiki.openbel.org/display/BIOC/Datasets.",2016-08-23 +23016940,HomeoDB2: functional expansion of a comparative homeobox gene database for evolutionary developmental biology.,"Homeobox gene database (HomeoDB), a manually curated database of homeobox genes and their classification, has been well received since its release in 2008. Here, we report HomeoDB2, an expansion and improvement of the original database that provides greater functionality for the user. HomeoDB2 includes all homeobox loci from 10 animal genomes (human, mouse, chicken, frog, zebrafish, amphioxus, nematode, fruitfly, beetle, honeybee) plus tools for downloading sequences, comparing between species and BLAST searching. HomeoDB2 provides a resource for studying the dynamics of homeobox gene evolution, and is freely accessible at http://homeodb.zoo.ox.ac.uk.",2011-11-01 +30258427,Sc-ncDNAPred: A Sequence-Based Predictor for Identifying Non-coding DNA in Saccharomyces cerevisiae.,"With the rapid development of high-speed sequencing technologies and the implementation of many whole genome sequencing project, research in the genomics is advancing from genome sequencing to genome synthesis. Synthetic biology technologies such as DNA-based molecular assemblies, genome editing technology, directional evolution technology and DNA storage technology, and other cutting-edge technologies emerge in succession. Especially the rapid growth and development of DNA assembly technology may greatly push forward the success of artificial life. Meanwhile, DNA assembly technology needs a large number of target sequences of known information as data support. 
Non-coding DNA (ncDNA) sequences occupy most of the organism genomes, thus accurate recognizing of them is necessary. Although experimental methods have been proposed to detect ncDNA sequences, they are expensive for performing genome wide detections. Thus, it is necessary to develop machine-learning methods for predicting non-coding DNA sequences. In this study, we collected the ncDNA benchmark dataset of Saccharomyces cerevisiae and reported a support vector machine-based predictor, called Sc-ncDNAPred, for predicting ncDNA sequences. The optimal feature extraction strategy was selected from a group included mononucleotide, dimer, trimer, tetramer, pentamer, and hexamer, using support vector machine learning method. Sc-ncDNAPred achieved an overall accuracy of 0.98. For the convenience of users, an online web-server has been built at: http://server.malab.cn/Sc_ncDNAPred/index.jsp.",2018-09-12 +28365735,AnnoSys-implementation of a generic annotation system for schema-based data using the example of biodiversity collection data. ,"Biological research collections holding billions of specimens world-wide provide the most important baseline information for systematic biodiversity research. Increasingly, specimen data records become available in virtual herbaria and data portals. The traditional (physical) annotation procedure fails here, so that an important pathway of research documentation and data quality control is broken. In order to create an online annotation system, we analysed, modeled and adapted traditional specimen annotation workflows. The AnnoSys system accesses collection data from either conventional web resources or the Biological Collection Access Service (BioCASe) and accepts XML-based data standards like ABCD or DarwinCore. It comprises a searchable annotation data repository, a user interface, and a subscription based message system. 
We describe the main components of AnnoSys and its current and planned interoperability with biodiversity data portals and networks. Details are given on the underlying architectural model, which implements the W3C OpenAnnotation model and allows the adaptation of AnnoSys to different problem domains. Advantages and disadvantages of different digital annotation and feedback approaches are discussed. For the biodiversity domain, AnnoSys proposes best practice procedures for digital annotations of complex records. https://annosys.bgbm.fu-berlin.de/AnnoSys/AnnoSys.",2017-01-01 +28805044,"NMπ-improved re-implementation of NM+, a software for estimating gene dispersal and mating patterns.","This study introduces the NMπ computer program designed for estimation of plant mating system and seed and pollen dispersal kernels. NMπ is a re-implementation of the NM+ program and provides new features such as support for multicore processors, explicit treatment of dioecy, the possibility of incorporating uniparentally cytoplasmic markers, the possibility of assessing assortative mating due to phenotypic similarity and inference about offspring genealogies. The probability model of parentage (the neighbourhood model) accounts for missing data and genotyping errors, which can be estimated along with regular parameters of the mating system. The program has virtually no restrictions with respect to a number of individuals, markers or phenotypic characters. A console version of NMπ can be run under a wide variety of operating systems, including Windows, Linux or Mac OS. For Windows users, a graphical user interface is provided to facilitate operating the software. 
The program, user manual and example data are available on http://www.ukw.edu.pl/pracownicy/plik/igor_chybicki/3694/.",2017-09-16 +28936638,Examining reproducibility in psychology: A hybrid method for combining a statistically significant original study and a replication.,"The unrealistically high rate of positive results within psychology has increased the attention to replication research. However, researchers who conduct a replication and want to statistically combine the results of their replication with a statistically significant original study encounter problems when using traditional meta-analysis techniques. The original study's effect size is most probably overestimated because it is statistically significant, and this bias is not taken into consideration in traditional meta-analysis. We have developed a hybrid method that does take the statistical significance of an original study into account and enables (a) accurate effect size estimation, (b) estimation of a confidence interval, and (c) testing of the null hypothesis of no effect. We analytically approximate the performance of the hybrid method and describe its statistical properties. By applying the hybrid method to data from the Reproducibility Project: Psychology (Open Science Collaboration, 2015), we demonstrate that the conclusions based on the hybrid method are often in line with those of the replication, suggesting that many published psychological studies have smaller effect sizes than those reported in the original study, and that some effects may even be absent. We offer hands-on guidelines for how to statistically combine an original study and replication, and have developed a Web-based application ( https://rvanaert.shinyapps.io/hybrid ) for applying the hybrid method.",2018-08-01 +25527833,mirPub: a database for searching microRNA publications.,"

Summary

Identifying, amongst millions of publications available in MEDLINE, those that are relevant to specific microRNAs (miRNAs) of interest based on keyword search faces major obstacles. References to miRNA names in the literature often deviate from standard nomenclature for various reasons, since even the official nomenclature evolves. For instance, a single miRNA name may identify two completely different molecules or two different names may refer to the same molecule. mirPub is a database with a powerful and intuitive interface, which facilitates searching for miRNA literature, addressing the aforementioned issues. To provide effective search services, mirPub applies text mining techniques on MEDLINE, integrates data from several curated databases and exploits data from its user community following a crowdsourcing approach. Other key features include an interactive visualization service that illustrates intuitively the evolution of miRNA data, tag clouds summarizing the relevance of publications to particular diseases, cell types or tissues and access to TarBase 6.0 data to oversee genes related to miRNA publications.

Availability and implementation

mirPub is freely available at http://www.microrna.gr/mirpub/.

Contact

vergoulis@imis.athena-innovation.gr or dalamag@imis.athena-innovation.gr

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-20 +29790966,TCRmodel: high resolution modeling of T cell receptors from sequence.,"T cell receptors (TCRs), along with antibodies, are responsible for specific antigen recognition in the adaptive immune response, and millions of unique TCRs are estimated to be present in each individual. Understanding the structural basis of TCR targeting has implications in vaccine design, autoimmunity, as well as T cell therapies for cancer. Given advances in deep sequencing leading to immune repertoire-level TCR sequence data, fast and accurate modeling methods are needed to elucidate shared and unique 3D structural features of these molecules which lead to their antigen targeting and cross-reactivity. We developed a new algorithm in the program Rosetta to model TCRs from sequence, and implemented this functionality in a web server, TCRmodel. This web server provides an easy to use interface, and models are generated quickly that users can investigate in the browser and download. Benchmarking of this method using a set of nonredundant recently released TCR crystal structures shows that models are accurate and compare favorably to models from another available modeling method. This server enables the community to obtain insights into TCRs of interest, and can be combined with methods to model and design TCR recognition of antigens. The TCRmodel server is available at: http://tcrmodel.ibbr.umd.edu/.",2018-07-01 +30456502,Long-term oncologic after robotic versus laparoscopic right colectomy: a prospective randomized study.,"

Objective

The aim of this study was to compare the long-term outcomes of robot-assisted right colectomy (RAC) with those for conventional laparoscopy-assisted right surgery (LAC) for treating right-sided colon cancer.

Background

The enthusiasm for the robotic techniques has gained increasing interest in colorectal malignancies. However, the role of robotic surgery in the oncologic safety has not yet been defined.

Methods

From September 2009 to July 2011, 71 patients with right-sided colonic cancer were randomized in the study. Adjuvant therapy and postoperative follow-up were similar in both groups. The primary and secondary endpoints of the study were hospital stay and survival, respectively. Data were analyzed by intention-to-treat principle.

Results

The RAC and LAC groups did not differ significantly in terms of baseline clinical characteristics. Compared with the LAC group, RAC was associated with longer operation times (195 min vs. 129 min, P < 0.001) and higher cost ($12,235 vs. $10,319, P = 0.013). The median follow-up was 49.23 months (interquartile range 40.63-56.20). The combined 5-year disease-free rate for all tumor stages was 77.4% (95% confidence interval [CI], 60.6-92.1%) in the RAC group and 83.6% (95% CI 72.1-97.0%) in the LAC group (P = 0.442). The combined 5-year overall survival rates for all stages were 91.1% (95% CI 78.8-100%) in the RAC group and 91.0% (95% CI 81.3-100%) in the LAC group (P = 0.678). Using multivariate analysis, RAC was not a predictor of recurrence.

Conclusions

RAC appears to provide similar long-term survival as compared with LAC. However, we did not observe any clinical benefits of RAC which could translate to a decrease in expenditures.

Trial registry

http://www.ClinicalTrials.gov , number NCT00470951.",2018-11-19 +28011769,"The PPI3D web server for searching, analyzing and modeling protein-protein interactions in the context of 3D structures.",

Summary

The PPI3D web server is focused on searching and analyzing the structural data on protein-protein interactions. Reducing the data redundancy by clustering and analyzing the properties of interaction interfaces using Voronoi tessellation makes this software a highly effective tool for addressing different questions related to protein interactions.

Availability and implementation

The server is freely accessible at http://bioinformatics.lt/software/ppi3d/ .

Contact

ceslovas.venclovas@bti.vu.lt.

Supplementary information

Supplementary data are available at Bioinformatics online.,2017-03-01 +28407089,GibbsCluster: unsupervised clustering and alignment of peptide sequences.,"Receptor interactions with short linear peptide fragments (ligands) are at the base of many biological signaling processes. Conserved and information-rich amino acid patterns, commonly called sequence motifs, shape and regulate these interactions. Because of the properties of a receptor-ligand system or of the assay used to interrogate it, experimental data often contain multiple sequence motifs. GibbsCluster is a powerful tool for unsupervised motif discovery because it can simultaneously cluster and align peptide data. The GibbsCluster 2.0 presented here is an improved version incorporating insertion and deletions accounting for variations in motif length in the peptide input. In basic terms, the program takes as input a set of peptide sequences and clusters them into meaningful groups. It returns the optimal number of clusters it identified, together with the sequence alignment and sequence motif characterizing each cluster. Several parameters are available to customize cluster analysis, including adjustable penalties for small clusters and overlapping groups and a trash cluster to remove outliers. As an example application, we used the server to deconvolute multiple specificities in large-scale peptidome data generated by mass spectrometry. The server is available at http://www.cbs.dtu.dk/services/GibbsCluster-2.0.",2017-07-01 +28344774,"The Dockstore: enabling modular, community-focused sharing of Docker-based genomics tools and workflows.","As genomic datasets continue to grow, the feasibility of downloading data to a local organization and running analysis on a traditional compute environment is becoming increasingly problematic. Current large-scale projects, such as the ICGC PanCancer Analysis of Whole Genomes (PCAWG), the Data Platform for the U.S. 
Precision Medicine Initiative, and the NIH Big Data to Knowledge Center for Translational Genomics, are using cloud-based infrastructure to both host and perform analysis across large data sets. In PCAWG, over 5,800 whole human genomes were aligned and variant called across 14 cloud and HPC environments; the processed data was then made available on the cloud for further analysis and sharing. If run locally, an operation at this scale would have monopolized a typical academic data centre for many months, and would have presented major challenges for data storage and distribution. However, this scale is increasingly typical for genomics projects and necessitates a rethink of how analytical tools are packaged and moved to the data. For PCAWG, we embraced the use of highly portable Docker images for encapsulating and sharing complex alignment and variant calling workflows across highly variable environments. While successful, this endeavor revealed a limitation in Docker containers, namely the lack of a standardized way to describe and execute the tools encapsulated inside the container. As a result, we created the Dockstore ( https://dockstore.org), a project that brings together Docker images with standardized, machine-readable ways of describing and running the tools contained within. This service greatly improves the sharing and reuse of genomics tools and promotes interoperability with similar projects through emerging web service standards developed by the Global Alliance for Genomics and Health (GA4GH).",2017-01-18 +29398655,Community-Led Total Sanitation: A Mixed-Methods Systematic Review of Evidence and Its Quality.,"BACKGROUND:Community-led total sanitation (CLTS) is a widely applied rural behavior change approach for ending open defecation. However, evidence of its impact is unclear. 
OBJECTIVES:We conducted a systematic review of journal-published and gray literature to a) assess evidence quality, b) summarize CLTS impacts, and c) identify factors affecting implementation and effectiveness. METHODS:Eligible studies were systematically screened and selected for analysis from searches of seven databases and 16 websites. We developed a framework to appraise literature quality. We qualitatively analyzed factors enabling or constraining CLTS, and summarized results from quantitative evaluations. DISCUSSION:We included 200 studies (14 quantitative evaluations, 29 qualitative studies, and 157 case studies). Journal-published literature was generally of higher quality than gray literature. Fourteen quantitative evaluations reported decreases in open defecation, but did not corroborate the widespread claims of open defecation-free (ODF) villages found in case studies. Over one-fourth of the literature overstated conclusions, attributing outcomes and impacts to interventions without an appropriate study design. We identified 43 implementation- and community-related factors reportedly affecting CLTS. This analysis revealed the importance of adaptability, structured posttriggering activities, appropriate community selection, and further research on combining and sequencing CLTS with other interventions. CONCLUSIONS:The evidence base on CLTS effectiveness available to practitioners, policy makers, and program managers to inform their actions is weak. Our results highlight the need for more rigorous research on CLTS impacts as well as applied research initiatives that bring researchers and practitioners together to address implementation challenges to improve rural sanitation efforts. https://doi.org/10.1289/EHP1965.",2018-02-02 +28472395,SinCHet: a MATLAB toolbox for single cell heterogeneity analysis in cancer.,"

Summary

Single-cell technologies allow characterization of transcriptomes and epigenomes for individual cells under different conditions and provide unprecedented resolution for researchers to investigate cellular heterogeneity in cancer. The SinCHet (Single Cell Heterogeneity) toolbox is developed in MATLAB and has a graphical user interface (GUI) for visualization and user interaction. It analyzes both continuous (e.g. mRNA expression) and binary omics data (e.g. discretized methylation data). The toolbox does not only quantify cellular heterogeneity using Shannon Profile (SP) at different clonal resolutions but also detects heterogeneity differences using a D statistic between two populations. It is defined as the area under the Profile of Shannon Difference (PSD). This flexible tool provides a default clonal resolution using the change point of PSD detected by multivariate adaptive regression splines model; it also allows user-defined clonal resolutions for further investigation. This tool provides insights into emerging or disappearing clones between conditions, and enables the prioritization of biomarkers for follow-up experiments based on heterogeneity or marker differences between and/or within cell populations.

Availability and implementation

The SinCHet software is freely available for non-profit academic use. The source code, example datasets, and the compiled package are available at http://labpages2.moffitt.org/chen/software/ .

Contact

ann.chen@moffitt.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +26721496,NONCODEv4: Annotation of Noncoding RNAs with Emphasis on Long Noncoding RNAs.,"The rapid development of high-throughput sequencing technologies and bioinformatics algorithms now enables detection and profiling of a large number of noncoding transcripts. Long noncoding RNAs (lncRNAs), which are longer than 200 nucleotides, are accumulating with important roles involved in biological processes and tissue physiology. In this chapter, we describe the use of NONCODEv4, a database that provides a comprehensive catalog of noncoding RNAs with particularly detailed annotations for lncRNAs. NONCODEv4 stores more than half a million transcripts, of which more than 200,000 are lncRNAs. NONCODEv4 raises the concept of lncRNA genes and explores their expression and functions based on public transcriptome data. NONCODEv4 also integrates a series of online tools and has an easy-to-use web interface. NONCODEv4 is available at http://www.noncode.org/ http://www.bioinfo.org/noncode.",2016-01-01 +29391026,Machine learning and medicine: book review and commentary.,"This article is a review of the book ""Master machine learning algorithms, discover how they work and implement them from scratch"" (ISBN: not available, 37 USD, 163 pages) edited by Jason Brownlee published by the Author, edition, v1.10 http://MachineLearningMastery.com . An accompanying commentary discusses some of the issues that are involved with use of machine learning and data mining techniques to develop predictive models for diagnosis or prognosis of disease, and to call attention to additional requirements for developing diagnostic and prognostic algorithms that are generally useful in medicine. 
Appendix provides examples that illustrate potential problems with machine learning that are not addressed in the reviewed book.",2018-02-01 +25627341,SynBioLGDB: a resource for experimentally validated logic gates in synthetic biology.,"Synthetic biologists have developed DNA/molecular modules that perform genetic logic operations in living cells to track key moments in a cell's life or change the fate of a cell. Increasing evidence has also revealed that diverse genetic logic gates capable of generating a Boolean function play critically important roles in synthetic biology. Basic genetic logic gates have been designed to combine biological science with digital logic. SynBioLGDB (http://bioinformatics.ac.cn/synbiolgdb/) aims to provide the synthetic biology community with a useful resource for efficient browsing and visualization of genetic logic gates. The current version of SynBioLGDB documents more than 189 genetic logic gates with experimental evidence involving 80 AND gates and 16 NOR gates, etc. in three species (Human, Escherichia coli and Bacillus clausii). SynBioLGDB provides a user-friendly interface through which conveniently to query and browse detailed information about these genetic logic gates. SynBioLGDB will enable more comprehensive understanding of the connection of genetic logic gates to execute complex cellular functions in living cells.",2015-01-28 +28062451,RECKONER: read error corrector based on KMC.,"

Summary

The presence of sequencing errors in data produced by next-generation sequencers affects the quality of downstream analyses. Their accuracy can be improved by performing error correction of sequencing reads. We introduce a new correction algorithm capable of processing eukaryotic close to 500 Mbp-genome-size, high error-rated data using less than 4 GB of RAM in about 35 min on a 16-core computer.

Availability and implementation

Program is freely available at http://sun.aei.polsl.pl/REFRESH/reckoner .

Contact

sebastian.deorowicz@polsl.pl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +26080082,Computational Identification of Protein Pupylation Sites by Using Profile-Based Composition of k-Spaced Amino Acid Pairs.,"Prokaryotic proteins are regulated by pupylation, a type of post-translational modification that contributes to cellular function in bacterial organisms. In pupylation process, the prokaryotic ubiquitin-like protein (Pup) tagging is functionally analogous to ubiquitination in order to tag target proteins for proteasomal degradation. To date, several experimental methods have been developed to identify pupylated proteins and their pupylation sites, but these experimental methods are generally laborious and costly. Therefore, computational methods that can accurately predict potential pupylation sites based on protein sequence information are highly desirable. In this paper, a novel predictor termed as pbPUP has been developed for accurate prediction of pupylation sites. In particular, a sophisticated sequence encoding scheme [i.e. the profile-based composition of k-spaced amino acid pairs (pbCKSAAP)] is used to represent the sequence patterns and evolutionary information of the sequence fragments surrounding pupylation sites. Then, a Support Vector Machine (SVM) classifier is trained using the pbCKSAAP encoding scheme. The final pbPUP predictor achieves an AUC value of 0.849 in 10-fold cross-validation tests and outperforms other existing predictors on a comprehensive independent test dataset. The proposed method is anticipated to be a helpful computational resource for the prediction of pupylation sites. The web server and curated datasets in this study are freely available at http://protein.cau.edu.cn/pbPUP/.",2015-06-16 +28177064,CPSS 2.0: a computational platform update for the analysis of small RNA sequencing data.,"

Summary

Next-generation sequencing has been widely applied to understand the complexity of non-coding RNAs (ncRNAs) in the last decades. Here, we present CPSS 2.0, an updated version of CPSS 1.0 for small RNA sequencing data analysis, with the following improvements: (i) a substantial increase of supported species from 10 to 48; (ii) improved strategies applied to detect ncRNAs; (iii) more ncRNAs can be detected and profiled, such as lncRNA and circRNA; (iv) identification of differentially expressed ncRNAs among multiple samples; (v) enhanced visualization interface containing graphs and charts in detailed analysis results. The new version of CPSS is an efficient bioinformatics tool for users in non-coding RNA research.

Availability and implementation

CPSS 2.0 is implemented in PHP + Perl + R and can be freely accessed at http://114.214.166.79/cpss2.0/.

Contact

zyuanwei@ustc.edu.cn or qshi@ustc.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +29285887,Hit Dexter: A Machine-Learning Model for the Prediction of Frequent Hitters.,"False-positive assay readouts caused by badly behaving compounds—frequent hitters, pan-assay interference compounds (PAINS), aggregators, and others—continue to pose a major challenge to experimental screening. There are only a few in silico methods that allow the prediction of such problematic compounds. We report the development of Hit Dexter, two extremely randomized trees classifiers for the prediction of compounds likely to trigger positive assay readouts either by true promiscuity or by assay interference. The models were trained on a well-prepared dataset extracted from the PubChem Bioassay database, consisting of approximately 311 000 compounds tested for activity on at least 50 proteins. Hit Dexter reached MCC and AUC values of up to 0.67 and 0.96 on an independent test set, respectively. The models are expected to be of high value, in particular to medicinal chemists and biochemists who can use Hit Dexter to identify compounds for which extra caution should be exercised with positive assay readouts. Hit Dexter is available as a free web service at http://hitdexter.zbh.uni-hamburg.de.",2018-02-01 +29313247,EmoFinder: The meeting point for Spanish emotional words.,"We present here emoFinder ( http://usc.es/pcc/emofinder ), a Web-based search engine for Spanish word properties taken from different normative databases. The tool incorporates several subjective word properties for 16,375 distinct words. Although it focuses particularly on normative ratings for emotional dimensions (e.g., valence and arousal) and discrete emotional categories (fear, disgust, anger, happiness, and sadness), it also makes available ratings for other word properties that are known to affect word processing (e.g., concreteness, familiarity, contextual availability, and age of acquisition). 
The tool provides two main functionalities: Users can search for words matched on specific criteria with regard to the selected properties, or users can obtain the properties for a set of words. The output from emoFinder is highly customizable and can be accessed online or exported to a computer. The tool architecture is easily scalable, so that it can be updated to include word properties from new Spanish normative databases as they become available.",2018-02-01 +28342073,A Web-based interface to calculate phonotactic probability for words and nonwords in Modern Standard Arabic.,"A number of databases (Storkel Behavior Research Methods, 45, 1159-1167, 2013) and online calculators (Vitevitch & Luce Behavior Research Methods, Instruments, and Computers, 36, 481-487, 2004) have been developed to provide statistical information about various aspects of language, and these have proven to be invaluable assets to researchers, clinicians, and instructors in the language sciences. The number of such resources for English is quite large and continues to grow, whereas the number of such resources for other languages is much smaller. This article describes the development of a Web-based interface to calculate phonotactic probability in Modern Standard Arabic (MSA). A full description of how the calculator can be used is provided. It can be freely accessed at http://phonotactic.drupal.ku.edu/ .",2018-02-01 +27013586,"KIF23 is an independent prognostic biomarker in glioma, transcriptionally regulated by TCF-4.","Kinesin family member 23 (KIF23), a nuclear protein and a key regulator of cellular cytokinesis, has been found to be overexpressed as an oncogene in glioma. However, the prognostic and clinicopathological features of glioma with KIF23 expression was not clear yet. 
Here, we analyzed KIF23 expression pattern by using whole genome mRNA expression microarray data from Chinese Glioma Genome Atlas (CGGA) database (http://www.cgga.org.cn), and found that KIF23 overexpression was significantly associated with high grade glioma as well as the higher mortality in survival analysis (log-rank test, p<0.01). The results of the three other validation datasets showed similar findings. Furthermore, KIF23 also served as an independent prognostic biomarker in glioma patients. Finally, functional assay showed that reduction of KIF23 suppressed glioma cell proliferation both in vivo and vitro. Additionally, we found that KIF23 was regulated by TCF-4 at transcriptionally level. Therefore, this evidence indicates KIF23 over-expression is associated with glioma malignancy and conferred a worse survival time in glioma, which suggests KIF23 is a new novel prognostic biomarker with potential therapeutic implications in glioma.",2016-04-01 +26072489,PAGER: constructing PAGs and new PAG-PAG relationships for network biology.,"In this article, we described a new database framework to perform integrative ""gene-set, network, and pathway analysis"" (GNPA). In this framework, we integrated heterogeneous data on pathways, annotated list, and gene-sets (PAGs) into a PAG electronic repository (PAGER). PAGs in the PAGER database are organized into P-type, A-type and G-type PAGs with a three-letter-code standard naming convention. The PAGER database currently compiles 44 313 genes from 5 species including human, 38 663 PAGs, 324 830 gene-gene relationships and two types of 3 174 323 PAG-PAG regulatory relationships-co-membership based and regulatory relationship based. To help users assess each PAG's biological relevance, we developed a cohesion measure called Cohesion Coefficient (CoCo), which is capable of disambiguating between biologically significant PAGs and random PAGs with an area-under-curve performance of 0.98. 
PAGER database was set up to help users to search and retrieve PAGs from its online web interface. PAGER enable advanced users to build PAG-PAG regulatory networks that provide complementary biological insights not found in gene set analysis or individual gene network analysis. We provide a case study using cancer functional genomics data sets to demonstrate how integrative GNPA help improve network biology data coverage and therefore biological interpretability. The PAGER database can be accessible openly at http://discovery.informatics.iupui.edu/PAGER/.",2015-06-01 +25758743,"Cotton QTLdb: a cotton QTL database for QTL analysis, visualization, and comparison between Gossypium hirsutum and G. hirsutum × G. barbadense populations.","

Key message

A specialized database currently containing more than 2200 QTL is established, which allows graphic presentation, visualization and submission of QTL. In cotton quantitative trait loci (QTL), studies are focused on intraspecific Gossypium hirsutum and interspecific G. hirsutum × G. barbadense populations. These two populations are commercially important for the textile industry and are evaluated for fiber quality, yield, seed quality, resistance, physiological, and morphological trait QTL. With meta-analysis data based on the vast amount of QTL studies in cotton it will be beneficial to organize the data into a functional database for the cotton community. Here we provide a tool for cotton researchers to visualize previously identified QTL and submit their own QTL to the Cotton QTLdb database. The database provides the user with the option of selecting various QTL trait types from either the G. hirsutum or G. hirsutum × G. barbadense populations. Based on the user's QTL trait selection, graphical representations of chromosomes of the population selected are displayed in publication ready images. The database also provides users with trait information on QTL, LOD scores, and explained phenotypic variances for all QTL selected. The CottonQTLdb database provides cotton geneticist and breeders with statistical data on cotton QTL previously identified and provides a visualization tool to view QTL positions on chromosomes. Currently the database (Release 1) contains 2274 QTLs, and succeeding QTL studies will be updated regularly by the curators and members of the cotton community that contribute their data to keep the database current. The database is accessible from http://www.cottonqtldb.org.",2015-03-11 +29150537,Incremental Value of Repeated Risk Factor Measurements for Cardiovascular Disease Prediction in Middle-Aged Korean Adults: Results From the NHIS-HEALS (National Health Insurance System-National Health Screening Cohort). 
,"Increasing evidence suggests that repeatedly measured cardiovascular disease (CVD) risk factors may have an additive predictive value compared with single measured levels. Thus, we evaluated the incremental predictive value of incorporating periodic health screening data for CVD prediction in a large nationwide cohort with periodic health screening tests. A total of 467 708 persons aged 40 to 79 years and free from CVD were randomly divided into development (70%) and validation subcohorts (30%). We developed 3 different CVD prediction models: a single measure model using single time point screening data; a longitudinal average model using average risk factor values from periodic screening data; and a longitudinal summary model using average values and the variability of risk factors. The development subcohort included 327 396 persons who had 3.2 health screenings on average and 25 765 cases of CVD over 12 years. The C statistics (95% confidence interval [CI]) for the single measure, longitudinal average, and longitudinal summary models were 0.690 (95% CI, 0.682-0.698), 0.695 (95% CI, 0.687-0.703), and 0.752 (95% CI, 0.744-0.760) in men and 0.732 (95% CI, 0.722-0.742), 0.735 (95% CI, 0.725-0.745), and 0.790 (95% CI, 0.780-0.800) in women, respectively. The net reclassification index from the single measure model to the longitudinal average model was 1.78% in men and 1.33% in women, and the index from the longitudinal average model to the longitudinal summary model was 32.71% in men and 34.98% in women. Using averages of repeatedly measured risk factor values modestly improves CVD predictability compared with single measurement values. Incorporating the average and variability information of repeated measurements can lead to great improvements in disease prediction. URL: https://www.clinicaltrials.gov. Unique identifier: NCT02931500.",2017-11-01 +29028266,CircPro: an integrated tool for the identification of circRNAs with protein-coding potential.,"

Summary

Circular RNAs (circRNAs), a novel class of endogenous RNAs, are widespread in eukaryotic cells. Emerging roles in diverse biological processes suggest that circRNA is a promising key player in RNA world. Most circRNAs are generated through back-splicing of pre-mRNAs, forming a covalently closed loop structure with no 5' caps or 3' polyadenylated tails. In addition, most circRNAs were not associated with translating ribosomes, therefore, circRNAs were deemed to be noncoding. However, the latest research findings revealed that some circRNAs could generate proteins in vivo, which expands the landscape of transcriptome and proteome. To gain insights into the new area of circRNA translation, we introduce an integrated tool capable of detecting circRNAs with protein-coding potential from high-throughput sequencing data.

Availability and implementation

CircPro is available at http://bis.zju.edu.cn/CircPro.

Contact

mchen@zju.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +30367586,A generalized approach to predicting protein-protein interactions between virus and host.,"

Background

Viral infection involves a large number of protein-protein interactions (PPIs) between virus and its host. These interactions range from the initial binding of viral coat proteins to host membrane receptor to the hijacking the host transcription machinery by viral proteins. Therefore, identifying PPIs between virus and its host helps understand the mechanism of viral infections and design antiviral drugs. Many computational methods have been developed to predict PPIs, but most of them are intended for PPIs within a species rather than PPIs across different species such as PPIs between virus and host.

Results

In this study, we developed a prediction model of virus-host PPIs, which is applicable to new viruses and hosts. We tested the prediction model on independent datasets of virus-host PPIs, which were not used in training the model. Despite a low sequence similarity between proteins in training datasets and target proteins in test datasets, the prediction model showed a high performance comparable to the best performance of other methods for single virus-host PPIs.

Conclusions

Our method will be particularly useful to find PPIs between host and new viruses for which little information is available. The program and support data are available at http://bclab.inha.ac.kr/VirusHostPPI .",2018-08-13 +29138245,Clinical Impact of Diabetes Mellitus on Outcomes After Transcatheter Aortic Valve Replacement: Insights From the Society of Thoracic Surgeons/American College of Cardiology Transcatheter Valve Therapy Registry. ,"Diabetes mellitus (DM) adversely affects morbidity and mortality for cardiovascular diseases and procedures. Data evaluating the outcomes of transcatheter aortic valve replacement (TAVR) in diabetic patients are limited by small sample size and contradictory results. We aimed to establish the magnitude of risk and the incremental influence of insulin dependency by examining short- and long-term adverse outcomes according to DM status and therapy in the world's largest TAVR registry. We analyzed data from the Society of Thoracic Surgeons/American College of Cardiology Transcatheter Valve Therapy Registry. In-hospital mortality, 30-day mortality, and 1-year mortality after TAVR in patients with and without DM were evaluated using multivariate modeling. Among 47 643 patients treated with TAVR from November 2011 through September 2015 at 394 US hospitals, there were 17 849 (37.5%) patients with DM. Overall, 6600 of the diabetic patients were insulin treated (IT). Thirty-day mortality was 5.0% in patients with DM (6.1% in IT DM and 4.4% in non-IT DM; P<0.001) versus 5.9% in patients without DM (P<0.001). Overall, 1-year mortality was 21.8% in patients with DM (24.8% in IT DM and 20.1% in non-IT DM; P<0.001) versus 21.2% in patients without DM (P=0.274). In a multivariable model, DM was associated with increased 1-year mortality (hazard ratio, 1.30; 95% confidence interval, 1.13-1.49; P<0.001). 
Subgroup multivariable analysis showed stronger mortality association in IT diabetics (hazard ratio, 1.57; 95% confidence interval, 1.28-1.91; P<0.001) than in non-IT diabetics (hazard ratio, 1.17; 95% confidence interval, 1.00-1.38; P=0.052). Our data establish the magnitude of short- and long-term risk conferred by DM and the incremental risk conferred by insulin dependency in the performance of TAVR. URL: https://www.clinicaltrials.gov. Unique identifier: NCT01737528.",2017-11-01 +28187410,Jllumina - A comprehensive Java-based API for statistical Illumina Infinium HumanMethylation450 and MethylationEPIC data processing.,"Measuring differential methylation of the DNA is the nowadays most common approach to linking epigenetic modifications to diseases (called epigenome-wide association studies, EWAS). For its low cost, its efficiency and easy handling, the Illumina HumanMethylation450 BeadChip and its successor, the Infinium MethylationEPIC BeadChip, is the by far most popular techniques for conduction EWAS in large patient cohorts. Despite the popularity of this chip technology, raw data processing and statistical analysis of the array data remains far from trivial and still lacks dedicated software libraries enabling high quality and statistically sound downstream analyses. As of yet, only R-based solutions are freely available for low-level processing of the Illumina chip data. However, the lack of alternative libraries poses a hurdle for the development of new bioinformatic tools, in particular when it comes to web services or applications where run time and memory consumption matter, or EWAS data analysis is an integrative part of a bigger framework or data analysis pipeline. 
We have therefore developed and implemented Jllumina, an open-source Java library for raw data manipulation of Illumina Infinium HumanMethylation450 and Infinium MethylationEPIC BeadChip data, supporting the developer with Java functions covering reading and preprocessing the raw data, down to statistical assessment, permutation tests, and identification of differentially methylated loci. Jllumina is fully parallelizable and publicly available at http://dimmer.compbio.sdu.dk/download.html.",2016-12-18 +28926565,A SNP panel and online tool for checking genotype concordance through comparing QR codes.,"In the current precision medicine era, more and more samples get genotyped and sequenced. Both researchers and commercial companies expend significant time and resources to reduce the error rate. However, it has been reported that there is a sample mix-up rate of between 0.1% and 1%, not to mention the possibly higher mix-up rate during the down-stream genetic reporting processes. Even on the low end of this estimate, this translates to a significant number of mislabeled samples, especially over the projected one billion people that will be sequenced within the next decade. Here, we first describe a method to identify a small set of Single nucleotide polymorphisms (SNPs) that can uniquely identify a personal genome, which utilizes allele frequencies of five major continental populations reported in the 1000 genomes project and the ExAC Consortium. To make this panel more informative, we added four SNPs that are commonly used to predict ABO blood type, and another two SNPs that are capable of predicting sex. We then implement a web interface (http://qrcme.tech), nicknamed QRC (for QR code based Concordance check), which is capable of extracting the relevant ID SNPs from a raw genetic data, coding its genotype as a quick response (QR) code, and comparing QR codes to report the concordance of underlying genetic datasets. 
The resulting 80 fingerprinting SNPs represent a significant decrease in complexity and the number of markers used for genetic data labelling and tracking. Our method and web tool is easily accessible to both researchers and the general public who consider the accuracy of complex genetic data as a prerequisite towards precision medicine.",2017-09-19 +29783871,EClerize: A customized force-directed graph drawing algorithm for biological graphs with EC attributes.,"Visualizing large-scale data produced by the high throughput experiments as a biological graph leads to better understanding and analysis. This study describes a customized force-directed layout algorithm, EClerize, for biological graphs that represent pathways in which the nodes are associated with Enzyme Commission (EC) attributes. The nodes with the same EC class numbers are treated as members of the same cluster. Positions of nodes are then determined based on both the biological similarity and the connection structure. EClerize minimizes the intra-cluster distance, that is the distance between the nodes of the same EC cluster and maximizes the inter-cluster distance, that is the distance between two distinct EC clusters. EClerize is tested on a number of biological pathways and the improvement brought in is presented with respect to the original algorithm. EClerize is available as a plug-in to Cytoscape ( http://apps.cytoscape.org/apps/eclerize ).",2018-03-26 +29436419,Fungemia Surveillance in Denmark Demonstrates Emergence of Non-albicans Candida Species and Higher Antifungal Usage and Resistance Rates than in Other Nations. ,"Recent changes in the occurrence of fungal species and the difficulties in performing reference antifungal susceptibility testing highlight the importance of surveillance of fungal organisms and antifungal resistance rates. K. M. T. Astvad et al. 
report results from recent (2012 to 2015) fungemia surveillance in Denmark and compare the results to previous data (2004 to 2011), showing a decrease in Candida albicans infections accompanied by an increase in C. glabrata and C. dubliniensis infections (J Clin Microbiol 56:e01564-17, 2018, https://doi.org/10.1128/JCM.01564-17). Azole resistance among C. tropicalis and C. parapsilosis isolates and echinocandin resistance in C. krusei isolates were higher in Denmark than in other regions. Interestingly, the usage of antifungals is higher in Denmark than in other Nordic countries.",2018-03-26 +28537071,CLMSVault: A Software Suite for Protein Cross-Linking Mass-Spectrometry Data Analysis and Visualization.,"Protein cross-linking mass spectrometry (CL-MS) enables the sensitive detection of protein interactions and the inference of protein complex topology. The detection of chemical cross-links between protein residues can identify intra- and interprotein contact sites or provide physical constraints for molecular modeling of protein structure. Recent innovations in cross-linker design, sample preparation, mass spectrometry, and software tools have significantly improved CL-MS approaches. Although a number of algorithms now exist for the identification of cross-linked peptides from mass spectral data, a dearth of user-friendly analysis tools represent a practical bottleneck to the broad adoption of the approach. To facilitate the analysis of CL-MS data, we developed CLMSVault, a software suite designed to leverage existing CL-MS algorithms and provide intuitive and flexible tools for cross-platform data interpretation. CLMSVault stores and combines complementary information obtained from different cross-linkers and search algorithms. CLMSVault provides filtering, comparison, and visualization tools to support CL-MS analyses and includes a workflow for label-free quantification of cross-linked peptides. 
An embedded 3D viewer enables the visualization of quantitative data and the mapping of cross-linked sites onto PDB structural models. We demonstrate the application of CLMSVault for the analysis of a noncovalent Cdc34-ubiquitin protein complex cross-linked under different conditions. CLMSVault is open-source software (available at https://gitlab.com/courcelm/clmsvault.git ), and a live demo is available at http://democlmsvault.tyerslab.com/ .",2017-06-05 +30458928,Preclinical Toxicity Studies for Regenerative Medicine in Japan.,"

Purpose

Advances in methods designed to evaluate preclinical toxicity have not kept up with progress in regenerative medicine. Preclinical toxicity studies of regenerative therapies must be designed logically and should be flexible to accurately reflect toxicity of products under development. The purpose of this review is to discuss requirements of preclinical toxicity studies of this type developed in Japan.

Methods

We conducted MEDLINE and PubMed literature searches to identify recent reports relevant to regenerative medicine. Information regarding approved drugs and public announcements, including existing guidelines and guidance in Japan, was collected from the website of Japan's Ministry of Health, Labor and Welfare (https://www.mhlw.go.jp/index.html) and the Pharmaceuticals and Medical Devices Agency (https://www.pmda.go.jp/).

Findings

Four cell therapy products have been developed and approved in Japan so far. The principal preclinical toxicity data submitted to regulatory authorities in the Pharmaceuticals and Medical Devices Agency in Japan are summarized here. The potential for tumor formation, a major concern in such clinical applications, is assessed in 3 ways: tumor-forming capacity of the original cell, quantitation of residual pluripotent stem cells in the product, and the possibility that a tumor will form at the product's engraftment site. Although gene therapy and oncolytic virus products are under development, these types of products are not yet approved in Japan. Guidelines relevant to the development of these products are now being created based on existing guidelines and considerations established by the International Council for Harmonization of Technical Requirements for Pharmaceuticals for Human Use.

Implications

Because of cell tropism and heterologous immunity, animal species or strains useful for preclinical studies of regenerative therapies are often restricted. Nonetheless, preclinical toxicity studies must be designed to predict results relevant to humans.",2018-10-24 +,Eastern Europe's forest cover dynamics from 1985 to 2012 quantified from the full Landsat archive,"In the former “Eastern Bloc” countries, there have been dramatic changes in forest disturbance and forest recovery rates since the collapse of the Soviet Union, due to the transition to open-market economies, and the recent economic crisis. Unfortunately though, Eastern European countries collected their forest statistics inconsistently, and their boundaries have changed, making it difficult to analyze forest dynamics over time. Our goal here was to consistently quantify forest cover change across Eastern Europe since the 1980s based on the Landsat image archive. We developed an algorithm to simultaneously process data from different Landsat platforms and sensors (TM and ETM+) to map annual forest cover loss and decadal forest cover gain. We processed 59,539 Landsat images for 527 footprints across Eastern Europe and European Russia. Our results were highly accurate, with gross forest loss producer's and user's accuracy of >88% and >89%, respectively, and gross forest gain producer's and user's accuracy of >75% and >91%, based on a sample of probability-based validation points. We found substantial changes in the forest cover of Eastern Europe. Net forest cover increased from 1985 to 2012 by 4.7% across the region, but decreased in Estonia and Latvia. Average annual gross forest cover loss was 0.41% of total forest cover area, with a statistically significant increase from 1985 to 2012. Timber harvesting was the main cause of forest loss, accompanied by some insect defoliation and forest conversion, while only 7.4% of the total forest cover loss was due to large-scale wildfires and windstorms. 
Overall, the countries of Eastern Europe experienced constant levels or declines in forest loss after the collapse of socialism in the late 1980s, but a pronounced increase in loss in the early 2000s. By the late 2000s, however, the global economic crisis coincided with reduced timber harvesting in most countries, except Poland, Czech Republic, Slovakia, and the Baltic states. Most forest disturbance did not result in a permanent forest loss during our study period. Indeed, forest generally recovered fast and only 12% of the areas of forest loss prior to 1995 had not yet recovered by 2012. Our results allow national and sub-national level analysis and are available on-line (http://glad.geog.umd.edu/europe/) to serve as a baseline for further analyses of forest dynamics and its drivers.",2015-03-01 +28780100,Technical note: PaGELL v.1.5: A flexible parametric program for the Bayesian analysis of longevity data within the context of animal breeding.,"This technical note presents the program PaGELL v.1.5 (Parametric Genetic Evaluation of Lifespan in Livestock), a flexible software program to analyze (right-censored) longevity data in livestock populations, with a special emphasis on the genetic evaluation of the breeding stock. This software relies on a parametric generalization of the proportional hazard model; more specifically, the baseline hazard function follows a Weibull process and flexibility is gained by including an additional time-dependent effect with the number of change points defined by the user. The program can accommodate 3 different sources of variation (i.e., systematic, permanent environmental, and additive genetic effects) and both fixed and time-dependent patterns (only for systematic and permanent environmental effects). Analyses are performed within a Bayesian context by sampling from the joint posterior distribution of the model, and model fit can be easily determined by the calculation of the deviance information criterion. 
Although this software has already been used on field data sets, its performance has been double-checked on simulated data set, and results are presented in this technical note. PaGELL v.1.5 was written in Fortran 95 language and, after compiling with the GNU Fortran Compiler v.4.7 and later, it has been tested in Windows, Linux, and MacOS operating systems (both 32- and 64-bit platforms). This program is available at http://www.casellas.info/files/pageII.zip.",2017-08-02 +28710010,Local network component analysis for quantifying transcription factor activities.,"Transcription factors (TFs) could regulate physiological transitions or determine stable phenotypic diversity. The accurate estimation on TF regulatory signals or functional activities is of great significance to guide biological experiments or elucidate molecular mechanisms, but still remains challenging. Traditional methods identify TF regulatory signals at the population level, which masks heterogeneous regulation mechanisms in individuals or subgroups, thus resulting in inaccurate analyses. Here, we propose a novel computational framework, namely local network component analysis (LNCA), to exploit data heterogeneity and automatically quantify accurate transcription factor activity (TFA) in practical terms, through integrating the partitioned expression sets (i.e., local information) and prior TF-gene regulatory knowledge. Specifically, LNCA adopts an adaptive optimization strategy, which evaluates the local similarities of regulation controls and corrects biases during data integration, to construct the TFA landscape. In particular, we first numerically demonstrate the effectiveness of LNCA for the simulated data sets, compared with traditional methods, such as FastNCA, ROBNCA and NINCA. Then, we apply our model to two real data sets with implicit temporal or spatial regulation variations. The results show that LNCA not only recognizes the periodic mode along the S. 
cerevisiae cell cycle process, but also substantially outperforms over other methods in terms of accuracy and consistency. In addition, the cross-validation study for glioblastomas multiforme (GBM) indicates that the TFAs, identified by LNCA, can better distinguish clinically distinct tumor groups than the expression values of the corresponding TFs, thus opening a new way to classify tumor subtypes and also providing a novel insight into cancer heterogeneity.

Availability

LNCA was implemented as a Matlab package, which is available at http://sysbio.sibcb.ac.cn/cb/chenlab/software.htm/LNCApackage_0.1.rar.",2017-07-12 +29718115,Efficient flexible backbone protein-protein docking for challenging targets.,"Motivation:Binding-induced conformational changes challenge current computational docking algorithms by exponentially increasing the conformational space to be explored. To restrict this search to relevant space, some computational docking algorithms exploit the inherent flexibility of the protein monomers to simulate conformational selection from pre-generated ensembles. As the ensemble size expands with increased flexibility, these methods struggle with efficiency and high false positive rates. Results:Here, we develop and benchmark RosettaDock 4.0, which efficiently samples large conformational ensembles of flexible proteins and docks them using a novel, six-dimensional, coarse-grained score function. A strong discriminative ability allows an eight-fold higher enrichment of near-native candidate structures in the coarse-grained phase compared to RosettaDock 3.2. It adaptively samples 100 conformations each of the ligand and the receptor backbone while increasing computational time by only 20-80%. In local docking of a benchmark set of 88 proteins of varying degrees of flexibility, the expected success rate (defined as cases with ≥50% chance of achieving 3 near-native structures in the 5 top-ranked ones) for blind predictions after resampling is 77% for rigid complexes, 49% for moderately flexible complexes and 31% for highly flexible complexes. These success rates on flexible complexes are a substantial step forward from all existing methods. Additionally, for highly flexible proteins, we demonstrate that when a suitable conformer generation method exists, the method successfully docks the complex. 
Availability and implementation:As a part of the Rosetta software suite, RosettaDock 4.0 is available at https://www.rosettacommons.org to all non-commercial users for free and to commercial users for a fee. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-10-01 +27170328,LimiTT: link miRNAs to targets.,"

Background

MicroRNAs (miRNAs) impact various biological processes within animals and plants. They complementarily bind target mRNAs, effecting a post-transcriptional negative regulation on mRNA level. The investigation of miRNA target interactions (MTIs) by high throughput screenings is challenging, as frequently used in silico target prediction tools are prone to emit false positives. This issue is aggravated for niche model organisms, where validated miRNAs and MTIs both have to be transferred from well described model organisms. Even though DBs exist that contain experimentally validated MTIs, they are limited in their search options and they utilize different miRNA and target identifiers.

Results

The implemented pipeline LimiTT integrates four existing DBs containing experimentally validated MTIs. In contrast to other cumulative databases (DBs), LimiTT includes MTI data of 26 species. Additionally, the pipeline enables the identification and enrichment analysis of MTIs with and without species specificity based on dynamic quality criteria. Multiple tabular and graphical outputs are generated to permit the detailed assessment of results.

Conclusion

Our freely available web-based pipeline LimiTT ( https://bioinformatics.mpi-bn.mpg.de/ ) is optimized to determine MTIs with and without species specification. It links miRNAs and/or putative targets with high granularity. The integrated mapping to homologous target identifiers enables the identification of MTIs not only for standard models, but for niche model organisms as well.",2016-05-11 +30204840,In vitro versus in vivo compositional landscapes of histone sequence preferences in eucaryotic genomes.,"

Motivation

Although the nucleosome occupancy along a genome can be in part predicted by in vitro experiments, it has been recently observed that the chromatin organization presents important differences in vitro with respect to in vivo. Such differences mainly regard the hierarchical and regular structures of the nucleosome fiber, whose existence has long been assumed, and in part also observed in vitro, but that does not apparently occur in vivo. It is also well known that the DNA sequence has a role in determining the nucleosome occupancy. Therefore, an important issue is to understand if, and to what extent, the structural differences in the chromatin organization between in vitro and in vivo have a counterpart in terms of the underlying genomic sequences.

Results

We present the first quantitative comparison between the in vitro and in vivo nucleosome maps of two model organisms (S. cerevisiae and C. elegans). The comparison is based on the construction of weighted k-mer dictionaries. Our findings show that there is a good level of sequence conservation between in vitro and in vivo in both the two organisms, in contrast to the abovementioned important differences in chromatin structural organization. Moreover, our results provide evidence that the two organisms predispose themselves differently, in terms of sequence composition and both in vitro and in vivo, for the nucleosome occupancy. This leads to the conclusion that, although the notion of a genome encoding for its own nucleosome occupancy is general, the intrinsic histone k-mer sequence preferences tend to be species-specific.

Availability and implementation

The files containing the dictionaries and the main results of the analysis are available at http://math.unipa.it/rombo/material.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-10-01 +26334643,Fluoridated milk for preventing dental caries.,"

Background

Dental caries remains a major public health problem in most industrialised countries, affecting 60% to 90% of schoolchildren and the vast majority of adults. Milk may provide a relatively cost-effective vehicle for fluoride delivery in the prevention of dental caries. This is an update of a Cochrane review first published in 2005.

Objectives

To assess the effects of milk fluoridation for preventing dental caries at a community level.

Search methods

We searched the Cochrane Oral Health Group Trials Register (inception to November 2014), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library, 2014, Issue 10), MEDLINE via OVID (1946 to November 2014) and EMBASE via OVID (1980 to November 2014). We also searched the U.S. National Institutes of Health Trials Register (https://clinicaltrials.gov) and the WHO International Clinical Trials Registry Platform (http://apps.who.int/trialsearch) for ongoing trials. We did not place any restrictions on the language or date of publication when searching the electronic databases.

Selection criteria

Randomised controlled trials (RCTs), with an intervention and follow-up period of at least two years, comparing fluoridated milk with non-fluoridated milk.

Data collection and analysis

Two authors independently assessed trial risk of bias and extracted data. We used standard methodological procedures expected by The Cochrane Collaboration.

Main results

We included one unpublished RCT, randomising 180 children aged three years at study commencement. The setting was nursery schools in an area with high prevalence of dental caries and a low level of fluoride in drinking water. Data from 166 participants were available for analysis. The study carried a high risk of bias. After three years, there was a reduction of caries in permanent teeth (mean difference (MD) -0.13, 95% confidence interval (CI) -0.24 to -0.02) and in primary teeth (MD -1.14, 95% CI -1.86 to -0.42), as measured by the decayed, missing and filled teeth index (DMFT for permanent teeth and dmft for primary teeth). For primary teeth, this is a substantial reduction, equivalent to a prevented fraction of 31%. For permanent teeth, the disease level was very low in the study, resulting in a small absolute effect size. The included study did not report any other outcomes of interest for this review (adverse events, dental pain, antibiotic use or requirement for general anaesthesia due to dental procedures).

Authors' conclusions

There is low quality evidence to suggest fluoridated milk may be beneficial to schoolchildren, contributing to a substantial reduction in dental caries in primary teeth. Due to the low quality of the evidence, further research is likely to have an important impact on our confidence in the estimate of effect and is likely to change the estimate. There was only one relatively small study, which had important methodological limitations on the data for the effectiveness in reducing caries. Furthermore, there was no information about the potential harms of the intervention. Additional RCTs of high quality are needed before we can draw definitive conclusions about the benefits of milk fluoridation.",2015-09-03 +26564971,Analysis of core-periphery organization in protein contact networks reveals groups of structurally and functionally critical residues.,"The representation of proteins as networks of interacting amino acids, referred to as protein contact networks (PCN), and their subsequent analyses using graph theoretic tools, can provide novel insights into the key functional roles of specific groups of residues. We have characterized the networks corresponding to the native states of 66 proteins (belonging to different families) in terms of their core-periphery organization. The resulting hierarchical classification of the amino acid constituents of a protein arranges the residues into successive layers - having higher core order - with increasing connection density, ranging from a sparsely linked periphery to a densely intra-connected core (distinct from the earlier concept of protein core defined in terms of the three-dimensional geometry of the native state, which has least solvent accessibility). Our results show that residues in the inner cores are more conserved than those at the periphery. Underlining the functional importance of the network core, we see that the receptor sites for known ligand molecules of most proteins occur in the innermost core. 
Furthermore, the association of residues with structural pockets and cavities in binding or active sites increases with the core order. From mutation sensitivity analysis, we show that the probability of deleterious or intolerant mutations also increases with the core order. We also show that stabilization centre residues are in the innermost cores, suggesting that the network core is critically important in maintaining the structural stability of the protein. A publicly available Web resource for performing core-periphery analysis of any protein whose native state is known has been made available by us at http://www.imsc.res.in/~sitabhra/proteinKcore/index.html.",2015-10-01 +30092607,N-glycosylation of the AMPA-type glutamate receptor regulates cell surface expression and tetramer formation affecting channel function.,"The AMPA-type glutamate receptor (AMPA-R) plays a primary role in principal excitatory synaptic transmission and many neuronal functions including synaptic plasticity that underlie learning and memory. N-glycosylation is one of the major post-translational modifications of membrane proteins, but its specific roles in neurons remain largely unknown. AMPA-R subunits are N-glycosylated at their extracellular domains during their biosynthesis in the lumen of the endoplasmic reticulum and Golgi system. Six N-glycosylation sites are presumed to exist in the extracellular domain of GluA1, which is a member of the AMPA-R subunits. We observed that the intracellular trafficking and cell surface expression were strongly suppressed in the GluA1 mutants lacking N-glycans at N63/N363 in HEK293T cells. Multimer analysis using Blue Native-PAGE displayed the impaired tetramer formation in the glycosylation mutants (N63S and N363S), indicating that the mis-transport was caused by impaired tetramer formation. N63S and N363S mutants were primarily degraded via the lysosomal pathway. 
Flag-tagged N363S GluA1, but not N63S GluA1, expressed in primary cortical neuron cultures prepared from GluA1 knockout mice was observed to localize at the cell surface. Co-expression of GluA2 partially rescued tetramer formation and the cell surface expression of N363S GluA1 but not N63S GluA1, in HEK293T cells. Electrophysiological analysis also demonstrated functional heteromers of N363S GluA1 with GluA2. These data suggest that site-specific N-glycans on GluA1 subunit regulates tetramer formation, intracellular trafficking, and cell surface expression of AMPA-R. OPEN SCIENCE BADGES: This article has received a badge for *Open Materials* because it provided all relevant information to reproduce the study in the manuscript. The complete Open Science Disclosure form for this article can be found at the end of the article. More information about the Open Practices badges can be found at https://cos.io/our-services/open-science-badges/.",2018-11-12 +28137712,Sparse network modeling and metscape-based visualization methods for the analysis of large-scale metabolomics data.,"

Motivation

Recent technological advances in mass spectrometry, development of richer mass spectral libraries and data processing tools have enabled large scale metabolic profiling. Biological interpretation of metabolomics studies heavily relies on knowledge-based tools that contain information about metabolic pathways. Incomplete coverage of different areas of metabolism and lack of information about non-canonical connections between metabolites limits the scope of applications of such tools. Furthermore, the presence of a large number of unknown features, which cannot be readily identified, but nonetheless can represent bona fide compounds, also considerably complicates biological interpretation of the data.

Results

Leveraging recent developments in the statistical analysis of high-dimensional data, we developed a new Debiased Sparse Partial Correlation algorithm (DSPC) for estimating partial correlation networks and implemented it as a Java-based CorrelationCalculator program. We also introduce a new version of our previously developed tool Metscape that enables building and visualization of correlation networks. We demonstrate the utility of these tools by constructing biologically relevant networks and in aiding identification of unknown compounds.

Availability and implementation

http://metscape.med.umich.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +25881165,SNPchiMp v.3: integrating and standardizing single nucleotide polymorphism data for livestock species.,"

Background

In recent years, the use of genomic information in livestock species for genetic improvement, association studies and many other fields has become routine. In order to accommodate different market requirements in terms of genotyping cost, manufacturers of single nucleotide polymorphism (SNP) arrays, private companies and international consortia have developed a large number of arrays with different content and different SNP density. The number of currently available SNP arrays differs among species: ranging from one for goats to more than ten for cattle, and the number of arrays available is increasing rapidly. However, there is limited or no effort to standardize and integrate array-specific (e.g. SNP IDs, allele coding) and species-specific (i.e. past and current assemblies) SNP information.

Results

Here we present SNPchiMp v.3, a solution to these issues for the six major livestock species (cow, pig, horse, sheep, goat and chicken). Original data was collected directly from SNP array producers and specific international genome consortia, and stored in a MySQL database. The database was then linked to an open-access web tool and to public databases. SNPchiMp v.3 ensures fast access to the database (retrieving within/across SNP array data) and the possibility of annotating SNP array data in a user-friendly fashion.

Conclusions

This platform allows easy integration and standardization, and it is aimed at both industry and research. It also enables users to easily link the information available from the array producer with data in public databases, without the need of additional bioinformatics tools or pipelines. In recognition of the open-access use of Ensembl resources, SNPchiMp v.3 was officially credited as an Ensembl E!mpowered tool. Availability at http://bioinformatics.tecnoparco.org/SNPchimp.",2015-04-10 +25533645,A three-plane architectonic atlas of the rat hippocampal region.,"The hippocampal region, comprising the hippocampal formation and the parahippocampal region, has been one of the most intensively studied parts of the brain for decades. Better understanding of its functional diversity and complexity has led to an increased demand for specificity in experimental procedures and manipulations. In view of the complex 3D structure of the hippocampal region, precisely positioned experimental approaches require a fine-grained architectural description that is available and readable to experimentalists lacking detailed anatomical experience. In this paper, we provide the first cyto- and chemoarchitectural description of the hippocampal formation and parahippocampal region in the rat at high resolution and in the three standard sectional planes: coronal, horizontal and sagittal. The atlas uses a series of adjacent sections stained for neurons and for a number of chemical marker substances, particularly parvalbumin and calbindin. All the borders defined in one plane have been cross-checked against their counterparts in the other two planes. The entire dataset will be made available as a web-based interactive application through the Rodent Brain WorkBench (http://www.rbwb.org) which, together with this paper, provides a unique atlas resource.",2015-01-20 +29077743,Proposing a validated clinical app predicting hospitalization cost for extracranial-intracranial bypass surgery.,"

Object

United States healthcare reforms are focused on curtailing rising expenditures. In neurosurgical domain, limited or no data exists identifying potential modifiable targets associated with high-hospitalization cost for cerebrovascular procedures such as extracranial-intracranial (ECIC) bypass. Our study objective was to develop a predictive model of initial cost for patients undergoing bypass surgery.

Methods

In an observational cohort study, we analyzed patients registered in the Nationwide Inpatient Sample (2002-2011) that underwent ECIC bypass. Split-sample 1:1 randomization of the study cohort was performed. Hospital cost data was modelled using ordinary least squares to identify potential drivers impacting initial hospitalization cost. Subsequently, a validated clinical app for estimated hospitalization cost is proposed (https://www.neurosurgerycost.com/calc/ec-ic-by-pass).

Results

Overall, 1533 patients [mean age: 45.18 ± 19.51 years; 58% female] underwent ECIC bypass for moyamoya disease [45.1%], cerebro-occlusive disease (COD) [23% without infarction; 12% with infarction], unruptured [12%] and ruptured [4%] aneurysms. Median hospitalization cost was $37,525 (IQR: $16,225-$58,825). Common drivers impacting cost include Asian race, private payer, elective admission, hyponatremia, neurological and respiratory complications, acute renal failure, bypass for moyamoya disease, COD without infarction, medium and high volume centers, hospitals located in Midwest, Northeast, and West region, total number of diagnosis and procedures, days to bypass and post-procedural LOS. Our model was validated in an independent cohort and using 1000-bootstrapped replacement samples.

Conclusions

Identified drivers of hospital cost after ECIC bypass could potentially be used as an adjunct for creation of data driven policies, impact reimbursement criteria, aid in-hospital auditing, and in the cost containment debate.",2017-10-27 +27009807,"The Disease Portals, disease-gene annotation and the RGD disease ontology at the Rat Genome Database. ","The Rat Genome Database (RGD;http://rgd.mcw.edu/) provides critical datasets and software tools to a diverse community of rat and non-rat researchers worldwide. To meet the needs of the many users whose research is disease oriented, RGD has created a series of Disease Portals and has prioritized its curation efforts on the datasets important to understanding the mechanisms of various diseases. Gene-disease relationships for three species, rat, human and mouse, are annotated to capture biomarkers, genetic associations, molecular mechanisms and therapeutic targets. To generate gene-disease annotations more effectively and in greater detail, RGD initially adopted the MEDIC disease vocabulary from the Comparative Toxicogenomics Database and adapted it for use by expanding this framework with the addition of over 1000 terms to create the RGD Disease Ontology (RDO). The RDO provides the foundation for, at present, 10 comprehensive disease area-related dataset and analysis platforms at RGD, the Disease Portals. Two major disease areas are the focus of data acquisition and curation efforts each year, leading to the release of the related Disease Portals. Collaborative efforts to realize a more robust disease ontology are underway. Database URL:http://rgd.mcw.edu.",2016-03-23 +29848382,"Comparative transcriptomics of choroid plexus in Alzheimer's disease, frontotemporal dementia and Huntington's disease: implications for CSF homeostasis.","BACKGROUND:In Alzheimer's disease, there are striking changes in CSF composition that relate to altered choroid plexus (CP) function. 
Studying CP tissue gene expression at the blood-cerebrospinal fluid barrier could provide further insight into the epithelial and stromal responses to neurodegenerative disease states. METHODS:Transcriptome-wide Affymetrix microarrays were used to determine disease-related changes in gene expression in human CP. RNA from post-mortem samples of the entire lateral ventricular choroid plexus was extracted from 6 healthy controls (Ctrl), 7 patients with advanced (Braak and Braak stage III-VI) Alzheimer's disease (AD), 4 with frontotemporal dementia (FTD) and 3 with Huntington's disease (HuD). Statistics and agglomerative clustering were accomplished with MathWorks, MatLab; and gene set annotations by comparing input sets to GeneGo ( http://www.genego.com ) and Ingenuity ( http://www.ingenuity.com ) pathway sets. Bonferroni-corrected hypergeometric p-values of < 0.1 were considered a significant overlap between sets. RESULTS:Pronounced differences in gene expression occurred in CP of advanced AD patients vs. Ctrls. Metabolic and immune-related pathways including acute phase response, cytokine, cell adhesion, interferons, and JAK-STAT as well as mTOR were significantly enriched among the genes upregulated. Methionine degradation, claudin-5 and protein translation genes were downregulated. Many gene expression changes in AD patients were observed in FTD and HuD (e.g., claudin-5, tight junction downregulation), but there were significant differences between the disease groups. In AD and HuD (but not FTD), several neuroimmune-modulating interferons were significantly enriched (e.g., in AD: IFI-TM1, IFN-AR1, IFN-AR2, and IFN-GR2). AD-associated expression changes, but not those in HuD and FTD, were enriched for upregulation of VEGF signaling and immune response proteins, e.g., interleukins. HuD and FTD patients distinctively displayed upregulated cadherin-mediated adhesion. 
CONCLUSIONS:Our transcript data for human CP tissue provides genomic and mechanistic insight for differential expression in AD vs. FTD vs. HuD for stromal as well as epithelial components. These choroidal transcriptome characterizations elucidate immune activation, tissue functional resiliency, and CSF metabolic homeostasis. The BCSFB undergoes harmful, but also important functional and adaptive changes in neurodegenerative diseases; accordingly, the enriched JAK-STAT and mTOR pathways, respectively, likely help the CP in adaptive transcription and epithelial repair and/or replacement when harmed by neurodegeneration pathophysiology. We anticipate that these precise CP translational data will facilitate pharmacologic/transgenic therapies to alleviate dementia.",2018-05-31 +,Host‐associated genetic divergence and taxonomy in the Rhinusa pilosa Gyllenhal species complex: an integrative approach,"A combined taxonomic, morphological, molecular and biological study revealed that stem‐galling weevils from the genus Rhinusa associated with toadflaxes from the genus Linaria (Plantaginaceae) are composed of three different species: Rhinusa pilosa, Rhinusa brondelii and Rhinusa rara sp.n. The authentic field host plants are respectively, Linaria vulgaris, Linaria purpurea and Linaria genistifolia/ Linaria dalmatica. These weevil species can be distinguished from each other by a few subtle morphological characteristics, mainly in the shape of the rostrum and of the integument. An analysis of the mitochondrial [cytochrome oxidase subunit II gene (COII) and 16S ribosomal RNA gene (16S)] and nuclear (elongation factor‐1α, EF‐1α) sequence data revealed high genetic divergence among these species. Uncorrected pairwise distances on mtCOII gene were 14.3% between R. pilosa and R. brondelii, 15.7% between R. pilosa and R. rara, while R. brondelii and R. rara were approximately 11% divergent from each other. Divergences obtained on 16S and nuclear EF‐1α genes were congruent. 
However, substantial intraspecific mitochondrial divergence was recorded for all studied populations of R. pilosa s.s. showing two mtDNA lineages, with estimated COII and 16S divergences of 4% and 1.6%, respectively. Nuclear pseudogenes (Numts) and Wolbachia influence, although recorded within both lineages, were excluded as possible causatives of the mtDNA divergence, while EF‐1α indicated absence of lineage sorting. Species from the R. pilosa complex are estimated to have diverged from each other approximately 7.2 million years ago (mya; late Miocene), while R. brondelii and R. rara diverged from each other about 4.7 mya (early Pliocene). This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:EEDD6248‐01DB‐4B4A‐B79D‐C5606393E3AA.",2015-01-01 +29092002,"pStab: prediction of stable mutants, unfolding curves, stability maps and protein electrostatic frustration.","Summary:We present a web-server for rapid prediction of changes in protein stabilities over a range of temperatures and experimental conditions upon single- or multiple-point substitutions of charged residues. Potential mutants are identified by a charge-shuffling procedure while the stability changes (i.e. an unfolding curve) are predicted employing an ensemble-based statistical-mechanical model. We expect this server to be a simple yet detailed tool for engineering stabilities, identifying electrostatically frustrated residues, generating local stability maps and in constructing fitness landscapes. Availability and implementation:The web-server is freely available at http://pbl.biotech.iitm.ac.in/pStab and supports recent versions of all major browsers. Contact:athi@iitm.ac.in. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-03-01 +27777222,"Comprehensive assessment and performance improvement of effector protein predictors for bacterial secretion systems III, IV and VI.","Bacterial effector proteins secreted by various protein secretion systems play crucial roles in host-pathogen interactions. In this context, computational tools capable of accurately predicting effector proteins of the various types of bacterial secretion systems are highly desirable. Existing computational approaches use different machine learning (ML) techniques and heterogeneous features derived from protein sequences and/or structural information. These predictors differ not only in terms of the used ML methods but also with respect to the used curated data sets, the features selection and their prediction performance. Here, we provide a comprehensive survey and benchmarking of currently available tools for the prediction of effector proteins of bacterial types III, IV and VI secretion systems (T3SS, T4SS and T6SS, respectively). We review core algorithms, feature selection techniques, tool availability and applicability and evaluate the prediction performance based on carefully curated independent test data sets. In an effort to improve predictive performance, we constructed three ensemble models based on ML algorithms by integrating the output of all individual predictors reviewed. Our benchmarks demonstrate that these ensemble models outperform all the reviewed tools for the prediction of effector proteins of T3SS and T4SS. The webserver of the proposed ensemble methods for T3SS and T4SS effector protein prediction is freely available at http://tbooster.erc.monash.edu/index.jsp. 
We anticipate that this survey will serve as a useful guide for interested users and that the new ensemble predictors will stimulate research into host-pathogen relationships and inspiration for the development of new bioinformatics tools for predicting effector proteins of T3SS, T4SS and T6SS.",2018-01-01 +21367872,Human variation database: an open-source database template for genomic discovery.,"

Motivation

Current public variation databases are based upon collaboratively pooling data into a single database with a single interface available to the public. This gives little control to the collaborator to mine the database and requires that they freely share their data with the owners of the repository. We aim to provide an alternative mechanism: providing the source code and application programming interface (API) of a database, enabling researchers to set up local versions without investing heavily in the development of the resource and allowing for confidential information to remain secure.

Results

We describe an open-source database that can be installed easily at any research facility for the storage and analysis of thousands of next-generation sequencing variations. This database is built using PostgreSQL 8.4 (The PostgreSQL Global Development Group. postgres 8.4: http://www.postgresql.org) and provides a novel method for collating and searching across the reported results from thousands of next-generation sequence samples, as well as rapidly accessing vital information on the origin of the samples. The schema of the database makes rapid and insightful queries simple and enables easy annotation of novel or known genetic variations. A modular and cross-platform Java API is provided to perform common functions, such as generation of standard experimental reports and graphical summaries of modifications to genes. Included libraries allow adopters of the database to quickly develop their own queries.

Availability

The software is available for download through the Vancouver Short Read Analysis Package on Sourceforge, http://vancouvershortr.sourceforge.net. Instructions for use and deployment are provided on the accompanying wiki pages.

Contact

afejes@bcgsc.ca.",2011-03-02 +29181379,Molecular Autopsy for Sudden Death in the Young: Is Data Aggregation the Key?,"The Scripps molecular autopsy study seeks to incorporate genetic testing into the postmortem examination of cases of sudden death in the young (<45 years old). Here, we describe the results from the first 2 years of the study, which consisted of whole exome sequencing (WES) of a cohort of 50 cases predominantly from San Diego County. Apart from the individual description of cases, we analyzed the data at the cohort-level, which brought new perspectives on the genetic causes of sudden death. We investigated the advantages and disadvantages of using WES compared to a gene panel for cardiac disease (usually the first genetic test used by medical examiners). In an attempt to connect complex clinical phenotypes with genotypes, we classified samples by their genetic fingerprint. Finally, we studied the benefits of analyzing the mitochondrial DNA genome. In this regard, we found that half of the cases clinically diagnosed as sudden infant death syndrome had an increased ratio of heteroplasmic variants, and that the variants were also present in the mothers. We believe that community-based data aggregation and sharing will eventually lead to an improved classification of variants. Allele frequencies for the all cases can be accessed via our genomics browser at https://genomics.scripps.edu/browser.",2017-11-09 +29293938,"DyNetViewer: a Cytoscape app for dynamic network construction, analysis and visualization.","Summary:The molecular interactions in a cell are varying with time and surrounded environmental cues. The construction and analysis of dynamic molecular networks can elucidate dynamic cellular mechanisms of different biological functions and provide a chance to understand complex diseases at the systems level. 
Here, we develop DyNetViewer, a Cytoscape application that provides a range of functionalities for the construction, analysis and visualization of dynamic protein-protein interaction networks. The current version of DyNetViewer consists of four different dynamic network construction methods, twelve topological variation analysis methods and four clustering algorithms. Moreover, visualization of different topological variation of nodes and clusters over time enables users to quickly identify the most variations across many network states. Availability and implementation:DyNetViewer is freely available with tutorials at the Cytoscape (3.4+) App Store (http://apps.cytoscape.org/apps/dynetviewer). Contact:limin@mail.csu.edu.cn. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +29767825,[Palliative sedation : Development and consensus of a German language documentation template].,"BACKGROUND:Palliative sedation (PS) serves as a therapeutic option in cases of otherwise intractable suffering. As the use of sedative and hypnotic medication in many diverse situations is a core competency of anesthesiology, anesthesiologists are confronted with questions of sedative therapy at the end of life in institutions for specialized palliative care, in intensive care units and intermediate care wards. In recent years a number of guidelines have been published internationally but so far no official guidelines exist in Germany. The most recognized document is the European Association for Palliative Care (EAPC) framework on PS. This project aims to develop a German language template for the preparation, application, documentation and evaluation of PS according to the current frameworks, especially the EAPC framework on PS. METHODS:A first draft of the template was generated by the project team using the EAPC framework and individual templates of various institutions, which had been collected during a previous project. 
Professionals (n = 136) from inpatient and outpatient specialist palliative and hospice care were invited to assess all items of the draft regarding ""relevance"", ""wording"" and ""feasibility"" in an online Delphi survey (Unipark®, Questback, Cologne, Germany). After the second Delphi round an expert panel was asked to reflect the results and generate a final draft. Approval was granted if acceptance exceeded 75% of participants. RESULTS:The 3 rounds of the Delphi process were completed by 64, 46 and 41 participants, respectively. The Delphi process as well as the expert panel led to significant changes of the template. The indications for PS had to be clarified. The significance of documentation of vital parameters, such as oxygen saturation, blood pressure or respiratory rate during PS was intensively discussed. In many teams, predominantly hospice or outpatient palliative care teams, it seems to be difficult to measure these parameters or it is regarded as inappropriate in a palliative care setting. In contrast, the EAPC framework recommends monitoring of vital parameters in cases of intermittent or respite sedation. Finally, a solution was found to support documentation of additional data without the explicit mentioning of specific parameters. After the third Delphi round, all 16 items of the documentation template reached consensus with respect to relevance (82.9-100%), clarity of wording (80.5-100%), and feasibility in practice (78-100%). CONCLUSION:This article provides an empirically based, multiprofessional consented documentation template for PS. Core elements of the documentation of PS are the indications and the decision process towards PS. During the treatment, at least the level of sedation and the symptom burden have to be recorded. The documentation of vital signs during PS remains a highly disputed topic. 
The presented data suggest that especially in outpatient settings and in hospices measuring and documentation of vital parameters is uncommon and therefore is often regarded as not feasible. This template can help to support the medically and ethically sound use of PS and facilitate research. The template can be accessed at http://www.palliativmedizin.uk-erlangen.de/forschung/downloads/ .",2018-07-01 +28398460,TIN-X: target importance and novelty explorer.,"

Motivation

The increasing amount of peer-reviewed manuscripts requires the development of specific mining tools to facilitate the visual exploration of evidence linking diseases and proteins.

Results

We developed TIN-X, the Target Importance and Novelty eXplorer, to visualize the association between proteins and diseases, based on text mining data processed from scientific literature. In the current implementation, TIN-X supports exploration of data for G-protein coupled receptors, kinases, ion channels, and nuclear receptors. TIN-X supports browsing and navigating across proteins and diseases based on ontology classes, and displays a scatter plot with two proposed new bibliometric statistics: Importance and Novelty.

Availability and implementation

http://www.newdrugtargets.org.

Contact

cbologa@salud.unm.edu.",2017-08-01 +26719891,"Schistosoma mansoni Egg, Adult Male and Female Comparative Gene Expression Analysis and Identification of Novel Genes by RNA-Seq.","

Background

Schistosomiasis is one of the most prevalent parasitic diseases worldwide and is a public health problem. Schistosoma mansoni is the most widespread species responsible for schistosomiasis in the Americas, Middle East and Africa. Adult female worms (mated to males) release eggs in the hepatic portal vasculature and are the principal cause of morbidity. Comparative separate transcriptomes of female and male adult worms were previously assessed with using microarrays and Serial Analysis of Gene Expression (SAGE), thus limiting the possibility of finding novel genes. Moreover, the egg transcriptome was analyzed only once with limited bacterially cloned cDNA libraries.

Methodology/principal findings

To compare the gene expression of S. mansoni eggs, females, and males, we performed RNA-Seq on these three parasite forms using 454/Roche technology and reconstructed the transcriptome using Trinity de novo assembly. The resulting contigs were mapped to the genome and were cross-referenced with predicted Smp genes and H3K4me3 ChIP-Seq public data. For the first time, we obtained separate, unbiased gene expression profiles for S. mansoni eggs and female and male adult worms, identifying enriched biological processes and specific enriched functions for each of the three parasite forms. Transcripts with no match to predicted genes were analyzed for their protein-coding potential and the presence of an encoded conserved protein domain. A set of 232 novel protein-coding genes with putative functions related to reproduction, metabolism, and cell biogenesis was detected, which contributes to the understanding of parasite biology.

Conclusions/significance

Large-scale RNA-Seq analysis using de novo assembly associated with genome-wide information for histone marks in the vicinity of gene models constitutes a new approach to transcriptome analysis that has not yet been explored in schistosomes. Importantly, all data have been consolidated into a UCSC Genome Browser search- and download-tool (http://schistosoma.usp.br/). This database provides new ways to explore the schistosome genome and transcriptome and will facilitate molecular research on this important parasite.",2015-12-31 +25204646,'Isotopo' a database application for facile analysis and management of mass isotopomer data. ,"The composition of stable-isotope labelled isotopologues/isotopomers in metabolic products can be measured by mass spectrometry and supports the analysis of pathways and fluxes. As a prerequisite, the original mass spectra have to be processed, managed and stored to rapidly calculate, analyse and compare isotopomer enrichments to study, for instance, bacterial metabolism in infection. For such applications, we provide here the database application 'Isotopo'. This software package includes (i) a database to store and process isotopomer data, (ii) a parser to upload and translate different data formats for such data and (iii) an improved application to process and convert signal intensities from mass spectra of (13)C-labelled metabolites such as tertbutyldimethylsilyl-derivatives of amino acids. Relative mass intensities and isotopomer distributions are calculated applying a partial least square method with iterative refinement for high precision data. The data output includes formats such as graphs for overall enrichments in amino acids. The package is user-friendly for easy and robust data management of multiple experiments. The 'Isotopo' software is available at the following web link (section Download): http://spp1316.uni-wuerzburg.de/bioinformatics/isotopo/. 
The package contains three additional files: software executable setup (installer), one data set file (discussed in this article) and one excel file (which can be used to convert data from excel to '.iso' format). The 'Isotopo' software is compatible only with the Microsoft Windows operating system. http://spp1316.uni-wuerzburg.de/bioinformatics/isotopo/.",2014-09-09 +25841438,Generating a focused view of disease ontology cancer terms for pan-cancer data integration and analysis.,"Bio-ontologies provide terminologies for the scientific community to describe biomedical entities in a standardized manner. There are multiple initiatives that are developing biomedical terminologies for the purpose of providing better annotation, data integration and mining capabilities. Terminology resources devised for multiple purposes inherently diverge in content and structure. A major issue of biomedical data integration is the development of overlapping terms, ambiguous classifications and inconsistencies represented across databases and publications. The disease ontology (DO) was developed over the past decade to address data integration, standardization and annotation issues for human disease data. We have established a DO cancer project to be a focused view of cancer terms within the DO. The DO cancer project mapped 386 cancer terms from the Catalogue of Somatic Mutations in Cancer (COSMIC), The Cancer Genome Atlas (TCGA), International Cancer Genome Consortium, Therapeutically Applicable Research to Generate Effective Treatments, Integrative Oncogenomics and the Early Detection Research Network into a cohesive set of 187 DO terms represented by 63 top-level DO cancer terms. 
For example, the COSMIC term 'kidney, NS, carcinoma, clear_cell_renal_cell_carcinoma' and TCGA term 'Kidney renal clear cell carcinoma' were both grouped to the term 'Disease Ontology Identification (DOID):4467 / renal clear cell carcinoma' which was mapped to the TopNodes_DOcancerslim term 'DOID:263 / kidney cancer'. Mapping of diverse cancer terms to DO and the use of top level terms (DO slims) will enable pan-cancer analysis across datasets generated from any of the cancer term sources where pan-cancer means including or relating to all or multiple types of cancer. The terms can be browsed from the DO web site (http://www.disease-ontology.org) and downloaded from the DO's Apache Subversion or GitHub repositories. Database URL: http://www.disease-ontology.org",2015-04-04 +28937962,Portable Functional Neuroimaging as an Environmental Epidemiology Tool: A How-To Guide for the Use of fNIRS in Field Studies.,"

Summary

The widespread application of functional neuroimaging within the field of environmental epidemiology has the potential to greatly enhance our understanding of how environmental toxicants affect brain function. Because many epidemiological studies take place in remote and frequently changing environments, it is necessary that the primary neuroimaging approach adopted by the epidemiology community be robust to many environments, easy to use, and, preferably, mobile. Here, we outline our use of functional near-infrared spectroscopy (fNIRS) to collect functional brain imaging data from Costa Rican farm workers enrolled in an epidemiological study on the health effects of chronic pesticide exposure. While couched in this perspective, we focus on the methodological considerations that are necessary to conduct a mobile fNIRS study in a diverse range of environments. Thus, this guide is intended to be generalizable to all research scenarios and projects in which fNIRS may be used to collect functional brain imaging data in epidemiological field surveys. https://doi.org/10.1289/EHP2049.",2017-09-21 +30423073,iCFN: an efficient exact algorithm for multistate protein design.,"

Motivation

Multistate protein design addresses real-world challenges, such as multi-specificity design and backbone flexibility, by considering both positive and negative protein states with an ensemble of substates for each. It also presents an enormous challenge to exact algorithms that guarantee the optimal solutions and enable a direct test of mechanistic hypotheses behind models. However, efficient exact algorithms are lacking for multistate protein design.

Results

We have developed an efficient exact algorithm called interconnected cost function networks (iCFN) for multistate protein design. Its generic formulation allows for a wide array of applications such as stability, affinity and specificity designs while addressing concerns such as global flexibility of protein backbones. iCFN treats each substate design as a weighted constraint satisfaction problem (WCSP) modeled through a CFN; and it solves the coupled WCSPs using novel bounds and a depth-first branch-and-bound search over a tree structure of sequences, substates, and conformations. When iCFN is applied to specificity design of a T-cell receptor, a problem of unprecedented size to exact methods, it drastically reduces search space and running time to make the problem tractable. Moreover, iCFN generates experimentally-agreeing receptor designs with improved accuracy compared with state-of-the-art methods, highlights the importance of modeling backbone flexibility in protein design, and reveals molecular mechanisms underlying binding specificity.

Availability and implementation

https://shen-lab.github.io/software/iCFN.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-09-01 +25361970,PomBase 2015: updates to the fission yeast database.,"PomBase (http://www.pombase.org) is the model organism database for the fission yeast Schizosaccharomyces pombe. PomBase provides a central hub for the fission yeast community, supporting both exploratory and hypothesis-driven research. It provides users easy access to data ranging from the sequence level, to molecular and phenotypic annotations, through to the display of genome-wide high-throughput studies. Recent improvements to the site extend annotation specificity, improve usability and allow for monthly data updates. Both in-house curators and community researchers provide manually curated data to PomBase. The genome browser provides access to published high-throughput data sets and the genomes of three additional Schizosaccharomyces species (Schizosaccharomyces cryophilus, Schizosaccharomyces japonicus and Schizosaccharomyces octosporus).",2014-10-31 +28605501,EpiCompare: an online tool to define and explore genomic regions with tissue or cell type-specific epigenomic features.,"

Motivation

The Human Reference Epigenome Map, generated by the Roadmap Epigenomics Consortium, contains thousands of genome-wide epigenomic datasets that describe epigenomes of a variety of different human tissue and cell types. This map has allowed investigators to obtain a much deeper and more comprehensive view of our regulatory genome, e.g. defining regulatory elements including all promoters and enhancers for a given tissue or cell type. An outstanding task is to combine and compare different epigenomes in order to identify regions with epigenomic features specific to certain types of tissues or cells, e.g. lineage-specific regulatory elements. Currently available tools do not directly address this question. This need motivated us to develop a tool that allows investigators to easily identify regions with epigenetic features unique to specific epigenomes that they choose, making detection of common regulatory elements and/or cell type-specific regulatory elements an interactive and dynamic experience.

Results

An online tool EpiCompare was developed to assist investigators in exploring the specificity of epigenomic features across selected tissue and cell types. Investigators can design their test by choosing different combinations of epigenomes, and choosing different classification algorithms provided by our tool. EpiCompare will then identify regions with specified epigenomic features, and provide a quality assessment of the predictions. Investigators can interact with EpiCompare by investigating Roadmap Epigenomics data, or uploading their own data for comparison. We demonstrate that by using specific combinations of epigenomes we can detect developmental lineage-specific enhancers. Finally, prediction results can be readily visualized and further explored in the WashU Epigenome Browser.

Availability and implementation

EpiCompare is freely available on the web at http://epigenome.wustl.edu/EpiCompare/.

Contact

twang@genetics.wustl.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +26876982,ChemProt-3.0: a global chemical biology diseases mapping. ,"ChemProt is a publicly available compilation of chemical-protein-disease annotation resources that enables the study of systems pharmacology for a small molecule across multiple layers of complexity from molecular to clinical levels. In this third version, ChemProt has been updated to more than 1.7 million compounds with 7.8 million bioactivity measurements for 19,504 proteins. Here, we report the implementation of global pharmacological heatmap, supporting a user-friendly navigation of chemogenomics space. This facilitates the visualization and selection of chemicals that share similar structural properties. In addition, the user has the possibility to search by compound, target, pathway, disease and clinical effect. Genetic variations associated to target proteins were integrated, making it possible to plan pharmacogenetic studies and to suggest human response variability to drug. Finally, Quantitative Structure-Activity Relationship models for 850 proteins having sufficient data were implemented, enabling secondary pharmacological profiling predictions from molecular structure. Database URL: http://potentia.cbs.dtu.dk/ChemProt/.",2016-02-13 +25536965,MACE: mutation-oriented profiling of chemical response and gene expression in cancers.,"

Summary

The mutational status of specific cancer lineages can affect the sensitivity to or resistance against cancer drugs. The MACE database provides web-based interactive tools for interpreting large chemical screening and gene expression datasets of cancer cell lines in terms of mutation and lineage categories. GI50 data of chemicals against individual NCI60 cell lines were normalized and organized to statistically identify mutation- or lineage-specific chemical responses. Similarly, DNA microarray data on NCI60 cell lines were processed to analyze mutation- or lineage-specific gene expression signatures. A combined analysis of GI50 and gene expression data to find potential associations between chemicals and genes is also a capability of this system. This database will provide extensive, systematic information to identify lineage- or mutation-specific anticancer agents and related gene targets.

Availability and implementation

The MACE web database is available at http://mace.sookmyung.ac.kr/.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

yoonsj@sookmyung.ac.kr.",2014-12-22 +24326458,Genetic dissection of drought tolerance in chickpea (Cicer arietinum L.).,"

Key message

Analysis of phenotypic data for 20 drought tolerance traits in 1-7 seasons at 1-5 locations together with genetic mapping data for two mapping populations provided 9 QTL clusters of which one present on CaLG04 has a high potential to enhance drought tolerance in chickpea improvement. Chickpea (Cicer arietinum L.) is the second most important grain legume cultivated by resource poor farmers in the arid and semi-arid regions of the world. Drought is one of the major constraints leading up to 50% production losses in chickpea. In order to dissect the complex nature of drought tolerance and to use genomics tools for enhancing yield of chickpea under drought conditions, two mapping populations-ICCRIL03 (ICC 4958 × ICC 1882) and ICCRIL04 (ICC 283 × ICC 8261) segregating for drought tolerance-related root traits were phenotyped for a total of 20 drought component traits in 1-7 seasons at 1-5 locations in India. Individual genetic maps comprising 241 loci and 168 loci for ICCRIL03 and ICCRIL04, respectively, and a consensus genetic map comprising 352 loci were constructed ( http://cmap.icrisat.ac.in/cmap/sm/cp/varshney/). Analysis of extensive genotypic and precise phenotypic data revealed 45 robust main-effect QTLs (M-QTLs) explaining up to 58.20% phenotypic variation and 973 epistatic QTLs (E-QTLs) explaining up to 92.19% phenotypic variation for several target traits. Nine QTL clusters containing QTLs for several drought tolerance traits have been identified that can be targeted for molecular breeding. Among these clusters, one cluster harboring 48% robust M-QTLs for 12 traits and explaining about 58.20% phenotypic variation present on CaLG04 has been referred as ""QTL-hotspot"". This genomic region contains seven SSR markers (ICCM0249, NCPGR127, TAA170, NCPGR21, TR11, GA24 and STMS11). 
Introgression of this region into elite cultivars is expected to enhance drought tolerance in chickpea.",2013-12-11 +29718103,A powerful approach reveals numerous expression quantitative trait haplotypes in multiple tissues.,"Motivation:Recently many studies showed single nucleotide polymorphisms (SNPs) affect gene expression and contribute to development of complex traits/diseases in a tissue context-dependent manner. However, little is known about haplotype's influence on gene expression and complex traits, which reflects the interaction effect between SNPs. Results:In the present study, we firstly proposed a regulatory region guided eQTL haplotype association analysis approach, and then systematically investigate the expression quantitative trait loci (eQTL) haplotypes in 20 different tissues by the approach. The approach has a powerful design of reducing computational burden by the utilization of regulatory predictions for candidate SNP selection and multiple testing corrections on non-independent haplotypes. The application results in multiple tissues showed that haplotype-based eQTLs not only increased the number of eQTL genes in a tissue specific manner, but were also enriched in loci that associated with complex traits in a tissue-matched manner. In addition, we found that tag SNPs of eQTL haplotypes from whole blood were selectively enriched in certain combination of regulatory elements (e.g. promoters and enhancers) according to predicted chromatin states. In summary, this eQTL haplotype detection approach, together with the application results, shed insights into synergistic effect of sequence variants on gene expression and their susceptibility to complex diseases. Availability and implementation:The executable application 'eHaplo' is implemented in Java and is publicly available at http://grass.cgs.hku.hk/limx/ehaplo/. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-09-01 +29635502,Infant feeding and growth: putting the horse before the cart.,"Background:Previous observational studies have consistently shown slower weight and length gains in infants with prolonged breastfeeding than in those who were formula-fed from birth or breastfed for a shorter duration. These studies inferred that prolonged breastfeeding causes slower growth in infancy. Objective:We compared infant growth associated with ≥12 mo of breastfeeding with a shorter duration of breastfeeding on the basis of 3 different analytic approaches to the same data from a randomized trial: intention-to-treat (ITT; ""as randomized""), observational (""as fed""), and instrumental variable (IV; by using randomization as an ""instrument"" to achieve ≥12 mo of breastfeeding). Design:This was a cluster-randomized trial of a breastfeeding-promotion intervention. Anthropometric measurements were obtained at birth and at 1, 2, 3, 6, 9, and 12 mo. Results:The 3 analytic approaches yielded different results. The ITT approach showed more rapid growth in the first 2 mo among infants randomly assigned to the breastfeeding-promotion intervention than among control infants, with a decreasing difference over the ensuing months and nearly identical weight, length, and body mass index by 12 mo. The observational analysis showed a different trend: higher weight and length in infants who were breastfed ≥12 mo than in those who were breastfed <12 mo during the first 3 mo and no difference by 6 mo, while infants who were breastfed <12 mo showed increasingly higher weight and length from 6 to 12 mo. The IV analysis showed a temporal pattern that was similar to that seen in the ITT analysis, but with larger (and less precise) differences between infants breastfed for ≥12 compared with <12 mo. 
Conclusions:We observed major differences in experimental (ITT and IV) compared with observational approaches to analyzing data obtained from the same children. These approaches lead to opposite causal inferences about the relation between infant feeding and growth and underline the importance of ensuring that the postulated cause (feeding) temporally precedes its hypothesized effect (growth). This trial is registered at http://www.isrctn.org/ as ISRCTN37687716.",2018-04-01 +29672674,TRUmiCount: correctly counting absolute numbers of molecules using unique molecular identifiers.,"Motivation:Counting molecules using next-generation sequencing (NGS) suffers from PCR amplification bias, which reduces the accuracy of many quantitative NGS-based experimental methods such as RNA-Seq. This is true even if molecules are made distinguishable using unique molecular identifiers (UMIs) before PCR amplification, and distinct UMIs are counted instead of reads: Molecules that are lost entirely during the sequencing process will still cause underestimation of the molecule count, and amplification artifacts like PCR chimeras create phantom UMIs and thus cause over-estimation. Results:We introduce the TRUmiCount algorithm to correct for both types of errors. The TRUmiCount algorithm is based on a mechanistic model of PCR amplification and sequencing, whose two parameters have an immediate physical interpretation as PCR efficiency and sequencing depth and can be estimated from experimental data without requiring calibration experiments or spike-ins. We show that our model captures the main stochastic properties of amplification and sequencing, and that it allows us to filter out phantom UMIs and to estimate the number of molecules lost during the sequencing process. 
Finally, we demonstrate that the phantom-filtered and loss-corrected molecule counts computed by TRUmiCount measure the true number of molecules with considerably higher accuracy than the raw number of distinct UMIs, even if most UMIs are sequenced only once as is typical for single-cell RNA-Seq. Availability and implementation:TRUmiCount is available at http://www.cibiv.at/software/trumicount and through Bioconda (http://bioconda.github.io). Supplementary information:Supplementary information is available at Bioinformatics online.",2018-09-01 +27565795,mRAISE: an alternative algorithmic approach to ligand-based virtual screening.,"Ligand-based virtual screening is a well established method to find new lead molecules in todays drug discovery process. In order to be applicable in day to day practice, such methods have to face multiple challenges. The most important part is the reliability of the results, which can be shown and compared in retrospective studies. Furthermore, in the case of 3D methods, they need to provide biologically relevant molecular alignments of the ligands, that can be further investigated by a medicinal chemist. Last but not least, they have to be able to screen large databases in reasonable time. Many algorithms for ligand-based virtual screening have been proposed in the past, most of them based on pairwise comparisons. Here, a new method is introduced called mRAISE. Based on structural alignments, it uses a descriptor-based bitmap search engine (RAISE) to achieve efficiency. Alignments created on the fly by the search engine get evaluated with an independent shape-based scoring function also used for ranking of compounds. The correct ranking as well as the alignment quality of the method are evaluated and compared to other state of the art methods. 
On the commonly used Directory of Useful Decoys dataset mRAISE achieves an average area under the ROC curve of 0.76, an average enrichment factor at 1 % of 20.2 and an average hit rate at 1 % of 55.5. With these results, mRAISE is always among the top performing methods with available data for comparison. To access the quality of the alignments calculated by ligand-based virtual screening methods, we introduce a new dataset containing 180 prealigned ligands for 11 diverse targets. Within the top ten ranked conformations, the alignment closest to X-ray structure calculated with mRAISE has a root-mean-square deviation of less than 2.0 Å for 80.8 % of alignment pairs and achieves a median of less than 2.0 Å for eight of the 11 cases. The dataset used to rate the quality of the calculated alignments is freely available at http://www.zbh.uni-hamburg.de/mraise-dataset.html . The table of all PDB codes contained in the ensembles can be found in the supplementary material. The software tool mRAISE is freely available for evaluation purposes and academic use (see http://www.zbh.uni-hamburg.de/raise ).",2016-08-26 +30233295,Deep Supervised Learning Using Local Errors.,"Error backpropagation is a highly effective mechanism for learning high-quality hierarchical features in deep networks. Updating the features or weights in one layer, however, requires waiting for the propagation of error signals from higher layers. Learning using delayed and non-local errors makes it hard to reconcile backpropagation with the learning mechanisms observed in biological neural networks as it requires the neurons to maintain a memory of the input long enough until the higher-layer errors arrive. In this paper, we propose an alternative learning mechanism where errors are generated locally in each layer using fixed, random auxiliary classifiers. 
Lower layers could thus be trained independently of higher layers and training could either proceed layer by layer, or simultaneously in all layers using local error information. We address biological plausibility concerns such as weight symmetry requirements and show that the proposed learning mechanism based on fixed, broad, and random tuning of each neuron to the classification categories outperforms the biologically-motivated feedback alignment learning technique on the CIFAR10 dataset, approaching the performance of standard backpropagation. Our approach highlights a potential biological mechanism for the supervised, or task-dependent, learning of feature hierarchies. In addition, we show that it is well suited for learning deep networks in custom hardware where it can drastically reduce memory traffic and data communication overheads. Code used to run all learning experiments is available under https://gitlab.com/hesham-mostafa/learning-using-local-erros.git.",2018-08-31 +28552721,Improved metrics for comparing structures of macromolecular assemblies determined by 3D electron-microscopy.,"Recent developments in 3-dimensional electron microcopy (3D-EM) techniques and a concomitant drive to look at complex molecular structures, have led to a rapid increase in the amount of volume data available for biomolecules. This creates a demand for better methods to analyse the data, including improved scores for comparison, classification and integration of data at different resolutions. To this end, we developed and evaluated a set of scoring functions that compare 3D-EM volumes. To test our scores we used a benchmark set of volume alignments derived from the Electron Microscopy Data Bank. We find that the performance of different scores vary with the map-type, resolution and the extent of overlap between volumes. Importantly, adding the overlap information to the local scoring functions can significantly improve their precision and accuracy in a range of resolutions. 
A combined score involving the local mutual information and overlap (LMI_OV) performs best overall, irrespective of the map category, resolution or the extent of overlap, and we recommend this score for general use. The local mutual information score itself is found to be more discriminatory than cross-correlation coefficient for intermediate-to-low resolution maps or when the map size and density distribution differ significantly. For comparing map surfaces, we implemented two filters to detect the surface points, including one based on the 'extent of surface exposure'. We show that scores that compare surfaces are useful at low resolutions and for maps with evident surface features. All the scores discussed are implemented in TEMPy (http://tempy.ismb.lon.ac.uk/).",2017-05-25 +30166314,One drug-sensitive subunit is sufficient for a near-maximal retigabine effect in KCNQ channels.,"Retigabine is an antiepileptic drug and the first voltage-gated potassium (Kv) channel opener to be approved for human therapeutic use. Retigabine is thought to interact with a conserved Trp side chain in the pore of KCNQ2-5 (Kv7.2-7.5) channels, causing a pronounced hyperpolarizing shift in the voltage dependence of activation. In this study, we investigate the functional stoichiometry of retigabine actions by manipulating the number of retigabine-sensitive subunits in concatenated KCNQ3 channel tetramers. We demonstrate that intermediate retigabine concentrations cause channels to exhibit biphasic conductance-voltage relationships rather than progressive concentration-dependent shifts. This suggests that retigabine can exert its effects in a nearly ""all-or-none"" manner, such that channels exhibit either fully shifted or unshifted behavior. Supporting this notion, concatenated channels containing only a single retigabine-sensitive subunit exhibit a nearly maximal retigabine effect. 
Also, rapid solution exchange experiments reveal delayed kinetics during channel closure, as retigabine dissociates from channels with multiple drug-sensitive subunits. Collectively, these data suggest that a single retigabine-sensitive subunit can generate a large shift of the KCNQ3 conductance-voltage relationship. In a companion study (Wang et al. 2018. J. Gen. Physiol. https://doi.org/10.1085/jgp.201812014), we contrast these findings with the stoichiometry of a voltage sensor-targeted KCNQ channel opener (ICA-069673), which requires four drug-sensitive subunits for maximal effect.",2018-08-30 +30592257,Urinary Tract Infection in Children.,"

Background

Urinary Tract Infection (UTI) is a common infection in children. Prompt diagnosis and appropriate treatment are very important to reduce the morbidity associated with this condition.

Objective

To provide an update on the evaluation, diagnosis, and treatment of urinary tract infection in children.

Methods

A PubMed search was completed in clinical queries using the key terms ""urinary tract infection"", ""pyelonephritis"" OR ""cystitis"". The search strategy included meta-analyses, randomized controlled trials, clinical trials, observational studies, and reviews. The search was restricted to English literature and the pediatric age group. Patents were searched using the key terms ""urinary tract infection"" ""pyelonephritis"" OR ""cystitis"" from www.google.com/patents, http://espacenet.com, and www.freepatentsonline.com.

Results

Escherichia coli accounts for 80 to 90% of UTI in children. The symptoms and signs are nonspecific throughout infancy. Unexplained fever is the most common symptom of UTI during the first two years of life. After the second year of life, symptoms and signs of pyelonephritis include fever, chills, rigor, flank pain, and costovertebral angle tenderness. Lower tract symptoms and signs include suprapubic pain, dysuria, urinary frequency, urgency, cloudy urine, malodorous urine, and suprapubic tenderness. A urinalysis and urine culture should be performed when UTI is suspected. In the work-up of children with UTI, physicians must judiciously utilize imaging studies to minimize exposure of children to radiation. While waiting for the culture results, prompt antibiotic therapy is indicated for symptomatic UTI based on clinical findings and positive urinalysis to eradicate the infection and improve clinical outcome. The choice of antibiotics should take into consideration local data on antibiotic resistance patterns. Recent patents related to the management of UTI are discussed.

Conclusion

Currently, a second or third generation cephalosporin and amoxicillin-clavulanate are drugs of choice in the treatment of acute uncomplicated UTI. Parenteral antibiotic therapy is recommended for infants ≤ 2 months and any child who is toxic-looking, hemodynamically unstable, immunocompromised, unable to tolerate oral medication, or not responding to oral medication. A combination of intravenous ampicillin and intravenous/intramuscular gentamycin or a third-generation cephalosporin can be used in those situations. Routine antimicrobial prophylaxis is rarely justified, but continuous antimicrobial prophylaxis should be considered for children with frequent febrile UTI.",2019-01-01 +28696583,"Mapping medical marijuana: state laws regulating patients, product safety, supply chains and dispensaries, 2017.","

Aims

(1) To describe open source legal data sets, created for research use, that capture the key provisions of US state medical marijuana laws. The data document how state lawmakers have regulated a medicine that remains, under federal law, a Schedule I illegal drug with no legitimate medical use. (2) To demonstrate the variability that exists across states in rules governing patient access, product safety and dispensary practice.

Methods

Two legal researchers collected and coded state laws governing marijuana patients, product safety and dispensaries in effect on 1 February 2017, creating three empirical legal data sets. We used summary tables to identify the variation in specific statutory provisions specified in each state's medical marijuana law as it existed on 1 February 2017. We compared aspects of these laws to the traditional Federal approach to regulating medicine. Full data sets, codebooks and protocols are available through the Prescription Drug Abuse Policy System (http://www.pdaps.org/; Archived at http://www.webcitation.org/6qv5CZNaZ on 2 June 2017).

Results

Twenty-eight states (including the District of Columbia) have authorized medical marijuana. Twenty-seven specify qualifying diseases, which differ across states. All states protect patient privacy; only 14 protect patients against discrimination. Eighteen states have mandatory product safety testing before any sale. While the majority have package/label regulations, states have a wide range of specific requirements. Most regulate dispensaries (25 states), with considerable variation in specific provisions such as permitted product supply sources, number of dispensaries per state and restricting proximity to various types of location.

Conclusions

The federal ban in the United States on marijuana has resulted in a patchwork of regulatory strategies that are not uniformly consistent with the approach usually taken by the Federal government and whose effectiveness remains unknown.",2017-07-21 +30483962,New sagittal classification of AIS: validation by 3D characterization.,"

Introduction and aim

In order to improve surgical planning of sagittal correction in AIS, we proposed a new sagittal classification-Abelin-Genevois et al. Eur Spine J (27(9):2192-2202, 2018. https://doi.org/10.1007/s00586-018-5613-1 ). The main criticism is related to the fact that 2D lateral view results from the projection of the 3D deformity. The aim of this study is to show that the new sagittal classification system is a reliable system to describe the different sagittal scenarios that AIS could create both in 2D and 3D.

Methods

We performed retrospective radiograph analysis of prospectively collected data from 93 consecutive AIS patients who underwent an examination of the whole spine using the EOS® imaging system. 2D (Keops®) and 3D analyses (sterEOS®) provided frontal and sagittal spinal and spinopelvic parameters. In addition, 3D analysis provided apical vertebra rotation (AVR).

Results

Comparing 2D and 3D measurements for the general cohort, excellent correlation can be found for all parameters, but only fairly good for T10L2 and L1S1 angles. The highest variability was observed for T10L2, differences between 2D and 3D measurements being greater when the Cobb angle increased. AVR did not influence concordance between 2D and 3D measurements. Eighty-two percent were similarly classified in 2D and 3D according to the new classification. Misclassified patients were all AIS sagittal type 3 in 3D analysis, thoracolumbar junction (TLJ) lordosis being underestimated on 2D view.

Discussion

In conclusion, for the majority of cases (82%), 2D analysis may provide enough information for decision making when using a semi-automated 2D measurement system. However, in severe cases, especially when Cobb angle exceeds 55°, 3D analysis should be used to get a more accurate view on the thoracolumbar junction behavior. These slides can be retrieved under Electronic Supplementary Material.",2018-11-27 +25332399,Mouse Tumor Biology (MTB): a database of mouse models for human cancer.,"The Mouse Tumor Biology (MTB; http://tumor.informatics.jax.org) database is a unique online compendium of mouse models for human cancer. MTB provides online access to expertly curated information on diverse mouse models for human cancer and interfaces for searching and visualizing data associated with these models. The information in MTB is designed to facilitate the selection of strains for cancer research and is a platform for mining data on tumor development and patterns of metastases. MTB curators acquire data through manual curation of peer-reviewed scientific literature and from direct submissions by researchers. Data in MTB are also obtained from other bioinformatics resources including PathBase, the Gene Expression Omnibus and ArrayExpress. Recent enhancements to MTB improve the association between mouse models and human genes commonly mutated in a variety of cancers as identified in large-scale cancer genomics studies, provide new interfaces for exploring regions of the mouse genome associated with cancer phenotypes and incorporate data and information related to Patient-Derived Xenograft models of human cancers.",2014-10-20 +25209223,"ModelOMatic: fast and automated model selection between RY, nucleotide, amino acid, and codon substitution models.","Molecular phylogenetics is a powerful tool for inferring both the process and pattern of evolution from genomic sequence data. 
Statistical approaches, such as maximum likelihood and Bayesian inference, are now established as the preferred methods of inference. The choice of models that a researcher uses for inference is of critical importance, and there are established methods for model selection conditioned on a particular type of data, such as nucleotides, amino acids, or codons. A major limitation of existing model selection approaches is that they can only compare models acting upon a single type of data. Here, we extend model selection to allow comparisons between models describing different types of data by introducing the idea of adapter functions, which project aggregated models onto the originally observed sequence data. These projections are implemented in the program ModelOMatic and used to perform model selection on 3722 families from the PANDIT database, 68 genes from an arthropod phylogenomic data set, and 248 genes from a vertebrate phylogenomic data set. For the PANDIT and arthropod data, we find that amino acid models are selected for the overwhelming majority of alignments; with progressively smaller numbers of alignments selecting codon and nucleotide models, and no families selecting RY-based models. In contrast, nearly all alignments from the vertebrate data set select codon-based models. The sequence divergence, the number of sequences, and the degree of selection acting upon the protein sequences may contribute to explaining this variation in model selection. Our ModelOMatic program is fast, with most families from PANDIT taking fewer than 150 s to complete, and should therefore be easily incorporated into existing phylogenetic pipelines. 
ModelOMatic is available at https://code.google.com/p/modelomatic/.",2014-09-09 +30821159,The PERSIAN Guilan Cohort Study (PGCS).,"The Guilan cohort study was conducted on 10520 men and women between 35-70 years of age in Guilan province and Some'e Sara county, northern Iran, from October 8, 2014 to January 20, 2017 as part of the Prospective Epidemiological Research Studies in Iran (PERSIAN). Eligible participants were contacted over the phone and were invited to refer to the cohort center. Demographic information was inquired during the phone call. Upon arrival of participants at the cohort center, consent forms were filled out and additional data on demographic characteristics, socio-economic status, employment, fuel status and location, lifestyle habits, and sleep and food habits were obtained. Blood pressure and anthropometric indices were measured. Finally, biological samples were collected. There was a participation rate of 83.2%, and a 15-year active follow-up was planned for all of the participants. The results showed that 53.5% of the participants were female and 56.1% of the participants were rural residents. A total of 1738 participants (16.5%) were illiterate. Of the total cohort participants, 4543 (43.2%) were hypertensive. Hypertension was defined as a systolic blood pressure ≥140 mm Hg or a diastolic blood pressure ≥90 mm Hg, or a prior diagnosis of hypertension by a health professional, or taking antihypertensive medications. Approximately one-third of participants (n=3435 or 32.7%) were obese, and most were females (n=2647, 77.1%). Prevalence of diabetes (defined as fasting blood sugar equal or higher than 126 mg/dL or history of diagnosis with diabetes or taking glucose lowering medication) was 24.1% (20.2 % in males and 27.3% in females). We also obtained laboratory samples for basic and genetic scientific research. According to laboratory evaluations, 3,585 (34.1%) of the participants had hematuria, and most of them were women (n=2151 or 60%). 
The preliminary results of our study demonstrate a high prevalence of metabolic risk factors for Non-Communicable Diseases and mainly cardiovascular diseases in Guilan province, which merit detailed investigation of their intricate relationships. The population-based design of the study as well as its large sample size were the main strengths of our cohort study that makes these investigations feasible. Researchers interested in using the information are invited to visit the following websites: http://www.gums.ac.ir/cohort and http://persiancohort.com/.",2019-01-01 +25398901,MethHC: a database of DNA methylation and gene expression in human cancer.,"We present MethHC (http://MethHC.mbc.nctu.edu.tw), a database comprising a systematic integration of a large collection of DNA methylation data and mRNA/microRNA expression profiles in human cancer. DNA methylation is an important epigenetic regulator of gene transcription, and genes with high levels of DNA methylation in their promoter regions are transcriptionally silent. Increasing numbers of DNA methylation and mRNA/microRNA expression profiles are being published in different public repositories. These data can help researchers to identify epigenetic patterns that are important for carcinogenesis. MethHC integrates data such as DNA methylation, mRNA expression, DNA methylation of microRNA gene and microRNA expression to identify correlations between DNA methylation and mRNA/microRNA expression from TCGA (The Cancer Genome Atlas), which includes 18 human cancers in more than 6000 samples, 6548 microarrays and 12 567 RNA sequencing data.",2014-11-14 +28813517,PRmePRed: A protein arginine methylation prediction tool.,"Protein methylation is an important Post-Translational Modification (PTMs) of proteins. Arginine methylation carries out and regulates several important biological functions, including gene regulation and signal transduction. 
Experimental identification of arginine methylation site is a daunting task as it is costly as well as time and labour intensive. Hence reliable prediction tools play an important task in rapid screening and identification of possible methylation sites in proteomes. Our preliminary assessment using the available prediction methods on collected data yielded unimpressive results. This motivated us to perform a comprehensive data analysis and appraisal of features relevant in the context of biological significance, that led to the development of a prediction tool PRmePRed with better performance. The PRmePRed perform reasonably well with an accuracy of 84.10%, 82.38% sensitivity, 83.77% specificity, and Matthew's correlation coefficient of 66.20% in 10-fold cross-validation. PRmePRed is freely available at http://bioinfo.icgeb.res.in/PRmePRed/.",2017-08-15 +27419259,Hydrogen Rearrangement Rules: Computational MS/MS Fragmentation and Structure Elucidation Using MS-FINDER Software.,"Compound identification from accurate mass MS/MS spectra is a bottleneck for untargeted metabolomics. In this study, we propose nine rules of hydrogen rearrangement (HR) during bond cleavages in low-energy collision-induced dissociation (CID). These rules are based on the classic even-electron rule and cover heteroatoms and multistage fragmentation. We evaluated our HR rules by the statistics of MassBank MS/MS spectra in addition to enthalpy calculations, yielding three levels of computational MS/MS annotation: ""resolved"" (regular HR behavior following HR rules), ""semiresolved"" (irregular HR behavior), and ""formula-assigned"" (lacking structure assignment). With this nomenclature, 78.4% of a total of 18506 MS/MS fragment ions in the MassBank database and 84.8% of a total of 36370 MS/MS fragment ions in the GNPS database were (semi-) resolved by predicted bond cleavages. We also introduce the MS-FINDER software for structure elucidation. 
Molecular formulas of precursor ions are determined from accurate mass, isotope ratio, and product ion information. All isomer structures of the predicted formula are retrieved from metabolome databases, and MS/MS fragmentations are predicted in silico. The structures are ranked by a combined weighting score considering bond dissociation energies, mass accuracies, fragment linkages, and, most importantly, nine HR rules. The program was validated by its ability to correctly calculate molecular formulas with 98.0% accuracy for 5063 MassBank MS/MS records and to yield the correct structural isomer with 82.1% accuracy within the top-3 candidates. In a test with 936 manually identified spectra from an untargeted HILIC-QTOF MS data set of human plasma, formulas were correctly predicted in 90.4% of the cases, and the correct isomer structure was retrieved at 80.4% probability within the top-3 candidates, including for compounds that were absent in mass spectral libraries. The MS-FINDER software is freely available at http://prime.psc.riken.jp/ .",2016-08-04 +29300482,Chemotext: A Publicly Available Web Server for Mining Drug-Target-Disease Relationships in PubMed.,"Elucidation of the mechanistic relationships between drugs, their targets, and diseases is at the core of modern drug discovery research. Thousands of studies relevant to the drug-target-disease (DTD) triangle have been published and annotated in the Medline/PubMed database. Mining this database affords rapid identification of all published studies that confirm connections between vertices of this triangle or enable new inferences of such connections. To this end, we describe the development of Chemotext, a publicly available Web server that mines the entire compendium of published literature in PubMed annotated by Medline Subject Heading (MeSH) terms. The goal of Chemotext is to identify all known DTD relationships and infer missing links between vertices of the DTD triangle. 
As a proof-of-concept, we show that Chemotext could be instrumental in generating new drug repurposing hypotheses or annotating clinical outcomes pathways for known drugs. The Chemotext Web server is freely available at http://chemotext.mml.unc.edu .",2018-01-19 +29671030,Abnormal islet sphingolipid metabolism in type 1 diabetes.,"

Aims/hypothesis

Sphingolipids play important roles in beta cell physiology, by regulating proinsulin folding and insulin secretion and in controlling apoptosis, as studied in animal models and cell cultures. Here we investigate whether sphingolipid metabolism may contribute to the pathogenesis of human type 1 diabetes and whether increasing the levels of the sphingolipid sulfatide would prevent models of diabetes in NOD mice.

Methods

We examined the amount and distribution of sulfatide in human pancreatic islets by immunohistochemistry, immunofluorescence and electron microscopy. Transcriptional analysis was used to evaluate expression of sphingolipid-related genes in isolated human islets. Genome-wide association studies (GWAS) and a T cell proliferation assay were used to identify type 1 diabetes related polymorphisms and test how these affect cellular islet autoimmunity. Finally, we treated NOD mice with fenofibrate, a known activator of sulfatide biosynthesis, to evaluate the effect on experimental autoimmune diabetes development.

Results

We found reduced amounts of sulfatide, 23% of the levels in control participants, in pancreatic islets of individuals with newly diagnosed type 1 diabetes, which were associated with reduced expression of enzymes involved in sphingolipid metabolism. Next, we discovered eight gene polymorphisms (ORMDL3, SPHK2, B4GALNT1, SLC1A5, GALC, PPARD, PPARG and B4GALT1) involved in sphingolipid metabolism that contribute to the genetic predisposition to type 1 diabetes. These gene polymorphisms correlated with the degree of cellular islet autoimmunity in a cohort of individuals with type 1 diabetes. Finally, using fenofibrate, which activates sulfatide biosynthesis, we completely prevented diabetes in NOD mice and even reversed the disease in half of otherwise diabetic animals.

Conclusions/interpretation

These results indicate that islet sphingolipid metabolism is abnormal in type 1 diabetes and suggest that modulation may represent a novel therapeutic approach.

Data availability

The RNA expression data is available online at https://www.dropbox.com/s/93mk5tzl5fdyo6b/Abnormal%20islet%20sphingolipid%20metabolism%20in%20type%201%20diabetes%2C%20RNA%20expression.xlsx?dl=0 . A list of SNPs identified is available at https://www.dropbox.com/s/yfojma9xanpp2ju/Abnormal%20islet%20sphingolipid%20metabolism%20in%20type%201%20diabetes%20SNP.xlsx?dl=0 .",2018-04-18 +27446120,PlantAPA: A Portal for Visualization and Analysis of Alternative Polyadenylation in Plants.,"Alternative polyadenylation (APA) is an important layer of gene regulation that produces mRNAs that have different 3' ends and/or encode diverse protein isoforms. Up to 70% of annotated genes in plants undergo APA. Increasing numbers of poly(A) sites collected in various plant species demand new methods and tools to access and mine these data. We have created an open-access web service called PlantAPA (http://bmi.xmu.edu.cn/plantapa) to visualize and analyze genome-wide poly(A) sites in plants. PlantAPA provides various interactive and dynamic graphics and seamlessly integrates a genome browser that can profile heterogeneous cleavage sites and quantify expression patterns of poly(A) sites across different conditions. Particularly, through PlantAPA, users can analyze poly(A) sites in extended 3' UTR regions, intergenic regions, and ambiguous regions owing to alternative transcription or RNA processing. In addition, it also provides tools for analyzing poly(A) site selections, 3' UTR lengthening or shortening, non-canonical APA site switching, and differential gene expression between conditions, making it more powerful for the study of APA-mediated gene expression regulation. More importantly, PlantAPA offers a bioinformatics pipeline that allows users to upload their own short reads or ESTs for poly(A) site extraction, enabling users to further explore poly(A) site selection using stored PlantAPA poly(A) sites together with their own poly(A) site datasets. 
To date, PlantAPA hosts the largest database of APA sites in plants, including Oryza sativa, Arabidopsis thaliana, Medicago truncatula, and Chlamydomonas reinhardtii. As a user-friendly web service, PlantAPA will be a valuable addition to the community of biologists studying APA mechanisms and gene expression regulation in plants.",2016-06-21 +28475668,MarDRe: efficient MapReduce-based removal of duplicate DNA reads in the cloud.,"

Summary

This article presents MarDRe, a de novo cloud-ready duplicate and near-duplicate removal tool that can process single- and paired-end reads from FASTQ/FASTA datasets. MarDRe takes advantage of the widely adopted MapReduce programming model to fully exploit Big Data technologies on cloud-based infrastructures. Written in Java to maximize cross-platform compatibility, MarDRe is built upon the open-source Apache Hadoop project, the most popular distributed computing framework for scalable Big Data processing. On a 16-node cluster deployed on the Amazon EC2 cloud platform, MarDRe is up to 8.52 times faster than a representative state-of-the-art tool.

Availability and implementation

Source code in Java and Hadoop as well as a user's guide are freely available under the GNU GPLv3 license at http://mardre.des.udc.es .

Contact

rreye@udc.es.",2017-09-01 +26913838,Adding In Silico Assessment of Potential Splice Aberration to the Integrated Evaluation of BRCA Gene Unclassified Variants.,"Clinical mutation screening of the cancer susceptibility genes BRCA1 and BRCA2 generates many unclassified variants (UVs). Most of these UVs are either rare missense substitutions or nucleotide substitutions near the splice junctions of the protein coding exons. Previously, we developed a quantitative method for evaluation of BRCA gene UVs-the ""integrated evaluation""-that combines a sequence analysis-based prior probability of pathogenicity with patient and/or tumor observational data to arrive at a posterior probability of pathogenicity. One limitation of the sequence analysis-based prior has been that it evaluates UVs from the perspective of missense substitution severity but not probability to disrupt normal mRNA splicing. Here, we calibrated output from the splice-site fitness program MaxEntScan to generate spliceogenicity-based prior probabilities of pathogenicity for BRCA gene variants; these range from 0.97 for variants with high probability to damage a donor or acceptor to 0.02 for exonic variants that do not impact a splice junction and are unlikely to create a de novo donor. We created a database http://priors.hci.utah.edu/PRIORS/ that provides the combined missense substitution severity and spliceogenicity-based probability of pathogenicity for BRCA gene single-nucleotide substitutions. We also updated the BRCA gene Ex-UV LOVD, available at http://hci-exlovd.hci.utah.edu, with 77 re-evaluable variants.",2016-04-15 +26980519,PhyloPro2.0: a database for the dynamic exploration of phylogenetically conserved proteins and their domain architectures across the Eukarya. ,"PhyloPro is a database and accompanying web-based application for the construction and exploration of phylogenetic profiles across the Eukarya. 
In this update article, we present six major new developments in PhyloPro: (i) integration of Pfam-A domain predictions for all proteins; (ii) new summary heatmaps and detailed level views of domain conservation; (iii) an interactive, network-based visualization tool for exploration of domain architectures and their conservation; (iv) ability to browse based on protein functional categories (GOSlim); (v) improvements to the web interface to enhance drill down capability from the heatmap view; and (vi) improved coverage including 164 eukaryotes and 12 reference species. In addition, we provide improved support for downloading data and images in a variety of formats. Among the existing tools available for phylogenetic profiles, PhyloPro provides several innovative domain-based features including a novel domain adjacency visualization tool. These are designed to allow the user to identify and compare proteins with similar domain architectures across species and thus develop hypotheses about the evolution of lineage-specific trajectories. Database URL: http://www.compsysbio.org/phylopro/.",2016-03-15 +29998348,Investigating the Use of a Nonspeech Task to Measure Tongue-Jaw Differentiation: Findings Across Typical Development.,"

Purpose

Clinically, a task of alternating tongue lateralization has been used to evaluate the ability to independently control the tongue and jaw, with jaw movement interpreted as a sign of poor tongue-jaw differentiation. However, there is a lack of normative data regarding jaw movement during this task and whether this changes over the course of development. This study quantified relative tongue and jaw movement during alternating tongue lateralization for typical speakers across age ranges and examined whether degree of jaw movement varies as a function of age.

Method

Participants were 39 typical children, adolescents, and adults ranging from 6 to 29 years old. A motion capture system was used to track tongue and jaw movement during an alternating tongue lateralization task, and the average relative contribution of the jaw to tongue lateralization was determined for each participant.

Results

Age did not correlate significantly with the average relative contribution of the jaw to tongue lateralization. Typical children, adolescents, and adults exhibited wide variability in the degree of jaw movement during this task.

Conclusion

Variability among typical speakers in alternating tongue lateralization performance makes it challenging to determine if/when performance should be considered atypical. Clinical findings from this task must be interpreted with caution.

Supplemental material

https://doi.org/10.23641/asha.6626222.",2018-08-01 +28003264,A general framework for association analysis of microbial communities on a taxonomic tree.,"

Motivation

Association analysis of microbiome composition with disease-related outcomes provides invaluable knowledge towards understanding the roles of microbes in the underlying disease mechanisms. Proper analysis of sparse compositional microbiome data is challenging. Existing methods rely on strong assumptions on the data structure and fail to pinpoint the associated microbial communities.

Results

We develop a general framework to: (i) perform robust association tests for the microbial community that exhibits arbitrary inter-taxa dependencies; (ii) localize lineages on the taxonomic tree that are associated with covariates (e.g. disease status); and (iii) assess the overall association of the whole microbial community with the covariates. Unlike existing methods for microbiome association analysis, our framework does not make any distributional assumptions on the microbiome data; it allows for the adjustment of confounding variables and accommodates excessive zero observations; and it incorporates taxonomic information. We perform extensive simulation studies under a wide-range of scenarios to evaluate the new methods and demonstrate substantial power gain over existing methods. The advantages of the proposed framework are further demonstrated with real datasets from two microbiome studies. The relevant R package miLineage is publicly available.

Availability and implementation

miLineage package, manual and tutorial are available at https://medschool.vanderbilt.edu/tang-lab/software/miLineage .

Contact

z.tang@vanderbilt.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +26571530,Massive Online Crowdsourced Study of Subjective and Objective Picture Quality.,"Most publicly available image quality databases have been created under highly controlled conditions by introducing graded simulated distortions onto high-quality photographs. However, images captured using typical real-world mobile camera devices are usually afflicted by complex mixtures of multiple distortions, which are not necessarily well-modeled by the synthetic distortions found in existing databases. The originators of existing legacy databases usually conducted human psychometric studies to obtain statistically meaningful sets of human opinion scores on images in a stringently controlled visual environment, resulting in small data collections relative to other kinds of image analysis databases. Toward overcoming these limitations, we designed and created a new database that we call the LIVE In the Wild Image Quality Challenge Database, which contains widely diverse authentic image distortions on a large number of images captured using a representative variety of modern mobile devices. We also designed and implemented a new online crowdsourcing system, which we have used to conduct a very large-scale, multi-month image quality assessment (IQA) subjective study. Our database consists of over 350 000 opinion scores on 1162 images evaluated by over 8100 unique human observers. Despite the lack of control over the experimental environments of the numerous study participants, we demonstrate excellent internal consistency of the subjective data set. We also evaluate several top-performing blind IQA algorithms on it and present insights on how the mixtures of distortions challenge both end users as well as automatic perceptual quality prediction models. 
The new database is available for public use at http://live.ece.utexas.edu/research/ChallengeDB/index.html.",2015-11-11 +25893845,BOOGIE: Predicting Blood Groups from High Throughput Sequencing Data.,"Over the last decade, we have witnessed an incredible growth in the amount of available genotype data due to high throughput sequencing (HTS) techniques. This information may be used to predict phenotypes of medical relevance, and pave the way towards personalized medicine. Blood phenotypes (e.g. ABO and Rh) are a purely genetic trait that has been extensively studied for decades, with currently over thirty known blood groups. Given the public availability of blood group data, it is of interest to predict these phenotypes from HTS data which may translate into more accurate blood typing in clinical practice. Here we propose BOOGIE, a fast predictor for the inference of blood groups from single nucleotide variant (SNV) databases. We focus on the prediction of thirty blood groups ranging from the well known ABO and Rh, to the less studied Junior or Diego. BOOGIE correctly predicted the blood group with 94% accuracy for the Personal Genome Project whole genome profiles where good quality SNV annotation was available. Additionally, our tool produces a high quality haplotype phase, which is of interest in the context of ethnicity-specific polymorphisms or traits. The versatility and simplicity of the analysis make it easily interpretable and allow easy extension of the protocol towards other phenotypes. BOOGIE can be downloaded from URL http://protein.bio.unipd.it/download/.",2015-04-20 +22735743,VIP DB--a viral protein domain usage and distribution database.,"During the viral infection and replication processes, viral proteins are highly regulated and may interact with host proteins. However, the functions and interaction partners of many viral proteins have yet to be explored. 
Here, we compiled a VIral Protein domain DataBase (VIP DB) to associate viral proteins with putative functions and interaction partners. We systematically assign domains and infer the functions of proteins and their protein interaction partners from their domain annotations. A total of 2,322 unique domains that were identified from 2,404 viruses are used as a starting point to correlate GO classification, KEGG metabolic pathway annotation and domain-domain interactions. Of the unique domains, 42.7% have GO records, 39.6% have at least one domain-domain interaction record and 26.3% can also be found in either mammals or plants. This database provides a resource to help virologists identify potential roles for viral protein. All of the information is available at http://vipdb.cgu.edu.tw.",2012-06-24 +28728542,CLOVE: classification of genomic fusions into structural variation events.,"

Background

A precise understanding of structural variants (SVs) in DNA is important in the study of cancer and population diversity. Many methods have been designed to identify SVs from DNA sequencing data. However, the problem remains challenging because existing approaches suffer from low sensitivity, precision, and positional accuracy. Furthermore, many existing tools only identify breakpoints, and do not collect related breakpoints and classify them as a particular type of SV. Due to the rapidly increasing usage of high throughput sequencing technologies in this area, there is an urgent need for algorithms that can accurately classify complex genomic rearrangements (involving more than one breakpoint or fusion).

Results

We present CLOVE, an algorithm for integrating the results of multiple breakpoint or SV callers and classifying the results as a particular SV. CLOVE is based on a graph data structure that is created from the breakpoint information. The algorithm looks for patterns in the graph that are characteristic of more complex rearrangement types. CLOVE is able to integrate the results of multiple callers, producing a consensus call.

Conclusions

We demonstrate using simulated and real data that re-classified SV calls produced by CLOVE improve on the raw call set of existing SV algorithms, particularly in terms of accuracy. CLOVE is freely available from http://www.github.com/PapenfussLab .",2017-07-20 +29163006,MATLAB Toolboxes for Reference Electrode Standardization Technique (REST) of Scalp EEG.,"Reference electrode standardization technique (REST) has been increasingly acknowledged and applied as a re-reference technique to transform an actual multi-channels recordings to approximately zero reference ones in electroencephalography/event-related potentials (EEG/ERPs) community around the world in recent years. However, a more easy-to-use toolbox for re-referencing scalp EEG data to zero reference is still lacking. Here, we have therefore developed two open-source MATLAB toolboxes for REST of scalp EEG. One version of REST is closely integrated into EEGLAB, which is a popular MATLAB toolbox for processing the EEG data; and another is a batch version to make it more convenient and efficient for experienced users. Both of them are designed to provide an easy-to-use for novice researchers and flexibility for experienced researchers. All versions of the REST toolboxes can be freely downloaded at http://www.neuro.uestc.edu.cn/rest/Down.html, and the detailed information including publications, comments and documents on REST can also be found from this website. An example of usage is given with comparative results of REST and average reference. We hope these user-friendly REST toolboxes could make the relatively novel technique of REST easier to study, especially for applications in various EEG studies.",2017-10-30 +30666281,The International Collaborative on Fatigue Following Infection (COFFI).,"

Background

The purpose of the Collaborative on Fatigue Following Infection (COFFI) is for investigators of post-infection fatigue (PIF) and other syndromes to collaborate on these enigmatic and poorly understood conditions by studying relatively homogeneous populations with known infectious triggers. Utilizing COFFI, pooled data and stored biosamples will support both epidemiological and laboratory research to better understand the etiology and risk factors for development and progression of PIF.

Methods

COFFI consists of prospective cohorts from the UK, Netherlands, Norway, USA, New Zealand and Australia, with some cohorts closed and some open to recruitment. The 9 cohorts closed to recruitment total over 3,000 participants, including nearly 1000 with infectious mononucleosis (IM), > 500 with Q fever, > 800 with giardiasis, > 600 with campylobacter gastroenteritis (CG), 190 with Legionnaires disease and 60 with Ross River virus. Follow-ups have been at least 6 months and up to 10 years. All studies use the Fukuda criteria for defining chronic fatigue syndrome (CFS).

Results

Preliminary analyses indicated that risk factors for non-recovery from PIF included lower physical fitness, female gender, severity of the acute sickness response, and autonomic dysfunction.

Conclusions

COFFI (https://internationalcoffi.wordpress.com/) is an international collaboration which should be able to answer questions based on pooled data that are not answerable in the individual cohorts. Possible questions may include the following: Do different infectious triggers different PIF syndromes (e.g., CFS vs. irritable bowel syndrome)?; What are longitudinal predictors of PIF and its severity?",2018-01-19 +29800326,eXpression2Kinases (X2K) Web: linking expression signatures to upstream cell signaling networks.,"While gene expression data at the mRNA level can be globally and accurately measured, profiling the activity of cell signaling pathways is currently much more difficult. eXpression2Kinases (X2K) computationally predicts involvement of upstream cell signaling pathways, given a signature of differentially expressed genes. X2K first computes enrichment for transcription factors likely to regulate the expression of the differentially expressed genes. The next step of X2K connects these enriched transcription factors through known protein-protein interactions (PPIs) to construct a subnetwork. The final step performs kinase enrichment analysis on the members of the subnetwork. X2K Web is a new implementation of the original eXpression2Kinases algorithm with important enhancements. X2K Web includes many new transcription factor and kinase libraries, and PPI networks. For demonstration, thousands of gene expression signatures induced by kinase inhibitors, applied to six breast cancer cell lines, are provided for fetching directly into X2K Web. The results are displayed as interactive downloadable vector graphic network images and bar graphs. Benchmarking various settings via random permutations enabled the identification of an optimal set of parameters to be used as the default settings in X2K Web. X2K Web is freely available from http://X2K.cloud.",2018-07-01 +26052348,Why is Tanimoto index an appropriate choice for fingerprint-based similarity calculations?,"

Background

Cheminformaticians are equipped with a very rich toolbox when carrying out molecular similarity calculations. A large number of molecular representations exist, and there are several methods (similarity and distance metrics) to quantify the similarity of molecular representations. In this work, eight well-known similarity/distance metrics are compared on a large dataset of molecular fingerprints with sum of ranking differences (SRD) and ANOVA analysis. The effects of molecular size, selection methods and data pretreatment methods on the outcome of the comparison are also assessed.

Results

A supplier database (https://mcule.com/) was used as the source of compounds for the similarity calculations in this study. A large number of datasets, each consisting of one hundred compounds, were compiled, molecular fingerprints were generated and similarity values between a randomly chosen reference compound and the rest were calculated for each dataset. Similarity metrics were compared based on their ranking of the compounds within one experiment (one dataset) using sum of ranking differences (SRD), while the results of the entire set of experiments were summarized on box and whisker plots. Finally, the effects of various factors (data pretreatment, molecule size, selection method) were evaluated with analysis of variance (ANOVA).

Conclusions

This study complements previous efforts to examine and rank various metrics for molecular similarity calculations. Here, however, an entirely general approach was taken to neglect any a priori knowledge on the compounds involved, as well as any bias introduced by examining only one or a few specific scenarios. The Tanimoto index, Dice index, Cosine coefficient and Soergel distance were identified to be the best (and in some sense equivalent) metrics for similarity calculations, i.e. these metrics could produce the rankings closest to the composite (average) ranking of the eight metrics. The similarity metrics derived from Euclidean and Manhattan distances are not recommended on their own, although their variability and diversity from other similarity metrics might be advantageous in certain cases (e.g. for data fusion). Conclusions are also drawn regarding the effects of molecule size, selection method and data pretreatment on the ranking behavior of the studied metrics. Graphical AbstractA visual summary of the comparison of similarity metrics with sum of ranking differences (SRD).",2015-05-20 +25632109,"PhenoMiner: a quantitative phenotype database for the laboratory rat, Rattus norvegicus. Application in hypertension and renal disease. ","Rats have been used extensively as animal models to study physiological and pathological processes involved in human diseases. Numerous rat strains have been selectively bred for certain biological traits related to specific medical interests. Recently, the Rat Genome Database (http://rgd.mcw.edu) has initiated the PhenoMiner project to integrate quantitative phenotype data from the PhysGen Program for Genomic Applications and the National BioResource Project in Japan as well as manual annotations from biomedical literature. 
PhenoMiner, the search engine for these integrated phenotype data, facilitates mining of data sets across studies by searching the database with a combination of terms from four different ontologies/vocabularies (Rat Strain Ontology, Clinical Measurement Ontology, Measurement Method Ontology and Experimental Condition Ontology). In this study, salt-induced hypertension was used as a model to retrieve blood pressure records of Brown Norway, Fawn-Hooded Hypertensive (FHH) and Dahl salt-sensitive (SS) rat strains. The records from these three strains served as a basis for comparing records from consomic/congenic/mutant offspring derived from them. We examined the cardiovascular and renal phenotypes of consomics derived from FHH and SS, and of SS congenics and mutants. The availability of quantitative records across laboratories in one database, such as these provided by PhenoMiner, can empower researchers to make the best use of publicly available data. Database URL: http://rgd.mcw.edu.",2015-01-28 +29788182,GlobAl Distribution of GEnetic Traits (GADGET) web server: polygenic trait scores worldwide.,"Human populations from around the world show striking phenotypic variation across a wide variety of traits. Genome-wide association studies (GWAS) are used to uncover genetic variants that influence the expression of heritable human traits; accordingly, population-specific distributions of GWAS-implicated variants may shed light on the genetic basis of human phenotypic diversity. With this in mind, we developed the GlobAl Distribution of GEnetic Traits web server (GADGET http://gadget.biosci.gatech.edu). The GADGET web server provides users with a dynamic visual platform for exploring the relationship between worldwide genetic diversity and the genetic architecture underlying numerous human phenotypes. 
GADGET integrates trait-implicated single nucleotide polymorphisms (SNPs) from GWAS, with population genetic data from the 1000 Genomes Project, to calculate genome-wide polygenic trait scores (PTS) for 818 phenotypes in 2504 individual genomes. Population-specific distributions of PTS are shown for 26 human populations across 5 continental population groups, with traits ordered based on the extent of variation observed among populations. Users of GADGET can also upload custom trait SNP sets to visualize global PTS distributions for their own traits of interest.",2018-07-01 +29949969,Versatile genome assembly evaluation with QUAST-LG.,"

Motivation

The emergence of high-throughput sequencing technologies revolutionized genomics in the early 2000s. The next revolution came with the era of long-read sequencing. These technological advances along with novel computational approaches became the next step towards the automatic pipelines capable of assembling nearly complete mammalian-size genomes.

Results

In this manuscript, we demonstrate performance of the state-of-the-art genome assembly software on six eukaryotic datasets sequenced using different technologies. To evaluate the results, we developed QUAST-LG, a tool that compares large genomic de novo assemblies against reference sequences and computes relevant quality metrics. Since genomes generally cannot be reconstructed completely due to complex repeat patterns and low coverage regions, we introduce a concept of upper bound assembly for a given genome and set of reads, and compute theoretical limits on assembly correctness and completeness. Using QUAST-LG, we show how close the assemblies are to the theoretical optimum, and how far this optimum is from the finished reference.

Availability and implementation

http://cab.spbu.ru/software/quast-lg.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +29718510,ProTox-II: a webserver for the prediction of toxicity of chemicals.,"Advancement in the field of computational research has made it possible for the in silico methods to offer significant benefits to both regulatory needs and requirements for risk assessments, and pharmaceutical industry to assess the safety profile of a chemical. Here, we present ProTox-II that incorporates molecular similarity, pharmacophores, fragment propensities and machine-learning models for the prediction of various toxicity endpoints; such as acute toxicity, hepatotoxicity, cytotoxicity, carcinogenicity, mutagenicity, immunotoxicity, adverse outcomes pathways (Tox21) and toxicity targets. The predictive models are built on data from both in vitro assays (e.g. Tox21 assays, Ames bacterial mutation assays, hepG2 cytotoxicity assays, Immunotoxicity assays) and in vivo cases (e.g. carcinogenicity, hepatotoxicity). The models have been validated on independent external sets and have shown strong performance. ProTox-II provides a freely available webserver for in silico toxicity prediction for toxicologists, regulatory agencies, computational and medicinal chemists, and all users without login at http://tox.charite.de/protox_II. The webserver takes a two-dimensional chemical structure as an input and reports the possible toxicity profile of the chemical for 33 models with confidence scores, and an overall toxicity radar chart along with three most similar compounds with known acute toxicity.",2018-07-01 +29314435,"Mutation update of transcription factor genes FOXE3, HSF4, MAF, and PITX3 causing cataracts and other developmental ocular defects.","Mutations in the transcription factor genes FOXE3, HSF4, MAF, and PITX3 cause congenital lens defects including cataracts that may be accompanied by defects in other components of the eye or in nonocular tissues. 
We comprehensively describe here all the variants in FOXE3, HSF4, MAF, and PITX3 genes linked to human developmental defects. A total of 52 variants for FOXE3, 18 variants for HSF4, 20 variants for MAF, and 19 variants for PITX3 identified so far in isolated cases or within families are documented. This effort reveals FOXE3, HSF4, MAF, and PITX3 to have 33, 16, 18, and 7 unique causal mutations, respectively. Loss-of-function mutant animals for these genes have served to model the pathobiology of the associated human defects, and we discuss the currently known molecular function of these genes, particularly with emphasis on their role in ocular development. Finally, we make the detailed FOXE3, HSF4, MAF, and PITX3 variant information available in the Leiden Online Variation Database (LOVD) platform at https://www.LOVD.nl/FOXE3, https://www.LOVD.nl/HSF4, https://www.LOVD.nl/MAF, and https://www.LOVD.nl/PITX3. Thus, this article informs on key variants in transcription factor genes linked to cataract, aphakia, corneal opacity, glaucoma, microcornea, microphthalmia, anterior segment mesenchymal dysgenesis, and Ayme-Gripp syndrome, and facilitates their access through Web-based databases.",2018-01-16 +25889518,miREC: a database of miRNAs involved in the development of endometrial cancer.,"

Background

Endometrial cancer (EC) is the most frequently diagnosed gynecological malignancy and the fourth most common cancer diagnosis overall among women. As with many other forms of cancer, it has been shown that certain miRNAs are differentially expressed in EC and these miRNAs are believed to play important roles as regulators of processes involved in the development of the disease. With the rapidly growing number of studies of miRNA expression in EC, there is a need to organize the data, combine the findings from experimental studies of EC with information from various miRNA databases, and make the integrated information easily accessible for the EC research community.

Findings

The miREC database is an organized collection of data and information about miRNAs shown to be differentially expressed in EC. The database can be used to map connections between miRNAs and their target genes in order to identify specific miRNAs that are potentially important for the development of EC. The aim of the miREC database is to integrate all available information about miRNAs and target genes involved in the development of endometrial cancer, and to provide a comprehensive, up-to-date, and easily accessible source of knowledge regarding the role of miRNAs in the development of EC. Database URL: http://www.mirecdb.org .

Conclusions

Several databases have been published that store information about all miRNA targets that have been predicted or experimentally verified to date. It would be a time-consuming task to navigate between these different data sources and literature to gather information about a specific disease, such as endometrial cancer. The miREC database is a specialized data repository that, in addition to miRNA target information, keeps track of the differential expression of genes and miRNAs potentially involved in endometrial cancer development. By providing flexible search functions it becomes easy to search for EC-associated genes and miRNAs from different starting points, such as differential expression and genomic loci (based on genomic aberrations).",2015-03-28 +23180763,The Standard European Vector Architecture (SEVA): a coherent platform for the analysis and deployment of complex prokaryotic phenotypes.,"The 'Standard European Vector Architecture' database (SEVA-DB, http://seva.cnb.csic.es) was conceived as a user-friendly, web-based resource and a material clone repository to assist in the choice of optimal plasmid vectors for de-constructing and re-constructing complex prokaryotic phenotypes. The SEVA-DB adopts simple design concepts that facilitate the swapping of functional modules and the extension of genome engineering options to microorganisms beyond typical laboratory strains. Under the SEVA standard, every DNA portion of the plasmid vectors is minimized, edited for flaws in their sequence and/or functionality, and endowed with physical connectivity through three inter-segment insulators that are flanked by fixed, rare restriction sites. Such a scaffold enables the exchangeability of multiple origins of replication and diverse antibiotic selection markers to shape a frame for their further combination with a large variety of cargo modules that can be used for varied end-applications. 
The core collection of constructs that are available at the SEVA-DB has been produced as a starting point for the further expansion of the formatted vector platform. We argue that adoption of the SEVA format can become a shortcut to fill the phenomenal gap between the existing power of DNA synthesis and the actual engineering of predictable and efficacious bacteria.",2012-11-23 +29790989,"The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2018 update.","Galaxy (homepage: https://galaxyproject.org, main public server: https://usegalaxy.org) is a web-based scientific analysis platform used by tens of thousands of scientists across the world to analyze large biomedical datasets such as those found in genomics, proteomics, metabolomics and imaging. Started in 2005, Galaxy continues to focus on three key challenges of data-driven biomedical science: making analyses accessible to all researchers, ensuring analyses are completely reproducible, and making it simple to communicate analyses so that they can be reused and extended. During the last two years, the Galaxy team and the open-source community around Galaxy have made substantial improvements to Galaxy's core framework, user interface, tools, and training materials. Framework and user interface improvements now enable Galaxy to be used for analyzing tens of thousands of datasets, and >5500 tools are now available from the Galaxy ToolShed. The Galaxy community has led an effort to create numerous high-quality tutorials focused on common types of genomic analyses. The Galaxy developer and user communities continue to grow and be integral to Galaxy's development. 
The number of Galaxy public servers, developers contributing to the Galaxy framework and its tools, and users of the main Galaxy server have all increased substantially.",2018-07-01 +29335486,High-resolution TADs reveal DNA sequences underlying genome organization in flies.,"Despite an abundance of new studies about topologically associating domains (TADs), the role of genetic information in TAD formation is still not fully understood. Here we use our software, HiCExplorer (hicexplorer.readthedocs.io) to annotate >2800 high-resolution (570 bp) TAD boundaries in Drosophila melanogaster. We identify eight DNA motifs enriched at boundaries, including a motif bound by the M1BP protein, and two new boundary motifs. In contrast to mammals, the CTCF motif is only enriched on a small fraction of boundaries flanking inactive chromatin while most active boundaries contain the motifs bound by the M1BP or Beaf-32 proteins. We demonstrate that boundaries can be accurately predicted using only the motif sequences at open chromatin sites. We propose that DNA sequence guides the genome architecture by allocation of boundary proteins in the genome. Finally, we present an interactive online database to access and explore the spatial organization of fly, mouse and human genomes, available at http://chorogenome.ie-freiburg.mpg.de .",2018-01-15 +29334821,Development and validation of a facial expression database based on the dimensional and categorical model of emotions.,"The present study describes the development and validation of a facial expression database comprising five different horizontal face angles in dynamic and static presentations. The database includes twelve expression types portrayed by eight Japanese models. 
This database was inspired by the dimensional and categorical model of emotions: surprise, fear, sadness, anger with open mouth, anger with closed mouth, disgust with open mouth, disgust with closed mouth, excitement, happiness, relaxation, sleepiness, and neutral (static only). The expressions were validated using emotion classification and Affect Grid rating tasks [Russell, Weiss, & Mendelsohn, 1989. Affect Grid: A single-item scale of pleasure and arousal. Journal of Personality and Social Psychology, 57(3), 493-502]. The results indicate that most of the expressions were recognised as the intended emotions and could systematically represent affective valence and arousal. Furthermore, face angle and facial motion information influenced emotion classification and valence and arousal ratings. Our database will be available online at the following URL. https://www.dh.aist.go.jp/database/face2017/ .",2018-01-15 +25392426,Helminth.net: expansions to Nematode.net and an introduction to Trematode.net.,"Helminth.net (http://www.helminth.net) is the new moniker for a collection of databases: Nematode.net and Trematode.net. Within this collection we provide services and resources for parasitic roundworms (nematodes) and flatworms (trematodes), collectively known as helminths. For over a decade we have provided resources for studying nematodes via our veteran site Nematode.net (http://nematode.net). In this article, (i) we provide an update on the expansions of Nematode.net that hosts omics data from 84 species and provides advanced search tools to the broad scientific community so that data can be mined in a useful and user-friendly manner and (ii) we introduce Trematode.net, a site dedicated to the dissemination of data from flukes, flatworm parasites of the class Trematoda, phylum Platyhelminthes. 
Trematode.net is an independent component of Helminth.net and currently hosts data from 16 species, with information ranging from genomic, functional genomic data, enzymatic pathway utilization to microbiome changes associated with helminth infections. The databases' interface, with a sophisticated query engine as a backbone, is intended to allow users to search for multi-factorial combinations of species' omics properties. This report describes updates to Nematode.net since its last description in NAR, 2012, and also introduces and presents its new sibling site, Trematode.net.",2014-11-11 +29062930,The secondary metabolite bioinformatics portal: Computational tools to facilitate synthetic biology of secondary metabolite production.,"Natural products are among the most important sources of lead molecules for drug discovery. With the development of affordable whole-genome sequencing technologies and other 'omics tools, the field of natural products research is currently undergoing a shift in paradigms. While, for decades, mainly analytical and chemical methods gave access to this group of compounds, nowadays genomics-based methods offer complementary approaches to find, identify and characterize such molecules. This paradigm shift also resulted in a high demand for computational tools to assist researchers in their daily work. In this context, this review gives a summary of tools and databases that currently are available to mine, identify and characterize natural product biosynthesis pathways and their producers based on 'omics data. A web portal called Secondary Metabolite Bioinformatics Portal (SMBP at http://www.secondarymetabolites.org) is introduced to provide a one-stop catalog and links to these bioinformatics resources. 
In addition, an outlook is presented how the existing tools and those to be developed will influence synthetic biology approaches in the natural products field.",2016-02-05 +26129639,Amoeba-Inspired Heuristic Search Dynamics for Exploring Chemical Reaction Paths.,"We propose a nature-inspired model for simulating chemical reactions in a computationally resource-saving manner. The model was developed by extending our previously proposed heuristic search algorithm, called ""AmoebaSAT [Aono et al. 2013],"" which was inspired by the spatiotemporal dynamics of a single-celled amoeboid organism that exhibits sophisticated computing capabilities in adapting to its environment efficiently [Zhu et al. 2013]. AmoebaSAT is used for solving an NP-complete combinatorial optimization problem [Garey and Johnson 1979], ""the satisfiability problem,"" and finds a constraint-satisfying solution at a speed that is dramatically faster than one of the conventionally known fastest stochastic local search methods [Iwama and Tamaki 2004] for a class of randomly generated problem instances [ http://www.cs.ubc.ca/~hoos/5/benchm.html ]. In cases where the problem has more than one solution, AmoebaSAT exhibits dynamic transition behavior among a variety of the solutions. Inheriting these features of AmoebaSAT, we formulate ""AmoebaChem,"" which explores a variety of metastable molecules in which several constraints determined by input atoms are satisfied and generates dynamic transition processes among the metastable molecules. AmoebaChem and its developed forms will be applied to the study of the origins of life, to discover reaction paths for which expected or unexpected organic compounds may be formed via unknown unstable intermediates and to estimate the likelihood of each of the discovered paths.",2015-07-01 +29896567,"Genomics of the Uncultivated, Periodontitis-Associated Bacterium Tannerella sp. BU045 (Oral Taxon 808). 
","Despite decades of research into the human oral microbiome, many species remain uncultivated. The technique of single-cell whole-genome amplification and sequencing provides a means of deriving genome sequences for species that can be informative on biological function and suggest pathways to cultivation. Tannerella forsythia has long been known to be highly associated with chronic periodontitis and to cause periodontitis-like symptoms in experimental animals, and Tannerella sp. BU045 (human oral taxon 808) is an uncultivated relative of this organism. In this work, we extend our previous sequencing of the Tannerella sp. BU063 (human oral taxon 286) genome by sequencing amplified genomes from 11 cells of Tannerella sp. BU045, including 3 genomes that are at least 90% complete. Tannerella sp. BU045 is more closely related to Tannerella sp. BU063 than to T. forsythia by gene content and average nucleotide identity. However, two independent data sets of association with periodontitis, one based on 16S rRNA gene abundance and the other based on gene expression in a metatranscriptomic data set, show that Tannerella sp. BU045 is more highly associated with disease than Tannerella sp. BU063. Comparative genomics shows genes and functions that are shared or unique to the different species, which may direct further research of the pathogenesis of chronic periodontitis. IMPORTANCE Periodontitis (gum disease) affects 47% of adults over 30 in the United States (P. I. Eke, B. A. Dye, L. Wei, G. O. Thornton-Evans, R. J. Genco, et al., J Dent Res 91:914-920, 2012), and it cost between $39 and $396 billion worldwide in 2015 (A. J. Righolt, M. Jevdjevic, W. Marcenes, and S. Listl, J Dent Res, 17 January 2018, https://doi.org/10.1177/0022034517750572). Many bacteria associated with the disease are known only by the DNA sequence of their 16S rRNA gene. 
In this publication, amplification and sequencing of DNA from single bacterial cells are used to obtain nearly complete genomes of Tannerella sp. BU045, a species of bacteria that is more prevalent in patients with periodontitis than in healthy patients. Comparing the complete genome of this bacterium to genomes of related bacterial species will help to better understand periodontitis and may help to grow this organism in pure culture, which would allow a better understanding of its role in the mouth.",2018-05-01 +28349105,SRM dataset of the proteome of inactivated iron-sulfur cluster biogenesis regulator SufR in Synechocystis sp. PCC 6803.,"This article contains SRM proteomics data related to the research article entitled""Inactivation of iron-sulfur cluster biogenesis regulator SufR in Synechocystis sp. PCC 6803 induces unique iron-dependent protein-level responses"" (L. Vuorijoki, A. Tiwari, P. Kallio, E.M. Aro, 2017) [1]. The data described here provide comprehensive information on the applied SRM assays, together with the results of quantifying 94 Synechocystis sp. PCC 6803 proteins. The data has been deposited in Panorama public (https://panoramaweb.org/labkey/SufR) and in PASSEL under the PASS00765 identifier (http://www.peptideatlas.org/PASS/PASS00765).",2017-03-11 +30417653,Conditioning on Parity in Studies of Perfluoroalkyl Acids and Time to Pregnancy: An Example from the Danish National Birth Cohort.,"

Background

Previous studies have investigated the associations between perfluoroalkyl acids (PFAAs) in women and time to pregnancy (TTP). Inconsistent results may be explained by differences in conditioning on parity.

Objectives

We used causal directed acyclic graphs to illustrate potential confounding related to previous pregnancies and exposure measurement error due to differences in the interpregnancy interval in pregnancy-based studies that include parous women. We exemplified the potential importance of these issues using data from the Danish National Birth Cohort.

Methods

We used discrete time survival models to estimate associations between maternal plasma PFAAs in early pregnancy and TTP in 638 nulliparous and 613 parous women.

Results

PFAA quartiles were not associated with the TTP in nulliparous women. In parous women, higher PFAA quartiles were associated with longer TTP. The strongest associations were estimated for perfluorohexane sulfonate and perfluorooctane sulfonate. PFAA concentrations were higher in women with longer interpregnancy intervals. Accounting for the interpregnancy interval attenuated the estimated associations.

Conclusions

Associations between PFAAs and TTP in parous women may be biased by confounders related to previous pregnancies and exposure measurement error. To avoid these biases, studies that include parous women may need to condition on a) common causes of PFAAs and the TTP in the index pregnancy, b) previous births (a descendant of a collider), c) PFAA levels or common causes of PFAA levels and the TTP in the previous pregnancy (to alleviate collider stratification bias caused by conditioning on previous births), and d) the interpregnancy interval (in pregnancy-based studies). Alternatives would be to restrict studies to nulliparous women or to use toxicokinetic modeling to correct exposure estimates in parous women. These recommendations may be extended to studies of other chemicals with similar toxicokinetic properties. https://doi.org/10.1289/EHP1493.",2018-11-01 +22563442,MycoRRdb: a database of computationally identified regulatory regions within intergenic sequences in mycobacterial genomes.,"The identification of regulatory regions for a gene is an important step towards deciphering the gene regulation. Regulatory regions tend to be conserved under evolution that facilitates the application of comparative genomics to identify such regions. The present study is an attempt to make use of this attribute to identify regulatory regions in the Mycobacterium species followed by the development of a database, MycoRRdb. It consist the regulatory regions identified within the intergenic distances of 25 mycobacterial species. MycoRRdb allows to retrieve the identified intergenic regulatory elements in the mycobacterial genomes. In addition to the predicted motifs, it also allows user to retrieve the Reciprocal Best BLAST Hits across the mycobacterial genomes. It is a useful resource to understand the transcriptional regulatory mechanism of mycobacterial species. 
This database is first of its kind which specifically addresses cis-regulatory regions and also comprehensive to the mycobacterial species. Database URL: http://mycorrdb.uohbif.in.",2012-04-26 +25754863,"Combining computational models, semantic annotations and simulation experiments in a graph database. ","Model repositories such as the BioModels Database, the CellML Model Repository or JWS Online are frequently accessed to retrieve computational models of biological systems. However, their storage concepts support only restricted types of queries and not all data inside the repositories can be retrieved. In this article we present a storage concept that meets this challenge. It grounds on a graph database, reflects the models' structure, incorporates semantic annotations and simulation descriptions and ultimately connects different types of model-related data. The connections between heterogeneous model-related data and bio-ontologies enable efficient search via biological facts and grant access to new model features. The introduced concept notably improves the access of computational models and associated simulations in a model repository. This has positive effects on tasks such as model search, retrieval, ranking, matching and filtering. Furthermore, our work for the first time enables CellML- and Systems Biology Markup Language-encoded models to be effectively maintained in one database. We show how these models can be linked via annotations and queried. Database URL: https://sems.uni-rostock.de/projects/masymos/",2015-03-08 +25676813,The ARVD/C genetic variants database: 2014 update.,"Arrhythmogenic cardiomyopathy (ACM) is an inherited cardiac disease characterized by myocardial atrophy, fibro-fatty replacement, and a high risk of ventricular arrhythmias that lead to sudden death. 
In 2009, genetic data from 57 publications were collected in the arrhythmogenic right ventricular dysplasia/cardiomyopathy (ARVD/C) Genetic Variants Database (freeware available at http://www.arvcdatabase.info), which comprised 481 variants in eight ACM-associated genes. In recent years, deep genetic sequencing has increased our knowledge of the genetics of ACM, revealing a large spectrum of nucleotide variations for which pathogenicity needs to be assessed. As of April 20, 2014, we have updated the ARVD/C database into the ARVD/C database to contain more than 1,400 variants in 12 ACM-related genes (PKP2, DSP, DSC2, DSG2, JUP, TGFB3, TMEM43, LMNA, DES, TTN, PLN, CTNNA3) as reported in more than 160 references. Of these, only 411 nucleotide variants have been reported as pathogenic, whereas the significance of the other approximately 1,000 variants is still unknown. This comprehensive collection of ACM genetic data represents a valuable source of information on the spectrum of ACM-associated genes and aims to facilitate the interpretation of genetic data and genetic counseling.",2015-03-19 +29332164,Skewing of the genetic architecture at the ZMYM3 human-specific 5' UTR short tandem repeat in schizophrenia.,"Differential expansion of a number of human short tandem repeats (STRs) at the critical core promoter and 5' untranslated region (UTR) support the hypothesis that at least some of these STRs may provide a selective advantage in human evolution. Following a genome-wide screen of all human protein-coding gene 5' UTRs based on the Ensembl database ( http://www.ensembl.org ), we previously reported that the longest STR in this interval is a (GA)32, which belongs to the X-linked zinc finger MYM-type containing 3 (ZMYM3) gene. 
In the present study, we analyzed the evolutionary implication of this region across evolution and examined the allele and genotype distribution of the ""exceptionally long"" STR by direct sequencing of 486 Iranian unrelated male subjects consisting of 196 cases of schizophrenia (SCZ) and 290 controls. We found that the ZMYM3 transcript containing the STR is human-specific (ENST00000373998.5). A significant allele variance difference was observed between the cases and controls (Levene's test for equality of variances F = 4.00, p < 0.03). In addition, six alleles were observed in the SCZ patients that were not detected in the control group (""disease-only"" alleles) (mid p exact < 0.0003). Those alleles were at the extreme short and long ends of the allele distribution curve and composed 4% of the genotypes in the SCZ group. In conclusion, we found skewing of the genetic architecture at the ZMYM3 STR in SCZ. Further, we found a bell-shaped distribution of alleles and selection against alleles at the extreme ends of this STR. The ZMYM3 STR sets a prototype, the evolutionary course of which determines the range of alleles in a particular species. Extreme ""disease-only"" alleles and genotypes may change our perspective of adaptive evolution and complex disorders. The ZMYM3 gene ""exceptionally long"" STR should be sequenced in SCZ and other human-specific phenotypes/characteristics.",2018-01-13 +28934726,Application of Adverse Outcome Pathways to U.S. EPA's Endocrine Disruptor Screening Program.,"

Background

The U.S. EPA's Endocrine Disruptor Screening Program (EDSP) screens and tests environmental chemicals for potential effects in estrogen, androgen, and thyroid hormone pathways, and it is one of the only regulatory programs designed around chemical mode of action.

Objectives

This review describes the EDSP's use of adverse outcome pathway (AOP) and toxicity pathway frameworks to organize and integrate diverse biological data for evaluating the endocrine activity of chemicals. Using these frameworks helps to establish biologically plausible links between endocrine mechanisms and apical responses when those end points are not measured in the same assay.

Results

Pathway frameworks can facilitate a weight of evidence determination of a chemical's potential endocrine activity, identify data gaps, aid study design, direct assay development, and guide testing strategies. Pathway frameworks also can be used to evaluate the performance of computational approaches as alternatives for low-throughput and animal-based assays and predict downstream key events. In cases where computational methods can be validated based on performance, they may be considered as alternatives to specific assays or end points.

Conclusions

A variety of biological systems affect apical end points used in regulatory risk assessments, and without mechanistic data, an endocrine mode of action cannot be determined. Because the EDSP was designed to consider mode of action, toxicity pathway and AOP concepts are a natural fit. Pathway frameworks have diverse applications to endocrine screening and testing. An estrogen pathway example is presented, and similar approaches are being used to evaluate alternative methods and develop predictive models for androgen and thyroid pathways. https://doi.org/10.1289/EHP1304.",2017-09-01 +29077792,Unsupervised multiple kernel learning for heterogeneous data integration.,"

Motivation

Recent high-throughput sequencing advances have expanded the breadth of available omics datasets and the integrated analysis of multiple datasets obtained on the same samples has allowed to gain important insights in a wide range of applications. However, the integration of various sources of information remains a challenge for systems biology since produced datasets are often of heterogeneous types, with the need of developing generic methods to take their different specificities into account.

Results

We propose a multiple kernel framework that allows to integrate multiple datasets of various types into a single exploratory analysis. Several solutions are provided to learn either a consensus meta-kernel or a meta-kernel that preserves the original topology of the datasets. We applied our framework to analyse two public multi-omics datasets. First, the multiple metagenomic datasets, collected during the TARA Oceans expedition, was explored to demonstrate that our method is able to retrieve previous findings in a single kernel PCA as well as to provide a new image of the sample structures when a larger number of datasets are included in the analysis. To perform this analysis, a generic procedure is also proposed to improve the interpretability of the kernel PCA in regards with the original data. Second, the multi-omics breast cancer datasets, provided by The Cancer Genome Atlas, is analysed using a kernel Self-Organizing Maps with both single and multi-omics strategies. The comparison of these two approaches demonstrates the benefit of our integration method to improve the representation of the studied biological system.

Availability and implementation

Proposed methods are available in the R package mixKernel, released on CRAN. It is fully compatible with the mixOmics package and a tutorial describing the approach can be found on mixOmics web site http://mixomics.org/mixkernel/.

Contact

jerome.mariette@inra.fr or nathalie.villa-vialaneix@inra.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-03-01 +30452745,User-Innovated eHealth Solutions for Service Delivery to Older Persons With Hearing Impairment.,"

Purpose

The successful design and innovation of eHealth solutions directly involve end users in the process to seek a better understanding of their needs. This article presents user-innovated eHealth solutions targeting older persons with hearing impairment. Our research question was: What are the key users' needs, expectations, and visions within future hearing rehabilitation service delivery?

Method

We applied a participatory design approach to facilitate the design of future eHealth solutions via focus groups. We involved older persons with hearing impairment (n = 36), significant others (n = 10), and audiologists (n = 8) following 2 methods: (a) human-centered design for interactive systems and (b) user innovation management. Through 3 rounds of focus groups, we facilitated a process progressing from insights and visions for requirements (Phase 1), to app such as paper version wireframes (Phase 2), and to digital prototypes envisioning future eHealth solutions (Phase 3). Each focus group was video-recorded and photographed, resulting in a rich data set that was analyzed through inductive thematic analysis.

Results

The results are presented via (a) a storyboard envisioning future client journeys, (b) 3 key themes for future eHealth solutions, (c) 4 levels of interest and willingness to invest time and effort in digital solutions, and (d) 2 technical savviness types and their different preferences for rehabilitation strategies.

Conclusions

Future eHealth solutions must offer personalized rehabilitation strategies that are appropriate for every person with hearing impairment and their level of technical savviness. Thus, a central requirement is anchoring of digital support in the clients' everyday life situations by facilitating easy access to personalized information, communication, and learning milieus. Moreover, the participants' visions for eHealth solutions call for providing both traditional analogue and digital services.

Supplemental material

https://doi.org/10.23641/asha.7310729.",2018-11-01 +27650316,DBSecSys 2.0: a database of Burkholderia mallei and Burkholderia pseudomallei secretion systems.,"

Background

Burkholderia mallei and B. pseudomallei are the causative agents of glanders and melioidosis, respectively, diseases with high morbidity and mortality rates. B. mallei and B. pseudomallei are closely related genetically; B. mallei evolved from an ancestral strain of B. pseudomallei by genome reduction and adaptation to an obligate intracellular lifestyle. Although these two bacteria cause different diseases, they share multiple virulence factors, including bacterial secretion systems, which represent key components of bacterial pathogenicity. Despite recent progress, the secretion system proteins for B. mallei and B. pseudomallei, their pathogenic mechanisms of action, and host factors are not well characterized.

Results

We previously developed a manually curated database, DBSecSys, of bacterial secretion system proteins for B. mallei. Here, we report an expansion of the database with corresponding information about B. pseudomallei. DBSecSys 2.0 contains comprehensive literature-based and computationally derived information about B. mallei ATCC 23344 and literature-based and computationally derived information about B. pseudomallei K96243. The database contains updated information for 163 B. mallei proteins from the previous database and 61 additional B. mallei proteins, and new information for 281 B. pseudomallei proteins associated with 5 secretion systems, their 1,633 human- and murine-interacting targets, and 2,400 host-B. mallei interactions and 2,286 host-B. pseudomallei interactions. The database also includes information about 13 pathogenic mechanisms of action for B. mallei and B. pseudomallei secretion system proteins inferred from the available literature or computationally. Additionally, DBSecSys 2.0 provides details about 82 virulence attenuation experiments for 52 B. mallei secretion system proteins and 98 virulence attenuation experiments for 61 B. pseudomallei secretion system proteins. We updated the Web interface and data access layer to speed-up users' search of detailed information for orthologous proteins related to secretion systems of the two pathogens.

Conclusions

The updates of DBSecSys 2.0 provide unique capabilities to access comprehensive information about secretion systems of B. mallei and B. pseudomallei. They enable studies and comparisons of corresponding proteins of these two closely related pathogens and their host-interacting partners. The database is available at http://dbsecsys.bhsai.org .",2016-09-20 +30144103,Quantitative Microbial Risk Assessment of Salmonellosis from the Consumption of Australian Pork: Minced Meat from Retail to Burgers Prepared and Consumed at Home.,"Pork burgers could be expected to have an elevated risk of salmonellosis compared to other pork products due to their comminuted nature. A stochastic risk assessment was performed to estimate the risk of salmonellosis from Australian pork burgers and considered risk-affecting factors in the pork supply chain from retail to consumption at home. Conditions modeled included prevalence and concentration of Salmonella in pork mince, time and temperature effects during retail, consumer transport, and domestic storage and the effect of cooking, with the probability of illness from consumption estimated based on these effects. The model was two-dimensional, allowing for the separation of variability and uncertainty. Potential changes to production practices and consumer behaviors were examined through alternative scenarios. Under current conditions in Australia, the mean risk of salmonellosis from consumption of 100 g pork burgers was estimated to be 1.54 × 10 - 8 per serving or one illness per 65,000,000 servings consumed. Under a scenario in which all pork mince consumed is served as pork burgers, and with conservative (i.e., worst-case) assumptions, 0.746 cases of salmonellosis per year from pork burgers in Australia were predicted. 
Despite the adoption of several conservative assumptions to fill data gaps, it is predicted that pork burgers have a low probability of causing salmonellosis in Australia.",2018-08-24 +28254065,"biomechZoo: An open-source toolbox for the processing, analysis, and visualization of biomechanical movement data.","It is common for biomechanics data sets to contain numerous dependent variables recorded over time, for many subjects, groups, and/or conditions. These data often require standard sorting, processing, and analysis operations to be performed in order to answer research questions. Visualization of these data is also crucial. This manuscript presents biomechZoo, an open-source toolbox that provides tools and graphical user interfaces to help users achieve these goals. The aims of this manuscript are to (1) introduce the main features of the toolbox, including a virtual three-dimensional environment to animate motion data (Director), a data plotting suite (Ensembler), and functions for the computation of three-dimensional lower-limb joint angles, moments, and power and (2) compare these computations to those of an existing validated system. To these ends, the steps required to process and analyze a sample data set via the toolbox are outlined. The data set comprises three-dimensional marker, ground reaction force (GRF), joint kinematic, and joint kinetic data of subjects performing straight walking and 90° turning manoeuvres. Joint kinematics and kinetics processed within the toolbox were found to be similar to outputs from a commercial system. The biomechZoo toolbox represents the work of several years and multiple contributors to provide a flexible platform to examine time-series data sets typical in the movement sciences. The toolbox has previously been used to process and analyse walking, running, and ice hockey data sets, and can integrate existing routines, such as the KineMat toolbox, for additional analyses. 
The toolbox can help researchers and clinicians new to programming or biomechanics to process and analyze their data through a customizable workflow, while advanced users are encouraged to contribute additional functionality to the project. Students may benefit from using biomechZoo as a learning and research tool. It is hoped that the toolbox can play a role in advancing research in the movement sciences. The biomechZoo m-files, sample data, and help repositories are available online (http://www.biomechzoo.com) under the Apache 2.0 License. The toolbox is supported for Matlab (r2014b or newer, The Mathworks Inc., Natick, USA) for Windows (Microsoft Corp., Redmond, USA) and Mac OS (Apple Inc., Cupertino, USA).",2016-11-18 +27193693,PANTHER-PSEP: predicting disease-causing genetic variants using position-specific evolutionary preservation.,"

Unlabelled

PANTHER-PSEP is a new software tool for predicting non-synonymous genetic variants that may play a causal role in human disease. Several previous variant pathogenicity prediction methods have been proposed that quantify evolutionary conservation among homologous proteins from different organisms. PANTHER-PSEP employs a related but distinct metric based on 'evolutionary preservation': homologous proteins are used to reconstruct the likely sequences of ancestral proteins at nodes in a phylogenetic tree, and the history of each amino acid can be traced back in time from its current state to estimate how long that state has been preserved in its ancestors. Here, we describe the PSEP tool, and assess its performance on standard benchmarks for distinguishing disease-associated from neutral variation in humans. On these benchmarks, PSEP outperforms not only previous tools that utilize evolutionary conservation, but also several highly used tools that include multiple other sources of information as well. For predicting pathogenic human variants, the trace back of course starts with a human 'reference' protein sequence, but the PSEP tool can also be applied to predicting deleterious or pathogenic variants in reference proteins from any of the ∼100 other species in the PANTHER database.

Availability and implementation

PANTHER-PSEP is freely available on the web at http://pantherdb.org/tools/csnpScoreForm.jsp Users can also download the command-line based tool at ftp://ftp.pantherdb.org/cSNP_analysis/PSEP/ CONTACT: pdthomas@usc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-18 +29990255,Meta-Path Methods for Prioritizing Candidate Disease miRNAs.,"MicroRNAs (miRNAs) play critical roles in regulating gene expression at post-transcriptional levels. Numerous experimental studies indicate that alterations and dysregulations in miRNAs are associated with important complex diseases, especially cancers. Predicting potential miRNA-disease association is beneficial not only to explore the pathogenesis of diseases, but also to understand biological processes. In this work, we propose two methods that can effectively predict potential miRNA-disease associations using our reconstructed miRNA and disease similarity networks, which are based on the latest experimental data. We reconstruct a miRNA functional similarity network using the following biological information: the miRNA family information, miRNA cluster information, experimentally valid miRNA-target association and disease-miRNA information. We also reconstruct a disease similarity network using disease functional information and disease semantic information. We present Katz with specific weights and Katz with machine learning, on the comprehensive heterogeneous network. These methods, which achieve corresponding AUC values of 0.897 and 0.919, exhibit performance superior to the existing methods. Comprehensive data networks and reasonable considerations guarantee the high performance of our methods. Contrary to several methods, which cannot work in such situations, the proposed methods also predict associations for diseases without any known related miRNAs. A web service for the download and prediction of relationships between diseases and miRNAs is available at http://lab.malab.cn/soft/MDPredict/.",2017-11-22 +29739908,Updates in the Language of Histoplasma Biodiversity. ,"In a recent article, Sepúlveda et al. 
(mBio 8:e01339-17, 2017, https://doi.org/10.1128/mBio.01339-17) investigated the genetic structure and evolutionary history of the human pathogen Histoplasma Using whole-genome resequencing data, Sepúlveda et al. found that the Histoplasma genus is composed of at least four strongly differentiated lineages. Their tour de force is to use a smart combination of population genomic approaches to show that the advanced stage of intraspecific divergence observed within Histoplasma does not simply reflect population structure, but instead results from previously unidentified speciation events. The four independently evolving Histoplasma lineages are elevated to the species status and assigned names. The newly described species exhibit medically important differences in phenotype, and these findings, therefore, have important epidemiological implications. This work provides a blueprint for phylogenomic species recognition in fungi, opening the way for a new age of enlightenment in which fungal species are diagnosed using highly discriminatory tools within a hypothesis-testing framework.",2018-05-08 +28375416,"Assault Injury and Homicide Death Profile in Rhode Island, 2004-2014.","Community violence, including assault and homicide, is a public health problem. We provide a profile of assault-related injury and homicide death in Rhode Island to better understand assault/homicide. The 2014 emergency department (ED) visit data, hospital discharge (HD) data, and 2004-2014 Rhode Island Violent Death Reporting System (RIVDRS) data were used for this study. Most assault injuries and homicide deaths were among persons who were 25-44 years old, male, black and Hispanic, living in urban regions, self-pay or public insurance user, and never married. Almost 63% of the homicide decedents tested positive for some illicit substance. Precipitating circumstances include a preceding argument or a conflict, another crime, intimate partner violence, and drug involvement. 
RIVDRS did not provide an estimate for mental illness related homicides (e.g. command hallucinations). ED, HD, and RIVDRS data can provide a profile of assault injury and homicide death for public health authorities in RI. Interventions need to focus on high-risk populations and areas to effectively prevent assault-related injury and homicide. [Full article available at http://rimed.org/rimedicaljournal-2017-04.asp].",2017-04-03 +25425034,piRBase: a web resource assisting piRNA functional study.,"piRNAs are a class of small RNAs that is most abundantly expressed in the animal germ line. Presently, substantial research is going on to reveal the functions of piRNAs in the epigenetic and post-transcriptional regulation of transposons and genes. A piRNA database for collection, annotation and structuring of these data will be a valuable contribution to the field, and we have therefore developed the piRBase platform which integrates various piRNA-related high-throughput data. piRBase has the largest collection of piRNAs among existing databases, and contains at present 77 million piRNA sequences from nine organisms. Repeat-derived and gene-derived piRNAs, which possibly participate in the regulation of the corresponding elements, have been given particular attention. Furthermore, epigenetic data and reported piRNA targets were also collected. To our knowledge, this is the first piRNA database that systematically integrates epigenetic and post-transcriptional regulation data to support piRNA functional analysis. We believe that piRBase will contribute to a better understanding of the piRNA functions. Database URL: http://www.regulatoryrna.org/database/piRNA/",2014-11-25 +29342452,"Estimated Effect of Temperature on Years of Life Lost: A Retrospective Time-Series Study of Low-, Middle-, and High-Income Regions.","BACKGROUND:Numerous studies have reported a strong association between temperature and mortality. 
Additional insights can be gained from investigating the effects of temperature on years of life lost (YLL), considering the life expectancy at the time of death. OBJECTIVES:The goal of this work was to assess the association between temperature and YLL at seven low-, middle-, and high-income sites. METHODS:We obtained meteorological and population data for at least nine years from four Health and Demographic Surveillance Sites in Kenya (western Kenya, Nairobi), Burkina Faso (Nouna), and India (Vadu), as well as data from cities in the United States (Philadelphia, Phoenix) and Sweden (Stockholm). A distributed lag nonlinear model was used to estimate the association of daily maximum temperature and daily YLL, lagged 0-14 d. The reference value was set for each site at the temperature with the lowest YLL. RESULTS:Generally, YLL increased with higher temperature, starting day 0. In Nouna, the hottest location, with a minimum YLL temperature at the first percentile, YLL increased consistently with higher temperatures. In Vadu, YLL increased in association with heat, whereas in Nairobi, YLL increased in association with both low and high temperatures. Associations with cold and heat were evident for Phoenix (stronger for heat), Stockholm, and Philadelphia (both stronger for cold). Patterns of associations with mortality were generally similar to those with YLL. CONCLUSIONS:Both high and low temperatures are associated with YLL in high-, middle-, and low-income countries. Policy guidance and health adaptation measures might be improved with more comprehensive indicators of the health burden of high and low temperatures such as YLL. https://doi.org/10.1289/EHP1745.",2018-01-12 +22700703,A decade of Web Server updates at the Bioinformatics Links Directory: 2003-2012.,"The 2012 Bioinformatics Links Directory update marks the 10th special Web Server issue from Nucleic Acids Research. 
Beginning with content from their 2003 publication, the Bioinformatics Links Directory in collaboration with Nucleic Acids Research has compiled and published a comprehensive list of freely accessible, online tools, databases and resource materials for the bioinformatics and life science research communities. The past decade has exhibited significant growth and change in the types of tools, databases and resources being put forth, reflecting both technology changes and the nature of research over that time. With the addition of 90 web server tools and 12 updates from the July 2012 Web Server issue of Nucleic Acids Research, the Bioinformatics Links Directory at http://bioinformatics.ca/links_directory/ now contains an impressive 134 resources, 455 databases and 1205 web server tools, mirroring the continued activity and efforts of our field.",2012-06-14 +28056767,Bacterial whole genome-based phylogeny: construction of a new benchmarking dataset and assessment of some existing methods.,"

Background

Whole genome sequencing (WGS) is increasingly used in diagnostics and surveillance of infectious diseases. A major application for WGS is to use the data for identifying outbreak clusters, and there is therefore a need for methods that can accurately and efficiently infer phylogenies from sequencing reads. In the present study we describe a new dataset that we have created for the purpose of benchmarking such WGS-based methods for epidemiological data, and also present an analysis where we use the data to compare the performance of some current methods.

Results

Our aim was to create a benchmark data set that mimics sequencing data of the sort that might be collected during an outbreak of an infectious disease. This was achieved by letting an E. coli hypermutator strain grow in the lab for 8 consecutive days, each day splitting the culture in two while also collecting samples for sequencing. The result is a data set consisting of 101 whole genome sequences with known phylogenetic relationship. Among the sequenced samples 51 correspond to internal nodes in the phylogeny because they are ancestral, while the remaining 50 correspond to leaves. We also used the newly created data set to compare three different online available methods that infer phylogenies from whole-genome sequencing reads: NDtree, CSI Phylogeny and REALPHY. One complication when comparing the output of these methods with the known phylogeny is that phylogenetic methods typically build trees where all observed sequences are placed as leaves, even though some of them are in fact ancestral. We therefore devised a method for post processing the inferred trees by collapsing short branches (thus relocating some leaves to internal nodes), and also present two new measures of tree similarity that take into account the identity of both internal and leaf nodes.

Conclusions

Based on this analysis we find that, among the investigated methods, CSI Phylogeny had the best performance, correctly identifying 73% of all branches in the tree and 71% of all clades. We have made all data from this experiment (raw sequencing reads, consensus whole-genome sequences, as well as descriptions of the known phylogeny in a variety of formats) publicly available, with the hope that other groups may find this data useful for benchmarking and exploring the performance of epidemiological methods. All data is freely available at: https://cge.cbs.dtu.dk/services/evolution_data.php .",2017-01-05 +23190929,Autworks: a cross-disease network biology application for Autism and related disorders.,"

Background

The genetic etiology of autism is heterogeneous. Multiple disorders share genotypic and phenotypic traits with autism. Network based cross-disorder analysis can aid in the understanding and characterization of the molecular pathology of autism, but there are few tools that enable us to conduct cross-disorder analysis and to visualize the results.

Description

We have designed Autworks as a web portal to bring together gene interaction and gene-disease association data on autism to enable network construction, visualization, network comparisons with numerous other related neurological conditions and disorders. Users may examine the structure of gene interactions within a set of disorder-associated genes, compare networks of disorder/disease genes with those of other disorders/diseases, and upload their own sets for comparative analysis.

Conclusions

Autworks is a web application that provides an easy-to-use resource for researchers of varied backgrounds to analyze the autism gene network structure within and between disorders.

Availability

http://autworks.hms.harvard.edu/",2012-11-28 +22638578,iELM--a web server to explore short linear motif-mediated interactions.,"The recent expansion in our knowledge of protein-protein interactions (PPIs) has allowed the annotation and prediction of hundreds of thousands of interactions. However, the function of many of these interactions remains elusive. The interactions of Eukaryotic Linear Motif (iELM) web server provides a resource for predicting the function and positional interface for a subset of interactions mediated by short linear motifs (SLiMs). The iELM prediction algorithm is based on the annotated SLiM classes from the Eukaryotic Linear Motif (ELM) resource and allows users to explore both annotated and user-generated PPI networks for SLiM-mediated interactions. By incorporating the annotated information from the ELM resource, iELM provides functional details of PPIs. This can be used in proteomic analysis, for example, to infer whether an interaction promotes complex formation or degradation. Furthermore, details of the molecular interface of the SLiM-mediated interactions are also predicted. This information is displayed in a fully searchable table, as well as graphically with the modular architecture of the participating proteins extracted from the UniProt and Phospho.ELM resources. A network figure is also presented to aid the interpretation of results. The iELM server supports single protein queries as well as large-scale proteomic submissions and is freely available at http://i.elm.eu.org.",2012-05-25 +28460062,SpartaABC: a web server to simulate sequences with indel parameters inferred using an approximate Bayesian computation algorithm.,"Many analyses for the detection of biological phenomena rely on a multiple sequence alignment as input. The results of such analyses are often further studied through parametric bootstrap procedures, using sequence simulators. 
One of the problems with conducting such simulation studies is that users currently have no means to decide which insertion and deletion (indel) parameters to choose, so that the resulting sequences mimic biological data. Here, we present SpartaABC, a web server that aims to solve this issue. SpartaABC implements an approximate-Bayesian-computation rejection algorithm to infer indel parameters from sequence data. It does so by extracting summary statistics from the input. It then performs numerous sequence simulations under randomly sampled indel parameters. By computing a distance between the summary statistics extracted from the input and each simulation, SpartaABC retains only parameters behind simulations close to the real data. As output, SpartaABC provides point estimates and approximate posterior distributions of the indel parameters. In addition, SpartaABC allows simulating sequences with the inferred indel parameters. To this end, the sequence simulators, Dawg 2.0 and INDELible were integrated. Using SpartaABC we demonstrate the differences in indel dynamics among three protein-coding genes across mammalian orthologs. SpartaABC is freely available for use at http://spartaabc.tau.ac.il/webserver.",2017-07-01 +,"Biodiversity and biogeography in Madagascar: revision of the endemic flea beetle genus Neodera Duvivier, 1891 with description of 19 new species (Coleoptera, Chrysomelidae, Galerucinae, Alticini)","A revision of the Malagasy flea beetle genus Neodera Duvivier is provided. This genus includes 31 species of which 19 are new to science: Neodera amplicollis sp.n., N. breviantennata sp.n., N. didiensis sp.n., N. diversitarsis sp.n., N. jenisi sp.n., N. kraussi sp.n., N. longicollis sp.n., N. marojejyensis sp.n., N. nigrotibialis sp.n., N. opaca sp.n., N. pauliani sp.n., N. perroti sp.n., N. peyrierasi sp.n., N. reducta sp.n., N. similvadoni sp.n., N. sogai sp.n., N. speronata sp.n., N. straminoides sp.n. and N. vadoni sp.n.. 
The following two synonymies are proposed: N. fraterna Duvivier = N. amarella Bechyné and N. fulva Jacoby = N. hilari Bechyné. In addition, neotypes for N. emarginata Duvivier and N. fraterna Duvivier, and lectotypes for N. fulva Jacoby, N. imitatrix Duvivier, N. madagassa (Harold), N. picticornis (Harold), N. transversicollis Jacoby and N. varicornis (Harold) are designated. Additionally, the revision comprises a key for the identification of all 31 species considered, some habitus photos, microscope and scanning electron micrographs of many diagnostic characters, including median lobe of aedeagus and spermatheca. A first phylogenetic analysis based on parsimony is also provided. It is used with distributional data to put forward hypotheses about the natural history of the taxon in the light of the species differentiation models proposed thus far for Madagascar. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:CABEB580-7591-4F49-A838-280C6DE0B0F9.",2014-10-01 +25278960,DaVIE: Database for the Visualization and Integration of Epigenetic data.,"One of the challenges in the analysis of large data sets, particularly in a population-based setting, is the ability to perform comparisons across projects. This has to be done in such a way that the integrity of each individual project is maintained, while ensuring that the data are comparable across projects. These issues are beginning to be observed in human DNA methylation studies, as the Illumina 450k platform and next generation sequencing-based assays grow in popularity and decrease in price. This increase in productivity is enabling new insights into epigenetics, but also requires the development of pipelines and software capable of handling the large volumes of data. 
The specific problems inherent in creating a platform for the storage, comparison, integration, and visualization of DNA methylation data include data storage, algorithm efficiency and ability to interpret the results to derive biological meaning from them. Databases provide a ready-made solution to these issues, but as yet no tools exist that that leverage these advantages while providing an intuitive user interface for interpreting results in a genomic context. We have addressed this void by integrating a database to store DNA methylation data with a web interface to query and visualize the database and a set of libraries for more complex analysis. The resulting platform is called DaVIE: Database for the Visualization and Integration of Epigenetics data. DaVIE can use data culled from a variety of sources, and the web interface includes the ability to group samples by sub-type, compare multiple projects and visualize genomic features in relation to sites of interest. We have used DaVIE to identify patterns of DNA methylation in specific projects and across different projects, identify outlier samples, and cross-check differentially methylated CpG sites identified in specific projects across large numbers of samples. A demonstration server has been setup using GEO data at http://echelon.cmmt.ubc.ca/dbaccess/, with login ""guest"" and password ""guest."" Groups may download and install their own version of the server following the instructions on the project's wiki.",2014-09-18 +25217575,EUROCarbDB(CCRC): a EUROCarbDB node for storing glycomics standard data.,"

Motivation

In the field of glycomics research, several different techniques are used for structure elucidation. Although multiple techniques are often used to increase confidence in structure assignments, most glycomics databases allow storing of only a single type of experimental data. In addition, the methods used to prepare a sample for analysis are seldom recorded, making it harder to reproduce the analytical data and results.

Results

We have extended the freely available EUROCarbDB framework to allow the submission of experimental data and the reporting of several orthogonal experimental datasets. The features aim to increase the understandability and reproducibility of the reported data.

Availability and implementation

The installation with the glycan standards is available at http://glycomics.ccrc.uga.edu/eurocarb/. The source code of the project is available at https://code.google.com/p/ucdb/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-12 +,Detecting and Quantifying Low Level Gene Variants in Sanger Sequencing Traces Using the ab1 Peak Reporter Tool,"Automated fluorescent dye-terminator DNA Sequencing using capillary electrophoresis (a.k.a. CE or Sanger sequencing) has been instrumental in the detailed characterization of the human genome and is now widely used as gold standard method for verification of germline mutations. The primary information of the DNA sequencing process is the identification of the nucleotides and of possible sequence variants. A largely unexplored feature of fluorescent Sanger sequencing traces is the quantitative information embedded therein. With the growing need for quantifying somatic mutations in tumor tissue it is desirable to exploit the potential of the quantitative information obtained from sequencing traces. To this end, we have developed the ab1PeakReporter tool that converts Sanger sequencing trace files into comma separated value (.csv) files containing numerical data of peak data characteristics that can be explored and analyzed using conventional spreadsheet software. The web-based tool can be accessed after log-in into a user account at http://apps.lifetechnologies.com/ab1peakreporter. The output file contains the peak height and quality values for each nucleotide and peak height ratios for all 4 bases at any given locus allowing the detection and assessment of subtle changes at any given allele. We demonstrated the utility of this tool by analyzing samples with known amounts of spiked in variant alleles ranging from 2.5%, 5%, 7.5%, 10%, 15% and 25% and show that rare alleles could be convincingly detected around the 5%–7.5% level. 
In conclusion, enabling the high sensitivity detection of variants occurring at low level using Sanger sequencing will be useful as orthogonal verification method for next generation sequencing projects attempting to detect minor variants.",2014-05-01 +28923831,"International Comparisons of Prevalence, Awareness, and Treatment of Pruritus in People on Hemodialysis.","

Background and objectives

Uremic pruritus in patients on hemodialysis is associated with depression, lower quality of life, and mortality. We studied the prevalence, awareness, and treatment of pruritus to assess how well this important condition is currently managed internationally.

Design, setting, participants, & measurements

Data from 35,452 patients on hemodialysis in up to 17 countries from the Dialysis Outcomes and Practice Patterns Study were analyzed to describe pruritus prevalence from 1996 to 2015. Data from 6256 patients and 268 medical directors in 17 countries in 2012-2015 were analyzed to describe predictors, effects, medical directors' awareness, and treatment of pruritus.

Results

Patients very much or extremely bothered by itching declined from 28% in 1996 to 18% in 2015. In 2012-2015, among patients nearly always or always bothered by itching, pruritus had a major effect on work and social life; 18% used no treatment for pruritus, and 17% did not report itching to health care staff. In total, 69% of medical directors underestimated the prevalence of pruritus in their unit. Managing high serum phosphorus and low Kt/V was ranked as the most important intervention, but no relationship was found between these factors and pruritus; 57% of medical directors used oral antihistamines for first-line chronic treatment of pruritus. Gabapentin was used by 45% as first-, second-, or third-line treatment. Nalfurafine was only used in Japan.

Conclusions

The prevalence of pruritus in people on hemodialysis is decreasing but remains underestimated. Large numbers of patients on hemodialysis with severe pruritus do not receive treatment. There is wide variation in the use of unlicensed medications for the treatment of pruritus. These data provide a benchmark for initiatives to improve the management of uremic pruritus.

Multimedia

This article contains multimedia at https://vimeo.com/49458473. This article contains multimedia at vimeo.com/49455976.",2017-09-18 +28678506,Comprehensive Map and Functional Annotation of Human Pituitary and Thyroid Proteome.,"Knowledge about human tissue proteome will provide insights into health organ physiology. To construct a comprehensive data set of human pituitary and thyroid proteins, post-mortem pituitaries and thyroids from 10 normal individuals were used. The pooled samples were prepared using two methods. One part of the sample was processed using 14 high-abundance proteins immunoaffinity column. The other part was directly subjected to digestion. Finally, a total of 7596 proteins in pituitary and 5602 proteins in thyroid with high confidence were identified, with 6623 and 4368 quantified, respectively. A total of 5781 of pituitary and 3178 of thyroid proteins have not been previously reported in the normal pituitary and thyroid proteome. Comparison of pituitary and thyroid proteome indicated that thyroid prefers to be involved in nerve system regeneration and metabolic regulation, while pituitary mainly performs functions of signal transduction and cancer modulation. Our results, for the first time, comprehensively profiled and functionally annotated the largest high-confidence data set of proteome of two important endocrine glands, pituitary and thyroid, which is important for further studies on biomarker identification and molecular mechanisms of pituitary and thyroid disorders. The mapping results can be freely downloaded at http://www.urimarker.com/pituitary/ and http://www.urimarker.com/thyroid/ . The raw data are available via ProteomeXchange with identifier PXD006471.",2017-07-12 +29559822,"Taxonomic annotation of public fungal ITS sequences from the built environment - a report from an April 10-11, 2017 workshop (Aberdeen, UK).","Recent DNA-based studies have shown that the built environment is surprisingly rich in fungi. 
These indoor fungi - whether transient visitors or more persistent residents - may hold clues to the rising levels of human allergies and other medical and building-related health problems observed globally. The taxonomic identity of these fungi is crucial in such pursuits. Molecular identification of the built mycobiome is no trivial undertaking, however, given the large number of unidentified, misidentified, and technically compromised fungal sequences in public sequence databases. In addition, the sequence metadata required to make informed taxonomic decisions - such as country and host/substrate of collection - are often lacking even from reference and ex-type sequences. Here we report on a taxonomic annotation workshop (April 10-11, 2017) organized at the James Hutton Institute/University of Aberdeen (UK) to facilitate reproducible studies of the built mycobiome. The 32 participants went through public fungal ITS barcode sequences related to the built mycobiome for taxonomic and nomenclatural correctness, technical quality, and metadata availability. A total of 19,508 changes - including 4,783 name changes, 14,121 metadata annotations, and the removal of 99 technically compromised sequences - were implemented in the UNITE database for molecular identification of fungi (https://unite.ut.ee/) and shared with a range of other databases and downstream resources. Among the genera that saw the largest number of changes were Penicillium, Talaromyces, Cladosporium, Acremonium, and Alternaria, all of them of significant importance in both culture-based and culture-independent surveys of the built environment.",2018-01-08 +29311748,Integrated Analysis of Gene Expression Differences in Twins Discordant for Disease and Binary Phenotypes.,"While both genes and environment contribute to phenotype, deciphering environmental contributions to phenotype is a challenge. 
Furthermore, elucidating how different phenotypes may share similar environmental etiologies also is challenging. One way to identify environmental influences is through a discordant monozygotic (MZ) twin study design. Here, we assessed differential gene expression in MZ discordant twin pairs (affected vs. non-affected) for seven phenotypes, including chronic fatigue syndrome, obesity, ulcerative colitis, major depressive disorder, intermittent allergic rhinitis, physical activity, and intelligence quotient, comparing the spectrum of genes differentially expressed across seven phenotypes individually. Second, we performed meta-analysis for each gene to identify commonalities and differences in gene expression signatures between the seven phenotypes. In our integrative analyses, we found that there may be a common gene expression signature (with small effect sizes) across the phenotypes; however, differences between phenotypes with respect to differentially expressed genes were more prominently featured. Therefore, defining common environmentally induced pathways in phenotypes remains elusive. We make our work accessible by providing a new database (DiscTwinExprDB: http://apps.chiragjpgroup.org/disctwinexprdb/ ) for investigators to study non-genotypic influence on gene expression.",2018-01-08 +27430689,[Association between polymorphism in Vav3 genes and risk of primary prostatic cancer in Chinese Han population].,"

Objective

To study the associations between genetic variations of Vav3 gene and prostate cancer susceptibility.

Methods

Data were collected in a hospital-based case-control study of 1 015 prostate cancer cases and 1 068 cancer-free controls recruited between 2008 and 2012. Based on the online databases NCBI dbSNP (http://www.ncbi.nlm.nih.gov/projects/SNP) and SNPinfo (http://snpinfo.niehs.nih.gov/snpfunc.htm), functional single nucleotide polymorphisms (SNPs) of Vav3 were screened and genotyped, and their associations with risk of prostate cancer were assessed by using logistic regression analysis. Furthermore, the associations between SNPs of Vav3 and some clinicopathological parameters were evaluated.

Results

Among the two SNPs investigated, only Vav3 rs12410676 G>A was associated with decreased prostate cancer risk [additive model, OR=0.80 (0.69-0.93), P=0.003; dominant model, OR=0.81 (0.68-0.97), P=0.022; recessive model, OR=0.54 (0.36-0.82), P=0.004]. The combined effect of Vav3 rs8676 G>A and rs12410676 G>A was found as a decreased prostate cancer risk along with the increased variant alleles (P<0.05). Specifically, participants carrying Vav3 rs12410676 AA/AG genotypes were more likely to be at lower prostate cancer risk, compared with participants carrying GG genotypes, in groups of BMI≤25 kg/m², smoking, Gleason>7(4+3), and higher invasive prostate cancer. Finally, some positive findings were evidently significant with false positive report probability values at different prior probability levels (0.25, 0.1 and 0.01).

Conclusion

Vav3 SNPs may contribute to the risk of prostate cancer in Eastern Chinese men, but the effect is weak and needs further validation by larger, multicenter and ethnic-based studies.",2016-07-01 +26361227,Structure Based Thermostability Prediction Models for Protein Single Point Mutations with Machine Learning Tools.,"Thermostability issue of protein point mutations is a common occurrence in protein engineering. An application which predicts the thermostability of mutants can be helpful for guiding decision making process in protein design via mutagenesis. An in silico point mutation scanning method is frequently used to find ""hot spots"" in proteins for focused mutagenesis. ProTherm (http://gibk26.bio.kyutech.ac.jp/jouhou/Protherm/protherm.html) is a public database that consists of thousands of protein mutants' experimentally measured thermostability. Two data sets based on two differently measured thermostability properties of protein single point mutations, namely the unfolding free energy change (ddG) and melting temperature change (dTm) were obtained from this database. Folding free energy change calculation from Rosetta, structural information of the point mutations as well as amino acid physical properties were obtained for building thermostability prediction models with informatics modeling tools. Five supervised machine learning methods (support vector machine, random forests, artificial neural network, naïve Bayes classifier, K nearest neighbor) and partial least squares regression are used for building the prediction models. Binary and ternary classifications as well as regression models were built and evaluated. Data set redundancy and balancing, the reverse mutations technique, feature selection, and comparison to other published methods were discussed. Rosetta calculated folding free energy change ranked as the most influential features in all prediction models. 
Other descriptors also made significant contributions to increasing the accuracy of the prediction models.",2015-09-11 +29118173,mecC-Harboring Methicillin-Resistant Staphylococcus aureus: Hiding in Plain Sight. ,"Previously there was scant data on the performance of laboratory testing to detect mecC-mediated beta-lactam resistance in Staphylococcus aureus Kriegeskorte and colleagues (J Clin Microbiol 56:e00826-17, 2018, https://doi.org/10.1128/JCM.00826-17) report the performance of various clinical tests for the detection of mecC-harboring methicillin-resistant S. aureus (MRSA), which failed to identify from 0 to 41% of tested mecC-harboring MRSA isolates. Changes in practice and new test development are necessary to address the challenge of mecC-harboring MRSA.",2017-12-26 +34706486,Giant taxon-character matrices II: a response to Laing et al. (2017).,"The trend towards big data analyses in evolutionary biology has been observed in phylogenetics via the assembly of giant datasets composed of genomic and phenotypic data. We recently (Simões et al., 2017. Giant taxon-character matrices: Quality of character constructions remains critical regardless of size. Cladistics 33, 198-219) presented a critique of the phylogenetic character concepts used in current morphological datasets, with the caution that giant datasets did not obviate the empirical requirement of rigor in character construction. Laing et al. (2017. Giant taxon-character matrices: The future of morphological systematics. Cladistics, https://doi.org/10.1111/cla.12197) have since argued that we had 'suggested' that large datasets inherently contain flawed characters, and that we had presented a substandard methodology of phylogenetic analysis. Laing et al. concluded by discussing their approach to phylogenetic signal, total evidence and the inevitability of large datasets. We here reply to Laing et al. by reviewing what we actually wrote regarding dataset size, characters and methodology. 
We show that Laing et al.'s. central premise is unsupported, thus characterizing a Straw Man argument, and deeply misrepresents our original study. In part two, we discuss total evidence and phylogenetic signal issues raised by Laing et al. that are of major consequence to the appropriate construction of large morphological datasets.",2017-11-19 +28065897,TRFBA: an algorithm to integrate genome-scale metabolic and transcriptional regulatory networks with incorporation of expression data.,"

Motivation

Integration of different biological networks and data-types has been a major challenge in systems biology. The present study introduces the transcriptional regulated flux balance analysis (TRFBA) algorithm that integrates transcriptional regulatory and metabolic models using a set of expression data for various perturbations.

Results

TRFBA considers the expression levels of genes as a new continuous variable and introduces two new linear constraints. The first constraint limits the rate of reaction(s) supported by a metabolic gene using a constant parameter (C) that converts the expression levels to the upper bounds of the reactions. Considering the concept of constraint-based modeling, the second set of constraints correlates the expression level of each target gene with that of its regulating genes. A set of constraints and binary variables was also added to prevent the second set of constraints from overlapping. TRFBA was implemented on Escherichia coli and Saccharomyces cerevisiae models to estimate growth rates under various environmental and genetic perturbations. The error sensitivity to the algorithm parameter was evaluated to find the best value of C. The results indicate a significant improvement in the quantitative prediction of growth in comparison with previously presented algorithms. The robustness of the algorithm to change in the expression data and the regulatory network was tested to evaluate the effect of noisy and incomplete data. Furthermore, the use of added constraints for perturbations without their gene expression profile demonstrates that these constraints can be applied to improve the growth prediction of FBA.

Availability and implementation

TRFBA is implemented in Matlab software and requires COBRA toolbox. Source code is freely available at http://sbme.modares.ac.ir .

Contact

: motamedian@modares.ac.ir.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +25414350,GenBank.,"GenBank(®) (http://www.ncbi.nlm.nih.gov/genbank/) is a comprehensive database that contains publicly available nucleotide sequences for over 300 000 formally described species. These sequences are obtained primarily through submissions from individual laboratories and batch submissions from large-scale sequencing projects, including whole-genome shotgun and environmental sampling projects. Most submissions are made using the web-based BankIt or standalone Sequin programs, and GenBank staff assign accession numbers upon data receipt. Daily data exchange with the European Nucleotide Archive and the DNA Data Bank of Japan ensures worldwide coverage. GenBank is accessible through the NCBI Entrez retrieval system, which integrates data from the major DNA and protein sequence databases along with taxonomy, genome, mapping, protein structure and domain information, and the biomedical journal literature via PubMed. BLAST provides sequence similarity searches of GenBank and other sequence databases. Complete bimonthly releases and daily updates of the GenBank database are available by FTP.",2014-11-20 +27084943,MAGIC-web: a platform for untargeted and targeted N-linked glycoprotein identification.,"MAGIC-web is the first web server, to the best of our knowledge, that performs both untargeted and targeted analyses of mass spectrometry-based glycoproteomics data for site-specific N-linked glycoprotein identification. The first two modules, MAGIC and MAGIC+, are designed for untargeted and targeted analysis, respectively. MAGIC is implemented with our previously proposed novel Y1-ion pattern matching method, which adequately detects Y1- and Y0-ion without prior information of proteins and glycans, and then generates in silico MS(2) spectra that serve as input to a database search engine (e.g. Mascot) to search against a large-scale protein sequence database. 
On top of that, the newly implemented MAGIC+ allows users to determine glycopeptide sequences using their own protein sequence file. The third module, Reports Integrator, provides the service of combining protein identification results from Mascot and glycan-related information from MAGIC-web to generate a complete site-specific protein-glycan summary report. The last module, Glycan Search, is designed for the users who are interested in finding possible glycan structures with specific numbers and types of monosaccharides. The results from MAGIC, MAGIC+ and Reports Integrator can be downloaded via provided links whereas the annotated spectra and glycan structures can be visualized in the browser. MAGIC-web is accessible from http://ms.iis.sinica.edu.tw/MAGIC-web/index.html.",2016-04-15 +27983673,Using miRNA-Analyzer for the Analysis of miRNA Data. ,"MicroRNAs (miRNAs) are small biological molecules that play an important role during the mechanisms of protein formation. Recent findings have demonstrated that they act as both positive and negative regulators of protein formation. Thus, the investigation of miRNAs, i.e., the determination of their level of expression, has developed a huge interest in the scientific community. One of the leading technologies for extracting miRNA data from biological samples is the miRNA Affymetrix platform. It provides the quantification of the level of expression of the miRNA in a sample, thus enabling the accumulation of data and allowing the determination of relationships among miRNA, genes, and diseases. Unfortunately, there is a lack of a comprehensive platform able to provide all the functions needed for the extraction of information from miRNA data. We here present miRNA-Analyzer, a complete software tool providing primary functionalities for miRNA data analysis. 
The current version of miRNA-Analyzer wraps the Affymetrix QCTool for the preprocessing of binary data files, and then provides feature selection (the filtering by species and by the associated p-value of preprocessed files). Finally, preprocessed and filtered data are analyzed by the Multiple Experiment Viewer (T-MEV) and Short Time Series Expression Miner (STEM) tools, which are also wrapped into miRNA-Analyzer, thus providing a unique environment for miRNA data analysis. The tool offers a plug-in interface so it is easily extensible by adding other algorithms as plug-ins. Users may download the tool freely for academic use at https://sites.google.com/site/mirnaanalyserproject/d.",2016-12-15 +28968726,GradDock: rapid simulation and tailored ranking functions for peptide-MHC Class I docking.,"

Motivation

The identification of T-cell epitopes has many profound translational applications in the areas of transplantation, disease diagnosis, vaccine/therapeutic protein development and personalized immunotherapy. While data-driven methods have been widely used for the prediction of peptide binders with notable successes, the structural modeling of peptide binding to MHC molecules is crucial for understanding the underlying molecular mechanism of the immunological processes.

Results

We developed GradDock, a structure-based method for the rapid and accurate modeling of peptide binding to MHC Class I (pMHC-I). GradDock explicitly models diverse unbound peptides in vacuo and inserts them into the MHC-I groove through a steered gradient descent with a topological correction process. The simulation process yields diverse structural conformations including native-like peptides. We completely revised the Rosetta score terms and developed a new ranking function specifically for pMHC-I. Using the diverse peptides, a linear programming approach is applied to find the optimal weights for the individual Rosetta score terms. Our examination revealed that a refinement of the dihedral angles and a modification of the repulsion can dramatically improve the modeling quality. GradDock is five-times faster than a Rosetta-based docking approach for pMHC-I. We also demonstrate that the predictive capability of GradDock with the re-weighted Rosetta ranking function is consistently more accurate than the Rosetta-based method with the standard Rosetta score (approximately three-times better for a cross-docking set).

Availability and implementation

GradDock is freely available for academic purposes. The program and the ranking score weights for Rosetta are available at http://bel.kaist.ac.kr/research/GradDock.

Contact

hskim76@kaist.ac.kr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-02-01 +26231432,AlignBucket: a tool to speed up 'all-against-all' protein sequence alignments optimizing length constraints.,"

Motivation

The next-generation sequencing era requires reliable, fast and efficient approaches for the accurate annotation of the ever-increasing number of biological sequences and their variations. Transfer of annotation upon similarity search is a standard approach. The procedure of all-against-all protein comparison is a preliminary step of different available methods that annotate sequences based on information already present in databases. Given the actual volume of sequences, methods are necessary to pre-process data to reduce the time of sequence comparison.

Results

We present an algorithm that optimizes the partition of a large volume of sequences (the whole database) into sets where sequence length values (in residues) are constrained depending on a bounded minimal and expected alignment coverage. The idea is to optimally group protein sequences according to their length, and then computing the all-against-all sequence alignments among sequences that fall in a selected length range. We describe a mathematically optimal solution and we show that our method leads to a 5-fold speed-up in real world cases.

Availability and implementation

The software is available for downloading at http://www.biocomp.unibo.it/~giuseppe/partitioning.html.

Contact

giuseppe.profiti2@unibo.it.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-30 +27073839,DiMeX: A Text Mining System for Mutation-Disease Association Extraction.,"The number of published articles describing associations between mutations and diseases is increasing at a fast pace. There is a pressing need to gather such mutation-disease associations into public knowledge bases, but manual curation slows down the growth of such databases. We have addressed this problem by developing a text-mining system (DiMeX) to extract mutation to disease associations from publication abstracts. DiMeX consists of a series of natural language processing modules that preprocess input text and apply syntactic and semantic patterns to extract mutation-disease associations. DiMeX achieves high precision and recall with F-scores of 0.88, 0.91 and 0.89 when evaluated on three different datasets for mutation-disease associations. DiMeX includes a separate component that extracts mutation mentions in text and associates them with genes. This component has been also evaluated on different datasets and shown to achieve state-of-the-art performance. The results indicate that our system outperforms the existing mutation-disease association tools, addressing the low precision problems suffered by most approaches. DiMeX was applied on a large set of abstracts from Medline to extract mutation-disease associations, as well as other relevant information including patient/cohort size and population data. The results are stored in a database that can be queried and downloaded at http://biotm.cis.udel.edu/dimex/. 
We conclude that this high-throughput text-mining approach has the potential to significantly assist researchers and curators to enrich mutation databases.",2016-04-13 +25355511,"The Rat Genome Database 2015: genomic, phenotypic and environmental variations and disease.","The Rat Genome Database (RGD, http://rgd.mcw.edu) provides the most comprehensive data repository and informatics platform related to the laboratory rat, one of the most important model organisms for disease studies. RGD maintains and updates datasets for genomic elements such as genes, transcripts and increasingly in recent years, sequence variations, as well as map positions for multiple assemblies and sequence information. Functional annotations for genomic elements are curated from published literature, submitted by researchers and integrated from other public resources. Complementing the genomic data catalogs are those associated with phenotypes and disease, including strains, QTL and experimental phenotype measurements across hundreds of strains. Data are submitted by researchers, acquired through bulk data pipelines or curated from published literature. Innovative software tools provide users with an integrated platform to query, mine, display and analyze valuable genomic and phenomic datasets for discovery and enhancement of their own research. This update highlights recent developments that reflect an increasing focus on: (i) genomic variation, (ii) phenotypes and diseases, (iii) data related to the environment and experimental conditions and (iv) datasets and software tools that allow the user to explore and analyze the interactions among these and their impact on disease.",2014-10-29 +29520290,"PlantSize Offers an Affordable, Non-destructive Method to Measure Plant Size and Color in Vitro.","Plant size, shape and color are important parameters of plants, which have traditionally been measured by destructive and time-consuming methods. 
Non-destructive image analysis is an increasingly popular technology to characterize plant development in time. High throughput automatic phenotyping platforms can simultaneously analyze multiple morphological and physiological parameters of hundreds or thousands of plants. Such platforms are, however, expensive and are not affordable for many laboratories. Moreover, determination of basic parameters is sufficient for most studies. Here we describe a non-invasive method, which simultaneously measures basic morphological and physiological parameters of in vitro cultured plants. Changes of plant size, shape and color are monitored by repeated photography with a commercial digital camera using neutral white background. Images are analyzed with the MatLab-based computer application PlantSize, which simultaneously calculates several parameters including rosette size, convex area, convex ratio, chlorophyll and anthocyanin contents of all plants identified on the image. Numerical data are exported in MS Excel-compatible format. Subsequent data processing provides information on growth rates, chlorophyll and anthocyanin contents. Proof-of-concept validation of the imaging technology was demonstrated by revealing small but significant differences between wild type and transgenic Arabidopsis plants overexpressing the HSFA4A transcription factor or the hsfa4a knockout mutant, subjected to different stress conditions. While HSFA4A overexpression was associated with better growth, higher chlorophyll and lower anthocyanin content in saline conditions, the knockout hsfa4a mutant showed hypersensitivity to various stresses. Morphological differences were revealed by comparing rosette size, shape and color of wild type plants with phytochrome B (phyB-9) mutant. While the technology was developed with Arabidopsis plants, it is suitable to characterize plants of other species including crops, in a simple, affordable and fast way. 
PlantSize is publicly available (http://www.brc.hu/pub/psize/index.html).",2018-02-22 +26452014,Development of a database for chemical mechanism assignments for volatile organic emissions.,"

Unlabelled

The development of a database for making model species assignments when preparing total organic gas (TOG) emissions input for atmospheric models is described. This database currently has assignments of model species for 12 different gas-phase chemical mechanisms for over 1700 chemical compounds and covers over 3000 chemical categories used in five different anthropogenic TOG profile databases or output by two different biogenic emissions models. This involved developing a unified chemical classification system, assigning compounds to mixtures, assigning model species for the mechanisms to the compounds, and making assignments for unknown, unassigned, and nonvolatile mass. The comprehensiveness of the assignments, the contributions of various types of speciation categories to current profile and total emissions data, inconsistencies with existing undocumented model species assignments, and remaining speciation issues and areas of needed work are also discussed. The use of the system to prepare input for SMOKE, the Speciation Tool, and for biogenic models is described in the supplementary materials. The database, associated programs and files, and a users manual are available online at http://www.cert.ucr.edu/~carter/emitdb .

Implications

Assigning air quality model species to the hundreds of emitted chemicals is a necessary link between emissions data and modeling effects of emissions on air quality. This is not easy and makes it difficult to implement new and more chemically detailed mechanisms in models. If done incorrectly, it is similar to errors in emissions speciation or the chemical mechanism used. Nevertheless, making such assignments is often an afterthought in chemical mechanism development and emissions processing, and existing assignments are usually undocumented and have errors and inconsistencies. This work is designed to address some of these problems.",2015-10-01 +27687799,What is the probability of replicating a statistically significant association in genome-wide association studies?,"The goal of genome-wide association studies (GWASs) is to discover genetic variants associated with diseases/traits. Replication is a common validation method in GWASs. We regard an association as a true finding when it shows significance in both primary and replication studies. A question worth pondering is what is the probability of a primary association (i.e. a statistically significant association in the primary study) being validated in the replication study? This article systematically reviews the answers to this question from different points of view. As Bayesian methods can help us integrate out the uncertainty about the underlying effect of the primary association, we will mainly focus on the Bayesian view in this article. We refer to the Bayesian replication probability as the replication rate (RR). We further describe an estimation method for RR, which makes use of the summary statistics from the primary study. We can use the estimated RR to determine the sample size of the replication study and to check the consistency between the results of the primary study and those of the replication study. We describe an R-package to estimate and apply RR in GWASs. 
Simulation and real data experiments show that the estimated RR has good prediction and calibration performance. We also use these data to demonstrate the usefulness of RR. The R-package is available at http://bioinformatics.ust.hk/RRate.html.",2017-11-01 +30206788,[Explaining discrepancies in self-reported quality of life in frail older people: a mixed-methods study].,"Most research on (multidimensional) frailty focuses on deficits and risks of adverse outcomes. However, frail older people can still report positive outcomes, such as a relatively high QoL. In order to develop more positively oriented prevention strategies, this exploratory study aimed (a) to identify characteristics related to QoL among frail older people; and (b) to explain discrepancies between higher and lower levels of QoL, with a specific focus on strengths frail older people with a higher QoL still may have. Quantitative and qualitative data were gathered by means of semi-structured interviews with Flemish community-dwelling, frail older people with higher (n = 16) and lower QoL levels (n = 18). Quantitative analyses showed that frail older people with a higher QoL were older, had lower levels of psychological frailty, and reported higher meaning in life compared to those with a lower QoL. Outcomes of qualitative analysis showed that participants in the high QoL subgroup adapted more effectively to difficulties, had more things in prospect, performed more activities, and were more satisfied with their social network compared to the low QoL subgroup. To conclude, this exploratory study suggests possibilities to promote and improve QoL by strengthening specific resources among frail older people. Please note that an English version of this article has been published in BMC Geriatrics: van der Vorst A, Zijlstra GAR, De Witte N, Vogel RGM, Schols JMGA, Kempen GIJM, D-SCOPE Consortium. Explaining discrepancies in self-reported quality of life in frail older people: a mixed-methods study. 
BMC Geriatr. 2017;17(1): 251. https://doi.org/10.1186/s12877-017-0641-y .",2018-09-11 +27153725,NMRPro: an integrated web component for interactive processing and visualization of NMR spectra.,"

Unlabelled

The popularity of using NMR spectroscopy in metabolomics and natural products has driven the development of an array of NMR spectral analysis tools and databases. Particularly, web applications are well used recently because they are platform-independent and easy to extend through reusable web components. Currently available web applications provide the analysis of NMR spectra. However, they still lack the necessary processing and interactive visualization functionalities. To overcome these limitations, we present NMRPro, a web component that can be easily incorporated into current web applications, enabling easy-to-use online interactive processing and visualization. NMRPro integrates server-side processing with client-side interactive visualization through three parts: a python package to efficiently process large NMR datasets on the server-side, a Django App managing server-client interaction, and SpecdrawJS for client-side interactive visualization.

Availability and implementation

Demo and installation instructions are available at http://mamitsukalab.org/tools/nmrpro/

Contact

mohamed@kuicr.kyoto-u.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-26 +29383567,Risk Factors Analysis and Death Prediction in Some Life-Threatening Ailments Using Chi-Square Case-Based Reasoning (χ2 CBR) Model.,"A wealth of data are available within the health care system, however, effective analysis tools for exploring the hidden patterns in these datasets are lacking. To alleviate this limitation, this paper proposes a simple but promising hybrid predictive model by suitably combining the Chi-square distance measurement with case-based reasoning technique. The study presents the realization of an automated risk calculator and death prediction in some life-threatening ailments using Chi-square case-based reasoning (χ2 CBR) model. The proposed predictive engine is capable of reducing runtime and speeds up execution process through the use of critical χ2 distribution value. This work also showcases the development of a novel feature selection method referred to as frequent item based rule (FIBR) method. This FIBR method is used for selecting the best feature for the proposed χ2 CBR model at the preprocessing stage of the predictive procedures. The implementation of the proposed risk calculator is achieved through the use of an in-house developed PHP program experimented with XAMP/Apache HTTP server as hosting server. The process of data acquisition and case-based development is implemented using the MySQL application. Performance comparison between our system, the NBY, the ED-KNN, the ANN, the SVM, the Random Forest and the traditional CBR techniques shows that the quality of predictions produced by our system outperformed the baseline methods studied. The result of our experiment shows that the precision rate and predictive quality of our system in most cases are equal to or greater than 70%. Our result also shows that the proposed system executes faster than the baseline methods studied. 
Therefore, the proposed risk calculator is capable of providing useful, consistent, faster, accurate and efficient risk level prediction to both the patients and the physicians at any time, online and on a real-time basis.",2018-01-30 +26131021,TaxKB: a knowledge base for new taxane-related drug discovery.,"

Background

Taxanes are naturally occurring compounds which belong to a powerful group of chemotherapeutic drugs with anticancer properties. Their current use, clinical efficacy, and unique mechanism of action indicate their potentiality for cancer drug discovery and development thereby promising to reduce the high economy associated with cancer worldwide. Extensive research has been carried out on taxanes with the aim to combat issues of drug resistance, side effects, limited natural supply, and also to increase the therapeutic index of these molecules. These efforts have led to the isolation of many naturally occurring compounds belonging to this family (more than 350 different kinds), and the synthesis of semisynthetic analogs of the naturally existing molecules (>500), and has also led to the characterization of many (>1000) of them. A web-based database system on clinically exploitable taxanes, providing a link between the structure and the pharmacological property of these molecules could help to reduce the druggability gap for these molecules.

Results

Taxane knowledge base (TaxKB, http://bioinfo.au-kbc.org.in/taxane/Taxkb/), is an online multi-tier relational database that currently holds data on 42 parameters of 250 natural and 503 semisynthetic analogs of taxanes. This database provides researchers with much-needed information necessary for drug development. TaxKB enables the user to search data on the structure, drug-likeness, and physicochemical properties of both natural and synthetic taxanes with a ""General Search"" option in addition to a ""Parameter Specific Search."" It displays 2D structure and allows the user to download the 3D structure (a PDB file) of taxanes that can be viewed with any molecular visualization tool. The ultimate aim of TaxKB is to provide information on Absorption, Distribution, Metabolism, and Excretion/Toxicity (ADME/T) as well as data on bioavailability and target interaction properties of candidate anticancer taxanes, ahead of expensive clinical trials.

Conclusion

This first web-based single-information portal will play a central role and help researchers to move forward in taxane-based cancer drug research.",2015-06-28 +29036497,Forecasting residue-residue contact prediction accuracy.,"

Motivation

Apart from meta-predictors, most of today's methods for residue-residue contact prediction are based entirely on Direct Coupling Analysis (DCA) of correlated mutations in multiple sequence alignments (MSAs). These methods are on average ∼40% correct for the 100 strongest predicted contacts in each protein. The end-user who works on a single protein of interest will not know if predictions are either much more or much less correct than 40%, which is especially a problem if contacts are predicted to steer experimental research on that protein.

Results

We designed a regression model that forecasts the accuracy of residue-residue contact prediction for individual proteins with an average error of 7 percentage points. Contacts were predicted with two DCA methods (gplmDCA and PSICOV). The models were built on parameters that describe the MSA, the predicted secondary structure, the predicted solvent accessibility and the contact prediction scores for the target protein. Results show that our models can be also applied to the meta-methods, which was tested on RaptorX.

Availability and implementation

All data and scripts are available from http://comprec-lin.iiar.pwr.edu.pl/dcaQ/.

Contact

malgorzata.kotulska@pwr.edu.pl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +28220696,"Unconventional Oil and Gas Spills: Risks, Mitigation Priorities, and State Reporting Requirements.","Rapid growth in unconventional oil and gas (UOG) has produced jobs, revenue, and energy, but also concerns over spills and environmental risks. We assessed spill data from 2005 to 2014 at 31 481 UOG wells in Colorado, New Mexico, North Dakota, and Pennsylvania. We found 2-16% of wells reported a spill each year. Median spill volumes ranged from 0.5 m3 in Pennsylvania to 4.9 m3 in New Mexico; the largest spills exceeded 100 m3. Seventy-five to 94% of spills occurred within the first three years of well life when wells were drilled, completed, and had their largest production volumes. Across all four states, 50% of spills were related to storage and moving fluids via flowlines. Reporting rates varied by state, affecting spill rates and requiring extensive time and effort getting data into a usable format. Enhanced and standardized regulatory requirements for reporting spills could improve the accuracy and speed of analyses to identify and prevent spill risks and mitigate potential environmental damage. Transparency for data sharing and analysis will be increasingly important as UOG development expands. We designed an interactive spills data visualization tool ( http://snappartnership.net/groups/hydraulic-fracturing/webapp/spills.html ) to illustrate the value of having standardized, public data.",2017-02-21 +29538399,Generalising better: Applying deep learning to integrate deleteriousness prediction scores for whole-exome SNV studies.,"Many automatic classifiers were introduced to aid inference of phenotypical effects of uncategorised nsSNVs (nonsynonymous Single Nucleotide Variations) in theoretical and medical applications. Lately, several meta-estimators have been proposed that combine different predictors, such as PolyPhen and SIFT, to integrate more information in a single score. 
Although many advances have been made in feature design and machine learning algorithms used, the shortage of high-quality reference data along with the bias towards intensively studied in vitro models call for improved generalisation ability in order to further increase classification accuracy and handle records with insufficient data. Since a meta-estimator basically combines different scoring systems with highly complicated nonlinear relationships, we investigated how deep learning (supervised and unsupervised), which is particularly efficient at discovering hierarchies of features, can improve classification performance. While it is believed that one should only use deep learning for high-dimensional input spaces and other models (logistic regression, support vector machines, Bayesian classifiers, etc) for simpler inputs, we still believe that the ability of neural networks to discover intricate structure in highly heterogenous datasets can aid a meta-estimator. We compare the performance with various popular predictors, many of which are recommended by the American College of Medical Genetics and Genomics (ACMG), as well as available deep learning-based predictors. Thanks to hardware acceleration we were able to use a computationally expensive genetic algorithm to stochastically optimise hyper-parameters over many generations. Overfitting was hindered by noise injection and dropout, limiting coadaptation of hidden units. Although we stress that this work was not conceived as a tool comparison, but rather an exploration of the possibilities of deep learning application in ensemble scores, our results show that even relatively simple modern neural networks can significantly improve both prediction accuracy and coverage. We provide open-access to our finest model via the web-site: http://score.generesearch.ru/services/badmut/.",2018-03-14 +30675798,Estimating the Effects of PM2.5 on Life Expectancy Using Causal Modeling Methods.,"

Background

Many cohort studies have reported associations between PM2.5 and the hazard of dying, but few have used formal causal modeling methods, estimated marginal effects, or directly modeled the loss of life expectancy.

Objective

Our goal was to directly estimate the effect of PM2.5 on the distribution of life span using causal modeling techniques.

Methods

We derived nonparametric estimates of the distribution of life expectancy as a function of PM2.5 using data from 16,965,154 Medicare beneficiaries in the Northeastern and mid-Atlantic region states (129,341,959 person-years of follow-up and 6,334,905 deaths). We fit separate inverse probability-weighted logistic regressions for each year of age to estimate the risk of dying at that age given the average PM2.5 concentration at each subject's residence ZIP code in the same year, and we used Monte Carlo simulations to estimate confidence intervals.

Results

The estimated mean age at death for a population with an annual average PM2.5 exposure of 12 μg/m3 (the 2012 National Ambient Air Quality Standard) was 0.89 y less (95% CI: 0.88, 0.91) than estimated for a counterfactual PM2.5 exposure of 7.5 μg/m3. In comparison, life expectancy at 65 y of age increased by 0.9 y between 2004 and 2013 in the United States. We estimated that 23.5% of the Medicare population would die before 76 y of age if exposed to PM2.5 at 12 μg/m3 compared with 20.1% if exposed to an annual average of 7.5 μg/m3.

Conclusions

We believe that this is the first study to directly estimate the effect of PM2.5 on the distribution of age at death using causal modeling techniques to control for confounding. We find that reducing PM2.5 concentrations below the 2012 U.S. annual standard would substantially increase life expectancy in the Medicare population. https://doi.org/10.1289/EHP3130.",2018-12-01 +27153724,Cas-Database: web-based genome-wide guide RNA library design for gene knockout screens using CRISPR-Cas9.,"

Motivation

CRISPR-derived RNA guided endonucleases (RGENs) have been widely used for both gene knockout and knock-in at the level of single or multiple genes. RGENs are now available for forward genetic screens at genome scale, but single guide RNA (sgRNA) selection at this scale is difficult.

Results

We develop an online tool, Cas-Database, a genome-wide gRNA library design tool for Cas9 nucleases from Streptococcus pyogenes (SpCas9). With an easy-to-use web interface, Cas-Database allows users to select optimal target sequences simply by changing the filtering conditions. Furthermore, it provides a powerful way to select multiple optimal target sequences from thousands of genes at once for the creation of a genome-wide library. Cas-Database also provides a web application programming interface (web API) for advanced bioinformatics users.

Availability and implementation

Free access at http://www.rgenome.net/cas-database/

Contact

sangsubae@hanyang.ac.kr or jskim01@snu.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-24 +26834843,MetFrag relaunched: incorporating strategies beyond in silico fragmentation.,"

Background

The in silico fragmenter MetFrag, launched in 2010, was one of the first approaches combining compound database searching and fragmentation prediction for small molecule identification from tandem mass spectrometry data. Since then many new approaches have evolved, as has MetFrag itself. This article details the latest developments to MetFrag and its use in small molecule identification since the original publication.

Results

MetFrag has gone through algorithmic and scoring refinements. New features include the retrieval of reference, data source and patent information via ChemSpider and PubChem web services, as well as InChIKey filtering to reduce candidate redundancy due to stereoisomerism. Candidates can be filtered or scored differently based on criteria like occurrence of certain elements and/or substructures prior to fragmentation, or presence in so-called ""suspect lists"". Retention time information can now be calculated either within MetFrag with a sufficient amount of user-provided retention times, or incorporated separately as ""user-defined scores"" to be included in candidate ranking. The changes to MetFrag were evaluated on the original dataset as well as a dataset of 473 merged high resolution tandem mass spectra (HR-MS/MS) and compared with another open source in silico fragmenter, CFM-ID. Using HR-MS/MS information only, MetFrag2.2 and CFM-ID had 30 and 43 Top 1 ranks, respectively, using PubChem as a database. Including reference and retention information in MetFrag2.2 improved this to 420 and 336 Top 1 ranks with ChemSpider and PubChem (89 and 71%), respectively, and even up to 343 Top 1 ranks (PubChem) when combining with CFM-ID. The optimal parameters and weights were verified using three additional datasets of 824 merged HR-MS/MS spectra in total. Further examples are given to demonstrate flexibility of the enhanced features.

Conclusions

In many cases additional information is available from the experimental context to add to small molecule identification, which is especially useful where the mass spectrum alone is not sufficient for candidate selection from a large number of candidates. The results achieved with MetFrag2.2 clearly show the benefit of considering this additional information. The new functions greatly enhance the chance of identification success and have been incorporated into a command line interface in a flexible way designed to be integrated into high throughput workflows. Feedback on the command line version of MetFrag2.2 available at http://c-ruttkies.github.io/MetFrag/ is welcome.",2016-01-29 +30702928,Associations between Coarse Particulate Matter Air Pollution and Cause-Specific Mortality: A Nationwide Analysis in 272 Chinese Cities.,"

Background

Coarse particulate matter with aerodynamic diameter between 2.5 and [Formula: see text] ([Formula: see text]) air pollution is a severe environmental problem in developing countries, but its challenges to public health were rarely evaluated.

Objective

We aimed to investigate the associations between day-to-day changes in [Formula: see text] and cause-specific mortality in China.

Methods

We conducted a nationwide daily time-series analysis in 272 main Chinese cities from 2013 to 2015. The associations between [Formula: see text] concentrations and mortality were analyzed in each city using overdispersed generalized additive models. Two-stage Bayesian hierarchical models were used to estimate national and regional average associations, and random-effect models were used to pool city-specific concentration-response curves. Two-pollutant models were adjusted for fine particles with aerodynamic diameter [Formula: see text] ([Formula: see text]) or gaseous pollutants.

Results

Overall, we observed positive and approximately linear concentration-response associations between [Formula: see text] and daily mortality. A [Formula: see text] increase in [Formula: see text] was associated with higher mortality due to nonaccidental causes [0.23%; 95% posterior interval (PI): 0.13, 0.33], cardiovascular diseases (CVDs; 0.25%; 95% PI: 0.13, 0.37), coronary heart disease (CHD; 0.21%; 95% PI: 0.05, 0.36), stroke (0.21%; 95% PI: 0.08, 0.35), respiratory diseases (0.26%; 95% PI: 0.07, 0.46), and chronic obstructive pulmonary disease (COPD; 0.34%; 95% PI: 0.12, 0.57). Associations were stronger for cities in southern vs. northern China, with significant differences for total and cardiovascular mortality. Associations with [Formula: see text] were of similar magnitude to those for [Formula: see text] in both single- and two-pollutant models with mutual adjustment. Associations were robust to adjustment for gaseous pollutants other than nitrogen dioxide and sulfur dioxide. Meta-regression indicated that a larger positive correlation between [Formula: see text] and [Formula: see text] predicted stronger city-specific associations between [Formula: see text] and total mortality.

Conclusions

This analysis showed significant associations between short-term [Formula: see text] exposure and daily nonaccidental and cardiopulmonary mortality based on data from 272 cities located throughout China. Associations appeared to be independent of exposure to [Formula: see text], carbon monoxide, and ozone. https://doi.org/10.1289/EHP2711.",2019-01-01 +30608455,Early versus late clinical outcomes following same day discharge after elective percutaneous coronary intervention: A systematic review and meta-analysis.,"

Background

Nowadays 57% of the cardiologists based in the United Kingdom and 32% of the cardiologists based in Canada utilize same day discharge (SDD) following elective percutaneous coronary intervention (PCI) as a routine practice. In this analysis, we aimed to systematically assess early versus late clinical outcomes following SDD after elective PCI.

Methods

The Medical Literature Analysis and Retrieval System Online, the Cochrane Central, the Resources from the United States National Library of Medicine (www.ClinicalTrials.gov: http://www.clinicaltrials.gov) and EMBASE were carefully searched for relevant English publications which reported early versus late clinical outcomes in patients who were discharged on the same day following revascularization by PCI. Relevant clinical outcomes which were reported in the original studies were considered as the endpoints in this analysis. Odds ratios (OR) and 95% confidence intervals (CI) were used to represent the data, and RevMan 5.3 was used as the statistical software.

Results

A total of 21,687 participants (enrollment time period from the year 1998 to the year 2015) were assigned to this analysis. When early versus late clinical outcomes were compared in patients who were discharged on the same day following elective PCI, major adverse cardiac events (OR: 0.75, 95% CI: 0.31-1.79; P = .51), mortality (OR: 0.26, 95% CI: 0.06-1.06; P = .06), stroke (OR: 1.46, 95% CI: 0.72-2.94; P = .29), arrhythmia (OR: 1.30, 95% CI: 0.64-2.63; P = .47), hematoma (OR: 1.00, 95% CI: 0.60-1.66; P = 1.00) and major bleeding from access site (OR: 1.68, 95% CI: 0.22-12.85; P = .62) were not significantly different. Post-procedural myocardial infarction (OR: 2.01, 95% CI: 0.71-5.70; P = .19) and minor bleeding from access site (OR: 6.61, 95% CI: 0.86-50.66; P = .07) were also similarly manifested. However, re-hospitalization was significantly higher in those patients with late clinical outcomes (OR: 0.18, 95% CI: 0.07-0.44; P = .0002).

Conclusions

In those patients who were discharged from the hospital on the same day following elective PCI, no significant difference was observed in the assessed early versus late clinical outcomes. However, late clinical outcomes resulted in a significantly higher rate of re-hospitalization. Larger studies should confirm this hypothesis.",2019-01-01 +27337171,TarNet: An Evidence-Based Database for Natural Medicine Research.,"

Background

Complex diseases seriously threaten human health. Drug discovery approaches based on ""single genes, single drugs, and single targets"" are limited in targeting complex diseases. The development of new multicomponent drugs for complex diseases is imperative, and the establishment of a suitable solution for drug group-target protein network analysis is a key scientific problem that must be addressed. Herbal medicines have formed the basis of sophisticated systems of traditional medicine and have given rise to some key drugs that remain in use today. The search for new molecules is currently taking a different route, whereby scientific principles of ethnobotany and ethnopharmacognosy are being used by chemists in the discovery of different sources and classes of compounds.

Results

In this study, we developed TarNet, a manually curated database and platform of traditional medicinal plants with natural compounds that includes potential bio-target information. We gathered information on proteins that are related to or affected by medicinal plant ingredients and data on protein-protein interactions (PPIs). TarNet includes in-depth information on both plant-compound-protein relationships and PPIs. Additionally, TarNet can provide researchers with network construction analyses of biological pathways and protein-protein interactions (PPIs) associated with specific diseases. Researchers can upload a gene or protein list mapped to our PPI database that has been manually curated to generate relevant networks. Multiple functions are accessible for network topological calculations, subnetwork analyses, pathway analyses, and compound-protein relationships.

Conclusions

TarNet will serve as a useful analytical tool that will provide information on medicinal plant compound-affected proteins (potential targets) and system-level analyses for systems biology and network pharmacology researchers. TarNet is freely available at http://www.herbbol.org:8001/tarnet, and detailed tutorials on the program are also available.",2016-06-23 +24246343,"A Computer-Interpretable Version of the AACE, AME, ETA Medical Guidelines for Clinical Practice for the Diagnosis and Management of Thyroid Nodules.","

Objective

Clinical practice guidelines (CPGs) could have a more consistent and meaningful impact on clinician behavior if they were delivered as electronic algorithms that provide patient-specific advice during patient-physician encounters. We developed a computer-interpretable algorithm for U.S. and European users for the purpose of diagnosis and management of thyroid nodules that is based on the ""AACE, AME, ETA Medical Guidelines for Clinical Practice for the Diagnosis and Management of Thyroid Nodules,"" a narrative, evidence-based CPG.

Methods

We initially employed the guideline-modeling language GuideLine Interchange Format, version 3, known as GLIF3, which emphasizes the organization of a care algorithm into a flowchart. The flowchart specified the sequence of tasks required to evaluate a patient with a thyroid nodule. PROforma, a second guideline-modeling language, was then employed to work with data that are not necessarily obtained in a rigid flowchart sequence. Tallis—a user-friendly web-based ""enactment tool""—was then used as the ""execution engine"" (computer program). This tool records and displays tasks that are done and prompts users to perform the next indicated steps. The development process was iteratively performed by clinical experts and knowledge engineers.

Results

We developed an interactive web-based electronic algorithm that is based on a narrative CPG. This algorithm can be used in a variety of regions, countries, and resource-specific settings.

Conclusion

Electronic guidelines provide patient-specific decision support that could standardize care and potentially improve the quality of care. The ""demonstrator"" electronic thyroid nodule guideline that we describe in this report is available at http://demos.deontics.com/trace-review-app (username: reviewer; password: tnodule1). The demonstrator must be more extensively ""trialed"" before it is recommended for routine use.",2014-04-01 +29617928,DeFine: deep convolutional neural networks accurately quantify intensities of transcription factor-DNA binding and facilitate evaluation of functional non-coding variants.,"The complex system of gene expression is regulated by the cell type-specific binding of transcription factors (TFs) to regulatory elements. Identifying variants that disrupt TF binding and lead to human diseases remains a great challenge. To address this, we implement sequence-based deep learning models that accurately predict the TF binding intensities to given DNA sequences. In addition to accurately classifying TF-DNA binding or unbinding, our models are capable of accurately predicting real-valued TF binding intensities by leveraging large-scale TF ChIP-seq data. The changes in the TF binding intensities between the altered sequence and the reference sequence reflect the degree of functional impact for the variant. This enables us to develop the tool DeFine (Deep learning based Functional impact of non-coding variants evaluator, http://define.cbi.pku.edu.cn) with improved performance for assessing the functional impact of non-coding variants including SNPs and indels. DeFine accurately identifies the causal functional non-coding variants from disease-associated variants in GWAS. DeFine is an effective and easy-to-use tool that facilitates systematic prioritization of functional non-coding variants.",2018-06-01 +29961821,SDADB: a functional annotation database of protein structural domains. 
,"Annotating functional terms with individual domains is essential for understanding the functions of full-length proteins. We describe SDADB, a functional annotation database for structural domains. SDADB provides associations between gene ontology (GO) terms and SCOP domains calculated with an integrated framework. GO annotations are assigned probabilities of being correct, which are estimated with a Bayesian network by taking advantage of structural neighborhood mappings, SCOP-InterPro domain mapping information, position-specific scoring matrices (PSSMs) and sequence homolog features, with the most substantial contribution coming from high-coverage structure-based domain-protein mappings. The domain-protein mappings are computed using large-scale structure alignment. SDADB contains ontological terms with probabilistic scores for more than 214 000 distinct SCOP domains. It also provides additional features include 3D structure alignment visualization, GO hierarchical tree view, search, browse and download options.Database URL: http://sda.denglab.org.",2018-01-01 +29322439,Analysis of Circular RNAs Using the Web Tool CircInteractome.,"Circular RNAs (circRNAs) are generated through nonlinear back splicing, during which the 5' and 3' ends are covalently joined. Consequently, the lack of free ends makes them very stable compared to their counterpart linear RNAs. By selectively interacting with microRNAs and RNA-binding proteins (RBPs), circRNAs have been shown to influence gene expression programs. We designed a web tool, CircInteractome, in order to (1) explore potential interactions of circRNAs with RBPs, (2) design specific divergent primers to detect circRNAs, (3) study tissue- and cell-specific circRNAs, (4) identify gene-specific circRNAs, (5) explore potential miRNAs interacting with circRNAs, and (6) design specific siRNAs to silence circRNAs. Here, we review the CircInteractome tool and explain recent updates to the site. 
The database is freely accessible at http://circinteractome.nia.nih.gov .",2018-01-01 +30053269,PITDB: a database of translated genomic elements.,"PITDB is a freely available database of translated genomic elements (TGEs) that have been observed in PIT (proteomics informed by transcriptomics) experiments. In PIT, a sample is analyzed using both RNA-seq transcriptomics and proteomic mass spectrometry. Transcripts assembled from RNA-seq reads are used to create a library of sample-specific amino acid sequences against which the acquired mass spectra are searched, permitting detection of any TGE, not just those in canonical proteome databases. At the time of writing, PITDB contains over 74 000 distinct TGEs from four species, supported by more than 600 000 peptide spectrum matches. The database, accessible via http://pitdb.org, provides supporting evidence for each TGE, often from multiple experiments and an indication of the confidence in the TGE's observation and its type, ranging from known protein (exact match to a UniProt protein sequence), through multiple types of protein variant including various splice isoforms, to a putative novel molecule. PITDB's modern web interface allows TGEs to be viewed individually or by species or experiment, and downloaded for further analysis. PITDB is for bench scientists seeking to share their PIT results, for researchers investigating novel genome products in model organisms and for those wishing to construct proteomes for lesser studied species.",2018-01-01 +29961817,LncCeRBase: a database of experimentally validated human competing endogenous long non-coding RNAs. ,"Long non-coding RNAs (lncRNAs) are endogenous molecules longer than 200 nucleotides, and lack coding potential. LncRNAs that interact with microRNAs (miRNAs) are known as a competing endogenous RNAs (ceRNAs) and have the ability to regulate the expression of target genes. The ceRNAs play an important role in the initiation and progression of various cancers. 
However, until now, there is no a database including a collection of experimentally verified, human ceRNAs. We developed the LncCeRBase database, which encompasses 432 lncRNA-miRNA-mRNA interactions, including 130 lncRNAs, 214 miRNAs and 245 genes from 300 publications. In addition, we compiled the signaling pathways associated with the included lncRNA-miRNA-mRNA interactions as a tool to explore their functions. LncCeRBase is useful for understanding the regulatory mechanisms of lncRNA.Database URL: http://lnccerbase.it1004.com.",2018-01-01 +29492894,DNA Multiple Sequence Alignment Guided by Protein Domains: The MSA-PAD 2.0 Method.,"Multiple sequence alignment (MSA) is a fundamental component in many DNA sequence analyses including metagenomics studies and phylogeny inference. When guided by protein profiles, DNA multiple alignments assume a higher precision and robustness. Here we present details of the use of the upgraded version of MSA-PAD (2.0), which is a DNA multiple sequence alignment framework able to align DNA sequences coding for single/multiple protein domains guided by PFAM or user-defined annotations. MSA-PAD has two alignment strategies, called ""Gene"" and ""Genome,"" accounting for coding domains order and genomic rearrangements, respectively. Novel options were added to the present version, where the MSA can be guided by protein profiles provided by the user. This allows MSA-PAD 2.0 to run faster and to add custom protein profiles sometimes not present in PFAM database according to the user's interest. MSA-PAD 2.0 is currently freely available as a Web application at https://recasgateway.cloud.ba.infn.it/ .",2018-01-01 +29136215,MultitaskProtDB-II: an update of a database of multitasking/moonlighting proteins.,"Multitasking, or moonlighting, is the capability of some proteins to execute two or more biological functions. MultitaskProtDB-II is a database of multifunctional proteins that has been updated. 
In the previous version, the information contained was: NCBI and UniProt accession numbers, canonical and additional biological functions, organism, monomeric/oligomeric states, PDB codes and bibliographic references. In the present update, the number of entries has been increased from 288 to 694 moonlighting proteins. MultitaskProtDB-II is continually being curated and updated. The new database also contains the following information: GO descriptors for the canonical and moonlighting functions, three-dimensional structure (for those proteins lacking PDB structure, a model was made using Itasser and Phyre), the involvement of the proteins in human diseases (78% of human moonlighting proteins) and whether the protein is a target of a current drug (48% of human moonlighting proteins). These numbers highlight the importance of these proteins for the analysis and explanation of human diseases and target-directed drug design. Moreover, 25% of the proteins of the database are involved in virulence of pathogenic microorganisms, largely in the mechanism of adhesion to the host. This highlights their importance for the mechanism of microorganism infection and vaccine design. MultitaskProtDB-II is available at http://wallace.uab.es/multitaskII.",2018-01-01 +29126174,miRTarBase update 2018: a resource for experimentally validated microRNA-target interactions.,"MicroRNAs (miRNAs) are small non-coding RNAs of ∼ 22 nucleotides that are involved in negative regulation of mRNA at the post-transcriptional level. Previously, we developed miRTarBase which provides information about experimentally validated miRNA-target interactions (MTIs). Here, we describe an updated database containing 422 517 curated MTIs from 4076 miRNAs and 23 054 target genes collected from over 8500 articles. The number of MTIs curated by strong evidence has increased ∼1.4-fold since the last update in 2016. 
In this updated version, target sites validated by reporter assay that are available in the literature can be downloaded. The target site sequence can extract new features for analysis via a machine learning approach which can help to evaluate the performance of miRNA-target prediction tools. Furthermore, different ways of browsing enhance user browsing specific MTIs. With these improvements, miRTarBase serves as more comprehensively annotated, experimentally validated miRNA-target interactions databases in the field of miRNA related research. miRTarBase is available at http://miRTarBase.mbc.nctu.edu.tw/.",2018-01-01 +29077946,ARED-Plus: an updated and expanded database of AU-rich element-containing mRNAs and pre-mRNAs.,"Here we present an updated version of the AU-Rich Element Database (ARED-Plus) that is freely available at http://brp.kfshrc.edu.sa/ared. AREs are conserved sequence elements that were first discovered in the 3'UTR of mammalian transcripts. Over the past years, we compiled a series of ARE databases that revealed the extent and wide distribution of ARE-containing genes. For this update, we adopted an optimized search algorithm with improved specificity and sensitivity in ARE selection. The designation of the different ARE clusters was simplified by directly correlating the number of the ARE cluster to the number of overlapping AUUUA pentamers. Additionally, the new database was expanded to include genes with intronic AREs (pre-mRNAs) and their characteristics since recent observations reported their abundance and biological significance. Several enhancements were incorporated such as customized column view, additional search options and live search functionalities. The new version includes links to AREsite and AREScore, two related ARE assessment algorithms for further evaluation of the ARE characteristics. 
ARED-Plus now contains an updated repertoire of AREs in the human transcriptome that may be useful in several research fields.",2018-01-01 +29077896,mirTrans: a resource of transcriptional regulation on microRNAs for human cell lines.,"The cell-specific information of transcriptional regulation on microRNAs (miRNAs) is crucial to the precise understanding of gene regulations in various physiological and pathological processes existed in different tissues and cell types. The database, mirTrans, provides comprehensive information about cell-specific transcription of miRNAs including the transcriptional start sites (TSSs) of miRNAs, transcription factor (TF) to miRNA regulations and miRNA promoter sequences. mirTrans also maps the experimental H3K4me3 and DHS (DNase-I hypersensitive site) marks within miRNA promoters and expressed sequence tags (ESTs) within transcribed regions. The current version of database covers 35 259 TSSs and over 2.3 million TF-miRNA regulations for 1513 miRNAs in a total of 54 human cell lines. These cell lines span most of the biological systems, including circulatory system, digestive system and nervous system. Information for both the intragenic miRNAs and intergenic miRNAs is offered. Particularly, the quality of miRNA TSSs and TF-miRNA regulations is evaluated by literature curation. 23 447 TSS records and 2148 TF-miRNA regulations are supported by special experiments as a result of literature curation. EST coverage is also used to evaluate the accuracy of miRNA TSSs. Interface of mirTrans is friendly designed and convenient to make downloads (http://mcube.nju.edu.cn/jwang/lab/soft/mirtrans/ or http://120.27.239.192/mirtrans/).",2018-01-01 +29069475,APPRIS 2017: principal isoforms for multiple gene sets.,"The APPRIS database (http://appris-tools.org) uses protein structural and functional features and information from cross-species conservation to annotate splice isoforms in protein-coding genes. 
APPRIS selects a single protein isoform, the 'principal' isoform, as the reference for each gene based on these annotations. A single main splice isoform reflects the biological reality for most protein coding genes and APPRIS principal isoforms are the best predictors of these main protein isoforms. Here, we present the updates to the database, new developments that include the addition of three new species (chimpanzee, Drosophila melanogaster and Caenorhabditis elegans), the expansion of APPRIS to cover the RefSeq gene set and the UniProtKB proteome for six species and refinements in the core methods that make up the annotation pipeline. In addition APPRIS now provides a measure of reliability for individual principal isoforms and updates with each release of the GENCODE/Ensembl and RefSeq reference sets. The individual GENCODE/Ensembl, RefSeq and UniProtKB reference gene sets for six organisms have been merged to produce common sets of splice variants.",2018-01-01 +29036693,ICG: a wiki-driven knowledgebase of internal control genes for RT-qPCR normalization.,"Real-time quantitative PCR (RT-qPCR) has become a widely used method for accurate expression profiling of targeted mRNA and ncRNA. Selection of appropriate internal control genes for RT-qPCR normalization is an elementary prerequisite for reliable expression measurement. Here, we present ICG (http://icg.big.ac.cn), a wiki-driven knowledgebase for community curation of experimentally validated internal control genes as well as their associated experimental conditions. Unlike extant related databases that focus on qPCR primers in model organisms (mainly human and mouse), ICG features harnessing collective intelligence in community integration of internal control genes for a variety of species. 
Specifically, it integrates a comprehensive collection of more than 750 internal control genes for 73 animals, 115 plants, 12 fungi and 9 bacteria, and incorporates detailed information on recommended application scenarios corresponding to specific experimental conditions, which, collectively, are of great help for researchers to adopt appropriate internal control genes for their own experiments. Taken together, ICG serves as a publicly editable and open-content encyclopaedia of internal control genes and accordingly bears broad utility for reliable RT-qPCR normalization and gene expression characterization in both model and non-model organisms.",2018-01-01 +29036403,CSCD: a database for cancer-specific circular RNAs.,"Circular RNA (circRNA) is a large group of RNA family extensively existed in cells and tissues. High-throughput sequencing provides a way to view circRNAs across different samples, especially in various diseases. However, there is still no comprehensive database for exploring the cancer-specific circRNAs. We collected 228 total RNA or polyA(-) RNA-seq samples from both cancer and normal cell lines, and identified 272 152 cancer-specific circRNAs. A total of 950 962 circRNAs were identified in normal samples only, and 170 909 circRNAs were identified in both tumor and normal samples, which could be further used as non-tumor background. We constructed a cancer-specific circRNA database (CSCD, http://gb.whu.edu.cn/CSCD). To understand the functional effects of circRNAs, we predicted the microRNA response element sites and RNA binding protein sites for each circRNA. We further predicted potential open reading frames to highlight translatable circRNAs. To understand the association between the linear splicing and the back-splicing, we also predicted the splicing events in linear transcripts of each circRNA. 
As the first comprehensive cancer-specific circRNA database, we believe CSCD could significantly contribute to the research for the function and regulation of cancer-associated circRNAs.",2018-01-01 +28326995,MultiPic: A standardized set of 750 drawings with norms for six European languages.,"Numerous studies in psychology, cognitive neuroscience and psycholinguistics have used pictures of objects as stimulus materials. Currently, authors engaged in cross-linguistic work or wishing to run parallel studies at multiple sites where different languages are spoken must rely on rather small sets of black-and-white or colored line drawings. These sets are increasingly experienced as being too limited. Therefore, we constructed a new set of 750 colored pictures of concrete concepts. This set, MultiPic, constitutes a new valuable tool for cognitive scientists investigating language, visual perception, memory and/or attention in monolingual or multilingual populations. Importantly, the MultiPic databank has been normed in six different European languages (British English, Spanish, French, Dutch, Italian and German). All stimuli and norms are freely available at http://www.bcbl.eu/databases/multipic .",2018-01-01 +30196491,Use of IMGT® Databases and Tools for Antibody Engineering and Humanization.,"IMGT®, the international ImMunoGeneTics information system® ( http://www.imgt.org ), was created in 1989 by Marie-Paule Lefranc (Université de Montpellier and CNRS) to manage the huge diversity of the antigen receptors, immunoglobulins (IG) or antibodies, and T cell receptors (TR). The founding of IMGT® marked the advent of immunoinformatics, which emerged at the interface between immunogenetics and bioinformatics. Standardized sequence and structure analysis of antibody using IMGT® databases and tools allow one to bridge, for the first time, the gap between antibody sequences and three-dimensional (3D) structures. 
This is achieved through the IMGT Scientific chart rules, based on the IMGT-ONTOLOGY concepts of classification (IMGT gene and allele nomenclature), description (IMGT standardized labels), and numerotation (IMGT unique numbering and IMGT Collier de Perles). IMGT® is acknowledged as the global reference for immunogenetics and immunoinformatics, and its standards are particularly useful for antibody engineering and humanization. IMGT® databases for antibody nucleotide sequences and genes include IMGT/LIGM-DB and IMGT/GENE-DB, respectively, and nucleotide sequence analysis is performed by the IMGT/V-QUEST and IMGT/JunctionAnalysis tools and for NGS by IMGT/HighV-QUEST. In this chapter, we focus on IMGT® databases and tools for amino acid sequences, two-dimensional (2D) and three-dimensional (3D) structures: the IMGT/DomainGapAlign and IMGT Collier de Perles tools and the IMGT/2Dstructure-DB and IMGT/3Dstructure-DB database. IMGT/mAb-DB provides the query interface for monoclonal antibodies (mAb), fusion proteins for immune applications (FPIA), and composite proteins for clinical applications (CPCA) and related proteins of interest (RPI) and links to the proposed and recommended lists of the World Health Organization International Nonproprietary Name (WHO INN) programme, to IMGT/2Dstructure-DB for amino acid sequences, and to IMGT/3Dstructure-DB and its associated tools (IMGT/StructuralQuery, IMGT/DomainSuperimpose) for crystallized antibodies.",2018-01-01 +29688352,OliveNet™: a comprehensive library of compounds from Olea europaea. ,"Accumulated epidemiological, clinical and experimental evidence has indicated the beneficial health effects of the Mediterranean diet, which is typified by the consumption of virgin olive oil (VOO) as a main source of dietary fat. 
At the cellular level, compounds derived from various olive (Olea europaea), matrices, have demonstrated potent antioxidant and anti-inflammatory effects, which are thought to account, at least in part, for their biological effects. Research efforts are expanding into the characterization of compounds derived from Olea europaea, however, the considerable diversity and complexity of the vast array of chemical compounds have made their precise identification and quantification challenging. As such, only a relatively small subset of olive-derived compounds has been explored for their biological activity and potential health effects to date. Although there is adequate information describing the identification or isolation of olive-derived compounds, these are not easily searchable, especially when attempting to acquire chemical or biological properties. Therefore, we have created the OliveNet™ database containing a comprehensive catalogue of compounds identified from matrices of the olive, including the fruit, leaf and VOO, as well as in the wastewater and pomace accrued during oil production. From a total of 752 compounds, chemical analysis was sufficient for 676 individual compounds, which have been included in the database. The database is curated and comprehensively referenced containing information for the 676 compounds, which are divided into 13 main classes and 47 subclasses. Importantly, with respect to current research trends, the database includes 222 olive phenolics, which are divided into 13 subclasses. To our knowledge, OliveNet™ is currently the only curated open access database with a comprehensive collection of compounds associated with Olea europaea.Database URL: https://www.mccordresearch.com.au.",2018-01-01 +29473531,Diabetic Complications and Insight into Antidiabetic Potentialities of Ethno- Medicinal Plants: A Review.,"

Background

The naturally inspired treatment options for several disease conditions and human-health related disorders such as diabetes mellitus have gained considerable research interest. In this context, naturally occurring plants and herbs with medicinal functionalities have gained special place than ever before in the current medicinal world.

Objective

The objective of this review is to extend the current knowledge in the clinical field related to the diabetic complications. A special focus has also been given to the anti-diabetic potentialities of ethnomedicinal plants.

Method

Herein, we reviewed and compiled salient information from the authentic bibliographic databases including PubMed, Scopus, Elsevier, Springer, Bentham Science and other scientific databases. The patents were searched and reviewed from http://www.freepatentsonline.com.

Results

Diabetes mellitus is a group of metabolic disorders associated with the endocrine system that result in hyperglycemic conditions. Metabolic disorders can cause many complications such as neuropathy, retinopathy, nephropathy, ischemic heart disease, stroke, and microangiopathy. Traditional botanical therapies have been used around the world to treat diabetes. Among several medications and different medicines, various herbs are known to cure and control diabetes; they also have no side effects. History has shown that medicinal plants have long been used for traditional healing around the world to treat diabetes. More than 800 plants around the world are shown by ethnobotanical information as traditional remedies for the treatment of diabetes. Several parts of these plants have been evaluated and appreciated for hypoglycemic activity. Medicinal plants have been found to be more effective than conventional drug compounds with no/fewer side effects and to be relatively inexpensive. In this review paper, we have reviewed plants with anti-diabetic and related beneficial medicinal effects.

Conclusion

This review may be helpful for researchers, diabetic patient and decision makers in the field of ethnobotanical sciences. These efforts may also provide treatment to everyone and focus on the role of traditional novel medicine plants that have anti-diabetic abilities.",2018-01-01 +29228298,SeedStor: A Germplasm Information Management System and Public Database.,"SeedStor (https://www.seedstor.ac.uk) acts as the publicly available database for the seed collections held by the Germplasm Resources Unit (GRU) based at the John Innes Centre, Norwich, UK. The GRU is a national capability supported by the Biotechnology and Biological Sciences Research Council (BBSRC). The GRU curates germplasm collections of a range of temperate cereal, legume and Brassica crops and their associated wild relatives, as well as precise genetic stocks, near-isogenic lines and mapping populations. With >35,000 accessions, the GRU forms part of the UK's plant conservation contribution to the Multilateral System (MLS) of the International Treaty for Plant Genetic Resources for Food and Agriculture (ITPGRFA) for wheat, barley, oat and pea. SeedStor is a fully searchable system that allows our various collections to be browsed species by species through to complicated multipart phenotype criteria-driven queries. The results from these searches can be downloaded for later analysis or used to order germplasm via our shopping cart. The user community for SeedStor is the plant science research community, plant breeders, specialist growers, hobby farmers and amateur gardeners, and educationalists. 
Furthermore, SeedStor is much more than a database; it has been developed to act internally as a Germplasm Information Management System that allows team members to track and process germplasm requests, determine regeneration priorities, handle cost recovery and Material Transfer Agreement paperwork, manage the Seed Store holdings and easily report on a wide range of the aforementioned tasks.",2018-01-01 +29194489,mirDIP 4.1-integrative database of human microRNA target predictions.,"MicroRNAs are important regulators of gene expression, achieved by binding to the gene to be regulated. Even with modern high-throughput technologies, it is laborious and expensive to detect all possible microRNA targets. For this reason, several computational microRNA-target prediction tools have been developed, each with its own strengths and limitations. Integration of different tools has been a successful approach to minimize the shortcomings of individual databases. Here, we present mirDIP v4.1, providing nearly 152 million human microRNA-target predictions, which were collected across 30 different resources. We also introduce an integrative score, which was statistically inferred from the obtained predictions, and was assigned to each unique microRNA-target interaction to provide a unified measure of confidence. We demonstrate that integrating predictions across multiple resources does not cumulate prediction bias toward biological processes or pathways. mirDIP v4.1 is freely available at http://ophid.utoronto.ca/mirDIP/.",2018-01-01 +29145625,mSignatureDB: a database for deciphering mutational signatures in human cancers.,"Cancer is a genetic disease caused by somatic mutations; however, the understanding of the causative biological processes generating these mutations is limited. A cancer genome bears the cumulative effects of mutational processes during tumor development. Deciphering mutational signatures in cancer is a new topic in cancer research. 
The Wellcome Trust Sanger Institute (WTSI) has categorized 30 reference signatures in the COSMIC database based on the analyses of ∼10 000 sequencing datasets from TCGA and ICGC. Large cohorts and bioinformatics skills are required to perform the same analysis as WTSI. The quantification of known signatures in custom cohorts is not possible under the current framework of the COSMIC database, which motivates us to construct a database for mutational signatures in cancers and make such analyses more accessible to general researchers. mSignatureDB (http://tardis.cgu.edu.tw/msignaturedb) integrates R packages and in-house scripts to determine the contributions of the published signatures in 15 780 individual tumors from 73 TCGA/ICGC cancer projects, making comparison of signature patterns within and between projects become possible. mSignatureDB also allows users to perform signature analysis on their own datasets, quantifying contributions of signatures at sample resolution, which is a unique feature of mSignatureDB not available in other related databases.",2018-01-01 +29126202,ActiveDriverDB: human disease mutations and genome variation in post-translational modification sites of proteins.,"Interpretation of genetic variation is needed for deciphering genotype-phenotype associations, mechanisms of inherited disease, and cancer driver mutations. Millions of single nucleotide variants (SNVs) in human genomes are known and thousands are associated with disease. An estimated 21% of disease-associated amino acid substitutions corresponding to missense SNVs are located in protein sites of post-translational modifications (PTMs), chemical modifications of amino acids that extend protein function. ActiveDriverDB is a comprehensive human proteo-genomics database that annotates disease mutations and population variants through the lens of PTMs. 
We integrated >385,000 published PTM sites with ∼3.6 million substitutions from The Cancer Genome Atlas (TCGA), the ClinVar database of disease genes, and human genome sequencing projects. The database includes site-specific interaction networks of proteins, upstream enzymes such as kinases, and drugs targeting these enzymes. We also predicted network-rewiring impact of mutations by analyzing gains and losses of kinase-bound sequence motifs. ActiveDriverDB provides detailed visualization, filtering, browsing and searching options for studying PTM-associated mutations. Users can upload mutation datasets interactively and use our application programming interface in pipelines. Integrative analysis of mutations and PTMs may help decipher molecular mechanisms of phenotypes and disease, as exemplified by case studies of TP53, BRCA2 and VHL. The open-source database is available at https://www.ActiveDriverDB.org.",2018-01-01 +29112718,Rfam 13.0: shifting to a genome-centric resource for non-coding RNA families.,"The Rfam database is a collection of RNA families in which each family is represented by a multiple sequence alignment, a consensus secondary structure, and a covariance model. In this paper we introduce Rfam release 13.0, which switches to a new genome-centric approach that annotates a non-redundant set of reference genomes with RNA families. We describe new web interface features including faceted text search and R-scape secondary structure visualizations. We discuss a new literature curation workflow and a pipeline for building families based on RNAcentral. There are 236 new families in release 13.0, bringing the total number of families to 2687. The Rfam website is http://rfam.org.",2018-01-01 +29077884,HEDD: Human Enhancer Disease Database.,"Enhancers, as specialized genomic cis-regulatory elements, activate transcription of their target genes and play an important role in pathogenesis of many human complex diseases. 
Despite recent systematic identification of them in the human genome, currently there is an urgent need for comprehensive annotation databases of human enhancers with a focus on their disease connections. In response, we built the Human Enhancer Disease Database (HEDD) to facilitate studies of enhancers and their potential roles in human complex diseases. HEDD currently provides comprehensive genomic information for ∼2.8 million human enhancers identified by ENCODE, FANTOM5 and RoadMap with disease association scores based on enhancer-gene and gene-disease connections. It also provides Web-based analytical tools to visualize enhancer networks and score enhancers given a set of selected genes in a specific gene network. HEDD is freely accessible at http://zdzlab.einstein.yu.edu/1/hedd.php.",2018-01-01 +29039006,Information Resources for Functional Genomics Studies in Brachypodium distachyon.,"Online tools and databases play an essential role in the promotion of functional genomics studies. Several resources for information regarding Brachypodium distachyon (Brachypodium) are available on the Web. In this chapter, we focus on recently published resources for Brachypodium research. The Brachypodium.org website ( http://www.brachypodium.org /) is an information portal that provides links to various genomic resources regarding Brachypodium, including genome annotation and re-sequencing datasets of accessions. RIKEN Full-length cDNA Database (RBFLDB, http://brachy.bmep.riken.jp/ver.1/index.pl ) is a web-accessible database that provides information of Brachypodium full-length cDNAs (FLcDNAs) collected in RIKEN and updated gene structures of Brachypodium based on the FLcDNA sequences as well as results of comparative analyses with available sequence resources for Triticeae crops, wheat, and barley. We introduce the functionalities and availability of these important information resources. 
Furthermore, we also present brief descriptions of useful online tools that facilitate Brachypodium functional genomics studies.",2018-01-01 +28985416,EVLncRNAs: a manually curated database for long non-coding RNAs validated by low-throughput experiments.,"Long non-coding RNAs (lncRNAs) play important functional roles in various biological processes. Early databases were utilized to deposit all lncRNA candidates produced by high-throughput experimental and/or computational techniques to facilitate classification, assessment and validation. As more lncRNAs are validated by low-throughput experiments, several databases were established for experimentally validated lncRNAs. However, these databases are small in scale (with a few hundreds of lncRNAs only) and specific in their focuses (plants, diseases or interactions). Thus, it is highly desirable to have a comprehensive dataset for experimentally validated lncRNAs as a central repository for all of their structures, functions and phenotypes. Here, we established EVLncRNAs by curating lncRNAs validated by low-throughput experiments (up to 1 May 2016) and integrating specific databases (lncRNAdb, LncRNADisease, Lnc2Cancer and PLNlncRbase) with additional functional and disease-specific information not covered previously. The current version of EVLncRNAs contains 1543 lncRNAs from 77 species that is 2.9 times larger than the current largest database for experimentally validated lncRNAs. Seventy-four percent lncRNA entries are partially or completely new, comparing to all existing experimentally validated databases. The established database allows users to browse, search and download as well as to submit experimentally validated lncRNAs. The database is available at http://biophy.dzu.edu.cn/EVLncRNAs.",2018-01-01 +30152276,HYPO: A Database of Human Hypothetical Proteins.,"

Background

There are genes whose function remains obscure as they may not have similarities to known regions in the genome. Such known 'unknown' genes constituting the Open Reading Frames (ORF) that remain in the epigenome are termed as orphan genes and the proteins encoded by them but having no experimental evidence of translation are termed as 'Hypothetical Proteins' (HPs).

Objectives

We have enhanced our former database of Hypothetical Proteins (HP) in human (HypoDB) with added annotation, application programming interfaces and descriptive features. The database hosts 1000+ manually curated records of the known 'unknown' regions in the human genome. The new updated version of HypoDB with functionalities (Blast, Match) is freely accessible at http://www.bioclues.org/hypo2.

Methods

The total collection of HPs was checked using experimentally validated sets (from Swiss-Prot) or non-experimentally validated set (TrEMBL) or the complete set (UniProtKB). The database was designed with Java at the core backend, integrated with databases, viz. EMBL, PIR, HPRD and those including descriptors for structural databases, interaction and association databases.

Results

The HypoDB constituted Application Programming Interfaces (API) for implicitly searching resources linking them to other databases like NCBI Link-out in addition to multiple search capabilities along with advanced searches using integrated bio-tools, viz. Match and BLAST were incorporated.

Conclusion

The HypoDB is perhaps the only open-source HP database with a range of tools for common bioinformatics retrievals and serves as a standby reference to researchers who are interested in finding candidate sequences for their potential experimental work.",2018-01-01 +29961819,dbLGL: an online leukemia gene and literature database for the retrospective comparison of adult and childhood leukemia genetics with literature evidence. ,"Leukemia is a group of cancers with increased numbers of immature or abnormal leucocytes that originated in the bone marrow and other blood-forming organs. The development of differentially diagnostic biomarkers for different subtypes largely depends on understanding the biological pathways and regulatory mechanisms associated with leukemia-implicated genes. Unfortunately, the leukemia-implicated genes that have been identified thus far are scattered among thousands of published studies, and no systematic summary of the differences between adult and childhood leukemia exists with regard to the causative genetic mutations and genetic mechanisms of the various subtypes. In this study, we performed a systematic literature review of those susceptibility genes reported in small-scale experiments and built an online gene database containing a total of 1805 leukemia-associated genes, available at http://soft.bioinfo-minzhao.org/lgl/. Our comparison of genes from the four primary subtypes and between adult and childhood cases identified a number of potential genes related to patient survival. These curated genes can satisfy a growing demand for further integrating genomics screening for leukemia-associated low-frequency mutated genes.Database URL: http://soft.bioinfo-minzhao.org/lgl/.",2018-01-01 +29788229,SubtiWiki in 2018: from genes and proteins to functional network annotation of the model organism Bacillus subtilis.,"Living cells are made up of individual parts, i.e. 
the genome, the proteins, the RNA and lipid molecules as well as the metabolites and ions. However, life depends on the functional interaction among these components which is often organized in networks. Here, we present the recent development of SubtiWiki, the integrated database for the model bacterium Bacillus subtilis (http://subtiwiki.uni-goettingen.de/). SubtiWiki is based on a relational database and provides access to published information about the genes and proteins of B. subtilis and about metabolic and regulatory pathways. We have included a network visualization tool that can be used to visualize regulatory as well as protein-protein interaction networks. The resulting interactive graphical presentations allow the user to detect novel associations and thus to develop novel hypotheses that can then be tested experimentally. To facilitate the mobile use of SubtiWiki, we provide enhanced versions of the SubtiWiki App that are available for iOS and Android devices. Importantly, the App allows to link private notes and pictures to the gene/protein pages that can be synchronized on multiple devices. SubtiWiki has become one of the most complete resources of knowledge on a living organism.",2018-01-01 +29334885,2016 United Kingdom national guideline on the sexual health care of men who have sex with men.,"This guideline is intended for use in UK Genitourinary medicine clinics and sexual health services but is likely to be of relevance in all sexual health settings, including general practice and Contraception and Sexual Health (CASH) services, where men who have sex with men (MSM) seek sexual health care or where addressing the sexual health needs of MSM may have public health benefits. For the purposes of this document, MSM includes all gay, bisexual and all other males who have sex with other males and both cis and trans men. 
This document does not provide guidance on the treatment of particular conditions where this is covered in other British Association for Sexual Health and HIV (BASHH) Guidelines but outlines best practice in multiple aspects of the sexual health care of MSM. Where prevention of sexually transmitted infections including HIV can be addressed as an integral part of clinical care, this is consistent with the concept of combination prevention and is included. The document is designed primarily to provide guidance on the direct clinical care of MSM but also makes reference to the design and delivery of services with the aim of supporting clinicians and commissioners in providing effective services. Methodology This document was produced in accordance with the guidance set out in the BASHH CEG's document 'Framework for guideline development and assessment' published in 2010 at http://www.bashh.org/guidelines and with reference to the Agree II instrument. Following the production of the updated framework in April 2015, the GRADE system for assessing evidence was adopted and the draft recommendations were regraded. Search strategy (see also Appendix 1) Ovid Medline 1946 to December 2014, Medline daily update, Embase 1974 to December 2014, Pubmed NeLH Guidelines Database, Cochrane library from 2000 to December 2014. Search language English only. The search for Section 3 was conducted on PubMed to December 2014. Priority was given to peer-reviewed papers published in scientific journals, although for many issues evidence includes conference abstracts listed on the Embase database. In addition, for 'Identification of problematic recreational drug and alcohol use' section and 'Sexual problems and dysfunctions in MSM' section, searches included PsycINFO. Methods Article titles and abstracts were reviewed and if relevant the full text article was obtained. 
Priority was given to randomised controlled trial and systematic review evidence, and recommendations made and graded on the basis of best available evidence. Piloting and feedback The first draft of the guideline was circulated to the writing group and to a small group of relevant experts, third sector partners and patient representatives who were invited to comment on the whole document and specifically on particular sections. The revised draft was reviewed by the CEG and then reviewed by the BASHH patient/public panel and posted on the BASHH website for public consultation. The final draft was piloted before publication. Guideline update The guidelines will be reviewed and revised in five years' time, 2022.",2018-01-01 +29156006,DIANA-TarBase v8: a decade-long collection of experimentally supported miRNA-gene interactions.,"DIANA-TarBase v8 (http://www.microrna.gr/tarbase) is a reference database devoted to the indexing of experimentally supported microRNA (miRNA) targets. Its eighth version is the first database indexing >1 million entries, corresponding to ∼670 000 unique miRNA-target pairs. The interactions are supported by >33 experimental methodologies, applied to ∼600 cell types/tissues under ∼451 experimental conditions. It integrates information on cell-type specific miRNA-gene regulation, while hundreds of thousands of miRNA-binding locations are reported. TarBase is coming of age, with more than a decade of continuous support in the non-coding RNA field. A new module has been implemented that enables the browsing of interactions through different filtering combinations. It permits easy retrieval of positive and negative miRNA targets per species, methodology, cell type and tissue. An incorporated ranking system is utilized for the display of interactions based on the robustness of their supporting methodologies. Statistics, pie-charts and interactive bar-plots depicting the database content are available through a dedicated result page. 
An intuitive interface is introduced, providing a user-friendly application with flexible options to different queries.",2018-01-01 +29106599,ASpedia: a comprehensive encyclopedia of human alternative splicing.,"Alternative splicing confers the human genome complexity by increasing the diversity of expressed mRNAs. Hundreds or thousands of splicing regions have been identified through differential alternative splicing analysis of high-throughput datasets. However, it is hard to explain the functional impact of each splicing event. Protein domain formation and nonsense-mediated decay are considered the main functional features of splicing. However, other functional features such as miRNA target sites, phosphorylation sites and single-nucleotide variations are directly affected by alternative splicing and affect downstream function. Hence, we established ASpedia: a comprehensive database for human alternative splicing annotation, which encompasses a range of functions, from genomic annotation to isoform-specific function (ASpedia, http://combio.snu.ac.kr/aspedia). The database provides three features: (i) genomic annotation extracted from DNA, RNA and proteins; (ii) transcription and regulation elements analyzed from next-generation sequencing datasets; and (iii) isoform-specific functions collected from known and published datasets. The ASpedia web application includes three components: an annotation database, a retrieval system and a browser specialized in the identification of human alternative splicing events. The retrieval system supports multiple AS event searches resulting from high-throughput analysis and the AS browser comprises genome tracks. 
Thus, ASpedia facilitates the systemic annotation of the functional impacts of multiple AS events.",2018-01-01 +29069402,SEECancer: a resource for somatic events in evolution of cancer genome.,"Cancer cells progressively evolve from a premalignant to a malignant state, which is driven by accumulating somatic alterations that confer normal cells a fitness advantage. Improvements in high-throughput sequencing techniques have led to an increase in construction of tumor phylogenetics and identification of somatic driver events that specifically occurred in different tumor progression stages. Here, we developed the SEECancer database (http://biocc.hrbmu.edu.cn/SEECancer), which aims to present the comprehensive cancer evolutionary stage-specific somatic events (including early-specific, late-specific, relapse-specific, metastasis-specific, drug-resistant and drug-induced genomic events) and their temporal orders. By manually curating over 10 000 published articles, 1231 evolutionary stage-specific genomic events and 5772 temporal orders involving 82 human cancers and 23 tissue origins were collected and deposited in the SEECancer database. Each entry contains the somatic event, evolutionary stage, cancer type, detection approach and relevant evidence. SEECancer provides a user-friendly interface for browsing, searching and downloading evolutionary stage-specific somatic events and temporal relationships in various cancers. With increasing attention on cancer genome evolution, the necessary information in SEECancer will facilitate understanding of cancer etiology and development of evolutionary therapeutics, and help clinicians to discover biomarkers for monitoring tumor progression.",2018-01-01 +28991830,Development and Validation of an Empiric Tool to Predict Favorable Neurologic Outcomes Among PICU Patients.,"

Objectives

To create a novel tool to predict favorable neurologic outcomes during ICU stay among children with critical illness.

Design

Logistic regression models using adaptive lasso methodology were used to identify independent factors associated with favorable neurologic outcomes. A mixed effects logistic regression model was used to create the final prediction model including all predictors selected from the lasso model. Model validation was performed using a 10-fold internal cross-validation approach.

Setting

Virtual Pediatric Systems (VPS, LLC, Los Angeles, CA) database.

Patients

Patients less than 18 years old admitted to one of the participating ICUs in the Virtual Pediatric Systems database were included (2009-2015).

Interventions

None.

Measurements and main results

A total of 160,570 patients from 90 hospitals qualified for inclusion. Of these, 1,675 patients (1.04%) were associated with a decline in Pediatric Cerebral Performance Category scale by at least 2 between ICU admission and ICU discharge (unfavorable neurologic outcome). The independent factors associated with unfavorable neurologic outcome included higher weight at ICU admission, higher Pediatric Index of Mortality-2 score at ICU admission, cardiac arrest, stroke, seizures, head/nonhead trauma, use of conventional mechanical ventilation and high-frequency oscillatory ventilation, prolonged hospital length of ICU stay, and prolonged use of mechanical ventilation. The presence of chromosomal anomaly, cardiac surgery, and utilization of nitric oxide were associated with favorable neurologic outcome. The final online prediction tool can be accessed at https://soipredictiontool.shinyapps.io/GNOScore/. Our model predicted 139,688 patients with favorable neurologic outcomes in an internal validation sample when the observed number of patients with favorable neurologic outcomes was among 139,591 patients. The area under the receiver operating curve for the validation model was 0.90.

Conclusions

This proposed prediction tool encompasses 20 risk factors into one probability to predict favorable neurologic outcome during ICU stay among children with critical illness. Future studies should seek external validation and improved discrimination of this prediction tool.",2018-01-01 +30019654,Complications of Diabetes: An Insight into Genetic Polymorphism and Role of Insulin.,"

Background

Diabetes Mellitus (DM) is an advanced and chronic endocrine disorder characterized by an insufficiency of insulin secretion from pancreatic β-cells and liver, adipose tissues, and skeletal muscles.

Objective

The main objective of this study is to understand the mechanism and genes which are responsible for the prevalence of diabetes. The study also covers various types of diabetic complications with special reference to insulin role and defects.

Methods

The scientific literature and patents were reviewed and analyzed based on their suitability and relevance to the theme of the study. The scientific literature was covered from the authentic databases such as Elsevier, Springer, and Bentham Science. The patents were reviewed from http://www.freepatentsonline.com.

Results

Glucokinase (ATP: D-glucose-6-phosphotransferase; GCK), initiates glycolysis and acts as a glucose sensor and metabolic signal producer in liver and pancreas. PCR-sequencing showed qualitative differences in diabetic patients in comparison to healthy subjects. Glucokinase is the most important component in glucose detection of pancreatic islet beta cells in diabetes because glucokinase mutations can be one of the most common single gene disorders described. It is known that a genetic variation of a human glucokinase gene, including a point mutation, causes MODY, the concentration of plasma glucose increased and it is supposed to be the cause of diabetes of the present study subjects. Owing to hyperglycemia and individual components of the insulin resistance (metabolic) syndrome, people with Type II DM are prone to the high threat for microvascular complications (including nephropathy, retinopathy, and neuropathy) and macrovascular complications (such as Ischemic Heart Disease). There were also significant differences (P < 0.0001) in glycation levels (0.90, 0.4838mole/mole), random blood sugar (348.8, 105.8mg/dL), cholesterol levels (235.3, 161.8mg/dL), low density lipoprotein in diabetic subjects (155.3, 28.46mg/dL) and in healthy donors. GCK gene mutations were found in 70% of the patients while 30% are non-mutated.

Conclusion

In conclusion, lipids, glucose, and protein play an essential role in the initiation of AGE's or diabetic complications (Micro and Macrovascular Complications). The importance of the clinical results should also be recognized in the genetic analysis of heterogeneous disorders as NIDDM/ Type II DM.",2018-01-01 +29688378,Biopanning data bank 2018: hugging next generation phage display. ,The BDB database is available at http://immunet.cn/bdb.,2018-01-01 +29688360,The NCBI BioCollections Database. ,"The rapidly growing set of GenBank submissions includes sequences that are derived from vouchered specimens. These are associated with culture collections, museums, herbaria and other natural history collections, both living and preserved. Correct identification of the specimens studied, along with a method to associate the sample with its institution, is critical to the outcome of related studies and analyses. The National Center for Biotechnology Information BioCollections Database was established to allow the association of specimen vouchers and related sequence records to their home institutions. This process also allows cross-linking from the home institution for quick identification of all records originating from each collection. Database URL: https://www.ncbi.nlm.nih.gov/biocollections",2018-01-01 +29177508,MVP: a microbe-phage interaction database.,"Phages invade microbes, accomplish host lysis and are of vital importance in shaping the community structure of environmental microbiota. More importantly, most phages have very specific hosts; they are thus ideal tools to manipulate environmental microbiota at species-resolution. The main purpose of MVP (Microbe Versus Phage) is to provide a comprehensive catalog of phage-microbe interactions and assist users to select phage(s) that can target (and potentially to manipulate) specific microbes of interest. 
We first collected 50 782 viral sequences from various sources and clustered them into 33 097 unique viral clusters based on sequence similarity. We then identified 26 572 interactions between 18 608 viral clusters and 9245 prokaryotes (i.e. bacteria and archaea); we established these interactions based on 30 321 evidence entries that we collected from published datasets, public databases and re-analysis of genomic and metagenomic sequences. Based on these interactions, we calculated the host range for each of the phage clusters and accordingly grouped them into subgroups such as 'species-', 'genus-' and 'family-' specific phage clusters. MVP is equipped with a modern, responsive and intuitive interface, and is freely available at: http://mvp.medgenius.info.",2018-01-01 +29145643,"The MEROPS database of proteolytic enzymes, their substrates and inhibitors in 2017 and a comparison with peptidases in the PANTHER database.","The MEROPS database (http://www.ebi.ac.uk/merops/) is an integrated source of information about peptidases, their substrates and inhibitors. The hierarchical classification is: protein-species, family, clan, with an identifier at each level. The MEROPS website moved to the EMBL-EBI in 2017, requiring refactoring of the code-base and services provided. The interface to sequence searching has changed and the MEROPS protein sequence libraries can be searched at the EMBL-EBI with HMMER, FastA and BLASTP. Cross-references have been established between MEROPS and the PANTHER database at both the family and protein-species level, which will help to improve curation and coverage between the resources. Because of the increasing size of the MEROPS sequence collection, in future only sequences of characterized proteins, and from completely sequenced genomes of organisms of evolutionary, medical or commercial significance will be added. 
As an example, peptidase homologues in four proteomes from the Asgard superphylum of Archaea have been identified and compared to other archaean, bacterial and eukaryote proteomes. This has given insights into the origins and evolution of peptidase families, including an expansion in the number of proteasome components in Asgard archaeotes and as organisms increase in complexity. Novel structures for proteasome complexes in archaea are postulated.",2018-01-01 +29112716,Gene3D: Extensive prediction of globular domains in proteins.,"Gene3D (http://gene3d.biochem.ucl.ac.uk) is a database of globular domain annotations for millions of available protein sequences. Gene3D has previously featured in the Database issue of NAR and here we report a significant update to the Gene3D database. The current release, Gene3D v16, has significantly expanded its domain coverage over the previous version and now contains over 95 million domain assignments. We also report a new method for dealing with complex domain architectures that exist in Gene3D, arising from discontinuous domains. Amongst other updates, we have added visualization tools for exploring domain annotations in the context of other sequence features and in gene families. We also provide web-pages to visualize other domain families that co-occur with a given query domain family.",2018-01-01 +29106588,NLSdb-major update for database of nuclear localization signals and nuclear export signals.,"NLSdb is a database collecting nuclear export signals (NES) and nuclear localization signals (NLS) along with experimentally annotated nuclear and non-nuclear proteins. NES and NLS are short sequence motifs related to protein transport out of and into the nucleus. The updated NLSdb now contains 2253 NLS and introduces 398 NES. The potential sets of novel NES and NLS have been generated by a simple 'in silico mutagenesis' protocol. We started with motifs annotated by experiments. 
In step 1, we increased specificity such that no known non-nuclear protein matched the refined motif. In step 2, we increased the sensitivity trying to match several different families with a motif. We then iterated over steps 1 and 2. The final set of 2253 NLS motifs matched 35% of 8421 experimentally verified nuclear proteins (up from 21% for the previous version) and none of 18 278 non-nuclear proteins. We updated the web interface providing multiple options to search protein sequences for NES and NLS motifs, and to evaluate your own signal sequences. NLSdb can be accessed via Rostlab services at: https://rostlab.org/services/nlsdb/.",2018-01-01 +29077942,MetalPDB in 2018: a database of metal sites in biological macromolecular structures.,"MetalPDB (http://metalweb.cerm.unifi.it/) is a database providing information on metal-binding sites detected in the three-dimensional (3D) structures of biological macromolecules. MetalPDB represents such sites as 3D templates, called Minimal Functional Sites (MFSs), which describe the local environment around the metal(s) independently of the larger context of the macromolecular structure. The 2018 update of MetalPDB includes new contents and tools. A major extension is the inclusion of proteins whose structures do not contain metal ions although their sequences potentially contain a known MFS. In addition, MetalPDB now provides extensive statistical analyses addressing several aspects of general metal usage within the PDB, across protein families and in catalysis. Users can also query MetalPDB to extract statistical information on structural aspects associated with individual metals, such as preferred coordination geometries or aminoacidic environment. A further major improvement is the functional annotation of MFSs; the annotation is manually performed via a password-protected annotator interface. At present, ∼50% of all MFSs have such a functional annotation. 
Other noteworthy improvements are bulk query functionality, through the upload of a list of PDB identifiers, and ftp access to MetalPDB contents, allowing users to carry out in-depth analyses on their own computational infrastructure.",2018-01-01 +29040625,RISE: a database of RNA interactome from sequencing experiments.,"We present RISE (http://rise.zhanglab.net), a database of RNA Interactome from Sequencing Experiments. RNA-RNA interactions (RRIs) are essential for RNA regulation and function. RISE provides a comprehensive collection of RRIs that mainly come from recent transcriptome-wide sequencing-based experiments like PARIS, SPLASH, LIGR-seq, and MARIO, as well as targeted studies like RIA-seq, RAP-RNA and CLASH. It also includes interactions aggregated from other primary databases and publications. The RISE database currently contains 328,811 RNA-RNA interactions mainly in human, mouse and yeast. While most existing RNA databases mainly contain interactions of miRNA targeting, notably, more than half of the RRIs in RISE are among mRNA and long non-coding RNAs. We compared different RRI datasets in RISE and found limited overlaps in interactions resolved by different techniques and in different cell lines. It may suggest technology preference and also dynamic natures of RRIs. We also analyzed the basic features of the human and mouse RRI networks and found that they tend to be scale-free, small-world, hierarchical and modular. The analysis may nominate important RNAs or RRIs for further investigation. Finally, RISE provides a Circos plot and several table views for integrative visualization, with extensive molecular and functional annotations to facilitate exploration of biological functions for any RRI of interest.",2018-01-01 +29757429,AlloFinder: a strategy for allosteric modulator discovery and allosterome analyses.,"Allostery tweaks innumerable biological processes and plays a fundamental role in human disease and drug discovery. 
Exploration of allostery has thus been regarded as a crucial requirement for research on biological mechanisms and the development of novel therapeutics. Here, based on our previously developed allosteric data and methods, we present an interactive platform called AlloFinder that identifies potential endogenous or exogenous allosteric modulators and their involvement in human allosterome. AlloFinder automatically amalgamates allosteric site identification, allosteric screening and allosteric scoring evaluation of modulator-protein complexes to identify allosteric modulators, followed by allosterome mapping analyses of predicted allosteric sites and modulators in human proteome. This web server exhibits prominent performance in the reemergence of allosteric metabolites and exogenous allosteric modulators in known allosteric proteins. Specifically, AlloFinder enables identification of allosteric metabolites for metabolic enzymes and screening of potential allosteric compounds for disease-related targets. Significantly, the feasibility of AlloFinder to discover allosteric modulators was tested in a real case of signal transduction and activation of transcription 3 (STAT3) and validated by mutagenesis and functional experiments. Collectively, AlloFinder is expected to contribute to exploration of the mechanisms of allosteric regulation between metabolites and metabolic enzymes, and to accelerate allosteric drug discovery. The AlloFinder web server is freely available to all users at http://mdl.shsmu.edu.cn/ALF/.",2018-07-01 +29216772,Proteomic Cinderella: Customized analysis of bulky MS/MS data in one night.,"Proteomic challenges, stirred up by the advent of high-throughput technologies, produce large amount of MS data. Nowadays, the routine manual search does not satisfy the ""speed"" of modern science any longer. 
In our work, the necessity of single-thread analysis of bulky data emerged during interpretation of HepG2 proteome profiling results for proteoforms searching. We compared the contribution of each of the eight search engines (X!Tandem, MS-GF[Formula: see text], MS Amanda, MyriMatch, Comet, Tide, Andromeda, and OMSSA) integrated in an open-source graphical user interface SearchGUI ( http://searchgui.googlecode.com ) into total result of proteoforms identification and optimized set of engines working simultaneously. We also compared the results of our search combination with Mascot results using protein kit UPS2, containing 48 human proteins. We selected combination of X!Tandem, MS-GF[Formula: see text] and OMMSA as the most time-efficient and productive combination of search. We added homemade java-script to automatize pipeline from file picking to report generation. These settings resulted in rise of the efficiency of our customized pipeline unobtainable by manual scouting: the analysis of 192 files searched against human proteome (42153 entries) downloaded from UniProt took 11[Formula: see text]h.",2017-11-13 +27070572,SAAFEC: Predicting the Effect of Single Point Mutations on Protein Folding Free Energy Using a Knowledge-Modified MM/PBSA Approach.,"

Unlabelled

Folding free energy is an important biophysical characteristic of proteins that reflects the overall stability of the 3D structure of macromolecules. Changes in the amino acid sequence, naturally occurring or made in vitro, may affect the stability of the corresponding protein and thus could be associated with disease. Several approaches that predict the changes of the folding free energy caused by mutations have been proposed, but there is no method that is clearly superior to the others. The optimal goal is not only to accurately predict the folding free energy changes, but also to characterize the structural changes induced by mutations and the physical nature of the predicted folding free energy changes. Here we report a new method to predict the Single Amino Acid Folding free Energy Changes (SAAFEC) based on a knowledge-modified Molecular Mechanics Poisson-Boltzmann (MM/PBSA) approach. The method is comprised of two main components: a MM/PBSA component and a set of knowledge based terms delivered from a statistical study of the biophysical characteristics of proteins. The predictor utilizes a multiple linear regression model with weighted coefficients of various terms optimized against a set of experimental data. The aforementioned approach yields a correlation coefficient of 0.65 when benchmarked against 983 cases from 42 proteins in the ProTherm database.

Availability

the webserver can be accessed via http://compbio.clemson.edu/SAAFEC/.",2016-04-07 +29949980,Protein threading using residue co-variation and deep learning.,"

Motivation

Template-based modeling, including homology modeling and protein threading, is a popular method for protein 3D structure prediction. However, alignment generation and template selection for protein sequences without close templates remain very challenging.

Results

We present a new method called DeepThreader to improve protein threading, including both alignment generation and template selection, by making use of deep learning (DL) and residue co-variation information. Our method first employs DL to predict inter-residue distance distribution from residue co-variation and sequential information (e.g. sequence profile and predicted secondary structure), and then builds sequence-template alignment by integrating predicted distance information and sequential features through an ADMM algorithm. Experimental results suggest that predicted inter-residue distance is helpful to both protein alignment and template selection especially for protein sequences without very close templates, and that our method outperforms currently popular homology modeling method HHpred and threading method CNFpred by a large margin and greatly outperforms the latest contact-assisted protein threading method EigenTHREADER.

Availability and implementation

http://raptorx.uchicago.edu/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +29079769,UClncR: Ultrafast and comprehensive long non-coding RNA detection from RNA-seq.,"Long non-coding RNA (lncRNA) is a large class of gene transcripts with regulatory functions discovered in recent years. Many more are expected to be revealed with accumulation of RNA-seq data from diverse types of normal and diseased tissues. However, discovering novel lncRNAs and accurately quantifying known lncRNAs is not trivial from massive RNA-seq data. Herein we describe UClncR, an Ultrafast and Comprehensive lncRNA detection pipeline to tackle the challenge. UClncR takes standard RNA-seq alignment file, performs transcript assembly, predicts lncRNA candidates, quantifies and annotates both known and novel lncRNA candidates, and generates a convenient report for downstream analysis. The pipeline accommodates both un-stranded and stranded RNA-seq so that lncRNAs overlapping with other genes can be predicted and quantified. UClncR is fully parallelized in a cluster environment yet allows users to run samples sequentially without a cluster. The pipeline can process a typical RNA-seq sample in a matter of minutes and complete hundreds of samples in a matter of hours. Analysis of predicted lncRNAs from two test datasets demonstrated UClncR's accuracy and their relevance to sample clinical phenotypes. UClncR would facilitate researchers' novel lncRNA discovery significantly and is publically available at http://bioinformaticstools.mayo.edu/research/UClncR .",2017-10-27 +28029405,Distributed learning: Developing a predictive model based on data from multiple hospitals without data leaving the hospital - A real life proof of concept.,"

Purpose

One of the major hurdles in enabling personalized medicine is obtaining sufficient patient data to feed into predictive models. Combining data originating from multiple hospitals is difficult because of ethical, legal, political, and administrative barriers associated with data sharing. In order to avoid these issues, a distributed learning approach can be used. Distributed learning is defined as learning from data without the data leaving the hospital.

Patients and methods

Clinical data from 287 lung cancer patients, treated with curative intent with chemoradiation (CRT) or radiotherapy (RT) alone were collected from and stored in 5 different medical institutes (123 patients at MAASTRO (Netherlands, Dutch), 24 at Jessa (Belgium, Dutch), 34 at Liege (Belgium, Dutch and French), 48 at Aachen (Germany, German) and 58 at Eindhoven (Netherlands, Dutch)). A Bayesian network model is adapted for distributed learning (watch the animation: http://youtu.be/nQpqMIuHyOk). The model predicts dyspnea, which is a common side effect after radiotherapy treatment of lung cancer.

Results

We show that it is possible to use the distributed learning approach to train a Bayesian network model on patient data originating from multiple hospitals without these data leaving the individual hospital. The AUC of the model is 0.61 (95%CI, 0.51-0.70) on a 5-fold cross-validation and ranges from 0.59 to 0.71 on external validation sets.

Conclusion

Distributed learning can allow the learning of predictive models on data originating from multiple hospitals while avoiding many of the data sharing barriers. Furthermore, the distributed learning approach can be used to extract and employ knowledge from routine patient data from multiple hospitals while being compliant to the various national and European privacy laws.",2016-10-28 +25916842,"IslandViewer 3: more flexible, interactive genomic island discovery, visualization and analysis.","IslandViewer (http://pathogenomics.sfu.ca/islandviewer) is a widely used web-based resource for the prediction and analysis of genomic islands (GIs) in bacterial and archaeal genomes. GIs are clusters of genes of probable horizontal origin, and are of high interest since they disproportionately encode genes involved in medically and environmentally important adaptations, including antimicrobial resistance and virulence. We now report a major new release of IslandViewer, since the last release in 2013. IslandViewer 3 incorporates a completely new genome visualization tool, IslandPlot, enabling for the first time interactive genome analysis and gene search capabilities using synchronized circular, horizontal and vertical genome views. In addition, more curated virulence factors and antimicrobial resistance genes have been incorporated, and homologs of these genes identified in closely related genomes using strict filters. Pathogen-associated genes have been re-calculated for all pre-computed complete genomes. For user-uploaded genomes to be analysed, IslandViewer 3 can also now handle incomplete genomes, with an improved queuing system on compute nodes to handle user demand. 
Overall, IslandViewer 3 represents a significant new version of this GI analysis software, with features that may make it more broadly useful for general microbial genome analysis and visualization.",2015-04-27 +24904398,A simple tool for neuroimaging data sharing.,"Data sharing is becoming increasingly common, but despite encouragement and facilitation by funding agencies, journals, and some research efforts, most neuroimaging data acquired today is still not shared due to political, financial, social, and technical barriers to sharing data that remain. In particular, technical solutions are few for researchers that are not a part of larger efforts with dedicated sharing infrastructures, and social barriers such as the time commitment required to share can keep data from becoming publicly available. We present a system for sharing neuroimaging data, designed to be simple to use and to provide benefit to the data provider. The system consists of a server at the International Neuroinformatics Coordinating Facility (INCF) and user tools for uploading data to the server. The primary design principle for the user tools is ease of use: the user identifies a directory containing Digital Imaging and Communications in Medicine (DICOM) data, provides their INCF Portal authentication, and provides identifiers for the subject and imaging session. The user tool anonymizes the data and sends it to the server. The server then runs quality control routines on the data, and the data and the quality control reports are made public. The user retains control of the data and may change the sharing policy as they need. The result is that in a few minutes of the user's time, DICOM data can be anonymized and made publicly available, and an initial quality control assessment can be performed on the data. 
The system is currently functional, and user tools and access to the public image database are available at http://xnat.incf.org/.",2014-05-21 +27399597,Development of a Korean Fracture Risk Score (KFRS) for Predicting Osteoporotic Fracture Risk: Analysis of Data from the Korean National Health Insurance Service.,"

Background

Asian-specific prediction models for estimating individual risk of osteoporotic fractures are rare. We developed a Korean fracture risk prediction model using clinical risk factors and assessed validity of the final model.

Methods

A total of 718,306 Korean men and women aged 50-90 years were followed for 7 years in a national system-based cohort study. In total, 50% of the subjects were assigned randomly to the development dataset and 50% were assigned to the validation dataset. Clinical risk factors for osteoporotic fracture were assessed at the biennial health check. Data on osteoporotic fractures during the follow-up period were identified by ICD-10 codes and the nationwide database of the National Health Insurance Service (NHIS).

Results

During the follow-up period, 19,840 osteoporotic fractures were reported (4,889 in men and 14,951 in women) in the development dataset. The assessment tool called the Korean Fracture Risk Score (KFRS) is comprised of a set of nine variables, including age, body mass index, recent fragility fracture, current smoking, high alcohol intake, lack of regular exercise, recent use of oral glucocorticoid, rheumatoid arthritis, and other causes of secondary osteoporosis. The KFRS predicted osteoporotic fractures over the 7 years. This score was validated using an independent dataset. A close relationship with overall fracture rate was observed when we compared the mean predicted scores after applying the KFRS with the observed risks after 7 years within each 10th of predicted risk.

Conclusion

We developed a Korean specific prediction model for osteoporotic fractures. The KFRS was able to predict risk of fracture in the primary population without bone mineral density testing and is therefore suitable for use in both clinical setting and self-assessment. The website is available at http://www.nhis.or.kr.",2016-07-11 +25380778,A Web-based database of genetic association studies in cutaneous melanoma enhanced with network-driven data exploration tools. ,"The publicly available online database MelGene provides a comprehensive, regularly updated, collection of data from genetic association studies in cutaneous melanoma (CM), including random-effects meta-analysis results of all eligible polymorphisms. The updated database version includes data from 192 publications with information on 1114 significantly associated polymorphisms across 280 genes, along with new front-end and back-end capabilities. Various types of relationships between data are calculated and visualized as networks. We constructed 13 different networks containing the polymorphisms and the genes included in MelGene. We explored the derived network representations under the following questions: (i) are there nodes that deserve consideration regarding their network connectivity characteristics? (ii) What is the relation of either the genome-wide or nominally significant CM polymorphisms/genes with the ones highlighted by the network representation? We show that our network approach using the MelGene data reveals connections between statistically significant genes/ polymorphisms and other genes/polymorphisms acting as 'hubs' in the reconstructed networks. To the best of our knowledge, this is the first database containing data from a comprehensive field synopsis and systematic meta-analyses of genetic polymorphisms in CM that provides user-friendly tools for in-depth molecular network visualization and exploration. 
The proposed network connections highlight potentially new loci requiring further investigation of their relation to melanoma risk. Database URL: http://www.melgene.org.",2014-11-07 +26541939,De novo protein conformational sampling using a probabilistic graphical model.,"Efficient exploration of protein conformational space remains challenging especially for large proteins when assembling discretized structural fragments extracted from a protein structure data database. We propose a fragment-free probabilistic graphical model, FUSION, for conformational sampling in continuous space and assess its accuracy using 'blind' protein targets with a length up to 250 residues from the CASP11 structure prediction exercise. The method reduces sampling bottlenecks, exhibits strong convergence, and demonstrates better performance than the popular fragment assembly method, ROSETTA, on relatively larger proteins with a length of more than 150 residues in our benchmark set. FUSION is freely available through a web server at http://protein.rnet.missouri.edu/FUSION/.",2015-11-06 +28336542,"Seten: a tool for systematic identification and comparison of processes, phenotypes, and diseases associated with RNA-binding proteins from condition-specific CLIP-seq profiles.","RNA-binding proteins (RBPs) control the regulation of gene expression in eukaryotic genomes at post-transcriptional level by binding to their cognate RNAs. Although several variants of CLIP (crosslinking and immunoprecipitation) protocols are currently available to study the global protein-RNA interaction landscape at single-nucleotide resolution in a cell, currently there are very few tools that can facilitate understanding and dissecting the functional associations of RBPs from the resulting binding maps. Here, we present Seten, a web-based and command line tool, which can identify and compare processes, phenotypes, and diseases associated with RBPs from condition-specific CLIP-seq profiles. 
Seten uses BED files resulting from most peak calling algorithms, which include scores reflecting the extent of binding of an RBP on the target transcript, to provide both traditional functional enrichment as well as gene set enrichment results for a number of gene set collections including BioCarta, KEGG, Reactome, Gene Ontology (GO), Human Phenotype Ontology (HPO), and MalaCards Disease Ontology for several organisms including fruit fly, human, mouse, rat, worm, and yeast. It also provides an option to dynamically compare the associated gene sets across data sets as bubble charts, to facilitate comparative analysis. Benchmarking of Seten using eCLIP data for IGF2BP1, SRSF7, and PTBP1 against their corresponding CRISPR RNA-seq in K562 cells as well as randomized negative controls, demonstrated that its gene set enrichment method outperforms functional enrichment, with scores significantly contributing to the discovery of true annotations. Comparative performance analysis using these CRISPR control data sets revealed significantly higher precision and comparable recall to that observed using ChIP-Enrich. Seten's web interface currently provides precomputed results for about 200 CLIP-seq data sets and both command line as well as web interfaces can be used to analyze CLIP-seq data sets. We highlight several examples to show the utility of Seten for rapid profiling of various CLIP-seq data sets. Seten is available on http://www.iupui.edu/∼sysbio/seten/.",2017-03-23 +24040258,Visual search of neuropil-enriched RNAs from brain in situ hybridization data through the image analysis pipeline hippo-ATESC.,"

Motivation

RNA molecules specifically enriched in the neuropil of neuronal cells and in particular in dendritic spines are of great interest for neurobiology in virtue of their involvement in synaptic structure and plasticity. The systematic recognition of such molecules is therefore a very important task. High resolution images of RNA in situ hybridization experiments contained in the Allen Brain Atlas (ABA) represent a very rich resource to identify them and have been so far exploited for this task through human-expert analysis. However, software tools that may automatically address the same objective are not very well developed.

Results

In this study we describe an automatic method for exploring in situ hybridization data and discover neuropil-enriched RNAs in the mouse hippocampus. We called it Hippo-ATESC (Automatic Texture Extraction from the Hippocampal region using Soft Computing). Bioinformatic validation showed that the Hippo-ATESC is very efficient in the recognition of RNAs which are manually identified by expert curators as neuropil-enriched on the same image series. Moreover, we show that our method can also highlight genes revealed by microdissection-based methods but missed by human visual inspection. We experimentally validated our approach by identifying a non-coding transcript enriched in mouse synaptosomes. The code is freely available on the web at http://ibislab.ce.unipr.it/software/hippo/.",2013-09-09 +22553861,Assay Guidance Manual,"This eBook is a comprehensive, crucial resource for investigators optimizing assays to evaluate collections of molecules with the overall goal of developing probes that modulate the activity of biological targets, pathways or cellular phenotypes. Such probes might be candidates for further optimization and investigation in drug discovery and development. Originally written as a guide for therapeutic project teams within a major pharmaceutical company, this manual has been adapted to provide guidelines for scientists in academic, non-profit, government and industrial research laboratories to develop assay formats compatible with High Throughput Screening (HTS) and Structure Activity Relationship (SAR) measurements of new and known molecular entities. Topics addressed in this manual include: Descriptions of assay formats that are compatible with HTS and determination of SAR. Selection and development of optimal assay reagents. Optimizations and troubleshooting for assay protocols with respect to sensitivity, dynamic range, signal intensity and stability. Adaptations of assays for automation and scaling to microtiter plate formats. Instrumentation . 
Sources of assay artifacts and interferences . Statistical validation of assay performance parameters. Secondary assays for chemical probe validation and SAR refinement. Data standards for reporting the results of screening and SAR assays. In vivo assay development and validation. Assay development and validation for siRNA-based high-throughput screens. The National Center for Advancing Translational Sciences (NCATS) manages the content of the Assay Guidance Manual with input from industry, academia and government experts. More than 100 authors from around the globe have contributed content to this free resource, which is updated quarterly with contributions by experienced scientists from multiple disciplines working in drug discovery and development worldwide. For more information about the Assay Guidance Manual and related training opportunities, visit https://ncats.nih.gov/expertise/preclinical/agm.",2012-05-04 +25907632,The KM-parkin-DB: A Sub-set MutationView Database Specialized for PARK2 (PARKIN) Variants.,"We previously isolated PARKIN (PARK2) as a gene responsible for a unique sort of Parkinson disease, namely Autosomal Recessive Juvenile Parkinsonism (ARJP). In this study, we surveyed all the available literature describing PARK2 gene/Parkin protein mutations found in Parkinson disease patients. Only carefully evaluated data were deposited in the graphical database MutationView (http://mutview.dmb.med.keio.ac.jp) to construct KM-parkin-DB, an independent sub-set database. Forty-four articles were selected for data curation regarding clinical information such as ethnic origins, manifested symptoms, onset age, and hereditary patterns as well as mutation details including base changes and zygosity. A total of 366 cases were collected from 39 ethnic origins and 96 pathogenic mutations were found. PARK2 gene mutations were found also in some general Parkinson disease patients. 
The majority (63%) of mutations in PARK2 were restricted to two particular domains (UBL and RING1) of the Parkin protein. In these domains, two major mutations, a large deletion (DelEx3) and a point mutation (p.Arg275Trp), were located.",2015-06-03 +25355510,AraNet v2: an improved database of co-functional gene networks for the study of Arabidopsis thaliana and 27 other nonmodel plant species.,"Arabidopsis thaliana is a reference plant that has been studied intensively for several decades. Recent advances in high-throughput experimental technology have enabled the generation of an unprecedented amount of data from A. thaliana, which has facilitated data-driven approaches to unravel the genetic organization of plant phenotypes. We previously published a description of a genome-scale functional gene network for A. thaliana, AraNet, which was constructed by integrating multiple co-functional gene networks inferred from diverse data types, and we demonstrated the predictive power of this network for complex phenotypes. More recently, we have observed significant growth in the availability of omics data for A. thaliana as well as improvements in data analysis methods that we anticipate will further enhance the integrated database of co-functional networks. Here, we present an updated co-functional gene network for A. thaliana, AraNet v2 (available at http://www.inetbio.org/aranet), which covers approximately 84% of the coding genome. We demonstrate significant improvements in both genome coverage and accuracy. To enhance the usability of the network, we implemented an AraNet v2 web server, which generates functional predictions for A. thaliana and 27 nonmodel plant species using an orthology-based projection of nonmodel plant genes on the A. 
thaliana gene network.",2014-10-29 +28949986,HTS-Net: An integrated regulome-interactome approach for establishing network regulation models in high-throughput screenings.,"High-throughput RNAi screenings (HTS) allow quantifying the impact of the deletion of each gene in any particular function, from virus-host interactions to cell differentiation. However, there has been less development for functional analysis tools dedicated to RNAi analyses. HTS-Net, a network-based analysis program, was developed to identify gene regulatory modules impacted in high-throughput screenings, by integrating transcription factors-target genes interaction data (regulome) and protein-protein interaction networks (interactome) on top of screening z-scores. HTS-Net produces exhaustive HTML reports for results navigation and exploration. HTS-Net is a new pipeline for RNA interference screening analyses that proves better performance than simple gene rankings by z-scores, by re-prioritizing genes and replacing them in their biological context, as shown by the three studies that we reanalyzed. Formatted input data for the three studied datasets, source code and web site for testing the system are available from the companion web site at http://htsnet.marseille.inserm.fr/. We also compared our program with existing algorithms (CARD and hotnet2).",2017-09-26 +29152729,Clinical Pharmacogenetics Implementation Consortium (CPIC) Guideline for Dihydropyrimidine Dehydrogenase Genotype and Fluoropyrimidine Dosing: 2017 Update.,"The purpose of this guideline is to provide information for the interpretation of clinical dihydropyrimidine dehydrogenase (DPYD) genotype tests so that the results can be used to guide dosing of fluoropyrimidines (5-fluorouracil and capecitabine). Detailed guidelines for the use of fluoropyrimidines, their clinical pharmacology, as well as analyses of cost-effectiveness are beyond the scope of this document. 
The Clinical Pharmacogenetics Implementation Consortium (CPIC® ) guidelines consider the situation of patients for which genotype data are already available (updates available at https://cpicpgx.org/guidelines/guideline-for-fluoropyrimidines-and-dpyd/).",2017-11-20 +21353266,"A legume specific protein database (LegProt) improves the number of identified peptides, confidence scores and overall protein identification success rates for legume proteomics.","A legume specific protein database (LegProt) has been created containing sequences from seven legume species, i.e., Glycine max, Lotus japonicus, Medicago sativa, Medicago truncatula, Lupinusalbus, Phaseolus vulgaris, and Pisum sativum. The database consists of amino acid sequences translated from predicted gene models and 6-frame translations of tentative consensus (TC) sequences assembled from expressed sequence tags (ESTs) and singleton ESTs. This database was queried using mass spectral data for protein identification and identification success rates were compared to the NCBI nr database. Specifically, Mascot MS/MS ion searches of tandem nano-LC Q-TOFMS/MS mass spectral data showed that relative to the NCBI nr protein database, the LegProt database yielded a 54% increase in the average protein score (i.e., from NCBI nr 480 to LegProt 739) and a 50% increase in the average number of matched peptides (i.e., from NCBI nr 8 to LegProt 12). The overall identification success rate also increased from 88% (NCBI nr) to 93% (LegProt). Mascot peptide mass fingerprinting (PMF) searches of the LegProt database using MALDI-TOFMS data yielded a significant increase in the identification success rate from 19% (NCBI nr) to 34% (LegProt) while the average scores and average number of matched peptides showed insignificant changes. The results demonstrate that the LegProt database significantly increases legume protein identification success rates and the confidence levels compared to the commonly used NCBI nr. 
These improvements are primarily due to the presence of a large number of legume specific TC sequences in the LegProt database that were not found in NCBI nr. The LegProt database is freely available for download (http://bioinfo.noble.org/manuscript-support/legumedb) and will serve as a valuable resource for legume proteomics.",2011-02-23 +29361894,The visualCMAT: A web-server to select and interpret correlated mutations/co-evolving residues in protein families.,"The visualCMAT web-server was designed to assist experimental research in the fields of protein/enzyme biochemistry, protein engineering, and drug discovery by providing an intuitive and easy-to-use interface to the analysis of correlated mutations/co-evolving residues. Sequence and structural information describing homologous proteins are used to predict correlated substitutions by the Mutual information-based CMAT approach, classify them into spatially close co-evolving pairs, which either form a direct physical contact or interact with the same ligand (e.g. a substrate or a crystallographic water molecule), and long-range correlations, annotate and rank binding sites on the protein surface by the presence of statistically significant co-evolving positions. The results of the visualCMAT are organized for a convenient visual analysis and can be downloaded to a local computer as a content-rich all-in-one PyMol session file with multiple layers of annotation corresponding to bioinformatic, statistical and structural analyses of the predicted co-evolution, or further studied online using the built-in interactive analysis tools. The online interactivity is implemented in HTML5 and therefore neither plugins nor Java are required. The visualCMAT web-server is integrated with the Mustguseal web-server capable of constructing large structure-guided sequence alignments of protein families and superfamilies using all available information about their structures and sequences in public databases. 
The visualCMAT web-server can be used to understand the relationship between structure and function in proteins, implemented at selecting hotspots and compensatory mutations for rational design and directed evolution experiments to produce novel enzymes with improved properties, and employed at studying the mechanism of selective ligand's binding and allosteric communication between topologically independent sites in protein structures. The web-server is freely available at https://biokinet.belozersky.msu.ru/visualcmat and there are no login requirements.",2017-12-28 +28362818,VivaxGEN: An open access platform for comparative analysis of short tandem repeat genotyping data in Plasmodium vivax populations.,"

Background

The control and elimination of Plasmodium vivax will require a better understanding of its transmission dynamics, through the application of genotyping and population genetics analyses. This paper describes VivaxGEN (http://vivaxgen.menzies.edu.au), a web-based platform that has been developed to support P. vivax short tandem repeat data sharing and comparative analyses.

Results

The VivaxGEN platform provides a repository for raw data generated by capillary electrophoresis (FSA files), with fragment analysis and standardized allele calling tools. The query system of the platform enables users to filter, select and differentiate samples and alleles based on their specified criteria. Key population genetic analyses are supported including measures of population differentiation (FST), expected heterozygosity (HE), linkage disequilibrium (IAS), neighbor-joining analysis and Principal Coordinate Analysis. Datasets can also be formatted and exported for application in commonly used population genetic software including GENEPOP, Arlequin and STRUCTURE. To date, data from 10 countries, including 5 publicly available data sets have been shared with VivaxGEN.

Conclusions

VivaxGEN is well placed to facilitate regional overviews of P. vivax transmission dynamics in different endemic settings and can be adapted for similar genetic studies of P. falciparum and other organisms.",2017-03-31 +26018427,The human transmembrane proteome.,"

Background

Transmembrane proteins have important roles in cells, as they are involved in energy production, signal transduction, cell-cell interaction, cell-cell communication and more. In human cells, they are frequently targets for pharmaceuticals; therefore, knowledge about their properties and structure is crucial. The topology of transmembrane proteins provides low-resolution structural information, which can be a starting point for either laboratory experiments or modelling their 3D structures.

Results

Here, we present a database of the human α-helical transmembrane proteome, including the predicted and/or experimentally established topology of each transmembrane protein, together with the reliability of the prediction. In order to distinguish transmembrane proteins in the proteome as well as for topology prediction, we used a newly developed consensus method (CCTOP) that incorporates recent state of the art methods, with tested accuracies on a novel human benchmark protein set. CCTOP utilizes all available structure and topology data as well as bioinformatical evidences for topology prediction in a probabilistic framework provided by the hidden Markov model. This method shows the highest accuracy (98.5 % for discriminating between transmembrane and non-transmembrane proteins and 84 % for per protein topology prediction) among the dozen tested topology prediction methods. Analysis of the human proteome with the CCTOP indicates that it contains 4998 (26 %) transmembrane proteins. Besides predicting topology, reliability of the predictions is estimated as well, and it is demonstrated that the per protein prediction accuracies of more than 60 % of the predictions are over 98 % on the benchmark sets and most probably on the predicted human transmembrane proteome too.

Conclusions

Here, we present the most accurate prediction of the human transmembrane proteome together with the experimental topology data. These data, as well as various statistics about the human transmembrane proteins and their topologies can be downloaded from and can be visualized at the website of the human transmembrane proteome ( http://htp.enzim.hu ).",2015-05-28 +22610854,Immune epitope database analysis resource.,"The immune epitope database analysis resource (IEDB-AR: http://tools.iedb.org) is a collection of tools for prediction and analysis of molecular targets of T- and B-cell immune responses (i.e. epitopes). Since its last publication in the NAR webserver issue in 2008, a new generation of peptide:MHC binding and T-cell epitope predictive tools have been added. As validated by different labs and in the first international competition for predicting peptide:MHC-I binding, their predictive performances have improved considerably. In addition, a new B-cell epitope prediction tool was added, and the homology mapping tool was updated to enable mapping of discontinuous epitopes onto 3D structures. Furthermore, to serve a wider range of users, the number of ways in which IEDB-AR can be accessed has been expanded. Specifically, the predictive tools can be programmatically accessed using a web interface and can also be downloaded as software packages.",2012-05-18 +30062776,Nicotine and electronic cigarette (E-Cig) exposure decreases brain glucose utilization in ischemic stroke.,"Previous studies in our laboratory have shown that nicotine exposure decreases glucose transport across the blood-brain barrier in ischemia-reperfusion conditions. We hypothesize that nicotine can also dysregulate brain parenchymal glucose utilization by altering glucose transporters with effects on sensitivity to ischemic stroke. In this study, we investigated the effects of nicotine exposure on neuronal glucose utilization using an in vitro ischemic stroke model. 
We also tested the effects of e-Cig vaping on ischemic brain glucose utilization using an acute brain slice technique. Primary cortical neurons and brain slices were subjected to oxygen-glucose deprivation followed by reoxygenation to mimic ischemia-reperfusion injury. We estimated brain cell glucose utilization by measuring the uptake of [3 H] deoxy-d-glucose. Immunofluorescence and western blotting were done to characterize glucose transporters (GLUTs) and α7 nicotinic acetylcholine receptor (nAChR) expression. Furthermore, we used a glycolytic stress test to measure the effects of nicotine exposure on neuronal glucose metabolism. We observed that short- and long-term nicotine/cotinine exposure significantly decreased neuronal glucose utilization in ischemic conditions and the non-specific nAChR antagonist, mecamylamine reversed this effect. Nicotine/cotinine exposure also decreased neuronal GLUT1 and up-regulated α7 nAChR expression and decreased glycolysis. Exposure of mice to e-Cig vapor for 7 days likewise decreases brain glucose uptake under normoxic and ischemic conditions along with down-regulation of GLUT1 and GLUT3 expressions. These data support, from a cerebrovascular perspective, that nicotine and/or e-Cig vaping induce a state of glucose deprivation at the neurovascular unit which could lead to enhanced ischemic brain injury and/or stroke risk. OPEN PRACTICES: Open Science: This manuscript was awarded with the Open Materials Badge. For more information see: https://cos.io/our-services/open-science-badges/.",2018-10-18 +21223570,A platform for processing expression of short time series (PESTS).,"

Background

Time course microarray profiles examine the expression of genes over a time domain. They are necessary in order to determine the complete set of genes that are dynamically expressed under given conditions, and to determine the interaction between these genes. Because of cost and resource issues, most time series datasets contain less than 9 points and there are few tools available geared towards the analysis of this type of data.

Results

To this end, we introduce a platform for Processing Expression of Short Time Series (PESTS). It was designed with a focus on usability and interpretability of analyses for the researcher. As such, it implements several standard techniques for comparability as well as visualization functions. However, it is designed specifically for the unique methods we have developed for significance analysis, multiple test correction and clustering of short time series data. The central tenet of these methods is the use of biologically relevant features for analysis. Features summarize short gene expression profiles, inherently incorporate dependence across time, and allow for both full description of the examined curve and missing data points.

Conclusions

PESTS is fully generalizable to other types of time series analyses. PESTS implements novel methods as well as several standard techniques for comparability and visualization functions. These features and functionality make PESTS a valuable resource for a researcher's toolkit. PESTS is available to download for free to academic and non-profit users at http://www.mailman.columbia.edu/academic-departments/biostatistics/research-service/software-development.",2011-01-11 +25760404,BaAMPs: the database of biofilm-active antimicrobial peptides.,"Antimicrobial peptides (AMPs) are increasingly being considered as novel agents against biofilms. The development of AMP-based anti-biofilm strategies strongly relies on the design of sequences optimized to target specific features of sessile bacterial/fungal communities. Although several AMP databases have been created and successfully exploited for AMP design, all of these use data collected on peptides tested against planktonic microorganisms. Here, an open-access, manually curated database of AMPs specifically assayed against microbial biofilms (BaAMPs) is presented for the first time. In collecting relevant data from the literature an effort was made to define a minimal standard set of essential information including, for each AMP, the microbial species and biofilm conditions against which it was tested, and the specific assay and peptide concentration used. The availability of these data in an organized framework will benefit anti-biofilm research and support the design of novel molecules active against biofilm. BaAMPs is accessible at http://www.baamps.it.",2015-01-01 +30028513,"Structure of monomeric full-length ARC sheds light on molecular flexibility, protein interactions, and functional modalities.","The activity-regulated cytoskeleton-associated protein (ARC) is critical for long-term synaptic plasticity and memory formation. 
Acting as a protein interaction hub, ARC regulates diverse signalling events in postsynaptic neurons. A protein interaction site is present in the ARC C-terminal domain (CTD), a bilobar structure homologous to the retroviral Gag capsid domain. We hypothesized that detailed knowledge of the three-dimensional molecular structure of monomeric full-length ARC is crucial to understand its function; therefore, we set out to determine the structure of ARC to understand its various functional modalities. We purified recombinant ARC and analyzed its structure using small-angle X-ray scattering and synchrotron radiation circular dichroism spectroscopy. Monomeric full-length ARC has a compact, closed structure, in which the oppositely charged N-terminal domain (NTD) and CTD are juxtaposed, and the flexible linker between them is not extended. The modeled structure of ARC is supported by intramolecular live-cell Förster resonance energy transfer imaging in rat hippocampal slices. Peptides from several postsynaptic proteins, including stargazin, bind to the N-lobe, but not to the C-lobe, of the bilobar CTD. This interaction does not induce large-scale conformational changes in the CTD or flanking unfolded regions. The ARC NTD contains long helices, predicted to form an anti-parallel coiled coil; binding of ARC to phospholipid membranes requires the NTD. Our data support a role for the ARC NTD in oligomerization as well as lipid membrane binding. The findings have important implications for the structural organization of ARC with respect to distinct functions, such as postsynaptic signal transduction and virus-like capsid formation. Open Practices Open Science: This manuscript was awarded with the Open Materials Badge. 
For more information see: https://cos.io/our-services/open-science-badges/.",2018-09-26 +23193253,Library of Apicomplexan Metabolic Pathways: a manually curated database for metabolic pathways of apicomplexan parasites.,"The Library of Apicomplexan Metabolic Pathways (LAMP, http://www.llamp.net) is a web database that provides near complete mapping from genes to the central metabolic functions for some of the prominent intracellular parasites of the phylum Apicomplexa. This phylum includes the causative agents of malaria, toxoplasmosis and theileriosis-diseases with a huge economic and social impact. A number of apicomplexan genomes have been sequenced, but the accurate annotation of gene function remains challenging. We have adopted an approach called metabolic reconstruction, in which genes are systematically assigned to functions within pathways/networks for Toxoplasma gondii, Neospora caninum, Cryptosporidium and Theileria species, and Babesia bovis. Several functions missing from pathways have been identified, where the corresponding gene for an essential process appears to be absent from the current genome annotation. For each species, LAMP contains interactive diagrams of each pathway, hyperlinked to external resources and annotated with detailed information, including the sources of evidence used. We have also developed a section to highlight the overall metabolic capabilities of each species, such as the ability to synthesize or the dependence on the host for a particular metabolite. We expect this new database will become a valuable resource for fundamental and applied research on the Apicomplexa.",2012-11-27 +28396730,The incidence of MRSA infections in the United States: is a more comprehensive tracking system needed?,"A review of epidemiological studies on the incidence of MRSA infections overtime was performed along with an analysis of data available for download from Hospital Compare (https://data.medicare.gov/data/hospital-compare). 
We found the estimations of the incidence of MRSA infections varied widely depending upon the type of population studied, the types of infections captured and in the definitions and terminology used to describe the results. We could not find definitive evidence that the incidence of MRSA infections in U.S. community or facilities is decreasing significantly. Of concern are recent data reported to the National Healthcare Safety Network (NHSN) on MRSA bloodstream infections which indicate that by the end of 2015 there had been little change in the average facility Standardized Infection Ratio (0.988), compared to a 2010-2011 baseline and is significantly increased compared to the previous year. This is in contradistinction to the recent Veterans Administration study which reported over an 80% reduction in MRSA infections. However, this discrepancy may be due to the inability to reconcile the baselines of the two data sets; and the observed increase may be artifactual due to aberrations in the NHSN tracking system. Our review supports the need for implementation of a comprehensive tracking and monitoring system involving all types of healthcare facilities for multi-drug resistant organisms, along with concomitant funding for both staff and infrastructure. Without such a system, determining the effectiveness of interventions such as antibiotic stewardship and chlorhexidine bathing will be hindered.",2017-04-07 +22477438,eDom: norming software and relative meaning frequencies for 544 English homonyms.,"Words that are homonyms-that is, for which a single written and spoken form is associated with multiple, unrelated interpretations, such as COMPOUND, which can denote an < enclosure > or a < composite > meaning-are an invaluable class of items for studying word and discourse comprehension. 
When using homonyms as stimuli, it is critical to control for the relative frequencies of each interpretation, because this variable can drastically alter the empirical effects of homonymy. Currently, the standard method for estimating these frequencies is based on the classification of free associates generated for a homonym, but this approach is both assumption-laden and resource-demanding. Here, we outline an alternative norming methodology based on explicit ratings of the relative meaning frequencies of dictionary definitions. To evaluate this method, we collected and analyzed data in a norming study involving 544 English homonyms, using the eDom norming software that we developed for this purpose. Dictionary definitions were generally sufficient to exhaustively cover word meanings, and the methods converged on stable norms with fewer data and less effort on the part of the experimenter. The predictive validity of the norms was demonstrated in analyses of lexical decision data from the English Lexicon Project (Balota et al., Behavior Research Methods, 39, 445-459, 2007), and from Armstrong and Plaut (Proceedings of the 33rd Annual Meeting of the Cognitive Science Society, 2223-2228, 2011). On the basis of these results, our norming method obviates relying on the unsubstantiated assumptions involved in estimating relative meaning frequencies on the basis of classification of free associates. Additional details of the norming procedure, the meaning frequency norms, and the source code, standalone binaries, and user manual for the software are available at http://edom.cnbc.cmu.edu .",2012-12-01 +28583674,Three-dimensional data-tracking dynamic optimization simulations of human locomotion generated by direct collocation.,"The aim of this study was to perform full-body three-dimensional (3D) dynamic optimization simulations of human locomotion by driving a neuromusculoskeletal model toward in vivo measurements of body-segmental kinematics and ground reaction forces. 
Gait data were recorded from 5 healthy participants who walked at their preferred speeds and ran at 2m/s. Participant-specific data-tracking dynamic optimization solutions were generated for one stride cycle using direct collocation in tandem with an OpenSim-MATLAB interface. The body was represented as a 12-segment, 21-degree-of-freedom skeleton actuated by 66 muscle-tendon units. Foot-ground interaction was simulated using six contact spheres under each foot. The dynamic optimization problem was to find the set of muscle excitations needed to reproduce 3D measurements of body-segmental motions and ground reaction forces while minimizing the time integral of muscle activations squared. Direct collocation took on average 2.7±1.0h and 2.2±1.6h of CPU time, respectively, to solve the optimization problems for walking and running. Model-computed kinematics and foot-ground forces were in good agreement with corresponding experimental data while the calculated muscle excitation patterns were consistent with measured EMG activity. The results demonstrate the feasibility of implementing direct collocation on a detailed neuromusculoskeletal model with foot-ground contact to accurately and efficiently generate 3D data-tracking dynamic optimization simulations of human locomotion. The proposed method offers a viable tool for creating feasible initial guesses needed to perform predictive simulations of movement using dynamic optimization theory. The source code for implementing the model and computational algorithm may be downloaded at http://simtk.org/home/datatracking.",2017-05-19 +30423106,Topology independent structural matching discovers novel templates for protein interfaces.,"

Motivation

Protein-protein interactions (PPI) are essential for the function of the cellular machinery. The rapid growth of protein-protein complexes with known 3D structures offers a unique opportunity to study PPI to gain crucial insights into protein function and the causes of many diseases. In particular, it would be extremely useful to compare interaction surfaces of monomers, as this would enable the pinpointing of potential interaction surfaces based solely on the monomer structure, without the need to predict the complete complex structure. While there are many structural alignment algorithms for individual proteins, very few have been developed for protein interfaces, and none that can align only the interface residues to other interfaces or surfaces of interacting monomer subunits in a topology independent (non-sequential) manner.

Results

We present InterComp, a method for topology and sequence-order independent structural comparisons. The method is general and can be applied to various structural comparison applications. By representing residues as independent points in space rather than as a sequence of residues, InterComp can be applied to a wide range of problems including interface-surface comparisons and interface-interface comparisons. We demonstrate a use-case by applying InterComp to find similar protein interfaces on the surface of proteins. We show that InterComp pinpoints the correct interface for almost half of the targets (283 of 586) when considering the top 10 hits, and for 24% of the top 1, even when no templates can be found with regular sequence-order dependent structural alignment methods.

Availability and implementation

The source code and the datasets are available at: http://wallnerlab.org/InterComp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-09-01 +30256157,Infant Dietary Exposures to Environmental Chemicals and Infant/Child Health: A Critical Assessment of the Literature.,"

Background

The benefits of breastfeeding to the infant and mother have been well documented. It is also well known that breast milk contains environmental chemicals, and numerous epidemiological studies have explored relationships between background levels of chemicals in breast milk and health outcomes in infants and children.

Objectives

In this paper, we examine epidemiological literature to address the following question: Are infant exposures to background levels of environmental chemicals in breast milk and formula associated with adverse health effects? We critically review this literature a) to explore whether exposure-outcome associations are observed across studies, and b) to assess the literature quality.

Methods

We reviewed literature identified from electronic literature searches. We explored whether exposure-outcome associations are observed across studies by assessing the quality (using a modified version of a previously published quality assessment tool), consistency, and strengths and weaknesses in the literature. The epidemiological literature included cohorts from several countries and examined infants/children either once or multiple times over weeks to years. Health outcomes included four broad categories: growth and maturation, morbidity, biomarkers, and neurodevelopment.

Results

The available literature does not provide conclusive evidence of consistent or clinically relevant health consequences to infants exposed to environmental chemicals in breast milk at background levels.

Conclusions

It is clear that more research would better inform our understanding of the potential for health impacts from infant dietary exposures to environmental chemicals. A critical data gap is a lack of research on environmental chemicals in formula and infant/child health outcomes. https://doi.org/10.1289/EHP1954.",2018-09-01 +29688313,Automatic selection of verification tools for efficient analysis of biochemical models.,"Motivation:Formal verification is a computational approach that checks system correctness (in relation to a desired functionality). It has been widely used in engineering applications to verify that systems work correctly. Model checking, an algorithmic approach to verification, looks at whether a system model satisfies its requirements specification. This approach has been applied to a large number of models in systems and synthetic biology as well as in systems medicine. Model checking is, however, computationally very expensive, and is not scalable to large models and systems. Consequently, statistical model checking (SMC), which relaxes some of the constraints of model checking, has been introduced to address this drawback. Several SMC tools have been developed; however, the performance of each tool significantly varies according to the system model in question and the type of requirements being verified. This makes it hard to know, a priori, which one to use for a given model and requirement, as choosing the most efficient tool for any biological application requires a significant degree of computational expertise, not usually available in biology labs. The objective of this article is to introduce a method and provide a tool leading to the automatic selection of the most appropriate model checker for the system of interest. Results:We provide a system that can automatically predict the fastest model checking tool for a given biological model. Our results show that one can make predictions of high confidence, with over 90% accuracy. 
This implies significant performance gain in verification time and substantially reduces the 'usability barrier' enabling biologists to have access to this powerful computational technology. Availability and implementation:SMC Predictor tool is available at http://www.smcpredictor.com. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-09-01 +29991502,Remission of Spontaneous Canine Tumors after Systemic Cellular Viroimmunotherapy.,"Dogs with spontaneous tumors treated in veterinary hospitals offer an excellent opportunity for studying immunotherapies, including oncolytic viruses. Oncolytic viruses have advanced into the clinic as an intratumorally administered therapeutic; however, intravenous delivery has been hindered by neutralization in the blood. To circumvent this hurdle, mesenchymal stem cells have been used as a ""Trojan horse."" Here, we present the treatment of 27 canine patients with cancer with canine mesenchymal stem cells infected with ICOCAV17, a canine oncolytic adenovirus. No significant adverse effects were found. The response rate was 74%, with 14.8% showing complete responses, including total remissions of lung metastasis. We detected virus infection, stromal degeneration, and immune cell infiltration in tumor biopsies after 4 weeks of treatment. The increased presence of antiadenoviral antibodies in the peripheral blood of treated dogs did not appear to prevent the clinical benefit of this therapy. These data indicate that oncolytic viruses loaded in mesenchymal stem cells represent an effective cancer immunotherapy.Significance: The classical clinical limitations of antitumoral viroimmunotherapy can be overcome by use of mesenchymal stem cells.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/17/4891/F1.large.jpg Cancer Res; 78(17); 4891-901. 
©2018 AACR.",2018-07-10 +25392412,The Addgene repository: an international nonprofit plasmid and data resource.,"The Addgene Repository (http://www.addgene.org) was founded to accelerate research and discovery by improving access to useful, high-quality research materials and information. The repository archives plasmids generated by scientists, conducts quality control, annotates the associated data and makes the plasmids and their data available to the scientific community. Plasmid associated data undergoes ongoing curation by members of the scientific community and by Addgene scientists. The growing database contains information on >31,000 unique plasmids spanning most experimental biological systems and organisms. The library includes a large number of plasmid tools for use in a wide variety of research areas, such as empty backbones, lentiviral resources, fluorescent protein vectors and genome engineering tools. The Addgene Repository database is always evolving with new plasmid deposits so it contains currently pertinent resources while ensuring the information on earlier deposits is still available. Custom search and browse features are available to access information on the diverse collection. Extensive educational materials and information are provided by the database curators to support the scientists that are accessing the repository's materials and data.",2014-11-11 +25926964,Capturing domain knowledge from multiple sources: the rare bone disorders use case.,"

Background

Lately, ontologies have become a fundamental building block in the process of formalising and storing complex biomedical information. The community-driven ontology curation process, however, ignores the possibility of multiple communities building, in parallel, conceptualisations of the same domain, and thus providing slightly different perspectives on the same knowledge. The individual nature of this effort leads to the need of a mechanism to enable us to create an overarching and comprehensive overview of the different perspectives on the domain knowledge.

Results

We introduce an approach that enables the loose integration of knowledge emerging from diverse sources under a single coherent interoperable resource. To accurately track the original knowledge statements, we record the provenance at very granular levels. We exemplify the approach in the rare bone disorders domain by proposing the Rare Bone Disorders Ontology (RBDO). Using RBDO, researchers are able to answer queries, such as: ""What phenotypes describe a particular disorder and are common to all sources?"" or to understand similarities between disorders based on divergent groupings (classifications) provided by the underlying sources.

Availability

RBDO is available at http://purl.org/skeletome/rbdo. In order to support lightweight query and integration, the knowledge captured by RBDO has also been made available as a SPARQL Endpoint at http://bio-lark.org/se_skeldys.html.",2015-04-17 +29972057,Engagement and higher order skill proficiency of students completing a medical physiology course in three diverse learning environments.,"The primary aim of this study was to determine whether levels of student engagement, higher order skill proficiency, and knowledge acquisition demonstrated by medical students would differ when completing the same course in three diverse learning environments. Following Institutional Review Board approval, 56 first-year medical students, registered at the same medical school but attending class at three different campus centers, were enrolled in the study. All participants were completing a medical physiology course that utilized the same learning objectives but relied on different faculty incorporating diverse methodologies (percentage of class devoted to active learning strategies), course format (6-wk block vs. 17-wk semester), and student attendance. Students completed a validated survey of student engagement (SSE), a proctored online problem-based assessment of higher order skill proficiency [Collegiate Learning Assessment (CLA+); http://cae.org ], and the National Board of Medical Examiners (NBME) Physiology subject exam. In this limited sample, results indicate no significant differences between campus sites for any of the variables assessed. Levels of engagement were lower than expected compared with published values for graduate students. Higher order skill proficiency assessed by CLA+ was significantly higher than values reported for college seniors nationally. Surprisingly, SSE offered no prediction of performance on CLA+ or NBME, as there were no significant correlations between variables. 
These data indicate that, although first-year medical students may not perceive themselves as highly engaged, they are adept in using higher order skills and excel in meeting course learning objectives, regardless of learning environment.",2018-09-01 +22535208,"PaxDb, a database of protein abundance averages across all three domains of life.","Although protein expression is regulated both temporally and spatially, most proteins have an intrinsic, ""typical"" range of functionally effective abundance levels. These extend from a few molecules per cell for signaling proteins, to millions of molecules for structural proteins. When addressing fundamental questions related to protein evolution, translation and folding, but also in routine laboratory work, a simple rough estimate of the average wild type abundance of each detectable protein in an organism is often desirable. Here, we introduce a meta-resource dedicated to integrating information on absolute protein abundance levels; we place particular emphasis on deep coverage, consistent post-processing and comparability across different organisms. Publicly available experimental data are mapped onto a common namespace and, in the case of tandem mass spectrometry data, re-processed using a standardized spectral counting pipeline. By aggregating and averaging over the various samples, conditions and cell-types, the resulting integrated data set achieves increased coverage and a high dynamic range. We score and rank each contributing, individual data set by assessing its consistency against externally provided protein-network information, and demonstrate that our weighted integration exhibits more consistency than the data sets individually. The current PaxDb-release 2.1 (at http://pax-db.org/) presents whole-organism data as well as tissue-resolved data, and covers 85,000 proteins in 12 model organisms. 
All values can be seamlessly compared across organisms via pre-computed orthology relationships.",2012-04-24 +23644394,"Tracking the relevance of the WHO Framework Convention on Tobacco Control in legislation and litigation through the online resource, Tobacco Control Laws.","The WHO Framework Convention on Tobacco Control is increasingly referenced and incorporated into the objectives, definitions and provisions of domestic legislation worldwide. It is also relied upon by courts in interpreting and upholding strong tobacco control measures challenged by the tobacco industry. In this special communication, we describe these trends and explore the important new online resource-Tobacco Control Laws (http://www.tobaccocontrollaws.org)--that has been used to track them.",2013-05-04 +28436466,Genome-wide profiling of heritable and de novo STR variations.,"Short tandem repeats (STRs) are highly variable elements that play a pivotal role in multiple genetic diseases, population genetics applications, and forensic casework. However, it has proven problematic to genotype STRs from high-throughput sequencing data. Here, we describe HipSTR, a novel haplotype-based method for robustly genotyping and phasing STRs from Illumina sequencing data, and we report a genome-wide analysis and validation of de novo STR mutations. HipSTR is freely available at https://hipstr-tool.github.io/HipSTR.",2017-04-24 +29196969,Rapid and reliable protein structure determination via chemical shift threading.,"Protein structure determination using nuclear magnetic resonance (NMR) spectroscopy can be both time-consuming and labor intensive. Here we demonstrate how chemical shift threading can permit rapid, robust, and accurate protein structure determination using only chemical shift data. 
Threading is a relatively old bioinformatics technique that uses a combination of sequence information and predicted (or experimentally acquired) low-resolution structural data to generate high-resolution 3D protein structures. The key motivations behind using NMR chemical shifts for protein threading lie in the fact that they are easy to measure, they are available prior to 3D structure determination, and they contain vital structural information. The method we have developed uses not only sequence and chemical shift similarity but also chemical shift-derived secondary structure, shift-derived super-secondary structure, and shift-derived accessible surface area to generate a high quality protein structure regardless of the sequence similarity (or lack thereof) to a known structure already in the PDB. The method (called E-Thrifty) was found to be very fast (often < 10 min/structure) and to significantly outperform other shift-based or threading-based structure determination methods (in terms of top template model accuracy)-with an average TM-score performance of 0.68 (vs. 0.50-0.62 for other methods). Coupled with recent developments in chemical shift refinement, these results suggest that protein structure determination, using only NMR chemical shifts, is becoming increasingly practical and reliable. E-Thrifty is available as a web server at http://ethrifty.ca .",2017-12-01 +26013919,NIG_MoG: a mouse genome navigator for exploring intersubspecific genetic polymorphisms.,"The National Institute of Genetics Mouse Genome database (NIG_MoG; http://molossinus.lab.nig.ac.jp/msmdb/) primarily comprises the whole-genome sequence data of two inbred mouse strains, MSM/Ms and JF1/Ms. These strains were established at NIG and originated from the Japanese subspecies Mus musculus molossinus. 
NIG_MoG provides visualized genome polymorphism information, browsing single-nucleotide polymorphisms and short insertions and deletions in the genomes of MSM/Ms and JF1/Ms with respect to C57BL/6J (whose genome is predominantly derived from the West European subspecies M. m. domesticus). This allows users, especially wet-lab biologists, to intuitively recognize intersubspecific genome divergence in these mouse strains using visual data. The database also supports the in silico screening of bacterial artificial chromosome (BAC) clones that contain genomic DNA from MSM/Ms and the standard classical laboratory strain C57BL/6N. NIG_MoG is thus a valuable navigator for exploring mouse genome polymorphisms and BAC clones that are useful for studies of gene function and regulation based on intersubspecific genome divergence.",2015-05-27 +28188908,HIVE-heptagon: A sensible variant-calling algorithm with post-alignment quality controls.,"Advances in high-throughput sequencing (HTS) technologies have greatly increased the availability of genomic data and potential discovery of clinically significant genomic variants. However, numerous issues still exist with the analysis of these data, including data complexity, the absence of formally agreed upon best practices, and inconsistent reproducibility. Toward a more robust and reproducible variant-calling paradigm, we propose a series of selective noise filtrations and post-alignment quality control (QC) techniques that may reduce the rate of false variant calls. We have implemented both novel and refined post-alignment QC mechanisms to augment existing pre-alignment QC measures. These techniques can be used independently or in combination to identify and correct issues caused during data generation or early analysis stages. 
The adoption of these procedures by the broader scientific community is expected to improve the identification of clinically significant variants both in terms of computational efficiency and in the confidence of the results.

Availability

https://hive.biochemistry.gwu.edu/.",2017-02-08 +22140107,"SNPedia: a wiki supporting personal genome annotation, interpretation and analysis.","SNPedia (http://www.SNPedia.com) is a wiki resource of the functional consequences of human genetic variation as published in peer-reviewed studies. Online since 2006 and freely available for personal use, SNPedia has focused on the medical, phenotypic and genealogical associations of single nucleotide polymorphisms. Entries are formatted to allow associations to be assigned to single genotypes as well as sets of genotypes (genosets). In this article, we discuss the growth of this resource and its use by affiliated software to create personal genome reports.",2011-12-02 +30578582,Sex differences in the circulatory responses to an isocapnic cold pressor test.,"

New findings

What is the central question of this study? Do sex differences exist in the cardiorespiratory responses to an isocapnic cold pressor test (CPT)? What is the main finding and its importance? During the CPT, there were no sex differences in the respiratory response; however, females demonstrated a reduced mean arterial pressure and reduced dilatation of the common carotid artery. Given that the CPT is predictive of future cardiovascular events, these data have clinical implications for improving the utility of the CPT to determine cardiovascular health risk. Sex differences should be taken into consideration when conducting and interpreting a CPT.

Abstract

The cold pressor test (CPT) elicits a transient increase in sympathetic nervous activity, minute ventilation (V̇E), mean arterial pressure (MAP) and common carotid artery (CCA) diameter in healthy individuals. Although the extent of dilatation of the CCA in response to the CPT has been used as a clinical indicator of cardiovascular health status, the potential sex differences have yet to be explored. In response to a CPT, we hypothesized that elevations in V̇E and MAP and dilatation of the CCA would be attenuated in females compared with males. In 20 young, healthy participants (10 females), we measured the respiratory, cardiovascular and CCA responses during a CPT, which consisted of a 3 min immersion of the right foot into 0-1 °C ice water. Blood pressure (via finger photoplethysmography), heart rate (via electrocardiogram) and CCA diameter and velocity (via Duplex ultrasound) were simultaneously recorded immediately before and during the CPT. During the CPT, while controlling end-tidal gases to baseline values, the main findings were as follows: (i) no sex differences were present in absolute or relative changes in V̇E (P = 0.801 and P = 0.179, respectively); (ii) the relative MAP and CCA diameter response were reduced in females by 51 and 55%, respectively (P = 0.008 and P = 0.029 versus males, respectively); and (iii) the relative MAP response was positively correlated with the dilatation of the CCA in males (r = 0.42, P = 0.019), in females (r = 0.43, P = 0.019) and in males and females combined (r = 0.55, P < 0.001). Given that the CPT is used as a clinical tool to assess cardiovascular health status, sex differences should be considered in future studies.",2019-01-22 +27153688,A simple method to control over-alignment in the MAFFT multiple sequence alignment program.,"

Motivation

We present a new feature of the MAFFT multiple alignment program for suppressing over-alignment (aligning unrelated segments). Conventional MAFFT is highly sensitive in aligning conserved regions in remote homologs, but the risk of over-alignment is recently becoming greater, as low-quality or noisy sequences are increasing in protein sequence databases, due, for example, to sequencing errors and difficulty in gene prediction.

Results

The proposed method utilizes a variable scoring matrix for different pairs of sequences (or groups) in a single multiple sequence alignment, based on the global similarity of each pair. This method significantly increases the correctly gapped sites in real examples and in simulations under various conditions. Regarding sensitivity, the effect of the proposed method is slightly negative in real protein-based benchmarks, and mostly neutral in simulation-based benchmarks. This approach is based on natural biological reasoning and should be compatible with many methods based on dynamic programming for multiple sequence alignment.

Availability and implementation

The new feature is available in MAFFT versions 7.263 and higher. http://mafft.cbrc.jp/alignment/software/

Contact

katoh@ifrec.osaka-u.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-26 +26013810,PatternQuery: web application for fast detection of biomacromolecular structural patterns in the entire Protein Data Bank.,"Well defined biomacromolecular patterns such as binding sites, catalytic sites, specific protein or nucleic acid sequences, etc. precisely modulate many important biological phenomena. We introduce PatternQuery, a web-based application designed for detection and fast extraction of such patterns. The application uses a unique query language with Python-like syntax to define the patterns that will be extracted from datasets provided by the user, or from the entire Protein Data Bank (PDB). Moreover, the database-wide search can be restricted using a variety of criteria, such as PDB ID, resolution, and organism of origin, to provide only relevant data. The extraction generally takes a few seconds for several hundreds of entries, up to approximately one hour for the whole PDB. The detected patterns are made available for download to enable further processing, as well as presented in a clear tabular and graphical form directly in the browser. The unique design of the language and the provided service could pave the way towards novel PDB-wide analyses, which were either difficult or unfeasible in the past. The application is available free of charge at http://ncbr.muni.cz/PatternQuery.",2015-05-26 +28361687,PINTnet: construction of condition-specific pathway interaction network by computing shortest paths on weighted PPI.,"

Background

Identifying perturbed pathways in a given condition is crucial in understanding biological phenomena. In addition to identifying perturbed pathways individually, pathway analysis should consider interactions among pathways. Currently available pathway interaction prediction methods are based on the existence of overlapping genes between pathways, protein-protein interaction (PPI) or functional similarities. However, these approaches just consider the pathways as a set of genes, thus they do not take account of topological features. In addition, most of the existing approaches do not handle the explicit gene expression quantity information that is routinely measured by RNA-sequencing.

Results

To overcome these technical issues, we developed a new pathway interaction network construction method using PPI, closeness centrality and shortest paths. We tested our approach on three different high-throughput RNA-seq data sets: pregnant mice data to reveal the role of serotonin on beta cell mass, bone-metastatic breast cancer data and autoimmune thyroiditis data to study the role of IFN-α. Our approach successfully identified the pathways reported in the original papers. For the pathways that are not directly mentioned in the original papers, we were able to find evidence of pathway interactions through a literature search. Our method outperformed two existing approaches, overlapping gene-based approach (OGB) and protein-protein interaction-based approach (PB), in experiments with the three data sets.

Conclusion

Our results show that PINTnet successfully identified condition-specific perturbed pathways and the interactions between the pathways. We believe that our method will be very useful in characterizing biological mechanisms at the pathway level. PINTnet is available at http://biohealth.snu.ac.kr/software/PINTnet/ .",2017-03-14 +29920424,Loop modelling 1.0.,"Engineering surface loops is a sub-topic of protein engineering that is used routinely in many research fields in academia and industry alike. We provide some tools that search in the PDB for loops satisfying a wide variety of constraints. We illustrate the usefulness of these tools by applying them to a series of recently published studies that included loop engineering or loop modelling. LoopFinder finds loops that fit between two anchor stretches of typically 2, 3, or 4 amino acids each. ProDA finds loops of a given length with predefined secondary structure, residue types, hydrophobicity, etc. WHAT IF has gotten a series of new options to scan the whole PDB for loops combining the LoopFinder and ProDA techniques. The open nature of these tools will allow bioinformaticians in this field to easily design their own loop modelling software around our tools.

Availability and implementation

LoopFinder is a stand-alone Fortran program that is likely to compile and run on every computer. The LoopFinder source code, data files, and documentation are freely available from swift.cmbi.ru.nl/gv/loops/. ProDA is free to all users. There is no login requirement. It is available at: http://bioinf.modares.ac.ir/software/linda/. WHAT IF is shareware that is available from https://swift.cmbi.ru.nl/whatif/.",2018-06-07 +25324176,SraTailor: graphical user interface software for processing and visualizing ChIP-seq data.,"Raw data from ChIP-seq (chromatin immunoprecipitation combined with massively parallel DNA sequencing) experiments are deposited in public databases as SRAs (Sequence Read Archives) that are publically available to all researchers. However, to graphically visualize ChIP-seq data of interest, the corresponding SRAs must be downloaded and converted into BigWig format, a process that involves complicated command-line processing. This task requires users to possess skill with script languages and sequence data processing, a requirement that prevents a wide range of biologists from exploiting SRAs. To address these challenges, we developed SraTailor, a GUI (Graphical User Interface) software package that automatically converts an SRA into a BigWig-formatted file. Simplicity of use is one of the most notable features of SraTailor: entering an accession number of an SRA and clicking the mouse are the only steps required to obtain BigWig-formatted files and to graphically visualize the extents of reads at given loci. SraTailor is also able to make peak calls, generate files of other formats, process users' own data, and accept various command-line-like options. Therefore, this software makes ChIP-seq data fully exploitable by a wide range of biologists. 
SraTailor is freely available at http://www.devbio.med.kyushu-u.ac.jp/sra_tailor/, and runs on both Mac and Windows machines.",2014-10-17 +26862144,CrAgDb--a database of annotated chaperone repertoire in archaeal genomes. ,"Chaperones are a diverse class of ubiquitous proteins that assist other cellular proteins in folding correctly and maintaining their native structure. Many different chaperones cooperate to constitute the 'proteostasis' machinery in the cells. It has been proposed earlier that archaeal organisms could be ideal model systems for deciphering the basic functioning of the 'protein folding machinery' in higher eukaryotes. Several chaperone families have been characterized in archaea over the years but mostly one protein at a time, making it difficult to decipher the composition and mechanistics of the protein folding system as a whole. In order to deal with these lacunae, we have developed a database of all archaeal chaperone proteins, CrAgDb (Chaperone repertoire in Archaeal genomes). The data have been presented in a systematic way with intuitive browse and search facilities for easy retrieval of information. Access to these curated datasets should expedite large-scale analysis of archaeal chaperone networks and significantly advance our understanding of operation and regulation of the protein folding machinery in archaea. Researchers could then translate this knowledge to comprehend the more complex protein folding pathways in eukaryotic systems. The database is freely available at http://14.139.227.92/mkumar/cragdb/.",2016-02-08 +23275950,[Evolution and infection biology of hemolytic-uremic syndrome (HUS) associated E. coli (HUSEC)].,"Shiga toxin (Stx)-producing Escherichia coli (STEC), which cause hemolytic-uremic syndrome (HUS), are designated as HUSEC. Their exceptional genome variability driven by evolutionary diversification permits fast adaptation to changed environmental conditions. 
The HUSEC collection (http://www.ehec.org), which has been established at the Institute for Hygiene in Münster, contains 42 EHEC reference strains (HUSEC001-HUSEC042). It represents a unique repository collection of pathogens and is extremely helpful for the analysis of evolutionary changes and fixed properties in the STEC that cause the most severe host injury. Such genomic attributes include slowly evolving loci, mobile genetic elements that often encode virulence factors and are assimilated via horizontal gene transfer. Current evolutionary models indicate that numerous outbreak strains evolved recently and that highly pathogenic HUSEC descend from less pathogenic progenitors. However, additional data suggest that HUSEC have small effective population sizes. The HUSEC collection is also a valuable resource with which to study important non-Shiga toxin virulence factors.",2013-01-01 +28981423,PREMER: A Tool to Infer Biological Networks.,"Inferring the structure of unknown cellular networks is a main challenge in computational biology. Data-driven approaches based on information theory can determine the existence of interactions among network nodes automatically. However, the elucidation of certain features-such as distinguishing between direct and indirect interactions or determining the direction of a causal link-requires estimating information-theoretic quantities in a multidimensional space. This can be a computationally demanding task, which acts as a bottleneck for the application of elaborate algorithms to large-scale network inference problems. The computational cost of such calculations can be alleviated by the use of compiled programs and parallelization. To this end, we have developed PREMER (Parallel Reverse Engineering with Mutual information & Entropy Reduction), a software toolbox that can run in parallel and sequential environments. 
It uses information theoretic criteria to recover network topology and determine the strength and causality of interactions, and allows incorporating prior knowledge, imputing missing data, and correcting outliers. PREMER is a free, open source software tool that does not require any commercial software. Its core algorithms are programmed in FORTRAN 90 and implement OpenMP directives. It has user interfaces in Python and MATLAB/Octave, and runs on Windows, Linux, and OSX (https://sites.google.com/site/premertoolbox/).",2017-10-04 +29274973,Short communication: Signs of host genetic regulation in the microbiome composition in 2 dairy breeds: Holstein and Brown Swiss.,"This study aimed to evaluate whether the host genotype exerts any genetic control on the microbiome composition of the rumen in cattle. Microbial DNA was extracted from 18 samples of ruminal content from 2 breeds (Holstein and Brown Swiss). Reads were processed using mothur (https://www.mothur.org/) in 16S and 18S rRNA gene-based analyses. Then, reads were classified at the genus clade, resulting in 3,579 operational taxonomic units (OTU) aligned against the 16S database and 184 OTU aligned against the 18S database. After filtering on relative abundance (>0.1%) and penetrance (95%), 25 OTU were selected for the analyses (17 bacteria, 1 archaea, and 7 ciliates). Association with the genetic background of the host animal based on the principal components of a genomic relationship matrix based on single nucleotide polymorphism markers was analyzed using Bayesian methods. Fifty percent of the bacteria and archaea genera were associated with the host genetic background, including Butyrivibrio, Prevotella, Paraprevotella, and Methanobrevibacter as main genera. Forty-three percent of the ciliates analyzed were also associated with the genetic background of the host. In total, 48% of microbes were associated with the host genetic background. 
The results in this study support the hypothesis and provide some evidence that there exists a host genetic component in cattle that can partially regulate the composition of the microbiome.",2017-12-21 +29342451,Decadal Changes in the Edible Supply of Seafood and Methylmercury Exposure in the United States.,"BACKGROUND:Methylmercury (MeHg) exposure is associated with adverse effects on neurodevelopment and cardiovascular health. Previous work indicates most MeHg is from marine fish sold in the commercial market, but does not fully resolve supply regions globally. This information is critical for linking changes in environmental MeHg levels to human exposure in the U.S. population. OBJECTIVES:We used available data to estimate the geographic origins of seafood consumed in the United States (major ocean basins, coastal fisheries, aquaculture, freshwater) and how shifts in edible supply affected MeHg exposures between 2000-2002 and 2010-2012. METHODS:Source regions for edible seafood and MeHg exposure in the United States were characterized from national and international landing, export and import data from the Food and Agricultural Organization of the United Nations and the U.S. National Marine Fisheries Service. RESULTS:Our analysis suggests 37% of U.S. population-wide MeHg exposure is from mainly domestic coastal systems and 45% from open ocean ecosystems. We estimate that the Pacific Ocean alone supplies more than half of total MeHg exposure. Aquaculture and freshwater fisheries together account for an estimated 18% of total MeHg intake. Shifts in seafood types and supply regions between 2000-2002 and 2010-2012 reflect changes in consumer preferences (e.g., away from canned light meat tuna), global ecosystem shifts (e.g., northern migration of cod stocks), and increasing supply from aquaculture (e.g., shrimp and salmon). CONCLUSION:Our findings indicate global actions that reduce anthropogenic Hg emissions will be beneficial for U.S. 
seafood consumers because open ocean ecosystems supply a large fraction of their MeHg exposure. However, our estimates suggest that domestic actions can provide the greatest benefit for coastal seafood consumers. https://doi.org/10.1289/EHP2644.",2018-01-16 +,Global surface reflectance products from Landsat: Assessment using coincident MODIS observations,"Global, long-term monitoring of changes in Earth's land surface requires quantitative comparisons of satellite images acquired under widely varying atmospheric conditions. Although physically based estimates of surface reflectance (SR) ultimately provide the most accurate representation of Earth's surface properties, there has never been a globally consistent SR dataset at the spatial resolution (<1 ha) or temporal extent (~40 years) of the Landsat mission. To increase the consistency and robustness of Landsat-based land cover monitoring, we atmospherically corrected the Global Land Survey (GLS) Landsat dataset using the Landsat Ecosystem Disturbance Adaptive Processing System (LEDAPS) implementation of the Second Simulation of the Satellite Signal in the Solar Spectrum (6S) radiative transfer model. The GLS provides synoptic, orthorectified, cloud-free Landsat coverage of Earth's land area in four nominal epochs (1975, 1990, 2000, and 2005). This paper presents the resulting GLS surface reflectance dataset and a global assessment of the 2000- and 2005-epoch data against coincident Moderate Resolution Imaging Spectroradiometer (MODIS) daily SR and Normalized Bidirectional Distribution Function-Adjusted Reflectance (NBAR) measurements. Agreement with respect to MODIS SR and NBAR data is very high, with overall discrepancies (Root-Mean-Squared Deviation (RMSD)) between 1.3 and 2.8 percent reflectance for Landsat-7 Enhanced Thematic Mapper Plus (ETM+) and between 2.2 and 3.5 percent reflectance for Landsat-5 Thematic Mapper (TM). 
The resulting Landsat surface reflectance dataset and the associated quality metrics for each image are hosted on the Global Land Cover Facility web site for free download (http://www.landcover.org/data/gls_SR). This new repository will provide consistent, calibrated, multi-decadal image data for robust land cover change detection and monitoring across the Earth sciences.",2013-07-01 +23161690,The TP53 website: an integrative resource centre for the TP53 mutation database and TP53 mutant analysis.,"A novel resource centre for TP53 mutations and mutants has been developed (http://p53.fr). TP53 gene dysfunction can be found in the majority of human cancer types. The potential use of TP53 mutation as a biomarker for clinical studies or exposome analysis has led to the publication of thousands of reports describing the TP53 gene status in >10,000 tumours. The UMD TP53 mutation database was created in 1990 and has been regularly updated. The 2012 release of the database has been carefully curated, and all suspicious reports have been eliminated. It is available either as a flat file that can be easily manipulated or as novel multi-platform analytical software that has been designed to analyse various aspects of TP53 mutations. Several tools to ascertain TP53 mutations are also available for download. We have developed TP53MULTLoad, a manually curated database providing comprehensive details on the properties of 2549 missense TP53 mutants. More than 100,000 entries have been arranged in 39 different activity fields, such as change of transactivation on various promoters, apoptosis or growth arrest. For several hot spot mutants, multiple gain of function activities are also included. 
The database can be easily browsed via a graphical user interface.",2012-11-17 +22080559,"The Aspergillus Genome Database (AspGD): recent developments in comprehensive multispecies curation, comparative genomics and community resources.","The Aspergillus Genome Database (AspGD; http://www.aspgd.org) is a freely available, web-based resource for researchers studying fungi of the genus Aspergillus, which includes organisms of clinical, agricultural and industrial importance. AspGD curators have now completed comprehensive review of the entire published literature about Aspergillus nidulans and Aspergillus fumigatus, and this annotation is provided with streamlined, ortholog-based navigation of the multispecies information. AspGD facilitates comparative genomics by providing a full-featured genomics viewer, as well as matched and standardized sets of genomic information for the sequenced aspergilli. AspGD also provides resources to foster interaction and dissemination of community information and resources. We welcome and encourage feedback at aspergillus-curator@lists.stanford.edu.",2011-11-12 +27153591,DUDes: a top-down taxonomic profiler for metagenomics.,"

Motivation

Species identification and quantification are common tasks in metagenomics and pathogen detection studies. The most recent techniques are built on mapping the sequenced reads against a reference database (e.g. whole genomes, marker genes, proteins) followed by application-dependent analysis steps. Although these methods have been proven to be useful in many scenarios, there is still room for improvement in species and strain level detection, mainly for low abundant organisms.

Results

We propose a new method: DUDes, a reference-based taxonomic profiler that introduces a novel top-down approach to analyze metagenomic Next-generation sequencing (NGS) samples. Rather than predicting an organism presence in the sample based only on relative abundances, DUDes first identifies possible candidates by comparing the strength of the read mapping in each node of the taxonomic tree in an iterative manner. Instead of using the lowest common ancestor we propose a new approach: the deepest uncommon descendent. We showed in experiments that DUDes works for single and multiple organisms and can identify low abundant taxonomic groups with high precision.

Availability and implementation

DUDes is open source and it is available at http://sf.net/p/dudes

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

renardB@rki.de.",2016-03-24 +27942560,"Dataset for analysing the relationships among economic growth, fossil fuel and non-fossil fuel consumption.","The data presented in this article are related to the research article entitled 'Economic Growth, Fossil Fuel and Non-Fossil Consumption: A Pooled Mean Group Analysis using Proxies for Capital' (J. Asafu-Adjaye, D. Byrne, M. Alvarez, 2016) [1]. This article describes data modified from three publicly available data sources: the World Bank's World Development Indicators (http://databank.worldbank.org/data/reports.aspx?source=world-development-indicators), the U.S. Energy Information Administration's International Energy Statistics (http://www.eia.gov/cfapps/ipdbproject/IEDIndex3.cfm?tid=44&pid=44&aid=2) and the Barro-Lee Educational Attainment Dataset (http://www.barrolee.com). These data can be used to examine the relationships between economic growth and different forms of energy consumption. The dataset is made publicly available to promote further analyses.",2016-11-26 +28453677,AFS: identification and quantification of species composition by metagenomic sequencing.,"

Summary

DNA-based methods to detect and quantify taxon composition in biological materials are often based on species-specific polymerase chain reaction, limited to detecting species targeted by the assay. Next-generation sequencing overcomes this drawback by untargeted shotgun sequencing of whole metagenomes at affordable cost. Here we present AFS, a software pipeline for quantification of species composition in food. AFS uses metagenomic shotgun sequencing and sequence read counting to infer species proportions. Using Illumina data from a reference sausage comprising four species, we reveal that AFS is independent of the sequencing assay and library preparation protocol. Cost-saving short (50-bp) single-end reads and Nextera ® library preparation yield reliable results.

Availability and implementation

Datasets, binaries and usage instructions are available under http://all-food-seq.sourceforge.net. Raw data is available at NCBI's SRA with accession number PRJNA271645.

Contact

hankeln@uni-mainz.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +28706299,Findings of a 1303 Korean whole-exome sequencing study.,"Ethnically specific data on genetic variation are crucial for understanding human biology and for clinical interpretation of variant pathogenicity. We analyzed data obtained by deep sequencing 1303 Korean whole exomes; the data were generated by three independent whole exome sequencing projects (named the KOEX study). The primary focus of this study was to comprehensively analyze the variant statistics, investigate secondary findings that may have clinical actionability, and identify loci that should be cautiously interpreted for pathogenicity. A total of 495 729 unique variants were identified at exonic regions, including 169 380 nonsynonymous variants and 4356 frameshift insertion/deletions. Among these, 76 607 were novel coding variants. On average, each individual had 7136 nonsynonymous single-nucleotide variants and 74 frameshift insertion/deletions. We classified 13 pathogenic and 13 likely pathogenic variants in 56 genes that may have clinical actionability according to the guidelines of the American College of Medical Genetics and Genomics, and the Association for Molecular Pathology. The carrier frequency of these 26 variants was 2.46% (95% confidence interval 1.73-3.46). To identify loci that require cautious interpretation in clinical sequencing, we identified 18 genes that are prone to sequencing errors, and 671 genes that are highly polymorphic and carry excess nonsynonymous variants. The catalog of identified variants, its annotation and frequency information are publicly available (http://koex.snu.ac.kr). These findings should be useful resources for investigating ethnically specific characteristics in human health and disease.",2017-07-14 +29273515,Nasal vs Oronasal CPAP for OSA Treatment: A Meta-Analysis.,"BACKGROUND:Nasal CPAP is the ""gold standard"" treatment for OSA. 
However, oronasal masks are frequently used in clinical practice. The aim of this study was to perform a meta-analysis of all randomized and nonrandomized trials that compared nasal vs oronasal masks on CPAP level, residual apnea-hypopnea index (AHI), and CPAP adherence to treat OSA. METHODS:The Cochrane Central Register of Controlled Trials, Medline, and Web of Science were searched for relevant studies in any language with the following terms: ""sleep apnea"" and ""CPAP"" or ""sleep apnea"" and ""oronasal mask"" or ""OSA"" and ""oronasal CPAP"" or ""oronasal mask"" and ""adherence."" Studies on CPAP treatment for OSA were included, based on the following criteria: (1) original article; (2) randomized or nonrandomized trials; and (3) comparison between nasal and oronasal CPAP including pressure level, and/or residual AHI, and/or CPAP adherence. RESULTS:We identified five randomized and eight nonrandomized trials (4,563 patients) that reported CPAP level and/or residual AHI and/or CPAP adherence. Overall, the random-effects meta-analysis revealed that as compared with nasal, oronasal masks were associated with a significantly higher CPAP level (Hedges' g, -0.59; 95% CI, -0.82 to -0.37; P < .001) (on average, +1.5 cm H2O), higher residual AHI (Hedges' g, -0.34; 95% CI, -0.52 to -0.17; P < .001) (+2.8 events/h), and a poorer adherence (Hedges' g, 0.50; 95% CI, 0.21-0.79; P = .001) (-48 min/night). CONCLUSIONS:Oronasal masks are associated with a higher CPAP level, higher residual AHI, and poorer adherence than nasal masks. 
TRIAL REGISTRY:PROSPERO database; No.: CRD42017064584; URL: https://www.crd.york.ac.uk/prospero/.",2017-12-19 +25405079,Identification of rare alternative splicing events in MS/MS data reveals a significant fraction of alternative translation initiation sites.,"Integration of transcriptome data is a crucial step for the identification of rare protein variants in mass-spectrometry (MS) data with important consequences for all branches of biotechnology research. Here, we used Splooce, a database of splicing variants recently developed by us, to search MS data derived from a variety of human tumor cell lines. More than 800 new protein variants were identified whose corresponding MS spectra were specific to protein entries from Splooce. Although the types of splicing variants (exon skipping, alternative splice sites and intron retention) were found at the same frequency as in the transcriptome, we observed a large variety of modifications at the protein level induced by alternative splicing events. Surprisingly, we found that 40% of all protein modifications induced by alternative splicing led to the use of alternative translation initiation sites. Other modifications include frameshifts in the open reading frame and inclusion or deletion of peptide sequences. To make the dataset generated here available to the community in a more effective form, the Splooce portal (http://www.bioinformatics-brazil.org/splooce) was modified to report the alternative splicing events supported by MS data.",2014-11-13 +25125445,PlantCAZyme: a database for plant carbohydrate-active enzymes. ,"PlantCAZyme is a database built upon dbCAN (database for automated carbohydrate active enzyme annotation), aiming to provide pre-computed sequence and annotation data of carbohydrate active enzymes (CAZymes) to plant carbohydrate and bioenergy research communities. 
The current version contains data of 43,790 CAZymes of 159 protein families from 35 plants (including angiosperms, gymnosperms, lycophyte and bryophyte mosses) and chlorophyte algae with fully sequenced genomes. Useful features of the database include: (i) a BLAST server and a HMMER server that allow users to search against our pre-computed sequence data for annotation purpose, (ii) a download page to allow batch downloading data of a specific CAZyme family or species and (iii) protein browse pages to provide an easy access to the most comprehensive sequence and annotation data. http://cys.bios.niu.edu/plantcazyme/",2014-08-14 +28358118,CombiROC: an interactive web tool for selecting accurate marker combinations of omics data.,"Diagnostic accuracy can be improved considerably by combining multiple markers, whose performance in identifying diseased subjects is usually assessed via receiver operating characteristic (ROC) curves. The selection of multimarker signatures is a complicated process that requires integration of data signatures with sophisticated statistical methods. We developed a user-friendly tool, called CombiROC, to help researchers accurately determine optimal markers combinations from diverse omics methods. With CombiROC data from different domains, such as proteomics and transcriptomics, can be analyzed using sensitivity/specificity filters: the number of candidate marker panels rising from combinatorial analysis is easily optimized bypassing limitations imposed by the nature of different experimental approaches. Leaving to the user full control on initial selection stringency, CombiROC computes sensitivity and specificity for all markers combinations, performances of best combinations and ROC curves for automatic comparisons, all visualized in a graphic interface. 
CombiROC was designed without hard-coded thresholds, allowing a custom fit to each specific data: this dramatically reduces the computational burden and lowers the false negative rates given by fixed thresholds. The application was validated with published data, confirming the marker combination already originally described or even finding new ones. CombiROC is a novel tool for the scientific community freely available at http://CombiROC.eu.",2017-03-30 +28481978,Application of the cghRA framework to the genomic characterization of Diffuse Large B-Cell Lymphoma.,"

Motivation

Although sequencing-based technologies are becoming the new reference in genome analysis, comparative genomic hybridization arrays (aCGH) still constitute a simple and reliable approach for copy number analysis. The most powerful algorithms to analyze such data have been freely provided by the scientific community for many years, but combining them is a complex scripting task.

Results

The cghRA framework combines a user-friendly graphical interface and a powerful object-oriented command-line interface to handle a full aCGH analysis, as is illustrated in an original series of 107 Diffuse Large B-Cell Lymphomas. New algorithms for copy-number calling, polymorphism detection and minimal common region prioritization were also developed and validated. While their performances will only be demonstrated with aCGH, these algorithms could actually prove useful to any copy-number analysis, whatever the technique used.

Availability and implementation

R package and source for Linux, MS Windows and MacOS are freely available at http://bioinformatics.ovsa.fr/cghRA.

Contact

mareschal@ovsa.fr or fabrice.jardin@chb.unicancer.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +31505679,"A Survey Instrument to Assess Intake of Dietary Supplements, Related Products, and Caffeine in High-Use Populations.","Substantial data on the use of dietary supplements by the general adult population are available, but many population subgroups have not been extensively studied. Because military service members and young people consume large amounts of dietary supplements, including for enhancement of physical performance, weight control, and bodybuilding, which can be dangerous, we developed a comprehensive questionnaire to characterize patterns of supplement use in these and other populations. The questionnaire has been used to study >7000 military service members and 1000 college students. This supplement article presents a detailed description of the questionnaire, which contains comprehensive questions on demographic characteristics, exercise habits, attitudes with regard to dietary supplements, and amount of money spent on supplements. Intakes of specific dietary supplements and caffeine, frequency of use, and reasons for use are assessed. The questionnaire was designed for studying dietary supplement and caffeine intake patterns with the use of paper-and-pencil administration to military populations and was modified for use with college students and for computer and Web administration. It is available online at https://go.usa.gov/xn9FP and in the Supplemental File for this publication. It can be used to study other populations if minor modifications are made. The online version of the questionnaire will be updated periodically as new versions become available. In conclusion, a validated, detailed, noncopyrighted questionnaire designed to assess the use of dietary supplements, energy drinks (and related products), and caffeine is available for use in diverse populations. 
The format of the questionnaire is adaptable to computer administration and scoring, and it can be customized for specific subpopulations, locations, and product categories including updates that reflect changes in the availability of supplements or availability of new products.",2018-08-01 +27899279,WormBase ParaSite - a comprehensive resource for helminth genomics.,"The number of publicly available parasitic worm genome sequences has increased dramatically in the past three years, and research interest in helminth functional genomics is now quickly gathering pace in response to the foundation that has been laid by these collective efforts. A systematic approach to the organisation, curation, analysis and presentation of these data is clearly vital for maximising the utility of these data to researchers. We have developed a portal called WormBase ParaSite (http://parasite.wormbase.org) for interrogating helminth genomes on a large scale. Data from over 100 nematode and platyhelminth species are integrated, adding value by way of systematic and consistent functional annotation (e.g. protein domains and Gene Ontology terms), gene expression analysis (e.g. alignment of life-stage specific transcriptome data sets), and comparative analysis (e.g. orthologues and paralogues). We provide several ways of exploring the data, including genome browsers, genome and gene summary pages, text search, sequence search, a query wizard, bulk downloads, and programmatic interfaces. In this review, we provide an overview of the back-end infrastructure and analysis behind WormBase ParaSite, and the displays and tools available to users for interrogating helminth genomic data.",2016-11-27 +27346526,"Delineation of Spatial Variability in the Temperature-Mortality Relationship on Extremely Hot Days in Greater Vancouver, Canada.","

Background

Climate change has increased the frequency and intensity of extremely hot weather. The health risks associated with extremely hot weather are not uniform across affected areas owing to variability in heat exposure and social vulnerability, but these differences are challenging to map with precision.

Objectives

We developed a spatially and temporally stratified case-crossover approach for delineation of areas with higher and lower risks of mortality on extremely hot days and applied this approach in greater Vancouver, Canada.

Methods

Records of all deaths with an extremely hot day as a case day or a control day were extracted from an administrative vital statistics database spanning the years of 1998-2014. Three heat exposure and 11 social vulnerability variables were assigned at the residential location of each decedent. Conditional logistic regression was used to estimate the odds ratio for a 1°C increase in daily mean temperature at a fixed site with an interaction term for decedents living above and below different values of the spatial variables.

Results

The heat exposure and social vulnerability variables with the strongest spatially stratified results were the apparent temperature and the labor nonparticipation rate, respectively. Areas at higher risk had values ≥ 34.4°C for the maximum apparent temperature and ≥ 60% of the population neither employed nor looking for work. These variables were combined in a composite index to quantify their interaction and to enhance visualization of high-risk areas.

Conclusions

Our methods provide a data-driven framework for spatial delineation of the temperature--mortality relationship by heat exposure and social vulnerability. The results can be used to map and target the most vulnerable areas for public health intervention. Citation: Ho HC, Knudby A, Walker BB, Henderson SB. 2017. Delineation of spatial variability in the temperature-mortality relationship on extremely hot days in greater Vancouver, Canada. Environ Health Perspect 125:66-75; http://dx.doi.org/10.1289/EHP224.",2016-06-27 +24609470,OMIGA: Optimized Maker-Based Insect Genome Annotation.,"Insects are one of the largest classes of animals on Earth and constitute more than half of all living species. The i5k initiative has begun sequencing of more than 5,000 insect genomes, which should greatly help in exploring insect resource and pest control. Insect genome annotation remains challenging because many insects have high levels of heterozygosity. To improve the quality of insect genome annotation, we developed a pipeline, named Optimized Maker-Based Insect Genome Annotation (OMIGA), to predict protein-coding genes from insect genomes. We first mapped RNA-Seq reads to genomic scaffolds to determine transcribed regions using Bowtie, and the putative transcripts were assembled using Cufflink. We then selected highly reliable transcripts with intact coding sequences to train de novo gene prediction software, including Augustus. The re-trained software was used to predict genes from insect genomes. Exonerate was used to refine gene structure and to determine near exact exon/intron boundary in the genome. Finally, we used the software Maker to integrate data from RNA-Seq, de novo gene prediction, and protein alignment to produce an official gene set. The OMIGA pipeline was used to annotate the draft genome of an important insect pest, Chilo suppressalis, yielding 12,548 genes. Different strategies were compared, which demonstrated that OMIGA had the best performance. 
In summary, we present a comprehensive pipeline for identifying genes in insect genomes that can be widely used to improve the annotation quality in insects. OMIGA is provided at http://ento.njau.edu.cn/omiga.html .",2014-03-09 +30300409,A mathematical model for predicting the adult height of girls with idiopathic central precocious puberty: A European validation.,"

Background

A previous single-center study established a mathematical model for predicting the adult height (AH) in girls with idiopathic central precocious puberty (CPP).

Objective

To perform internal and external validations by comparing the actual AH to the calculated AH established by this model and to update it.

Methods

The original formula, calculated AH (cm) = 2.21 (height at initial evaluation, SD) + 2.32 (target height, SD) - 1.83 (luteinizing hormone/follicle-stimulating hormone peaks ratio) + 159.68, was established in a sample of 134 girls (group 4) and was applied to additional girls with CPP seen in the same center (group 1, n = 35), in Germany (group 2, n = 43) and in the Netherlands (group 3, n = 72). This formula has been updated based on these extended data, and both versions are available at the following location: http://www.kamick.org/lemaire/med/girls-cpp15.html.

Results

Despite the differences among the 4 groups in terms of their characteristics at the initial evaluation and the percentages of patients treated with the gonadotropin-releasing hormone analogue, they have similar calculated and actual AHs. The actual AHs are 162.2±7.0, 163.0±7.6, 162.4±7.7 and 162.1±5.6 cm in groups 1 to 4, respectively. They are highly correlated with the AHs calculated by the formula established in the original group (group 4), with R at 0.84, 0.67 and 0.69 in groups 1 to 3, respectively. When the actual AHs and the AHs predicted by the Bayley and Pinneau method are compared, the R is 0.76, 0.51 and 0.64 in groups 1 to 3, respectively. The absolute differences between actual AHs and the calculated AHs are greater than 1 SD (5.6 cm) in 15%, 35% and 28% of the patients in groups 1 to 3, respectively.

Conclusion

This study validates and updates the previously established formula for predicting AH in girls with CPP. This updated formula can help clinicians to make treatment decisions.",2018-10-09 +26415807,Mass Casualty Incident Primary Triage Methods in China.,"

Objective

To evaluate the technical characteristics and application of mass casualty incident (MCI) primary triage (PT) methods applied in China.

Data sources

Chinese literature was searched by Chinese Academic Journal Network Publishing Database (founded in June 2014). The English literature was searched by PubMed (MEDLINE) (1950 to June 2014). We also searched Official Websites of Chinese Central Government's (http://www.gov.cn/), National Health and Family Planning Commission of China (http://www.nhfpc.gov.cn/), and China Earthquake Information (http://www.csi.ac.cn/).

Study selection

We included studies associated with mass casualty events related to China, the PT applied in China, guidelines and standards, and application and development of the carding PT method in China.

Results

From 3976 potentially relevant articles, 22 met the inclusion criteria, 20 Chinese, and 2 English. These articles included 13 case reports, 3 retrospective analyses of MCI, two methods introductions, three national or sectoral criteria, and one simulated field testing and validation. There were a total of 19 kinds of MCI PT methods that have been reported in China from 1950 to 2014. In addition, there were 15 kinds of PT methods reported in the literature from the instance of the application.

Conclusions

The national and sectoral current triage criteria are developed mainly for earthquake relief. Classification is not clear. Vague criteria (especially between moderate and severe injuries) operability are not practical. There are no triage methods and research for children and special populations. There is no data and evidence supported triage method. We should revise our existing classification and criteria so it is clearer and easier to be grasped in order to build a real, practical, and efficient PT method.",2015-10-01 +26896848,MGFD: the maize gene families database. ,"Most gene families are transcription factor (TF) families, which have fundamental roles in almost all biological processes (development, growth and response to environmental factors) and have been employed to manipulate various types of metabolic, developmental and stress response pathways in plants. Maize (Zea mays) is one of the most important cereal crops in the world due its importance to human nutrition and health. Thus, identifying and annotating all the gene families in maize is an important primary step in defining their functions and understanding their roles in the regulation of diverse biological processes. In this study, we identified 96 predicted maize gene families and systematically characterized all 5826 of the genes in those families. We have also developed a comprehensive database of maize gene families (the MGFD). To further explore the functions of these gene families, we extensively annotated the genes, including such basic information as protein sequence features, gene structure, Gene Ontology classifications, phylogenetic relationships and expression profiles. The MGFD has a user-friendly web interface with multiple browse and search functions, as well as data downloading. The MGFD is freely available to users at http://mgfd.ahau.edu.cn/. Database URL: http://mgfd.ahau.edu.cn/.",2016-02-20 +25737049,Physical rehabilitation for critical illness myopathy and neuropathy.,"

Background

Intensive care unit (ICU) acquired or generalised weakness due to critical illness myopathy (CIM) and polyneuropathy (CIP) are major causes of chronically impaired motor function that can affect activities of daily living and quality of life. Physical rehabilitation of those affected might help to improve activities of daily living.

Objectives

Our primary objective was to assess the effects of physical rehabilitation therapies and interventions for people with CIP and CIM in improving activities of daily living such as walking, bathing, dressing and eating. Secondary objectives were to assess effects on muscle strength and quality of life, and to assess adverse effects of physical rehabilitation.

Search methods

On 16 July 2014 we searched the Cochrane Neuromuscular Disease Group Specialized Register and on 14 July 2014 we searched CENTRAL, MEDLINE, EMBASE and CINAHL Plus. In July 2014, we searched the Physiotherapy Evidence Database (PEDro, http://www.pedro.org.au/) and three trials registries for ongoing trials and further data about included studies. There were no language restrictions. We also handsearched relevant conference proceedings and screened reference lists to identify further trials.

Selection criteria

We planned to include randomised controlled trials (RCTs), quasi-RCTs and randomised controlled cross-over trials of any rehabilitation intervention in people with acquired weakness syndrome due to CIP/CIM.

Data collection and analysis

We would have extracted data, assessed the risk of bias and classified the quality of evidence for outcomes in duplicate, according to the standard procedures of The Cochrane Collaboration. Outcome data collection would have been for activities of daily living (for example, mobility, walking, transfers and self care). Secondary outcomes included muscle strength, quality of life and adverse events.

Main results

The search strategy retrieved 3587 references. After examination of titles and abstracts, we retrieved the full text of 24 potentially relevant studies. None of these studies met the inclusion criteria of our review. No data were suitable to be included in a meta-analysis.

Authors' conclusions

There are no published RCTs or quasi-RCTs that examine whether physical rehabilitation interventions improve activities of daily living for people with CIP and CIM. Large RCTs, which are feasible, need to be conducted to explore the role of physical rehabilitation interventions for people with CIP and CIM.",2015-03-04 +,"8th International Food Data Conference: Quality food composition data, key for health and trade","The 8th International Food Data Conference, with the main theme of “Quality food composition data: key for health and trade”, was organised by the Institute of Nutrition, Mahidol University, Thailand, from 1 to 3 October 2009, under the auspices of the International Network of Food Data System (INFOODS) and the International Union of Nutritional Sciences (IUNS) Task Force. Over 140 delegates from 43 countries attended the conference, which included 2 keynote addresses, 8 special lectures, 32 oral and 80 poster presentations. The conference programme, abstracts of oral and poster presentations, power point slide shows of oral presentations and the Bangkok Declaration are all available on the conference website: http://www.inmu.mahidol.ac.th/8ifdc/. The conference allowed participants to disseminate up-to-date knowledge and the latest information pertaining to food composition databases, to exchange knowledge and experience and to discuss issues of mutual interest.",2011-01-01 +30472995,A Prediction Tool to Facilitate Risk-Stratified Screening for Squamous Cell Skin Cancer.,"Cutaneous squamous cell cancers (cSCCs) present an under-recognized health issue among non-Hispanic whites, one that is likely to increase as populations age. cSCC risks vary considerably among non-Hispanic whites, and this heterogeneity indicates the need for risk-stratified screening strategies that are guided by patients' personal characteristics and clinical histories. 
Here we describe cSCCscore, a prediction tool that uses patients' covariates and clinical histories to assign them personal probabilities of developing cSCCs within 3 years after risk assessment. cSCCscore uses a statistical model for the occurrence and timing of a patient's cSCCs, whose parameters we estimated using cohort data from 66,995 patients in the Kaiser Permanente Northern California healthcare system. We found that patients' covariates and histories explained approximately 75% of their interpersonal cSCC risk variation. Using cross-validated performance measures, we also found cSCCscore's predictions to be moderately well calibrated to the patients' observed cSCC incidence. Moreover, cSCCscore discriminated well between patients who subsequently did and did not develop a new primary cSCC within 3 years after risk assignment, with area under the receiver operating characteristic curve of approximately 85%. Thus, cSCCscore can facilitate more informed management of non-Hispanic white patients at cSCC risk. cSCCscore's predictions are available at https://researchapps.github.io/cSCCscore/.",2018-07-02 +25495537,solGS: a web-based tool for genomic selection.,"

Background

Genomic selection (GS) promises to improve accuracy in estimating breeding values and genetic gain for quantitative traits compared to traditional breeding methods. Its reliance on high-throughput genome-wide markers and statistical complexity, however, is a serious challenge in data management, analysis, and sharing. A bioinformatics infrastructure for data storage and access, and user-friendly web-based tool for analysis and sharing output is needed to make GS more practical for breeders.

Results

We have developed a web-based tool, called solGS, for predicting genomic estimated breeding values (GEBVs) of individuals, using a Ridge-Regression Best Linear Unbiased Predictor (RR-BLUP) model. It has an intuitive web-interface for selecting a training population for modeling and estimating genomic estimated breeding values of selection candidates. It estimates phenotypic correlation and heritability of traits and selection indices of individuals. Raw data is stored in a generic database schema, Chado Natural Diversity, co-developed by multiple database groups. Analysis output is graphically visualized and can be interactively explored online or downloaded in text format. An instance of its implementation can be accessed at the NEXTGEN Cassava breeding database, http://cassavabase.org/solgs.

Conclusions

solGS enables breeders to store raw data and estimate GEBVs of individuals online, in an intuitive and interactive workflow. It can be adapted to any breeding program.",2014-12-14 +24372041,miRror2.0: a platform for assessing the joint action of microRNAs in cell regulation.,"microRNAs (miRNAs) are short, noncoding RNAs that negatively regulate the levels of mRNA post-transcriptionally. Recent experiments revealed thousands of mRNA-miRNA pairs in which multiple miRNAs may bind the same transcript. These results raised the notion of miRNAs teamwork for a wide range of cellular context. miRror2.0 utilizes the miRNA-target predictions from over a dozen programs and resources and unifies them under a common statistical basis. The platform, called miRror2.0, considers the combinatorial regulation by miRNAs in different tissues, cell lines and under a broad range of conditions. A flexible setting permits the selection of the preferred combination of miRNA-target prediction resources as well as the statistical parameters for the analysis. miRror2.0 covers six major model organisms including human and mouse. Importantly, the system is capable of analyzing hundreds of genes that were subjected to miRNAs' regulation. Activating miRror2.0 by introducing thousands of genes from miRNA overexpression experiments successfully identified the objective miRNAs. The output from miRror2.0 is a list of genes that is optimally regulated by a defined set of miRNAs. A symmetric application of miRror2.0 starts with a set of miRNAs, and the system then seeks the preferred set of genes that are regulated by that miRNA composition. The results from miRror2.0 are empowered by an iterative procedure called PSI-miRror. PSI-miRror tests the robustness of miRror2.0 prediction. It allows a refinement of the initial list of genes in view of the miRNAs that optimally regulate this list. 
We present miRror2.0 as a valuable resource for supporting cellular experimentalists that seek recovery of combinatorial regulation by miRNAs from noisy experimental data. miRror2.0 is available at http://www.mirrorsuite.cs.huji.ac.il .",2013-12-01 +25541727,ReNE: a cytoscape plugin for regulatory network enhancement.,"One of the biggest challenges in the study of biological regulatory mechanisms is the integration, americanmodeling, and analysis of the complex interactions which take place in biological networks. Despite post transcriptional regulatory elements (i.e., miRNAs) are widely investigated in current research, their usage and visualization in biological networks is very limited. Regulatory networks are commonly limited to gene entities. To integrate networks with post transcriptional regulatory data, researchers are therefore forced to manually resort to specific third party databases. In this context, we introduce ReNE, a Cytoscape 3.x plugin designed to automatically enrich a standard gene-based regulatory network with more detailed transcriptional, post transcriptional, and translational data, resulting in an enhanced network that more precisely models the actual biological regulatory mechanisms. ReNE can automatically import a network layout from the Reactome or KEGG repositories, or work with custom pathways described using a standard OWL/XML data format that the Cytoscape import procedure accepts. Moreover, ReNE allows researchers to merge multiple pathways coming from different sources. The merged network structure is normalized to guarantee a consistent and uniform description of the network nodes and edges and to enrich all integrated data with additional annotations retrieved from genome-wide databases like NCBI, thus producing a pathway fully manageable through the Cytoscape environment. The normalized network is then analyzed to include missing transcription factors, miRNAs, and proteins. 
The resulting enhanced network is still a fully functional Cytoscape network where each regulatory element (transcription factor, miRNA, gene, protein) and regulatory mechanism (up-regulation/down-regulation) is clearly visually identifiable, thus enabling a better visual understanding of its role and the effect in the network behavior. The enhanced network produced by ReNE is exportable in multiple formats for further analysis via third party applications. ReNE can be freely installed from the Cytoscape App Store (http://apps.cytoscape.org/apps/rene) and the full source code is freely available for download through a SVN repository accessible at http://www.sysbio.polito.it/tools_svn/BioInformatics/Rene/releases/. ReNE enhances a network by only integrating data from public repositories, without any inference or prediction. The reliability of the introduced interactions only depends on the reliability of the source data, which is out of control of ReNe developers.",2014-12-26 +25301850,PDB-wide collection of binding data: current status of the PDBbind database.,"

Motivation

Molecular recognition between biological macromolecules and organic small molecules plays an important role in various life processes. Both structural information and binding data of biomolecular complexes are indispensable for depicting the underlying mechanism in such an event. The PDBbind database was created to collect experimentally measured binding data for the biomolecular complexes throughout the Protein Data Bank (PDB). It thus provides the linkage between structural information and energetic properties of biomolecular complexes, which is especially desirable for computational studies or statistical analyses.

Results

Since its first public release in 2004, the PDBbind database has been updated on an annual basis. The latest release (version 2013) provides experimental binding affinity data for 10,776 biomolecular complexes in PDB, including 8302 protein-ligand complexes and 2474 other types of complexes. In this article, we will describe the current methods used for compiling PDBbind and the updated status of this database. We will also review some typical applications of PDBbind published in the scientific literature.

Availability and implementation

All contents of this database are freely accessible at the PDBbind-CN Web server at http://www.pdbbind-cn.org/.

Contact

wangrx@mail.sioc.ac.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-09 +29255285,iDTI-ESBoost: Identification of Drug Target Interaction Using Evolutionary and Structural Features with Boosting.,"Prediction of new drug-target interactions is critically important as it can lead the researchers to find new uses for old drugs and to disclose their therapeutic profiles or side effects. However, experimental prediction of drug-target interactions is expensive and time-consuming. As a result, computational methods for predictioning new drug-target interactions have gained a tremendous interest in recent times. Here we present iDTI-ESBoost, a prediction model for identification of drug-target interactions using evolutionary and structural features. Our proposed method uses a novel data balancing and boosting technique to predict drug-target interaction. On four benchmark datasets taken from a gold standard data, iDTI-ESBoost outperforms the state-of-the-art methods in terms of area under receiver operating characteristic (auROC) curve. iDTI-ESBoost also outperforms the latest and the best-performing method found in the literature in terms of area under precision recall (auPR) curve. This is significant as auPR curves are argued as suitable metric for comparison for imbalanced datasets similar to the one studied here. Our reported results show the effectiveness of the classifier, balancing methods and the novel features incorporated in iDTI-ESBoost. iDTI-ESBoost is a novel prediction method that has for the first time exploited the structural features along with the evolutionary features to predict drug-protein interactions. We believe the excellent performance of iDTI-ESBoost both in terms of auROC and auPR would motivate the researchers and practitioners to use it to predict drug-target interactions. 
To facilitate that, iDTI-ESBoost is implemented and made publicly available at: http://farshidrayhan.pythonanywhere.com/iDTI-ESBoost/ .",2017-12-18 +29949962,PrimAlign: PageRank-inspired Markovian alignment for large biological networks.,"

Motivation

Cross-species analysis of large-scale protein-protein interaction (PPI) networks has played a significant role in understanding the principles deriving evolution of cellular organizations and functions. Recently, network alignment algorithms have been proposed to predict conserved interactions and functions of proteins. These approaches are based on the notion that orthologous proteins across species are sequentially similar and that topology of PPIs between orthologs is often conserved. However, high accuracy and scalability of network alignment are still a challenge.

Results

We propose a novel pairwise global network alignment algorithm, called PrimAlign, which is modeled as a Markov chain and iteratively transited until convergence. The proposed algorithm also incorporates the principles of PageRank. This approach is evaluated on tasks with human, yeast and fruit fly PPI networks. The experimental results demonstrate that PrimAlign outperforms several prevalent methods with statistically significant differences in multiple evaluation measures. PrimAlign, which is multi-platform, achieves superior performance in runtime with its linear asymptotic time complexity. Further evaluation is done with synthetic networks and results suggest that popular topological measures do not reflect real precision of alignments.

Availability and implementation

The source code is available at http://web.ecs.baylor.edu/faculty/cho/PrimAlign.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +27081154,3CDB: a manually curated database of chromosome conformation capture data. ,"Chromosome conformation capture (3C) is a biochemical technology to analyse contact frequencies between selected genomic sites in a cell population. Its recent genomic variants, e.g. Hi-C/ chromatin interaction analysis by paired-end tag (ChIA-PET), have enabled the study of nuclear organization at an unprecedented level. However, due to the inherent low resolution and ultrahigh cost of Hi-C/ChIA-PET, 3C is still the gold standard for determining interactions between given regulatory DNA elements, such as enhancers and promoters. Therefore, we developed a database of 3C determined functional chromatin interactions (3CDB;http://3cdb.big.ac.cn). To construct 3CDB, we searched PubMed and Google Scholar with carefully designed keyword combinations and retrieved more than 5000 articles from which we manually extracted 3319 interactions in 17 species. Moreover, we proposed a systematic evaluation scheme for data reliability and classified the interactions into four categories. Contact frequencies are not directly comparable as a result of various modified 3C protocols employed among laboratories. Our evaluation scheme provides a plausible solution to this long-standing problem in the field. A user-friendly web interface was designed to assist quick searches in 3CDB. We believe that 3CDB will provide fundamental information for experimental design and phylogenetic analysis, as well as bridge the gap between molecular and systems biologists who must now contend with noisy high-throughput data.Database URL:http://3cdb.big.ac.cn.",2016-04-14 +27887574,FluxFix: automatic isotopologue normalization for metabolic tracer analysis.,"

Background

Isotopic tracer analysis by mass spectrometry is a core technique for the study of metabolism. Isotopically labeled atoms from substrates, such as [13C]-labeled glucose, can be traced by their incorporation over time into specific metabolic products. Mass spectrometry is often used for the detection and differentiation of the isotopologues of each metabolite of interest. For meaningful interpretation, mass spectrometry data from metabolic tracer experiments must be corrected to account for the naturally occurring isotopologue distribution. The calculations required for this correction are time consuming and error prone and existing programs are often platform specific, non-intuitive, commercially licensed and/or limited in accuracy by using theoretical isotopologue distributions, which are prone to artifacts from noise or unresolved interfering signals.

Results

Here we present FluxFix ( http://fluxfix.science ), an application freely available on the internet that quickly and reliably transforms signal intensity values into percent mole enrichment for each isotopologue measured. 'Unlabeled' data, representing the measured natural isotopologue distribution for a chosen analyte, is entered by the user. This data is used to generate a correction matrix according to a well-established algorithm. The correction matrix is applied to labeled data, also entered by the user, thus generating the corrected output data. FluxFix is compatible with direct copy and paste from spreadsheet applications including Excel (Microsoft) and Google sheets and automatically adjusts to account for input data dimensions. The program is simple, easy to use, agnostic to the mass spectrometry platform, generalizable to known or unknown metabolites, and can take input data from either a theoretical natural isotopologue distribution or an experimentally measured one.

Conclusions

Our freely available web-based calculator, FluxFix ( http://fluxfix.science ), quickly and reliably corrects metabolic tracer data for natural isotopologue abundance enabling faster, more robust and easily accessible data analysis.",2016-11-25 +29471406,Multiobjective multifactor dimensionality reduction to detect SNP-SNP interactions.,"

Motivation

Single-nucleotide polymorphism (SNP)-SNP interactions (SSIs) are popular markers for understanding disease susceptibility. Multifactor dimensionality reduction (MDR) can successfully detect considerable SSIs. Currently, MDR-based methods mainly adopt a single-objective function (a single measure based on contingency tables) to detect SSIs. However, generally, a single-measure function might not yield favorable results due to potential model preferences and disease complexities.

Approach

This study proposes a multiobjective MDR (MOMDR) method that is based on a contingency table of MDR as an objective function. MOMDR considers the incorporated measures, including correct classification and likelihood rates, to detect SSIs and adopts set theory to predict the most favorable SSIs with cross-validation consistency. MOMDR enables simultaneously using multiple measures to determine potential SSIs.

Results

Three simulation studies were conducted to compare the detection success rates of MOMDR and single-objective MDR (SOMDR), revealing that MOMDR had higher detection success rates than SOMDR. Furthermore, the Wellcome Trust Case Control Consortium dataset was analyzed by MOMDR to detect SSIs associated with coronary artery disease. Availability and implementation: MOMDR is freely available at https://goo.gl/M8dpDg.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +29878180,OmicsNet: a web-based tool for creation and visual analysis of biological networks in 3D space.,"Biological networks play increasingly important roles in omics data integration and systems biology. Over the past decade, many excellent tools have been developed to support creation, analysis and visualization of biological networks. However, important limitations remain: most tools are standalone programs, the majority of them focus on protein-protein interaction (PPI) or metabolic networks, and visualizations often suffer from 'hairball' effects when networks become large. To help address these limitations, we developed OmicsNet - a novel web-based tool that allows users to easily create different types of molecular interaction networks and visually explore them in a three-dimensional (3D) space. Users can upload one or multiple lists of molecules of interest (genes/proteins, microRNAs, transcription factors or metabolites) to create and merge different types of biological networks. The 3D network visualization system was implemented using the powerful Web Graphics Library (WebGL) technology that works natively in most major browsers. OmicsNet supports force-directed layout, multi-layered perspective layout, as well as spherical layout to help visualize and navigate complex networks. A rich set of functions have been implemented to allow users to perform coloring, shading, topology analysis, and enrichment analysis. OmicsNet is freely available at http://www.omicsnet.ca.",2018-07-01 +30134831,RNAfitme: a webserver for modeling nucleobase and nucleoside residue conformation in fixed-backbone RNA structures.,"

Background

Computational RNA 3D structure prediction and modeling are rising as complementary approaches to high-resolution experimental techniques for structure determination. They often apply to substitute or complement them. Recently, researchers' interests have directed towards in silico methods to fit, remodel and refine RNA tertiary structure models. Their power lies in a problem-specific exploration of RNA conformational space and efficient optimization procedures. The aim is to improve the accuracy of models obtained either computationally or experimentally.

Results

Here, we present RNAfitme, a versatile webserver tool for remodeling of nucleobase- and nucleoside residue conformations in the fixed-backbone RNA 3D structures. Our approach makes use of dedicated libraries that define RNA conformational space. They have been built upon torsional angle characteristics of PDB-deposited RNA structures. RNAfitme can be applied to reconstruct full-atom model of RNA from its backbone; remodel user-selected nucleobase/nucleoside residues in a given RNA structure; predict RNA 3D structure based on the sequence and the template of a homologous molecule of the same size; refine RNA 3D model by reducing steric clashes indicated during structure quality assessment. RNAfitme is a publicly available tool with an intuitive interface. It is freely accessible at http://rnafitme.cs.put.poznan.pl/ CONCLUSIONS: RNAfitme has been applied in various RNA 3D remodeling scenarios for several types of input data. Computational experiments proved its efficiency, accuracy, and usefulness in the processing of RNAs of any size. Fidelity of RNAfitme predictions has been thoroughly tested for RNA 3D structures determined experimentally and modeled in silico.",2018-08-22 +29800452,SMARTIV: combined sequence and structure de-novo motif discovery for in-vivo RNA binding data.,"Gene expression regulation is highly dependent on binding of RNA-binding proteins (RBPs) to their RNA targets. Growing evidence supports the notion that both RNA primary sequence and its local secondary structure play a role in specific Protein-RNA recognition and binding. Despite the great advance in high-throughput experimental methods for identifying sequence targets of RBPs, predicting the specific sequence and structure binding preferences of RBPs remains a major challenge. We present a novel webserver, SMARTIV, designed for discovering and visualizing combined RNA sequence and structure motifs from high-throughput RNA-binding data, generated from in-vivo experiments. 
The uniqueness of SMARTIV is that it predicts motifs from enriched k-mers that combine information from ranked RNA sequences and their predicted secondary structure, obtained using various folding methods. Consequently, SMARTIV generates Position Weight Matrices (PWMs) in a combined sequence and structure alphabet with assigned P-values. SMARTIV concisely represents the sequence and structure motif content as a single graphical logo, which is informative and easy for visual perception. SMARTIV was examined extensively on a variety of high-throughput binding experiments for RBPs from different families, generated from different technologies, showing consistent and accurate results. Finally, SMARTIV is a user-friendly webserver, highly efficient in run-time and freely accessible via http://smartiv.technion.ac.il/.",2018-07-01 +29191560,MSBIS: A Multi-Step Biomedical Informatics Screening Approach for Identifying Medications that Mitigate the Risks of Metoclopramide-Induced Tardive Dyskinesia.,"In 2009 the U.S. Food and Drug Administration (FDA) placed a black box warning on metoclopramide (MCP) due to the increased risks and prevalence of tardive dyskinesia (TD). In this study, we developed a multi-step biomedical informatics screening (MSBIS) approach leveraging publicly available bioactivity and drug safety data to identify concomitant drugs that mitigate the risks of MCP-induced TD. MSBIS includes (1) TargetSearch (http://dxulab.org/software) bioinformatics scoring for drug anticholinergic activity using CHEMBL bioactivity data; (2) unadjusted odds ratio (UOR) scoring for indications of TD-mitigating effects using the FDA Adverse Event Reporting System (FAERS); (3) adjusted odds ratio (AOR) re-scoring by removing the effect of cofounding factors (age, gender, reporting year); (4) logistic regression (LR) coefficient scoring for confirming the best TD-mitigating drug candidates. 
Drugs with increasing TD protective potential and statistical significance were obtained at each screening step. Fentanyl is identified as the most promising drug against MCP-induced TD (coefficient: -2.68; p-value<0.01). The discovery is supported by clinical reports that patients fully recovered from MCP-induced TD after fentanyl-induced general anesthesia. Loperamide is identified as a potent mitigating drug against a broader range of drug-induced movement disorders through pharmacokinetic modifications. Using drug-induced TD as an example, we demonstrated that MSBIS is an efficient in silico tool for unknown drug-drug interaction detection, drug repurposing, and combination therapy design.",2017-11-22 +29248385,"A cytosolic sensor, PmDDX41, mediates antiviral immune response in black tiger shrimp Penaeus monodon.","DEAD (Asp-Glu-Ala-Asp)-box polypeptide 41 (DDX41), a receptor belonging to the DExD family, has recently been identified as an intracellular DNA sensor in vertebrates. Here, we report on the identification and functional characterization of PmDDX41, the first cytosolic DNA sensor in shrimp. By searching a Penaeus monodon expressed sequence tag (EST) database (http://pmonodon.biotec.or.th), three cDNA fragments exhibiting similarity to DDX41 in various species were identified and assembled, resulting in a complete open reading frame of PmDDX41 that contains 1863-bp and encodes a putative protein of 620 amino acids. PmDDX41 shares 83% and 79% similarity to DDX41 homolog from the bee Apis florea and fruit fly Drosophila melanogaster, respectively and contains three conserved domains in the protein: DEADc domain, HELICc domain, and zinc finger domain. The transcript of PmDDX41 was detected in all tested tissues and was up-regulated upon infection with a DNA virus, white spot syndrome virus (WSSV). 
However, PmDDX41 mRNA expression was not significantly changed and down-regulated in response to a bacterium, Vibrio harveyi, or an RNA virus, yellow head virus (YHV), respectively, compared with the control phosphate-buffered saline-injected shrimp. Furthermore, the suppression of PmDDX41 by dsRNA-mediated gene silencing resulted in more rapid death of WSSV-infected shrimp and a significant decrease in the mRNA expression levels of several immune-related genes (PmIKKβ, PmIKKɛ, PmRelish, PmCactus, PmDorsal, PmPEN3, PmPEN5, and ALFPm6). These results suggest that PmDDX41 is involved in the antiviral response, probably via a DNA-sensing pathway that is triggered through the IκB kinase complex and leads to the activation of several immune-related genes.",2017-12-14 +29761520,Predicting lysine-malonylation sites of proteins using sequence and predicted structural features.,"Malonylation is a recently discovered post-translational modification (PTM) in which a malonyl group attaches to a lysine (K) amino acid residue of a protein. In this work, a novel machine learning model, SPRINT-Mal, is developed to predict malonylation sites by employing sequence and predicted structural features. Evolutionary information and physicochemical properties are found to be the two most discriminative features whereas a structural feature called half-sphere exposure provides additional improvement to the prediction performance. SPRINT-Mal trained on mouse data yields robust performance for 10-fold cross validation and independent test set with Area Under the Curve (AUC) values of 0.74 and 0.76 and Matthews' Correlation Coefficient (MCC) of 0.213 and 0.20, respectively. Moreover, SPRINT-Mal achieved comparable performance when testing on H. sapiens proteins without species-specific training but not in bacterium S. erythraea. This suggests similar underlying physicochemical mechanisms between mouse and human but not between mouse and bacterium. 
SPRINT-Mal is freely available as an online server at: http://sparks-lab.org/server/SPRINT-Mal/. © 2018 Wiley Periodicals, Inc.",2018-05-14 +30361942,Investigation of Spatial Distributions and Temporal Trends of Triclosan in Canadian Surface Waters.,"Triclosan is widely used in personal care products (skin creams, toothpastes, soaps, deodorants, body spray) and cleaning products (dishwashing detergent and all-purpose cleaners) (Halden in Environ Sci Technol 48:3603-3611, 2014). In 2001, it was selected for screening-level risk assessment under the Canadian Environmental Protection Act (HC and EC in Preliminary assessment. Triclosan. Chemical abstracts Service Number 3380-34-5, 2012. http://www.ec.gc.ca/ese-ees/default.asp?lang=En&n=6EF68BEC-1 ), and its physicochemical and toxicological characteristics indicate that there may be a risk to aquatic environments due to releases of the chemical in Canada. A surveillance initiative across Canada has included sampling at 44 sites from July 2012 to March 2018. Triclosan was detected in 226 of 918 samples; concentrations ranged from less than 6 to 874 ng L-1, and the detections averaged 54.23 ng L-1 (standard deviation; 97.6 ng L-1). However, using the entire dataset (including censored data estimated with the Kaplan-Meier model), the mean triclosan concentration was 17.95 ng L-1, and the standard deviation was 52.84 ng L-1. Three samples at Wascana Creek (downstream), Saskatchewan, had concentrations above the Federal Environmental Quality Guidelines of 470 ng L-1, indicating a potential risk to the aquatic ecosystem. In this study, triclosan in samples collected downstream from municipal wastewater treatment plant discharges usually demonstrated higher concentrations than upstream samples. Based on the results of this study, it is hypothesized that triclosan concentration have fluctuated between years of this study but not in an overall or significant increase or decreasing trend. 
Triclosan concentrations and detections also are more prevalent in urban than in rural or mixed development rivers. Performance evaluation of triclosan concentrations in the Canadian environment is scheduled to be reassessed by 2024. Therefore, a 3-year sampling program should be in place across Canada by 2021.",2018-10-25 +26578584,InsectBase: a resource for insect genomes and transcriptomes.,"The genomes and transcriptomes of hundreds of insects have been sequenced. However, insect community lacks an integrated, up-to-date collection of insect gene data. Here, we introduce the first release of InsectBase, available online at http://www.insect-genome.com. The database encompasses 138 insect genomes, 116 insect transcriptomes, 61 insect gene sets, 36 gene families of 60 insects, 7544 miRNAs of 69 insects, 96,925 piRNAs of Drosophila melanogaster and Chilo suppressalis, 2439 lncRNA of Nilaparvata lugens, 22,536 pathways of 78 insects, 678,881 untranslated regions (UTR) of 84 insects and 160,905 coding sequences (CDS) of 70 insects. This release contains over 12 million sequences and provides search functionality, a BLAST server, GBrowse, insect pathway construction, a Facebook-like network for the insect community (iFacebook), and phylogenetic analysis of selected genes.",2015-11-17 +23139896,Next generation sequence analysis and computational genomics using graphical pipeline workflows.,"Whole-genome and exome sequencing have already proven to be essential and powerful methods to identify genes responsible for simple Mendelian inherited disorders. These methods can be applied to complex disorders as well, and have been adopted as one of the current mainstream approaches in population genetics. These achievements have been made possible by next generation sequencing (NGS) technologies, which require substantial bioinformatics resources to analyze the dense and complex sequence data. 
The huge analytical burden of data from genome sequencing might be seen as a bottleneck slowing the publication of NGS papers at this time, especially in psychiatric genetics. We review the existing methods for processing NGS data, to place into context the rationale for the design of a computational resource. We describe our method, the Graphical Pipeline for Computational Genomics (GPCG), to perform the computational steps required to analyze NGS data. The GPCG implements flexible workflows for basic sequence alignment, sequence data quality control, single nucleotide polymorphism analysis, copy number variant identification, annotation, and visualization of results. These workflows cover all the analytical steps required for NGS data, from processing the raw reads to variant calling and annotation. The current version of the pipeline is freely available at http://pipeline.loni.ucla.edu. These applications of NGS analysis may gain clinical utility in the near future (e.g., identifying miRNA signatures in diseases) when the bioinformatics approach is made feasible. Taken together, the annotation tools and strategies that have been developed to retrieve information and test hypotheses about the functional role of variants present in the human genome will help to pinpoint the genetic risk factors for psychiatric disorders.",2012-08-01 +26255309,MetazSecKB: the human and animal secretome and subcellular proteome knowledgebase. ,"The subcellular location of a protein is a key factor in determining the molecular function of the protein in an organism. MetazSecKB is a secretome and subcellular proteome knowledgebase specifically designed for metazoan, i.e. human and animals. The protein sequence data, consisting of over 4 million entries with 121 species having a complete proteome, were retrieved from UniProtKB. 
Protein subcellular locations including secreted and 15 other subcellular locations were assigned based on either curated experimental evidence or prediction using seven computational tools. The protein or subcellular proteome data can be searched and downloaded using several different types of identifiers, gene name or keyword(s), and species. BLAST search and community annotation of subcellular locations are also supported. Our primary analysis revealed that the proteome sizes, secretome sizes and other subcellular proteome sizes vary tremendously in different animal species. The proportions of secretomes vary from 3 to 22% (average 8%) in metazoa species. The proportions of other major subcellular proteomes ranged approximately 21-43% (average 31%) in cytoplasm, 20-37% (average 30%) in nucleus, 3-19% (average 12%) as plasma membrane proteins and 3-9% (average 6%) in mitochondria. We also compared the protein families in secretomes of different primates. The Gene Ontology and protein family domain analysis of human secreted proteins revealed that these proteins play important roles in regulation of human structure development, signal transduction, immune systems and many other biological processes. Database URL: http://proteomics.ysu.edu/secretomes/animal/index.php.",2015-08-08 +27986083,IMP: a pipeline for reproducible reference-independent integrated metagenomic and metatranscriptomic analyses.,"Existing workflows for the analysis of multi-omic microbiome datasets are lab-specific and often result in sub-optimal data usage. Here we present IMP, a reproducible and modular pipeline for the integrated and reference-independent analysis of coupled metagenomic and metatranscriptomic data. IMP incorporates robust read preprocessing, iterative co-assembly, analyses of microbial community structure and function, automated binning, as well as genomic signature-based visualizations. 
The IMP-based data integration strategy enhances data usage, output volume, and output quality as demonstrated using relevant use-cases. Finally, IMP is encapsulated within a user-friendly implementation using Python and Docker. IMP is available at http://r3lab.uni.lu/web/imp/ (MIT license).",2016-12-16 +26473729,Gene-Set Local Hierarchical Clustering (GSLHC)--A Gene Set-Based Approach for Characterizing Bioactive Compounds in Terms of Biological Functional Groups.,"Gene-set-based analysis (GSA), which uses the relative importance of functional gene-sets, or molecular signatures, as units for analysis of genome-wide gene expression data, has exhibited major advantages with respect to greater accuracy, robustness, and biological relevance, over individual gene analysis (IGA), which uses log-ratios of individual genes for analysis. Yet IGA remains the dominant mode of analysis of gene expression data. The Connectivity Map (CMap), an extensive database on genomic profiles of effects of drugs and small molecules and widely used for studies related to repurposed drug discovery, has been mostly employed in IGA mode. Here, we constructed a GSA-based version of CMap, Gene-Set Connectivity Map (GSCMap), in which all the genomic profiles in CMap are converted, using gene-sets from the Molecular Signatures Database, to functional profiles. We showed that GSCMap essentially eliminated cell-type dependence, a weakness of CMap in IGA mode, and yielded significantly better performance on sample clustering and drug-target association. As a first application of GSCMap we constructed the platform Gene-Set Local Hierarchical Clustering (GSLHC) for discovering insights on coordinated actions of biological functions and facilitating classification of heterogeneous subtypes on drug-driven responses. GSLHC was shown to tightly clustered drugs of known similar properties. 
We used GSLHC to identify the therapeutic properties and putative targets of 18 compounds of previously unknown characteristics listed in CMap, eight of which suggest anti-cancer activities. The GSLHC website http://cloudr.ncu.edu.tw/gslhc/ contains 1,857 local hierarchical clusters accessible by querying 555 of the 1,309 drugs and small molecules listed in CMap. We expect GSCMap and GSLHC to be widely useful in providing new insights in the biological effect of bioactive compounds, in drug repurposing, and in function-based classification of complex diseases.",2015-10-16 +25428375,The RCSB Protein Data Bank: views of structural biology for basic and applied research and education.,"The RCSB Protein Data Bank (RCSB PDB, http://www.rcsb.org) provides access to 3D structures of biological macromolecules and is one of the leading resources in biology and biomedicine worldwide. Our efforts over the past 2 years focused on enabling a deeper understanding of structural biology and providing new structural views of biology that support both basic and applied research and education. Herein, we describe recently introduced data annotations including integration with external biological resources, such as gene and drug databases, new visualization tools and improved support for the mobile web. We also describe access to data files, web services and open access software components to enable software developers to more effectively mine the PDB archive and related annotations. Our efforts are aimed at expanding the role of 3D structure in understanding biology and medicine.",2014-11-26 +27987165,FLAGdb++: A Bioinformatic Environment to Study and Compare Plant Genomes.,"Today, the growing knowledge and data accumulation on plant genomes do not solve in a simple way the task of gene function inference. Because data of different types are coming from various sources, we need to integrate and analyze them to help biologists in this task. 
We created FLAGdb++ ( http://tools.ips2.u-psud.fr/FLAGdb ) to take up this challenge for a selection of plant genomes. In order to enrich gene function predictions, structural and functional annotations of the genomes are explored to generate meta-data and to compare them. Since data are numerous and complex, we focused on accessibility and visualization with an original and user-friendly interface. In this chapter we present the main tools of FLAGdb++ and a use-case to explore a gene family: structural and functional properties of this family and research of orthologous genes in the other plant genomes.",2017-01-01 +30739867,Molecular Signature of CAID Syndrome: Noncanonical Roles of SGO1 in Regulation of TGF-β Signaling and Epigenomics.,"BACKGROUND & AIMS:A generalized human pacemaking syndrome, chronic atrial and intestinal dysrhythmia (CAID) (OMIM 616201), is caused by a homozygous SGO1 mutation (K23E), leading to chronic intestinal pseudo-obstruction and arrhythmias. Because CAID patients do not show phenotypes consistent with perturbation of known roles of SGO1, we hypothesized that noncanonical roles of SGO1 drive the clinical manifestations observed. METHODS:To identify a molecular signature for CAID syndrome, we achieved unbiased screens in cell lines and gut tissues from CAID patients vs wild-type controls. We performed RNA sequencing along with stable isotope labeling with amino acids in cell culture. In addition, we determined the genome-wide DNA methylation and chromatin accessibility signatures using reduced representative bisulfite sequencing and assay for transposase-accessible chromatin with high-throughput sequencing. Functional studies included patch-clamp, quantitation of transforming growth factor-β (TGF-β) signaling, and immunohistochemistry in CAID patient gut biopsy specimens. RESULTS:Proteome and transcriptome studies converge on cell-cycle regulation, cardiac conduction, and smooth muscle regulation as drivers of CAID syndrome. 
Specifically, the inward rectifier current, an important regulator of cellular function, was disrupted. Immunohistochemistry confirmed overexpression of Budding Uninhibited By Benzimidazoles 1 (BUB1) in patients, implicating the TGF-β pathway in CAID pathogenesis. Canonical TGF-β signaling was up-regulated and uncoupled from noncanonical signaling in CAID patients. Reduced representative bisulfite sequencing and assay for transposase-accessible chromatin with high-throughput sequencing experiments showed significant changes of chromatin states in CAID, pointing to epigenetic regulation as a possible pathologic mechanism. CONCLUSIONS:Our findings point to impaired inward rectifier potassium current, dysregulation of canonical TGF-β signaling, and epigenetic regulation as potential drivers of intestinal and cardiac manifestations of CAID syndrome. Transcript profiling and genomics data are as follows: repository URL: https://www.ncbi.nlm.nih.gov/geo; SuperSeries GSE110612 was composed of the following subseries: GSE110309, GSE110576, and GSE110601.",2018-10-24 +28065896,GE-mini: a mobile APP for large-scale gene expression visualization.,"

Summary

The Cancer Genome Atlas (TCGA) and Genotype-Tissue Expression (GTEx) projects produced large-scale RNA sequencing data, which provides an opportunity for performing integrated expression analysis for all genes across tens of thousands of tumor and normal tissue specimens. Rapid access to and easy visualization of such valuable data could facilitate research in a wide biological area. Here, we present the GE-mini APP for smart phones, a mobile visualization tool for integrated gene expression data based on both TCGA and GTEx. This gene-centric expression viewer provides a convenient method for displaying expression profiles of all available tumor and tissue types, while allowing drilling down to detailed views for specific tissue types.

Availability and implementation

Both the iOS and Android APPs are freely available to all non-commercial users in App Store and Google Play. The QR codes of App store and Google play are also provided for scanning and download. The GE-mini web server is also available at http://gemini.cancer-pku.cn/ .

Contacts

tangzefang@pku.edu.cn or huxueda@pku.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +29638227,A toolbox of immunoprecipitation-grade monoclonal antibodies to human transcription factors.,"A key component of efforts to address the reproducibility crisis in biomedical research is the development of rigorously validated and renewable protein-affinity reagents. As part of the US National Institutes of Health (NIH) Protein Capture Reagents Program (PCRP), we have generated a collection of 1,406 highly validated immunoprecipitation- and/or immunoblotting-grade mouse monoclonal antibodies (mAbs) to 737 human transcription factors, using an integrated production and validation pipeline. We used HuProt human protein microarrays as a primary validation tool to identify mAbs with high specificity for their cognate targets. We further validated PCRP mAbs by means of multiple experimental applications, including immunoprecipitation, immunoblotting, chromatin immunoprecipitation followed by sequencing (ChIP-seq), and immunohistochemistry. We also conducted a meta-analysis that identified critical variables that contribute to the generation of high-quality mAbs. All validation data, protocols, and links to PCRP mAb suppliers are available at http://proteincapture.org.",2018-03-19 +28853897,Progress on the HUPO Draft Human Proteome: 2017 Metrics of the Human Proteome Project.,"The Human Proteome Organization (HUPO) Human Proteome Project (HPP) continues to make progress on its two overall goals: (1) completing the protein parts list, with an annual update of the HUPO draft human proteome, and (2) making proteomics an integrated complement to genomics and transcriptomics throughout biomedical and life sciences research. neXtProt version 2017-01-23 has 17 008 confident protein identifications (Protein Existence [PE] level 1) that are compliant with the HPP Guidelines v2.1 ( https://hupo.org/Guidelines ), up from 13 664 in 2012-12 and 16 518 in 2016-04. 
Remaining to be found by mass spectrometry and other methods are 2579 ""missing proteins"" (PE2+3+4), down from 2949 in 2016. PeptideAtlas 2017-01 has 15 173 canonical proteins, accounting for nearly all of the 15 290 PE1 proteins based on MS data. These resources have extensive data on PTMs, single amino acid variants, and splice isoforms. The Human Protein Atlas v16 has 10 492 highly curated protein entries with tissue and subcellular spatial localization of proteins and transcript expression. Organ-specific popular protein lists have been generated for broad use in quantitative targeted proteomics using SRM-MS or DIA-SWATH-MS studies of biology and disease.",2017-10-09 +29232891,The Effect of Polyphenol-Rich Interventions on Cardiovascular Risk Factors in Haemodialysis: A Systematic Review and Meta-Analysis. ,"End-stage kidney disease is a strong risk factor for cardiovascular-specific mortality. Polyphenol-rich interventions may attenuate cardiovascular disease risk factors; however, this has not been systematically evaluated in the hemodialysis population. Using the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) guidelines, the following databases were searched: Cochrane Library (http://www.cochranelibrary.com/), MEDLINE (https://health.ebsco.com/products/medline-with-full-text), Embase (https://www.elsevier.com/solutions/embase-biomedical-research), and CINAHL (https://www.ebscohost.com/nursing/products/cinahl-databases/cinahl-complete). Meta-analyses were conducted for measures of lipid profile, inflammation, oxidative stress, and blood pressure. Risk of bias was assessed using the Cochrane Collaboration Risk of Bias tool and quality of the body of evidence was assessed by the Grading of Recommendations, Assessment, Development and Evaluation (GRADE) methodology. Twelve studies were included for review. Polyphenol-rich interventions included soy, cocoa, pomegranate, grape, and turmeric. 
Polyphenol-rich interventions significantly improved diastolic blood pressure (Mean Difference (MD) -5.62 mmHg (95% Confidence Interval (CI) -8.47, -2.78); I² = 2%; p = 0.0001), triglyceride levels (MD -26.52 mg/dL (95% CI -47.22, -5.83); I² = 57%; p = 0.01), and myeloperoxidase (MD -90.10 (95% CI -135.84, -44.36); I² = 0%; p = 0.0001). Included studies generally had low or unclear risks of bias. The results of this review provide preliminary support for the use of polyphenol-rich interventions for improving cardiovascular risk markers in haemodialysis patients. Due to the limited number of studies for individual polyphenol interventions, further studies are required to provide recommendations regarding individual polyphenol intervention and dose.",2017-12-11 +30160308,The Effect of Endothelial Cells on UVB-induced DNA Damage and Transformation of Keratinocytes In 3D Polycaprolactone Scaffold Co-culture System.,"Nitric oxide ( NO · ) plays an important role in the regulation of redox balance in keratinocytes post-UVB exposure. Since endothelial cells releases NO · for a prolonged time post-UVB, we determined whether human umbilical vein endothelial cells (HUVEC) could have an effect on UVB-induced DNA damage and transformation of their adjacent keratinocytes (HaCaT) using a 3D cell co-culturing system. Our data show that the levels of DNA breaks and/or cyclobutane pyrimidine dimer (CPD) along with γH2AX are higher in the co-cultured than in the mono-cultured keratinocytes post-UVB. The NO · level in the co-cultured cells is increased approximately 3-fold more than in mono-cultured HaCaT cells within 1-hour post-UVB but then is reduced quickly in co-cultured HaCaT cells comparing to mono-cultured cells from 6 to 24 h post-UVB. However, the peroxynitrite (ONOO- ) level is higher in the co-cultured than in the mono-cultured HaCaT cells in whole period post-UVB. 
Furthermore, while expression level of inducible nitric oxide synthase (iNOS) is increased, the ratio of coupled/uncoupled eNOS is reduced in co-cultured HaCaT cells compared to mono-cultured HaCaT cells. Finally, the co-cultured cells have a significantly increased transformation efficiency after repeating UVB exposure compared to mono-culture HaCaT cells. Our results suggest that endothelial cells could enhance NO · /ONOO- imbalance and promote transformation of adjacent keratinocytes.",2018-10-22 +30484039,A Mathematical Model of Fluid Transport in an Accurate Reconstruction of Parotid Acinar Cells.,"Salivary gland acinar cells use the calcium ([Formula: see text]) ion as a signalling messenger to regulate a diverse range of intracellular processes, including the secretion of primary saliva. Although the underlying mechanisms responsible for saliva secretion are reasonably well understood, the precise role played by spatially heterogeneous intracellular [Formula: see text] signalling in these cells remains uncertain. In this study, we use a mathematical model, based on new and unpublished experimental data from parotid acinar cells (measured in excised lobules of mouse parotid gland), to investigate how the structure of the cell and the spatio-temporal properties of [Formula: see text] signalling influence the production of primary saliva. We combine a new [Formula: see text] signalling model [described in detail in a companion paper: Pages et al. in Bull Math Biol 2018, submitted] with an existing secretion model (Vera-Sigüenza et al. in Bull Math Biol 80:255-282, 2018. https://doi.org/10.1007/s11538-017-0370-6 ) and solve the resultant model in an anatomically accurate three-dimensional cell. Our study yields three principal results. Firstly, we show that spatial heterogeneities of [Formula: see text] concentration in either the apical or basal regions of the cell have no significant effect on the rate of primary saliva secretion. 
Secondly, in agreement with previous work (Palk et al., in J Theor Biol 305:45-53, 2012. https://doi.org/10.1016/j.jtbi.2012.04.009 ) we show that the frequency of [Formula: see text] oscillation has no significant effect on the rate of primary saliva secretion, which is determined almost entirely by the mean (over time) of the apical and basal [Formula: see text]. Thirdly, it is possible to model the rate of primary saliva secretion as a quasi-steady-state function of the cytosolic [Formula: see text] averaged over the entire cell when modelling the flow rate is the only interest, thus ignoring all the dynamic complexity not only of the fluid secretion mechanism but also of the intracellular heterogeneity of [Formula: see text]. Taken together, our results demonstrate that an accurate multiscale model of primary saliva secretion from a single acinar cell can be constructed by ignoring the vast majority of the spatial and temporal complexity of the underlying mechanisms.",2018-11-27 +28544911,ARIANNA: A research environment for neuroimaging studies in autism spectrum disorders.,"The complexity and heterogeneity of Autism Spectrum Disorders (ASD) require the implementation of dedicated analysis techniques to obtain the maximum from the interrelationship among many variables that describe affected individuals, spanning from clinical phenotypic characterization and genetic profile to structural and functional brain images. The ARIANNA project has developed a collaborative interdisciplinary research environment that is easily accessible to the community of researchers working on ASD (https://arianna.pi.infn.it). 
The main goals of the project are: to analyze neuroimaging data acquired in multiple sites with multivariate approaches based on machine learning; to detect structural and functional brain characteristics that allow the distinguishing of individuals with ASD from control subjects; to identify neuroimaging-based criteria to stratify the population with ASD to support the future development of personalized treatments. Secure data handling and storage are guaranteed within the project, as well as the access to fast grid/cloud-based computational resources. This paper outlines the web-based architecture, the computing infrastructure and the collaborative analysis workflows at the basis of the ARIANNA interdisciplinary working environment. It also demonstrates the full functionality of the research platform. The availability of this innovative working environment for analyzing clinical and neuroimaging information of individuals with ASD is expected to support researchers in disentangling complex data thus facilitating their interpretation.",2017-05-17 +27863956,The International Human Epigenome Consortium Data Portal.,"The International Human Epigenome Consortium (IHEC) coordinates the production of reference epigenome maps through the characterization of the regulome, methylome, and transcriptome from a wide range of tissues and cell types. To define conventions ensuring the compatibility of datasets and establish an infrastructure enabling data integration, analysis, and sharing, we developed the IHEC Data Portal (http://epigenomesportal.ca/ihec). The portal provides access to >7,000 reference epigenomic datasets, generated from >600 tissues, which have been contributed by seven international consortia: ENCODE, NIH Roadmap, CEEHRC, Blueprint, DEEP, AMED-CREST, and KNIH. The portal enhances the utility of these reference maps by facilitating the discovery, visualization, analysis, download, and sharing of epigenomics data. 
The IHEC Data Portal is the official source to navigate through IHEC datasets and represents a strategy for unifying the distributed data produced by international research consortia.",2016-11-15 +29946806,Multimodal treatment in locally advanced gastric cancer.,"According to the data of the GLOBOCAN-network of the World Health Organization, there were 952,000 (6.8% of the total) new cases of gastric cancer in 2012, making it the fifth most common malignancy in the world. It represents a substantive change since the very first estimates in 1975 when stomach cancer was the most common neoplasm. More than 70% of cases (677,000 cases) occur in developing countries, and half the world total occurs in Eastern Asia, mainly in China. Gastric cancer is the third leading cause of cancer death in both sexes worldwide (Globocan, Estimated cancer incidence, mortality and prevalence worldwide in 2012, http://globocan.iarc.fr , 2012). Annually, worldwide 723,000 patients die of this tumor entity. Interestingly, a strong change in incidence rates in relation to the anatomical-topographic localization of the primary tumors in the stomach and esophagus has been experienced. While the frequency of proximal gastric carcinoma and adenocarcinoma of the cardiac and subcardiac region in Europe and North America has been constantly rising, distal gastric carcinomas have become less common (Torre et al. in JAMA 65:87-108, 2015). Furthermore, the relative incidence of esophageal adenocarcinoma (mostly localized in the distal esophagus) has strongly increased (Jemal et al. in JAMA 58:71-96, 2008; Crew and Neugut 31:450-464, 2004; Pohl and Welch 97:142-146, 2005).",2018-06-26 +30466046,Intrinsic-overlapping co-expression module detection with application to Alzheimer's Disease.,"Genes interact with each other and may cause perturbation in the molecular pathways leading to complex diseases. 
Often, instead of any single gene, a subset of genes interact, forming a network, to share common biological functions. Such a subnetwork is called a functional module or motif. Identifying such modules and central key genes in them, that may be responsible for a disease, may help design patient-specific drugs. In this study, we consider the neurodegenerative Alzheimer's Disease (AD) and identify potentially responsible genes from functional motif analysis. We start from the hypothesis that central genes in genetic modules are more relevant to a disease that is under investigation and identify hub genes from the modules as potential marker genes. Motifs or modules are often non-exclusive or overlapping in nature. Moreover, they sometimes show intrinsic or hierarchical distributions with overlapping functional roles. To the best of our knowledge, no prior work handles both the situations in an integrated way. We propose a non-exclusive clustering approach, CluViaN (Clustering Via Network) that can detect intrinsic as well as overlapping modules from gene co-expression networks constructed using microarray expression profiles. We compare our method with existing methods to evaluate the quality of modules extracted. CluViaN reports the presence of intrinsic and overlapping motifs in different species not reported by any other research. We further apply our method to extract significant AD specific modules using CluViaN and rank them based the number of genes from a module involved in the disease pathways. Finally, top central genes are identified by topological analysis of the modules. We use two different AD phenotype data for experimentation. We observe that central genes, namely PSEN1, APP, NDUFB2, NDUFA1, UQCR10, PPP3R1 and a few more, play significant roles in the AD. 
Interestingly, our experiments also find a hub gene, PML, which has recently been reported to play a role in plasticity, circadian rhythms and the response to proteins which can cause neurodegenerative disorders. MUC4, another hub gene that we find experimentally is yet to be investigated for its potential role in AD. A software implementation of CluViaN in Java is available for download at https://sites.google.com/site/swarupnehu/publications/resources/CluViaN Software.rar.",2018-11-09 +25304780,Accurate estimation of haplotype frequency from pooled sequencing data and cost-effective identification of rare haplotype carriers by overlapping pool sequencing.,"

Motivation

A variety of hypotheses have been proposed for finding the missing heritability of complex diseases in genome-wide association studies. Studies have focused on the value of haplotype to improve the power of detecting associations with disease. To facilitate haplotype-based association analysis, it is necessary to accurately estimate haplotype frequencies of pooled samples.

Results

Taking advantage of databases that contain prior haplotypes, we present Ehapp based on the algorithm for solving the system of linear equations to estimate the frequencies of haplotypes from pooled sequencing data. Effects of various factors in sequencing on the performance are evaluated using simulated data. Our method could estimate the frequencies of haplotypes with only about 3% average relative difference for pooled sequencing of the mixture of 10 haplotypes with total coverage of 50×. When unknown haplotypes exist, our method maintains excellent performance for haplotypes with actual frequencies >0.05. Comparisons with present method on simulated data in conjunction with publicly available Illumina sequencing data indicate that our method is state of the art for many sequencing study designs. We also demonstrate the feasibility of applying overlapping pool sequencing to identify rare haplotype carriers cost-effectively.

Availability and implementation

Ehapp (in Perl) for the Linux platforms is available online (http://bioinfo.seu.edu.cn/Ehapp/).

Contact

xsun@seu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-09 +30130176,Pixel Objectness: Learning to Segment Generic Objects Automatically in Images and Videos.,"We propose an end-to-end learning framework for segmenting generic objects in both images and videos. Given a novel image or video, our approach produces a pixel-level mask for all ""object-like"" regions-even for object categories never seen during training. We formulate the task as a structured prediction problem of assigning an object/background label to each pixel, implemented using a deep fully convolutional network. When applied to a video, our model further incorporates a motion stream, and the network learns to combine both appearance and motion and attempts to extract all prominent objects whether they are moving or not. Beyond the core model, a second contribution of our approach is how it leverages varying strengths of training annotations. Pixel-level annotations are quite difficult to obtain, yet crucial for training a deep network approach for segmentation. Thus we propose ways to exploit weakly labeled data for learning dense foreground segmentation. For images, we show the value in mixing object category examples with image-level labels together with relatively few images with boundary-level annotations. For video, we show how to bootstrap weakly annotated videos together with the network trained for image segmentation. Through experiments on multiple challenging image and video segmentation benchmarks, our method offers consistently strong results and improves the state-of-the-art for fully automatic segmentation of generic (unseen) objects. In addition, we demonstrate how our approach benefits image retrieval and image retargeting, both of which flourish when given our high-quality foreground maps. 
Code, models, and videos are at: http://vision.cs.utexas.edu/projects/pixelobjectness/.",2018-08-17 +26861916,ApoCanD: Database of human apoptotic proteins in the context of cancer.,"In the past decade, apoptosis pathway has gained a serious consideration being a critical cellular process in determining the cancer progression. Inverse relationship between cancer progression and apoptosis rate has been well established in the literature. It causes apoptosis proteins under the investigative scanner for developing anticancer therapies, which certainly got a success in the case of few apoptosis proteins as drug targets. In the present study, we have developed a dedicated database of 82 apoptosis proteins called ApoCanD. This database comprises of crucial information of apoptosis proteins in the context of cancer. Genomic status of proteins in the form of mutation, copy number variation and expression in thousands of tumour samples and cancer cell lines are the major bricks of this database. In analysis, we have found that TP53 and MYD88 are the two most frequently mutated proteins in cancer. Availability of other information e.g. gene essentiality data, tertiary structure, sequence alignments, sequences profiles, post-translational modifications makes it even more useful for the researchers. A user-friendly web interface is provided to ameliorate the use of ApoCanD. We anticipate that, this database will facilitate the research community working in the field of apoptosis and cancer. 
The database can be accessed at: http://crdd.osdd.net/raghava/apocand.",2016-02-10 +25392417,"CeCaFDB: a curated database for the documentation, visualization and comparative analysis of central carbon metabolic flux distributions explored by 13C-fluxomics.","The Central Carbon Metabolic Flux Database (CeCaFDB, available at http://www.cecafdb.org) is a manually curated, multipurpose and open-access database for the documentation, visualization and comparative analysis of the quantitative flux results of central carbon metabolism among microbes and animal cells. It encompasses records for more than 500 flux distributions among 36 organisms and includes information regarding the genotype, culture medium, growth conditions and other specific information gathered from hundreds of journal articles. In addition to its comprehensive literature-derived data, the CeCaFDB supports a common text search function among the data and interactive visualization of the curated flux distributions with compartmentation information based on the Cytoscape Web API, which facilitates data interpretation. The CeCaFDB offers four modules to calculate a similarity score or to perform an alignment between the flux distributions. One of the modules was built using an inter programming algorithm for flux distribution alignment that was specifically designed for this study. Based on these modules, the CeCaFDB also supports an extensive flux distribution comparison function among the curated data. The CeCaFDB is strenuously designed to address the broad demands of biochemists, metabolic engineers, systems biologists and members of the -omics community.",2014-11-11 +31240274,CoGe LoadExp+: A web-based suite that integrates next-generation sequencing data analysis workflows and visualization. 
,"To make genomic and epigenomic analyses more widely available to the biological research community, we have created LoadExp+, a suite of bioinformatics workflows integrated with the web-based comparative genomics platform, CoGe. LoadExp+ allows users to perform transcriptomic (RNA-seq), epigenomic (bisulfite-seq), chromatin-binding (ChIP-seq), variant identification (SNPs), and population genetics analyses against any genome in CoGe, including genomes integrated by users themselves. Through LoadExp+'s integration with CoGe's existing features, all analyses are available for visualization and additional downstream processing, and are available for export to CyVerse's data management and analysis platforms. LoadExp+ provides easy-to-use functionality to manage genomics and epigenomics data throughout its entire lifecycle using a publicly available web-based platform and facilitates greater accessibility of genomics analyses to researchers of all skill levels. LoadExp+ can be accessed at https://genomevolution.org.",2017-07-20 +23228284,CBS: an open platform that integrates predictive methods and epigenetics information to characterize conserved regulatory features in multiple Drosophila genomes.,"

Background

Information about the composition of regulatory regions is of great value for designing experiments to functionally characterize gene expression. The multiplicity of available applications to predict transcription factor binding sites in a particular locus contrasts with the substantial computational expertise that is demanded to manipulate them, which may constitute a potential barrier for the experimental community.

Results

CBS (Conserved regulatory Binding Sites, http://compfly.bio.ub.es/CBS) is a public platform of evolutionarily conserved binding sites and enhancers predicted in multiple Drosophila genomes that is furnished with published chromatin signatures associated to transcriptionally active regions and other experimental sources of information. The rapid access to this novel body of knowledge through a user-friendly web interface enables non-expert users to identify the binding sequences available for any particular gene, transcription factor, or genome region.

Conclusions

The CBS platform is a powerful resource that provides tools for data mining individual sequences and groups of co-expressed genes with epigenomics information to conduct regulatory screenings in Drosophila.",2012-12-10 +21602267,PINTA: a web server for network-based gene prioritization from expression data.,"PINTA (available at http://www.esat.kuleuven.be/pinta/; this web site is free and open to all users and there is no login requirement) is a web resource for the prioritization of candidate genes based on the differential expression of their neighborhood in a genome-wide protein-protein interaction network. Our strategy is meant for biological and medical researchers aiming at identifying novel disease genes using disease specific expression data. PINTA supports both candidate gene prioritization (starting from a user defined set of candidate genes) as well as genome-wide gene prioritization and is available for five species (human, mouse, rat, worm and yeast). As input data, PINTA only requires disease specific expression data, whereas various platforms (e.g. Affymetrix) are supported. As a result, PINTA computes a gene ranking and presents the results as a table that can easily be browsed and downloaded by the user.",2011-05-20 +30115696,"Oxygen-Enhanced and Dynamic Contrast-Enhanced Optoacoustic Tomography Provide Surrogate Biomarkers of Tumor Vascular Function, Hypoxia, and Necrosis.","Measuring the functional status of tumor vasculature, including blood flow fluctuations and changes in oxygenation, is important in cancer staging and therapy monitoring. Current clinically approved imaging modalities suffer long procedure times and limited spatiotemporal resolution. Optoacoustic tomography (OT) is an emerging clinical imaging modality that may overcome these challenges. By acquiring data at multiple wavelengths, OT can interrogate hemoglobin concentration and oxygenation directly and resolve contributions from injected contrast agents. 
In this study, we tested whether two dynamic OT techniques, oxygen-enhanced (OE) and dynamic contrast-enhanced (DCE)-OT, could provide surrogate biomarkers of tumor vascular function, hypoxia, and necrosis. We found that vascular maturity led to changes in vascular function that affected tumor perfusion, modulating the DCE-OT signal. Perfusion in turn regulated oxygen availability, driving the OE-OT signal. In particular, we demonstrate for the first time a strong per-tumor and spatial correlation between imaging biomarkers derived from these in vivo techniques and tumor hypoxia quantified ex vivo Our findings indicate that OT may offer a significant advantage for localized imaging of tumor response to vascular-targeted therapies when compared with existing clinical DCE methods.Significance: Imaging biomarkers derived from optoacoustic tomography can be used as surrogate measures of tumor perfusion and hypoxia, potentially yielding rapid, multiparametric, and noninvasive cancer staging and therapeutic response monitoring in the clinic.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/20/5980/F1.large.jpg Cancer Res; 78(20); 5980-91. ©2018 AACR.",2018-08-16 +26615193,PDBFlex: exploring flexibility in protein structures.,"The PDBFlex database, available freely and with no login requirements at http://pdbflex.org, provides information on flexibility of protein structures as revealed by the analysis of variations between depositions of different structural models of the same protein in the Protein Data Bank (PDB). PDBFlex collects information on all instances of such depositions, identifying them by a 95% sequence identity threshold, performs analysis of their structural differences and clusters them according to their structural similarities for easy analysis. 
The PDBFlex contains tools and viewers enabling in-depth examination of structural variability including: 2D-scaling visualization of RMSD distances between structures of the same protein, graphs of average local RMSD in the aligned structures of protein chains, graphical presentation of differences in secondary structure and observed structural disorder (unresolved residues), difference distance maps between all sets of coordinates and 3D views of individual structures and simulated transitions between different conformations, the latter displayed using JSMol visualization software.",2015-11-28 +28881984,"Estimation of time-varying growth, uptake and excretion rates from dynamic metabolomics data.","

Motivation

Technological advances in metabolomics have made it possible to monitor the concentration of extracellular metabolites over time. From these data, it is possible to compute the rates of uptake and excretion of the metabolites by a growing cell population, providing precious information on the functioning of intracellular metabolism. The computation of the rate of these exchange reactions, however, is difficult to achieve in practice for a number of reasons, notably noisy measurements, correlations between the concentration profiles of the different extracellular metabolites, and discontinuties in the profiles due to sudden changes in metabolic regime.

Results

We present a method for precisely estimating time-varying uptake and excretion rates from time-series measurements of extracellular metabolite concentrations, specifically addressing all of the above issues. The estimation problem is formulated in a regularized Bayesian framework and solved by a combination of extended Kalman filtering and smoothing. The method is shown to improve upon methods based on spline smoothing of the data. Moreover, when applied to two actual datasets, the method recovers known features of overflow metabolism in Escherichia coli and Lactococcus lactis , and provides evidence for acetate uptake by L. lactis after glucose exhaustion. The results raise interesting perspectives for further work on rate estimation from measurements of intracellular metabolites.

Availability and implementation

The Matlab code for the estimation method is available for download at https://team.inria.fr/ibis/rate-estimation-software/ , together with the datasets.

Contact

eugenio.cinquemani@inria.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +28600227,miRTarVis+: Web-based interactive visual analytics tool for microRNA target predictions.,"In this paper, we present miRTarVis+, a Web-based interactive visual analytics tool for miRNA target predictions and integrative analyses of multiple prediction results. Various microRNA (miRNA) target prediction algorithms have been developed to improve sequence-based miRNA target prediction by exploiting miRNA-mRNA expression profile data. There are also a few analytics tools to help researchers predict targets of miRNAs. However, there still is a need for improving the performance for miRNA prediction algorithms and more importantly for interactive visualization tools for an integrative analysis of multiple prediction results. miRTarVis+ has an intuitive interface to support the analysis pipeline of load, filter, predict, and visualize. It can predict targets of miRNA by adopting Bayesian inference and maximal information-based nonparametric exploration (MINE) analyses as well as conventional correlation and mutual information analyses. miRTarVis+ supports an integrative analysis of multiple prediction results by providing an overview of multiple prediction results and then allowing users to examine a selected miRNA-mRNA network in an interactive treemap and node-link diagram. To evaluate the effectiveness of miRTarVis+, we conducted two case studies using miRNA-mRNA expression profile data of asthma and breast cancer patients and demonstrated that miRTarVis+ helps users more comprehensively analyze targets of miRNA from miRNA-mRNA expression profile data. miRTarVis+ is available at http://hcil.snu.ac.kr/research/mirtarvisplus.",2017-06-07 +29375390,The Effects of Tai Chi Training in Patients with Heart Failure: A Systematic Review and Meta-Analysis.,"Heart Failure (HF) is associated with significantly high morbidity and mortality. 
We performed a meta-analysis and updated new evidences from randomized controlled trials (RCTs) to determine the effects of Tai Chi (TC) in patients with HF. Electronic literature search of Medline, PubMed, EMBASE, the Cochrane Library, China national knowledge infrastructure (CNKI), and Wan Fang Database was conducted from inception of their establishment until 2017. And we also searched Clinical Trials Registries (https://clinicaltrials.gov/ and www.controlled-trials.com) for on-going studies. A total of 11 trials with 656 patients were available for analysis. The results suggested that TC was associated with an obviously improved 6-min walk distance [6MWD, weighted mean difference (WMD) 65.29 m; 95% CI 32.55-98.04] and quality of life (Qol, WMD -11.52 points; 95% CI -16.5 to -6.98) and left ventricular ejection fraction (LVEF, WMD 9.94%; 95% CI 6.95 to 12.93). TC was shown to reduce serum B-type natriuretic peptide [BNP, standard mean difference (SMD) -1.08 pg/mL; 95% CI -1.91 to -0.26] and heart rate (HR, WMD -2.52 bpm; 95% CI -3.49 to -1.55). In summary, our meta-analysis demonstrated the clinical evidence about TC for HF is inconclusive. TC could improve 6MWD, Qol and LVEF in patients with HF and may reduce BNP and HR. However, there is a lack of evidence to support TC altering other important long-term clinical outcomes so far. Further larger and more sustainable RCTs are urgently needed to investigate the effects of TC.",2017-12-07 +29224730,A validated calculator to estimate risk of cesarean after an induction of labor with an unfavorable cervix.,"

Background

Induction of labor occurs in >20% of pregnancies, which equates to approximately 1 million women undergoing an induction in the United States annually. Regardless of how common inductions are, our ability to predict induction success is limited. Although multiple risk factors for a failed induction have been identified, risk factors alone are not enough to quantify an actual risk of cesarean for an individual woman undergoing a cesarean.

Objective

The objective of this study was to derive and validate a prediction model for cesarean after induction with an unfavorable cervix and to create a Web-based calculator to assist in patient counseling.

Study design

Derivation and validation of a prediction model for cesarean delivery after induction was performed as part of a planned secondary analysis of a large randomized trial. A predictive model for cesarean delivery was derived using multivariable logistic regression from a large randomized trial on induction methods (n = 491) that took place from 2013 through 2015 at an academic institution. Full-term (≥37 weeks) women carrying a singleton gestation with intact membranes and an unfavorable cervix (Bishop score ≤6 and dilation ≤2 cm) undergoing an induction were included in this trial. Both nulliparous and multiparous women were included. Women with a prior cesarean were excluded. Refinement of the prediction model was performed using an observational cohort of women from the same institution who underwent an induction (n = 364) during the trial period. An external validation was performed utilizing a publicly available database (Consortium for Safe Labor) that includes information for >200,000 deliveries from 19 hospitals across the United States from 2002 through 2008. After applying the same inclusion and exclusion criteria utilized in the derivation cohort, a total of 8466 women remained for analysis. The discriminative power of each model was assessed using a bootstrap, bias-corrected area under the curve.

Results

The cesarean delivery rates in the derivation and external validation groups were: 27.7% (n = 136/491) and 26.4% (n = 2235/8466). In multivariable modeling, nulliparity, gestation age ≥40 weeks, body mass index at delivery, modified Bishop score, and height were significantly associated with cesarean. A nomogram and calculator were created and found to have an area under the curve in the external validation cohort of 0.73 (95% confidence interval, 0.72-0.74).

Conclusion

A nomogram and user-friendly Web-based calculator that incorporates 5 variables known at the start of induction has been developed and validated. It can be found at: http://www.uphs.upenn.edu/obgyn/labor-induction-calculator/. This calculator can be used to augment patient counseling for women undergoing an induction with an unfavorable cervix.",2017-12-07 +26088801,"Rchemcpp: a web service for structural analoging in ChEMBL, Drugbank and the Connectivity Map.","

Unlabelled

We have developed Rchempp, a web service that identifies structurally similar compounds (structural analogs) in large-scale molecule databases. The service allows compounds to be queried in the widely used ChEMBL, DrugBank and the Connectivity Map databases. Rchemcpp utilizes the best performing similarity functions, i.e. molecule kernels, as measures for structural similarity. Molecule kernels have proven superior performance over other similarity measures and are currently excelling at machine learning challenges. To considerably reduce computational time, and thereby make it feasible as a web service, a novel efficient prefiltering strategy has been developed, which maintains the sensitivity of the method. By exploiting information contained in public databases, the web service facilitates many applications crucial for the drug development process, such as prioritizing compounds after screening or reducing adverse side effects during late phases. Rchemcpp was used in the DeepTox pipeline that has won the Tox21 Data Challenge and is frequently used by researchers in pharmaceutical companies.

Availability and implementation

The web service and the R package are freely available via http://shiny.bioinf.jku.at/Analoging/ and via Bioconductor.

Contact

hochreit@bioinf.jku.at

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-17 +26384372,Deep Question Answering for protein annotation. ,"Biomedical professionals have access to a huge amount of literature, but when they use a search engine, they often have to deal with too many documents to efficiently find the appropriate information in a reasonable time. In this perspective, question-answering (QA) engines are designed to display answers, which were automatically extracted from the retrieved documents. Standard QA engines in literature process a user question, then retrieve relevant documents and finally extract some possible answers out of these documents using various named-entity recognition processes. In our study, we try to answer complex genomics questions, which can be adequately answered only using Gene Ontology (GO) concepts. Such complex answers cannot be found using state-of-the-art dictionary- and redundancy-based QA engines. We compare the effectiveness of two dictionary-based classifiers for extracting correct GO answers from a large set of 100 retrieved abstracts per question. In the same way, we also investigate the power of GOCat, a GO supervised classifier. GOCat exploits the GOA database to propose GO concepts that were annotated by curators for similar abstracts. This approach is called deep QA, as it adds an original classification step, and exploits curated biological data to infer answers, which are not explicitly mentioned in the retrieved documents. We show that for complex answers such as protein functional descriptions, the redundancy phenomenon has a limited effect. Similarly usual dictionary-based approaches are relatively ineffective. In contrast, we demonstrate how existing curated data, beyond information extraction, can be exploited by a supervised classifier, such as GOCat, to massively improve both the quantity and the quality of the answers with a +100% improvement for both recall and precision. 
Database URL: http://eagl.unige.ch/DeepQA4PA/.",2015-09-16 +29795526,An improved assembly and annotation of the melon (Cucumis melo L.) reference genome.,"We report an improved assembly (v3.6.1) of the melon (Cucumis melo L.) genome and a new genome annotation (v4.0). The optical mapping approach allowed correcting the order and the orientation of 21 previous scaffolds and permitted to correctly define the gap-size extension along the 12 pseudomolecules. A new comprehensive annotation was also built in order to update the previous annotation v3.5.1, released more than six years ago. Using an integrative annotation pipeline, based on exhaustive RNA-Seq collections and ad-hoc transposable element annotation, we identified 29,980 protein-coding loci. Compared to the previous version, the v4.0 annotation improved gene models in terms of completeness of gene structure, UTR regions definition, intron-exon junctions and reduction of fragmented genes. More than 8,000 new genes were identified, one third of them being well supported by RNA-Seq data. To make all the new resources easily exploitable and completely available for the scientific community, a redesigned Melonomics genomic platform was released at http://melonomics.net . The resources produced in this work considerably increase the reliability of the melon genome assembly and resolution of the gene models paving the way for further studies in melon and related species.",2018-05-24 +30158930,Silent Witness: Dual-Species Transcriptomics Reveals Epithelial Immunological Quiescence to Helminth Larval Encounter and Fostered Larval Development.,"Gastrointestinal nematodes are among the most prevalent parasites infecting humans and livestock worldwide. Infective larvae of the soil-transmitted nematode Ascaris spp. enter the host and start tissue migration by crossing the intestinal epithelial barrier. The initial interaction of the intestinal epithelium with the parasite, however, has received little attention. 
In a time-resolved interaction model of porcine intestinal epithelial cells (IPEC-J2) and infective Ascaris suum larvae, we addressed the early transcriptional changes occurring simultaneously in both organisms using dual-species RNA-Seq. Functional analysis of the host response revealed an overall induction of metabolic activity, without induction of immune responsive genes or immune signaling pathways and showing suppression of chemotactic genes like CXCL8/IL-8 or CHI3L1. Ascaris larvae, when getting in contact with the epithelium, showed induction of genes that orchestrate motor activity and larval development, such as myosin, troponin, myoglobin, and protein disulfide isomerase 2 (PDI-2). In addition, excretory-secretory products that likely facilitate parasite invasion were increased, among them, aspartic protease 6 or hyaluronidase. Integration of host and pathogen data in an interspecies gene co-expression network indicated links between nematode fatty acid biosynthesis and host ribosome assembly/protein synthesis. In summary, our study provides new molecular insights into the early factors of parasite invasion, while at the same time revealing host immunological unresponsiveness. Reproducible software for dual RNA-Seq analysis of non-model organisms is available at https://gitlab.com/mkuhring/project_asuum and can be applied to similar studies.",2018-08-15 +26650099,Magnification devices for endodontic therapy.,"

Background

After the introduction of microsurgical principles in endodontics involving new techniques for root canal treatment, there has been a drive to enhance the visualisation of the surgical field. It is important to know if the technical advantages for the operator brought in by magnification devices such as surgical microscopes, endoscopes and magnifying loupes, are also associated with advantages for the patient in terms of improvement of clinical and radiographic outcomes. This version updates the review published in 2009.

Objectives

To evaluate and compare the effects of endodontic treatment performed with the aid of magnification devices versus endodontic treatment without magnification devices. We also aimed to compare the different magnification devices used in endodontics with one another.

Search methods

The following electronic databases were searched: the Cochrane Oral Health Group Trials Register (to 13 October 2015), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library, 2015, Issue 9), MEDLINE via OVID (1946 to 13 October 2015) and EMBASE via OVID (1980 to 13 October 2015). We searched the US National Institutes of Health Trials Register (http://clinicaltrials.gov) and the WHO Clinical Trials Registry Platform for ongoing trials. No restrictions were placed on the language or date of publication when searching the electronic databases.

Selection criteria

We considered all randomised controlled trials (RCTs) and quasi-randomised controlled trials comparing endodontic therapy performed with versus without one or more magnification devices, as well as randomised and quasi-randomised trials comparing two or more magnification devices used as an adjunct to endodontic therapy.

Data collection and analysis

We conducted screening of search results independently and in duplicate. We obtained full papers for potentially relevant trials. The Cochrane Collaboration statistical guidelines were to be followed for data synthesis.

Main results

No trials met the inclusion criteria for this review.

Authors' conclusions

No article was identified in the current literature that satisfied the criteria for inclusion. It is unknown if and how the type of magnification device affects the treatment outcome, considering the high number of factors that may have a significant impact on the success of endodontic surgical procedure. This should be investigated by further long-term, well-designed RCTs that conform to the CONSORT statement (www.consort-statement.org/).",2015-12-09 +26502805,Polygenic inheritance of cryptorchidism susceptibility in the LE/orl rat.,"

Study hypothesis

Susceptibility to inherited cryptorchidism in the LE/orl rat may be associated with genetic loci that influence developmental patterning of the gubernaculum by the fetal testis.

Study finding

Cryptorchidism in the LE/orl rat is associated with a unique combination of homozygous minor alleles at multiple loci, and the encoded proteins are co-localized with androgen receptor (AR) and Leydig cells in fetal gubernaculum and testis, respectively.

What is known already

Prior studies have shown aberrant perinatal gubernacular migration, muscle patterning defects and reduced fetal testicular testosterone in the LE/orl strain. In addition, altered expression of androgen-responsive, cytoskeletal and muscle-related transcripts in the LE/orl fetal gubernaculum suggest a role for defective AR signaling in cryptorchidism susceptibility.

Study design, samples/materials, methods

The long-term LE/orl colony and short-term colonies of outbred Crl:LE and Crl:SD, and inbred WKY/Ncrl rats were maintained for studies. Animals were intercrossed (LE/orl X WKY/Ncrl), and obligate heterozygotes were reciprocally backcrossed to LE/orl rats to generate 54 F2 males used for genotyping and/or linkage analysis. At least five fetuses per gestational time point from two or more litters were used for quantitative real-time RT-PCR (qRT-PCR) and freshly harvested embryonic (E) day 17 gubernaculum was used to generate conditionally immortalized cell lines. We completed genotyping and gene expression analyses using genome-wide microsatellite markers and single nucleotide polymorphism (SNP) arrays, PCR amplification, direct sequencing, restriction enzyme digest with fragment analysis, whole genome sequencing (WGS), and qRT-PCR. Linkage analysis was performed in Haploview with multiple testing correction, and qRT-PCR data were analyzed using ANOVA after log transformation. Imaging was performed using custom and commercial antibodies directed at candidate proteins in gubernaculum and testis tissues, and gubernaculum cell lines.

Main results and the role of chance

LE/orl rats showed reduced fertility and fecundity, and higher risk of perinatal death as compared with Crl:LE rats, but there were no differences in breeding outcomes between normal and unilaterally cryptorchid males. Linkage analysis identified multiple peaks, and with selective breeding of outbred Crl:LE and Crl:SD strains for alleles within two of the most significant (P < 0.003) peaks on chromosomes 6 and 16, we were able to generate a non-LE/orl cryptorchid rat. Associated loci contain potentially functional minor alleles (0.25-0.36 in tested rat strains) including an exonic deletion in Syne2, a large intronic insertion in Ncoa4 (an AR coactivator) and potentially deleterious variants in Solh/Capn15, Ankrd28, and Hsd17b2. Existing WGS data indicate that homozygosity for these combined alleles does not occur in any other sequenced rat strain. We observed a modifying effect of the Syne2(del) allele on expression of other candidate genes, particularly Ncoa4, and for muscle and hormone-responsive transcripts. The selected candidate genes/proteins are highly expressed, androgen-responsive and/or co-localized with developing muscle and AR in fetal gubernaculum, and co-localized with Leydig cells in fetal testis.

Limitations, reasons for caution

The present study identified multiple cryptorchidism-associated linkage peaks in the LE/orl rat, containing potentially causal alleles. These are strong candidate susceptibility loci, but further studies are needed to demonstrate functional relevance to the phenotype.

Wider implications of the findings

Association data from both human and rat models of spontaneous, nonsyndromic cryptorchidism support a polygenic etiology of the disease. Both the present study and a human genome-wide association study suggest that common variants with weak effects contribute to susceptibility, and may exist in genes encoding proteins that participate in AR signaling in the developing gubernaculum. These findings have potential implications for the gene-environment interaction in the etiology of cryptorchidism.

Large scale data

Sequences were deposited in the Rat Genome Database (RGD, http://rgd.mcw.edu/).

Study funding and competing interests

This work was supported by: R01HD060769 from the Eunice Kennedy Shriver National Institute for Child Health and Human Development (NICHD), 2P20GM103446 and P20GM103464 from the National Institute of General Medical Sciences (NIGMS), and Nemours Biomedical Research. The authors have no competing interests to declare.",2015-10-26 +29244010,RocSampler: regularizing overlapping protein complexes in protein-protein interaction networks.,"

Background

In recent years, protein-protein interaction (PPI) networks have been well recognized as important resources to elucidate various biological processes and cellular mechanisms. In this paper, we address the problem of predicting protein complexes from a PPI network. This problem has two difficulties. One is related to small complexes, which contains two or three components. It is relatively difficult to identify them due to their simpler internal structure, but unfortunately complexes of such sizes are dominant in major protein complex databases, such as CYC2008. Another difficulty is how to model overlaps between predicted complexes, that is, how to evaluate different predicted complexes sharing common proteins because CYC2008 and other databases include such protein complexes. Thus, it is critical how to model overlaps between predicted complexes to identify them simultaneously.

Results

In this paper, we propose a sampling-based protein complex prediction method, RocSampler (Regularizing Overlapping Complexes), which exploits, as part of the whole scoring function, a regularization term for the overlaps of predicted complexes and that for the distribution of sizes of predicted complexes. We have implemented RocSampler in MATLAB and its executable file for Windows is available at the site, http://imi.kyushu-u.ac.jp/~om/software/RocSampler/ .

Conclusions

We have applied RocSampler to five yeast PPI networks and shown that it is superior to other existing methods. This implies that the design of scoring functions including regularization terms is an effective approach for protein complex prediction.",2017-12-06 +26451011,Systematic meta-analyses and field synopsis of genetic association studies in colorectal adenomas.,"

Background

Low penetrance genetic variants, primarily single nucleotide polymorphisms, have substantial influence on colorectal cancer (CRC) susceptibility. Most CRCs develop from colorectal adenomas (CRA). Here we report the first comprehensive field synopsis that catalogues all genetic association studies on CRA, with a parallel online database [http://www.chs.med.ed.ac.uk/CRAgene/].

Methods

We performed a systematic review, reviewing 9750 titles, and then extracted data from 130 publications reporting on 181 polymorphisms in 74 genes. We conducted meta-analyses to derive summary effect estimates for 37 polymorphisms in 26 genes. We applied the Venice criteria and Bayesian False Discovery Probability (BFDP) to assess the levels of the credibility of associations.

Results

We considered the association with the rs6983267 variant at 8q24 as 'highly credible', reaching genome-wide statistical significance in at least one meta-analysis model. We identified 'less credible' associations (higher heterogeneity, lower statistical power, BFDP > 0.02) with a further four variants of four independent genes: MTHFR c.677C>T p.A222V (rs1801133), TP53 c.215C>G p.R72P (rs1042522), NQO1 c.559C>T p.P187S (rs1800566), and NAT1 alleles imputed as fast acetylator genotypes. For the remaining 32 variants of 22 genes for which positive associations with CRA risk have been previously reported, the meta-analyses revealed no credible evidence to support these as true associations.

Conclusions

The limited number of credible associations between low penetrance genetic variants and CRA reflects the lower volume of evidence and associated lack of statistical power to detect associations of the magnitude typically observed for genetic variants and chronic diseases. The CRA gene database provides context for CRA genetic association data and will help inform future research directions.",2015-10-07 +28540415,Incidence of osteosynthesis of members in France.,"

Purpose

Little data is available on the number of osteosynthesis of limbs in a country. Incidence of osteosynthesis is an essential element for the formation and organization of care. Based on the data from the work of the Hospital Information Technology Agency ( http://www.atih.sante.fr/ ) and available in open access, we wanted to know the incidence of the number of osteosynthesis performed in France and their evolution over ten years between 2006 and 2015.

Methods

The data analyzed are derived from the website of the technical agency of information on the hospitalization (ATIH).

Results and conclusions

In France, in 2015, 267,999 limb osteosyntheses were performed. Between 2006 and 2015, the number of osteosynthesis increased by 9.1%. The incidence of limb osteosynthesis is 403.7 per 100,000 people, rising 3.9% between 2006 and 2015. In comparison, the incidence of hip prostheses increased by 12.6%, knee prosthesis by 57.4%. The main bias of the study is of course the quality of the coding of the surgeons and the establishments, a type of fracture that can enter into one or several categories of acts. In ten years, the incidence of osteosynthesis has increased little in France. The evolution is more pronounced on fractures affecting mainly the elderly, fracture of the upper end of the femur, fracture of the distal end of the radius and fracture of the ankle. The incidence of many acts of osteosynthesis is very low and therefore responsible for a weak experience for most surgeons.",2017-05-24 +28968757,"Novel features and enhancements in BioBin, a tool for the biologically inspired binning and association analysis of rare variants.","

Motivation

BioBin is an automated bioinformatics tool for the multi-level biological binning of sequence variants. Herein, we present a significant update to BioBin which expands the software to facilitate a comprehensive rare variant analysis and incorporates novel features and analysis enhancements.

Results

In BioBin 2.3, we extend our software tool by implementing statistical association testing, updating the binning algorithm, as well as incorporating novel analysis features providing for a robust, highly customizable, and unified rare variant analysis tool.

Availability and implementation

The BioBin software package is open source and freely available to users at http://www.ritchielab.com/software/biobin-download.

Contact

mdritchie@geisinger.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-02-01 +28968815,A novel SCCA approach via truncated ℓ1-norm and truncated group lasso for brain imaging genetics.,"

Motivation

Brain imaging genetics, which studies the linkage between genetic variations and structural or functional measures of the human brain, has become increasingly important in recent years. Discovering the bi-multivariate relationship between genetic markers such as single-nucleotide polymorphisms (SNPs) and neuroimaging quantitative traits (QTs) is one major task in imaging genetics. Sparse Canonical Correlation Analysis (SCCA) has been a popular technique in this area for its powerful capability in identifying bi-multivariate relationships coupled with feature selection. The existing SCCA methods impose either the ℓ1-norm or its variants to induce sparsity. The ℓ0-norm penalty is a perfect sparsity-inducing tool which, however, is an NP-hard problem.

Results

In this paper, we propose the truncated ℓ1-norm penalized SCCA to improve the performance and effectiveness of the ℓ1-norm based SCCA methods. Besides, we propose an efficient optimization algorithm to solve this novel SCCA problem. The proposed method is an adaptive shrinkage method via tuning τ. It can avoid the time intensive parameter tuning if given a reasonably small τ. Furthermore, we extend it to the truncated group-lasso (TGL), and propose TGL-SCCA model to improve the group-lasso-based SCCA methods. The experimental results, compared with four benchmark methods, show that our SCCA methods identify better or similar correlation coefficients, and better canonical loading profiles than the competing methods. This demonstrates the effectiveness and efficiency of our methods in discovering interesting imaging genetic associations.

Availability and implementation

The Matlab code and sample data are freely available at http://www.iu.edu/~shenlab/tools/tlpscca/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +24981076,Integrative analysis of independent transcriptome data for rare diseases.,"High-throughput technologies used to interrogate transcriptomes have been generating a great amount of publicly available gene expression data. For rare diseases that lack of clinical samples and research funding, there is a practical benefit to jointly analyze existing data sets commonly related to a specific rare disease. In this study, we collected a number of independently generated transcriptome data sets from four species: human, fly, mouse and worm. All data sets included samples with both normal and abnormal mitochondrial function. We reprocessed each data set to standardize format, scale and gene annotation and used HomoloGene database to map genes between species. Standardized procedure was also applied to compare gene expression profiles of normal and abnormal mitochondrial function to identify differentially expressed genes. We further used meta-analysis and other integrative analyses to recognize patterns across data sets and species. Novel insights related to mitochondrial dysfunction was revealed via these analyses, such as a group of genes consistently dysregulated by impaired mitochondrial function in multiple species. This study created a template for the study of rare diseases using genomic technologies and advanced statistical methods. All data and results generated by this study are freely available and stored at http://goo.gl/nOGWC2, to support further data mining.",2014-06-27 +28610091,LepNet: The Lepidoptera of North America Network.,"The Lepidoptera of North America Network, or LepNet, is a digitization effort recently launched to mobilize biodiversity data from 3 million specimens of butterflies and moths in United States natural history collections (http://www.lep-net.org/). 
LepNet was initially conceived as a North American effort but the project seeks collaborations with museums and other organizations worldwide. The overall goal is to transform Lepidoptera specimen data into readily available digital formats to foster global research in taxonomy, ecology and evolutionary biology.",2017-03-23 +26787666,GeneValidator: identify problems with protein-coding gene predictions.,"

Unlabelled

: Genomes of emerging model organisms are now being sequenced at very low cost. However, obtaining accurate gene predictions remains challenging: even the best gene prediction algorithms make substantial errors and can jeopardize subsequent analyses. Therefore, many predicted genes must be time-consumingly visually inspected and manually curated. We developed GeneValidator (GV) to automatically identify problematic gene predictions and to aid manual curation. For each gene, GV performs multiple analyses based on comparisons to gene sequences from large databases. The resulting report identifies problematic gene predictions and includes extensive statistics and graphs for each prediction to guide manual curation efforts. GV thus accelerates and enhances the work of biocurators and researchers who need accurate gene predictions from newly sequenced genomes.

Availability and implementation

GV can be used through a web interface or in the command-line. GV is open-source (AGPL), available at https://wurmlab.github.io/tools/genevalidator

Contact

: y.wurm@qmul.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-18 +26210358,VCF-Miner: GUI-based application for mining variants and annotations stored in VCF files.,"Next-generation sequencing platforms are widely used to discover variants associated with disease. The processing of sequencing data involves read alignment, variant calling, variant annotation and variant filtering. The standard file format to hold variant calls is the variant call format (VCF) file. According to the format specifications, any arbitrary annotation can be added to the VCF file for downstream processing. However, most downstream analysis programs disregard annotations already present in the VCF and re-annotate variants using the annotation provided by that particular program. This precludes investigators who have collected information on variants from literature or other sources from including these annotations in the filtering and mining of variants. We have developed VCF-Miner, a graphical user interface-based stand-alone tool, to mine variants and annotation stored in the VCF. Powered by a MongoDB database engine, VCF-Miner enables the stepwise trimming of non-relevant variants. The grouping feature implemented in VCF-Miner can be used to identify somatic variants by contrasting variants in tumor and in normal samples or to identify recessive/dominant variants in family studies. It is not limited to human data, but can also be extended to include non-diploid organisms. It also supports copy number or any other variant type supported by the VCF specification. 
VCF-Miner can be used on a personal computer or large institutional servers and is freely available for download from http://bioinformaticstools.mayo.edu/research/vcf-miner/.",2015-07-25 +29536635,"Rational design of isonicotinic acid hydrazide derivatives with antitubercular activity: Machine learning, molecular docking, synthesis and biological testing.","The problem of designing new antitubercular drugs against multiple drug-resistant tuberculosis (MDR-TB) was addressed using advanced machine learning methods. As there are only few published measurements against MDR-TB, we collected a large literature data set and developed models against the non-resistant H37Rv strain. The predictive accuracy of these models had a coefficient of determination q2  = .7-.8 (regression models) and balanced accuracies of about 80% (classification models) with cross-validation and independent test sets. The models were applied to screen a virtual chemical library, which was designed to have MDR-TB activity. The seven most promising compounds were identified, synthesized and tested. All of them showed activity against the H37Rv strain, and three molecules demonstrated activity against the MDR-TB strain. The docking analysis indicated that the discovered molecules could bind enoyl reductase, InhA, which is required in mycobacterial cell wall development. The models are freely available online (http://ochem.eu/article/103868) and can be used to predict potential anti-TB activity of new chemicals.",2018-05-06 +29186337,CytoCtrlAnalyser: a Cytoscape app for biomolecular network controllability analysis.,"Summary:Studying the controllability of biomolecular networks can result in profound knowledge about molecular biological systems. However, there is no comprehensive and easy-to-use platform for analyzing controllability of biomolecular networks although various algorithms for analyzing complex network controllability have been proposed recently. 
In this application note, we develop the CytoCtrlAnalyser which is a Cytoscape app to provide a comprehensive platform for analyzing controllability of biomolecular networks. Nine algorithms have been integrated in CytoCtrlAnalyser. With network topologies and customized control settings imported into CytoCtrlAnalyser, users can identify the steering nodes which should be actuated by input control signals for achieving different control objectives as well as investigate the importance of nodes from different perspectives in the controllability of networks. CytoCtrlAnalyser offers a tool for many promising applications, such as identification of potential drug targets or biologically important nodes in biomolecular networks. Availability and implementation:Freely available for downloading at http://apps.cytoscape.org/apps/cytoctrlanalyser. Contact:faw341@mail.usask.ca. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +30019843,Long-term outcomes after treatment of bare-metal stent restenosis with paclitaxel-coated balloon catheters or everolimus-eluting stents: 3-year follow-up of the TIS clinical study.,"

Background

The efficacy of paclitaxel-eluting balloon catheters (PEB) and drug-eluting stents for treatment of bare-metal stent restenosis (BMS-ISR) have been demonstrated in several studies with follow-up times of 9 to 12 months; however, the long-term outcomes of ISR treatment are less defined.

Objectives

We aimed to compare the long-term efficacy of PEB and everolimus-eluting stents (EES) for the treatment of BMS-ISR.

Methods

We analyzed 3-year clinical follow-up data from patients included in the TIS randomized clinical study. A total of 136 patients with BMS-ISR were allocated to receive treatment with either PEB or EES (68 patients with 74 ISR lesions per group).

Results

The PEB and EES groups did not significantly differ in major adverse cardiac events-free survival (MACE; P = .211; including individual events: CV death: P = .622; myocardial infarction: P = .650 or target vessel revascularization: P = .286) at 3-year clinical follow-up. No event-free survival differences were found between the groups regarding overall mortality (P = .818), definite stent thrombosis (P = .165) or the second MACE (P = .270).

Conclusions

At the 3-year follow-up, no significant differences in clinical outcomes were found between iopromide-coated PEB and EES for the treatment of BMS-ISR. (ClinicalTrials.gov; https://clinicaltrials.gov; NCT01735825).",2018-07-18 +25765347,"ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization.","

Unlabelled

ChIPseeker is an R package for annotating ChIP-seq data analysis. It supports annotating ChIP peaks and provides functions to visualize ChIP peaks coverage over chromosomes and profiles of peaks binding to TSS regions. Comparison of ChIP peak profiles and annotation are also supported. Moreover, it supports evaluating significant overlap among ChIP-seq datasets. Currently, ChIPseeker contains 15 000 bed file information from GEO database. These datasets can be downloaded and compared with user's own data to explore significant overlap datasets for inferring co-regulation or transcription factor complex for further investigation.

Availability and implementation

ChIPseeker is released under Artistic-2.0 License. The source code and documents are freely available through Bioconductor (http://www.bioconductor.org/packages/release/bioc/html/ChIPseeker.html).",2015-03-11 +25338821,Effects of food processing on polyphenol contents: a systematic analysis using Phenol-Explorer data.,"SCOPE: The Phenol-Explorer web database (http://www.phenol-explorer.eu) was recently updated with new data on polyphenol retention due to food processing. Here, we analyze these data to investigate the effect of different variables on polyphenol content and make recommendations aimed at refining estimation of intake in epidemiological studies. METHODS AND RESULTS: Data on the effects of processing upon 161 polyphenols compiled for the Phenol-Explorer database were analyzed to investigate the effects of polyphenol structure, food, and process upon polyphenol loss. These were expressed as retention factors (RFs), fold changes in polyphenol content due to processing. Domestic cooking of common plant foods caused considerable losses (median RF = 0.45-0.70), although variability was high. Food storage caused fewer losses, regardless of food or polyphenol (median RF = 0.88, 0.95, 0.92 for ambient, refrigerated, and frozen storage, respectively). The food under study was often a more important determinant of retention than the process applied. CONCLUSION: Phenol-Explorer data enable polyphenol losses due to processing from many different foods to be rapidly compared. Where experimentally determined polyphenol contents of a processed food are not available, only published RFs matching at least the food and polyphenol of interest should be used when building food composition tables for epidemiological studies.",2014-11-24 +27524661,Music interventions for improving psychological and physical outcomes in cancer patients.,"

Background

Having cancer may result in extensive emotional, physical and social suffering. Music interventions have been used to alleviate symptoms and treatment side effects in cancer patients.

Objectives

To assess and compare the effects of music therapy and music medicine interventions for psychological and physical outcomes in people with cancer.

Search methods

We searched the Cochrane Central Register of Controlled Trials (CENTRAL) (2016, Issue 1), MEDLINE, Embase, CINAHL, PsycINFO, LILACS, Science Citation Index, CancerLit, CAIRSS, Proquest Digital Dissertations, ClinicalTrials.gov, Current Controlled Trials, the RILM Abstracts of Music Literature, http://www.wfmt.info/Musictherapyworld/ and the National Research Register. We searched all databases, except for the last two, from their inception to January 2016; the other two are no longer functional, so we searched them until their termination date. We handsearched music therapy journals, reviewed reference lists and contacted experts. There was no language restriction.

Selection criteria

We included all randomized and quasi-randomized controlled trials of music interventions for improving psychological and physical outcomes in adult and pediatric patients with cancer. We excluded participants undergoing biopsy and aspiration for diagnostic purposes.

Data collection and analysis

Two review authors independently extracted the data and assessed the risk of bias. Where possible, we presented results in meta-analyses using mean differences and standardized mean differences. We used post-test scores. In cases of significant baseline difference, we used change scores.

Main results

We identified 22 new trials for inclusion in this update. In total, the evidence of this review rests on 52 trials with a total of 3731 participants. We included music therapy interventions offered by trained music therapists, as well as music medicine interventions, which are defined as listening to pre-recorded music, offered by medical staff. We categorized 23 trials as music therapy trials and 29 as music medicine trials. The results suggest that music interventions may have a beneficial effect on anxiety in people with cancer, with a reported average anxiety reduction of 8.54 units (95% confidence interval (CI) -12.04 to -5.05, P < 0.0001) on the Spielberger State Anxiety Inventory - State Anxiety (STAI-S) scale (range 20 to 80) and -0.71 standardized units (13 studies, 1028 participants; 95% CI -0.98 to -0.43, P < 0.00001; low quality evidence) on other anxiety scales, a moderate to strong effect. Results also suggested a moderately strong, positive impact on depression (7 studies, 723 participants; standardized mean difference (SMD): -0.40, 95% CI -0.74 to -0.06, P = 0.02; very low quality evidence), but because of the very low quality of the evidence for this outcome, this result needs to be interpreted with caution. We found no support for an effect of music interventions on mood or distress. Music interventions may lead to small reductions in heart rate, respiratory rate and blood pressure but do not appear to impact oxygen saturation level. We found a large pain-reducing effect (7 studies, 528 participants; SMD: -0.91, 95% CI -1.46 to -0.36, P = 0.001, low quality evidence). 
In addition, music interventions had a small to moderate treatment effect on fatigue (6 studies, 253 participants; SMD: -0.38, 95% CI -0.72 to -0.04, P = 0.03; low quality evidence), but we did not find strong evidence for improvement in physical functioning. The results suggest a large effect of music interventions on patients' quality of life (QoL), but the results were highly inconsistent across studies, and the pooled effect size for the music medicine and music therapy studies was accompanied by a large confidence interval (SMD: 0.98, 95% CI -0.36 to 2.33, P = 0.15, low quality evidence). A comparison between music therapy and music medicine interventions suggests a moderate effect of music therapy interventions for patients' quality of life (QoL) (3 studies, 132 participants; SMD: 0.42, 95% CI 0.06 to 0.78, P = 0.02; very low quality evidence), but we found no evidence of an effect for music medicine interventions. A comparison between music therapy and music medicine studies was also possible for anxiety, depression and mood, but we found no difference between the two types of interventions for these outcomes. The results of single studies suggest that music listening may reduce the need for anesthetics and analgesics as well as decrease recovery time and duration of hospitalization, but more research is needed for these outcomes. We could not draw any conclusions regarding the effect of music interventions on immunologic functioning, coping, resilience or communication outcomes because either we could not pool the results of the studies that included these outcomes or we could only identify one trial. For spiritual well-being, we found no evidence of an effect in adolescents or young adults, and we could not draw any conclusions in adults. The majority of studies included in this review update presented a high risk of bias, and therefore the quality of evidence is low.

Authors' conclusions

This systematic review indicates that music interventions may have beneficial effects on anxiety, pain, fatigue and QoL in people with cancer. Furthermore, music may have a small effect on heart rate, respiratory rate and blood pressure. Most trials were at high risk of bias and, therefore, these results need to be interpreted with caution.",2016-08-15 +28132024,"MosaicHunter: accurate detection of postzygotic single-nucleotide mosaicism through next-generation sequencing of unpaired, trio, and paired samples.","Genomic mosaicism arising from postzygotic mutations has long been associated with cancer and more recently with non-cancer diseases. It has also been detected in healthy individuals including healthy parents of children affected with genetic disorders, highlighting its critical role in the origin of genetic mutations. However, most existing software for the genome-wide identification of single-nucleotide mosaicisms (SNMs) requires a paired control tissue obtained from the same individual which is often unavailable for non-cancer individuals and sometimes missing in cancer studies. Here, we present MosaicHunter (http://mosaichunter.cbi.pku.edu.cn), a bioinformatics tool that can identify SNMs in whole-genome and whole-exome sequencing data of unpaired samples without matched controls using Bayesian genotypers. We evaluate the accuracy of MosaicHunter on both simulated and real data and demonstrate that it has improved performance compared with other somatic mutation callers. We further demonstrate that incorporating sequencing data of the parents can be an effective approach to significantly improve the accuracy of detecting SNMs in an individual when a matched control sample is unavailable. 
Finally, MosaicHunter also has a paired mode that can take advantage of matched control samples when available, making it a useful tool for detecting SNMs in both non-cancer and cancer studies.",2017-06-01 +23197657,H-InvDB in 2013: an omics study platform for human functional gene and transcript discovery.,"H-InvDB (http://www.h-invitational.jp/) is a comprehensive human gene database started in 2004. In the latest version, H-InvDB 8.0, a total of 244 709 human complementary DNA was mapped onto the hg19 reference genome and 43 829 gene loci, including nonprotein-coding ones, were identified. Of these loci, 35 631 were identified as potential protein-coding genes, and 22 898 of these were identical to known genes. In our analysis, 19 309 annotated genes were specific to H-InvDB and not found in RefSeq and Ensembl. In fact, 233 genes of the 19 309 turned out to have protein functions in this version of H-InvDB; they were annotated as unknown protein functions in the previous version. Furthermore, 11 genes were identified as known Mendelian disorder genes. It is advantageous that many biologically functional genes are hidden in the H-InvDB unique genes. As large-scale proteomic projects have been conducted to elucidate the functions of all human proteins, we have enhanced the proteomic information with an advanced protein view and new subdatabase of protein complexes (Protein Complex Database with quality index). 
We propose that H-InvDB is an important resource for finding novel candidate targets for medical care and drug development.",2012-11-28 +29064699,mRNA-Sequencing Analysis Reveals Transcriptional Changes in Root of Maize Seedlings Treated with Two Increasing Concentrations of a New Biostimulant.,"Biostimulants are a wide range of natural or synthetic products containing substances and/or microorganisms that can stimulate plant processes to improve nutrient uptake, nutrient efficiency, tolerance to abiotic stress, and crop quality ( http://www.biostimulants.eu/ , accessed September 27, 2017). The use of biostimulants is proposed as an advanced solution to face the demand for sustainable agriculture by ensuring optimal crop performances and better resilience to environment changes. The proposed approach is to predict and characterize the function of natural compounds as biostimulants. In this research, plant growth assessments and transcriptomic approaches are combined to investigate and understand the specific mode(s) of action of APR, a new product provided by the ILSA group (Arzignano, Vicenza). Maize seedlings (B73) were kept in a climatic chamber and grown in a solid medium to test the effects of two different combinations of the protein hydrolysate APR (A1 and A1/2). Data on root growth evidenced a significant enhancement of the dry weight of both roots and root/shoot ratio in response to APR. Transcriptomic profiles of lateral roots of maize seedlings treated with two increasing concentrations of APR were studied by mRNA-sequencing analysis (RNA-seq). Pairwise comparisons of the RNA-seq data identified a total of 1006 differentially expressed genes between treated and control plants. The two APR concentrations were demonstrated to affect the expression of genes involved in both common and specific pathways. 
On the basis of the putative function of the isolated differentially expressed genes, APR has been proposed to enhance plant response to adverse environmental conditions.",2017-11-07 +29058410,Spin System Modeling of Nuclear Magnetic Resonance Spectra for Applications in Metabolomics and Small Molecule Screening.,"The exceptionally rich information content of nuclear magnetic resonance (NMR) spectra is routinely used to identify and characterize molecules and molecular interactions in a wide range of applications, including clinical biomarker discovery, drug discovery, environmental chemistry, and metabolomics. The set of peak positions and intensities from a reference NMR spectrum generally serves as the identifying signature for a compound. Reference spectra normally are collected under specific conditions of pH, temperature, and magnetic field strength, because changes in conditions can distort the identifying signatures of compounds. A spin system matrix that parametrizes chemical shifts and coupling constants among spins provides a much richer feature set for a compound than a spectral signature based on peak positions and intensities. Spin system matrices expand the applicability of NMR spectral libraries beyond the specific conditions under which data were collected. In addition to being able to simulate spectra at any field strength, spin parameters can be adjusted to systematically explore alterations in chemical shift patterns due to variations in other experimental conditions, such as compound concentration, pH, or temperature. We present methodology and software for efficient interactive optimization of spin parameters against experimental 1D-1H NMR spectra of small molecules. We have used the software to generate spin system matrices for a set of key mammalian metabolites and are also using the software to parametrize spectra of small molecules used in NMR-based ligand screening. 
The software, along with optimized spin system matrix data for a growing number of compounds, is available from http://gissmo.nmrfam.wisc.edu/ .",2017-11-07 +27797771,Imputing gene expression to maximize platform compatibility.,"Microarray measurements of gene expression constitute a large fraction of publicly shared biological data, and are available in the Gene Expression Omnibus (GEO). Many studies use GEO data to shape hypotheses and improve statistical power. Within GEO, the Affymetrix HG-U133A and HG-U133 Plus 2.0 are the two most commonly used microarray platforms for human samples; the HG-U133 Plus 2.0 platform contains 54 220 probes and the HG-U133A array contains a proper subset (21 722 probes). When different platforms are involved, the subset of common genes is most easily compared. This approach results in the exclusion of substantial measured data and can limit downstream analysis. To predict the expression values for the genes unique to the HG-U133 Plus 2.0 platform, we constructed a series of gene expression inference models based on genes common to both platforms. Our model predicts gene expression values that are within the variability observed in controlled replicate studies and are highly correlated with measured data. Using six previously published studies, we also demonstrate the improved performance of the enlarged feature space generated by our model in downstream analysis.

Availability and implementation

The gene inference model described in this paper is available as a R package (affyImpute), which can be downloaded at http://simtk.org/home/affyimpute.

Contact

rbaltman@stanford.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +26590403,SBR-Blood: systems biology repository for hematopoietic cells.,"Extensive research into hematopoiesis (the development of blood cells) over several decades has generated large sets of expression and epigenetic profiles in multiple human and mouse blood cell types. However, there is no single location to analyze how gene regulatory processes lead to different mature blood cells. We have developed a new database framework called hematopoietic Systems Biology Repository (SBR-Blood), available online at http://sbrblood.nhgri.nih.gov, which allows user-initiated analyses for cell type correlations or gene-specific behavior during differentiation using publicly available datasets for array- and sequencing-based platforms from mouse hematopoietic cells. SBR-Blood organizes information by both cell identity and by hematopoietic lineage. The validity and usability of SBR-Blood has been established through the reproduction of workflows relevant to expression data, DNA methylation, histone modifications and transcription factor occupancy profiles.",2015-11-20 +27607098,Proposed Nomogram Predicting the Individual Risk of Malignancy in the Patients With Branch Duct Type Intraductal Papillary Mucinous Neoplasms of the Pancreas.,"

Objectives

This study evaluated individual risks of malignancy and proposed a nomogram for predicting malignancy of branch duct type intraductal papillary mucinous neoplasms (BD-IPMNs) using the large database for IPMN.

Background

Although consensus guidelines list several malignancy predicting factors in patients with BD-IPMN, those variables have different predictability and individual quantitative prediction of malignancy risk is limited.

Methods

Clinicopathological factors predictive of malignancy were retrospectively analyzed in 2525 patients with biopsy proven BD-IPMN at 22 tertiary hospitals in Korea and Japan. The patients with main duct dilatation >10 mm and inaccurate information were excluded.

Results

The study cohort consisted of 2258 patients. Malignant IPMNs were defined as those with high grade dysplasia and associated invasive carcinoma. Of 2258 patients, 986 (43.7%) had low, 443 (19.6%) had intermediate, 398 (17.6%) had high grade dysplasia, and 431 (19.1%) had invasive carcinoma. To construct and validate the nomogram, patients were randomly allocated into training and validation sets, with fixed ratios of benign and malignant lesions. Multiple logistic regression analysis resulted in five variables (cyst size, duct dilatation, mural nodule, serum CA19-9, and CEA) being selected to construct the nomogram. In the validation set, this nomogram showed excellent discrimination power through a 1000 times bootstrapped calibration test.

Conclusion

A nomogram predicting malignancy in patients with BD-IPMN was constructed using a logistic regression model. This nomogram may be useful in identifying patients at risk of malignancy and for selecting optimal treatment methods. The nomogram is freely available at http://statgen.snu.ac.kr/software/nomogramIPMN.",2017-12-01 +29106449,AlloSigMA: allosteric signaling and mutation analysis server.,"

Motivation

Allostery is an omnipresent mechanism of the function modulation in proteins via either effector binding or mutations in the exosites. Despite the growing number of online servers and databases devoted to prediction/classification of allosteric sites and their characteristics, there is a lack of resources for an efficient and quick estimation of the causality and energetics of allosteric communication.

Results

The AlloSigMA server implements a unique approach on the basis of the recently introduced structure-based statistical mechanical models of allosteric signaling. It provides an interactive framework for estimating the allosteric free energy as a result of the ligand(s) binding, mutation(s) and their combinations. Latent regulatory exosites and allosteric effect of mutations can be detected and explored, facilitating the research efforts in protein engineering and allosteric drug design.

Availability and implementation

The AlloSigMA server is freely available at http://allosigma.bii.a-star.edu.sg/home/.

Contact

igorb@bii.a-star.edu.sg.",2017-12-01 +29100769,Arthroscopic Debridement for Primary Degenerative Osteoarthritis of the Elbow Leads to Significant Improvement in Range of Motion and Clinical Outcomes: A Systematic Review.,"

Purpose

The purpose of this investigation was to determine whether arthroscopic debridement of primary elbow osteoarthritis results in statistically significant and clinically relevant improvement in (1) elbow range of motion and (2) clinical outcomes with (3) low complication and reoperation rates.

Methods

A systematic review was registered with PROSPERO and performed using PRISMA guidelines. Databases were searched for studies that investigated the outcomes of arthroscopic debridement for the treatment of primary osteoarthritis of the elbow in adult human patients. Study methodological quality was analyzed. Studies that included post-traumatic arthritis were excluded. Elbow motion and all elbow-specific patient-reported outcome scores were eligible for analysis. Comparisons between preoperative and postoperative values from each study were made using 2-sample Z-tests (http://in-silico.net/tools/statistics/ztest) using a P value < .05.

Results

Nine articles (209 subjects, 213 elbows, 187 males, 22 females, mean age 45.7 ± 7.1 years, mean follow-up 41.7 ± 16.3 months; 75% right, 25% left; 79% dominant elbow, 21% nondominant) were analyzed. Elbow extension (23.4°-10.7°, Δ 12.7°), flexion (115.9°-128.7°, Δ 12.8°), and global arc of motion (94.5°-117.6°, Δ 23.1°) had statistically significant and clinically relevant improvement following arthroscopic debridement (P < .0001 for all). There was also a statistically significant (P < .0001) and clinically relevant improvement in the Mayo Elbow Performance Score (60.7-84.6, Δ 23.9) postoperatively. Six patients (2.8%) had postoperative complications. Nine (4.2%) underwent reoperation.

Conclusions

Elbow arthroscopic debridement for primary degenerative osteoarthritis results in statistically significant and clinically relevant improvement in elbow range of motion and clinical outcomes with low complication and reoperation rates.

Level of evidence

Systematic review of level IV studies.",2017-12-01 +29525981,NeuroPP: A Tool for the Prediction of Neuropeptide Precursors Based on Optimal Sequence Composition.,"Neuropeptides (NPs) are short secreted peptides produced mainly in the nervous system and digestive system. They activate signaling cascades to control a wide range of biological functions, such as metabolism, sensation, and behavior. NPs are typically produced from a larger NP precursor (NPP) which includes a signal peptide sequence, one or more NP sequences, and other sequences. With the drastic growth of unknown protein sequences generated in the post-genomic age, it is highly desired to develop computational methods for identifying NPP rapidly and efficiently. In this article, we developed a predictor for NPPs based on optimized sequence composition of single amino acid, dipeptide, and tripeptide. Evaluated with independent data set, the predictor showed excellent performance that achieved an accuracy of 88.65% with AUC of 0.95. The corresponding web server was developed, which is freely available at http://i.uestc.edu.cn/neuropeptide/neuropp/home.html . It can help relevant researchers to screen candidate NP precursor, shorten experimental cycle, and reduce costs.",2018-03-10 +29351544,"Long-Term Exposure to Fine Particulate Matter, Blood Pressure, and Incident Hypertension in Taiwanese Adults.","BACKGROUND:Long-term exposure to particulate matter (PM) air pollution may increase blood pressure and the risk of hypertension. However, epidemiological evidence is scarce and inconsistent. OBJECTIVES:We investigated the associations between long-term exposure to PM with an aerodynamic diameter <2.5μm (PM2.5), blood pressure, and incident hypertension in a large Taiwanese cohort. METHODS:We studied 361,560 adults ≥18y old from a large cohort who participated in a standard medical examination program during 2001 to 2014. Among this group, 125,913 nonhypertensive participants were followed up. 
A satellite-based spatiotemporal model was used to estimate the 2-y average PM2.5 concentrations at each participant's address. Multivariable linear regression was used in the cross-sectional data analysis with the 361,560 participants to investigate the associations between PM2.5 and systolic blood pressure (SBP), diastolic blood pressure (DBP), and pulse pressure (PP), and Cox proportional hazard regression was used in the cohort data analysis with the 125,913 participants to investigate the associations between PM2.5 and incident hypertension. RESULTS:Each 10-μg/m3 increment in the 2-y average PM2.5 concentration was associated with increases of 0.45 mmHg [95% confidence interval (CI): 0.40, 0.50], 0.07 mmHg (95% CI: 0.04, 0.11), and 0.38 mmHg (95% CI: 0.33, 0.42) in SBP, DBP, and PP, respectively, after adjusting for a wide range of covariates and possible confounders. Each 10-μg/m3 increment in the 2-y average PM2.5 concentration was associated with an increase of 3% in the risk of developing hypertension [hazard ratio=1.03 (95% CI: 1.01, 1.05)]. Stratified and sensitivity analyses yielded similar results. CONCLUSIONS:Long-term exposure to PM2.5 air pollution is associated with higher blood pressure and an increased risk of hypertension. These findings reinforce the importance of air pollution mitigation strategies to reduce the risk of cardiovascular disease. https://doi.org/10.1289/EHP2466.",2018-01-18 +28516912,A complete tool set for molecular QTL discovery and analysis.,"Population scale studies combining genetic information with molecular phenotypes (for example, gene expression) have become a standard to dissect the effects of genetic variants onto organismal phenotypes. These kinds of data sets require powerful, fast and versatile methods able to discover molecular Quantitative Trait Loci (molQTL). 
Here we propose such a solution, QTLtools, a modular framework that contains multiple new and well-established methods to prepare the data, to discover proximal and distal molQTLs and, finally, to integrate them with GWAS variants and functional annotations of the genome. We demonstrate its utility by performing a complete expression QTL study in a few easy-to-perform steps. QTLtools is open source and available at https://qtltools.github.io/qtltools/.",2017-05-18 +26822210,ASDB: a resource for probing protein functions with small molecules.,"

Unlabelled

: Identifying chemical probes or seeking scaffolds for a specific biological target is important for protein function studies. Therefore, we create the Annotated Scaffold Database (ASDB), a computer-readable and systematic target-annotated scaffold database, to serve such needs. The scaffolds in ASDB were derived from public databases including ChEMBL, DrugBank and TCMSP, with a scaffold-based classification approach. Each scaffold was assigned with an InChIKey as its unique identifier, energy-minimized 3D conformations, and other calculated properties. A scaffold is also associated with drugs, natural products, drug targets and medical indications. The database can be retrieved through text or structure query tools. ASDB collects 333 601 scaffolds, which are associated with 4368 targets. The scaffolds consist of 3032 scaffolds derived from drugs and 5163 scaffolds derived from natural products. For given scaffolds, scaffold-target networks can be generated from the database to demonstrate the relations of scaffolds and targets.

Availability and implementation

ASDB is freely available at http://www.rcdd.org.cn/asdb/ with the major web browsers.

Contact

junxu@biochemomes.com or xujun9@mail.sysu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-28 +29280994,PRAPI: post-transcriptional regulation analysis pipeline for Iso-Seq.,"

Summary

The single-molecule real-time (SMRT) isoform sequencing (Iso-Seq) based on Pacific Bioscience (PacBio) platform has received increasing attention for its ability to explore full-length isoforms. Thus, comprehensive tools for Iso-Seq bioinformatics analysis are extremely useful. Here, we present a one-stop solution for Iso-Seq analysis, called PRAPI to analyze alternative transcription initiation (ATI), alternative splicing (AS), alternative cleavage and polyadenylation (APA), natural antisense transcripts (NAT), and circular RNAs (circRNAs) comprehensively. PRAPI is capable of combining Iso-Seq full-length isoforms with short read data, such as RNA-Seq or polyadenylation site sequencing (PAS-seq) for differential expression analysis of NAT, AS, APA and circRNAs. Furthermore, PRAPI can annotate new genes and correct mis-annotated genes when gene annotation is available. Finally, PRAPI generates high-quality vector graphics to visualize and highlight the Iso-Seq results.

Availability and implementation

The Dockerfile of PRAPI is available at http://www.bioinfor.org/tool/PRAPI.

Contact

lfgu@fafu.edu.cn.",2018-05-01 +29098713,Large-scale automated function prediction of protein sequences and an experimental case study validation on PTEN transcript variants.,"Recent advances in computing power and machine learning empower functional annotation of protein sequences and their transcript variations. Here, we present an automated prediction system UniGOPred, for GO annotations and a database of GO term predictions for proteomes of several organisms in UniProt Knowledgebase (UniProtKB). UniGOPred provides function predictions for 514 molecular function (MF), 2909 biological process (BP), and 438 cellular component (CC) GO terms for each protein sequence. UniGOPred covers nearly the whole functionality spectrum in Gene Ontology system and it can predict both generic and specific GO terms. UniGOPred was run on CAFA2 challenge target protein sequences and it is categorized within the top 10 best performing methods for the molecular function category. In addition, the performance of UniGOPred is higher compared to the baseline BLAST classifier in all categories of GO. UniGOPred predictions are compared with UniProtKB/TrEMBL database annotations as well. Furthermore, the proposed tool's ability to predict negatively associated GO terms that defines the functions that a protein does not possess, is discussed. UniGOPred annotations were also validated by case studies on PTEN protein variants experimentally and on CHD8 protein variants with literature. UniGOPred protein functional annotation system is available as an open access tool at http://cansyl.metu.edu.tr/UniGOPred.html.",2017-11-29 +26339155,NABIC SNP: an integrated database for SNP markers.,"UNLABELLED:The National Agricultural Biotechnology Information Center (NABIC) constructed a web-based database to provide information about 54,310 single nucleotide polymorphisms (SNPs) identified in the seven species in a high-throughput manner. 
The database consists of three major functional categories: SNP marker search, detailed information viewer and download of SNP sequence. The SNP annotation table provides detailed information such as ownership information, basic information, bio-entry information, reference, comments, features, and sequence data. AVAILABILITY:The database is available online for free at http://nabic.rda.go.kr/SNP.",2015-07-31 +26826444,The IPD-IMGT/HLA Database - New developments in reporting HLA variation.,"IPD-IMGT/HLA is a constituent of the Immuno Polymorphism Database (IPD), which was developed to provide a centralised system for the study of polymorphism in genes of the immune system. The IPD project works with specialist groups of nomenclature committees who provide and curate individual sections before they are submitted to IPD for online publication. The primary database within the IPD project is the IPD-IMGT/HLA Database, which provides a locus-specific database for the hyper-polymorphic allele sequences of the genes in the HLA system, also known as the human Major Histocompatibility Complex. The IPD-IMGT/HLA Database was first released over 17 years ago, building on the work of the WHO Nomenclature Committee for Factors of the HLA system that was initiated in 1968. The IPD-IMGT/HLA Database enhanced this work by providing the HLA community with an online, searchable repository of highly curated HLA sequences. Many of the genes encode proteins of the immune system and are hyper polymorphic, with some genes currently having over 4000 known allelic variants. 
Through the work of the HLA Informatics Group and in collaboration with the European Bioinformatics Institute we are able to provide public access to this data through the website, http://www.ebi.ac.uk/ipd/imgt/hla.",2016-01-27 +26578565,"PCOSKB: A KnowledgeBase on genes, diseases, ontology terms and biochemical pathways associated with PolyCystic Ovary Syndrome.","Polycystic ovary syndrome (PCOS) is one of the major causes of female subfertility worldwide and ≈ 7-10% of women in reproductive age are affected by it. The affected individuals exhibit varying types and levels of comorbid conditions, along with the classical PCOS symptoms. Extensive studies on PCOS across diverse ethnic populations have resulted in a plethora of information on dysregulated genes, gene polymorphisms and diseases linked to PCOS. However, efforts have not been taken to collate and link these data. Our group, for the first time, has compiled PCOS-related information available through scientific literature; cross-linked it with molecular, biochemical and clinical databases and presented it as a user-friendly, web-based online knowledgebase for the benefit of the scientific and clinical community. Manually curated information on associated genes, single nucleotide polymorphisms, diseases, gene ontology terms and pathways along with supporting reference literature has been collated and included in PCOSKB (http://pcoskb.bicnirrh.res.in).",2015-11-17 +29804286,Synaptic activity induces input-specific rearrangements in a targeted synaptic protein interaction network.,"Cells utilize dynamic, network-level rearrangements in highly interconnected protein interaction networks to transmit and integrate information from distinct signaling inputs. Despite the importance of protein interaction network dynamics, the organizational logic underlying information flow through these networks is not well understood. 
Previously, we developed the quantitative multiplex co-immunoprecipitation platform, which allows for the simultaneous and quantitative measurement of the amount of co-association between large numbers of proteins in shared complexes. Here, we adapt quantitative multiplex co-immunoprecipitation to define the activity-dependent dynamics of an 18-member protein interaction network in order to better understand the underlying principles governing glutamatergic signal transduction. We first establish that immunoprecipitation detected by flow cytometry can detect activity-dependent changes in two known protein-protein interactions (Homer1-mGluR5 and PSD-95-SynGAP). We next demonstrate that neuronal stimulation elicits a coordinated change in our targeted protein interaction network, characterized by the initial dissociation of Homer1 and SynGAP-containing complexes followed by increased associations among glutamate receptors and PSD-95. Finally, we show that stimulation of distinct glutamate receptor types results in different modular sets of protein interaction network rearrangements, and that cells activate both modules in order to integrate complex inputs. This analysis demonstrates that cells respond to distinct types of glutamatergic input by modulating different combinations of protein co-associations among a targeted network of proteins. Our data support a model of synaptic plasticity in which synaptic stimulation elicits dissociation of pre-existing multiprotein complexes, opening binding slots in scaffold proteins and allowing for the recruitment of additional glutamatergic receptors. Open Science: This manuscript was awarded with the Open Materials Badge. For more information see: https://cos.io/our-services/open-science-badges/.",2018-09-01 +29902523,Analytical symmetry detection in protein assemblies. II. Dihedral and cubic symmetries.,"Protein assemblies are often symmetric, as this organization has many advantages compared to individual proteins. 
Complex protein structures thus very often possess high-order symmetries. Detection and analysis of these symmetries has been a challenging problem and no efficient algorithms have been developed so far. This paper presents the extension of our cyclic symmetry detection method for higher-order symmetries with multiple symmetry axes. These include dihedral and cubic, i.e., tetrahedral, octahedral, and icosahedral, groups. Our method assesses the quality of a particular symmetry group and also determines all of its symmetry axes with a machine precision. The method comprises discrete and continuous optimization steps and is applicable to assemblies with multiple chains in the asymmetric subunits or to those with pseudo-symmetry. We implemented the method in C++ and exhaustively tested it on all 51,358 symmetric assemblies from the Protein Data Bank (PDB). It allowed us to study structural organization of symmetric assemblies solved by X-ray crystallography, and also to assess the symmetry annotation in the PDB. For example, in 1.6% of the cases we detected a higher symmetry group compared to the PDB annotation, and we also detected several cases with incorrect annotation. The method is available at http://team.inria.fr/nano-d/software/ananas. The graphical user interface of the method built for the SAMSON platform is available at http://samson-connect.net.",2018-06-15 +29708698,"State Differences in the Cost of Job-Related Health Insurance, 2012","Health insurance provided by employers is the source of medical coverage for most Americans under age 65. The cost of employer-sponsored health insurance varies considerably based on the State where the employer is located and the number of persons covered by the plan. This Statistical Brief presents State variations from the national average of the cost of job-related health insurance and how these costs are shared by employers and their employees. 
The Brief specifically examines the average premiums and employee contributions for private-sector establishments in 2012 in the 10 most populous states based on the 2010 Decennial Census. This analysis is based on the most recent data available from the Insurance Component of the Medical Expenditure Panel Survey (MEPS-IC). Estimates for all other States and the District of Columbia are available on the MEPS Web site (http://meps.ahrq.gov/mepsweb/). Only those estimates with statistically significant differences from the national average using a multiple comparison procedure of estimates at the 0.05 percent significance level are noted in the text. These estimates are also identified in the tables, with those above the national average noted with two asterisks (**) and those below the national average noted with one asterisk (*).",2018-05-01 +32096036,Tau Interacting Proteins: Gaining Insight into the Roles of Tau in Health and Disease.,"Tau is most intensely studied in relation to its executive role in Tauopathies, a family of neurodegenerative disorders characterized by the accumulation of Tau aggregates [15, 21, 38, 75, 89, 111, 121, 135, 175, 176, 192]. Tau aggregation in the different Tauopathies differs in the affected cell type, the structure of aggregates and Tau isoform composition. However, in all Tauopathies, accumulation of pathological Tau in well-characterized and well-defined brain regions, correlates strongly with symptoms associated with the dysfunction of this brain region. Hence, symptoms of neurodegenerative Tauopathies can range from motoric to cognitive and behavioral symptoms, even extending to deterioration of vital functions when the disease progresses, or combinations of different symptoms governed by the affected brain regions. The most common Tauopathies are corticobasal degeneration (CBD), Pick's disease, progressive supranuclear palsy (PSP) and frontotemporal dementias with parkinsonism linked to chromosome 17 (FTDP-17). 
However a growing number of diseases are characterized by Tau aggregation amounting to a large family of more than 20 disorders [176]. Most Tauopathies are sporadic, and are hence linked to a combination of environmental and genetic risk factors. However, mutations in MAPT have been identified which are autosomal dominantly linked to Tauopathies, including FTDP, PSP and CBD [94, 163, 185] (Alzforum, https://www.alzforum.org/mutations/mapt ). More than 80 mutations have been identified in MAPT, both in intronic and exonic regions of the human MAPT. These mutations can be classified as missense mutations or splicing mutations. Most missense mutations cluster in or near the microtubule binding site of Tau, while most splicing mutations affect the splicing of exon 10 (encoding the R2 domain), and hence affect the 3R/4R ratio. While Alzheimer's disease (AD), is the most prevalent Tauopathy, no mutations in MAPT associated with AD have been identified. Brains of AD patients are pathologically characterized by the combined presence of amyloid plaques and neurofibrillary tangles [171]. Familial forms of AD, termed early onset familial AD (EOFAD) with clinical mutations in APP or PS1/2, have an early onset, and are invariably characterized by the combined presence of amyloid and Tau pathology [24, 80, 170]. These EOFAD cases, identify a causal link between APP/PS1 misprocessing and the development of Tau pathology and neurodegeneration [80, 170]. Furthermore, combined genetic, pathological, biomarker and in vivo modelling data, indicate that amyloid pathology precedes Tau pathology, and support a role for Aβ as initiator and Tau as executor in the pathogenetic process of AD [80, 96, 97]. Hence, AD is often considered as a secondary Tauopathy (similar as for Down syndrome patients), in contrast to the primary Tauopathies described above. 
Tau aggregates in Tauopathies vary with respect to the ratio of different Tau isoforms (3R/4R), to the cell types displaying Tau aggregation and the structure of the aggregates. However, in all Tauopathies a strong correlation between progressive development of pathological Tau accumulation and the loss of the respective brain functions is observed.",2019-01-01 +23175607,"CyanoLyase: a database of phycobilin lyase sequences, motifs and functions.","CyanoLyase (http://cyanolyase.genouest.org/) is a manually curated sequence and motif database of phycobilin lyases and related proteins. These enzymes catalyze the covalent ligation of chromophores (phycobilins) to specific binding sites of phycobiliproteins (PBPs). The latter constitute the building bricks of phycobilisomes, the major light-harvesting systems of cyanobacteria and red algae. Phycobilin lyases sequences are poorly annotated in public databases. Sequences included in CyanoLyase were retrieved from all available genomes of these organisms and a few others by similarity searches using biochemically characterized enzyme sequences and then classified into 3 clans and 32 families. Amino acid motifs were computed for each family using Protomata learner. CyanoLyase also includes BLAST and a novel pattern matching tool (Protomatch) that allow users to rapidly retrieve and annotate lyases from any new genome. In addition, it provides phylogenetic analyses of all phycobilin lyases families, describes their function, their presence/absence in all genomes of the database (phyletic profiles) and predicts the chromophorylation of PBPs in each strain. The site also includes a thorough bibliography about phycobilin lyases and genomes included in the database. 
This resource should be useful to scientists and companies interested in natural or artificial PBPs, which have a number of biotechnological applications, notably as fluorescent markers.",2012-11-21 +27256311,Benchmarking the next generation of homology inference tools.,"

Motivation

Over the last decades, vast numbers of sequences were deposited in public databases. Bioinformatics tools allow homology and consequently functional inference for these sequences. New profile-based homology search tools have been introduced, allowing reliable detection of remote homologs, but have not been systematically benchmarked. To provide such a comparison, which can guide bioinformatics workflows, we extend and apply our previously developed benchmark approach to evaluate the 'next generation' of profile-based approaches, including CS-BLAST, HHSEARCH and PHMMER, in comparison with the non-profile based search tools NCBI-BLAST, USEARCH, UBLAST and FASTA.

Method

We generated challenging benchmark datasets based on protein domain architectures within either the PFAM + Clan, SCOP/Superfamily or CATH/Gene3D domain definition schemes. From each dataset, homologous and non-homologous protein pairs were aligned using each tool, and standard performance metrics calculated. We further measured congruence of domain architecture assignments in the three domain databases.

Results

CSBLAST and PHMMER had overall highest accuracy. FASTA, UBLAST and USEARCH showed large trade-offs of accuracy for speed optimization.

Conclusion

Profile methods are superior at inferring remote homologs but the difference in accuracy between methods is relatively small. PHMMER and CSBLAST stand out with the highest accuracy, yet still at a reasonable computational cost. Additionally, we show that less than 0.1% of Swiss-Prot protein pairs considered homologous by one database are considered non-homologous by another, implying that these classifications represent equivalent underlying biological phenomena, differing mostly in coverage and granularity.

Availability and implementation

Benchmark datasets and all scripts are placed at (http://sonnhammer.org/download/Homology_benchmark).

Contact

forslund@embl.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +22067451,IDEAL: Intrinsically Disordered proteins with Extensive Annotations and Literature.,"IDEAL, Intrinsically Disordered proteins with Extensive Annotations and Literature (http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL/), is a collection of knowledge on experimentally verified intrinsically disordered proteins. IDEAL contains manual annotations by curators on intrinsically disordered regions, interaction regions to other molecules, post-translational modification sites, references and structural domain assignments. In particular, IDEAL explicitly describes protean segments that can be transformed from a disordered state to an ordered state. Since in most cases they can act as molecular recognition elements upon binding of partner proteins, IDEAL provides a data resource for functional regions of intrinsically disordered proteins. The information in IDEAL is provided on a user-friendly graphical view and in a computer-friendly XML format.",2011-11-08 +29067135,The braingraph.org database of high resolution structural connectomes and the brain graph tools.,"Based on the data of the NIH-funded Human Connectome Project, we have computed structural connectomes of 426 human subjects in five different resolutions of 83, 129, 234, 463 and 1015 nodes and several edge weights. The graphs are given in anatomically annotated GraphML format that facilitates better further processing and visualization. For 96 subjects, the anatomically classified sub-graphs can also be accessed, formed from the vertices corresponding to distinct lobes or even smaller regions of interests of the brain. For example, one can easily download and study the connectomes, restricted to the frontal lobes or just to the left precuneus of 96 subjects using the data. Partially directed connectomes of 423 subjects are also available for download. 
We also present a GitHub-deposited set of tools, called the Brain Graph Tools, for several processing tasks of the connectomes on the site http://braingraph.org.",2017-06-20 +26806463,Essential proteins and possible therapeutic targets of Wolbachia endosymbiont and development of FiloBase--a comprehensive drug target database for Lymphatic filariasis.,"Lymphatic filariasis (Lf) is one of the oldest and most debilitating tropical diseases. Millions of people are suffering from this prevalent disease. It is estimated to infect over 120 million people in at least 80 nations of the world through the tropical and subtropical regions. More than one billion people are in danger of getting affected with this life-threatening disease. Several studies were suggested its emerging limitations and resistance towards the available drugs and therapeutic targets for Lf. Therefore, better medicine and drug targets are in demand. We took an initiative to identify the essential proteins of Wolbachia endosymbiont of Brugia malayi, which are indispensable for their survival and non-homologous to human host proteins. In this current study, we have used proteome subtractive approach to screen the possible therapeutic targets for wBm. In addition, numerous literatures were mined in the hunt for potential drug targets, drugs, epitopes, crystal structures, and expressed sequence tag (EST) sequences for filarial causing nematodes. Data obtained from our study were presented in a user friendly database named FiloBase. We hope that information stored in this database may be used for further research and drug development process against filariasis. URL: http://filobase.bicpu.edu.in.",2016-01-25 +23572411,Analysis of Latino populations from GALA and MEC studies reveals genomic loci with biased local ancestry estimation.,"

Motivation

Local ancestry analysis of genotype data from recently admixed populations (e.g. Latinos, African Americans) provides key insights into population history and disease genetics. Although methods for local ancestry inference have been extensively validated in simulations (under many unrealistic assumptions), no empirical study of local ancestry accuracy in Latinos exists to date. Hence, interpreting findings that rely on local ancestry in Latinos is challenging.

Results

Here, we use 489 nuclear families from the mainland USA, Puerto Rico and Mexico in conjunction with 3204 unrelated Latinos from the Multiethnic Cohort study to provide the first empirical characterization of local ancestry inference accuracy in Latinos. Our approach for identifying errors does not rely on simulations but on the observation that local ancestry in families follows Mendelian inheritance. We measure the rate of local ancestry assignments that lead to Mendelian inconsistencies in local ancestry in trios (MILANC), which provides a lower bound on errors in the local ancestry estimates. We show that MILANC rates observed in simulations underestimate the rate observed in real data, and that MILANC varies substantially across the genome. Second, across a wide range of methods, we observe that loci with large deviations in local ancestry also show enrichment in MILANC rates. Therefore, local ancestry estimates at such loci should be interpreted with caution. Finally, we reconstruct ancestral haplotype panels to be used as reference panels in local ancestry inference and show that ancestry inference is significantly improved by incorporating these reference panels.

Availability and implementation

We provide the reconstructed reference panels together with the maps of MILANC rates as a public resource for researchers analyzing local ancestry in Latinos at http://bogdanlab.pathology.ucla.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-09 +22135291,Rhea--a manually curated resource of biochemical reactions.,"Rhea (http://www.ebi.ac.uk/rhea) is a comprehensive resource of expert-curated biochemical reactions. Rhea provides a non-redundant set of chemical transformations for use in a broad spectrum of applications, including metabolic network reconstruction and pathway inference. Rhea includes enzyme-catalyzed reactions (covering the IUBMB Enzyme Nomenclature list), transport reactions and spontaneously occurring reactions. Rhea reactions are described using chemical species from the Chemical Entities of Biological Interest ontology (ChEBI) and are stoichiometrically balanced for mass and charge. They are extensively manually curated with links to source literature and other public resources on metabolism including enzyme and pathway databases. This cross-referencing facilitates the mapping and reconciliation of common reactions and compounds between distinct resources, which is a common first step in the reconstruction of genome scale metabolic networks and models.",2011-12-01 +29360322,"Trends in Health Care Expenditures for Children under Age 18: 2001, 2006, and 2011","This Statistical Brief compares summary statistics on health care expenditures and expenditure distributions by type of service and sources of payment for children under age 18 in 2001, 2006, and 2011. The estimates are derived from data collected in the Medical Expenditure Panel Survey Household (MEPS-HC) and Medical Provider Components (MEPS-MPC) on the U.S. civilian noninstitutionalized population. Health care expenses in MEPS represent payments to physicians, hospitals, and other health care providers for services reported by respondents to the MEPS-HC. 
Estimates for 2001 and 2006 were adjusted to 2011 dollars based on the Gross Domestic Product (GDP) Price Index to remove the impact of medical price inflation on comparisons (http://www.meps.ahrq.gov/mepsweb/about_meps/Price_Index.shtml). All differences between estimates noted in the text are statistically significant at the 0.05 level or better.",2018-01-24 +27519173,ISOexpresso: a web-based platform for isoform-level expression analysis in human cancer.,"

Background

Alternative splicing events that result in the production of multiple gene isoforms reveals important molecular mechanisms. Gene isoforms are often differentially expressed across organs and tissues, developmental stages, and disease conditions. Specifically, recent studies show that aberrant regulation of alternative splicing frequently occurs in cancer to affect tumor cell transformation and growth. While analysis of isoform expression is important for discovering tumor-specific isoform signatures and interpreting relevant genomic mutations, there is currently no web-based, easy-to-use, and publicly available platform for this purpose.

Description

We developed ISOexpresso to provide information regarding isoform existence and expression, which can be grouped by cancer vs. normal conditions, cancer types, and tissue types. ISOexpresso implements two main functions: First, the Isoform Expression View function creates visualizations for condition-specific RNA/isoform expression patterns upon query of a gene of interest. With this function, users can easily determine the major isoform (the most expressed isoform in a sample) of a gene with respect to the condition and check whether it matches the known canonical isoform. ISOexpresso outputs expression levels of all known transcripts to check alterations of expression landscape and to find potential tumor-specific isoforms. Second, the User Data Annotation function supports annotation of genomic variants to determine the most plausible consequence of a variation (e.g., an amino acid change) among many possible interpretations. As most coding sequence mutations are effective through the subsequent transcription and translation, ISOexpresso automatically prioritizes transcripts that act as backbones for mutation effect prediction by their relative expression. By employing ISOexpresso, we could investigate the consistency between the most expressed and known canonical/principal isoforms, as well as infer candidate tumor-specific isoforms based on their expression levels. In addition, we confirmed that ISOexpresso could easily reproduce previously known isoform expression patterns: recurrent observation of a major isoform across tissues, differential isoform expression patterns in a given tissue, and switching of major isoform during tumorigenesis.

Conclusions

ISOexpresso serves as a web-based, easy-to-use platform for isoform expression and alteration analysis based on large-scale cancer database. We anticipate that ISOexpresso will expedite formulation and confirmation of novel hypotheses by providing isoform-level perspectives on cancer research. The ISOexpresso database is available online at http://wiki.tgilab.org/ISOexpresso/ .",2016-08-12 +,NI-05IMMEDIATE POST-RESECTION PERICAVITARIAN DWI HYPERINTENSITY IN GLIOBLASTOMA PATIENTS IS PREDICTIVE OF PATIENT OUTCOME,"BACKGROUND: Some investigators studied the impact of postoperative infarction, infection and surgical complications on Glioblastoma patient survival using Magnetic Resonance Diffusion-Weighted Imaging (MR-DWI). We attempt to investigate the prognostic role of postoperative DWI hyperintensity in patients with GBM. METHODS AND MATERIALS: We obtained immediate postoperative (24-72 hours post resection) brain MRI images of 60 GBM patients from our institution's imaging archive and The Cancer Imaging Archive (TCIA). Using 3D Slicer 4.3.1 (http://www.slicer.org) 2 trained neuroradiologists experienced in tumor volumetry segmented residual enhancement on post-contrast T1 Weighted Imaging (T1WI) and pericavitarian edema/invasion on T2 FLAIR sequence. Segmentation maps were then overlaid over Diffusion Weighted Images (DWI) and Apparent Diffusion Coefficient (ADC) maps. ADC values around resection cavity were obtained. We stratified patients into two categories; facilitated diffusion group and restricted diffusion group. We included different variables in our analysis such as, age, gender, preoperative KPS status, tumor location, preoperative volumetric analysis and post operative volumetric data (quantitative evaluation of extent of resection). We used Kaplan Meier curves for survival analysis after adjustment for potential confounding factors, such as age, tumor location and extent of resection. 
RESULTS: We created a new non-invasive imaging biomarker that can predict patient outcome in immediate postoperative setting. CONCLUSION: DWI radiophenotypes and ADC map values can be used as a non-invasive prognostic tool for patient overall survival and progression-free survival.",2014-11-01 +22080548,Major submissions tool developments at the European Nucleotide Archive.,"The European Nucleotide Archive (ENA; http://www.ebi.ac.uk/ena), Europe's primary nucleotide sequence resource, captures and presents globally comprehensive nucleic acid sequence and associated information. Covering the spectrum from raw data to assembled and functionally annotated genomes, the ENA has witnessed a dramatic growth resulting from advances in sequencing technology and ever broadening application of the methodology. During 2011, we have continued to operate and extend the broad range of ENA services. In particular, we have released major new functionality in our interactive web submission system, Webin, through developments in template-based submissions for annotated sequences and support for raw next-generation sequence read submissions.",2011-11-12 +21418831,[A genetic and clinical study in a family with familial hypercholesterolemia].,"

Objective

To investigate the low density lipoprotein receptor (LDLR) gene and apolipoprotein (Apo) B gene mutation in a Chinese family with familial hypercholesterolemia (FH) and give the kindreds clinical check-ups.

Methods

After physical examination, the kindreds underwent ECG and ultrasound checks. Blood samples were tested for lipid profiles. The promoter and all eighteen exons of LDLR gene were investigated by using PCR and agarose gel electrophoresis in combination with DNA sequence analysis. The results were compared with the normal sequences in GenBank and FH database (www.ucl.ac.uk/fh) to find mutations. In addition, the apolipoprotein B100 gene for known mutations (R3500Q, R3531C, R3501W and R3480W) that cause familial defective ApoB100 (FDB) was also tested using the same method.

Results

A novel homozygous G > A mutation at the 1581 bp of exon 10 was detected in the proband and his siblings. It caused a substitution of amino acid Glu to Gly at codon 496. A novel heterozygous G > A mutation at the 1581 bp of exon 10 was detected in his parents. No mutations of R3500Q, R3531C, R3501W and R3480W of ApoB100 were observed. ECGs were normal. Atherosclerosis was found in all family members by ultrasound checks.

Conclusions

The homozygous G > A mutation at the 1581 bp of exon 10 was first determined in our country. The change of amino acid Glu to Gly is responsible for FH of the family. The type of the gene mutation was not found in the FH database (www.ucl.ac.uk/fh). It's a new type of LDLR mutation.",2011-02-01 +29976669,Adenovirus E1A Activation Domain Regulates H3 Acetylation Affecting Varied Steps in Transcription at Different Viral Promoters. ,"How histone acetylation promotes transcription is not clearly understood. Here, we confirm an interaction between p300 and the adenovirus 2 large E1A activation domain (AD) and map the interacting regions in E1A by observing colocalization at an integrated lacO array of fusions of LacI-mCherry to E1A fragments with YFP-p300. Viruses with mutations in E1A subdomains were constructed and analyzed for kinetics of early viral RNA expression and association of acetylated H3K9, K18, K27, TBP, and RNA polymerase II (Pol II) across the viral genome. The results indicate that this E1A interaction with p300 is required for H3K18 and H3K27 acetylation at the E2early, E3, and E4 promoters and is required for TBP and Pol II association with the E2early promoter. In contrast, H3K18/27 acetylation was not required for TBP and Pol II association with the E3 and E4 promoters but was required for E4 transcription at a step subsequent to Pol II preinitiation complex assembly. IMPORTANCE Despite a wealth of data associating promoter and enhancer region histone N-terminal tail lysine acetylation with transcriptional activity, there are relatively few examples of studies that establish causation between these histone posttranslational modifications and transcription. While hypoacetylation of histone H3 lysines 18 and 27 is associated with repression, the step(s) in the overall process of transcription that is blocked at a hypoacetylated promoter is not clearly established in most instances. 
Studies presented here confirm that the adenovirus 2 large E1A protein activation domain interacts with p300, as reported previously (P. Pelka, J. N. G. Ablack, J. Torchia, A. S. Turnell, R. J. A. Grand, J. S. Mymryk, Nucleic Acids Res 37:1095-1106, 2009, https://doi.org/10.1093/nar/gkn1057), and that the resulting acetylation of H3K18/27 affects varied steps in transcription at different viral promoters.",2018-08-29 +27924043,AraPheno: a public database for Arabidopsis thaliana phenotypes.,"Natural genetic variation makes it possible to discover evolutionary changes that have been maintained in a population because they are advantageous. To understand genotype-phenotype relationships and to investigate trait architecture, the existence of both high-resolution genotypic and phenotypic data is necessary. Arabidopsis thaliana is a prime model for these purposes. This herb naturally occurs across much of the Eurasian continent and North America. Thus, it is exposed to a wide range of environmental factors and has been subject to natural selection under distinct conditions. Full genome sequencing data for more than 1000 different natural inbred lines are available, and this has encouraged the distributed generation of many types of phenotypic data. To leverage these data for meta analyses, AraPheno (https://arapheno.1001genomes.org) provide a central repository of population-scale phenotypes for A. thaliana inbred lines. AraPheno includes various features to easily access, download and visualize the phenotypic data. 
This will facilitate a comparative analysis of the many different types of phenotypic data, which is the base to further enhance our understanding of the genotype-phenotype map.",2016-10-24 +29360330,"Trends in Health Care Expenditures for the Elderly, Age 65 and Older: 2001, 2006, and 2011","This Statistical Brief compares summary statistics on health care expenditures and expenditure distributions by type of service and sources of payment for the elderly (age 65 and over) in 2001, 2006, and 2011. The estimates are derived from data collected in the Medical Expenditure Panel Survey Household (MEPS-HC) and Medical Provider Components (MEPS-MPC) on the U.S. civilian noninstitutionalized population. Health care expenses in MEPS represent payments to physicians, hospitals, and other health care providers for services reported by respondents to the MEPS-HC. Estimates for 2001 and 2006 were adjusted to 2011 dollars based on the Gross Domestic Product (GDP) Price Index to remove the impact of medical price inflation on comparisons (http://www.meps.ahrq.gov/mepsweb/about_meps/Price_Index.shtml). All differences between estimates noted in the text are statistically significant at the 0.05 level or better.",2018-01-24 +25267794,The Cancer Genomics Hub (CGHub): overcoming cancer through the power of torrential data. ,"The Cancer Genomics Hub (CGHub) is the online repository of the sequencing programs of the National Cancer Institute (NCI), including The Cancer Genomics Atlas (TCGA), the Cancer Cell Line Encyclopedia (CCLE) and the Therapeutically Applicable Research to Generate Effective Treatments (TARGET) projects, with data from 25 different types of cancer. The CGHub currently contains >1.4 PB of data, has grown at an average rate of 50 TB a month and serves >100 TB per week. 
The architecture of CGHub is designed to support bulk searching and downloading through a Web-accessible application programming interface, enforce patient genome confidentiality in data storage and transmission and optimize for efficiency in access and transfer. In this article, we describe the design of these three components, present performance results for our transfer protocol, GeneTorrent, and finally report on the growth of the system in terms of data stored and transferred, including estimated limits on the current architecture. Our experienced-based estimates suggest that centralizing storage and computational resources is more efficient than wide distribution across many satellite labs. Database URL: https://cghub.ucsc.edu.",2014-09-29 +27942567,Data on fossil fuel availability for Shared Socioeconomic Pathways.,"The data files contain the assumptions and results for the construction of cumulative availability curves for coal, oil and gas for the five Shared Socioeconomic Pathways. The files include the maximum availability (also known as cumulative extraction cost curves) and the assumptions that are applied to construct the SSPs. The data is differentiated into twenty regions. The resulting cumulative availability curves are plotted and the aggregate data as well as cumulative availability curves are compared across SSPs. The methodology, the data sources and the assumptions are documented in a related article (N. Bauer, J. Hilaire, R.J. Brecha, J. Edmonds, K. Jiang, E. Kriegler, H.-H. Rogner, F. Sferra, 2016) [1] under DOI: http://dx.doi.org/10.1016/j.energy.2016.05.088.",2016-11-18 +25182364,LBVS: an online platform for ligand-based virtual screening using publicly accessible databases.,"Abundant data on compound bioactivity and publicly accessible chemical databases increase opportunities for ligand-based drug discovery. 
In order to make full use of the data, an online platform for ligand-based virtual screening (LBVS) using publicly accessible databases has been developed. LBVS adopts Bayesian learning approach to create virtual screening models because of its noise tolerance, speed, and efficiency in extracting knowledge from data. LBVS currently includes data derived from BindingDB and ChEMBL. Three validation approaches have been employed to evaluate the virtual screening models created from LBVS. The tenfold cross validation results of twenty different LBVS models demonstrate that LBVS achieves an average AUC value of 0.86. Our internal and external testing results indicate that LBVS is predictive for lead identifications. LBVS can be publicly accessed at http://rcdd.sysu.edu.cn/lbvs.",2014-09-03 +29329102,Arsenic Exposure from Drinking Water and Urinary Metabolomics: Associations and Long-Term Reproducibility in Bangladesh Adults.,"

Background

Chronic exposure to inorganic arsenic from drinking water has been associated with a host of cancer and noncancer diseases. The application of metabolomics in epidemiologic studies may allow researchers to identify biomarkers associated with arsenic exposure and its health effects.

Objective

Our goal was to evaluate the long-term reproducibility of urinary metabolites and associations between reproducible metabolites and arsenic exposure.

Methods

We studied samples and data from 112 nonsmoking participants (58 men and 54 women) who were free of any major chronic diseases and who were enrolled in the Health Effects of Arsenic Longitudinal Study (HEALS), a large prospective cohort study in Bangladesh. Using a global gas chromatography-mass spectrometry platform, we measured metabolites in their urine samples, which were collected at baseline and again 2 y apart, and estimated intraclass correlation coefficients (ICCs). Linear regression was used to assess the association between arsenic exposure at baseline and metabolite levels in baseline urine samples.

Results

We identified 2,519 molecular features that were present in all 224 urine samples from the 112 participants, of which 301 had an ICC of ≥0.60. Of the 301 molecular features, water arsenic was significantly related to 31 molecular features and urinary arsenic was significantly related to 74 molecular features after adjusting for multiple comparisons. Six metabolites with a confirmed identity were identified from the 82 molecular features that were significantly associated with either water arsenic or urinary arsenic after adjustment for multiple comparisons.

Conclusions

Our study identified urinary metabolites with long-term reproducibility that were associated with arsenic exposure. The data established the feasibility of using metabolomics in future larger studies. https://doi.org/10.1289/EHP1992.",2018-01-12 +29179110,LAND-deFeND - An innovative database structure for landslides and floods and their consequences.,"Information on historical landslides and floods - collectively called ""geo-hydrological hazards"" - is key to understand the complex dynamics of the events, to estimate the temporal and spatial frequency of damaging events, and to quantify their impact. A number of databases on geo-hydrological hazards and their consequences have been developed worldwide at different geographical and temporal scales. Of the few available database structures that can handle information on both landslides and floods some are outdated and others were not designed to store, organize, and manage information on single phenomena or on the type and monetary value of the damages and the remediation actions. Here, we present the LANDslides and Floods National Database (LAND-deFeND), a new database structure able to store, organize, and manage in a single digital structure spatial information collected from various sources with different accuracy. In designing LAND-deFeND, we defined four groups of entities, namely: nature-related, human-related, geospatial-related, and information-source-related entities that collectively can describe fully the geo-hydrological hazards and their consequences. In LAND-deFeND, the main entities are the nature-related entities, encompassing: (i) the ""phenomenon"", a single landslide or local inundation, (ii) the ""event"", which represent the ensemble of the inundations and/or landslides occurred in a conventional geographical area in a limited period, and (iii) the ""trigger"", which is the meteo-climatic or seismic cause (trigger) of the geo-hydrological hazards. 
LAND-deFeND maintains the relations between the nature-related entities and the human-related entities even where the information is missing partially. The physical model of the LAND-deFeND contains 32 tables, including nine input tables, 21 dictionary tables, and two association tables, and ten views, including specific views that make the database structure compliant with the EC INSPIRE and the Floods Directives. The LAND-deFeND database structure is open, and freely available from http://geomorphology.irpi.cnr.it/tools.",2017-11-24 +22434840,Aptamer Base: a collaborative knowledge base to describe aptamers and SELEX experiments.,"Over the past several decades, rapid developments in both molecular and information technology have collectively increased our ability to understand molecular recognition. One emerging area of interest in molecular recognition research includes the isolation of aptamers. Aptamers are single-stranded nucleic acid or amino acid polymers that recognize and bind to targets with high affinity and selectivity. While research has focused on collecting aptamers and their interactions, most of the information regarding experimental methods remains in the unstructured and textual format of peer reviewed publications. To address this, we present the Aptamer Base, a database that provides detailed, structured information about the experimental conditions under which aptamers were selected and their binding affinity quantified. The open collaborative nature of the Aptamer Base provides the community with a unique resource that can be updated and curated in a decentralized manner, thereby accommodating the ever evolving field of aptamer research. 
DATABASE URL: http://aptamer.freebase.com.",2012-03-20 +30182387,Altered excitability and exocytosis in chromaffin cells from the R6/1 mouse model of Huntington's disease is linked to over-expression of mutated huntingtin.,"As the peripheral sympathoadrenal axis is tightly controlled by the cortex via hypothalamus and brain stem, the central pathological features of Huntington's disease (HD), that is, deposition of mutated huntingtin and synaptic dysfunctions, could also be expressed in adrenal chromaffin cells. To test this hypothesis we here present a thorough investigation on the pathological and functional changes undergone by chromaffin cells (CCs) from 2-month (2 m) to 7-month (7 m) aged wild-type (WT) and R6/1 mouse model of Huntington's disease (HD), stimulated with acetylcholine (ACh) or high [K+ ] (K+ ). In order to do this, we used different techniques such as immunohistochemistry, patch-clamp, and amperometric recording. With respect to WT cells, some of the changes next summarized were already observed in HD mice at a pre-disease stage (2 m); however, they were more pronounced at 7 m when motor deficits were clearly established, as follows: (i) huntingtin over-expression as nuclear aggregates in CCs; (ii) smaller CC size with decreased dopamine β-hydroxylase expression, indicating lesser number of chromaffin secretory vesicles; (iii) reduced adrenal tissue catecholamine content; (iv) reduced Na+ currents with (v) membrane hyperpolarization and reduced ACh-evoked action potentials; (v) reduced [Ca2+ ]c transients with faster Ca2+ clearance; (vi) diminished quantal secretion with smaller vesicle quantal size; (vii) faster kinetics of the exocytotic fusion pore, pore expansion, and closure. 
On the basis of these data, the hypothesis is here raised in the sense that nuclear deposition of mutated huntingtin in adrenal CCs of R6/1 mice could be primarily responsible for poorer Na+ channel expression and function, giving rise to profound depression of cell excitability, altered Ca2+ handling and exocytosis. OPEN PRACTICES: This article has received a badge for *Open Materials* because it provided all relevant information to reproduce the study in the manuscript. The complete Open Science Disclosure form for this article can be found at the end of the article. More information about the Open Practices badges can be found at https://cos.io/our-services/open-science-badges/. Cover Image for this issue: doi: 10.1111/jnc.14201.",2018-11-12 +27231148,Nutritional intervention as part of functional rehabilitation in older people with reduced functional ability: a systematic review and meta-analysis of randomised controlled studies.,"

Background

Nutritional intervention is increasingly recognised as having an important role in functional rehabilitation for older people. Nonetheless, a greater understanding of the functional benefit of nutritional interventions is needed.

Methods

A systematic review and meta-analysis examined randomised controlled trials (RCTs) published between 2007 and 2014 with the aim of determining whether nutritional intervention combined with rehabilitation benefited older people with reduced functional ability. Six electronic databases were searched. RCTs including people aged 65 years and older with reduced physical, social and/or cognitive function were included. PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines were followed, and gradepro computer software (http://gradepro.org) was used for the quality assessment of critical and important outcomes. Included studies considered to be clinical homogenous were combined in a meta-analysis.

Results

Of the 788 studies screened, five were identified for inclusion. Nutritional intervention given with functional rehabilitation improved energy and protein intake, although it failed to provide any improvement in final body weight, hand-grip strength or muscle strength. There was no difference between groups in the critical outcomes; balance, cognition, activities of daily living and mortality at long-term follow-up. Nutritional intervention given with functional rehabilitation was associated with an increased likelihood of both mortality (odds ratio = 1.77; 95% confidence interval = 1.13-2.76) and hospitalisation (odds ratio = 2.29; 95% confidence interval = 1.10-4.79) during the intervention. Meta-analysis of the baseline data showed that, overall, the intervention cohort had a lower body weight and cognition.

Conclusions

This meta-analysis highlights concerns regarding the quality of the randomisation of participants at baseline. Future high-quality research is essential to establish whether older people with loss of functional abilities can benefit from nutritional intervention.",2016-05-27 +29311278,Bacterial Surface Spreading Is More Efficient on Nematically Aligned Polysaccharide Substrates. ,"Biofilm-forming bacteria typically deposit layers of polysaccharides on the surfaces they inhabit; hence, polysaccharides are their immediate environment on such surfaces. Previously, we showed that many biofilm-forming bacteria preferentially spread in the direction of aligned and densely packed polysaccharide fibers in compressed substrates, a behavior we referred to as polymertropism. This arrangement of polysaccharide fibers is likely to be similar to that found in the ""slime"" trails deposited by many biofilm-forming bacteria and would explain previous observations that bacteria tend to follow these trails of polysaccharides. Here, we show that groups of cells or flares spread more rapidly on substrates containing aligned and densely packed polysaccharide fibers. Flares also persist longer, tend to hold their trajectories parallel to the long axes of polysaccharide fibers longer, and ultimately show an increase in displacement away from their origin. On the basis of these findings and others, we propose a model for polymertropism. Namely, we suggest that the packing of the aligned polymers increases the efficiency of surface spreading in the direction of the polymer's long axes; therefore, bacteria tend to spread more rapidly in this direction. Additional work suggests that bacteria can leverage polymertropism, and presumably more efficient surface spreading, for a survival advantage. 
In particular, when two bacterial species were placed in close proximity and in competition with each other, the ability of one species to move rapidly and directly away from the other by utilizing the aligned polymers of compressed agar substrates led to a clear survival benefit.IMPORTANCE The directed movement of bacteria on compressed substrates was first described in the 1940s and referred to as elasticotaxis (R. Y. Stanier, J Bacteriol 44:405-412, 1942). More recently, this behavior was referred to as polymertropism, as it seems to be a response to the nematic alignment and tight packing of polymers in the substrate (D. J. Lemon, X. Yang, P. Srivastava, Y. Y. Luk, A. G. Garza, Sci Rep 7:7643, 2017, https://doi.org/10.1038/s41598-017-07486-0). The data presented here suggest that bacteria are more efficient at surface spreading when the polymers in the substrate are arranged in this manner. These data also suggest that bacteria can leverage polymertropism, and presumably more efficient surface spreading, for a survival advantage. Namely, one bacterial species was able to use its strong polymertropism response to escape from and survive competition with another species that normally outcompetes it.",2018-03-12 +28875543,PDBsum: Structural summaries of PDB entries.,"PDBsum is a web server providing structural information on the entries in the Protein Data Bank (PDB). The analyses are primarily image-based and include protein secondary structure, protein-ligand and protein-DNA interactions, PROCHECK analyses of structural quality, and many others. The 3D structures can be viewed interactively in RasMol, PyMOL, and a JavaScript viewer called 3Dmol.js. Users can upload their own PDB files and obtain a set of password-protected PDBsum analyses for each. The server is freely accessible to all at: http://www.ebi.ac.uk/pdbsum.",2017-10-27 +28137711,'COV'COP' allows to detect CNVs responsible for inherited diseases among amplicons sequencing data.,"

Summary

In order to help molecular geneticists to rapidly identify CNVs responsible for inherited diseases among amplicons sequencing data generated by NGS, we designed a user-friendly tool ' Cov'Cop '. Using the run's coverage file provided by the sequencer, Cov'Cop simultaneously analyzes all the patients of the run using a two-stage algorithm containing correction and normalization levels and provides an easily understandable output, showing with various colors, potentially deleted and duplicated amplicons.

Availability and implementation

https://git.unilim.fr/merilp02/CovCop.

Contact

asliabaldini@unilim.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +26066708,TRRUST: a reference database of human transcriptional regulatory interactions.,"The reconstruction of transcriptional regulatory networks (TRNs) is a long-standing challenge in human genetics. Numerous computational methods have been developed to infer regulatory interactions between human transcriptional factors (TFs) and target genes from high-throughput data, and their performance evaluation requires gold-standard interactions. Here we present a database of literature-curated human TF-target interactions, TRRUST (transcriptional regulatory relationships unravelled by sentence-based text-mining, http://www.grnpedia.org/trrust), which currently contains 8,015 interactions between 748 TF genes and 1,975 non-TF genes. A sentence-based text-mining approach was employed for efficient manual curation of regulatory interactions from approximately 20 million Medline abstracts. To the best of our knowledge, TRRUST is the largest publicly available database of literature-curated human TF-target interactions to date. TRRUST also has several useful features: i) information about the mode-of-regulation; ii) tests for target modularity of a query TF; iii) tests for TF cooperativity of a query target; iv) inferences about cooperating TFs of a query TF; and v) prioritizing associated pathways and diseases with a query TF. 
We observed high enrichment of TF-target pairs in TRRUST for top-scored interactions inferred from high-throughput data, which suggests that TRRUST provides a reliable benchmark for the computational reconstruction of human TRNs.",2015-06-12 +29729051,Preliminary assessment of stable nitrogen and oxygen isotopic composition of USGS51 and USGS52 nitrous oxide reference gases and perspectives on calibration needs.,"Despite a long history and growing interest in isotopic analyses of N2 O, there is a lack of isotopically characterized N2 O isotopic reference materials (standards) to enable normalization and reporting of isotope-delta values. Here we report the isotopic characterization of two pure N2 O gas reference materials, USGS51 and USGS52, which are now available for laboratory calibration (https://isotopes.usgs.gov/lab/referencematerials.html).A total of 400 sealed borosilicate glass tubes of each N2 O reference gas were prepared from a single gas filling of a high vacuum line. We demonstrated isotopic homogeneity via dual-inlet isotope-ratio mass spectrometry. Isotopic analyses of these reference materials were obtained from eight laboratories to evaluate interlaboratory variation and provide preliminary isotopic characterization of their δ15 N, δ18 O, δ15 Nα , δ15 Nβ and site preference (SP ) values.The isotopic homogeneity of both USGS51 and USGS52 was demonstrated by one-sigma standard deviations associated with the determinations of their δ15 N, δ18 O, δ15 Nα , δ15 Nβ and SP values of 0.12 mUr or better. The one-sigma standard deviations of SP measurements of USGS51 and USGS52 reported by eight laboratories participating in the interlaboratory comparison were 1.27 and 1.78 mUr, respectively.The agreement of isotope-delta values obtained in the interlaboratory comparison was not sufficient to provide reliable accurate isotope measurement values for USGS51 and USGS52. 
We propose that provisional values for the isotopic composition of USGS51 and USGS52 determined at the Tokyo Institute of Technology can be adopted for normalizing and reporting sample data until further refinements are achieved through additional calibration efforts.",2018-08-01 +29161266,Knowledge-based prediction of protein backbone conformation using a structural alphabet.,"Libraries of structural prototypes that abstract protein local structures are known as structural alphabets and have proven to be very useful in various aspects of protein structure analyses and predictions. One such library, Protein Blocks, is composed of 16 standard 5-residues long structural prototypes. This form of analyzing proteins involves drafting its structure as a string of Protein Blocks. Predicting the local structure of a protein in terms of protein blocks is the general objective of this work. A new approach, PB-kPRED is proposed towards this aim. It involves (i) organizing the structural knowledge in the form of a database of pentapeptide fragments extracted from all protein structures in the PDB and (ii) applying a knowledge-based algorithm that does not rely on any secondary structure predictions and/or sequence alignment profiles, to scan this database and predict most probable backbone conformations for the protein local structures. Though PB-kPRED uses the structural information from homologues in preference, if available. The predictions were evaluated rigorously on 15,544 query proteins representing a non-redundant subset of the PDB filtered at 30% sequence identity cut-off. We have shown that the kPRED method was able to achieve mean accuracies ranging from 40.8% to 66.3% depending on the availability of homologues. The impact of the different strategies for scanning the database on the prediction was evaluated and is discussed. Our results highlight the usefulness of the method in the context of proteins without any known structural homologues. 
A scoring function that gives a good estimate of the accuracy of prediction was further developed. This score estimates very well the accuracy of the algorithm (R2 of 0.82). An online version of the tool is provided freely for non-commercial usage at http://www.bo-protscience.fr/kpred/.",2017-11-21 +29159759,Phylogenetic Classification of Seed Plants of Taiwan.,"BACKGROUND:Biological classification, the hierarchical arrangement of scientific names of organisms, constitutes the core infrastructure of biological databases. For an efficient management of biological databases, adopting a stable and universal biological classification system is crucial. Currently in Taiwan Biodiversity Information Facility (TaiBIF; http://taibif.tw/ ), the national portal website that integrates Taiwan's biodiversity information databases, angiosperms are arranged according to Cronquist's System of Classification, which is not compatible with current trend of the Angiosperm Phylogeny Group (APG) classification. To consolidate the function and management of the database, TaiBIF is moving to adopt the APG IV classification and Christenhusz et al. (Phytotaxa 19:55-70, 2011)'s classification of gymnosperms, which we summarize as the Phylogenetic Classification of Seed Plants of Taiwan. RESULTS:The Phylogenetic Classification of Seed Plants of Taiwan places gymnosperms in five families [vs. eight families in the Flora of Taiwan (FOT)] and angiosperms in 210 families (vs. 193 families in FOT). Three FOT gymnosperm families are synonymized in current treatment. Of the 210 APG IV families, familial circumscriptions of 114 families are identical with FOT and 50 families are recircumscription of FOT, with 46 families newly added. Of the 29 FOT families not included in current classification, two families are excluded and 27 families are synonymized. 
CONCLUSIONS:The adoption of the Phylogenetic Classification of Seed Plants of Taiwan in TaiBIF will provide better service and efficient management of the nation's biodiversity information databases.",2017-11-21 +25368506,Quality Control for RNA-Seq (QuaCRS): An Integrated Quality Control Pipeline.,"QuaCRS (Quality Control for RNA-Seq) is an integrated, simplified quality control (QC) system for RNA-seq data that allows easy execution of several open-source QC tools, aggregation of their output, and the ability to quickly identify quality issues by performing meta-analyses on QC metrics across large numbers of samples in different studies. It comprises two main sections. First is the QC Pack wrapper, which executes three QC tools: FastQC, RNA-SeQC, and selected functions from RSeQC. Combining these three tools into one wrapper provides increased ease of use and provides a much more complete view of sample data quality than any individual tool. Second is the QC database, which displays the resulting metrics in a user-friendly web interface. It was designed to allow users with less computational experience to easily generate and view QC information for their data, to investigate individual samples and aggregate reports of sample groups, and to sort and search samples based on quality. The structure of the QuaCRS database is designed to enable expansion with additional tools and metrics in the future. The source code for not-for-profit use and a fully functional sample user interface with mock data are available at http://bioserv.mps.ohio-state.edu/QuaCRS/.",2014-10-15 +25142412,Dynameomics: data-driven methods and models for utilizing large-scale protein structure repositories for improving fragment-based loop prediction.,"Protein function is intimately linked to protein structure and dynamics yet experimentally determined structures frequently omit regions within a protein due to indeterminate data, which is often due protein dynamics. 
We propose that atomistic molecular dynamics simulations provide a diverse sampling of biologically relevant structures for these missing segments (and beyond) to improve structural modeling and structure prediction. Here we make use of the Dynameomics data warehouse, which contains simulations of representatives of essentially all known protein folds. We developed novel computational methods to efficiently identify, rank and retrieve small peptide structures, or fragments, from this database. We also created a novel data model to analyze and compare large repositories of structural data, such as contained within the Protein Data Bank and the Dynameomics data warehouse. Our evaluation compares these structural repositories for improving loop predictions and analyzes the utility of our methods and models. Using a standard set of loop structures, containing 510 loops, 30 for each loop length from 4 to 20 residues, we find that the inclusion of Dynameomics structures in fragment-based methods improves the quality of the loop predictions without being dependent on sequence homology. Depending on loop length, ∼ 25-75% of the best predictions came from the Dynameomics set, resulting in lower main chain root-mean-square deviations for all fragment lengths using the combined fragment library. We also provide specific cases where Dynameomics fragments provide better predictions for NMR loop structures than fragments from crystal structures. Online access to these fragment libraries is available at http://www.dynameomics.org/fragments.",2014-09-03 +25392425,Rfam 12.0: updates to the RNA families database.,"The Rfam database (available at http://rfam.xfam.org) is a collection of non-coding RNA families represented by manually curated sequence alignments, consensus secondary structures and annotation gathered from corresponding Wikipedia, taxonomy and ontology resources. In this article, we detail updates and improvements to the Rfam data and website for the Rfam 12.0 release. 
We describe the upgrade of our search pipeline to use Infernal 1.1 and demonstrate its improved homology detection ability by comparison with the previous version. The new pipeline is easier for users to apply to their own data sets, and we illustrate its ability to annotate RNAs in genomic and metagenomic data sets of various sizes. Rfam has been expanded to include 260 new families, including the well-studied large subunit ribosomal RNA family, and for the first time includes information on short sequence- and structure-based RNA motifs present within families.",2014-11-11 +29554205,RapidRMSD: rapid determination of RMSDs corresponding to motions of flexible molecules.,"Motivation:The root mean square deviation (RMSD) is one of the most used similarity criteria in structural biology and bioinformatics. Standard computation of the RMSD has a linear complexity with respect to the number of atoms in a molecule, making RMSD calculations time-consuming for the large-scale modeling applications, such as assessment of molecular docking predictions or clustering of spatially proximate molecular conformations. Previously, we introduced the RigidRMSD algorithm to compute the RMSD corresponding to the rigid-body motion of a molecule. In this study, we go beyond the limits of the rigid-body approximation by taking into account conformational flexibility of the molecule. We model the flexibility with a reduced set of collective motions computed with e.g. normal modes or principal component analysis. Results:The initialization of our algorithm is linear in the number of atoms and all the subsequent evaluations of RMSD values between flexible molecular conformations depend only on the number of collective motions that are selected to model the flexibility. Therefore, our algorithm is much faster compared to the standard RMSD computation for large-scale modeling applications. 
We demonstrate the efficiency of our method on several clustering examples, including clustering of flexible docking results and molecular dynamics (MD) trajectories. We also demonstrate how to use the presented formalism to generate pseudo-random constant-RMSD structural molecular ensembles and how to use these in cross-docking. Availability and implementation:We provide the algorithm written in C++ as the open-source RapidRMSD library governed by the BSD-compatible license, which is available at http://team.inria.fr/nano-d/software/RapidRMSD/. The constant-RMSD structural ensemble application and clustering of MD trajectories is available at http://team.inria.fr/nano-d/software/nolb-normal-modes/. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-08-01 +27136074,Digital Three-Dimensional Automation of the Modified Huddart and Bodenham Scoring System for Patients With Cleft Lip and Palate.,"

Objective

The modified Huddart and Bodenham scoring system assesses maxillary arch constriction and surgical outcomes in cleft lip and palate. This project automates modified Huddart and Bodenham scoring using three-dimensional digital models.

Design

Development of a novel software tool.

Setting

The design, construction, development, and testing of the system were carried out at Dundee Dental Hospital.

Patients, participants

Subjects with cleft lip and palate.

Interventions

A plug-in has been developed using an open three-dimensional development platform: Rhinoceros, version 5 ( http://www.rhino3d.co.uk ). Users select cusps on mandibular and maxillary teeth on three-dimensional digital models. A three-dimensional cubic spline generates a mandibular curve, and a best-fit horizontal mandibular reference plane is produced using a least-squares method. Horizontal distances projected from the shortest three-dimensional distances were subsequently calculated between the maxillary cusps and the mandibular curve to calculate the modified Huddart and Bodenham score.

Main outcome measures

Automatic scoring of digital models using the modified Huddart and Bodenham system produces similar results to manual scoring.

Results

By standardizing outcome assessment in cleft care, multicenter comparisons for audit and research can be simplified, allowing centers throughout the world to upload three-dimensional digital models or intraoral scans of the dental arches for remote scoring. Thereafter, these data can feed back into the global database on orofacial clefting as part of the World Health Organization's international collaborative ""Global Burden of Disease"" research project for craniofacial anomalies.

Conclusions

The automated system facilitates quicker and more reliable outcome assessments by minimizing human errors.",2016-05-02 +29049599,Auditory Scene Analysis: An Attention Perspective.,"

Purpose

This review article provides a new perspective on the role of attention in auditory scene analysis.

Method

A framework for understanding how attention interacts with stimulus-driven processes to facilitate task goals is presented. Previously reported data obtained through behavioral and electrophysiological measures in adults with normal hearing are summarized to demonstrate attention effects on auditory perception—from passive processes that organize unattended input to attention effects that act at different levels of the system. Data will show that attention can sharpen stream organization toward behavioral goals, identify auditory events obscured by noise, and limit passive processing capacity.

Conclusions

A model of attention is provided that illustrates how the auditory system performs multilevel analyses that involve interactions between stimulus-driven input and top-down processes. Overall, these studies show that (a) stream segregation occurs automatically and sets the basis for auditory event formation; (b) attention interacts with automatic processing to facilitate task goals; and (c) information about unattended sounds is not lost when selecting one organization over another. Our results support a neural model that allows multiple sound organizations to be held in memory and accessed simultaneously through a balance of automatic and task-specific processes, allowing flexibility for navigating noisy environments with competing sound sources.

Presentation video

http://cred.pubs.asha.org/article.aspx?articleid=2601618.",2017-10-01 +28640810,LASSIM-A network inference toolbox for genome-wide mechanistic modeling.,"Recent technological advancements have made time-resolved, quantitative, multi-omics data available for many model systems, which could be integrated for systems pharmacokinetic use. Here, we present large-scale simulation modeling (LASSIM), which is a novel mathematical tool for performing large-scale inference using mechanistically defined ordinary differential equations (ODE) for gene regulatory networks (GRNs). LASSIM integrates structural knowledge about regulatory interactions and non-linear equations with multiple steady state and dynamic response expression datasets. The rationale behind LASSIM is that biological GRNs can be simplified using a limited subset of core genes that are assumed to regulate all other gene transcription events in the network. The LASSIM method is implemented as a general-purpose toolbox using the PyGMO Python package to make the most of multicore computers and high performance clusters, and is available at https://gitlab.com/Gustafsson-lab/lassim. As a method, LASSIM works in two steps, where it first infers a non-linear ODE system of the pre-specified core gene expression. Second, LASSIM in parallel optimizes the parameters that model the regulation of peripheral genes by core system genes. We showed the usefulness of this method by applying LASSIM to infer a large-scale non-linear model of naïve Th2 cell differentiation, made possible by integrating Th2 specific bindings, time-series together with six public and six novel siRNA-mediated knock-down experiments. ChIP-seq showed significant overlap for all tested transcription factors. Next, we performed novel time-series measurements of total T-cells during differentiation towards Th2 and verified that our LASSIM model could monitor those data significantly better than comparable models that used the same Th2 bindings. 
In summary, the LASSIM toolbox opens the door to a new type of model-based data analysis that combines the strengths of reliable mechanistic models with truly systems-level data. We demonstrate the power of this approach by inferring a mechanistically motivated, genome-wide model of the Th2 transcription regulatory system, which plays an important role in several immune related diseases.",2017-06-22 +27779619,A database of human exposomes and phenomes from the US National Health and Nutrition Examination Survey.,"The National Health and Nutrition Examination Survey (NHANES) is a population survey implemented by the Centers for Disease Control and Prevention (CDC) to monitor the health of the United States whose data is publicly available in hundreds of files. This Data Descriptor describes a single unified and universally accessible data file, merging across 255 separate files and stitching data across 4 surveys, encompassing 41,474 individuals and 1,191 variables. The variables consist of phenotype and environmental exposure information on each individual, specifically (1) demographic information, physical exam results (e.g., height, body mass index), laboratory results (e.g., cholesterol, glucose, and environmental exposures), and (4) questionnaire items. Second, the data descriptor describes a dictionary to enable analysts find variables by category and human-readable description. The datasets are available on DataDryad and a hands-on analytics tutorial is available on GitHub. 
Through a new big data platform, BD2K Patient Centered Information Commons (http://pic-sure.org), we provide a new way to browse the dataset via a web browser (https://nhanes.hms.harvard.edu) and provide application programming interface for programmatic access.",2016-10-25 +25378329,"MatrixDB, the extracellular matrix interaction database: updated content, a new navigator and expanded functionalities.","MatrixDB (http://matrixdb.ibcp.fr) is a freely available database focused on interactions established by extracellular proteins and polysaccharides. It is an active member of the International Molecular Exchange (IMEx) consortium and has adopted the PSI-MI standards for annotating and exchanging interaction data, either at the MIMIx or IMEx level. MatrixDB content has been updated by curation and by importing extracellular interaction data from other IMEx databases. Other major changes include the creation of a new website and the development of a novel graphical navigator, iNavigator, to build and expand interaction networks. Filters may be applied to build sub-networks based on a list of biomolecules, a specified interaction detection method and/or an expression level by tissue, developmental stage, and health state (UniGene data). Any molecule of the network may be selected and its partners added to the network at any time. Networks may be exported under Cytoscape and tabular formats and as images, and may be saved for subsequent re-use.",2014-11-06 +28632946,Emerging Biomarkers of Illness Severity: Urinary Metabolites Associated with Sepsis and Necrotizing Methicillin-Resistant Staphylococcus aureus Pneumonia.,"Our objective was to illustrate the potential of metabolomics to identify novel biomarkers of illness severity in a child with fatal necrotizing pneumonia caused by methicillin-resistant Staphylococcus aureus (MRSA). 
We present a case report with two control groups and a metabolomics analysis: an infant with fatal MRSA pneumonia, four children with influenza pneumonia (pneumonia control group), and seven healthy children with no known infections (healthy control group). Urine samples were collected from all children. Metabolites were identified and quantified using 1 H-nuclear magnetic resonance spectrometry. Normalized metabolite concentration data from children with influenza pneumonia and healthy controls were compared by using an unpaired Student t test. To identify differentiating metabolites of MRSA pneumonia, the fold change of each metabolite was calculated by dividing each urine metabolite concentration of the patient with fatal MRSA pneumonia by the median urine concentration values of the same metabolite of the patients with influenza pneumonia and healthy controls, respectively. MetScape (http://metscape.ncibi.org/), a bioinformatics tool, was used for data visualization and interpretation. Urine metabolite concentrations previously identified as associated with sepsis in children (e.g., 3-hydroxybutyrate, carnitine, and creatinine) were higher in the patient with fatal MRSA pneumonia compared with those of patients with influenza pneumonia and healthy controls. The concentrations of additional metabolites-acetone, acetoacetate, choline, fumarate, glucose, and 3-aminoisobutyrate-were more than 25-fold higher in the patient with MRSA pneumonia than those of patients with influenza pneumonia and healthy controls. These metabolic changes in the urine preceded the clinical severe sepsis phenotype, suggesting that detection of the extent of metabolic disruption can aid in the early identification of a sepsis phenotype in advance of the clinical diagnosis. 
These data also support the utility of metabolomics for the development of clinical assays to identify patients with pediatric pneumonia at high risk for deterioration.",2017-07-28 +28552653,High-throughput metaproteomics data analysis with Unipept: A tutorial.,"In recent years, shotgun metaproteomics has established itself as an important tool to study the composition of complex ecosystems and microbial communities. Two key steps in metaproteomics data analysis are the inference of proteins from the identified peptides, and the determination of the taxonomic origin and function of these proteins. This tutorial therefore introduces the Unipept command line interface (http://unipept.ugent.be/clidocs) as a platform-independent tool for such metaproteomics data analyses. First, a detailed overview is given of the available Unipept commands and their functions. Next, the power of the Unipept command line interface is illustrated using two case studies that analyze a single tryptic peptide, and a set of peptides retrieved from a shotgun metaproteomics experiment, respectively. Finally, the analysis results obtained using these command line tools are compared with the interactive taxonomic analysis that is available on the Unipept website.",2017-05-24 +26430546,ICeE an interface for C. elegans experiments.,"An increasing number of laboratories are using the COPAS Biosort™ to implement high-throughput approaches to tackle diverse biological problems. While providing a powerful tool for generating quantitative data, the utility of the Biosort is currently limited by the absence of resources for data management. We describe a simple electronic database designed to allow easy storage and retrieval of Biosort data for C. elegans, but that has a wide potential application for organizing electronic files and data sets. ICeE is an Open Source application. 
The code and accompanying documentation are freely available via the web at http://www.ciml.univ-mrs.fr/EWBANK_jonathan/software.html.",2014-07-01 +27355821,Characterizing Blood Metabolomics Profiles Associated with Self-Reported Food Intakes in Female Twins.,"Using dietary biomarkers in nutritional epidemiological studies may better capture exposure and improve the level at which diet-disease associations can be established and explored. Here, we aimed to identify and evaluate reproducibility of novel biomarkers of reported habitual food intake using targeted and non-targeted metabolomic blood profiling in a large twin cohort. Reported intakes of 71 food groups, determined by FFQ, were assessed against 601 fasting blood metabolites in over 3500 adult female twins from the TwinsUK cohort. For each metabolite, linear regression analysis was undertaken in the discovery group (excluding MZ twin pairs discordant [≥1 SD apart] for food group intake) with each food group as a predictor adjusting for age, batch effects, BMI, family relatedness and multiple testing (1.17x10-6 = 0.05/[71 food groups x 601 detected metabolites]). Significant results were then replicated (non-targeted: P<0.05; targeted: same direction) in the MZ discordant twin group and results from both analyses meta-analyzed. We identified and replicated 180 significant associations with 39 food groups (P<1.17x10-6), overall consisting of 106 different metabolites (74 known and 32 unknown), including 73 novel associations. 
In particular we identified trans-4-hydroxyproline as a potential marker of red meat intake (0.075[0.009]; P = 1.08x10-17), ergothioneine as a marker of mushroom consumption (0.181[0.019]; P = 5.93x10-22), and three potential markers of fruit consumption (top association: apple and pears): including metabolites derived from gut bacterial transformation of phenolic compounds, 3-phenylpropionate (0.024[0.004]; P = 1.24x10-8) and indolepropionate (0.026[0.004]; P = 2.39x10-9), and threitol (0.033[0.003]; P = 1.69x10-21). With the largest nutritional metabolomics dataset to date, we have identified 73 novel candidate biomarkers of food intake for potential use in nutritional epidemiological studies. We compiled our findings into the DietMetab database (http://www.twinsuk.ac.uk/dietmetab-data/), an online tool to investigate our top associations.",2016-06-29 +28673253,An improved filtering algorithm for big read datasets and its application to single-cell assembly.,"

Background

For single-cell or metagenomic sequencing projects, it is necessary to sequence with a very high mean coverage in order to make sure that all parts of the sample DNA get covered by the reads produced. This leads to huge datasets with lots of redundant data. A filtering of this data prior to assembly is advisable. Brown et al. (2012) presented the algorithm Diginorm for this purpose, which filters reads based on the abundance of their k-mers.

Methods

We present Bignorm, a faster and quality-conscious read filtering algorithm. An important new algorithmic feature is the use of phred quality scores together with a detailed analysis of the k-mer counts to decide which reads to keep.

Results

We qualify and recommend parameters for our new read filtering algorithm. Guided by these parameters, we remove a median of 97.15% of the reads while keeping the mean phred score of the filtered dataset high. Using the SPAdes assembler, we produce assemblies of high quality from these filtered datasets in a fraction of the time needed for an assembly from the datasets filtered with Diginorm.

Conclusions

We conclude that read filtering is a practical and efficient method for reducing read data and for speeding up the assembly process. This applies not only for single cell assembly, as shown in this paper, but also to other projects with high mean coverage datasets like metagenomic sequencing projects. Our Bignorm algorithm allows assemblies of competitive quality in comparison to Diginorm, while being much faster. Bignorm is available for download at https://git.informatik.uni-kiel.de/axw/Bignorm .",2017-07-03 +22710135,TWARIT: an extremely rapid and efficient approach for phylogenetic classification of metagenomic sequences.,"Phylogenetic assignment of individual sequence reads to their respective taxa, referred to as 'taxonomic binning', constitutes a key step of metagenomic analysis. Existing binning methods have limitations either with respect to time or accuracy/specificity of binning. Given these limitations, development of a method that can bin vast amounts of metagenomic sequence data in a rapid, efficient and computationally inexpensive manner can profoundly influence metagenomic analysis in computational resource poor settings. We introduce TWARIT, a hybrid binning algorithm, that employs a combination of short-read alignment and composition-based signature sorting approaches to achieve rapid binning rates without compromising on binning accuracy and specificity. TWARIT is validated with simulated and real-world metagenomes and the results demonstrate significantly lower overall binning times compared to that of existing methods. Furthermore, the binning accuracy and specificity of TWARIT are observed to be comparable/superior to them. A web server implementing TWARIT algorithm is available at http://metagenomics.atc.tcs.com/Twarit/",2012-06-15 +,Conflict of Interest in Sports Medicine : Does it Affect Our Judgement?,"

Objectives:

The American Academy of Orthopaedic Surgeons (AAOS) and other orthopaedic societies require members who present original research to disclose conflicts so that audiences can make informed decisions when interpreting data. To what degree members use this information when interpreting studies has never been investigated. The purpose of our study was to evaluate how a reported conflict of interest by the primary research team can influence the perceived value of data presented in original research.

Methods:

We devised a hypothetical prospective study (https://www.surveymonkey.com/s/MPCCLCX) and asked orthopaedic surgeons and non operative sports medicine specialists to rate the perceived clinical value of the data that was obtained based upon variations of study design, statistical significance of outcomes between treatment groups, and characteristics of the research setting (academic v. private institution). The research team in question was disclosed to have the following conflict of interest: the project was funded by a pharmaceutical company and that all authors received compensation for consulting services.

Results:

750 sports medicine physicians were sent a survey request to participate in this IRB approved study. 522 responses were obtained for an overall response rate of 70%. 99% of respondents were orthopaedic surgeons. The majority of respondents were from the Northeastern U.S. (32%) and male (96%). Most have been in practice for over 20 years (40%) and were from private practice single specialty groups (58%). 80% of respondents strongly agreed with the statement that conflict disclosure is important when interpreting study results. 62% of respondents reported always reading the disclosure slide during academy or other meeting presentations. 41% of respondents reported always using this information when deciding how to interpret scientific data. Using a case series design with significant positive results at an academic center, 24% reported that the study was likely trustworthy. When the setting of the study was changed to a community hospital, this number decreased to 5%. When no significant difference was found between the groups, 42% believed the study to be trustworthy. When the study design was Level I evidence (RCT trial) and at an academic center, 57% believed the study to be trustworthy. With the same criteria but at a community hospital, this number decreased to 39%. When the results of this design showed no difference among groups, the majority of respondents believed the study to be trustworthy (62%).

Conclusion:

Although the majority of orthopaedic surgeons in our analysis believed that disclosure of conflict of interest is important, less than half used this information when interpreting studies. Changing the study design from a case series to a randomized controlled trial improved the perceived reliability of the data, but was not as important as the reporting of “negative” results.",2013-09-01 +30169719,Authoritative parent feeding style is associated with better child dietary quality at dinner among low-income minority families.,"

Background

Parent feeding styles have been linked to child weight status across multiple studies. However, to our knowledge, the link between feeding styles and children's dietary quality, a more proximal outcome, has not been investigated.

Objective

The purpose of this study was to examine the relation between parent feeding styles and dietary quality of Head Start preschoolers' dinner meals.

Design

The amount of food served and consumed by children was measured by using a standardized digital photography method during 3 in-home dinner observations of low-income minority families in Houston, Texas. Trained dietitians entered food served and consumed into the Nutrient Data System for Research 2009 for nutrient analysis. Overall dietary quality of the food served and consumed at dinner was evaluated by using the Healthy Eating Index 2010 (HEI-2010). Parent feeding style was assessed with the use of the Caregiver's Feeding Style Questionnaire (CFSQ). On the basis of a parent's level of demandingness and responsiveness to his or her child during feeding, the CFSQ categorizes parent feeding into 4 styles: authoritative (high demandingness and high responsiveness), authoritarian (high demandingness and low responsiveness), indulgent (low demandingness and high responsiveness), or uninvolved (low demandingness and low responsiveness).

Results

For the overall sample, the mean ± SD HEI score for dinner served was 44.2 ± 8.4, and the mean ± SD HEI score for dinner consumed was 43.4 ± 7.0. In the fully adjusted model, ANCOVA indicated that the authoritative parent feeding style was associated with significantly higher child dietary quality compared with the authoritarian feeding style (mean ± SEE HEI consumed-authoritative 45.5 ± 0.9; authoritarian: 41.9 ± 0.7; P = 0.001).

Conclusions

Parent feeding style contributes to the overall dietary quality of children, and among low-income minority preschoolers an authoritative feeding style was associated with the highest dietary quality of the 4 feeding styles. Interventions to promote feeding practices that contribute to authoritative feeding are needed to improve the dietary quality of preschool children at dinner. This trial was registered at https://clinicaltrials.gov as NCT02696278.",2018-10-01 +24912761,Russian normative data for 375 action pictures and verbs.,"The present article introduces a Russian-language database of 375 action pictures and associated verbs with normative data. The pictures were normed for name agreement, conceptual familiarity, and subjective visual complexity, and measures of age of acquisition, imageability, and image agreement were collected for the verbs. Values of objective visual complexity, as well as information about verb frequency, length, argument structure, instrumentality, and name relation, are also provided. Correlations between these parameters are presented, along with a comparative analysis of the Russian name agreement norms and those collected in other languages. The full set of pictorial stimuli and the obtained norms may be freely downloaded from http://neuroling.ru/en/db.htm for use in research and for clinical purposes.",2015-09-01 +26471457,BioSynther: a customized biosynthetic potential explorer.,"

Motivation

One of the most promising applications of biosynthetic methods is to produce chemical products of high value from the ready-made chemicals. To explore the biosynthetic potentials of a chemical as a synthesis precursor, biosynthetic databases and related chemoinformatics tools are urgently needed. In the present work, a web-based tool, BioSynther, is developed to explore the biosynthetic potentials of precursor chemicals using BKM-react, Rhea, and more than 50,000 in-house RxnFinder reactions manually curated. BioSynther allows researchers to explore biosynthetic potentials, through so far known biochemical reactions, step by step interactively, which could be used as a useful tool in metabolic engineering and synthetic biology.

Availability and implementation

BioSynther is available at: http://www.lifemodules.org/BioSynther/.

Contact

hu_qn@tib.cas.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-14 +,"Revision and phylogeny of Syrphetodes (Coleoptera: Ulodidae): implications for biogeography, alpinization and conservation","The genus Syrphetodes Broun is revised to include a total of 13 species. Most of the species are restricted in their distributions, are rarely collected and have been attributed conservation status in New Zealand. Eleven species are described as new: three from Northland (S. relictus sp.n., Te Paki; S. insularis sp.n., Three Kings Islands; S. magnus sp.n., Hokianga), one from the central North Island (S. obtusus sp.n.), one from Central Otago (S. nunni sp.n., Waikaia Bush), and seven from the southern Alps (S. cirrhopogon sp.n., Aspiring National Park; S. occiduus sp.n., Westland; S. melanopogon sp.n., Mt Dewar, Paparoa Range; S. defectus sp.n., northern Paparoa Range; S. marrisi sp.n., Mt Domett, Northwest Nelson; S. carinatus sp.n., Victoria Range). Eleven synonymies are proposed: S. crenatus Broun (= S. dorsalis Broun, syn.n.), S. marginatus Pascoe (= S. bullatus Sharp, syn.n.; S. sylvius Broun, syn.n.; S. cordipennis Broun, syn.n.; S. punctatus Broun, syn.n.; S. simplex Broun, syn.n.; S. nodosalis Broun, syn.n.; S. truncatus Broun, syn.n.; S. variegatus Broun, syn.n.; S. pensus Broun, syn.n.; S. thoracicus Broun, syn.n.). The phylogenetic relationships among the species were reconstructed using morphological (25 adult characters) and DNA sequence (nuclear 28S rDNA and mitochondrial cytochrome c oxidase subunit I) data. A morphological analysis rooted with Trachyderastes resulted in a split between lowland and high‐altitude species and a well‐supported group from Northland. 
Molecular trees rooted with representatives of Trachyderastes Kaszab (New Caledonia), Meryx Latrielle (Australia), Ulodes Erichson (Australia) and three New Zealand genera (Arthopus Sharp, Brouniphylax Strand, Exohadrus Broun) resulted in the following tree: ((Ulodes, Brouniphylax) (Exohadrus, Arthopus)) (Syrphetodes (Meryx, Trachyderastes)). Species relationships within Syrphetodes included a strongly supported northern North Island clade and an alpine clade either as sister taxon to S. crenatus and S. marginatus or sister to the remaining lowland lineages. Combined phylogenetic analyses also showed partial congruence with separate partitions. The distributions of the lowland species, in particular those from the North Island, correspond to islands that existed in the Pliocene. The alpine, black‐coloured lineage, found above the treeline, is monophyletic based on several characters (e.g. lack of abdominal flanges and reduced scalation) and, in some reconstructions, the tan‐coloured S. cirrhopogon is sister taxon to the remaining black‐coloured species. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:697E68E8‐EE90‐46C1‐A009‐78A794E0EF4F.",2015-01-01 +25361969,VaDE: a manually curated database of reproducible associations between various traits and human genomic polymorphisms.,"Genome-wide association studies (GWASs) have identified numerous single nucleotide polymorphisms (SNPs) associated with the development of common diseases. However, it is clear that genetic risk factors of common diseases are heterogeneous among human populations. Therefore, we developed a database of genomic polymorphisms that are reproducibly associated with disease susceptibilities, drug responses and other traits for each human population: 'VarySysDB Disease Edition' (VaDE; http://bmi-tokai.jp/VaDE/). 
SNP-trait association data were obtained from the National Human Genome Research Institute GWAS (NHGRI GWAS) catalog and RAvariome, and we added detailed information of sample populations by curating original papers. In addition, we collected and curated original papers, and registered the detailed information of SNP-trait associations in VaDE. Then, we evaluated reproducibility of associations in each population by counting the number of significantly associated studies. VaDE provides literature-based SNP-trait association data and functional genomic region annotation for SNP functional research. SNP functional annotation data included experimental data of the ENCODE project, H-InvDB transcripts and the 1000 Genome Project. A user-friendly web interface was developed to assist quick search, easy download and fast swapping among viewers. We believe that our database will contribute to the future establishment of personalized medicine and increase our understanding of genetic factors underlying diseases.",2014-10-31 +29733382,"An integrative model for alternative polyadenylation, IntMAP, delineates mTOR-modulated endoplasmic reticulum stress response.","3'-untranslated regions (UTRs) can vary through the use of alternative polyadenylation sites during pre-mRNA processing. Multiple publically available pipelines combining high profiling technologies and bioinformatics tools have been developed to catalog changes in 3'-UTR lengths. In our recent RNA-seq experiments using cells with hyper-activated mammalian target of rapamycin (mTOR), we found that cellular mTOR activation leads to transcriptome-wide alternative polyadenylation (APA), resulting in the activation of multiple cellular pathways. Here, we developed a novel bioinformatics algorithm, IntMAP, which integrates RNA-Seq and PolyA Site (PAS)-Seq data for a comprehensive characterization of APA events. 
By applying IntMAP to the datasets from cells with hyper-activated mTOR, we identified novel APA events that could otherwise not be identified by either profiling method alone. Several transcription factors including Cebpg (CCAAT/enhancer binding protein gamma) were among the newly discovered APA transcripts, indicating that diverse transcriptional networks may be regulated by mTOR-coordinated APA. The prevention of APA in Cebpg using the CRISPR/cas9-mediated genome editing tool showed that mTOR-driven 3'-UTR shortening in Cebpg is critical in protecting cells from endoplasmic reticulum (ER) stress. Taken together, we present IntMAP as a new bioinformatics algorithm for APA analysis by which we expand our understanding of the physiological role of mTOR-coordinated APA events to ER stress response. IntMAP toolbox is available at http://compbio.cs.umn.edu/IntMAP/.",2018-07-01 +30213739,Introduction to Open Surgical Skills Curriculum: Randomized Trial of Self-Paced vs Group Video Tutorial Viewing.,"

Objective

At our residency program, incoming interns are traditionally taught fundamental open surgical skills like suturing and knot tying in a group setting by viewing 12 instructional videos consecutively followed by individual baseline skill testing. We sought to evaluate if introduction to open surgical skills via self-paced viewing of video tutorials, as opposed to traditional group viewing, results in improved surgical skill acquisition in Obstetrics and Gynecology (OBGYN) interns as measured by higher proficiency score with decreased workload stress and anxiety.

Design, setting, participants

A randomized control trial was conducted in which OBGYN PGY-1 residents in 2015 and 2016 (N = 35) were introduced to basic open surgical skills, such as knot tying and suturing, by viewing 12 video tutorials produced at UTSW (https://youtu.be/4w3hyL9muVU) for a surgical skills curriculum. Residents were randomized to 2 groups: group viewing vs self-paced viewing. Performance scores were calculated based on time and accuracy while workload and anxiety were measured by pre- and post-testing surveys using the National Aeronautics and Space Administration-Task Load Index and Spielberger State-Trait Anxiety Inventory 6 item questionnaires.

Results

There was no significant difference in proficiency score between the group vs self-paced viewing in 8 out of 12 tasks using the Wilcoxon signed rank test (p > 0.10). There were no statistically significant differences in workload stress based on the National Aeronautics and Space Administration-Task Load Index questionnaire (p = 0.399) or self-reported anxiety based on the Spielberger State-Trait Anxiety Inventory 6 item questionnaire (p = 0.607).

Conclusions

Contrary to recent educational data suggesting self-paced learning may improve outcomes, viewing instructional videos in a group setting continues to be a time efficient method to introduce basic open surgical skills to incoming OBGYN interns.",2018-09-10 +30198761,Pathways from racial discrimination to cortisol/DHEA imbalance: protective role of religious involvement.,"Objective: Racial discrimination (RD) is hypothesized to dysregulate the production of stress reactive hormones among African Americans. Psychological processes that may mediate the association between RD and such dysregulation (e.g. cortisol/DHEA ratio) are not well articulated. Organizational religious involvement (ORI) has been discussed as a psychological protective factor within the context of RD, but our understanding of ORI as a physiological protective factor remains limited. We evaluated whether RD was directly and indirectly (through depressive symptoms) associated with an imbalance of cortisol and DHEA hormones, and whether ORI buffered these direct and/or indirect pathways.Design: Data were drawn from the Flint Adolescent Study, an ongoing interview study of youth that began in 1994. Participants were 188 African American emerging adults (47.3% Female, ages 20-22). We used mediation and moderated-mediation analyses, as outlined by Hayes [2012. PROCESS SPSS Macro. [Computer Software and Manual]. http://www.afhayes.com/public/process.pdf], to evaluate the study aims.Results: We found that depressive symptoms mediated the association between RD and the cortisol/DHEA ratio. We also found that depressive symptoms mediated the association between RD and the cortisol/DHEA ratio for individuals reporting low and moderate levels of ORI, but not at high levels.Conclusions: Our findings support the socio-psychobiological model of racism and health [Chae et al. 2011. 
""Conceptualizing Racial Disparities in Health: Advancement of a Socio-Psychobiological Approach."" Du Bois Review: Social Science Research on Race 8 (1): 63-77. doi:10.1017/S1742058X11000166] and suggest that the psychological toll of RD can confer physiological consequences. Moreover, ORI may disrupt pathways from RD to cortisol/DHEA ratio by buffering the psychological toll of RD.",2018-09-10 +30629211,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Role of Whole Brain Radiation Therapy in Adults With Newly Diagnosed Metastatic Brain Tumors.,"

Target population

Adult patients (older than 18 yr of age) with newly diagnosed brain metastases.

Question

If whole brain radiation therapy (WBRT) is used, is there an optimal dose/fractionation schedule?

Recommendations

Level 1:  A standard WBRT dose/fractionation schedule (ie, 30 Gy in 10 fractions or a biological equivalent dose [BED] of 39 Gy10) is recommended as altered dose/fractionation schedules do not result in significant differences in median survival or local control. Level 3: Due to concerns regarding neurocognitive effects, higher dose per fraction schedules (such as 20 Gy in 5 fractions) are recommended only for patients with poor performance status or short predicted survival. Level 3: WBRT can be recommended to improve progression-free survival for patients with more than 4 brain metastases.

Question

What impact does tumor histopathology or molecular status have on the decision to use WBRT, the dose fractionation scheme to be utilized, and its outcomes?

Recommendations

There is insufficient evidence to support the choice of any particular dose/fractionation regimen based on histopathology. Molecular status may have an impact on the decision to delay WBRT in subgroups of patients, but there is not sufficient data to make a more definitive recommendation.

Question

Separate from survival outcomes, what are the neurocognitive consequences of WBRT, and what steps can be taken to minimize them?

Recommendations

Level 2: Due to neurocognitive toxicity, local therapy (surgery or SRS) without WBRT is recommended for patients with ≤4 brain metastases amenable to local therapy in terms of size and location. Level 2:  Given the association of neurocognitive toxicity with increasing total dose and dose per fraction of WBRT, WBRT doses exceeding 30 Gy given in 10 fractions, or similar biologically equivalent doses, are not recommended, except in patients with poor performance status or short predicted survival. Level 2: If prophylactic cranial irradiation (PCI) is given to prevent brain metastases for small cell lung cancer, the recommended WBRT dose/fractionation regimen is 25 Gy in 10 fractions, and because this can be associated with neurocognitive decline, patients should be told of this risk at the same time they are counseled about the possible survival benefits. Level 3: Patients having WBRT (given for either existing brain metastases or as PCI) should be offered 6 mo of memantine to potentially delay, lessen, or prevent the associated neurocognitive toxicity.

Question

Does the addition of WBRT after surgical resection or radiosurgery improve progression-free or overall survival outcomes when compared to surgical resection or radiosurgery alone?

Recommendations

Level 2: WBRT is not recommended in WHO performance status 0 to 2 patients with up to 4 brain metastases because, compared to surgical resection or radiosurgery alone, the addition of WBRT improves intracranial progression-free survival but not overall survival. Level 2: In WHO performance status 0 to 2 patients with up to 4 brain metastases where the goal is minimizing neurocognitive toxicity, as opposed to maximizing progression-free survival and overall survival, local therapy (surgery or radiosurgery) without WBRT is recommended. Level 3: Compared to surgical resection or radiosurgery alone, the addition of WBRT is not recommended for patients with more than 4 brain metastases unless the metastases' volume exceeds 7 cc, or there are more than 15 metastases, or the size or location of the metastases are not amenable to surgical resection or radiosurgery.The full guideline can be found at: https://www.cns.org/guidelines/guidelines-treatment-adults-metastatic-brain-tumors/chapter_3.",2019-03-01 +28187415,Estimation of biomass composition from genomic and transcriptomic information.,"Given the great potential impact of the growing number of complete genome-scale metabolic network reconstructions of microorganisms, bioinformatics tools are needed to simplify and accelerate the course of knowledge in this field. One essential component of a genome-scale metabolic model is its biomass equation, whose maximization is one of the most common objective functions used in Flux Balance Analysis formulations. Some components of biomass, such as amino acids and nucleotides, can be estimated from genome information, providing reliable data without the need of performing lab experiments. In this work a java tool is proposed that estimates microbial biomass composition in amino acids and nucleotides, from genome and transcriptomic information, using as input files sequences in FASTA format and files with transcriptomic data in the csv format. 
This application allows to obtain the results rapidly and is also a user-friendly tool for users with any or little background in informatics (http://darwin.di.uminho.pt/biomass/). The results obtained using this tool are fairly close to experimental data, showing that the estimation of amino acid and nucleotide compositions from genome information and from transcriptomic data is a good alternative when no experimental data is available.",2016-11-27 +,The evolution of myrmicine ants: phylogeny and biogeography of a hyperdiverse ant clade (Hymenoptera: Formicidae),"This study investigates the evolutionary history of a hyperdiverse clade, the ant subfamily Myrmicinae (Hymenoptera: Formicidae), based on analyses of a data matrix comprising 251 species and 11 nuclear gene fragments. Under both maximum likelihood and Bayesian methods of inference, we recover a robust phylogeny that reveals six major clades of Myrmicinae, here treated as newly defined tribes and occurring as a pectinate series: Myrmicini, Pogonomyrmecini trib.n., Stenammini, Solenopsidini, Attini and Crematogastrini. Because we condense the former 25 myrmicine tribes into a new six‐tribe scheme, membership in some tribes is now notably different, especially regarding Attini. We demonstrate that the monotypic genus Ankylomyrma is neither in the Myrmicinae nor even a member of the more inclusive formicoid clade—rather it is a poneroid ant, sister to the genus Tatuidris (Agroecomyrmecinae). Several species‐rich myrmicine genera are shown to be nonmonophyletic, including Pogonomyrmex, Aphaenogaster, Messor, Monomorium, Pheidole, Temnothorax and Tetramorium. We propose a number of generic synonymies to partially alleviate these problems (senior synonym listed first): Pheidole = Anisopheidole syn.n. = Machomyrma syn.n.; Temnothorax = Chalepoxenus syn.n. = Myrmoxenus syn.n. = Protomognathus syn.n.; Tetramorium = Rhoptromyrmex syn.n. = Anergates syn.n. = Teleutomyrmex syn.n. The genus Veromessor stat.r. 
is resurrected for the New World species previously placed in Messor; Syllophopsis stat.r. is resurrected from synonymy under Monomorium to contain the species in the hildebrandti group; Trichomyrmex stat.r. is resurrected from synonymy under Monomorium to contain the species in the scabriceps‐ and destructor‐groups; and the monotypic genus Epelysidris stat.r. is reinstated for Monomorium brocha. Bayesian divergence dating indicates that the crown group Myrmicinae originated about 98.6 Ma (95% highest probability density 87.9–109.6 Ma) but the six major clades are considerably younger, with age estimates ranging from 52.3 to 71.1 Ma. Although these and other suprageneric taxa arose mostly in the middle Eocene or earlier, a number of prominent, species‐rich genera, such as Pheidole, Cephalotes, Strumigenys, Crematogaster and Tetramorium, have estimated crown group origins in the late Eocene or Oligocene. Most myrmicine species diversity resides in the two sister clades, Attini and Crematogastrini, which are estimated to have originated and diversified extensively in the Neotropics and Paleotropics, respectively. The newly circumscribed Myrmicini is Holarctic in distribution, and ancestral range estimation suggests a Nearctic origin. The Pogonomyrmecini and Solenopsidini are reconstructed as being Neotropical in origin, but they have subsequently colonized the Nearctic region (Pogonomyrmecini) and many parts of the Old World as well as the Nearctic region (Solenopsidini), respectively. The Stenammini have flourished primarily in the northern hemisphere, and are most likely of Nearctic origin, but selected lineages have dispersed to the northern Neotropics and the Paleotropics. Thus the evolutionary history of the Myrmicinae has played out on a global stage over the last 100 Ma, with no single region being the principal generator of species diversity. 
This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub: BB6829C4‐DA79‐45FE‐979E‐9749E237590E.",2015-01-01 +30057342,Characterizing functional consequences of DNA copy number alterations in breast and ovarian tumors by spaceMap.,"We propose a novel conditional graphical model - spaceMap - to construct gene regulatory networks from multiple types of high dimensional omic profiles. A motivating application is to characterize the perturbation of DNA copy number alterations (CNAs) on downstream protein levels in tumors. Through a penalized multivariate regression framework, spaceMap jointly models high dimensional protein levels as responses and high dimensional CNAs as predictors. In this setup, spaceMap infers an undirected network among proteins together with a directed network encoding how CNAs perturb the protein network. spaceMap can be applied to learn other types of regulatory relationships from high dimensional molecular profiles, especially those exhibiting hub structures. Simulation studies show spaceMap has greater power in detecting regulatory relationships over competing methods. Additionally, spaceMap includes a network analysis toolkit for biological interpretation of inferred networks. We apply spaceMap to the CNAs, gene expression and proteomics data sets from CPTAC-TCGA breast (n=77) and ovarian (n=174) cancer studies. Each cancer exhibits disruption of 'ion transmembrane transport' and 'regulation from RNA polymerase II promoter' by CNA events unique to each cancer. Moreover, using protein levels as a response yields a more functionally-enriched network than using RNA expressions in both cancer types. The network results also help to pinpoint crucial cancer genes and provide insights on the functional consequences of important CNA in breast and ovarian cancers. 
The R package spaceMap - including vignettes and documentation - is hosted on https://topherconley.github.io/spacemap.",2018-07-26 +29631189,Systematic feature selection improves accuracy of methylation-based forensic age estimation in Han Chinese males.,"Estimating individual age from biomarkers may provide key information facilitating forensic investigations. Recent progress has shown DNA methylation at age-associated CpG sites as the most informative biomarkers for estimating the individual age of an unknown donor. Optimal feature selection plays a critical role in determining the performance of the final prediction model. In this study we investigate methylation levels at 153 age-associated CpG sites from 21 previously reported genomic regions using the EpiTYPER system for their predictive power on individual age in 390 Han Chinese males ranging from 15 to 75 years of age. We conducted a systematic feature selection using a stepwise backward multiple linear regression analysis as well as an exhaustive searching algorithm. Both approaches identified the same subset of 9 CpG sites, which in linear combination provided the optimal model fitting with mean absolute deviation (MAD) of 2.89 years of age and explainable variance (R2) of 0.92. The final model was validated in two independent Han Chinese male samples (validation set 1, N = 65, MAD = 2.49, R2 = 0.95, and validation set 2, N = 62, MAD = 3.36, R2 = 0.89). Other competing models such as support vector machine and artificial neural network did not outperform the linear model to any noticeable degree. The validation set 1 was additionally analyzed using Pyrosequencing technology for cross-platform validation and was termed as validation set 3. 
Directly applying our model, in which the methylation levels were detected by the EpiTYPER system, to the data from pyrosequencing technology showed, however, less accurate results in terms of MAD (validation set 3, N = 65 Han Chinese males, MAD = 4.20, R2 = 0.93), suggesting the presence of a batch effect between different data generation platforms. This batch effect could be partially overcome by a z-score transformation (MAD = 2.76, R2 = 0.93). Overall, our systematic feature selection identified 9 CpG sites as the optimal subset for forensic age estimation and the prediction model consisting of these 9 markers demonstrated high potential in forensic practice. An age estimator implementing our prediction model allowing missing markers is freely available at http://liufan.big.ac.cn/AgePrediction.",2018-03-23 +28881973,BIOSSES: a semantic sentence similarity estimation system for the biomedical domain.,"

Motivation

The amount of information available in textual format is rapidly increasing in the biomedical domain. Therefore, natural language processing (NLP) applications are becoming increasingly important to facilitate the retrieval and analysis of these data. Computing the semantic similarity between sentences is an important component in many NLP tasks including text retrieval and summarization. A number of approaches have been proposed for semantic sentence similarity estimation for generic English. However, our experiments showed that such approaches do not effectively cover biomedical knowledge and produce poor results for biomedical text.

Methods

We propose several approaches for sentence-level semantic similarity computation in the biomedical domain, including string similarity measures and measures based on the distributed vector representations of sentences learned in an unsupervised manner from a large biomedical corpus. In addition, ontology-based approaches are presented that utilize general and domain-specific ontologies. Finally, a supervised regression based model is developed that effectively combines the different similarity computation metrics. A benchmark data set consisting of 100 sentence pairs from the biomedical literature is manually annotated by five human experts and used for evaluating the proposed methods.

Results

The experiments showed that the supervised semantic sentence similarity computation approach obtained the best performance (0.836 correlation with gold standard human annotations) and improved over the state-of-the-art domain-independent systems up to 42.6% in terms of the Pearson correlation metric.

Availability and implementation

A web-based system for biomedical semantic sentence similarity computation, the source code, and the annotated benchmark data set are available at: http://tabilab.cmpe.boun.edu.tr/BIOSSES/ .

Contact

gizemsogancioglu@gmail.com or arzucan.ozgur@boun.edu.tr.",2017-07-01 +29401076,SmartTots Update Regarding Anesthetic Neurotoxicity in the Developing Brain.,"SmartTots (http://smarttots.org/) represents a public-private partnership between the International Anesthesia Research Society and the US Food and Drug Administration. Over the past 7 years, SmartTots has worked in collaboration with various stakeholders to determine whether anesthetic drugs have detrimental effects on the developing brain. SmartTots has funded clinical and preclinical studies, organized meetings, served as a repository of peer-reviewed information, and facilitated the development of consensus-based statements. Here, we report advances in the field of anesthetic neurotoxicity and provide an update on SmartTots' activities. Clinical studies have provided some reassurance that a brief exposure to anesthetic drugs does not cause overt, persistent cognitive deficits. New recommendations aim to increase the reproducibility and ""clinical relevance"" of data from studies of laboratory animals. Overall, the field has advanced substantially; however, it remains paramount to definitively resolve whether anesthetic drugs are neurotoxic to the immature brain. The results of SmartTots efforts will either allay unwarranted fears or substantially change pediatric anesthetic practice and prompt studies to identify neuroprotective strategies.",2018-04-01 +25614757,"WallProtDB, a database resource for plant cell wall proteomics.","

Background

During the last fifteen years, cell wall proteomics has become a major research field with the publication of more than 50 articles describing plant cell wall proteomes. The WallProtDB database has been designed as a tool to facilitate the inventory, the interpretation of cell wall proteomics data and the comparisons between cell wall proteomes.

Results

WallProtDB (http://www.polebio.lrsv.ups-tlse.fr/WallProtDB/) presently contains 2170 proteins and ESTs identified experimentally in 36 cell wall proteomics studies performed on 11 different plant species. Two criteria have to be met for entering WallProtDB. First one is related to the identification of proteins. Only proteins identified in plant with available genomic or ESTs data are considered to ensure unambiguous identification. Second criterion is related to the difficulty to obtain clean cell wall fractions. Indeed, since cell walls constitute an open compartment difficult to isolate, numerous proteins predicted to be intracellular and/or having functions inside the cell have been identified in cell wall extracts. Then, except proteins predicted to be plasma membrane proteins, only proteins having a predicted signal peptide and no known intracellular retention signal are included in the database. In addition, WallProtDB contains information about the strategies used to obtain cell wall protein extracts and to identify proteins by mass spectrometry and bioinformatics. Mass spectrometry data are included when available. All the proteins of WallProtDB are linked to ProtAnnDB, another database, which contains structural and functional bioinformatics annotations of proteins as well as links to other databases (Aramemnon, CAZy, Planet, Phytozome). A list of references in the cell wall proteomics field is also provided.

Conclusions

WallProtDB aims at becoming a cell wall proteome reference database. It can be updated at any time on request and provides support for sharing cell wall proteomics data and literature references with researchers interested in plant cell wall biology.",2015-01-16 +27114492,UbiNet: an online resource for exploring the functional associations and regulatory networks of protein ubiquitylation. ,"Protein ubiquitylation catalyzed by E3 ubiquitin ligases is crucial in the regulation of many cellular processes. Owing to the high throughput of mass spectrometry-based proteomics, a number of methods have been developed for the experimental determination of ubiquitylation sites, leading to a large collection of ubiquitylation data. However, there exist no resources for the exploration of E3-ligase-associated regulatory networks of ubiquitylated proteins in humans. Therefore, the UbiNet database was developed to provide a full investigation of protein ubiquitylation networks by incorporating experimentally verified E3 ligases, ubiquitylated substrates and protein-protein interactions (PPIs). To date, UbiNet has accumulated 43 948 experimentally verified ubiquitylation sites from 14 692 ubiquitylated proteins of humans. Additionally, we have manually curated 499 E3 ligases as well as two E1 activating and 46 E2 conjugating enzymes. To delineate the regulatory networks among E3 ligases and ubiquitylated proteins, a total of 430 530 PPIs were integrated into UbiNet for the exploration of ubiquitylation networks with an interactive network viewer. A case study demonstrated that UbiNet was able to decipher a scheme for the ubiquitylation of tumor proteins p63 and p73 that is consistent with their functions. Although the essential role of Mdm2 in p53 regulation is well studied, UbiNet revealed that Mdm2 and additional E3 ligases might be implicated in the regulation of other tumor proteins by protein ubiquitylation. 
Moreover, UbiNet could identify potential substrates for a specific E3 ligase based on PPIs and substrate motifs. With limited knowledge about the mechanisms through which ubiquitylated proteins are regulated by E3 ligases, UbiNet offers users an effective means for conducting preliminary analyses of protein ubiquitylation. The UbiNet database is now freely accessible via http://csb.cse.yzu.edu.tw/UbiNet/. The content is regularly updated with the literature and newly released data. Database URL: http://csb.cse.yzu.edu.tw/UbiNet/.",2016-04-25 +27794558,Efficient detection of differentially methylated regions using DiMmeR.,"

Motivation

Epigenome-wide association studies (EWAS) generate big epidemiological datasets. They aim for detecting differentially methylated DNA regions that are likely to influence transcriptional gene activity and, thus, the regulation of metabolic processes. The by far most widely used technology is the Illumina Methylation BeadChip, which measures the methylation levels of 450 (850) thousand cytosines, in the CpG dinucleotide context in a set of patients compared to a control group. Many bioinformatics tools exist for raw data analysis. However, most of them require some knowledge in the programming language R, have no user interface, and do not offer all necessary steps to guide users from raw data all the way down to statistically significant differentially methylated regions (DMRs) and the associated genes.

Results

Here, we present DiMmeR (Discovery of Multiple Differentially Methylated Regions), the first free standalone software that, with a user-friendly graphical user interface (GUI), interactively guides scientists the whole way through EWAS data analysis. It offers parallelized statistical methods for efficiently identifying DMRs in both Illumina 450K and 850K EPIC chip data. DiMmeR computes empirical P-values through randomization tests, even for big datasets of hundreds of patients and thousands of permutations within a few minutes on a standard desktop PC. It is independent of any third-party libraries, computes regression coefficients, P-values and empirical P-values, and it corrects for multiple testing.

Availability and implementation

DiMmeR is publicly available at http://dimmer.compbio.sdu.dk .

Contact

diogoma@bmb.sdu.dk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +25305456,"First report on the antibody verification of HLA-DR, HLA-DQ and HLA-DP epitopes recorded in the HLA Epitope Registry.","The International Registry of Antibody-Defined HLA Epitopes (http://www.epregistry.com.br) has been recently established as a tool to understand humoral responses to HLA mismatches. These epitopes can be structurally defined as eplets by three-dimensional molecular modeling and amino acid sequence differences between HLA antigens. A major goal is to identify HLA eplets that have been verified experimentally with informative antibodies. This report addresses class II epitopes encoded by genes in the HLA-D region. Our analysis included reviews of many publications about epitope specificity of class II reactive human and murine monoclonal antibodies and informative alloantibodies from HLA sensitized patients as well as our own antibody testing results. As of July 1, 2014, 24 HLA-DRB1/3/4/5, 15 DQB, 3 DQA and 8 DPB antibody-verified epitopes have been identified and recorded. The Registry is still a work-in-progress and will become a useful resource for HLA professionals interested in histocompatibility testing at the epitope level and investigating antibody responses to HLA mismatches in transplant patients.",2014-10-13 +30462587,"The Exact VC Dimension of the WiSARD n -Tuple Classifier.","The Wilkie, Stonham, and Aleksander recognition device (WiSARD) n -tuple classifier is a multiclass weightless neural network capable of learning a given pattern in a single step. Its architecture is determined by the number of classes it should discriminate. A target class is represented by a structure called a discriminator, which is composed of N RAM nodes, each of them addressed by an n -tuple. Previous studies were carried out in order to mitigate an important problem of the WiSARD n -tuple classifier: having its RAM nodes saturated when trained by a large data set. 
Finding the VC dimension of the WiSARD n -tuple classifier was one of those studies. Although no exact value was found, tight bounds were discovered. Later, the bleaching technique was proposed as a means to avoid saturation. Recent empirical results with the bleaching extension showed that the WiSARD n -tuple classifier can achieve high accuracies with low variance in a great range of tasks. Theoretical studies had not been conducted with that extension previously. This work presents the exact VC dimension of the basic two-class WiSARD n -tuple classifier, which is linearly proportional to the number of RAM nodes belonging to a discriminator, and exponentially to their addressing tuple length, precisely N(2^n-1)+1. The exact VC dimension of the bleaching extension to the WiSARD n -tuple classifier, whose value is the same as that of the basic model, is also produced. Such a result confirms that the bleaching technique is indeed an enhancement to the basic WiSARD n -tuple classifier as it does no harm to the generalization capability of the original paradigm.",2018-11-21 +26578555,"SugarBindDB, a resource of glycan-mediated host-pathogen interactions.","The SugarBind Database (SugarBindDB) covers knowledge of glycan binding of human pathogen lectins and adhesins. It is a curated database; each glycan-protein binding pair is associated with at least one published reference. The core data element of SugarBindDB is a set of three inseparable components: the pathogenic agent, a lectin/adhesin and a glycan ligand. Each entity (agent, lectin or ligand) is described by a range of properties that are summarized in an entity-dedicated page. Several search, navigation and visualisation tools are implemented to investigate the functional role of glycans in pathogen binding. The database is cross-linked to protein and glycan-related resources such as UniProtKB and UniCarbKB. 
It is tightly bound to the latter via a substructure search tool that maps each ligand to full structures where it occurs. Thus, a glycan-lectin binding pair of SugarBindDB can lead to the identification of a glycan-mediated protein-protein interaction, that is, a lectin-glycoprotein interaction, via substructure search and the knowledge of site-specific glycosylation stored in UniCarbKB. SugarBindDB is accessible at: http://sugarbind.expasy.org.",2015-11-17 +28153040,BicPAMS: software for biological data analysis with pattern-based biclustering.,"

Background

Biclustering has been largely applied for the unsupervised analysis of biological data, being recognised today as a key technique to discover putative modules in both expression data (subsets of genes correlated in subsets of conditions) and network data (groups of coherently interconnected biological entities). However, given its computational complexity, only recent breakthroughs on pattern-based biclustering enabled efficient searches without the restrictions that state-of-the-art biclustering algorithms place on the structure and homogeneity of biclusters. As a result, pattern-based biclustering provides the unprecedented opportunity to discover non-trivial yet meaningful biological modules with putative functions, whose coherency and tolerance to noise can be tuned and made problem-specific.

Methods

To enable the effective use of pattern-based biclustering by the scientific community, we developed BicPAMS (Biclustering based on PAttern Mining Software), a software that: 1) makes available state-of-the-art pattern-based biclustering algorithms (BicPAM (Henriques and Madeira, Alg Mol Biol 9:27, 2014), BicNET (Henriques and Madeira, Alg Mol Biol 11:23, 2016), BicSPAM (Henriques and Madeira, BMC Bioinforma 15:130, 2014), BiC2PAM (Henriques and Madeira, Alg Mol Biol 11:1-30, 2016), BiP (Henriques and Madeira, IEEE/ACM Trans Comput Biol Bioinforma, 2015), DeBi (Serin and Vingron, AMB 6:1-12, 2011) and BiModule (Okada et al., IPSJ Trans Bioinf 48(SIG5):39-48, 2007)); 2) consistently integrates their dispersed contributions; 3) further explores additional accuracy and efficiency gains; and 4) makes available graphical and application programming interfaces.

Results

Results on both synthetic and real data confirm the relevance of BicPAMS for biological data analysis, highlighting its essential role for the discovery of putative modules with non-trivial yet biologically significant functions from expression and network data.

Conclusions

BicPAMS is the first biclustering tool offering the possibility to: 1) parametrically customize the structure, coherency and quality of biclusters; 2) analyze large-scale biological networks; and 3) tackle the restrictive assumptions placed by state-of-the-art biclustering algorithms. These contributions are shown to be key for an adequate, complete and user-assisted unsupervised analysis of biological data.

Software

BicPAMS and its tutorial available in http://www.bicpams.com .",2017-02-02 +28003262,Defining the clonality of peripheral T cell lymphomas using RNA-seq.,"

Motivation

In T-cell lymphoma, malignant T cells arising from a founding clone share an identical T cell receptor (TCR) and can be identified by the over-representation of this TCR relative to TCRs from the patient's repertoire of normal T cells. Here, we demonstrate that TCR information extracted from RNA-seq data can provide a higher resolution view of peripheral T cell lymphomas (PTCLs) than that provided by conventional methods.

Results

For 60 subjects with PTCL, flow cytometry/FACS was used to identify and sort aberrant T cell populations from diagnostic lymph node cell suspensions. For samples that did not appear to contain aberrant T cell populations, T helper (TH), T follicular helper (TFH) and cytotoxic T lymphocyte (CTL) subsets were sorted. RNA-seq was performed on sorted T cell populations, and TCR alpha and beta chain sequences were extracted and quantified directly from the RNA-seq data. 96% of the immunophenotypically aberrant samples had a dominant T cell clone readily identifiable by RNA-seq. Of the samples where no aberrant population was found by flow cytometry, 80% had a dominant clone by RNA-seq. This demonstrates the increased sensitivity and diagnostic ability of RNA-seq over flow cytometry and shows that the presence of a normal immunophenotype does not exclude clonality.

Availability and implementation

R scripts used in the processing of the data are available online at https://www.github.com/scottdbrown/RNAseq-TcellClonality.

Contacts

rholt@bcgsc.ca or ksavage@bccancer.bc.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +25432166,Faster sequence homology searches by clustering subsequences.,"

Motivation

Sequence homology searches are used in various fields. New sequencing technologies produce huge amounts of sequence data, which continuously increase the size of sequence databases. As a result, homology searches require large amounts of computational time, especially for metagenomic analysis.

Results

We developed a fast homology search method based on database subsequence clustering, and implemented it as GHOSTZ. This method clusters similar subsequences from a database to perform an efficient seed search and ungapped extension by reducing alignment candidates based on triangle inequality. The database subsequence clustering technique achieved an ∼2-fold increase in speed without a large decrease in search sensitivity. When we measured with metagenomic data, GHOSTZ is ∼2.2-2.8 times faster than RAPSearch and is ∼185-261 times faster than BLASTX.

Availability and implementation

The source code is freely available for download at http://www.bi.cs.titech.ac.jp/ghostz/

Contact

akiyama@cs.titech.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-27 +29518242,Exploring anti-quorum sensing and anti-virulence based strategies to fight Candida albicans infections: an in silico approach. ,"The complex virulence attributes of Candida albicans are an attractive target to exploit in the development of new antifungals and anti-virulence strategies to combat C. albicans infections. Particularly, quorum sensing (QS) has been reported as critical for virulence regulation in C. albicans. This work presents two knowledge networks with up-to-date information about QS regulation and experimentally tested anti-QS and anti-virulence agents for C. albicans. A semi-automatic bioinformatics workflow that combines literature mining and expert curation was used to retrieve otherwise scattered information from the scientific literature. The network representation offers an innovative and continuously updatable means for the Candida research community to query QS and virulence data systematically and in a user-friendly way. Notably, the reconstructed networks show the complexity of QS regulation and the impact that some molecules have on the inhibition of virulence mechanisms responsible for infection establishment (e.g. hyphal development) and perseverance (e.g. biofilm formation). In the future, the compiled knowledge may be used to build decision-making models that help infer new knowledge of practical significance. The knowledge networks are publicly available at http://pcquorum.org/. 
This Web platform enables the exploration of fungal virulence cues as well as reported inhibitors in a user-friendly fashion.",2018-05-01 +27958387,Performance Evaluation and Online Realization of Data-driven Normalization Methods Used in LC/MS based Untargeted Metabolomics Analysis.,"In untargeted metabolomics analysis, several factors (e.g., unwanted experimental &biological variations and technical errors) may hamper the identification of differential metabolic features, which requires the data-driven normalization approaches before feature selection. So far, ≥16 normalization methods have been widely applied for processing the LC/MS based metabolomics data. However, the performance and the sample size dependence of those methods have not yet been exhaustively compared and no online tool for comparatively and comprehensively evaluating the performance of all 16 normalization methods has been provided. In this study, a comprehensive comparison on these methods was conducted. As a result, 16 methods were categorized into three groups based on their normalization performances across various sample sizes. The VSN, the Log Transformation and the PQN were identified as methods of the best normalization performance, while the Contrast consistently underperformed across all sub-datasets of different benchmark data. Moreover, an interactive web tool comprehensively evaluating the performance of 16 methods specifically for normalizing LC/MS based metabolomics data was constructed and hosted at http://server.idrb.cqu.edu.cn/MetaPre/. In summary, this study could serve as a useful guidance to the selection of suitable normalization methods in analyzing the LC/MS based metabolomics data.",2016-12-13 +25617413,3USS: a web server for detecting alternative 3'UTRs from RNA-seq experiments.,"

Unlabelled

Protein-coding genes with multiple alternative polyadenylation sites can generate mRNA 3'UTR sequences of different lengths, thereby causing the loss or gain of regulatory elements, which can affect stability, localization and translation efficiency. 3USS is a web-server developed with the aim of giving experimentalists the possibility to automatically identify alternative 3'UTRs (shorter or longer with respect to a reference transcriptome), an option that is not available in standard RNA-seq data analysis procedures. The tool reports as putative novel the 3'UTRs not annotated in available databases. Furthermore, if data from two related samples are uploaded, common and specific alternative 3'UTRs are identified and reported by the server.

Availability and implementation

3USS is freely available at http://www.biocomputing.it/3uss_server.",2015-01-22 +29432514,Improving SNP prioritization and pleiotropic architecture estimation by incorporating prior knowledge using graph-GPA.,"Summary:Integration of genetic studies for multiple phenotypes is a powerful approach to improving the identification of genetic variants associated with complex traits. Although it has been shown that leveraging shared genetic basis among phenotypes, namely pleiotropy, can increase statistical power to identify risk variants, it remains challenging to effectively integrate genome-wide association study (GWAS) datasets for a large number of phenotypes. We previously developed graph-GPA, a Bayesian hierarchical model that integrates multiple GWAS datasets to boost statistical power for the identification of risk variants and to estimate pleiotropic architecture within a unified framework. Here we propose a novel improvement of graph-GPA which incorporates external knowledge about phenotype-phenotype relationship to guide the estimation of genetic correlation and the association mapping. The application of graph-GPA to GWAS datasets for 12 complex diseases with a prior disease graph obtained from a text mining of biomedical literature illustrates its power to improve the identification of risk genetic variants and to facilitate understanding of genetic relationship among complex diseases. Availability and implementation:graph-GPA is implemented as an R package 'GGPA', which is publicly available at http://dongjunchung.github.io/GGPA/. DDNet, a web interface to query diseases of interest and download a prior disease graph obtained from a text mining of biomedical literature, is publicly available at http://www.chunglab.io/ddnet/. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-06-01 +29360926,OPAL: prediction of MoRF regions in intrinsically disordered protein sequences.,"Motivation:Intrinsically disordered proteins lack stable 3-dimensional structure and play a crucial role in performing various biological functions. Key to their biological function are the molecular recognition features (MoRFs) located within long disordered regions. Computationally identifying these MoRFs from disordered protein sequences is a challenging task. In this study, we present a new MoRF predictor, OPAL, to identify MoRFs in disordered protein sequences. OPAL utilizes two independent sources of information computed using different component predictors. The scores are processed and combined using common averaging method. The first score is computed using a component MoRF predictor which utilizes composition and sequence similarity of MoRF and non-MoRF regions to detect MoRFs. The second score is calculated using half-sphere exposure (HSE), solvent accessible surface area (ASA) and backbone angle information of the disordered protein sequence, using information from the amino acid properties of flanks surrounding the MoRFs to distinguish MoRF and non-MoRF residues. Results:OPAL is evaluated using test sets that were previously used to evaluate MoRF predictors, MoRFpred, MoRFchibi and MoRFchibi-web. The results demonstrate that OPAL outperforms all the available MoRF predictors and is the most accurate predictor available for MoRF prediction. It is available at http://www.alok-ai-lab.com/tools/opal/. Contact:ashwini@hgc.jp or alok.sharma@griffith.edu.au. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-06-01 +22941959,Interfaces to PeptideAtlas: a case study of standard data access systems.,"Access to public data sets is important to the scientific community as a resource to develop new experiments or validate new data. 
Projects such as the PeptideAtlas, Ensembl and The Cancer Genome Atlas (TCGA) offer both access to public data and a repository to share their own data. Access to these data sets is often provided through a web page form and a web service API. Access technologies based on web protocols (e.g. http) have been in use for over a decade and are widely adopted across the industry for a variety of functions (e.g. search, commercial transactions, and social media). Each architecture adapts these technologies to provide users with tools to access and share data. Both commonly used web service technologies (e.g. REST and SOAP), and custom-built solutions over HTTP are utilized in providing access to research data. Providing multiple access points ensures that the community can access the data in the simplest and most effective manner for their particular needs. This article examines three common access mechanisms for web accessible data: BioMart, caBIG, and Google Data Sources. These are illustrated by implementing each over the PeptideAtlas repository and reviewed for their suitability based on specific usages common to research. BioMart, Google Data Sources, and caBIG are each suitable for certain uses. The tradeoffs made in the development of the technology are dependent on the uses each was designed for (e.g. security versus speed). This means that an understanding of specific requirements and tradeoffs is necessary before selecting the access technology.",2011-11-22 +29281006,PINE-SPARKY.2 for automated NMR-based protein structure research.,"Summary:Nuclear magnetic resonance (NMR) spectroscopy, along with X-ray crystallography and cryoelectron microscopy, is one of the three major tools that enable the determination of atomic-level structural models of biological macromolecules. Of these, NMR has the unique ability to follow important processes in solution, including conformational changes, internal dynamics and protein-ligand interactions. 
As a means for facilitating the handling and analysis of spectra involved in these types of NMR studies, we have developed PINE-SPARKY.2, a software package that integrates and automates discrete tasks that previously required interaction with separate software packages. The graphical user interface of PINE-SPARKY.2 simplifies chemical shift assignment and verification, automated detection of secondary structural elements, predictions of flexibility and hydrophobic cores, and calculation of three-dimensional structural models. Availability and implementation:PINE-SPARKY.2 is available in the latest version of NMRFAM-SPARKY from the National Magnetic Resonance Facility at Madison (http://pine.nmrfam.wisc.edu/download_packages.html), the NMRbox Project (https://nmrbox.org) and to subscribers to the SBGrid (https://sbgrid.org). For a detailed description of the program, see http://www.nmrfam.wisc.edu/pine-sparky2.htm. Contact:whlee@nmrfam.wisc.edu or markley@nmrfam.wisc.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +29186378,PipelineDog: a simple and flexible graphic pipeline construction and maintenance tool.,"Summary:Analysis pipelines are an essential part of bioinformatics research, and ad hoc pipelines are frequently created by researchers for prototyping and proof-of-concept purposes. However, most existing pipeline management system or workflow engines are too complex for rapid prototyping or learning the pipeline concept. A lightweight, user-friendly and flexible solution is thus desirable. In this study, we developed a new pipeline construction and maintenance tool, PipelineDog. This is a web-based integrated development environment with a modern web graphical user interface. It offers cross-platform compatibility, project management capabilities, code formatting and error checking functions and an online repository. It uses an easy-to-read/write script system that encourages code reuse. 
With the online repository, it also encourages sharing of pipelines, which enhances analysis reproducibility and accountability. For most users, PipelineDog requires no software installation. Overall, this web application provides a way to rapidly create and easily manage pipelines. Availability and implementation:PipelineDog web app is freely available at http://web.pipeline.dog. The command line version is available at http://www.npmjs.com/package/pipelinedog and online repository at http://repo.pipeline.dog. Contact:ysun@kean.edu or xing@biology.rutgers.edu or ysun@diagnoa.com. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +29956040,Mass spectrometric characterization of siderophores produced by Pseudomonas taiwanensis VLB120 assisted by stable isotope labeling of nitrogen source.,"The structures of three previously unknown siderophores produced by the fluorescent, biotechnologically relevant Pseudomonas taiwanensis (P. taiwanensis) VLB120 bacteria were elucidated by means of hydrophilic interaction liquid chromatography (HILIC) hyphenated to high-resolution tandem mass spectrometry (HRMS/MS). They could be verified as iron(III)- and aluminum(III) complexes as well as the protonated molecules of the siderophores formed by in-source fragmentation. The siderophores were separated according to their different acyl side chains and additionally according their central ions. One of the siderophores was identified as pyoverdine with a malic acid (hydroxy succinic acid) amide side chain and a peptide moiety consisting of Orn-Asp-OHAsn-Thr-AcOHOrn-Ser-cOHOrn. The other analytes were assigned to an azotobactin with the identical peptide chain linked to the characteristic chromophoric unit and a pyoverdine with a variation in the amino acid sequence. Proline is directly linked to the pyoverdine chromophore instead of ornithine. Acidic and enzymatic hydrolyses were carried out to analyze the individual amino acids. 
Beside OHAsn, each amino acid of the peptide part was identified by HILIC-HRMS and comparison to authentic standards. Additionally, 15N-labeled cellular supernatants were analyzed by means of HRMS/MS. The results of the MS/MS experiments complemented by accurate mass data facilitated elucidation of the structures studied in this work and provided further confirmation of the three recently described pyoverdines of P. taiwanensis VLB120 (Baune et al. in Biometals 30:589-597, 2017. https://doi.org/10.1007/s10534-017-0029-7 ).",2018-06-28 +26648082,Targeting glycolysis in the malaria parasite Plasmodium falciparum.,"

Unlabelled

Glycolysis is the main pathway for ATP production in the malaria parasite Plasmodium falciparum and essential for its survival. Following a sensitivity analysis of a detailed kinetic model for glycolysis in the parasite, the glucose transport reaction was identified as the step whose activity needed to be inhibited to the least extent to result in a 50% reduction in glycolytic flux. In a subsequent inhibitor titration with cytochalasin B, we confirmed the model analysis experimentally and measured a flux control coefficient of 0.3 for the glucose transporter. In addition to the glucose transporter, the glucokinase and phosphofructokinase had high flux control coefficients, while for the ATPase a small negative flux control coefficient was predicted. In a broader comparative analysis of glycolytic models, we identified a weakness in the P. falciparum pathway design with respect to stability towards perturbations in the ATP demand.

Database

The mathematical model described here has been submitted to the JWS Online Cellular Systems Modelling Database and can be accessed at http://jjj.bio.vu.nl/database/vanniekerk1. The SEEK-study including the experimental data set is available at DOI 10.15490/seek.1.investigation.56 (http://dx.doi.org/10.15490/seek.1.investigation.56).",2016-01-04 +30319409,"Effectiveness of a Patient-Tailored, Pharmacist-Led Intervention Program to Enhance Adherence to Antihypertensive Medication: The CATI Study.","Introduction: Non-adherence to medication is a complex health care problem. In spite of substantial efforts, up till now little progress has been made to effectively tackle the problem with adherence-enhancing interventions. The aim of this study was to investigate the effectiveness of a patient-tailored, pharmacist-led and theory-driven intervention program aimed to enhance self-reported adherence to antihypertensive medication. Materials and Methods: A parallel-group randomized controlled trial in 20 community pharmacies with nine months follow-up was conducted. Patients (45-75 years) using antihypertensive medication and considered non-adherent based on both pharmacy dispensing data and a self-report questionnaire were eligible to participate. The intervention program consisted of two consultations with the pharmacist to identify participants' barriers to adhere to medication and to counsel participants in overcoming these barriers. The primary outcome was self-reported medication adherence. Secondary outcomes were beliefs about medicines, illness perceptions, quality of life and blood pressure. Mixed-model and generalized estimating equation (GEE) analyses were used to assess overall effects of the intervention program and effects per time point. Results: 170 patients were included. No significant differences between intervention and control groups were found in self-reported adherence, quality of life, illness perceptions, beliefs about medicines (concern scale), and blood pressure. After nine months, intervention participants had significantly stronger beliefs about the necessity of using their medicines as compared to control participants (mean difference 1.25 [95% CI: 0.27 to 2.24], p = 0.012). 
Discussion: We do not recommend to implement the intervention program in the current form for this study population. Future studies should focus on how to select eligible patient groups with appropriate measures in order to effectively target adherence-enhancing interventions. Trial Register: NTR5017 http://www.trialregister.nl/trialreg/admin/rctview.asp?TC=5017.",2018-09-26 +25177485,KEGGscape: a Cytoscape app for pathway data integration.,"In this paper, we present KEGGscape a pathway data integration and visualization app for Cytoscape ( http://apps.cytoscape.org/apps/keggscape). KEGG is a comprehensive public biological database that contains large collection of human curated pathways. KEGGscape utilizes the database to reproduce the corresponding hand-drawn pathway diagrams with as much detail as possible in Cytoscape. Further, it allows users to import pathway data sets to visualize biologist-friendly diagrams using the Cytoscape core visualization function (Visual Style) and the ability to perform pathway analysis with a variety of Cytoscape apps. From the analyzed data, users can create complex and interactive visualizations which cannot be done in the KEGG PATHWAY web application. Experimental data with Affymetrix E. coli chips are used as an example to demonstrate how users can integrate pathways, annotations, and experimental data sets to create complex visualizations that clarify biological systems using KEGGscape and other Cytoscape apps.",2014-07-01 +26157797,Bioinformatics and Moonlighting Proteins.,"Multitasking or moonlighting is the capability of some proteins to execute two or more biochemical functions. Usually, moonlighting proteins are experimentally revealed by serendipity. For this reason, it would be helpful that Bioinformatics could predict this multifunctionality, especially because of the large amounts of sequences from genome projects. 
In the present work, we analyze and describe several approaches that use sequences, structures, interactomics, and current bioinformatics algorithms and programs to try to overcome this problem. Among these approaches are (a) remote homology searches using Psi-Blast, (b) detection of functional motifs and domains, (c) analysis of data from protein-protein interaction databases (PPIs), (d) match the query protein sequence to 3D databases (i.e., algorithms as PISITE), and (e) mutation correlation analysis between amino acids by algorithms as MISTIC. Programs designed to identify functional motif/domains detect mainly the canonical function but usually fail in the detection of the moonlighting one, Pfam and ProDom being the best methods. Remote homology search by Psi-Blast combined with data from interactomics databases (PPIs) has the best performance. Structural information and mutation correlation analysis can help us to map the functional sites. Mutation correlation analysis can only be used in very specific situations - it requires the existence of multialigned family protein sequences - but can suggest how the evolutionary process of second function acquisition took place. The multitasking protein database MultitaskProtDB (http://wallace.uab.es/multitask/), previously published by our group, has been used as a benchmark for the all of the analyses.",2015-06-24 +29068161,"DCC mutation update: Congenital mirror movements, isolated agenesis of the corpus callosum, and developmental split brain syndrome.","The deleted in colorectal cancer (DCC) gene encodes the netrin-1 (NTN1) receptor DCC, a transmembrane protein required for the guidance of commissural axons. Germline DCC mutations disrupt the development of predominantly commissural tracts in the central nervous system (CNS) and cause a spectrum of neurological disorders. 
Monoallelic, missense, and predicted loss-of-function DCC mutations cause congenital mirror movements, isolated agenesis of the corpus callosum (ACC), or both. Biallelic, predicted loss-of-function DCC mutations cause developmental split brain syndrome (DSBS). Although the underlying molecular mechanisms leading to disease remain poorly understood, they are thought to stem from reduced or perturbed NTN1 signaling. Here, we review the 26 reported DCC mutations associated with abnormal CNS development in humans, including 14 missense and 12 predicted loss-of-function mutations, and discuss their associated clinical characteristics and diagnostic features. We provide an update on the observed genotype-phenotype relationships of congenital mirror movements, isolated ACC and DSBS, and correlate this to our current understanding of the biological function of DCC in the development of the CNS. All mutations and their associated phenotypes were deposited into a locus-specific LOVD (https://databases.lovd.nl/shared/genes/DCC).",2017-11-11 +28716166,Invited review: Helping dairy farmers to improve economic performance utilizing data-driving decision support tools.,"The objective of this review paper is to describe the development and application of a suite of more than 40 computerized dairy farm decision support tools contained at the University of Wisconsin-Madison (UW) Dairy Management website http://DairyMGT.info. These data-driven decision support tools are aimed to help dairy farmers improve their decision-making, environmental stewardship and economic performance. Dairy farm systems are highly dynamic in which changing market conditions and prices, evolving policies and environmental restrictions together with every time more variable climate conditions determine performance. Dairy farm systems are also highly integrated with heavily interrelated components such as the dairy herd, soils, crops, weather and management. 
Under these premises, it is critical to evaluate a dairy farm following a dynamic integrated system approach. For this approach, it is crucial to use meaningful data records, which are every time more available. These data records should be used within decision support tools for optimal decision-making and economic performance. Decision support tools in the UW-Dairy Management website (http://DairyMGT.info) had been developed using combination and adaptation of multiple methods together with empirical techniques always with the primary goal for these tools to be: (1) highly user-friendly, (2) using the latest software and computer technologies, (3) farm and user specific, (4) grounded on the best scientific information available, (5) remaining relevant throughout time and (6) providing fast, concrete and simple answers to complex farmers' questions. DairyMGT.info is a translational innovative research website in various areas of dairy farm management that include nutrition, reproduction, calf and heifer management, replacement, price risk and environment. This paper discusses the development and application of 20 selected (http://DairyMGT.info) decision support tools.",2017-07-18 +24952371,Characteristics of international websites with information on developmental disabilities.,"The Internet often serves as a primary resource for individuals seeking health-related information, and a large and growing number of websites contain information related to developmental disabilities. 
This paper presents the results of an international evaluation of the characteristics and content of the top 10 ranked results (i.e., not including sponsored results - pay-per-click) returned when one of five terms related to developmental disabilities (i.e., ADHD, autism, down syndrome, learning disability, intellectual disability) was entered into one of six country specific Google online search engines (i.e., Australia (https://www.google.com.au), Canada (https://www.google.ca), Ireland (https://www.google.ie), New Zealand (https://www.google.co.nz), the United Kingdom (https://www.google.co.uk), and the United States (https://www.google.com)) on October 22, 2013. Collectively, we found that international consumers of websites related to developmental disabilities will encounter different websites with differing content and terminology, and should be critical consumers to ensure they locate the information they are seeking.",2014-06-19 +25686636,Characterizing spatial distributions of astrocytes in the mammalian retina.,"

Motivation

In addition to being involved in retinal vascular growth, astrocytes play an important role in diseases and injuries, such as glaucomatous neuro-degeneration and retinal detachment. Studying astrocytes, their morphological cell characteristics and their spatial relationships to the surrounding vasculature in the retina may elucidate their role in these conditions.

Results

Our results show that in normal healthy retinas, the distribution of observed astrocyte cells does not follow a uniform distribution. The cells are significantly more densely packed around the blood vessels than a uniform distribution would predict. We also show that compared with the distribution of all cells, large cells are more dense in the vicinity of veins and toward the optic nerve head whereas smaller cells are often more dense in the vicinity of arteries. We hypothesize that since veinal astrocytes are known to transport toxic metabolic waste away from neurons they may be more critical than arterial astrocytes and therefore require larger cell bodies to process waste more efficiently.

Availability and implementation

A 1/8th size down-sampled version of the seven retinal image mosaics described in this article can be found on BISQUE (Kvilekval et al., 2010) at http://bisque.ece.ucsb.edu/client_service/view?resource=http://bisque.ece.ucsb.edu/data_service/dataset/6566968.",2015-02-16 +26302176,Finding mouse models of human lymphomas and leukemia's using the Jackson laboratory mouse tumor biology database.,"Many mouse models have been created to study hematopoietic cancer types. There are over thirty hematopoietic tumor types and subtypes, both human and mouse, with various origins, characteristics and clinical prognoses. Determining the specific type of hematopoietic lesion produced in a mouse model and identifying mouse models that correspond to the human subtypes of these lesions has been a continuing challenge for the scientific community. The Mouse Tumor Biology Database (MTB; http://tumor.informatics.jax.org) is designed to facilitate use of mouse models of human cancer by providing detailed histopathologic and molecular information on lymphoma subtypes, including expertly annotated, on line, whole slide scans, and providing a repository for storing information on and querying these data for specific lymphoma models.",2015-08-21 +29165996,iTop-Q: an Intelligent Tool for Top-down Proteomics Quantitation Using DYAMOND Algorithm.,"Top-down proteomics using liquid chromatogram coupled with mass spectrometry has been increasingly applied for analyzing intact proteins to study genetic variation, alternative splicing, and post-translational modifications (PTMs) of the proteins (proteoforms). However, only a few tools have been developed for charge state deconvolution, monoisotopic/average molecular weight determination and quantitation of proteoforms from LC-MS1 spectra. Though Decon2LS and MASH Suite Pro have been available to provide intraspectrum charge state deconvolution and quantitation, manual processing is still required to quantify proteoforms across multiple MS1 spectra. 
An automated tool for interspectrum quantitation is a pressing need. Thus, in this paper, we present a user-friendly tool, called iTop-Q (intelligent Top-down Proteomics Quantitation), that automatically performs large-scale proteoform quantitation based on interspectrum abundance in top-down proteomics. Instead of utilizing single spectrum for proteoform quantitation, iTop-Q constructs extracted ion chromatograms (XICs) of possible proteoform peaks across adjacent MS1 spectra to calculate abundances for accurate quantitation. Notably, iTop-Q is implemented with a newly proposed algorithm, called DYAMOND, using dynamic programming for charge state deconvolution. In addition, iTop-Q performs proteoform alignment to support quantitation analysis across replicates/samples. The performance evaluations on an in-house standard data set and a public large-scale yeast lysate data set show that iTop-Q achieves highly accurate quantitation, more consistent quantitation than using intraspectrum quantitation. Furthermore, the DYAMOND algorithm is suitable for high charge state deconvolution and can distinguish shared peaks in coeluting proteoforms. iTop-Q is publicly available for download at http://ms.iis.sinica.edu.tw/COmics/Software_iTop-Q .",2017-12-06 +29844124,Twist1 Regulates Vimentin through Cul2 Circular RNA to Promote EMT in Hepatocellular Carcinoma.,"Twist is a critical epithelial-mesenchymal transition (EMT)-inducing transcription factor that increases expression of vimentin. How Twist1 regulates this expression remains unclear. Here, we report that Twist1 regulates Cullin2 (Cul2) circular RNA to increase expression of vimentin in EMT. Twist1 bound the Cul2 promoter to activate its transcription and to selectively promote expression of Cul2 circular RNA (circ-10720), but not mRNA. circ-10720 positively correlated with Twist1, tumor malignance, and poor prognosis in hepatocellular carcinoma (HCC). 
Twist1 promoted vimentin expression by increasing levels of circ-10720, which can absorb miRNAs that target vimentin. circ-10720 knockdown counteracted the tumor-promoting activity of Twist1 in vitro and in patient-derived xenograft and diethylnitrosamine-induced TetOn-Twist1 transgenic mouse HCC models. These data unveil a mechanism by which Twist1 regulates vimentin during EMT. They also provide potential therapeutic targets for HCC treatment and provide new insight for circular RNA (circRNA)-based diagnostic and therapeutic strategies. Significance: A circRNA-based mechanism drives Twist1-mediated regulation of vimentin during EMT and provides potential therapeutic targets for treatment of HCC. Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/15/4150/F1.large.jpg Cancer Res; 78(15); 4150-62. ©2018 AACR.",2018-05-29 +28007037,Block network mapping approach to quantitative trait locus analysis.,"

Background

Advances in experimental biology have enabled the collection of enormous troves of data on genomic variation in living organisms. The interpretation of this data to extract actionable information is one of the keys to developing novel therapeutic strategies to treat complex diseases. Network organization of biological data overcomes measurement noise in several biological contexts. Does a network approach, combining information about the linear organization of genomic markers with correlative information on these markers in a Bayesian formulation, lead to an analytic method with higher power for detecting quantitative trait loci?

Results

Block Network Mapping, combining Similarity Network Fusion (Wang et al., NM 11:333-337, 2014) with a Bayesian locus likelihood evaluation, leads to large improvements in area under the receiver operating characteristic and power over interval mapping with expectation maximization. The method has a monotonically decreasing false discovery rate as a function of effect size, unlike interval mapping.

Conclusions

Block Network Mapping provides an alternative data-driven approach to mapping quantitative trait loci that leverages correlations in the sampled genotypes. The evaluation methodology can be combined with existing approaches such as Interval Mapping. Python scripts are available at http://lbm.niddk.nih.gov/vipulp/ . Genotype data is available at http://churchill-lab.jax.org/website/GattiDOQTL .",2016-12-22 +26836976,dbAARD & AGP: A computational pipeline for the prediction of genes associated with age related disorders.,"The atrocious behavioral and physiological shift with aging accelerate occurrence of deleterious disorders. Contemporary research is focused at uncovering the role of genetic associations in age-related disorders (ARDs). While the completion of the Human Genome Project and the HapMap project has generated huge amount of data on genetic variations; Genome-Wide Association Studies (GWAS) have identified genetic variations, essentially SNPs associated with several disorders including ARDs. However, a repository that houses all such ARD associations is lacking. The present work is aimed at filling this void. A database, dbAARD (database of Aging and Age Related Disorders) has been developed which hosts information on more than 3000 genetic variations significantly (p-value <0.05) associated with 51 ARDs. Furthermore, a machine learning based gene prediction tool AGP (Age Related Disorders Gene Prediction) has been constructed by employing rotation forest algorithm, to prioritize genes associated with ARDs. The tool achieved an overall accuracy in terms of precision 75%, recall 76%, F-measure 76% and AUC 0.85. Both the web resources have been made available online at http://genomeinformatics.dce.edu/dbAARD/ and http://genomeinformatics.dce.edu/AGP/ respectively for easy retrieval and usage by the scientific community. 
We believe that this work may facilitate the analysis of plethora of variants associated with ARDs and provide cues for deciphering the biology of aging.",2016-02-02 +30446783,"The 6-m timed hop test is a prognostic factor for outcomes in patients with meniscal tears treated with exercise therapy or arthroscopic partial meniscectomy: a secondary, exploratory analysis of the Odense-Oslo meniscectomy versus exercise (OMEX) trial.","

Purpose

To identify the prognostic factors for 2-year patient-reported outcomes in middle-aged patients with degenerative meniscal tears treated with exercise therapy (ET) or arthroscopic partial meniscectomy (APM).

Methods

One hundred and seven patients, with mean age 49.6 (SD 6.2) years and BMI 25.7 (SD 3.7), were included in this analysis of data from the OMEX trial ( http://www.clinicaltrials.gov NCT01002794). Linear and Poisson regression models were built to explore the associations between potential prognostic factors (patient characteristics, knee function-related and disease-related factors) and 2-year patient-reported outcomes: the Knee Injury and Osteoarthritis Outcome Score (KOOS) subscales Pain, Symptoms, ADL, Sport/Rec, QoL and 5-point Global Rating of Change scales for knee pain (GRC Pain) and function (GRC Function). Analyses were performed for the whole cohort and for the two treatment groups (n = 55 and 52) with adjustments for age, sex, BMI and baseline KOOS.

Results

For the whole cohort, a 1-s better baseline 6-m timed hop test result was associated with 3.1-7.1 points better 2-year scores for all KOOS subscales (95% CIs 1.1-5.2 to 4.1-10.1 points). A 1.61-2.80 s better test was associated with scores equivalent to previously calculated clinical relevant differences for each KOOS subscale. For the groups of patients treated with ET and APM, respectively, 2.09-3.60 s and 0.63-1.99 s better tests were associated with clinical relevant differences. For the whole cohort, a 1-s better test was associated with 26% (95% CI 15-38%) and 22% (95% CI 11-34%) higher possibility for better or much better GRC Pain and Function scores. Patients treated with ET had 17% (95% CI 2-33%) increased possibility for better or much better GRC Pain score, and patients treated with APM had 65% (95% CI 32-108%) and 70% (95% CI 38-109%) increased possibility for better or much better GRC Pain and Function scores.

Conclusions

The 6-m timed hop test result was a significant prognostic factor for 2-year patient-reported outcomes in middle-aged patients with degenerative meniscal tears, especially in those treated with APM.

Level of evidence

II.",2018-11-16 +29291020,"About miRNAs, miRNA seeds, target genes and target pathways.","miRNAs are typically repressing gene expression by binding to the 3' UTR, leading to degradation of the mRNA. This process is dominated by the eight-base seed region of the miRNA. Further, miRNAs are known not only to target genes but also to target significant parts of pathways. A logical line of thoughts is: miRNAs with similar (seed) sequence target similar sets of genes and thus similar sets of pathways. By calculating similarity scores for all 3.25 million pairs of 2,550 human miRNAs, we found that this pattern frequently holds, while we also observed exceptions. Respective results were obtained for both, predicted target genes as well as experimentally validated targets. We note that miRNAs target gene set similarity follows a bimodal distribution, pointing at a set of 282 miRNAs that seems to target genes with very high specificity. Further, we discuss miRNAs with different (seed) sequences that nonetheless regulate similar gene sets or pathways. Most intriguingly, we found miRNA pairs that regulate different gene sets but similar pathways such as miR-6886-5p and miR-3529-5p. These are jointly targeting different parts of the MAPK signaling cascade. The main goal of this study is to provide a general overview on the results, to highlight a selection of relevant results on miRNAs, miRNA seeds, target genes and target pathways and to raise awareness for artifacts in respective comparisons. 
The full set of information that allows to infer detailed results on each miRNA has been included in miRPathDB, the miRNA target pathway database (https://mpd.bioinf.uni-sb.de).",2017-11-09 +24682735,The eGenVar data management system--cataloguing and sharing sensitive data and metadata for the life sciences.,"Systematic data management and controlled data sharing aim at increasing reproducibility, reducing redundancy in work, and providing a way to efficiently locate complementing or contradicting information. One method of achieving this is collecting data in a central repository or in a location that is part of a federated system and providing interfaces to the data. However, certain data, such as data from biobanks or clinical studies, may, for legal and privacy reasons, often not be stored in public repositories. Instead, we describe a metadata cataloguing system and a software suite for reporting the presence of data from the life sciences domain. The system stores three types of metadata: file information, file provenance and data lineage, and content descriptions. Our software suite includes both graphical and command line interfaces that allow users to report and tag files with these different metadata types. Importantly, the files remain in their original locations with their existing access-control mechanisms in place, while our system provides descriptions of their contents and relationships. Our system and software suite thereby provide a common framework for cataloguing and sharing both public and private data. Database URL: http://bigr.medisin.ntnu.no/data/eGenVar/.",2014-03-28 +25264971,Applying differentially expressed genes from rodent models of chronic stress to research of stress-related disease: an online database.,"

Objective

To systematically collect differentially expressed genes (DEGs) from rodent models of chronic stress (CS) and apply them to research of stress-related disease. CS is an important environmental factor that may affect numerous complex diseases. Its relevant DEGs identified from rodent models provide valuable information for understanding the mechanisms underlying stress-related diseases. Currently, no suitable data tool have been developed to use such data.

Methods

We systematically searched and reviewed publications in PubMed. CS-DEGs were collected from original studies that reported gene expression statuses in rodent models of CS. CS disease overlapping genes, CS pathways and CS pathway clusters, and CS regulatory elements were analyzed on the basis of CS-DEGs. An online database was developed to store and manage curated CS-DEGs and analyzed data.

Results

A total of 2956 CS-DEGs were collected from 195 articles, among which 815 genes are shared among CS and seven stress-related diseases. Nine hundred twenty-seven CS pathway clusters were identified. Three types of CS regulatory elements are predicted for all CS genes. An online database (CS-DEGs), freely available at http://cs.psych.ac.cn, includes and presents CS-DEGs and all analyzed data.

Conclusions

CS-DEGs is the first gene database on CS research. It enables researchers to apply rodent expression data in candidate gene and pathway identification for stress-related disease study.",2014-10-01 +25586512,Mathematical modeling improves EC50 estimations from classical dose-response curves.,"

Unlabelled

The β-adrenergic response is impaired in failing hearts. When studying β-adrenergic function in vitro, the half-maximal effective concentration (EC50 ) is an important measure of ligand response. We previously measured the in vitro contraction force response of chicken heart tissue to increasing concentrations of adrenaline, and observed a decreasing response at high concentrations. The classical interpretation of such data is to assume a maximal response before the decrease, and to fit a sigmoid curve to the remaining data to determine EC50 . Instead, we have applied a mathematical modeling approach to interpret the full dose-response curve in a new way. The developed model predicts a non-steady-state caused by a short resting time between increased concentrations of agonist, which affect the dose-response characterization. Therefore, an improved estimate of EC50 may be calculated using steady-state simulations of the model. The model-based estimation of EC50 is further refined using additional time-resolved data to decrease the uncertainty of the prediction. The resulting model-based EC50 (180-525 nm) is higher than the classically interpreted EC50 (46-191 nm). Mathematical modeling thus makes it possible to re-interpret previously obtained datasets, and to make accurate estimates of EC50 even when steady-state measurements are not experimentally feasible.

Database

The mathematical models described here have been submitted to the JWS Online Cellular Systems Modelling Database, and may be accessed at http://jjj.bio.vu.nl/database/nyman.",2015-02-06 +27821047,Chromosome3D: reconstructing three-dimensional chromosomal structures from Hi-C interaction frequency data using distance geometry simulated annealing.,"

Background

Reconstructing three-dimensional structures of chromosomes is useful for visualizing their shapes in a cell and interpreting their function. In this work, we reconstruct chromosomal structures from Hi-C data by translating contact counts in Hi-C data into Euclidean distances between chromosomal regions and then satisfying these distances using a structure reconstruction method rigorously tested in the field of protein structure determination.

Results

We first evaluate the robustness of the overall reconstruction algorithm on noisy simulated data at various levels of noise by comparing with some of the state-of-the-art reconstruction methods. Then, using simulated data, we validate that Spearman's rank correlation coefficient between pairwise distances in the reconstructed chromosomal structures and the experimental chromosomal contact counts can be used to find optimum conversion rules for transforming interaction frequencies to wish distances. This strategy is then applied to real Hi-C data at chromosome level for optimal transformation of interaction frequencies to wish distances and for ranking and selecting structures. The chromosomal structures reconstructed from a real-world human Hi-C dataset by our method were validated by the known two-compartment feature of the human chromosome organization. We also show that our method is robust with respect to the change of the granularity of Hi-C data, and consistently produces similar structures at different chromosomal resolutions.

Conclusion

Chromosome3D is a robust method of reconstructing chromosome three-dimensional models using distance restraints obtained from Hi-C interaction frequency data. It is available as a web application and as an open source tool at http://sysbio.rnet.missouri.edu/chromosome3d/ .",2016-11-07 +26268340,Novel HIV-1 Integrase Inhibitor Development by Virtual Screening Based on QSAR Models.,"HIV-1 integrase (IN) plays an important role in the life cycle of HIV and is responsible for integration of the virus into the human genome. We present computational approaches used to design novel HIV-1 IN inhibitors. We created an IN inhibitor database by collecting experimental data from the literature. We developed quantitative structure-activity relationship (QSAR) models of HIV-1 IN strand transfer (ST) inhibitors using this database. The prediction accuracy of these models was estimated by external 5-fold cross-validation as well as with an additional validation set of 308 structurally distinct compounds from the publicly accessible BindingDB database. The validated models were used to screen a small combinatorial library of potential synthetic candidates to identify hits, with a subsequent docking approach applied to further filter out compounds to arrive at a small set of potential HIV-1 IN inhibitors. As result, 236 compounds with good druglikeness properties and with correct docking poses were identified as potential candidates for synthesis. One of the six compounds finally chosen for synthesis was experimentally confirmed to inhibit the ST reaction with an IC50(ST) of 37 µM. The IN inhibitor database is available for download from http://cactus.nci.nih.gov/download/iidb/.",2016-01-01 +21649883,"A comprehensive gene expression atlas of sex- and tissue-specificity in the malaria vector, Anopheles gambiae.","

Background

The mosquito, Anopheles gambiae, is the primary vector of human malaria, a disease responsible for millions of deaths each year. To improve strategies for controlling transmission of the causative parasite, Plasmodium falciparum, we require a thorough understanding of the developmental mechanisms, physiological processes and evolutionary pressures affecting life-history traits in the mosquito. Identifying genes expressed in particular tissues or involved in specific biological processes is an essential part of this process.

Results

In this study, we present transcription profiles for ~82% of annotated Anopheles genes in dissected adult male and female tissues. The sensitivity afforded by examining dissected tissues found gene activity in an additional 20% of the genome that is undetected when using whole-animal samples. The somatic and reproductive tissues we examined each displayed patterns of sexually dimorphic and tissue-specific expression. By comparing expression profiles with Drosophila melanogaster we also assessed which genes are well conserved within the Diptera versus those that are more recently evolved.

Conclusions

Our expression atlas and associated publicly available database, the MozAtlas (http://www.tissue-atlas.org), provides information on the relative strength and specificity of gene expression in several somatic and reproductive tissues, isolated from a single strain grown under uniform conditions. The data will serve as a reference for other mosquito researchers by providing a simple method for identifying where genes are expressed in the adult, however, in addition our resource will also provide insights into the evolutionary diversity associated with gene expression levels among species.",2011-06-07 +,"Automation of Bioinformatics Workflows using CloVR, a Cloud Virtual Resource","Exponential growth of biological data, mainly due to revolutionary developments in NGS technologies in past couple of years, created a multitude of challenges in downstream data analysis using bioinformatics approaches. To handle such tsunami of data, bioinformatics analysis must be carried out in an automated and parallel fashion. A successful analysis often requires more than a few computational steps and bootstrapping these individual steps (scripts) into components and the components into pipelines certainly makes bioinformatics a reproducible and manageable segment of scientific research. CloVR (http://clovr.org) is one such flexible framework that facilitates the abstraction of bioinformatics workflows into executable pipelines. CloVR comes packaged with various built-in bioinformatics pipelines that can make use of multicore processing power when run on servers and/or cloud. CloVR is amenable to build custom pipelines based on individual laboratory requirements. CloVR is available as a single executable virtual image file that comes bundled with pre-installed and pre-configured bioinformatics tools and packages and thus circumvents the cumbersome installation difficulties. 
CloVR is highly portable and can be run on traditional desktop/laptop computers, central servers and cloud compute farms. In conclusion, CloVR provides built-in automated analysis pipelines for microbial genomics with a scope to develop and integrate custom-workflows that make use of parallel processing power when run on compute clusters, thereby addressing the bioinformatics challenges with NGS data.",2013-05-01 +30032027,InterPreT cancer survival: A dynamic web interactive prediction cancer survival tool for health-care professionals and cancer epidemiologists.,"

Background

There are a variety of ways for quantifying cancer survival with each measure having advantages and disadvantages. Distinguishing these measures and how they should be interpreted has led to confusion among scientists, the media, health care professionals and patients. This motivates the development of tools to facilitate communication and interpretation of these statistics.

Methods

""InterPreT Cancer Survival"" is a newly developed, publicly available, online interactive cancer survival tool targeted towards health-care professionals and epidemiologists (http://interpret.le.ac.uk). It focuses on the correct interpretation of commonly reported cancer survival measures facilitated through the use of dynamic interactive graphics. Statistics presented are based on parameter estimates obtained from flexible parametric relative survival models using large population-based English registry data containing information on survival across 6 cancer sites; Breast, Colon, Rectum, Stomach, Melanoma and Lung.

Results

Through interactivity, the tool improves understanding of various measures and how survival or mortality may vary by age and sex. Routine measures of cancer survival are reported, however, individualised estimates using crude probabilities are advocated, which is more appropriate for patients or health care professionals. The results are presented in various interactive formats facilitating understanding of individual risk and differences between various measures.

Conclusions

""InterPreT Cancer Survival"" is presented as an educational tool which engages the user through interactive features to improve the understanding of commonly reported cancer survival statistics. The tool has received positive feedback from a Cancer Research UK patient sounding board and there are further plans to incorporate more disease characteristics, e.g. stage.",2018-07-20 +26669964,CircInteractome: A web tool for exploring circular RNAs and their interacting proteins and microRNAs.,"Circular RNAs (circRNAs) are widely expressed in animal cells, but their biogenesis and functions are poorly understood. CircRNAs have been shown to act as sponges for miRNAs and may also potentially sponge RNA-binding proteins (RBPs) and are thus predicted to function as robust posttranscriptional regulators of gene expression. The joint analysis of large-scale transcriptome data coupled with computational analyses represents a powerful approach to elucidate possible biological roles of ribonucleoprotein (RNP) complexes. Here, we present a new web tool, CircInteractome (circRNA interactome), for mapping RBP- and miRNA-binding sites on human circRNAs. CircInteractome searches public circRNA, miRNA, and RBP databases to provide bioinformatic analyses of binding sites on circRNAs and additionally analyzes miRNA and RBP sites on junction and junction-flanking sequences. CircInteractome also allows the user the ability to (1) identify potential circRNAs which can act as RBP sponges, (2) design junction-spanning primers for specific detection of circRNAs of interest, (3) design siRNAs for circRNA silencing, and (4) identify potential internal ribosomal entry sites (IRES). In sum, the web tool CircInteractome, freely accessible at http://circinteractome.nia.nih.gov, facilitates the analysis of circRNAs and circRNP biology.",2016-01-01 +26363021,LncReg: a reference resource for lncRNA-associated regulatory networks. 
,"Long non-coding RNAs (lncRNAs) are critical in the regulation of various biological processes. In recent years, plethora of lncRNAs have been identified in mammalian genomes through different approaches, and the researchers are constantly reporting the regulatory roles of these lncRNAs, which leads to complexity of literature about particular lncRNAs. Therefore, for the convenience of the researchers, we collected regulatory relationships of the lncRNAs and built a database called 'LncReg'. This database is developed by collecting 1081 validated lncRNA-associated regulatory entries, including 258 non-redundant lncRNAs and 571 non-redundant genes. With regulatory relationships information, LncReg can provide overall perspectives of regulatory networks of lncRNAs and comprehensive data for bioinformatics research, which is useful for understanding the functional roles of lncRNAs. Database URL: http://bioinformatics.ustc.edu.cn/lncreg/.",2015-09-10 +25024351,yStreX: yeast stress expression database. ,"Over the past decade genome-wide expression analyses have been often used to study how expression of genes changes in response to various environmental stresses. Many of these studies (such as effects of oxygen concentration, temperature stress, low pH stress, osmotic stress, depletion or limitation of nutrients, addition of different chemical compounds, etc.) have been conducted in the unicellular Eukaryal model, yeast Saccharomyces cerevisiae. However, the lack of a unifying or integrated, bioinformatics platform that would permit efficient and rapid use of all these existing data remain an important issue. To facilitate research by exploiting existing transcription data in the field of yeast physiology, we have developed the yStreX database. It is an online repository of analyzed gene expression data from curated data sets from different studies that capture genome-wide transcriptional changes in response to diverse environmental transitions. 
The first aim of this online database is to facilitate comparison of cross-platform and cross-laboratory gene expression data. Additionally, we performed different expression analyses, meta-analyses and gene set enrichment analyses; and the results are also deposited in this database. Lastly, we constructed a user-friendly Web interface with interactive visualization to provide intuitive access and to display the queried data for users with no background in bioinformatics. Database URL: http://www.ystrexdb.com.",2014-07-14 +29569316,Improved prediction of fungal effector proteins from secretomes with EffectorP 2.0.,"Plant-pathogenic fungi secrete effector proteins to facilitate infection. We describe extensive improvements to EffectorP, the first machine learning classifier for fungal effector prediction. EffectorP 2.0 is now trained on a larger set of effectors and utilizes a different approach based on an ensemble of classifiers trained on different subsets of negative data, offering different views on classification. EffectorP 2.0 achieves an accuracy of 89%, compared with 82% for EffectorP 1.0 and 59.8% for a small size classifier. Important features for effector prediction appear to be protein size, protein net charge as well as the amino acids serine and cysteine. EffectorP 2.0 decreases the number of predicted effectors in secretomes of fungal plant symbionts and saprophytes by 40% when compared with EffectorP 1.0. However, EffectorP 1.0 retains value, and combining EffectorP 1.0 and 2.0 results in a stringent classifier with a low false positive rate of 9%. EffectorP 2.0 predicts significant enrichments of effectors in 12 of 13 sets of infection-induced proteins from diverse fungal pathogens, whereas a small cysteine-rich classifier detects enrichment in only seven of 13. EffectorP 2.0 will fast track the prioritization of high-confidence effector candidates for functional validation and aid in improving our understanding of effector biology. 
EffectorP 2.0 is available at http://effectorp.csiro.au.",2018-05-11 +26612867,The Dfam database of repetitive DNA families.,"Repetitive DNA, especially that due to transposable elements (TEs), makes up a large fraction of many genomes. Dfam is an open access database of families of repetitive DNA elements, in which each family is represented by a multiple sequence alignment and a profile hidden Markov model (HMM). The initial release of Dfam, featured in the 2013 NAR Database Issue, contained 1143 families of repetitive elements found in humans, and was used to produce more than 100 Mb of additional annotation of TE-derived regions in the human genome, with improved speed. Here, we describe recent advances, most notably expansion to 4150 total families including a comprehensive set of known repeat families from four new organisms (mouse, zebrafish, fly and nematode). We describe improvements to coverage, and to our methods for identifying and reducing false annotation. We also describe updates to the website interface. The Dfam website has moved to http://dfam.org. Seed alignments, profile HMMs, hit lists and other underlying data are available for download.",2015-11-26 +29312575,CIPPN: computational identification of protein pupylation sites by using neural network.,"Recently, experiments revealed the pupylation to be a signal for the selective regulation of proteins in several serious human diseases. As one of the most significant post translational modification in the field of biology and disease, pupylation has the ability to playing the key role in the regulation various diseases' biological processes. Meanwhile, effectively identification such type modification will be helpful for proteins to perform their biological functions and contribute to understanding the molecular mechanism, which is the foundation of drug design. The existing algorithms of identification such types of modified sites often have some defects, such as low accuracy and time-consuming. 
In this research, the pupylation sites' identification model, CIPPN, demonstrates better performance than other existing approaches in this field. The proposed predictor achieves Acc value of 89.12 and Mcc value of 0.7949 in 10-fold cross-validation tests in the Pupdb Database (http://cwtung.kmu.edu.tw/pupdb). Significantly, such algorithm not only investigates the sequential, structural and evolutionary hallmarks around pupylation sites but also compares the differences of pupylation from the environmental, conservative and functional characterization of substrates. Therefore, the proposed feature description approach and algorithm results prove to be useful for further experimental investigation of such modification's identification.",2017-11-06 +25693925,Construction and validation of a detailed kinetic model of glycolysis in Plasmodium falciparum.,"The enzymes in the Embden-Meyerhof-Parnas pathway of Plasmodium falciparum trophozoites were kinetically characterized and their integrated activities analyzed in a mathematical model. For validation of the model, we compared model predictions for steady-state fluxes and metabolite concentrations of the hexose phosphates with experimental values for intact parasites. The model, which is completely based on kinetic parameters that were measured for the individual enzymes, gives an accurate prediction of the steady-state fluxes and intermediate concentrations. This is the first detailed kinetic model for glucose metabolism in P. falciparum, one of the most prolific malaria-causing protozoa, and the high predictive power of the model makes it a strong tool for future drug target identification studies. 
The modelling workflow is transparent and reproducible, and completely documented in the SEEK platform, where all experimental data and model files are available for download. The mathematical models described in the present study have been submitted to the JWS Online Cellular Systems Modelling Database (http://jjj.bio.vu.nl/database/penkler). The investigation and complete experimental data set is available on SEEK (10.15490/seek.1.56).",2015-03-23 +23813539,A sense inventory for clinical abbreviations and acronyms created using clinical notes and medical dictionary resources.,"

Objective

To create a sense inventory of abbreviations and acronyms from clinical texts.

Methods

The most frequently occurring abbreviations and acronyms from 352,267 dictated clinical notes were used to create a clinical sense inventory. Senses of each abbreviation and acronym were manually annotated from 500 random instances and lexically matched with long forms within the Unified Medical Language System (UMLS V.2011AB), Another Database of Abbreviations in Medline (ADAM), and Stedman's Dictionary, Medical Abbreviations, Acronyms & Symbols, 4th edition (Stedman's). Redundant long forms were merged after they were lexically normalized using Lexical Variant Generation (LVG).

Results

The clinical sense inventory was found to have skewed sense distributions, practice-specific senses, and incorrect uses. Of 440 abbreviations and acronyms analyzed in this study, 949 long forms were identified in clinical notes. This set was mapped to 17,359, 5233, and 4879 long forms in UMLS, ADAM, and Stedman's, respectively. After merging long forms, only 2.3% matched across all medical resources. The UMLS, ADAM, and Stedman's covered 5.7%, 8.4%, and 11% of the merged clinical long forms, respectively. The sense inventory of clinical abbreviations and acronyms and anonymized datasets generated from this study are available for public use at http://www.bmhi.umn.edu/ihi/research/nlpie/resources/index.htm ('Sense Inventories', website).

Conclusions

Clinical sense inventories of abbreviations and acronyms created using clinical notes and medical dictionary resources demonstrate challenges with term coverage and resource integration. Further work is needed to help with standardizing abbreviations and acronyms in clinical care and biomedicine to facilitate automated processes such as text-mining and information extraction.",2013-06-27 +27796840,An IoT-cloud Based Wearable ECG Monitoring System for Smart Healthcare.,"Public healthcare has been paid an increasing attention given the exponential growth human population and medical expenses. It is well known that an effective health monitoring system can detect abnormalities of health conditions in time and make diagnoses according to the gleaned data. As a vital approach to diagnose heart diseases, ECG monitoring is widely studied and applied. However, nearly all existing portable ECG monitoring systems cannot work without a mobile application, which is responsible for data collection and display. In this paper, we propose a new method for ECG monitoring based on Internet-of-Things (IoT) techniques. ECG data are gathered using a wearable monitoring node and are transmitted directly to the IoT cloud using Wi-Fi. Both the HTTP and MQTT protocols are employed in the IoT cloud in order to provide visual and timely ECG data to users. Nearly all smart terminals with a web browser can acquire ECG data conveniently, which has greatly alleviated the cross-platform issue. Experiments are carried out on healthy volunteers in order to verify the reliability of the entire system. Experimental results reveal that the proposed system is reliable in collecting and displaying real-time ECG data, which can aid in the primary diagnosis of certain heart diseases.",2016-10-29 +29668844,DeepEfflux: a 2D convolutional neural network model for identifying families of efflux proteins in transporters.,"

Motivation

Efflux protein plays a key role in pumping xenobiotics out of the cells. The prediction of efflux family proteins involved in transport process of compounds is crucial for understanding family structures, functions and energy dependencies. Many methods have been proposed to classify efflux pump transporters without considerations of any pump specific of efflux protein families. In other words, efflux proteins protect cells from extrusion of foreign chemicals. Moreover, almost all efflux protein families have the same structure based on the analysis of significant motifs. The motif sequences consisting of the same amount of residues will have high degrees of residue similarity and thus will affect the classification process. Consequently, it is challenging but vital to recognize the structures and determine energy dependencies of efflux protein families. In order to efficiently identify efflux protein families with considering about pump specific, we developed a 2D convolutional neural network (2D CNN) model called DeepEfflux. DeepEfflux tried to capture the motifs of sequences around hidden target residues to use as hidden features of families. In addition, the 2D CNN model uses a position-specific scoring matrix (PSSM) as an input. Three different datasets, each for one family of efflux protein, were fed into DeepEfflux, and then a 5-fold cross validation approach was used to evaluate the training performance.

Results

The model evaluation results show that DeepEfflux outperforms traditional machine learning algorithms. Furthermore, the accuracy of 96.02%, 94.89% and 90.34% for classes A, B and C, respectively, in the independent test results show that our model can perform well and can be used as a reliable tool for identifying families of efflux proteins in transporters.

Availability and implementation

The online version of deepefflux is available at http://deepefflux.irit.fr. The source code of deepefflux is available both on the deepefflux website and at http://140.138.155.216/deepefflux/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-09-01 +23046449,IPAD: the Integrated Pathway Analysis Database for Systematic Enrichment Analysis.,"

Background

Next-Generation Sequencing (NGS) technologies and Genome-Wide Association Studies (GWAS) generate millions of reads and hundreds of datasets, and there is an urgent need for a better way to accurately interpret and distill such large amounts of data. Extensive pathway and network analysis allow for the discovery of highly significant pathways from a set of disease vs. healthy samples in the NGS and GWAS. Knowledge of activation of these processes will lead to elucidation of the complex biological pathways affected by drug treatment, to patient stratification studies of new and existing drug treatments, and to understanding the underlying anti-cancer drug effects. There are approximately 141 biological human pathway resources as of Jan 2012 according to the Pathguide database. However, most currently available resources do not contain disease, drug or organ specificity information such as disease-pathway, drug-pathway, and organ-pathway associations. Systematically integrating pathway, disease, drug and organ specificity together becomes increasingly crucial for understanding the interrelationships between signaling, metabolic and regulatory pathway, drug action, disease susceptibility, and organ specificity from high-throughput omics data (genomics, transcriptomics, proteomics and metabolomics).

Results

We designed the Integrated Pathway Analysis Database for Systematic Enrichment Analysis (IPAD, http://bioinfo.hsc.unt.edu/ipad), defining inter-association between pathway, disease, drug and organ specificity, based on six criteria: 1) comprehensive pathway coverage; 2) gene/protein to pathway/disease/drug/organ association; 3) inter-association between pathway, disease, drug, and organ; 4) multiple and quantitative measurement of enrichment and inter-association; 5) assessment of enrichment and inter-association analysis with the context of the existing biological knowledge and a ""gold standard"" constructed from reputable and reliable sources; and 6) cross-linking of multiple available data sources. IPAD is a comprehensive database covering about 22,498 genes, 25,469 proteins, 1956 pathways, 6704 diseases, 5615 drugs, and 52 organs integrated from databases including the BioCarta, KEGG, NCI-Nature curated, Reactome, CTD, PharmGKB, DrugBank, PharmGKB, and HOMER. The database has a web-based user interface that allows users to perform enrichment analysis from genes/proteins/molecules and inter-association analysis from a pathway, disease, drug, and organ. Moreover, the quality of the database was validated with the context of the existing biological knowledge and a ""gold standard"" constructed from reputable and reliable sources. Two case studies were also presented to demonstrate: 1) self-validation of enrichment analysis and inter-association analysis on brain-specific markers, and 2) identification of previously undiscovered components by the enrichment analysis from a prostate cancer study.

Conclusions

IPAD is a new resource for analyzing, identifying, and validating pathway, disease, drug, organ specificity and their inter-associations. The statistical method we developed for enrichment and similarity measurement and the two criteria we described for setting the threshold parameters can be extended to other enrichment applications. Enriched pathways, diseases, drugs, organs and their inter-associations can be searched, displayed, and downloaded from our online user interface. The current IPAD database can help users address a wide range of biological pathway related, disease susceptibility related, drug target related and organ specificity related questions in human disease studies.",2012-09-11 +27672114,Reconstructing the Backbone of the Saccharomycotina Yeast Phylogeny Using Genome-Scale Data.,"Understanding the phylogenetic relationships among the yeasts of the subphylum Saccharomycotina is a prerequisite for understanding the evolution of their metabolisms and ecological lifestyles. In the last two decades, the use of rDNA and multilocus data sets has greatly advanced our understanding of the yeast phylogeny, but many deep relationships remain unsupported. In contrast, phylogenomic analyses have involved relatively few taxa and lineages that were often selected with limited considerations for covering the breadth of yeast biodiversity. Here we used genome sequence data from 86 publicly available yeast genomes representing nine of the 11 known major lineages and 10 nonyeast fungal outgroups to generate a 1233-gene, 96-taxon data matrix. Species phylogenies reconstructed using two different methods (concatenation and coalescence) and two data matrices (amino acids or the first two codon positions) yielded identical and highly supported relationships between the nine major lineages. Aside from the lineage comprised by the family Pichiaceae, all other lineages were monophyletic. 
Most interrelationships among yeast species were robust across the two methods and data matrices. However, eight of the 93 internodes conflicted between analyses or data sets, including the placements of: the clade defined by species that have reassigned the CUG codon to encode serine, instead of leucine; the clade defined by a whole genome duplication; and the species Ascoidea rubescens. These phylogenomic analyses provide a robust roadmap for future comparative work across the yeast subphylum in the disciplines of taxonomy, molecular genetics, evolutionary biology, ecology, and biotechnology. To further this end, we have also provided a BLAST server to query the 86 Saccharomycotina genomes, which can be found at http://y1000plus.org/blast.",2016-12-07 +,78 Asthma Admission Rates in Germany: An Analysis of the Nationwide DRG-Statistic of the Year 2009,"

Background

Within the OECD Health Care Quality Indicators (HCQI) Project up to 21 countries participated in calculations of 6 indicators on care for chronic conditions. Those so-called Health Promotion, Prevention and Primary Care Indicators originally had been introduced by the US Agency for Healthcare Research and Quality and rely on the principal diagnoses of an adult hospitalization stored in a hospital administrative database. 2007 age-sex standardized asthma admission rates varied considerably across the countries and ranged from 17 (Italy) to 120 (United States) admissions per 100,000 population (OECD mean: 51). It was concluded that asthma outpatient treatment was not optimal in countries reporting higher rates. Germany provided the third lowest asthma admission rate of 21 (Health at a Glance 2009 OECD Indicators. http://www.oecd.org/health/healthataglance). As data collections from various countries can differ in, e.g. coding responsibility, incentives for coding, and implementation of coding guidelines, international variations cannot exclusively be explained by differences in health system performance. This study aimed to calculate asthma admission rates separately for all 16 Federal States of Germany, assuming national comparisons are not biased by these factors.

Methods

Using the 2009 nationwide Diagnosis Related Groups statistic we calculated age-sex standardized asthma admission rates according to the OECD HCQI Data Collection Guidelines.

Results

Among all adult hospitalizations (15 years or older) we found 14,399 admissions with a principal diagnosis code of asthma. Related to the corresponding population of 70,779,623, the crude rate is 20.34 admissions per 100,000. Age and sex standardized rate is 20.20 (95% Confidence-Interval, 19.86-20.54). Among the 16 Federal States of Germany age-standardized rates range from 7.62 in Berlin (95% CI, 6.17-9.08) to 20.26 in North Rhine-Westphalia (95% CI, 19.13-21.39) among men and from 16.15 in Berlin (95% CI, 14.07-18.23) to 36.70 in Bremen (95% CI, 29.89-43.98) among women, respectively.

Conclusions

Prevention Quality Indicators calculated on national hospital administrative databases might be a useful tool to identify national variations of asthma admission rates reflecting areas with differences in outpatient care. Reasons for the differences found, e.g., a varying regional density of primary care providers or regional differences on asthma prevalence are in focus of further investigations.",2012-02-01 +23066107,TSGene: a web resource for tumor suppressor genes.,"Tumor suppressor genes (TSGs) are guardian genes that play important roles in controlling cell proliferation processes such as cell-cycle checkpoints and inducing apoptosis. Identification of these genes and understanding their functions are critical for further investigation of tumorigenesis. So far, many studies have identified numerous TSGs and illustrated their functions in various types of tumors or normal samples. Furthermore, accumulating evidence has shown that non-coding RNAs can act as TSGs to prevent the tumorigenesis processes. Therefore, there is a growing demand to integrate TSGs with large-scale experimental evidence (e.g. gene expression and epigenetic signatures) to provide a comprehensive resource for further investigation of TSGs and their molecular mechanisms in cancer. To achieve this goal, we first developed a comprehensive literature-based database called TSGene (tumor suppressor gene database), freely available at http://bioinfo.mc.vanderbilt.edu/TSGene/. In the current release, TSGene contains 716 human (637 protein-coding and 79 non-coding genes), 628 mouse and 567 rat TSGs curated from UniProtKB, the Tumor Associated Gene database and 5795 PubMed abstracts. 
Additionally, the TSGene provides detailed annotations for each TSG, such as cancer mutations, gene expressions, methylation sites, TF regulations and protein-protein interactions.",2012-10-12 +27794042,"The RCSB protein data bank: integrative view of protein, gene and 3D structural information.","The Research Collaboratory for Structural Bioinformatics Protein Data Bank (RCSB PDB, http://rcsb.org), the US data center for the global PDB archive, makes PDB data freely available to all users, from structural biologists to computational biologists and beyond. New tools and resources have been added to the RCSB PDB web portal in support of a 'Structural View of Biology.' Recent developments have improved the User experience, including the high-speed NGL Viewer that provides 3D molecular visualization in any web browser, improved support for data file download and enhanced organization of website pages for query, reporting and individual structure exploration. Structure validation information is now visible for all archival entries. PDB data have been integrated with external biological resources, including chromosomal position within the human genome; protein modifications; and metabolic pathways. PDB-101 educational materials have been reorganized into a searchable website and expanded to include new features such as the Geis Digital Archive.",2016-10-27 +26692809,"Online database for mosquito (Diptera, Culicidae) occurrence records in French Guiana.","A database providing information on mosquito specimens (Arthropoda: Diptera: Culicidae) collected in French Guiana is presented. Field collections were initiated in 2013 under the auspices of the CEnter for the study of Biodiversity in Amazonia (CEBA: http://www.labexceba.fr/en/). This study is part of an ongoing process aiming to understand the distribution of mosquitoes, including vector species, across French Guiana. 
Occurrences are recorded after each collecting trip in a database managed by the laboratory Evolution et Diversité Biologique (EDB), Toulouse, France. The dataset is updated monthly and is available online. Voucher specimens and their associated DNA are stored at the laboratory Ecologie des Forêts de Guyane (Ecofog), Kourou, French Guiana. The latest version of the dataset is accessible through EDB's Integrated Publication Toolkit at http://130.120.204.55:8080/ipt/resource.do?r=mosquitoes_of_french_guiana or through the Global Biodiversity Information Facility data portal at http://www.gbif.org/dataset/5a8aa2ad-261c-4f61-a98e-26dd752fe1c5 It can also be viewed through the Guyanensis platform at http://guyanensis.ups-tlse.fr.",2015-11-05 +25505092,NMRFAM-SPARKY: enhanced software for biomolecular NMR spectroscopy.,"

Unlabelled

SPARKY (Goddard and Kneller, SPARKY 3) remains the most popular software program for NMR data analysis, despite the fact that development of the package by its originators ceased in 2001. We have taken over the development of this package and describe NMRFAM-SPARKY, which implements new functions reflecting advances in the biomolecular NMR field. NMRFAM-SPARKY has been repackaged with current versions of Python and Tcl/Tk, which support new tools for NMR peak simulation and graphical assignment determination. These tools, along with chemical shift predictions from the PACSY database, greatly accelerate protein side chain assignments. NMRFAM-SPARKY supports automated data format interconversion for interfacing with a variety of web servers, including PECAN, PINE, TALOS-N, CS-Rosetta, SHIFTX2 and PONDEROSA-C/S.

Availability and implementation

The software package, along with binary and source codes, if desired, can be downloaded freely from http://pine.nmrfam.wisc.edu/download_packages.html. Instruction manuals and video tutorials can be found at http://www.nmrfam.wisc.edu/nmrfam-sparky-distribution.htm.

Contact

whlee@nmrfam.wisc.edu or markley@nmrfam.wisc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-12 +29900534,Absence of infiltrating peripheral myeloid cells in the brains of mouse models of lysosomal storage disorders.,"Approximately 70 lysosomal storage diseases are currently known, resulting from mutations in genes encoding lysosomal enzymes and membrane proteins. Defects in lysosomal enzymes that hydrolyze sphingolipids have been relatively well studied. Gaucher disease is caused by the loss of activity of glucocerebrosidase, leading to accumulation of glucosylceramide. Gaucher disease exhibits a number of subtypes, with types 2 and 3 showing significant neuropathology. Sandhoff disease results from the defective activity of β-hexosaminidase, leading to accumulation of ganglioside GM2. Niemann-Pick type C disease is primarily caused by the loss of activity of the lysosomal membrane protein, NPC1, leading to storage of cholesterol and sphingosine. All three disorders display significant neuropathology, accompanied by neuroinflammation. It is commonly assumed that neuroinflammation is the result of infiltration of monocyte-derived macrophages into the brain; for instance, cells resembling lipid-engorged macrophages ('Gaucher cells') have been observed in the brain of Gaucher disease patients. We now review the evidence that inflammatory macrophages are recruited into the brain in these diseases and then go on to provide some experimental data that, at least in the three mouse models tested, monocyte-derived macrophages do not appear to infiltrate the brain. Resident microglia, which are phenotypically distinct from infiltrating macrophages, are the only myeloid population present in significant numbers within the brain parenchyma in these authentic mouse models, even during the late symptomatic stages of disease when there is substantial neuroinflammation. 
OPEN SCIENCE BADGES: This article has received a badge for *Open Materials* because it provided all relevant information to reproduce the study in the manuscript. The complete Open Science Disclosure form for this article can be found at the end of the article. More information about the Open Practices badges can be found at https://cos.io/our-services/open-science-badges/. This article is part of the Special Issue ""Lysosomal Storage Disorders"".",2018-08-09 +27592709,Rail-RNA: scalable analysis of RNA-seq splicing and coverage.,"

Motivation

RNA sequencing (RNA-seq) experiments now span hundreds to thousands of samples. Current spliced alignment software is designed to analyze each sample separately. Consequently, no information is gained from analyzing multiple samples together, and it requires extra work to obtain analysis products that incorporate data from across samples.

Results

We describe Rail-RNA, a cloud-enabled spliced aligner that analyzes many samples at once. Rail-RNA eliminates redundant work across samples, making it more efficient as samples are added. For many samples, Rail-RNA is more accurate than annotation-assisted aligners. We use Rail-RNA to align 667 RNA-seq samples from the GEUVADIS project on Amazon Web Services in under 16 h for US$0.91 per sample. Rail-RNA outputs alignments in SAM/BAM format; but it also outputs (i) base-level coverage bigWigs for each sample; (ii) coverage bigWigs encoding normalized mean and median coverages at each base across samples analyzed; and (iii) exon-exon splice junctions and indels (features) in columnar formats that juxtapose coverages in samples in which a given feature is found. Supplementary outputs are ready for use with downstream packages for reproducible statistical analysis. We use Rail-RNA to identify expressed regions in the GEUVADIS samples and show that both annotated and unannotated (novel) expressed regions exhibit consistent patterns of variation across populations and with respect to known confounding variables.

Availability and implementation

Rail-RNA is open-source software available at http://rail.bio.

Contacts

anellore@gmail.com or langmea@cs.jhu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +29098485,"IDAC-Dose 2.1, an internal dosimetry program for diagnostic nuclear medicine based on the ICRP adult reference voxel phantoms.","

Background

To date, the estimated radiation-absorbed dose to organs and tissues in patients undergoing diagnostic examinations in nuclear medicine is derived via calculations based on models of the human body and the biokinetic behaviour of the radiopharmaceutical. An internal dosimetry computer program, IDAC-Dose2.1, was developed based on the International Commission on Radiological Protection (ICRP)-specific absorbed fractions and computational framework of internal dose assessment given for reference adults in ICRP Publication 133. The program uses the radionuclide decay database of ICRP Publication 107 and considers 83 different source regions irradiating 47 target tissues, defining the effective dose as presented in ICRP Publications 60 and 103. The computer program was validated against another ICRP dosimetry program, Dose and Risk Calculation (DCAL), that employs the same computational framework in evaluation of occupational and environmental intakes of radionuclides. IDAC-Dose2.1 has a sub-module for absorbed dose calculations in spherical structures of different volumes and composition; this sub-module is intended for absorbed dose estimates in radiopharmaceutical therapy. For nine specific alpha emitters, the absorbed dose contribution from their decay products is also included in the committed absorbed dose calculations.

Results

The absorbed doses and effective dose of 131I-iodide determined by IDAC-Dose2.1 were validated against the dosimetry program DCAL, showing identical results. IDAC-Dose2.1 was used to calculate absorbed doses for intravenously administered 18F-FDG and orally administered 99mTc-pertechnetate and 131I-iodide, three frequently used radiopharmaceuticals. Using the tissue weighting factors from ICRP Publication 103, the effective dose per administered activity was estimated to be 0.016 mSv/MBq for 18F-FDG, 0.014 mSv/MBq for 99mTc-pertechnetate, and 16 mSv/MBq for 131I-iodide.

Conclusions

The internal dosimetry program IDAC-Dose2.1 was developed and applied to three radiopharmaceuticals for validation against DCAL and to generate improved absorbed dose estimations for diagnostic nuclear medicine using specific absorbed fraction values of the ICRP computational voxel phantoms. The sub-module for absorbed dose calculations in spherical structures 1 mm to 9 cm in diameter and different tissue composition was included to broaden the clinical usefulness of the program. The IDAC-Dose2.1 program is free software for research and available for download at http://www.idac-dose.org .",2017-11-03 +27794555,HiLive: real-time mapping of illumina reads while sequencing.,"

Motivation

Next Generation Sequencing is increasingly used in time critical, clinical applications. While read mapping algorithms have always been optimized for speed, they follow a sequential paradigm and only start after finishing of the sequencing run and conversion of files. Since Illumina machines write intermediate output results, HiLive performs read mapping while still sequencing and thereby drastically reduces crucial overall sample analysis time, e.g. in precision medicine.

Methods

We present HiLive as a novel real time read mapper that implements a k-mer based alignment strategy. HiLive continuously reads intermediate BCL files produced by Illumina sequencers and then extends initial k-mer matches by increasingly produced data from the sequencer.

Results

We applied HiLive on real human transcriptome data to show that final read alignments are reported within few minutes after the end of a full Illumina HiSeq 1500 run, while already the necessary conversion to FASTQ files as the standard input to current read mapping methods takes roughly five times as long. Further, we show on simulated and real data that HiLive has comparable accuracy to recent read mappers.

Availability and implementation

HiLive and its source code are freely available from https://gitlab.com/SimonHTausch/HiLive .

Contact

renardB@rki.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +29092007,GRASS: semi-automated NMR-based structure elucidation of saccharides.,"Motivation:Carbohydrates play crucial roles in various biochemical processes and are useful for developing drugs and vaccines. However, in case of carbohydrates, the primary structure elucidation is usually a sophisticated task. Therefore, they remain the least structurally characterized class of biomolecules, and it hampers the progress in glycochemistry and glycobiology. Creating a usable instrument designed to assist researchers in natural carbohydrate structure determination would advance glycochemistry in biomedical and pharmaceutical applications. Results:We present GRASS (Generation, Ranking and Assignment of Saccharide Structures), a novel method for semi-automated elucidation of carbohydrate and derivative structures which uses unassigned 13C NMR spectra and information obtained from chromatography, optical, chemical and other methods. This approach is based on new methods of carbohydrate NMR simulation recently reported as the most accurate. It combines a broad diversity of supported structural features, high accuracy and performance. Availability and implementation:GRASS is implemented in a free web tool available at http://csdb.glycoscience.ru/grass.html. Contact:kapaev_roman@mail.ru or netbox@toukach.ru. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-03-01 +29989085,HBPred: a tool to identify growth hormone-binding proteins.,"Hormone-binding protein (HBP) is a kind of soluble carrier protein and can selectively and non-covalently interact with hormone. HBP plays an important role in life growth, but its function is still unclear. Correct recognition of HBPs is the first step to further study their function and understand their biological process. 
However, it is difficult to correctly recognize HBPs from more and more proteins through traditional biochemical experiments because of high experimental cost and long experimental period. To overcome these disadvantages, we designed a computational method for identifying HBPs accurately in the study. At first, we collected HBP data from UniProt to establish a high-quality benchmark dataset. Based on the dataset, the dipeptide composition was extracted from HBP residue sequences. In order to find out the optimal features to provide key clues for HBP identification, the analysis of variance (ANOVA) was performed for feature ranking. The optimal features were selected through the incremental feature selection strategy. Subsequently, the features were inputted into support vector machine (SVM) for prediction model construction. Jackknife cross-validation results showed that 88.6% HBPs and 81.3% non-HBPs were correctly recognized, suggesting that our proposed model was powerful. This study provides a new strategy to identify HBPs. Moreover, based on the proposed model, we established a webserver called HBPred, which could be freely accessed at http://lin-group.cn/server/HBPred.",2018-05-22 +29097748,KIXBASE: A comprehensive web resource for identification and exploration of KIX domains.,"The KIX domain has emerged in the last two decades as a critical site of interaction for transcriptional assembly, regulation and gene expression. Discovered in 1994, this conserved, triple helical globular domain has been characterised in various coactivator proteins of yeast, mammals and plants, including the p300/CBP (a histone acetyl transferase), MED15 (a subunit of the mediator complex of RNA polymerase II), and RECQL5 helicases. In this work, we describe the first rigorous meta analysis of KIX domains across all forms of life, leading to the development of KIXBASE, a predictive web server and global repository for detection and analysis of KIX domains.
To our knowledge, KIXBASE comprises the largest online collection of KIX sequences, enabling assessments at the level of both sequence and structure, incorporating PSIPRED and MUSTER at the backend for further annotation and quality assessment. In addition, KIXBASE provides useful information about critical aspects of KIX domains such as their intrinsic disorder, hydrophobicity profiles, functional classification and annotation based on domain architectures. KIXBASE represents a significant enrichment of the currently annotated KIX dataset, especially in the plant kingdom, thus highlighting potential targets for biochemical characterization. The KIX webserver and database are both freely available to the scientific community, at http://www.nipgr.res.in/kixbase/home.php .",2017-11-02 +25887162,Causal biological network database: a comprehensive platform of causal biological network models focused on the pulmonary and vascular systems.,"With the wealth of publications and data available, powerful and transparent computational approaches are required to represent measured data and scientific knowledge in a computable and searchable format. We developed a set of biological network models, scripted in the Biological Expression Language, that reflect causal signaling pathways across a wide range of biological processes, including cell fate, cell stress, cell proliferation, inflammation, tissue repair and angiogenesis in the pulmonary and cardiovascular context. This comprehensive collection of networks is now freely available to the scientific community in a centralized web-based repository, the Causal Biological Network database, which is composed of over 120 manually curated and well annotated biological network models and can be accessed at http://causalbionet.com. 
The website accesses a MongoDB, which stores all versions of the networks as JSON objects and allows users to search for genes, proteins, biological processes, small molecules and keywords in the network descriptions to retrieve biological networks of interest. The content of the networks can be visualized and browsed. Nodes and edges can be filtered and all supporting evidence for the edges can be browsed and is linked to the original articles in PubMed. Moreover, networks may be downloaded for further visualization and evaluation. Database URL: http://causalbionet.com",2015-04-17 +29875778,MU-LOC: A Machine-Learning Method for Predicting Mitochondrially Localized Proteins in Plants.,"Targeting and translocation of proteins to the appropriate subcellular compartments are crucial for cell organization and function. Newly synthesized proteins are transported to mitochondria with the assistance of complex targeting sequences containing either an N-terminal pre-sequence or a multitude of internal signals. Compared with experimental approaches, computational predictions provide an efficient way to infer subcellular localization of a protein. However, it is still challenging to predict plant mitochondrially localized proteins accurately due to various limitations. Consequently, the performance of current tools can be improved with new data and new machine-learning methods. We present MU-LOC, a novel computational approach for large-scale prediction of plant mitochondrial proteins. We collected a comprehensive dataset of plant subcellular localization, extracted features including amino acid composition, protein position weight matrix, and gene co-expression information, and trained predictors using deep neural network and support vector machine. Benchmarked on two independent datasets, MU-LOC achieved substantial improvements over six state-of-the-art tools for plant mitochondrial targeting prediction. 
In addition, MU-LOC has the advantage of predicting plant mitochondrial proteins either possessing or lacking N-terminal pre-sequences. We applied MU-LOC to predict candidate mitochondrial proteins for the whole proteome of Arabidopsis and potato. MU-LOC is publicly available at http://mu-loc.org.",2018-05-23 +29911924,Comprehensive search for accessory proteins encoded with archaeal and bacterial type III CRISPR-cas gene cassettes reveals 39 new cas gene families.,"A study was undertaken to identify conserved proteins that are encoded adjacent to cas gene cassettes of Type III CRISPR-Cas (Clustered Regularly Interspaced Short Palindromic Repeats - CRISPR associated) interference modules. Type III modules have been shown to target and degrade dsDNA, ssDNA and ssRNA and are frequently intertwined with cofunctional accessory genes, including genes encoding CRISPR-associated Rossman Fold (CARF) domains. Using a comparative genomics approach, and defining a Type III association score accounting for coevolution and specificity of flanking genes, we identified and classified 39 new Type III associated gene families. Most archaeal and bacterial Type III modules were seen to be flanked by several accessory genes, around half of which did not encode CARF domains and remain of unknown function. Northern blotting and interference assays in Synechocystis confirmed that one particular non-CARF accessory protein family was involved in crRNA maturation. Non-CARF accessory genes were generally diverse, encoding nuclease, helicase, protease, ATPase, transporter and transmembrane domains with some encoding no known domains. We infer that additional families of non-CARF accessory proteins remain to be found. The method employed is scalable for potential application to metagenomic data once automated pipelines for annotation of CRISPR-Cas systems have been developed. 
All accessory genes found in this study are presented online in a readily accessible and searchable format for researchers to audit their model organism of choice: http://accessory.crispr.dk .",2018-06-19 +27879621,Online Ratings of ASOPRS Surgeons: What Do Your Patients Really Think of You?,"

Purpose

To characterize patient evaluations of American Society of Ophthalmic Plastic and Reconstructive Surgery (ASOPRS) surgeons on a popular online physician rating website in an effort to determine which factors play a role in determining the likelihood of a patient recommending an ASOPRS surgeon to family and friends.

Methods

After obtaining approval and access from Healthgrades.com, the website database was searched for 612 U.S.-based ASOPRS members using their name as published on http://www.asoprs.org/ as of May 2015. For each surgeon, the total number of ratings and average ratings were recorded under each category. The evaluator recommendation, defined as the response to the questions of ""likelihood of recommending Dr. X to family and friends,"" constituted the main outcome measure. Variables from each surgeon were compared using unpaired t tests, with statistical significance set at p < 0.05. Correlations were analyzed using Spearman correlation (rs), with coefficients of greater than or equal to 0.40 or less than or equal to -0.40 considered significant.

Results

Five-hundred nineteen members (85%) had at least 1 rating while 222 members (36%) had 10 or more ratings. The mean number of ratings for all rated members was 11.4 (range, 1-77; standard deviation [SD] = 11.1) and mean evaluator recommendation score was 4.16 (range, 1-5; SD = 0.79). There was a strong negative correlation between total wait time and evaluator recommendation score (rs = -0.409, p < 0.001). The average number of ratings and rating scores for all categories were not significantly different when comparing male with female members. University-employed members had significantly fewer ratings (8.46; range, 1-52; SD = 9.3) compared with other members (11.9; range, 1-77; SD = 11.3) (p < 0.016). There were no differences in any other rating score when comparing those university-employed members with other members.

Conclusion

Online patient-reported evaluations of ASOPRS surgeons appear high in many categories. Long wait times correlate strongly with lower recommendation scores. Further study is required to determine how online patient reviews correlate to objective outcome measures, and how these reviews affect surgeon selection by patients.",2017-11-01 +29683130,An inventory of continental U.S. terrestrial candidate ecological restoration areas based on landscape context.,"Landscape context is an important factor in restoration ecology, but the use of landscape context for site prioritization has not been as fully developed. We used morphological image processing to identify candidate ecological restoration areas based on their proximity to existing natural vegetation. We identified 1,102,720 candidate ecological restoration areas across the continental United States. Candidate ecological restoration areas were concentrated in the Great Plains and eastern United States. We populated the database of candidate ecological restoration areas with 17 attributes related to site content and context, including factors such as soil fertility and roads (site content), and number and area of potentially conjoined vegetated regions (site context) to facilitate its use for site prioritization. We demonstrate the utility of the database in the state of North Carolina, U.S.A. for a restoration objective related to restoration of water quality (mandated by the U.S. Clean Water Act), wetlands, and forest. The database will be made publicly available on the U.S. 
Environmental Protection Agency's EnviroAtlas website (http://enviroatlas.epa.gov) for stakeholders interested in ecological restoration.",2017-11-01 +29381991,RNA sequencing uncovers the key microRNAs potentially contributing to sudden sensorineural hearing loss.,"This study aimed to identify miRNAs that may contribute to the pathogenesis of sudden sensorineural hearing loss (SSNHL) by RNA-seq (RNA-sequencing). RNA was extracted from SSNHL patients and healthy volunteers, respectively. Sequencing was performed on HiSeq4000 platform. After filtering, clean reads were mapped to the human reference genome hg19. Differential expression analysis of miRNAs between the SSNHL samples and the normal samples was performed using DEseq to identify differentially expressed microRNAs (DEMs). The target genes of the DEMs were predicted using the online tool miRWalk, which were then mapped to DAVID (https://david.ncifcrf.gov/) for functional annotation based on GO database and for pathway enrichment analysis based on KEGG. Finally, a miRNA-target-protein-protein interaction (PPIs) network was constructed using the DEMs and their target genes with interaction. Differential expression analysis reveals 24 DEMs between the SSNHL group and control group. A total of 1083 target genes were predicted. GO functional annotation analysis reveals that the target genes in the top 10 terms are mainly related to the development of salivary glands, neurotransmission, dendritic development, and other processes. KEGG pathway enrichment analysis reveals that the target genes were functionally enriched in pathways arachidonic acid metabolism, complement and coagulation cascades, linoleic acid metabolism, and MAPK signaling pathway.
In the miRNA-target-PPI network, hsa-miR-34a/548n/15a/143/23a/210/1255a/18b/ /1180/99b had the most target genes; genes YWHAG, GSK3B, CDC42, NR3C1, LCK, UNC119, SIN3A, and NFKB2, interact with most other genes among all the predicted target genes. Hsa-miR-34a/15a/23a/210/18b/548n/143 is likely to have a role in the pathogenesis of SSNHL.",2017-11-01 +29016165,"""Falsifiability is not optional"": Correction to LeBel et al. (2017).","Reports an error in ""Falsifiability is not optional"" by Etienne P. LeBel, Derek Berger, Lorne Campbell and Timothy J. Loving (Journal of Personality and Social Psychology, 2017[Aug], Vol 113[2], 254-261). In the reply, there were two errors in the References list. The publishing year for the 14th and 21st articles was cited incorrectly as 2016. The in-text acronym associated with these citations should read instead as FER2017 and LCL2017. The correct References list citations should read as follows, respectively: Finkel, E. J., Eastwick, P. W., & Reis, H. T. (2017). Replicability and other features of a high-quality science: Toward a balanced and empirical approach. Journal of Personality and Social Psychology, 113, 244-253. http://dx.doi.org/10.1037/pspi0000075 LeBel, E. P., Campbell, L., & Loving, T. J. (2017). Benefits of open and high-powered research outweigh costs. Journal of Personality and Social Psychology, 113, 230-243. http://dx.doi.org/10.1037/pspi0000049. The online version of this article has been corrected. (The following abstract of the original article appeared in record 2017-30567-003.) Finkel, Eastwick, and Reis (2016; FER2016) argued the post-2011 methodological reform movement has focused narrowly on replicability, neglecting other essential goals of research. We agree multiple scientific goals are essential, but argue, however, a more fine-grained language, conceptualization, and approach to replication is needed to accomplish these goals.
Replication is the general empirical mechanism for testing and falsifying theory. Sufficiently methodologically similar replications, also known as direct replications, test the basic existence of phenomena and ensure cumulative progress is possible a priori. In contrast, increasingly methodologically dissimilar replications, also known as conceptual replications, test the relevance of auxiliary hypotheses (e.g., manipulation and measurement issues, contextual factors) required to productively investigate validity and generalizability. Without prioritizing replicability, a field is not empirically falsifiable. We also disagree with FER2016's position that ""bigger samples are generally better, but . . . that very large samples could have the downside of commandeering resources that would have been better invested in other studies"" (abstract). We identify problematic assumptions involved in FER2016's modifications of our original research-economic model, and present an improved model that quantifies when (and whether) it is reasonable to worry that increasing statistical power will engender potential trade-offs. Sufficiently powering studies (i.e., >80%) maximizes both research efficiency and confidence in the literature (research quality). Given that we are in agreement with FER2016 on all key open science points, we are eager to start seeing the accelerated rate of cumulative knowledge development of social psychological phenomena such a sufficiently transparent, powered, and falsifiable approach will generate. (PsycINFO Database Record",2017-11-01 +27543790,Comprehensive characterization of tissue-specific circular RNAs in the human and mouse genomes.,"Circular RNA (circRNA) is a group of RNA family generated by RNA circularization, which was discovered ubiquitously across different species and tissues. However, there is no global view of tissue specificity for circRNAs to date. 
Here we performed the comprehensive analysis to characterize the features of human and mouse tissue-specific (TS) circRNAs. We identified in total 302 853 TS circRNAs in the human and mouse genome, and showed that the brain has the highest abundance of TS circRNAs. We further confirmed the existence of circRNAs by reverse transcription polymerase chain reaction (RT-PCR). We also characterized the genomic location and conservation of these TS circRNAs and showed that the majority of TS circRNAs are generated from exonic regions. To further understand the potential functions of TS circRNAs, we identified microRNAs and RNA binding protein, which might bind to TS circRNAs. This process suggested their involvement in development and organ differentiation. Finally, we constructed an integrated database TSCD (Tissue-Specific CircRNA Database: http://gb.whu.edu.cn/TSCD) to deposit the features of TS circRNAs. This study is the first comprehensive view of TS circRNAs in human and mouse, which shed light on circRNA functions in organ development and disorders.",2017-11-01 +25619558,OntoMate: a text-mining tool aiding curation at the Rat Genome Database. ,"The Rat Genome Database (RGD) is the premier repository of rat genomic, genetic and physiologic data. Converting data from free text in the scientific literature to a structured format is one of the main tasks of all model organism databases. RGD spends considerable effort manually curating gene, Quantitative Trait Locus (QTL) and strain information. The rapidly growing volume of biomedical literature and the active research in the biological natural language processing (bioNLP) community have given RGD the impetus to adopt text-mining tools to improve curation efficiency. Recently, RGD has initiated a project to use OntoMate, an ontology-driven, concept-based literature search engine developed at RGD, as a replacement for the PubMed (http://www.ncbi.nlm.nih.gov/pubmed) search engine in the gene curation workflow. 
OntoMate tags abstracts with gene names, gene mutations, organism name and most of the 16 ontologies/vocabularies used at RGD. All terms/ entities tagged to an abstract are listed with the abstract in the search results. All listed terms are linked both to data entry boxes and a term browser in the curation tool. OntoMate also provides user-activated filters for species, date and other parameters relevant to the literature search. Using the system for literature search and import has streamlined the process compared to using PubMed. The system was built with a scalable and open architecture, including features specifically designed to accelerate the RGD gene curation process. With the use of bioNLP tools, RGD has added more automation to its curation workflow. Database URL: http://rgd.mcw.edu.",2015-01-25 +25270877,CODEX: a next-generation sequencing experiment database for the haematopoietic and embryonic stem cell communities.,"CODEX (http://codex.stemcells.cam.ac.uk/) is a user-friendly database for the direct access and interrogation of publicly available next-generation sequencing (NGS) data, specifically aimed at experimental biologists. In an era of multi-centre genomic dataset generation, CODEX provides a single database where these samples are collected, uniformly processed and vetted. The main drive of CODEX is to provide the wider scientific community with instant access to high-quality NGS data, which, irrespective of the publishing laboratory, is directly comparable. CODEX allows users to immediately visualize or download processed datasets, or compare user-generated data against the database's cumulative knowledge-base. CODEX contains four types of NGS experiments: transcription factor chromatin immunoprecipitation coupled to high-throughput sequencing (ChIP-Seq), histone modification ChIP-Seq, DNase-Seq and RNA-Seq. 
These are largely encompassed within two specialized repositories, HAEMCODE and ESCODE, which are focused on haematopoiesis and embryonic stem cell samples, respectively. To date, CODEX contains over 1000 samples, including 221 unique TFs and 93 unique cell types. CODEX therefore provides one of the most complete resources of publicly available NGS data for the direct interrogation of transcriptional programmes that regulate cellular identity and fate in the context of mammalian development, homeostasis and disease.",2014-09-30 +30084505,Accelerating compressed sensing in parallel imaging reconstructions using an efficient circulant preconditioner for cartesian trajectories.,"

Purpose

Design of a preconditioner for fast and efficient parallel imaging (PI) and compressed sensing (CS) reconstructions for Cartesian trajectories.

Theory

PI and CS reconstructions become time consuming when the problem size or the number of coils is large, due to the large linear system of equations that has to be solved in 1 and 2 -norm based reconstruction algorithms. Such linear systems can be solved efficiently using effective preconditioning techniques.

Methods

In this article we construct such a preconditioner by approximating the system matrix of the linear system, which comprises the data fidelity and includes total variation and wavelet regularization, by a matrix that is block circulant with circulant blocks. Due to this structure, the preconditioner can be constructed quickly and its inverse can be evaluated fast using only two fast Fourier transformations. We test the performance of the preconditioner for the conjugate gradient method as the linear solver, integrated into the well-established Split Bregman algorithm.

Results

The designed circulant preconditioner reduces the number of iterations required in the conjugate gradient method by almost a factor of 5. The speed up results in a total acceleration factor of approximately 2.5 for the entire reconstruction algorithm when implemented in MATLAB, while the initialization time of the preconditioner is negligible.

Conclusion

The proposed preconditioner reduces the reconstruction time for PI and CS in a Split Bregman implementation without compromising reconstruction stability and can easily handle large systems since it is Fourier-based, allowing for efficient computations.",2018-08-07 +29373861,The Sister Study Cohort: Baseline Methods and Participant Characteristics.,"

Background

The Sister Study was designed to address gaps in the study of environment and breast cancer by taking advantage of more frequent breast cancer diagnoses among women with a sister history of breast cancer and the presumed enrichment of shared environmental and genetic exposures.

Objective

The Sister Study sought a large cohort of women never diagnosed with breast cancer but who had a sister (full or half) diagnosed with breast cancer.

Methods

A multifaceted national effort employed novel strategies to recruit a diverse cohort, and collected biological and environmental samples and extensive data on potential breast cancer risk factors.

Results

The Sister Study enrolled 50,884 U.S. and Puerto Rican women 35-74y of age (median 56 y). Although the majority were non-Hispanic white, well educated, and economically well off, substantial numbers of harder-to-recruit women also enrolled (race/ethnicity other than non-Hispanic white: 16%; no college degree: 35%; household income <$50,000: 26%). Although all had a biologic sister with breast cancer, 16.5% had average or lower risk of breast cancer according to the Breast Cancer Risk Assessment Tool (Gail score). Most were postmenopausal (66%), parous with a first full-term pregnancy <30y of age (79%), never-smokers (56%) with body mass indexes (BMIs) of <29.9kg/m2 (70%). Few (5%) reported any cancer prior to enrollment.

Conclusions

The Sister Study is a unique cohort designed to efficiently study environmental and genetic risk factors for breast cancer. Extensive exposure data over the life-course and baseline specimens provide important opportunities for studying breast cancer and other health outcomes in women. Collaborations are welcome. https://doi.org/10.1289/EHP1923.",2017-12-20 +28164800,Measurement equivalence: A non-technical primer on categorical multi-group confirmatory factor analysis in school psychology.,"Evidence-based interventions (EBIs) have become a central component of school psychology research and practice, but EBIs are dependent upon the availability and use of evidence-based assessments (EBAs) with diverse student populations. Multi-group confirmatory factor analysis (MG-CFA) is an analytical tool that can be used to examine the validity and measurement equivalence/invariance of scores across diverse groups. The objective of this article is to provide a conceptual and procedural overview of categorical MG-CFA, as well as an illustrated example based on data from the Social and Academic Behavior Risk Screener (SABRS) - a tool designed for use in school-based interventions. This article serves as a non-technical primer on the topic of MG-CFA with ordinal (rating scale) data and does so through the framework of examining equivalence of measures used for EBIs within multi-tiered models - an understudied topic. To go along with the illustrated example, we have provided supplementary files that include sample data, Mplus input code, and an annotated guide for understanding the input code (http://dx.doi.org/10.1016/j.jsp.2016.11.002). Data needed to reproduce analyses in this article are available as supplemental materials (online only) in the Appendix of this article.",2017-01-03 +28148799,Computational Prediction of the Heterodimeric and Higher-Order Structure of gpE1/gpE2 Envelope Glycoproteins Encoded by Hepatitis C Virus. 
,"Despite the recent success of newly developed direct-acting antivirals against hepatitis C, the disease continues to be a global health threat due to the lack of diagnosis of most carriers and the high cost of treatment. The heterodimer formed by glycoproteins E1 and E2 within the hepatitis C virus (HCV) lipid envelope is a potential vaccine candidate and antiviral target. While the structure of E1/E2 has not yet been resolved, partial crystal structures of the E1 and E2 ectodomains have been determined. The unresolved parts of the structure are within the realm of what can be modeled with current computational modeling tools. Furthermore, a variety of additional experimental data is available to support computational predictions of E1/E2 structure, such as data from antibody binding studies, cryo-electron microscopy (cryo-EM), mutational analyses, peptide binding analysis, linker-scanning mutagenesis, and nuclear magnetic resonance (NMR) studies. In accordance with these rich experimental data, we have built an in silico model of the full-length E1/E2 heterodimer. Our model supports that E1/E2 assembles into a trimer, which was previously suggested from a study by Falson and coworkers (P. Falson, B. Bartosch, K. Alsaleh, B. A. Tews, A. Loquet, Y. Ciczora, L. Riva, C. Montigny, C. Montpellier, G. Duverlie, E. I. Pecheur, M. le Maire, F. L. Cosset, J. Dubuisson, and F. Penin, J. Virol. 89:10333-10346, 2015, https://doi.org/10.1128/JVI.00991-15). Size exclusion chromatography and Western blotting data obtained by using purified recombinant E1/E2 support our hypothesis. Our model suggests that during virus assembly, the trimer of E1/E2 may be further assembled into a pentamer, with 12 pentamers comprising a single HCV virion. 
We anticipate that this new model will provide a useful framework for HCV envelope structure and the development of antiviral strategies.IMPORTANCE One hundred fifty million people have been estimated to be infected with hepatitis C virus, and many more are at risk for infection. A better understanding of the structure of the HCV envelope, which is responsible for attachment and fusion, could aid in the development of a vaccine and/or new treatments for this disease. We draw upon computational techniques to predict a full-length model of the E1/E2 heterodimer based on the partial crystal structures of the envelope glycoproteins E1 and E2. E1/E2 has been widely studied experimentally, and this provides valuable data, which has assisted us in our modeling. Our proposed structure is used to suggest the organization of the HCV envelope. We also present new experimental data from size exclusion chromatography that support our computational prediction of a trimeric oligomeric state of E1/E2.",2017-03-29 +28749987,"DUDE-Seq: Fast, flexible, and robust denoising for targeted amplicon sequencing.","We consider the correction of errors from nucleotide sequences produced by next-generation targeted amplicon sequencing. The next-generation sequencing (NGS) platforms can provide a great deal of sequencing data thanks to their high throughput, but the associated error rates often tend to be high. Denoising in high-throughput sequencing has thus become a crucial process for boosting the reliability of downstream analyses. Our methodology, named DUDE-Seq, is derived from a general setting of reconstructing finite-valued source data corrupted by a discrete memoryless channel and effectively corrects substitution and homopolymer indel errors, the two major types of sequencing errors in most high-throughput targeted amplicon sequencing platforms. 
Our experimental studies with real and simulated datasets suggest that the proposed DUDE-Seq not only outperforms existing alternatives in terms of error-correction capability and time efficiency, but also boosts the reliability of downstream analyses. Further, the flexibility of DUDE-Seq enables its robust application to different sequencing platforms and analysis pipelines by simple updates of the noise model. DUDE-Seq is available at http://data.snu.ac.kr/pub/dude-seq.",2017-07-27 +29181236,Machine Learning to Improve the Effectiveness of ANRS in Predicting HIV Drug Resistance.,"

Objectives

Human immunodeficiency virus infection and acquired immune deficiency syndrome (HIV/AIDS) is one of the major burdens of disease in developing countries, and the standard-of-care treatment includes prescribing antiretroviral drugs. However, antiretroviral drug resistance is inevitable due to selective pressure associated with the high mutation rate of HIV. Determining antiretroviral resistance can be done by phenotypic laboratory tests or by computer-based interpretation algorithms. Computer-based algorithms have been shown to have many advantages over laboratory tests. The ANRS (Agence Nationale de Recherches sur le SIDA) is regarded as a gold standard in interpreting HIV drug resistance using mutations in genomes. The aim of this study was to improve the prediction of the ANRS gold standard in predicting HIV drug resistance.

Methods

A genome sequence and HIV drug resistance measures were obtained from the Stanford HIV database (http://hivdb.stanford.edu/). Feature selection was used to determine the most important mutations associated with resistance prediction. These mutations were added to the ANRS rules, and the difference in the prediction ability was measured.

Results

This study uncovered important mutations that were not associated with the original ANRS rules. On average, the ANRS algorithm was improved by 79% ± 6.6%. The positive predictive value improved by 28%, and the negative predicative value improved by 10%.

Conclusions

The study shows that there is a significant improvement in the prediction ability of ANRS gold standard.",2017-10-31 +23163954,Identification and profiling of novel microRNAs in the Brassica rapa genome based on small RNA deep sequencing.,"

Background

MicroRNAs (miRNAs) are one of the functional non-coding small RNAs involved in the epigenetic control of the plant genome. Although plants contain both evolutionary conserved miRNAs and species-specific miRNAs within their genomes, computational methods often only identify evolutionary conserved miRNAs. The recent sequencing of the Brassica rapa genome enables us to identify miRNAs and their putative target genes. In this study, we sought to provide a more comprehensive prediction of B. rapa miRNAs based on high throughput small RNA deep sequencing.

Results

We sequenced small RNAs from five types of tissue: seedlings, roots, petioles, leaves, and flowers. By analyzing 2.75 million unique reads that mapped to the B. rapa genome, we identified 216 novel and 196 conserved miRNAs that were predicted to target approximately 20% of the genome's protein coding genes. Quantitative analysis of miRNAs from the five types of tissue revealed that novel miRNAs were expressed in diverse tissues but their expression levels were lower than those of the conserved miRNAs. Comparative analysis of the miRNAs between the B. rapa and Arabidopsis thaliana genomes demonstrated that redundant copies of conserved miRNAs in the B. rapa genome may have been deleted after whole genome triplication. Novel miRNA members seemed to have spontaneously arisen from the B. rapa and A. thaliana genomes, suggesting the species-specific expansion of miRNAs. We have made this data publicly available in a miRNA database of B. rapa called BraMRs. The database allows the user to retrieve miRNA sequences, their expression profiles, and a description of their target genes from the five tissue types investigated here.

Conclusions

This is the first report to identify novel miRNAs from Brassica crops using genome-wide high throughput techniques. The combination of computational methods and small RNA deep sequencing provides robust predictions of miRNAs in the genome. The finding of numerous novel miRNAs, many with few target genes and low expression levels, suggests the rapid evolution of miRNA genes. The development of a miRNA database, BraMRs, enables us to integrate miRNA identification, target prediction, and functional annotation of target genes. BraMRs will represent a valuable public resource with which to study the epigenetic control of B. rapa and other closely related Brassica species. The database is available at the following link: http://bramrs.rna.kr [1].",2012-11-19 +28878825,RNA-sequence data normalization through in silico prediction of reference genes: the bacterial response to DNA damage as case study.,"

Background

Measuring how gene expression changes in the course of an experiment assesses how an organism responds on a molecular level. Sequencing of RNA molecules, and their subsequent quantification, aims to assess global gene expression changes on the RNA level (transcriptome). While advances in high-throughput RNA-sequencing (RNA-seq) technologies allow for inexpensive data generation, accurate post-processing and normalization across samples is required to eliminate any systematic noise introduced by the biochemical and/or technical processes. Existing methods thus either normalize on selected known reference genes that are invariant in expression across the experiment, assume that the majority of genes are invariant, or that the effects of up- and down-regulated genes cancel each other out during the normalization.

Results

Here, we present a novel method, moose2 , which predicts invariant genes in silico through a dynamic programming (DP) scheme and applies a quadratic normalization based on this subset. The method allows for specifying a set of known or experimentally validated invariant genes, which guides the DP. We experimentally verified the predictions of this method in the bacterium Escherichia coli, and show how moose2 is able to (i) estimate the expression value distances between RNA-seq samples, (ii) reduce the variation of expression values across all samples, and (iii) to subsequently reveal new functional groups of genes during the late stages of DNA damage. We further applied the method to three eukaryotic data sets, on which its performance compares favourably to other methods. The software is implemented in C++ and is publicly available from http://grabherr.github.io/moose2/.

Conclusions

The proposed RNA-seq normalization method, moose2 , is a valuable alternative to existing methods, with two major advantages: (i) in silico prediction of invariant genes provides a list of potential reference genes for downstream analyses, and (ii) non-linear artefacts in RNA-seq data are handled adequately to minimize variations between replicates.",2017-09-05 +30425855,Increased Corneal Toricity after Long-Term Orthokeratology Lens Wear.,"

Purpose

To investigate the change in corneal toricity and associated refractive astigmatism after discontinuation of long-term orthokeratology (ortho-k) lens wear.

Methods

This study investigated 136 subjects aged between 6 and 14 (9.1 ± 1.5) years old at the commencement of ortho-k treatment, who had been undergoing overnight ortho-k treatment for 24 to 72 (37.4 ± 11.9) months. Corneal refractive power and manifest refraction were measured and compared before ortho-k and 1 month after discontinuation of ortho-k lens wear. Changes in corneal curvature were analyzed. Corneal curvature data from a historical longitudinal study were used as control.

Results

Compared to pre-ortho-k values, the corneal curvature became significantly flatter in the flatter meridian (-0.22 ± 0.27 D, P < 0.001) and steeper in the steeper meridian (0.06 ± 0.34 D, P=0.032) after cessation of ortho-k lens wear, resulting in a significant increase in corneal toricity (0.28 ± 0.43 D, P < 0.001), which is associated with an increase in refractive astigmatism (0.57 ± 0.57 D, r=0.465, P < 0.001). The amount of residual corneal flattening in the flatter meridian is significantly affected by the length of ortho-k treatment (t=-2.965, P=0.004) and the baseline age of subject (t=-2.841, P=0.005), but not by the baseline spherical or cylindrical refractive error (both P > 0.05). In the historical control group, there is no significant change in the corneal curvature over two years in children wearing spectacle lenses (both meridians, P > 0.05). Change of corneal toricity was more significant in the ortho-k group than in the spectacle control group (P=0.001).

Conclusions

Long-term ortho-k lens wear increases corneal toricity after discontinuation of the treatment, which is associated with an increase in refractive astigmatism. A more pronounced change in corneal toricity was found in subjects who were younger to start ortho-k and have been in a longer period of treatment. This trial is registered with http://www.chictr.org.cn (ChiCTR-TNRC-11001210).",2018-10-23 +29487213,Transparency in authors' contributions and responsibilities to promote integrity in scientific publication.,"In keeping with the growing movement in scientific publishing toward transparency in data and methods, we propose changes to journal authorship policies and procedures to provide insight into which author is responsible for which contributions, better assurance that the list is complete, and clearly articulated standards to justify earning authorship credit. To accomplish these goals, we recommend that journals adopt common and transparent standards for authorship, outline responsibilities for corresponding authors, adopt the Contributor Roles Taxonomy (CRediT) (docs.casrai.org/CRediT) methodology for attributing contributions, include this information in article metadata, and require authors to use the ORCID persistent digital identifier (https://orcid.org). Additionally, we recommend that universities and research institutions articulate expectations about author roles and responsibilities to provide a point of common understanding for discussion of authorship across research teams. Furthermore, we propose that funding agencies adopt the ORCID identifier and accept the CRediT taxonomy. We encourage scientific societies to further authorship transparency by signing on to these recommendations and promoting them through their meetings and publications programs.",2018-02-27 +24931985,Privacy preserving protocol for detecting genetic relatives using rare variants.,"

Motivation

High-throughput sequencing technologies have impacted many areas of genetic research. One such area is the identification of relatives from genetic data. The standard approach for the identification of genetic relatives collects the genomic data of all individuals and stores it in a database. Then, each pair of individuals is compared to detect the set of genetic relatives, and the matched individuals are informed. The main drawback of this approach is the requirement of sharing your genetic data with a trusted third party to perform the relatedness test.

Results

In this work, we propose a secure protocol to detect the genetic relatives from sequencing data while not exposing any information about their genomes. We assume that individuals have access to their genome sequences but do not want to share their genomes with anyone else. Unlike previous approaches, our approach uses both common and rare variants which provide the ability to detect much more distant relationships securely. We use a simulated data generated from the 1000 genomes data and illustrate that we can easily detect up to fifth degree cousins which was not possible using the existing methods. We also show in the 1000 genomes data with cryptic relationships that our method can detect these individuals.

Availability

The software is freely available for download at http://genetics.cs.ucla.edu/crypto/.",2014-06-01 +,A prioritized crop wild relative inventory to help underpin global food security,"The potentially devastating impacts of climate change on biodiversity and food security, together with the growing world population, means taking action to conserve crop wild relative (CWR) diversity is no longer an option—it is an urgent priority. CWR are species closely related to crops, including their progenitors, which have potential to contribute traits for crop improvement. However, their utilisation is hampered by a lack of systematic conservation which in turn is due to a lack of clarity over their identity. We used gene pool and taxon group concepts to estimate CWR relatedness for 173 priority crops to create the Harlan and de Wet inventory of globally important CWR taxa. Further taxa more remotely related to crops were added if they have historically been found to have useful traits for crop improvement. The inventory contains 1667 taxa, divided between 37 families, 108 genera, 1392 species and 299 sub-specific taxa. The region with the highest number of priority CWR is western Asia with 262 taxa, followed by China with 222 and southeastern Europe with 181. Within the primary gene pool, 242 taxa were found to be under-represented in ex situ collections and the countries identified as the highest priority for further germplasm collection are China, Mexico and Brazil. 
The inventory database is web-enabled (http://www.cwrdiversity.org/checklist/) and can be used to facilitate in situ and ex situ conservation planning at global, regional and national levels.",2013-11-01 +26414983,American Academy of Sleep Medicine (AASM) Position Paper for the Use of Telemedicine for the Diagnosis and Treatment of Sleep Disorders.,"The American Academy of Sleep Medicine's (AASM) Taskforce on Sleep Telemedicine supports telemedicine as a means of advancing patient health by improving access to the expertise of Board-Certified Sleep Medicine Specialists. However, such access improvement needs to be anchored in attention to quality and value in diagnosing and treating sleep disorders. Telemedicine is also useful to promote professionalism through patient care coordination and communication between other specialties and sleep medicine. Many of the principles and key concepts adopted here are based on U.S. industry standards, with special consideration given to the body of work by the American Telemedicine Association (http://www.americantelemed.org/), and abide by standards endorsed by the American Medical Association (http://www.ama-assn.org/). Practitioners who wish to integrate sleep telemedicine into their practice should have a clear understanding of the salient issues, key terminology, and the following recommendations from the AASM. The Taskforce recommends the following: • Clinical care standards for telemedicine services should mirror those of live office visits, including all aspects of diagnosis and treatment decisions as would be reasonably expected in traditional office-based encounters. • Clinical judgment should be exercised when determining the scope and extent of telemedicine applications in the diagnosis and treatment of specific patients and sleep disorders. 
• Live Interactive Telemedicine for sleep disorders, if utilized in a manner consistent with the principles outlined in this document, should be recognized and reimbursed in a manner competitive or comparable with traditional in-person visits. • Roles, expectations, and responsibilities of providers involved in the delivery of sleep telemedicine should be defined, including those at originating sites and distant sites. • The practice of telemedicine should aim to promote a care model in which sleep specialists, patients, primary care providers, and other members of the healthcare team aim to improve the value of healthcare delivery in a coordinated fashion. • Appropriate technical standards should be upheld throughout the telemedicine care delivery process, at both the originating and distant sites, and specifically meet the standards set forth by the Health Insurance Portability and Accountability Act (HIPAA). • Methods that aim to improve the utility of telemedicine exist and should be explored, including the utilization of patient presenters, local resources and providers, adjunct testing, and add-on technologies. • Quality Assurance processes should be in place for telemedicine care delivery models that aim to capture process measures, patient outcomes, and patient/provider experiences with the model(s) employed. • Time for data management, quality processes, and other aspects of care delivery related to telemedicine encounters should be recognized in value-based care delivery models. • The use of telemedicine services and its equipment should adhere to strict professional and ethical standards so as not to violate the intent of the telemedicine interaction while aiming to improve overall patient access, quality, and/or value of care. • When billing for telemedicine services, it is recommended that patients, providers, and others rendering services understand payor reimbursements, and that there be financial transparency throughout the process. 
• Telemedicine utilization for sleep medicine is likely to rapidly expand, as are broader telehealth applications in general; further research into the impact and outcomes of these are needed. This document serves as a resource by defining issues and terminology and explaining recommendations. However, it is not intended to supersede regulatory or credentialing recommendations and guidelines. It is intended to support and be consistent with professional and ethical standards of the profession.",2015-10-15 +27862010,Extension of research data repository system to support direct compute access to biomedical datasets: enhancing Dataverse to support large datasets.,"Access to experimental X-ray diffraction image data is important for validation and reproduction of macromolecular models and indispensable for the development of structural biology processing methods. In response to the evolving needs of the structural biology community, we recently established a diffraction data publication system, the Structural Biology Data Grid (SBDG, data.sbgrid.org), to preserve primary experimental datasets supporting scientific publications. All datasets published through the SBDG are freely available to the research community under a public domain dedication license, with metadata compliant with the DataCite Schema (schema.datacite.org). A proof-of-concept study demonstrated community interest and utility. Publication of large datasets is a challenge shared by several fields, and the SBDG has begun collaborating with the Institute for Quantitative Social Science at Harvard University to extend the Dataverse (dataverse.org) open-source data repository system to structural biology datasets. Several extensions are necessary to support the size and metadata requirements for structural biology datasets. 
In this paper, we describe one such extension-functionality supporting preservation of file system structure within Dataverse-which is essential for both in-place computation and supporting non-HTTP data transfers.",2016-11-10 +29186336,3dRPC: a web server for 3D RNA-protein structure prediction.,"RNA-protein interactions occur in many biological processes. To understand the mechanism of these interactions one needs to know three-dimensional (3D) structures of RNA-protein complexes. 3dRPC is an algorithm for prediction of 3D RNA-protein complex structures and consists of a docking algorithm RPDOCK and a scoring function 3dRPC-Score. RPDOCK is used to sample possible complex conformations of an RNA and a protein by calculating the geometric and electrostatic complementarities and stacking interactions at the RNA-protein interface according to the features of atom packing of the interface. 3dRPC-Score is a knowledge-based potential that uses the conformations of nucleotide-amino-acid pairs as statistical variables and that is used to choose the near-native complex-conformations obtained from the docking method above. Recently, we built a web server for 3dRPC. The users can easily use 3dRPC without installing it locally. RNA and protein structures in PDB (Protein Data Bank) format are the only needed input files. It can also incorporate the information of interface residues or residue-pairs obtained from experiments or theoretical predictions to improve the prediction. Availability and implementation:The address of 3dRPC web server is http://biophy.hust.edu.cn/3dRPC. Contact:yxiao@hust.edu.cn.",2018-04-01 +30539787,Enhancer of Zeste Homologue 2 Inhibition Attenuates TGF-β Dependent Hepatic Stellate Cell Activation and Liver Fibrosis.,"

Background & aims

Transdifferentiation of hepatic stellate cells (HSCs) into myofibroblasts is a key event in the pathogenesis of liver fibrosis. Transforming growth factor β (TGF-β) and platelet-derived growth factor (PDGF) are canonical HSC activators after liver injury. The aim of this study was to analyze the epigenetic modulators that differentially control TGF-β and PDGF signaling pathways.

Methods

We performed a transcriptomic comparison of HSCs treated with TGF-β or PDGF-BB using RNA sequencing. Among the targets that distinguish these 2 pathways, we focused on the histone methyltransferase class of epigenetic modulators.

Results

Enhancer of zeste homolog 2 (EZH2) was expressed differentially, showing significant up-regulation in HSCs activated with TGF-β but not with PDGF-BB. Indeed, EZH2 inhibition using either a pharmacologic (GSK-503) or a genetic (small interfering RNA) approach caused a significant attenuation of TGF-β-induced fibronectin, collagen 1α1, and α-smooth muscle actin, both at messenger RNA and protein levels. Conversely, adenoviral overexpression of EZH2 in HSCs resulted in a significant stimulation of fibronectin protein and messenger RNA levels in TGF-β-treated cells. Finally, we conducted in vivo experiments with mice chronically treated with carbon tetrachloride or bile duct ligation. Administration of GSK-503 to mice receiving either carbon tetrachloride or bile duct ligation led to attenuated fibrosis as assessed by Trichrome and Sirius red stains, hydroxyproline, and α-smooth muscle actin/collagen protein assays.

Conclusions

TGF-β and PDGF share redundant and distinct transcriptomic targets, with the former predominating in HSC activation. The EZH2 histone methyltransferase is preferentially involved in the TGF-β as opposed to the PDGF signaling pathway. Inhibition of EZH2 attenuates fibrogenic gene transcription in TGF-β-treated HSCs and reduces liver fibrosis in vivo. The data discussed in this publication have been deposited in NCBI's Gene Expression Omnibus and are accessible through GEO Series accession number GSE119606 (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE119606).",2018-09-15 +28342581,Neurodevelopmental outcomes in infants undergoing general anesthesia.,"

Purpose

Preclinical data strongly suggest that all agents used for general anesthesia (GA) have detrimental effects on the developing brain. However, clinical data are unclear. The purpose of this study was to use a cohort of infants who underwent GA and understand their neurodevelopmental outcomes.

Methods

A cohort of infants who underwent GA was selected between 2010 and 2011, and a control group was created. Data regarding GA, procedures, and outcomes were collected in 2015. The cohort was divided into controls, GA without surgery, GA and surgery once, and multiple general anesthetics. Both univariate and multivariate analyses were performed, and a p value of less than 0.05 was considered significant.

Results

457 patients (121 controls and 336 cases) were included. Median follow-up was 5.1 years. While developmental delay and the need for speech therapy were higher with GA, this did not correlate with the duration of GA. Patients having GA for MRI had the poorest outcomes. Multivariate analysis using combined binary outcome measures for psychiatric and neurologic outcomes did not show any significant difference for duration of anesthesia, age at anesthesia, or induction and maintenance agents.

Conclusions

These data suggest that GA during the first year of life may have few significant neurodevelopmental effects compared to controls. Additionally, the duration of GA did not correlate with neurodevelopmental outcomes.

Type of study

Retrospective Case Control Cohort Study.

Level of evidence

3 b (according to Oxford Center for EBM Levels of Evidence, March 2009, http://www.cebm.net/oxford-centre-evidence-based-medicine-levels-evidence-march-2009/).",2017-03-16 +30133618,"Evolution of incidence, mortality and cost of nontraumatic abdominal emergencies treated in Brasil in a period of nine years.","To evaluate the incidence, mortality and cost of non-traumatic abdominal emergencies treated in Brazilian emergency departments.This paper used DataSus information from 2008 to 2016 (http://www.tabnet.datasus.gov.br). The number of hospitalizations, costs - AIH length of stay and mortality rates were described in acute appendicitis, acute cholecystitis, acute pancreatitis, acute diverticulitis, gastric and duodenal ulcer, and inflammatory intestinal disease.The disease that had the highest growth in hospitalization was diverticular bowel disease with an increase of 68.2%. For the period of nine years, there were no significant changes in the average length of hospital stay, with the highest increase in gastric and duodenal ulcer with a growth of 15.9%. The mortality rate of gastric and duodenal ulcer disease increased by 95.63%, which is significantly high when compared to the other diseases. All had their costs increased but the one that proportionally had the highest increase in the last nine years was the duodenal and gastric ulcer, with an increase of 85.4%.Non-traumatic abdominal emergencies are extremely prevalent. 
Hence, the importance of having updated and comparative data on the mortality rate, number of hospitalization and cost generated by these diseases to provide better healthcare services in public hospitals.",2018-04-01 +27161830,Global De Novo Protein-Protein Interactome Elucidates Interactions of Drought-Responsive Proteins in Horse Gram (Macrotyloma uniflorum).,"Inspired by the availability of de novo transcriptome of horse gram (Macrotyloma uniflorum) and recent developments in systems biology studies, the first ever global protein-protein interactome (PPI) map was constructed for this highly drought-tolerant legume. Large-scale studies of PPIs and the constructed database would provide rationale behind the interplay at cascading translational levels for drought stress-adaptive mechanisms in horse gram. Using a bidirectional approach (interolog and domain-based), a high-confidence interactome map and database for horse gram was constructed. Available transcriptomic information for shoot and root tissues of a sensitive (M-191; genotype 1) and a drought-tolerant (M-249; genotype 2) genotype of horse gram was utilized to draw comparative PPI subnetworks under drought stress. High-confidence 6804 interactions were predicted among 1812 proteins covering about one-fourth of the horse gram proteome. The highest number of interactions (33.86%) in horse gram interactome matched with Arabidopsis PPI data. The top five hub nodes mostly included ubiquitin and heat-shock-related proteins. Higher numbers of PPIs were found to be responsive in shoot tissue (416) and root tissue (2228) of genotype 2 compared with shoot tissue (136) and root tissue (579) of genotype 1. Characterization of PPIs using gene ontology analysis revealed that kinase and transferase activities involved in signal transduction, cellular processes, nucleocytoplasmic transport, protein ubiquitination, and localization of molecules were most responsive to drought stress. 
Hence, these could be framed in stress adaptive mechanisms of horse gram. Being the first legume global PPI map, it would provide new insights into gene and protein regulatory networks for drought stress tolerance mechanisms in horse gram. Information compiled in the form of database (MauPIR) will provide the much needed high-confidence systems biology information for horse gram genes, proteins, and involved processes. This information would ease the effort and increase the efficacy for similar studies on other legumes. Public access is available at http://14.139.59.221/MauPIR/ .",2016-05-17 +26517951,PhyreStorm: A Web Server for Fast Structural Searches Against the PDB.,"The identification of structurally similar proteins can provide a range of biological insights, and accordingly, the alignment of a query protein to a database of experimentally determined protein structures is a technique commonly used in the fields of structural and evolutionary biology. The PhyreStorm Web server has been designed to provide comprehensive, up-to-date and rapid structural comparisons against the Protein Data Bank (PDB) combined with a rich and intuitive user interface. It is intended that this facility will enable biologists inexpert in bioinformatics access to a powerful tool for exploring protein structure relationships beyond what can be achieved by sequence analysis alone. By partitioning the PDB into similar structures, PhyreStorm is able to quickly discard the majority of structures that cannot possibly align well to a query protein, reducing the number of alignments required by an order of magnitude. PhyreStorm is capable of finding 93±2% of all highly similar (TM-score>0.7) structures in the PDB for each query structure, usually in less than 60s. PhyreStorm is available at http://www.sbg.bio.ic.ac.uk/phyrestorm/.",2015-10-27 +30276849,Influence of the extracellular matrix on water mobility in subcortical gray matter.,"

Purpose

Water mobility in tissues is related to the microstructure that modulates diffusion and spin relaxation. Previous work has shown that the extracellular matrix (ECM) impacts water diffusion in cartilage. To investigate if similar contributions to image contrast exist for brain, which is characterized by a substantially lower ECM content, diffusion and relaxation were studied in fixed samples from goat and human thalamus before and after enzymatic digestion of ECM compounds. Selected experiments in human corpus callosum were included for comparing subcortical gray matter and white matter.

Methods

Digestion of matrix components was achieved by treatment with hyaluronidase. Nonlocalized pulsed field gradient measurements were performed with b values between 0.6 and 18,000 s/mm² at 3T and temperatures between 0°C and 20°C, in addition to T1 and T2 relaxation measurements. The data were fitted to multiexponential models to account for different water compartments. After the measurements, the samples were sliced and stained for ECM-sensitive markers to verify efficient digestion.

Results

Microstructural alterations associated with hyaluronan digestion did not lead to measurable effects on water diffusion or T2. However, T1 of the main relaxographic component, attributed to intra-/extracellular water, decreased by 7%.

Conclusion

Investigations with very strong gradients did not reveal a detectable effect on water diffusion or T 2 after hyaluronan removal, indicating that the brain ECM content is too low to produce a detectable effect. The subtle alteration of T1 upon hyaluronidase treatment might reflect a modulation of intercompartmental water exchange properties.",2018-09-14 +26727469,dEMBF: A Comprehensive Database of Enzymes of Microalgal Biofuel Feedstock.,"Microalgae have attracted wide attention as one of the most versatile renewable feedstocks for production of biofuel. To develop genetically engineered high lipid yielding algal strains, a thorough understanding of the lipid biosynthetic pathway and the underpinning enzymes is essential. In this work, we have systematically mined the genomes of fifteen diverse algal species belonging to Chlorophyta, Heterokontophyta, Rhodophyta, and Haptophyta, to identify and annotate the putative enzymes of lipid metabolic pathway. Consequently, we have also developed a database, dEMBF (Database of Enzymes of Microalgal Biofuel Feedstock), which catalogues the complete list of identified enzymes along with their computed annotation details including length, hydrophobicity, amino acid composition, subcellular location, gene ontology, KEGG pathway, orthologous group, Pfam domain, intron-exon organization, transmembrane topology, and secondary/tertiary structural data. Furthermore, to facilitate functional and evolutionary study of these enzymes, a collection of built-in applications for BLAST search, motif identification, sequence and phylogenetic analysis have been seamlessly integrated into the database. dEMBF is the first database that brings together all enzymes responsible for lipid synthesis from available algal genomes, and provides an integrative platform for enzyme inquiry and analysis. This database will be extremely useful for algal biofuel research. 
It can be accessed at http://bbprof.immt.res.in/embf.",2016-01-04 +30281914,Up-regulation and tumor-promoting role of SPHK1 were attenuated by miR-330-3p in gastric cancer.,"We intended to clarify the role of sphingosine kinase 1 (SPHK1) in gastric cancer (GC) using both in vitro and in vivo assays. The study was designed to identify novel therapeutic targets for GC treatment. Differential analysis was utilized to dissect two gene expression omnibus series (GSE49515 and GSE79973) microarray data form Gene Expression Omnibus (GEO) (https://www.ncbi.nlm.nih.gov/geo/) dataset. MRNA and protein expressions were determined by quantitative polymerase chain reaction and Western blot, respectively. GC cell growth was measured by MTT assays and verified by in vivo analysis. Cell cycle and cell apoptosis were detected via flow cytometer observation. Cell migration and invasion were assessed by wound healing assays and Transwell assays. The targeting relationship between miRNA and SPHK1/S1PR1 was identified via dual-luciferase assay. Twenty-four common differentially expressed genes were screened out from two gene expression omnibus series (GSE49515 and GSE79973), among which SPHK1 was chosen for its higher fold change. We found elevated SPHK1 expression in GC tissues and cells, along with an increased concentration of SPHK1-generated sphingosine-1-phosphate (S1P) in both GC serum and tissue. SPHK1 knockdown significantly suppressed cell proliferation, migration, and invasion of MKN1 and KATO3 cells. It also blocked cell cycle and induced apoptosis in MKN1 and KATO3 cells. Silencing of SPHK1 also refrained tumor growth and inhibited S1P level. MiR-330-3p directly targeted SPHK1 and S1PR1. Overexpressed miR-330-3p in MKN1 cells repressed SPHK1 and S1PR1 expressions like their chemical inhibitors-SPHK1 inhibitors (FTY720) and S1PR1 inhibitors (VPC23019), and acted anti-tumor both in vitro and in vivo. 
Our study provides evidence that SPHK1 was promotive for GC tumor growth and cell biological behaviors, and that miR-330-3p targeted 3'-UTR of SPHK1 and inhibited its expression. SPHK1 was expected to become a new molecular marker and miR-330-3p a novel therapeutic target for GC. © 2018 IUBMB Life, 70(11):1164-1176, 2018.",2018-10-03 +22165817,HOMER: a human organ-specific molecular electronic repository.,"

Background

Each organ has a specific function in the body. ""Organ-specificity"" refers to differential expressions of the same gene across different organs. An organ-specific gene/protein is defined as a gene/protein whose expression is significantly elevated in a specific human organ. An ""organ-specific marker"" is defined as an organ-specific gene/protein that is also implicated in human diseases related to the organ. Previous studies have shown that identifying specificity for the organ in which a gene or protein is significantly differentially expressed, can lead to discovery of its function. Most currently available resources for organ-specific genes/proteins either allow users to access tissue-specific expression over a limited range of organs, or do not contain disease information such as disease-organ relationship and disease-gene relationship.

Results

We designed an integrated Human Organ-specific Molecular Electronic Repository (HOMER, http://bio.informatics.iupui.edu/homer), defining human organ-specific genes/proteins, based on five criteria: 1) comprehensive organ coverage; 2) gene/protein to disease association; 3) disease-organ association; 4) quantification of organ-specificity; and 5) cross-linking of multiple available data sources. HOMER is a comprehensive database covering about 22,598 proteins, 52 organs, and 4,290 diseases integrated and filtered from organ-specific proteins/genes and disease databases like dbEST, TiSGeD, HPA, CTD, and Disease Ontology. The database has a Web-based user interface that allows users to find organ-specific genes/proteins by gene, protein, organ or disease, to explore the histogram of an organ-specific gene/protein, and to identify disease-related organ-specific genes by browsing the disease data online. Moreover, the quality of the database was validated with comparison to other known databases and two case studies: 1) an association analysis of organ-specific genes with disease and 2) a gene set enrichment analysis of organ-specific gene expression data.

Conclusions

HOMER is a new resource for analyzing, identifying, and characterizing organ-specific molecules in association with disease-organ and disease-gene relationships. The statistical method we developed for organ-specific gene identification can be applied to other organism. The current HOMER database can successfully answer a variety of questions related to organ specificity in human diseases and can help researchers in discovering and characterizing organ-specific genes/proteins with disease relevance.",2011-10-18 +31489175,CyTargetLinker app update: A flexible solution for network extension in Cytoscape. ,"Here, we present an update of the open-source CyTargetLinker app for Cytoscape ( http://apps.cytoscape.org/apps/cytargetlinker) that introduces new automation features. CyTargetLinker provides a simple interface to extend networks with links to relevant data and/or knowledge extracted from so-called linksets. The linksets are provided on the CyTargetLinker website ( https://cytargetlinker.github.io/) or can be custom-made for specific use cases. The new automation feature enables users to programmatically execute the app's functionality in Cytoscape (command line tool) and with external tools (e.g. R, Jupyter, Python, etc). This allows users to share their analysis workflows and therefore increase repeatability and reproducibility. Three use cases demonstrate automated workflows, combinations with other Cytoscape apps and core Cytoscape functionality. We first extend a protein-protein interaction network created with the stringApp, with compound-target interactions and disease-gene annotations. In the second use case, we created a workflow to load differentially expressed genes from an experimental dataset and extend it with gene-pathway associations. Lastly, we chose an example outside the biological domain and used CyTargetLinker to create an author-article-journal network for the five authors of this manuscript using a two-step extension mechanism. 
With 400 downloads per month in the last year and nearly 20,000 downloads in total, CyTargetLinker shows the adoption and relevance of the app in the field of network biology. In August 2019, the original publication was cited in 83 articles demonstrating the applicability in biomedical research.",2018-06-14 +,CaseFinder: A Flexible Real-time Online Surveillance Registry for Infectious Disease Physicians to Report Cases of Carbapenem-resistant Enterobacteriaceae (CRE),"

Objective

To create a flexible online surveillance system for infectious disease experts to report cases of emerging infectious diseases.

Introduction

The Infectious Disease Society of America’s Emerging Infections Network (EIN) is a sentinel network of over 1,200 practicing infectious disease physicians, supported by the Centers for Disease Control and Prevention (CDC). In January 2012, the EIN listserv fielded a member inquiry about treatment recommendations for a complicated polymicrobial wound infection in a traveler returning to the United States from India. The posting led to a member-to-member communication that resulted in shipment of clinical microbiology isolates from one member’s hospital to another’s research laboratory. Molecular evaluation of the clinical isolates uncovered previously undetected carriage of the emerging NDM-1 enzyme in 2 of the Enterobacteriaceae species. Based on this interaction, we built a flexible online surveillance registry (CaseFinder) for infectious disease physicians to report cases of CRE.

Methods

To ascertain the frequency and nature of CRE infections treated by EIN members, a survey was sent to EIN members in July 2012 that elicited risk factors and clinical features associated with CRE. Survey opt-out items also allowed respondents to specify that they had not treated any CRE infections. Concurrently, we developed a formal relational data model for CRE infection survey data, allowing for analysis and visualization. The data model was implemented in Python using the Object-Relational Mapping provided by the Django web framework, which we used to implement the backend server component to the online registry. An interactive front-end web application, written in Javascript using the jQuery library, retrieves data via the AJAX web protocol. Geolocated data is visualized using the OpenLayers library to render map tiles and provide interactive controls such as panning and zooming.

Results

The crowd-sourced online registry for infectious disease experts to report CRE infections, called CaseFinder (http://casefinder.org/), was developed, released, and seeded with data from the EIN survey. To date, a total of 69 cases have been submitted, describing 53 infections with Klebsiella pneumoniae, 7 with Escherichia coli and 9 with other Enterobacteriaceae, representing 7 of 9 US Census divisions. Another 214 members have indicated that they have not seen any cases to date. CaseFinder includes: an online data entry component (to supplant the original EIN listserv survey); real-time filtering of data; and interactive maps that geolocate survey responses using the first 2 digits of the treating facility’s zip code. Users can filter data based on species, clinical features (age, gender), resistance profile, or 2-digit zip code. CaseFinder can also display clinical case data in an exportable line-item format.

Conclusions

We have created a web-based data registry for CRE infections in the US. Populated by EIN survey responses, the registry already has a collection 283 data points—69 cases of CRE and 214 reports indicating the absence of cases—and is open for ongoing submission of data represented in real time. This system can serve as a de facto national surveillance system for CRE infections - an important but not yet universally reportable condition. Our platform can be expanded to map and track other emerging infections seen by infectious diseases physicians. We are currently working to incorporate molecular fingerprinting and typing information into the data model. The site will also provide incentives for infectious disease experts to submit cases in underrepresented geographic areas. In future efforts we will incorporate “machine learning” techniques to leverage knowledge from infectious disease experts on existing cases and provide features such as an intelligent automated alert system.",2013-01-01 +23075266,Using Google Analytics to evaluate the impact of the CyberTraining project.,"A focus on results and impact should be at the heart of every project's approach to research and dissemination. This article discusses the potential of Google Analytics (GA: http://google.com/analytics ) as an effective resource for measuring the impact of academic research output and understanding the geodemographics of users of specific Web 2.0 content (e.g., intervention and prevention materials, health promotion and advice). This article presents the results of GA analyses as a resource used in measuring the impact of the EU-funded CyberTraining project, which provided a well-grounded, research-based training manual on cyberbullying for trainers through the medium of a Web-based eBook ( www.cybertraining-project.org ). 
The training manual includes review information on cyberbullying, its nature and extent across Europe, analyses of current projects, and provides resources for trainers working with the target groups of pupils, parents, teachers, and other professionals. Results illustrate the promise of GA as an effective tool for measuring the impact of academic research and project output with real potential for tracking and understanding intra- and intercountry regional variations in the uptake of prevention and intervention materials, thus enabling precision focusing of attention to those regions.",2012-10-17 +29800466,"A randomized, subject and rater-blinded, placebo-controlled trial of dimethyl fumarate for obstructive sleep apnea. ","To investigate the therapeutic effect of dimethyl fumarate (DMF, an immunomodulatory agent) on obstructive sleep apnea (OSA), and potential influence of any such effect by selected proinflammatory molecules. Patients with OSA who deferred positive airway pressure therapy were randomized (2:1) to receive DMF or placebo for 4 months. Participants underwent polysomnography before randomization and at 4 months. Blood was collected monthly. The primary outcome was the mean group change in respiratory disturbance index (δ-RDI). Secondary analyses focused on the association between treatment effect of DMF (on RDI) and expression of plasma cytokines and chemokines, or nuclear factor κ-B (NFκB) signaling molecules in peripheral blood mononuclear cells. N = 65 participants were randomized. N = 50 participants (DMF = 35, placebo = 15) had complete data for final analyses. The mean difference in δ-RDI between groups was 13.3 respiratory events/hour of sleep: -3.1+/-12.9 vs. 10.2+/-13.1 in DMF and placebo groups, respectively (mixed-effects model treatment effect: β = -0.14, SE = 0.062, p = 0.033). 
Plasma levels of TNF-α showed only nonsignificant decreases, and IL-10 and IL-13 only nonsignificant increases, in DMF-treated participants compared with placebo. No significant interaction or main effect on RDI for selected cytokines and chemokines was found. Participants with a therapeutic response to DMF did experience significant reductions in intracellular NFκB signaling molecules at 4 months. Overall, DMF was well-tolerated. The immunomodulatory drug DMF partially ameliorates OSA severity. Suppression of systemic inflammation through reduction of NFκB signaling may mediate this effect. ClinicalTrials.gov, NCT02438137, https://clinicaltrials.gov/ct2/show/NCT02438137?term=NCT02438137&rank=1.",2018-08-01 +29635306,Pleiotropic mapping and annotation selection in genome-wide association studies with penalized Gaussian mixture models.,"Motivation:Genome-wide association studies (GWASs) have identified many genetic loci associated with complex traits. A substantial fraction of these identified loci is associated with multiple traits-a phenomena known as pleiotropy. Identification of pleiotropic associations can help characterize the genetic relationship among complex traits and can facilitate our understanding of disease etiology. Effective pleiotropic association mapping requires the development of statistical methods that can jointly model multiple traits with genome-wide single nucleic polymorphisms (SNPs) together. Results:We develop a joint modeling method, which we refer to as the integrative MApping of Pleiotropic association (iMAP). iMAP models summary statistics from GWASs, uses a multivariate Gaussian distribution to account for phenotypic correlation, simultaneously infers genome-wide SNP association pattern using mixture modeling and has the potential to reveal causal relationship between traits. 
Importantly, iMAP integrates a large number of SNP functional annotations to substantially improve association mapping power, and, with a sparsity-inducing penalty, is capable of selecting informative annotations from a large, potentially non-informative set. To enable scalable inference of iMAP to association studies with hundreds of thousands of individuals and millions of SNPs, we develop an efficient expectation maximization algorithm based on an approximate penalized regression algorithm. With simulations and comparisons to existing methods, we illustrate the benefits of iMAP in terms of both high association mapping power and accurate estimation of genome-wide SNP association patterns. Finally, we apply iMAP to perform a joint analysis of 48 traits from 31 GWAS consortia together with 40 tissue-specific SNP annotations generated from the Roadmap Project. Availability and implementation:iMAP is freely available at http://www.xzlab.org/software.html. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-08-01 +25957349,Tax4Fun: predicting functional profiles from metagenomic 16S rRNA data.,"

Motivation

The characterization of phylogenetic and functional diversity is a key element in the analysis of microbial communities. Amplicon-based sequencing of marker genes, such as 16S rRNA, is a powerful tool for assessing and comparing the structure of microbial communities at a high phylogenetic resolution. Because 16S rRNA sequencing is more cost-effective than whole metagenome shotgun sequencing, marker gene analysis is frequently used for broad studies that involve a large number of different samples. However, in comparison to shotgun sequencing approaches, insights into the functional capabilities of the community get lost when restricting the analysis to taxonomic assignment of 16S rRNA data.

Results

Tax4Fun is a software package that predicts the functional capabilities of microbial communities based on 16S rRNA datasets. We evaluated Tax4Fun on a range of paired metagenome/16S rRNA datasets to assess its performance. Our results indicate that Tax4Fun provides a good approximation to functional profiles obtained from metagenomic shotgun sequencing approaches.

Availability and implementation

Tax4Fun is an open-source R package and applicable to output as obtained from the SILVAngs web server or the application of QIIME with a SILVA database extension. Tax4Fun is freely available for download at http://tax4fun.gobics.de/.

Contact

kasshau@gwdg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-07 +29253072,K2 and K2*: efficient alignment-free sequence similarity measurement based on Kendall statistics.,"

Motivation

Alignment-free sequence comparison methods can compute the pairwise similarity between a huge number of sequences much faster than sequence-alignment based methods.

Results

We propose a new non-parametric alignment-free sequence comparison method, called K2, based on the Kendall statistics. Compared with the other state-of-the-art alignment-free comparison methods, K2 demonstrates competitive performance in generating the phylogenetic tree, in evaluating functionally related regulatory sequences, and in computing the edit distance (similarity/dissimilarity) between sequences. Furthermore, the K2 approach is much faster than the other methods. An improved method, K2*, is also proposed, which is able to determine the appropriate algorithmic parameter (length) automatically, without first considering different values. Comparative analysis with the state-of-the-art alignment-free sequence similarity methods demonstrates the superiority of the proposed approaches, especially with increasing sequence length, or increasing dataset sizes.

Availability and implementation

The K2 and K2* approaches are implemented in the R language as a package and are freely available for open access (http://community.wvu.edu/daadjeroh/projects/K2/K2_1.0.tar.gz).

Contact

yueljiang@163.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-05-01 +26315902,database.bio: a web application for interpreting human variations.,"

Unlabelled

Rapid advances of next-generation sequencing technology have led to the integration of genetic information with clinical care. Genetic basis of diseases and response to drugs provide new ways of disease diagnosis and safer drug usage. This integration reveals the urgent need for effective and accurate tools to analyze genetic variants. Due to the number and diversity of sources for annotation, automating variant analysis is a challenging task. Here, we present database.bio, a web application that combines variant annotation, prioritization and visualization so as to support insight into the individual genetic characteristics. It enhances annotation speed by preprocessing data on a supercomputer, and reduces database space via a unified database representation with compressed fields.

Availability and implementation

Freely available at https://database.bio.",2015-08-26 +28505599,A new method for quantifying the performance of EEG blind source separation algorithms by referencing a simultaneously recorded ECoG signal.,"Blind source separation (BSS) algorithms extract neural signals from electroencephalography (EEG) data. However, it is difficult to quantify source separation performance because there is no criterion to dissociate neural signals and noise in EEG signals. This study develops a method for evaluating BSS performance. The idea is neural signals in EEG can be estimated by comparison with simultaneously measured electrocorticography (ECoG). Because the ECoG electrodes cover the majority of the lateral cortical surface and should capture most of the original neural sources in the EEG signals. We measured real EEG and ECoG data and developed an algorithm for evaluating BSS performance. First, EEG signals are separated into EEG components using the BSS algorithm. Second, the EEG components are ranked using the correlation coefficients of the ECoG regression and the components are grouped into subsets based on their ranks. Third, canonical correlation analysis estimates how much information is shared between the subsets of the EEG components and the ECoG signals. We used our algorithm to compare the performance of BSS algorithms (PCA, AMUSE, SOBI, JADE, fastICA) via the EEG and ECoG data of anesthetized nonhuman primates. The results (Best case >JADE = fastICA >AMUSE = SOBI ≥ PCA >random separation) were common to the two subjects. 
To encourage the further development of better BSS algorithms, our EEG and ECoG data are available on our Web site (http://neurotycho.org/) as a common testing platform.",2017-01-29 +28045544,"Chembench: A Publicly Accessible, Integrated Cheminformatics Portal.","The enormous increase in the amount of publicly available chemical genomics data and the growing emphasis on data sharing and open science mandates that cheminformaticians also make their models publicly available for broad use by the scientific community. Chembench is one of the first publicly accessible, integrated cheminformatics Web portals. It has been extensively used by researchers from different fields for curation, visualization, analysis, and modeling of chemogenomics data. Since its launch in 2008, Chembench has been accessed more than 1 million times by more than 5000 users from a total of 98 countries. We report on the recent updates and improvements that increase the simplicity of use, computational efficiency, accuracy, and accessibility of a broad range of tools and services for computer-assisted drug design and computational toxicology available on Chembench. Chembench remains freely accessible at https://chembench.mml.unc.edu.",2017-01-19 +,Searchable Core Facility Database: Building Resource Bridges,"The VGN Searchable Core Facility Database (http://vgn.uvm.edu/corefacilities) is a directory of Core Facilities primarily focused on North America but with entries from around the world. It is a tool intended to foster collaboration and assist cores in growing their user base and providing networking opportunities. It is populated with Core Facilities that have voluntarily listed themselves and would like to be contacted by researchers and other core facilities for potential collaborations. 
BENEFITS: + Allows researchers to locate resources needed for their studies Provides a channel for facilities to collaborate Facilitates cores to reach financial sustainability Researchers are able to perform searches online by service offerings, location, association, and key phrases to find a facility that will best meet their needs. Information listed for individual cores include: short description of core, contact name, email, address, services offered, hyperlink to website, equipment, and date of last revision of information. The data can be exported to excel. The database currently lists 353 cores, representing 45 states plus DC, 135 institutions, and 12 associations.",2012-01-01 +26719120,TOMATOMA Update: Phenotypic and Metabolite Information in the Micro-Tom Mutant Resource.,"TOMATOMA (http://tomatoma.nbrp.jp/) is a tomato mutant database providing visible phenotypic data of tomato mutant lines generated by ethylmethane sulfonate (EMS) treatment or γ-ray irradiation in the genetic background of Micro-Tom, a small and rapidly growing variety. To increase mutation efficiency further, mutagenized M3 seeds were subjected to a second round of EMS treatment; M3M1 populations were generated. These plants were self-pollinated, and 4,952 lines of M3M2 mutagenized seeds were generated. We checked for visible phenotypes in the M3M2 plants, and 618 mutant lines with 1,194 phenotypic categories were identified. In addition to the phenotypic information, we investigated Brix values and carotenoid contents in the fruits of individual mutants. Of 466 samples from 171 mutant lines, Brix values and carotenoid contents were between 3.2% and 11.6% and 6.9 and 37.3 µg g(-1) FW, respectively. This metabolite information concerning the mutant fruits would be useful in breeding programs as well as for the elucidation of metabolic regulation. Researchers are able to browse and search this phenotypic and metabolite information and order seeds of individual mutants via TOMATOMA. 
Our new Micro-Tom double-mutagenized populations and the metabolic information could provide a valuable genetic toolkit to accelerate tomato research and potential breeding programs.",2015-12-30 +26433226,"BacWGSTdb, a database for genotyping and source tracking bacterial pathogens.","Whole genome sequencing has become one of the routine methods in molecular epidemiological practice. In this study, we present BacWGSTdb (http://bacdb.org/BacWGSTdb), a bacterial whole genome sequence typing database which is designed for clinicians, clinical microbiologists and hospital epidemiologists. This database borrows the population structure from the current multi-locus sequence typing (MLST) scheme and adopts a hierarchical data structure: species, clonal complex and isolates. When users upload the pre-assembled genome sequences to BacWGSTdb, it offers the functionality of bacterial genotyping at both traditional MLST and whole-genome levels. More importantly, users are told which isolates in the public database are phylogenetically close to the query isolate, along with their clinical information such as host, isolation source, disease, collection time and geographical location. In this way, BacWGSTdb offers a rapid and convenient platform for worldwide users to address a variety of clinical microbiological issues such as source tracking bacterial pathogens.",2015-10-03 +27026632,De Novo Aneurysm Formation in Carriers of Saccular Intracranial Aneurysm Disease in Eastern Finland.,"

Background and purpose

Formation of new (de novo) aneurysms in patients carrying saccular intracranial aneurysm (sIA) disease has been published, but data from population-based cohorts are scarce.

Methods

Kuopio sIA database (http://www.uef.fi/ns) contains all unruptured and ruptured sIA patients admitted to Kuopio University Hospital from its Eastern Finnish catchment population. We studied the incidence and risk factors for de novo sIA formation in 1419 sIA patients with ≥5 years of angiographic follow-up, a total follow-up of 18 526 patient-years.

Results

There were 42 patients with a total of 56 de novo sIAs, diagnosed in a median of 11.7 years after the first sIA diagnosis. The cumulative incidence of de novo sIAs was 0.23% per patient-year and that of subarachnoid hemorrhage from a ruptured de novo sIA 0.05% per patient-year. The risk of de novo sIA discovery per patient-year increased with younger age at the first sIA diagnosis: 2.2% in the patients aged <20 years and 0.46% in the patients aged between 20 and 39 years. In Cox regression analysis, smoking history and younger age at the first sIA diagnosis significantly associated with de novo sIA formation, but female sex, multiple sIAs, and sIA family did not.

Conclusions

Patients aged < 40 years at the first sIA diagnosis are in a significant risk of developing de novo sIAs, and they should be scheduled for long-term angiographic follow-up. Smoking increases the risk of de novo sIA formation, suggesting long-term follow-up for smokers. Antismoking efforts are highly recommended for sIA patients.",2016-03-29 +27899619,The neXtProt knowledgebase on human proteins: 2017 update.,"The neXtProt human protein knowledgebase (https://www.nextprot.org) continues to add new content and tools, with a focus on proteomics and genetic variation data. neXtProt now has proteomics data for over 85% of the human proteins, as well as new tools tailored to the proteomics community.Moreover, the neXtProt release 2016-08-25 includes over 8000 phenotypic observations for over 4000 variations in a number of genes involved in hereditary cancers and channelopathies. These changes are presented in the current neXtProt update. All of the neXtProt data are available via our user interface and FTP site. We also provide an API access and a SPARQL endpoint for more technical applications.",2016-11-29 +23543116,Development and characterization of cDNA resources for the common marmoset: one of the experimental primate models.,"The common marmoset is a new world monkey, which has become a valuable experimental animal for biomedical research. This study developed cDNA libraries for the common marmoset from five different tissues. A total of 290 426 high-quality EST sequences were obtained, where 251 587 sequences (86.5%) had homology (1E(-100)) with the Refseqs of six different primate species, including human and marmoset. In parallel, 270 673 sequences (93.2%) were aligned to the human genome. When 247 090 sequences were assembled into 17 232 contigs, most of the sequences (218 857 or 15 089 contigs) were located in exonic regions, indicating that these genes are expressed in human and marmoset. 
The other 5578 sequences (or 808 contigs) mapping to the human genome were not located in exonic regions, suggesting that they are not expressed in human. Furthermore, a different set of 118 potential coding sequences were not similar to any Refseqs in any species, and, thus, may represent unknown genes. The cDNA libraries developed in this study are available through RIKEN Bio Resource Center. A Web server for the marmoset cDNAs is available at http://marmoset.nig.ac.jp/index.html, where each marmoset EST sequence has been annotated by reference to the human genome. These new libraries will be a useful genetic resource to facilitate research in the common marmoset.",2013-03-29 +,CheS-Mapper 2.0 for visual validation of (Q)SAR models,"

Background

Sound statistical validation is important to evaluate and compare the overall performance of (Q)SAR models. However, classical validation does not support the user in better understanding the properties of the model or the underlying data. Even though a number of visualization tools for analyzing (Q)SAR information in small molecule datasets exist, integrated visualization methods that allow the investigation of model validation results are still lacking.

Results

We propose visual validation as an approach for the graphical inspection of (Q)SAR model validation results. The approach applies the 3D viewer CheS-Mapper, an open-source application for the exploration of small molecules in virtual 3D space. The present work describes the new functionalities in CheS-Mapper 2.0 that facilitate the analysis of (Q)SAR information and allow the visual validation of (Q)SAR models. The tool enables the comparison of model predictions to the actual activity in feature space. The approach is generic: It is model-independent and can handle physico-chemical and structural input features as well as quantitative and qualitative endpoints.

Conclusions

Visual validation with CheS-Mapper enables analyzing (Q)SAR information in the data and indicates how this information is employed by the (Q)SAR model. It reveals whether the endpoint is modeled too specifically or too generically and highlights common properties of misclassified compounds. Moreover, the researcher can use CheS-Mapper to inspect how the (Q)SAR model predicts activity cliffs. The CheS-Mapper software is freely available at http://ches-mapper.org.

Graphical abstract

Comparing actual and predicted activity values with CheS-Mapper.",2014-01-01 +29109711,miPepBase: A Database of Experimentally Verified Peptides Involved in Molecular Mimicry.,"Autoimmune diseases emerge due to several reasons, of which molecular mimicry i.e., similarity between the host's and pathogen's interacting peptides is an important reason. In the present study we have reported a database of only experimentally verified peptide sequences, which exhibit molecular mimicry. The database is named as miPepBase (Mimicry Peptide Database) and contains comprehensive information about mimicry proteins and peptides of both host (and model organism) and pathogen. It also provides information about physicochemical properties of protein and mimicry peptides, which might be helpful in predicting the nature of protein and optimization of protein expression. The miPepBase can be searched using a keyword or, by autoimmune disease(s) or by a combination of host and pathogen taxonomic group or their name. To facilitate the search of proteins and/or epitope in miPepBase, which is similar to the user's interest, BLAST search tool is also incorporated. miPepBase is an open access database and available at http://proteininformatics.org/mkumar/mipepbase.",2017-10-23 +22424087,PupDB: a database of pupylated proteins.,"

Background

Prokaryotic ubiquitin-like protein (Pup), the first identified post-translational protein modifier in prokaryotes, is an important signal for the selective degradation of proteins. Recently, large-scale proteomics technology has been applied to identify a large number of pupylated proteins. The development of a database for managing pupylated proteins and pupylation sites is important for further analyses.

Description

A database named PupDB is constructed by collecting experimentally identified pupylated proteins and pupylation sites from published studies and integrating the information of pupylated proteins with corresponding structures and functional annotations. PupDB is a web-based database with tools for browsing and searching pupylated proteins and interactive displays of protein structures and pupylation sites.

Conclusions

The structured and searchable database PupDB is expected to provide a useful resource for further analyzing the substrate specificity, identifying pupylated proteins in other organisms and developing computational tools for predicting pupylation sites. PupDB is freely available at http://cwtung.kmu.edu.tw/pupdb.",2012-03-16 +26592761,Identification of Bacillus strains by MALDI TOF MS using geometric approach.,"Microorganism identification by MALDI TOF mass-spectrometry is based on the comparison of the mass spectrum of the studied organism with those of reference strains. It is a rapid and reliable method. However, commercial databases and programs are mostly designed for identification of clinically important strains and can be used only for particular mass spectrometer models. The need for open platforms and reference databases is obvious. In this study we describe a geometric approach for microorganism identification by mass spectra and demonstrate its capabilities by analyzing 24 strains belonging to the Bacillus pumilus group. This method is based on representing mass spectra as points on a multidimensional space, which allows us to use geometric distances to compare the spectra. Delimitation of microorganisms performed by geometric approach correlates well with the results of molecular phylogenetic analysis and clustering using Biotyper 3.1. All three methods used allowed us to reliably divide the strains into two groups corresponding to closely related species, Bacillus pumilus and Bacillus altitudinis. The method developed by us will be implemented in a Web interface designed for using open reference databases for microorganism identification. The data is available at http://www.bionet.nsc.ru/mbl/database/database.html.",2015-11-23 +25937880,Making species checklists understandable to machines - a shift from relational databases to ontologies.,"

Background

The scientific names of plants and animals play a major role in Life Sciences as information is indexed, integrated, and searched using scientific names. The main problem with names is their ambiguous nature, because more than one name may point to the same taxon and multiple taxa may share the same name. In addition, scientific names change over time, which makes them open to various interpretations. Applying machine-understandable semantics to these names enables efficient processing of biological content in information systems. The first step is to use unique persistent identifiers instead of name strings when referring to taxa. The most commonly used identifiers are Life Science Identifiers (LSID), which are traditionally used in relational databases, and more recently HTTP URIs, which are applied on the Semantic Web by Linked Data applications.

Results

We introduce two models for expressing taxonomic information in the form of species checklists. First, we show how species checklists are presented in a relational database system using LSIDs. Then, in order to gain a more detailed representation of taxonomic information, we introduce meta-ontology TaxMeOn to model the same content as Semantic Web ontologies where taxa are identified using HTTP URIs. We also explore how changes in scientific names can be managed over time.

Conclusions

The use of HTTP URIs is preferable for presenting the taxonomic information of species checklists. An HTTP URI identifies a taxon and operates as a web address from which additional information about the taxon can be located, unlike LSID. This enables the integration of biological data from different sources on the web using Linked Data principles and prevents the formation of information silos. The Linked Data approach allows a user to assemble information and evaluate the complexity of taxonomical data based on conflicting views of taxonomic classifications. Using HTTP URIs and Semantic Web technologies also facilitate the representation of the semantics of biological data, and in this way, the creation of more ""intelligent"" biological applications and services.",2014-09-08 +25637033,Exploration and visualization of connectivity in the adult mouse brain.,"The Allen Mouse Brain Connectivity Atlas is a mesoscale whole brain axonal projection atlas of the C57Bl/6J mouse brain. All data were aligned to a common template in 3D space to generate a comprehensive and quantitative database of inter-areal and cell-type-specific projections. A suite of computational tools were developed to search and visualize the projection labeling experiments, available at http://connectivity.brain-map.org. We present three use cases illustrating how these publicly-available tools can be used to perform analyses of long range brain region connectivity. The use cases make extensive use of advanced visualization tools integrated with the atlas including projection density histograms, 3D computed anterograde and retrograde projection paths, and multi-specimen projection composites. 
These tools offer convenient access to detailed axonal projection information in the adult mouse brain and the ability to perform data analysis and visualization of projection fields and neuroanatomy in an integrated manner.",2015-01-27 +21827651,Development of a classification scheme for disease-related enzyme information.,"

Background

BRENDA (BRaunschweig ENzyme DAtabase, http://www.brenda-enzymes.org) is a major resource for enzyme related information. First and foremost, it provides data which are manually curated from the primary literature. DRENDA (Disease RElated ENzyme information DAtabase) complements BRENDA with a focus on the automatic search and categorization of enzyme and disease related information from title and abstracts of primary publications. In a two-step procedure DRENDA makes use of text mining and machine learning methods.

Results

Currently enzyme and disease related references are biannually updated as part of the standard BRENDA update. 910,897 relations of EC-numbers and diseases were extracted from titles or abstracts and are included in the second release in 2010. The enzyme and disease entity recognition has been successfully enhanced by a further relation classification via machine learning. The classification step has been evaluated by a 5-fold cross validation and achieves an F1 score between 0.802 ± 0.032 and 0.738 ± 0.033 depending on the categories and pre-processing procedures. In the eventual DRENDA content every category reaches a classification specificity of at least 96.7% and a precision that ranges from 86-98% in the highest confidence level, and 64-83% for the smallest confidence level associated with higher recall.

Conclusions

The DRENDA processing chain analyses PubMed, locates references with disease-related information on enzymes and categorises their focus according to the categories causal interaction, therapeutic application, diagnostic usage and ongoing research. The categorisation gives an impression on the focus of the located references. Thus, the relation categorisation can facilitate orientation within the rapidly growing number of references with impact on diseases and enzymes. The DRENDA information is available as additional information in BRENDA.",2011-08-09 +25313160,Open TG-GATEs: a large-scale toxicogenomics database.,"Toxicogenomics focuses on assessing the safety of compounds using gene expression profiles. Gene expression signatures from large toxicogenomics databases are expected to perform better than small databases in identifying biomarkers for the prediction and evaluation of drug safety based on a compound's toxicological mechanisms in animal target organs. Over the past 10 years, the Japanese Toxicogenomics Project consortium (TGP) has been developing a large-scale toxicogenomics database consisting of data from 170 compounds (mostly drugs) with the aim of improving and enhancing drug safety assessment. Most of the data generated by the project (e.g. gene expression, pathology, lot number) are freely available to the public via Open TG-GATEs (Toxicogenomics Project-Genomics Assisted Toxicity Evaluation System). Here, we provide a comprehensive overview of the database, including both gene expression data and metadata, with a description of experimental conditions and procedures used to generate the database. Open TG-GATEs is available from http://toxico.nibio.go.jp/english/index.html.",2014-10-13 +26940364,dbPHCC: a database of prognostic biomarkers for hepatocellular carcinoma that provides online prognostic modeling.,"

Background

Hepatocellular carcinoma (HCC) is one of the most common malignant cancers with a poor prognosis. For decades, more and more biomarkers have been found to affect HCC prognosis, but these studies were scattered and there were no unified identifiers. Therefore, we built the database of prognostic biomarkers and models for hepatocellular carcinoma (dbPHCC).

Methods

dbPHCC focuses on biomarkers which were related to HCC prognosis by traditional experiments rather than high-throughput technology. All of the prognostic biomarkers came from literature published from 2002 to 2014 in PubMed and were manually selected. dbPHCC collects comprehensive information on candidate biomarkers and HCC prognosis.

Results

dbPHCC mainly contains 567 biomarkers: 323 proteins, 154 genes, and 90 microRNAs. For each biomarker, the reference information, experimental conditions, and prognostic information are shown. Based on two available patient cohort data sets, an exemplified prognostic model was constructed using 15 phosphotransferases in dbPHCC. The web interface does not only provide a full range of browsing and searching, but also provides online analysis tools. dbPHCC is available at http://lifecenter.sgst.cn/dbphcc/

Conclusions

dbPHCC provides a comprehensive and convenient search and analysis platform for HCC prognosis research.

General significance

dbPHCC is the first database to focus on experimentally verified individual biomarkers, which are related to HCC prognosis. Prognostic markers in dbPHCC have the potential to be therapeutic drug targets and may help in designing new treatments to improve survival of HCC patients. This article is part of a Special Issue entitled ""System Genetics"" Guest Editor: Dr. Yudong Cai and Dr. Tao Huang.",2016-03-02 +27899646,FAIRDOMHub: a repository and collaboration environment for sharing systems biology research.,"The FAIRDOMHub is a repository for publishing FAIR (Findable, Accessible, Interoperable and Reusable) Data, Operating procedures and Models (https://fairdomhub.org/) for the Systems Biology community. It is a web-accessible repository for storing and sharing systems biology research assets. It enables researchers to organize, share and publish data, models and protocols, interlink them in the context of the systems biology investigations that produced them, and to interrogate them via API interfaces. By using the FAIRDOMHub, researchers can achieve more effective exchange with geographically distributed collaborators during projects, ensure results are sustained and preserved and generate reproducible publications that adhere to the FAIR guiding principles of data stewardship.",2016-11-28 +27814676,Q-nexus: a comprehensive and efficient analysis pipeline designed for ChIP-nexus.,"

Background

ChIP-nexus, an extension of the ChIP-exo protocol, can be used to map the borders of protein-bound DNA sequences at nucleotide resolution, requires less input DNA and enables selective PCR duplicate removal using random barcodes. However, the use of random barcodes requires additional preprocessing of the mapping data, which complicates the computational analysis. To date, only a very limited number of software packages are available for the analysis of ChIP-exo data, which have not yet been systematically tested and compared on ChIP-nexus data.

Results

Here, we present a comprehensive software package for ChIP-nexus data that exploits the random barcodes for selective removal of PCR duplicates and for quality control. Furthermore, we developed bespoke methods to estimate the width of the protected region resulting from protein-DNA binding and to infer binding positions from ChIP-nexus data. Finally, we applied our peak calling method as well as the two other methods MACE and MACS2 to the available ChIP-nexus data.

Conclusions

The Q-nexus software is efficient and easy to use. Novel statistics about duplication rates in consideration of random barcodes are calculated. Our method for the estimation of the width of the protected region yields unbiased signatures that are highly reproducible for biological replicates and at the same time very specific for the respective factors analyzed. As judged by the irreproducible discovery rate (IDR), our peak calling algorithm shows a substantially better reproducibility. An implementation of Q-nexus is available at http://charite.github.io/Q/ .",2016-11-04 +28881874,Oncopression: gene expression compendium for cancer with matched normal tissues.,"

Motivation

The expression profile of normal tissue is the primary source for finding genes showing aberrant expression patterns specific to matched cancer tissue, but the number of normal control samples in public gene expression repositories is disproportionately small compared to cancer samples and is scattered across several datasets.

Results

We built oncopression by integrating several datasets into one large dataset for comprehensive analysis of 25 types of human cancers including 20 640 cancer samples and 6801 normal control profiles. Expression profiles in cancers can be directly compared to normal tissue counterparts. Validity of the integration was tested using immunohistochemical staining results and principal component analysis. We have utilized the pre-release version of oncopression to identify cancer-specific genes in several studies.

Availability and implementation

Free access at http://www.oncopression.com and all expression data are available for download at the site.

Contacts

cchoi@kaist.ac.kr or jungsullee@gmail.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +25717196,Automated benchmarking of peptide-MHC class I binding predictions.,"

Motivation

Numerous in silico methods predicting peptide binding to major histocompatibility complex (MHC) class I molecules have been developed over the last decades. However, the multitude of available prediction tools makes it non-trivial for the end-user to select which tool to use for a given task. To provide a solid basis on which to compare different prediction tools, we here describe a framework for the automated benchmarking of peptide-MHC class I binding prediction tools. The framework runs weekly benchmarks on data that are newly entered into the Immune Epitope Database (IEDB), giving the public access to frequent, up-to-date performance evaluations of all participating tools. To overcome potential selection bias in the data included in the IEDB, a strategy was implemented that suggests a set of peptides for which different prediction methods give divergent predictions as to their binding capability. Upon experimental binding validation, these peptides entered the benchmark study.

Results

The benchmark has run for 15 weeks and includes evaluation of 44 datasets covering 17 MHC alleles and more than 4000 peptide-MHC binding measurements. Inspection of the results allows the end-user to make educated selections between participating tools. Of the four participating servers, NetMHCpan performed the best, followed by ANN, SMM and finally ARB.

Availability and implementation

Up-to-date performance evaluations of each server can be found online at http://tools.iedb.org/auto_bench/mhci/weekly. All prediction tool developers are invited to participate in the benchmark. Sign-up instructions are available at http://tools.iedb.org/auto_bench/mhci/join.

Contact

mniel@cbs.dtu.dk or bpeters@liai.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-25 +28379338,CMDR based differential evolution identifies the epistatic interaction in genome-wide association studies.,"

Motivation

Detecting epistatic interactions in genome-wide association studies (GWAS) is a computational challenge. Such huge numbers of single-nucleotide polymorphism (SNP) combinations prevent some of the powerful algorithms from being applied to detect the potential epistasis in large-scale SNP datasets.

Approach

We propose a new algorithm which combines the differential evolution (DE) algorithm with a classification based multifactor-dimensionality reduction (CMDR), termed DECMDR. DECMDR uses the CMDR as a fitness measure to evaluate values of solutions in DE process for scanning the potential statistical epistasis in GWAS.

Results

The results indicated that DECMDR outperforms the existing algorithms in terms of detection success rate by the large simulation and real data obtained from the Wellcome Trust Case Control Consortium. For running time comparison, DECMDR can efficiently apply the CMDR to detect the significant association between cases and controls amongst all possible SNP combinations in GWAS.

Availability and implementation

DECMDR is freely available at https://goo.gl/p9sLuJ .

Contact

chuang@isu.edu.tw or e0955767257@yahoo.com.tw.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +26746786,"Integrating 400 million variants from 80,000 human samples with extensive annotations: towards a knowledge base to analyze disease cohorts.","

Background

Data from a plethora of high-throughput sequencing studies is readily available to researchers, providing genetic variants detected in a variety of healthy and disease populations. While each individual cohort helps gain insights into polymorphic and disease-associated variants, a joint perspective can be more powerful in identifying polymorphisms, rare variants, disease-associations, genetic burden, somatic variants, and disease mechanisms.

Description

We have set up a Reference Variant Store (RVS) containing variants observed in a number of large-scale sequencing efforts, such as 1000 Genomes, ExAC, Scripps Wellderly, UK10K; various genotyping studies; and disease association databases. RVS holds extensive annotations pertaining to affected genes, functional impacts, disease associations, and population frequencies. RVS currently stores 400 million distinct variants observed in more than 80,000 human samples.

Conclusions

RVS facilitates cross-study analysis to discover novel genetic risk factors, gene-disease associations, potential disease mechanisms, and actionable variants. Due to its large reference populations, RVS can also be employed for variant filtration and gene prioritization.

Availability

A web interface to public datasets and annotations in RVS is available at https://rvs.u.hpc.mssm.edu/.",2016-01-08 +28520890,VCF.Filter: interactive prioritization of disease-linked genetic variants from sequencing data.,"Next generation sequencing is widely used to link genetic variants to diseases, and it has massively accelerated the diagnosis and characterization of rare genetic diseases. After initial bioinformatic data processing, the interactive analysis of genome, exome, and panel sequencing data typically starts from lists of genetic variants in VCF format. Medical geneticists filter and annotate these lists to identify variants that may be relevant for the disease under investigation, or to select variants that are reported in a clinical diagnostics setting. We developed VCF.Filter to facilitate the search for disease-linked variants, providing a standalone Java program with a user-friendly interface for interactive variant filtering and annotation. VCF.Filter allows the user to define a broad range of filtering criteria through a graphical interface. Common workflows such as trio analysis and cohort-based filtering are pre-configured, and more complex analyses can be performed using VCF.Filter's support for custom annotations and filtering criteria. All filtering is documented in the results file, thus providing traceability of the interactive variant prioritization. VCF.Filter is an open source tool that is freely and openly available at http://vcffilter.rarediseases.at.",2017-07-01 +,Deconstructing the control of the spotted alfalfa aphid Therioaphis maculata,"The control of insect pests and other taxa may be a result of many factors that are difficult to separate and quantify. Introduced parasitoids, host plant resistance, pathogens and native predators led to the successful control of the spotted alfalfa aphid (SAA; Therioaphis maculata Monell) in California and elsewhere, although the relative contribution of each factor remained largely unknown. 
The relative contribution of each control factor was estimated using a weather‐driven physiologically‐based demographic system model consisting of alfalfa, SAA, its three exotic parasitoids [Aphelinus semiflavus Howard, Praon palitans Muesebeck and Trioxys complanatus (Quilis)], a native coccinellid beetle [Hippodamia convergens (Guérin‐Menéville)], a fungal pathogen [Erynia neoaphidis Remaudière & Hennebert (Zygomycetes: Entomophthorales)] and host plant resistance (HPR). Daily weather data for the period 1995–2006 from 142 locations in Arizona and California were used to drive the model. The factors were introduced to the model singly or in combination to assess their effects in suppressing simulated SAA populations using SAA‐days m⁻² year⁻¹ (i.e. density) as the metric of control. Data from selected runs were mapped using the geographic information system grass (http://grass.osgeo.org). The simulation data across all factor combinations, years and locations were summarized using linear multiple regression, with the dependent variable being log₁₀ SAA‐days m⁻² year⁻¹ and the independent variables being the presence–absence (0, 1) of the various factors and their interactions. Marginal analysis of the regression model (∂y/∂xᵢ) enabled separation of the average effects of the different factors (xᵢ) given the average effects of the other factors. Alone, each factor failed to control SAA, as did combinations of the parasitoids and coccinellid predation. Control was predicted across all ecological zones only when all mortality factors were included. The marginal analysis suggests that the order of importance of the mortality factors is HPR > coccinellid beetles > T. complanatus > P. palitans > A. semiflavus > the fungal pathogen. 
The variability of control by coccinellid beetles and the fungal pathogen was high and hence unreliable.",2013-08-01 +29358944,A Powerful Gene-Based Test Accommodating Common and Low-Frequency Variants to Detect Both Main Effects and Gene-Gene Interaction Effects in Case-Control Studies.,"Next-generation sequencing (NGS) has been widely used in genetic association studies to identify both common and rare variants associated with complex diseases. Various statistical association tests have been developed to analyze NGS data; however, most focus on identifying the marginal effects of a set of genetic variants on the disease. Only a few association tests for NGS data analysis have considered the interaction effects between genes. We developed three powerful gene-based gene-gene interaction tests for testing both the main effects and the interaction effects of common, low-frequency, and common with low-frequency variant pairs between two genes (the IGOF tests) in case-control studies using NGS data. We performed a comprehensive simulation study to verify that the proposed tests had appropriate type I error rates and significantly higher power than did other interaction tests for analyzing NGS data. The tests were applied to a whole-exome sequencing dataset for autism spectrum disorder (ASD) and the significant results were evaluated in another independent ASD cohort. The IGOF tests were implemented in C++ and are available at http://igof.sourceforge.net.",2017-01-01 +24849579,MaxLink: network-based prioritization of genes tightly linked to a disease seed set.,"

Unlabelled

MaxLink, a guilt-by-association network search algorithm, has been made available as a web resource and a stand-alone version. Based on a user-supplied list of query genes, MaxLink identifies and ranks genes that are tightly linked to the query list. This functionality can be used to predict potential disease genes from an initial set of genes with known association to a disease. The original algorithm, used to identify and rank novel genes potentially involved in cancer, has been updated to use a more statistically sound method for selection of candidate genes and made applicable to other areas than cancer. The algorithm has also been made faster by re-implementation in C++, and the Web site uses FunCoup 3.0 as the underlying network.

Availability and implementation

MaxLink is freely available at http://maxlink.sbc.su.se both as a web service and a stand-alone application for download.",2014-05-20 +26746174,Pleurochrysome: A Web Database of Pleurochrysis Transcripts and Orthologs Among Heterogeneous Algae.,"Pleurochrysis is a coccolithophorid genus, which belongs to the Coccolithales in the Haptophyta. The genus has been used extensively for biological research, together with Emiliania in the Isochrysidales, to understand distinctive features between the two coccolithophorid-including orders. However, molecular biological research on Pleurochrysis such as elucidation of the molecular mechanism behind coccolith formation has not made great progress at least in part because of lack of comprehensive gene information. To provide such information to the research community, we built an open web database, the Pleurochrysome (http://bioinf.mind.meiji.ac.jp/phapt/), which currently stores 9,023 unique gene sequences (designated as UNIGENEs) assembled from expressed sequence tag sequences of P. haptonemofera as core information. The UNIGENEs were annotated with gene sequences sharing significant homology, conserved domains, Gene Ontology, KEGG Orthology, predicted subcellular localization, open reading frames and orthologous relationship with genes of 10 other algal species, a cyanobacterium and the yeast Saccharomyces cerevisiae. This sequence and annotation information can be easily accessed via several search functions. Besides fundamental functions such as BLAST and keyword searches, this database also offers search functions to explore orthologous genes in the 12 organisms and to seek novel genes. The Pleurochrysome will promote molecular biological and phylogenetic research on coccolithophorids and other haptophytes by helping scientists mine data from the primary transcriptome of P. 
haptonemofera.",2016-01-07 +28127582,"Dataset on the absorption of PCDTBT:PC70BM layers and the electro-optical characteristics of air-stable, large-area PCDTBT:PC70BM-based polymer solar cell modules, deposited with a custom built slot-die coater.","The data presented in this article is related to the research article entitled ""Fabrication of air-stable, large-area, PCDTBT:PC70BM polymer solar cell modules using a custom built slot-die coater"" (D.I. Kutsarov, E. New, F. Bausi, A. Zoladek-Lemanczyk, F.A. Castro, S.R.P. Silva, 2016) [1]. The repository name and reference number for the raw data from the abovementioned publication can be found under: https://doi.org/10.15126/surreydata.00813106. In this data in brief article, additional information about the absorption properties of PCDTBT:PC70BM layers deposited from a 12.5 mg/ml and 15 mg/ml photoactive layer dispersion are shown. Additionally, the best and average J-V curves of single cells, fabricated from the 10 and 15 mg/ml dispersions, are presented.",2017-01-11 +25725060,"MnTEdb, a collective resource for mulberry transposable elements. ","Mulberry has been used as an economically important food crop for the domesticated silkworm for thousands of years, resulting in one of the oldest and well-known plant-herbivore interactions. The genome of Morus notabilis has now been sequenced and there is an opportunity to mine the transposable element (TE) data. To better understand the roles of TEs in structural, functional and evolutionary dynamics of the mulberry genome, a specific, comprehensive and user-friendly web-based database, MnTEdb, was constructed. It was built based on a detailed and accurate identification of all TEs in mulberry. A total of 5925 TEs belonging to 13 superfamilies and 1062 families were deposited in this database. MnTEdb enables users to search, browse and download the mulberry TE sequences. 
Meanwhile, data mining tools, including BLAST, GetORF, HMMER, Sequence Extractor and JBrowse were also integrated into MnTEdb. MnTEdb will assist researchers to efficiently take advantage of our newly annotated TEs, which facilitate their studies in the origin, amplification and evolution of TEs, as well as the comparative analysis among the different species. Database URL: http://morus.swu.edu.cn/mntedb/",2015-02-27 +27152122,Building a glaucoma interaction network using a text mining approach.,"

Background

The volume of biomedical literature and its underlying knowledge base is rapidly expanding, making it beyond the ability of a single human being to read through all the literature. Several automated methods have been developed to help make sense of this dilemma. The present study reports on the results of a text mining approach to extract gene interactions from the data warehouse of published experimental results which are then used to benchmark an interaction network associated with glaucoma. To the best of our knowledge, there is, as yet, no glaucoma interaction network derived solely from text mining approaches. The presence of such a network could provide a useful summative knowledge base to complement other forms of clinical information related to this disease.

Results

A glaucoma corpus was constructed from PubMed Central and a text mining approach was applied to extract genes and their relations from this corpus. The extracted relations between genes were checked using reference interaction databases and classified generally as known or new relations. The extracted genes and relations were then used to construct a glaucoma interaction network. Analysis of the resulting network indicated that it bears the characteristics of a small world interaction network. Our analysis showed the presence of seven glaucoma linked genes that defined the network modularity. A web-based system for browsing and visualizing the extracted glaucoma related interaction networks is made available at http://neurogene.spd.louisville.edu/GlaucomaINViewer/Form1.aspx.

Conclusions

This study has reported the first version of a glaucoma interaction network using a text mining approach. The power of such an approach is in its ability to cover a wide range of glaucoma related studies published over many years. Hence, a bigger picture of the disease can be established. To the best of our knowledge, this is the first glaucoma interaction network to summarize the known literature. The major findings were a set of relations that could not be found in existing interaction databases and that were found to be new, in addition to a smaller subnetwork consisting of interconnected clusters of seven glaucoma genes. Future improvements can be applied towards obtaining a better version of this network.",2016-05-05 +22110032,DAMPD: a manually curated antimicrobial peptide database.,"The demand for antimicrobial peptides (AMPs) is rising because of the increased occurrence of pathogens that are tolerant or resistant to conventional antibiotics. Since naturally occurring AMPs could serve as templates for the development of new anti-infectious agents to which pathogens are not resistant, a resource that contains relevant information on AMP is of great interest. To that extent, we developed the Dragon Antimicrobial Peptide Database (DAMPD, http://apps.sanbi.ac.za/dampd) that contains 1232 manually curated AMPs. DAMPD is an update and a replacement of the ANTIMIC database. In DAMPD an integrated interface allows in a simple fashion querying based on taxonomy, species, AMP family, citation, keywords and a combination of search terms and fields (Advanced Search). 
A number of tools such as Blast, ClustalW, HMMER, Hydrocalculator, SignalP, AMP predictor, as well as a number of other resources that provide additional information about the results are also provided and integrated into DAMPD to augment biological analysis of AMPs.",2011-11-21 +21267068,A high-resolution anatomical atlas of the transcriptome in the mouse embryo.,"Ascertaining when and where genes are expressed is of crucial importance to understanding or predicting the physiological role of genes and proteins and how they interact to form the complex networks that underlie organ development and function. It is, therefore, crucial to determine on a genome-wide level, the spatio-temporal gene expression profiles at cellular resolution. This information is provided by colorimetric RNA in situ hybridization that can elucidate expression of genes in their native context and does so at cellular resolution. We generated what is to our knowledge the first genome-wide transcriptome atlas by RNA in situ hybridization of an entire mammalian organism, the developing mouse at embryonic day 14.5. This digital transcriptome atlas, the Eurexpress atlas (http://www.eurexpress.org), consists of a searchable database of annotated images that can be interactively viewed. We generated anatomy-based expression profiles for over 18,000 coding genes and over 400 microRNAs. We identified 1,002 tissue-specific genes that are a source of novel tissue-specific markers for 37 different anatomical structures. The quality and the resolution of the data revealed novel molecular domains for several developing structures, such as the telencephalon, a novel organization for the hypothalamus, and insight on the Wnt network involved in renal epithelial differentiation during kidney development. 
The digital transcriptome atlas is a powerful resource to determine co-expression of genes, to identify cell populations and lineages, and to identify functional associations between genes relevant to development and disease.",2011-01-18 +29113560,Efficient RNA structure comparison algorithms.,"Recently proposed relative addressing-based ([Formula: see text]) RNA secondary structure representation has important features by which an RNA structure database can be stored into a suffix array. A fast substructure search algorithm has been proposed based on binary search on this suffix array. Using this substructure search algorithm, we present a fast algorithm that finds the largest common substructure of given multiple RNA structures in [Formula: see text] format. The multiple RNA structure comparison problem is NP-hard in its general formulation. We introduced a new problem for comparing multiple RNA structures. This problem has more strict similarity definition and objective, and we propose an algorithm that solves this problem efficiently. We also develop another comparison algorithm that iteratively calls this algorithm to locate nonoverlapping large common substructures in compared RNAs. With the new resulting tools, we improved the RNASSAC website (linked from http://faculty.tamuc.edu/aarslan ). This website now also includes two drawing tools: one specialized for preparing RNA substructures that can be used as input by the search tool, and another one for automatically drawing the entire RNA structure from a given structure sequence.",2017-10-19 +26590402,"EffectiveDB--updates and novel features for a better annotation of bacterial secreted proteins and Type III, IV, VI secretion systems.","Protein secretion systems play a key role in the interaction of bacteria and hosts. EffectiveDB (http://effectivedb.org) contains pre-calculated predictions of bacterial secreted proteins and of intact secretion systems. 
Here we describe a major update of the database, which was previously featured in the NAR Database Issue. EffectiveDB bundles various tools to recognize Type III secretion signals, conserved binding sites of Type III chaperones, Type IV secretion peptides, eukaryotic-like domains and subcellular targeting signals in the host. Beyond the analysis of arbitrary protein sequence collections, the new release of EffectiveDB also provides a 'genome-mode', in which protein sequences from nearly complete genomes or metagenomic bins can be screened for the presence of three important secretion systems (Type III, IV, VI). EffectiveDB contains pre-calculated predictions for currently 1677 bacterial genomes from the EggNOG 4.0 database and for additional bacterial genomes from NCBI RefSeq. The new, user-friendly and informative web portal offers a submission tool for running the EffectiveDB prediction tools on user-provided data.",2015-11-20 +25224438,TMDB: a literature-curated database for small molecular compounds found from tea.,"

Background

Tea is one of the most consumed beverages worldwide. The health effects of tea are attributed to a wealth of different chemical components from tea. Thousands of studies on the chemical constituents of tea have been reported. However, data from these individual reports have not been collected into a single database. The lack of a curated database of related information limits research in this field, and thus a cohesive database system should necessarily be constructed for data deposit and further application.

Description

The Tea Metabolome database (TMDB), a manually curated and web-accessible database, was developed to provide detailed, searchable descriptions of small molecular compounds found in Camellia spp. esp. in the plant Camellia sinensis and compounds in its manufactured products (different kinds of tea infusion). TMDB is currently the most complete and comprehensive curated collection of tea compounds data in the world. It contains records for more than 1393 constituents found in tea with information gathered from 364 published books, journal articles, and electronic databases. It also contains experimental 1H NMR and 13C NMR data collected from the purified reference compounds or collected from other database resources such as HMDB. TMDB interface allows users to retrieve tea compounds entries by keyword search using compound name, formula, occurrence, and CAS register number. Each entry in the TMDB contains an average of 24 separate data fields including its original plant species, compound structure, formula, molecular weight, name, CAS registry number, compound types, compound uses including healthy benefits, reference literatures, NMR, MS data, and the corresponding ID from databases such as HMDB and Pubmed. Users can also contribute novel regulatory entries by using a web-based submission page. The TMDB database is freely accessible from the URL of http://pcsb.ahau.edu.cn:8080/TCDB/index.jsp. The TMDB is designed to address the broad needs of tea biochemists, natural products chemists, nutritionists, and members of tea related research community.

Conclusion

The TMDB database provides a solid platform for collection, standardization, and searching of compounds information found in tea. As such this database will be a comprehensive repository for the tea biochemistry and tea health research communities.",2014-09-16 +28991473,INTerface Builder: A Fast Protein-Protein Interface Reconstruction Tool.,"INTerface Builder (INTBuilder) is a fast, easy-to-use program to compute protein-protein interfaces. It is designed to retrieve interfaces from molecular docking software outputs in an empirically determined linear complexity. INTBuilder directly reads the output formats of popular docking programs like ATTRACT, HEX, MAXDo, and ZDOCK, as well as a more generic format and Protein Data Bank (PDB) files. It identifies interacting surfaces at both residue and atom resolutions. INTerface Builder is an open source software written in C and freely available for noncommercial use (CeCILL license) at https://www.lcqb.upmc.fr/INTBuilder .",2017-10-19 +27660812,Small punch tensile/fracture test data and 3D specimen surface data on Grade 91 ferritic/martensitic steel from cryogenic to room temperature.,"Raw data from small punch tensile/fracture tests at two displacement rates in the temperature range from -196 °C to room temperature on Grade 91 ferritic/martensitic steel are presented. A number of specimens were analyzed after testing by means of X-ray computed tomography (CT). Based on the CT volume data detailed 3D surface maps of the specimens were established. All data are open access and available from Online Data Information Network (ODIN) https://odin.jrc.ec.europa.eu. The data presented in the current work has been analyzed in the research article ""On the determination of the ductile to brittle transition temperature from small punch tests on Grade 91 ferritic-martensitic steel"" (M. Bruchhausen, S. Holmström, J.-M. Lapetite, S. 
Ripplinger, 2015) [1].",2016-09-03 +22538508,Computational structural analysis of proteins of Mycobacterium tuberculosis and a resource for identifying off-targets.,"Advancement in technology has helped to solve structures of several proteins including M. tuberculosis (MTB) proteins. Identifying similarity between protein structures could not only yield valuable clues to their function, but can also be employed for motif finding, protein docking and off-target identification. The current study has undertaken an analysis of the structures of all MTB gene products with available structures. The majority of the MTB proteins belonged to the α/β class. 23 different protein folds are used in the MTB protein structures. Of these, the TIM barrel fold was found to be highly conserved even at very low sequence identity. We identified 21 paralogs and 27 analogs of MTB based on domains and EC classification. Our analysis revealed that many of the current drug targets share structural similarity with other proteins within the MTB genome, which could probably be off-targets. Results of this analysis have been made available in the Mycobacterium tuberculosis Structural Database (http://bmi.icmr.org.in/mtbsd/MtbSD.php/search.php) which is a useful resource for current and novel drug targets of MTB.",2012-04-27 +28711973,PIWI-interacting RNAs as novel regulators of pancreatic beta cell function.,"

Aims/hypothesis

P-element induced Wimpy testis (PIWI)-interacting RNAs (piRNAs) are small non-coding RNAs that interact with PIWI proteins and guide them to silence transposable elements. They are abundantly expressed in germline cells and play key roles in spermatogenesis. There is mounting evidence that piRNAs are also present in somatic cells, where they may accomplish additional regulatory tasks. The aim of this study was to identify the piRNAs expressed in pancreatic islets and to determine whether they are involved in the control of beta cell activities.

Methods

piRNA profiling of rat pancreatic islets was performed by microarray analysis. The functions of piRNAs were investigated by silencing the two main Piwi genes or by modulating the level of selected piRNAs in islet cells.

Results

We detected about 18,000 piRNAs in rat pancreatic islets, many of which were differentially expressed throughout islet postnatal development. Moreover, we identified changes in the level of several piRNAs in the islets of Goto-Kakizaki rats, a well-established animal model of type 2 diabetes. Silencing of Piwil2 or Piwil4 genes in adult rat islets caused a reduction in the level of several piRNAs and resulted in defective insulin secretion and increased resistance of the cells to cytokine-induced cell death. Furthermore, overexpression in the islets of control animals of two piRNAs that are upregulated in diabetic rats led to a selective defect in glucose-induced insulin release.

Conclusions/interpretation

Our results provide evidence for a role of PIWI proteins and their associated piRNAs in the control of beta cell functions, and suggest a possible involvement in the development of type 2 diabetes.

Data availability

Data have been deposited in Gene Expression Omnibus repository under the accession number GSE93792. Data can be accessed via the following link: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?token=ojklueugdzehpkv&acc=GSE93792.",2017-07-16 +29994452,HARD-PnP: PnP Optimization Using a Hybrid Approximate Representation.,"This paper proposes a Hybrid Approximate Representation (HAR) based on unifying several efficient approximations of the generalized reprojection error (which is known as the gold standard for multiview geometry). The HAR is an over-parameterization scheme where the approximation is applied simultaneously in multiple parameter spaces. A joint minimization scheme ""HAR-Descent"" can then solve the PnP problem efficiently, while remaining robust to approximation errors and local minima. The technique is evaluated extensively, including numerous synthetic benchmark protocols and the real-world data evaluations used in previous works. The proposed technique was found to have runtime complexity comparable to the fastest O(n) techniques, and up to 10 times faster than current state of the art minimization approaches. In addition, the accuracy exceeds that of all 9 previous techniques tested, providing definitive state of the art performance on the benchmarks, across all 90 of the experiments in the paper and supplementary material, which can be found on the Computer Society Digital Library at http://doi.ieeecomputersociety.org/10.1109/TPAMI.2018.2806446.",2018-02-15 +29955829,"""Do I Sound Straight?"": Acoustic Correlates of Actual and Perceived Sexual Orientation and Masculinity/Femininity in Men's Speech.","

Purpose

This study aims to give an integrative answer on which speech stereotypes exist toward German gay and straight men, whether and how acoustic correlates of actual and perceived sexual orientation are connected, and how this relates to masculinity/femininity. Hence, it tests speech stereotype accuracy in the context of sexual orientation.

Method

Twenty-five gay and 26 straight German speakers provided data for a fine-grained psychological self-assessment (e.g., masculinity/femininity) and explicit speech stereotypes. They were recorded for an extensive set of read and spontaneous speech samples using microphones and nasometry. Recordings were analyzed for a variety of acoustic parameters (e.g., fundamental frequency and nasalance). Seventy-four listeners categorized speakers as gay or straight on the basis of the same sentence.

Results

Most relevant explicitly expressed speech stereotypes encompass voice pitch, nasality, chromaticity, and smoothness. Demonstrating implicit stereotypes, speakers were perceived as sounding straighter, the lower their median f0, center of gravity in /s/, and mean F2. However, based on actual sexual orientation, straight men only showed lower mean F1 than gay men. Additionally, we found evidence that actual masculinity/femininity and the degree of sexual orientation were reflected in gay and straight men's speech.

Conclusion

Implicit and explicit speech stereotypes about gay and straight men do not contain a kernel of truth, and differences within groups are more important than differences between them.

Supplemental material

https://doi.org/10.23641/asha.6484001.",2018-07-01 +,Web-based networking of herbal gardens for exchange of planting material,"In recent years the demand for medicinal and aromatic plants has grown rapidly at national and international level because of their vital role in supplying nutrients as well as strengthening various vital human systems to protect against innumerable diseases. The collection, conservation and exchange of planting material of medicinal plants are some of the major problems in this sector. Taking these things into consideration, a web based network of Herbal Garden in India (HGI) has been designed and developed at the Directorate of Medicinal and Aromatic Plants Research (DMAPR), Anand, Gujarat, India. HGI is a web based decision making system and provides information about the herbal gardens in India and facilitates the exchange of planting material. HGI has been developed using Tomcat6, Struts 2.0, JavaScript & MySQL and hosted at http://www.herbalgardenindia.org. The system has been designed using a modular approach and has separate modules for garden curators (nodal officers) and general users. Nodal officers have the right to insert, edit/update and delete data related to their respective gardens. General users have the flexibility to access the information about the herbal gardens of India and the system helps them approach a nearby garden for the required planting material.",2014-04-01 +26337239,"Carbohydrate Structure Database: tools for statistical analysis of bacterial, plant and fungal glycomes. ","Carbohydrates are biological blocks participating in diverse and crucial processes both at cellular and organism levels. They protect individual cells, establish intracellular interactions, take part in the immune reaction and participate in many other processes. Glycosylation is considered as one of the most important modifications of proteins and other biologically active molecules. 
Still, the data on the enzymatic machinery involved in the carbohydrate synthesis and processing are scattered, and the advance on its study is hindered by the vast bulk of accumulated genetic information not supported by any experimental evidences for functions of proteins that are encoded by these genes. In this article, we present novel instruments for statistical analysis of glycomes in taxa. These tools may be helpful for investigating carbohydrate-related enzymatic activities in various groups of organisms and for comparison of their carbohydrate content. The instruments are developed on the Carbohydrate Structure Database (CSDB) platform and are available freely on the CSDB web-site at http://csdb.glycoscience.ru. Database URL: http://csdb.glycoscience.ru.",2015-09-03 +27603023,Using MetaboAnalyst 3.0 for Comprehensive Metabolomics Data Analysis.,"MetaboAnalyst (http://www.metaboanalyst.ca) is a comprehensive Web application for metabolomic data analysis and interpretation. MetaboAnalyst handles most of the common metabolomic data types from most kinds of metabolomics platforms (MS and NMR) for most kinds of metabolomics experiments (targeted, untargeted, quantitative). In addition to providing a variety of data processing and normalization procedures, MetaboAnalyst also supports a number of data analysis and data visualization tasks using a range of univariate, multivariate methods such as PCA (principal component analysis), PLS-DA (partial least squares discriminant analysis), heatmap clustering and machine learning methods. MetaboAnalyst also offers a variety of tools for metabolomic data interpretation including MSEA (metabolite set enrichment analysis), MetPA (metabolite pathway analysis), and biomarker selection via ROC (receiver operating characteristic) curve analysis, as well as time series and power analysis. 
This unit provides an overview of the main functional modules and the general workflow of the latest version of MetaboAnalyst (MetaboAnalyst 3.0), followed by eight detailed protocols. © 2016 by John Wiley & Sons, Inc.",2016-09-07 +30478944,"Beyond the traditional simulation design for evaluating type 1 error control: From the ""theoretical"" null to ""empirical"" null.","When evaluating a newly developed statistical test, an important step is to check its type 1 error (T1E) control using simulations. This is often achieved by the standard simulation design S0 under the so-called ""theoretical"" null of no association. In practice, the whole-genome association analyses scan through a large number of genetic markers (Gs) for the ones associated with an outcome of interest (Y), where Y comes from an alternative while the majority of Gs are not associated with Y; the Y-G relationships are under the ""empirical"" null. This reality can be better represented by two other simulation designs, where design S1.1 simulates Y from an alternative model based on G, then evaluates its association with independently generated G_new; while design S1.2 evaluates the association between permutated Y and G. More than a decade ago, Efron (2004) has noted the important distinction between the ""theoretical"" and ""empirical"" null in false discovery rate control. Using scale tests for variance heterogeneity, direct univariate, and multivariate interaction tests as examples, here we show that not all null simulation designs are equal. In examining the accuracy of a likelihood ratio test, while simulation design S0 suggested the method being accurate, designs S1.1 and S1.2 revealed its increased empirical T1E rate if applied in a real data setting. The inflation becomes more severe at the tail and does not diminish as sample size increases. 
This is an important observation that calls for new practices for methods evaluation and T1E control interpretation.",2018-11-26 +26540668,A Dataset for Breast Cancer Histopathological Image Classification.,"Today, medical image analysis papers require solid experiments to prove the usefulness of proposed methods. However, experiments are often performed on data selected by the researchers, which may come from different institutions, scanners, and populations. Different evaluation measures may be used, making it difficult to compare the methods. In this paper, we introduce a dataset of 7909 breast cancer histopathology images acquired on 82 patients, which is now publicly available from http://web.inf.ufpr.br/vri/breast-cancer-database. The dataset includes both benign and malignant images. The task associated with this dataset is the automated classification of these images in two classes, which would be a valuable computer-aided diagnosis tool for the clinician. In order to assess the difficulty of this task, we show some preliminary results obtained with state-of-the-art image classification systems. The accuracy ranges from 80% to 85%, showing room for improvement is left. By providing this dataset and a standardized evaluation protocol to the scientific community, we hope to gather researchers in both the medical and the machine learning field to advance toward this clinical application.",2015-10-30 +26576653,JSpeciesWS: a web server for prokaryotic species circumscription based on pairwise genome comparison.,"

Unlabelled

JSpecies Web Server (JSpeciesWS) is a user-friendly online service for in silico calculating the extent of identity between two genomes, a parameter routinely used in the process of polyphasic microbial species circumscription. The service measures the average nucleotide identity (ANI) based on BLAST+ (ANIb) and MUMmer (ANIm), as well as correlation indexes of tetra-nucleotide signatures (Tetra). In addition, it provides a Tetra Correlation Search function, which allows to rapidly compare selected genomes against a continuously updated reference database with currently about 32 000 published whole and draft genome sequences. For comparison, own genomes can be uploaded and references can be selected from the JSpeciesWS reference database. The service indicates whether two genomes share genomic identities above or below the species embracing thresholds, and serves as a fast way to allocate unknown genomes in the frame of the hitherto sequenced species.

Availability and implementation

JSpeciesWS is available at http://jspecies.ribohost.com/jspeciesws

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

mrichter@ribocon.com.",2015-11-16 +30154828,A New Machine Learning-Based Framework for Mapping Uncertainty Analysis in RNA-Seq Read Alignment and Gene Expression Estimation.,"One of the main benefits of using modern RNA-Sequencing (RNA-Seq) technology is the more accurate gene expression estimations compared with previous generations of expression data, such as the microarray. However, numerous issues can result in the possibility that an RNA-Seq read can be mapped to multiple locations on the reference genome with the same alignment scores, which occurs in plant, animal, and metagenome samples. Such a read is so-called a multiple-mapping read (MMR). The impact of these MMRs is reflected in gene expression estimation and all downstream analyses, including differential gene expression, functional enrichment, etc. Current analysis pipelines lack the tools to effectively test the reliability of gene expression estimations, thus are incapable of ensuring the validity of all downstream analyses. Our investigation into 95 RNA-Seq datasets from seven plant and animal species (totaling 1,951 GB) indicates an average of roughly 22% of all reads are MMRs. Here we present a machine learning-based tool called GeneQC (Gene expression Quality Control), which can accurately estimate the reliability of each gene's expression level derived from an RNA-Seq dataset. The underlying algorithm is designed based on extracted genomic and transcriptomic features, which are then combined using elastic-net regularization and mixture model fitting to provide a clearer picture of mapping uncertainty for each gene. GeneQC allows researchers to determine reliable expression estimations and conduct further analysis on the gene expression that is of sufficient quality. This tool also enables researchers to investigate continued re-alignment methods to determine more accurate gene expression estimates for those with low reliability. 
Application of GeneQC reveals high level of mapping uncertainty in plant samples and limited, severe mapping uncertainty in animal samples. GeneQC is freely available at http://bmbl.sdstate.edu/GeneQC/home.html.",2018-08-14 +28858827,A Method for Identifying Prevalent Chemical Combinations in the U.S. Population.,"

Background

Through the food and water they ingest, the air they breathe, and the consumer products with which they interact at home and at work, humans are exposed to tens of thousands of chemicals, many of which have not been evaluated to determine their potential toxicities. Furthermore, while current chemical testing tends to focus on individual chemicals, the exposures that people actually experience involve mixtures of chemicals. Unfortunately, the number of mixtures that can be formed from the thousands of environmental chemicals is enormous, and testing all of them would be impossible.

Objectives

We seek to develop and demonstrate a method for identifying those mixtures that are most prevalent in humans.

Methods

We applied frequent itemset mining, a technique traditionally used for market basket analysis, to biomonitoring data from the 2009-2010 cycle of the continuous National Health and Nutrition Examination Survey (NHANES) to identify combinations of chemicals that frequently co-occur in people.

Results

We identified 90 chemical combinations consisting of relatively few chemicals that occur in at least 30% of the U.S. population, as well as three supercombinations consisting of relatively many chemicals that occur in a small but nonnegligible proportion of the U.S. population.

Conclusions

We demonstrated how FIM can be used in conjunction with biomonitoring data to narrow a large number of possible chemical combinations down to a smaller set of prevalent chemical combinations. https://doi.org/10.1289/EHP1265.",2017-08-24 +26499107,SMPD1 Mutation Update: Database and Comprehensive Analysis of Published and Novel Variants.,"Niemann-Pick Types A and B (NPA/B) diseases are autosomal recessive lysosomal storage disorders caused by the deficient activity of acid sphingomyelinase (ASM) because of the mutations in the SMPD1 gene. Here, we provide a comprehensive updated review of already reported and newly identified SMPD1 variants. Among them, 185 have been found in NPA/B patients. Disease-causing variants are equally distributed along the SMPD1 gene; most of them are missense (65.4%) or frameshift (19%) mutations. The most frequently reported mutation worldwide is the p.R610del, clearly associated with an attenuated NP disease type B phenotype. The available information about the impact of 52 SMPD1 variants on ASM mRNA and/or enzymatic activity has been collected and whenever possible, phenotype/genotype correlations were established. In addition, we created a locus-specific database easily accessible at http://www.inpdr.org/genes that catalogs the 417 SMPD1 variants reported to date and provides data on their in silico predicted effects on ASM protein function or mRNA splicing. The information reviewed in this article, providing new insights into the genotype/phenotype correlation, is extremely valuable to facilitate diagnosis and genetic counseling of families affected by NPA/B.",2015-12-01 +28605458,Prioritizing tests of epistasis through hierarchical representation of genomic redundancies.,"Epistasis is defined as a statistical interaction between two or more genomic loci in terms of their association with a phenotype of interest. 
Epistatic loci that are identified using data from Genome-Wide Association Studies (GWAS) provide insights into the interplay among multiple genetic factors, with applications including assessment of susceptibility to complex diseases, decision making in precision medicine, and gaining insights into disease mechanisms. Since the number of genomic loci assayed by GWAS is extremely large (usually in the order of millions), identification of epistatic loci is a statistically difficult and computationally intensive problem. Even when only pairwise interactions are considered, the size of the search space ranges from hundreds of millions to billions of locus pairs. The large number of statistical tests performed also makes sufficient type one error correction imperative. Consequently, efficient algorithms are required to filter the tests that are performed and evaluate large GWAS data sets in a reasonable amount of computation time. It has been observed that many pairwise tests are redundant due to correlations in their genotype values across samples, known as linkage disequilibrium. However, algorithms that have been developed for efficient identification of epistatic loci do not systematically exploit linkage disequilibrium. Here, we propose a new algorithm for fast epistasis detection based on hierarchical representation of linkage disequilibrium (LinDen). We utilize redundancies in genotype patterns between neighboring loci to generate a hierarchical structure and execute a branch-and-bound search to prioritize loci testing based on approximations of a test statistic for pairs of locus groups. The hierarchical organization of tests performed by LinDen allows for efficient scaling based on the screened loci. We test LinDen comprehensively on three data sets obtained from the Wellcome Trust Case Control Consortium: type two diabetes, psoriasis, and hypertension. 
Our results show that, as compared to other state-of-the-art tools for fast epistasis detection, LinDen drastically reduces the number of tests performed while discovering statistically significant locus pairs. LinDen is implemented in C++ and is available as open source at http://compbio.case.edu/linden/.",2017-08-01 +26895996,[Cystic Fibrosis Cloud database: An information system for storage and management of clinical and microbiological data of cystic fibrosis patients].,"The epidemiological and clinical management of cystic fibrosis (CF) patients suffering from acute pulmonary exacerbations or chronic lung infections demands continuous updating of medical and microbiological processes associated with the constant evolution of pathogens during host colonization. In order to monitor the dynamics of these processes, it is essential to have expert systems capable of storing and subsequently extracting the information generated from different studies of the patients and microorganisms isolated from them. In this work we have designed and developed an on-line database based on an information system that allows to store, manage and visualize data from clinical studies and microbiological analysis of bacteria obtained from the respiratory tract of patients suffering from cystic fibrosis. The information system, named Cystic Fibrosis Cloud database is available on the http://servoy.infocomsa.com/cfc_database site and is composed of a main database and a web-based interface, which uses Servoy's product architecture based on Java technology. Although the CFC database system can be implemented as a local program for private use in CF centers, it can also be used, updated and shared by different users who can access the stored information in a systematic, practical and safe manner. The implementation of the CFC database could have a significant impact on the monitoring of respiratory infections, the prevention of exacerbations, the detection of emerging organisms, and the adequacy of control strategies for lung infections in CF patients.",2016-01-01 +23879659,Reannotation and extended community resources for the genome of the non-seed plant Physcomitrella patens provide insights into the evolution of plant gene structures and functions.,"

Background

The moss Physcomitrella patens as a model species provides an important reference for early-diverging lineages of plants and the release of the genome in 2008 opened the doors to genome-wide studies. The usability of a reference genome greatly depends on the quality of the annotation and the availability of centralized community resources. Therefore, in the light of accumulating evidence for missing genes, fragmentary gene structures, false annotations and a low rate of functional annotations on the original release, we decided to improve the moss genome annotation.

Results

Here, we report the complete moss genome re-annotation (designated V1.6) incorporating the increased transcript availability from a multitude of developmental stages and tissue types. We demonstrate the utility of the improved P. patens genome annotation for comparative genomics and new extensions to the cosmoss.org resource as a central repository for this plant ""flagship"" genome. The structural annotation of 32,275 protein-coding genes results in 8387 additional loci including 1456 loci with known protein domains or homologs in Plantae. This is the first release to include information on transcript isoforms, suggesting alternative splicing events for at least 10.8% of the loci. Furthermore, this release now also provides information on non-protein-coding loci. Functional annotations were improved regarding quality and coverage, resulting in 58% annotated loci (previously: 41%) that comprise also 7200 additional loci with GO annotations. Access and manual curation of the functional and structural genome annotation is provided via the http://www.cosmoss.org model organism database.

Conclusions

Comparative analysis of gene structure evolution along the green plant lineage provides novel insights, such as a comparatively high number of loci with 5'-UTR introns in the moss. Comparative analysis of functional annotations reveals expansions of moss house-keeping and metabolic genes and further possibly adaptive, lineage-specific expansions and gains including at least 13% orphan genes.",2013-07-23 +26891066,Evaluation of state-of-the-art segmentation algorithms for left ventricle infarct from late Gadolinium enhancement MR images.,"Studies have demonstrated the feasibility of late Gadolinium enhancement (LGE) cardiovascular magnetic resonance (CMR) imaging for guiding the management of patients with sequelae to myocardial infarction, such as ventricular tachycardia and heart failure. Clinical implementation of these developments necessitates a reproducible and reliable segmentation of the infarcted regions. It is challenging to compare new algorithms for infarct segmentation in the left ventricle (LV) with existing algorithms. Benchmarking datasets with evaluation strategies are much needed to facilitate comparison. This manuscript presents a benchmarking evaluation framework for future algorithms that segment infarct from LGE CMR of the LV. The image database consists of 30 LGE CMR images of both humans and pigs that were acquired from two separate imaging centres. A consensus ground truth was obtained for all data using maximum likelihood estimation. Six widely-used fixed-thresholding methods and five recently developed algorithms are tested on the benchmarking framework. Results demonstrate that the algorithms have better overlap with the consensus ground truth than most of the n-SD fixed-thresholding methods, with the exception of the Full-Width-at-Half-Maximum (FWHM) fixed-thresholding method. Some of the pitfalls of fixed thresholding methods are demonstrated in this work. 
The benchmarking evaluation framework, which is a contribution of this work, can be used to test and benchmark future algorithms that detect and quantify infarct in LGE CMR images of the LV. The datasets, ground truth and evaluation code have been made publicly available through the website: https://www.cardiacatlas.org/web/guest/challenges.",2016-01-28 +26626150,MERAV: a tool for comparing gene expression across human tissues and cell types.,"The oncogenic transformation of normal cells into malignant, rapidly proliferating cells requires major alterations in cell physiology. For example, the transformed cells remodel their metabolic processes to supply the additional demand for cellular building blocks. We have recently demonstrated essential metabolic processes in tumor progression through the development of a methodological analysis of gene expression. Here, we present the Metabolic gEne RApid Visualizer (MERAV, http://merav.wi.mit.edu), a web-based tool that can query a database comprising ∼4300 microarrays, representing human gene expression in normal tissues, cancer cell lines and primary tumors. MERAV has been designed as a powerful tool for whole genome analysis which offers multiple advantages: one can search many genes in parallel; compare gene expression among different tissue types as well as between normal and cancer cells; download raw data; and generate heatmaps; and finally, use its internal statistical tool. Most importantly, MERAV has been designed as a unique tool for analyzing metabolic processes as it includes matrixes specifically focused on metabolic genes and is linked to the Kyoto Encyclopedia of Genes and Genomes pathway search.",2015-11-30 +28506212,GUIDEseq: a bioconductor package to analyze GUIDE-Seq datasets for CRISPR-Cas nucleases.,"

Background

Genome editing technologies developed around the CRISPR-Cas9 nuclease system have facilitated the investigation of a broad range of biological questions. These nucleases also hold tremendous promise for treating a variety of genetic disorders. In the context of their therapeutic application, it is important to identify the spectrum of genomic sequences that are cleaved by a candidate nuclease when programmed with a particular guide RNA, as well as the cleavage efficiency of these sites. Powerful new experimental approaches, such as GUIDE-seq, facilitate the sensitive, unbiased genome-wide detection of nuclease cleavage sites within the genome. Flexible bioinformatics analysis tools for processing GUIDE-seq data are needed.

Results

Here, we describe an open source, open development software suite, GUIDEseq, for GUIDE-seq data analysis and annotation as a Bioconductor package in R. The GUIDEseq package provides a flexible platform with more than 60 adjustable parameters for the analysis of datasets associated with custom nuclease applications. These parameters allow data analysis to be tailored to different nuclease platforms with different length and complexity in their guide and PAM recognition sequences or their DNA cleavage position. They also enable users to customize sequence aggregation criteria, and vary peak calling thresholds that can influence the number of potential off-target sites recovered. GUIDEseq also annotates potential off-target sites that overlap with genes based on genome annotation information, as these may be the most important off-target sites for further characterization. In addition, GUIDEseq enables the comparison and visualization of off-target site overlap between different datasets for a rapid comparison of different nuclease configurations or experimental conditions. For each identified off-target, the GUIDEseq package outputs mapped GUIDE-Seq read count as well as cleavage score from a user specified off-target cleavage score prediction algorithm permitting the identification of genomic sequences with unexpected cleavage activity.

Conclusion

The GUIDEseq package enables analysis of GUIDE-data from various nuclease platforms for any species with a defined genomic sequence. This software package has been used successfully to analyze several GUIDE-seq datasets. The software, source code and documentation are freely available at http://www.bioconductor.org/packages/release/bioc/html/GUIDEseq.html .",2017-05-15 +22430798,MethLAB: a graphical user interface package for the analysis of array-based DNA methylation data.,"Recent evidence suggests that DNA methylation changes may underlie numerous complex traits and diseases. The advent of commercial, array-based methods to interrogate DNA methylation has led to a profusion of epigenetic studies in the literature. Array-based methods, such as the popular Illumina GoldenGate and Infinium platforms, estimate the proportion of DNA methylated at single-base resolution for thousands of CpG sites across the genome. These arrays generate enormous amounts of data, but few software resources exist for efficient and flexible analysis of these data. We developed a software package called MethLAB (http://genetics.emory.edu/conneely/MethLAB) using R, an open source statistical language that can be edited to suit the needs of the user. MethLAB features a graphical user interface (GUI) with a menu-driven format designed to efficiently read in and manipulate array-based methylation data in a user-friendly manner. MethLAB tests for association between methylation and relevant phenotypes by fitting a separate linear model for each CpG site. These models can incorporate both continuous and categorical phenotypes and covariates, as well as fixed or random batch or chip effects. MethLAB accounts for multiple testing by controlling the false discovery rate (FDR) at a user-specified level. Standard output includes a spreadsheet-ready text file and an array of publication-quality figures. 
Considering the growing interest in and availability of DNA methylation data, there is a great need for user-friendly open source analytical tools. With MethLAB, we present a timely resource that will allow users with no programming experience to implement flexible and powerful analyses of DNA methylation data.",2012-03-01 +29420694,L1000FWD: fireworks visualization of drug-induced transcriptomic signatures.,"

Motivation

As part of the NIH Library of Integrated Network-based Cellular Signatures program, hundreds of thousands of transcriptomic signatures were generated with the L1000 technology, profiling the response of human cell lines to over 20 000 small molecule compounds. This effort is a promising approach toward revealing the mechanisms-of-action (MOA) for marketed drugs and other less studied potential therapeutic compounds.

Results

L1000 fireworks display (L1000FWD) is a web application that provides interactive visualization of over 16 000 drug and small-molecule induced gene expression signatures. L1000FWD enables coloring of signatures by different attributes such as cell type, time point, concentration, as well as drug attributes such as MOA and clinical phase. Signature similarity search is implemented to enable the search for mimicking or opposing signatures given as input of up and down gene sets. Each point on the L1000FWD interactive map is linked to a signature landing page, which provides multifaceted knowledge from various sources about the signature and the drug. Notably such information includes most frequent diagnoses, co-prescribed drugs and age distribution of prescriptions as extracted from the Mount Sinai Health System electronic medical records. Overall, L1000FWD serves as a platform for identifying functions for novel small molecules using unsupervised clustering, as well as for exploring drug MOA.

Availability and implementation

L1000FWD is freely accessible at: http://amp.pharm.mssm.edu/L1000FWD.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-06-01 +28172632,SEQualyzer: interactive tool for quality control and exploratory analysis of high-throughput RNA structural profiling data.,"

Summary

To serve numerous functional roles, RNA must fold into specific structures. Determining these structures is thus of paramount importance. The recent advent of high-throughput sequencing-based structure profiling experiments has provided important insights into RNA structure and widened the scope of RNA studies. However, as a broad range of approaches continues to emerge, a universal framework is needed to quantitatively ensure consistent and high-quality data. We present SEQualyzer, a visual and interactive application that makes it easy and efficient to gauge data quality, screen for transcripts with high-quality information and identify discordant replicates in structure profiling experiments. Our methods rely on features common to a wide range of protocols and can serve as standards for quality control and analyses.

Availability and implementation

SEQualyzer is written in R, is platform-independent, and is freely available at http://bme.ucdavis.edu/aviranlab/SEQualyzer.

Contact

saviran@ucdavis.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +27375472,NeuroPigPen: A Scalable Toolkit for Processing Electrophysiological Signal Data in Neuroscience Applications Using Apache Pig.,"The recent advances in neurological imaging and sensing technologies have led to rapid increase in the volume, rate of data generation, and variety of neuroscience data. This ""neuroscience Big data"" represents a significant opportunity for the biomedical research community to design experiments using data with greater timescale, large number of attributes, and statistically significant data size. The results from these new data-driven research techniques can advance our understanding of complex neurological disorders, help model long-term effects of brain injuries, and provide new insights into dynamics of brain networks. However, many existing neuroinformatics data processing and analysis tools were not built to manage large volume of data, which makes it difficult for researchers to effectively leverage this available data to advance their research. We introduce a new toolkit called NeuroPigPen that was developed using Apache Hadoop and Pig data flow language to address the challenges posed by large-scale electrophysiological signal data. NeuroPigPen is a modular toolkit that can process large volumes of electrophysiological signal data, such as Electroencephalogram (EEG), Electrocardiogram (ECG), and blood oxygen levels (SpO2), using a new distributed storage model called Cloudwave Signal Format (CSF) that supports easy partitioning and storage of signal data on commodity hardware. NeuroPigPen was developed with three design principles: (a) Scalability-the ability to efficiently process increasing volumes of data; (b) Adaptability-the toolkit can be deployed across different computing configurations; and (c) Ease of programming-the toolkit can be easily used to compose multi-step data processing pipelines using high-level programming constructs. 
The NeuroPigPen toolkit was evaluated using 750 GB of electrophysiological signal data over a variety of Hadoop cluster configurations ranging from 3 to 30 Data nodes. The evaluation results demonstrate that the toolkit is highly scalable and adaptable, which makes it suitable for use in neuroscience applications as a scalable data processing toolkit. As part of the ongoing extension of NeuroPigPen, we are developing new modules to support statistical functions to analyze signal data for brain connectivity research. In addition, the toolkit is being extended to allow integration with scientific workflow systems. NeuroPigPen is released under BSD license at: https://sites.google.com/a/case.edu/neuropigpen/.",2016-06-06 +25352573,Development of the ECODAB into a relational database for Escherichia coli O-antigens and other bacterial polysaccharides.,"Escherichia coli O-antigen database (ECODAB) is a web-based application to support the collection of E. coli O-antigen structures, polymerase and flippase amino acid sequences, NMR chemical shift data of O-antigens as well as information on glycosyltransferases (GTs) involved in the assembly of O-antigen polysaccharides. The database content has been compiled from scientific literature. Furthermore, the system has evolved from being a repository to one that can be used for generating novel data on its own. GT specificity is suggested through sequence comparison with GTs whose function is known. The migration of ECODAB to a relational database has allowed the automation of all processes to update, retrieve and present information, thereby, endowing the system with greater flexibility and improved overall performance. ECODAB is freely available at http://www.casper.organ.su.se/ECODAB/. Currently, data on 169 E. coli unique O-antigen entries and 338 GTs is covered. 
Moreover, the scope of the database has been extended so that polysaccharide structure and related information from other bacteria subsequently can be added, for example, from Streptococcus pneumoniae.",2014-10-28 +29668671,Propagating annotations of molecular networks using in silico fragmentation.,"The annotation of small molecules is one of the most challenging and important steps in untargeted mass spectrometry analysis, as most of our biological interpretations rely on structural annotations. Molecular networking has emerged as a structured way to organize and mine data from untargeted tandem mass spectrometry (MS/MS) experiments and has been widely applied to propagate annotations. However, propagation is done through manual inspection of MS/MS spectra connected in the spectral networks and is only possible when a reference library spectrum is available. One of the alternative approaches used to annotate an unknown fragmentation mass spectrum is through the use of in silico predictions. One of the challenges of in silico annotation is the uncertainty around the correct structure among the predicted candidate lists. Here we show how molecular networking can be used to improve the accuracy of in silico predictions through propagation of structural annotations, even when there is no match to a MS/MS spectrum in spectral libraries. This is accomplished through creating a network consensus of re-ranked structural candidates using the molecular network topology and structural similarity to improve in silico annotations. 
The Network Annotation Propagation (NAP) tool is accessible through the GNPS web-platform https://gnps.ucsd.edu/ProteoSAFe/static/gnps-theoretical.jsp.",2018-04-18 +26819881,Rethinking Mass Spectrometry-Based Small Molecule Identification Strategies in Metabolomics.,"The CASMI 2013 (Critical Assessment of Small Molecule Identification 2013, http://casmi-contest.org/) contest was held to systematically evaluate strategies used for mass spectrometry-based identification of small molecules. The results of the contest highlight that, because of the extensive efforts made towards the construction of databases and search tools, database-assisted small molecule identification can now automatically annotate some metabolite signals found in the metabolome data. In this commentary, the current state of metabolite annotation is compared with that of transcriptomics and proteomics. The comparison suggested that certain limitations in the metabolite annotation process need to be addressed, such as (i) the completeness of the database, (ii) the conversion between raw data and structure, (iii) the one-to-one correspondence between measured data and correct search results, and (iv) the false discovery rate in database search results.",2014-08-16 +26450949,ProPepper: a curated database for identification and analysis of peptide and immune-responsive epitope composition of cereal grain protein families. ,"ProPepper is a database that contains prolamin proteins identified from true grasses (Poaceae), their peptides obtained with single- and multi-enzyme in silico digestions as well as linear T- and B-cell-specific epitopes that are responsible for wheat-related food disorders. 
The integrated database and analysis platform contains datasets that are collected from multiple public databases (UniprotKB, IEDB, NCBI GenBank), manually curated and annotated, and interpreted in three main data tables: Protein-, Peptide- and Epitope list views that are cross-connected by unique identifications. Altogether 21 genera and 80 different species are represented. Currently, the database contains 2146 unique and complete protein sequences related to 2618 GenBank entries and 35 657 unique peptide sequences that are a result of 575 110 unique digestion events obtained by in silico digestion methods involving six proteolytic enzymes and their combinations. The interface allows advanced global and parametric search functions along with a download option, with direct connections to the relevant public databases. Database URL: https://propepper.net.",2015-10-08 +26615197,StreptomeDB 2.0--an extended resource of natural products produced by streptomycetes.,"Over the last decades, the genus Streptomyces has stirred huge interest in the scientific community as a source of bioactive compounds. The majority of all known antibiotics is isolated from these bacterial strains, as well as a variety of other drugs such as antitumor agents, immunosuppressants and antifungals. To the best of our knowledge, StreptomeDB was the first database focusing on compounds produced by streptomycetes. The new version presented herein represents a major step forward: its content has been increased to over 4000 compounds and more than 2500 host organisms. In addition, we have extended the background information and included hundreds of new manually curated references to literature. The latest update features a unique scaffold-based navigation system, which enables the exploration of the chemical diversity of StreptomeDB on a structural basis. We have included a phylogenetic tree, based on 16S rRNA sequences, which comprises more than two-thirds of the included host organisms. 
It enables visualizing the frequency, appearance, and persistence of compounds and scaffolds in an evolutionary context. Additionally, we have included predicted MS- and NMR-spectra of thousands of compounds for assignment of experimental data. The database is freely accessible via http://www.pharmaceutical-bioinformatics.org/streptomedb.",2015-11-28 +26980512,ncRNA orthologies in the vertebrate lineage. ,"Annotation of orthologous and paralogous genes is necessary for many aspects of evolutionary analysis. Methods to infer these homology relationships have traditionally focused on protein-coding genes and evolutionary models used by these methods normally assume the positions in the protein evolve independently. However, as our appreciation for the roles of non-coding RNA genes has increased, consistently annotated sets of orthologous and paralogous ncRNA genes are increasingly needed. At the same time, methods such as PHASE or RAxML have implemented substitution models that consider pairs of sites to enable proper modelling of the loops and other features of RNA secondary structure. Here, we present a comprehensive analysis pipeline for the automatic detection of orthologues and paralogues for ncRNA genes. We focus on gene families represented in Rfam and for which a specific covariance model is provided. For each family ncRNA genes found in all Ensembl species are aligned using Infernal, and several trees are built using different substitution models. In parallel, a genomic alignment that includes the ncRNA genes and their flanking sequence regions is built with PRANK. This alignment is used to create two additional phylogenetic trees using the neighbour-joining (NJ) and maximum-likelihood (ML) methods. The trees arising from both the ncRNA and genomic alignments are merged using TreeBeST, which reconciles them with the species tree in order to identify speciation and duplication events. 
The final tree is used to infer the orthologues and paralogues following Fitch's definition. We also determine gene gain and loss events for each family using CAFE. All data are accessible through the Ensembl Comparative Genomics ('Compara') API, on our FTP site and are fully integrated in the Ensembl genome browser, where they can be accessed in a user-friendly manner. Database URL: http://www.ensembl.org.",2016-03-15 +27678076,A drug target slim: using gene ontology and gene ontology annotations to navigate protein-ligand target space in ChEMBL.,"

Background

The process of discovering new drugs is a lengthy, time-consuming and expensive process. Modern day drug discovery relies heavily on the rapid identification of novel 'targets', usually proteins that can be modulated by small molecule drugs to cure or minimise the effects of a disease. Of the 20,000 proteins currently reported as comprising the human proteome, just under a quarter of these can potentially be modulated by known small molecules. Storing information in curated, actively maintained drug discovery databases can help researchers access current drug discovery information quickly. However with the increase in the amount of data generated from both experimental and in silico efforts, databases can become very large very quickly and information retrieval from them can become a challenge. The development of database tools that facilitate rapid information retrieval is important to keep up with the growth of databases.

Description

We have developed a Gene Ontology-based navigation tool (Gene Ontology Tree) to help users retrieve biological information to single protein targets in the ChEMBL drug discovery database. 99 % of single protein targets in ChEMBL have at least one GO annotation associated with them. There are 12,500 GO terms associated to 6200 protein targets in the ChEMBL database resulting in a total of 140,000 annotations. The slim we have created, the 'ChEMBL protein target slim' allows broad categorisation of the biology of 90 % of the protein targets using just 300 high level, informative GO terms. We used the GO slim method of assigning fewer higher level GO groupings to numerous very specific lower level terms derived from the GOA to describe a set of GO terms relevant to proteins in ChEMBL. We then used the slim created to provide a web based tool that allows a quick and easy navigation of protein target space. Terms from the GO are used to capture information on protein molecular function, biological process and subcellular localisations. The ChEMBL database also provides compound information for small molecules that have been tested for their effects on these protein targets. The 'ChEMBL protein target slim' provides a means of firstly describing the biology of protein drug targets and secondly allows users to easily establish a connection between biological and chemical information regarding drugs and drug targets in ChEMBL. The 'ChEMBL protein target slim' is available as a browsable 'Gene Ontology Tree' on the ChEMBL site under the browse targets tab ( https://www.ebi.ac.uk/chembl/target/browser ). A ChEMBL protein target slim OBO file containing the GO slim terms pertinent to ChEMBL is available from the GOC website ( http://geneontology.org/page/go-slim-and-subset-guide ).

Conclusions

We have created a protein target navigation tool based on the 'ChEMBL protein target slim'. The 'ChEMBL protein target slim' provides a way of browsing protein targets in ChEMBL using high level GO terms that describe the molecular functions, processes and subcellular localisations of protein drug targets in drug discovery. The tool also allows user to establish a link between ontological groupings representing protein target biology to relevant compound information in ChEMBL. We have demonstrated by the use of a simple example how the 'ChEMBL protein target slim' can be used to link biological processes with drug information based on the information in the ChEMBL database. The tool has potential to aid in areas of drug discovery such as drug repurposing studies or drug-disease-protein pathways.",2016-09-27 +26555441,PharmDB-K: Integrated Bio-Pharmacological Network Database for Traditional Korean Medicine.,"Despite the growing attention given to Traditional Medicine (TM) worldwide, there is no well-known, publicly available, integrated bio-pharmacological Traditional Korean Medicine (TKM) database for researchers in drug discovery. In this study, we have constructed PharmDB-K, which offers comprehensive information relating to TKM-associated drugs (compound), disease indication, and protein relationships. To explore the underlying molecular interaction of TKM, we integrated fourteen different databases, six Pharmacopoeias, and literature, and established a massive bio-pharmacological network for TKM and experimentally validated some cases predicted from the PharmDB-K analyses. Currently, PharmDB-K contains information about 262 TKMs, 7,815 drugs, 3,721 diseases, 32,373 proteins, and 1,887 side effects. One of the unique sets of information in PharmDB-K includes 400 indicator compounds used for standardization of herbal medicine. 
Furthermore, we are operating PharmDB-K via phExplorer (a network visualization software) and BioMart (a data federation framework) for convenient search and analysis of the TKM network. Database URL: http://pharmdb-k.org, http://biomart.i-pharm.org.",2015-11-10 +28146597,Using Plan-Do-Study-Act Cycle to Enhance Completeness of Suicide Firearm Reporting.,"The Rhode Island Violent Death Reporting System (RIVDRS) collects comprehensive surveillance data on violent deaths to support violence prevention programs in Rhode Island and nationwide. Successful collection of firearm information is critical to understanding gun violence in public health. A recent quality improvement (QI) project was performed to improve gun information collection in the RIVDRS program. Our aim was to increase the presence of firearm model information for 2014 suicides from 50% to 80% by December 31, 2015. We used the 2014 RIVDRS data and the Plan-Do-Study-Act cycle for this project. Our efforts achieved a 50% increase in the number of firearm model reporting. If we work more closely with police departments, they may understand the data importance, and be more likely to include the firearm information in their reports. We describe this process and provide lessons learned that can be generalizable to other states' violent death reporting system. [Full article available at http://rimed.org/rimedicaljournal-2017-02.asp].",2017-02-01 +30565770,Smoking during pregnancy increases chemerin expression in neonatal tissue.,"

New findings

What is the central question of this study? Is chemerin, an adipokine implicated in obesity, increased in neonates following in utero cigarette smoke exposure? What is the main finding and its importance? Chemerin mRNA expression was increased and chemerin DNA methylation was decreased in babies born to mothers who smoked during pregnancy. These data provide a potential mechanism that may be mediating the increased obesity risk in individuals that are born to mothers who smoked during pregnancy.

Abstract

It has been shown that in utero tobacco exposure increases offspring risk for obesity, but the mechanisms responsible for this increased risk are not well understood. Chemerin is an adipokine that regulates adipocyte differentiation. This chemokine is elevated in obese individuals and with smoke exposure, but its levels have not been measured in neonates exposed to cigarette smoke in utero. We examined chemerin gene expression [n = 31 non-smoker (NS) and 15 smoker (S)] and DNA methylation (n = 28 NS and n = 11 S) in skin collected from babies born to mothers who smoked during pregnancy as compared to non-smoking controls. Quality RNA and DNA were isolated from foreskin tissue following circumcision, and chemerin gene expression and DNA methylation were assessed. Further, in a second cohort, we utilized primary dermal foreskin fibroblasts as a functional measure of adipogenesis in living cells (n = 11 NS and n = 8 S). Cells were stimulated with an adipogenic cocktail, mRNA was isolated from cells after 14 days, and chemerin gene expression assessed via real-time PCR. Chemerin mRNA was elevated in both whole tissue (NS: 2409.20 ± 555.28 counts and S: 2966.72 ± 636.84 counts; P < 0.01) and primary fibroblasts (NS: 1.12 ± 0.55 2 Δ Δ C T and S: 2.13 ± 1.34 2 Δ Δ C T ; P = 0.04) collected from infants born to smoking mothers. Chemerin DNA methylation was reduced in whole tissue of offspring born to smokers (NS: 4.18 ± 1.28 and S: 3.07 ± 1.31%; P = 0.02), which may contribute to the increased gene expression. 
Neonates born to mothers who smoke during pregnancy exhibit distinct changes in chemerin gene expression in response to in utero tobacco smoke exposure which are regulated in part by epigenetic alterations.",2018-11-22 +24861615,PIQMIe: a web server for semi-quantitative proteomics data management and analysis.,"We present the Proteomics Identifications and Quantitations Data Management and Integration Service or PIQMIe that aids in reliable and scalable data management, analysis and visualization of semi-quantitative mass spectrometry based proteomics experiments. PIQMIe readily integrates peptide and (non-redundant) protein identifications and quantitations from multiple experiments with additional biological information on the protein entries, and makes the linked data available in the form of a light-weight relational database, which enables dedicated data analyses (e.g. in R) and user-driven queries. Using the web interface, users are presented with a concise summary of their proteomics experiments in numerical and graphical forms, as well as with a searchable protein grid and interactive visualization tools to aid in the rapid assessment of the experiments and in the identification of proteins of interest. The web server not only provides data access through a web interface but also supports programmatic access through RESTful web service. The web server is available at http://piqmie.semiqprot-emc.cloudlet.sara.nl or http://www.bioinformatics.nl/piqmie. This website is free and open to all users and there is no login requirement.",2014-05-26 +29186344,MOCASSIN-prot: a multi-objective clustering approach for protein similarity networks.,"Motivation:Proteins often include multiple conserved domains. Various evolutionary events including duplication and loss of domains, domain shuffling, as well as sequence divergence contribute to generating complexities in protein structures, and consequently, in their functions. 
The evolutionary history of proteins is hence best modeled through networks that incorporate information both from the sequence divergence and the domain content. Here, a game-theoretic approach proposed for protein network construction is adapted into the framework of multi-objective optimization, and extended to incorporate clustering refinement procedure. Results:The new method, MOCASSIN-prot, was applied to cluster multi-domain proteins from ten genomes. The performance of MOCASSIN-prot was compared against two protein clustering methods, Markov clustering (TRIBE-MCL) and spectral clustering (SCPS). We showed that compared to these two methods, MOCASSIN-prot, which uses both domain composition and quantitative sequence similarity information, generates fewer false positives. It achieves more functionally coherent protein clusters and better differentiates protein families. Availability and implementation:MOCASSIN-prot, implemented in Perl and Matlab, is freely available at http://bioinfolab.unl.edu/emlab/MOCASSINprot. Contact:emoriyama2@unl.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +29843589,RnaSeqSampleSize: real data based sample size estimation for RNA sequencing.,"

Background

One of the most important and often neglected components of a successful RNA sequencing (RNA-Seq) experiment is sample size estimation. A few negative binomial model-based methods have been developed to estimate sample size based on the parameters of a single gene. However, thousands of genes are quantified and tested for differential expression simultaneously in RNA-Seq experiments. Thus, additional issues should be carefully addressed, including the false discovery rate for multiple statistic tests, widely distributed read counts and dispersions for different genes.

Results

To solve these issues, we developed a sample size and power estimation method named RnaSeqSampleSize, based on the distributions of gene average read counts and dispersions estimated from real RNA-seq data. Datasets from previous, similar experiments such as the Cancer Genome Atlas (TCGA) can be used as a point of reference. Read counts and their dispersions were estimated from the reference's distribution; using that information, we estimated and summarized the power and sample size. RnaSeqSampleSize is implemented in R language and can be installed from Bioconductor website. A user friendly web graphic interface is provided at http://cqs.mc.vanderbilt.edu/shiny/RnaSeqSampleSize/ .

Conclusions

RnaSeqSampleSize provides a convenient and powerful way for power and sample size estimation for an RNAseq experiment. It is also equipped with several unique features, including estimation for interested genes or pathway, power curve visualization, and parameter optimization.",2018-05-30 +26194460,Online combination algorithm for non-invasive assessment of chronic hepatitis B related liver fibrosis and cirrhosis in resource-limited settings.,"

Objective

The use of commercially available noninvasive markers for chronic hepatitis B (CHB) related fibrosis is not widely available in developing countries so clinicians in those countries frequently use free alternatives. We aimed to create an optimized algorithm for selection of patients with the highest probability for presence/absence of significant liver fibrosis and cirrhosis based on the use of multiple free scores.

Methods

We evaluated six free noninvasive markers for CHB related fibrosis against liver biopsy and selected the best thresholds for prediction/exclusion of significant fibrosis and cirrhosis in CHB patients. Algorithm based on four scores and their corresponding thresholds was created.

Results

The calculator based on developed algorithm can be found at http://www.chb-lfc.com. We evaluated 211 patients in main group and 65 patients in external validation group. We selected four scores for creation of combination algorithm. The algorithm was able to classify 123/211 (58.3%) patients with a 93.5% accuracy of correct classification for prediction of presence/absence of significant fibrosis in main group. In validation group, the algorithm was able to classify 48/65 (73.8%) of patients with 93.8% (45/48) overall accuracy. When used to predict presence/absence of cirrhosis, the algorithm was able to correctly classify 181/211 (85.8%) and 59/65 (90.8%) of patients in main and validation group, respectively, with an overall accuracy of 97.8% and 98.3%, respectively.

Conclusion

Developed algorithm based on routine laboratory tests is a usable, applicable and accurate tool for diagnosis of CHB related fibrosis and cirrhosis, suitable for resource-limited settings where more expensive modalities are unavailable.",2015-07-18 +25393771,Computational survey of sequence specificity for protein terminal tags covering nine organisms and its application to protein identification.,"In 1998, Wilkins et al. (J. Mol. Biol. 1998, 278, 599-608) reported high specificity in terminal regions (terminal tags) of 15 519 proteins from five organisms and proposed a methodology for identifying proteins by terminal tags. However, their examined sequence data were not based on complete genome sequences. Here, we examined current proteome data (217 249 entries from UniProt 2013_6 complete/reference proteome for nine organisms including human) in terms of the specificity of terminal tags and their computational annotation. One example from the results indicated that the specificity of N-terminal tags plateaued at 28% at a length of six residues for human; even when using both N- and C-terminal tags, specificity was merely 66%. In order to determine the cause of these low specificities, the annotation of proteins sharing terminal tags with other proteins was examined. The results suggested that a large majority were phylogenetically or functionally related, whereas nonrelated proteins sharing terminal tags made up less than 1% of human proteome data. On the basis of these findings, we constructed the terminal tag sequence database ProteinCarta (http://ms3d.jp/software/proteincarta/), which includes all terminal tags of proteomes from the nine organisms analyzed here, in order to confirm the specificity of terminal tags and to identify the parent protein.",2014-12-02 +23658685,Generation and analysis of the expressed sequence tags from the mycelium of Ganoderma lucidum.,"Ganoderma lucidum (G. 
lucidum) is a medicinal mushroom renowned in East Asia for its potential biological effects. To enable a systematic exploration of the genes associated with the various phenotypes of the fungus, the genome consortium of G. lucidum has carried out an expressed sequence tag (EST) sequencing project. Using a Sanger sequencing based approach, 47,285 ESTs were obtained from in vitro cultures of G. lucidum mycelium of various durations. These ESTs were further clustered and merged into 7,774 non-redundant expressed loci. The features of these expressed contigs were explored in terms of over-representation, alternative splicing, and natural antisense transcripts. Our results provide an invaluable information resource for exploring the G. lucidum transcriptome and its regulation. Many cases of the genes over-represented in fast-growing dikaryotic mycelium are closely related to growth, such as cell wall and bioactive compound synthesis. In addition, the EST-genome alignments containing putative cassette exons and retained introns were manually curated and then used to make inferences about the predominating splice-site recognition mechanism of G. lucidum. Moreover, a number of putative antisense transcripts have been pinpointed, from which we noticed that two cases are likely to reveal hitherto undiscovered biological pathways. To allow users to access the data and the initial analysis of the results of this project, a dedicated web site has been created at http://csb2.ym.edu.tw/est/.",2013-05-02 +21959865,NetSlim: high-confidence curated signaling maps.,"We previously developed NetPath as a resource for comprehensive manually curated signal transduction pathways. The pathways in NetPath contain a large number of molecules and reactions which can sometimes be difficult to visualize or interpret given their complexity. 
To overcome this potential limitation, we have developed a set of more stringent curation and inclusion criteria for pathway reactions to generate high-confidence signaling maps. NetSlim is a new resource that contains this 'core' subset of reactions for each pathway for easy visualization and manipulation. The pathways in NetSlim are freely available at http://www.netpath.org/netslim.",2011-09-29 +29981671,Evaluation of P-POSSUM Risk Scoring System in Prediction of Morbidity and Mortality after Pancreaticoduodenectomy.,"Background: POSSUM and P-POSSUM are risk scores recommended by ERAS Society for the preoperative evaluation of patients undergoing major surgery. Methods: This study includes 113 consecutive pancreaticoduodenectomy performed in a single centre between July 2013-December 2015. Patients data were prospectively collected using Excel 2009 and retrospectively analysed with R v3.2.4 software. Biological status score, surgical severity score and risk scores for complications and death were calculated using: http://www.riskprediction.org.uk/index-pp.php. Results: Morbidity rate was 61,95%: 19,47% general complications, 14,16% wound infections and 28,32% PD specific complications (11,5% POPF; 8,85% DGE and 6,19% PPH). Comparing the observed and estimated morbidity and mortality, we obtained statistical significant results (p=0,05 and p=0,03, respectively). When we considered only specific PD complications and subsequent mortality, there was no longer significant difference between observed and estimated values (p=0,8 and p=0,86). The area under the ROC curve was 0,61 for morbidity and 0,64 for specific PD morbidity, respectively 0,61 for mortality and 0,68 for specific PD complications related mortality.

Conclusion

P-POSSUM represents a useful tool for appreciating the complication and death risk after PD, but better results could be obtained by also considering specific PD risk factors.",2018-05-01 +29106451,pLoc-mHum: predict subcellular localization of multi-location human proteins via general PseAAC to winnow out the crucial GO information.,"

Motivation

For an in-depth understanding of the functions of proteins in a cell, the knowledge of their subcellular localization is indispensable. The current study is focused on human protein subcellular location prediction based on the sequence information alone. Although considerable efforts have been made in this regard, the problem is far from being solved yet. Most existing methods can be used to deal with single-location proteins only. Actually, proteins with multi-locations may have some special biological functions that are particularly important for both basic research and drug design.

Results

Using the multi-label theory, we present a new predictor called 'pLoc-mHum' by extracting the crucial GO (Gene Ontology) information into the general PseAAC (Pseudo Amino Acid Composition). Rigorous cross-validations on a same stringent benchmark dataset have indicated that the proposed pLoc-mHum predictor is remarkably superior to iLoc-Hum, the state-of-the-art method in predicting the human protein subcellular localization.

Availability and implementation

To maximize the convenience of most experimental scientists, a user-friendly web-server for the new predictor has been established at http://www.jci-bioinfo.cn/pLoc-mHum/, by which users can easily get their desired results without the need to go through the complicated mathematics involved.

Contact

xcheng@gordonlifescience.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-05-01 +26708333,simDEF: definition-based semantic similarity measure of gene ontology terms for functional similarity analysis of genes.,"

Motivation

Measures of protein functional similarity are essential tools for function prediction, evaluation of protein-protein interactions (PPIs) and other applications. Several existing methods perform comparisons between proteins based on the semantic similarity of their GO terms; however, these measures are highly sensitive to modifications in the topological structure of GO, tend to be focused on specific analytical tasks and concentrate on the GO terms themselves rather than considering their textual definitions.

Results

We introduce simDEF, an efficient method for measuring semantic similarity of GO terms using their GO definitions, which is based on the Gloss Vector measure commonly used in natural language processing. The simDEF approach builds optimized definition vectors for all relevant GO terms, and expresses the similarity of a pair of proteins as the cosine of the angle between their definition vectors. Relative to existing similarity measures, when validated on a yeast reference database, simDEF improves correlation with sequence homology by up to 50%, shows a correlation improvement >4% with gene expression in the biological process hierarchy of GO and increases PPI predictability by > 2.5% in F1 score for molecular function hierarchy.

Availability and implementation

Datasets, results and source code are available at http://kiwi.cs.dal.ca/Software/simDEF CONTACT: ahmad.pgh@dal.ca or beiko@cs.dal.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-26 +30092815,IDTAXA: a novel approach for accurate taxonomic classification of microbiome sequences.,"

Background

Microbiome studies often involve sequencing a marker gene to identify the microorganisms in samples of interest. Sequence classification is a critical component of this process, whereby sequences are assigned to a reference taxonomy containing known sequence representatives of many microbial groups. Previous studies have shown that existing classification programs often assign sequences to reference groups even if they belong to novel taxonomic groups that are absent from the reference taxonomy. This high rate of ""over classification"" is particularly detrimental in microbiome studies because reference taxonomies are far from comprehensive.

Results

Here, we introduce IDTAXA, a novel approach to taxonomic classification that employs principles from machine learning to reduce over classification errors. Using multiple reference taxonomies, we demonstrate that IDTAXA has higher accuracy than popular classifiers such as BLAST, MAPSeq, QIIME, SINTAX, SPINGO, and the RDP Classifier. Similarly, IDTAXA yields far fewer over classifications on Illumina mock microbial community data when the expected taxa are absent from the training set. Furthermore, IDTAXA offers many practical advantages over other classifiers, such as maintaining low error rates across varying input sequence lengths and withholding classifications from input sequences composed of random nucleotides or repeats.

Conclusions

IDTAXA's classifications may lead to different conclusions in microbiome studies because of the substantially reduced number of taxa that are incorrectly identified through over classification. Although misclassification error is relatively minor, we believe that many remaining misclassifications are likely caused by errors in the reference taxonomy. We describe how IDTAXA is able to identify many putative mislabeling errors in reference taxonomies, enabling training sets to be automatically corrected by eliminating spurious sequences. IDTAXA is part of the DECIPHER package for the R programming language, available through the Bioconductor repository or accessible online ( http://DECIPHER.codes ).",2018-08-09 +28960548,Integrative structure modeling with the Integrative Modeling Platform.,"Building models of a biological system that are consistent with the myriad data available is one of the key challenges in biology. Modeling the structure and dynamics of macromolecular assemblies, for example, can give insights into how biological systems work, evolved, might be controlled, and even designed. Integrative structure modeling casts the building of structural models as a computational optimization problem, for which information about the assembly is encoded into a scoring function that evaluates candidate models. Here, we describe our open source software suite for integrative structure modeling, Integrative Modeling Platform (https://integrativemodeling.org), and demonstrate its use.",2017-10-10 +27978814,SwitchFinder - a novel method and query facility for discovering dynamic gene expression patterns.,"

Background

Biological systems and processes are highly dynamic. To gain insights into their functioning, time-resolved measurements are necessary. Time-resolved gene expression data captures temporal behaviour of the genes genome-wide under various biological conditions: in response to stimuli, during cell cycle, differentiation or developmental programs. Dissecting dynamic gene expression patterns from this data may shed light on the functioning of the gene regulatory system. The present approach facilitates this discovery. The fundamental idea behind it is the following: there are change-points (switches) in the gene behaviour separating intervals of increasing and decreasing activity, whereas the intervals may have different durations. Elucidating the switch-points is important for the identification of biologically meaningful features and patterns of the gene dynamics.

Results

We developed a statistical method, called SwitchFinder, for the analysis of time-series data, in particular gene expression data, based on a change-point model. Fitting the model to the gene expression time-courses indicates switch-points between increasing and decreasing activities of each gene. Two types of the model - based on linear and on generalized logistic function - were used to capture the data between the switch-points. Model inference was facilitated with the Bayesian methodology using Markov chain Monte Carlo (MCMC) technique Gibbs sampling. Further on, we introduced features of the switch-points: growth, decay, spike and cleft, which reflect important dynamic aspects. With this, the gene expression profiles are represented in a qualitative manner - as sets of the dynamic features at their onset-times. We developed a Web application of the approach, enabling to put queries to the gene expression time-courses and to deduce groups of genes with common dynamic patterns. SwitchFinder was applied to our original data - the gene expression time-series measured in neuroblastoma cell line upon treatment with all-trans retinoic acid (ATRA). The analysis revealed eight patterns of the gene expression responses to ATRA, indicating the induction of the BMP, WNT, Notch, FGF and NTRK-receptor signaling pathways involved in cell differentiation, as well as the repression of the cell-cycle related genes.

Conclusions

SwitchFinder is a novel approach to the analysis of biological time-series data, supporting inference and interactive exploration of its inherent dynamic patterns, hence facilitating biological discovery process. SwitchFinder is freely available at https://newbioinformatics.eu/switchfinder.",2016-12-15 +29390075,IW-Scoring: an Integrative Weighted Scoring framework for annotating and prioritizing genetic variations in the noncoding genome.,"The vast majority of germline and somatic variations occur in the noncoding part of the genome, only a small fraction of which are believed to be functional. From the tens of thousands of noncoding variations detectable in each genome, identifying and prioritizing driver candidates with putative functional significance is challenging. To address this, we implemented IW-Scoring, a new Integrative Weighted Scoring model to annotate and prioritise functionally relevant noncoding variations. We evaluate 11 scoring methods, and apply an unsupervised spectral approach for subsequent selective integration into two linear weighted functional scoring schemas for known and novel variations. IW-Scoring produces stable high-quality performance as the best predictors for three independent data sets. We demonstrate the robustness of IW-Scoring in identifying recurrent functional mutations in the TERT promoter, as well as disease SNPs in proximity to consensus motifs and with gene regulatory effects. Using follicular lymphoma as a paradigmatic cancer model, we apply IW-Scoring to locate 11 recurrently mutated noncoding regions in 14 follicular lymphoma genomes, and validate 9 of these regions in an extension cohort, including the promoter and enhancer regions of PAX5. Overall, IW-Scoring demonstrates greater versatility in identifying trait- and disease-associated noncoding variants. 
Scores from IW-Scoring as well as other methods are freely available from http://www.snp-nexus.org/IW-Scoring/.",2018-05-01 +26553811,PASS2 database for the structure-based sequence alignment of distantly related SCOP domain superfamilies: update to version 5 and added features.,"Structure-based sequence alignment is an essential step in assessing and analysing the relationship of distantly related proteins. PASS2 is a database that records such alignments for protein domain superfamilies and has been constantly updated periodically. This update of the PASS2 version, named as PASS2.5, directly corresponds to the SCOPe 2.04 release. All SCOPe structural domains that share less than 40% sequence identity, as defined by the ASTRAL compendium of protein structures, are included. The current version includes 1977 superfamilies and has been assembled utilizing the structure-based sequence alignment protocol. Such an alignment is obtained initially through MATT, followed by a refinement through the COMPARER program. The JOY program has been used for structural annotations of such alignments. In this update, we have automated the protocol and focused on inclusion of new features such as mapping of GO terms, absolutely conserved residues among the domains in a superfamily and inclusion of PDBs, that are absent in SCOPe 2.04, using the HMM profiles from the alignments of the superfamily members and are provided as a separate list. We have also implemented a more user-friendly manner of data presentation and options for downloading more features. PASS2.5 version is available at http://caps.ncbs.res.in/pass2/.",2015-11-08 +29194474,Power Analysis for Genetic Association Test (PAGEANT) provides insights to challenges for rare variant association studies.,"

Motivation

Genome-wide association studies are now shifting focus from analysis of common to rare variants. As power for association testing for individual rare variants may often be low, various aggregate level association tests have been proposed to detect genetic loci. Typically, power calculations for such tests require specification of large number of parameters, including effect sizes and allele frequencies of individual variants, making them difficult to use in practice. We propose to approximate power to a varying degree of accuracy using a smaller number of key parameters, including the total genetic variance explained by multiple variants within a locus.

Results

We perform extensive simulation studies to assess the accuracy of the proposed approximations in realistic settings. Using these simplified power calculations, we develop an analytic framework to obtain bounds on genetic architecture of an underlying trait given results from genome-wide association studies with rare variants. Finally, we provide insights into the required quality of annotation/functional information for identification of likely causal variants to make meaningful improvement in power.

Availability and implementation

A shiny application that allows a variety of Power Analysis of GEnetic AssociatioN Tests (PAGEANT), in R is made publicly available at https://andrewhaoyu.shinyapps.io/PAGEANT/.

Contact

nilanjan@jhu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-05-01 +24948611,Lynx web services for annotations and systems analysis of multi-gene disorders.,"Lynx is a web-based integrated systems biology platform that supports annotation and analysis of experimental data and generation of weighted hypotheses on molecular mechanisms contributing to human phenotypes and disorders of interest. Lynx has integrated multiple classes of biomedical data (genomic, proteomic, pathways, phenotypic, toxicogenomic, contextual and others) from various public databases as well as manually curated data from our group and collaborators (LynxKB). Lynx provides tools for gene list enrichment analysis using multiple functional annotations and network-based gene prioritization. Lynx provides access to the integrated database and the analytical tools via REST based Web Services (http://lynx.ci.uchicago.edu/webservices.html). This comprises data retrieval services for specific functional annotations, services to search across the complete LynxKB (powered by Lucene), and services to access the analytical tools built within the Lynx platform.",2014-06-19 +26181593,"Identification of Ohnolog Genes Originating from Whole Genome Duplication in Early Vertebrates, Based on Synteny Comparison across Multiple Genomes.","Whole genome duplications (WGD) have now been firmly established in all major eukaryotic kingdoms. In particular, all vertebrates descend from two rounds of WGDs, that occurred in their jawless ancestor some 500 MY ago. Paralogs retained from WGD, also coined 'ohnologs' after Susumu Ohno, have been shown to be typically associated with development, signaling and gene regulation. Ohnologs, which amount to about 20 to 35% of genes in the human genome, have also been shown to be prone to dominant deleterious mutations and frequently implicated in cancer and genetic diseases. 
Hence, identifying ohnologs is central to better understand the evolution of vertebrates and their susceptibility to genetic diseases. Early computational analyses to identify vertebrate ohnologs relied on content-based synteny comparisons between the human genome and a single invertebrate outgroup genome or within the human genome itself. These approaches are thus limited by lineage specific rearrangements in individual genomes. We report, in this study, the identification of vertebrate ohnologs based on the quantitative assessment and integration of synteny conservation between six amniote vertebrates and six invertebrate outgroups. Such a synteny comparison across multiple genomes is shown to enhance the statistical power of ohnolog identification in vertebrates compared to earlier approaches, by overcoming lineage specific genome rearrangements. Ohnolog gene families can be browsed and downloaded for three statistical confidence levels or recompiled for specific, user-defined, significance criteria at http://ohnologs.curie.fr/. In the light of the importance of WGD on the genetic makeup of vertebrates, our analysis provides a useful resource for researchers interested in gaining further insights on vertebrate evolution and genetic diseases.",2015-07-16 +25270639,BioVLAB-MMIA-NGS: microRNA-mRNA integrated analysis using high-throughput sequencing data.,"

Motivation

It is now well established that microRNAs (miRNAs) play a critical role in regulating gene expression in a sequence-specific manner, and genome-wide efforts are underway to predict known and novel miRNA targets. However, the integrated miRNA-mRNA analysis remains a major computational challenge, requiring powerful informatics systems and bioinformatics expertise.

Results

The objective of this study was to modify our widely recognized Web server for the integrated mRNA-miRNA analysis (MMIA) and its subsequent deployment on the Amazon cloud (BioVLAB-MMIA) to be compatible with high-throughput platforms, including next-generation sequencing (NGS) data (e.g. RNA-seq). We developed a new version called the BioVLAB-MMIA-NGS, deployed on both Amazon cloud and on a high-performance publicly available server called MAHA. By using NGS data and integrating various bioinformatics tools and databases, BioVLAB-MMIA-NGS offers several advantages. First, sequencing data is more accurate than array-based methods for determining miRNA expression levels. Second, potential novel miRNAs can be detected by using various computational methods for characterizing miRNAs. Third, because miRNA-mediated gene regulation is due to hybridization of an miRNA to its target mRNA, sequencing data can be used to identify many-to-many relationship between miRNAs and target genes with high accuracy.

Availability and implementation

http://epigenomics.snu.ac.kr/biovlab_mmia_ngs/.",2014-09-29 +28398289,A high-throughput molecular data resource for cutaneous neurofibromas.,"Neurofibromatosis type 1 (NF1) is a genetic disorder with a range of clinical manifestations such as widespread growth of benign tumours called neurofibromas, pain, learning disorders, bone deformities, vascular abnormalities and even malignant tumours. With the establishment of the Children's Tumour Foundation biobank, neurofibroma samples can now be collected directly from patients to be analysed by the larger scientific community. This work describes a pilot study to characterize one class of neurofibroma, cutaneous neurofibromas, by molecularly profiling of ~40 cutaneous neurofibromas collected from 11 individual patients. Data collected from each tumour includes (1) SNP Arrays, (2) Whole genome sequencing (WGS) and (3) RNA-Sequencing. These data are now freely available for further analysis at http://www.synapse.org/cutaneousNF.",2017-04-11 +29949748,An in-silico method for identifying aggregation rate enhancer and mitigator mutations in proteins.,"Newly synthesized polypeptides must pass stringent quality controls in cells to ensure appropriate folding and function. However, mutations, environmental stresses and aging can reduce efficiencies of these controls, leading to accumulation of protein aggregates, amyloid fibrils and plaques. In-vitro experiments have shown that even single amino acid substitutions can drastically enhance or mitigate protein aggregation kinetics. In this work, we have collected a dataset of 220 unique mutations in 25 proteins and classified them as enhancers or mitigators on the basis of their effect on protein aggregation rate. The data were analyzed via machine learning to identify features capable of distinguishing between aggregation rate enhancers and mitigators. Our initial Support Vector Machine (SVM) model separated such mutations with an overall accuracy of 69%. 
When local secondary structures at the mutation sites were considered, the accuracies further improved by 13-15%. The machine-learnt features are distinct for each secondary structure class at mutation sites. Protein stability and flexibility changes are important features for mutations in α-helices. β-strand propensity, polarity and charge become important when mutations occur in β-strands and ability to form secondary structure, helical tendency and aggregation propensity are important for mutations lying in coils. These results have been incorporated into a sequence-based algorithm (available at http://www.iitm.ac.in/bioinfo/aggrerate-disc/) capable of predicting whether a mutation will enhance or mitigate a protein's aggregation rate. This algorithm will find several applications towards understanding protein aggregation in human diseases, enable in-silico optimization of biopharmaceuticals and enzymes for improved biophysical attributes and de novo design of bio-nanomaterials.",2018-06-24 +27747841,The efficacy of exercise in preventing injury in adult male football: a systematic review of randomised controlled trials.,"

Background

Injury prevention measures might reduce the impact of injury on footballers and football clubs. Increasing research has evaluated the use of exercise for injury prevention. However, research has focused on adolescent females. No high-quality systematic reviews have evaluated the efficacy of all forms of exercise on preventing injury in adult male football.

Objective

Our objective was to conduct a systematic review to evaluate the efficacy of exercise in preventing injury in adult male football.

Data sources

Comprehensive searches of electronic databases CINAHL (Cumulative Index to Nursing and Allied Health Literature), MEDLINE, Embase, AMED (The Allied and Complementary Medicine Database), the Cochrane Central Register of Controlled Trials, PEDro (The Physiotherapy Evidence Database), SPORTDiscus™, the National Research Register, Current Controlled Trials website (York), and http://www.ClinicalTrials.gov were conducted using predefined search terms to identify relevant studies published up to 1 March 2013. Screening of references, searches of grey literature, and hand searches of relevant journals were also employed.

Study selection

Included studies were randomized controlled trials using injury incidence as an outcome measure to evaluate the efficacy of an exercise intervention on uninjured male footballers aged 16 years and over. Articles not written in English were excluded.

Data extraction

Two researchers independently searched data sources, screened studies for eligibility, evaluated risk of bias, and extracted data using predefined criteria.

Study appraisal and synthesis methods

Risk of bias of included trials was assessed using the Cochrane Collaboration's tool for assessing risk of bias. There was insufficient trial comparability (outcome measures, interventions, injury type) for meta-analysis, and a qualitative analysis was performed.

Results

Eight trials (n = 3,355) from five countries met the inclusion criteria. All trials were assessed as having a high risk of bias. Two trials reported statistically significant reductions in hamstring injuries with eccentric exercise, and two reported statistically significant reductions in recurrent ankle sprains with proprioceptive exercise. Four trials showed no statistically significant difference in injury incidence with exercise interventions targeting a range of injuries.

Limitations

Notable limitations of included trials included poor reporting and limited blinding. A high risk of bias and insufficient comparability across trials prevented quantitative data synthesis.

Conclusions

Limitations in the context of study quality and heterogeneity resulted in an inability to reach a clear conclusion regarding efficacy of exercise for injury prevention in adult male football. Future low risk of bias, properly powered, and comprehensively reported trials are warranted to evaluate the efficacy of exercise on injury prevention. The use of eccentric hamstring exercise for hamstring injury prevention and proprioceptive training for recurrent ankle sprain prevention might be a good focus for future trials, as existing trials with a high risk of bias suggest an effect.",2015-01-20 +28088356,Advancing the prediction accuracy of protein-protein interactions by utilizing evolutionary information from position-specific scoring matrix and ensemble classifier.,"Protein-Protein Interactions (PPIs) are essential to most biological processes and play a critical role in most cellular functions. With the development of high-throughput biological techniques and in silico methods, a large number of PPI data have been generated for various organisms, but many problems remain unsolved. These factors promoted the development of the in silico methods based on machine learning to predict PPIs. In this study, we propose a novel method by combining ensemble Rotation Forest (RF) classifier and Discrete Cosine Transform (DCT) algorithm to predict the interactions among proteins. Specifically, the protein amino acids sequence is transformed into Position-Specific Scoring Matrix (PSSM) containing biological evolution information, and then the feature vector is extracted to present protein evolutionary information using DCT algorithm; finally, the ensemble rotation forest model is used to predict whether a given protein pair is interacting or not. When performed on Yeast and H. pylori data sets, the proposed method achieved excellent results with an average accuracy of 98.54% and 88.27%. 
In addition, we achieved good prediction accuracy of 98.08%, 92.75%, 98.87% and 98.72% on independent data sets (C.elegans, E.coli, H.sapiens and M.musculus). In order to further evaluate the performance of our method, we compare it with the state-of-the-art Support Vector Machine (SVM) classifier and get good results. As a web server, the source code and Yeast data sets used in this article are freely available at http://202.119.201.126:8888/DCTRF/.",2017-01-11 +28426817,Evaluation of machine learning algorithms and structural features for optimal MRI-based diagnostic prediction in psychosis.,"A relatively large number of studies have investigated the power of structural magnetic resonance imaging (sMRI) data to discriminate patients with schizophrenia from healthy controls. However, very few of them have also included patients with bipolar disorder, allowing the clinically relevant discrimination between both psychotic diagnostics. To assess the efficacy of sMRI data for diagnostic prediction in psychosis we objectively evaluated the discriminative power of a wide range of commonly used machine learning algorithms (ridge, lasso, elastic net and L0 norm regularized logistic regressions, a support vector classifier, regularized discriminant analysis, random forests and a Gaussian process classifier) on main sMRI features including grey and white matter voxel-based morphometry (VBM), vertex-based cortical thickness and volume, region of interest volumetric measures and wavelet-based morphometry (WBM) maps. All possible combinations of algorithms and data features were considered in pairwise classifications of matched samples of healthy controls (N = 127), patients with schizophrenia (N = 128) and patients with bipolar disorder (N = 128). Results show that the selection of feature type is important, with grey matter VBM (without data reduction) delivering the best diagnostic prediction rates (averaging over classifiers: schizophrenia vs. 
healthy 75%, bipolar disorder vs. healthy 63% and schizophrenia vs. bipolar disorder 62%) whereas algorithms usually yielded very similar results. Indeed, those grey matter VBM accuracy rates were not even improved by combining all feature types in a single prediction model. Further multi-class classifications considering the three groups simultaneously made evident a lack of predictive power for the bipolar group, probably due to its intermediate anatomical features, located between those observed in healthy controls and those found in patients with schizophrenia. Finally, we provide MRIPredict (https://www.nitrc.org/projects/mripredict/), a free tool for SPM, FSL and R, to easily carry out voxelwise predictions based on VBM images.",2017-04-20 +28333216,"BuddySuite: Command-Line Toolkits for Manipulating Sequences, Alignments, and Phylogenetic Trees.","The ability to manipulate sequence, alignment, and phylogenetic tree files has become an increasingly important skill in the life sciences, whether to generate summary information or to prepare data for further downstream analysis. The command line can be an extremely powerful environment for interacting with these resources, but only if the user has the appropriate general-purpose tools on hand. BuddySuite is a collection of four independent yet interrelated command-line toolkits that facilitate each step in the workflow of sequence discovery, curation, alignment, and phylogenetic reconstruction. Most common sequence, alignment, and tree file formats are automatically detected and parsed, and over 100 tools have been implemented for manipulating these data. The project has been engineered to easily accommodate the addition of new tools, is written in the popular programming language Python, and is hosted on the Python Package Index and GitHub to maximize accessibility. Documentation for each BuddySuite tool, including usage examples, is available at http://tiny.cc/buddysuite_wiki. 
All software is open source and freely available through http://research.nhgri.nih.gov/software/BuddySuite.",2017-06-01 +23496983,"Medicine for global health: can ""simple interventions"" improve the worldwide burden of disease?","Improvements to medical practice and delivery of treatment has been the focus of many international collaborations aiming to address the delivery of appropriate health care in low- and middle-income countries. However, this is compounded by various social, cultural as well as resource allocation issues. This Editorial marks the launch of an article collection on Medicine for Global Health (http://www.biomedcentral.com/bmcmed/series/medicine_for_global_health), and here, guest editor Gretchen Birbeck discusses the challenges, importance and increasing relevance of global health.",2013-03-14 +27153716,Multiple structure single parameter: analysis of a single protein nano environment descriptor characterizing a shared loci on structurally aligned proteins.,"

Motivation

A graphical representation of physicochemical and structural descriptors attributed to amino acid residues occupying the same topological position in different, structurally aligned proteins can provide a more intuitive way to associate possible functional implications to identified variations in structural characteristics. This could be achieved by observing selected characteristics of amino acids and of their corresponding nano environments, described by the numerical value of the matching descriptor. For this purpose, a web-based tool called multiple structure single parameter (MSSP) was developed and is presented here.

Results

MSSP produces a two-dimensional plot of a single protein descriptor for a number of structurally aligned protein chains. From a total of 150 protein descriptors available in MSSP, selected from >1500 parameters stored in the STING database, it is possible to create easily readable and highly informative XY-plots, where X-axis contains the amino acid position in the multiple structural alignment, and Y-axis contains the descriptor's numerical values for each aligned structure. To illustrate one of the possible MSSP contributions to the investigation of changes in physicochemical and structural properties of mutants, comparing them with the cognate wild-type structure, the oncogenic mutation of M918T in RET kinase is presented. The comparative analysis of wild-type and mutant structures shows great changes in their electrostatic potential. These variations are easily depicted in the MSSP-generated XY-plot.

Availability and implementation

The web server is freely available at http://www.cbi.cnptia.embrapa.br/SMS/STINGm/MPA/index.html Web server implemented in Perl, Java and JavaScript and JMol or Protein Viewer as structure visualizers.

Contact

goran.neshich@embrapa.br or gneshich@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-15 +25274736,ViRBase: a resource for virus-host ncRNA-associated interactions.,"Increasing evidence reveals that diverse non-coding RNAs (ncRNAs) play critically important roles in viral infection. Viruses can use diverse ncRNAs to manipulate both cellular and viral gene expression to establish a host environment conducive to the completion of the viral life cycle. Many host cellular ncRNAs can also directly or indirectly influence viral replication and even target virus genomes. ViRBase (http://www.rna-society.org/virbase) aims to provide the scientific community with a resource for efficient browsing and visualization of virus-host ncRNA-associated interactions and interaction networks in viral infection. The current version of ViRBase documents more than 12,000 viral and cellular ncRNA-associated virus-virus, virus-host, host-virus and host-host interactions involving more than 460 non-redundant ncRNAs and 4400 protein-coding genes from between more than 60 viruses and 20 hosts. Users can query, browse and manipulate these virus-host ncRNA-associated interactions. ViRBase will be of help in uncovering the generic organizing principles of cellular virus-host ncRNA-associated interaction networks in viral infection.",2014-10-01 +,|SE|S|AM|E| Barcode: NGS‐oriented software for amplicon characterization – application to species and environmental barcoding,"Progress in NGS technologies has opened up new opportunities for characterizing biodiversity, both for individual specimen identification and for environmental barcoding. Although the amount of data available to biologist is increasing, user‐friendly tools to facilitate data analysis have yet to be developed. Our aim, with |SE|S|AM|E| Barcode, is to provide such support through a unified platform. 
The sequences are analysed through a pipeline that (i) processes NGS amplicon runs, filtering markers and samples, (ii) builds reference libraries and finally (iii) identifies (barcodes) the sequences in each amplicon from the reference library. We use a simulated data set for specimen identification and a recently published data set for environmental barcoding to validate the method. The results obtained are consistent with the expected characterizations (in silico and previously published, respectively). |SE|S|AM|E| Barcode and its documentation are freely available under the Creative Commons Attribution‐NonCommercial‐ShareAlike 3.0 Unported Licence for Windows and Linux from http://www1.montpellier.inra.fr/CBGP/NGS/.",2012-11-01 +25150248,SUBAcon: a consensus algorithm for unifying the subcellular localization data of the Arabidopsis proteome.,"

Motivation

Knowing the subcellular location of proteins is critical for understanding their function and developing accurate networks representing eukaryotic biological processes. Many computational tools have been developed to predict proteome-wide subcellular location, and abundant experimental data from green fluorescent protein (GFP) tagging or mass spectrometry (MS) are available in the model plant, Arabidopsis. None of these approaches is error-free, and thus, results are often contradictory.

Results

To help unify these multiple data sources, we have developed the SUBcellular Arabidopsis consensus (SUBAcon) algorithm, a naive Bayes classifier that integrates 22 computational prediction algorithms, experimental GFP and MS localizations, protein-protein interaction and co-expression data to derive a consensus call and probability. SUBAcon classifies protein location in Arabidopsis more accurately than single predictors.

Availability

SUBAcon is a useful tool for recovering proteome-wide subcellular locations of Arabidopsis proteins and is displayed in the SUBA3 database (http://suba.plantenergy.uwa.edu.au). The source code and input data is available through the SUBA3 server (http://suba.plantenergy.uwa.edu.au//SUBAcon.html) and the Arabidopsis SUbproteome REference (ASURE) training set can be accessed using the ASURE web portal (http://suba.plantenergy.uwa.edu.au/ASURE).",2014-08-22 +25610339,A Dataset of Deep-Sea Fishes Surveyed by Research Vessels in the Waters around Taiwan.,"The study of deep-sea fish fauna is hampered by a lack of data due to the difficulty and high cost incurred in its surveys and collections. Taiwan is situated along the edge of the Eurasian plate, at the junction of three Large Marine Ecosystems or Ecoregions of the East China Sea, South China Sea and the Philippines. As nearly two-thirds of its surrounding marine ecosystems are deep-sea environments, Taiwan is expected to hold a rich diversity of deep-sea fish. However, in the past, no research vessels were employed to collect fish data on site. Only specimens, caught by bottom trawl fishing in the waters hundreds of meters deep and missing precise locality information, were collected from Dasi and Donggang fishing harbors. Beginning in 2001, with the support of the National Science Council, research vessels were made available to take on the task of systematically collecting deep-sea fish specimens and occurrence records in the waters surrounding Taiwan. By the end of 2006, a total of 3,653 specimens, belonging to 26 orders, 88 families, 198 genera and 366 species, were collected in addition to data such as sampling site geographical coordinates and water depth, and fish body length and weight. 
The information, all accessible from the ""Database of Taiwan's Deep-Sea Fauna and Its Distribution (http://deepsea.biodiv.tw/)"" as part of the ""Fish Database of Taiwan,"" can benefit the study of temporal and spatial changes in distribution and abundance of fish fauna in the context of global deep-sea biodiversity.",2014-12-19 +26483767,Clinical utilization of genomics data produced by the international Pseudomonas aeruginosa consortium.,"The International Pseudomonas aeruginosa Consortium is sequencing over 1000 genomes and building an analysis pipeline for the study of Pseudomonas genome evolution, antibiotic resistance and virulence genes. Metadata, including genomic and phenotypic data for each isolate of the collection, are available through the International Pseudomonas Consortium Database (http://ipcd.ibis.ulaval.ca/). Here, we present our strategy and the results that emerged from the analysis of the first 389 genomes. With as yet unmatched resolution, our results confirm that P. aeruginosa strains can be divided into three major groups that are further divided into subgroups, some not previously reported in the literature. We also provide the first snapshot of P. aeruginosa strain diversity with respect to antibiotic resistance. Our approach will allow us to draw potential links between environmental strains and those implicated in human and animal infections, understand how patients become infected and how the infection evolves over time as well as identify prognostic markers for better evidence-based decisions on patient care.",2015-09-29 +26586801,HOCOMOCO: expansion and enhancement of the collection of transcription factor binding sites models.,"Models of transcription factor (TF) binding sites provide a basis for a wide spectrum of studies in regulatory genomics, from reconstruction of regulatory networks to functional annotation of transcripts and sequence variants. 
While TFs may recognize different sequence patterns in different conditions, it is pragmatic to have a single generic model for each particular TF as a baseline for practical applications. Here we present the expanded and enhanced version of HOCOMOCO (http://hocomoco.autosome.ru and http://www.cbrc.kaust.edu.sa/hocomoco10), the collection of models of DNA patterns, recognized by transcription factors. HOCOMOCO now provides position weight matrix (PWM) models for binding sites of 601 human TFs and, in addition, PWMs for 396 mouse TFs. Furthermore, we introduce the largest up to date collection of dinucleotide PWM models for 86 (52) human (mouse) TFs. The update is based on the analysis of massive ChIP-Seq and HT-SELEX datasets, with the validation of the resulting models on in vivo data. To facilitate a practical application, all HOCOMOCO models are linked to gene and protein databases (Entrez Gene, HGNC, UniProt) and accompanied by precomputed score thresholds. Finally, we provide command-line tools for PWM and diPWM threshold estimation and motif finding in nucleotide sequences.",2015-11-19 +29248621,Asthma Is a Risk Factor for Respiratory Exacerbations Without Increased Rate of Lung Function Decline: Five-Year Follow-up in Adult Smokers From the COPDGene Study.,"

Background

Previous investigations in adult smokers from the COPDGene Study have shown that early-life respiratory disease is associated with reduced lung function, COPD, and airway thickening. Using 5-year follow-up data, we assessed disease progression in subjects who had experienced early-life respiratory disease. We hypothesized that there are alternative pathways to reaching reduced FEV1 and that subjects who had childhood pneumonia, childhood asthma, or asthma-COPD overlap (ACO) would have less lung function decline than subjects without these conditions.

Methods

Subjects returning for 5-year follow-up were assessed. Childhood pneumonia was defined by self-reported pneumonia at < 16 years. Childhood asthma was defined as self-reported asthma diagnosed by a health professional at < 16 years. ACO was defined as subjects with COPD who self-reported asthma diagnosed by a health-professional at ≤ 40 years. Smokers with and those without these early-life respiratory diseases were compared on measures of disease progression.

Results

Follow-up data from 4,915 subjects were examined, including 407 subjects who had childhood pneumonia, 323 subjects who had childhood asthma, and 242 subjects with ACO. History of childhood asthma or ACO was associated with an increased exacerbation frequency (childhood asthma, P < .001; ACO, P = .006) and odds of severe exacerbations (childhood asthma, OR, 1.41; ACO, OR, 1.42). History of childhood pneumonia was associated with increased exacerbations in subjects with COPD (absolute difference [β], 0.17; P = .04). None of these early-life respiratory diseases were associated with an increased rate of lung function decline or progression on CT scans.

Conclusions

Subjects who had early-life asthma are at increased risk of developing COPD and of having more active disease with more frequent and severe respiratory exacerbations without an increased rate of lung function decline over a 5-year period.

Trial registry

ClinicalTrials.gov; No. NCT00608764; https://clinicaltrials.gov.",2017-12-15 +25814993,"Unity in diversity, a systems approach to regulating plant cell physiology by 2-oxoglutarate-dependent dioxygenases.","Could a disjoint group of enzymes synchronize their activities and execute a complex multi-step, measurable, and reproducible response? Here, I surmise that the alpha-ketoglutarate dependent superfamily of non-haem iron (II) dioxygenases could influence cell physiology as a cohesive unit, and that the broad spectra of substrates transformed is an absolute necessity to this portrayal. This eclectic group comprises members from all major taxa, and participates in pesticide breakdown, hypoxia signaling, and osmotic stress neutralization. The oxidative decarboxylation of 2-oxoglutarate to succinate is coupled with a concomitant substrate hydroxylation and, in most cases, is followed by an additional specialized conversion. The domain profile of a protein sequence was used as an index of miscellaneous reaction chemistry and interpreted alongside existent kinetic data in a linear model of integrated function. Statistical parameters were inferred by the creation of a novel, empirically motivated flat-file database of over 3800 sequences (DB2OG) with putative 2-oxoglutarate dependent activity. The collated information was categorized on the basis of existing annotation schema. The data suggests that 2OG-dependent enzymes incorporate several desirable features of a systems level player. DB2OG, is free, accessible without a login to all users, and available at the following URL (http://comp-biol.theacms.in/DB2OG.html).",2015-03-11 +28168878,Network Marker Selection for Untargeted LC-MS Metabolomics Data.,"Untargeted metabolomics using high-resolution liquid chromatography-mass spectrometry (LC-MS) is becoming one of the major areas of high-throughput biology. 
Functional analysis, that is, analyzing the data based on metabolic pathways or the genome-scale metabolic network, is critical in feature selection and interpretation of metabolomics data. One of the main challenges in the functional analyses is the lack of the feature identity in the LC-MS data itself. By matching mass-to-charge ratio (m/z) values of the features to theoretical values derived from known metabolites, some features can be matched to one or more known metabolites. When multiple matchings occur, in most cases only one of the matchings can be true. At the same time, some known metabolites are missing in the measurements. Current network/pathway analysis methods ignore the uncertainty in metabolite identification and the missing observations, which could lead to errors in the selection of significant subnetworks/pathways. In this paper, we propose a flexible network feature selection framework that combines metabolomics data with the genome-scale metabolic network. The method adopts a sequential feature screening procedure and machine learning-based criteria to select important subnetworks and identify the optimal feature matching simultaneously. Simulation studies show that the proposed method has a much higher sensitivity than the commonly used maximal matching approach. For demonstration, we apply the method on a cohort of healthy subjects to detect subnetworks associated with the body mass index (BMI). The method identifies several subnetworks that are supported by the current literature, as well as detects some subnetworks with plausible new functional implications. 
The R code is available at http://web1.sph.emory.edu/users/tyu8/MSS.",2017-02-17 +29458357,Genomic screening for monogenic forms of diabetes.,"Adult-onset, or type II diabetes mellitus (T2DM) has a complex genetic architecture, from hundreds of genes with low penetrance, common susceptibility variants (e.g., TCF7L2), to a set of more than ten genes that, when mutated, can cause a single-gene or Mendelian form of T2DM (e.g., GCK). It is a clinical challenge to identify patients with the uncommon (2-3%) form of T2DM, typically classified as maturity-onset diabetes of the young (MODY). Bansal et al. (BMC Med 15:213, 2017) used a gene panel test approach to test patients with diabetes for single-gene causes of MODY. They found that nearly 2% of younger patients had pathogenic variants in one of seven genes. These data confirm prior studies showing that Mendelian or single-gene MODY can masquerade as garden variety T2DM. The implications of these results for wider general medicine and the future implementation of clinical genome sequencing are discussed.Please see related article: https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-017-0977-3.",2018-02-20 +29515452,AdipoCount: A New Software for Automatic Adipocyte Counting.,"Obesity has spread worldwide and become a common health problem in modern society. One typical feature of obesity is the excessive accumulation of fat in adipocytes, which occurs through the following two physiological phenomena: hyperplasia (increase in quantity) and hypertrophy (increase in size) of adipocytes. In clinical and scientific research, the accurate quantification of the number and diameter of adipocytes is necessary for assessing obesity. In this study, we present a new automatic adipocyte counting system, AdipoCount, which is based on image processing algorithms. Comparing with other existing adipocyte counting tools, AdipoCount is more accurate and supports further manual correction. 
AdipoCount counts adipose cells by the following three-step process: (1) It detects the image edges, which are used to segment the membrane of adipose cells; (2) It uses a watershed-based algorithm to re-segment the missing dyed membrane; and (3) It applies a domain connectivity analysis to count the cells. The outputs of this system are the labels and the statistical data of all adipose cells in the image. The AdipoCount software is freely available for academic use at: http://www.csbio.sjtu.edu.cn/bioinf/AdipoCount/.",2018-02-20 +25274737,RiceVarMap: a comprehensive database of rice genomic variations.,"Rice Variation Map (RiceVarMap, http:/ricevarmap.ncpgr.cn) is a database of rice genomic variations. The database provides comprehensive information of 6,551,358 single nucleotide polymorphisms (SNPs) and 1,214,627 insertions/deletions (INDELs) identified from sequencing data of 1479 rice accessions. The SNP genotypes of all accessions were imputed and evaluated, resulting in an overall missing data rate of 0.42% and an estimated accuracy greater than 99%. The SNP/INDEL genotypes of all accessions are available for online query and download. Users can search SNPs/INDELs by identifiers of the SNPs/INDELs, genomic regions, gene identifiers and keywords of gene annotation. Allele frequencies within various subpopulations and the effects of the variation that may alter the protein sequence of a gene are also listed for each SNP/INDEL. The database also provides geographical details and phenotype images for various rice accessions. In particular, the database provides tools to construct haplotype networks and design PCR-primers by taking into account surrounding known genomic variations. 
These data and tools are highly useful for exploring genetic variations and evolution studies of rice and other species.",2014-10-01 +26527728,SATPdb: a database of structurally annotated therapeutic peptides.,"SATPdb (http://crdd.osdd.net/raghava/satpdb/) is a database of structurally annotated therapeutic peptides, curated from 22 public domain peptide databases/datasets including 9 of our own. The current version holds 19192 unique experimentally validated therapeutic peptide sequences having length between 2 and 50 amino acids. It covers peptides having natural, non-natural and modified residues. These peptides were systematically grouped into 10 categories based on their major function or therapeutic property like 1099 anticancer, 10585 antimicrobial, 1642 drug delivery and 1698 antihypertensive peptides. We assigned or annotated structure of these therapeutic peptides using structural databases (Protein Data Bank) and state-of-the-art structure prediction methods like I-TASSER, HHsearch and PEPstrMOD. In addition, SATPdb facilitates users in performing various tasks that include: (i) structure and sequence similarity search, (ii) peptide browsing based on their function and properties, (iii) identification of moonlighting peptides and (iv) searching of peptides having desired structure and therapeutic activities. We hope this database will be useful for researchers working in the field of peptide-based therapeutics.",2015-11-02 +28407117,NNAlign: a platform to construct and evaluate artificial neural network models of receptor-ligand interactions.,"Peptides are extensively used to characterize functional or (linear) structural aspects of receptor-ligand interactions in biological systems, e.g. SH2, SH3, PDZ peptide-recognition domains, the MHC membrane receptors and enzymes such as kinases and phosphatases. NNAlign is a method for the identification of such linear motifs in biological sequences. 
The algorithm aligns the amino acid or nucleotide sequences provided as training set, and generates a model of the sequence motif detected in the data. The webserver allows setting up cross-validation experiments to estimate the performance of the model, as well as evaluations on independent data. Many features of the training sequences can be encoded as input, and the network architecture is highly customizable. The results returned by the server include a graphical representation of the motif identified by the method, performance values and a downloadable model that can be applied to scan protein sequences for occurrence of the motif. While its performance for the characterization of peptide-MHC interactions is widely documented, we extended NNAlign to be applicable to other receptor-ligand systems as well. Version 2.0 supports alignments with insertions and deletions, encoding of receptor pseudo-sequences, and custom alphabets for the training sequences. The server is available at http://www.cbs.dtu.dk/services/NNAlign-2.0.",2017-07-01 +29267876,A coarse-grained model for DNA origami.,"Modeling tools provide a valuable support for DNA origami design. However, current solutions have limited application for conformational analysis of the designs. In this work we present a tool for a thorough study of DNA origami structure and dynamics. The tool is based on a novel coarse-grained model dedicated to geometry optimization and conformational analysis of DNA origami. We explored the ability of the model to predict dynamic behavior, global shapes, and fine details of two single-layer systems designed in hexagonal and square lattices using atomic force microscopy, Förster resonance energy transfer spectroscopy, and all-atom molecular dynamic simulations for validation of the results. We also examined the performance of the model for multilayer systems by simulation of DNA origami with published cryo-electron microscopy and atomic force microscopy structures. 
A good agreement between the simulated and experimental data makes the model suitable for conformational analysis of DNA origami objects. The tool is available at http://vsb.fbb.msu.ru/cosm as a web-service and as a standalone version.",2018-02-01 +29698921,"Design, synthesis and evaluation of novel sulfonamides as potential anticancer agents.","Based on modern literature data about biological activity of E7010 derivatives, a series of new sulfonamides as potential anticancer drugs were rationally designed by QSAR modeling methods Сlassification learning QSAR models to predict the tubulin polymerization inhibition activity of novel sulfonamides as potential anticancer agents were created using the Online Chemical Modeling Environment (OCHEM) and are freely available online on OCHEM server at https://ochem.eu/article/107790. A series of sulfonamides with predicted activity were synthesized and tested against 60 human cancer cell lines with growth inhibition percent values. The highest antiproliferative activity against leukemia (cell lines K-562 and MOLT-4), non-small cell lung cancer (cell line NCI-H522), colon cancer (cell lines NT29 and SW-620), melanoma (cell lines MALME-3M and UACC-257), ovarian cancer (cell lines IGROV1 and OVCAR-3), renal cancer (cell lines ACHN and UO-31), breast cancer (cell line T-47D) was found for compounds 4-9. According to the docking results the compounds 4-9 induce cytotoxicity by the disruption of the microtubule dynamics by inhibiting tubulin polymerization via effective binding into colchicine domain, similar the E7010.",2018-04-10 +27703842,RBioplot: an easy-to-use R pipeline for automated statistical analysis and data visualization in molecular biology and biochemistry.,"

Background

Statistical analysis and data visualization are two crucial aspects in molecular biology and biology. For analyses that compare one dependent variable between standard (e.g., control) and one or multiple independent variables, a comprehensive yet highly streamlined solution is valuable. The computer programming language R is a popular platform for researchers to develop tools that are tailored specifically for their research needs. Here we present an R package RBioplot that takes raw input data for automated statistical analysis and plotting, highly compatible with various molecular biology and biochemistry lab techniques, such as, but not limited to, western blotting, PCR, and enzyme activity assays.

Method

The package is built based on workflows operating on a simple raw data layout, with minimum user input or data manipulation required. The package is distributed through GitHub, which can be easily installed through one single-line R command. A detailed installation guide is available at http://kenstoreylab.com/?page_id=2448. Users can also download demo datasets from the same website.

Results and discussion

By integrating selected functions from existing statistical and data visualization packages with extensive customization, RBioplot features both statistical analysis and data visualization functionalities. Key properties of RBioplot include: -Fully automated and comprehensive statistical analysis, including normality test, equal variance test, Student's t-test and ANOVA (with post-hoc tests);-Fully automated histogram, heatmap and joint-point curve plotting modules;-Detailed output files for statistical analysis, data manipulation and high quality graphs;-Axis range finding and user customizable tick settings;-High user-customizability.",2016-09-28 +28453676,fastMitoCalc: an ultra-fast program to estimate mitochondrial DNA copy number from whole-genome sequences.,

Availability and implementation

fastMitoCalc is available at https://lgsun.irp.nia.nih.gov/hsgu/software/mitoAnalyzer/index.html.

Contact

jun.ding@nih.gov.

Supplementary information

Supplementary data are available at Bioinformatics online.,2017-05-01 +27245222,ODMedit: uniform semantic annotation for data integration in medicine based on a public metadata repository.,"

Background

The volume and complexity of patient data - especially in personalised medicine - is steadily increasing, both regarding clinical data and genomic profiles: Typically more than 1,000 items (e.g., laboratory values, vital signs, diagnostic tests etc.) are collected per patient in clinical trials. In oncology hundreds of mutations can potentially be detected for each patient by genomic profiling. Therefore data integration from multiple sources constitutes a key challenge for medical research and healthcare.

Methods

Semantic annotation of data elements can facilitate to identify matching data elements in different sources and thereby supports data integration. Millions of different annotations are required due to the semantic richness of patient data. These annotations should be uniform, i.e., two matching data elements shall contain the same annotations. However, large terminologies like SNOMED CT or UMLS don't provide uniform coding. It is proposed to develop semantic annotations of medical data elements based on a large-scale public metadata repository. To achieve uniform codes, semantic annotations shall be re-used if a matching data element is available in the metadata repository.

Results

A web-based tool called ODMedit ( https://odmeditor.uni-muenster.de/ ) was developed to create data models with uniform semantic annotations. It contains ~800,000 terms with semantic annotations which were derived from ~5,800 models from the portal of medical data models (MDM). The tool was successfully applied to manually annotate 22 forms with 292 data items from CDISC and to update 1,495 data models of the MDM portal.

Conclusion

Uniform manual semantic annotation of data models is feasible in principle, but requires a large-scale collaborative effort due to the semantic richness of patient data. A web-based tool for these annotations is available, which is linked to a public metadata repository.",2016-06-01 +21593080,ParkDB: a Parkinson's disease gene expression database.,"Parkinson's disease (PD) is a common, adult-onset, neuro-degenerative disorder characterized by the degeneration of cardinal motor signs mainly due to the loss of dopaminergic neurons in the substantia nigra. To date, researchers still have limited understanding of the key molecular events that provoke neurodegeneration in this disease. Here, we present ParkDB, the first queryable database dedicated to gene expression in PD. ParkDB contains a complete set of re-analyzed, curated and annotated microarray datasets. This resource enables scientists to identify and compare expression signatures involved in PD and dopaminergic neuron differentiation under different biological conditions and across species. Database URL: http://www2.cancer.ucl.ac.uk/Parkinson_Db2/",2011-05-18 +28970546,TM-Aligner: Multiple sequence alignment tool for transmembrane proteins with reduced time and improved accuracy.,"Membrane proteins plays significant role in living cells. Transmembrane proteins are estimated to constitute approximately 30% of proteins at genomic scale. It has been a difficult task to develop specific alignment tools for transmembrane proteins due to limited number of experimentally validated protein structures. Alignment tools based on homology modeling provide fairly good result by recapitulating 70-80% residues in reference alignment provided all input sequences should have known template structures. However, homology modeling tools took substantial amount of time, thus aligning large numbers of sequences becomes computationally demanding. Here we present TM-Aligner, a new tool for transmembrane protein sequence alignment. 
TM-Aligner is based on Wu-Manber and dynamic string matching algorithm which has significantly improved its accuracy and speed of multiple sequence alignment. We compared TM-Aligner with prevailing other popular tools and performed benchmarking using three separate reference sets, BaliBASE3.0 reference set7 of alpha-helical transmembrane proteins, structure based alignment of transmembrane proteins from Pfam database and structure alignment from GPCRDB. Benchmarking against reference datasets indicated that TM-Aligner is more advanced method having least turnaround time with significant improvements over the most accurate methods such as PROMALS, MAFFT, TM-Coffee, Kalign, ClustalW, Muscle and PRALINE. TM-Aligner is freely available through http://lms.snu.edu.in/TM-Aligner/ .",2017-10-02 +28485719,A hybrid organic-inorganic perovskite dataset.,"Hybrid organic-inorganic perovskites (HOIPs) have been attracting a great deal of attention due to their versatility of electronic properties and fabrication methods. We prepare a dataset of 1,346 HOIPs, which features 16 organic cations, 3 group-IV cations and 4 halide anions. Using a combination of an atomic structure search method and density functional theory calculations, the optimized structures, the bandgap, the dielectric constant, and the relative energies of the HOIPs are uniformly prepared and validated by comparing with relevant experimental and/or theoretical data. We make the dataset available at Dryad Digital Repository, NoMaD Repository, and Khazana Repository (http://khazana.uconn.edu/), hoping that it could be useful for future data-mining efforts that can explore possible structure-property relationships and phenomenological models. 
Progressive extension of the dataset is expected as new organic cations become appropriate within the HOIP framework, and as additional properties are calculated for the new compounds found.",2017-05-09 +29468193,"SCCmecFinder, a Web-Based Tool for Typing of Staphylococcal Cassette Chromosome mec in Staphylococcus aureus Using Whole-Genome Sequence Data. ","Typing of methicillin-resistant Staphylococcus aureus (MRSA) is important in infection control and surveillance. The current nomenclature of MRSA includes the genetic background of the S. aureus strain determined by multilocus sequence typing (MLST) or equivalent methods like spa typing and typing of the mobile genetic element staphylococcal cassette chromosome mec (SCCmec), which carries the mecA or mecC gene. Whereas MLST and spa typing are relatively simple, typing of SCCmec is less trivial because of its heterogeneity. Whole-genome sequencing (WGS) provides the essential data for typing of the genetic background and SCCmec, but so far, no bioinformatic tools for SCCmec typing have been available. Here, we report the development and evaluation of SCCmecFinder for characterization of the SCCmec element from S. aureus WGS data. SCCmecFinder is able to identify all SCCmec element types, designated I to XIII, with subtyping of SCCmec types IV (2B) and V (5C2). SCCmec elements are characterized by two different gene prediction approaches to achieve correct annotation, a Basic Local Alignment Search Tool (BLAST)-based approach and a k-mer-based approach. Evaluation of SCCmecFinder by using a diverse collection of clinical isolates (n = 93) showed a high typeability level of 96.7%, which increased to 98.9% upon modification of the default settings. In conclusion, SCCmecFinder can be an alternative to more laborious SCCmec typing methods and is freely available at https://cge.cbs.dtu.dk/services/SCCmecFinder. 
IMPORTANCE SCCmec in MRSA is acknowledged to be of importance not only because it contains the mecA or mecC gene but also for staphylococcal adaptation to different environments, e.g., in hospitals, the community, and livestock. Typing of SCCmec by PCR techniques has, because of its heterogeneity, been challenging, and whole-genome sequencing has only partially solved this since no good bioinformatic tools have been available. In this article, we describe the development of a new bioinformatic tool, SCCmecFinder, that includes most of the needs for infection control professionals and researchers regarding the interpretation of SCCmec elements. The software detects all of the SCCmec elements accepted by the International Working Group on the Classification of Staphylococcal Cassette Chromosome Elements, and users will be prompted if diverging and potential new elements are uploaded. Furthermore, SCCmecFinder will be curated and updated as new elements are found and it is easy to use and freely accessible.",2018-01-01 +24551056,BiNA: a visual analytics tool for biological network data.,"Interactive visual analysis of biological high-throughput data in the context of the underlying networks is an essential task in modern biomedicine with applications ranging from metabolic engineering to personalized medicine. The complexity and heterogeneity of data sets require flexible software architectures for data analysis. Concise and easily readable graphical representation of data and interactive navigation of large data sets are essential in this context. We present BiNA--the Biological Network Analyzer--a flexible open-source software for analyzing and visualizing biological networks. Highly configurable visualization styles for regulatory and metabolic network data offer sophisticated drawings and intuitive navigation and exploration techniques using hierarchical graph concepts. 
The generic projection and analysis framework provides powerful functionalities for visual analyses of high-throughput omics data in the context of networks, in particular for the differential analysis and the analysis of time series data. A direct interface to an underlying data warehouse provides fast access to a wide range of semantically integrated biological network databases. A plugin system allows simple customization and integration of new analysis algorithms or visual representations. BiNA is available under the 3-clause BSD license at http://bina.unipax.info/.",2014-02-13 +25378319,Cyclebase 3.0: a multi-organism database on cell-cycle regulation and phenotypes.,"The eukaryotic cell division cycle is a highly regulated process that consists of a complex series of events and involves thousands of proteins. Researchers have studied the regulation of the cell cycle in several organisms, employing a wide range of high-throughput technologies, such as microarray-based mRNA expression profiling and quantitative proteomics. Due to its complexity, the cell cycle can also fail or otherwise change in many different ways if important genes are knocked out, which has been studied in several microscopy-based knockdown screens. The data from these many large-scale efforts are not easily accessed, analyzed and combined due to their inherent heterogeneity. To address this, we have created Cyclebase--available at http://www.cyclebase.org--an online database that allows users to easily visualize and download results from genome-wide cell-cycle-related experiments. In Cyclebase version 3.0, we have updated the content of the database to reflect changes to genome annotation, added new mRNA and protein expression data, and integrated cell-cycle phenotype information from high-content screens and model-organism databases. 
The new version of Cyclebase also features a new web interface, designed around an overview figure that summarizes all the cell-cycle-related data for a gene.",2014-11-05 +24618344,Drug2Gene: an exhaustive resource to explore effectively the drug-target relation network.,"

Background

Information about drug-target relations is at the heart of drug discovery. There are now dozens of databases providing drug-target interaction data with varying scope, and focus. Therefore, and due to the large chemical space, the overlap of the different data sets is surprisingly small. As searching through these sources manually is cumbersome, time-consuming and error-prone, integrating all the data is highly desirable. Despite a few attempts, integration has been hampered by the diversity of descriptions of compounds, and by the fact that the reported activity values, coming from different data sets, are not always directly comparable due to usage of different metrics or data formats.

Description

We have built Drug2Gene, a knowledge base, which combines the compound/drug-gene/protein information from 19 publicly available databases. A key feature is our rigorous unification and standardization process which makes the data truly comparable on a large scale, allowing for the first time effective data mining in such a large knowledge corpus. As of version 3.2, Drug2Gene contains 4,372,290 unified relations between compounds and their targets most of which include reported bioactivity data. We extend this set with putative (i.e. homology-inferred) relations where sufficient sequence homology between proteins suggests they may bind to similar compounds. Drug2Gene provides powerful search functionalities, very flexible export procedures, and a user-friendly web interface.

Conclusions

Drug2Gene v3.2 has become a mature and comprehensive knowledge base providing unified, standardized drug-target related information gathered from publicly available data sources. It can be used to integrate proprietary data sets with publicly available data sets. Its main goal is to be a 'one-stop shop' to identify tool compounds targeting a given gene product or for finding all known targets of a drug. Drug2Gene with its integrated data set of public compound-target relations is freely accessible without restrictions at http://www.drug2gene.com.",2014-03-11 +27924442,Spanish norms for completion of restricted length word stems.,"This study presents a normative database of Spanish restricted length word stems that provides useful information for the selection of stimuli in memory experiments with Word Stem Completion (WSC) tasks. The database includes indices relative to stems (total baseline completion, priming baseline completion, priming, number of completions, ratio between given and deleted letters, and syllabic structure), and indices relative to characteristics of the words used to obtain the stems (frequency, familiarity, number of meanings, length, number of syllables, arousal, and valence). A WSC task was performed by 515 participants to calculate priming and baseline indices. An Exploratory Factor Analysis showed that these indices are grouped in four factors: perceptual, lexical, emotional, and response competition. Stepwise regression analyses performed with these factors showed that the lexical, response competition, and perceptual factors predict priming baseline completion, while only the lexical factor predicts priming. The model that best explains the relationship between priming and priming baseline completion was a cubic model, and the optimum baseline values for achieving priming were between .31 and .36. 
These norms can be downloaded as Supplemental Materials for this article from https://nuvol.uv.es/owncloud/index.php/s/hpj9by1qbENdjfj .",2017-10-01 +27800578,SCEGRAM: An image database for semantic and syntactic inconsistencies in scenes.,"Our visual environment is not random, but follows compositional rules according to what objects are usually found where. Despite the growing interest in how such semantic and syntactic rules - a scene grammar - enable effective attentional guidance and object perception, no common image database containing highly-controlled object-scene modifications has been publically available. Such a database is essential in minimizing the risk that low-level features drive high-level effects of interest, which is being discussed as possible source of controversial study results. To generate the first database of this kind - SCEGRAM - we took photographs of 62 real-world indoor scenes in six consistency conditions that contain semantic and syntactic (both mild and extreme) violations as well as their combinations. Importantly, always two scenes were paired, so that an object was semantically consistent in one scene (e.g., ketchup in kitchen) and inconsistent in the other (e.g., ketchup in bathroom). Low-level salience did not differ between object-scene conditions and was generally moderate. Additionally, SCEGRAM contains consistency ratings for every object-scene condition, as well as object-absent scenes and object-only images. Finally, a cross-validation using eye-movements replicated previous results of longer dwell times for both semantic and syntactic inconsistencies compared to consistent controls. In sum, the SCEGRAM image database is the first to contain well-controlled semantic and syntactic object-scene inconsistencies that can be used in a broad range of cognitive paradigms (e.g., verbal and pictorial priming, change detection, object identification, etc.) including paradigms addressing developmental aspects of scene grammar. 
SCEGRAM can be retrieved for research purposes from http://www.scenegrammarlab.com/research/scegram-database/ .",2017-10-01 +27928746,Affective norms for 720 French words rated by children and adolescents (FANchild).,"FANchild (French Affective Norms for Children) provides norms of valence and arousal for a large corpus of French words (N = 720) rated by 908 French children and adolescents (ages 7, 9, 11, and 13). The ratings were made using the Self-Assessment Manikin (Lang, 1980). Because it combines evaluations of arousal and valence and includes ratings provided by 7-, 9-, 11-, and 13-year-olds, this database complements and extends existing French-language databases. Good response reliability was observed in each of the four age groups. Despite a significant level of consensus, we found age differences in both the valence and arousal ratings: Seven- and 9-year-old children gave higher mean valence and arousal ratings than did the other age groups. Moreover, the tendency to judge words positively (i.e., positive bias) decreased with age. This age- and sex-related database will enable French-speaking researchers to study how the emotional character of words influences their cognitive processing, and how this influence evolves with age. FANchild is available at https://www.researchgate.net/profile/Catherine_Monnier/contributions .",2017-10-01 +28902678,Brief Report: High Need to Switch cART or Comedication With the Initiation of DAAs in Elderly HIV/HCV-Coinfected Patients.,"

Background

To describe the use of nonantiretroviral comedication and combination antiretroviral therapy (cART) in patients coinfected with HIV/hepatitis C virus (HCV) and to predict the potential for drug-drug interactions (DDIs) with direct-acting antivirals (DAAs) against HCV.

Methods

This is a retrospective, cross-sectional study, using the Dutch, nationwide ATHENA observational HIV cohort database. All patients with a known HIV/HCV coinfection on January 1, 2015, were included. Comedication and cART registered in the database were listed. The potential for DDIs between DAAs and comedication/cART were predicted using http://hep-druginteractions.org. DDIs were categorized as: (1) no clinically relevant DDI; (2) possible DDI; (3) contraindication; or (4) no information available.

Results

We included 777 patients of whom 488 (63%) used nonantiretroviral comedication. At risk for a category 2/3 DDI with nonantiretroviral comedications were 299 patients (38%). Most DDIs were predicted with paritaprevir/ritonavir, ombitasvir ± dasabuvir (47% of the drugs) and least with grazoprevir/elbasvir (11% of the drugs). Concerning cART, daclatasvir/sofosbuvir is the most favorable combination as no cART is contraindicated with this combination. In genotype 1/4 patients, grazoprevir/elbasvir is least favorable as 75% of the patients must alter their cART.

Conclusions

This study showed that comedication use in the aging HIV/HCV population is frequent and diverse. There is a high potential for DDIs between DAAs and comedication/cART.",2017-10-01 +25894297,The purchasable chemical space: a detailed picture.,"The screening of a reduced yet diverse and synthesizable region of the chemical space is a critical step in drug discovery. The ZINC database is nowadays routinely used to freely access and screen millions of commercially available compounds. We collected ∼125 million compounds from chemical catalogs and the ZINC database, yielding more than 68 million unique molecules, including a large portion of described natural products (NPs) and drugs. The data set was filtered using advanced medicinal chemistry rules to remove potentially toxic, promiscuous, metabolically labile, or reactive compounds. We studied the physicochemical properties of this compilation and identified millions of NP-like, fragment-like, inhibitors of protein-protein interactions (i-PPIs) like, and drug-like compounds. The related focused libraries were subjected to a detailed scaffold diversity analysis and compared to reference NPs and marketed drugs. This study revealed thousands of diverse chemotypes with distinct representations of building block combinations among the data sets. An analysis of the stereogenic and shape complexity properties of the libraries also showed that they present well-defined levels of complexity, following the tendency: i-PPIs-like < drug-like < fragment-like < NP-like. As the collected compounds have huge interest in drug discovery and particularly virtual screening and library design, we offer a freely available collection comprising over 37 million molecules under: http://pbox.pharmaceutical-bioinformatics.org , as well as the filtering rules used to build the focused libraries described herein.",2015-04-30 +28633280,MotifHyades: expectation maximization for de novo DNA motif pair discovery on paired sequences.,"

Motivation

In higher eukaryotes, protein-DNA binding interactions are the central activities in gene regulation. In particular, DNA motifs such as transcription factor binding sites are the key components in gene transcription. Harnessing the recently available chromatin interaction data, computational methods are desired for identifying the coupling DNA motif pairs enriched on long-range chromatin-interacting sequence pairs (e.g. promoter-enhancer pairs) systematically.

Results

To fill the void, a novel probabilistic model (namely, MotifHyades) is proposed and developed for de novo DNA motif pair discovery on paired sequences. In particular, two expectation maximization algorithms are derived for efficient model training with linear computational complexity. Under diverse scenarios, MotifHyades is demonstrated faster and more accurate than the existing ad hoc computational pipeline. In addition, MotifHyades is applied to discover thousands of DNA motif pairs with higher gold standard motif matching ratio, higher DNase accessibility and higher evolutionary conservation than the previous ones in the human K562 cell line. Lastly, it has been run on five other human cell lines (i.e. GM12878, HeLa-S3, HUVEC, IMR90, and NHEK), revealing another thousands of novel DNA motif pairs which are characterized across a broad spectrum of genomic features on long-range promoter-enhancer pairs.

Availability and implementation

The matrix-algebra-optimized versions of MotifHyades and the discovered DNA motif pairs can be found in http://bioinfo.cs.cityu.edu.hk/MotifHyades.

Contact

kc.w@cityu.edu.hk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +28187412,Omics Fusion - A Platform for Integrative Analysis of Omics Data.,"We present Omics Fusion, a new web-based platform for integrative analysis of omics data. Omics Fusion provides a collection of new and established tools and visualization methods to support researchers in exploring omics data, validating results or understanding how to adjust experiments in order to make new discoveries. It is easily extendible and new visualization methods are added continuously. It is available for free under: https://fusion.cebitec.uni-bielefeld.de/.",2016-12-18 +28299173,Systematic assessment of multi-gene predictors of pan-cancer cell line sensitivity to drugs exploiting gene expression data. ,"Background: Selected gene mutations are routinely used to guide the selection of cancer drugs for a given patient tumour. Large pharmacogenomic data sets, such as those by Genomics of Drug Sensitivity in Cancer (GDSC) consortium, were introduced to discover more of these single-gene markers of drug sensitivity. Very recently, machine learning regression has been used to investigate how well cancer cell line sensitivity to drugs is predicted depending on the type of molecular profile. The latter has revealed that gene expression data is the most predictive profile in the pan-cancer setting. However, no study to date has exploited GDSC data to systematically compare the performance of machine learning models based on multi-gene expression data against that of widely-used single-gene markers based on genomics data. Methods: Here we present this systematic comparison using Random Forest (RF) classifiers exploiting the expression levels of 13,321 genes and an average of 501 tested cell lines per drug. 
To account for time-dependent batch effects in IC50 measurements, we employ independent test sets generated with more recent GDSC data than that used to train the predictors and show that this is a more realistic validation than standard k-fold cross-validation. Results and Discussion: Across 127 GDSC drugs, our results show that the single-gene markers unveiled by the MANOVA analysis tend to achieve higher precision than these RF-based multi-gene models, at the cost of generally having a poor recall (i.e. correctly detecting only a small part of the cell lines sensitive to the drug). Regarding overall classification performance, about two thirds of the drugs are better predicted by the multi-gene RF classifiers. Among the drugs with the most predictive of these models, we found pyrimethamine, sunitinib and 17-AAG. Conclusions: Thanks to this unbiased validation, we now know that this type of model can predict in vitro tumour response to some of these drugs. These models can thus be further investigated on in vivo tumour models. R code to facilitate the construction of alternative machine learning models and their validation in the presented benchmark is available at http://ballester.marseille.inserm.fr/gdsc.transcriptomicDatav2.tar.gz.",2016-12-28 +26545201,Interventions for the treatment of keratocystic odontogenic tumours.,"

Background

The keratocystic odontogenic tumours (KCOTs) account for between about 2% and 11% of all jaw cysts and can occur at any age. They are more common in males than females with a male:female ratio of approximately 2:1. Although they are benign, KCOTs are locally very aggressive and have a tendency to recur after treatment. Reported recurrence rates range from 3% to 60%. The traditional method for the treatment of most KCOTs is surgical enucleation. However, due to the lining of the cyst being delicate and the fact that they frequently recur, this method alone is not sufficient. Adjunctive surgical treatment has been proposed in addition to the surgical enucleation, such as removal of the peripheral bone (ostectomy) or resection of the cyst with surrounding bone (en-bloc) resection. Other adjunctive treatments proposed are: cryotherapy (freezing) with liquid nitrogen and the use of the fixative Carnoy's solution placed in the cyst cavity after enucleation; both of which attempt to address residual tissue to prevent recurrence.

Objectives

To assess the available evidence comparing the effectiveness of interventions for the treatment of KCOTs.

Search methods

We searched the following electronic databases: the Cochrane Oral Health Group Trials Register (to 17 March 2015), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library, 2015, Issue 2), MEDLINE via Ovid (1946 to 17 March 2015) and EMBASE via Ovid (1980 to 17 March 2015). We searched the US National Institutes of Health Trials Register (http://clinicaltrials.gov) and the WHO Clinical Trials Registry Platform for ongoing trials. No restrictions were placed on the language or date of publication when searching the electronic databases.

Selection criteria

Randomised controlled trials comparing one modality of intervention with another with or without adjunctive treatment for the treatment of KCOTs. Adults, over the age of 18 with a validated diagnosis of solitary KCOTs arising in the jaw bones of the maxilla or mandible. Patients with known Gorlin syndrome were to be excluded.

Data collection and analysis

Review authors screened trials for inclusion. Full papers were obtained for relevant and potentially relevant trials. If data had been extracted, it would have been synthesised using the fixed-effect model, if substantial clinical diversity were identified between studies we planned to use the random-effects model with studies grouped by action provided there were four or more studies included in the meta-analysis, and we would have explored the heterogeneity between the included studies.

Main results

No randomised controlled trials that met the inclusion criteria were identified.

Authors' conclusions

There are no published randomised controlled trials relevant to this review question, therefore no conclusions could be reached about the effectiveness or otherwise of the interventions considered in this review. There is a need for well designed and conducted randomised controlled trials to evaluate treatments for KCOTs.",2015-11-05 +29568436,A structural examination and collision cross section database for over 500 metabolites and xenobiotics using drift tube ion mobility spectrometry.,"The confident identification of metabolites and xenobiotics in biological and environmental studies is an analytical challenge due to their immense dynamic range, vast chemical space and structural diversity. Ion mobility spectrometry (IMS) is widely used for small molecule analyses since it can separate isomeric species and be easily coupled with front end separations and mass spectrometry for multidimensional characterizations. However, to date IMS metabolomic and exposomic studies have been limited by an inadequate number of accurate collision cross section (CCS) values for small molecules, causing features to be detected but not confidently identified. In this work, we utilized drift tube IMS (DTIMS) to directly measure CCS values for over 500 small molecules including primary metabolites, secondary metabolites and xenobiotics. Since DTIMS measurements do not need calibrant ions or calibration like some other IMS techniques, they avoid calibration errors which can cause problems in distinguishing structurally similar molecules. All measurements were performed in triplicate in both positive and negative polarities with nitrogen gas and seven different electric fields, so that relative standard deviations (RSD) could be assessed for each molecule and structural differences studied. 
The primary metabolites analyzed to date have come from key metabolism pathways such as glycolysis, the pentose phosphate pathway and the tricarboxylic acid cycle, while the secondary metabolites consisted of classes such as terpenes and flavonoids, and the xenobiotics represented a range of molecules from antibiotics to polycyclic aromatic hydrocarbons. Different CCS trends were observed for several of the diverse small molecule classes and when urine features were matched to the database, the addition of the IMS dimension greatly reduced the possible number of candidate molecules. This CCS database and structural information are freely available for download at http://panomics.pnnl.gov/metabolites/ with new molecules being added frequently.",2017-09-28 +21898268,Seed bioinformatics.,"Analysis of gene expression data sets is a potent tool for gene function prediction, cis-element discovery, and hypothesis generation for the model plant Arabidopsis thaliana, and more recently for other agriculturally relevant species. In the case of Arabidopsis thaliana, experiments conducted by individual researchers to document its transcriptome have led to large numbers of data sets being made publicly available for data mining by the so-called ""electronic northerns,"" co-expression analysis and other methods. Given that approximately 50% of the genes in Arabidopsis have no function ascribed to them by ""conventional"" homology searches, and that only around 10% of the genes have had their function experimentally determined in the laboratory, these analyses can accelerate the identification of potential gene function at the click of a mouse. 
This chapter covers the use of bioinformatic data mining tools available at the Bio-Array Resource ( http://www.bar.utoronto.ca ) and elsewhere for hypothesis generation in the context of seed biology.",2011-01-01 +26158728,Heterogeneous Network Edge Prediction: A Data Integration Approach to Prioritize Disease-Associated Genes.,"The first decade of Genome Wide Association Studies (GWAS) has uncovered a wealth of disease-associated variants. Two important derivations will be the translation of this information into a multiscale understanding of pathogenic variants and leveraging existing data to increase the power of existing and future studies through prioritization. We explore edge prediction on heterogeneous networks--graphs with multiple node and edge types--for accomplishing both tasks. First we constructed a network with 18 node types--genes, diseases, tissues, pathophysiologies, and 14 MSigDB (molecular signatures database) collections--and 19 edge types from high-throughput publicly-available resources. From this network composed of 40,343 nodes and 1,608,168 edges, we extracted features that describe the topology between specific genes and diseases. Next, we trained a model from GWAS associations and predicted the probability of association between each protein-coding gene and each of 29 well-studied complex diseases. The model, which achieved 132-fold enrichment in precision at 10% recall, outperformed any individual domain, highlighting the benefit of integrative approaches. We identified pleiotropy, transcriptional signatures of perturbations, pathways, and protein interactions as influential mechanisms explaining pathogenesis. Our method successfully predicted the results (with AUROC = 0.79) from a withheld multiple sclerosis (MS) GWAS despite starting with only 13 previously associated genes. 
Finally, we combined our network predictions with statistical evidence of association to propose four novel MS genes, three of which (JAK2, REL, RUNX3) validated on the masked GWAS. Furthermore, our predictions provide biological support highlighting REL as the causal gene within its gene-rich locus. Users can browse all predictions online (http://het.io). Heterogeneous network edge prediction effectively prioritized genetic associations and provides a powerful new approach for data integration across multiple domains.",2015-07-09 +29557978,"A suite of global, cross-scale topographic variables for environmental and biodiversity modeling.","Topographic variation underpins a myriad of patterns and processes in hydrology, climatology, geography and ecology and is key to understanding the variation of life on the planet. A fully standardized and global multivariate product of different terrain features has the potential to support many large-scale research applications, however to date, such datasets are unavailable. Here we used the digital elevation model products of global 250 m GMTED2010 and near-global 90 m SRTM4.1dev to derive a suite of topographic variables: elevation, slope, aspect, eastness, northness, roughness, terrain roughness index, topographic position index, vector ruggedness measure, profile/tangential curvature, first/second order partial derivative, and 10 geomorphological landform classes. We aggregated each variable to 1, 5, 10, 50 and 100 km spatial grains using several aggregation approaches. While a cross-correlation underlines the high similarity of many variables, a more detailed view in four mountain regions reveals local differences, as well as scale variations in the aggregated variables at different spatial grains. 
All newly-developed variables are available for download at Data Citation 1 and for download and visualization at http://www.earthenv.org/topography.",2018-03-20 +28958771,[Ablative therapy in urology: Good practice and perspective].,"

Introduction

To expose the main point of discussion from present ablative therapies' guidelines and propose global perspectives.

Materials and methods

A review of the scientific literature was performed in Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com/) using different associations of keywords ""ablative therapy"" ; ""prostate cancer""; ""kidney cancer""; ""guidelines""; ""hybrid operating room"". Publications obtained were selected based on methodology, language and relevance.

Results

Present guidelines on ablative therapies in urology are, considering authors and organs, either particularly prudent (EAU guidelines for prostate and kidney) or relatively optimistic (CIRSE guidelines). This discrepancy is related to a low level of proof. So, a new approach is mandatory: more homogeneous in methodology, and especially more open to a new organization sparing economic efficiency. The objective will be to get multifunctional and multidisciplinary platforms, in fact and in mind. It will induce, in the future, a deep reflection about training and specialties' boundaries.

Conclusion

Ablative therapies represent a crucial stake for urology and a clear example of medicosurgical evolution in future, based on new technologies (energy, robotic, imaging). A serious and deep reflection is necessary to prepare it and be deeply involved in.",2017-09-27 +25361972,MobiDB 2.0: an improved database of intrinsically disordered and mobile proteins.,"MobiDB (http://mobidb.bio.unipd.it/) is a database of intrinsically disordered and mobile proteins. Intrinsically disordered regions are key for the function of numerous proteins. Here we provide a new version of MobiDB, a centralized source aimed at providing the most complete picture on different flavors of disorder in protein structures covering all UniProt sequences (currently over 80 million). The database features three levels of annotation: manually curated, indirect and predicted. Manually curated data is extracted from the DisProt database. Indirect data is inferred from PDB structures that are considered an indication of intrinsic disorder. The 10 predictors currently included (three ESpritz flavors, two IUPred flavors, two DisEMBL flavors, GlobPlot, VSL2b and JRONN) enable MobiDB to provide disorder annotations for every protein in absence of more reliable data. The new version also features a consensus annotation and classification for long disordered regions. In order to complement the disorder annotations, MobiDB features additional annotations from external sources. Annotations from the UniProt database include post-translational modifications and linear motifs. Pfam annotations are displayed in graphical form and are link-enabled, allowing the user to visit the corresponding Pfam page for further information. 
Experimental protein-protein interactions from STRING are also classified for disorder content.",2014-10-31 +29309683,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on Hearing Preservation Outcomes in Patients With Sporadic Vestibular Schwannomas.,"Question 1:What is the overall probability of maintaining serviceable hearing following stereotactic radiosurgery utilizing modern dose planning, at 2, 5, and 10 yr following treatment? Recommendation:Level 3: Individuals who meet these criteria and are considering stereotactic radiosurgery should be counseled that there is moderately high probability (>50%-75%) of hearing preservation at 2 yr, moderately high probability (>50%-75%) of hearing preservation at 5 yr, and moderately low probability (>25%-50%) of hearing preservation at 10 yr. Question 2:Among patients with AAO-HNS (American Academy of Otolaryngology-Head and Neck Surgery hearing classification) class A or GR (Gardner-Robertson hearing classification) grade I hearing at baseline, what is the overall probability of maintaining serviceable hearing following stereotactic radiosurgery, utilizing modern dose planning, at 2, 5, and 10 yr following treatment? Recommendation:Level 3: Individuals who meet these criteria and are considering stereotactic radiosurgery should be counseled that there is a high probability (>75%-100%) of hearing preservation at 2 yr, moderately high probability (>50%-75%) of hearing preservation at 5 yr, and moderately low probability (>25%-50%) of hearing preservation at 10 yr. Question 3:What patient- and tumor-related factors influence progression to nonserviceable hearing following stereotactic radiosurgery using ≤13 Gy to the tumor margin? 
Recommendation:Level 3: Individuals who meet these criteria and are considering stereotactic radiosurgery should be counseled regarding the probability of successful hearing preservation based on the following prognostic data: the most consistent prognostic features associated with maintenance of serviceable hearing are good preoperative word recognition and/or pure tone thresholds with variable cut-points reported, smaller tumor size, marginal tumor dose ≤12 Gy, and cochlear dose ≤4 Gy. Age and sex are not strong predictors of hearing preservation outcome. Question 4:What is the overall probability of maintaining serviceable hearing following microsurgical resection of small to medium-sized sporadic vestibular schwannomas early after surgery, at 2, 5, and 10 yr following treatment? Recommendation:Level 3: Individuals who meet these criteria and are considering microsurgical resection should be counseled that there is a moderately low probability (>25%-50%) of hearing preservation immediately following surgery, moderately low probability (>25%-50%) of hearing preservation at 2 yr, moderately low probability (>25%-50%) of hearing preservation at 5 yr, and moderately low probability (>25%-50%) of hearing preservation at 10 yr. Question 5:Among patients with AAO-HNS class A or GR grade I hearing at baseline, what is the overall probability of maintaining serviceable hearing following microsurgical resection of small to medium-sized sporadic vestibular schwannomas early after surgery, at 2, 5, and 10 yr following treatment? Recommendation:Level 3: Individuals who meet these criteria and are considering microsurgical resection should be counseled that there is a moderately high probability (>50%-75%) of hearing preservation immediately following surgery, moderately high probability (>50%-75%) of hearing preservation at 2 yr, moderately high probability (>50%-75%) of hearing preservation at 5 yr, and moderately low probability (>25%-50%) of hearing preservation at 10 yr. 
Question 6:What patient- and tumor-related factors influence progression to nonserviceable hearing following microsurgical resection of small to medium-sized sporadic vestibular schwannomas? Recommendation:Level 3: Individuals who meet these criteria and are considering microsurgical resection should be counseled regarding the probability of successful hearing preservation based on the following prognostic data: the most consistent prognostic features associated with maintenance of serviceable hearing are good preoperative word recognition and/or pure tone thresholds with variable cut-points reported, smaller tumor size commonly less than 1 cm, and presence of a distal internal auditory canal cerebrospinal fluid fundal cap. Age and sex are not strong predictors of hearing preservation outcome. Question 7:What is the overall probability of maintaining serviceable hearing with conservative observation of vestibular schwannomas at 2, 5, and 10 yr following diagnosis? Recommendation:Level 3: Individuals who meet these criteria and are considering observation should be counseled that there is a high probability (>75%-100%) of hearing preservation at 2 yr, moderately high probability (>50%-75%) of hearing preservation at 5 yr, and moderately low probability (>25%-50%) of hearing preservation at 10 yr. Question 8:Among patients with AAO-HNS class A or GR grade I hearing at baseline, what is the overall probability of maintaining serviceable hearing with conservative observation at 2 and 5 yr following diagnosis? Recommendation:Level 3: Individuals who meet these criteria and are considering stereotactic radiosurgery should be counseled that there is a high probability (>75%-100%) of hearing preservation at 2 yr, and moderately high probability (>50%-75%) of hearing preservation at 5 yr. Insufficient data were available to determine the probability of hearing preservation at 10 yr for this population subset. 
Question 9:What patient and tumor-related factors influence progression to nonserviceable hearing during conservative observation? Recommendation:Level 3: Individuals who meet these criteria and are considering observation should be counseled regarding probability of successful hearing preservation based on the following prognostic data: the most consistent prognostic features associated with maintenance of serviceable hearing are good preoperative word recognition and/or pure tone thresholds with variable cut-points reported, as well as nongrowth of the tumor. Tumor size at the time of diagnosis, age, and sex do not predict future development of nonserviceable hearing during observation.  The full guideline can be found at: https://www.cns.org/guidelines/guidelines-manage-ment-patients-vestibular-schwannoma/chapter_3.",2018-02-01 +,"Revision of the Cales noacki species complex (Hymenoptera, Chalcidoidea, Aphelinidae)","The genus Cales (Hymenoptera: Aphelinidae) includes 13 species worldwide, of which 10 form a highly morphologically uniform species complex with a native range in the Neotropical region. We recognize ten species previously attributed to a single Neotropical species, Cales noacki Howard, which in the strict sense is a species broadly disseminated to control woolly whitefly. A neotype is designated for C. noacki, and it is redescribed based on specimens molecularly determined to be conspecific with the neotype. Newly described species include: C. bicolor Mottern, n.sp., C. breviclava Mottern, n.sp., C. brevisensillum Mottern n.sp., C. curvigladius Mottern, n.sp., C. longiseta Mottern, n.sp., C. multisensillum Mottern n.sp., C. noyesi Mottern, n.sp., C. parvigladius Mottern, n.sp. and C. rosei Mottern, n.sp. Species are delimited based on a combination of morphological and molecular data (28S‐D2 rDNA and COI). 
Additional specimens are included in the phylogenetic analyses and although these likely represent several new species, we lack sufficient specimen sampling to describe them at this time. Cales are highly morphologically conserved and character‐poor, resulting in several cryptic species. A molecular phylogeny of the known Neotropical species based on 28S‐D2–5 rDNA and a 390‐bp segment of COI is included, and identification keys to males and females are provided. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:7FEB0479‐9B2E‐48E8‐8603‐4B7C2759D4EC.",2014-04-01 +29666960,Consciousness Indexing and Outcome Prediction with Resting-State EEG in Severe Disorders of Consciousness.,"We applied the following methods to resting-state EEG data from patients with disorders of consciousness (DOC) for consciousness indexing and outcome prediction: microstates, entropy (i.e. approximate, permutation), power in alpha and delta frequency bands, and connectivity (i.e. weighted symbolic mutual information, symbolic transfer entropy, complex network analysis). Patients with unresponsive wakefulness syndrome (UWS) and patients in a minimally conscious state (MCS) were classified into these two categories by fitting and testing a generalised linear model. We aimed subsequently to develop an automated system for outcome prediction in severe DOC by selecting an optimal subset of features using sequential floating forward selection (SFFS). The two outcome categories were defined as UWS or dead, and MCS or emerged from MCS. Percentage of time spent in microstate D in the alpha frequency band performed best at distinguishing MCS from UWS patients. The average clustering coefficient obtained from thresholding beta coherence performed best at predicting outcome. 
The optimal subset of features selected with SFFS consisted of the frequency of microstate A in the 2-20 Hz frequency band, path length obtained from thresholding alpha coherence, and average path length obtained from thresholding alpha coherence. Combining these features seemed to afford high prediction power. Python and MATLAB toolboxes for the above calculations are freely available under the GNU public license for non-commercial use ( https://qeeg.wordpress.com ).",2018-04-17 +30415847,Short communication: Meta-analysis of dairy cows fed conventional sorghum or corn silages compared with brown midrib sorghum silage.,"A meta-analysis was conducted to compare the effects of feeding dairy cows conventional sorghum silage (CSS) or conventional corn silage (CCS) compared with brown midrib sorghum silage (BMRSS) diets on dry matter intake (DMI), milk production, and milk composition. Data from 9 published articles (1984 to 2015) were used to contrast diets with CSS (7 means comparisons; 104 cows) or CCS (13 means comparisons; 204 cows) versus BMRSS diets. Statistical analysis was performed using fixed or random effects models with the Metafor package of R (https://www.R-project.org). The degree of heterogeneity was measured with the I2 statistic, and publication bias was determined with funnel plots and Egger's regression test. Other sources of heterogeneity of response were analyzed through meta-regression. Estimated effect size was calculated for DMI, milk production, and milk composition. No evidence of publication bias was observed for any variable tested. The highest degree of heterogeneity (I2 = 41.5 and 72.6%) was observed for DMI among dependent variables tested in both comparisons, indicating that intake responses to silage type are rather inconsistent; in contrast, milk production had the lowest degree of heterogeneity (I2 = 0%), supporting the idea that the responses of this variable to silage type were very consistent across studies. 
Compared with BMRSS diets, cows fed CSS diets exhibited decreased milk production (1.64 kg/d), milk fat concentration (0.09%), milk fat yield (0.08 kg/d), milk protein yield (0.04 kg/d), and milk lactose yield (0.16 kg/d) and tended to decrease DMI (0.83 kg/d). Compared with CCS diets, cows fed BMRSS diets increased milk fat concentration (0.10%), but decreased milk protein concentration (0.06%) and tended to increase lactose yield (0.08 kg/d). Meta-regression indicated that days in milk affected DMI and milk production when CSS diets were compared with BMRSS diets, and DMI when CCS diets were compared with BMRSS diets. Additionally, the inclusion rate of silage in the diet and dietary neutral detergent fiber affected yields of milk fat and lactose, respectively, when CCS and BMRSS diets were compared. Overall, lactation performance improved when cows were fed diets formulated with BMRSS compared with CSS, but performance was not different for cows fed BMRSS and CCS diets. However, the small sample size may have influenced these results by increasing the margin of the error and, concurrently, the power of the meta-analysis. Results of this analysis suggest that additional research is needed to explore the effects of days in milk and the inclusion rates of silages in the diets when comparing BMRSS with CSS or CCS.",2018-11-08 +30040874,Distinct disruptions in Land's cycle remodeling of glycerophosphocholines in murine cortex mark symptomatic onset and progression in two Alzheimer's disease mouse models.,"Changes in glycerophosphocholine metabolism are observed in Alzheimer's disease; however, it is not known whether these metabolic disruptions are linked to cognitive decline. Here, using unbiased lipidomic approaches and direct biochemical assessments, we profiled Land's cycle lipid remodeling in the hippocampus, frontal cortex, and temporal-parietal-entorhinal cortices of human amyloid beta precursor protein (ΑβPP) over-expressing mice. 
We identified a cortex-specific hypo-metabolic signature at symptomatic onset and a cortex-specific hyper-metabolic signature of Land's cycle glycerophosphocholine remodeling over the course of progressive behavioral decline. When N5 TgCRND8 and ΑβPPS we /PSIdE9 mice first exhibited deficits in the Morris Water Maze, levels of lyso-phosphatidylcholines, LPC(18:0/0:0), LPC(16:0/0:0), LPC(24:6/0:0), LPC(25:6/0:0), the lyso-platelet-activating factor (PAF), LPC(O-18:0/0:0), and the PAF, PC(O-22:6/2:0), declined as a result of reduced calcium-dependent cytosolic phospholipase A2 α (cPLA2 α) activity in all cortices but not hippocampus. Chronic intermittent hypoxia, an environmental risk factor that triggers earlier learning memory impairment in ΑβPPS we /PSIdE9 mice, elicited these same metabolic changes in younger animals. Thus, this lipidomic signature of phenoconversion appears age-independent. By contrast, in symptomatic N5 TgCRND8 mice, cPLA2 α activity progressively increased; overall Lyso-phosphatidylcholines (LPC) and LPC(O) and PC(O-18:1/2:0) levels progressively rose. Enhanced cPLA2 α activity was only detected in transgenic mice; however, age-dependent increases in the PAF acetylhydrolase 1b α1 to α2 expression ratio, evident in both transgenic and non-transgenic mice, reduced PAF hydrolysis thereby contributing to PAF accumulation. Taken together, these data identify distinct age-independent and age-dependent disruptions in Land's cycle metabolism linked to symptomatic onset and progressive behavioral decline in animals with pre-existing Αβ pathology. OPEN SCIENCE BADGES: This article has received a badge for *Open Materials* because it provided all relevant information to reproduce the study in the manuscript. The complete Open Science Disclosure form for this article can be found at the end of the article. 
More information about the Open Practices badges can be found at https://cos.io/our-services/open-science-badges/.",2018-11-08 +28726806,Data sharing as a national quality improvement program: reporting on BRCA1 and BRCA2 variant-interpretation comparisons through the Canadian Open Genetics Repository (COGR).,"PurposeThe purpose of this study was to develop a national program for Canadian diagnostic laboratories to compare DNA-variant interpretations and resolve discordant-variant classifications using the BRCA1 and BRCA2 genes as a case study.MethodsBRCA1 and BRCA2 variant data were uploaded and shared through the Canadian Open Genetics Repository (COGR; http://www.opengenetics.ca). A total of 5,554 variant observations were submitted; classification differences were identified and comparison reports were sent to participating laboratories. Each site had the opportunity to reclassify variants. The data were analyzed before and after the comparison report process to track concordant- or discordant-variant classifications by three different models.ResultsVariant-discordance rates varied by classification model: 38.9% of variants were discordant when using a five-tier model, 26.7% with a three-tier model, and 5.0% with a two-tier model. After the comparison report process, the proportion of discordant variants dropped to 30.7% with the five-tier model, to 14.2% with the three-tier model, and to 0.9% using the two-tier model.ConclusionWe present a Canadian interinstitutional quality improvement program for DNA-variant interpretations. 
Sharing of variant knowledge by clinical diagnostic laboratories will allow clinicians and patients to make more informed decisions and lead to better patient outcomes.",2017-07-20 +28231282,Mindboggling morphometry of human brains.,"Mindboggle (http://mindboggle.info) is an open source brain morphometry platform that takes in preprocessed T1-weighted MRI data and outputs volume, surface, and tabular data containing label, feature, and shape information for further analysis. In this article, we document the software and demonstrate its use in studies of shape variation in healthy and diseased humans. The number of different shape measures and the size of the populations make this the largest and most detailed shape analysis of human brains ever conducted. Brain image morphometry shows great potential for providing much-needed biological markers for diagnosing, tracking, and predicting progression of mental health disorders. Very few software algorithms provide more than measures of volume and cortical thickness, while more subtle shape measures may provide more sensitive and specific biomarkers. Mindboggle computes a variety of (primarily surface-based) shapes: area, volume, thickness, curvature, depth, Laplace-Beltrami spectra, Zernike moments, etc. We evaluate Mindboggle's algorithms using the largest set of manually labeled, publicly available brain images in the world and compare them against state-of-the-art algorithms where they exist. All data, code, and results of these evaluations are publicly available.",2017-02-23 +28472263,A deep learning framework for improving long-range residue-residue contact prediction using a hierarchical strategy.,"

Motivation

Residue-residue contacts are of great value for protein structure prediction, since contact information, especially from those long-range residue pairs, can significantly reduce the complexity of conformational sampling for protein structure prediction in practice. Despite progresses in the past decade on protein targets with abundant homologous sequences, accurate contact prediction for proteins with limited sequence information is still far from satisfaction. Methodologies for these hard targets still need further improvement.

Results

We presented a computational program DeepConPred, which includes a pipeline of two novel deep-learning-based methods (DeepCCon and DeepRCon) as well as a contact refinement step, to improve the prediction of long-range residue contacts from primary sequences. When compared with previous prediction approaches, our framework employed an effective scheme to identify optimal and important features for contact prediction, and was only trained with coevolutionary information derived from a limited number of homologous sequences to ensure robustness and usefulness for hard targets. Independent tests showed that 59.33%/49.97%, 64.39%/54.01% and 70.00%/59.81% of the top L/5, top L/10 and top 5 predictions were correct for CASP10/CASP11 proteins, respectively. In general, our algorithm ranked as one of the best methods for CASP targets.

Availability and implementation

All source data and codes are available at http://166.111.152.91/Downloads.html .

Contact

hgong@tsinghua.edu.cn or zengjy321@tsinghua.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +29141195,Determination of the effect of Pinellia ternata (Thunb.) Breit. on nervous system development by proteomics.,"

Ethnopharmacological relevance

Banxia (BX) is the dried tuber of Pinellia ternata (Thunb.) Breit., a commonly prescribed Chinese medicinal herb for the treatment of cough, phlegm, and vomiting in pregnant women. However, raw BX has been demonstrated to exert toxic effects on reproduction and the precise and comprehensive mechanisms remain elusive.

Aim of the study

We applied an iTRAQ (isobaric tags for relative and absolute quantitation, iTRAQ)-based proteomic method to explore the mechanisms of raw BX-induced fetal toxicity in mice.

Materials and methods

The mice were separated into two groups, control mice and BX-treated mice. From gestation days 6-8, the control group was treated with normal saline and the BX group was exposed to BX suspension (2.275g/kg/day). Gastrulae were obtained and analyzed using the quantitative proteomic approach of iTRAQ coupled to liquid chromatography-tandem mass spectrometry (LC-MS/MS). A multi-omics data analysis tool, OmicsBean (http://www.omicsbean.cn), was employed to conduct bioinformatic analysis of differentially abundant proteins (DAPs). Quantitative real-time PCR (qRT-PCR) and western blotting methods were applied to detect the protein expression levels and validate the quality of the proteomics.

Results

A total of 1245 proteins were identified with < 1% false discovery rate (FDR) and 583 protein abundance changes were confidently assessed. Moreover, 153 proteins identified in BX-treated samples showed significant differences in abundance. Bioinformatics analysis showed that the functions of 37 DAPs were predominantly related to nervous system development. The expression levels of the selected proteins for quantification by qRT-PCR or western blotting were consistent with the results in iTRAQ-labeled proteomics data.

Conclusion

The results suggested that oral administration of BX in mice may cause fetal abnormality of the nervous system. The findings may be helpful to elucidate the underlying mechanisms of BX-induced embryotoxicity.",2017-11-13 +23663819,Evaluation of a large-scale weight management program using the consolidated framework for implementation research (CFIR).,"

Background

In the United States, as in many other parts of the world, the prevalence of overweight/obesity is at epidemic proportions in the adult population and even higher among Veterans. To address the high prevalence of overweight/obesity among Veterans, the MOVE!(®) weight management program was disseminated nationally to Veteran Affairs (VA) medical centers. The objective of this paper is two-fold: to describe factors that explain the wide variation in implementation of MOVE!; and to illustrate, step-by-step, how to apply a theory-based framework using qualitative data.

Methods

Five VA facilities were selected to maximize variation in implementation effectiveness and geographic location. Twenty-four key stakeholders were interviewed about their experiences in implementing MOVE!. The Consolidated Framework for Implementation Research (CFIR) was used to guide collection and analysis of qualitative data. Constructs that most strongly influence implementation effectiveness were identified through a cross-case comparison of ratings.

Results

Of the 31 CFIR constructs assessed, ten constructs strongly distinguished between facilities with low versus high program implementation effectiveness. The majority (six) were related to the inner setting: networks and communications; tension for change; relative priority; goals and feedback; learning climate; and leadership engagement. One construct each, from intervention characteristics (relative advantage) and outer setting (patient needs and resources), plus two from process (executing and reflecting) also strongly distinguished between high and low implementation. Two additional constructs weakly distinguished, 16 were mixed, three constructs had insufficient data to assess, and one was not applicable. Detailed descriptions of how each distinguishing construct manifested in study facilities and a table of recommendations is provided.

Conclusions

This paper presents an approach for using the CFIR to code and rate qualitative data in a way that will facilitate comparisons across studies. An online Wiki resource (http://www.wiki.cfirwiki.net) is available, in addition to the information presented here, that contains much of the published information about the CFIR and its constructs and sub-constructs. We hope that the described approach and open access to the CFIR will generate wide use and encourage dialogue and continued refinement of both the framework and approaches for applying it.",2013-05-10 +29765161,Expanding the BLUP alphabet for genomic prediction adaptable to the genetic architectures of complex traits.,"Improvement of statistical methods is crucial for realizing the potential of increasingly dense genetic markers. Bayesian methods treat all markers as random effects, exhibit an advantage on dense markers, and offer the flexibility of using different priors. In contrast, genomic best linear unbiased prediction (gBLUP) is superior in computing speed, but only superior in prediction accuracy for extremely complex traits. Currently, the existing variety in the BLUP method is insufficient for adapting to new sequencing technologies and traits with different genetic architectures. In this study, we found two ways to change the kinship derivation in the BLUP method that improve prediction accuracy while maintaining the computational advantage. First, using the settlement under progressively exclusive relationship (SUPER) algorithm, we substituted all available markers with estimated quantitative trait nucleotides (QTNs) to derive kinship. Second, we compressed individuals into groups based on kinship, and then used the groups as random effects instead of individuals. The two methods were named as SUPER BLUP (sBLUP) and compressed BLUP (cBLUP). 
Analyses on both simulated and real data demonstrated that these two methods offer flexibility for evaluating a variety of traits, covering a broadened realm of genetic architectures. For traits controlled by small numbers of genes, sBLUP outperforms Bayesian LASSO (least absolute shrinkage and selection operator). For traits with low heritability, cBLUP outperforms both gBLUP and Bayesian LASSO methods. We implemented these new BLUP alphabet series methods in an R package, Genome Association and Prediction Integrated Tool (GAPIT), available at http://zzlab.net/GAPIT .",2018-05-16 +22194640,IMG: the Integrated Microbial Genomes database and comparative analysis system.,"The Integrated Microbial Genomes (IMG) system serves as a community resource for comparative analysis of publicly available genomes in a comprehensive integrated context. IMG integrates publicly available draft and complete genomes from all three domains of life with a large number of plasmids and viruses. IMG provides tools and viewers for analyzing and reviewing the annotations of genes and genomes in a comparative context. IMG's data content and analytical capabilities have been continuously extended through regular updates since its first release in March 2005. IMG is available at http://img.jgi.doe.gov. Companion IMG systems provide support for expert review of genome annotations (IMG/ER: http://img.jgi.doe.gov/er), teaching courses and training in microbial genome analysis (IMG/EDU: http://img.jgi.doe.gov/edu) and analysis of genomes related to the Human Microbiome Project (IMG/HMP: http://www.hmpdacc-resources.org/img_hmp).",2012-01-01 +29220467,Query expansion using MeSH terms for dataset retrieval: OHSU at the bioCADDIE 2016 dataset retrieval challenge. 
,https://biocaddie.org/benchmark-data.,2017-01-01 +26305108,PASS Targets: Ligand-based multi-target computational system based on a public data and naïve Bayes approach.,"Estimation of interactions between drug-like compounds and drug targets is very important for drug discovery and toxicity assessment. Using data extracted from the 19th version of the ChEMBL database ( https://www.ebi.ac.uk/chembl ) as a training set and a Bayesian-like method realized in PASS software ( http://www.way2drug.com/PASSOnline ), we developed a computational tool for the prediction of interactions between protein targets and drug-like compounds. After training, PASS Targets became able to predict interactions of drug-like compounds with 2507 protein targets from different organisms based on analysis of structure-activity relationships for 589,107 different chemical compounds. The prediction accuracy, estimated as AUC ROC calculated by the leave-one-out cross-validation and 20-fold cross-validation procedures, was about 96%. Average AUC ROC value was about 90% for the external test set from approximately 700 known drugs interacting with 206 protein targets.",2015-08-25 +29220453,A publicly available benchmark for biomedical dataset retrieval: the reference standard for the 2016 bioCADDIE dataset retrieval challenge. ,https://biocaddie.org/benchmark-data.,2017-01-01 +26826210,An international survey on the current use of electroacupuncture.,"

Background

Despite many research publications, it is unclear how widely electroacupuncture (EA) and related modalities are used in everyday practice. It is also uncertain who uses them, for what conditions, and with what results. We aimed to survey practitioners about their use of and training in EA. We also sought to determine how much the open-access English-language database at http://www.electroacupunctureknowledge.com (EAK) is used, or might be used in the future, if updated.

Methods

A survey was developed using several rounds of consultation with a focus group and others. Professional acupuncture membership organisations were contacted to assess their willingness to notify their members. The survey was tested before its launch.

Results

Thirty-four professional organisations agreed to participate, together with two research bodies and six UK training institutes. Potentially, around 50 000 professionals practising acupuncture knew about the survey, to which there were 768 responses. Data were analysed for respondent demographics. Around 70% used EA, but <25% used related electrotherapy modalities. Men were more likely than women to use more than one modality. Only around 7% of respondents used non-traditional acupuncture modalities without prior training. However, awareness and usage of the EAK database was low, although around 80% of respondents stated they might use the database in the future, primarily to improve clinical practice.

Conclusions

To the best of our knowledge, this is the largest survey on EA and related modalities ever conducted. As such, its results are likely to be of interest to acupuncture and other practitioners (whether or not they use EA), patients, policymakers, and funding agencies.",2016-01-29 +27606777,Analyzing and interpreting genome data at the network level with ConsensusPathDB.,"ConsensusPathDB consists of a comprehensive collection of human (as well as mouse and yeast) molecular interaction data integrated from 32 different public repositories and a web interface featuring a set of computational methods and visualization tools to explore these data. This protocol describes the use of ConsensusPathDB (http://consensuspathdb.org) with respect to the functional and network-based characterization of biomolecules (genes, proteins and metabolites) that are submitted to the system either as a priority list or together with associated experimental data such as RNA-seq. The tool reports interaction network modules, biochemical pathways and functional information that are significantly enriched by the user's input, applying computational methods for statistical over-representation, enrichment and graph analysis. The results of this protocol can be observed within a few minutes, even with genome-wide data. The resulting network associations can be used to interpret high-throughput data mechanistically, to characterize and prioritize biomarkers, to integrate different omics levels, to design follow-up functional assay experiments and to generate topology for kinetic models at different scales.",2016-09-08 +27899611,The ExAC browser: displaying reference data information from over 60 000 exomes.,"Worldwide, hundreds of thousands of humans have had their genomes or exomes sequenced, and access to the resulting data sets can provide valuable information for variant interpretation and understanding gene function. 
Here, we present a lightweight, flexible browser framework to display large population datasets of genetic variation. We demonstrate its use for exome sequence data from 60 706 individuals in the Exome Aggregation Consortium (ExAC). The ExAC browser provides gene- and transcript-centric displays of variation, a critical view for clinical applications. Additionally, we provide a variant display, which includes population frequency and functional annotation data as well as short read support for the called variant. This browser is open-source, freely available at http://exac.broadinstitute.org, and has already been used extensively by clinical laboratories worldwide.",2016-11-28 +26157620,Multi-level machine learning prediction of protein-protein interactions in Saccharomyces cerevisiae.,"Accurate identification of protein-protein interactions (PPI) is the key step in understanding proteins' biological functions, which are typically context-dependent. Many existing PPI predictors rely on aggregated features from protein sequences, however only a few methods exploit local information about specific residue contacts. In this work we present a two-stage machine learning approach for prediction of protein-protein interactions. We start with the carefully filtered data on protein complexes available for Saccharomyces cerevisiae in the Protein Data Bank (PDB) database. First, we build linear descriptions of interacting and non-interacting sequence segment pairs based on their inter-residue distances. Secondly, we train machine learning classifiers to predict binary segment interactions for any two short sequence fragments. The final prediction of the protein-protein interaction is done using the 2D matrix representation of all-against-all possible interacting sequence segments of both analysed proteins. The level-I predictor achieves 0.88 AUC for micro-scale, i.e., residue-level prediction. 
The level-II predictor improves the results further by a more complex learning paradigm. We perform 30-fold macro-scale, i.e., protein-level cross-validation experiment. The level-II predictor using PSIPRED-predicted secondary structure reaches 0.70 precision, 0.68 recall, and 0.70 AUC, whereas other popular methods provide results below 0.6 threshold (recall, precision, AUC). Our results demonstrate that multi-scale sequence features aggregation procedure is able to improve the machine learning results by more than 10% as compared to other sequence representations. Prepared datasets and source code for our experimental pipeline are freely available for download from: http://zubekj.github.io/mlppi/ (open source Python implementation, OS independent).",2015-07-02 +27824337,3-dimensional electron microscopic imaging of the zebrafish olfactory bulb and dense reconstruction of neurons.,"Large-scale reconstructions of neuronal populations are critical for structural analyses of neuronal cell types and circuits. Dense reconstructions of neurons from image data require ultrastructural resolution throughout large volumes, which can be achieved by automated volumetric electron microscopy (EM) techniques. We used serial block face scanning EM (SBEM) and conductive sample embedding to acquire an image stack from an olfactory bulb (OB) of a zebrafish larva at a voxel resolution of 9.25×9.25×25 nm3. Skeletons of 1,022 neurons, 98% of all neurons in the OB, were reconstructed by manual tracing and efficient error correction procedures. An ergonomic software package, PyKNOSSOS, was created in Python for data browsing, neuron tracing, synapse annotation, and visualization. The reconstructions allow for detailed analyses of morphology, projections and subcellular features of different neuron types. The high density of reconstructions enables geometrical and topological analyses of the OB circuitry. 
Image data can be accessed and viewed through the neurodata web services (http://www.neurodata.io). Raw data and reconstructions can be visualized in PyKNOSSOS.",2016-11-08 +27875984,Plastid: nucleotide-resolution analysis of next-generation sequencing and genomics data.,"

Background

Next-generation sequencing (NGS) informs many biological questions with unprecedented depth and nucleotide resolution. These assays have created a need for analytical tools that enable users to manipulate data nucleotide-by-nucleotide robustly and easily. Furthermore, because many NGS assays encode information jointly within multiple properties of read alignments - for example, in ribosome profiling, the locations of ribosomes are jointly encoded in alignment coordinates and length - analytical tools are often required to extract the biological meaning from the alignments before analysis. Many assay-specific pipelines exist for this purpose, but there remains a need for user-friendly, generalized, nucleotide-resolution tools that are not limited to specific experimental regimes or analytical workflows.

Results

Plastid is a Python library designed specifically for nucleotide-resolution analysis of genomics and NGS data. As such, Plastid is designed to extract assay-specific information from read alignments while retaining generality and extensibility to novel NGS assays. Plastid represents NGS and other biological data as arrays of values associated with genomic or transcriptomic positions, and contains configurable tools to convert data from a variety of sources to such arrays. Plastid also includes numerous tools to manipulate even discontinuous genomic features, such as spliced transcripts, with nucleotide precision. Plastid automatically handles conversion between genomic and feature-centric coordinates, accounting for splicing and strand, freeing users of burdensome accounting. Finally, Plastid's data models use consistent and familiar biological idioms, enabling even beginners to develop sophisticated analytical workflows with minimal effort.

Conclusions

Plastid is a versatile toolkit that has been used to analyze data from multiple NGS assays, including RNA-seq, ribosome profiling, and DMS-seq. It forms the genomic engine of our ORF annotation tool, ORF-RATER, and is readily adapted to novel NGS assays. Examples, tutorials, and extensive documentation can be found at https://plastid.readthedocs.io .",2016-11-22 +27980643,Genome-wide QTL and eQTL analyses using Mendel.,"Pedigree genome-wide association studies (GWAS) (Option 29) in the current version of the Mendel software is an optimized subroutine for performing large-scale genome-wide quantitative trait locus (QTL) analysis. This analysis (a) works for random sample data, pedigree data, or a mix of both; (b) is highly efficient in both run time and memory requirement; (c) accommodates both univariate and multivariate traits; (d) works for autosomal and x-linked loci; (e) correctly deals with missing data in traits, covariates, and genotypes; (f) allows for covariate adjustment and constraints among parameters; (g) uses either theoretical or single nucleotide polymorphism (SNP)-based empirical kinship matrix for additive polygenic effects; (h) allows extra variance components such as dominant polygenic effects and household effects; (i) detects and reports outlier individuals and pedigrees; and (j) allows for robust estimation via the t-distribution. This paper assesses these capabilities on the genetics analysis workshop 19 (GAW19) sequencing data. We analyzed simulated and real phenotypes for both family and random sample data sets. For instance, when jointly testing the 8 longitudinally measured systolic blood pressure and diastolic blood pressure traits, it takes Mendel 78 min on a standard laptop computer to read, quality check, and analyze a data set with 849 individuals and 8.3 million SNPs. Genome-wide expression QTL analysis of 20,643 expression traits on 641 individuals with 8.3 million SNPs takes 30 h using 20 parallel runs on a cluster. 
Mendel is freely available at http://www.genetics.ucla.edu/software.",2016-10-18 +29508291,RNA-Seq-Based Transcript Structure Analysis with TrBorderExt.,"RNA-Seq has become a routine strategy for genome-wide gene expression comparisons in bacteria. Despite lower resolution in transcript border parsing compared with dRNA-Seq, TSS-EMOTE, Cappable-seq, Term-seq, and others, directional RNA-Seq still illustrates its advantages: low cost, quantification and transcript border analysis with a medium resolution (±10-20 nt). To facilitate mining of directional RNA-Seq datasets especially with respect to transcript structure analysis, we developed a tool, TrBorderExt, which can parse transcript start sites and termination sites accurately in bacteria. A detailed protocol is described in this chapter for how to use the software package step by step to identify bacterial transcript borders from raw RNA-Seq data. The package was developed with Perl and R programming languages, and is accessible freely through the website: http://www.szu-bioinf.org/TrBorderExt .",2018-01-01 +29028895,CCmiR: a computational approach for competitive and cooperative microRNA binding prediction.,"MOTIVATION:The identification of microRNA (miRNA) target sites is important. In the past decade, dozens of computational methods have been developed to predict miRNA target sites. Despite their existence, rarely does a method consider the well-known competition and cooperation among miRNAs when attempts to discover target sites. To fill this gap, we developed a new approach called CCmiR, which takes the cooperation and competition of multiple miRNAs into account in a statistical model to predict their target sites. RESULTS:Tested on four different datasets, CCmiR predicted miRNA target sites with a high recall and a reasonable precision, and identified known and new cooperative and competitive miRNAs supported by literature. 
Compared with three state-of-the-art computational methods, CCmiR had a higher recall and a higher precision. AVAILABILITY AND IMPLEMENTATION:CCmiR is freely available at http://hulab.ucf.edu/research/projects/miRNA/CCmiR. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2018-01-01 +26424083,MGDB: a comprehensive database of genes involved in melanoma. ,"The Melanoma Gene Database (MGDB) is a manually curated catalog of molecular genetic data relating to genes involved in melanoma. The main purpose of this database is to establish a network of melanoma related genes and to facilitate the mechanistic study of melanoma tumorigenesis. The entries describing the relationships between melanoma and genes in the current release were manually extracted from PubMed abstracts, which contains cumulative to date 527 human melanoma genes (422 protein-coding and 105 non-coding genes). Each melanoma gene was annotated in seven different aspects (General Information, Expression, Methylation, Mutation, Interaction, Pathway and Drug). In addition, manually curated literature references have also been provided to support the inclusion of the gene in MGDB and establish its association with melanoma. MGDB has a user-friendly web interface with multiple browse and search functions. We hoped MGDB will enrich our knowledge about melanoma genetics and serve as a useful complement to the existing public resources. Database URL: http://bioinfo.ahu.edu.cn:8080/Melanoma/index.jsp.",2015-09-30 +26298580,PPDB: A Tool for Investigation of Plants Physiology Based on Gene Ontology.,"Representing the way forward, from functional genomics and its ontology to functional understanding and physiological model, in a computationally tractable fashion is one of the ongoing challenges faced by computational biology. 
To tackle the standpoint, we herein feature the applications of contemporary database management to the development of PPDB, a searching and browsing tool for the Plants Physiology Database that is based upon the mining of a large amount of gene ontology data currently available. The working principles and search options associated with the PPDB are publicly available and freely accessible online ( http://www.iitr.ac.in/ajayshiv/ ) through a user-friendly environment generated by means of Drupal-6.24. By knowing that genes are expressed in temporally and spatially characteristic patterns and that their functionally distinct products often reside in specific cellular compartments and may be part of one or more multicomponent complexes, this sort of work is intended to be relevant for investigating the functional relationships of gene products at a system level and, thus, helps us approach to the full physiology.",2015-08-23 +27924294,Data for a pre-performance test of self-developed electronic tongue sensors.,"This article presents data, which can be applied for a pre-performance test of self-developed electronic tongue sensors. Contained data is related to the research article ""Impact of Sodium Lauryl Sulfate in oral liquids on E-Tongue Measurements"" (http://dx.doi.org/10.1016/j.ijpharm.2016.10.045; (L.I. Immohr, R. Turner, M. Pein-Hackelbusch, 2016) [1]). Sensor responses were obtained from 10 subsequent measurements and four different concentrations of quinine hydrochloride by electronic tongue (TS-5000Z, Insent Inc., Atsugi-Shi, Japan) measurements. Based on the data for the pre-performance testing, which were calculated based on the fluctuation range of the sensor responses around the median, stability criteria and required preconditions cycles were defined.",2016-11-19 +26635144,De novo and comparative transcriptome analysis of cultivated and wild spinach.,"Spinach (Spinacia oleracea L.) is an economically important green leafy vegetable crop. 
In this study, we performed deep transcriptome sequencing for nine spinach accessions: three from cultivated S. oleracea, three from wild S. turkestanica and three from wild S. tetrandra. A total of approximately 100 million high-quality reads were generated, which were de novo assembled into 72,151 unigenes with a total length of 46.5 Mb. By comparing sequences of these unigenes against different protein databases, nearly 60% of them were annotated and 50% could be assigned with Gene Ontology terms. A total of 387 metabolic pathways were predicted from the assembled spinach unigenes. From the transcriptome sequencing data, we were able to identify a total of ~320,000 high-quality single nucleotide polymorphisms (SNPs). Phylogenetic analyses using SNPs as well as gene expression profiles indicated that S. turkestanica was more closely related to the cultivated S. oleracea than S. tetrandra. A large number of genes involved in responses to biotic and abiotic stresses were found to be differentially expressed between the cultivated and wild spinach. Finally, an interactive online database (http://www.spinachbase.org) was developed to allow the research community to efficiently retrieve, query, mine and analyze our transcriptome dataset.",2015-12-04 +28937906,Protein Activation in Periapical Reaction to Iodoform Containing Root Canal Sealer.,"

Objectives

An association between root canal sealers and periapical lesions in primary dentition has been suggested, yet the chemical-protein interactions that may be involved in it have not been studied. The present study explored root sealer components' effect on periapical tissue proteins using bioinformatics tools.

Study design

For each chemical component of Endoflas F.S. root sealing material we identified the known and predicted target proteins, using STITCH (search tool for interactions of chemicals http://stitch.embl.de/ ). Identified target proteins were grouped into functional categories using the annotation clustering tool from DAVID, the Database for Annotation, Visualization and Integrated Discovery ( http://david.abcc.ncifcrf.gov/ ). STRING Protein-Protein Interaction network database identified associations between the proteins.

Results

Sixteen proteins identified with STITCH served as input to DAVID annotation clustering tool. Only ZnO and Eugenol targeted proteins had statistically significant annotations. Gene Ontology terms of ZnO and Eugenol targeted proteins demonstrated that these proteins respond to mechanical stimulus and to oxidative stress. They highlight these proteins' role in the positive regulation of transcription, gene expression, cell proliferation and apoptosis, and their complementary role in the negative regulation of cell death.

Conclusion

When stimulated by Zinc Oxide, Eugenol and Calcium hydroxide, chemical-protein and subsequent protein-protein interactions result in cell proliferation in the periapical area. Our findings indicate that certain root sealers components may cause enlargement of the permanent tooth follicle. Dentists should be aware of this phenomenon and radiographically monitor root canal treated teeth until shedding.",2017-09-22 +26896846,NALDB: nucleic acid ligand database for small molecules targeting nucleic acid. ,"Nucleic acid ligand database (NALDB) is a unique database that provides detailed information about the experimental data of small molecules that were reported to target several types of nucleic acid structures. NALDB is the first ligand database that contains ligand information for all type of nucleic acid. NALDB contains more than 3500 ligand entries with detailed pharmacokinetic and pharmacodynamic information such as target name, target sequence, ligand 2D/3D structure, SMILES, molecular formula, molecular weight, net-formal charge, AlogP, number of rings, number of hydrogen bond donor and acceptor, potential energy along with their Ki, Kd, IC50 values. All these details at single platform would be helpful for the development and betterment of novel ligands targeting nucleic acids that could serve as a potential target in different diseases including cancers and neurological disorders. With maximum 255 conformers for each ligand entry, our database is a multi-conformer database and can facilitate the virtual screening process. NALDB provides powerful web-based search tools that make database searching efficient and simplified using option for text as well as for structure query. NALDB also provides multi-dimensional advanced search tool which can screen the database molecules on the basis of molecular properties of ligand provided by database users. A 3D structure visualization tool has also been included for 3D structure representation of ligands. 
NALDB offers an inclusive pharmacological information and the structurally flexible set of small molecules with their three-dimensional conformers that can accelerate the virtual screening and other modeling processes and eventually complement the nucleic acid-based drug discovery research. NALDB can be routinely updated and freely available on bsbe.iiti.ac.in/bsbe/naldb/HOME.php. Database URL: http://bsbe.iiti.ac.in/bsbe/naldb/HOME.php.",2016-02-20 +,Stress-Driven Changes in the Strength of Facilitation on Tree Seedling Establishment in West African Woodlands,"The strength of competitive and facilitative interactions in plant communities is expected to change along resource gradients. Contrasting theoretical models predict that with increasing abiotic stress, facilitative effects are higher, lower, or similar than those found under more productive conditions. While these predictions have been tested in stressful environments such as arid and alpine ecosystems, they have hardly been tested for more productive African woodlands. We experimentally assessed the strength of tree seedling facilitation by nurse trees in mesic and dry woodlands in Benin, West Africa. We planted seedlings of the drought-sensitive Afzelia africana and the drought-tolerant Khaya senegalensis under three microsite conditions (closed woodland, woodland gap, and open fields). Seedling survival was greater within woodlands compared with open fields in both the mesic and dry woodlands. The relative benefits in seedling survival were larger at the dry site, especially for the drought-sensitive species. Nevertheless, plant interactions became neutral or negative during the dry season in the drier woodland, indicating that the net positive effects may be lost under very stressful abiotic conditions. We conclude that facilitation also occurs in the relatively more productive conditions of African woodlands. 
Our results underscore the role of environmental variation in space and time, and the stress tolerance of species, in explaining competitive and facilitative interactions within plant communities. Abstract in French is available at http://www.blackwell-synergy.com/loi/btp.",2011-01-01 +30101351,Dietary intake of one-carbon metabolism nutrients and DNA methylation in peripheral blood.,"

Background

Folate and other one-carbon metabolism nutrients are essential to enable DNA methylation to occur, but the extent to which their dietary intake influences methylation in adulthood is unclear.

Objective

We assessed associations between dietary intake of these nutrients and DNA methylation in peripheral blood, overall and at specific genomic locations.

Design

We conducted a cross-sectional study using baseline data and samples from 5186 adult participants in the Melbourne Collaborative Cohort Study (MCCS). Nutrient intake was estimated from a food-frequency questionnaire. DNA methylation was measured by using the Illumina Infinium HumanMethylation450 BeadChip array (HM450K). We assessed associations of intakes of folate, riboflavin, vitamins B-6 and B-12, methionine, choline, and betaine with methylation at individual cytosine-guanine dinucleotides (CpGs), and with median (genome-wide) methylation across all CpGs, CpGs in gene bodies, and CpGs in gene promoters. We also assessed associations with methylation at long interspersed nuclear element 1 (LINE-1), satellite 2 (Sat2), and Arthrobacter luteus restriction endonuclease (Alu) repetitive elements for a subset of participants. We used linear mixed regression, adjusting for age, sex, country of birth, smoking, energy intake from food, alcohol intake, Mediterranean diet score, and batch effects to assess log-linear associations with dietary intake of each nutrient. In secondary analyses, we assessed associations with low or high intakes defined by extreme quintiles.

Results

No evidence of log-linear association was observed at P < 10-7 between the intake of one-carbon metabolism nutrients and methylation at individual CpGs. Low intake of riboflavin was associated with higher methylation at CpG cg21230392 in the first exon of PROM1 (P = 5.0 × 10-8). No consistent evidence of association was observed with genome-wide or repetitive element measures of methylation.

Conclusion

Our findings suggest that dietary intake of one-carbon metabolism nutrients in adulthood, as measured by a food-frequency questionnaire, has little association with blood DNA methylation. An association with low intake of riboflavin requires replication in independent cohorts. This study was registered at http://www.clinicaltrials.gov as NCT03227003.",2018-09-01 +25161662,Using the SUBcellular database for Arabidopsis proteins to localize the Deg protease family.,"Sub-functionalization during the expansion of gene families in eukaryotes has occurred in part through specific subcellular localization of different family members. To better understand this process in plants, compiled records of large-scale proteomic and fluorescent protein localization datasets can be explored and bioinformatic predictions for protein localization can be used to predict the gaps in experimental data. This process can be followed by targeted experiments to test predictions. The SUBA3 database is a free web-service at http://suba.plantenergy.uwa.edu.au that helps users to explore reported experimental data and predictions concerning proteins encoded by gene families and to define the experiments required to locate these homologous sets of proteins. Here we show how SUBA3 can be used to explore the subcellular location of the Deg protease family of ATP-independent serine endopeptidases (Deg1-Deg16). Combined data integration and new experiments refined location information for Deg1 and Deg9, confirmed Deg2, Deg5, and Deg8 in plastids and Deg 15 in peroxisomes and provide substantial experimental evidence for mitochondrial localized Deg proteases. Two of these, Deg3 and Deg10, additionally localized to the plastid, revealing novel dual-targeted Deg proteases in the plastid and the mitochondrion. 
SUBA3 is continually updated to ensure that researchers can use the latest published data when planning the experimental steps remaining to localize gene family functions.",2014-08-12 +27493193,FARAO: the flexible all-round annotation organizer.,"With decreasing costs of generating DNA sequence data, genome and metagenome projects have become accessible to a wider scientific community. However, to extract meaningful information and visualize the data remain challenging. We here introduce FARAO, a highly scalable software for organization, visualization and integration of annotation and read coverage data that can also combine output data from several bioinformatics tools. The capabilities of FARAO can greatly aid analyses of genomic and metagenomic datasets.

Availability and implementation

FARAO is implemented in Perl and is supported under Unix-like operative systems, including Linux and macOS. The Perl source code is freely available for download under the MIT License from http://microbiology.se/software/farao/ CONTACT: johan.bengtsson-palme@microbiology.seSupplementary information: Supplementary data are available at Bioinformatics online.",2016-08-04 +26582928,WeGET: predicting new genes for molecular systems by weighted co-expression.,"We have developed the Weighted Gene Expression Tool and database (WeGET, http://weget.cmbi.umcn.nl) for the prediction of new genes of a molecular system by correlated gene expression. WeGET utilizes a compendium of 465 human and 560 murine gene expression datasets that have been collected from multiple tissues under a wide range of experimental conditions. It exploits this abundance of expression data by assigning a high weight to datasets in which the known genes of a molecular system are harmoniously up- and down-regulated. WeGET ranks new candidate genes by calculating their weighted co-expression with that system. A weighted rank is calculated for human genes and their mouse orthologs. Then, an integrated gene rank and p-value is computed using a rank-order statistic. We applied our method to predict novel genes that have a high degree of co-expression with Gene Ontology terms and pathways from KEGG and Reactome. For each query set we provide a list of predicted novel genes, computed weights for transcription datasets used and cell and tissue types that contributed to the final predictions. The performance for each query set is assessed by 10-fold cross-validation. Finally, users can use the WeGET to predict novel genes that co-express with a custom query set.",2015-11-17 +25899006,Seeking effective interventions to treat complex wounds: an overview of systematic reviews.,"

Background

Numerous, often multi-faceted regimens are available for treating complex wounds, yet the evidence of these interventions is recondite across the literature. We aimed to identify effective interventions to treat complex wounds through an overview of systematic reviews.

Methods

MEDLINE (OVID interface, 1946 until October 26, 2012), EMBASE (OVID interface, 1947 until October 26, 2012), and the Cochrane Database of Systematic Reviews (Issue 10 of 12, 2012) were searched on October 26, 2012. Systematic reviews that examined adults receiving care for their complex wounds were included. Two reviewers independently screened the literature, abstracted data, and assessed study quality using the Assessment of Multiple Systematic Reviews (AMSTAR) tool.

Results

Overall, 99 systematic reviews were included after screening 6,200 titles and abstracts and 422 full-texts; 54 were systematic reviews with a meta-analysis (including data on over 54,000 patients) and 45 were systematic reviews without a meta-analysis. Overall, 44% of included reviews were rated as being of high quality (AMSTAR score ≥ 8). Based on data from systematic reviews including a meta-analysis with an AMSTAR score ≥ 8, promising interventions for complex wounds were identified. These included bandages or stockings (multi-layer, high compression) and wound cleansing for venous leg ulcers; four-layer bandages for mixed arterial/venous leg ulcers; biologics, ultrasound, and hydrogel dressings for diabetic leg/foot ulcers; hydrocolloid dressings, electrotherapy, air-fluidized beds, and alternate foam mattresses for pressure ulcers; and silver dressings and ultrasound for unspecified mixed complex wounds. For surgical wound infections, topical negative pressure and vacuum-assisted closure were promising interventions, but this was based on evidence from moderate to low quality systematic reviews.

Conclusions

Numerous interventions can be utilized for patients with varying types of complex wounds, yet few treatments were consistently effective across all outcomes throughout the literature. Clinicians and patients can use our results to tailor effective treatment according to type of complex wound. Network meta-analysis will be of benefit to decision-makers, as it will permit multiple treatment comparisons and ranking of the effectiveness of all interventions. Please see related article: http://dx.doi.org/10.1186/s12916-015-0326-3.",2015-04-22 +28942106,"Familiarity with Long-acting Reversible Contraceptives among Obstetrics and Gynecology, Family Medicine, and Pediatrics Residents: Results of a 2015 National Survey and Implications for Contraceptive Provision for Adolescents.","

Study objective

To assess familiarity with long-acting reversible contraceptives (LARC) among current obstetrics and gynecology (OB/GYN), family medicine (FM), and pediatrics senior residents in the United States. DESIGN, SETTING, PARTICIPANTS, INTERVENTIONS, AND MAIN OUTCOME MEASURES: We selected 156 OB/GYN, FM, and pediatrics residency programs using the American Medical Association Freida database. Senior residents completed a survey addressing any training they had received on LARC, and rated their comfort level counseling about and inserting LARC. Residents rated their likelihood of recommending LARC to an adolescent, nulliparous patient, and indicated whether they would like additional training on LARC. Descriptive and analytic statistics were generated using R statistical software (The R Project for Statistical Computing; https://www.r-project.org).

Results

The survey was completed by 326 of 1,583 residents (20.6% response rate); at least 1 resident completed the survey at 105 (67.3%) of the residency programs contacted. Most programs (84.8%) provided some training on LARC. Residents in OB/GYN programs were comfortable counseling about and inserting contraceptive implants (97%, 83%), copper intrauterine devices (IUDs; 100%, 86%), and levonorgestrel (LNG) IUDs (100%, 86%). In FM programs, fewer residents were comfortable counseling about and inserting contraceptive implants (71%, 47%), copper IUDs (68%, 21%), and LNG IUDs (79%, 18%). Residents in pediatrics programs had low comfort levels counseling about contraceptive implants (14%), copper IUDs (14%), and LNG IUDs (25%); no pediatrics residents were comfortable inserting LARC. OB/GYN residents were significantly more likely to recommend a LARC to an adolescent, nulliparous patient (P = .019). Most pediatric and FM residents desired additional training on LARC (82.7% and 60.7%, respectively).

Conclusion

This study shows that knowledge gaps exist regarding LARC among FM and pediatrics residents.",2017-09-21 +25361968,Genenames.org: the HGNC resources in 2015.,"The HUGO Gene Nomenclature Committee (HGNC) based at the European Bioinformatics Institute (EMBL-EBI) assigns unique symbols and names to human genes. To date the HGNC have assigned over 39,000 gene names and, representing an increase of over 5000 entries in the past two years. As well as increasing the size of our database, we have continued redesigning our website http://www.genenames.org and have modified, updated and improved many aspects of the site including a faster and more powerful search, a vastly improved HCOP tool and a REST service to increase the number of ways users can retrieve our data. This article provides an overview of our current online data and resources, and highlights the changes we have made in recent years.",2014-10-31 +24838565,DINIES: drug-target interaction network inference engine based on supervised analysis.,"DINIES (drug-target interaction network inference engine based on supervised analysis) is a web server for predicting unknown drug-target interaction networks from various types of biological data (e.g. chemical structures, drug side effects, amino acid sequences and protein domains) in the framework of supervised network inference. The originality of DINIES lies in prediction with state-of-the-art machine learning methods, in the integration of heterogeneous biological data and in compatibility with the KEGG database. The DINIES server accepts any 'profiles' or precalculated similarity matrices (or 'kernels') of drugs and target proteins in tab-delimited file format. When a training data set is submitted to learn a predictive model, users can select either known interaction information in the KEGG DRUG database or their own interaction data. 
The user can also select an algorithm for supervised network inference, select various parameters in the method and specify weights for heterogeneous data integration. The server can provide integrative analyses with useful components in KEGG, such as biological pathways, functional hierarchy and human diseases. DINIES (http://www.genome.jp/tools/dinies/) is publicly available as one of the genome analysis tools in GenomeNet.",2014-05-16 +30367593,On the impact of uncertain gene tree rooting on duplication-transfer-loss reconciliation.,"

Background

Duplication-Transfer-Loss (DTL) reconciliation is a powerful and increasingly popular technique for studying the evolution of microbial gene families. DTL reconciliation requires the use of rooted gene trees to perform the reconciliation with the species tree, and the standard technique for rooting gene trees is to assign a root that results in the minimum reconciliation cost across all rootings of that gene tree. However, even though it is well understood that many gene trees have multiple optimal roots, only a single optimal root is randomly chosen to create the rooted gene tree and perform the reconciliation. This remains an important overlooked and unaddressed problem in DTL reconciliation, leading to incorrect evolutionary inferences. In this work, we perform an in-depth analysis of the impact of uncertain gene tree rooting on the computed DTL reconciliation and provide the first computational tools to quantify and negate the impact of gene tree rooting uncertainty on DTL reconciliation.

Results

Our analysis of a large data set of over 4500 gene families from 100 species shows that a large fraction of gene trees have multiple optimal rootings, that these multiple roots often, but not always, appear closely clustered together in the same region of the gene tree, that many aspects of the reconciliation remain conserved across the multiple rootings, that gene tree error has a profound impact on the prevalence and structure of multiple optimal rootings, and that there are specific interesting patterns in the reconciliation of those gene trees that have multiple optimal roots.

Conclusions

Our results show that unrooted gene trees can be meaningfully reconciled and high-quality evolutionary information can be obtained from them even after accounting for multiple optimal rootings. In addition, the techniques and tools introduced in this paper make it possible to systematically avoid incorrect evolutionary inferences caused by incorrect or uncertain gene tree rooting. These tools have been implemented in the phylogenetic reconciliation software package RANGER-DTL 2.0, freely available from http://compbio.engr.uconn.edu/software/RANGER-DTL/ .",2018-08-13 +28379490,"DAMBE6: New Tools for Microbial Genomics, Phylogenetics, and Molecular Evolution.","DAMBE is a comprehensive software workbench for data analysis in molecular biology, phylogenetics, and evolution. Several important new functions have been added since version 5 of DAMBE: 1) comprehensive genomic profiling of translation initiation efficiency of different genes in different prokaryotic species, 2) a new index of translation elongation (ITE) that takes into account both tRNA-mediated selection and background mutation on codon-anticodon adaptation, 3) a new and accurate phylogenetic approach based on pairwise alignment only, which is useful for highly divergent sequences from which a reliable multiple sequence alignment is difficult to obtain. Many other functions have been updated and improved including PWM for motif characterization, Gibbs sampler for de novo motif discovery, hidden Markov models for protein secondary structure prediction, self-organizing map for nonlinear clustering of transcriptomic data, comprehensive sequence alignment, and phylogenetic functions. DAMBE features a graphic, user-friendly and intuitive interface, and is freely available from http://dambe.bio.uottawa.ca.",2017-06-01 +28968770,CRISPR-RT: a web application for designing CRISPR-C2c2 crRNA with improved target specificity.,"

Summary

CRISPR-Cas systems have been successfully applied in genome editing. Recently, the CRISPR-C2c2 system has been reported as a tool for RNA editing. Here we describe CRISPR-RT (CRISPR RNA-Targeting), the first web application to help biologists design crRNAs with improved target specificity for the CRISPR-C2c2 system. CRISPR-RT allows users to set up a wide range of parameters, making it highly flexible for current and future research in CRISPR-based RNA editing. CRISPR-RT covers major model organisms and can be easily extended to cover other species. CRISPR-RT will empower researchers in RNA editing.

Availability and implementation

Freely available at http://bioinfolab.miamioh.edu/CRISPR-RT.

Contact

liangc@miamioh.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +28498958,compleXView: a server for the interpretation of protein abundance and connectivity information to identify protein complexes.,"The molecular understanding of cellular processes requires the identification and characterization of the involved protein complexes. Affinity-purification and mass spectrometric analysis (AP-MS) are performed on a routine basis to detect proteins assembled in complexes. In particular, protein abundances obtained by quantitative mass spectrometry and direct protein contacts detected by crosslinking and mass spectrometry (XL-MS) provide complementary datasets for revealing the composition, topology and interactions of modules in a protein network. Here, we aim to combine quantitative and connectivity information by a webserver tool in order to infer protein complexes. In a first step, modeling protein abundances and functional annotations from Gene Ontology (GO) results in a network which, in a second step, is integrated with connectivity data from XL-MS analysis in order to complement and validate the protein complexes in the network. The output of our integrative approach is a quantitative protein interaction map which is supplemented with topological information of the detected protein complexes. compleXView is built up by two independent modules which are dedicated to the analysis of label-free AP-MS data and to the visualization of the detected complexes in a network together with crosslink-derived distance restraints. compleXView is available to all users without login requirements at http://xvis.genzentrum.lmu.de/compleXView.",2017-07-01 +26810894,BLAST-based structural annotation of protein residues using Protein Data Bank.,"

Background

In the era of next-generation sequencing where thousands of genomes have been already sequenced; size of protein databases is growing with exponential rate. Structural annotation of these proteins is one of the biggest challenges for the computational biologist. Although, it is easy to perform BLAST search against Protein Data Bank (PDB) but it is difficult for a biologist to annotate protein residues from BLAST search.

Results

A web-server StarPDB has been developed for structural annotation of a protein based on its similarity with known protein structures. It uses standard BLAST software for performing similarity search of a query protein against protein structures in PDB. This server integrates wide range modules for assigning different types of annotation that includes, Secondary-structure, Accessible surface area, Tight-turns, DNA-RNA and Ligand modules. Secondary structure module allows users to predict regular secondary structure states to each residue in a protein. Accessible surface area predict the exposed or buried residues in a protein. Tight-turns module is designed to predict tight turns like beta-turns in a protein. DNA-RNA module developed for predicting DNA and RNA interacting residues in a protein. Similarly, Ligand module of server allows one to predicted ligands, metal and nucleotides ligand interacting residues in a protein.

Conclusions

In summary, this manuscript presents a web server for comprehensive annotation of a protein based on similarity search. It integrates number of visualization tools that facilitate users to understand structure and function of protein residues. This web server is available freely for scientific community from URL http://crdd.osdd.net/raghava/starpdb .",2016-01-25 +29950008,MicroPheno: predicting environments and host phenotypes from 16S rRNA gene sequencing using a k-mer based representation of shallow sub-samples.,"

Motivation

Microbial communities play important roles in the function and maintenance of various biosystems, ranging from the human body to the environment. A major challenge in microbiome research is the classification of microbial communities of different environments or host phenotypes. The most common and cost-effective approach for such studies to date is 16S rRNA gene sequencing. Recent falls in sequencing costs have increased the demand for simple, efficient and accurate methods for rapid detection or diagnosis with proved applications in medicine, agriculture and forensic science. We describe a reference- and alignment-free approach for predicting environments and host phenotypes from 16S rRNA gene sequencing based on k-mer representations that benefits from a bootstrapping framework for investigating the sufficiency of shallow sub-samples. Deep learning methods as well as classical approaches were explored for predicting environments and host phenotypes.

Results

A k-mer distribution of shallow sub-samples outperformed Operational Taxonomic Unit (OTU) features in the tasks of body-site identification and Crohn's disease prediction. Aside from being more accurate, using k-mer features in shallow sub-samples allows (i) skipping computationally costly sequence alignments required in OTU-picking and (ii) provided a proof of concept for the sufficiency of shallow and short-length 16S rRNA sequencing for phenotype prediction. In addition, k-mer features predicted representative 16S rRNA gene sequences of 18 ecological environments, and 5 organismal environments with high macro-F1 scores of 0.88 and 0.87. For large datasets, deep learning outperformed classical methods such as Random Forest and Support Vector Machine.

Availability and implementation

The software and datasets are available at https://llp.berkeley.edu/micropheno.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-07-01 +26578591,SomamiR 2.0: a database of cancer somatic mutations altering microRNA-ceRNA interactions.,"SomamiR 2.0 (http://compbio.uthsc.edu/SomamiR) is a database of cancer somatic mutations in microRNAs (miRNA) and their target sites that potentially alter the interactions between miRNAs and competing endogenous RNAs (ceRNA) including mRNAs, circular RNAs (circRNA) and long noncoding RNAs (lncRNA). Here, we describe the recent major updates to the SomamiR database. We expanded the scope of the database by including somatic mutations that impact the interactions between miRNAs and two classes of non-coding RNAs, circRNAs and lncRNAs. Recently, a large number of miRNA target sites have been discovered by newly emerged high-throughput technologies for mapping the miRNA interactome. We have mapped 388 247 somatic mutations to the experimentally identified miRNA target sites. The updated database also includes a list of somatic mutations in the miRNA seed regions, which contain the most important guiding information for miRNA target recognition. A recently developed webserver, miR2GO, was integrated with the database to provide a seamless pipeline for assessing functional impacts of somatic mutations in miRNA seed regions. Data and functions from multiple sources including biological pathways and genome-wide association studies were updated and integrated with SomamiR 2.0 to make it a better platform for functional analysis of somatic mutations altering miRNA-ceRNA interactions.",2015-11-17 +,Pool‐hmm: a Python program for estimating the allele frequency spectrum and detecting selective sweeps from next generation sequencing of pooled samples,"Due to its cost effectiveness, next generation sequencing of pools of individuals (Pool‐Seq) is becoming a popular strategy for genome‐wide estimation of allele frequencies in population samples. 
As the allele frequency spectrum provides information about past episodes of selection, Pool‐seq is also a promising design for genomic scans for selection. However, no software tool has yet been developed for selection scans based on Pool‐Seq data. We introduce Pool‐hmm, a Python program for the estimation of allele frequencies and the detection of selective sweeps in a Pool‐Seq sample. Pool‐hmm includes several options that allow a flexible analysis of Pool‐Seq data, and can be run in parallel on several processors. Source code and documentation for Pool‐hmm is freely available at https://qgsp.jouy.inra.fr/.",2013-03-01 +29034284,"Data from roadside screening for psychoactive substances, alcohol and illicit drugs, among Spanish drivers in 2015.","The data presented in this article are related to the paper ""Prevalence of psychoactive substances, alcohol and illicit drugs, in Spanish drivers: A roadside study in 2015"". (https://doi.org/10.1016/j.forsciint.2017.07.005) Domingo-(Salvany et al., 2017) [1]. In that paper it was not possible to directly compare 2015 results with previous editions for various reasons, one of which was the lack of a similar weighting procedure. The present paper provides 2015 figures of roadside screening tests which are weighted for traffic flow intensity and therefore allow direct comparisons with the screening tests conducted among Spanish drivers in 2008 and 2013.",2017-09-20 +28939336,[Evaluation and results of ablative therapies in prostate cancer].,"

Objective

To perform a state of the art about methods of evaluation and present results in ablative therapies for localized prostate cancer.

Methods

A review of the scientific literature was performed in Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of keywords. Publications obtained were selected based on methodology, language and relevance. After selection, 102 articles were analysed.

Results

Analysing the results of ablative therapies is presently difficult considering the heterogeneity of indications, techniques and follow-up. However, results from the most recent and homogeneous studies are encouraging. Oncologically, postoperative biopsies (the most important criterion) are negative (without any tumor cells in the treated area) in 75 to 95%. Functionally, urinary and sexual pre-operative status is spared (or recovered early) in more than 90% of the patients treated. More and more studies also underline the correlation between the results and the technique used considering the volume of the gland and, moreover, the ""index lesion"" localization.

Conclusion

The post-treatment pathological evaluation by biopsies (targeted with MRI or, perhaps in a near future, with innovative ultrasonography) is the corner stone of oncological evaluation of ablative therapies. Ongoing trials will allow to standardize the follow-up and determine the best indication and the best techniques in order to optimize oncological and functional results for each patient treated.",2017-09-20 +25566299,MeioBase: a comprehensive database for meiosis.,"Meiosis is a special type of cell division process necessary for the sexual reproduction of all eukaryotes. The ever expanding meiosis research calls for an effective and specialized database that is not readily available yet. To fill this gap, we have developed a knowledge database MeioBase (http://meiosis.ibcas.ac.cn), which is comprised of two core parts, Resources and Tools. In the Resources part, a wealth of meiosis data collected by curation and manual review from published literatures and biological databases are integrated and organized into various sections, such as Cytology, Pathway, Species, Interaction, and Expression. In the Tools part, some useful tools have been integrated into MeioBase, such as Search, Download, Blast, Comparison, My Favorites, Submission, and Advice. With a simplified and efficient web interface, users are able to search against the database with gene model IDs or keywords, and batch download the data for local investigation. We believe that MeioBase can greatly facilitate the researches related to meiosis.",2014-12-16 +26971988,"[Predictive factors of clinically significant drug-drug interactions among regimens based on protease inhibitors, non-nucleoside reverse transcriptase inhibitors and raltegravir].","

Background and objective

To determine the prevalence and types of clinically significant drug-drug interactions (CSDI) in the drug regimens of HIV-infected patients receiving antiretroviral treatment.

Material and methods

Design

retrospective review of database. Centre: Hospital Universitario Severo Ochoa, Infectious Unit.

Participants

one hundred and forty-two participants followed by one of the authors were selected from January 1985 to December 2014.

Data collection

from their outpatient medical records we reviewed information from the last available visit of the participants, in relation to HIV infection, comorbidities, demographics and the drugs that they were receiving; both antiretroviral drugs and drugs not related to HIV infection. We defined CSDI from the information sheet and/or database on antiretroviral drug interactions of the University of Liverpool (http://www.hiv-druginteractions.org) and we developed a diagnostic tool to predict the possibility of CSDI. By multivariate logistic regression analysis and by estimating the diagnostic performance curve obtained, we identified a quick tool to predict the existence of drug interactions.

Results

Of 142 patients, 39 (29.11%) had some type of CSDI and in 11.2% 2 or more interactions were detected. In only one patient the combination of drugs was contraindicated (this patient was receiving darunavir/r and quetiapine). In multivariate analyses, predictors of CSDI were regimen type (PI or NNRTI) and the use of 3 or more non-antiretroviral drugs (AUC 0.886, 95% CI 0.828 to 0.944; P=.0001). The risk was 18.55 times in those receiving NNRTI and 27.95 times in those receiving PI compared to those taking raltegravir.

Conclusions

Drug interactions, including those defined as clinically significant, are common in HIV-infected patients treated with antiretroviral drugs, and the risk is greater in PI-based regimens. Raltegravir-based prescribing, especially in patients who receive at least 3 non-HIV drugs could avoid interactions.",2016-03-11 +,"Evolution of the Malagasy endemic genus Nanos Westwood, 1842 (Coleoptera, Scarabaeidae, Epilissini)","The traditionally defined ‘Nanos group’, composed of the genera Nanos Westwood, 1842, Cambefortantus Paulian, 1986 and Apotolamprus Olsoufieff, 1947, represents the most recent Malagasy dung beetle radiation. Species in this group have been ecologically very successful with many being numerically dominant in local dung beetle communities in Madagascar. In this study the phylogenetic relationships of species in this group are inferred using molecular data from mitochondrial (cytochrome c oxidase I) and nuclear (rudimentary, topoisomerase I and 28S) genes. The monophyly of Apotolamprus is supported both by molecular and morphological characters, but that of Nanos, supported by only one morphological character, is questioned. Congruent species groups can be defined within Nanos on the base of morphology and molecular results. In addition to the phylogenetic study, the revision of the genus Nanos Westwood, 1842, s.l., is presented. Nanos antsihanakensis (Lebis, 1953) stat.n. is re‐established. Thirteen new species – Nanos pseudofusconitens sp.n., Nanos magnus sp.n., Nanos marojejyensis sp.n., Nanos bemarahaensis sp.n., Nanos andreiae sp.n., Nanos mirjae sp.n., Nanos pseudorubromaculatus sp.n., Nanos pseudominutus sp.n., Nanos mixtus sp.n., Nanos ranomafanaensis sp.n., Nanos manongorivoensis sp.n., N. pseudoviettei sp.n. and N. constricticollis sp.n. – are described and compared with their most closely related taxa. Sphaerocanthon fallaciosus Lebis, 1953, is synonymised with Nanos fusconitens (Fairmaire, 1899) syn.n. 
and Nanos neoelectrinus Montreuil & Viljanen, 2007, with Nanos humeralis Paulian, 1975 syn.n. Lectotypes are designated for Epilissus fusconitens var. agaboides Boucomont, 1937, Epilissus punctatus Boucomont, 1937, Epilissus sinuatipes Boucomont, 1937, Epilissus semiscribrosus Fairmaire, 1898, Epilissus fusconitens Fairmaire, 1899, and Sphaerocanthon vadoni Lebis, 1953. Aedeagus and male pro‐ and metatibiae are illustrated for each species. This published work has been registered in Zoobank, http://zoobank.org/urn:lsid:zoobank.org:pub:C1F29A37‐E380‐4D87‐871F‐039227547156.",2014-07-01 +29089957,Development and Evaluation of a Barley 50k iSelect SNP Array.,"High-throughput genotyping arrays continue to be an attractive, cost-effective alternative to sequencing based approaches. We have developed a new 50k Illumina Infinium iSelect genotyping array for barley, a cereal crop species of major international importance. The majority of SNPs on the array have been extracted from variants called in exome capture data of a wide range of European barley germplasm. We used the recently published barley pseudomolecule assembly to map the exome capture data, which allowed us to generate markers with accurate physical positions and detailed gene annotation. Markers from an existing and widely used barley 9k Infinium iSelect array were carried over onto the 50k chip for backward compatibility. The array design featured 49,267 SNP markers that converted into 44,040 working assays, of which 43,461 were scorable in GenomeStudio. Of the working assays, 6,251 are from the 9k iSelect platform. We validated the SNPs by comparing the genotype calls from the new array to legacy datasets. Rates of agreement averaged 98.1 and 93.9% respectively for the legacy 9k iSelect SNP set (Comadran et al., 2012) and the exome capture SNPs. 
To test the utility of the 50k chip for genetic mapping, we genotyped a segregating population derived from a Golden Promise × Morex cross (Liu et al., 2014) and mapped over 14,000 SNPs to genetic positions which showed a near exact correspondence to their known physical positions. Manual adjustment of the cluster files used by the interpreting software for genotype scoring improved results substantially, but migration of cluster files between sites led to a deterioration of results, suggesting that local adjustment of cluster files is required on a site-per-site basis. Information relating to the markers on the chip is available online at https://ics.hutton.ac.uk/50k.",2017-10-17 +25591449,PathPPI: an integrated dataset of human pathways and protein-protein interactions.,"Integration of pathway and protein-protein interaction (PPI) data can provide more information that could lead to new biological insights. PPIs are usually represented by a simple binary model, whereas pathways are represented by more complicated models. We developed a series of rules for transforming protein interactions from pathway to binary model, and the protein interactions from seven pathway databases, including PID, BioCarta, Reactome, NetPath, INOH, SPIKE and KEGG, were transformed based on these rules. These pathway-derived binary protein interactions were integrated with PPIs from other five PPI databases including HPRD, IntAct, BioGRID, MINT and DIP, to develop integrated dataset (named PathPPI). More detailed interaction type and modification information on protein interactions can be preserved in PathPPI than other existing datasets. Comparison analysis results indicate that most of the interaction overlaps values (O AB) among these pathway databases were less than 5%, and these databases must be used conjunctively. 
The PathPPI data was provided at http://proteomeview.hupo.org.cn/PathPPI/PathPPI.html.",2015-01-15 +30102602,Evaluation of Prenatal Exposure to Bisphenol Analogues on Development and Long-Term Health of the Mammary Gland in Female Mice.,"

Background

Continued efforts to phase out bisphenol A (BPA) from consumer products have been met with the challenges of finding safer alternatives.

Objectives

This study aimed to determine whether early-life exposure to BPA and its related analogues, bisphenol AF (BPAF) and bisphenol S (BPS), could affect female pubertal mammary gland development and long-term mammary health in mice.

Methods

Timed pregnant CD-1 mice were exposed to vehicle, BPA (0.5, 5, 50 mg/kg), BPAF (0.05, 0.5, 5 mg/kg), or BPS (0.05, 0.5, 5 mg/kg) via oral gavage between gestation days 10–17. Mammary glands were collected from resulting female offspring at postnatal day (PND) 20, 28, 35, and 56, and at 3, 8, and 14 months for whole mount, histopathological evaluation, and quantitative real-time polymerase chain reaction (qPCR); serum steroid concentrations were also measured at these time points.

Results

In the bisphenol-exposed mice, accelerated mammary gland development was evident during early puberty and persisted into adulthood. By late adulthood, mammary glands from bisphenol-exposed female offspring exhibited adverse morphology in comparison with controls; most prominent were undifferentiated duct ends, significantly more lobuloalveolar hyperplasia and perivascular inflammation, and various tumors, including adenocarcinomas. Effects were especially prominent in the BPAF 5 mg/kg and BPS 0.5 mg/kg groups. Serum steroid concentrations and mammary mRNA levels of Esr1, Pgr, Ar, and Gper1 were similar to controls.

Conclusions

These data demonstrate that prenatal exposure of mice to BPAF or BPS induced precocious development of the mammary gland, and that siblings were significantly more susceptible to spontaneous preneoplastic epithelial lesions and inflammation, with an incidence greater than that observed in vehicle- and BPA-exposed animals. https://doi.org/10.1289/EHP3189.",2018-08-10 +29961657,WIC Participation and Blood Lead Levels among Children 1-5 Years: 2007-2014.,"

Background

The CDC recommends a targeted strategy for childhood blood lead screening based on participation in federal programs, such as Medicaid and the Special Supplemental Nutrition Program for Women, Infants, and Children (WIC). Yet, there is scarcity of data on blood lead levels (BLLs) among WIC participants.

Objective

Our objective was to investigate whether children participating in WIC and not enrolled in Medicaid, who have not been targeted in the historical Medicaid-focused screening strategy, have higher BLLs than children in neither of these programs.

Methods

The analysis included 3,180 children 1-5 y of age in the National Health and Nutrition Examination Surveys conducted in 2007-2014. Log-binomial regression, which allows direct estimation of prevalence ratios, was used to examine associations between WIC participation (in conjunction with Medicaid enrollment) and having BLLs ≥5 μg/dL with adjustment for age (1-2 vs. 3-5 y).

Results

The percentage of children participating in ""WIC only,"" ""Medicaid only,"" ""both WIC and Medicaid,"" and ""neither"" were 18.9%, 10.8%, 25.4%, and 44.9%, respectively. ""WIC only,"" ""Medicaid only,"" and ""both WIC and Medicaid"" children were more likely to have BLLs ≥5 μg/dL than children who were not enrolled in either program, with adjusted prevalence ratios of 3.29 [95% confidence interval (CI): 1.19, 9.09], 4.56 (95% CI: 2.18, 9.55), and 2.58 (95% CI: 1.18, 5.63).

Conclusions

Children participating in WIC but not Medicaid were more likely to have BLLs ≥5 μg/dL than children who were not enrolled in either program. These findings may inform public health recommendations and clinical practice guidelines. +https://doi.org/10.1289/EHP2384",2018-06-29 +25291878,[Mapping of the key oncology indicators available in France].,"

Background

Available data in the field of oncology in France are scattered due to the large number of available indicators and their sources. In order to facilitate identification and analysis of these indicators, the French National Cancer Institute (INCa) has mapped the main indicators available in oncology.

Methods

Mapping was based on the needs of various categories of potential users. Standardized interviews were conducted face-to-face or by email among representatives to determine their needs and expectations. The underlying data sources were also identified: databases, national surveys, websites. A first selection of indicators was proposed in the report entitled ""La situation du cancer en France en 2009"" (""The state of cancer in France in 2009"") and was expanded. Data collection concerning indicators was performed among INCa correspondents for each theme.

Results

Several themes were defined: epidemiology, prevention and risk factors, screening, medical demography, health care offer, living conditions, costs and expenses, research. Data were classified according to: geographical coverage, age, gender, type of cancer, occupational categories. This information was collected for each indicator selected and was made available via the cancer data website (http://lesdonnees.e-cancer.fr).

Conclusions

The available oncology indicators are numerous and scattered. Mapping can be a useful tool to facilitate access to these indicators. It should be regularly updated to reflect the most recent data.",2014-05-01 +25288655,MetaProx: the database of metagenomic proximons. ,"MetaProx is the database of metagenomic proximons: a searchable repository of proximon objects conceived with two specific goals. The first objective is to accelerate research involving metagenomic functional interactions by providing a database of metagenomic operon candidates. Proximons represent a special subset of directons (series of contiguous co-directional genes) where each member gene is in close proximity to its neighbours with respect to intergenic distance. As a result, proximons represent significant operon candidates where some subset of proximons is the set of true metagenomic operons. Proximons are well suited for the inference of metagenomic functional networks because predicted functional linkages do not rely on homology-dependent information that is frequently unavailable in metagenomic scenarios. The second objective is to explore representations for semistructured biological data that can offer an alternative to the traditional relational database approach. In particular, we use a serialized object implementation and advocate a Data as Data policy where the same serialized objects can be used at all levels (database, search tool and saved user file) without conversion or the use of human-readable markups. MetaProx currently includes 4,210,818 proximons consisting of 8,926,993 total member genes. 
Database URL: http://metaprox.uwaterloo.ca.",2014-10-06 +30388036,"Evidence for differential control of muscle sympathetic single units during mild sympathoexcitation in young, healthy humans.","Two subpopulations of muscle sympathetic single units with opposite discharge characteristics have been identified during low-level cardiopulmonary baroreflex loading and unloading in middle-aged adults and patients with heart failure. The present study sought to determine whether similar subpopulations are present in young healthy adults during cardiopulmonary baroreflex unloading ( study 1) and rhythmic handgrip exercise ( study 2). Continuous hemodynamic and multiunit and single unit muscle sympathetic nerve activity (MSNA) data were collected at baseline and during nonhypotensive lower body negative pressure (LBNP; n = 12) and 40% maximal voluntary contraction rhythmic handgrip exercise (RHG; n = 24). Single unit MSNA responses were classified as anticipated or paradoxical based on whether changes were concordant or discordant with the multiunit MSNA response, respectively. LBNP and RHG both increased multiunit MSNA burst frequency (∆5 ± 3 bursts/min, P < 0.001; ∆5 ± 8 bursts/min, P = 0.005), burst amplitude (∆5 ± 7%, P = 0.04; ∆13 ± 14%, P < 0.001), and total MSNA (∆302 ± 191 AU/min, P = 0.001; ∆585 ± 556 AU/min, P < 0.001). During LBNP and RHG, 43 and 64 muscle single units were identified, respectively, which increased spike frequency (∆9 ± 11 spikes/min, P < 0.001; ∆10 ± 19 spikes/min, P < 0.001) and the probability of multiple spike firing (∆10 ± 12%, P < 0.001; ∆11 ± 26%, P = 0.001). During LBNP and RHG, 36 (84%) and 39 (61%) single units possessed anticipated firing responses (∆12 ± 10 spikes/min, P < 0.001; ∆19 ± 19 spikes/min, P < 0.001), whereas 7 (16%) and 25 (39%) single units exhibited paradoxical reductions (∆-3 ± 1 spikes/min, P = 0.003; ∆-4 ± 5 spikes/min, P < 0.001). 
The observation of divergent subpopulations of muscle sympathetic single units in healthy young humans during two mild sympathoexcitatory stressors supports differential control at the fiber level as a fundamental characteristic of human sympathetic regulation. NEW & NOTEWORTHY The activity of muscle sympathetic single units was recorded during cardiopulmonary baroreceptor unloading and rhythmic handgrip exercise in young healthy humans. During both stressors, the majority of single units (84% and 61%) exhibited anticipated behavior concordant with the integrated muscle sympathetic response, whereas a smaller proportion (16% and 39%) exhibited paradoxical sympathoinhibition. These results support differential control of postganglionic muscle sympathetic fibers as a characteristic of human sympathetic regulation during mild sympathoexcitatory stress. Listen to this article's corresponding podcast at https://ajpheart.podbean.com/e/differential-control-of-sympathetic-outflow-in-young-humans/ .",2018-11-02 +29743287,PTPN12/PTP-PEST Regulates Phosphorylation-Dependent Ubiquitination and Stability of Focal Adhesion Substrates in Invasive Glioblastoma Cells.,"Glioblastoma (GBM) is an invasive brain cancer with tumor cells that disperse from the primary mass, escaping surgical resection and invariably giving rise to lethal recurrent lesions. Here we report that PTP-PEST, a cytoplasmic protein tyrosine phosphatase, controls GBM cell invasion by physically bridging the focal adhesion protein Crk-associated substrate (Cas) to valosin-containing protein (Vcp), an ATP-dependent protein segregase that selectively extracts ubiquitinated proteins from multiprotein complexes and targets them for degradation via the ubiquitin proteasome system. Both Cas and Vcp are substrates for PTP-PEST, with the phosphorylation status of tyrosine 805 (Y805) in Vcp impacting affinity for Cas in focal adhesions and controlling ubiquitination levels and protein stability. 
Perturbing PTP-PEST-mediated phosphorylation of Cas and Vcp led to alterations in GBM cell-invasive growth in vitro and in preclinical mouse models. Collectively, these data reveal a novel regulatory mechanism involving PTP-PEST, Vcp, and Cas that dynamically balances phosphorylation-dependent ubiquitination of key focal proteins involved in GBM cell invasion. Significance: PTP-PEST balances GBM cell growth and invasion by interacting with the ATP-dependent ubiquitin segregase Vcp/p97 and regulating phosphorylation and stability of the focal adhesion protein p130Cas. Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/14/3809/F1.large.jpg Cancer Res; 78(14); 3809-22. ©2018 AACR.",2018-05-09 +22120663,INOH: ontology-based highly structured database of signal transduction pathways.,"The Integrating Network Objects with Hierarchies (INOH) database is a highly structured, manually curated database of signal transduction pathways including Mammalia, Xenopus laevis, Drosophila melanogaster, Caenorhabditis elegans and canonical. Since most pathway knowledge resides in scientific articles, the database focuses on curating and encoding textual knowledge into a machine-processable form. We use a hierarchical pathway representation model with a compound graph, and every pathway component in the INOH database is annotated by a set of uniquely developed ontologies. Finally, we developed the Similarity Search using the combination of a compound graph and hierarchical ontologies. The INOH database is to be a good resource for many users who want to analyze a large protein network. INOH ontologies and 73 signal transduction and 29 metabolic pathway diagrams (including over 6155 interactions and 3395 protein entities) are freely available in INOH XML and BioPAX formats. 
Database URL: http://www.inoh.org/",2011-11-26 +24622612,GigaDB: promoting data dissemination and reproducibility.,"Often papers are published where the underlying data supporting the research are not made available because of the limitations of making such large data sets publicly and permanently accessible. Even if the raw data are deposited in public archives, the essential analysis intermediaries, scripts or software are frequently not made available, meaning the science is not reproducible. The GigaScience journal is attempting to address this issue with the associated data storage and dissemination portal, the GigaScience database (GigaDB). Here we present the current version of GigaDB and reveal plans for the next generation of improvements. However, most importantly, we are soliciting responses from you, the users, to ensure that future developments are focused on the data storage and dissemination issues that still need resolving. Database URL: http://www.gigadb.org.",2014-03-12 +26282194,Hypothalamic-pituitary-adrenal (HPA) axis suppression after treatment with glucocorticoid therapy for childhood acute lymphoblastic leukaemia.,"

Background

Glucocorticoids play a major role in the treatment of acute lymphoblastic leukaemia (ALL). However, supraphysiological doses can suppress the hypothalamic-pituitary-adrenal (HPA) axis. HPA axis suppression resulting in reduced cortisol response may cause an impaired stress response and an inadequate host defence against infections, which remains a cause of morbidity and death. Suppression commonly occurs in the first days after cessation of glucocorticoid therapy, but the exact duration is unclear. This review is an update of a previously published Cochrane review.

Objectives

To examine the occurrence and duration of HPA axis suppression after (each cycle of) glucocorticoid therapy for childhood ALL.

Search methods

We searched the Cochrane Central Register of Controlled Trials (CENTRAL; Issue 6, 2014), MEDLINE/PubMed (from 1945 to June 2014), and EMBASE/Ovid (from 1980 to June 2014). In addition, we searched reference lists of relevant articles, conference proceedings (the International Society for Paediatric Oncology and the American Society of Clinical Oncology from 2005 to 2013), and ongoing trial databases (the ISRCTN register and the NIH register via http://www.controlled-trials.com in June 2014).

Selection criteria

All study designs, except case reports and patient series with fewer than 10 children, examining the effect of glucocorticoid therapy for childhood ALL on the HPA axis function.

Data collection and analysis

Two review authors independently performed the study selection. One review author performed the data extraction and 'Risk of bias' assessment, which another review author checked.

Main results

We identified eight studies (total of 218 children), including two randomised controlled trials (RCTs), that assessed the adrenal function. None of the studies assessed the HPA axis at the level of the hypothalamus, pituitary, or both. Due to substantial differences between studies, we could not pool results. All of the studies had some methodological limitations. The included studies demonstrated that adrenal insufficiency occurs in nearly all children in the first days after cessation of glucocorticoid treatment for childhood ALL. The majority of children recovered within a few weeks, but a small number of children had ongoing adrenal insufficiency lasting up to 34 weeks. In the RCTs, the occurrence and duration of adrenal insufficiency did not differ between the prednisone and dexamethasone arms. In one study, it appeared that treatment with fluconazole prolonged the duration of adrenal insufficiency. Furthermore, one of the studies evaluated the presence of infections or stress episodes, or both as a risk factor for adrenal insufficiency. The authors found no relationship between the presence of infection/stress and adrenal insufficiency.

Authors' conclusions

We concluded that adrenal insufficiency commonly occurs in the first days after cessation of glucocorticoid therapy for childhood ALL, but the exact duration is unclear. Since no data on the level of the hypothalamus and the pituitary were available, we cannot make any conclusions regarding those outcomes. Clinicians should consider prescribing glucocorticoid replacement therapy during periods of serious stress in the first weeks after cessation of glucocorticoid therapy for childhood ALL to reduce the risk of life-threatening complications. However, more high-quality research is needed for evidence-based guidelines for glucocorticoid replacement therapy. Special attention should be paid to patients receiving fluconazole therapy, and perhaps similar antifungal drugs, as this may prolong the duration of adrenal insufficiency. Finally, it would be relevant to further investigate the relationship between present infection/stress and adrenal insufficiency in a larger, separate study specially designed for this purpose.",2015-08-17 +24974201,The Amordad database engine for metagenomics.,"

Motivation

Several technical challenges in metagenomic data analysis, including assembling metagenomic sequence data or identifying operational taxonomic units, are both significant and well known. These forms of analysis are increasingly cited as conceptually flawed, given the extreme variation within traditionally defined species and rampant horizontal gene transfer. Furthermore, computational requirements of such analysis have hindered content-based organization of metagenomic data at large scale.

Results

In this article, we introduce the Amordad database engine for alignment-free, content-based indexing of metagenomic datasets. Amordad places the metagenome comparison problem in a geometric context, and uses an indexing strategy that combines random hashing with a regular nearest neighbor graph. This framework allows refinement of the database over time by continual application of random hash functions, with the effect of each hash function encoded in the nearest neighbor graph. This eliminates the need to explicitly maintain the hash functions in order for query efficiency to benefit from the accumulated randomness. Results on real and simulated data show that Amordad can support logarithmic query time for identifying similar metagenomes even as the database size reaches into the millions.

Availability and implementation

Source code, licensed under the GNU general public license (version 3) is freely available for download from http://smithlabresearch.org/amordad

Contact

andrewds@usc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-06-27 +24526713,The 3DGD: a database of genome 3D structure.,"

Unlabelled

The studies of chromatin 3D structure help us to understand its formation and function. Techniques combining chromosome conformation capture and next generation sequencing can capture chromatin structure information and have been applied to several different species and cell lines. We built 3DGD (3D Genome Database), a database that currently collects Hi-C data on four species, for easy accessing and visualization of chromatin 3D structure data. With the integration of other omics data such as genome-wide protein-DNA-binding data, this data source would be useful for researchers interested in chromatin structure and its biological functions.

Availability and implementation

The 3DGD v1.1, data browser, downloadable files and documentation are available at: http://3dgd.biosino.org/.",2014-02-12 +29735548,TNFRSF19 Inhibits TGFβ Signaling through Interaction with TGFβ Receptor Type I to Promote Tumorigenesis.,"Genetic susceptibility underlies the pathogenesis of cancer. We and others have previously identified a novel susceptibility gene TNFRSF19, which encodes an orphan member of the TNF receptor superfamily known to be associated with nasopharyngeal carcinoma (NPC) and lung cancer risk. Here, we show that TNFRSF19 is highly expressed in NPC and is required for cell proliferation and NPC development. However, unlike most of the TNF receptors, TNFRSF19 was not involved in NFκB activation or associated with TRAF proteins. We identified TGFβ receptor type I (TβRI) as a specific binding partner for TNFRSF19. TNFRSF19 bound the kinase domain of TβRI in the cytoplasm, thereby blocking Smad2/3 association with TβRI and subsequent signal transduction. Ectopic expression of TNFRSF19 in normal epithelial cells conferred resistance to the cell-cycle block induced by TGFβ, whereas knockout of TNFRSF19 in NPC cells unleashed a potent TGFβ response characterized by upregulation of Smad2/3 phosphorylation and TGFβ target gene transcription. Furthermore, elevated TNFRSF19 expression correlated with reduced TGFβ activity and poor prognosis in patients with NPC. Our data reveal that gain of function of TNFRSF19 in NPC represents a mechanism by which tumor cells evade the growth-inhibitory action of TGFβ.Significance:TNFRSF19, a susceptibility gene for nasopharyngeal carcinoma and other cancers, functions as a potent inhibitor of the TGFβ signaling pathway.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/13/3469/F1.large.jpg Cancer Res; 78(13); 3469-83. 
©2018 AACR.",2018-05-07 +29280166,Pyrite: A blender plugin for visualizing molecular dynamics simulations using industry-standard rendering techniques.,"Molecular dynamics (MD) simulations provide critical insights into many biological mechanisms. Programs such as VMD, Chimera, and PyMOL can produce impressive simulation visualizations, but they lack many advanced rendering algorithms common in the film and video-game industries. In contrast, the modeling program Blender includes such algorithms but cannot import MD-simulation data. MD trajectories often require many gigabytes of memory/disk space, complicating Blender import. We present Pyrite, a Blender plugin that overcomes these limitations. Pyrite allows researchers to visualize MD simulations within Blender, with full access to Blender's cutting-edge rendering techniques. We expect Pyrite-generated images to appeal to students and non-specialists alike. A copy of the plugin is available at http://durrantlab.com/pyrite/, released under the terms of the GNU General Public License Version 3. © 2017 Wiley Periodicals, Inc.",2017-12-26 +26220891,GeneMatcher: a matching tool for connecting investigators with an interest in the same gene.,"Here, we describe an overview and update on GeneMatcher (http://www.genematcher.org), a freely accessible Web-based tool developed as part of the Baylor-Hopkins Center for Mendelian Genomics. We created GeneMatcher with the goal of identifying additional individuals with rare phenotypes who had variants in the same candidate disease gene. We also wanted to facilitate connections to basic scientists working on orthologous genes in model systems with the goal of connecting their work to human Mendelian phenotypes. Meeting these goals will enhance the identification of novel Mendelian genes. Launched in September, 2013, GeneMatcher now has 2,178 candidate genes from 486 submitters spread across 38 countries entered in the database (June 1, 2015). 
GeneMatcher is also part of the Matchmaker Exchange (http://matchmakerexchange.org/) with an Application Programing Interface enabling submitters to query other databases of genetic variants and phenotypes without having to create accounts and data entries in multiple systems.",2015-08-13 +29334505,"The association of chronic air pollutants with coronary artery spasm, vasospastic angina, and endothelial dysfunction.","BACKGROUND:We evaluated the effect of chronic exposure to air pollutants (APs) on coronary endothelial function and significant coronary artery spasm (CAS) as assessed by intracoronary acetylcholine (ACH) provocation test. PATIENTS AND METHODS:A total of 6430 patients with typical or atypical chest pain who underwent intracoronary ACH provocation test were enrolled. We obtained data on APs from the Korean National Institute of Environmental Research (http://www.nier.go.kr/). APs are largely divided into two types: particulate matter with aerodynamic diameter of less than or equal to 10 µm in size (PM10) and gaseous pollutants such as nitrogen dioxide, sulfur dioxide, carbon monoxide, and ozone. The primary endpoint is the incidence of significant CAS and its associated parameters during ACH provocation test. RESULTS:The incidence of CAS was positively correlated with an exposure duration of PM10, whereas nitrogen dioxide, sulfur dioxide, carbon monoxide, and ozone were shown to be unrelated to CAS. During the ACH provocation test, as PM10 increased, the frequency of CAS was increased, and the incidence of transient ST-segment elevation was also increased. There was a trend toward higher incidence of spontaneous spasm as PM10 increased. The mean exposure level of PM10 was 51.3±25.4 µg/m. The CAS risk increased by 4% when the level of PM10 increased by 20 µg/m by an adjusted Cox regression analysis. CONCLUSION:CAS incidence is closely related to exposure to PMs but not to gaseous pollutants. 
Particularly, higher exposure concentrations and longer exposure duration of PM10 increased the risk of CAS. These important findings provide a plausible mechanism that links air pollution to vasospastic angina and provide new insights into environmental factors.",2018-06-01 +28918872,[Management of ablative therapies in prostate cancer].,"

Objectives

To describe the specific modalities of ablative therapies management in prostate cancer.

Materials and methods

A review of the scientific literature was performed in Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of keywords. Publications obtained were selected based on methodology, language and relevance. After selection, 61 articles were analysed.

Results

Development of innovations such as ablative therapies in prostate cancer induces specific modalities in their management, during pre-, per- and post-procedure. More than for classical and well-known treatments, the decision to propose an ablative therapy requires analysis and consensus of medical staff and patient's agreement. Patient's specificities and economical aspects must also be considered. Procedures and follow-up must be realized by referents actors.

Conclusion

Indication, procedure and follow-up of ablative therapies in prostate cancer require specific modalities. They must be respected in order to optimize the results and to obtain a precise and objective evaluation for defining future indications.",2017-09-15 +28918871,[Indications and limits of ablative therapies in prostate cancer].,"

Objective

To perform a state of the art about indications and limits of ablative therapies for localized prostate cancer.

Methods

A review of the scientific literature was performed in Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of keywords. Publications obtained were selected based on methodology, language and relevance. After selection, 107 articles were analysed.

Results

The objective to combine reduction of side effects and oncological control has induced recent development of several ablative therapies. Beyond this heterogeneity, some preferential indications appear: unilateral cancer of low risk (but with significant volume, excluding active surveillance) or intermediate risk (excluding majority of grade 4); treatment targeting the index lesion, by quarter or hemi-ablation, based on biopsy and mpMRI. In addition, indications must consider specific limits of each energy, such as gland volume and tumor localization.

Conclusion

Based on new imaging and biopsy, ablative therapies will probably increased its role in the future in management of localize prostate cancer. The multiple ongoing trials will certainly be helpful to better define their indications and limits.",2017-09-15 +25049509,Detection of quantitative trait Loci affecting fat deposition traits in pigs.,"Quantitative trait loci (QTL) associated with fat deposition traits in pigs are important gene positions in a chromosome that influence meat quality of pork. For QTL study, a three generation resource population was constructed from a cross between Korean native boars and Landrace sows. A total of 240 F2 animals from intercross of F1 were produced. 80 microsatellite markers covering chromosomes 1 to 10 were selected to genotype the resource population. Intervals between adjacent markers were approximately 19 cM. Linkage analysis was performed using CRIMAP software version 2.4 with a FIXED option to obtain the map distances. For QTL analysis, the public web-based software, QTL express (http://www.qtl.cap.ed.ac.uk) was used. Two significant and two suggestive QTL were identified on SSC 6, 7, and 8 as affecting body fat and IMF traits. For QTL affecting IMF, the most significant association was detected between marker sw71 and sw1881 on SSC 6, and a suggestive QTL was identified between sw268 and sw205 on SSC8. These QTL accounted for 26.58% and 12.31% of the phenotypic variance, respectively. A significant QTL affecting IMF was detected at position 105 cM between markers sw71 and sw1881 on SSC 6.",2012-11-01 +22034591,Proteome-wide post-translational modification statistics: frequency analysis and curation of the swiss-prot database. ,"Post-translational modifications (PTMs) broadly contribute to the recent explosion of proteomic data and possess a complexity surpassing that of protein design. PTMs are the chemical modification of a protein after its translation, and have wide effects broadening its range of functionality. 
Based on previous estimates, it is widely believed that more than half of proteins are glycoproteins. Whereas mutations can only occur once per position, different forms of post-translational modifications may occur in tandem. With the number and abundances of modifications constantly being discovered, there is no method to readily assess their relative levels. Here we report the relative abundances of each PTM found experimentally and putatively, from high-quality, manually curated, proteome-wide data, and show that at best, less than one-fifth of proteins are glycosylated. We make available to the academic community a continuously updated resource (http://selene.princeton.edu/PTMCuration) containing the statistics so scientists can assess ""how many"" of each PTM exists.",2011-09-01 +25393678,GPA: a statistical approach to prioritizing GWAS results by integrating pleiotropy and annotation.,"Results from Genome-Wide Association Studies (GWAS) have shown that complex diseases are often affected by many genetic variants with small or moderate effects. Identifications of these risk variants remain a very challenging problem. There is a need to develop more powerful statistical methods to leverage available information to improve upon traditional approaches that focus on a single GWAS dataset without incorporating additional data. In this paper, we propose a novel statistical approach, GPA (Genetic analysis incorporating Pleiotropy and Annotation), to increase statistical power to identify risk variants through joint analysis of multiple GWAS data sets and annotation information because: (1) accumulating evidence suggests that different complex diseases share common risk bases, i.e., pleiotropy; and (2) functionally annotated variants have been consistently demonstrated to be enriched among GWAS hits. 
GPA can integrate multiple GWAS datasets and functional annotations to seek association signals, and it can also perform hypothesis testing to test the presence of pleiotropy and enrichment of functional annotation. Statistical inference of the model parameters and SNP ranking is achieved through an EM algorithm that can handle genome-wide markers efficiently. When we applied GPA to jointly analyze five psychiatric disorders with annotation information, not only did GPA identify many weak signals missed by the traditional single phenotype analysis, but it also revealed relationships in the genetic architecture of these disorders. Using our hypothesis testing framework, statistically significant pleiotropic effects were detected among these psychiatric disorders, and the markers annotated in the central nervous system genes and eQTLs from the Genotype-Tissue Expression (GTEx) database were significantly enriched. We also applied GPA to a bladder cancer GWAS data set with the ENCODE DNase-seq data from 125 cell lines. GPA was able to detect cell lines that are biologically more relevant to bladder cancer. The R implementation of GPA is currently available at http://dongjunchung.github.io/GPA/.",2014-11-13 +28651291,"Development of a Reference Standard Library of Chloroplast Genome Sequences, GenomeTrakrCP.","Precise, species-level identification of plants in foods and dietary supplements is difficult. While the use of DNA barcoding regions (short regions of DNA with diagnostic utility) has been effective for many inquiries, it is not always a robust approach for closely related species, especially in highly processed products. The use of fully sequenced chloroplast genomes, as an alternative to short diagnostic barcoding regions, has demonstrated utility for closely related species. The U. S. Food and Drug Administration (FDA) has also developed species-specific DNA-based assays targeting plant species of interest by utilizing chloroplast genome sequences. 
Here, we introduce a repository of complete chloroplast genome sequences called GenomeTrakrCP, which will be publicly available at the National Center for Biotechnology Information (NCBI). Target species for inclusion are plants found in foods and dietary supplements, toxin producers, common contaminants and adulterants, and their close relatives. Publicly available data will include annotated assemblies, raw sequencing data, and voucher information with each NCBI accession associated with an authenticated reference herbarium specimen. To date, 40 complete chloroplast genomes have been deposited in GenomeTrakrCP (https://www.ncbi.nlm.nih.gov/bioproject/PRJNA325670/), and this will be expanded in the future.",2017-06-26 +25099887,GHOSTX: an improved sequence homology search algorithm using a query suffix array and a database suffix array.,"DNA sequences are translated into protein coding sequences and then further assigned to protein families in metagenomic analyses, because of the need for sensitivity. However, huge amounts of sequence data create the problem that even general homology search analyses using BLASTX become difficult in terms of computational cost. We designed a new homology search algorithm that finds seed sequences based on the suffix arrays of a query and a database, and have implemented it as GHOSTX. GHOSTX achieved approximately 131-165 times acceleration over a BLASTX search at similar levels of sensitivity. GHOSTX is distributed under the BSD 2-clause license and is available for download at http://www.bi.cs.titech.ac.jp/ghostx/. Currently, sequencing technology continues to improve, and sequencers are increasingly producing larger and larger quantities of data. This explosion of sequence data makes computational analysis with contemporary tools more difficult. 
We offer this tool as a potential solution to this problem.",2014-08-06 +25800162,PSOVina: The hybrid particle swarm optimization algorithm for protein-ligand docking.,"Protein-ligand docking is an essential step in modern drug discovery process. The challenge here is to accurately predict and efficiently optimize the position and orientation of ligands in the binding pocket of a target protein. In this paper, we present a new method called PSOVina which combined the particle swarm optimization (PSO) algorithm with the efficient Broyden-Fletcher-Goldfarb-Shannon (BFGS) local search method adopted in AutoDock Vina to tackle the conformational search problem in docking. Using a diverse data set of 201 protein-ligand complexes from the PDBbind database and a full set of ligands and decoys for four representative targets from the directory of useful decoys (DUD) virtual screening data set, we assessed the docking performance of PSOVina in comparison to the original Vina program. Our results showed that PSOVina achieves a remarkable execution time reduction of 51-60% without compromising the prediction accuracies in the docking and virtual screening experiments. This improvement in time efficiency makes PSOVina a better choice of a docking tool in large-scale protein-ligand docking applications. Our work lays the foundation for the future development of swarm-based algorithms in molecular docking programs. PSOVina is freely available to non-commercial users at http://cbbio.cis.umac.mo .",2015-02-10 +26556651,"Finding the Subcellular Location of Barley, Wheat, Rice and Maize Proteins: The Compendium of Crop Proteins with Annotated Locations (cropPAL).","Barley, wheat, rice and maize provide the bulk of human nutrition and have extensive industrial use as agricultural products. 
The genomes of these crops each contains >40,000 genes encoding proteins; however, the major genome databases for these species lack annotation information of protein subcellular location for >80% of these gene products. We address this gap, by constructing the compendium of crop protein subcellular locations called crop Proteins with Annotated Locations (cropPAL). Subcellular location is most commonly determined by fluorescent protein tagging of live cells or mass spectrometry detection in subcellular purifications, but can also be predicted from amino acid sequence or protein expression patterns. The cropPAL database collates 556 published studies, from >300 research institutes in >30 countries that have been previously published, as well as compiling eight pre-computed subcellular predictions for all Hordeum vulgare, Triticum aestivum, Oryza sativa and Zea mays protein sequences. The data collection including metadata for proteins and published studies can be accessed through a search portal http://crop-PAL.org. The subcellular localization information housed in cropPAL helps to depict plant cells as compartmentalized protein networks that can be investigated for improving crop yield and quality, and developing new biotechnological solutions to agricultural challenges.",2015-11-09 +28691494,Two New Tools for Glycopeptide Analysis Researchers: A Glycopeptide Decoy Generator and a Large Data Set of Assigned CID Spectra of Glycopeptides.,"The glycopeptide analysis field is tightly constrained by a lack of effective tools that translate mass spectrometry data into meaningful chemical information, and perhaps the most challenging aspect of building effective glycopeptide analysis software is designing an accurate scoring algorithm for MS/MS data. We provide the glycoproteomics community with two tools to address this challenge. 
The first tool, a curated set of 100 expert-assigned CID spectra of glycopeptides, contains a diverse set of spectra from a variety of glycan types; the second tool, Glycopeptide Decoy Generator, is a new software application that generates glycopeptide decoys de novo. We developed these tools so that emerging methods of assigning glycopeptides' CID spectra could be rigorously tested. Software developers or those interested in developing skills in expert (manual) analysis can use these tools to facilitate their work. We demonstrate the tools' utility in assessing the quality of one particular glycopeptide software package, GlycoPep Grader, which assigns glycopeptides to CID spectra. We first acquired the set of 100 expert assigned CID spectra; then, we used the Decoy Generator (described herein) to generate 20 decoys per target glycopeptide. The assigned spectra and decoys were used to test the accuracy of GlycoPep Grader's scoring algorithm; new strengths and weaknesses were identified in the algorithm using this approach. Both newly developed tools are freely available. The software can be downloaded at http://glycopro.chem.ku.edu/GPJ.jar.",2017-07-25 +22142560,A comparative transcriptome analysis reveals expression profiles conserved across three Eimeria spp. of domestic fowl and associated with multiple developmental stages.,"Coccidiosis of the domestic fowl is a worldwide disease caused by seven species of protozoan parasites of the genus Eimeria. The genome of the model species, Eimeria tenella, presents a complexity of 55-60MB distributed in 14 chromosomes. Relatively few studies have been undertaken to unravel the complexity of the transcriptome of Eimeria parasites. We report here the generation of more than 45,000 open reading frame expressed sequence tag (ORESTES) cDNA reads of E. 
tenella, Eimeria maxima and Eimeria acervulina, covering several developmental stages: unsporulated oocysts, sporoblastic oocysts, sporulated oocysts, sporozoites and second generation merozoites. All reads were assembled to constitute gene indices and submitted to a comprehensive functional annotation pipeline. In the case of E. tenella, we also incorporated publicly available ESTs to generate an integrated body of information. Orthology analyses have identified genes conserved across different apicomplexan parasites, as well as genes restricted to the genus Eimeria. Digital expression profiles obtained from ORESTES/EST countings, submitted to clustering analyses, revealed a high conservation pattern across the three Eimeria spp. Distance trees showed that unsporulated and sporoblastic oocysts constitute a distinct clade in all species, with sporulated oocysts forming a more external branch. This latter stage also shows a close relationship with sporozoites, whereas first and second generation merozoites are more closely related to each other than to sporozoites. The profiles were unambiguously associated with the distinct developmental stages and strongly correlated with the order of the stages in the parasite life cycle. Finally, we present The Eimeria Transcript Database (http://www.coccidia.icb.usp.br/eimeriatdb), a website that provides open access to all sequencing data, annotation and comparative analysis. We expect this repository to represent a useful resource to the Eimeria scientific community, helping to define potential candidates for the development of new strategies to control coccidiosis of the domestic fowl.",2011-11-22 +29103698,Development of a multilocus sequence typing scheme for Rhodococcus equi.,"Rhodococcus equi causes pulmonary and extrapulmonary infections in animals and humans, with endemic situations and significant young foal mortality in stud farms worldwide. 
Despite its economic impact in the horse-breeding industry, the broad geographic and host distribution, global diversity and population structure of R. equi remain poorly characterised. In this context, we developed a multilocus sequence typing (MLST) scheme using 89 clinical and environmental R. equi of various origins and eight Rhodococcus sp. Data can be accessed at http://pubmlst.org/rhodococcus/. A clonal R. equi population was observed with 16 out of 37 sequence types (STs) grouped into six clonal complexes (CC) based on single-locus variants. One of the six CCs (CC3) is not host-specific, suggesting potential exchanges between different R. equi reservoirs. Most of the virulent equine R. equi CCs/unlinked STs were plasmid-type-specific. Despite this, marked genetic variability with the circulation of multiple R. equi genotypes was generally observed even within the same animal. Focusing on outbreaks, data indicated (i) the potential contagious transmission of R. equi during the 2012-Mayotte equine outbreak because of the poor genotype diversity of clinical strains; (ii) a potential porcine outbreak among the 30 Belgian farms investigated in 2013. This first Rhodococcus equi MLST is a powerful tool for further epidemiological investigations and population biology studies of R. equi isolates.",2017-08-19 +28918151,Internal transcribed spacer (ITS) sequencing reveals considerable fungal diversity in dairy products.,"Fungi are important spoilage organisms in dairy products. However, little is known about the diversity of naturally occurring spoilage fungi in raw milk and processed dairy products, due at least in part to the fact that classical fungal identification methods require considerable expertise. To gain further insight into the fungal diversity in the dairy system, we isolated fungi from raw milk, raw and pasteurized milk cheese, and yogurt using the selective dichloran rose bengal chloramphenicol agar. 
In total, 361 fungal isolates were obtained and further characterized by DNA sequencing of the internal transcribed spacer (ITS) region and the nuclear ribosomal large subunit (LSU) rRNA gene if needed. We conducted BLAST (https://blast.ncbi.nlm.nih.gov/Blast.cgi) searches of the ITS region sequences against the UNITE Database (https://unite.ut.ee/analysis.php), and selected other databases if needed, which allowed identification to the species level of 183 isolates and to the genus level of 107 of the 346 isolates that were successfully ITS sequenced. The isolates characterized represented 3 phyla and 19 genera; the most common genera isolated were Penicillium (25% of isolates), Debaryomyces (18%), and Candida (9%). This study not only provides, by using modern molecular tools, a baseline understanding of the types of fungi in dairy products, but also confirms that ITS sequencing is a useful approach for identification of fungal organisms found in the dairy food chain.",2017-09-13 +29316780,PSEN1 p.Met233Val in a Complex Neurodegenerative Movement and Neuropsychiatric Disorder.,"Mutations in presenilin 1 (PSEN1) are the most common cause of autosomal dominant Alzheimer's disease. Here, we report a Canadian-Vietnamese family carrying a PSEN1 p.Met233Val mutation with an exceptionally early and severe presentation that includes a wide range of atypical symptoms, including prominent ataxia, Parkinsonism, spasticity, dystonia, action tremor, myoclonus, bulbar symptoms, seizures, hallucinations and behavioral changes. Whole-exome sequencing (WES) was performed on the affected proband after many assessments over several years proved diagnostically inconclusive. The results were analyzed using the AnnEx ""Annotated Exomes"" browser (http://annex.can.ubc.ca), a web-based platform that facilitates WES variant annotation and interpretation. 
High-throughput sequencing can be especially informative for complex neurological disorders, and WES warrants consideration as a first-line clinical test. Data analyses facilitated by web-based bioinformatics tools have great potential for novel insight, although confirmatory, diagnostically accredited Sanger sequencing is recommended prior to reporting.",2018-01-11 +29675032,FSPP: A Tool for Genome-Wide Prediction of smORF-Encoded Peptides and Their Functions.,"smORFs are small open reading frames of less than 100 codons. Recent low throughput experiments showed a lot of smORF-encoded peptides (SEPs) played crucial rule in processes such as regulation of transcription or translation, transportation through membranes and the antimicrobial activity. In order to gather more functional SEPs, it is necessary to have access to genome-wide prediction tools to give profound directions for low throughput experiments. In this study, we put forward a functional smORF-encoded peptides predictor (FSPP) which tended to predict authentic SEPs and their functions in a high throughput method. FSPP used the overlap of detected SEPs from Ribo-seq and mass spectrometry as target objects. With the expression data on transcription and translation levels, FSPP built two co-expression networks. Combing co-location relations, FSPP constructed a compound network and then annotated SEPs with functions of adjacent nodes. Tested on 38 sequenced samples of 5 human cell lines, FSPP successfully predicted 856 out of 960 annotated proteins. Interestingly, FSPP also highlighted 568 functional SEPs from these samples. After comparison, the roles predicted by FSPP were consistent with known functions. These results suggest that FSPP is a reliable tool for the identification of functional small peptides. FSPP source code can be acquired at https://www.bioinfo.org/FSPP.",2018-04-05 +35658165,Surgery versus primary endocrine therapy for operable primary breast cancer in elderly women (70 years plus).,"

Background

Several studies have evaluated the clinical effectiveness of endocrine therapy alone in women aged 70 years or over with operable breast cancer and who are fit for surgery.

Objectives

To systematically review the evidence for the clinical effectiveness of surgery (with or without adjuvant endocrine therapy) in comparison to primary endocrine therapy in the treatment of operable breast cancer in women aged 70 years and over, both in terms of local progression and mortality.

Search methods

We conducted an updated search of the Cochrane Breast Cancer Group's Specialised Register (27th March 2013) and new searches of the Cochrane Central Register of Controlled Trials (CENTRAL, 2013, Issue 3), MEDLINE, EMBASE, the World Health Organization's International Clinical Trials Registry Platform (apps.who.int/trialsearch/) and www.clinicaltrials.gov, using the search terms 'early breast cancer', 'endocrine therapy', 'psychosocial' or 'surgery'.

Selection criteria

Randomised trials comparing surgery, with or without adjuvant endocrine therapy, to primary endocrine therapy in the management of women aged 70 years or over with early breast cancer and who were fit for surgery.

Data collection and analysis

We assessed studies for eligibility and quality, and two review authors independently extracted data from published trials. We derived hazard ratios for time-to-event outcomes, where possible, and used a fixed-effect model for meta-analysis. We extracted toxicity and quality-of-life data, where present. Where outcome data were not available, we contacted trialists and requested unpublished data.

Main results

We identified seven eligible trials, of which six had published time-to-event data and one was published only in abstract form with no usable data. The quality of the allocation concealment was adequate in three studies and unclear in the remainder. In each case the endocrine therapy used was tamoxifen. Data, based on an estimated 1081 deaths in 1571 women, did not show a statistically significant difference in favour of either surgery or primary endocrine therapy in respect of overall survival. However, there was a statistically significant difference in terms of progression-free survival, which favoured surgery with (474 participants) or without endocrine therapy (164 participants). The hazard ratios (HRs) for overall survival were: HR 0.98 (95% confidence interval (CI) 0.81 to 1.20, P = 0.85; 3 trials, 495 participants) for surgery alone versus primary endocrine therapy; HR 0.86 (95% CI 0.73 to 1.00, P = 0.06; 3 trials, 1076 participants) for surgery plus endocrine therapy versus primary endocrine therapy. The HRs for progression-free survival were: HR 0.55 (95% CI 0.39 to 0.77, P = 0.0006) for surgery alone versus primary endocrine therapy; HR 0.65 (95% CI 0.53 to 0.81, P = 0.0001) for surgery plus endocrine therapy versus primary endocrine therapy (each comparison based on only one trial). Tamoxifen-related adverse effects included hot flushes, skin rash, vaginal discharge, indigestion, breast pain, sleepiness, headache, vertigo, itching, hair loss, cystitis, acute thrombophlebitis, nausea, and indigestion. Surgery-related adverse effects included paraesthesia on the ipsilateral arm and lateral thoracic wall in those who had axillary clearance. One study suggested that those undergoing surgery suffered more psychosocial morbidity at three months post-surgery, although this difference had disappeared by two years.

Authors' conclusions

Primary endocrine therapy should only be offered to women with oestrogen receptor (ER)-positive tumours who are unfit for surgery, at increased risk of serious surgical or anaesthetic complications if subjected to surgery, or who refuse surgery. In a cohort of women with significant co-morbid disease and ER-positive tumours it is possible that primary endocrine therapy may be a superior option to surgery. Trials are needed to evaluate the clinical effectiveness of aromatase inhibitors as primary therapy for an infirm older population with ER-positive tumours.",2014-05-19 +28090199,MASTR-MS: a web-based collaborative laboratory information management system (LIMS) for metabolomics.,"

Background

An increasing number of research laboratories and core analytical facilities around the world are developing high throughput metabolomic analytical and data processing pipelines that are capable of handling hundreds to thousands of individual samples per year, often over multiple projects, collaborations and sample types. At present, there are no Laboratory Information Management Systems (LIMS) that are specifically tailored for metabolomics laboratories that are capable of tracking samples and associated metadata from the beginning to the end of an experiment, including data processing and archiving, and which are also suitable for use in large institutional core facilities or multi-laboratory consortia as well as single laboratory environments.

Results

Here we present MASTR-MS, a downloadable and installable LIMS solution that can be deployed either within a single laboratory or used to link workflows across a multisite network. It comprises a Node Management System that can be used to link and manage projects across one or multiple collaborating laboratories; a User Management System which defines different user groups and privileges of users; a Quote Management System where client quotes are managed; a Project Management System in which metadata is stored and all aspects of project management, including experimental setup, sample tracking and instrument analysis, are defined, and a Data Management System that allows the automatic capture and storage of raw and processed data from the analytical instruments to the LIMS.

Conclusion

MASTR-MS is a comprehensive LIMS solution specifically designed for metabolomics. It captures the entire lifecycle of a sample starting from project and experiment design to sample analysis, data capture and storage. It acts as an electronic notebook, facilitating project management within a single laboratory or a multi-node collaborative environment. This software is being developed in close consultation with members of the metabolomics research community. It is freely available under the GNU GPL v3 licence and can be accessed from https://muccg.github.io/mastr-ms/.",2016-12-27 +25935546,u-CARE: user-friendly Comprehensive Antibiotic resistance Repository of Escherichia coli.,"

Background and aims

Despite medical advancements, Escherichia coli-associated infections remain a major public health concern and although abundant information about E. coli and its antibiotic resistance mechanisms is available, no effective tool exists that integrates gene and genomic data in context to drug resistance, thus raising a need to develop a repository that facilitates integration and assimilation of factors governing drug resistance in E. coli.

Descriptions

User-friendly Comprehensive Antibiotic resistance Repository of Escherichia coli (u-CARE) is a manually curated catalogue of 52 antibiotics with reported resistance, 107 genes, transcription factors and single nucleotide polymorphism (SNPs) involved in multiple drug resistance of this pathogen. Each gene page provides detailed information about its resistance mechanisms, while antibiotic page consists of summary, chemical description and structural descriptors with links to external public databases like GO, CDD, DEG, Ecocyc, KEGG, Drug Bank, PubChem and UniProt. Moreover, the database integrates this reductive information to holistic data such as strain-specific and segment-specific pathogenic islands and operons. In addition, the database offers rich user interface for the visualisation and retrieval of information using various search criteria such as sequence, keyword, image and class search.

Conclusions

u-CARE is aimed to cater to the needs of researchers working in the field of antimicrobial drug resistance with minimal knowledge of bioinformatics. This database is also intended as a guide book to medical practitioners to avoid use of antibiotics against which resistance has already been reported in E. coli. The database is available from: http://www.e-bioinformatics.net/ucare.",2015-05-02 +29379098,DNA structure at the plasmid origin-of-transfer indicates its potential transfer range.,"Horizontal gene transfer via plasmid conjugation enables antimicrobial resistance (AMR) to spread among bacteria and is a major health concern. The range of potential transfer hosts of a particular conjugative plasmid is characterised by its mobility (MOB) group, which is currently determined based on the amino acid sequence of the plasmid-encoded relaxase. To facilitate prediction of plasmid MOB groups, we have developed a bioinformatic procedure based on analysis of the origin-of-transfer (oriT), a merely 230 bp long non-coding plasmid DNA region that is the enzymatic substrate for the relaxase. By computationally interpreting conformational and physicochemical properties of the oriT region, which facilitate relaxase-oriT recognition and initiation of nicking, MOB groups can be resolved with over 99% accuracy. We have shown that oriT structural properties are highly conserved and can be used to discriminate among MOB groups more efficiently than the oriT nucleotide sequence. The procedure for prediction of MOB groups and potential transfer range of plasmids was implemented using published data and is available at http://dnatools.eu/MOB/plasmid.html .",2018-01-29 +26853435,Comparative analysis of Cu (I)-catalyzed alkyne-azide cycloaddition (CuAAC) and strain-promoted alkyne-azide cycloaddition (SPAAC) in O-GlcNAc proteomics.,"O-linked β-N-acetylglucosamine (O-GlcNAc) is emerging as an essential protein post-translational modification in a range of organisms. 
It is involved in various cellular processes such as nutrient sensing, protein degradation, gene expression, and is associated with many human diseases. Despite its importance, identifying O-GlcNAcylated proteins is a major challenge in proteomics. Here, using peracetylated N-azidoacetylglucosamine (Ac4 GlcNAz) as a bioorthogonal chemical handle, we described a gel-based mass spectrometry method for the identification of proteins with O-GlcNAc modification in A549 cells. In addition, we made a labeling efficiency comparison between two modes of azide-alkyne bioorthogonal reactions in click chemistry: copper-catalyzed azide-alkyne cycloaddition (CuAAC) with Biotin-Diazo-Alkyne and strain-promoted azide-alkyne cycloaddition (SPAAC) with Biotin-DIBO-Alkyne. After conjugation with click chemistry in vitro and enrichment via streptavidin resin, proteins with O-GlcNAc modification were separated by SDS-PAGE and identified with mass spectrometry. Proteomics data analysis revealed that 229 putative O-GlcNAc modified proteins were identified with Biotin-Diazo-Alkyne conjugated sample and 188 proteins with Biotin-DIBO-Alkyne conjugated sample, among which 114 proteins were overlapping. Interestingly, 74 proteins identified from Biotin-Diazo-Alkyne conjugates and 46 verified proteins from Biotin-DIBO-Alkyne conjugates could be found in the O-GlcNAc modified proteins database dbOGAP (http://cbsb.lombardi.georgetown.edu/hulab/OGAP.html). These results suggested that CuAAC with Biotin-Diazo-Alkyne represented a more powerful method in proteomics with higher protein identification and better accuracy compared to SPAAC. The proteomics credibility was also confirmed by the molecular function and cell component gene ontology (GO). 
Together, the method we reported here combining metabolic labeling, click chemistry, affinity-based enrichment, SDS-PAGE separation, and mass spectrometry, would be adaptable for other post-translationally modified proteins in proteomics.",2016-03-01 +28035029,geneAttribution: trait agnostic identification of candidate genes associated with noncoding variation.,"

Motivation

We have developed geneAttribution, an R package that assigns candidate causal gene(s) to a risk variant identified by a genetic association study such as a GWAS. The method combines user-supplied functional annotation such as expression quantitative trait loci (eQTL) or Hi-C genome conformation data and reports the most likely candidate genes. In the absence of annotation data, geneAttribution relies on the distances between the genes and the input variant.

Availability and implementation

The package is freely available from http://www.bioconductor.org/ . A quick-start vignette is included with the package.

Contact

wustera@gene.com.",2017-02-01 +27797782,ImageJ-MATLAB: a bidirectional framework for scientific image analysis interoperability.,"

Summary

ImageJ-MATLAB is a lightweight Java library facilitating bi-directional interoperability between MATLAB and ImageJ. By defining a standard for translation between matrix and image data structures, researchers are empowered to select the best tool for their image-analysis tasks.

Availability and implementation

Freely available extension to ImageJ2 ( http://imagej.net/Downloads ). Installation and use instructions available at http://imagej.net/MATLAB_Scripting. Tested with ImageJ 2.0.0-rc-54 , Java 1.8.0_66 and MATLAB R2015b.

Contact

eliceiri@wisc.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +29036400,Prediction and modeling of pre-analytical sampling errors as a strategy to improve plasma NMR metabolomics data.,"

Motivation

Biobanks are important infrastructures for life science research. Optimal sample handling regarding e.g. collection and processing of biological samples is highly complex, with many variables that could alter sample integrity and even more complex when considering multiple study centers or using legacy samples with limited documentation on sample management. Novel means to understand and take into account such variability would enable high-quality research on archived samples.

Results

This study investigated whether pre-analytical sample variability could be predicted and reduced by modeling alterations in the plasma metabolome, measured by NMR, as a function of pre-centrifugation conditions (1-36 h pre-centrifugation delay time at 4 °C and 22 °C) in 16 individuals. Pre-centrifugation temperature and delay times were predicted using random forest modeling and performance was validated on independent samples. Alterations in the metabolome were modeled at each temperature using a cluster-based approach, revealing reproducible effects of delay time on energy metabolism intermediates at both temperatures, but more pronounced at 22 °C. Moreover, pre-centrifugation delay at 4 °C resulted in large, specific variability at 3 h, predominantly of lipids. Pre-analytical sample handling error correction resulted in significant improvement of data quality, particularly at 22 °C. This approach offers the possibility to predict pre-centrifugation delay temperature and time in biobanked samples before use in costly downstream applications. Moreover, the results suggest potential to decrease the impact of undesired, delay-induced variability. However, these findings need to be validated in multiple, large sample sets and with analytical techniques covering a wider range of the metabolome, such as LC-MS.

Availability and implementation

The sampleDrift R package is available at https://gitlab.com/CarlBrunius/sampleDrift.

Contact

carl.brunius@chalmers.se.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +26589635,Brassica database (BRAD) version 2.0: integrating and mining Brassicaceae species genomic resources. ,"The Brassica database (BRAD) was built initially to assist users apply Brassica rapa and Arabidopsis thaliana genomic data efficiently to their research. However, many Brassicaceae genomes have been sequenced and released after its construction. These genomes are rich resources for comparative genomics, gene annotation and functional evolutionary studies of Brassica crops. Therefore, we have updated BRAD to version 2.0 (V2.0). In BRAD V2.0, 11 more Brassicaceae genomes have been integrated into the database, namely those of Arabidopsis lyrata, Aethionema arabicum, Brassica oleracea, Brassica napus, Camelina sativa, Capsella rubella, Leavenworthia alabamica, Sisymbrium irio and three extremophiles Schrenkiella parvula, Thellungiella halophila and Thellungiella salsuginea. BRAD V2.0 provides plots of syntenic genomic fragments between pairs of Brassicaceae species, from the level of chromosomes to genomic blocks. The Generic Synteny Browser (GBrowse_syn), a module of the Genome Browser (GBrowse), is used to show syntenic relationships between multiple genomes. Search functions for retrieving syntenic and non-syntenic orthologs, as well as their annotation and sequences are also provided. Furthermore, genome and annotation information have been imported into GBrowse so that all functional elements can be visualized in one frame. We plan to continually update BRAD by integrating more Brassicaceae genomes into the database. Database URL: http://brassicadb.org/brad/.",2015-11-20 +,SNPknow: a web server for functional annotation of cattle SNP markers,"Single nucleotide polymorphisms (SNP) microarray technology provides new insights to identify the genetic factors associated with the traits of interest. 
To meet the immediate need for a framework of genome-wide association study (GWAS), we have developed SNPknow, a suite of CGI-based tools that provide enrichment analysis and functional annotation for cattle SNP markers and allow the users to navigate and analyse large sets of high-dimensional data from the gene ontology (GO) annotation systems. SNPknow is the only web server currently providing functional annotations of cattle SNP markers in three commercial platforms and dbSNP database. The web server may be particularly beneficial for the analysis of combining SNP association analysis with the gene set enrichment analysis and is freely available at http://klab.sjtu.edu.cn/SNPknow.",2011-06-01 +27531214,Cross-species Conservation of context-specific networks.,"

Background

Many large data compendia on context-specific high-throughput genomic and regulatory data have been made available by international research consortia such as ENCODE, TCGA, and Epigenomics Roadmap. The use of these resources is impaired by the sheer size of the available big data and big metadata. Many of these context-specific data can be modeled as data derived regulatory networks (DDRNs) representing the complex and complicated interactions between transcription factors and target genes. These DDRNs are useful for the understanding of regulatory mechanisms and helpful for interpreting biomedical data.

Results

The Cross-species Conservation framework (CroCo) provides a network-oriented view on the ENCODE regulatory data (CroCo network repository), convenient ways to access and browse networks and metadata, and a method to combine networks across compendia, experimental techniques, and species (CroCo tool suite). DDRNs can be combined with additional information and networks derived from the literature, curated resources, and computational predictions in order to enable detailed exploration and cross checking of regulatory interactions. Applications of the CroCo framework range from simple evidence look-up for user-defined regulatory interactions to the identification of conserved sub-networks in diverse cell-lines, conditions, and even species.

Conclusion

CroCo adds an intuitive unifying view on the data from the ENCODE projects via a comprehensive repository of derived context-specific regulatory networks and enables flexible cross-context, cross-species, and cross-compendia comparison via a basis set of analysis tools. The CroCo web-application and Cytoscape plug-in are freely available at: http://services.bio.ifi.lmu.de/croco-web . The web-page links to a detailed system description, a user guide, and tutorial videos presenting common use cases of the CroCo framework.",2016-08-17 +25234927,circBase: a database for circular RNAs.,"Recently, several laboratories have reported thousands of circular RNAs (circRNAs) in animals. Numerous circRNAs are highly stable and have specific spatiotemporal expression patterns. Even though a function for circRNAs is unknown, these features make circRNAs an interesting class of RNAs as possible biomarkers and for further research. We developed a database and website, ""circBase,"" where merged and unified data sets of circRNAs and the evidence supporting their expression can be accessed, downloaded, and browsed within the genomic context. circBase also provides scripts to identify known and novel circRNAs in sequencing data. The database is freely accessible through the web server at http://www.circbase.org/.",2014-09-18 +29633452,"Research protocol: The initiation, design and establishment of the Global Angelman Syndrome Registry.","BACKGROUND:Angelman syndrome (AS) is a rare neurodevelopmental disorder affecting between 1 in 15 000 and 1 in 24 000 individuals. The condition results in severe developmental and expressive language delays, motor impairments and a unique behavioural phenotype consisting of excessive laughter, smiling and sociability. While many studies have contributed knowledge about the causes and natural history of the syndrome, large scale longitudinal studies are required to advance research and therapeutics for this rare syndrome. 
METHOD:This article describes the protocol for the Global Angelman Syndrome Registry, and some initial findings. Due to the rarity of AS and the variability in symptom presentation, the registry team will strive for complete case ascertainment. Parents and caregivers will submit data to the registry via a secure internet connection. The registry consists of 10 modules that cover patient demographics; developmental, diagnostic, medical and surgical history, behaviour and development, epilepsy, medications and interventions and sleep. RESULTS:Since its launch at https://angelmanregistry.info in September 2016, almost 470 individuals with AS have been signed up to the registry worldwide: 59% are from North and South America, 23% are from Europe, 17% are from the Asia Pacific region and 1% are from the Middle East or Africa. The majority of registrants are children, with only 16% aged over 20 years. Most participants indicated a chromosome deletion (76%), with fewer participants indicating a mutation, uniparental disomy or imprinting defect (20%). CONCLUSION:Findings indicate a need to consider recruitment strategies that target caregivers of older children and adults, and parents and caregivers from non-English speaking backgrounds.",2018-05-01 +26218982,Complete mitochondrial genome database and standardized classification system for Canis lupus familiaris.,"To contribute to the complete mitogenome database of the species Canis lupus familiaris and shed more light on its origin, we have sequenced mitochondrial genomes of 120 modern dogs from worldwide populations. Together with all the previously published mitogenome sequences of acceptable quality, we have reconstructed a global phylogenetic tree of 555 C. l. familiaris mitogenomes and standardized haplogroup nomenclature. 
The phylogenetic tree presented here and available online at http://clf.mtdna.tree.cm.umk.pl/ could be further used by forensic and evolutionary geneticists as well cynologists, for data quality control and unambiguous haplogroup classification. Our in-depth phylogeographic analysis of all C. l. familiaris mitogenomes confirmed that domestic dogs may have originated in East Asia during the Mesolithic and Upper Paleolithic time periods and started to expand to other parts of the world during Neolithic times.",2015-07-17 +30004235,The Piezo1 cation channel mediates uterine artery shear stress mechanotransduction and vasodilation during rat pregnancy.,"During mammalian pregnancy, the uterine circulation must undergo substantial vasodilation and growth to maintain sufficient uteroplacental perfusion. Although we and others have shown that nitric oxide (NO) is a key mediator of these processes, the mechanisms that augment uterine artery NO signaling during gestation have not been identified. We hypothesized that Piezo1, a recently discovered cation channel, may be involved in the process of shear stress mechanotransduction, as other studies have shown that it is both mechanosensitive and linked to NO production. Surprisingly, there are no studies on Piezo1 in the uterine circulation. Our aims in the present study were to determine whether this novel channel is 1) present in uterine arteries, 2) regulated by gestation, 3) functionally relevant (able to elicit rises in intracellular Ca2+ concentration and vasodilation), and 4) linked to NO. Immunohistochemistry confirmed that Piezo1 is present in uterine arteries, primarily but not exclusively in endothelial cells. Western blot analysis showed that its protein expression was elevated during gestation. 
In pressurized main uterine arteries, pharmacological activation of Piezo1 by Yoda1 produced near maximal vasodilation and was associated with significant increases in intracellular Ca2+ concentration in endothelial cell sheets. Shear stress induced by intraluminal flow produced reversible vasodilations that were inhibited >50% by GsMTx-4, a Piezo1 inhibitor, and by Nω-nitro-l-arginine methyl ester/ Nω-nitro-l-arginine, inhibitors of NO synthase. These findings are the first to implicate a functional role for Piezo1 in the uterine circulation as a mechanosensor of endothelial shear stress. Moreover, our data demonstrate that Piezo1 activation leads to vasodilation via NO and indicate that its molecular expression is upregulated during pregnancy. NEW & NOTEWORTHY This is the first study to highlight Piezo1 in the uterine circulation. As a potentially important endothelial mechanosensor of shear stress, Piezo1 may be linked to mechanisms that support increased uteroplacental perfusion during pregnancy. Listen to this article's corresponding podcast at https://ajpheart.podbean.com/e/piezo1-mechanotransduction-in-the-uterine-circulation/ .",2018-07-13 +29716964,Nuclear Receptor CAR Suppresses GADD45B-p38 MAPK Signaling to Promote Phenobarbital-induced Proliferation in Mouse Liver.,"Phenobarbital, a nongenotoxic hepatocarcinogen, induces hepatic proliferation and promotes development of hepatocellular carcinoma (HCC) in rodents. Nuclear receptor constitutive active/androstane receptor (NR1I3/CAR) regulates the induction and promotion activities of phenobarbital. Here, it is demonstrated that phenobarbital treatment results in dephosphorylation of a tumor suppressor p38 MAPK in the liver of C57BL/6 and C3H/HeNCrlBR mice. The molecular mechanism entails CAR binding and inhibition of the growth arrest and DNA-damage-inducible 45 beta (GADD45B)-MAPK kinase 6 (MKK6) scaffold to repress phosphorylation of p38 MAPK. 
Phenobarbital-induced hepatocyte proliferation, as determined by BrdUrd incorporation, was significantly reduced in both male and female livers of GADD45B knockout (KO) mice compared with the wild-type mice. The phenobarbital-induced proliferation continued until 48 hours after phenobarbital injection in only the C57BL/6 males, but neither in males of GADD45B KO mice nor in females of C57BL/6 and GADD45B KO mice. Thus, these data reveal nuclear receptor CAR interacts with GADD45B to repress p38 MAPK signaling and elicit hepatocyte proliferation in male mice.Implications: This GADD45B-regulated male-predominant proliferation can be expanded as a phenobarbital promotion signal of HCC development in future studies.Visual Overview: http://mcr.aacrjournals.org/content/molcanres/16/8/1309/F1.large.jpg Mol Cancer Res; 16(8); 1309-18. ©2018 AACR.",2018-05-01 +29415140,Analysis of the presence of erroneous Qnr sequences in GenBank.,"Background:Twenty years ago the first transferable mechanism of quinolone resistance (TMQR), QnrA, was described. Thereafter, innumerable TMQRs, either Qnr related or not, were described. Ten years ago the exponential description of Qnr genes/alleles led to the proposal of a common nomenclature. Objectives:This analysis aims to determine the degree of correctness of the Qnr sequences currently present in GenBank. Methods:The Qnr amino acid type sequence of the first allele (e.g. QnrA1) of each Qnr family present in http://www.lahey.org/qnrStudies/ was compared with what is present in GenBank. Only the first 30 obtained annealings or those with a >90% identity were considered. No synthetic or chromosomal sequences (other than those included in http://www.lahey.org/qnrStudies/) were included in the analyses. Results:Overall, 1657 amino acid sequences were analysed: 224 QnrA, 499 QnrB, 1 QnrC, 102 QnrD, 13 QnrE, 758 QnrS and 60 QnrVC. 
Of these, 340 (20.5%) sequences presented a major error, including erroneous gene name, erroneous Qnr family attribution, erroneous allele identification, presence of partial sequences with allele assignation and/or erroneous initial codon. In addition, 449 (27.1%) Qnr sequences were present in GenBank with a partial identification or not identified as Qnr. Finally, nine new transferable Qnr alleles were detected. Conclusions:These data highlight the frequent presence of erroneously identified qnr genes in GenBank and the need to be fully adherent to current nomenclature rules.",2018-05-01 +30155360,Global mapping of potential natural vegetation: an assessment of machine learning algorithms for estimating land potential.,"Potential natural vegetation (PNV) is the vegetation cover in equilibrium with climate, that would exist at a given location if not impacted by human activities. PNV is useful for raising public awareness about land degradation and for estimating land potential. This paper presents results of assessing machine learning algorithms-neural networks (nnet package), random forest (ranger), gradient boosting (gbm), K-nearest neighborhood (class) and Cubist-for operational mapping of PNV. Three case studies were considered: (1) global distribution of biomes based on the BIOME 6000 data set (8,057 modern pollen-based site reconstructions), (2) distribution of forest tree taxa in Europe based on detailed occurrence records (1,546,435 ground observations), and (3) global monthly fraction of absorbed photosynthetically active radiation (FAPAR) values (30,301 randomly-sampled points). A stack of 160 global maps representing biophysical conditions over land, including atmospheric, climatic, relief, and lithologic variables, were used as explanatory variables. The overall results indicate that random forest gives the overall best performance. 
The highest accuracy for predicting BIOME 6000 classes (20) was estimated to be between 33% (with spatial cross-validation) and 68% (simple random sub-setting), with the most important predictors being total annual precipitation, monthly temperatures, and bioclimatic layers. Predicting forest tree species (73) resulted in mapping accuracy of 25%, with the most important predictors being monthly cloud fraction, mean annual and monthly temperatures, and elevation. Regression models for FAPAR (monthly images) gave an R-square of 90% with the most important predictors being total annual precipitation, monthly cloud fraction, CHELSA bioclimatic layers, and month of the year, respectively. Further developments of PNV mapping could include using all GBIF records to map the global distribution of plant species at different taxonomic levels. This methodology could also be extended to dynamic modeling of PNV, so that future climate scenarios can be incorporated. Global maps of biomes, FAPAR and tree species at one km spatial resolution are available for download via http://dx.doi.org/10.7910/DVN/QQHCIK.",2018-08-22 +22527514,The Protein Structure Initiative Structural Biology Knowledgebase Technology Portal: a structural biology web resource.,"The Technology Portal of the Protein Structure Initiative Structural Biology Knowledgebase (PSI SBKB; http://technology.sbkb.org/portal/ ) is a web resource providing information about methods and tools that can be used to relieve bottlenecks in many areas of protein production and structural biology research. Several useful features are available on the web site, including multiple ways to search the database of over 250 technological advances, a link to videos of methods on YouTube, and access to a technology forum where scientists can connect, ask questions, get news, and develop collaborations. 
The Technology Portal is a component of the PSI SBKB ( http://sbkb.org ), which presents integrated genomic, structural, and functional information for all protein sequence targets selected by the Protein Structure Initiative. Created in collaboration with the Nature Publishing Group, the SBKB offers an array of resources for structural biologists, such as a research library, editorials about new research advances, a featured biological system each month, and a functional sleuth for searching protein structures of unknown function. An overview of the various features and examples of user searches highlight the information, tools, and avenues for scientific interaction available through the Technology Portal.",2012-04-06 +29281004,Computational identification of binding energy hot spots in protein-RNA complexes using an ensemble approach.,"Motivation:Identifying RNA-binding residues, especially energetically favored hot spots, can provide valuable clues for understanding the mechanisms and functional importance of protein-RNA interactions. Yet, limited availability of experimentally recognized energy hot spots in protein-RNA crystal structures leads to the difficulties in developing empirical identification approaches. Computational prediction of RNA-binding hot spot residues is still in its infant stage. Results:Here, we describe a computational method, PrabHot (Prediction of protein-RNA binding hot spots), that can effectively detect hot spot residues on protein-RNA binding interfaces using an ensemble of conceptually different machine learning classifiers. Residue interaction network features and new solvent exposure characteristics are combined together and selected for classification with the Boruta algorithm. In particular, two new reference datasets (benchmark and independent) have been generated containing 107 hot spots from 47 known protein-RNA complex structures. 
In 10-fold cross-validation on the training dataset, PrabHot achieves promising performances with an AUC score of 0.86 and a sensitivity of 0.78, which are significantly better than that of the pioneer RNA-binding hot spot prediction method HotSPRing. We also demonstrate the capability of our proposed method on the independent test dataset and gain a competitive advantage as a result. Availability and implementation:The PrabHot webserver is freely available at http://denglab.org/PrabHot/. Contact:leideng@csu.edu.cn. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-05-01 +26578582,Enhanced annotations and features for comparing thousands of Pseudomonas genomes in the Pseudomonas genome database.,"The Pseudomonas Genome Database (http://www.pseudomonas.com) is well known for the application of community-based annotation approaches for producing a high-quality Pseudomonas aeruginosa PAO1 genome annotation, and facilitating whole-genome comparative analyses with other Pseudomonas strains. To aid analysis of potentially thousands of complete and draft genome assemblies, this database and analysis platform was upgraded to integrate curated genome annotations and isolate metadata with enhanced tools for larger scale comparative analysis and visualization. Manually curated gene annotations are supplemented with improved computational analyses that help identify putative drug targets and vaccine candidates or assist with evolutionary studies by identifying orthologs, pathogen-associated genes and genomic islands. The database schema has been updated to integrate isolate metadata that will facilitate more powerful analysis of genomes across datasets in the future. 
We continue to place an emphasis on providing high-quality updates to gene annotations through regular review of the scientific literature and using community-based approaches including a major new Pseudomonas community initiative for the assignment of high-quality gene ontology terms to genes. As we further expand from thousands of genomes, we plan to provide enhancements that will aid data visualization and analysis arising from whole-genome comparative studies including more pan-genome and population-based approaches.",2015-11-17 +29986029,Consumption of Coffee but Not of Other Caffeine-Containing Beverages Reduces the Risk of End-Stage Renal Disease in the Singapore Chinese Health Study.,"

Background

Cross-sectional studies suggest that coffee drinking is associated with better renal function. However, to our knowledge, no prospective study has examined its relation with the risk of end-stage renal disease (ESRD).

Objective

We examined the relations between coffee, tea, soda, and total caffeine consumption and the risk of ESRD among middle-aged and older Chinese in Singapore.

Methods

We used data from the Singapore Chinese Health Study, a prospective cohort of 63,257 men and women aged 45-74 y at recruitment from 1993 to 1998. Baseline information on the consumption of caffeinated coffee and other caffeinated beverages (tea and sodas), habitual diet, medical history, and lifestyle factors was obtained via in-person interviews. The standard serving size of 1 cup was assigned as 237 mL in the questionnaire. Incident ESRD cases were identified via linkage with the nationwide registry. We used multivariable Cox regression models to estimate HRs and 95% CIs of ESRD risk associated with the consumption of caffeinated beverages, with adjustment for potential confounders.

Results

After a mean follow-up of 16.8 y, 1143 cohort subjects developed ESRD. Compared with those who drank coffee less than daily, the HR (95% CI) was 0.91 (0.79, 1.05) for those who drank 1 cup of coffee/d and 0.82 (0.71, 0.96) for those who drank ≥2 cups/d (P-trend = 0.012). When stratified by sex, this association was observed in men but not in women. Compared with those who drank less than daily, the HR (95% CI) for drinking ≥2 cups/d was 0.71 (0.57, 0.87) among men and 0.97 (0.78, 1.19) among women (P-interaction = 0.03). Conversely, intakes of tea, soda, or total caffeine were not associated with the risk of ESRD in multivariable models.

Conclusion

The consumption of ≥2 cups of coffee/d may reduce the risk of ESRD in the general population, especially among men. This study was registered at http://www.clinicaltrials.gov as NCT03356340.",2018-08-01 +28680265,RapaNet: A Web Tool for the Co-Expression Analysis of Brassica rapa Genes.,"Accumulated microarray data are used for assessing gene function by providing statistical values for co-expressed genes; however, only a limited number of Web tools are available for analyzing the co-expression of genes of Brassica rapa. We have developed a Web tool called RapaNet (http://bioinfo.mju.ac.kr/arraynet/brassica300k/query/), which is based on a data set of 143 B rapa microarrays compiled from various organs and at different developmental stages during exposure to biotic or abiotic stress. RapaNet visualizes correlated gene expression information via correlational networks and phylogenetic trees using Pearson correlation coefficient (r). In addition, RapaNet provides hierarchical clustering diagrams, scatterplots of log ratio intensities, related pathway maps, and cis-element lists of promoter regions. To ascertain the functionality of RapaNet, the correlated genes encoding ribosomal protein (L7Ae), photosystem II protein D1 (psbA), and cytochrome P450 monooxygenase in glucosinolate biosynthesis (CYP79F1) were retrieved from RapaNet and compared with their Arabidopsis homologues. An analysis of the co-expressed genes revealed their shared and unique features.",2017-06-19 +28572075,Estimating Inorganic Arsenic Exposure from U.S. Rice and Total Water Intakes.,"

Background

Among nonoccupationally exposed U.S. residents, drinking water and diet are considered primary exposure pathways for inorganic arsenic (iAs). In drinking water, iAs is the primary form of arsenic (As), while dietary As speciation techniques are used to differentiate iAs from less toxic arsenicals in food matrices.

Objectives

Our goal was to estimate the distribution of iAs exposure rates from drinking water intakes and rice consumption in the U.S. population and ethnic- and age-based subpopulations.

Methods

The distribution of iAs in drinking water was estimated by population, weighting the iAs concentrations for each drinking water utility in the Second Six-Year Review data set. To estimate the distribution of iAs concentrations in rice ingested by U.S. consumers, 54 grain-specific, production-weighted composites of rice obtained from U.S. mills were extracted and speciated using both a quantitative dilute nitric acid extraction and speciation (DNAS) and an in vitro gastrointestinal assay to provide an upper bound and bioaccessible estimates, respectively. Daily drinking water intake and rice consumption rate distributions were developed using data from the What We Eat in America (WWEIA) study.

Results

Using these data sets, the Stochastic Human Exposure and Dose Simulation (SHEDS) model estimated mean iAs exposures from drinking water and rice were 4.2 μg/day and 1.4 μg/day, respectively, for the entire U.S. population. The Tribal, Asian, and Pacific population exhibited the highest mean daily exposure of iAs from cooked rice (2.8 μg/day); the mean exposure rate for children between ages 1 and 2 years in this population is 0.104 μg/kg body weight (BW)/day.

Conclusions

An average consumer drinking 1.5 L of water daily that contains between 2 and 3 ng iAs/mL is exposed to approximately the same amount of iAs as a mean Tribal, Asian, and Pacific consumer is exposed to from rice. https://doi.org/10.1289/EHP418.

Background

Among nonoccupationally exposed U.S. residents, drinking water and diet are considered primary exposure pathways for inorganic arsenic (iAs). In drinking water, iAs is the primary form of arsenic (As), while dietary As speciation techniques are used to differentiate iAs from less toxic arsenicals in food matrices.

Objectives

Our goal was to estimate the distribution of iAs exposure rates from drinking water intakes and rice consumption in the U.S. population and ethnic- and age-based subpopulations.

Methods

The distribution of iAs in drinking water was estimated by population, weighting the iAs concentrations for each drinking water utility in the Second Six-Year Review data set. To estimate the distribution of iAs concentrations in rice ingested by U.S. consumers, 54 grain-specific, production-weighted composites of rice obtained from U.S. mills were extracted and speciated using both a quantitative dilute nitric acid extraction and speciation (DNAS) and an in vitro gastrointestinal assay to provide an upper bound and bioaccessible estimates, respectively. Daily drinking water intake and rice consumption rate distributions were developed using data from the What We Eat in America (WWEIA) study.

Results

Using these data sets, the Stochastic Human Exposure and Dose Simulation (SHEDS) model estimated mean iAs exposures from drinking water and rice were 4.2 μg/day and 1.4 μg/day, respectively, for the entire U.S. population. The Tribal, Asian, and Pacific population exhibited the highest mean daily exposure of iAs from cooked rice (2.8 μg/day); the mean exposure rate for children between ages 1 and 2 years in this population is 0.104 μg/kg body weight (BW)/day.

Conclusions

An average consumer drinking 1.5 L of water daily that contains between 2 and 3 ng iAs/mL is exposed to approximately the same amount of iAs as a mean Tribal, Asian, and Pacific consumer is exposed to from rice. https://doi.org/10.1289/EHP418.",2017-05-30 +28886750,Evidence-based gene models for structural and functional annotations of the oil palm genome.,"

Background

Oil palm is an important source of edible oil. The importance of the crop, as well as its long breeding cycle (10-12 years) has led to the sequencing of its genome in 2013 to pave the way for genomics-guided breeding. Nevertheless, the first set of gene predictions, although useful, had many fragmented genes. Classification and characterization of genes associated with traits of interest, such as those for fatty acid biosynthesis and disease resistance, were also limited. Lipid-, especially fatty acid (FA)-related genes are of particular interest for the oil palm as they specify oil yields and quality. This paper presents the characterization of the oil palm genome using different gene prediction methods and comparative genomics analysis, identification of FA biosynthesis and disease resistance genes, and the development of an annotation database and bioinformatics tools.

Results

Using two independent gene-prediction pipelines, Fgenesh++ and Seqping, 26,059 oil palm genes with transcriptome and RefSeq support were identified from the oil palm genome. These coding regions of the genome have a characteristic broad distribution of GC3 (fraction of cytosine and guanine in the third position of a codon) with over half the GC3-rich genes (GC3 ≥ 0.75286) being intronless. In comparison, only one-seventh of the oil palm genes identified are intronless. Using comparative genomics analysis, characterization of conserved domains and active sites, and expression analysis, 42 key genes involved in FA biosynthesis in oil palm were identified. For three of them, namely EgFABF, EgFABH and EgFAD3, segmental duplication events were detected. Our analysis also identified 210 candidate resistance genes in six classes, grouped by their protein domain structures.

Conclusions

We present an accurate and comprehensive annotation of the oil palm genome, focusing on analysis of important categories of genes (GC3-rich and intronless), as well as those associated with important functions, such as FA biosynthesis and disease resistance. The study demonstrated the advantages of having an integrated approach to gene prediction and developed a computational framework for combining multiple genome annotations. These results, available in the oil palm annotation database ( http://palmxplore.mpob.gov.my ), will provide important resources for studies on the genomes of oil palm and related crops.

Reviewers

This article was reviewed by Alexander Kel, Igor Rogozin, and Vladimir A. Kuznetsov.",2017-09-08 +28934093,Combined Prenatal Pesticide Exposure and Folic Acid Intake in Relation to Autism Spectrum Disorder.,"

Background

Maternal folic acid (FA) protects against developmental toxicity from certain environmental chemicals.

Objective

We examined combined exposures to maternal FA and pesticides in relation to autism spectrum disorder (ASD).

Methods

Participants were California children born from 2000-2007 who were enrolled in the Childhood Autism Risks from Genetics and the Environment (CHARGE) case-control study at age 2-5 y, were clinically confirmed to have ASD (n=296) or typical development (n=220), and had information on maternal supplemental FA and pesticide exposures. Maternal supplemental FA and household pesticide product use were retrospectively collected in telephone interviews from 2003-2011. High vs. low daily FA intake was dichotomized at 800μg (median). Mothers' addresses were linked to a statewide database of commercial applications to estimate agricultural pesticide exposure.

Results

High FA intake (≥800μg) during the first pregnancy month and no known pesticide exposure was the reference group for all analyses. Compared with this group, ASD was increased in association with <800μg FA and any indoor pesticide exposure {adjusted odds ratio [OR]=2.5 [95% confidence interval (CI): 1.3, 4.7]} compared with low FA [OR=1.2 (95% CI: 0.7, 2.2)] or indoor pesticides [OR=1.7 (95% CI: 1.1, 2.8)] alone. ORs for the combination of low FA and regular pregnancy exposure (≥6 mo) to pet pesticides or to outdoor sprays and foggers were 3.9 (95% CI: 1.4, 11.5) and 4.1 (95% CI: 1.7, 10.1), respectively. ORs for low maternal FA and agricultural pesticide exposure 3 mo before or after conception were 2.2 (95% CI: 0.7, 6.5) for chlorpyrifos, 2.3 (95% CI: 0.98, 5.3) for organophosphates, 2.1 (95% CI: 0.9, 4.8) for pyrethroids, and 1.5 (95% CI: 0.5, 4.8) for carbamates. Except for carbamates, these ORs were approximately two times greater than those for either exposure alone or for the expected ORs for combined exposures under multiplicative or additive models.

Conclusions

In this study population, associations between pesticide exposures and ASD were attenuated among those with high versus low FA intake during the first month of pregnancy. Confirmatory and mechanistic studies are needed. https://doi.org/10.1289/EHP604.",2017-09-08 +24888382,"The Allergic Airway Inflammation Repository--a user-friendly, curated resource of mRNA expression levels in studies of allergic airways.","Public microarray databases allow analysis of expression levels of candidate genes in different contexts. However, finding relevant microarray data is complicated by the large number of available studies. We have compiled a user-friendly, open-access database of mRNA microarray experiments relevant to allergic airway inflammation, the Allergic Airway Inflammation Repository (AAIR, http://aair.cimed.ike.liu.se/). The aim is to allow allergy researchers to determine the expression profile of their genes of interest in multiple clinical data sets and several experimental systems quickly and intuitively. AAIR also provides quick links to other relevant information such as experimental protocols, related literature and raw data files.",2014-06-03 +30028296,Residential Surrounding Greenness and Cognitive Decline: A 10-Year Follow-up of the Whitehall II Cohort.,"

Background

Evidence on beneficial associations of green space with cognitive function in older adults is very scarce and mainly limited to cross-sectional studies.

Objectives

We aimed to investigate the association between long-term residential surrounding greenness and cognitive decline.

Methods

This longitudinal study was based on three waves of data from the Whitehall II cohort, providing a 10-y follow-up (1997-1999 to 2007-2009) of 6,506 participants (45-68 y old) from the United Kingdom. Residential surrounding greenness was obtained across buffers of 500 m and 1,000 m around the participants' residential addresses at each follow-up using satellite images on greenness (Normalized Difference Vegetation Index; NDVI) from a summer month in every follow-up period. Cognitive tests assessed reasoning, short-term memory, and verbal fluency. The cognitive scores were standardized and summarized in a global cognition z-score. To quantify the impact of greenness on repeated measurements of cognition, linear mixed effect models were developed that included an interaction between age and the indicator of greenness, and controlled for covariates including individual and neighborhood indicators of socioeconomic status (SES).

Results

In a fully adjusted model, an interquartile range (IQR) increase in NDVI was associated with a difference in the global cognition z-score of 0.020 [95% confidence interval (CI): 0.003, 0.037; p=0.02] in the 500-m buffer and of 0.021 (95% CI: 0.003, 0.039; p=0.02) in the 1,000-m buffer over 10 y. The associations with cognitive decline over the study period were stronger among women than among men.

Conclusions

Higher residential surrounding greenness was associated with slower cognitive decline over a 10-y follow-up period in the Whitehall II cohort of civil servants. https://doi.org/10.1289/EHP2875.",2018-07-12 +25252780,PHI-DAC: protein homology database through dihedral angle conservation.,"

Unlabelled

Finding related conformations in the Protein Data Bank is essential in many areas of bioscience. To assist this task, we designed a dihedral angle database for searching protein segment homologs. The search engine relies on encoding of the protein coordinates into text characters representing amino acid sequence, φ and ψ dihedral angles. The search engine is advantageous owing to its high speed and interactive nature and is expected to assist scientists in discovering conformation homologs and evolutionary kinship. The search engine is fast, with query times lasting a few seconds, and freely available at http://tarshish.md.biu.ac.il/~samsona.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-23 +29220508,Model-based design of bistable cell factories for metabolic engineering.,"

Motivation

Metabolism can exhibit dynamic phenomena like bistability due to the presence of regulatory motifs like the positive feedback loop. As cell factories, microorganisms with bistable metabolism can have a high and a low product flux at the two stable steady states, respectively. The exclusion of metabolic regulation and network dynamics limits the ability of pseudo-steady state stoichiometric models to detect the presence of bistability, and reliably assess the outcomes of design perturbations to metabolic networks.

Results

Using kinetic models of metabolism, we assess the change in the bistable characteristics of the network, and suggest designs based on perturbations to the positive feedback loop to enable the network to produce at its theoretical maximum rate. We show that the most optimal production design in parameter space, for a small bistable metabolic network, may exist at the boundary of the bistable region separating it from the monostable region of low product fluxes. The results of our analysis can be broadly applied to other bistable metabolic networks with similar positive feedback network topologies. This can complement existing model-based design strategies by providing a smaller number of feasible designs that need to be tested in vivo.

Availability and implementation

http://lmse.biozone.utoronto.ca/downloads/.

Contact

krishna.mahadevan@utoronto.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-04-01 +29708963,Automated evaluation of quaternary structures from protein crystals.,"A correct assessment of the quaternary structure of proteins is a fundamental prerequisite to understanding their function, physico-chemical properties and mode of interaction with other proteins. Currently about 90% of structures in the Protein Data Bank are crystal structures, in which the correct quaternary structure is embedded in the crystal lattice among a number of crystal contacts. Computational methods are required to 1) classify all protein-protein contacts in crystal lattices as biologically relevant or crystal contacts and 2) provide an assessment of how the biologically relevant interfaces combine into a biological assembly. In our previous work we addressed the first problem with our EPPIC (Evolutionary Protein Protein Interface Classifier) method. Here, we present our solution to the second problem with a new method that combines the interface classification results with symmetry and topology considerations. The new algorithm enumerates all possible valid assemblies within the crystal using a graph representation of the lattice and predicts the most probable biological unit based on the pairwise interface scoring. Our method achieves 85% precision (ranging from 76% to 90% for different oligomeric types) on a new dataset of 1,481 biological assemblies with consensus of PDB annotations. Although almost the same precision is achieved by PISA, currently the most popular quaternary structure assignment method, we show that, due to the fundamentally different approach to the problem, the two methods are complementary and could be combined to improve biological assembly assignments. 
The software for the automatic assessment of protein assemblies (EPPIC version 3) has been made available through a web server at http://www.eppic-web.org.",2018-04-30 +27923875,Accounting for Protein Subcellular Localization: A Compartmental Map of the Rat Liver Proteome.,"Accurate knowledge of the intracellular location of proteins is important for numerous areas of biomedical research including assessing fidelity of putative protein-protein interactions, modeling cellular processes at a system-wide level and investigating metabolic and disease pathways. Many proteins have not been localized, or have been incompletely localized, partly because most studies do not account for entire subcellular distribution. Thus, proteins are frequently assigned to one organelle whereas a significant fraction may reside elsewhere. As a step toward a comprehensive cellular map, we used subcellular fractionation with classic balance sheet analysis and isobaric labeling/quantitative mass spectrometry to assign locations to >6000 rat liver proteins. We provide quantitative data and error estimates describing the distribution of each protein among the eight major cellular compartments: nucleus, mitochondria, lysosomes, peroxisomes, endoplasmic reticulum, Golgi, plasma membrane and cytosol. Accounting for total intracellular distribution improves quality of organelle assignments and assigns proteins with multiple locations. Protein assignments and supporting data are available online through the Prolocate website (http://prolocate.cabm.rutgers.edu). As an example of the utility of this data set, we have used organelle assignments to help analyze whole exome sequencing data from an infant dying at 6 months of age from a suspected neurodegenerative lysosomal storage disorder of unknown etiology. Sequencing data was prioritized using lists of lysosomal proteins comprising well-established residents of this organelle as well as novel candidates identified in this study. 
The latter included copper transporter 1, encoded by SLC31A1, which we localized to both the plasma membrane and lysosome. The patient harbors two predicted loss of function mutations in SLC31A1, suggesting that this may represent a heretofore undescribed recessive lysosomal storage disease gene.",2016-12-06 +29995627,"Physiologically Based Pharmacokinetic (PBPK) Modeling of the Bisphenols BPA, BPS, BPF, and BPAF with New Experimental Metabolic Parameters: Comparing the Pharmacokinetic Behavior of BPA with Its Substitutes.","

Background

The endocrine disrupting chemical bisphenol A (BPA) has been facing stricter regulations in recent years. BPA analogs, such as the bisphenols S, F, and AF (BPS, BPF, and BPAF) are increasingly used as replacement chemicals, although they were found to exert estrogenic effects similar to those of BPA. Research has shown that only the parent compounds have affinity to the estrogen receptors, suggesting that the pharmacokinetic behavior of bisphenols (BPs) can influence their potency.

Objectives

Our goal was to compare the pharmacokinetic behaviors of BPA, BPS, BPF, and BPAF for different age groups after environmentally relevant external exposures by taking into account substance-specific metabolism kinetics and partitioning behavior. This comparison allowed us to investigate the consequences of replacing BPA with other BPs.

Methods

We readjusted a physiologically based pharmacokinetic (PBPK) model for peroral exposure to BPA and extended it to include dermal exposure. We experimentally assessed hepatic and intestinal glucuronidation kinetics of BPS, BPF, and BPAF to parametrize the model for these BPs and calibrated the BPS model with a biomonitoring study. We used the PBPK models to compare resulting internal exposures and focused on females of childbearing age in a two-dimensional Monte Carlo uncertainty analysis.

Results

Within environmentally relevant concentration ranges, BPAF and BPS were glucuronized at highest and lowest rates, respectively, in the intestine and the liver. The predominant routes of BPS and BPAF exposure were peroral and dermal exposure, respectively. The calibration of the BPS model with measured concentrations showed that enterohepatic recirculation may be important. Assuming equal external exposures, BPS exposure led to the highest internal concentrations of unconjugated BPs.

Conclusions

Our data suggest that the replacement of BPA with structural analogs may not lower the risk for endocrine disruption. Exposure to both BPS and BPAF might be more critical than BPA exposure, if their respective estrogenic potencies are taken into account. https://doi.org/10.1289/EHP2739.",2018-07-10 +27922095,Co-expression network analyses identify functional modules associated with development and stress response in Gossypium arboreum.,"Cotton is an economically important crop, essential for the agriculture and textile industries. Through integrating transcriptomic data, we discovered that multi-dimensional co-expression network analysis was powerful for predicting cotton gene functions and functional modules. Here, the recently available transcriptomic data on Gossypium arboreum, including data on multiple growth stages of tissues and stress treatment samples were applied to construct a co-expression network exploring multi-dimensional expression (development and stress) through multi-layered approaches. Based on differential gene expression and network analysis, a fibre development regulatory module of the gene GaKNL1 was found to regulate the second cell wall through repressing the activity of REVOLUTA, and a tissue-selective module of GaJAZ1a was examined in response to water stress. Moreover, comparative genomics analysis of the JAZ1-related regulatory module revealed high conservation across plant species. In addition, 1155 functional modules were identified through integrating the co-expression network, module classification and function enrichment tools, which cover functions such as metabolism, stress responses, and transcriptional regulation. 
In the end, an online platform was built for network analysis (http://structuralbiology.cau.edu.cn/arboreum), which could help to refine the annotation of cotton gene function and establish a data mining system to identify functional genes or modules with important agronomic traits.",2016-12-06 +27443354,SubsMatch 2.0: Scanpath comparison and classification based on subsequence frequencies.,"Our eye movements are driven by a continuous trade-off between the need for detailed examination of objects of interest and the necessity to keep an overview of our surrounding. In consequence, behavioral patterns that are characteristic for our actions and their planning are typically manifested in the way we move our eyes to interact with our environment. Identifying such patterns from individual eye movement measurements is however highly challenging. In this work, we tackle the challenge of quantifying the influence of experimental factors on eye movement sequences. We introduce an algorithm for extracting sequence-sensitive features from eye movements and for the classification of eye movements based on the frequencies of small subsequences. Our approach is evaluated against the state-of-the art on a novel and a very rich collection of eye movements data derived from four experimental settings, from static viewing tasks to highly dynamic outdoor settings. Our results show that the proposed method is able to classify eye movement sequences over a variety of experimental designs. The choice of parameters is discussed in detail with special focus on highlighting different aspects of general scanpath shape. Algorithms and evaluation data are available at: http://www.ti.uni-tuebingen.de/scanpathcomparison.html .",2017-06-01 +27503226,shinyGEO: a web-based application for analyzing gene expression omnibus datasets.,"The Gene Expression Omnibus (GEO) is a public repository of gene expression data. 
Although GEO has its own tool, GEO2R, for data analysis, evaluation of single genes is not straightforward and survival analysis in specific GEO datasets is not possible without bioinformatics expertise. We describe a web application, shinyGEO, that allows a user to download gene expression data sets directly from GEO in order to perform differential expression and survival analysis for a gene of interest. In addition, shinyGEO supports customized graphics, sample selection, data export and R code generation so that all analyses are reproducible. The availability of shinyGEO makes GEO datasets more accessible to non-bioinformaticians, promising to lead to better understanding of biological processes and genetic diseases such as cancer.

Availability and implementation

Web application and source code are available from http://gdancik.github.io/shinyGEO/ CONTACT: dancikg@easternct.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-08 +29327339,Electronic registry for the management of childhood obesity in Greece. ,"Obesity in childhood and adolescence represents a major health problem in our century. In Greece, more than 30%-35% of children and adolescents are either overweight or obese. Using information and communication technologies, we developed a ""National Registry for the Prevention and Management of Overweight and Obesity in Childhood and Adolescence"" for guidance and training of Pediatricians and General Practitioners. The application supports interoperability with other national infrastructures and multi-layered security spanning preventive, detective and administrative controls. The Patient Summary Dataset includes information on medical history, family history, medications, immunizations, clinical examination and laboratory findings and appointment booking service. The application was launched in September 2015 and is accessible by: http://app.childhood-obesity.gr/. Based on the data that the doctor registers, the system calculates a personalized therapeutic algorithm that provides information on diet, physical exercise and sleep, as well as guidance on laboratory investigations and referral to specialized centres. A pilot study performed in 1270 children and adolescents indicated that using this system resulted in a reduction in obesity rates by 30% and overweight rates by 35% within 1 year. This National e-Health System appears to be effective in the management of overweight and obesity in childhood and adolescence.",2018-02-11 +28205675,BCFtools/csq: haplotype-aware variant consequences.,"

Motivation

Prediction of functional variant consequences is an important part of sequencing pipelines, allowing the categorization and prioritization of genetic variants for follow up analysis. However, current predictors analyze variants as isolated events, which can lead to incorrect predictions when adjacent variants alter the same codon, or when a frame-shifting indel is followed by a frame-restoring indel. Exploiting known haplotype information when making consequence predictions can resolve these issues.

Results

BCFtools/csq is a fast program for haplotype-aware consequence calling which can take into account known phase. Consequence predictions are changed for 501 of 5019 compound variants found in the 81.7M variants in the 1000 Genomes Project data, with an average of 139 compound variants per haplotype. Predictions match existing tools when run in localized mode, but the program is an order of magnitude faster and requires an order of magnitude less memory.

Availability and implementation

The program is freely available for commercial and non-commercial use in the BCFtools package which is available for download from http://samtools.github.io/bcftools .

Contact

pd3@sanger.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +27035433,Identifying Cancer Subtypes from miRNA-TF-mRNA Regulatory Networks and Expression Data.,"

Background

Identifying cancer subtypes is an important component of the personalised medicine framework. An increasing number of computational methods have been developed to identify cancer subtypes. However, existing methods rarely use information from gene regulatory networks to facilitate the subtype identification. It is widely accepted that gene regulatory networks play crucial roles in understanding the mechanisms of diseases. Different cancer subtypes are likely caused by different regulatory mechanisms. Therefore, there are great opportunities for developing methods that can utilise network information in identifying cancer subtypes.

Results

In this paper, we propose a method, weighted similarity network fusion (WSNF), to utilise the information in the complex miRNA-TF-mRNA regulatory network in identifying cancer subtypes. We firstly build the regulatory network where the nodes represent the features, i.e. the microRNAs (miRNAs), transcription factors (TFs) and messenger RNAs (mRNAs) and the edges indicate the interactions between the features. The interactions are retrieved from various interatomic databases. We then use the network information and the expression data of the miRNAs, TFs and mRNAs to calculate the weight of the features, representing the level of importance of the features. The feature weight is then integrated into a network fusion approach to cluster the samples (patients) and thus to identify cancer subtypes. We applied our method to the TCGA breast invasive carcinoma (BRCA) and glioblastoma multiforme (GBM) datasets. The experimental results show that WSNF performs better than the other commonly used computational methods, and the information from miRNA-TF-mRNA regulatory network contributes to the performance improvement. The WSNF method successfully identified five breast cancer subtypes and three GBM subtypes which show significantly different survival patterns. We observed that the expression patterns of the features in some miRNA-TF-mRNA sub-networks vary across different identified subtypes. In addition, pathway enrichment analyses show that the top pathways involving the most differentially expressed genes in each of the identified subtypes are different. The results would provide valuable information for understanding the mechanisms characterising different cancer subtypes and assist the design of treatment therapies. 
All datasets and the R scripts to reproduce the results are available online at the website: http://nugget.unisa.edu.au/Thuc/cancersubtypes/.",2016-04-01 +25627242,Genome-wide study of correlations between genomic features and their relationship with the regulation of gene expression.,"The broad class of tasks in genetics and epigenetics can be reduced to the study of various features that are distributed over the genome (genome tracks). The rapid and efficient processing of the huge amount of data stored in the genome-scale databases cannot be achieved without the software packages based on the analytical criteria. However, strong inhomogeneity of genome tracks hampers the development of relevant statistics. We developed the criteria for the assessment of genome track inhomogeneity and correlations between two genome tracks. We also developed a software package, Genome Track Analyzer, based on this theory. The theory and software were tested on simulated data and were applied to the study of correlations between CpG islands and transcription start sites in the Homo sapiens genome, between profiles of protein-binding sites in chromosomes of Drosophila melanogaster, and between DNA double-strand breaks and histone marks in the H. sapiens genome. Significant correlations between transcription start sites on the forward and the reverse strands were observed in genomes of D. melanogaster, Caenorhabditis elegans, Mus musculus, H. sapiens, and Danio rerio. The observed correlations may be related to the regulation of gene expression in eukaryotes. 
Genome Track Analyzer is freely available at http://ancorr.eimb.ru/.",2015-01-27 +24504151,"Southern African Treatment Resistance Network (SATuRN) RegaDB HIV drug resistance and clinical management database: supporting patient management, surveillance and research in southern Africa.","Substantial amounts of data have been generated from patient management and academic exercises designed to better understand the human immunodeficiency virus (HIV) epidemic and design interventions to control it. A number of specialized databases have been designed to manage huge data sets from HIV cohort, vaccine, host genomic and drug resistance studies. Besides databases from cohort studies, most of the online databases contain limited curated data and are thus sequence repositories. HIV drug resistance has been shown to have a great potential to derail the progress made thus far through antiretroviral therapy. Thus, a lot of resources have been invested in generating drug resistance data for patient management and surveillance purposes. Unfortunately, most of the data currently available relate to subtype B even though >60% of the epidemic is caused by HIV-1 subtype C. A consortium of clinicians, scientists, public health experts and policy markers working in southern Africa came together and formed a network, the Southern African Treatment and Resistance Network (SATuRN), with the aim of increasing curated HIV-1 subtype C and tuberculosis drug resistance data. This article describes the HIV-1 data curation process using the SATuRN Rega database. The data curation is a manual and time-consuming process done by clinical, laboratory and data curation specialists. Access to the highly curated data sets is through applications that are reviewed by the SATuRN executive committee. 
Examples of research outputs from the analysis of the curated data include trends in the level of transmitted drug resistance in South Africa, analysis of the levels of acquired resistance among patients failing therapy and factors associated with the absence of genotypic evidence of drug resistance among patients failing therapy. All these studies have been important for informing first- and second-line therapy. This database is a free password-protected open source database available on www.bioafrica.net. Database URL: http://www.bioafrica.net/regadb/",2014-02-06 +22032181,BioNØT: a searchable database of biomedical negated sentences.,"

Background

Negated biomedical events are often ignored by text-mining applications; however, such events carry scientific significance. We report on the development of BioNØT, a database of negated sentences that can be used to extract such negated events.

Description

Currently BioNØT incorporates ≈32 million negated sentences, extracted from over 336 million biomedical sentences from three resources: ≈2 million full-text biomedical articles in Elsevier and the PubMed Central, as well as ≈20 million abstracts in PubMed. We evaluated BioNØT on three important genetic disorders: autism, Alzheimer's disease and Parkinson's disease, and found that BioNØT is able to capture negated events that may be ignored by experts.

Conclusions

The BioNØT database can be a useful resource for biomedical researchers. BioNØT is freely available at http://bionot.askhermes.org/. In future work, we will develop semantic web related technologies to enrich BioNØT.",2011-10-27 +30181178,Elevation of Stromal-Derived Mediators of Inflammation Promote Prostate Cancer Progression in African-American Men.,"Progress in prostate cancer racial disparity research has been hampered by a lack of appropriate research tools and better understanding of the tumor biology. Recent gene expression studies suggest that the tumor microenvironment (TME) may contribute to racially disparate clinical outcomes in prostate cancer. Analysis of the prostate TME has shown increased reactive stroma associated with chronic inflammatory infiltrates in African-American (AA) compared with European-American (EA) patients with prostate cancer. To better understand stromal drivers of changes in TME, we isolated prostate fibroblasts (PrF) from AA (PrF-AA) and EA (PrF-EA) prostate cancer tissues and studied their functional characteristics. PrF-AA showed increased growth response to androgens FGF2 and platelet-derived growth factor. Compared with PrF-EA, conditioned media from PrF-AA significantly enhanced the proliferation and motility of prostate cancer cell lines. Expression of markers associated with myofibroblast activation (αSMA, vimentin, and tenascin-C) was elevated in PrF-AA In vivo tumorigenicity of an AA patient-derived prostatic epithelial cell line E006AA was significantly increased in the presence of PrF-AA compared with PrF-EA, and RNA-seq data and cytokine array analysis identified a panel of potential proinflammatory paracrine mediators (BDNF, CHI3L1, DPPIV, FGF7, IL18BP, IL6, and VEGF) to be enriched in PrF-AA E006AA cell lines showed increased responsiveness to BDNF ligand compared with EA-derived LNCaP and C4-2B cells. 
Addition of a TrkB-specific antagonist significantly reduced the protumorigenic effects induced by PrF-AA compared with PrF-EA These findings suggest that fibroblasts in the TME of AA patients may contribute to the health disparity observed in the incidence and progression of prostate cancer tumors.Significance: These findings suggest that stromal cells in the tumor microenvironment of African-American men promote progression of prostate cancer by increasing levels of a specific set of pro-inflammatory molecules compared with European-American men.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/21/6134/F1.large.jpg Cancer Res; 78(21); 6134-45. ©2018 AACR.",2018-09-04 +29601923,Evaluation and Management of Testosterone Deficiency: AUA Guideline.,"PURPOSE:There has been a marked increase in testosterone prescriptions in the past decade resulting in a growing need to give practicing clinicians proper guidance on the evaluation and management of the testosterone deficient patient. MATERIALS AND METHODS:A systematic review utilized research from the Mayo Clinic Evidence Based Practice Center and additional supplementation by the authors. Evidence-based statements were based on body of evidence strength Grade A, B, or C and were designated as Strong, Moderate, and Conditional Recommendations with additional statements presented in the form of Clinical Principles or Expert Opinions (table 1 in supplementary unabridged guideline, http://jurology.com/). RESULTS:This guideline was developed by a multi-disciplinary panel to inform clinicians on the proper assessment of patients with testosterone deficiency and the safe and effective management of men on testosterone therapy. Additional statements were developed to guide the clinician on the appropriate care of patients who are at risk for or have cardiovascular disease or prostate cancer as well as patients who are interested in preserving fertility. 
CONCLUSIONS:The care of testosterone deficient patients should focus on accurate assessment of total testosterone levels, symptoms, and signs as well as proper on-treatment monitoring to ensure therapeutic testosterone levels are reached and symptoms are ameliorated. Future longitudinal observational studies and clinical trials of significant duration in this space will improve diagnostic techniques and treatment of men with testosterone deficiency as well as provide more data on the adverse events that may be associated with testosterone therapy.",2018-03-28 +27553625,Stepwise iterative maximum likelihood clustering approach.,"

Background

Biological/genetic data is a complex mix of various forms or topologies which makes it quite difficult to analyze. An abundance of such data in this modern era requires the development of sophisticated statistical methods to analyze it in a reasonable amount of time. In many biological/genetic analyses, such as genome-wide association study (GWAS) analysis or multi-omics data analysis, it is required to cluster the plethora of data into sub-categories to understand the subtypes of populations, cancers or any other diseases. Traditionally, the k-means clustering algorithm is a dominant clustering method. This is due to its simplicity and reasonable level of accuracy. Many other clustering methods, including support vector clustering, have been developed in the past, but do not perform well with the biological data, either due to computational reasons or failure to identify clusters.

Results

The proposed SIML clustering algorithm has been tested on microarray datasets and SNP datasets. It has been compared with a number of clustering algorithms. On MLL datasets, SIML achieved highest clustering accuracy and rand score on 4/9 cases; similarly on SRBCT dataset, it got for 3/5 cases; on ALL subtype it got highest clustering accuracy for 5/7 cases and highest rand score for 4/7 cases. In addition, SIML overall clustering accuracy on a 3 cluster problem using SNP data were 97.3, 94.7 and 100 %, respectively, for each of the clusters.

Conclusions

In this paper, considering the nature of biological data, we proposed a maximum likelihood clustering approach using a stepwise iterative procedure. The advantage of this proposed method is that it not only uses the distance information, but also incorporate variance information for clustering. This method is able to cluster when data appeared in overlapping and complex forms. The experimental results illustrate its performance and usefulness over other clustering methods. A Matlab package of this method (SIML) is provided at the web-link http://www.riken.jp/en/research/labs/ims/med_sci_math/ .",2016-08-24 +29075029,Radiographic prediction of inferior alveolar nerve injury in third molar surgery.,"Data sourcesMedline, Embase, relevant dental journals, reference lists of included studies and the World Health Organisation International Clinical Trials Registry.Study selectionStudies evaluating the predictive accuracy of panoramic radiography for postoperative inferior alveolar nerve (IAN) injury reporting on at least one of the seven signs of IAN injury and providing data to calculate false-positive (FP), true-positive (TP), false-negative (FN) and true-negative (TN) proportions were included.Data extraction and synthesisData were abstracted independently by two reviewers. Positive predictive value (PPV), negative predictive value (NPV), sensitivity and specificity were extracted or calculated. Overall pooled estimates of sensitivity, specificity, positive likelihood ratio (LR+), negative likelihood ratio (LR) (LR-) and diagnostic odds ratio (DOR), with 95% confidence intervals (CIs) were calculated using a random effects model. Summary receiver operating characteristic (SROC) curves were also generated. Study quality was assessed using the Quality Assessment of Diagnostic Accuracy Studies (QUADAS-2) tool (http://www.bristol.ac.uk/social-community-medicine/projects/quadas/quadas-2/).ResultsEight studies were included. 
Only one study was considered to be at low risk of bias, one at low risk and the remainder at unclear risk. A summary of the pooled sensitivity, specificity and diagnostic odds ratios are shown in the table.ConclusionsFor all seven signs, the added value of panoramic radiography is too low to consider it appropriate for ruling out postoperative IAN in the decision-making before MM3 surgery. The added value of panoramic radiography for determining the presence of diversion of the canal, interruption of the white line of the canal and darkening of the root can be considered sufficient for ruling in the risk of postoperative IAN injury in the decision-making before MM3 surgery.",2017-10-01 +25817355,Deep Sequencing in Microdissected Renal Tubules Identifies Nephron Segment-Specific Transcriptomes.,"The function of each renal tubule segment depends on the genes expressed therein. High-throughput methods used for global profiling of gene expression in unique cell types have shown low sensitivity and high false positivity, thereby limiting the usefulness of these methods in transcriptomic research. However, deep sequencing of RNA species (RNA-seq) achieves highly sensitive and quantitative transcriptomic profiling by sequencing RNAs in a massive, parallel manner. Here, we used RNA-seq coupled with classic renal tubule microdissection to comprehensively profile gene expression in each of 14 renal tubule segments from the proximal tubule through the inner medullary collecting duct of rat kidneys. Polyadenylated mRNAs were captured by oligo-dT primers and processed into adapter-ligated cDNA libraries that were sequenced using an Illumina platform. Transcriptomes were identified to a median depth of 8261 genes in microdissected renal tubule samples (105 replicates in total) and glomeruli (5 replicates). Manual microdissection allowed a high degree of sample purity, which was evidenced by the observed distributions of well established cell-specific markers. 
The main product of this work is an extensive database of gene expression along the nephron provided as a publicly accessible webpage (https://helixweb.nih.gov/ESBL/Database/NephronRNAseq/index.html). The data also provide genome-wide maps of alternative exon usage and polyadenylation sites in the kidney. We illustrate the use of the data by profiling transcription factor expression along the renal tubule and mapping metabolic pathways.",2015-03-27 +27980506,Atomistic modelling of scattering data in the Collaborative Computational Project for Small Angle Scattering (CCP-SAS).,"The capabilities of current computer simulations provide a unique opportunity to model small-angle scattering (SAS) data at the atomistic level, and to include other structural constraints ranging from molecular and atomistic energetics to crystallography, electron microscopy and NMR. This extends the capabilities of solution scattering and provides deeper insights into the physics and chemistry of the systems studied. Realizing this potential, however, requires integrating the experimental data with a new generation of modelling software. To achieve this, the CCP-SAS collaboration (http://www.ccpsas.org/) is developing open-source, high-throughput and user-friendly software for the atomistic and coarse-grained molecular modelling of scattering data. Robust state-of-the-art molecular simulation engines and molecular dynamics and Monte Carlo force fields provide constraints to the solution structure inferred from the small-angle scattering data, which incorporates the known physical chemistry of the system. 
The implementation of this software suite involves a tiered approach in which GenApp provides the deployment infrastructure for running applications on both standard and high-performance computing hardware, and SASSIE provides a workflow framework into which modules can be plugged to prepare structures, carry out simulations, calculate theoretical scattering data and compare results with experimental data. GenApp produces the accessible web-based front end termed SASSIE-web, and GenApp and SASSIE also make community SAS codes available. Applications are illustrated by case studies: (i) inter-domain flexibility in two- to six-domain proteins as exemplified by HIV-1 Gag, MASP and ubiquitin; (ii) the hinge conformation in human IgG2 and IgA1 antibodies; (iii) the complex formed between a hexameric protein Hfq and mRNA; and (iv) synthetic 'bottlebrush' polymers.",2016-10-14 +27711040,Notice to Readers: Update to Reporting of Pneumonia and Influenza Mortality.,"The current issue of MMWR (week 39) will be the last to include data from the 122 Cities Mortality Reporting System (122 CMRS) in Notifiable Disease and Mortality Tables, Table III (""Deaths in 122 cities"" [http://www.cdc.gov/mmwr/volumes/65/wr/mm6539md.htm?s_cid=mm6539md_w#table-17]). Beginning in the publication for the week ending October 8, 2016 (week 40), data from the National Center for Health Statistics (NCHS) Mortality Surveillance System will replace the information reported in Table III, and the 122 Cities Mortality Reporting System (122 CMRS) will be retired. The NCHS Mortality Surveillance System provides improvements in the data, including reports by the week of death and a consistent pneumonia and influenza (P&I) case definition across all sites. 
These improvements, along with recent and continuing increases in the timeliness of death certificate data, have led CDC to update the P&I mortality surveillance platform from the 122 CMRS to the NCHS Mortality Surveillance System.",2016-10-07 +26633994,Transcriptional program for nitrogen starvation-induced lipid accumulation in Chlamydomonas reinhardtii.,"

Background

Algae accumulate lipids to endure different kinds of environmental stresses including macronutrient starvation. Although this response has been extensively studied, an in depth understanding of the transcriptional regulatory network (TRN) that controls the transition into lipid accumulation remains elusive. In this study, we used a systems biology approach to elucidate the transcriptional program that coordinates the nitrogen starvation-induced metabolic readjustments that drive lipid accumulation in Chlamydomonas reinhardtii.

Results

We demonstrate that nitrogen starvation triggered differential regulation of 2147 transcripts, which were co-regulated in 215 distinct modules and temporally ordered as 31 transcriptional waves. An early-stage response was triggered within 12 min that initiated growth arrest through activation of key signaling pathways, while simultaneously preparing the intracellular environment for later stages by modulating transport processes and ubiquitin-mediated protein degradation. Subsequently, central metabolism and carbon fixation were remodeled to trigger the accumulation of triacylglycerols. Further analysis revealed that these waves of genome-wide transcriptional events were coordinated by a regulatory program orchestrated by at least 17 transcriptional regulators, many of which had not been previously implicated in this process. We demonstrate that the TRN coordinates transcriptional downregulation of 57 metabolic enzymes across a period of nearly 4 h to drive an increase in lipid content per unit biomass. Notably, this TRN appears to also drive lipid accumulation during sulfur starvation, while phosphorus starvation induces a different regulatory program. The TRN model described here is available as a community-wide web-resource at http://networks.systemsbiology.net/chlamy-portal.

Conclusions

In this work, we have uncovered a comprehensive mechanistic model of the TRN controlling the transition from N starvation to lipid accumulation. The program coordinates sequentially ordered transcriptional waves that simultaneously arrest growth and lead to lipid accumulation. This study has generated predictive tools that will aid in devising strategies for the rational manipulation of regulatory and metabolic networks for better biofuel and biomass production.",2015-12-02 +26503253,rVarBase: an updated database for regulatory features of human variants.,"We present here the rVarBase database (http://rv.psych.ac.cn), an updated version of the rSNPBase database, to provide reliable and detailed regulatory annotations for known and novel human variants. This update expands the database to include additional types of human variants, such as copy number variations (CNVs) and novel variants, and include additional types of regulatory features. Now rVarBase annotates variants in three dimensions: chromatin states of the surrounding regions, overlapped regulatory elements and variants' potential target genes. Two new types of regulatory elements (lncRNAs and miRNA target sites) have been introduced to provide additional annotation. Detailed information about variants' overlapping transcription factor binding sites (TFBSs) (often less than 15 bp) within experimentally supported TF-binding regions (∼ 150 bp) is provided, along with the binding motifs of matched TF families. Additional types of extended variants and variant-associated phenotypes were also added. In addition to the enrichment in data content, an element-centric search module was added, and the web interface was refined. 
In summary, rVarBase hosts more types of human variants and includes more types of up-to-date regulatory information to facilitate in-depth functional research and to provide practical clues for experimental design.",2015-10-25 +22024348,HNOCDB: a comprehensive database of genes and miRNAs relevant to head and neck and oral cancer.,"In spite of the wide prevalence of head, neck and oral cancer, HNOC, there is no integrated database on genes and miRNAs associated with all the carcinoma subtypes of HNOC. The objective is to compile a multilayered and comprehensive database of HNOC as a user-friendly resource for researchers devising novel therapeutic strategies. We present HNOCDB, the head, neck and oral cancer database, with the following key features: (i) it tabulates all the different categories of HNOC separately under appropriate subtype-names, and then puts them together in a table headlined All; (ii) the oncogenes/oncomiRs that cause HNOC are listed; their mutations, methylations and polymorphisms loci are marked, and the variations in their expression profiles relative to the normal are recorded; (iii) HNOCDB contains a chromosomal map of HNOC genes and miRNA; (iv) contains references that experimentally validate the reason for the inclusion of the genes and the miRNAs in HNOCDB. HNOCDB is freely accessible for academic and non-profit users via http://gyanxet.com/hno.html.",2011-10-22 +27673369,"Web Server for Peak Detection, Baseline Correction, and Alignment in Two-Dimensional Gas Chromatography Mass Spectrometry-Based Metabolomics Data.","Two-dimensional gas chromatography time-of-flight mass spectrometry (GC×GC/TOF-MS) is superior for chromatographic separation and provides great sensitivity for complex biological fluid analysis in metabolomics. However, GC×GC/TOF-MS data processing is currently limited to vendor software and typically requires several preprocessing steps. 
In this work, we implement a web-based platform, which we call GC2MS, to facilitate the application of recent advances in GC×GC/TOF-MS, especially for metabolomics studies. The core processing workflow of GC2MS consists of blob/peak detection, baseline correction, and blob alignment. GC2MS treats GC×GC/TOF-MS data as pictures and clusters the pixels as blobs according to the brightness of each pixel to generate a blob table. GC2MS then aligns the blobs of two GC×GC/TOF-MS data sets according to their distance and similarity. The blob distance and similarity are the Euclidean distance of the first and second retention times of two blobs and the Pearson's correlation coefficient of the two mass spectra, respectively. GC2MS also directly corrects the raw data baseline. The analytical performance of GC2MS was evaluated using GC×GC/TOF-MS data sets of Angelica sinensis compounds acquired under different experimental conditions and of human plasma samples. The results show that GC2MS is an easy-to-use tool for detecting peaks and correcting baselines, and GC2MS is able to align GC×GC/TOF-MS data sets acquired under different experimental conditions. GC2MS is freely accessible at http://gc2ms.web.cmdm.tw .",2016-10-13 +27667791,LongISLND: in silico sequencing of lengthy and noisy datatypes.,"LongISLND is a software package designed to simulate sequencing data according to the characteristics of third generation, single-molecule sequencing technologies. The general software architecture is easily extendable, as demonstrated by the emulation of Pacific Biosciences (PacBio) multi-pass sequencing with P5 and P6 chemistries, producing data in FASTQ, H5, and the latest PacBio BAM format. We demonstrate its utility by downstream processing with consensus building and variant calling.

Availability and implementation

LongISLND is implemented in Java and available at http://bioinform.github.io/longislnd CONTACT: hugo.lam@roche.comSupplementary information: Supplementary data are available at Bioinformatics online.",2016-09-25 +25137014,"New glycoproteomics software, GlycoPep Evaluator, generates decoy glycopeptides de novo and enables accurate false discovery rate analysis for small data sets.","Glycoproteins are biologically significant large molecules that participate in numerous cellular activities. In order to obtain site-specific protein glycosylation information, intact glycopeptides, with the glycan attached to the peptide sequence, are characterized by tandem mass spectrometry (MS/MS) methods such as collision-induced dissociation (CID) and electron transfer dissociation (ETD). While several emerging automated tools are developed, no consensus is present in the field about the best way to determine the reliability of the tools and/or provide the false discovery rate (FDR). A common approach to calculate FDRs for glycopeptide analysis, adopted from the target-decoy strategy in proteomics, employs a decoy database that is created based on the target protein sequence database. Nonetheless, this approach is not optimal in measuring the confidence of N-linked glycopeptide matches, because the glycopeptide data set is considerably smaller compared to that of peptides, and the requirement of a consensus sequence for N-glycosylation further limits the number of possible decoy glycopeptides tested in a database search. To address the need to accurately determine FDRs for automated glycopeptide assignments, we developed GlycoPep Evaluator (GPE), a tool that helps to measure FDRs in identifying glycopeptides without using a decoy database. GPE generates decoy glycopeptides de novo for every target glycopeptide, in a 1:20 target-to-decoy ratio. 
The decoys, along with target glycopeptides, are scored against the ETD data, from which FDRs can be calculated accurately based on the number of decoy matches and the ratio of the number of targets to decoys, for small data sets. GPE is freely accessible for download and can work with any search engine that interprets ETD data of N-linked glycopeptides. The software is provided at https://desairegroup.ku.edu/research.",2014-08-28 +29017357,Automated hexahedral meshing of knee cartilage structures - application to data from the osteoarthritis initiative.,"We propose a fully automated methodology for hexahedral meshing of patient-specific structures of the human knee obtained from magnetic resonance images, i.e. femoral/tibial cartilages and menisci. We select eight patients from the Osteoarthritis Initiative and validate our methodology using MATLAB on a laptop computer. We obtain the patient-specific meshes in an average of three minutes, while faithfully representing the geometries with well-shaped elements. We hope to provide a fundamentally different means to test hypotheses on the mechanisms of disease progression by integrating our patient-specific FE meshes with data from individual patients. Download both our meshes and software at http://im.engr.uconn.edu/downloads.php .",2017-10-10 +27006731,Airway foreign bodies: A critical review for a common pediatric emergency.,"

Background

Airway foreign bodies (AFBs) is an interdisciplinary area between emergency medicine, pediatrics and otolaryngology. It is a life-threatening condition that is not infrequently seen; however, it is poorly covered in medical literature. Accidental aspiration of an element into airways is a widespread clinical scenario among children under 3 years, predominantly males. Moreover, it is the leading cause of infantile deaths and the fourth one among preschool children.

Data resources

A systemic search was conducted in July 2015 using PubMed/PubMed Central Database of The National Center for Biotechnology Information (NCBI) (http://www.ncbi.nlm.nih.gov/). A total of 1 767 articles were identified and most of them were meta-analyses, systematic reviews, and case series. Those thoroughly discussing assessment and management of AFBs were retrieved.

Results

AFBs episodes may be either witnessed or missed. Presence of a witness for the inhalation is diagnostic. The later usually present with persistent active cough. A classical triad of paroxysmal cough, wheezing, and dyspnoea/decreased air entry was reported, though many presentations have inconsistent findings. Hence, diagnosis requires high index of clinical suspicion. Flexible fibro-optic bronchoscopy is the gold standard of diagnosis, whereas inhaled objects are best retrieved by rigid bronchoscopes.

Conclusions

Close supervision of pediatrics is the hallmark of prevention. Caregivers should ensure a safe surrounding milieu, including the toys their offspring play with. Immediate complications result from direct obstruction or injury by the inhaled object. Alternatively, prolonged lodging traps air and induces inflammatory response causing atelectesis and pneumonia, respectively.",2016-01-01 +29069445,TCSBN: a database of tissue and cancer specific biological networks.,"Biological networks provide new opportunities for understanding the cellular biology in both health and disease states. We generated tissue specific integrated networks (INs) for liver, muscle and adipose tissues by integrating metabolic, regulatory and protein-protein interaction networks. We also generated human co-expression networks (CNs) for 46 normal tissues and 17 cancers to explore the functional relationships between genes as well as their relationships with biological functions, and investigate the overlap between functional and physical interactions provided by CNs and INs, respectively. These networks can be employed in the analysis of omics data, provide detailed insight into disease mechanisms by identifying the key biological components and eventually can be used in the development of efficient treatment strategies. Moreover, comparative analysis of the networks may allow for the identification of tissue-specific targets that can be used in the development of drugs with the minimum toxic effect to other human tissues. These context-specific INs and CNs are presented in an interactive website http://inetmodels.com without any limitation.",2018-01-01 +29381131,Systematic Reviews Published in the Cochrane Library April-May 2017.,"The Cochrane Library of Systematic Reviews is published monthly online ( http://www.thecochranelibrary.com ). The library currently contains 7332 complete reviews and 2520 protocols for reviews in production. 
In addition, there are citations of 1,055,253 randomized controlled trials and 15,764 cited papers in the Cochrane Methodology Register. The Health Technology Assessment database contains some 17,000 citations. The impact factor of the Cochrane Library stands at 6.1. This report attempted to identify all relevant reviews published in the 2 months to May 31, 2017. Eight reviews have been identified that have potential relevance for practitioners in pain and palliative medicine. Readers are encouraged to access the full report for any articles of interest, as only a brief commentary is provided.",2017-09-01 +30318837,Sodium bicarbonate supplementation does not improve elite women's team sport running or field hockey skill performance.,"Team sports, such as field hockey, incorporate high-intensity repeated sprints, interspersed with low-intensity running, which can result in acidosis. The aim of the present study was to examine the effect of acute sodium bicarbonate (SB) supplementation on team sport running and skill performance. Eight elite female field hockey players (age 23 ± 5 years, body mass 62.6 ± 8.4 kg, height 1.66 ± 0.05 m) completed three Field Hockey Skill Tests (FHST) interspersed with four sets of the Loughborough Intermittent Shuttle Test (LIST). Prior to exercise, participants were supplemented with capsules equivalent to 0.2 g·kg-1 body mass (BM) of a placebo (maltodextrin) or 0.3 g·kg-1 BM SB. Field hockey skill performance incorporated overall performance time (PFT), movement time (MT), decision-making time (DMT), and penalty time (PT). Sprint time (ST), rating of perceived exertion (RPE), blood lactate concentration, bicarbonate anion ( HCO 3 - ) concentration, pH, and base excess were measured at various time points. Data (mean ± SD) were analyzed using a two-way analysis of variance (ANOVA) with repeated measures, with Hedges g effect sizes used to interpret the magnitude of differences. 
Bicarbonate anion concentration (+5.4 ± 2.6 mmol·L-1 ) and pH (+0.06 ± 0.03) were greater during the bicarbonate trial compared with the placebo (P < 0.001). Bicarbonate did not alter PFT (placebo: 87.9 ± 6.9 sec; bicarbonate: 89.0 ± 7.8 sec, P = 0.544, g = 0.14), MT, DMT, PT (all P > 0.30) or ST (placebo: 2.87 ± 0.12 sec; bicarbonate: 2.86 ± 0.12 sec, P = 0.893, g = -0.08). RPE was lower during the SB condition (placebo: 13 ± 2; bicarbonate: 12 ± 2, P = 0.021, g = -0.41). Acute ingestion of bicarbonate did not improve sprint or sport-specific skill performance. Bicarbonate ingestion did result in a lower perception of effort during team-sport running, which may have performance implications in a competitive match situation.",2018-09-01 +29860529,Evolution of Pediatric Inflammatory Bowel Disease Unclassified (IBD-U): Incorporated With Serological and Gene Expression Profiles.,"Background:Inflammatory bowel disease (IBD) mainly consists of Crohn's disease (CD) and ulcerative colitis (UC). About 10%-15% of patients with IBD cannot be firmly diagnosed with CD or UC; hence, they are initially diagnosed as inflammatory bowel disease unclassified (IBD-U). Having a firm diagnosis is clearly preferred to guide treatment choices, and better understanding of the nature of IBD-U is required. Methods:We performed an analysis of a subset of pediatric subjects from an inception IBD cohort of patients initially enrolled in a prospective multicenter study (the RISK study). Initial diagnosis and 2-year follow-up data from the subjects diagnosed with IBD-U were analyzed. An expert panel verified final diagnosis using predefined criteria as a guide. Serological and disease-relevant ileal and rectal tissue gene expression profiles were investigated. The use and the time to initiate anti-TNFα treatment was analyzed among the outcome groups. 
Results:A total of 1411 subjects were enrolled with initial diagnosis of IBD, and among them, 136 subjects were initially diagnosed as IBD-U at enrollment. And 26% were reclassified as UC and 14% as CD within 2 years of diagnosis, while 60% remained as IBD-U. Of those who were reclassified, there was a 2:1 ratio, UC (n = 35) to CD (n = 19). The molecular and serological features of IBD-U at the end of follow-up were very similar to UC and very different from CD. There was less likelihood of receiving anti-TNFα agents if the diagnosis was IBD-U compared with CD (P < 0.0001). Conclusions:In our cohort, 60% of the IBD-U subjects remained as unclassified at 2 years; of those subsequently classified, a higher percentage followed a course more similar to UC. Most of the IBD-U subjects at diagnosis had serological and molecular signatures that are very similar to UC. Although the atypical presentations made the clinician to make an interim diagnosis of IBD-U, results of the molecular and serological factors performed at the time of diagnosis suggests that they were very similar to UC. However, long-term studies are needed to better understand the natural history and molecular characterization of pediatric onset IBD-U. 10.1093/ibd/izy136_video1Video 1.Video 1. Watch now at https://academic.oup.com/ibd/article-lookup/doi/10.1093/ibd/izy136izy136.video15791389938001.",2018-09-01 +27896960,ON THE POWER AND LIMITS OF SEQUENCE SIMILARITY BASED CLUSTERING OF PROTEINS INTO FAMILIES.,"Over the last decades, we have observed an ongoing tremendous growth of available sequencing data fueled by the advancements in wet-lab technology. The sequencing information is only the beginning of the actual understanding of how organisms survive and prosper. It is, for instance, equally important to also unravel the proteomic repertoire of an organism. 
A classical computational approach for detecting protein families is a sequence-based similarity calculation coupled with a subsequent cluster analysis. In this work we have intensively analyzed various clustering tools on a large scale. We used the data to investigate the behavior of the tools' parameters underlining the diversity of the protein families. Furthermore, we trained regression models for predicting the expected performance of a clustering tool for an unknown data set and aimed to also suggest optimal parameters in an automated fashion. Our analysis demonstrates the benefits and limitations of the clustering of proteins with low sequence similarity indicating that each protein family requires its own distinct set of tools and parameters. All results, a tool prediction service, and additional supporting material is also available online under http://proteinclustering.compbio.sdu.dk.",2017-01-01 +29118973,The 2017 Bioinformatics Open Source Conference (BOSC). ,"The Bioinformatics Open Source Conference (BOSC) is a meeting organized by the Open Bioinformatics Foundation (OBF), a non-profit group dedicated to promoting the practice and philosophy of Open Source software development and Open Science within the biological research community. The 18th annual BOSC ( http://www.open-bio.org/wiki/BOSC_2017) took place in Prague, Czech Republic in July 2017. The conference brought together nearly 250 bioinformatics researchers, developers and users of open source software to interact and share ideas about standards, bioinformatics software development, open and reproducible science, and this year's theme, open data. 
As in previous years, the conference was preceded by a two-day collaborative coding event open to the bioinformatics community, called the OBF Codefest.",2017-10-19 +27575624,OpenMS: a flexible open-source software platform for mass spectrometry data analysis.,"High-resolution mass spectrometry (MS) has become an important tool in the life sciences, contributing to the diagnosis and understanding of human diseases, elucidating biomolecular structural information and characterizing cellular signaling networks. However, the rapid growth in the volume and complexity of MS data makes transparent, accurate and reproducible analysis difficult. We present OpenMS 2.0 (http://www.openms.de), a robust, open-source, cross-platform software specifically designed for the flexible and reproducible analysis of high-throughput MS data. The extensible OpenMS software implements common mass spectrometric data processing tasks through a well-defined application programming interface in C++ and Python and through standardized open data formats. OpenMS additionally provides a set of 185 tools and ready-made workflows for common mass spectrometric data processing tasks, which enable users to perform complex quantitative mass spectrometric analyses with ease.",2016-08-01 +27624719,An Excel Spreadsheet Model for States and Districts to Assess the Cost-Benefit of School Nursing Services.,"This paper describes a user-friendly, Excel spreadsheet model and two data collection instruments constructed by the authors to help states and districts perform cost-benefit analyses of school nursing services delivered by full-time school nurses. 
Prior to applying the model, states or districts need to collect data using two forms: ""Daily Nurse Data Collection Form"" and the ""Teacher Survey."" The former is used to record daily nursing activities, including number of student health encounters, number of medications administered, number of student early dismissals, and number of medical procedures performed. The latter is used to obtain estimates for the time teachers spend addressing student health issues. Once inputs are entered in the model, outputs are automatically calculated, including program costs, total benefits, net benefits, and benefit-cost ratio. The spreadsheet model, data collection tools, and instructions are available at the NASN website ( http://www.nasn.org/The/CostBenefitAnalysis ).",2016-09-20 +28865861,Liver transcriptome analysis reveals important factors involved in the metabolic adaptation of the transition cow.,"During early lactation, dairy cows experience a severe metabolic load often resulting in the development of various diseases. The inevitable deficiency in nutrients and energy at the onset of lactation requires an optimal adaptation of the hepatic metabolism to overcome metabolic stress. We conducted a whole-liver transcriptome analysis for the transition cow to identify novel factors crucial for metabolic adaptation. Liver samples were obtained from 6 Red Holstein dairy cows (parity 2 to 7, mean ± standard deviation: 3.7 ± 2.3) at 3 time points: T1 = 22 ± 4 d antepartum, T2 = 10 ± 2 d postpartum, and T3 = 17 ± 2 d postpartum. Using RNA sequencing (RNA-seq), we studied the transcriptomic profile of the transition cow before and after parturition. We performed a differential gene expression analysis (DGEA) and gene-set enrichment analysis (GSEA) for biological processes (gene ontology, GO) and pathways (Kyoto Encyclopedia of Genes and Genomes, KEGG). Among the 10,186 expressed genes, we discovered 1,063 differentially expressed genes (false discovery rate = 5%). 
The GSEA revealed 16 biological processes and 7 pathways significantly (false discovery rate = 5%) associated with the hepatic changes of the transition cow. Our results confirm that major hepatic changes are related to energy mobilization after parturition; in particular, they are related to fatty acid oxidation/metabolism, cholesterol metabolism, and gluconeogenesis. Using the STRING database (https://string-db.org/), we investigated interactions between significant genes and identified 9 key genes (CYP7A1, APOA1, CREM, LOC522146, CYP2C87, HMGCR, FDFT1, SGLE, and CYP26A1) through which the different processes involved in the metabolic adaptation interact. Comparing our main results with the literature, we could identify further genes that have not yet been associated with the transition period (e.g., CPT1B, ADIPOR2, LEPR, CREB3L3, and CCND1) and that are mainly involved in processes controlled by AMP-activated protein kinase, an important regulator of energy homeostasis.",2017-08-31 +28859620,Orthonome - a new pipeline for predicting high quality orthologue gene sets applicable to complete and draft genomes.,"

Background

Distinguishing orthologous and paralogous relationships between genes across multiple species is essential for comparative genomic analyses. Various computational approaches have been developed to resolve these evolutionary relationships, but strong trade-offs between precision and recall of orthologue prediction remains an ongoing challenge.

Results

Here we present Orthonome, an orthologue prediction pipeline, designed to reduce the trade-off between orthologue capture rates (recall) and accuracy of multi-species orthologue prediction. The pipeline compares sequence domains and then forms sequence-similar clusters before using phylogenetic comparisons to identify inparalogues. It then corrects sequence similarity metrics for fragment and gene length bias using a novel scoring metric capturing relationships between full length as well as fragmented genes. The remaining genes are then brought together for the identification of orthologues within a phylogenetic framework. The orthologue predictions are further calibrated along with inparalogues and gene births, using synteny, to identify novel orthologous relationships. We use 12 high quality Drosophila genomes to show that, compared to other orthologue prediction pipelines, Orthonome provides orthogroups with minimal error but high recall. Furthermore, Orthonome is resilient to suboptimal assembly/annotation quality, with the inclusion of draft genomes from eight additional Drosophila species still providing >6500 1:1 orthologues across all twenty species while retaining a better combination of accuracy and recall than other pipelines. Orthonome is implemented as a searchable database and query tool along with multiple-sequence alignment browsers for all sets of orthologues. The underlying documentation and database are accessible at http://www.orthonome.com .

Conclusion

We demonstrate that Orthonome provides a superior combination of orthologue capture rates and accuracy on complete and draft drosophilid genomes when tested alongside previously published pipelines. The study also highlights a greater degree of evolutionary conservation across drosophilid species than earlier thought.",2017-08-31 +25828689,"PFR²: a curated database of planktonic foraminifera 18S ribosomal DNA as a resource for studies of plankton ecology, biogeography and evolution.","Planktonic foraminifera (Rhizaria) are ubiquitous marine pelagic protists producing calcareous shells with conspicuous morphology. They play an important role in the marine carbon cycle, and their exceptional fossil record serves as the basis for biochronostratigraphy and past climate reconstructions. A major worldwide sampling effort over the last two decades has resulted in the establishment of multiple large collections of cryopreserved individual planktonic foraminifera samples. Thousands of 18S rDNA partial sequences have been generated, representing all major known morphological taxa across their worldwide oceanic range. This comprehensive data coverage provides an opportunity to assess patterns of molecular ecology and evolution in a holistic way for an entire group of planktonic protists. We combined all available published and unpublished genetic data to build PFR(2), the Planktonic foraminifera Ribosomal Reference database. The first version of the database includes 3322 reference 18S rDNA sequences belonging to 32 of the 47 known morphospecies of extant planktonic foraminifera, collected from 460 oceanic stations. All sequences have been rigorously taxonomically curated using a six-rank annotation system fully resolved to the morphological species level and linked to a series of metadata. 
The PFR(2) website, available at http://pfr2.sb-roscoff.fr, allows downloading the entire database or specific sections, as well as the identification of new planktonic foraminiferal sequences. Its novel, fully documented curation process integrates advances in morphological and molecular taxonomy. It allows for an increase in its taxonomic resolution and assures that integrity is maintained by including a complete contingency tracking of annotations and assuring that the annotations remain internally consistent.",2015-04-15 +27797769,Cancer driver gene discovery through an integrative genomics approach in a non-parametric Bayesian framework.,"

Motivation

Comprehensive catalogue of genes that drive tumor initiation and progression in cancer is key to advancing diagnostics, therapeutics and treatment. Given the complexity of cancer, the catalogue is far from complete yet. Increasing evidence shows that driver genes exhibit consistent aberration patterns across multiple-omics in tumors. In this study, we aim to leverage complementary information encoded in each of the omics data to identify novel driver genes through an integrative framework. Specifically, we integrated mutations, gene expression, DNA copy numbers, DNA methylation and protein abundance, all available in The Cancer Genome Atlas (TCGA) and developed iDriver, a non-parametric Bayesian framework based on multivariate statistical modeling to identify driver genes in an unsupervised fashion. iDriver captures the inherent clusters of gene aberrations and constructs the background distribution that is used to assess and calibrate the confidence of driver genes identified through multi-dimensional genomic data.

Results

We applied the method to 4 cancer types in TCGA and identified candidate driver genes that are highly enriched with known drivers. (e.g.: P < 3.40 × 10 -36 for breast cancer). We are particularly interested in novel genes and observed multiple lines of supporting evidence. Using systematic evaluation from multiple independent aspects, we identified 45 candidate driver genes that were not previously known across these 4 cancer types. The finding has important implications that integrating additional genomic data with multivariate statistics can help identify cancer drivers and guide the next stage of cancer genomics research.

Availability and implementation

The C ++ source code is freely available at https://medschool.vanderbilt.edu/cgg/ .

Contacts

hai.yang@vanderbilt.edu or bingshan.li@Vanderbilt.Edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +27515514,COMAN: a web server for comprehensive metatranscriptomics analysis.,"

Background

Microbiota-oriented studies based on metagenomic or metatranscriptomic sequencing have revolutionised our understanding on microbial ecology and the roles of both clinical and environmental microbes. The analysis of massive metatranscriptomic data requires extensive computational resources, a collection of bioinformatics tools and expertise in programming.

Results

We developed COMAN (Comprehensive Metatranscriptomics Analysis), a web-based tool dedicated to automatically and comprehensively analysing metatranscriptomic data. COMAN pipeline includes quality control of raw reads, removal of reads derived from non-coding RNA, followed by functional annotation, comparative statistical analysis, pathway enrichment analysis, co-expression network analysis and high-quality visualisation. The essential data generated by COMAN are also provided in tabular format for additional analysis and integration with other software. The web server has an easy-to-use interface and detailed instructions, and is freely available at http://sbb.hku.hk/COMAN/ CONCLUSIONS: COMAN is an integrated web server dedicated to comprehensive functional analysis of metatranscriptomic data, translating massive amount of reads to data tables and high-standard figures. It is expected to facilitate the researchers with less expertise in bioinformatics in answering microbiota-related biological questions and to increase the accessibility and interpretation of microbiota RNA-Seq data.",2016-08-11 +,PS3-52: Heart Health in Your Pocket: Lessons Learned from the Development of a Smartphone App,"

Background/Aims

As part of the Million Hearts national initiative to prevent cardiovascular disease (CVD), there are increasing calls to leverage health information technology. The Marshfield Clinic developed a Heart Health Mobile application (app) that is designed to improve awareness of CVD risk and promote risk factor control among regional smartphone users. It deploys an engaging user interface that provides a brief CVD risk assessment that takes into account self-reported behavioral, familial, and biometric risk factors, including blood pressure and lipids. Users are then directed to nearby community pharmacies, clinics, and other locations where more advanced CVD risk factor screenings can be obtained. Along with social media connections and measurement prompts, basic education materials are provided on key CVD prevention topics such as hypertension, dyslipidemia, weight management, and tobacco cessation.

Methods

A multidisciplinary team of 24 members was created to develop the app over a 30-day timeframe. This team included a broad cross-section of clinical professionals from medicine, epidemiology, health IT, usability and graphic designers, business analytics, and marketing. An Agile programming method was used to promote adaptive planning and evolutionary development in self-organizing, cross-functional teams.

Results

The iOS app was successfully developed, tested, and launched within the 30-day timeframe. It was submitted competitively as part of the Million Hearts Risk Check Challenge, a CVD prevention app contest sponsored by Office of the National Coordinator for Health Information Technology. The final product is shown in detail at http://www.youtube.com/watch?v=qfESTQipjtw. The app was developed in six different languages, and epidemiologic data on downloads, unique users, geo-segmentation, risk factor profile, and customer loyalty, among other data points, are actively collected.

Conclusions

Health-related consumer smartphone apps can be developed rapidly and brought to scale as part of healthcare delivery systems’ business and clinical strategies. They provide users with important information, education, and directions on CVD prevention and have wide-ranging potential across numerous health conditions. From an HMORN perspective, such apps also provide real-time data collection methods that can be used to identify health trends at a lower cost (and comparable quality) relative to traditional population research methods.",2013-09-01 +28854211,Identification of metal ion binding sites based on amino acid sequences.,"The identification of metal ion binding sites is important for protein function annotation and the design of new drug molecules. This study presents an effective method of analyzing and identifying the binding residues of metal ions based solely on sequence information. Ten metal ions were extracted from the BioLip database: Zn2+, Cu2+, Fe2+, Fe3+, Ca2+, Mg2+, Mn2+, Na+, K+ and Co2+. The analysis showed that Zn2+, Cu2+, Fe2+, Fe3+, and Co2+ were sensitive to the conservation of amino acids at binding sites, and promising results can be achieved using the Position Weight Scoring Matrix algorithm, with an accuracy of over 79.9% and a Matthews correlation coefficient of over 0.6. The binding sites of other metals can also be accurately identified using the Support Vector Machine algorithm with multifeature parameters as input. In addition, we found that Ca2+ was insensitive to hydrophobicity and hydrophilicity information and Mn2+ was insensitive to polarization charge information. 
An online server was constructed based on the framework of the proposed method and is freely available at http://60.31.198.140:8081/metal/HomePage/HomePage.html.",2017-08-30 +29322920,Investigation and identification of functional post-translational modification sites associated with drug binding and protein-protein interactions.,"BACKGROUND:Protein post-translational modification (PTM) plays an essential role in various cellular processes that modulates the physical and chemical properties, folding, conformation, stability and activity of proteins, thereby modifying the functions of proteins. The improved throughput of mass spectrometry (MS) or MS/MS technology has not only brought about a surge in proteome-scale studies, but also contributed to a fruitful list of identified PTMs. However, with the increase in the number of identified PTMs, perhaps the more crucial question is what kind of biological mechanisms these PTMs are involved in. This is particularly important in light of the fact that most protein-based pharmaceuticals deliver their therapeutic effects through some form of PTM. Yet, our understanding is still limited with respect to the local effects and frequency of PTM sites near pharmaceutical binding sites and the interfaces of protein-protein interaction (PPI). Understanding PTM's function is critical to our ability to manipulate the biological mechanisms of protein. RESULTS:In this study, to understand the regulation of protein functions by PTMs, we mapped 25,835 PTM sites to proteins with available three-dimensional (3D) structural information in the Protein Data Bank (PDB), including 1785 modified PTM sites on the 3D structure. 
Based on the acquired structural PTM sites, we proposed to use five properties for the structural characterization of PTM substrate sites: the spatial composition of amino acids, residues and side-chain orientations surrounding the PTM substrate sites, as well as the secondary structure, division of acidity and alkaline residues, and solvent-accessible surface area. We further mapped the structural PTM sites to the structures of drug binding and PPI sites, identifying a total of 1917 PTM sites that may affect PPI and 3951 PTM sites associated with drug-target binding. An integrated analytical platform (CruxPTM), with a variety of methods and online molecular docking tools for exploring the structural characteristics of PTMs, is presented. In addition, all tertiary structures of PTM sites on proteins can be visualized using the JSmol program. CONCLUSION:Resolving the function of PTM sites is important for understanding the role that proteins play in biological mechanisms. Our work attempted to delineate the structural correlation between PTM sites and PPI or drug-target binding. CurxPTM could help scientists narrow the scope of their PTM research and enhance the efficiency of PTM identification in the face of big proteome data. CruxPTM is now available at http://csb.cse.yzu.edu.tw/CruxPTM/ .",2017-12-21 +25604335,Methanogenic archaea database containing physiological and biochemical characteristics.,"The methanogenic archaea are a group of micro-organisms that have developed a unique metabolic pathway for obtaining energy. There are 150 characterized species in this group; however, novel species continue to be discovered. Since methanogens are considered a crucial part of the carbon cycle in the anaerobic ecosystem, characterization of these micro-organisms is important for understanding anaerobic ecology. 
A methanogens database (MDB; http://metanogen.biotech.uni.wroc.pl/), including physiological and biochemical characteristics of methanogens, was constructed based on the descriptions of isolated type strains. Analysis of the data revealed that methanogens are able to grow from 0 to 122 °C. Methanogens growing at the same temperature may have very different growth rates. There is no clear correlation between the optimal growth temperature and the DNA G+C content. The following substrate preferences are observed in the database: 74.5% of archaea species utilize H2+CO2, 33% utilize methyl compounds and 8.5% utilize acetate. Utilization of methyl compounds (mainly micro-organisms belonging to the genera Methanosarcina and Methanolobus ) is seldom accompanied by an ability to utilize H2+CO2. Very often, data for described species are incomplete, especially substrate preferences. Additional research leading to completion of missing information and development of standards, especially for substrate utilization, would be very helpful.",2015-01-20 +26323714,RNASeqMetaDB: a database and web server for navigating metadata of publicly available mouse RNA-Seq datasets.,"

Unlabelled

Gene targeting is a protocol for introducing a mutation to a specific gene in an organism. Because of the importance of in vivo assessment of gene function and modeling of human diseases, this technique has been widely adopted to generate a large number of mutant mouse models. Due to the recent breakthroughs in high-throughput sequencing technologies, RNA-Seq experiments have been performed on many of these mouse models, leading to hundreds of publicly available datasets. To facilitate the reuse of these datasets, we collected the associated metadata and organized them in a database called RNASeqMetaDB. The metadata were manually curated to ensure annotation consistency. We developed a web server to allow easy database navigation and data querying. Users can search the database using multiple parameters like genes, diseases, tissue types, keywords and associated publications in order to find datasets that match their interests. Summary statistics of the metadata are also presented on the web server showing interesting global patterns of RNA-Seq studies.

Availability and implementation

Freely available on the web at http://rnaseqmetadb.ece.tamu.edu.",2015-08-30 +28057679,SPOT-ligand 2: improving structure-based virtual screening by binding-homology search on an expanded structural template library.,"

Motivation

The high cost of drug discovery motivates the development of accurate virtual screening tools. Binding-homology, which takes advantage of known protein-ligand binding pairs, has emerged as a powerful discrimination technique. In order to exploit all available binding data, modelled structures of ligand-binding sequences may be used to create an expanded structural binding template library.

Results

SPOT-Ligand 2 has demonstrated significantly improved screening performance over its previous version by expanding the template library 15 times over the previous one. It also performed better than or similar to other binding-homology approaches on the DUD and DUD-E benchmarks.

Availability and implementation

The server is available online at http://sparks-lab.org .

Contacts

yaoqi.zhou@griffith.edu.au or yuedong.yang@griffith.edu.au.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +26870952,Muscle Logic: New Knowledge Resource for Anatomy Enables Comprehensive Searches of the Literature on the Feeding Muscles of Mammals.,"

Background

In recent years large bibliographic databases have made much of the published literature of biology available for searches. However, the capabilities of the search engines integrated into these databases for text-based bibliographic searches are limited. To enable searches that deliver the results expected by comparative anatomists, an underlying logical structure known as an ontology is required.

Development and testing of the ontology

Here we present the Mammalian Feeding Muscle Ontology (MFMO), a multi-species ontology focused on anatomical structures that participate in feeding and other oral/pharyngeal behaviors. A unique feature of the MFMO is that a simple, computable, definition of each muscle, which includes its attachments and innervation, is true across mammals. This construction mirrors the logical foundation of comparative anatomy and permits searches using language familiar to biologists. Further, it provides a template for muscles that will be useful in extending any anatomy ontology. The MFMO is developed to support the Feeding Experiments End-User Database Project (FEED, https://feedexp.org/), a publicly-available, online repository for physiological data collected from in vivo studies of feeding (e.g., mastication, biting, swallowing) in mammals. Currently the MFMO is integrated into FEED and also into two literature-specific implementations of Textpresso, a text-mining system that facilitates powerful searches of a corpus of scientific publications. We evaluate the MFMO by asking questions that test the ability of the ontology to return appropriate answers (competency questions). We compare the results of queries of the MFMO to results from similar searches in PubMed and Google Scholar.

Results and significance

Our tests demonstrate that the MFMO is competent to answer queries formed in the common language of comparative anatomy, but PubMed and Google Scholar are not. Overall, our results show that by incorporating anatomical ontologies into searches, an expanded and anatomically comprehensive set of results can be obtained. The broader scientific and publishing communities should consider taking up the challenge of semantically enabled search capabilities.",2016-02-12 +29570032,Classroom-Based Physical Activity and Sedentary Behavior Interventions in Adolescents: A Systematic Review and Meta-Analysis.,"

Background

It is reported that 81% of adolescents are insufficiently active. Schools play a pivotal role in promoting physical activity (PA) and reducing sedentary behavior (SB). The aim of this systematic review and meta-analysis was to evaluate classroom-based PA and SB interventions in adolescents.

Methods

A search strategy was developed using the Population Intervention Comparison Outcome Study (PICOS) design framework. Articles were screened using strict inclusion criteria. Study quality was assessed using the Effective Public Health Practice Project quality assessment tool ( http://www.ephpp.ca/tools.html ). Outcome data for preintervention and postintervention were extracted, and effect sizes were calculated using Cohen's d.

Results

The strategy yielded 7574 potentially relevant articles. Nine studies were included for review. Study quality was rated as strong for 1 study, moderate for 5 studies, and weak for 3 studies. Five studies were included for meta-analyses, which suggested that the classroom-based interventions had a nonsignificant effect on PA (P = .55, d = 0.05) and a small, nonsignificant effect on SB (P = .16, d = -0.11).

Conclusion

Only 9 relevant studies were found, and the effectiveness of the classroom-based PA and SB interventions varied. Based on limited empirical studies, there is not enough evidence to determine the most effective classroom-based methodology to increase PA and SB.",2018-03-23 +30044229,Longitudinal Effects of Developmental Bisphenol A Exposure on Epigenome-Wide DNA Hydroxymethylation at Imprinted Loci in Mouse Blood.,"

Background

Epigenetic machinery plays an important role in genomic imprinting, a developmental process that establishes parent-of-origin-specific monoallelic gene expression. Although a number of studies have investigated the role of 5-methylcytosine in imprinting control, the contribution of 5-hydroxymethylcytosine (5-hmC) to this epigenetic phenomenon remains unclear.

Objectives

Using matched mouse blood samples (from mice at 2, 4, and 10 months of age), our objective was to examine the effects of perinatal bisphenol A (BPA) exposure (50 μg/kg diet) on longitudinal 5-hmC patterns at imprinted regions. We also aimed to test the hypothesis that 5-hmC would show defined patterns at imprinted genes that persist across the life course.

Methods

Genome-wide 5-hmC levels were measured using hydroxymethylated DNA immunoprecipitation sequencing (HMeDIP-seq). Modeling of differential hydroxymethylation by BPA exposure was performed using a pipeline of bioinformatics tools, including the csaw R package.

Results

Based on BPA exposure, we identified 5,950 differentially hydroxymethylated regions (DHMRs), including 12 DHMRs that were annotated to murine imprinted genes—Gnas, Grb10, Plagl1, Klf14, Pde10a, Snrpn, Airn, Cmah, Ppp1r9a, Kcnq1, Phactr2, and Pde4d. When visualized, these imprinted gene DHMRs showed clear, consistent patterns of differential 5-hmC by developmental BPA exposure that persisted throughout adulthood.

Conclusions

These data show long-term establishment of 5-hmC marks at imprinted loci during development. Further, the effect of perinatal BPA exposure on 5-hmC at specific imprinted loci indicates that developmental exposure to environmental toxicants may alter long-term imprinted gene regulation via an epigenetic mechanism. https://doi.org/10.1289/EHP3441.",2018-07-23 +26527724,"RegulonDB version 9.0: high-level integration of gene regulation, coexpression, motif clustering and beyond.","RegulonDB (http://regulondb.ccg.unam.mx) is one of the most useful and important resources on bacterial gene regulation, as it integrates the scattered scientific knowledge of the best-characterized organism, Escherichia coli K-12, in a database that organizes large amounts of data. Its electronic format enables researchers to compare their results with the legacy of previous knowledge and supports bioinformatics tools and model building. Here, we summarize our progress with RegulonDB since our last Nucleic Acids Research publication describing RegulonDB, in 2013. In addition to maintaining curation up-to-date, we report a collection of 232 interactions with small RNAs affecting 192 genes, and the complete repertoire of 189 Elementary Genetic Sensory-Response units (GENSOR units), integrating the signal, regulatory interactions, and metabolic pathways they govern. These additions represent major progress to a higher level of understanding of regulated processes. We have updated the computationally predicted transcription factors, which total 304 (184 with experimental evidence and 120 from computational predictions); we updated our position-weight matrices and have included tools for clustering them in evolutionary families. 
We describe our semiautomatic strategy to accelerate curation, including datasets from high-throughput experiments, a novel coexpression distance to search for 'neighborhood' genes to known operons and regulons, and computational developments.",2015-11-02 +25294921,FungiFun2: a comprehensive online resource for systematic analysis of gene lists from fungal species.,"

Summary

Systematically extracting biological meaning from omics data is a major challenge in systems biology. Enrichment analysis is often used to identify characteristic patterns in candidate lists. FungiFun is a user-friendly Web tool for functional enrichment analysis of fungal genes and proteins. The novel tool FungiFun2 uses a completely revised data management system and thus allows enrichment analysis for 298 currently available fungal strains published in standard databases. FungiFun2 offers a modern Web interface and creates interactive tables, charts and figures, which users can directly manipulate to their needs.

Availability and implementation

FungiFun2, examples and tutorials are publicly available at https://elbe.hki-jena.de/fungifun/.

Contact

steffen.priebe@hki-jena.de or joerg.linde@hki-jena.de.",2014-10-07 +23196969,Genome evolution in the cold: Antarctic icefish muscle transcriptome reveals selective duplications increasing mitochondrial function.,"Antarctic notothenioids radiated over millions of years in subzero waters, evolving peculiar features, such as antifreeze glycoproteins and absence of heat shock response. Icefish, family Channichthyidae, also lack oxygen-binding proteins and display extreme modifications, including high mitochondrial densities in aerobic tissues. A genomic expansion accompanying the evolution of these fish was reported, but paucity of genomic information limits the understanding of notothenioid cold adaptation. We reconstructed and annotated the first skeletal muscle transcriptome of the icefish Chionodraco hamatus providing a new resource for icefish genomics (http://compgen.bio.unipd.it/chamatusbase/, last accessed December 12, 2012). We exploited deep sequencing of this energy-dependent tissue to test the hypothesis of selective duplication of genes involved in mitochondrial function. We developed a bioinformatic approach to univocally assign C. hamatus transcripts to orthology groups extracted from phylogenetic trees of five model species. Chionodraco hamatus duplicates were recorded for each orthology group allowing the identification of duplicated genes specific to the icefish lineage. Significantly more duplicates were found in the icefish when transcriptome data were compared with whole-genome data of model species. Indeed, duplicated genes were significantly enriched in proteins with mitochondrial localization, involved in mitochondrial function and biogenesis. In cold conditions and without oxygen-carrying proteins, energy production is challenging. 
The combination of high mitochondrial densities and the maintenance of duplicated genes involved in mitochondrial biogenesis and aerobic respiration might confer a selective advantage by improving oxygen diffusion and energy supply to aerobic tissues. Our results provide new insights into the genomic basis of icefish cold adaptation.",2013-01-01 +29678129,ClustAGE: a tool for clustering and distribution analysis of bacterial accessory genomic elements.,"

Background

The non-conserved accessory genome of bacteria can be associated with important adaptive characteristics that can contribute to niche specificity or pathogenicity of strains. High degrees of structural and compositional diversity in genomic islands and other elements of the accessory genome can complicate characterization of accessory genome contents among populations of strains. Methods for easily and effectively defining the distributions of discrete elements of the accessory genome among bacterial strains in a population are needed to explore the relationships between the flexible genome and bacterial adaptive traits.

Results

We have developed the open-source software package ClustAGE. This program, written in Perl, uses BLAST to cluster nucleotide accessory genomic elements from the genomes of multiple bacterial strains and to identify their distribution within the study population. The program output can be used in combination with strain phenotype data or other characteristics to detect associations. Optional graphical output is available for visualizing accessory genome gene content and distribution patterns. The capabilities of the software are demonstrated on a collection of 14 Pseudomonas aeruginosa genome sequences.

Conclusions

The ClustAGE software and utilities are effective for identifying characteristics and distributions of accessory genomic elements among groups of bacterial genomes. The ability to easily and effectively characterize the accessory genome of a sequence collection may provide a better understanding of the accessory genome's contribution to a species' adaptation and pathogenesis. The ClustAGE source code can be downloaded from https://clustage.sourceforge.io and a limited web-based implementation is available at http://vfsmspineagent.fsm.northwestern.edu/cgi-bin/clustage.cgi .",2018-04-20 +28413818,Data on enhanced expression and purification of camelid single domain antibodies from Escherichia coli classical inclusion bodies.,"Heterologous expression of high amounts of recombinant proteins is a milestone for research and industrial purposes. Single domain antibodies (sdAbs) are heavy-chain only antibody fragments with applications in the biotechnological, medical and industrial fields. The simple nature and small size of sdAbs allows for efficient expression of the soluble molecule in different hosts. However, in some cases, it results in low functional protein yield. To overcome this limitation, expression of a 6xHistag sdAb was attempted in different conditions in Escherichia coli BL21(DE3) cells. Data showed that high amount of sdAb can be expressed in E. coli classical inclusion bodies, efficiently extracted by urea in a short-time, and properly purified by metal ion affinity chromatography. 
These data originate from the research article ""Enhanced expression and purification of camelid single domain VHH antibodies from classical inclusion bodies"" Maggi and Scotti (2017) [1] (DOI: http://dx.doi.org/10.1016/j.pep.2017.02.007).",2017-03-31 +29028884,Protein-protein interaction specificity is captured by contact preferences and interface composition.,"Motivation:Large-scale computational docking will be increasingly used in future years to discriminate protein-protein interactions at the residue resolution. Complete cross-docking experiments make in silico reconstruction of protein-protein interaction networks a feasible goal. They ask for efficient and accurate screening of the millions structural conformations issued by the calculations. Results:We propose CIPS (Combined Interface Propensity for decoy Scoring), a new pair potential combining interface composition with residue-residue contact preference. CIPS outperforms several other methods on screening docking solutions obtained either with all-atom or with coarse-grain rigid docking. Further testing on 28 CAPRI targets corroborates CIPS predictive power over existing methods. By combining CIPS with atomic potentials, discrimination of correct conformations in all-atom structures reaches optimal accuracy. The drastic reduction of candidate solutions produced by thousands of proteins docked against each other makes large-scale docking accessible to analysis. Availability and implementation:CIPS source code is freely available at http://www.lcqb.upmc.fr/CIPS. Contact:alessandra.carbone@lip6.fr. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-02-01 +21245031,OrchidBase: a collection of sequences of the transcriptome derived from orchids.,"Orchids are one of the most ecological and evolutionarily significant plants, and the Orchidaceae is one of the most abundant families of the angiosperms. 
Genetic databases will be useful not only for gene discovery but also for future genomic annotation. For this purpose, OrchidBase was established from 37,979,342 sequence reads collected from 11 in-house Phalaenopsis orchid cDNA libraries. Among them, 41,310 expressed sequence tags (ESTs) were obtained by using Sanger sequencing, whereas 37,908,032 reads were obtained by using next-generation sequencing (NGS) including both Roche 454 and Solexa Illumina sequencers. These reads were assembled into 8,501 contigs and 76,116 singletons, resulting in 84,617 non-redundant transcribed sequences with an average length of 459 bp. The analysis pipeline of the database is an automated system written in Perl and C#, and consists of the following components: automatic pre-processing of EST reads, assembly of raw sequences, annotation of the assembled sequences and storage of the analyzed information in SQL databases. A web application was implemented with HTML and a Microsoft .NET Framework C# program for browsing and querying the database, creating dynamic web pages on the client side, analyzing gene ontology (GO) and mapping annotated enzymes to KEGG pathways. The online resources for putative annotation can be searched either by text or by using BLAST, and the results can be explored on the website and downloaded. Consequently, the establishment of OrchidBase will provide researchers with a high-quality genetic resource for data mining and facilitate efficient experimental studies on orchid biology and biotechnology. 
The OrchidBase database is freely available at http://lab.fhes.tn.edu.tw/est.",2011-01-17 +29183290,"Modeling, validation and verification of three-dimensional cell-scaffold contacts from terabyte-sized images.","BACKGROUND:Cell-scaffold contact measurements are derived from pairs of co-registered volumetric fluorescent confocal laser scanning microscopy (CLSM) images (z-stacks) of stained cells and three types of scaffolds (i.e., spun coat, large microfiber, and medium microfiber). Our analysis of the acquired terabyte-sized collection is motivated by the need to understand the nature of the shape dimensionality (1D vs 2D vs 3D) of cell-scaffold interactions relevant to tissue engineers that grow cells on biomaterial scaffolds. RESULTS:We designed five statistical and three geometrical contact models, and then down-selected them to one from each category using a validation approach based on physically orthogonal measurements to CLSM. The two selected models were applied to 414 z-stacks with three scaffold types and all contact results were visually verified. A planar geometrical model for the spun coat scaffold type was validated from atomic force microscopy images by computing surface roughness of 52.35 nm ±31.76 nm which was 2 to 8 times smaller than the CLSM resolution. A cylindrical model for fiber scaffolds was validated from multi-view 2D scanning electron microscopy (SEM) images. The fiber scaffold segmentation error was assessed by comparing fiber diameters from SEM and CLSM to be between 0.46% to 3.8% of the SEM reference values. For contact verification, we constructed a web-based visual verification system with 414 pairs of images with cells and their segmentation results, and with 4968 movies with animated cell, scaffold, and contact overlays. 
Based on visual verification by three experts, we report the accuracy of cell segmentation to be 96.4% with 94.3% precision, and the accuracy of cell-scaffold contact for a statistical model to be 62.6% with 76.7% precision and for a geometrical model to be 93.5% with 87.6% precision. CONCLUSIONS:The novelty of our approach lies in (1) representing cell-scaffold contact sites with statistical intensity and geometrical shape models, (2) designing a methodology for validating 3D geometrical contact models and (3) devising a mechanism for visual verification of hundreds of 3D measurements. The raw and processed data are publicly available from https://isg.nist.gov/deepzoomweb/data/ together with the web-based verification system.",2017-11-28 +28575203,Structurexplor: a platform for the exploration of structural features of RNA secondary structures.,"

Summary

Discovering function-related structural features, such as the cloverleaf shape of transfer RNA secondary structures, is essential to understand RNA function. With this aim, we have developed a platform, named Structurexplor, to facilitate the exploration of structural features in populations of RNA secondary structures. It has been designed and developed to help biologists interactively search for, evaluate and select interesting structural features that can potentially explain RNA functions.

Availability and implementation

Structurexplor is a web application available at http://structurexplor.dinf.usherbrooke.ca. The source code can be found at http://jpsglouzon.github.io/structurexplor/.

Contact

shengrui.wang@usherbrooke.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +27378294,ChAsE: chromatin analysis and exploration tool.,": We present ChAsE, a cross-platform desktop application developed for interactive visualization, exploration and clustering of epigenomic data such as ChIP-seq experiments. ChAsE is designed and developed in close collaboration with several groups of biologists and bioinformaticians with a focus on usability and interactivity. Data can be analyzed through k-means clustering, specifying presence or absence of signal in epigenetic data and performing set operations between clusters. Results can be explored in an interactive heat map and profile plot interface and exported for downstream analysis or as high quality figures suitable for publications.

Availability and implementation

Software, source code (MIT License), data and video tutorials available at http://chase.cs.univie.ac.at Contact: mkarimi@brc.ubc.ca or torsten.moeller@univie.ac.at. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-04 +28842838,MOSAIC: a web-interface for statistical analyses in ecotoxicology.,"In ecotoxicology, bioassays are standardly conducted in order to measure acute or chronic effects of potentially toxic substances on reproduction, growth, and/or survival of living animals. MOSAIC, standing for MOdeling and StAtistical tools for ecotoxICology, is a user-friendly web interface dedicated to the mathematical and statistical modelling of such standard bioassay data. Its simple use makes MOSAIC a turnkey decision-making tool for ecotoxicologists and regulators. Without wasting time on extensive mathematical and statistical technicalities, users are provided with advanced and innovative methods for a valuable quantitative environmental risk assessment. MOSAIC is available at http://pbil.univ-lyon1.fr/software/mosaic/ .",2017-08-26 +,Searchable Core Facility Database: Building Resource Bridges,"The VGN Searchable Core Facility Database (http://vgn.uvm.edu/corefacilities) is a directory of Core Facilities primarily focused on North America but with entries from around the world. It is a tool intended to foster collaboration and assist cores in growing their user base and providing networking opportunities. It is populated with Core Facilities that have voluntarily listed themselves and would like to be contacted by researchers and other core facilities for potential collaborations. Benefits: Allows researchers to locate resources needed for their studies; Provides a channel for facilities to collaborate; and Facilitates cores to reach financial sustainability. Researchers are able to perform searches online by service offerings, location, association, and key phrases to find a facility that will best meet their needs. 
Information listed for individual cores include: short description of core, contact name, email, address, services offered, hyperlink to website, equipment, and date of last revision of information. The data can be exported to an excel readable XML file. The database currently lists 292 cores, representing 39 states plus DC, 104 institutions, and 10 associations.",2011-10-01 +28847550,Effect of Statins on COPD: A Meta-Analysis of Randomized Controlled Trials.,"

Background

Much controversy persists regarding the place of statin drugs in the treatment of patients with COPD. This systematic review and meta-analysis sought to determine the clinical efficacy of statin therapy in COPD.

Methods

We searched MEDLINE, EMBASE, the Cochrane Database, and PubMed for relevant clinical studies. Randomized controlled trials (RCTs) comparing the effects of statin drugs with placebo in COPD populations were included. Pooled estimates were calculated using a random-effects model. Heterogeneity was determined using the I2 statistic.

Results

Ten trials with a total of 1,471 patients were included. Statin treatment was associated with a larger improvement in exercise capacity, lung function, and St. George's Respiratory Questionnaire score compared with placebo, but there were no statistically significant differences in inflammatory markers, all-cause mortality, and safety outcomes; however, subgroup analysis indicated that statin drugs improved clinical outcomes in the subjects from trials enrolling patients with overt cardiovascular disease (CVD), elevated baseline C-reactive protein levels, or a high cholesterol level.

Conclusions

The findings from this systematic review suggest a role for statin drugs in patients with COPD and coexisting CVD, evidence of increased systemic inflammation, or hyperlipidemia with respect to improving exercise tolerance and pulmonary function. These findings need to be confirmed by RCTs specifically designed to test this hypothesis and identify appropriate patients for statin use.

Trial registry

PROSPERO: CRD42017060594; https://www.crd.york.ac.uk/PROSPERO/.",2017-08-25 +29880482,Hypoxic Tumor-Derived Exosomal miR-301a Mediates M2 Macrophage Polarization via PTEN/PI3Kγ to Promote Pancreatic Cancer Metastasis.,"Exosomes are emerging as important mediators of the cross-talk between tumor cells and the microenvironment. However, the mechanisms by which exosomes modulate tumor development under hypoxia in pancreatic cancer remain largely unknown. Here, we found that hypoxic exosomes derived from pancreatic cancer cells activate macrophages to the M2 phenotype in a HIF1a or HIF2a-dependent manner, which then facilitates the migration, invasion, and epithelial-mesenchymal transition of pancreatic cancer cells. Given that exosomes have been shown to transport miRNAs to alter cellular functions, we discovered that miR-301a-3p was highly expressed in hypoxic pancreatic cancer cells and enriched in hypoxic pancreatic cancer cell-derived exosomes. Circulating exosomal miR-301a-3p levels positively associated with depth of invasion, lymph node metastasis, late TNM stage, and poor prognosis of pancreatic cancer. Hypoxic exosomal miR-301a-3p induced the M2 polarization of macrophages via activation of the PTEN/PI3Kγ signaling pathway. Coculturing of pancreatic cancer cells with macrophages in which miR-301a-3p was upregulated or treated with hypoxic exosomes enhanced their metastatic capacity. Collectively, these data indicate that pancreatic cancer cells generate miR-301a-3p-rich exosomes in a hypoxic microenvironment, which then polarize macrophages to promote malignant behaviors of pancreatic cancer cells. 
Targeting exosomal miR-301a-3p may provide a potential diagnosis and treatment strategy for pancreatic cancer.Significance: These findings identify an exosomal miRNA critical for microenvironmental cross-talk that may prove to be a potential target for diagnosis and treatment of pancreatic cancer.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/16/4586/F1.large.jpg Cancer Res; 78(16); 4586-98. ©2018 AACR.",2018-06-07 +25336621,DBTMEE: a database of transcriptome in mouse early embryos.,"DBTMEE (http://dbtmee.hgc.jp/) is a searchable and browsable database designed to manipulate gene expression information from our ultralarge-scale whole-transcriptome analysis of mouse early embryos. Since integrative approaches with multiple public analytical data have become indispensable for studying embryogenesis due to technical challenges such as biological sample collection, we intend DBTMEE to be an integrated gateway for the research community. To do so, we combined the gene expression profile with various public resources. Thereby, users can extensively investigate molecular characteristics among totipotent, pluripotent and differentiated cells while taking genetic and epigenetic characteristics into consideration. We have also designed user friendly web interfaces that enable users to access the data quickly and easily. DBTMEE will help to promote our understanding of the enigmatic fertilization dynamics.",2014-10-21 +29028267,VariantTools: an extensible framework for developing and testing variant callers.,"

Motivation

Variant calling is the complex task of separating real polymorphisms from errors. The appropriate strategy will depend on characteristics of the sample, the sequencing methodology and on the questions of interest.

Results

We present VariantTools, an extensible framework for developing and testing variant callers. There are facilities for reproducibly tallying, filtering, flagging and annotating variants. The tools are extensible, modular and flexible, so that they are tunable to particular use cases, and they interoperate with existing analysis software so that they can be embedded in established work flows.

Availability and implementation

VariantTools is available from http://www.bioconductor.org/.

Contact

michafla@gene.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-10-01 +27508253,Data on NAEP 2011 writing assessment prior computer use.,"This data article contains information based on the 2011 National Assessment of Educational Progress in Writing Restricted-Use Data, available from the National Center for Education Statistics (NCES Pub. No. 2014476). https://nces.ed.gov/nationsreportcard/researchcenter/datatools.aspx. The data include the statistical relationships between survey reports of teachers and students regarding prior use of computers and other technology and writing achievement levels on the 2011 computer-based NAEP writing assessment. This data article accompanies ""The Effects of Prior Computer Use on Computer-Based Writing: The 2011 NAEP Writing Assessment"" [1].",2016-07-06 +24918550,dbCerEx: a web-based database for the analysis of cervical cancer transcriptomes.,"

Background

Cervical cancers are ranked the second-most hazardous ailments among women worldwide. In the past two decades, microarray technologies have been applied to study genes involved in malignancy progress. However, in most of the published microarray studies, only a few genes were reported leaving rather a large amount of data unused. Also, RNA-Seq data has become more standard for transcriptome analysis and is widely applied in cancer studies. There is a growing demand for a tool to help the experimental researchers who are keen to explore cervical cancer gene therapy, but lack computer expertise to access and analyze the high throughput gene expression data.

Description

The dbCerEx database is designed to retrieve and process gene expression data from cervical cancer samples. It includes the genome wide expression profiles of cervical cancer samples, as well as a web utility to cluster genes with similar expression patterns. This feature will help researchers conduct further research to uncover novel gene functions.

Conclusion

The dbCerEx database is freely available for non-commercial use at http://128.135.207.10/dbCerEx/, and will be updated and integrated with more features as needed.",2014-06-11 +28070539,Data on genome analysis of Mycoplasmagallisepticum during intracellular infection.,"The genus Mycoplasma relates to Gram-positive bacteria that lack a cell wall and are capable to cause chronic disease in humans and animals. Among the agents of infection and disease in domestic poultry and wild birds, Mycoplasma gallisepticum is the most important mycoplasma species, causing considerable losses in the poultry industry. In the present paper, we provide data on adaptation of M. gallisepticum to the eukaryotic host cells on the genomic level. The major changes were predominantly localized in the VlhA-hemagglutinin genes which are important components of pathogenesis. The ability of mycoplasmas to change dramatically the repertoire of surface antigens and to vary the immunogenicity of these components allows them to remain undetected by the immune system of the host. The data presented in this article are related to the article entitled ""Phase Transition of the Bacterium upon Invasion of a Host Cell as a Mechanism of Adaptation: a Mycoplasma gallisepticum Model."" (Matyushkina et al., 2016) [1]. Data posted in repository https://www.ncbi.nlm.nih.gov/bioproject/315515. Bioproject ID: PRJNA315515.",2016-12-08 +29297379,Constructing an integrated gene similarity network for the identification of disease genes.,"BACKGROUND:Discovering novel genes that are involved human diseases is a challenging task in biomedical research. In recent years, several computational approaches have been proposed to prioritize candidate disease genes. Most of these methods are mainly based on protein-protein interaction (PPI) networks. However, since these PPI networks contain false positives and only cover less half of known human genes, their reliability and coverage are very low. 
Therefore, it is highly necessary to fuse multiple genomic data to construct a credible gene similarity network and then infer disease genes on the whole genomic scale. RESULTS:We proposed a novel method, named RWRB, to infer causal genes of interested diseases. First, we construct five individual gene (protein) similarity networks based on multiple genomic data of human genes. Then, an integrated gene similarity network (IGSN) is reconstructed based on similarity network fusion (SNF) method. Finally, we employee the random walk with restart algorithm on the phenotype-gene bilayer network, which combines phenotype similarity network, IGSN as well as phenotype-gene association network, to prioritize candidate disease genes. We investigate the effectiveness of RWRB through leave-one-out cross-validation methods in inferring phenotype-gene relationships. Results show that RWRB is more accurate than state-of-the-art methods on most evaluation metrics. Further analysis shows that the success of RWRB is benefited from IGSN which has a wider coverage and higher reliability comparing with current PPI networks. Moreover, we conduct a comprehensive case study for Alzheimer's disease and predict some novel disease genes that supported by literature. CONCLUSIONS:RWRB is an effective and reliable algorithm in prioritizing candidate disease genes on the genomic scale. Software and supplementary information are available at http://nclab.hit.edu.cn/~tianzhen/RWRB/ .",2017-09-20 +26507856,Nucleotide binding database NBDB--a collection of sequence motifs with specific protein-ligand interactions.,"NBDB database describes protein motifs, elementary functional loops (EFLs) that are involved in binding of nucleotide-containing ligands and other biologically relevant cofactors/coenzymes, including ATP, AMP, ATP, GMP, GDP, GTP, CTP, PAP, PPS, FMN, FAD(H), NAD(H), NADP, cAMP, cGMP, c-di-AMP and c-di-GMP, ThPP, THD, F-420, ACO, CoA, PLP and SAM. 
The database is freely available online at http://nbdb.bii.a-star.edu.sg. In total, NBDB contains data on 249 motifs that work in interactions with 24 ligands. Sequence profiles of EFL motifs were derived de novo from nonredundant Uniprot proteome sequences. Conserved amino acid residues in the profiles interact specifically with distinct chemical parts of nucleotide-containing ligands, such as nitrogenous bases, phosphate groups, ribose, nicotinamide, and flavin moieties. Each EFL profile in the database is characterized by a pattern of corresponding ligand-protein interactions found in crystallized ligand-protein complexes. NBDB database helps to explore the determinants of nucleotide and cofactor binding in different protein folds and families. NBDB can also detect fragments that match to profiles of particular EFLs in the protein sequence provided by user. Comprehensive information on sequence, structures, and interactions of EFLs with ligands provides a foundation for experimental and computational efforts on design of required protein functions.",2015-10-26 +28472372,ProteinsPlus: a web portal for structure analysis of macromolecules.,"With currently more than 126 000 publicly available structures and an increasing growth rate, the Protein Data Bank constitutes a rich data source for structure-driven research in fields like drug discovery, crop science and biotechnology in general. Typical workflows in these areas involve manifold computational tools for the analysis and prediction of molecular functions. Here, we present the ProteinsPlus web server that offers a unified easy-to-use interface to a broad range of tools for the early phase of structure-based molecular modeling. This includes solutions for commonly required pre-processing tasks like structure quality assessment (EDIA), hydrogen placement (Protoss) and the search for alternative conformations (SIENA). 
Beyond that, it also addresses frequent problems as the generation of 2D-interaction diagrams (PoseView), protein-protein interface classification (HyPPI) as well as automatic pocket detection and druggablity assessment (DoGSiteScorer). The unified ProteinsPlus interface covering all featured approaches provides various facilities for intuitive input and result visualization, case-specific parameterization and download options for further processing. Moreover, its generalized workflow allows the user a quick familiarization with the different tools. ProteinsPlus also stores the calculated results temporarily for future request and thus facilitates convenient result communication and re-access. The server is freely available at http://proteins.plus.",2017-07-01 +26748441,Structural basis of substrate specificity in porcine RNase 4.,"

Unlabelled

RNase 4, a member of the RNase A superfamily with substrate preference for uridine, has roles in host defence, angiogenesis and neurodegenerative diseases. It also exhibits the highest interspecies amino acid sequence similarity amongst RNase A family members. However, compared to other members of the RNase A family, including eosinophil-derived neurotoxin, eosinophil cationic protein and angiogenin, little is known about the molecular basis of substrate specificity in RNase 4. Here we report high to medium resolution structures of native porcine RNase 4 (PL3), a 'substrate-specificity' determining mutant D80A and their respective complexes with deoxyuridine 5'-monophosphate (dUMP) and deoxycytidine 5'-monophosphate (dCMP). These structures provide insight into the structural basis of the uridine versus cytosine substrate specificity in RNase 4: in the D80A mutant (D80A•dCMP), the side chain of Arg101 is positioned further away from the substrate-binding pocket due to the loss of the Asp80 side chain, reducing the repulsion force on the less favoured dCMP from Arg101 and allowing the ligand to occupy the binding pocket. This can also explain the observation that the ligand in the D80A•dCMP complex is stabilized only by a small number of hydrogen bonds. Compared to the previously reported structure of the human RNase 4•2'-deoxyuridine 3'-phosphate complex, the structure of PL3•dUMP complex shows additional hydrogen bonds between the ligand and the protein. In addition, the interaction between Arg101 and the dUMP ligand is absent. These observed differences are probably the result of the flexibility and different 'positioning' of the phosphate group among the mononucleotide ligands.

Database

The atomic coordinates and structure factors for PL3 (5AR6), D80A (5ARJ), PL3∙dUMP (5ARK) and D80A∙dCMP (5ARL) complexes have been deposited with the Protein Data Bank, Research Collaboratory for Structural Bioinformatics, Rutgers University, New Brunswick, NJ, USA (http://www.rcsb.org/).",2016-02-05 +24484917,"Data model, dictionaries, and desiderata for biomolecular simulation data indexing and sharing.","

Background

Few environments have been developed or deployed to widely share biomolecular simulation data or to enable collaborative networks to facilitate data exploration and reuse. As the amount and complexity of data generated by these simulations is dramatically increasing and the methods are being more widely applied, the need for new tools to manage and share this data has become obvious. In this paper we present the results of a process aimed at assessing the needs of the community for data representation standards to guide the implementation of future repositories for biomolecular simulations.

Results

We introduce a list of common data elements, inspired by previous work, and updated according to feedback from the community collected through a survey and personal interviews. These data elements integrate the concepts for multiple types of computational methods, including quantum chemistry and molecular dynamics. The identified core data elements were organized into a logical model to guide the design of new databases and application programming interfaces. Finally a set of dictionaries was implemented to be used via SQL queries or locally via a Java API built upon the Apache Lucene text-search engine.

Conclusions

The model and its associated dictionaries provide a simple yet rich representation of the concepts related to biomolecular simulations, which should guide future developments of repositories and more complex terminologies and ontologies. The model still remains extensible through the decomposition of virtual experiments into tasks and parameter sets, and via the use of extended attributes. The benefits of a common logical model for biomolecular simulations was illustrated through various use cases, including data storage, indexing, and presentation. All the models and dictionaries introduced in this paper are available for download at http://ibiomes.chpc.utah.edu/mediawiki/index.php/Downloads.",2014-01-30 +27996047,A genome-scale Escherichia coli kinetic metabolic model k-ecoli457 satisfying flux data for multiple mutant strains.,"Kinetic models of metabolism at a genome scale that faithfully recapitulate the effect of multiple genetic interventions would be transformative in our ability to reliably design novel overproducing microbial strains. Here, we introduce k-ecoli457, a genome-scale kinetic model of Escherichia coli metabolism that satisfies fluxomic data for wild-type and 25 mutant strains under different substrates and growth conditions. The k-ecoli457 model contains 457 model reactions, 337 metabolites and 295 substrate-level regulatory interactions. Parameterization is carried out using a genetic algorithm by simultaneously imposing all available fluxomic data (about 30 measured fluxes per mutant). The Pearson correlation coefficient between experimental data and predicted product yields for 320 engineered strains spanning 24 product metabolites is 0.84. 
This is substantially higher than that using flux balance analysis, minimization of metabolic adjustment or maximization of product yield exhibiting systematic errors with correlation coefficients of, respectively, 0.18, 0.37 and 0.47 (k-ecoli457 is available for download at http://www.maranasgroup.com).",2016-12-20 +27568099,DiffusionKit: A light one-stop solution for diffusion MRI data analysis.,"

Background

Diffusion magnetic resonance imaging (dMRI) techniques are receiving increasing attention due to their ability to characterize the arrangement map of white matter in vivo. However, the existing toolkits for dMRI analysis that have accompanied this surge possess noticeable limitations, such as large installation size, an incomplete pipeline, and a lack of cross-platform support.

New method

In this work, we developed a light, one-stop, cross-platform solution for dMRI data analysis, called DiffusionKit. It delivers a complete pipeline, including data format conversion, dMRI preprocessing, local reconstruction, white matter fiber tracking, fiber statistical analyses and various visualization schemes. Furthermore, DiffusionKit is a self-contained executable toolkit, without the need to install any other software.

Results

The DiffusionKit package is implemented in C/C++ and Qt/VTK, is freely available at http://diffusion.brainnetome.org and https://www.nitrc.org/projects/diffusionkit. The website of DiffusionKit includes test data, a complete tutorial and a series of tutorial examples. A mailing list has also been established for update notification and questions and answers.

Comparison with existing methods

DiffusionKit provides a full-function pipeline for dMRI data analysis, including data processing, modeling and visualization. Additionally, it provides both a graphical user interface (GUI) and command-line functions, which are helpful for batch processing. The standalone installation package has a small size and cross-platform support.

Conclusions

DiffusionKit provides a complete pipeline with cutting-edge methods for dMRI data analysis, including both a GUI interface and command-line functions. The rich functions for both data analysis and visualization will facilitate and benefit dMRI research.",2016-08-24 +30786676,Medical care of patients with disorders of aromatic amino acid metabolism: a report based on the Polish National Health Fund data records.,"

Introduction

Patients with disorders of aromatic amino acid metabolism are a heterogeneous group. They vary in morbidity and medical care requirements. Polish newborn screening program allows for quick diagnosis of some inborn errors of metabolism (such as classical phenylketonuria, mild hyperphenylalaninemias, tyrosinemia type 1 and tyrosinemia type 2) and subsequent immediate treatment.

The aim of the study

To evaluate the effect of the Polish public healthcare system in terms of management and access to health care services for children and adults with disorders of aromatic amino acid metabolism.

Material and methods

The analysis was based on the National Health Fund (NFZ) reporting data for 2009-2015. The analysis included patients with disorders of aromatic amino acid metabolism converting ICD-10 coding according to the International Classification of Diseases. The analysis covered patients with codes E70, E70.0, E70.1, E70.2, E70.3, E70.8, E70.9. The analysis was prepared as part of the mapping of health needs in metabolic diseases, http://www.mapypotrzebzdrowotnych.mz.gov.pl/.

Results

In 2009-2015, 4090 patients with disorders of aromatic amino acid metabolism were registered in the NFZ system. The largest number of patients were hospitalized and registered in outpatient specialistic care (AOS) in the first year of life. After the second year of life, the number of hospitalized patients was almost zero, and the number of children (< 18 years) with AOS according to age was stable. After the 18 years of age the number of patients in the AOS gradually decreased. The population of patients aged 0-28 years accounted for 99% of all cases, after 28 years of age were only one percent of the total population. There were 95 deaths, the average age of death was 77 years. In the whole study group the highest number of deaths was recorded after 70 years of age, 21% of all deaths were reported in both working-age patients children (2 deaths). Patients with classical phenylketonuria were the most commonly reported in the AOS. 22% of patients were coded with ICD-10 as E70 without extension.

Conclusions

Children aged 0-18 years with disorders of amino acid metabolism had full access to a well-organized specialized medical care system in Poland. In contrast, care for adult patients with the disorders was limited. It is necessary to properly code the disease using ICD-10 extension codes in order to avoid inconsistency in data reporting or misdiagnosis.",2018-01-01 +26087011,Improved Carbohydrate Structure Generalization Scheme for (1)H and (13)C NMR Simulations.,"The improved Carbohydrate Structure Generalization Scheme has been developed for the simulation of (13)C and (1)H NMR spectra of oligo- and polysaccharides and their derivatives, including those containing noncarbohydrate constituents found in natural glycans. Besides adding the (1)H NMR calculations, we improved the accuracy and performance of prediction and optimized the mathematical model of the precision estimation. This new approach outperformed other methods of chemical shift simulation, including database-driven, neural net-based, and purely empirical methods and quantum-mechanical calculations at high theory levels. It can process structures with rarely occurring and noncarbohydrate constituents unsupported by the other methods. The algorithm is transparent to users and allows tracking used reference NMR data to original publications. It was implemented in the Glycan-Optimized Dual Empirical Spectrum Simulation (GODESS) web service, which is freely available at the platform of the Carbohydrate Structure Database (CSDB) project ( http://csdb.glycoscience.ru).",2015-07-13 +29133277,OrthoGNC: A Software for Accurate Identification of Orthologs Based on Gene Neighborhood Conservation.,"Orthology relations can be used to transfer annotations from one gene (or protein) to another. Hence, detecting orthology relations has become an important task in the post-genomic era. Various genomic events, such as duplication and horizontal gene transfer, can cause erroneous assignment of orthology relations. 
In closely-related species, gene neighborhood information can be used to resolve many ambiguities in orthology inference. Here we present OrthoGNC, a software for accurately predicting pairwise orthology relations based on gene neighborhood conservation. Analyses on simulated and real data reveal the high accuracy of OrthoGNC. In addition to orthology detection, OrthoGNC can be employed to investigate the conservation of genomic context among potential orthologs detected by other methods. OrthoGNC is freely available online at http://bs.ipm.ir/softwares/orthognc and http://tinyurl.com/orthoGNC.",2017-11-11 +27932913,"LifeWatchGreece Portal development: architecture, implementation and challenges for a biodiversity research e-infrastructure.","

Background

Biodiversity data is characterized by its cross-disciplinary character, the extremely broad range of data types and structures, and the plethora of different data sources providing resources for the same piece of information in a heterogeneous way. Since the web inception two decades ago, there are multiple initiatives to connect, aggregate, share, and publish biodiversity data, and to establish data and work flows in order to analyze them. The European program LifeWatch aims at establishing a distributed network of nodes implementing virtual research environment in Europe to facilitate the work of biodiversity researchers and managers. LifeWatchGreece is one of these nodes where a portal was developed offering access to a suite of virtual laboratories and e-services.

New information

Despite its strict definition in information technology, in practice ""portal"" is a fairly broad term that embraces many web architectures. In the biodiversity domain, the term ""portal"" is usually used to indicate either a web site that provides access to a single or an aggregation of data repositories (like: http://indiabiodiversity.org/, http://www.mountainbiodiversity.org/, http://data.freshwaterbiodiversity.eu), a web site that gathers information about various online biodiversity tools (like http://test-eubon.ebd.csic.es/, http://marine.lifewatch.eu/) or a web site that just gathers information and news about the biodiversity domain (like http://chm.moew.government.bg). LifeWatchGreece's portal takes the concept of a portal a step further. In strict IT terms, LifeWatchGreece's portal is partly a portal, partly a platform and partly an aggregator. It includes a number of biodiversity-related web tools integrated into a centrally-controlled software ecosystem. This ecosystem includes subsystems for access control, traffic monitoring, user notifications and web tool management. These subsystems are shared to all the web tools that have been integrated to the portal and thereby are part of this ecosystem. These web tools do not consist in external and completely independent web applications as it happens in most other portals. A quite obvious (to the user) indication of this is the Single-Sign-On (SSO) functionality for all tools and the common user interface wrapper that most of these tools use. Another example of a less obvious functionality is the common user profile that is shared and can be utilized by all tools (e.g user's timezone).",2016-11-01 +28657151,Joint genotype- and ancestry-based genome-wide association studies in admixed populations.,"In genome-wide association studies (GWAS) genetic loci that influence complex traits are localized by inspecting associations between genotypes of genetic markers and the values of the trait of interest. 
On the other hand, admixture mapping, which is performed in case of populations consisting of a recent mix of two ancestral groups, relies on the ancestry information at each locus (locus-specific ancestry). Recently it has been proposed to jointly model genotype and locus-specific ancestry within the framework of single marker tests. Here, we extend this approach for population-based GWAS in the direction of multimarker models. A modified version of the Bayesian information criterion is developed for building a multilocus model that accounts for the differential correlation structure due to linkage disequilibrium (LD) and admixture LD. Simulation studies and a real data example illustrate the advantages of this new approach compared to single-marker analysis or modern model selection strategies based on separately analyzing genotype and ancestry data, as well as to single-marker analysis combining genotypic and ancestry information. Depending on the signal strength, our procedure automatically chooses whether genotypic or locus-specific ancestry markers are added to the model. This results in a good compromise between the power to detect causal mutations and the precision of their localization. The proposed method has been implemented in R and is available at http://www.math.uni.wroc.pl/~mbogdan/admixtures/.",2017-06-28 +25136539,Development and Evaluation of a Web-based Computer-Assisted Personal Interview System (CAPIS) for Open-ended Dietary Assessments among Koreans.,"The accuracy of dietary assessments has emerged as a major concern in nutritional epidemiology and new dietary assessment tools using computer technology to increase accuracy have been developed in many countries. The purpose of this study was to develop a web-based computer-assisted personal interview system (CAPIS) for conducting dietary assessment and to evaluate its practical utilization among Koreans. 
The client software was developed using Microsoft's ClickOnce technology, which allows communication with a database system via an http server to add or retrieve data. The system consists of a tracking system for the subject and researcher, a data-input system during the interview, a calculation system for estimating food and nutrient intake, a data-output system for presenting the results, and an evaluation system for assessing the adequacy of nutrient and food intake. Databases of the nutrient composition of common food (n = 3,642), recipes for common dishes (n = 1,886), and photos of serving sizes for food and dishes (n = 4,152) were constructed, and logical processes for data collection, calculation, and output were developed. The functionality, on-site applicability, and efficiency of CAPIS were evaluated in a convenience sample of 181 participants (61 males, 120 females; aged 24 to 85) by comparing with manual 24 hour recall method with paper questionnaire. The CAPIS was functioned adequately in the field survey in terms of completeness of function, security, and compliance of researcher and subjects. Regarding on-site applicability, 23.2%, 32.6%, 35.4%, and 43.7% of subjects reported that CAPIS was easier to recall their diet, to estimate the amount consumed, to communicate with the interviewer, and to concentrate on the interview than the manual method with paper questionnaire, respectively. Although CAPIS required more interview time (9 min 42 sec) compared to the manual method (7 min 30 sec), it saved time and cost for data coding and entry (15 min 35 sec) and gave high satisfaction from the prompt feedback after interview to the subjects, which increase efficiency to apply on the field survey. 
Our results suggest that the newly developed CAPIS is suitable for conducting personal interviews for dietary assessment in Korean population.",2014-07-29 +,"Aerial survey and spatial analysis of sources of light pollution in Berlin, Germany","Aerial observations of light pollution can fill an important gap between ground based surveys and nighttime satellite data. Terrestrially bound surveys are labor intensive and are generally limited to a small spatial extent, and while existing satellite data cover the whole world, they are limited to coarse resolution. This paper describes the production of a high resolution (1m) mosaic image of the city of Berlin, Germany at night. The dataset is spatially analyzed to identify the major sources of light pollution in the city based on urban land use data. An area-independent ‘brightness factor’ is introduced that allows direct comparison of the light emission from differently sized land use classes, and the percentage area with values above average brightness is calculated for each class. Using this methodology, lighting associated with streets has been found to be the dominant source of zenith directed light pollution (31.6%), although other land use classes have much higher average brightness. These results are compared with other urban light pollution quantification studies. The minimum resolution required for an analysis of this type is found to be near 10m. Future applications of high resolution datasets such as this one could include: studies of the efficacy of light pollution mitigation measures, improved light pollution simulations, economic and energy use, the relationship between artificial light and ecological parameters (e.g. circadian rhythm, fitness, mate selection, species distributions, migration barriers and seasonal behavior), or the management of nightscapes. 
To encourage further scientific inquiry, the mosaic data is freely available at Pangaea: http://dx.doi.org/10.1594/PANGAEA.785492.",2012-11-01 +25023141,Genome-wide analysis of the heritability of amyotrophic lateral sclerosis.,"

Importance

Considerable advances have been made in our understanding of the genetics underlying amyotrophic lateral sclerosis (ALS). Nevertheless, for the majority of patients who receive a diagnosis of ALS, the role played by genetics is unclear. Further elucidation of the genetic architecture of this disease will help clarify the role of genetic variation in ALS populations.

Objective

To estimate the relative importance of genetic factors in a complex disease such as ALS by accurately quantifying heritability using genome-wide data derived from genome-wide association studies.

Design, setting, and participants

We applied the genome-wide complex trait analysis algorithm to 3 genome-wide association study data sets that were generated from ALS case-control cohorts of European ancestry to estimate the heritability of ALS. Cumulatively, these data sets contained genotype data from 1223 cases and 1591 controls that had been previously generated and are publically available on the National Center for Biotechnology Information database of genotypes and phenotypes website (http://www.ncbi.nlm.nih.gov/gap). The cohorts genotyped as part of these genome-wide association study efforts include the InCHIANTI (aging in the Chianti area) Study, the Piemonte and Valle d'Aosta Register for Amyotrophic Lateral Sclerosis, the National Institute of Neurological Disorders and Stroke Repository, and an ALS specialty clinic in Helsinki, Finland.

Main outcomes and measures

A linear mixed model was used to account for all known single-nucleotide polymorphisms simultaneously and to quantify the phenotypic variance present in ostensibly outbred individuals. Variance measures were used to estimate heritability.

Results

With our meta-analysis, which is based on genome-wide genotyping data, we estimated the overall heritability of ALS to be approximately 21.0% (95% CI, 17.1-24.9) (SE = 2.0%), indicating that additional genetic variation influencing risk of ALS loci remains to be identified. Furthermore, we identified 17 regions of the genome that display significantly high heritability estimates. Eleven of these regions represent novel candidate regions for ALS risk.

Conclusions and relevance

We found the heritability of ALS to be significantly higher than previously reported. We also identified multiple, novel genomic regions that we hypothesize may contain causative risk variants that influence susceptibility to ALS.",2014-09-01 +30074988,Combined bacterial and fungal intestinal microbiota analyses: Impact of storage conditions and DNA extraction protocols.,"

Background

The human intestinal microbiota contains a vast community of microorganisms increasingly studied using high-throughput DNA sequencing. Standardized protocols for storage and DNA extraction from fecal samples have been established mostly for bacterial microbiota analysis. Here, we investigated the impact of storage and DNA extraction on bacterial and fungal community structures detected concomitantly.

Methods

Fecal samples from healthy adults were stored at -80°C as such or diluted in RNAlater® and subjected to 2 extraction protocols with mechanical lysis: the Powersoil® MoBio kit or the International Human Microbiota Standard (IHMS) Protocol Q. Libraries of the 12 samples targeting the V3-V4 16S and the ITS1 regions were prepared using Metabiote® (Genoscreen) and sequenced on GS-FLX-454. Sequencing data were analysed using SHAMAN (http://shaman.pasteur.fr/). The bacterial and fungal microbiota were compared in terms of diversity and relative abundance.

Results

We obtained 171869 and 199089 quality-controlled reads for 16S and ITS, respectively. All 16S reads were assigned to 41 bacterial genera; only 52% of ITS reads were assigned to 40 fungal genera/section. Rarefaction curves were satisfactory in 3/3 and 2/3 subjects for 16S and ITS, respectively. PCoA showed important inter-individual variability of intestinal microbiota largely overweighing the effect of storage or extraction. Storage in RNAlater® impacted (downward trend) the relative abundances of 7/41 bacterial and 6/40 fungal taxa, while extraction impacted randomly 18/41 bacterial taxa and 1/40 fungal taxon.

Conclusion

Our results showed that RNAlater® moderately impacts bacterial or fungal community structures, while extraction significantly influences the bacterial composition. For combined bacterial and fungal intestinal microbiota analysis, immediate sample freezing should be preferred when feasible, but storage in RNAlater® remains an option under unfavourable conditions or for concomitant metatranscriptomic analysis; and extraction should rely on protocols validated for bacterial analysis, such as IHMS Protocol Q, and including a powerful mechanical lysis, essential for fungal extraction.",2018-08-03 +29745829,A gene profiling deconvolution approach to estimating immune cell composition from complex tissues.,"BACKGROUND:A new emerged cancer treatment utilizes intrinsic immune surveillance mechanism that is silenced by those malicious cells. Hence, studies of tumor infiltrating lymphocyte populations (TILs) are key to the success of advanced treatments. In addition to laboratory methods such as immunohistochemistry and flow cytometry, in silico gene expression deconvolution methods are available for analyses of relative proportions of immune cell types. RESULTS:Herein, we used microarray data from the public domain to profile gene expression pattern of twenty-two immune cell types. Initially, outliers were detected based on the consistency of gene profiling clustering results and the original cell phenotype notation. Subsequently, we filtered out genes that are expressed in non-hematopoietic normal tissues and cancer cells. For every pair of immune cell types, we ran t-tests for each gene, and defined differentially expressed genes (DEGs) from this comparison. Equal numbers of DEGs were then collected as candidate lists and numbers of conditions and minimal values for building signature matrixes were calculated. Finally, we used v -Support Vector Regression to construct a deconvolution model. 
The performance of our system was finally evaluated using blood biopsies from 20 adults, in which 9 immune cell types were identified using flow cytometry. The present computations performed better than current state-of-the-art deconvolution methods. CONCLUSIONS:Finally, we implemented the proposed method into R and tested extensibility and usability on Windows, MacOS, and Linux operating systems. The method, MySort, is wrapped as the Galaxy platform pluggable tool and usage details are available at https://testtoolshed.g2.bx.psu.edu/view/moneycat/mysort/e3afe097e80a .",2018-05-08 +22638580,AVPpred: collection and prediction of highly effective antiviral peptides.,"In the battle against viruses, antiviral peptides (AVPs) had demonstrated the immense potential. Presently, more than 15 peptide-based drugs are in various stages of clinical trials. Emerging and re-emerging viruses further emphasize the efforts to accelerate antiviral drug discovery efforts. Despite, huge importance of the field, no dedicated AVP resource is available. In the present study, we have collected 1245 peptides which were experimentally checked for antiviral activity targeting important human viruses like influenza, HIV, HCV and SARS, etc. After removing redundant peptides, 1056 peptides were divided into 951 training and 105 validation data sets. We have exploited various peptides sequence features, i.e. motifs and alignment followed by amino acid composition and physicochemical properties during 5-fold cross validation using Support Vector Machine. Physiochemical properties-based model achieved maximum 85% accuracy and 0.70 Matthew's Correlation Coefficient (MCC). Performance of this model on the experimental validation data set showed 86% accuracy and 0.71 MCC which is far better than the general antimicrobial peptides prediction methods. 
Therefore, AVPpred-the first web server for predicting the highly effective AVPs would certainly be helpful to researchers working on peptide-based antiviral development. The web server is freely available at http://crdd.osdd.net/servers/avppred.",2012-05-25 +26173767,"Mediterranean Founder Mutation Database (MFMD): Taking Advantage from Founder Mutations in Genetics Diagnosis, Genetic Diversity and Migration History of the Mediterranean Population.","The Mediterranean basin has been the theater of migration crossroads followed by settlement of several societies and cultures in prehistoric and historical times, with important consequences on genetic and genomic determinisms. Here, we present the Mediterranean Founder Mutation Database (MFMD), established to offer web-based access to founder mutation information in the Mediterranean population. Mutation data were collected from the literature and other online resources and systematically reviewed and assembled into this database. The information provided for each founder mutation includes DNA change, amino-acid change, mutation type and mutation effect, as well as mutation frequency and coalescence time when available. Currently, the database contains 383 founder mutations found in 210 genes related to 219 diseases. We believe that MFMD will help scientists and physicians to design more rapid and less expensive genetic diagnostic tests. Moreover, the coalescence time of founder mutations gives an overview about the migration history of the Mediterranean population. MFMD can be publicly accessed from http://mfmd.pasteur.ma.",2015-07-30 +31099769,"Malaria Surveillance - United States, 2016.","

Problem/condition

Malaria in humans is caused by intraerythrocytic protozoa of the genus Plasmodium. These parasites are transmitted by the bite of an infective female Anopheles species mosquito. The majority of malaria infections in the United States occur among persons who have traveled to regions with ongoing malaria transmission. However, malaria is occasionally acquired by persons who have not traveled out of the country through exposure to infected blood products, congenital transmission, laboratory exposure, or local mosquitoborne transmission. Malaria surveillance in the United States is conducted to provide information on its occurrence (e.g., temporal, geographic, and demographic), guide prevention and treatment recommendations for travelers and patients, and facilitate transmission control measures if locally acquired cases are identified.

Period covered

This report summarizes confirmed malaria cases in persons with onset of illness in 2016 and summarizes trends in previous years.

Description of system

Malaria cases diagnosed by blood film microscopy, polymerase chain reaction, or rapid diagnostic tests are reported to local and state health departments by health care providers or laboratory staff members. Case investigations are conducted by local and state health departments, and reports are transmitted to CDC through the National Malaria Surveillance System (NMSS), the National Notifiable Diseases Surveillance System (NNDSS), or direct CDC consultations. CDC reference laboratories provide diagnostic assistance and conduct antimalarial drug resistance marker testing on blood samples submitted by health care providers or local or state health departments. This report summarizes data from the integration of all NMSS and NNDSS cases, CDC reference laboratory reports, and CDC clinical consultations.

Results

CDC received reports of 2,078 confirmed malaria cases with onset of symptoms in 2016, including two congenital cases, three cryptic cases, and one case acquired through blood transfusion. The number of malaria cases diagnosed in the United States has been increasing since the mid-1970s. However, in 2015 a decrease occurred in the number of cases, specifically from the region of West Africa, likely due to altered travel related to the Ebola virus disease outbreak. The number of confirmed malaria cases in 2016 represents a 36% increase compared with 2015, and the 2016 total is 153 more cases than in 2011, which previously had the highest number of cases (1,925 cases). In 2016, a total of 1,729 cases originated from Africa, and 1,061 (61.4%) of these came from West Africa. P. falciparum accounted for the majority of the infections (1,419 [68.2%]), followed by P. vivax (251 [12.1%]). Fewer than 2% of patients were infected by two species (23 [1.1%]). The infecting species was not reported or was undetermined in 10.8% of cases. CDC provided diagnostic assistance for 12.1% of confirmed cases and tested 10.8% of specimens with P. falciparum infections for antimalarial resistance markers. Of the U.S. resident patients who reported reason for travel, 69.4% were travelers who were visiting friends and relatives. The proportion of U.S. residents with malaria who reported taking any chemoprophylaxis in 2016 (26.3%) was similar to that in 2015 (26.6%), and adherence was poor among those who took chemoprophylaxis. Among the 964 U.S. residents with malaria for whom information on chemoprophylaxis use and travel region were known, 94.0% of patients with malaria did not adhere to or did not take a CDC-recommended chemoprophylaxis regimen. Among 795 women with malaria, 50 were pregnant, and one had adhered to mefloquine chemoprophylaxis. Forty-one (2.0%) malaria cases occurred among U.S. military personnel in 2016, a comparable proportion to that in 2015 (23 cases [1.5%]). 
Among all reported cases in 2016, a total of 306 (14.7%) were classified as severe illnesses, and seven persons died. In 2016, CDC analyzed 144 P. falciparum-positive and nine P. falciparum mixed species samples for surveillance of antimalarial resistance markers (although certain loci were untestable in some samples); genetic polymorphisms associated with resistance to pyrimethamine were identified in 142 (97.9%), to sulfadoxine in 98 (70.5%), to chloroquine in 67 (44.7%), to mefloquine in six (4.3%), and to atovaquone in one (<1.0%). The completeness of key variables (e.g., species, country of acquisition, and resident status) was 79.4% in 2016 and 75.7% in 2015.

Interpretation

The number of reported malaria cases in 2016 continued a decades-long increasing trend and is the highest since 1972. The importation of malaria reflects the overall increase in global travel trends to and from areas where malaria is endemic; a transient decrease in the acquisition of cases, predominantly from West Africa, occurred in 2015. In 2016, more cases (absolute number) originated from regions of the world with widespread malaria transmission. Since the early 2000s, worldwide interventions to reduce malaria have been successful; however, progress has plateaued in recent years, the disease remains endemic in many regions, and the use of appropriate prevention measures by travelers remains inadequate.

Public health actions

The best way to prevent malaria is to take chemoprophylaxis medication during travel to a country where malaria is endemic. Malaria infections can be fatal if not diagnosed and treated promptly with antimalarial medications appropriate for the patient's age and medical history, the likely country of malaria acquisition, and previous use of antimalarial chemoprophylaxis. In 2018, two tafenoquine-based antimalarials were approved by the Food and Drug Administration (FDA) for use in the United States. Arakoda was approved for use by adults for chemoprophylaxis and is available as a weekly dosage that is convenient during travel, which might improve adherence and also can prevent relapses from P. vivax and P. ovale infections. Krintafel was approved for radical cure of P. vivax infections in those >16 years old. In April 2019, intravenous artesunate became the first-line medication for treatment of severe malaria in the United States. Because intravenous artesunate is not FDA approved, it is available from CDC under an investigational new drug protocol. Detailed recommendations for preventing malaria are available to the general public at the CDC website (https://www.cdc.gov/malaria/travelers/drugs.html). Health care providers should consult the CDC Guidelines for Treatment of Malaria in the United States and contact the CDC's Malaria Hotline for case management advice when needed. Malaria treatment recommendations are available online (https://www.cdc.gov/malaria/diagnosis_treatment) and from the Malaria Hotline (770-488-7788 or toll-free at 855-856-4713). Persons submitting malaria case reports (care providers, laboratories, and state and local public health officials) should provide complete information because incomplete reporting compromises case investigations and efforts to prevent infections and examine trends in malaria cases. Adherence to recommended malaria prevention strategies is low among U.S. 
travelers; reasons for nonadherence include prematurely stopping after leaving the area where malaria was endemic, forgetting to take the medication, and experiencing a side effect. Molecular surveillance of antimalarial drug resistance markers (https://www.cdc.gov/malaria/features/ars.html) enables CDC to track, guide treatment, and manage drug resistance in malaria parasites both domestically and internationally. More samples are needed to improve the completeness of antimalarial drug resistance analysis; therefore, CDC requests that blood specimens be submitted for all cases of malaria diagnosed in the United States.",2019-05-17 +29671403,KDiamend: a package for detecting key drivers in a molecular ecological network of disease.,"BACKGROUND:Microbial abundance profiles are applied widely to understand diseases from the aspect of microbial communities. By investigating the abundance associations of species or genes, we can construct molecular ecological networks (MENs). The MENs are often constructed by calculating the Pearson correlation coefficient (PCC) between genes. In this work, we also applied multimodal mutual information (MMI) to construct MENs. The members which drive the concerned MENs are referred to as key drivers. RESULTS:We proposed a novel method to detect the key drivers. First, we partitioned the MEN into subnetworks. Then we identified the most pertinent subnetworks to the disease by measuring the correlation between the abundance pattern and the delegated phenotype-the variable representing the disease phenotypes. Last, for each identified subnetwork, we detected the key driver by PageRank. We developed a package named KDiamend and applied it to the gut and oral microbial data to detect key drivers for Type 2 diabetes (T2D) and Rheumatoid Arthritis (RA). We detected six T2D-relevant subnetworks and three key drivers of them are related to the carbohydrate metabolic process. 
In addition, we detected nine subnetworks related to RA, a disease caused by compromised immune systems. The extracted subnetworks include InterPro matches (IPRs) concerned with immunoglobulin, Sporulation, biofilm, Flaviviruses, bacteriophage, etc., while the development of biofilms is regarded as one of the drivers of persistent infections. CONCLUSION:KDiamend is feasible to detect key drivers and offers insights to uncover the development of diseases. The package is freely available at http://www.deepomics.org/pipelines/3DCD6955FEF2E64A/ .",2018-04-11 +27837958,[The REporting of studies Conducted using Observational Routinely-collected health Data (RECORD) statement].,"Routinely collected health data, obtained for administrative and clinical purposes without specific a priori research goals, are increasingly used for research. The rapid evolution and availability of these data have revealed issues not addressed by existing reporting guidelines, such as Strengthening the Reporting of Observational Studies in Epidemiology (STROBE). The REporting of studies Conducted using Observational Routinely collected health Data (RECORD) statement was created to fill these gaps. RECORD was created as an extension to the STROBE statement to address reporting items specific to observational studies using routinely collected health data. RECORD consists of a checklist of 13 items related to the title, abstract, introduction, methods, results, and discussion section of articles, and other information required for inclusion in such research reports. This document contains the checklist as well as explanatory and elaboration information to enhance the use of the checklist. Examples of good reporting for each RECORD checklist item are also included. This document, as well as the accompanying website and message board (http://www.record-statement.org), will improve the implementation and understanding of RECORD. 
By implementing RECORD, authors, journals editors, and peer reviewers can enhance transparency of research reporting.",2016-09-28 +25402006,In silico prediction of physical protein interactions and characterization of interactome orphans.,"Protein-protein interactions (PPIs) are useful for understanding signaling cascades, predicting protein function, associating proteins with disease and fathoming drug mechanism of action. Currently, only ∼ 10% of human PPIs may be known, and about one-third of human proteins have no known interactions. We introduce FpClass, a data mining-based method for proteome-wide PPI prediction. At an estimated false discovery rate of 60%, we predicted 250,498 PPIs among 10,531 human proteins; 10,647 PPIs involved 1,089 proteins without known interactions. We experimentally tested 233 high- and medium-confidence predictions and validated 137 interactions, including seven novel putative interactors of the tumor suppressor p53. Compared to previous PPI prediction methods, FpClass achieved better agreement with experimentally detected PPIs. We provide an online database of annotated PPI predictions (http://ophid.utoronto.ca/fpclass/) and the prediction software (http://www.cs.utoronto.ca/~juris/data/fpclass/).",2014-11-17 +28610996,From cheminformatics to structure-based design: Web services and desktop applications based on the NAOMI library.,"Nowadays, computational approaches are an integral part of life science research. Problems related to interpretation of experimental results, data analysis, or visualization tasks highly benefit from the achievements of the digital era. Simulation methods facilitate predictions of physicochemical properties and can assist in understanding macromolecular phenomena. Here, we will give an overview of the methods developed in our group that aim at supporting researchers from all life science areas. 
Based on state-of-the-art approaches from structural bioinformatics and cheminformatics, we provide software covering a wide range of research questions. Our all-in-one web service platform ProteinsPlus (http://proteins.plus) offers solutions for pocket and druggability prediction, hydrogen placement, structure quality assessment, ensemble generation, protein-protein interaction classification, and 2D-interaction visualization. Additionally, we provide a software package that contains tools targeting cheminformatics problems like file format conversion, molecule data set processing, SMARTS editing, fragment space enumeration, and ligand-based virtual screening. Furthermore, it also includes structural bioinformatics solutions for inverse screening, binding site alignment, and searching interaction patterns across structure libraries. The software package is available at http://software.zbh.uni-hamburg.de.",2017-06-11 +21824513,phiGENOME: an integrative navigation throughout bacteriophage genomes.,"phiGENOME is a web-based genome browser generating dynamic and interactive graphical representation of phage genomes stored in the phiSITE, database of gene regulation in bacteriophages. phiGENOME is an integral part of the phiSITE web portal (http://www.phisite.org/phigenome) and it was optimised for visualisation of phage genomes with the emphasis on the gene regulatory elements. phiGENOME consists of three components: (i) genome map viewer built using Adobe Flash technology, providing dynamic and interactive graphical display of phage genomes; (ii) sequence browser based on precisely formatted HTML tags, providing detailed exploration of genome features on the sequence level and (iii) regulation illustrator, based on Scalable Vector Graphics (SVG) and designed for graphical representation of gene regulations. 
Bringing 542 complete genome sequences accompanied with their rich annotations and references, makes phiGENOME a unique information resource in the field of phage genomics.",2011-07-30 +28164798,Introduction to bifactor polytomous item response theory analysis.,"A bifactor item response theory model can be used to aid in the interpretation of the dimensionality of a multifaceted questionnaire that assumes continuous latent variables underlying the propensity to respond to items. This model can be used to describe the locations of people on a general continuous latent variable as well as on continuous orthogonal specific traits that characterize responses to groups of items. The bifactor graded response (bifac-GR) model is presented in contrast to a correlated traits (or multidimensional GR model) and unidimensional GR model. Bifac-GR model specification, assumptions, estimation, and interpretation are demonstrated with a reanalysis of data (Campbell, 2008) on the Shared Activities Questionnaire. We also show the importance of marginalizing the slopes for interpretation purposes and we extend the concept to the interpretation of the information function. To go along with the illustrative example analyses, we have made available supplementary files that include command file (syntax) examples and outputs from flexMIRT, IRTPRO, R, Mplus, and STATA. Supplementary data to this article can be found online at http://dx.doi.org/10.1016/j.jsp.2016.11.001. 
Data needed to reproduce analyses in this article are available as supplemental materials (online only) in the Appendix of this article.",2016-12-29 +28603764,Data on endogenous bovine ovarian follicular cells peptides and small proteins obtained through Top-down High Resolution Mass Spectrometry.,"The endogenous peptides and small proteins extracted from bovine ovarian follicular cells (oocytes, cumulus and granulosa cells) were identified by Top-down High Resolution Mass Spectrometry (TD-HR-MS/MS) in order to annotate peptido- and proteoforms detected using qualitative and quantitative profiling method based on ICM-MS (Intact Cell Matrix-Assisted Laser Desorption/Ionization Time-of-Flight Mass Spectrometry). The description and analysis of these Top-down MS data in the context of oocyte quality biomarkers research are available in the original research article of Labas et al. (2017) http://dx.doi.org/10.1016/j.jprot.2017.03.027[1]. Raw data derived from this peptidomic/proteomic analysis have been deposited to the ProteomeXchange Consortium via the PRIDE partner repository (dataset identifier PXD004892). Here, we described the inventory of all identified peptido- and proteoforms including their biochemical and structural features, and functional annotation of correspondent proteins. This peptide/protein inventory revealed that TD-HR-MS/MS was appropriate method for both global and targeted proteomic analysis of ovarian tissues, and it can be further employed as a reference for other studies on follicular cells including single oocytes.",2017-05-26 +29360320,"State Differences in the Cost of Job-Related Health Insurance, 2013","Health insurance provided by employers is the source of medical coverage for most Americans under age 65. The cost of employer-sponsored health insurance varies considerably based on the State where the employer is located and the number of persons covered by the plan. 
This Statistical Brief presents State variations from the national average of the cost of job-related health insurance and how these costs are shared by employers and their employees. The Brief specifically examines the average premiums and employee contributions for private sector establishments in 2013 in the 10 most populous states based on the 2010 Decennial Census. This analysis is based on the most recent data available from the Insurance Component of the Medical Expenditure Panel Survey (MEPS-IC). Estimates for all other States and the District of Columbia are available on the MEPS Web site (http://www.meps.ahrq.gov). Only those estimates with statistically significant differences from the national average using a multiple comparison procedure of estimates at the 0.05 percent significance level are noted in the text. These estimates are also identified in the tables, with those above the national average noted with two asterisks (**) and those below the national average noted with one asterisk (*).",2018-01-24 +27363592,"Variations in the Genome: The Mutation Detection 2015 Meeting on Detection, Genome Sequencing, and Interpretation.","The content of the 13th Mutation Detection meeting (Leiden, April 2015) is summarized in this report. Topics discussed at the meeting included current challenges of clinical NGS, advances in bioinformatics, data quality control, single cell analysis and RNA sequencing, among others. Social, ethical and regulatory challenges of genomic data handling and data sharing were the focus of an expert panel debate. The 14th International Symposium on Variants in the Genome will take place in Santiago de Compostela, June 5-8, 2017. http://isv.variome.org.",2016-08-23 +25982853,ProtDCal: A program to compute general-purpose-numerical descriptors for sequences and 3D-structures of proteins.,"

Background

The exponential growth of protein structural and sequence databases is enabling multifaceted approaches to understanding the long sought sequence-structure-function relationship. Advances in computation now make it possible to apply well-established data mining and pattern recognition techniques to these data to learn models that effectively relate structure and function. However, extracting meaningful numerical descriptors of protein sequence and structure is a key issue that requires an efficient and widely available solution.

Results

We here introduce ProtDCal, a new computational software suite capable of generating tens of thousands of features considering both sequence-based and 3D-structural descriptors. We demonstrate, by means of principal component analysis and Shannon entropy tests, how ProtDCal's sequence-based descriptors provide new and more relevant information not encoded by currently available servers for sequence-based protein feature generation. The wide diversity of the 3D-structure-based features generated by ProtDCal is shown to provide additional complementary information and effectively completes its general protein encoding capability. As demonstration of the utility of ProtDCal's features, prediction models of N-linked glycosylation sites are trained and evaluated. Classification performance compares favourably with that of contemporary predictors of N-linked glycosylation sites, in spite of not using domain-specific features as input information.

Conclusions

ProtDCal provides a friendly and cross-platform graphical user interface, developed in the Java programming language and is freely available at: http://bioinf.sce.carleton.ca/ProtDCal/ . ProtDCal introduces local and group-based encoding which enhances the diversity of the information captured by the computed features. Furthermore, we have shown that adding structure-based descriptors contributes non-redundant additional information to the features-based characterization of polypeptide systems. This software is intended to provide a useful tool for general-purpose encoding of protein sequences and structures for applications is protein classification, similarity analyses and function prediction.",2015-05-16 +25378307,DEEP: a general computational framework for predicting enhancers.,"Transcription regulation in multicellular eukaryotes is orchestrated by a number of DNA functional elements located at gene regulatory regions. Some regulatory regions (e.g. enhancers) are located far away from the gene they affect. Identification of distal regulatory elements is a challenge for the bioinformatics research. Although existing methodologies increased the number of computationally predicted enhancers, performance inconsistency of computational models across different cell-lines, class imbalance within the learning sets and ad hoc rules for selecting enhancer candidates for supervised learning, are some key questions that require further examination. In this study we developed DEEP, a novel ensemble prediction framework. DEEP integrates three components with diverse characteristics that streamline the analysis of enhancer's properties in a great variety of cellular conditions. In our method we train many individual classification models that we combine to classify DNA regions as enhancers or non-enhancers. DEEP uses features derived from histone modification marks or attributes coming from sequence characteristics. 
Experimental results indicate that DEEP performs better than four state-of-the-art methods on the ENCODE data. We report the first computational enhancer prediction results on FANTOM5 data where DEEP achieves 90.2% accuracy and 90% geometric mean (GM) of specificity and sensitivity across 36 different tissues. We further present results derived using in vivo-derived enhancer data from VISTA database. DEEP-VISTA, when tested on an independent test set, achieved GM of 80.1% and accuracy of 89.64%. DEEP framework is publicly available at http://cbrc.kaust.edu.sa/deep/.",2014-11-05 +25976557,Manual perineal support at the time of childbirth: a systematic review and meta-analysis.,"

Background

Genital tract trauma is common with vaginal births and is associated with significant morbidity, particularly with obstetric anal sphincter injuries (OASIS). Debate continues regarding the effectiveness of perineal support during childbirth in reducing the risk of trauma.

Objectives

This review aimed to assess the effect of routine 'hands on'/manual perineal support (MPS) during childbirth, versus ad hoc/no perineal support ('hands off/poised'), on the risk and degree of perineal trauma.

Search strategy

This review is registered on PROSPERO (http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42014007058). We searched the CENTRAL, Embase, Medline, CINAHL, and OVIDs midwifery and infant care databases (from inception to December 2014).

Selection criteria

Published randomised controlled trials (RCTs) and non-randomised studies (NRSs) evaluating any 'hands on' perineal support technique during childbirth.

Data collection and analysis

Two reviewers independently assessed trials for inclusion, data extraction, and methodological quality. Discrepancies were resolved by discussion with a third reviewer.

Main results

We included five RCTs and seven NRSs in the review. Meta-analysis of RCTs did not demonstrate a statistically significant protective effect of MPS on the risk of OASIS (three studies, 6647 women; relative risk, RR 1.03; 95% confidence interval, 95% CI 0.32-3.36; statistical test for heterogeneity I(2) = 71%). Meta-analysis of NRSs showed a significant reduction in the risk of OASIS with MPS (three studies, 74,744 women; RR 0.45; 95% CI 0.40-0.50; I(2) = 32%).

Conclusion

Current evidence is insufficient to drive change in practice. An adequately powered randomised trial with an efficient design to evaluate the complex interventions adopted as part of MPS policies, ensuring controlled childbirth, is urgently needed.",2015-05-15 +24347543,Spatial dilemmas of diffusible public goods.,"The emergence of cooperation is a central question in evolutionary biology. Microorganisms often cooperate by producing a chemical resource (a public good) that benefits other cells. The sharing of public goods depends on their diffusion through space. Previous theory suggests that spatial structure can promote evolution of cooperation, but the diffusion of public goods introduces new phenomena that must be modeled explicitly. We develop an approach where colony geometry and public good diffusion are described by graphs. We find that the success of cooperation depends on a simple relation between the benefits and costs of the public good, the amount retained by a producer, and the average amount retained by each of the producer's neighbors. These quantities are derived as analytic functions of the graph topology and diffusion rate. In general, cooperation is favored for small diffusion rates, low colony dimensionality, and small rates of decay of the public good. DOI: http://dx.doi.org/10.7554/eLife.01169.001.",2013-12-17 +29982347,Are serum concentrations of vitamin B-12 causally related to cardiometabolic risk factors and disease? A Mendelian randomization study.,"

Background

Several observational studies have shown that low serum vitamin B-12 is associated with increased body mass index (BMI) and adverse cardiometabolic outcomes. However, it is unclear if these associations reflect a causal effect of vitamin B-12 on cardiometabolic risk factors and diseases, latent confounding, or reverse causality.

Objectives

The aims of this study were to investigate 1) the possible causal relation between vitamin B-12 and indicators of body fat, lipid, and glucose variables; type 2 diabetes (T2D); and cardiovascular disease by using a 2-sample Mendelian randomization (MR) method and 2) the possible pleiotropic role of fucosyltransferase 2 (FUT2).

Design

We selected 11 single nucleotide polymorphisms (SNPs) robustly associated with serum concentrations of vitamin B-12 in a previous genomewide association study (GWAS) in 45,576 individuals. We performed 2-sample MR analyses of the relation between vitamin B-12 and cardiometabolic risk factors and diseases with the use of publicly available GWAS summary statistics for 15 outcomes in ≤339,224 individuals. The robustness of results was tested with sensitivity analyses by using MR Egger regression and weighted-median estimation, and by performing additional analyses excluding a variant in the FUT2 gene, which may be pleiotropic.

Results

We found a suggestive causal relation between vitamin B-12 and fasting glucose and β cell function [homeostatic model assessment (HOMA) of β cell function (HOMA-B)]. However, we found no evidence that serum concentrations of vitamin B-12 were causally related to BMI, waist-to-hip ratio, plasma leptin, body fat, fasting insulin, insulin resistance (from HOMA of insulin resistance), glycated hemoglobin, triglycerides, T2D, coronary artery disease, or HDL, LDL, or total cholesterol.

Conclusions

We found no evidence that serum concentrations of vitamin B-12 are causally related to body weight or the majority of cardiometabolic outcomes investigated. However, vitamin B-12 may have a causal effect on fasting glucose and HOMA-B, although these results will require replication in large independent data sets. This trial was registered at http://www.isrctn.com/ISRCTN47414943 as ISRCTN47414943.",2018-08-01 +29947812,"Serum Zinc Concentrations in the US Population Are Related to Sex, Age, and Time of Blood Draw but Not Dietary or Supplemental Zinc.","

Background

Serum zinc concentration is used to assess the zinc status of populations. Cutoffs for serum zinc were developed on the basis of data from the second NHANES (1976-1980).

Objective

The objective of this study was to evaluate serum zinc concentrations in the US population and to determine factors affecting serum zinc with the use of NHANES 2011-2014.

Methods

Serum zinc was determined in males and females aged ≥6 y with the use of NHANES 2011-2014 (n = 4347). Dietary zinc intake was determined, and factors affecting serum zinc were identified with the use of regression models adjusting for sex, age, fasting status, and time of blood draw. ORs were calculated to identify factors associated with the risk of being below the serum zinc cutoff, and the prevalence of low serum zinc in the US was calculated. P < 0.01 was considered significant.

Results

Mean ± SE serum zinc concentrations in males and females were 84.9 ± 0.8 and 80.6 ± 0.6 μg/dL, respectively (P < 0.0001). Regression models with serum zinc as the dependent variable indicated that afternoon and evening blood draws (β = -9.7 and -15.3; P < 0.0001) were negatively associated with serum zinc concentrations and serum albumin (β = 16.1; P < 0.0001) and hemoglobin (β = 1.0; P = 0.0048) were positively associated with serum zinc concentrations. Hypoalbuminemia (OR = 11.2; 99% CI: 3.4, 37.3), anemia in females (OR: 3.4; 99% CI: 1.7, 6.9), and pregnancy (OR: 9.6; 99% CI: 2.9, 31.9) increased the odds of being below the serum zinc cutoff (P < 0.0001 for all). Zinc from diet or supplements did not affect serum zinc (P > 0.01). Approximately 3.8% of children (<10 y), 8.6% of males (≥10 y), and 8.2% of females (≥10 y) were below the serum zinc cutoff.

Conclusions

Factors such as sex, age, and time of blood draw should be considered when using serum zinc concentration to determine the zinc status of a population. Caution is advised when interpreting serum zinc concentration in populations with a high prevalence of hypoalbuminemia or anemia. This trial was registered at http://www.isrctn.com as ISRCTN96013840.",2018-08-01 +26598385,Effectiveness of Practices To Increase Timeliness of Providing Targeted Therapy for Inpatients with Bloodstream Infections: a Laboratory Medicine Best Practices Systematic Review and Meta-analysis.,"

Background

Bloodstream infection (BSI) is a major cause of morbidity and mortality throughout the world. Rapid identification of bloodstream pathogens is a laboratory practice that supports strategies for rapid transition to direct targeted therapy by providing for timely and effective patient care. In fact, the more rapidly that appropriate antimicrobials are prescribed, the lower the mortality for patients with sepsis. Rapid identification methods may have multiple positive impacts on patient outcomes, including reductions in mortality, morbidity, hospital lengths of stay, and antibiotic use. In addition, the strategy can reduce the cost of care for patients with BSIs.

Objectives

The purpose of this review is to evaluate the evidence for the effectiveness of three rapid diagnostic practices in decreasing the time to targeted therapy for hospitalized patients with BSIs. The review was performed by applying the Centers for Disease Control and Prevention's (CDC's) Laboratory Medicine Best Practices Initiative (LMBP) systematic review methods for quality improvement (QI) practices and translating the results into evidence-based guidance (R. H. Christenson et al., Clin Chem 57:816-825, 2011, http://dx.doi.org/10.1373/clinchem.2010.157131).

Search strategy

A comprehensive literature search was conducted to identify studies with measurable outcomes. A search of three electronic bibliographic databases (PubMed, Embase, and CINAHL), databases containing ""gray"" literature (unpublished academic, government, or industry evidence not governed by commercial publishing) (CIHI, NIHR, SIGN, and other databases), and the Cochrane database for English-language articles published between 1990 and 2011 was conducted in July 2011.

Dates of search

The dates of our search were from 1990 to July 2011.

Selection criteria

Animal studies and non-English publications were excluded. The search contained the following medical subject headings: bacteremia; bloodstream infection; time factors; health care costs; length of stay; morbidity; mortality; antimicrobial therapy; rapid molecular techniques, polymerase chain reaction (PCR); in situ hybridization, fluorescence; treatment outcome; drug therapy; patient care team; pharmacy service, hospital; hospital information systems; Gram stain; pharmacy service; and spectrometry, mass, matrix-assisted laser desorption-ionization. Phenotypic as well as the following key words were searched: targeted therapy; rapid identification; rapid; Gram positive; Gram negative; reduce(ed); cost(s); pneumoslide; PBP2; tube coagulase; matrix-assisted laser desorption/ionization time of flight; MALDI TOF; blood culture; EMR; electronic reporting; call to provider; collaboration; pharmacy; laboratory; bacteria; yeast; ICU; and others. In addition to the electronic search being performed, a request for unpublished quality improvement data was made to the clinical laboratory community.

Main results

Rapid molecular testing with direct communication significantly improves timeliness compared to standard testing. Rapid phenotypic techniques with direct communication likely improve the timeliness of targeted therapy. Studies show a significant and homogeneous reduction in mortality associated with rapid molecular testing combined with direct communication.

Authors' conclusions

No recommendation is made for or against the use of the three assessed practices of this review due to insufficient evidence. The overall strength of evidence is suggestive; the data suggest that each of these three practices has the potential to improve the time required to initiate targeted therapy and possibly improve other patient outcomes, such as mortality. The meta-analysis results suggest that the implementation of any of the three practices may be more effective at increasing timeliness to targeted therapy than routine microbiology techniques for identification of the microorganisms causing BSIs. Based on the included studies, results for all three practices appear applicable across multiple microorganisms, including methicillin-resistant Staphylococcus aureus (MRSA), methicillin-sensitive S. aureus (MSSA), Candida species, and Enterococcus species.",2016-01-01 +22582382,Draft genome sequences of the diarrheagenic Escherichia coli collection.,"We report the draft genome sequences of the collection referred to as the Escherichia coli DECA collection, which was assembled to contain representative isolates of the 15 most common diarrheagenic clones in humans (http://shigatox.net/new/). These genomes represent a valuable resource to the community of researchers who examine these enteric pathogens.",2012-06-01 +26481353,ECMDB 2.0: A richer resource for understanding the biochemistry of E. coli.,"ECMDB or the Escherichia coli Metabolome Database (http://www.ecmdb.ca) is a comprehensive database containing detailed information about the genome and metabolome of E. coli (K-12). First released in 2012, the ECMDB has undergone substantial expansion and many modifications over the past 4 years. This manuscript describes the most recent version of ECMDB (ECMDB 2.0). 
In particular, it provides a comprehensive update of the database that was previously described in the 2013 NAR Database Issue and details many of the additions and improvements made to the ECMDB over that time. Some of the most important or significant enhancements include a 13-fold increase in the number of metabolic pathway diagrams (from 125 to 1650), a 3-fold increase in the number of compounds linked to pathways (from 1058 to 3280), the addition of dozens of operon/metabolite signalling pathways, a 44% increase in the number of compounds in the database (from 2610 to 3760), a 7-fold increase in the number of compounds with NMR or MS spectra (from 412 to 3261) and a massive increase in the number of external links to other E. coli or chemical resources. These additions, along with many other enhancements aimed at improving the ease or speed of querying, searching and viewing the data within ECMDB should greatly facilitate the understanding of not only the metabolism of E. coli, but also allow the in-depth exploration of its extensive metabolic networks, its many signalling pathways and its essential biochemistry.",2015-10-19 +28087422,Nano-QSAR in cell biology: Model of cell viability as a mathematical function of available eclectic data.,"The prediction of biochemical endpoints is an important task of the modern medicinal chemistry, cell biology, and nanotechnology. Simplified molecular input-line entry system (SMILES) is a tool for representation of the molecular structure. In particular, SMILES can be used to build up the quantitative structure - property/activity relationships (QSPRs/QSARs). The QSPR/QSAR is a tool to predict an endpoint for a new substance, which has not been examined in experiment. Quasi-SMILES are representation of eclectic data related to an endpoint. 
In contrast to traditional SMILES, which are representation of the molecular structure, the quasi-SMILES are representation of conditions (in principle, the molecular structure also can be taken into account in quasi-SMILES). In this work, the quasi-SMILES were used to build up model for cell viability under impact of the metal-oxides nanoparticles by means of the CORAL software (http://www.insilico.eu/coral). The eclectic data for the quasi-SMILES are (i) molecular structure of metals-oxides; (ii) concentration of the nanoparticles; and (iii) the size of nanoparticles. The significance of different eclectic facts has been estimated. Mechanistic interpretation and the domain of applicability for the model are suggested. The statistical quality of the models is satisfactory for three different random distribution of available data into the training (sub-training and calibration) and the validation sets.",2017-01-11 +28774981,Draft Whole-Genome Sequences for Two Pigmentiphaga Isolates Recovered from Human Clinical Materials. ,"Features from two Pigmentiphaga isolates referred to Canada's National Microbiology Laboratory from human clinical materials were described previously (N. Bridger, S. Drews, T. Burdz, D. Wiebe, A. L. Pacheco, B. Ng, and K. Bernard, J Med Microbiol 62:708-711, 2013, https://doi.org/10.1099/jmm.0.051615-0). Whole-genome sequencing was performed on strains NML030171 and NML080357; the sequences were found to have 5.86 and 5.73 Mb of clean data and G+C contents of 67.5 and 66.74 mol%, respectively.",2017-08-03 +25038066,In-depth characterization of the cerebrospinal fluid (CSF) proteome displayed through the CSF proteome resource (CSF-PR).,"In this study, the human cerebrospinal fluid (CSF) proteome was mapped using three different strategies prior to Orbitrap LC-MS/MS analysis: SDS-PAGE and mixed mode reversed phase-anion exchange for mapping the global CSF proteome, and hydrazide-based glycopeptide capture for mapping glycopeptides. 
A maximal protein set of 3081 proteins (28,811 peptide sequences) was identified, of which 520 were identified as glycoproteins from the glycopeptide enrichment strategy, including 1121 glycopeptides and their glycosylation sites. To our knowledge, this is the largest number of identified proteins and glycopeptides reported for CSF, including 417 glycosylation sites not previously reported. From parallel plasma samples, we identified 1050 proteins (9739 peptide sequences). An overlap of 877 proteins was found between the two body fluids, whereas 2204 proteins were identified only in CSF and 173 only in plasma. All mapping results are freely available via the new CSF Proteome Resource (http://probe.uib.no/csf-pr), which can be used to navigate the CSF proteome and help guide the selection of signature peptides in targeted quantitative proteomics.",2014-07-18 +27153643,Sparse group factor analysis for biclustering of multiple data sources.,"

Motivation

Modelling methods that find structure in data are necessary with the current large volumes of genomic data, and there have been various efforts to find subsets of genes exhibiting consistent patterns over subsets of treatments. These biclustering techniques have focused on one data source, often gene expression data. We present a Bayesian approach for joint biclustering of multiple data sources, extending a recent method Group Factor Analysis to have a biclustering interpretation with additional sparsity assumptions. The resulting method enables data-driven detection of linear structure present in parts of the data sources.

Results

Our simulation studies show that the proposed method reliably infers biclusters from heterogeneous data sources. We tested the method on data from the NCI-DREAM drug sensitivity prediction challenge, resulting in an excellent prediction accuracy. Moreover, the predictions are based on several biclusters which provide insight into the data sources, in this case on gene expression, DNA methylation, protein abundance, exome sequence, functional connectivity fingerprints and drug sensitivity.

Availability and implementation

http://research.cs.aalto.fi/pml/software/GFAsparse/

Contacts

: kerstin.bunte@googlemail.com or samuel.kaski@aalto.fi.",2016-04-19 +26217350,Chernobyl seed project. Advances in the identification of differentially abundant proteins in a radio-contaminated environment.,"Plants have the ability to grow and successfully reproduce in radio-contaminated environments, which has been highlighted by nuclear accidents at Chernobyl (1986) and Fukushima (2011). The main aim of this article is to summarize the advances of the Chernobyl seed project which has the purpose to provide proteomic characterization of plants grown in the Chernobyl area. We present a summary of comparative proteomic studies on soybean and flax seeds harvested from radio-contaminated Chernobyl areas during two successive generations. Using experimental design developed for radio-contaminated areas, altered abundances of glycine betaine, seed storage proteins, and proteins associated with carbon assimilation into fatty acids were detected. Similar studies in Fukushima radio-contaminated areas might complement these data. The results from these Chernobyl experiments can be viewed in a user-friendly format at a dedicated web-based database freely available at http://www.chernobylproteomics.sav.sk.",2015-07-06 +22451271,1001 Proteomes: a functional proteomics portal for the analysis of Arabidopsis thaliana accessions.,"

Motivation

The sequencing of over a thousand natural strains of the model plant Arabidopsis thaliana is producing unparalleled information at the genetic level for plant researchers. To enable the rapid exploitation of these data for functional proteomics studies, we have created a resource for the visualization of protein information and proteomic datasets for sequenced natural strains of A. thaliana.

Results

The 1001 Proteomes portal can be used to visualize amino acid substitutions or non-synonymous single-nucleotide polymorphisms in individual proteins of A. thaliana based on the reference genome Col-0. We have used the available processed sequence information to analyze the conservation of known residues subject to protein phosphorylation among these natural strains. The substitution of amino acids in A. thaliana natural strains is heavily constrained and is likely a result of the conservation of functional attributes within proteins. At a practical level, we demonstrate that this information can be used to clarify ambiguously defined phosphorylation sites from phosphoproteomic studies. Protein sets of available natural variants are available for download to enable proteomic studies on these accessions. Together this information can be used to uncover the possible roles of specific amino acids in determining the structure and function of proteins in the model plant A. thaliana. An online portal to enable the community to exploit these data can be accessed at http://1001proteomes.masc-proteomics.org/",2012-03-25 +25592592,Refining literature curated protein interactions using expert opinions.,"The availability of high-quality physical interaction datasets is a prerequisite for system-level analysis of interactomes and supervised models to predict protein-protein interactions (PPIs). One source is literature-curated PPI databases in which pairwise associations of proteins published in the scientific literature are deposited. However, PPIs may not be clearly labelled as physical interactions affecting the quality of the entire dataset. In order to obtain a high-quality gold standard dataset for PPIs between human immunodeficiency virus (HIV-1) and its human host, we adopted a crowd-sourcing approach. We collected expert opinions and utilized an expectation-maximization based approach to estimate expert labeling quality. 
These estimates are used to infer the probability of a reported PPI actually being a direct physical interaction given the set of expert opinions. The effectiveness of our approach is demonstrated through synthetic data experiments and a high quality physical interaction network between HIV and human proteins is obtained. Since many literature-curated databases suffer from similar challenges, the framework described herein could be utilized in refining other databases. The curated data is available at http://www.cs.bilkent.edu.tr/~oznur.tastan/supp/psb2015/.",2015-01-01 +25196204,Vanno: a visualization-aided variant annotation tool.,"Next-generation sequencing (NGS) technologies have revolutionized the field of genetics and are trending toward clinical diagnostics. Exome and targeted sequencing in a disease context represent a major NGS clinical application, considering its utility and cost-effectiveness. With the ongoing discovery of disease-associated genes, various gene panels have been launched for both basic research and diagnostic tests. However, the fundamental inconsistencies among the diverse annotation sources, software packages, and data formats have complicated the subsequent analysis. To manage disease-associated NGS data, we developed Vanno, a Web-based application for in-depth analysis and rapid evaluation of disease-causative genome sequence alterations. Vanno integrates information from biomedical databases, functional predictions from available evaluation models, and mutation landscapes from TCGA cancer types. A highly integrated framework that incorporates filtering, sorting, clustering, and visual analytic modules is provided to facilitate exploration of oncogenomics datasets at different levels, such as gene, variant, protein domain, or three-dimensional structure. Such design is crucial for the extraction of knowledge from sequence alterations and translating biological insights into clinical applications. 
Taken together, Vanno supports almost all disease-associated gene tests and exome sequencing panels designed for NGS, providing a complete solution for targeted and exome sequencing analysis. Vanno is freely available at http://cgts.cgu.edu.tw/vanno.",2015-02-01 +28379994,"GOTHiC, a probabilistic model to resolve complex biases and to identify real interactions in Hi-C data.","Hi-C is one of the main methods for investigating spatial co-localisation of DNA in the nucleus. However, the raw sequencing data obtained from Hi-C experiments suffer from large biases and spurious contacts, making it difficult to identify true interactions. Existing methods use complex models to account for biases and do not provide a significance threshold for detecting interactions. Here we introduce a simple binomial probabilistic model that resolves complex biases and distinguishes between true and false interactions. The model corrects biases of known and unknown origin and yields a p-value for each interaction, providing a reliable threshold based on significance. We demonstrate this experimentally by testing the method against a random ligation dataset. Our method outperforms previous methods and provides a statistical framework for further data analysis, such as comparisons of Hi-C interactions between different conditions. GOTHiC is available as a BioConductor package (http://www.bioconductor.org/packages/release/bioc/html/GOTHiC.html).",2017-04-05 +28130237,Identification of protein complexes by integrating multiple alignment of protein interaction networks.,"

Motivation

Protein complexes are one of the keys to studying the behavior of a cell system. Many biological functions are carried out by protein complexes. During the past decade, the main strategy used to identify protein complexes from high-throughput network data has been to extract near-cliques or highly dense subgraphs from a single protein-protein interaction (PPI) network. Although experimental PPI data have increased significantly over recent years, most PPI networks still have many false positive interactions and false negative edge loss due to the limitations of high-throughput experiments. In particular, the false negative errors restrict the search space of such conventional protein complex identification approaches. Thus, it has become one of the most challenging tasks in systems biology to automatically identify protein complexes.

Results

In this study, we propose a new algorithm, NEOComplex (NECC- and Ortholog-based Complex identification by multiple network alignment), which integrates functional orthology information that can be obtained from different types of multiple network alignment (MNA) approaches to expand the search space of protein complex detection. As part of our approach, we also define a new edge clustering coefficient (NECC) to assign weights to interaction edges in PPI networks so that protein complexes can be identified more accurately. The NECC is based on the intuition that there is functional information captured in the common neighbors of the common neighbors as well. Our results show that our algorithm outperforms well-known protein complex identification tools in a balance between precision and recall on three eukaryotic species: human, yeast, and fly. As a result of MNAs of the species, the proposed approach can tolerate edge loss in PPI networks and even discover sparse protein complexes which have traditionally been a challenge to predict.

Availability and implementation

http://acolab.ie.nthu.edu.tw/bionetwork/NEOComplex.

Contact

bab@csail.mit.edu or csliao@ie.nthu.edu.tw.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +29270449,Draft genomes of Enterococcus faecium strains isolated from human feces before and after eradication therapy against Helicobacter pylori.,"The abundance of Enterococci in the human intestinal microbiota environment is usually < 0.1% of the total bacterial fraction. The multiple resistance to antibiotics of the opportunistic Enterococcus spp. is alarming for the world medical community because of their high prevalence among clinically significant strains of microorganisms. Enterococci are able to collect different mobile genetic elements and transmit resistance to antibiotics to a wide range of Gram-positive and Gram-negative species of microorganisms, including the transmission of vancomycin resistance to methicillin-resistant strains of Staphylococcus aureus. The number of infections caused by antibiotic-resistant strains of Enterococcus spp. is increasing. Here we present draft genomes of Enterococcus faecium strains. These strains were isolated from human feces before and after (1 month) Helicobacter pylori eradication therapy. The samples were subject to whole-genome sequencing using the Illumina HiSeq 2500 platform. The data is available at NCBI https://www.ncbi.nlm.nih.gov/bioproject/PRJNA412824.",2017-11-27 +26955064,An Informatics Approach to Evaluating Combined Chemical Exposures from Consumer Products: A Case Study of Asthma-Associated Chemicals and Potential Endocrine Disruptors.,"

Background

Simultaneous or sequential exposure to multiple environmental stressors can affect chemical toxicity. Cumulative risk assessments consider multiple stressors but it is impractical to test every chemical combination to which people are exposed. New methods are needed to prioritize chemical combinations based on their prevalence and possible health impacts.

Objectives

We introduce an informatics approach that uses publicly available data to identify chemicals that co-occur in consumer products, which account for a significant proportion of overall chemical load.

Methods

Fifty-five asthma-associated and endocrine disrupting chemicals (target chemicals) were selected. A database of 38,975 distinct consumer products and 32,231 distinct ingredient names was created from online sources, and PubChem and the Unified Medical Language System were used to resolve synonymous ingredient names. Synonymous ingredient names are different names for the same chemical (e.g., vitamin E and tocopherol).

Results

Nearly one-third of the products (11,688 products, 30%) contained ≥ 1 target chemical and 5,229 products (13%) contained > 1. Of the 55 target chemicals, 31 (56%) appear in ≥ 1 product and 19 (35%) appear under more than one name. The most frequent three-way chemical combination (2-phenoxyethanol, methyl paraben, and ethyl paraben) appears in 1,059 products. Further work is needed to assess combined chemical exposures related to the use of multiple products.

Conclusions

The informatics approach increased the number of products considered in a traditional analysis by two orders of magnitude, but missing/incomplete product labels can limit the effectiveness of this approach. Such an approach must resolve synonymy to ensure that chemicals of interest are not missed. Commonly occurring chemical combinations can be used to prioritize cumulative toxicology risk assessments.

Citation

Gabb HA, Blake C. 2016. An informatics approach to evaluating combined chemical exposures from consumer products: a case study of asthma-associated chemicals and potential endocrine disruptors. Environ Health Perspect 124:1155-1165; http://dx.doi.org/10.1289/ehp.1510529.",2016-03-08 +29631306,A Comparison of GFR Estimation Formulae in Pediatric Oncology.,"

Background

Application of potentially nephrotoxic chemotherapy requires continuous monitoring of renal function for toxicity and dosing. Novel pediatric glomerular filtration rate (GFR) estimating equations including cystatin C have been proposed to enhance the reliability of GFR calculation.

Materials and methods

We examined a pediatric oncologic data set with a total of 363 GFR measurements. An analysis of distribution characteristics and comparison of medians was performed to compare creatinine and cystatin C-based GFR estimating formulae. Furthermore, we investigated the clinical impact of different equations in regard to therapeutic consequences.

Results

Significant differences in estimated GFR values were calculated depending on the applied formula (range of median GFR from 94.8 to 180.9 mL/min per 1.73 m2) which may result in different therapeutic consequences for the use of potentially nephrotoxic chemotherapeutic agents. Significant correlation for all examined formulae was identified, however there were large fluctuations among the correlation coefficients ranging from 0.254 to 1.0.

Conclusion

This study compares proposed pediatric GFR estimating equations in a clinical setting. It underlines the current limitations and difficulties of GFR estimation including potential dosing errors. Cystatin C-based equations can be used as alternatives to creatinine-based estimations when the appropriate laboratory method has been applied. A comparative calculator for pediatric GFR estimating equations along with background information is provided at http://gfr.pedz.de and may support clinical decision-making.",2018-04-09 +29844118,Integrative Modeling Identifies Key Determinants of Inhibitor Sensitivity in Breast Cancer Cell Lines.,"Cancer cell lines differ greatly in their sensitivity to anticancer drugs as a result of different oncogenic drivers and drug resistance mechanisms operating in each cell line. Although many of these mechanisms have been discovered, it remains a challenge to understand how they interact to render an individual cell line sensitive or resistant to a particular drug. To better understand this variability, we profiled a panel of 30 breast cancer cell lines in the absence of drugs for their mutations, copy number aberrations, mRNA, protein expression and protein phosphorylation, and for response to seven different kinase inhibitors. We then constructed a knowledge-based, Bayesian computational model that integrates these data types and estimates the relative contribution of various drug sensitivity mechanisms. The resulting model of regulatory signaling explained the majority of the variability observed in drug response. The model also identified cell lines with an unexplained response, and for these we searched for novel explanatory factors. Among others, we found that 4E-BP1 protein expression, and not just the extent of phosphorylation, was a determinant of mTOR inhibitor sensitivity. 
We validated this finding experimentally and found that overexpression of 4E-BP1 in cell lines that normally possess low levels of this protein is sufficient to increase mTOR inhibitor sensitivity. Taken together, our work demonstrates that combining experimental characterization with integrative modeling can be used to systematically test and extend our understanding of the variability in anticancer drug response.Significance: By estimating how different oncogenic mutations and drug resistance mechanisms affect the response of cancer cells to kinase inhibitors, we can better understand and ultimately predict response to these anticancer drugs.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/15/4396/F1.large.jpg Cancer Res; 78(15); 4396-410. ©2018 AACR.",2018-05-29 +30073952,Fine Particulate Air Pollution and Adverse Birth Outcomes: Effect Modification by Regional Nonvolatile Oxidative Potential.,"

Background

Prenatal exposure to fine particulate matter air pollution with aerodynamic diameter ≤2.5 μm (PM2.5) has been associated with preterm delivery and low birth weight (LBW), but few studies have examined possible effect modification by oxidative potential.

Objectives

The aim of this study was to evaluate if regional differences in the oxidative potential of PM2.5 modify the relationship between PM2.5 and adverse birth outcomes.

Methods

A retrospective cohort study was conducted using 196,171 singleton births that occurred in 31 cities in the province of Ontario, Canada, from 2006 to 2012. Daily air pollution data were collected from ground monitors, and city-level PM2.5 oxidative potential was measured. We used random-effects meta-analysis to combine the estimates of effect from regression models across cities on preterm birth, term LBW, and term birth weight and used meta-regression to evaluate the modifying effect of PM2.5 oxidative potential.

Results

An interquartile increase (2.6 μg/m3) in first-trimester PM2.5 was positively associated with term LBW among women in the highest quartile of glutathione (GSH)-related oxidative potential [odds ratio (OR)=1.28; 95% confidence interval (CI): 1.10, 1.48], but not the lowest quartile (OR=0.99; 95% CI: 0.87, 1.14; p-interaction=0.03). PM2.5 on the day of delivery also was associated with preterm birth among women in the highest quartile of GSH-related oxidative potential [hazard ratio (HR)=1.02; 95% CI: 1.01, 1.04], but not the lowest quartile [HR=0.97; 95% CI: 0.95, 1.00; p-interaction=0.04]. Between-city differences in ascorbate (AA)-related oxidative potential did not significantly modify associations with PM2.5.

Conclusions

Between-city differences in GSH-related oxidative potential may modify the impact of PM2.5 on the risk of term LBW and preterm birth. https://doi.org/10.1289/EHP2535.",2018-07-31 +24867943,CWig: compressed representation of Wiggle/BedGraph format.,"

Motivation

BigWig, a format to represent read density data, is one of the most popular data types. They can represent the peak intensity in ChIP-seq, the transcript expression in RNA-seq, the copy number variation in whole genome sequencing, etc. UCSC Encode project uses the bigWig format heavily for storage and visualization. Of 5.2 TB Encode hg19 database, 1.6 TB (31% of the total space) is used to store bigWig files. BigWig format not only saves a lot of space but also supports fast queries that are crucial for interactive analysis and browsing. In our benchmark, bigWig often has similar size to the gzipped raw data, while is still able to support ∼ 5000 random queries per second.

Results

Although bigWig is good enough at the moment, both storage space and query time are expected to become limited when sequencing gets cheaper. This article describes a new method to store density data named CWig. The format uses on average one-third of the size of existing bigWig files and improves random query speed up to 100 times.

Availability and implementation

http://genome.ddns.comp.nus.edu.sg/∼cwig.",2014-05-27 +25392411,PyIgClassify: a database of antibody CDR structural classifications.,"Classification of the structures of the complementarity determining regions (CDRs) of antibodies is critically important for antibody structure prediction and computational design. We have previously performed a clustering of antibody CDR conformations and defined a systematic nomenclature consisting of the CDR, length and an integer starting from the largest to the smallest cluster in the data set (e.g. L1-11-1). We present PyIgClassify (for Python-based immunoglobulin classification; available at http://dunbrack2.fccc.edu/pyigclassify/), a database and web server that provides access to assignments of all CDR structures in the PDB to our classification system. The database includes assignments to the IMGT germline V regions for heavy and light chains for several species. For humanized antibodies, the assignment of the frameworks is to human germlines and the CDRs to the germlines of mice or other species sources. The database can be searched by PDB entry, cluster identifier and IMGT germline group (e.g. human IGHV1). The entire database is downloadable so that users may filter the data as needed for antibody structure analysis, prediction and design.",2014-11-11 +30581194,Web-based applications to simulate drinking water inorganic chloramine chemistry.,"Two web-based applications (WBAs) relevant to drinking water practice are presented to simulate (1) inorganic chloramine formation and stability, including an example inorganic chloramine demand reaction for organic matter and (2) breakpoint curves. The model underlying both WBAs is a well-established inorganic chloramine formation and decay model. 
The WBAs were developed to be freely accessible over the Internet as web pages (https://usepaord.shinyapps.io/Unified-Combo/ and https://usepaord.shinyapps.io/Breakpoint-Curve/), providing drinking water practitioners (e.g., operators, regulators, engineers, professors, and students) learning tools to explore inorganic chloramine chemistry in an interactive manner without requiring proprietary software or user modeling expertise. The WBAs allow the user to specify two side-by-side simulations, providing a direct comparison of impacts associated with changing simulation conditions (e.g., free chlorine, free ammonia, and total organic carbon concentrations; pH; total alkalinity; and temperature). Once completed, the user may download simulation data to use offline. The WBAs' implementation, validation, and example simulations are described.",2018-01-01 +29695676,[Notable Adverse Events Associated with Concomitant Use of Health Foods and Drugs Derived from the Analysis of HFNet Data on the Safety and Effectiveness of Health Foods].,"Health foods are commonly consumed at their own discretion by patients with various diseases who are also being treated with conventional drugs. Both health foods and drugs are diverse, and enormous numbers of possible combinations exist, so that it is very difficult to identify adverse events that may occur due to their interactions. Here, we analyzed the characteristics of adverse events related to the concomitant use of health foods and drugs using data from the ""Information system on safety and effectiveness for health foods (HFNet)"" website (https://hfnet.nibiohn.go.jp/) compiled by the Food Function and Labeling Department of the National Institute of Health and Nutrition of Japan. We identified 64 reports and 71 patients, and characterized them according to symptom severity and drug classification. 
The analysis revealed that symptoms of liver dysfunction were mainly reported in patients receiving high-risk drugs, such as antiepileptic, antineoplastic, antiarrhythmic, and antithrombotic drugs, concomitantly with health foods or drugs. However, journal articles describing health food and drug interactions generally did not provide sufficient information about the ingredients of the health foods.",2018-01-01 +23029291,ThioFinder: a web-based tool for the identification of thiopeptide gene clusters in DNA sequences.,"Thiopeptides are a growing class of sulfur-rich, highly modified heterocyclic peptides that are mainly active against Gram-positive bacteria including various drug-resistant pathogens. Recent studies also reveal that many thiopeptides inhibit the proliferation of human cancer cells, further expanding their application potentials for clinical use. Thiopeptide biosynthesis shares a common paradigm, featuring a ribosomally synthesized precursor peptide and conserved posttranslational modifications, to afford a characteristic core system, but differs in tailoring to furnish individual members. Identification of new thiopeptide gene clusters, by taking advantage of increasing information of DNA sequences from bacteria, may facilitate new thiopeptide discovery and enrichment of the unique biosynthetic elements to produce novel drug leads by applying the principle of combinatorial biosynthesis. In this study, we have developed a web-based tool ThioFinder to rapidly identify thiopeptide biosynthetic gene cluster from DNA sequence using a profile Hidden Markov Model approach. Fifty-four new putative thiopeptide biosynthetic gene clusters were found in the sequenced bacterial genomes of previously unknown producing microorganisms. 
ThioFinder is fully supported by an open-access database ThioBase, which contains the sufficient information of the 99 known thiopeptides regarding the chemical structure, biological activity, producing organism, and biosynthetic gene (cluster) along with the associated genome if available. The ThioFinder website offers researchers a unique resource and great flexibility for sequence analysis of thiopeptide biosynthetic gene clusters. ThioFinder is freely available at http://db-mml.sjtu.edu.cn/ThioFinder/.",2012-09-24 +29723244,"The ""social brain"" is highly sensitive to the mere presence of social information: An automated meta-analysis and an independent study.","How the human brain processes social information is an increasingly researched topic in psychology and neuroscience, advancing our understanding of basic human cognition and psychopathologies. Neuroimaging studies typically seek to isolate one specific aspect of social cognition when trying to map its neural substrates. It is unclear if brain activation elicited by different social cognitive processes and task instructions are also spontaneously elicited by general social information. In this study, we investigated whether these brain regions are evoked by the mere presence of social information using an automated meta-analysis and confirmatory data from an independent study of simple appraisal of social vs. non-social images. Results of 1,000 published fMRI studies containing the keyword of ""social"" were subject to an automated meta-analysis (http://neurosynth.org). To confirm that significant brain regions in the meta-analysis were driven by a social effect, these brain regions were used as regions of interest (ROIs) to extract and compare BOLD fMRI signals of social vs. non-social conditions in the independent study. 
The NeuroSynth results indicated that the dorsal and ventral medial prefrontal cortex, posterior cingulate cortex, bilateral amygdala, bilateral occipito-temporal junction, right fusiform gyrus, bilateral temporal pole, and right inferior frontal gyrus are commonly engaged in studies with a prominent social element. The social-non-social contrast in the independent study showed a strong resemblance to the NeuroSynth map. ROI analyses revealed that a social effect was credible in 9 out of the 11 NeuroSynth regions in the independent dataset. The findings support the conclusion that the ""social brain"" is highly sensitive to the mere presence of social information.",2018-05-03 +29921698,Computational Characterization of Suppressive Immune Microenvironments in Glioblastoma.,"The immunosuppressive microenvironment in glioblastoma (GBM) prevents an efficient antitumoral immune response and enables tumor formation and growth. Although an understanding of the nature of immunosuppression is still largely lacking, it is important for successful cancer treatment through immune system modulation. To gain insight into immunosuppression in GBM, we performed a computational analysis to model relative immune cell content and type of immune response in each GBM tumor sample from The Cancer Genome Atlas RNA-seq data set. We uncovered high variability in immune system-related responses and in the composition of the microenvironment across the cohort, suggesting immunologic diversity. Immune cell compositions were associated with typical alterations such as IDH mutation or inactivating NF1 mutation/deletion. Furthermore, our analysis identified three GBM subgroups presenting different adaptive immune responses: negative, humoral, and cellular-like. These subgroups were linked to transcriptional GBM subtypes and typical genetic alterations. All G-CIMP and IDH-mutated samples were in the negative group, which was also enriched by cases with focal amplification of CDK4 and MARCH9. 
IDH1-mutated samples showed lower expression and higher DNA methylation of MHC-I-type HLA genes. Overall, our analysis reveals heterogeneity in the immune microenvironment of GBM and identifies new markers for immunosuppression. Characterization of diverse immune responses will facilitate patient stratification and improve personalized immunotherapy in the future.Significance: This study utilizes a computational approach to characterize the immune environments in glioblastoma and shows that glioblastoma immune microenvironments can be classified into three major subgroups, which are linked to typical glioblastoma alterations such as IDH mutation, NF1 inactivation, and CDK4-MARCH9 locus amplification.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/19/5574/F1.large.jpg Cancer Res; 78(19); 5574-85. ©2018 AACR.",2018-06-19 +29524011,HIVprotI: an integrated web based platform for prediction and design of HIV proteins inhibitors.,"A number of anti-retroviral drugs are being used for treating Human Immunodeficiency Virus (HIV) infection. Due to emergence of drug resistant strains, there is a constant quest to discover more effective anti-HIV compounds. In this endeavor, computational tools have proven useful in accelerating drug discovery. Although methods were published to design a class of compounds against a specific HIV protein, but an integrated web server for the same is lacking. Therefore, we have developed support vector machine based regression models using experimentally validated data from ChEMBL repository. Quantitative structure activity relationship based features were selected for predicting inhibition activity of a compound against HIV proteins namely protease (PR), reverse transcriptase (RT) and integrase (IN). The models presented a maximum Pearson correlation coefficient of 0.78, 0.76, 0.74 and 0.76, 0.68, 0.72 during tenfold cross-validation on IC50 and percent inhibition datasets of PR, RT, IN respectively. 
These models performed equally well on the independent datasets. Chemical space mapping, applicability domain analyses and other statistical tests further support robustness of the predictive models. Currently, we have identified a number of chemical descriptors that are imperative in predicting the compound inhibition potential. HIVprotI platform ( http://bioinfo.imtech.res.in/manojk/hivproti ) would be useful in virtual screening of inhibitors as well as designing of new molecules against the important HIV proteins for therapeutics development.",2018-03-09 +25809586,Xylitol-containing products for preventing dental caries in children and adults.,"

Background

Dental caries is a highly prevalent chronic disease which affects the majority of people. It has been postulated that the consumption of xylitol could help to prevent caries. The evidence on the effects of xylitol products is not clear and therefore it is important to summarise the available evidence to determine its effectiveness and safety.

Objectives

To assess the effects of different xylitol-containing products for the prevention of dental caries in children and adults.

Search methods

We searched the following electronic databases: the Cochrane Oral Health Group Trials Register (to 14 August 2014), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library, 2014, Issue 7), MEDLINE via OVID (1946 to 14 August 2014), EMBASE via OVID (1980 to 14 August 2014), CINAHL via EBSCO (1980 to 14 August 2014), Web of Science Conference Proceedings (1990 to 14 August 2014), Proquest Dissertations and Theses (1861 to 14 August 2014). We searched the US National Institutes of Health Trials Register (http://clinicaltrials.gov) and the WHO Clinical Trials Registry Platform for ongoing trials. No restrictions were placed on the language or date of publication when searching the electronic databases.

Selection criteria

We included randomised controlled trials assessing the effects of xylitol products on dental caries in children and adults.

Data collection and analysis

Two review authors independently screened the results of the electronic searches, extracted data and assessed the risk of bias of the included studies. We attempted to contact study authors for missing data or clarification where feasible. For continuous outcomes, we used means and standard deviations to obtain the mean difference and 95% confidence interval (CI). We used the continuous data to calculate prevented fractions (PF) and 95% CIs to summarise the percentage reduction in caries. For dichotomous outcomes, we reported risk ratios (RR) and 95% CIs. As there were less than four studies included in the meta-analysis, we used a fixed-effect model. We planned to use a random-effects model in the event that there were four or more studies in a meta-analysis.

Main results

We included 10 studies that analysed a total of 5903 participants. One study was assessed as being at low risk of bias, two were assessed as being at unclear risk of bias, with the remaining seven being at high risk of bias.The main finding of the review was that, over 2.5 to 3 years of use, a fluoride toothpaste containing 10% xylitol may reduce caries by 13% when compared to a fluoride-only toothpaste (PF -0.13, 95% CI -0.18 to -0.08, 4216 children analysed, low-quality evidence).The remaining evidence on children, from small single studies with risk of bias issues and great uncertainty associated with the effect estimates, was insufficient to determine a benefit from xylitol products. One study reported that xylitol syrup (8 g per day) reduced caries by 58% (95% CI 33% to 83%, 94 infants analysed, low quality evidence) when compared to a low-dose xylitol syrup (2.67 g per day) consumed for 1 year.The following results had 95% CIs that were compatible with both a reduction and an increase in caries associated with xylitol: xylitol lozenges versus no treatment in children (very low quality body of evidence); xylitol sucking tablets versus no treatment in infants (very low quality body of evidence); xylitol tablets versus control (sorbitol) tablets in infants (very low quality body of evidence); xylitol wipes versus control wipes in infants (low quality body of evidence).There was only one study investigating the effects of xylitol lozenges, when compared to control lozenges, in adults (low quality body of evidence). The effect estimate had a 95% CI that was compatible with both a reduction and an increase in caries associated with xylitol.Four studies reported that there were no adverse effects from any of the interventions. Two studies reported similar rates of adverse effects between study arms. The remaining studies either mentioned adverse effects but did not report any usable data, or did not mention them at all. 
Adverse effects include sores in the mouth, cramps, bloating, constipation, flatulence, and loose stool or diarrhoea.

Authors' conclusions

We found some low quality evidence to suggest that fluoride toothpaste containing xylitol may be more effective than fluoride-only toothpaste for preventing caries in the permanent teeth of children, and that there are no associated adverse-effects from such toothpastes. The effect estimate should be interpreted with caution due to high risk of bias and the fact that it results from two studies that were carried out by the same authors in the same population. The remaining evidence we found is of low to very low quality and is insufficient to determine whether any other xylitol-containing products can prevent caries in infants, older children, or adults.",2015-03-26 +29059333,The AraGWAS Catalog: a curated and standardized Arabidopsis thaliana GWAS catalog.,"The abundance of high-quality genotype and phenotype data for the model organism Arabidopsis thaliana enables scientists to study the genetic architecture of many complex traits at an unprecedented level of detail using genome-wide association studies (GWAS). GWAS have been a great success in A. thaliana and many SNP-trait associations have been published. With the AraGWAS Catalog (https://aragwas.1001genomes.org) we provide a publicly available, manually curated and standardized GWAS catalog for all publicly available phenotypes from the central A. thaliana phenotype repository, AraPheno. All GWAS have been recomputed on the latest imputed genotype release of the 1001 Genomes Consortium using a standardized GWAS pipeline to ensure comparability between results. The catalog includes currently 167 phenotypes and more than 222 000 SNP-trait associations with P < 10-4, of which 3887 are significantly associated using permutation-based thresholds. 
The AraGWAS Catalog can be accessed via a modern web-interface and provides various features to easily access, download and visualize the results and summary statistics across GWAS.",2018-01-01 +25203647,Accelerating translational research by clinically driven development of an informatics platform--a case study.,"Translational medicine is becoming increasingly dependent upon data generated from health care, clinical research, and molecular investigations. This increasing rate of production and diversity in data has brought about several challenges, including the need to integrate fragmented databases, enable secondary use of patient clinical data from health care in clinical research, and to create information systems that clinicians and biomedical researchers can readily use. Our case study effectively integrates requirements from the clinical and biomedical researcher perspectives in a translational medicine setting. Our three principal achievements are (a) a design of a user-friendly web-based system for management and integration of clinical and molecular databases, while adhering to proper de-identification and security measures; (b) providing a real-world test of the system functionalities using clinical cohorts; and (c) system integration with a clinical decision support system to demonstrate system interoperability. We engaged two active clinical cohorts, 747 psoriasis patients and 2001 rheumatoid arthritis patients, to demonstrate efficient query possibilities across the data sources, enable cohort stratification, extract variation in antibody patterns, study biomarker predictors of treatment response in RA patients, and to explore metabolic profiles of psoriasis patients. Finally, we demonstrated system interoperability by enabling integration with an established clinical decision support system in health care. To assure the usefulness and usability of the system, we followed two approaches. 
First, we created a graphical user interface supporting all user interactions. Secondly we carried out a system performance evaluation study where we measured the average response time in seconds for active users, http errors, and kilobits per second received and sent. The maximum response time was found to be 0.12 seconds; no server or client errors of any kind were detected. In conclusion, the system can readily be used by clinicians and biomedical researchers in a translational medicine setting.",2014-09-09 +22359434,"The capsicum transcriptome DB: a ""hot"" tool for genomic research.","Chili pepper (Capsicum annuum) is an economically important crop with no available public genome sequence. We describe a genomic resource to facilitate Capsicum annuum research. A collection of Expressed Sequence Tags (ESTs) derived from five C. annuum organs (root, stem, leaf, flower and fruit) were sequenced using the Sanger method and multiple leaf transcriptomes were deeply sampled using GS-pyrosequencing. A hybrid assembly of 1,324,516 raw reads yielded 32,314 high quality contigs as validated by coverage and identity analysis with existing pepper sequences. Overall, 75.5% of the contigs had significant sequence similarity to entries in nucleic acid and protein databases; 23% of the sequences have not been previously reported for C. annuum and expand sequence resources for this species. A MySQL database and a user-friendly Web interface were constructed with search-tools that permit queries of the ESTs including sequence, functional annotation, Gene Ontology classification, metabolic pathways, and assembly information. The Capsicum Transcriptome DB is freely available from http://www.bioingenios.ira.cinvestav.mx:81/Joomla/",2012-01-06 +28549446,"""gnparser"": a powerful parser for scientific names based on Parsing Expression Grammar.","

Background

Scientific names in biology act as universal links. They allow us to cross-reference information about organisms globally. However variations in spelling of scientific names greatly diminish their ability to interconnect data. Such variations may include abbreviations, annotations, misspellings, etc. Authorship is a part of a scientific name and may also differ significantly. To match all possible variations of a name we need to divide them into their elements and classify each element according to its role. We refer to this as 'parsing' the name. Parsing categorizes name's elements into those that are stable and those that are prone to change. Names are matched first by combining them according to their stable elements. Matches are then refined by examining their varying elements. This two stage process dramatically improves the number and quality of matches. It is especially useful for the automatic data exchange within the context of ""Big Data"" in biology.

Results

We introduce Global Names Parser (gnparser). It is a Java tool written in Scala language (a language for Java Virtual Machine) to parse scientific names. It is based on a Parsing Expression Grammar. The parser can be applied to scientific names of any complexity. It assigns a semantic meaning (such as genus name, species epithet, rank, year of publication, authorship, annotations, etc.) to all elements of a name. It is able to work with nested structures as in the names of hybrids. gnparser performs with ≈99% accuracy and processes 30 million name-strings/hour per CPU thread. The gnparser library is compatible with Scala, Java, R, Jython, and JRuby. The parser can be used as a command line application, as a socket server, a web-app or as a RESTful HTTP-service. It is released under an Open source MIT license.

Conclusions

Global Names Parser (gnparser) is a fast, high precision tool for biodiversity informaticians and biologists working with large numbers of scientific names. It can replace expensive and error-prone manual parsing and standardization of scientific names in many situations, and can quickly enhance the interoperability of distributed biological information.",2017-05-26 +28180281,"Large-scale analysis of microRNA expression, epi-transcriptomic features and biogenesis.","MicroRNAs are important genetic regulators in both animals and plants. They have a range of functions spanning development, differentiation, growth, metabolism and disease. The advent of next-generation sequencing technologies has made it a relatively straightforward task to detect these molecules and their relative expression via sequencing. There are a large number of published studies with deposited datasets. However, there are currently few resources that capitalize on these data to better understand the features, distribution and biogenesis of miRNAs. Herein, we focus on Human and Mouse for which the majority of data are available. We reanalyse sequencing data from 461 samples into a coordinated catalog of microRNA expression. We use this to perform large-scale analyses of miRNA function and biogenesis. These analyses include global expression comparison, co-expression of miRNA clusters and the prediction of miRNA strand-specificity and underlying constraints. Additionally, we report for the first time a global analysis of miRNA epi-transcriptomic modifications and assess their prevalence across tissues, samples and families. Finally, we report a list of potentially mis-annotated miRNAs in miRBase based on their aggregated modification profiles. The results have been collated into a comprehensive online repository of miRNA expression and features such as modifications and RNA editing events, which is available at: http://wwwdev.ebi.ac.uk/enright-dev/miratlas. 
We believe these findings will further contribute to our understanding of miRNA function in animals and benefit the miRNA community in general.",2017-02-01 +26361768,A user-friendly web portal for analyzing conformational changes in structures of Mycobacterium tuberculosis.,"Initiation of the Tuberculosis Structural Consortium has resulted in the expansion of the Mycobacterium tuberculosis (MTB) protein structural database. Currently, 969 experimentally solved structures are available for 354 MTB proteins. This includes multiple crystal structures for a given protein under different functional conditions, such as the presence of different ligands or mutations. In depth analysis of the multiple structures reveal that subtle differences exist in conformations of a given protein under varied conditions. Therefore, it is immensely important to understand the conformational differences between the multiple structures of a given protein in order to select the most suitable structure for molecular docking and structure-based drug designing. Here, we introduce a web portal ( http://bmi.icmr.org.in/mtbsd/torsion.php ) that we developed to provide comparative data on the ensemble of available structures of MTB proteins, such as Cα root means square deviation (RMSD), sequence identity, presence of mutations and torsion angles. Additionally, torsion angles were used to perform principal component analysis (PCA) to identify the conformational differences between the structures. Additionally, we present a few case studies to demonstrate this database. 
Graphical Abstract Conformational changes seen in the structures of the enoyl-ACP reductase protein encoded by the Mycobacterial gene inhA.",2015-09-11 +,Digitization and online availability of original collecting mission data to improve data quality and enhance the conservation and use of plant genetic resources,"Ex situ conservation in genebanks is the most important way of conserving plant genetic resources for food and agriculture (PGRFA) (FAO 2010). The use of germplasm conserved in genebanks depends to a large extent on the quality and quantity of data available about each accession. Initial selection of accessions for use in research or breeding is often made based on the available passport information, which describes the source of the material. Availability of collecting site description or geographic coordinates is considered a quality indicator in particular for accessions of wild species and landraces (Van Hintum et al. in Plant Genet Resour Charact Util 9(3):478–485, 2011). However lack or unavailability of accession specific data, including passport and location data, continues to represent a constraint to enhanced utilization of accessions (FAO 2010; Khoury et al. in Genet Resour Crop Evol 57(4):625–639, 2010). Collecting mission reports and collecting forms provide original data, including location data, about materials collected and distributed to genebanks. The International Board for Plant Genetic Resources and its successor, the International Plant Genetic Resources Institute (now Bioversity International) have supported the collection of over 225,000 samples of PGRFA during the last quarter of the past century. The documentation gathered at the time of their collection has recently been digitized, passport data extracted, and made available through the web ( http://www.central-repository.cgiar.org/ ; http://singer.cgiar.org/index.jsp?page=biomissions ), where it can be consulted to integrate and improve the quality of passport data. 
Collected samples can be linked to accessions in genebanks. The original collecting mission reports often include eco-geographic, environmental, biotic and climate data that can be used to improve knowledge about the accessions and facilitate their utilization.",2012-06-01 +26509288,ProBiS-CHARMMing: Web Interface for Prediction and Optimization of Ligands in Protein Binding Sites.,"Proteins often exist only as apo structures (unligated) in the Protein Data Bank, with their corresponding holo structures (with ligands) unavailable. However, apoproteins may not represent the amino-acid residue arrangement upon ligand binding well, which is especially problematic for molecular docking. We developed the ProBiS-CHARMMing web interface by connecting the ProBiS ( http://probis.cmm.ki.si ) and CHARMMing ( http://www.charmming.org ) web servers into one functional unit that enables prediction of protein-ligand complexes and allows for their geometry optimization and interaction energy calculation. The ProBiS web server predicts ligands (small compounds, proteins, nucleic acids, and single-atom ligands) that may bind to a query protein. This is achieved by comparing its surface structure against a nonredundant database of protein structures and finding those that have binding sites similar to that of the query protein. Existing ligands found in the similar binding sites are then transposed to the query according to predictions from ProBiS. The CHARMMing web server enables, among other things, minimization and potential energy calculation for a wide variety of biomolecular systems, and it is used here to optimize the geometry of the predicted protein-ligand complex structures using the CHARMM force field and to calculate their interaction energies with the corresponding query proteins. 
We show how ProBiS-CHARMMing can be used to predict ligands and their poses for a particular binding site, and minimize the predicted protein-ligand complexes to obtain representations of holoproteins. The ProBiS-CHARMMing web interface is freely available for academic users at http://probis.nih.gov.",2015-11-09 +28971119,Raman and Infrared spectroscopies and X-ray diffraction data on bupivacaine and ropivacaine complexed with 2-hydroxypropyl-β-cyclodextrin.,"The data presented in this article are related to the research article entitled ""Probing the dynamics of complexed local anesthetics via neutron scattering spectroscopy and DFT calculations (http://dx.doi.org/10.1016/j.ijpharm.2017.03.051)"" (Martins et al., 2017) [1]. This work shows the molecular and structural behavior of the local anesthetics (LAs) bupivacaine (BVC, C18H28N2O) and ropivacaine (RVC, C17H26N2O) before and after complexation with the water-soluble oligosaccharide 2-hydroxypropyl-β-cyclodextrin (HP-β-CD).",2017-09-04 +27801969,High performance computation of landscape genomic models including local indicators of spatial association.,"With the increasing availability of both molecular and topo-climatic data, the main challenges facing landscape genomics - that is the combination of landscape ecology with population genomics - include processing large numbers of models and distinguishing between selection and demographic processes (e.g. population structure). Several methods address the latter, either by estimating a null model of population history or by simultaneously inferring environmental and demographic effects. Here we present samβada, an approach designed to study signatures of local adaptation, with special emphasis on high performance computing of large-scale genetic and environmental data sets. samβada identifies candidate loci using genotype-environment associations while also incorporating multivariate analyses to assess the effect of many environmental predictor variables. 
This enables the inclusion of explanatory variables representing population structure into the models to lower the occurrences of spurious genotype-environment associations. In addition, samβada calculates local indicators of spatial association for candidate loci to provide information on whether similar genotypes tend to cluster in space, which constitutes a useful indication of the possible kinship between individuals. To test the usefulness of this approach, we carried out a simulation study and analysed a data set from Ugandan cattle to detect signatures of local adaptation with samβada, bayenv, lfmm and an FST outlier method (FDIST approach in arlequin) and compare their results. samβada - an open source software for Windows, Linux and Mac OS X available at http://lasig.epfl.ch/sambada - outperforms other approaches and better suits whole-genome sequence data processing.",2016-11-28 +29036596,DelPhiForce web server: electrostatic forces and energy calculations and visualization.,"

Summary

Electrostatic force is an essential component of the total force acting between atoms and macromolecules. Therefore, accurate calculations of electrostatic forces are crucial for revealing the mechanisms of many biological processes. We developed a DelPhiForce web server to calculate and visualize the electrostatic forces at molecular level. DelPhiForce web server enables modeling of electrostatic forces on individual atoms, residues, domains and molecules, and generates an output that can be visualized by VMD software. Here we demonstrate the usage of the server for various biological problems including protein-cofactor, domain-domain, protein-protein, protein-DNA and protein-RNA interactions.

Availability and implementation

The DelPhiForce web server is available at: http://compbio.clemson.edu/delphi-force.

Contact

delphi@clemson.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +27477210,"BioVLAB-mCpG-SNP-EXPRESS: A system for multi-level and multi-perspective analysis and exploration of DNA methylation, sequence variation (SNPs), and gene expression from multi-omics data.","Measuring gene expression, DNA sequence variation, and DNA methylation status is routinely done using high throughput sequencing technologies. To analyze such multi-omics data and explore relationships, reliable bioinformatics systems are much needed. Existing systems are either for exploring curated data or for processing omics data in the form of a library such as R. Thus scientists have much difficulty in investigating relationships among gene expression, DNA sequence variation, and DNA methylation using multi-omics data. In this study, we report a system called BioVLAB-mCpG-SNP-EXPRESS for the integrated analysis of DNA methylation, sequence variation (SNPs), and gene expression for distinguishing cellular phenotypes at the pairwise and multiple phenotype levels. The system can be deployed on either the Amazon cloud or a publicly available high-performance computing node, and the data analysis and exploration of the analysis result can be conveniently done using a web-based interface. In order to alleviate analysis complexity, all the process are fully automated, and graphical workflow system is integrated to represent real-time analysis progression. The BioVLAB-mCpG-SNP-EXPRESS system works in three stages. First, it processes and analyzes multi-omics data as input in the form of the raw data, i.e., FastQ files. Second, various integrated analyses such as methylation vs. gene expression and mutation vs. methylation are performed. Finally, the analysis result can be explored in a number of ways through a web interface for the multi-level, multi-perspective exploration. 
Multi-level interpretation can be done by either gene, gene set, pathway or network level and multi-perspective exploration can be explored from either gene expression, DNA methylation, sequence variation, or their relationship perspective. The utility of the system is demonstrated by performing analysis of phenotypically distinct 30 breast cancer cell line data set. BioVLAB-mCpG-SNP-EXPRESS is available at http://biohealth.snu.ac.kr/software/biovlab_mcpg_snp_express/.",2016-07-28 +21444651,Complex principal component and correlation structure of 16 yeast genomic variables.,"A quickly growing number of characteristics reflecting various aspects of gene function and evolution can be either measured experimentally or computed from DNA and protein sequences. The study of pairwise correlations between such quantitative genomic variables as well as collective analysis of their interrelations by multidimensional methods have delivered crucial insights into the processes of molecular evolution. Here, we present a principal component analysis (PCA) of 16 genomic variables from Saccharomyces cerevisiae, the largest data set analyzed so far. Because many missing values and potential outliers hinder the direct calculation of principal components, we introduce the application of Bayesian PCA. We confirm some of the previously established correlations, such as evolutionary rate versus protein expression, and reveal new correlations such as those between translational efficiency, phosphorylation density, and protein age. Although the first principal component primarily contrasts genomic change and protein expression, the second component separates variables related to gene existence and expressed protein functions. Enrichment analysis on genes affecting variable correlations unveils classes of influential genes. 
For example, although ribosomal and nuclear transport genes make important contributions to the correlation between protein isoelectric point and molecular weight, protein synthesis and amino acid metabolism genes help cause the lack of significant correlation between propensity for gene loss and protein age. We present the novel Quagmire database (Quantitative Genomics Resource) which allows exploring relationships between more genomic variables in three model organisms-Escherichia coli, S. cerevisiae, and Homo sapiens (http://webclu.bio.wzw.tum.de:18080/quagmire).",2011-03-28 +29077808,TITINdb-a computational tool to assess titin's role as a disease gene.,"

Summary

Large numbers of rare and unique titin missense variants have been discovered in both healthy and disease cohorts, thus the correct classification of variants as pathogenic or non-pathogenic has become imperative. Due to titin's large size (363 coding exons), current web applications are unable to map titin variants to domain structures. Here, we present a web application, TITINdb, which integrates titin structure, variant, sequence and isoform information, along with pre-computed predictions of the impact of non-synonymous single nucleotide variants, to facilitate the correct classification of titin variants.

Availability and implementation

TITINdb can be freely accessed at http://fraternalilab.kcl.ac.uk/TITINdb.

Contact

franca.fraternali@kcl.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +28966675,A database for orphan genes in Poaceae.,"Orphan genes refer to a group of protein-coding genes lacking recognizable homologs in the other organisms. Extensive studies have demonstrated that numerous newly sequenced genomes contain a significant number of orphan genes, which have important roles in plant's responses to the environment. Due to a lack of phylogenetic conservation, the origin of orphan genes and their functions are currently not well defined. In the present study, a Poaceae orphan genes database (POGD; http://bioinfo.ahau.edu.cn/pogd) was established to serve as a user-friendly web interface for entry browsing, searching and downloading orphan genes from various plants. Four Poaceae species, including Brachypodium distachyon, Oryza sativa, Sorghum bicolor and Zea mays, are included in the current version of POGD. The database provides gene descriptions (chromosome strands, physical location), gene product records (protein length, isoelectric point, molecular weight as well as gene and protein sequences) and functional annotations (cellular role, gene ontology category, subcellular localization prediction). Basic Local Alignment Search Tool and comparative analyses were also provided on the website. POGD will serve as a comprehensive and reliable repository, which will help uncover regulatory mechanisms of orphan genes and may assist in the development of comparative genomics in plant biology.",2017-08-09 +28791579,Visualizing the Microscopic World.,"Visualization can be a motivating way of teaching students about the microscopic world. This can become even more exciting if the information is based on accurate computational results rather than on crude approximations that eventually might create unreal alternative perceptions. 
Here, we report on a VMD plug-in, named vmdMagazine, which can turn computational simulations into stunning high-impact video presentations, suitable for classes/lectures and even conferences. The software will help students/audience to understand atoms and molecules better and learn to like them. The present paper is meant to give a general idea of the software's potential, showing how it works and how it can be used for educational purposes. The software is freely available at: http://www.fc.up.pt/PortoBioComp/database/doku.php?id=vmdmagazine .",2017-08-09 +29028888,RRDB: a comprehensive and non-redundant benchmark for RNA-RNA docking and scoring.,"Motivation:With the discovery of more and more noncoding RNAs and their versatile functions, RNA-RNA interactions have received increased attention. Therefore, determination of their complex structures is valuable to understand the molecular mechanism of the interactions. Given the high cost of experimental methods, computational approaches like molecular docking have played an important role in the determination of complex structures, in which a benchmark is critical for the development of docking algorithms. Results:Meeting the need, we have developed the first comprehensive and nonredundant RNA-RNA docking benchmark (RRDB). The diverse dataset of 123 targets consists of 78 unbound-unbound and 45 bound-unbound (or unbound-bound) test cases. The dataset was classified into three groups according to the interface conformational changes between bound and unbound structures: 47 'easy', 38 'medium' and 38 'difficult' targets. A docking test with the benchmark using ZDOCK 2.1 demonstrated the challenging nature of the RNA-RNA docking problem and the important value of the present benchmark. The bound and unbound cases of the benchmark will be beneficial for the development and optimization of docking and scoring algorithms for RNA-RNA interactions. 
Availability and implementation:The benchmark is available at http://huanglab.phys.hust.edu.cn/RRDbenchmark/. Contact:huangsy@hust.edu.cn. Supplementary information:Supplementary data are available at Bioinformatics online.",2018-02-01 +28792081,Heterogeneity in reporting on urinary outcome and cure after surgical interventions for stress urinary incontinence in adult neuro-urological patients: A systematic review.,"

Aims

To describe all outcome parameters and definitions of cure used to report on outcome of surgical interventions for stress urinary incontinence (SUI) in neuro-urological (NU) patients.

Methods

This systematic review was performed and reported according to the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) statement. The study protocol was registered and published (CRD42016033303; http://www.crd.york.ac.uk/PROSPERO). Medline, Embase, Cochrane controlled trials databases, and clinicaltrial.gov were systematically searched for relevant publications until February 2017.

Results

A total of 3168 abstracts were screened. Seventeen studies reporting on SUI surgeries in NU patients were included. Sixteen different outcome parameters and nine definitions of cure were used. Six studies reported on objective outcome parameters mainly derived from urodynamic investigations. All studies reported on one or more subjective outcome parameters. Patient-reported pad use (reported during interview) was the most commonly used outcome parameter. Only three of 17 studies used standardized questionnaires (two on impact of incontinence and one on quality of life). Overall, a high risk of bias was found.

Conclusions

We found a considerable heterogeneity in outcome parameters and definitions of cure used to report on outcome of surgical interventions for SUI in NU patients. The results of this systematic review may begin the dialogue to a future consensus on this topic. Standardization of outcome parameters and definitions of cure would enable researchers and clinicians to consistently compare outcomes of different studies and therapies.",2017-08-09 +28708269,Heme Oxygenase Database (HemeOxDB) and QSAR Analysis of Isoform 1 Inhibitors.,"Due to increasing interest in the field of heme oxygenases (HOs), we built a ligand database called HemeOxDB that includes the entire set of known HO-1 and HO-2 inhibitors, resulting in more than 400 compounds. The HemeOxDB is available online at http://www.researchdsf.unict.it/hemeoxdb/, and having a robust search engine allows end users to build complex queries, sort tabulated results, and generate color-coded two- and three-dimensional graphs. This database will grow to be a tool for the design of potent and selective HO-1 or HO-2 inhibitors. We were also interested in virtually searching for alternative inhibitors, and, for the first time in the field of HOs, a quantitative structure-activity relationship (QSAR) model was built using half-maximal inhibitory concentration (IC50 ) values of the whole set of known HO-1 inhibitors, taken from the HemeOxDB and employing the Monte Carlo technique. The statistical quality suggested that the model is robust and possesses desirable predictive potential. The screening of US Food and Drug Administration (FDA)-approved drugs, external to our dataset, suggested new predicted inhibitors, opening the way for replacing imidazole groups. 
The HemeOxDB and the QSAR model reported herein may help in prospectively identifying or repurposing new drugs with optimal structural attributes for HO enzyme inhibition.",2017-08-09 +28584451,optCluster: An R Package for Determining the Optimal Clustering Algorithm.,"There exist numerous programs and packages that perform validation for a given clustering solution; however, clustering algorithms fare differently as judged by different validation measures. If more than one performance measure is used to evaluate multiple clustering partitions, an optimal result is often difficult to determine by visual inspection alone. This paper introduces optCluster, an R package that uses a single function to simultaneously compare numerous clustering partitions (created by different algorithms and/or numbers of clusters) and obtain a ""best"" option for a given dataset. The method of weighted rank aggregation is utilized by this package to objectively aggregate various performance measure scores, thereby taking away the guesswork that often follows a visual inspection of cluster results. The optCluster package contains biological validation measures as well as clustering algorithms developed specifically for RNA sequencing data, making it a useful tool for clustering genomic data.

Availability

This package is available for free through the Comprehensive R Archive Network (CRAN) at http://cran.rproject.org/web/packages/optCluster/.",2017-03-31 +27017950,NeisseriaBase: a specialised Neisseria genomic resource and analysis platform.,"Background. The gram-negative Neisseria is associated with two of the most potent human epidemic diseases: meningococcal meningitis and gonorrhoea. In both cases, disease is caused by bacteria colonizing human mucosal membrane surfaces. Overall, the genus shows great diversity and genetic variation mainly due to its ability to acquire and incorporate genetic material from a diverse range of sources through horizontal gene transfer. Although a number of databases exist for the Neisseria genomes, they are mostly focused on the pathogenic species. In this present study we present the freely available NeisseriaBase, a database dedicated to the genus Neisseria encompassing the complete and draft genomes of 15 pathogenic and commensal Neisseria species. Methods. The genomic data were retrieved from National Center for Biotechnology Information (NCBI) and annotated using the RAST server which were then stored into the MySQL database. The protein-coding genes were further analyzed to obtain information such as calculation of GC content (%), predicted hydrophobicity and molecular weight (Da) using in-house Perl scripts. The web application was developed following the secure four-tier web application architecture: (1) client workstation, (2) web server, (3) application server, and (4) database server. The web interface was constructed using PHP, JavaScript, jQuery, AJAX and CSS, utilizing the model-view-controller (MVC) framework. The in-house developed bioinformatics tools implemented in NeisseraBase were developed using Python, Perl, BioPerl and R languages. Results. Currently, NeisseriaBase houses 603,500 Coding Sequences (CDSs), 16,071 RNAs and 13,119 tRNA genes from 227 Neisseria genomes. 
The database is equipped with interactive web interfaces. Incorporation of the JBrowse genome browser in the database enables fast and smooth browsing of Neisseria genomes. NeisseriaBase includes the standard BLAST program to facilitate homology searching, and for Virulence Factor Database (VFDB) specific homology searches, the VFDB BLAST is also incorporated into the database. In addition, NeisseriaBase is equipped with in-house designed tools such as the Pairwise Genome Comparison tool (PGC) for comparative genomic analysis and the Pathogenomics Profiling Tool (PathoProT) for the comparative pathogenomics analysis of Neisseria strains. Discussion. This user-friendly database not only provides access to a host of genomic resources on Neisseria but also enables high-quality comparative genome analysis, which is crucial for the expanding scientific community interested in Neisseria research. This database is freely available at http://neisseria.um.edu.my.",2016-03-17 +29033655,"Waarnemingen.be - Plant occurrences in Flanders and the Brussels Capital Region, Belgium.","Waarnemingen.be - Plant occurrences in Flanders and the Brussels Capital Region, Belgium is a species occurrence dataset published by Natuurpunt. The dataset contains almost 1.2 million plant occurrences of 1,222 native vascular plant species, mostly recorded by volunteers (citizen scientists), mainly since 2008. The occurrences are derived from the database http://www.waarnemingen.be, hosted by Stichting Natuurinformatie and managed by the nature conservation NGO Natuurpunt. 
Together with the datasets Florabank1 (Van Landuyt and Brosens 2017) and the Belgian IFBL (Instituut voor Floristiek van België en Luxemburg) Flora Checklists (Van Landuyt and Noé 2015), the dataset represents the most complete overview of indigenous plants in Flanders and the Brussels Capital Region.",2017-08-08 +24771340,NetVenn: an integrated network analysis web platform for gene lists.,"Many lists containing biological identifiers, such as gene lists, have been generated in various genomics projects. Identifying the overlap among gene lists can enable us to understand the similarities and differences between the data sets. Here, we present an interactome network-based web application platform named NetVenn for comparing and mining the relationships among gene lists. NetVenn contains interactome network data publically available for several species and supports a user upload of customized interactome network data. It has an efficient and interactive graphic tool that provides a Venn diagram view for comparing two to four lists in the context of an interactome network. NetVenn also provides a comprehensive annotation of genes in the gene lists by using enriched terms from multiple functional databases. In addition, it allows for mapping the gene expression data, providing information of transcription status of genes in the network. The power graph analysis tool is integrated in NetVenn for simplified visualization of gene relationships in the network. NetVenn is freely available at http://probes.pw.usda.gov/NetVenn or http://wheat.pw.usda.gov/NetVenn.",2014-04-25 +24214963,Manually curated database of rice proteins.,"'Manually Curated Database of Rice Proteins' (MCDRP) available at http://www.genomeindia.org/biocuration is a unique curated database based on published experimental data. Semantic integration of scientific data is essential to gain a higher level of understanding of biological systems. 
Since the majority of scientific data is available as published literature, text mining is an essential step before the data can be integrated and made available for computer-based search in various databases. However, text mining is a tedious exercise and thus, there is a large gap in the data available in curated databases and published literature. Moreover, data in an experiment can be perceived from several perspectives, which may not reflect in the text-based curation. In order to address such issues, we have demonstrated the feasibility of digitizing the experimental data itself by creating a database on rice proteins based on in-house developed data curation models. Using these models data of individual experiments have been digitized with the help of universal ontologies. Currently, the database has data for over 1800 rice proteins curated from >4000 different experiments of over 400 research articles. Since every aspect of the experiment such as gene name, plant type, tissue and developmental stage has been digitized, experimental data can be rapidly accessed and integrated.",2013-11-07 +28200033,Genome-wide genetic heterogeneity discovery with categorical covariates.,"

Motivation

Genetic heterogeneity is the phenomenon that distinct genetic variants may give rise to the same phenotype. The recently introduced algorithm Fast Automatic Interval Search ( FAIS ) enables the genome-wide search of candidate regions for genetic heterogeneity in the form of any contiguous sequence of variants, and achieves high computational efficiency and statistical power. Although FAIS can test all possible genomic regions for association with a phenotype, a key limitation is its inability to correct for confounders such as gender or population structure, which may lead to numerous false-positive associations.

Results

We propose FastCMH , a method that overcomes this problem by properly accounting for categorical confounders, while still retaining statistical power and computational efficiency. Experiments comparing FastCMH with FAIS and multiple kinds of burden tests on simulated data, as well as on human and Arabidopsis samples, demonstrate that FastCMH can drastically reduce genomic inflation and discover associations that are missed by standard burden tests.

Availability and implementation

An R package fastcmh is available on CRAN and the source code can be found at: https://www.bsse.ethz.ch/mlcb/research/bioinformatics-and-computational-biology/fastcmh.html.

Contact

felipe.llinares@bsse.ethz.ch.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +27450113,The archiving and dissemination of biological structure data.,"The global Protein Data Bank (PDB) was the first open-access digital archive in biology. The history and evolution of the PDB are described, together with the ways in which molecular structural biology data and information are collected, curated, validated, archived, and disseminated by the members of the Worldwide Protein Data Bank organization (wwPDB; http://wwpdb.org). Particular emphasis is placed on the role of community in establishing the standards and policies by which the PDB archive is managed day-to-day.",2016-07-21 +26289158,TMalphaDB and TMbetaDB: web servers to study the structural role of sequence motifs in α-helix and β-barrel domains of membrane proteins.,"

Background

Membrane proteins represent over 25 % of human protein genes and account for more than 60 % of drug targets due to their accessibility from the extracellular environment. The increasing number of available crystal structures of these proteins in the Protein Data Bank permits an initial estimation of their structural properties.

Description

We have developed two web servers-TMalphaDB for α-helix bundles and TMbetaDB for β-barrels-to analyse the growing repertoire of available crystal structures of membrane proteins. TMalphaDB and TMbetaDB permit to search for these specific sequence motifs in a non-redundant structure database of transmembrane segments and quantify structural parameters such as ϕ and ψ backbone dihedral angles, χ1 side chain torsion angle, unit bend and unit twist.

Conclusions

The structural information offered by TMalphaDB and TMbetaDB permits to quantify structural distortions induced by specific sequence motifs, and to elucidate their role in the 3D structure. This specific structural information has direct implications in homology modeling of the growing sequences of membrane proteins lacking experimental structure. TMalphaDB and TMbetaDB are freely available at http://lmc.uab.cat/TMalphaDB and http://lmc.uab.cat/TMbetaDB.",2015-08-20 +28784999,AOD: the antioxidant protein database.,"An antioxidant is a molecule that can prevent free radicals from causing damages in organisms. The increasing studies on antioxidants calls for a specialized database that is not readily available yet. To this end, in the present study, the Antioxidant Database (AOD) was developed to help researchers understand and reveal the biological functions of antioxidant proteins. AOD is freely available at http://lin.uestc.edu.cn/AODdatabase/index.aspx . The current release of AOD consists of 710 antioxidant proteins. Information including taxonomy, source organism, subcellular location, gene ontology, catalytic activity and function of antioxidant proteins are all extracted from UniProtKB/Swiss-Prot and captured in AOD. In addition, two web-based tools for performing sequence similarity search and computationally identification of antioxidants were also integrated in AOD. We believe that AOD will greatly facilitate the researches on antioxidants.",2017-08-07 +30599039,Amino acid 118 in the Deafness Causing (DFNA20/26) ACTG1 gene is a Mutational Hot Spot.,"

Background

Hearing loss is an economically and socially important cause of human morbidity, affecting 360 million people (over 5% of the world's population), of whom 32 million are children. Of the estimated minimum of 50% of hereditary hearing loss, non-syndromic hearing loss (NSHL) accounts for more than 70%. The autosomal dominant non-syndromic hearing loss (ADNSHL) is highly heterogeneous. To date, 67 ADNSHL loci (DFNA1-67) have been mapped; however, only 35 causative genes have been cloned since 1997 (http://hereditaryhearingloss.org/).

Methods

To identify the genetic basis of hereditary hearing loss in a Chinese family with ADNSHL, we undertook a targeted sequencing of 180 genes using a custom capture panel (MiamiOtoGenes).

Results

The onset of hearing loss in the family occurred between the ages of 15 and 18 years. Hearing loss was bilateral, started in the high frequency and progressed to lower frequencies. The c.353A>T (K118M) in the ACTG1 gene was identified by panel and was confirmed by Sanger sequencing and was present in all affected family members. So far, five of the 23 DFNA20/26 families worldwide have been found to carry mutation involving the residue K118.

Conclusions

This is the first report of K118M mutation in the ACTG1 gene causing hearing loss in the Chinese population. The present data are in line with previous evidence to suggest that codon K118 of ACTG1 may represent a mutational hot spot that justifies a mutation screen for diagnostic purpose in the genetically heterogeneous group of DFNA20/26.",2018-04-28 +28466792,Orthoscape: a cytoscape application for grouping and visualization KEGG based gene networks by taxonomy and homology principles.,"

Background

There are many available software tools for visualization and analysis of biological networks. Among them, Cytoscape ( http://cytoscape.org/ ) is one of the most comprehensive packages, with many plugins and applications which extends its functionality by providing analysis of protein-protein interaction, gene regulatory and gene co-expression networks, metabolic, signaling, neural as well as ecological-type networks including food webs, communities networks etc. Nevertheless, only three plugins tagged 'network evolution' found in Cytoscape official app store and in literature. We have developed a new Cytoscape 3.0 application Orthoscape aimed to facilitate evolutionary analysis of gene networks and visualize the results.

Results

Orthoscape aids in analysis of evolutionary information available for gene sets and networks by highlighting: (1) the orthology relationships between genes; (2) the evolutionary origin of gene network components; (3) the evolutionary pressure mode (diversifying or stabilizing, negative or positive selection) of orthologous groups in general and/or branch-oriented mode. The distinctive feature of Orthoscape is the ability to control all data analysis steps via user-friendly interface.

Conclusion

Orthoscape allows its users to analyze gene networks or separated gene sets in the context of evolution. At each step of data analysis, Orthoscape also provides for convenient visualization and data manipulation.",2017-01-27 +28093410,FASTdoop: a versatile and efficient library for the input of FASTA and FASTQ files for MapReduce Hadoop bioinformatics applications.,"

Summary

MapReduce Hadoop bioinformatics applications require the availability of special-purpose routines to manage the input of sequence files. Unfortunately, the Hadoop framework does not provide any built-in support for the most popular sequence file formats like FASTA or BAM. Moreover, the development of these routines is not easy, both because of the diversity of these formats and the need for managing efficiently sequence datasets that may count up to billions of characters. We present FASTdoop, a generic Hadoop library for the management of FASTA and FASTQ files. We show that, with respect to analogous input management routines that have appeared in the Literature, it offers versatility and efficiency. That is, it can handle collections of reads, with or without quality scores, as well as long genomic sequences while the existing routines concentrate mainly on NGS sequence data. Moreover, in the domain where a comparison is possible, the routines proposed here are faster than the available ones. In conclusion, FASTdoop is a much needed addition to Hadoop-BAM.

Availability and implementation

The software and the datasets are available at http://www.di.unisa.it/FASTdoop/ .

Contact

umberto.ferraro@uniroma1.it.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +28172348,ARResT/Interrogate: an interactive immunoprofiler for IG/TR NGS data.,"

Motivation

The study of immunoglobulins and T cell receptors using next-generation sequencing has finally allowed exploring immune repertoires and responses in their immense variability and complexity. Unsurprisingly, their analysis and interpretation is a highly convoluted task.

Results

We thus implemented ARResT/Interrogate, a web-based, interactive application. It can organize and filter large amounts of immunogenetic data by numerous criteria, calculate several relevant statistics, and present results in the form of multiple interconnected visualizations.

Availability and implementation

ARResT/Interrogate is implemented primarily in R, and is freely available at http://bat.infspire.org/arrest/interrogate/

Contact

nikos.darzentas@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +27964755,"XGR software for enhanced interpretation of genomic summary data, illustrated by application to immunological traits.","

Background

Biological interpretation of genomic summary data such as those resulting from genome-wide association studies (GWAS) and expression quantitative trait loci (eQTL) studies is one of the major bottlenecks in medical genomics research, calling for efficient and integrative tools to resolve this problem.

Results

We introduce eXploring Genomic Relations (XGR), an open source tool designed for enhanced interpretation of genomic summary data enabling downstream knowledge discovery. Targeting users of varying computational skills, XGR utilises prior biological knowledge and relationships in a highly integrated but easily accessible way to make user-input genomic summary datasets more interpretable. We show how by incorporating ontology, annotation, and systems biology network-driven approaches, XGR generates more informative results than conventional analyses. We apply XGR to GWAS and eQTL summary data to explore the genomic landscape of the activated innate immune response and common immunological diseases. We provide genomic evidence for a disease taxonomy supporting the concept of a disease spectrum from autoimmune to autoinflammatory disorders. We also show how XGR can define SNP-modulated gene networks and pathways that are shared and distinct between diseases, how it achieves functional, phenotypic and epigenomic annotations of genes and variants, and how it enables exploring annotation-based relationships between genetic variants.

Conclusions

XGR provides a single integrated solution to enhance interpretation of genomic summary data for downstream biological discovery. XGR is released as both an R package and a web-app, freely available at http://galahad.well.ox.ac.uk/XGR .",2016-12-13 +29126218,LIBRA-WA: a web application for ligand binding site detection and protein function recognition.,"

Summary

Recently, LIBRA, a tool for active/ligand binding site prediction, was described. LIBRA's effectiveness was comparable to similar state-of-the-art tools; however, its scoring scheme, output presentation, dependence on local resources and overall convenience were amenable to improvements. To solve these issues, LIBRA-WA, a web application based on an improved LIBRA engine, has been developed, featuring a novel scoring scheme consistently improving LIBRA's performance, and a refined algorithm that can identify binding sites hosted at the interface between different subunits. LIBRA-WA also sports additional functionalities like ligand clustering and a completely redesigned interface for an easier analysis of the output. Extensive tests on 373 apoprotein structures indicate that LIBRA-WA is able to identify the biologically relevant ligand/ligand binding site in 357 cases (∼96%), with the correct prediction ranking first in 349 cases (∼98% of the latter, ∼94% of the total). The earlier stand-alone tool has also been updated and dubbed LIBRA+, by integrating LIBRA-WA's improved engine for cross-compatibility purposes.

Availability and implementation

LIBRA-WA and LIBRA+ are available at: http://www.computationalbiology.it/software.html.

Contact

polticel@uniroma3.it.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-03-01 +28796635,Detecting and Attributing Health Burdens to Climate Change.,"

Background

Detection and attribution of health impacts caused by climate change uses formal methods to determine a) whether the occurrence of adverse health outcomes has changed, and b) the extent to which that change could be attributed to climate change. There have been limited efforts to undertake detection and attribution analyses in health.

Objective

Our goal was to show a range of approaches for conducting detection and attribution analyses.

Results

Case studies for heatwaves, Lyme disease in Canada, and Vibrio emergence in northern Europe highlight evidence that climate change is adversely affecting human health. Changes in rates and geographic distribution of adverse health outcomes were detected, and, in each instance, a proportion of the observed changes could, in our judgment, be attributed to changes in weather patterns associated with climate change.

Conclusions

The results of detection and attribution studies can inform evidence-based risk management to reduce current, and plan for future, changes in health risks associated with climate change. Gaining a better understanding of the size, timing, and distribution of the climate change burden of disease and injury requires reliable long-term data sets, more knowledge about the factors that confound and modify the effects of climate on health, and refinement of analytic techniques for detection and attribution. At the same time, significant advances are possible in the absence of complete data and statistical certainty: there is a place for well-informed judgments, based on understanding of underlying processes and matching of patterns of health, climate, and other determinants of human well-being. https://doi.org/10.1289/EHP1509.",2017-08-07 +27678198,Web-based bioinformatics workflows for end-to-end RNA-seq data computation and analysis in agricultural animal species.,"

Background

Remarkable advances in Next Generation Sequencing (NGS) technologies, bioinformatics algorithms and computational technologies have significantly accelerated genomic research. However, complicated NGS data analysis still remains as a major bottleneck. RNA-seq, as one of the major area in the NGS field, also confronts great challenges in data analysis.

Results

To address the challenges in RNA-seq data analysis, we developed a web portal that offers three integrated workflows that can perform end-to-end compute and analysis, including sequence quality control, read-mapping, transcriptome assembly, reconstruction and quantification, and differential analysis. The first workflow utilizes Tuxedo (Tophat, Cufflink, Cuffmerge and Cuffdiff suite of tools). The second workflow deploys Trinity for de novo assembly and uses RSEM for transcript quantification and EdgeR for differential analysis. The third combines STAR, RSEM, and EdgeR for data analysis. All these workflows support multiple samples and multiple groups of samples and perform differential analysis between groups in a single workflow job submission. The calculated results are available for download and post-analysis. The supported animal species include chicken, cow, duck, goat, pig, horse, rabbit, sheep, turkey, as well as several other model organisms including yeast, C. elegans, Drosophila, and human, with genomic sequences and annotations obtained from ENSEMBL. The RNA-seq portal is freely available from http://weizhongli-lab.org/RNA-seq .

Conclusions

The web portal offers not only bioinformatics software, workflows, computation and reference data, but also an integrated environment for complex RNA-seq data analysis for agricultural animal species. In this project, our aim is not to develop new RNA-seq tools, but to build web workflows for using popular existing RNA-seq methods and make these tools more accessible to the communities.",2016-09-27 +26604044,Refining prognosis after trans-arterial chemo-embolization for hepatocellular carcinoma.,"

Background & aims

To develop an individual prognostic calculator for patients with unresectable hepatocellular carcinoma (HCC) undergoing trans-arterial chemo-embolization (TACE).

Methods

Data from two prospective databases, regarding 361 patients who received TACE as first-line therapy (2000-2012), were reviewed in order to refine available prognostic tools and to develop a continuous individual web-based prognostic calculator. Patients with neoplastic portal vein invasion were excluded from the analysis. The model was built following a bootstrap resampling procedure aimed at identifying prognostic predictors and by carrying out a 10-fold cross-validation for accuracy assessment by means of Harrell's c-statistic.

Results

Number of tumours, serum albumin, serum total bilirubin, alpha-foetoprotein and maximum tumour size were selected as predictors of mortality following TACE with the bootstrap resampling technique. In the 10-fold cross-validation cohort, the model showed a Harrell's c-statistic of 0.649 (95% CI: 0.610-0.688), significantly higher than that of the Hepatoma Arterial-embolization Prognostic (HAP) score (0.589; 95% CI: 0.552-0.626; P = 0.001) and of the modified HAP-II score (0.611; 95% CI: 0.572-0.650; P = 0.005). Akaike's information criterion for the model was 2520; for the mHAP-II it was 2544 and for the HAP score it was 2554. A web-based calculator was developed for quick consultation at http://www.livercancer.eu/mhap3.html.

Conclusions

The proposed individual prognostic model can provide an accurate prognostic prediction for each patient with unresectable HCC following treatment with TACE without class stratification. The availability of an online calculator can help physicians in daily clinical practice.",2015-12-23 +27869815,TACO produces robust multisample transcriptome assemblies from RNA-seq.,"Accurate transcript structure and abundance inference from RNA sequencing (RNA-seq) data is foundational for molecular discovery. Here we present TACO, a computational method to reconstruct a consensus transcriptome from multiple RNA-seq data sets. TACO employs novel change-point detection to demarcate transcript start and end sites, leading to improved reconstruction accuracy compared with other tools in its class. The tool is available at http://tacorna.github.io and can be readily incorporated into RNA-seq analysis workflows.",2016-11-21 +30089664,Peripheral Blood B Cell Depletion after Rituximab and Complete Response in Lupus Nephritis.,"

Background and objectives

Incomplete peripheral blood B cell depletion after rituximab in lupus nephritis might correlate with inability to reduce tubulointerstitial lymphoid aggregates in the kidney, which together could be responsible for inadequate response to treatment. We utilized data from the Lupus Nephritis Assessment with Rituximab (LUNAR) study to characterize the variability of peripheral blood B cell depletion after rituximab and assess its association with complete response in patients with lupus nephritis.

Design, setting, participants, & measurements

We analyzed 68 participants treated with rituximab. Peripheral blood B cell depletion was defined as 0 cells/µl, termed ""complete peripheral depletion,"" assessed over 78 weeks. Logistic regression was used to estimate the association between characteristics of complete peripheral depletion and complete response (defined as urine protein-to-creatinine ratio <0.5 mg/mg, and normal serum creatinine or an increase in creatinine <15%, if normal at baseline), assessed at week 78.

Results

A total of 53 (78%) participants achieved complete peripheral depletion (0 cells/µl) in a median time of 182 days (interquartile range, 80-339). The median duration of complete peripheral depletion was 71 days (interquartile range, 14-158). Twenty-five (47%) participants with complete peripheral depletion achieved complete response, compared with two (13%) without. Complete peripheral depletion was associated with complete response (unadjusted odds ratio [OR], 5.8; 95% confidence interval [95% CI], 1.2 to 28; P=0.03). Longer time to achieving complete peripheral depletion was associated with a lower likelihood of complete response (unadjusted OR, 0.89; 95% CI, 0.81 to 0.98; P=0.02). Complete peripheral depletion lasting >71 days (the median) was associated with complete response (unadjusted OR, 4.1; 95% CI, 1.5 to 11; P=0.008).

Conclusions

There was substantial variability in peripheral blood B cell depletion in patients with lupus nephritis treated with rituximab from the LUNAR trial. Achievement of complete peripheral depletion, as well as the rapidity and duration of complete peripheral depletion, were associated with complete response at week 78.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2018_09_06_CJASNPodcast_18_10_.mp3.",2018-08-08 +24677806,"MSDA, a proteomics software suite for in-depth Mass Spectrometry Data Analysis using grid computing.","One of the major bottlenecks in the proteomics field today resides in the computational interpretation of the massive data generated by the latest generation of high-throughput MS instruments. MS/MS datasets are constantly increasing in size and complexity and it becomes challenging to comprehensively process such huge datasets and afterwards deduce most relevant biological information. The Mass Spectrometry Data Analysis (MSDA, https://msda.unistra.fr) online software suite provides a series of modules for in-depth MS/MS data analysis. It includes a custom databases generation toolbox, modules for filtering and extracting high-quality spectra, for running high-performance database and de novo searches, and for extracting modified peptides spectra and functional annotations. Additionally, MSDA enables running the most computationally intensive steps, namely database and de novo searches, on a computer grid thus providing a net time gain of up to 99% for data processing.",2014-03-12 +30055062,Right ventricular dyssynchrony during hypoxic breathing but not during exercise in healthy subjects: a speckle tracking echocardiography study.,"

New findings

What is the central question of this study? Right ventricular dyssynchrony in severe pulmonary hypertension is associated with a poor prognosis. However, it has recently been observed in patients with lung or connective tissue disease and pulmonary artery pressure at the upper limits of normal. The mechanisms of right ventricular dyssynchrony in pulmonary hypertension remain uncertain. What is the main finding and its importance? Acute hypoxic breathing, but not normoxic exercise, induces an increase in right ventricular dyssynchrony detected by speckle tracking echocardiography in healthy subjects. These results add new insights into the determinants of right ventricular dyssynchrony, suggesting a role for systemic factors added to afterload in the pathophysiology of right ventricular inhomogeneity of contraction.

Abstract

Pulmonary hypertension (PH) has been shown to be associated with regional inhomogeneity (or dyssynchrony) of right ventricular (RV) contraction. Right ventricular dyssynchrony is an independent predictor of decreased survival in advanced PH, but has also been reported in patients with only mildly elevated pulmonary artery pressure (PAP). The mechanisms of RV dyssynchrony in PH remain uncertain. Our aim was to evaluate RV regional function in healthy subjects during acute hypoxia and during exercise. Seventeen healthy subjects (24 ± 6 years) underwent a speckle tracking echocardiography of the RV at rest in normoxia and every 15 min during a 60 min exposure to hypoxic breathing (FIO2 12%). Ten of the subjects also underwent an incremental cycle ergometry in normoxia to 100 W, with the same echocardiographic measurements. Dyssynchrony was measured as the SD of the times to peak systolic strain of the four basal and mid RV segments corrected for the heart rate (RV-SD4). RV-SD4 increased during hypoxia from 12 ± 7 to 22 ± 11 ms in spite of mild increases in mean PAP (mPAP) from 15 ± 2 to 20 ± 2 mmHg and pulmonary vascular resistance (PVR) from 1.18 ± 0.15 to 1.4 ± 0.15 Wood units (WU). During exercise RV-SD4 did not significantly change (from 12 ± 6 ms to 14 ± 6 ms), while mPAP increased to 25 ± 2 mmHg and PVR was unchanged. These data show that in healthy subjects, RV contraction is inhomogeneous in hypoxia but not during exercise. Since PAP increases more during exercise, RV dyssynchrony in hypoxia may be explained by a combination of mechanical (RV afterload) and systemic (hypoxia) factors.",2018-08-25 +28854983,Identification of cis-regulatory mutations generating de novo edges in personalized cancer gene regulatory networks.,"The identification of functional non-coding mutations is a key challenge in the field of genomics. 
Here we introduce μ-cisTarget to filter, annotate and prioritize cis-regulatory mutations based on their putative effect on the underlying ""personal"" gene regulatory network. We validated μ-cisTarget by re-analyzing the TAL1 and LMO1 enhancer mutations in T-ALL, and the TERT promoter mutation in melanoma. Next, we re-sequenced the full genomes of ten cancer cell lines and used matched transcriptome data and motif discovery to identify master regulators with de novo binding sites that result in the up-regulation of nearby oncogenic drivers. μ-cisTarget is available from http://mucistarget.aertslab.org .",2017-08-30 +25433699,Computer-assisted curation of a human regulatory core network from the biological literature.,"

Motivation

A highly interlinked network of transcription factors (TFs) orchestrates the context-dependent expression of human genes. ChIP-chip experiments that interrogate the binding of particular TFs to genomic regions are used to reconstruct gene regulatory networks at genome-scale, but are plagued by high false-positive rates. Meanwhile, a large body of knowledge on high-quality regulatory interactions remains largely unexplored, as it is available only in natural language descriptions scattered over millions of scientific publications. Such data are hard to extract and regulatory data currently contain together only 503 regulatory relations between human TFs.

Results

We developed a text-mining-assisted workflow to systematically extract knowledge about regulatory interactions between human TFs from the biological literature. We applied this workflow to the entire Medline, which helped us to identify more than 45 000 sentences potentially describing such relationships. We ranked these sentences by a machine-learning approach. The top-2500 sentences contained ∼900 sentences that encompass relations already known in databases. By manually curating the remaining 1625 top-ranking sentences, we obtained more than 300 validated regulatory relationships that were not present in a regulatory database before. Full-text curation allowed us to obtain detailed information on the strength of experimental evidences supporting a relationship.

Conclusions

We were able to increase curated information about the human core transcriptional network by >60% compared with the current content of regulatory databases. We observed improved performance when using the network for disease gene prioritization compared with the state-of-the-art.

Availability and implementation

Web-service is freely accessible at http://fastforward.sys-bio.net/.

Contact

leser@informatik.hu-berlin.de or nils.bluethgen@charite.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-29 +24297255,iPfam: a database of protein family and domain interactions found in the Protein Data Bank.,"The database iPfam, available at http://ipfam.org, catalogues Pfam domain interactions based on known 3D structures that are found in the Protein Data Bank, providing interaction data at the molecular level. Previously, the iPfam domain-domain interaction data was integrated within the Pfam database and website, but it has now been migrated to a separate database. This allows for independent development, improving data access and giving clearer separation between the protein family and interactions datasets. In addition to domain-domain interactions, iPfam has been expanded to include interaction data for domain bound small molecule ligands. Functional annotations are provided from source databases, supplemented by the incorporation of Wikipedia articles where available. iPfam (version 1.0) contains >9500 domain-domain and 15 500 domain-ligand interactions. The new website provides access to this data in a variety of ways, including interactive visualizations of the interaction data.",2013-12-01 +27752523,Data on Vietnamese patients׳ financial burdens and risk of destitution.,"The research process started in the first week of August 10, 2014 and ended early February 2015, obtaining qualified data for 330 patients from many hospitals in northern Vietnam. Its expansion was performed for an enlarged dataset through May 2015, containing 900 records. This article exemplifies the attempt to examine the likelihood of destitution among Vietnamese patients due to insufficient insurance coverage, cost of treatment and patient׳s status of residency during a curative hospital stay. The result suggests that the patients, who are poor and come from rural areas, face serious obstacles in accessing health care services. 
This data article presents attributes and values of the data set used in the article provided at DOI: http://dx.doi.org/10.1186/s40064-015-1279-x Vuong (2015) [4].",2016-09-30 +24490620,Characterization of the glutathione S-transferase gene family through ESTs and expression analyses within common and pigmented cultivars of Citrus sinensis (L.) Osbeck.,"

Background

Glutathione S-transferases (GSTs) represent a ubiquitous gene family encoding detoxification enzymes able to recognize reactive electrophilic xenobiotic molecules as well as compounds of endogenous origin. Anthocyanin pigments require GSTs for their transport into the vacuole since their cytoplasmic retention is toxic to the cell. Anthocyanin accumulation in Citrus sinensis (L.) Osbeck fruit flesh determines different phenotypes affecting the typical pigmentation of Sicilian blood oranges. In this paper we describe: i) the characterization of the GST gene family in C. sinensis through a systematic EST analysis; ii) the validation of the EST assembly by exploiting the genome sequences of C. sinensis and C. clementina and their genome annotations; iii) GST gene expression profiling in six tissues/organs and in two different sweet orange cultivars, Cadenera (common) and Moro (pigmented).

Results

We identified 61 GST transcripts, described the full- or partial-length nature of the sequences and assigned to each sequence the GST class membership exploiting a comparative approach and the classification scheme proposed for plant species. A total of 23 full-length sequences were defined. Fifty-four of the 61 transcripts were successfully aligned to the C. sinensis and C. clementina genomes. Tissue specific expression profiling demonstrated that the expression of some GST transcripts was 'tissue-affected' and cultivar specific. A comparative analysis of C. sinensis GSTs with those from other plant species was also considered. Data from the current analysis are accessible at http://biosrv.cab.unina.it/citrusGST/, with the aim to provide a reference resource for C. sinensis GSTs.

Conclusions

This study aimed at the characterization of the GST gene family in C. sinensis. Based on expression patterns from two different cultivars and on sequence-comparative analyses, we also highlighted that two sequences, a Phi class GST and a Mapeg class GST, could be involved in the conjugation of anthocyanin pigments and in their transport into the vacuole, specifically in fruit flesh of the pigmented cultivar.",2014-02-03 +25388589,iPathCons and iPathDB: an improved insect pathway construction tool and the database. ,"Insects are one of the most successful animal groups on earth. Some insects, such as the silkworm and honeybee, are beneficial to humans, whereas others are notorious pests of crops. At present, the genomes of 38 insects have been sequenced and made publically available. In addition, the transcriptomes of dozens of insects have been sequenced. As gene data rapidly accumulate, constructing the pathway of molecular interactions becomes increasingly important for entomological research. Here, we developed an improved tool, iPathCons, for knowledge-based construction of pathways from the transcriptomes or the official gene sets of genomes. Considering the high evolution diversity in insects, iPathCons uses a voting system for Kyoto Encyclopedia of Genes and Genomes Orthology assignment. Both stand-alone software and a web server of iPathCons are provided. Using iPathCons, we constructed the pathways of molecular interactions of 52 insects, including 37 genome-sequenced and 15 transcriptome-sequenced ones. These pathways are available in the iPathDB, which provides searches, web server, data downloads, etc. This database will be highly useful for the insect research community. Database URL: http://ento.njau.edu.cn/ipath/",2014-11-11 +28777851,[Analysis of genomic copy number variations in 36 fetuses with heart malformations using next-generation sequencing].,"

Objective

To explore the implications of copy number variations (CNVs) for congenital heart diseases (CHD) in fetuses.

Methods

G-banding karyotype analysis and next-generation sequencing (NGS) technology were performed on cord blood samples derived from 36 fetuses with CHD. Pathological implication of the CNVs was explored through comparison against the International Genomic Polymorphism Database (http://www.ebi.ac.uk/dgva/), Phenotype Database (http://decipher.sanger.ac.uk/), and the Human Genome Database at UCSC (http://genome.ucsc.edu/cgi-bin/hgGateway).

Results

G-banding karyotype analysis has identified 7 chromosomal abnormalities. For the remaining 28 cases, NGS has identified 4 microdeletions and microduplications, which involved chromosomes 2, 13, 14, 16 and 22. The largest involved a 6.8 Mb microdeletion, while the smallest involved a 280 kb microduplication. The chromosomal breakpoints in 1 case were delineated. One case of Noonan syndrome and one case of 22q11.2 deletion were diagnosed.

Conclusion

NGS can accurately determine the origins of derivative chromosomes and facilitate identification of pathogenic CNVs/genes. It can serve as a useful complement for conventional G-banding and reduce the recurrence risk.",2017-08-01 +28766075,Protein binding hot spots prediction from sequence only by a new ensemble learning method.,"Hot spots are interfacial core areas of binding proteins, which have been applied as targets in drug design. Experimental methods are costly in both time and expense to locate hot spot areas. Recently, in-silicon computational methods have been widely used for hot spot prediction through sequence or structure characterization. As the structural information of proteins is not always solved, and thus hot spot identification from amino acid sequences only is more useful for real-life applications. This work proposes a new sequence-based model that combines physicochemical features with the relative accessible surface area of amino acid sequences for hot spot prediction. The model consists of 83 classifiers involving the IBk (Instance-based k means) algorithm, where instances are encoded by important properties extracted from a total of 544 properties in the AAindex1 (Amino Acid Index) database. Then top-performance classifiers are selected to form an ensemble by a majority voting technique. The ensemble classifier outperforms the state-of-the-art computational methods, yielding an F1 score of 0.80 on the benchmark binding interface database (BID) test set.

Availability

http://www2.ahu.edu.cn/pchen/web/HotspotEC.htm .",2017-08-01 +28682969,Early Colonoscopy Improves the Outcome of Patients With Symptomatic Colorectal Cancer.,"

Background

Long waiting times from early symptoms to diagnosis and treatment may influence the staging and prognosis of patients with colorectal cancer. We analyzed the effect of colonoscopy timing on the outcome of these patients.

Objective

This study aimed to compare the outcome (tumoral staging and long-term survival) of patients with suspected colorectal cancer according to diagnostic colonoscopy timing.

Design

This study is an analysis of a prospectively maintained database.

Settings

The study was conducted at the Open Access Endoscopy Service of the tertiary public healthcare center Hospital Universitario de Canarias, in the Spanish island of Tenerife.

Patients

Consecutive patients diagnosed with colorectal cancer between February 2008 and October 2010, fulfilling 1 or more National Institute for Health and Clinical Excellence criteria, were assigned to early colonoscopy (<30 days from referral) or to standard-schedule colonoscopy at the discretion of the referring physician. Tumor staging (TNM classification) at diagnosis and long-term survival after treatment were compared in both strategies.

Main outcome measures

The primary outcomes measured were the stage at presentation and overall survival, as determined by prompt or standard referral.

Results

Overall, 257 patients with colorectal cancer were diagnosed (101 at early colonoscopy and 156 at standard-schedule colonoscopy). TNM stages I and II were found in 52 (54.2%) and 60 (41.7%) patients in the early colonoscopy group and standard-schedule colonoscopy group. Stage IV was confirmed in 13 patients (13.5%) diagnosed in the early colonoscopy group and in 40 (28%) detected in the standard-schedule colonoscopy group. Survival rates at 12 and 60 months after treatment were significantly higher in the early colonoscopy group compared with the standard-schedule colonoscopy group (p < 0.001).

Limitations

Controlled randomization of early versus standard-referral colonoscopy, size and scope of analysis, the time interval from symptom onset to first physician assessment, and the different locations of colorectal cancer between groups were limitations of the study.

Conclusions

Colonoscopy within 30 days from referral improves outcome in patients with symptomatic colorectal cancer. See Video Abstract at http://journals.lww.com/dcrjournal/Pages/videogallery.aspx.",2017-08-01 +28724130,Early Postimplant Speech Perception and Language Skills Predict Long-Term Language and Neurocognitive Outcomes Following Pediatric Cochlear Implantation.,"

Purpose

We sought to determine whether speech perception and language skills measured early after cochlear implantation in children who are deaf, and early postimplant growth in speech perception and language skills, predict long-term speech perception, language, and neurocognitive outcomes.

Method

Thirty-six long-term users of cochlear implants, implanted at an average age of 3.4 years, completed measures of speech perception, language, and executive functioning an average of 14.4 years postimplantation. Speech perception and language skills measured in the 1st and 2nd years postimplantation and open-set word recognition measured in the 3rd and 4th years postimplantation were obtained from a research database in order to assess predictive relations with long-term outcomes.

Results

Speech perception and language skills at 6 and 18 months postimplantation were correlated with long-term outcomes for language, verbal working memory, and parent-reported executive functioning. Open-set word recognition was correlated with early speech perception and language skills and long-term speech perception and language outcomes. Hierarchical regressions showed that early speech perception and language skills at 6 months postimplantation and growth in these skills from 6 to 18 months both accounted for substantial variance in long-term outcomes for language and verbal working memory that was not explained by conventional demographic and hearing factors.

Conclusion

Speech perception and language skills measured very early postimplantation, and early postimplant growth in speech perception and language, may be clinically relevant markers of long-term language and neurocognitive outcomes in users of cochlear implants.

Supplemental materials

https://doi.org/10.23641/asha.5216200.",2017-08-01 +28787666,ERas is constitutively expressed in full term placenta of pregnant cows.,"ERas is a new gene recently found in mouse embryonic stem (ES) cells and localized on the X chromosome. It plays a role in mouse ES cell survival and is constitutively active without any mutations. It was also found to be responsible for the maintenance of quiescence of the hepatic stellate cells (HSCs), liver-resident mesenchymal stem cells, the activation of which results in liver fibrosis. This gene was not present in human ES cells. ERas was found to be activated in a significant population of human gastric cancer, where ERAS may play a crucial role in gastric cancer cell survival and metastases to liver via down-regulation of E-cadherin. ERas gene has been found to be expressed both in ES cells and adult tissues of cynomolgus monkey. Cynomolgus ERAS did not promote cell proliferation or induce tumor formation. ERAS was also detected in normal and neoplastic urothelium of the urinary bladder in cattle, where bovine ERAS formed a constitutive complex with platelet derived growth factor β receptor (PDGFβR) resulting in the activation of AKT signaling. Here, molecular and morphological findings of ERAS in the full term placenta of pregnant cows have been investigated for the first time. ERAS was studied by reverse transcriptase PCR (RT-PCR). Alignment of the sequence detects a 100% identity with all transcript variant bovine ERas mRNAs, present in the GenBank database (http://www.ncbi.nlm.nih.gov). Furthermore, ERAS was detected by Western blot and investigated by real time PCR that revealed an amount of ERAS more than ERAS found in normal bovine urothelium but less than ERAS present in the liver. Immunohistochemical examination revealed the presence of ERAS protein both at the level of plasma membrane and in cytoplasm of epithelial cells lining caruncular crypts and in trophoblasts of villi. 
An evident ERAS immunoreactivity was also seen throughout the chorionic and uterine gland epithelium. Although this is not a functional study and further investigations will be warranted, it is conceivable that ERAS may have pleiotropic effects in the placenta, some of which, like normal urothelial cells, might lead to activation of AKT pathway. We speculate that ERAS may play a key role in cellular processes such as cell differentiation and movement. Accordingly, we believe it may be an important factor involved in trophoblast invasiveness via AKT signaling pathway. Therefore, ERas gene is a functional gene which contributes to homeostasis of bovine placenta.",2017-08-01 +27977431,Effectiveness of Extracorporeal Shock Wave Therapy Without Local Anesthesia in Patients With Recalcitrant Plantar Fasciitis: A Meta-Analysis of Randomized Controlled Trials.,"

Objective

The objective of this meta-analysis was to investigate the efficacy of extracorporeal shock wave therapy in the treatment of recalcitrant plantar fasciitis without local anesthesia.

Methods

The Cochrane Library, EMBASE, PubMed, and Web of Science databases were searched from inception to September 2015 for randomized controlled trials comparing ESWT without local anesthesia versus placebo for treatment of plantar fasciitis in adults. The primary outcome was the 12-week post-intervention success rate of reducing the visual analog scale score by 60% from baseline at the first step in the morning, reducing the VAS score by 60% from baseline during daily activities, reducing the Roles and Maudsley score, reducing overall heel pain, and reducing pain after applying a force meter.

Results

Nine studies were included in the meta-analysis. Compared with placebo, ESWT significantly improved the success rate of reducing overall heel pain, reducing the VAS score by 60% at the first step in the morning and during daily activities, improving the Roles and Maudsley score to excellent or good, and reducing heel pain after application of a pressure meter.

Conclusions

ESWT seems to be particularly effective in relieving pain associated with RPF. ESWT should be considered when traditional treatments have failed.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to (1) understand the recovery rates for nonsurgical treatment of plantar fasciitis, (2) understand the role of extracorporeal shockwave therapy (ESWT) in the treatment of recalcitrant plantar fasciitis, and (3) understand the indications to incorporate ESWT in the treatment plan of recalcitrant plantar fasciitis.

Level

Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians. The Association of Academic Physiatrists designates this activity for a maximum of 1.5 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2017-08-01 +27508267,"CZTS x Se1-x nanocrystals: Composition dependent method of preparation, morphological characterization and cyclic voltammetry data analysis.","In this article, synthesis procedures of preparation of copper zinc tin sulpho-selenide (CZTS x Se1-x ) alloy nanocrystals and the data acquired for the material characterization are presented. This data article is related to the research article doi: http://dx.doi.org/10.1016/j.solmat.2016.06.030 (Jadhav et al., 2016) [1]. FTIR data have been presented which helped in confirmation of adsorption of oleylamine on CZTS x Se1-x . Transmission electron microscopy (TEM), Field emission scanning electron microscopy (FESEM) and atomic force microscopy (AFM) data have been presented which have been used to reveal the morphological details of the nanocrystals. The Energy dispersive X-ray analysis (EDAX) based elemental mapping data has been presented to confirm the elemental composition of nanocrystals. Procedure for the preparation of CZTS x Se1-x based working electrode for the CV measurements have been given. The summary table for the optical, electrochemical band gaps, valence and conduction band edges as a function of composition are listed for the ready reference.",2016-07-19 +27222852,CO2 and O2 solubility and diffusivity data in food products stored in data warehouse structured by ontology.,"This data article contains values of oxygen and carbon dioxide solubility and diffusivity measured in various model and real food products. 
These data are stored in a public repository structured by ontology. These data can be retrieved through the @Web tool, a user-friendly interface to capitalise and query data. The @Web tool is accessible online at http://pfl.grignon.inra.fr/atWeb/.",2016-04-26 +24136511,"From proteomics to systems biology: MAPA, MASS WESTERN, PROMEX, and COVAIN as a user-oriented platform.","Genome sequencing and systems biology are revolutionizing life sciences. Proteomics emerged as a fundamental technique of this novel research area as it is the basis for gene function analysis and modeling of dynamic protein networks. Here a complete proteomics platform suited for functional genomics and systems biology is presented. The strategy includes MAPA (mass accuracy precursor alignment; http://www.univie.ac.at/mosys/software.html ) as a rapid exploratory analysis step; MASS WESTERN for targeted proteomics; COVAIN ( http://www.univie.ac.at/mosys/software.html ) for multivariate statistical analysis, data integration, and data mining; and PROMEX ( http://www.univie.ac.at/mosys/databases.html ) as a database module for proteogenomics and proteotypic peptides for targeted analysis. Moreover, the presented platform can also be utilized to integrate metabolomics and transcriptomics data for the analysis of metabolite-protein-transcript correlations and time course analysis using COVAIN. Examples for the integration of MAPA and MASS WESTERN data, proteogenomic and metabolic modeling approaches for functional genomics, phosphoproteomics by integration of MOAC (metal-oxide affinity chromatography) with MAPA, and the integration of metabolomics, transcriptomics, proteomics, and physiological data using this platform are presented. 
All software and step-by-step tutorials for data processing and data mining can be downloaded from http://www.univie.ac.at/mosys/software.html.",2014-01-01 +25166490,isoMETLIN: a database for isotope-based metabolomics.,"The METLIN metabolite database has become one of the most widely used resources in metabolomics for making metabolite identifications. However, METLIN is not designed to identify metabolites that have been isotopically labeled. As a result, unbiasedly tracking the transformation of labeled metabolites with isotope-based metabolomics is a challenge. Here, we introduce a new database, called isoMETLIN (http://isometlin.scripps.edu/), that has been developed specifically to identify metabolites incorporating isotopic labels. isoMETLIN enables users to search all computed isotopologues derived from METLIN on the basis of mass-to-charge values and specified isotopes of interest, such as (13)C or (15)N. Additionally, isoMETLIN contains experimental MS/MS data on hundreds of isotopomers. These data assist in localizing the position of isotopic labels within a metabolite. From these experimental MS/MS isotopomer spectra, precursor atoms can be mapped to fragments. The MS/MS spectra of additional isotopomers can then be computationally generated and included within isoMETLIN. Given that isobaric isotopomers cannot be separated chromatographically or by mass but are likely to occur simultaneously in a biological system, we have also implemented a spectral-mixing function in isoMETLIN. This functionality allows users to combine MS/MS spectra from various isotopomers in different ratios to obtain a theoretical MS/MS spectrum that matches the MS/MS spectrum from a biological sample. 
Thus, by searching MS and MS/MS experimental data, isoMETLIN facilitates the identification of isotopologues as well as isotopomers from biological samples and provides a platform to drive the next generation of isotope-based metabolomic studies.",2014-09-19 +29761932,Attentional selection and suppression in children and adults.,"The fundamental role of covert spatial attention is to enhance the processing of attended items while simultaneously ignoring irrelevant items. However, relatively little is known about how brain electrophysiological activities associated with target selection and distractor suppression are involved as they develop and become fully functional. The current study aimed to identify the neurophysiological bases of the development of covert spatial attention, focusing on electroencephalographic (EEG) markers of attentional selection (N2pc) and suppression (PD ). EEG data were collected from healthy young adults and typically developing children (9-15 years old) as they searched for a shape singleton target in either the absence or the presence of a salient-but-irrelevant color singleton distractor. The ERP results showed that a lateral shape target elicited a smaller N2pc in children compared with adults regardless of whether a distractor was present or not. Moreover, the target-elicited N2pc was always followed by a similar positivity in both age groups. Counterintuitively, a lateral salient-but-irrelevant distractor elicited a large PD in children with low behavioral accuracy, whereas high-accuracy children exhibited a small and ""adult-like"" PD . More importantly, we found no evidence for a correlation between the target-elicited N2pc and the distractor-elicited PD in either age group. Our results provide neurophysiological evidence for the developmental differences between target selection and distractor suppression. 
Compared with adults, 9-15-year-old children deploy insufficient attentional selection resources to targets but use ""adult-like"" or even more attentional suppression resources to resist irrelevant distractors. A video abstract of this article can be viewed at: https://www.youtube.com/watch?v=NhWapx0d75I.",2018-05-15 +,Litterfall Dynamics Under Different Tropical Forest Restoration Strategies in Costa Rica,"In degraded tropical pastures, active restoration strategies have the potential to facilitate forest regrowth at rates that are faster than natural recovery, enhancing litterfall, and nutrient inputs to the forest floor. We evaluated litterfall and nutrient dynamics under four treatments: plantation (entire area planted), tree islands (planting in six patches of three sizes), control (same age natural regeneration), and young secondary forest (7-9-yr-old natural regeneration). Treatments were established in plots of 50 × 50 m at six replicate sites in southern Costa Rica and the annual litterfall production was measured 5 yr after treatment establishment. Planted species included two native timber-producing hardwoods (Terminalia amazonia and Vochysia guatemalensis) interplanted with two N-fixing species (Inga edulis and Erythrina poeppigiana). Litter production was highest in secondary forests (7.3 Mg/ha/yr) and plantations (6.3), intermediate in islands (3.5), and lowest in controls (1.4). Secondary forests had higher input of all nutrients except N when compared with the plantation plots. Inga contributed 70 percent of leaffall in the plantations, demonstrating the influence that one species can have on litter quantity and quality. Although tree islands had lower litterfall rates, they were similar to plantations in inputs of Mg, K, P, Zn, and Mn. Tree islands increased litter production and nutrient inputs more quickly than natural regeneration. 
In addition to being less resource intensive than conventional plantations, this planting design promotes a more rapid increase in litter diversity and more spatial heterogeneity, which can accelerate the rate of nutrient cycling and facilitate forest recovery. Abstract in Spanish is available at http://www.blackwell-synergy.com/loi/btp.",2011-05-01 +23153189,A gene expression atlas of the domestic pig.,"

Background

This work describes the first genome-wide analysis of the transcriptional landscape of the pig. A new porcine Affymetrix expression array was designed in order to provide comprehensive coverage of the known pig transcriptome. The new array was used to generate a genome-wide expression atlas of pig tissues derived from 62 tissue/cell types. These data were subjected to network correlation analysis and clustering.

Results

The analysis presented here provides a detailed functional clustering of the pig transcriptome where transcripts are grouped according to their expression pattern, so one can infer the function of an uncharacterized gene from the company it keeps and the locations in which it is expressed. We describe the overall transcriptional signatures present in the tissue atlas, where possible assigning those signatures to specific cell populations or pathways. In particular, we discuss the expression signatures associated with the gastrointestinal tract, an organ that was sampled at 15 sites along its length and whose biology in the pig is similar to human. We identify sets of genes that define specialized cellular compartments and region-specific digestive functions. Finally, we performed a network analysis of the transcription factors expressed in the gastrointestinal tract and demonstrate how they sub-divide into functional groups that may control cellular gastrointestinal development.

Conclusions

As an important livestock animal with a physiology that is more similar than mouse to man, we provide a major new resource for understanding gene expression with respect to the known physiology of mammalian tissues and cells. The data and analyses are available on the websites http://biogps.org and http://www.macrophages.com/pig-atlas.",2012-11-15 +28775115,Association of Concentric Left Ventricular Hypertrophy With Subsequent Change in Left Ventricular End-Diastolic Volume: The Dallas Heart Study. ,"In the conventional paradigm of the progression of left ventricular hypertrophy, a thick-walled left ventricle (LV) ultimately transitions to a dilated cardiomyopathy. There are scant data in humans demonstrating whether this transition occurs commonly without an interval myocardial infarction. Participants (n=1282) from the Dallas Heart Study underwent serial cardiac magnetic resonance ≈7 years apart. Those with interval cardiovascular events and a dilated LV (increased LV end-diastolic volume [EDV] indexed to body surface area) at baseline were excluded. Multivariable linear regression models tested the association of concentric hypertrophy (increased LV mass and LV mass/volume^0.67) with change in LVEDV. The study cohort had a median age of 44 years, 57% women, 43% black, and 11% (n=142) baseline concentric hypertrophy. The change in LVEDV in those with versus without concentric hypertrophy was 1 mL (-9 to 12) versus -2 mL (-11 to 7), respectively, P<0.01. In multivariable linear regression models, concentric hypertrophy was associated with larger follow-up LVEDV (P≤0.01). The progression to a dilated LV was uncommon (2%, n=25). In the absence of interval myocardial infarction, concentric hypertrophy was associated with a small, but significantly greater, increase in LVEDV after 7-year follow-up. However, the degree of LV enlargement was minimal, and few participants developed a dilated LV. 
These data suggest that if concentric hypertrophy does progress to a dilated cardiomyopathy, such a transition would occur over a much longer timeframe (eg, decades) and perhaps less common than previously thought. URL: http://www.clinicaltrials.gov. Unique identifier: NCT00344903.",2017-08-01 +28759896,An analysis of diagnoses that drive readmission: What can we learn from the hospitals in Southern New England with the highest and lowest readmission performance?,"Background: The Hospital Readmission Reduction Program was instituted by the Centers for Medicare & Medicaid Services in 2012 to incentivize hospitals to reduce readmissions.

Objective

To examine the most common diagnoses driving readmissions among fee-for-service Medicare beneficiaries in the hospitals with the highest and lowest readmission performance in Southern New England from 2014 to 2016.

Methods

This is a retrospective observational study using publicly available Hospital Compare data and Medicare Part A claims data. Hospitals were ranked based on risk-adjusted excess readmission ratios. Patient demographic and hospital characteristics were compared for the two cohorts using t-tests. The percentages of readmissions in each cohort attributable to the top three readmission diagnoses were examined.

Results

Highest-performing hospitals readmitted a significantly lower percentage of black patients (p=0.03), were less urban (p<0.01), and had higher Hospital Compare Star ratings (p=0.01). Lowest-performing hospitals readmitted higher percentages of patients for sepsis (9.4% [95%CI: 8.8%-10.0%] vs. 8.1% [95%CI: 7.4%-8.7%]) and complications of device, implant, or graft (3.2% [95%CI: 2.5%-3.9%] vs. 0.2% [95%CI: 0.1%-0.6%]), compared to highest-performing hospitals.

Conclusions

Ongoing efforts to improve care transitions may be strengthened by targeting early infection surveillance, promoting adherence to surgical treatment guidelines, and improving communication between hospitals and post-acute care facilities. [Full article available at http://rimed.org/rimedicaljournal-2017-08.asp].",2017-08-01 +28624766,DHEA protects mitochondria against dual modes of apoptosis and necroptosis in human granulosa HO23 cells.,"Because ovarian granulosa cells are essential for oocyte maturation and development, we validated human granulosa HO23 cells to evaluate the ability of the DHEA to prevent cell death after starvation. The present study was aimed to investigate whether DHEA could protect against starvation-induced apoptosis and necroptosis in human oocyte granulosa HO23 cells. The starvation was induced by treatment of serum-free (SF) medium for 4 h in vitro. Starvation-induced mitochondrial depolarization, cytochrome c release and caspase-3 activation were largely prevented by DHEA in HO23 cells. We found that treatment with DHEA can restore starvation-induced reactive oxygen species (ROS) generation and mitochondrial membrane potential imbalance. In addition, treatment of DHEA prevents cell death via upregulation of cytochrome c and downregulation of BAX in mitochondria. Most importantly, DHEA is ameliorated to mitochondrial function mediated through the decrease in mitochondrial ROS, maintained mitochondrial morphology, and enhancing the ability of cell proliferation and ROS scavenging. Our present data strongly indicate that DHEA reduces programmed cell death (apoptosis and necroptosis) in granulosa HO23 cells through multiple interactions with the mitochondrion-dependent programmed cell death pathway. 
Taken together, our data suggest that the presence of DHEA could be beneficial to protect human oocyte granulosa HO23 cells under in vitro culture conditions during various assisted reproductive technology (ART) programs. Free Chinese abstract: A Chinese translation of this abstract is freely available at http://www.reproduction-online.org/content/154/2/101/suppl/DC1.",2017-08-01 +26305468,A Catalog of Proteins Expressed in the AG Secreted Fluid during the Mature Phase of the Chinese Mitten Crabs (Eriocheir sinensis).,"The accessory gland (AG) is an important component of the male reproductive system of arthropods, its secretions enhance fertility, some AG proteins bind to the spermatozoa and affect its function and properties. Here we report the first comprehensive catalog of the AG secreted fluid during the mature phase of the Chinese mitten crab (Eriocheir sinensis). AG proteins were separated by one-dimensional gel electrophoresis and analyzed by reverse phase high-performance liquid chromatography coupled with tandem mass spectrometry (HPLC-MS/MS). Altogether, the mass spectra of 1173 peptides were detected (1067 without decoy and contaminants) which allowed for the identification of 486 different proteins annotated upon the NCBI database (http://www.ncbi.nlm.nih.gov/) and our transcriptome dataset. The mass spectrometry proteomics data have been deposited at the ProteomeXchange with identifier PXD000700. An extensive description of the AG proteome will help provide the basis for a better understanding of a number of reproductive mechanisms, including potentially spermatophore breakdown, dynamic functional and morphological changes in sperm cells and sperm acrosin enzyme vitality. 
Thus, the comprehensive catalog of proteins presented here can serve as a valuable reference for future studies of sperm maturation and regulatory mechanisms involved in crustacean reproduction.",2015-08-25 +22125387,DiseaseComps: a metric that discovers similar diseases based upon common toxicogenomic profiles at CTD.,"

Unlabelled

The Comparative Toxicogenomics Database (CTD) is a free resource that describes chemical-gene-disease networks to help understand the effects of environmental exposures on human health. The database contains more than 13,500 chemical-disease and 14,200 gene-disease interactions. In CTD, chemicals and genes are associated with a disease via two types of relationships: as a biomarker or molecular mechanism for the disease (M-type) or as a real or putative therapy for the disease (T-type). We leveraged these curated datasets to compute similarity indices that can be used to produce lists of comparable diseases (""DiseaseComps"") based upon shared toxicogenomic profiles. This new metric now classifies diseases with common molecular characteristics, instead of the traditional approach of using histology or tissue of origin to define the disorder. In the dawning era of ""personalized medicine"", this feature provides a new way to view and describe diseases and will help develop testable hypotheses about chemical-gene-disease networks.

Availability

The database is available for free at http://ctd.mdibl.org/",2011-10-14 +28234475,Activity Landscape Plotter: A Web-Based Application for the Analysis of Structure-Activity Relationships.,"Activity landscape modeling is a powerful method for the quantitative analysis of structure-activity relationships. This cheminformatics area is in continuous growth, and several quantitative and visual approaches are constantly being developed. However, these approaches often fall into disuse due to their limited access. Herein, we present Activity Landscape Plotter as the first freely available web-based tool to automatically analyze structure-activity relationships of compound data sets. Based on the concept of activity landscape modeling, the online service performs pairwise structure and activity relationships from an input data set supplied by the user. For visual analysis, Activity Landscape Plotter generates Structure-Activity Similarity and Dual-Activity Difference maps. The user can interactively navigate through the maps and export all the pairwise structure-activity information as comma delimited files. Activity Landscape Plotter is freely accessible at https://unam-shiny-difacquim.shinyapps.io/ActLSmaps/.",2017-03-02 +28094294,Version 1.1 of the international spinal cord injury skin and thermoregulation function basic data set.,"

Objective

To describe the changes made to the international spinal cord injury (SCI) skin and thermoregulation function basic data set in version 1.1.

Setting

International.

Methods

An international working group reviewed suggested changes to the international SCI skin and thermoregulation function basic data set version 1.0. These changes were discussed and the agreed changes were made. Subsequently, the recommended adjustments were circulated for review to the International Spinal Cord Society (ISCoS) Executive and Scientific Committees, the American Spinal Injury Association (ASIA) Board, around 40 national and international societies, and to interested individuals who had signed up wishing to have the opportunity to review. In addition, the suggested changes were displayed at the ISCoS and ASIA websites for at least a month for possible comments.

Results

The recommendation 'largest diameter, including undermining' is changed to: 'Largest undermining', and a description of how to measure this is inserted. The 'smallest opening diameter' is changed to: 'Width' as the maximum dimension perpendicular to the length axis. In the literature, there is a tendency to replace 'grades' or 'stages' with 'categories'; therefore, the word 'category' is used instead of 'grade' or 'stage'.

Conclusions

Impracticable measurements have been adjusted and new terminology adopted. All are to be found on ISCoS website: http://www.iscos.org.uk/international-sci-skin-and-thermoregulation-function-data-sets.",2017-01-17 +28937959,Prenatal Fluoride Exposure and Cognitive Outcomes in Children at 4 and 6-12 Years of Age in Mexico.,"

Background

Some evidence suggests that fluoride may be neurotoxic to children. Few of the epidemiologic studies have been longitudinal, had individual measures of fluoride exposure, addressed the impact of prenatal exposures or involved more than 100 participants.

Objective

Our aim was to estimate the association of prenatal exposure to fluoride with offspring neurocognitive development.

Methods

We studied participants from the Early Life Exposures in Mexico to Environmental Toxicants (ELEMENT) project. An ion-selective electrode technique was used to measure fluoride in archived urine samples taken from mothers during pregnancy and from their children when 6-12 y old, adjusted for urinary creatinine and specific gravity, respectively. Child intelligence was measured by the General Cognitive Index (GCI) of the McCarthy Scales of Children's Abilities at age 4 and full scale intelligence quotient (IQ) from the Wechsler Abbreviated Scale of Intelligence (WASI) at age 6-12.

Results

We had complete data on 299 mother-child pairs, of whom 287 and 211 had data for the GCI and IQ analyses, respectively. Mean (SD) values for urinary fluoride in all of the mothers (n=299) and children with available urine samples (n=211) were 0.90 (0.35) mg/L and 0.82 (0.38) mg/L, respectively. In multivariate models we found that an increase in maternal urine fluoride of 0.5mg/L (approximately the IQR) predicted 3.15 (95% CI: -5.42, -0.87) and 2.50 (95% CI -4.12, -0.59) lower offspring GCI and IQ scores, respectively.

Conclusions

In this study, higher prenatal fluoride exposure, in the general range of exposures reported for other general population samples of pregnant women and nonpregnant adults, was associated with lower scores on tests of cognitive function in the offspring at age 4 and 6-12 y. https://doi.org/10.1289/EHP655.",2017-09-19 +26195308,CycloBranch: De Novo Sequencing of Nonribosomal Peptides from Accurate Product Ion Mass Spectra.,"Nonribosomal peptides have a wide range of biological and medical applications. Their identification by tandem mass spectrometry remains a challenging task. A new open-source de novo peptide identification engine CycloBranch was developed and successfully applied in identification or detailed characterization of 11 linear, cyclic, branched, and branch-cyclic peptides. CycloBranch is based on annotated building block databases the size of which is defined by the user according to ribosomal or nonribosomal peptide origin. The current number of involved nonisobaric and isobaric building blocks is 287 and 521, respectively. Contrary to all other peptide sequencing tools utilizing either peptide libraries or peptide fragment libraries, CycloBranch represents a true de novo sequencing engine developed for accurate mass spectrometric data. It is a stand-alone and cross-platform application with a graphical and user-friendly interface; it supports mzML, mzXML, mgf, txt, and baf file formats and can be run in parallel on multiple threads. It can be downloaded for free from http://ms.biomed.cas.cz/cyclobranch/ , where the User's manual and video tutorials can be found.",2015-07-21 +27312412,RADIS: analysis of RAD-seq data for interspecific phylogeny.,"

Unlabelled

In an attempt to make the processing of RAD-seq data easier and allow rapid and automated exploration of parameters/data for phylogenetic inference, we introduce the perl pipeline RADIS. Users of RADIS can let their raw Illumina data be processed up to phylogenetic tree inference, or stop (and restart) the process at some point. Different values for key parameters can be explored in a single analysis (e.g. loci building, sample/loci selection), making possible a thorough exploration of data. RADIS relies on Stacks for demultiplexing of data, removing PCR duplicates and building individual and catalog loci. Scripts have been specifically written for trimming of reads and loci/sample selection. Finally, RAxML is used for phylogenetic inferences, though other software may be utilized.

Availability and implementation

RADIS is written in perl, designed to run on Linux and Unix platforms. RADIS and its manual are freely available from http://www1.montpellier.inra.fr/CBGP/software/RADIS/.

Contact

astrid.cruaud@supagro.inra.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-16 +25925569,NGL Viewer: a web application for molecular visualization.,"The NGL Viewer (http://proteinformatics.charite.de/ngl) is a web application for the visualization of macromolecular structures. By fully adopting capabilities of modern web browsers, such as WebGL, for molecular graphics, the viewer can interactively display large molecular complexes and is also unaffected by the retirement of third-party plug-ins like Flash and Java Applets. Generally, the web application offers comprehensive molecular visualization through a graphical user interface so that life scientists can easily access and profit from available structural data. It supports common structural file-formats (e.g. PDB, mmCIF) and a variety of molecular representations (e.g. 'cartoon, spacefill, licorice'). Moreover, the viewer can be embedded in other web sites to provide specialized visualizations of entries in structural databases or results of structure-related calculations.",2015-04-29 +28858811,Network Clustering Analysis Using Mixture Exponential-Family Random Graph Models and Its Application in Genetic Interaction Data.,"

Motivation

Epistatic miniarray profile (EMAP) studies have enabled the mapping of large-scale genetic interaction networks and generated large amounts of data in model organisms. It provides an incredible set of molecular tools and advanced technologies that should be efficiently understanding the relationship between the genotypes and phenotypes of individuals. However, the network information gained from EMAP cannot be fully exploited using the traditional statistical network models. Because the genetic network is always heterogeneous, for example, the network structure features for one subset of nodes are different from those of the left nodes. Exponential-family random graph models (ERGMs) are a family of statistical models, which provide a principled and flexible way to describe the structural features (e.g., the density, centrality, and assortativity) of an observed network. However, the single ERGM is not enough to capture this heterogeneity of networks. In this paper, we consider a mixture ERGM (MixtureERGM) networks, which model a network with several communities, where each community is described by a single ERGM.

Results

EM algorithm is a classical method to solve the mixture problem, however, it will be very slow when the data size is huge in the numerous applications. We adopt an efficient novel online graph clustering algorithm to classify the graph nodes and estimate the ERGM parameters for the MixtureERGM. In comparison studies, the MixtureERGM outperforms the role analysis for the network cluster in which the mixture of exponential-family random graph model is developed for many ego-network according to their roles. One genetic interaction network of yeast and two real social networks (provided as supplemental materials, which can be found on the Computer Society Digital Library at http://doi.ieeecomputersociety.org/10.1109/TCBB.2017.2743711) show the wide potential application of the MixtureERGM.",2017-08-24 +29876371,"Data on genome sequencing, analysis and annotation of a pathogenic Bacillus cereus 062011msu.","Bacillus species 062011 msu is a harmful pathogenic strain responsible for causing abscessation in sheep and goat population studied by Mariappan et al. (2012) [1]. The organism specifically targets the female sheep and goat population and results in the reduction of milk and meat production. In the present study, we have performed the whole genome sequencing of the pathogenic isolate using the Ion Torrent sequencing platform and generated 458,944 raw reads with an average length of 198.2 bp. The genome sequence was assembled, annotated and analysed for the genetic islands, metabolic pathways, orthologous groups, virulence factors and antibiotic resistance genes associated with the pathogen. Simultaneously the 16S rRNA sequencing study and genome sequence comparison data confirmed that the strain belongs to the species Bacillus cereus and exhibits 99% sequence homo;logy with the genomes of B. cereus ATCC 10987 and B. cereus FRI-35. Hence, we have renamed the organism as Bacillus cereus 062011msu. 
The Whole Genome Shotgun (WGS) project has been deposited at DDBJ/ENA/GenBank under the accession NTMF00000000 (https://www.ncbi.nlm.nih.gov/bioproject/PRJNA404036(SAMN07629099)).",2018-01-03 +28460090,Identification of active miRNA promoters from nuclear run-on RNA sequencing.,"The genome-wide identification of microRNA transcription start sites (miRNA TSSs) is essential for understanding how miRNAs are regulated in development and disease. In this study, we developed mirSTP (mirna transcription Start sites Tracking Program), a probabilistic model for identifying active miRNA TSSs from nascent transcriptomes generated by global run-on sequencing (GRO-seq) and precision run-on sequencing (PRO-seq). MirSTP takes advantage of characteristic bidirectional transcription signatures at active TSSs in GRO/PRO-seq data, and provides accurate TSS prediction for human intergenic miRNAs at a high resolution. MirSTP performed better than existing generalized and experiment specific methods, in terms of the enrichment of various promoter-associated marks. MirSTP analysis of 27 human cell lines in 183 GRO-seq and 28 PRO-seq experiments identified TSSs for 480 intergenic miRNAs, indicating a wide usage of alternative TSSs. By integrating predicted miRNA TSSs with matched ENCODE transcription factor (TF) ChIP-seq data, we connected miRNAs into the transcriptional circuitry, which provides a valuable source for understanding the complex interplay between TF and miRNA. With mirSTP, we not only predicted TSSs for 72 miRNAs, but also identified 12 primary miRNAs with significant RNA polymerase pausing alterations after JQ1 treatment; each miRNA was further validated through BRD4 binding to its predicted promoter. 
MirSTP is available at http://bioinfo.vanderbilt.edu/mirSTP/.",2017-07-01 +26217722,"Data for the characterization of the HSP70 family during osmotic stress in banana, a non-model crop.","Here, we present the data from an in-depth analysis of the HSP70 family in the non-model banana during osmotic stress [1]. First, a manual curation of HSP70 sequences from the banana genome was performed and updated on the Musa hub http://banana-genome.cirad.fr/. These curated protein sequences were then introduced into our in-house Mascot database for an in-depth look at the HSP70 protein profiles in banana meristem cultures and roots during osmotic stress. A 2D-DIGE LC MS/MS approach was chosen to identify and quantify the different paralogs and allelic variants in the HSP70 spots.",2015-02-13 +27987172,Exploring Plant Co-Expression and Gene-Gene Interactions with CORNET 3.0.,"Selecting and filtering a reference expression and interaction dataset when studying specific pathways and regulatory interactions can be a very time-consuming and error-prone task. In order to reduce the duplicated efforts required to amass such datasets, we have created the CORNET (CORrelation NETworks) platform which allows for easy access to a wide variety of data types: coexpression data, protein-protein interactions, regulatory interactions, and functional annotations. The CORNET platform outputs its results in either text format or through the Cytoscape framework, which is automatically launched by the CORNET website.CORNET 3.0 is the third iteration of the web platform designed for the user exploration of the coexpression space of plant genomes, with a focus on the model species Arabidopsis thaliana. Here we describe the platform: the tools, data, and best practices when using the platform. We indicate how the platform can be used to infer networks from a set of input genes, such as upregulated genes from an expression experiment. 
By exploring the network, new target and regulator genes can be discovered, allowing for follow-up experiments and more in-depth study. We also indicate how to avoid common pitfalls when evaluating the networks and how to avoid over interpretation of the results.All CORNET versions are available at http://bioinformatics.psb.ugent.be/cornet/ .",2017-01-01 +25098325,MediaDB: a database of microbial growth conditions in defined media.,"Isolating pure microbial cultures and cultivating them in the laboratory on defined media is used to more fully characterize the metabolism and physiology of organisms. However, identifying an appropriate growth medium for a novel isolate remains a challenging task. Even organisms with sequenced and annotated genomes can be difficult to grow, despite our ability to build genome-scale metabolic networks that connect genomic data with metabolic function. The scientific literature is scattered with information about defined growth media used successfully for cultivating a wide variety of organisms, but to date there exists no centralized repository to inform efforts to cultivate less characterized organisms by bridging the gap between genomic data and compound composition for growth media. Here we present MediaDB, a manually curated database of defined media that have been used for cultivating organisms with sequenced genomes, with an emphasis on organisms with metabolic network models. The database is accessible online, can be queried by keyword searches or downloaded in its entirety, and can generate exportable individual media formulation files. The data assembled in MediaDB facilitate comparative studies of organism growth media, serve as a starting point for formulating novel growth media, and contribute to formulating media for in silico investigation of metabolic networks. 
MediaDB is freely available for public use at https://mediadb.systemsbiology.net.",2014-08-06 +28287984,Learning a No-Reference Quality Assessment Model of Enhanced Images With Big Data.,"In this paper, we investigate into the problem of image quality assessment (IQA) and enhancement via machine learning. This issue has long attracted a wide range of attention in computational intelligence and image processing communities, since, for many practical applications, e.g., object detection and recognition, raw images are usually needed to be appropriately enhanced to raise the visual quality (e.g., visibility and contrast). In fact, proper enhancement can noticeably improve the quality of input images, even better than originally captured images, which are generally thought to be of the best quality. In this paper, we present two most important contributions. The first contribution is to develop a new no-reference (NR) IQA model. Given an image, our quality measure first extracts 17 features through analysis of contrast, sharpness, brightness and more, and then yields a measure of visual quality using a regression module, which is learned with big-data training samples that are much bigger than the size of relevant image data sets. The results of experiments on nine data sets validate the superiority and efficiency of our blind metric compared with typical state-of-the-art full-reference, reduced-reference and NA IQA methods. The second contribution is that a robust image enhancement framework is established based on quality optimization. For an input image, by the guidance of the proposed NR-IQA measure, we conduct histogram modification to successively rectify image brightness and contrast to a proper level. Thorough tests demonstrate that our framework can well enhance natural images, low-contrast images, low-light images, and dehazed images. 
The source code will be released at https://sites.google.com/site/guke198701/publications.",2017-03-06 +28472498,BusyBee Web: metagenomic data analysis by bootstrapped supervised binning and annotation.,"Metagenomics-based studies of mixed microbial communities are impacting biotechnology, life sciences and medicine. Computational binning of metagenomic data is a powerful approach for the culture-independent recovery of population-resolved genomic sequences, i.e. from individual or closely related, constituent microorganisms. Existing binning solutions often require a priori characterized reference genomes and/or dedicated compute resources. Extending currently available reference-independent binning tools, we developed the BusyBee Web server for the automated deconvolution of metagenomic data into population-level genomic bins using assembled contigs (Illumina) or long reads (Pacific Biosciences, Oxford Nanopore Technologies). A reversible compression step as well as bootstrapped supervised binning enable quick turnaround times. The binning results are represented in interactive 2D scatterplots. Moreover, bin quality estimates, taxonomic annotations and annotations of antibiotic resistance genes are computed and visualized. Ground truth-based benchmarks of BusyBee Web demonstrate comparably high performance to state-of-the-art binning solutions for assembled contigs and markedly improved performance for long reads (median F1 scores: 70.02-95.21%). Furthermore, the applicability to real-world metagenomic datasets is shown. In conclusion, our reference-independent approach automatically bins assembled contigs or long reads, exhibits high sensitivity and precision, enables intuitive inspection of the results, and only requires FASTA-formatted input. 
The web-based application is freely accessible at: https://ccb-microbe.cs.uni-saarland.de/busybee.",2017-07-01 +27663493,Biospark: scalable analysis of large numerical datasets from biological simulations and experiments using Hadoop and Spark.,"Data-parallel programming techniques can dramatically decrease the time needed to analyze large datasets. While these methods have provided significant improvements for sequencing-based analyses, other areas of biological informatics have not yet adopted them. Here, we introduce Biospark, a new framework for performing data-parallel analysis on large numerical datasets. Biospark builds upon the open source Hadoop and Spark projects, bringing domain-specific features for biology.

Availability and implementation

Source code is licensed under the Apache 2.0 open source license and is available at the project website: https://www.assembla.com/spaces/roberts-lab-public/wiki/Biospark CONTACT: eroberts@jhu.eduSupplementary information: Supplementary data are available at Bioinformatics online.",2016-09-22 +27629153,Alignment of time-resolved data from high throughput experiments.,"To better understand the dynamics of the underlying processes in cells, it is necessary to take measurements over a time course. Modern high-throughput technologies are often used for this purpose to measure the behavior of cell products like metabolites, peptides, proteins, [Formula: see text]RNA or mRNA at different points in time. Compared to classical time series, the number of time points is usually very limited and the measurements are taken at irregular time intervals. The main reasons for this are the costs of the experiments and the fact that the dynamic behavior usually shows a strong reaction and fast changes shortly after a stimulus and then slowly converges to a certain stable state. Another reason might simply be missing values. It is common to repeat the experiments and to have replicates in order to carry out a more reliable analysis. The ideal assumptions that the initial stimulus really started exactly at the same time for all replicates and that the replicates are perfectly synchronized are seldom satisfied. Therefore, there is a need to first adjust or align the time-resolved data before further analysis is carried out. Dynamic time warping (DTW) is considered as one of the common alignment techniques for time series data with equidistant time points. In this paper, we modified the DTW algorithm so that it can align sequences with measurements at different, non-equidistant time points with large gaps in between. 
This type of data is usually known as time-resolved data characterized by irregular time intervals between measurements as well as non-identical time points for different replicates. This new algorithm can be easily used to align time-resolved data from high-throughput experiments and to come across existing problems such as time scarcity and existing noise in the measurements. We propose a modified method of DTW to adapt requirements imposed by time-resolved data by use of monotone cubic interpolation splines. Our presented approach provides a nonlinear alignment of two sequences that neither need to have equi-distant time points nor measurements at identical time points. The proposed method is evaluated with artificial as well as real data. The software is available as an R package tra (Time-Resolved data Alignment) which is freely available at: http://public.ostfalia.de/klawonn/tra.zip .",2016-08-03 +30338045,"23rd International Colloquium on Animal Cytogenetics and Genomics (23 ICACG) June 9-12, 2018, Saint-Petersburg, Russia.","In memory of Ingemar Gustavsson 23rd International Colloquium on Animal Cytogenetics and Genomics (23 ICACG) took place in June 9-12, 2018 in Saint-Petersburg, Russia. Organized biennially, the Colloquium runs from 1970. From its very start this meeting is associated with the name of Ingemar Gustavsson to whom we dedicated the Colloquium 2018. The long and productive career of Ingemar Gustavsson had focused on chromosomes and their fundamental role in animal physiology, fertility, health and production in the context of agriculture and veterinary medicine. His meticulous analysis of breeding data performed back in 1964-69 resulted in the unequivocal identification of an association between heterozygosity for the 1/29 translocation in Swedish cattle and reduction in the fertility of the breed. 
Eventually, the argument in favor of selective elimination of bulls carrying the translocation from the breeding programs prevailed and the field of modern veterinary cytogenetics was established. Participants from fourteen different countries attended the 23 ICACG in Russia, the country having long lasting traditions in cytogenetics and the Scientific schools of N.K. Koltzov, S.S. Chetverikov and A.S. Serebrovsky, geneticists who made important conceptual contributions to studies of chromosomes and genes, population genetics and evolutionary theory as early as in the beginning of the XX-th century. All the abstracts received were subdivided between plenary and seven scientific sessions covering the issues in evolutionary and comparative cytogenetics, cytogenetics and genomes of domestic animals, meiosis studies, particular chromosome analyses, clinical cytogenetics, karyotypes and genomes of vertebrate and invertebrate animals, chromatin studies. In the abstract text below each presentation is marked with a capital letter: ""L"" stands for lectures, ""O"" for oral presentations and ""P"" for poster presentations. We gratefully acknowledge the support from the Saint-Petersburg Association of Scientists and Scholars (SPbSU), Veterinary Genetics Center ZOOGEN, Russian Foundation for Basic Research (RFBR), VEUK, Helicon, Axioma BIO, BioVitrum, Sartorius, DIA-M companies. The current collected abstracts comprise written contributions of the presentations during the 23 ICACG and were edited by Svetlana Galkina and Maria Vishnevskaya. The next Colloquium - 24 ICACG - will be held at the University of Kent in Canterbury (UK) in 2020. Please, cite abstracts as follows: Gall JG (2018) Giant chromosomes and deep sequences: what the amphibian egg tells us about transcription. In: Galkina SA, Vishnevskaya MS, Mikhailova EI (Eds) 23rd Inernational Colloquium on Animal Cytogenetics and Genomics (23rdICACG), June 9-12, 2018, St Petersburg, Russia. 
Comparative Cytogenetics 12(3): p-p. https://doi.org/10.3897/CompCytogen.v12i3.27748.",2018-08-16 +24667251,"A framework for organizing cancer-related variations from existing databases, publications and NGS data using a High-performance Integrated Virtual Environment (HIVE).","Years of sequence feature curation by UniProtKB/Swiss-Prot, PIR-PSD, NCBI-CDD, RefSeq and other database biocurators has led to a rich repository of information on functional sites of genes and proteins. This information along with variation-related annotation can be used to scan human short sequence reads from next-generation sequencing (NGS) pipelines for presence of non-synonymous single-nucleotide variations (nsSNVs) that affect functional sites. This and similar workflows are becoming more important because thousands of NGS data sets are being made available through projects such as The Cancer Genome Atlas (TCGA), and researchers want to evaluate their biomarkers in genomic data. BioMuta, an integrated sequence feature database, provides a framework for automated and manual curation and integration of cancer-related sequence features so that they can be used in NGS analysis pipelines. Sequence feature information in BioMuta is collected from the Catalogue of Somatic Mutations in Cancer (COSMIC), ClinVar, UniProtKB and through biocuration of information available from publications. Additionally, nsSNVs identified through automated analysis of NGS data from TCGA are also included in the database. Because of the petabytes of data and information present in NGS primary repositories, a platform HIVE (High-performance Integrated Virtual Environment) for storing, analyzing, computing and curating NGS data and associated metadata has been developed. Using HIVE, 31 979 nsSNVs were identified in TCGA-derived NGS data from breast cancer patients. All variations identified through this process are stored in a Curated Short Read archive, and the nsSNVs from the tumor samples are included in BioMuta. 
Currently, BioMuta has 26 cancer types with 13 896 small-scale and 308 986 large-scale study-derived variations. Integration of variation data allows identifications of novel or common nsSNVs that can be prioritized in validation studies. Database URL: BioMuta: http://hive.biochemistry.gwu.edu/tools/biomuta/index.php; CSR: http://hive.biochemistry.gwu.edu/dna.cgi?cmd=csr; HIVE: http://hive.biochemistry.gwu.edu.",2014-03-25 +29091816,Chemical analysis of the Alphaproteobacterium strain MOLA1416 associated with the marine lichen Lichina pygmaea.,"Alphaproteobacterium strain MOLA1416, related to Mycoplana ramosa DSM 7292 and Chelativorans intermedius CC-MHSW-5 (93.6% 16S rRNA sequence identity) was isolated from the marine lichen, Lichina pygmaea and its chemical composition was characterized by a metabolomic network analysis using LC-MS/MS data. Twenty-five putative different compounds were revealed using a dereplication workflow based on MS/MS signatures available through GNPS (https://gnps.ucsd.edu/). In total, ten chemical families were highlighted including isocoumarins, macrolactones, erythrinan alkaloids, prodiginines, isoflavones, cyclohexane-diones, sterols, diketopiperazines, amino-acids and most likely glucocorticoids. Among those compounds, two known metabolites (13 and 26) were isolated and structurally identified and metabolite 26 showed a high cytotoxic activity against B16 melanoma cell lines with an IC50 0.6 ± 0.07 μg/mL.",2017-11-05 +22701460,The seed proteome web portal.,"The Seed Proteome Web Portal (SPWP; http://www.seed-proteome.com/) gives access to information both on quantitative seed proteomic data and on seed-related protocols. Firstly, the SPWP provides access to the 475 different Arabidopsis seed proteins annotated from two dimensional electrophoresis (2DE) maps. Quantitative data are available for each protein according to their accumulation profile during the germination process. 
These proteins can be retrieved either in list format or directly on scanned 2DE maps. These proteomic data reveal that 40% of seed proteins maintain a stable abundance over germination, up to radicle protrusion. During sensu stricto germination (24 h upon imbibition) about 50% of the proteins display quantitative variations, exhibiting an increased abundance (35%) or a decreasing abundance (15%). Moreover, during radicle protrusion (24-48 h upon imbibition), 41% proteins display quantitative variations with an increased (23%) or a decreasing abundance (18%). In addition, an analysis of the seed proteome revealed the importance of protein post-translational modifications as demonstrated by the poor correlation (r(2) = 0.29) between the theoretical (predicted from Arabidopsis genome) and the observed protein isoelectric points. Secondly, the SPWP is a relevant technical resource for protocols specifically dedicated to Arabidopsis seed proteome studies. Concerning 2D electrophoresis, the user can find efficient procedures for sample preparation, electrophoresis coupled with gel analysis, and protein identification by mass spectrometry, which we have routinely used during the last 12 years. Particular applications such as the detection of oxidized proteins or de novo synthesized proteins radiolabeled by [(35)S]-methionine are also given in great details. Future developments of this portal will include proteomic data from studies such as dormancy release and protein turnover through de novo protein synthesis analyses during germination.",2012-06-11 +28738060,Community detection in sequence similarity networks based on attribute clustering.,"Networks are powerful tools for the presentation and analysis of interactions in multi-component systems. A commonly studied mesoscopic feature of networks is their community structure, which arises from grouping together similar nodes into one community and dissimilar nodes into separate communities. 
Here, the community structure of protein sequence similarity networks is determined with a new method: Attribute Clustering Dependent Communities (ACDC). Sequence similarity has hitherto typically been quantified by the alignment score or its expectation value. However, pair alignments with the same score or expectation value cannot thus be differentiated. To overcome this deficiency, the method constructs, for pair alignments, an extended alignment metric, the link attribute vector, which includes the score and other alignment characteristics. Rescaling components of the attribute vectors qualitatively identifies a systematic variation of sequence similarity within protein superfamilies. The problem of community detection is then mapped to clustering the link attribute vectors, selection of an optimal subset of links and community structure refinement based on the partition density of the network. ACDC-predicted communities are found to be in good agreement with gold standard sequence databases for which the ""ground truth"" community structures (or families) are known. ACDC is therefore a community detection method for sequence similarity networks based entirely on pair similarity information. A serial implementation of ACDC is available from https://cmb.ornl.gov/resources/developments.",2017-07-24 +28088185,iGC-an integrated analysis package of gene expression and copy number alteration.,"

Background

With the advancement in high-throughput technologies, researchers can simultaneously investigate gene expression and copy number alteration (CNA) data from individual patients at a lower cost. Traditional analysis methods analyze each type of data individually and integrate their results using Venn diagrams. Challenges arise, however, when the results are irreproducible and inconsistent across multiple platforms. To address these issues, one possible approach is to concurrently analyze both gene expression profiling and CNAs in the same individual.

Results

We have developed an open-source R/Bioconductor package (iGC). Multiple input formats are supported and users can define their own criteria for identifying differentially expressed genes driven by CNAs. The analysis of two real microarray datasets demonstrated that the CNA-driven genes identified by the iGC package showed significantly higher Pearson correlation coefficients with their gene expression levels and copy numbers than those genes located in a genomic region with CNA. Compared with the Venn diagram approach, the iGC package showed better performance.

Conclusion

The iGC package is effective and useful for identifying CNA-driven genes. By simultaneously considering both comparative genomic and transcriptomic data, it can provide better understanding of biological and medical questions. The iGC package's source code and manual are freely available at https://www.bioconductor.org/packages/release/bioc/html/iGC.html .",2017-01-14 +24574116,MGDB: crossing the marker genes of a user microarray with a database of public-microarrays marker genes.,"

Summary

The microarrays performed by scientific teams grow exponentially. These microarray data could be useful for researchers around the world, but unfortunately they are underused. To fully exploit these data, it is necessary (i) to extract these data from a repository of the high-throughput gene expression data like Gene Expression Omnibus (GEO) and (ii) to make the data from different microarrays comparable with tools easy to use for scientists. We have developed these two solutions in our server, implementing a database of microarray marker genes (Marker Genes Data Base). This database contains the marker genes of all GEO microarray datasets and it is updated monthly with the new microarrays from GEO. Thus, researchers can see whether the marker genes of their microarray are marker genes in other microarrays in the database, expanding the analysis of their microarray to the rest of the public microarrays. This solution helps not only to corroborate the conclusions regarding a researcher's microarray but also to identify the phenotype of different subsets of individuals under investigation, to frame the results with microarray experiments from other species, pathologies or tissues, to search for drugs that promote the transition between the studied phenotypes, to detect undesirable side effects of the treatment applied, etc. Thus, the researcher can quickly add relevant information to his/her studies from all of the previous analyses performed in other studies as long as they have been deposited in public repositories.

Availability

Marker-gene database tool: http://ibb.uab.es/mgdb",2014-02-25 +24914232,Finding abbreviations in biomedical literature: three BioC-compatible modules and four BioC-formatted corpora. ,"BioC is a recently created XML format to share text data and annotations, and an accompanying input/output library to promote interoperability of data and tools for natural language processing of biomedical text. This article reports the use of BioC to address a common challenge in processing biomedical text information-that of frequent entity name abbreviation. We selected three different abbreviation definition identification modules, and used the publicly available BioC code to convert these independent modules into BioC-compatible components that interact seamlessly with BioC-formatted data, and other BioC-compatible modules. In addition, we consider four manually annotated corpora of abbreviations in biomedical text: the Ab3P corpus of 1250 PubMed abstracts, the BIOADI corpus of 1201 PubMed abstracts, the old MEDSTRACT corpus of 199 PubMed(®) citations and the Schwartz and Hearst corpus of 1000 PubMed abstracts. Annotations in these corpora have been re-evaluated by four annotators and their consistency and quality levels have been improved. We converted them to BioC-format and described the representation of the annotations. These corpora are used to measure the three abbreviation-finding algorithms and the results are given. The BioC-compatible modules, when compared with their original form, have no difference in their efficiency, running time or any other comparable aspects. They can be conveniently used as a common pre-processing step for larger multi-layered text-mining endeavors. Database URL: Code and data are available for download at the BioC site: http://bioc.sourceforge.net.",2014-06-09 +29438560,HIITE: HIV-1 incidence and infection time estimator.,"

Motivation

Around 2.1 million new HIV-1 infections were reported in 2015, alerting that the HIV-1 epidemic remains a significant global health challenge. Precise incidence assessment strengthens epidemic monitoring efforts and guides strategy optimization for prevention programs. Estimating the onset time of HIV-1 infection can facilitate optimal clinical management and identify key populations largely responsible for epidemic spread and thereby infer HIV-1 transmission chains. Our goal is to develop a genomic assay estimating the incidence and infection time in a single cross-sectional survey setting.

Results

We created a web-based platform, HIV-1 incidence and infection time estimator (HIITE), which processes envelope gene sequences using hierarchical clustering algorithms and informs the stage of infection, along with time since infection for incident cases. HIITE's performance was evaluated using 585 incident and 305 chronic specimens' envelope gene sequences collected from global cohorts including HIV-1 vaccine trial participants. HIITE precisely identified chronically infected individuals as being chronic with an error less than 1% and correctly classified 94% of recently infected individuals as being incident. Using a mixed-effect model, an incident specimen's time since infection was estimated from its single lineage diversity, showing 14% prediction error for time since infection. HIITE is the first algorithm to inform two key metrics from a single time point sequence sample. HIITE has the capacity for assessing not only population-level epidemic spread but also individual-level transmission events from a single survey, advancing HIV prevention and intervention programs.

Availability and implementation

Web-based HIITE and source code of HIITE are available at http://www.hayounlee.org/software.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-06-01 +28079128,Parallel-META 3: Comprehensive taxonomical and functional analysis platform for efficient comparison of microbial communities.,"The number of metagenomes is increasing rapidly. However, current methods for metagenomic analysis are limited by their capability for in-depth data mining among a large number of microbiome each of which carries a complex community structure. Moreover, the complexity of configuring and operating computational pipeline also hinders efficient data processing for the end users. In this work we introduce Parallel-META 3, a comprehensive and fully automatic computational toolkit for rapid data mining among metagenomic datasets, with advanced features including 16S rRNA extraction for shotgun sequences, 16S rRNA copy number calibration, 16S rRNA based functional prediction, diversity statistics, bio-marker selection, interaction network construction, vector-graph-based visualization and parallel computing. Application of Parallel-META 3 on 5,337 samples with 1,117,555,208 sequences from diverse studies and platforms showed it could produce similar results as QIIME and PICRUSt with much faster speed and lower memory usage, which demonstrates its ability to unravel the taxonomical and functional dynamics patterns across large datasets and elucidate ecological links between microbiome and the environment. Parallel-META 3 is implemented in C/C++ and R, and integrated into an executive package for rapid installation and easy access under Linux and Mac OS X. Both binary and source code packages are available at http://bioinfo.single-cell.cn/parallel-meta.html.",2017-01-12 +26748546,Kinetic and structural characterization of amyloid-β peptide hydrolysis by human angiotensin-1-converting enzyme.,"

Unlabelled

Angiotensin-1-converting enzyme (ACE), a zinc metallopeptidase, consists of two homologous catalytic domains (N and C) with different substrate specificities. Here we report kinetic parameters of five different forms of human ACE with various amyloid beta (Aβ) substrates together with high resolution crystal structures of the N-domain in complex with Aβ fragments. For the physiological Aβ(1-16) peptide, a novel ACE cleavage site was found at His14-Gln15. Furthermore, Aβ(1-16) was preferentially cleaved by the individual N-domain; however, the presence of an inactive C-domain in full-length somatic ACE (sACE) greatly reduced enzyme activity and affected apparent selectivity. Two fluorogenic substrates, Aβ(4-10)Q and Aβ(4-10)Y, underwent endoproteolytic cleavage at the Asp7-Ser8 bond with all ACE constructs showing greater catalytic efficiency for Aβ(4-10)Y. Surprisingly, in contrast to Aβ(1-16) and Aβ(4-10)Q, sACE showed positive domain cooperativity and the double C-domain (CC-sACE) construct no cooperativity towards Aβ(4-10)Y. The structures of the Aβ peptide-ACE complexes revealed a common mode of peptide binding for both domains which principally targets the C-terminal P2' position to the S2' pocket and recognizes the main chain of the P1' peptide. It is likely that N-domain selectivity for the amyloid peptide is conferred through the N-domain specific S2' residue Thr358. Additionally, the N-domain can accommodate larger substrates through movement of the N-terminal helices, as suggested by the disorder of the hinge region in the crystal structures. Our findings are important for the design of domain selective inhibitors as the differences in domain selectivity are more pronounced with the truncated domains compared to the more physiological full-length forms.

Database

The atomic coordinates and structure factors for N-domain ACE with Aβ peptides 4-10 (5AM8), 10-16 (5AM9), 1-16 (5AMA), 35-42 (5AMB) and (4-10)Y (5AMC) complexes have been deposited in the Protein Data Bank, Research Collaboratory for Structural Bioinformatics, Rutgers University, New Brunswick, NJ, USA (http://www.rcsb.org/).",2016-02-09 +22916227,CLEARPOND: cross-linguistic easy-access resource for phonological and orthographic neighborhood densities.,"Past research has demonstrated cross-linguistic, cross-modal, and task-dependent differences in neighborhood density effects, indicating a need to control for neighborhood variables when developing and interpreting research on language processing. The goals of the present paper are two-fold: (1) to introduce CLEARPOND (Cross-Linguistic Easy-Access Resource for Phonological and Orthographic Neighborhood Densities), a centralized database of phonological and orthographic neighborhood information, both within and between languages, for five commonly-studied languages: Dutch, English, French, German, and Spanish; and (2) to show how CLEARPOND can be used to compare general properties of phonological and orthographic neighborhoods across languages. CLEARPOND allows researchers to input a word or list of words and obtain phonological and orthographic neighbors, neighborhood densities, mean neighborhood frequencies, word lengths by number of phonemes and graphemes, and spoken-word frequencies. Neighbors can be defined by substitution, deletion, and/or addition, and the database can be queried separately along each metric or summed across all three. Neighborhood values can be obtained both within and across languages, and outputs can optionally be restricted to neighbors of higher frequency. 
To enable researchers to more quickly and easily develop stimuli, CLEARPOND can also be searched by features, generating lists of words that meet precise criteria, such as a specific range of neighborhood sizes, lexical frequencies, and/or word lengths. CLEARPOND is freely-available to researchers and the public as a searchable, online database and for download at http://clearpond.northwestern.edu.",2012-08-20 +28934096,Children's Lead Exposure: A Multimedia Modeling Analysis to Guide Public Health Decision-Making.,"

Background

Drinking water and other sources for lead are the subject of public health concerns around the Flint, Michigan, drinking water and East Chicago, Indiana, lead in soil crises. In 2015, the U.S. Environmental Protection Agency (EPA)'s National Drinking Water Advisory Council (NDWAC) recommended establishment of a ""health-based, household action level"" for lead in drinking water based on children's exposure.

Objectives

The primary objective was to develop a coupled exposure-dose modeling approach that can be used to determine what drinking water lead concentrations keep children's blood lead levels (BLLs) below specified values, considering exposures from water, soil, dust, food, and air. Related objectives were to evaluate the coupled model estimates using real-world blood lead data, to quantify relative contributions by the various media, and to identify key model inputs.

Methods

A modeling approach using the EPA's Stochastic Human Exposure and Dose Simulation (SHEDS)-Multimedia and Integrated Exposure Uptake and Biokinetic (IEUBK) models was developed using available data. This analysis for the U.S. population of young children probabilistically simulated multimedia exposures and estimated relative contributions of media to BLLs across all population percentiles for several age groups.

Results

Modeled BLLs compared well with nationally representative BLLs (0-23% relative error). Analyses revealed relative importance of soil and dust ingestion exposure pathways and associated Pb intake rates; water ingestion was also a main pathway, especially for infants.

Conclusions

This methodology advances scientific understanding of the relationship between lead concentrations in drinking water and BLLs in children. It can guide national health-based benchmarks for lead and related community public health decisions. https://doi.org/10.1289/EHP1605.",2017-09-12 +27153612,ProbFold: a probabilistic method for integration of probing data in RNA secondary structure prediction.,"

Motivation

Recently, new RNA secondary structure probing techniques have been developed, including Next Generation Sequencing based methods capable of probing transcriptome-wide. These techniques hold great promise for improving structure prediction accuracy. However, each new data type comes with its own signal properties and biases, which may even be experiment specific. There is therefore a growing need for RNA structure prediction methods that can be automatically trained on new data types and readily extended to integrate and fully exploit multiple types of data.

Results

Here, we develop and explore a modular probabilistic approach for integrating probing data in RNA structure prediction. It can be automatically trained given a set of known structures with probing data. The approach is demonstrated on SHAPE datasets, where we evaluate and selectively model specific correlations. The approach often makes superior use of the probing data signal compared to other methods. We illustrate the use of ProbFold on multiple data types using both simulations and a small set of structures with both SHAPE, DMS and CMCT data. Technically, the approach combines stochastic context-free grammars (SCFGs) with probabilistic graphical models. This approach allows rapid adaptation and integration of new probing data types.

Availability and implementation

ProbFold is implemented in C++. Models are specified using simple textual formats. Data reformatting is done using separate C++ programs. Source code, statically compiled binaries for x86 Linux machines, C++ programs, example datasets and a tutorial are available from http://moma.ki.au.dk/prj/probfold/

Contact

jakob.skou@clin.au.dk

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-21 +28652307,Both MisR (CpxR) and MisS (CpxA) Are Required for Neisseria gonorrhoeae Infection in a Murine Model of Lower Genital Tract Infection. ,"During infection, Neisseria gonorrhoeae senses and responds to stress; such responses may be modulated by MisRS (NGO0177 and NGO0176), a two-component system that is a homolog of CpxRA. In Escherichia coli, CpxRA senses and responds to envelope stress; CpxA is a sensor kinase/phosphatase for CpxR, a response regulator. When a cpxA mutant is grown in medium containing glucose, CpxR is phosphorylated by acetyl phosphate but cannot be dephosphorylated, resulting in constitutive activation. Kandler and coworkers (J. L. Kandler, C. L. Holley, J. L. Reimche, V. Dhulipala, J. T. Balthazar, A. Muszyński, R. W. Carlson, and W. M. Shafer, Antimicrob Agents Chemother 60:4690-4700, 2016, https://doi.org/10.1128/AAC.00823-16) showed that MisR (CpxR) is required for the maintenance of membrane integrity and resistance to antimicrobial peptides, suggesting a role in gonococcal survival in vivo. Here, we evaluated the contributions of MisR and MisS (CpxA) to gonococcal infection in a murine model of cervicovaginal colonization and identified MisR-regulated genes using RNA sequencing (RNA-Seq). The deletion of misR or misS severely reduced the capacity of N. gonorrhoeae to colonize mice or maintain infection over a 7-day period and reduced microbial fitness after exposure to heat shock. Compared to the wild type (WT), the inactivation of misR identified 157 differentially regulated genes, most of which encoded putative envelope proteins. The inactivation of misS identified 17 differentially regulated genes compared to the WT and 139 differentially regulated genes compared to the misR mutant, 111 of which overlapped those differentially expressed in the comparison of the WT versus the misR mutant. 
These data indicate that an intact MisRS system is required for gonococcal infection of mice. Provided the MisR is constitutively phosphorylated in the misS mutant, the data suggest that controlled but not constitutive activation is required for gonococcal infection in mice.",2017-08-18 +25177839,Computed tomography angiography or magnetic resonance angiography for detection of intracranial vascular malformations in patients with intracerebral haemorrhage.,"

Background

Intracranial vascular malformations (brain or pial/dural arteriovenous malformations/fistulae, and aneurysms) are the leading cause of intracerebral haemorrhage (ICH) in young adults. Early identification of the intracranial vascular malformation may improve outcome if treatment can prevent ICH recurrence. Catheter intra-arterial digital subtraction angiography (IADSA) is considered the reference standard for the detection of an intracranial vascular malformation as the cause of ICH. Computed tomography angiography (CTA) and magnetic resonance angiography (MRA) are less invasive than IADSA and may be as accurate for identifying some causes of ICH.

Objectives

To evaluate the diagnostic test accuracy of CTA and MRA versus IADSA for the detection of intracranial vascular malformations as a cause of ICH.

Search methods

We searched MEDLINE (1948 to August 2013), EMBASE (1980 to August 2013), MEDION (August 2013), the Database of Abstracts of Reviews of Effects (DARE; August 2013), the Health Technology Assessment Database (HTA; August 2013), ClinicalTrials.gov (August 2013), and WHO ICTRP (International Clinical Trials Register Portfolio; August 2013). We also performed a cited reference search for forward tracking of relevant articles on Google Scholar (http://scholar.google.com/), screened bibliographies, and contacted authors to identify additional studies.

Selection criteria

We selected studies reporting data that could be used to construct contingency tables that compared CTA or MRA, or both, with IADSA in the same patients for the detection of intracranial vascular malformations following ICH.

Data collection and analysis

Two authors (CBJ and RA-SS) independently extracted data on study characteristics and measures of test accuracy. Two authors (CBJ and PMW) independently extracted data on test characteristics. We obtained data restricted to the subgroup undergoing IADSA in studies using multiple reference standards. We combined data using the bivariate model. We generated forest plots of the sensitivity and specificity of CTA and MRA and created a summary receiver operating characteristic plot.

Main results

Eleven studies (n = 927 participants) met our inclusion criteria. Eight studies compared CTA with IADSA (n = 526) and three studies compared MRA with IADSA (n = 401). Methodological quality varied considerably among studies, with partial verification bias in 7/11 (64%) and retrospective designs in 5/10 (50%). In studies of CTA, the pooled estimate of sensitivity was 0.95 (95% confidence interval (CI) 0.90 to 0.97) and specificity was 0.99 (95% CI 0.95 to 1.00). The results remained robust in a sensitivity analysis in which only studies evaluating adult patients (≥ 16 years of age) were included. In studies of MRA, the pooled estimate of sensitivity was 0.98 (95% CI 0.80 to 1.00) and specificity was 0.99 (95% CI 0.97 to 1.00). An indirect comparison of CTA and MRA using a bivariate model incorporating test type as one of the parameters failed to reveal a statistically significant difference in sensitivity or specificity between the two imaging modalities (P value = 0.6).

Authors' conclusions

CTA and MRA appear to have good sensitivity and specificity following ICH for the detection of intracranial vascular malformations, although several of the included studies had methodological shortcomings (retrospective designs and partial verification bias in particular) that may have increased apparent test accuracy.",2014-09-01 +29069280,PROSPERous: high-throughput prediction of substrate cleavage sites for 90 proteases with improved accuracy.,"

Summary

Proteases are enzymes that specifically cleave the peptide backbone of their target proteins. As an important type of irreversible post-translational modification, protein cleavage underlies many key physiological processes. When dysregulated, proteases' actions are associated with numerous diseases. Many proteases are highly specific, cleaving only those target substrates that present certain particular amino acid sequence patterns. Therefore, tools that successfully identify potential target substrates for proteases may also identify previously unknown, physiologically relevant cleavage sites, thus providing insights into biological processes and guiding hypothesis-driven experiments aimed at verifying protease-substrate interaction. In this work, we present PROSPERous, a tool for rapid in silico prediction of protease-specific cleavage sites in substrate sequences. Our tool is based on logistic regression models and uses different scoring functions and their pairwise combinations to subsequently predict potential cleavage sites. PROSPERous represents a state-of-the-art tool that enables fast, accurate and high-throughput prediction of substrate cleavage sites for 90 proteases.

Availability and implementation

http://prosperous.erc.monash.edu/.

Contact

jiangning.song@monash.edu or geoff.webb@monash.edu or r.pike@latrobe.edu.au.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-02-01 +25352553,"STRING v10: protein-protein interaction networks, integrated over the tree of life.","The many functional partnerships and interactions that occur between proteins are at the core of cellular processing and their systematic characterization helps to provide context in molecular systems biology. However, known and predicted interactions are scattered over multiple resources, and the available data exhibit notable differences in terms of quality and completeness. The STRING database (http://string-db.org) aims to provide a critical assessment and integration of protein-protein interactions, including direct (physical) as well as indirect (functional) associations. The new version 10.0 of STRING covers more than 2000 organisms, which has necessitated novel, scalable algorithms for transferring interaction information between organisms. For this purpose, we have introduced hierarchical and self-consistent orthology annotations for all interacting proteins, grouping the proteins into families at various levels of phylogenetic resolution. Further improvements in version 10.0 include a completely redesigned prediction pipeline for inferring protein-protein associations from co-expression data, an API interface for the R computing environment and improved statistical analysis for enrichment tests in user-provided networks.",2014-10-28 +30838924,Are Seizure Detection Devices Ready for Prime Time?,"Standards for testing and clinical validation of seizure detection Beniczky S, Ryvlin P. Epilepsia. 2018;59(S1):9-13. https://doi.org/10.1111/epi.14049 To increase the quality of studies on seizure detection devices, we propose standards for testing and clinical validation of such devices. We identified 4 key features that are important for studies on seizure detection devices: subjects, recordings, data analysis and alarms, and reference standard. 
For each of these features, we list the specific aspects that need to be addressed in the studies, and depending on these, studies are classified into 5 phases (0-4). We propose a set of outcome measures that need to be reported, and we propose standards for reporting the results. These standards will help in designing and reporting studies on seizure detection devices, they will give readers clear information on the level of evidence provided by the studies, and they will help regulatory bodies in assessing the quality of the validation studies. These standards are flexible, allowing classification of the studies into one of the 5 phases. We propose actions that can facilitate development of novel methods and devices. User-based evaluation of applicability and usability of a wearable accelerometer device for detecting bilateral tonic-clonic seizures: a field study Meritam P, Ryvlin P, Beniczky S. Epilepsia. 2018;59(S1):48-52. https://doi.org/10.1111/epi.14051 Clinical validation studies of seizure detection devices conducted in epilepsy monitoring units (EMUs) can be biased by the artificial environment. We report a field (phase 4) study of a wearable accelerometer device (Epi-Care) that has previously been validated in EMUs for detecting bilateral tonic-clonic seizures (BTCS). Seventy-one patients using the device (or their caregivers) completed the modified Post-Study System Usability Questionnaire. Median time patients had been using the device was 15 months (range = 24 days to 6 years). In 10% of cases, patients stopped using the device due to reasons related to the device. The median sensitivity (90%) and false alarm rate (0.1/day) were similar to what had been determined in EMUs. Patients and caregivers were overall satisfied with the device (median = 5.5 on the 7-point Likert scale), considered the technical aspects satisfactory, and considered the device comfortable and efficient. 
Adverse effects occurred in 11% but were only mild: skin irritation at the wrist and interference with home electronic appliances. In 55%, the device influenced the number of seizures logged into the seizure diary, and in 40%, it contributed to fewer seizure-related injuries. This field study demonstrates the applicability and usability of the wearable accelerometer device for detecting BTCS. Wearable devices for sudden unexpected death in epilepsy prevention Ryvlin P, Ciumas C, Wisniewski I, Beniczky S. Epilepsia. 2018;59(S1):61-66. https://doi.org/10.1111/epi.14054 Sudden unexpected death in epilepsy (SUDEP) is most often associated with the occurrence of generalized tonic-clonic seizures (GTCS), a seizure type that can now be detected with high sensitivity and specificity by wearable or bed devices. The recent development in such devices and their performance offer multiple opportunities to tackle SUDEP and its prevention. Reliable GTCS detection might help physicians optimize antiepileptic treatment, which could in turn reduce the risk of SUDEP. Generalized tonic-clonic seizures-triggered alarms can lead to immediate intervention by caregivers that are also likely to decrease the odds of SUDEP. The biosignals used to detect GTCS might provide novel SUDEP biomarkers, in particular, by informing on several important characteristics of the ictal and postictal periods (type of GTCS, duration of tonic phase, rotation in the prone position, presence and duration of postictal immobility and bradycardia, rise in electrodermal activity). Other biosensors not yet used for detecting GTCS might provide complementary information, such as the presence and intensity of ictal/postictal hypoxemia. The above biomarkers, if strongly predictive, could help identify patients at very high risk of SUDEP, enabling better assessment of individual risk, as well as selection of appropriate patients for clinical studies aiming at preventing SUDEP. 
The same biosignals could also be used as ancillary biomarkers to test the impact of various interventions before moving to highly challenging randomized controlled trials with SUDEP as a primary outcome.",2019-01-01 +28192776,"CORAL and Nano-QFAR: Quantitative feature - Activity relationships (QFAR) for bioavailability of nanoparticles (ZnO, CuO, Co3O4, and TiO2).","Quantitative feature - activity relationships (QFAR) approach was applied to prediction of bioavailability of metal oxide nanoparticles. ZnO, CuO, Co3O4, and TiO2 nanoxides were considered. The computational model for bioavailability of investigated species is asserted. The model was calculated using the Monte Carlo method. The CORAL free software (http://www.insilico.eu/coral) was used in this study. The developed model was tested by application of three different splits of data into the training and validation sets. So-called, quasi-SMILES are used to represent the conditions of action of metal oxide nanoparticles. A new paradigm of building up predictive models of endpoints related to nanomaterials is suggested. The paradigm is the following ""An endpoint is a mathematical function of available eclectic data (conditions)"". Recently, the paradigm has been checked up with endpoints related to metal oxide nanoparticles, fullerenes, and multi-walled carbon-nanotubes.",2017-02-23 +26472727,An Integrated Multiomics Approach to Identify Candidate Antigens for Serodiagnosis of Human Onchocerciasis.,"Improved diagnostic methods are needed to support ongoing efforts to eliminate onchocerciasis (river blindness). This study used an integrated approach to identify adult female Onchocerca volvulus antigens that can be explored for developing serodiagnostic tests. The first step was to develop a detailed multi-omics database of all O. 
volvulus proteins deduced from the genome, gene transcription data for different stages of the parasite including eight individual female worms (providing gene expression information for 94.8% of all protein coding genes), and the adult female worm proteome (detecting 2126 proteins). Next, female worm proteins were purified with IgG antibodies from onchocerciasis patients and identified using LC-MS with a high-resolution hybrid quadrupole-time-of-flight mass spectrometer. A total of 241 immunoreactive proteins were identified among those bound by IgG from infected individuals but not IgG from uninfected controls. These included most of the major diagnostic antigens described over the past 25 years plus many new candidates. Proteins of interest were prioritized for further study based on a lack of conservation with orthologs in the human host and other helminthes, their expression pattern across the life cycle, and their consistent expression among individual female worms. Based on these criteria, we selected 33 proteins that should be carried forward for testing as serodiagnostic antigens to supplement existing diagnostic tools. These candidates, together with the extensive pan-omics dataset generated in this study are available to the community (http://nematode.net) to facilitate basic and translational research on onchocerciasis.",2015-10-15 +27854363,Indel variant analysis of short-read sequencing data with Scalpel.,"As the second most common type of variation in the human genome, insertions and deletions (indels) have been linked to many diseases, but the discovery of indels of more than a few bases in size from short-read sequencing data remains challenging. Scalpel (http://scalpel.sourceforge.net) is an open-source software for reliable indel detection based on the microassembly technique. It has been successfully used to discover mutations in novel candidate genes for autism, and it is extensively used in other large-scale studies of human diseases. 
This protocol gives an overview of the algorithm and describes how to use Scalpel to perform highly accurate indel calling from whole-genome and whole-exome sequencing data. We provide detailed instructions for an exemplary family-based de novo study, but we also characterize the other two supported modes of operation: single-sample and somatic analysis. Indel normalization, visualization and annotation of the mutations are also illustrated. Using a standard server, indel discovery and characterization in the exonic regions of the example sequencing data can be completed in ∼5 h after read mapping.",2016-11-17 +25111118,Genotype-based databases for variants causing rare diseases.,"Inherited diseases are the result of DNA sequence changes. In recessive diseases, the clinical phenotype results from the combined functional effects of variants in both copies of the gene. In some diseases there is often considerable variability of clinical presentation or disease severity, which may be predicted by the genotype. Additional effects may be triggered by environmental factors, as well as genetic modifiers which could be nucleotide polymorphisms in related genes, e.g. maternal ApoE or ABCA1 genotypes which may have an influence on the phenotype of SLOS individuals. Here we report the establishment of genotype variation databases for various rare diseases which provide individual clinical phenotypes associated with genotypes and include data about possible genetic modifiers. These databases aim to be an easy public access to information on rare and private variants with clinical data, which will facilitate the interpretation of genetic variants. 
The created databases include ACAD8 (isobutyryl-CoA dehydrogenase deficiency (IBD)), ACADSB (short-chain acyl-CoA dehydrogenase (SCAD) deficiency), AUH (3-methylglutaconic aciduria (3-MGCA)), DHCR7 (Smith-Lemli-Opitz syndrome), HMGCS2 (3-hydroxy-3-methylglutaryl-CoA synthase 2 deficiency), HSD17B10 (17-beta-hydroxysteroid dehydrogenase X deficiency), FKBP14 (Ehlers-Danlos syndrome with progressive kyphoscoliosis, myopathy, and hearing loss; EDSKMH) and ROGDI (Kohlschütter-Tönz syndrome). These genes have been selected because of our specific research interests in these rare and metabolic diseases. The aim of the database was to include all identified individuals with variants in these specific genes. Identical genotypes are listed multiple times if they were found in several patients, phenotypic descriptions and biochemical data are included as detailed as possible in view also of validating the proposed pathogenicity of these genotypes. For DHCR7 genetic modifier data (maternal APOE and ABCA1 genotypes) is also included. Databases are available at http://databases.lovd.nl/shared/genes and will be updated based on periodic literature reviews and submitted reports.",2014-08-08 +22915736,Plant B vitamin pathways and their compartmentation: a guide for the perplexed.,"The B vitamins and the cofactors derived from them are essential for life. B vitamin synthesis in plants is consequently as crucial to plants themselves as it is to humans and animals, whose B vitamin nutrition depends largely on plants. The synthesis and salvage pathways for the seven plant B vitamins are now broadly known, but certain enzymes and many transporters have yet to be identified, and the subcellular locations of various reactions are unclear. Although very substantial, what is not known about plant B vitamin pathways is regrettably difficult to discern from the literature or from biochemical pathway databases. 
Nor do databases accurately represent all that is known about B vitamin pathways-above all their compartmentation-because the facts are scattered throughout the literature, and thus hard to piece together. These problems (i) deter discoveries because newcomers to B vitamins cannot see which mysteries still need solving; and (ii) impede metabolic reconstruction and modelling of B vitamin pathways because genes for reactions or transport steps are missing. This review therefore takes a fresh approach to capture current knowledge of B vitamin pathways in plants. The synthesis pathways, key salvage routes, and their subcellular compartmentation are surveyed in depth, and encoded in the SEED database (http://pubseed.theseed.org/seedviewer.cgi?page=PlantGateway) for Arabidopsis and maize. The review itself and the encoded pathways specifically identify enigmatic or missing reactions, enzymes, and transporters. The SEED-encoded B vitamin pathway collection is a publicly available, expertly curated, one-stop resource for metabolic reconstruction and modeling.",2012-08-21 +27578323,Fusing literature and full network data improves disease similarity computation.,"

Background

Identifying relatedness among diseases could help deepen understanding for the underlying pathogenic mechanisms of diseases, and facilitate drug repositioning projects. A number of methods for computing disease similarity had been developed; however, none of them were designed to utilize information of the entire protein interaction network, using instead only those interactions involving disease causing genes. Most of previously published methods required gene-disease association data, unfortunately, many diseases still have very few or no associated genes, which impeded broad adoption of those methods. In this study, we propose a new method (MedNetSim) for computing disease similarity by integrating medical literature and protein interaction network. MedNetSim consists of a network-based method (NetSim), which employs the entire protein interaction network, and a MEDLINE-based method (MedSim), which computes disease similarity by mining the biomedical literature.

Results

Among function-based methods, NetSim achieved the best performance. Its average AUC (area under the receiver operating characteristic curve) reached 95.2 %. MedSim, whose performance was even comparable to some function-based methods, acquired the highest average AUC in all semantic-based methods. Integration of MedSim and NetSim (MedNetSim) further improved the average AUC to 96.4 %. We further studied the effectiveness of different data sources. It was found that quality of protein interaction data was more important than its volume. On the contrary, higher volume of gene-disease association data was more beneficial, even with a lower reliability. Utilizing higher volume of disease-related gene data further improved the average AUC of MedNetSim and NetSim to 97.5 % and 96.7 %, respectively.

Conclusions

Integrating biomedical literature and protein interaction network can be an effective way to compute disease similarity. Lacking sufficient disease-related gene data, literature-based methods such as MedSim can be a great addition to function-based algorithms. It may be beneficial to steer more resources toward studying gene-disease associations and improving the quality of protein interaction data. Disease similarities can be computed using the proposed methods at http:// www.digintelli.com:8000/ .",2016-08-30 +24478623,Vobi One: a data processing software package for functional optical imaging.,"Optical imaging is the only technique that allows to record the activity of a neuronal population at the mesoscopic scale. A large region of the cortex (10-20 mm diameter) is directly imaged with a CCD camera while the animal performs a behavioral task, producing spatio-temporal data with an unprecedented combination of spatial and temporal resolutions (respectively, tens of micrometers and milliseconds). However, researchers who have developed and used this technique have relied on heterogeneous software and methods to analyze their data. In this paper, we introduce Vobi One, a software package entirely dedicated to the processing of functional optical imaging data. It has been designed to facilitate the processing of data and the comparison of different analysis methods. Moreover, it should help bring good analysis practices to the community because it relies on a database and a standard format for data handling and it provides tools that allow producing reproducible research. 
Vobi One is an extension of the BrainVISA software platform, entirely written with the Python programming language, open source and freely available for download at https://trac.int.univ-amu.fr/vobi_one.",2014-01-24 +,Near real-time disturbance detection using satellite image time series,"Near real-time monitoring of ecosystem disturbances is critical for rapidly assessing and addressing impacts on carbon dynamics, biodiversity, and socio-ecological processes. Satellite remote sensing enables cost-effective and accurate monitoring at frequent time steps over large areas. Yet, generic methods to detect disturbances within newly captured satellite images are lacking. We propose a multi-purpose time-series-based disturbance detection approach that identifies and models stable historical variation to enable change detection within newly acquired data. Satellite image time series of vegetation greenness provide a global record of terrestrial vegetation productivity over the past decades. Here, we assess and demonstrate the method by applying it to (1) simulated time series of vegetation greenness data from satellite data, (2) real-world satellite greenness image time series between February 2000 and July 2011 covering Somalia to detect drought-related vegetation disturbances. First, simulation results illustrate that disturbances are successfully detected in near real-time while being robust to seasonality and noise. Second, major drought-related disturbance corresponding with most drought-stressed regions in Somalia are detected from mid-2010 onwards. The method can analyse in-situ or satellite data time series of biophysical indicators from local to global scale since it is fast, does not depend on thresholds and does not require time series gap filling. 
While the data and methods used are appropriate for proof-of-concept development of global scale disturbance monitoring, specific applications (e.g., drought or deforestation monitoring) mandate integration within an operational monitoring framework (e.g., http://www.fews.net/).",2012-08-01 +27508233,"Data on optimized production and characterization of alkaline proteases from newly isolated alkaliphiles from Lonar soda lake, India.","Alkaline proteases are one of the industrially important enzymes and generally preferred from alkaliphilic sources. Here we have provided the data on optimized production and characterization of alkaline proteases from five newly isolated and identified alkaliphiles from Lonar soda lake, India. The data provided for optimization of physicochemical parameters for maximum alkaline proteases production is based on OVAT (one variable at a time) approach. Alkaline protease production (U/mL) recorded by using different agro industrial residues is included in the given data. Further readers can find more information in our previously published research article where we have already described about the methods used and comparative analysis of the data recorded regarding optimized production, characterization and application of alkaline proteases isolated from Lonar soda lake isolates (http://dx.doi.org/10.1016/j.bcab.2016.06.002) [1]. The data provided here by us is useful to other researchers for setting up various suitable statistical models to perform optimization studies other than OVAT approach.",2016-07-05 +28742084,Systematic tissue-specific functional annotation of the human genome highlights immune-related DNA elements for late-onset Alzheimer's disease.,"Continuing efforts from large international consortia have made genome-wide epigenomic and transcriptomic annotation data publicly available for a variety of cell and tissue types. 
However, synthesis of these datasets into effective summary metrics to characterize the functional non-coding genome remains a challenge. Here, we present GenoSkyline-Plus, an extension of our previous work through integration of an expanded set of epigenomic and transcriptomic annotations to produce high-resolution, single tissue annotations. After validating our annotations with a catalog of tissue-specific non-coding elements previously identified in the literature, we apply our method using data from 127 different cell and tissue types to present an atlas of heritability enrichment across 45 different GWAS traits. We show that broader organ system categories (e.g. immune system) increase statistical power in identifying biologically relevant tissue types for complex diseases while annotations of individual cell types (e.g. monocytes or B-cells) provide deeper insights into disease etiology. Additionally, we use our GenoSkyline-Plus annotations in an in-depth case study of late-onset Alzheimer's disease (LOAD). Our analyses suggest a strong connection between LOAD heritability and genetic variants contained in regions of the genome functional in monocytes. Furthermore, we show that LOAD shares a similar localization of SNPs to monocyte-functional regions with Parkinson's disease. Overall, we demonstrate that integrated genome annotations at the single tissue level provide a valuable tool for understanding the etiology of complex human diseases. Our GenoSkyline-Plus annotations are freely available at http://genocanyon.med.yale.edu/GenoSkyline.",2017-07-24 +26484264,Aristolochic acids - Induced transcriptomic responses in rat renal proximal tubule cells in vitro.,"Aristolochic acids (AAs) are the active components of herbal drugs derived from Aristolochia species that have been used for medicinal purposes since antiquity. 
However, AAs have recently been discovered to be highly nephrotoxic and induced urothelial cancer in humans and malignant tumors in the kidney and urinary tract of rodents. In this study, we exposed rat renal proximal tubule cells in vitro to a sub-cytotoxic level of AAs at three different time points (6 h, 24 h and 72 h). We then analyzed the gene expression profile after the compound exposure. Functional analysis with Ingenuity Pathways Analysis and DAVID tools revealed that at the late time point (72 h) there are many significantly altered genes involved in cancer-related pathways such as p53 signaling. MIAMI-compliant microarray data are deposited in the NCBI GEO database under accession number GSE68687 and can be found at: http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE68687.",2015-06-01 +25858285,Using Ontology Fingerprints to disambiguate gene name entities in the biomedical literature.,"Ambiguous gene names in the biomedical literature are a barrier to accurate information extraction. To overcome this hurdle, we generated Ontology Fingerprints for selected genes that are relevant for personalized cancer therapy. These Ontology Fingerprints were used to evaluate the association between genes and biomedical literature to disambiguate gene names. We obtained 93.6% precision for the test gene set and 80.4% for the area under a receiver-operating characteristics curve for gene and article association. The core algorithm was implemented using a graphics processing unit-based MapReduce framework to handle big data and to improve performance. We conclude that Ontology Fingerprints can help disambiguate gene names mentioned in text and analyse the association between genes and articles. Database URL: http://www.ontologyfingerprint.org",2015-04-08 +28795103,RNA-seq data of Oryza sativa cultivar Kuku Belang under PEG treatment.,"Drought stress is the main abiotic factor affecting rice production. 
Rain-fed upland rice which is grown on unbounded fields and totally dependent on rainfall for moisture is more prone to drought stress compared to rice from other ecosystems. However, upland rice has adapted to this limited water condition, thus are more drought tolerant than rice from other ecosystems. We performed the first transcriptome sequencing of drought tolerant indica upland rice cultivar Kuku Belang to identify differentially expressed genes related to drought tolerance mechanism. Raw reads for non-treated and PEG-treated Oryza sativa subspecies indica cv. Kuku Belang were deposited in the NCBI SRA database with accession number SRP074520 (https://www.ncbi.nlm.nih.gov/sra?term=SRP074520).",2017-07-20 +28743678,The Saliva Exposome for Monitoring of Individuals' Health Trajectories.,"

Background

There is increasing evidence that environmental, rather than genetic, factors are the major causes of most chronic diseases. By measuring entire classes of chemicals in archived biospecimens, exposome-wide association studies (EWAS) are being conducted to investigate associations between a myriad of exposures received during life and chronic diseases.

Objectives

Because the intraindividual variability in biomarker levels, arising from changes in environmental exposures from conception onwards, leads to attenuation of exposure-disease associations, we posit that saliva can be collected repeatedly in longitudinal studies to reduce exposure-measurement errors in EWAS.

Methods

From the literature and an open-source saliva-metabolome database, we obtained concentrations of 1,233 chemicals that had been detected in saliva. We connected salivary metabolites with human metabolic pathways and PubMed Medical Subject Heading (MeSH) terms, and performed pathway enrichment and pathway topology analyses.

Results

One hundred ninety-six salivary metabolites were mapped into 49 metabolic pathways and connected with human metabolic diseases, central nervous system diseases, and neoplasms. We found that the saliva exposome represents at least 14 metabolic pathways, including amino acid metabolism, TCA cycle, gluconeogenesis, glutathione metabolism, pantothenate and CoA biosynthesis, and butanoate metabolism.

Conclusions

Saliva contains molecular information worthy of interrogation via EWAS. The simplicity of specimen collection suggests that saliva offers a practical alternative to blood for measurements that can be used to characterize individual exposomes. https://doi.org/10.1289/EHP1011.",2017-07-20 +29847083,"Prenatal Mancozeb Exposure, Excess Manganese, and Neurodevelopment at 1 Year of Age in the Infants' Environmental Health (ISA) Study.","BACKGROUND:Although growing evidence suggests that early-life excess manganese (Mn) impairs neurodevelopment, data on the neurodevelopmental effects of mancozeb, a fungicide containing Mn, and its main metabolite ethylenethiourea (ETU) are limited. OBJECTIVE:We examined whether prenatal mancozeb exposure and excess Mn were associated with neurodevelopment in 355 1-y-old infants living near banana plantations with frequent aerial mancozeb spraying in Costa Rica. METHODS:We measured urinary ETU, hair Mn, and blood Mn concentrations in samples collected 1-3 times during pregnancy from mothers enrolled in the Infants' Environmental Health (ISA) study. We then assessed neurodevelopment in their 1-y-old infants using the Bayley Scales of Infant and Toddler Development, 3rd edition (BSID-III). We estimated exposure-outcome associations using linear regression models adjusted for maternal education, parity, gestational age at birth, child age, Home Observation for Measurement of the Environment score, and location of neurodevelopmental assessment. RESULTS:Median (P25-P75) urinary ETU, hair Mn, and blood Mn measured during pregnancy were 3.3 μg/L (2.4-4.9; specific gravity-corrected), 1.7 μg/g (0.9-4.1), and 24.0 μg/L (20.3-28.0), respectively. Among girls, higher ETU was associated with lower social-emotional scores [β per 10-fold increase=-7.4 points (95% CI: -15.2, 0.4)], whereas higher hair Mn was associated with lower cognitive scores [-3.0 (-6.1, 0.1)]. 
Among boys, higher hair Mn was associated with lower social-emotional scores [-4.6 (-8.5, -0.8)]. We observed null associations for blood Mn, language, and motor outcomes. CONCLUSIONS:Our findings indicate that maternal exposure to mancozeb and excess Mn during pregnancy may have adverse and sex-specific effects on infant neurodevelopment. https://doi.org/10.1289/EHP1955.",2018-05-29 +25300483,sc-PDB: a 3D-database of ligandable binding sites--10 years on.,"The sc-PDB database (available at http://bioinfo-pharma.u-strasbg.fr/scPDB/) is a comprehensive and up-to-date selection of ligandable binding sites of the Protein Data Bank. Sites are defined from complexes between a protein and a pharmacological ligand. The database provides the all-atom description of the protein, its ligand, their binding site and their binding mode. Currently, the sc-PDB archive registers 9283 binding sites from 3678 unique proteins and 5608 unique ligands. The sc-PDB database was publicly launched in 2004 with the aim of providing structure files suitable for computational approaches to drug design, such as docking. During the last 10 years we have improved and standardized the processes for (i) identifying binding sites, (ii) correcting structures, (iii) annotating protein function and ligand properties and (iv) characterizing their binding mode. This paper presents the latest enhancements in the database, specifically pertaining to the representation of molecular interaction and to the similarity between ligand/protein binding patterns. The new website puts emphasis in pictorial analysis of data.",2014-10-09 +29599715,DynaSim: A MATLAB Toolbox for Neural Modeling and Simulation.,"DynaSim is an open-source MATLAB/GNU Octave toolbox for rapid prototyping of neural models and batch simulation management. It is designed to speed up and simplify the process of generating, sharing, and exploring network models of neurons with one or more compartments. 
Models can be specified by equations directly (similar to XPP or the Brian simulator) or by lists of predefined or custom model components. The higher-level specification supports arbitrarily complex population models and networks of interconnected populations. DynaSim also includes a large set of features that simplify exploring model dynamics over parameter spaces, running simulations in parallel using both multicore processors and high-performance computer clusters, and analyzing and plotting large numbers of simulated data sets in parallel. It also includes a graphical user interface (DynaSim GUI) that supports full functionality without requiring user programming. The software has been implemented in MATLAB to enable advanced neural modeling using MATLAB, given its popularity and a growing interest in modeling neural systems. The design of DynaSim incorporates a novel schema for model specification to facilitate future interoperability with other specifications (e.g., NeuroML, SBML), simulators (e.g., NEURON, Brian, NEST), and web-based applications (e.g., Geppetto) outside MATLAB. DynaSim is freely available at http://dynasimtoolbox.org. This tool promises to reduce barriers for investigating dynamics in large neural models, facilitate collaborative modeling, and complement other tools being developed in the neuroinformatics community.",2018-03-15 +28086253,[Vision Loss after Silicone Oil Surgery].,"Silicone oil is an intraocular tamponade that is essential for the treatment of complicated retinal detachment. As a long-term tamponade, it improves retinal reattachment and visual outcome. Unexpectedly, surgery with silicone oil tamponade may result in irreversible visual loss of unknown origin. In this report, we provide a general overview of unexplained visual loss after surgery with silicone oil. The frequency of such reports has increased continuously in recent years. 
The German Retina Society - supported by Retinanet (http://retina-net.uni-koeln.de) - has initiated data collection to gather information about such cases, in cooperation with Cologne University Eye Hospital. Ophthalmologists can provide data about cases of unexplained visual loss anonymously via the ""Cologne Clinical Trials Centre"" or via augenklinik-silikonoel@uk-koeln.de.",2017-01-13 +25355519,COSMIC: exploring the world's knowledge of somatic mutations in human cancer.,"COSMIC, the Catalogue Of Somatic Mutations In Cancer (http://cancer.sanger.ac.uk) is the world's largest and most comprehensive resource for exploring the impact of somatic mutations in human cancer. Our latest release (v70; Aug 2014) describes 2 002 811 coding point mutations in over one million tumor samples and across most human genes. To emphasize depth of knowledge on known cancer genes, mutation information is curated manually from the scientific literature, allowing very precise definitions of disease types and patient details. Combination of almost 20,000 published studies gives substantial resolution of how mutations and phenotypes relate in human cancer, providing insights into the stratification of mutations and biomarkers across cancer patient populations. Conversely, our curation of cancer genomes (over 12,000) emphasizes knowledge breadth, driving discovery of unrecognized cancer-driving hotspots and molecular targets. Our high-resolution curation approach is globally unique, giving substantial insight into molecular biomarkers in human oncology. In addition, COSMIC also details more than six million noncoding mutations, 10,534 gene fusions, 61,299 genome rearrangements, 695,504 abnormal copy number segments and 60,119,787 abnormal expression variants. 
All these types of somatic mutation are annotated to both the human genome and each affected coding gene, then correlated across disease and mutation types.",2014-10-29 +30466985,Withania somnifera (Indian ginseng) in male infertility: An evidence-based systematic review and meta-analysis.,"

Background

Withania somnifera Dunal, commonly known as Indian ginseng, has been in use since ancient times as an anti-stress agent and aphrodisiac, and for the treatment of impotence and infertility.

Purpose

To evaluate the efficacy and safety of W. somnifera treatment in infertile men.

Study design

An evidence-based systematic review and meta-analysis using Preferred Reporting Items for Systematic reviews and Meta-Analyses (PRISMA) guidelines.

Methods

Published literature was searched in PubMed/MEDLINE, EMBASE, Scopus, the Cochrane Library, and DHARA. Grey literature was assessed from the WHO International Clinical Trials Registry Platform (http://apps.who.int/trialsearch/) and the US National Institutes of Health (https://clinicaltrials.gov/).

Results

Four clinical trials (comprising 5 publications: observational, n = 4; randomized controlled trial [RCT], n = 1) were included in the study. As only one RCT included, meta-analysis of RCT was not performed; however, systematically reviewed data demonstrated statistical (p ≤ .002 versus baseline) increase in sperm concentration (167%), semen volume (59%), and sperm motility (57%) in oligospermic males after 90 days of W. somnifera treatment, as well, serum testosterone (17%) and luteinizing hormone (34%) levels. Meta-analysis of observational (versus pre-treatment) studies showed that W. somnifera treatment significantly improved semen parameters (semen volume: mean difference [MD], 0.28  ml; 95% confidence interval [CI], 0.12 to 0.43; p = .0004; sperm concentration: MD, 13.57 million/ml; 95% CI, 11.12 to 16.01; p < .00001; sperm motility: MD, 8.50%; 95% CI, 7.36 to 9.63; p < .00001) with 14% of pregnancy outcome success rate in normozoospermic men. Meta-analysis findings also evidenced significant improvement in serum hormonal profile, oxidative biomarkers and antioxidant vitamins in seminal plasma. No adverse effects were reported in infertile men taking W. somnifera treatment.

Conclusion

Due to a small number of eligible studies, the available data, though promising, are too limited to provide novel and sufficiently robust evidence of the benefits of W. somnifera in male infertility. Additional RCTs of high quality with a larger sample size are warranted to further strengthen the clinical use of W. somnifera in treating male factor infertility. Future research also needs to elucidate the molecular mechanism(s) of W. somnifera as well as its active principles in male infertility.",2017-11-29 +28724555,"Susceptibility Testing for the Polymyxins: Two Steps Back, Three Steps Forward?","Optimizing and standardizing susceptibility testing for the polymyxins have become pressing issues, given the rise in multidrug-resistant Gram-negative bacilli. Recently, both the CLSI and EUCAST have recommended broth microdilution (BMD) (without polysorbate) as the reference method for polymyxin susceptibility testing. In this issue, K. L. Chew et al. (J Clin Microbiol 55:2609-2616, 2017, https://doi.org/10.1128/JCM.00268-17) compare the performances of three commercial BMD panels and the Etest to the reference, BMD, for polymyxin B and colistin, using 76 Enterobacteriaceae isolates (21 of which were mcr-1 positive). Although none of the commercial BMD panels strictly met FDA performance standards in this evaluation, possibly because of the small number isolates tested, the Sensititre panel achieved >90% categorical agreement for both polymyxin compounds. These results also reaffirm CLSI and EUCAST guidance that gradient diffusion testing, which had unacceptable error rates, should be abandoned. In a simulated analysis with lowered breakpoints (susceptible, ≤1 mg/liter; intermediate, 2 mg/liter; resistant, ≥4 mg/liter), error rates and agreement were improved across the various methods and the rate of detection of mcr-1-positive isolates improved. 
These observations, taken together with recent pharmacokinetic data on optimizing target attainment for the polymyxins, suggest that more-stringent (lower) breakpoints may be reasonable, although such an approach may be limited by the inherent reliability of current testing methodologies and a lack of robust clinical correlative data, which are sorely needed.",2017-07-19 +29151019,bcROCsurface: an R package for correcting verification bias in estimation of the ROC surface and its volume for continuous diagnostic tests.,"

Background

Receiver operating characteristic (ROC) surface analysis is usually employed to assess the accuracy of a medical diagnostic test when there are three ordered disease statuses (e.g. non-diseased, intermediate, diseased). In practice, verification bias can occur due to missingness of the true disease status and can lead to a distorted conclusion on diagnostic accuracy. In such situations, bias-corrected inference tools are required.

Results

This paper introduces an R package, named bcROCsurface, which provides utility functions for verification bias-corrected ROC surface analysis. The shiny web application of the correction for verification bias in estimation of the ROC surface analysis is also developed.

Conclusion

bcROCsurface may become an important tool for the statistical evaluation of three-class diagnostic markers in presence of verification bias. The R package, readme and example data are available on CRAN. The web interface enables users less familiar with R to evaluate the accuracy of diagnostic tests, and can be found at http://khanhtoduc.shinyapps.io/bcROCsurface_shiny/ .",2017-11-18 +28717899,The Effects of Public Disclosure of Industry Payments to Physicians on Patient Trust: A Randomized Experiment.,"

Background

Financial ties between physicians and the pharmaceutical and medical device industry are common, but little is known about how patient trust is affected by these ties.

Objective

The purpose of this study was to evaluate how viewing online public disclosure of industry payments affects patients' trust ratings for physicians, the medical profession, and the pharmaceutical and medical device industry.

Design

This was a randomized experimental evaluation.

Participants

There were 278 English-speaking participants over age 18 who had seen a healthcare provider in the previous 12 months who took part in the study.

Interventions

Participants searched for physicians on an online disclosure database, viewed payments from industry to the physicians, and assigned trust ratings. Participants were randomized to view physicians who received no payment ($0), low payment ($250-300), or high payment (>$13,000) from industry, or to a control arm in which they did not view the disclosure website. They also were asked to search for and then rate trust in their own physician.

Main measures

Primary outcomes were trust in individual physician, medical profession, and industry. These scales measure trust as a composite of honesty, fidelity, competence, and global trust.

Key results

Compared to physicians who received no payments, physicians who received payments over $13,000 received lower ratings for honesty [mean (SD): 3.36 (0.86) vs. 2.75 (0.95), p < 0.001] and fidelity [3.19 (0.65) vs. 2.89 (0.68), p = 0.01]. Among the 7.9% of participants who found their own physician on the website, ratings for honesty and fidelity decreased as the industry payment to the physician increased (honesty: Spearman's ρ = -0.52, p = 0.02; fidelity: Spearman's ρ = -0.55, p = 0.01). Viewing the disclosure website did not affect trust ratings for the medical profession or industry.

Conclusions

Disclosure of industry payments to physicians affected perceptions of individual physician honesty and fidelity, but not perceptions of competence. Disclosure did not affect trust ratings for the medical profession or the pharmaceutical and medical device industry. ClinicalTrials.gov identifier: NCT02179632 ( https://clinicaltrials.gov/ct2/show/NCT02179632 ).",2017-07-17 +28171628,Sequence-structure relations of biopolymers.,"

Motivation

DNA data is transcribed into single-stranded RNA, which folds into specific molecular structures. In this paper we pose the question to what extent sequence- and structure-information correlate. We view this correlation as structural semantics of sequence data that allows for a different interpretation than conventional sequence alignment. Structural semantics could enable us to identify more general embedded ‘patterns’ in DNA and RNA sequences.

Results

We compute the partition function of sequences with respect to a fixed structure and connect this computation to the mutual information of a sequence–structure pair for RNA secondary structures. We present a Boltzmann sampler and obtain the a priori probability of specific sequence patterns. We present a detailed analysis for the three PDB-structures, 2JXV (hairpin), 2N3R (3-branch multi-loop) and 1EHZ (tRNA). We localize specific sequence patterns, contrast the energy spectrum of the Boltzmann sampled sequences versus those sequences that refold into the same structure and derive a criterion to identify native structures. We illustrate that there are multiple sequences in the partition function of a fixed structure, each having nearly the same mutual information, that are nevertheless poorly aligned. This indicates the possibility of the existence of relevant patterns embedded in the sequences that are not discoverable using alignments.

Availability and implementation

The source code is freely available at http://staff.vbi.vt.edu/fenixh/Sampler.zip

Contact

duckcr@vbi.vt.edu

Supplimentary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +25953079,GBIS: the information system of the German Genebank.,"The German Federal ex situ Genebank of Agricultural and Horticultural Crop Species is the largest collection of its kind in the countries of the European Union and amongst the 10 largest collections worldwide. Beside its enormous scientific value as a safeguard of plant biodiversity, the plant genetic resources maintained are also of high importance for breeders to provide new impulses. The complex processes of managing such a collection are supported by the Genebank Information System (GBIS). GBIS is an important source of information for researchers and plant breeders, e.g. for identifying appropriate germplasm for breeding purposes. In addition, the access to genebank material as a sovereign task is also of high interest to the general public. Moreover, GBIS acts as a data source for global information systems, such as the Global Biodiversity Information Facility (GBIF) or the European Search Catalogue for Plant Genetic Resources (EURISCO). Database URL: http://gbis.ipk-gatersleben.de/",2015-05-07 +28616455,Ultra high-field (7 T) multi-resolution fMRI data for orientation decoding in visual cortex.,"Multivariate pattern classification methods have been successfully applied to decode orientation of visual grating stimuli from BOLD fMRI activity recorded in human visual cortex (Kamitani and Tong, 2005; Haynes and Rees, 2005) [12], [10]. Though there has been extensive research investigating the true spatial scale of the orientation specific signals (Op de Beeck, 2010; Swisher et al., 2010; Alink et al., 2013; Freeman et al., 2011, 2013) [2], [15], [1], [4], [5], it remained inconclusive what spatial acquisition resolution is required, or is optimal, for decoding analyses. The research article entitled ""The effect of acquisition resolution on orientation decoding from V1 BOLD fMRI at 7 T"" Sengupta et al. 
(2017) [14] studied the effect of spatial acquisition resolution and also analyzed the strength and spatial scale of orientation discriminating signals. In this article, for the first time, we present empirical ultra high-field fMRI data, obtained as a part of the aforementioned study, which were recorded at four spatial resolutions (0.8 mm, 1.4 mm, 2 mm, and 3 mm isotropic voxel size) for orientation decoding in visual cortex. The dataset is compliant with the BIDS (Brain Imaging Data Structure) format, and freely available from the OpenfMRI portal (dataset accession number: http://openfmri.org/dataset/ds000113c ds000113c).",2017-05-24 +28328931,"X-Ray microtomography for ant taxonomy: An exploration and case study with two new Terataner (Hymenoptera, Formicidae, Myrmicinae) species from Madagascar.","We explore the potential of x-ray micro computed tomography (μCT) for the field of ant taxonomy by using it to enhance the descriptions of two remarkable new species of the ant genus Terataner: T. balrog sp. n. and T. nymeria sp. n.. We provide an illustrated worker-based species identification key for all species found on Madagascar, as well as detailed taxonomic descriptions, which include diagnoses, discussions, measurements, natural history data, high-quality montage images and distribution maps for both new species. In addition to conventional morphological examination, we have used virtual reconstructions based on volumetric μCT scanning data for the species descriptions. We also include 3D PDFs, still images of virtual reconstructions, and 3D rotation videos for both holotype workers and one paratype queen. The complete μCT datasets have been made available online (Dryad, https://datadryad.org) and represent the first cybertypes in ants (and insects). 
We discuss the potential of μCT scanning and critically assess the usefulness of cybertypes for ant taxonomy.",2017-03-22 +25311246,trieFinder: an efficient program for annotating Digital Gene Expression (DGE) tags.,"

Background

Quantification of a transcriptional profile is a useful way to evaluate the activity of a cell at a given point in time. Although RNA-Seq has revolutionized transcriptional profiling, the costs of RNA-Seq are still significantly higher than microarrays, and often the depth of data delivered from RNA-Seq is in excess of what is needed for simple transcript quantification. Digital Gene Expression (DGE) is a cost-effective, sequence-based approach for simple transcript quantification: by sequencing one read per molecule of RNA, this technique can be used to efficiently count transcripts while obviating the need for transcript-length normalization and reducing the total numbers of reads necessary for accurate quantification. Here, we present trieFinder, a program specifically designed to rapidly map, parse, and annotate DGE tags of various lengths against cDNA and/or genomic sequence databases.

Results

The trieFinder algorithm maps DGE tags in a two-step process. First, it scans FASTA files of RefSeq, UniGene, and genomic DNA sequences to create a database of all tags that can be derived from a predefined restriction site. Next, it compares the experimental DGE tags to this tag database, taking advantage of the fact that the tags are stored as a prefix tree, or ""trie"", which allows for linear-time searches for exact matches. DGE tags with mismatches are analyzed by recursive calls in the data structure. We find that, in terms of alignment speed, the mapping functionality of trieFinder compares favorably with Bowtie.

Conclusions

trieFinder can quickly provide the user an annotation of the DGE tags from three sources simultaneously, simplifying transcript quantification and novel transcript detection, delivering the data in a simple parsed format, obviating the need to post-process the alignment results. trieFinder is available at http://research.nhgri.nih.gov/software/trieFinder/.",2014-10-13 +22080560,MIPModDB: a central resource for the superfamily of major intrinsic proteins.,"The channel proteins belonging to the major intrinsic proteins (MIP) superfamily are diverse and are found in all forms of life. Water-transporting aquaporin and glycerol-specific aquaglyceroporin are the prototype members of the MIP superfamily. MIPs have also been shown to transport other neutral molecules and gases across the membrane. They have internal homology and possess conserved sequence motifs. By analyzing a large number of publicly available genome sequences, we have identified more than 1000 MIPs from diverse organisms. We have developed a database MIPModDB which will be a unified resource for all MIPs. For each MIP entry, this database contains information about the source, gene structure, sequence features, substitutions in the conserved NPA motifs, structural model, the residues forming the selectivity filter and channel radius profile. For selected set of MIPs, it is possible to derive structure-based sequence alignment and evolutionary relationship. 
Sequences and structures of selected MIPs can be downloaded from MIPModDB database which is freely available at http://bioinfo.iitk.ac.in/MIPModDB.",2011-11-12 +,Development of a multi-temporal Kalman filter approach to geostationary active fire detection & fire radiative power (FRP) estimation,"Most active fire detection algorithms applied to data from geostationary Earth Observation (EO) satellites are adjustments of those originally developed for polar-orbiting systems, and thus the high temporal imaging frequencies offered from geostationary systems are often not fully utilized within such detection approaches. Here we present a new active fire detection algorithm that fully exploits geostationary data's temporal dimension, including both for detecting actively burning fires and quantifying their fire radiative power (FRP). The approach uses a robust matching algorithm to model each pixels diurnal temperature cycle (DTC) in the middle infra-red (MIR) spectral band, the most important band for active fire detection. For each pixel, a Kalman filter (KF) is used to blend the set of basis DTCs with the actual geostationary observations during times of confirmed cloud- and fire-free measurements, allowing for estimates of the pixels true non-fire ‘background’ signal to be provided throughout the day, even when a fire maybe present. This is different to the standard ‘spatial contextual’ approach, where the non-fire background signal is estimated from nearby non-fire pixels. A series of spectral thresholds are then applied to the analyzed pixel in order to identify whether the actual observation departs sufficiently from the estimated non-fire signal to confidently suggest that the pixel contains an active fire. If it does, the difference between the fire pixel and non-fire background signal estimate in the MIR is used to estimate the fire radiative power (FRP) output. 
We apply this new ‘Kalman Filter Algorithm’ (KFA) to one month of African imagery acquired by the Spinning Enhanced Visible and Infrared Imager (SEVIRI), which is carried onboard the geostationary Meteosat satellite. We compare the resulting active fire detections and FRPs to those produced using the prototype (offline) version of the ‘spatial contextual’ based ‘Fire Thermal Anomalies’ (FTA) algorithm, here termed the pFTA, now used to generate the operational SEVIRI FRP-PIXEL product in the EUMETSAT Land Surface Analysis Satellite Applications Facility (http://landsaf.meteo.pt/), and also compare results to simultaneous detections from the MODIS MOD14/MYD14 active fire products. The KFA shows some advantages over the pFTA algorithm, detecting a greater number of fire pixels, up to ~80% more at the peak of the diurnal fire cycle. These additional fire pixels are primarily low FRP fires (<30MW), which results in the overall fire radiative energy (FRE) increasing by only 20% compared to the pFTA algorithm, though this is still a substantial difference. Comparison against simultaneous MODIS active fire observations confirms that the KFA detects more of the MODIS-detected active fires than does the pFTA (60% more), but at the expense of doubling the false alarm rate. Analysis of the ability of the KFA to aid in the estimation of FRP by providing a more certain estimate of the fire pixels background signal indicates a small 0.2K reduction (rmsd) between the ‘estimate’ of the background temperature and the ‘truth’. One limitation of the KFA approach currently is that it is computationally costly, and also requires the full day diurnal variation to be measured prior to the multi-temporal fire detection process. In its present form it is therefore unsuited to the generation of real-time products. 
We recommend future work concentrate on extracting maximum performance and utility of geostationary active fire observations by blending both ‘spatial contextual’ and ‘multi-temporal’ approaches.",2014-09-01 +29111135,Digital gene atlas of neonate common marmoset brain.,"Interest in the common marmoset (Callithrix jacchus) as a primate model animal has grown recently, in part due to the successful demonstration of transgenic marmosets. However, there is some debate as to the suitability of marmosets, compared to more widely used animal models, such as the macaque monkey and mouse. Especially, the usage of marmoset for animal models of human cognition and mental disorders, is still yet to be fully explored. To examine the prospects of the marmoset model for neuroscience research, the Marmoset Gene Atlas (https://gene-atlas.bminds.brain.riken.jp/) provides a whole brain gene expression atlas in the common marmoset. We employ in situ hybridization (ISH) to systematically analyze gene expression in neonate marmoset brains, which allows us to compare expression with other model animals such as mouse. We anticipate that these data will provide sufficient information to develop tools that enable us to reveal marmoset brain structure, function, cellular and molecular organization for primate brain research.",2017-10-27 +28597075,Improved pregnancy outcomes in women with type 1 and type 2 diabetes but substantial clinic-to-clinic variations: a prospective nationwide study.,"

Aims/hypothesis

The aim of this prospective nationwide study was to examine antenatal pregnancy care and pregnancy outcomes in women with type 1 and type 2 diabetes, and to describe changes since 2002/2003.

Methods

This national population-based cohort included 3036 pregnant women with diabetes from 155 maternity clinics in England and Wales who delivered during 2015. The main outcome measures were maternal glycaemic control, preterm delivery (before 37 weeks), infant large for gestational age (LGA), and rates of congenital anomaly, stillbirth and neonatal death.

Results

Of 3036 women, 1563 (51%) had type 1, 1386 (46%) had type 2 and 87 (3%) had other types of diabetes. The percentage of women achieving HbA1c < 6.5% (48 mmol/mol) in early pregnancy varied greatly between clinics (median [interquartile range] 14.3% [7.7-22.2] for type 1, 37.0% [27.3-46.2] for type 2). The number of infants born preterm (21.7% vs 39.7%) and LGA (23.9% vs 46.4%) were lower for women with type 2 compared with type 1 diabetes (both p < 0.001). The prevalence rates for congenital anomaly (46.2/1000 births for type 1, 34.6/1000 births for type 2) and neonatal death (8.1/1000 births for type 1, 11.4/1000 births for type 2) were unchanged since 2002/2003. Stillbirth rates are almost 2.5 times lower than in 2002/2003 (10.7 vs 25.8/1000 births for type 1, p = 0.0012; 10.5 vs 29.2/1000 births for type 2, p = 0.0091).

Conclusions/interpretation

Stillbirth rates among women with type 1 and type 2 diabetes have decreased since 2002/2003. Rates of preterm delivery and LGA infants are lower in women with type 2 compared with type 1 diabetes. In women with type 1 diabetes, suboptimal glucose control and high rates of perinatal morbidity persist with substantial variations between clinics.

Data availability

Further details of the data collection methodology, individual clinic data and the full audit reports for healthcare professionals and service users are available from http://content.digital.nhs.uk/npid .",2017-06-08 +26817607,RDDpred: a condition-specific RNA-editing prediction model from RNA-seq data.,"

Background

RNA-editing is an important post-transcriptional RNA sequence modification performed by two catalytic enzymes, ""ADAR""(A-to-I) and ""APOBEC""(C-to-U). By utilizing high-throughput sequencing technologies, the biological function of RNA-editing has been actively investigated. Currently, RNA-editing is considered to be a key regulator that controls various cellular functions, such as protein activity, alternative splicing pattern of mRNA, and substitution of miRNA targeting site. DARNED, a public RDD database, reported that there are more than 300-thousands RNA-editing sites detected in human genome(hg19). Moreover, multiple studies suggested that RNA-editing events occur in highly specific conditions. According to DARNED, 97.62 % of registered editing sites were detected in a single tissue or in a specific condition, which also supports that the RNA-editing events occur condition-specifically. Since RNA-seq can capture the whole landscape of transcriptome, RNA-seq is widely used for RDD prediction. However, significant amounts of false positives or artefacts can be generated when detecting RNA-editing from RNA-seq. Since it is difficult to perform experimental validation at the whole-transcriptome scale, there should be a powerful computational tool to distinguish true RNA-editing events from artefacts.

Result

We developed RDDpred, a Random Forest RDD classifier. RDDpred reports potentially true RNA-editing events from RNA-seq data. RDDpred was tested with two publicly available RNA-editing datasets and successfully reproduced RDDs reported in the two studies (90 %, 95 %) while rejecting false-discoveries (NPV: 75 %, 84 %).

Conclusion

RDDpred automatically compiles condition-specific training examples without experimental validations and then construct a RDD classifier. As far as we know, RDDpred is the very first machine-learning based automated pipeline for RDD prediction. We believe that RDDpred will be very useful and can contribute significantly to the study of condition-specific RNA-editing. RDDpred is available at http://biohealth.snu.ac.kr/software/RDDpred .",2016-01-11 +21981551,Establishment of the genetic/genomic competency center for education.,"

Purpose

Develop a trans-disciplinary repository of genomics education resources using a Web-based learning management system. The repository maps and organizes genetic-genomic information and materials relevant to educators by healthcare discipline-specific competencies and performance indicators.

Methods

An interdisciplinary project team was established to guide toolkit repository building and usability testing. The toolkit was built using the X-CREDIT software on the Moodle learning management platform, which includes a mapping matrix and browsing function that captures teaching resources in a searchable database linked to competencies, knowledge areas, performance indicators, learning activities and resources, and outcome assessments. Discipline-specific advisory groups assisted in resource identification, competency mapping, and peer review. The toolkit is multidisciplinary, currently including physician assistants and nurses, and provides a resource crosslink to discipline-specific competencies. All resources have a detailed description, and users may contribute new resources, which are peer reviewed for relevance and accuracy by an editorial board. Alpha and beta testing using online usability surveys that included toolkit exercises helped refine the structure, look, and navigation of the final website.

Findings

One hundred thirty faculty-124 nursing and 6 physician assistant faculty-agreed to participate. Of those, 59 users (45.4% response rate) completed the online usability survey. Nearly all users (94.9%) were able to find a competency that was relevant to their topic, and 85.4% were able to locate the relevant performance indicators. The majority (86.5%) felt the model adequately described the relationships between competencies, performance indicators, learning activities-resources, and assessments, and made conceptual sense. Survey respondents reported font color and size made the information difficult to read, windows were not large enough, and the ""shopping cart"" concept was confusing; all of these areas have been modified for the final toolkit version.

Conclusions

Alpha and beta testing of the toolkit revealed that users can successfully obtain educational materials by searching competencies and performance indicators. The platform is accessible on the Internet at http://www.g-2-c-2.org and can be continually updated as new resources become available.

Clinical relevance

Faculty members need easy access to a wide range of accurate, current resources to facilitate integration of genomics into the curriculum.",2011-08-26 +27981205,Data files for ab initio calculations of the lattice parameter and elastic stiffness coefficients of bcc Fe with solutes.,"We present computed datasets on changes in the lattice parameter and elastic stiffness coefficients of bcc Fe due to substitutional Al, B, Cu, Mn, and Si solutes, and octahedral interstitial C and N solutes. The data is calculated using the methodology based on density functional theory (DFT) presented in Ref. (M.R. Fellinger, L.G. Hector Jr., D.R. Trinkle, 2017) [1]. All the DFT calculations were performed using the Vienna Ab initio Simulations Package (VASP) (G. Kresse, J. Furthmüller, 1996) [2]. The data is stored in the NIST dSpace repository (http://hdl.handle.net/11256/671).",2016-11-29 +24962434,Gene Set Enrichment Analysis (GSEA) of Toxoplasma gondii expression datasets links cell cycle progression and the bradyzoite developmental program.,"

Background

Large amounts of microarray expression data have been generated for the Apicomplexan parasite Toxoplasma gondii in an effort to identify genes critical for virulence or developmental transitions. However, researchers' ability to analyze this data is limited by the large number of unannotated genes, including many that appear to be conserved hypothetical proteins restricted to Apicomplexa. Further, differential expression of individual genes is not always informative and often relies on investigators to draw big-picture inferences without the benefit of context. We hypothesized that customization of gene set enrichment analysis (GSEA) to T. gondii would enable us to rigorously test whether groups of genes serving a common biological function are co-regulated during the developmental transition to the latent bradyzoite form.

Results

Using publicly available T. gondii expression microarray data, we created Toxoplasma gene sets related to bradyzoite differentiation, oocyst sporulation, and the cell cycle. We supplemented these with lists of genes derived from community annotation efforts that identified contents of the parasite-specific organelles, rhoptries, micronemes, dense granules, and the apicoplast. Finally, we created gene sets based on metabolic pathways annotated in the KEGG database and Gene Ontology terms associated with gene annotations available at http://www.toxodb.org. These gene sets were used to perform GSEA analysis using two sets of published T. gondii expression data that characterized T. gondii stress response and differentiation to the latent bradyzoite form.

Conclusions

GSEA provides evidence that cell cycle regulation and bradyzoite differentiation are coupled. Δgcn5A mutants unable to induce bradyzoite-associated genes in response to alkaline stress have different patterns of cell cycle and bradyzoite gene expression from stressed wild-type parasites. Extracellular tachyzoites resemble a transitional state that differs in gene expression from both replicating intracellular tachyzoites and in vitro bradyzoites by expressing genes that are enriched in bradyzoites as well as genes that are associated with the G1 phase of the cell cycle. The gene sets we have created are readily modified to reflect ongoing research and will aid researchers' ability to use a knowledge-based approach to data analysis facilitating the development of new insights into the intricate biology of Toxoplasma gondii.",2014-06-24 +27096413,QSoas: A Versatile Software for Data Analysis.,"Undoubtedly, the most natural way to confirm a model is to quantitatively verify its predictions. However, this is not done systematically, and one of the reasons for that is the lack of appropriate tools for analyzing data, because the existing tools do not implement the required models or they lack the flexibility required to perform data analysis in a reasonable time. We present QSoas, an open-source, cross-platform data analysis program written to overcome these problems. In addition to standard data analysis procedures and full automation using scripts, QSoas features a very powerful data fitting interface with support for arbitrary functions, differential equation and kinetic system integration, and flexible global fits. 
QSoas is available from http://www.qsoas.org .",2016-05-03 +27084938,DeepBlue epigenomic data server: programmatic data retrieval and analysis of epigenome region sets.,"Large amounts of epigenomic data are generated under the umbrella of the International Human Epigenome Consortium, which aims to establish 1000 reference epigenomes within the next few years. These data have the potential to unravel the complexity of epigenomic regulation. However, their effective use is hindered by the lack of flexible and easy-to-use methods for data retrieval. Extracting region sets of interest is a cumbersome task that involves several manual steps: identifying the relevant experiments, downloading the corresponding data files and filtering the region sets of interest. Here we present the DeepBlue Epigenomic Data Server, which streamlines epigenomic data analysis as well as software development. DeepBlue provides a comprehensive programmatic interface for finding, selecting, filtering, summarizing and downloading region sets. It contains data from four major epigenome projects, namely ENCODE, ROADMAP, BLUEPRINT and DEEP. DeepBlue comes with a user manual, examples and a well-documented application programming interface (API). The latter is accessed via the XML-RPC protocol supported by many programming languages. To demonstrate usage of the API and to enable convenient data retrieval for non-programmers, we offer an optional web interface. DeepBlue can be openly accessed at http://deepblue.mpi-inf.mpg.de.",2016-04-15 +29967264,CTGF Mediates Tumor-Stroma Interactions between Hepatoma Cells and Hepatic Stellate Cells to Accelerate HCC Progression.,"Connective tissue growth factor (CTGF) is a matricellular protein related to hepatic fibrosis. This study aims to clarify the roles of CTGF in hepatocellular carcinoma (HCC), which usually develops from fibrotic liver. CTGF was overexpressed in 93 human HCC compared with nontumorous tissues, primarily in tumor cells. 
Increased CTGF expression was associated with clinicopathologic malignancy of HCC. CTGF was upregulated in hepatoma cells in hepatocyte-specific Kras-mutated mice (Alb-Cre KrasLSL-G12D/+). Hepatocyte-specific knockout of CTGF in these mice (Alb-Cre KrasLSL-G12D/+ CTGFfl/fl) decreased liver tumor number and size. Hepatic stellate cells (HSC) were present in both human and murine liver tumors, and α-SMA expression, a marker of HSC activation, positively correlated with CTGF expression. Forced expression of CTGF did not affect growth of PLC/PRF/5 cells, a hepatoma cell line with little CTGF expression, but facilitated their growth in the presence of LX-2 cells, an HSC line. The growth of HepG2 cells, which express high levels of CTGF, was promoted by coculture with LX-2 cells compared with monoculture. Growth promotion by LX-2 cells was negated by an anti-CTGF antibody in both culture and xenografts. Coculturing LX-2 cells with HepG2 cells drove LX-2-derived production of IL6, which led to STAT-3 activation and proliferation of HepG2 cells. An anti-CTGF antibody reduced IL6 production in LX-2 cells and suppressed STAT-3 activation in HepG2 cells. In conclusion, our data identify tumor cell-derived CTGF as a keystone in the HCC microenvironment, activating nearby HSC that transmit progrowth signals to HCC cells, and this interaction is susceptible to inhibition by an anti-CTGF antibody.Significance: Protumor cross-talk between cancer cells and hepatic stellate cells presents an opportunity for therapeutic intervention against HCC.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/17/4902/F1.large.jpg Cancer Res; 78(17); 4902-14. 
©2018 AACR.",2018-07-02 +,The simple fool's guide to population genomics via RNA‐Seq: an introduction to high‐throughput sequencing data analysis,"High‐throughput sequencing technologies are currently revolutionizing the field of biology and medicine, yet bioinformatic challenges in analysing very large data sets have slowed the adoption of these technologies by the community of population biologists. We introduce the ‘Simple Fool's Guide to Population Genomics via RNA‐seq’ (SFG), a document intended to serve as an easy‐to‐follow protocol, walking a user through one example of high‐throughput sequencing data analysis of nonmodel organisms. It is by no means an exhaustive protocol, but rather serves as an introduction to the bioinformatic methods used in population genomics, enabling a user to gain familiarity with basic analysis steps. The SFG consists of two parts. This document summarizes the steps needed and lays out the basic themes for each and a simple approach to follow. The second document is the full SFG, publicly available at http://sfg.stanford.edu, that includes detailed protocols for data processing and analysis, along with a repository of custom‐made scripts and sample files. Steps included in the SFG range from tissue collection to de novo assembly, blast annotation, alignment, gene expression, functional enrichment, SNP detection, principal components and FST outlier analyses. Although the technical aspects of population genomics are changing very quickly, our hope is that this document will help population biologists with little to no background in high‐throughput sequencing and bioinformatics to more quickly adopt these new techniques.",2012-11-01 +24247530,Addition of a breeding database in the Genome Database for Rosaceae.,"Breeding programs produce large datasets that require efficient management systems to keep track of performance, pedigree, geographical and image-based data. 
With the development of DNA-based screening technologies, more breeding programs perform genotyping in addition to phenotyping for performance evaluation. The integration of breeding data with other genomic and genetic data is instrumental for the refinement of marker-assisted breeding tools, enhances genetic understanding of important crop traits and maximizes access and utility by crop breeders and allied scientists. Development of new infrastructure in the Genome Database for Rosaceae (GDR) was designed and implemented to enable secure and efficient storage, management and analysis of large datasets from the Washington State University apple breeding program and subsequently expanded to fit datasets from other Rosaceae breeders. The infrastructure was built using the software Chado and Drupal, making use of the Natural Diversity module to accommodate large-scale phenotypic and genotypic data. Breeders can search accessions within the GDR to identify individuals with specific trait combinations. Results from Search by Parentage lists individuals with parents in common and results from Individual Variety pages link to all data available on each chosen individual including pedigree, phenotypic and genotypic information. Genotypic data are searchable by markers and alleles; results are linked to other pages in the GDR to enable the user to access tools such as GBrowse and CMap. This breeding database provides users with the opportunity to search datasets in a fully targeted manner and retrieve and compare performance data from multiple selections, years and sites, and to output the data needed for variety release publications and patent applications. The breeding database facilitates efficient program management. Storing publicly available breeding data in a database together with genomic and genetic data will further accelerate the cross-utilization of diverse data types by researchers from various disciplines. 
Database URL: http://www.rosaceae.org/breeders_toolbox.",2013-11-18 +28703645,An analysis of topics and vocabulary in Chinese oral narratives by normal speakers and speakers with fluent aphasia.,"This study analysed the topic and vocabulary of Chinese speakers based on language samples of personal recounts in a large spoken Chinese database recently made available in the public domain, i.e. Cantonese AphasiaBank ( http://www.speech.hku.hk/caphbank/search/ ). The goal of the analysis is to offer clinicians a rich source for selecting ecologically valid training materials for rehabilitating Chinese-speaking people with aphasia (PWA) in the design and planning of culturally and linguistically appropriate treatments. Discourse production of 65 Chinese-speaking PWA of fluent types (henceforth, PWFA) and their non-aphasic controls narrating an important event in their life were extracted from Cantonese AphasiaBank. Analyses of topics and vocabularies in terms of part-of-speech, word frequency, lexical semantics, and diversity were conducted. There was significant overlap in topics between the two groups. While the vocabulary was larger for controls than that of PWFA as expected, they were similar in distribution across parts-of-speech, frequency of occurrence, and the ratio of concrete to abstract items in major open word classes. Moreover, proportionately more different verbs than nouns were employed at the individual level for both speaker groups. The findings provide important implications for guiding directions of aphasia rehabilitation not only of fluent but also non-fluent Chinese aphasic speakers.",2017-07-13 +26671799,A Machine Learning Based Approach to de novo Sequencing of Glycans from Tandem Mass Spectrometry Spectrum.,"Recently, glycomics has been actively studied and various technologies for glycomics have been rapidly developed. 
Currently, tandem mass spectrometry (MS/MS) is one of the key experimental tools for identification of structures of oligosaccharides. MS/MS can observe MS/MS peaks of fragmented glycan ions including cross-ring ions resulting from internal cleavages, which provide valuable information to infer glycan structures. Thus, the aim of de novo sequencing of glycans is to find the most probable assignments of observed MS/MS peaks to glycan substructures without databases. However, there are few satisfiable algorithms for glycan de novo sequencing from MS/MS spectra. We present a machine learning based approach to de novo sequencing of glycans from MS/MS spectrum. First, we build a suitable model for the fragmentation of glycans including cross-ring ions, and implement a solver that employs Lagrangian relaxation with a dynamic programming technique. Then, to optimize scores for the algorithm, we introduce a machine learning technique called structured support vector machines that enable us to learn parameters including scores for cross-ring ions from training data, i.e., known glycan mass spectra. Furthermore, we implement additional constraints for core structures of well-known glycan types including N-linked glycans and O-linked glycans. This enables us to predict more accurate glycan structures if the glycan type of given spectra is known. Computational experiments show that our algorithm performs accurate de novo sequencing of glycans. The implementation of our algorithm and the datasets are available at http://glyfon.dna.bio.keio.ac.jp/.",2015-11-01 +28704505,CSmiRTar: Condition-Specific microRNA targets database.,"MicroRNAs (miRNAs) are functional RNA molecules which play important roles in the post-transcriptional regulation. miRNAs regulate their target genes by repressing translation or inducing degradation of the target genes' mRNAs. Many databases have been constructed to provide computationally predicted miRNA targets. 
However, they cannot provide the miRNA targets expressed in a specific tissue and related to a specific disease at the same time. Moreover, they cannot provide the common targets of multiple miRNAs and the common miRNAs of multiple genes at the same time. To solve these two problems, we construct a database called CSmiRTar (Condition-Specific miRNA Targets). CSmiRTar collects computationally predicted targets of 2588 human miRNAs and 1945 mouse miRNAs from four most widely used miRNA target prediction databases (miRDB, TargetScan, microRNA.org and DIANA-microT) and implements functional filters which allows users to search (i) a miRNA's targets expressed in a specific tissue or/and related to a specific disease, (ii) multiple miRNAs' common targets expressed in a specific tissue or/and related to a specific disease, (iii) a gene's miRNAs related to a specific disease, and (iv) multiple genes' common miRNAs related to a specific disease. We believe that CSmiRTar will be a useful database for biologists to study the molecular mechanisms of post-transcriptional regulation in human or mouse. CSmiRTar is available at http://cosbi.ee.ncku.edu.tw/CSmiRTar/ or http://cosbi4.ee.ncku.edu.tw/CSmiRTar/.",2017-07-13 +27779621,"A studyforrest extension, simultaneous fMRI and eye gaze recordings during prolonged natural stimulation.","Here we present an update of the studyforrest (http://studyforrest.org) dataset that complements the previously released functional magnetic resonance imaging (fMRI) data for natural language processing with a new two-hour 3 Tesla fMRI acquisition while 15 of the original participants were shown an audio-visual version of the stimulus motion picture. 
We demonstrate with two validation analyses that these new data support modeling specific properties of the complex natural stimulus, as well as a substantial within-subject BOLD response congruency in brain areas related to the processing of auditory inputs, speech, and narrative when compared to the existing fMRI data for audio-only stimulation. In addition, we provide participants' eye gaze location as recorded simultaneously with fMRI, and an additional sample of 15 control participants whose eye gaze trajectories for the entire movie were recorded in a lab setting-to enable studies on attentional processes and comparative investigations on the potential impact of the stimulation setting on these processes.",2016-10-25 +28700586,Systematic identification and characterization of regulatory elements derived from human endogenous retroviruses.,"Human endogenous retroviruses (HERVs) and other long terminal repeat (LTR)-type retrotransposons (HERV/LTRs) have regulatory elements that possibly influence the transcription of host genes. We systematically identified and characterized these regulatory elements based on publicly available datasets of ChIP-Seq of 97 transcription factors (TFs) provided by ENCODE and Roadmap Epigenomics projects. We determined transcription factor-binding sites (TFBSs) using the ChIP-Seq datasets and identified TFBSs observed on HERV/LTR sequences (HERV-TFBSs). Overall, 794,972 HERV-TFBSs were identified. Subsequently, we identified ""HERV/LTR-shared regulatory element (HSRE),"" defined as a TF-binding motif in HERV-TFBSs, shared within a substantial fraction of a HERV/LTR type. HSREs could be an indication that the regulatory elements of HERV/LTRs are present before their insertions. We identified 2,201 HSREs, comprising specific associations of 354 HERV/LTRs and 84 TFs. 
Clustering analysis showed that HERV/LTRs can be grouped according to the TF binding patterns; HERV/LTR groups bounded to pluripotent TFs (e.g., SOX2, POU5F1, and NANOG), embryonic endoderm/mesendoderm TFs (e.g., GATA4/6, SOX17, and FOXA1/2), hematopoietic TFs (e.g., SPI1 (PU1), GATA1/2, and TAL1), and CTCF were identified. Regulatory elements of HERV/LTRs tended to locate nearby and/or interact three-dimensionally with the genes involved in immune responses, indicating that the regulatory elements play an important role in controlling the immune regulatory network. Further, we demonstrated subgroup-specific TF binding within LTR7, LTR5B, and LTR5_Hs, indicating that gains or losses of the regulatory elements occurred during genomic invasions of the HERV/LTRs. Finally, we constructed dbHERV-REs, an interactive database of HERV/LTR regulatory elements (http://herv-tfbs.com/). This study provides fundamental information in understanding the impact of HERV/LTRs on host transcription, and offers insights into the transcriptional modulation systems of HERV/LTRs and ancestral HERVs.",2017-07-12 +29369053,Acute Phase Predictors of 6-Month Functional Outcome in Italian Stroke Patients Eligible for In-Hospital Rehabilitation.,"PURPOSE:The aim of the study was to assess early poststroke prognostic factors in patients admitted for postacute phase rehabilitation. METHODS:A 1-yr multicenter prospective project was conducted in four Italian regions on 352 patients who were hospitalized after a first stroke and were eligible for postacute rehabilitation. Clinical data were collected in the stroke or acute care units (acute phase), then in rehabilitation units (postacute phase), and, subsequently, after a 6-mo poststroke period (follow-up). Clinical outcome measures were represented using the Barthel Index and the modified Rankin Scale. Univariate and multivariate analyses were performed to identify the most important prognostic index. 
RESULTS:Modified Rankin Scale score, minor neurologic impairment, and early out-of-bed mobilization (within 2 days after the stroke) proved to be important factors related to a better recovery according to Barthel Index (power of prediction = 37%). Similarly, age, premorbid modified Rankin Scale score, and early out-of-bed mobilization were seen to be significant factors in achieving better overall participation and activity according to the modified Rankin Scale (power of prediction = 48%). Barthel Index at admission and certain co-morbidities were also significant prognostic factors correlated with a better outcome. CONCLUSIONS:According to the Barthel Index and modified Rankin Scale, early mobilization is an early predictor of favorable outcome. TO CLAIM CME CREDITS:Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) Incorporate prognostic factors of good clinical outcomes after stroke in developing treatment plans for patients admitted to rehabilitation; (2) Identify acute phase indicators associated with favorable 6-mo outcome after stroke; and (3) Recognize the cut-off for early mobilization linked to better outcome in stroke survivors admitted to rehabilitation. LEVEL:Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2018-07-01 +28369284,Spresso: an ultrafast compound pre-screening method based on compound decomposition.,"

Motivation

Recently, the number of available protein tertiary structures and compounds has increased. However, structure-based virtual screening is computationally expensive owing to docking simulations. Thus, methods that filter out obviously unnecessary compounds prior to computationally expensive docking simulations have been proposed. However, the calculation speed of these methods is not fast enough to evaluate ≥ 10 million compounds.

Results

In this article, we propose a novel, docking-based pre-screening protocol named Spresso (Speedy PRE-Screening method with Segmented cOmpounds). Partial structures (fragments) are common among many compounds; therefore, the number of fragment variations needed for evaluation is smaller than that of compounds. Our method increases calculation speeds by ∼200-fold compared to conventional methods.

Availability and implementation

Spresso is written in C ++ and Python, and is available as an open-source code (http://www.bi.cs.titech.ac.jp/spresso/) under the GPLv3 license.

Contact

akiyama@c.titech.ac.jp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +22447773,Making sense out of massive data by going beyond differential expression.,"With the rapid growth of publicly available high-throughput transcriptomic data, there is increasing recognition that large sets of such data can be mined to better understand disease states and mechanisms. Prior gene expression analyses, both large and small, have been dichotomous in nature, in which phenotypes are compared using clearly defined controls. Such approaches may require arbitrary decisions about what are considered ""normal"" phenotypes, and what each phenotype should be compared to. Instead, we adopt a holistic approach in which we characterize phenotypes in the context of a myriad of tissues and diseases. We introduce scalable methods that associate expression patterns to phenotypes in order both to assign phenotype labels to new expression samples and to select phenotypically meaningful gene signatures. By using a nonparametric statistical approach, we identify signatures that are more precise than those from existing approaches and accurately reveal biological processes that are hidden in case vs. control studies. Employing a comprehensive perspective on expression, we show how metastasized tumor samples localize in the vicinity of the primary site counterparts and are overenriched for those phenotype labels. We find that our approach provides insights into the biological processes that underlie differences between tissues and diseases beyond those identified by traditional differential expression analyses. Finally, we provide an online resource (http://concordia.csail.mit.edu) for mapping users' gene expression samples onto the expression landscape of tissue and disease.",2012-03-23 +22188791,THEME: a web tool for loop-design microarray data analysis.,"A number of recent studies have shown that loop-design is more efficient than reference control design. 
Data analysis for loop-design microarray experiments is commonly undertaken using linear models and statistical tests. These techniques require specialized knowledge in statistical programming. However, limited loop-design web-based tools are available. We have developed the THEME (Tsing Hua Engine of Microarray Experiment) that exploits all necessary data analysis tools for loop-design microarray studies. THEME allows users to construct linear models and to apply multiple user-defined statistical tests of hypotheses for detection of DEG (differentially expressed genes). Users can modify entries of design matrix for experimental design as well as that of contrast matrix for statistical tests of hypotheses. The output of multiple user-defined statistical tests of hypotheses, DEG lists, can be cross-validated. The web platform provides data assessment and visualization tools that significantly assist users when evaluating the performance of microarray experimental procedures. THEME is also a MIAME (Minimal Information About a Microarray Experiment) compliant system, which enables users to export formatted files for GEO (Gene Expression Omnibus) submission. THEME offers comprehensive web services to biologists for data analysis of loop-design microarray experiments. This web-based resource is especially useful for core facility service as well as collaboration projects when researchers are not at the same site. Data analysis procedures, starting from uploading raw data files to retrieving DEG lists, can be flexibly operated with natural workflows. These features make THEME a reliable and powerful on-line system for data analysis of loop-design microarrays. 
The THEME server is available at http://metadb.bmes.nthu.edu.tw/theme/.",2011-12-20 +28746162,"Lung cancer and annual mean exposure to outdoor air pollution in Crete, Greece.","The increasing burden of lung cancer (LC) in Crete, Greece, has raised certain concerns about the potential association of environmental risk factors with LC. The aim of this study was to assess outdoor air pollution (OAP) and the risk for LC mortality for the first time in Crete using LC primary data. 5057 LC cases (diagnosed from 1992 to 2013) were obtained from the Cancer Registry of Crete (http://www.crc.uoc.gr) and followed up until 2014. The age-standardized incidence and mortality rates (ASIR) were calculated. Data on OAP indicators [particulate matter (PM)2.5, between 2.5 and 10 μm (PM2.5-10), PM10, PM2.5 absorbance (black carbon measure), nitrogen dioxide (NO2), and nitrogen oxides (NOx)] were collected. Spatial statistics were calculated and the binary logistic regression model was constructed at α=0.05 in IBM SPSS 24 and ArcMap 10.3.1. LC in Crete accounts for 40.2 new cases/100 000/year for both sexes (ASIRmales=73.1 new cases/100 000/year; ASIRfemales=11.8 new cases/100 000/year). Annual median estimates of environmental concentrations in Crete were as follows: PM2.5=20.7 (±1.5) µg/m³, PM10=38.9 (±2.5) µg/m³, PM2.5-10=59.6 (±3.7) µg/m³, PM2.5 absorbance=1.2 (±0.3)×10⁻⁵/m, NO2=15.2 (±3.8) µg/m³, and NOx=20.1 (±4.9) µg/m³. A statistically significant association was observed between OAP and LC mortality (mean correlation coefficient=0.75; P<0.05). The highest risk for 5-year LC mortality was found in the major urban centers and several south-east and north-west rural regions of Crete (relative risk=3.2, 95% confidence interval=1.6-4.7). OAP seems to be an important determinant of LC mortality. Targeted interventions should be performed in the high-risk areas.",2017-09-01 +27402901,A subpopulation model to analyze heterogeneous cell differentiation dynamics.,"

Motivation

Cell differentiation is steered by extracellular signals that activate a cell type specific transcriptional program. Molecular mechanisms that drive the differentiation can be analyzed by combining mathematical modeling with population average data. For standard mathematical models, the population average data is informative only if the measurements come from a homogeneous cell culture. In practice, however, the differentiation efficiencies are always imperfect. Consequently, cell cultures are inherently mixtures of several cell types, which have different molecular mechanisms and exhibit quantitatively different dynamics. There is an urgent need for data-driven mathematical modeling approaches that can detect possible heterogeneity and, further, recover the molecular mechanisms from heterogeneous data.

Results

We develop a novel method that models a heterogeneous population using homogeneous subpopulations that evolve in parallel. Different subpopulations can represent different cell types and each subpopulation can have cell type specific molecular mechanisms. We present statistical methodology that can be used to quantify the effect of heterogeneity and to infer the subpopulation specific molecular interactions. After a proof of principle study with simulated data, we apply our methodology to analyze the differentiation of human Th17 cells using time-course RNA sequencing data. We construct putative molecular networks driving the T cell activation and Th17 differentiation and allow the cell populations to be split into two subpopulations in the case of heterogeneous samples. Our analysis shows that the heterogeneity indeed has a statistically significant effect on observed dynamics and, furthermore, our statistical methodology can infer both the subpopulation specific molecular mechanisms and the effect of heterogeneity.

Availability and implementation

An implementation of the method is available at http://research.ics.aalto.fi/csb/software/subpop/ CONTACT: jukka.intosalmi@aalto.fi or harri.lahdesmaki@aalto.fi. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-10 +24980130,"Finding needles in haystacks: linking scientific names, reference specimens and molecular data for Fungi. ","DNA phylogenetic comparisons have shown that morphology-based species recognition often underestimates fungal diversity. Therefore, the need for accurate DNA sequence data, tied to both correct taxonomic names and clearly annotated specimen data, has never been greater. Furthermore, the growing number of molecular ecology and microbiome projects using high-throughput sequencing require fast and effective methods for en masse species assignments. In this article, we focus on selecting and re-annotating a set of marker reference sequences that represent each currently accepted order of Fungi. The particular focus is on sequences from the internal transcribed spacer region in the nuclear ribosomal cistron, derived from type specimens and/or ex-type cultures. Re-annotated and verified sequences were deposited in a curated public database at the National Center for Biotechnology Information (NCBI), namely the RefSeq Targeted Loci (RTL) database, and will be visible during routine sequence similarity searches with NR_prefixed accession numbers. A set of standards and protocols is proposed to improve the data quality of new sequences, and we suggest how type and other reference sequences can be used to improve identification of Fungi. Database URL: http://www.ncbi.nlm.nih.gov/bioproject/PRJNA177353.",2014-06-30 +25398903,PNRD: a plant non-coding RNA database.,"The first ncRNA found was an alanine tRNA in baker's yeast, and the first detected microRNAs (miRNAs) promoted ncRNA research to a whole new level. 
Research on ncRNAs in animals has focused on the medical field, while in plant scientists are more concerned with improving agronomic traits. In 2010, we constructed a plant miRNA database named PMRD to meet the demand for miRNA research in plants. To provide a way to do fundamental research on plant ncRNAs and take full advantage of tremendous public resources, we designed an updated platform called plant ncRNA database (PNRD) based on its predecessor PMRD, which is accessible at http://structuralbiology.cau.edu.cn/PNRD. We collected a total of 25739 entries of 11 different types of ncRNAs from 150 plant species. Targets of miRNAs were extended to 178138 pairs in 46 species, while the number of miRNA expression profiles reached 35. Improvements in PNRD are not only the larger amounts of data, but also better service, such as a more user-friendly interface, more multifunctional and browsing options and more background data for users to download. We also integrated currently prevalent technologies and toolkits to strengthen the capability of the database and provide a one-stop service for scientific users.",2014-11-14 +24788790,"ModuleRole: a tool for modulization, role determination and visualization in protein-protein interaction networks.","

Unlabelled

Rapidly increasing amounts of (physical and genetic) protein-protein interaction (PPI) data are produced by various high-throughput techniques, and interpretation of these data remains a major challenge. In order to gain insight into the organization and structure of the resultant large complex networks formed by interacting molecules, using simulated annealing, a method based on the node connectivity, we developed ModuleRole, a user-friendly web server tool which finds modules in PPI network and defines the roles for every node, and produces files for visualization in Cytoscape and Pajek. For given proteins, it analyzes the PPI network from BioGRID database, finds and visualizes the modules these proteins form, and then defines the role every node plays in this network, based on two topological parameters Participation Coefficient and Z-score. This is the first program which provides interactive and very friendly interface for biologists to find and visualize modules and roles of proteins in PPI network. It can be tested online at the website http://www.bioinfo.org/modulerole/index.php, which is free and open to all users and there is no login requirement, with demo data provided by ""User Guide"" in the menu Help. Non-server application of this program is considered for high-throughput data with more than 200 nodes or user's own interaction datasets. Users are able to bookmark the web link to the result page and access at a later time. As an interactive and highly customizable application, ModuleRole requires no expert knowledge in graph theory on the user side and can be used in both Linux and Windows system, thus a very useful tool for biologist to analyze and visualize PPI networks from databases such as BioGRID.

Availability

ModuleRole is implemented in Java and C, and is freely available at http://www.bioinfo.org/modulerole/index.php. Supplementary information (user guide, demo data) is also available at this website. API for ModuleRole used for this program can be obtained upon request.",2014-05-01 +28744113,Apolipoprotein ε7 allele in memory complaints: insights through protein structure prediction.,"

Purpose

APOE ε7 gene is a rare mutant form of APOE ε3. The mutation occurs in the lipid-binding domain of APOE. Based on the protein's structure, APOE ε7 is expected to function in lipid and β-amyloid metabolism, similar to APOE ε4. However, unlike that for APOE ε4, the mechanisms responsible for Alzheimer's disease (AD) cases associated with APOE ε7 expression have not been elucidated. The present study aims to investigate the association between APOE ε7 expression and cognitive impairment.

Methods

APOE was sequenced in DNA samples collected from 344 memory-complaint patients who visited the memory clinic, and from 345 non-memory-complaint individuals from the health promotion center. The protein structures of ApoE3, ApoE4, and ApoE7 were predicted.

Results

Three ε3/ε7 heterozygote individuals who were all classified under the memory-complaint group were identified. Of these, two subjects were clinically diagnosed with AD with small vessel disease, and the remaining individual was diagnosed with subjective cognitive impairment. This study predicted the protein structures of ApoE3, ApoE4, and ApoE7 and determined the three-dimensional structure of the carboxy terminus of ApoE7, which participates in an electrostatic domain interaction similar to that of APOE ε4. APOE K244 or K245 mutations for APOE ε7 were not found in the Korean reference genome database, which contains information (http://152.99.75.168/KRGDB/browser/mainBrowser.jsp) from 622 healthy individuals.

Conclusion

As verified by the results of structural prediction, APOE ε7 could serve as another risk factor for cognitive impairment and is particularly associated with vascular disease. However, additional studies are required to validate the pathogenic nature of APOE ε7.",2017-07-11 +28744232,Production and Comprehension of Pantomimes Used to Depict Objects.,"Pantomime, gesture in absence of speech, has no conventional meaning. Nevertheless, individuals seem to be able to produce pantomimes and derive meaning from pantomimes. A number of studies has addressed the use of co-speech gesture, but little is known on pantomime. Therefore, the question of how people construct and understand pantomimes arises in gesture research. To determine how people use pantomimes, we asked participants to depict a set of objects using pantomimes only. We annotated what representation techniques people produced. Furthermore, using judgment tasks, we assessed the pantomimes' comprehensibility. Analyses showed that similar techniques were used to depict objects across individuals. Objects with a default depiction method were better comprehended than objects for which there was no such default. More specifically, tools and objects depicted using a handling technique were better understood. The open-answer experiment showed low interpretation accuracy. Conversely, the forced-choice experiment showed ceiling effects. These results suggest that across individuals, similar strategies are deployed to produce pantomime, with the handling technique as the apparent preference. This might indicate that the production of pantomimes is based on mental representations which are intrinsically similar. Furthermore, pantomime conveys semantically rich, but ambiguous, information, and its interpretation is much dependent on context. This pantomime database is available online: https://dataverse.nl/dataset.xhtml?persistentId=hdl:10411/QZHO6M. 
This can be used as a baseline with which we can compare clinical groups.",2017-07-11 +25433696,LigDig: a web server for querying ligand-protein interactions.,"

Unlabelled

LigDig is a web server designed to answer questions that previously required several independent queries to diverse data sources. It also performs basic manipulations and analyses of the structures of protein-ligand complexes. The LigDig webserver is modular in design and consists of seven tools, which can be used separately, or via linking the output from one tool to the next, in order to answer more complex questions. Currently, the tools allow a user to: (i) perform a free-text compound search, (ii) search for suitable ligands, particularly inhibitors, of a protein and query their interaction network, (iii) search for the likely function of a ligand, (iv) perform a batch search for compound identifiers, (v) find structures of protein-ligand complexes, (vi) compare three-dimensional structures of ligand binding sites and (vii) prepare coordinate files of protein-ligand complexes for further calculations.

Availability and implementation

LigDig makes use of freely available databases, including ChEMBL, PubChem and SABIO-RK, and software programs, including cytoscape.js, PDB2PQR, ProBiS and Fconv. LigDig can be used by non-experts in bio- and chemoinformatics. LigDig is available at: http://mcm.h-its.org/ligdig.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-29 +26323904,Tumor M2 pyruvate kinase in diagnosis of nonsmall cell lung cancer: A meta-analysis based on Chinese population.,"

Objective

The purpose of this study was to evaluate the value of tumor M2 pyruvate kinase (tumor M2-PK) in the diagnosis of nonsmall cell lung cancer.

Methods

The diagnostic clinical studies of tumor M2-PK in the diagnosis of nonsmall cell lung cancer were electronically searched in the Medline, EMBASE, WANFANG, and CNKI databases. The data of true positive, false positive, false negative, and true negative were extracted from each of the individual studies. We use Stata11.0 (http://www.stata.com; Stata Corporation, College Station, TX) and MetaDiSc 1.4 software to pool the diagnostic sensitivity, specificity, and diagnostic area under the receiver operating characteristic (ROC).

Results

Eleven diagnostic clinical studies with 1294 subjects were included in this diagnostic meta-analysis. The combined sensitivity, specificity, positive likelihood ratio, negative likelihood ratio were 0.69 (0.65-0.72), 0.92 (0.89-0.94), 7.84 (5.92-10.38), 0.36 (0.32-0.40). And the area under the ROC curve was 0.92 (0.90-0.94).

Conclusion

Serum tumor M2-PK can be a potential biomarker for diagnosis of nonsmall cell lung cancer.",2015-08-01 +29559993,Griffin: A Tool for Symbolic Inference of Synchronous Boolean Molecular Networks.,"Boolean networks are important models of biochemical systems, located at the high end of the abstraction spectrum. A number of Boolean gene networks have been inferred following essentially the same method. Such a method first considers experimental data for a typically underdetermined ""regulation"" graph. Next, Boolean networks are inferred by using biological constraints to narrow the search space, such as a desired set of (fixed-point or cyclic) attractors. We describe Griffin, a computer tool enhancing this method. Griffin incorporates a number of well-established algorithms, such as Dubrova and Teslenko's algorithm for finding attractors in synchronous Boolean networks. In addition, a formal definition of regulation allows Griffin to employ ""symbolic"" techniques, able to represent both large sets of network states and Boolean constraints. We observe that when the set of attractors is required to be an exact set, prohibiting additional attractors, a naive Boolean coding of this constraint may be unfeasible. Such cases may be intractable even with symbolic methods, as the number of Boolean constraints may be astronomically large. To overcome this problem, we employ an Artificial Intelligence technique known as ""clause learning"" considerably increasing Griffin's scalability. Without clause learning only toy examples prohibiting additional attractors are solvable: only one out of seven queries reported here is answered. With clause learning, by contrast, all seven queries are answered. We illustrate Griffin with three case studies drawn from the Arabidopsis thaliana literature. 
Griffin is available at: http://turing.iimas.unam.mx/griffin.",2018-03-06 +,Atypical antipsychotic agents; Peas in a pod or chalk and cheese?,"With escalating health expenditure and a shrinking purse, there is increased focus on the cost efficacy of still patented versus generic medications in general, and for atypical antipsychotics in particular. In a recent BMC Medicine article, Godman and colleagues presented data indicating poor uptake of the off patent atypical antipsychotic risperidone, arguing for authorities to mandate its greater use. This is under the assumption of clinical equivalence of atypical antipsychotics. This commentary argues that there are clinically meaningful differences between atypical antipsychotics and important inter-individual heterogeneity in clinical response and tolerability. Access to a broad range of atypical antipsychotics enables clinicians to tailor care, taking consideration of differential efficacy and adverse effects profile in order to meet the needs of individual patients with improved real world effectiveness of treatment. Restriction of agent choice risks detracting from optimal clinical care, with possible poorer outcomes and greater costs of care. A balance between encouraging use of cheapest in class agent and allowing access to various atypical agents for tailored care is likely to produce optimal health outcomes. Please see related article: http://www.biomedcentral.com/1741-7015/12/98.",2014-01-01 +28549074,CWDPRNP: a tool for cervid prion sequence analysis in program R.,"

Summary

Chronic wasting disease is a fatal, neurological disease caused by an infectious prion protein, which affects economically and ecologically important members of the family Cervidae. Single nucleotide polymorphisms within the prion protein gene have been linked to differential susceptibility to the disease in many species. Wildlife managers are seeking to determine the frequencies of disease-associated alleles and genotypes and delineate spatial genetic patterns. The CWDPRNP package, implemented in program R, provides a unified framework for analyzing prion protein gene variability and spatial structure.

Availability and implementation

The CWDPRNP package, manual and example data files are available at http://ecosystems.psu.edu/research/labs/walter-lab/additional-labs/population-genetics-lab. This package is available for all commonly used platforms.

Contact

wlm159psu@gmail.com.",2017-10-01 +29228271,LitPathExplorer: a confidence-based visual text analytics tool for exploring literature-enriched pathway models.,"Motivation:Pathway models are valuable resources that help us understand the various mechanisms underpinning complex biological processes. Their curation is typically carried out through manual inspection of published scientific literature to find information relevant to a model, which is a laborious and knowledge-intensive task. Furthermore, models curated manually cannot be easily updated and maintained with new evidence extracted from the literature without automated support. Results:We have developed LitPathExplorer, a visual text analytics tool that integrates advanced text mining, semi-supervised learning and interactive visualization, to facilitate the exploration and analysis of pathway models using statements (i.e. events) extracted automatically from the literature and organized according to levels of confidence. LitPathExplorer supports pathway modellers and curators alike by: (i) extracting events from the literature that corroborate existing models with evidence; (ii) discovering new events which can update models; and (iii) providing a confidence value for each event that is automatically computed based on linguistic features and article metadata. Our evaluation of event extraction showed a precision of 89% and a recall of 71%. Evaluation of our confidence measure, when used for ranking sampled events, showed an average precision ranging between 61 and 73%, which can be improved to 95% when the user is involved in the semi-supervised learning process. Qualitative evaluation using pair analytics based on the feedback of three domain experts confirmed the utility of our tool within the context of pathway model exploration. Availability and implementation:LitPathExplorer is available at http://nactem.ac.uk/LitPathExplorer_BI/. Contact:sophia.ananiadou@manchester.ac.uk. 
Supplementary information:Supplementary data are available at Bioinformatics online.",2018-04-01 +28695303,A comparison of cosegregation analysis methods for the clinical setting.,"Quantitative cosegregation analysis can help evaluate the pathogenicity of genetic variants. However, genetics professionals without statistical training often use simple methods, reporting only qualitative findings. We evaluate the potential utility of quantitative cosegregation in the clinical setting by comparing three methods. One thousand pedigrees each were simulated for benign and pathogenic variants in BRCA1 and MLH1 using United States historical demographic data to produce pedigrees similar to those seen in the clinic. These pedigrees were analyzed using two robust methods, full likelihood Bayes factors (FLB) and cosegregation likelihood ratios (CSLR), and a simpler method, counting meioses. Both FLB and CSLR outperform counting meioses when dealing with pathogenic variants, though counting meioses is not far behind. For benign variants, FLB and CSLR greatly outperform as counting meioses is unable to generate evidence for benign variants. Comparing FLB and CSLR, we find that the two methods perform similarly, indicating that quantitative results from either of these methods could be combined in multifactorial calculations. Combining quantitative information will be important as isolated use of cosegregation in single families will yield classification for less than 1% of variants. To encourage wider use of robust cosegregation analysis, we present a website ( http://www.analyze.myvariant.org ) which implements the CSLR, FLB, and Counting Meioses methods for ATM, BRCA1, BRCA2, CHEK2, MEN1, MLH1, MSH2, MSH6, and PMS2. We also present an R package, CoSeg, which performs the CSLR analysis on any gene with user supplied parameters. 
Future variant classification guidelines should allow nuanced inclusion of cosegregation evidence against pathogenicity.",2018-04-01 +24470572,ChEpiMod: a knowledgebase for chemical modulators of epigenome reader domains.,"

Context

Epigenome reader domains are rapidly emerging as a new class of drug targets for a wide array of human diseases. To facilitate study of structure-activity relationship and small-molecule ligand design for these domains, we have created ChEpiMod. ChEpiMod is a free knowledgebase of chemical modulators with documented modulatory activity for epigenome reader domains.

Methods

ChEpiMod organizes information about chemical modulators and their associated binding-affinity data, as well as available structures of epigenome readers from the Protein Data Bank. The data are gathered from the literature and patents. Entries are supplemented by annotation. The current version of ChEpiMod covers six epigenome reader domain families (Bromodomain, PHD finger, Chromodomain, MBT, PWWP and Tudor). The database can be used to browse existing chemical modulators and bioactivity data, as well as, all available structures of readers and their molecular interactions. The database is updated weekly.

Availability

ChEpiMod is freely available at http://chepimod.org

Contact

ming-ming.zhou@mssm.edu

Supplementary information

Supplementary data is available at Bioinformatics online.",2014-01-27 +28575391,Protein-Sol: a web tool for predicting protein solubility from sequence.,"

Motivation

Protein solubility is an important property in industrial and therapeutic applications. Prediction is a challenge, despite a growing understanding of the relevant physicochemical properties.

Results

Protein-Sol is a web server for predicting protein solubility. Using available data for Escherichia coli protein solubility in a cell-free expression system, 35 sequence-based properties are calculated. Feature weights are determined from separation of low and high solubility subsets. The model returns a predicted solubility and an indication of the features which deviate most from average values. Two other properties are profiled in windowed calculation along the sequence: fold propensity, and net segment charge. The utility of these additional features is demonstrated with the example of thioredoxin.

Availability and implementation

The Protein-Sol webserver is available at http://protein-sol.manchester.ac.uk.

Contact

jim.warwicker@manchester.ac.uk.",2017-10-01 +28096085,Improved orthology inference with Hieranoid 2.,"

Motivation

The initial step in many orthology inference methods is the computationally demanding establishment of all pairwise protein similarities across all analysed proteomes. The quadratic scaling with proteomes has become a major bottleneck. A remedy is offered by the Hieranoid algorithm which reduces the complexity to linear by hierarchically aggregating ortholog groups from InParanoid along a species tree.

Results

We have further developed the Hieranoid algorithm in many ways. Major improvements have been made to the construction of multiple sequence alignments and consensus sequences. Hieranoid version 2 was evaluated with standard benchmarks that reveal a dramatic increase in the coverage/accuracy tradeoff over version 1, such that it now compares favourably with the best methods. The new parallelized cluster mode allows Hieranoid to be run on large data sets in a much shorter timespan than InParanoid, yet at similar accuracy.

Contact

mateusz.kaduk@scilifelab.se.

Availability and implementation

Perl code freely available at http://hieranoid.sbc.su.se/ .

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +30073950,"Traffic-Related Air Pollution, APOEε4 Status, and Neurodevelopmental Outcomes among School Children Enrolled in the BREATHE Project (Catalonia, Spain).","

Background

Traffic-related air pollution is emerging as a risk factor for Alzheimer's disease (AD) and impaired brain development. Individual differences in vulnerability to air pollution may involve the ε4 allele of Apolipoprotein E (APOE) gene, the primary genetic risk factor for AD.

Objective

We analyzed whether the association between traffic air pollution and neurodevelopmental outcomes is modified by APOEε4 status in children.

Methods

Data on parent-reported behavior problems (total difficulties scores, Strengths and Difficulties Questionnaire), teacher-reported attention-deficit hyperactivity disorder (ADHD) symptom scores, cognitive performance trajectories (computerized tests of inattentiveness and working memory repeated 2-4 times during January 2012-March 2013), and APOE genotypes were obtained for 1,667 children age 7-11 y attending 39 schools in or near Barcelona. Basal ganglia volume (putamen, caudate, and globus pallidum) was measured in 163 of the children by MRI (October 2012-April 2014.) Average annual outdoor polycyclic aromatic hydrocarbons (PAHs), elemental carbon (EC), and nitrogen dioxide (NO2) concentrations were estimated based on measurements at each school (two 1-wk campaigns conducted 6 months apart in 2012).

Results

APOEε4 allele carriers had significantly higher behavior problem scores than noncarriers, and adverse associations with PAHs and NO2 were stronger or limited to ε4 carriers for behavior problem scores (P-interaction 0.03 and 0.04), caudate volume (P-interaction 0.04 and 0.03), and inattentiveness trajectories (P-interaction 0.15 and 0.08, respectively). Patterns of associations with the same outcomes were similar for EC.

Conclusion

PAHs, EC, and NO2 were associated with higher behavior problem scores, smaller reductions in inattentiveness over time, and smaller caudate volume in APOEε4 allele carriers in our study population, and corresponding associations were weak or absent among ε4 noncarriers. These findings support a potential role of APOE in biological mechanisms that may contribute to associations between air pollution and neurobehavioral outcomes in children. https://doi.org/10.1289/EHP2246.",2018-08-02 +29606906,Small Supernumerary Marker Chromosome May Provide Information on Dosage-insensitive Pericentric Regions in Human.,"

Background

Cytogenetically visible chromosomal imbalances in humans are deleterious and adverse in the majority of the cases. However, healthy persons living with chromosomal imbalances in the range of several megabasepairs (Mbps) in size, like carriers of small Supernumerary Marker Chromosomes (sSMCs) exist.

Materials & methods

The identification of healthy sSMC carriers with euchromatic centromere-near (ECN) imbalances led to the following proposal: ECN-regions do not contain any dosage sensitive genes. Due to own previous work, dosage-insensitive pericentric ECN-regions were already determined with an accuracy of 0.3 and 5 Mbp. Based on this data we established 43 new pericentromeric probe sets spanning about 3-5 Mbp of each euchromatic human chromosome arm starting from the known insensitive regions towards distal. Such so called pericentromeric-critical region fluorescence in situ hybridization (PeCR-FISH) probe sets were applied exemplarily and successfully here in 15 sSMC cases as available from the Else Kröner-Fresenius-sSMC-cellbank.

Conclusion

Most of the involved sSMC breakpoints could be characterized as a higher resolution than before. An unexpected result was that in 5/15 cases cryptic mosaicism was characterized. The latter is also to be considered to have potentially an influence on the clinical outcome in these so-called discontinuous sSMCs. Overall, the suitability of PeCR-FISH to characterize sSMCs was proven; the potential of this probe set to further delineate sizes of dosage insensitive pericentric regions is obvious but dependent on suited cases. Furthermore, discontinuous sSMCs can be identified by this approach and this new subtype of sSMC needs to be studied in more detail in future.",2018-04-01 +28165473,cryoSPARC: algorithms for rapid unsupervised cryo-EM structure determination.,"Single-particle electron cryomicroscopy (cryo-EM) is a powerful method for determining the structures of biological macromolecules. With automated microscopes, cryo-EM data can often be obtained in a few days. However, processing cryo-EM image data to reveal heterogeneity in the protein structure and to refine 3D maps to high resolution frequently becomes a severe bottleneck, requiring expert intervention, prior structural knowledge, and weeks of calculations on expensive computer clusters. Here we show that stochastic gradient descent (SGD) and branch-and-bound maximum likelihood optimization algorithms permit the major steps in cryo-EM structure determination to be performed in hours or minutes on an inexpensive desktop computer. Furthermore, SGD with Bayesian marginalization allows ab initio 3D classification, enabling automated analysis and discovery of unexpected structures without bias from a reference map. These algorithms are combined in a user-friendly computer program named cryoSPARC (http://www.cryosparc.com).",2017-02-06 +24712981,SFGD: a comprehensive platform for mining functional information from soybean transcriptome data and its use in identifying acyl-lipid metabolism pathways.,"

Background

Soybean (Glycine max L.) is one of the world's most important leguminous crops producing high-quality protein and oil. Increasing the relative oil concentration in soybean seeds is many researchers' goal, but a complete analysis platform of functional annotation for the genes involved in the soybean acyl-lipid pathway is still lacking. Following the success of soybean whole-genome sequencing, functional annotation has become a major challenge for the scientific community. Whole-genome transcriptome analysis is a powerful way to predict genes with biological functions. It is essential to build a comprehensive analysis platform for integrating soybean whole-genome sequencing data, the available transcriptome data and protein information. This platform could also be used to identify acyl-lipid metabolism pathways.

Description

In this study, we describe our construction of the Soybean Functional Genomics Database (SFGD) using Generic Genome Browser (Gbrowse) as the core platform. We integrated microarray expression profiling with 255 samples from 14 groups' experiments and mRNA-seq data with 30 samples from four groups' experiments, including spatial and temporal transcriptome data for different soybean development stages and environmental stresses. The SFGD includes a gene co-expression regulatory network containing 23,267 genes and 1873 miRNA-target pairs, and a group of acyl-lipid pathways containing 221 enzymes and more than 1550 genes. The SFGD also provides some key analysis tools, i.e. BLAST search, expression pattern search and cis-element significance analysis, as well as gene ontology information search and single nucleotide polymorphism display.

Conclusion

The SFGD is a comprehensive database integrating genome and transcriptome data, and also for soybean acyl-lipid metabolism pathways. It provides useful toolboxes for biologists to improve the accuracy and robustness of soybean functional genomics analysis, further improving understanding of gene regulatory networks for effective crop improvement. The SFGD is publically accessible at http://bioinformatics.cau.edu.cn/SFGD/, with all data available for downloading.",2014-04-08 +29507796,The pathological and molecular diagnosis of malignant pleural mesothelioma: a literature review.,"Malignant pleural mesothelioma (MPM), an asbestos-induced tumor, represents significant diagnostic challenges for pathologists. Its histological diagnosis is stepwise and should be based on morphological assessment, supported by clinical and radiological findings, and supplemented with immunohistochemistry (IHC) and, more recently, molecular tests. The main diagnostic dilemmas are the differential diagnoses with benign mesothelial proliferations and other pleural malignant tumors. The present review is an update regarding the morphological, immunohistochemical, and molecular features with respect to MPM diagnosis. Data sources include a survey of the biomedical literature from PubMed (http://www.ncbi.nlm.nih.gov/pubmed) and textbooks focusing on the pathological diagnosis of MPM and associated immunohistochemical and molecular markers. The histological findings of MPM could facilitate its diagnosis and provide important prognostic information. The immunohistochemical approach should rest on the application of a panel including positive (mesothelial-related) and negative markers with greater than 80% sensitivity and specificity, which need to be selected based on morphology and clinical information. 
Moreover, in challenging cases, fluorescent in situ hybridization (FISH) testing for the p16 deletion and IHC to evaluate the loss of BRCA1-associated protein 1 (BAP1) expression could be useful in distinguishing benign from malignant pleural proliferations.",2018-01-01 +27749513,The Utility of Brain Natriuretic Peptide in Pediatric Cardiology: A Review.,"

Objective

The aim of this article is to evaluate the clinical utility of brain natriuretic peptide in pediatric patients, examining the diagnostic value, management, and prognostic relevance, by critical assessment of the literature.

Data sources

In December 2015, a literature search was performed (PubMed access to MEDLINE citations; http://www.ncbi.nlm.nih.gov/PubMed/) and included these Medical Subject Headings and text terms for the key words: ""brain natriuretic peptide,"" ""amino-terminal pro-brain natriuretic peptide,"" ""children,"" ""neonate/s,"" ""newborn/s,"" ""infant/s,"" and ""echocardiography.""

Study selection

Each article title and abstract was screened to identify relevant studies. The search strategy was limited to published studies in English language concerning brain natriuretic peptide/amino-terminal pro-brain natriuretic peptide in pediatric patients.

Data extraction

Data on age, gender, type of clinical condition, brain natriuretic peptide assay method, cardiac function variables evaluated by echocardiography, and prognosis were extracted.

Data synthesis

Brain natriuretic peptide reference values in healthy newborns, infants, and children are presented. Brain natriuretic peptide diagnostic accuracy in newborns, infants, and children suspected to have congenital heart defects is discussed, and brain natriuretic peptide prognostic value reviewed. The data suggest that the determination of brain natriuretic peptide levels improves the diagnostic accuracy in the assessment of heart disease in the pediatric population. Brain natriuretic peptide assay may increase the accuracy of neonatal screening programs for diagnosing congenital heart defects. Echocardiographic variables correlated to brain natriuretic peptide levels. Additionally, brain natriuretic peptide levels predicted adverse outcomes in the postoperative period.

Conclusions

Brain natriuretic peptide assessment is a reliable test to diagnose significant structural or functional cardiovascular disease in children. In the integrated follow-up of these cases, several physiologic and clinical variables must be considered; brain natriuretic peptide may be an additional helpful marker. Nevertheless, larger prospective studies are warranted to elucidate the true prognostic value of brain natriuretic peptide in pediatric patients.",2016-11-01 +29701446,Prenatal Organophosphate Pesticide Exposure and Traits Related to Autism Spectrum Disorders in a Population Living in Proximity to Agriculture.,"

Background

Prenatal exposure to organophosphate (OP) pesticides has been linked with poorer neurodevelopment and behaviors related to autism spectrum disorders (ASD) in previous studies, including in the Center for Health Assessment of Mothers and Children of Salinas (CHAMACOS) study, a birth cohort living in the agricultural Salinas Valley in California.

Objectives

To investigate the association of prenatal exposure to OP pesticides with traits related to ASD, in childhood and adolescents in CHAMACOS.

Methods

We assessed OP exposure during pregnancy with measurements of dialkyl phosphates (DAP) metabolites in urine, and residential proximity to OP use during pregnancy using California's Pesticide Use Reporting (PUR) data and estimated associations with ASD-related traits using linear regression models. We measured traits reported by parents and teachers as well as the child's performance on tests that evaluate the ability to use facial expressions to recognize the mental state of others at 7, 10½, and 14 years of age.

Results

Prenatal DAPs were associated with poorer parent and teacher reported social behavior [e.g., a 10-fold DAP increase was associated with a 2.7-point increase (95% confidence interval (CI): 0.9, 4.5) in parent-reported Social Responsiveness Scale, Version 2, T-scores at age 14]. We did not find clear evidence of associations between residential proximity to OP use during pregnancy and ASD-related traits.

Conclusions

These findings contribute mixed evidence linking OP pesticide exposures with traits related to developmental disorders like ASD. Subtle pesticide-related effects on ASD-related traits among a population with ubiquitous exposure could result in a rise in cases of clinically diagnosed disorders like ASD. https://doi.org/10.1289/EHP2580.",2018-04-25 +27892496,EWAS: epigenome-wide association studies software 1.0 - identifying the association between combinations of methylation levels and diseases.,"Similar to the SNP (single nucleotide polymorphism) data, there is non-random association of the DNA methylation level (we call it methylation disequilibrium, MD) between neighboring methylation loci. For the case-control study of complex diseases, it is important to identify the association between methylation levels combination types (we call it methylecomtype) and diseases/phenotypes. We extended the classical framework of SNP haplotype-based association study in population genetics to DNA methylation level data, and developed a software EWAS to identify the disease-related methylecomtypes. EWAS can provide the following basic functions: (1) calculating the DNA methylation disequilibrium coefficient between two CpG loci; (2) identifying the MD blocks across the whole genome; (3) carrying out case-control association study of methylecomtypes and identifying the disease-related methylecomtypes. For a DNA methylation level data set including 689 samples (354 cases and 335 controls) and 473864 CpG loci, it takes only about 25 min to complete the full scan. EWAS v1.0 can rapidly identify the association between combinations of methylation levels (methylecomtypes) and diseases. EWAS v1.0 is freely available at: http://www.ewas.org.cn or http://www.bioapp.org/ewas.",2016-11-28 +28728142,Strategies to Improve Private-Well Water Quality: A North Carolina Perspective.,"

Background

Evidence suggests that the 44.5 million U.S. residents drawing their drinking water from private wells face higher risks of waterborne contaminant exposure than those served by regulated community water supplies. Among U.S. states, North Carolina (N.C.) has the second-largest population relying on private wells, making it a useful microcosm to study challenges to maintaining private-well water quality.

Objectives

This paper summarizes recommendations from a two-day summit to identify options to improve drinking-water quality for N.C. residents served by private wells.

Methods

The Research Triangle Environmental Health Collaborative invited 111 participants with knowledge of private-well water challenges to attend the Summit. Participants worked in small groups that focused on specific aspects and reconvened in plenary sessions to formulate consensus recommendations.

Discussion

Summit participants highlighted four main barriers to ensuring safe water for residents currently relying on private wells: (1) a database of private well locations is unavailable; (2) racial disparities have perpetuated reliance on private wells in some urbanized areas; (3) many private-well users lack information or resources to monitor and maintain their wells; and (4) private-well support programs are fragmented and lack sufficient resources. The Summit produced 10 consensus recommendations for ways to overcome these barriers.

Conclusions

The Summit recommendations, if undertaken, could improve the health of North Carolinians facing elevated risks of exposure to waterborne contaminants because of their reliance on inadequately monitored and maintained private wells. Because many of the challenges in N.C. are common nationwide, these recommendations could serve as models for other states. https://doi.org/10.1289/EHP890.",2017-07-07 +29126148,Consensus coding sequence (CCDS) database: a standardized set of human and mouse protein-coding regions supported by expert curation.,"The Consensus Coding Sequence (CCDS) project provides a dataset of protein-coding regions that are identically annotated on the human and mouse reference genome assembly in genome annotations produced independently by NCBI and the Ensembl group at EMBL-EBI. This dataset is the product of an international collaboration that includes NCBI, Ensembl, HUGO Gene Nomenclature Committee, Mouse Genome Informatics and University of California, Santa Cruz. Identically annotated coding regions, which are generated using an automated pipeline and pass multiple quality assurance checks, are assigned a stable and tracked identifier (CCDS ID). Additionally, coordinated manual review by expert curators from the CCDS collaboration helps in maintaining the integrity and high quality of the dataset. The CCDS data are available through an interactive web page (https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi) and an FTP site (ftp://ftp.ncbi.nlm.nih.gov/pub/CCDS/). In this paper, we outline the ongoing work, growth and stability of the CCDS dataset and provide updates on new collaboration members and new features added to the CCDS user interface. 
We also present expert curation scenarios, with specific examples highlighting the importance of an accurate reference genome assembly and the crucial role played by input from the research community.",2018-01-01 +28459916,Direct Oral Anticoagulants (DOACs): Current Status Among Distinct Patient Subgroups.,"The landscape of anticoagulant therapy for atrial fibrillation and deep-vein thrombosis has evolved considerably in the last decade with the advent of Novel or Direct-Acting Oral Antiocoagulants (DOACs). The initial phase III randomized controlled trials established the individual DOACs as viable alternatives to warfarin for thromboprophylaxis but generalizations to the larger population were limited by the small number of protocol subjects with renal insufficiency, congestive heart failure, advanced age and other comorbidities. All the DOACs have some degree of renal excretion and while safe and effective in patients with mild to moderate renal insufficiency, dose adjustment is necessary based on creatinine clearance. Subsequent data registries and real-world experience with DOACs have continued to refine their role in these particular patient subgroups. Off-label use with both under- and overdosing is not uncommon in renal failure and carries increased risk. Their increasing use among the elderly, in patients with heart failure, hepatic and renal insufficiency and among the Asian population has been shown to be relatively safe and effective compared to warfarin. Gaps in our current understanding of this new class of anticoagulants will continue to narrow as additional data becomes available through ongoing registries and real-world experience. [Full article available at http://rimed.org/rimedicaljournal-2017-05.asp].",2017-05-01 +26608454,Corticotomies and Orthodontic Tooth Movement: A Systematic Review.,"

Purpose

A systematic review was conducted to examine the evidence for the effectiveness and safety of corticotomy-facilitated orthodontics.

Materials and methods

Electronic databases (Ovid Medline, EMBASE, Cochrane, SCOPUS, and Web of Science) were searched for articles that examined the rate of corticotomy-facilitated orthodontic tooth movement and its effects on the periodontium, root resorption, and tooth vitality. Unpublished literature was searched electronically through ClinicalTrials.gov (http://www.clinicaltrials.gov) and the ISRCTN registry (http://www.controlled-trials.com). Relevant orthodontic journals and reference lists also were checked for eligible studies. Randomized clinical trials (RCTs) and controlled clinical trials (CCTs) were considered. Two article reviewers independently assessed the search results, screened the relevant articles, performed data extraction, and evaluated the methodologic quality of the studies.

Results

Fourteen eligible articles (6 RCTs and 8 CCTs) were included in the review. There was a statistically meaningful increase in the rate of tooth movement compared with controls for all corticotomy techniques assessed. Some studies reported that acceleration in tooth movement was only temporary (lasting a few months). Corticotomy procedures did not seem to produce unwanted adverse effects on the periodontium, root resorption, and tooth vitality. The quality of the body of evidence was regarded as low owing to the presence of multiple methodologic issues, high risks of bias, and heterogeneity in the included articles.

Conclusion

Corticotomy procedures can produce statistically and clinically meaningful temporary increases in the rate of orthodontic tooth movement with minimal side-effects. Additional high-quality randomized clinical trials are needed to allow more definitive conclusions.",2015-10-24 +23514033,The Enzyme Portal: a case study in applying user-centred design methods in bioinformatics.,"User-centred design (UCD) is a type of user interface design in which the needs and desires of users are taken into account at each stage of the design process for a service or product; often for software applications and websites. Its goal is to facilitate the design of software that is both useful and easy to use. To achieve this, you must characterise users' requirements, design suitable interactions to meet their needs, and test your designs using prototypes and real life scenarios. For bioinformatics, there is little practical information available regarding how to carry out UCD in practice. To address this we describe a complete, multi-stage UCD process used for creating a new bioinformatics resource for integrating enzyme information, called the Enzyme Portal (http://www.ebi.ac.uk/enzymeportal). This freely-available service mines and displays data about proteins with enzymatic activity from public repositories via a single search, and includes biochemical reactions, biological pathways, small molecule chemistry, disease information, 3D protein structures and relevant scientific literature. We employed several UCD techniques, including: persona development, interviews, 'canvas sort' card sorting, user workflows, usability testing and others. Our hope is that this case study will motivate the reader to apply similar UCD approaches to their own software design for bioinformatics. 
Indeed, we found the benefits included more effective decision-making for design ideas and technologies; enhanced team-working and communication; cost effectiveness; and ultimately a service that more closely meets the needs of our target audience.",2013-03-20 +26215638,FusionCancer: a database of cancer fusion genes derived from RNA-seq data.,"

Background

Fusion genes are chimeric results originated from previous separate genes with aberrant functions. The resulting protein products may lead to abnormal status of expression levels, functions and action sites, which in return may cause the abnormal proliferation of cells and cancer development.

Results

With the emergence of next-generation sequencing technology, RNA-seq has spurred gene fusion discovery in various cancer types. In this work, we compiled 591 recently published RNA-seq datasets in 15 kinds of human cancer, and the gene fusion events were comprehensively identified. Based on the results, a database was developed for gene fusion in cancers (FusionCancer), with the attempt to provide a user-friendly utility for the cancer research community. A flexible query engine has been developed for the acquisition of annotated information of cancer fusion genes, which would help users to determine the chimera events leading to functional changes. FusionCancer can be accessible at the following hyperlink website: http://donglab.ecnu.edu.cn/databases/FusionCancer/

Conclusion

To the best of our knowledge, FusionCancer is the first comprehensive fusion gene database derived only from cancer RNA-seq data.",2015-07-28 +28398459,MISA-web: a web server for microsatellite prediction.,"

Motivation

Microsatellites are a widely-used marker system in plant genetics and forensics. The development of reliable microsatellite markers from resequencing data is challenging.

Results

We extended MISA, a computational tool assisting the development of microsatellite markers, and reimplemented it as a web-based application. We improved compound microsatellite detection and added the possibility to display and export MISA results in GFF3 format for downstream analysis.

Availability and implementation

MISA-web can be accessed under http://misaweb.ipk-gatersleben.de/. The website provides tutorials, usage note as well as download links to the source code.

Contact

scholz@ipk-gatersleben.de.",2017-08-01 +28618241,Use of Adjuvant Bisphosphonates and Other Bone-Modifying Agents in Breast Cancer: A Cancer Care Ontario and American Society of Clinical Oncology Clinical Practice Guideline.,"Purpose To make recommendations regarding the use of bisphosphonates and other bone-modifying agents as adjuvant therapy for patients with breast cancer. Methods Cancer Care Ontario and ASCO convened a Working Group and Expert Panel to develop evidence-based recommendations informed by a systematic review of the literature. Results Adjuvant bisphosphonates were found to reduce bone recurrence and improve survival in postmenopausal patients with nonmetastatic breast cancer. In this guideline, postmenopausal includes patients with natural menopause or that induced by ovarian suppression or ablation. Absolute benefit is greater in patients who are at higher risk of recurrence, and almost all trials were conducted in patients who also received systemic therapy. Most studies evaluated zoledronic acid or clodronate, and data are extremely limited for other bisphosphonates. While denosumab was found to reduce fractures, long-term survival data are still required. Recommendations It is recommended that, if available, zoledronic acid (4 mg intravenously every 6 months) or clodronate (1,600 mg/d orally) be considered as adjuvant therapy for postmenopausal patients with breast cancer who are deemed candidates for adjuvant systemic therapy. Further research comparing different bone-modifying agents, doses, dosing intervals, and durations is required. Risk factors for osteonecrosis of the jaw and renal impairment should be assessed, and any pending dental or oral health problems should be dealt with prior to starting treatment. Data for adjuvant denosumab look promising but are currently insufficient to make any recommendation. Use of these agents to reduce fragility fractures in patients with low bone mineral density is beyond the scope of the guideline. 
Recommendations are not meant to restrict such use of bone-modifying agents in these situations. Additional information at www.asco.org/breast-cancer-adjuvant-bisphosphonates-guideline , www.asco.org/guidelineswiki , https://www.cancercareontario.ca/guidelines-advice/types-of-cancer/breast .",2017-03-06 +28350385,"Molecular, phenotypic, and sample-associated data to describe pluripotent stem cell lines and derivatives.","The use of induced pluripotent stem cells (iPSC) derived from independent patients and sources holds considerable promise to improve the understanding of development and disease. However, optimized use of iPSC depends on our ability to develop methods to efficiently qualify cell lines and protocols, monitor genetic stability, and evaluate self-renewal and differentiation potential. To accomplish these goals, 57 stem cell lines from 10 laboratories were differentiated to 7 different states, resulting in 248 analyzed samples. Cell lines were differentiated and characterized at a central laboratory using standardized cell culture methodologies, protocols, and metadata descriptors. Stem cell and derived differentiated lines were characterized using RNA-seq, miRNA-seq, copy number arrays, DNA methylation arrays, flow cytometry, and molecular histology. All materials, including raw data, metadata, analysis and processing code, and methodological and provenance documentation are publicly available for re-use and interactive exploration at https://www.synapse.org/pcbc. The goal is to provide data that can improve our ability to robustly and reproducibly use human pluripotent stem cells to understand development and disease.",2017-03-28 +29983907,Rapid onset of action and reduced nasal hyperreactivity: new targets in allergic rhinitis management.,"

Background

This article summarizes a EUFOREA symposium, presented during the European Rhinology Research Forum in Brussels (9-10 November 2017; https://www.rhinologyresearch.eu/) which focused on novel pathways and therapeutic approaches in allergic rhinitis (AR).

Main body

AR remains under-diagnosed, under-estimated and under-treated. A key component in understanding the AR landscape has been the realization of a significant mismatch between how physicians instruct AR patients to manage their disease and what AR patients actually do in real life. Data from the Allergy Diary (developed by MACVIA ARIA) showed that AR patients take their medication prn, rapidly switch treatments, often experience poor control, use multiple therapies and stop treatment when symptoms are controlled. Better control of AR may be achievable by using an AR treatment which has a rapid onset of action and which effectively targets breakthrough symptoms. Indeed, AR patients report complete symptom relief, lack of breakthrough symptoms, rapid onset of action, safety and use on an 'as needed' basis as key targets for new nasal sprays. MP-AzeFlu comprises intranasal azelastine and fluticasone propionate (FP) in a novel formulation delivered in a single device. It is the first AR treatment to break the 5 min onset of action threshold and provides clinically relevant symptom relief in 15 min, much faster than that noted for FP + oral loratadine. MP-AzeFlu also significantly reduces nasal hyperresponsiveness (NHR) which may be responsible for the breakthrough symptoms frequently reported by AR patients. Mechanisms underlying MP-AzeFlu's effect include inhibition of mast cell degranulation, stabilization of the mucosal barrier, synergistic inhibition of inflammatory cell recruitment and a unique desensitization of sensory neurons expressing the transient receptor potential A1 and V1 channels.

Conclusion

With the most rapid onset of action and onset of clinically-relevant effect of any AR medication currently available, and proven efficacy in the treatment of NHR, MP-AzeFlu is an AR treatment which provides what patients want, and fits how patients manage their AR in real life.",2018-06-25 +28796630,Opportunities and Challenges for Personal Heat Exposure Research.,"

Background

Environmental heat exposure is a public health concern. The impacts of environmental heat on mortality and morbidity at the population scale are well documented, but little is known about specific exposures that individuals experience.

Objectives

The first objective of this work was to catalyze discussion of the role of personal heat exposure information in research and risk assessment. The second objective was to provide guidance regarding the operationalization of personal heat exposure research methods.

Discussion

We define personal heat exposure as realized contact between a person and an indoor or outdoor environment that poses a risk of increases in body core temperature and/or perceived discomfort. Personal heat exposure can be measured directly with wearable monitors or estimated indirectly through the combination of time-activity and meteorological data sets. Complementary information to understand individual-scale drivers of behavior, susceptibility, and health and comfort outcomes can be collected from additional monitors, surveys, interviews, ethnographic approaches, and additional social and health data sets. Personal exposure research can help reveal the extent of exposure misclassification that occurs when individual exposure to heat is estimated using ambient temperature measured at fixed sites and can provide insights for epidemiological risk assessment concerning extreme heat.

Conclusions

Personal heat exposure research provides more valid and precise insights into how often people encounter heat conditions and when, where, to whom, and why these encounters occur. Published literature on personal heat exposure is limited to date, but existing studies point to opportunities to inform public health practice regarding extreme heat, particularly where fine-scale precision is needed to reduce health consequences of heat exposure. https://doi.org/10.1289/EHP556.",2017-08-01 +26315904,LIBRA: LIgand Binding site Recognition Application.,"

Motivation

In recent years, structural genomics and ab initio molecular modeling activities are leading to the availability of a large number of structural models of proteins whose biochemical function is not known. The aim of this study was the development of a novel software tool that, given a protein's structural model, predicts the presence and identity of active sites and/or ligand binding sites.

Results

The algorithm implemented by ligand binding site recognition application (LIBRA) is based on a graph theory approach to find the largest subset of similar residues between an input protein and a collection of known functional sites. The algorithm makes use of two predefined databases for active sites and ligand binding sites, respectively, derived from the Catalytic Site Atlas and the Protein Data Bank. Tests indicate that LIBRA is able to identify the correct binding/active site in 90% of the cases analyzed, 90% of which feature the identified site as ranking first. As far as ligand binding site recognition is concerned, LIBRA outperforms other structure-based ligand binding sites detection tools with which it has been compared.

Availability and implementation

The application, developed in Java SE 7 with a Swing GUI embedding a JMol applet, can be run on any OS equipped with a suitable Java Virtual Machine (JVM), and is available at the following URL: http://www.computationalbiology.it/software/LIBRAv1.zip.",2015-08-26 +26166372,MVsCarta: A protein database of matrix vesicles to aid understanding of biomineralization.,"Matrix vesicles (MVs) are membranous nanovesicles released by chondrocytes, osteoblasts, and odontoblasts. They play a critical role in modulating mineralization. Here, we present a manually curated database of MV proteins, namely MVsCarta, to provide comprehensive information on MVs of protein components. In the current version, the database contains 2,713 proteins of six organisms identified in bone, cartilage, tooth tissues, and cells capable of producing a mineralized bone matrix. The MVsCarta database is now freely accessible at http://bioinf.xmu.edu.cn/MVsCarta. The search and browse methods were developed for better retrieval of data. In addition, bioinformatic tools like Gene Ontology (GO) analysis, network visualization and protein-protein interaction analysis were implemented for a functional understanding of MVs components. A similar database hasn't been reported yet. We believe that this free web-based database might serve as a useful repository to elucidate the novel function and regulation of MVs during mineralization, and to stimulate the advancement of MV studies.",2015-06-01 +25629585,MAGIC: an automated N-linked glycoprotein identification tool using a Y1-ion pattern matching algorithm and in silico MS² approach.,"Glycosylation is a highly complex modification influencing the functions and activities of proteins. Interpretation of intact glycopeptide spectra is crucial but challenging. 
In this paper, we present a mass spectrometry-based automated glycopeptide identification platform (MAGIC) to identify peptide sequences and glycan compositions directly from intact N-linked glycopeptide collision-induced-dissociation spectra. The identification of the Y1 (peptideY0 + GlcNAc) ion is critical for the correct analysis of unknown glycoproteins, especially without prior knowledge of the proteins and glycans present in the sample. To ensure accurate Y1-ion assignment, we propose a novel algorithm called Trident that detects a triplet pattern corresponding to [Y0, Y1, Y2] or [Y0-NH3, Y0, Y1] from the fragmentation of the common trimannosyl core of N-linked glycopeptides. To facilitate the subsequent peptide sequence identification by common database search engines, MAGIC generates in silico spectra by overwriting the original precursor with the naked peptide m/z and removing all of the glycan-related ions. Finally, MAGIC computes the glycan compositions and ranks them. For the model glycoprotein horseradish peroxidase (HRP) and a 5-glycoprotein mixture, a 2- to 31-fold increase in the relative intensities of the peptide fragments was achieved, which led to the identification of 7 tryptic glycopeptides from HRP and 16 glycopeptides from the mixture via Mascot. In the HeLa cell proteome data set, MAGIC processed over a thousand MS(2) spectra in 3 min on a PC and reported 36 glycopeptides from 26 glycoproteins. Finally, a remarkable false discovery rate of 0 was achieved on the N-glycosylation-free Escherichia coli data set. MAGIC is available at http://ms.iis.sinica.edu.tw/COmics/Software_MAGIC.html .",2015-01-28 +29382747,Targeting JAK2 reduces GVHD and xenograft rejection through regulation of T cell differentiation.,"Janus kinase 2 (JAK2) signal transduction is a critical mediator of the immune response. 
JAK2 is implicated in the onset of graft-versus-host disease (GVHD), which is a significant cause of transplant-related mortality after allogeneic hematopoietic cell transplantation (allo-HCT). Transfer of JAK2-/- donor T cells to allogeneic recipients leads to attenuated GVHD yet maintains graft-versus-leukemia. Th1 differentiation among JAK2-/- T cells is significantly decreased compared with wild-type controls. Conversely, iTreg and Th2 polarization is significantly increased among JAK2-/- T cells. Pacritinib is a multikinase inhibitor with potent activity against JAK2. Pacritinib significantly reduces GVHD and xenogeneic skin graft rejection in distinct rodent models and maintains donor antitumor immunity. Moreover, pacritinib spares iTregs and polarizes Th2 responses as observed among JAK2-/- T cells. Collectively, these data clearly identify JAK2 as a therapeutic target to control donor alloreactivity and promote iTreg responses after allo-HCT or solid organ transplantation. As such, a phase I/II acute GVHD prevention trial combining pacritinib with standard immune suppression after allo-HCT is actively being investigated (https://clinicaltrials.gov/ct2/show/NCT02891603).",2018-01-30 +26457534,"Interindividual methylomic variation across blood, cortex, and cerebellum: implications for epigenetic studies of neurological and neuropsychiatric phenotypes.","Given the tissue-specific nature of epigenetic processes, the assessment of disease-relevant tissue is an important consideration for epigenome-wide association studies (EWAS). Little is known about whether easily accessible tissues, such as whole blood, can be used to address questions about interindividual epigenomic variation in inaccessible tissues, such as the brain. We quantified DNA methylation in matched DNA samples isolated from whole blood and 4 brain regions (prefrontal cortex, entorhinal cortex, superior temporal gyrus, and cerebellum) from 122 individuals. 
We explored co-variation between tissues and the extent to which methylomic variation in blood is predictive of interindividual variation identified in the brain. For the majority of DNA methylation sites, interindividual variation in whole blood is not a strong predictor of interindividual variation in the brain, although the relationship with cortical regions is stronger than with the cerebellum. Variation at a subset of probes is strongly correlated across tissues, even in instances when the actual level of DNA methylation is significantly different between them. A substantial proportion of this co-variation, however, is likely to result from genetic influences. Our data suggest that for the majority of the genome, a blood-based EWAS for disorders where brain is presumed to be the primary tissue of interest will give limited information relating to underlying pathological processes. These results do not, however, discount the utility of using a blood-based EWAS to identify biomarkers of disease phenotypes manifest in the brain. We have generated a searchable database for the interpretation of data from blood-based EWAS analyses ( http://epigenetics.essex.ac.uk/bloodbrain/).",2015-01-01 +25261191,Maize and millet transcription factors annotated using comparative genomic and transcriptomic data.,"

Background

Transcription factors (TFs) contain DNA-binding domains (DBDs) and regulate gene expression by binding to specific DNA sequences. In addition, there are proteins, called transcription coregulators (TCs), which lack DBDs but can alter gene expression through interaction with TFs or RNA Polymerase II. Therefore, it is interesting to identify and classify the TFs and TCs in a genome. In this study, maize (Zea mays) and foxtail millet (Setaria italica), two important species for the study of C4 photosynthesis and kranz anatomy, were selected.

Result

We conducted a comprehensive genome-wide annotation of TFs and TCs in maize B73 and in two strains of foxtail millet, Zhang gu and Yugu1, and classified them into families. To gain additional support for our predictions, we searched for their homologous genes in Arabidopsis or rice and studied their gene expression level using RNA-seq and microarray data. We identified many new TF and TC families in these two species, and described some evolutionary and functional aspects of the 9 new maize TF families. Moreover, we detected many pseudogenes and transposable elements in current databases. In addition, we examined tissue expression preferences of TF and TC families and identified tissue/condition-specific TFs and TCs in maize and millet. Finally, we identified potential C4-related TF and TC genes in maize and millet.

Conclusions

Our results significantly expand current TF and TC annotations in maize and millet. We provided supporting evidence for our annotation from genomic and gene expression data and identified TF and TC genes with tissue preference in expression. Our study may facilitate the study of regulation of gene expression, tissue morphogenesis, and C4 photosynthesis in maize and millet. The data we generated in this study are available at http://sites.google.com/site/jjlmmtf.",2014-09-27 +25465051,"A-WINGS: an integrated genome database for Pleurocybella porrigens (Angel's wing oyster mushroom, Sugihiratake).","

Background

The angel's wing oyster mushroom (Pleurocybella porrigens, Sugihiratake) is a well-known delicacy. However, its potential risk in acute encephalopathy was recently revealed by a food poisoning incident. To disclose the genes underlying the accident and provide mechanistic insight, we seek to develop an information infrastructure containing omics data. In our previous work, we sequenced the genome and transcriptome using next-generation sequencing techniques. The next step in achieving our goal is to develop a web database to facilitate the efficient mining of large-scale omics data and identification of genes specifically expressed in the mushroom.

Findings

This paper introduces a web database A-WINGS (http://bioinf.mind.meiji.ac.jp/a-wings/) that provides integrated genomic and transcriptomic information for the angel's wing oyster mushroom. The database contains structure and functional annotations of transcripts and gene expressions. Functional annotations contain information on homologous sequences from NCBI nr and UniProt, Gene Ontology, and KEGG Orthology. Digital gene expression profiles were derived from RNA sequencing (RNA-seq) analysis in the fruiting bodies and mycelia. The omics information stored in the database is freely accessible through interactive and graphical interfaces by search functions that include 'GO TREE VIEW' browsing, keyword searches, and BLAST searches.

Conclusions

The A-WINGS database will accelerate omics studies on specific aspects of the angel's wing oyster mushroom and the family Tricholomataceae.",2014-12-03 +26394400,Validation of the RRE-90 Scale to Predict Stroke Risk after Transient Symptoms with Infarction: A Prospective Cohort Study.,"

Background and purpose

The risk of stroke after a transient ischemic attack (TIA) for patients with a positive diffusion-weighted image (DWI), i.e., transient symptoms with infarction (TSI), is much higher than for those with a negative DWI. The aim of this study was to validate the predictive value of a web-based recurrence risk estimator (RRE; http://www.nmr.mgh.harvard.edu/RRE/) of TSI.

Methods

Data from the prospective hospital-based TIA database of the First Affiliated Hospital of Zhengzhou University were analyzed. The RRE and ABCD2 scores were calculated within 7 days of symptom onset. The predictive outcome was ischemic stroke occurrence at 90 days. The receiver-operating characteristics curves were plotted, and the predictive value of the two models was assessed by computing the C statistics.

Results

A total of 221 eligible patients were prospectively enrolled, of whom 46 (20.81%) experienced a stroke within 90 days. The 90-day stroke risk in high-risk TSI patients (RRE ≥4) was 3.406-fold greater than in those at low risk (P <0.001). The C statistic of RRE (0.681; 95% confidence interval [CI], 0.592-0.771) was statistically higher than that of ABCD2 score (0.546; 95% CI, 0.454-0.638; Z = 2.115; P = 0.0344) at 90 days.

Conclusion

The RRE score had a higher predictive value than the ABCD2 score for assessing the 90-day risk of stroke after TSI.",2015-09-22 +27663501,Improved methods for multi-trait fine mapping of pleiotropic risk loci.,"

Motivation

Genome-wide association studies (GWAS) have identified thousands of regions in the genome that contain genetic variants that increase risk for complex traits and diseases. However, the variants uncovered in GWAS are typically not biologically causal, but rather, correlated to the true causal variant through linkage disequilibrium (LD). To discern the true causal variant(s), a variety of statistical fine-mapping methods have been proposed to prioritize variants for functional validation.

Results

In this work we introduce a new approach, fastPAINTOR, that leverages evidence across correlated traits, as well as functional annotation data, to improve fine-mapping accuracy at pleiotropic risk loci. To improve computational efficiency, we describe a new importance sampling scheme to perform model inference. First, we demonstrate in simulations that by leveraging functional annotation data, fastPAINTOR increases fine-mapping resolution relative to existing methods. Next, we show that jointly modeling pleiotropic risk regions improves fine-mapping resolution compared to standard single trait and pleiotropic fine mapping strategies. We report a reduction in the number of SNPs required for follow-up in order to capture 90% of the causal variants from 23 SNPs per locus using a single trait to 12 SNPs when fine-mapping two traits simultaneously. Finally, we analyze summary association data from a large-scale GWAS of lipids and show that these improvements are largely sustained in real data.

Availability and implementation

The fastPAINTOR framework is implemented in the PAINTOR v3.0 package which is publicly available to the research community http://bogdan.bioinformatics.ucla.edu/software/paintor CONTACT: gkichaev@ucla.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-22 +26391769,Genome-wide cataloging and analysis of alternatively spliced genes in cereal crops.,"

Background

Protein functional diversity at the post-transcriptional level is regulated through spliceosome mediated pre-mRNA alternative splicing (AS) events and that has been widely demonstrated to be a key player in regulating the functional diversity in plants. Identification and analysis of AS genes in cereal crop plants are critical for crop improvement and understanding regulatory mechanisms.

Results

We carried out the comparative analyses of the functional landscapes of the AS using the consensus assembly of expressed sequence tags and available mRNA sequences in four cereal plants. We identified a total of 8,734 in Oryza sativa subspecies (ssp) japonica, 2,657 in O. sativa ssp indica, 3,971 in Sorghum bicolor, and 10,687 in Zea mays AS genes. Among the identified AS events, intron retention remains to be the dominant type accounting for 23.5 % in S. bicolor, and up to 55.8 % in O. sativa ssp indica. We identified a total of 887 AS genes that were conserved among Z. mays, S. bicolor, and O. sativa ssp japonica; and 248 AS genes were found to be conserved among all four studied species or ssp. Furthermore, we identified 53 AS genes conserved with Brachypodium distachyon. Gene Ontology classification of AS genes revealed functional assignment of these genes in many biological processes with diverse molecular functions.

Conclusions

AS is common in cereal plants. The AS genes identified in four cereal crops in this work provide the foundation for further studying the roles of AS in regulation of cereal plant growth and development. The data can be accessed at Plant Alternative Splicing Database (http://proteomics.ysu.edu/altsplice/).",2015-09-21 +34541090,DNA-free Genome Editing of Chlamydomonas reinhardtii Using CRISPR and Subsequent Mutant Analysis.,"We successfully introduced targeted knock-out of gene of interest in Chlamydomonas reinhardtii by using DNA-free CRISPR. In this protocol, the detailed procedures of an entire workflow cover from the initial target selection of CRISPR to the mutant analysis using next generation sequencing (NGS) technology. Furthermore, we introduce a web-based set of tools, named CRISPR RGEN tools (http://www.rgenome.net/), which provides all required tools from CRISPR target design to NGS data analysis.",2017-06-05 +28066370,Extended Multilocus Sequence Analysis to Describe the Global Population Structure of the Genus Brucella: Phylogeography and Relationship to Biovars.,"An extended multilocus sequence analysis (MLSA) scheme applicable to the Brucella, an expanding genus that includes zoonotic pathogens that severely impact animal and human health across large parts of the globe, was developed. The scheme, which extends a previously described nine locus scheme by examining sequences at 21 independent genetic loci in order to increase discriminatory power, was applied to a globally and temporally diverse collection of over 500 isolates representing all 12 known Brucella species providing an expanded and detailed understanding of the population genetic structure of the group. 
Over 100 sequence types (STs) were identified and analysis of data provided insights into both the global evolutionary history of the genus, suggesting that early emerging Brucella abortus lineages might be confined to Africa while some later lineages have spread worldwide, and further evidence of the existence of lineages with restricted host or geographical ranges. The relationship between biovar, long used as a crude epidemiological marker, and genotype was also examined and showed decreasing congruence in the order Brucella suis > B. abortus > Brucella melitensis. Both the previously described nine locus scheme and the extended 21 locus scheme have been made available at http://pubmlst.org/brucella/ to allow the community to interrogate existing data and compare with newly generated data.",2016-12-21 +24688832,Integrative study of Arabidopsis thaliana metabolomic and transcriptomic data with the interactive MarVis-Graph software.,"State of the art high-throughput technologies allow comprehensive experimental studies of organism metabolism and induce the need for a convenient presentation of large heterogeneous datasets. Especially, the combined analysis and visualization of data from different high-throughput technologies remains a key challenge in bioinformatics. We present here the MarVis-Graph software for integrative analysis of metabolic and transcriptomic data. All experimental data is investigated in terms of the full metabolic network obtained from a reference database. The reactions of the network are scored based on the associated data, and sub-networks, according to connected high-scoring reactions, are identified. Finally, MarVis-Graph scores the detected sub-networks, evaluates them by means of a random permutation test and presents them as a ranked list. Furthermore, MarVis-Graph features an interactive network visualization that provides researchers with a convenient view on the results. 
The key advantage of MarVis-Graph is the analysis of reactions detached from their pathways so that it is possible to identify new pathways or to connect known pathways by previously unrelated reactions. The MarVis-Graph software is freely available for academic use and can be downloaded at: http://marvis.gobics.de/marvis-graph.",2014-03-13 +28882004,When loss-of-function is loss of function: assessing mutational signatures and impact of loss-of-function genetic variants.,"

Motivation

Loss-of-function genetic variants are frequently associated with severe clinical phenotypes, yet many are present in the genomes of healthy individuals. The available methods to assess the impact of these variants rely primarily upon evolutionary conservation with little to no consideration of the structural and functional implications for the protein. They further do not provide information to the user regarding specific molecular alterations potentially causative of disease.

Results

To address this, we investigate protein features underlying loss-of-function genetic variation and develop a machine learning method, MutPred-LOF, for the discrimination of pathogenic and tolerated variants that can also generate hypotheses on specific molecular events disrupted by the variant. We investigate a large set of human variants derived from the Human Gene Mutation Database, ClinVar and the Exome Aggregation Consortium. Our prediction method shows an area under the Receiver Operating Characteristic curve of 0.85 for all loss-of-function variants and 0.75 for proteins in which both pathogenic and neutral variants have been observed. We applied MutPred-LOF to a set of 1142 de novo variants from neurodevelopmental disorders and find enrichment of pathogenic variants in affected individuals. Overall, our results highlight the potential of computational tools to elucidate causal mechanisms underlying loss of protein function in loss-of-function variants.

Availability and implementation

http://mutpred.mutdb.org.

Contact

predrag@indiana.edu.",2017-07-01 +28881964,Orthologous Matrix (OMA) algorithm 2.0: more robust to asymmetric evolutionary rates and more scalable hierarchical orthologous group inference.,"

Motivation

Accurate orthology inference is a fundamental step in many phylogenetic and comparative analyses. Many methods have been proposed, including OMA (Orthologous MAtrix). Yet substantial challenges remain, in particular in coping with fragmented genes or genes evolving at different rates after duplication, and in scaling to large datasets. With more and more genomes available, it is necessary to improve the scalability and robustness of orthology inference methods.

Results

We present improvements in the OMA algorithm: (i) refining the pairwise orthology inference step to account for same-species paralogs evolving at different rates, and (ii) minimizing errors in the pairwise orthology verification step by testing the consistency of pairwise distance estimates, which can be problematic in the presence of fragmentary sequences. In addition we introduce a more scalable procedure for hierarchical orthologous group (HOG) clustering, which is several orders of magnitude faster on large datasets. Using the Quest for Orthologs consortium orthology benchmark service, we show that these changes translate into substantial improvement on multiple empirical datasets.

Availability and implementation

This new OMA 2.0 algorithm is used in the OMA database ( http://omabrowser.org ) from the March 2017 release onwards, and can be run on custom genomes using OMA standalone version 2.0 and above ( http://omabrowser.org/standalone ).

Contact

christophe.dessimoz@unil.ch or adrian.altenhoff@inf.ethz.ch.",2017-07-01 +28486635,GeSeq - versatile and accurate annotation of organelle genomes.,"We have developed the web application GeSeq (https://chlorobox.mpimp-golm.mpg.de/geseq.html) for the rapid and accurate annotation of organellar genome sequences, in particular chloroplast genomes. In contrast to existing tools, GeSeq combines batch processing with a fully customizable reference sequence selection of organellar genome records from NCBI and/or references uploaded by the user. For the annotation of chloroplast genomes, the application additionally provides an integrated database of manually curated reference sequences. GeSeq identifies genes or other feature-encoding regions by BLAT-based homology searches and additionally, by profile HMM searches for protein and rRNA coding genes and two de novo predictors for tRNA genes. These unique features enable the user to conveniently compare the annotations of different state-of-the-art methods, thus supporting high-quality annotations. The main output of GeSeq is a GenBank file that usually requires only little curation and is instantly visualized by OGDRAW. GeSeq also offers a variety of optional additional outputs that facilitate downstream analyzes, for example comparative genomic or phylogenetic studies.",2017-07-01 +28482028,Web3DMol: interactive protein structure visualization based on WebGL.,"A growing number of web-based databases and tools for protein research are being developed. There is now a widespread need for visualization tools to present the three-dimensional (3D) structure of proteins in web browsers. Here, we introduce our 3D modeling program-Web3DMol-a web application focusing on protein structure visualization in modern web browsers. Users submit a PDB identification code or select a PDB archive from their local disk, and Web3DMol will display and allow interactive manipulation of the 3D structure. 
Featured functions, such as sequence plot, fragment segmentation, measure tool and meta-information display, are offered for users to gain a better understanding of protein structure. Easy-to-use APIs are available for developers to reuse and extend Web3DMol. Web3DMol can be freely accessed at http://web3dmol.duapp.com/, and the source code is distributed under the MIT license.",2017-07-01 +28472432,"agriGO v2.0: a GO analysis toolkit for the agricultural community, 2017 update.","The agriGO platform, which has been serving the scientific community for >10 years, specifically focuses on gene ontology (GO) enrichment analyses of plant and agricultural species. We continuously maintain and update the databases and accommodate the various requests of our global users. Here, we present our updated agriGO that has a largely expanded number of supporting species (394) and datatypes (865). In addition, a larger number of species have been classified into groups covering crops, vegetables, fish, birds and insects closely related to the agricultural community. We further improved the computational efficiency, including the batch analysis and P-value distribution (PVD), and the user-friendliness of the web pages. More visualization features were added to the platform, including SEACOMPARE (cross comparison of singular enrichment analysis), direct acyclic graph (DAG) and Scatter Plots, which can be merged by choosing any significant GO term. The updated platform agriGO v2.0 is now publicly accessible at http://systemsbiology.cau.edu.cn/agriGOv2/.",2017-07-01 +28472397,Olelo: a web application for intuitive exploration of biomedical literature.,"Researchers usually query the large biomedical literature in PubMed via keywords, logical operators and filters, none of which is very intuitive. Question answering systems are an alternative to keyword searches. 
They allow questions in natural language as input and results reflect the given type of question, such as short answers and summaries. Few of those systems are available online but they experience drawbacks in terms of long response times and they support a limited amount of question and result types. Additionally, user interfaces are usually restricted to only displaying the retrieved information. For our Olelo web application, we combined biomedical literature and terminologies in a fast in-memory database to enable real-time responses to researchers' queries. Further, we extended the built-in natural language processing features of the database with question answering and summarization procedures. Combined with a new explorative approach of document filtering and a clean user interface, Olelo enables a fast and intelligent search through the ever-growing biomedical literature. Olelo is available at http://www.hpi.de/plattner/olelo.",2017-07-01 +28472367,PIGSPro: prediction of immunoGlobulin structures v2.,"PIGSpro is a significant upgrade of the popular PIGS server for the prediction of the structure of immunoglobulins. The software has been completely rewritten in python following a similar pipeline as in the original method, but including, at various steps, relevant modifications found to improve its prediction accuracy, as demonstrated here. The steps of the pipeline include the selection of the appropriate framework for predicting the conserved regions of the molecule by homology; the target template alignment for this portion of the molecule; the selection of the main chain conformation of the hypervariable loops according to the canonical structure model, the prediction of the third loop of the heavy chain (H3) for which complete canonical structures are not available and the packing of the light and heavy chain if derived from different templates. Each of these steps has been improved including updated methods developed along the years. 
Last but not least, the user interface has been completely redesigned and an automatic monthly update of the underlying database has been implemented. The method is available as a web server at http://biocomputing.it/pigspro.",2017-07-01 +28526811,Proteomic analysis of cerebrospinal fluid from children with central nervous system tumors identifies candidate proteins relating to tumor metastatic spread.,"Central nervous system (CNS) tumors are the most common solid tumors in childhood. Since the sensitivity of combined cerebrospinal fluid (CSF) cytology and radiological neuroimaging in detecting meningeal metastases remains relatively low, we sought to characterize the CSF proteome of patients with CNS tumors to identify biomarkers predictive of metastatic spread. CSF samples from 27 children with brain tumors and 13 controls (extra-CNS non-Hodgkin lymphoma) were processed using core-shell hydrogel nanoparticles, and analyzed with reverse-phase liquid chromatography/electrospray tandem mass spectrometry (LC-MS/MS). Candidate proteins were identified with Fisher's exact test and/or a univariate logistic regression model. Reverse phase protein array (RPPA), Western blot (WB), and ELISA were used in the training set and in an independent set of CSF samples (60 cases, 14 controls) to validate our discovery findings. Among the 558 non-redundant proteins identified by LC-MS/MS, 147 were missing from the CSF database at http://www.biosino.org. Fourteen of the 26 final top-candidate proteins were chosen for validation with WB, RPPA and ELISA methods. Six proteins (type 1 collagen, insulin-like growth factor binding protein 4, procollagen C-endopeptidase enhancer 1, glial cell-line derived neurotrophic factor receptor α2, inter-alpha-trypsin inhibitor heavy chain 4, neural proliferation and differentiation control protein-1) revealed the ability to discriminate metastatic cases from controls. 
Combining a unique dataset of CSFs from pediatric CNS tumors with a novel enabling nanotechnology led us to identify CSF proteins potentially related to metastatic status.",2017-07-01 +28459991,GASS-WEB: a web server for identifying enzyme active sites based on genetic algorithms.,"Enzyme active sites are important and conserved functional regions of proteins whose identification can be an invaluable step toward protein function prediction. Most of the existing methods for this task are based on active site similarity and present limitations including performing only exact matches on template residues, template size restraints, despite not being capable of finding inter-domain active sites. To fill this gap, we proposed GASS-WEB, a user-friendly web server that uses GASS (Genetic Active Site Search), a method based on an evolutionary algorithm to search for similar active sites in proteins. GASS-WEB can be used under two different scenarios: (i) given a protein of interest, to match a set of specific active site templates; or (ii) given an active site template, looking for it in a database of protein structures. The method has shown to be very effective on a range of experiments and was able to correctly identify >90% of the catalogued active sites from the Catalytic Site Atlas. It also managed to achieve a Matthew correlation coefficient of 0.63 using the Critical Assessment of protein Structure Prediction (CASP 10) dataset. In our analysis, GASS was ranking fourth among 18 methods. GASS-WEB is freely available at http://gass.unifei.edu.br/.",2017-07-01 +28387820,GalaxyHomomer: a web server for protein homo-oligomer structure prediction from a monomer sequence or structure.,"Homo-oligomerization of proteins is abundant in nature, and is often intimately related with the physiological functions of proteins, such as in metabolism, signal transduction or immunity. 
Information on the homo-oligomer structure is therefore important to obtain a molecular-level understanding of protein functions and their regulation. Currently available web servers predict protein homo-oligomer structures either by template-based modeling using homo-oligomer templates selected from the protein structure database or by ab initio docking of monomer structures resolved by experiment or predicted by computation. The GalaxyHomomer server, freely accessible at http://galaxy.seoklab.org/homomer, carries out template-based modeling, ab initio docking or both depending on the availability of proper oligomer templates. It also incorporates recently developed model refinement methods that can consistently improve model quality. Moreover, the server provides additional options that can be chosen by the user depending on the availability of information on the monomer structure, oligomeric state and locations of unreliable/flexible loops or termini. The performance of the server was better than or comparable to that of other available methods when tested on benchmark sets and in a recent CASP performed in a blind fashion.",2017-07-01 +27883049,mQC: A Heuristic Quality-Control Metric for High-Throughput Drug Combination Screening.,"Quality control (QC) metrics are critical in high throughput screening (HTS) platforms to ensure reliability and confidence in assay data and downstream analyses. Most reported HTS QC metrics are designed for plate level or single well level analysis. With the advent of high throughput combination screening there is a need for QC metrics that quantify the quality of combination response matrices. We introduce a predictive, interpretable, matrix-level QC metric, mQC, based on a mix of data-derived and heuristic features. mQC accurately reproduces the expert assessment of combination response quality and correctly identifies unreliable response matrices that can lead to erroneous or misleading characterization of synergy. 
When combined with the plate-level QC metric, Z', mQC provides a more appropriate determination of the quality of a drug combination screen. Retrospective analysis on a number of completed combination screens further shows that mQC is able to identify problematic screens whereas plate-level QC was not able to. In conclusion, our data indicates that mQC is a reliable QC filter that can be used to identify problematic drug combinations matrices and prevent further analysis on erroneously active combinations as well as for troubleshooting failed screens. The R source code of mQC is available at http://matrix.ncats.nih.gov/mQC.",2016-11-24 +28881966,DextMP: deep dive into text for predicting moonlighting proteins.,"

Motivation

Moonlighting proteins (MPs) are an important class of proteins that perform more than one independent cellular function. MPs are gaining more attention in recent years as they are found to play important roles in various systems including disease developments. MPs also have a significant impact in computational function prediction and annotation in databases. Currently MPs are not labeled as such in biological databases even in cases where multiple distinct functions are known for the proteins. In this work, we propose a novel method named DextMP, which predicts whether a protein is a MP or not based on its textual features extracted from scientific literature and the UniProt database.

Results

DextMP extracts three categories of textual information for a protein: titles, abstracts from literature, and function description in UniProt. Three language models were applied and compared: a state-of-the-art deep unsupervised learning algorithm along with two other language models of different types, Term Frequency-Inverse Document Frequency in the bag-of-words and Latent Dirichlet Allocation in the topic modeling category. Cross-validation results on a dataset of known MPs and non-MPs showed that DextMP successfully predicted MPs with over 91% accuracy with significant improvement over existing MP prediction methods. Lastly, we ran DextMP with the best performing language models and text-based feature combinations on three genomes, human, yeast and Xenopus laevis , and found that about 2.5-35% of the proteomes are potential MPs.

Availability and implementation

Code available at http://kiharalab.org/DextMP .

Contact

dkihara@purdue.edu.",2017-07-01 +28206754,High-Quality Dataset of Protein-Bound Ligand Conformations and Its Application to Benchmarking Conformer Ensemble Generators.,"We developed a cheminformatics pipeline for the fully automated selection and extraction of high-quality protein-bound ligand conformations from X-ray structural data. The pipeline evaluates the validity and accuracy of the 3D structures of small molecules according to multiple criteria, including their fit to the electron density and their physicochemical and structural properties. Using this approach, we compiled two high-quality datasets from the Protein Data Bank (PDB): a comprehensive dataset and a diversified subset of 4626 and 2912 structures, respectively. The datasets were applied to benchmarking seven freely available conformer ensemble generators: Balloon (two different algorithms), the RDKit standard conformer ensemble generator, the Experimental-Torsion basic Knowledge Distance Geometry (ETKDG) algorithm, Confab, Frog2 and Multiconf-DOCK. Substantial differences in the performance of the individual algorithms were observed, with RDKit and ETKDG generally achieving a favorable balance of accuracy, ensemble size and runtime. The Platinum datasets are available for download from http://www.zbh.uni-hamburg.de/platinum_dataset .",2017-02-16 +24108186,The semantic measures library and toolkit: fast computation of semantic similarity and relatedness using biomedical ontologies.,"

Unlabelled

The semantic measures library and toolkit are robust open-source and easy to use software solutions dedicated to semantic measures. They can be used for large-scale computations and analyses of semantic similarities between terms/concepts defined in terminologies and ontologies. The comparison of entities (e.g. genes) annotated by concepts is also supported. A large collection of measures is available. Not limited to a specific application context, the library and the toolkit can be used with various controlled vocabularies and ontology specifications (e.g. Open Biomedical Ontology, Resource Description Framework). The project targets both designers and practitioners of semantic measures providing a JAVA library, as well as a command-line tool that can be used on personal computers or computer clusters.

Availability and implementation

Downloads, documentation, tutorials, evaluation and support are available at http://www.semantic-measures-library.org.",2013-10-09 +24186831,Variobox: automatic detection and annotation of human genetic variants.,"Triggered by the sequencing of the human genome, personalized medicine has been one of the fastest growing research areas in the last decade. Multiple software and hardware technologies have been developed by several projects, culminating in the exponential growth of genetic data. Considering the technological developments in this field, it is now fairly easy and inexpensive to obtain genetic profiles for unique individuals, such as those performed by several genetic analysis companies. The availability of computational tools that simplify genetic data analysis and the disclosure of biomedical evidences are of utmost importance. We present Variobox, a desktop tool to annotate, analyze, and compare human genes. Variobox obtains variant annotation data from WAVe, protein metadata annotations from Protein Data Bank, and sequences are obtained from Locus Reference Genomic or RefSeq databases. To explore the data, Variobox provides an advanced sequence visualization that enables agile navigation through genetic regions. DNA sequencing data can be compared with reference sequences retrieved from LRG or RefSeq records, identifying and automatically annotating new potential variants. These features and data, ranging from patient sequences to HGVS-compliant variant descriptions, are combined in an intuitive interface to analyze genes and variants. Variobox is a Java application, available at http://bioinformatics.ua.pt/variobox.",2013-11-21 +27587697,Genome wide predictions of miRNA regulation by transcription factors.,"

Motivation

Reconstructing regulatory networks from expression and interaction data is a major goal of systems biology. While much work has focused on trying to experimentally and computationally determine the set of transcription-factors (TFs) and microRNAs (miRNAs) that regulate genes in these networks, relatively little work has focused on inferring the regulation of miRNAs by TFs. Such regulation can play an important role in several biological processes including development and disease. The main challenge for predicting such interactions is the very small positive training set currently available. Another challenge is the fact that a large fraction of miRNAs are encoded within genes making it hard to determine the specific way in which they are regulated.

Results

To enable genome wide predictions of TF-miRNA interactions, we extended semi-supervised machine-learning approaches to integrate a large set of different types of data including sequence, expression, ChIP-seq and epigenetic data. As we show, the methods we develop achieve good performance on both a labeled test set, and when analyzing general co-expression networks. We next analyze mRNA and miRNA cancer expression data, demonstrating the advantage of using the predicted set of interactions for identifying more coherent and relevant modules, genes, and miRNAs. The complete set of predictions is available on the supporting website and can be used by any method that combines miRNAs, genes, and TFs.

Availability and implementation

Code and full set of predictions are available from the supporting website: http://cs.cmu.edu/~mruffalo/tf-mirna/

Contact

zivbj@cs.cmu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +27587657,TANDEM: a two-stage approach to maximize interpretability of drug response models based on multiple molecular data types.,"

Motivation

Clinical response to anti-cancer drugs varies between patients. A large portion of this variation can be explained by differences in molecular features, such as mutation status, copy number alterations, methylation and gene expression profiles. We show that the classic approach for combining these molecular features (Elastic Net regression on all molecular features simultaneously) results in models that are almost exclusively based on gene expression. The gene expression features selected by the classic approach are difficult to interpret as they often represent poorly studied combinations of genes, activated by aberrations in upstream signaling pathways.

Results

To utilize all data types in a more balanced way, we developed TANDEM, a two-stage approach in which the first stage explains response using upstream features (mutations, copy number, methylation and cancer type) and the second stage explains the remainder using downstream features (gene expression). Applying TANDEM to 934 cell lines profiled across 265 drugs (GDSC1000), we show that the resulting models are more interpretable, while retaining the same predictive performance as the classic approach. Using the more balanced contributions per data type as determined with TANDEM, we find that response to MAPK pathway inhibitors is largely predicted by mutation data, while predicting response to DNA damaging agents requires gene expression data, in particular SLFN11 expression.

Availability and implementation

TANDEM is available as an R package on CRAN (for more information, see http://ccb.nki.nl/software/tandem).

Contact

m.michaut@nki.nl or l.wessels@nki.nl

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +28662047,MDD-Palm: Identification of protein S-palmitoylation sites with substrate motifs based on maximal dependence decomposition.,"S-palmitoylation, the covalent attachment of 16-carbon palmitic acids to a cysteine residue via a thioester linkage, is an important reversible lipid modification that plays a regulatory role in a variety of physiological and biological processes. As the number of experimentally identified S-palmitoylated peptides increases, it is imperative to investigate substrate motifs to facilitate the study of protein S-palmitoylation. Based on 710 non-homologous S-palmitoylation sites obtained from published databases and the literature, we carried out a bioinformatics investigation of S-palmitoylation sites based on amino acid composition. Two Sample Logo indicates that positively charged and polar amino acids surrounding S-palmitoylated sites may be associated with the substrate site specificity of protein S-palmitoylation. Additionally, maximal dependence decomposition (MDD) was applied to explore the motif signatures of S-palmitoylation sites by categorizing a large-scale dataset into subgroups with statistically significant conservation of amino acids. Single features such as amino acid composition (AAC), amino acid pair composition (AAPC), position specific scoring matrix (PSSM), position weight matrix (PWM), amino acid substitution matrix (BLOSUM62), and accessible surface area (ASA) were considered, along with the effectiveness of incorporating MDD-identified substrate motifs into a two-layered prediction model. Evaluation by five-fold cross-validation showed that a hybrid of AAC and PSSM performs best at discriminating between S-palmitoylation and non-S-palmitoylation sites, according to the support vector machine (SVM). 
The two-layered SVM model integrating MDD-identified substrate motifs performed well, with a sensitivity of 0.79, specificity of 0.80, accuracy of 0.80, and Matthews Correlation Coefficient (MCC) value of 0.45. Using an independent testing dataset (613 S-palmitoylated and 5412 non-S-palmitoylated sites) obtained from the literature, we demonstrated that the two-layered SVM model could outperform other prediction tools, yielding a balanced sensitivity and specificity of 0.690 and 0.694, respectively. This two-layered SVM model has been implemented as a web-based system (MDD-Palm), which is now freely available at http://csb.cse.yzu.edu.tw/MDDPalm/.",2017-06-29 +28666416,RGIFE: a ranked guided iterative feature elimination heuristic for the identification of biomarkers.,"

Background

Current -omics technologies are able to sense the state of a biological sample in a very wide variety of ways. Given the high dimensionality that typically characterises these data, relevant knowledge is often hidden and hard to identify. Machine learning methods, and particularly feature selection algorithms, have proven very effective over the years at identifying small but relevant subsets of variables from a variety of application domains, including -omics data. Many methods exist with varying trade-off between the size of the identified variable subsets and the predictive power of such subsets. In this paper we focus on a heuristic for the identification of biomarkers called RGIFE: Rank Guided Iterative Feature Elimination. RGIFE is guided in its biomarker identification process by the information extracted from machine learning models and incorporates several mechanisms to ensure that it creates minimal and highly predictive feature sets.

Results

We compare RGIFE against five well-known feature selection algorithms using both synthetic and real (cancer-related transcriptomics) datasets. First, we assess the ability of the methods to identify relevant and highly predictive features. Then, using a prostate cancer dataset as a case study, we look at the biological relevance of the identified biomarkers.

Conclusions

We propose RGIFE, a heuristic for the inference of reduced panels of biomarkers that obtains similar predictive performance to widely adopted feature selection methods while selecting significantly fewer feature. Furthermore, focusing on the case study, we show the higher biological relevance of the biomarkers selected by our approach. The RGIFE source code is available at: http://ico2s.org/software/rgife.html .",2017-06-30 +27158644,"Data on simulated interpersonal touch, individual differences and the error-related negativity.","The dataset includes data from the electroencephalogram study reported in our paper: 'Effects of simulated interpersonal touch and trait intrinsic motivation on the error-related negativity' (doi:10.1016/j.neulet.2016.01.044) (Tjew-A-Sin et al., 2016) [1]. The data was collected at the psychology laboratories at the Vrije Universiteit Amsterdam in 2012 among a Dutch-speaking student sample. The dataset consists of the measures described in the paper, as well as additional (exploratory) measures including the Five-Factor Personality Inventory, the Connectedness to Nature Scale, the Rosenberg Self-esteem Scale and a scale measuring life stress. The data can be used for replication purposes, meta-analyses, and exploratory analyses, as well as cross-cultural comparisons of touch and/or ERN effects. The authors also welcome collaborative research based on re-analyses of the data. The data described is available at a data repository called the DANS archive: http://persistent-identifier.nl/?identifier=urn:nbn:nl:ui:13-tzbk-gg.",2016-04-13 +28349685,PTMOracle: A Cytoscape App for Covisualizing and Coanalyzing Post-Translational Modifications in Protein Interaction Networks.,"Post-translational modifications of proteins (PTMs) act as key regulators of protein activity and of protein-protein interactions (PPIs). To date, it has been difficult to comprehensively explore functional links between PTMs and PPIs. 
To address this, we developed PTMOracle, a Cytoscape app for coanalyzing PTMs within PPI networks. PTMOracle also allows extensive data to be integrated and coanalyzed with PPI networks, allowing the role of domains, motifs, and disordered regions to be considered. For proteins of interest, or a whole proteome, PTMOracle can generate network visualizations to reveal complex PTM-associated relationships. This is assisted by OraclePainter for coloring proteins by modifications, OracleTools for network analytics, and OracleResults for exploring tabulated findings. To illustrate the use of PTMOracle, we investigate PTM-associated relationships and their role in PPIs in four case studies. In the yeast interactome and its rich set of PTMs, we construct and explore histone-associated and domain-domain interaction networks and show how integrative approaches can predict kinases involved in phosphodegrons. In the human interactome, a phosphotyrosine-associated network is analyzed but highlights the sparse nature of human PPI networks and lack of PTM-associated data. PTMOracle is open source and available at the Cytoscape app store: http://apps.cytoscape.org/apps/ptmoracle .",2017-04-06 +25978526,Global Association of Cold Spells and Adverse Health Effects: A Systematic Review and Meta-Analysis.,"

Background

There is substantial evidence that mortality increases in low temperatures. Less is known about the role of prolonged cold periods denoted as cold spells.

Objective

We conducted the first systematic review and meta-analysis to summarize the evidence on the adverse health effects of cold spells in varying climates.

Data sources and extraction

Four databases (Ovid Medline, PubMed, Scopus, Web of Science) were searched for all years and languages available. ""Cold spell"" was defined as an event below a temperature threshold lasting for a minimum duration of 2 days. Of 1,527 identified articles, 26 satisfied our eligibility criteria for the systematic review, and 9 were eligible for meta-analyses. The articles were grouped by the three main study questions into Overall-effect Group, Added-effect Group, and Temperature-change-effect Group.

Data synthesis

Based on random-effects models in the meta-analyses, cold spells were associated with increased mortality from all or all nonaccidental causes (summary rate ratio = 1.10; 95% CI: 1.04, 1.17 based on 9 estimates from five studies), cardiovascular diseases (1.11; 95% CI: 1.03, 1.19; 12 estimates from eight studies), and respiratory diseases (1.21; 95% CI: 0.97, 1.51; 8 estimates from four studies). Estimated associations were stronger for people ≥ 65 years of age (1.06; 95% CI: 1.00, 1.12) than for people 0-64 years of age (1.01; 95% CI: 1.00, 1.03). Study-specific effect estimates from a limited number of studies suggested an increased morbidity related to cold spells, but it was not possible to quantitatively summarize the evidence.

Conclusions

Cold spells are associated with increased mortality rates in populations around the world. The body of evidence suggests that cold spells also have other adverse health effects. There was substantial heterogeneity among the studies, which should be taken into account in the interpretation of the results.

Citation

Ryti NR, Guo Y, Jaakkola JJ. 2016. Global association of cold spells and adverse health effects: a systematic review and meta-analysis. Environ Health Perspect 124:12-22; http://dx.doi.org/10.1289/ehp.1408104.",2015-05-15 +26240168,Databases of Conformations and NMR Structures of Glycan Determinants.,"The present study reports a comprehensive nuclear magnetic resonance (NMR) characterization and a systematic conformational sampling of the conformational preferences of 170 glycan moieties of glycosphingolipids as produced in large-scale quantities by bacterial fermentation. These glycans span across a variety of families including the blood group antigens (A, B and O), core structures (Types 1, 2 and 4), fucosylated oligosaccharides (core and lacto-series), sialylated oligosaccharides (Types 1 and 2), Lewis antigens, GPI-anchors and globosides. A complementary set of about 100 glycan determinants occurring in glycoproteins and glycosaminoglycans has also been structurally characterized using molecular mechanics-based computation. The experimental and computational data generated are organized in two relational databases that can be queried by the user through a user-friendly search engine. The NMR ((1)H and (13)C, COSY, TOCSY, HMQC, HMBC correlation) spectra and 3D structures are available for visualization and download in commonly used structure formats. Emphasis has been given to the use of a common nomenclature for the structural encoding of the carbohydrates and each glycan molecule is described by four different types of representations in order to cope with the different usages in chemistry and biology. These web-based databases were developed with non-proprietary software and are open access for the scientific community available at http://glyco3d.cermav.cnrs.fr.",2015-08-03 +25691443,Discriminating precursors of common fragments for large-scale metabolite profiling by triple quadrupole mass spectrometry.,"

Motivation

The goal of large-scale metabolite profiling is to compare the relative concentrations of as many metabolites extracted from biological samples as possible. This is typically accomplished by measuring the abundances of thousands of ions with high-resolution and high mass accuracy mass spectrometers. Although the data from these instruments provide a comprehensive fingerprint of each sample, identifying the structures of the thousands of detected ions is still challenging and time intensive. An alternative, less-comprehensive approach is to use triple quadrupole (QqQ) mass spectrometry to analyze predetermined sets of metabolites (typically fewer than several hundred). This is done using authentic standards to develop QqQ experiments that specifically detect only the targeted metabolites, with the advantage that the need for ion identification after profiling is eliminated.

Results

Here, we propose a framework to extend the application of QqQ mass spectrometers to large-scale metabolite profiling. We aim to provide a foundation for designing QqQ multiple reaction monitoring (MRM) experiments for each of the 82 696 metabolites in the METLIN metabolite database. First, we identify common fragmentation products from the experimental fragmentation data in METLIN. Then, we model the likelihoods of each precursor structure in METLIN producing each common fragmentation product. With these likelihood estimates, we select ensembles of common fragmentation products that minimize our uncertainty about metabolite identities. We demonstrate encouraging performance and, based on our results, we suggest how our method can be integrated with future work to develop large-scale MRM experiments.

Availability and implementation

Our predictions, Supplementary results, and the code for estimating likelihoods and selecting ensembles of fragmentation reactions are made available on the lab website at http://pattilab.wustl.edu/FragPred.",2015-02-16 +28658305,Predicting human protein subcellular localization by heterogeneous and comprehensive approaches.,"Drug development and investigation of protein function both require an understanding of protein subcellular localization. We developed a system, REALoc, that can predict the subcellular localization of singleplex and multiplex proteins in humans. This system, based on comprehensive strategy, consists of two heterogeneous systematic frameworks that integrate one-to-one and many-to-many machine learning methods and use sequence-based features, including amino acid composition, surface accessibility, weighted sign aa index, and sequence similarity profile, as well as gene ontology function-based features. REALoc can be used to predict localization to six subcellular compartments (cell membrane, cytoplasm, endoplasmic reticulum/Golgi, mitochondrion, nucleus, and extracellular). REALoc yielded a 75.3% absolute true success rate during five-fold cross-validation and a 57.1% absolute true success rate in an independent database test, which was >10% higher than six other prediction systems. Lastly, we analyzed the effects of Vote and GANN models on singleplex and multiplex localization prediction efficacy. REALoc is freely available at http://predictor.nchu.edu.tw/REALoc.",2017-06-28 +28931371,VISMapper: ultra-fast exhaustive cartography of viral insertion sites for gene therapy.,"

Background

The possibility of integrating viral vectors to become a persistent part of the host genome makes them a crucial element of clinical gene therapy. However, viral integration has associated risks, such as the unintentional activation of oncogenes that can result in cancer. Therefore, the analysis of integration sites of retroviral vectors is a crucial step in developing safer vectors for therapeutic use.

Results

Here we present VISMapper, a vector integration site analysis web server, to analyze next-generation sequencing data for retroviral vector integration sites. VISMapper can be found at: http://vismapper.babelomics.org .

Conclusions

Because it uses novel mapping algorithms VISMapper is remarkably faster than previous available programs. It also provides a useful graphical interface to analyze the integration sites found in the genomic context.",2017-09-20 +28334257,"Fast, clash-free RNA conformational morphing using molecular junctions.","

Motivation

Non-coding ribonucleic acids (ncRNA) are functional RNA molecules that are not translated into protein. They are extremely dynamic, adopting diverse conformational substates, which enables them to modulate their interaction with a large number of other molecules. The flexibility of ncRNA provides a challenge for probing their complex 3D conformational landscape, both experimentally and computationally.

Results

Despite their conformational diversity, ncRNAs mostly preserve their secondary structure throughout the dynamic ensemble. Here we present a kinematics-based procedure to morph an RNA molecule between conformational substates, while avoiding inter-atomic clashes. We represent an RNA as a kinematic linkage, with fixed groups of atoms as rigid bodies and rotatable bonds as degrees of freedom. Our procedure maintains RNA secondary structure by treating hydrogen bonds between base pairs as constraints. The constraints define a lower-dimensional, secondary-structure constraint manifold in conformation space, where motions are largely governed by molecular junctions of unpaired nucleotides. On a large benchmark set, we show that our morphing procedure compares favorably to peer algorithms, and can approach goal conformations to within a low all-atom RMSD by directing fewer than 1% of its atoms. Our results suggest that molecular junctions can modulate 3D structural rearrangements, while secondary structure elements guide large parts of the molecule along the transition to the correct final conformation.

Availability and implementation

The source code, binaries and data are available at https://simtk.org/home/kgs .

Contact

amelie.heliou@polytechnique.edu or vdbedem@stanford.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +25348215,MetMSLine: an automated and fully integrated pipeline for rapid processing of high-resolution LC-MS metabolomic datasets.,"

Unlabelled

MetMSLine represents a complete collection of functions in the R programming language as an accessible GUI for biomarker discovery in large-scale liquid-chromatography high-resolution mass spectral datasets from acquisition through to final metabolite identification forming a backend to output from any peak-picking software such as XCMS. MetMSLine automatically creates subdirectories, data tables and relevant figures at the following steps: (i) signal smoothing, normalization, filtration and noise transformation (PreProc.QC.LSC.R); (ii) PCA and automatic outlier removal (Auto.PCA.R); (iii) automatic regression, biomarker selection, hierarchical clustering and cluster ion/artefact identification (Auto.MV.Regress.R); (iv) Biomarker-MS/MS fragmentation spectra matching and fragment/neutral loss annotation (Auto.MS.MS.match.R) and (v) semi-targeted metabolite identification based on a list of theoretical masses obtained from public databases (DBAnnotate.R).

Availability and implementation

All source code and suggested parameters are available in an un-encapsulated layout on http://wmbedmands.github.io/MetMSLine/. Readme files and a synthetic dataset of both X-variables (simulated LC-MS data), Y-variables (simulated continuous variables) and metabolite theoretical masses are also available on our GitHub repository.",2014-10-27 +28875072,PrePhyloPro: phylogenetic profile-based prediction of whole proteome linkages.,"Direct and indirect functional links between proteins as well as their interactions as part of larger protein complexes or common signaling pathways may be predicted by analyzing the correlation of their evolutionary patterns. Based on phylogenetic profiling, here we present a highly scalable and time-efficient computational framework for predicting linkages within the whole human proteome. We have validated this method through analysis of 3,697 human pathways and molecular complexes and a comparison of our results with the prediction outcomes of previously published co-occurrency model-based and normalization methods. Here we also introduce PrePhyloPro, a web-based software that uses our method for accurately predicting proteome-wide linkages. We present data on interactions of human mitochondrial proteins, verifying the performance of this software. PrePhyloPro is freely available at http://prephylopro.org/phyloprofile/.",2017-08-28 +24884810,"GO2MSIG, an automated GO based multi-species gene set generator for gene set enrichment analysis.","

Background

Despite the widespread use of high throughput expression platforms and the availability of a desktop implementation of Gene Set Enrichment Analysis (GSEA) that enables non-experts to perform gene set based analyses, the availability of the necessary precompiled gene sets is rare for species other than human.

Results

A software tool (GO2MSIG) was implemented that combines data from various publicly available sources and uses the Gene Ontology (GO) project term relationships to produce GSEA compatible hierarchical GO based gene sets for all species for which association data is available. Annotation sources include the GO association database (which contains data for over 200000 species), the Entrez gene2go table, and various manufacturers' array annotation files. This enables the creation of gene sets from the most up-to-date annotation data available. Additional features include the ability to restrict by evidence code, to remap gene descriptors, to filter by set size and to speed up repeat queries by caching the GO term hierarchy. Synonymous GO terms are remapped to the version preferred by the GO ontology supplied. The tool can be used in standalone form, or via a web interface. Prebuilt gene set collections constructed from the September 2013 GO release are also available for common species including human. In contrast human GO based sets available from the Broad Institute itself date from 2008.

Conclusions

GO2MSIG enables the bioinformatician and non-bioinformatician alike to generate gene sets required for GSEA analysis for almost any organism for which GO term association data exists. The output gene sets may be used directly within GSEA and do not require knowledge of programming languages such as Perl, R or Python. The output sets can also be used with other analysis software such as ErmineJ that accept gene sets in the same format. Source code can be downloaded and installed locally from http://www.bioinformatics.org/go2msig/releases/ or used via the web interface at http://www.go2msig.org/cgi-bin/go2msig.cgi.",2014-05-17 +24923819,Curation accuracy of model organism databases. ,"Manual extraction of information from the biomedical literature-or biocuration-is the central methodology used to construct many biological databases. For example, the UniProt protein database, the EcoCyc Escherichia coli database and the Candida Genome Database (CGD) are all based on biocuration. Biological databases are used extensively by life science researchers, as online encyclopedias, as aids in the interpretation of new experimental data and as golden standards for the development of new bioinformatics algorithms. Although manual curation has been assumed to be highly accurate, we are aware of only one previous study of biocuration accuracy. We assessed the accuracy of EcoCyc and CGD by manually selecting curated assertions within randomly chosen EcoCyc and CGD gene pages and by then validating that the data found in the referenced publications supported those assertions. A database assertion is considered to be in error if that assertion could not be found in the publication cited for that assertion. We identified 10 errors in the 633 facts that we validated across the two databases, for an overall error rate of 1.58%, and individual error rates of 1.82% for CGD and 1.40% for EcoCyc. 
These data suggest that manual curation of the experimental literature by Ph.D-level scientists is highly accurate. Database URL: http://ecocyc.org/, http://www.candidagenome.org//",2014-06-12 +25084271,GMEnzy: a genetically modified enzybiotic database.,"GMEs are genetically modified enzybiotics created through molecular engineering approaches to deal with the increasing problem of antibiotic resistance prevalence. We present a fully manually curated database, GMEnzy, which focuses on GMEs and their design strategies, production and purification methods, and biological activity data. GMEnzy collects and integrates all available GMEs and their related information into one web based database. Currently GMEnzy holds 186 GMEs from published literature. The GMEnzy interface is easy to use, and allows users to rapidly retrieve data according to desired search criteria. GMEnzy's construction will increase the efficiency and convenience of improving these bioactive proteins for specific requirements, and will expand the arsenal available for researches to control drug-resistant pathogens. This database will prove valuable for researchers interested in genetically modified enzybiotics studies. GMEnzy is freely available on the Web at http://biotechlab.fudan.edu.cn/database/gmenzy/.",2014-08-01 +27764717,CAFÉ-Map: Context Aware Feature Mapping for mining high dimensional biomedical data.,"Feature selection and ranking is of great importance in the analysis of biomedical data. In addition to reducing the number of features used in classification or other machine learning tasks, it allows us to extract meaningful biological and medical information from a machine learning model. Most existing approaches in this domain do not directly model the fact that the relative importance of features can be different in different regions of the feature space. In this work, we present a context aware feature ranking algorithm called CAFÉ-Map. 
CAFÉ-Map is a locally linear feature ranking framework that allows recognition of important features in any given region of the feature space or for any individual example. This allows for simultaneous classification and feature ranking in an interpretable manner. We have benchmarked CAFÉ-Map on a number of toy and real world biomedical data sets. Our comparative study with a number of published methods shows that CAFÉ-Map achieves better accuracies on these data sets. The top ranking features obtained through CAFÉ-Map in a gene profiling study correlate very well with the importance of different genes reported in the literature. Furthermore, CAFÉ-Map provides a more in-depth analysis of feature ranking at the level of individual examples.

Availability

CAFÉ-Map Python code is available at: http://faculty.pieas.edu.pk/fayyaz/software.html#cafemap . The CAFÉ-Map package supports parallelization and sparse data and provides example scripts for classification. This code can be used to reconstruct the results given in this paper.",2016-10-11 +24833803,FaSD-somatic: a fast and accurate somatic SNV detection algorithm for cancer genome sequencing data.,"

Unlabelled

Recent advances in high-throughput sequencing technologies have enabled us to sequence large number of cancer samples to reveal novel insights into oncogenetic mechanisms. However, the presence of intratumoral heterogeneity, normal cell contamination and insufficient sequencing depth, together pose a challenge for detecting somatic mutations. Here we propose a fast and an accurate somatic single-nucleotide variations (SNVs) detection program, FaSD-somatic. The performance of FaSD-somatic is extensively assessed on various types of cancer against several state-of-the-art somatic SNV detection programs. Benchmarked by somatic SNVs from either existing databases or de novo higher-depth sequencing data, FaSD-somatic has the best overall performance. Furthermore, FaSD-somatic is efficient, it finishes somatic SNV calling within 14 h on 50X whole genome sequencing data in paired samples.

Availability and implementation

The program, datasets and supplementary files are available at http://jjwanglab.org/FaSD-somatic/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-14 +24234451,The MIntAct project--IntAct as a common curation platform for 11 molecular interaction databases.,"IntAct (freely available at http://www.ebi.ac.uk/intact) is an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions. IntAct has developed a sophisticated web-based curation tool, capable of supporting both IMEx- and MIMIx-level curation. This tool is now utilized by multiple additional curation teams, all of whom annotate data directly into the IntAct database. Members of the IntAct team supply appropriate levels of training, perform quality control on entries and take responsibility for long-term data maintenance. Recently, the MINT and IntAct databases decided to merge their separate efforts to make optimal use of limited developer resources and maximize the curation output. All data manually curated by the MINT curators have been moved into the IntAct database at EMBL-EBI and are merged with the existing IntAct dataset. Both IntAct and MINT are active contributors to the IMEx consortium (http://www.imexconsortium.org).",2013-11-13 +21209370,Large-scale phosphoproteomic analysis of membrane proteins in renal proximal and distal tubule.,"Recent advances in mass spectrometry (MS) have provided means for large-scale phosphoproteomic profiling of specific tissues. Here, we report results from large-scale tandem MS [liquid chromatography (LC)-MS/MS]-based phosphoproteomic profiling of biochemically isolated membranes from the renal cortex, with focus on transporters and regulatory proteins. Data sets were filtered (by target-decoy analysis) to limit false-positive identifications to <2%. A total of 7,125 unique nonphosphorylated and 743 unique phosphorylated peptides were identified. 
Among the phosphopeptides identified were sites on transporter proteins, i.e., solute carrier (Slc, n = 63), ATP-binding cassette (Abc, n = 4), and aquaporin (Aqp, n = 3) family proteins. Database searches reveal that a majority of the phosphorylation sites identified in transporter proteins were previously unreported. Most of the Slc family proteins are apical or basolateral transporters expressed in proximal tubule cells, including proteins known to mediate transport of glucose, amino acids, organic ions, and inorganic ions. In addition, we identified potentially important phosphorylation sites for transport proteins from distal nephron segments, including the bumetanide-sensitive Na-K-2Cl cotransporter (Slc12a1 or NKCC2) at Ser(87), Thr(101), and Ser(126) and the thiazide-sensitive Na-Cl cotransporter (Slc12a3 or NCC) at Ser(71) and Ser(124). A subset of phosphorylation sites in regulatory proteins coincided with known functional motifs, suggesting specific regulatory roles. An online database from this study (http://dir.nhlbi.nih.gov/papers/lkem/rcmpd/) provides a resource for future studies of transporter regulation.",2011-01-05 +28651553,IG and TR single chain fragment variable (scFv) sequence analysis: a new advanced functionality of IMGT/V-QUEST and IMGT/HighV-QUEST.,"

Background

IMGT®, the international ImMunoGeneTics information system® ( http://www.imgt.org ), was created in 1989 in Montpellier, France (CNRS and Montpellier University) to manage the huge and complex diversity of the antigen receptors, and is at the origin of immunoinformatics, a science at the interface between immunogenetics and bioinformatics. Immunoglobulins (IG) or antibodies and T cell receptors (TR) are managed and described in the IMGT® databases and tools at the level of receptor, chain and domain. The analysis of the IG and TR variable (V) domain rearranged nucleotide sequences is performed by IMGT/V-QUEST (online since 1997, 50 sequences per batch) and, for next generation sequencing (NGS), by IMGT/HighV-QUEST, the high throughput version of IMGT/V-QUEST (portal begun in 2010, 500,000 sequences per batch). In vitro combinatorial libraries of engineered antibody single chain Fragment variable (scFv) which mimic the in vivo natural diversity of the immune adaptive responses are extensively screened for the discovery of novel antigen binding specificities. However the analysis of NGS full length scFv (~850 bp) represents a challenge as they contain two V domains connected by a linker and there is no tool for the analysis of two V domains in a single chain.

Methods

The functionality ""Analysis of single chain Fragment variable (scFv)"" has been implemented in IMGT/V-QUEST and, for NGS, in IMGT/HighV-QUEST for the analysis of the two V domains of IG and TR scFv. It proceeds in five steps: search for a first closest V-REGION, full characterization of the first V-(D)-J-REGION, then search for a second V-REGION and full characterization of the second V-(D)-J-REGION, and finally linker delimitation.

Results

For each sequence or NGS read, positions of the 5'V-DOMAIN, linker and 3'V-DOMAIN in the scFv are provided in the 'V-orientated' sense. Each V-DOMAIN is fully characterized (gene identification, sequence description, junction analysis, characterization of mutations and amino changes). The functionality is generic and can analyse any IG or TR single chain nucleotide sequence containing two V domains, provided that the corresponding species IMGT reference directory is available.

Conclusion

The ""Analysis of single chain Fragment variable (scFv)"" implemented in IMGT/V-QUEST and, for NGS, in IMGT/HighV-QUEST provides the identification and full characterization of the two V domains of full-length scFv (~850 bp) nucleotide sequences from combinatorial libraries. The analysis can also be performed on concatenated paired chains of expressed antigen receptor IG or TR repertoires.",2017-06-26 +25724487,"Progress in mind: focus on alcohol use disorders, an elsevier resource centre.","Harmful use of alcohol is one of the top five risks for burden of disease globally and in Europe; in 2012, 3.3 million net deaths (approximately 6% of all global deaths) were attributable to this risk factor. It is also linked to the development of a wide spectrum of alcohol use disorders, ranging from mild manifestations to a severe disease known as alcohol dependence. Alcohol dependence is a progressive, chronic, and relapsing brain disease resulting from the prolonged effects of alcohol on the brain. Alcohol dependence imposes a significant societal burden, with indirect societal costs reaching up to 0.64% of European countries' annual gross domestic product. With these facts in mind, it is important to recognize and manage alcohol dependence. Although the biological mechanisms behind the development of alcohol dependence are not fully known, factors that have been shown to influence its development include genetic predisposition, psychological problems, and social interactions. Alcohol use has also been linked to the development of hypertension, liver cirrhosis, chronic pancreatitis, multiple types of cancer, and psychiatric comorbidities such as depression and anxiety disorders. With such severe effects on both individuals and society, it is important to recognize the characteristic signs and symptoms of alcohol dependence and explore new ways to better manage patients with this brain disease. 
Effective treatment approaches for alcohol dependence include biological, behavioral, and social components addressing the multiple aspects of this disease. Comprehensive, educational platforms in which to explore the many facets of this disease such as the Progress in Mind: Focus on Alcohol Use Disorders Resource Centre, will provide clinicians with the tools necessary for recognizing patients with alcohol dependence and managing their disease along with related comorbidities. Online Access: http://progressinmind.elsevierresource.com.",2015-02-25 +27168721,Thyroid Cancer and Tumor Collaborative Registry (TCCR).,"A multicenter, web-based Thyroid Cancer and Tumor Collaborative Registry (TCCR, http://tccr.unmc.edu) allows for the collection and management of various data on thyroid cancer (TC) and thyroid nodule (TN) patients. The TCCR is coupled with OpenSpecimen, an open-source biobank management system, to annotate biospecimens obtained from the TCCR subjects. The demographic, lifestyle, physical activity, dietary habits, family history, medical history, and quality of life data are provided and may be entered into the registry by subjects. Information on diagnosis, treatment, and outcome is entered by the clinical personnel. The TCCR uses advanced technical and organizational practices, such as (i) metadata-driven software architecture (design); (ii) modern standards and best practices for data sharing and interoperability (standardization); (iii) Agile methodology (project management); (iv) Software as a Service (SaaS) as a software distribution model (operation); and (v) the confederation principle as a business model (governance). This allowed us to create a secure, reliable, user-friendly, and self-sustainable system for TC and TN data collection and management that is compatible with various end-user devices and easily adaptable to a rapidly changing environment. Currently, the TCCR contains data on 2,261 subjects and data on more than 28,000 biospecimens. 
Data and biological samples collected by the TCCR are used in developing diagnostic, prevention, treatment, and survivorship strategies against TC.",2016-05-03 +29270911,A network-based meta-analysis for characterizing the genetic landscape of human aging.,"Great amounts of omics data are generated in aging research, but their diverse and partly complementary nature requires integrative analysis approaches for investigating aging processes and connections to age-related diseases. To establish a broader picture of the genetic and epigenetic landscape of human aging we performed a large-scale meta-analysis of 6600 human genes by combining 35 datasets that cover aging hallmarks, longevity, changes in DNA methylation and gene expression, and different age-related diseases. To identify biological relationships between aging-associated genes we incorporated them into a protein interaction network and characterized their network neighborhoods. In particular, we computed a comprehensive landscape of more than 1000 human aging clusters, network regions where genes are highly connected and where gene products commonly participate in similar processes. In addition to clusters that capture known aging processes such as nutrient-sensing and mTOR signaling, we present a number of clusters with a putative functional role in linking different aging processes as promising candidates for follow-up studies. To enable their detailed exploration, all datasets and aging clusters are made freely available via an interactive website ( https://gemex.eurac.edu/bioinf/age/ ).",2017-12-21 +29511354,SWPhylo - A Novel Tool for Phylogenomic Inferences by Comparison of Oligonucleotide Patterns and Integration of Genome-Based and Gene-Based Phylogenetic Trees.,"Modern phylogenetic studies may benefit from the analysis of complete genome sequences of various microorganisms. Evolutionary inferences based on genome-scale analysis are believed to be more accurate than the gene-based alternative. 
However, the computational complexity of current phylogenomic procedures, inappropriateness of standard phylogenetic tools to process genome-wide data, and lack of reliable substitution models which correlates with alignment-free phylogenomic approaches deter microbiologists from using these opportunities. For example, the super-matrix and super-tree approaches of phylogenomics use multiple integrated genomic loci or individual gene-based trees to infer an overall consensus tree. However, these approaches potentially multiply errors of gene annotation and sequence alignment not mentioning the computational complexity and laboriousness of the methods. In this article, we demonstrate that the annotation- and alignment-free comparison of genome-wide tetranucleotide frequencies, termed oligonucleotide usage patterns (OUPs), allowed a fast and reliable inference of phylogenetic trees. These were congruent to the corresponding whole genome super-matrix trees in terms of tree topology when compared with other known approaches including 16S ribosomal RNA and GyrA protein sequence comparison, complete genome-based MAUVE, and CVTree methods. A Web-based program to perform the alignment-free OUP-based phylogenomic inferences was implemented at http://swphylo.bi.up.ac.za/. Applicability of the tool was tested on different taxa from subspecies to intergeneric levels. Distinguishing between closely related taxonomic units may be enforced by providing the program with alignments of marker protein sequences, eg, GyrA.",2018-02-20 +27883891,Analysis of Cell Lineage Trees by Exact Bayesian Inference Identifies Negative Autoregulation of Nanog in Mouse Embryonic Stem Cells.,"Many cellular effectors of pluripotency are dynamically regulated. In principle, regulatory mechanisms can be inferred from single-cell observations of effector activity across time. However, rigorous inference techniques suitable for noisy, incomplete, and heterogeneous data are lacking. 
Here, we introduce stochastic inference on lineage trees (STILT), an algorithm capable of identifying stochastic models that accurately describe the quantitative behavior of cell fate markers observed using time-lapse microscopy data collected from proliferating cell populations. STILT performs exact Bayesian parameter inference and stochastic model selection using a particle-filter-based algorithm. We use STILT to investigate the autoregulation of Nanog, a heterogeneously expressed core pluripotency factor, in mouse embryonic stem cells. STILT rejects the possibility of positive Nanog autoregulation with high confidence; instead, model predictions indicate weak negative feedback. We use STILT for rational experimental design and validate model predictions using novel experimental data. STILT is available for download as an open source framework from http://www.imsb.ethz.ch/research/claassen/Software/stilt---stochastic-inference-on-lineage-trees.html.",2016-11-01 +28969586,SimBA: A methodology and tools for evaluating the performance of RNA-Seq bioinformatic pipelines.,"

Background

The evolution of next-generation sequencing (NGS) technologies has led to increased focus on RNA-Seq. Many bioinformatic tools have been developed for RNA-Seq analysis, each with unique performance characteristics and configuration parameters. Users face an increasingly complex task in understanding which bioinformatic tools are best for their specific needs and how they should be configured. In order to provide some answers to these questions, we investigate the performance of leading bioinformatic tools designed for RNA-Seq analysis and propose a methodology for systematic evaluation and comparison of performance to help users make well informed choices.

Results

To evaluate RNA-Seq pipelines, we developed a suite of two benchmarking tools. SimCT generates simulated datasets that get as close as possible to specific real biological conditions accompanied by the list of genomic incidents and mutations that have been inserted. BenchCT then compares the output of any bioinformatics pipeline that has been run against a SimCT dataset with the simulated genomic and transcriptional variations it contains to give an accurate performance evaluation in addressing specific biological question. We used these tools to simulate a real-world genomic medicine question s involving the comparison of healthy and cancerous cells. Results revealed that performance in addressing a particular biological context varied significantly depending on the choice of tools and settings used. We also found that by combining the output of certain pipelines, substantial performance improvements could be achieved.

Conclusion

Our research emphasizes the importance of selecting and configuring bioinformatic tools for the specific biological question being investigated to obtain optimal results. Pipeline designers, developers and users should include benchmarking in the context of their biological question as part of their design and quality control process. Our SimBA suite of benchmarking tools provides a reliable basis for comparing the performance of RNA-Seq bioinformatics pipelines in addressing a specific biological question. We would like to see the creation of a reference corpus of data-sets that would allow accurate comparison between benchmarks performed by different groups and the publication of more benchmarks based on this public corpus. SimBA software and data-set are available at http://cractools.gforge.inria.fr/softwares/simba/ .",2017-09-29 +24364365,Database construction for PromoterCAD: synthetic promoter design for mammals and plants.,"Synthetic promoters can control a gene's timing, location, and expression level. The PromoterCAD web server ( http://promotercad.org ) allows the design of synthetic promoters to control plant gene expression, by novel arrangement of cis-regulatory elements. Recently, we have expanded PromoterCAD's scope with additional plant and animal data: (1) PLACE (Plant Cis-acting Regulatory DNA Elements), including various sized sequence motifs; (2) PEDB (Mammalian Promoter/Enhancer Database), including gene expression data for mammalian tissues. The plant PromoterCAD data now contains 22 000 Arabidopsis thaliana genes, 2 200 000 microarray measurements in 20 growth conditions and 79 tissue organs and developmental stages, while the new mammalian PromoterCAD data contains 679 Mus musculus genes and 65 000 microarray measurements in 96 tissue organs and cell types ( http://promotercad.org/mammal/ ). 
This work presents step-by-step instructions for adding both regulatory motif and gene expression data to PromoterCAD, to illustrate how users can expand PromoterCAD functionality for their own applications and organisms.",2014-01-06 +29429333,"Orthogonal Information Encoding in Living Cells with High Error-Tolerance, Safety, and Fidelity.","Information encoding in DNA is of great interest but its applications in vivo might be questionable since errors could be enriched exponentially by cellular replications and the artificial sequences may interfere with the natural ones. Here, a novel self-error-detecting, three-base block encoding scheme (SED3B) is proposed for reliable and orthogonal information encoding in living cells. SED3B utilizes a novel way to add error detecting bases in small data blocks which can combine with the inherent redundancy of DNA molecules for effective error correction. Errors at a rate of 19% can be corrected as shown by error-prone PCR experiments with E. coli cells. Calculations based on this preliminary result show that SED3B encoded information in E. coli can be reliable for more than 12 000 years of continuous replication. Importantly, SED3B encoded sequences do not share sequence space to all reported natural DNA sequences except for some short tandem repeats, indicating a low biological relevance of encoded sequences for the first time. These features make SED3B attractive for broad orthogonal information encoding purposes in living cells, for example, comments/barcodes encoding in synthetic biology. For proof of concept, 10 different barcodes were encoded in E. coli cells. After continuous replications for 10 days including exposure to ultraviolet for 2-3 min (lethality >60%) per day, all barcodes were fully recovered, proving the stability of the encoded information. 
An online encoding-decoding system is implemented and available at http://biosystem.bt1.tu-harburg.de/sed3b/ .",2018-02-21 +24297256,NCBI's Database of Genotypes and Phenotypes: dbGaP.,"The Database of Genotypes and Phenotypes (dbGap, http://www.ncbi.nlm.nih.gov/gap) is a National Institutes of Health-sponsored repository charged to archive, curate and distribute information produced by studies investigating the interaction of genotype and phenotype. Information in dbGaP is organized as a hierarchical structure and includes the accessioned objects, phenotypes (as variables and datasets), various molecular assay data (SNP and Expression Array data, Sequence and Epigenomic marks), analyses and documents. Publicly accessible metadata about submitted studies, summary level data, and documents related to studies can be accessed freely on the dbGaP website. Individual-level data are accessible via Controlled Access application to scientists across the globe.",2013-12-01 +28453668,A mass graph-based approach for the identification of modified proteoforms using top-down tandem mass spectra.,"

Motivation

Although proteomics has rapidly developed in the past decade, researchers are still in the early stage of exploring the world of complex proteoforms, which are protein products with various primary structure alterations resulting from gene mutations, alternative splicing, post-translational modifications, and other biological processes. Proteoform identification is essential to mapping proteoforms to their biological functions as well as discovering novel proteoforms and new protein functions. Top-down mass spectrometry is the method of choice for identifying complex proteoforms because it provides a 'bird's eye view' of intact proteoforms. The combinatorial explosion of various alterations on a protein may result in billions of possible proteoforms, making proteoform identification a challenging computational problem.

Results

We propose a new data structure, called the mass graph, for efficient representation of proteoforms and design mass graph alignment algorithms. We developed TopMG, a mass graph-based software tool for proteoform identification by top-down mass spectrometry. Experiments on top-down mass spectrometry datasets showed that TopMG outperformed existing methods in identifying complex proteoforms.

Availability and implementation

http://proteomics.informatics.iupui.edu/software/topmg/.

Contact

xwliu@iupui.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +25378330,Recent improvements to Binding MOAD: a resource for protein-ligand binding affinities and structures.,"For over 10 years, Binding MOAD (Mother of All Databases; http://www.BindingMOAD.org) has been one of the largest resources for high-quality protein-ligand complexes and associated binding affinity data. Binding MOAD has grown at the rate of 1994 complexes per year, on average. Currently, it contains 23,269 complexes and 8156 binding affinities. Our annual updates curate the data using a semi-automated literature search of the references cited within the PDB file, and we have recently upgraded our website and added new features and functionalities to better serve Binding MOAD users. In order to eliminate the legacy application server of the old platform and to accommodate new changes, the website has been completely rewritten in the LAMP (Linux, Apache, MySQL and PHP) environment. The improved user interface incorporates current third-party plugins for better visualization of protein and ligand molecules, and it provides features like sorting, filtering and filtered downloads. In addition to the field-based searching, Binding MOAD now can be searched by structural queries based on the ligand. In order to remove redundancy, Binding MOAD records are clustered in different families based on 90% sequence identity. The new Binding MOAD, with the upgraded platform, features and functionalities, is now equipped to better serve its users.",2014-11-06 +25416803,DIANA-TarBase v7.0: indexing more than half a million experimentally supported miRNA:mRNA interactions.,"microRNAs (miRNAs) are short non-coding RNA species, which act as potent gene expression regulators. Accurate identification of miRNA targets is crucial to understanding their function. Currently, hundreds of thousands of miRNA:gene interactions have been experimentally identified. 
However, this wealth of information is fragmented and hidden in thousands of manuscripts and raw next-generation sequencing data sets. DIANA-TarBase was initially released in 2006 and it was the first database aiming to catalog published experimentally validated miRNA:gene interactions. DIANA-TarBase v7.0 (http://www.microrna.gr/tarbase) aims to provide for the first time hundreds of thousands of high-quality manually curated experimentally validated miRNA:gene interactions, enhanced with detailed meta-data. DIANA-TarBase v7.0 enables users to easily identify positive or negative experimental results, the utilized experimental methodology, experimental conditions including cell/tissue type and treatment. The new interface provides also advanced information ranging from the binding site location, as identified experimentally as well as in silico, to the primer sequences used for cloning experiments. More than half a million miRNA:gene interactions have been curated from published experiments on 356 different cell types from 24 species, corresponding to 9- to 250-fold more entries than any other relevant database. DIANA-TarBase v7.0 is freely available.",2014-11-21 +28453624,Inferring Rates and Length-Distributions of Indels Using Approximate Bayesian Computation.,"The most common evolutionary events at the molecular level are single-base substitutions, as well as insertions and deletions (indels) of short DNA segments. A large body of research has been devoted to develop probabilistic substitution models and to infer their parameters using likelihood and Bayesian approaches. In contrast, relatively little has been done to model indel dynamics, probably due to the difficulty in writing explicit likelihood functions. Here, we contribute to the effort of modeling indel dynamics by presenting SpartaABC, an approximate Bayesian computation (ABC) approach to infer indel parameters from sequence data (either aligned or unaligned). 
SpartaABC circumvents the need to use an explicit likelihood function by extracting summary statistics from simulated sequences. First, summary statistics are extracted from the input sequence data. Second, SpartaABC samples indel parameters from a prior distribution and uses them to simulate sequences. Third, it computes summary statistics from the simulated sets of sequences. By computing a distance between the summary statistics extracted from the input and each simulation, SpartaABC can provide an approximation to the posterior distribution of indel parameters as well as point estimates. We study the performance of our methodology and show that it provides accurate estimates of indel parameters in simulations. We next demonstrate the utility of SpartaABC by studying the impact of alignment errors on the inference of positive selection. A C ++ program implementing SpartaABC is freely available in http://spartaabc.tau.ac.il.",2017-05-01 +26992782,Zisland Explorer: detect genomic islands by combining homogeneity and heterogeneity properties.,"Genomic islands are genomic fragments of alien origin in bacterial and archaeal genomes, usually involved in symbiosis or pathogenesis. In this work, we described Zisland Explorer, a novel tool to predict genomic islands based on the segmental cumulative GC profile. Zisland Explorer was designed with a novel strategy, as well as a combination of the homogeneity and heterogeneity of genomic sequences. While the sequence homogeneity reflects the composition consistence within each island, the heterogeneity measures the composition bias between an island and the core genome. The performance of Zisland Explorer was evaluated on the data sets of 11 different organisms. Our results suggested that the true-positive rate (TPR) of Zisland Explorer was at least 10.3% higher than that of four other widely used tools. 
On the other hand, the new tool did not lose overall accuracy with the improvement in the TPR and showed better equilibrium among various evaluation indexes. Also, Zisland Explorer showed better accuracy in the prediction of experimental island data. Overall, the tool provides an alternative solution over other tools, which expands the field of island prediction and offers a supplement to increase the performance of the distinct predicting strategy. We have provided a web service as well as a graphical user interface and open-source code across multiple platforms for Zisland Explorer, which is available at http://cefg.uestc.edu.cn/Zisland_Explorer/ or http://tubic.tju.edu.cn/Zisland_Explorer/.",2017-05-01 +29916808,Evaluation and Management Strategies for Per- and Polyfluoroalkyl Substances (PFASs) in Drinking Water Aquifers: Perspectives from Impacted U.S. Northeast Communities.,"

Background

Multiple Northeast U.S. communities have discovered per- and polyfluoroalkyl substances (PFASs) in drinking water aquifers in excess of health-based regulatory levels or advisories. Regional stakeholders (consultants, regulators, and others) need technical background and tools to mitigate risks associated with exposure to PFAS-affected groundwater.

Objectives

The aim was to identify challenges faced by stakeholders to extend best practices to other regions experiencing PFAS releases and to establish a framework for research strategies and best management practices.

Methods and approach

Management challenges were identified during stakeholder engagement events connecting attendees with PFAS experts in focus areas, including fate/transport, toxicology, and regulation. Review of the literature provided perspective on challenges in all focus areas. Publicly available data were used to characterize sources of PFAS impacts in groundwater and conduct a geospatial case study of potential source locations relative to drinking water aquifers in Rhode Island.

Discussion

Challenges in managing PFAS impacts in drinking water arise from the large number of relevant PFASs, unconsolidated information regarding sources, and limited studies on some PFASs. In particular, there is still considerable uncertainty regarding human health impacts of PFASs. Frameworks sequentially evaluating exposure, persistence, and treatability can prioritize PFASs for evaluation of potential human health impacts. A regional case study illustrates how risk-based, geospatial methods can help address knowledge gaps regarding potential sources of PFASs in drinking water aquifers and evaluate risk of exposure.

Conclusion

Lessons learned from stakeholder engagement can assist in developing strategies for management of PFASs in other regions. However, current management practices primarily target a subset of PFASs for which in-depth studies are available. Exposure to less-studied, co-occurring PFASs remains largely unaddressed. Frameworks leveraging the current state of science can be applied toward accelerating this process and reducing exposure to total PFASs in drinking water, even as research regarding health effects continues. https://doi.org/10.1289/EHP2727.",2018-06-15 +28796757,"Summary of Notifiable Infectious Diseases and Conditions - United States, 2015.","The Summary of Notifiable Infectious Diseases and Conditions - United States, 2015 (hereafter referred to as the summary) contains the official statistics, in tabular and graphical form, for the reported occurrence of nationally notifiable infectious diseases and conditions in the United States for 2015. Unless otherwise noted, data are final totals for 2015 reported as of June 30, 2016. These statistics are collected and compiled from reports sent by U.S. state and territories, New York City, and District of Columbia health departments to the National Notifiable Diseases Surveillance System (NNDSS), which is operated by CDC in collaboration with the Council of State and Territorial Epidemiologists (CSTE). This summary is available at https://www.cdc.gov/MMWR/MMWR_nd/index.html. This site also includes summary publications from previous years.",2017-08-11 +25378338,"HIV-1, human interaction database: current status and new features.","The 'Human Immunodeficiency Virus Type 1 (HIV-1), Human Interaction Database', available through the National Library of Medicine at http://www.ncbi.nlm.nih.gov/genome/viruses/retroviruses/hiv-1/interactions, serves the scientific community exploring the discovery of novel HIV vaccine candidates and therapeutic targets. 
Each HIV-1 human protein interaction can be retrieved without restriction by web-based downloads and ftp protocols and includes: Reference Sequence (RefSeq) protein accession numbers, National Center for Biotechnology Information Gene identification numbers, brief descriptions of the interactions, searchable keywords for interactions and PubMed identification numbers (PMIDs) of journal articles describing the interactions. In addition to specific HIV-1 protein-human protein interactions, included are interaction effects upon HIV-1 replication resulting when individual human gene expression is blocked using siRNA. A total of 3142 human genes are described participating in 12,786 protein-protein interactions, along with 1316 replication interactions described for each of 1250 human genes identified using small interfering RNA (siRNA). Together the data identifies 4006 human genes involved in 14,102 interactions. With the inclusion of siRNA interactions we introduce a redesigned web interface to enhance viewing, filtering and downloading of the combined data set.",2014-11-06 +24760958,qcML: an exchange format for quality control metrics from mass spectrometry experiments.,"Quality control is increasingly recognized as a crucial aspect of mass spectrometry based proteomics. Several recent papers discuss relevant parameters for quality control and present applications to extract these from the instrumental raw data. What has been missing, however, is a standard data exchange format for reporting these performance metrics. We therefore developed the qcML format, an XML-based standard that follows the design principles of the related mzML, mzIdentML, mzQuantML, and TraML standards from the HUPO-PSI (Proteomics Standards Initiative). 
In addition to the XML format, we also provide tools for the calculation of a wide range of quality metrics as well as a database format and interconversion tools, so that existing LIMS systems can easily add relational storage of the quality control data to their existing schema. We here describe the qcML specification, along with possible use cases and an illustrative example of the subsequent analysis possibilities. All information about qcML is available at http://code.google.com/p/qcml.",2014-04-23 +25414346,ChiTaRS 2.1--an improved database of the chimeric transcripts and RNA-seq data with novel sense-antisense chimeric RNA transcripts.,"Chimeric RNAs that comprise two or more different transcripts have been identified in many cancers and among the Expressed Sequence Tags (ESTs) isolated from different organisms; they might represent functional proteins and produce different disease phenotypes. The ChiTaRS 2.1 database of chimeric transcripts and RNA-Seq data (http://chitars.bioinfo.cnio.es/) is the second version of the ChiTaRS database and includes improvements in content and functionality. Chimeras from eight organisms have been collated including novel sense-antisense (SAS) chimeras resulting from the slippage of the sense and anti-sense intragenic regions. The new database version collects more than 29,000 chimeric transcripts and indicates the expression and tissue specificity for 333 entries confirmed by RNA-seq reads mapping the chimeric junction sites. User interface allows for rapid and easy analysis of evolutionary conservation of fusions, literature references and experimental data supporting fusions in different organisms. More than 1428 cancer breakpoints have been automatically collected from public databases and manually verified to identify their correct cross-references, genomic sequences and junction sites. 
As a result, the ChiTaRS 2.1 collection of chimeras from eight organisms and human cancer breakpoints extends our understanding of the evolution of chimeric transcripts in eukaryotes as well as their functional role in carcinogenic processes.",2014-11-20 +28275348,CerebroMatic: A Versatile Toolbox for Spline-Based MRI Template Creation.,"Brain image spatial normalization and tissue segmentation rely on prior tissue probability maps. Appropriately selecting these tissue maps becomes particularly important when investigating ""unusual"" populations, such as young children or elderly subjects. When creating such priors, the disadvantage of applying more deformation must be weighed against the benefit of achieving a crisper image. We have previously suggested that statistically modeling demographic variables, instead of simply averaging images, is advantageous. Both aspects (more vs. less deformation and modeling vs. averaging) were explored here. We used imaging data from 1914 subjects, aged 13 months to 75 years, and employed multivariate adaptive regression splines to model the effects of age, field strength, gender, and data quality. Within the spm/cat12 framework, we compared an affine-only with a low- and a high-dimensional warping approach. As expected, more deformation on the individual level results in lower group dissimilarity. Consequently, effects of age in particular are less apparent in the resulting tissue maps when using a more extensive deformation scheme. Using statistically-described parameters, high-quality tissue probability maps could be generated for the whole age range; they are consistently closer to a gold standard than conventionally-generated priors based on 25, 50, or 100 subjects. Distinct effects of field strength, gender, and data quality were seen. We conclude that an extensive matching for generating tissue priors may model much of the variability inherent in the dataset which is then not contained in the resulting priors. 
Further, the statistical description of relevant parameters (using regression splines) allows for the generation of high-quality tissue probability maps while controlling for known confounds. The resulting CerebroMatic toolbox is available for download at http://irc.cchmc.org/software/cerebromatic.php.",2017-02-22 +24163255,The pancreatic expression database: recent extensions and updates.,"The Pancreatic Expression Database (PED, http://www.pancreasexpression.org) is the only device currently available for mining of pancreatic cancer literature data. It brings together the largest collection of multidimensional pancreatic data from the literature including genomic, proteomic, microRNA, methylomic and transcriptomic profiles. PED allows the user to ask specific questions on the observed levels of deregulation among a broad range of specimen/experimental types including healthy/patient tissue and body fluid specimens, cell lines and murine models as well as related treatments/drugs data. Here we provide an update to PED, which has been previously featured in the Database issue of this journal. Briefly, PED data content has been substantially increased and expanded to cover methylomics studies. We introduced an extensive controlled vocabulary that records specific details on the samples and added data from large-scale meta-analysis studies. The web interface has been improved/redesigned with a quick search option to rapidly extract information about a gene/protein of interest and an upload option allowing users to add their own data to PED. We added a user guide and implemented integrated graphical tools to overlay and visualize retrieved information. Interoperability with biomart-compatible data sets was significantly improved to allow integrative queries with pancreatic cancer data.",2013-10-25 +29990126,Sequence-based prediction of putative transcription factor binding sites in DNA sequences of any length. 
,"A transcription factor (TF) is a protein that regulates gene expression by binding to specific DNA sequences. Despite the recent advances in experimental techniques for identifying transcription factor binding sites (TFBS) in DNA sequences, a large number of TFBS are to be unveiled in many species. Several computational methods developed for predicting TFBS in DNA are tissue- or species-specific methods, so cannot be used without prior knowledge of tissue or species. Some computational methods are applicable to finding TFBS in short DNA sequences only. In this paper we propose a new learning method for predicting TFBS in DNA of any length using the composition, transition and distribution of nucleotides and amino acids in DNA and TF sequences. In independent testing of the method on datasets that were not used in training the method, its accuracy and MCC were as high as 81.84% and 0.634, respectively. The proposed method can be a useful aid for selecting potential TFBS in a large amount of DNA sequences before conducting biochemical experiments to empirically determine TFBS. The program and data sets are available at http://bclab.inha.ac.kr/TFbinding.",2017-11-14 +22975402,Transversus abdominis plane (TAP) blocks-a review.,"

Introduction

Effective post-operative pain management can positively influence patient outcome. Multimodal analgesic regimes are often limited by side-effects. Epidural analgesia may be resource-consuming, restrict mobility and have negative cardiovascular and gastrointestinal consequences. Consequently, there is a need for regional anaesthetic techniques to minimise opioid use, and provide alternatives to epidurals, especially within the context of minimally invasive abdominal surgery and enhanced recovery programmes. This review aims to evaluate the evidence base underlying Transversus abdominis plane (TAP) blockade.

Methods

A literature search was performed using the PubMed database (http://www.ncbi.nlm.nih.gov/pubmed/) using the parameters 'transversus abdominis plane' and 'TAP'. The references within were then searched for applicable studies. Case reports and correspondence were excluded.

Findings

Thirteen studies assessed technique and mechanisms of action. Fourteen clinical studies involved a total of 1250 patients. Seven studies (6 Randomised Controlled Trials, RCTs) demonstrated reductions in post-operative morphine requirements (33.3%-73.1%). Five RCTs demonstrated concomitant improvements in pain scores. Five RCTs demonstrated reduced opioid side effects. The one study assessing functional outcome (a Prospective Controlled Trial, PCT) demonstrated earlier return of gastrointestinal function and hospital discharge.

Conclusion

The limited evidence to date suggests that TAP blockade is an effective adjunct to multimodal post-operative analgesia following a range of abdominal surgical procedures. Whether TAP blocks are a viable alternative to epidural analgesia remains to be determined. However, it is likely that as this technique grows in popularity its role, particularly that in enhanced recovery programmes, will be better delineated and refined.",2012-09-10 +29036591,PLACNETw: a web-based tool for plasmid reconstruction from bacterial genomes.,"

Summary

PLACNET is a graph-based tool for reconstruction of plasmids from next generation sequence pair-end datasets. PLACNET graphs contain two types of nodes (assembled contigs and reference genomes) and two types of edges (scaffold links and homology to references). Manual pruning of the graphs is a necessary requirement in PLACNET, but this is difficult for users without a solid bioinformatics background. PLACNETw, a webtool based on PLACNET, provides an interactive graphic interface, automates BLAST searches, and extracts the relevant information for decision making. It allows a user with domain expertise to visualize the scaffold graphs and related information of contigs as well as reference sequences, so that the pruning operations can be done interactively from a personal computer without the need for additional tools. After successful pruning, each plasmid becomes a separate connected component subgraph. The resulting data are automatically downloaded by the user.

Availability and implementation

PLACNETw is freely available at https://castillo.dicom.unican.es/upload/.

Contact

delacruz@unican.es.

Supplementary information

A tutorial video and several solved examples are available at https://castillo.dicom.unican.es/placnetw_video/ and https://castillo.dicom.unican.es/examples/.",2017-12-01 +22984621,Insect Innate Immunity Database (IIID): an annotation tool for identifying immune genes in insect genomes.,"The innate immune system is an ancient component of host defense. Since innate immunity pathways are well conserved throughout many eukaryotes, immune genes in model animals can be used to putatively identify homologous genes in newly sequenced genomes of non-model organisms. With the initiation of the ""i5k"" project, which aims to sequence 5,000 insect genomes by 2016, many novel insect genomes will soon become publicly available, yet few annotation resources are currently available for insects. Thus, we developed an online tool called the Insect Innate Immunity Database (IIID) to provide an open access resource for insect immunity and comparative biology research (http://www.vanderbilt.edu/IIID). The database provides users with simple exploratory tools to search the immune repertoires of five insect models (including Nasonia), spanning three orders, for specific immunity genes or genes within a particular immunity pathway. As a proof of principle, we used an initial database with only four insect models to annotate potential immune genes in the parasitoid wasp genus Nasonia. Results specify 306 putative immune genes in the genomes of N. vitripennis and its two sister species N. giraulti and N. longicornis. Of these genes, 146 were not found in previous annotations of Nasonia immunity genes. Combining these newly identified immune genes with those in previous annotations, Nasonia possess 489 putative immunity genes, the largest immune repertoire found in insects to date. 
While these computational predictions need to be complemented with functional studies, the IIID database can help initiate and augment annotations of the immune system in the plethora of insect genomes that will soon become available.",2012-09-12 +26983023,A Systems Biology Interpretation of Array Comparative Genomic Hybridization (aCGH) Data through Phylogenetics.,"Array Comparative Genomic Hybridization (aCGH) is a rapid screening technique to detect gene deletions and duplications, providing an overview of chromosomal aberrations throughout the entire genome of a tumor, without the need for cell culturing. However, the heterogeneity of aCGH data obfuscates existing methods of data analysis. Analysis of aCGH data from a systems biology perspective or in the context of total aberrations is largely absent in the published literature. We present here a novel alternative to the functional analysis of aCGH data using the phylogenetic paradigm that is well-suited to high dimensional datasets of heterogeneous nature, but has not been widely adapted to aCGH data. Maximum parsimony phylogenetic analysis sorts out genetic data through the simplest presentation of the data on a cladogram, a graphical evolutionary tree, thus providing a powerful and efficient method for aCGH data analysis. For example, the cladogram models the multiphasic changes in the cancer genome and identifies shared early mutations in the disease progression, providing a simple yet powerful means of aCGH data interpretation. As such, applying maximum parsimony phylogenetic analysis to aCGH results allows for the differentiation between drivers and passenger genes aberrations in cancer specimens. In addition to offering a novel methodology to analyze aCGH results, we present here a crucial software suite that we wrote to carry out the analysis. 
In a broader context, we wish to underscore that phylogenetic analysis of aCGH data is a non-parametric method that circumvents the pitfalls and frustrations of standard analytical techniques that rely on parametric statistics. Organizing the data in a cladogram as explained in this research article provides insights into the disease common aberrations, as well as the disease subtypes and their shared aberrations (the synapomorphies) of each subtype. Hence, we report the method and make the software suite publicly and freely available at http://software.phylomcs.com so that researchers can test alternative and innovative approaches to the analysis of aCGH data.",2016-03-01 +25629077,Genetic variability of microRNA regulome in human.,"MicroRNAs are currently being extensively studied due to their important role as post-transcriptional regulators. During miRNA biogenesis, precursors undergo two cleavage steps performed by Drosha-DGCR8 (Microprocessor) cleaving of pri-miRNA to produce pre-miRNA and Dicer-mediated cleaving to create mature miRNA. Genetic variants within human miRNA regulome have been shown to influence miRNA expression, target interaction and to affect the phenotype. In this study, we reviewed the literature, existing bioinformatics tools and catalogs associated with polymorphic miRNA regulome, and organized them into four categories: (1) polymorphisms located within miRNA genes (miR-SNPs), (2) transcription factor-binding sites/miRNA regulatory regions (miR-rSNPs), (3) miRNA target sites (miR-TS-SNPs), and 4. miRNA silencing machinery (miR-SM-SNPs). Since the miR-SM-SNPs have not been systematically studied yet, we have collected polymorphisms associated with miRNA silencing machinery. We have developed two catalogs containing genetic variability within: (1) genes encoding three main catalytic components of the silencing machinery, DROSHA, DGCR8, and DICER1; (2) miRNA genes itself, overlapping Drosha and Dicer cleavage sites. 
The developed resource of polymorphisms is available online (http://www.integratomics-time.com/miRNA-regulome) and will be useful for further functional studies and development of biomarkers associated with diseases and phenotypic traits.",2014-09-15 +27340825,Historical Prediction Modeling Approach for Estimating Long-Term Concentrations of PM2.5 in Cohort Studies before the 1999 Implementation of Widespread Monitoring.,"

Introduction

Recent cohort studies have used exposure prediction models to estimate the association between long-term residential concentrations of fine particulate matter (PM2.5) and health. Because these prediction models rely on PM2.5 monitoring data, predictions for times before extensive spatial monitoring present a challenge to understanding long-term exposure effects. The U.S. Environmental Protection Agency (EPA) Federal Reference Method (FRM) network for PM2.5 was established in 1999.

Objectives

We evaluated a novel statistical approach to produce high-quality exposure predictions from 1980 through 2010 in the continental United States for epidemiological applications.

Methods

We developed spatio-temporal prediction models using geographic predictors and annual average PM2.5 data from 1999 through 2010 from the FRM and the Interagency Monitoring of Protected Visual Environments (IMPROVE) networks. Temporal trends before 1999 were estimated by using a) extrapolation based on PM2.5 data in FRM/IMPROVE, b) PM2.5 sulfate data in the Clean Air Status and Trends Network, and c) visibility data across the Weather Bureau Army Navy network. We validated the models using PM2.5 data collected before 1999 from IMPROVE, California Air Resources Board dichotomous sampler monitoring (CARB dichot), the Children's Health Study (CHS), and the Inhalable Particulate Network (IPN).

Results

In our validation using pre-1999 data, the prediction model performed well across three trend estimation approaches when validated using IMPROVE and CHS data (R2 = 0.84-0.91) with lower R2 values in early years. Model performance using CARB dichot and IPN data was worse (R2 = 0.00-0.85) most likely because of fewer monitoring sites and inconsistent sampling methods.

Conclusions

Our prediction modeling approach will allow health effects estimation associated with long-term exposures to PM2.5 over extended time periods ≤ 30 years. Citation: Kim SY, Olives C, Sheppard L, Sampson PD, Larson TV, Keller JP, Kaufman JD. 2017. Historical prediction modeling approach for estimating long-term concentrations of PM2.5 in cohort studies before the 1999 implementation of widespread monitoring. Environ Health Perspect 125:38-46; http://dx.doi.org/10.1289/EHP131.",2016-06-24 +28635627,"Molecular Quantum Similarity, Chemical Reactivity and Database Screening of 3D Pharmacophores of the Protein Kinases A, B and G from Mycobacterium tuberculosis. ","Mycobacterium tuberculosis remains one of the world's most devastating pathogens. For this reason, we developed a study involving 3D pharmacophore searching, selectivity analysis and database screening for a series of anti-tuberculosis compounds, associated with the protein kinases A, B, and G. This theoretical study is expected to shed some light onto some molecular aspects that could contribute to the knowledge of the molecular mechanics behind interactions of these compounds, with anti-tuberculosis activity. Using the Molecular Quantum Similarity field and reactivity descriptors supported in the Density Functional Theory, it was possible to measure the quantification of the steric and electrostatic effects through the Overlap and Coulomb quantitative convergence (alpha and beta) scales. In addition, an analysis of reactivity indices using global and local descriptors was developed, identifying the binding sites and selectivity on these anti-tuberculosis compounds in the active sites. Finally, the reported pharmacophores to PKn A, B and G, were used to carry out database screening, using a database with anti-tuberculosis drugs from the Kelly Chibale research group (http://www.kellychibaleresearch.uct.ac.za/), to find the compounds with affinity for the specific protein targets associated with PKn A, B and G. 
In this regard, this hybrid methodology (Molecular Mechanic/Quantum Chemistry) shows new insights into drug design that may be useful in the tuberculosis treatment today.",2017-06-21 +28454514,QueryOR: a comprehensive web platform for genetic variant analysis and prioritization.,"

Background

Whole genome and exome sequencing are contributing to the extraordinary progress in the study of human genetic variants. In this fast developing field, appropriate and easily accessible tools are required to facilitate data analysis.

Results

Here we describe QueryOR, a web platform suitable for searching among known candidate genes as well as for finding novel gene-disease associations. QueryOR combines several innovative features that make it comprehensive, flexible and easy to use. Instead of being designed on specific datasets, it works on a general XML schema specifying formats and criteria of each data source. Thanks to this flexibility, new criteria can be easily added for future expansion. Currently, up to 70 user-selectable criteria are available, including a wide range of gene and variant features. Moreover, rather than progressively discarding variants taking one criterion at a time, the prioritization is achieved by a global positive selection process that considers all transcript isoforms, thus producing reliable results. QueryOR is easy to use and its intuitive interface allows users to handle different kinds of inheritance as well as features related to sharing variants in different patients. QueryOR is suitable for investigating single patients, families or cohorts.

Conclusions

QueryOR is a comprehensive and flexible web platform eligible for an easy user-driven variant prioritization. It is freely available for academic institutions at http://queryor.cribi.unipd.it/ .",2017-04-28 +26170051,The TimeStudio Project: An open source scientific workflow system for the behavioral and brain sciences.,"This article describes a new open source scientific workflow system, the TimeStudio Project, dedicated to the behavioral and brain sciences. The program is written in MATLAB and features a graphical user interface for the dynamic pipelining of computer algorithms developed as TimeStudio plugins. TimeStudio includes both a set of general plugins (for reading data files, modifying data structures, visualizing data structures, etc.) and a set of plugins specifically developed for the analysis of event-related eyetracking data as a proof of concept. It is possible to create custom plugins to integrate new or existing MATLAB code anywhere in a workflow, making TimeStudio a flexible workbench for organizing and performing a wide range of analyses. The system also features an integrated sharing and archiving tool for TimeStudio workflows, which can be used to share workflows both during the data analysis phase and after scientific publication. TimeStudio thus facilitates the reproduction and replication of scientific studies, increases the transparency of analyses, and reduces individual researchers' analysis workload. The project website ( http://timestudioproject.com ) contains the latest releases of TimeStudio, together with documentation and user forums.",2016-06-01 +27846806,GEN3VA: aggregation and analysis of gene expression signatures from related studies.,"

Background

Genome-wide gene expression profiling of mammalian cells is becoming a staple of many published biomedical and biological research studies. Such data is deposited into data repositories such as the Gene Expression Omnibus (GEO) for potential reuse. However, these repositories currently do not provide simple interfaces to systematically analyze collections of related studies.

Results

Here we present GENE Expression and Enrichment Vector Analyzer (GEN3VA), a web-based system that enables the integrative analysis of aggregated collections of tagged gene expression signatures identified and extracted from GEO. Each tagged collection of signatures is presented in a report that consists of heatmaps of the differentially expressed genes; principal component analysis of all signatures; enrichment analysis with several gene set libraries across all signatures, which we term enrichment vector analysis; and global mapping of small molecules that are predicted to reverse or mimic each signature in the aggregate. We demonstrate how GEN3VA can be used to identify common molecular mechanisms of aging by analyzing tagged signatures from 244 studies that compared young vs. old tissues in mammalian systems. In a second case study, we collected 86 signatures from treatment of human cells with dexamethasone, a glucocorticoid receptor (GR) agonist. Our analysis confirms consensus GR target genes and predicts potential drug mimickers.

Conclusions

GEN3VA can be used to identify, aggregate, and analyze themed collections of gene expression signatures from diverse but related studies. Such integrative analyses can be used to address concerns about data reproducibility, confirm results across labs, and discover new collective knowledge by data reuse. GEN3VA is an open-source web-based system that is freely available at: http://amp.pharm.mssm.edu/gen3va .",2016-11-15 +27013597,EHDViz: clinical dashboard development using open-source technologies.,"

Objective

To design, develop and prototype clinical dashboards to integrate high-frequency health and wellness data streams using interactive and real-time data visualisation and analytics modalities.

Materials and methods

We developed a clinical dashboard development framework called the electronic healthcare data visualization (EHDViz) toolkit for generating web-based, real-time clinical dashboards for visualising heterogeneous biomedical, healthcare and wellness data. The EHDViz is an extensible toolkit that uses R packages for data management, normalisation and producing high-quality visualisations over the web using R/Shiny web server architecture. We have developed use cases to illustrate the utility of EHDViz in different scenarios of clinical and wellness settings as a visualisation aid for improving healthcare delivery.

Results

Using EHDViz, we prototyped clinical dashboards to demonstrate the contextual versatility of EHDViz toolkit. An outpatient cohort was used to visualise population health management tasks (n=14,221), and an inpatient cohort was used to visualise real-time acuity risk in a clinical unit (n=445), and a quantified-self example using wellness data from a fitness activity monitor worn by a single individual was also discussed (n-of-1). The back-end system retrieves relevant data from data source, populates the main panel of the application and integrates user-defined data features in real-time and renders output using modern web browsers. The visualisation elements can be customised using health features, disease names, procedure names or medical codes to populate the visualisations. The source code of EHDViz and various prototypes developed using EHDViz are available in the public domain at http://ehdviz.dudleylab.org.

Conclusions

Collaborative data visualisations, wellness trend predictions, risk estimation, proactive acuity status monitoring and knowledge of complex disease indicators are essential components of implementing data-driven precision medicine. As an open-source visualisation framework capable of integrating health assessment, EHDViz aims to be a valuable toolkit for rapid design, development and implementation of scalable clinical data visualisation dashboards.",2016-03-24 +26207595,Geographic distribution of ATP7B mutations in Wilson disease.,"

Context

Geographic distribution of ATP7B mutations in different populations.

Objective

To summarise common mutations in the ATP7B gene and graphically illustrate their prevalence in different populations.

Methods

A literature search was done using PubMed and the Wilson Disease Mutation Database (http://www.wilsondisease.med.ualberta.ca/database).

Results

p.His1069Gln is the most prevalent mutation seen in Europe. In the Mediterranean countries, the array of prevalent mutations is different from the rest of Europe. In Far East Asian countries, the mutation p.Arg778Leu is the most common. In India, no single mutation seems to be dominant, owing to the vast ethnic diversity of the country. The p.Cys271* mutation is dominant in the east, west and south, but not reported in the north. In the Middle East, data from Saudi Arabia shows the p.Gln1399Arg mutation as the most prevalent. In the US, the p.His1069Gln is dominant, whereas in Brazil the mutation c.3402delC dominates.

Conclusion

Clinical features in WD patients can be misleading and often absent. Genetic testing is used to confirm the diagnosis. However, owing to the large gene size and vast diversity in the mutations, genetic testing can be time-consuming and tedious. This study reviews ATP7B mutations seen in different populations and can help develop time-saving methods and expedite the process of genetic analysis of WD.",2015-07-24 +,Using district-level occurrences in MaxEnt for predicting the invasion potential of an exotic insect pest in India,"Insect pests are a major threat to agricultural biosecurity across the world, causing substantial economic losses. Majority of the species distribution modeling studies use precise coordinates (latitude/longitude) of species occurrences in MaxEnt (or maximum entropy model). However, lack of precise coordinates of insect pest occurrences at national/regional level is a common problem for many countries including India. This is because of the limited resources, lack of nationally coordinated surveys, and growers/farmers’ privacy issues; district-level occurrences are commonly available (e.g., National Agricultural Pest Information System or NAPIS in the United States; http://pest.ceris.purdue.edu/). We demonstrated the use of MaxEnt to generate a preliminary, district-level map of the potential risk of invasion by an exotic cotton mealybug Phenacoccus solenopsis (Tinsley) (Hemiptera: Pseudococcidae) in India. District-level occurrence data were integrated with bioclimatic variables (values averaged within districts) using MaxEnt. The MaxEnt model performed better than random with an average test AUC value of 0.86 (±0.05). Our model predictions matched closely with the documented occurrence of P. solenopsis in all nine cotton growing states, and also predicted suitable habitats in other districts across India. The greatest threat of P. 
solenopsis infestations were predicted in most districts of Gujarat, Maharashtra, Andhra Pradesh, southwestern Punjab, northwestern Rajasthan, and western Haryana. Precipitation of coldest quarter, temperature annual range, and precipitation seasonality were the strongest predictors associated with P. solenopsis distribution. Precipitation of coldest quarter was negatively correlated with P. solenopsis occurrence. Mapping the potential distribution of invasive species is an iterative process, and our study is the first attempt to model national-level risk assessment of P. solenopsis in India. Our results can be used for selecting monitoring and surveillance sites and designing local, regional and national-level integrated pest management policies for cotton and other cultivated crops in India. The maps of potential pest distributions are urgently needed by agriculture managers and policymakers. Our approach can be used in other countries that lack precise coordinates of insect pest occurrences and generate a preliminary map of potential risk because it may be too late to wait for the precise coordinates of pest occurrences to generate a perfect map.",2014-04-01 +28968719,LightDock: a new multi-scale approach to protein-protein docking.,"

Motivation

Computational prediction of protein-protein complex structure by docking can provide structural and mechanistic insights for protein interactions of biomedical interest. However, current methods struggle with difficult cases, such as those involving flexible proteins, low-affinity complexes or transient interactions. A major challenge is how to efficiently sample the structural and energetic landscape of the association at different resolution levels, given that each scoring function is often highly coupled to a specific type of search method. Thus, new methodologies capable of accommodating multi-scale conformational flexibility and scoring are strongly needed.

Results

We describe here a new multi-scale protein-protein docking methodology, LightDock, capable of accommodating conformational flexibility and a variety of scoring functions at different resolution levels. Implicit use of normal modes during the search and atomic/coarse-grained combined scoring functions yielded improved predictive results with respect to state-of-the-art rigid-body docking, especially in flexible cases.

Availability and implementation

The source code of the software and installation instructions are available for download at https://life.bsc.es/pid/lightdock/.

Contact

juanf@bsc.es.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +27248514,SMART: Statistical Metabolomics Analysis-An R Tool.,"Metabolomics data provide unprecedented opportunities to decipher metabolic mechanisms by analyzing hundreds to thousands of metabolites. Data quality concerns and complex batch effects in metabolomics must be appropriately addressed through statistical analysis. This study developed an integrated analysis tool for metabolomics studies to streamline the complete analysis flow from initial data preprocessing to downstream association analysis. We developed Statistical Metabolomics Analysis-An R Tool (SMART), which can analyze input files with different formats, visually represent various types of data features, implement peak alignment and annotation, conduct quality control for samples and peaks, explore batch effects, and perform association analysis. A pharmacometabolomics study of antihypertensive medication was conducted and data were analyzed using SMART. Neuromedin N was identified as a metabolite significantly associated with angiotensin-converting-enzyme inhibitors in our metabolome-wide association analysis (p = 1.56 × 10(-4) in an analysis of covariance (ANCOVA) with an adjustment for unknown latent groups and p = 1.02 × 10(-4) in an ANCOVA with an adjustment for hidden substructures). This endogenous neuropeptide is highly related to neurotensin and neuromedin U, which are involved in blood pressure regulation and smooth muscle contraction. The SMART software, a user guide, and example data can be downloaded from http://www.stat.sinica.edu.tw/hsinchou/metabolomics/SMART.htm .",2016-06-01 +27466620,plasmidSPAdes: assembling plasmids from whole genome sequencing data.,"

Motivation

Plasmids are stably maintained extra-chromosomal genetic elements that replicate independently from the host cell's chromosomes. Although plasmids harbor biomedically important genes, (such as genes involved in virulence and antibiotics resistance), there is a shortage of specialized software tools for extracting and assembling plasmid data from whole genome sequencing projects.

Results

We present the plasmidSPAdes algorithm and software tool for assembling plasmids from whole genome sequencing data and benchmark its performance on a diverse set of bacterial genomes.

Availability and implementation

plasmidSPAdes is publicly available at http://spades.bioinf.spbau.ru/plasmidSPAdes/ CONTACT: d.antipov@spbu.ru. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-27 +24608034,BCL2DB: database of BCL-2 family members and BH3-only proteins.,"BCL2DB (http://bcl2db.ibcp.fr) is a database designed to integrate data on BCL-2 family members and BH3-only proteins. These proteins control the mitochondrial apoptotic pathway and probably many other cellular processes as well. This large protein group is formed by a family of pro-apoptotic and anti-apoptotic homologs that have phylogenetic relationships with BCL-2, and by a collection of evolutionarily and structurally unrelated proteins characterized by the presence of a region of local sequence similarity with BCL-2, termed the BH3 motif. BCL2DB is monthly built, thanks to an automated procedure relying on a set of homemade profile HMMs computed from seed reference sequences representative of the various BCL-2 homologs and BH3-only proteins. The BCL2DB entries integrate data from the Ensembl, Ensembl Genomes, European Nucleotide Archive and Protein Data Bank databases and are enriched with specific information like protein classification into orthology groups and distribution of BH motifs along the sequences. The Web interface allows for easy browsing of the site and fast access to data, as well as sequence analysis with generic and specific tools. BCL2DB provides a helpful and powerful tool to both 'BCL-2-ologists' and researchers working in the various fields of physiopathology. Database URL: http://bcl2db.ibcp.fr.",2014-03-06 +27886717,RUbioSeq+: A multiplatform application that executes parallelized pipelines to analyse next-generation sequencing data.,"

Background and objective

To facilitate routine analysis and to improve the reproducibility of the results, next-generation sequencing (NGS) analysis requires intuitive, efficient and integrated data processing pipelines.

Methods

We have selected well-established software to construct a suite of automated and parallelized workflows to analyse NGS data for DNA-seq (single-nucleotide variants (SNVs) and indels), CNA-seq, bisulfite-seq and ChIP-seq experiments.

Results

Here, we present RUbioSeq+, an updated and extended version of RUbioSeq, a multiplatform application that incorporates a suite of automated and parallelized workflows to analyse NGS data. This new version includes: (i) an interactive graphical user interface (GUI) that facilitates its use by both biomedical researchers and bioinformaticians, (ii) a new pipeline for ChIP-seq experiments, (iii) pair-wise comparisons (case-control analyses) for DNA-seq experiments, and (iv) improvements in the parallelized and multithreaded execution options. Results generated by our software have been experimentally validated and accepted for publication.

Conclusions

RUbioSeq+ is free and open to all users at http://rubioseq.bioinfo.cnio.es/.",2016-10-26 +25254103,WikiPathways App for Cytoscape: Making biological pathways amenable to network analysis and visualization.,"In this paper we present the open-source WikiPathways app for Cytoscape ( http://apps.cytoscape.org/apps/wikipathways) that can be used to import biological pathways for data visualization and network analysis. WikiPathways is an open, collaborative biological pathway database that provides fully annotated pathway diagrams for manual download or through web services. The WikiPathways app allows users to load pathways in two different views: as an annotated pathway ideal for data visualization and as a simple network to perform computational analysis. An example pathway and dataset are used to demonstrate the functionality of the WikiPathways app and how they can be combined and used together with other apps. More than 3000 downloads in the first 12 months following its release in August 2013 highlight the importance and adoption of the app in the network biology field.",2014-07-01 +28203700,Threshold-seq: a tool for determining the threshold in short RNA-seq datasets.,"

Summary

We present 'Threshold-seq,' a new approach for determining thresholds in deep-sequencing datasets of short RNA transcripts. Threshold-seq addresses the critical question of how many reads need to support a short RNA molecule in a given dataset before it can be considered different from 'background.' The proposed scheme is easy to implement and incorporate into existing pipelines.

Availability and implementation

Source code of Threshold-seq is freely available as an R package at: http://cm.jefferson.edu/threshold-seq/.

Contact

isidore.rigoutsos@jefferson.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +29714963,Green Space Visits among Adolescents: Frequency and Predictors in the PIAMA Birth Cohort Study.,"BACKGROUND:Green space may influence health through several pathways, for example, increased physical activity, enhanced social cohesion, reduced stress, and improved air quality. For green space to increase physical activity and social cohesion, spending time in green spaces is likely to be important. OBJECTIVES:We examined whether adolescents visit green spaces and for what purposes. Furthermore, we assessed the predictors of green space visits. METHODS:In this cross-sectional study, data for 1911 participants of the Dutch PIAMA (Prevention and Incidence of Asthma and Mite Allergy) birth cohort were analyzed. At age 17, adolescents reported how often they visited green spaces for physical activities, social activities, relaxation, and to experience nature and quietness. We assessed the predictors of green space visits altogether and for different purposes by log-binomial regression. RESULTS:Fifty-three percent of the adolescents visited green spaces at least once a week in summer, mostly for physical and social activities. Adolescents reporting that a green environment was (very) important to them visited green spaces most frequently {adjusted prevalence ratio (PR) [95% confidence interval (CI)] very vs. not important: 6.84 (5.10, 9.17) for physical activities and 4.76 (3.72, 6.09) for social activities}. Boys and adolescents with highly educated fathers visited green spaces more often for physical and social activities. Adolescents who own a dog visited green spaces more often to experience nature and quietness. 
Green space visits were not associated with the objectively measured quantity of residential green space, i.e., the average normalized difference vegetation index (NDVI) and percentages of urban, agricultural, and natural green space in circular buffers around the adolescents' homes. CONCLUSIONS:Subjective variables are stronger predictors of green space visits in adolescents than the objectively measured quantity of residential green space. https://doi.org/10.1289/EHP2429.",2018-04-30 +27984044,BFDCA: A Comprehensive Tool of Using Bayes Factor for Differential Co-Expression Analysis.,"Comparing the gene-expression profiles between biological conditions is useful for understanding gene regulation underlying complex phenotypes. Along this line, analysis of differential co-expression (DC) has gained attention in the recent years, where genes under one condition have different co-expression patterns compared with another. We developed an R package Bayes Factor approach for Differential Co-expression Analysis (BFDCA) for DC analysis. BFDCA is unique in integrating various aspects of DC patterns (including Shift, Cross, and Re-wiring) into one uniform Bayes factor. We tested BFDCA using simulation data and experimental data. Simulation results indicate that BFDCA outperforms existing methods in accuracy and robustness of detecting DC pairs and DC modules. Results of using experimental data suggest that BFDCA can cluster disease-related genes into functional DC subunits and estimate the regulatory impact of disease-related genes well. BFDCA also achieves high accuracy in predicting case-control phenotypes by using significant DC gene pairs as markers. BFDCA is publicly available at http://dx.doi.org/10.17632/jdz4vtvnm3.1.",2016-10-27 +25963834,Global Proteomics Analysis of the Response to Starvation in C. 
elegans.,"Periodic starvation of animals induces large shifts in metabolism but may also influence many other cellular systems and can lead to adaption to prolonged starvation conditions. To date, there is limited understanding of how starvation affects gene expression, particularly at the protein level. Here, we have used mass-spectrometry-based quantitative proteomics to identify global changes in the Caenorhabditis elegans proteome due to acute starvation of young adult animals. Measuring changes in the abundance of over 5,000 proteins, we show that acute starvation rapidly alters the levels of hundreds of proteins, many involved in central metabolic pathways, highlighting key regulatory responses. Surprisingly, we also detect changes in the abundance of chromatin-associated proteins, including specific linker histones, histone variants, and histone posttranslational modifications associated with the epigenetic control of gene expression. To maximize community access to these data, they are presented in an online searchable database, the Encyclopedia of Proteome Dynamics (http://www.peptracker.com/epd/).",2015-05-11 +26657901,Cancer incidence and survival in Lynch syndrome patients receiving colonoscopic and gynaecological surveillance: first report from the prospective Lynch syndrome database.,"

Objective

Estimates of cancer risk and the effects of surveillance in Lynch syndrome have been subject to bias, partly through reliance on retrospective studies. We sought to establish more robust estimates in patients undergoing prospective cancer surveillance.

Design

We undertook a multicentre study of patients carrying Lynch syndrome-associated mutations affecting MLH1, MSH2, MSH6 or PMS2. Standardised information on surveillance, cancers and outcomes were collated in an Oracle relational database and analysed by age, sex and mutated gene.

Results

1942 mutation carriers without previous cancer had follow-up including colonoscopic surveillance for 13 782 observation years. 314 patients developed cancer, mostly colorectal (n=151), endometrial (n=72) and ovarian (n=19). Cancers were detected from 25 years onwards in MLH1 and MSH2 mutation carriers, and from about 40 years in MSH6 and PMS2 carriers. Among first cancer detected in each patient the colorectal cancer cumulative incidences at 70 years by gene were 46%, 35%, 20% and 10% for MLH1, MSH2, MSH6 and PMS2 mutation carriers, respectively. The equivalent cumulative incidences for endometrial cancer were 34%, 51%, 49% and 24%; and for ovarian cancer 11%, 15%, 0% and 0%. Ten-year crude survival was 87% after any cancer, 91% if the first cancer was colorectal, 98% if endometrial and 89% if ovarian.

Conclusions

The four Lynch syndrome-associated genes had different penetrance and expression. Colorectal cancer occurred frequently despite colonoscopic surveillance but resulted in few deaths. Using our data, a website has been established at http://LScarisk.org enabling calculation of cumulative cancer risks as an aid to genetic counselling in Lynch syndrome.",2015-12-09 +24533660,Variation ontology: annotator guide.,"

Background

Systematic representation of information related to genetic and non-genetic variations is required to allow large scale studies, data mining and data integration, and to make it possible to reveal novel relationships between genotype and phenotype. Although lots of variation data is available it is often difficult to use due to lack of systematics.

Results

A novel ontology, Variation Ontology (VariO http://variationontology.org), was developed for annotation of effects, consequences and mechanisms of variations. In this article instructions are provided on how VariO annotations are made. The major levels for description are the three molecules, namely DNA, RNA and protein. They are further divided to four major sublevels: variation type, function, structure, and property, and further up to eight sublevels. VariO annotation summarizes existing knowledge about a variation and its effects and formalizes it so that computational analyses are efficient. The annotations should be made on as many levels as possible. VariO annotations are made in reference to normal states, which vary for each data item including e.g. reference sequences, wild type properties, and activities.

Conclusions

Detailed instructions together with examples are provided to indicate how VariO can be used for annotation of variations and their effects. A dedicated tool has been developed for annotation and will be further developed to cover also evidence for the annotations. VariO is suitable for annotation of data in many types of databases. As several different kinds of databases are in a process of adapting VariO annotations it is important to have guidelines to guarantee consistent annotation.",2014-02-17 +27600083,SNPConvert: SNP Array Standardization and Integration in Livestock Species. ,"One of the main advantages of single nucleotide polymorphism (SNP) array technology is providing genotype calls for a specific number of SNP markers at a relatively low cost. Since its first application in animal genetics, the number of available SNP arrays for each species has been constantly increasing. However, conversely to that observed in whole genome sequence data analysis, SNP array data does not have a common set of file formats or coding conventions for allele calling. Therefore, the standardization and integration of SNP array data from multiple sources have become an obstacle, especially for users with basic or no programming skills. Here, we describe the difficulties related to handling SNP array data, focusing on file formats, SNP allele coding, and mapping. We also present SNPConvert suite, a multi-platform, open-source, and user-friendly set of tools to overcome these issues. This tool, which can be integrated with open-source and open-access tools already available, is a first step towards an integrated system to standardize and integrate any type of raw SNP array data. The tool is available at: https://github.com/nicolazzie/SNPConvert.git.",2016-06-09 +27130330,G-DOC Plus - an integrative bioinformatics platform for precision medicine.,"

Background

G-DOC Plus is a data integration and bioinformatics platform that uses cloud computing and other advanced computational tools to handle a variety of biomedical BIG DATA including gene expression arrays, NGS and medical images so that they can be analyzed in the full context of other omics and clinical information.

Results

G-DOC Plus currently holds data from over 10,000 patients selected from private and public resources including Gene Expression Omnibus (GEO), The Cancer Genome Atlas (TCGA) and the recently added datasets from REpository for Molecular BRAin Neoplasia DaTa (REMBRANDT), caArray studies of lung and colon cancer, ImmPort and the 1000 genomes data sets. The system allows researchers to explore clinical-omic data one sample at a time, as a cohort of samples; or at the level of population, providing the user with a comprehensive view of the data. G-DOC Plus tools have been leveraged in cancer and non-cancer studies for hypothesis generation and validation; biomarker discovery and multi-omics analysis, to explore somatic mutations and cancer MRI images; as well as for training and graduate education in bioinformatics, data and computational sciences. Several of these use cases are described in this paper to demonstrate its multifaceted usability.

Conclusion

G-DOC Plus can be used to support a variety of user groups in multiple domains to enable hypothesis generation for precision medicine research. The long-term vision of G-DOC Plus is to extend this translational bioinformatics platform to stay current with emerging omics technologies and analysis methods to continue supporting novel hypothesis generation, analysis and validation for integrative biomedical research. By integrating several aspects of the disease and exposing various data elements, such as outpatient lab workup, pathology, radiology, current treatments, molecular signatures and expected outcomes over a web interface, G-DOC Plus will continue to strengthen precision medicine research. G-DOC Plus is available at: https://gdoc.georgetown.edu .",2016-04-30 +25348397,ComPPI: a cellular compartment-specific database for protein-protein interaction network analysis.,"Here we present ComPPI, a cellular compartment-specific database of proteins and their interactions enabling an extensive, compartmentalized protein-protein interaction network analysis (URL: http://ComPPI.LinkGroup.hu). ComPPI enables the user to filter biologically unlikely interactions, where the two interacting proteins have no common subcellular localizations and to predict novel properties, such as compartment-specific biological functions. ComPPI is an integrated database covering four species (S. cerevisiae, C. elegans, D. melanogaster and H. sapiens). The compilation of nine protein-protein interaction and eight subcellular localization data sets had four curation steps including a manually built, comprehensive hierarchical structure of >1600 subcellular localizations. ComPPI provides confidence scores for protein subcellular localizations and protein-protein interactions. 
ComPPI has user-friendly search options for individual proteins giving their subcellular localization, their interactions and the likelihood of their interactions considering the subcellular localization of their interacting partners. Download options of search results, whole-proteomes, organelle-specific interactomes and subcellular localization data are available on its website. Due to its novel features, ComPPI is useful for the analysis of experimental results in biochemistry and molecular biology, as well as for proteome-wide studies in bioinformatics and network science helping cellular biology, medicine and drug design.",2014-10-27 +29334898,seq-seq-pan: building a computational pan-genome data structure on whole genome alignment.,"

Background

The increasing application of next generation sequencing technologies has led to the availability of thousands of reference genomes, often providing multiple genomes for the same or closely related species. The current approach to represent a species or a population with a single reference sequence and a set of variations cannot represent their full diversity and introduces bias towards the chosen reference. There is a need for the representation of multiple sequences in a composite way that is compatible with existing data sources for annotation and suitable for established sequence analysis methods. At the same time, this representation needs to be easily accessible and extendable to account for the constant change of available genomes.

Results

We introduce seq-seq-pan, a framework that provides methods for adding or removing new genomes from a set of aligned genomes and uses these to construct a whole genome alignment. Throughout the sequential workflow the alignment is optimized for generating a representative linear presentation of the aligned set of genomes, that enables its usage for annotation and in downstream analyses.

Conclusions

By providing dynamic updates and optimized processing, our approach enables the usage of whole genome alignment in the field of pan-genomics. In addition, the sequential workflow can be used as a fast alternative to existing whole genome aligners for aligning closely related genomes. seq-seq-pan is freely available at https://gitlab.com/rki_bioinformatics.",2018-01-15 +23497449,Novel semantic similarity measure improves an integrative approach to predicting gene functional associations.,"

Background

Elucidation of the direct/indirect protein interactions and gene associations is required to fully understand the workings of the cell. This can be achieved through the use of both low- and high-throughput biological experiments and in silico methods. We present GAP (Gene functional Association Predictor), an integrative method for predicting and characterizing gene functional associations. GAP integrates different biological features using a novel taxonomy-based semantic similarity measure in predicting and prioritizing high-quality putative gene associations. The proposed similarity measure increases information gain from the available gene annotations. The annotation information is incorporated from several public pathway databases, Gene Ontology annotations as well as drug and disease associations from the scientific literature.

Results

We evaluated GAP by comparing its prediction performance with several other well-known functional interaction prediction tools over a comprehensive dataset of known direct and indirect interactions, and observed significantly better prediction performance. We also selected a small set of GAP's highly-scored novel predicted pairs (i.e., currently not found in any known database or dataset), and by manually searching the literature for experimental evidence accessible in the public domain, we confirmed different categories of predicted functional associations with available evidence of interaction. We also provided extra supporting evidence for subset of the predicted functionally-associated pairs using an expert curated database of genes associated to autism spectrum disorders.

Conclusions

GAP's predicted ""functional interactome"" contains ≈1M highly-scored predicted functional associations out of which about 90% are novel (i.e., not experimentally validated). GAP's novel predictions connect disconnected components and singletons to the main connected component of the known interactome. It can, therefore, be a valuable resource for biologists by providing corroborating evidence for and facilitating the prioritization of potential direct or indirect interactions for experimental validation. GAP is freely accessible through a web portal: http://ophid.utoronto.ca/gap.",2013-03-14 +29486390,Markovian encoding models in human splice site recognition using SVM.,"Splice site recognition is among the most significant and challenging tasks in bioinformatics due to its key role in gene annotation. Effective prediction of splice site requires nucleotide encoding methods that reveal the characteristics of DNA sequences to provide appropriate features to serve as input of machine learning classifiers. Markovian models are the most influential encoding methods that highly used for pattern recognition in biological data. However, a direct performance comparison of these methods in splice site domain has not been assessed yet. This study compares various Markovian encoding models for splice site prediction utilizing support vector machine, as the most outstanding learning method in the domain, and conducts a new precise evaluation of Markovian approaches that corrects this limitation. Moreover, a novel sequence encoding approach based on third order Markov model (MM3) is proposed. The experimental results show that the proposed method, namely MM3-SVM, performs significantly better than thirteen best known state-of-the-art algorithms, while tested on HS3D dataset considering several performance criteria. Further, it achieved higher prediction accuracy than several well-known tools like NNsplice, MEM, MM1, WMM, and GeneID, using an independent test set of 50 genes. 
We also developed MMSVM, a web tool to predict splice sites in any human sequence using the proposed approach. The MMSVM web server can be accessed at https://pashaei.shinyapps.io/mmsvm.",2018-02-14 +24479510,VIP Barcoding: composition vector-based software for rapid species identification based on DNA barcoding.,"Species identification based on short sequences of DNA markers, that is, DNA barcoding, has emerged as an integral part of modern taxonomy. However, software for the analysis of large and multilocus barcoding data sets is scarce. The Basic Local Alignment Search Tool (BLAST) is currently the fastest tool capable of handling large databases (e.g. >5000 sequences), but its accuracy is a concern and has been criticized for its local optimization. However, current more accurate software requires sequence alignment or complex calculations, which are time-consuming when dealing with large data sets during data preprocessing or during the search stage. Therefore, it is imperative to develop a practical program for both accurate and scalable species identification for DNA barcoding. In this context, we present VIP Barcoding: a user-friendly software in graphical user interface for rapid DNA barcoding. It adopts a hybrid, two-stage algorithm. First, an alignment-free composition vector (CV) method is utilized to reduce searching space by screening a reference database. The alignment-based K2P distance nearest-neighbour method is then employed to analyse the smaller data set generated in the first stage. In comparison with other software, we demonstrate that VIP Barcoding has (i) higher accuracy than Blastn and several alignment-free methods and (ii) higher scalability than alignment-based distance methods and character-based methods. These results suggest that this platform is able to deal with both large-scale and multilocus barcoding data with accuracy and can contribute to DNA barcoding for modern taxonomy. 
VIP Barcoding is free and available at http://msl.sls.cuhk.edu.hk/vipbarcoding/.",2014-03-07 +29193970,Predicted Biological Activity of Purchasable Chemical Space.,"Whereas 400 million distinct compounds are now purchasable within the span of a few weeks, the biological activities of most are unknown. To facilitate access to new chemistry for biology, we have combined the Similarity Ensemble Approach (SEA) with the maximum Tanimoto similarity to the nearest bioactive to predict activity for every commercially available molecule in ZINC. This method, which we label SEA+TC, outperforms both SEA and a naïve-Bayesian classifier via predictive performance on a 5-fold cross-validation of ChEMBL's bioactivity data set (version 21). Using this method, predictions for over 40% of compounds (>160 million) have either high significance (pSEA ≥ 40), high similarity (ECFP4MaxTc ≥ 0.4), or both, for one or more of 1382 targets well described by ligands in the literature. Using a further 1347 less-well-described targets, we predict activities for an additional 11 million compounds. To gauge whether these predictions are sensible, we investigate 75 predictions for 50 drugs lacking a binding affinity annotation in ChEMBL. The 535 million predictions for over 171 million compounds at 2629 targets are linked to purchasing information and evidence to support each prediction and are freely available via https://zinc15.docking.org and https://files.docking.org .",2017-12-29 +26078786,ChemDIS: a chemical-disease inference system based on chemical-protein interactions.,"

Background

The characterization of toxicities associated with environmental and industrial chemicals is required for risk assessment. However, we lack the toxicological data for a large portion of chemicals due to the high cost of experiments for a huge number of chemicals. The development of computational methods for identifying potential risks associated with chemicals is desirable for generating testable hypothesis to accelerate the hazard identification process.

Results

A chemical-disease inference system named ChemDIS was developed to facilitate hazard identification for chemicals. The chemical-protein interactions from a large database STITCH and protein-disease relationship from disease ontology and disease ontology lite were utilized for chemical-protein-disease inferences. Tools with user-friendly interfaces for enrichment analysis of functions, pathways and diseases were implemented and integrated into ChemDIS. An analysis on maleic acid and sibutramine showed that ChemDIS could be a useful tool for the identification of potential functions, pathways and diseases affected by poorly characterized chemicals.

Conclusions

ChemDIS is an integrated chemical-disease inference system for poorly characterized chemicals with potentially affected functions and pathways for experimental validation. ChemDIS server is freely accessible at http://cwtung.kmu.edu.tw/chemdis.",2015-06-15 +24234438,Developments in FINDbase worldwide database for clinically relevant genomic variation allele frequencies.,"FINDbase (http://www.findbase.org) aims to document frequencies of clinically relevant genomic variations, namely causative mutations and pharmacogenomic markers, worldwide. Each database record includes the population, ethnic group or geographical region, the disorder name and the related gene, accompanied by links to any related databases and the genetic variation together with its frequency in that population. Here, we report, in addition to the regular data content updates, significant developments in FINDbase, related to data visualization and querying, data submission, interrelation with other resources and a new module for genetic disease summaries. In particular, (i) we have developed new data visualization tools that facilitate data querying and comparison among different populations, (ii) we have generated a new FINDbase module, built around Microsoft's PivotViewer (http://www.getpivot.com) software, based on Microsoft Silverlight technology (http://www.silverlight.net), that includes 259 genetic disease summaries from five populations, systematically collected from the literature representing the documented genetic makeup of these populations and (iii) the implementation of a generic data submission tool for every module currently available in FINDbase.",2013-11-14 +24302579,DPRP: a database of phenotype-specific regulatory programs derived from transcription factor binding data.,"Gene expression profiling has been extensively used in the past decades, resulting in an enormous amount of expression data available in public databases. 
These data sets are informative in elucidating transcriptional regulation of genes underlying various biological and clinical conditions. However, it is usually difficult to identify transcription factors (TFs) responsible for gene expression changes directly from their own expression, as TF activity is often regulated at the posttranscriptional level. In recent years, technical advances have made it possible to systematically determine the target genes of TFs by ChIP-seq experiments. To identify the regulatory programs underlying gene expression profiles, we constructed a database of phenotype-specific regulatory programs (DPRP, http://syslab.nchu.edu.tw/DPRP/) derived from the integrative analysis of TF binding data and gene expression data. DPRP provides three methods: the Fisher's Exact Test, the Kolmogorov-Smirnov test and the BASE algorithm to facilitate the application of gene expression data for generating new hypotheses on transcriptional regulatory programs in biological and clinical studies.",2013-12-02 +29201980,Experimental data of co-crystals of Etravirine and L-tartaric acid.,"Etravirine is a drug used alongside other medication in the treatment of HIV and is a non-nucleoside reverse transcriptase inhibitor. It is a BCS class IV drug, having low solubility and high permeability (Drugbank, https://www.drugbank.ca/drugs/DB06414) [1]. As a result, large doses of the drug are required for treatment. Two pills have to be taken twice a day, making it a ""pill burden"" (Intelence, http://www.intelence.com/hcp/dosing/administration-options) [2]. Therefore, attempts of co-crystallizing Etravirine are attractive as the solubility of the drug tends to increase in this solid form (Schultheiss and Newman, 2009) [3]. In this study Etravirine co-crystals were synthesized in the molar ratios 1:1, 1:2 and 2:1 with L-tartaric acid as the co-former. Both slow evaporation and physical mixture was performed to mix the components. 
DSC values of final products are presented as well as FTIR spectra to observe the altered intermolecular interactions. A chemical stability test was performed after seven days using area under curve data from an HPLC instrument.",2017-11-07 +27836980,Proteome Profiling Outperforms Transcriptome Profiling for Coexpression Based Gene Function Prediction.,"Coexpression of mRNAs under multiple conditions is commonly used to infer cofunctionality of their gene products despite well-known limitations of this ""guilt-by-association"" (GBA) approach. Recent advancements in mass spectrometry-based proteomic technologies have enabled global expression profiling at the protein level; however, whether proteome profiling data can outperform transcriptome profiling data for coexpression based gene function prediction has not been systematically investigated. Here, we address this question by constructing and analyzing mRNA and protein coexpression networks for three cancer types with matched mRNA and protein profiling data from The Cancer Genome Atlas (TCGA) and the Clinical Proteomic Tumor Analysis Consortium (CPTAC). Our analyses revealed a marked difference in wiring between the mRNA and protein coexpression networks. Whereas protein coexpression was driven primarily by functional similarity between coexpressed genes, mRNA coexpression was driven by both cofunction and chromosomal colocalization of the genes. Functionally coherent mRNA modules were more likely to have their edges preserved in corresponding protein networks than functionally incoherent mRNA modules. Proteomic data strengthened the link between gene expression and function for at least 75% of Gene Ontology (GO) biological processes and 90% of KEGG pathways. 
A web application Gene2Net (http://cptac.gene2net.org) developed based on the three protein coexpression networks revealed novel gene-function relationships, such as linking ERBB2 (HER2) to lipid biosynthetic process in breast cancer, identifying PLG as a new gene involved in complement activation, and identifying AEBP1 as a new epithelial-mesenchymal transition (EMT) marker. Our results demonstrate that proteome profiling outperforms transcriptome profiling for coexpression based gene function prediction. Proteomics should be integrated if not preferred in gene function and human disease studies.",2016-11-11 +25640659,SecReT6: a web-based resource for type VI secretion systems found in bacteria.,"SecReT6 (http://db-mml.sjtu.edu.cn/SecReT6/) is an integrated database providing comprehensive information on type VI secretion systems (T6SSs) in bacteria. T6SSs are a class of sophisticated cell contact-dependent apparatuses involved in mediating antagonistic or synergistic communications between bacteria and/or bacteria and eukaryotes. These apparatuses have recently been found to be widely distributed among Gram-negative bacterial species. SecReT6 offers a unique, readily explorable archive of known and putative T6SSs, and cognate effectors found in bacteria. It currently contains data on 11 167 core T6SS components mapping to 906 T6SSs found in 498 bacterial strains representing 240 species, as well as a collection of over 600 directly relevant references. Also collated and archived were 1340 diverse candidate secreted effectors which were experimentally shown and/or predicted to be delivered by T6SSs into target eukaryotic and/or prokaryotic cells as well as 196 immunity proteins. A broad range of T6SS gene cluster detection and comparative analysis tools are readily accessible via SecReT6, which may aid identification of effectors and immunity proteins around the T6SS core components. 
This database will be regularly updated to ensure its ongoing maximal utility and relevance to the scientific research community.",2015-07-01 +28161902,Phylogenomic analysis of gene co-expression networks reveals the evolution of functional modules.,"Molecular evolutionary studies correlate genomic and phylogenetic information with the emergence of new traits of organisms. These traits are, however, the consequence of dynamic gene networks composed of functional modules, which might not be captured by genomic analyses. Here, we established a method that combines large-scale genomic and phylogenetic data with gene co-expression networks to extensively study the evolutionary make-up of modules in the moss Physcomitrella patens, and in the angiosperms Arabidopsis thaliana and Oryza sativa (rice). We first show that younger genes are less annotated than older genes. By mapping genomic data onto the co-expression networks, we found that genes from the same evolutionary period tend to be connected, whereas old and young genes tend to be disconnected. Consequently, the analysis revealed modules that emerged at a specific time in plant evolution. To uncover the evolutionary relationships of the modules that are conserved across the plant kingdom, we added phylogenetic information that revealed duplication and speciation events on the module level. This combined analysis revealed an independent duplication of cell wall modules in bryophytes and angiosperms, suggesting a parallel evolution of cell wall pathways in land plants. We provide an online tool allowing plant researchers to perform these analyses at http://www.gene2function.de.",2017-03-23 +,bgc: Software for Bayesian estimation of genomic clines,"Introgression in admixed populations can be used to identify candidate loci that might underlie adaptation or reproductive isolation. 
The Bayesian genomic cline model provides a framework for quantifying variable introgression in admixed populations and identifying regions of the genome with extreme introgression that are potentially associated with variation in fitness. Here we describe the bgc software, which uses Markov chain Monte Carlo to estimate the joint posterior probability distribution of the parameters in the Bayesian genomic cline model and designate outlier loci. This software can be used with next‐generation sequence data, accounts for uncertainty in genotypic state, and can incorporate information from linked loci on a genetic map. Output from the analysis is written to an HDF5 file for efficient storage and manipulation. This software is written in C++. The source code, software manual, compilation instructions and example data sets are available under the GNU Public License at http://sites.google.com/site/bgcsoftware/.",2012-11-01 +29703719,Sialic Acid Blockade Suppresses Tumor Growth by Enhancing T-cell-Mediated Tumor Immunity.,"Sialic acid sugars on the surface of cancer cells have emerged as potent immune modulators that contribute to the immunosuppressive microenvironment and tumor immune evasion. However, the mechanisms by which these sugars modulate antitumor immunity as well as therapeutic strategies directed against them are limited. Here we report that intratumoral injections with a sialic acid mimetic Ac53FaxNeu5Ac block tumor sialic acid expression in vivo and suppress tumor growth in multiple tumor models. Sialic acid blockade had a major impact on the immune cell composition of the tumor, enhancing tumor-infiltrating natural killer cell and CD8+ T-cell numbers while reducing regulatory T-cell and myeloid regulatory cell numbers. Sialic acid blockade enhanced cytotoxic CD8+ T-cell-mediated killing of tumor cells in part by facilitating antigen-specific T-cell-tumor cell clustering. 
Sialic acid blockade also synergized with adoptive transfer of tumor-specific CD8+ T cells in vivo and enhanced CpG immune adjuvant therapy by increasing dendritic cell activation and subsequent CD8+ T-cell responses. Collectively, these data emphasize the crucial role of sialic acids in tumor immune evasion and provide proof of concept that sialic acid blockade creates an immune-permissive tumor microenvironment for CD8+ T-cell-mediated tumor immunity, either as single treatment or in combination with other immune-based intervention strategies. Significance: Sialic acid sugars function as important modulators of the immunosuppressive tumor microenvironment that limit potent antitumor immunity. Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/13/3574/F1.large.jpg Cancer Res; 78(13); 3574-88. ©2018 AACR.",2018-04-27 +28079879,The ClusPro web server for protein-protein docking.,"The ClusPro server (https://cluspro.org) is a widely used tool for protein-protein docking. The server provides a simple home page for basic use, requiring only two files in Protein Data Bank (PDB) format. However, ClusPro also offers a number of advanced options to modify the search; these include the removal of unstructured protein regions, application of attraction or repulsion, accounting for pairwise distance restraints, construction of homo-multimers, consideration of small-angle X-ray scattering (SAXS) data, and location of heparin-binding sites. Six different energy functions can be used, depending on the type of protein. Docking with each energy parameter set results in ten models defined by centers of highly populated clusters of low-energy docked structures. This protocol describes the use of the various options, the construction of auxiliary restraints files, the selection of the energy parameters, and the analysis of the results. 
Although the server is heavily used, runs are generally completed in <4 h.",2017-01-12 +29954286,ProBAPred: Inferring protein-protein binding affinity by incorporating protein sequence and structural features.,"Protein-protein binding interaction is the most prevalent biological activity that mediates a great variety of biological processes. The increasing availability of experimental data of protein-protein interaction allows a systematic construction of protein-protein interaction networks, significantly contributing to a better understanding of protein functions and their roles in cellular pathways and human diseases. Compared to well-established classification for protein-protein interactions (PPIs), limited work has been conducted for estimating protein-protein binding free energy, which can provide informative real-value regression models for characterizing the protein-protein binding affinity. In this study, we propose a novel ensemble computational framework, termed ProBAPred (Protein-protein Binding Affinity Predictor), for quantitative estimation of protein-protein binding affinity. A large number of sequence and structural features, including physical-chemical properties, binding energy and conformation annotations, were collected and calculated from currently available protein binding complex datasets and the literature. Feature selection based on the WEKA package was performed to identify and characterize the most informative and contributing feature subsets. Experiments on the independent test showed that our ensemble method achieved the lowest Mean Absolute Error (MAE; 1.657 kcal/mol) and the second highest correlation coefficient ( R-value=0.467 ), compared with the existing methods. The datasets and source codes of ProBAPred, and the supplementary materials in this study can be downloaded at http://lightning.med.monash.edu/probapred/ for academic use. 
We anticipate that the developed ProBAPred regression models can facilitate computational characterization and experimental studies of protein-protein binding affinity.",2018-04-26 +25522035,The Yeast Nucleosome Atlas (YNA) database: an integrative gene mining platform for studying chromatin structure and its regulation in yeast.,"

Background

Histone modification and remodeling play crucial roles in regulating gene transcription. These post-translational modifications of histones function in a combinatorial fashion and can be recognized by specific histone-binding proteins, thus regulating gene transcription. Therefore, understanding the combinatorial patterns of the histone code is vital to understanding the associated biological processes. However, most of the datasets regarding histone modification and chromatin regulation are scattered across various studies, and no comprehensive search and query tool has yet been made available to retrieve genes bearing specific histone modification patterns and regulatory proteins.

Description

For this reason, we developed the Yeast Nucleosome Atlas database, or the YNA database, which integrates the available experimental data on nucleosome occupancy, histone modifications, the binding occupancy of regulatory proteins, and gene expression data, and provides the genome-wide gene miner to retrieve genes with a specific combination of these chromatin-related datasets. Moreover, the biological significance analyzer, which analyzes the enrichments of histone modifications, binding occupancy, transcription rate, and functionality of the retrieved genes, was constructed to help researchers to gain insight into the correlation among chromatin regulation and transcription.

Conclusions

Compared to previously established genome browsing databases, YNA provides a powerful gene mining and retrieval interface, and is an investigation tool that can assist users to generate testable hypotheses for studying chromatin regulation during transcription. YNA is available online at http://cosbi3.ee.ncku.edu.tw/yna/.",2014-12-08 +28615067,Correspondence on Lovell et al.: identification of chicken genes previously assumed to be evolutionarily lost.,"Through RNA-Seq analyses, we identified 137 genes that are missing in chicken, including the long-sought-after nephrin and tumor necrosis factor genes. These genes tended to cluster in GC-rich regions that have poor coverage in genome sequence databases. Hence, the occurrence of syntenic groups of vertebrate genes that have not been observed in Aves does not prove the evolutionary loss of such genes.Please see related Research article: http://dx.doi.org/10.1186/s13059-014-0565-1 and Please see response from Lovell et al: https://www.dx.doi.org/10.1186/s13059-017-1234-y.",2017-06-14 +29340519,Evaluating the current state of the art of Huntington disease research: a scientometric analysis.,"Huntington disease (HD) is an incurable neurodegenerative disorder caused by a dominant mutation on the 4th chromosome. We aim to present a scientometric analysis of the extant scientific undertakings devoted to better understanding HD. Therefore, a quantitative study was performed to examine the current state-of-the-art approaches that foster researchers' understandings of the current knowledge, research trends, and research gaps regarding this disorder. We performed literature searches of articles that were published up to September 2016 in the ""ISI Web of Science™"" (http://apps.webofknowledge.com/). The keyword used was ""Huntington disease"". Of the initial 14,036 articles that were obtained, 7732 were eligible for inclusion in the study according to their relevance. 
Data were classified according to language, country of publication, year, and area of concentration. The country leader regarding the number of studies published on HD is the United States, accounting for nearly 30% of all publications, followed by England and Germany, who have published 10 and 7% of all publications, respectively. Regarding the language in which the articles were written, 98% of publications were in English. The first publication to be found on HD was published in 1974. A surge of publications on HD can be seen from 1996 onward. In relation to the various knowledge areas that emerged, most publications were in the fields of neuroscience and neurology, likely because HD is a neurodegenerative disorder. Publications written in areas such as psychiatry, genetics, and molecular biology also predominated.",2018-01-11 +29325576,Reanalysis of Chinese Treponema pallidum samples: all Chinese samples cluster with SS14-like group of syphilis-causing treponemes.,"

Objective

Treponema pallidum subsp. pallidum (TPA) is the causative agent of syphilis. Genetic analyses of TPA reference strains and human clinical isolates have revealed two genetically distinct groups of syphilis-causing treponemes, called Nichols-like and SS14-like groups. So far, no genetic intermediates, i.e. strains containing a mixed pattern of Nichols-like and SS14-like genomic sequences, have been identified. Recently, Sun et al. (Oncotarget 2016. https://doi.org/10.18632/oncotarget.10154 ) described a new ""phylogenetic group"" (called Lineage 2) among Chinese TPA strains. This lineage exhibited a ""mosaic genomic structure"" of Nichols-like and SS14-like lineages.

Results

We reanalyzed the primary sequencing data (Project Number PRJNA305961) from the Sun et al. publication with respect to the molecular basis of Lineage 2. While Sun et al. based the analysis on several selected genomic single nucleotide variants (SNVs) and a subset of highly variable but phylogenetically poorly informative genes, which may confound the phylogenetic analysis, our reanalysis primarily focused on a complete set of whole genomic SNVs. Based on our reanalysis, only two separate TPA clusters were identified: one consisted of Nichols-like TPA strains, the other was formed by the SS14-like TPA strains, including all Chinese strains.",2018-01-11 +28447333,Efficacy and tolerability of lithium in treating acute mania in youth with bipolar disorder: protocol for a systematic review.,"

Background

Epidemiological, clinical, and high-risk studies have provided evidence that the peak period for onset of diagnosable episodes of mania and hypomania starts in mid-to-late adolescence. Moreover, clinically significant manic symptoms may occur even earlier, especially in children at familial risk. Lithium is the gold standard treatment for acute mania in adults, yet to our knowledge, there is no published systematic review assessing lithium treatment of mania in children or adolescents. This is a major gap in knowledge needed to inform clinical practice.

Aim

As a working group within the ISBD Task Force on Lithium Treatment ( http://www.isbd.org/active-task-forces ), our aim is to complete a systematic review of the efficacy, tolerability, and acceptability of lithium compared with placebo and other active drugs in treating mania in children and adolescents diagnosed with bipolar disorder.

Methods

We will include double- or single-blind randomized controlled trials in patients aged less than 18 years. No restrictions will be made by study publication date or language. Several electronic databases will be searched along with secondary sources such as bibliographies and trial registry websites for published and unpublished studies. Response rates to lithium compared with placebo or other active drugs will be the primary efficacy outcome. Primary tolerability and acceptability outcomes will be rates of serious adverse events and dropouts, respectively. Secondary outcomes will include rates of remission, severity of manic symptoms at different time points, and incidence of specific adverse events.

Discussion

Findings from this systematic review are critically needed to inform clinical practice. We should not generalize findings from adult studies, as children and adolescents are undergoing accelerated physiological and brain development. Therefore, efficacy, tolerability, and acceptability of lithium treatment of acute mania in children compared to adults may be very different. This systematic review has been registered in PROSPERO (CRD42017055675).",2017-06-13 +26484140,High-throughput whole-genome sequencing of E14 mouse embryonic stem cells.,"Mouse E14 embryonic stem cells (ESCs) are the most used ESC line, often employed for genome-wide studies involving next generation sequencing analysis [1-5]. More than 2 × 10 E9 sequences made on Illumina platform derived from the genome of E14 embryonic stem cells cultured in our laboratory were used to build a database of about 2.7 × 10 E6 single nucleotide variant [6]. The database was validated using other two sequencing datasets from other laboratory and high overlap was observed. The identified variants are enriched on intergenic regions, but several thousands reside on gene exons and regulatory regions, such as promoters, enhancers, splicing site and untranslated regions of RNA, thus indicating high probability of an important functional impact on the molecular biology of these cells. We created a new E14 genome assembly including the new identified variants and used it to map reads from next generation sequencing data generated in our laboratory or in others on E14 cell line. We observed an increase in the number of mapped reads of about 5%. CpG dinucleotide showed the higher variation frequency, probably because it could be a target of DNA methylation. 
Data were deposited in GEO datasets under reference GSM1283021 and here: http://epigenetics.hugef-research.org/data.php.",2014-11-07 +22135298,PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse.,"PhosphoSitePlus (http://www.phosphosite.org) is an open, comprehensive, manually curated and interactive resource for studying experimentally observed post-translational modifications, primarily of human and mouse proteins. It encompasses 1,30,000 non-redundant modification sites, primarily phosphorylation, ubiquitinylation and acetylation. The interface is designed for clarity and ease of navigation. From the home page, users can launch simple or complex searches and browse high-throughput data sets by disease, tissue or cell line. Searches can be restricted by specific treatments, protein types, domains, cellular components, disease, cell types, cell lines, tissue and sequences or motifs. A few clicks of the mouse will take users to substrate pages or protein pages with sites, sequences, domain diagrams and molecular visualization of side-chains known to be modified; to site pages with information about how the modified site relates to the functions of specific proteins and cellular processes and to curated information pages summarizing the details from one record. PyMOL and Chimera scripts that colorize reactive groups on residues that are modified can be downloaded. 
Features designed to facilitate proteomic analyses include downloads of modification sites, kinase-substrate data sets, sequence logo generators, a Cytoscape plugin and BioPAX download to enable pathway visualization of the kinase-substrate interactions in PhosphoSitePlus®.",2011-12-01 +29871886,Risk Factors and Outcomes of Rapid Correction of Severe Hyponatremia.,"BACKGROUND AND OBJECTIVES:Rapid correction of severe hyponatremia can result in serious neurologic complications, including osmotic demyelination. Few data exist on incidence and risk factors of rapid correction or osmotic demyelination. DESIGN, SETTING, PARTICIPANTS, & MEASUREMENTS:In a retrospective cohort of 1490 patients admitted with serum sodium <120 mEq/L to seven hospitals in the Geisinger Health System from 2001 to 2017, we examined the incidence and risk factors of rapid correction and osmotic demyelination. Rapid correction was defined as serum sodium increase of >8 mEq/L at 24 hours. Osmotic demyelination was determined by manual chart review of all available brain magnetic resonance imaging reports. RESULTS:Mean age was 66 years old (SD=15), 55% were women, and 67% had prior hyponatremia (last outpatient sodium <135 mEq/L). Median change in serum sodium at 24 hours was 6.8 mEq/L (interquartile range, 3.4-10.2), and 606 patients (41%) had rapid correction at 24 hours. Younger age, being a woman, schizophrenia, lower Charlson comorbidity index, lower presentation serum sodium, and urine sodium <30 mEq/L were associated with greater risk of rapid correction. Prior hyponatremia, outpatient aldosterone antagonist use, and treatment at an academic center were associated with lower risk of rapid correction. A total of 295 (20%) patients underwent brain magnetic resonance imaging on or after admission, with nine (0.6%) patients showing radiologic evidence of osmotic demyelination. 
Eight (0.5%) patients had incident osmotic demyelination, of whom five (63%) had beer potomania, five (63%) had hypokalemia, and seven (88%) had sodium increase >8 mEq/L over a 24-hour period before magnetic resonance imaging. Five patients with osmotic demyelination had apparent neurologic recovery. CONCLUSIONS:Among patients presenting with severe hyponatremia, rapid correction occurred in 41%; nearly all patients with incident osmotic demyelination had a documented episode of rapid correction. PODCAST:This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2018_06_05_CJASNPodcast_18_7_G.mp3.",2018-06-05 +25333826,Unified and isomer-specific NMR metabolomics database for the accurate analysis of (13)C-(1)H HSQC spectra.,"A new metabolomics database and query algorithm for the analysis of (13)C-(1)H HSQC spectra is introduced, which unifies NMR spectroscopic information on 555 metabolites from both the Biological Magnetic Resonance Data Bank (BMRB) and Human Metabolome Database (HMDB). The new database, termed Complex Mixture Analysis by NMR (COLMAR) (13)C-(1)H HSQC database, can be queried via an interactive, easy to use web interface at http://spin.ccic.ohio-state.edu/index.php/hsqc/index . Our new HSQC database separately treats slowly exchanging isomers that belong to the same metabolite, which permits improved query in cases where lowly populated isomers are below the HSQC detection limit. The performance of our new database and query web server compares favorably with the one of existing web servers, especially for spectra of samples of high complexity, including metabolite mixtures from the model organisms Drosophila melanogaster and Escherichia coli. For such samples, our web server has on average a 37% higher accuracy (true positive rate) and a 82% lower false positive rate, which makes it a useful tool for the rapid and accurate identification of metabolites from (13)C-(1)H HSQC spectra at natural abundance. 
This information can be combined and validated with NMR data from 2D TOCSY-type spectra that provide connectivity information not present in HSQC spectra.",2014-11-05 +22440904,Quantitative proteomics identifies vasopressin-responsive nuclear proteins in collecting duct cells.,"Vasopressin controls transport in the renal collecting duct, in part, by regulating transcription. This complex process, which can involve translocation and/or modification of transcriptional regulators, is not completely understood. Here, we applied a method for large-scale profiling of nuclear proteins to quantify vasopressin-induced changes in the nuclear proteome of cortical collecting duct (mpkCCD) cells. Using stable isotope labeling and tandem mass spectrometry, we quantified 3987 nuclear proteins and identified significant changes in the abundance of 65, including previously established targets of vasopressin signaling in the collecting duct. Vasopressin-induced changes in the abundance of the transcription factors JunB, Elf3, Gatad2b, and Hmbox1; transcriptional co-regulators Ctnnb1 (β-catenin) and Crebbp; subunits of the Mediator complex; E3 ubiquitin ligase Nedd4; nuclear transport regulator RanGap1; and several proteins associated with tight junctions and adherens junctions. Bioinformatic analysis showed that many of the quantified transcription factors have putative binding sites in the 5'-flanking regions of genes coding for the channel proteins Aqp2, Aqp3, Scnn1b (ENaCβ), and Scnn1g (ENaCγ), which are known targets of vasopressin. Immunoblotting demonstrated that the increase in β-catenin in nuclear fractions was accompanied by an even larger increase in its phosphorylated form (pSer552). 
The findings provide a new online database resource for nuclear proteomics (http://helixweb.nih.gov/ESBL/Database/mNPD/) and generate new hypotheses regarding vasopressin-mediated transcriptional regulation in the collecting duct.",2012-03-22 +29222529,Prediction of cassava protein interactome based on interolog method.,"Cassava is a starchy root crop whose role in food security becomes more significant nowadays. Together with the industrial uses for versatile purposes, demand for cassava starch is continuously growing. However, in-depth study to uncover the mystery of cellular regulation, especially the interaction between proteins, is lacking. To reduce the knowledge gap in protein-protein interaction (PPI), genome-scale PPI network of cassava was constructed using interolog-based method (MePPI-In, available at http://bml.sbi.kmutt.ac.th/ppi ). The network was constructed from the information of seven template plants. The MePPI-In included 90,173 interactions from 7,209 proteins. At least, 39 percent of the total predictions were found with supports from gene/protein expression data, while further co-expression analysis yielded 16 highly promising PPIs. In addition, domain-domain interaction information was employed to increase reliability of the network and guide the search for more groups of promising PPIs. Moreover, the topology and functional content of MePPI-In was similar to the networks of Arabidopsis and rice. The potential contribution of MePPI-In for various applications, such as protein-complex formation and prediction of protein function, was discussed and exemplified. 
The insights provided by our MePPI-In would hopefully enable us to pursue precise trait improvement in cassava.",2017-12-08 +30002593,"Systematics of the ant genus Proceratium Roger (Hymenoptera, Formicidae, Proceratiinae) in China - with descriptions of three new species based on micro-CT enhanced next-generation-morphology.","The genus Proceratium Roger, 1863 contains cryptic, subterranean ants that are seldom sampled and rare in natural history collections. Furthermore, most Proceratium specimens are extremely hairy and, due to their enlarged and curved gaster, often mounted suboptimally. As a consequence, the poorly observable physical characteristics of the material and its scarcity result in a rather challenging alpha taxonomy of this group. In this study, the taxonomy of the Chinese Proceratium fauna is reviewed and updated by combining examinations of traditional light microscopy with x-ray microtomography (micro-CT). Based on micro-CT scans of seven out of eight species, virtual 3D surface models were generated that permit in-depth comparative analyses of specimen morphology in order to overcome the difficulties to examine physical material of Proceratium. Eight Chinese species are recognized, of which three are newly described: Proceratium bruelheidei Staab, Xu & Hita Garcia, sp. n. and P. kepingmai sp. n. belong to the P. itoi clade and have been collected in the subtropical forests of southeast China, whereas P. shohei sp. n. belongs to the P. stictum clade and it is only known from a tropical forest of Yunnan Province. Proceratium nujiangense Xu, 2006 syn. n. is proposed as a junior synonym of P. zhaoi Xu, 2000. These taxonomic acts raise the number of known Chinese Proceratium species to eight. 
In order to integrate the new species into the existing taxonomic system and to facilitate identifications, an illustrated key to the worker caste of all Chinese species is provided, supplemented by species accounts with high-resolution montage images and still images of volume renderings of 3D models based on micro-CT. Moreover, cybertype datasets are provided for the new species, as well as digital datasets for the remaining species that include the raw micro-CT scan data, 3D surface models, 3D rotation videos, and all light photography and micro-CT still images. These datasets are available online (Dryad, Staab et al. 2018, http://dx.doi.org/10.5061/dryad.h6j0g4p).",2018-06-04 +27736832,Foodborne (1973-2013) and Waterborne (1971-2013) Disease Outbreaks - United States.,"CDC collects data on foodborne and waterborne disease outbreaks reported by all U.S. states and territories through the Foodborne Disease Outbreak Surveillance System (FDOSS) (http://www.cdc.gov/foodsafety/fdoss/surveillance/index.html) and the Waterborne Disease and Outbreak Surveillance System (WBDOSS) http://www.cdc.gov/healthywater/surveillance), respectively. These two systems are the primary source of national data describing the number of reported outbreaks; outbreak-associated illnesses, hospitalizations, and deaths; etiologic agents; water source or implicated foods; settings of exposure; and other factors associated with recognized foodborne and waterborne disease outbreaks in the United States.",2016-10-14 +29651980,Local Food Systems Food Safety Concerns. ,"Foodborne disease causes an estimated 48 million illnesses and 3,000 deaths annually (Scallan E, et al., Emerg Infect Dis 17:7-15, 2011), with U.S. economic costs estimated at $152 billion to $1.4 trillion annually (Roberts T, Am J Agric Econ 89:1183-1188, 2007; Scharff RL, http://www.pewtrusts.org/en/research-and-analysis/reports/0001/01/01/healthrelated-costs-from-foodborne-illness-in-the-united-states, 2010). 
An increasing number of these illnesses are associated with fresh fruits and vegetables. An analysis of outbreaks from 1990 to 2003 found that 12% of outbreaks and 20% of outbreak-related illnesses were associated with produce (Klein S, Smith DeWaal CS, Center for Science in the Public Interest, https://cspinet.org/sites/default/files/attachment/ddreport.pdf, June 2008; Lynch M, Tauxe R, Hedberg C, Epidemiol Infect 137:307-315, 2009). These food safety problems have resulted in various stakeholders recommending the shift to a more preventative and risk-based food safety system. A modern risk-based food safety system takes a farm-to-fork preventative approach to food safety and relies on the proactive collection and analysis of data to better understand potential hazards and risk factors, to design and evaluate interventions, and to prioritize prevention efforts. Such a system focuses limited resources at the points in the food system with the likelihood of having greatest benefit to public health. As shared kitchens, food hubs, and local food systems such as community supported agriculture are becoming more prevalent throughout the United States, so are foodborne illness outbreaks at these locations. At these locations, many with limited resources, food safety methods of prevention are rarely the main focus. This lack of focus on food safety knowledge is why a growing number of foodborne illness outbreaks are occurring at these locations.",2018-04-01 +29687979,Evaluating the Association between Artificial Light-at-Night Exposure and Breast and Prostate Cancer Risk in Spain (MCC-Spain Study).,"BACKGROUND:Night shift work, exposure to light at night (ALAN) and circadian disruption may increase the risk of hormone-dependent cancers. OBJECTIVES:We evaluated the association of exposure to ALAN during sleeping time with breast and prostate cancer in a population based multicase-control study (MCC-Spain), among subjects who had never worked at night. 
We evaluated chronotype, a characteristic that may relate to adaptation to light at night. METHODS:We enrolled 1,219 breast cancer cases, 1,385 female controls, 623 prostate cancer cases, and 879 male controls from 11 Spanish regions in 2008-2013. Indoor ALAN information was obtained through questionnaires. Outdoor ALAN was analyzed using images from the International Space Station (ISS) available for Barcelona and Madrid for 2012-2013, including data of remotely sensed upward light intensity and blue light spectrum information for each geocoded longest residence of each MCC-Spain subject. RESULTS:Among Barcelona and Madrid participants with information on both indoor and outdoor ALAN, exposure to outdoor ALAN in the blue light spectrum was associated with breast cancer [adjusted odds ratio (OR) for highest vs. lowest tertile, OR=1.47; 95% CI: 1.00, 2.17] and prostate cancer (OR=2.05; 95% CI: 1.38, 3.03). In contrast, those exposed to the highest versus lowest intensity of outdoor ALAN were more likely to be controls than cases, particularly for prostate cancer. Compared with those who reported sleeping in total darkness, men who slept in ""quite illuminated"" bedrooms had a higher risk of prostate cancer (OR=2.79; 95% CI: 1.55, 5.04), whereas women had a slightly lower risk of breast cancer (OR=0.77; 95% CI: 0.39, 1.51). CONCLUSION:Both prostate and breast cancer were associated with high estimated exposure to outdoor ALAN in the blue-enriched light spectrum. https://doi.org/10.1289/EHP1837.",2018-04-23 +26626277,Lack of integrase inhibitors associated resistance mutations among HIV-1C isolates.,"

Background

Although biochemical analysis of HIV-1 integrase enzyme suggested the use of integrase inhibitors (INIs) against HIV-1C, different viral subtypes may favor different mutational pathways potentially leading to varying levels of drug resistance. Thus, the aim of this study was to search for the occurrence and natural evolution of integrase polymorphisms and/or resistance mutations in HIV-1C Ethiopian clinical isolates prior to the introduction of INIs.

Methods

Plasma samples from chronically infected drug naïve patients (N = 45), of whom the PR and RT sequence was determined previously, were used to generate population based sequences of HIV-1 integrase. HIV-1 subtype was determined using the REGA HIV-1 subtyping tool. Resistance mutations were interpreted according to the Stanford HIV drug resistance database ( http://hivdb.stanford.edu ) and the updated International Antiviral Society (IAS)-USA mutation lists. Moreover, rates of polymorphisms in the current isolates were compared with South African and global HIV-1C isolates.

Results

All subjects were infected with HIV-1C concordant to the protease (PR) and reverse transcriptase (RT) regions. Neither major resistance-associated IN mutations (T66I/A/K, E92Q/G, T97A, Y143HCR, S147G, Q148H/R/K, and N155H) nor silent mutations known to change the genetic barrier were observed. Moreover, the DDE-catalytic motif (D64G/D116G/E152 K) and signature HHCC zinc-binding motifs at codon 12, 16, 40 and 43 were found to be highly conserved. However, compared to other South African subtype C isolates, the rate of polymorphism was variable at various positions.

Conclusion

Although the sample size is small, the findings suggest that this drug class could be effective in Ethiopia and other southern African countries where HIV-1C is predominantly circulating. The data will contribute to define the importance of integrase polymorphism and to improve resistance interpretation algorithms in HIV-1C isolates.",2015-12-01 +28652975,Draft genome sequence of Massilia sp. KIM isolated from South African grassland biome soils.,"Massilia sp. are aerobic, Gram-negative, rod-shaped bacteria that are found in air, water, and soils. Here we describe the draft genome sequence of Massilia sp. KIM, isolated from the South African grassland soils. The total length of the genome was estimated at 5.73 Mb, comprised of 17 contigs. The draft genome has been deposited in the DDBJ/EMBL/GenBank under the accession MVAD10000000 and is available for download at: https://www.ncbi.nlm.nih.gov/nuccore/MVAD00000000. Additionally, the raw short reads are available in the NCBI SRA database under the accession number: SRR5469241.",2017-06-10 +29854244,Combining mechanism-based prediction with patient-based profiling for psoriasis metabolomics biomarker discovery.,"Psoriasis is a chronic, debilitating skin condition that affects approximately 125 million individuals worldwide. The cause of psoriasis appears multifactorial, and no unified mitigating signal or single antigenic target has been identified to date. Metabolomic studies hold great potential for explaining disease mechanism, facilitating early diagnosis, and identifying potential therapeutic areas. Here, we present an integrated disease metabolomic biomarker discovery strategy that combines mechanism-based biomarker discovery with clinical sample-based metabolomic profiling. We applied this strategy in identifying and understanding metabolite biomarkers for psoriasis. 
The key innovation of our strategy is a novel mechanism-based metabolite prediction system, mmPredict, which assimilates vast amounts of existing knowledge of diseases and metabolites. mmPredict first constructed a psoriasis-specific mouse mutational phenotype profile. It then constructed phenotype profiles for a total of 259,170 chemicals/metabolites using known chemical genetics and human metabolomic data. Metabolites were then prioritized based on the phenotypic similarities between disease- and metabolites. We evaluated mmPredict using 150 metabolites identified using our in-house metabolome profiling study of psoriasis patient samples. mmPredict found 96 of the 150 metabolites and ranked them highly (recall: 0.64, mean ranking: 8.73%, median ranking: 2.33%, p-value: 4.75E-44). These results show that mmPredict is consistent with, as well as a complement to, traditional human metabolomic profiling studies. We then developed a strategy to combine outputs from both systems and found that the oxidative product of linoleic acid, 13(S)-hydroxy-9Z,11E-octadecadienoic acid (13- HODE), ranked highly by both mmPredict and our in-house experiments. Our integrated analysis indicates that 13- HODE may be a mechanistic link between psoriasis and cardiovascular comorbidities associated with psoriasis. In summary, we developed an integrated metabolomic prediction system that combines both human metabolomic studies and mechanism-based prediction and demonstrated its application in the skin disease psoriasis. Our system is highly general and can be applied to other diseases when patient-based metabolomic profiling data becomes more increasingly available. 
Data is publicly available at: http://nlp.case.edu/public/data/mmPredict_PSO.",2017-01-01 +24203703,"CottonGen: a genomics, genetics and breeding database for cotton research.","CottonGen (http://www.cottongen.org) is a curated and integrated web-based relational database providing access to publicly available genomic, genetic and breeding data for cotton. CottonGen supercedes CottonDB and the Cotton Marker Database, with enhanced tools for easier data sharing, mining, visualization and data retrieval of cotton research data. CottonGen contains annotated whole genome sequences, unigenes from expressed sequence tags (ESTs), markers, trait loci, genetic maps, genes, taxonomy, germplasm, publications and communication resources for the cotton community. Annotated whole genome sequences of Gossypium raimondii are available with aligned genetic markers and transcripts. These whole genome data can be accessed through genome pages, search tools and GBrowse, a popular genome browser. Most of the published cotton genetic maps can be viewed and compared using CMap, a comparative map viewer, and are searchable via map search tools. Search tools also exist for markers, quantitative trait loci (QTLs), germplasm, publications and trait evaluation data. CottonGen also provides online analysis tools such as NCBI BLAST and Batch BLAST.",2013-11-06 +26882539,An open access pilot freely sharing cancer genomic data from participants in Texas.,"Genomic data sharing in cancer has been restricted to aggregate or controlled-access initiatives to protect the privacy of research participants. By limiting access to these data, it has been argued that the autonomy of individuals who decide to participate in data sharing efforts has been superseded and the utility of the data as research and educational tools reduced. In a pilot Open Access (OA) project from the CPRIT-funded Texas Cancer Research Biobank, many Texas cancer patients were willing to openly share genomic data from tumor and normal matched pair specimens. 
For the first time, genetic data from 7 human cancer cases with matched normal are freely available without requirement for data use agreements nor any major restriction except that end users cannot attempt to re-identify the participants (http://txcrb.org/open.html).",2016-02-16 +29077811,Reactome enhanced pathway visualization.,"

Motivation

Reactome is a free, open-source, open-data, curated and peer-reviewed knowledge base of biomolecular pathways. Pathways are arranged in a hierarchical structure that largely corresponds to the GO biological process hierarchy, allowing the user to navigate from high level concepts like immune system to detailed pathway diagrams showing biomolecular events like membrane transport or phosphorylation. Here, we present new developments in the Reactome visualization system that facilitate navigation through the pathway hierarchy and enable efficient reuse of Reactome visualizations for users' own research presentations and publications.

Results

For the higher levels of the hierarchy, Reactome now provides scalable, interactive textbook-style diagrams in SVG format, which are also freely downloadable and editable. Repeated diagram elements like 'mitochondrion' or 'receptor' are available as a library of graphic elements. Detailed lower-level diagrams are now downloadable in editable PPTX format as sets of interconnected objects.

Availability and implementation

http://reactome.org.

Contact

fabregat@ebi.ac.uk or hhe@ebi.ac.uk.",2017-11-01 +27943406,Fast Genome-Wide QTL Association Mapping on Pedigree and Population Data.,"Since most analysis software for genome-wide association studies (GWAS) currently exploit only unrelated individuals, there is a need for efficient applications that can handle general pedigree data or mixtures of both population and pedigree data. Even datasets thought to consist of only unrelated individuals may include cryptic relationships that can lead to false positives if not discovered and controlled for. In addition, family designs possess compelling advantages. They are better equipped to detect rare variants, control for population stratification, and facilitate the study of parent-of-origin effects. Pedigrees selected for extreme trait values often segregate a single gene with strong effect. Finally, many pedigrees are available as an important legacy from the era of linkage analysis. Unfortunately, pedigree likelihoods are notoriously hard to compute. In this paper, we reexamine the computational bottlenecks and implement ultra-fast pedigree-based GWAS analysis. Kinship coefficients can either be based on explicitly provided pedigrees or automatically estimated from dense markers. Our strategy (a) works for random sample data, pedigree data, or a mix of both; (b) entails no loss of power; (c) allows for any number of covariate adjustments, including correction for population stratification; (d) allows for testing SNPs under additive, dominant, and recessive models; and (e) accommodates both univariate and multivariate quantitative traits. On a typical personal computer (six CPU cores at 2.67 GHz), analyzing a univariate HDL (high-density lipoprotein) trait from the San Antonio Family Heart Study (935,392 SNPs on 1,388 individuals in 124 pedigrees) takes less than 2 min and 1.5 GB of memory. 
Complete multivariate QTL analysis of the three time-points of the longitudinal HDL multivariate trait takes less than 5 min and 1.5 GB of memory. The algorithm is implemented as the Ped-GWAS Analysis (Option 29) in the Mendel statistical genetics package, which is freely available for Macintosh, Linux, and Windows platforms from http://genetics.ucla.edu/software/mendel.",2016-12-12 +28449110,veqtl-mapper: variance association mapping for molecular phenotypes.,"

Motivation

Genetic loci associated with the variance of phenotypic traits have been of recent interest as they can be signatures of genetic interactions, gene by environment interactions, parent of origin effects and canalization. We present a fast efficient tool to map loci affecting variance of gene expression and other molecular phenotypes in cis. Results: Applied to the publicly available Geuvadis gene expression dataset, we identify 816 loci associated with variance of gene expression using an additive model, and 32 showing differences in variance between homozygous and heterozygous alleles, signatures of parent of origin effects.

Availability and implementation

Documentation and links to source code and binaries for linux can be found at https://funpopgen.github.io/veqm/ .

Contact

andrew.brown@unige.ch.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +29415450,Design and Implementation of Cloud-Centric Configuration Repository for DIY IoT Applications. ,"The Do-It-Yourself (DIY) vision for the design of a smart and customizable IoT application demands the involvement of the general public in its development process. The general public lacks the technical knowledge for programming state-of-the-art prototyping and development kits. The latest IoT kits, for example, Raspberry Pi, are revolutionizing the DIY paradigm for IoT, and more than ever, a DIY intuitive programming interface is required to enable the masses to interact with and customize the behavior of remote IoT devices on the Internet. However, in most cases, these DIY toolkits store the resultant configuration data in local storage and, thus, cannot be accessed remotely. This paper presents the novel implementation of such a system, which not only enables the general public to customize the behavior of remote IoT devices through a visual interface, but also makes the configuration available everywhere and anytime by leveraging the power of cloud-based platforms. The interface enables the visualization of the resources exposed by remote embedded resources in the form of graphical virtual objects (VOs). These VOs are used to create the service design through simple operations like drag-and-drop and the setting of properties. The configuration created as a result is maintained as an XML document, which is ingested by the cloud platform, thus making it available to be used anywhere. We use the HTTP approach for the communication between the cloud and IoT toolbox and the cloud and real devices, but for communication between the toolbox and actual resources, CoAP is used. 
Finally, a smart home case study has been implemented and presented in order to assess the effectiveness of the proposed work.",2018-02-06 +24234444,CollecTF: a database of experimentally validated transcription factor-binding sites in Bacteria.,"The influx of high-throughput data and the need for complex models to describe the interaction of prokaryotic transcription factors (TF) with their target sites pose new challenges for TF-binding site databases. CollecTF (http://collectf.umbc.edu) compiles data on experimentally validated, naturally occurring TF-binding sites across the Bacteria domain, placing a strong emphasis on the transparency of the curation process, the quality and availability of the stored data and fully customizable access to its records. CollecTF integrates multiple sources of data automatically and openly, allowing users to dynamically redefine binding motifs and their experimental support base. Data quality and currency are fostered in CollecTF by adopting a sustainable model that encourages direct author submissions in combination with in-house validation and curation of published literature. CollecTF entries are periodically submitted to NCBI for integration into RefSeq complete genome records as link-out features, maximizing the visibility of the data and enriching the annotation of RefSeq files with regulatory information. Seeking to facilitate comparative genomics and machine-learning analyses of regulatory interactions, in its initial release CollecTF provides domain-wide coverage of two TF families (LexA and Fur), as well as extensive representation for a clinically important bacterial family, the Vibrionaceae.",2013-11-14 +28664706,Neuromarkers of Post-Traumatic Stress Disorder (PTSD) in a patient after bilateral hand amputation - ERP case study.,"Introduction. 
There is a lack in the worldwide literature of reports on the Neuromarkers of Post-Traumatic Stress Disorder (PTSD) in patients after bilateral hand amputation. The aim of this study was to test a hypothesis regarding developing Post-Traumatic Stress Disorder (PTSD) in a patient after bilateral hand amputation with the use of Event Related Potentials (ERPs). On the basis of previous research, the amplitudes of P3 ERP components elicited in the cued GO/NOGO tasks have been chosen as candidates for neuromarkers of PTSD. Case study. A 24-year-old patient had undergone bilateral hand amputation 12 months previously. The patient was repeatedly operated on (he had undergone successful bilateral hand replantation) and despite the severity of the injuries, he recovered. However, the patient complained of flashbacks, anxiety and sleep difficulties. Specialist tests showed the presence of PTSD. The patient participated in the cued GO/NOGO task (Kropotov, 2009) with recording 19-channel EEG. P3 GO and NOGO waves in this task were found to be significantly smaller, in comparison to a group of healthy control subjects of the same age (N=23) taken from the HBI normative database (https://www.hbimed.com/). This observed pattern of ERP waves in the patient corresponds to the pattern found in PTSD patients. Conclusions. ERPs in a GO/NOGO task can be used in the assessment of the functional brain changes induced by chronic PTSD.",2017-06-08 +28613173,Large-Scale Crowdsourced Study for Tone-Mapped HDR Pictures.,"Measuring digital picture quality, as perceived by human observers, is increasingly important in many applications in which humans are the ultimate consumers of visual information. Standard dynamic range (SDR) images provide 8 b/color/pixel. High dynamic range (HDR) images, usually created from multiple exposures of the same scene, can provide 16 or 32 b/color/pixel, but need to be tonemapped to SDR for display on standard monitors. 
Multiexposure fusion (MEF) techniques bypass HDR creation by fusing an exposure stack directly to SDR images to achieve aesthetically pleasing luminance and color distributions. Many HDR and MEF databases have a relatively small number of images and human opinion scores, obtained under stringently controlled conditions, thereby limiting realistic viewing. Moreover, many of these databases are intended to compare tone-mapping algorithms, rather than being specialized for developing and comparing image quality assessment models. To overcome these challenges, we conducted a massively crowdsourced online subjective study. The primary contributions described in this paper are: 1) the new ESPL-LIVE HDR Image Database that we created containing diverse images obtained by tone-mapping operators and MEF algorithms, with and without post-processing; 2) a large-scale subjective study that we conducted using a crowdsourced platform to gather more than 300 000 opinion scores on 1811 images from over 5000 unique observers; and 3) a detailed study of the correlation performance of the state-of-the-art no-reference image quality assessment algorithms against human opinion scores of these images. The database is available at http://signal.ece.utexas.edu/%7Edebarati/HDRDatabase.zip.",2017-06-08 +28595649,Data-driven prediction of adverse drug reactions induced by drug-drug interactions.,"

Background

The expanded use of multiple drugs has increased the occurrence of adverse drug reactions (ADRs) induced by drug-drug interactions (DDIs). However, such reactions are typically not observed in clinical drug-development studies because most of them focus on single-drug therapies. ADR reporting systems collect information on adverse health effects caused by both single drugs and DDIs. A major challenge is to unambiguously identify the effects caused by DDIs and to attribute them to specific drug interactions. A computational method that provides prospective predictions of potential DDI-induced ADRs will help to identify and mitigate these adverse health effects.

Method

We hypothesize that drug-protein interactions can be used as independent variables in predicting ADRs. We constructed drug pair-protein interaction profiles for ~800 drugs using drug-protein interaction information in the public domain. We then constructed statistical models to score drug pairs for their potential to induce ADRs based on drug pair-protein interaction profiles.

Results

We used extensive clinical database information to construct categorical prediction models for drug pairs that are likely to induce ADRs via synergistic DDIs and showed that model performance deteriorated only slightly, with a moderate amount of false positives and false negatives in the training samples, as evaluated by our cross-validation analysis. The cross validation calculations showed an average prediction accuracy of 89% across 1,096 ADR models that captured the deleterious effects of synergistic DDIs. Because the models rely on drug-protein interactions, we made predictions for pairwise combinations of 764 drugs that are currently on the market and for which drug-protein interaction information is available. These predictions are publicly accessible at http://avoid-db.bhsai.org . We used the predictive models to analyze broader aspects of DDI-induced ADRs, showing that ~10% of all combinations have the potential to induce ADRs via DDIs. This allowed us to identify potential DDI-induced ADRs not yet clinically reported. The ability of the models to quantify adverse effects between drug classes also suggests that we may be able to select drug combinations that minimize the risk of ADRs.

Conclusion

Almost all information on DDI-induced ADRs is generated after drug approval. This situation poses significant health risks for vulnerable patient populations with comorbidities. To help mitigate the risks, we developed a robust probabilistic approach to prospectively predict DDI-induced ADRs. Based on this approach, we developed prediction models for 1,096 ADRs and used them to predict the propensity of all pairwise combinations of nearly 800 drugs to be associated with these ADRs via DDIs. We made the predictions publicly available via internet access.",2017-06-08 +28594829,"Mendel,MD: A user-friendly open-source web tool for analyzing WES and WGS in the diagnosis of patients with Mendelian disorders.","Whole exome and whole genome sequencing have both become widely adopted methods for investigating and diagnosing human Mendelian disorders. As pangenomic agnostic tests, they are capable of more accurate and agile diagnosis compared to traditional sequencing methods. This article describes new software called Mendel,MD, which combines multiple types of filter options and makes use of regularly updated databases to facilitate exome and genome annotation, the filtering process and the selection of candidate genes and variants for experimental validation and possible diagnosis. This tool offers a user-friendly interface, and leads clinicians through simple steps by limiting the number of candidates to achieve a final diagnosis of a medical genetics case. A useful innovation is the ""1-click"" method, which enables listing all the relevant variants in genes present at OMIM for perusal by clinicians. Mendel,MD was experimentally validated using clinical cases from the literature and was tested by students at the Universidade Federal de Minas Gerais, at GENE-Núcleo de Genética Médica in Brazil and at the Children's University Hospital in Dublin, Ireland. 
We show in this article how it can simplify and increase the speed of identifying the culprit mutation in each of the clinical cases that were received for further investigation. Mendel,MD proved to be a reliable web-based tool, being open-source and time efficient for identifying the culprit mutation in different clinical cases of patients with Mendelian Disorders. It is also freely accessible for academic users on the following URL: https://mendelmd.org.",2017-06-08 +25324316,"EzCatDB: the enzyme reaction database, 2015 update.","The EzCatDB database (http://ezcatdb.cbrc.jp/EzCatDB/) has emphasized manual classification of enzyme reactions from the viewpoints of enzyme active-site structures and their catalytic mechanisms based on literature information, amino acid sequences of enzymes (UniProtKB) and the corresponding tertiary structures from the Protein Data Bank (PDB). Reaction types such as hydrolysis, transfer, addition, elimination, isomerization, hydride transfer and electron transfer have been included in the reaction classification, RLCP. This database includes information related to ligand molecules on the enzyme structures in the PDB data, classified in terms of cofactors, substrates, products and intermediates, which are also necessary to elucidate the catalytic mechanisms. Recently, the database system was updated. The 3D structures of active sites for each PDB entry can be viewed using Jmol or Rasmol software. Moreover, sequence search systems of two types were developed for the EzCatDB database: EzCat-BLAST and EzCat-FORTE. EzCat-BLAST is suitable for quick searches, adopting the BLAST algorithm, whereas EzCat-FORTE is more suitable for detecting remote homologues, adopting the algorithm for FORTE protein structure prediction software. 
Another system, EzMetAct, is also available for searching for major active-site structures in EzCatDB, for which PDB-formatted queries can be searched.",2014-10-16 +29741556,"Eating at food outlets and leisure places and ""on the go"" is associated with less-healthy food choices than eating at home and in school in children: cross-sectional data from the UK National Diet and Nutrition Survey Rolling Program (2008-2014).","

Background

Where children eat has been linked to variations in diet quality, including the consumption of low-nutrient, energy-dense food, a recognized risk factor for obesity.

Objective

The aim of this study was to provide a comprehensive analysis of consumption patterns and nutritional intake by eating location in British children with the use of a nationally representative survey.

Design

Cross-sectional data from 4636 children (80,075 eating occasions) aged 1.5-18 y from the UK National Diet and Nutrition Survey Rolling Program (2008-2014) were analyzed. Eating locations were categorized as home, school, work, leisure places, food outlets, and ""on the go."" Foods were classified into core (considered important or acceptable within a healthy diet) and noncore (all other foods). Other variables included the percentage of meals eaten at home, sex, ethnicity, body mass index, income, frequency of eating out, takeaway meal consumption, alcohol consumption, and smoking.

Results

The main eating location across all age groups was at home (69-79% of eating occasions), with the highest energy intakes. One-third of children from the least-affluent families consumed ≤25% of meals at home. Eating more at home was associated with less sugar and takeaway food consumption. Eating occasions in leisure places, food outlets, and ""on the go"" combined increased with age, from 5% (1.5-3 y) to 7% (11-18 y), with higher energy intakes from noncore foods in these locations. The school environment was associated with higher intakes of core foods and reduced intakes of noncore foods in children aged 4-10 y who ate school-sourced foods.

Conclusions

Home and school eating are associated with better food choices, whereas other locations are associated with poor food choices. Effective, sustained initiatives targeted at behaviors and improving access to healthy foods in leisure centers and food outlets, including food sold to eat ""on the go,"" may improve food choices. Home remains an important target for intervention through family and nutrition education, outreach, and social marketing campaigns. This trial was registered with the ISRCTN registry (https://www.isrctn.com) as ISRCTN17261407.",2018-06-01 +22053086,Comprehensive survey and geometric classification of base triples in RNA structures.,"Base triples are recurrent clusters of three RNA nucleobases interacting edge-to-edge by hydrogen bonding. We find that the central base in almost all triples forms base pairs with the other two bases of the triple, providing a natural way to geometrically classify base triples. Given 12 geometric base pair families defined by the Leontis-Westhof nomenclature, combinatoric enumeration predicts 108 potential geometric base triple families. We searched representative atomic-resolution RNA 3D structures and found instances of 68 of the 108 predicted base triple families. Model building suggests that some of the remaining 40 families may be unlikely to form for steric reasons. We developed an on-line resource that provides exemplars of all base triples observed in the structure database and models for unobserved, predicted triples, grouped by triple family, as well as by three-base combination (http://rna.bgsu.edu/Triples). The classification helps to identify recurrent triple motifs that can substitute for each other while conserving RNA 3D structure, with applications in RNA 3D structure prediction and analysis of RNA sequence evolution.",2011-11-03 +24923822,MorusDB: a resource for mulberry genomics and genome biology. 
,"Mulberry is an important cultivated plant that has received the attention of biologists interested in sericulture and plant-insect interaction. Morus notabilis, a wild mulberry species with a minimal chromosome number is an ideal material for whole-genome sequencing and assembly. The genome and transcriptome of M. notabilis were sequenced and analyzed. In this article, a web-based and open-access database, the Morus Genome Database (MorusDB), was developed to enable easy-to-access and data mining. The MorusDB provides an integrated data source and an easy accession of mulberry large-scale genomic sequencing and assembly, predicted genes and functional annotations, expressed sequence tags (ESTs), transposable elements (TEs), Gene Ontology (GO) terms, horizontal gene transfers between mulberry and silkworm and ortholog and paralog groups. Transcriptome sequencing data for M. notabilis root, leaf, bark, winter bud and male flower can also be searched and downloaded. Furthermore, MorusDB provides an analytical workbench with some built-in tools and pipelines, such as BLAST, Search GO, Mulberry GO and Mulberry GBrowse, to facilitate genomic studies and comparative genomics. The MorusDB provides important genomic resources for scientists working with mulberry and other Moraceae species, which include many important fruit crops. Designed as a basic platform and accompanied by the SilkDB, MorusDB strives to be a comprehensive platform for the silkworm-mulberry interaction studies. Database URL: http://morus.swu.edu.cn/morusdb.",2014-06-11 +28519717,Phytophthora cinnamomi.,"Phytophthora cinnamomi is one of the most devastating plant pathogens in the world. It infects close to 5000 species of plants, including many of importance in agriculture, forestry and horticulture. The inadvertent introduction of P. 
cinnamomi into natural ecosystems, including a number of recognized Global Biodiversity Hotspots, has had disastrous consequences for the environment and the biodiversity of flora and fauna. The genus Phytophthora belongs to the Class Oomycetes, a group of fungus-like organisms that initiate plant disease through the production of motile zoospores. Disease control is difficult in agricultural and forestry situations and even more challenging in natural ecosystems as a result of the scale of the problem and the limited range of effective chemical inhibitors. The development of sustainable control measures for the future management of P. cinnamomi requires a comprehensive understanding of the cellular and molecular basis of pathogen development and pathogenicity. The application of next-generation sequencing technologies to generate genomic and transcriptomic data promises to underpin a new era in P. cinnamomi research and discovery. The aim of this review is to integrate bioinformatic analyses of P. cinnamomi sequence data with current knowledge of the cellular and molecular basis of P. cinnamomi growth, development and plant infection. The goal is to provide a framework for future research by highlighting potential pathogenicity genes, shedding light on their possible functions and identifying suitable targets for future control measures.

Taxonomy

Phytophthora cinnamomi Rands; Kingdom Chromista; Phylum Oomycota or Pseudofungi; Class Oomycetes; Order Peronosporales; Family Peronosporaceae; genus Phytophthora.

Host range

Infects about 5000 species of plants, including 4000 Australian native species. Host plants important for agriculture and forestry include avocado, chestnut, macadamia, oak, peach and pineapple.

Disease symptoms

A root pathogen which causes rotting of fine and fibrous roots, but which can also cause stem cankers. Root damage may inhibit water movement from roots to shoots, leading to dieback of young shoots. USEFUL WEBSITES: http://fungidb.org/fungidb/; http://genome.jgi.doe.gov/Phyci1/Phyci1.home.html; http://www.ncbi.nlm.nih.gov/assembly/GCA_001314365.1; http://www.ncbi.nlm.nih.gov/assembly/GCA_001314505.1.",2017-08-22 +27876823,Alignment-free Transcriptomic and Metatranscriptomic Comparison Using Sequencing Signatures with Variable Length Markov Chains.,"The comparison between microbial sequencing data is critical to understand the dynamics of microbial communities. The alignment-based tools analyzing metagenomic datasets require reference sequences and read alignments. The available alignment-free dissimilarity approaches model the background sequences with Fixed Order Markov Chain (FOMC) yielding promising results for the comparison of microbial communities. However, in FOMC, the number of parameters grows exponentially with the increase of the order of Markov Chain (MC). Under a fixed high order of MC, the parameters might not be accurately estimated owing to the limitation of sequencing depth. In our study, we investigate an alternative to FOMC to model background sequences with the data-driven Variable Length Markov Chain (VLMC) in metatranscriptomic data. The VLMC originally designed for long sequences was extended to apply to high-throughput sequencing reads and the strategies to estimate the corresponding parameters were developed. The flexible number of parameters in VLMC avoids estimating the vast number of parameters of high-order MC under limited sequencing depth. Different from the manual selection in FOMC, VLMC determines the MC order adaptively. Several beta diversity measures based on VLMC were applied to compare the bacterial RNA-Seq and metatranscriptomic datasets. 
Experiments show that VLMC outperforms FOMC to model the background sequences in transcriptomic and metatranscriptomic samples. A software pipeline is available at https://d2vlmc.codeplex.com.",2016-11-23 +29971021,A Systematic Review of Amenable Resilience Factors That Moderate and/or Mediate the Relationship Between Childhood Adversity and Mental Health in Young People.,"Background: Up to half of Western children and adolescents experience at least one type of childhood adversity. Individuals with a history of childhood adversity have an increased risk of psychopathology. Resilience enhancing factors reduce the risk of psychopathology following childhood adversity. A comprehensive overview of empirically supported resilience factors is critically important for interventions aimed to increase resilience in young people. Moreover, such an overview may aid the development of novel resilience theories. Therefore, we conducted the first systematic review of social, emotional, cognitive and/or behavioral resilience factors after childhood adversity. Methods: We systematically searched Web of Science, PsycINFO, and Scopus (e.g., including MEDLINE) for English, Dutch, and German literature. We included cohort studies that examined whether a resilience factor was a moderator and/or a mediator for the relationship between childhood adversity and psychopathology in young people (mean age 13-24). Therefore, studies were included if the resilience factor was assessed prior to psychopathology, and childhood adversity was assessed no later than the resilience factor. Study data extraction was based on the STROBE report and study quality was assessed with an adapted version of Downs and Black's scale. The preregistered protocol can be found at: http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42016051978. Results: The search identified 1969 studies, of which 22 were included (eight nationalities, study sample n range: 59-6780). 
We found empirical support for 13 of 25 individual-level (e.g., high self-esteem, low rumination), six of 12 family-level (e.g., high family cohesion, high parental involvement), and one of five community-level resilience factors (i.e., high social support), to benefit mental health in young people exposed to childhood adversity. Single vs. multiple resilience factor models supported the notion that resilience factors should not be studied in isolation, and that interrelations between resilience factors should be taken into account when predicting psychopathology after childhood adversity. Conclusions: Interventions that improve individual, family, and/or social support resilience factors may reduce the risk of psychopathology following childhood adversity. Future research should scrutinize whether resilience factors function as a complex interrelated system that benefits mental health resilience after childhood adversity.",2018-06-19 +27295683,Data Management for Heterogeneous Genomic Datasets.,"Next Generation Sequencing (NGS), a family of technologies for reading DNA and RNA, is changing biological research, and will soon change medical practice, by quickly providing sequencing data and high-level features of numerous individual genomes in different biological and clinical conditions. The availability of millions of whole genome sequences may soon become the biggest and most important ""big data"" problem of mankind. In this exciting framework, we recently proposed a new paradigm to raise the level of abstraction in NGS data management, by introducing a GenoMetric Query Language (GMQL) and demonstrating its usefulness through several biological query examples. Leveraging on that effort, here we motivate and formalize GMQL operations, especially focusing on the most characteristic and domain-specific ones. 
Furthermore, we address their efficient implementation and illustrate the architecture of the new software system that we have developed for their execution on big genomic data in a cloud computing environment, providing the evaluation of its performance. The new system implementation is available for download at the GMQL website (http://www.bioinformatics.deib.polimi.it/GMQL/); GMQL can also be tested through a set of predefined queries on ENCODE and Roadmap Epigenomics data at http://www.bioinformatics.deib.polimi.it/GMQL/queries/.",2016-06-07 +25065645,The genome-scale metabolic network of Ectocarpus siliculosus (EctoGEM): a resource to study brown algal physiology and beyond.,"Brown algae (stramenopiles) are key players in intertidal ecosystems, and represent a source of biomass with several industrial applications. Ectocarpus siliculosus is a model to study the biology of these organisms. Its genome has been sequenced and a number of post-genomic tools have been implemented. Based on this knowledge, we report the reconstruction and analysis of a genome-scale metabolic network for E. siliculosus, EctoGEM (http://ectogem.irisa.fr). This atlas of metabolic pathways consists of 1866 reactions and 2020 metabolites, and its construction was performed by means of an integrative computational approach for identifying metabolic pathways, gap filling and manual refinement. The capability of the network to produce biomass was validated by flux balance analysis. EctoGEM enabled the reannotation of 56 genes within the E. siliculosus genome, and shed light on the evolution of metabolic processes. For example, E. siliculosus has the potential to produce phenylalanine and tyrosine from prephenate and arogenate, but does not possess a phenylalanine hydroxylase, as is found in other stramenopiles. 
It also possesses the complete eukaryote molybdenum co-factor biosynthesis pathway, as well as a second molybdopterin synthase that was most likely acquired via horizontal gene transfer from cyanobacteria by a common ancestor of stramenopiles. EctoGEM represents an evolving community resource to gain deeper understanding of the biology of brown algae and the diversification of physiological processes. The integrative computational method applied for its reconstruction will be valuable to set up similar approaches for other organisms distant from biological benchmark models.",2014-08-27 +22927906,Genometa--a fast and accurate classifier for short metagenomic shotgun reads.,"

Unlabelled

Metagenomic studies use high-throughput sequence data to investigate microbial communities in situ. However, considerable challenges remain in the analysis of these data, particularly with regard to speed and reliable analysis of microbial species as opposed to higher level taxa such as phyla. We here present Genometa, a computationally undemanding graphical user interface program that enables identification of bacterial species and gene content from datasets generated by inexpensive high-throughput short read sequencing technologies. Our approach was first verified on two simulated metagenomic short read datasets, detecting 100% and 94% of the bacterial species included with few false positives or false negatives. Subsequent comparative benchmarking analysis against three popular metagenomic algorithms on an Illumina human gut dataset revealed Genometa to attribute the most reads to bacteria at species level (i.e. including all strains of that species) and demonstrate similar or better accuracy than the other programs. Lastly, speed was demonstrated to be many times that of BLAST due to the use of modern short read aligners. Our method is highly accurate if bacteria in the sample are represented by genomes in the reference sequence but cannot find species absent from the reference. This method is one of the most user-friendly and resource efficient approaches and is thus feasible for rapidly analysing millions of short reads on a personal computer.

Availability

The Genometa program, a step by step tutorial and Java source code are freely available from http://genomics1.mh-hannover.de/genometa/ and on http://code.google.com/p/genometa/. This program has been tested on Ubuntu Linux and Windows XP/7.",2012-08-21 +27086506,HiQuant: Rapid Postquantification Analysis of Large-Scale MS-Generated Proteomics Data.,"Recent advances in mass-spectrometry-based proteomics are now facilitating ambitious large-scale investigations of the spatial and temporal dynamics of the proteome; however, the increasing size and complexity of these data sets is overwhelming current downstream computational methods, specifically those that support the postquantification analysis pipeline. Here we present HiQuant, a novel application that enables the design and execution of a postquantification workflow, including common data-processing steps, such as assay normalization and grouping, and experimental replicate quality control and statistical analysis. HiQuant also enables the interpretation of results generated from large-scale data sets by supporting interactive heatmap analysis and also the direct export to Cytoscape and Gephi, two leading network analysis platforms. HiQuant may be run via a user-friendly graphical interface and also supports complete one-touch automation via a command-line mode. We evaluate HiQuant's performance by analyzing a large-scale, complex interactome mapping data set and demonstrate a 200-fold improvement in the execution time over current methods. We also demonstrate HiQuant's general utility by analyzing proteome-wide quantification data generated from both a large-scale public tyrosine kinase siRNA knock-down study and an in-house investigation into the temporal dynamics of the KSR1 and KSR2 interactomes. 
Download HiQuant, sample data sets, and supporting documentation at http://hiquant.primesdb.eu .",2016-05-16 +27993952,Structural identifiability of equilibrium ligand-binding parameters.,"Understanding the interactions of proteins with their ligands requires knowledge of molecular properties, such as binding site affinities and the effects that binding at one site exerts on binding at other sites (cooperativity). These properties cannot be measured directly and are usually estimated by fitting binding data with models that contain these quantities as parameters. In this study, we present a general method for answering the critical question of whether these parameters are identifiable (i.e., whether their estimates are accurate and unique). In cases in which parameter estimates are not unique, our analysis provides insight into the fundamental causes of nonidentifiability. This approach can thus serve as a guide for the proper design and analysis of protein-ligand binding experiments. We show that the equilibrium total binding relation can be reduced to a conserved mathematical form for all models composed solely of bimolecular association reactions and to a related, conserved form for all models composed of arbitrary combinations of binding and conformational equilibria. This canonical mathematical structure implies a universal parameterization of the binding relation that is consistent with virtually any physically reasonable binding model, for proteins with any number of binding sites. Matrix algebraic methods are used to prove that these universal parameter sets are structurally identifiable (SI; i.e., identifiable under conditions of noiseless data). A general approach for assessing and understanding the factors governing practical identifiability (i.e., the identifiability under conditions of real, noisy data) of these SI parameter sets is presented in the companion paper by Middendorf and Aldrich (2017. J. Gen. Physiol. 
https://doi.org/10.1085/jgp.201611703).",2016-12-19 +27412091,VisualGraphX: interactive graph visualization within Galaxy.,"

Motivation

We developed VisualGraphX, a web-based, interactive visualization tool for large-scale graphs. Current graph visualization tools that follow the rich-internet paradigm lack an interactive and scalable visualization of graph-based data. VisualGraphX aims to provide a universal graph visualization tool that empowers the users to efficiently explore the data for themselves at a large scale. It is available as a visualization plugin for the Galaxy platform, such that VisualGraphX can be integrated into custom analysis pipelines.

Availability and implementation

VisualGraphX has been released as a visualization plugin for the Galaxy platform under AFL 3.0 and is available with instructions and application data at http://gitlab.com/comptrans/VisualGraphX/ CONTACT: bjoern.voss@ibvt.uni-stuttgart.de.",2016-07-13 +25313157,"Xenbase, the Xenopus model organism database; new virtualized system, data types and genomes.","Xenbase (http://www.xenbase.org), the Xenopus frog model organism database, integrates a wide variety of data from this biomedical model genus. Two closely related species are represented: the allotetraploid Xenopus laevis that is widely used for microinjection and tissue explant-based protocols, and the diploid Xenopus tropicalis which is used for genetics and gene targeting. The two species are extremely similar and protocols, reagents and results from each species are often interchangeable. Xenbase imports, indexes, curates and manages data from both species; all of which are mapped via unique IDs and can be queried in either a species-specific or species agnostic manner. All our services have now migrated to a private cloud to achieve better performance and reliability. We have added new content, including providing full support for morpholino reagents, used to inhibit mRNA translation or splicing and binding to regulatory microRNAs. New genomes assembled by the JGI for both species and are displayed in Gbrowse and are also available for searches using BLAST. Researchers can easily navigate from genome content to gene page reports, literature, experimental reagents and many other features using hyperlinks. Xenbase has also greatly expanded image content for figures published in papers describing Xenopus research via PubMedCentral.",2014-10-13 +28588237,TCM-Mesh: The database and analytical system for network pharmacology analysis for TCM preparations.,"With the advancement of systems biology research, we have already seen great progress in pharmacology studies, especially in network pharmacology. 
Network pharmacology has been proven to be effective for establishing the ""compounds-proteins/genes-diseases"" network, and revealing the regulation principles of small molecules in a high-throughput manner, thus would be very effective for the analysis of drug combinations, especially for TCM preparations. In this work, we have proposed the TCM-Mesh system, which records TCM-related information collected from various resources and could serve for network pharmacology analysis for TCM preparations in a high-throughput manner (http://mesh.tcm.microbioinformatics.org/). Currently, the database contains 6,235 herbs, 383,840 compounds, 14,298 genes, 6,204 diseases, 144,723 gene-disease associations, 3,440,231 pairs of gene interactions, 163,221 side effect records and 71 toxic records, and web-based software construct a network between herbs and treated diseases, which will help to understand the underlying mechanisms for TCM preparations at molecular levels. We have used 1,293 FDA-approved drugs, as well as compounds from an herbal material Panax ginseng and a patented drug Liuwei Dihuang Wan (LDW) for evaluating our database. By comparison of different databases, as well as checking against literature, we have demonstrated the completeness, effectiveness, and accuracy of our database.",2017-06-06 +27320805,Quality improvement advice.,"A resource aimed at supporting care professionals to undertake quality improvement projects has been added to the NHS Improvement website. The guide covers aspects of service improvement, from managing successful projects to using evidence-based improvement tools, measuring outcomes and disseminating results. 
Go to http://tinyurl.com/89vpmqj.",2012-03-01 +28780907,"Country of qualification is linked to doctors' General Medical Council performance assessment rate, but is it linked to their clinical competence?","Mehdizah and colleagues recently described the prevalence of General Medical Council regulatory performance assessments by doctors' country of primary medical qualification. This article has caused anger within the UK-international medical community because it identifies graduates of certain countries with significantly raised prevalence.The present article comments on evidence from published Royal College of General Practitioners' data that support these conclusions. However, in an increasingly international age of medical education, the ambiguity of attributions of qualifying from a certain country needs addressing. Some medical students of British nationality, for example, who fail to obtain a place at a UK medical school, train in medical schools abroad, and thus may be identified as international medical graduates.Please see related article: https://bmcmededuc.biomedcentral.com/articles/10.1186/s12909-017-0903-6 .",2017-08-07 +25399423,"dbSNO 2.0: a resource for exploring structural environment, functional and disease association and regulatory network of protein S-nitrosylation.","Given the increasing number of proteins reported to be regulated by S-nitrosylation (SNO), it is considered to act, in a manner analogous to phosphorylation, as a pleiotropic regulator that elicits dual effects to regulate diverse pathophysiological processes by altering protein function, stability, and conformation change in various cancers and human disorders. Due to its importance in regulating protein functions and cell signaling, dbSNO (http://dbSNO.mbc.nctu.edu.tw) is extended as a resource for exploring structural environment of SNO substrate sites and regulatory networks of S-nitrosylated proteins. 
An increasing interest in the structural environment of PTM substrate sites motivated us to map all manually curated SNO peptides (4165 SNO sites within 2277 proteins) to PDB protein entries by sequence identity, which provides the information of spatial amino acid composition, solvent-accessible surface area, spatially neighboring amino acids, and side chain orientation for 298 substrate cysteine residues. Additionally, the annotations of protein molecular functions, biological processes, functional domains and human diseases are integrated to explore the functional and disease associations for S-nitrosoproteome. In this update, users are allowed to search a group of interested proteins/genes and the system reconstructs the SNO regulatory network based on the information of metabolic pathways and protein-protein interactions. Most importantly, an endogenous yet pathophysiological S-nitrosoproteomic dataset from colorectal cancer patients was adopted to demonstrate that dbSNO could discover potential SNO proteins involving in the regulation of NO signaling for cancer pathways.",2014-11-15 +28405948,A De-Identification Pipeline for Ultrasound Medical Images in DICOM Format.,"Clinical data sharing between healthcare institutions, and between practitioners is often hindered by privacy protection requirements. This problem is critical in collaborative scenarios where data sharing is fundamental for establishing a workflow among parties. The anonymization of patient information burned in DICOM images requires elaborate processes somewhat more complex than simple de-identification of textual information. Usually, before sharing, there is a need for manual removal of specific areas containing sensitive information in the images. 
In this paper, we present a pipeline for ultrasound medical image de-identification, provided as a free anonymization REST service for medical image applications, and a Software-as-a-Service to streamline automatic de-identification of medical images, which is freely available for end-users. The proposed approach applies image processing functions and machine-learning models to bring about an automatic system to anonymize medical images. To perform character recognition, we evaluated several machine-learning models, with Convolutional Neural Networks (CNN) selected as the best approach. For assessing the system quality, 500 processed images were manually inspected showing an anonymization rate of 89.2%. The tool can be accessed at https://bioinformatics.ua.pt/dicom/anonymizer and it is available with the most recent version of Google Chrome, Mozilla Firefox and Safari. A Docker image containing the proposed service is also publicly available for the community.",2017-04-13 +24330312,Marmal-aid--a database for Infinium HumanMethylation450.,"

Background

DNA methylation is indispensable for normal human genome function. Currently there is an increasingly large number of DNA methylomic data being released in the public domain allowing for an opportunity to investigate the relationships between the DNA methylome, genome function, and human phenotypes. The Illumina450K is one of the most popular platforms for assessing DNA methylation with over 10,000 samples available in the public domain. However, accessing all this data requires downloading each individual experiment and due to inconsistent annotation, accessing the right data can be a challenge.

Description

Here we introduce 'Marmal-aid', the first standardised database for DNA methylation (freely available at http://marmal-aid.org). In Marmal-aid, the majority of publicly available Illumina HumanMethylation450 data is incorporated into a single repository allowing for re-processing of data including normalisation and imputation of missing values. The database is accessible in two ways: (1) Using an R package to allow for incorporation into existing analysis pipelines which can then be easily queried to gain insight into the functionality of certain CpG sites. This is aimed at a bioinformatician with experience in R. (2) Using a graphical interface allowing general biologists to query a pre-defined set of tissues (currently 15) providing a reference database of the methylation state in these tissues for the 450,000 CpG sites profiled by the Illumina HumanMethylation450.

Conclusion

Marmal-aid is the largest publicly available Illumina HumanMethylation450 methylation database combining Illumina HumanMethylation450 data from a number of sources into a single location with a single common annotation format. This allows for automated extraction using the R package and inclusion into existing analysis pipelines. Marmal-aid also provides an easy-to-use GUI to visualise methylation data in user-defined genomic regions for various reference tissues.",2013-12-12 +26446144,The Contribution of National Spontaneous Reporting Systems to Detect Signals of Torsadogenicity: Issues Emerging from the ARITMO Project.,"

Introduction

Spontaneous reporting systems (SRSs) are pivotal for signal detection, especially for rare events with a high drug-attributable component, such as torsade de pointes (TdP). Use of different national SRSs is rarely attempted because of inherent difficulties, but should be considered on the assumption that rare events are diluted in international databases.

Objective

The aim was to describe TdP-related events associated with antipsychotics, H1-antihistamines and anti-infectives in three national SRSs (in Italy, Germany and France) and highlight potential signals of torsadogenicity through a combined literature evaluation.

Methods

A common search strategy was applied to extract TdP-related events: (1) TdP, (2) QT interval abnormalities, (3) ventricular fibrillation/tachycardia, and (4) sudden cardiac death. Signals of disproportionate reporting (SDRs) were calculated for TdP + QT interval abnormalities and defined by a lower limit of the 95 % confidence interval of the reporting odds ratio (ROR) >1. Among SDRs with at least three cases without concomitant pro-arrhythmic drugs, we defined potential new signal of torsadogenicity as drugs with no published evidence from (a) the crediblemeds(®) website ( http://www.crediblemeds.com , as of November 1st, 2014); (b) studies on the FDA Adverse Event Reporting System (FAERS); and (c) safety trials or pharmaco-epidemiological studies (as of December 16th, 2014).

Results

Overall, 3505 cases were retrieved (1372, 1468, and 801 for France, Germany and Italy, respectively). Antipsychotics were mainly recorded in Germany (792 cases), whereas antibiotics peaked at 515 and 491 (France and Italy, respectively). Forty-one drugs met criteria for SDRs in at least one single source, of which 31 were detected only from one single SRS: 18, ten and three (French, German and Italian SRS, respectively). By contrast, only five SDRs were detected in all national data sources (amisulpride, aripiprazole, haloperidol, olanzapine, risperidone). Overall, five potential new signals of torsadogenicity were identified: flupentixol, ganciclovir, levocetirizine, oxatomide and tiapride.

Conclusions

We found differences across and within national SRSs in the reporting of drug-induced TdP, which finally resulted in five potential new signals of torsadogenicity. These findings warrant targeted pharmacovigilance studies to formally assess the existence of actual drug-event associations.",2016-01-01 +26072509,IgRepertoireConstructor: a novel algorithm for antibody repertoire construction and immunoproteogenomics analysis.,"

Unlabelled

The analysis of concentrations of circulating antibodies in serum (antibody repertoire) is a fundamental, yet poorly studied, problem in immunoinformatics. The two current approaches to the analysis of antibody repertoires [next generation sequencing (NGS) and mass spectrometry (MS)] present difficult computational challenges since antibodies are not directly encoded in the germline but are extensively diversified by somatic recombination and hypermutations. Therefore, the protein database required for the interpretation of spectra from circulating antibodies is custom for each individual. Although such a database can be constructed via NGS, the reads generated by NGS are error-prone and even a single nucleotide error precludes identification of a peptide by the standard proteomics tools. Here, we present the IgRepertoireConstructor algorithm that performs error-correction of immunosequencing reads and uses mass spectra to validate the constructed antibody repertoires.

Availability and implementation

IgRepertoireConstructor is open source and freely available as a C++ and Python program running on all Unix-compatible platforms. The source code is available from http://bioinf.spbau.ru/igtools.

Contact

ppevzner@ucsd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-01 +28583129,Co-expressed Pathways DataBase for Tomato: a database to predict pathways relevant to a query gene.,"

Background

Gene co-expression, the similarity of gene expression profiles under various experimental conditions, has been used as an indicator of functional relationships between genes, and many co-expression databases have been developed for predicting gene functions. These databases usually provide users with a co-expression network and a list of strongly co-expressed genes for a query gene. Several of these databases also provide functional information on a set of strongly co-expressed genes (i.e., provide biological processes and pathways that are enriched in these strongly co-expressed genes), which is generally analyzed via over-representation analysis (ORA). A limitation of this approach may be that users can predict gene functions only based on the strongly co-expressed genes.

Results

In this study, we developed a new co-expression database that enables users to predict the function of tomato genes from the results of functional enrichment analyses of co-expressed genes while considering the genes that are not strongly co-expressed. To achieve this, we used the ORA approach with several thresholds to select co-expressed genes, and performed gene set enrichment analysis (GSEA) applied to a ranked list of genes ordered by the co-expression degree. We found that internal correlation in pathways affected the significance levels of the enrichment analyses. Therefore, we introduced a new measure for evaluating the relationship between the gene and pathway, termed the percentile (p)-score, which enables users to predict functionally relevant pathways without being affected by the internal correlation in pathways. In addition, we evaluated our approaches using receiver operating characteristic curves, which concluded that the p-score could improve the performance of the ORA.

Conclusions

We developed a new database, named Co-expressed Pathways DataBase for Tomato, which is available at http://cox-path-db.kazusa.or.jp/tomato . The database allows users to predict pathways that are relevant to a query gene, which would help to infer gene functions.",2017-06-05 +28369168,ConKit: a python interface to contact predictions.,"

Summary

Recent advances in protein residue contact prediction algorithms have led to the emergence of many new methods and a variety of file formats. We present ConKit, an open-source, modular and extensible Python interface which allows facile conversion between formats and provides an interface to analyses of sequence alignments and sets of contact predictions.

Availability and implementation

ConKit is available via the Python Package Index. The documentation can be found at http://www.conkit.org . ConKit is licensed under the BSD 3-Clause.

Contact

hlfsimko@liverpool.ac.uk or drigden@liverpool.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +24288377,Recent updates and developments to plant genome size databases.,"Two plant genome size databases have been recently updated and/or extended: the Plant DNA C-values database (http://data.kew.org/cvalues), and GSAD, the Genome Size in Asteraceae database (http://www.asteraceaegenomesize.com). While the first provides information on nuclear DNA contents across land plants and some algal groups, the second is focused on one of the largest and most economically important angiosperm families, Asteraceae. Genome size data have numerous applications: they can be used in comparative studies on genome evolution, or as a tool to appraise the cost of whole-genome sequencing programs. The growing interest in genome size and increasing rate of data accumulation has necessitated the continued update of these databases. Currently, the Plant DNA C-values database (Release 6.0, Dec. 2012) contains data for 8510 species, while GSAD has 1219 species (Release 2.0, June 2013), representing increases of 17 and 51%, respectively, in the number of species with genome size data, compared with previous releases. Here we provide overviews of the most recent releases of each database, and outline new features of GSAD. The latter include (i) a tool to visually compare genome size data between species, (ii) the option to export data and (iii) a webpage containing information about flow cytometry protocols.",2013-11-27 +27085803,ProViz-a web-based visualization tool to investigate the functional and evolutionary features of protein sequences.,"Low-throughput experiments and high-throughput proteomic and genomic analyses have created enormous quantities of data that can be used to explore protein function and evolution. The ability to consolidate these data into an informative and intuitive format is vital to our capacity to comprehend these distinct but complementary sources of information. 
However, existing tools to visualize protein-related data are restricted by their presentation, sources of information, functionality or accessibility. We introduce ProViz, a powerful browser-based tool to aid biologists in building hypotheses and designing experiments by simplifying the analysis of functional and evolutionary features of proteins. Feature information is retrieved in an automated manner from resources describing protein modular architecture, post-translational modification, structure, sequence variation and experimental characterization of functional regions. These features are mapped to evolutionary information from precomputed multiple sequence alignments. Data are displayed in an interactive and information-rich yet intuitive visualization, accessible through a simple protein search interface. This allows users with limited bioinformatic skills to rapidly access data pertinent to their research. Visualizations can be further customized with user-defined data either manually or using a REST API. ProViz is available at http://proviz.ucd.ie/.",2016-04-16 +24892985,NqA: an R-based algorithm for the normalization and analysis of microRNA quantitative real-time polymerase chain reaction data.,"In this note, we propose an R function named NqA (Normalization qPCR Array, where qPCR is quantitative real-time polymerase chain reaction) suitable for the identification of a set of microRNAs (miRNAs) to be used for data normalization in view of subsequent validation studies with qPCR data. NqA is available through the website of the Fondazione IRCCS Istituto Nazionale dei Tumori of Milan (http://www.istitutotumori.mi.it/modules.php?name=Content&pa=showpage&pid=812) with a dedicated user's guide. We applied our function on a qPCR dataset downloaded from the Gene Expression Omnibus (GEO) database. 
Results show that NqA provides a functional subset of reference miRNAs and a set of promising significantly modulated miRNAs for subsequent validation studies.",2014-06-02 +29048458,A probabilistic pathway score (PROPS) for classification with applications to inflammatory bowel disease.,"

Summary

Gene-based supervised machine learning classification models have been widely used to differentiate disease states, predict disease progression and determine effective treatment options. However, many of these classifiers are sensitive to noise and frequently do not replicate in external validation sets. For complex, heterogeneous diseases, these classifiers are further limited by being unable to capture varying combinations of genes that lead to the same phenotype. Pathway-based classification can overcome these challenges by using robust, aggregate features to represent biological mechanisms. In this work, we developed a novel pathway-based approach, PRObabilistic Pathway Score, which uses genes to calculate individualized pathway scores for classification. Unlike previous individualized pathway-based classification methods that use gene sets, we incorporate gene interactions using probabilistic graphical models to more accurately represent the underlying biology and achieve better performance. We apply our method to differentiate two similar complex diseases, ulcerative colitis (UC) and Crohn's disease (CD), which are the two main types of inflammatory bowel disease (IBD). Using five IBD datasets, we compare our method against four gene-based and four alternative pathway-based classifiers in distinguishing CD from UC. We demonstrate superior classification performance and provide biological insight into the top pathways separating CD from UC.

Availability and implementation

PROPS is available as an R package, which can be downloaded at http://simtk.org/home/props or on Bioconductor.

Contact

rbaltman@stanford.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-03-01 +25707592,Web resources for model organism studies.,"An ever-growing number of resources on model organisms have emerged with the continued development of sequencing technologies. In this paper, we review 13 databases of model organisms, most of which are reported by the National Institutes of Health of the United States (NIH; http://www.nih.gov/science/models/). We provide a brief description for each database, as well as detail its data source and types, functions, tools, and availability of access. In addition, we also provide a quality assessment about these databases. Significantly, the organism databases instituted in the early 1990s--such as the Mouse Genome Database (MGD), Saccharomyces Genome Database (SGD), and FlyBase--have developed into what are now comprehensive, core authority resources. Furthermore, all of the databases mentioned here update continually according to user feedback and with advancing technologies.",2015-02-20 +26454279,Fast and accurate non-sequential protein structure alignment using a new asymmetric linear sum assignment heuristic.,"

Motivation

The three dimensional tertiary structure of a protein at near atomic level resolution provides insight alluding to its function and evolution. As protein structure decides its functionality, similarity in structure usually implies similarity in function. As such, structure alignment techniques are often useful in the classifications of protein function. Given the rapidly growing rate of new, experimentally determined structures being made available from repositories such as the Protein Data Bank, fast and accurate computational structure comparison tools are required. This paper presents SPalignNS, a non-sequential protein structure alignment tool using a novel asymmetrical greedy search technique.

Results

The performance of SPalignNS was evaluated against existing sequential and non-sequential structure alignment methods by performing trials with commonly used datasets. These benchmark datasets used to gauge alignment accuracy include (i) 9538 pairwise alignments implied by the HOMSTRAD database of homologous proteins; (ii) a subset of 64 difficult alignments from set (i) that have low structure similarity; (iii) 199 pairwise alignments of proteins with similar structure but different topology; and (iv) a subset of 20 pairwise alignments from the RIPC set. SPalignNS is shown to achieve greater alignment accuracy (lower or comparable root-mean squared distance with increased structure overlap coverage) for all datasets, and the highest agreement with reference alignments from the challenging dataset (iv) above, when compared with both sequentially constrained alignments and other non-sequential alignments.

Availability and implementation

SPalignNS was implemented in C++. The source code, binary executable, and a web server version are freely available at: http://sparks-lab.org

Contact

yaoqi.zhou@griffith.edu.au.",2015-10-10 +28881976,Increasing the power of meta-analysis of genome-wide association studies to detect heterogeneous effects.,"

Motivation

Meta-analysis is essential to combine the results of genome-wide association studies (GWASs). Recent large-scale meta-analyses have combined studies of different ethnicities, environments and even studies of different related phenotypes. These differences between studies can manifest as effect size heterogeneity. We previously developed a modified random effects model (RE2) that can achieve higher power to detect heterogeneous effects than the commonly used fixed effects model (FE). However, RE2 cannot perform meta-analysis of correlated statistics, which are found in recent research designs, and the identified variants often overlap with those found by FE.

Results

Here, we propose RE2C, which increases the power of RE2 in two ways. First, we generalized the likelihood model to account for correlations of statistics to achieve optimal power, using an optimization technique based on spectral decomposition for efficient parameter estimation. Second, we designed a novel statistic to focus on the heterogeneous effects that FE cannot detect, thereby increasing the power to identify new associations. We developed an efficient and accurate p-value approximation procedure using analytical decomposition of the statistic. In simulations, RE2C achieved a dramatic increase in power compared with the decoupling approach (71% vs. 21%) when the statistics were correlated. Even when the statistics are uncorrelated, RE2C achieves a modest increase in power. Applications to real genetic data supported the utility of RE2C. RE2C is highly efficient and can meta-analyze one hundred GWASs in one day.

Availability and implementation

The software is freely available at http://software.buhmhan.com/RE2C .

Contact

buhm.han@amc.seoul.kr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +27428829,HattCI: Fast and Accurate attC site Identification Using Hidden Markov Models.,"Integrons are genetic elements that facilitate the horizontal gene transfer in bacteria and are known to harbor genes associated with antibiotic resistance. The gene mobility in the integrons is governed by the presence of attC sites, which are 55 to 141-nucleotide-long imperfect inverted repeats. Here we present HattCI, a new method for fast and accurate identification of attC sites in large DNA data sets. The method is based on a generalized hidden Markov model that describes each core component of an attC site individually. Using twofold cross-validation experiments on a manually curated reference data set of 231 attC sites from class 1 and 2 integrons, HattCI showed high sensitivities of up to 91.9% while maintaining satisfactory false-positive rates. When applied to a metagenomic data set of 35 microbial communities from different environments, HattCI found a substantially higher number of attC sites in the samples that are known to contain more horizontally transferred elements. HattCI will significantly increase the ability to identify attC sites and thus integron-mediated genes in genomic and metagenomic data. HattCI is implemented in C and is freely available at http://bioinformatics.math.chalmers.se/HattCI .",2016-07-18 +28966993,Correlating Chemical Sensitivity with Low Level Activation of Mechanotransduction Pathways in Hematologic Malignancies.,"Large-scale screening has revealed that human hematopoietic cancer cell lines are generally more sensitive to various classes of drugs than cell lines established from solid tumors. 
A detailed examination of data in the Cancer Therapeutics Response Portal (http://portals.broadinstitute.org/ctrp/) suggests that this enhanced sensitivity is due to lower basal levels of activation of TAZ-TEAD mechanotransduction pathways in hematopoietic versus non-hematopoietic cells. Translation inhibitors such as omacetaxine mepesuccinate (homoharringtonine) fall into this category of hematopoietic-selective compounds. Moreover, additional molecular determinants of sensitivity suggest that homoharringtonine might show therapeutic efficacy in certain patients with advanced hematologic malignancies despite activation of these pathways.",2017-07-01 +28881989,Direct AUC optimization of regulatory motifs.,"

Motivation

The discovery of transcription factor binding site (TFBS) motifs is essential for untangling the complex mechanism of genetic variation under different developmental and environmental conditions. Among the huge amount of computational approaches for de novo identification of TFBS motifs, discriminative motif learning (DML) methods have been proven to be promising for harnessing the discovery power of accumulated huge amount of high-throughput binding data. However, they have to sacrifice accuracy for speed and could fail to fully utilize the information of the input sequences.

Results

We propose a novel algorithm called CDAUC for optimizing DML-learned motifs based on the area under the receiver-operating characteristic curve (AUC) criterion, which has been widely used in the literature to evaluate the significance of extracted motifs. We show that when the considered AUC loss function is optimized in a coordinate-wise manner, the cost function of each resultant sub-problem is a piece-wise constant function, whose optimal value can be found exactly and efficiently. Further, a key step of each iteration of CDAUC can be efficiently solved as a computational geometry problem. Experimental results on real world high-throughput datasets illustrate that CDAUC outperforms competing methods for refining DML motifs, while being one order of magnitude faster. Meanwhile, preliminary results also show that CDAUC may also be useful for improving the interpretability of convolutional kernels generated by the emerging deep learning approaches for predicting TF sequences specificities.

Availability and implementation

CDAUC is available at: https://drive.google.com/drive/folders/0BxOW5MtIZbJjNFpCeHlBVWJHeW8 .

Contact

dshuang@tongji.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +26662846,Systems Toxicology of Male Reproductive Development: Profiling 774 Chemicals for Molecular Targets and Adverse Outcomes.,"

Background

Trends in male reproductive health have been reported for increased rates of testicular germ cell tumors, low semen quality, cryptorchidism, and hypospadias, which have been associated with prenatal environmental chemical exposure based on human and animal studies.

Objective

In the present study we aimed to identify significant correlations between environmental chemicals, molecular targets, and adverse outcomes across a broad chemical landscape with emphasis on developmental toxicity of the male reproductive system.

Methods

We used U.S. EPA's animal study database (ToxRefDB) and a comprehensive literature analysis to identify 774 chemicals that have been evaluated for adverse effects on male reproductive parameters, and then used U.S. EPA's in vitro high-throughput screening (HTS) database (ToxCastDB) to profile their bioactivity across approximately 800 molecular and cellular features.

Results

A phenotypic hierarchy of testicular atrophy, sperm effects, tumors, and malformations, a composite resembling the human testicular dysgenesis syndrome (TDS) hypothesis, was observed in 281 chemicals. A subset of 54 chemicals with male developmental consequences had in vitro bioactivity on molecular targets that could be condensed into 156 gene annotations in a bipartite network.

Conclusion

Computational modeling of available in vivo and in vitro data for chemicals that produce adverse effects on male reproductive end points revealed a phenotypic hierarchy across animal studies consistent with the human TDS hypothesis. We confirmed the known role of estrogen and androgen signaling pathways in rodent TDS, and importantly, broadened the list of molecular targets to include retinoic acid signaling, vascular remodeling proteins, G-protein coupled receptors (GPCRs), and cytochrome P450s.

Citation

Leung MC, Phuong J, Baker NC, Sipes NS, Klinefelter GR, Martin MT, McLaurin KW, Setzer RW, Darney SP, Judson RS, Knudsen TB. 2016. Systems toxicology of male reproductive development: profiling 774 chemicals for molecular targets and adverse outcomes. Environ Health Perspect 124:1050-1061; http://dx.doi.org/10.1289/ehp.1510385.",2015-12-11 +26142185,JASSA: a comprehensive tool for prediction of SUMOylation sites and SIMs.,"

Motivation

Post-translational modification by the Small Ubiquitin-like Modifier (SUMO) proteins, a process termed SUMOylation, is involved in many fundamental cellular processes. SUMO proteins are conjugated to a protein substrate, creating an interface for the recruitment of cofactors harboring SUMO-interacting motifs (SIMs). Mapping both SUMO-conjugation sites and SIMs is required to study the functional consequence of SUMOylation. To define the best candidate sites for experimental validation we designed JASSA, a Joint Analyzer of SUMOylation site and SIMs.

Results

JASSA is a predictor that uses a scoring system based on a Position Frequency Matrix derived from the alignment of experimental SUMOylation sites or SIMs. Compared with existing web-tools, JASSA displays on par or better performances. Novel features were implemented towards a better evaluation of the prediction, including identification of database hits matching the query sequence and representation of candidate sites within the secondary structural elements and/or the 3D fold of the protein of interest, retrievable from deposited PDB files.

Availability and implementation

JASSA is freely accessible at http://www.jassa.fr/. Website is implemented in PHP and MySQL, with all major browsers supported.

Contact

guillaume.beauclair@inserm.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-02 +28334340,Meta-analytic framework for liquid association.,"

Motivation

Although coexpression analysis via pair-wise expression correlation is popularly used to elucidate gene-gene interactions at the whole-genome scale, many complicated multi-gene regulations require more advanced detection methods. Liquid association (LA) is a powerful tool to detect the dynamic correlation of two gene variables depending on the expression level of a third variable (LA scouting gene). LA detection from single transcriptomic study, however, is often unstable and not generalizable due to cohort bias, biological variation and limited sample size. With the rapid development of microarray and NGS technology, LA analysis combining multiple gene expression studies can provide more accurate and stable results.

Results

In this article, we proposed two meta-analytic approaches for LA analysis (MetaLA and MetaMLA) to combine multiple transcriptomic studies. To compensate for the demanding computation, we also proposed a two-step fast screening algorithm for more efficient genome-wide screening: bootstrap filtering and sign filtering. We applied the methods to five Saccharomyces cerevisiae datasets related to environmental changes. The fast screening algorithm reduced running time by 98%. When compared with single study analysis, MetaLA and MetaMLA provided stronger detection signal and more consistent and stable results. The top triplets are highly enriched in fundamental biological processes related to environmental changes. Our method can help biologists understand underlying regulatory mechanisms under different environmental exposure or disease states.

Availability and implementation

A MetaLA R package, data and code for this article are available at http://tsenglab.biostat.pitt.edu/software.htm.

Contact

ctseng@pitt.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +26484228,Whole transcriptome microarrays identify long non-coding RNAs associated with cardiac hypertrophy.,"Long non-coding RNAs (lncRNAs) have recently emerged as a novel group of non-coding RNAs able to regulate gene expression. While their role in cardiac disease is only starting to be understood, their involvement in cardiac hypertrophy is poorly known. We studied the association between lncRNAs and left ventricular hypertrophy using whole transcriptome microarrays. Wild-type mice and mice overexpressing the adenosine A2A receptor were subjected to transverse aortic constriction (TAC) to induce left ventricular hypertrophy. Expression profiles of lncRNAs in the heart were characterized using genome-wide microarrays. An analytical pipeline was specifically developed to extract lncRNA data from microarrays. We identified 2 lncRNAs up-regulated and 3 lncRNAs down-regulated in the hearts of A2A-receptor overexpressing-mice subjected to TAC compared to wild-type mice. Differential expression of these 2 lncRNAs was validated by quantitative PCR. Complete microarray dataset is available at Gene Expression Omnibus (GEO) database (http://www.ncbi.nlm.nih.gov/geo/) under the accession number GSE45423. Here, we describe in details the experimental design, microarray performance and analysis.",2015-05-29 +28881993,Genomes as documents of evolutionary history: a probabilistic macrosynteny model for the reconstruction of ancestral genomes.,"

Motivation

It has been argued that whole-genome duplication (WGD) exerted a profound influence on the course of evolution. For the purpose of fully understanding the impact of WGD, several formal algorithms have been developed for reconstructing pre-WGD gene order in yeast and plant. However, to the best of our knowledge, those algorithms have never been successfully applied to WGD events in teleost and vertebrate, impeded by extensive gene shuffling and gene losses.

Results

Here, we present a probabilistic model of macrosynteny (i.e. conserved linkage or chromosome-scale distribution of orthologs), develop a variational Bayes algorithm for inferring the structure of pre-WGD genomes, and study estimation accuracy by simulation. Then, by applying the method to the teleost WGD, we demonstrate effectiveness of the algorithm in a situation where gene-order reconstruction algorithms perform relatively poorly due to a high rate of rearrangement and extensive gene losses. Our high-resolution reconstruction reveals previously overlooked small-scale rearrangements, necessitating a revision to previous views on genome structure evolution in teleost and vertebrate.

Conclusions

We have reconstructed the structure of a pre-WGD genome by employing a variational Bayes approach that was originally developed for inferring topics from millions of text documents. Interestingly, comparison of the macrosynteny and topic model algorithms suggests that macrosynteny can be regarded as documents on ancestral genome structure. From this perspective, the present study would seem to provide a textbook example of the prevalent metaphor that genomes are documents of evolutionary history.

Availability and implementation

The analysis data are available for download at http://www.gen.tcd.ie/molevol/supp_data/MacrosyntenyTGD.zip , and the software written in Java is available upon request.

Contact

yoichiro.nakatani@tcd.ie or aoife.mclysaght@tcd.ie.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +30321008,[Breast cancer screening : a tool for informed decision making].,"

Introduction

Our population has an intuitive approach to screening and is mostly enthusiastic to participate. The related inquiries on breast cancer screening show that most women misunderstood its advantages as well as its disadvantages. Consequently, the Kenniscentrum (Brussels) started a study in order to present these in neutral messages.

Methodology

The study aims to allow women to make an informed decision. These messages are obviously based on the best methodologies applied to Belgian data. The content of these messages is especially based on the IPDAS criteria (International Patient Decision Aid Standards).

Results

Three visual graphical presentations including these messages were designed for four age brackets (40-49 y., 50-59 y., 60-69 y., 70-79 y.). The first visual presents the burden of breast cancer among other causes of death. The second is related to the impact of screening or no screening on detection and on mortality, computed over the ten years after the screening. The third visual shows the consequences (positive or negative result, possible additional investigations needed) of each mammography in the short or mid-term.

Discussion

This tool can be used in the context of informed decision or shared decision making. This tool is in the public domain and can be downloaded, in French or Dutch, on KCE (https://kce.fgov.be, tab breast).",2018-01-01 +29309522,Individual and Population Trajectories of Influenza Antibody Titers Over Multiple Seasons in a Tropical Country.,"Seasonal influenza epidemics occur year-round in the tropics, complicating the planning of vaccination programs. We built an individual-level longitudinal model of baseline antibody levels, time of infection, and the subsequent rise and decay of antibodies postinfection using influenza A(H1N1)pdm09 data from 2 sources in Singapore: 1) a noncommunity cohort with real-time polymerase chain reaction-confirmed infections and at least 1 serological sample collected from each participant between May and October 2009 (n = 118) and 2) a community cohort with up to 6 serological samples collected between May 2009 and October 2010 (n = 760). The model was hierarchical, to account for interval censoring and interindividual variation. Model parameters were estimated via a reversible jump Markov chain Monte Carlo algorithm using custom-designed R (https://www.r-project.org/) and C++ (https://isocpp.org/) code. After infection, antibody levels peaked at 4-7 weeks, with a half-life of 26.5 weeks, followed by a slower decrease up to 1 year to approximately preinfection levels. After the third wave, the seropositivity rate and the population-level antibody titer dropped to the same level as they were at the end of the first pandemic wave. The results of this analysis are consistent with the hypothesis that the population-level effect of individuals' waxing and waning antibodies influences influenza seasonality in the tropics.",2018-01-01 +28666322,SVM-dependent pairwise HMM: an application to protein pairwise alignments.,"

Motivation

Methods able to provide reliable protein alignments are crucial for many bioinformatics applications. In recent years many different algorithms have been developed and various kinds of information, from sequence conservation to secondary structure, have been used to improve alignment performance. This is especially relevant for proteins with highly divergent sequences. However, recent works suggest that different features may have different importance in diverse protein classes and it would be an advantage to have more customizable approaches, capable of dealing with different alignment definitions.

Results

Here we present Rigapollo, a highly flexible pairwise alignment method based on a pairwise HMM-SVM that can use any type of information to build alignments. Rigapollo lets the user decide the optimal features to align their protein class of interest. It outperforms current state of the art methods on two well-known benchmark datasets when aligning highly divergent sequences.

Availability and implementation

A Python implementation of the algorithm is available at http://ibsquare.be/rigapollo.

Contact

wim.vranken@vub.be.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +29028923,Detecting presence of mutational signatures in cancer with confidence.,"MOTIVATION:Cancers arise as the result of somatically acquired changes in the DNA of cancer cells. However, in addition to the mutations that confer a growth advantage, cancer genomes accumulate a large number of somatic mutations resulting from normal DNA damage and repair processes as well as carcinogenic exposures or cancer related aberrations of DNA maintenance machinery. These mutagenic processes often produce characteristic mutational patterns called mutational signatures. The decomposition of a cancer genome's mutation catalog into mutations consistent with such signatures can provide valuable information about cancer etiology. However, the results from different decomposition methods are not always consistent. Hence, one needs to be able to not only decompose a patient's mutational profile into signatures but also establish the accuracy of such decomposition. RESULTS:We proposed two complementary ways of measuring confidence and stability of decomposition results and applied them to analyze mutational signatures in breast cancer genomes. We identified both very stable and highly unstable signatures, as well as signatures that previously have not been associated with breast cancer. We also provided additional support for the novel signatures. Our results emphasize the importance of assessing the confidence and stability of inferred signature contributions. AVAILABILITY AND IMPLEMENTATION:All tools developed in this paper have been implemented in an R package, called SignatureEstimation, which is available from https://www.ncbi.nlm.nih.gov/CBBresearch/Przytycka/index.cgi\#signatureestimation. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2018-01-01 +29360599,An Empirically Derived Pediatric Cardiac Inotrope Score Associated With Pediatric Heart Surgery.,"We aimed to empirically derive an inotrope score to predict real-time outcomes using the doses of inotropes after pediatric cardiac surgery. The outcomes evaluated included in-hospital mortality, prolonged hospital length of stay, and composite poor outcome (mortality or prolonged hospital length of stay). The study population included patients <18 years of age undergoing heart operations (with or without cardiopulmonary bypass) of varying complexity. To create this novel pediatric cardiac inotrope score (PCIS), we collected the data on the highest doses of 4 commonly used inotropes (epinephrine, norepinephrine, dopamine, and milrinone) in the first 24 hours after heart operation. We employed a hierarchical framework by representing discrete probability models with continuous latent variables that depended on the dosage of drugs for a particular patient. We used Bayesian conditional probit regression to model the effects of the inotropes on the mean of the latent variables. We then used Markov chain Monte Carlo simulations for simulating posterior samples to create a score function for each of the study outcomes. The training dataset utilized 1030 patients to make the scientific model. An online calculator for the tool can be accessed at https://soipredictiontool.shinyapps.io/InotropeScoreApp. The newly proposed empiric PCIS demonstrated a high degree of discrimination for predicting study outcomes in children undergoing heart operations. The newly proposed empiric PCIS provides a novel measure to predict real-time outcomes using the doses of inotropes among children undergoing heart operations of varying complexity.",2018-01-31 +24369151,Phylogeny-based classification of microbial communities.,"

Motivation

Next-generation sequencing coupled with metagenomics has led to the rapid growth of sequence databases and enabled a new branch of microbiology called comparative metagenomics. Comparative metagenomic analysis studies compositional patterns within and between different environments providing a deep insight into the structure and function of complex microbial communities. It is a fast growing field that requires the development of novel supervised learning techniques for addressing challenges associated with metagenomic data, e.g. sensitivity to the choice of sequence similarity cutoff used to define operational taxonomic units (OTUs), high dimensionality and sparsity of the data and so forth. On the other hand, the natural properties of microbial community data may provide useful information about the structure of the data. For example, similarity between species encoded by a phylogenetic tree captures the relationship between OTUs and may be useful for the analysis of complex microbial datasets where the diversity patterns comprise features at multiple taxonomic levels. Even though some of the challenges have been addressed by learning algorithms in the literature, none of the available methods take advantage of the inherent properties of metagenomic data.

Results

We proposed a novel supervised classification method for microbial community samples, where each sample is represented as a set of OTU frequencies, which takes advantage of the natural structure in microbial community data encoded by a phylogenetic tree. This model allows us to take advantage of environment-specific compositional patterns that may contain features at multiple granularity levels. Our method is based on the multinomial logistic regression model with a tree-guided penalty function. Additionally, we proposed a new simulation framework for generating 16S ribosomal RNA gene read counts that may be useful in comparative metagenomics research. Our experimental results on simulated and real data show that the phylogenetic information used in our method improves the classification accuracy.

Availability and implementation

http://www.cs.ucr.edu/~tanaseio/metaphyl.htm.",2013-12-24 +26315811,A quantitative approach to polar organic reactivity.,"A method is presented which allows one to predict toxic effects which are triggered by the formation of covalent bonds between electron-deficient (electrophilic) compounds and biological electron-rich (nucleophilic) targets, as proteins or nucleic acids. It is based on our comprehensive nucleophilicity and electrophilicity scales, which we constructed as an aid for the planning of organic syntheses. For the construction of these scales, rate constants for the reactions of benzhydrylium ions (aryl2CH(+)) and structurally related quinone methides with nucleophiles have been measured and correlated by the equation lg k(20 °C) = sN(E + N), which yields absolute rate constants k (L mol(-1) s(-1)) from one parameter for electrophiles (the electrophilicity E) and two for nucleophiles (the nucleophilicity parameter N and the susceptibility sN). A freely accessible database (http://www.cup.uni-muenchen.de/oc/mayr/DBintro.html) is described, which presently comprises data for 1000 nucleophiles and 260 electrophiles and provides links to the original literature reports. The kinetic scales are complemented by a thermodynamic counterpart, which enables one to calculate association constants K (L mol(-1)) of electrophiles with nucleophiles from the empirical Lewis acidity parameters LA and Lewis basicity parameters LB by the equation lg K (20°C) = LA + LB.",2015-07-01 +,"SPECIES: MAP: : a web-based application for visualizing the overlap of distributions and pollution events, with a list of fishes put at risk by the 2010 Gulf of Mexico oil spill","The 2010 Gulf of Mexico oil spill was the largest in history outside of warfare and because the spill occurred in the deep sea, its impact on the biota will be difficult to assess. 
To help address this problem we have created SPECIES: MAP: ( http://speciesmap.org ), a web-based application (web app) that allows a user to synthesize data on the oil spill with distributional records and other information on marine species. We have combined satellite image data collected over the course of the oil spill with locality data from historical collection records of fish species in a geographic information system. In doing so, we have created maps to assess which species were potentially in the region of the spill and to what degree their range was exposed to pollution. To evaluate the impact of the spill, we examined and categorized various levels of overlap between the observed surface range of the 2010 spill with collections records for 124 fish species including all 77 endemic to the Gulf of Mexico. More than half of all species examined (including more than half of all endemics) were found to have population records in the region of the spill. SPECIES: MAP: contains interaction maps for all the species examined and these data can be used to target post-spill collections, to evaluate changes in habitat, and to discover extirpations or extinctions in response to environmental disturbances.",2012-06-01 +25086704,Integrating biological pathways and genomic profiles with ChiBE 2.,"

Background

Dynamic visual exploration of detailed pathway information can help researchers digest and interpret complex mechanisms and genomic datasets.

Results

ChiBE is a free, open-source software tool for visualizing, querying, and analyzing human biological pathways in BioPAX format. The recently released version 2 can search for neighborhoods, paths between molecules, and common regulators/targets of molecules, on large integrated cellular networks in the Pathway Commons database as well as in local BioPAX models. Resulting networks can be automatically laid out for visualization using a graphically rich, process-centric notation. Profiling data from the cBioPortal for Cancer Genomics and expression data from the Gene Expression Omnibus can be overlaid on these networks.

Conclusions

ChiBE's new capabilities are organized around a genomics-oriented workflow and offer a unique comprehensive pathway analysis solution for genomics researchers. The software is freely available at http://code.google.com/p/chibe.",2014-08-03 +27754999,Reciprocal Causation Between Functional Independence and Mental Health 1 and 2 Years After Traumatic Brain Injury: A Cross-Lagged Panel Structural Equation Model.,"

Objective

The research attempting to disentangle the directionality of relationships between mental health and functional outcomes after traumatic brain injury (TBI) is growing but has yielded equivocal findings or focused on isolated predictors or isolated outcomes. The purpose of the current study was to use cross-lagged panel and structural equation modeling (SEM) techniques to examine causality between comprehensive indices of mental health (depression, anxiety, and life satisfaction) and functional independence in a national sample of individuals with TBI over the first 2 years after injury.

Design

Participants were 4,674 individuals with TBI from the TBI Model Systems Database.

Results

The SEM, which yielded good fit indices, suggested that individuals with TBI with greater mental health problems at 1 and 2 years after injury had lower functional independence at those same time points. The standardized path loadings for mental health problems and for functional independence over time were large, suggesting a high degree of consistency in mental health and functional independence across 1 and 2 years. In terms of cross-lag, mental health at Time 1 did not exert a unique effect on functional independence at Time 2, but functional independence at Time 1 exerted a statistically significant but quite small unique effect on mental health at Time 2.

Conclusions

This combination of results suggests that functional independence is only slightly more causal than mental health in the relationship between mental health and functional independence over the first 2 years post-TBI, and that instead, reciprocal causality is a more likely scenario.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) understand the nature of the relationship between mental health problems and functional independence after traumatic brain injury; (2) learn about a novel methodological technique for examining the connections between variables over time; and (3) understand when ongoing support for individuals with traumatic brain injury is necessary.

Level

Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians.The Association of Academic Physiatrists designates this activity for a maximum of 1.5 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2017-06-01 +28432734,"Monogenic diabetes syndromes: Locus-specific databases for Alström, Wolfram, and Thiamine-responsive megaloblastic anemia.","We developed a variant database for diabetes syndrome genes, using the Leiden Open Variation Database platform, containing observed phenotypes matched to the genetic variations. We populated it with 628 published disease-associated variants (December 2016) for: WFS1 (n = 309), CISD2 (n = 3), ALMS1 (n = 268), and SLC19A2 (n = 48) for Wolfram type 1, Wolfram type 2, Alström, and Thiamine-responsive megaloblastic anemia syndromes, respectively; and included 23 previously unpublished novel germline variants in WFS1 and 17 variants in ALMS1. We then investigated genotype-phenotype relations for the WFS1 gene. The presence of biallelic loss-of-function variants predicted Wolfram syndrome defined by insulin-dependent diabetes and optic atrophy, with a sensitivity of 79% (95% CI 75%-83%) and specificity of 92% (83%-97%). The presence of minor loss-of-function variants in WFS1 predicted isolated diabetes, isolated deafness, or isolated congenital cataracts without development of the full syndrome (sensitivity 100% [93%-100%]; specificity 78% [73%-82%]). The ability to provide a prognostic prediction based on genotype will lead to improvements in patient care and counseling. The development of the database as a repository for monogenic diabetes gene variants will allow prognostic predictions for other diabetes syndromes as next-generation sequencing expands the repertoire of genotypes and phenotypes. 
The database is publicly available online at https://lovd.euro-wabb.org.",2017-06-01 +29578323,House Dust Endotoxin Association with Chronic Bronchitis and Emphysema.,"BACKGROUND:Endotoxin has been reported to be associated with chronic bronchitis or emphysema (CBE) at high occupational exposures. However, whether levels found in domestic environments have similar effects is unknown. OBJECTIVES:We aimed to study the association between house dust endotoxin and CBE in a sample representative of the U.S. population. METHODS:We analyzed data from 3,393 participants ≥20 y old from the National Health and Nutrition Examination Survey (NHANES) 2005-2006. House dust from bedding and from bedroom floors was analyzed for endotoxin content. NHANES participants received questionnaires and underwent examination as well as extensive laboratory testing. Logistic regression was used to examine the association of endotoxin levels with CBE diagnosis and symptoms, adjusting for covariates. The survey design and weights were applied so that estimates were nationally representative and so that statistical inferences were made appropriately. RESULTS:The median endotoxin concentration in house dust was 14.61 EU/mg dust, and CBE was reported by 8.2% of participants. In the adjusted analysis, one unit (EU/mg) increase in log10-transformed endotoxin concentrations was associated with a 27% increase in the odds of CBE diagnosis [OR=1.27 (95% CI: 1.00, 1.61)] and a 78% increase in the odds of chronic bronchitis symptoms (defined as cough and phlegm for ≥3 mo in a year for ≥2 y) [OR=1.78 (95% CI: 1.01, 3.12)]. Sensitization to inhalant allergens (p=0.001) modified the relationship between endotoxin and CBE diagnosis, with stronger associations observed in sensitized participants [OR=2.46 (95% CI: 1.72, 3.50) for a unit increase in log10-endotoxin]. CONCLUSIONS:In a population-based sample of U.S. 
adults, endotoxin levels in homes were associated with a self-reported history of CBE diagnosis and chronic bronchitis symptoms, with stronger associations among people sensitized to inhalant allergens. https://doi.org/10.1289/EHP2452.",2018-03-23 +28988138,MouseTox: An online toxicity assessment tool for small molecules through Enalos Cloud platform.,"Advances in the drug discovery research substantially depend on in silico methods and techniques that capitalize on experimental data to enable the accurate property/activity assessment by employing a variety of computational techniques. These in silico tools can significantly reduce expensive and time consuming experimental procedures required and are strongly recommended to avoid animal testing, especially as far as toxicity evaluation and risk assessment is concerned. In this context, in the present work we aim to develop a predictive model for the cytotoxic effects of a wide range of compounds based solely on calculated molecular descriptors that account for their topological, geometric and structural characteristics. The developed model was fully validated and was released online via Enalos Cloud platform accessible through http://enalos.insilicotox.com/MouseTox/. This ready-to-use web service offers, through a user-friendly interface, free access to the model results and therefore can act as a toxicity prediction tool for the risk assessment of novel compounds, without any special requirements or prior programming skills.",2017-10-05 +28813649,A Case for a Human Immuno-Peptidome Project Consortium.,"A multidisciplinary group of researchers gathered at the Hönggerberg Campus at ETH Zurich, Switzerland, for the first meeting on the Human Immuno-Peptidome Project (https://hupo.org/human-immuno-peptidome-project/). 
The long-term goal of this project is to map the entire repertoire of peptides presented by human leukocyte antigen molecules using mass spectrometry technologies, and make its robust analysis accessible to any immunologist. Here we outline the specific challenges identified toward this goal, and within this framework, describe the structure of a multipronged program aimed at addressing these challenges and implementing solutions at a community-wide level. Pillars of that program are: (1) method and technology development, (2) standardization, (3) effective data sharing, and (4) education. If successful, this community-driven endeavor might provide a roadmap toward new paradigms in immunology.",2017-08-01 +27882299,Application of target capture sequencing of exons and conserved non-coding sequences to 20 inbred rat strains.,"We report sequence data obtained by our recently devised target capture method TargetEC applied to 20 inbred rat strains. This method encompasses not only all annotated exons but also highly conserved non-coding sequences shared among vertebrates. The total length of the target regions covers 146.8 Mb. On an average, we obtained 31.7 × depth of target coverage and identified 154,330 SNVs and 24,368 INDELs for each strain. This corresponds to 470,037 unique SNVs and 68,652 unique INDELs among the 20 strains. The sequence data can be accessed at DDBJ/EMBL/GenBank under accession number PRJDB4648, and the identified variants have been deposited at http://bioinfo.sls.kyushu-u.ac.jp/rat_target_capture/20_strains.vcf.gz.",2016-11-14 +27809781,PSE-HMM: genome-wide CNV detection from NGS data using an HMM with Position-Specific Emission probabilities.,"

Background

Copy Number Variation (CNV) is envisaged to be a major source of large structural variations in the human genome. In recent years, many studies apply Next Generation Sequencing (NGS) data for the CNV detection. However, still there is a necessity to invent more accurate computational tools.

Results

In this study, mate pair NGS data are used for the CNV detection in a Hidden Markov Model (HMM). The proposed HMM has position specific emission probabilities, i.e. a Gaussian mixture distribution. Each component in the Gaussian mixture distribution captures a different type of aberration that is observed in the mate pairs, after being mapped to the reference genome. These aberrations may include any increase (decrease) in the insertion size or change in the direction of mate pairs that are mapped to the reference genome. This HMM with Position-Specific Emission probabilities (PSE-HMM) is utilized for the genome-wide detection of deletions and tandem duplications. The performance of PSE-HMM is evaluated on a simulated dataset and also on a real data of a Yoruban HapMap individual, NA18507.

Conclusions

PSE-HMM is effective in taking observation dependencies into account and reaches a high accuracy in detecting genome-wide CNVs. MATLAB programs are available at http://bs.ipm.ir/softwares/PSE-HMM/ .",2016-11-03 +28424247,Location in the spleen dictates the function of murine neutrophils.,"In this issue of JEM, Deniset et al. (https://doi.org/10.1084/jem.20161621) provide new data that extend our knowledge on the mechanisms whereby Streptococcus pneumoniae is cleared by the spleen. The authors identify novel populations of murine splenic neutrophils that localize in the red pulp and the marginal zone. During the acute phases of S. pneumoniae infection, these populations of splenic neutrophils act in concert with specialized macrophage and B cell populations to provide very rapid innate immune protection.",2017-04-19 +29574440,Projecting the effects of tobacco control policies in the USA through microsimulation: a study protocol.,"

Introduction

Smoking remains the leading cause of preventable death in the USA but can be reduced through policy interventions. Computational models of smoking can provide estimates of the projected impact of tobacco control policies and can be used to inform public health decision making. We outline a protocol for simulating the effects of tobacco policies on population health outcomes.

Methods and analysis

We extend the Smoking History Generator (SHG), a microsimulation model based on data from the National Health Interview Surveys, to evaluate the effects of tobacco control policies on projections of smoking prevalence and mortality in the USA. The SHG simulates individual life trajectories including smoking initiation, cessation and mortality. We illustrate the application of the SHG policy module for four types of tobacco control policies at the national and state levels: smoke-free air laws, cigarette taxes, increasing tobacco control programme expenditures and raising the minimum age of legal access to tobacco. Smoking initiation and cessation rates are modified by age, birth cohort, gender and years since policy implementation. Initiation and cessation rate modifiers are adjusted for differences across age groups and the level of existing policy coverage. Smoking prevalence, the number of population deaths avoided, and life-years gained are calculated for each policy scenario at the national and state levels. The model only considers direct individual benefits through reduced smoking and does not consider benefits through reduced exposure to secondhand smoke.

Ethics and dissemination

A web-based interface is being developed to integrate the results of the simulations into a format that allows the user to explore the projected effects of tobacco control policies in the USA. Usability testing is being conducted in which experts provide feedback on the interface. Development of this tool is under way, and a publicly accessible website is available at http://www.tobaccopolicyeffects.org.",2018-03-23 +28379287,ViPTree: the viral proteomic tree server.,"

Summary

ViPTree is a web server provided through GenomeNet to generate viral proteomic trees for classification of viruses based on genome-wide similarities. Users can upload viral genomes sequenced either by genomics or metagenomics. ViPTree generates proteomic trees for the uploaded genomes together with flexibly selected reference viral genomes. ViPTree also serves as a platform to visually investigate genomic alignments and automatically annotated gene functions for the uploaded viral genomes, thus providing virus researchers the first choice for classifying and understanding newly sequenced viral genomes.

Availability and implementation

ViPTree is freely available at: http://www.genome.jp/viptree .

Contact

goto@kuicr.kyoto-u.ac.jp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +24304901,GPCRDB: an information system for G protein-coupled receptors.,"For the past 20 years, the GPCRDB (G protein-coupled receptors database; http://www.gpcr.org/7tm/) has been a 'one-stop shop' for G protein-coupled receptor (GPCR)-related data. The GPCRDB contains experimental data on sequences, ligand-binding constants, mutations and oligomers, as well as many different types of computationally derived data, such as multiple sequence alignments and homology models. The GPCRDB also provides visualization and analysis tools, plus a number of query systems. In the latest GPCRDB release, all multiple sequence alignments, and >65,000 homology models, have been significantly improved, thanks to a recent flurry of GPCR X-ray structure data. Tools were introduced to browse X-ray structures, compare binding sites, profile similar receptors and generate amino acid conservation statistics. Snake plots and helix box diagrams can now be custom coloured (e.g. by chemical properties or mutation data) and saved as figures. A series of sequence alignment visualization tools has been added, and sequence alignments can now be created for subsets of sequences and sequence positions, and alignment statistics can be produced for any of these subsets.",2013-12-03 +26647370,SBMDb: first whole genome putative microsatellite DNA marker database of sugarbeet for bioenergy and industrial applications. ,"DNA marker plays important role as valuable tools to increase crop productivity by finding plausible answers to genetic variations and linking the Quantitative Trait Loci (QTL) of beneficial trait. Prior approaches in development of Short Tandem Repeats (STR) markers were time consuming and inefficient. Recent methods invoking the development of STR markers using whole genomic or transcriptomics data has gained wide importance with immense potential in developing breeding and cultivator improvement approaches. 
Availability of whole genome sequences and in silico approaches has revolutionized bulk marker discovery. We report world's first sugarbeet whole genome marker discovery having 145 K markers along with 5 K functional domain markers unified in common platform using MySQL, Apache and PHP in SBMDb. Embedded markers and corresponding location information can be selected for desired chromosome, location/interval and primers can be generated using Primer3 core, integrated at backend. Our analyses revealed abundance of 'mono' repeat (76.82%) over 'di' repeats (13.68%). Highest density (671.05 markers/Mb) was found in chromosome 1 and lowest density (341.27 markers/Mb) in chromosome 6. Current investigation of sugarbeet genome marker density has direct implications in increasing mapping marker density. This will enable present linkage map having marker distance of ∼2 cM, i.e. from 200 to 2.6 Kb, thus facilitating QTL/gene mapping. We also report e-PCR-based detection of 2027 polymorphic markers in panel of five genotypes. These markers can be used for DUS test of variety identification and MAS/GAS in variety improvement program. The present database presents wide source of potential markers for developing and implementing new approaches for molecular breeding required to accelerate industrious use of this crop, especially for sugar, health care products, medicines and color dye. Identified markers will also help in improvement of bioenergy trait of bioethanol and biogas production along with reaping advantage of crop efficiency in terms of low water and carbon footprint especially in era of climate change. Database URL: http://webapp.cabgrid.res.in/sbmdb/.",2015-12-08 +23649736,ASFinder: a tool for genome-wide identification of alternatively splicing transcripts from EST-derived sequences.,"Expressed Sequence Tags (ESTs) are a rich resource for identifying Alternatively Splicing (AS) genes. 
The ASFinder webserver is designed to identify AS isoforms from EST-derived sequences. Two approaches are implemented in ASFinder. If no genomic sequences are provided, the server performs a local BLASTN to identify AS isoforms from ESTs having both ends aligned but an internal segment unaligned. Otherwise, ASFinder uses SIM4 to map ESTs to the genome, then the overlapping ESTs that are mapped to the same genomic locus and have internal variable exon/intron boundaries are identified as AS isoforms. The tool is available at http://proteomics.ysu.edu/tools/ASFinder.html.",2013-01-01 +28165113,RelExplain-integrating data and networks to explain biological processes.,"

Motivation

The goal of many genome-wide experiments is to explain the changes between the analyzed conditions. Typically, the analysis is started with a set of differential genes DG and the first step is to identify the set of relevant biological processes BP . Current enrichment methods identify the involved biological process via statistically significant overrepresentation of differential genes in predefined sets, but do not further explain how the differential genes interact with each other or which other genes might be important for the enriched process. Other network-based methods determine subnetworks of interacting genes containing many differential genes, but do not employ process knowledge for a more focused analysis.

Results

RelExplain is a method to analyze a given biological process bp (e.g. identified by enrichment) in more detail by computing an explanation using the measured DG and a given network. An explanation is a subnetwork that contains the differential genes in the process bp and connects them in the best way given the experimental data using also genes that are not differential or not in bp . RelExplain takes into account the functional annotations of nodes and the edge consistency of the measurements. Explanations are compact networks of the relevant part of the bp and additional nodes that might be important for the bp . Our evaluation showed that RelExplain is better suited to retrieve manually curated subnetworks from unspecific networks than other algorithms. The interactive RelExplain tool allows to compute and inspect sub-optimal and alternative optimal explanations.

Availability and implementation

A webserver is available at https://services.bio.ifi.lmu.de/relexplain .

Contact

berchtold@bio.ifi.lmu.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +27540267,ReadXplorer 2-detailed read mapping analysis and visualization from one single source.,"

Motivation

The vast amount of already available and currently generated read mapping data requires comprehensive visualization, and should benefit from bioinformatics tools offering a wide spectrum of analysis functionality from just one source. Appropriate handling of multiple mapped reads during mapping analyses remains an issue that demands improvement.

Results

The capabilities of the read mapping analysis and visualization tool ReadXplorer were vastly enhanced. Here, we present an even finer granulated read mapping classification, improving the level of detail for analyses and visualizations. The spectrum of automatic analysis functions has been broadened to include genome rearrangement detection as well as correlation analysis between two mapping data sets. Existing functions were refined and enhanced, namely the computation of differentially expressed genes, the read count and normalization analysis and the transcription start site detection. Additionally, ReadXplorer 2 features a highly improved support for large eukaryotic data sets and a command line version, enabling its integration into workflows. Finally, the new version is now able to display any kind of tabular results from other bioinformatics tools.

Availability and implementation

http://www.readxplorer.org CONTACT: readxplorer@computational.bio.uni-giessen.deSupplementary information: Supplementary data are available at Bioinformatics online.",2016-08-18 +28987008,mvmapper: Interactive spatial mapping of genetic structures.,"Characterizing genetic structure across geographic space is a fundamental challenge in population genetics. Multivariate statistical analyses are powerful tools for summarizing genetic variability, but geographic information and accompanying metadata are not always easily integrated into these methods in a user-friendly fashion. Here, we present a deployable Python-based web-tool, mvmapper, for visualizing and exploring results of multivariate analyses in geographic space. This tool can be used to map results of virtually any multivariate analysis of georeferenced data, and routines for exporting results from a number of standard methods have been integrated in the R package adegenet, including principal components analysis (PCA), spatial PCA, discriminant analysis of principal components, principal coordinates analysis, nonmetric dimensional scaling and correspondence analysis. mvmapper's greatest strength is facilitating dynamic and interactive exploration of the statistical and geographic frameworks side by side, a task that is difficult and time-consuming with currently available tools. Source code and deployment instructions, as well as a link to a hosted instance of mvmapper, can be found at https://popphylotools.github.io/mvMapper/.",2017-10-23 +28781548,A free-access online key to identify Amazonian ferns.,"There is urgent need for more data on species distributions in order to improve conservation planning. A crucial but challenging aspect of producing high-quality data is the correct identification of organisms. Traditional printed floras and dichotomous keys are difficult to use for someone not familiar with the technical jargon. 
In poorly known areas, such as Amazonia, they also become quickly outdated as new species are described or ranges extended. Recently, online tools have allowed developing dynamic, interactive, and accessible keys that make species identification possible for a broader public. In order to facilitate identifying plants collected in field inventories, we developed an internet-based free-access tool to identify Amazonian fern species. We focused on ferns, because they are easy to collect and their edaphic affinities are relatively well known, so they can be used as an indicator group for habitat mapping. Our key includes 302 terrestrial and aquatic entities mainly from lowland Amazonian forests. It is a free-access key, so the user can freely choose which morphological features to use and in which order to assess them. All taxa are richly illustrated, so specimens can be identified by a combination of character choices, visual comparison, and written descriptions. The identification tool was developed in Lucid 3.5 software and it is available at http://keyserver.lucidcentral.org:8080/sandbox/keys.jsp.",2017-03-22 +27131784,ICM: a web server for integrated clustering of multi-dimensional biomedical data.,"Large-scale efforts for parallel acquisition of multi-omics profiling continue to generate extensive amounts of multi-dimensional biomedical data. Thus, integrated clustering of multiple types of omics data is essential for developing individual-based treatments and precision medicine. However, while rapid progress has been made, methods for integrated clustering are lacking an intuitive web interface that facilitates the biomedical researchers without sufficient programming skills. Here, we present a web tool, named Integrated Clustering of Multi-dimensional biomedical data (ICM), that provides an interface from which to fuse, cluster and visualize multi-dimensional biomedical data and knowledge. 
With ICM, users can explore the heterogeneity of a disease or a biological process by identifying subgroups of patients. The results obtained can then be interactively modified by using an intuitive user interface. Researchers can also exchange the results from ICM with collaborators via a web link containing a Project ID number that will directly pull up the analysis results being shared. ICM also support incremental clustering that allows users to add new sample data into the data of a previous study to obtain a clustering result. Currently, the ICM web server is available with no login requirement and at no cost at http://biotech.bmi.ac.cn/icm/.",2016-04-30 +28228269,Diverse Central Projection Patterns of Retinal Ganglion Cells.,"Understanding how >30 types of retinal ganglion cells (RGCs) in the mouse retina each contribute to visual processing in the brain will require more tools that label and manipulate specific RGCs. We screened and analyzed retinal expression of Cre recombinase using 88 transgenic driver lines. In many lines, Cre was expressed in multiple RGC types and retinal cell classes, but several exhibited more selective expression. We comprehensively mapped central projections from RGCs labeled in 26 Cre lines using viral tracers, high-throughput imaging, and a data processing pipeline. We identified over 50 retinorecipient regions and present a quantitative retina-to-brain connectivity map, enabling comparisons of target-specificity across lines. Projections to two major central targets were notably correlated: RGCs projecting to the outer shell or core regions of the lateral geniculate projected to superficial or deep layers within the superior colliculus, respectively. Retinal images and projection data are available online at http://connectivity.brain-map.org.",2017-02-01 +28974218,Anaconda: AN automated pipeline for somatic COpy Number variation Detection and Annotation from tumor exome sequencing data.,"

Background

Copy number variations (CNVs) are the main genetic structural variations in cancer genome. Detecting CNVs in genetic exome region is efficient and cost-effective in identifying cancer associated genes. Many tools had been developed accordingly and yet these tools lack of reliability because of high false negative rate, which is intrinsically caused by genome exonic bias.

Results

To provide an alternative option, here, we report Anaconda, a comprehensive pipeline that allows flexible integration of multiple CNV-calling methods and systematic annotation of CNVs in analyzing WES data. Just by one command, Anaconda can generate CNV detection result by up to four CNV detecting tools. Associated with comprehensive annotation analysis of genes involved in shared CNV regions, Anaconda is able to deliver a more reliable and useful report in assistance with CNV-associate cancer researches.

Conclusion

Anaconda package and manual can be freely accessed at http://mcg.ustc.edu.cn/bsc/ANACONDA/ .",2017-10-03 +27153580,chromPlot: visualization of genomic data in chromosomal context.,"

Unlabelled

: Visualizing genomic data in chromosomal context can help detecting errors in data processing and may suggest new hypotheses to be tested. Here, we report a new tool for displaying large and diverse genomic data along chromosomes. The software is implemented in R so that visualization can be easily integrated with its numerous packages for processing genomic data. It supports simultaneous visualization of multiple tracks of data. Large genomic regions such as QTLs or synteny tracts may be shown along histograms of number of genes, genetic variants, or any other type of genomic element. Tracks can also contain values for continuous or categorical variables and the user can choose among points, connected lines, colored segments, or histograms for representing data. chromPlot takes data from tables in data.frame in GRanges formats. The information necessary to draw chromosomes for mouse and human is included with the package. For other organisms, chromPlot can read Gap and cytoBandIdeo tables from the UCSC Genome Browser. We present common use cases here, and a full tutorial is included as the package's vignette.

Availability and implementation

chromPlot is distributed under a GLP2 licence at http://www.bioconductor.org

Contact

raverdugo@u.uchile.cl

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-09 +25819075,LOTUS-DB: an integrative and interactive database for Nelumbo nucifera study.,"Besides its important significance in plant taxonomy and phylogeny, sacred lotus (Nelumbo nucifera Gaertn.) might also hold the key to the secrets of aging, which attracts crescent attentions from researchers all over the world. The genetic or molecular studies on this species depend on its genome information. In 2013, two publications reported the sequencing of its full genome, based on which we constructed a database named as LOTUS-DB. It will provide comprehensive information on the annotation, gene function and expression for the sacred lotus. The information will facilitate users to efficiently query and browse genes, graphically visualize genome and download a variety of complex data information on genome DNA, coding sequence (CDS), transcripts or peptide sequences, promoters and markers. It will accelerate researches on gene cloning, functional identification of sacred lotus, and hence promote the studies on this species and plant genomics as well. Database URL: http://lotus-db.wbgcas.cn",2015-03-27 +25236462,Proteomic analysis and prediction of human phosphorylation sites in subcellular level reveal subcellular specificity.,"

Motivation

Protein phosphorylation is the most common post-translational modification (PTM) regulating major cellular processes through highly dynamic and complex signaling pathways. Large-scale comparative phosphoproteomic studies have frequently been done on whole cells or organs by conventional bottom-up mass spectrometry approaches, i.e., at the phosphopeptide level. Using this approach, there is no way to know from where the phosphopeptide signal originated. Also, as a consequence of the scale of these studies, important information on the localization of phosphorylation sites in subcellular compartments (SCs) is not surveyed.

Results

Here, we present a first account of the emerging field of subcellular phosphoproteomics where a support vector machine (SVM) approach was combined with a novel algorithm of discrete wavelet transform (DWT) to facilitate the identification of compartment-specific phosphorylation sites and to unravel the intricate regulation of protein phosphorylation. Our data reveal that the subcellular phosphorylation distribution is compartment type dependent and that the phosphorylation displays site-specific sequence motifs that diverge between SCs.

Availability and implementation

The method and database both are available as a web server at: http://bioinfo.ncu.edu.cn/SubPhos.aspx.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-17 +26430790,Time trends in avoidable cancer mortality in Switzerland and neighbouring European countries 1996-2010.,"

Question under study

What are the trends in avoidable cancer mortality in Switzerland and neighbouring countries?

Methods

Mortality data and population estimates 1996-2010 were obtained from the Swiss Federal Statistical Office for Switzerland and the World Health Organization Mortality Database (http://www.who.int/healthinfo/mortality_data/en/) for Austria, Germany, France and Italy. Age standardised mortality rates (ASMRs, European standard) per 100 000 person-years were calculated for the population <75 years old by sex for the following groups of cancer deaths: (1) avoidable through primary prevention; (2) avoidable through early detection and treatment; (3) avoidable through improved treatment and medical care; and (4) remaining cancer deaths. To assess time trends in ASMRs, estimated annual percentage changes (EAPCs) with 95% confidence intervals (95% CIs) were calculated.

Results

In Switzerland and neighbouring countries cancer mortality in persons <75 years old continuously decreased 1996-2010. Avoidable cancer mortality decreased in all groups of avoidable cancer deaths in both sexes, with one exception. ASMRs for causes avoidable through primary prevention increased in females in all countries (in Switzerland from 16.2 to 20.3 per 100 000 person years, EAPC 2.0 [95% CI 1.4 to 2.6]). Compared with its neighbouring countries, Switzerland showed the lowest rates for all groups of avoidable cancer mortality in males 2008-2010.

Conclusion

Overall avoidable cancer mortality decreased, indicating achievements in cancer care and related health policies. However, increasing trends in avoidable cancer mortality through primary prevention for females suggest there is a need in Switzerland and its European neighbouring countries to improve primary prevention.",2015-10-02 +29637905,Realistic modeling of deep brain stimulation implants for electromagnetic MRI safety studies.,"We propose a framework for electromagnetic (EM) simulation of deep brain stimulation (DBS) patients in radiofrequency (RF) coils. We generated a model of a DBS patient using post-operative head and neck computed tomography (CT) images stitched together into a 'virtual CT' image covering the entire length of the implant. The body was modeled as homogeneous. The implant path extracted from the CT data contained self-intersections, which we corrected automatically using an optimization procedure. Using the CT-derived DBS path, we built a model of the implant including electrodes, helicoidal internal conductor wires, loops, extension cables, and the implanted pulse generator. We also built four simplified models with straight wires, no extension cables and no loops to assess the impact of these simplifications on safety predictions. We simulated EM fields induced by the RF birdcage body coil in the body model, including at the DBS lead tip at both 1.5 Tesla (64 MHz) and 3 Tesla (123 MHz). We also assessed the robustness of our simulation results by systematically varying the EM properties of the body model and the position and length of the DBS implant (sensitivity analysis). The topology correction algorithm corrected all self-intersection and curvature violations of the initial path while introducing minimal deformations (open-source code available at http://ptx.martinos.org/index.php/Main_Page). 
The unaveraged lead-tip peak SAR predicted by the five DBS models (0.1 mm resolution grid) ranged from 12.8 kW kg-1 (full model, helicoidal conductors) to 43.6 kW kg-1 (no loops, straight conductors) at 1.5 T (3.4-fold variation) and 18.6 kW kg-1 (full model, straight conductors) to 73.8 kW kg-1 (no loops, straight conductors) at 3 T (4.0-fold variation). At 1.5 T and 3 T, the variability of lead-tip peak SAR with respect to the conductivity ranged between 18% and 30%. Variability with respect to the position and length of the DBS implant ranged between 9.5% and 27.6%.",2018-05-04 +28571781,Development of a Sigma-2 Receptor affinity filter through a Monte Carlo based QSAR analysis.,"For the first time in sigma-2 (σ2) receptor field, a quantitative structure-activity relationship (QSAR) model has been built using pKi values of the whole set of known selective σ2 receptor ligands (548 compounds), taken from the Sigma-2 Receptor Selective Ligands Database (S2RSLDB) (http://www.researchdsf.unict.it/S2RSLDB/), through the Monte Carlo technique and employing the software CORAL. The model has been developed by using a large and structurally diverse set of compounds, allowing for a prediction of different populations of chemical compounds endpoint (σ2 receptor pKi). The statistical quality reached, suggested that model for pKi determination is robust and possesses a satisfactory predictive potential. The statistical quality is high for both visible and invisible sets. The screening of the FDA approved drugs, external to our dataset, suggested that sixteen compounds might be repositioned as σ2 receptor ligands (predicted pKi≥8). A literature check showed that six of these compounds have already been tested for affinity at σ2 receptor and, of these, two (Flunarizine and Terbinafine) have shown an experimental σ2 receptor pKi>7. 
This suggests that this QSAR model may be used as focusing screening filter in order to prospectively find or repurpose new drugs with high affinity for the σ2 receptor, and overall allowing for an enhanced hit rate respect to a random screening.",2017-05-29 +23959375,GnpIS: an information system to integrate genetic and genomic data from plants and fungi.,"Data integration is a key challenge for modern bioinformatics. It aims to provide biologists with tools to explore relevant data produced by different studies. Large-scale international projects can generate lots of heterogeneous and unrelated data. The challenge is to integrate this information with other publicly available data. Nucleotide sequencing throughput has been improved with new technologies; this increases the need for powerful information systems able to store, manage and explore data. GnpIS is a multispecies integrative information system dedicated to plant and fungi pests. It bridges genetic and genomic data, allowing researchers access to both genetic information (e.g. genetic maps, quantitative trait loci, markers, single nucleotide polymorphisms, germplasms and genotypes) and genomic data (e.g. genomic sequences, physical maps, genome annotation and expression data) for species of agronomical interest. GnpIS is used by both large international projects and plant science departments at the French National Institute for Agricultural Research. Here, we illustrate its use. Database URL: http://urgi.versailles.inra.fr/gnpis.",2013-08-19 +29894117,Health Impacts of Citywide and Localized Power Outages in New York City.,"

Background

Previous studies investigated potential health effects of large-scale power outages, including the massive power failure that affected the northeastern United States and Ontario, Canada, in August 2003, and outages associated with major storms. However, information on localized outages is limited.

Objective

The study sought to examine potential health impacts of citywide and localized outages in New York City (NYC).

Methods

Along with the citywide 2003 outage, localized outages in July 1999 and July 2006 were identified. We additionally investigated localized, warm- and cold-weather outages that occurred in any of 66 NYC electric-grid networks during 2002–2014 using New York State Public Service Commission data. Mortality and hospitalizations were geocoded and linked to the networks. Associations were estimated using Poisson time-series regression, including examining distributed lags and adjusting for temperature and temporal trends. Network-specific estimates were pooled by season.

Results

Respiratory disease hospitalizations were associated with the 2006 localized outage [cumulative relative risk [CRR] over 0–1 lag day, lag01=2.26 (95% confidence interval [CI]: 1.08, 4.74)] and the 2003 citywide outage, but not with other localized, warm-weather outages. Renal disease hospitalizations were associated with the 2003 citywide outage, and with localized, warm-weather outages, pooled across networks [RR at lag3=1.16 (95% CI: 1.00, 1.34)], but not the 2006 localized outage. All-cause mortality was positively associated with the 1999, 2003, and 2006 outages (significant for the 2003 outage only), but not with other localized, warm-weather outages. Localized, cold-weather outages were associated with all-cause mortality [lag01 CRR=1.06 (95% CI: 1.01, 1.12)] and cardiovascular disease hospitalizations [lag01 CRR=1.14 (95% CI: 1.03, 1.26)], and fewer respiratory disease hospitalizations [lag03 CRR=0.77 (95% CI: 0.61, 0.97)].

Conclusions

Localized outages may affect health. This information can inform preparedness efforts and underscores the public health importance of ensuring electric grid resiliency to climate change. https://doi.org/10.1289/EHP2154.",2018-06-11 +27995167,Theoretical and analyzed data related to thermal degradation kinetics of poly (L-lactic acid)/chitosan-grafted-oligo L-lactic acid (PLA/CH-g-OLLA) bionanocomposite films.,"The theoretical and analyzed data incorporated in this article are related to the recently published research article entitled ""Thermal degradation behaviour of nanoamphiphilic chitosan dispersed poly (lactic acid) bionanocomposite films"" (http://dx.doi.org/10.1016/j.ijbiomac.2016.11.024) (A.K. Pal, V. Katiyar, 2016) [1]. Supplementary information and data (both raw and analyzed) are related to thermal degradation kinetics and explain various model fitting and isoconversional methods, which are used in this research work to enhance the knowledge about degradation behaviour of PLA/CH-g-OLLA bionanocomposite system. Non-isothermal degradation kinetics of such polymeric system was proposed using Kissinger, Kissinger-Akahira-Sunose, Flynn-Wall-Ozawa and Augis and Bennett models to estimate the activation energies (Ea) and R² values.",2016-12-07 +29270816,"Arctic berry extracts target the gut-liver axis to alleviate metabolic endotoxaemia, insulin resistance and hepatic steatosis in diet-induced obese mice.","

Aims/hypothesis

There is growing evidence that fruit polyphenols exert beneficial effects on the metabolic syndrome, but the underlying mechanisms remain poorly understood. In the present study, we aimed to analyse the effects of polyphenolic extracts from five types of Arctic berries in a model of diet-induced obesity.

Methods

Male C57BL/6 J mice were fed a high-fat/high-sucrose (HFHS) diet and orally treated with extracts of bog blueberry (BBE), cloudberry (CLE), crowberry (CRE), alpine bearberry (ABE), lingonberry (LGE) or vehicle (HFHS) for 8 weeks. An additional group of standard-chow-fed, vehicle-treated mice was included as a reference control for diet-induced obesity. OGTTs and insulin tolerance tests were conducted, and both plasma insulin and C-peptide were assessed throughout the OGTT. Quantitative PCR, western blot analysis and ELISAs were used to assess enterohepatic immunometabolic features. Faecal DNA was extracted and 16S rRNA gene-based analysis was used to profile the gut microbiota.

Results

Treatment with CLE, ABE and LGE, but not with BBE or CRE, prevented both fasting hyperinsulinaemia (mean ± SEM [pmol/l]: chow 67.2 ± 12.3, HFHS 153.9 ± 19.3, BBE 114.4 ± 14.3, CLE 82.5 ± 13.0, CRE 152.3 ± 24.4, ABE 90.6 ± 18.0, LGE 95.4 ± 10.5) and postprandial hyperinsulinaemia (mean ± SEM AUC [pmol/l × min]: chow 14.3 ± 1.4, HFHS 31.4 ± 3.1, BBE 27.2 ± 4.0, CLE 17.7 ± 2.2, CRE 32.6 ± 6.3, ABE 22.7 ± 18.0, LGE 23.9 ± 2.5). None of the berry extracts affected C-peptide levels or body weight gain. Levels of hepatic serine phosphorylated Akt were 1.6-, 1.5- and 1.2-fold higher with CLE, ABE and LGE treatment, respectively, and hepatic carcinoembryonic antigen-related cell adhesion molecule (CEACAM)-1 tyrosine phosphorylation was 0.6-, 0.7- and 0.9-fold increased in these mice vs vehicle-treated, HFHS-fed mice. These changes were associated with reduced liver triacylglycerol deposition, lower circulating endotoxins, alleviated hepatic and intestinal inflammation, and major gut microbial alterations (e.g. bloom of Akkermansia muciniphila, Turicibacter and Oscillibacter) in CLE-, ABE- and LGE-treated mice.

Conclusions/interpretation

Our findings reveal novel mechanisms by which polyphenolic extracts from ABE, LGE and especially CLE target the gut-liver axis to protect diet-induced obese mice against metabolic endotoxaemia, insulin resistance and hepatic steatosis, which importantly improves hepatic insulin clearance. These results support the potential benefits of these Arctic berries and their integration into health programmes to help attenuate obesity-related chronic inflammation and metabolic disorders.

Data availability

All raw sequences have been deposited in the public European Nucleotide Archive server under accession number PRJEB19783 ( https://www.ebi.ac.uk/ena/data/view/PRJEB19783 ).",2017-12-21 +28186228,PhosphoPICK-SNP: quantifying the effect of amino acid variants on protein phosphorylation.,"

Motivation

Genome-wide association studies are identifying single nucleotide variants (SNVs) linked to various diseases, however the functional effect caused by these variants is often unknown. One potential functional effect, the loss or gain of protein phosphorylation sites, can be induced through variations in key amino acids that disrupt or introduce valid kinase binding patterns. Current methods for predicting the effect of SNVs on phosphorylation operate on the sequence content of reference and variant proteins. However, consideration of the amino acid sequence alone is insufficient for predicting phosphorylation change, as context factors determine kinase-substrate selection.

Results

We present here a method for quantifying the effect of SNVs on protein phosphorylation through an integrated system of motif analysis and context-based assessment of kinase targets. By predicting the effect that known variants across the proteome have on phosphorylation, we are able to use this background of proteome-wide variant effects to quantify the significance of novel variants for modifying phosphorylation. We validate our method on a manually curated set of phosphorylation change-causing variants from the primary literature, showing that the method predicts known examples of phosphorylation change at high levels of specificity. We apply our approach to data-sets of variants in phosphorylation site regions, showing that variants causing predicted phosphorylation loss are over-represented among disease-associated variants.

Availability and implementation

The method is freely available as a web-service at the website http://bioinf.scmb.uq.edu.au/phosphopick/snp.

Contact

m.boden@uq.edu.au.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +27504081,Condor: a simulation tool for flash X-ray imaging.,"Flash X-ray imaging has the potential to determine structures down to molecular resolution without the need for crystallization. The ability to accurately predict the diffraction signal and to identify the optimal experimental configuration within the limits of the instrument is important for successful data collection. This article introduces Condor, an open-source simulation tool to predict X-ray far-field scattering amplitudes of isolated particles for customized experimental designs and samples, which the user defines by an atomic or a refractive index model. The software enables researchers to test whether their envisaged imaging experiment is feasible, and to optimize critical parameters for reaching the best possible result. It also aims to support researchers who intend to create or advance reconstruction algorithms by simulating realistic test data. Condor is designed to be easy to use and can be either installed as a Python package or used from its web interface (http://lmb.icm.uu.se/condor). X-ray free-electron lasers have high running costs and beam time at these facilities is precious. Data quality can be substantially improved by using simulations to guide the experimental design and simplify data analysis.",2016-07-14 +24828308,Metabolomic Characterization of Knockout Mutants in Arabidopsis: Development of a Metabolite Profiling Database for Knockout Mutants in Arabidopsis.,"Despite recent intensive research efforts in functional genomics, the functions of only a limited number of Arabidopsis (Arabidopsis thaliana) genes have been determined experimentally, and improving gene annotation remains a major challenge in plant science. As metabolite profiling can characterize the metabolomic phenotype of a genetic perturbation in the plant metabolism, it provides clues to the function(s) of genes of interest. 
We chose 50 Arabidopsis mutants, including a set of characterized and uncharacterized mutants, that resemble wild-type plants. We performed metabolite profiling of the plants using gas chromatography-mass spectrometry. To make the data set available as an efficient public functional genomics tool for hypothesis generation, we developed the Metabolite Profiling Database for Knock-Out Mutants in Arabidopsis (MeKO). It allows the evaluation of whether a mutation affects metabolism during normal plant growth and contains images of mutants, data on differences in metabolite accumulation, and interactive analysis tools. Nonprocessed data, including chromatograms, mass spectra, and experimental metadata, follow the guidelines set by the Metabolomics Standards Initiative and are freely downloadable. Proof-of-concept analysis suggests that MeKO is highly useful for the generation of hypotheses for genes of interest and for improving gene annotation. MeKO is publicly available at http://prime.psc.riken.jp/meko/.",2014-05-14 +26163693,Application of learning to rank to protein remote homology detection.,"

Motivation

Protein remote homology detection is one of the fundamental problems in computational biology, aiming to find protein sequences in a database of known structures that are evolutionarily related to a given query protein. Some computational methods treat this problem as a ranking problem and achieve the state-of-the-art performance, such as PSI-BLAST, HHblits and ProtEmbed. This raises the possibility to combine these methods to improve the predictive performance. In this regard, we are to propose a new computational method called ProtDec-LTR for protein remote homology detection, which is able to combine various ranking methods in a supervised manner via using the Learning to Rank (LTR) algorithm derived from natural language processing.

Results

Experimental results on a widely used benchmark dataset showed that ProtDec-LTR can achieve an ROC1 score of 0.8442 and an ROC50 score of 0.9023 outperforming all the individual predictors and some state-of-the-art methods. These results indicate that it is correct to treat protein remote homology detection as a ranking problem, and predictive performance improvement can be achieved by combining different ranking approaches in a supervised manner via using LTR.

Availability and implementation

For users' convenience, the software tools of three basic ranking predictors and Learning to Rank algorithm were provided at http://bioinformatics.hitsz.edu.cn/ProtDec-LTR/home/

Contact

bliu@insun.hit.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-10 +27478519,Biomedical Big Data Training Collaborative (BBDTC): An effort to bridge the talent gap in biomedical science and research.,"The BBDTC (https://biobigdata.ucsd.edu) is a community-oriented platform to encourage high-quality knowledge dissemination with the aim of growing a well-informed biomedical big data community through collaborative efforts on training and education. The BBDTC collaborative is an e-learning platform that supports the biomedical community to access, develop and deploy open training materials. The BBDTC supports Big Data skill training for biomedical scientists at all levels, and from varied backgrounds. The natural hierarchy of courses allows them to be broken into and handled as modules. Modules can be reused in the context of multiple courses and reshuffled, producing a new and different, dynamic course called a playlist. Users may create playlists to suit their learning requirements and share it with individual users or the wider public. BBDTC leverages the maturity and design of the HUBzero content-management platform for delivering educational content. To facilitate the migration of existing content, the BBDTC supports importing and exporting course material from the edX platform. Migration tools will be extended in the future to support other platforms. Hands-on training software packages, i.e., toolboxes, are supported through Amazon EC2 and Virtualbox virtualization technologies, and they are available as: (i) downloadable lightweight Virtualbox Images providing a standardized software tool environment with software packages and test data on their personal machines, and (ii) remotely accessible Amazon EC2 Virtual Machines for accessing biomedical big data tools and scalable big data experiments. 
At the moment, the BBDTC site contains three open Biomedical big data training courses with lecture contents, videos and hands-on training utilizing VM toolboxes, covering diverse topics. The courses have enhanced the hands-on learning environment by providing structured content that users can use at their own pace. A four course biomedical big data series is planned for development in 2016.",2016-06-01 +29280501,"'Just because a doctor says something, doesn't mean that [it] will happen': self-perception as having a Fertility Problem among Infertility Patients.","Only some individuals who have the medically defined condition 'infertility' adopt a self-definition as having a fertility problem, which has implications for social and behavioural responses, yet there is no clear consensus on why some people and not others adopt a medical label. We use interview data from 28 women and men who sought medical infertility treatment to understand variations in self-identification. Results highlight the importance of identity disruption for understanding the dialectical relationship between medical contact and self-identification, as well as how diagnosis acts both as a category and a process. Simultaneously integrating new medical knowledge from testing and treatment with previous fertility self-perceptions created difficulty for settling on an infertility self-perception. Four response categories emerged for adopting a self-perception of having a fertility problem: (i) the non-adopters - never adopting the self-perception pre- or post-medical contact; (ii) uncertain - not being fully committed to the self-perception pre- or post-medical contact; (iii) assuming the label - not having prior fertility concerns but adopting the self-perception post-medical contact; and (iv) solidifying a tentative identity - not being fully committed to a self-perception pre-medical contact, but fully committed post-medical contact. 
(A virtual abstract of this paper can be viewed at: https://www.youtube.com/channel/UC_979cmCmR9rLrKuD7z0ycA).",2017-12-27 +24214957,bNAber: database of broadly neutralizing HIV antibodies.,"The discovery of broadly neutralizing antibodies (bNAbs) has provided an enormous impetus to the HIV vaccine research and to entire immunology. The bNAber database at http://bNAber.org provides open, user-friendly access to detailed data on the rapidly growing list of HIV bNAbs, including neutralization profiles, sequences and three-dimensional structures (when available). It also provides an extensive list of visualization and analysis tools, such as heatmaps to analyse neutralization data as well as structure and sequence viewers to correlate bNAbs properties with structural and sequence features of individual antibodies. The goal of the bNAber database is to enable researchers in this field to easily compare and analyse available information on bNAbs thereby supporting efforts to design an effective vaccine for HIV/AIDS. The bNAber database not only provides easy access to data that currently is scattered in the Supplementary Materials sections of individual papers, but also contributes to the development of general standards of data that have to be presented with the discovery of new bNAbs and a universal mechanism of how such data can be shared.",2013-11-07 +24694117,Quantum coupled mutation finder: predicting functionally or structurally important sites in proteins using quantum Jensen-Shannon divergence and CUDA programming.,"

Background

The identification of functionally or structurally important non-conserved residue sites in protein MSAs is an important challenge for understanding the structural basis and molecular mechanism of protein functions. Despite the rich literature on compensatory mutations as well as sequence conservation analysis for the detection of those important residues, previous methods often rely on classical information-theoretic measures. However, these measures usually do not take into account dis/similarities of amino acids which are likely to be crucial for those residues. In this study, we present a new method, the Quantum Coupled Mutation Finder (QCMF) that incorporates significant dis/similar amino acid pair signals in the prediction of functionally or structurally important sites.

Results

The result of this study is twofold. First, using the essential sites of two human proteins, namely epidermal growth factor receptor (EGFR) and glucokinase (GCK), we tested the QCMF-method. The QCMF includes two metrics based on quantum Jensen-Shannon divergence to measure both sequence conservation and compensatory mutations. We found that the QCMF reaches an improved performance in identifying essential sites from MSAs of both proteins with a significantly higher Matthews correlation coefficient (MCC) value in comparison to previous methods. Second, using a data set of 153 proteins, we made a pairwise comparison between QCMF and three conventional methods. This comparison study strongly suggests that QCMF complements the conventional methods for the identification of correlated mutations in MSAs.

Conclusions

QCMF utilizes the notion of entanglement, which is a major resource of quantum information, to model significant dissimilar and similar amino acid pair signals in the detection of functionally or structurally important sites. Our results suggest that on the one hand QCMF significantly outperforms the previous method, which mainly focuses on dissimilar amino acid signals, to detect essential sites in proteins. On the other hand, it is complementary to the existing methods for the identification of correlated mutations. The method of QCMF is computationally intensive. To ensure a feasible computation time of the QCMF's algorithm, we leveraged Compute Unified Device Architecture (CUDA). The QCMF server is freely accessible at http://qcmf.informatik.uni-goettingen.de/.",2014-04-03 +29990222,Robust Distance Metric Learning via Bayesian Inference.,"Distance metric learning (DML) has achieved great success in many computer vision tasks. However, most existing DML algorithms are based on point estimation, and thus are sensitive to the choice of training examples and tend to be over-fitting in the presence of label noise. In this paper, we present a robust DML algorithm based on Bayesian inference. In particular, our method is essentially a Bayesian extension to a previous classic DML method—large margin nearest neighbor classification—and we use stochastic variational inference to estimate the posterior distribution of the transformation matrix. Furthermore, we theoretically show that the proposed algorithm is robust against label noise in the sense that an arbitrary point with label noise has bounded influence on the learnt model. With some reasonable assumptions, we derive a generalization error bound of this method in the presence of label noise. We also show that the DML hypothesis class in which our model lies is probably approximately correct-learnable and give the sample complexity. 
The effectiveness of the proposed method is demonstrated with state of the art performance on three popular data sets with different types of label noise. A MATLAB implementation of this method is made available at http://parnec.nuaa.edu.cn/xtan/Publication.htm.",2017-12-11 +29957590,Spatial Modeling to Identify Sociodemographic Predictors of Hydraulic Fracturing Wastewater Injection Wells in Ohio Census Block Groups.,"

Background

Hydraulically fractured wells produce 2-14 million liters of wastewater, which may contain toxic and radioactive compounds. The wastewater is predominantly disposed of using Class II injection wells.

Objective

Our objective was to evaluate the relationship between sociodemographic characteristics and injection well locations in Ohio.

Methods

Using state and federal data sources, we classified Ohio census block groups by presence of injection wells, number of hydraulically fractured wells, sociodemographic factors (median household income, % white, population density, % ≥high school education, median age, voter turnout), and geographic information (land area, water area, situated over shale). We modeled the odds of having at least one injection well within a block group with respect to all covariates using three multivariable models incorporating different spatial components to account for similarities in neighboring block groups.

Results

In bivariate analyses, block groups with injection wells (n=156) compared with those without (n=9,049) had lower population density (71 vs. 2,210 people/mi² or 27 vs. 854 people/km²), larger median area (43.5 vs. 1.35 km²), higher median age (42.8 vs. 40.2 y), and higher % white (98.1% vs. 92.1%). After adjustment using a spatial logistic regression model, the odds of a block group containing an injection well were 16% lower per $10,000 increase in median income [odds ratio (OR)=0.837; 95% credible interval (CI): 0.719, 0.961] and 97% lower per 1,000 people/mi² (or per 386 people/km²) increase (OR=0.030; 95% CI=0.008, 0.072). Block groups on shale and those containing fewer hydraulically fractured wells were more likely to include an injection well. Percentage white, median age, % ≥high school education, and % voter turnout were not significant predictors of injection well presence.

Conclusion

In Ohio, injection wells were inversely associated with block groups' median incomes after adjusting for other sociodemographic and geographic variables. Research is needed to determine whether residents in census blocks with injection wells face increased risk of chemical exposures or adverse health outcomes. https://doi.org/10.1289/EHP2663.",2018-06-27 +25270878,CancerPPD: a database of anticancer peptides and proteins.,"CancerPPD (http://crdd.osdd.net/raghava/cancerppd/) is a repository of experimentally verified anticancer peptides (ACPs) and anticancer proteins. Data were manually collected from published research articles, patents and from other databases. The current release of CancerPPD consists of 3491 ACP and 121 anticancer protein entries. Each entry provides comprehensive information related to a peptide like its source of origin, nature of the peptide, anticancer activity, N- and C-terminal modifications, conformation, etc. Additionally, CancerPPD provides the information of around 249 types of cancer cell lines and 16 different assays used for testing the ACPs. In addition to natural peptides, CancerPPD contains peptides having non-natural, chemically modified residues and D-amino acids. Besides this primary information, CancerPPD stores predicted tertiary structures as well as peptide sequences in SMILES format. Tertiary structures of peptides were predicted using the state-of-art method, PEPstr and secondary structural states were assigned using DSSP. In order to assist users, a number of web-based tools have been integrated, these include keyword search, data browsing, sequence and structural similarity search. We believe that CancerPPD will be very useful in designing peptide-based anticancer therapeutics.",2014-09-30 +26344127,Standardized food images: A photographing protocol and image database.,"The regulation of food intake has gained much research interest because of the current obesity epidemic. 
For research purposes, food images are a good and convenient alternative for real food because many dietary decisions are made based on the sight of foods. Food pictures are assumed to elicit anticipatory responses similar to real foods because of learned associations between visual food characteristics and post-ingestive consequences. In contemporary food science, a wide variety of images are used which introduces between-study variability and hampers comparison and meta-analysis of results. Therefore, we created an easy-to-use photographing protocol which enables researchers to generate high resolution food images appropriate for their study objective and population. In addition, we provide a high quality standardized picture set which was characterized in seven European countries. With the use of this photographing protocol a large number of food images were created. Of these images, 80 were selected based on their recognizability in Scotland, Greece and The Netherlands. We collected image characteristics such as liking, perceived calories and/or perceived healthiness ratings from 449 adults and 191 children. The majority of the foods were recognized and liked at all sites. The differences in liking ratings, perceived calories and perceived healthiness between sites were minimal. Furthermore, perceived caloric content and healthiness ratings correlated strongly (r ≥ 0.8) with actual caloric content in both adults and children. The photographing protocol as well as the images and the data are freely available for research use on http://nutritionalneuroscience.eu/. 
By providing the research community with standardized images and the tools to create their own, comparability between studies will be improved and a head-start is made for a world-wide standardized food image database.",2015-09-04 +25932650,TMREC: A Database of Transcription Factor and MiRNA Regulatory Cascades in Human Diseases.,"Over the past decades, studies have reported that the combinatorial regulation of transcription factors (TFs) and microRNAs (miRNAs) is essential for the appropriate execution of biological events and developmental processes. Dysregulations of these regulators often cause diseases. However, there are no available resources on the regulatory cascades of TFs and miRNAs in the context of human diseases. To fulfill this vacancy, we established the TMREC database in this study. First, we integrated curated transcriptional and post-transcriptional regulations to construct the TF and miRNA regulatory network. Next, we identified all linear paths using the Breadth First Search traversal method. Finally, we used known disease-related genes and miRNAs to measure the strength of association between cascades and diseases. Currently, TMREC consists of 74,248 cascades and 25,194 cascade clusters, involving in 412 TFs, 266 miRNAs and 545 diseases. With the expanding of experimental support regulation data, we will regularly update the database. TMREC aims to help experimental biologists to comprehensively analyse gene expression regulation, to understand the aetiology and to predict novel therapeutic targets. TMREC is freely available at http://bioinfo.hrbmu.edu.cn/TMREC/.",2015-05-01 +27348712,The Perseus computational platform for comprehensive analysis of (prote)omics data.,"A main bottleneck in proteomics is the downstream biological analysis of highly multivariate quantitative protein abundance data generated using mass-spectrometry-based analysis. 
We developed the Perseus software platform (http://www.perseus-framework.org) to support biological and biomedical researchers in interpreting protein quantification, interaction and post-translational modification data. Perseus contains a comprehensive portfolio of statistical tools for high-dimensional omics data analysis covering normalization, pattern recognition, time-series analysis, cross-omics comparisons and multiple-hypothesis testing. A machine learning module supports the classification and validation of patient groups for diagnosis and prognosis, and it also detects predictive protein signatures. Central to Perseus is a user-friendly, interactive workflow environment that provides complete documentation of computational methods used in a publication. All activities in Perseus are realized as plugins, and users can extend the software by programming their own, which can be shared through a plugin store. We anticipate that Perseus's arsenal of algorithms and its intuitive usability will empower interdisciplinary analysis of complex large data sets.",2016-06-27 +25784642,"Brain Tumor Database, a free relational database for collection and analysis of brain tumor patient information.","In this study, we describe the development and utilization of a relational database designed to manage the clinical and radiological data of patients with brain tumors. The Brain Tumor Database was implemented using MySQL v.5.0, while the graphical user interface was created using PHP and HTML, thus making it easily accessible through a web browser. This web-based approach allows for multiple institutions to potentially access the database. The BT Database can record brain tumor patient information (e.g. clinical features, anatomical attributes, and radiological characteristics) and be used for clinical and research purposes. Analytic tools to automatically generate statistics and different plots are provided. 
The BT Database is a free and powerful user-friendly tool with a wide range of possible clinical and research applications in neurology and neurosurgery. The BT Database graphical user interface source code and manual are freely available at http://tumorsdatabase.altervista.org.",2015-03-01 +28552937,Philanthro-metrics: Mining multi-million-dollar gifts.,"The Million Dollar List (MDL, online at http://www.milliondollarlist.org) is a compilation of publicly announced charitable donations of $1 million or more from across the United States since 2000; as of December 2016, the database contains close to 80,000 gifts made by U.S. individuals, corporations, foundations, and other grant-making nonprofit organizations. This paper discusses the unique value of the Million Dollar List and provides unique insights to key questions such as: How does distance affect giving? How do networks impact million-dollar-plus gifts? Understanding the geospatial and temporal dimensions of philanthropy can assist researchers and policymakers to better understand the role of private funding in innovation and discovery. Moreover, the results from the paper emphasize the importance of philanthropy for fueling research and development in science, the arts, environment, and health. The paper also includes the limitations of the presented analyses and promising future work.",2017-05-26 +27591082,HAPRAP: a haplotype-based iterative method for statistical fine mapping using GWAS summary statistics.,"

Motivation

Fine mapping is a widely used approach for identifying the causal variant(s) at disease-associated loci. Standard methods (e.g. multiple regression) require individual level genotypes. Recent fine mapping methods using summary-level data require the pairwise correlation coefficients ([Formula: see text]) of the variants. However, haplotypes rather than pairwise [Formula: see text], are the true biological representation of linkage disequilibrium (LD) among multiple loci. In this article, we present an empirical iterative method, HAPlotype Regional Association analysis Program (HAPRAP), that enables fine mapping using summary statistics and haplotype information from an individual-level reference panel.

Results

Simulations with individual-level genotypes show that the results of HAPRAP and multiple regression are highly consistent. In simulation with summary-level data, we demonstrate that HAPRAP is less sensitive to poor LD estimates. In a parametric simulation using Genetic Investigation of ANthropometric Traits height data, HAPRAP performs well with a small training sample size (N < 2000) while other methods become suboptimal. Moreover, HAPRAP's performance is not affected substantially by single nucleotide polymorphisms (SNPs) with low minor allele frequencies. We applied the method to existing quantitative trait and binary outcome meta-analyses (human height, QTc interval and gallbladder disease); all previous reported association signals were replicated and two additional variants were independently associated with human height. Due to the growing availability of summary level data, the value of HAPRAP is likely to increase markedly for future analyses (e.g. functional prediction and identification of instruments for Mendelian randomization).

Availability and implementation

The HAPRAP package and documentation are available at http://apps.biocompute.org.uk/haprap/. Contact: jie.zheng@bristol.ac.uk or tom.gaunt@bristol.ac.uk. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-01 +28520403,"Integrated Strategy for Unknown EI-MS Identification Using Quality Control Calibration Curve, Multivariate Analysis, EI-MS Spectral Database, and Retention Index Prediction.","Compound identification using unknown electron ionization (EI) mass spectra in gas chromatography coupled with mass spectrometry (GC-MS) is challenging in untargeted metabolomics, natural product chemistry, or exposome research. While the total count of EI-MS records included in publicly or commercially available databases is over 900 000, efficient use of this huge database has not been achieved in metabolomics. Therefore, we proposed a ""four-step"" strategy for the identification of biologically significant metabolites using an integrated cheminformatics approach: (i) quality control calibration curve to reduce background noise, (ii) variable selection by hypothesis testing in principal component analysis for the efficient selection of target peaks, (iii) searching the EI-MS spectral database, and (iv) retention index (RI) filtering in combination with RI predictions. In this study, the new MS-FINDER spectral search engine was developed and utilized for searching EI-MS databases using mass spectral similarity with the evaluation of false discovery rate. Moreover, in silico derivatization software, MetaboloDerivatizer, was developed to calculate the chemical properties of derivative compounds, and all retention indexes in EI-MS databases were predicted using a simple mathematical model. The strategy was showcased in the identification of three novel metabolites (butane-1,2,3-triol, 3-deoxyglucosone, and palatinitol) in Chinese medicine Senkyu for quality assessment, as validated using authentic standard compounds. 
All tools and curated public EI-MS databases are freely available in the 'Computational MS-based metabolomics' section of the RIKEN PRIMe Web site ( http://prime.psc.riken.jp ).",2017-05-26 +28290805,[The Prediction Model of Cardiovascular Events Among the Russian Population: Methodological Aspects].,"Modeling is the common approach for predicting not only the population health, but also the social and economic burden of disease, which is an important argument while making decisions in health care and prevention.

Aim

To develop the model for predicting cardiovascular risk, applicable for the assessment of clinical and socio-economic effects of preventive and therapeutic actions at the level of the whole population or part (region, city, group of patients).

Material and methods

An analytical decision-making model was built using a Markov model consisting of Markov states and probabilities of transition from one state to another within a certain time interval. The model included risk factors and cardiovascular diseases (blood pressure, cholesterol, smoking) and probabilities of transition between them. Data were standardized by age for both males and females. Multivariate sensitivity analysis was performed. The literature search was conducted using eLIBRARY.RU (http://elibrary.ru) and CyberLeninka (http://cyberleninka.ru). Consultations with experts in the field of coronary heart disease, stroke, and heart failure were carried out.

Results

The model, allowing to compare the outcomes of two scenarios (absence/presence of intervention). The model included risk factors: arterial hypertension, smoking, hypercholesterolemia, and important CVD: coronary artery disease, myocardial infarction, unstable angina, heart failure, chronic heart failure after myocardial infarction, transient ischemic attack, stroke, atrial fibrillation. There was absorbent state - death. At the output from the model the patient state was defined as the sum of the Markov states characteristics during the model time horizon. Each result had the cost and outcome, which values could be calculated by simulation modeling (""cohort simulation""). The data analysis from prospective study had shown that mortality increases with age, as expected, but in different age groups impact of cardiovascular causes was different and declined with age. In the case of the blood pressure there was the expected increase of the death risk with the growth of pressure levels, both for males and females, except for males 60-64 years old who had a minimal risk of death at the blood pressure 140-149/90-99 mmHg, and among males with normal blood pressure the risk was higher. Smoking was associated with an expected increase of the death risk among all age groups in both sexes. In males, aged 40-64 years, the death risk was higher at the normal levels of cholesterol (2-5 mmol/l), than at the cholesterol levels equal 5-7 mmol/l. There were no data sources to assess probability of occurrence of the risk factors (hypertension, smoking, hypercholesterolemia) in patients who did not have these factors previously in our studies, and available literature. This requires the prospective studies on at least two slices of surveys (not just with the endpoint analysis). 
A literature search for prospective Russian studies that evaluate the probability of transition from one state to another, together with consultations with experts, showed that currently conducted studies do not provide all the necessary transition probabilities on the basis of national data. In the absence of local data, it is acceptable for the model to use the results of meta-analyses of international studies.

Conclusion

The Markov model will allow prediction of the effectiveness of different interventions, including their socio-economic consequences. The created model can be updated in the future as the results of new studies or new data appear, in order to improve modeling accuracy.",2016-12-01 +28108451,GWAR: robust analysis and meta-analysis of genome-wide association studies.,"

Motivation

In the context of genome-wide association studies (GWAS), there is a variety of statistical techniques in order to conduct the analysis, but, in most cases, the underlying genetic model is usually unknown. Under these circumstances, the classical Cochran-Armitage trend test (CATT) is suboptimal. Robust procedures that maximize the power and preserve the nominal type I error rate are preferable. Moreover, performing a meta-analysis using robust procedures is of great interest and has never been addressed in the past. The primary goal of this work is to implement several robust methods for analysis and meta-analysis in the statistical package Stata and subsequently to make the software available to the scientific community.

Results

The CATT under a recessive, additive and dominant model of inheritance as well as robust methods based on the Maximum Efficiency Robust Test statistic, the MAX statistic and the MIN2 were implemented in Stata. Concerning MAX and MIN2, we calculated their asymptotic null distributions relying on numerical integration resulting in a great gain in computational time without losing accuracy. All the aforementioned approaches were employed in a fixed or a random effects meta-analysis setting using summary data with weights equal to the reciprocal of the combined cases and controls. Overall, this is the first complete effort to implement procedures for analysis and meta-analysis in GWAS using Stata.

Availability and implementation

A Stata program and a web-server are freely available for academic users at http://www.compgen.org/tools/GWAR.

Contact

pbagos@compgen.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +25988315,"ImmuSort, a database on gene plasticity and electronic sorting for immune cells.","Gene expression is highly dynamic and plastic. We present a new immunological database, ImmuSort. Unlike other gene expression databases, ImmuSort provides a convenient way to view global differential gene expression data across thousands of experimental conditions in immune cells. It enables electronic sorting, which is a bioinformatics process to retrieve cell states associated with specific experimental conditions that are mainly based on gene expression intensity. A comparison of gene expression profiles reveals other applications, such as the evaluation of immune cell biomarkers and cell subsets, identification of cell specific and/or disease-associated genes or transcripts, comparison of gene expression in different transcript variants and probe set quality evaluation. A plasticity score is introduced to measure gene plasticity. Average rank and marker evaluation scores are used to evaluate biomarkers. The current version includes 31 human and 17 mouse immune cell groups, comprising 10,422 and 3,929 microarrays derived from public databases, respectively. A total of 20,283 human and 20,963 mouse genes are available to query in the database. Examples show the distinct advantages of the database. The database URL is http://202.85.212.211/Account/ImmuSort.html.",2015-05-19 +27993787,Improved VCF normalization for accurate VCF comparison.,"

Motivation

The Variant Call Format (VCF) is widely used to store data about genetic variation. Variant calling workflows detect potential variants in large numbers of short sequence reads generated by DNA sequencing and report them in VCF format. To evaluate the accuracy of variant callers, it is critical to correctly compare their output against a reference VCF file containing a gold standard set of variants. However, comparing VCF files is a complicated task as an individual genomic variant can be represented in several different ways and is therefore not necessarily reported in a unique way by different software.

Results

We introduce a VCF normalization method called Best Alignment Normalisation (BAN) that results in more accurate VCF file comparison. BAN applies all the variations in a VCF file to the reference genome to create a sample genome, and then recalls the variants by aligning this sample genome back with the reference genome. Since the purpose of BAN is to get an accurate result at the time of VCF comparison, we define a better normalization method as the one resulting in less disagreement between the outputs of different VCF comparators.

Availability and implementation

The BAN Linux bash script along with required software are publicly available on https://sites.google.com/site/banadf16.

Contact

A.Bayat@unsw.edu.au.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +34179267,Measurement of Arabidopsis thaliana Plant Traits Using the PHENOPSIS Phenotyping Platform.,"High-throughput phenotyping of plant traits is a powerful tool to further our understanding of plant growth and its underlying physiological, molecular, and genetic determinisms. This protocol describes the methodology of a standard phenotyping experiment in PHENOPSIS automated platform, which was engineered in INRA-LEPSE (https://www6.montpellier.inra.fr/lepse) and custom-made by Optimalog company. The seminal method was published by Granier et al. (2006). The platform is used to explore and test various ecophysiological hypotheses (Tisné et al., 2010; Baerenfaller et al., 2012; Vile et al., 2012; Bac-Molenaar et al., 2015; Rymaszewski et al., 2017). Here, the focus concerns the preparation and management of experiments, as well as measurements of growth-related traits (e.g., projected rosette area, total leaf area and growth rate), water status-related traits (e.g., leaf dry matter content and relative water content), and plant architecture-related traits (e.g., stomatal density and index and lamina/petiole ratio). Briefly, a completely randomized (block) design is set up in the growth chamber. Next, the substrate is prepared, its initial water content is measured and pots are filled. Seeds are sown onto the soil surface and germinated prior to the experiment. After germination, soil watering and image (visible, infra-red, fluorescence) acquisition are planned by the user and performed by the automaton. Destructive measurements may be performed during the experiment. 
Data extraction from images and estimation of growth-related trait values involves semi-automated procedures and statistical processing.",2018-02-20 +25399406,Toxicogenomics directory of chemically exposed human hepatocytes.,"A long-term goal of numerous research projects is to identify biomarkers for in vitro systems predicting toxicity in vivo. Often, transcriptomics data are used to identify candidates for further evaluation. However, a systematic directory summarizing key features of chemically influenced genes in human hepatocytes is not yet available. To bridge this gap, we used the Open TG-GATES database with Affymetrix files of cultivated human hepatocytes incubated with chemicals, further sets of gene array data with hepatocytes from human donors generated in this study, and publicly available genome-wide datasets of human liver tissue from patients with non-alcoholic steatohepatitis (NASH), cirrhosis, and hepatocellular cancer (HCC). After a curation procedure, expression data of 143 chemicals were included into a comprehensive biostatistical analysis. The results are summarized in the publicly available toxicotranscriptomics directory ( http://wiki.toxbank.net/toxicogenomics-map/ ) which provides information for all genes whether they are up- or downregulated by chemicals and, if yes, by which compounds. The directory also informs about the following key features of chemically influenced genes: (1) Stereotypical stress response. When chemicals induce strong expression alterations, this usually includes a complex but highly reproducible pattern named 'stereotypical response.' On the other hand, more specific expression responses exist that are induced only by individual compounds or small numbers of compounds. The directory differentiates if the gene is part of the stereotypical stress response or if it represents a more specific reaction. (2) Liver disease-associated genes. 
Approximately 20 % of the genes influenced by chemicals are up- or downregulated, also in liver disease. Liver disease genes deregulated in cirrhosis, HCC, and NASH that overlap with genes of the aforementioned stereotypical chemical stress response include CYP3A7, normally expressed in fetal liver; the phase II metabolizing enzyme SULT1C2; ALDH8A1, known to generate the ligand of RXR, one of the master regulators of gene expression in the liver; and several genes involved in normal liver functions: CPS1, PCK1, SLC2A2, CYP8B1, CYP4A11, ABCA8, and ADH4. (3) Unstable baseline genes. The process of isolating and the cultivation of hepatocytes was sufficient to induce some stress leading to alterations in the expression of genes, the so-called unstable baseline genes. (4) Biological function. Although more than 2,000 genes are transcriptionally influenced by chemicals, they can be assigned to a relatively small group of biological functions, including energy and lipid metabolism, inflammation and immune response, protein modification, endogenous and xenobiotic metabolism, cytoskeletal organization, stress response, and DNA repair. In conclusion, the introduced toxicotranscriptomics directory offers a basis for a rationale choice of candidate genes for biomarker evaluation studies and represents an easy to use source of background information on chemically influenced genes.",2014-11-16 +28168870,Annotation of functional impact of voltage-gated sodium channel mutations.,"Voltage-gated sodium channels are pore-forming transmembrane proteins that selectively allow sodium ions to flow across the plasma membrane according to the electro-chemical gradient thus mediating the rising phase of action potentials in excitable cells and playing key roles in physiological processes such as neurotransmission, skeletal muscle contraction, heart rhythm, and pain sensation. 
Genetic variations in the nine human genes encoding these channels are known to cause a large range of diseases affecting the nervous and cardiac systems. Understanding the molecular effect of genetic variations is critical for elucidating the pathologic mechanisms of known variations and in predicting the effect of newly discovered ones. To this end, we have created a Web-based tool, the Ion Channels Variants Portal, which compiles all variants characterized functionally in the human sodium channel genes. This portal describes 672 variants each associated with at least one molecular or clinical phenotypic impact, for a total of 4,658 observations extracted from 264 different research articles. These data were captured as structured annotations using standardized vocabularies and ontologies, such as the Gene Ontology and the Ion Channel ElectroPhysiology Ontology. All these data are available to the scientific community via neXtProt at https://www.nextprot.org/portals/navmut.",2017-02-28 +25262355,NrichD database: sequence databases enriched with computationally designed protein-like sequences aid in remote homology detection.,"NrichD (http://proline.biochem.iisc.ernet.in/NRICHD/) is a database of computationally designed protein-like sequences, augmented into natural sequence databases that can perform hops in protein sequence space to assist in the detection of remote relationships. Establishing protein relationships in the absence of structural evidence or natural 'intermediately related sequences' is a challenging task. Recently, we have demonstrated that the computational design of artificial intermediary sequences/linkers is an effective approach to fill naturally occurring voids in protein sequence space. Through a large-scale assessment we have demonstrated that such sequences can be plugged into commonly employed search databases to improve the performance of routinely used sequence search methods in detecting remote relationships. 
Since it is anticipated that such data sets will be employed to establish protein relationships, two databases that have already captured these relationships at the structural and functional domain level, namely, the SCOP database and the Pfam database, have been 'enriched' with these artificial intermediary sequences. NrichD database currently contains 3,611,010 artificial sequences that have been generated between 27,882 pairs of families from 374 SCOP folds. The data sets are freely available for download. Additional features include the design of artificial sequences between any two protein families of interest to the user.",2014-09-27 +24855436,Polytraits: A database on biological traits of marine polychaetes.,"The study of ecosystem functioning - the role which organisms play in an ecosystem - is becoming increasingly important in marine ecological research. The functional structure of a community can be represented by a set of functional traits assigned to behavioural, reproductive and morphological characteristics. The collection of these traits from the literature is however a laborious and time-consuming process, and gaps of knowledge and restricted availability of literature are a common problem. Trait data are not yet readily being shared by research communities, and even if they are, a lack of trait data repositories and standards for data formats leads to the publication of trait information in forms which cannot be processed by computers. This paper describes Polytraits (http://polytraits.lifewatchgreece.eu), a database on biological traits of marine polychaetes (bristle worms, Polychaeta: Annelida). At present, the database contains almost 20,000 records on morphological, behavioural and reproductive characteristics of more than 1,000 marine polychaete species, all referenced by literature sources. 
All data can be freely accessed through the project website in different ways and formats, both human-readable and machine-readable, and have been submitted to the Encyclopedia of Life for archival and integration with trait information from other sources.",2014-01-17 +28263273,Hospital Administration and Nursing Leadership in Disasters: An Exploratory Study Using Concept Mapping.,"Strong leadership is critical in disaster situations when ""patient surge"" challenges a hospital's capacity to respond and normally acceptable patterns of care are disrupted. Activation of the emergency operations plan triggers an incident command system structure for leadership decision making. Yet, implementation of the emergency operations plan and incident command system protocols is ultimately subject to nursing and hospital leadership at the service- and unit level. The results of these service-/unit-based leadership decisions have the potential to directly impact staff and patient safety, quality of care, and ultimately, patient outcomes. Despite the critical nature of these events, nurse leaders and administrators receive little education regarding leadership and decision making during disaster events. The purpose of this study is to identify essential competencies of nursing and hospital administrators' leadership during disaster events. An integrative mixed-methods design combining qualitative and quantitative approaches to data collection and analysis was used. Five focus groups were conducted with nurse leaders and hospital administrators at a large urban hospital in the Northeastern United States in a collaborative group process to generate relevant leadership competencies. Concept Systems Incorporated was used to sort, prioritize, and analyze the data (http://conceptsystemsinc.com/). 
The results suggest that participants' institutional knowledge (of existing resources, communications, processes) and prior disaster experience increase leadership competence.",2017-04-01 +24559061,An online tool for mapping insecticide resistance in major Anopheles vectors of human malaria parasites and review of resistance status for the Afrotropical region.,"

Background

Malaria control programmes across Africa and beyond are facing increasing insecticide resistance in the major anopheline vectors. In order to preserve or prolong the effectiveness of the main malaria vector interventions, up-to-date and easily accessible insecticide resistance data that are interpretable at operationally-relevant scales are critical. Herein we introduce and demonstrate the usefulness of an online mapping tool, IR Mapper.

Methods

A systematic search of published, peer-reviewed literature was performed and Anopheles insecticide susceptibility and resistance mechanisms data were extracted and added to a database after a two-level verification process. IR Mapper ( http://www.irmapper.com) was developed using the ArcGIS for JavaScript Application Programming Interface and ArcGIS Online platform for exploration and projection of these data.

Results

Literature searches yielded a total of 4,084 susceptibility data points for 1,505 populations, and 2,097 resistance mechanisms data points for 1,000 populations of Anopheles spp. tested via recommended WHO methods from 54 countries between 1954 and 2012. For the Afrotropical region, data were most abundant for populations of An. gambiae, and pyrethroids and DDT were more often used in susceptibility assays (51.1 and 26.8% of all reports, respectively) than carbamates and organophosphates. Between 2001 and 2012, there was a clear increase in prevalence and distribution of confirmed resistance of An. gambiae s.l. to pyrethroids (from 41 to 87% of the mosquito populations tested) and DDT (from 64 to 91%) throughout the Afrotropical region. Metabolic resistance mechanisms were detected in western and eastern African populations and the two kdr mutations (L1014S and L1014F) were widespread. For An. funestus s.l., relatively few populations were tested, although in 2010-2012 resistance was reported in 50% of 10 populations tested. Maps are provided to illustrate the use of IR Mapper and the distribution of insecticide resistance in malaria vectors in Africa.

Conclusions

The increasing pyrethroid and DDT resistance in Anopheles in the Afrotropical region is alarming. Urgent attention should be afforded to testing An. funestus populations especially for metabolic resistance mechanisms. IR Mapper is a useful tool for investigating temporal and spatial trends in Anopheles resistance to support the pragmatic use of insecticidal interventions.",2014-02-21 +27045824,Integration and Querying of Genomic and Proteomic Semantic Annotations for Biomedical Knowledge Extraction.,"Understanding complex biological phenomena involves answering complex biomedical questions on multiple biomolecular information simultaneously, which are expressed through multiple genomic and proteomic semantic annotations scattered in many distributed and heterogeneous data sources; such heterogeneity and dispersion hamper the biologists' ability of asking global queries and performing global evaluations. To overcome this problem, we developed a software architecture to create and maintain a Genomic and Proteomic Knowledge Base (GPKB), which integrates several of the most relevant sources of such dispersed information (including Entrez Gene, UniProt, IntAct, Expasy Enzyme, GO, GOA, BioCyc, KEGG, Reactome, and OMIM). Our solution is general, as it uses a flexible, modular, and multilevel global data schema based on abstraction and generalization of integrated data features, and a set of automatic procedures for easing data integration and maintenance, also when the integrated data sources evolve in data content, structure, and number. These procedures also assure consistency, quality, and provenance tracking of all integrated data, and perform the semantic closure of the hierarchical relationships of the integrated biomedical ontologies. 
At http://www.bioinformatics.deib.polimi.it/GPKB/, a Web interface allows graphical easy composition of queries, although complex, on the knowledge base, supporting also semantic query expansion and comprehensive explorative search of the integrated data to better sustain biomedical knowledge extraction.",2016-03-01 +22135352,A Bayesian antedependence model for whole genome prediction.,"Hierarchical mixed effects models have been demonstrated to be powerful for predicting genomic merit of livestock and plants, on the basis of high-density single-nucleotide polymorphism (SNP) marker panels, and their use is being increasingly advocated for genomic predictions in human health. Two particularly popular approaches, labeled BayesA and BayesB, are based on specifying all SNP-associated effects to be independent of each other. BayesB extends BayesA by allowing a large proportion of SNP markers to be associated with null effects. We further extend these two models to specify SNP effects as being spatially correlated due to the chromosomally proximal effects of causal variants. These two models, that we respectively dub as ante-BayesA and ante-BayesB, are based on a first-order nonstationary antedependence specification between SNP effects. In a simulation study involving 20 replicate data sets, each analyzed at six different SNP marker densities with average LD levels ranging from r(2) = 0.15 to 0.31, the antedependence methods had significantly (P < 0.01) higher accuracies than their corresponding classical counterparts at higher LD levels (r(2) > 0.24) with differences exceeding 3%. A cross-validation study was also conducted on the heterogeneous stock mice data resource (http://mus.well.ox.ac.uk/mouse/HS/) using 6-week body weights as the phenotype. The antedependence methods increased cross-validation prediction accuracies by up to 3.6% compared to their classical counterparts (P < 0.001). 
Finally, we applied our method to other benchmark data sets and demonstrated that the antedependence methods were more accurate than their classical counterparts for genomic predictions, even for individuals several generations beyond the training data.",2011-11-30 +26821470,Summary information of human health hazard assessment of existing chemical substances (I).,"Under the Chemical Substances Control Law (CSCL) in Japan, initial hazard information for existing chemical substances has been collected by the Ministry of Health, Labour and Welfare, Japan (MHLW) to assess potential initial risks to human health. We have reviewed all collected toxicity information pertaining to acute toxicity, repeated dose toxicity, genotoxicity, and/or reproductive/developmental toxicity and performed hazard assessments. Approximately 150 substances are currently undergoing review and assessment. For clarification and evaluation of each toxicity study, we have created a dossier (a collection of study data containing a detailed summary of the methods, results, and conclusions of each study) in English using the International Uniform Chemical Information Database (IUCLID) version 5. The IUCLID dossier format is widely used and has been accepted as one of the most beneficial formats for providing summarized chemical substance toxicity assessments. In this report, as a contribution to our ongoing hazard assessment activity, we present summary hazard information related to the potential human health effects of the following 5 chemical substances: 4-chlorobenzoyl chloride (CAS: 122-01-0); benzenesulfonic acid, 4-hydroxy-, tin (2+) salt (CAS: 70974-33-3); chlorocyclohexane (CAS: 542-18-7); 1,3-cyclohexanedimethanamine (CAS: 2579-20-6); and 1,3,5-triazine-2,4,6 (1H,3H,5H) -trithione (CAS: 638-16-4). The IUCLID dossiers created for these 5 chemical substances will be made available via the Japan Existing Chemical Data Base (JECDB) at . 
Additional human health hazard information on existing chemical substances will be provided using the same methodology and website when it is available.",2015-01-01 +28555482,Neonatal mortality in East Africa and West Africa: a geographic analysis of district-level demographic and health survey data.,"Under-five child mortality declined 47% since 2000 following the implementation of the United Nation's (UN) Millennium Development Goals. To further reduce under-five child mortality, the UN's Sustainable Development Goals (SDGs) will focus on interventions to address neonatal mortality, a major contributor of under-five mortality. The African region has the highest neonatal mortality rate (28.0 per 1000 live births), followed by that of the Eastern Mediterranean (26.6) and South-East Asia (24.3). This study used the Demographic and Health Survey Birth Recode data (http://dhsprogram.com/data/File-Types-and-Names.cfm) to identify high-risk districts and countries for neonatal mortality in two sub-regions of Africa - East Africa and West Africa. Geographically weighted Poisson regression models were estimated to capture the spatially varying relationships between neonatal mortality and dimensions of potential need i) care around the time of delivery, ii) maternal education, and iii) women's empowerment. In East Africa, neonatal mortality was significantly associated with home births, mothers without an education and mothers whose husbands decided on contraceptive practices, controlling for rural residency. In West Africa, neonatal mortality was also significantly associated with home births, mothers with a primary education and mothers who did not want or plan their last child. Importantly, neonatal mortality associated with home deliveries were explained by maternal exposure to unprotected water sources in East Africa and older maternal age and female sex of infants in West Africa. 
Future SDG-interventions may target these dimensions of need in priority high-risk districts and countries, to further reduce the burden of neonatal mortality in Africa.",2017-05-26 +28809811,Mutation Clusters from Cancer Exome. ,"We apply our statistically deterministic machine learning/clustering algorithm *K-means (recently developed in https://ssrn.com/abstract=2908286) to 10,656 published exome samples for 32 cancer types. A majority of cancer types exhibit a mutation clustering structure. Our results are in-sample stable. They are also out-of-sample stable when applied to 1389 published genome samples across 14 cancer types. In contrast, we find in- and out-of-sample instabilities in cancer signatures extracted from exome samples via nonnegative matrix factorization (NMF), a computationally-costly and non-deterministic method. Extracting stable mutation structures from exome data could have important implications for speed and cost, which are critical for early-stage cancer diagnostics, such as novel blood-test methods currently in development.",2017-08-15 +24991954,CORTECON: a temporal transcriptome analysis of in vitro human cerebral cortex development from human embryonic stem cells.,"Many neurological and psychiatric disorders affect the cerebral cortex, and a clearer understanding of the molecular processes underlying human corticogenesis will provide greater insight into such pathologies. To date, knowledge of gene expression changes accompanying corticogenesis is largely based on murine data. Here we present a searchable, comprehensive, temporal gene expression data set encompassing cerebral cortical development from human embryonic stem cells (hESCs). Using a modified differentiation protocol that yields neurons suggestive of prefrontal cortex, we identified sets of genes and long noncoding RNAs that significantly change during corticogenesis and those enriched for disease-associations. 
Numerous alternatively spliced genes with varying temporal patterns of expression are revealed, including TGIF1, involved in holoprosencephaly, and MARK1, involved in autism. We have created a database (http://cortecon.neuralsci.org/) that provides online, query-based access to changes in RNA expression and alternatively spliced transcripts during human cortical development.",2014-07-01 +26225242,PhIN: A Protein Pharmacology Interaction Network Database.,"Network pharmacology is a new and hot concept in drug discovery for its ability to investigate the complexity of polypharmacology, and becomes more and more important in drug development. Here we report a protein pharmacology interaction network database (PhIN), aiming to assist multitarget drug discovery by providing comprehensive and flexible network pharmacology analysis. Overall, PhIN contains 1,126,060 target-target interaction pairs in terms of shared compounds and 3,428,020 pairs in terms of shared scaffolds, which involve 12,419,700 activity data, 9,414 targets, 314 viral targets, 652 pathways, 1,359,400 compounds, and 309,556 scaffolds. Using PhIN, users can obtain interacting target networks within or across human pathways, between human and virus, by defining the number of shared compounds or scaffolds under an activity cutoff. We expect PhIN to be a useful tool for multitarget drug development. PhIN is freely available at http://cadd.pharmacy.nankai.edu.cn/phin/.",2015-03-18 +27590176,BESSiE: a software for linear model BLUP and Bayesian MCMC analysis of large-scale genomic data.,"

Background

The advent of genomic marker data has triggered the development of various Bayesian algorithms for estimation of marker effects, but software packages implementing these algorithms are not readily available, or are limited to a single algorithm, uni-variate analysis or a limited number of factors. Moreover, script based environments like R may not be able to handle large-scale genomic data or exploit model properties which save computing time or memory (RAM).

Results

BESSiE is a software designed for best linear unbiased prediction (BLUP) and Bayesian Markov chain Monte Carlo analysis of linear mixed models allowing for continuous and/or categorical multivariate, repeated and missing observations, various random and fixed factors and large-scale genomic marker data. BESSiE covers the algorithms genomic BLUP, single nucleotide polymorphism (SNP)-BLUP, BayesA, BayesB, BayesCπ and BayesR for estimating marker effects and/or summarised genomic values. BESSiE is parameter file driven, command line operated and available for Linux environments. BESSiE executable, manual and a collection of examples can be downloaded from http://turing.une.edu.au/~agbu-admin/BESSiE/ .

Conclusion

BESSiE allows the user to compare several different Bayesian and BLUP algorithms for estimating marker effects from large data sets in complex models with the same software by small alterations in the parameter file. The program has no hard-coded limitations for number of factors, observations or genetic markers.",2016-09-02 +24408216,tbvar: A comprehensive genome variation resource for Mycobacterium tuberculosis.,"Mycobacterium tuberculosis, along with closely related species, commonly known as M. tuberculosis complex (MTBC), causes tuberculosis in humans and other organisms. Tuberculosis is a disease with high morbidity and mortality, especially in the third world. The genetic variability between clinical isolates of MTBC has been poorly understood, although recent years have seen the re-sequencing of a large number of clinical isolates of MTBC from around the world. The availability of genomic data of multiple isolates in public domain would potentially offer a unique opportunity toward understanding the variome of the organism and the functional consequences of the variations. This nevertheless has been limited by the lack of systematic curation and analysis of data sets available in public domain. In this report, we have re-analyzed re-sequencing data sets corresponding to >450 isolates of MTBC available in public domain to create a comprehensive variome map of MTBC comprising >29 000 single nucleotide variations. Using a systematic computational pipeline, we have annotated potential functional variants and drug-resistance-associated variants from the variome. We have made available this data set as a searchable database. Apart from a user-friendly interface, the database also has a novel option to annotate variants from clinical re-sequencing data sets of MTBC. To the best of our knowledge, tbvar is the largest and most comprehensive genome variation resource for MTBC. 
Database URL: http://genome.igib.res.in/tbvar/",2014-01-09 +25849373,Systematic functional profiling of transcription factor networks in Cryptococcus neoformans.,"Cryptococcus neoformans causes life-threatening meningoencephalitis in humans, but its overall biological and pathogenic regulatory circuits remain elusive, particularly due to the presence of an evolutionarily divergent set of transcription factors (TFs). Here, we report the construction of a high-quality library of 322 signature-tagged gene-deletion strains for 155 putative TF genes previously predicted using the DNA-binding domain TF database, and examine their in vitro and in vivo phenotypic traits under 32 distinct growth conditions. At least one phenotypic trait is exhibited by 145 out of 155 TF mutants (93%) and ∼85% of them (132/155) are functionally characterized for the first time in this study. The genotypic and phenotypic data for each TF are available in the C. neoformans TF phenome database (http://tf.cryptococcus.org). In conclusion, our phenome-based functional analysis of the C. neoformans TF mutant library provides key insights into transcriptional networks of basidiomycetous fungi and human fungal pathogens.",2015-04-07 +29036499,VCF-Explorer: filtering and analysing whole genome VCF files.,"

Summary

The decreasing cost in high-throughput technologies led to a number of sequencing projects consisting of thousands of whole genomes. The paradigm shift from exome to whole genome brings a significant increase in the size of output files. Most of the existing tools which are developed to analyse exome files are not adequate for larger VCF files produced by whole genome studies. In this work we present VCF-Explorer, a variant analysis software capable of handling large files. Memory efficiency and the avoidance of a computationally costly pre-processing step enable the analysis to be performed with ordinary computers. VCF-Explorer provides an easy to use environment where users can define various types of queries based on variant and sample genotype level annotations. VCF-Explorer can be run in different environments and computational platforms ranging from a standard laptop to a high performance server.

Availability and implementation

VCF-Explorer is freely available at: http://vcfexplorer.sourceforge.net/.

Contact

mete.akgun@tubitak.gov.tr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +25413662,Curve matching: a data-driven technique to improve individual prediction of childhood growth.,"Longitudinal growth data are valuable for predicting and interpreting future growth of individual children. This note explores the idea of 'curve matching', a new technique to improve prediction of future growth of an individual child. The key idea is to find existing children in existing databases that are similar to the current child. The growth patterns of the matched children suggest how the current child might evolve in the future. This paper describes the various conceptual and practical issues that need to be addressed before the idea can take off. A demo implementation is available at http://vps.stefvanbuuren.nl:3838/frisodemo/.",2014-11-18 +22096234,dbDEPC 2.0: updated database of differentially expressed proteins in human cancers.,"A large amount of differentially expressed proteins (DEPs) have been identified in various cancer proteomics experiments, curation and annotation of these proteins are important in deciphering their roles in oncogenesis and tumor progression, and may further help to discover potential protein biomarkers for clinical applications. In 2009, we published the first database of DEPs in human cancers (dbDEPCs). In this updated version of 2011, dbDEPC 2.0 has more than doubly expanded to over 4000 protein entries, curated from 331 experiments across 20 types of human cancers. This resource allows researchers to search whether their interested proteins have been reported changing in certain cancers, to compare their own proteomic discovery with previous studies, to picture selected protein expression heatmap across multiple cancers and to relate protein expression changes with aberrance in other genetic level. 
New important developments include addition of experiment design information, advanced filter tools for customer-specified analysis and a network analysis tool. We expect dbDEPC 2.0 to be a much more powerful tool than it was in its first release and can serve as reference to both proteomics and cancer researchers. dbDEPC 2.0 is available at http://lifecenter.sgst.cn/dbdepc/index.do.",2011-11-16 +29506120,Granulosa cells from human primordial and primary follicles show differential global gene expression profiles.,"

Study question

Can novel genetic candidates involved in follicle dormancy, activation and integrity be identified from transcriptomic profiles of isolated granulosa cells from human primordial and primary follicles?

Summary answer

The granulosa cell compartment of the human primordial and primary follicle was extensively enriched in signal transducer and activator of transcription 3 (STAT3) and cAMP-response element binding protein (CREB) signalling, and several other putative signalling pathways that may also be mediators of follicle growth and development were identified.

What is known already

Mechanistic target of rapamycin kinase (mTOR) signalling and the factors Forkhead Box L2 (FOXL2) and KIT proto-oncogene receptor tyrosine kinase (KITL) may be involved in defining the early steps of mammalian follicular recruitment through complex bidirectional signalling between the oocyte and granulosa cells. cAMP/protein kinase A (PKA)/CREB signalling is a feature of FSH-induced regulation of granulosa cell steroidogenesis that is essential to normal human fertility.

Study design, size, duration

A class comparison study was carried out on primordial follicles (n = 539 follicles) and primary follicles (n = 261 follicles) donated by three women having ovarian tissue cryopreserved before chemotherapy.

Participants/materials, setting, methods

RNA samples from isolates of laser capture micro-dissected oocytes and follicles from the primordial and primary stage, respectively, were sequenced on the HiSeq Illumina platform. Data mapping, quality control, filtering, FPKM (fragments per kilobase of exon per million) normalization and comparisons were performed. The granulosa cell contribution in whole follicle isolates was extracted in silico. Modelling of complex biological systems was performed using Ingenuity Pathway Analysis (IPA). For validation of transcriptomic findings, we performed quantitative RT-PCR of selected candidate genes. Furthermore, we interrogated the in situ localization of selected corresponding proteins using immunofluorescence.

Main results and the role of chance

Our differentially expressed gene analysis revealed a number of transcripts in the granulosa cells to be significantly down- (736 genes) or up- (294 genes) regulated during the human primordial-to-primary follicle transition. The IPA analysis revealed enriched canonical signalling pathways not previously associated with granulosa cells from human primordial and primary follicles. Immunofluorescent staining of human ovarian tissue explored the intra-ovarian localization of FOG2 and FOXL2, which revealed the presence of forkhead box L2 (FOXL2) in both oocytes and granulosa cells in primary follicles, with a more enriched staining in the granulosa cells in primary follicles. Friend of GATA 2 (FOG2) stained strongly in oocytes in primordial follicles, with a shift towards granulosa cells as follicle stage advanced.

Large scale data

http://users-birc.au.dk/biopv/published_data/ernst_et_al_GC_2017/.

Limitations, reasons for caution

This is a descriptive study, and no functional assays were employed. The study was based on a limited number of patients, and it is acknowledged that natural biological variance exists in human samples. Strict filters were applied to accommodate the in silico extraction of the granulosa cell contribution. In support of this, quantitative RT-PCR was used to confirm selected candidate genes, and immunofluorescent staining was employed to interrogate the intra-ovarian distribution of selected corresponding proteins. Moreover, it is unknown whether the primordial follicles analysed represent those still in the resting pool, or those from the cohort that have entered the growing pool.

Wider implications of the findings

We present, for the first time, a detailed description of global gene activity in the human granulosa cell compartment of primordial and primary follicles. These results may be utilized in the development of novel clinical treatment strategies aimed at improving granulosa cell function.

Study funding/competing interest(s)

E.H.E. was supported by the Health Faculty, Aarhus University and Kong Christian Den Tiendes Fond. K.L.H. was supported by a grant from Fondens til Lægevidenskabens Fremme and Kong Christian Den Tiendes Fond. No authors have competing interests to declare.",2018-04-01 +26484227,Gene expression changes of single skeletal muscle fibers in response to modulation of the mitochondrial calcium uniporter (MCU).,"The mitochondrial calcium uniporter (MCU) gene codifies for the inner mitochondrial membrane (IMM) channel responsible for mitochondrial Ca(2 +) uptake. Cytosolic Ca(2 +) transients are involved in sarcomere contraction through cycles of release and storage in the sarcoplasmic reticulum. In addition cytosolic Ca(2 +) regulates various signaling cascades that eventually lead to gene expression reprogramming. Mitochondria are strategically placed in close contact with the ER/SR, thus cytosolic Ca(2 +) transients elicit large increases in the [Ca(2 +)] of the mitochondrial matrix ([Ca(2 +)]mt). Mitochondrial Ca(2 +) uptake regulates energy production and cell survival. In addition, we recently showed that MCU-dependent mitochondrial Ca(2 +) uptake controls skeletal muscle trophism. In the same report, we dissected the effects of MCU-dependent mitochondrial Ca(2 +) uptake on gene expression through microarray gene expression analysis upon modulation of MCU expression by in vivo AAV infection. Analyses were performed on single skeletal muscle fibers at two time points (7 and 14 days post-AAV injection). Raw and normalized data are available on the GEO database (http://www.ncbi.nlm.nih.gov/geo/) (GSE60931).",2015-05-30 +26400040,Xtalk: a path-based approach for identifying crosstalk between signaling pathways.,"

Motivation

Cells communicate with their environment via signal transduction pathways. On occasion, the activation of one pathway can produce an effect downstream of another pathway, a phenomenon known as crosstalk. Existing computational methods to discover such pathway pairs rely on simple overlap statistics.

Results

We present Xtalk, a path-based approach for identifying pairs of pathways that may crosstalk. Xtalk computes the statistical significance of the average length of multiple short paths that connect receptors in one pathway to the transcription factors in another. By design, Xtalk reports the precise interactions and mechanisms that support the identified crosstalk. We applied Xtalk to signaling pathways in the KEGG and NCI-PID databases. We manually curated a gold standard set of 132 crosstalking pathway pairs and a set of 140 pairs that did not crosstalk, for which Xtalk achieved an area under the receiver operator characteristic curve of 0.65, a 12% improvement over the closest competing approach. The area under the receiver operator characteristic curve varied with the pathway, suggesting that crosstalk should be evaluated on a pathway-by-pathway level. We also analyzed an extended set of 658 pathway pairs in KEGG and a set of more than 7000 pathway pairs in NCI-PID. For the top-ranking pairs, we found substantial support in the literature (81% for KEGG and 78% for NCI-PID). We provide examples of networks computed by Xtalk that accurately recovered known mechanisms of crosstalk.

Availability and implementation

The XTALK software is available at http://bioinformatics.cs.vt.edu/~murali/software. Crosstalk networks are available at http://graphspace.org/graphs?tags=2015-bioinformatics-xtalk.

Contact

ategge@vt.edu, murali@cs.vt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-23 +29346336,"Respiratory Syncytial Virus Seasonality - United States, 2014-2017.","Respiratory syncytial virus (RSV) is a leading cause of lower respiratory tract infection in young children worldwide (1-3). In the United States, RSV infection results in >57,000 hospitalizations and 2 million outpatient visits each year among children aged <5 years (3). Recent studies have highlighted the importance of RSV in adults as well as children (4). CDC reported RSV seasonality nationally, by U.S. Department of Health and Human Services (HHS) regions* and for the state of Florida, using a new statistical method that analyzes polymerase chain reaction (PCR) laboratory detections reported to the National Respiratory and Enteric Virus Surveillance System (NREVSS) (https://www.cdc.gov/surveillance/nrevss/index.html). Nationally, across three RSV seasons, lasting from the week ending July 5, 2014 through July 1, 2017, the median RSV onset occurred at week 41 (mid-October), and lasted 31 weeks until week 18 (early May). The median national peak occurred at week 5 (early February). Using these new methods, RSV season circulation patterns differed from those reported from previous seasons (5). Health care providers and public health officials use RSV circulation data to guide diagnostic testing and to time the administration of RSV immunoprophylaxis for populations at high risk for severe respiratory illness (6). With several vaccines and other immunoprophylaxis products in development, estimates of RSV circulation are also important to the design of clinical trials and future vaccine effectiveness studies.",2018-01-19 +28961788,FAF-Drugs4: free ADME-tox filtering computations for chemical biology and early stages drug discovery.,"

Motivation

Identification of small molecules that could be interesting starting points for drug discovery or to investigate a biological system as in chemical biology endeavours is both time consuming and costly. In silico approaches that assist the design of quality compound collections or help to prioritize molecules before synthesis or purchase are therefore valuable. Here quality refers to the selection of molecules that pass one or several selected filters that can be tuned by the users according to the project and the stage of the project. These filters can involve prediction of physicochemical properties, search for toxicophores or other unwanted chemical groups.

Results

FAF-Drugs4 is a novel version of our online server dedicated to the preparation and annotation of compound collections. The tool is now faster and several parameters have been optimized. In addition, a new service referred to as FAF-QED, an implementation of the quantitative estimate of drug-likeness method, is now available.

Availability and implementation

The server is available at http://fafdrugs4.mti.univ-paris-diderot.fr.

Contact

Bruno.Villoutreix@inserm.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +25288656,miRIAD-integrating microRNA inter- and intragenic data. ,"MicroRNAs (miRNAs) are a class of small (∼22 nucleotides) non-coding RNAs that post-transcriptionally regulate gene expression by interacting with target mRNAs. A majority of miRNAs is located within intronic or exonic regions of protein-coding genes (host genes), and increasing evidence suggests a functional relationship between these miRNAs and their host genes. Here, we introduce miRIAD, a web-service to facilitate the analysis of genomic and structural features of intragenic miRNAs and their host genes for five species (human, rhesus monkey, mouse, chicken and opossum). miRIAD contains the genomic classification of all miRNAs (inter- and intragenic), as well as classification of all protein-coding genes into host or non-host genes (depending on whether they contain an intragenic miRNA or not). We collected and processed public data from several sources to provide a clear visualization of relevant knowledge related to intragenic miRNAs, such as host gene function, genomic context, names of and references to intragenic miRNAs, miRNA binding sites, clusters of intragenic miRNAs, miRNA and host gene expression across different tissues and expression correlation for intragenic miRNAs and their host genes. Protein-protein interaction data are also presented for functional network analysis of host genes. In summary, miRIAD was designed to help the research community to explore, in a user-friendly environment, intragenic miRNAs, their host genes and functional annotations with minimal effort, facilitating hypothesis generation and in-silico validations. Database URL: http://www.miriad-database.org.",2014-10-06 +24132038,Inhaled nitric oxide does not reduce mortality in patients with acute respiratory distress syndrome regardless of severity: systematic review and meta-analysis.,"

Objective

Treatment with inhaled nitric oxide improves oxygenation but not survival in mechanically ventilated patients with acute respiratory distress syndrome, but the effect may depend on the severity of hypoxemia. Our objective was to determine whether nitric oxide reduces hospital mortality in patients with severe acute respiratory distress syndrome (PaO2/FIO2 ≤ 100 mm Hg) but not in patients with mild-moderate acute respiratory distress syndrome (100 < PaO2/FIO2 ≤ 300 mm Hg) at the time of randomization.

Data sources

Data were collected from Medline, Embase, and Cochrane CENTRAL electronic databases (inception to May 2013); proceedings from five conferences (to May 2013); and trial registries (http://www.clinicaltrials.gov and http://www.controlled-trials.com). No language restrictions were applied.

Study selection

Two authors independently selected parallel-group randomized controlled trials comparing nitric oxide with control (placebo or no gas) in mechanically ventilated adults or postneonatal children with acute respiratory distress syndrome.

Data extraction

Two authors independently extracted data from included trials. Trial investigators provided subgroup data. Meta-analyses used within-trial subgroups and random-effects models.

Data synthesis

Nine trials (n = 1,142 patients) met inclusion criteria. Overall methodological quality was good. Nitric oxide did not reduce mortality in patients with severe acute respiratory distress syndrome (risk ratio, 1.01 [95% CI, 0.78-1.32]; p = 0.93; n = 329, six trials) or mild-moderate acute respiratory distress syndrome (risk ratio, 1.12 [95% CI, 0.89-1.42]; p = 0.33; n = 740, seven trials). Risk ratios were similar between subgroups (interaction p = 0.53). There was no between-trial heterogeneity in any analysis (I² = 0%). Varying the PaO2/FIO2 threshold between 70 and 200 mm Hg, in increments of 10 mm Hg, did not identify any threshold at which the nitric oxide-treated patients had lower mortality relative to controls.

Conclusions

Nitric oxide does not reduce mortality in adults or children with acute respiratory distress syndrome, regardless of the degree of hypoxemia. Given the lack of related ongoing or recently completed randomized trials, new data addressing the effectiveness of nitric oxide in patients with acute respiratory distress syndrome and severe hypoxemia will not be available for the foreseeable future.",2014-02-01 +26231431,HHalign-Kbest: exploring sub-optimal alignments for remote homology comparative modeling.,"

Motivation

The HHsearch algorithm, implementing a hidden Markov model (HMM)-HMM alignment method, has shown excellent alignment performance in the so-called twilight zone (target-template sequence identity with ∼20%). However, an optimal alignment by HHsearch may contain small to large errors, leading to poor structure prediction if these errors are located in important structural elements.

Results

HHalign-Kbest server runs a full pipeline, from the generation of suboptimal HMM-HMM alignments to the evaluation of the best structural models. In the HHsearch framework, it implements a novel algorithm capable of generating k-best HMM-HMM suboptimal alignments rather than only the optimal one. For large proteins, a directed acyclic graph-based implementation reduces drastically the memory usage. Improved alignments were systematically generated among the top k suboptimal alignments. To recognize them, corresponding structural models were systematically generated and evaluated with Qmean score. The method was benchmarked over 420 targets from the SCOP30 database. In the range of HHsearch probability of 20-99%, average quality of the models (TM-score) raised by 4.1-16.3% and 8.0-21.0% considering the top 1 and top 10 best models, respectively.

Availability and implementation

http://bioserv.rpbs.univ-paris-diderot.fr/services/HHalign-Kbest/ (source code and server).

Contact

guerois@cea.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-30 +27547801,Biophysical characterization data of the artificial protein Octarellin V.1 and binding test with its X-ray helpers.,"The artificial protein Octarellin V.1 (http://dx.doi.org/10.1016/j.jsb.2016.05.004[1]) was obtained through a direct evolution process over the de novo designed Octarellin V (http://dx.doi.org/10.1016/S0022-2836(02)01206-8[2]). The protein has been characterized by circular dichroism and fluorescence techniques, in order to obtain data related to its thermo and chemical stability. Moreover, the data for the secondary structure content studied by circular dichroism and infra red techniques is reported for the Octarellin V and V.1. Two crystallization helpers, nanobodies (http://dx.doi.org/10.1038/nprot.2014.039[3]) and αRep (http://dx.doi.org/10.1016/j.jmb.2010.09.048[4]), have been used to create stable complexes. Here we present the data obtained of the binding characterization of the Octarellin V.1 with the crystallization helpers by isothermal titration calorimetry.",2016-07-26 +28698358,Integrative CAGE and DNA Methylation Profiling Identify Epigenetically Regulated Genes in NSCLC.,"Lung cancer is the leading cause of cancer-related deaths worldwide. The majority of cancer driver mutations have been identified; however, relevant epigenetic regulation involved in tumorigenesis has only been fragmentarily analyzed. Epigenetically regulated genes have a great theranostic potential, especially in tumors with no apparent driver mutations. Here, epigenetically regulated genes were identified in lung cancer by an integrative analysis of promoter-level expression profiles from Cap Analysis of Gene Expression (CAGE) of 16 non-small cell lung cancer (NSCLC) cell lines and 16 normal lung primary cell specimens with DNA methylation data of 69 NSCLC cell lines and 6 normal lung epithelial cells. 
A core set of 49 coding genes and 10 long noncoding RNAs (lncRNA), which are upregulated in NSCLC cell lines due to promoter hypomethylation, was uncovered. Twenty-two epigenetically regulated genes were validated (upregulated genes with hypomethylated promoters) in the adenocarcinoma and squamous cell cancer subtypes of lung cancer using The Cancer Genome Atlas data. Furthermore, it was demonstrated that multiple copies of the REP522 DNA repeat family are prominently upregulated due to hypomethylation in NSCLC cell lines, which leads to cancer-specific expression of lncRNAs, such as RP1-90G24.10, AL022344.4, and PCAT7. Finally, Myeloma Overexpressed (MYEOV) was identified as the most promising candidate. Functional studies demonstrated that MYEOV promotes cell proliferation, survival, and invasion. Moreover, high MYEOV expression levels were associated with poor prognosis. Implications: This report identifies a robust list of 22 candidate driver genes that are epigenetically regulated in lung cancer; such genes may complement the known mutational drivers. Visual Overview: http://mcr.aacrjournals.org/content/molcanres/15/10/1354/F1.large.jpg Mol Cancer Res; 15(10); 1354-65. ©2017 AACR.",2017-07-11 +22084196,BGMUT: NCBI dbRBC database of allelic variations of genes encoding antigens of blood group systems.,"Analogous to human leukocyte antigens, blood group antigens are surface markers on the erythrocyte cell membrane whose structures differ among individuals and which can be serologically identified. The Blood Group Antigen Gene Mutation Database (BGMUT) is an online repository of allelic variations in genes that determine the antigens of various human blood group systems. The database is manually curated with allelic information collated from scientific literature and from direct submissions from research laboratories. 
Currently, the database documents sequence variations of a total of 1251 alleles of all 40 gene loci that together are known to affect antigens of 30 human blood group systems. When available, information on the geographic or ethnic prevalence of an allele is also provided. The BGMUT website also has general information on the human blood group systems and the genes responsible for them. BGMUT is a part of the dbRBC resource of the National Center for Biotechnology Information, USA, and is available online at http://www.ncbi.nlm.nih.gov/projects/gv/rbc/xslcgi.fcgi?cmd=bgmut. The database should be of use to members of the transfusion medicine community, those interested in studies of genetic variation and related topics such as human migrations, and students as well as members of the general public.",2011-11-13 +25969449,GalaxyPepDock: a protein-peptide docking tool based on interaction similarity and energy optimization.,"Protein-peptide interactions are involved in a wide range of biological processes and are attractive targets for therapeutic purposes because of their small interfaces. Therefore, effective protein-peptide docking techniques can provide the basis for potential therapeutic applications by enabling an atomic-level understanding of protein interactions. With the increasing number of protein-peptide structures deposited in the protein data bank, the prediction accuracy of protein-peptide docking can be enhanced by utilizing the information provided by the database. The GalaxyPepDock web server, which is freely accessible at http://galaxy.seoklab.org/pepdock, performs similarity-based docking by finding templates from the database of experimentally determined structures and building models using energy-based optimization that allows for structural flexibility. The server can therefore effectively model the structural differences between the template and target protein-peptide complexes. 
The performance of GalaxyPepDock is superior to those of the other currently available web servers when tested on the PeptiDB set and on recently released complex structures. When tested on the CAPRI target 67, GalaxyPepDock generates models that are more accurate than the best server models submitted during the CAPRI blind prediction experiment.",2015-05-12 +30003124,E47 Governs the MYC-CDKN1B/p27KIP1-RB Network to Growth Arrest PDA Cells Independent of CDKN2A/p16INK4A and Wild-Type p53.,"

Background & aims

Oncogenic mutations in KRAS, coupled with inactivation of p53, CDKN2A/p16INK4A, and SMAD4, drive progression of pancreatic ductal adenocarcinoma (PDA). Overexpression of MYC and deregulation of retinoblastoma (RB) further promote cell proliferation and make identifying a means to therapeutically alter cell-cycle control pathways in PDA a significant challenge. We previously showed that the basic helix-loop-helix transcription factor E47 induced stable growth arrest in PDA cells in vitro and in vivo. Here, we identified molecular mechanisms that underlie E47-induced growth arrest in low-passage, patient-derived primary and established PDA cell lines.

Methods

RNA sequencing was used to profile E47-dependent transcriptomes in 5 PDA cell lines. Gene Ontology analysis identified cell-cycle control as the most altered pathway. Small interfering RNA/short hairpin RNA knockdown, small-molecule inhibitors, and viral expression were used to examine the function of E47-dependent genes in cell-cycle arrest. Cell morphology, expression of molecular markers, and senescence-associated β-galactosidase activity assays identified cellular senescence.

Results

E47 uniformly inhibited PDA cell-cycle progression by decreasing expression of MYC, increasing the level of CDKN1B/p27KIP1, and restoring RB tumor-suppressor function. The molecular mechanisms by which E47 elicited these changes included altering both RNA transcript levels and protein stability of MYC and CDKN1B/p27KIP1. At the cellular level, E47 elicited a senescence-like phenotype characterized by increased senescence-associated β-galactosidase activity and altered expression of senescence markers.

Conclusions

E47 governs a highly conserved network of cell-cycle control genes, including MYC, CDKN1B/p27KIP1, and RB, which can induce a senescence-like program in PDA cells that lack CDKN2A/p16INK4A and wild-type p53. RNA sequencing data are available at the National Center for Biotechnology Information GEO at https://www.ncbi.nlm.nih.gov/geo/; accession number: GSE100327.",2018-05-16 +,"Extended and global phylogenetic view of the Bacillus cereus group population by combination of MLST, AFLP, and MLEE genotyping data","The Bacillus cereus group of bacteria includes species that can cause food-poisoning or spoilage, such as B. cereus, as well as Bacillus anthracis, the cause of anthrax. In the present report we have conducted a multi-datatype analysis using tools from the HyperCAT database (http://mlstoslo.uio.no/) that we recently developed, combining data from multilocus sequence typing (Tourasse et al., 2010), amplified fragment length polymorphism, and multilocus enzyme electrophoresis typing techniques. We provide a comprehensive snapshot of the B. cereus group population, incorporating 2213 isolates including 450 from food and dairy products, in the form of both phylogenetic supertrees and superclusters of genetically closely related isolates. Our main findings include the detection of phylogenetically separated groups of isolates possibly representing novel evolutionary lineages within the B. cereus group, a putative new branch of B. anthracis, as well as new groups of related strains containing both environmental and clinical isolates. In addition, the multi-datatype analysis revealed to a larger extent than previously recognized that food-borne isolates can share identical genotyping profiles with strains from various other origins. Altogether, the global analysis confirms and extends the results underlining the opportunistic nature of B. 
cereus group organisms, and the fact that isolates responsible for disease outbreaks and contamination of foodstuffs can originate from various genetic backgrounds.",2011-04-01 +28521551,Systematic Reviews Published in the Cochrane Library January-March 2017.,"The Cochrane Library of Systematic Reviews is now only published monthly online ( http://www.thecochranelibrary.com ). The methods for searching have changed and are in flux. This report attempted to identify all relevant reviews published in the last 3 months to March 30, 2017. The current version contains 7243 complete reviews and 2544 protocols for reviews in production. In addition, there are citations of 1,036,153 randomized controlled trials (first time passing the million mark) and 15,700 cited papers in the Cochrane Methodology Register. The Health Technology Assessment database contains some 17,000 citations. Six reviews have been identified that have potential relevance for practitioners in pain and palliative medicine. The impact factor of the Cochrane Library stands at 6.1. Readers are encouraged to access the full report for any articles of interest, as only a brief commentary is provided.",2017-05-19 +27046867,Hierarchical Maximum Likelihood Clustering Approach.,"

Objective

In this paper, we focused on developing a clustering approach for biological data. In many biological analyses, such as multiomics data analysis and genome-wide association studies analysis, it is crucial to find groups of data belonging to subtypes of diseases or tumors.

Methods

Conventionally, the k-means clustering algorithm is overwhelmingly applied in many areas including biological sciences. There are, however, several alternative clustering algorithms that can be applied, including support vector clustering. In this paper, taking into consideration the nature of biological data, we propose a maximum likelihood clustering scheme based on a hierarchical framework.

Results

This method can perform clustering even when the data belonging to different groups overlap. It can also perform clustering when the number of samples is lower than the data dimensionality.

Conclusion

The proposed scheme is free from selecting initial settings to begin the search process. In addition, it does not require the computation of the first and second derivative of likelihood functions, as is required by many other maximum likelihood-based methods.

Significance

This algorithm uses distribution and centroid information to cluster a sample and was applied to biological data. A MATLAB implementation of this method can be downloaded from the web link http://www.riken.jp/en/research/labs/ims/med_sci_math/.",2016-03-24 +26893951,Conversion and Data Quality Assessment of Electronic Health Record Data at a Korean Tertiary Teaching Hospital to a Common Data Model for Distributed Network Research.,"

Objectives

A distributed research network (DRN) has the advantages of improved statistical power, and it can reveal more significant relationships by increasing sample size. However, differences in data structure constitute a major barrier to integrating data among DRN partners. We describe our experience converting Electronic Health Records (EHR) to the Observational Health Data Sciences and Informatics (OHDSI) Common Data Model (CDM).

Methods

We transformed the EHR of a hospital into Observational Medical Outcomes Partnership (OMOP) CDM ver. 4.0 used in OHDSI. All EHR codes were mapped and converted into the standard vocabulary of the CDM. All data required by the CDM were extracted, transformed, and loaded (ETL) into the CDM structure. To validate and improve the quality of the transformed dataset, the open-source data characterization program ACHILLES was run on the converted data.

Results

Patient, drug, condition, procedure, and visit data from 2.07 million patients who visited the subject hospital from July 1994 to November 2014 were transformed into the CDM. The transformed dataset was named the AUSOM. ACHILLES revealed 36 errors and 13 warnings in the AUSOM. We reviewed and corrected 28 errors. The summarized results of the AUSOM processed with ACHILLES are available at http://ami.ajou.ac.kr:8080/.

Conclusions

We successfully converted our EHRs to a CDM and were able to participate as a data partner in an international DRN. Converting local records in this manner will provide various opportunities for researchers and data holders.",2016-01-31 +29362522,"Next-generation morphological character discovery and evaluation: an X-ray micro-CT enhanced revision of the ant genus Zasphinctus Wheeler (Hymenoptera, Formicidae, Dorylinae) in the Afrotropics.","New technologies for imaging and analysis of morphological characters offer opportunities to enhance revisionary taxonomy and better integrate it with the rest of biology. In this study, we revise the Afrotropical fauna of the ant genus Zasphinctus Wheeler, and use high-resolution X-ray microtomography (micro-CT) to analyse a number of morphological characters of taxonomic and biological interest. We recognise and describe three new species: Z. obamai sp. n., Z. sarowiwai sp. n., and Z. wilsoni sp. n. The species delimitations are based on the morphological examination of all physical specimens in combination with 3D scans and volume reconstructions. Based on this approach, we present a new taxonomic discrimination system for the regional fauna that consists of a combination of easily observable morphological characters visible at magnifications of around 80-100 ×, less observable characters that require higher magnifications, as well as characters made visible through virtual dissections that would otherwise require destructive treatment. Zasphinctus are rarely collected ants and the material available to us is comparatively scarce. Consequently, we explore the use of micro-CT as a non-invasive tool for the virtual examination, manipulation, and dissection of such rare material. 
Furthermore, we delineate the treated species by providing a diagnostic character matrix illustrated by numerous images and supplement that with additional evidence in the form of stacked montage images, 3D PDFs and 3D rotation videos of scans of major body parts and full body (in total we provide 16 stacked montage photographs, 116 images of 3D reconstructions, 15 3D rotation videos, and 13 3D PDFs). In addition to the comparative morphology analyses used for species delimitations, we also apply micro-CT data to examine certain traits, such as mouthparts, cuticle thickness, and thoracic and abdominal muscles in order to assess their taxonomic usefulness or gain insights into the natural history of the genus. The complete datasets comprising the raw micro-CT data, 3D PDFs, 3D rotation videos, still images of 3D models, and coloured montage photos have been made available online as cybertypes (Dryad, http://dx.doi.org/10.5061/dryad.4s3v1).",2017-08-23 +24271392,"Network portal: a database for storage, analysis and visualization of biological networks.","The ease of generating high-throughput data has enabled investigations into organismal complexity at the systems level through the inference of networks of interactions among the various cellular components (genes, RNAs, proteins and metabolites). The wider scientific community, however, currently has limited access to tools for network inference, visualization and analysis because these tasks often require advanced computational knowledge and expensive computing resources. We have designed the network portal (http://networks.systemsbiology.net) to serve as a modular database for the integration of user uploaded and public data, with inference algorithms and tools for the storage, visualization and analysis of biological networks. 
The portal is fully integrated into the Gaggle framework to seamlessly exchange data with desktop and web applications and to allow the user to create, save and modify workspaces, and it includes social networking capabilities for collaborative projects. While the current release of the database contains networks for 13 prokaryotic organisms from diverse phylogenetic clades (4678 co-regulated gene modules, 3466 regulators and 9291 cis-regulatory motifs), it will be rapidly populated with prokaryotic and eukaryotic organisms as relevant data become available in public repositories and through user input. The modular architecture, simple data formats and open API support community development of the portal.",2013-11-23 +24291233,Prediction of posttranslational modification sites from amino acid sequences with kernel methods.,"Post-translational modification (PTM) is the chemical modification of a protein after its translation and one of the later steps in protein biosynthesis for many proteins. It plays an important role which modifies the end product of gene expression and contributes to biological processes and diseased conditions. However, the experimental methods for identifying PTM sites are both costly and time-consuming. Hence computational methods are highly desired. In this work, a novel encoding method PSPM (position-specific propensity matrices) is developed. Then a support vector machine (SVM) with the kernel matrix computed by PSPM is applied to predict the PTM sites. The experimental results indicate that the performance of new method is better or comparable with the existing methods. Therefore, the new method is a useful computational resource for the identification of PTM sites. A unified standalone software PTMPred is developed. It can be used to predict all types of PTM sites if the user provides the training datasets. 
The software can be freely downloaded from http://www.aporc.org/doc/wiki/PTMPred.",2013-11-27 +29879044,Exploring the microRNA profiles as potential diagnostic probes for oligo- and polymetastatic prognosis of lung metastasis(es) patients.,"Presuming the stage of metastatic lung cancer is divided by its location, an intermediate state of ≤5 cumulative metastasis is defined as oligometastases (OM) and a widespread state of >5 cumulative metastasis as polymetastases (PM). According to the phenotypes, the different metastatic cancer patients can be treated with different methods: the OM patients can be treated by a metastasis-directed local therapy method, whereas the PM patients are not recommended to take such a treatment. It is also believed that the patients at the initial OM stage may progress to the PM stage. Currently, the OM- and PM-metastatic cancer patients can be identified by traditional imaging methods. However, the current methods are found to be insufficient for the discrimination. It hence is meaningful and important to develop new diagnostic methods for a better prediction to the patients following by selecting a correct metastasis-directed treatment. MicroRNAs (miRNAs) can be used as the genetic probes for the new diagnostic methods. In this study, a bioinformatics strategy was employed to screen the microRNAs as potential diagnostic probes for distinguishing the OM and PM lung metastases patients. The expression profiles of microarray data of GSE38698 were downloaded from Gene Expression Omnibus (http://www.ncbi.nlm.nih.gov/geo/) including the information from 63 patients: 24 PM and 39 OM patients. The microRNA expression patterns of tumor samples were identified for the OM and PM patients who were treated with the high-dose radiotherapy. Followed by analyzing the functional enrichment pathways, an early diagnosis model of OM and PM groups was identified with different expression genes (DEGs). 
The ratios of PM/OM were calculated by setting a high significance in the expressions of 377 mature miRNAs in the profile [log2 (PM/OM) >1 and P < .05]. Through a high combination power [area under the curve (AUC) ≥ 0.875] with the superior sensitivity and specificity, a panel of 10 miRNAs including 7 upregulation and 3 downregulation expressions were identified as potential probes for discriminating the PM and OM patients from the receiving operation characteristic (ROC). Considering the possible involvements of cancer progress, the interconnected axon guidance, cancer metastasis pathways, proteoglycans, and Mitogen-activated protein kinases signaling pathway and endocytosis were suggested for the subsequent miRNA target analysis. The results may reveal a biological significance that a profile of miRNAs can be used as the potential probes to identify the patients at the OM or PM stages and figure out the metastasis-directed treatment methods for the patients at the different metastasis stages.",2018-06-01 +23937709,The Vertebrate Trait Ontology: a controlled vocabulary for the annotation of trait data across species.,"

Background

The use of ontologies to standardize biological data and facilitate comparisons among datasets has steadily grown as the complexity and amount of available data have increased. Despite the numerous ontologies available, one area currently lacking a robust ontology is the description of vertebrate traits. A trait is defined as any measurable or observable characteristic pertaining to an organism or any of its substructures. While there are several ontologies to describe entities and processes in phenotypes, diseases, and clinical measurements, one has not been developed for vertebrate traits; the Vertebrate Trait Ontology (VT) was created to fill this void.

Description

Significant inconsistencies in trait nomenclature exist in the literature, and additional difficulties arise when trait data are compared across species. The VT is a unified trait vocabulary created to aid in the transfer of data within and between species and to facilitate investigation of the genetic basis of traits. Trait information provides a valuable link between the measurements that are used to assess the trait, the phenotypes related to the traits, and the diseases associated with one or more phenotypes. Because multiple clinical and morphological measurements are often used to assess a single trait, and a single measurement can be used to assess multiple physiological processes, providing investigators with standardized annotations for trait data will allow them to investigate connections among these data types.

Conclusions

The annotation of genomic data with ontology terms provides unique opportunities for data mining and analysis. Links between data in disparate databases can be identified and explored, a strategy that is particularly useful for cross-species comparisons or in situations involving inconsistent terminology. The VT provides a common basis for the description of traits in multiple vertebrate species. It is being used in the Rat Genome Database and Animal QTL Database for annotation of QTL data for rat, cattle, chicken, swine, sheep, and rainbow trout, and in the Mouse Phenome Database to annotate strain characterization data. In these databases, data are also cross-referenced to applicable terms from other ontologies, providing additional avenues for data mining and analysis. The ontology is available at http://bioportal.bioontology.org/ontologies/50138.",2013-08-09 +29036294,RealityConvert: a tool for preparing 3D models of biochemical structures for augmented and virtual reality.,"

Motivation

There is a growing interest for the broad use of Augmented Reality (AR) and Virtual Reality (VR) in the fields of bioinformatics and cheminformatics to visualize complex biological and chemical structures. AR and VR technologies allow for stunning and immersive experiences, offering untapped opportunities for both research and education purposes. However, preparing 3D models ready to use for AR and VR is time-consuming and requires a technical expertise that severely limits the development of new contents of potential interest for structural biologists, medicinal chemists, molecular modellers and teachers.

Results

Herein we present the RealityConvert software tool and associated website, which allow users to easily convert molecular objects to high quality 3D models directly compatible for AR and VR applications. For chemical structures, in addition to the 3D model generation, RealityConvert also generates image trackers, useful to universally call and anchor that particular 3D model when used in AR applications. The ultimate goal of RealityConvert is to facilitate and boost the development and accessibility of AR and VR contents for bioinformatics and cheminformatics applications.

Availability and implementation

http://www.realityconvert.com.

Contact

dfourch@ncsu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +29525976,Investigating the mechanical response of paediatric bone under bending and torsion using finite element analysis.,"Fractures of bone account 25% of all paediatric injuries (Cooper et al. in J Bone Miner Res 19:1976-1981, 2004. https://doi.org/10.1359/JBMR.040902 ). These can be broadly categorised into accidental or inflicted injuries. The current clinical approach to distinguish between these two is based on the clinician's judgment, which can be subjective. Furthermore, there is a lack of studies on paediatric bone to provide evidence-based information on bone strength, mainly due to the difficulties of obtaining paediatric bone samples. There is a need to investigate the behaviour of children's bones under external loading. Such data will critically enhance our understanding of injury tolerance of paediatric bones under various loading conditions, related to injuries, such as bending and torsional loads. The aim of this study is therefore to investigate the response of paediatric femora under two types of loading conditions, bending and torsion, using a CT-based finite element approach, and to determine a relationship between bone strength and age/body mass of the child. Thirty post-mortem CT scans of children aged between 0 and 3 years old were used in this study. Two different boundary conditions were defined to represent four-point bending and pure torsional loads. The principal strain criterion was used to estimate the failure moment for both loading conditions. The results showed that failure moment of the bone increases with the age and mass of the child. The predicted failure moment for bending, external and internal torsions were 0.8-27.9, 1.0-31.4 and 1.0-30.7 Nm, respectively. To the authors' knowledge, this is the first report on infant bone strength in relation to age/mass using models developed from modern medical images. 
This technology may in future help advance the design of child, car restrain system, and more accurate computer models of children.",2018-03-10 +29352511,Draft genome and reference transcriptomic resources for the urticating pine defoliator Thaumetopoea pityocampa (Lepidoptera: Notodontidae).,"The pine processionary moth Thaumetopoea pityocampa (Lepidoptera: Notodontidae) is the main pine defoliator in the Mediterranean region. Its urticating larvae cause severe human and animal health concerns in the invaded areas. This species shows a high phenotypic variability for various traits, such as phenology, fecundity and tolerance to extreme temperatures. This study presents the construction and analysis of extensive genomic and transcriptomic resources, which are an obligate prerequisite to understand their underlying genetic architecture. Using a well-studied population from Portugal with peculiar phenological characteristics, the karyotype was first determined and a first draft genome of 537 Mb total length was assembled into 68,292 scaffolds (N50 = 164 kb). From this genome assembly, 29,415 coding genes were predicted. To circumvent some limitations for fine-scale physical mapping of genomic regions of interest, a 3X coverage BAC library was also developed. In particular, 11 BACs from this library were individually sequenced to assess the assembly quality. Additionally, de novo transcriptomic resources were generated from various developmental stages sequenced with HiSeq and MiSeq Illumina technologies. The reads were de novo assembled into 62,376 and 63,175 transcripts, respectively. Then, a robust subset of the genome-predicted coding genes, the de novo transcriptome assemblies and previously published 454/Sanger data were clustered to obtain a high-quality and comprehensive reference transcriptome consisting of 29,701 bona fide unigenes. 
These sequences covered 99% of the cegma and 88% of the busco highly conserved eukaryotic genes and 84% of the busco arthropod gene set. Moreover, 90% of these transcripts could be localized on the draft genome. The described information is available via a genome annotation portal (http://bipaa.genouest.org/sp/thaumetopoea_pityocampa/).",2018-02-12 +25697819,The GPMDB REST interface.,"

Unlabelled

The Global Proteome Machine and Database (GPMDB) representational state transfer (REST) service was designed to provide simplified access to the proteomics information in GPMDB using a stable set of methods and parameters. Version 1 of this interface gives access to 25 methods for retrieving experimental information about protein post-translational modifications, amino acid variants, alternate splicing variants and protein cleavage patterns.

Availability and implementation

GPMDB data and database tables are freely available for commercial and non-commercial use. All software is also freely available, under the Artistic License. http://rest.thegpm.org/1 (GPMDB REST Service), http://wiki.thegpm.org/wiki/GPMDB_REST (Service description and help), and http://www.thegpm.org (GPM main project description and documentation). The code for the interface and an example REST client is available at ftp://ftp.thegpm.org/repos/gpmdb_rest",2015-02-19 +27549343,Crowdsourced assessment of common genetic contribution to predicting anti-TNF treatment response in rheumatoid arthritis.,"Rheumatoid arthritis (RA) affects millions world-wide. While anti-TNF treatment is widely used to reduce disease progression, treatment fails in ∼one-third of patients. No biomarker currently exists that identifies non-responders before treatment. A rigorous community-based assessment of the utility of SNP data for predicting anti-TNF treatment efficacy in RA patients was performed in the context of a DREAM Challenge (http://www.synapse.org/RA_Challenge). An open challenge framework enabled the comparative evaluation of predictions developed by 73 research groups using the most comprehensive available data and covering a wide range of state-of-the-art modelling methodologies. Despite a significant genetic heritability estimate of treatment non-response trait (h(2)=0.18, P value=0.02), no significant genetic contribution to prediction accuracy is observed. Results formally confirm the expectations of the rheumatology community that SNP information does not significantly improve predictive performance relative to standard clinical traits, thereby justifying a refocusing of future efforts on collection of other data.",2016-08-23 +28287981,PerPAS: Topology-Based Single Sample Pathway Analysis Method.,"Identification of intracellular pathways that play key roles in cancer progression and drug resistance is a prerequisite for developing targeted cancer treatments. 
The era of personalized medicine calls for computational methods that can function with one sample or a very small set of samples. Developing such methods is challenging because standard statistical approaches pose several limiting assumptions, such as number of samples, that prevent their application when approaches to one. We have developed a novel pathway analysis method called PerPAS to estimate pathway activity at a single sample level by integrating pathway topology and transcriptomics data. In addition, PerPAS is able to identify altered pathways between cancer and control samples as well as to identify key nodes that contribute to the pathway activity. In our case study using breast cancer data, we show that PerPAS can identify highly altered pathways that are associated with patient survival. PerPAS identified four pathways that were associated with patient survival and were successfully validated in three independent breast cancer cohorts. In comparison to two other pathway analysis methods that function at a single sample level, PerPAS had superior performance in both synthetic and breast cancer expression datasets. PerPAS is a free R package (http://csbi.ltdk.helsinki.fi/pub/czliu/perpas/).",2017-03-08 +27893832,Evolutionary Algorithm for RNA Secondary Structure Prediction Based on Simulated SHAPE Data.,"

Background

Non-coding RNAs perform a wide range of functions inside the living cells that are related to their structures. Several algorithms have been proposed to predict RNA secondary structure based on minimum free energy. Low prediction accuracy of these algorithms indicates that free energy alone is not sufficient to predict the functional secondary structure. Recently, the obtained information from the SHAPE experiment greatly improves the accuracy of RNA secondary structure prediction by adding this information to the thermodynamic free energy as pseudo-free energy.

Method

In this paper, a new method is proposed to predict RNA secondary structure based on both free energy and SHAPE pseudo-free energy. For each RNA sequence, a population of secondary structures is constructed and their SHAPE data are simulated. Then, an evolutionary algorithm is used to improve each structure based on both free and pseudo-free energies. Finally, a structure with minimum summation of free and pseudo-free energies is considered as the predicted RNA secondary structure.

Results and conclusions

Computationally simulating the SHAPE data for a given RNA sequence requires its secondary structure. Here, we overcome this limitation by employing a population of secondary structures. This helps us to simulate the SHAPE data for any RNA sequence and consequently improves the accuracy of RNA secondary structure prediction as it is confirmed by our experiments. The source code and web server of our proposed method are freely available at http://mostafa.ut.ac.ir/ESD-Fold/.",2016-11-28 +29182755,Meta-analysis of DNA double-strand break response kinetics.,"Most proteins involved in the DNA double-strand break response (DSBR) accumulate at the damage sites, where they perform functions related to damage signaling, chromatin remodeling and repair. Over the last two decades, studying the accumulation of many DSBR proteins provided information about their functionality and underlying mechanisms of action. However, comparison and systemic interpretation of these data is challenging due to their scattered nature and differing experimental approaches. Here, we extracted, analyzed and compared the available results describing accumulation of 79 DSBR proteins at sites of DNA damage, which can be further explored using Cumulus (http://www.dna-repair.live/cumulus/)-the accompanying interactive online application. Despite large inter-study variability, our analysis revealed that the accumulation of most proteins starts immediately after damage induction, occurs in parallel and peaks within 15-20 min. Various DSBR pathways are characterized by distinct accumulation kinetics with major non-homologous end joining proteins being generally faster than those involved in homologous recombination, and signaling and chromatin remodeling factors accumulating with varying speeds. 
Our meta-analysis provides, for the first time, comprehensive overview of the temporal organization of the DSBR in mammalian cells and could serve as a reference for future mechanistic studies of this complex process.",2017-12-01 +24291661,Chemical annotation of small and peptide-like molecules at the Protein Data Bank.,"Over the past decade, the number of polymers and their complexes with small molecules in the Protein Data Bank archive (PDB) has continued to increase significantly. To support scientific advancements and ensure the best quality and completeness of the data files over the next 10 years and beyond, the Worldwide PDB partnership that manages the PDB archive is developing a new deposition and annotation system. This system focuses on efficient data capture across all supported experimental methods. The new deposition and annotation system is composed of four major modules that together support all of the processing requirements for a PDB entry. In this article, we describe one such module called the Chemical Component Annotation Tool. This tool uses information from both the Chemical Component Dictionary and Biologically Interesting molecule Reference Dictionary to aid in annotation. Benchmark studies have shown that the Chemical Component Annotation Tool provides significant improvements in processing efficiency and data quality. Database URL: http://wwpdb.org.",2013-11-29 +29856651,High-intensity resistance exercise with low repetitions maintains endothelial function.,"Resistance exercise impairs endothelial function, and this impairment is thought to be mediated by sustained elevation in blood pressure. Herein, we tested the hypothesis that resistance exercise-induced endothelial dysfunction would be prevented by high-intensity resistance exercise with low repetitions. This type of resistance exercise is known to induce temporal elevation in blood pressure due to low repetitions and a long resting period between sets. 
Thirteen young healthy subjects completed three randomized experimental trials as follows: 1) moderate-intensity exercise with moderate repetitions (moderate-moderate trial), 2) low-intensity exercise with high repetitions (low-high trial), and 3) high-intensity exercise with low repetitions (high-low trial). After baseline brachial artery flow-mediated dilation (FMD) and blood pressure measurements, subjects performed resistance exercise according to the different types of trials. Thereafter, brachial artery FMD and blood pressure measurements were repeated 10, 30, and 60 min after the exercise. Exercise-induced increases in blood flow and shear rate were significantly lower in the high-low trial than in the other two trials ( P < 0.05). Although systolic blood pressures were significantly elevated after exercise in all trials ( P < 0.05), the magnitudes of rise in blood pressure increase were significantly lower in the high-low trial than in the moderate-moderate and low-high trials ( P < 0.05). Moderate-moderate and low-high trials caused a significant impairment in brachial artery FMD ( P < 0.05), which could be prevented through high-intensity resistance exercise with low repetitions (  > 0.05). In conclusion, endothelial function was maintained by conducting high-intensity resistance exercise with low repetitions. NEW & NOTEWORTHY Data from the present study reveal that high-intensity resistance exercise with low repetitions can maintain endothelial function. Thus, this study provides the first evidence that the detrimental vascular effects of resistance exercise are preventable when resistance exercise is performed in high intensity with low repetitions. 
Listen to this article's corresponding podcast at https://ajpheart.podbean.com/e/type-of-resistance-exercise-and-endothelial-function/ (Japanese version: https://ajpheart.podbean.com/e/japanese-language-podcast-type-of-resistance-exercise-and-endothelial-function/ ).",2018-06-01 +,Human Cell Line and Tissue Sample Authentication,"Background: Short Tandem Repeat (STR) genotyping analysis is a proven technology for uniquely identifying virtually all human samples. STR genotyping was adopted as the preferred technology for identification of human tissue culture cell lines by the ATCC Standards Development Organization (ASN-0002: Authentication of Human Cell Lines: Standardization of STR Profiling). We developed new automation-compatible protocols/systems for generating STR profiles from human cell lines or tissue samples. Methods: We adapted the STR genotyping systems routinely used for forensic and paternity testing to better meet the needs of genomic core facilities. Modifications include balancing for higher amounts of template DNA, configuring the reagents for compatibility with high throughput robotic workstations, and supporting electrophoretic separation and analysis on a wider variety of instrument and software platforms. Results: The GenePrint® 10 and GenePrint® 21 Systems allow for multiplexed genotyping of 9 or 20 STR loci, plus the amelogenin gender marker. STR analysis with these loci uniquely identify virtually all human cell lines and tissue samples (body fluids, tissues and extracted DNA) and confirm the absence of cross-contamination or a sample switch. The GenePrint® 10 System is compatible with purified DNA or direct amplification from cells deposited on FTA® Cards (GE Whatman). 
Both genotyping systems meet the ASN-0002 standard and include the loci represented on the National Center for Biotechnology Information (NCBI) human cell line database: http://www.ncbi.nlm.nih.gov/biosample?term=human%20cell%20line%20STR%20profile Conclusions: STR genotyping analysis with the GenePrint® 10 and GenePrint® 21 Systems can establish human cell line or tissue sample identity and confirm the absence of contamination with other human cell lines or tissues. The methods are compatible with DNA concentrations, robotic protocols, instrumentation, and genotyping software typically used in genomics core facilities.",2013-05-01 +27924016,Bio-TDS: bioscience query tool discovery system.,"Bioinformatics and computational biology play a critical role in bioscience and biomedical research. As researchers design their experimental projects, one major challenge is to find the most relevant bioinformatics toolkits that will lead to new knowledge discovery from their data. The Bio-TDS (Bioscience Query Tool Discovery Systems, http://biotds.org/) has been developed to assist researchers in retrieving the most applicable analytic tools by allowing them to formulate their questions as free text. The Bio-TDS is a flexible retrieval system that affords users from multiple bioscience domains (e.g. genomic, proteomic, bio-imaging) the ability to query over 12 000 analytic tool descriptions integrated from well-established, community repositories. One of the primary components of the Bio-TDS is the ontology and natural language processing workflow for annotation, curation, query processing, and evaluation. The Bio-TDS's scientific impact was evaluated using sample questions posed by researchers retrieved from Biostars, a site focusing on BIOLOGICAL DATA ANALYSIS: The Bio-TDS was compared to five similar bioscience analytic tool retrieval systems with the Bio-TDS outperforming the others in terms of relevance and completeness. 
The Bio-TDS offers researchers the capacity to associate their bioscience question with the most relevant computational toolsets required for the data analysis in their knowledge discovery process.",2016-10-18 +29228193,DeepSF: deep convolutional neural network for mapping protein sequences to folds.,"

Motivation

Protein fold recognition is an important problem in structural bioinformatics. Almost all traditional fold recognition methods use sequence (homology) comparison to indirectly predict the fold of a target protein based on the fold of a template protein with known structure, which cannot explain the relationship between sequence and fold. Only a few methods had been developed to classify protein sequences into a small number of folds due to methodological limitations, which are not generally useful in practice.

Results

We develop a deep 1D-convolution neural network (DeepSF) to directly classify any protein sequence into one of 1195 known folds, which is useful for both fold recognition and the study of sequence-structure relationship. Different from traditional sequence alignment (comparison) based methods, our method automatically extracts fold-related features from a protein sequence of any length and maps it to the fold space. We train and test our method on the datasets curated from SCOP1.75, yielding an average classification accuracy of 75.3%. On the independent testing dataset curated from SCOP2.06, the classification accuracy is 73.0%. We compare our method with a top profile-profile alignment method-HHSearch on hard template-based and template-free modeling targets of CASP9-12 in terms of fold recognition accuracy. The accuracy of our method is 12.63-26.32% higher than HHSearch on template-free modeling targets and 3.39-17.09% higher on hard template-based modeling targets for top 1, 5 and 10 predicted folds. The hidden features extracted from sequence by our method is robust against sequence mutation, insertion, deletion and truncation, and can be used for other protein pattern recognition problems such as protein clustering, comparison and ranking.

Availability and implementation

The DeepSF server is publicly available at: http://iris.rnet.missouri.edu/DeepSF/.

Contact

chengji@missouri.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-04-01 +28734034,Protein features as determinants of wild-type glycoside hydrolase thermostability.,"Thermostable enzymes for conversion of lignocellulosic biomass into biofuels have significant advantages over enzymes with more moderate themostability due to the challenging application conditions. Experimental discovery of thermostable enzymes is highly cost intensive, and the development of in-silico methods guiding the discovery process would be of high value. To develop such an in-silico method and provide the data foundation of it, we determined the melting temperatures of 602 fungal glycoside hydrolases from the families GH5, 6, 7, 10, 11, 43, and AA9 (formerly GH61). We, then used sequence and homology modeled structure information of these enzymes to develop the ThermoP melting temperature prediction method. Futhermore, in the context of thermostability, we determined the relative importance of 160 molecular features, such as amino acid frequencies and spatial interactions, and exemplified their biological significance. The presented prediction method is made publicly available at http://www.cbs.dtu.dk/services/ThermoP.",2017-08-10 +24174539,pE-DB: a database of structural ensembles of intrinsically disordered and of unfolded proteins.,"The goal of pE-DB (http://pedb.vib.be) is to serve as an openly accessible database for the deposition of structural ensembles of intrinsically disordered proteins (IDPs) and of denatured proteins based on nuclear magnetic resonance spectroscopy, small-angle X-ray scattering and other data measured in solution. Owing to the inherent flexibility of IDPs, solution techniques are particularly appropriate for characterizing their biophysical properties, and structural ensembles in agreement with these data provide a convenient tool for describing the underlying conformational sampling. 
Database entries consist of (i) primary experimental data with descriptions of the acquisition methods and algorithms used for the ensemble calculations, and (ii) the structural ensembles consistent with these data, provided as a set of models in a Protein Data Bank format. PE-DB is open for submissions from the community, and is intended as a forum for disseminating the structural ensembles and the methodologies used to generate them. While the need to represent the IDP structures is clear, methods for determining and evaluating the structural ensembles are still evolving. The availability of the pE-DB database is expected to promote the development of new modeling methods and leads to a better understanding of how function arises from disordered states.",2013-10-29 +25951377,DeTEXT: A Database for Evaluating Text Extraction from Biomedical Literature Figures.,"Hundreds of millions of figures are available in biomedical literature, representing important biomedical experimental evidence. Since text is a rich source of information in figures, automatically extracting such text may assist in the task of mining figure information. A high-quality ground truth standard can greatly facilitate the development of an automated system. This article describes DeTEXT: A database for evaluating text extraction from biomedical literature figures. It is the first publicly available, human-annotated, high quality, and large-scale figure-text dataset with 288 full-text articles, 500 biomedical figures, and 9308 text regions. This article describes how figures were selected from open-access full-text biomedical articles and how annotation guidelines and annotation tools were developed. We also discuss the inter-annotator agreement and the reliability of the annotations. We summarize the statistics of the DeTEXT data and make available evaluation protocols for DeTEXT. 
Finally we lay out challenges we observed in the automated detection and recognition of figure text and discuss research directions in this area. DeTEXT is publicly available for downloading at http://prir.ustb.edu.cn/DeTEXT/.",2015-05-07 +24243849,P³DB 3.0: From plant phosphorylation sites to protein networks.,"In the past few years, the Plant Protein Phosphorylation Database (P(3)DB, http://p3db.org) has become one of the most significant in vivo data resources for studying plant phosphoproteomics. We have substantially updated P(3)DB with respect to format, new datasets and analytic tools. In the P(3)DB 3.0, there are altogether 47 923 phosphosites in 16 477 phosphoproteins curated across nine plant organisms from 32 studies, which have met our multiple quality standards for acquisition of in vivo phosphorylation site data. Centralized by these phosphorylation data, multiple related data and annotations are provided, including protein-protein interaction (PPI), gene ontology, protein tertiary structures, orthologous sequences, kinase/phosphatase classification and Kinase Client Assay (KiC Assay) data--all of which provides context for the phosphorylation event. In addition, P(3)DB 3.0 incorporates multiple network viewers for the above features, such as PPI network, kinase-substrate network, phosphatase-substrate network, and domain co-occurrence network to help study phosphorylation from a systems point of view. Furthermore, the new P(3)DB reflects a community-based design through which users can share datasets and automate data depository processes for publication purposes. Each of these new features supports the goal of making P(3)DB a comprehensive, systematic and interactive platform for phosphoproteomics research.",2013-11-15 +24518929,"Cohort Profile: Estonian Biobank of the Estonian Genome Center, University of Tartu.","The Estonian Biobank cohort is a volunteer-based sample of the Estonian resident adult population (aged ≥18 years). 
The current number of participants-close to 52000--represents a large proportion, 5%, of the Estonian adult population, making it ideally suited to population-based studies. General practitioners (GPs) and medical personnel in the special recruitment offices have recruited participants throughout the country. At baseline, the GPs performed a standardized health examination of the participants, who also donated blood samples for DNA, white blood cells and plasma tests and filled out a 16-module questionnaire on health-related topics such as lifestyle, diet and clinical diagnoses described in WHO ICD-10. A significant part of the cohort has whole genome sequencing (100), genome-wide single nucleotide polymorphism (SNP) array data (20 000) and/or NMR metabolome data (11 000) available (http://www.geenivaramu.ee/for-scientists/data-release/). The data are continuously updated through periodical linking to national electronic databases and registries. A part of the cohort has been re-contacted for follow-up purposes and resampling, and targeted invitations are possible for specific purposes, for example people with a specific diagnosis. The Estonian Genome Center of the University of Tartu is actively collaborating with many universities, research institutes and consortia and encourages fellow scientists worldwide to co-initiate new academic or industrial joint projects with us.",2014-02-11 +27853510,CoNet app: inference of biological association networks using Cytoscape.,"Here we present the Cytoscape app version of our association network inference tool CoNet. Though CoNet was developed with microbial community data from sequencing experiments in mind, it is designed to be generic and can detect associations in any data set where biological entities (such as genes, metabolites or species) have been observed repeatedly. The CoNet app supports Cytoscape 2.x and 3.x and offers a variety of network inference approaches, which can also be combined. 
Here we briefly describe its main features and illustrate its use on microbial count data obtained by 16S rDNA sequencing of arctic soil samples. The CoNet app is available at: http://apps.cytoscape.org/apps/conet.",2016-06-27 +24961236,"BioC implementations in Go, Perl, Python and Ruby. ","As part of a communitywide effort for evaluating text mining and information extraction systems applied to the biomedical domain, BioC is focused on the goal of interoperability, currently a major barrier to wide-scale adoption of text mining tools. BioC is a simple XML format, specified by DTD, for exchanging data for biomedical natural language processing. With initial implementations in C++ and Java, BioC provides libraries of code for reading and writing BioC text documents and annotations. We extend BioC to Perl, Python, Go and Ruby. We used SWIG to extend the C++ implementation for Perl and one Python implementation. A second Python implementation and the Ruby implementation use native data structures and libraries. BioC is also implemented in the Google language Go. BioC modules are functional in all of these languages, which can facilitate text mining tasks. BioC implementations are freely available through the BioC site: http://bioc.sourceforge.net. Database URL: http://bioc.sourceforge.net/",2014-06-23 +24570022,Brazilian genetic database of chromosome X.,"The X chromosome is a singular source of information in population genetics, anthropological research and in forensic cases. Thus, many researchers have been interested in characterizing X chromosome markers in different populations. The Brazilian Genetic Database of Chromosome X (BGBX--Banco Genético Brasileiro do Cromossomo X) website is freely available in Portuguese and English versions and was developed with the main purpose of compiling all Brazilian population genetic data for X chromosome short tandem repeats (X-STRs) markers published in scientific journals searchable via PubMed. 
Furthermore, this database presents other relevant information concerning X-STRs, such as genetic and physical locations, allele structure, nomenclature, mutation rates, primers described in the literature and likelihood ratio calculation. The entire scientific community is now encouraged to submit their X-STR population genetic data to this website, available at http://www.bgbx.com.br. Regarding future prospects of BGBX, the authors intend to expand the website with data and information of X-linked insertion-deletion polymorphisms.",2014-02-26 +28479868,GMDR: Versatile Software for Detecting Gene-Gene and Gene-Environ- ment Interactions Underlying Complex Traits.,"Identification of multifactor gene-gene (G×G) and gene-environment (G×E) interactions underlying complex traits poses one of the great challenges to today's genetic study. Development of the generalized multifactor dimensionality reduction (GMDR) method provides a practicable solution to problems in detection of interactions. To exploit the opportunities brought by the availability of diverse data, it is in high demand to develop the corresponding GMDR software that can handle a breadth of phenotypes, such as continuous, count, dichotomous, polytomous nominal, ordinal, survival and multivariate, and various kinds of study designs, such as unrelated case-control, family-based and pooled unrelated and family samples, and also allows adjustment for covariates. We developed a versatile GMDR package to implement this serial of GMDR analyses for various scenarios (e.g., unified analysis of unrelated and family samples) and large-scale (e.g., genome-wide) data. This package includes other desirable features such as data management and preprocessing. Permutation testing strategies are also built in to evaluate the threshold or empirical p values. In addition, its performance is scalable to the computational resources. 
The software is available at http://www.soph.uab.edu/ssg/software or http://ibi.zju.edu.cn/software.",2016-10-01 +25754864,"mycoCLAP, the database for characterized lignocellulose-active proteins of fungal origin: resource and text mining curation support. ","Enzymes active on components of lignocellulosic biomass are used for industrial applications ranging from food processing to biofuels production. These include a diverse array of glycoside hydrolases, carbohydrate esterases, polysaccharide lyases and oxidoreductases. Fungi are prolific producers of these enzymes, spurring fungal genome sequencing efforts to identify and catalogue the genes that encode them. To facilitate the functional annotation of these genes, biochemical data on over 800 fungal lignocellulose-degrading enzymes have been collected from the literature and organized into the searchable database, mycoCLAP (http://mycoclap.fungalgenomics.ca). First implemented in 2011, and updated as described here, mycoCLAP is capable of ranking search results according to closest biochemically characterized homologues: this improves the quality of the annotation, and significantly decreases the time required to annotate novel sequences. The database is freely available to the scientific community, as are the open source applications based on natural language processing developed to support the manual curation of mycoCLAP. Database URL: http://mycoclap.fungalgenomics.ca.",2015-03-08 +29253727,Proper name retrieval and structural integrity of cerebral cortex in midlife: A cross-sectional study.,"There is currently little understanding on whether retrieval of proper names differs in midlife compared to young adulthood and if so, whether the age differences in this ability are associated with differences in structural integrity of the cerebral cortex. 
To answer these questions, we studied retrieval of proper names in 115 cognitively healthy middle-aged persons (49.7, ±3.2), comparing their performance on a tip-of-the-tongue (TOT) task with that of 68 young persons (25.4, ±3.5) from the Cam-Can data repository (http://www.mrc-cbu.cam.ac.uk/datasets/camcan/). Grey matter (GM) density and cortical thickness were used as indices of structural integrity of the cerebral cortex. The middle-aged (MA) group experienced more TOTs during proper names retrieval than young adults (YA), (t = 3.789, p < .005) and had considerably less GM density and cortical thickness across a range of brain areas bilaterally. Small clusters in left BA 45 and right BA 44 (cortical thickness) and in right BA 40 (volumetry) revealed group differences when accounting for TOTs. However, we observed no correlations between MA's TOT scores and GM volumes or cortical thickness of the brain regions typically reported as implicated in retrieval of proper names: left anterior temporal lobe, left insula, and left superior and middle temporal gyri.",2017-12-15 +29807556,"Analysis of founders and performance test effects on an autochthonous horse population through pedigree analysis: structure, genetic variability and inbreeding.","The Maremmano is an autochthonous Italian horse breed, which probably descended from the native horses of the Etruscans (VI century B.C.); the Studbook was acknowledged in 1980, and it includes 12 368 horses born from that year up to 2015. The aim of this study was to evaluate the effect of the selection program on the genetic variability of the Maremmano population; the analysis was performed using both the 'Endog v 4.8' program available at http://webs.ucm.es/info/prodanim/html/JP_Web.htm and in-house software on official pedigree data. Four Reference Populations were considered, and the most important one was the population of the 12 368 Maremmano horses officially registered in the National Studbook. 
The pedigree completeness of this population was very good because it was more than 90% at the third parental generation and more than 70% at the fifth generation; the pedigree traced back to a maximum of 10.50 generations with an average of 3.30 complete generations and 5.70 equivalent complete generations. The average generation interval was 10.65±4.72 years, with stallions used for longer periods than mares. The intervals ranged from 10.15±4.45 (mother-daughter) to 10.99±4.93 (father-daughter). The effective number of founders (f e) was 74 and the effective number of ancestors (f a) was 30 so that the ratio f e/f a was 2.47. The founder genome equivalents (f g) was 13.72 with a ratio f g/f e equal to 0.18. The mean of the genetic conservation index was 5.55±3.37, and it ranged from 0.81 to 21.32. The average inbreeding coefficient was 2.94%, with an increase of 0.1%/year, and the average relatedness coefficient was 5.52%. The effective population size (N e) computed by an individual increase in inbreeding was 68.1±13.00; the N e on equivalent generations was 42.00, and this value slightly increased to 42.20 when computed by Log regression on equivalent generations. The analysis confirmed the presence of seven traditional male lines. The percentage of Thoroughbred blood in the foals born in 2015 was 20.30% and has increased 0.21%/year since 1980; in particular, it increased more than twice (0.51%/year) until 1993 and afterwards slightly fluctuated. 
The pedigree analysis confirmed the completeness of genealogical information and the traditional importance that breeders gave to the male lines; although the genetic diversity of Maremmano seemed to be not endangered by the selection program, some effects on the population structure were found and a more scientific approach to genetic conservation should be incorporated in the selection plans.",2018-05-29 +29681142,"Modeling the Present and Future Incidence of Pediatric Hand, Foot, and Mouth Disease Associated with Ambient Temperature in Mainland China.","

Background

There is limited evidence about the association between ambient temperature and the incidence of pediatric hand, foot, and mouth disease (HFMD) nationwide in China.

Objectives

We examined the childhood temperature-HFMD associations across mainland China, and we projected the change in HFMD cases due to projected temperature change by the 2090s.

Methods

Data on daily HFMD (children 0-14 y old) counts and weather were collected from 362 sites during 2009-2014. Daily temperature by the 2090s was downscaled under the Representative Concentration Pathway (RCP) 4.5 and 8.5 scenarios. Temperature-HFMD associations were quantified using a two-stage Poisson regression with a distributed lag nonlinear model. The impact of changes in temperature on the incidence of HFMD was estimated by combining the fitted temperature-HFMD associations with projected temperatures under each scenario, assuming a constant population structure. Sensitivity analyses were performed to assess the influence of primary model assumptions.

Results

During 2009-2014, >11 million HFMD cases were reported. In most regions, the temperature-HFMD association had an inverted U shape with a peak at approximately 20°C, but the association leveled off or continued to increase in the Inner Mongolia and Northeast regions. When estimates were pooled across all regions and the population size was held constant, the projected incidence of HFMD increased by 3.2% [95% empirical confidence interval (eCI): −13.5%, 20.0%] and 5.3% (95% eCI: −33.3%, 44.0%) by the 2090s under the RCP 4.5 and 8.5 scenarios, respectively. However, regional projections suggest that HFMD may decrease with climate change in temperate areas of central and eastern China.

Conclusion

Our estimates suggest that the association between temperature and HFMD varies across China and that the future impact of climate change on HFMD incidence will vary as well. Other factors, including changes in the size of the population at risk (children 0-14 y old) will also influence future HFMD trends. https://doi.org/10.1289/EHP3062.",2018-04-20 +29527090,Sexualizing Media Use and Self-Objectification: A Meta-Analysis.,"Objectification theorists suggest that exposure to sexualizing media increases self-objectification among individuals. Correlational and experimental research examining this relation has received growing attention. The aim of this meta-analysis was to investigate the influence of sexualizing media use on self-objectification among women and men. For this purpose, we analyzed 54 papers yielding 50 independent studies and 261 effect sizes. The data revealed a positive, moderate effect of sexualizing media on self-objectification (r = .19). The effect was significant and robust, 95% CI [.15, .23], p < .0001. We identified a conditional effect of media type, suggesting that the use of video games and/or online media led to stronger self-objectification effects when compared to television use. Other sample characteristics or study characteristics did not moderate the overall effect. Thus, our findings highlight the importance of sexualizing media exposure on women's and men's objectified self-concept. We discuss future research directions and implications for practice. We hope that the article will stimulate researchers in their future work to address the research gaps outlined here. Moreover, we hope that the findings will encourage practitioners and parents to reflect on the role of the use of sexualizing media in the development of individuals' self-objectification. Additional online materials for this article are available on PWQ's website at http://journals.sagepub.com/doi/suppl10.1177/0361684317743019.",2017-12-15 +28146599,"Mark, Set, Go! 
School-Based Nutrition and Physical Activity Program: A Five-Year Evaluation.","Mark, Set, Go! is a school-based intervention addressing pediatric obesity in an urban, underserved community. This study evaluates its impact on participants' knowledge, attitudes and behavior related to nutrition, physical activity and screen time.

Method

Participants, 954 fifth- and sixth-grade public school students, received a 9-week classroom-based intervention led by high school peer educators. A matched design analyzed paired data from pre/post intervention knowledge, attitude and behavior surveys, heights, weights and 24 hour pedometer recordings.

Results

787 students (82.4%) completed both a pre- and post-test. Participants demonstrated improvement in knowledge, self-reported screen time, daily exercise and sweetened beverage consumption. Changes were greater for girls. A statistically significant decrease in BMI was noted overall, for boys and for overweight students, among the 443 participants (46%) with paired BMI data.

Conclusions

This school-based peer educator led intervention was effective in improving participant knowledge and healthy behaviors. [Full article available at http://rimed.org/rimedicaljournal-2017-02.asp].",2017-02-01 +24812337,SEK: sparsity exploiting k-mer-based estimation of bacterial community composition.,"

Motivation

Estimation of bacterial community composition from a high-throughput sequenced sample is an important task in metagenomics applications. As the sample sequence data typically harbors reads of variable lengths and different levels of biological and technical noise, accurate statistical analysis of such data is challenging. Currently popular estimation methods are typically time-consuming in a desktop computing environment.

Results

Using sparsity enforcing methods from the general sparse signal processing field (such as compressed sensing), we derive a solution to the community composition estimation problem by a simultaneous assignment of all sample reads to a pre-processed reference database. A general statistical model based on kernel density estimation techniques is introduced for the assignment task, and the model solution is obtained using convex optimization tools. Further, we design a greedy algorithm solution for a fast solution. Our approach offers a reasonably fast community composition estimation method, which is shown to be more robust to input data variation than a recently introduced related method.

Availability and implementation

A platform-independent Matlab implementation of the method is freely available at http://www.ee.kth.se/ctsoftware; source code that does not require access to Matlab is currently being tested and will be made available later through the above Web site.",2014-05-07 +29748373,slan+ Monocytes and Macrophages Mediate CD20-Dependent B-cell Lymphoma Elimination via ADCC and ADCP.,"Terminal tissue differentiation and function of slan+ monocytes in cancer is largely unexplored. Our recent studies demonstrated that slan+ monocytes differentiate into a distinct subset of dendritic cells (DC) in human tonsils and that slan+ cells colonize metastatic carcinoma-draining lymph nodes. Herein, we report by retrospective analysis of multi-institutional cohorts that slan+ cells infiltrate various types of non-Hodgkin lymphomas (NHL), particularly the diffuse large B-cell lymphoma (DLBCL) group, including the most aggressive, nodal and extranodal, forms. Nodal slan+ cells displayed features of either immature DC or macrophages, in the latter case ingesting tumor cells and apoptotic bodies. We also found in patients with DLBCL that peripheral blood slan+ monocytes, but not CD14+ monocytes, increased in number and displayed highly efficient rituximab-mediated antibody-dependent cellular cytotoxicity, almost equivalent to that exerted by NK cells. Notably, slan+ monocytes cultured in conditioned medium from nodal DLBCL (DCM) acquired a macrophage-like phenotype, retained CD16 expression, and became very efficient in rituximab-mediated antibody-dependent cellular phagocytosis (ADCP). Macrophages derived from DCM-treated CD14+ monocytes performed very efficient rituximab-mediated ADCP, however, using different FcγRs from those used by slan+ macrophages. Our observations shed new light on the complexity of the immune microenvironment of DLBCL and demonstrate plasticity of slan+ monocytes homing to cancer tissues. 
Altogether, data identify slan+ monocytes and macrophages as prominent effectors of antibody-mediated tumor cell targeting in patients with DLBCL.Significance: slan+ monocytes differentiate into macrophages that function as prominent effectors of antibody-mediated tumor cell targeting in lymphoma.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/13/3544/F1.large.jpg Cancer Res; 78(13); 3544-59. ©2018 AACR.",2018-05-10 +22301074,Manual GO annotation of predictive protein signatures: the InterPro approach to GO curation.,"InterPro amalgamates predictive protein signatures from a number of well-known partner databases into a single resource. To aid with interpretation of results, InterPro entries are manually annotated with terms from the Gene Ontology (GO). The InterPro2GO mappings are comprised of the cross-references between these two resources and are the largest source of GO annotation predictions for proteins. Here, we describe the protocol by which InterPro curators integrate GO terms into the InterPro database. We discuss the unique challenges involved in integrating specific GO terms with entries that may describe a diverse set of proteins, and we illustrate, with examples, how InterPro hierarchies reflect GO terms of increasing specificity. We describe a revised protocol for GO mapping that enables us to assign GO terms to domains based on the function of the individual domain, rather than the function of the families in which the domain is found. We also discuss how taxonomic constraints are dealt with and those cases where we are unable to add any appropriate GO terms. Expert manual annotation of InterPro entries with GO terms enables users to infer function, process or subcellular information for uncharacterized sequences based on sequence matches to predictive models. Database URL: http://www.ebi.ac.uk/interpro. 
The complete InterPro2GO mappings are available at: ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/external2go/interpro2go.",2012-02-01 +28921375,Convex-PL: a novel knowledge-based potential for protein-ligand interactions deduced from structural databases using convex optimization.,"We present a novel optimization approach to train a free-shape distance-dependent protein-ligand scoring function called Convex-PL. We do not impose any functional form of the scoring function. Instead, we decompose it into a polynomial basis and deduce the expansion coefficients from the structural knowledge base using a convex formulation of the optimization problem. Also, for the training set we do not generate false poses with molecular docking packages, but use constant RMSD rigid-body deformations of the ligands inside the binding pockets. This allows the obtained scoring function to be generally applicable to scoring of structural ensembles generated with different docking methods. We assess the Convex-PL scoring function using data from D3R Grand Challenge 2 submissions and the docking test of the CASF 2013 study. We demonstrate that our results outperform the other 20 methods previously assessed in CASF 2013. The method is available at http://team.inria.fr/nano-d/software/Convex-PL/ .",2017-09-18 +27182546,"RNA-sequencing data analysis of uterus in ovariectomized rats fed with soy protein isolate, 17β-estradiol and casein.","This data file describes the bioinformatics analysis of uterine RNA-seq data comparing genome wide effects of feeding soy protein isolate compared to casein to ovariectomized female rats age 64 days relative to treatment of casein fed rats with 5 μg/kg/d estradiol and relative to rats treated with estradiol and also fed soy protein isolate. Complete raw data files were deposited in the gene Expression Omnibus (GEO) at NCBI (http:/www.ncbi.nlm.nih.gov.geo/) under the GEO accession number GEO: GSE69819. 
Data presented here incudes a summary of the differential expression analysis with top 30 genes up- and down-regulated by soy protein isolate (SPI), estradiol (E2) and SPI+E2. Additional functional annotation analysis of KEGG pathways is also presented for each treatment, together with networks of interaction between those pathways. Further interpretation and discussion of this data can be found in the article ""Uterine responses to feeding soy protein isolate and treatment with 17β-estradiol differ in ovariectomized female rats"" Ronis et al. (2016) [1].",2016-04-21 +27956353,Batch-processing of imaging or liquid-chromatography mass spectrometry datasets and De Novo sequencing of polyketide siderophores.,"The open-source and cross-platform software CycloBranch was utilized for dereplication of organic compounds from mass spectrometry imaging imzML datasets and its functions were illustrated on microbial siderophores. The pixel-to-pixel batch-processing was analogous to liquid chromatography mass spectrometry data. Each data point represented here by accurate m/z values and the corresponding ion intensities was matched against integrated compound libraries. The fine isotopic structure matching was also embedded into CycloBranch dereplication process. The siderophores' characterization from single-pixel mass spectra was further supported by their de novo sequencing. New ketide building block library was utilized by CycloBranch to characterize the siderophores in images and mixtures and nomenclature of fragment ion series of linear and cyclic polyketide siderophores was proposed. The software is freely available at http://ms.biomed.cas.cz/cyclobranch. This article is part of a Special Issue entitled: MALDI Imaging, edited by Dr. Corinna Henkel and Prof. Peter Hoffmann.",2016-12-09 +29594435,Higher serum carotenoids associated with improvement of non-alcoholic fatty liver disease in adults: a prospective study.,"

Purpose

Previous studies have suggested that serum carotenoids might be inversely associated with non-alcoholic fatty liver disease (NAFLD), but little data came from longitudinal studies. We prospectively examined the associations between serum-carotenoid levels and NAFLD severity and the intermediary effects of retinol-binding protein 4 (RBP4), HOMA insulin-resistance index (HOMA-IR), body mass index (BMI), and serum triglycerides in middle-aged and elderly Chinese adults.

Methods

This prospective study included 3336 Chinese adults (40-75 years). We assessed serum concentrations of carotenoids at baseline and determined serum RBP4, triglycerides, and HOMA-IR levels at year 3. Abdominal ultrasonography was conducted to assess the presence and degree of NAFLD at years 3 and 6.

Results

The 2687 subjects who completed both NAFLD tests were classified into stable, improved and progressed groups according to changes in the degree of NAFLD between two visits. Analyses of covariance showed that ln-transformed serum concentrations of α-carotene, β-cryptoxanthin, β-carotene, lycopene, lutein/zeaxanthin, and total carotenoids were positively associated with NAFLD improvement (all p-trend < 0.05). After multivariable adjustment, mean differences in serum carotenoids were higher by 29.6% (β-carotene), 18.2% (α-carotene), 15.6% (β-cryptoxanthin), 11.5% (lycopene), 8.9% (lutein/zeaxanthin), and 16.6% (total carotenoids) in the improved vs. progressed subjects. Path analyses indicated the carotenoid-NAFLD association was mediated by lowering serum RBP4, triglycerides, HOMA-IR, and BMI, which were positively associated with the prevalence and progression of NAFLD.

Conclusions

In middle-aged and elderly adults, higher serum-carotenoid concentrations were favorably associated with NAFLD improvement, mediated by reducing serum RBP4, triglycerides, HOMA-IR, and BMI.

Trial registrations

This study has been registered at http://www.clinicaltrials.gov as NCT03179657.",2018-03-29 +25358824,Students' performance during practical examination on whole slide images using view path tracking.,"

Background

Whole slide images (WSIs) used in medical education can provide new insights into how histological slides are viewed by students. We created software infrastructure which tracks viewed WSI areas, used it during a practical exam in oral pathology and analyzed collected data to discover students' viewing behavior.

Methods

A view path tracking solution, which requires no specialized equipment, has been implemented on a virtual microscopy software platform (WebMicroscope, Fimmic Ltd, Helsinki, Finland). Our method dynamically tracks view paths across the whole WSI area and all zoom levels, while collecting the viewing behavior data centrally from many simultaneous WSI users. We used this approach during the exam to track how all students (N = 88) viewed WSIs (50 per student) when answering exam questions (with no time limit). About 74,000 records with information about subsequently displayed WSI areas were saved in the central database. Gathered data was processed and analyzed in multiple ways. Generated images and animations showed view fields and paths marked on WSI thumbnails, either for a single student or multiple students answering the same question. A set of statistics was designed and implemented to automatically discover certain viewing patterns, especially for multiple students and WSIs. Calculated metrics included average magnification level on which a WSI was displayed, dispersion of view fields, total viewing time, total number of view fields and a measure depicting how much a student was focused on diagnostic areas of a slide.

Results

Generated visualizations allowed us to visually discover some characteristic viewing patterns for selected questions and students. Calculated measures confirmed certain observations and enabled generalization of some findings across many students or WSIs. In most questions selected for the analysis, students answering incorrectly tended to view the slides longer, go through more view fields, which were also more dispersed - all compared to students who answered the questions correctly.

Conclusions

Designed and implemented view path tracking appeared to be a useful method of uncovering how students view WSIs during an exam in oral pathology. Proposed analysis methods, which include visualizations and automatically calculated statistics, were successfully used to discover viewing patterns.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_208.",2014-10-30 +24285306,PortEco: a resource for exploring bacterial biology through high-throughput data and analysis tools.,"PortEco (http://porteco.org) aims to collect, curate and provide data and analysis tools to support basic biological research in Escherichia coli (and eventually other bacterial systems). PortEco is implemented as a 'virtual' model organism database that provides a single unified interface to the user, while integrating information from a variety of sources. The main focus of PortEco is to enable broad use of the growing number of high-throughput experiments available for E. coli, and to leverage community annotation through the EcoliWiki and GONUTS systems. Currently, PortEco includes curated data from hundreds of genome-wide RNA expression studies, from high-throughput phenotyping of single-gene knockouts under hundreds of annotated conditions, from chromatin immunoprecipitation experiments for tens of different DNA-binding factors and from ribosome profiling experiments that yield insights into protein expression. Conditions have been annotated with a consistent vocabulary, and data have been consistently normalized to enable users to find, compare and interpret relevant experiments. PortEco includes tools for data analysis, including clustering, enrichment analysis and exploration via genome browsers. PortEco search and data analysis tools are extensively linked to the curated gene, metabolic pathway and regulation content at its sister site, EcoCyc.",2013-11-26 +24423103,Evaluation of the performance improvement CME paradigm for pain management in the long-term care setting.,"

Objective

A performance improvement continuing medical education (PI CME) activity was designed to assist clinicians with accurately identifying and appropriately managing persistent pain in long-term care facility (LTCF) residents.

Design

Volunteer LTCFs participated in a three-stage PI CME model consisting of: 1) baseline assessment, 2) implementation of practice improvement interventions, and 3) reassessment. Expert faculty chose performance measures and interventions for the activity. A champion was designated at each LTCF to collect resident charts and enter data into an online database.

Setting

Eight LTCFs located across the United States participated in the activity.

Patients

Fifty resident charts were randomly selected by each LTCF champion (25 for stage 1 and 25 for stage 3); a total of 350 charts were reviewed.

Interventions

In addition to a toolkit containing numerous performance improvement resources, an in-service meeting led by an expert faculty member was conducted at each LTCF.

Outcome measures

Stage 3 data were collected 6 weeks after implementation of interventions and compared with stage 1 baseline data to measure change in performance.

Results

Aggregate data collected from seven LTCFs completing the PI CME activity through stage 3 revealed improvements from baseline in four of five performance measures.

Conclusions

This CME activity allowed for collection of data demonstrating performance improvement in persistent pain management. The tools used as part of the intervention (available at http://www.achlpicme.org/LTC/toolkit) may help other clinicians enhance their management of LTCF residents with persistent pain.",2014-01-14 +28761915,Data for proteome analysis of Bacillus lehensis G1 in starch-containing medium.,"Bacillus lehensis G1 is a cyclodextrin glucanotransferase (CGTase) producer, which can degrade starch into cyclodextrin. Here, we present the proteomics data of B. lehensis cultured in starch-containing medium, which is related to the article ""Proteome-based identification of signal peptides for improved secretion of recombinant cyclomaltodextrin glucanotransferase in Escherichia coli"" (Ling et. al, in press). This dataset was generated to better understand the secretion of proteins involved in starch utilization for bacterial sustained growth. A 2-DE proteomic technique was used and the proteins were tryptically digested followed by detection using MALDI-TOF/TOF. Proteins were classified into functional groups using the information available in SubtiList webserver (http://genolist.pasteur.fr/SubtiList/).",2017-07-14 +25344501,iDoComp: a compression scheme for assembled genomes.,"

Motivation

With the release of the latest next-generation sequencing (NGS) machine, the HiSeq X by Illumina, the cost of sequencing a Human has dropped to a mere $4000. Thus we are approaching a milestone in the sequencing history, known as the $1000 genome era, where the sequencing of individuals is affordable, opening the doors to effective personalized medicine. Massive generation of genomic data, including assembled genomes, is expected in the following years. There is crucial need for compression of genomes guaranteed of performing well simultaneously on different species, from simple bacteria to humans, which will ease their transmission, dissemination and analysis. Further, most of the new genomes to be compressed will correspond to individuals of a species from which a reference already exists on the database. Thus, it is natural to propose compression schemes that assume and exploit the availability of such references.

Results

We propose iDoComp, a compressor of assembled genomes presented in FASTA format that compresses an individual genome using a reference genome for both the compression and the decompression. In terms of compression efficiency, iDoComp outperforms previously proposed algorithms in most of the studied cases, with comparable or better running time. For example, we observe compression gains of up to 60% in several cases, including H.sapiens data, when comparing with the best compression performance among the previously proposed algorithms.

Availability

iDoComp is written in C and can be downloaded from: http://www.stanford.edu/~iochoa/iDoComp.html (We also provide a full explanation on how to run the program and an example with all the necessary files to run it.).",2014-10-24 +27275151,DCC: a Swiss army knife for structure factor analysis and validation.,"Since 2008, X-ray structure depositions to the Protein Data Bank archive (PDB) have required submission of experimental data in the form of structure factor files. RCSB PDB has developed the program DCC to allow worldwide PDB (wwPDB; http://wwpdb.org) biocurators, using a single command-line program, to invoke a number of third-party software packages to compare the model file with the experimental data. DCC functionality includes structure factor validation, electron-density map generation and slicing, local electron-density analysis, and residual B factor analysis. DCC outputs a summary containing various crystallographic statistics in PDBx/mmCIF format for use in automatic data processing and archiving pipelines.",2016-04-18 +25990727,APPRIS WebServer and WebServices.,"This paper introduces the APPRIS WebServer (http://appris.bioinfo.cnio.es) and WebServices (http://apprisws.bioinfo.cnio.es). Both the web servers and the web services are based around the APPRIS Database, a database that presently houses annotations of splice isoforms for five different vertebrate genomes. The APPRIS WebServer and WebServices provide access to the computational methods implemented in the APPRIS Database, while the APPRIS WebServices also allows retrieval of the annotations. The APPRIS WebServer and WebServices annotate splice isoforms with protein structural and functional features, and with data from cross-species alignments. In addition they can use the annotations of structure, function and conservation to select a single reference isoform for each protein-coding gene (the principal protein isoform). 
APPRIS principal isoforms have been shown to agree overwhelmingly with the main protein isoform detected in proteomics experiments. The APPRIS WebServer allows for the annotation of splice isoforms for individual genes, and provides a range of visual representations and tools to allow researchers to identify the likely effect of splicing events. The APPRIS WebServices permit users to generate annotations automatically in high throughput mode and to interrogate the annotations in the APPRIS Database. The APPRIS WebServices have been implemented using REST architecture to be flexible, modular and automatic.",2015-05-18 +26609647,Evidence-based Neuro Linguistic Psychotherapy: a meta-analysis.,"

Background

Neuro Linguistic Programming (NLP) Framework has enjoyed enormous popularity in the field of applied psychology. NLP has been used in business, education, law, medicine and psychotherapy to identify people's patterns and alter their responses to stimuli, so they are better able to regulate their environment and themselves. NLP looks at achieving goals, creating stable relationships, eliminating barriers such as fears and phobias, building self-confidence, and self-esteem, and achieving peak performance. Neuro Linguistic Psychotherapy (NLPt) encompasses NLP as framework and set of interventions in the treatment of individuals with different psychological and/or social problems. We aimed systematically to analyse the available data regarding the effectiveness of Neuro Linguistic Psychotherapy (NLPt).

Subjects and methods

The present work is a meta-analysis of studies, observational or randomized controlled trials, for evaluating the efficacy of Neuro Linguistic Programming in individuals with different psychological and/or social problems. The databases searched to identify studies in English and German language: CENTRAL in the Cochrane Library; PubMed; ISI Web of Knowledge (include results also from Medline and the Web of Science); PsycINFO (including PsycARTICLES); Psyndex; Deutschsprachige Diplomarbeiten der Psychologie (database of theses in Psychology in German language), Social SciSearch; National library of health and two NLP-specific research databases: one from the NLP Community (http://www.nlp.de/cgi-bin/research/nlprdb.cgi?action=res_entries) and one from the NLP Group (http://www.nlpgrup.com/bilimselarastirmalar/bilimsel-arastirmalar-4.html#Zweig154).

Results

From a total number of 425 studies, 350 were removed and considered not relevant based on the title and abstract. Included, in the final analysis, are 12 studies with numbers of participants ranging between 12 and 115 subjects. The vast majority of studies were prospective observational. The actual paper represents the first meta-analysis evaluating the effectiveness of NLP therapy for individuals with social/psychological problems. The overall meta-analysis found that the NLP therapy may add an overall standardized mean difference of 0.54 with a confidence interval of CI=[0.20; 0.88].

Conclusion

Neuro-Linguistic Psychotherapy as a psychotherapeutic modality grounded in theoretical frameworks, methodologies and interventions scientifically developed, including models developed by NLP, shows results that can hold its ground in comparison with other psychotherapeutic methods.",2015-12-01 +28491942,A dataset of multi-contrast population-averaged brain MRI atlases of a Parkinson׳s disease cohort.,"Parkinson׳s disease (PD) is a neurodegenerative disease that primarily affects the motor functions of the patients. Research and surgical treatment of PD (e.g., deep brain stimulation) often require human brain atlases for structural identification or as references for anatomical normalization. However, two pitfalls exist for many current atlases used for PD. First, most atlases do not represent the disease-specific anatomy as they are based on healthy young subjects. Second, subcortical structures, such as the subthalamic nucleus (STN) used in deep brain stimulation procedures, are often not well visualized. The dataset described in this Data in Brief is a population-averaged atlas that was made with 3 T MRI scans of 25 PD patients, and contains 5 image contrasts: T1w (FLASH & MPRAGE), T2*w, T1-T2* fusion, phase, and an R2* map. While the T1w, T2*w, and T1-T2* fusion templates provide excellent anatomical details for both cortical and sub-cortical structures, the phase and R2* map contain bio-chemical features. Probabilistic tissue maps of whiter matter, grey matter, and cerebrospinal fluid are provided for the atlas. We also manually segmented eight subcortical structures: caudate nucleus, putamen, globus pallidus internus and externus (GPi & GPe), thalamus, STN, substantia nigra (SN), and the red nucleus (RN). Lastly, a co-registered histology-derived digitized atlas containing 123 anatomical structures is included. 
The dataset is made freely available at the MNI data repository accessible through the link http://nist.mni.mcgill.ca/?p=1209.",2017-04-15 +28200055,LASER server: ancestry tracing with genotypes or sequence reads.,"

Summary

To enable direct comparison of ancestry background in different studies, we developed LASER to estimate individual ancestry by placing either sequenced or genotyped samples in a common ancestry space, regardless of the sequencing strategy or genotyping array used to characterize each sample. Here we describe the LASER server to facilitate application of the method to a wide range of genetic studies. The server provides genetic ancestry estimation for different geographic regions and user-friendly interactive visualization of the results.

Availability and implementation

The LASER server is freely accessible at http://laser.sph.umich.edu/.

Contact

dtaliun@umich.edu or wangcl@gis.a-star.edu.sg.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +24822057,tRNADB-CE: tRNA gene database well-timed in the era of big sequence data.,"The tRNA gene data base curated by experts ""tRNADB-CE"" (http://trna.ie.niigata-u.ac.jp) was constructed by analyzing 1,966 complete and 5,272 draft genomes of prokaryotes, 171 viruses', 121 chloroplasts', and 12 eukaryotes' genomes plus fragment sequences obtained by metagenome studies of environmental samples. 595,115 tRNA genes in total, and thus two times of genes compiled previously, have been registered, for which sequence, clover-leaf structure, and results of sequence-similarity and oligonucleotide-pattern searches can be browsed. To provide collective knowledge with help from experts in tRNA researches, we added a column for enregistering comments to each tRNA. By grouping bacterial tRNAs with an identical sequence, we have found high phylogenetic preservation of tRNA sequences, especially at the phylum level. Since many species-unknown tRNAs from metagenomic sequences have sequences identical to those found in species-known prokaryotes, the identical sequence group (ISG) can provide phylogenetic markers to investigate the microbial community in an environmental ecosystem. This strategy can be applied to a huge amount of short sequences obtained from next-generation sequencers, as showing that tRNADB-CE is a well-timed database in the era of big sequence data. It is also discussed that batch-learning self-organizing-map with oligonucleotide composition is useful for efficient knowledge discovery from big sequence data.",2014-05-01 +27412093,LAMPLINK: detection of statistically significant SNP combinations from GWAS data.,"One of the major issues in genome-wide association studies is to solve the missing heritability problem. 
While considering epistatic interactions among multiple SNPs may contribute to solving this problem, existing software cannot detect statistically significant high-order interactions. We propose software named LAMPLINK, which employs a cutting-edge method to enumerate statistically significant SNP combinations from genome-wide case-control data. LAMPLINK is implemented as a set of additional functions to PLINK, and hence existing procedures with PLINK can be applicable. Applied to the 1000 Genomes Project data, LAMPLINK detected a combination of five SNPs that are statistically significantly accumulated in the Japanese population.

Availability and implementation

LAMPLINK is available at http://a-terada.github.io/lamplink/ CONTACT: terada@cbms.k.u-tokyo.ac.jp or sese.jun@aist.go.jpSupplementary information: Supplementary data are available at Bioinformatics online.",2016-07-13 +21082426,InterPro protein classification.,"Improvements in nucleotide sequencing technology have resulted in an ever increasing number of nucleotide and protein sequences being deposited in databases. Unfortunately, the ability to manually classify and annotate these sequences cannot keep pace with their rapid generation, resulting in an increased bias toward unannotated sequence. Automatic annotation tools can help redress the balance. There are a number of different groups working to produce protein signatures that describe protein families, functional domains or conserved sites within related groups of proteins. Protein signature databases include CATH-Gene3D, HAMAP, PANTHER, Pfam, PIRSF, PRINTS, ProDom, PROSITE, SMART, SUPERFAMILY, and TIGRFAMs. Their approaches range from characterising small conserved motifs that can identify members of a family or subfamily, to the use of hidden Markov models that describe the conservation of residues over entire domains or whole proteins. To increase their value as protein classification tools, protein signatures from these 11 databases have been combined into one, powerful annotation tool: the InterPro database (http://www.ebi.ac.uk/interpro/) (Hunter et al., Nucleic Acids Res 37:D211-D215, 2009). InterPro is an open-source protein resource used for the automatic annotation of proteins, and is scalable to the analysis of entire new genomes through the use of a downloadable version of InterProScan, which can be incorporated into an existing local pipeline. 
InterPro provides structural information from PDB (Kouranov et al., Nucleic Acids Res 34:D302-D305, 2006), its classification in CATH (Cuff et al., Nucleic Acids Res 37:D310-D314, 2009) and SCOP (Andreeva et al., Nucleic Acids Res 36:D419-D425, 2008), as well as homology models from ModBase (Pieper et al., Nucleic Acids Res 37:D347-D354, 2009) and SwissModel (Kiefer et al., Nucleic Acids Res 37:D387-D392, 2009), allowing a direct comparison of the protein signatures with the available structural information. This chapter reviews the signature methods found in the InterPro database, and provides an overview of the InterPro resource itself.",2011-01-01 +28499267,A novel germline TP53 mutation p.Pro190Arg detected in a patient with lung and bilateral breast cancers.,"

Purpose

Li-Fraumeni syndrome (LFS) is a rare genetic disease with strong predispositions to multiple early-onset neoplasms, mostly sarcomas, breast cancers, brain tumors and adrenocortical carcinomas (LFS core cancers). In most LFS families the germline mutations of TP53 tumor suppressor gene were found. Lung cancer does not belong to the core cancers of LFS, however its higher incidence is observed in families with TP53 mutations. Our aim was to search for TP53 mutations in female lung cancer patients whose clinico-demographic characteristics suggested a probable genetic predisposition to the disease.

Materials and methods

The coding region of TP53 from blood DNA was sequenced using Sanger method. The functioning of detected mutation was tested by luciferase reporter assay.

Results

We found a nucleotide substitution c.569C>G, p.Pro190Arg, which was not described in the TP53 germline mutation database (http://p53.iarc.fr/TP53GermlineMutations.aspx). The mutation destroys the ability of p53 to transactivate BAX promoter and significantly reduces transactivation potential of p53 toward the promoter of MDM2 gene.

Conclusion

We identified novel germline mutation of TP53.",2017-05-09 +27922621,Sharing brain mapping statistical results with the neuroimaging data model.,"Only a tiny fraction of the data and metadata produced by an fMRI study is finally conveyed to the community. This lack of transparency not only hinders the reproducibility of neuroimaging results but also impairs future meta-analyses. In this work we introduce NIDM-Results, a format specification providing a machine-readable description of neuroimaging statistical results along with key image data summarising the experiment. NIDM-Results provides a unified representation of mass univariate analyses including a level of detail consistent with available best practices. This standardized representation allows authors to relay methods and results in a platform-independent regularized format that is not tied to a particular neuroimaging software package. Tools are available to export NIDM-Result graphs and associated files from the widely used SPM and FSL software packages, and the NeuroVault repository can import NIDM-Results archives. The specification is publically available at: http://nidm.nidash.org/specs/nidm-results.html.",2016-12-06 +25157073,Overview of the gene ontology task at BioCreative IV. ,"Gene ontology (GO) annotation is a common task among model organism databases (MODs) for capturing gene function data from journal articles. It is a time-consuming and labor-intensive task, and is thus often considered as one of the bottlenecks in literature curation. There is a growing need for semiautomated or fully automated GO curation techniques that will help database curators to rapidly and accurately identify gene function information in full-length articles. Despite multiple attempts in the past, few studies have proven to be useful with regard to assisting real-world GO curation. 
The shortage of sentence-level training data and opportunities for interaction between text-mining developers and GO curators has limited the advances in algorithm development and corresponding use in practical circumstances. To this end, we organized a text-mining challenge task for literature-based GO annotation in BioCreative IV. More specifically, we developed two subtasks: (i) to automatically locate text passages that contain GO-relevant information (a text retrieval task) and (ii) to automatically identify relevant GO terms for the genes in a given article (a concept-recognition task). With the support from five MODs, we provided teams with >4000 unique text passages that served as the basis for each GO annotation in our task data. Such evidence text information has long been recognized as critical for text-mining algorithm development but was never made available because of the high cost of curation. In total, seven teams participated in the challenge task. From the team results, we conclude that the state of the art in automatically mining GO terms from literature has improved over the past decade while much progress is still needed for computer-assisted GO curation. Future work should focus on addressing remaining technical challenges for improved performance of automatic GO concept recognition and incorporating practical benefits of text-mining tools into real-world GO annotation. http://www.biocreative.org/tasks/biocreative-iv/track-4-GO/.",2014-08-25 +27412088,cisASE: a likelihood-based method for detecting putative cis-regulated allele-specific expression in RNA sequencing data.,"

Motivation

Allele-specific expression (ASE) is a useful way to identify cis-acting regulatory variation, which provides opportunities to develop new therapeutic strategies that activate beneficial alleles or silence mutated alleles at specific loci. However, multiple problems hinder the identification of ASE in next-generation sequencing (NGS) data.

Results

We developed cisASE, a likelihood-based method for detecting ASE on single nucleotide variant (SNV), exon and gene levels from sequencing data without requiring phasing or parental information. cisASE uses matched DNA-seq data to control technical bias and copy number variation (CNV) in putative cis-regulated ASE identification. Compared with state-of-the-art methods, cisASE exhibits significantly increased accuracy and speed. cisASE works moderately well for datasets without DNA-seq and thus is widely applicable. By applying cisASE to real datasets, we identified specific ASE characteristics in normal and cancer tissues, thus indicating that cisASE has potential for wide applications in cancer genomics.

Availability and implementation

cisASE is freely available at http://lifecenter.sgst.cn/cisASE CONTACT: biosinodx@gmail.com or yxli@sibs.ac.cnSupplementary information: Supplementary data are available at Bioinformatics online.",2016-07-13 +28968623,"Responsible Opioid Prescribing for Chronic Pain: Interpreting the CDC Guideline, Understanding New Rhode Island Regulations.","New Rhode Island regulations require physicians and other licensed practitioners to make significant adjustments to comply with new requirements for prescribing narcotics for chronic pain. Responding to the opioid epidemic, the new rules are intended to improve patient safety by changing physicians' prescribing patterns. However, the new rules may overlook the importance of treatment-access problems and the importance of buprenorphine products for treating pain and opioid dependence. Empirical data have demonstrated the safety and efficacy of buprenorphine in treating opioid-dependent patients with chronic pain, including those with and without substance abuse histories, but access to buprenorphine treatment remains limited throughout the state. The new regulations call upon physicians to make use of consultation services, which are also of limited availability. Although well intentioned, the new rules may contribute to treatment-access problems, and patients with chronic pain may resort to higher-risk ""street"" drugs when they are unable to access safe but effective medical treatment. [Full article available at http://rimed.org/rimedicaljournal-2017-10.asp].",2017-10-02 +28713404,"Genome-Wide Identification and Analysis of Genes, Conserved between japonica and indica Rice Cultivars, that Respond to Low-Temperature Stress at the Vegetative Growth Stage.","Cold stress is very detrimental to crop production. However, only a few genes in rice have been identified with known functions related to cold tolerance. 
To meet this agronomic challenge more effectively, researchers must take global approaches to select useful candidate genes and find the major regulatory factors. We used five Gene expression omnibus series data series of Affymetrix array data, produced with cold stress-treated samples from the NCBI Gene Expression Omnibus (http://www.ncbi.nlm.nih.gov/geo/), and identified 502 cold-inducible genes common to both japonica and indica rice cultivars. From them, we confirmed that the expression of two randomly chosen genes was increased by cold stress in planta. In addition, overexpression of OsWRKY71 enhanced cold tolerance in 'Dongjin,' the tested japonica cultivar. Comparisons between japonica and indica rice, based on calculations of plant survival rates and chlorophyll fluorescence, confirmed that the japonica rice was more cold-tolerant. Gene Ontology enrichment analysis indicate that the 'L-phenylalanine catabolic process,' within the Biological Process category, was the most highly overrepresented under cold-stress conditions, implying its significance in that response in rice. MapMan analysis classified 'Major Metabolic' processes and 'Regulatory Gene Modules' as two other major determinants of the cold-stress response and suggested several key cis-regulatory elements. Based on these results, we proposed a model that includes a pathway for cold stress-responsive signaling. Results from our functional analysis of the main signal transduction and transcription regulation factors identified in that pathway will provide insight into novel regulatory metabolism(s), as well as a foundation by which we can develop crop plants with enhanced cold tolerance.",2017-06-30 +27896026,jicbioimage: a tool for automated and reproducible bioimage analysis.,"There has been steady improvement in methods for capturing bioimages. However analysing these images still remains a challenge. 
The Python programming language provides a powerful and flexible environment for scientific computation. It has a wide range of supporting libraries for image processing but lacks native support for common bioimage formats, and requires specific code to be written to ensure that suitable audit trails are generated and analyses are reproducible. Here we describe the development of a Python tool that: (1) allows users to quickly view and explore microscopy data; (2) generate reproducible analyses, encoding a complete history of image transformations from raw data to final result; and (3) scale up analyses from initial exploration to high throughput processing pipelines, with a minimal amount of extra effort. The tool, jicbioimage, is open source and freely available online at http://jicbioimage.readthedocs.io.",2016-11-15 +25746136,Low cost whole-organism screening of compounds for anthelmintic activity.,"Due to major problems with drug resistance in parasitic nematodes of animals, there is a substantial need and excellent opportunities to develop new anthelmintics via genomic-guided and/or repurposing approaches. In the present study, we established a practical and cost-effective whole-organism assay for the in vitro-screening of compounds for activity against parasitic stages of the nematode Haemonchus contortus (barber's pole worm). The assay is based on the use of exsheathed L3 (xL3) and L4 stages of H. contortus of small ruminants (sheep and goats). Using this assay, we screened a panel of 522 well-curated kinase inhibitors (GlaxoSmithKline, USA; code: PKIS2) for activity against H. contortus by measuring the inhibition of larval motility using an automated image analysis system. We identified two chemicals within the compound classes biphenyl amides and pyrazolo[1,5-α]pyridines, which reproducibly inhibit both xL3 and L4 motility and development, with IC50s of 14-47 μM. 
Given that these inhibitors were designed as anti-inflammatory drugs for use in humans and fit the Lipinski rule-of-five (including bioavailability), they show promise for hit-to-lead optimisation and repurposing for use against parasitic nematodes. The screening assay established here has significant advantages over conventional methods, particularly in terms of ease of use, throughput, time and cost. Although not yet fully automated, the current assay is readily suited to the screening of hundreds to thousands of compounds for subsequent hit-to-lead optimisation. The current assay is highly adaptable to many parasites of socioeconomic importance, including those causing neglected tropical diseases. This aspect is of major relevance, given the urgent need to deliver the goals of the London Declaration (http://unitingtocombatntds.org/resource/london-declaration) through the rapid and efficient repurposing of compounds in public-private partnerships.",2015-03-05 +25102069,Panorama: a targeted proteomics knowledge base.,"Panorama is a web application for storing, sharing, analyzing, and reusing targeted assays created and refined with Skyline,1 an increasingly popular Windows client software tool for targeted proteomics experiments. Panorama allows laboratories to store and organize curated results contained in Skyline documents with fine-grained permissions, which facilitates distributed collaboration and secure sharing of published and unpublished data via a web-browser interface. It is fully integrated with the Skyline workflow and supports publishing a document directly to a Panorama server from the Skyline user interface. Panorama captures the complete Skyline document information content in a relational database schema. Curated results published to Panorama can be aggregated and exported as chromatogram libraries. These libraries can be used in Skyline to pick optimal targets in new experiments and to validate peak identification of target peptides. 
Panorama is open-source and freely available. It is distributed as part of LabKey Server,2 an open source biomedical research data management system. Laboratories and organizations can set up Panorama locally by downloading and installing the software on their own servers. They can also request freely hosted projects on https://panoramaweb.org , a Panorama server maintained by the Department of Genome Sciences at the University of Washington.",2014-08-18 +29450493,"The Impact of Age, Background Noise, Semantic Ambiguity, and Hearing Loss on Recognition Memory for Spoken Sentences.","

Purpose

The goal of this study was to determine how background noise, linguistic properties of spoken sentences, and listener abilities (hearing sensitivity and verbal working memory) affect cognitive demand during auditory sentence comprehension.

Method

We tested 30 young adults and 30 older adults. Participants heard lists of sentences in quiet and in 8-talker babble at signal-to-noise ratios of +15 dB and +5 dB, which increased acoustic challenge but left the speech largely intelligible. Half of the sentences contained semantically ambiguous words to additionally manipulate cognitive challenge. Following each list, participants performed a visual recognition memory task in which they viewed written sentences and indicated whether they remembered hearing the sentence previously.

Results

Recognition memory (indexed by d') was poorer for acoustically challenging sentences, poorer for sentences containing ambiguous words, and differentially poorer for noisy high-ambiguity sentences. Similar patterns were observed for Z-transformed response time data. There were no main effects of age, but age interacted with both acoustic clarity and semantic ambiguity such that older adults' recognition memory was poorer for acoustically degraded high-ambiguity sentences than the young adults'. Within the older adult group, exploratory correlation analyses suggested that poorer hearing ability was associated with poorer recognition memory for sentences in noise, and better verbal working memory was associated with better recognition memory for sentences in noise.

Conclusions

Our results demonstrate listeners' reliance on domain-general cognitive processes when listening to acoustically challenging speech, even when speech is highly intelligible. Acoustic challenge and semantic ambiguity both reduce the accuracy of listeners' recognition memory for spoken sentences.

Supplemental materials

https://doi.org/10.23641/asha.5848059.",2018-03-01 +28369166,Cpipe: a comprehensive computational platform for sequence and structure-based analyses of Cysteine residues.,"

Summary

Due to their chemical plasticity, Cysteine residues (Cys) can serve many different functions. Identification and classification of reactive Cys isn't a trivial job: currently, no available tool exists for an all-round, comprehensive (inclusive of all different functional types) analysis of Cys; herein we present a computational platform called Cpipe, dedicated to this task: it implements state-of-the-art protocols, elaborating and displaying a wealth of information, sufficiently orthogonal to allow a thorough evaluation of all major aspects of Cys reactivity.

Availability and implementation

Cpipe is implemented in Python and freely available at http://cpipe.explora-biotech.com/cpipe/start.py . All major browsers are supported.

Contact

s.marino@explora-biotech.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +26026167,RPdb: a database of experimentally verified cellular reprogramming records.,"

Unlabelled

Many cell lines can be reprogrammed to other cell lines by forced expression of a few transcription factors or by specifically designed culture methods, which have attracted a great interest in the field of regenerative medicine and stem cell research. Plenty of cell lines have been used to generate induced pluripotent stem cells (IPSCs) by expressing a group of genes and microRNAs. These IPSCs can differentiate into somatic cells to promote tissue regeneration. Similarly, many somatic cells can be directly reprogrammed to other cells without a stem cell state. All these findings are helpful in searching for new reprogramming methods and understanding the biological mechanism inside. However, to the best of our knowledge, there is still no database dedicated to integrating the reprogramming records. We built RPdb (cellular reprogramming database) to collect cellular reprogramming information and make it easy to access. All entries in RPdb are manually extracted from more than 2000 published articles, which is helpful for researchers in regenerative medicine and cell biology.

Availability and implementation

RPdb is freely available on the web at http://bioinformatics.ustc.edu.cn/rpdb with all major browsers supported.

Contact

aoli@ustc.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-29 +23224380,Introducing a primer for career development and promotion: succeeding as a psychologist in an academic health center.,"Noting a lack of such a resource, the authors developed a primer summarizing key concepts for career development and promotion for psychologists working in an academic health center. The present article presents a brief summary of the primer; however, the full version is available as an APAHC membership benefit (or for a small fee for non-members) by visiting http://www.div12.org/section8/index.html and is a supplement to the December issue of Volume 19 of the Journal of Clinical Psychology in Medical Settings (Supplementary material 1). The primer complements other APAHC membership benefits, which may be helpful for early career or more seasoned psychologists planning for career transitions.",2012-12-01 +28453611,modSaRa: a computationally efficient R package for CNV identification.,"

Summary

Chromosomal copy number variation (CNV) refers to a polymorphism that a DNA segment presents deletion or duplication in the population. The computational algorithms developed to identify this type of variation are usually of high computational complexity. Here we present a user-friendly R package, modSaRa, designed to perform copy number variants identification. The package is developed based on a change-point based method with optimal computational complexity and desirable accuracy. The current version of modSaRa package is a comprehensive tool with integration of preprocessing steps and main CNV calling steps.

Availability and implementation

modSaRa is an R package written in R, C ++ and Rcpp and is now freely available for download at http://c2s2.yale.edu/software/modSaRa .

Contact

heping.zhang@yale.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +26767846,Predicting the functions of a protein from its ability to associate with other molecules.,"

Background

All proteins associate with other molecules. These associated molecules are highly predictive of the potential functions of proteins. The association of a protein and a molecule can be determined from their co-occurrences in biomedical abstracts. Extensive semantically related co-occurrences of a protein's name and a molecule's name in the sentences of biomedical abstracts can be considered as indicative of the association between the protein and the molecule. Dependency parsers extract textual relations from a text by determining the grammatical relations between words in a sentence. They can be used for determining the textual relations between proteins and molecules. Despite their success, they may extract textual relations with low precision. This is because they do not consider the semantic relationships between terms in a sentence (i.e., they consider only the structural relationships between the terms). Moreover, they may not be well suited for complex sentences and for long-distance textual relations.

Results

We introduce an information extraction system called PPFBM that predicts the functions of unannotated proteins from the molecules that associate with these proteins. PPFBM represents each protein by the other molecules that associate with it in the abstracts referenced in the protein's entries in reliable biological databases. It automatically extracts each co-occurrence of a protein-molecule pair that represents semantic relationship between the pair. Towards this, we present novel semantic rules that identify the semantic relationship between each co-occurrence of a protein-molecule pair using the syntactic structures of sentences and linguistics theories. PPFBM determines the functions of an un-annotated protein p as follows. First, it determines the set S r of annotated proteins that is semantically similar to p by matching the molecules representing p and the annotated proteins. Then, it assigns p the functional category FC if the significance of the frequency of occurrences of S r in abstracts associated with proteins annotated with FC is statistically significantly different than the significance of the frequency of occurrences of S r in abstracts associated with proteins annotated with all other functional categories. We evaluated the quality of PPFBM by comparing it experimentally with two other systems. Results showed marked improvement.

Conclusions

The experimental results demonstrated that PPFBM outperforms other systems that predict protein function from the textual information found within biomedical abstracts. This is because these systems do not consider the semantic relationships between terms in a sentence (i.e., they consider only the structural relationships between the terms). PPFBM's performance over these systems increases steadily as the number of training proteins increases. That is, PPFBM's prediction performance becomes more accurate constantly, as the size of training proteins gets larger. This is because every time a new set of test proteins is added to the current set of training proteins. A demo of PPFBM that annotates each input Yeast protein (SGD (Saccharomyces Genome Database). Available at: http://www.yeastgenome.org/download-data/curation) with the functions of Gene Ontology terms is available at: (see Appendix for more details about the demo) http://ecesrvr.kustar.ac.ae:8080/PPFBM/.",2016-01-15 +29220375,Construction and validation of a psychometric scale to measure awareness on consumption of irradiated foods.,"Although food irradiation has been used to ensure food safety, most consumers are unaware of the basic concepts of irradiation, misinterpreting information and demonstrating a negative attitude toward food items treated with ionizing radiation. This research is aimed at developing a tool to assess the awareness on the consumption of irradiated food. The sample was composed by employees from different social classes and school levels of Brazilian universities, who reflect the end-users of the irradiated foods, representative of the views of lay consumers. The total number of respondents was 614. In order to assess the Awareness Scale on Consumption of Irradiated Foods (ASCIF), an instrument has been developed and submitted to semantic tests and judge's validation. 
The instrument, that included 32 items, contemplated four construct factors: concepts (6 items), awareness (10 items), labeling (7 items) and safety of Irradiated foods (9 items). The data were collected by electronic means, through the site . By using exploratory factorial analysis (EFA) 4 factors have been found. They summarize the 31 items included. These factors account for 64.32% of the variance of the items and the internal consistency of the factors has been deemed good. An Exploratory Structural Equation Modeling (ESEM) was conducted to evaluate the factor structure of the instrument. The proposed instrument has been found to meet consistency criteria as an efficient tool for indicating assessing potential challenges and opportunities for the irradiated food markets.",2017-12-08 +25352543,RNAcentral: an international database of ncRNA sequences.,"The field of non-coding RNA biology has been hampered by the lack of availability of a comprehensive, up-to-date collection of accessioned RNA sequences. Here we present the first release of RNAcentral, a database that collates and integrates information from an international consortium of established RNA sequence databases. The initial release contains over 8.1 million sequences, including representatives of all major functional classes. A web portal (http://rnacentral.org) provides free access to data, search functionality, cross-references, source code and an integrated genome browser for selected species.",2014-10-28 +27137891,RNAex: an RNA secondary structure prediction server enhanced by high-throughput structure-probing data.,"Several high-throughput technologies have been developed to probe RNA base pairs and loops at the transcriptome level in multiple species. However, to obtain the final RNA secondary structure, extensive effort and considerable expertise is required to statistically process the probing data and combine them with free energy models. 
Therefore, we developed an RNA secondary structure prediction server that is enhanced by experimental data (RNAex). RNAex is a web interface that enables non-specialists to easily access cutting-edge structure-probing data and predict RNA secondary structures enhanced by in vivo and in vitro data. RNAex annotates the RNA editing, RNA modification and SNP sites on the predicted structures. It provides four structure-folding methods, restrained MaxExpect, SeqFold, RNAstructure (Fold) and RNAfold that can be selected by the user. The performance of these four folding methods has been verified by previous publications on known structures. We re-mapped the raw sequencing data of the probing experiments to the whole genome for each species. RNAex thus enables users to predict secondary structures for both known and novel RNA transcripts in human, mouse, yeast and Arabidopsis The RNAex web server is available at http://RNAex.ncrnalab.org/.",2016-05-02 +29990220,Transfer Learning for Multicenter Classification of Chronic Obstructive Pulmonary Disease.,"Chronic obstructive pulmonary disease (COPD) is a lung disease that can be quantified using chest computed tomography scans. Recent studies have shown that COPD can be automatically diagnosed using weakly supervised learning of intensity and texture distributions. However, up till now such classifiers have only been evaluated on scans from a single domain, and it is unclear whether they would generalize across domains, such as different scanners or scanning protocols. To address this problem, we investigate classification of COPD in a multicenter dataset with a total of 803 scans from three different centers, four different scanners, with heterogenous subject distributions. Our method is based on Gaussian texture features, and a weighted logistic classifier, which increases the weights of samples similar to the test data. 
We show that Gaussian texture features outperform intensity features previously used in multicenter classification tasks. We also show that a weighting strategy based on a classifier that is trained to discriminate between scans from different domains can further improve the results. To encourage further research into transfer learning methods for the classification of COPD, upon acceptance of this paper we will release two feature datasets used in this study on http://bigr.nl/research/projects/copd.",2017-11-03 +29729661,"A Longitudinal Study Examining Changes in Street Connectivity, Land Use, and Density of Dwellings and Walking for Transport in Brisbane, Australia.","

Background

Societies face the challenge of keeping people active as they age. Walkable neighborhoods have been associated with physical activity, but more rigorous analytical approaches are needed.

Objectives

We used longitudinal data from adult residents of Brisbane, Australia (40-65 years of age at baseline) to estimate effects of changes in neighborhood characteristics over a 6-y period on the likelihood of walking for transport.

Methods

Analyses included 2,789-9,747 How Areas Influence Health and Activity (HABITAT) cohort participants from 200 neighborhoods at baseline (2007) who completed up to three follow-up questionnaires (through 2013). Principal components analysis was used to derive a proxy measure of walkability preference. Environmental predictors were changes in street connectivity, residential density, and land use mix within a one-kilometer network buffer. Associations with any walking and minutes of walking were estimated using logistic and linear regression, including random effects models adjusted for time-varying confounders and a measure of walkability preference, and fixed effects models of changes in individuals to eliminate confounding by time-invariant characteristics.

Results

Any walking for transport (vs. none) was increased in association with an increase in street connectivity (+10 intersections, fixed effects OR=1.19; 95% confidence interval (CI): 1.07, 1.32), residential density (+5 dwellings/hectare, OR=1.10; 95% CI: 1.05, 1.15), and land-use mix (10% increase, OR=1.12; 95% CI: 1.00, 1.26). Associations with minutes of walking were positive based on random effects models, but null for fixed effects models. The association between land-use mix and any walking appeared to be limited to participants in the highest tertile of increased street connectivity (fixed effects OR=1.17; 95% CI: 0.99, 1.35 for a 1-unit increase in land-use mix; interaction p-value=0.05).

Conclusions

Increases in street connectivity, residential density, and land-use heterogeneity were associated with walking for transport among middle-age residents of Brisbane, Australia. https://doi.org/10.1289/EHP2080.",2018-05-03 +24001185,inTB - a data integration platform for molecular and clinical epidemiological analysis of tuberculosis.,"

Background

Tuberculosis is currently the second highest cause of death from infectious diseases worldwide. The emergence of multi and extensive drug resistance is threatening to make tuberculosis incurable. There is growing evidence that the genetic diversity of Mycobacterium tuberculosis may have important clinical consequences. Therefore, combining genetic, clinical and socio-demographic data is critical to understand the epidemiology of this infectious disease, and how virulence and other phenotypic traits evolve over time. This requires dedicated bioinformatics platforms, capable of integrating and enabling analyses of this heterogeneous data.

Results

We developed inTB, a web-based system for integrated warehousing and analysis of clinical, socio-demographic and molecular data for Mycobacterium sp. isolates. As a database it can organize and display data from any of the standard genotyping methods (SNP, MIRU-VNTR, RFLP and spoligotype), as well as an extensive array of clinical and socio-demographic variables that are used in multiple countries to characterize the disease. Through the inTB interface it is possible to insert and download data, browse the database and search specific parameters. New isolates are automatically classified into strains according to an internal reference, and data uploaded or typed in is checked for internal consistency. As an analysis framework, the system provides simple, point and click analysis tools that allow multiple types of data plotting, as well as simple ways to download data for external analysis. Individual trees for each genotyping method are available, as well as a super tree combining all of them. The integrative nature of inTB grants the user the ability to generate trees for filtered subsets of data crossing molecular and clinical/socio-demographic information. inTB is built on open source software, can be easily installed locally and easily adapted to other diseases. Its design allows for use by research laboratories, hospitals or public health authorities. The full source code as well as ready to use packages are available at http://www.evocell.org/inTB.

Conclusions

To the best of our knowledge, this is the only system capable of integrating different types of molecular data with clinical and socio-demographic data, empowering researchers and clinicians with easy to use analysis tools that were not possible before.",2013-08-30 +27332228,Integration and Analysis of Heterogeneous Colorectal Cancer Data for Translational Research.,"Cancer is the number one cause of death in Australia with colorectal cancer being the second most common cancer type. The translation of cancer research into clinical practice is hindered by the lack of integration of heterogeneous and autonomous data from various data sources. Integration of heterogeneous data can offer researchers a comprehensive source for biospecimen identification, hypothesis formulation, hypothesis validation, cohort discovery and biomarker discovery. Alongside the increasing prominence of big data, various translational research tools such as tranSMART have emerged that can converge and analyse different types of data. In this study, we show the integration of different data types from a significant Australian colorectal cancer cohort. Additionally, colorectal cancer datasets from The Cancer Genome Atlas were also integrated for comparison. These integrated data are accessible via http://www.tcrn.unsw.edu.au/transmart. The use of translational research tools for data integration can provide a cost-effective and rapid approach to translational cancer research.",2016-01-01 +22133378,Characterizing the citrus cultivar Carrizo genome through 454 shotgun sequencing.,"The citrus cultivar Carrizo is the single most important rootstock to the US citrus industry and has resistance or tolerance to a number of major citrus diseases, including citrus tristeza virus, foot rot, and Huanglongbing (HLB, citrus greening). A Carrizo genomic sequence database providing approximately 3.5×genome coverage (haploid genome size approximately 367 Mb) was populated through 454 GS FLX shotgun sequencing. 
Analysis of the repetitive DNA fraction indicated a total interspersed repeat fraction of 36.5%. Assembly and characterization of abundant citrus Ty3/gypsy elements revealed a novel type of element containing open reading frames encoding a viral RNA-silencing suppressor protein (RNA binding protein, rbp) and a plant cytokinin riboside 5′-monophosphate phosphoribohydrolase-related protein (LONELY GUY, log). Similar gypsy elements were identified in the Populus trichocarpa genome. Gene-coding region analysis indicated that 24.4% of the nonrepetitive reads contained genic regions. The depth of genome coverage was sufficient to allow accurate assembly of constituent genes, including a putative phloem-expressed gene. The development of the Carrizo database (http://citrus.pw.usda.gov/) will contribute to characterization of agronomically significant loci and provide a publicly available genomic resource to the citrus research community.",2011-12-01 +24225321,The genome portal of the Department of Energy Joint Genome Institute: 2014 updates.,"The U.S. Department of Energy (DOE) Joint Genome Institute (JGI), a national user facility, serves the diverse scientific community by providing integrated high-throughput sequencing and computational analysis to enable system-based scientific approaches in support of DOE missions related to clean energy generation and environmental characterization. The JGI Genome Portal (http://genome.jgi.doe.gov) provides unified access to all JGI genomic databases and analytical tools. The JGI maintains extensive data management systems and specialized analytical capabilities to manage and interpret complex genomic data. A user can search, download and explore multiple data sets available for all DOE JGI sequencing projects including their status, assemblies and annotations of sequenced genomes. 
Here we describe major updates of the Genome Portal in the past 2 years with a specific emphasis on efficient handling of the rapidly growing amount of diverse genomic data accumulated in JGI.",2013-11-12 +29566027,MicroRNA expression in serum samples of sulfur mustard veterans as a diagnostic gateway to improve care.,"Sulfur mustard is a vesicant chemical warfare agent, which has been used during Iraq-Iran-war. Many veterans and civilians still suffer from long-term complications of sulfur mustard exposure, especially in their lung. Although the lung lesions of these patients are similar to Chronic Obstructive Pulmonary Disease (COPD), there are some differences due to different etiology and clinical care. Less is known on the molecular mechanism of sulfur mustard patients and specific treatment options. microRNAs are master regulators of many biological pathways and proofed to be stable surrogate markers in body fluids. Based on that microRNA expression for serum samples of sulfur mustard patients were examined, to establish specific microRNA patterns as a basis for diagnostic use and insight into affected molecular pathways. Patients were categorized based on their long-term complications into three groups and microRNA serum levels were measured. The differentially regulated microRNAs and their corresponding gene targets were identified. Cell cycle arrest, ageing and TGF-beta signaling pathways showed up to be the most deregulated pathways. The candidate microRNA miR-143-3p could be validated on all individual patients. In a ROC analysis miR-143-3p turned out to be a suitable diagnostic biomarker in the mild and severe categories of patients. Further microRNAs which might own a link to the biology of the sulfur mustard patients are miR-365a-3p, miR-200a-3p, miR-663a. miR-148a-3p, which showed up only in a validation study, might be linked to the airway complications of the sulfur mustard patients. 
All the other candidate microRNAs do not directly link to COPD phenotype or lung complications. In summary the microRNA screening study characterizes several molecular differences in-between the clinical categories of the sulfur mustard exposure groups and established some useful microRNA biomarkers. qPCR raw data is available via the Gene Expression Omnibus https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE110797.",2018-03-22 +29496609,The dorsal striatum and the dynamics of the consensus connectomes in the frontal lobe of the human brain.,"In the applications of the graph theory, it is unusual that one considers numerous, pairwise different graphs on the very same set of vertices. In the case of human braingraphs or connectomes, however, this is the standard situation: the nodes correspond to anatomically identified cerebral regions, and two vertices are connected by an edge if a diffusion MRI-based workflow identifies a fiber of axons, running between the two regions, corresponding to the two vertices. Therefore, if we examine the braingraphs of n subjects, then we have n graphs on the very same, anatomically identified vertex set. It is a natural idea to describe the k-frequently appearing edges in these graphs: the edges that are present between the same two vertices in at least k out of the n graphs. Based on the NIH-funded large Human Connectome Project's public data release, we have reported the construction of the Budapest Reference Connectome Server http://www.connectome.pitgroup.org that generates and visualizes these k-frequently appearing edges. We call the graphs of the k-frequently appearing edges ""k-consensus connectomes"" since an edge could be included only if it is present in at least k graphs out of n. Considering the whole human brain, we have reported a surprising property of these consensus connectomes earlier. 
In the present work we are focusing on the frontal lobe of the brain, and we report here a similarly surprising dynamical property of the consensus connectomes when k is gradually changed from k = n to k = 1: the connections between the nodes of the frontal lobe are seemingly emanating from those nodes that were connected to sub-cortical structures of the dorsal striatum: the caudate nucleus, and the putamen. We hypothesize that this dynamic behavior copies the axonal fiber development of the frontal lobe. An animation of the phenomenon is presented at https://youtu.be/wBciB2eW6_8.",2018-02-26 +28472109,EnHERV: Enrichment analysis of specific human endogenous retrovirus patterns and their neighboring genes.,"Human endogenous retroviruses (HERVs) are flanked by long terminal repeats (LTRs), which contain the regulation part of the retrovirus. Remaining HERVs constitute 7% to 8% of the present day human genome, and most have been identified as solo LTRs. The HERV sequences have been associated with several molecular functions as well as certain diseases in human, but their roles in human diseases are yet to be established. We designed EnHERV to make accessible the identified endogenous retrovirus repetitive sequences from Repbase Update (a database of eukaryotic repetitive elements) that are present in the human genome. Defragmentation process was done to improve the RepeatMasker annotation output. The defragmented elements were used as core database in EnHERV. EnHERV is available at http://sysbio.chula.ac.th/enherv and can be searched using either gene lists of user interest or HERV characteristics. Besides the search function, EnHERV also provides an enrichment analysis function that allows users to perform enrichment analysis between selected HERV characteristics and user-input gene lists, especially genes with the expression profile of a certain disease. 
EnHERV will facilitate exploratory studies of specific HERV characteristics that control gene expression patterns related to various disease conditions. Here we analyzed 25 selected HERV groups/names from all four HERV superfamilies, using the sense and anti-sense directions of the HERV and gene expression profiles from 49 specific tissue and disease conditions. We found that intragenic HERVs were associated with down-regulated genes in most cancer conditions and in psoriatic skin tissues and associated with up-regulated genes in immune cells particularly from systemic lupus erythematosus (SLE) patients. EnHERV allowed the analysis of how different types of LTRs were differentially associated with specific gene expression profiles in particular disease conditions for further studies into their mechanisms and functions.",2017-05-04 +28413616,-A curated transcriptomic dataset collection relevant to embryonic development associated with in vitro fertilization in healthy individuals and patients with polycystic ovary syndrome.,"The collection of large-scale datasets available in public repositories is rapidly growing and providing opportunities to identify and fill gaps in different fields of biomedical research. However, users of these datasets should be able to selectively browse datasets related to their field of interest. Here we made available a collection of transcriptome datasets related to human follicular cells from normal individuals or patients with polycystic ovary syndrome, in the process of their development, during in vitro fertilization. After RNA-seq dataset exclusion and careful selection based on study description and sample information, 12 datasets, encompassing a total of 85 unique transcriptome profiles, were identified in NCBI Gene Expression Omnibus and uploaded to the Gene Expression Browser (GXB), a web application specifically designed for interactive query and visualization of integrated large-scale data. 
Once annotated in GXB, multiple sample grouping has been made in order to create rank lists to allow easy data interpretation and comparison. The GXB tool also allows the users to browse a single gene across multiple projects to evaluate its expression profiles in multiple biological systems/conditions in a web-based customized graphical views. The curated dataset is accessible at the following link: http://ivf.gxbsidra.org/dm3/landing.gsp.",2017-02-23 +28431164,Low Activity Microstates During Sleep. ,"To better understand the distinct activity patterns of the brain during sleep, we observed and investigated periods of diminished oscillatory and population spiking activity lasting for seconds during non-rapid eye movement (non-REM) sleep, which we call ""LOW"" activity sleep. We analyzed spiking and local field potential (LFP) activity of hippocampal CA1 region alongside neocortical electroencephalogram (EEG) and electromyogram (EMG) in 19 sessions from four male Long-Evans rats (260-360 g) during natural wake/sleep across the 24-hr cycle as well as data from other brain regions obtained from http://crcns.org.1,2. LOW states lasted longer than OFF/DOWN states and were distinguished by a subset of ""LOW-active"" cells. LOW activity sleep was preceded and followed by increased sharp-wave ripple activity. We also observed decreased slow-wave activity and sleep spindles in the hippocampal LFP and neocortical EEG upon LOW onset, with a partial rebound immediately after LOW. LOW states demonstrated activity patterns consistent with sleep but frequently transitioned into microarousals and showed EMG and LFP differences from small-amplitude irregular activity during quiet waking. Their likelihood decreased within individual non-REM epochs yet increased over the course of sleep. 
By analyzing data from the entorhinal cortex of rats,1 as well as the hippocampus, the medial prefrontal cortex, the postsubiculum, and the anterior thalamus of mice,2 obtained from http://crcns.org, we confirmed that LOW states corresponded to markedly diminished activity simultaneously in all of these regions. We propose that LOW states are an important microstate within non-REM sleep that provide respite from high-activity sleep and may serve a restorative function.",2017-06-01 +24255551,A Signaling Network of Thyroid-Stimulating Hormone. ,"Human thyroid stimulating hormone (TSH) is a glycoprotein secreted by the anterior part of the pituitary gland. TSH plays an important physiological role in the regulation of hypothalamic-pituitary-thyroid axis by modulating the release of the thyroid hormones from the thyroid gland. It induces iodine uptake by the thyroid, promotes thyroid epithelial differentiation and growth, and protects thyroid cells from apoptosis. Impairment of TSH signal transduction pathway leads to thyroid disorders such as goitre, hypothyroidism and hyperthyroidism, which can have complex clinical manifestations. TSH signaling is largely effected through two separate pathways, the adenylate cyclase and the phospholipase C pathways. In spite of its biomedical importance, a concise signaling map of TSH pathway is not available in the public domain. Therefore, we have generated a detailed signaling map of TSH pathway by systematically cataloging the molecular reactions induced by TSH including protein-protein interactions, post-translational modifications, protein translocation events and activation/inhibition reactions. We have cataloged 40 molecular association events, 42 enzyme-substrate reactions and 16 protein translocation events in TSH signaling pathway resource. Additionally, we have documented 208 genes, which are differentially regulated by TSH. 
We have provided the details of TSH pathway through NetPath (http://www.netpath.org), which is a publicly available resource for human signaling pathways developed by our group. We have also depicted the map of TSH signaling using NetSlim criteria (http://www.netpath.org/netslim/) and provided pathway maps in Wikipathways (http://www.wikipathways.org/). We anticipate that the availability of TSH pathway as a community resource will enhance further biomedical investigations into the function and effects of this important hormone.",2011-10-01 +24713438,MetDisease--connecting metabolites to diseases via literature.,"

Motivation

In recent years, metabolomics has emerged as an approach to perform large-scale characterization of small molecules in biological systems. Metabolomics posed a number of bioinformatics challenges associated in data analysis and interpretation. Genome-based metabolic reconstructions have established a powerful framework for connecting metabolites to genes through metabolic reactions and enzymes that catalyze them. Pathway databases and bioinformatics tools that use this framework have proven to be useful for annotating experimental metabolomics data. This framework can be used to infer connections between metabolites and diseases through annotated disease genes. However, only about half of experimentally detected metabolites can be mapped to canonical metabolic pathways. We present a new Cytoscape 3 plug-in, MetDisease, which uses an alternative approach to link metabolites to disease information. MetDisease uses Medical Subject Headings (MeSH) disease terms mapped to PubChem compounds through literature to annotate compound networks.

Availability and implementation

MetDisease can be downloaded from http://apps.cytoscape.org/apps/metdisease or installed via the Cytoscape app manager. Further information about MetDisease can be found at http://metdisease.ncibi.org

Contact

akarnovs@med.umich.edu

Supplementary information

Supplementary Data are available at Bioinformatics online.",2014-04-08 +25480373,Graphical algorithm for integration of genetic and biological data: proof of principle using psoriasis as a model.,"

Motivation

Pathway analysis to reveal biological mechanisms for results from genetic association studies have great potential to better understand complex traits with major human disease impact. However, current approaches have not been optimized to maximize statistical power to identify enriched functions/pathways, especially when the genetic data derives from studies using platforms (e.g. Immunochip and Metabochip) customized to have pre-selected markers from previously identified top-rank loci. We present here a novel approach, called Minimum distance-based Enrichment Analysis for Genetic Association (MEAGA), with the potential to address both of these important concerns.

Results

MEAGA performs enrichment analysis using graphical algorithms to identify sub-graphs among genes and measure their closeness in interaction database. It also incorporates a statistic summarizing the numbers and total distances of the sub-graphs, depicting the overlap between observed genetic signals and defined function/pathway gene-sets. MEAGA uses sampling technique to approximate empirical and multiple testing-corrected P-values. We show in simulation studies that MEAGA is more powerful compared to count-based strategies in identifying disease-associated functions/pathways, and the increase in power is influenced by the shortest distances among associated genes in the interactome. We applied MEAGA to the results of a meta-analysis of psoriasis using Immunochip datasets, and showed that associated genes are significantly enriched in immune-related functions and closer with each other in the protein-protein interaction network.

Availability and implementation

http://genome.sph.umich.edu/wiki/MEAGA Contact: tsoi.teen@gmail.com or goncalo@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-04 +27542402,"A protein network descriptor server and its use in studying protein, disease, metabolic and drug targeted networks.","The genetic, proteomic, disease and pharmacological studies have generated rich data in protein interaction, disease regulation and drug activities useful for systems-level study of the biological, disease and drug therapeutic processes. These studies are facilitated by the established and the emerging computational methods. More recently, the network descriptors developed in other disciplines have become more increasingly used for studying the protein-protein, gene regulation, metabolic, disease networks. There is an inadequate coverage of these useful network features in the public web servers. We therefore introduced upto 313 literature-reported network descriptors in PROFEAT web server, for describing the topological, connectivity and complexity characteristics of undirected unweighted (uniform binding constants and molecular levels), undirected edge-weighted (varying binding constants), undirected node-weighted (varying molecular levels), undirected edge-node-weighted (varying binding constants and molecular levels) and directed unweighted (oriented process) networks. The usefulness of the PROFEAT computed network descriptors is illustrated by their literature-reported applications in studying the protein-protein, gene regulatory, gene co-expression, protein-drug and metabolic networks. PROFEAT is accessible free of charge at http://bidd2.nus.edu.sg/cgi-bin/profeat2016/main.cgi.",2017-11-01 +27515740,ChemTreeMap: an interactive map of biochemical similarity in molecular datasets.,"

Motivation

What if you could explain complex chemistry in a simple tree and share that data online with your collaborators? Computational biology often incorporates diverse chemical data to probe a biological question, but the existing tools for chemical data are ill-suited for the very large datasets inherent to bioinformatics. Furthermore, existing visualization methods often require an expert chemist to interpret the patterns. Biologists need an interactive tool for visualizing chemical information in an intuitive, accessible way that facilitates its integration into today's team-based biological research.

Results

ChemTreeMap is an interactive, bioinformatics tool designed to explore chemical space and mine the relationships between chemical structure, molecular properties, and biological activity. ChemTreeMap synergistically combines extended connectivity fingerprints and a neighbor-joining algorithm to produce a hierarchical tree with branch lengths proportional to molecular similarity. Compound properties are shown by leaf color, size and outline to yield a user-defined visualization of the tree. Two representative analyses are included to demonstrate ChemTreeMap's capabilities and utility: assessing dataset overlap and mining structure-activity relationships.

Availability and implementation

The examples from this paper may be accessed at http://ajing.github.io/ChemTreeMap/ Code for the server and client are available in the Supplementary Information, at the aforementioned github site, and on Docker Hub (https://hub.docker.com) with the nametag ajing/chemtreemap.

Contact

carlsonh@umich.eduSupplementary information: Supplementary data are available at Bioinformatics online.",2016-08-11 +23236444,Digital surveillance: a novel approach to monitoring the illegal wildlife trade.,"A dearth of information obscures the true scale of the global illegal trade in wildlife. Herein, we introduce an automated web crawling surveillance system developed to monitor reports on illegally traded wildlife. A resource for enforcement officials as well as the general public, the freely available website, http://www.healthmap.org/wildlifetrade, provides a customizable visualization of worldwide reports on interceptions of illegally traded wildlife and wildlife products. From August 1, 2010 to July 31, 2011, publicly available English language illegal wildlife trade reports from official and unofficial sources were collected and categorized by location and species involved. During this interval, 858 illegal wildlife trade reports were collected from 89 countries. Countries with the highest number of reports included India (n = 146, 15.6%), the United States (n = 143, 15.3%), South Africa (n = 75, 8.0%), China (n = 41, 4.4%), and Vietnam (n = 37, 4.0%). Species reported as traded or poached included elephants (n = 107, 12.5%), rhinoceros (n = 103, 12.0%), tigers (n = 68, 7.9%), leopards (n = 54, 6.3%), and pangolins (n = 45, 5.2%). 
The use of unofficial data sources, such as online news sites and social networks, to collect information on international wildlife trade augments traditional approaches drawing on official reporting and presents a novel source of intelligence with which to monitor and collect news in support of enforcement against this threat to wildlife conservation worldwide.",2012-12-07 +29578659,Drilling and Production Activity Related to Unconventional Gas Development and Severity of Preterm Birth.,"BACKGROUND:Studies of unconventional gas development (UGD) and preterm birth (PTB) have not presented risk estimates by well development phase or trimester. OBJECTIVE:We examined phase and trimester-specific associations between UGD activity and PTB. METHODS:We conducted a case-control study of women with singleton births in the Barnett Shale area, Texas, from 30 November 2010 to 29 November 2012. We individually age- and race/ethnicity-matched five controls to each PTB case (n=13,328) and truncated controls' time at risk according to the matched case's gestational age. We created phase-specific UGD-activity metrics: a) inverse squared distance-weighted (IDW) count of wells in the drilling phase ≤0.5 mi (804.7 meters) of the residence and b) IDW sum of natural gas produced ≤0.5 mi of the residence. We also constructed trimester- and gestation-specific metrics. Metrics were categorized as follows: zero wells (reference), first, second, third tertiles of UGD activity. Analyses were repeated by PTB severity: extreme, very, and moderate (<28, 28 to<32, and 32 to<37 completed weeks). Data were analyzed using conditional logistic regression. RESULTS:We found increased odds of PTB in the third tertile of the UGD drilling {odds ratio (OR)=1.20 [95% confidence interval (CI): 1.06, 1.37]} and UGD-production [OR=1.15 (1.05, 1.26)] metrics. Among women in the third tertile of UGD-production, associations were strongest in trimesters one [OR=1.18 (1.02, 1.37)] and two [OR=1.14 (0.99, 1.31). 
The greatest risk was observed for extremely PTB [third tertile ORs: UGD drilling, 2.00 (1.23, 3.24); UGD production, 1.53 (1.03-2.27)]. CONCLUSIONS:We found evidence of differences in phase- and trimester-specific associations of UGD and PTB and indication of particular risk associated with extremely preterm birth. Future studies should focus on quantifying specific chemical and nonchemical stressors associated with UGD. https://doi.org/10.1289/EHP2622.",2018-03-20 +22761927,HEMD: an integrated tool of human epigenetic enzymes and chemical modulators for therapeutics.,"

Background

Epigenetic mechanisms mainly include DNA methylation, post-translational modifications of histones, chromatin remodeling and non-coding RNAs. All of these processes are mediated and controlled by enzymes. Abnormalities of the enzymes are involved in a variety of complex human diseases. Recently, potent natural or synthetic chemicals are utilized to establish the quantitative contributions of epigenetic regulation through the enzymes and provide novel insight for developing new therapeutics. However, the development of more specific and effective epigenetic therapeutics requires a more complete understanding of the chemical epigenomic landscape.

Description

Here, we present a human epigenetic enzyme and modulator database (HEMD), the database which provides a central resource for the display, search, and analysis of the structure, function, and related annotation for human epigenetic enzymes and chemical modulators focused on epigenetic therapeutics. Currently, HEMD contains 269 epigenetic enzymes and 4377 modulators in three categories (activators, inhibitors, and regulators). Enzymes are annotated with detailed description of epigenetic mechanisms, catalytic processes, and related diseases, and chemical modulators with binding sites, pharmacological effect, and therapeutic uses. Integrating the information of epigenetic enzymes in HEMD should allow for the prediction of conserved features for proteins and could potentially classify them as ideal targets for experimental validation. In addition, modulators curated in HEMD can be used to investigate potent epigenetic targets for the query compound and also help chemists to implement structural modifications for the design of novel epigenetic drugs.

Conclusions

HEMD could be a platform and a starting point for biologists and medicinal chemists for furthering research on epigenetic therapeutics. HEMD is freely available at http://mdl.shsmu.edu.cn/HEMD/.",2012-06-25 +26645251,Comparative benefits and harms of second generation antidepressants and cognitive behavioral therapies in initial treatment of major depressive disorder: systematic review and meta-analysis.,"

Study question

What are the benefits and harms of second generation antidepressants and cognitive behavioral therapies (CBTs) in the initial treatment of a current episode of major depressive disorder in adults?

Methods

This was a systematic review including qualitative assessment and meta-analyses using random and fixed effects models. Medline, Embase, the Cochrane Library, the Allied and Complementary Medicine Database, PsycINFO, and the Cumulative Index to Nursing and Allied Health Literature were searched from January 1990 through January 2015. The 11 randomized controlled trials included compared a second generation antidepressant with CBT. Ten trials compared antidepressant monotherapy with CBT alone; three compared antidepressant monotherapy with antidepressant plus CBT.

Summary answer and limitations

Meta-analyses found no statistically significant difference in effectiveness between second generation antidepressants and CBT for response (risk ratio 0.91, 0.77 to 1.07), remission (0.98, 0.73 to 1.32), or change in 17 item Hamilton Rating Scale for Depression score (weighted mean difference, -0.38, -2.87 to 2.10). Similarly, no significant differences were found in rates of overall study discontinuation (risk ratio 0.90, 0.49 to 1.65) or discontinuation attributable to lack of efficacy (0.40, 0.05 to 2.91). Although more patients treated with a second generation antidepressant than receiving CBT withdrew from studies because of adverse events, the difference was not statistically significant (risk ratio 3.29, 0.42 to 25.72). No conclusions could be drawn about other outcomes because of lack of evidence. Results should be interpreted cautiously given the low strength of evidence for most outcomes. The scope of this review was limited to trials that enrolled adult patients with major depressive disorder and compared a second generation antidepressant with CBT, and many of the included trials had methodological shortcomings that may limit confidence in some of the findings.

What this study adds

Second generation antidepressants and CBT have evidence bases of benefits and harms in major depressive disorder. Available evidence suggests no difference in treatment effects of second generation antidepressants and CBT, either alone or in combination, although small numbers may preclude detection of small but clinically meaningful differences. Funding, competing interests, data sharing This project was funded under contract from the Agency for Healthcare Research and Quality by the RTI-UNC Evidence-based Practice Center. Detailed methods and additional information are available in the full report, available at http://effectivehealthcare.ahrq.gov/.",2015-12-08 +28199698,Efficient Inference of Recent and Ancestral Recombination within Bacterial Populations.,"Prokaryotic evolution is affected by horizontal transfer of genetic material through recombination. Inference of an evolutionary tree of bacteria thus relies on accurate identification of the population genetic structure and recombination-derived mosaicism. Rapidly growing databases represent a challenge for computational methods to detect recombinations in bacterial genomes. We introduce a novel algorithm called fastGEAR which identifies lineages in diverse microbial alignments, and recombinations between them and from external origins. The algorithm detects both recent recombinations (affecting a few isolates) and ancestral recombinations between detected lineages (affecting entire lineages), thus providing insight into recombinations affecting deep branches of the phylogenetic tree. In simulations, fastGEAR had comparable power to detect recent recombinations and outstanding power to detect the ancestral ones, compared with state-of-the-art methods, often with a fraction of computational cost. 
We demonstrate the utility of the method by analyzing a collection of 616 whole-genomes of a recombinogenic pathogen Streptococcus pneumoniae, for which the method provided a high-resolution view of recombination across the genome. We examined in detail the penicillin-binding genes across the Streptococcus genus, demonstrating previously undetected genetic exchanges between different species at these three loci. Hence, fastGEAR can be readily applied to investigate mosaicism in bacterial genes across multiple species. Finally, fastGEAR correctly identified many known recombination hotspots and pointed to potential new ones. Matlab code and Linux/Windows executables are available at https://users.ics.aalto.fi/~pemartti/fastGEAR/ (last accessed February 6, 2017).",2017-05-01 +28158709,Cytomegalovirus DNA Detection by Polymerase Chain Reaction in Cerebrospinal Fluid of Infants With Congenital Infection: Associations With Clinical Evaluation at Birth and Implications for Follow-up.,"

Background

DNA detection of human cytomegalovirus (hCMV) in cerebrospinal fluid (CSF) by polymerase chain reaction (PCR) is a marker of central nervous system (CNS) involvement in congenital hCMV infection (cCMV), but its prognostic value is unknown.

Methods

A multicenter, retrospective study was performed using the Spanish Congenital Cytomegalovirus Infection Database (REDICCMV; http://www.cmvcongenito.es). Newborns with cCMV and a lumbar puncture performed were included and classified according to their hCMV-PCR in CSF result (positive/negative). Clinical characteristics, neuroimaging abnormalities, plasma viral load, and audiological and neurological outcomes of both groups were compared.

Results

A total of 136 neonates were included in the study: 21 (15.4%) with positive CSF hCMV-PCR and 115 (84.6%) with negative results. Seventeen patients (81%) in the positive group were symptomatic at birth compared with 52.2% of infants in the negative group (odds ratio [OR], 3.86; 95% confidence interval [CI], 1.28-14.1; P = .01). Only 4 asymptomatic newborns (6.8%) had a positive CSF hCMV-PCR. There were no differences between groups regarding the rate of microcephaly, neuroimaging abnormalities, neurological sequelae at 6 months of age, or plasma viral load. Sensorineural hearing loss (SNHL) at birth was associated with a positive CSF hCMV-PCR result (OR, 3.49; 95% CI, 1.08-11.27; P = .04), although no association was found at 6 months of age.

Conclusions

A positive hCMV-PCR result in CSF is associated with symptomatic cCMV and SNHL at birth. However, no differences in neuroimaging studies, plasma viral load, or outcomes at 6 months were found. These results suggest that hCMV-PCR in CSF may not be a useful prognostic marker in cCMV.",2017-05-01 +28459877,Database for the ampC alleles in Acinetobacter baumannii.,"Acinetobacter baumannii is a troublesome opportunistic pathogen with a high capacity for clonal dissemination. We announce the establishment of a database for the ampC locus in A. baumannii, in which novel ampC alleles are differentiated based on the occurrence of ≥ 1 nucleotide change, regardless of whether it is silent or missense. The database is openly accessible at the pubmlst platform for A. baumannii (http://pubmlst.org/abaumannii/). Forty-eight distinctive alleles of the ampC locus have so far been identified and deposited in the database. Isolates from clonal complex 1 (CC1), according to the Pasteur multilocus sequence typing scheme, had a variety of the ampC locus alleles, including alleles 1, 3, 4, 5, 6, 7, 8, 13, 14, 17, and 18. On the other hand, isolates from CC2 had the ampC alleles 2, 3, 19, 20, 21, 22, 23, 24, 26, 27, 28, and 46. Allele 3 was characteristic for sequence types ST3 or ST32. The ampC alleles 10, 16, and 25 were characteristic for CC10, ST16, and CC25, respectively. Our study points out that novel gene databases, in which alleles are numbered based on differences in their nucleotide identities, should replace traditional records that use amino acid substitutions to define new alleles.",2017-05-01 +28130238,The JWS online simulation database.,"

Summary

JWS Online is a web-based platform for construction, simulation and exchange of models in standard formats. We have extended the platform with a database for curated simulation experiments that can be accessed directly via a URL, allowing one-click reproduction of published results. Users can modify the simulation experiments and export them in standard formats. The Simulation database thus lowers the bar on exploring computational models, helps users create valid simulation descriptions and improves the reproducibility of published simulation experiments.

Availability and implementation

The Simulation Database is available online at https://jjj.bio.vu.nl/models/experiments/ .

Contact

jls@sun.ac.za .",2017-05-01 +27678460,MeSHDD: Literature-based drug-drug similarity for drug repositioning.,"

Objective

Drug repositioning is a promising methodology for reducing the cost and duration of the drug discovery pipeline. We sought to develop a computational repositioning method leveraging annotations in the literature, such as Medical Subject Heading (MeSH) terms.

Methods

We developed software to determine significantly co-occurring drug-MeSH term pairs and a method to estimate pair-wise literature-derived distances between drugs.

Results

We found that literature-based drug-drug similarities predicted the number of shared indications across drug-drug pairs. Clustering drugs based on their similarity revealed both known and novel drug indications. We demonstrate the utility of our approach by generating repositioning hypotheses for the commonly used diabetes drug metformin.

Conclusion

Our study demonstrates that literature-derived similarity is useful for identifying potential repositioning opportunities. We provided open-source code and deployed a free-to-use, interactive application to explore our database of similarity-based drug clusters (available at http://apps.chiragjpgroup.org/MeSHDD/ ).",2017-05-01 +29036452,A nonparametric significance test for sampled networks.,"

Motivation

Our work is motivated by an interest in constructing a protein-protein interaction network that captures key features associated with Parkinson's disease. While there is an abundance of subnetwork construction methods available, it is often far from obvious which subnetwork is the most suitable starting point for further investigation.

Results

We provide a method to assess whether a subnetwork constructed from a seed list (a list of nodes known to be important in the area of interest) differs significantly from a randomly generated subnetwork. The proposed method uses a Monte Carlo approach. As different seed lists can give rise to the same subnetwork, we control for redundancy by constructing a minimal seed list as the starting point for the significance test. The null model is based on random seed lists of the same length as a minimum seed list that generates the subnetwork; in this random seed list the nodes have (approximately) the same degree distribution as the nodes in the minimum seed list. We use this null model to select subnetworks which deviate significantly from random on an appropriate set of statistics and might capture useful information for a real world protein-protein interaction network.

Availability and implementation

The software used in this paper is available for download at https://sites.google.com/site/elliottande/. The software is written in Python and uses the NetworkX library.

Contact

ande.elliott@gmail.com or felix.reed-tsochas@sbs.ox.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +28968641,MemBrain-contact 2.0: a new two-stage machine learning model for the prediction enhancement of transmembrane protein residue contacts in the full chain.,"

Motivation

Inter-residue contacts in proteins have been widely acknowledged to be valuable for protein 3D structure prediction. Accurate prediction of long-range transmembrane inter-helix residue contacts can significantly improve the quality of simulated membrane protein models.

Results

In this paper, we present an updated MemBrain predictor, which aims to predict transmembrane protein residue contacts. Our new model benefits from an efficient learning algorithm that can mine latent structural features, which exist in original feature space. The new MemBrain is a two-stage inter-helix contact predictor. The first stage takes sequence-based features as inputs and outputs coarse contact probabilities for each residue pair, which will be further fed into convolutional neural network together with predictions from three direct-coupling analysis approaches in the second stage. Experimental results on the training dataset show that our method achieves an average accuracy of 81.6% for the top L/5 predictions using a strict sequence-based jackknife cross-validation. Evaluated on the test dataset, MemBrain can achieve 79.4% prediction accuracy. Moreover, for the top L/5 predicted long-range loop contacts, the prediction performance can reach an accuracy of 56.4%. These results demonstrate that the new MemBrain is promising for transmembrane protein's contact map prediction.

Availability and implementation

http://www.csbio.sjtu.edu.cn/bioinf/MemBrain/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +26047590,Visual annotation display (VLAD): a tool for finding functional themes in lists of genes.,"Experiments that employ genome scale technology platforms frequently result in lists of tens to thousands of genes with potential significance to a specific biological process or disease. Searching for biologically relevant connections among the genes or gene products in these lists is a common data analysis task. We have implemented a software application for uncovering functional themes in sets of genes based on their annotations to bio-ontologies, such as the gene ontology and the mammalian phenotype ontology. The application, called VisuaL Annotation Display (VLAD), performs a statistical analysis to test for the enrichment of ontology terms in a set of genes submitted by a researcher. The results for each analysis using VLAD includes a table of ontology terms, sorted in decreasing order of significance. Each row contains the term, statistics such as the number of annotated terms, the p value, etc., and the symbols of annotated genes. An accompanying graphical display shows portions of the ontology hierarchy, where node sizes are scaled based on p values. Although numerous ontology term enrichment programs already exist, VLAD is unique in that it allows users to upload their own annotation files and ontologies for customized term enrichment analyses, supports the analysis of multiple gene sets at once, provides interfaces to customize graphical output, and is tightly integrated with functional and biological details about mouse genes in the Mouse Genome Informatics (MGI) database. VLAD is available as a web-based application from the MGI web site (http://proto.informatics.jax.org/prototypes/vlad/).",2015-06-06 +28762507,Increasing physical activity for veterans in the Mental Health Intensive Case Management Program: A community-based intervention.,"

Aims and objectives

Individuals with severe mental illness (SMI), experience increased mortality-20 years greater disparity for men and 15 years greater disparity for women-compared to the general population (Thornicroft G. Physical health disparities and mental illness: The scandal of premature mortality. Br J Psychiatr. 2011;199:441-442). Numerous factors contribute to premature mortality in persons with SMI, including suicide and accidental death (Richardson RC, Faulkner G, McDevitt J, Skrinar GS, Hutchinson D, Piette JD. Integrating physical activity into mental health services for persons with serious mental illness. Psychiatr Serv. 2005;56(3):324-331; Thornicroft G. Physical health disparities and mental illness: The scandal of premature mortality. Br J Psychiatr. 2011;199:441-442), but research has shown that adverse health behaviors-including smoking, low rate of physical activity, poor diet, and high alcohol consumption-also significantly contribute to premature deaths (Jones J. Life expectancy in mental illness. Psychiatry Services. 2010. Retrieved from http://psychcentral.com/news/2010/07/13/life-expectancy-in-mental-illness). This quality improvement (QI) project sought to improve health and wellness for veterans in the Mental Health Intensive Case Management Program (MHICM), which is a community-based intensive program for veterans with SMI at risk for decompensation and frequent hospitalizations. At the time of this QI project, the program had 69 veterans who were assessed and treated weekly in their homes. The project introduced a pedometer steps intervention adapted from the VA MOVE! Program-a physical activity and weight management program-with the addition of personalized assistance from trained mental health professionals in the veteran's home environment. Because a large percentage of the veterans in the MHICM program had high blood pressure and increased weight, these outcomes were the focus of this project. 
Through mental health case management involvement and the comfort of their familiar living environment, veterans were assisted to meet their physical and mental health goals with a program that could easily be integrated into their daily lives.

Background

Healthy People 2020 developed goals to improve levels of physical activity and has ranked physical activity as a leading health indicator (US DHHS. Office of Disease Prevention and Health Promotion. Physical activity topic overview. In Healthy People 2020. 2016. Retrieved from https://www.healthypeople.gov/2020/topics-objectives/topic/physical-activity). Individuals with SMI are significantly less active than the general population (Shor and Shalev, 2014). It is sometimes difficult for the average individual to obtain the recommended 10,000 steps and even more difficult for those with SMI. Lifestyle modifications, in particular diet and exercise, are recommended for improvement of chronic disease outcomes (US Preventive Services Counseling Task Force, 2016). The health benefits of physical activity for people with SMI are mixed (Pearsall R, Smith D, Pelosi A, Geddes J. Exercise therapy in adults with serious mental illness: A systematic review and meta-analysis. BMC Psychiatr. 2014;14:117). Some studies found significant physical health benefits, while others did not. However, according to a review by Soundy et al., physical exercise is shown to not only have physical benefits but also psychosocial benefits. One of the barriers that hinder participation in physical activities is accessibility (Shor and Shalev, 2014). Integrating a more personalized supported, and in-home pedometer program into mental healthcare should ensure better access to interventions that could possibly reverse the causes of premature death.

Methods

The program was offered to 69 veterans in the MHICM. Forty-nine agreed to start the program and 20 declined. Twenty-five clients actually started the program with 17 veterans completing it. Preimplementation data included collecting blood pressure and weight measures for all veterans in the MHICM program. Additionally, a focus group was held with case managers to obtain a group perspective on motivating veterans to participate in this program. Further, a teaching session was held to review pedometers use, the client video, the client booklet, methods for getting veterans started, and the progression of the walking intervention. The pedometer physical activity intervention continued for 2 months. At the end of the 2 months, aggregate de-identified data on number of steps, blood pressure, and weight were collected. At the end of the program, the data were reviewed, synthesized, and analyzed, being careful to account for potentially intervening conditions and other chronic illnesses.

Results

The postimplementation data revealed that the mean weight decreased by 9 lbs. The percentage of controlled blood pressure increased from 60 to 84, while the percentage of uncontrolled blood pressure decreased from 40 to 16.

Conclusion

Implementation of a multiple component personalized exercise intervention program for veterans with SMI contributed to reduction in weight and blood pressure.",2017-08-01 +27288501,RareVariantVis: new tool for visualization of causative variants in rare monogenic disorders using whole genome sequencing data.,"

Motivation

The search for causative genetic variants in rare diseases of presumed monogenic inheritance has been boosted by the implementation of whole exome (WES) and whole genome (WGS) sequencing. In many cases, WGS seems to be superior to WES, but the analysis and visualization of the vast amounts of data is demanding.

Results

To aid this challenge, we have developed a new tool-RareVariantVis-for analysis of genome sequence data (including non-coding regions) for both germ line and somatic variants. It visualizes variants along their respective chromosomes, providing information about exact chromosomal position, zygosity and frequency, with point-and-click information regarding dbSNP IDs, gene association and variant inheritance. Rare variants as well as de novo variants can be flagged in different colors. We show the performance of the RareVariantVis tool in the Genome in a Bottle WGS data set.

Availability and implementation

https://www.bioconductor.org/packages/3.3/bioc/html/RareVariantVis.html

Contact

tomasz.stokowy@k2.uib.no

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-10 +24185695,The Nucleic Acid Database: new features and capabilities.,"The Nucleic Acid Database (NDB) (http://ndbserver.rutgers.edu) is a web portal providing access to information about 3D nucleic acid structures and their complexes. In addition to primary data, the NDB contains derived geometric data, classifications of structures and motifs, standards for describing nucleic acid features, as well as tools and software for the analysis of nucleic acids. A variety of search capabilities are available, as are many different types of reports. This article describes the recent redesign of the NDB Web site with special emphasis on new RNA-derived data and annotations and their implementation and integration into the search capabilities.",2013-10-31 +29992322,KM-express: an integrated online patient survival and gene expression analysis tool for the identification and functional characterization of prognostic markers in breast and prostate cancers. ,"The identification and functional characterization of novel biomarkers in cancer requires survival analysis and gene expression analysis of both patient samples and cell line models. To help facilitate this process, we have developed KM-Express. KM-Express holds an extensive manually curated transcriptomic profile of 45 different datasets for prostate and breast cancer with phenotype and pathoclinical information, spanning from clinical samples to cell lines. KM-Express also contains The Cancer Genome Atlas datasets for 30 other cancer types with matching cell line expression data for 23 of them. We present KM-Express as a hypothesis generation tool for researchers to identify potential new prognostic RNA biomarkers as well as targets for further downstream functional cell-based studies. 
Specifically, KM-Express allows users to compare the expression level of genes in different groups of patients based on molecular, genetic, clinical and pathological status. Moreover, KM-Express aids the design of biological experiments based on the expression profile of the genes in different cell lines. Thus, KM-Express provides a one-stop analysis from bench work to clinical prospects. We have used this tool to successfully evaluate the prognostic potential of previously published biomarkers for prostate cancer and breast cancer. We believe KM-Express will accelerate the translation of biomedical research from bench to bed.Database URL: http://ec2-52-201-246-161.compute-1.amazonaws.com/kmexpress/index.php.",2018-01-01 +27378299,Icarus: visualizer for de novo assembly evaluation.,": Data visualization plays an increasingly important role in NGS data analysis. With advances in both sequencing and computational technologies, it has become a new bottleneck in genomics studies. Indeed, evaluation of de novo genome assemblies is one of the areas that can benefit from the visualization. However, even though multiple quality assessment methods are now available, existing visualization tools are hardly suitable for this purpose. Here, we present Icarus-a novel genome visualizer for accurate assessment and analysis of genomic draft assemblies, which is based on the tool QUAST. Icarus can be used in studies where a related reference genome is available, as well as for non-model organisms. The tool is available online and as a standalone application.

Availability and implementation

http://cab.spbu.ru/software/icarus CONTACT: aleksey.gurevich@spbu.ruSupplementary information: Supplementary data are available at Bioinformatics online.",2016-07-04 +27723775,Evaluating Transcription Factor Activity Changes by Scoring Unexplained Target Genes in Expression Data.,"Several methods predict activity changes of transcription factors (TFs) from a given regulatory network and measured expression data. But available gene regulatory networks are incomplete and contain many condition-dependent regulations that are not relevant for the specific expression measurement. It is not known which combination of active TFs is needed to cause a change in the expression of a target gene. A method to systematically evaluate the inferred activity changes is missing. We present such an evaluation strategy that indicates for how many target genes the observed expression changes can be explained by a given set of active TFs. To overcome the problem that the exact combination of active TFs needed to activate a gene is typically not known, we assume a gene to be explained if there exists any combination for which the predicted active TFs can possibly explain the observed change of the gene. We introduce the i-score (inconsistency score), which quantifies how many genes could not be explained by the set of activity changes of TFs. We observe that, even for these minimal requirements, published methods yield many unexplained target genes, i.e. large i-scores. This holds for all methods and all expression datasets we evaluated. We provide new optimization methods to calculate the best possible (minimal) i-score given the network and measured expression data. The evaluation of this optimized i-score on a large data compendium yields many unexplained target genes for almost every case. This indicates that currently available regulatory networks are still far from being complete. 
Both the presented Act-SAT and Act-A* methods produce optimal sets of TF activity changes, which can be used to investigate the difficult interplay of expression and network data. A web server and a command line tool to calculate our i-score and to find the active TFs associated with the minimal i-score is available from https://services.bio.ifi.lmu.de/i-score.",2016-10-10 +28453667,Linearity of network proximity measures: implications for set-based queries and significance testing.,"

Motivation

In recent years, various network proximity measures have been proposed to facilitate the use of biomolecular interaction data in a broad range of applications. These applications include functional annotation, disease gene prioritization, comparative analysis of biological systems and prediction of new interactions. In such applications, a major task is the scoring or ranking of the nodes in the network in terms of their proximity to a given set of 'seed' nodes (e.g. a group of proteins that are identified to be associated with a disease, or are differentially expressed in a certain condition). Many different network proximity measures are utilized for this purpose, and these measures are quite diverse in terms of the benefits they offer.

Results

We propose a unifying framework for characterizing network proximity measures for set-based queries. We observe that many existing measures are linear, in that the proximity of a node to a set of nodes can be represented as an aggregation of its proximity to the individual nodes in the set. Based on this observation, we propose methods for processing of set-based proximity queries that take advantage of sparse local proximity information. In addition, we provide an analytical framework for characterizing the distribution of proximity scores based on reference models that accurately capture the characteristics of the seed set (e.g. degree distribution and biological function). The resulting framework facilitates computation of exact figures for the statistical significance of network proximity scores, enabling assessment of the accuracy of Monte Carlo simulation based estimation methods.

Availability and implementation

Implementations of the methods in this paper are available at https://bioengine.case.edu/crosstalker which includes a robust visualization for results viewing.

Contact

stm@case.edu or mxk331@case.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +27528229,OASIS 2: online application for survival analysis 2 with features for the analysis of maximal lifespan and healthspan in aging research.,"Online application for survival analysis (OASIS) has served as a popular and convenient platform for the statistical analysis of various survival data, particularly in the field of aging research. With the recent advances in the fields of aging research that deal with complex survival data, we noticed a need for updates to the current version of OASIS. Here, we report OASIS 2 (http://sbi.postech.ac.kr/oasis2), which provides extended statistical tools for survival data and an enhanced user interface. In particular, OASIS 2 enables the statistical comparison of maximal lifespans, which is potentially useful for determining key factors that limit the lifespan of a population. Furthermore, OASIS 2 provides statistical and graphical tools that compare values in different conditions and times. That feature is useful for comparing age-associated changes in physiological activities, which can be used as indicators of ""healthspan."" We believe that OASIS 2 will serve as a standard platform for survival analysis with advanced and user-friendly statistical tools for experimental biologists in the field of aging research.",2016-08-01 +25179504,Differential motif enrichment analysis of paired ChIP-seq experiments.,"

Background

Motif enrichment analysis of transcription factor ChIP-seq data can help identify transcription factors that cooperate or compete. Previously, little attention has been given to comparative motif enrichment analysis of pairs of ChIP-seq experiments, where the binding of the same transcription factor is assayed under different conditions. Such comparative analysis could potentially identify the distinct regulatory partners/competitors of the assayed transcription factor under different conditions or at different stages of development.

Results

We describe a new methodology for identifying sequence motifs that are differentially enriched in one set of DNA or RNA sequences relative to another set, and apply it to paired ChIP-seq experiments. We show that, using paired ChIP-seq data for a single transcription factor, differential motif enrichment analysis identifies all the known key transcription factors involved in the transformation of non-cancerous immortalized breast cells (MCF10A-ER-Src cells) into cancer stem cells whereas non-differential motif enrichment analysis does not. We also show that differential motif enrichment analysis identifies regulatory motifs that are significantly enriched at constrained locations within the bound promoters, and that these motifs are not identified by non-differential motif enrichment analysis. Our methodology differs from other approaches in that it leverages both comparative enrichment and positional enrichment of motifs in ChIP-seq peak regions or in the promoters of genes bound by the transcription factor.

Conclusions

We show that differential motif enrichment analysis of paired ChIP-seq experiments offers biological insights not available from non-differential analysis. In contrast to previous approaches, our method detects motifs that are enriched in a constrained region in one set of sequences, but not enriched in the same region in the comparative set. We have enhanced the web-based CentriMo algorithm to allow it to perform the constrained differential motif enrichment analysis described in this paper, and CentriMo's on-line interface (http://meme.ebi.edu.au) provides dozens of databases of DNA- and RNA-binding motifs from a full range of organisms. All data and output files presented here are available at http://research.imb.uq.edu.au/t.bailey/supplementary_data/Lesluyes2014.",2014-09-02 +25282047,An algorithm of discovering signatures from DNA databases on a computer cluster.,"

Background

Signatures are short sequences that are unique and not similar to any other sequence in a database that can be used as the basis to identify different species. Even though several signature discovery algorithms have been proposed in the past, these algorithms require the entirety of databases to be loaded in the memory, thus restricting the amount of data that they can process. It makes those algorithms unable to process databases with large amounts of data. Also, those algorithms use sequential models and have slower discovery speeds, meaning that the efficiency can be improved.

Results

In this research, we are debuting the utilization of a divide-and-conquer strategy in signature discovery and have proposed a parallel signature discovery algorithm on a computer cluster. The algorithm applies the divide-and-conquer strategy to solve the problem posed to the existing algorithms where they are unable to process large databases and uses a parallel computing mechanism to effectively improve the efficiency of signature discovery. Even when run with just the memory of regular personal computers, the algorithm can still process large databases such as the human whole-genome EST database which were previously unable to be processed by the existing algorithms.

Conclusions

The algorithm proposed in this research is not limited by the amount of usable memory and can rapidly find signatures in large databases, making it useful in applications such as Next Generation Sequencing and other large database analysis and processing. The implementation of the proposed algorithm is available at http://www.cs.pu.edu.tw/~fang/DDCSDPrograms/DDCSD.htm.",2014-10-05 +27497441,Metrics for rapid quality control in RNA structure probing experiments.,"

Motivation

The diverse functionalities of RNA can be attributed to its capacity to form complex and varied structures. The recent proliferation of new structure probing techniques coupled with high-throughput sequencing has helped RNA studies expand in both scope and depth. Despite differences in techniques, most experiments face similar challenges in reproducibility due to the stochastic nature of chemical probing and sequencing. As these protocols expand to transcriptome-wide studies, quality control becomes a more daunting task. General and efficient methodologies are needed to quantify variability and quality in the wide range of current and emerging structure probing experiments.

Results

We develop metrics to rapidly and quantitatively evaluate data quality from structure probing experiments, demonstrating their efficacy on both small synthetic libraries and transcriptome-wide datasets. We use a signal-to-noise ratio concept to evaluate replicate agreement, which has the capacity to identify high-quality data. We also consider and compare two methods to assess variability inherent in probing experiments, which we then utilize to evaluate the coverage adjustments needed to meet desired quality. The developed metrics and tools will be useful in summarizing large-scale datasets and will help standardize quality control in the field.

Availability and implementation

The data and methods used in this article are freely available at: http://bme.ucdavis.edu/aviranlab/SPEQC_software CONTACT: saviran@ucdavis.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-06 +26879667,NETIMIS: Dynamic Simulation of Health Economics Outcomes Using Big Data.,"Many healthcare organizations are now making good use of electronic health record (EHR) systems to record clinical information about their patients and the details of their healthcare. Electronic data in EHRs is generated by people engaged in complex processes within complex environments, and their human input, albeit shaped by computer systems, is compromised by many human factors. These data are potentially valuable to health economists and outcomes researchers but are sufficiently large and complex enough to be considered part of the new frontier of 'big data'. This paper describes emerging methods that draw together data mining, process modelling, activity-based costing and dynamic simulation models. Our research infrastructure includes safe links to Leeds hospital's EHRs with 3 million secondary and tertiary care patients. We created a multidisciplinary team of health economists, clinical specialists, and data and computer scientists, and developed a dynamic simulation tool called NETIMIS (Network Tools for Intervention Modelling with Intelligent Simulation; http://www.netimis.com ) suitable for visualization of both human-designed and data-mined processes which can then be used for 'what-if' analysis by stakeholders interested in costing, designing and evaluating healthcare interventions. We present two examples of model development to illustrate how dynamic simulation can be informed by big data from an EHR. 
We found the tool provided a focal point for multidisciplinary team work to help them iteratively and collaboratively 'deep dive' into big data.",2016-02-01 +28776938,HPSLPred: An Ensemble Multi-Label Classifier for Human Protein Subcellular Location Prediction with Imbalanced Source. ,"Predicting the subcellular localization of proteins is an important and challenging problem. Traditional experimental approaches are often expensive and time-consuming. Consequently, a growing number of research efforts employ a series of machine learning approaches to predict the subcellular location of proteins. There are two main challenges among the state-of-the-art prediction methods. First, most of the existing techniques are designed to deal with multi-class rather than multi-label classification, which ignores connections between multiple labels. In reality, multiple locations of particular proteins imply that there are vital and unique biological significances that deserve special focus and cannot be ignored. Second, techniques for handling imbalanced data in multi-label classification problems are necessary, but never employed. For solving these two issues, we have developed an ensemble multi-label classifier called HPSLPred, which can be applied for multi-label classification with an imbalanced protein source. For convenience, a user-friendly webserver has been established at http://server.malab.cn/HPSLPred.",2017-09-01 +29250090,Inference of Transcription Regulatory Network in Low Phytic Acid Soybean Seeds.,"A dominant loss of function mutation in myo-inositol phosphate synthase (MIPS) gene and recessive loss of function mutations in two multidrug resistant protein type-ABC transporter genes not only reduce the seed phytic acid levels in soybean, but also affect the pathways associated with seed development, ultimately resulting in low emergence. 
To understand the regulatory mechanisms and identify key genes that intervene in the seed development process in low phytic acid crops, we performed computational inference of gene regulatory networks in low and normal phytic acid soybeans using a time course transcriptomic data and multiple network inference algorithms. We identified a set of putative candidate transcription factors and their regulatory interactions with genes that have functions in myo-inositol biosynthesis, auxin-ABA signaling, and seed dormancy. We evaluated the performance of our unsupervised network inference method by comparing the predicted regulatory network with published regulatory interactions in Arabidopsis. Some contrasting regulatory interactions were observed in low phytic acid mutants compared to non-mutant lines. These findings provide important hypotheses on expression regulation of myo-inositol metabolism and phytohormone signaling in developing low phytic acid soybeans. The computational pipeline used for unsupervised network learning in this study is provided as open source software and is freely available at https://lilabatvt.github.io/LPANetwork/.",2017-11-30 +28218686,Natural Products as Chemopreventive Agents by Potential Inhibition of the Kinase Domain in ErbB Receptors. ,"Small molecules found in natural products provide therapeutic benefits due to their pharmacological or biological activity, which may increase or decrease the expression of human epidermal growth factor receptor (HER), a promising target in the modification of signaling cascades involved in excessive cellular growth. In this study, in silico molecular protein-ligand docking protocols were performed with AutoDock Vina in order to evaluate the interaction of 800 natural compounds (NPs) from the NatProd Collection (http://www.msdiscovery.com/natprod.html), with four human HER family members: HER1 (PDB: 2ITW), HER2 (PDB: 3PP0), HER3 (PDB: 3LMG) and HER4 (PDB: 2R4B). 
The best binding affinity values (kcal/mol) for docking pairs were obtained for HER1-podototarin (-10.7), HER2-hecogenin acetate (-11.2), HER3-hesperidin (-11.5) and HER4-theaflavin (-10.7). The reliability of the theoretical calculations was evaluated employing published data on HER inhibition correlated with in silico binding calculations. IC50 values followed a significant linear relationship with the theoretical binding Affinity data for HER1 (R = 0.656, p < 0.0001) and HER2 (R = 0.543, p < 0.0001), but not for HER4 (R = 0.364, p > 0.05). In short, this methodology allowed the identification of several NPs as HER inhibitors, being useful in the discovery and design of more potent and selective anticancer drugs.",2017-02-17 +24897119,A MLVA genotyping scheme for global surveillance of the citrus pathogen Xanthomonas citri pv. citri suggests a worldwide geographical expansion of a single genetic lineage.,"MultiLocus Variable number of tandem repeat Analysis (MLVA) has been extensively used to examine epidemiological and evolutionary issues on monomorphic human pathogenic bacteria, but not on bacterial plant pathogens of agricultural importance albeit such tools would improve our understanding of their epidemiology, as well as of the history of epidemics on a global scale. Xanthomonas citri pv. citri is a quarantine organism in several countries and a major threat for the citrus industry worldwide. We screened the genomes of Xanthomonas citri pv. citri strain IAPAR 306 and of phylogenetically related xanthomonads for tandem repeats. From these in silico data, an optimized MLVA scheme was developed to assess the global diversity of this monomorphic bacterium. Thirty-one minisatellite loci (MLVA-31) were selected to assess the genetic structure of 129 strains representative of the worldwide pathological and genetic diversity of X. citri pv. citri. Based on Discriminant Analysis of Principal Components (DAPC), four pathotype-specific clusters were defined. 
DAPC cluster 1 comprised strains that were implicated in the major geographical expansion of X. citri pv. citri during the 20th century. A subset of 12 loci (MLVA-12) resolved 89% of the total diversity and matched the genetic structure revealed by MLVA-31. MLVA-12 is proposed for routine epidemiological identification of X. citri pv. citri, whereas MLVA-31 is proposed for phylogenetic and population genetics studies. MLVA-31 represents an opportunity for international X. citri pv. citri genotyping and data sharing. The MLVA-31 data generated in this study was deposited in the Xanthomonas citri genotyping database (http://www.biopred.net/MLVA/).",2014-06-04 +28580299,Whole-genome shotgun sequence of phenazine-producing endophytic Streptomyces kebangsaanensis SUK12.,"Streptomyces sp. produces bioactive compounds with a broad spectrum of activities. Streptomyces kebangsaanensis SUK12 has been identified as a novel endophytic bacterium isolated from ethnomedicinal plant Portulaca oleracea, and was found to produce the phenazine class of biologically active antimicrobial metabolites. The potential use of the phenazines has led to our research interest in determining the genome sequence of Streptomyces kebangsaanensis SUK12. This Whole Genome Shotgun project has been deposited at DDBJ/ENA/GenBank under the accession number PRJNA269542. The raw sequence data are available [https://www.ncbi.nlm.nih.gov/Traces/study/?acc=SRP105770].",2017-05-24 +25246425,Hybrid curation of gene-mutation relations combining automated extraction and crowdsourcing. ,"This article describes capture of biological information using a hybrid approach that combines natural language processing to extract biological entities and crowdsourcing with annotators recruited via Amazon Mechanical Turk to judge correctness of candidate biological relations. 
These techniques were applied to extract gene-mutation relations from biomedical abstracts with the goal of supporting production scale capture of gene-mutation-disease findings as an open source resource for personalized medicine. The hybrid system could be configured to provide good performance for gene-mutation extraction (precision ∼82%; recall ∼70% against an expert-generated gold standard) at a cost of $0.76 per abstract. This demonstrates that crowd labor platforms such as Amazon Mechanical Turk can be used to recruit quality annotators, even in an application requiring subject matter expertise; aggregated Turker judgments for gene-mutation relations exceeded 90% accuracy. Over half of the precision errors were due to mismatches against the gold standard hidden from annotator view (e.g., incorrect EntrezGene identifier or incorrect mutation position extracted), or incomplete task instructions (e.g., the need to exclude nonhuman mutations). The hybrid curation model provides a readily scalable cost-effective approach to curation, particularly if coupled with expert human review to filter precision errors. We plan to generalize the framework and make it available as open source software. http://www.mitre.org/publications/technical-papers/hybrid-curation-of-gene-mutation-relations-combining-automated.",2014-09-22 +27808512,Screening Explorer-An Interactive Tool for the Analysis of Screening Results.,"Screening Explorer is a web-based application that allows for an intuitive evaluation of the results of screening experiments using complementary metrics in the field. The usual evaluation of screening results implies the separate generation and apprehension of the ROC, predictiveness, and enrichment curves and their global metrics. Similarly, partial metrics need to be calculated repeatedly for different fractions of a data set and there exists no handy tool that allows reading partial metrics simultaneously on different charts. 
For a deeper understanding of the results of screening experiments, we rendered their analysis straightforward by linking these metrics interactively in an interactive usable web-based application. We also implemented simple consensus scoring methods based on scores normalization, standardization (z-scores), and compounds ranking to evaluate the enrichments that can be expected through methods combination. Two demonstration data sets allow the users to easily apprehend the functions of this tool that can be applied to the analysis of virtual and experimental screening results. Screening Explorer is freely accessible at http://stats.drugdesign.fr .",2016-11-22 +27284565,Assessing the perceived quality of brachial artery Flow Mediated Dilation studies for inclusion in meta-analyses and systematic reviews: Description of data employed in the development of a scoring tool based on currently accepted guidelines.,"Brachial artery Flow Mediated Dilation (FMD) is widely used as a non-invasive measure of endothelial function. Adherence to expert consensus guidelines on FMD measurement has been found to be of vital importance to obtain reproducible data. This article lists the literature data which was considered in the development of a tool to aid in the objective judgement of the extent to which published studies adhered to expert guidelines for FMD measurement. Application of this tool in a systematic review of FMD studies (http://dx.doi.org/10.1016/j.atherosclerosis.2016.03.011) (Greyling et al., 2016 [1]) indicated that adherence to expert consensus guidelines is strongly correlated to the reproducibility of FMD data.",2016-05-13 +22587966,Mycobacterium tuberculosis and Clostridium difficile interactomes: demonstration of rapid development of computational system for bacterial interactome prediction.,"

Background

Protein-protein interaction (PPI) networks (interactomes) of most organisms, except for some model organisms, are largely unknown. Experimental methods including high-throughput techniques are highly resource intensive. Therefore, computational discovery of PPIs can accelerate biological discovery by presenting ""most-promising"" pairs of proteins that are likely to interact. For many bacteria, genome sequence, and thereby genomic context of proteomes, is readily available; additionally, for some of these proteomes, localization and functional annotations are also available, but interactomes are not available. We present here a method for rapid development of computational system to predict interactome of bacterial proteomes. While other studies have presented methods to transfer interologs across species, here, we propose transfer of computational models to benefit from cross-species annotations, thereby predicting many more novel interactions even in the absence of interologs. Mycobacterium tuberculosis (Mtb) and Clostridium difficile (CD) have been used to demonstrate the work.

Results

We developed a random forest classifier over features derived from Gene Ontology annotations and genetic context scores provided by STRING database for predicting Mtb and CD interactions independently. The Mtb classifier gave a precision of 94% and a recall of 23% on a held out test set. The Mtb model was then run on all the 8 million protein pairs of the Mtb proteome, resulting in 708 new interactions (at 94% expected precision) or 1,595 new interactions at 80% expected precision. The CD classifier gave a precision of 90% and a recall of 16% on a held out test set. The CD model was run on all the 8 million protein pairs of the CD proteome, resulting in 143 new interactions (at 90% expected precision) or 580 new interactions (at 80% expected precision). We also compared the overlap of predictions of our method with STRING database interactions for CD and Mtb and also with interactions identified recently by a bacterial 2-hybrid system for Mtb. To demonstrate the utility of transfer of computational models, we made use of the developed Mtb model and used it to predict CD protein-pairs. The cross species model thus developed yielded a precision of 88% at a recall of 8%. To demonstrate transfer of features from other organisms in the absence of feature-based and interaction-based information, we transferred missing feature values from Mtb orthologs into the CD data. In transferring this data from orthologs (not interologs), we showed that a large number of interactions can be predicted.

Conclusions

Rapid discovery of (partial) bacterial interactome can be made by using existing set of GO and STRING features associated with the organisms. We can make use of cross-species interactome development, when there are not even sufficient known interactions to develop a computational prediction system. Computational model of well-studied organism(s) can be employed to make the initial interactome prediction for the target organism. We have also demonstrated successfully, that annotations can be transferred from orthologs in well-studied organisms enabling accurate predictions for organisms with no annotations. These approaches can serve as building blocks to address the challenges associated with feature coverage, missing interactions towards rapid interactome discovery for bacterial organisms.

Availability

The predictions for all Mtb and CD proteins are made available at: http://severus.dbmi.pitt.edu/TB and http://severus.dbmi.pitt.edu/CD respectively for browsing as well as for download.",2012-03-21 +25054277,"Prevalence of amyotrophic lateral sclerosis - United States, 2010-2011.","

Problem/condition

Amyotrophic lateral sclerosis (ALS), commonly known as Lou Gehrig's disease, is a progressive and fatal neuromuscular disease for which no cure has been identified. Although ALS has no known definitive cause, familial ALS (a hereditary form) occurs in 5%-10% of cases. Many hypotheses have been formulated about what causes ALS, including chemical exposures, occupational exposure, military service, infectious agents, nutritional intake, physical activity, and trauma. Worldwide, ALS affects white males aged >60 years more often than any other group. In the United States, ALS surveillance is necessary to estimate the incidence and prevalence of ALS and collect data on risk factors. ALS is not a nationally notifiable condition in the United States (i.e., it is not a reportable condition in all jurisdictions), and individual state reporting requirements differ, with Massachusetts being the only state that mandates reporting.

Period covered

October 19, 2010-December 31, 2011.

Description of system

In 2009, the federal Agency for Toxic Substances and Disease Registry (ATSDR) implemented the National ALS Registry to collect and analyze data regarding persons with ALS in the United States. The main goals of the Registry, as defined by the 2008 ALS Registry Act, are to describe the incidence and prevalence of ALS better, examine risk factors such as environmental and occupational exposures, and characterize the demographics of those living with ALS. The Registry uses a two-pronged approach to identify all cases of ALS. The first approach uses four existing national administrative databases (maintained by Medicare, Medicaid, the Veterans Health Administration, and the Veterans Benefits Administration) to identify prevalence of ALS. The second approach uses a secure web portal (http://www.cdc.gov/als) that was launched to the public on October 19, 2010, to identify cases not included in the four national administrative databases and to collect risk-factor data on known ALS cases. ALS patients who have registered via the web portal can complete brief risk-factor surveys online that are intended to attain a better understanding of ALS (e.g., genetics and environmental and occupational exposures) and help determine disease progression.

Results

During October 19, 2010-December 31, 2011, a total of 12,187 persons meeting the surveillance case definition of definite ALS were identified by the Registry, for a prevalence of 3.9 cases of ALS per 100,000 persons in the U.S. general population. Incidence cannot be measured because the date of diagnosis was not noted in all patient records. Overall, ALS was more common among white males, non-Hispanics, and persons aged 60-69 years. The age groups with the lowest number of persons with ALS were age 18-39 years and age >80 years. Males had a higher prevalence rate of ALS than females overall and across all data sources.

Interpretation

This is the first (and to date the only) effort to estimate the national prevalence of ALS in the United States. Using the combined approach of the national databases and the web-based portal enables researchers to estimate ALS prevalence more accurately. Registry findings for the prevalence of ALS are consistent with findings from long-established ALS registries in Europe and from smaller-scale epidemiologic studies conducted previously in the United States. Although incidence cannot be measured with Registry data at this time, incidence is being measured in smaller geographic areas that have participated in ATSDR's State and Metropolitan Area ALS surveillance projects.

Public health actions

Data collected by the National ALS Registry are being used to better describe the prevalence of ALS in the United States and to help facilitate research. The combined approach of using national administrative databases and a self-enrollment web portal to collect data is novel and potentially could be used for other non-notifiable diseases such as Parkinson's disease or multiple sclerosis. ATSDR is working closely with ALS advocacy and support groups, researchers, health-care professionals, and others to promote the National ALS Registry in order to capture all cases of ALS. To further enhance and strengthen the Registry, ATSDR is 1) adding new modules to the portal to examine other potential risk factors, 2) launching a feasibility study for a novel ALS biorepository (available at http://wwwn.cdc.gov/als/ALSBioRegistry.aspx) linked to the Registry that would potentially provide biologic specimens from patient enrollees to help researchers learn more about disease etiology, 3) engaging in surveillance activities in selected states and large metropolitan areas to help test the completeness of the Registry as well as calculating incidence in these areas, and 4) using the Registry to recruit patient enrollees for new clinical trials and epidemiologic studies. Additional information about the National ALS Registry is available at http://www.cdc.gov/als or by calling toll-free at 1-877-442-9719.",2014-07-01 +27419846,Structural and Physico-Chemical Interpretation (SPCI) of QSAR Models and Its Comparison with Matched Molecular Pair Analysis.,"This paper describes the Structural and Physico-Chemical Interpretation (SPCI) approach, which is an extension of a recently reported method for interpretation of quantitative structure-activity relationship (QSAR) models. This approach can efficiently be used to reveal structural motifs and the major physicochemical factors affecting the investigated properties. 
Its efficacy was demonstrated both on the classical Free-Wilson data set and on several data sets with different end points (permeability of the blood-brain barrier, fibrinogen receptor antagonists, acute oral toxicity). Structure-activity patterns extracted from QSAR models with SPCI were in good correspondence with experimentally observed relationships and molecular docking, regardless of the machine learning method used. Comparison of SPCI with the matched molecular pair (MMP) method clearly shows an advantage of our approach over MMP, especially for small or structurally diverse data sets. The developed approach has been implemented in the SPCI software tool with a graphical user interface, which is publicly available at http://qsar4u.com/pages/sirms_qsar.php .",2016-07-29 +28137292,Correcting for cell-type effects in DNA methylation studies: reference-based method outperforms latent variable approaches in empirical studies.,"Based on an extensive simulation study, McGregor and colleagues recently recommended the use of surrogate variable analysis (SVA) to control for the confounding effects of cell-type heterogeneity in DNA methylation association studies in scenarios where no cell-type proportions are available. As their recommendation was mainly based on simulated data, we sought to replicate findings in two large-scale empirical studies. In our empirical data, SVA did not fully correct for cell-type effects, its performance was somewhat unstable, and it carried a risk of missing true signals caused by removing variation that might be linked to actual disease processes. By contrast, a reference-based correction method performed well and did not show these limitations. A disadvantage of this approach is that if reference methylomes are not (publicly) available, they will need to be generated once for a small set of samples. 
However, given the notable risk we observed for cell-type confounding, we argue that, to avoid introducing false-positive findings into the literature, it could be well worth making this investment. Please see related Correspondence article: https://genomebiology.biomedcentral.com/articles/10/1186/s13059-017-1149-7 and related Research article: https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0935-y.",2017-01-30 +27634946,AKT: ancestry and kinship toolkit.,"

Motivation

Ancestry and Kinship Toolkit (AKT) is a statistical genetics tool for analysing large cohorts of whole-genome sequenced samples. It can rapidly detect related samples, characterize sample ancestry, calculate correlation between variants, check Mendel consistency and perform data clustering. AKT brings together the functionality of many state-of-the-art methods, with a focus on speed and a unified interface. We believe it will be an invaluable tool for the curation of large WGS datasets.

Availability and implementation

The source code is available at https://illumina.github.io/akt CONTACTS: joconnell@illumina.com or rudy.d.arthur@gmail.com. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-14 +,Functional and Structural Overview of G-Protein-Coupled Receptors Comprehensively Obtained from Genome Sequences,"An understanding of the functional mechanisms of G-protein-coupled receptors (GPCRs) is very important for GPCR-related drug design. We have developed an integrated GPCR database (SEVENS http://sevens.cbrc.jp/) that includes 64,090 reliable GPCR genes comprehensively identified from 56 eukaryote genome sequences, and overviewed the sequences and structure spaces of the GPCRs. In vertebrates, the number of receptors for biological amines, peptides, etc. is conserved in most species, whereas the number of chemosensory receptors for odorant, pheromone, etc. significantly differs among species. The latter receptors tend to be single exon type or a few exon type and show a high ratio in the numbers of GPCRs, whereas some families, such as Class B and Class C receptors, have long lengths due to the presence of many exons. Statistical analyses of amino acid residues reveal that most of the conserved residues in Class A GPCRs are found in the cytoplasmic half regions of transmembrane (TM) helices, while residues characteristic to each subfamily found on the extracellular half regions. The 69 of Protein Data Bank (PDB) entries of complete or fragmentary structures could be mapped on the TM/loop regions of Class A GPCRs covering 14 subfamilies.",2011-04-01 +28444590,Transcripts of pectin-degrading enzymes and isolation of complete cDNA sequence of a pectate lyase gene induced by coffee white stem borer (Xylotrechus quadripes) in the bark tissue of Coffea canephora (robusta coffee).,"Of the two commercially cultivated coffee (Coffea) species, C. arabica (arabica) is highly susceptible and C. 
canephora (robusta) is highly resistant to the insect pest Xylotrechus quadripes (Coleoptera: Cerambycidae), commonly known as coffee white stem borer (CWSB). We constructed a forward-subtracted cDNA library by Suppression Subtractive Hybridization (SSH) from robusta bark tissue for profiling genes induced by CWSB infestation. Among the 265 unigenes of the SSH EST library, 7 unigenes (5 contigs and 2 singletons) matching different pectin-degrading enzymes were discovered. These ESTs matched one pectate lyase, three polygalacturonases, and one pectin acetylesterase gene. Quantitative real-time PCR (qRT-PCR) revealed that CWSB infestation strongly induces the pectate lyase gene at 72 h. Complete cDNA sequence of the pectate lyase gene was obtained through 3' and 5' RACE reactions. It was a 1595 bp long sequence that included full CDS and both UTRs. Against C. canephora genome sequences in Coffee Genome Hub database ( http://coffee-genome.org/ ), it had 22 matches to different pectate lyase genes mapped on 9 of the 11 pseudochromosomes, the top match being Cc07_g00190 Pectate lyase. In NCBI database, it matched pectate lyase sequences of several plants. Apart from C. canephora, the closest pectate lyase matches were from Sesamum indicum and Nicotiana tabacum. The pectinolytic enzymes discovered here are thought to play a role in the production of oligogalacturonides (OGs) which act as Damage-Associated Molecular Pattern (DAMP) signals eliciting innate immunity in plants. The pectate lyase gene, induced by CWSB infestation, along with other endogenous pectinolytic enzymes and CWSB-specific elicitors, may be involved in triggering basal defense responses to protect the CWSB-damaged tissue against pathogens, as well as to contain CWSB in robusta.",2017-04-25 +28031187,PhosD: inferring kinase-substrate interactions based on protein domains.,"

Motivation

Identifying the kinase-substrate relationships is vital to understanding the phosphorylation events and various biological processes, especially signal transductions. Although a large number of phosphorylation sites have been detected, unfortunately, it is rarely known which kinases activate those sites. Although distinct computational approaches have been proposed to predict the kinase-substrate interactions, the prediction accuracy still needs to be improved.

Results

In this paper, we propose a novel probabilistic model named PhosD to predict kinase-substrate relationships based on protein domains with the assumption that kinase-substrate interactions are accomplished with kinase-domain interactions. By further taking into account protein-protein interactions, our PhosD outperforms other popular approaches on several benchmark datasets with higher precision. In addition, some of our predicted kinase-substrate relationships are validated by signaling pathways, indicating the predictive power of our approach. Furthermore, we notice that given a kinase, the more substrates are known for the kinase, the more accurate its predicted substrates will be, and the domains involved in kinase-substrate interactions are found to be more conserved across proteins phosphorylated by multiple kinases. These findings can help develop more efficient computational approaches in the future.

Availability and implementation

The data and results are available at http://comp-sysbio.org/phosd.

Contact

xm_zhao@tongji.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +28941844,Finding optimum width of discretization for gene expressions using functional annotations.,"Discretizing gene expression values is an important step in data preprocessing as it helps in reducing noise and experimental errors. This in turn provides better results in various tasks such as gene regulatory network analysis and disease prediction. A supervised discretization method for gene expressions using gene annotation is developed. The method is called ""Gene Annotation Based Discretization"" (GABD) where the discretization width is determined by maximizing the positive predictive value (PPV), computed using gene annotations, for top 20,000 gene pairs. The method can capture the gene similarity better than those obtained using original expressions. The performance of GABD is compared with some existing discretization methods like equal width discretization, equal frequency discretization and k-means discretization in terms of positive predictive value (PPV). The utility of GABD is also shown by clustering genes using k-medoid algorithm and thereby predicting the function of 23 unclassified Saccharomyces cerevisiae genes using p-value cut off 10-10. The source code for GABD is available at http://www.sampa.droppages.com/GABD.html.",2017-09-18 +24165883,IMG 4 version of the integrated microbial genomes comparative analysis system.,"The Integrated Microbial Genomes (IMG) data warehouse integrates genomes from all three domains of life, as well as plasmids, viruses and genome fragments. IMG provides tools for analyzing and reviewing the structural and functional annotations of genomes in a comparative context. IMG's data content and analytical capabilities have increased continuously since its first version released in 2005. 
Since the last report published in the 2012 NAR Database Issue, IMG's annotation and data integration pipelines have evolved while new tools have been added for recording and analyzing single cell genomes, RNA Seq and biosynthetic cluster data. Different IMG datamarts provide support for the analysis of publicly available genomes (IMG/W: http://img.jgi.doe.gov/w), expert review of genome annotations (IMG/ER: http://img.jgi.doe.gov/er) and teaching and training in the area of microbial genome analysis (IMG/EDU: http://img.jgi.doe.gov/edu).",2013-10-27 +28039167,SNP interaction pattern identifier (SIPI): an intensive search for SNP-SNP interaction patterns.,"

Motivation

Testing SNP-SNP interactions is considered as a key for overcoming bottlenecks of genetic association studies. However, related statistical methods for testing SNP-SNP interactions are underdeveloped.

Results

We propose the SNP Interaction Pattern Identifier (SIPI), which tests 45 biologically meaningful interaction patterns for a binary outcome. SIPI takes non-hierarchical models, inheritance modes and mode coding direction into consideration. The simulation results show that SIPI has higher power than MDR (Multifactor Dimensionality Reduction), AA_Full, Geno_Full (full interaction model with additive or genotypic mode) and SNPassoc in detecting interactions. Applying SIPI to the prostate cancer PRACTICAL consortium data with approximately 21 000 patients, the four SNP pairs in EGFR-EGFR , EGFR-MMP16 and EGFR-CSF1 were found to be associated with prostate cancer aggressiveness with the exact or similar pattern in the discovery and validation sets. A similar match for external validation of SNP-SNP interaction studies is suggested. We demonstrated that SIPI not only searches for more meaningful interaction patterns but can also overcome the unstable nature of interaction patterns.

Availability and implementation

The SIPI software is freely available at http://publichealth.lsuhsc.edu/LinSoftware/ .

Contact

hlin1@lsuhsc.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +25348408,CATH: comprehensive structural and functional annotations for genome sequences.,"The latest version of the CATH-Gene3D protein structure classification database (4.0, http://www.cathdb.info) provides annotations for over 235,000 protein domain structures and includes 25 million domain predictions. This article provides an update on the major developments in the 2 years since the last publication in this journal including: significant improvements to the predictive power of our functional families (FunFams); the release of our 'current' putative domain assignments (CATH-B); a new, strictly non-redundant data set of CATH domains suitable for homology benchmarking experiments (CATH-40) and a number of improvements to the web pages.",2014-10-27 +29086076,Scoria: a Python module for manipulating 3D molecular data.,"Third-party packages have transformed the Python programming language into a powerful computational-biology tool. Package installation is easy for experienced users, but novices sometimes struggle with dependencies and compilers. This presents a barrier that can hinder the otherwise broad adoption of new tools. We present Scoria, a Python package for manipulating three-dimensional molecular data. Unlike similar packages, Scoria requires no dependencies, compilation, or system-wide installation. One can incorporate the Scoria source code directly into their own programs. But Scoria is not designed to compete with other similar packages. Rather, it complements them. Our package leverages others (e.g. NumPy, SciPy), if present, to speed and extend its own functionality. To show its utility, we use Scoria to analyze a molecular dynamics trajectory. Our FootPrint script colors the atoms of one chain by the frequency of their contacts with a second chain. We are hopeful that Scoria will be a useful tool for the computational-biology community. 
A copy is available for download free of charge (Apache License 2.0) at http://durrantlab.com/scoria/ . Graphical abstract .",2017-09-18 +22096228,SubtiWiki--a comprehensive community resource for the model organism Bacillus subtilis.,"In the post-genomic era, most components of a cell are known and they can be quantified by large-scale functional genomics approaches. However, genome annotation is the bottleneck that hampers our understanding of living cells and organisms. Up-to-date functional annotation is of special importance for model organisms that provide a frame of reference for studies with other relevant organisms. We have generated a Wiki-type database for the Gram-positive model bacterium Bacillus subtilis, SubtiWiki (http://subtiwiki.uni-goettingen.de/). This Wiki is centered around the individual genes and gene products of B. subtilis and provides information on each aspect of gene function and expression as well as protein activity and its control. SubtiWiki is accompanied by two companion databases SubtiPathways and SubtInteract that provide graphical representations of B. subtilis metabolism and its regulation and of protein-protein interactions, respectively. The diagrams of both databases are easily navigatable using the popular Google maps API, and they are extensively linked with the SubtiWiki gene pages. Moreover, each gene/gene product was assigned to one or more functional categories and transcription factor regulons. Pages for the specific categories and regulons provide a rapid overview of functionally related genes/proteins. Today, SubtiWiki can be regarded as one of the most complete inventories of knowledge on a living organism in one single resource.",2011-11-16 +27831888,Fuzzy-Rough Entropy Measure and Histogram Based Patient Selection for miRNA Ranking in Cancer.,"MicroRNAs (miRNAs) are known as an important indicator of cancers. The presence of cancer can be detected by identifying the responsible miRNAs. 
A fuzzy-rough entropy measure (FREM) is developed which can rank the miRNAs and thereby identify the relevant ones. FREM is used to determine the relevance of a miRNA in terms of separability between normal and cancer classes. While computing the FREM for a miRNA, fuzziness takes care of the overlapping between normal and cancer expressions, whereas rough lower approximation determines their class sizes. MiRNAs are sorted according to the highest relevance (i.e., the capability of class separation) and a percentage among them is selected from the top ranked ones. FREM is also used to determine the redundancy between two miRNAs and the redundant ones are removed from the selected set, as per the necessity. A histogram based patient selection method is also developed which can help to reduce the number of patients to be dealt during the computation of FREM, while compromising very little with the performance of the selected miRNAs for most of the data sets. The superiority of the FREM as compared to some existing methods is demonstrated extensively on six data sets in terms of sensitivity, specificity, and score. While for these data sets the score of the miRNAs selected by our method varies from 0.70 to 0.91 using SVM, those results vary from 0.37 to 0.90 for some other methods. Moreover, all the selected miRNAs corroborate with the findings of biological investigations or pathway analysis tools. The source code of FREM is available at http://www.jayanta.droppages.com/FREM.html.",2016-11-01 +27998275,Efficient randomization of biological networks while preserving functional characterization of individual nodes.,"

Background

Networks are popular and powerful tools to describe and model biological processes. Many computational methods have been developed to infer biological networks from literature, high-throughput experiments, and combinations of both. Additionally, a wide range of tools has been developed to map experimental data onto reference biological networks, in order to extract meaningful modules. Many of these methods assess results' significance against null distributions of randomized networks. However, these standard unconstrained randomizations do not preserve the functional characterization of the nodes in the reference networks (i.e. their degrees and connection signs), hence including potential biases in the assessment.

Results

Building on our previous work about rewiring bipartite networks, we propose a method for rewiring any type of unweighted networks. In particular we formally demonstrate that the problem of rewiring a signed and directed network preserving its functional connectivity (F-rewiring) reduces to the problem of rewiring two induced bipartite networks. Additionally, we reformulate the lower bound to the iterations' number of the switching-algorithm to make it suitable for the F-rewiring of networks of any size. Finally, we present BiRewire3, an open-source Bioconductor package enabling the F-rewiring of any type of unweighted network. We illustrate its application to a case study about the identification of modules from gene expression data mapped on protein interaction networks, and a second one focused on building logic models from more complex signed-directed reference signaling networks and phosphoproteomic data.

Conclusions

BiRewire3 is freely available at https://www.bioconductor.org/packages/BiRewire/ , and it should have a broad application as it allows an efficient and analytically derived statistical assessment of results from any network biology tool.",2016-12-20 +29358497,OxyR-Dependent Transcription Response of Sinorhizobium meliloti to Oxidative Stress. ,"Reactive oxygen species such as peroxides play an important role in plant development, cell wall maturation, and defense responses. During nodulation with the host plant Medicago sativa, Sinorhizobium meliloti cells are exposed to H2O2 in infection threads and developing nodules (R. Santos, D. Hérouart, S. Sigaud, D. Touati, and A. Puppo, Mol Plant Microbe Interact 14:86-89, 2001, https://doi.org/10.1094/MPMI.2001.14.1.86). S. meliloti cells likely also experience oxidative stress, from both internal and external sources, during life in the soil. Here, we present microarray transcription data for S. meliloti wild-type cells compared to a mutant deficient in the key oxidative regulatory protein OxyR, each in response to H2O2 treatment. Several alternative sigma factor genes are upregulated in the response to H2O2; the stress sigma gene rpoE2 shows OxyR-dependent induction by H2O2, while rpoH1 expression is induced by H2O2 irrespective of the oxyR genotype. The activity of the RpoE2 sigma factor in turn causes increased expression of two more sigma factor genes, rpoE5 and rpoH2. Strains with deletions of rpoH1 showed improved survival in H2O2 as well as increased levels of oxyR and total catalase expression. These results imply that ΔrpoH1 strains are primed to deal with oxidative stress. This work presents a global view of S. meliloti gene expression changes, and of regulation of those changes, in response to H2O2. IMPORTANCE Like all aerobic organisms, the symbiotic nitrogen-fixing bacterium Sinorhizobium meliloti experiences oxidative stress throughout its complex life cycle. 
This report describes the global transcriptional changes that S. meliloti makes in response to H2O2 and the roles of the OxyR transcriptional regulator and the RpoH1 sigma factor in regulating those changes. By understanding the complex regulatory response of S. meliloti to oxidative stress, we may further understand the role that reactive oxygen species play as both stressors and potential signals during symbiosis.",2018-03-12 +25267793,An online system for metabolic network analysis. ,"Metabolic networks have become one of the centers of attention in life sciences research with the advancements in the metabolomics field. A vast array of studies analyzes metabolites and their interrelations to seek explanations for various biological questions, and numerous genome-scale metabolic networks have been assembled to serve for this purpose. The increasing focus on this topic comes with the need for software systems that store, query, browse, analyze and visualize metabolic networks. PathCase Metabolomics Analysis Workbench (PathCaseMAW) is built, released and runs on a manually created generic mammalian metabolic network. The PathCaseMAW system provides a database-enabled framework and Web-based computational tools for browsing, querying, analyzing and visualizing stored metabolic networks. PathCaseMAW editor, with its user-friendly interface, can be used to create a new metabolic network and/or update an existing metabolic network. The network can also be created from an existing genome-scale reconstructed network using the PathCaseMAW SBML parser. The metabolic network can be accessed through a Web interface or an iPad application. For metabolomics analysis, steady-state metabolic network dynamics analysis (SMDA) algorithm is implemented and integrated with the system. SMDA tool is accessible through both the Web-based interface and the iPad application for metabolomics analysis based on a metabolic profile. 
PathCaseMAW is a comprehensive system with various data input and data access subsystems. It is easy to work with by design, and is a promising tool for metabolomics research and for educational purposes. Database URL: http://nashua.case.edu/PathwaysMAW/Web.",2014-09-29 +25258092,Exploring the sequence context of phosphorylatable amino acids: the contribution of the upgraded MAPRes tool.,"Several models that predict where post-translational modifications are likely to occur and formulate the corresponding association rules are available to analyze the functional potential of a protein sequence, but an algorithm incorporating the functional groups of the involved amino acids in the sequence analyses process is not yet available. In its previous version, MAPRes was utilized to investigate the influence of the surrounding amino acids of post- translationally and co-translationally modifiable sites. The MAPRes has been upgraded to take into account the different biophysical and biochemical properties of the amino acids that have the potential to influence different post- translational modifications (PTMs). In the present study, the upgraded version of MAPRes was implemented on phosphorylated Ser/Thr/Tyr data by considering the polarity and charge of the surrounding amino acids. The patterns mined by MAPRes incorporating structural information on polarity and charge of amino acids suggest distinct structure-function relationships for phosphorylated serines in a multifunctional protein such as the insulin-receptor substrate-1 (IRS-1) protein. The new version of MAPRes is freely available at http://www.imsb.edu.pk/Database.htm.",2015-03-01 +29467108,The Association of Long-Term Exposure to Particulate Matter Air Pollution with Brain MRI Findings: The ARIC Study.,"BACKGROUND:Increasing evidence links higher particulate matter (PM) air pollution exposure to late-life cognitive impairment. 
However, few studies have considered associations between direct estimates of long-term past exposures and brain MRI findings indicative of neurodegeneration or cerebrovascular disease. OBJECTIVE:Our objective was to quantify the association between brain MRI findings and PM exposures approximately 5 to 20 y prior to MRI in the Atherosclerosis Risk in Communities (ARIC) study. METHODS:ARIC is based in four U.S. sites: Washington County, Maryland; Minneapolis suburbs, Minnesota; Forsyth County, North Carolina; and Jackson, Mississippi. A subset of ARIC participants underwent 3T brain MRI in 2011-2013 (n=1,753). We estimated mean exposures to PM with an aerodynamic diameter less than 10 or 2.5μm (PM10 and PM2.5) in 1990-1998, 1999-2007, and 1990-2007 at the residential addresses of eligible participants with MRI data. We estimated site-specific associations between PM and brain MRI findings and used random-effect, inverse variance-weighted meta-analysis to combine them. RESULTS:In pooled analyses, higher mean PM2.5 and PM10 exposure in all time periods were associated with smaller deep-gray brain volumes, but not other MRI markers. Higher PM2.5 exposures were consistently associated with smaller total and regional brain volumes in Minnesota, but not elsewhere. CONCLUSIONS:Long-term past PM exposure was not associated with markers of cerebrovascular disease. Higher long-term past PM exposures were associated with smaller deep-gray volumes overall, and higher PM2.5 exposures were associated with smaller brain volumes in the Minnesota site. Further work is needed to understand the sources of heterogeneity across sites. https://doi.org/10.1289/EHP2152.",2018-02-16 +27485445,NRGC: a novel referential genome compression algorithm.,"

Motivation

Next-generation sequencing techniques produce millions to billions of short reads. The procedure is not only very cost effective but also can be done in a laboratory environment. The state-of-the-art sequence assemblers then construct the whole genomic sequence from these reads. Current cutting edge computing technology makes it possible to build genomic sequences from the billions of reads at minimal cost and time. As a consequence, we see an explosion of biological sequences in recent times. In turn, the cost of storing the sequences in physical memory or transmitting them over the internet is becoming a major bottleneck for research and future medical applications. Data compression techniques are one of the most important remedies in this context. We are in need of suitable data compression algorithms that can exploit the inherent structure of biological sequences. Although standard data compression algorithms are prevalent, they are not suitable to compress biological sequencing data effectively. In this article, we propose a novel referential genome compression algorithm (NRGC) to effectively and efficiently compress the genomic sequences.

Results

We have done rigorous experiments to evaluate NRGC by taking a set of real human genomes. The simulation results show that our algorithm is indeed an effective genome compression algorithm that performs better than the best-known algorithms in most of the cases. Compression and decompression times are also very impressive.

Availability and implementation

The implementations are freely available for non-commercial purposes. They can be downloaded from: http://www.engr.uconn.edu/~rajasek/NRGC.zip CONTACT: rajasek@engr.uconn.edu.",2016-08-02 +25589875,"An occurence records database of French Guiana harvestmen (Arachnida, Opiliones).","This dataset provides information on specimens of harvestmen (Arthropoda, Arachnida, Opiliones) collected in French Guiana. Field collections have been initiated in 2012 within the framework of the CEnter for the Study of Biodiversity in Amazonia (CEBA: www.labex-ceba.fr/en/). This dataset is a work in progress.  Occurrences are recorded in an online database stored at the EDB laboratory after each collecting trip and the dataset is updated on a monthly basis. Voucher specimens and associated DNA are also stored at the EDB laboratory until deposition in natural history Museums. The latest version of the dataset is publicly and freely accessible through our Integrated Publication Toolkit at http://130.120.204.55:8080/ipt/resource.do?r=harvestmen_of_french_guiana or through the Global Biodiversity Information Facility data portal at http://www.gbif.org/dataset/3c9e2297-bf20-4827-928e-7c7eefd9432c.",2014-12-25 +27840039,Step-by-step guide to building an inexpensive 3D printed motorized positioning stage for automated high-content screening microscopy.,"High-content screening microscopy relies on automation infrastructure that is typically proprietary, non-customizable, costly and requires a high level of skill to use and maintain. The increasing availability of rapid prototyping technology makes it possible to quickly engineer alternatives to conventional automation infrastructure that are low-cost and user-friendly. 
Here, we describe a 3D printed inexpensive open source and scalable motorized positioning stage for automated high-content screening microscopy and provide detailed step-by-step instructions to re-building the device, including a comprehensive parts list, 3D design files in STEP (Standard for the Exchange of Product model data) and STL (Standard Tessellation Language) format, electronic circuits and wiring diagrams as well as software code. System assembly including 3D printing requires approx. 30h. The fully assembled device is light-weight (1.1kg), small (33×20×8cm) and extremely low-cost (approx. EUR 250). We describe positioning characteristics of the stage, including spatial resolution, accuracy and repeatability, compare imaging data generated with our device to data obtained using a commercially available microplate reader, demonstrate its suitability to high-content microscopy in 96-well high-throughput screening format and validate its applicability to automated functional Cl-- and Ca2+-imaging with recombinant HEK293 cells as a model system. A time-lapse video of the stage during operation and as part of a custom assembled screening robot can be found at https://vimeo.com/158813199.",2016-11-02 +22360713,Two anatomic resources of canine pelvic limb muscles based on CT and MRI.,"Advances in magnetic resonance (MR) imaging and three-dimensional (3D) modeling software provide the tools necessary to create sophisticated, interactive anatomic resources that can assist in the interpretation of MR images of extremities, and learning the structure and function of limb musculature. Modeling provides advantages over dissection or consultation of print atlases because of the associated speed, flexibility, 3D nature, and elimination of superimposed arrows and labels. 
Our goals were to create a diagnostic atlas of pelvic limb muscles that will facilitate interpretation of MR images of patients with muscle injury and to create a 3D model of the canine pelvic limb musculature to facilitate anatomic learning. To create these resources, we used structural segmentation of MR images, a process that groups image pixels into anatomically meaningful regions. The Diagnostic Atlas is an interactive, multiplanar, web-based MR atlas of the canine pelvic limb musculature that was created by manually segmenting clinically analogous MR sequences. Higher resolution volumetric MR and computed tomography (CT) data were segmented into separately labeled volumes of data and then transformed into a multilayered 3D computer model. The 3D Model serves as a resource for students of gross anatomy, encouraging integrative learning with its highly interactive and selective display capabilities. For clinicians, the 3D Model also serves to bridge the gap between topographic and tomographic anatomy, displaying both formats alongside, or even superimposed over each other. Both projects are hosted on an open-access website, http://3dvetanatomy.ncsu.edu/",2012-02-24 +24271396,The European Bioinformatics Institute's data resources 2014.,"Molecular Biology has been at the heart of the 'big data' revolution from its very beginning, and the need for access to biological data is a common thread running from the 1965 publication of Dayhoff's 'Atlas of Protein Sequence and Structure' through the Human Genome Project in the late 1990s and early 2000s to today's population-scale sequencing initiatives. The European Bioinformatics Institute (EMBL-EBI; http://www.ebi.ac.uk) is one of three organizations worldwide that provides free access to comprehensive, integrated molecular data sets. 
Here, we summarize the principles underpinning the development of these public resources and provide an overview of EMBL-EBI's database collection to complement the reviews of individual databases provided elsewhere in this issue.",2013-11-23 +28520707,"Using Molecular Characterization to Support Investigations of Aquatic Facility-Associated Outbreaks of Cryptosporidiosis - Alabama, Arizona, and Ohio, 2016.","Cryptosporidiosis is a nationally notifiable gastrointestinal illness caused by parasitic protozoa of the genus Cryptosporidium, which can cause profuse, watery diarrhea that can last up to 2-3 weeks in immunocompetent patients and can lead to life-threatening wasting and malabsorption in immunocompromised patients. Fecal-oral transmission of Cryptosporidium oocysts, the parasite's infectious life stage, occurs via ingestion of contaminated recreational water, drinking water, or food, or following contact with infected persons or animals, particularly preweaned bovine calves (1). The typical incubation period is 2-10 days. Since 2004, the annual incidence of nationally notified cryptosporidiosis has risen approximately threefold in the United States (1). Cryptosporidium also has emerged as the leading etiology of nationally notified recreational water-associated outbreaks, particularly those associated with aquatic facilities (i.e., physical places that contain one or more aquatic venues [e.g., pools] and support infrastructure) (2). As of February 24, 2017, a total of 13 (54%) of 24 states reporting provisional data detected at least 32 aquatic facility-associated cryptosporidiosis outbreaks in 2016. In comparison, 20 such outbreaks were voluntarily reported to CDC via the National Outbreak Reporting System for 2011, 16 for 2012, 13 for 2013, and 16 for 2014. This report highlights cryptosporidiosis outbreaks associated with aquatic facilities in three states (Alabama, Arizona, and Ohio) in 2016. 
This report also illustrates the use of CryptoNet, the first U.S. molecularly based surveillance system for a parasitic disease, to further elucidate Cryptosporidium chains of transmission and cryptosporidiosis epidemiology. CryptoNet data can be used to optimize evidence-based prevention strategies. Not swimming when ill with diarrhea is key to preventing and controlling aquatic facility-associated cryptosporidiosis outbreaks (https://www.cdc.gov/healthywater/swimming/swimmers/steps-healthy-swimming.html).",2017-05-19 +24574115,BioPlat: a software for human cancer biomarker discovery.,"

Summary

Development of effective tools such as oligo-microarrays and next-generation sequencing methods for monitoring gene expression on a large scale has resulted in the discovery of gene signatures with prognostic/predictive value in various malignant neoplastic diseases. However, with the exponential growth of gene expression databases, biologists are faced with the challenge of extracting useful information from these repositories. Here, we present a software package, BioPlat (Biomarkers Platform), which allows biologists to identify novel prognostic and predictive cancer biomarkers based on the data mining of gene expression signatures and gene expression profiling databases. BioPlat has been designed as an easy-to-use and flexible desktop software application, which provides a set of analytical tools related to data extraction, preprocessing, filtering, gene expression signature calculation, in silico validation, feature selection and annotation that leverage the integration and reuse of gene expression signatures in the context of follow-up data.

Availability and implementation

BioPlat is a platform-independent software implemented in Java and supported on GNU/Linux and MS Windows, which is freely available for download at http://www.cancergenomics.net.",2014-02-25 +27605103,stringMLST: a fast k-mer based tool for multilocus sequence typing.,"Rapid and accurate identification of the sequence type (ST) of bacterial pathogens is critical for epidemiological surveillance and outbreak control. Cheaper and faster next-generation sequencing (NGS) technologies have taken preference over the traditional method of amplicon sequencing for multilocus sequence typing (MLST). But data generated by NGS platforms necessitate quality control, genome assembly and sequence similarity searching before an isolate's ST can be determined. These are computationally intensive and time consuming steps, which are not ideally suited for real-time molecular epidemiology. Here, we present stringMLST, an assembly- and alignment-free, lightweight, platform-independent program capable of rapidly typing bacterial isolates directly from raw sequence reads. The program implements a simple hash table data structure to find exact matches between short sequence strings (k-mers) and an MLST allele library. We show that stringMLST is more accurate, and order of magnitude faster, than its contemporary genome-based ST detection tools.

Availability and implementation

The source code and documentation are available at http://jordan.biology.gatech.edu/page/software/stringMLST CONTACT: lavanya.rishishwar@gatech.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-07 +28610787,SpolSimilaritySearch - A web tool to compare and search similarities between spoligotypes of Mycobacterium tuberculosis complex.,"Spoligotyping is one of the most commonly used polymerase chain reaction (PCR)-based methods for identification and study of genetic diversity of Mycobacterium tuberculosis complex (MTBC). Despite its known limitations if used alone, the methodology is particularly useful when used in combination with other methods such as mycobacterial interspersed repetitive units - variable number of tandem DNA repeats (MIRU-VNTRs). At a worldwide scale, spoligotyping has allowed identification of information on 103,856 MTBC isolates (corresponding to 98049 clustered strains plus 5807 unique isolates from 169 countries of patient origin) contained within the SITVIT2 proprietary database of the Institut Pasteur de la Guadeloupe. The SpolSimilaritySearch web-tool described herein (available at: http://www.pasteur-guadeloupe.fr:8081/SpolSimilaritySearch) incorporates a similarity search algorithm allowing users to get a complete overview of similar spoligotype patterns (with information on presence or absence of 43 spacers) in the aforementioned worldwide database. This tool allows one to analyze spread and evolutionary patterns of MTBC by comparing similar spoligotype patterns, to distinguish between widespread, specific and/or confined patterns, as well as to pinpoint patterns with large deleted blocks, which play an intriguing role in the genetic epidemiology of M. tuberculosis. 
Finally, the SpolSimilaritySearch tool also provides users with the country distribution patterns for each queried spoligotype.",2017-04-20 +28427473,Intestinal Behçet and Crohn's disease: two sides of the same coin.,"Behçet's disease (BD) and Crohn's disease (CD) are chronic immune-mediated, inflammatory disorders affecting many different systems (joints, skin, eyes, gastrointestinal and biliary tracts). Both disorders have fluctuating courses and when gastrointestinal symptoms are prevalent, differential diagnosis can be difficult. BD involves the gastrointestinal tract in 10-15% of cases with localized lesions in the ileocecal region. The clinical picture is heterogeneous with various clusters of disease expression. CD is a chronic inflammatory disorder, which can affect any part of the intestinal tract, as well as extra-intestinal tissue. Factors that contribute towards the pathogenesis of both diseases include the host's genetic profile, and immune system, and environmental factors such as the gut microbiota. The aim of this manuscript is to provide a narrative review of clinical features of BD and CD, highlighting the importance of differential diagnosis and therapeutic approach, especially in the presence of gastrointestinal involvement. A comprehensive search of published literature using the Pubmed (http://www.ncbi.nlm.nih.gov/pubmed/) database was carried out to identify all articles published in English from 1999 to October 2016, using 4 key terms: ""Behçet Disease"", ""Intestinal Behçet's Disease"", ""Crohn's Disease"" and ""Inflammatory Bowel Disease"".",2017-04-20 +28425725,"""Defining 'peerness' in peer-delivered health and wellness interventions for serious mental illness"": Correction to Muralidharan et al. (2017).","Reports an error in ""Defining ""peerness"" in peer-delivered health and wellness interventions for serious mental illness"": Response to letter to the editor"" by Jody Silver and Patricia B. 
Nemec (Psychiatric Rehabilitation Journal, 2017[Mar], Vol 40[1], 116). The article was mislabeled as Editorial and should be a Comment. The Response to Letter to the Editor section should be a Reply and now has its own http://dx.doi.org/10.1037/h0101580. (The following abstract of the original article appeared in record 2017-13876-001.) Replies to comments by Muralidharan et al (see record 2017-13255-009) on the original article by Silver and Nemec (see record 2016-43088-001). The original authors thank the commentators for raising additional questions regarding ""peerness."" They were honored that their paper prompted this thought and effort to submit comments. (PsycINFO Database Record)",2017-04-20 +27761508,Dataset of the Botrytis cinerea phosphoproteome induced by different plant-based elicitors.,"Phosphorylation is one of the main post-translational modification (PTM) involved in signaling network in the ascomycete Botrytis cinerea, one of the most relevant phytopathogenic fungus. The data presented in this article provided a differential mass spectrometry-based analysis of the phosphoproteome of B. cinerea under two different phenotypical conditions induced by the use of two different elicitors: glucose and deproteinized Tomate Cell Walls (TCW). A total 1138 and 733 phosphoproteins were identified for glucose and TCW culture conditions respectively. Raw data are deposited at the ProteomeXchange Consortium via the PRIDE partner repository with the data set identifier (PRIDE: http://www.ebi.ac.uk/pride/archive/projects/PXD003099). Further interpretation and discussion of these data are provided in our research article entitled ""Phosphoproteome analysis of B.cinerea in response to different plant-based elicitors"" (Liñeiro et al., 2016) [1].",2016-04-22 +28728141,Estimates of Soil Ingestion in a Population of Chinese Children.,"

Background

China's soil pollution poses serious health risks. However, data regarding the soil ingestion rate (SIR) of the Chinese population, which is critical to assessing associated health risks, are lacking.

Objectives

We estimated soil ingestion of 177 Chinese children from Guangdong, Hubei, and Gansu Provinces.

Methods

We conducted this investigation by employing a tracer mass-balance method. We collected a duplicate of all food consumed and all feces and urine excreted on 1 d (n=153) and over 3 consecutive d (n=24), as well as soil samples from play areas and drinking-water samples. We analyzed concentrations of the tracer elements Al, Ba, Ce, Mn, Sc, Ti, V, and Y in these samples using ICP-AES and ICP-MS and estimated the SIR for each subject.

Results

The estimated SIR data based on each tracer element were characterized by a skewed distribution, as well as higher inter-tracer and inter-subject variation, with several outliers. After removing the outliers, daily SIR median (range) values in milligrams per day were Al, 27.8 (−42.0 to 257.3); Ba, 36.5 (−230.3 to 412.7); Ce, 35.3 (−21.2 to 225.8); Mn, 146.6 (−1259.4 to 1827.7); Sc, 54.8 (−4.5 to 292.0); Ti, 36.7 (−233.7 to 687.0); V, 92.1 (10.4 to 308.0); and Y, 59.1 (−18.4 to 283.0). Daily SIR median/95th percentile (range) values based on the best tracer method (BTM) were 51.7/216.6 (−9.5 to 297.6) mg/d.

Conclusions

Based on the BTM, recommended SIR values for the general population of Chinese children (2.5 to 12 years old) are 52 mg/d for the central tendency and 217 mg/d for the upper percentile. We did not differentiate between outside soil and indoor dust. Considering the lower concentration of tracer elements in indoor dust than outside soil, actual soil and dust ingestion rates could be higher. https://doi.org/10.1289/EHP930.",2017-07-05 +27466625,Joint sparse canonical correlation analysis for detecting differential imaging genetics modules.,"

Motivation

Imaging genetics combines brain imaging and genetic information to identify the relationships between genetic variants and brain activities. When the data samples belong to different classes (e.g. disease status), the relationships may exhibit class-specific patterns that can be used to facilitate the understanding of a disease. Conventional approaches often perform separate analysis on each class and report the differences, but ignore important shared patterns.

Results

In this paper, we develop a multivariate method to analyze the differential dependency across multiple classes. We propose a joint sparse canonical correlation analysis method, which uses a generalized fused lasso penalty to jointly estimate multiple pairs of canonical vectors with both shared and class-specific patterns. Using a data fusion approach, the method is able to detect differentially correlated modules effectively and efficiently. The results from simulation studies demonstrate its higher accuracy in discovering both common and differential canonical correlations compared to conventional sparse CCA. Using a schizophrenia dataset with 92 cases and 116 controls including a single nucleotide polymorphism (SNP) array and functional magnetic resonance imaging data, the proposed method reveals a set of distinct SNP-voxel interaction modules for the schizophrenia patients, which are verified to be both statistically and biologically significant.

Availability and implementation

The Matlab code is available at https://sites.google.com/site/jianfang86/JSCCA CONTACT: wyp@tulane.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-27 +28484602,PPIMpred: a web server for high-throughput screening of small molecules targeting protein-protein interaction.,"PPIMpred is a web server that allows high-throughput screening of small molecules for targeting specific protein-protein interactions, namely Mdm2/P53, Bcl2/Bak and c-Myc/Max. Three different kernels of support vector machine (SVM), namely, linear, polynomial and radial basis function (RBF), and two other machine learning techniques including Naive Bayes and Random Forest were used to train the models. A fivefold cross-validation technique was used to measure the performance of these classifiers. The RBF kernel of SVM outperformed and/or was comparable with all other methods with accuracy values of 83%, 79% and 90% for Mdm2/P53, Bcl2/Bak and c-Myc/Max, respectively. About 80% of the predicted SVM scores of training/testing datasets from Mdm2/P53 and Bcl2/Bak have significant IC50 values and docking scores. The proposed models achieved an accuracy of 66-90% with blind sets. The three mentioned (Mdm2/P53, Bcl2/Bak and c-Myc/Max) proposed models were screened in a large dataset of 265 242 small chemicals from National Cancer Institute open database. To further realize the robustness of this approach, hits with high and random SVM scores were used for molecular docking in AutoDock Vina wherein the molecules with high and random predicted SVM scores yielded moderately significant docking scores (p-values < 0.1). In addition to the above-mentioned classification scheme, this web server also allows users to get the structural and chemical similarities with known chemical modulators or drug-like molecules based on Tanimoto coefficient similarity search algorithm. 
PPIMpred is freely available at http://bicresources.jcbose.ac.in/ssaha4/PPIMpred/.",2017-04-19 +27153589,Inferring gene targets of drugs and chemical compounds from gene expression profiles.,"

Motivation

Finding genes which are directly perturbed or targeted by drugs is of great interest and importance in drug discovery. Several network filtering methods have been created to predict the gene targets of drugs from gene expression data based on an ordinary differential equation model of the gene regulatory network (GRN). A critical step in these methods involves inferring the GRN from the expression data, which is a very challenging problem on its own. In addition, existing network filtering methods require computationally intensive parameter tuning or expression data from experiments with known genetic perturbations or both.

Results

We developed a method called DeltaNet for the identification of drug targets from gene expression data. Here, the gene target predictions were directly inferred from the data without a separate step of GRN inference. DeltaNet formulation led to solving an underdetermined linear regression problem, for which we employed least angle regression (DeltaNet-LAR) or LASSO regularization (DeltaNet-LASSO). The predictions using DeltaNet for expression data of Escherichia coli, yeast, fruit fly and human were significantly more accurate than those using network filtering methods, namely mode of action by network identification (MNI) and sparse simultaneous equation model (SSEM). Furthermore, DeltaNet using LAR did not require any parameter tuning and could provide computational speed-up over existing methods.

Conclusion

DeltaNet is a robust and numerically efficient tool for identifying gene perturbations from gene expression data. Importantly, the method requires little to no expert supervision, while providing accurate gene target predictions.

Availability and implementation

DeltaNet is available on http://www.cabsel.ethz.ch/tools/DeltaNet

Contact

rudi.gunawan@chem.ethz.ch

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-18 +22057919,Re-annotation of two hyperthermophilic archaea Pyrococcus abyssi GE5 and Pyrococcus furiosus DSM 3638.,"Pyrococcus abyssi GE5 (P. aby) and Pyrococcus furiosus DSM 3638 (P. fur) are two model hyperthermophilic archaea. However, their annotations in public databases are unsatisfactory. In this article, the two genomes were re-annotated according to the following steps. (i) All ""hypothetical genes"" in the original annotation were re-identified based on the Z-curve method, and some of them were recognized as non-coding open reading frames (ORFs). Evidence showed that the recognized non-coding ORFs were highly unlikely to encode proteins. (ii) The translation initiation sites (TISs) of all the annotated genes were re-located, and more than 10% of the TISs were shifted to 5'-upstream or 3'-downstream regions. (iii) The functions of the refined ""hypothetical genes"" were predicted using sequence alignment tools, more than 200 originally annotated ""hypothetical genes"" in either of the two hyperthermophiles were assigned functions. A large number of these functions have reference support or experimentally characterized homologues. All the refined information will serve as a valuable resource for research on P. aby and P. fur, which may be helpful in the exploration of thermal adaptation mechanisms. The complete re-annotation files of P. aby and P. fur are available at http://211.69.128.148/download/ .",2011-11-06 +21958208,Experimental annotation of the human pathogen Histoplasma capsulatum transcribed regions using high-resolution tiling arrays.,"

Background

The fungal pathogen Histoplasma capsulatum is thought to be the most common cause of fungal respiratory infections in immunocompetent humans, yet little is known about its biology. Here we provide the first genome-wide studies to experimentally validate its genome annotation. A functional interrogation of the Histoplasma genome provides critical support for continued investigation into the biology and pathogenesis of H. capsulatum and related fungi.

Results

We employed a three-pronged approach to provide a functional annotation for the H. capsulatum G217B strain. First, we probed high-density tiling arrays with labeled cDNAs from cells grown under diverse conditions. These data defined 6,172 transcriptionally active regions (TARs), providing validation of 6,008 gene predictions. Interestingly, 22% of these predictions showed evidence of anti-sense transcription. Additionally, we detected transcription of 264 novel genes not present in the original gene predictions. To further enrich our analysis, we incorporated expression data from whole-genome oligonucleotide microarrays. These expression data included profiling under growth conditions that were not represented in the tiling experiment, and validated an additional 2,249 gene predictions. Finally, we compared the G217B gene predictions to other available fungal genomes, and observed that an additional 254 gene predictions had an ortholog in a different fungal species, suggesting that they represent genuine coding sequences.

Conclusions

These analyses yielded a high confidence set of validated gene predictions for H. capsulatum. The transcript sets resulting from this study are a valuable resource for further experimental characterization of this ubiquitous fungal pathogen. The data is available for interactive exploration at http://histo.ucsf.edu.",2011-09-29 +28549079,Computational modeling of immune system of the fish for a more effective vaccination in aquaculture.,

Motivation

A computational model equipped with the main immunological features of the sea bass (Dicentrarchus labrax L.) immune system was used to predict more effective vaccination in fish. The performance of the model was evaluated by using the results of two in vivo vaccinations trials against L. anguillarum and P. damselae.

Results

Tests were performed to select the appropriate doses of vaccine and infectious bacteria to set up the model. Simulation outputs were compared with the specific antibody production and the expression of BcR and TcR gene transcripts in spleen. The model has shown a good ability to be used in sea bass and could be implemented for different routes of vaccine administration even with more than two pathogens. The model confirms the suitability of in silico methods to optimize vaccine doses and the immune response to them. This model could be applied to other species to optimize the design of new vaccination treatments of fish in aquaculture.

Availability and implementation

The method is available at http://www.iac.cnr.it/~filippo/c-immsim/.

Contact

nromano@unitus.it.

Supplementary information

Supplementary data are available at Bioinformatics online.,2017-10-01 +28056764,Temperature-dependent sRNA transcriptome of the Lyme disease spirochete.,"

Background

Transmission of Borrelia burgdorferi from its tick vector to a vertebrate host requires extensive reprogramming of gene expression. Small regulatory RNAs (sRNA) have emerged in the last decade as important regulators of bacterial gene expression. Despite the widespread observation of sRNA-mediated gene regulation, only one sRNA has been characterized in the Lyme disease spirochete B. burgdorferi. We employed an sRNA-specific deep-sequencing approach to identify the small RNA transcriptome of B. burgdorferi at both 23 °C and 37 °C, which mimics in vitro the transmission from the tick vector to the mammalian host.

Results

We identified over 1000 sRNAs in B. burgdorferi revealing large amounts of antisense and intragenic sRNAs, as well as characteristic intergenic and 5' UTR-associated sRNAs. A large fraction of the novel sRNAs (43%) are temperature-dependent and differentially expressed at the two temperatures, suggesting a role in gene regulation for adaptation during transmission. In addition, many genes important for maintenance of Borrelia during its enzootic cycle are associated with antisense RNAs or 5' UTR sRNAs. RNA-seq data were validated for twenty-two of the sRNAs via Northern blot analyses.

Conclusions

Our study demonstrates that sRNAs are abundant and differentially expressed by environmental conditions suggesting that gene regulation via sRNAs is a common mechanism utilized in B. burgdorferi. In addition, the identification of antisense and intragenic sRNAs impacts the broadly used loss-of-function genetic approach used to study gene function and increases the coding potential of a small genome. To facilitate access to the analyzed RNA-seq data we have set-up a website at http://www.cibiv.at/~niko/bbdb/ that includes a UCSC browser track hub. By clicking on the respective link, researchers can interactively inspect the data in the UCSC genome browser (Kent et al., Genome Res 12:996-1006, 2002).",2017-01-05 +29045062,Tokyo Guidelines 2018: flowchart for the management of acute cholecystitis.,"We propose a new flowchart for the treatment of acute cholecystitis (AC) in the Tokyo Guidelines 2018 (TG18). Grade III AC was not indicated for straightforward laparoscopic cholecystectomy (Lap-C). Following analysis of subsequent clinical investigations and drawing on Big Data in particular, TG18 proposes that some Grade III AC can be treated by Lap-C when performed at advanced centers with specialized surgeons experienced in this procedure and for patients that satisfy certain strict criteria. For Grade I, TG18 recommends early Lap-C if the patients meet the criteria of Charlson comorbidity index (CCI) ≤5 and American Society of Anesthesiologists physical status classification (ASA-PS) ≤2. For Grade II AC, if patients meet the criteria of CCI ≤5 and ASA-PS ≤2, TG18 recommends early Lap-C performed by experienced surgeons; and if not, after medical treatment and/or gallbladder drainage, Lap-C would be indicated. TG18 proposes that Lap-C is indicated in Grade III patients with strict criteria. 
These are that the patients have favorable organ system failure, and negative predictive factors, who meet the criteria of CCI ≤3 and ASA-PS ≤2 and who are being treated at an advanced center (where experienced surgeons practice). If the patient is not considered suitable for early surgery, TG18 recommends early/urgent biliary drainage followed by delayed Lap-C once the patient's overall condition has improved. Free full articles and mobile app of TG18 are available at: http://www.jshbps.jp/modules/en/index.php?content_id=47. Related clinical questions and references are also included.",2017-12-20 +29158538,PinAPL-Py: A comprehensive web-application for the analysis of CRISPR/Cas9 screens.,"Large-scale genetic screens using CRISPR/Cas9 technology have emerged as a major tool for functional genomics. With its increased popularity, experimental biologists frequently acquire large sequencing datasets for which they often do not have an easy analysis option. While a few bioinformatic tools have been developed for this purpose, their utility is still hindered either due to limited functionality or the requirement of bioinformatic expertise. To make sequencing data analysis of CRISPR/Cas9 screens more accessible to a wide range of scientists, we developed a Platform-independent Analysis of Pooled Screens using Python (PinAPL-Py), which is operated as an intuitive web-service. PinAPL-Py implements state-of-the-art tools and statistical models, assembled in a comprehensive workflow covering sequence quality control, automated sgRNA sequence extraction, alignment, sgRNA enrichment/depletion analysis and gene ranking. The workflow is set up to use a variety of popular sgRNA libraries as well as custom libraries that can be easily uploaded. Various analysis options are offered, suitable to analyze a large variety of CRISPR/Cas9 screening experiments. Analysis output includes ranked lists of sgRNAs and genes, and publication-ready plots. 
PinAPL-Py helps to advance genome-wide screening efforts by combining comprehensive functionality with user-friendly implementation. PinAPL-Py is freely accessible at http://pinapl-py.ucsd.edu with instructions and test datasets.",2017-11-20 +28414562,A Review of Cochrane Systematic Reviews of Interventions Relevant to Orthoptic Practice.,"

Aim

To present an overview of the range of systematic reviews on intervention trials pertinent to orthoptic practice, produced by the Cochrane Eyes and Vision group (CEV).

Methods

We searched the 2016 Cochrane Library database (31.03.2016) to identify completed reviews and protocols of direct relevance to orthoptic practice. These reviews are currently completed and published, available on www.thecochranelibrary.com (free to UK health employees) or via the CEV website (http://eyes.cochrane.org/).

Results

We found 27 completed CEV reviews across the topics of strabismus, amblyopia, refractive errors, and low vision. Seven completed CEV protocols addressed topics of strabismus, amblyopia, refractive errors, low vision, and screening. We found 3 completed Cochrane Stroke reviews addressing visual field loss, eye movement impairment, and age-related vision loss.

Conclusions

The systematic review process presents an important opportunity for any clinician to contribute to the establishment of reliable, evidence-based orthoptic practice. Each review has an abstract and plain language summary that many non-clinicians find useful, followed by a full copy of the review (background, objectives, methods, results, discussion) with a conclusion section that is divided into implications for practice and implications for research. The current reviews provide patients/parents/carers with information about various different conditions and treatment options, but also provide clinicians with a summary of the available evidence on interventions, to use as a guide for both clinical practice and future research planning. The reviews identified in this overview highlight the evidence available for effective interventions for strabismus, amblyopia, refractive errors, and low vision or stroke rehabilitation as well as the gaps in the evidence base. Thus, a demand exists for future robust, randomized, controlled trials of such interventions of importance in orthoptic practice.",2017-04-17 +28837067,IonchanPred 2.0: A Tool to Predict Ion Channels and Their Types. ,"Ion channels (IC) are ion-permeable protein pores located in the lipid membranes of all cells. Different ion channels have unique functions in different biological processes. Due to the rapid development of high-throughput mass spectrometry, proteomic data are rapidly accumulating and provide us an opportunity to systematically investigate and predict ion channels and their types. In this paper, we constructed a support vector machine (SVM)-based model to quickly predict ion channels and their types. By considering the residue sequence information and their physicochemical properties, a novel feature-extracted method which combined dipeptide composition with the physicochemical correlation between two residues was employed. 
A feature selection strategy was used to improve the performance of the model. Comparison results of in jackknife cross-validation demonstrated that our method was superior to other methods for predicting ion channels and their types. Based on the model, we built a web server called IonchanPred which can be freely accessed from http://lin.uestc.edu.cn/server/IonchanPredv2.0.",2017-08-24 +29021969,*K-means and cluster models for cancer signatures.,"We present *K-means clustering algorithm and source code by expanding statistical clustering methods applied in https://ssrn.com/abstract=2802753 to quantitative finance. *K-means is statistically deterministic without specifying initial centers, etc. We apply *K-means to extracting cancer signatures from genome data without using nonnegative matrix factorization (NMF). *K-means' computational cost is a fraction of NMF's. Using 1389 published samples for 14 cancer types, we find that 3 cancers (liver cancer, lung cancer and renal cell carcinoma) stand out and do not have cluster-like structures. Two clusters have especially high within-cluster correlations with 11 other cancers indicating common underlying structures. Our approach opens a novel avenue for studying such structures. *K-means is universal and can be applied in other fields. We discuss some potential applications in quantitative finance.",2017-08-02 +24136997,IMG/M 4 version of the integrated metagenome comparative analysis system.,"IMG/M (http://img.jgi.doe.gov/m) provides support for comparative analysis of microbial community aggregate genomes (metagenomes) in the context of a comprehensive set of reference genomes from all three domains of life, as well as plasmids, viruses and genome fragments. IMG/M's data content and analytical tools have expanded continuously since its first version was released in 2007. 
Since the last report published in the 2012 NAR Database Issue, IMG/M's database architecture, annotation and data integration pipelines and analysis tools have been extended to cope with the rapid growth in the number and size of metagenome data sets handled by the system. IMG/M data marts provide support for the analysis of publicly available genomes, expert review of metagenome annotations (IMG/M ER: http://img.jgi.doe.gov/mer) and Human Microbiome Project (HMP)-specific metagenome samples (IMG/M HMP: http://img.jgi.doe.gov/imgm_hmp).",2013-10-16 +27207881,DIANA-mirExTra v2.0: Uncovering microRNAs and transcription factors with crucial roles in NGS expression data.,"Differential expression analysis (DEA) is one of the main instruments utilized for revealing molecular mechanisms in pathological and physiological conditions. DIANA-mirExTra v2.0 (http://www.microrna.gr/mirextrav2) performs a combined DEA of mRNAs and microRNAs (miRNAs) to uncover miRNAs and transcription factors (TFs) playing important regulatory roles between two investigated states. The web server uses as input miRNA/RNA-Seq read count data sets that can be uploaded for analysis. Users can combine their data with 350 small-RNA-Seq and 65 RNA-Seq in-house analyzed libraries which are provided by DIANA-mirExTra v2.0. The web server utilizes miRNA:mRNA, TF:mRNA and TF:miRNA interactions derived from extensive experimental data sets. More than 450 000 miRNA interactions and 2 000 000 TF binding sites from specific or high-throughput techniques have been incorporated, while accurate miRNA TSS annotation is obtained from microTSS experimental/in silico framework. These comprehensive data sets enable users to perform analyses based solely on experimentally supported information and to uncover central regulators within sequencing data: miRNAs controlling mRNAs and TFs regulating mRNA or miRNA expression. 
The server also supports predicted miRNA:gene interactions from DIANA-microT-CDS for 4 species (human, mouse, nematode and fruit fly). DIANA-mirExTra v2.0 has an intuitive user interface and is freely available to all users without any login requirement.",2016-05-20 +25879163,KPP: KEGG Pathway Painter.,"

Background

High-throughput technologies became common tools to decipher genome-wide changes of gene expression (GE) patterns. Functional analysis of GE patterns is a daunting task as it often requires recourse to the public repositories of biological knowledge. On the other hand, in many cases a researcher's inquiry can be served by a comprehensive glimpse. The KEGG PATHWAY database is a compilation of manually verified maps of biological interactions represented by the complete set of pathways related to signal transduction and other cellular processes. Rapid mapping of the differentially expressed genes to the KEGG pathways may provide an idea about the functional relevance of the gene lists corresponding to the high-throughput expression data.

Results

Here we present a web based graphic tool KEGG Pathway Painter (KPP). KPP paints pathways from the KEGG database using large sets of the candidate genes accompanied by ""overexpressed"" or ""underexpressed"" marks, for example, those generated by microarrays or miRNA profilings.

Conclusion

KPP provides fast and comprehensive visualization of the global GE changes by consolidating a list of the color-coded candidate genes into the KEGG pathways. KPP is freely available and can be accessed at http://web.cos.gmu.edu/~gmanyam/kegg/.",2015-04-15 +27933532,Precise Network Modeling of Systems Genetics Data Using the Bayesian Network Webserver.,"The Bayesian Network Webserver (BNW, http://compbio.uthsc.edu/BNW ) is an integrated platform for Bayesian network modeling of biological datasets. It provides a web-based network modeling environment that seamlessly integrates advanced algorithms for probabilistic causal modeling and reasoning with Bayesian networks. BNW is designed for precise modeling of relatively small networks that contain less than 20 nodes. The structure learning algorithms used by BNW guarantee the discovery of the best (most probable) network structure given the data. To facilitate network modeling across multiple biological levels, BNW provides a very flexible interface that allows users to assign network nodes into different tiers and define the relationships between and within the tiers. This function is particularly useful for modeling systems genetics datasets that often consist of multiscalar heterogeneous genotype-to-phenotype data. BNW enables users to, within seconds or minutes, go from having a simply formatted input file containing a dataset to using a network model to make predictions about the interactions between variables and the potential effects of experimental interventions. In this chapter, we will introduce the functions of BNW and show how to model systems genetics datasets with BNW.",2017-01-01 +32847152,A Qualitative Exploration of Nurses' Information-Gathering Behaviors Prior to Decision Support Tool Design.,"

Background

Large and readily-available clinical datasets combined with improved computational resources have permitted the exploration of many new research and clinical questions. Predictive analytics, especially for adverse events, has surfaced as one promising application of big data, and although statistical results can be highly accurate, little is known about how nurses perceive this new information and how they might act upon it.

Objectives

Within the context of recognizing patients at risk for cardiopulmonary arrest, this study explored the possibility of incorporating predictive analytics into clinical workflows by identifying nurses' current information gathering activities and perceptions of probability-related terms.

Methods

We used a qualitative description approach for data collection and analysis in order to understand participants' information gathering behaviors and term perceptions in their own words. We conducted one-on-one interviews and a focus group with a total of 10 direct care bedside nurses and 8 charge nurses.

Results

Participants collected information from many sources that we categorized as: Patient, Other People, and Technology. The process by which they gathered information was conducted in an inconsistent order and differed by role. Major themes comprised: (a) attempts to find information from additional sources during uncertainty, (b) always being prepared for the worst-case scenario, and (c) the desire to review more detailed predictions. Use of the words probability, risk, and uncertainty was inconsistent.

Conclusions

In an effort to successfully incorporate predictive analytics into clinical workflows, we have described nurses' perceived work practices for gathering information related to clinical deterioration and nurses' beliefs related to probability-based information. Findings from our study could guide design and implementation efforts of predictive analytics in the clinical arena. Jeffery AD, Kennedy B, Dietrich MS, Mion LC, Novak LL. A Qualitative Exploration of Nurses' Information-Gathering Behaviors Prior to Decision Support Tool Design. Appl Clin Inform 2017; 8: 763-778 https://doi.org/10.4338/ACI-2017-02-RA-0033.",2017-07-01 +25085083,AFAL: a web service for profiling amino acids surrounding ligands in proteins.,"With advancements in crystallographic technology and the increasing wealth of information populating structural databases, there is an increasing need for prediction tools based on spatial information that will support the characterization of proteins and protein-ligand interactions. Herein, a new web service is presented termed amino acid frequency around ligand (AFAL) for determining amino acids type and frequencies surrounding ligands within proteins deposited in the Protein Data Bank and for assessing the atoms and atom-ligand distances involved in each interaction (availability: http://structuralbio.utalca.cl/AFAL/index.html ). AFAL allows the user to define a wide variety of filtering criteria (protein family, source organism, resolution, sequence redundancy and distance) in order to uncover trends and evolutionary differences in amino acid preferences that define interactions with particular ligands. Results obtained from AFAL provide valuable statistical information about amino acids that may be responsible for establishing particular ligand-protein interactions. The analysis will enable investigators to compare ligand-binding sites of different proteins and to uncover general as well as specific interaction patterns from existing data. 
Such patterns can be used subsequently to predict ligand binding in proteins that currently have no structural information and to refine the interpretation of existing protein models. The application of AFAL is illustrated by the analysis of proteins interacting with adenosine-5'-triphosphate.",2014-08-02 +28419194,DMINDA 2.0: integrated and systematic views of regulatory DNA motif identification and analyses.,"

Motivation

Motif identification and analyses are important and have been long-standing computational problems in bioinformatics. Substantial efforts have been made in this field during the past several decades. However, the lack of intuitive and integrative web servers impedes the progress of making effective use of emerging algorithms and tools.

Results

Here we present an integrated web server, DMINDA 2.0, which contains: (i) five motif prediction and analyses algorithms, including a phylogenetic footprinting framework; (ii) 2125 species with complete genomes to support the above five functions, covering animals, plants and bacteria and (iii) bacterial regulon prediction and visualization.

Availability and implementation

DMINDA 2.0 is freely available at http://bmbl.sdstate.edu/DMINDA2.

Contact

qin.ma@sdstate.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +27440201,msVolcano: A flexible web application for visualizing quantitative proteomics data.,"

Unlabelled

We introduce msVolcano, a web application for the visualization of label-free mass spectrometric data. It is optimized for the output of the MaxQuant data analysis pipeline of interactomics experiments and generates volcano plots with lists of interacting proteins. The user can optimize the cutoff values to find meaningful significant interactors for the tagged protein of interest. Optionally, stoichiometries of interacting proteins can be calculated. Several customization options are provided to the user for flexibility, and publication-quality outputs can also be downloaded (tabular and graphical).

Availability

msVolcano is implemented in R Statistical language using Shiny. It can be accessed freely at http://projects.biotec.tu-dresden.de/msVolcano/.",2016-09-01 +27479329,TRIC: an automated alignment strategy for reproducible protein quantification in targeted proteomics.,"Next-generation mass spectrometric (MS) techniques such as SWATH-MS have substantially increased the throughput and reproducibility of proteomic analysis, but ensuring consistent quantification of thousands of peptide analytes across multiple liquid chromatography-tandem MS (LC-MS/MS) runs remains a challenging and laborious manual process. To produce highly consistent and quantitatively accurate proteomics data matrices in an automated fashion, we developed TRIC (http://proteomics.ethz.ch/tric/), a software tool that utilizes fragment-ion data to perform cross-run alignment, consistent peak-picking and quantification for high-throughput targeted proteomics. TRIC reduced the identification error compared to a state-of-the-art SWATH-MS analysis without alignment by more than threefold at constant recall while correcting for highly nonlinear chromatographic effects. On a pulsed-SILAC experiment performed on human induced pluripotent stem cells, TRIC was able to automatically align and quantify thousands of light and heavy isotopic peak groups. Thus, TRIC fills a gap in the pipeline for automated analysis of massively parallel targeted proteomics data sets.",2016-08-01 +26902267,An interactive web-based application for Comprehensive Analysis of RNAi-screen Data.,"RNAi screens are widely used in functional genomics. Although the screen data can be susceptible to a number of experimental biases, many of these can be corrected by computational analysis. For this purpose, here we have developed a web-based platform for integrated analysis and visualization of RNAi screen data named CARD (for Comprehensive Analysis of RNAi Data; available at https://card.niaid.nih.gov). 
CARD allows the user to seamlessly carry out sequential steps in a rigorous data analysis workflow, including normalization, off-target analysis, integration of gene expression data, optimal thresholds for hit selection and network/pathway analysis. To evaluate the utility of CARD, we describe analysis of three genome-scale siRNA screens and demonstrate: (i) a significant increase both in selection of subsequently validated hits and in rejection of false positives, (ii) an increased overlap of hits from independent screens of the same biology and (iii) insight to microRNA (miRNA) activity based on siRNA seed enrichment.",2016-02-23 +26852142,Impact of data resolution on three-dimensional structure inference methods.,"

Background

Assays that are capable of detecting genome-wide chromatin interactions have produced massive amounts of data and led to great understanding of the chromosomal three-dimensional (3D) structure. As technology becomes more sophisticated, higher-and-higher resolution data are being produced, going from the initial 1 Megabases (Mb) resolution to the current 10 Kilobases (Kb) or even 1 Kb resolution. The availability of genome-wide interaction data necessitates development of analytical methods to recover the underlying 3D spatial chromatin structure, but challenges abound. Most of the methods were proposed for analyzing data at low resolution (1 Mb). Their behaviors are thus unknown for higher resolution data. For such data, one of the key features is the high proportion of ""0"" contact counts among all available data, in other words, the excess of zeros.

Results

To address the issue of excess of zeros, in this paper, we propose a truncated Random effect EXpression (tREX) method that can handle data at various resolutions. We then assess the performance of tREX and a number of leading existing methods for recovering the underlying chromatin 3D structure. This was accomplished by creating in-silico data to mimic multiple levels of resolution and submit the methods to a ""stress test"". Finally, we applied tREX and the comparison methods to a Hi-C dataset for which FISH measurements are available to evaluate estimation accuracy.

Conclusion

The proposed tREX method achieves consistently good performance in all 30 simulated settings considered. It is not only robust to resolution level and underlying parameters, but also insensitive to model misspecification. This conclusion is based on observations made in terms of 3D structure estimation accuracy and preservation of topologically associated domains. Application of the methods to the human lymphoblastoid cell line data on chromosomes 14 and 22 further substantiates the superior performance of tREX: the constructed 3D structure from tREX is consistent with the FISH measurements, and the corresponding distances predicted by tREX have higher correlation with the FISH measurements than any of the comparison methods.

Software

An open-source R-package is available at http://www.stat.osu.edu/~statgen/Software/tRex.",2016-02-06 +28886594,"Elemental Sulfur Use and Associations with Pediatric Lung Function and Respiratory Symptoms in an Agricultural Community (California, USA).","

Background

Elemental sulfur, ""the oldest of all pesticides,"" is the most heavily used agricultural pesticide in California and Europe. Sulfur is considered relatively safe and is used in both conventional and organic farming systems. Adverse respiratory effects have been reported in applicators and animals, but the effect on residential populations, and especially on children living in proximity to fields treated with elemental sulfur, is not known.

Objectives

We evaluated associations between residential proximity to elemental sulfur applications and respiratory symptoms and spirometry of children living in an agricultural community.

Methods

Participants were enrolled in the CHAMACOS longitudinal birth cohort. We collected respiratory symptomatology for 347 children at 7 y of age and measured spirometry on a subset of 279. Of these, estimations of proximity to sulfur application and relevant covariate data were available for 237 and 205 children for whom we had symptomatology information and FEV1 measurements, respectively. Data from the California Pesticide Use Reporting System were used to estimate the amount of elemental sulfur applied within 0.5, 1, and 3 km of a child's residence during the week, month, and 12 mo prior to pulmonary evaluation. Regression models controlled for maternal smoking during pregnancy; season of birth; PM2.5 (particulate matter ≤2.5 μm in aerodynamic diameter); breast feeding duration; child's sex, age, and height; technician; and other covariates.

Results

Adverse associations with respiratory outcomes were found for sulfur applications within 0.5- and 1-km radii. Specifically, asthma medication usage and respiratory symptoms increased [OR=3.51; 95% confidence interval (CI): 1.50, 8.23, p=0.004; OR=2.09; 95% CI: 1.27, 3.46, p=0.004, respectively] and FEV1 decreased (β=−0.143; 95% CI: −0.248, −0.039, p=0.008) per 10-fold increase in the estimated amount of sulfur used within 1 km of child residence during the year prior to pulmonary evaluation.

Conclusions

This study suggests that elemental sulfur use, allowed in both organic and conventional farming, in close proximity to residential areas, may adversely affect children's respiratory health. https://doi.org/10.1289/EHP528.",2017-08-10 +27870045,Unprecedented remote sensing data over King and Rim megafires in the Sierra Nevada Mountains of California.,"Megafires have lasting social, ecological, and economic impacts and are increasing in the western contiguous United States. Because of their infrequent nature, there is a limited sample of megafires to investigate their unique behavior, drivers, and relationship to forest management practices. One approach is to characterize critical information pre-, during, and post-fire using remote sensing. In August 2013, the Rim Fire burned 104,131 ha and in September 2014, the King Fire burned 39,545 ha. Both fires occurred in California's Sierra Nevada. The areas burned by these fires were fortuitously surveyed by airborne campaigns, which provided the most recent remote sensing technologies not currently available from satellite. Technologies include an imaging spectrometer spanning the visible to shortwave infrared (0.38-2.5 μm), a multispectral, high-spatial resolution thermal infrared (3.5-13 μm) spectroradiometer, and Light Detection and Ranging that provide spatial resolutions of pixels from 1 × 1 m to 35 × 35 m. Because of the unique information inherently derived from these technologies before the fires, the areas were subsequently surveyed after the fires. We processed and provide free dissemination of these airborne datasets as products of surface reflectance, spectral metrics and forest structural metrics ( http://dx.doi.org/10.3334/ORNLDAAC/1288). These data products provide a unique opportunity to study relationships among and between remote sensing observations and fuel and fire characteristics (e.g., fuel type, condition, structure, and fire severity). 
The novelty of these data is not only in the unprecedented types of information available from them before, during, and after two megafires, but also in the synergistic use of multiple state of the art technologies for characterizing the environment. The synergy of these data can provide novel information that can improve maps of fuel type, structure, abundance, and condition that may improve predictions of megafire behavior and effects, thus aiding management before, during, and after such events. Key questions that these data could address include: What drives, extinguishes, and results from megafires? How does megafire behavior relate to fire and fuel management? How does the size and severity of a megafire affect the ecological recovery of the system?",2016-11-01 +28165140,COFFEE: control-free noninvasive fetal chromosomal examination using maternal plasma DNA.,"

Objective

The aim of this study is to develop an approach for analyzing plasma DNA sequencing data for noninvasive fetal chromosomal aneuploidy testing that does not require the comparison with control samples or a series of selected genomic regions.

Results

We developed the control-free noninvasive fetal chromosomal examination (COFFEE) algorithm by utilizing the size differences between the fetally derived and maternally derived DNA molecules in maternal plasma. We applied COFFEE on three datasets generated in different experimental settings. COFFEE showed 100% accuracy in trisomy 21 testing on these datasets. In contrast, samples analyzed using an existing control-based z-score method would introduce a false-positive result because of batch-to-batch variation, when the tested samples were analyzed using control samples from other batches. We believe that COFFEE is useful for enhancing the cost-effectiveness of noninvasive fetal chromosomal aneuploidy testing particularly in laboratories with small caseloads. Source code and testing datasets for COFFEE are available for download at http://www.cuhk.edu.hk/med/cpy/Research/COFFEE/.

Conclusion

Control-free noninvasive fetal chromosomal examination is demonstrated to be a versatile data analysis approach and could enhance the application of noninvasive fetal chromosomal aneuploidy detection. © 2017 John Wiley & Sons, Ltd.",2017-02-17 +28405910,Omics analysis of acetic acid tolerance in Saccharomyces cerevisiae.,"Acetic acid is an inhibitor in industrial processes such as wine making and bioethanol production from cellulosic hydrolysate. It causes energy depletion, inhibition of metabolic enzyme activity, growth arrest and ethanol productivity losses in Saccharomyces cerevisiae. Therefore, understanding the mechanisms of the yeast responses to acetic acid stress is essential for improving acetic acid tolerance and ethanol production. Although 329 genes associated with acetic acid tolerance have been identified in the Saccharomyces genome and included in the database ( http://www.yeastgenome.org/observable/resistance_to_acetic_acid/overview ), the cellular mechanistic responses to acetic acid remain unclear in this organism. Post-genomic approaches such as transcriptomics, proteomics, metabolomics and chemogenomics are being applied to yeast and are providing insight into the mechanisms and interactions of genes, proteins and other components that together determine complex quantitative phenotypic traits such as acetic acid tolerance. This review focuses on these omics approaches in the response to acetic acid in S. cerevisiae. Additionally, several novel strains with improved acetic acid tolerance have been engineered by modifying key genes, and the application of these strains and recently acquired knowledge to industrial processes is also discussed.",2017-04-12 +28402608,A Fast Algorithm to Compute Conical Pockets in Proteins. Application to the Structural Characterization of γ-Carbonic Anhydrases. ,"Some major proteins families, such as carbonic anhydrases (CAs), have a conical cavity at the active site. 
No algorithm was available to compute conical cavities, so we needed to design one. The fast algorithm we designed let us show on a set of 717 CAs extracted from the PDB database that γ-CAs are characterized by active site cavity cone angles significantly larger than those of α-CAs and β-CAs: the generatrix-axis angles are greater than 60° for the γ-CAs while they are smaller than 50° for the other CAs. Free binaries of the CONICA software implementing the algorithm are available through a software repository at http://petitjeanmichel.free.fr/itoweb.petitjean.freeware.html.",2017-04-12 +22383585,A database of annotated promoters of genes associated with common respiratory and related diseases.,"Many genes have been implicated in the pathogenesis of common respiratory and related diseases (RRDs), yet the underlying mechanisms are largely unknown. Differential gene expression patterns in diseased and healthy individuals suggest that RRDs affect or are affected by modified transcription regulation programs. It is thus crucial to characterize implicated genes in terms of transcriptional regulation. For this purpose, we conducted a promoter analysis of genes associated with 11 common RRDs including allergic rhinitis, asthma, bronchiectasis, bronchiolitis, bronchitis, chronic obstructive pulmonary disease, cystic fibrosis, emphysema, eczema, psoriasis, and urticaria, many of which are thought to be genetically related. The objective of the present study was to obtain deeper insight into the transcriptional regulation of these disease-associated genes by annotating their promoter regions with transcription factors (TFs) and TF binding sites (TFBSs). We discovered many TFs that are significantly enriched in the target disease groups including associations that have been documented in the literature. We also identified a number of putative TFs/TFBSs that appear to be novel. 
The results of our analysis are provided in an online database that is freely accessible to researchers at http://www.respiratorygenomics.com. Promoter-associated TFBS information and related genomic features, such as histone modification sites, microsatellites, CpG islands, and SNPs, are graphically summarized in the database. Users can compare and contrast underlying mechanisms of specific RRDs relative to candidate genes, TFs, gene ontology terms, micro-RNAs, and biological pathways for the conduct of metaanalyses. This database represents a novel, useful resource for RRD researchers.",2012-03-01 +29409535,DMTO: a realistic ontology for standard diabetes mellitus treatment.,"BACKGROUND:Treatment of type 2 diabetes mellitus (T2DM) is a complex problem. A clinical decision support system (CDSS) based on massive and distributed electronic health record data can facilitate the automation of this process and enhance its accuracy. The most important component of any CDSS is its knowledge base. This knowledge base can be formulated using ontologies. The formal description logic of ontology supports the inference of hidden knowledge. Building a complete, coherent, consistent, interoperable, and sharable ontology is a challenge. RESULTS:This paper introduces the first version of the newly constructed Diabetes Mellitus Treatment Ontology (DMTO) as a basis for shared-semantics, domain-specific, standard, machine-readable, and interoperable knowledge relevant to T2DM treatment. It is a comprehensive ontology and provides the highest coverage and the most complete picture of coded knowledge about T2DM patients' current conditions, previous profiles, and T2DM-related aspects, including complications, symptoms, lab tests, interactions, treatment plan (TP) frameworks, and glucose-related diseases and medications. 
It adheres to the design principles recommended by the Open Biomedical Ontologies Foundry and is based on ontological realism that follows the principles of the Basic Formal Ontology and the Ontology for General Medical Science. DMTO is implemented under Protégé 5.0 in Web Ontology Language (OWL) 2 format and is publicly available through the National Center for Biomedical Ontology's BioPortal at http://bioportal.bioontology.org/ontologies/DMTO . The current version of DMTO includes more than 10,700 classes, 277 relations, 39,425 annotations, 214 semantic rules, and 62,974 axioms. We provide proof of concept for this approach to modeling TPs. CONCLUSION:The ontology is able to collect and analyze most features of T2DM as well as customize chronic TPs with the most appropriate drugs, foods, and physical exercises. DMTO is ready to be used as a knowledge base for semantically intelligent and distributed CDSS systems.",2018-02-06 +29492101,InforMD: a new initiative to raise public awareness about breast density.,"On a mammogram, breast density (also known as mammographic density) is shown as white and bright regions and is associated with reduced sensitivity in cancer detection and increased breast cancer risk. However, many Australian women are unaware of the significance of breast density as it is not routinely reported or discussed. In order to address this lack of knowledge, Australian breast cancer researchers with expertise in mammographic density formed the InforMD alliance (INformation FORum on Mammographic Density) in 2016. The alliance is working to raise awareness of breast density with the goal of improving breast cancer diagnosis and health outcomes for women. The InforMD website (www.InforMD.org.au) was launched in October 2016, coinciding with a major nationwide public awareness campaign by the alliance during breast cancer awareness month. The website contains unbiased, accurate, updated information on breast density. 
The website also provides summaries of major research articles in layperson language, recent news items related to breast density, links to relevant information for health professionals, events, and feature articles. Members of the public and health professionals can also subscribe for news updates. The interactive online Forum section facilitates discussion between health professionals, scientists and members of the public. To increase online traffic to the website, Facebook (www.facebook.com/BeInforMD) and Twitter (https://twitter.com/BeInforMD_) pages were launched in December 2016. Since its launch, InforMD has generated considerable interest. The public awareness campaign reached over 7 million Australians through a combination of newspaper, TV, radio, and online news. The website has attracted 13,058 unique visitors and 30,353 page views (data as of 19/12/2017). Breast cancer researchers have a significant role to play in disseminating information to the public on breast density. A combination of mainstream and social media, together with a well-informed and updated website, has laid the groundwork for the InforMD alliance to reach a wide audience.",2018-02-06 +25977294,DIANA-miRPath v3.0: deciphering microRNA function with experimental support.,"The functional characterization of miRNAs is still an open challenge. Here, we present DIANA-miRPath v3.0 (http://www.microrna.gr/miRPathv3) an online software suite dedicated to the assessment of miRNA regulatory roles and the identification of controlled pathways. The new miRPath web server renders possible the functional annotation of one or more miRNAs using standard (hypergeometric distributions), unbiased empirical distributions and/or meta-analysis statistics. 
DIANA-miRPath v3.0 database and functionality have been significantly extended to support all analyses for KEGG molecular pathways, as well as multiple slices of Gene Ontology (GO) in seven species (Homo sapiens, Mus musculus, Rattus norvegicus, Drosophila melanogaster, Caenorhabditis elegans, Gallus gallus and Danio rerio). Importantly, more than 600 000 experimentally supported miRNA targets from DIANA-TarBase v7.0 have been incorporated into the new schema. Users of DIANA-miRPath v3.0 can harness this wealth of information and substitute or combine the available in silico predicted targets from DIANA-microT-CDS and/or TargetScan v6.2 with high quality experimentally supported interactions. A unique feature of DIANA-miRPath v3.0 is its redesigned Reverse Search module, which enables users to identify and visualize miRNAs significantly controlling selected pathways or belonging to specific GO categories based on in silico or experimental data. DIANA-miRPath v3.0 is freely available to all users without any login requirement.",2015-05-14 +26807157,The development of models to predict melting and pyrolysis point data associated with several hundred thousand compounds mined from PATENTS.,"

Background

Melting point (MP) is an important property in regards to the solubility of chemical compounds. Its prediction from chemical structure remains a highly challenging task for quantitative structure-activity relationship studies. Success in this area of research critically depends on the availability of high quality MP data as well as accurate chemical structure representations in order to develop models. Currently, available datasets for MP predictions have been limited to around 50k molecules while lots more data are routinely generated following the synthesis of novel materials. Significant amounts of MP data are freely available within the patent literature and, if it were available in the appropriate form, could potentially be used to develop predictive models.

Results

We have developed a pipeline for the automated extraction and annotation of chemical data from published PATENTS. Almost 300,000 data points have been collected and used to develop models to predict melting and pyrolysis (decomposition) points using tools available on the OCHEM modeling platform (http://ochem.eu). A number of technical challenges were simultaneously solved to develop models based on these data. These included the handling of sparse data matrices with >200,000,000,000 entries and parallel calculations using 32 × 6 cores per task using 13 descriptor sets totaling more than 700,000 descriptors. We showed that models developed using data collected from PATENTS had similar or better prediction accuracy compared to the highly curated data used in previous publications. The separation of data for chemicals that decomposed rather than melting, from compounds that did undergo a normal melting transition, was performed and models for both pyrolysis and MPs were developed. The accuracy of the consensus MP models for molecules from the drug-like region of chemical space was similar to their estimated experimental accuracy, 32 °C. Last but not least, important structural features related to the pyrolysis of chemicals were identified, and a model to predict whether a compound will decompose instead of melting was developed.

Conclusions

We have shown that automated tools for the analysis of chemical information have reached a mature stage allowing for the extraction and collection of high quality data to enable the development of structure-activity relationship models. The developed models and data are publicly available at http://ochem.eu/article/99826.",2016-01-22 +27794557,FractBias: a graphical tool for assessing fractionation bias following polyploidy.,"

Summary

Following polyploidy events, genomes undergo massive reduction in gene content through a process known as fractionation. Importantly, the fractionation process is not always random, and a bias as to which homeologous chromosome retains or loses more genes can be observed in some species. The process of characterizing whole genome fractionation requires identifying syntenic regions across genomes followed by post-processing of those syntenic datasets to identify and plot gene retention patterns. We have developed a tool, FractBias, to calculate and visualize gene retention and fractionation patterns across whole genomes. Through integration with SynMap and its parent platform CoGe, assembled genomes are pre-loaded and available for analysis, as well as letting researchers integrate their own data with security options to keep them private or make them publicly available.

Availability and implementation

FractBias is freely available as a web application at https://genomevolution.org/CoGe/SynMap.pl . The software is open source (MIT license) and executable with Python 2.7 or iPython notebook, and available on GitHub ( https://goo.gl/PaAtqy ). Documentation for FractBias is available on CoGepedia ( https://goo.gl/ou9dt6 ).

Contact

ericlyons@email.arizona.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +28938573,iATC-mHyb: a hybrid multi-label classifier for predicting the classification of anatomical therapeutic chemicals.,"Recommended by the World Health Organization (WHO), drug compounds have been classified into 14 main ATC (Anatomical Therapeutic Chemical) classes according to their therapeutic and chemical characteristics. Given an uncharacterized compound, can we develop a computational method to fast identify which ATC class or classes it belongs to? The information thus obtained will timely help adjusting our focus and selection, significantly speeding up the drug development process. But this problem is by no means an easy one since some drug compounds may belong to two or more than two ATC classes. To address this problem, using the DO (Drug Ontology) approach based on the ChEBI (Chemical Entities of Biological Interest) database, we developed a predictor called iATC-mDO. Subsequently, hybridizing it with an existing drug ATC classifier, we constructed a predictor called iATC-mHyb. It has been demonstrated by the rigorous cross-validation and from five different measuring angles that iATC-mHyb is remarkably superior to the best existing predictor in identifying the ATC classes for drug compounds. To convenience most experimental scientists, a user-friendly web-server for iATC-mHyb has been established at http://www.jci-bioinfo.cn/iATC-mHyb, by which users can easily get their desired results without the need to go through the complicated mathematical equations involved.",2017-04-11 +25069839,ELM: enhanced lowest common ancestor based method for detecting a pathogenic virus from a large sequence dataset.,"

Background

Emerging viral diseases, most of which are caused by the transmission of viruses from animals to humans, pose a threat to public health. Discovering pathogenic viruses through surveillance is the key to preparedness for this potential threat. Next generation sequencing (NGS) helps us to identify viruses without the design of a specific PCR primer. The major task in NGS data analysis is taxonomic identification for vast numbers of sequences. However, taxonomic identification via a BLAST search against all the known sequences is a computational bottleneck.

Description

Here we propose an enhanced lowest-common-ancestor based method (ELM) to effectively identify viruses from massive sequence data. To reduce the computational cost, ELM uses a customized database composed only of viral sequences for the BLAST search. At the same time, ELM adopts a novel criterion to suppress the rise in false positive assignments caused by the small database. As a result, identification by ELM is more than 1,000 times faster than the conventional methods without loss of accuracy.

Conclusions

We anticipate that ELM will contribute to direct diagnosis of viral infections. The web server and the customized viral database are freely available at http://bioinformatics.czc.hokudai.ac.jp/ELM/.",2014-07-28 +28186229,DiagnoProt: a tool for discovery of new molecules by mass spectrometry.,"

Motivation

Around 75% of all mass spectra remain unidentified by widely adopted proteomic strategies. We present DiagnoProt, an integrated computational environment that can efficiently cluster millions of spectra and use machine learning to shortlist high-quality unidentified mass spectra that are discriminative of different biological conditions.

Results

We exemplify the use of DiagnoProt by shortlisting 4366 high-quality unidentified tandem mass spectra that are discriminative of different types of the Aspergillus fungus.

Availability and implementation

DiagnoProt, a demonstration video and a user tutorial are available at http://patternlabforproteomics.org/diagnoprot .

Contact

andrerfsilva@gmail.com or paulo@pcarvalho.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +28651634,Data- and expert-driven rule induction and filtering framework for functional interpretation and description of gene sets.,"

Background

High-throughput methods in molecular biology provided researchers with abundance of experimental data that need to be interpreted in order to understand the experimental results. Manual methods of functional gene/protein group interpretation are expensive and time-consuming; therefore, there is a need to develop new efficient data mining methods and bioinformatics tools that could support the expert in the process of functional analysis of experimental results.

Results

In this study, we propose a comprehensive framework for the induction of logical rules in the form of combinations of Gene Ontology (GO) terms for functional interpretation of gene sets. Within the framework, we present four approaches: the fully automated method of rule induction without filtering, rule induction method with filtering, expert-driven rule filtering method based on additive utility functions, and expert-driven rule induction method based on the so-called seed or expert terms - the GO terms of special interest which should be included into the description. These GO terms usually describe some processes or pathways of particular interest, which are related to the experiment that is being performed. During the rule induction and filtering processes such seed terms are used as a base on which the description is built.

Conclusion

We compare the descriptions obtained with different algorithms of rule induction and filtering and show that a filtering step is required to reduce the number of rules in the output set so that they could be analyzed by a human expert. However, filtering may remove information from the output rule set which is potentially interesting for the expert. Therefore, in the study, we present two methods that involve interaction with the expert during the process of rule induction. Both of them are able to reduce the number of rules, but only in the case of the method based on seed terms, each of the created rules includes expert terms in combination with the other terms. Further analysis of such combinations may provide new knowledge about biological processes and their combination with other pathways related to genes described by the rules. A suite of Matlab scripts that provide the functionality of a comprehensive framework for the rule induction and filtering presented in this study is available free of charge at: http://rulego.polsl.pl/framework .",2017-06-26 +25627402,Crystal structures of apo-DszC and FMN-bound DszC from Rhodococcus erythropolis D-1.,"

Unlabelled

The release of SO2 from petroleum products derived from crude oil, which contains sulfur compounds such as dibenzothiophene (DBT), leads to air pollution. The '4S' metabolic pathway catalyzes the sequential conversion of DBT to 2-hydroxybiphenyl via three enzymes encoded by the dsz operon in several bacterial species. DszC (DBT monooxygenase), from Rhodococcus erythropolis D-1 is involved in the first two steps of the '4S' pathway. Here, we determined the first crystal structure of FMN-bound DszC, and found that two distinct conformations occur in the loop region (residues 131-142) adjacent to the active site. On the basis of the DszC-FMN structure and the previously reported apo structures of DszC homologs, the binding site for DBT and DBT sulfoxide is proposed.

Database

The atomic coordinates and structure factors for apo-DszC (PDB code: 3X0X) and DszC-FMN (PDB code: 3X0Y) have been deposited in the Protein Data Bank (http://www.rcsb.org).",2015-02-11 +24270788,Lynx: a database and knowledge extraction engine for integrative medicine.,"We have developed Lynx (http://lynx.ci.uchicago.edu)--a web-based database and a knowledge extraction engine, supporting annotation and analysis of experimental data and generation of weighted hypotheses on molecular mechanisms contributing to human phenotypes and disorders of interest. Its underlying knowledge base (LynxKB) integrates various classes of information from >35 public databases and private collections, as well as manually curated data from our group and collaborators. Lynx provides advanced search capabilities and a variety of algorithms for enrichment analysis and network-based gene prioritization to assist the user in extracting meaningful knowledge from LynxKB and experimental data, whereas its service-oriented architecture provides public access to LynxKB and its analytical tools via user-friendly web services and interfaces.",2013-11-21 +25517703,"Septris: a novel, mobile, online, simulation game that improves sepsis recognition and management.","

Problem

Annually affecting over 18 million people worldwide, sepsis is common, deadly, and costly. Despite significant effort by the Surviving Sepsis Campaign and other initiatives, sepsis remains underrecognized and undertreated.

Approach

Research indicates that educating providers may improve sepsis diagnosis and treatment; thus, the Stanford School of Medicine has developed a mobile-accessible, case-based, online game entitled Septris (http://med.stanford.edu/septris/). Septris, launched online worldwide in December 2011, takes an innovative approach to teaching early sepsis identification and evidence-based management. The free gaming platform leverages the massive expansion over the past decade of smartphones and the popularity of noneducational gaming. The authors sought to assess the game's dissemination and its impact on learners' sepsis-related knowledge, skills, and attitudes. In 2012, the authors trained Stanford pregraduate (clerkship) and postgraduate (resident) medical learners (n = 156) in sepsis diagnosis and evidence-based practices via 20 minutes of self-directed game play with Septris. The authors administered pre- and posttests.

Outcomes

By October 2014, Septris garnered over 61,000 visits worldwide. After playing Septris, both pre- and postgraduate groups improved their knowledge on written testing in recognizing and managing sepsis (P < .001). Retrospective self-reporting on their ability to identify and manage sepsis also improved (P < .001). Over 85% of learners reported that they would or would maybe recommend Septris.

Next steps

Future evaluation of Septris should assess its effectiveness among different providers, resource settings, and cultures; generate information about how different learners make clinical decisions; and evaluate the correlation of game scores with sepsis knowledge.",2015-02-01 +26561344,COGNIZER: A Framework for Functional Annotation of Metagenomic Datasets.,"

Background

Recent advances in sequencing technologies have resulted in an unprecedented increase in the number of metagenomes that are being sequenced world-wide. Given their volume, functional annotation of metagenomic sequence datasets requires specialized computational tools/techniques. In spite of having high accuracy, existing stand-alone functional annotation tools necessitate end-users to perform compute-intensive homology searches of metagenomic datasets against ""multiple"" databases prior to functional analysis. Although, web-based functional annotation servers address to some extent the problem of availability of compute resources, uploading and analyzing huge volumes of sequence data on a shared public web-service has its own set of limitations. In this study, we present COGNIZER, a comprehensive stand-alone annotation framework which enables end-users to functionally annotate sequences constituting metagenomic datasets. The COGNIZER framework provides multiple workflow options. A subset of these options employs a novel directed-search strategy which helps in reducing the overall compute requirements for end-users. The COGNIZER framework includes a cross-mapping database that enables end-users to simultaneously derive/infer KEGG, Pfam, GO, and SEED subsystem information from the COG annotations.

Results

Validation experiments performed with real-world metagenomes and metatranscriptomes, generated using diverse sequencing technologies, indicate that the novel directed-search strategy employed in COGNIZER helps in reducing the compute requirements without significant loss in annotation accuracy. A comparison of COGNIZER's results with pre-computed benchmark values indicate the reliability of the cross-mapping database employed in COGNIZER.

Conclusion

The COGNIZER framework is capable of comprehensively annotating any metagenomic or metatranscriptomic dataset from varied sequencing platforms in functional terms. Multiple search options in COGNIZER provide end-users the flexibility of choosing a homology search protocol based on available compute resources. The cross-mapping database in COGNIZER is of high utility since it enables end-users to directly infer/derive KEGG, Pfam, GO, and SEED subsystem annotations from COG categorizations. Furthermore, availability of COGNIZER as a stand-alone scalable implementation is expected to make it a valuable annotation tool in the field of metagenomic research.

Availability and implementation

A Linux implementation of COGNIZER is freely available for download from the following links: http://metagenomics.atc.tcs.com/cognizer, https://metagenomics.atc.tcs.com/function/cognizer.",2015-11-11 +27659451,PSSV: a novel pattern-based probabilistic approach for somatic structural variation identification.,"

Motivation

Whole genome DNA-sequencing (WGS) of paired tumor and normal samples has enabled the identification of somatic DNA changes in an unprecedented detail. Large-scale identification of somatic structural variations (SVs) for a specific cancer type will deepen our understanding of driver mechanisms in cancer progression. However, the limited number of WGS samples, insufficient read coverage, and the impurity of tumor samples that contain normal and neoplastic cells, limit reliable and accurate detection of somatic SVs.

Results

We present a novel pattern-based probabilistic approach, PSSV, to identify somatic structural variations from WGS data. PSSV features a mixture model with hidden states representing different mutation patterns; PSSV can thus differentiate heterozygous and homozygous SVs in each sample, enabling the identification of those somatic SVs with heterozygous mutations in normal samples and homozygous mutations in tumor samples. Simulation studies demonstrate that PSSV outperforms existing tools. PSSV has been successfully applied to breast cancer data to identify somatic SVs of key factors associated with breast cancer development.

Availability and implementation

An R package of PSSV is available at http://www.cbil.ece.vt.edu/software.htm CONTACT: xuan@vt.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-21 +26922258,Combined Circumferential and Longitudinal Left Ventricular Systolic Dysfunction in Patients with Rheumatoid Arthritis without Overt Cardiac Disease.,"

Background

Patients with rheumatoid arthritis have an increased risk for cardiovascular disease. Because of accelerated atherosclerosis and changes in left ventricular (LV) geometry, circumferential and longitudinal (C&L) LV systolic dysfunction (LVSD) may be impaired in these patients despite preserved LV ejection fraction. The aim of this study was to determine the prevalence of and factors associated with combined C&L LVSD in patients with rheumatoid arthritis.

Methods

One hundred ninety-eight outpatients with rheumatoid arthritis without overt cardiac disease were prospectively analyzed from January through June 2014 and compared with 198 matched control subjects. C&L systolic function was evaluated by stress-corrected midwall shortening (sc-MS) and tissue Doppler mitral annular peak systolic velocity (S'). Combined C&L LVSD was defined if sc-MS was <86.5% and S' was <9.0 cm/sec (the 10th percentiles of sc-MS and S' derived in 132 healthy subjects).

Results

Combined C&L LVSD was detected in 56 patients (28%) and was associated with LV mass (odds ratio, 1.03; 95% CI, 1.01-1.06; P = .04) and concentric LV geometry (odds ratio, 2.76; 95% CI, 1.07-7.15; P = .03). By multiple logistic regression analysis, rheumatoid arthritis emerged as an independent predictor of combined C&L LVSD (odds ratio, 2.57; 95% CI, 1.06-6.25). The relationship between sc-MS and S' was statistically significant in the subgroup of 142 patients without combined C&L LVSD (r = 0.40, F < 0.001), having the best fitting by a linear function (sc-MS = 58.1 + 3.34 × peak S'; r(2) = 0.19, P < .0001), absent in patients with combined C&L LVSD.

Conclusions

Combined C&L LVSD is detectable in about one fourth of patients with asymptomatic rheumatoid arthritis and is associated with LV concentric remodeling and hypertrophy. Rheumatoid arthritis predicts this worrisome condition, which may explain the increased risk for cardiovascular events in these patients.

Notice of clarification

The aim of this ""notice of clarification"" is to analyze in brief the similarities and to underline the differences between the current article (defined as ""paper J"") and a separate article entitled ""Prevalence and Factors Associated with Subclinical Left Ventricular Systolic Dysfunction Evaluated by Mid-Wall Mechanics in Rheumatoid Arthritis"" (defined as ""paper E""), which was written several months before paper J, and recently accepted for publication by the journal ""Echocardiography"" (Cioffi et al. http://dx.doi.org/10.1111/echo.13186). We wish to explain more clearly how the manuscript described in ""paper J"" relates to the ""paper E"" and the context in which it ought to be considered. Data in both papers were derived from the same prospective database, so that it would appear questionable if the number of the enrolled patients and/or their clinical/laboratory/echocardiographic characteristics were different. Accordingly, both papers reported that 198 patients with rheumatoid arthritis (RA) were considered and their characteristics were identical, due to the fact that they were the same subjects (this circumstance is common and mandatory among all studies in which the patients were recruited from the same database). These are the similarities between the papers. In paper E, which was written several months before paper J, we focused on the prevalence and factors associated with impaired circumferential left ventricular (LV) systolic function measured as mid-wall shortening (corrected for circumferential end-systolic stress). We found that 110 patients (56% of the whole population) demonstrated this feature. Thus, these 110 patients were the object of the study described in paper E, in which we specifically analyzed the factors associated with the impairment of stress-corrected mid-wall shortening (sc-MS). 
The conclusions of that paper were: (i) subclinical LV systolic dysfunction (LVSD) is detectable in more than half RA population without overt cardiac disease as measured by sc-MS, (ii) RA per se is associated with LVSD, and (iii) in RA patients only LV relative wall thickness was associated with impaired sc-MS based upon multivariate logistic regression analysis. Differently, in the paper J, we focused on the prevalence and factors associated with combined impairment of circumferential and longitudinal shortening (C&L) in 198 asymptomatic patients with RA. We found that 56 patients (28% of the whole population) presented this feature. Thus, these 56 patients were analyzed in detail in this study, as well as the factors associated with the combined impairment of C&L shortening. In paper J, we evaluated sc-MS as an indicator of circumferential systolic LV shortening, and we also determined the average of tissue Doppler measures of maximal systolic mitral annular velocity at four different sampling sites ( S') as an indicator of longitudinal LV systolic shortening. This approach clearly demonstrates that in paper J, we analyzed data deriving from the tissue Doppler analysis, which were not taken into any consideration in paper E. The investigation described in paper J made evident several original and clinically relevant findings. 
In patients with RA: (i) the condition of combined C&L left ventricular systolic dysfunction (LVSD) is frequent; (ii) these patients have comparable clinical and laboratory characteristics with those without combined C&L LVSD, but exhibit remarkable concentric LV geometry and increased LV mass, a phenotype that can be consider a model of compensated asymptomatic chronic heart failure; (iii) RA is an independent factor associated with combined C&L LVSD; (iv) no relationship between indexes of circumferential and longitudinal function exists in patients with combined C&L LVSD, while it is statistically significant and positive when the subgroup of patients without combined C&L LVSD is considered, having the best fitting by a linear function. All these findings are unique to the paper J and are not presented (they could not have been) in paper E. It appears clear that, starting from the same 198 patients included in the database, different sub-groups of patients were selected and analyzed in the two papers (they had different echocardiographic characteristics) and, consequently, different factors emerged by the statistical analyses as covariates associated with the different phenotypes of LVSD considered. Importantly, both papers E and J had a very long gestation because all reviewers for the different journals found several and important issues that merited to be addressed: a lot of changes were proposed and much additional information was required, particularly by the reviewers of paper E. Considering this context, it emerges that although paper E was written well before paper J, the two manuscripts were accepted at the same time (we received the letters of acceptance within a couple of weeks). Thus, the uncertainty about the fate of both manuscripts made it very difficult (if not impossible) to cite either of them in the other one and, afterward, we just did not think about this point anymore. 
Of note, the idea to combine in the analysis longitudinal function came therefore well after the starting process of revision of the paper E and was, in some way inspired by a reviewer's comment. That is why we did not put both findings in the same paper. We think that our explanations provide the broad audience of your journal a perspective of transparency and our respect for the readers' right to understand how the work described in the paper J relates to other work by our research group. Giovanni Cioffi On behalf of all co-authors Ombretta Viapiana, Federica Ognibeni, Andrea Dalbeni, Davide Gatti, Carmine Mazzone, Giorgio Faganello, Andrea Di Lenarda, Silvano Adami, and Maurizio Rossini.",2016-02-24 +23742129,Computational framework to support integration of biomolecular and clinical data within a translational approach.,"

Background

The use of the knowledge produced by sciences to promote human health is the main goal of translational medicine. To make it feasible we need computational methods to handle the large amount of information that arises from bench to bedside and to deal with its heterogeneity. A computational challenge that must be faced is to promote the integration of clinical, socio-demographic and biological data. In this effort, ontologies play an essential role as a powerful artifact for knowledge representation. Chado is a modular ontology-oriented database model that gained popularity due to its robustness and flexibility as a generic platform to store biological data; however it lacks supporting representation of clinical and socio-demographic information.

Results

We have implemented an extension of Chado - the Clinical Module - to allow the representation of this kind of information. Our approach consists of a framework for data integration through the use of a common reference ontology. The design of this framework has four levels: data level, to store the data; semantic level, to integrate and standardize the data by the use of ontologies; application level, to manage clinical databases, ontologies and data integration process; and web interface level, to allow interaction between the user and the system. The clinical module was built based on the Entity-Attribute-Value (EAV) model. We also proposed a methodology to migrate data from legacy clinical databases to the integrative framework. A Chado instance was initialized using a relational database management system. The Clinical Module was implemented and the framework was loaded using data from a factual clinical research database. Clinical and demographic data as well as biomaterial data were obtained from patients with tumors of head and neck. We implemented the IPTrans tool that is a complete environment for data migration, which comprises: the construction of a model to describe the legacy clinical data, based on an ontology; the Extraction, Transformation and Load (ETL) process to extract the data from the source clinical database and load it in the Clinical Module of Chado; the development of a web tool and a Bridge Layer to adapt the web tool to Chado, as well as other applications.

Conclusions

Open-source computational solutions currently available for translational science does not have a model to represent biomolecular information and also are not integrated with the existing bioinformatics tools. On the other hand, existing genomic data models do not represent clinical patient data. A framework was developed to support translational research by integrating biomolecular information coming from different ""omics"" technologies with patient's clinical and socio-demographic data. This framework should present some features: flexibility, compression and robustness. The experiments accomplished from a use case demonstrated that the proposed system meets requirements of flexibility and robustness, leading to the desired integration. The Clinical Module can be accessed in http://dcm.ffclrp.usp.br/caib/pg=iptrans.",2013-06-06 +29313106,Neural correlates of instrumental responding in the context of alcohol-related cues index disorder severity and relapse risk.,"The influence of Pavlovian conditioned stimuli on ongoing behavior may contribute to explaining how alcohol cues stimulate drug seeking and intake. Using a Pavlovian-instrumental transfer task, we investigated the effects of alcohol-related cues on approach behavior (i.e., instrumental response behavior) and its neural correlates, and related both to the relapse after detoxification in alcohol-dependent patients. Thirty-one recently detoxified alcohol-dependent patients and 24 healthy controls underwent instrumental training, where approach or non-approach towards initially neutral stimuli was reinforced by monetary incentives. Approach behavior was tested during extinction with either alcohol-related or neutral stimuli (as Pavlovian cues) presented in the background during functional magnetic resonance imaging (fMRI). Patients were subsequently followed up for 6 months. 
We observed that alcohol-related background stimuli inhibited the approach behavior in detoxified alcohol-dependent patients (t = - 3.86, p < .001), but not in healthy controls (t = - 0.92, p = .36). This behavioral inhibition was associated with neural activation in the nucleus accumbens (NAcc) (t(30) = 2.06, p < .05). Interestingly, both the effects were only present in subsequent abstainers, but not relapsers and in those with mild but not severe dependence. Our data show that alcohol-related cues can acquire inhibitory behavioral features typical of aversive stimuli despite being accompanied by a stronger NAcc activation, suggesting salience attribution. The fact that these findings are restricted to abstinence and milder illness suggests that they may be potential resilience factors.Clinical trial: LeAD study, http://www.lead-studie.de , NCT01679145.",2018-01-08 +26980280,f-divergence cutoff index to simultaneously identify differential expression in the integrated transcriptome and proteome.,"The ability to integrate 'omics' (i.e. transcriptomics and proteomics) is becoming increasingly important to the understanding of regulatory mechanisms. There are currently no tools available to identify differentially expressed genes (DEGs) across different 'omics' data types or multi-dimensional data including time courses. We present fCI (f-divergence Cut-out Index), a model capable of simultaneously identifying DEGs from continuous and discrete transcriptomic, proteomic and integrated proteogenomic data. We show that fCI can be used across multiple diverse sets of data and can unambiguously find genes that show functional modulation, developmental changes or misregulation. Applying fCI to several proteogenomics datasets, we identified a number of important genes that showed distinctive regulation patterns. 
The package fCI is available at R Bioconductor and http://software.steenlab.org/fCI/.",2016-03-14 +28921901,StructureSelector: A web-based software to select and visualize the optimal number of clusters using multiple methods.,"Inferences of population genetic structure are of great importance to the fields of ecology and evolutionary biology. The program structure has been widely used to infer population genetic structure. However, previous studies demonstrated that uneven sampling often leads to wrong inferences on hierarchical structure. The most widely used ΔK method tends to identify the uppermost hierarchy of population structure. Recently, four alternative statistics (medmedk, medmeak, maxmedk and maxmeak) were proposed, which appear to be more accurate than the previously used methods for both even and uneven sampling data. However, the lack of easy-to-use software limits the use of these appealing new estimators. Here, we developed a web-based user-friendly software structureselector to calculate the four appealing alternative statistics together with the commonly used Ln Pr(X|K) and ΔK statistics. structureselector accepts the result files of structure, admixture or faststructure as input files. It reports the ""best"" K for each estimator, and the results are available as HTML or tab separated tables. The program can also generate graphical representations for specific K, which can be easily downloaded from the server. The software is freely available at http://lmme.qdio.ac.cn/StructureSelector/.",2017-10-09 +27566673,Streamlined analysis of duplex sequencing data with Du Novo.,"Duplex sequencing was originally developed to detect rare nucleotide polymorphisms normally obscured by the noise of high-throughput sequencing. Here we describe a new, streamlined, reference-free approach for the analysis of duplex sequencing data. 
We show the approach performs well on simulated data and precisely reproduces previously published results and apply it to a newly produced dataset, enabling us to type low-frequency variants in human mitochondrial DNA. Finally, we provide all necessary tools as stand-alone components as well as integrate them into the Galaxy platform. All analyses performed in this manuscript can be repeated exactly as described at http://usegalaxy.org/duplex .",2016-08-26 +25725059,LocSigDB: a database of protein localization signals. ,"LocSigDB (http://genome.unmc.edu/LocSigDB/) is a manually curated database of experimental protein localization signals for eight distinct subcellular locations; primarily in a eukaryotic cell with brief coverage of bacterial proteins. Proteins must be localized at their appropriate subcellular compartment to perform their desired function. Mislocalization of proteins to unintended locations is a causative factor for many human diseases; therefore, collection of known sorting signals will help support many important areas of biomedical research. By performing an extensive literature study, we compiled a collection of 533 experimentally determined localization signals, along with the proteins that harbor such signals. Each signal in the LocSigDB is annotated with its localization, source, PubMed references and is linked to the proteins in UniProt database along with the organism information that contain the same amino acid pattern as the given signal. From LocSigDB webserver, users can download the whole database or browse/search for data using an intuitive query interface. To date, LocSigDB is the most comprehensive compendium of protein localization signals for eight distinct subcellular locations. 
Database URL: http://genome.unmc.edu/LocSigDB/",2015-02-27 +28510698,XSuLT: a web server for structural annotation and representation of sequence-structure alignments.,"The web server XSuLT, an enhanced version of the protein alignment annotation program JoY, formats a submitted multiple-sequence alignment using three-dimensional (3D) structural information in order to assist in the comparative analysis of protein evolution and in the optimization of alignments for comparative modelling and construct design. In addition to the features analysed by JoY, which include secondary structure, solvent accessibility and sidechain hydrogen bonds, XSuLT annotates each amino acid residue with residue depth, chain and ligand interactions, inter-residue contacts, sequence entropy, root mean square deviation and secondary structure and disorder prediction. It is also now integrated with built-in 3D visualization which interacts with the formatted alignment to facilitate inspection and understanding. Results can be downloaded as stand-alone HTML for the formatted alignment and as XML with the underlying annotation data. XSuLT is freely available at http://structure.bioc.cam.ac.uk/xsult/.",2017-07-01 +27153572,Visual Omics Explorer (VOE): a cross-platform portal for interactive data visualization.,"

Motivation

Given the abundance of genome sequencing and omics data, an opprtunity and challenge in bioinformatics relates to data mining and visualization. The majority of current bioinformatics visualizations are implemented either as multi-tier web server applications that require significant maintenance effort, or as client software that presumes technical expertise for installation. Here we present the Visual Omics Explorer (VOE), a cross-platform data visualization portal that is implemented using only HTML and Javascript code. VOE is a standalone software that can be loaded offline on the web browser from a local copy of the code, or over the internet without any dependency other than distributing the code through a file sharing service. VOE can interactively display genomics, transcriptomics, epigenomics and metagenomics data stored either locally or retrieved from cloud storage services, and runs on both desktop computers and mobile devices.

Availability and implementation

VOE is accessible at http://bcil.github.io/VOE/ CONTACT: agbiotec@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-07 +21459847,"The IntFOLD server: an integrated web resource for protein fold recognition, 3D model quality assessment, intrinsic disorder prediction, domain prediction and ligand binding site prediction.","The IntFOLD server is a novel independent server that integrates several cutting edge methods for the prediction of structure and function from sequence. Our guiding principles behind the server development were as follows: (i) to provide a simple unified resource that makes our prediction software accessible to all and (ii) to produce integrated output for predictions that can be easily interpreted. The output for predictions is presented as a simple table that summarizes all results graphically via plots and annotated 3D models. The raw machine readable data files for each set of predictions are also provided for developers, which comply with the Critical Assessment of Methods for Protein Structure Prediction (CASP) data standards. The server comprises an integrated suite of five novel methods: nFOLD4, for tertiary structure prediction; ModFOLD 3.0, for model quality assessment; DISOclust 2.0, for disorder prediction; DomFOLD 2.0 for domain prediction; and FunFOLD 1.0, for ligand binding site prediction. Predictions from the IntFOLD server were found to be competitive in several categories in the recent CASP9 experiment. The IntFOLD server is available at the following web site: http://www.reading.ac.uk/bioinf/IntFOLD/.",2011-03-31 +29902989,"Adversities and mental health needs of pregnant adolescents in Kenya: identifying interpersonal, practical, and cultural barriers to care.","

Background

Adolescent pregnancies present a great public health burden in Kenya and Sub-Saharan Africa (UNFPA, Motherhood in Childhood: Facing the challenge of Adolescent Pregnancy, 2013). The disenfranchisement from public institutions and services is further compounded by cultural stigma and gender inequality creating emotional, psychosocial, health, and educational problems in the lives of vulnerable pregnant adolescents (Int J Adolesc Med Health 15(4):321-9, 2003; BMC Public Health 8:83, 2008). In this paper we have applied an engagement interview framework to examine interpersonal, practical, and cultural challenges faced by pregnant adolescents.

Methods

Using a qualitative study design, 12 pregnant adolescents (ages 15-19) visiting a health facility's antenatal services in Nairobi were interviewed. All recruited adolescents were pregnant for the first time and screened positive on the nine-item Patient Health Questionnaire (PHQ-9) with 16% of 176 participants interviewed in a descriptive survey in the same Kangemi primary health facility found to be severely depressed (Osok et al., Depression and its psychosocial risk factors in pregnant Kenyan adolescents: a cross-sectional study in a community health Centre of Nairobi, BMC Psychiatry, 2018 18:136 https://doi.org/10.1186/s12888-018-1706-y). An engagement interview approach (Social Work 52(4):295-308, 2007) was applied to elicit various practical, psychological, interpersonal, and cultural barriers to life adjustment, service access, obtaining resources, and psychosocial support related to pregnancy. Grounded theory method was applied for qualitative data sifting and analysis (Strauss and Corbin, Basics of qualitative research, 1990).

Results

Findings revealed that pregnant adolescents face four major areas of challenges, including depression, anxiety and stress around the pregnancy, denial of the pregnancy, lack of basic needs provisions and care, and restricted educational or livelihood opportunities for personal development post pregnancy. These challenges were related both to existing social and cultural values/norms on gender and traditional family structure, as well as to service structural barriers (including prenatal care, mental health care, newborn care, parenting support services). More importantly, dealing with these challenges has led to negative mental health consequences in adolescent pregnant girls, including feeling insecure about the future, feeling very defeated and sad to be pregnant, and feeling unsupported and disempowered in providing care for the baby.

Conclusions

Findings have implications for service planning, including developing more integrated mental health services for pregnant adolescents. Additionally, we felt a need for developing reproductive education and information dissemination strategies to improve community members' knowledge of pregnant adolescent mental health issues.",2018-06-15 +25760613,NuProPlot: nucleic acid and protein interaction analysis and plotting program.,"Growing numbers of protein and nucleic acid complex structures are being determined and deposited in the Protein Data Bank and the Nucleic Acid Database. With the increasing complexity of these structures, it is challenging to analyse and visualize the three-dimensional interactions. The currently available programs for such analysis and visualization are limited in their applications. They can only analyse a subset of protein-nucleic acid complexes and require multiple iterations before obtaining plots that are suitable for presentation. An interactive web-based program, NuProPlot (http://www.nuproplot.com), has been developed which can automatically identify hydrogen, electrostatic and van der Waals interactions between proteins and nucleic acids and generate a plot showing all of the interactions. Protein-DNA and protein-RNA interactions can be visualized in simple two-dimensional schematics. Interactive schematic drawing options allow selection of the plotted area and repositioning of the individual interactions for better legibility. NuProPlot is a fully automated and user-friendly program providing various custom options. NuProPlot represents a greatly improved option for analysis and presentation of protein-nucleic acid interactions.",2015-02-26 +25894527,A mass spectrometric-derived cell surface protein atlas.,"Cell surface proteins are major targets of biomedical research due to their utility as cellular markers and their extracellular accessibility for pharmacological intervention. 
However, information about the cell surface protein repertoire (the surfaceome) of individual cells is only sparsely available. Here, we applied the Cell Surface Capture (CSC) technology to 41 human and 31 mouse cell types to generate a mass-spectrometry derived Cell Surface Protein Atlas (CSPA) providing cellular surfaceome snapshots at high resolution. The CSPA is presented in form of an easy-to-navigate interactive database, a downloadable data matrix and with tools for targeted surfaceome rediscovery (http://wlab.ethz.ch/cspa). The cellular surfaceome snapshots of different cell types, including cancer cells, resulted in a combined dataset of 1492 human and 1296 mouse cell surface glycoproteins, providing experimental evidence for their cell surface expression on different cell types, including 136 G-protein coupled receptors and 75 membrane receptor tyrosine-protein kinases. Integrated analysis of the CSPA reveals that the concerted biological function of individual cell types is mainly guided by quantitative rather than qualitative surfaceome differences. The CSPA will be useful for the evaluation of drug targets, for the improved classification of cell types and for a better understanding of the surfaceome and its concerted biological functions in complex signaling microenvironments.",2015-04-20 +24270787,The UCSC Genome Browser database: 2014 update.,"The University of California Santa Cruz (UCSC) Genome Browser (http://genome.ucsc.edu) offers online public access to a growing database of genomic sequence and annotations for a large collection of organisms, primarily vertebrates, with an emphasis on the human and mouse genomes. The Browser's web-based tools provide an integrated environment for visualizing, comparing, analysing and sharing both publicly available and user-generated genomic data sets. As of September 2013, the database contained genomic sequence and a basic set of annotation 'tracks' for ∼90 organisms. 
Significant new annotations include a 60-species multiple alignment conservation track on the mouse, updated UCSC Genes tracks for human and mouse, and several new sets of variation and ENCODE data. New software tools include a Variant Annotation Integrator that returns predicted functional effects of a set of variants uploaded as a custom track, an extension to UCSC Genes that displays haplotype alleles for protein-coding genes and an expansion of data hubs that includes the capability to display remotely hosted user-provided assembly sequence in addition to annotation data. To improve European access, we have added a Genome Browser mirror (http://genome-euro.ucsc.edu) hosted at Bielefeld University in Germany.",2013-11-21 +28334160,AQUA-DUCT: a ligands tracking tool.,"

Motivation

The identification and tracking of molecules which enter active site cavity requires screening the positions of thousands of single molecules along several thousand molecular dynamic steps. To fill the existing gap between tools searching for tunnels and pathways and advanced tools employed for accelerated water flux investigations, we have developed AQUA-DUCT.

Results

AQUA-DUCT is an easy-to-use tool that facilitates analysis of the behaviour of molecules that penetrate any selected region in a protein. It can be used for any type of molecules, e.g. water, oxygen, carbon dioxide, organic solvents, ions.

Availability and implementation

Linux, Windows, macOS, OpenBSD, http://www.aquaduct.pl .

Contact

a.gora@tunnelinggroup.pl or info@aquaduct.pl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +28155632,Global inference of disease-causing single nucleotide variants from exome sequencing data.,"

Background

Whole exome sequencing (WES) has recently emerged as an effective approach for identifying genetic variants underlying human diseases. However, considerable time and labour is needed for careful investigation of candidate variants. Although filtration based on population frequencies and functional prediction scores could effectively remove common and neutral variants, hundreds or even thousands of rare deleterious variants still remain. In addition, current WES platforms also provide variant information in flanking noncoding regions, such as promoters, introns and splice sites. Despite of being recognized to harbour causal variants, these regions are usually ignored by current analysis pipelines.

Results

We present a novel computational method, called Glints, to overcome the above limitations. Glints is capable of identifying disease-causing SNVs in both coding and flanking noncoding regions from exome sequencing data. The principle behind Glints is that disease-causing variants should manifest their effect at both variant and gene levels. Specifically, Glints integrates 14 types of functional scores, including predictions for both coding and noncoding variants, and 9 types of association scores, which help identifying disease relevant genes. We conducted a large-scale simulation studies based on 1000 Genomes Project data and demonstrated the effectiveness of our method in both coding and flanking noncoding regions. We also applied Glints in two real exome sequencing and demonstrated its effectiveness for uncovering disease-causing SNVs. Both standalone software and web server are available at our website http://bioinfo.au.tsinghua.edu.cn/jianglab/glints .

Conclusions

Glints is effective for uncovering disease-causing SNVs in coding and flanking noncoding regions, which is supported by both simulation and real case studies. Glints is expected to be a useful tool for human genetics research based on exome sequencing data.",2016-12-23 +28643043,18F-Florbetaben PET beta-amyloid binding expressed in Centiloids.,"

Purpose

The Centiloid (CL) method enables quantitative values from Aβ-amyloid (Aβ) imaging to be expressed in a universal unit providing pathological, diagnostic and prognostic thresholds in clinical practice and research and allowing integration of multiple tracers and methods. The method was developed for 11C-PiB scans with zero CL set as the average in young normal subjects and 100 CL the average in subjects with mild Alzheimer's disease (AD). The method allows derivation of equations to convert the uptake value of any tracer into the same standard CL units but first requires head-to-head comparison with 11C-PiB results. We derived the equation to express 18F-florbetaben (FBB) binding in CL units.

Methods

Paired PiB and FBB PET scans were obtained in 35 subjects. including ten young normal subjects aged under 45 years (33 ± 8 years). FBB images were acquired from 90 to 110 min after injection. Spatially normalized images were analysed using the standard CL method (SPM8 coregistration of PET data to MRI data and the MNI-152 atlas) and standard CL regions (cortex and whole cerebellum downloaded from http://www.gaain.org ).

Results

FBB binding was strongly correlated with PiB binding (R 2 = 0.96, SUVRFBB = 0.61 × SUVRPiB + 0.39). The equation to derive CL values from FBB SUVR was CL units = 153.4 × SUVRFBB - 154.9. The CL value in the young normal subjects was -1.08 ± 6.81 for FBB scans compared to -0.32 ± 3.48 for PiB scans, giving a variance ratio of 1.96 (SDFBB CL/SDPiB CL).

Conclusions

18F-FBB binding is strongly correlated with PiB binding and FBB results can now be expressed in CL units.",2017-06-22 +21867878,A transcriptomic atlas of mouse neocortical layers.,"In the mammalian cortex, neurons and glia form a patterned structure across six layers whose complex cytoarchitectonic arrangement is likely to contribute to cognition. We sequenced transcriptomes from layers 1-6b of different areas (primary and secondary) of the adult (postnatal day 56) mouse somatosensory cortex to understand the transcriptional levels and functional repertoires of coding and noncoding loci for cells constituting these layers. A total of 5,835 protein-coding genes and 66 noncoding RNA loci are differentially expressed (""patterned"") across the layers, on the basis of a machine-learning model (naive Bayes) approach. Layers 2-6b are each associated with specific functional and disease annotations that provide insights into their biological roles. This new resource (http://genserv.anat.ox.ac.uk/layers) greatly extends currently available resources, such as the Allen Mouse Brain Atlas and microarray data sets, by providing quantitative expression levels, by being genome-wide, by including novel loci, and by identifying candidate alternatively spliced transcripts that are differentially expressed across layers.",2011-08-01 +26331936,MZDASoft: a software architecture that enables large-scale comparison of protein expression levels over multiple samples based on liquid chromatography/tandem mass spectrometry.,"

Rationale

Without accurate peak linking/alignment, only the expression levels of a small percentage of proteins can be compared across multiple samples in Liquid Chromatography/Mass Spectrometry/Tandem Mass Spectrometry (LC/MS/MS) due to the selective nature of tandem MS peptide identification. This greatly hampers biomedical research that aims at finding biomarkers for disease diagnosis, treatment, and the understanding of disease mechanisms. A recent algorithm, PeakLink, has allowed the accurate linking of LC/MS peaks without tandem MS identifications to their corresponding ones with identifications across multiple samples collected from different instruments, tissues and labs, which greatly enhanced the ability of comparing proteins. However, PeakLink cannot be implemented practically for large numbers of samples based on existing software architectures, because it requires access to peak elution profiles from multiple LC/MS/MS samples simultaneously.

Methods

We propose a new architecture based on parallel processing, which extracts LC/MS peak features, and saves them in database files to enable the implementation of PeakLink for multiple samples. The software has been deployed in High-Performance Computing (HPC) environments. The core part of the software, MZDASoft Parallel Peak Extractor (PPE), can be downloaded with a user and developer's guide, and it can be run on HPC centers directly. The quantification applications, MZDASoft TandemQuant and MZDASoft PeakLink, are written in Matlab, which are compiled with a Matlab runtime compiler. A sample script that incorporates all necessary processing steps of MZDASoft for LC/MS/MS quantification in a parallel processing environment is available. The project webpage is http://compgenomics.utsa.edu/zgroup/MZDASoft.

Results

The proposed architecture enables the implementation of PeakLink for multiple samples. Significantly more (100%-500%) proteins can be compared over multiple samples with better quantification accuracy in test cases.

Conclusion

MZDASoft enables large-scale comparison of protein expression levels over multiple samples with much larger protein comparison coverage and better quantification accuracy. It is an efficient implementation based on parallel processing which can be used to process large amounts of data.",2015-10-01 +28481966,RankProd 2.0: a refactored bioconductor package for detecting differentially expressed features in molecular profiling datasets.,"

Motivation

The Rank Product (RP) is a statistical technique widely used to detect differentially expressed features in molecular profiling experiments such as transcriptomics, metabolomics and proteomics studies. An implementation of the RP and the closely related Rank Sum (RS) statistics has been available in the RankProd Bioconductor package for several years. However, several recent advances in the understanding of the statistical foundations of the method have made a complete refactoring of the existing package desirable.

Results

We implemented a completely refactored version of the RankProd package, which provides a more principled implementation of the statistics for unpaired datasets. Moreover, the permutation-based P -value estimation methods have been replaced by exact methods, providing faster and more accurate results.

Availability and implementation

RankProd 2.0 is available at Bioconductor ( https://www.bioconductor.org/packages/devel/bioc/html/RankProd.html ) and as part of the mzMatch pipeline ( http://www.mzmatch.sourceforge.net ).

Contact

rainer.breitling@manchester.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +26833340,The geospatial data quality REST API for primary biodiversity data.,

Unlabelled

We present a REST web service to assess the geospatial quality of primary biodiversity data. It enables access to basic and advanced functions to detect completeness and consistency issues as well as general errors in the provided record or set of records. The API uses JSON for data interchange and efficient parallelization techniques for fast assessments of large datasets.

Availability and implementation

The Geospatial Data Quality API is part of the VertNet set of APIs. It can be accessed at http://api-geospatial.vertnet-portal.appspot.com/geospatial and is already implemented in the VertNet data portal for quality reporting. Source code is freely available under GPL license from http://www.github.com/vertnet/api-geospatial

Contact

javier.otegui@gmail.com or rguralnick@flmnh.ufl.edu

Supplementary information

Supplementary data are available at Bioinformatics online.,2016-02-01 +25957673,Predicting drug-target interaction for new drugs using enhanced similarity measures and super-target clustering.,"Predicting drug-target interaction using computational approaches is an important step in drug discovery and repositioning. To predict whether there will be an interaction between a drug and a target, most existing methods identify similar drugs and targets in the database. The prediction is then made based on the known interactions of these drugs and targets. This idea is promising. However, there are two shortcomings that have not yet been addressed appropriately. Firstly, most of the methods only use 2D chemical structures and protein sequences to measure the similarity of drugs and targets respectively. However, this information may not fully capture the characteristics determining whether a drug will interact with a target. Secondly, there are very few known interactions, i.e. many interactions are ""missing"" in the database. Existing approaches are biased towards known interactions and have no good solutions to handle possibly missing interactions which affect the accuracy of the prediction. In this paper, we enhance the similarity measures to include non-structural (and non-sequence-based) information and introduce the concept of a ""super-target"" to handle the problem of possibly missing interactions. Based on evaluations on real data, we show that our similarity measure is better than the existing measures and our approach is able to achieve higher accuracy than the two best existing algorithms, WNN-GIP and KBMF2K. Our approach is available at http://web.hku.hk/∼liym1018/projects/drug/drug.html or http://www.bmlnwpu.org/us/tools/PredictingDTI_S2/METHODS.html.",2015-05-06 +28369270,DLTree: efficient and accurate phylogeny reconstruction using the dynamical language method.,"

Summary

A number of alignment-free methods have been proposed for phylogeny reconstruction over the past two decades. But there are some long-standing challenges in these methods, including requirement of huge computer memory and CPU time, and existence of duplicate computations. In this article, we address these challenges with the idea of compressed vector, fingerprint and scalable memory management. With these ideas we developed the DLTree algorithm for efficient implementation of the dynamical language model and whole genome-based phylogenetic analysis. The DLTree algorithm was compared with other alignment-free tools, demonstrating that it is more efficient and accurate for phylogeny reconstruction.

Availability and implementation

The DLTree algorithm is freely available at http://dltree.xtu.edu.cn.

Contact

yuzuguo@aliyun.com or yangjy@nankai.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +23724943,Advancing sex and gender competency in medicine: sex & gender women's health collaborative.,"Research conducted to date has deepened our understanding of sex and gender differences in the etiology, diagnosis, treatment, and outcomes for many conditions that affect both women and men. The Sex and Gender Women's Health Collaborative (SGWHC) is supported by the coordinated efforts of our founding partners: the American Medical Women's Association, the American College of Women's Health Physicians and Society for Women's Health Research to address the gaps in medical education with regard to sex and gender competency in the care of women. The SGWHC initiated and continues to build a novel digital resource library of sex and gender specific materials to be adopted and adapted into medical education and clinical practice, residing @ http://www.sgwhc.org. This article presents a case for the inclusion of sex and gender focused content into medical curricula and describes a means for students, faculty, and practitioners to access a centralized, interactive repository for these resources.",2013-06-01 +28376774,Prediction of reversible disulfide based on features from local structural signatures.,"

Background

Disulfide bonds are traditionally considered to play only structural roles. In recent years, increasing evidence suggests that the disulfide proteome is made up of structural disulfides and reversible disulfides. Unlike structural disulfides, reversible disulfides are usually of important functional roles and may serve as redox switches. Interestingly, only specific disulfide bonds are reversible while others are not. However, whether reversible disulfides can be predicted based on structural information remains largely unknown.

Methods

In this study, two datasets with both types of disulfides were compiled using independent approaches. By comparison of various features extracted from the local structural signatures, we identified several features that differ significantly between reversible and structural disulfides, including disulfide bond length, along with the number, amino acid composition, secondary structure and physical-chemical properties of surrounding amino acids. A SVM-based classifier was developed for predicting reversible disulfides. RESULTS: By 10-fold cross-validation, the model achieved accuracy of 0.750, sensitivity of 0.352, specificity of 0.953, MCC of 0.405 and AUC of 0.751 using the RevSS_PDB dataset. The robustness was further validated by using RevSS_RedoxDB as independent testing dataset. This model was applied to proteins with known structures in the PDB database. The results show that one third of the predicted reversible disulfide containing proteins are well-known redox enzymes, while the remaining are non-enzyme proteins. Given that reversible disulfides are frequently reported from functionally important non-enzyme proteins such as transcription factors, the predictions may provide valuable candidates of novel reversible disulfides for further experimental investigation.

Conclusions

This study provides the first comparative analysis between the reversible and the structural disulfides. Distinct features remarkably different between these two groups of disulfides were identified, and a SVM-based classifier for predicting reversible disulfides was developed accordingly. A web server named RevssPred can be accessed freely from: http://biocomputer.bio.cuhk.edu.hk/RevssPred .",2017-04-04 +24272250,RARGE II: an integrated phenotype database of Arabidopsis mutant traits using a controlled vocabulary.,"Arabidopsis thaliana is one of the most popular experimental plants. However, only 40% of its genes have at least one experimental Gene Ontology (GO) annotation assigned. Systematic observation of mutant phenotypes is an important technique for elucidating gene functions. Indeed, several large-scale phenotypic analyses have been performed and have generated phenotypic data sets from many Arabidopsis mutant lines and overexpressing lines, which are freely available online. Since each Arabidopsis mutant line database uses individual phenotype expression, the differences in the structured term sets used by each database make it difficult to compare data sets and make it impossible to search across databases. Therefore, we obtained publicly available information for a total of 66,209 Arabidopsis mutant lines, including loss-of-function (RATM and TARAPPER) and gain-of-function (AtFOX and OsFOX) lines, and integrated the phenotype data by mapping the descriptions onto Plant Ontology (PO) and Phenotypic Quality Ontology (PATO) terms. This approach made it possible to manage the four different phenotype databases as one large data set. Here, we report a publicly accessible web-based database, the RIKEN Arabidopsis Genome Encyclopedia II (RARGE II; http://rarge-v2.psc.riken.jp/), in which all of the data described in this study are included. 
Using the database, we demonstrated consistency (in terms of protein function) with a previous study and identified the presumed function of an unknown gene. We provide examples of AT1G21600, which is a subunit in the plastid-encoded RNA polymerase complex, and AT5G56980, which is related to the jasmonic acid signaling pathway.",2013-11-21 +23720488,"COPRED: prediction of fold, GO molecular function and functional residues at the domain level.","

Summary

Only recently the first resources devoted to the functional annotation of proteins at the domain level started to appear. The next step is to develop specific methodologies for predicting function at the domain level based on these resources, and to implement them in web servers to be used by the community. In this work, we present COPRED, a web server for the concomitant prediction of fold, molecular function and functional sites at the domain level, based on a methodology for domain molecular function prediction and a resource of domain functional annotations previously developed and benchmarked.

Availability and implementation

COPRED can be freely accessed at http://csbg.cnb.csic.es/copred. The interface works in all standard web browsers. WebGL (natively supported by most browsers) is required for the in-line preview and manipulation of protein 3D structures. The website includes a detailed help section and usage examples.

Contact

pazos@cnb.csic.es.",2013-05-29 +26006170,WHO Better Outcomes in Labour Difficulty (BOLD) project: innovating to improve quality of care around the time of childbirth.,"As most pregnancy-related deaths and morbidities are clustered around the time of childbirth, quality of care during this period is critical to the survival of pregnant women and their babies. Despite the wide acceptance of partograph as the central tool to optimize labour outcomes for over 40 years, its use has not successfully improved outcomes in many settings for several reasons. There are also increasing questions about the validity and applicability of its central feature - ""the alert line"" - to all women regardless of their labour characteristics. Apart from the known deficiencies in labour care, attempts to improve quality of care in low resource settings have also failed to address and integrate women's birth experience into quality improvement processes. It was against this background that the World Health Organization (WHO) embarked on the Better Outcomes in Labour Difficulty (BOLD) project to improve the quality of intrapartum care in low- and middle-income countries. The main goal of the BOLD project is to reduce intrapartum-related stillbirths, maternal and newborn mortalities and morbidities by addressing the critical barriers to the process of good quality intrapartum care and enhancing the connection between health systems and communities. The project seeks to achieve this goal by (1) developing an evidence-based, easy to use, labour monitoring-to-action decision-support tool (currently termed Simplified, Effective, Labour Monitoring-to-Action - SELMA); and (2) by developing innovative service prototypes/tools, co-designed with users of health services (women, their families and communities) and health providers, to promote access to respectful, dignified and emotionally supportive care for pregnant women and their companions at the time of birth (""Passport to Safer Birth""). 
This two-pronged approach is expected to positively impact on important domains of quality of care relating to both provision and experience of care. In this paper, we briefly describe the rationale for innovative thinking in relation to improving quality of care around the time of childbirth and introduce WHO current plans to improve care through research, design and implementation of innovative tools and services in the post-2015 era. Please see related articles ' http://dx.doi.org/10.1186/s12978-015-0029-4 ' and ' http://dx.doi.org/10.1186/s12978-015-0028-5 '.",2015-05-26 +23716633,miRmap web: Comprehensive microRNA target prediction online.,"MicroRNAs (miRNAs) posttranscriptionally repress the expression of protein-coding genes. Based on the partial complementarity between miRNA and messenger RNA pairs with a mandatory so-called 'seed' sequence, many thousands of potential targets can be identified. Our open-source software library, miRmap, ranks these potential targets with a biologically meaningful criterion, the repression strength. MiRmap combines thermodynamic, evolutionary, probabilistic and sequence-based features, which cover features from TargetScan, PITA, PACMIT and miRanda. Our miRmap web application offers a user-friendly and feature-rich resource for browsing precomputed miRNA target predictions for model organisms, as well as for predicting and ranking targets for user-submitted sequences. MiRmap web integrates sorting, filtering and exporting of results from multiple queries, as well as providing programmatic access, and is available at http://mirmap.ezlab.org.",2013-05-28 +27585456,Inferring metabolic pathway activity levels from RNA-Seq data.,"

Background

Assessing pathway activity levels is a plausible way to quantify metabolic differences between various conditions. This is usually inferred from microarray expression data. Wide availability of NGS technology has triggered a demand for bioinformatics tools capable of analyzing pathway activity directly from RNA-Seq data. In this paper we introduce XPathway, a set of tools that compares pathway activity analyzing mapping of contigs assembled from RNA-Seq reads to KEGG pathways. The XPathway analysis of pathway activity is based on expectation maximization and topological properties of pathway graphs.

Results

XPathway tools have been applied to RNA-Seq data from the marine bryozoan Bugula neritina with and without its symbiotic bacterium ""Candidatus Endobugula sertula"". We successfully identified several metabolic pathways with differential activity levels. The expression of enzymes from the identified pathways has been further validated through quantitative PCR (qPCR).

Conclusions

Our results show that XPathway is able to detect and quantify the metabolic difference in two samples. The software is implemented in C, Python and shell scripting and is capable of running on Linux/Unix platforms. The source code and installation instructions are available at http://alan.cs.gsu.edu/NGS/?q=content/xpathway .",2016-08-31 +27935622,"Response to Dr Stevens' letter ref. Visitisen et al: ""Short-term effects of night shift work on breast cancer risk: a cohort study of payroll data"".","We thank Dr Richard Stevens for his comments (1) on our recent article that showed no increased risk of breast cancer following recent night shift work when compared with recent day shift work (2). This finding was based on linkage of day-by-day information on working hours and breast cancer incidence data. Results are thus less likely to have been biased by differential misclassification than findings from earlier studies relying on self-report (3). We defined a night shift as ≥3 hours of work between 24:00-05:00 hours and a day shift as ≥3 hours work between 6:00-20:00 hours. This day shift definition did not exclude shifts starting before 05:00 or ending after 24:00 hours. However, this affected only 0.02% and 0.3% of all day shifts, respectively. This diminutive misclassification, that is expected to be non-differential, can hardly explain our negative findings. It is suggested that shifts that begin after 07:00 and end before 18:00 would constitute a more sensible baseline comparison group. Since the biological mechanism is not certain, it is not obvious to us if this will be a more appropriate reference than the present. However, we agree that future studies should test how different definitions of shifts affect the risk of breast cancer, which will be possible using this type of data. 
We only had information on working hours from 2007 and onwards, and night shift work prior to 2007 could have confounded our analyses towards no effect but only if inversely associated with night shift work in 2007 or later. We find this unlikely. Left truncation could also have biased findings towards the null. We therefore supplemented analyses of the total study population with analyses of the one-third of the population with first recorded employment in 2008 or later (the inception population). Even if the mean age was 35.5 years - and many undoubtedly had been working (with and without night shifts) prior to 2008 - this population should be less affected by such selection bias, but we observed similar risk estimates as for the total study population. Taken together, we find that our study provides rather robust evidence of no short-term breast cancer risk following recent night shift work. It must, however, be stressed that data did not allow assessment of a possible long-term risk. Reference 1. Stevens R. Letter ref. Vitisen et al: ""Short-term effects of night shift work on breast cancer risk: a cohort study of payroll data"". Scand J Work Environ Health. 2017;43(1):95. http://dx.doi.org/10.5271/sjweh.3607 2. Vistisen HT, Garde AH, Frydenberg M, Christiansen P, Hansen AM, Hansen J, Bonde JP, Kolstad HA. Short-term effects of night shift work on breast cancer risk: A cohort study of payroll data. Scand J Work Environ Health. 2017;43(1):59-67. http://dx.doi.org/10.5271/sjweh.3603. 3. Ijaz S, Verbeek J, Seidler A, Lindbohm ML, Ojajarvi A, Orsini N, Costa G, Neuvonen K. Night-shift work and breast cancer--a systematic review and meta-analysis. Scand J Work Environ Health. 2013 Sep 1;39(5):431-47. http://dx.doi.org/10.5271/sjweh.3371.",2016-12-09 +27612452,"Mergeomics: a web server for identifying pathological pathways, networks, and key regulators via multidimensional data integration.","

Background

Human diseases are commonly the result of multidimensional changes at molecular, cellular, and systemic levels. Recent advances in genomic technologies have enabled an outpour of omics datasets that capture these changes. However, separate analyses of these various data only provide fragmented understanding and do not capture the holistic view of disease mechanisms. To meet the urgent needs for tools that effectively integrate multiple types of omics data to derive biological insights, we have developed Mergeomics, a computational pipeline that integrates multidimensional disease association data with functional genomics and molecular networks to retrieve biological pathways, gene networks, and central regulators critical for disease development.

Results

To make the Mergeomics pipeline available to a wider research community, we have implemented an online, user-friendly web server ( http://mergeomics.research.idre.ucla.edu/ ). The web server features a modular implementation of the Mergeomics pipeline with detailed tutorials. Additionally, it provides curated genomic resources including tissue-specific expression quantitative trait loci, ENCODE functional annotations, biological pathways, and molecular networks, and offers interactive visualization of analytical results. Multiple computational tools including Marker Dependency Filtering (MDF), Marker Set Enrichment Analysis (MSEA), Meta-MSEA, and Weighted Key Driver Analysis (wKDA) can be used separately or in flexible combinations. User-defined summary-level genomic association datasets (e.g., genetic, transcriptomic, epigenomic) related to a particular disease or phenotype can be uploaded and computed real-time to yield biologically interpretable results, which can be viewed online and downloaded for later use.

Conclusions

Our Mergeomics web server offers researchers flexible and user-friendly tools to facilitate integration of multidimensional data into holistic views of disease mechanisms in the form of tissue-specific key regulators, biological pathways, and gene networks.",2016-09-09 +28910066,"Health, United States, 2016: With Chartbook on Long-term Trends in Health","Health, United States, 2016 is the 40th report on the health status of the nation and is submitted by the Secretary of the Department of Health and Human Services to the President and the Congress of the United States in compliance with Section 308 of the Public Health Service Act. This report was compiled by the Centers for Disease Control and Prevention’s (CDC) National Center for Health Statistics (NCHS). The Health, United States series presents an annual overview of national trends in health statistics. The report contains a Chartbook that assesses the nation’s health by presenting trends and current information on selected measures of morbidity, mortality, health care utilization and access, health risk factors, prevention, health insurance, and personal health care expenditures. This year’s Chartbook focuses on long-term trends in health. The report also contains 114 Trend Tables organized around four major subject areas: health status and determinants, health care utilization, health care resources, and health care expenditures. A companion report—Health, United States: In Brief—features information extracted from the full report. The complete report and related data products are available on the Health, United States website at: http://www.cdc.gov/nchs/hus.htm.",2017-09-15 +26258935,A Sparse Model Based Detection of Copy Number Variations From Exome Sequencing Data.,"

Goal

Whole-exome sequencing provides a more cost-effective way than whole-genome sequencing for detecting genetic variants, such as copy number variations (CNVs). Although a number of approaches have been proposed to detect CNVs from whole-genome sequencing, a direct adoption of these approaches to whole-exome sequencing will often fail because exons are separately located along a genome. Therefore, an appropriate method is needed to target the specific features of exome sequencing data.

Methods

In this paper, a novel sparse model based method is proposed to discover CNVs from multiple exome sequencing data. First, exome sequencing data are represented with a penalized matrix approximation, and technical variability and random sequencing errors are assumed to follow a generalized Gaussian distribution. Second, an iteratively reweighted least squares algorithm is used to estimate the solution.

Results

The method is tested and validated on both synthetic and real data, and compared with other approaches including CoNIFER, XHMM, and cn.MOPS. The test demonstrates that the proposed method outperform other approaches.

Conclusion

The proposed sparse model can detect CNVs from exome sequencing data with high power and precision. Significance: Sparse model can target the specific features of exome sequencing data. The software codes are freely available at http://www.tulane.edu/~wyp/software/Exon_CNV.m.",2016-03-01 +25934797,Pathways with PathWhiz.,"PathWhiz (http://smpdb.ca/pathwhiz) is a web server designed to create colourful, visually pleasing and biologically accurate pathway diagrams that are both machine-readable and interactive. As a web server, PathWhiz is accessible from almost any place and compatible with essentially any operating system. It also houses a public library of pathways and pathway components that can be easily viewed and expanded upon by its users. PathWhiz allows users to readily generate biologically complex pathways by using a specially designed drawing palette to quickly render metabolites (including automated structure generation), proteins (including quaternary structures, covalent modifications and cofactors), nucleic acids, membranes, subcellular structures, cells, tissues and organs. Both small-molecule and protein/gene pathways can be constructed by combining multiple pathway processes such as reactions, interactions, binding events and transport activities. PathWhiz's pathway replication and propagation functions allow for existing pathways to be used to create new pathways or for existing pathways to be automatically propagated across species. PathWhiz pathways can be saved in BioPAX, SBGN-ML and SBML data exchange formats, as well as PNG, PWML, HTML image map or SVG images that can be viewed offline or explored using PathWhiz's interactive viewer. PathWhiz has been used to generate over 700 pathway diagrams for a number of popular databases including HMDB, DrugBank and SMPDB.",2015-05-01 +28057680,An informative approach on differential abundance analysis for time-course metagenomic sequencing data.,"

Motivation

The advent of high-throughput next generation sequencing technology has greatly promoted the field of metagenomics where previously unattainable information about microbial communities can be discovered. Detecting differentially abundant features (e.g. species or genes) plays a critical role in revealing the contributors (i.e. pathogens) to the biological or medical status of microbial samples. However, currently available statistical methods lack power in detecting differentially abundant features contrasting different biological or medical conditions, in particular, for time series metagenomic sequencing data. We have proposed a novel procedure, metaDprof, which is built upon a spline-based method assuming heterogeneous error, to meet the challenges of detecting differentially abundant features from metagenomic samples by comparing different biological/medical conditions across time. It contains two stages: (i) global detection on features and (ii) time interval detection for significant features. The detection procedures in both stages are based on sound statistical support.

Results

Compared with existing methods the new method metaDprof shows the best performance in comprehensive simulation studies. Not only can it accurately detect features relating to the biological condition or disease status of samples but it also can accurately detect the starting and ending time points when the differences arise. The proposed method is also applied to a real metagenomic dataset and the results provide an interesting angle to understand the relationship between the microbiota in mouse gut and diet type.

Availability and implementation

R code and an example dataset are available at https://cals.arizona.edu/∼anling/sbg/software.htm.

Contact

anling@email.arizona.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +28130774,Pre-Exposure Prophylaxis for HIV Prevention: Safety Concerns.,"Available evidence supports the efficacy of pre-exposure prophylaxis (PrEP) in decreasing the incidence of human immunodeficiency virus (HIV) infection among high-risk individuals, especially when used in combination with other behavioural preventive methods. Safety concerns about PrEP present challenges in the implementation and use of PrEP. The aim of this review is to discuss safety concerns observed in completed clinical trials on the use of PrEP. We performed a literature search on PrEP in PubMed, global advocacy for HIV prevention (Aids Vaccine Advocacy Coalition) database, clinical trials registry "" http://www.clinicaltrials.gov "" and scholar.google, using combination search terms 'pre-exposure prophylaxis', 'safety concerns in the use of pre-exposure prophylaxis', 'truvada use as PrEP', 'guidelines for PrEP use', 'HIV pre-exposure prophylaxis' and 'tenofovir' to identify clinical trials and literature on PrEP. We present findings associated with safety issues on the use of PrEP based on a review of 11 clinical trials on PrEP with results on safety and efficacy as at April 2016. We also reviewed findings from routine real-life practice reports. The pharmacological intervention for PrEP was tenofovir disoproxil fumarate/emtricitabine in a combined form as Truvada® or tenofovir as a single entity. Both products are efficacious for PrEP and seem to have a good safety profile. Regular monitoring is recommended to prevent long-term toxic effects. The main adverse effects observed with PrEP are gastrointestinal related; basically mild to moderate nausea, vomiting and diarrhea. Other adverse drug effects worth monitoring are liver enzymes, renal function and bone mineral density. PrEP as an intervention to reduce HIV transmission appears to have a safe benefit-risk profile in clinical trials. 
It is recommended for widespread use but adherence monitoring and real-world safety surveillance are critical in the post-marketing phase to ensure that the benefits observed in clinical trials are maintained in real-world use.",2017-04-01 +28062447,Cross-React: a new structural bioinformatics method for predicting allergen cross-reactivity.,"The phenomenon of cross-reactivity between allergenic proteins plays an important role to understand how the immune system recognizes different antigen proteins. Allergen proteins are known to cross-react if their sequence comparison shows a high sequence identity which also implies that the proteins have a similar 3D fold. In such cases, linear sequence alignment methods are frequently used to predict cross-reactivity between allergenic proteins. However, the prediction of cross-reactivity between distantly related allergens continues to be a challenging task. To overcome this problem, we developed a new structure-based computational method, Cross-React, to predict cross-reactivity between allergenic proteins available in the Structural Database of Allergens (SDAP). Our method is based on the hypothesis that we can find surface patches on 3D structures of potential allergens with amino acid compositions similar to an epitope in a known allergen. We applied the Cross-React method to a diverse set of seven allergens, and successfully identified several cross-reactive allergens with high to moderate sequence identity which have also been experimentally shown to cross-react. Based on these findings, we suggest that Cross-React can be used as a predictive tool to assess protein allergenicity and cross-reactivity.

Availability and implementation

Cross-React is available at: http://curie.utmb.edu/Cross-React.html.

Contact

ssnegi@utmb.edu.",2017-04-01 +28165122,Conditional eQTL analysis reveals allelic heterogeneity of gene expression.,"In recent years, multiple eQTL (expression quantitative trait loci) catalogs have become available that can help understand the functionality of complex trait-related single nucleotide polymorphisms (SNPs). In eQTL catalogs, gene expression is often strongly associated with multiple SNPs, which may reflect either one or multiple independent associations. Conditional eQTL analysis allows a distinction between dependent and independent eQTLs. We performed conditional eQTL analysis in 4,896 peripheral blood microarray gene expression samples. Our analysis showed that 35% of genes with a cis eQTL have at least two independent cis eQTLs; for several genes up to 13 independent cis eQTLs were identified. Also, 12% (671) of the independent cis eQTLs identified in conditional analyses were not significant in unconditional analyses. The number of GWAS catalog SNPs identified as eQTL in the conditional analyses increases with 24% as compared to unconditional analyses. We provide an online conditional cis eQTL mapping catalog for whole blood (https://eqtl.onderzoek.io/), which can be used to lookup eQTLs more accurately than in standard unconditional whole blood eQTL databases.",2017-04-01 +27193158,ASL-LEX: A lexical database of American Sign Language.,"ASL-LEX is a lexical database that catalogues information about nearly 1,000 signs in American Sign Language (ASL). It includes the following information: subjective frequency ratings from 25-31 deaf signers, iconicity ratings from 21-37 hearing non-signers, videoclip duration, sign length (onset and offset), grammatical class, and whether the sign is initialized, a fingerspelled loan sign, or a compound. Information about English translations is available for a subset of signs (e.g., alternate translations, translation consistency). 
In addition, phonological properties (sign type, selected fingers, flexion, major and minor location, and movement) were coded and used to generate sub-lexical frequency and neighborhood density estimates. ASL-LEX is intended for use by researchers, educators, and students who are interested in the properties of the ASL lexicon. An interactive website where the database can be browsed and downloaded is available at http://asl-lex.org .",2017-04-01 +24607494,WT1 expression increases with malignancy and indicates unfavourable outcome in astrocytoma.,"

Aims

The zinc finger transcription factor WT1 is expressed in astrocytic neoplasms and therefore is a potential target of immunotherapy in brain tumours. Our aim was to further elucidate the role of WT1 as a diagnostic and prognostic marker in neuropathology, particularly as to the differentiation of astrocytoma from oligodendroglioma as well as to the dependency of WT1 expression on clinically relevant parameters.

Methods

829 evaluable brain tumour samples were investigated by WT1 immunohistochemistry on full tissue routine slides, consisting of 442 glioblastomas, 303 astrocytomas, 41 oligodendrogliomas and 43 oligoastrocytomas. In addition public WT1 gene expression data of 351 gliomas were analysed.

Results

Our data show that WT1 expression in diffuse astrocytic tumours increases with WHO tumour grade and is associated with older age, absence of IDH1 mutation but not related to O(6)- methyl guanine methyl transferase (MGMT) promoter methylation status. Univariable, but not multivariable survival analysis indicates that WT1 expression is associated with worse outcome in patients with diffuse astrocytoma but not glioblastoma.

Conclusions

The significant WT1 expression differences between diffuse astrocytomas, oligoastrocytomas and oligodendrogliomas, which are also present in the Repository for Molecular Brain Neoplasia Data, National Cancer Institute (REMBRANDT, 2005, http://rembrandt.nci.nih.gov) gene database set, provide a rationale for use of WT1 as part of a routine immunohistochemistry panel.",2014-03-07 +29273594,Effect of Smoking and Folate Levels on the Efficacy of Folic Acid Therapy in Prevention of Stroke in Hypertensive Men.,"We aimed to examine whether the efficacy of folic acid therapy in the primary prevention of stroke is jointly affected by smoking status and baseline folate levels in a male population in a post hoc analysis of the CSPPT (China Stroke Primary Prevention Trial).Eligible participants of the CSPPT were randomly assigned to a double-blind daily treatment of a combined enalapril 10-mg and folic acid 0.8-mg tablet or an enalapril 10-mg tablet alone. In total, 8384 male participants of the CSPPT were included in the current analyses. The primary outcome was first stroke.The median treatment duration was 4.5 years. In the enalapril-alone group, the first stroke risk varied by baseline folate levels and smoking status (never versus ever). Specifically, there was an inverse association between folate levels and first stroke in never smokers (P for linear trend=0.043). However, no such association was found in ever smokers. A test for interaction between baseline folate levels and smoking status on first stroke was significant (P=0.045). In the total sample, folic acid therapy significantly reduced the risk of first stroke in never smokers with folate deficiency (hazard risk, 0.36; 95% confidence interval, 0.16-0.83) and in ever smokers with normal folate levels (hazard risk, 0.69; 95% confidence interval, 0.48-0.99).Baseline folate levels and smoking status can interactively affect the risk of first stroke. 
Our data suggest that compared with never smokers, ever smokers may require a higher dosage of folic acid to achieve a greater beneficial effect on stroke. Our findings need to be confirmed by future randomized trials.URL: https://www.clinicaltrials.gov. Unique identifier: NCT00794885.",2018-01-01 +28130236,H(O)TA: estimation of DNA methylation and hydroxylation levels and efficiencies from time course data.,"

Motivation

Methylation and hydroxylation of cytosines to form 5-methylcytosine (5mC) and 5-hydroxymethylcytosine (5hmC) belong to the most important epigenetic modifications and their vital role in the regulation of gene expression has been widely recognized. Recent experimental techniques allow to infer methylation and hydroxylation levels at CpG dinucleotides but require a sophisticated statistical analysis to achieve accurate estimates.

Results

We present H(O)TA, a software tool based on a stochastic modeling approach, which simultaneously analyzes time course data from hairpin bisulfite sequencing and hairpin oxidative bisulfite sequencing.

Availability and implementation

: https://mosi.uni-saarland.de/HOTA.

Contact

charalampos.kyriakopoulos@uni-saarland.de or verena.wolf@uni-saarland.de.",2017-06-01 +24857969,HeteroGenome: database of genome periodicity. ,"We present the first release of the HeteroGenome database collecting latent periodicity regions in genomes. Tandem repeats and highly divergent tandem repeats along with the regions of a new type of periodicity, known as profile periodicity, have been collected for the genomes of Saccharomyces cerevisiae, Arabidopsis thaliana, Caenorhabditis elegans and Drosophila melanogaster. We obtained data with the aid of a spectral-statistical approach to search for reliable latent periodicity regions (with periods up to 2000 bp) in DNA sequences. The original two-level mode of data presentation (a broad view of the region of latent periodicity and a second level indicating conservative fragments of its structure) was further developed to enable us to obtain the estimate, without redundancy, that latent periodicity regions make up ~10% of the analyzed genomes. Analysis of the quantitative and qualitative content of located periodicity regions on all chromosomes of the analyzed organisms revealed dominant characteristic types of periodicity in the genomes. The pattern of density distribution of latent periodicity regions on chromosome unambiguously characterizes each chromosome in genome. Database URL: http://www.jcbi.ru/lp_baze/",2014-05-24 +22102576,The MetaCyc database of metabolic pathways and enzymes and the BioCyc collection of pathway/genome databases.,"The MetaCyc database (http://metacyc.org/) provides a comprehensive and freely accessible resource for metabolic pathways and enzymes from all domains of life. The pathways in MetaCyc are experimentally determined, small-molecule metabolic pathways and are curated from the primary scientific literature. MetaCyc contains more than 1800 pathways derived from more than 30,000 publications, and is the largest curated collection of metabolic pathways currently available. 
Most reactions in MetaCyc pathways are linked to one or more well-characterized enzymes, and both pathways and enzymes are annotated with reviews, evidence codes and literature citations. BioCyc (http://biocyc.org/) is a collection of more than 1700 organism-specific Pathway/Genome Databases (PGDBs). Each BioCyc PGDB contains the full genome and predicted metabolic network of one organism. The network, which is predicted by the Pathway Tools software using MetaCyc as a reference database, consists of metabolites, enzymes, reactions and metabolic pathways. BioCyc PGDBs contain additional features, including predicted operons, transport systems and pathway-hole fillers. The BioCyc website and Pathway Tools software offer many tools for querying and analysis of PGDBs, including Omics Viewers and comparative analysis. New developments include a zoomable web interface for diagrams; flux-balance analysis model generation from PGDBs; web services; and a new tool called Web Groups.",2011-11-18 +25336203,Census-based rapid and accurate metagenome taxonomic profiling.,"

Background

Understanding the taxonomic composition of a sample, whether from patient, food or environment, is important to several types of studies including pathogen diagnostics, epidemiological studies, biodiversity analysis and food quality regulation. With the decreasing costs of sequencing, metagenomic data is quickly becoming the preferred type of data for such analysis.

Results

Rapidly defining the taxonomic composition (both taxonomic profile and relative frequency) in a metagenomic sequence dataset is challenging because the task of mapping millions of sequence reads from a metagenomic study to a non-redundant nucleotide database such as the NCBI non-redundant nucleotide database (nt) is a computationally intensive task. We have developed a robust subsampling-based algorithm implemented in a tool called CensuScope meant to take a 'sneak peek' into the population distribution and estimate taxonomic composition as if a census was taken of the metagenomic landscape. CensuScope is a rapid and accurate metagenome taxonomic profiling tool that randomly extracts a small number of reads (based on user input) and maps them to NCBI's nt database. This process is repeated multiple times to ascertain the taxonomic composition that is found in majority of the iterations, thereby providing a robust estimate of the population and measures of the accuracy for the results.

Conclusion

CensuScope can be run on a laptop or on a high-performance computer. Based on our analysis we are able to provide some recommendations in terms of the number of sequence reads to analyze and the number of iterations to use. For example, to quantify taxonomic groups present in the sample at a level of 1% or higher a subsampling size of 250 random reads with 50 iterations yields a statistical power of >99%. Windows and UNIX versions of CensuScope are available for download at https://hive.biochemistry.gwu.edu/dna.cgi?cmd=censuscope. CensuScope is also available through the High-performance Integrated Virtual Environment (HIVE) and can be used in conjunction with other HIVE analysis and visualization tools.",2014-10-21 +28413782,APMicroDB: A microsatellite database of Acyrthosiphon pisum.,"Pea aphids represent a complex genetic system that could be used for QTL analysis, genetic diversity and population genetics studies. Here, we described the development of first microsatellite repeat database of the pea aphid (APMicroDB), accessible at ""http://deepaklab.com/aphidmicrodb"". We identified 3,40,233 SSRs using MIcroSAtellite (MISA) tool that was distributed in 14,067 (out of 23,924) scaffold of the pea aphid. We observed 89.53% simple repeats of which 73.41% were mono-nucleotide, followed by di-nucleotide repeats. This database stored information about the repeats kind, GC content, motif type (mono - hexa), genomic location etc. We have also incorporated the primer information derived from Primer3 software of the 2504 bp flanking region of the identified marker. Blast tool is also provided for searching the user query sequence for identified marker and their primers. 
This work has an immense use for scientific community working in the field of agricultural pest management, QTL mapping, and host-pathogen interaction analysis.",2017-03-30 +24160861,ReactionMap: an efficient atom-mapping algorithm for chemical reactions.,"Large databases of chemical reactions provide new data-mining opportunities and challenges. Key challenges result from the imperfect quality of the data and the fact that many of these reactions are not properly balanced or atom-mapped. Here, we describe ReactionMap, an efficient atom-mapping algorithm. Our approach uses a combination of maximum common chemical subgraph search and minimization of an assignment cost function derived empirically from training data. We use a set of over 259,000 balanced atom-mapped reactions from the SPRESI commercial database to train the system, and we validate it on random sets of 1000 and 17,996 reactions sampled from this pool. These large test sets represent a broad range of chemical reaction types, and ReactionMap correctly maps about 99% of the atoms and about 96% of the reactions, with a mean time per mapping of 2 s. Most correctly mapped reactions are mapped with high confidence. Mapping accuracy compares favorably with ChemAxon's AutoMapper, versions 5 and 6.1, and the DREAM Web tool. These approaches correctly map 60.7%, 86.5%, and 90.3% of the reactions, respectively, on the same data set. A ReactionMap server is available on the ChemDB Web portal at http://cdb.ics.uci.edu .",2013-11-11 +25962468,Laminin L4 domain structure resembles adhesion modules in ephrin receptor and other transmembrane glycoproteins.,"

Unlabelled

The ~ 800 kDa laminin heterotrimer forms a distinctive cross-shaped structure that further self-assembles into networks within the extracellular matrix. The domains at the laminin chain termini, which engage in network formation and cell-surface interaction, are well understood both structurally and functionally. By contrast, the structures and roles of additional domains embedded within the limbs of the laminin cross have remained obscure. Here, we report the X-ray crystal structure, determined to 1.2 Å resolution, of the human laminin α2 subunit L4b domain, site of an inframe deletion mutation associated with mild congenital muscular dystrophy. The α2 L4b domain is an irregular β-sandwich with many short and broken strands linked by extended loops. The most similar known structures are the carbohydrate-binding domains of bacterial cellulases, the ephrin-binding domain of ephrin receptors, and MAM adhesion domains in various other eukaryotic cell-surface proteins. This similarity to mammalian adhesion modules, which was not predicted on the basis of amino acid sequence alone due to lack of detectable homology, suggests that laminin internal domains evolved from a progenitor adhesion molecule and may retain a role in cell adhesion in the context of the laminin trimer.

Database

The atomic coordinates and structure factors have been deposited in the Protein Data Bank, Research Collaboratory for Structural Bioinformatics, Rutgers University, New Brunswick, NJ, USA (http://www.rcsb.org/) under codes 4YEP and 4YEQ.",2015-05-28 +29365031,Unravelling triple-negative breast cancer molecular heterogeneity using an integrative multiomic analysis.,"

Background

Recent efforts of genome-wide gene expression profiling analyses have improved our understanding of the biological complexity and diversity of triple-negative breast cancers (TNBCs) reporting, at least six different molecular subtypes of TNBC namely Basal-like 1 (BL1), basal-like 2 (BL2), immunomodulatory (IM), mesenchymal (M), mesenchymal stem-like (MSL) and luminal androgen receptor (LAR). However, little is known regarding the potential driving molecular events within each subtype, their difference in survival and response to therapy. Further insight into the underlying genomic alterations is therefore needed.

Patients and methods

This study was carried out using copy-number aberrations, somatic mutations and gene expression data derived from the Molecular Taxonomy of Breast Cancer International Consortium (METABRIC) and The Cancer Genome Atlas. TNBC samples (n = 550) were classified according to Lehmann's molecular subtypes using the TNBCtype online subtyping tool (http://cbc.mc.vanderbilt.edu/tnbc/).

Results

Each subtype showed significant clinic-pathological characteristic differences. Using a multivariate model, IM subtype showed to be associated with a better prognosis (HR = 0.68; CI = 0.46-0.99; P = 0.043) whereas LAR subtype was associated with a worse prognosis (HR = 1.47; CI = 1.0-2.14; P = 0.046). BL1 subtype was found to be most genomically instable subtype with high TP53 mutation (92%) and copy-number deletion in genes involved in DNA repair mechanism (BRCA2, MDM2, PTEN, RB1 and TP53). LAR tumours were associated with higher mutational burden with significantly enriched mutations in PI3KCA (55%), AKT1 (13%) and CDH1 (13%) genes. M and MSL subtypes were associated with higher signature score for angiogenesis. Finally, IM showed high expression levels of immune signatures and check-point inhibitor genes such as PD1, PDL1 and CTLA4.

Conclusion

Our findings highlight for the first time the substantial genomic heterogeneity that characterize TNBC molecular subtypes, allowing for a better understanding of the disease biology as well as the identification of several candidate targets paving novel approaches for the development of anticancer therapeutics for TNBC.",2018-04-01 +27381342,SIFORM: shared informative factor models for integration of multi-platform bioinformatic data.,"

Motivation

High-dimensional omic data derived from different technological platforms have been extensively used to facilitate comprehensive understanding of disease mechanisms and to determine personalized health treatments. Numerous studies have integrated multi-platform omic data; however, few have efficiently and simultaneously addressed the problems that arise from high dimensionality and complex correlations.

Results

We propose a statistical framework of shared informative factor models that can jointly analyze multi-platform omic data and explore their associations with a disease phenotype. The common disease-associated sample characteristics across different data types can be captured through the shared structure space, while the corresponding weights of genetic variables directly index the strengths of their association with the phenotype. Extensive simulation studies demonstrate the performance of the proposed method in terms of biomarker detection accuracy via comparisons with three popular regularized regression methods. We also apply the proposed method to The Cancer Genome Atlas lung adenocarcinoma dataset to jointly explore associations of mRNA expression and protein expression with smoking status. Many of the identified biomarkers belong to key pathways for lung tumorigenesis, some of which are known to show differential expression across smoking levels. We discover potential biomarkers that reveal different mechanisms of lung tumorigenesis between light smokers and heavy smokers.

Availability and implementation

R code to implement the new method can be downloaded from http://odin.mdacc.tmc.edu/jhhu/ CONTACT: jhu@mdanderson.org.",2016-07-05 +28367366,Automatic single- and multi-label enzymatic function prediction by machine learning.,"The number of protein structures in the PDB database has been increasing more than 15-fold since 1999. The creation of computational models predicting enzymatic function is of major importance since such models provide the means to better understand the behavior of newly discovered enzymes when catalyzing chemical reactions. Until now, single-label classification has been widely performed for predicting enzymatic function limiting the application to enzymes performing unique reactions and introducing errors when multi-functional enzymes are examined. Indeed, some enzymes may be performing different reactions and can hence be directly associated with multiple enzymatic functions. In the present work, we propose a multi-label enzymatic function classification scheme that combines structural and amino acid sequence information. We investigate two fusion approaches (in the feature level and decision level) and assess the methodology for general enzymatic function prediction indicated by the first digit of the enzyme commission (EC) code (six main classes) on 40,034 enzymes from the PDB database. The proposed single-label and multi-label models predict correctly the actual functional activities in 97.8% and 95.5% (based on Hamming-loss) of the cases, respectively. Also the multi-label model predicts all possible enzymatic reactions in 85.4% of the multi-labeled enzymes when the number of reactions is unknown. 
Code and datasets are available at https://figshare.com/s/a63e0bafa9b71fc7cbd7.",2017-03-29 +23993619,Review of availability of food composition data for fish and shellfish.,"The FAO/INFOODS database on fish and shellfish (aFiSh) is a collection of analytical data from primary sources and holds values for 2,277 entries on raw and processed food with sufficient quality. Most data were entered on fatty acids (60%), followed by macronutrients and their fractions (16%), minerals (10%), amino acids (7%), (pro)vitamins (2%), heavy metals (2%) and other components (3%). Information on several factors that contribute to the variation of compositional data (e.g., biodiversity, catch season, habitat, size and part of fish/shellfish analysed) as well as the bibliographic references are presented alongside with each food entry. The data were published in the FAO/INFOODS Food Composition Database for Biodiversity (BioFoodComp2.0) and in the FAO/INFOODS Analytical Food Composition Database (AnFooD1.0), freely available at the INFOODS webpage http://www.fao.org/infoods/biodiversity/index_en.stm. The provision of easy accessible, analytical compositional data should be seen as stimulation for researchers and compilers to incorporate more analytical and detailed data of fish and shellfish into future food composition tables and databases and to improve dietary assessment tools.",2013-07-11 +29097776,MiSNPDb: a web-based genomic resources of tropical ecology fruit mango (Mangifera indica L.) for phylogeography and varietal differentiation.,"Mango is one of the most important fruits of tropical ecological region of the world, well known for its nutritive value, aroma and taste. Its world production is >45MT worth >200 billion US dollars. Genomic resources are required for improvement in productivity and management of mango germplasm. There is no web-based genomic resources available for mango. 
Hence rapid and cost-effective high throughput putative marker discovery is required to develop such resources. RAD-based marker discovery can cater this urgent need till whole genome sequence of mango becomes available. Using a panel of 84 mango varieties, a total of 28.6 Gb data was generated by ddRAD-Seq approach on Illumina HiSeq 2000 platform. A total of 1.25 million SNPs were discovered. Phylogenetic tree using 749 common SNPs across these varieties revealed three major lineages which was compared with geographical locations. A web genomic resources MiSNPDb, available at http://webtom.cabgrid.res.in/mangosnps/ is based on 3-tier architecture, developed using PHP, MySQL and Javascript. This web genomic resources can be of immense use in the development of high density linkage map, QTL discovery, varietal differentiation, traceability, genome finishing and SNP chip development for future GWAS in genomic selection program. We report here world's first web-based genomic resources for genetic improvement and germplasm management of mango.",2017-11-02 +28886600,Outdoor Light at Night and Breast Cancer Incidence in the Nurses' Health Study II.,"

Background

Animal and epidemiologic studies suggest that exposure to light at night (LAN) may disrupt circadian patterns and decrease nocturnal secretion of melatonin, which may disturb estrogen regulation, leading to increased breast cancer risk.

Objectives

We examined the association between residential outdoor LAN and breast cancer incidence using data from the nationwide U.S.-based Nurses' Health Study II cohort.

Methods

We followed 109,672 women from 1989 through 2013. Cumulative LAN exposure was estimated using time-varying satellite data for a composite of persistent nighttime illumination at ∼1 km2 scale for each residence during follow-up. Incident invasive breast cancer cases were confirmed by medical record review. We used Cox proportional hazard models to calculate hazard ratios (HRs) and 95% confidence intervals (CIs), adjusting for anthropometric, reproductive, lifestyle, and socioeconomic risk factors.

Results

Over 2,187,425 person-years, we identified 3,549 incident breast cancer cases. Based on a fully adjusted model, the estimated HR for incident breast cancer with an interquartile range (IQR) (31.6 nW/cm2/sr) increase in cumulative average outdoor LAN was 1.05 (95% CI: 1.00, 1.11). An association between LAN and breast cancer appeared to be limited to women who were premenopausal at the time of a case [HR=1.07 (95% CI: 1.01, 1.14) based on 1,973 cases vs. HR=1.00 (95% CI: 0.91, 1.09) based on 1,172 cases in postmenopausal women; p-interaction=0.08]. The LAN-breast cancer association was observed only in past and current smokers at the end of follow-up [HR=1.00 (95% CI: 0.94, 1.07) based on 2,215 cases in never smokers; HR=1.10 (95% CI: 1.01, 1.19) based on 1,034 cases in past smokers vs. HR=1.21 (95% CI: 1.07, 1.37) for 300 cases in current smokers; p-interaction=0.08].

Conclusions

Although further work is required to confirm our results and to clarify potential mechanisms, our findings suggest that exposure to residential outdoor light at night may contribute to invasive breast cancer risk. https://doi.org/10.1289/EHP935.",2017-08-17 +27559195,Clinical and Business Intelligence: Why It's Important to Your Pharmacy.,"According to the Healthcare Information Management and Systems Society, ""Clinical & Business Intelligence (C&BI) is the use and analysis of data captured in the healthcare setting to directly inform decision-making"" (http://www.himss.org/library/clinical-business-intelligence). Some say that it is the right information given to the right person at the right time in the right way. No matter how you define it, the fact remains that timely access, synthesis, and visualization of clinical data have become key to how health professionals make patient care decisions and improve care delivery.",2016-07-01 +26395080,PDON: Parkinson's disease ontology for representation and modeling of the Parkinson's disease knowledge domain.,"

Background

Despite the unprecedented and increasing amount of data, relatively little progress has been made in molecular characterization of mechanisms underlying Parkinson's disease. In the area of Parkinson's research, there is a pressing need to integrate various pieces of information into a meaningful context of presumed disease mechanism(s). Disease ontologies provide a novel means for organizing, integrating, and standardizing the knowledge domains specific to disease in a compact, formalized and computer-readable form and serve as a reference for knowledge exchange or systems modeling of disease mechanism.

Methods

The Parkinson's disease ontology was built according to the life cycle of ontology building. Structural, functional, and expert evaluation of the ontology was performed to ensure the quality and usability of the ontology. A novelty metric has been introduced to measure the gain of new knowledge using the ontology. Finally, a cause-and-effect model was built around PINK1 and two gene expression studies from the Gene Expression Omnibus database were re-annotated to demonstrate the usability of the ontology.

Results

The Parkinson's disease ontology with a subclass-based taxonomic hierarchy covers the broad spectrum of major biomedical concepts from molecular to clinical features of the disease, and also reflects different views on disease features held by molecular biologists, clinicians and drug developers. The current version of the ontology contains 632 concepts, which are organized under nine views. The structural evaluation showed the balanced dispersion of concept classes throughout the ontology. The functional evaluation demonstrated that the ontology-driven literature search could gain novel knowledge not present in the reference Parkinson's knowledge map. The ontology was able to answer specific questions related to Parkinson's when evaluated by experts. Finally, the added value of the Parkinson's disease ontology is demonstrated by ontology-driven modeling of PINK1 and re-annotation of gene expression datasets relevant to Parkinson's disease.

Conclusions

Parkinson's disease ontology delivers the knowledge domain of Parkinson's disease in a compact, computer-readable form, which can be further edited and enriched by the scientific community and also to be used to construct, represent and automatically extend Parkinson's-related computable models. A practical version of the Parkinson's disease ontology for browsing and editing can be publicly accessed at http://bioportal.bioontology.org/ontologies/PDON .",2015-09-22 +28327957,NanoSim: nanopore sequence read simulator based on statistical characterization.,"

Background

The MinION sequencing instrument from Oxford Nanopore Technologies (ONT) produces long read lengths from single-molecule sequencing - valuable features for detailed genome characterization. To realize the potential of this platform, a number of groups are developing bioinformatics tools tuned for the unique characteristics of its data. We note that these development efforts would benefit from a simulator software, the output of which could be used to benchmark analysis tools.

Results

Here, we introduce NanoSim, a fast and scalable read simulator that captures the technology-specific features of ONT data and allows for adjustments upon improvement of nanopore sequencing technology. The first step of NanoSim is read characterization, which provides a comprehensive alignment-based analysis and generates a set of read profiles serving as the input to the next step, the simulation stage. The simulation stage uses the model built in the previous step to produce in silico reads for a given reference genome. NanoSim is written in Python and R. The source files and manual are available at the Genome Sciences Centre website: http://www.bcgsc.ca/platform/bioinfo/software/nanosim.

Conclusion

In this work, we model the base-calling errors of ONT reads to inform the simulation of sequences with similar characteristics. We showcase the performance of NanoSim on publicly available datasets generated using the R7 and R7.3 chemistries and different sequencing kits and compare the resulting synthetic reads to those of other long-sequence simulators and experimental ONT reads. We expect NanoSim to have an enabling role in the field and benefit the development of scalable next-generation sequencing technologies for the long nanopore reads, including genome assembly, mutation detection, and even metagenomic analysis software.",2017-04-01 +26476368,Metadata management for high content screening in OMERO.,"High content screening (HCS) experiments create a classic data management challenge-multiple, large sets of heterogeneous structured and unstructured data, that must be integrated and linked to produce a set of ""final"" results. These different data include images, reagents, protocols, analytic output, and phenotypes, all of which must be stored, linked and made accessible for users, scientists, collaborators and where appropriate the wider community. The OME Consortium has built several open source tools for managing, linking and sharing these different types of data. The OME Data Model is a metadata specification that supports the image data and metadata recorded in HCS experiments. Bio-Formats is a Java library that reads recorded image data and metadata and includes support for several HCS screening systems. OMERO is an enterprise data management application that integrates image data, experimental and analytic metadata and makes them accessible for visualization, mining, sharing and downstream analysis. We discuss how Bio-Formats and OMERO handle these different data types, and how they can be used to integrate, link and share HCS experiments in facilities and public data repositories. 
OME specifications and software are open source and are available at https://www.openmicroscopy.org.",2015-10-22 +24580755,"CDSbank: taxonomy-aware extraction, selection, renaming and formatting of protein-coding DNA or amino acid sequences.","

Background

Protein-coding DNA sequences and their corresponding amino acid sequences are routinely used to study relationships between sequence, structure, function, and evolution. The rapidly growing size of sequence databases increases the power of such comparative analyses but it makes it more challenging to prepare high quality sequence data sets with control over redundancy, quality, completeness, formatting, and labeling. Software tools for some individual steps in this process exist but manual intervention remains a common and time consuming necessity.

Description

CDSbank is a database that stores both the protein-coding DNA sequence (CDS) and amino acid sequence for each protein annotated in Genbank. CDSbank also stores Genbank feature annotation, a flag to indicate incomplete 5' and 3' ends, full taxonomic data, and a heuristic to rank the scientific interest of each species. This rich information allows fully automated data set preparation with a level of sophistication that aims to meet or exceed manual processing. Defaults ensure ease of use for typical scenarios while allowing great flexibility when needed. Access is via a free web server at http://hazeslab.med.ualberta.ca/CDSbank/.

Conclusions

CDSbank presents a user-friendly web server to download, filter, format, and name large sequence data sets. Common usage scenarios can be accessed via pre-programmed default choices, while optional sections give full control over the processing pipeline. Particular strengths are: extract protein-coding DNA sequences just as easily as amino acid sequences, full access to taxonomy for labeling and filtering, awareness of incomplete sequences, and the ability to take one protein sequence and extract all synonymous CDS or identical protein sequences in other species. Finally, CDSbank can also create labeled property files to, for instance, annotate or re-label phylogenetic trees.",2014-02-28 +28011772,Controlling the joint local false discovery rate is more powerful than meta-analysis methods in joint analysis of summary statistics from multiple genome-wide association studies.,"

Motivation

In genome-wide association studies (GWASs) of common diseases/traits, we often analyze multiple GWASs with the same phenotype together to discover associated genetic variants with higher power. Since it is difficult to access data with detailed individual measurements, summary-statistics-based meta-analysis methods have become popular to jointly analyze datasets from multiple GWASs.

Results

In this paper, we propose a novel summary-statistics-based joint analysis method based on controlling the joint local false discovery rate (Jlfdr). We prove that our method is the most powerful summary-statistics-based joint analysis method when controlling the false discovery rate at a certain level. In particular, the Jlfdr-based method achieves higher power than commonly used meta-analysis methods when analyzing heterogeneous datasets from multiple GWASs. Simulation experiments demonstrate the superior power of our method over meta-analysis methods. Also, our method discovers more associations than meta-analysis methods from empirical datasets of four phenotypes.

Availability and implementation

The R-package is available at: http://bioinformatics.ust.hk/Jlfdr.html .

Contact

eeyu@ust.hk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +28961754,LLR: a latent low-rank approach to colocalizing genetic risk variants in multiple GWAS.,"

Motivation

Genome-wide association studies (GWAS), which genotype millions of single nucleotide polymorphisms (SNPs) in thousands of individuals, are widely used to identify the risk SNPs underlying complex human phenotypes (quantitative traits or diseases). Most conventional statistical methods in GWAS only investigate one phenotype at a time. However, an increasing number of reports suggest the ubiquity of pleiotropy, i.e. many complex phenotypes sharing common genetic bases. This motivated us to leverage pleiotropy to develop new statistical approaches to joint analysis of multiple GWAS.

Results

In this study, we propose a latent low-rank (LLR) approach to colocalizing genetic risk variants using summary statistics. In the presence of pleiotropy, there exist risk loci that affect multiple phenotypes. To leverage pleiotropy, we introduce a low-rank structure to modulate the probabilities of the latent association statuses between loci and phenotypes. Regarding the computational efficiency of LLR, a novel expectation-maximization-path (EM-path) algorithm has been developed to greatly reduce the computational cost and facilitate model selection and inference. We demonstrate the advantages of LLR over competing approaches through simulation studies and joint analysis of 18 GWAS datasets.

Availability and implementation

The LLR software is available on https://sites.google.com/site/liujin810822.

Contact

macyang@ust.hk.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +25855811,SANSparallel: interactive homology search against Uniprot.,"Proteins evolve by mutations and natural selection. The network of sequence similarities is a rich source for mining homologous relationships that inform on protein structure and function. There are many servers available to browse the network of homology relationships but one has to wait up to a minute for results. The SANSparallel webserver provides protein sequence database searches with immediate response and professional alignment visualization by third-party software. The output is a list, pairwise alignment or stacked alignment of sequence-similar proteins from Uniprot, UniRef90/50, Swissprot or Protein Data Bank. The stacked alignments are viewed in Jalview or as sequence logos. The database search uses the suffix array neighborhood search (SANS) method, which has been re-implemented as a client-server, improved and parallelized. The method is extremely fast and as sensitive as BLAST above 50% sequence identity. Benchmarks show that the method is highly competitive compared to previously published fast database search programs: UBLAST, DIAMOND, LAST, LAMBDA, RAPSEARCH2 and BLAT. The web server can be accessed interactively or programmatically at http://ekhidna2.biocenter.helsinki.fi/cgi-bin/sans/sans.cgi. It can be used to make protein functional annotation pipelines more efficient, and it is useful in interactive exploration of the detailed evidence supporting the annotation of particular proteins of interest.",2015-04-08 +29036531,SPATKIN: a simulator for rule-based modeling of biomolecular site dynamics on surfaces.,"

Summary

Rule-based modeling is a powerful approach for studying biomolecular site dynamics. Here, we present SPATKIN, a general-purpose simulator for rule-based modeling in two spatial dimensions. The simulation algorithm is a lattice-based method that tracks Brownian motion of individual molecules and the stochastic firing of rule-defined reaction events. Because rules are used as event generators, the algorithm is network-free, meaning that it does not require generating the complete reaction network implied by rules prior to simulation. In a simulation, each molecule (or complex of molecules) is taken to occupy a single lattice site that cannot be shared with another molecule (or complex). SPATKIN is capable of simulating a wide array of membrane-associated processes, including adsorption, desorption and crowding. Models are specified using an extension of the BioNetGen language, which makes it possible to account for spatial features of the simulated process.

Availability and implementation

The C++ source code for SPATKIN is distributed freely under the terms of the GNU GPLv3 license. The source code can be compiled for execution on popular platforms (Windows, Mac and Linux). An installer for 64-bit Windows and a macOS app are available. The source code and precompiled binaries are available at the SPATKIN Web site (http://pmbm.ippt.pan.pl/software/spatkin).

Contact

spatkin.simulator@gmail.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +28066963,Phenotiki: an open software and hardware platform for affordable and easy image-based phenotyping of rosette-shaped plants.,"Phenotyping is important to understand plant biology, but current solutions are costly, not versatile or are difficult to deploy. To solve this problem, we present Phenotiki, an affordable system for plant phenotyping that, relying on off-the-shelf parts, provides an easy to install and maintain platform, offering an out-of-box experience for a well-established phenotyping need: imaging rosette-shaped plants. The accompanying software (with available source code) processes data originating from our device seamlessly and automatically. Our software relies on machine learning to devise robust algorithms, and includes an automated leaf count obtained from 2D images without the need of depth (3D). Our affordable device (~€200) can be deployed in growth chambers or greenhouse to acquire optical 2D images of approximately up to 60 adult Arabidopsis rosettes concurrently. Data from the device are processed remotely on a workstation or via a cloud application (based on CyVerse). In this paper, we present a proof-of-concept validation experiment on top-view images of 24 Arabidopsis plants in a combination of genotypes that has not been compared previously. Phenotypic analysis with respect to morphology, growth, color and leaf count has not been performed comprehensively before now. We confirm the findings of others on some of the extracted traits, showing that we can phenotype at reduced cost. We also perform extensive validations with external measurements and with higher fidelity equipment, and find no loss in statistical accuracy when we use the affordable setting that we propose. 
Device set-up instructions and analysis software are publicly available ( http://phenotiki.com).",2017-03-02 +28961785,FIRE: functional inference of genetic variants that regulate gene expression.,"

Motivation

Interpreting genetic variation in noncoding regions of the genome is an important challenge for personal genome analysis. One mechanism by which noncoding single nucleotide variants (SNVs) influence downstream phenotypes is through the regulation of gene expression. Methods to predict whether or not individual SNVs are likely to regulate gene expression would aid interpretation of variants of unknown significance identified in whole-genome sequencing studies.

Results

We developed FIRE (Functional Inference of Regulators of Expression), a tool to score both noncoding and coding SNVs based on their potential to regulate the expression levels of nearby genes. FIRE consists of 23 random forests trained to recognize SNVs in cis-expression quantitative trait loci (cis-eQTLs) using a set of 92 genomic annotations as predictive features. FIRE scores discriminate cis-eQTL SNVs from non-eQTL SNVs in the training set with a cross-validated area under the receiver operating characteristic curve (AUC) of 0.807, and discriminate cis-eQTL SNVs shared across six populations of different ancestry from non-eQTL SNVs with an AUC of 0.939. FIRE scores are also predictive of cis-eQTL SNVs across a variety of tissue types.

Availability and implementation

FIRE scores for genome-wide SNVs in hg19/GRCh37 are available for download at https://sites.google.com/site/fireregulatoryvariation/.

Contact

nilah@stanford.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-12-01 +28113912,A Heterogeneous Network Based Method for Identifying GBM-Related Genes by Integrating Multi-Dimensional Data.,"The emergence of multi-dimensional data offers opportunities for more comprehensive analysis of the molecular characteristics of human diseases and therefore improving diagnosis, treatment, and prevention. In this study, we proposed a heterogeneous network based method by integrating multi-dimensional data (HNMD) to identify GBM-related genes. The novelty of the method lies in that the multi-dimensional data of GBM from TCGA dataset that provide comprehensive information of genes, are combined with protein-protein interactions to construct a weighted heterogeneous network, which reflects both the general and disease-specific relationships between genes. In addition, a propagation algorithm with resistance is introduced to precisely score and rank GBM-related genes. The results of comprehensive performance evaluation show that the proposed method significantly outperforms the network based methods with single-dimensional data and other existing approaches. Subsequent analysis of the top ranked genes suggests they may be functionally implicated in GBM, which further corroborates the superiority of the proposed method. The source code and the results of HNMD can be downloaded from the following URL: http://bioinformatics.ustc.edu.cn/hnmd/ .",2016-04-20 +29036270,On expert curation and scalability: UniProtKB/Swiss-Prot as a case study.,"

Motivation

Biological knowledgebases, such as UniProtKB/Swiss-Prot, constitute an essential component of daily scientific research by offering distilled, summarized and computable knowledge extracted from the literature by expert curators. While knowledgebases play an increasingly important role in the scientific community, their ability to keep up with the growth of biomedical literature is under scrutiny. Using UniProtKB/Swiss-Prot as a case study, we address this concern via multiple literature triage approaches.

Results

With the assistance of the PubTator text-mining tool, we tagged more than 10 000 articles to assess the ratio of papers relevant for curation. We first show that curators read and evaluate many more papers than they curate, and that measuring the number of curated publications is insufficient to provide a complete picture as demonstrated by the fact that 8000-10 000 papers are curated in UniProt each year while curators evaluate 50 000-70 000 papers per year. We show that 90% of the papers in PubMed are out of the scope of UniProt, that a maximum of 2-3% of the papers indexed in PubMed each year are relevant for UniProt curation, and that, despite appearances, expert curation in UniProt is scalable.

Availability and implementation

UniProt is freely available at http://www.uniprot.org/.

Contact

sylvain.poux@sib.swiss.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +25910695,Inferring data-specific micro-RNA function through the joint ranking of micro-RNA and pathways from matched micro-RNA and gene expression data.,"

Motivation

In practice, identifying and interpreting the functional impacts of the regulatory relationships between micro-RNA and messenger-RNA is non-trivial. The sheer scale of possible micro-RNA and messenger-RNA interactions can make the interpretation of results difficult.

Results

We propose a supervised framework, pMim, built upon concepts of significance combination, for jointly ranking regulatory micro-RNA and their potential functional impacts with respect to a condition of interest. Here, pMim directly tests if a micro-RNA is differentially expressed and if its predicted targets, which lie in a common biological pathway, have changed in the opposite direction. We leverage the information within existing micro-RNA target and pathway databases to stabilize the estimation and annotation of micro-RNA regulation making our approach suitable for datasets with small sample sizes. In addition to outputting meaningful and interpretable results, we demonstrate in a variety of datasets that the micro-RNA identified by pMim, in comparison to simpler existing approaches, are also more concordant with what is described in the literature.

Availability and implementation

This framework is implemented as an R function, pMim, in the package sydSeq available from http://www.ellispatrick.com/r-packages.

Contact

jean.yang@sydney.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-24 +,SPInDel: a multifunctional workbench for species identification using insertion/deletion variants,"The majority of the available methods for the molecular identification of species use pairwise sequence divergences between the query and reference sequences (DNA barcoding). The presence of multiple insertions and deletions (indels) in the target genomic regions is generally regarded as a problem, as it introduces ambiguities in sequence alignments. However, we have recently shown that a high level of species discrimination is attainable in all taxa of life simply by considering the length of hypervariable regions defined by indel variants. Each species is tagged with a numeric profile of fragment lengths—a true numeric barcode. In this study, we describe a multifunctional computational workbench (named SPInDel for SPecies Identification by Insertions/Deletions) to assist researchers using variable‐length DNA sequences, and we demonstrate its applicability in molecular ecology. The SPInDel workbench provides a step‐by‐step environment for the alignment of target sequences, selection of informative hypervariable regions, design of PCR primers and the statistical validation of the species‐identification process. In our test data sets, we were able to discriminate all species from two genera of frogs (Ansonia and Leptobrachium) inhabiting lowland rainforests and mountain regions of South‐East Asia and species from the most common genus of coral reef fishes (Apogon). Our method can complement conventional DNA barcoding systems when indels are common (e.g. in rRNA genes) without the required step of DNA sequencing. The executable files, source code, documentation and test data sets are freely available at http://www.portugene.com/SPInDel/SPInDel_webworkbench.html.",2012-11-01 +25006225,Processing biological literature with customizable Web services supporting interoperable formats. 
,"Web services have become a popular means of interconnecting solutions for processing a body of scientific literature. This has fuelled research on high-level data exchange formats suitable for a given domain and ensuring the interoperability of Web services. In this article, we focus on the biological domain and consider four interoperability formats, BioC, BioNLP, XMI and RDF, that represent domain-specific and generic representations and include well-established as well as emerging specifications. We use the formats in the context of customizable Web services created in our Web-based, text-mining workbench Argo that features an ever-growing library of elementary analytics and capabilities to build and deploy Web services straight from a convenient graphical user interface. We demonstrate a 2-fold customization of Web services: by building task-specific processing pipelines from a repository of available analytics, and by configuring services to accept and produce a combination of input and output data interchange formats. We provide qualitative evaluation of the formats as well as quantitative evaluation of automatic analytics. The latter was carried out as part of our participation in the fourth edition of the BioCreative challenge. Our analytics built into Web services for recognizing biochemical concepts in BioC collections achieved the highest combined scores out of 10 participating teams. Database URL: http://argo.nactem.ac.uk.",2014-07-08 +26090958,DisoMCS: Accurately Predicting Protein Intrinsically Disordered Regions Using a Multi-Class Conservative Score Approach.,"

Unlabelled

The precise prediction of protein intrinsically disordered regions, which play a crucial role in biological procedures, is a necessary prerequisite to further the understanding of the principles and mechanisms of protein function. Here, we propose a novel predictor, DisoMCS, which is a more accurate predictor of protein intrinsically disordered regions. DisoMCS is based on an original multi-class conservative score (MCS) obtained by sequence-order/disorder alignment. Initially, near-disorder regions are defined on fragments located at both termini of an ordered region connecting a disordered region. Then the multi-class conservative score is generated by sequence alignment against a known structure database and represented as order, near-disorder and disorder conservative scores. The MCS of each amino acid has three elements: order, near-disorder and disorder profiles. Finally, the MCS is exploited as features to identify disordered regions in sequences. DisoMCS utilizes a non-redundant data set as the training set, MCS and predicted secondary structure as features, and a conditional random field as the classification algorithm. In predicted near-disorder regions a residue is classified as ordered or disordered according to the optimized decision threshold. DisoMCS was evaluated by cross-validation, large-scale prediction, independent tests and CASP (Critical Assessment of Techniques for Protein Structure Prediction) tests. All results confirmed that DisoMCS was very competitive in terms of accuracy of prediction when compared with well-established publicly available disordered region predictors. The results also indicated that our approach was more accurate when a query has higher homology with the knowledge database.

Availability

The DisoMCS is available at http://cal.tongji.edu.cn/disorder/.",2015-06-19 +23630246,The MetaboLights repository: curation challenges in metabolomics.,"MetaboLights is the first general-purpose open-access curated repository for metabolomic studies, their raw experimental data and associated metadata, maintained by one of the major open-access data providers in molecular biology. Increases in the number of depositions, number of samples per study and the file size of data submitted to MetaboLights present a challenge for the objective of ensuring high-quality and standardized data in the context of diverse metabolomic workflows and data representations. Here, we describe the MetaboLights curation pipeline, its challenges and its practical application in quality control of complex data depositions. Database URL: http://www.ebi.ac.uk/metabolights.",2013-04-29 +26917558,Guidelines for the functional annotation of microRNAs using the Gene Ontology.,"MicroRNA regulation of developmental and cellular processes is a relatively new field of study, and the available research data have not been organized to enable its inclusion in pathway and network analysis tools. The association of gene products with terms from the Gene Ontology is an effective method to analyze functional data, but until recently there has been no substantial effort dedicated to applying Gene Ontology terms to microRNAs. Consequently, when performing functional analysis of microRNA data sets, researchers have had to rely instead on the functional annotations associated with the genes encoding microRNA targets. In consultation with experts in the field of microRNA research, we have created comprehensive recommendations for the Gene Ontology curation of microRNAs. This curation manual will enable provision of a high-quality, reliable set of functional annotations for the advancement of microRNA research. 
Here we describe the key aspects of the work, including development of the Gene Ontology to represent this data, standards for describing the data, and guidelines to support curators making these annotations. The full microRNA curation guidelines are available on the GO Consortium wiki (http://wiki.geneontology.org/index.php/MicroRNA_GO_annotation_manual).",2016-02-25 +25978103,A Workflow to Investigate Exposure and Pharmacokinetic Influences on High-Throughput in Vitro Chemical Screening Based on Adverse Outcome Pathways.,"

Background

Adverse outcome pathways (AOPs) link adverse effects in individuals or populations to a molecular initiating event (MIE) that can be quantified using in vitro methods. Practical application of AOPs in chemical-specific risk assessment requires incorporation of knowledge on exposure, along with absorption, distribution, metabolism, and excretion (ADME) properties of chemicals.

Objectives

We developed a conceptual workflow to examine exposure and ADME properties in relation to an MIE. The utility of this workflow was evaluated using a previously established AOP, acetylcholinesterase (AChE) inhibition.

Methods

Thirty chemicals found to inhibit human AChE in the ToxCast™ assay were examined with respect to their exposure, absorption potential, and ability to cross the blood-brain barrier (BBB). Structures of active chemicals were compared against structures of 1,029 inactive chemicals to detect possible parent compounds that might have active metabolites.

Results

Application of the workflow screened 10 ""low-priority"" chemicals of 30 active chemicals. Fifty-two of the 1,029 inactive chemicals exhibited a similarity threshold of ≥ 75% with their nearest active neighbors. Of these 52 compounds, 30 were excluded due to poor absorption or distribution. The remaining 22 compounds may inhibit AChE in vivo either directly or as a result of metabolic activation.

Conclusions

The incorporation of exposure and ADME properties into the conceptual workflow eliminated 10 ""low-priority"" chemicals that may otherwise have undergone additional, resource-consuming analyses. Our workflow also increased confidence in interpretation of in vitro results by identifying possible ""false negatives.""

Citation

Phillips MB, Leonard JA, Grulke CM, Chang DT, Edwards SW, Brooks R, Goldsmith MR, El-Masri H, Tan YM. 2016. A workflow to investigate exposure and pharmacokinetic influences on high-throughput in vitro chemical screening based on adverse outcome pathways. Environ Health Perspect 124:53-60; http://dx.doi.org/10.1289/ehp.1409450.",2015-05-15 +26084703,A unified gene catalog for the laboratory mouse reference genome.,"We report here a semi-automated process by which mouse genome feature predictions and curated annotations (i.e., genes, pseudogenes, functional RNAs, etc.) from Ensembl, NCBI and Vertebrate Genome Annotation database (Vega) are reconciled with the genome features in the Mouse Genome Informatics (MGI) database (http://www.informatics.jax.org) into a comprehensive and non-redundant catalog. Our gene unification method employs an algorithm (fjoin--feature join) for efficient detection of genome coordinate overlaps among features represented in two annotation data sets. Following the analysis with fjoin, genome features are binned into six possible categories (1:1, 1:0, 0:1, 1:n, n:1, n:m) based on coordinate overlaps. These categories are subsequently prioritized for assessment of annotation equivalencies and differences. The version of the unified catalog reported here contains more than 59,000 entries, including 22,599 protein-coding genes, 12,455 pseudogenes, and 24,007 other feature types (e.g., microRNAs, lincRNAs, etc.). More than 23,000 of the entries in the MGI gene catalog have equivalent gene models in the annotation files obtained from NCBI, Vega, and Ensembl. 12,719 of the features are unique to NCBI relative to Ensembl/Vega; 11,957 are unique to Ensembl/Vega relative to NCBI, and 3095 are unique to MGI. More than 4000 genome features fall into categories that require manual inspection to resolve structural differences in the gene models from different annotation sources. 
Using the MGI unified gene catalog, researchers can easily generate a comprehensive report of mouse genome features from a single source and compare the details of gene and transcript structure using MGI's mouse genome browser.",2015-06-18 +27087830,Bloom Filter Trie: an alignment-free and reference-free data structure for pan-genome storage.,"

Background

High throughput sequencing technologies have become fast and cheap in the past years. As a result, large-scale projects started to sequence tens to several thousands of genomes per species, producing a high number of sequences sampled from each genome. Such a highly redundant collection of very similar sequences is called a pan-genome. It can be transformed into a set of sequences ""colored"" by the genomes to which they belong. A colored de Bruijn graph (C-DBG) extracts from the sequences all colored k-mers, strings of length k, and stores them in vertices.

Results

In this paper, we present an alignment-free, reference-free and incremental data structure for storing a pan-genome as a C-DBG: the bloom filter trie (BFT). The data structure allows storing and compressing a set of colored k-mers, as well as efficiently traversing the graph. Bloom filter trie was used to index and query different pan-genome datasets. Compared to another state-of-the-art data structure, BFT was up to two times faster to build while using about the same amount of main memory. For querying k-mers, BFT was about 52-66 times faster while using about 5.5-14.3 times less memory.

Conclusion

We present a novel succinct data structure called the Bloom Filter Trie for indexing a pan-genome as a colored de Bruijn graph. The trie stores k-mers and their colors based on a new representation of vertices that compress and index shared substrings. Vertices use basic data structures for lightweight substrings storage as well as Bloom filters for efficient trie and graph traversals. Experimental results prove better performance compared to another state-of-the-art data structure.

Availability

https://www.github.com/GuillaumeHolley/BloomFilterTrie.",2016-04-14 +28430868,MOST-visualization: software for producing automated textbook-style maps of genome-scale metabolic networks.,"

Summary

Visualization of metabolites, reactions and pathways in genome-scale metabolic networks (GEMs) can assist in understanding cellular metabolism. Three attributes are desirable in software used for visualizing GEMs: (i) automation, since GEMs can be quite large; (ii) production of understandable maps that provide ease in identification of pathways, reactions and metabolites; and (iii) visualization of the entire network to show how pathways are interconnected. No software currently exists for visualizing GEMs that satisfies all three characteristics, but MOST-Visualization, an extension of the software package MOST (Metabolic Optimization and Simulation Tool), satisfies (i), and by using a pre-drawn overview map of metabolism based on the Roche map satisfies (ii) and comes close to satisfying (iii).

Availability and implementation

MOST is distributed for free on the GNU General Public License. The software and full documentation are available at http://most.ccib.rutgers.edu/.

Contact

dslun@rutgers.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +24475242,MetaMetaDB: a database and analytic system for investigating microbial habitability.,"MetaMetaDB (http://mmdb.aori.u-tokyo.ac.jp/) is a database and analytic system for investigating microbial habitability, i.e., how a prokaryotic group can inhabit different environments. The interaction between prokaryotes and the environment is a key issue in microbiology because distinct prokaryotic communities maintain distinct ecosystems. Because 16S ribosomal RNA (rRNA) sequences play pivotal roles in identifying prokaryotic species, a system that comprehensively links diverse environments to 16S rRNA sequences of the inhabitant prokaryotes is necessary for the systematic understanding of the microbial habitability. However, existing databases are biased to culturable prokaryotes and exhibit limitations in the comprehensiveness of the data because most prokaryotes are unculturable. Recently, metagenomic and 16S rRNA amplicon sequencing approaches have generated abundant 16S rRNA sequence data that encompass unculturable prokaryotes across diverse environments; however, these data are usually buried in large databases and are difficult to access. In this study, we developed MetaMetaDB (Meta-Metagenomic DataBase), which comprehensively and compactly covers 16S rRNA sequences retrieved from public datasets. Using MetaMetaDB, users can quickly generate hypotheses regarding the types of environments a prokaryotic group may be adapted to. We anticipate that MetaMetaDB will improve our understanding of the diversity and evolution of prokaryotes.",2014-01-27 +25378308,"REBASE--a database for DNA restriction and modification: enzymes, genes and genomes.","REBASE is a comprehensive and fully curated database of information about the components of restriction-modification (RM) systems. 
It contains fully referenced information about recognition and cleavage sites for both restriction enzymes and methyltransferases as well as commercial availability, methylation sensitivity, crystal and sequence data. All genomes that are completely sequenced are analyzed for RM system components, and with the advent of PacBio sequencing, the recognition sequences of DNA methyltransferases (MTases) are appearing rapidly. Thus, Type I and Type III systems can now be characterized in terms of recognition specificity merely by DNA sequencing. The contents of REBASE may be browsed from the web http://rebase.neb.com and selected compilations can be downloaded by FTP (ftp.neb.com). Monthly updates are also available via email.",2014-11-05 +,Understanding production potentials and yield gaps in intensive maize production in China,"Understanding yield potentials and exploitable gaps in current intensive maize (Zea mays L.) production is essential in order to increase grain yields to meet future food requirements amid strong competition for limited resources. In this study, we used simulations with the Hybrid-Maize Model (http://www.hybridmaize.unl.edu/), highest recorded yields published in the literature, field experiments, and farm survey data to assess yield potentials and gaps in four maize agro-ecological regions of China. In 50 simulations of high-yield sites across China from 1990 to 2009, the yield potential averaged 16.5 Mg ha−1 for irrigated maize and 13.9 Mg ha−1 for rainfed maize, respectively. During the same period, the highest recorded yield was 15.4 Mg ha−1, or 93% of the yield potential of irrigated maize. In comparison, the average farmer's yield was 7.9 Mg ha−1 based on 5584 farms surveyed in 2007–2008. Consequently, the yield gap between the average farmer's yield and the modeled yield potential (YGM) was 8.6 Mg ha−1 for irrigated maize and 6.0 Mg ha−1 for rainfed maize and so farmers attained 48–56% of the yield potential. 
The yield gap between the average farmer's yield and highest recorded yield (YGR) was 7.6 Mg ha−1, so farmers attained 51% of the recorded yield. Because the sites used for simulated and recorded yields possessed the most favorable combinations of soil and crop management, closing the gaps in YGM and YGR in farmers’ fields within a short time frame could be very difficult. The attainable yield was collected from field experiments, which were conducted in farmers’ fields by farmers using management practices recommended by local agronomists. The data for attainable yield averaged 12.3 Mg ha−1 according to 137 field experiments across China. The yield gap between the average farmer's yield and the experimental yield (YGE) was 4.5 Mg ha−1, with farmers attaining 64% of the experimental yield. The main factor explaining this gap was inefficient crop management practices, which constrained yield improvements in farmers’ fields. In order to narrow this gap, multidisciplinary understanding and cooperation among the disciplines of plant science, agronomy, soil science, agro-ecology and extension, resulting in integrated soil–crop system management, are essential.",2013-03-01 +29364820,Air Pollution and Performance-Based Physical Functioning in Dutch Older Adults.,"BACKGROUND:Functional limitations are a major cause for needing care and institutionalization among older adults. Exposure to air pollution has been suggested to be associated with increased functional limitations in older people. OBJECTIVE:Our objective was to assess the association between air pollution and physical functioning in Dutch older adults. METHODS:We analyzed data on performance-based (walking speed, ability to rise from a chair, putting on and taking off a cardigan, balance test) and self-reported physical functioning for 1,762 participants of the Longitudinal Aging Study Amsterdam, who participated in measurement cycles performed in 2005/2006, 2008/2009, and 2011/2012. 
Annual average outdoor air pollution concentrations [nitrogen dioxide (NO2), nitrogen oxides (NOx), particulate matter with diameters ≤2.5μm (PM2.5), ≤10μm (PM10), and 2.5-10μm (PMcoarse), and PM2.5 absorbance] at the home address at the start of the first measurement cycle were estimated using land-use regression models. Analyses were performed using mixed models with random participant intercepts adjusting for potential confounders. RESULTS:Exposure to most air pollutants was associated with reduced performance-based physical functioning; for example, an interquartile range increase in NO2 exposure was associated with a 0.22 (95% confidence interval: 0.03, 0.42) lower performance test score in fully adjusted models, equivalent to the difference in performance score between participants who differed by 9 mo in age. Exposure to air pollution was generally not statistically significantly associated with self-reported functional limitations, and not associated with a faster decline in performance-based physical functioning over the study period. CONCLUSION:This study suggests that exposure to air pollution may adversely affect physical performance of older adults in the Netherlands. https://doi.org/10.1289/EHP2239.",2018-01-19 +29075224,"Defining a Conceptual Topography of Word Concreteness: Clustering Properties of Emotion, Sensation, and Magnitude among 750 English Words.","Cognitive science has a longstanding interest in the ways that people acquire and use abstract vs. concrete words (e.g., truth vs. piano). One dominant theory holds that abstract and concrete words are subserved by two parallel semantic systems. We recently proposed an alternative account of abstract-concrete word representation premised upon a unitary, high dimensional semantic space wherein word meaning is nested. We hypothesize that a range of cognitive and perceptual dimensions (e.g., emotion, time, space, color, size, visual form) bound this space, forming a conceptual topography. 
Here we report a normative study where we examined the clustering properties of a sample of English words (N = 750) spanning a spectrum of concreteness in a continuous manner from highly abstract to highly concrete. Participants (N = 328) rated each target word on a range of 14 cognitive dimensions (e.g., color, emotion, valence, polarity, motion, space). The dimensions reduced to three factors: Endogenous factor, Exogenous factor, and Magnitude factor. Concepts were plotted in a unified, multimodal space with concrete and abstract concepts along a continuous continuum. We discuss theoretical implications and practical applications of this dataset. These word norms are freely available for download and use at http://www.reilly-coglab.com/data/.",2017-10-11 +28333633,No-Reference Quality Assessment of Tone-Mapped HDR Pictures.,"Being able to automatically predict digital picture quality, as perceived by human observers, has become important in many applications where humans are the ultimate consumers of displayed visual information. Standard dynamic range (SDR) images provide 8 b/color/pixel. High dynamic range (HDR) images, which are usually created from multiple exposures of the same scene, can provide 16 or 32 b/color/pixel, but must be tonemapped to SDR for display on standard monitors. Multi-exposure fusion techniques bypass HDR creation, by fusing the exposure stack directly to SDR format while aiming for aesthetically pleasing luminance and color distributions. Here, we describe a new no-reference image quality assessment (NR IQA) model for HDR pictures that is based on standard measurements of the bandpass and on newly conceived differential natural scene statistics (NSS) of HDR pictures. We derive an algorithm from the model which we call the HDR IMAGE GRADient-based Evaluator. 
NSS models have previously been used to devise NR IQA models that effectively predict the subjective quality of SDR images, but they perform significantly worse on tonemapped HDR content. Toward ameliorating this we make here the following contributions: 1) we design HDR picture NR IQA models and algorithms using both standard space-domain NSS features as well as novel HDR-specific gradient-based features that significantly elevate prediction performance; 2) we validate the proposed models on a large-scale crowdsourced HDR image database; and 3) we demonstrate that the proposed models also perform well on legacy natural SDR images. The software is available at: http://live.ece.utexas.edu/research/Quality/higradeRelease.zip.",2017-03-22 +,GenomeView: Visualizing the Next-Generation of Data,"Due to recent advances in sequencing technologies, billions of nucleotide sequences are now produced on a daily basis. A major challenge is to visualize these data, including both whole genome sequence and transcriptome data, for further downstream analyses. Visualization is often overlooked and undervalued, but it is an extremely valuable to explore your data on several levels. A first area where visualization shines is at the early stages of data analysis to perform sanity checks on your data. Eye-balling your data in a visually pleasing way is the best way to get a good feel on what came out of your experiments. Once you have a good idea of what is in your data a good visual representation can be used to generate new hypotheses and to fine-tune analysis parameters. The appropriate image often makes the solution obvious and as such, it really makes it easier to develop algorithms. The ability to interactively explore gives you insights in large-scale data sets and definitely augments our ability to reason about complex data. To this end, we present GenomeView, a stand-alone sequence browser specifically designed to visualize and manipulate a multitude of genomics data. 
GenomeView enables users to dynamically browse high volumes of aligned short read data, with dynamic navigation and semantic zooming, from the whole genome level to the single nucleotide. At the same time, the tool enables visualization of whole genome alignments of dozens of genomes relative to a reference sequence. GenomeView is unique in its capability to interactively handle huge data sets consisting of dozens of aligned genomes, thousands of annotation features and millions of mapped short reads both as viewer and editor. GenomeView is freely available for academic use as an open source software package at http://genomeview.org.",2011-10-01 +27307629,Data-driven mechanistic analysis method to reveal dynamically evolving regulatory networks.,"

Motivation

Mechanistic models based on ordinary differential equations provide powerful and accurate means to describe the dynamics of molecular machinery which orchestrates gene regulation. When combined with appropriate statistical techniques, mechanistic models can be calibrated using experimental data and, in many cases, also the model structure can be inferred from time-course measurements. However, existing mechanistic models are limited in the sense that they rely on the assumption of static network structure and cannot be applied when transient phenomena affect, or rewire, the network structure. In the context of gene regulatory network inference, network rewiring results from the net impact of possible unobserved transient phenomena such as changes in signaling pathway activities or epigenome, which are generally difficult, but important, to account for.

Results

We introduce a novel method that can be used to infer dynamically evolving regulatory networks from time-course data. Our method is based on the notion that all mechanistic ordinary differential equation models can be coupled with a latent process that approximates the network structure rewiring process. We illustrate the performance of the method using simulated data and, further, we apply the method to study the regulatory interactions during T helper 17 (Th17) cell differentiation using time-course RNA sequencing data. The computational experiments with the real data show that our method is capable of capturing the experimentally verified rewiring effects of the core Th17 regulatory network. We predict Th17 lineage specific subnetworks that are activated sequentially and control the differentiation process in an overlapping manner.

Availability and implementation

An implementation of the method is available at http://research.ics.aalto.fi/csb/software/lem/

Contacts

jukka.intosalmi@aalto.fi or harri.lahdesmaki@aalto.fi.",2016-06-01 +24053737,SynTView - an interactive multi-view genome browser for next-generation comparative microorganism genomics.,"

Background

Dynamic visualisation interfaces are required to explore the multiple microbial genome data now available, especially those obtained by high-throughput sequencing - a.k.a. ""Next-Generation Sequencing"" (NGS) - technologies; they would also be useful for ""standard"" annotated genomes whose chromosome organizations may be compared. Although various software systems are available, few offer an optimal combination of feature-rich capabilities, non-static user interfaces and multi-genome data handling.

Results

We developed SynTView, a comparative and interactive viewer for microbial genomes, designed to run as either a web-based tool (Flash technology) or a desktop application (AIR environment). The basis of the program is a generic genome browser with sub-maps holding information about genomic objects (annotations). The software is characterised by the presentation of syntenic organisations of microbial genomes and the visualisation of polymorphism data (typically Single Nucleotide Polymorphisms - SNPs) along these genomes; these features are accessible to the user in an integrated way. A variety of specialised views are available and are all dynamically inter-connected (including linear and circular multi-genome representations, dot plots, phylogenetic profiles, SNP density maps, and more). SynTView is not linked to any particular database, allowing the user to plug his own data into the system seamlessly, and use external web services for added functionalities. SynTView has now been used in several genome sequencing projects to help biologists make sense out of huge data sets.

Conclusions

The most important assets of SynTView are: (i) the interactivity due to the Flash technology; (ii) the capabilities for dynamic interaction between many specialised views; and (iii) the flexibility allowing various user data sets to be integrated. It can thus be used to investigate massive amounts of information efficiently at the chromosome level. This innovative approach to data exploration could not be achieved with most existing genome browsers, which are more static and/or do not offer multiple views of multiple genomes. Documentation, tutorials and demonstration sites are available at the URL: http://genopole.pasteur.fr/SynTView.",2013-09-22 +27459727,Estimated Costs of Sporadic Gastrointestinal Illness Associated with Surface Water Recreation: A Combined Analysis of Data from NEEAR and CHEERS Studies.,"

Background

The burden of illness can be described by addressing both incidence and illness severity attributable to water recreation. Monetized as cost, attributable disease burden estimates can be useful for environmental management decisions.

Objectives

We characterize the disease burden attributable to water recreation using data from two cohort studies using a cost of illness (COI) approach and estimate the largest drivers of the disease burden of water recreation.

Methods

Data from the NEEAR study, which evaluated swimming and wading in marine and freshwater beaches in six U.S. states, and CHEERS, which evaluated illness after incidental-contact recreation (boating, canoeing, fishing, kayaking, and rowing) on waterways in the Chicago area, were used to estimate the cost per case of gastrointestinal illness and costs attributable to water recreation. Data on health care and medication utilization and missed days of work or leisure were collected and combined with cost data to construct measures of COI.

Results

Depending on different assumptions, the cost of gastrointestinal symptoms attributable to water recreation are estimated to be $1,220 for incidental-contact recreation (range $338-$1,681) and $1,676 for swimming/wading (range $425-2,743) per 1,000 recreators. Lost productivity is a major driver of the estimated COI, accounting for up to 90% of total costs.

Conclusions

Our estimates suggest gastrointestinal illness attributed to surface water recreation at urban waterways, lakes, and coastal marine beaches is responsible for costs that should be accounted for when considering the monetary impact of efforts to improve water quality. The COI provides more information than the frequency of illness, as it takes into account disease incidence, health care utilization, and lost productivity. Use of monetized disease severity information should be included in future studies of water quality and health. Citation: DeFlorio-Barker S, Wade TJ, Jones RM, Friedman LS, Wing C, Dorevitch S. 2017. Estimated costs of sporadic gastrointestinal illness associated with surface water recreation: a combined analysis of data from NEEAR and CHEERS Studies. Environ Health Perspect 125:215-222; http://dx.doi.org/10.1289/EHP130.",2016-07-26 +29785919,Genomic selection in dairy cattle simulated populations.,"Genomic selection is arguably the most promising tool for improving genetic gain in domestic animals to emerge in the last few decades, but is an expensive process. The aim of this study was to evaluate the economic impact related to the implementation of genomic selection in a simulated dairy cattle population. The software QMSim was used to simulate genomic and phenotypic data. The simulated genome contained 30 chromosomes with 100 cm each, 1666 SNPs markers equally spread and 266 QTLs randomly designated for each chromosome. The numbers of markers and QTLs were designated according to information available from Animal QTL (http://www.animalgenome.org/QTLdb) and Bovine QTL (http://bovineqtl.tamu.edu/). The allelic frequency changes were assigned in a gamma distribution with alpha parameters equal to 0·4. Recurrent mutation rates of 1·0e-4 were assumed to apply to markers and QTLs. 
A historic population of 1000 individuals was generated and the total number of animals was reduced gradually along 850 generations until we obtained a number of 200 animals in the last generation, characterizing a bottleneck effect. Progenies were created along generations from random mating of the male and female gametes, assuming the same proportion of both genders. Than the population was extended for another 150 generations until we obtained 17 000 animals, with only 320 male individuals in the last generation. After this period a 25 year of selection was simulated taking into account a trait limited by sex with heritability of 0·30 (i.e. milk yield), one progeny/cow/year and variance equal to 1·0. Annually, 320 bulls were mated with 16 000 dams, assuming a replacement rate of 60 and 40% for males and females, respectively. Selection and discard criteria were based in four strategies to obtain the EBVs assuming as breeding objective to maximize milk yield. The progeny replaced the discarded animals creating an overlapping generation structure. The selection strategies were: RS is selection based on random values; PS is selection based on phenotypic values; Blup is selection based on EBVs estimated by BLUP; and GEBV is selection based on genomic estimated breeding values in one step, using high (GBlup) and low (GBlupi) density panels. Results indicated that the genetic evaluation using the aid of genomic information could provide better genetic gain rates in dairy cattle breeding programs as well as reduce the average inbreeding coefficient in the population. 
The economic viability indicators showed that only Blup and GBlup/GBlupi strategies, the ones that used milk control and genetic evaluation were economic viable, considering a discount rate of 6·32% per year.",2018-05-01 +24022982,LTMap: a web server for assessing the potential liver toxicity by genome-wide transcriptional expression data.,"Toxicogenomics (TGx) has played a significant role in mechanistic research related with hepatotoxicity as well as liver toxicity prediction. Currently, several large-scale preclinical TGx data sets were made freely accessible to the public, such as Open TG-GATEs. With the availability of a sufficient amount of microarray data, it is important to integrate this information to provide new insights into the risk assessment of potential drug-induced liver toxicity. Here we developed a web server for evaluating the potential liver toxicity based on genome-wide transcriptomics data, namely LTMap. In LTMap, researchers could compare signatures of query compounds against a pregenerated signature database of 20 123 Affymetrix arrays associated with about 170 compounds retrieved from the largest public toxicogenomics data set Open TG-GATEs. Results from this comparison may lead to the unexpected discovery of similar toxicological responses between chemicals. We validated our computational approach for similarity comparison using three example drugs. Our successful applications of LTMap in these case studies demonstrated its utility in revealing the connection of chemicals according to similar toxicological behaviors. Furthermore, a user-friendly web interface is provided by LTMap to browse and search toxicogenomics data (http://tcm.zju.edu.cn/ltmap).",2013-09-11 +25982314,GermlncRNA: a unique catalogue of long non-coding RNAs and associated regulations in male germ cell development.,"Spermatogenic failure is a major cause of male infertility, which affects millions of couples worldwide. 
Recent discovery of long non-coding RNAs (lncRNAs) as critical regulators in normal and disease development provides new clues for delineating the molecular regulation in male germ cell development. However, few functional lncRNAs have been characterized to date. A major limitation in studying lncRNA in male germ cell development is the absence of germ cell-specific lncRNA annotation. Current lncRNA annotations are assembled by transcriptome data from heterogeneous tissue sources; specific germ cell transcript information of various developmental stages is therefore under-represented, which may lead to biased prediction or fail to identity important germ cell-specific lncRNAs. GermlncRNA provides the first comprehensive web-based and open-access lncRNA catalogue for three key male germ cell stages, including type A spermatogonia, pachytene spermatocytes and round spermatids. This information has been developed by integrating male germ transcriptome resources derived from RNA-Seq, tiling microarray and GermSAGE. Characterizations on lncRNA-associated regulatory features, potential coding gene and microRNA targets are also provided. Search results from GermlncRNA can be exported to Galaxy for downstream analysis or downloaded locally. Taken together, GermlncRNA offers a new avenue to better understand the role of lncRNAs and associated targets during spermatogenesis. Database URL: http://germlncrna.cbiit.cuhk.edu.hk/",2015-05-17 +28334050,Improving transcriptome de novo assembly by using a reference genome of a related species: Translational genomics from oil palm to coconut.,"The palms are a family of tropical origin and one of the main constituents of the ecosystems of these regions around the world. The two main species of palm represent different challenges: coconut (Cocos nucifera L.) is a source of multiple goods and services in tropical communities, while oil palm (Elaeis guineensis Jacq) is the main protagonist of the oil market. 
In this study, we present a workflow that exploits the comparative genomics between a target species (coconut) and a reference species (oil palm) to improve the transcriptomic data, providing a proteome useful to answer functional or evolutionary questions. This workflow reduces redundancy and fragmentation, two inherent problems of transcriptomic data, while preserving the functional representation of the target species. Our approach was validated in Arabidopsis thaliana using Arabidopsis lyrata and Capsella rubella as references species. This analysis showed the high sensitivity and specificity of our strategy, relatively independent of the reference proteome. The workflow increased the length of proteins products in A. thaliana by 13%, allowing, often, to recover 100% of the protein sequence length. In addition redundancy was reduced by a factor greater than 3. In coconut, the approach generated 29,366 proteins, 1,246 of these proteins deriving from new contigs obtained with the BRANCH software. The coconut proteome presented a functional profile similar to that observed in rice and an important number of metabolic pathways related to secondary metabolism. The new sequences found with BRANCH software were enriched in functions related to biotic stress. Our strategy can be used as a complementary step to de novo transcriptome assembly to get a representative proteome of a target species. The results of the current analysis are available on the website PalmComparomics (http://palm-comparomics.southgreen.fr/).",2017-03-23 +27294184,"A firm-level dataset for analyzing entry, exit, employment and R&D expenditures in the UK: 1997-2012.","This data article is related to the research article entitled ""Inverted-U relationship between R&D intensity and survival: Evidence on scale and complementarity effects in UK data"" (Ugur et al., In press) [1]. 
It describes the trends in R&D expenditures, employment of R&D personnel and firm entry and exit rates in the UK from 1998 to 2012. We also provide statistics on net employment creation and net R&D investments due to firm entry and exits. In addition, we compute the correlation coefficients between entry and exit rates at the two digit industry level so as to examine whether the correlations are contemporaneous or inter-temporal. Finally, we provide information about the underlying dataset to which secure access is available through UK Data Service Archive 7716 at http://dx.doi.org/10.5255/UKDA-SN-7716-1.",2016-05-21 +25119676,Improvements to pairwise sequence comparison (PASC): a genome-based web tool for virus classification.,"The number of viral genome sequences in the public databases is increasing dramatically, and these sequences are playing an important role in virus classification. Pairwise sequence comparison is a sequence-based virus classification method. A program using this method calculates the pairwise identities of virus sequences within a virus family and displays their distribution, and visual analysis helps to determine demarcations at different taxonomic levels such as strain, species, genus and subfamily. Subsequent comparison of new sequences against existing ones allows viruses from which the new sequences were derived to be classified. Although this method cannot be used as the only criterion for virus classification in some cases, it is a quantitative method and has many advantages over conventional virus classification methods. It has been applied to several virus families, and there is an increasing interest in using this method for other virus families/groups. The Pairwise Sequence Comparison (PASC) classification tool was created at the National Center for Biotechnology Information. The tool's database stores pairwise identities for complete genomes/segments of 56 virus families/groups. 
Data in the system are updated every day to reflect changes in virus taxonomy and additions of new virus sequences to the public database. The web interface of the tool ( http://www.ncbi.nlm.nih.gov/sutils/pasc/ ) makes it easy to navigate and perform analyses. Multiple new viral genome sequences can be tested simultaneously with this system to suggest the taxonomic position of virus isolates in a specific family. PASC eliminates potential discrepancies in the results caused by different algorithms and/or different data used by researchers.",2014-08-14 +27761496,Tourism trends in the world׳s main destinations before and after the 2008 financial crisis using UNWTO official data.,"The first decade of the present century has been characterized by several economic shocks such as the 2008 financial crisis. In this data article we present the annual percentage growth rates of the main tourism indicators in the world׳s top tourist destinations: the United States, China, France, Spain, Italy, United Kingdom, Germany, Turkey, Mexico and Austria. We use data from the Compendium of Tourism Statistics provided by the World Tourism Organization (http://www2.unwto.org/content/data-0). It has been demonstrated that the dynamics of growth in the tourism industry pose different challenges to each destination in the previous study ""Positioning and clustering of the world׳s top tourist destinations by means of dimensionality reduction techniques for categorical data"" (Claveria and Poluzzi, 2016, [1]). We provide a descriptive analysis of the variables over the period comprised between 2000 and 2010. We complement the analysis by graphing the evolution of the main variables so as to visually represent the co-movements between tourism variables and economic growth.",2016-04-01 +27485444,Predicting G protein-coupled receptor downstream signaling by tissue expression.,"

Motivation

G protein-coupled receptors (GPCRs) are central to how cells respond to their environment and a major class of pharmacological targets. However, comprehensive knowledge of which pathways are activated and deactivated by these essential sensors is largely unknown. To better understand the mechanism of GPCR signaling system, we integrated five independent genome-wide expression datasets, representing 275 human tissues and cell lines, with protein-protein interactions and functional pathway data.

Results

We found that tissue-specificity plays a crucial part in the function of GPCR signaling system. Only a few GPCRs are expressed in each tissue, which are coupled by different combinations of G-proteins or β-arrestins to trigger specific downstream pathways. Based on this finding, we predicted the downstream pathways of GPCR in human tissues and validated our results with L1000 knockdown data. In total, we identified 154,988 connections between 294 GPCRs and 690 pathways in 240 tissues and cell types.

Availability and implementation

The source code and results supporting the conclusions of this article are available at http://tatonettilab.org/resources/GOTE/source_code/ CONTACT: nick.tatonetti@columbia.eduSupplementary information: Supplementary data are available at Bioinformatics online.",2016-08-02 +24574118,Canto: an online tool for community literature curation.,"

Motivation

Detailed curation of published molecular data is essential for any model organism database. Community curation enables researchers to contribute data from their papers directly to databases, supplementing the activity of professional curators and improving coverage of a growing body of literature. We have developed Canto, a web-based tool that provides an intuitive curation interface for both curators and researchers, to support community curation in the fission yeast database, PomBase. Canto supports curation using OBO ontologies, and can be easily configured for use with any species.

Availability

Canto code and documentation are available under an Open Source license from http://curation.pombase.org/. Canto is a component of the Generic Model Organism Database (GMOD) project (http://www.gmod.org/).",2014-02-25 +24923818,ParaPep: a web resource for experimentally validated antiparasitic peptide sequences and their structures. ,"ParaPep is a repository of antiparasitic peptides, which provides comprehensive information related to experimentally validated antiparasitic peptide sequences and their structures. The data were collected and compiled from published research papers, patents and from various databases. The current release of ParaPep holds 863 entries among which 519 are unique peptides. In addition to peptides having natural amino acids, ParaPep also consists of peptides having d-amino acids and chemically modified residues. In ParaPep, most of the peptides have been evaluated for growth inhibition of various species of Plasmodium, Leishmania and Trypanosoma. We have provided comprehensive information about these peptides that include peptide sequence, chemical modifications, stereochemistry, antiparasitic activity, origin, nature of peptide, assay types, type of parasite, mode of action and hemolytic activity. Structures of peptides consisting of natural, as well as modified amino acids have been determined using state-of-the-art software, PEPstr. To facilitate users, various user-friendly web tools, for data fetching, analysis and browsing, have been integrated. We hope that ParaPep will be advantageous in designing therapeutic peptides against parasitic diseases. Database URL: http://crdd.osdd.net/raghava/parapep/",2014-06-12 +24265223,EMAGE mouse embryo spatial gene expression database: 2014 update.,"EMAGE (http://www.emouseatlas.org/emage/) is a freely available database of in situ gene expression patterns that allows users to perform online queries of mouse developmental gene expression. 
EMAGE is unique in providing both text-based descriptions of gene expression plus spatial maps of gene expression patterns. This mapping allows spatial queries to be accomplished alongside more traditional text-based queries. Here, we describe our recent progress in spatial mapping and data integration. EMAGE has developed a method of spatially mapping 3D embryo images captured using optical projection tomography, and through the use of an IIP3D viewer allows users to view arbitrary sections of raw and mapped 3D image data in the context of a web browser. EMAGE now includes enhancer data, and we have spatially mapped images from a comprehensive screen of transgenic reporter mice that detail the expression of mouse non-coding genomic DNA fragments with enhancer activity. We have integrated the eMouseAtlas anatomical atlas and the EMAGE database so that a user of the atlas can query the EMAGE database easily. In addition, we have extended the atlas framework to enable EMAGE to spatially cross-index EMBRYS whole mount in situ hybridization data. We additionally report on recent developments to the EMAGE web interface, including new query and analysis capabilities.",2013-11-21 +28039094,PETPVE12: an SPM toolbox for Partial Volume Effects correction in brain PET - Application to amyloid imaging with AV45-PET.,"Positron emission tomography (PET) allows detecting molecular brain changes in vivo. However, the accuracy of PET is limited by partial volume effects (PVE) that affects quantitative analysis and visual interpretation of the images. Although PVE-correction methods have been shown to effectively increase the correspondence of the measured signal with the true regional tracer uptake, these procedures are still not commonly applied, neither in clinical nor in research settings. Here, we present an implementation of well validated PVE-correction procedures as a SPM toolbox, PETPVE12, for automated processing. 
We demonstrate its utility by a comprehensive analysis of the effects of PVE-correction on amyloid-sensitive AV45-PET data from 85 patients with Alzheimer's disease (AD) and 179 cognitively normal (CN) elderly. Effects of PVE-correction on global cortical standard uptake value ratios (SUVR) and the power of diagnostic group separation were assessed for the region-wise geometric transfer matrix method (PVEc-GTM), as well as for the 3-compartmental voxel-wise ""Müller-Gärtner"" method (PVEc-MG). Both PVE-correction methods resulted in decreased global cortical SUVRs in the low to middle range of SUVR values, and in increased global cortical SUVRs at the high values. As a consequence, average SUVR of the CN group was reduced, whereas average SUVR of the AD group was increased by PVE-correction. These effects were also reflected in increased accuracies of group discrimination after PVEc-GTM (AUC=0.86) and PVEc-MG (AUC=0.89) compared to standard non-corrected SUVR (AUC=0.84). Voxel-wise analyses of PVEc-MG corrected data also demonstrated improved detection of regionally increased AV45 SUVR values in AD patients. These findings complement the growing evidence for a beneficial effect of PVE-correction in quantitative analysis of amyloid-sensitive PET data. The novel PETPVE12 toolbox significantly facilitates the application of PVE-correction, particularly within SPM-based processing pipelines. This is expected to foster the use of PVE-correction in brain PET for more widespread use. The toolbox is freely available at http://www.fil.ion.ucl.ac.uk/spm/ext/#PETPVE12.",2016-12-28 +29061753,Assessing the Combined Antibacterial Effect of Isoniazid and Rifampin on Four Mycobacterium tuberculosis Strains Using In Vitro Experiments and Response-Surface Modeling. 
,"While isoniazid and rifampin have been the cornerstone of tuberculosis therapy caused by drug-susceptible Mycobacterium tuberculosis for more than 40 years, their combined action has never been thoroughly assessed by modern quantitative pharmacology approaches. The aims of this work were to perform in vitro experiments and mathematical modeling of the antibacterial effect of isoniazid and rifampin alone and in combination against various strains of Mycobacterium tuberculosis. After MIC determination of H37Rv and three strains belonging to the Beijing, Euro-American, and Indo-Oceanic lineages, the antibacterial effects of isoniazid and rifampin alone and in combination were studied in static time-kill experiments. A sigmoidal maximum effect model (Hill equation) and a response-surface model were used to describe the effect of the drugs alone and in combination, respectively. The killing effects of isoniazid and rifampin alone were well described by the Hill equation. Rifampin displayed a more concentration-dependent effect than isoniazid around the MIC. The pharmacodynamics parameters of each drug (maximal effect, median effect concentration, and coefficient of sigmoidicity) were quite similar between the four strains. The response-surface model from Minto et al. fit data of combined effect very well with low bias and imprecision (C. F. Minto, T. W. Schnider, T. G. Short, K. M. Gregg, A. Gentilini, Anesthesiology 92:1603-1616, 2000, https://doi.org/10.1097/00000542-200006000-00017). Response-surface modeling showed that the combined action of isoniazid and rifampin was synergistic for the H37Rv, Beijing, and Euro-American strains but only additive for the Indo-Oceanic strain. 
This study can serve as a motivating example for preclinical evaluation of combined action of antituberculous drugs.",2017-12-21 +28670889,Heuristic Classifier for Observe Accuracy of Cancer Polyp Using Video Capsule Endoscopy,"Methods: Colonoscopy is a technique for examine colon cancer, polyps. In endoscopy, video capsule is universally +used mechanism for finding gastrointestinal stages. But both the mechanisms are used to find the colon cancer or +colorectal polyp. The Automatic Polyp Detection sub-challenge conducted as part of the Endoscopic Vision Challenge +(http://endovis.grand-challenge.org). Method: Colonoscopy may be primary way of improve the ability of colon +cancer detection especially flat lesions. Which otherwise may be difficult to detect. Recently, automatic polyp detection +algorithms have been proposed with various degrees of success. Though polyp detection in colonoscopy and other +traditional endoscopy procedure based images is becoming a mature field, due to its unique imaging characteristics, +detecting polyps automatically in colonoscopy is a hard problem. So the proposed video capsule cam supports to diagnose +the polyps accurate and easy to identify its pattern. Existing methodology mainly concentrated on high accuracy and +less time consumption and it uses many different types of data mining techniques. To analyse these high resolution +video scale image we have to take segmentation of image in pixel level binary pattern with the help of a mid-pass filter +and relative gray level of neighbours. This work consists of three major steps to improve the accuracy of video capsule +endoscopy such as missing data imputation, high dimensionality reduction or feature selection and classification. +The above steps are performed using a dataset called endoscopy polyp disease dataset with 500 patients. Our binary +classification algorithm relieves human analyses using the video frames. SVM has given major contribution to process +the dataset. 
Results: In this paper the key aspect of proposed results provide segmentation, binary pattern approach +with Genetic Fuzzy based Improved Kernel Support Vector machine (GF-IKSVM) classifier. The segmented images +all are mostly round shape. The result is refined via smooth filtering, computer vision methods and thresholding steps. +Conclusion: Our experimental result produces 94.4% accuracy in that the proposed fuzzy system and genetic Fuzzy, +which is higher than the methods, used in the literature. The GF-IKSVM classifier is well-organized and provides good +accuracy results for patched VCE polyp disease diagnosis.",2017-06-25 +28494801,"Spectral imaging toolbox: segmentation, hyperstack reconstruction, and batch processing of spectral images for the determination of cell and model membrane lipid order.","

Background

Spectral imaging with polarity-sensitive fluorescent probes enables the quantification of cell and model membrane physical properties, including local hydration, fluidity, and lateral lipid packing, usually characterized by the generalized polarization (GP) parameter. With the development of commercial microscopes equipped with spectral detectors, spectral imaging has become a convenient and powerful technique for measuring GP and other membrane properties. The existing tools for spectral image processing, however, are insufficient for processing the large data sets afforded by this technological advancement, and are unsuitable for processing images acquired with rapidly internalized fluorescent probes.

Results

Here we present a MATLAB spectral imaging toolbox with the aim of overcoming these limitations. In addition to common operations, such as the calculation of distributions of GP values, generation of pseudo-colored GP maps, and spectral analysis, a key highlight of this tool is reliable membrane segmentation for probes that are rapidly internalized. Furthermore, handling for hyperstacks, 3D reconstruction and batch processing facilitates analysis of data sets generated by time series, z-stack, and area scan microscope operations. Finally, the object size distribution is determined, which can provide insight into the mechanisms underlying changes in membrane properties and is desirable for e.g. studies involving model membranes and surfactant coated particles. Analysis is demonstrated for cell membranes, cell-derived vesicles, model membranes, and microbubbles with environmentally-sensitive probes Laurdan, carboxyl-modified Laurdan (C-Laurdan), Di-4-ANEPPDHQ, and Di-4-AN(F)EPPTEA (FE), for quantification of the local lateral density of lipids or lipid packing.

Conclusions

The Spectral Imaging Toolbox is a powerful tool for the segmentation and processing of large spectral imaging datasets with a reliable method for membrane segmentation and no ability in programming required. The Spectral Imaging Toolbox can be downloaded from https://uk.mathworks.com/matlabcentral/fileexchange/62617-spectral-imaging-toolbox .",2017-05-12 +29507949,Masked Speech Recognition and Reading Ability in School-Age Children: Is There a Relationship?,"

Purpose

The relationship between reading (decoding) skills, phonological processing abilities, and masked speech recognition in typically developing children was explored. This experiment was designed to evaluate the relationship between phonological processing and decoding abilities and 2 aspects of masked speech recognition in typically developing children: (a) the ability to benefit from temporal and spectral modulations within a noise masker and (b) the masking exerted by a speech masker.

Method

Forty-two typically developing 3rd- and 4th-grade children with normal hearing, ranging in age from 8;10 to 10;6 years (mean age = 9;2 years, SD = 0.5 months), completed sentence recognition testing in 4 different maskers: steady-state noise, temporally modulated noise, spectrally modulated noise, and two-talker speech. Children also underwent assessment of phonological processing abilities and assessments of single-word decoding. As a comparison group, 15 adults with normal hearing also completed speech-in-noise testing.

Results

Speech recognition thresholds varied between approximately 3 and 7 dB across children, depending on the masker condition. Compared to adults, performance in the 2-talker masker was relatively consistent across children. Furthermore, decreasing the signal-to-noise ratio had a more precipitously deleterious effect on children's speech recognition in the 2-talker masker than was observed for adults. For children, individual differences in speech recognition threshold were not predicted by phonological awareness or decoding ability in any masker condition.

Conclusions

No relationship was found between phonological awareness and/or decoding ability and a child's ability to benefit from spectral or temporal modulations. In addition, phonological awareness and/or decoding ability was not related to speech recognition in a 2-talker masker. Last, these data suggest that the between-listeners variability often observed in 2-talker maskers for adults may be smaller for children. The reasons for this child-adult difference need to be further explored.

Supplemental material

https://doi.org/10.23641/asha.5913547.",2018-03-01 +24344172,Sequence analysis of the genome of carnation (Dianthus caryophyllus L.).,"The whole-genome sequence of carnation (Dianthus caryophyllus L.) cv. 'Francesco' was determined using a combination of different new-generation multiplex sequencing platforms. The total length of the non-redundant sequences was 568,887,315 bp, consisting of 45,088 scaffolds, which covered 91% of the 622 Mb carnation genome estimated by k-mer analysis. The N50 values of contigs and scaffolds were 16,644 bp and 60,737 bp, respectively, and the longest scaffold was 1,287,144 bp. The average GC content of the contig sequences was 36%. A total of 1050, 13, 92 and 143 genes for tRNAs, rRNAs, snoRNA and miRNA, respectively, were identified in the assembled genomic sequences. For protein-encoding genes, 43 266 complete and partial gene structures excluding those in transposable elements were deduced. Gene coverage was ∼ 98%, as deduced from the coverage of the core eukaryotic genes. Intensive characterization of the assigned carnation genes and comparison with those of other plant species revealed characteristic features of the carnation genome. The results of this study will serve as a valuable resource for fundamental and applied research of carnation, especially for breeding new carnation varieties. Further information on the genomic sequences is available at http://carnation.kazusa.or.jp.",2013-12-17 +29531631,A prototype for evidence-based pharmaceutical opinions to promote physician-pharmacist communication around deprescribing.,"

Context

Interprofessional communication is an effective mechanism for reducing inappropriate prescriptions among older adults. Physicians' views about which elements are essential for pharmacists to include in an evidence-based pharmaceutical opinion for deprescribing remain unknown.

Objective

To develop a prototype for an evidence-based pharmaceutical opinion that promotes physician-pharmacist communication around deprescribing.

Methods

A standardized template for an evidence-based pharmaceutical opinion was developed with input from a convenience sample of 32 primary care physicians and 61 primary care pharmacists, recruited from conferences and community settings in Montreal, Canada. Participants were asked to comment on the need for clarifying treatment goals, including personalized patient data and biomarkers, highlighting evidence about drug harms, listing the credibility and source of the recommendations, providing therapeutic alternatives and formalizing official documentation of decision making. The content and format of the prototype underwent revision by community physicians and pharmacists until consensus was reached on a final recommended template.

Results

The majority of physicians (84%-97%) requested that the source of the deprescribing recommendations be cited, that alternative management options be provided and that the information be tailored to the patient. Sixteen percent of physicians expressed concern about the information in the opinions being too dense. Pharmacists also questioned the length of the opinion and asked that additional space be provided for the physician's response. A statement was added making the opinion a valid prescription upon receipt of a signature from physicians. Compared to a nonstandardized opinion, the majority of pharmacists believed the template was easier to use, more evidence based, more time efficient and more likely to lead to deprescribing.

Conclusion

Physicians and pharmacists endorsed a standardized template that promotes interprofessional communication for deprescribing (available at https://www.deprescribingnetwork.ca/pharmaceutical-opinions). The outcome of the D-Prescribe trial will determine the effectiveness of these evidence-based pharmaceutical opinions on deprescribing processes and outcomes. Can Pharm J (Ott) 2018;151:xx-xx.",2018-02-08 +24861624,Enhancing UCSF Chimera through web services.,"Integrating access to web services with desktop applications allows for an expanded set of application features, including performing computationally intensive tasks and convenient searches of databases. We describe how we have enhanced UCSF Chimera (http://www.rbvi.ucsf.edu/chimera/), a program for the interactive visualization and analysis of molecular structures and related data, through the addition of several web services (http://www.rbvi.ucsf.edu/chimera/docs/webservices.html). By streamlining access to web services, including the entire job submission, monitoring and retrieval process, Chimera makes it simpler for users to focus on their science projects rather than data manipulation. Chimera uses Opal, a toolkit for wrapping scientific applications as web services, to provide scalable and transparent access to several popular software packages. We illustrate Chimera's use of web services with an example workflow that interleaves use of these services with interactive manipulation of molecular sequences and structures, and we provide an example Python program to demonstrate how easily Opal-based web services can be accessed from within an application. 
Web server availability: http://webservices.rbvi.ucsf.edu/opal2/dashboard?command=serviceList.",2014-05-26 +27185889,CTLPScanner: a web server for chromothripsis-like pattern detection.,"Chromothripsis is a recently observed phenomenon in cancer cells in which one or several chromosomes shatter into pieces with subsequent inaccurate reassembly and clonal propagation. This type of event generates a potentially vast number of mutations within a relatively short-time period, and has been considered as a new paradigm in cancer development. Despite recent advances, much work is still required to better understand the molecular mechanisms of this phenomenon, and thus an easy-to-use tool is in urgent need for automatically detecting and annotating chromothripsis. Here we present CTLPScanner, a web server for detection of chromothripsis-like pattern (CTLP) in genomic array data. The output interface presents intuitive graphical representations of detected chromosome pulverization region, as well as detailed results in table format. CTLPScanner also provides additional information for associated genes in chromothripsis region to help identify the potential candidates involved in tumorigenesis. To assist in performing meta-data analysis, we integrated over 50 000 pre-processed genomic arrays from The Cancer Genome Atlas and Gene Expression Omnibus into CTLPScanner. The server allows users to explore the presence of chromothripsis signatures from public data resources, without carrying out any local data processing. CTLPScanner is freely available at http://cgma.scu.edu.cn/CTLPScanner/.",2016-05-16 +27324198,RTCR: a pipeline for complete and accurate recovery of T cell repertoires from high throughput sequencing data.,"

Motivation

High Throughput Sequencing (HTS) has enabled researchers to probe the human T cell receptor (TCR) repertoire, which consists of many rare sequences. Distinguishing between true but rare TCR sequences and variants generated by polymerase chain reaction (PCR) and sequencing errors remains a formidable challenge. The conventional approach to handle errors is to remove low quality reads, and/or rare TCR sequences. Such filtering discards a large number of true and often rare TCR sequences. However, accurate identification and quantification of rare TCR sequences is essential for repertoire diversity estimation.

Results

We devised a pipeline, called Recover TCR (RTCR), that accurately recovers TCR sequences, including rare TCR sequences, from HTS data (including barcoded data) even at low coverage. RTCR employs a data-driven statistical model to rectify PCR and sequencing errors in an adaptive manner. Using simulations, we demonstrate that RTCR can easily adapt to the error profiles of different types of sequencers and exhibits consistently high recall and high precision even at low coverages where other pipelines perform poorly. Using published real data, we show that RTCR accurately resolves sequencing errors and outperforms all other pipelines.

Availability and implementation

The RTCR pipeline is implemented in Python (v2.7) and C and is freely available at http://uubram.github.io/RTCR/ along with documentation and examples of typical usage.

Contact

b.gerritsen@uu.nl.",2016-06-20 +27307624,Reconstructing the temporal progression of HIV-1 immune response pathways.,"

Motivation

Most methods for reconstructing response networks from high throughput data generate static models which cannot distinguish between early and late response stages.

Results

We present TimePath, a new method that integrates time series and static datasets to reconstruct dynamic models of host response to stimulus. TimePath uses an Integer Programming formulation to select a subset of pathways that, together, explain the observed dynamic responses. Applying TimePath to study human response to HIV-1 led to accurate reconstruction of several known regulatory and signaling pathways and to novel mechanistic insights. We experimentally validated several of TimePath's predictions, highlighting the usefulness of temporal models.

Availability and implementation

Data, Supplementary text and the TimePath software are available from http://sb.cs.cmu.edu/timepath

Contact

zivbj@cs.cmu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +26569417,Sexual Dysfunction Related to Drugs: a Critical Review. Part V: α-Blocker and 5-ARI Drugs.,"

Unlabelled

Sexual dysfunction is a potential side effect of BPH (benign prostatic hyperplasia) and LUTS (lower urinary tract symptoms) drugs: this article is a critical review of the current literature. Many studies have been published on this topic. Methodological flaws limit the conclusions of these studies, mainly because of the lack of diagnostic criteria for ejaculatory and sexual desire dysfunction. Few of these studies are RCTs. The α-blocker (also called α1-adrenergic antagonist, alpha-adrenoceptor antagonist, alpha-blocker or AB) and 5-ARI (also called 5α-reductase inhibitor or testosterone-5-alpha reductase inhibitor) drugs can in particular cause erectile dysfunction, ejaculatory disorders and reduction of sexual desire. The sexual side effect profile of these drugs is different. Among the α-blockers, silodosin appears to have the highest incidence of ejaculatory disorders. Persistent sexual side effects after discontinuation of finasteride have recently been reported; however, further studies are needed to clarify the true incidence and the significance of this finding. It is desirable that future studies include validated tools to assess and diagnose sexual dysfunction induced by these medications, especially for ejaculation and sexual desire disorders. Only a small amount of research has intentionally set out to investigate sexual dysfunction caused by α-blocker and 5-ARI drugs: studies to specifically assess sexual dysfunction induced by these drugs are needed. Further studies are also needed to assess in the long term the role of combined therapy of phosphodiesterase type 5 inhibitors and α-blockers or 5-ARIs in treating LUTS/BPH.

Methods

This study was conducted in 2014 using the paper and electronic resources of the library of the ""Azienda Provinciale per i Servizi Sanitari (APSS)"" in Trento, Italy (http://atoz.ebsco.com/Titles/2793). The library has access to a wide range of databases including DYNAMED, MEDLINE Full Text, CINAHL Plus Full Text, The Cochrane Library, Micromedex healthcare series, BMJ Clinical Evidence. The full list of available journals can be viewed at http://atoz.ebsco.com/Titles/2793, or at the APSS web site (http://www.apss.tn.it). In completing this review, a literature search was conducted using the key words ""benign prostatic hyperplasia drugs"", ""lower urinary tract symptoms drugs"", ""α-blockers"", ""5-ARIs"", ""sexual dysfunction"", ""sexual side effects"", ""treatment-emergent sexual dysfunction"", ""phosphodiesterase type 5 (PDE5) inhibitors"". All resulting listed articles were reviewed. Studies published between 2002 and December 2014 were included in the review. We included all studies that explicitly reported data on sexual dysfunction during treatment with α-blockers and 5-ARIs. We also reviewed studies that have evaluated the use of phosphodiesterase type 5 (PDE5) inhibitors in combination with these drugs. The purpose was to identify possible intervention strategies for sexual dysfunction related to these drugs.",2015-11-16 +26293704,Factors associated with hospital admission after rotator cuff repair: the role of peripheral nerve blockade.,"

Study objective

The objective was to analyze the impact of a peripheral nerve block in addition to general anesthesia on hospital admission after surgical rotator cuff repair.

Design

This was a population-based outcome study. The cost effectiveness of ambulatory rotator cuff repair relies on the discharge of patients on the day of surgery. As the impact of a peripheral nerve block in addition to general anesthesia on this outcome is unknown, we sought to elucidate this subject using population-based data.

Patients and methods

Information on patients undergoing rotator cuff surgery under general anesthesia with or without the addition of a peripheral nerve block (GN vs G) from a retrospective database provided by Premier Perspective, Inc, Charlotte, NC (http://www.premierinc.com), was analyzed. Using multilevel multivariable regressions, we evaluated the independent impact of the type of anesthesia on the outcomes hospital admission, combined major complications, and increased hospital costs.

Results

We identified 27,201 patients who underwent surgical rotator cuff repair. Approximately 89% (24,240) of patients were discharged on the day of surgery, whereas 11% (2961) were admitted to the hospital. The admission rates were 9.1% for the GN group and 11.2% for the G group (P=.0001). The multivariable regression models showed that patients with the addition of a peripheral nerve block had 18% less risk of being admitted to the hospital (relative risk [RR]=0.82; 95% confidence interval [CI], 0.74-0.91; P=.0003) compared with those without this intervention. Differences in risk for combined major complications (RR=1.00; 95% CI, 0.83-1.20; P=.9751) or increased hospital costs (RR=0.97; 95% CI, 0.93-1.02; P=.2538) were nonsignificant.

Discussion

For patients undergoing surgical rotator cuff repair under general anesthesia, the addition of a peripheral nerve block may be associated with a reduction in the need for postoperative hospital admission after ambulatory surgery. Although the reason for this finding has to remain speculative, better pain control may play a role.",2015-08-17 +26865947,Towards exergaming commons: composing the exergame ontology for publishing open game data.,"

Background

It has been shown that exergames have multiple benefits for physical, mental and cognitive health. Only recently, however, researchers have started considering them as health monitoring tools, through collection and analysis of game metrics data. In light of this and initiatives like the Quantified Self, there is an emerging need to open the data produced by health games and their associated metrics in order for them to be evaluated by the research community in an attempt to quantify their potential health, cognitive and physiological benefits.

Methods

We have developed an ontology that describes exergames using the Web Ontology Language (OWL); it is available at http://purl.org/net/exergame/ns#. After an investigation of key components of exergames, relevant ontologies were incorporated, while necessary classes and properties were defined to model these components. A JavaScript framework was also developed in order to apply the ontology to online exergames. Finally, a SPARQL Endpoint is provided to enable open data access to potential clients through the web.

Results

Exergame components include details for players, game sessions, as well as, data produced during these game-playing sessions. The description of the game includes elements such as goals, game controllers and presentation hardware used; what is more, concepts from already existing ontologies are reused/repurposed. Game sessions include information related to the player, the date and venue where the game was played, as well as, the results/scores that were produced/achieved. These games are subsequently played by 14 users in multiple game sessions and the results derived from these sessions are published in a triplestore as open data.

Conclusions

We model concepts related to exergames by providing a standardized structure for reference and comparison. This is the first work that publishes data from actual exergame sessions on the web, facilitating the integration and analysis of the data, while allowing open data access through the web in an effort to enable the concept of Open Trials for Active and Healthy Ageing.",2016-02-09 +29693765,Integrating dynamic contrast-enhanced magnetic resonance imaging and diffusion kurtosis imaging for neoadjuvant chemotherapy assessment of nasopharyngeal carcinoma.,"BACKGROUND:Since neoadjuvant chemotherapy (NAC) has proven a benefit for locally advanced nasopharyngeal carcinoma (NPC), early response evaluation after chemotherapy is important to implement individualized therapy for NPC in the era of precision medicine. PURPOSE:To determine the combined and independent contribution between dynamic contrast-enhanced magnetic resonance imaging (DCE-MRI) and diffusion kurtosis imaging (DKI) in the early monitoring of NAC response for NPC. STUDY TYPE:Prospective. POPULATION:Fifty-three locally advanced NPC patients. FIELD STRENGTH/SEQUENCE:Four examinations before and at 4, 20, and 40 days after NAC initiation were performed at 3T MRI including DCE-MRI and DKI (b values = 0, 500, 1000, 1500 s/mm2 ). ASSESSMENT:DCE-MRI parameters (Ktrans [the volume transfer constant of Gd-DTPA], kep [rate constant], νe [the extracellular volume fraction of the imaged tissue], and νp [the blood volume fraction]) and DKI parameters (Dapp [apparent diffusion for non-Gaussian distribution] and Kapp [apparent kurtosis coefficient]) were analyzed using dedicated software. 
STATISTICAL TESTS:MRI parameters and their corresponding changes were compared between responders and nonresponders after one or two NAC cycles treatment using independent-samples Student's t-test or Mann-Whitney U-test depending on the normality contribution test and then followed by logistic regression and receiver operating characteristic curve (ROC) analyses. RESULTS:The responder group (RG) patients presented significantly higher mean Ktrans and Dapp values at baseline and larger ΔKtrans(0-4), Δvp(0-4), and ΔDapp(0-4) values after either one or two NAC cycles compared with the nonresponder group (NRG) patients (all P < 0.05). ROC analyses demonstrated the higher diagnostic accuracy of combined DCE-MRI and DKI model to distinguish nonresponders from responders after two NAC cycles than using DCE-MRI (0.987 vs. 0.872, P = 0.033) or DKI (0.987 vs. 0.898, P = 0.047) alone. DATA CONCLUSION:Combined DCE-MRI and DKI models had higher diagnostic accuracy for NAC assessment compared with either model used independently. LEVEL OF EVIDENCE:2 Technical Efficacy: Stage 2 J. Magn. Reson. Imaging 2018;47:1208-1216.",2018-04-25 +28318512,Understanding Children's Heart Surgery Data: A Cross-Disciplinary Approach to Codevelop a Website.,"Risk-adjusted survival statistics after children's heart surgery are published annually in the United Kingdom. Interpreting these statistics is difficult, and better resources about how to interpret survival data are needed. Here we describe how a multidisciplinary team of mathematicians, psychologists, and a charity worked with parents of heart surgery children and other users to codevelop online resources to present survival outcomes. Early and ongoing involvement of users was crucial and considerably changed the content, scope, and look of the website, and the formal psychology experiments provided deeper insight. 
The website http://childrensheartsurgery.info/ was launched in June 2016 to very positive reviews.",2017-03-15 +28294141,GEAR: A database of Genomic Elements Associated with drug Resistance.,"Drug resistance is becoming a serious problem that leads to the failure of standard treatments, which is generally developed because of genetic mutations of certain molecules. Here, we present GEAR (A database of Genomic Elements Associated with drug Resistance) that aims to provide comprehensive information about genomic elements (including genes, single-nucleotide polymorphisms and microRNAs) that are responsible for drug resistance. Right now, GEAR contains 1631 associations between 201 human drugs and 758 genes, 106 associations between 29 human drugs and 66 miRNAs, and 44 associations between 17 human drugs and 22 SNPs. These relationships are firstly extracted from primary literature with text mining and then manually curated. The drug resistome deposited in GEAR provides insights into the genetic factors underlying drug resistance. In addition, new indications and potential drug combinations can be identified based on the resistome. The GEAR database can be freely accessed through http://gear.comp-sysbio.org.",2017-03-15 +29615873,Alterations of White Matter Integrity and Hippocampal Functional Connectivity in Type 2 Diabetes Without Mild Cognitive Impairment.,"Aims: To investigate the white matter (WM) integrity and hippocampal functional connectivity (FC) in type 2 diabetes mellitus (T2DM) patients without mild cognitive impairment (MCI) by using diffusion tensor imaging (DTI) and resting-state functional magnetic resonance imaging (rs-fMRI), respectively. Methods: Twelve T2DM patients without MCI and 24 age, sex and education matched healthy controls (HC) were recruited. DTI and rs-fMRI data were subsequently acquired on a 3.0T MR scanner. 
Tract-based spatial statistics (TBSS) combining region of interests (ROIs) analysis was used to investigate the alterations of DTI metrics (fractional anisotropy (FA), mean diffusivity (MD), λ1 and λ23) and FC measurement was performed to calculate hippocampal FC with other brain regions. Cognitive function was evaluated by using Mini-Mental State Examination (MMSE) and Montreal Cognitive Assessment (MoCA). Brain volumes were also evaluated among these participants. Results: There were no difference of MMSE and MoCA scores between two groups. Neither whole brain nor regional brain volume decrease was revealed in T2DM patients without MCI. DTI analysis revealed extensive WM disruptions, especially in the body of corpus callosum (CC). Significant decreases of hippocampal FC with certain brain structures were revealed, especially with the bilateral frontal cortex. Furthermore, the decreased FA in left posterior thalamic radiation (PTR) and increased MD in the splenium of CC were closely related with the decreased hippocampal FC to caudate nucleus and frontal cortex. Conclusions: T2DM patients without MCI showed extensive WM disruptions and abnormal hippocampal FC. Moreover, the WM disruptions and abnormal hippocampal FC were closely associated. Highlights -T2DM patients without MCI demonstrated no obvious brain volume decrease.-Extensive white matter disruptions, especially within the body of corpus callosum, were revealed with DTI analysis among the T2DM patients.-Despite no MCI in T2DM patients, decreased functional connectivity between hippocampal region and some critical brain regions were detected.-The alterations in hippocampal functional connectivity were closely associated with those of the white matter structures in T2DM patients. This trial was registered to ClinicalTrials.gov (NCT02420470, https://www.clinicaltrials.gov/).",2018-03-20 +28039163,Training alignment parameters for arbitrary sequencers with LAST-TRAIN.,"

Summary

LAST-TRAIN improves sequence alignment accuracy by inferring substitution and gap scores that fit the frequencies of substitutions, insertions, and deletions in a given dataset. We have applied it to mapping DNA reads from IonTorrent and PacBio RS, and we show that it reduces reference bias for Oxford Nanopore reads.

Availability and implementation

the source code is freely available at http://last.cbrc.jp/.

Contact

mhamada@waseda.jp or mcfrith@edu.k.u-tokyo.ac.jp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +25541969,Scholarly context not found: one in five articles suffers from reference rot.,"The emergence of the web has fundamentally affected most aspects of information communication, including scholarly communication. The immediacy that characterizes publishing information to the web, as well as accessing it, allows for a dramatic increase in the speed of dissemination of scholarly knowledge. But, the transition from a paper-based to a web-based scholarly communication system also poses challenges. In this paper, we focus on reference rot, the combination of link rot and content drift to which references to web resources included in Science, Technology, and Medicine (STM) articles are subject. We investigate the extent to which reference rot impacts the ability to revisit the web context that surrounds STM articles some time after their publication. We do so on the basis of a vast collection of articles from three corpora that span publication years 1997 to 2012. For over one million references to web resources extracted from over 3.5 million articles, we determine whether the HTTP URI is still responsive on the live web and whether web archives contain an archived snapshot representative of the state the referenced resource had at the time it was referenced. We observe that the fraction of articles containing references to web resources is growing steadily over time. We find one out of five STM articles suffering from reference rot, meaning it is impossible to revisit the web context that surrounds them some time after their publication. When only considering STM articles that contain references to web resources, this fraction increases to seven out of ten. We suggest that, in order to safeguard the long-term integrity of the web-based scholarly record, robust solutions to combat the reference rot problem are required. 
In conclusion, we provide a brief insight into the directions that are explored with this regard in the context of the Hiberlink project.",2014-12-26 +23344737,The semantic priming project.,"Speeded naming and lexical decision data for 1,661 target words following related and unrelated primes were collected from 768 subjects across four different universities. These behavioral measures have been integrated with demographic information for each subject and descriptive characteristics for every item. Subjects also completed portions of the Woodcock-Johnson reading battery, three attentional control tasks, and a circadian rhythm measure. These data are available at a user-friendly Internet-based repository ( http://spp.montana.edu ). This Web site includes a search engine designed to generate lists of prime-target pairs with specific characteristics (e.g., length, frequency, associative strength, latent semantic similarity, priming effect in standardized and raw reaction times). We illustrate the types of questions that can be addressed via the Semantic Priming Project. These data represent the largest behavioral database on semantic priming and are available to researchers to aid in selecting stimuli, testing theories, and reducing potential confounds in their studies.",2013-12-01 +27115645,Bacterial Genomic Data Analysis in the Next-Generation Sequencing Era.,"Bacterial genome sequencing is now an affordable choice for many laboratories for applications in research, diagnostic, and clinical microbiology. Nowadays, an overabundance of tools is available for genomic data analysis. However, tools differ for algorithms, languages, hardware requirements, and user interface, and combining them as it is necessary for sequence data interpretation often requires (bio)informatics skills which can be difficult to find in many laboratories. 
In addition, multiple data sources, as well as exceedingly large dataset sizes, and increasingly computational complexity further challenge the accessibility, reproducibility, and transparency of the entire process. In this chapter we will cover the main bioinformatics steps required for a complete bacterial genome analysis using next-generation sequencing data, from the raw sequence data to assembled and annotated genomes. All the tools described are available in the Orione framework ( http://orione.crs4.it ), which uniquely combines in a transparent way the most used open source bioinformatics tools for microbiology, allowing microbiologist without any specific hardware or informatics skill to conduct data-intensive computational analyses from quality control to microbial gene annotation.",2016-01-01 +28334295,MetCCS predictor: a web server for predicting collision cross-section values of metabolites in ion mobility-mass spectrometry based metabolomics.,"

Summary

In metabolomics, rigorous structural identification of metabolites presents a challenge for bioinformatics. The use of collision cross-section (CCS) values of metabolites derived from ion mobility-mass spectrometry effectively increases the confidence of metabolite identification, but this technique suffers from the limit number of available CCS values. Currently, there is no software available for rapidly generating the metabolites' CCS values. Here, we developed the first web server, namely, MetCCS Predictor, for predicting CCS values. It can predict the CCS values of metabolites using molecular descriptors within a few seconds. Common users with limited background on bioinformatics can benefit from this software and effectively improve the metabolite identification in metabolomics.

Availability and implementation

The web server is freely available at: http://www.metabolomics-shanghai.org/MetCCS/ .

Contact

jiangzhu@sioc.ac.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +23955518,MitoFish and MitoAnnotator: a mitochondrial genome database of fish with an accurate and automatic annotation pipeline.,"Mitofish is a database of fish mitochondrial genomes (mitogenomes) that includes powerful and precise de novo annotations for mitogenome sequences. Fish occupy an important position in the evolution of vertebrates and the ecology of the hydrosphere, and mitogenomic sequence data have served as a rich source of information for resolving fish phylogenies and identifying new fish species. The importance of a mitogenomic database continues to grow at a rapid pace as massive amounts of mitogenomic data are generated with the advent of new sequencing technologies. A severe bottleneck seems likely to occur with regard to mitogenome annotation because of the overwhelming pace of data accumulation and the intrinsic difficulties in annotating sequences with degenerating transfer RNA structures, divergent start/stop codons of the coding elements, and the overlapping of adjacent elements. To ease this data backlog, we developed an annotation pipeline named MitoAnnotator. MitoAnnotator automatically annotates a fish mitogenome with a high degree of accuracy in approximately 5 min; thus, it is readily applicable to data sets of dozens of sequences. MitoFish also contains re-annotations of previously sequenced fish mitogenomes, enabling researchers to refer to them when they find annotations that are likely to be erroneous or while conducting comparative mitogenomic analyses. For users who need more information on the taxonomy, habitats, phenotypes, or life cycles of fish, MitoFish provides links to related databases. 
MitoFish and MitoAnnotator are freely available at http://mitofish.aori.u-tokyo.ac.jp/ (last accessed August 28, 2013); all of the data can be batch downloaded, and the annotation pipeline can be used via a web interface.",2013-08-16 +21781283,DbMDR: a relational database for multidrug resistance genes as potential drug targets.,"DbMDR is non-redundant reference database of multidrug resistance (MDR) genes and their orthologs acting as potential drug targets. Drug resistance is a common phenomenon of pathogens, creating a serious problem of inactivation of drugs and antibiotics resulting in occurrence of diseases. Apart from other factors, the MDR genes present in pathogens are shown to be responsible for multidrug resistance. Much of the unorganized information on MDR genes is scattered across the literature and other web resources. Thus, consolidation of such knowledge about MDR genes into one database will make the drug discovery research more efficient. Mining of text for MDR genes has resulted into a large number of publications but in scattered and unorganized form. This information was compiled into a database, which enables a user not only to look at a particular MDR gene but also to find out putative homologs based on sequence similarity, conserved domains, and motifs in proteins encoded by MDR genes more efficiently. At present, DbMDR database contains 2843 MDR genes characterized experimentally as well as functionally annotated with cross-referencing search support. The DbMDR database (http://203.190.147.116/dbmdr/) is a comprehensive resource for comparative study focused on MDR genes and metabolic pathway efflux pumps and intended to provide a platform for researchers for further research in drug resistance.",2011-09-06 +28049437,RELIC: a novel dye-bias correction method for Illumina Methylation BeadChip.,"

Background

The Illumina Infinium HumanMethylation450 BeadChip and its successor, Infinium MethylationEPIC BeadChip, have been extensively utilized in epigenome-wide association studies. Both arrays use two fluorescent dyes (Cy3-green/Cy5-red) to measure methylation level at CpG sites. However, performance difference between dyes can result in biased estimates of methylation levels.

Results

Here we describe a novel method, called REgression on Logarithm of Internal Control probes (RELIC) to correct for dye bias on whole array by utilizing the intensity values of paired internal control probes that monitor the two color channels. We evaluate the method in several datasets against other widely used dye-bias correction methods. Results on data quality improvement showed that RELIC correction statistically significantly outperforms alternative dye-bias correction methods. We incorporated the method into the R package ENmix, which is freely available from the Bioconductor website ( https://www.bioconductor.org/packages/release/bioc/html/ENmix.html ).

Conclusions

RELIC is an efficient and robust method to correct for dye-bias in Illumina Methylation BeadChip data. It outperforms other alternative methods and conveniently implemented in R package ENmix to facilitate DNA methylation studies.",2017-01-03 +28623589,AraNet: A Network Biology Server for Arabidopsis thaliana and Other Non-Model Plant Species.,"Functional gene networks link genes based on their functional relatedness, which is inferred from various complementary biological datasets. Gene networks comprising vast amounts of data can be used to predict which genes are associated with complex traits. Decades of studies in plant biology using the model organism Arabidopsis thaliana have generated large amounts of information, enabling the development of a system-level molecular network. AraNet (currently version 2) is a genome-scale functional gene network for Arabidopsis thaliana, constructed by integrating 19 types of genomics data and can be explored through a web-server (http://www.inetbio.org/aranet) to identify candidate genes for traits of interest. AraNet provides two alternative search paths for users to identify candidate genes and functions. The web server also exploits ortholog relationships between plant species and projects the genes of 28 other plant species (as of April, 2016) into the network of Arabidopsis genes. This allows researchers to use AraNet to predict genes/functions of not only Arabidopsis but also other non-model plants by expanding the functional knowledge of Arabidopsis. 
Here, we present a detailed description of how to search the AraNet network and interpret the search results to study plant gene functions and their associations with complex phenotypes.",2017-01-01 +28137287,Response to: Correcting for cell-type effects in DNA methylation studies: reference-based method outperforms latent variable approaches in empirical studies.,"We thank Hattab and colleagues for their correspondence and their investigation of cell-type mixture correction methods in methyl-CG binding domain sequencing. Here, we speculate on why surrogate variable analysis (SVA) performed differently between their two data sets, and poorly in one of them.Please see related Correspondence article: https://genomebiology.biomedcentral.com/articles/10/1186/s13059-017-1148-8 and related Research article: https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0935-y.",2017-01-30 +27899654,jPOSTrepo: an international standard data repository for proteomes.,"Major advancements have recently been made in mass spectrometry-based proteomics, yielding an increasing number of datasets from various proteomics projects worldwide. In order to facilitate the sharing and reuse of promising datasets, it is important to construct appropriate, high-quality public data repositories. jPOSTrepo (https://repository.jpostdb.org/) has successfully implemented several unique features, including high-speed file uploading, flexible file management and easy-to-use interfaces. This repository has been launched as a public repository containing various proteomic datasets and is available for researchers worldwide. In addition, our repository has joined the ProteomeXchange consortium, which includes the most popular public repositories such as PRIDE in Europe for MS/MS datasets and PASSEL for SRM datasets in the USA. Later MassIVE was introduced in the USA and accepted into the ProteomeXchange, as was our repository in July 2016, providing important datasets from Asia/Oceania. 
Accordingly, this repository thus contributes to a global alliance to share and store all datasets from a wide variety of proteomics experiments. Thus, the repository is expected to become a major repository, particularly for data collected in the Asia/Oceania region.",2016-11-28 +29747787,Preferred exercise modalities in patients with intermittent claudication.,"Conventional supervised exercise programs (SEPs) for claudicants are traditionally based on time-constrained, group-based structured programs usually at a hospital site. Uptake of an SEP is poor, despite the high-level evidence demonstrating its clinical effectiveness; therefore, alternative forms of exercise programs are needed which are more acceptable to patients. This study aimed to explore a range of exercise modalities to determine patient preferences for exercise delivery on a national level. This was a questionnaire survey to identify and incorporate patient preferences when designing a multicenter nationwide health-service evaluation of patient preference to exercise in the United Kingdom's National Health Service (the PREFER study). Patients with documented stable intermittent claudication who were suitable for an SEP were given a questionnaire to fill out at their clinic visit. Data were recorded using the Bristol Online Survey tool (http://www.survey.bris.ac.uk/) and analyzed descriptively. Thirty complete questionnaires were analyzed. Participants were generally unilateral claudicants (80%) with symptoms for over 1 year (64%). Only 6 of the 30 patients had engaged in a lifelong routine of exercise. Eighty-seven percent of patients indicated that they had not taken part in an exercise program, but 73% of those indicated that they would be willing to participate to improve their walking. Most patients expressed a preference for a home exercise program (50%) followed by a hospital SEP. The majority of patients (43%) were happy to exercise 3 days per week using a walking-based program (53%). 
There was however no consensus on the duration or intensity of the exercise program. The SEP is the recommended first-line treatment for intermittent claudication patients; however, the vast majority of patients fail to engage with or complete an exercise program. This study demonstrates that exercise therapy should be individualized and take a patient-centered approach. Commissioning groups should incentivize hospitals and clinicians to engage with their patient populations to understand their needs and deliver an appropriate service.",2018-02-02 +25503992,A hybrid method for identification of structural domains.,"Structural domains in proteins are the basic units to form various proteins. In the protein's evolution and functioning, domains play important roles. But the definition of domain is not yet precisely given, and the update cycle of structural domain databases is long. The automatic algorithms identify domains slowly, while protein entities with great structural complexity are on the rise. Here, we present a method which recognizes the compact and modular segments of polypeptide chains to identify structural domains, and contrast some data sets to illuminate their effect. The method combines support vector machine (SVM) with K-means algorithm. It is faster and more stable than most current algorithms and performs better. It also indicates that when proteins are presented as some Alpha-carbon atoms in 3D space, it is feasible to identify structural domains by the spatially structural properties. 
We have developed a web-server, which would be helpful in identification of structural domains (http://vis.sculab.org/~huayongpan/cgi-bin/domainAssignment.cgi).",2014-12-15 +24092884,The Plant Organelles Database 3 (PODB3) update 2014: integrating electron micrographs and new options for plant organelle research.,"The Plant Organelles Database 2 (PODB2), which was first launched in 2006 as PODB, provides static image and movie data of plant organelles, protocols for plant organelle research and external links to relevant websites. PODB2 has facilitated plant organellar research and the understanding of plant organelle dynamics. To provide comprehensive information on plant organelles in more detail, PODB2 was updated to PODB3 (http://podb.nibb.ac.jp/Organellome/). PODB3 contains two additional components: the electron micrograph database and the perceptive organelles database. Through the electron micrograph database, users can examine the subcellular and/or suborganellar structures in various organs of wild-type and mutant plants. The perceptive organelles database provides information on organelle dynamics in response to external stimuli. In addition to the extra components, the user interface for access has been enhanced in PODB3. The data in PODB3 are directly submitted by plant researchers and can be freely downloaded for use in further analysis. PODB3 contains all the information included in PODB2, and the volume of data and protocols deposited in PODB3 continue to grow steadily. We welcome contributions of data from all plant researchers to enhance the utility and comprehensiveness of PODB3.",2013-10-03 +27531099,SiNVICT: ultra-sensitive detection of single nucleotide variants and indels in circulating tumour DNA.,"

Motivation

Successful development and application of precision oncology approaches require robust elucidation of the genomic landscape of a patient's cancer and, ideally, the ability to monitor therapy-induced genomic changes in the tumour in an inexpensive and minimally invasive manner. Thanks to recent advances in sequencing technologies, 'liquid biopsy', the sampling of patient's bodily fluids such as blood and urine, is considered as one of the most promising approaches to achieve this goal. In many cancer patients, and especially those with advanced metastatic disease, deep sequencing of circulating cell free DNA (cfDNA) obtained from patient's blood yields a mixture of reads originating from the normal DNA and from multiple tumour subclones-called circulating tumour DNA or ctDNA. The ctDNA/cfDNA ratio as well as the proportion of ctDNA originating from specific tumour subclones depend on multiple factors, making comprehensive detection of mutations difficult, especially at early stages of cancer. Furthermore, sensitive and accurate detection of single nucleotide variants (SNVs) and indels from cfDNA is constrained by several factors such as the sequencing errors and PCR artifacts, and mapping errors related to repeat regions within the genome. In this article, we introduce SiNVICT, a computational method that increases the sensitivity and specificity of SNV and indel detection at very low variant allele frequencies. SiNVICT has the capability to handle multiple sequencing platforms with different error properties; it minimizes false positives resulting from mapping errors and other technology specific artifacts including strand bias and low base quality at read ends. 
SiNVICT also has the capability to perform time-series analysis, where samples from a patient sequenced at multiple time points are jointly examined to report locations of interest where there is a possibility that certain clones were wiped out by some treatment while some subclones gained selective advantage.

Results

We tested SiNVICT on simulated data as well as prostate cancer cell lines and cfDNA obtained from castration-resistant prostate cancer patients. On both simulated and biological data, SiNVICT was able to detect SNVs and indels with variant allele percentages as low as 0.5%. The lowest amounts of total DNA used for the biological data where SNVs and indels could be detected with very high sensitivity were 2.5 ng on the Ion Torrent platform and 10 ng on Illumina. With increased sequencing and mapping accuracy, SiNVICT might be utilized in clinical settings, making it possible to track the progress of point mutations and indels that are associated with resistance to cancer therapies and provide patients personalized treatment. We also compared SiNVICT with other popular SNV callers such as MuTect, VarScan2 and Freebayes. Our results show that SiNVICT performs better than these tools in most cases and allows further data exploration such as time-series analysis on cfDNA sequencing data.

Availability and implementation

SiNVICT is available at: https://sfu-compbio.github.io/sinvict. Supplementary information: Supplementary data are available at Bioinformatics online.

Contact

cenk@sfu.ca.",2016-08-16 +28413816,Spectroscopic and AFM characterization of polypeptide-surface interactions: Controls and lipid quantitative analyses.,"This article is related to http://dx.doi.org/10.1016/j.bbamem.2017.01.005 (Ø. Strømland, Ø.S. Handegård, M.L. Govasli, H. Wen, Ø. Halskau, 2017) [1]. In protein and polypeptide-membrane interaction studies, negatively charged lipids are often used as they are a known driver for membrane interaction. When using fluorescence spectroscopy and CD as indicators of polypeptide binding and conformational change, respectively, the effect of zwitterionic lipids only should be documented. The present data documents several aspects of how two engineered polypeptides (A-Cage-C and A-Lnk-C) derived from the membrane associating protein alpha-Lactalbumin affects and are affected by the presence of zwitterionic bilayers in the form of vesicles. We here document the behavior of the Cage and Lnk segments with respect to membrane interaction and their residual fold, using intrinsic tryptophan fluorescence assays. This data description also documents the coverage of solid-supported bilayers prepared by spin-coating mica using binary lipid mixes, a necessary step to ensure that AFM is performed on areas that are covered by lipid bilayers when performing experiments. Uncovered patches are detectable by both force curve measurements and height measurements. We tested naked mica's ability to cause aggregation as seen by AFM, and found this to be low compared to preparations containing negatively charged lipids. Work with lipids also carries the risk of chemical degradation taking place during vesicles preparation or other handling of the lipids. We therefore use 31P NMR to quantify the head-group content of commonly used commercial extracts before and after a standard protocol for vesicle production is applied.",2017-03-12 +26112452,PlantOrDB: a genome-wide ortholog database for land plants and green algae.,"

Background

Genes with different functions are originally generated from some ancestral genes by gene duplication, mutation and functional recombination. It is widely accepted that orthologs are homologous genes evolved from speciation events while paralogs are homologous genes resulted from gene duplication events. With the rapid increase of genomic data, identifying and distinguishing these genes among different species is becoming an important part of functional genomics research.

Description

Using 35 plant and 6 green algal genomes from Phytozome v9, we clustered 1,291,670 peptide sequences into 49,355 homologous gene families in terms of sequence similarity. For each gene family, we have generated a peptide sequence alignment and phylogenetic tree, and identified the speciation/duplication events for every node within the tree. For each node, we also identified and highlighted diagnostic characters that facilitate appropriate addition of a new query sequence into the existing phylogenetic tree and sequence alignment of its best matched gene family. Based on a desired species or subgroup of all species, users can view the phylogenetic tree, sequence alignment and diagnostic characters for a given gene family selectively. PlantOrDB not only allows users to identify orthologs or paralogs from phylogenetic trees, but also provides all orthologs that are built using Reciprocal Best Hit (RBH) pairwise alignment method. Users can upload their own sequences to find the best matched gene families, and visualize their query sequences within the relevant phylogenetic trees and sequence alignments.

Conclusion

PlantOrDB ( http://bioinfolab.miamioh.edu/plantordb ) is a genome-wide ortholog database for land plants and green algae. PlantOrDB offers highly interactive visualization, accurate query classification and powerful search functions useful for functional genomic research.",2015-06-26 +26618079,"An evolving computational platform for biological mass spectrometry: workflows, statistics and data mining with MASSyPup64.","In biological mass spectrometry, crude instrumental data need to be converted into meaningful theoretical models. Several data processing and data evaluation steps are required to come to the final results. These operations are often difficult to reproduce, because of too specific computing platforms. This effect, known as 'workflow decay', can be diminished by using a standardized informatic infrastructure. Thus, we compiled an integrated platform, which contains ready-to-use tools and workflows for mass spectrometry data analysis. Apart from general unit operations, such as peak picking and identification of proteins and metabolites, we put a strong emphasis on the statistical validation of results and Data Mining. MASSyPup64 includes e.g., the OpenMS/TOPPAS framework, the Trans-Proteomic-Pipeline programs, the ProteoWizard tools, X!Tandem, Comet and SpiderMass. The statistical computing language R is installed with packages for MS data analyses, such as XCMS/metaXCMS and MetabR. The R package Rattle provides a user-friendly access to multiple Data Mining methods. Further, we added the non-conventional spreadsheet program teapot for editing large data sets and a command line tool for transposing large matrices. Individual programs, console commands and modules can be integrated using the Workflow Management System (WMS) taverna. 
We explain the useful combination of the tools by practical examples: (1) A workflow for protein identification and validation, with subsequent Association Analysis of peptides, (2) Cluster analysis and Data Mining in targeted Metabolomics, and (3) Raw data processing, Data Mining and identification of metabolites in untargeted Metabolomics. Association Analyses reveal relationships between variables across different sample sets. We present its application for finding co-occurring peptides, which can be used for target proteomics, the discovery of alternative biomarkers and protein-protein interactions. Data Mining derived models displayed a higher robustness and accuracy for classifying sample groups in targeted Metabolomics than cluster analyses. Random Forest models do not only provide predictive models, which can be deployed for new data sets, but also the variable importance. We demonstrate that the later is especially useful for tracking down significant signals and affected pathways in untargeted Metabolomics. Thus, Random Forest modeling supports the unbiased search for relevant biological features in Metabolomics. Our results clearly manifest the importance of Data Mining methods to disclose non-obvious information in biological mass spectrometry . The application of a Workflow Management System and the integration of all required programs and data in a consistent platform makes the presented data analyses strategies reproducible for non-expert users. The simple remastering process and the Open Source licenses of MASSyPup64 (http://www.bioprocess.org/massypup/) enable the continuous improvement of the system.",2015-11-17 +24150937,LoQAtE--Localization and Quantitation ATlas of the yeast proteomE. A new tool for multiparametric dissection of single-protein behavior in response to biological perturbations in yeast.,"Living organisms change their proteome dramatically to sustain a stable internal milieu in fluctuating environments. 
To study the dynamics of proteins during stress, we measured the localization and abundance of the Saccharomyces cerevisiae proteome under various growth conditions and genetic backgrounds using the GFP collection. We created a database (DB) called 'LoQAtE' (Localization and Quantitation Atlas of the yeast proteomE), available online at http://www.weizmann.ac.il/molgen/loqate/, to provide easy access to these data. Using LoQAtE DB, users can get a profile of changes for proteins of interest as well as querying advanced intersections by either abundance changes, primary localization or localization shifts over the tested conditions. Currently, the DB hosts information on 5330 yeast proteins under three external perturbations (DTT, H₂O₂ and nitrogen starvation) and two genetic mutations [in the chaperonin containing TCP1 (CCT) complex and in the proteasome]. Additional conditions will be uploaded regularly. The data demonstrate hundreds of localization and abundance changes, many of which were not detected at the level of mRNA. LoQAtE is designed to allow easy navigation for non-experts in high-content microscopy and data are available for download. These data should open up new perspectives on the significant role of proteins while combating external and internal fluctuations.",2013-10-22 +23846595,JBioWH: an open-source Java framework for bioinformatics data integration.,"The Java BioWareHouse (JBioWH) project is an open-source platform-independent programming framework that allows a user to build his/her own integrated database from the most popular data sources. JBioWH can be used for intensive querying of multiple data sources and the creation of streamlined task-specific data sets on local PCs. JBioWH is based on a MySQL relational database scheme and includes JAVA API parser functions for retrieving data from 20 public databases (e.g. NCBI, KEGG, etc.). It also includes a client desktop application for (non-programmer) users to query data. 
In addition, JBioWH can be tailored for use in specific circumstances, including the handling of massive queries for high-throughput analyses or CPU intensive calculations. The framework is provided with complete documentation and application examples and it can be downloaded from the Project Web site at http://code.google.com/p/jbiowh. A MySQL server is available for demonstration purposes at hydrax.icgeb.trieste.it:3307. Database URL: http://code.google.com/p/jbiowh.",2013-07-11 +29032610,Tokyo Guidelines 2018: diagnostic criteria and severity grading of acute cholangitis (with videos).,"Although the diagnostic and severity grading criteria on the 2013 Tokyo Guidelines (TG13) are used worldwide as the primary standard for management of acute cholangitis (AC), they need to be validated through implementation and assessment in actual clinical practice. Here, we conduct a systematic review of the literature to validate the TG13 diagnostic and severity grading criteria for AC and propose TG18 criteria. While there is little evidence evaluating the TG13 criteria, they were validated through a large-scale case series study in Japan and Taiwan. Analyzing big data from this study confirmed that the diagnostic rate of AC based on the TG13 diagnostic criteria was higher than that based on the TG07 criteria, and that 30-day mortality in patients with a higher severity based on the TG13 severity grading criteria was significantly higher. Furthermore, a comparison of patients treated with early or urgent biliary drainage versus patients not treated this way showed no difference in 30-day mortality among patients with Grade I or Grade III AC, but significantly lower 30-day mortality in patients with Grade II AC who were treated with early or urgent biliary drainage. This suggests that the TG13 severity grading criteria can be used to identify Grade II patients whose prognoses may be improved through biliary drainage. 
The TG13 severity grading criteria may therefore be useful as an indicator for biliary drainage as well as a predictive factor when assessing the patient's prognosis. The TG13 diagnostic and severity grading criteria for AC can provide results quickly, are minimally invasive for the patients, and are inexpensive. We recommend that the TG13 criteria be adopted in the TG18 guidelines and used as standard practice in the clinical setting. Free full articles and mobile app of TG18 are available at: http://www.jshbps.jp/modules/en/index.php?content_id=47. Related clinical questions and references are also included.",2018-01-05 +29263101,Phyletic Distribution and Lineage-Specific Domain Architectures of Archaeal Two-Component Signal Transduction Systems. ,"The two-component signal transduction (TCS) machinery is a key mechanism of sensing environmental changes in the prokaryotic world. TCS systems have been characterized thoroughly in bacteria but to a much lesser extent in archaea. Here, we provide an updated census of more than 2,000 histidine kinases and response regulators encoded in 218 complete archaeal genomes, as well as unfinished genomes available from metagenomic data. We describe the domain architectures of the archaeal TCS components, including several novel output domains, and discuss the evolution of the archaeal TCS machinery. The distribution of TCS systems in archaea is strongly biased, with high levels of abundance in haloarchaea and thaumarchaea but none detected in the sequenced genomes from the phyla Crenarchaeota, Nanoarchaeota, and Korarchaeota The archaeal sensor histidine kinases are generally similar to their well-studied bacterial counterparts but are often located in the cytoplasm and carry multiple PAS and/or GAF domains. In contrast, archaeal response regulators differ dramatically from the bacterial ones. 
Most archaeal genomes do not encode any of the major classes of bacterial response regulators, such as the DNA-binding transcriptional regulators of the OmpR/PhoB, NarL/FixJ, NtrC, AgrA/LytR, and ActR/PrrA families and the response regulators with GGDEF and/or EAL output domains. Instead, archaea encode multiple copies of response regulators containing either the stand-alone receiver (REC) domain or combinations of REC with PAS and/or GAF domains. Therefore, the prevailing mechanism of archaeal TCS signaling appears to be via a variety of protein-protein interactions, rather than direct transcriptional regulation.IMPORTANCE Although the Archaea represent a separate domain of life, their signaling systems have been assumed to be closely similar to the bacterial ones. A study of the domain architectures of the archaeal two-component signal transduction (TCS) machinery revealed an overall similarity of archaeal and bacterial sensory modules but substantial differences in the signal output modules. The prevailing mechanism of archaeal TCS signaling appears to involve various protein-protein interactions rather than direct transcription regulation. The complete list of histidine kinases and response regulators encoded in the analyzed archaeal genomes is available online at http://www.ncbi.nlm.nih.gov/Complete_Genomes/TCSarchaea.html.",2018-03-12 +26740525,PINCAGE: probabilistic integration of cancer genomics data for perturbed gene identification and sample classification.,"

Motivation

Cancer development and progression is driven by a complex pattern of genomic and epigenomic perturbations. Both types of perturbations can affect gene expression levels and disease outcome. Integrative analysis of cancer genomics data may therefore improve detection of perturbed genes and prediction of disease state. As different data types are usually dependent, analysis based on independence assumptions will make inefficient use of the data and potentially lead to false conclusions.

Model

Here, we present PINCAGE (Probabilistic INtegration of CAncer GEnomics data), a method that uses probabilistic integration of cancer genomics data for combined evaluation of RNA-seq gene expression and 450k array DNA methylation measurements of promoters as well as gene bodies. It models the dependence between expression and methylation using modular graphical models, which also allows future inclusion of additional data types.

Results

We apply our approach to a Breast Invasive Carcinoma dataset from The Cancer Genome Atlas consortium, which includes 82 adjacent normal and 730 cancer samples. We identify new biomarker candidates of breast cancer development (PTF1A, RABIF, RAG1AP1, TIMM17A, LOC148145) and progression (SERPINE3, ZNF706). PINCAGE discriminates better between normal and tumour tissue and between progressing and non-progressing tumours in comparison with established methods that assume independence between tested data types, especially when using evidence from multiple genes. Our method can be applied to any type of cancer or, more generally, to any genomic disease for which a sufficient amount of molecular data is available.

Availability and implementation

R scripts available at http://moma.ki.au.dk/prj/pincage/

Contact

michal.switnicki@clin.au.dk or jakob.skou@clin.au.dk

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-06 +27283949,A knowledge-based approach for predicting gene-disease associations.,"

Motivation

Recent advances of next-generation sequence technologies have made it possible to rapidly and inexpensively identify gene variations. Knowing the disease association of these gene variations is important for early intervention to treat deadly diseases and provide possible targets to cure these diseases. Genome-wide association studies (GWAS) have identified many individual genes associated with common diseases. To exploit the large amount of data obtained from GWAS studies and leverage our understanding of common as well as rare diseases, we have developed a knowledge-based approach to predict gene-disease associations. We first derive gene-gene mutual information by utilizing the cooccurrence of genes in known gene-disease association data. Subsequently, the mutual information is combined with known protein-protein interaction networks by a boosted tree regression method.

Results

The method called Know-GENE is compared with the method of random walking on the heterogeneous network using the same input data. For a set of 960 diseases, using the same training data in testing in 3-fold cross-validation, the average recall rate within the top ranked 100 genes by Know-GENE is 65.0% compared with 37.9% by the state of the art random walking on heterogeneous network. This significant improvement is mostly due to the inclusion of knowledge-based mutual information.

Availability and implementation

Predictions for genes associated with the 960 diseases are available at http://cssb2.biology.gatech.edu/knowgene

Contact

: skolnick@gatech.edu.",2016-06-09 +29032233,Proteome-wide prediction and annotation of mitochondrial and sub-mitochondrial proteins by incorporating domain information.,"Mitochondrion is one of the most important subcellular organelle of eukaryotic cells. It carries out several biochemical functions that are extremely vital for cells. Defects in mitochondria also play an important role in the development and progression of different types of cancer. Therefore knowledge of complete mitochondrial protein repertoire is essential to understand overall mitochondrial functionality, maintenance, dynamics and metabolism. It would be of a great practical significance to develop an automated and reliable approach that can identify the mitochondrial proteins and their sub-mitochondrial location. In the present study, we report a two level prediction method, named as SubMitoPred, which predicts mitochondrial proteins (at first level) and their sub-mitochondrial localization (at second level). Our approach is based on combined usage of Pfam domain information and support vector machine model. During training we achieved an overall prediction accuracy of 94.37% at first level while at the second level a prediction accuracy of 74.91% for inner membrane, 82.98% for outer membrane, 71.23% for inter-membrane space and 81.58% accuracy was achieved for matrix. Evaluation on independent data shows better performance of SubMitoPred. Benchmarking showed that SubMitoPred performed better than other existing methods. We also annotated human proteome using SubMitoPred. 
We also developed a freely accessible web-server as well as standalone software for the use of scientific community, which is available at http://proteininformatics.org/mkumar/submitopred/.",2017-10-12 +27452369,Discovering Regulated Metabolite Families in Untargeted Metabolomics Studies.,"The identification of metabolites by mass spectrometry constitutes a major bottleneck which considerably limits the throughput of metabolomics studies in biomedical or plant research. Here, we present a novel approach to analyze metabolomics data from untargeted, data-independent LC-MS/MS measurements. By integrated analysis of MS(1) abundances and MS/MS spectra, the identification of regulated metabolite families is achieved. This approach offers a global view on metabolic regulation in comparative metabolomics. We implemented our approach in the web application ""MetFamily"", which is freely available at http://msbi.ipb-halle.de/MetFamily/ . MetFamily provides a dynamic link between the patterns based on MS(1)-signal intensity and the corresponding structural similarity at the MS/MS level. Structurally related metabolites are annotated as metabolite families based on a hierarchical cluster analysis of measured MS/MS spectra. Joint examination with principal component analysis of MS(1) patterns, where this annotation is preserved in the loadings, facilitates the interpretation of comparative metabolomics data at the level of metabolite families. As a proof of concept, we identified two trichome-specific metabolite families from wild-type tomato Solanum habrochaites LA1777 in a fully unsupervised manner and validated our findings based on earlier publications and with NMR.",2016-08-02 +26912260,DoOR 2.0--Comprehensive Mapping of Drosophila melanogaster Odorant Responses.,"Odors elicit complex patterns of activated olfactory sensory neurons. Knowing the complete olfactome, i.e. 
the responses in all sensory neurons for all relevant odorants, is desirable to understand olfactory coding. The DoOR project combines all available Drosophila odorant response data into a single consensus response matrix. Since its first release many studies were published: receptors were deorphanized and several response profiles were expanded. In this study, we add unpublished data to the odor-response profiles for four odorant receptors (Or10a, Or42b, Or47b, Or56a). We deorphanize Or69a, showing a broad response spectrum with the best ligands including 3-hydroxyhexanoate, alpha-terpineol, 3-octanol and linalool. We include all of these datasets into DoOR, provide a comprehensive update of both code and data, and new tools for data analyses and visualizations. The DoOR project has a web interface for quick queries (http://neuro.uni.kn/DoOR), and a downloadable, open source toolbox written in R, including all processed and original datasets. DoOR now gives reliable odorant-responses for nearly all Drosophila olfactory responding units, listing 693 odorants, for a total of 7381 data points.",2016-02-25 +26040196,A database for the taxonomic and phylogenetic identification of the genus Bradyrhizobium using multilocus sequence analysis.,"BACKGROUND:Biological nitrogen fixation, with an emphasis on the legume-rhizobia symbiosis, is a key process for agriculture and the environment, allowing the replacement of nitrogen fertilizers, reducing water pollution by nitrate as well as emission of greenhouse gases. Soils contain numerous strains belonging to the bacterial genus Bradyrhizobium, which establish symbioses with a variety of legumes. However, due to the high conservation of Bradyrhizobium 16S rRNA genes - considered as the backbone of the taxonomy of prokaryotes - few species have been delineated. 
The multilocus sequence analysis (MLSA) methodology, which includes analysis of housekeeping genes, has been shown to be promising and powerful for defining bacterial species, and, in this study, it was applied to Bradyrhizobium, species, increasing our understanding of the diversity of nitrogen-fixing bacteria. DESCRIPTION:Classification of bacteria of agronomic importance is relevant to biodiversity, as well as to biotechnological manipulation to improve agricultural productivity. We propose the construction of an online database that will provide information and tools using MLSA to improve phylogenetic and taxonomic characterization of Bradyrhizobium, allowing the comparison of genomic sequences with those of type and representative strains of each species. CONCLUSION:A database for the taxonomic and phylogenetic identification of the Bradyrhizobium, genus, using MLSA, will facilitate the use of biological data available through an intuitive web interface. Sequences stored in the on-line database can be compared with multiple sequences of other strains with simplicity and agility through multiple alignment algorithms and computational routines integrated into the database. The proposed database and software tools are available at http://mlsa.cnpso.embrapa.br, and can be used, free of charge, by researchers worldwide to classify Bradyrhizobium, strains; the database and software can be applied to replicate the experiments presented in this study as well as to generate new experiments. The next step will be expansion of the database to include other rhizobial species.",2015-05-26 +26626453,Fast dimension reduction and integrative clustering of multi-omics data using low-rank approximation: application to cancer molecular classification.,"

Background

One major goal of large-scale cancer omics studies is to identify molecular subtypes for more accurate cancer diagnoses and treatments. To deal with high-dimensional cancer multi-omics data, a promising strategy is to find an effective low-dimensional subspace of the original data and then cluster cancer samples in the reduced subspace. However, due to data-type diversity and big data volume, few methods can integratively and efficiently find the principal low-dimensional manifold of the high-dimensional cancer multi-omics data.

Results

In this study, we proposed a novel low-rank approximation based integrative probabilistic model to quickly find the shared principal subspace across multiple data types: the convexity of the low-rank regularized likelihood function of the probabilistic model ensures efficient and stable model fitting. Candidate molecular subtypes can be identified by unsupervised clustering of hundreds of cancer samples in the reduced low-dimensional subspace. On testing datasets, our method LRAcluster (low-rank approximation based multi-omics data clustering) runs much faster with better clustering performances than the existing method. Then, we applied LRAcluster on large-scale cancer multi-omics data from TCGA. The pan-cancer analysis results show that the cancers of different tissue origins are generally grouped as independent clusters, except squamous-like carcinomas. In contrast, the single cancer type analysis suggests that the omics data have different subtyping abilities for different cancer types.

Conclusions

LRAcluster is a very useful method for fast dimension reduction and unsupervised clustering of large-scale multi-omics data. LRAcluster is implemented in R and freely available via http://bioinfo.au.tsinghua.edu.cn/software/lracluster/ .",2015-12-01 +22583952,DelPhi: a comprehensive suite for DelPhi software and associated resources.,"

Background

Accurate modeling of electrostatic potential and corresponding energies becomes increasingly important for understanding properties of biological macromolecules and their complexes. However, this is not an easy task due to the irregular shape of biological entities and the presence of water and mobile ions.

Results

Here we report a comprehensive suite for the well-known Poisson-Boltzmann solver, DelPhi, enriched with additional features to facilitate DelPhi usage. The suite allows for easy download of both DelPhi executable files and source code along with a makefile for local installations. The users can obtain the DelPhi manual and parameter files required for the corresponding investigation. Non-experienced researchers can download examples containing all necessary data to carry out DelPhi runs on a set of selected examples illustrating various DelPhi features and demonstrating DelPhi's accuracy against analytical solutions.

Conclusions

DelPhi suite offers not only the DelPhi executable and source files, examples and parameter files, but also provides links to third party developed resources either utilizing DelPhi or providing plugins for DelPhi. In addition, the users and developers are offered a forum to share ideas, resolve issues, report bugs and seek help with respect to the DelPhi package. The resource is available free of charge for academic users from URL: http://compbio.clemson.edu/DelPhi.php.",2012-05-14 +28011789,myGenomeBrowser: building and sharing your own genome browser.,"myGenomeBrowser is a web-based environment that provides biologists with a way to build, query and share their genome browsers. This tool, which builds on JBrowse, is designed to give users more autonomy while simplifying and minimizing intervention from system administrators. We have extended genome browser basic features to allow users to query, analyze and share their data.

Availability and implementation

myGenomeBrowser is freely available at https://bbric-pipelines.toulouse.inra.fr/myGenomeBrowser and includes tutorial screencasts. Source code and installation instructions can be found at https://framagit.org/BBRIC/myGenomeBrowser . myGenomeBrowser is open-source and mainly implemented in Perl, JavaScript, Apache and Docker.

Contact

sebastien.carrere@inra.fr.",2017-04-01 +24353078,Interdental brushing for the prevention and control of periodontal diseases and dental caries in adults.,"

Background

Effective oral hygiene is a crucial factor in maintaining good oral health, which is associated with overall health and health-related quality of life. Dental floss has been used for many years in conjunction with toothbrushing for removing dental plaque in between teeth, however, interdental brushes have been developed which many people find easier to use than floss, providing there is sufficient space between the teeth.

Objectives

To evaluate the effects of interdental brushing in addition to toothbrushing, as compared with toothbrushing alone or toothbrushing and flossing for the prevention and control of periodontal diseases, dental plaque and dental caries.

Search methods

We searched the following electronic databases: the Cochrane Oral Health Group's Trials Register (to 7 March 2013), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library 2013, Issue 2), MEDLINE via OVID (1946 to 7 March 2013), EMBASE via OVID (1980 to 7 March 2013), CINAHL via EBSCO (1980 to 7 March 2013), LILACS via BIREME (1982 to 7 March 2013), ZETOC Conference Proceedings (1980 to 7 March 2013) and Web of Science Conference Proceedings (1990 to 7 March 2013). We searched the US National Institutes of Health Trials Register (http://clinicaltrials.gov) and the metaRegister of Controlled Trials (http://www.controlled-trials.com/mrct/) for ongoing trials to 7 March 2013. No restrictions were placed on the language or date of publication when searching the electronic databases.

Selection criteria

We included randomised controlled trials (including split-mouth design, cross-over and cluster-randomised trials) of dentate adult patients. The interventions were a combination of toothbrushing and any interdental brushing procedure compared with toothbrushing only or toothbrushing and flossing.

Data collection and analysis

At least two review authors assessed each of the included studies to confirm eligibility, assessed risk of bias and extracted data using a piloted data extraction form. We calculated standardised mean difference (SMD) and 95% confidence interval (CI) for continuous outcomes where different scales were used to assess an outcome. We attempted to extract data on adverse effects of interventions. Where data were missing or unclear we attempted to contact study authors to obtain further information.

Main results

There were seven studies (total 354 participants analysed) included in this review. We assessed one study as being low, three studies as being high and three studies as being at unclear risk of bias. Studies only reported the clinical outcome gingivitis and plaque data, with no studies providing data on many of the outcomes: periodontitis, caries, halitosis and quality of life. Three studies reported that no adverse events were observed or reported during the study. Two other studies provided some data on adverse events but we were unable to pool the data due to lack of detail. Two studies did not report whether adverse events occurred. Interdental brushing in addition to toothbrushing, as compared with toothbrushing alone Only one high risk of bias study (62 participants in analysis) looked at this comparison and there was very low-quality evidence for a reduction in gingivitis (0 to 4 scale, mean in control): mean difference (MD) 0.53 (95% CI 0.23 to 0.83) and plaque (0 to 5 scale): MD 0.95 (95% CI 0.56 to 1.34) at one month, favouring of use of interdental brushes. This represents a 34% reduction in gingivitis and a 32% reduction in plaque. Interdental brushing in addition to toothbrushing, as compared with toothbrushing and flossing Seven studies provided data showing a reduction in gingivitis in favour of interdental brushing at one month: SMD -0.53 (95% CI -0.81 to -0.24, seven studies, 326 participants, low-quality evidence). This translates to a 52% reduction in gingivitis (Eastman Bleeding Index). Although a high effect size in the same direction was observed at three months (SMD -1.98, 95% CI -5.42 to 1.47, two studies, 107 participants, very low quality), the confidence interval was wide and did not exclude the possibility of no difference. 
There was insufficient evidence to claim a benefit for either interdental brushing or flossing for reducing plaque (SMD at one month 0.10, 95% CI -0.13 to 0.33, seven studies, 326 participants, low-quality evidence) and insufficient evidence at three months (SMD -2.14, 95% CI -5.25 to 0.97, two studies, 107 participants, very low-quality evidence).

Authors' conclusions

Only one study looked at whether toothbrushing with interdental brushing was better than toothbrushing alone, and there was very low-quality evidence for a reduction in gingivitis and plaque at one month. There is also low-quality evidence from seven studies that interdental brushing reduces gingivitis when compared with flossing, but these results were only found at one month. There was insufficient evidence to determine whether interdental brushing reduced or increased levels of plaque when compared to flossing.",2013-12-18 +28031183,dAPE: a web server to detect homorepeats and follow their evolution.,"

Summary

Homorepeats are low complexity regions consisting of repetitions of a single amino acid residue. There is no current consensus on the minimum number of residues needed to define a functional homorepeat, nor even if mismatches are allowed. Here we present dAPE, a web server that helps follow the evolution of homorepeats based on orthology information, using a sensitive but tunable cutoff to help in the identification of emerging homorepeats.

Availability and implementation

dAPE can be accessed from http://cbdm-01.zdv.uni-mainz.de/~munoz/polyx .

Contact

munoz@uni-mainz.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +26821742,"RiboGalaxy: A browser based platform for the alignment, analysis and visualization of ribosome profiling data.","Ribosome profiling (ribo-seq) is a technique that uses high-throughput sequencing to reveal the exact locations and densities of translating ribosomes at the entire transcriptome level. The technique has become very popular since its inception in 2009. Yet experimentalists who generate ribo-seq data often have to rely on bioinformaticians to process and analyze their data. We present RiboGalaxy ( http://ribogalaxy.ucc.ie ), a freely available Galaxy-based web server for processing and analyzing ribosome profiling data with the visualization functionality provided by GWIPS-viz ( http://gwips.ucc.ie ). RiboGalaxy offers researchers a suite of tools specifically tailored for processing ribo-seq and corresponding mRNA-seq data. Researchers can take advantage of the published workflows which reduce the multi-step alignment process to a minimum of inputs from the user. Users can then explore their own aligned data as custom tracks in GWIPS-viz and compare their ribosome profiles to existing ribo-seq tracks from published studies. In addition, users can assess the quality of their ribo-seq data, determine the strength of the triplet periodicity signal, generate meta-gene ribosome profiles as well as analyze the relative impact of mRNA sequence features on local read density. RiboGalaxy is accompanied by extensive documentation and tips for helping users. 
In addition we provide a forum ( http://gwips.ucc.ie/Forum ) where we encourage users to post their questions and feedback to improve the overall RiboGalaxy service.",2016-01-29 +26441500,DeID - a data sharing tool for neuroimaging studies.,"Funding institutions and researchers increasingly expect that data will be shared to increase scientific integrity and provide other scientists with the opportunity to use the data with novel methods that may advance understanding in a particular field of study. In practice, sharing human subject data can be complicated because data must be de-identified prior to sharing. Moreover, integrating varied data types collected in a study can be challenging and time consuming. For example, sharing data from structural imaging studies of a complex disorder requires the integration of imaging, demographic and/or behavioral data in a way that no subject identifiers are included in the de-identified dataset and with new subject labels or identification values that cannot be tracked back to the original ones. We have developed a Java program that users can use to remove identifying information in neuroimaging datasets, while still maintaining the association among different data types from the same subject for further studies. This software provides a series of user interaction wizards to allow users to select data variables to be de-identified, implements functions for auditing and validation of de-identified data, and enables the user to share the de-identified data in a single compressed package through various communication protocols, such as FTPS and SFTP. DeID runs with Windows, Linux, and Mac operating systems and its open architecture allows it to be easily adapted to support a broader array of data types, with the goal of facilitating data sharing. 
DeID can be obtained at http://www.nitrc.org/projects/deid.",2015-09-22 +24194602,DDBJ progress report: a new submission system for leading to a correct annotation.,"The DNA Data Bank of Japan (DDBJ; http://www.ddbj.nig.ac.jp) maintains and provides archival, retrieval and analytical resources for biological information. This database content is shared with the US National Center for Biotechnology Information (NCBI) and the European Bioinformatics Institute (EBI) within the framework of the International Nucleotide Sequence Database Collaboration (INSDC). DDBJ launched a new nucleotide sequence submission system for receiving traditional nucleotide sequence. We expect that the new submission system will be useful for many submitters to input accurate annotation and reduce the time needed for data input. In addition, DDBJ has started a new service, the Japanese Genotype-phenotype Archive (JGA), with our partner institute, the National Bioscience Database Center (NBDC). JGA permanently archives and shares all types of individual human genetic and phenotypic data. We also introduce improvements in the DDBJ services and databases made during the past year.",2013-11-04 +27153687,EPS: an empirical Bayes approach to integrating pleiotropy and tissue-specific information for prioritizing risk genes.,"

Motivation

Researchers worldwide have generated a huge volume of genomic data, including thousands of genome-wide association studies (GWAS) and massive amounts of gene expression data from different tissues. How to perform a joint analysis of these data to gain new biological insights has become a critical step in understanding the etiology of complex diseases. Due to the polygenic architecture of complex diseases, the identification of risk genes remains challenging. Motivated by the shared risk genes found in complex diseases and tissue-specific gene expression patterns, we propose an Empirical Bayes approach to integrating Pleiotropy and Tissue-Specific information (EPS) for prioritizing risk genes.

Results

As demonstrated by extensive simulation studies, EPS greatly improves the power of identification for disease-risk genes. EPS enables rigorous hypothesis testing of pleiotropy and tissue-specific risk gene expression patterns. All of the model parameters can be adaptively estimated from the developed expectation-maximization (EM) algorithm. We applied EPS to the bipolar disorder and schizophrenia GWAS from the Psychiatric Genomics Consortium, along with the gene expression data for multiple tissues from the Genotype-Tissue Expression project. The results of the real data analysis demonstrate many advantages of EPS.

Availability and implementation

The EPS software is available on https://sites.google.com/site/liujin810822 CONTACT: eeyang@hkbu.edu.hk

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-15 +25399418,"The OMA orthology database in 2015: function predictions, better plant support, synteny view and other improvements.","The Orthologous Matrix (OMA) project is a method and associated database inferring evolutionary relationships amongst currently 1706 complete proteomes (i.e. the protein sequence associated for every protein-coding gene in all genomes). In this update article, we present six major new developments in OMA: (i) a new web interface; (ii) Gene Ontology function predictions as part of the OMA pipeline; (iii) better support for plant genomes and in particular homeologs in the wheat genome; (iv) a new synteny viewer providing the genomic context of orthologs; (v) statically computed hierarchical orthologous groups subsets downloadable in OrthoXML format; and (vi) possibility to export parts of the all-against-all computations and to combine them with custom data for 'client-side' orthology prediction. OMA can be accessed through the OMA Browser and various programmatic interfaces at http://omabrowser.org.",2014-11-15 +28448234,Leveraging Clinical Imaging Archives for Radiomics: Reliability of Automated Methods for Brain Volume Measurement.,"Purpose To validate the use of thick-section clinically acquired magnetic resonance (MR) imaging data for estimating total brain volume (TBV), gray matter (GM) volume (GMV), and white matter (WM) volume (WMV) by using three widely used automated toolboxes: SPM ( www.fil.ion.ucl.ac.uk/spm/ ), FreeSurfer ( surfer.nmr.mgh.harvard.edu ), and FSL (FMRIB software library; Oxford Centre for Functional MR Imaging of the Brain, Oxford, England, https://fsl.fmrib.ox.ac.uk/fsl ). Materials and Methods MR images from a clinical archive were used and data were deidentified. 
The three methods were applied to estimate brain volumes from thin-section research-quality brain MR images and routine thick-section clinical MR images acquired from the same 38 patients (age range, 1-71 years; mean age, 22 years; 11 women). By using these automated methods, TBV, GMV, and WMV were estimated. Thin- versus thick-section volume comparisons were made for each method by using intraclass correlation coefficients (ICCs). Results SPM exhibited excellent ICCs (0.97, 0.85, and 0.83 for TBV, GMV, and WMV, respectively). FSL exhibited ICCs of 0.69, 0.51, and 0.60 for TBV, GMV, and WMV, respectively, but they were lower than with SPM. FreeSurfer exhibited excellent ICC of 0.63 only for TBV. Application of SPM's voxel-based morphometry on the modulated images of thin-section images and interpolated thick-section images showed fair to excellent ICCs (0.37-0.98) for the majority of brain regions (88.47% [306924 of 346916 voxels] of WM and 80.35% [377 282 of 469 502 voxels] of GM). Conclusion Thick-section clinical-quality MR images can be reliably used for computing quantitative brain metrics such as TBV, GMV, and WMV by using SPM. © RSNA, 2017 Online supplemental material is available for this article.",2017-04-27 +28320624,"Improved prediction of protein-protein interactions using novel negative samples, features, and an ensemble classifier.","Computational methods are employed in bioinformatics to predict protein-protein interactions (PPIs). PPIs and protein-protein non-interactions (PPNIs) display different levels of development, and the number of PPIs is considerably greater than that of PPNIs. This significant difference in the number of PPIs and PPNIs increases the cost of constructing a balanced dataset. PPIs can be classified as either physical or genetic. However, ready-made PPNI databases were proven only to have no physical interactions and were not proven to have no genetic interactions. 
Hence, ready-made PPNI databases contain false negative non-interactions. In this study, two PPNI datasets were artificially generated from a PPI database. In contrast to various traditional PPI feature extraction methods based on sequential information, two types of novel feature extraction methods were proposed. One is based on secondary structure information, and the other is based on the physicochemical properties of proteins. The experimental results of the RandomPairs dataset validate the efficiency and effectiveness of the proposed prediction model. These results reveal the potential of constructing a PPI negative dataset to reduce false negatives. Related datasets, tools, and source codes are accessible at http://lab.malab.cn/soft/PPIPre/PPIPre.html.",2017-03-04 +25935118,Gene network coherence based on prior knowledge using direct and indirect relationships.,"Gene networks (GNs) have become one of the most important approaches for modeling biological processes. They are very useful to understand the different complex biological processes that may occur in living organisms. Currently, one of the biggest challenge in any study related with GN is to assure the quality of these GNs. In this sense, recent works use artificial data sets or a direct comparison with prior biological knowledge. However, these approaches are not entirely accurate as they only take into account direct gene-gene interactions for validation, leaving aside the weak (indirect) relationships. We propose a new measure, named gene network coherence (GNC), to rate the coherence of an input network according to different biological databases. In this sense, the measure considers not only the direct gene-gene relationships but also the indirect ones to perform a complete and fairer evaluation of the input network. Hence, our approach is able to use the whole information stored in the networks. A GNC JAVA-based implementation is available at: http://fgomezvela.github.io/GNC/. 
The results achieved in this work show that GNC outperforms the classical approaches for assessing GNs by means of three different experiments using different biological databases and input networks. According to the results, we can conclude that the proposed measure, which considers the inherent information stored in the direct and indirect gene-gene relationships, offers a new robust solution to the problem of GNs biological validation.",2015-03-27 +26779400,AURA 2: Empowering discovery of post-transcriptional networks.,"Post-transcriptional regulation (PTR) of gene expression is now recognized as a major determinant of cell phenotypes. The recent availability of methods to map protein-RNA interactions in entire transcriptomes such as RIP, CLIP and their variants, together with global polysomal and ribosome profiling techniques, are driving the exponential accumulation of vast amounts of data on mRNA contacts in cells, and of corresponding predictions of PTR events. However, this exceptional quantity of information cannot be exploited at its best to reconstruct potential PTR networks, as it still lies scattered throughout several databases and in isolated reports of single interactions. To address this issue, we developed the second and vastly enhanced version of the Atlas of UTR Regulatory Activity (AURA 2), a meta-database centered on mapping interaction of trans-factors with human and mouse UTRs. AURA 2 includes experimentally demonstrated binding sites for RBPs, ncRNAs, thousands of cis-elements, variations, RNA epigenetics data and more. Its user-friendly interface offers various data-mining features including co-regulation search, network generation and regulatory enrichment testing. Gene expression profiles for many tissues and cell lines can be also combined with these analyses to display only the interactions possible in the system under study. 
AURA 2 aims at becoming a valuable toolbox for PTR studies and at tracing the road for how PTR network-building tools should be designed. AURA 2 is available at http://aura.science.unitn.it.",2014-01-29 +27647160,BS-RNA: An efficient mapping and annotation tool for RNA bisulfite sequencing data.,"Cytosine methylation is one of the most important RNA epigenetic modifications. With the development of experimental technology, scientists attach more importance to RNA cytosine methylation and find bisulfite sequencing is an effective experimental method for RNA cytosine methylation study. However, there are only a few tools can directly deal with RNA bisulfite sequencing data efficiently. Herein, we developed a specialized tool BS-RNA, which can analyze cytosine methylation of RNA based on bisulfite sequencing data and support both paired-end and single-end sequencing reads from directional bisulfite libraries. For paired-end reads, simply removing the biased positions from the 5' end may result in ""dovetailing"" reads, where one or both reads seem to extend past the start of the mate read. BS-RNA could map ""dovetailing"" reads successfully. The annotation result of BS-RNA is exported in BED (.bed) format, including locations, sequence context types (CG/CHG/CHH, H=A,T, or C), reference sequencing depths, cytosine sequencing depths, and methylation levels of covered cytosine sites on both Watson and Crick strands. BS-RNA is an efficient, specialized and highly automated mapping and annotation tool for RNA bisulfite sequencing data. It performs better than the existing program in terms of accuracy and efficiency. 
BS-RNA is developed by Perl language and the source code of this tool is freely available from the website: http://bs-rna.big.ac.cn.",2016-09-09 +29743730,Evaluating soil moisture retrievals from ESA's SMOS and NASA's SMAP brightness temperature datasets.,"Two satellites are currently monitoring surface soil moisture (SM) using L-band observations: SMOS (Soil Moisture and Ocean Salinity), a joint ESA (European Space Agency), CNES (Centre national d'études spatiales), and CDTI (the Spanish government agency with responsibility for space) satellite launched on November 2, 2009 and SMAP (Soil Moisture Active Passive), a National Aeronautics and Space Administration (NASA) satellite successfully launched in January 2015. In this study, we used a multilinear regression approach to retrieve SM from SMAP data to create a global dataset of SM, which is consistent with SM data retrieved from SMOS. This was achieved by calibrating coefficients of the regression model using the CATDS (Centre Aval de Traitement des Données) SMOS Level 3 SM and the horizontally and vertically polarized brightness temperatures (TB) at 40° incidence angle, over the 2013 - 2014 period. Next, this model was applied to SMAP L3 TB data from Apr 2015 to Jul 2016. The retrieved SM from SMAP (referred to here as SMAP_Reg) was compared to: (i) the operational SMAP L3 SM (SMAP_SCA), retrieved using the baseline Single Channel retrieval Algorithm (SCA); and (ii) the operational SMOSL3 SM, derived from the multiangular inversion of the L-MEB model (L-MEB algorithm) (SMOSL3). This inter-comparison was made against in situ soil moisture measurements from more than 400 sites spread over the globe, which are used here as a reference soil moisture dataset. 
The in situ observations were obtained from the International Soil Moisture Network (ISMN; https://ismn.geo.tuwien.ac.at/) in North of America (PBO_H2O, SCAN, SNOTEL, iRON, and USCRN), in Australia (Oznet), Africa (DAHRA), and in Europe (REMEDHUS, SMOSMANIA, FMI, and RSMN). The agreement was analyzed in terms of four classical statistical criteria: Root Mean Squared Error (RMSE), Bias, Unbiased RMSE (UnbRMSE), and correlation coefficient (R). Results of the comparison of these various products with in situ observations show that the performance of both SMAP products i.e. SMAP_SCA and SMAP_Reg is similar and marginally better to that of the SMOSL3 product particularly over the PBO_H2O, SCAN, and USCRN sites. However, SMOSL3 SM was closer to the in situ observations over the DAHRA and Oznet sites. We found that the correlation between all three datasets and in situ measurements is best (R > 0.80) over the Oznet sites and worst (R = 0.58) over the SNOTEL sites for SMAP_SCA and over the DAHRA and SMOSMANIA sites (R= 0.51 and R= 0.45 for SMAP_Reg and SMOSL3, respectively). The Bias values showed that all products are generally dry, except over RSMN, DAHRA, and Oznet (and FMI for SMAP_SCA). Finally, our analysis provided interesting insights that can be useful to improve the consistency between SMAP and SMOS datasets.",2017-03-20 +22434836,"CvManGO, a method for leveraging computational predictions to improve literature-based Gene Ontology annotations.","The set of annotations at the Saccharomyces Genome Database (SGD) that classifies the cellular function of S. cerevisiae gene products using Gene Ontology (GO) terms has become an important resource for facilitating experimental analysis. In addition to capturing and summarizing experimental results, the structured nature of GO annotations allows for functional comparison across organisms as well as propagation of functional predictions between related gene products. 
Due to their relevance to many areas of research, ensuring the accuracy and quality of these annotations is a priority at SGD. GO annotations are assigned either manually, by biocurators extracting experimental evidence from the scientific literature, or through automated methods that leverage computational algorithms to predict functional information. Here, we discuss the relationship between literature-based and computationally predicted GO annotations in SGD and extend a strategy whereby comparison of these two types of annotation identifies genes whose annotations need review. Our method, CvManGO (Computational versus Manual GO annotations), pairs literature-based GO annotations with computational GO predictions and evaluates the relationship of the two terms within GO, looking for instances of discrepancy. We found that this method will identify genes that require annotation updates, taking an important step towards finding ways to prioritize literature review. Additionally, we explored factors that may influence the effectiveness of CvManGO in identifying relevant gene targets to find in particular those genes that are missing literature-supported annotations, but our survey found that there are no immediately identifiable criteria by which one could enrich for these under-annotated genes. Finally, we discuss possible ways to improve this strategy, and the applicability of this method to other projects that use the GO for curation. DATABASE URL: http://www.yeastgenome.org.",2012-03-20 +27668814,OCEAN: Optimized Cross rEActivity estimatioN.,"The prediction of molecular targets is highly beneficial during the drug discovery process, be it for off-target elucidation or deconvolution of phenotypic screens. Here, we present OCEAN, a target prediction tool exclusively utilizing publically available ChEMBL data. OCEAN uses a heuristics approach based on a validation set containing almost 1000 drug ← → target relationships. 
New ChEMBL data (ChEMBL20 as well as ChEMBL21) released after the validation was used for a prospective OCEAN performance check. The success rates of OCEAN to predict correctly the targets within the TOP10 ranks are 77% for recently marketed drugs and 62% for all new ChEMBL20 compounds and 51% for all new ChEMBL21 compounds. OCEAN is also capable of identifying polypharmacological compounds; the success rate for molecules simultaneously hitting at least two targets is 64% to be correctly predicted within the TOP10 ranks. The source code of OCEAN can be found at http://www.github.com/rdkit/OCEAN.",2016-09-26 +29017987,Nanomaterials Versus Ambient Ultrafine Particles: An Opportunity to Exchange Toxicology Knowledge.,"

Background

A rich body of literature exists that has demonstrated adverse human health effects following exposure to ambient air particulate matter (PM), and there is strong support for an important role of ultrafine (nanosized) particles. At present, relatively few human health or epidemiology data exist for engineered nanomaterials (NMs) despite clear parallels in their physicochemical properties and biological actions in in vitro models.

Objectives

NMs are available with a range of physicochemical characteristics, which allows a more systematic toxicological analysis. Therefore, the study of ultrafine particles (UFP, <100 nm in diameter) provides an opportunity to identify plausible health effects for NMs, and the study of NMs provides an opportunity to facilitate the understanding of the mechanism of toxicity of UFP.

Methods

A workshop of experts systematically analyzed the available information and identified 19 key lessons that can facilitate knowledge exchange between these discipline areas.

Discussion

Key lessons range from the availability of specific techniques and standard protocols for physicochemical characterization and toxicology assessment to understanding and defining dose and the molecular mechanisms of toxicity. This review identifies a number of key areas in which additional research prioritization would facilitate both research fields simultaneously.

Conclusion

There is now an opportunity to apply knowledge from NM toxicology and use it to better inform PM health risk research and vice versa. https://doi.org/10.1289/EHP424.",2017-10-10 +25332394,lncRNAdb v2.0: expanding the reference database for functional long noncoding RNAs.,"Despite the prevalence of long noncoding RNA (lncRNA) genes in eukaryotic genomes, only a small proportion have been examined for biological function. lncRNAdb, available at http://lncrnadb.org, provides users with a comprehensive, manually curated reference database of 287 eukaryotic lncRNAs that have been described independently in the scientific literature. In addition to capturing a great proportion of the recent literature describing functions for individual lncRNAs, lncRNAdb now offers an improved user interface enabling greater accessibility to sequence information, expression data and the literature. The new features in lncRNAdb include the integration of Illumina Body Atlas expression profiles, nucleotide sequence information, a BLAST search tool and easy export of content via direct download or a REST API. lncRNAdb is now endorsed by RNAcentral and is in compliance with the International Nucleotide Sequence Database Collaboration.",2014-10-20 +27295636,Efficient Drug-Pathway Association Analysis via Integrative Penalized Matrix Decomposition.,"Traditional drug discovery practice usually follows the ""one drug - one target"" approach, seeking to identify drug molecules that act on individual targets, which ignores the systemic nature of human diseases. Pathway-based drug discovery recently emerged as an appealing approach to overcome this limitation. An important first step of such pathway-based drug discovery is to identify associations between drug molecules and biological pathways. This task has been made feasible by the accumulating data from high-throughput transcription and drug sensitivity profiling. 
In this paper, we developed ""iPaD"", an integrative Penalized Matrix Decomposition method to identify drug-pathway associations through jointly modeling of such high-throughput transcription and drug sensitivity data. A scalable bi-convex optimization algorithm was implemented and gave iPaD tremendous advantage in computational efficiency over current state-of-the-art method, which allows it to handle the ever-growing large-scale data sets that current method cannot afford to. On two widely used real data sets, iPaD also significantly outperformed the current method in terms of the number of validated drug-pathway associations that were identified. The Matlab code of our algorithm publicly available at http://licong-jason.github.io/iPaD/.",2016-05-01 +27851974,eFORGE: A Tool for Identifying Cell Type-Specific Signal in Epigenomic Data.,"Epigenome-wide association studies (EWAS) provide an alternative approach for studying human disease through consideration of non-genetic variants such as altered DNA methylation. To advance the complex interpretation of EWAS, we developed eFORGE (http://eforge.cs.ucl.ac.uk/), a new standalone and web-based tool for the analysis and interpretation of EWAS data. eFORGE determines the cell type-specific regulatory component of a set of EWAS-identified differentially methylated positions. This is achieved by detecting enrichment of overlap with DNase I hypersensitive sites across 454 samples (tissues, primary cell types, and cell lines) from the ENCODE, Roadmap Epigenomics, and BLUEPRINT projects. Application of eFORGE to 20 publicly available EWAS datasets identified disease-relevant cell types for several common diseases, a stem cell-like signature in cancer, and demonstrated the ability to detect cell-composition effects for EWAS performed on heterogeneous tissues. 
Our approach bridges the gap between large-scale epigenomics data and EWAS-derived target selection to yield insight into disease etiology.",2016-11-01 +30271887,Stage 2 Registered Report: Variation in neurodevelopmental outcomes in children with sex chromosome trisomies: testing the double hit hypothesis. ,"Background: The presence of an extra sex chromosome is associated with an increased rate of neurodevelopmental difficulties involving language. The 'double hit' hypothesis proposes that the adverse impact of the extra sex chromosome is amplified when genes that are expressed from the sex chromosomes interact with autosomal variants that usually have only mild effects. We predicted that the impact of an additional sex chromosome on neurodevelopment would depend on common autosomal variants involved in synaptic functions.   Methods: We analysed data from 130 children with sex chromosome trisomies (SCTs: 42 girls with trisomy X, 43 boys with Klinefelter syndrome, and 45 boys with XYY). Two comparison groups were formed from 370 children from a twin study. Three indicators of phenotype were: (i) Standard score on a test of nonword repetition; (ii). A language factor score derived from a test battery; (iii) A general scale of neurodevelopmental challenges based on all available information. Preselected regions of two genes, CNTNAP2 and NRXN1, were tested for association with neurodevelopmental outcomes using Generalised Structural Component Analysis. Results: There was wide phenotypic variation in the SCT group, as well as overall impairment on all three phenotypic measures. There was no association of phenotype with CNTNAP2 or NRXN1 variants in either the SCT group or the comparison groups. Supplementary analyses found no indication of any impact of trisomy type on the results, and exploratory analyses of individual SNPs confirmed the lack of association. 
Conclusions: We cannot rule out that a double hit may be implicated in the phenotypic variability in children with SCTs, but our analysis does not find any support for the idea that common variants in CNTNAP2 or NRXN1 are associated with the severity of language and neurodevelopmental impairments that often accompany an extra X or Y chromosome. Stage 1 report: http://dx.doi.org/10.12688/wellcomeopenres.13828.2.",2018-01-01 +28968797,iPromoter-2L: a two-layer predictor for identifying promoters and their types by multi-window-based PseKNC.,"

Motivation

Being responsible for initiating transcription of a particular gene in genome, promoter is a short region of DNA. Promoters have various types with different functions. Owing to their importance in biological process, it is highly desired to develop computational tools for timely identifying promoters and their types. Such a challenge has become particularly critical and urgent in facing the avalanche of DNA sequences discovered in the postgenomic age. Although some prediction methods were developed, they can only be used to discriminate a specific type of promoters from non-promoters. None of them has the ability to identify the types of promoters. This is due to the facts that different types of promoters may share quite similar consensus sequence pattern, and that the promoters of same type may have considerably different consensus sequences.

Results

To overcome such difficulty, using the multi-window-based PseKNC (pseudo K-tuple nucleotide composition) approach to incorporate the short-, middle-, and long-range sequence information, we have developed a two-layer seamless predictor named as 'iPromoter-2L'. The first layer serves to identify a query DNA sequence as a promoter or non-promoter, and the second layer to predict which of the following six types the identified promoter belongs to: σ24, σ28, σ32, σ38, σ54 and σ70.

Availability and implementation

For the convenience of most experimental scientists, a user-friendly and publicly accessible web-server for the powerful new predictor has been established at http://bioinformatics.hitsz.edu.cn/iPromoter-2L/. It is anticipated that iPromoter-2L will become a very useful high throughput tool for genome analysis.

Contact

bliu@hit.edu.cn or dshuang@tongji.edu.cn or kcchou@gordonlifescience.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +,Prospective analysis of the invasive potential of the European grapevine moth Lobesia botrana (Den. & Schiff.) in California,"1 The polyphagous European grapevine moth Lobesia botrana (Den. & Schiff.) is the principal native pest of grape berries in the Palearctic region. It was found in Napa County, California, in 2009, and it has subsequently been recorded in an additional nine counties, despite an ongoing eradication programme. The present study aimed to assess prospectively its potential geographical distribution and relative abundance in California and the continental U.S.A. A subsidiary goal was to provide explanation for timing control measures. 2 Data from the European literature were used to formulate and parameterize a holistic physiologically‐based demographic model for L. botrana. This model was linked to an extant mechanistic model of grapevine phenology, growth and development that provides the bottom‐up effects of fruiting phenology, age and abundance on L. botrana dynamics. Fruit age affects larval developmental rates, and has carryover effects on pupal development and adult fecundity. Also included in the model were the effects of temperature on developmental, survival and fecundity rates. 3 Observed daily weather data were used to simulate the potential distribution of the moth in California, and the continental U.S.A. The relative total number of pupae per vine per year was used as the metric of favourability at all locations. The simulation data were mapped using grass gis (http://grass.osgeo.org/). 4 The model predicts L. botrana can spread statewide with the highest populations expected in the hotter regions of southern California and the lower half of the Central Valley. In the U.S.A., areas of highest favourability include south Texas, and much of the southeast U.S.A. 
5 The effects of a warmer climate on pest abundance were explored by increasing observed mean temperatures 2° and 3 °C. L. botrana abundance is expected to increase in northern California and in the agriculturally rich Central Valley but to decrease in the hot deserts of southern California where summer temperatures would approach its upper thermal limit. 6 Analysis of the timing of mating disruption pheromone for control of L. botrana suggests the greatest benefit would accrue by targeting adults emerging from winter diapause pupae and the flight of first summer adults.",2012-08-01 +26982617,Differential methylation tests of regulatory regions.,"Differential methylation of regulatory elements is critical in epigenetic researches and can be statistically tested. We developed a new statistical test, the generalized integrated functional test (GIFT), that tests for regional differences in methylation based on the methylation percent at each CpG site within a genomic region. The GIFT uses estimated subject-specific profiles with smoothing methods, specifically wavelet smoothing, and calculates an ANOVA-like test to compare the average profile of groups. In this way, possibly correlated CpG sites within the regulatory region are compared all together. Simulations and analyses of data obtained from patients with chronic lymphocytic leukemia indicate that GIFT has good statistical properties and is able to identify promising genomic regions. Further, GIFT is likely to work with multiple different types of experiments since different smoothing methods can be used to estimate the profiles of data without noise. Matlab code for GIFT and sample data are available at http://www.augusta.edu/mcg/biostatepi/people/software/gift.html.",2016-06-01 +25712690,"LCR-eXXXplorer: a web platform to search, visualize and share data for low complexity regions in protein sequences.","

Motivation

Local compositionally biased and low complexity regions (LCRs) in amino acid sequences have initially attracted the interest of researchers due to their implication in generating artifacts in sequence database searches. There is accumulating evidence of the biological significance of LCRs both in physiological and in pathological situations. Nonetheless, LCR-related algorithms and tools have not gained wide appreciation across the research community, partly due to the fact that only a handful of user-friendly software is currently freely available.

Results

We developed LCR-eXXXplorer, an extensible online platform attempting to fill this gap. LCR-eXXXplorer offers tools for displaying LCRs from the UniProt/SwissProt knowledgebase, in combination with other relevant protein features, predicted or experimentally verified. Moreover, users may perform powerful queries against a custom designed sequence/LCR-centric database. We anticipate that LCR-eXXXplorer will be a useful starting point in research efforts for the elucidation of the structure, function and evolution of proteins with LCRs.

Availability and implementation

LCR-eXXXplorer is freely available at the URL http://repeat.biol.ucy.ac.cy/lcr-exxxplorer.

Contact

vprobon@ucy.ac.cy

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-20 +27741311,Impact of the Choice of Normalization Method on Molecular Cancer Class Discovery Using Nonnegative Matrix Factorization.,"Nonnegative Matrix Factorization (NMF) has proved to be an effective method for unsupervised clustering analysis of gene expression data. By the nonnegativity constraint, NMF provides a decomposition of the data matrix into two matrices that have been used for clustering analysis. However, the decomposition is not unique. This allows different clustering results to be obtained, resulting in different interpretations of the decomposition. To alleviate this problem, some existing methods directly enforce uniqueness to some extent by adding regularization terms in the NMF objective function. Alternatively, various normalization methods have been applied to the factor matrices; however, the effects of the choice of normalization have not been carefully investigated. Here we investigate the performance of NMF for the task of cancer class discovery, under a wide range of normalization choices. After extensive evaluations, we observe that the maximum norm showed the best performance, although the maximum norm has not previously been used for NMF. Matlab codes are freely available from: http://maths.nuigalway.ie/~haixuanyang/pNMF/pNMF.htm.",2016-10-14 +25481072,Reprint of 'Tracking the blue: a MLST approach to characterise the Pseudomonas fluorescens group'.,"The Pseudomonas fluorescens group comprises several closely related species that are involved in food contamination and spoilage. Specifically, the interest in P. fluorescens as a spoiler of dairy products increased after the cases of ""blue mozzarella"" that occurred in Italy in 2010. 
A Multilocus Sequence Typing (MLST) scheme was developed and applied to characterise 136 isolates (reference strains and food borne isolates) at strain level, to reveal the genetic relationships among them and to disclose any possible genetic clustering of phenotypic markers involved in food spoilage (protease, lipase, lecithinase activities and pigmented or fluorescent molecule production). The production of dark blue diffusible pigment was evaluated on several bacterial culture media and directly on mozzarella cheese. The MLST scheme provided precise genotyping at the strain level, and the population analyses of the concatenated sequences allowed major taxa to be defined. This approach was revealed to be suitable for tracking the strains according to their origin, such as dairy plants or food matrices. The genetic analysis revealed the presence of a connection between the blue pigment production and a specific phylogenetic cluster. The development of the online database specific to the P. fluorescens group (http://pubmlst.org/pfluorescens) will facilitate the application of the scheme and the sharing of the data.",2015-02-01 +26011889,Discovering Variable-Length Patterns in Protein Sequences for Protein-Protein Interaction Prediction.,"To predict Protein-Protein Interactions (PPIs), there have recently been some attempts to use computational approaches and among them, sequence-based approaches are often preferred over other kinds of approaches as they do not require prior knowledge about proteins to perform their tasks. However, in deciding if two proteins may interact with each other, existing sequence-based approaches consider only fixed-length segments. We believe that if segments of variable-length can also be considered, interactions between proteins can be more accurately predicted. To consider variable-length segments for PPI predictions, we have developed a VLASPD algorithm. 
Given a database of protein sequences, VLASPD performs its tasks in several steps. The protein database is first searched to identify frequent sequence segments (FSSs) of different length. The different combinations of the presence and absence of these FSSs are then used to form different associative sequential patterns (ASPs). Based on a statistical measure, the ASPs that occur significantly frequently among proteins in the training set are then identified as significant associative sequential patterns (SASPs). If an SASP is found in a protein pair, it can be considered as providing some evidence to support or refute the existence of an interaction relationship between the protein pairs. The amount of evidence provided are then quantified with an information theoretic measure. How likely two proteins may interact with each other are then decided by the total amount of evidence provided by the SASPs found in the protein pairs. To test the effectiveness of VLASPD, we used several sets of real data. The experimental results show that VLASPD can be a promising approach for PPI prediction. The VLASPD is made available for use and testing at http://www.comp.polyu.edu.hk/~cslhu/resources/vlaspd/.",2015-05-21 +26549429,Testing and Validation of Computational Methods for Mass Spectrometry.,"High-throughput methods based on mass spectrometry (proteomics, metabolomics, lipidomics, etc.) produce a wealth of data that cannot be analyzed without computational methods. The impact of the choice of method on the overall result of a biological study is often underappreciated, but different methods can result in very different biological findings. It is thus essential to evaluate and compare the correctness and relative performance of computational methods. The volume of the data as well as the complexity of the algorithms render unbiased comparisons challenging. This paper discusses some problems and challenges in testing and validation of computational methods. 
We discuss the different types of data (simulated and experimental validation data) as well as different metrics to compare methods. We also introduce a new public repository for mass spectrometric reference data sets ( http://compms.org/RefData ) that contains a collection of publicly available data sets for performance evaluation for a wide range of different methods.",2015-11-17 +27131376,CARGO: effective format-free compressed storage of genomic information.,"The recent super-exponential growth in the amount of sequencing data generated worldwide has put techniques for compressed storage into the focus. Most available solutions, however, are strictly tied to specific bioinformatics formats, sometimes inheriting from them suboptimal design choices; this hinders flexible and effective data sharing. Here, we present CARGO (Compressed ARchiving for GenOmics), a high-level framework to automatically generate software systems optimized for the compressed storage of arbitrary types of large genomic data collections. Straightforward applications of our approach to FASTQ and SAM archives require a few lines of code, produce solutions that match and sometimes outperform specialized format-tailored compressors and scale well to multi-TB datasets. All CARGO software components can be freely downloaded for academic and non-commercial use from http://bio-cargo.sourceforge.net.",2016-04-29 +27131357,"PHYLOViZ Online: web-based tool for visualization, phylogenetic inference, analysis and sharing of minimum spanning trees.","High-throughput sequencing methods generated allele and single nucleotide polymorphism information for thousands of bacterial strains that are publicly available in online repositories and created the possibility of generating similar information for hundreds to thousands of strains more in a single study. 
Minimum spanning tree analysis of allelic data offers a scalable and reproducible methodological alternative to traditional phylogenetic inference approaches, useful in epidemiological investigations and population studies of bacterial pathogens. PHYLOViZ Online was developed to allow users to do these analyses without software installation and to enable easy accessing and sharing of data and analyses results from any Internet enabled computer. PHYLOViZ Online also offers a RESTful API for programmatic access to data and algorithms, allowing it to be seamlessly integrated into any third party web service or software. PHYLOViZ Online is freely available at https://online.phyloviz.net.",2016-04-29 +28287354,Systematic Reviews Published in the October 2016 Issue of the Cochrane Library.,"The Cochrane Library of Systematic Reviews has been published quarterly as a DVD and monthly online ( http://www.thecochranelibrary.com ). The final October 2016 issue (4th DVD for 2016) contains 7068 complete reviews and 2467 protocols for reviews in production. In addition, there are citations of 973,000 randomized controlled trials, and 15,700 cited papers in the Cochrane Methodology Register. The Health Technology Assessment database contains some 16,000 citations. One hundred and seventeen new reviews have been published in the previous 3 months, of which three have potential relevance for practitioners in pain and palliative medicine. The impact factor of the Cochrane Library stands at 6.1. Readers are encouraged to access the full report for any articles of interest, as only a brief commentary is provided. The CD version of the Cochrane Library will be discontinued and the Library will only available online in the future.",2017-03-01 +27924038,FARNA: knowledgebase of inferred functions of non-coding RNA transcripts.,"Non-coding RNA (ncRNA) genes play a major role in control of heterogeneous cellular behavior. Yet, their functions are largely uncharacterized. 
Current available databases lack in-depth information of ncRNA functions across spectrum of various cells/tissues. Here, we present FARNA, a knowledgebase of inferred functions of 10,289 human ncRNA transcripts (2,734 microRNA and 7,555 long ncRNA) in 119 tissues and 177 primary cells of human. Since transcription factors (TFs) and TF co-factors (TcoFs) are crucial components of regulatory machinery for activation of gene transcription, cellular processes and diseases in which TFs and TcoFs are involved suggest functions of the transcripts they regulate. In FARNA, functions of a transcript are inferred from TFs and TcoFs whose genes co-express with the transcript controlled by these TFs and TcoFs in a considered cell/tissue. Transcripts were annotated using statistically enriched GO terms, pathways and diseases across cells/tissues based on guilt-by-association principle. Expression profiles across cells/tissues based on Cap Analysis of Gene Expression (CAGE) are provided. FARNA, having the most comprehensive function annotation of considered ncRNAs across widest spectrum of human cells/tissues, has a potential to greatly contribute to our understanding of ncRNA roles and their regulatory mechanisms in human. FARNA can be accessed at: http://cbrc.kaust.edu.sa/farna.",2017-03-01 +28812439,Evaluating disease prediction models using a cohort whose covariate distribution differs from that of the target population.,"Personal predictive models for disease development play important roles in chronic disease prevention. The performance of these models is evaluated by applying them to the baseline covariates of participants in external cohort studies, with model predictions compared to subjects' subsequent disease incidence. However, the covariate distribution among participants in a validation cohort may differ from that of the population for which the model will be used. 
Since estimates of predictive model performance depend on the distribution of covariates among the subjects to which it is applied, such differences can cause misleading estimates of model performance in the target population. We propose a method for addressing this problem by weighting the cohort subjects to make their covariate distribution better match that of the target population. Simulations show that the method provides accurate estimates of model performance in the target population, while un-weighted estimates may not. We illustrate the method by applying it to evaluate an ovarian cancer prediction model targeted to US women, using cohort data from participants in the California Teachers Study. The methods can be implemented using open-source code for public use as the R-package RMAP (Risk Model Assessment Package) available at http://stanford.edu/~ggong/rmap/ .",2017-08-16 +26944085,A comprehensive overview of lncRNA annotation resources.,"Long noncoding RNAs (lncRNAs) are emerging as a class of important regulators participating in various biological functions and disease processes. With the widespread application of next-generation sequencing technologies, large numbers of lncRNAs have been identified, producing plenty of lncRNA annotation resources in different contexts. However, at present, we lack a comprehensive overview of these lncRNA annotation resources. In this study, we reviewed 24 currently available lncRNA annotation resources referring to > 205 000 lncRNAs in over 50 tissues and cell lines. We characterized these annotation resources from different aspects, including exon structure, expression, histone modification and function. We found many distinct properties among these annotation resources. Especially, these resources showed diverse chromatin signatures, remarkable tissue and cell type dependence and functional specificity. 
Our results suggested the incompleteness and complementarity of current lncRNA annotations and the necessity of integration of multiple resources to comprehensively characterize lncRNAs. Finally, we developed 'LNCat' (lncRNA atlas, freely available at http://biocc.hrbmu.edu.cn/LNCat/), a user-friendly database that provides a genome browser of lncRNA structures, visualization of different resources from multiple angles and download of different combinations of lncRNA annotations, and supports rapid exploration, comparison and integration of lncRNA annotation resources. Overall, our study provides a comprehensive comparison of numerous lncRNA annotations, and can facilitate understanding of lncRNAs in human disease.",2017-03-01 +28818036,eccCL: parallelized GPU implementation of Ensemble Classifier Chains.,"

Background

Multi-label classification has recently gained great attention in diverse fields of research, e.g., in biomedical application such as protein function prediction or drug resistance testing in HIV. In this context, the concept of Classifier Chains has been shown to improve prediction accuracy, especially when applied as Ensemble Classifier Chains. However, these techniques lack computational efficiency when applied on large amounts of data, e.g., derived from next-generation sequencing experiments. By adapting algorithms for the use of graphics processing units, computational efficiency can be greatly improved due to parallelization of computations.

Results

Here, we provide a parallelized and optimized graphics processing unit implementation (eccCL) of Classifier Chains and Ensemble Classifier Chains. Additionally to the OpenCL implementation, we provide an R-Package with an easy to use R-interface for parallelized graphics processing unit usage.

Conclusion

eccCL is a handy implementation of Classifier Chains on GPUs, which is able to process up to over 25,000 instances per second, and thus can be used efficiently in high-throughput experiments. The software is available at http://www.heiderlab.de .",2017-08-17 +28365761,PhagesDB: the actinobacteriophage database.,"The Actinobacteriophage Database (PhagesDB) is a comprehensive, interactive, database-backed website that collects and shares information related to the discovery, characterization and genomics of viruses that infect Actinobacterial hosts. To date, more than 8000 bacteriophages-including over 1600 with sequenced genomes-have been entered into the database. PhagesDB plays a crucial role in organizing the discoveries of phage biologists around the world-including students in the SEA-PHAGES program-and has been cited in over 50 peer-reviewed articles.

Availability and implementation

http://phagesdb.org/.

Contact

gfh@pitt.edu.",2017-03-01 +25283805,An RNA-Seq based gene expression atlas of the common bean.,"

Background

Common bean (Phaseolus vulgaris) is grown throughout the world and comprises roughly 50% of the grain legumes consumed worldwide. Despite this, genetic resources for common beans have been lacking. Next generation sequencing has facilitated our investigation of the gene expression profiles associated with biologically important traits in common bean. An increased understanding of gene expression in common bean will improve our understanding of gene expression patterns in other legume species.

Results

Combining recently developed genomic resources for Phaseolus vulgaris, including predicted gene calls, with RNA-Seq technology, we measured the gene expression patterns from 24 samples collected from seven tissues at developmentally important stages and from three nitrogen treatments. Gene expression patterns throughout the plant were analyzed to better understand changes due to nodulation, seed development, and nitrogen utilization. We have identified 11,010 genes differentially expressed with a fold change ≥ 2 and a P-value < 0.05 between different tissues at the same time point, 15,752 genes differentially expressed within a tissue due to changes in development, and 2,315 genes expressed only in a single tissue. These analyses identified 2,970 genes with expression patterns that appear to be directly dependent on the source of available nitrogen. Finally, we have assembled this data in a publicly available database, The Phaseolus vulgaris Gene Expression Atlas (Pv GEA), http://plantgrn.noble.org/PvGEA/ . Using the website, researchers can query gene expression profiles of their gene of interest, search for genes expressed in different tissues, or download the dataset in a tabular form.

Conclusions

These data provide the basis for a gene expression atlas, which will facilitate functional genomic studies in common bean. Analysis of this dataset has identified genes important in regulating seed composition and has increased our understanding of nodulation and impact of the nitrogen source on assimilation and distribution throughout the plant.",2014-10-06 +28245795,In search of druggable targets for GBM amino acid metabolism.,"

Background

Amino acid (AA) pathways may contain druggable targets for glioblastoma (GBM). Literature reviews and GBM database ( http://r2.amc.nl ) analyses were carried out to screen for such targets among 95 AA related enzymes.

Methods

First, we identified the genes that were differentially expressed in GBMs (3 datasets) compared to non-GBM brain tissues (5 datasets), or were associated with survival differences. Further, protein expression for these enzymes was also analyzed in high grade gliomas (HGGs) (proteinatlas.org). Finally, AA enzyme and gene expression were compared among the 4 TCGA (The Cancer Genome Atlas) subtypes of GBMs.

Results

We detected differences in enzymes involved in glutamate and urea cycle metabolism in GBM. For example, expression levels of BCAT1 (branched chain amino acid transferase 1) and ASL (argininosuccinate lyase) were high, but ASS1 (argininosuccinate synthase 1) was low in GBM. Proneural and neural TCGA subtypes had low expression of all three. High expression of all three correlated with worse outcome. ASL and ASS1 protein levels were mostly undetected in high grade gliomas, whereas BCAT1 was high. GSS (glutathione synthetase) was not differentially expressed, but higher levels were linked to poor progression free survival. ASPA (aspartoacylase) and GOT1 (glutamic-oxaloacetic transaminase 1) had lower expression in GBM (associated with poor outcomes). All three GABA related genes -- glutamate decarboxylase 1 (GAD1) and 2 (GAD2) and 4-aminobutyrate aminotransferase (ABAT) -- were lower in mesenchymal tumors, which in contrast showed higher IDO1 (indoleamine 2, 3-dioxygenase 1) and TDO2 (tryptophan 2, 3-dioxygenase). Expression of PRODH (proline dehydrogenase), a putative tumor suppressor, was lower in GBM. Higher levels predicted poor survival.

Conclusions

Several AA-metabolizing enzymes that are higher in GBM, are also linked to poor outcome (such as BCAT1), which makes them potential targets for therapeutic inhibition. Moreover, existing drugs that deplete asparagine and arginine may be effective against brain tumors, and should be studied in conjunction with chemotherapy. Last, AA metabolism is heterogeneous in TCGA subtypes of GBM (as well as medulloblastomas and other pediatric tumors), which may translate to variable responses to AA targeted therapies.",2017-02-28 +30034909,Variable importance-weighted Random Forests.,"

Background

Random Forests is a popular classification and regression method that has proven powerful for various prediction problems in biological studies. However, its performance often deteriorates when the number of features increases. To address this limitation, feature elimination Random Forests was proposed that only uses features with the largest variable importance scores. Yet the performance of this method is not satisfying, possibly due to its rigid feature selection, and increased correlations between trees of forest.

Methods

We propose variable importance-weighted Random Forests, which, instead of sampling features with equal probability at each node to build up trees, samples features according to their variable importance scores, and then selects the best split from the randomly selected features.

Results

We evaluate the performance of our method through comprehensive simulation and real data analyses, for both regression and classification. Compared to the standard Random Forests and the feature elimination Random Forests methods, our proposed method has improved performance in most cases.

Conclusions

By incorporating the variable importance scores into the random feature selection step, our method can better utilize more informative features without completely ignoring less informative ones, hence has improved prediction accuracy in the presence of weak signals and large noises. We have implemented an R package ""viRandomForests"" based on the original R package ""randomForest"" and it can be freely downloaded from http://zhaocenter.org/software.",2017-11-06 +29104704,Biomedical Big Data Training Collaborative (BBDTC): An effort to bridge the talent gap in biomedical science and research.,"The BBDTC (https://biobigdata.ucsd.edu) is a community-oriented platform to encourage high-quality knowledge dissemination with the aim of growing a well-informed biomedical big data community through collaborative efforts on training and education. The BBDTC is an e-learning platform that empowers the biomedical community to develop, launch and share open training materials. It deploys hands-on software training toolboxes through virtualization technologies such as Amazon EC2 and Virtualbox. The BBDTC facilitates migration of courses across other course management platforms. The framework encourages knowledge sharing and content personalization through the playlist functionality that enables unique learning experiences and accelerates information dissemination to a wider community.",2017-03-15 +28171606,Comparing co-evolution methods and their application to template-free protein structure prediction.,"

Motivation

Co-evolution methods have been used as contact predictors to identify pairs of residues that share spatial proximity. Such contact predictors have been compared in terms of the precision of their predictions, but there is no study that compares their usefulness to model generation.

Results

We compared eight different co-evolution methods for a set of ∼3500 proteins and found that metaPSICOV stage 2 produces, on average, the most precise predictions. Precision of all the methods is dependent on SCOP class, with most methods predicting contacts in all α and membrane proteins poorly. The contact predictions were then used to assist in de novo model generation. We found that it was not the method with the highest average precision, but rather metaPSICOV stage 1 predictions that consistently led to the best models being produced. Our modelling results show a correlation between the proportion of predicted long range contacts that are satisfied on a model and its quality. We used this proportion to effectively classify models as correct/incorrect; discarding decoys classified as incorrect led to an enrichment in the proportion of good decoys in our final ensemble by a factor of seven. For 17 out of the 18 cases where correct answers were generated, the best models were not discarded by this approach. We were also able to identify eight cases where no correct decoy had been generated.

Availability and implementation

Data is available for download from: http://opig.stats.ox.ac.uk/resources.

Contact

saulo.deoliveira@dtc.ox.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +26970777,Prediction and identification of the effectors of heterotrimeric G proteins in rice (Oryza sativa L.).,"Heterotrimeric G protein signaling cascades are one of the primary metazoan sensing mechanisms linking a cell to environment. However, the number of experimentally identified effectors of G protein in plant is limited. We have therefore studied which tools are best suited for predicting G protein effectors in rice. Here, we compared the predicting performance of four classifiers with eight different encoding schemes on the effectors of G proteins by using 10-fold cross-validation. Four methods were evaluated: random forest, naive Bayes, K-nearest neighbors and support vector machine. We applied these methods to experimentally identified effectors of G proteins and randomly selected non-effector proteins, and tested their sensitivity and specificity. The result showed that random forest classifier with composition of K-spaced amino acid pairs and composition of motif or domain (CKSAAP_PROSITE_200) combination method yielded the best performance, with accuracy and the Mathew's correlation coefficient reaching 74.62% and 0.49, respectively. We have developed G-Effector, an online predictor, which outperforms BLAST, PSI-BLAST and HMMER on predicting the effectors of G proteins. This provided valuable guidance for the researchers to select classifiers combined with different feature selection encoding schemes. We used G-Effector to screen the effectors of G protein in rice, and confirmed the candidate effectors by gene co-expression data. Interestingly, one of the top 15 candidates, which did not appear in the training data set, was validated in a previous research work. Therefore, the candidate effectors list in this article provides both a clue for researchers as to their function and a framework of validation for future experimental work. 
It is accessible at http://bioinformatics.fafu.edu.cn/geffector.",2017-03-01 +29357348,Evolution of mitotic spindle behavior during the first asymmetric embryonic division of nematodes.,"Asymmetric cell division is essential to generate cellular diversity. In many animal cells, the cleavage plane lies perpendicular to the mitotic spindle, and it is the spindle positioning that dictates the size of the daughter cells. Although some properties of spindle positioning are conserved between distantly related model species and different cell types, little is known of the evolutionary robustness of the mechanisms underlying this event. We recorded the first embryonic division of 42 species of nematodes closely related to Caenorhabditis elegans, which is an excellent model system to study the biophysical properties of asymmetric spindle positioning. Our recordings, corresponding to 128 strains from 27 Caenorhabditis and 15 non-Caenorhabditis species (accessible at http://www.ens-lyon.fr/LBMC/NematodeCell/videos/), constitute a powerful collection of subcellular phenotypes to study the evolution of various cellular processes across species. In the present work, we analyzed our collection to the study of asymmetric spindle positioning. Although all the strains underwent an asymmetric first cell division, they exhibited large intra- and inter-species variations in the degree of cell asymmetry and in several parameters controlling spindle movement, including spindle oscillation, elongation, and displacement. Notably, these parameters changed frequently during evolution with no apparent directionality in the species phylogeny, with the exception of spindle transverse oscillations, which were an evolutionary innovation at the base of the Caenorhabditis genus. These changes were also unrelated to evolutionary variations in embryo size. Importantly, spindle elongation, displacement, and oscillation each evolved independently. 
This finding contrasts starkly with expectations based on C. elegans studies and reveals previously unrecognized evolutionary changes in spindle mechanics. Collectively, these data demonstrate that, while the essential process of asymmetric cell division has been conserved over the course of nematode evolution, the underlying spindle movement parameters can combine in various ways. Like other developmental processes, asymmetric cell division is subject to system drift.",2018-01-22 +26499134,Extracting Accurate Precursor Information for Tandem Mass Spectra by RawConverter.,"Extraction of data from the proprietary RAW files generated by Thermo Fisher mass spectrometers is the primary step for subsequent data analysis. High resolution and high mass accuracy data obtained by state-of-the-art mass spectrometers (e.g., Orbitraps) can significantly improve both peptide/protein identification and quantification. We developed RawConverter, a stand-alone software tool, to improve data extraction on RAW files from high-resolution Thermo Fisher mass spectrometers. RawConverter extracts full scan and MS(n) data from RAW files like its predecessor RawXtract; most importantly, it associates the accurate precursor mass-to-charge (m/z) value with the tandem mass spectrum. RawConverter accepts RAW data generated by either data-dependent acquisition (DDA) or data-independent acquisition (DIA). It generates output into MS1/MS2/MS3, MGF, or mzXML file formats, which fulfills the format requirements for most data identification and quantification tools. Using the tandem mass spectra extracted by RawConverter with corrected m/z values, 32.8%, 27.1%, and 84.1%, peptide spectra matches (PSMs) produce 17.4% (13.0%), 14.4% (11.5%), and 45.7% (36.2%) more peptide (protein) identifications than ProteoWizard, pXtract, and RawXtract, respectively. 
RawConverter is implemented in C# and is freely accessible at http://fields.scripps.edu/rawconv.",2015-11-04 +25097386,ProADD: A database on Protein Aggregation Diseases.,"

Unlabelled

ProADD, a database for protein aggregation diseases, is developed to organize the data under a single platform to facilitate easy access for researchers. Diseases caused due to protein aggregation and the proteins involved in each of these diseases are integrated. The database helps in classification of proteins involved in the protein aggregation diseases based on sequence and structural analysis. Analysis of proteins can be done to mine patterns prevailing among the aggregating proteins.

Availability

http://bicmku.in/ProADD.",2014-06-30 +24234003,footprintDB: a database of transcription factors with annotated cis elements and binding interfaces.,"

Motivation

Traditional and high-throughput techniques for determining transcription factor (TF) binding specificities are generating large volumes of data of uneven quality, which are scattered across individual databases.

Results

FootprintDB integrates some of the most comprehensive freely available libraries of curated DNA binding sites and systematically annotates the binding interfaces of the corresponding TFs. The first release contains 2422 unique TF sequences, 10 112 DNA binding sites and 3662 DNA motifs. A survey of the included data sources, organisms and TF families was performed together with proprietary database TRANSFAC, finding that footprintDB has a similar coverage of multicellular organisms, while also containing bacterial regulatory data. A search engine has been designed that drives the prediction of DNA motifs for input TFs, or conversely of TF sequences that might recognize input regulatory sequences, by comparison with database entries. Such predictions can also be extended to a single proteome chosen by the user, and results are ranked in terms of interface similarity. Benchmark experiments with bacterial, plant and human data were performed to measure the predictive power of footprintDB searches, which were able to correctly recover 10, 55 and 90% of the tested sequences, respectively. Correctly predicted TFs had a higher interface similarity than the average, confirming its diagnostic value.

Availability and implementation

Web site implemented in PHP,Perl, MySQL and Apache. Freely available from http://floresta.eead.csic.es/footprintdb.",2013-11-14 +,First whole genome based microsatellite DNA marker database of tomato for mapping and variety identification,"

Background

The cultivated tomato is second most consumed vegetable of the world and is an important part of a diverse and balanced diet as a rich source of vitamins, minerals, phenolic antioxidants and antioxidant lycopene having anti-cancer properties. To reap benefit of genomics of the domestic tomato (Solanum lycopersicum L.) unravelled by Tomato Genome Consortium (The Tomato Genome Consortium, 2012), the bulk mining of its markers in totality is imperative and critically required. The solgenomics has limited number of microsatellite DNA markers (2867) pertaining to solanaceae family. As these markers are of linkage map having relative distance, the choice of selected markers based on absolute distance as of physical map is missing. Only limited microsatellite markers with limitations are reported for variety identification thus there is a need for more markers supplementing DUS test and also for traceability of product in global market.

Description

We present here the first whole genome based microsatellite DNA marker database of tomato, TomSatDB (Tomato MicroSatellite Database) with more than 1.4 million markers mined in-silico, using MIcroSAtellite (MISA) tool. To cater the customized needs of wet lab, features with a novelty of an automated primer designing tool is added. TomSatDB (http://cabindb.iasri.res.in/tomsatdb), a user-friendly and freely accessible tool offers chromosome wise as well as location wise search of primers. It is an online relational database based on “three-tier architecture” that catalogues information of microsatellites in MySQL and user-friendly interface developed using PHP (Hypertext Pre Processor).

Conclusion

Besides abiotic stress, tomato is known to have biotic stress due to its susceptibility over 200 diseases caused by pathogenic fungi, bacteria, viruses and nematodes. These markers are expected to pave the way of germplasm management over abiotic and biotic stress as well as improvement through molecular breeding, leading to increased tomato productivity in India as well as other parts of the world. In era of IPR the new variety can be identified based on allelic variation among varieties supplementing DUS test and product traceability.",2013-01-01 +28903538,POSSUM: a bioinformatics toolkit for generating numerical sequence feature descriptors based on PSSM profiles.,"

Summary

Evolutionary information in the form of a Position-Specific Scoring Matrix (PSSM) is a widely used and highly informative representation of protein sequences. Accordingly, PSSM-based feature descriptors have been successfully applied to improve the performance of various predictors of protein attributes. Even though a number of algorithms have been proposed in previous studies, there is currently no universal web server or toolkit available for generating this wide variety of descriptors. Here, we present POSSUM (Position-Specific Scoring matrix-based feature generator for machine learning), a versatile toolkit with an online web server that can generate 21 types of PSSM-based feature descriptors, thereby addressing a crucial need for bioinformaticians and computational biologists. We envisage that this comprehensive toolkit will be widely used as a powerful tool to facilitate feature extraction, selection, and benchmarking of machine learning-based models, thereby contributing to a more effective analysis and modeling pipeline for bioinformatics research.

Availability and implementation

http://possum.erc.monash.edu/ .

Contact

trevor.lithgow@monash.edu or jiangning.song@monash.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +26224250,Cafe Variome: general-purpose software for making genotype-phenotype data discoverable in restricted or open access contexts.,"Biomedical data sharing is desirable, but problematic. Data ""discovery"" approaches-which establish the existence rather than the substance of data-precisely connect data owners with data seekers, and thereby promote data sharing. Cafe Variome (http://www.cafevariome.org) was therefore designed to provide a general-purpose, Web-based, data discovery tool that can be quickly installed by any genotype-phenotype data owner, or network of data owners, to make safe or sensitive content appropriately discoverable. Data fields or content of any type can be accommodated, from simple ID and label fields through to extensive genotype and phenotype details based on ontologies. The system provides a ""shop window"" in front of data, with main interfaces being a simple search box and a powerful ""query-builder"" that enable very elaborate queries to be formulated. After a successful search, counts of records are reported grouped by ""openAccess"" (data may be directly accessed), ""linkedAccess"" (a source link is provided), and ""restrictedAccess"" (facilitated data requests and subsequent provision of approved records). An administrator interface provides a wide range of options for system configuration, enabling highly customized single-site or federated networks to be established. Current uses include rare disease data discovery, patient matchmaking, and a Beacon Web service.",2015-08-25 +22563072,A novel variational Bayes multiple locus Z-statistic for genome-wide association studies with Bayesian model averaging.,"

Motivation

For many complex traits, including height, the majority of variants identified by genome-wide association studies (GWAS) have small effects, leaving a significant proportion of the heritable variation unexplained. Although many penalized multiple regression methodologies have been proposed to increase the power to detect associations for complex genetic architectures, they generally lack mechanisms for false-positive control and diagnostics for model over-fitting. Our methodology is the first penalized multiple regression approach that explicitly controls Type I error rates and provides model over-fitting diagnostics through a novel normally distributed statistic defined for every marker within the GWAS, based on results from a variational Bayes spike regression algorithm.

Results

We compare the performance of our method to the lasso and single marker analysis on simulated data and demonstrate that our approach has superior performance in terms of power and Type I error control. In addition, using the Women's Health Initiative (WHI) SNP Health Association Resource (SHARe) GWAS of African-Americans, we show that our method has power to detect additional novel associations with body height. These findings replicate by reaching a stringent cutoff of marginal association in a larger cohort.

Availability

An R-package, including an implementation of our variational Bayes spike regression (vBsr) algorithm, is available at http://kooperberg.fhcrc.org/soft.html.",2012-05-04 +27596593,Order restricted inference for oscillatory systems for detecting rhythmic signals.,"

Motivation

Many biological processes, such as cell cycle, circadian clock, menstrual cycles, are governed by oscillatory systems consisting of numerous components that exhibit rhythmic patterns over time. It is not always easy to identify such rhythmic components. For example, it is a challenging problem to identify circadian genes in a given tissue using time-course gene expression data. There is a great potential for misclassifying non-rhythmic as rhythmic genes and vice versa. This has been a problem of considerable interest in recent years. In this article we develop a constrained inference based methodology called Order Restricted Inference for Oscillatory Systems (ORIOS) to detect rhythmic signals. Instead of using mathematical functions (e.g. sinusoidal) to describe shape of rhythmic signals, ORIOS uses mathematical inequalities. Consequently, it is robust and not limited by the biologist's choice of the mathematical model. We studied the performance of ORIOS using simulated as well as real data obtained from mouse liver, pituitary gland and data from NIH3T3, U2OS cell lines. Our results suggest that, for a broad collection of patterns of gene expression, ORIOS has substantially higher power to detect true rhythmic genes in comparison to some popular methods, while also declaring substantially fewer non-rhythmic genes as rhythmic.

Availability and implementation

A user friendly code implemented in R language can be downloaded from http://www.niehs.nih.gov/research/atniehs/labs/bb/staff/peddada/index.cfm CONTACT: peddada@niehs.nih.gov.",2016-09-04 +,13B. Experience With the “Banerji Protocols“ in Treatment of Chronic Disease,"Focus Areas: Integrative Approaches to Care Specific effects of homeopathic treatment are often dismissed as unbelievable due to use of remedies diluted beyond Avogadro's number. Recent basic science research suggests that this argument will no longer hold water with the demonstration that classically prepared ultradilute homeopathic medicines (HM) contain measurable source nanoparticles (NP) and/or silica NP with adsorbed source materials that are heterogeneously dispersed in colloidal solution and have biological properties that differ substantially from bulk forms of the same substance. These findings will soon address the plausibility question; however, individualized homeopathy may remain obscure due to the need for hundreds of hours of training required for effective practice where a different remedy may be needed for every patient regardless of condition. The Banerji Protocols help to alleviate this obstacle. The Prasanta Banerji Homeopathic Research Foundation (http://pbhrfindia.org) is well known for its cancer treatment protocols, having been identified by NCI in 1999 for its Best Case Series with subsequent translational research on specific homeopathic medicines (HM) being done at MD Anderson. Less known are the Protocols for numerous serious conditions that have been developed over four generations of homeopathic practice with thousands of patients in Kolkata, India. The new book from Drs Prasanta and Pratip Banerji, published in June 2013, contains dozens of protocols for the treatment of conditions such as chronic renal failure, dysfunctional uterine bleeding, hepatitis, inflammatory bowel disease, and traumatic brain injury. 
Dr Barbara Sarter has spent many months collecting data in the Kolkata clinic. She and Dr Joyce Frye have already used many of these protocols in their respective US homeopathic practices with excellent results. The Protocols suggest highly efficacious and cost-effective treatments of numerous conditions for which conventional medicine has limited options.",2013-11-01 +23633602,Integrated database of information from structural genomics experiments.,"Information from structural genomics experiments at the RIKEN SPring-8 Center, Japan has been compiled and published as an integrated database. The contents of the database are (i) experimental data from nine species of bacteria that cover a large variety of protein molecules in terms of both evolution and properties (http://database.riken.jp/db/bacpedia), (ii) experimental data from mutant proteins that were designed systematically to study the influence of mutations on the diffraction quality of protein crystals (http://database.riken.jp/db/bacpedia) and (iii) experimental data from heavy-atom-labelled proteins from the heavy-atom database HATODAS (http://database.riken.jp/db/hatodas). The database integration adopts the semantic web, which is suitable for data reuse and automatic processing, thereby allowing batch downloads of full data and data reconstruction to produce new databases. In addition, to enhance the use of data (i) and (ii) by general researchers in biosciences, a comprehensible user interface, Bacpedia (http://bacpedia.harima.riken.jp), has been developed.",2013-04-19 +25378341,diArk--the database for eukaryotic genome and transcriptome assemblies in 2014.,"Eukaryotic genomes are the basis for understanding the complexity of life from populations to the molecular level. Recent technological innovations have revolutionized the speed of data generation enabling the sequencing of eukaryotic genomes and transcriptomes within days. 
The database diArk (http://www.diark.org) has been developed with the aim to provide access to all available assembled genomes and transcriptomes. In September 2014, diArk contains about 2600 eukaryotes with 6000 genome and transcriptome assemblies, of which 22% are not available via NCBI/ENA/DDBJ. Several indicators for the quality of the assemblies are provided to facilitate their comparison for selecting the most appropriate dataset for further studies. diArk has a user-friendly web interface with extensive options for filtering and browsing the sequenced eukaryotes. In this new version of the database we have also integrated species, for which transcriptome assemblies are available, and we provide more analyses of assemblies.",2014-11-06 +25779921,MetaNET--a web-accessible interactive platform for biological metabolic network analysis.,"

Background

Metabolic reactions have been extensively studied and compiled over the last century. These have provided a theoretical base to implement models, simulations of which are used to identify drug targets and optimize metabolic throughput at a systemic level. While tools for the perturbation of metabolic networks are available, their applications are limited and restricted as they require varied dependencies and often a commercial platform for full functionality. We have developed MetaNET, an open source user-friendly platform-independent and web-accessible resource consisting of several pre-defined workflows for metabolic network analysis.

Result

MetaNET is a web-accessible platform that incorporates a range of functions which can be combined to produce different simulations related to metabolic networks. These include (i) optimization of an objective function for wild type strain, gene/catalyst/reaction knock-out/knock-down analysis using flux balance analysis; (ii) flux variability analysis; (iii) chemical species participation; (iv) cycles and extreme paths identification; and (v) choke point reaction analysis to facilitate identification of potential drug targets. The platform is built using custom scripts along with the open-source Galaxy workflow and Systems Biology Research Tool as components. Pre-defined workflows are available for common processes, and an exhaustive list of over 50 functions is provided for user defined workflows.

Conclusion

MetaNET, available at http://metanet.osdd.net , provides a user-friendly rich interface allowing the analysis of genome-scale metabolic networks under various genetic and environmental conditions. The framework permits the storage of previous results, the ability to repeat analysis and share results with other users over the internet as well as run different tools simultaneously using pre-defined workflows, and user-created custom workflows.",2014-12-05 +25273111,PFP/ESG: automated protein function prediction servers enhanced with Gene Ontology visualization tool.,"

Unlabelled

Protein function prediction (PFP) is an automated function prediction method that predicts Gene Ontology (GO) annotations for a protein sequence using distantly related sequences and contextual associations of GO terms. Extended similarity group (ESG) is another GO prediction algorithm that makes predictions based on iterative sequence database searches. Here, we provide interactive web servers for the PFP and ESG algorithms that are equipped with an effective visualization of the GO predictions in a hierarchical topology.

Availability

PFP/ESG servers are freely available at http://kiharalab.org/web/pfp.php and http://kiharalab.org/web/esg.php, or access both at http://kiharalab.org/pfp_esg.php.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-01 +28838990,Marijuana Use and Estimated Glomerular Filtration Rate in Young Adults.,"

Background and objectives

Marijuana use has become more widely accepted in the United States and has been legalized in many areas. Although it is biologically plausible that marijuana could affect kidney function, epidemiologic data are lacking.

Design, setting, participants, & measurements

We conducted a cohort study among young adults with preserved eGFR (i.e., eGFR≥60 ml/min per 1.73 m2) using data from the Coronary Artery Risk Development in Young Adults (CARDIA) study. At scheduled examinations occurring every 5 years and starting at study year 10 (calendar years, 1995-1996), cystatin C was collected over a 10-year period, and urine albumin-to-creatinine ratio was collected over a 15-year period. We investigated the cross-sectional association between current and cumulative marijuana use (in marijuana-years; one marijuana-year equals 365 days of marijuana use) and eGFR by cystatin C (eGFRcys) at year 10. In longitudinal analyses, we investigated the association between cumulative marijuana use and eGFRcys change and rapid (≥3%/year) eGFRcys decline over two 5-year intervals and prevalent albuminuria (urine albumin-to-creatinine ratio ≥30 mg/g) over a 15-year period.

Results

Past or current marijuana use was reported by 83% (3131 of 3765) of the cohort, and the mean eGFRcys was 111 ml/min per 1.73 m2 at year 10. Over the following 10 years, 504 had rapid eGFRcys decline, and over the following 15 years, 426 had prevalent albuminuria. Compared with no use, daily current use and ≥5 marijuana-years of cumulative use were associated with lower eGFRcys at year 10: -4.5% (95% confidence interval, -8.1 to -0.7%; P=0.02) and -3.0% (95% confidence interval, -5.6 to -0.4%; P=0.03), respectively. Marijuana use was not significantly associated with eGFRcys change, rapid eGFRcys decline, or prevalent albuminuria.

Conclusions

Although we identified a modest cross-sectional association between higher marijuana exposure and lower eGFRcys among young adults with preserved eGFR, our findings were largely null and did not demonstrate a longitudinal association between marijuana use and eGFRcys change, rapid eGFRcys decline, or prevalent albuminuria.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2017_08_24_CJASNPodcast_17_10.mp3.",2017-08-24 +25378343,The Eukaryotic Promoter Database: expansion of EPDnew and new promoter analysis tools.,"We present an update of EPDNew (http://epd.vital-it.ch), a recently introduced new part of the Eukaryotic Promoter Database (EPD) which has been described in more detail in a previous NAR Database Issue. EPD is an old database of experimentally characterized eukaryotic POL II promoters, which are conceptually defined as transcription initiation sites or regions. EPDnew is a collection of automatically compiled, organism-specific promoter lists complementing the old corpus of manually compiled promoter entries of EPD. This new part is exclusively derived from next generation sequencing data from high-throughput promoter mapping experiments. We report on the recent growth of EPDnew, its extension to additional model organisms and its improved integration with other bioinformatics resources developed by our group, in particular the Signal Search Analysis and ChIP-Seq web servers.",2014-11-06 +24368831,RefPrimeCouch--a reference gene primer CouchApp.,"To support a quantitative real-time polymerase chain reaction standardization project, a new reference gene database application was required. The new database application was built with the explicit goal of simplifying not only the development process but also making the user interface more responsive and intuitive. To this end, CouchDB was used as the backend with a lightweight dynamic user interface implemented client-side as a one-page web application. Data entry and curation processes were streamlined using an OpenRefine-based workflow. The new RefPrimeCouch database application provides its data online under an Open Database License. 
Database URL: http://hpclife.th-wildau.de:5984/rpc/_design/rpc/view.html.",2013-12-24 +27467249,Juicer Provides a One-Click System for Analyzing Loop-Resolution Hi-C Experiments.,"Hi-C experiments explore the 3D structure of the genome, generating terabases of data to create high-resolution contact maps. Here, we introduce Juicer, an open-source tool for analyzing terabase-scale Hi-C datasets. Juicer allows users without a computational background to transform raw sequence data into normalized contact maps with one click. Juicer produces a hic file containing compressed contact matrices at many resolutions, facilitating visualization and analysis at multiple scales. Structural features, such as loops and domains, are automatically annotated. Juicer is available as open source software at http://aidenlab.org/juicer/.",2016-07-01 +24440488,Sequencing and characterization of the anadromous steelhead (Oncorhynchus mykiss) transcriptome.,"Identifying the traits that differ between hatchery and wild fish may allow for pragmatic changes to hatchery practice. To meet those ends, we sequenced, assembled, and characterized the anadromous steelhead (Oncorhynchus mykiss) transcriptome. Using the Illumina sequencing platform, we sequenced nearly 41million 76-mer reads representing 3.1 Gbp of steelhead transcriptome. Upon final assembly, this sequence data yielded 86,402 transcript scaffolds, of which, 66,530 (77%) displayed homology to proteins of the non-redundant NCBI database. Gene descriptions and gene ontology terms were used to annotate the transcriptome resulting in 4030 unique gene ontology (GO) annotations attributed to the assembled sequences. We also conducted a comparative analysis that identified homologous genes within four other fish species including zebrafish (Danio rerio), stickleback (Gasterosteus aculeatus), and two pufferfish species (Tetraodon nigroviridis and Takifugu rubripes). 
Comparing our steelhead reference assembly directly to the transcriptome for rainbow trout (the fresh water life-history variant of the same species) revealed that while the steelhead and rainbow trout transcriptomes are complementary, the steelhead data will be useful for investigating questions related to anadromous (ocean-going) fishes. These sequence data and web tools provide a useful set of resources for salmonid researchers and the broader genomics community (available at http://salmon.cgrb.oregonstate.edu).",2014-01-17 +25604253,"The TREAT-NMD DMD Global Database: analysis of more than 7,000 Duchenne muscular dystrophy mutations.","Analyzing the type and frequency of patient-specific mutations that give rise to Duchenne muscular dystrophy (DMD) is an invaluable tool for diagnostics, basic scientific research, trial planning, and improved clinical care. Locus-specific databases allow for the collection, organization, storage, and analysis of genetic variants of disease. Here, we describe the development and analysis of the TREAT-NMD DMD Global database (http://umd.be/TREAT_DMD/). We analyzed genetic data for 7,149 DMD mutations held within the database. A total of 5,682 large mutations were observed (80% of total mutations), of which 4,894 (86%) were deletions (1 exon or larger) and 784 (14%) were duplications (1 exon or larger). There were 1,445 small mutations (smaller than 1 exon, 20% of all mutations), of which 358 (25%) were small deletions and 132 (9%) small insertions and 199 (14%) affected the splice sites. Point mutations totalled 756 (52% of small mutations) with 726 (50%) nonsense mutations and 30 (2%) missense mutations. Finally, 22 (0.3%) mid-intronic mutations were observed. 
In addition, mutations were identified within the database that would potentially benefit from novel genetic therapies for DMD including stop codon read-through therapies (10% of total mutations) and exon skipping therapy (80% of deletions and 55% of total mutations).",2015-03-17 +23993102,An interactive resource to identify cancer genetic and lineage dependencies targeted by small molecules.,"The high rate of clinical response to protein-kinase-targeting drugs matched to cancer patients with specific genomic alterations has prompted efforts to use cancer cell line (CCL) profiling to identify additional biomarkers of small-molecule sensitivities. We have quantitatively measured the sensitivity of 242 genomically characterized CCLs to an Informer Set of 354 small molecules that target many nodes in cell circuitry, uncovering protein dependencies that: (1) associate with specific cancer-genomic alterations and (2) can be targeted by small molecules. We have created the Cancer Therapeutics Response Portal (http://www.broadinstitute.org/ctrp) to enable users to correlate genetic features to sensitivity in individual lineages and control for confounding factors of CCL profiling. We report a candidate dependency, associating activating mutations in the oncogene β-catenin with sensitivity to the Bcl-2 family antagonist, navitoclax. The resource can be used to develop novel therapeutic hypotheses and to accelerate discovery of drugs matched to patients by their cancer genotype and lineage.",2013-08-01 +24928188,"FreeSolv: a database of experimental and calculated hydration free energies, with input files.","This work provides a curated database of experimental and calculated hydration free energies for small neutral molecules in water, along with molecular structures, input files, references, and annotations. We call this the Free Solvation Database, or FreeSolv. 
Experimental values were taken from prior literature and will continue to be curated, with updated experimental references and data added as they become available. Calculated values are based on alchemical free energy calculations using molecular dynamics simulations. These used the GAFF small molecule force field in TIP3P water with AM1-BCC charges. Values were calculated with the GROMACS simulation package, with full details given in references cited within the database itself. This database builds in part on a previous, 504-molecule database containing similar information. However, additional curation of both experimental data and calculated values has been done here, and the total number of molecules is now up to 643. Additional information is now included in the database, such as SMILES strings, PubChem compound IDs, accurate reference DOIs, and others. One version of the database is provided in the Supporting Information of this article, but as ongoing updates are envisioned, the database is now versioned and hosted online. In addition to providing the database, this work describes its construction process. The database is available free-of-charge via http://www.escholarship.org/uc/item/6sd403pz .",2014-06-14 +23468041,Management and dissemination of MS proteomic data with PROTICdb: example of a quantitative comparison between methods of protein extraction.,"High throughput MS-based proteomic experiments generate large volumes of complex data and necessitate bioinformatics tools to facilitate their handling. Needs include means to archive data, to disseminate them to the scientific communities, and to organize and annotate them to facilitate their interpretation. We present here an evolution of PROTICdb, a database software that now handles MS data, including quantification. PROTICdb has been developed to be as independent as possible from tools used to produce the data. Biological samples and proteomics data are described using ontology terms. 
A Taverna workflow is embedded, thus permitting to automatically retrieve information related to identified proteins by querying external databases. Stored data can be displayed graphically and a ""Query Builder"" allows users to make sophisticated queries without knowledge on the underlying database structure. All resources can be accessed programmatically using a Java client API or RESTful web services, allowing the integration of PROTICdb in any portal. An example of application is presented, where proteins extracted from a maize leaf sample by four different methods were compared using a label-free shotgun method. Data are available at http://moulon.inra.fr/protic/public. PROTICdb thus provides means for data storage, enrichment, and dissemination of proteomics data.",2013-04-05 +27516611,Navigating the Phenotype Frontier: The Monarch Initiative.,"The principles of genetics apply across the entire tree of life. At the cellular level we share biological mechanisms with species from which we diverged millions, even billions of years ago. We can exploit this common ancestry to learn about health and disease, by analyzing DNA and protein sequences, but also through the observable outcomes of genetic differences, i.e. phenotypes. To solve challenging disease problems we need to unify the heterogeneous data that relates genomics to disease traits. Without a big-picture view of phenotypic data, many questions in genetics are difficult or impossible to answer. The Monarch Initiative (https://monarchinitiative.org) provides tools for genotype-phenotype analysis, genomic diagnostics, and precision medicine across broad areas of disease.",2016-08-01 +25776022,RNABP COGEST: a resource for investigating functional RNAs. ,"Structural bioinformatics of RNA has evolved mainly in response to the rapidly accumulating evidence that non-(protein)-coding RNAs (ncRNAs) play critical roles in gene regulation and development. 
The structures and functions of most ncRNAs are however still unknown. Most of the available RNA structural databases rely heavily on known 3D structures, and contextually correlate base pairing geometry with actual 3D RNA structures. None of the databases provide any direct information about stabilization energies. However, the intrinsic interaction energies of constituent base pairs can provide significant insights into their roles in the overall dynamics of RNA motifs and structures. Quantum mechanical (QM) computations provide the only approach toward their accurate quantification and characterization. 'RNA Base Pair Count, Geometry and Stability' (http://bioinf.iiit.ac.in/RNABPCOGEST) brings together information, extracted from literature data, regarding occurrence frequency, experimental and quantum chemically optimized geometries, and computed interaction energies, for non-canonical base pairs observed in a non-redundant dataset of functional RNA structures. The database is designed to enable the QM community, on the one hand, to identify appropriate biologically relevant model systems and also enable the biology community to easily sift through diverse computational results to gain theoretical insights which could promote hypothesis driven biological research.",2015-03-16 +28749571,L-ornithine L-aspartate in bouts of overt hepatic encephalopathy.,"High-quality data on the efficacy of L-ornithine L-aspartate (LOLA) in patients with cirrhosis and bouts of overt hepatic encephalopathy (OHE) are missing. We evaluated the efficacy of intravenous LOLA in the reversal of bouts of OHE in patients with cirrhosis. In this prospective, double-blind, randomized, placebo-controlled trial conducted at two tertiary care institutes in India, 370 patients with cirrhosis and bouts of OHE were screened. After exclusion, 193 (52.16%) patients were randomized to receive either intravenous infusions of LOLA (n = 98), 30 g daily, or placebo (n = 95) for 5 days. 
Standard of care treatment (including lactulose and ceftriaxone) was given in both groups. Randomization was done centrally (http://www.sealedenvelope.com/). All study personnel were blinded to the treatment assignment. Fasting venous ammonia levels were estimated daily from 0 to 5 days. Serum tumor necrosis factor-alpha, interleukins, hemogram, and liver and renal function tests were performed at days 0 and 5. Primary outcome was mental state grade at day 5 of treatment. The grade of OHE was significantly lower in the LOLA group (compared to placebo) on days 1-4 but not on day 5. The mean time taken for recovery was lower in the LOLA group compared to the placebo group (1.92 ± 0.93 versus 2.50 ± 1.03 days, P = 0.002; 95% confidence interval -0.852 to -0.202). Venous ammonia at day 5 and length of hospital stay were significantly lower in the LOLA group. No significant difference in interleukins was seen between the groups. Conclusion: In patients with bouts of OHE, intravenous LOLA (as an add-on therapy to lactulose and ceftriaxone) significantly improves the grade of OHE over days 1-4, but not on day 5, and decreases venous ammonia, time of recovery, and length of hospital stay. (Hepatology 2018;67:700-710).",2017-12-27 +24466021,CoryneBase: Corynebacterium genomic resources and analysis tools at your fingertips.,"Corynebacteria are used for a wide variety of industrial purposes but some species are associated with human diseases. With increasing number of corynebacterial genomes having been sequenced, comparative analysis of these strains may provide better understanding of their biology, phylogeny, virulence and taxonomy that may lead to the discoveries of beneficial industrial strains or contribute to better management of diseases. 
To facilitate the ongoing research of corynebacteria, a specialized central repository and analysis platform for the corynebacterial research community is needed to host the fast-growing amount of genomic data and facilitate the analysis of these data. Here we present CoryneBase, a genomic database for Corynebacterium with diverse functionality for the analysis of genomes aimed to provide: (1) annotated genome sequences of Corynebacterium where 165,918 coding sequences and 4,180 RNAs can be found in 27 species; (2) access to comprehensive Corynebacterium data through the use of advanced web technologies for interactive web interfaces; and (3) advanced bioinformatic analysis tools consisting of standard BLAST for homology search, VFDB BLAST for sequence homology search against the Virulence Factor Database (VFDB), Pairwise Genome Comparison (PGC) tool for comparative genomic analysis, and a newly designed Pathogenomics Profiling Tool (PathoProT) for comparative pathogenomic analysis. CoryneBase offers the access of a range of Corynebacterium genomic resources as well as analysis tools for comparative genomics and pathogenomics. It is publicly available at http://corynebacterium.um.edu.my/.",2014-01-17 +29381462,Next-generation prostate cancer risk calculator for primary care physicians.,"

Introduction

Current prostate cancer risk calculators are limited in impact because only a probability of having prostate cancer is provided. We developed the next generation of prostate cancer risk calculator that incorporates life expectancy in order to better evaluate prostate cancer risk in context to a patient's age and comorbidity.

Methods

We combined two cohorts to develop the new risk calculator. The first was 5638 subjects who all underwent a prostate biopsy for prostate cancer detection. The second was 979 men diagnosed with prostate cancer with long-term survival data. Two regression models were used to create multivariable nomograms and an online prostate cancer risk calculator was developed.

Results

Of the 5638 patients who underwent a prostate biopsy, 629 (11%) were diagnosed with aggressive prostate cancer (Gleason Score 7[4+3] or more). Of the 979 patients who underwent treatment for prostate cancer, the 10-year overall survival (OS) was 49.6% (95% confidence interval [CI] 46.6-52.9). The first multivariable nomogram for cancer risk had a concordance index of 0.74 (95% CI 0.72, 0.76), and the second nomogram to predict survival had a concordance index of 0.71 (95% CI 0.69-0.72). The next-generation prostate cancer risk calculator was developed online and is available at: http://riskcalc.org/ProstateCA_Screen_Tool.

Conclusions

We have developed the next-generation prostate cancer risk calculator that incorporates a patient's life expectancy based on age and comorbidity. This approach will better evaluate prostate cancer risk. Future studies examining other populations will be needed for validation.",2017-12-01 +27143038,QUADrATiC: scalable gene expression connectivity mapping for repurposing FDA-approved therapeutics.,"

Background

Gene expression connectivity mapping has proven to be a powerful and flexible tool for research. Its application has been shown in a broad range of research topics, most commonly as a means of identifying potential small molecule compounds, which may be further investigated as candidates for repurposing to treat diseases. The public release of voluminous data from the Library of Integrated Cellular Signatures (LINCS) programme further enhanced the utilities and potentials of gene expression connectivity mapping in biomedicine.

Results

We describe QUADrATiC ( http://go.qub.ac.uk/QUADrATiC ), a user-friendly tool for the exploration of gene expression connectivity on the subset of the LINCS data set corresponding to FDA-approved small molecule compounds. It enables the identification of compounds for repurposing therapeutic potentials. The software is designed to cope with the increased volume of data over existing tools, by taking advantage of multicore computing architectures to provide a scalable solution, which may be installed and operated on a range of computers, from laptops to servers. This scalability is provided by the use of the modern concurrent programming paradigm provided by the Akka framework. The QUADrATiC Graphical User Interface (GUI) has been developed using advanced Javascript frameworks, providing novel visualization capabilities for further analysis of connections. There is also a web services interface, allowing integration with other programs or scripts.

Conclusions

QUADrATiC has been shown to provide an improvement over existing connectivity map software, in terms of scope (based on the LINCS data set), applicability (using FDA-approved compounds), usability and speed. It offers potential to biological researchers to analyze transcriptional data and generate potential therapeutics for focussed study in the lab. QUADrATiC represents a step change in the process of investigating gene expression connectivity and provides more biologically-relevant results than previous alternative solutions.",2016-05-04 +24304889,Expression Atlas update--a database of gene and transcript expression from microarray- and sequencing-based functional genomics experiments.,"Expression Atlas (http://www.ebi.ac.uk/gxa) is a value-added database providing information about gene, protein and splice variant expression in different cell types, organism parts, developmental stages, diseases and other biological and experimental conditions. The database consists of selected high-quality microarray and RNA-sequencing experiments from ArrayExpress that have been manually curated, annotated with Experimental Factor Ontology terms and processed using standardized microarray and RNA-sequencing analysis methods. The new version of Expression Atlas introduces the concept of 'baseline' expression, i.e. gene and splice variant abundance levels in healthy or untreated conditions, such as tissues or cell types. Differential gene expression data benefit from an in-depth curation of experimental intent, resulting in biologically meaningful 'contrasts', i.e. instances of differential pairwise comparisons between two sets of biological replicates. 
Other novel aspects of Expression Atlas are its strict quality control of raw experimental data, up-to-date RNA-sequencing analysis methods, expression data at the level of gene sets, as well as genes and a more powerful search interface designed to maximize the biological value provided to the user.",2013-12-04 +24215026,PTMTreeSearch: a novel two-stage tree-search algorithm with pruning rules for the identification of post-translational modification of proteins in MS/MS spectra.,"

Motivation

Tandem mass spectrometry has become a standard tool for identifying post-translational modifications (PTMs) of proteins. Algorithmic searches for PTMs from tandem mass spectrum data (MS/MS) tend to be hampered by noisy data as well as by a combinatorial explosion of search space. This leads to high uncertainty and long search-execution times.

Results

To address this issue, we present PTMTreeSearch, a new algorithm that uses a large database of known PTMs to identify PTMs from MS/MS data. For a given peptide sequence, PTMTreeSearch builds a computational tree wherein each path from the root to the leaves is labeled with the amino acids of a peptide sequence. Branches then represent PTMs. Various empirical tree pruning rules have been designed to decrease the search-execution time by eliminating biologically unlikely solutions. PTMTreeSearch first identifies a relatively small set of high confidence PTM types, and in a second stage, performs a more exhaustive search on this restricted set using relaxed search parameter settings. An analysis of experimental data shows that using the same criteria for false discovery, PTMTreeSearch annotates more peptides than the current state-of-the-art methods and PTM identification algorithms, and achieves this at roughly the same execution time. PTMTreeSearch is implemented as a plugable scoring function in the X!Tandem search engine.

Availability

The source code of PTMTreeSearch and a demo server application can be found at http://net.icgeb.org/ptmtreesearch",2013-11-08 +29036579,HybridSim-VS: a web server for large-scale ligand-based virtual screening using hybrid similarity recognition techniques.,"

Summary

Molecular-similarity searches based on two-dimensional (2D) fingerprint and three-dimensional (3D) shape represent two widely used ligand-based virtual screening (VS) methods in computer-aided drug design. 2D fingerprint-based VS utilizes the binary fragment information on a known ligand, whereas 3D shape-based VS takes advantage of geometric information for predefined features from a 3D conformation. Given their different advantages, it would be desirable to hybridize 2D fingerprint and 3D shape molecular-similarity approaches in drug discovery. Here, we presented a general hybrid molecular-similarity protocol, referred to as HybridSim, obtained by combining the 2D fingerprint- and 3D shape-based similarity search methods and evaluated its performance on 595,036 actives and decoys for 40 pharmaceutically relevant targets available in the Directory of Useful Decoys Enhanced (DUD-E). Our results showed that HybridSim significantly improved the overall performance in 40 VS projects as compared with using only 2D fingerprint and 3D shape methods. Furthermore, HybridSim-VS, the first online platform using the proposed HybridSim method coupled with 17,839,945 screenable and purchasable compounds, was developed to provide large-scale and proficient VS capabilities to experts and nonexperts in the field.

Availability and implementation

HybridSim-VS web server is freely available at http://www.rcidm.org/HybridSim-VS/.

Contact

lingwang@scut.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +28231303,"GreekLex 2: A comprehensive lexical database with part-of-speech, syllabic, phonological, and stress information.","Databases containing lexical properties on any given orthography are crucial for psycholinguistic research. In the last ten years, a number of lexical databases have been developed for Greek. However, these lack important part-of-speech information. Furthermore, the need for alternative procedures for calculating syllabic measurements and stress information, as well as combination of several metrics to investigate linguistic properties of the Greek language are highlighted. To address these issues, we present a new extensive lexical database of Modern Greek (GreekLex 2) with part-of-speech information for each word and accurate syllabification and orthographic information predictive of stress, as well as several measurements of word similarity and phonetic information. The addition of detailed statistical information about Greek part-of-speech, syllabification, and stress neighbourhood allowed novel analyses of stress distribution within different grammatical categories and syllabic lengths to be carried out. Results showed that the statistical preponderance of stress position on the pre-final syllable that is reported for Greek language is dependent upon grammatical category. Additionally, analyses showed that a proportion higher than 90% of the tokens in the database would be stressed correctly solely by relying on stress neighbourhood information. 
The database and the scripts for orthographic and phonological syllabification as well as phonetic transcription are available at http://www.psychology.nottingham.ac.uk/greeklex/.",2017-02-23 +29020453,"Small Angle Neutron Scattering Studies of R67 Dihydrofolate Reductase, a Tetrameric Protein with Intrinsically Disordered N-Termini.","R67 dihydrofolate reductase (DHFR) is a homotetramer with a single active site pore and no sequence or structural homology with chromosomal DHFRs. The R67 enzyme provides resistance to trimethoprim, an active site-directed inhibitor of Escherichia coli DHFR. Sixteen to twenty N-terminal amino acids are intrinsically disordered in the R67 dimer crystal structure. Chymotrypsin cleavage of 16 N-terminal residues results in an active enzyme with a decreased stability. The space sampled by the disordered N-termini of R67 DHFR was investigated using small angle neutron scattering. From a combined analysis using molecular dynamics and the program SASSIE ( http://www.smallangles.net/sassie/SASSIE_HOME.html ), the apoenzyme displays a radius of gyration (Rg) of 21.46 ± 0.50 Å. Addition of glycine betaine, an osmolyte, does not result in folding of the termini as the Rg increases slightly to 22.78 ± 0.87 Å. SASSIE fits of the latter SANS data indicate that the disordered N-termini sample larger regions of space and remain disordered, suggesting they might function as entropic bristles. Pressure perturbation calorimetry also indicated that the volume of R67 DHFR increases upon addition of 10% betaine and decreased at 20% betaine because of the dehydration of the protein. Studies of the hydration of full-length R67 DHFR in the presence of the osmolytes betaine and dimethyl sulfoxide find around 1250 water molecules hydrating the protein. Similar studies with truncated R67 DHFR yield around 400 water molecules hydrating the protein in the presence of betaine. 
The difference of ∼900 waters indicates the N-termini are well-hydrated.",2017-11-01 +24608764,DMAP: differential methylation analysis package for RRBS and WGBS data.,"

Motivation

The rapid development of high-throughput sequencing technologies has enabled epigeneticists to quantify DNA methylation on a massive scale. Progressive increase in sequencing capacity present challenges in terms of processing analysis and the interpretation of the large amount of data; investigating differential methylation between genome-scale data from multiple samples highlights this challenge.

Results

We have developed a differential methylation analysis package (DMAP) to generate coverage-filtered reference methylomes and to identify differentially methylated regions across multiple samples from reduced representation bisulphite sequencing and whole genome bisulphite sequencing experiments. We introduce a novel fragment-based approach for investigating DNA methylation patterns for reduced representation bisulphite sequencing data. Further, DMAP provides the identity of gene and CpG features and distances to the differentially methylated regions in a format that is easily analyzed with limited bioinformatics knowledge.

Availability and implementation

The software has been implemented in C and has been written to ensure portability between different platforms. The source code and documentation is freely available (DMAP: as compressed TAR archive folder) from http://biochem.otago.ac.nz/research/databases-software/. Two test datasets are also available for download from the Web site. Test dataset 1 contains reads from chromosome 1 of a patient and a control, which is used for comparative analysis in the current article. Test dataset 2 contains reads from a part of chromosome 21 of three disease and three control samples for testing the operation of DMAP, especially for the analysis of variance. Example commands for the analyses are included.",2014-03-07 +27153711,Simulated single molecule microscopy with SMeagol.,"

Unlabelled

SMeagol is a software tool to simulate highly realistic microscopy data based on spatial systems biology models, in order to facilitate development, validation and optimization of advanced analysis methods for live cell single molecule microscopy data.

Availability and implementation

SMeagol runs on Matlab R2014 and later, and uses compiled binaries in C for reaction-diffusion simulations. Documentation, source code and binaries for Mac OS, Windows and Ubuntu Linux can be downloaded from http://smeagol.sourceforge.net

Contact

johan.elf@icm.uu.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-04 +29101822,Internal dosimetry of inhaled iodine-131.,"In this paper, the dose assessment for the iodine inhalation exposure in 19 aerosol sizes and three gas/vapor forms at three levels of thyroid uptake, was performed. Two different modes of work (light vs. heavy) and breathing (nose vs. mouth) for aerosol inhalation were investigated. In order to calculate the cumulated activities per unit of inhaled activity, a combined model which included the latest models of both human respiratory and alimentary tract was developed. The S values for 131I were computed based on the ICRP adult male and female reference voxel phantoms by the Monte Carlo method. Then, the committed equivalent and committed effective dose coefficients were obtained (The data are available at http://www.um.ac.ir/∼mirihakim). In general, for the nonzero thyroid uptakes, the maximum cumulated activity was found in the thyroid. When the thyroid is blocked, however, the maximum depends on the work and breathing mode and radioisotope form. Overall, the maximum CED coefficient was evaluated for the inhalation of elemental iodine at thyroid uptake of ∼27% (2.8 × 10-8 Sv/Bq). As for the particle inhalation per se, mouth breathing of 0.6 nm and 0.2 μm AMTD particles showed to have the maximum (2.8 × 10-8 Sv/Bq) and minimum (6.4 × 10-9 Sv/Bq) CED coefficients, respectively. Compared to the reference CED coefficients, the authors found an increase of about 58% for inhalation of the aerosols with AMAD of 1 μm and 70% for 5 μm.",2017-11-01 +29125747,Alchemical Free Energy Calculations for Nucleotide Mutations in Protein-DNA Complexes.,"Nucleotide-sequence-dependent interactions between proteins and DNA are responsible for a wide range of gene regulatory functions. Accurate and generalizable methods to evaluate the strength of protein-DNA binding have long been sought. 
While numerous computational approaches have been developed, most of them require fitting parameters to experimental data to a certain degree, e.g., machine learning algorithms or knowledge-based statistical potentials. Molecular-dynamics-based free energy calculations offer a robust, system-independent, first-principles-based method to calculate free energy differences upon nucleotide mutation. We present an automated procedure to set up alchemical MD-based calculations to evaluate free energy changes occurring as the result of a nucleotide mutation in DNA. We used these methods to perform a large-scale mutation scan comprising 397 nucleotide mutation cases in 16 protein-DNA complexes. The obtained prediction accuracy reaches 5.6 kJ/mol average unsigned deviation from experiment with a correlation coefficient of 0.57 with respect to the experimentally measured free energies. Overall, the first-principles-based approach performed on par with the molecular modeling approaches Rosetta and FoldX. Subsequently, we utilized the MD-based free energy calculations to construct protein-DNA binding profiles for the zinc finger protein Zif268. The calculation results compare remarkably well with the experimentally determined binding profiles. The software automating the structure and topology setup for alchemical calculations is a part of the pmx package; the utilities have also been made available online at http://pmx.mpibpc.mpg.de/dna_webserver.html .",2017-11-29 +28228492,"Neisseria gonorrhoeae Sequence Typing for Antimicrobial Resistance, a Novel Antimicrobial Resistance Multilocus Typing Scheme for Tracking Global Dissemination of N. gonorrhoeae Strains.","A curated Web-based user-friendly sequence typing tool based on antimicrobial resistance determinants in Neisseria gonorrhoeae was developed and is publicly accessible (https://ngstar.canada.ca). The N. 
gonorrhoeae Sequence Typing for Antimicrobial Resistance (NG-STAR) molecular typing scheme uses the DNA sequences of 7 genes (penA, mtrR, porB, ponA, gyrA, parC, and 23S rRNA) associated with resistance to β-lactam antimicrobials, macrolides, or fluoroquinolones. NG-STAR uses the entire penA sequence, combining the historical nomenclature for penA types I to XXXVIII with novel nucleotide sequence designations; the full mtrR sequence and a portion of its promoter region; portions of ponA, porB, gyrA, and parC; and 23S rRNA sequences. NG-STAR grouped 768 isolates into 139 sequence types (STs) (n = 660) consisting of 29 clonal complexes (CCs) having a maximum of a single-locus variation, and 76 NG-STAR STs (n = 109) were identified as unrelated singletons. NG-STAR had a high Simpson's diversity index value of 96.5% (95% confidence interval [CI] = 0.959 to 0.969). The most common STs were NG-STAR ST-90 (n = 100; 13.0%), ST-42 and ST-91 (n = 45; 5.9%), ST-64 (n = 44; 5.72%), and ST-139 (n = 42; 5.5%). Decreased susceptibility to azithromycin was associated with NG-STAR ST-58, ST-61, ST-64, ST-79, ST-91, and ST-139 (n = 156; 92.3%); decreased susceptibility to cephalosporins was associated with NG-STAR ST-90, ST-91, and ST-97 (n = 162; 94.2%); and ciprofloxacin resistance was associated with NG-STAR ST-26, ST-90, ST-91, ST-97, ST-150, and ST-158 (n = 196; 98.0%). All isolates of NG-STAR ST-42, ST-43, ST-63, ST-81, and ST-160 (n = 106) were susceptible to all four antimicrobials. The standardization of nomenclature associated with antimicrobial resistance determinants through an internationally available database will facilitate the monitoring of the global dissemination of antimicrobial-resistant N. gonorrhoeae strains.",2017-02-22 +24237261,Mercator: a fast and simple web server for genome scale functional annotation of plant sequence data.,"Next-generation technologies generate an overwhelming amount of gene sequence data. 
Efficient annotation tools are required to make these data amenable to functional genomics analyses. The Mercator pipeline automatically assigns functional terms to protein or nucleotide sequences. It uses the MapMan 'BIN' ontology, which is tailored for functional annotation of plant 'omics' data. The classification procedure performs parallel sequence searches against reference databases, compiles the results and computes the most likely MapMan BINs for each query. In the current version, the pipeline relies on manually curated reference classifications originating from the three reference organisms (Arabidopsis, Chlamydomonas, rice), various other plant species that have a reviewed SwissProt annotation, and more than 2000 protein domain and family profiles at InterPro, CDD and KOG. Functional annotations predicted by Mercator achieve accuracies above 90% when benchmarked against manual annotation. In addition to mapping files for direct use in the visualization software MapMan, Mercator provides graphical overview charts, detailed annotation information in a convenient web browser interface and a MapMan-to-GO translation table to export results as GO terms. Mercator is available free of charge via http://mapman.gabipd.org/web/guest/app/Mercator.",2013-12-17 +29036440,Analysis and prediction of protein folding energy changes upon mutation by element specific persistent homology.,"

Motivation

Site directed mutagenesis is widely used to understand the structure and function of biomolecules. Computational prediction of mutation impacts on protein stability offers a fast, economical and potentially accurate alternative to laboratory mutagenesis. Most existing methods rely on geometric descriptions, this work introduces a topology based approach to provide an entirely new representation of mutation induced protein stability changes that could not be obtained from conventional techniques.

Results

Topology based mutation predictor (T-MP) is introduced to dramatically reduce the geometric complexity and number of degrees of freedom of proteins, while element specific persistent homology is proposed to retain essential biological information. The present approach is found to outperform other existing methods in the predictions of globular protein stability changes upon mutation. A Pearson correlation coefficient of 0.82 with an RMSE of 0.92 kcal/mol is obtained on a test set of 350 mutation samples. For the prediction of membrane protein stability changes upon mutation, the proposed topological approach has a 84% higher Pearson correlation coefficient than the current state-of-the-art empirical methods, achieving a Pearson correlation of 0.57 and an RMSE of 1.09 kcal/mol in a 5-fold cross validation on a set of 223 membrane protein mutation samples.

Availability and implementation

http://weilab.math.msu.edu/TML/TML-MP/.

Contact

wei@math.msu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +28961772,Reference genome assessment from a population scale perspective: an accurate profile of variability and noise.,"

Motivation

Current plant and animal genomic studies are often based on newly assembled genomes that have not been properly consolidated. In this scenario, misassembled regions can easily lead to false-positive findings. Despite quality control scores are included within genotyping protocols, they are usually employed to evaluate individual sample quality rather than reference sequence reliability. We propose a statistical model that combines quality control scores across samples in order to detect incongruent patterns at every genomic region. Our model is inherently robust since common artifact signals are expected to be shared between independent samples over misassembled regions of the genome.

Results

The reliability of our protocol has been extensively tested through different experiments and organisms with accurate results, improving state-of-the-art methods. Our analysis demonstrates synergistic relations between quality control scores and allelic variability estimators, that improve the detection of misassembled regions, and is able to find strong artifact signals even within the human reference assembly. Furthermore, we demonstrated how our model can be trained to properly rank the confidence of a set of candidate variants obtained from new independent samples.

Availability and implementation

This tool is freely available at http://gitlab.com/carbonell/ces.

Contact

jcarbonell.cipf@gmail.com or joaquin.dopazo@juntadeandalucia.es.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +24194597,ppdb: plant promoter database version 3.0.,"ppdb (http://ppdb.agr.gifu-u.ac.jp) is a plant promoter database that provides information on transcription start sites (TSSs), core promoter structure (TATA boxes, Initiators, Y Patches, GA and CA elements) and regulatory element groups (REGs) as putative and comprehensive transcriptional regulatory elements. Since the last report in this journal, the database has been updated in three areas to version 3.0. First, new genomes have been included in the database, and now ppdb provides information on Arabidopsis thaliana, rice, Physcomitrella patens and poplar. Second, new TSS tag data (34 million) from A. thaliana, determined by a high throughput sequencer, has been added to give a ∼200-fold increase in TSS data compared with version 1.0. This results in a much higher coverage of ∼27,000 A. thaliana genes and finer positioning of promoters even for genes with low expression levels. Third, microarray data-based predictions have been appended as REG annotations which inform their putative physiological roles.",2013-11-04 +27760525,www.common-metrics.org: a web application to estimate scores from different patient-reported outcome measures on a common scale.,"

Background

Recently, a growing number of Item-Response Theory (IRT) models has been published, which allow estimation of a common latent variable from data derived by different Patient Reported Outcomes (PROs). When using data from different PROs, direct estimation of the latent variable has some advantages over the use of sum score conversion tables. It requires substantial proficiency in the field of psychometrics to fit such models using contemporary IRT software. We developed a web application ( http://www.common-metrics.org ), which allows estimation of latent variable scores more easily using IRT models calibrating different measures on instrument independent scales.

Results

Currently, the application allows estimation using six different IRT models for Depression, Anxiety, and Physical Function. Based on published item parameters, users of the application can directly estimate latent trait estimates using expected a posteriori (EAP) for sum scores as well as for specific response patterns, Bayes modal (MAP), Weighted likelihood estimation (WLE) and Maximum likelihood (ML) methods and under three different prior distributions. The obtained estimates can be downloaded and analyzed using standard statistical software.

Conclusions

This application enhances the usability of IRT modeling for researchers by allowing comparison of the latent trait estimates over different PROs, such as the Patient Health Questionnaire Depression (PHQ-9) and Anxiety (GAD-7) scales, the Center of Epidemiologic Studies Depression Scale (CES-D), the Beck Depression Inventory (BDI), PROMIS Anxiety and Depression Short Forms and others. Advantages of this approach include comparability of data derived with different measures and tolerance against missing values. The validity of the underlying models needs to be investigated in the future.",2016-10-19 +25100685,BioTextQuest(+): a knowledge integration platform for literature mining and concept discovery.,"

Summary

The iterative process of finding relevant information in biomedical literature and performing bioinformatics analyses might result in an endless loop for an inexperienced user, considering the exponential growth of scientific corpora and the plethora of tools designed to mine PubMed(®) and related biological databases. Herein, we describe BioTextQuest(+), a web-based interactive knowledge exploration platform with significant advances to its predecessor (BioTextQuest), aiming to bridge processes such as bioentity recognition, functional annotation, document clustering and data integration towards literature mining and concept discovery. BioTextQuest(+) enables PubMed and OMIM querying, retrieval of abstracts related to a targeted request and optimal detection of genes, proteins, molecular functions, pathways and biological processes within the retrieved documents. The front-end interface facilitates the browsing of document clustering per subject, the analysis of term co-occurrence, the generation of tag clouds containing highly represented terms per cluster and at-a-glance popup windows with information about relevant genes and proteins. Moreover, to support experimental research, BioTextQuest(+) addresses integration of its primary functionality with biological repositories and software tools able to deliver further bioinformatics services. The Google-like interface extends beyond simple use by offering a range of advanced parameterization for expert users. We demonstrate the functionality of BioTextQuest(+) through several exemplary research scenarios including author disambiguation, functional term enrichment, knowledge acquisition and concept discovery linking major human diseases, such as obesity and ageing.

Availability

The service is accessible at http://bioinformatics.med.uoc.gr/biotextquest.

Contact

g.pavlopoulos@gmail.com or georgios.pavlopoulos@esat.kuleuven.be

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-08-06 +28270892,Molecular Analysis of Factor VIII and Factor IX Genes in Hemophilia Patients: Identification of Novel Mutations and Molecular Dynamics Studies.,"

Background

Hemophilias A and B are X-linked bleeding disorders caused by mutations in the factor VIII and factor IX genes, respectively. Our objective was to identify the spectrum of mutations of the factor VIII and factor IX genes in Saudi Arabian population and determine the genotype and phenotype correlations by molecular dynamics (MD) simulation.

Methods

For genotyping, blood samples from Saudi Arabian patients were collected, and the genomic DNA was amplified, and then sequenced by Sanger method. For molecular simulations, we have used softwares such as CHARMM (Chemistry at Harvard Macromolecular Mechanics; http://www.charmm-gui.org) and GROMACS. In addition, the secondary structure was determined based on the solvent accessibility for the confirmation of the protein stability at the site of mutation.

Results

Six mutations (three novel and three known) were identified in factor VIII gene, and six mutations (one novel and five known) were identified in factor IX gene. The factor VIII novel mutations identified were c.99G>T, p. (W33C) in exon 1, c.2138 DelA, p. (N713Tfs*9) in exon 14, also a novel mutation at splicing acceptor site of exon 23 c.6430 - 1G>A. In factor IX, we found a novel mutation c.855G>C, p. (E285D) in exon 8. These novel mutations were not reported in any factor VIII or factor IX databases previously. The deleterious effects of these novel mutations were confirmed by PolyPhen2 and SIFT programs.

Conclusion

The protein functional and structural studies and the models built in this work would be appropriate for predicting the effects of deleterious amino acid substitutions causing these genetic disorders. These findings are useful for genetic counseling in the case of consanguineous marriages which is more common in the Saudi Arabia.",2017-02-21 +24680503,Expansion of coverage of Carbohydrate Structure Database (CSDB).,"The Bacterial Carbohydrate Structure Database (BCSDB), which has been maintained since 2005, was expanded to cover glycans from plants and fungi. The current coverage on plant and fungal glycans includes several thousands of the CarbBank records, as well as data published before 1996 but not deposited in CarbBank. Prior to deposition, the data were verified against the original publications and supplemented with additional information, such as NMR spectra. Both the Bacterial and Plant and Fungal Carbohydrate Structure Databases are freely available at http://csdb.glycoscience.ru.",2013-10-23 +28881959,MaBoSS 2.0: an environment for stochastic Boolean modeling.,"

Motivation

Modeling of signaling pathways is an important step towards the understanding and the treatment of diseases such as cancers, HIV or auto-immune diseases. MaBoSS is a software that allows to simulate populations of cells and to model stochastically the intracellular mechanisms that are deregulated in diseases. MaBoSS provides an output of a Boolean model in the form of time-dependent probabilities, for all biological entities (genes, proteins, phenotypes, etc.) of the model.

Results

We present a new version of MaBoSS (2.0), including an updated version of the core software and an environment. With this environment, the needs for modeling signaling pathways are facilitated, including model construction, visualization, simulations of mutations, drug treatments and sensitivity analyses. It offers a framework for automated production of theoretical predictions.

Availability and implementation

MaBoSS software can be found at https://maboss.curie.fr , including tutorials on existing models and examples of models.

Contact

gautier.stoll@upmc.fr or laurence.calzone@curie.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +23716644,BiDaS: a web-based Monte Carlo BioData Simulator based on sequence/feature characteristics.,"BiDaS is a web-application that can generate massive Monte Carlo simulated sequence or numerical feature data sets (e.g. dinucleotide content, composition, transition, distribution properties) based on small user-provided data sets. BiDaS server enables users to analyze their data and generate large amounts of: (i) Simulated DNA/RNA and aminoacid (AA) sequences following practically identical sequence and/or extracted feature distributions with the original data. (ii) Simulated numerical features, presenting identical distributions, while preserving the exact 2D or 3D between-feature correlations observed in the original data sets. The server can project the provided sequences to multidimensional feature spaces based on: (i) 38 DNA/RNA features describing conformational and physicochemical nucleotide sequence features from the B-DNA-VIDEO database, (ii) 122 DNA/RNA features based on conformational and thermodynamic dinucleotide properties from the DiProDB database and (iii) Pseudo-aminoacid composition of the initial sequences. To the best of our knowledge, this is the first available web-server that allows users to generate vast numbers of biological data sets with realistic characteristics, while keeping between-feature associations. These data sets can be used for a wide variety of current biological problems, such as the in-depth study of gene, transcript, peptide and protein groups/families; the creation of large data sets from just a few available members and the strengthening of machine learning classifiers. All simulations use advanced Monte Carlo sampling techniques. 
The BiDaS web-application is available at http://bioserver-3.bioacademy.gr/Bioserver/BiDaS/.",2013-05-28 +27340894,Feasibility of Deploying Inhaler Sensors to Identify the Impacts of Environmental Triggers and Built Environment Factors on Asthma Short-Acting Bronchodilator Use.,"

Background

Epidemiological asthma research has relied upon self-reported symptoms or healthcare utilization data, and used the residential address as the primary location for exposure. These data sources can be temporally limited, spatially aggregated, subjective, and burdensome for the patient to collect.

Objectives

First, we aimed to test the feasibility of collecting rescue inhaler use data in space-time using electronic sensors. Second, we aimed to evaluate whether these data have the potential to identify environmental triggers and built environment factors associated with rescue inhaler use and to determine whether these findings would be consistent with the existing literature.

Methods

We utilized zero-truncated negative binomial models to identify triggers associated with inhaler use, and implemented three sensitivity analyses to validate our findings.

Results

Electronic sensors fitted on metered dose inhalers tracked 5,660 rescue inhaler use events in space and time for 140 participants from 13 June 2012 to 28 February 2014. We found that the inhaler sensors were feasible in passively collecting objective rescue inhaler use data. We identified several environmental triggers with a positive and significant association with inhaler use, including: AQI, PM10, weed pollen, and mold. Conversely, the spatial distribution of tree cover demonstrated a negative and significant association with inhaler use.

Conclusions

Utilizing a sensor to capture the signal of rescue inhaler use in space-time offered a passive and objective signal of asthma activity. This approach enabled detailed analyses to identify environmental triggers and built environment factors that are associated with asthma symptoms beyond the residential address. The application of these new technologies has the potential to improve our surveillance and understanding of asthma. Citation: Su JG, Barrett MA, Henderson K, Humblet O, Smith T, Sublett JW, Nesbitt L, Hogg C, Van Sickle D, Sublett JL. 2017. Feasibility of deploying inhaler sensors to identify the impacts of environmental triggers and built environment factors on asthma short-acting bronchodilator use. Environ Health Perspect 125:254-261; http://dx.doi.org/10.1289/EHP266.",2016-06-24 +28453782,SeMPI: a genome-based secondary metabolite prediction and identification web server.,"The secondary metabolism of bacteria, fungi and plants yields a vast number of bioactive substances. The constantly increasing amount of published genomic data provides the opportunity for an efficient identification of gene clusters by genome mining. Conversely, for many natural products with resolved structures, the encoding gene clusters have not been identified yet. Even though genome mining tools have become significantly more efficient in the identification of biosynthetic gene clusters, structural elucidation of the actual secondary metabolite is still challenging, especially due to as yet unpredictable post-modifications. Here, we introduce SeMPI, a web server providing a prediction and identification pipeline for natural products synthesized by polyketide synthases of type I modular. In order to limit the possible structures of PKS products and to include putative tailoring reactions, a structural comparison with annotated natural products was introduced. Furthermore, a benchmark was designed based on 40 gene clusters with annotated PKS products. 
The web server of the pipeline (SeMPI) is freely available at: http://www.pharmaceutical-bioinformatics.de/sempi.",2017-07-01 +28725475,MINERVA-a platform for visualization and curation of molecular interaction networks.,"Our growing knowledge about various molecular mechanisms is becoming increasingly more structured and accessible. Different repositories of molecular interactions and available literature enable construction of focused and high-quality molecular interaction networks. Novel tools for curation and exploration of such networks are needed, in order to foster the development of a systems biology environment. In particular, solutions for visualization, annotation and data cross-linking will facilitate usage of network-encoded knowledge in biomedical research. To this end we developed the MINERVA (Molecular Interaction NEtwoRks VisuAlization) platform, a standalone webservice supporting curation, annotation and visualization of molecular interaction networks in Systems Biology Graphical Notation (SBGN)-compliant format. MINERVA provides automated content annotation and verification for improved quality control. The end users can explore and interact with hosted networks, and provide direct feedback to content curators. MINERVA enables mapping drug targets or overlaying experimental data on the visualized networks. Extensive export functions enable downloading areas of the visualized networks as SBGN-compliant models for efficient reuse of hosted networks. The software is available under Affero GPL 3.0 as a Virtual Machine snapshot, Debian package and Docker instance at http://r3lab.uni.lu/web/minerva-website/. We believe that MINERVA is an important contribution to systems biology community, as its architecture enables set-up of locally or globally accessible SBGN-oriented repositories of molecular interaction networks. Its functionalities allow overlay of multiple information layers, facilitating exploration of content and interpretation of data. 
Moreover, annotation and verification workflows of MINERVA improve the efficiency of curation of networks, allowing life-science researchers to better engage in development and use of biomedical knowledge repositories.",2016-09-22 +28219343,Computing structure-based lipid accessibility of membrane proteins with mp_lipid_acc in RosettaMP.,"

Background

Membrane proteins are underrepresented in structural databases, which has led to a lack of computational tools and the corresponding inappropriate use of tools designed for soluble proteins. For membrane proteins, lipid accessibility is an essential property. Although programs are available for sequence-based prediction of lipid accessibility and structure-based identification of solvent-accessible surface area, the latter does not distinguish between water accessible and lipid accessible residues in membrane proteins.

Results

Here we present mp_lipid_acc, the first method to identify lipid accessible residues from the protein structure, implemented in the RosettaMP framework and available as a webserver. Our method uses protein structures transformed in membrane coordinates, for instance from PDBTM or OPM databases, and a defined membrane thickness to classify lipid accessibility of residues. mp_lipid_acc is applicable to both α-helical and β-barrel membrane proteins of diverse architectures with or without water-filled pores and uses a concave hull algorithm for surface-residue classification. We further provide a manually curated benchmark dataset that can be used for further method development.

Conclusions

We present a novel tool to classify lipid accessibility from the protein structure, which is applicable to proteins of diverse architectures and achieves prediction accuracies of 90% on a manually curated database. mp_lipid_acc is part of the Rosetta software suite, available at www.rosettacommons.org . The webserver is available at http://rosie.graylab.jhu.edu/mp_lipid_acc/submit and the benchmark dataset is available at http://tinyurl.com/mp-lipid-acc-dataset .",2017-02-20 +27288089,A New Z Score Curve of the Coronary Arterial Internal Diameter Using the Lambda-Mu-Sigma Method in a Pediatric Population.,"

Background

Several coronary artery Z score models have been developed. However, a Z score model derived by the lambda-mu-sigma (LMS) method has not been established.

Methods

Echocardiographic measurements of the proximal right coronary artery, left main coronary artery, proximal left anterior descending coronary artery, and proximal left circumflex artery were prospectively collected in 3,851 healthy children ≤18 years of age and divided into developmental and validation data sets. In the developmental data set, smooth curves were fitted for each coronary artery using linear, logarithmic, square-root, and LMS methods for both sexes. The relative goodness of fit of these models was compared using the Bayesian information criterion. The best-fitting model was tested for reproducibility using the validation data set. The goodness of fit of the selected model was visually compared with that of the previously reported regression models using a Q-Q plot.

Results

Because the internal diameter of each coronary artery was not similar between sexes, sex-specific Z score models were developed. The LMS model with body surface area as the independent variable showed the best goodness of fit; therefore, the internal diameter of each coronary artery was transformed into a sex-specific Z score on the basis of body surface area using the LMS method. In the validation data set, a Q-Q plot of each model indicated that the distribution of Z scores in the LMS models was closer to the normal distribution compared with previously reported regression models. Finally, the final models for each coronary artery in both sexes were developed using the developmental and validation data sets. A Microsoft Excel-based Z score calculator was also created, which is freely available online (http://raise.umin.jp/zsp/calculator/).

Conclusions

Novel LMS models with which to estimate the sex-specific Z score of each internal coronary artery diameter were generated and validated using a large pediatric population.",2016-06-07 +22537014,EuFishBioMed (COST Action BM0804): a European network to promote the use of small fishes in biomedical research.,"Small fresh water fishes such as the zebrafish (Danio rerio) have become important model organisms for biomedical research. They currently represent the best vertebrate embryo models in which it is possible to derive quantitative data on gene expression, signaling events, and cell behavior in real time in the living animal. Relevant phenotypes in fish mutants are similar to those of other vertebrate models and human diseases. They can be analyzed in great detail and much faster than in mammals. In recent years, approximately 2500 genetically distinct fish lines have been generated by European research groups alone. Their potential, including their possible use by industry, is far from being exploited. To promote zebrafish research in Europe, EuFishBioMed was founded and won support by the EU COST programme ( http://www.cost.esf.org/ ). The main objective of EuFishBioMed is to establish a platform of knowledge exchange for research on small fish models with a strong focus on widening its biomedical applications and an integration of European research efforts and resources. EuFishBioMed currently lists more than 300 member laboratories in Europe, offers funding for short-term laboratory visits, organizes and co-sponsors meetings and workshops, and has successfully lobbied for the establishment of a European Zebrafish Resource Centre. To maintain this network in the future, beyond the funding period of the COST Action, we are currently establishing the European Society for Fish Models in Biology and Medicine.",2012-04-26 +25880930,REGULATOR: a database of metazoan transcription factors and maternal factors for developmental studies.,"

Background

Genes encoding transcription factors that constitute gene-regulatory networks and maternal factors accumulating in egg cytoplasm are two classes of essential genes that play crucial roles in developmental processes. Transcription factors control the expression of their downstream target genes by interacting with cis-regulatory elements. Maternal factors initiate embryonic developmental programs by regulating the expression of zygotic genes and various other events during early embryogenesis.

Results

This article documents the transcription factors of 77 metazoan species as well as human and mouse maternal factors. We improved the previous method using a statistical approach adding Gene Ontology information to Pfam based identification of transcription factors. This method detects previously un-discovered transcription factors. The novel features of this database are: (1) It includes both transcription factors and maternal factors, although the number of species, in which maternal factors are listed, is limited at the moment. (2) Ontological representation at the cell, tissue, organ, and system levels has been specially designed to facilitate development studies. This is the unique feature in our database and is not available in other transcription factor databases.

Conclusions

A user-friendly web interface, REGULATOR ( http://www.bioinformatics.org/regulator/ ), which can help researchers to efficiently identify, validate, and visualize the data analyzed in this study, is provided. Using this web interface, users can browse, search, and download detailed information on species of interest, genes, transcription factor families, or developmental ontology terms.",2015-04-10 +27458779,"Relative Contributions of Agricultural Drift, Para-Occupational, and Residential Use Exposure Pathways to House Dust Pesticide Concentrations: Meta-Regression of Published Data.","

Background

Increased pesticide concentrations in house dust in agricultural areas have been attributed to several exposure pathways, including agricultural drift, para-occupational, and residential use.

Objective

To guide future exposure assessment efforts, we quantified relative contributions of these pathways using meta-regression models of published data on dust pesticide concentrations.

Methods

From studies in North American agricultural areas published from 1995 to 2015, we abstracted dust pesticide concentrations reported as summary statistics [e.g., geometric means (GM)]. We analyzed these data using mixed-effects meta-regression models that weighted each summary statistic by its inverse variance. Dependent variables were either the log-transformed GM (drift) or the log-transformed ratio of GMs from two groups (para-occupational, residential use).

Results

For the drift pathway, predicted GMs decreased sharply and nonlinearly, with GMs 64% lower in homes 250 m versus 23 m from fields (interquartile range of published data) based on 52 statistics from seven studies. For the para-occupational pathway, GMs were 2.3 times higher [95% confidence interval (CI): 1.5, 3.3; 15 statistics, five studies] in homes of farmers who applied pesticides more recently or frequently versus less recently or frequently. For the residential use pathway, GMs were 1.3 (95% CI: 1.1, 1.4) and 1.5 (95% CI: 1.2, 1.9) times higher in treated versus untreated homes, when the probability that a pesticide was used for the pest treatment was 1-19% and ≥ 20%, respectively (88 statistics, five studies).

Conclusion

Our quantification of the relative contributions of pesticide exposure pathways in agricultural populations could improve exposure assessments in epidemiologic studies. The meta-regression models can be updated when additional data become available. Citation: Deziel NC, Beane Freeman LE, Graubard BI, Jones RR, Hoppin JA, Thomas K, Hines CJ, Blair A, Sandler DP, Chen H, Lubin JH, Andreotti G, Alavanja MC, Friesen MC. 2017. Relative contributions of agricultural drift, para-occupational, and residential use exposure pathways to house dust pesticide concentrations: meta-regression of published data. Environ Health Perspect 125:296-305; http://dx.doi.org/10.1289/EHP426.",2016-07-26 +27559154,Cas-analyzer: an online tool for assessing genome editing results using NGS data.,"Genome editing with programmable nucleases has been widely adopted in research and medicine. Next generation sequencing (NGS) platforms are now widely used for measuring the frequencies of mutations induced by CRISPR-Cas9 and other programmable nucleases. Here, we present an online tool, Cas-Analyzer, a JavaScript-based implementation for NGS data analysis. Because Cas-Analyzer is completely used at a client-side web browser on-the-fly, there is no need to upload very large NGS datasets to a server, a time-consuming step in genome editing analysis. Currently, Cas-Analyzer supports various programmable nucleases, including single nucleases and paired nucleases.

Availability and implementation

Free access at http://www.rgenome.net/cas-analyzer/ CONTACT: sangsubae@hanyang.ac.kr or jskim01@snu.ac.kr Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-24 +25369459,Restricting or banning alcohol advertising to reduce alcohol consumption in adults and adolescents.,"

Background

Alcohol is estimated to be the fifth leading risk factor for global disability-adjusted life years. Restricting or banning alcohol advertising may reduce exposure to the risk posed by alcohol at the individual and general population level. To date, no systematic review has evaluated the effectiveness, possible harms and cost-effectiveness of this intervention.

Objectives

To evaluate the benefits, harms and costs of restricting or banning the advertising of alcohol, via any format, compared with no restrictions or counter-advertising, on alcohol consumption in adults and adolescents.

Search methods

We searched the Cochrane Drugs and Alcohol Group Specialised Register (May 2014); CENTRAL (Issue 5, 2014); MEDLINE (1966 to 28 May 2014); EMBASE (1974 to 28 May 2014); PsychINFO (June 2013); and five alcohol and marketing databases in October 2013. We also searched seven conference databases and www.clinicaltrials.gov and http://apps.who.int/trialsearch/ in October 2013. We checked the reference lists of all studies identified and those of relevant systematic reviews or guidelines, and contacted researchers, policymakers and other experts in the field for published or unpublished data, regardless of language.

Selection criteria

We included randomised controlled trials (RCTs), controlled clinical trials, prospective and retrospective cohort studies, controlled before-and-after studies and interrupted time series (ITS) studies that evaluated the restriction or banning of alcohol advertising via any format including advertising in the press, on the television, radio, or internet, via billboards, social media or product placement in films. The data could be at the individual (adults or adolescent) or population level.

Data collection and analysis

We used the standard methodological procedures expected by The Cochrane Collaboration.

Main results

We included one small RCT (80 male student participants conducted in the Netherlands and published in 2009) and three ITS studies (general population studies in Canadian provinces conducted in the 1970s and 80s). The RCT found that young men exposed to movies with a low-alcohol content drank less than men exposed to movies with a high-alcohol content (mean difference (MD) -0.65 drinks; 95% CI -1.2, -0.07; p value = 0.03, very-low-quality evidence). Young men exposed to commercials with a neutral content compared with those exposed to commercials for alcohol drank less (MD -0.73 drinks; 95% CI -1.30, -0.16; p value = 0.01, very-low-quality evidence). Outcomes were assessed immediately after the end of the intervention (lasting 1.5 hours), so no follow-up data were available. Using the Grading of Recommendations Assessment, Development and Evaluation approach, the quality of the evidence was rated as very low due to a serious risk of bias, serious indirectness of the included population and serious level of imprecision. Two of the ITS studies evaluated the implementation of an advertising ban and one study evaluated the lifting of such a ban. Each of the three ITS studies evaluated a different type of ban (partial or full) compared with different degrees of restrictions or no restrictions during the control period. The results from the three ITS studies were inconsistent. A meta-analysis of the two studies that evaluated the implementation of a ban showed an overall mean non-significant increase in beer consumption in the general population of 1.10% following the ban (95% CI -5.26, 7.47; p value = 0.43; I(2) = 83%, very-low-quality evidence). This finding is consistent with an increase, no difference, or a decrease in alcohol consumption. 
In the study evaluating the lifting of a total ban on all forms of alcohol advertising to a partial ban on spirits advertising only, which utilised an Abrupt Auto-regressive Integrated Moving Average model, the volume of all forms of alcohol sales decreased by 11.11 kilolitres (95% CI -27.56, 5.34; p value = 0.19) per month after the ban was lifted. In this model, beer and wine sales increased per month by 14.89 kilolitres (95% CI 0.39, 29.39; p value = 0.04) and 1.15 kilolitres (95% CI -0.91, 3.21; p value = 0.27), respectively, and spirits sales decreased statistically significantly by 22.49 kilolitres (95% CI -36.83, -8.15; p value = 0.002). Using the GRADE approach, the evidence from the ITS studies was rated as very low due to a high risk of bias arising from a lack of randomisation and imprecision in the results. No other prespecified outcomes (including economic loss or hardship due to decreased alcohol sales) were addressed in the included studies and no adverse effects were reported in any of the studies. None of the studies were funded by the alcohol or advertising industries.

Authors' conclusions

There is a lack of robust evidence for or against recommending the implementation of alcohol advertising restrictions. Advertising restrictions should be implemented within a high-quality, well-monitored research programme to ensure the evaluation over time of all relevant outcomes in order to build the evidence base.",2014-11-04 +24234449,FlyBase 102--advanced approaches to interrogating FlyBase.,"FlyBase (http://flybase.org) is the leading website and database of Drosophila genes and genomes. Whether you are using the fruit fly Drosophila melanogaster as an experimental system or wish to understand Drosophila biological knowledge in relation to human disease or to other model systems, FlyBase can help you successfully find the information you are looking for. Here, we demonstrate some of our more advanced searching systems and highlight some of our new tools for searching the wealth of data on FlyBase. The first section explores gene function in FlyBase, using our TermLink tool to search with Controlled Vocabulary terms and our new RNA-Seq Search tool to search gene expression. The second section of this article describes a few ways to search genomic data in FlyBase, using our BLAST server and the new implementation of GBrowse 2, as well as our new FeatureMapper tool. Finally, we move on to discuss our most powerful search tool, QueryBuilder, before describing pre-computed cuts of the data and how to query the database programmatically.",2013-11-13 +28590671,Genetics of the Connectome and the ENIGMA Project,"Here we give an overview of a worldwide effort, called the ENIGMA Consortium (http://enigma.ini.usc.edu), which unites scientists worldwide to determine how variants in our genetic code influence the brain, and how 12 major diseases affect the brain worldwide. At the time of writing, ENIGMA involves over 500 scientists from 185 institutions worldwide, working together on around 30 projects to discover factors that may help or harm the brain. 
By pooling genome-wide genomic data and brain imaging from over 33,000 people, ENIGMA has been able to identify single-nucleotide differences in the genome that are associated with differences in human brain structure and function. Given the broad interest in brain connectivity and the factors that affect it, we outline some tactics adopted by ENIGMA to discover specific genes that affect the brain; then we describe how ENIGMA is extending these methods to discover genetic influences on brain connectivity.",2017-06-08 +22776079,Concept annotation in the CRAFT corpus.,"

Background

Manually annotated corpora are critical for the training and evaluation of automated methods to identify concepts in biomedical text.

Results

This paper presents the concept annotations of the Colorado Richly Annotated Full-Text (CRAFT) Corpus, a collection of 97 full-length, open-access biomedical journal articles that have been annotated both semantically and syntactically to serve as a research resource for the biomedical natural-language-processing (NLP) community. CRAFT identifies all mentions of nearly all concepts from nine prominent biomedical ontologies and terminologies: the Cell Type Ontology, the Chemical Entities of Biological Interest ontology, the NCBI Taxonomy, the Protein Ontology, the Sequence Ontology, the entries of the Entrez Gene database, and the three subontologies of the Gene Ontology. The first public release includes the annotations for 67 of the 97 articles, reserving two sets of 15 articles for future text-mining competitions (after which these too will be released). Concept annotations were created based on a single set of guidelines, which has enabled us to achieve consistently high interannotator agreement.

Conclusions

As the initial 67-article release contains more than 560,000 tokens (and the full set more than 790,000 tokens), our corpus is among the largest gold-standard annotated biomedical corpora. Unlike most others, the journal articles that comprise the corpus are drawn from diverse biomedical disciplines and are marked up in their entirety. Additionally, with a concept-annotation count of nearly 100,000 in the 67-article subset (and more than 140,000 in the full collection), the scale of conceptual markup is also among the largest of comparable corpora. The concept annotations of the CRAFT Corpus have the potential to significantly advance biomedical text mining by providing a high-quality gold standard for NLP systems. The corpus, annotation guidelines, and other associated resources are freely available at http://bionlp-corpora.sourceforge.net/CRAFT/index.shtml.",2012-07-09 +24962967,CDC's Health Equity Resource Toolkit: disseminating guidance for state practitioners to address obesity disparities.,"Obesity has been on the rise in the United States over the past three decades, and is high. In addition to population-wide trends, it is clear that obesity affects some groups more than others and can be associated with age, income, education, gender, race and ethnicity, and geographic region. To reverse the obesity epidemic, the Centers for Disease Control and Prevention promotes evidence-based and practice-informed strategies to address nutrition and physical activity environments and behaviors. These public health strategies require translation into actionable approaches that can be implemented by state and local entities to address disparities. The Centers for Disease Control and Prevention used findings from an expert panel meeting to guide the development and dissemination of the Health Equity Resource Toolkit for State Practitioners Addressing Obesity Disparities (available at http://www.cdc.gov/obesity/health_equity/toolkit.html). 
The Toolkit helps public health practitioners take a systematic approach to program planning using a health equity lens. The Toolkit provides a six-step process for planning, implementing, and evaluating strategies to address obesity disparities. Each section contains (a) a basic description of the steps of the process and suggested evidence-informed actions to help address obesity disparities, (b) practical tools for carrying out activities to help reduce obesity disparities, and (c) a ""real-world"" case study of a successful state-level effort to address obesity with a focus on health equity that is particularly relevant to the content in that section. Hyperlinks to additional resources are included throughout.",2014-06-24 +26006320,"Formative research and development of innovative tools for ""Better Outcomes in Labour Difficulty"" (BOLD): study protocol.","

Background

Most complications during labour and childbirth could be averted with timely interventions by skilled healthcare providers. Yet, the quality and outcomes of childbirth care remains suboptimal in many health facilities in low-resource settings. To accelerate the reduction of childbirth-related maternal, fetal and newborn mortality and morbidity, the World Health Organization has initiated the ""Better Outcomes in Labour Difficulty"" (BOLD) project to address weaknesses in labour care processes and better connect health systems and communities. The project seeks to develop a ""Simplified, Effective, Labour Monitoring-to-Action"" tool (SELMA) to assist healthcare providers to monitor labour and take decisive actions more efficiently; and by developing an innovative set of service prototypes and/or tools termed ""Passport to Safer Birth"", designed with communities and healthcare providers, to promote access to quality care for women during childbirth. This protocol describes the formative research activities to support the development of these tools.

Methods/design

We will employ qualitative research and service design methodologies in eight health facilities and their catchment communities in Nigeria and Uganda. In the health facilities, focus group discussions (FGD) and in-depth interviews (IDI) will be conducted among different cadres of healthcare providers and facility administrators. In the communities, FGDs and IDIs will be conducted among women who have delivered in a health facility. We will use service design methods to explore women's journey to access and receive childbirth care in order to innovate and design services around the needs and expectations of women, within the context of the health system.

Discussion

This formative research will serve several roles. First, it will provide an in-depth understanding of healthcare providers and health system issues to be accounted for in the final design and implementation of SELMA. Second, it will help to identify key moments (""touch points"") where women's experiences of childbirth care are shaped, and where the overall experience of quality care could be improved. The synthesis of findings from the qualitative and service design activities will help identify potential areas for behaviour change related to the provision and experience of childbirth care, and serve as the basis for the development of Passport to Safer Birth. Please see related articles 'http://dx.doi.org/10.1186/s12978-015-0027-6' and 'http://dx.doi.org/10.1186/s12978-015-0029-4'.",2015-05-26 +27412096,MSAViewer: interactive JavaScript visualization of multiple sequence alignments.,"The MSAViewer is a quick and easy visualization and analysis JavaScript component for Multiple Sequence Alignment data of any size. Core features include interactive navigation through the alignment, application of popular color schemes, sorting, selecting and filtering. The MSAViewer is 'web ready': written entirely in JavaScript, compatible with modern web browsers and does not require any specialized software. The MSAViewer is part of the BioJS collection of components.

Availability and implementation

The MSAViewer is released as open source software under the Boost Software License 1.0. Documentation, source code and the viewer are available at http://msa.biojs.net/. Supplementary information: Supplementary data are available at Bioinformatics online.

Contact

msa@bio.sh.",2016-07-13 +28181802,pmx Webserver: A User Friendly Interface for Alchemistry.,"With the increase of available computational power and improvements in simulation algorithms, alchemical molecular dynamics based free energy calculations have developed into routine usage. To further facilitate the usability of alchemical methods for amino acid mutations, we have developed a web based infrastructure for obtaining hybrid protein structures and topologies. The presented webserver allows amino acid mutation selection in five contemporary molecular mechanics force fields. In addition, a complete mutation scan with a user defined amino acid is supported. The output generated by the webserver is directly compatible with the Gromacs molecular dynamics engine and can be used with any of the alchemical free energy calculation setup. Furthermore, we present a database of input files and precalculated free energy differences for tripeptides approximating a disordered state of a protein, of particular use for protein stability studies. Finally, the usage of the webserver and its output is exemplified by performing an alanine scan and investigating thermodynamic stability of the Trp cage mini protein. The webserver is accessible at http://pmx.mpibpc.mpg.de.",2017-02-16 +28160322,ATLAS: A database linking binding affinities with structures for wild-type and mutant TCR-pMHC complexes.,"The ATLAS (Altered TCR Ligand Affinities and Structures) database (https://zlab.umassmed.edu/atlas/web/) is a manually curated repository containing the binding affinities for wild-type and mutant T cell receptors (TCRs) and their antigens, peptides presented by the major histocompatibility complex (pMHC). The database links experimentally measured binding affinities with the corresponding three dimensional (3D) structures for TCR-pMHC complexes. The user can browse and search affinities, structures, and experimental details for TCRs, peptides, and MHCs of interest. 
We expect this database to facilitate the development of next-generation protein design algorithms targeting TCR-pMHC interactions. ATLAS can be easily parsed using modeling software that builds protein structures for training and testing. As an example, we provide structural models for all mutant TCRs in ATLAS, built using the Rosetta program. Utilizing these structures, we report a correlation of 0.63 between experimentally measured changes in binding energies and our predicted changes. Proteins 2017; 85:908-916. © 2016 Wiley Periodicals, Inc.",2017-02-16 +26819473,BioCircos.js: an interactive Circos JavaScript library for biological data visualization on web applications.,"

Unlabelled

We here present BioCircos.js, an interactive and lightweight JavaScript library especially for biological data interactive visualization. BioCircos.js facilitates the development of web-based applications for circular visualization of various biological data, such as genomic features, genetic variations, gene expression and biomolecular interactions.

Availability and implementation

BioCircos.js and its manual are freely available online at http://bioinfo.ibp.ac.cn/biocircos/

Contact

rschen@ibp.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-27 +24275495,miRBase: annotating high confidence microRNAs using deep sequencing data.,"We describe an update of the miRBase database (http://www.mirbase.org/), the primary microRNA sequence repository. The latest miRBase release (v20, June 2013) contains 24 521 microRNA loci from 206 species, processed to produce 30 424 mature microRNA products. The rate of deposition of novel microRNAs and the number of researchers involved in their discovery continue to increase, driven largely by small RNA deep sequencing experiments. In the face of these increases, and a range of microRNA annotation methods and criteria, maintaining the quality of the microRNA sequence data set is a significant challenge. Here, we describe recent developments of the miRBase database to address this issue. In particular, we describe the collation and use of deep sequencing data sets to assign levels of confidence to miRBase entries. We now provide a high confidence subset of miRBase entries, based on the pattern of mapped reads. The high confidence microRNA data set is available alongside the complete microRNA collection at http://www.mirbase.org/. We also describe embedding microRNA-specific Wikipedia pages on the miRBase website to encourage the microRNA community to contribute and share textual and functional information.",2013-11-25 +28158942,Creating the New from the Old: Combinatorial Libraries Generation with Machine-Learning-Based Compound Structure Optimization.,"The growing computational abilities of various tools that are applied in the broadly understood field of computer-aided drug design have led to the extreme popularity of virtual screening in the search for new biologically active compounds. Most often, the source of such molecules consists of commercially available compound databases, but they can also be searched for within the libraries of structures generated in silico from existing ligands. 
Various computational combinatorial approaches are based solely on the chemical structure of compounds, using different types of substitutions for new molecules formation. In this study, the starting point for combinatorial library generation was the fingerprint referring to the optimal substructural composition in terms of the activity toward a considered target, which was obtained using a machine learning-based optimization procedure. The systematic enumeration of all possible connections between preferred substructures resulted in the formation of target-focused libraries of new potential ligands. The compounds were initially assessed by machine learning methods using a hashed fingerprint to represent molecules; the distribution of their physicochemical properties was also investigated, as well as their synthetic accessibility. The examination of various fingerprints and machine learning algorithms indicated that the Klekota-Roth fingerprint and support vector machine were an optimal combination for such experiments. This study was performed for 8 protein targets, and the obtained compound sets and their characterization are publically available at http://skandal.if-pan.krakow.pl/comb_lib/ .",2017-02-15 +28069590,Identification of SSR markers closely linked to the yellow seed coat color gene in heading Chinese cabbage (Brassica rapa L. ssp. pekinensis).,"Research on the yellow-seeded variety of heading Chinese cabbage will aid in broadening its germplasm resources and lay a foundation for AA genome research in Brassica crops. Here, an F2 segregating population of 1575 individuals was constructed from two inbred lines (brown-seeded '92S105' and yellow-seeded '91-125'). This population was used to identify the linkage molecular markers of the yellow seed coat trait using simple sequence repeat (SSR) techniques combined with a bulk segregant analysis (BSA). 
Of the 144 SSR primer pairs on the A01-A10 chromosomes from the Brassica database (http://brassicadb.org/brad/), two pairs located on the A06 chromosome showed polymorphic bands between the bulk DNA pools of eight brown-seeded and eight yellow-seeded F2 progeny. Based on the genome sequence, 454 SSR markers were designed to A06 to detect these polymorphic bands and were synthesized. Six SSR markers linked to the seed coat color gene were successfully selected for fine linkage genetic map construction, in which the two closest flanking markers, SSR449a and SSR317, mapped the Brsc-ye gene to a 40.2 kb region with distances of 0.07 and 0.06 cM, respectively. The molecular markers obtained in this report will assist in the marker-assisted selection and breeding of yellow-seeded lines in Brassica rapa L. and other close species.",2017-02-15 +29504939,The Association between Lifelong Greenspace Exposure and 3-Dimensional Brain Magnetic Resonance Imaging in Barcelona Schoolchildren.,"BACKGROUND:Proponents of the biophilia hypothesis believe that contact with nature, including green spaces, has a crucial role in brain development in children. Currently, however, we are not aware of evidence linking such exposure with potential effects on brain structure. OBJECTIVE:We determined whether lifelong exposure to residential surrounding greenness is associated with regional differences in brain volume based on 3-dimensional magnetic resonance imaging (3D MRI) among children attending primary school. METHODS:We performed a series of analyses using data from a subcohort of 253 Barcelona schoolchildren from the Brain Development and Air Pollution Ultrafine Particles in School Children (BREATHE) project. 
We averaged satellite-based normalized difference vegetation index (NDVI) across 100-m buffers around all residential addresses since birth to estimate each participant's lifelong exposure to residential surrounding greenness, and we used high-resolution 3D MRIs of brain anatomy to identify regional differences in voxel-wise brain volume associated with greenness exposure. In addition, we performed a supporting substudy to identify regional differences in brain volume associated with measures of working memory (d' from computerized n-back tests) and inattentiveness (hit reaction time standard error from the Attentional Network Task instrument) that were repeated four times over one year. We also performed a second supporting substudy to determine whether peak voxel tissue volumes in brain regions associated with residential greenness predicted cognitive function test scores. RESULTS:Lifelong exposure to greenness was positively associated with gray matter volume in the left and right prefrontal cortex and in the left premotor cortex and with white matter volume in the right prefrontal region, in the left premotor region, and in both cerebellar hemispheres. Some of these regions partly overlapped with regions associated with cognitive test scores (prefrontal cortex and cerebellar and premotor white matter), and peak volumes in these regions predicted better working memory and reduced inattentiveness. CONCLUSION:Our findings from a study population of urban schoolchildren in Barcelona require confirmation, but they suggest that being raised in greener neighborhoods may have beneficial effects on brain development and cognitive function. 
https://doi.org/10.1289/EHP1876.",2018-02-23 +26741409,Molecular mechanisms of protein aggregation from global fitting of kinetic models.,"The elucidation of the molecular mechanisms by which soluble proteins convert into their amyloid forms is a fundamental prerequisite for understanding and controlling disorders that are linked to protein aggregation, such as Alzheimer's and Parkinson's diseases. However, because of the complexity associated with aggregation reaction networks, the analysis of kinetic data of protein aggregation to obtain the underlying mechanisms represents a complex task. Here we describe a framework, using quantitative kinetic assays and global fitting, to determine and to verify a molecular mechanism for aggregation reactions that is compatible with experimental kinetic data. We implement this approach in a web-based software, AmyloFit. Our procedure starts from the results of kinetic experiments that measure the concentration of aggregate mass as a function of time. We illustrate the approach with results from the aggregation of the β-amyloid (Aβ) peptides measured using thioflavin T, but the method is suitable for data from any similar kinetic experiment measuring the accumulation of aggregate mass as a function of time; the input data are in the form of a tab-separated text file. We also outline general experimental strategies and practical considerations for obtaining kinetic data of sufficient quality to draw detailed mechanistic conclusions, and the procedure starts with instructions for extensive data quality control. For the core part of the analysis, we provide an online platform (http://www.amylofit.ch.cam.ac.uk) that enables robust global analysis of kinetic data without the need for extensive programming or detailed mathematical knowledge. 
The software automates repetitive tasks and guides users through the key steps of kinetic analysis: determination of constraints to be placed on the aggregation mechanism based on the concentration dependence of the aggregation reaction, choosing from several fundamental models describing assembly into linear aggregates and fitting the chosen models using an advanced minimization algorithm to yield the reaction orders and rate constants. Finally, we outline how to use this approach to investigate which targets potential inhibitors of amyloid formation bind to and where in the reaction mechanism they act. The protocol, from processing data to determining mechanisms, can be completed in <1 d.",2016-01-07 +23660285,PhenoDigm: analyzing curated annotations to associate animal models with human diseases.,"The ultimate goal of studying model organisms is to translate what is learned into useful knowledge about normal human biology and disease to facilitate treatment and early screening for diseases. Recent advances in genomic technologies allow for rapid generation of models with a range of targeted genotypes as well as their characterization by high-throughput phenotyping. As an abundance of phenotype data become available, only systematic analysis will facilitate valid conclusions to be drawn from these data and transferred to human diseases. Owing to the volume of data, automated methods are preferable, allowing for a reliable analysis of the data and providing evidence about possible gene-disease associations. Here, we propose Phenotype comparisons for DIsease Genes and Models (PhenoDigm), as an automated method to provide evidence about gene-disease associations by analysing phenotype information. PhenoDigm integrates data from a variety of model organisms and, at the same time, uses several intermediate scoring methods to identify only strongly data-supported gene candidates for human genetic diseases. 
We show results of an automated evaluation as well as selected manually assessed examples that support the validity of PhenoDigm. Furthermore, we provide guidance on how to browse the data with PhenoDigm's web interface and illustrate its usefulness in supporting research. Database URL: http://www.sanger.ac.uk/resources/databases/phenodigm",2013-05-09 +27797762,CLIP Tool Kit (CTK): a flexible and robust pipeline to analyze CLIP sequencing data.,"

Summary

UV cross-linking and immunoprecipitation (CLIP), followed by high-throughput sequencing, is a powerful biochemical assay that maps in vivo protein-RNA interactions on a genome-wide scale. The CLIP Tool Kit (CTK) aims at providing a set of tools for flexible, streamlined and comprehensive CLIP data analysis. This software package extends the scope of our original CIMS package.

Availability and implementation

The software is implemented in Perl. The source code and detailed documentation are available at http://zhanglab.c2b2.columbia.edu/index.php/CTK .

Contact

cz2294@columbia.edu.",2017-02-01 +29635518,Presence of aggregates of smooth endoplasmic reticulum in MII oocytes affects oocyte competence: molecular-based evidence.,"STUDY QUESTION:Does the presence of aggregates of smooth endoplasmic reticulum (SERa) impact the transcriptome of human metaphase II (MII) oocytes?. SUMMARY ANSWER:The presence of SERa alters the molecular status of human metaphase II oocytes. WHAT IS KNOWN ALREADY:Oocytes presenting SERa are considered dysmorphic. Oocytes with SERa (SERa+) have been associated with reduced embryological outcome and increased risk of congenital anomalies, although some authors have reported that SERa+ oocytes can lead to healthy newborns. The question of whether or not SERa+ oocytes should be discarded is still open for debate, and no experimental information about the effect of the presence of SERa on the oocyte molecular status is available. STUDY DESIGN, SIZE, DURATION:This study included 28 women, aged <38 years, without any ovarian pathology, and undergoing IVF treatment. Supernumerary MII oocytes with no sign of morphological alterations as well as SERa+ oocytes were donated after written informed consent. A total of 31 oocytes without SERa (SERa-) and 24 SERa+ oocytes were analyzed. PARTICIPANTS/MATERIALS, SETTING, METHODS:Pools of 8-10 oocytes for both group were prepared. Total RNA was extracted from each pool, amplified, labeled and hybridized on oligonucleotide microarrays. Analyses were performed by R software using the limma package. MAIN RESULTS AND THE ROLE OF CHANCE:The expression profiles of SERa+ oocytes significantly differed from those of SERa- oocytes in 488 probe sets corresponding to 102 down-regulated and 283 up-regulated unique transcripts. 
Gene Ontology analysis by DAVID bioinformatics disclosed that genes involved in three main biological processes were significantly down-regulated in SERa+ oocytes respective to SERa- oocytes: (i) cell and mitotic/meiotic nuclear division, spindle assembly, chromosome partition and G2/M transition of mitotic cell cycle; (ii) organization of cytoskeleton and microtubules; and (iii) mitochondrial structure and activity. Among the transcripts up-regulated in SERa+ oocytes, the most significantly (P = 0.002) enriched GO term was 'GoLoco motif', including the RAP1GAP, GPSM3 and GPSM1 genes. LARGE SCALE DATA:Raw microarray data are accessible through GEO Series accession number GSE106222 (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE106222). LIMITATIONS, REASONS FOR CAUTION:Data validation in a larger cohort of samples would be beneficial, although we applied stringent criteria for gene selection (fold-change >3 or <1/3 and FDR < 0.1). Surveys on clinical outcomes, malformation rates and follow-up of babies born after transfer of embryos from SERa+ oocytes are necessary. WIDER IMPLICATIONS OF THE FINDINGS:We provide information on the molecular status of SERa+ oocytes, highlighting possible associations between presence of SERa, altered oocyte physiology and reduced developmental competence. Our study may offer further information that can assist embryologists to make decisions on whether, and with what possible implications, SERa+ oocytes should be used. We believe that the presence of SERa should be still a 'red flag' in IVF practices and that the decision to inseminate SERa+ oocytes should be discussed on a case-by-case basis. STUDY FUNDING/COMPETING INTEREST(s):This study was partially supported by Ferring Pharmaceuticals. 
The authors have no conflicts of interest to declare.",2018-06-01 +28824598,PathoBacTyper: A Web Server for Pathogenic Bacteria Identification and Molecular Genotyping.,"With the decline in the cost of whole-genome sequencing because of the introduction of next-generation sequencing (NGS) techniques, many public health and clinical laboratories have started to use bacterial whole genomes for epidemiological surveillance and clinical investigation. For epidemiological and clinical purposes in this ""NGS era,"" whole-genome-scale single nucleotide polymorphism (wgSNP) analysis for genotyping is considered suitable. In this paper, we present an online service, PathoBacTyper (http://halst.nhri.org.tw/PathoBacTyper/), for pathogenic bacteria identification and genotyping based on wgSNP analysis. More than 400 pathogenic bacteria can be identified and genotyped through this service. Four data sets containing 59 Salmonella Heidelberg isolates from three outbreaks with the same pulsed-field gel electrophoresis pattern, 34 Salmonella Typhimurium isolates from six outbreaks, 103 isolates of hospital-associated vancomycin-resistant Enterococcus faecium and 15 Legionella pneumophila isolates from clinical and environmental samples in Israel were used for demonstrating the operation and testing the performance of the PathoBacTyper service. The test results reveal the applicability of this service for epidemiological typing and clinical investigation.",2017-08-03 +28417000,hackseq: Catalyzing collaboration between biological and computational scientists via hackathon.,"hackseq ( http://www.hackseq.com) was a genomics hackathon with the aim of bringing together a diverse set of biological and computational scientists to work on collaborative bioinformatics projects. In October 2016, 66 participants from nine nations came together for three days for hackseq and collaborated on nine projects ranging from data visualization to algorithm development. 
The response from participants was overwhelmingly positive with 100% (n = 54) of survey respondents saying they would like to participate in future hackathons. We detail key steps for others interested in organizing a successful hackathon and report excerpts from each project.",2017-02-28 +28464793,ICoVeR - an interactive visualization tool for verification and refinement of metagenomic bins.,"

Background

Recent advances in high-throughput sequencing allow for much deeper exploitation of natural and engineered microbial communities, and to unravel so-called ""microbial dark matter"" (microbes that until now have evaded cultivation). Metagenomic analyses result in a large number of genomic fragments (contigs) that need to be grouped (binned) in order to reconstruct draft microbial genomes. While several contig binning algorithms have been developed in the past 2 years, they often lack consensus. Furthermore, these software tools typically lack a provision for the visualization of data and bin characteristics.

Results

We present ICoVeR, the Interactive Contig-bin Verification and Refinement tool, which allows the visualization of genome bins. More specifically, ICoVeR allows curation of bin assignments based on multiple binning algorithms. Its visualization window is composed of two connected and interactive main views, including a parallel coordinates view and a dimensionality reduction plot. To demonstrate ICoVeR's utility, we used it to refine disparate genome bins automatically generated using MetaBAT, CONCOCT and MyCC for an anaerobic digestion metagenomic (AD microbiome) dataset. Out of 31 refined genome bins, 23 were characterized with higher completeness and lower contamination in comparison to their respective, automatically generated, genome bins. Additionally, to benchmark ICoVeR against a previously validated dataset, we used Sharon's dataset representing an infant gut metagenome.

Conclusions

ICoVeR is an open source software package that allows curation of disparate genome bins generated with automatic binning algorithms. It is freely available under the GPLv3 license at https://git.list.lu/eScience/ICoVeR . The data management and analytical functions of ICoVeR are implemented in R, therefore the software can be easily installed on any system for which R is available. Installation and usage guide together with the example files ready to be visualized are also provided via the project wiki. ICoVeR running instance preloaded with AD microbiome and Sharon's datasets can be accessed via the website.",2017-05-02 +27102804,Using expected sequence features to improve basecalling accuracy of amplicon pyrosequencing data.,"

Background

Amplicon pyrosequencing targets a known genetic region and thus inherently produces reads highly anticipated to have certain features, such as conserved nucleotide sequence, and in the case of protein coding DNA, an open reading frame. Pyrosequencing errors, consisting mainly of nucleotide insertions and deletions, are on the other hand likely to disrupt open reading frames. Such an inverse relationship between errors and expectation based on prior knowledge can be used advantageously to guide the process known as basecalling, i.e. the inference of nucleotide sequence from raw sequencing data.

Results

The new basecalling method described here, named Multipass, implements a probabilistic framework for working with the raw flowgrams obtained by pyrosequencing. For each sequence variant Multipass calculates the likelihood and nucleotide sequence of several most likely sequences given the flowgram data. This probabilistic approach enables integration of basecalling into a larger model where other parameters can be incorporated, such as the likelihood for observing a full-length open reading frame at the targeted region. We apply the method to 454 amplicon pyrosequencing data obtained from a malaria virulence gene family, where Multipass generates 20 % more error-free sequences than current state of the art methods, and provides sequence characteristics that allow generation of a set of high confidence error-free sequences.

Conclusions

This novel method can be used to increase accuracy of existing and future amplicon sequencing data, particularly where extensive prior knowledge is available about the obtained sequences, for example in analysis of the immunoglobulin VDJ region where Multipass can be combined with a model for the known recombining germline genes. Multipass is available for Roche 454 data at http://www.cbs.dtu.dk/services/MultiPass-1.0 , and the concept can potentially be implemented for other sequencing technologies as well.",2016-04-22 +25198774,PfalDB: an integrated drug target and chemical database for Plasmodium falciparum.,"Plasmodium falciparum is one of the deadliest protozoan parasite species among those that cause malaria. Uncontrolled use of antimalarial drugs has resulted in evolutionary selection pressure favoring high levels of resistance to antimalarials; currently P.falciparum shows resistance to all classes of antimalarials. Therefore it is essential to identify novel drug targets, and design selective anti-malarials which can overcome resistance. While many drug targets are freely available in various public domain resources, a single comprehensive source of data containing easily searchable and retrievable information is currently lacking. To facilitate the total integration and mining of data emerging from different drug consortia and also to prioritize drug targets for structure-based drug design, an open-access, inclusive comprehensive database for Plasmodium falciparum was established. Meta data of known/modeled structures along with binding site parameters of drug targets have been included in the database. Additionally, chemical compounds showing a positive inhibitory assay against Plasmodium falciparum or known drug targets have also been provided. The database is accessible at http://pfaldb.jnu.ac.in. 
The database provides diverse information regarding the structure, sequence, stage specific gene expression, pathway, action mechanism, essentiality and druggability for each drug target, and literature to assess the validation status of individual drug targets. It also includes information on individual anti-malarials with their activity and bioassay.",2014-01-01 +23285139,Offering an American graduate medical HIV course to health care workers in resource-limited settings via the Internet.,"

Background

Western accredited medical universities can offer graduate-level academic courses to health care workers (HCWs) in resource-limited settings through the Internet. It is not known whether HCWs are interested in these online courses, whether they can perform as well as matriculated students, or whether such courses are educationally or practically relevant.

Methods and findings

In 2011, the University of Washington (UW) Schools of Medicine and Nursing offered the graduate course, ""Clinical Management of HIV"", to HCWs that included a demographic survey, knowledge assessment, and course evaluation. UW faculty delivered HIV clinical topics through ten 2-hour weekly sessions from the perspectives of practicing HIV medicine in developed and developing settings. HCWs viewed lectures through Adobe Acrobat Connect Pro (Adobe Systems, San Jose, CA), and completed online homework on HIV Web Study (http://depts.washington.edu/hivaids/) and online quizzes. HCWs, who met the same passing requirements as UW students by attending 80% lectures, completing ≥90% homework, and achieving a cumulative ≥70% grade on quizzes, were awarded a certificate. 369 HCWs at 33 sites in 21 countries joined the course in 2011, a >15-fold increase since the course was first offered in 2007. The majority of HCWs came from Africa (72%), and most were physicians (41%), nurses (22%), or midlevel practitioners (20%). 298 HCWs (81%) passed all requirements and earned a certificate. In a paired analysis of pre- and post-course HIV knowledge assessments, 56% of HCWs improved their post-course score (p<0.0001) with 27% improving by at least 30%. In the course evaluation, most HCWs rated the course as excellent (53%) or very good (39%).

Conclusions

This online HIV course demonstrated that opening a Western graduate medical and nursing curriculum to HCWs in resource-limited settings is feasible, popular, and valuable, and may address logistic and economic barriers to the provision of high quality education in these settings.",2012-12-20 +24139024,Enhanced XAO: the ontology of Xenopus anatomy and development underpins more accurate annotation of gene expression and queries on Xenbase.,"

Background

The African clawed frogs Xenopus laevis and Xenopus tropicalis are prominent animal model organisms. Xenopus research contributes to the understanding of genetic, developmental and molecular mechanisms underlying human disease. The Xenopus Anatomy Ontology (XAO) reflects the anatomy and embryological development of Xenopus. The XAO provides consistent terminology that can be applied to anatomical feature descriptions along with a set of relationships that indicate how each anatomical entity is related to others in the embryo, tadpole, or adult frog. The XAO is integral to the functionality of Xenbase (http://www.xenbase.org), the Xenopus model organism database.

Results

We significantly expanded the XAO in the last five years by adding 612 anatomical terms, 2934 relationships between them, 640 synonyms, and 547 ontology cross-references. Each term now has a definition, so database users and curators can be certain they are selecting the correct term when specifying an anatomical entity. With developmental timing information now asserted for every anatomical term, the ontology provides internal checks that ensure high-quality gene expression and phenotype data annotation. The XAO, now with 1313 defined anatomical and developmental stage terms, has been integrated with Xenbase expression and anatomy term searches and it enables links between various data types including images, clones, and publications. Improvements to the XAO structure and anatomical definitions have also enhanced cross-references to anatomy ontologies of other model organisms and humans, providing a bridge between Xenopus data and other vertebrates. The ontology is free and open to all users.

Conclusions

The expanded and improved XAO allows enhanced capture of Xenopus research data and aids mechanisms for performing complex retrieval and analysis of gene expression, phenotypes, and antibodies through text-matching and manual curation. Its comprehensive references to ontologies across taxa help integrate these data for human disease modeling.",2013-10-18 +21760913,HelmCoP: an online resource for helminth functional genomics and drug and vaccine targets prioritization.,"A vast majority of the burden from neglected tropical diseases result from helminth infections (nematodes and platyhelminthes). Parasitic helminthes infect over 2 billion, exerting a high collective burden that rivals high-mortality conditions such as AIDS or malaria, and cause devastation to crops and livestock. The challenges to improve control of parasitic helminth infections are multi-fold and no single category of approaches will meet them all. New information such as helminth genomics, functional genomics and proteomics coupled with innovative bioinformatic approaches provide fundamental molecular information about these parasites, accelerating both basic research as well as development of effective diagnostics, vaccines and new drugs. To facilitate such studies we have developed an online resource, HelmCoP (Helminth Control and Prevention), built by integrating functional, structural and comparative genomic data from plant, animal and human helminthes, to enable researchers to develop strategies for drug, vaccine and pesticide prioritization, while also providing a useful comparative genomics platform. HelmCoP encompasses genomic data from several hosts, including model organisms, along with a comprehensive suite of structural and functional annotations, to assist in comparative analyses and to study host-parasite interactions. 
The HelmCoP interface, with a sophisticated query engine as a backbone, allows users to search for multi-factorial combinations of properties and serves readily accessible information that will assist in the identification of various genes of interest. HelmCoP is publicly available at: http://www.nematode.net/helmcop.html.",2011-07-08 +28863604,Fitting pole-zero micromechanical models to cochlear response measurements.,"An efficient way of describing the linear micromechanical response of the cochlea is in terms of its poles and zeros. Pole-zero models with local scaling symmetry are derived for both one and two degree-of-freedom micromechanical systems. These elements are then used in a model of the coupled cochlea, which is optimised to minimise the mean square difference between its frequency response and that measured on the basilar membrane inside the mouse cochlea by Lee, Raphael, Xia, Kim, Grillet, Applegate, Ellerbee Bowden, and Oghalai [(2016) J. Neurosci. 36, 8160-8173] and Oghalai Lab [(2015). https://oghalailab.stanford.edu], at different excitation levels. A model with two degree-of-freedom micromechanics generally fits the measurements better than a model with single degree-of-freedom micromechanics, particularly at low excitations where the cochlea is active, except post-mortem conditions, when the cochlea is passive. The model with the best overall fit to the data is found to be one with two degree-of-freedom micromechanics and 3D fluid coupling. 
Although a unique lumped parameter network cannot be inferred from such a pole-zero description, these fitted results help indicate what properties such a network should have.",2017-08-01 +28493207,New Azulene-Type Sesquiterpenoids from the Fruiting Bodies of Lactarius deliciosus.,"In the 1H NMR-guided fractionation of extracts from the edible mushroom Lactarius deliciosus, two new azulene-type sesquiterpenoids, 7-isopropenyl-4-methyl-azulene-1-carboxylic acid (1) and 15-hydroxy-3,6-dihydrolactarazulene (2), together with seven known compounds were characterized. Their structures were determined on basis of spectroscopic evidence, as well as by comparing with literature data. Amongst the known metabolites, the 13C NMR assignment of 15-hydroxy-6,7-dihydrolactarazulene (3) is reported here for the first time. Moreover, 7-acetyl-4-methylazulene-1-carbaldehyde (5) displayed a moderate antibacterial activity against Staphylococcus aureus. *Digital image of L. deliciosus. Retrieved March 17, 2017 from https://upload.wikimedia.org/wikipedia/commons/e/e3/Lactarius_deliciosus_1_(1).jpg .",2017-05-11 +27153599,HilbertCurve: an R/Bioconductor package for high-resolution visualization of genomic data.,"

Unlabelled

Hilbert curves enable high-resolution visualization of genomic data on a chromosome- or genome-wide scale. Here we present the HilbertCurve package that provides an easy-to-use interface for mapping genomic data to Hilbert curves. The package transforms the curve as a virtual axis, thereby hiding the details of the curve construction from the user. HilbertCurve supports multiple-layer overlay that makes it a powerful tool to correlate the spatial distribution of multiple feature types.

Availability and implementation

The HilbertCurve package and documentation are freely available from the Bioconductor project: http://www.bioconductor.org/packages/devel/bioc/html/HilbertCurve.html

Contact

m.schlesner@dkfz.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-24 +24259429,Database resources of the National Center for Biotechnology Information.,"In addition to maintaining the GenBank nucleic acid sequence database, the National Center for Biotechnology Information (NCBI, http://www.ncbi.nlm.nih.gov) provides analysis and retrieval resources for the data in GenBank and other biological data made available through the NCBI Web site. NCBI resources include Entrez, the Entrez Programming Utilities, MyNCBI, PubMed, PubMed Central, PubReader, Gene, the NCBI Taxonomy Browser, BLAST, BLAST Link, Primer-BLAST, COBALT, RefSeq, UniGene, HomoloGene, ProtEST, dbMHC, dbSNP, dbVar, Epigenomics, the Genetic Testing Registry, Genome and related tools, the Map Viewer, Trace Archive, Sequence Read Archive, BioProject, BioSample, ClinVar, MedGen, HIV-1/Human Protein Interaction Database, Gene Expression Omnibus, Probe, Online Mendelian Inheritance in Animals, the Molecular Modeling Database, the Conserved Domain Database, the Conserved Domain Architecture Retrieval Tool, Biosystems, Protein Clusters and the PubChem suite of small molecule databases. Augmenting many of the Web applications are custom implementations of the BLAST program optimized to search specialized data sets. All these resources can be accessed through the NCBI home page.",2013-11-19 +23619930,MRIdb: medical image management for biobank research.,"Clinical picture archiving and communications systems provide convenient, efficient access to digital medical images from multiple modalities but can prove challenging to deploy, configure and use. MRIdb is a self-contained image database, particularly suited to the storage and management of magnetic resonance imaging data sets for population phenotyping. It integrates a mature image archival system with an intuitive web-based user interface that provides visualisation and export functionality. 
In addition, utilities for auditing, data migration and system monitoring are included in a virtual machine image that is easily deployed with minimal configuration. The result is a freely available turnkey solution, designed to support epidemiological and imaging genetics research. It allows the management of patient data sets in a secure, scalable manner without requiring the installation of any bespoke software on end users' workstations. MRIdb is an open-source software, available for download at http://www3.imperial.ac.uk/bioinfsupport/resources/software/mridb .",2013-10-01 +28826097,The prognostic landscape of tumor-infiltrating immune cell and immunomodulators in lung cancer.,"Tumor-infiltrating immune cells are closely associated with clinical outcome. However, immunohistochemistry-based analysis of tumor infiltrates can be misleading as the representative marker of an immune subpopulation might be expressed in other cell types. In this study, based on a metagene approach (known as CIBERSORT) and an online database, The Cancer Immunome Atlas (https://tcia.at/), we comprehensively analyzed the tumor-infiltrating immune cells present in lung adenocarcinoma (LUAD) and lung squamous cell carcinoma (LUSC). A total of 22 types of both adaptive and innate tumor-infiltrating immune cells were evaluated in LUAD (n=492) and LUSC (n=488). As a result, tumors lacking memory B cells or with increased number of M0 macrophages were associated with the poor prognosis in LUAD at early clinical stage. In LUSC, T follicular helper cells were associated with favorable outcome, while increased number of neutrophils predicted a poor outcome. Moreover, Kaplan-Meier analysis of the prognostic value of immune checkpoint molecules revealed that expression of ICOS was positively correlated with the clinical outcome of patients with LUAD. 
Collectively, our data suggest that tumor-infiltrating immune cells in lung cancer are likely to be important determinants of both prognosis and response to immunotherapies.",2017-08-18 +27575920,Trend analysis of performance parameters of pre-packed columns for protein chromatography over a time span of ten years.,"Pre-packed small scale chromatography columns are increasingly used for process development, for determination of design space in bioprocess development, and for post-licence process verifications. The packing quality of 30,000 pre-packed columns delivered to customers over a period 10 years has been analyzed by advanced statistical tools. First, the data were extracted and checked for inconsistencies, and then were tabulated and made ready for statistical processing using the programming language Perl (https://www.perl.org/) and the statistical computing environment R (https://www.r-project.org/). Reduced HETP and asymmetry were plotted over time to obtain a trend of packing quality over 10 years. The obtained data were used as a visualized coefficient of variation analysis (VCVA), a process that has often been applied in other industries such as semiconductor manufacturing. A typical fluctuation of reduced HETP was seen. A Tsunami effect in manufacturing, the effect of propagation of manufacturing deviations leading to out-of-specification products, was not observed with these pre-packed columns. Principal component analysis (PCA) showed that all packing materials cluster. Our data analysis showed that the current commercially available chromatography media used for biopharmaceutical manufacturing can be reproducibly and uniformly packed in polymer-based chromatography columns, which are designed for ready-to-use purposes. 
Although the number of packed columns has quadrupled over one decade the packing quality has remained stable.",2016-07-21 +26656615,Empirical likelihood tests for nonparametric detection of differential expression from RNA-seq data.,"The availability of large quantities of transcriptomic data in the form of RNA-seq count data has necessitated the development of methods to identify genes differentially expressed between experimental conditions. Many existing approaches apply a parametric model of gene expression and so place strong assumptions on the distribution of the data. Here we explore an alternate nonparametric approach that applies an empirical likelihood framework, allowing us to define likelihoods without specifying a parametric model of the data. We demonstrate the performance of our method when applied to gold standard datasets, and to existing experimental data. Our approach outperforms or closely matches performance of existing methods in the literature, and requires modest computational resources. An R package, EmpDiff implementing the methods described in the paper is available from: http://homepages.inf.ed.ac.uk/tthorne/software/packages/EmpDiff_0.99.tar.gz.",2015-12-01 +25118225,The PROMIS smoking assessment toolkit--background and introduction to supplement.,"

Introduction

The PROMIS Smoking Initiative has developed an assessment toolkit for measuring 6 domains of interest to cigarette smoking research: nicotine dependence, coping expectancies, emotional and sensory expectancies, health expectancies, psychosocial expectancies, and social motivations for smoking. The papers in this supplement describe the methods used to develop these item banks, their psychometric properties, and the preliminary evidence for their validity. This commentary is meant to provide background information for the material in this supplement.

Methods

After discussing the use of item response theory in behavioral measurement, I will briefly review the initial developmental steps for the smoking assessment toolkit. Finally, I will describe the contents of this supplement and provide some closing remarks.

Results

Psychometric evidence strongly supports the utility of the toolkit of item banks, short forms (SFs), and computer adaptive tests (CATs). The item banks for daily smokers produce scores with reliability estimates above 0.90 for a wide range of each cigarette smoking domain continuum, and SF and CAT administrations also achieve high reliability (generally greater than 0.85) using very few items (4-7 items for most banks). Performance of the banks for nondaily smokers is similar. Preliminary evidence supports the concurrent and the discriminant validity of the bank domains.

Conclusions

The new smoking assessment toolkit has attractive measurement features that are likely to benefit smoking research as researchers begin to utilize this resource. Information about the toolkit and access to the assessments is available at the project Web site (http://www.rand.org/health/projects/promis-smoking-initiative.html) and can also be accessed via the PROMIS Assessment Center (www.assessmentcenter.net).",2014-09-01 +29517817,Magnetic susceptibility increases as diamagnetic molecules breakdown: Myelin digestion during multiple sclerosis lesion formation contributes to increase on QSM.,"

Background

The pathological processes in the first weeks of multiple sclerosis (MS) lesion formation include myelin digestion that breaks chemical bonds in myelin lipid layers. This can increase lesion magnetic susceptibility, which is a potentially useful biomarker in MS patient management, but not yet investigated.

Purpose

To understand and quantify the effects of myelin digestion on quantitative susceptibility mapping (QSM) of MS lesions.

Study type

Histological and QSM analyses on in vitro models of myelin breakdown and MS lesion formation in vivo.

Population/specimens

Acutely demyelinating white matter lesions from MS autopsy tissue were stained with the lipid dye oil red O. Myelin basic protein (MBP), a major membrane protein of myelin, was digested with trypsin. Purified human myelin was denatured with sodium dodecyl sulfate (SDS). QSM was performed on phantoms containing digestion products and untreated controls. In vivo QSM was performed on five MS patients with newly enhancing lesions, and then repeated within 2 weeks.

Field strength/sequence

3D T2*-weighted spoiled multiecho gradient echo scans performed at 3T.

Assessment

Region of interest analyses were performed by a biochemist and a neuroradiologist to determine susceptibility changes on in vitro and in vivo QSM images.

Statistical tests

Not applicable.

Results

MBP degradation by trypsin increased the QSM measurement by an average of 112 ± 37 ppb, in excellent agreement with a theoretical estimate of 111 ppb. Degradation of human myelin by SDS increased the QSM measurement by 23 ppb. As MS lesions changed from gadolinium enhancing to nonenhancing over an average of 15.8 ± 3.7 days, their susceptibility increased by an average of 7.5 ± 6.3 ppb.

Data conclusion

Myelin digestion in the early stages of MS lesion formation contributes to an increase in tissue susceptibility, detectable by QSM, as a lesion evolves from gadolinium enhancing to nonenhancing.

Level of evidence

1 Technical Efficacy: Stage 3 J. Magn. Reson. Imaging 2018;47:1281-1287.",2018-03-08 +24288368,Ribosomal Database Project: data and tools for high throughput rRNA analysis.,"Ribosomal Database Project (RDP; http://rdp.cme.msu.edu/) provides the research community with aligned and annotated rRNA gene sequence data, along with tools to allow researchers to analyze their own rRNA gene sequences in the RDP framework. RDP data and tools are utilized in fields as diverse as human health, microbial ecology, environmental microbiology, nucleic acid chemistry, taxonomy and phylogenetics. In addition to aligned and annotated collections of bacterial and archaeal small subunit rRNA genes, RDP now includes a collection of fungal large subunit rRNA genes. RDP tools, including Classifier and Aligner, have been updated to work with this new fungal collection. The use of high-throughput sequencing to characterize environmental microbial populations has exploded in the past several years, and as sequence technologies have improved, the sizes of environmental datasets have increased. With release 11, RDP is providing an expanded set of tools to facilitate analysis of high-throughput data, including both single-stranded and paired-end reads. In addition, most tools are now available as open source packages for download and local use by researchers with high-volume needs or who would like to develop custom analysis pipelines.",2013-11-27 +24124417,MOBBED: a computational data infrastructure for handling large collections of event-rich time series datasets in MATLAB.,"Experiments to monitor human brain activity during active behavior record a variety of modalities (e.g., EEG, eye tracking, motion capture, respiration monitoring) and capture a complex environmental context leading to large, event-rich time series datasets. 
The considerable variability of responses within and among subjects in more realistic behavioral scenarios requires experiments to assess many more subjects over longer periods of time. This explosion of data requires better computational infrastructure to more systematically explore and process these collections. MOBBED is a lightweight, easy-to-use, extensible toolkit that allows users to incorporate a computational database into their normal MATLAB workflow. Although capable of storing quite general types of annotated data, MOBBED is particularly oriented to multichannel time series such as EEG that have event streams overlaid with sensor data. MOBBED directly supports access to individual events, data frames, and time-stamped feature vectors, allowing users to ask questions such as what types of events or features co-occur under various experimental conditions. A database provides several advantages not available to users who process one dataset at a time from the local file system. In addition to archiving primary data in a central place to save space and avoid inconsistencies, such a database allows users to manage, search, and retrieve events across multiple datasets without reading the entire dataset. The database also provides infrastructure for handling more complex event patterns that include environmental and contextual conditions. The database can also be used as a cache for expensive intermediate results that are reused in such activities as cross-validation of machine learning algorithms. MOBBED is implemented over PostgreSQL, a widely used open source database, and is freely available under the GNU general public license at http://visual.cs.utsa.edu/mobbed. Source and issue reports for MOBBED are maintained at http://vislab.github.com/MobbedMatlab/",2013-10-10 +25378313,An update on LNCipedia: a database for annotated human lncRNA sequences.,"The human genome is pervasively transcribed, producing thousands of non-coding RNA transcripts. 
The majority of these transcripts are long non-coding RNAs (lncRNAs) and novel lncRNA genes are being identified at rapid pace. To streamline these efforts, we created LNCipedia, an online repository of lncRNA transcripts and annotation. Here, we present LNCipedia 3.0 (http://www.lncipedia.org), the latest version of the publicly available human lncRNA database. Compared to the previous version of LNCipedia, the database grew over five times in size, gaining over 90,000 new lncRNA transcripts. Assessment of the protein-coding potential of LNCipedia entries is improved with state-of-the art methods that include large-scale reprocessing of publicly available proteomics data. As a result, a high-confidence set of lncRNA transcripts with low coding potential is defined and made available for download. In addition, a tool to assess lncRNA gene conservation between human, mouse and zebrafish has been implemented.",2014-11-05 +27153657,Spectral identification of topological domains.,"

Motivation

Topological domains have been proposed as the backbone of interphase chromosome structure. They are regions of high local contact frequency separated by sharp boundaries. Genes within a domain often have correlated transcription. In this paper, we present a computationally efficient spectral algorithm to identify topological domains from chromosome conformation data (Hi-C data). We consider the genome as a weighted graph with vertices defined by loci on a chromosome and the edge weights given by interaction frequency between two loci. Laplacian-based graph segmentation is then applied iteratively to obtain the domains at the given compactness level. Comparison with algorithms in the literature shows the advantage of the proposed strategy.

Results

An efficient algorithm is presented to identify topological domains from the Hi-C matrix.

Availability and implementation

The Matlab source code and illustrative examples are available at http://bionetworks.ccmb.med.umich.edu/

Contact

indikar@med.umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-05 +28166858,Genetic diversity of Mycobacterium avium complex strains isolated in Argentina by MIRU-VNTR.,"Mycobacterium avium sp. avium (MAA), M. avium sp. hominissuis (MAH), and M. avium sp. paratuberculosis (MAP) are the main members of the M. avium complex (MAC) causing diseases in several hosts. The aim of this study was to describe the genetic diversity of MAC isolated from different hosts. Twenty-six MAH and 61 MAP isolates were recovered from humans and cattle, respectively. GenoType CM® and IS1311-PCR were used to identify Mycobacterium species. The IS901-PCR was used to differentiate between MAH and MAA, while IS900-PCR was used to identify MAP. Genotyping was performed using a mycobacterial interspersed repetitive-unit-variable-number tandem-repeat (MIRU-VNTR) scheme (loci: 292, X3, 25, 47, 3, 7, 10, 32) and patterns (INMV) were assigned according to the MAC-INMV database (http://mac-inmv.tours.inra.fr/). Twenty-two (22/26, 84·6%) MAH isolates were genotyped and 16 were grouped into the following, INMV 92, INMV 121, INMV 97, INMV 103, INMV 50, and INMV 40. The loci X3 and 25 showed the largest diversity (D: 0·5844), and the global discriminatory index (Hunter and Gaston discriminatory index, HGDI) was 0·9300. MAP (100%) isolates were grouped into INMV 1, INMV 2, INMV 11, INMV 8, and INMV 5. The HGDI was 0·6984 and loci 292 and 7 had the largest D (0·6980 and 0·5050). MAH presented a higher D when compared with MAP. The MIRU-VNTR was a useful tool to describe the genetic diversity of both MAH and MAP as well as to identify six new MAH patterns that were conveniently reported to the MAC-INMV database. 
It was also demonstrated that, in the geographical region studied, human MAC cases were produced by MAH as there was no MAA found among the human clinical samples.",2017-02-07 +27342648,TRaCE+: Ensemble inference of gene regulatory networks from transcriptional expression profiles of gene knock-out experiments.,"

Background

The inference of gene regulatory networks (GRNs) from transcriptional expression profiles is challenging, predominantly due to its underdetermined nature. One important consequence of underdetermination is the existence of many possible solutions to this inference. Our previously proposed ensemble inference algorithm TRaCE addressed this issue by inferring an ensemble of network directed graphs (digraphs) using differential gene expressions from gene knock-out (KO) experiments. However, TRaCE could not deal with the mode of the transcriptional regulations (activation or repression), an important feature of GRNs.

Results

In this work, we developed a new algorithm called TRaCE+ for the inference of an ensemble of signed GRN digraphs from transcriptional expression data of gene KO experiments. The sign of the edges indicates whether the regulation is an activation (positive) or a repression (negative). TRaCE+ generates the upper and lower bounds of the ensemble, which define uncertain regulatory interactions that could not be verified by the data. As demonstrated in the case studies using Escherichia coli GRN and 100-gene gold-standard GRNs from DREAM 4 network inference challenge, by accounting for regulatory signs, TRaCE+ could extract more information from the KO data than TRaCE, leading to fewer uncertain edges. Importantly, iterating TRaCE+ with an optimal design of gene KOs could resolve the underdetermined issue of GRN inference in much fewer KO experiments than using TRaCE.

Conclusions

TRaCE+ expands the applications of ensemble GRN inference strategy by accounting for the mode of the gene regulatory interactions. In comparison to TRaCE, TRaCE+ enables a better utilization of gene KO data, thereby reducing the cost of tackling underdetermined GRN inference. TRaCE+ subroutines for MATLAB are freely available at the following website: http://www.cabsel.ethz.ch/tools/trace.html .",2016-06-24 +24217911,AgeFactDB--the JenAge Ageing Factor Database--towards data integration in ageing research.,"AgeFactDB (http://agefactdb.jenage.de) is a database aimed at the collection and integration of ageing phenotype data including lifespan information. Ageing factors are considered to be genes, chemical compounds or other factors such as dietary restriction, whose action results in a changed lifespan or another ageing phenotype. Any information related to the effects of ageing factors is called an observation and is presented on observation pages. To provide concise access to the complete information for a particular ageing factor, corresponding observations are also summarized on ageing factor pages. In a first step, ageing-related data were primarily taken from existing databases such as the Ageing Gene Database--GenAge, the Lifespan Observations Database and the Dietary Restriction Gene Database--GenDR. In addition, we have started to include new ageing-related information. Based on homology data taken from the HomoloGene Database, AgeFactDB also provides observation and ageing factor pages of genes that are homologous to known ageing-related genes. These homologues are considered as candidate or putative ageing-related genes. AgeFactDB offers a variety of search and browse options, and also allows the download of ageing factor or observation lists in TSV, CSV and XML formats.",2013-11-11 +27525867,IFGFA: Identification of featured genes from genomic data using factor analysis. 
,"In this study, a software tool (IFGFA) for identification of featured genes from gene expression data based on latent factor analysis was developed. Despite the availability of computational methods and statistical models appropriate for analyzing special genomic data, IFGFA provides a platform for predicting colon cancer-related genes and can be applied to other cancer types. The computational framework behind IFGFA is based on the well-established Bayesian factor and regression model and prior knowledge about the gene from OMIM. We validated the predicted genes by analyzing somatic mutations in patients. An interface was developed to enable users to run the computational framework efficiently through visual programming. IFGFA is executable in a Windows system and does not require other dependent software packages. This program can be freely downloaded at http://www.fupage.org/downloads/ifgfa.zip.",2016-07-25 +25753703,"Bacterial, plant, and fungal carbohydrate structure databases: daily usage.","Natural carbohydrates play important roles in living systems and therefore are used as diagnostic and therapeutic targets. The main goal of glycomics is systematization of carbohydrates and elucidation of their role in human health and disease. The amount of information on natural carbohydrates accumulates rapidly, but scientists still lack databases and computer-assisted tools needed for orientation in the glycomic information space. Therefore, freely available, regularly updated, and cross-linked databases are demanded. Bacterial Carbohydrate Structure Database (Bacterial CSDB) was developed for provision of structural, bibliographic, taxonomic, NMR spectroscopic, and other related information on bacterial and archaeal carbohydrate structures. Its main features are (1) coverage above 90%, (2) high data consistence (above 90% of error-free records), and (3) presence of manually verified bibliographic, NMR spectroscopic, and taxonomic annotations. 
Recently, CSDB has been expanded to cover carbohydrates of plant and fungal origin. The achievement of full coverage in the plant and fungal domains is expected in the future. CSDB is freely available on the Internet as a web service at http://csdb.glycoscience.ru. This chapter aims at showing how to use CSDB in your daily scientific practice.",2015-01-01 +29295714,"fDETECT webserver: fast predictor of propensity for protein production, purification, and crystallization.","

Background

Development of predictors of propensity of protein sequences for successful crystallization has been actively pursued for over a decade. A few novel methods that expanded the scope of these predictions to address additional steps of protein production and structure determination pipelines were released in recent years. The predictive performance of the current methods is modest. This is because the only input that they use is the protein sequence and since the experimental annotations of these data might be inconsistent given that they were collected across many laboratories and centers. However, even these modest levels of predictive quality are still practical compared to the reported low success rates of crystallization, which are below 10%. We focus on another important aspect related to a high computational cost of running the predictors that offer the expanded scope.

Results

We introduce a novel fDETECT webserver that provides very fast and modestly accurate predictions of the success of protein production, purification, crystallization, and structure determination. Empirical tests on two datasets demonstrate that fDETECT is more accurate than the only other similarly fast method, and similarly accurate and three orders of magnitude faster than the currently most accurate predictors. Our method predicts a single protein in about 120 milliseconds and needs less than an hour to generate the four predictions for an entire human proteome. Moreover, we empirically show that fDETECT secures similar levels of predictive performance when compared with four representative methods that only predict success of crystallization, while it also provides the other three predictions. A webserver that implements fDETECT is available at http://biomine.cs.vcu.edu/servers/fDETECT/ .

Conclusions

fDETECT is a computational tool that supports target selection for protein production and X-ray crystallography-based structure determination. It offers predictive quality that matches or exceeds other state-of-the-art tools and is especially suitable for the analysis of large protein sets.",2018-01-03 +24330140,NEMATIC: a simple and versatile tool for the in silico analysis of plant-nematode interactions.,"Novel approaches for the control of agriculturally damaging nematodes are sorely needed. Endoparasitic nematodes complete their life cycle within the root vascular cylinder, inducing specialized feeding cells: giant cells for root-knot nematodes and syncytia for cyst nematodes. Both nematodes hijack parts of the transduction cascades involved in developmental processes, or partially mimic the plant responses to other interactions with microorganisms, but molecular evidence of their differences and commonalities is still under investigation. Transcriptomics has been used to describe global expression profiles of their interaction with Arabidopsis, generating vast lists of differentially expressed genes. Although these results are available in public databases and publications, the information is scattered and difficult to handle. Here, we present a rapid, visual, user-friendly and easy to handle spreadsheet tool, called NEMATIC (NEMatode-Arabidopsis Transcriptomic Interaction Compendium; http://www.uclm.es/grupo/gbbmp/english/nematic.asp). It combines existing transcriptomic data for the interaction between Arabidopsis and plant-endoparasitic nematodes with data from different transcriptomic analyses regarding hormone and cell cycle regulation, development, different plant tissues, cell types and various biotic stresses. NEMATIC facilitates efficient in silico studies on plant-nematode biology, allowing rapid cross-comparisons with complex datasets and obtaining customized gene selections through sequential comparative and filtering steps. 
It includes gene functional classification and links to utilities from several databases. This data-mining spreadsheet will be valuable for the understanding of the molecular bases subjacent to feeding site formation by comparison with other plant systems, and for the selection of genes as potential tools for biotechnological control of nematodes, as demonstrated in the experimentally confirmed examples provided.",2014-02-12 +26559505,Identification of differentially methylated loci using wavelet-based functional mixed models.,"

Motivation

DNA methylation is a key epigenetic modification that can modulate gene expression. Over the past decade, a lot of studies have focused on profiling DNA methylation and investigating its alterations in complex diseases such as cancer. While early studies were mostly restricted to CpG islands or promoter regions, recent findings indicate that many of important DNA methylation changes can occur in other regions and DNA methylation needs to be examined on a genome-wide scale. In this article, we apply the wavelet-based functional mixed model methodology to analyze the high-throughput methylation data for identifying differentially methylated loci across the genome. Contrary to many commonly-used methods that model probes independently, this framework accommodates spatial correlations across the genome through basis function modeling as well as correlations between samples through functional random effects, which allows it to be applied to many different settings and potentially leads to more power in detection of differential methylation.

Results

We applied this framework to three different high-dimensional methylation data sets (CpG Shore data, THREE data and NIH Roadmap Epigenomics data), studied previously in other works. A simulation study based on CpG Shore data suggested that in terms of detection of differentially methylated loci, this modeling approach using wavelets outperforms analogous approaches modeling the loci as independent. For the THREE data, the method suggests newly detected regions of differential methylation, which were not reported in the original study.

Availability and implementation

Automated software called WFMM is available at https://biostatistics.mdanderson.org/SoftwareDownload CpG Shore data is available at http://rafalab.dfci.harvard.edu NIH Roadmap Epigenomics data is available at http://compbio.mit.edu/roadmap

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

jefmorris@mdanderson.org.",2015-11-11 +26425931,Child Abuse Mimic: Avulsion Injury in a Child With Penoscrotal Webbing.,"Sexual abuse of children is prevalent in today's society. In 2012, approximately 686,000 children (9.2 per 1000) in the United States were determined to be victims of substantiated child abuse and neglect, according to national data compiled by child protective service agencies; victimization rates were highest for children younger than 1 year. Nearly 9.3% of maltreated children were victims of sexual abuse, this finding was reported by US Department of Health and Human Services (http://www.acf.hhs.gov/programs/cb/research-data-technology/statistics-research/child-maltreatment). Previous research has shown that as many as 1 in 3 girls and 1 in 7 boys will be sexually abused during childhood (Child Abuse Negl. 2003;27:1205-1222). Although sexual abuse seems to be less common in boys than girls, this may be partly due to underdiagnosis and underreporting of sexual abuse in boys (Arch Dis Child. 2007;92:328-331). Clinicians should therefore consider the possibility of sexual abuse when boys present with genital injuries, because failing to recognize and diagnose sexual abuse can pose an ongoing safety risk to a child. However, an erroneous diagnosis of sexual abuse can have equally hazardous repercussions, including removal of a child from their caregivers or prosecution of an innocent individual. A number of medical conditions can mimic child sexual abuse injuries, including anal fissures, failure of midline fusion, perianal streptococcal dermatitis, and straddle injury (J Pediatr Health Care. 2009;23:283-288 and Acta Paediatr. 2011;100:590-593). The following case involves a 5-week-old male infant who presented to the pediatric emergency department with an avulsion injury to his penis concerning for sexual abuse. 
He was ultimately diagnosed with a relatively rare anatomic variant of the genitalia and determined to have sustained an accidental injury whose appearance mimicked abuse.",2017-04-01 +26873931,Privacy-preserving microbiome analysis using secure computation.,"

Motivation

Developing targeted therapeutics and identifying biomarkers relies on large amounts of research participant data. Beyond human DNA, scientists now investigate the DNA of micro-organisms inhabiting the human body. Recent work shows that an individual's collection of microbial DNA consistently identifies that person and could be used to link a real-world identity to a sensitive attribute in a research dataset. Unfortunately, the current suite of DNA-specific privacy-preserving analysis tools does not meet the requirements for microbiome sequencing studies.

Results

To address privacy concerns around microbiome sequencing, we implement metagenomic analyses using secure computation. Our implementation allows comparative analysis over combined data without revealing the feature counts for any individual sample. We focus on three analyses and perform an evaluation on datasets currently used by the microbiome research community. We use our implementation to simulate sharing data between four policy-domains. Additionally, we describe an application of our implementation for patients to combine data that allows drug developers to query against and compensate patients for the analysis.

Availability and implementation

The software is freely available for download at: http://cbcb.umd.edu/∼hcorrada/projects/secureseq.html

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

hcorrada@umiacs.umd.edu.",2016-02-11 +25413576,Tree shrew database (TreeshrewDB): a genomic knowledge base for the Chinese tree shrew.,"The tree shrew (Tupaia belangeri) is a small mammal with a close relationship to primates and it has been proposed as an alternative experimental animal to primates in biomedical research. The recent release of a high-quality Chinese tree shrew genome enables more researchers to use this species as the model animal in their studies. With the aim to making the access to an extensively annotated genome database straightforward and easy, we have created the Tree shrew Database (TreeshrewDB). This is a web-based platform that integrates the currently available data from the tree shrew genome, including an updated gene set, with a systematic functional annotation and a mRNA expression pattern. In addition, to assist with automatic gene sequence analysis, we have integrated the common programs Blast, Muscle, GBrowse, GeneWise and codeml, into TreeshrewDB. We have also developed a pipeline for the analysis of positive selection. The user-friendly interface of TreeshrewDB, which is available at http://www.treeshrewdb.org, will undoubtedly help in many areas of biological research into the tree shrew.",2014-11-21 +28912564,Transcriptome analysis of developing lens reveals abundance of novel transcripts and extensive splicing alterations.,"Lens development involves a complex and highly orchestrated regulatory program. Here, we investigate the transcriptomic alterations and splicing events during mouse lens formation using RNA-seq data from multiple developmental stages, and construct a molecular portrait of known and novel transcripts. We show that the extent of novelty of expressed transcripts decreases significantly in post-natal lens compared to embryonic stages. 
Characterization of novel transcripts into partially novel transcripts (PNTs) and completely novel transcripts (CNTs) (novelty score ≥ 70%) revealed that the PNTs are both highly conserved across vertebrates and highly expressed across multiple stages. Functional analysis of PNTs revealed their widespread role in lens developmental processes while hundreds of CNTs were found to be widely expressed and predicted to encode for proteins. We verified the expression of four CNTs across stages. Examination of splice isoforms revealed skipped exon and retained intron to be the most abundant alternative splicing events during lens development. We validated by RT-PCR and Sanger sequencing, the predicted splice isoforms of several genes Banf1, Cdk4, Cryaa, Eif4g2, Pax6, and Rbm5. Finally, we present a splicing browser Eye Splicer ( http://www.iupui.edu/~sysbio/eye-splicer/ ), to facilitate exploration of developmentally altered splicing events and to improve understanding of post-transcriptional regulatory networks during mouse lens development.",2017-09-14 +26692900,Network construction and structure detection with metagenomic count data.,"

Background

The human microbiome plays a critical role in human health. Massive amounts of metagenomic data have been generated with advances in next-generation sequencing technologies that characterize microbial communities via direct isolation and sequencing. How to extract, analyze, and transform these vast amounts of data into useful knowledge is a great challenge to bioinformaticians. Microbial biodiversity research has focused primarily on taxa composition and abundance and less on the co-occurrences among different taxa. However, taxa co-occurrences and their relationships to environmental and clinical conditions are important because network structure may help to understand how microbial taxa function together.

Results

We propose a systematic robust approach for bacteria network construction and structure detection using metagenomic count data. Pairwise similarity/distance measures between taxa are proposed by adapting distance measures for samples in ecology. We also extend the sparse inverse covariance approach to a sparse inverse of a similarity matrix from count data for network construction. Our approach is efficient for large metagenomic count data with thousands of bacterial taxa. We evaluate our method with real and simulated data. Our method identifies true and biologically significant network structures efficiently.

Conclusions

Network analysis is crucial for detecting subnetwork structures with metagenomic count data. We developed a software tool in MATLAB for network construction and biologically significant module detection. Software MetaNet can be downloaded from http://biostatistics.csmc.edu/MetaNet/.",2015-12-12 +24467687,Non-synonymous variations in cancer and their effects on the human proteome: workflow for NGS data biocuration and proteome-wide analysis of TCGA data.,"

Background

Next-generation sequencing (NGS) technologies have resulted in petabytes of scattered data, decentralized in archives, databases and sometimes in isolated hard-disks which are inaccessible for browsing and analysis. It is expected that curated secondary databases will help organize some of this Big Data thereby allowing users better navigate, search and compute on it.

Results

To address the above challenge, we have implemented a NGS biocuration workflow and are analyzing short read sequences and associated metadata from cancer patients to better understand the human variome. Curation of variation and other related information from control (normal tissue) and case (tumor) samples will provide comprehensive background information that can be used in genomic medicine research and application studies. Our approach includes a CloudBioLinux Virtual Machine which is used upstream of an integrated High-performance Integrated Virtual Environment (HIVE) that encapsulates Curated Short Read archive (CSR) and a proteome-wide variation effect analysis tool (SNVDis). As a proof-of-concept, we have curated and analyzed control and case breast cancer datasets from the NCI cancer genomics program - The Cancer Genome Atlas (TCGA). Our efforts include reviewing and recording in CSR available clinical information on patients, mapping of the reads to the reference followed by identification of non-synonymous Single Nucleotide Variations (nsSNVs) and integrating the data with tools that allow analysis of effect nsSNVs on the human proteome. Furthermore, we have also developed a novel phylogenetic analysis algorithm that uses SNV positions and can be used to classify the patient population. The workflow described here lays the foundation for analysis of short read sequence data to identify rare and novel SNVs that are not present in dbSNP and therefore provides a more comprehensive understanding of the human variome. Variation results for single genes as well as the entire study are available from the CSR website (http://hive.biochemistry.gwu.edu/dna.cgi?cmd=csr).

Conclusions

Availability of thousands of sequenced samples from patients provides a rich repository of sequence information that can be utilized to identify individual level SNVs and their effect on the human proteome beyond what the dbSNP database provides.",2014-01-27 +25414341,The IPD and IMGT/HLA database: allele variant databases.,"The Immuno Polymorphism Database (IPD) was developed to provide a centralized system for the study of polymorphism in genes of the immune system. Through the IPD project we have established a central platform for the curation and publication of locus-specific databases involved either directly or related to the function of the Major Histocompatibility Complex in a number of different species. We have collaborated with specialist groups or nomenclature committees that curate the individual sections before they are submitted to IPD for online publication. IPD consists of five core databases, with the IMGT/HLA Database as the primary database. Through the work of the various nomenclature committees, the HLA Informatics Group and in collaboration with the European Bioinformatics Institute we are able to provide public access to this data through the website http://www.ebi.ac.uk/ipd/. The IPD project continues to develop with new tools being added to address scientific developments, such as Next Generation Sequencing, and to address user feedback and requests. Regular updates to the website ensure that new and confirmatory sequences are dispersed to the immunogenetics community, and the wider research and clinical communities.",2014-11-20 +28160064,Minimally invasive transforaminal lumbar interbody fusion versus open transforaminal lumbar interbody fusion: a technical description and review of the literature.,"

Background

Minimally invasive spine surgery (MISS) has been increasingly advocated during the last decade with new studies being reported every year. Minimally invasive spine procedures, such as minimally invasive transforaminal interbody fusion (MI-TLIF), have been introduced to reduce approach-related muscle trauma, to minimise blood loss, and to achieve faster wound healing, quicker ambulation and earlier patient discharge.

Methods

The aim of this article was to give a comprehensive review of the available English literature comparing open TLIF with MI-TLIF techniques published or available online between 1990 and 2014 as identified by an electronic database search on http://www.ncbi.nlm.nih.gov/pubmed . Fourteen relevant studies comparing MI-TLIF and open TLIF cohorts could be identified.

Results and conclusion

MI-TLIF seems to be a valid alternative to open TLIF. Both methods yield good clinical results with similar improvements of Oswestry Disability Index (ODI) and visual analogue scale (VAS) on follow-up. There seems to be no significant differences in clinical outcome and fusion rates on comparison. These results are consistent throughout all reported studies in this review. The most pronounced benefits of MI-TLIF are a significant reduction of blood loss, shorter lengths of hospital stay (LOHS) and lower surgical site infection rates. On the downside, MI-TLIF seems to be associated with significantly higher intraoperative radiation doses, a shallow learning curve, at least in the beginning, longer operating times and potentially more frequent implant failures/cage displacements and revision surgeries.",2017-02-03 +23402499,Open source tools for management and archiving of digital microscopy data to allow integration with patient pathology and treatment information.,"

Background

Virtual microscopy includes digitisation of histology slides and the use of computer technologies for complex investigation of diseases such as cancer. However, automated image analysis, or website publishing of such digital images, is hampered by their large file sizes.

Results

We have developed two Java based open source tools: Snapshot Creator and NDPI-Splitter. Snapshot Creator converts a portion of a large digital slide into a desired quality JPEG image. The image is linked to the patient's clinical and treatment information in a customised open source cancer data management software (Caisis) in use at the Australian Breast Cancer Tissue Bank (ABCTB) and then published on the ABCTB website (http://www.abctb.org.au) using Deep Zoom open source technology. Using the ABCTB online search engine, digital images can be searched by defining various criteria such as cancer type, or biomarkers expressed. NDPI-Splitter splits a large image file into smaller sections of TIFF images so that they can be easily analysed by image analysis software such as Metamorph or Matlab. NDPI-Splitter also has the capacity to filter out empty images.

Conclusions

Snapshot Creator and NDPI-Splitter are novel open source Java tools. They convert digital slides into files of smaller size for further processing. In conjunction with other open source tools such as Deep Zoom and Caisis, this suite of tools is used for the management and archiving of digital microscopy images, enabling digitised images to be explored and zoomed online. Our online image repository also has the capacity to be used as a teaching resource. These tools also enable large files to be sectioned for image analysis.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/5330903258483934.",2013-02-12 +27185892,3D-GNOME: an integrated web service for structural modeling of the 3D genome.,"Recent advances in high-throughput chromosome conformation capture (3C) technology, such as Hi-C and ChIA-PET, have demonstrated the importance of 3D genome organization in development, cell differentiation and transcriptional regulation. There is now a widespread need for computational tools to generate and analyze 3D structural models from 3C data. Here we introduce our 3D GeNOme Modeling Engine (3D-GNOME), a web service which generates 3D structures from 3C data and provides tools to visually inspect and annotate the resulting structures, in addition to a variety of statistical plots and heatmaps which characterize the selected genomic region. Users submit a bedpe (paired-end BED format) file containing the locations and strengths of long range contact points, and 3D-GNOME simulates the structure and provides a convenient user interface for further analysis. Alternatively, a user may generate structures using published ChIA-PET data for the GM12878 cell line by simply specifying a genomic region of interest. 3D-GNOME is freely available at http://3dgnome.cent.uw.edu.pl/.",2016-05-16 +23872200,The GAG database: a new resource to gather genomic annotation cross-references.,"Several institutions provide genomic annotation data, and therefore these data show a significant segmentation and redundancy. Public databases allow access, through their own methods, to genomic and proteomic sequences and related annotation. Although some cross-reference tables are available, they don't cover the complete datasets provided by these databases. The Genomic Annotation Gathering project intends to unify annotation data provided by GenBank and Ensembl. We introduce an intra-species, cross-bank method. 
Generated results provide an enriched set of cross- references. This method allows for identifying an average of 30% of new cross-references that can be integrated to other utilities dedicated to analyzing related annotation data. By using only sequence comparison, we are able to unify two datasets that previously didn't share any stable cross-bank accession method. The whole process is hosted by the GenOuest platform to provide public access to newly generated cross-references and to allow for regular updates (http://gag.genouest.org).",2013-07-16 +28182555,Comparative Validation of Polyp Detection Methods in Video Colonoscopy: Results From the MICCAI 2015 Endoscopic Vision Challenge.,"Colonoscopy is the gold standard for colon cancer screening though some polyps are still missed, thus preventing early disease detection and treatment. Several computational systems have been proposed to assist polyp detection during colonoscopy but so far without consistent evaluation. The lack of publicly available annotated databases has made it difficult to compare methods and to assess if they achieve performance levels acceptable for clinical use. The Automatic Polyp Detection sub-challenge, conducted as part of the Endoscopic Vision Challenge (http://endovis.grand-challenge.org) at the international conference on Medical Image Computing and Computer Assisted Intervention (MICCAI) in 2015, was an effort to address this need. In this paper, we report the results of this comparative evaluation of polyp detection methods, as well as describe additional experiments to further explore differences between methods. We define performance metrics and provide evaluation databases that allow comparison of multiple methodologies. Results show that convolutional neural networks are the state of the art. 
Nevertheless, it is also demonstrated that combining different methodologies can lead to an improved overall performance.",2017-02-02 +28052925,ProQ3D: improved model quality assessments using deep learning.,"

Summary

Protein quality assessment is a long-standing problem in bioinformatics. For more than a decade we have developed state-of-art predictors by carefully selecting and optimising inputs to a machine learning method. The correlation has increased from 0.60 in ProQ to 0.81 in ProQ2 and 0.85 in ProQ3 mainly by adding a large set of carefully tuned descriptions of a protein. Here, we show that a substantial improvement can be obtained using exactly the same inputs as in ProQ2 or ProQ3 but replacing the support vector machine by a deep neural network. This improves the Pearson correlation to 0.90 (0.85 using ProQ2 input features).

Availability and implementation

ProQ3D is freely available both as a webserver and a stand-alone program at http://proq3.bioinfo.se/.

Contact

arne@bioinfo.se.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +24830558,The G protein-coupled receptor heterodimer network (GPCR-HetNet) and its hub components.,"G protein-coupled receptors (GPCRs) oligomerization has emerged as a vital characteristic of receptor structure. Substantial experimental evidence supports the existence of GPCR-GPCR interactions in a coordinated and cooperative manner. However, despite the current development of experimental techniques for large-scale detection of GPCR heteromers, in order to understand their connectivity it is necessary to develop novel tools to study the global heteroreceptor networks. To provide insight into the overall topology of the GPCR heteromers and identify key players, a collective interaction network was constructed. Experimental interaction data for each of the individual human GPCR protomers was obtained manually from the STRING and SCOPUS databases. The interaction data were used to build and analyze the network using Cytoscape software. The network was treated as undirected throughout the study. It is comprised of 156 nodes, 260 edges and has a scale-free topology. Connectivity analysis reveals a significant dominance of intrafamily versus interfamily connections. Most of the receptors within the network are linked to each other by a small number of edges. DRD2, OPRM, ADRB2, AA2AR, AA1R, OPRK, OPRD and GHSR are identified as hubs. In a network representation 10 modules/clusters also appear as a highly interconnected group of nodes. Information on this GPCR network can improve our understanding of molecular integration. GPCR-HetNet has been implemented in Java and is freely available at http://www.iiia.csic.es/~ismel/GPCR-Nets/index.html.",2014-05-14 +25348405,UniProt: a hub for protein information.,"UniProt is an important collection of protein sequences and their annotations, which has doubled in size to 80 million sequences during the past year. 
This growth in sequences has prompted an extension of UniProt accession number space from 6 to 10 characters. An increasing fraction of new sequences are identical to a sequence that already exists in the database with the majority of sequences coming from genome sequencing projects. We have created a new proteome identifier that uniquely identifies a particular assembly of a species and strain or subspecies to help users track the provenance of sequences. We present a new website that has been designed using a user-experience design process. We have introduced an annotation score for all entries in UniProt to represent the relative amount of knowledge known about each protein. These scores will be helpful in identifying which proteins are the best characterized and most informative for comparative analysis. All UniProt data is provided freely and is available on the web at http://www.uniprot.org/.",2014-10-27 +27631363,ePGA: A Web-Based Information System for Translational Pharmacogenomics.,"One of the challenges that arise from the advent of personal genomics services is to efficiently couple individual data with state of the art Pharmacogenomics (PGx) knowledge. Existing services are limited to either providing static views of PGx variants or applying a simplistic match between individual genotypes and existing PGx variants. Moreover, there is a considerable amount of haplotype variation associated with drug metabolism that is currently insufficiently addressed. Here, we present a web-based electronic Pharmacogenomics Assistant (ePGA; http://www.epga.gr/) that provides personalized genotype-to-phenotype translation, linked to state of the art clinical guidelines. ePGA's translation service matches individual genotype-profiles with PGx gene haplotypes and infers the corresponding diplotype and phenotype profiles, accompanied with summary statistics. 
Additional features include i) the ability to customize translation based on subsets of variants of clinical interest, and ii) to update the knowledge base with novel PGx findings. We demonstrate ePGA's functionality on genetic variation data from the 1000 Genomes Project.",2016-09-15 +24253300,"InvFEST, a database integrating information of polymorphic inversions in the human genome.","The newest genomic advances have uncovered an unprecedented degree of structural variation throughout genomes, with great amounts of data accumulating rapidly. Here we introduce InvFEST (http://invfestdb.uab.cat), a database combining multiple sources of information to generate a complete catalogue of non-redundant human polymorphic inversions. Due to the complexity of this type of changes and the underlying high false-positive discovery rate, it is necessary to integrate all the available data to get a reliable estimate of the real number of inversions. InvFEST automatically merges predictions into different inversions, refines the breakpoint locations, and finds associations with genes and segmental duplications. In addition, it includes data on experimental validation, population frequency, functional effects and evolutionary history. All this information is readily accessible through a complete and user-friendly web report for each inversion. In its current version, InvFEST combines information from 34 different studies and contains 1092 candidate inversions, which are categorized based on internal scores and manual curation. 
Therefore, InvFEST aims to represent the most reliable set of human inversions and become a central repository to share information, guide future studies and contribute to the analysis of the functional and evolutionary impact of inversions on the human genome.",2013-11-18 +28180317,IMHOTEP-a composite score integrating popular tools for predicting the functional consequences of non-synonymous sequence variants.,"The in silico prediction of the functional consequences of mutations is an important goal of human pathogenetics. However, bioinformatic tools that classify mutations according to their functionality employ different algorithms so that predictions may vary markedly between tools. We therefore integrated nine popular prediction tools (PolyPhen-2, SNPs&GO, MutPred, SIFT, MutationTaster2, Mutation Assessor and FATHMM as well as conservation-based Grantham Score and PhyloP) into a single predictor. The optimal combination of these tools was selected by means of a wide range of statistical modeling techniques, drawing upon 10 029 disease-causing single nucleotide variants (SNVs) from Human Gene Mutation Database and 10 002 putatively ‘benign’ non-synonymous SNVs from UCSC. Predictive performance was found to be markedly improved by model-based integration, whilst maximum predictive capability was obtained with either random forest, decision tree or logistic regression analysis. A combination of PolyPhen-2, SNPs&GO, MutPred, MutationTaster2 and FATHMM was found to perform as well as all tools combined. Comparison of our approach with other integrative approaches such as Condel, CoVEC, CAROL, CADD, MetaSVM and MetaLR using an independent validation dataset, revealed the superiority of our newly proposed integrative approach. 
An online implementation of this approach, IMHOTEP (‘Integrating Molecular Heuristics and Other Tools for Effect Prediction’), is provided at http://www.uni-kiel.de/medinfo/cgi-bin/predictor/.",2017-02-01 +28171531,Pro54DB: a database for experimentally verified sigma-54 promoters.,"

Summary

In prokaryotes, the σ54 promoters are unique regulatory elements and have attracted much attention because they are in charge of the transcription of carbon and nitrogen-related genes and participate in numerous ancillary processes and environmental responses. All findings on σ54 promoters are favorable for a better understanding of their regulatory mechanisms in gene transcription and an accurate discovery of genes missed by wet-lab experimental evidence. In order to provide an up-to-date, interactive and extensible database for σ54 promoters, a free and easily accessible database called Pro54DB (σ54 promoter database) was built to collect information on σ54 promoters. In the current version, it has stored 210 experimentally confirmed σ54 promoters with 297 regulated genes in 43 species manually extracted from 133 publications, which is helpful for researchers in the fields of bioinformatics and molecular biology.

Availability and implementation

Pro54DB is freely available on the web at http://lin.uestc.edu.cn/database/pro54db with all major browsers supported.

Contacts

greatchen@ncst.edu.cn or hlin@uestc.edu.cn",2017-02-01 +27766395,Evidence-Based Management of Pain After Excisional Haemorrhoidectomy Surgery: A PROSPECT Review Update.,"

Background

The aim of this systematic review was to update previous PROSPECT ( http://www.postoppain.org ) review recommendations for the management of pain after excisional haemorrhoidectomy.

Methods

Randomized studies and reviews published in the English language from July 2006 (end date of last review) to March 2016, assessing analgesic, anaesthetic, and operative interventions pertaining to excisional haemorrhoidectomy in adults, and reporting pain scores, were retrieved from the EMBASE and MEDLINE databases.

Results

An additional 464 studies were identified of which 74 met the inclusion criteria. There were 48 randomized controlled trials and 26 reviews. Quantitative analyses were not performed, as there were limited numbers of trials with a sufficiently homogeneous design.

Conclusion

Pudendal nerve block, with or without general anaesthesia, is recommended for all patients undergoing haemorrhoidal surgery. Either closed haemorrhoidectomy, or open haemorrhoidectomy with electrocoagulation of the pedicle is recommended as the primary procedure. Combinations of analgesics (paracetamol, non-steroidal anti-inflammatory drugs, and opioids), topical lignocaine and glyceryl trinitrate, laxatives, and oral metronidazole are recommended post-operatively. The recommendations are largely based on single intervention, not multimodal intervention, studies.",2017-02-01 +28612849,"Psyllids, It's What's on the Inside That Counts: Community Cross Talk Facilitates Prophage Interactions. ","Despite the availability of massive microbial community data sets (e.g., metagenomes), there is still a lack of knowledge on what molecular mechanisms facilitate cross talk between microbes and prophage within a community context. A study published in mSphere by Jain and colleagues (M. Jain, L. A. Fleites, and D. W. Gabriel, mSphere 2:e00171-17, 2017, https://doi.org/10.1128/mSphereDirect.00171-17) reports on an intriguing new twist of how a prophage of the bacterium ""Candidatus Liberibacter asiaticus"" may have its lytic cycle suppressed partly because of a protein that is expressed by a cooccurring bacterium, Wolbachia. Both of these microbes coexist along with other microbial tenants inside their sap-feeding insect host, a psyllid. Although these results are still preliminary and alternative hypotheses need to be tested, these results suggest an interesting new dimension on how regulation of microbial genomes occurs in a community context.",2017-05-01 +24727366,"'In silico expression analysis', a novel PathoPlant web tool to identify abiotic and biotic stress conditions associated with specific cis-regulatory sequences.","Using bioinformatics, putative cis-regulatory sequences can be easily identified using pattern recognition programs on promoters of specific gene sets. 
The abundance of predicted cis-sequences is a major challenge to associate these sequences with a possible function in gene expression regulation. To identify a possible function of the predicted cis-sequences, a novel web tool designated 'in silico expression analysis' was developed that correlates submitted cis-sequences with gene expression data from Arabidopsis thaliana. The web tool identifies the A. thaliana genes harbouring the sequence in a defined promoter region and compares the expression of these genes with microarray data. The result is a hierarchy of abiotic and biotic stress conditions to which these genes are most likely responsive. When testing the performance of the web tool, known cis-regulatory sequences were submitted to the 'in silico expression analysis' resulting in the correct identification of the associated stress conditions. When using a recently identified novel elicitor-responsive sequence, a WT-box (CGACTTTT), the 'in silico expression analysis' predicts that genes harbouring this sequence in their promoter are most likely Botrytis cinerea induced. Consistent with this prediction, the strongest induction of a reporter gene harbouring this sequence in the promoter is observed with B. cinerea in transgenic A. thaliana. DATABASE URL: http://www.pathoplant.de/expression_analysis.php.",2014-04-10 +26883487,Computationally expanding infinium HumanMethylation450 BeadChip array data to reveal distinct DNA methylation patterns of rheumatoid arthritis.,"

Motivation

DNA methylation signatures in rheumatoid arthritis (RA) have been identified in fibroblast-like synoviocytes (FLS) with Illumina HumanMethylation450 array. Since <2% of CpG sites are covered by the Illumina 450K array and whole genome bisulfite sequencing is still too expensive for many samples, computationally predicting DNA methylation levels based on 450K data would be valuable to discover more RA-related genes.

Results

We developed a computational model that is trained on 14 tissues with both whole genome bisulfite sequencing and 450K array data. This model integrates information derived from the similarity of local methylation pattern between tissues, the methylation information of flanking CpG sites and the methylation tendency of flanking DNA sequences. The predicted and measured methylation values were highly correlated with a Pearson correlation coefficient of 0.9 in leave-one-tissue-out cross-validations. Importantly, the majority (76%) of the top 10% differentially methylated loci among the 14 tissues was correctly detected using the predicted methylation values. Applying this model to 450K data of RA, osteoarthritis and normal FLS, we successfully expanded the coverage of CpG sites 18.5-fold, which now accounts for about 30% of all the CpGs in the human genome. By integrative omics study, we identified genes and pathways tightly related to RA pathogenesis, among which 12 genes were supported by triple evidences, including 6 genes already known to perform specific roles in RA and 6 genes as new potential therapeutic targets.

Availability and implementation

The source code, required data for prediction, and demo data for test are freely available at: http://wanglab.ucsd.edu/star/LR450K/ CONTACT: wei-wang@ucsd.edu or gfirestein@ucsd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-15 +23617301,D-Light on promoters: a client-server system for the analysis and visualization of cis-regulatory elements.,"

Background

The binding of transcription factors to DNA plays an essential role in the regulation of gene expression. Numerous experiments elucidated binding sequences which subsequently have been used to derive statistical models for predicting potential transcription factor binding sites (TFBS). The rapidly increasing number of genome sequence data requires sophisticated computational approaches to manage and query experimental and predicted TFBS data in the context of other epigenetic factors and across different organisms.

Results

We have developed D-Light, a novel client-server software package to store and query large amounts of TFBS data for any number of genomes. Users can add small-scale data to the server database and query them in a large scale, genome-wide promoter context. The client is implemented in Java and provides simple graphical user interfaces and data visualization. Here we also performed a statistical analysis showing what a user can expect for certain parameter settings and we illustrate the usage of D-Light with the help of a microarray data set.

Conclusions

D-Light is an easy to use software tool to integrate, store and query annotation data for promoters. A public D-Light server, the client and server software for local installation and the source code under GNU GPL license are available at http://biwww.che.sbg.ac.at/dlight.",2013-04-24 +24185696,BloodChIP: a database of comparative genome-wide transcription factor binding profiles in human blood cells.,"The BloodChIP database (http://www.med.unsw.edu.au/CRCWeb.nsf/page/BloodChIP) supports exploration and visualization of combinatorial transcription factor (TF) binding at a particular locus in human CD34-positive and other normal and leukaemic cells or retrieval of target gene sets for user-defined combinations of TFs across one or more cell types. Increasing numbers of genome-wide TF binding profiles are being added to public repositories, and this trend is likely to continue. For the power of these data sets to be fully harnessed by experimental scientists, there is a need for these data to be placed in context and easily accessible for downstream applications. To this end, we have built a user-friendly database that has at its core the genome-wide binding profiles of seven key haematopoietic TFs in human stem/progenitor cells. These binding profiles are compared with binding profiles in normal differentiated and leukaemic cells. We have integrated these TF binding profiles with chromatin marks and expression data in normal and leukaemic cell fractions. All queries can be exported into external sites to construct TF-gene and protein-protein networks and to evaluate the association of genes with cellular processes and tissue expression.",2013-10-31 +23875761,"3Omics: a web-based systems biology tool for analysis, integration and visualization of human transcriptomic, proteomic and metabolomic data.","

Background

Integrative and comparative analyses of multiple transcriptomics, proteomics and metabolomics datasets require an intensive knowledge of tools and background concepts. Thus, it is challenging for users to perform such analyses, highlighting the need for a single tool for such purposes. The 3Omics one-click web tool was developed to visualize and rapidly integrate multiple human inter- or intra-transcriptomic, proteomic, and metabolomic data by combining five commonly used analyses: correlation networking, coexpression, phenotyping, pathway enrichment, and GO (Gene Ontology) enrichment.

Results

3Omics generates inter-omic correlation networks to visualize relationships in data with respect to time or experimental conditions for all transcripts, proteins and metabolites. If only two of three omics datasets are input, then 3Omics supplements the missing transcript, protein or metabolite information related to the input data by text-mining the PubMed database. 3Omics' coexpression analysis assists in revealing functions shared among different omics datasets. 3Omics' phenotype analysis integrates Online Mendelian Inheritance in Man with available transcript or protein data. Pathway enrichment analysis on metabolomics data by 3Omics reveals enriched pathways in the KEGG/HumanCyc database. 3Omics performs statistical Gene Ontology-based functional enrichment analyses to display significantly overrepresented GO terms in transcriptomic experiments. Although the principal application of 3Omics is the integration of multiple omics datasets, it is also capable of analyzing individual omics datasets. The information obtained from the analyses of 3Omics in Case Studies 1 and 2 are also in accordance with comprehensive findings in the literature.

Conclusions

3Omics incorporates the advantages and functionality of existing software into a single platform, thereby simplifying data analysis and enabling the user to perform a one-click integrated analysis. Visualization and analysis results are downloadable for further user customization and analysis. The 3Omics software can be freely accessed at http://3omics.cmdm.tw.",2013-07-23 +23829391,An untargeted metabolomic workflow to improve structural characterization of metabolites.,"Mass spectrometry-based metabolomics relies on MS(2) data for structural characterization of metabolites. To obtain the high-quality MS(2) data necessary to support metabolite identifications, ions of interest must be purely isolated for fragmentation. Here, we show that metabolomic MS(2) data are frequently characterized by contaminating ions that prevent structural identification. Although using narrow-isolation windows can minimize contaminating MS(2) fragments, even narrow windows are not always selective enough, and they can complicate data analysis by removing isotopic patterns from MS(2) spectra. Moreover, narrow windows can significantly reduce sensitivity. In this work, we introduce a novel, two-part approach for performing metabolomic identifications that addresses these issues. First, we collect MS(2) scans with less stringent isolation settings to obtain improved sensitivity at the expense of specificity. Then, by evaluating MS(2) fragment intensities as a function of retention time and precursor mass targeted for MS(2) analysis, we obtain deconvolved MS(2) spectra that are consistent with pure standards and can therefore be used for metabolite identification. The value of our approach is highlighted with metabolic extracts from brain, liver, astrocytes, as well as nerve tissue, and performance is evaluated by using pure metabolite standards in combination with simulations based on raw MS(2) data from the METLIN metabolite database. 
A R package implementing the algorithms used in our workflow is available on our laboratory website ( http://pattilab.wustl.edu/decoms2.php ).",2013-08-02 +21569303,Meta-analysis of heterogeneous Down Syndrome data reveals consistent genome-wide dosage effects related to neurological processes.,"

Background

Down syndrome (DS; trisomy 21) is the most common genetic cause of mental retardation in the human population and key molecular networks dysregulated in DS are still unknown. Many different experimental techniques have been applied to analyse the effects of dosage imbalance at the molecular and phenotypical level, however, currently no integrative approach exists that attempts to extract the common information.

Results

We have performed a statistical meta-analysis from 45 heterogeneous publicly available DS data sets in order to identify consistent dosage effects from these studies. We identified 324 genes with significant genome-wide dosage effects, including well investigated genes like SOD1, APP, RUNX1 and DYRK1A as well as a large proportion of novel genes (N = 62). Furthermore, we characterized these genes using gene ontology, molecular interactions and promoter sequence analysis. In order to judge relevance of the 324 genes for more general cerebral pathologies we used independent publicly available microarray data from brain studies not related to DS and identified a subset of 79 genes with potential impact for neurocognitive processes. All results have been made available through a web server under http://ds-geneminer.molgen.mpg.de/.

Conclusions

Our study represents a comprehensive integrative analysis of heterogeneous data including genome-wide transcript levels in the domain of trisomy 21. The detected dosage effects build a resource for further studies of DS pathology and the development of new therapies.",2011-05-11 +28684834,Does stress in a dental hygiene and dental therapy undergraduate programme contribute to a sense of well-being in the students?,"Aims To use a qualitative approach to further explore the stress and well-being of dental hygiene and dental therapy students (DHDTS) during their undergraduate training.Subjects and methods Semi-structured individual interviews to explore motivation, goals, and perceived stress, were conducted with eight DHDTS from across all three years of study at the University of Portsmouth Dental Academy (UPDA). Thematic analysis of the data was undertaken using Braun and Clarke's (2006) six phases of thematic analysis.Results Three main themes of 'fulfilment', 'the learning environment', and 'perception of stress' were identified. Within these themes, a further 12 sub-themes were identified. Analysis suggested that a strong sense of passion to become a clinician mitigated most, but not all, of the stressful experiences of the DHDTS undergraduate learning environment.Conclusions DHDTS' perceived sources of stress during their undergraduate programme were strongly linked to a sense of meaningfulness.Listen to the author talk about the key findings in this paper in the associated video abstract. Available in the supplementary information online and on the BDJ Youtube channel via http://go.nature.com/bdjyoutube.",2017-07-01 +28482034,PhD-SNPg: a webserver and lightweight tool for scoring single nucleotide variants.,"One of the major challenges in human genetics is to identify functional effects of coding and non-coding single nucleotide variants (SNVs). 
In the past, several methods have been developed to identify disease-related single amino acid changes but only few tools are able to score the impact of non-coding variants. Among the most popular algorithms, CADD and FATHMM predict the effect of SNVs in non-coding regions combining sequence conservation with several functional features derived from the ENCODE project data. Thus, to run CADD or FATHMM locally, the installation process requires to download a large set of pre-calculated information. To facilitate the process of variant annotation we develop PhD-SNPg, a new easy-to-install and lightweight machine learning method that depends only on sequence-based features. Despite this, PhD-SNPg performs similarly or better than more complex methods. This makes PhD-SNPg ideal for quick SNV interpretation, and as benchmark for tool development.

Availability

PhD-SNPg is accessible at http://snps.biofold.org/phd-snpg.",2017-07-01 +28472408,RegulatorTrail: a web service for the identification of key transcriptional regulators.,"Transcriptional regulators such as transcription factors and chromatin modifiers play a central role in most biological processes. Alterations in their activities have been observed in many diseases, e.g. cancer. Hence, it is of utmost importance to evaluate and assess the effects of transcriptional regulators on natural and pathogenic processes. Here, we present RegulatorTrail, a web service that provides rich functionality for the identification and prioritization of key transcriptional regulators that have a strong impact on, e.g. pathological processes. RegulatorTrail offers eight methods that use regulator binding information in combination with transcriptomic or epigenomic data to infer the most influential regulators. Our web service not only provides an intuitive web interface, but also a well-documented RESTful API that allows for a straightforward integration into third-party workflows. The presented case studies highlight the capabilities of our web service and demonstrate its potential for the identification of influential regulators: we successfully identified regulators that might explain the increased malignancy in metastatic melanoma compared to primary tumors, as well as important regulators in macrophages. RegulatorTrail is freely accessible at: https://regulatortrail.bioinf.uni-sb.de/.",2017-07-01 +23545212,An integrative clinical database and diagnostics platform for biomarker identification and analysis in ion mobility spectra of human exhaled air.,"Over the last decade the evaluation of odors and vapors in human breath has gained more and more attention, particularly in the diagnostics of pulmonary diseases. Ion mobility spectrometry coupled with multi-capillary columns (MCC/IMS), is a well known technology for detecting volatile organic compounds (VOCs) in air. 
It is a comparatively inexpensive, non-invasive, high-throughput method, which is able to handle the moisture that comes with human exhaled air, and allows for characterizing of VOCs in very low concentrations. To identify discriminating compounds as biomarkers, it is necessary to have a clear understanding of the detailed composition of human breath. Therefore, in addition to the clinical studies, there is a need for a flexible and comprehensive centralized data repository, which is capable of gathering all kinds of related information. Moreover, there is a demand for automated data integration and semi-automated data analysis, in particular with regard to the rapid data accumulation, emerging from the high-throughput nature of the MCC/IMS technology. Here, we present a comprehensive database application and analysis platform, which combines metabolic maps with heterogeneous biomedical data in a well-structured manner. The design of the database is based on a hybrid of the entity-attribute-value (EAV) model and the EAV-CR, which incorporates the concepts of classes and relationships. Additionally it offers an intuitive user interface that provides easy and quick access to the platform’s functionality: automated data integration and integrity validation, versioning and roll-back strategy, data retrieval as well as semi-automatic data mining and machine learning capabilities. The platform will support MCC/IMS-based biomarker identification and validation. The software, schemata, data sets and further information is publicly available at http://imsdb.mpi-inf.mpg.de.",2013-04-02 +28200026,Abstracting the dynamics of biological pathways using information theory: a case study of apoptosis pathway.,"

Motivation

Quantitative models are increasingly used in systems biology. Usually, these quantitative models involve many molecular species and their associated reactions. When simulating a tissue with thousands of cells, using these large models becomes computationally and time limiting.

Results

In this paper, we propose to construct abstractions using information theory notions. Entropy is used to discretize the state space and mutual information is used to select a subset of all original variables and their mutual dependencies. We apply our method to a hybrid model of TRAIL-induced apoptosis in HeLa cells. Our abstraction, represented as a Dynamic Bayesian Network (DBN), reduces the number of variables from 92 to 10, and accelerates numerical simulation by an order of magnitude, yet preserving essential features of cell death time distributions.

Availability and implementation

This approach is implemented in the tool DBNizer, freely available at http://perso.crans.org/genest/DBNizer .

Contact

gregory.batt@inria.fr or bgenest@irisa.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +27153666,"XLinkDB 2.0: integrated, large-scale structural analysis of protein crosslinking data.","

Motivation

Large-scale chemical cross-linking with mass spectrometry (XL-MS) analyses are quickly becoming a powerful means for high-throughput determination of protein structural information and protein-protein interactions. Recent studies have garnered thousands of cross-linked interactions, yet the field lacks an effective tool to compile experimental data or access the network and structural knowledge for these large scale analyses. We present XLinkDB 2.0 which integrates tools for network analysis, Protein Databank queries, modeling of predicted protein structures and modeling of docked protein structures. The novel, integrated approach of XLinkDB 2.0 enables the holistic analysis of XL-MS protein interaction data without limitation to the cross-linker or analytical system used for the analysis.

Availability and implementation

XLinkDB 2.0 can be found here, including documentation and help: http://xlinkdb.gs.washington.edu/

Contact

jimbruce@uw.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-29 +27859414,Improving scoring-docking-screening powers of protein-ligand scoring functions using random forest.,"The development of new protein-ligand scoring functions using machine learning algorithms, such as random forest, has been of significant interest. By efficiently utilizing expanded feature sets and a large set of experimental data, random forest based scoring functions (RFbScore) can achieve better correlations to experimental protein-ligand binding data with known crystal structures; however, more extensive tests indicate that such enhancement in scoring power comes with significant under-performance in docking and screening power tests compared to traditional scoring functions. In this work, to improve scoring-docking-screening powers of protein-ligand docking functions simultaneously, we have introduced a Δvina RF parameterization and feature selection framework based on random forest. Our developed scoring function Δvina RF20 , which employs 20 descriptors in addition to the AutoDock Vina score, can achieve superior performance in all power tests of both CASF-2013 and CASF-2007 benchmarks compared to classical scoring functions. The Δvina RF20 scoring function and its code are freely available on the web at: https://www.nyu.edu/projects/yzhang/DeltaVina. © 2016 Wiley Periodicals, Inc.",2016-11-17 +26424081,Sample data processing in an additive and reproducible taxonomic workflow by using character data persistently linked to preserved individual specimens. ,"We present the model and implementation of a workflow that blazes a trail in systematic biology for the re-usability of character data (data on any kind of characters of pheno- and genotypes of organisms) and their additivity from specimen to taxon level. 
We take into account that any taxon characterization is based on a limited set of sampled individuals and characters, and that consequently any new individual and any new character may affect the recognition of biological entities and/or the subsequent delimitation and characterization of a taxon. Taxon concepts thus frequently change during the knowledge generation process in systematic biology. Structured character data are therefore not only needed for the knowledge generation process but also for easily adapting characterizations of taxa. We aim to facilitate the construction and reproducibility of taxon characterizations from structured character data of changing sample sets by establishing a stable and unambiguous association between each sampled individual and the data processed from it. Our workflow implementation uses the European Distributed Institute of Taxonomy Platform, a comprehensive taxonomic data management and publication environment to: (i) establish a reproducible connection between sampled individuals and all samples derived from them; (ii) stably link sample-based character data with the metadata of the respective samples; (iii) record and store structured specimen-based character data in formats allowing data exchange; (iv) reversibly assign sample metadata and character datasets to taxa in an editable classification and display them and (v) organize data exchange via standard exchange formats and enable the link between the character datasets and samples in research collections, ensuring high visibility and instant re-usability of the data. The workflow implemented will contribute to organizing the interface between phylogenetic analysis and revisionary taxonomic or monographic work. http://campanula.e-taxonomy.net/.",2015-09-30 +25273105,OrthoInspector 2.0: Software and database updates.,"

Summary

We previously developed OrthoInspector, a package incorporating an original algorithm for the detection of orthology and inparalogy relations between different species. We have added new functionalities to the package. While its original algorithm was not modified, performing similar orthology predictions, we facilitated the prediction of very large databases (thousands of proteomes), refurbished its graphical interface, added new visualization tools for comparative genomics/protein family analysis and facilitated its deployment in a network environment. Finally, we have released three online databases of precomputed orthology relationships.

Availability

Package and databases are freely available at http://lbgi.fr/orthoinspector with all major browsers supported.

Contact

odile.lecompte@unistra.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-01 +27459717,Achieving NHAS 90/90/80 Objectives by 2020: An Interactive Tool Modeling Local HIV Prevalence Projections.,"

Background

Tools using local HIV data to help jurisdictions estimate future demand for medical and support services are needed. We present an interactive prevalence projection model using data obtainable from jurisdictional HIV surveillance and publicly available data.

Methods

Using viral load data from Georgia's enhanced HIV/AIDS Reporting System, state level death rates for people living with HIV and the general population, and published estimates for HIV transmission rates, we developed a model for projecting future HIV prevalence. Keeping death rates and HIV transmission rates for undiagnosed, in care/viral load >200, in care/viral load<200, and out of care (no viral load for 12 months) constant, we describe results from simulations with varying inputs projecting HIV incidence and prevalence from 2014 to 2024.

Results

In this model, maintaining Georgia's 2014 rates for diagnosis, transitions in care, viral suppression (VS), and mortality by sub-group through 2020, resulted in 85% diagnosed, 59% in care, and 44% VS among diagnosed (85%/58%/44%) with a total of 67 815 PLWH, 33 953 in care, and more than 1000 new cases per year by 2020. Neither doubling the diagnosis rate nor tripling rates of re-engaging out of care PLWH into care alone were adequate to reach 90/90/80 by 2020. We demonstrate a multicomponent scenario that achieved NHAS goals and resulted in 63 989 PLWH, 57 546 in care, and continued annual prevalence increase through 2024.

Conclusions

Jurisdictions can use this HIV prevalence prediction tool, accessible at https://dph.georgia.gov/hiv-prevalence-projections to assess local capacity to meet future HIV care and social services needs. In this model, achieving 90/90/80 by 2020 in Georgia slowed but did not reverse increases in HIV prevalence, and the number of HIV-infected persons needing care and support services more than doubled. Improving the HIV care infrastructure is imperative.",2016-07-26 +24163125,Tripal v1.1: a standards-based toolkit for construction of online genetic and genomic databases.,"Tripal is an open-source freely available toolkit for construction of online genomic and genetic databases. It aims to facilitate development of community-driven biological websites by integrating the GMOD Chado database schema with Drupal, a popular website creation and content management software. Tripal provides a suite of tools for interaction with a Chado database and display of content therein. The tools are designed to be generic to support the various ways in which data may be stored in Chado. Previous releases of Tripal have supported organisms, genomic libraries, biological stocks, stock collections and genomic features, their alignments and annotations. Also, Tripal and its extension modules provided loaders for commonly used file formats such as FASTA, GFF, OBO, GAF, BLAST XML, KEGG heir files and InterProScan XML. Default generic templates were provided for common views of biological data, which could be customized using an open Application Programming Interface to change the way data are displayed. Here, we report additional tools and functionality that are part of release v1.1 of Tripal. 
These include (i) a new bulk loader that allows a site curator to import data stored in a custom tab delimited format; (ii) full support of every Chado table for Drupal Views (a powerful tool allowing site developers to construct novel displays and search pages); (iii) new modules including 'Feature Map', 'Genetic', 'Publication', 'Project', 'Contact' and the 'Natural Diversity' modules. Tutorials, mailing lists, download and set-up instructions, extension modules and other documentation can be found at the Tripal website located at http://tripal.info. DATABASE URL: http://tripal.info/.",2013-10-25 +27797759,Transfer learning across ontologies for phenome-genome association prediction.,"

Motivation

To better predict and analyze gene associations with the collection of phenotypes organized in a phenotype ontology, it is crucial to effectively model the hierarchical structure among the phenotypes in the ontology and leverage the sparse known associations with additional training information. In this paper, we first introduce Dual Label Propagation (DLP) to impose consistent associations with the entire phenotype paths in predicting phenotype-gene associations in Human Phenotype Ontology (HPO). DLP is then used as the base model in a transfer learning framework (tlDLP) to incorporate functional annotations in Gene Ontology (GO). By simultaneously reconstructing GO term-gene associations and HPO phenotype-gene associations for all the genes in a protein-protein interaction network, tlDLP benefits from the enriched training associations indirectly through relation with GO terms.

Results

In the experiments to predict the associations between human genes and phenotypes in HPO based on human protein-protein interaction network, both DLP and tlDLP improved the prediction of gene associations with phenotype paths in HPO in cross-validation and the prediction of the most recent associations added after the snapshot of the training data. Moreover, the transfer learning through GO term-gene associations significantly improved association predictions for the phenotypes with no more specific known associations by a large margin. Examples are also shown to demonstrate how phenotype paths in phenotype ontology and transfer learning with gene ontology can improve the predictions.

Availability and implementation

Source code is available at http://compbio.cs.umn.edu/onto phenome .

Contact

kuang@cs.umn.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +27493192,PcircRNA_finder: a software for circRNA prediction in plants.,"

Motivation

Recent studies reveal an important role of non-coding circular RNA (circRNA) in the control of cellular processes. Because of differences in the organization of plant and mammal genomes, the sensitivity and accuracy of circRNA prediction programs using algorithms developed for animals and humans perform poorly for plants.

Results

A circRNA prediction software for plants (termed PcircRNA_finder) was developed that is more sensitive in detecting circRNAs than other frequently used programs (such as find_circ and CIRCexplorer). Based on analysis of simulated and real rRNA-/RNAase R RNA-Seq data from Arabidopsis thaliana and rice, PcircRNA_finder provides a more comprehensive, sensitive, and precise prediction method for plant circRNAs.

Availability and implementation

http://ibi.zju.edu.cn/bioinplant/tools/manual.htm CONTACT: fanlj@zju.edu.cn Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-04 +29243560,Factors associated with delayed initiation of breastfeeding: a survey in Northern Uganda.,"

Background

Initiation of breastfeeding later than 1 hour after birth is associated with increased neonatal morbidity and mortality.

Objective

To determine the prevalence and factors associated with delayed initiation of breastfeeding.

Methods

We conducted a survey in 2016 of 930 children under the age of 2 years in Lira district, northern Uganda. Mothers of the children were interviewed and data was collected on mobile phones using Open Data Kit software ( https://opendatakit.org ). Multivariable logistic regression was used to determine factors associated with delayed initiation of breastfeeding.

Results

Almost half [48.2%, 95% confidence interval (CI) (44.3-52.1)] of the mothers delayed initiation of breastfeeding. Factors significantly associated with delayed initiation of breastfeeding in multivariable analysis included caesarean delivery [Adjusted Odds Ratio (AOR) 11.10 95% CI (3.73-33.04)], discarding initial breast milk [AOR 2.02 95% CI (1.41-2.88)], home delivery [AOR 1.43 95% CI (1.04-1.97)] and mother being responsible for initiating breastfeeding as compared to a health worker or relative [AOR 1.73 95% CI (1.33-2.26)]. Mothers having a secondary education were less likely [AOR 0.54 95% CI (0.30-0.96)] to delay initiation of breastfeeding as compared to those with no education.

Conclusion

About half the mothers delayed initiation of breastfeeding until after 1 hour after birth. Programs to promote, protect and support breastfeeding in this post conflict region are urgently needed.",2017-01-01 +27519564,iTAR: a web server for identifying target genes of transcription factors using ChIP-seq or ChIP-chip data.,"

Background

Chromatin immunoprecipitation followed by massively parallel DNA sequencing (ChIP-seq) or microarray hybridization (ChIP-chip) has been widely used to determine the genomic occupation of transcription factors (TFs). We have previously developed a probabilistic method, called TIP (Target Identification from Profiles), to identify TF target genes using ChIP-seq/ChIP-chip data. To achieve high specificity, TIP applies a conservative method to estimate significance of target genes, with the trade-off being a relatively low sensitivity of target gene identification compared to other methods. Additionally, TIP's output does not render binding-peak locations or intensity, information highly useful for visualization and general experimental biological use, while the variability of ChIP-seq/ChIP-chip file formats has made input into TIP more difficult than desired.

Description

To improve upon these facets, here we present a refined TIP with key extensions. First, it implements a Gaussian mixture model for p-value estimation, increasing target gene identification sensitivity and more accurately capturing the shape of TF binding profile distributions. Second, it enables the incorporation of TF binding-peak data by identifying their locations in significant target gene promoter regions and quantifies their strengths. Finally, for full ease of implementation we have incorporated it into a web server ( http://syslab3.nchu.edu.tw/iTAR/ ) that enables flexibility of input file format, can be used across multiple species and genome assembly versions, and is freely available for public use. The web server additionally performs GO enrichment analysis for the identified target genes to reveal the potential function of the corresponding TF.

Conclusions

The iTAR web server provides a user-friendly interface and supports target gene identification in seven species, ranging from yeast to human. To facilitate investigating the quality of ChIP-seq/ChIP-chip data, the web server generates the chart of the characteristic binding profiles and the density plot of normalized regulatory scores. The iTAR web server is a useful tool in identifying TF target genes from ChIP-seq/ChIP-chip data and discovering biological insights.",2016-08-12 +27287041,Extracting a low-dimensional description of multiple gene expression datasets reveals a potential driver for tumor-associated stroma in ovarian cancer.,"Patterns in expression data conserved across multiple independent disease studies are likely to represent important molecular events underlying the disease. We present the INSPIRE method to infer modules of co-expressed genes and the dependencies among the modules from multiple expression datasets that may contain different sets of genes. We show that INSPIRE infers more accurate models than existing methods to extract low-dimensional representation of expression data. We demonstrate that applying INSPIRE to nine ovarian cancer datasets leads to a new marker and potential driver of tumor-associated stroma, HOPX, followed by experimental validation. The implementation of INSPIRE is available at http://inspire.cs.washington.edu .",2016-06-10 +27676360,mirVAFC: A Web Server for Prioritizations of Pathogenic Sequence Variants from Exome Sequencing Data via Classifications.,"Exome sequencing has been widely used to identify the genetic variants underlying human genetic disorders for clinical diagnoses, but the identification of pathogenic sequence variants among the huge amounts of benign ones is complicated and challenging. Here, we describe a new Web server named mirVAFC for pathogenic sequence variants prioritizations from clinical exome sequencing (CES) variant data of single individual or family. 
The mirVAFC is able to comprehensively annotate sequence variants, filter out most irrelevant variants using custom criteria, classify variants into different categories as for estimated pathogenicity, and lastly provide pathogenic variants prioritizations based on classifications and mutation effects. Case studies using different types of datasets for different diseases from publication and our in-house data have revealed that mirVAFC can efficiently identify the right pathogenic candidates as in original work in each case. Overall, the Web server mirVAFC is specifically developed for pathogenic sequence variant identifications from family-based CES variants using classification-based prioritizations. The mirVAFC Web server is freely accessible at https://www.wzgenomics.cn/mirVAFC/.",2016-10-13 +25886721,EGFR Mutant Structural Database: computationally predicted 3D structures and the corresponding binding free energies with gefitinib and erlotinib.,"

Background

Epidermal growth factor receptor (EGFR) mutation-induced drug resistance has caused great difficulties in the treatment of non-small-cell lung cancer (NSCLC). However, structural information is available for just a few EGFR mutants. In this study, we created an EGFR Mutant Structural Database (freely available at http://bcc.ee.cityu.edu.hk/data/EGFR.html ), including the 3D EGFR mutant structures and their corresponding binding free energies with two commonly used inhibitors (gefitinib and erlotinib).

Results

We collected the information of 942 NSCLC patients belonging to 112 mutation types. These mutation types are divided into five groups (insertion, deletion, duplication, modification and substitution), and substitution accounts for 61.61% of the mutation types and 54.14% of all the patients. Among all the 942 patients, 388 cases experienced a mutation at residue site 858 with leucine replaced by arginine (L858R), making it the most common mutation type. Moreover, 36 (32.14%) mutation types occur at exon 19, and 419 (44.48%) patients carried a mutation at exon 21. In this study, we predicted the EGFR mutant structures using Rosetta with the collected mutation types. In addition, Amber was employed to refine the structures followed by calculating the binding free energies of mutant-drug complexes.

Conclusions

The EGFR Mutant Structural Database provides resources of 3D structures and the binding affinity with inhibitors, which can be used by other researchers to study NSCLC further and by medical doctors as reference for NSCLC treatment.",2015-03-14 +28365720,BELMiner: adapting a rule-based relation extraction system to extract biological expression language statements from bio-medical literature evidence sentences. ,"Extracting meaningful relationships with semantic significance from biomedical literature is often a challenging task. BioCreative V track4 challenge for the first time has organized a comprehensive shared task to test the robustness of the text-mining algorithms in extracting semantically meaningful assertions from the evidence statement in biomedical text. In this work, we tested the ability of a rule-based semantic parser to extract Biological Expression Language (BEL) statements from evidence sentences culled out of biomedical literature as part of BioCreative V Track4 challenge. The system achieved an overall best F-measure of 21.29% in extracting the complete BEL statement. For relation extraction, the system achieved an F-measure of 65.13% on test data set. Our system achieved the best performance in five of the six criteria that was adopted for evaluation by the task organizers. Lack of ability to derive semantic inferences, limitation in the rule sets to map the textual extractions to BEL function were some of the reasons for low performance in extracting the complete BEL statement. Post shared task we also evaluated the impact of differential NER components on the ability to extract BEL statements on the test data sets besides making a single change in the rule sets that translate relation extractions into a BEL statement. There is a marked improvement by over 20% in the overall performance of the BELMiner's capability to extract BEL statement on the test set. The system is available as a REST-API at http://54.146.11.205:8484/BELXtractor/finder/. 
http://54.146.11.205:8484/BELXtractor/finder/.",2017-01-01 +28029645,An ensemble approach for large-scale identification of protein- protein interactions using the alignments of multiple sequences.,"Protein-Protein Interactions (PPI) is not only the critical component of various biological processes in cells, but also the key to understand the mechanisms leading to healthy and diseased states in organisms. However, it is time-consuming and cost-intensive to identify the interactions among proteins using biological experiments. Hence, how to develop a more efficient computational method rapidly became an attractive topic in the post-genomic era. In this paper, we propose a novel method for inference of protein-protein interactions from protein amino acids sequences only. Specifically, protein amino acids sequence is firstly transformed into Position-Specific Scoring Matrix (PSSM) generated by multiple sequences alignments; then the Pseudo PSSM is used to extract feature descriptors. Finally, ensemble Rotation Forest (RF) learning system is trained to predict and recognize PPIs based solely on protein sequence feature. When performed the proposed method on the three benchmark data sets (Yeast, H. pylori, and independent dataset) for predicting PPIs, our method can achieve good average accuracies of 98.38%, 89.75%, and 96.25%, respectively. In order to further evaluate the prediction performance, we also compare the proposed method with other methods using same benchmark data sets. The experiment results demonstrate that the proposed method consistently outperforms other state-of-the-art method. Therefore, our method is effective and robust and can be taken as a useful tool in exploring and discovering new relationships between proteins. 
A web server is made publicly available at the URL http://202.119.201.126:8888/PsePSSM/ for academic use.",2017-01-01 +29339356,Kidney Biomarkers and Decline in eGFR in Patients with Type 2 Diabetes.,"BACKGROUND AND OBJECTIVES:Biomarkers may improve identification of individuals at risk of eGFR decline who may benefit from intervention or dialysis planning. However, available biomarkers remain incompletely validated for risk stratification and prediction modeling. DESIGN, SETTING, PARTICIPANTS, & MEASUREMENTS:We examined serum cystatin C, urinary kidney injury molecule-1 (uKIM-1), and urinary neutrophil gelatinase-associated lipocalin (UNGAL) in 5367 individuals with type 2 diabetes mellitus and recent acute coronary syndromes enrolled in the Examination of Cardiovascular Outcomes with Alogliptin versus Standard of Care (EXAMINE) trial. Baseline concentrations and 6-month changes in biomarkers were also evaluated. Cox proportional regression was used to assess associations with a 50% decrease in eGFR, stage 5 CKD (eGFR<15 ml/min per 1.73 m2), or dialysis. RESULTS:eGFR decline occurred in 98 patients (1.8%) over a median of 1.5 years. All biomarkers individually were associated with higher risk of eGFR decline (P<0.001). However, when adjusting for baseline eGFR, proteinuria, and clinical factors, only baseline cystatin C (adjusted hazard ratio per 1 SD change, 1.66; 95% confidence interval, 1.41 to 1.96; P<0.001) and 6-month change in urinary neutrophil gelatinase-associated lipocalin (adjusted hazard ratio per 1 SD change, 1.07; 95% confidence interval, 1.02 to 1.12; P=0.004) independently associated with CKD progression. A base model for predicting kidney function decline with nine standard risk factors had strong discriminative ability (C-statistic 0.93). The addition of baseline cystatin C improved discrimination (C-statistic 0.94), but it failed to reclassify risk categories of individuals with and without eGFR decline. 
CONCLUSIONS:The addition of cystatin C or biomarkers of tubular injury did not meaningfully improve the prediction of eGFR decline beyond common clinical factors and routine laboratory data in a large cohort of patients with type 2 diabetes and recent acute coronary syndrome. PODCAST:This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2018_01_16_CJASNPodcast_18_3_G.mp3.",2018-01-16 +26685306,PaxtoolsR: pathway analysis in R using Pathway Commons.,"

Purpose

PaxtoolsR package enables access to pathway data represented in the BioPAX format and made available through the Pathway Commons webservice for users of the R language to aid in advanced pathway analyses. Features include the extraction, merging and validation of pathway data represented in the BioPAX format. This package also provides novel pathway datasets and advanced querying features for R users through the Pathway Commons webservice allowing users to query, extract and retrieve data and integrate these data with local BioPAX datasets.

Availability and implementation

The PaxtoolsR package is compatible with versions of R 3.1.1 (and higher) on Windows, Mac OS X and Linux using Bioconductor 3.0 and is available through the Bioconductor R package repository along with source code and a tutorial vignette describing common tasks, such as data visualization and gene set enrichment analysis. Source code and documentation are at http://www.bioconductor.org/packages/paxtoolsr This plugin is free, open-source and licensed under the LGPL-3.

Contact

paxtools@cbio.mskcc.org or lunaa@cbio.mskcc.org.",2015-12-18 +25097383,IntergenicDB: a database for intergenic sequences.,"

Unlabelled

A whole genome contains not only coding regions, but also non-coding regions. These are located between the end of a given coding region and the beginning of the following coding region. For this reason, the information about the gene regulation process lies in intergenic regions. There is no easy way to obtain intergenic regions from currently available databases. IntergenicDB was developed to integrate data of intergenic regions and their gene related information from NCBI databases. The main goal of INTERGENICDB is to offer a friendly database for intergenic sequences of bacterial genomes.

Availability

http://intergenicdb.bioinfoucs.com/",2014-06-30 +25667546,Large-scale exploration and analysis of drug combinations.,"

Motivation

Drug combinations are a promising strategy for combating complex diseases by improving the efficacy and reducing corresponding side effects. Currently, a widely studied problem in pharmacology is to predict effective drug combinations, either through empirically screening in clinic or pure experimental trials. However, the large-scale prediction of drug combination by a systems method is rarely considered.

Results

We report a systems pharmacology framework to predict drug combinations (PreDCs) on a computational model, termed probability ensemble approach (PEA), for analysis of both the efficacy and adverse effects of drug combinations. First, a Bayesian network integrating with a similarity algorithm is developed to model the combinations from drug molecular and pharmacological phenotypes, and the predictions are then assessed with both clinical efficacy and adverse effects. It is illustrated that PEA can predict the combination efficacy of drugs spanning different therapeutic classes with high specificity and sensitivity (AUC = 0.90), which was further validated by independent data or new experimental assays. PEA also evaluates the adverse effects (AUC = 0.95) quantitatively and detects the therapeutic indications for drug combinations. Finally, the PreDC database includes 1571 known and 3269 predicted optimal combinations as well as their potential side effects and therapeutic indications.

Availability and implementation

The PreDC database is available at http://sm.nwsuaf.edu.cn/lsp/predc.php.",2015-02-08 +32020986,The Multi-Sensor Advanced Climatology of Liquid Water Path (MAC-LWP).,"The Multi-Sensor Advanced Climatology of Liquid Water Path (MAC-LWP), an updated and enhanced version of the University of Wisconsin (UWisc) cloud liquid water path (CLWP) climatology, currently provides 29 years (1988 - 2016) of monthly gridded (1°) oceanic CLWP information constructed using Remote Sensing Systems (RSS) inter-calibrated 0.25°-resolution retrievals. Satellite sources include SSM/I, TMI, AMSR-E, WindSat, SSMIS, AMSR-2 and GMI. To mitigate spurious CLWP trends, the climatology is corrected for drifting satellite overpass times by simultaneously solving for the monthly average CLWP and monthly-mean diurnal cycle. In addition to a longer record and six additional satellite products, major enhancements relative to the UWisc climatology include updating the input to version 7 RSS retrievals, a correction for a CLWP bias (based on matchups to clear-sky MODIS scenes), and the construction of a total (cloud+rain) liquid water path (TLWP) record for use in analyses of columnar liquid water in raining clouds. Because the microwave emission signal from cloud water is similar to that of precipitation-sized hydrometeors, greater uncertainty in the CLWP record is expected in regions of substantial precipitation. Therefore, the TLWP field can also be used as a quality-control screen, where uncertainty increases as the ratio of CLWP to TLWP decreases. For regions where confidence in CLWP is highest (i.e. CLWP:TLWP > 0.8), systematic differences in MAC CLWP relative to UWisc CLWP range from -15% (e.g. global oceanic stratocumulus decks) to +5-10% (e.g. portions of the higher-latitudes, storm tracks, and shallower convection regions straddling the ITCZ). 
The dataset is currently hosted at the Goddard Earth Science Data and Information Services Center (http://disc.sci.gsfc.nasa.gov).",2017-12-01 +26743127,A FASTQ compressor based on integer-mapped k-mer indexing for biologist.,"Next generation sequencing (NGS) technologies have gained considerable popularity among biologists. For example, RNA-seq, which provides both genomic and functional information, has been widely used by recent functional and evolutionary studies, especially in non-model organisms. However, storing and transmitting these large data sets (primarily in FASTQ format) have become genuine challenges, especially for biologists with little informatics experience. Data compression is thus a necessity. KIC, a FASTQ compressor based on a new integer-mapped k-mer indexing method, was developed (available at http://www.ysunlab.org/kic.jsp). It offers high compression ratio on sequence data, outstanding user-friendliness with graphic user interfaces, and proven reliability. Evaluated on multiple large RNA-seq data sets from both human and plants, it was found that the compression ratio of KIC had exceeded all major generic compressors, and was comparable to those of the latest dedicated compressors. KIC enables researchers with minimal informatics training to take advantage of the latest sequence compression technologies, easily manage large FASTQ data sets, and reduce storage and transmission cost.",2015-12-30 +28121162,Quorum sensing inhibition in Pseudomonas aeruginosa biofilms: new insights through network mining.,"Quorum sensing plays a pivotal role in Pseudomonas aeruginosa's virulence. This paper reviews experimental results on antimicrobial strategies based on quorum sensing inhibition and discusses current targets in the regulatory network that determines P. aeruginosa biofilm formation and virulence. 
A bioinformatics framework combining literature mining with information from biomedical ontologies and curated databases was used to create a knowledge network of potential anti-quorum sensing agents for P. aeruginosa. A total of 110 scientific articles, corresponding to 1,004 annotations, were so far included in the network and are analysed in this work. Information on the most studied agents, QS targets and methods is detailed. This knowledge network offers a unique view of existing strategies for quorum sensing inhibition and their main regulatory targets and may be used to readily access otherwise scattered information and to help generate new testable hypotheses. This knowledge network is publicly available at http://pcquorum.org/ .",2017-01-25 +28238542,Aberrant expression of cell cycle and material metabolism related genes contributes to hepatocellular carcinoma occurrence.,"This study aims to deepen our understanding of the molecular mechanism underlying the occurrence of hepatocellular carcinoma (HCC). We first downloaded a gene expression profile dataset GSE29721 (10 HCC and 10 control samples) from Gene Expression Omnibus database (http://www.ncbi.nlm.nih.gov/geo/). Differentially expressed genes (DEGs) were identified by the paired t-test using limma package. Pathway and functional enrichment analyses were performed with DAVID tools. Transcription factors were annotated with TRANSFAC database and tumor associated genes (TAGs) were annotated with TAG and TSGene databases. Protein-protein interaction (PPI) network was conducted using STRING online tool and function module was further identified with BioNet package. Totally, 527 up-regulated DEGs and 587 down-regulated DEGs were identified. GO functional and KEGG pathway enrichment analyses showed that the up-regulated DEGs were mainly related to cell division and cell cycle, while the down-regulated DEGs were largely related to material metabolism, especially secondary metabolism. 
Proteins encoded by DEGs CDK1, BUB1, CDC20, NCAPG, NDC80, CDCA8, MAD2L1, CCNB1, CCNA2 and BIRC5 were hub genes with high degrees in the PPI network; further module analysis detected a subnetwork consisting of 55 proteins, such as CYP2B6, ACAA1, BHMT and ALDH2. Taken together, aberrant expression of cell cycle related genes (e.g., CDK1, CCNA2, CCNB1, BUB1, MAD2L1 and CDC20) and material metabolism related genes (e.g., CYP2B6, ACAA1, BHMT and ALDH2) may contribute to HCC occurrence.",2017-01-25 +27153693,XIBD: software for inferring pairwise identity by descent on the X chromosome.,

Unlabelled

XIBD performs pairwise relatedness mapping on the X chromosome using dense single nucleotide polymorphism (SNP) data from either SNP chips or next generation sequencing data. It correctly accounts for the difference in chromosomal numbers between males and females and estimates global relatedness as well as regions of the genome that are identical by descent (IBD). XIBD also generates novel graphical summaries of all pairwise IBD tracts for a cohort making it very useful for disease locus mapping.

Availability and implementation

XIBD is written in R/Rcpp and executed from shell scripts that are freely available from http://bioinf.wehi.edu.au/software/XIBD along with accompanying reference datasets.

Contact

henden.l@wehi.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.,2016-03-21 +28874813,"FunctionAnnotator, a versatile and efficient web tool for non-model organism annotation.","

Absatract

Along with the constant improvement in high-throughput sequencing technology, an increasing number of transcriptome sequencing projects are carried out in organisms without decoded genome information and even on environmental biological samples. To study the biological functions of novel transcripts, the very first task is to identify their potential functions. We present a web-based annotation tool, FunctionAnnotator, which offers comprehensive annotations, including GO term assignment, enzyme annotation, domain/motif identification and predictions for subcellular localization. To accelerate the annotation process, we have optimized the computation processes and used parallel computing for all annotation steps. Moreover, FunctionAnnotator is designed to be versatile, and it generates a variety of useful outputs for facilitating other analyses. Here, we demonstrate how FunctionAnnotator can be helpful in annotating non-model organisms. We further illustrate that FunctionAnnotator can estimate the taxonomic composition of environmental samples and assist in the identification of novel proteins by combining RNA-Seq data with proteomics technology. In summary, FunctionAnnotator can efficiently annotate transcriptomes and greatly benefits studies focusing on non-model organisms or metatranscriptomes. FunctionAnnotator, a comprehensive annotation web-service tool, is freely available online at: http://fa.cgu.edu.tw/ . This new web-based annotator will shed light on field studies involving organisms without a reference genome.",2017-09-05 +27738094,"Robust Label-free, Quantitative Profiling of Circulating Plasma Microparticle (MP) Associated Proteins.","Cells of the vascular system release spherical vesicles, called microparticles, in the size range of 0.1-1 μm induced by a variety of stress factors resulting in variable concentrations between health and disease. 
Furthermore, microparticles have intercellular communication/signaling properties and interfere with inflammation and coagulation pathways. Today's most used analytical technology for microparticle characterization, flow cytometry, is lacking sensitivity and specificity, which might have led to the publication of contradicting results in the past. We propose the use of nano-liquid chromatography two-stage mass spectrometry as a nonbiased tool for quantitative MP proteome analysis. For this, we developed an improved microparticle isolation protocol and quantified the microparticle protein composition of twelve healthy volunteers with a label-free, data-dependent and independent proteomics approach on a quadrupole orbitrap instrument. Using aliquots of 250 μl platelet-free plasma from one individual donor, we achieved excellent reproducibility with an interassay coefficient of variation of 2.7 ± 1.7% (mean ± 1 standard deviation) on individual peptide intensities across 27 acquisitions performed over a period of 3.5 months. We show that the microparticle proteome between twelve healthy volunteers were remarkably similar, and that it is clearly distinguishable from whole cell and platelet lysates. We propose the use of the proteome profile shown in this work as a quality criterion for microparticle purity in proteomics studies. Furthermore, one freeze thaw cycle damaged the microparticle integrity, articulated by a loss of cytoplasm proteins, encompassing a specific set of proteins involved in regulating dynamic structures of the cytoskeleton, and thrombin activation leading to MP clotting. On the other hand, plasma membrane protein composition was unaffected. Finally, we show that multiplexed data-independent acquisition can be used for relative quantification of target proteins using Skyline software. 
Mass spectrometry data are available via ProteomeXchange (identifier PXD003935) and panoramaweb.org (https://panoramaweb.org/labkey/N1OHMk.url).",2016-10-12 +25252782,Renal Gene Expression Database (RGED): a relational database of gene expression profiles in kidney disease. ,"We present a bioinformatics database named Renal Gene Expression Database (RGED), which contains comprehensive gene expression data sets from renal disease research. The web-based interface of RGED allows users to query the gene expression profiles in various kidney-related samples, including renal cell lines, human kidney tissues and murine model kidneys. Researchers can explore certain gene profiles, the relationships between genes of interests and identify biomarkers or even drug targets in kidney diseases. The aim of this work is to provide a user-friendly utility for the renal disease research community to query expression profiles of genes of their own interest without the requirement of advanced computational skills. Website is implemented in PHP, R, MySQL and Nginx and freely available from http://rged.wall-eva.net. http://rged.wall-eva.net.",2014-09-24 +29475863,Environmental Controls of Oyster-Pathogenic Vibrio spp. in Oregon Estuaries and a Shellfish Hatchery. ,"Vibrio spp. have been a persistent concern for coastal bivalve hatcheries, which are vulnerable to environmental pathogens in the seawater used for rearing larvae, yet the biogeochemical drivers of oyster-pathogenic Vibrio spp. in their planktonic state are poorly understood. Here, we present data tracking oyster-pathogenic Vibrio bacteria in Netarts Bay and Yaquina Bay in Oregon, USA, as well as in adjacent coastal waters and a local shellfish hatchery, through the 2015 upwelling season. Vibrio populations were quantified using a culture-independent approach of high-throughput Vibrio-specific 16S rRNA gene sequencing paired with droplet digital PCR, and abundances were analyzed in the context of local biogeochemistry. 
The most abundant putative pathogen in our samples was Vibrio coralliilyticus Environmental concentrations of total Vibrio spp. and V. coralliilyticus were highest in Netarts Bay sediment samples and higher in seawater from Netarts Bay than from nearshore coastal waters or Yaquina Bay. In Netarts Bay, the highest V. coralliilyticus concentrations were observed during low tide, and abundances increased throughout the summer. We hypothesize that the warm shallow waters in estuarine mudflats facilitate the local growth of the V. coralliilyticus pathogen. Samples from larval oyster tanks in Whiskey Creek Shellfish Hatchery, which uses seawater pumped directly from Netarts Bay, contained significantly lower total Vibrio species concentrations, but roughly similar V. coralliilyticus concentrations, than did the bay water, resulting in a 30-fold increase in the relative abundance of the V. coralliilyticus pathogen in hatchery tanks. This suggests that the V. coralliilyticus pathogen is able to grow or persist under hatchery conditions.IMPORTANCE It has been argued that oyster-pathogenic Vibrio spp. have contributed to recent mortality events in U.S. shellfish hatcheries (R. A. Elston, H. Hasegawa, K. L. Humphrey, I. K. Polyak, and C. Häse, Dis Aquat Organ 82:119-134, 2008, https://doi.org/10.3354/dao01982); however, these events are often sporadic and unpredictable. The success of hatcheries is critically linked to the chemical and biological composition of inflowing seawater resources; thus, it is pertinent to understand the biogeochemical drivers of oyster-pathogenic Vibrio spp. in their planktonic state. Here, we show that Netarts Bay, the location of a local hatchery, is enriched in oyster-pathogenic V. coralliilyticus compared to coastal seawater, and we hypothesize that conditions in tidal flats promote the local growth of this pathogen. Furthermore, V. coralliilyticus appears to persist in seawater pumped into the local hatchery. 
These results improve our understanding of the ecology and environmental controls of the V. coralliilyticus pathogen and could be used to improve future aquaculture efforts, as multiple stressors impact hatchery success.",2018-04-16 +28117329,Outcome comparison of different approaches to self-intermittent catheterization in neurogenic patients: a systematic review.,"

Study design

Systematic review (Preferred Reporting Items for Systematic Reviews and Meta-analysis (PRISMA); http://www.prisma-statement.org).

Objectives

Different types of catheters and techniques have been described in the past three decades to identify the best self-intermittent catheterization method. Our aim is to review systematically the literature on the most appropriate material and technique to perform self-intermittent catheterization in the adult neurogenic population.

Methods

A systematic review search was performed through PubMed/Medline, Embase and Cochrane Central Register of Controlled Trials (CENTRAL) databases to study all types of self-intermittent catheters, and analyzing their impact on urinary tract infections (UTIs), urethral trauma, cost-effectiveness, quality of life and patient's satisfaction. We used the following keywords: 'intermittent catheterization/catheterisation', 'neurogenic', 'urinary catheters for intermittent use' and 'urethral catheterization/catheterisation' published by November 2015.

Results

After screening 3768 articles, 31 were included in the final synthesis (level of evidence 1b to 2b). The 2188 trial participants were mainly spinal cord injury adults and women with multiple sclerosis. Hydrophilic-coated catheters tended to decrease the incidence of UTI as well as urethral trauma and improve patient's satisfaction when compared with non-hydrophilic-coated catheters. Similarly, prelubricated catheters were associated with better results in terms of patient satisfaction. Sterile technique seemed to decrease the incidence of recurrent UTI; however, these results are counter-balanced by significantly increasing cost compared with clean catheterization.

Conclusions

The present review demonstrated advantages of hydrophilic-coated catheters in decreasing risk of UTI and urethral trauma as well as improving patient's satisfaction. Prelubricated catheters have been shown to be superior to conventional polyvinyl chloride catheters. Randomized controlled trials comparing hydrophilic and prelubricated catheters must be conducted to assess possible superiority and cost-effectiveness.",2017-01-24 +28839117,High-Resolution Maps of Mouse Reference Populations.,"Genetic reference panels are widely used to map complex, quantitative traits in model organisms. We have generated new high-resolution genetic maps of 259 mouse inbred strains from recombinant inbred strain panels (C57BL/6J × DBA/2J, ILS/IbgTejJ × ISS/IbgTejJ, and C57BL/6J × A/J) and chromosome substitution strain panels (C57BL/6J-Chr#, C57BL/6J-Chr#, and C57BL/6J-Chr#). We genotyped all samples using the Affymetrix Mouse Diversity Array with an average intermarker spacing of 4.3 kb. The new genetic maps provide increased precision in the localization of recombination breakpoints compared to the previous maps. Although the strains were presumed to be fully inbred, we found residual heterozygosity in 40% of individual mice from five of the six panels. We also identified de novo deletions and duplications, in homozygous or heterozygous state, ranging in size from 21 kb to 8.4 Mb. Almost two-thirds (46 out of 76) of these deletions overlap exons of protein coding genes and may have phenotypic consequences. Twenty-nine putative gene conversions were identified in the chromosome substitution strains. We find that gene conversions are more likely to occur in regions where the homologous chromosomes are more similar. 
The raw genotyping data and genetic maps of these strain panels are available at http://churchill-lab.jax.org/website/MDA.",2017-10-05 +25189745,Occupational exposure to physical agents: the new Italian database for risk assessment and control.,"This article presents the new Italian database of physical agents, which is available at http://www.portaleagentifisici.it. It supports in risk assessment employers who have to comply with Italy's Legislative Decree 81/2008 (transposing into law European Union Directives 2003/10/EC, 2002/44/EC, 2004/40/EC and 2006/25/EC). The database currently contains measurements and declared European Community (EC) values from over 2540 machines; in particular, the database hosts data on mechanical vibration from over 1430 hand-held power tools (e.g., pneumatic and electric hammers, chainsaws, grinders, drills, sanders and saws) and from over 1020 whole-body machines (e.g., buses, fork lifts and wheel tractors). The database is continuously updated as soon as new experimental and declared data are acquired.",2014-01-01 +28398502,PyGOLD: a python based API for docking based virtual screening workflow generation.,"

Motivation

Molecular docking is one of the successful approaches in structure based discovery and development of bioactive molecules in chemical biology and medicinal chemistry. Due to the huge amount of computational time that is still required, docking is often the last step in a virtual screening approach. Such screenings are set as workflows spanned over many steps, each aiming at different filtering task. These workflows can be automatized in large parts using python based toolkits except for docking using the docking software GOLD. However, within an automated virtual screening workflow it is not feasible to use the GUI in between every step to change the GOLD configuration file. Thus, a python module called PyGOLD was developed, to parse, edit and write the GOLD configuration file and to automate docking based virtual screening workflows.

Availability and implementation

The latest version of PyGOLD, its documentation and example scripts are available at: http://www.ccb.tu-dortmund.de/koch or http://www.agkoch.de. PyGOLD is implemented in Python and can be imported as a standard python module without any further dependencies.

Contact

oliver.koch@agkoch.de, oliver.koch@tu-dortmund.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +27570648,Ion Channel ElectroPhysiology Ontology (ICEPO) - a case study of text mining assisted ontology development.,"

Background

Computational modeling of biological cascades is of great interest to quantitative biologists. Biomedical text has been a rich source for quantitative information. Gathering quantitative parameters and values from biomedical text is one significant challenge in the early steps of computational modeling as it involves huge manual effort. While automatically extracting such quantitative information from bio-medical text may offer some relief, lack of ontological representation for a subdomain serves as impedance in normalizing textual extractions to a standard representation. This may render textual extractions less meaningful to the domain experts.

Methods

In this work, we propose a rule-based approach to automatically extract relations involving quantitative data from biomedical text describing ion channel electrophysiology. We further translated the quantitative assertions extracted through text mining to a formal representation that may help in constructing ontology for ion channel events using a rule based approach. We have developed Ion Channel ElectroPhysiology Ontology (ICEPO) by integrating the information represented in closely related ontologies such as, Cell Physiology Ontology (CPO), and Cardiac Electro Physiology Ontology (CPEO) and the knowledge provided by domain experts.

Results

The rule-based system achieved an overall F-measure of 68.93% in extracting the quantitative data assertions system on an independently annotated blind data set. We further made an initial attempt in formalizing the quantitative data assertions extracted from the biomedical text into a formal representation that offers potential to facilitate the integration of text mining into ontological workflow, a novel aspect of this study.

Conclusions

This work is a case study where we created a platform that provides formal interaction between ontology development and text mining. We have achieved partial success in extracting quantitative assertions from the biomedical text and formalizing them in ontological framework.

Availability

The ICEPO ontology is available for download at http://openbionlp.org/mutd/supplementarydata/ICEPO/ICEPO.owl.",2016-07-20 +28817553,"Hypertension Prevalence, Awareness, Treatment, and Control Among Adults Aged ≥18 Years - Los Angeles County, 1999-2006 and 2007-2014.","Hypertension is an important and common risk factor for heart disease and stroke, two of the leading causes of death in adults in the United States. Despite considerable improvement in increasing the awareness, treatment, and control of hypertension, undiagnosed and uncontrolled hypertension remain public health challenges (1). Data from the National Health and Nutrition Examination Survey (NHANES) were used to estimate the prevalence of hypertension, as well as awareness, treatment, and control of hypertension among adults aged ≥18 years in Los Angeles County compared with adults aged ≥18 years in the United States during 1999-2006 and 2007-2014. During 2007-2014, the prevalence of hypertension was 23.1% among adults in Los Angeles County, lower than the prevalence of 29.6% among all U.S. adults. Among adults with hypertension in Los Angeles County, substantial improvements from 1999-2006 to 2007-2014 were found in hypertension awareness (increase from 73.8% to 84.6%), treatment (61.3% to 77.2%), and control (28.5% to 48.3%). Similar improvements were also seen among all U.S. adults. Although the prevalence of hypertension among adults in Los Angeles County meets the Healthy People 2020 (https://www.healthypeople.gov/) goal of ≤26.9%, continued progress is needed to meet the Healthy People 2020 goal of ≥61.2% for control of hypertension.",2017-08-18 +24013925,targetHub: a programmable interface for miRNA-gene interactions.,"

Motivation

With the expansion of high-throughput technologies, understanding different kinds of genome-level data is a common task. MicroRNA (miRNA) is increasingly profiled using high-throughput technologies (microarrays or next-generation sequencing). The downstream analysis of miRNA targets can be difficult. Although there are many databases and algorithms to predict miRNA targets, there are few tools to integrate miRNA-gene interaction data into high-throughput genomic analyses.

Results

We present targetHub, a CouchDB database of miRNA-gene interactions. TargetHub provides a programmer-friendly interface to access miRNA targets. The Web site provides RESTful access to miRNA-gene interactions with an assortment of gene and miRNA identifiers. It can be a useful tool to integrate miRNA target interaction data directly into high-throughput bioinformatics analyses.

Availability

TargetHub is available on the web at http://app1.bioinformatics.mdanderson.org/tarhub/_design/basic/index.html.",2013-09-06 +25258493,VMD-SS: A graphical user interface plug-in to calculate the protein secondary structure in VMD program.,"

Unlabelled

The investigation on the types of secondary structure (SS) of a protein is important. The evolution of secondary structures during molecular dynamics simulations is a useful parameter to analyze protein structures. Therefore, it is of interest to describe VMD-SS (a software program) for the identification of secondary structure elements and its trajectories during simulation for known structures available at the Protein Data Bank (PDB). The program helps to calculate (1) percentage SS, (2) SS occurrence in each residue, (3) percentage SS during simulation, and (4) percentage residues in all SS types during simulation. The VMD-SS plug-in was designed using TCL script and stride to calculate secondary structure features.

Availability

The database is available for free at http://science.scu.ac.ir/HomePage.aspx?TabID=13755.",2014-08-30 +27899586,R-loopDB: a database for R-loop forming sequences (RLFS) and R-loops.,"R-loopDB (http://rloop.bii.a-star.edu.sg) was originally constructed as a collection of computationally predicted R-loop forming sequences (RLFSs) in the human genic regions. The renewed R-loopDB provides updates, improvements and new options, including access to recent experimental data. It includes genome-scale prediction of RLFSs for humans, six other animals and yeast. Using the extended quantitative model of RLFSs (QmRLFS), we significantly increased the number of RLFSs predicted in the human genes and identified RLFSs in other organism genomes. R-loopDB allows searching of RLFSs in the genes and in the 2 kb upstream and downstream flanking sequences of any gene. R-loopDB exploits the Ensembl gene annotation system, providing users with chromosome coordinates, sequences, gene and genomic data of the 1 565 795 RLFSs distributed in 121 056 genic or proximal gene regions of the covered organisms. It provides a comprehensive annotation of Ensembl RLFS-positive genes including 93 454 protein coding genes, 12 480 long non-coding RNA and 7 568 small non-coding RNA genes and 7 554 pseudogenes. Using new interface and genome viewers of R-loopDB, users can search the gene(s) in multiple species with keywords in a single query. R-loopDB provides tools to carry out comparative evolution and genome-scale analyses in R-loop biology.",2016-11-28 +24304897,Plasma Proteome Database as a resource for proteomics research: 2014 update.,"Plasma Proteome Database (PPD; http://www.plasmaproteomedatabase.org/) was initially described in the year 2005 as a part of Human Proteome Organization's (HUPO's) pilot initiative on Human Plasma Proteome Project. Since then, improvements in proteomic technologies and increased throughput have led to identification of a large number of novel plasma proteins. 
To keep up with this increase in data, we have significantly enriched the proteomic information in PPD. This database currently contains information on 10,546 proteins detected in serum/plasma of which 3784 have been reported in two or more studies. The latest version of the database also incorporates mass spectrometry-derived data including experimentally verified proteotypic peptides used for multiple reaction monitoring assays. Other novel features include published plasma/serum concentrations for 1278 proteins along with a separate category of plasma-derived extracellular vesicle proteins. As plasma proteins have become a major thrust in the field of biomarkers, we have enabled a batch-based query designated Plasma Proteome Explorer, which will permit the users in screening a list of proteins or peptides against known plasma proteins to assess novelty of their data set. We believe that PPD will facilitate both clinical and basic research by serving as a comprehensive reference of plasma proteins in humans and accelerate biomarker discovery and translation efforts.",2013-12-03 +29029258,Acupuncture Improves Peri-menopausal Insomnia: A Randomized Controlled Trial. ,"To evaluate the short-term efficacy of acupuncture for the treatment of peri-menopausal insomnia (PMI). Design: A randomized, participant-blind, placebo-controlled trial consisted of the acupuncture group (n = 38) and placebo-acupuncture group (n = 38). Setting: A tertiary teaching and general hospital. Participants: 76 peri-menopausal women with insomnia disorder based on the International Classification of Sleep Disorders, Third Edition. Interventions: A 10-session of acupuncture at bilateral Shenshu (BL 23) and Ganshu (BL 18) with unilateral Qimen (LR 14) and Jingmen (GB 25) or Streitberger needles at the same acupoints was performed for over 3 weeks. 
Measurements: Pittsburgh Sleep Quality Index (PSQI) and Insomnia Severity Index (ISI) with over-night polysomnography (PSG) exam were completed at baseline and post-treatment. After the treatments, the decrease from baseline in PSQI score was 8.03 points in acupuncture group and 1.29 points in placebo-acupuncture group. The change from baseline in ISI score was 11.35 points in acupuncture group and 2.87 points in placebo-acupuncture group. In PSG data, acupuncture significantly improved the sleep efficiency and total sleep time, associated with less wake after sleep onset and lower percent stage 1 after the treatment. No significant differences from baseline to post-treatment were found in placebo-acupuncture group. Acupuncture can contribute to a clinically relevant improvement in the short-term treatment of PMI, both subjectively and objectively. Acupuncture for peri-menopause insomnia: a randomized controlled trial, http://www.chictr.org.cn/showproj.aspx?proj=12118 ChiCTR-IPR-15007199, China.",2017-11-01 +27512372,volBrain: An Online MRI Brain Volumetry System.,"The amount of medical image data produced in clinical and research settings is rapidly growing resulting in vast amount of data to analyze. Automatic and reliable quantitative analysis tools, including segmentation, allow to analyze brain development and to understand specific patterns of many neurological diseases. This field has recently experienced many advances with successful techniques based on non-linear warping and label fusion. In this work we present a novel and fully automatic pipeline for volumetric brain analysis based on multi-atlas label fusion technology that is able to provide accurate volumetric information at different levels of detail in a short time. This method is available through the volBrain online web interface (http://volbrain.upv.es), which is publically and freely accessible to the scientific community. 
Our new framework has been compared with current state-of-the-art methods showing very competitive results.",2016-07-27 +27531105,ImmQuant: a user-friendly tool for inferring immune cell-type composition from gene-expression data.,": The composition of immune-cell subsets is key to the understanding of major diseases and pathologies. Computational deconvolution methods enable researchers to investigate immune cell quantities in complex tissues based on transcriptome data. Here we present ImmQuant, a software tool allowing immunologists to upload transcription profiles of multiple tissue samples, apply deconvolution methodology to predict differences in cell-type quantities between the samples, and then inspect the inferred cell-type alterations using convenient visualization tools. ImmQuant builds on the DCQ deconvolution algorithm and allows a user-friendly utilization of this method by non-bioinformatician researchers. Specifically, it enables investigation of hundreds of immune cell subsets in mouse tissues, as well as a few dozen cell types in human samples.

Availability and implementation

ImmQuant is available for download at http://csgi.tau.ac.il/ImmQuant/ CONTACT: iritgv@post.tau.ac.il Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-16 +25593347,The 2015 Nucleic Acids Research Database Issue and molecular biology database collection.,"The 2015 Nucleic Acids Research Database Issue contains 172 papers that include descriptions of 56 new molecular biology databases, and updates on 115 databases whose descriptions have been previously published in NAR or other journals. Following the classification that has been introduced last year in order to simplify navigation of the entire issue, these articles are divided into eight subject categories. This year's highlights include RNAcentral, an international community portal to various databases on noncoding RNA; ValidatorDB, a validation database for protein structures and their ligands; SASBDB, a primary repository for small-angle scattering data of various macromolecular complexes; MoonProt, a database of 'moonlighting' proteins, and two new databases of protein-protein and other macromolecular complexes, ComPPI and the Complex Portal. This issue also includes an unusually high number of cancer-related databases and other databases dedicated to genomic basics of disease and potential drugs and drug targets. The size of NAR online Molecular Biology Database Collection, http://www.oxfordjournals.org/nar/database/a/, remained approximately the same, following the addition of 74 new resources and removal of 77 obsolete web sites. The entire Database Issue is freely available online on the Nucleic Acids Research web site (http://nar.oxfordjournals.org/).",2015-01-01 +29067766,MolProbity: More and better reference data for improved all-atom structure validation.,"This paper describes the current update on macromolecular model validation services that are provided at the MolProbity website, emphasizing changes and additions since the previous review in 2010. 
There have been many infrastructure improvements, including rewrite of previous Java utilities to now use existing or newly written Python utilities in the open-source CCTBX portion of the Phenix software system. This improves long-term maintainability and enhances the thorough integration of MolProbity-style validation within Phenix. There is now a complete MolProbity mirror site at http://molprobity.manchester.ac.uk. GitHub serves our open-source code, reference datasets, and the resulting multi-dimensional distributions that define most validation criteria. Coordinate output after Asn/Gln/His ""flip"" correction is now more idealized, since the post-refinement step has apparently often been skipped in the past. Two distinct sets of heavy-atom-to-hydrogen distances and accompanying van der Waals radii have been researched and improved in accuracy, one for the electron-cloud-center positions suitable for X-ray crystallography and one for nuclear positions. New validations include messages at input about problem-causing format irregularities, updates of Ramachandran and rotamer criteria from the million quality-filtered residues in a new reference dataset, the CaBLAM Cα-CO virtual-angle analysis of backbone and secondary structure for cryoEM or low-resolution X-ray, and flagging of the very rare cis-nonProline and twisted peptides which have recently been greatly overused. Due to wide application of MolProbity validation and corrections by the research community, in Phenix, and at the worldwide Protein Data Bank, newly deposited structures have continued to improve greatly as measured by MolProbity's unique all-atom clashscore.",2017-11-27 +27556597,A hierarchical model for clustering m(6)A methylation peaks in MeRIP-seq data.,"

Background

The recent advent of the state-of-the-art high throughput sequencing technology, known as Methylated RNA Immunoprecipitation combined with RNA sequencing (MeRIP-seq) revolutionizes the area of mRNA epigenetics and enables the biologists and biomedical researchers to have a global view of N(6)-Methyladenosine (m(6)A) on the transcriptome. Yet there is a significant need for new computational tools for processing and analysing MeRIP-Seq data to gain a further insight into the function of m(6)A mRNA methylation.

Results

We developed a novel algorithm and an open source R package ( http://compgenomics.utsa.edu/metcluster ) for uncovering the potential types of m(6)A methylation by clustering the degree of m(6)A methylation peaks in MeRIP-Seq data. This algorithm utilizes a hierarchical graphical model to model the read count variance and the underlying clusters of the methylation peaks. Rigorous statistical inference is performed to estimate the model parameter and detect the number of clusters. MeTCluster is evaluated on both simulated and real MeRIP-seq datasets and the results demonstrate its high accuracy in characterizing the clusters of methylation peaks. Our algorithm was applied to two different sets of real MeRIP-seq datasets and reveals a novel pattern that methylation peaks with less peak enrichment tend to be clustered in the 5' end of both mRNAs and lncRNAs, whereas those with higher peak enrichment are more likely to be distributed in CDS and towards the 3' end of mRNAs and lncRNAs. This result might suggest that m(6)A's functions could be location specific.

Conclusions

In this paper, a novel hierarchical graphical model based algorithm was developed for clustering the enrichment of methylation peaks in MeRIP-seq data. MeTCluster is written in R and is publicly available.",2016-08-22 +25647319,PON-P2: prediction method for fast and reliable identification of harmful variants.,"More reliable and faster prediction methods are needed to interpret enormous amounts of data generated by sequencing and genome projects. We have developed a new computational tool, PON-P2, for classification of amino acid substitutions in human proteins. The method is a machine learning-based classifier and groups the variants into pathogenic, neutral and unknown classes, on the basis of random forest probability score. PON-P2 is trained using pathogenic and neutral variants obtained from VariBench, a database for benchmark variation datasets. PON-P2 utilizes information about evolutionary conservation of sequences, physical and biochemical properties of amino acids, GO annotations and if available, functional annotations of variation sites. Extensive feature selection was performed to identify 8 informative features among altogether 622 features. PON-P2 consistently showed superior performance in comparison to existing state-of-the-art tools. In 10-fold cross-validation test, its accuracy and MCC are 0.90 and 0.80, respectively, and in the independent test, they are 0.86 and 0.71, respectively. The coverage of PON-P2 is 61.7% in the 10-fold cross-validation and 62.1% in the test dataset. PON-P2 is a powerful tool for screening harmful variants and for ranking and prioritizing experimental characterization. It is very fast making it capable of analyzing large variant datasets. PON-P2 is freely available at http://structure.bmc.lu.se/PON-P2/.",2015-02-03 +27657141,Microarray Data Processing Techniques for Genome-Scale Network Inference from Large Public Repositories. ,"Pre-processing of microarray data is a well-studied problem. 
Furthermore, all popular platforms come with their own recommended best practices for differential analysis of genes. However, for genome-scale network inference using microarray data collected from large public repositories, these methods filter out a considerable number of genes. This is primarily due to the effects of aggregating a diverse array of experiments with different technical and biological scenarios. Here we introduce a pre-processing pipeline suitable for inferring genome-scale gene networks from large microarray datasets. We show that partitioning of the available microarray datasets according to biological relevance into tissue- and process-specific categories significantly extends the limits of downstream network construction. We demonstrate the effectiveness of our pre-processing pipeline by inferring genome-scale networks for the model plant Arabidopsis thaliana using two different construction methods and a collection of 11,760 Affymetrix ATH1 microarray chips. Our pre-processing pipeline and the datasets used in this paper are made available at http://alurulab.cc.gatech.edu/microarray-pp.",2016-09-19 +28444139,Multi-label classifier based on histogram of gradients for predicting the anatomical therapeutic chemical class/classes of a given compound.,"

Motivation

Given an unknown compound, is it possible to predict its Anatomical Therapeutic Chemical class/classes? This is a challenging yet important problem since such a prediction could be used to deduce not only a compound's possible active ingredients but also its therapeutic, pharmacological and chemical properties, thereby substantially expediting the pace of drug development. The problem is challenging because some drugs and compounds belong to two or more ATC classes, making machine learning extremely difficult.

Results

In this article a multi-label classifier system is proposed that incorporates information about a compound's chemical-chemical interaction and its structural and fingerprint similarities to other compounds belonging to the different ATC classes. The proposed system reshapes a 1D feature vector to obtain a 2D matrix representation of the compound. This matrix is then described by a histogram of gradients that is fed into a Multi-Label Learning with Label-Specific Features classifier. Rigorous cross-validations demonstrate the superior prediction quality of this method compared with other state-of-the-art approaches developed for this problem, a superiority that is reflected particularly in the absolute true rate, the most important and harshest metric for assessing multi-label systems.

Availability and implementation

The MATLAB code for replicating the experiments presented in this article is available at https://www.dropbox.com/s/7v1mey48tl9bfgz/ToolPaperATC.rar?dl=0 .

Contact

loris.nanni@unipd.it.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +27307630,Linear effects models of signaling pathways from combinatorial perturbation data.,"

Motivation

Perturbations constitute the central means to study signaling pathways. Interrupting components of the pathway and analyzing observed effects of those interruptions can give insight into unknown connections within the signaling pathway itself, as well as the link from the pathway to the effects. Different pathway components may have different individual contributions to the measured perturbation effects, such as gene expression changes. Those effects will be observed in combination when the pathway components are perturbed. Extant approaches focus either on the reconstruction of pathway structure or on resolving how the pathway components control the downstream effects.

Results

Here, we propose a linear effects model, which can be applied to solve both these problems from combinatorial perturbation data. We use simulated data to demonstrate the accuracy of learning the pathway structure as well as estimation of the individual contributions of pathway components to the perturbation effects. The practical utility of our approach is illustrated by an application to perturbations of the mitogen-activated protein kinase pathway in Saccharomyces cerevisiae. Availability and Implementation: lem is available as an R package at http://www.mimuw.edu.pl/∼szczurek/lem

Contact

szczurek@mimuw.edu.pl; niko.beerenwinkel@bsse.ethz.ch

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +26271256,DeDaL: Cytoscape 3 app for producing and morphing data-driven and structure-driven network layouts.,"

Background

Visualization and analysis of molecular profiling data together with biological networks are able to provide new mechanistic insights into biological functions. Currently, it is possible to visualize high-throughput data on top of pre-defined network layouts, but they are not always adapted to a given data analysis task. A network layout based simultaneously on the network structure and the associated multidimensional data might be advantageous for data visualization and analysis in some cases.

Results

We developed a Cytoscape app, which allows constructing biological network layouts based on the data from molecular profiles imported as values of node attributes. DeDaL is a Cytoscape 3 app, which uses linear and non-linear algorithms of dimension reduction to produce data-driven network layouts based on multidimensional data (typically gene expression). DeDaL implements several data pre-processing and layout post-processing steps such as continuous morphing between two arbitrary network layouts and aligning one network layout with respect to another one by rotating and mirroring. The combination of all these functionalities facilitates the creation of insightful network layouts representing both structural network features and correlation patterns in multivariate data. We demonstrate the added value of applying DeDaL in several practical applications, including an example of a large protein-protein interaction network.

Conclusions

DeDaL is a convenient tool for applying data dimensionality reduction methods and for designing insightful data displays based on data-driven layouts of biological networks, built within Cytoscape environment. DeDaL is freely available for downloading at http://bioinfo-out.curie.fr/projects/dedal/.",2015-08-14 +28898358,Relevance of the Implementation of Teeth in Three-Dimensional Vocal Tract Models.,"

Purpose

Recently, efforts have been made to investigate the vocal tract using magnetic resonance imaging (MRI). Due to technical limitations, teeth were omitted in many previous studies on vocal tract acoustics. However, the knowledge of how teeth influence vocal tract acoustics might be important in order to estimate the necessity of implementing teeth in vocal tract models. The aim of this study was therefore to estimate the effect of teeth on vocal tract acoustics.

Method

The acoustic properties of 18 solid (3-dimensional printed) vocal tract models without teeth were compared to the same 18 models including teeth in terms of resonance frequencies (fRn). The fRn were obtained from the transfer functions of these models excited by white noise at the glottis level. The models were derived from MRI data of 2 trained singers performing 3 different vowel conditions (/i/, /a/, and /u/) in speech and low-pitched and high-pitched singing.

Results

Depending on the oral configuration, models exhibiting side cavities or side branches were characterized by major changes in the transfer function when teeth were implemented via the introduction of pole-zero pairs.

Conclusions

To avoid errors in modeling, teeth should be included in 3-dimensional vocal tract models for acoustic evaluation.

Supplemental material

https://doi.org/10.23641/asha.5386771.",2017-09-01 +28498899,MAGenTA: a Galaxy implemented tool for complete Tn-Seq analysis and data visualization.,"

Motivation

Transposon insertion sequencing (Tn-Seq) is a microbial systems-level tool, that can determine on a genome-wide scale and in high-throughput, whether a gene, or a specific genomic region, is important for fitness under a specific experimental condition.

Results

Here, we present MAGenTA, a suite of analysis tools which accurately calculate the growth rate for each disrupted gene in the genome to enable the discovery of: (i) new leads for gene function, (ii) non-coding RNAs; (iii) genes, pathways and ncRNAs that are involved in tolerating drugs or induce disease; (iv) higher order genome organization; and (v) host-factors that affect bacterial host susceptibility. MAGenTA is a complete Tn-Seq analysis pipeline making sensitive genome-wide fitness (i.e. growth rate) analysis available for most transposons and Tn-Seq associated approaches (e.g. TraDis, HiTS, IN-Seq) and includes fitness (growth rate) calculations, sliding window analysis, bottleneck calculations and corrections, statistics to compare experiments and strains and genome-wide fitness visualization.

Availability and implementation

MAGenTA is available at the Galaxy public ToolShed repository and all source code can be found and are freely available at https://vanopijnenlab.github.io/MAGenTA/ .

Contact

vanopijn@bc.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +27577383,Collection of Medical Original Data with Search Engine for Decision Support.,"Medicine is becoming more and more complex and humans can capture total medical knowledge only partially. For specific access a high resolution search engine is demonstrated, which allows besides conventional text search also search of precise quantitative data of medical findings, therapies and results. Users can define metric spaces (""Domain Spaces"", DSs) with all searchable quantitative data (""Domain Vectors"", DSs). An implementation of the search engine is online in http://numericsearch.com. In future medicine the doctor could make first a rough diagnosis and check which fine diagnostics (quantitative data) colleagues had collected in such a case. Then the doctor decides about fine diagnostics and results are sent (half automatically) to the search engine which filters a group of patients which best fits to these data. In this specific group variable therapies can be checked with associated therapeutic results, like in an individual scientific study for the current patient. The statistical (anonymous) results could be used for specific decision support. Reversely the therapeutic decision (in the best case with later results) could be used to enhance the collection of precise pseudonymous medical original data which is used for better and better statistical (anonymous) search results.",2016-01-01 +21441431,PlaNet: combined sequence and expression comparisons across plant networks derived from seven species.,"The model organism Arabidopsis thaliana is readily used in basic research due to resource availability and relative speed of data acquisition. A major goal is to transfer acquired knowledge from Arabidopsis to crop species. However, the identification of functional equivalents of well-characterized Arabidopsis genes in other plants is a nontrivial task. 
It is well documented that transcriptionally coordinated genes tend to be functionally related and that such relationships may be conserved across different species and even kingdoms. To exploit such relationships, we constructed whole-genome coexpression networks for Arabidopsis and six important plant crop species. The interactive networks, clustered using the HCCA algorithm, are provided under the banner PlaNet (http://aranet.mpimp-golm.mpg.de). We implemented a comparative network algorithm that estimates similarities between network structures. Thus, the platform can be used to swiftly infer similar coexpressed network vicinities within and across species and can predict the identity of functional homologs. We exemplify this using the PSA-D and chalcone synthase-related gene networks. Finally, we assessed how ontology terms are transcriptionally connected in the seven species and provide the corresponding MapMan term coexpression networks. The data support the contention that this platform will considerably improve transfer of knowledge generated in Arabidopsis to valuable crop species.",2011-03-25 +21516397,Online verification of human cell line identity by STR DNA typing.,"The main prerequisition for any research, development, or production programs involving cell lines is whether a cell line is authentic or not. Microsatellites in the human genome harboring short tandem repeat (STR) DNA markers allow the identification of individual cell lines at the DNA level. Polymerase chain reaction (PCR) amplification of eight highly polymorphic microsatellite STR loci and gender determination have been proven to be the best tools for screening the uniqueness of DNA profiles in an STR database. The main Biological Resource Centers (BRCs), ATCC, DSMZ, JCRB, and RIKEN, have generated large databases of STR cell line profiles for identity control. 
In cooperation with the Japanese BRCs, DSMZ has piloted the generation of the most comprehensive international reference database, which is linked to a simple search engine for interrogating STR cell line profiles. The tool of online -verification of cell line identities is available on the respective homepages of JCRB and DSMZ ( http://cellbank.nibio.go.jp/cellbank_e.html , http://www.dsmz.de/STRanalysis ). The following sections describe a rapid, practical, inexpensive, and reliable method available to students, technicians, and scientists.",2011-01-01 +25300491,TrypanoCyc: a community-led biochemical pathways database for Trypanosoma brucei.,"The metabolic network of a cell represents the catabolic and anabolic reactions that interconvert small molecules (metabolites) through the activity of enzymes, transporters and non-catalyzed chemical reactions. Our understanding of individual metabolic networks is increasing as we learn more about the enzymes that are active in particular cells under particular conditions and as technologies advance to allow detailed measurements of the cellular metabolome. Metabolic network databases are of increasing importance in allowing us to contextualise data sets emerging from transcriptomic, proteomic and metabolomic experiments. Here we present a dynamic database, TrypanoCyc (http://www.metexplore.fr/trypanocyc/), which describes the generic and condition-specific metabolic network of Trypanosoma brucei, a parasitic protozoan responsible for human and animal African trypanosomiasis. 
In addition to enabling navigation through the BioCyc-based TrypanoCyc interface, we have also implemented a network-based representation of the information through MetExplore, yielding a novel environment in which to visualise the metabolism of this important parasite.",2014-10-09 +24170806,SporeWeb: an interactive journey through the complete sporulation cycle of Bacillus subtilis.,"Bacterial spores are a continuous problem for both food-based and health-related industries. Decades of scientific research dedicated towards understanding molecular and gene regulatory aspects of sporulation, spore germination and spore properties have resulted in a wealth of data and information. To facilitate obtaining a complete overview as well as new insights concerning this complex and tightly regulated process, we have developed a database-driven knowledge platform called SporeWeb (http://sporeweb.molgenrug.nl) that focuses on gene regulatory networks during sporulation in the Gram-positive bacterium Bacillus subtilis. Dynamic features allow the user to navigate through all stages of sporulation with review-like descriptions, schematic overviews on transcriptional regulation and detailed information on all regulators and the genes under their control. The Web site supports data acquisition on sporulation genes and their expression, regulon network interactions and direct links to other knowledge platforms or relevant literature. The information found on SporeWeb (including figures and tables) can and will be updated as new information becomes available in the literature. 
In this way, SporeWeb offers a novel, convenient and timely reference, an information source and a data acquisition tool that will aid in the general understanding of the dynamics of the complete sporulation cycle.",2013-10-28 +28844466,Fuzzy mutual information based grouping and new fitness function for PSO in selection of miRNAs in cancer.,"MicroRNAs (miRNA) are one of the important regulators of cell division and also responsible for cancer development. Among the discovered miRNAs, not all are important for cancer detection. In this regard a fuzzy mutual information (FMI) based grouping and miRNA selection method (FMIGS) is developed to identify the miRNAs responsible for a particular cancer. First, the miRNAs are ranked and divided into several groups. Then the most important group is selected among the generated groups. Both the steps viz., ranking of miRNAs and selection of the most relevant group of miRNAs, are performed using FMI. Here the number of groups is automatically determined by the grouping method. After the selection process, redundant miRNAs are removed from the selected set of miRNAs as per user's necessity. In a part of the investigation we proposed a FMI based particle swarm optimization (PSO) method for selecting relevant miRNAs, where FMI is used as a fitness function to determine the fitness of the particles. The effectiveness of FMIGS and FMI based PSO is tested on five data sets and their efficiency in selecting relevant miRNAs are demonstrated. The superior performance of FMIGS to some existing methods are established and the biological significance of the selected miRNAs is observed by the findings of the biological investigation and publicly available pathway analysis tools. The source code related to our investigation is available at http://www.jayanta.droppages.com/FMIGS.html.",2017-08-13 +25632108,CyanOmics: an integrated database of omics for the model cyanobacterium Synechococcus sp. PCC 7002. 
,"Cyanobacteria are an important group of organisms that carry out oxygenic photosynthesis and play vital roles in both the carbon and nitrogen cycles of the Earth. The annotated genome of Synechococcus sp. PCC 7002, as an ideal model cyanobacterium, is available. A series of transcriptomic and proteomic studies of Synechococcus sp. PCC 7002 cells grown under different conditions have been reported. However, no database of such integrated omics studies has been constructed. Here we present CyanOmics, a database based on the results of Synechococcus sp. PCC 7002 omics studies. CyanOmics comprises one genomic dataset, 29 transcriptomic datasets and one proteomic dataset and should prove useful for systematic and comprehensive analysis of all those data. Powerful browsing and searching tools are integrated to help users directly access information of interest with enhanced visualization of the analytical results. Furthermore, Blast is included for sequence-based similarity searching and Cluster 3.0, as well as the R hclust function is provided for cluster analyses, to increase CyanOmics's usefulness. To the best of our knowledge, it is the first integrated omics analysis database for cyanobacteria. This database should further understanding of the transcriptional patterns, and proteomic profiling of Synechococcus sp. PCC 7002 and other cyanobacteria. Additionally, the entire database framework is applicable to any sequenced prokaryotic genome and could be applied to other integrated omics analysis projects. Database URL: http://lag.ihb.ac.cn/cyanomics.",2015-01-28 +24608033,The EMPRES-i genetic module: a novel tool linking epidemiological outbreak information and genetic characteristics of influenza viruses.,"Combining epidemiological information, genetic characterization and geomapping in the analysis of influenza can contribute to a better understanding and description of influenza epidemiology and ecology, including possible virus reassortment events. 
Furthermore, integration of information such as agroecological farming system characteristics can provide new knowledge on risk factors of influenza emergence and spread. Integrating viral characteristics into an animal disease information system is therefore expected to provide a unique tool to trace-and-track particular virus strains; generate clade distributions and spatiotemporal clusters; screen for distribution of viruses with specific molecular markers; identify potential risk factors; and analyze or map viral characteristics related to vaccines used for control and/or prevention. For this purpose, a genetic module was developed within EMPRES-i (FAO's global animal disease information system) linking epidemiological information from influenza events with virus characteristics and enabling combined analysis. An algorithm was developed to act as the interface between EMPRES-i disease event data and publicly available influenza virus sequences in OpenfluDB. This algorithm automatically computes potential links between outbreak event and sequences, which are subsequently manually validated by experts. Subsequently, other virus characteristics such as antiviral resistance can then be associated to outbreak data. To visualize such characteristics on a geographic map, shape files with virus characteristics to overlay on other EMPRES-i map layers (e.g. animal densities) can be generated. The genetic module allows export of associated epidemiological and sequence data for further analysis. FAO has made this tool available for scientists and policy makers. Contributions are expected from users to improve and validate the number of linked influenza events and isolate information as well as the quality of information. Possibilities to interconnect with other influenza sequence databases or to expand the genetic module to other viral diseases (e.g. foot and mouth disease) are being explored. 
Database OpenfluDB URL: http://openflu.vital-it.ch Database EMPRES-i URL: http://EMPRES-i.fao.org/.",2014-03-06 +29653244,Opioid use following cervical spine surgery: trends and factors associated with long-term use.,"

Background context

Limited or no data exist evaluating risk factors associated with prolonged opioid use following cervical arthrodesis.

Purpose

The objectives of this study were to assess trends in postoperative narcotic use among preoperative opioid users (OUs) versus non-opioid users (NOUs) and to identify factors associated with postoperative narcotic use at 1 year following cervical arthrodesis.

Study design/setting

This is a retrospective observational study.

Patient sample

The patient sample included 17,391 patients (OU: 52.4%) registered in the Humana Inc claims dataset who underwent anterior cervical fusion (ACF) or posterior cervical fusion (PCF) between 2007 and 2015.

Outcome measures

Prolonged opioid usage was defined as narcotic prescription filling at 1 year following cervical arthrodesis.

Methods

Based on preoperative opioid use, patients were identified as an OU (history of narcotic prescription filled within 3 months before surgery) or a NOU (no preoperative prescription). Rates of opioid use were evaluated preoperatively for OU and trended for 1 year postoperatively for both OU and NOU. Multivariable regression techniques investigated factors associated with the use of narcotics at 1 year following ACF and PCF. Based on the model findings, a web-based interactive app was developed to estimate 1-year postoperative risk of using narcotics following cervical arthrodesis (http://neuro-risk.com/opiod-use/ or https://www.neurosurgerycost.com/opioid/opioid_use).

Results

Overall, 87.4% of the patients (n=15,204) underwent ACF, whereas 12.6% (n=2187) underwent PCF. At 1 month following surgery, 47.7% of NOUs and 82% of OUs had a filled opioid prescription. Rates of prescription opioids declined significantly to 7.8% in NOUs versus 50.5% in OUs at 3 months, but plateaued at the 6- to 12-month postoperative period (NOU: 5.7%-6.7%, OU: 44.9%-46.9%). At 1 year, significantly higher narcotic prescription filling rates were observed in OUs compared with NOUs (45.3% vs. 6.3%, p<.001). Preoperative opioid use was a significant driver of 1-year narcotic use following ACF (odds ratio [OR]: 7.02, p<.001) and PCF (OR: 6.98, p<.001), along with younger age (≤50 years), history of drug dependence, and lower back pain.

Conclusions

Over 50% of the patients used opioids before cervical arthrodesis. Postoperative opioid use fell dramatically during the first 3 months in NOU, but nearly half of the preoperative OUs will remain on narcotics at 1 year postoperatively. Our findings serve as a baseline in identifying patients at risk of chronic use and encourage discontinuation of opioids before cervical spine surgery.",2018-04-10 +25037308,Text-mining-assisted biocuration workflows in Argo. ,"Biocuration activities have been broadly categorized into the selection of relevant documents, the annotation of biological concepts of interest and identification of interactions between the concepts. Text mining has been shown to have a potential to significantly reduce the effort of biocurators in all the three activities, and various semi-automatic methodologies have been integrated into curation pipelines to support them. We investigate the suitability of Argo, a workbench for building text-mining solutions with the use of a rich graphical user interface, for the process of biocuration. Central to Argo are customizable workflows that users compose by arranging available elementary analytics to form task-specific processing units. A built-in manual annotation editor is the single most used biocuration tool of the workbench, as it allows users to create annotations directly in text, as well as modify or delete annotations created by automatic processing components. Apart from syntactic and semantic analytics, the ever-growing library of components includes several data readers and consumers that support well-established as well as emerging data interchange formats such as XMI, RDF and BioC, which facilitate the interoperability of Argo with other platforms or resources. To validate the suitability of Argo for curation activities, we participated in the BioCreative IV challenge whose purpose was to evaluate Web-based systems addressing user-defined biocuration tasks. 
Argo proved to have the edge over other systems in terms of flexibility of defining biocuration tasks. As expected, the versatility of the workbench inevitably lengthened the time the curators spent on learning the system before taking on the task, which may have affected the usability of Argo. The participation in the challenge gave us an opportunity to gather valuable feedback and identify areas of improvement, some of which have already been introduced. Database URL: http://argo.nactem.ac.uk.",2014-07-18 +23664230,Application of the Bayesian approach for derivation of PDFs for concentration ratio values.,"Concentration ratios (CRs) are used to derive activity concentrations in wild plants and animals. Usually, compilations of CR values encompass a wide range of element-organism combinations, extracted from different studies with statistical information reported at varying degrees of detail. To produce a more robust estimation of distribution parameters, data from different studies are normally pooled using classical statistical methods. However, there is inherent subjectivity involved in pooling CR data in the sense that there is a tacit assumption that the CRs under any arbitrarily defined biota category belong to the same population. Here, Bayesian inference has been introduced as an alternative way of making estimates of distribution parameters of CRs. This approach, in contrast to classical methods, is more flexible and also allows us to define the various assumptions required, when combining data, in a more explicit manner. 
Taking selected data from the recently compiled wildlife transfer database (http://www.wildlifetransferdatabase.org/) as a working example, attempts are made to refine the pooling approaches previously used and to consider situations when empirical data are limited.",2013-05-10 +29019671,Deep Learning Based Regression and Multiclass Models for Acute Oral Toxicity Prediction with Automatic Chemical Feature Extraction.,"Median lethal death, LD50, is a general indicator of compound acute oral toxicity (AOT). Various in silico methods were developed for AOT prediction to reduce costs and time. In this study, we developed an improved molecular graph encoding convolutional neural networks (MGE-CNN) architecture to construct three types of high-quality AOT models: regression model (deepAOT-R), multiclassification model (deepAOT-C), and multitask model (deepAOT-CR). These predictive models highly outperformed previously reported models. For the two external data sets containing 1673 (test set I) and 375 (test set II) compounds, the R2 and mean absolute errors (MAEs) of deepAOT-R on the test set I were 0.864 and 0.195, and the prediction accuracies of deepAOT-C were 95.5% and 96.3% on test sets I and II, respectively. The two external prediction accuracies of deepAOT-CR are 95.0% and 94.1%, while the R2 and MAE are 0.861 and 0.204 for test set I, respectively. We then performed forward and backward exploration of deepAOT models for deep fingerprints, which could support shallow machine learning methods more efficiently than traditional fingerprints or descriptors. We further performed automatic feature learning, a key essence of deep learning, to map the corresponding activation values into fragment space and derive AOT-related chemical substructures by reverse mining of the features. Our deep learning architecture for AOT is generally applicable in predicting and exploring other toxicity or property end points of chemical compounds. 
The two deepAOT models are freely available at http://repharma.pku.edu.cn/DLAOT/DLAOThome.php or http://www.pkumdl.cn/DLAOT/DLAOThome.php .",2017-10-27 +24170807,The YEASTRACT database: an upgraded information system for the analysis of gene and genomic transcription regulation in Saccharomyces cerevisiae.,"The YEASTRACT (http://www.yeastract.com) information system is a tool for the analysis and prediction of transcription regulatory associations in Saccharomyces cerevisiae. Last updated in June 2013, this database contains over 200,000 regulatory associations between transcription factors (TFs) and target genes, including 326 DNA binding sites for 113 TFs. All regulatory associations stored in YEASTRACT were revisited and new information was added on the experimental conditions in which those associations take place and on whether the TF is acting on its target genes as activator or repressor. Based on this information, new queries were developed allowing the selection of specific environmental conditions, experimental evidence or positive/negative regulatory effect. This release further offers tools to rank the TFs controlling a gene or genome-wide response by their relative importance, based on (i) the percentage of target genes in the data set; (ii) the enrichment of the TF regulon in the data set when compared with the genome; or (iii) the score computed using the TFRank system, which selects and prioritizes the relevant TFs by walking through the yeast regulatory network. We expect that with the new data and services made available, the system will continue to be instrumental for yeast biologists and systems biology researchers.",2013-10-28 +28099838,Osteoarthritis Year in Review 2016: biomarkers (biochemical markers).,"

Purpose

The aim of this ""Year in Review"" article is to summarize and discuss the implications of biochemical marker related articles published between the Osteoarthritis Research Society International (OARSI) 2015 Congress in Seattle and the OARSI 2016 Congress in Amsterdam.

Methods

The PubMed/MEDLINE bibliographic database was searched using the combined keywords: 'biomarker' and 'osteoarthritis'. The PubMed/MEDLINE literature search was conducted using the Advanced Search Builder function (http://www.ncbi.nlm.nih.gov/pubmed/advanced).

Results

Over two hundred new biomarker-related papers were published during the literature search period. Some papers identified new biomarkers whereas others explored the biological properties and clinical utility of existing markers. There were specific references to several adipocytokines including leptin and adiponectin. ADAM Metallopeptidase with Thrombospondin Type 1 motif 4 (ADAMTS-4) and aggrecan ARGS neo-epitope fragment (ARGS) in synovial fluid (SF) and plasma chemokine (CeC motif) ligand 3 (CCL3) were reported as potential new knee biomarkers. New and refined proteomic technologies and novel assays including a fluoro-microbead guiding chip (FMGC) for measuring C-telopeptide of type II collagen (CTX-II) in serum and urine and a novel magnetic nanoparticle-based technology (termed magnetic capture) for collecting and concentrating CTX-II, were described this past year.

Conclusion

There has been steady progress in osteoarthritis (OA) biomarker research in 2016. Several novel biomarkers were identified and new technologies have been developed for measuring existing biomarkers. However, there has been no ""quantum leap"" this past year and identification of novel early OA biomarkers remains challenging. During the past year, OARSI published a set of recommendations for the use of soluble biomarkers in clinical trials, which is a major step forward in the clinical use of OA biomarkers and bodes well for future OA biomarker development.",2017-01-16 +28093075,GAVIN: Gene-Aware Variant INterpretation for medical sequencing.,"We present Gene-Aware Variant INterpretation (GAVIN), a new method that accurately classifies variants for clinical diagnostic purposes. Classifications are based on gene-specific calibrations of allele frequencies from the ExAC database, likely variant impact using SnpEff, and estimated deleteriousness based on CADD scores for >3000 genes. In a benchmark on 18 clinical gene sets, we achieve a sensitivity of 91.4% and a specificity of 76.9%. This accuracy is unmatched by 12 other tools. We provide GAVIN as an online MOLGENIS service to annotate VCF files and as an open source executable for use in bioinformatic pipelines. It can be found at http://molgenis.org/gavin .",2017-01-16 +26761734,Generalized Canonical Time Warping.,"Temporal alignment of human motion has been of recent interest due to its applications in animation, tele-rehabilitation and activity recognition. This paper presents generalized canonical time warping (GCTW), an extension of dynamic time warping (DTW) and canonical correlation analysis (CCA) for temporally aligning multi-modal sequences from multiple subjects performing similar activities. 
GCTW extends previous work on DTW and CCA in several ways: (1) it combines CCA with DTW to align multi-modal data (e.g., video and motion capture data); (2) it extends DTW by using a linear combination of monotonic functions to represent the warping path, providing a more flexible temporal warp. Unlike exact DTW, which has quadratic complexity, we propose a linear time algorithm to minimize GCTW. (3) GCTW allows simultaneous alignment of multiple sequences. Experimental results on aligning multi-modal data, facial expressions, motion capture data and video illustrate the benefits of GCTW. The code is available at http://humansensing.cs.cmu.edu/ctw.",2016-02-01 +29723168,"Malaria Surveillance - United States, 2015.","

Problem/condition

Malaria in humans is caused by intraerythrocytic protozoa of the genus Plasmodium. These parasites are transmitted by the bite of an infective female Anopheles species mosquito. The majority of malaria infections in the United States occur among persons who have traveled to regions with ongoing malaria transmission. However, malaria is occasionally acquired by persons who have not traveled out of the country through exposure to infected blood products, congenital transmission, laboratory exposure, or local mosquitoborne transmission. Malaria surveillance in the United States is conducted to provide information on its occurrence (e.g., temporal, geographic, and demographic), guide prevention and treatment recommendations for travelers and patients, and facilitate transmission control measures if locally acquired cases are identified.

Period covered

This report summarizes confirmed malaria cases in persons with onset of illness in 2015 and summarizes trends in previous years.

Description of system

Malaria cases diagnosed by blood film microscopy, polymerase chain reaction, or rapid diagnostic tests are reported to local and state health departments by health care providers or laboratory staff members. Case investigations are conducted by local and state health departments, and reports are transmitted to CDC through the National Malaria Surveillance System (NMSS), the National Notifiable Diseases Surveillance System (NNDSS), or direct CDC consultations. CDC reference laboratories provide diagnostic assistance and conduct antimalarial drug resistance marker testing on blood samples submitted by health care providers or local or state health departments. This report summarizes data from the integration of all NMSS and NNDSS cases, CDC reference laboratory reports, and CDC clinical consultations.

Results

CDC received reports of 1,517 confirmed malaria cases, including one congenital case, with an onset of symptoms in 2015 among persons who received their diagnoses in the United States. Although the number of malaria cases diagnosed in the United States has been increasing since the mid-1970s, the number of cases decreased by 208 from 2014 to 2015. Among the regions of acquisition (Africa, West Africa, Asia, Central America, the Caribbean, South America, Oceania, and the Middle East), the only region with significantly fewer imported cases in 2015 compared with 2014 was West Africa (781 versus 969). Plasmodium falciparum, P. vivax, P. ovale, and P. malariae were identified in 67.4%, 11.7%, 4.1%, and 3.1% of cases, respectively. Less than 1% of patients were infected by two species. The infecting species was unreported or undetermined in 12.9% of cases. CDC provided diagnostic assistance for 13.1% of patients with confirmed cases and tested 15.0% of P. falciparum specimens for antimalarial resistance markers. Of the U.S. resident patients who reported purpose of travel, 68.4% were visiting friends or relatives. A lower proportion of U.S. residents with malaria reported taking any chemoprophylaxis in 2015 (26.5%) compared with 2014 (32.5%), and adherence was poor in this group. Among the U.S residents for whom information on chemoprophylaxis use and travel region were known, 95.3% of patients with malaria did not adhere to or did not take a CDC-recommended chemoprophylaxis regimen. Among women with malaria, 32 were pregnant, and none had adhered to chemoprophylaxis. A total of 23 malaria cases occurred among U.S. military personnel in 2015. Three cases of malaria were imported from the approximately 3,000 military personnel deployed to an Ebola-affected country; two of these were not P. falciparum species, and one species was unspecified. 
Among all reported cases in 2015, 17.1% were classified as severe illnesses and 11 persons died, compared with an average of 6.1 deaths per year during 2000-2014. In 2015, CDC received 153 P. falciparum-positive samples for surveillance of antimalarial resistance markers (although certain loci were untestable for some samples); genetic polymorphisms associated with resistance to pyrimethamine were identified in 132 (86.3%), to sulfadoxine in 112 (73.7%), to chloroquine in 48 (31.4%), to mefloquine in six (4.3%), and to artemisinin in one (<1%), and no sample had resistance to atovaquone. Completion of data elements on the malaria case report form decreased from 2014 to 2015 and remains low, with 24.2% of case report forms missing at least one key element (species, travel history, and resident status).

Interpretation

The decrease in malaria cases from 2014 to 2015 is associated with a decrease in imported cases from West Africa. This finding might be related to altered or curtailed travel to Ebola-affected countries in this region. Despite progress in reducing malaria worldwide, the disease remains endemic in many regions, and the use of appropriate prevention measures by travelers is still inadequate.

Public health actions

The best way to prevent malaria is to take chemoprophylaxis medication during travel to a country where malaria is endemic. As demonstrated by the U.S. military during the Ebola response, use of chemoprophylaxis and other protection measures is possible in stressful environments, and this can prevent malaria, especially P. falciparum, even in high transmission areas. Detailed recommendations for preventing malaria are available to the general public at the CDC website (https://www.cdc.gov/malaria/travelers/drugs.html). Malaria infections can be fatal if not diagnosed and treated promptly with antimalarial medications appropriate for the patient's age and medical history, the likely country of malaria acquisition, and previous use of antimalarial chemoprophylaxis. Health care providers should consult the CDC Guidelines for Treatment of Malaria in the United States and contact the CDC's Malaria Hotline for case management advice when needed. Malaria treatment recommendations are available online (https://www.cdc.gov/malaria/diagnosis_treatment) and from the Malaria Hotline (770-488-7788 or toll-free at 855-856-4713). Persons submitting malaria case reports (care providers, laboratories, and state and local public health officials) should provide complete information because incomplete reporting compromises case investigations and efforts to prevent infections and examine trends in malaria cases. Compliance with recommended malaria prevention strategies is low among U.S. travelers visiting friends and relatives. Evidence-based prevention strategies that effectively target travelers who are visiting friends and relatives need to be developed and implemented to reduce the numbers of imported malaria cases in the United States. Molecular surveillance of antimalarial drug resistance markers (https://www.cdc.gov/malaria/features/ars.html) has enabled CDC to track, guide treatment, and manage drug resistance in malaria parasites both domestically and internationally. 
More samples are needed to improve the completeness of antimalarial drug resistance marker analysis; therefore, CDC requests that blood specimens be submitted for all cases diagnosed in the United States.",2018-05-04 +25725057,Determining similarity of scientific entities in annotation datasets. ,"Linked Open Data initiatives have made available a diversity of scientific collections where scientists have annotated entities in the datasets with controlled vocabulary terms from ontologies. Annotations encode scientific knowledge, which is captured in annotation datasets. Determining relatedness between annotated entities becomes a building block for pattern mining, e.g. identifying drug-drug relationships may depend on the similarity of the targets that interact with each drug. A diversity of similarity measures has been proposed in the literature to compute relatedness between a pair of entities. Each measure exploits some knowledge including the name, function, relationships with other entities, taxonomic neighborhood and semantic knowledge. We propose a novel general-purpose annotation similarity measure called 'AnnSim' that measures the relatedness between two entities based on the similarity of their annotations. We model AnnSim as a 1-1 maximum weight bipartite match and exploit properties of existing solvers to provide an efficient solution. We empirically study the performance of AnnSim on real-world datasets of drugs and disease associations from clinical trials and relationships between drugs and (genomic) targets. Using baselines that include a variety of measures, we identify where AnnSim can provide a deeper understanding of the semantics underlying the relatedness of a pair of entities or where it could lead to predicting new links or identifying potential novel patterns. Although AnnSim does not exploit knowledge or properties of a particular domain, its performance compares well with a variety of state-of-the-art domain-specific measures. 
Database URL: http://www.yeastgenome.org/",2015-02-27 +27764668,Publicly Available Data Provide Evidence against NR1H3 R415Q Causing Multiple Sclerosis.,"It has recently been reported that an NR1H3 missense variant, R415Q, causes a novel familial form of multiple sclerosis (Wang et al., 2016a). This claim is at odds with publicly available data from the Exome Aggregation Consortium (ExAC; http://exac.broadinstitute.org). The allele frequency of R415Q is not significantly higher in cases (0.024%-0.049%) than in ExAC population controls (0.031%), whereas if R415Q conferred even 50% lifetime risk of developing MS, it would be hundreds of times more common in cases than in controls. The upper bound of the 95% confidence interval of penetrance for R415Q can be estimated at 2.2% for women and 1.2% for men, indicating that even if this variant is disease associated, individuals harboring the variant would have a lifetime risk of developing MS no higher than a few percent. ExAC data should be considered when evaluating claims of variant pathogenicity. This Matters Arising paper is in response to Wang et al. (2016a), published in Neuron. See also the related Matters Arising paper by The International Multiple Sclerosis Genetics Consortium (2016) and the response by Wang et al. (2016b), published in this issue.",2016-10-01 +29467106,"Ultrafine and Fine Particle Number and Surface Area Concentrations and Daily Cause-Specific Mortality in the Ruhr Area, Germany, 2009-2014.","BACKGROUND:Although epidemiologic studies have shown associations between particle mass and daily mortality, evidence on other particle metrics is weak. OBJECTIVES:We investigated associations of size-specific particle number concentration (PNC) and lung-deposited particle surface area concentration (PSC) with cause-specific daily mortality in contrast to PM10. 
METHODS:We used time-series data (March 2009-December 2014) on daily natural, cardiovascular, and respiratory mortality (NM, CVM, RM) of three adjacent cities in the Ruhr Area, Germany. Size-specific PNC (electric mobility diameter of 13.3-750 nm), PSC, and PM10 were measured at an urban background monitoring site. In single- and multipollutant Poisson regression models, we estimated percentage change (95% confidence interval) [% (95% CI)] in mortality per interquartile range (IQR) in exposure at single-day (0-7) and aggregated lags (0-1, 2-3, 4-7), accounting for time trend, temperature, humidity, day of week, holidays, period of seasonal population decrease, and influenza. RESULTS:PNC100-750 and PSC were highly correlated and had similar immediate (lag0-1) and delayed (lag4-7) associations with NM and CVM, for example, 1.12% (95% CI: 0.09, 2.33) and 1.56% (95% CI: 0.22, 2.92) higher NM with IQR increases in PNC100-750 at lag0-1 and lag4-7, respectively, which were slightly stronger than associations with IQR increases in PM10. Positive associations between PNC and NM were strongest for accumulation mode particles (PNC 100-500 nm), and for larger UFPs (PNC 50-100 nm). Associations between NM and PNC<100 changed little after adjustment for O3 or PM10, but were more sensitive to adjustment for NO2. CONCLUSION:Size-specific PNC (50-500 nm) and lung-deposited PSC were associated with natural and cardiovascular mortality in the Ruhr Area. Although associations were similar to those estimated for an IQR increase in PM10, particle number size distributions can be linked to emission sources, and thus may be more informative for potential public health interventions. Moreover, PSC could be used as an alternative metric that integrates particle size distribution as well as deposition efficiency. https://doi.org/10.1289/EHP2054.",2018-02-15 +27153613,Quality control of single-cell RNA-seq by SinQC.,"

Unlabelled

Single-cell RNA-seq (scRNA-seq) is emerging as a promising technology for profiling cell-to-cell variability in cell populations. However, the combination of technical noise and intrinsic biological variability makes detecting technical artifacts in scRNA-seq samples particularly challenging. Proper detection of technical artifacts is critical to prevent spurious results during downstream analysis. In this study, we present 'Single-cell RNA-seq Quality Control' (SinQC), a method and software tool to detect technical artifacts in scRNA-seq samples by integrating both gene expression patterns and data quality information. We apply SinQC to nine different scRNA-seq datasets, and show that SinQC is a useful tool for controlling scRNA-seq data quality.

Availability and implementation

SinQC software and documents are available at http://www.morgridge.net/SinQC.html

Contacts

: PJiang@morgridge.org or RStewart@morgridge.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-10 +25512614,Anti-peptide monoclonal antibodies generated for immuno-multiple reaction monitoring-mass spectrometry assays have a high probability of supporting Western blot and ELISA.,"Immunoaffinity enrichment of peptides coupled to targeted, multiple reaction monitoring-mass spectrometry (immuno-MRM) has recently been developed for quantitative analysis of peptide and protein expression. As part of this technology, antibodies are generated to short, linear, tryptic peptides that are well-suited for detection by mass spectrometry. Despite its favorable analytical performance, a major obstacle to widespread adoption of immuno-MRM is a lack of validated affinity reagents because commercial antibody suppliers are reluctant to commit resources to producing anti-peptide antibodies for immuno-MRM while the market is much larger for conventional technologies, especially Western blotting and ELISA. Part of this reluctance has been the concern that affinity reagents generated to short, linear, tryptic peptide sequences may not perform well in traditional assays that detect full-length proteins. In this study, we test the feasibility and success rates of generating immuno-MRM monoclonal antibodies (mAbs) (targeting tryptic peptide antigens) that are also compatible with conventional, protein-based immuno-affinity technologies. We generated 40 novel, peptide immuno-MRM assays and determined that the cross-over success rates for using immuno-MRM monoclonals for Western blotting is 58% and for ELISA is 43%, which compare favorably to cross-over success rates amongst conventional immunoassay technologies. These success rates could most likely be increased if conventional and immuno-MRM antigen design strategies were combined, and we suggest a workflow for such a comprehensive approach. 
Additionally, the 40 novel immuno-MRM assays underwent fit-for-purpose analytical validation, and all mAbs and assays have been made available as a resource to the community via the Clinical Proteomic Tumor Analysis Consortium's (CPTAC) Antibody (http://antibodies.cancer.gov) and Assay Portals (http://assays.cancer.gov), respectively. This study also represents the first determination of the success rate (92%) for generating mAbs for immuno-MRM using a recombinant B cell cloning approach, which is considerably faster than the traditional hybridoma approach.",2014-12-15 +21593420,Genome-wide network model capturing seed germination reveals coordinated regulation of plant cellular phase transitions.,"Seed germination is a complex trait of key ecological and agronomic significance. Few genetic factors regulating germination have been identified, and the means by which their concerted action controls this developmental process remains largely unknown. Using publicly available gene expression data from Arabidopsis thaliana, we generated a condition-dependent network model of global transcriptional interactions (SeedNet) that shows evidence of evolutionary conservation in flowering plants. The topology of the SeedNet graph reflects the biological process, including two state-dependent sets of interactions associated with dormancy or germination. SeedNet highlights interactions between known regulators of this process and predicts the germination-associated function of uncharacterized hub nodes connected to them with 50% accuracy. An intermediate transition region between the dormancy and germination subdomains is enriched with genes involved in cellular phase transitions. The phase transition regulators SERRATE and EARLY FLOWERING IN SHORT DAYS from this region affect seed germination, indicating that conserved mechanisms control transitions in cell identity in plants. The SeedNet dormancy region is strongly associated with vegetative abiotic stress response genes. 
These data suggest that seed dormancy, an adaptive trait that arose evolutionarily late, evolved by coopting existing genetic pathways regulating cellular phase transition and abiotic stress. SeedNet is available as a community resource (http://vseed.nottingham.ac.uk) to aid dissection of this complex trait and gene function in diverse processes.",2011-05-18 +24636710,Operative versus conservative treatment for patellar dislocation: a meta-analysis of 7 randomized controlled trials.,"

Purpose

Patellofemoral pathology is common, and patellofemoral dislocation mainly affects adolescents and young adults. We conducted a meta-analysis exclusively of RCTs to compare the clinical outcomes of patellar dislocation patients managed operatively versus non-operatively.

Methods

After systematic review of electronic databases and websites, a total of 7 RCTs reporting data on 402 subjects were included. The methodological quality of the literature was assessed using the PEDro critical appraisal tool. Mean differences (MDs) and risk ratio (RR) were calculated for the pooled effects. Heterogeneity was assessed using the I2 test.

Results

Data synthesis showed a lower rate of recurrent patellar dislocation post-treatment in patients managed operatively compared to non-operatively (P=0.01).

Conclusion

The results suggest a difference in outcomes between the treatment strategies. However the limited number of studies and high risk of inherent bias indicate that future studies involving more patients in better-designed randomized controlled trials will be required.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/8011948721221355.",2014-03-18 +27818264,Convex recoloring as an evolutionary marker.,"With the availability of enormous quantities of genetic data it has become common to construct very accurate trees describing the evolutionary history of the species under study, as well as every single gene of these species. These trees allow us to examine the evolutionary compliance of given markers (characters). A marker compliant with the history of the species investigated, has undergone mutations along the species tree branches, such that every subtree of that tree exhibits a different state. Convex recoloring (CR) uses combinatorial representation to measure the adequacy of a taxonomic classifier to a given tree. Despite its biological origins, research on CR has been almost exclusively dedicated to mathematical properties of the problem, or variants of it with little, if any, relationship to taxonomy. In this work we return to the origins of CR. We put CR in a statistical framework and introduce and learn the notion of the statistical significance of a character. We apply this measure to two data sets - Passerine birds and prokaryotes, and four examples. These examples demonstrate various applications of CR, from evolutionary relatedness, through lateral evolution, to supertree construction. The above study was done with a new software that we provide, containing algorithmic improvement with a graphical output of a (optimally) recolored tree.

Availability

A code implementing the features and a README is available at http://research.haifa.ac.il/ssagi/software/convexrecoloring.zip.",2016-11-03 +28291757,CheckMyMetal: a macromolecular metal-binding validation tool.,"Metals are essential in many biological processes, and metal ions are modeled in roughly 40% of the macromolecular structures in the Protein Data Bank (PDB). However, a significant fraction of these structures contain poorly modeled metal-binding sites. CheckMyMetal (CMM) is an easy-to-use metal-binding site validation server for macromolecules that is freely available at http://csgid.org/csgid/metal_sites. The CMM server can detect incorrect metal assignments as well as geometrical and other irregularities in the metal-binding sites. Guidelines for metal-site modeling and validation in macromolecules are illustrated by several practical examples grouped by the type of metal. These examples show CMM users (and crystallographers in general) problems they may encounter during the modeling of a specific metal ion.",2017-02-22 +28859831,Automatic reconstruction of the muscle architecture from the superficial layer fibres data.,"

Background and objective

Physiological cross-sectional area (PCSA) of a muscle plays a significant role in determining the force contribution of muscle fascicles to skeletal movement. This parameter is typically calculated from the lengths of muscle fibres selectively sampled from the superficial layer of the muscle. However, recent studies have found that the length of fibres in the superficial layer often differs significantly (p < 0.5) from the length of fibres in the deep layer. As a result, PCSA estimation is inaccurate. In this paper, we propose a method to automatically reconstruct fibres in the whole volume of a muscle from those selectively sampled on the superficial layer.

Methods

The method performs a centripetal Catmull-Rom interpolation of the input fibres within the volume of a muscle represented by its 3D surface model, automatically distributing the fibres among multiple heads of the muscle and shortening the deep fibres to support large attachment areas with extremely acute angles.

Results

Our C++ implementation runs in a couple of seconds on commodity hardware providing realistic results for both artificial and real data sets we tested.

Conclusions

The fibres produced by the method can be used directly to determine the personalised mechanical muscle functioning. Our implementation is publicly available for the researchers at https://mi.kiv.zcu.cz/.",2017-08-08 +27393799,Using meta-differential evolution to enhance a calculation of a continuous blood glucose level.,"We developed a new model of glucose dynamics. The model calculates blood glucose level as a function of transcapillary glucose transport. In previous studies, we validated the model with animal experiments. We used analytical method to determine model parameters. In this study, we validate the model with subjects with type 1 diabetes. In addition, we combine the analytic method with meta-differential evolution. To validate the model with human patients, we obtained a data set of type 1 diabetes study that was coordinated by Jaeb Center for Health Research. We calculated a continuous blood glucose level from continuously measured interstitial fluid glucose level. We used 6 different scenarios to ensure robust validation of the calculation. Over 96% of calculated blood glucose levels fit A+B zones of the Clarke Error Grid. No data set required any correction of model parameters during the time course of measuring. We successfully verified the possibility of calculating a continuous blood glucose level of subjects with type 1 diabetes. This study signals a successful transition of our research from an animal experiment to a human patient. Researchers can test our model with their data on-line at https://diabetes.zcu.cz.",2016-05-25 +28676654,Identifying Human SIRT1 Substrates by Integrating Heterogeneous Information from Various Sources.,"Most proteins undergo different kinds of modification after translation. Protein acetylation is one of the most crucial post-translational modifications, which causes direct or indirect impact on various biological activities in vivo. 
As a member of Class III HDACs, SIRT1 was the closest one to the yeast sir2 and drew most attention, while a small number of known SIRT1 substrates caused difficulties to clarify its function. In this work, we designed a novel computational method to screen SIRT1 substrates based on manually collected data and Support Vector Machines (SVMs). Unlike other approaches, we took both primary sequence and protein functional features into consideration. Through integrating functional features, the Matthews correlation coefficient (MCC) for the prediction increased from 0.10 to 0.65. The prediction results were verified by independent dataset and biological experiments. The validation results demonstrated that our classifier could effectively identify SIRT1 substrates and filter appropriate candidates for further research. Furthermore, we provide an online tool to support SIRT1 substrates prediction, which is freely available at http://bioinfo.bjmu.edu.cn/huac/ .",2017-07-04 +26635141,rcellminer: exploring molecular profiles and drug response of the NCI-60 cell lines in R.,"

Purpose

The rcellminer R package provides a wide range of functionality to help R users access and explore molecular profiling and drug response data for the NCI-60. The package enables flexible programmatic access to CellMiner's unparalleled breadth of NCI-60 data, including gene and protein expression, copy number, whole exome mutations, as well as activity data for ∼21K compounds, with information on their structure, mechanism of action and repeat screens. Functions are available to easily visualize compound structures, activity patterns and molecular feature profiles. Additionally, embedded R Shiny applications allow interactive data exploration.

Availability and implementation

rcellminer is compatible with R 3.2 and above on Windows, Mac OS X and Linux. The package, documentation, tutorials and Shiny-based applications are available through Bioconductor (http://www.bioconductor.org/packages/rcellminer); ongoing updates will occur according to the Bioconductor release schedule with new CellMiner data. The package is free and open-source (LGPL 3).

Contact

lunaa@cbio.mskcc.org or vinodh.rajapakse@nih.gov.",2015-12-03 +25908937,CSGM Designer: a platform for designing cross-species intron-spanning genic markers linked with genome information of legumes.,"

Background

Genetic markers are tools that can facilitate molecular breeding, even in species lacking genomic resources. An important class of genetic markers is those based on orthologous genes, because they can guide hypotheses about conserved gene function, a situation that is well documented for a number of agronomic traits. For under-studied species a key bottleneck in gene-based marker development is the need to develop molecular tools (e.g., oligonucleotide primers) that reliably access genes with orthology to the genomes of well-characterized reference species.

Results

Here we report an efficient platform for the design of cross-species gene-derived markers in legumes. The automated platform, named CSGM Designer (URL: http://tgil.donga.ac.kr/CSGMdesigner), facilitates rapid and systematic design of cross-species genic markers. The underlying database is composed of genome data from five legume species whose genomes are substantially characterized. Use of CSGM is enhanced by graphical displays of query results, which we describe as ""circular viewer"" and ""search-within-results"" functions. CSGM provides a virtual PCR representation (eHT-PCR) that predicts the specificity of each primer pair simultaneously in multiple genomes. CSGM Designer output was experimentally validated for the amplification of orthologous genes using 16 genotypes representing 12 crop and model legume species, distributed among the galegoid and phaseoloid clades. Successful cross-species amplification was obtained for 85.3% of PCR primer combinations.

Conclusion

CSGM Designer spans the divide between well-characterized crop and model legume species and their less well-characterized relatives. The outcome is PCR primers that target highly conserved genes for polymorphism discovery, enabling functional inferences and ultimately facilitating trait-associated molecular breeding.",2015-04-18 +25706687,PathVisio 3: an extendable pathway analysis toolbox.,"PathVisio is a commonly used pathway editor, visualization and analysis software. Biological pathways have been used by biologists for many years to describe the detailed steps in biological processes. Those powerful, visual representations help researchers to better understand, share and discuss knowledge. Since the first publication of PathVisio in 2008, the original paper was cited more than 170 times and PathVisio was used in many different biological studies. As an online editor PathVisio is also integrated in the community curated pathway database WikiPathways. Here we present the third version of PathVisio with the newest additions and improvements of the application. The core features of PathVisio are pathway drawing, advanced data visualization and pathway statistics. Additionally, PathVisio 3 introduces a new powerful extension systems that allows other developers to contribute additional functionality in form of plugins without changing the core application. PathVisio can be downloaded from http://www.pathvisio.org and in 2014 PathVisio 3 has been downloaded over 5,500 times. There are already more than 15 plugins available in the central plugin repository. PathVisio is a freely available, open-source tool published under the Apache 2.0 license (http://www.apache.org/licenses/LICENSE-2.0). It is implemented in Java and thus runs on all major operating systems. The code repository is available at http://svn.bigcat.unimaas.nl/pathvisio. 
The support mailing list for users is available on https://groups.google.com/forum/#!forum/wikipathways-discuss and for developers on https://groups.google.com/forum/#!forum/wikipathways-devel.",2015-02-23 +28498993,DEOGEN2: prediction and interactive visualization of single amino acid variant deleteriousness in human proteins.,"High-throughput sequencing methods are generating enormous amounts of genomic data, giving unprecedented insights into human genetic variation and its relation to disease. An individual human genome contains millions of Single Nucleotide Variants: to discriminate the deleterious from the benign ones, a variety of methods have been developed that predict whether a protein-coding variant likely affects the carrier individual's health. We present such a method, DEOGEN2, which incorporates heterogeneous information about the molecular effects of the variants, the domains involved, the relevance of the gene and the interactions in which it participates. This extensive contextual information is non-linearly mapped into one single deleteriousness score for each variant. Since for the non-expert user it is sometimes still difficult to assess what this score means, how it relates to the encoded protein, and where it originates from, we developed an interactive online framework (http://deogen2.mutaframe.com/) to better present the DEOGEN2 deleteriousness predictions of all possible variants in all human proteins. The prediction is visualized so both expert and non-expert users can gain insights into the meaning, protein context and origins of each prediction.",2017-07-01 +28453650,"plantiSMASH: automated identification, annotation and expression analysis of plant biosynthetic gene clusters.","Plant specialized metabolites are chemically highly diverse, play key roles in host-microbe interactions, have important nutritional value in crops and are frequently applied as medicines. 
It has recently become clear that plant biosynthetic pathway-encoding genes are sometimes densely clustered in specific genomic loci: biosynthetic gene clusters (BGCs). Here, we introduce plantiSMASH, a versatile online analysis platform that automates the identification of candidate plant BGCs. Moreover, it allows integration of transcriptomic data to prioritize candidate BGCs based on the coexpression patterns of predicted biosynthetic enzyme-coding genes, and facilitates comparative genomic analysis to study the evolutionary conservation of each cluster. Applied on 48 high-quality plant genomes, plantiSMASH identifies a rich diversity of candidate plant BGCs. These results will guide further experimental exploration of the nature and dynamics of gene clustering in plant metabolism. Moreover, spurred by the continuing decrease in costs of plant genome sequencing, they will allow genome mining technologies to be applied to plant natural product discovery. The plantiSMASH web server, precalculated results and source code are freely available from http://plantismash.secondarymetabolites.org.",2017-07-01 +28317014,MetaDP: a comprehensive web server for disease prediction of 16S rRNA metagenomic datasets.,"High-throughput sequencing-based metagenomics has garnered considerable interest in recent years. Numerous methods and tools have been developed for the analysis of metagenomic data. However, it is still a daunting task to install a large number of tools and complete a complicated analysis, especially for researchers with minimal bioinformatics backgrounds. To address this problem, we constructed an automated software named MetaDP for 16S rRNA sequencing data analysis, including data quality control, operational taxonomic unit clustering, diversity analysis, and disease risk prediction modeling. 
Furthermore, a support vector machine-based prediction model for intestinal bowel syndrome (IBS) was built by applying MetaDP to microbial 16S sequencing data from 108 children. The success of the IBS prediction model suggests that the platform may also be applied to other diseases related to gut microbes, such as obesity, metabolic syndrome, or intestinal cancer, among others (http://metadp.cn:7001/).",2016-01-01 +28216118,Systematic review of patient education practices in weight loss surgery.,"

Background

Education plays a key role in adherence to lifestyle modifications after weight loss surgery (WLS). Education given before and after surgery may decrease weight recidivism rates and improve outcomes.

Objective

The purpose of this systematic review was to analyze educational practices in bariatric centers.

Methods

The Cumulative Index to Nursing and Allied Health and PubMed databases were searched in May 2016 for English-language, peer-reviewed studies about WLS patient education practices from 1999 to 2016. Publications were: (1) rated with the Advancing Research and Clinical Practice through Close Collaboration levels of evidence hierarchy (see Melnyk's pyramid [http://guides.lib.umich.edu/c.php?g=282802&p=1888246]) and (2) analyzed according to surgical phase, curriculum, program delivery, and educator.

Results

Twenty-four publications met the study criteria. Evidence ratings for preoperative (n = 16) and postoperative studies (n = 8) were levels I to III (n = 5) and IV to VII (n = 17). Two publications were not ratable. Preoperative and postoperative education programs varied in curriculum, teaching methods, and educator. Topics varied in depth. Commonalities were surgical procedure, nutrition, activity, and psychosocial behaviors. Preoperative education was mostly provided in small groups, whereas individual sessions were used postoperatively. Lecture and discussion provided by myriad of healthcare experts from multiple disciplines were typical in both phases. Written or web-based aides supported learning needs in both phases.

Conclusion

WLS patient education varied by curriculum and dose and commonly used passive learning methods (e.g., traditional lecture style instruction with minimal engagement from learners). Results shared can inform future bariatric education programs and accreditation standard development (e.g., Metabolic and Bariatric Surgery Accreditation and Quality Improvement Program patient education standards). Additional study is needed, but existing evidence can guide improvements in high-quality, cost-effective, and patient-centered educational programs.",2017-01-11 +24187504,Hypergeometric analysis of tiling-array and sequence data: detection and interpretation of peaks.,"Probing protein-deoxyribonucleic acid (DNA) is gaining popularity as it sheds light on molecular mechanisms that regulate the expression of genes. Currently, tiling-arrays and next-generation sequencing technology can be used to measure these interactions. Both methods generate a signal over the genome in which contiguous regions of peaks on the genome represent the presence of an interacting molecule. Many methods do exist to identify functional regions of interest (ROIs) on the genome. However the detection of ROIs are often not an end-point in research questions and it therefore requires data dragging between tools to relate the ROIs to information present in databases, such as gene-ontology, pathway information, or enrichment of certain genomic content. We introduce hypergeometric analysis of tiling-array and sequence data (HATSEQ), a powerful tool that accurately identifies functional ROIs on the genome where a genomic signal significantly deviates from the general genome-wide behavior. HATSEQ also includes a number of built-in post-analyses with which biological meaning can be attached to the detected ROIs in terms of gene pathways and de-novo motif analysis, and provides different visualizations and statistical summaries for the detected ROIs. 
In addition, HATSEQ has an intuitive graphic user interface that lowers the barrier for researchers to analyze their data without the need of scripting languages. We compared the results of HATSEQ against two other popular chromatin immunoprecipitation sequencing (ChIP-Seq) methods and observed overlap in the detected ROIs but HATSEQ is more specific in delineating the peak boundaries. We also discuss the versatility of HATSEQ by using a Signal Transducer and Activator of Transcription 1 (STAT1) ChIP-Seq data-set, and show that the detected ROIs are highly specific for the expected STAT1 binding motif. HATSEQ is freely available at: http://hema13.erasmusmc.nl/index.php/HATSEQ.",2013-10-25 +22074631,Advancing the preservation of cellular therapy products.,"Cell therapies are typically collected in one location, processed in a second location, and then administered in a third location. The ability to preserve the cells is critical to their clinical application. It improves patient access to therapies by increasing the genetic diversity of cells available. In addition, the ability to preserve cells improves the ""manufacturability"" of a cell therapy product by permitting the cells to be stored until the patient is ready for administration of the therapy, permitting inventory control of products, and improving management of staffing at cell therapy facilities. Finally, the ability to preserve cell therapies improves the safety of cell therapy products by extending the shelf life of a product and permitting completion of safety and quality control testing before release of the product for use. The support of the National Blood Foundation has been critical to our work on improving the quality of frozen and thawed cell therapy products through the development of a microfluidic device to remove dimethylsulfoxide (DMSO). We are also involved in research to replace DMSO with other agents that are less toxic to cells and patients. 
Finally, the need to advance the preservation of cell therapies was a driving force behind the development of the Biopreservation Core Resource (http://www.biocor.net), a national resource in biopreservation. New interest in translation of cell therapies from the bench to the patient's bedside has the potential to drive the transformation of preservation science, technology, and practice.",2011-11-01 +25392415,Cancer3D: understanding cancer mutations through protein structures.,"The new era of cancer genomics is providing us with extensive knowledge of mutations and other alterations in cancer. The Cancer3D database at http://www.cancer3d.org gives an open and user-friendly way to analyze cancer missense mutations in the context of structures of proteins in which they are found. The database also helps users analyze the distribution patterns of the mutations as well as their relationship to changes in drug activity through two algorithms: e-Driver and e-Drug. These algorithms use knowledge of modular structure of genes and proteins to separately study each region. This approach allows users to find novel candidate driver regions or drug biomarkers that cannot be found when similar analyses are done on the whole-gene level. The Cancer3D database provides access to the results of such analyses based on data from The Cancer Genome Atlas (TCGA) and the Cancer Cell Line Encyclopedia (CCLE). In addition, it displays mutations from over 14,700 proteins mapped to more than 24,300 structures from PDB. This helps users visualize the distribution of mutations and identify novel three-dimensional patterns in their distribution.",2014-11-11 +24265224,Updates to BioSamples database at European Bioinformatics Institute.,"The BioSamples database at the EBI (http://www.ebi.ac.uk/biosamples) provides an integration point for BioSamples information between technology specific databases at the EBI, projects such as ENCODE and reference collections such as cell lines. 
The database delivers a unified query interface and API to query sample information across EBI's databases and provides links back to assay databases. Sample groups are used to manage related samples, e.g. those from an experimental submission, or a single reference collection. Infrastructural improvements include a new user interface with ontological and key word queries, a new query API, a new data submission API, complete RDF data download and a supporting SPARQL endpoint, accessioning at the point of submission to the European Nucleotide Archive and European Genotype Phenotype Archives and improved query response times.",2013-11-21 +28525590,SDM: a server for predicting effects of mutations on protein stability.,"Here, we report a webserver for the improved SDM, used for predicting the effects of mutations on protein stability. As a pioneering knowledge-based approach, SDM has been highlighted as the most appropriate method to use in combination with many other approaches. We have updated the environment-specific amino-acid substitution tables based on the current expanded PDB (a 5-fold increase in information), and introduced new residue-conformation and interaction parameters, including packing density and residue depth. The updated server has been extensively tested using a benchmark containing 2690 point mutations from 132 different protein structures. The revised method correlates well against the hypothetical reverse mutations, better than comparable methods built using machine-learning approaches, highlighting the strength of our knowledge-based approach for identifying stabilising mutations. Given a PDB file (a Protein Data Bank file format containing the 3D coordinates of the protein atoms), and a point mutation, the server calculates the stability difference score between the wildtype and mutant protein. 
The server is available at http://structure.bioc.cam.ac.uk/sdm2.",2017-07-01 +25397492,Transmitted antiretroviral drug resistance in treatment naïve HIV-infected persons in London in 2011 to 2013.,"

Introduction

Previously published UK data on HIV transmitted drug resistance (TDR) shows that it ranges between 3 and 9.4% [1,2]. However, there are no recent data from populations where HIV transmission rates are increasing. The aim of this study was to assess the prevalence of TDR in untreated HIV-infected individuals attending three HIV specialist clinics under the HIV Directorate, Chelsea and Westminster Hospital and based throughout London - the Kobler Clinic, 56 Dean Street and West London Centre for Sexual Health.

Methods

We included all patients with a HIV diagnosis, no history of antiretroviral therapy (ART) intake, attending one of the three clinics (Kobler (K), 56 Dean Street (DS) and West London (WL)), between 2011 and 2013 who started antiretrovirals. Reverse transcriptase (RT) and protease region sequencing was performed using Vircotype virtual phenotype resistance analysis. Drug resistance mutations were identified according to Stanford University HIV Drug Resistance Database (http://hivdb.stanford.edu/).

Results

Among 1705 HIV-1-infected patients enrolled in the study, 1252 were males (919 were MSM), 107 were females and 346 had no gender recorded. Ethnicity was 51.1% white British/Irish/other, 6.1% African, 2.1% Caribbean, 2.8% Asian, 1.3% Indian/Pakistani/Bangladeshi, 4.2% other, 3.2% not stated, and 29.2% unknown. 547 were from K (84.3% males, 48.3% MSM), 826 were from DS (84.3% males, 71.9% MSM), and 109 from WL (87.2% males, 56.0% MSM), 223 from other sites not specified. 77.5% (1321 of 1705) of patients had baseline viral resistance testing performed. Prevalence of primary resistance in those with a baseline viral resistance test was 13.5% overall: 19.3% in K, 14.9% in DS, and 14.7% in WL. The most common mutations detected were: NRTI: 184V, 215F, 41L; NNRTI 103N, 179D, 90I; PI 90M, 46I, and 82A. Among patients who tested with TDR, 79.1% had one single mutation, 18.7% and 2.2% exhibited dual or triple class-resistant viruses, respectively.

Conclusions

This study across a large HIV Medicine Directorate reported an overall TDR prevalence which is higher than that previously published and with significant rates of NNRTI resistance at baseline.",2014-11-02 +28459918,Pulmonary Embolism in 2017: Increasing Options for Increasing Incidence.,"Scope of the problem - An increasing burden of disease Acute pulmonary embolism (PE) is a problem encountered by a majority of medical and surgical specialties in their scope of practice. Acute PE is currently the 3rd leading cause of cardiovascular death in the United States, resulting in 100,000 deaths annually as estimated by the Centers for Disease Control (CDC (1). There is a paucity of data and a broad range of estimates for both incidence and morbidity due to acute PE. The mortality of all patients presenting with acute PE is estimated between 10-30% at 90 days utilizing current treatment regimens (2). The incidence of acute symptomatic PE seems to be increasing from 3/100 to more than 6.5/100 in the past 15 years (2). The increasing burden of disease has led to a period of intense investigation into new therapies and strategies to treat acute PE. [Full article available at http://rimed.org/rimedicaljournal-2017-05.asp].",2017-05-01 +25332398,The human DEPhOsphorylation database DEPOD: a 2015 update.,"Phosphatases are crucial enzymes in health and disease, but the knowledge of their biological roles is still limited. Identifying substrates continues to be a great challenge. To support the research on phosphatase-kinase-substrate networks we present here an update on the human DEPhOsphorylation Database: DEPOD (http://www.depod.org or http://www.koehn.embl.de/depod). DEPOD is a manually curated open access database providing human phosphatases, their protein and non-protein substrates, dephosphorylation sites, pathway involvements and external links to kinases and small molecule modulators. All internal data are fully searchable including a BLAST application. 
Since the first release, more human phosphatases and substrates, their associated signaling pathways (also from new sources), and interacting proteins for all phosphatases and protein substrates have been added into DEPOD. The user interface has been further optimized; for example, the interactive human phosphatase-substrate network contains now a 'highlight node' function for phosphatases, which includes the visualization of neighbors in the network.",2014-10-20 +28077566,"Duplicates, redundancies and inconsistencies in the primary nucleotide databases: a descriptive study. ","GenBank, the EMBL European Nucleotide Archive and the DNA DataBank of Japan, known collectively as the International Nucleotide Sequence Database Collaboration or INSDC, are the three most significant nucleotide sequence databases. Their records are derived from laboratory work undertaken by different individuals, by different teams, with a range of technologies and assumptions and over a period of decades. As a consequence, they contain a great many duplicates, redundancies and inconsistencies, but neither the prevalence nor the characteristics of various types of duplicates have been rigorously assessed. Existing duplicate detection methods in bioinformatics only address specific duplicate types, with inconsistent assumptions; and the impact of duplicates in bioinformatics databases has not been carefully assessed, making it difficult to judge the value of such methods. Our goal is to assess the scale, kinds and impact of duplicates in bioinformatics databases, through a retrospective analysis of merged groups in INSDC databases. Our outcomes are threefold: (1) We analyse a benchmark dataset consisting of duplicates manually identified in INSDC-a dataset of 67 888 merged groups with 111 823 duplicate pairs across 21 organisms from INSDC databases - in terms of the prevalence, types and impacts of duplicates. 
(2) We categorize duplicates at both sequence and annotation level, with supporting quantitative statistics, showing that different organisms have different prevalence of distinct kinds of duplicate. (3) We show that the presence of duplicates has practical impact via a simple case study on duplicates, in terms of GC content and melting temperature. We demonstrate that duplicates not only introduce redundancy, but can lead to inconsistent results for certain tasks. Our findings lead to a better understanding of the problem of duplication in biological databases.Database URL: the merged records are available at https://cloudstor.aarnet.edu.au/plus/index.php/s/Xef2fvsebBEAv9w.",2017-01-10 +28829315,Multiple Network Alignment via MultiMAGNA+.,"Network alignment (NA) aims to find a node mapping that identifies topologically or functionally similar network regions between molecular networks of different species. Analogous to genomic sequence alignment, NA can be used to transfer biological knowledge from well- to poorly-studied species between aligned network regions. Pairwise NA (PNA) finds similar regions between two networks while multiple NA (MNA) can align more than two networks. We focus on MNA. Existing MNA methods aim to maximize total similarity over all aligned nodes (node conservation). Then, they evaluate alignment quality by measuring the amount of conserved edges, but only after the alignment is constructed. Directly optimizing edge conservation during alignment construction in addition to node conservation may result in superior alignments. Thus, we present a novel MNA method called multiMAGNA++ that can achieve this. Indeed, multiMAGNA++ outperforms or is on par with existing MNA methods, while often completing faster than existing methods. That is, multiMAGNA++ scales well to larger network data and can be parallelized effectively. 
During method evaluation, we also introduce new MNA quality measures to allow for more fair MNA method comparison compared to the existing alignment quality measures. The multiMAGNA++ code is available on the method's web page at http://nd.edu/~cone/multiMAGNA++/.",2017-08-21 +28472505,"The Antibiotic Resistant Target Seeker (ARTS), an exploration engine for antibiotic cluster prioritization and novel drug target discovery.","With the rise of multi-drug resistant pathogens and the decline in number of potential new antibiotics in development there is a fervent need to reinvigorate the natural products discovery pipeline. Most antibiotics are derived from secondary metabolites produced by microorganisms and plants. To avoid suicide, an antibiotic producer harbors resistance genes often found within the same biosynthetic gene cluster (BGC) responsible for manufacturing the antibiotic. Existing mining tools are excellent at detecting BGCs or resistant genes in general, but provide little help in prioritizing and identifying gene clusters for compounds active against specific and novel targets. Here we introduce the 'Antibiotic Resistant Target Seeker' (ARTS) available at https://arts.ziemertlab.com. ARTS allows for specific and efficient genome mining for antibiotics with interesting and novel targets. The aim of this web server is to automate the screening of large amounts of sequence data and to focus on the most promising strains that produce antibiotics with new modes of action. ARTS integrates target directed genome mining methods, antibiotic gene cluster predictions and 'essential gene screening' to provide an interactive page for rapid identification of known and putative targets in BGCs.",2017-07-01 +28968753,A non-negative matrix factorization based method for predicting disease-associated miRNAs in miRNA-disease bilayer network.,"

Motivation

Identification of disease-associated miRNAs (disease miRNAs) is critical for understanding disease etiology and pathogenesis. Since miRNAs exert their functions by regulating the expression of their target mRNAs, several methods based on the target genes were proposed to predict disease miRNA candidates. They achieved only limited success as they all suffered from the high false-positive rate of target prediction results. Alternatively, other prediction methods were based on the observation that miRNAs with similar functions tend to be associated with similar diseases and vice versa. The methods exploited the information about miRNAs and diseases, including the functional similarities between miRNAs, the similarities between diseases, and the associations between miRNAs and diseases. However, how to integrate the multiple kinds of information completely and consider the biological characteristic of disease miRNAs is a challenging problem.

Results

We constructed a bilayer network to represent the complex relationships among miRNAs, among diseases and between miRNAs and diseases. We proposed a non-negative matrix factorization based method to rank, so as to predict, the disease miRNA candidates. The method integrated the miRNA functional similarity, the disease similarity and the miRNA-disease associations seamlessly, which exploited the complex relationships within the bilayer network and the consensus relationship between multiple kinds of information. Considering the correlation between the candidates related to various diseases, it predicted their respective candidates for all the diseases simultaneously. In addition, the sparseness characteristic of disease miRNAs was introduced to generate more reliable prediction model that excludes those noisy candidates. The results on 15 common diseases showed a superior performance of the new method for not only well-characterized diseases but also new ones. A detailed case study on breast neoplasms, colorectal neoplasms, lung neoplasms and 32 other diseases demonstrated the ability of the method for discovering potential disease miRNAs.

Availability and implementation

The web service for the new method and the list of predicted candidates for all the diseases are available at http://www.bioinfolab.top.

Supplementary information

Supplementary data are available at Bioinformatics online.",2018-01-01 +27153568,Integrated genome browser: visual analytics platform for genomics.,"

Motivation

Genome browsers that support fast navigation through vast datasets and provide interactive visual analytics functions can help scientists achieve deeper insight into biological systems. Toward this end, we developed Integrated Genome Browser (IGB), a highly configurable, interactive and fast open source desktop genome browser.

Results

Here we describe multiple updates to IGB, including all-new capabilities to display and interact with data from high-throughput sequencing experiments. To demonstrate, we describe example visualizations and analyses of datasets from RNA-Seq, ChIP-Seq and bisulfite sequencing experiments. Understanding results from genome-scale experiments requires viewing the data in the context of reference genome annotations and other related datasets. To facilitate this, we enhanced IGB's ability to consume data from diverse sources, including Galaxy, Distributed Annotation and IGB-specific Quickload servers. To support future visualization needs as new genome-scale assays enter wide use, we transformed the IGB codebase into a modular, extensible platform for developers to create and deploy all-new visualizations of genomic data.

Availability and implementation

IGB is open source and is freely available from http://bioviz.org/igb

Contact

aloraine@uncc.edu.",2016-03-16 +29218907,Emergence of pathway-level composite biomarkers from converging gene set signals of heterogeneous transcriptomic responses.,"Recent precision medicine initiatives have led to the expectation of improved clinical decisionmaking anchored in genomic data science. However, over the last decade, only a handful of new single-gene product biomarkers have been translated to clinical practice (FDA approved) in spite of considerable discovery efforts deployed and a plethora of transcriptomes available in the Gene Expression Omnibus. With this modest outcome of current approaches in mind, we developed a pilot simulation study to demonstrate the untapped benefits of developing disease detection methods for cases where the true signal lies at the pathway level, even if the pathway's gene expression alterations may be heterogeneous across patients. In other words, we relaxed the crosspatient homogeneity assumption from the transcript level (cohort assumptions of deregulated gene expression) to the pathway level (assumptions of deregulated pathway expression). Furthermore, we have expanded previous single-subject (SS) methods into cohort analyses to illustrate the benefit of accounting for an individual's variability in cohort scenarios. We compare SS and cohort-based (CB) techniques under 54 distinct scenarios, each with 1,000 simulations, to demonstrate that the emergence of a pathway-level signal occurs through the summative effect of its altered gene expression, heterogeneous across patients. Studied variables include pathway gene set size, fraction of expressed gene responsive within gene set, fraction of expressed gene responsive up- vs down-regulated, and cohort size. We demonstrated that our SS approach was uniquely suited to detect signals in heterogeneous populations in which individuals have varying levels of baseline risks that are simultaneously confounded by patient-specific ""genome -by-environment"" interactions (G×E). 
Area under the precision-recall curve of the SS approach far surpassed that of the CB (1st quartile, median, 3rd quartile: SS = 0.94, 0.96, 0.99; CB= 0.50, 0.52, 0.65). We conclude that single-subject pathway detection methods are uniquely suited for consistently detecting pathway dysregulation by the inclusion of a patient's individual variability. http://www.lussiergroup.org/publications/PathwayMarker/.",2018-01-01 +25551156,A novel dysregulated pathway-identification analysis based on global influence of within-pathway effects and crosstalk between pathways.,"Identifying dysregulated pathways from high-throughput experimental data in order to infer underlying biological insights is an important task. Current pathway-identification methods focus on single pathways in isolation; however, consideration of crosstalk between pathways could improve our understanding of alterations in biological states. We propose a novel method of pathway analysis based on global influence (PAGI) to identify dysregulated pathways, by considering both within-pathway effects and crosstalk between pathways. We constructed a global gene–gene network based on the relationships among genes extracted from a pathway database. We then evaluated the extent of differential expression for each gene, and mapped them to the global network. The random walk with restart algorithm was used to calculate the extent of genes affected by global influence. Finally, we used cumulative distribution functions to determine the significance values of the dysregulated pathways. We applied the PAGI method to five cancer microarray datasets, and compared our results with gene set enrichment analysis and five other methods. Based on these analyses, we demonstrated that PAGI can effectively identify dysregulated pathways associated with cancer, with strong reproducibility and robustness. 
We implemented PAGI using the freely available R-based and Web-based tools (http://bioinfo.hrbmu.edu.cn/PAGI).",2015-01-01 +27942458,Single nucleotide-level mapping of DNA double-strand breaks in human HEK293T cells.,"Constitutional biological processes involve the generation of DNA double-strand breaks (DSBs). The production of such breaks and their subsequent resolution are also highly relevant to neurodegenerative diseases and cancer, in which extensive DNA fragmentation has been described Stephens et al. (2011), Blondet et al. (2001). Tchurikov et al. Tchurikov et al. (2011, 2013) have reported previously that frequent sites of DSBs occur in chromosomal domains involved in the co-ordinated expression of genes. This group report that hot spots of DSBs in human HEK293T cells often coincide with H3K4me3 marks, associated with active transcription Kravatsky et al. (2015) and that frequent sites of DNA double-strand breakage are likely to be relevant to cancer genomics Tchurikov et al. (2013, 2016) . Recently, they applied a RAFT (rapid amplification of forum termini) protocol that selects for blunt-ended DSB sites and mapped these to the human genome within defined co-ordinate 'windows'. In this paper, we re-analyse public RAFT data to derive sites of DSBs at the single-nucleotide level across the built genome for human HEK293T cells (https://figshare.com/s/35220b2b79eaaaf64ed8). This refined mapping, combined with accessory ENCODE data tracks and ribosomal DNA-related sequence annotations, will likely be of value for the design of clinically relevant targeted assays such as those for cancer susceptibility, diagnosis, treatment-matching and prognostication.",2016-11-11 +29222172,Proximal Aberrant Crypt Foci Associate with Synchronous Neoplasia and Are Primed for Neoplastic Progression.,"Aberrant crypt foci (ACF) are the earliest morphologically identifiable lesion found within the human colon. 
Despite their relatively high frequency in the distal colon, few studies have examined the molecular characteristics of ACF within the proximal colon. In the following study, clinical participants (n = 184) were screened for ACF using high-definition chromoendoscopy with contrast dye-spray. Following pathologic confirmation, ACF biopsies were subjected to laser capture microdissection (LCM), and epithelial cells were evaluated for somatic mutations with a customized colorectal cancer mutation panel using DNA-mass spectrometry. Samples were further characterized for microsatellite instability (MSI). Logistic models were used to associate proximal ACF with synchronous (detected during the same procedure) neoplasia. Thirty-nine percent of participants had at least one histologically confirmed proximal ACF. Individuals with a proximal ACF were significantly more likely to present with a synchronous neoplasm (P = 0.001), and specifically, a proximal, tubular, or tubulovillous adenoma (multivariable OR = 2.69; 95% confidence interval, 1.12-6.47; P = 0.027). Proximal ACF were more likely to be dysplastic (52%) compared with distal ACF (13%; P < 0.0001). Somatic mutations to APC, BRAF, KRAS, NRAS, and ERBB2 were detected in 37% of proximal ACF. Hyperplastic ACF were more often MSI-high, but there were no differences in MSI status observed by colonic location. In summary, ACF are identified in the proximal colons of approximately 40% of individuals undergoing chromoendoscopy and more often in patients with synchronous proximal adenomas.Implications: This study provides the most complete set of data, to date, that ACF represent the earliest step in the adenoma-carcinoma sequence but remain below the detection limit of conventional endoscopy.Visual Overview: http//mcr.accrjournals.org/content/molcanres/16/3/486/F1.large.jpg Mol Cancer Res; 16(3); 486-95. 
©2017 AACR.",2017-12-08 +27809316,The MaxQuant computational platform for mass spectrometry-based shotgun proteomics.,"MaxQuant is one of the most frequently used platforms for mass-spectrometry (MS)-based proteomics data analysis. Since its first release in 2008, it has grown substantially in functionality and can be used in conjunction with more MS platforms. Here we present an updated protocol covering the most important basic computational workflows, including those designed for quantitative label-free proteomics, MS1-level labeling and isobaric labeling techniques. This protocol presents a complete description of the parameters used in MaxQuant, as well as of the configuration options of its integrated search engine, Andromeda. This protocol update describes an adaptation of an existing protocol that substantially modifies the technique. Important concepts of shotgun proteomics and their implementation in MaxQuant are briefly reviewed, including different quantification strategies and the control of false-discovery rates (FDRs), as well as the analysis of post-translational modifications (PTMs). The MaxQuant output tables, which contain information about quantification of proteins and PTMs, are explained in detail. Furthermore, we provide a short version of the workflow that is applicable to data sets with simple and standard experimental designs. The MaxQuant algorithms are efficiently parallelized on multiple processors and scale well from desktop computers to servers with many cores. The software is written in C# and is freely available at http://www.maxquant.org.",2016-10-27 +27307137,"Combining machine learning, crowdsourcing and expert knowledge to detect chemical-induced diseases in text. ","Drug toxicity is a major concern for both regulatory agencies and the pharmaceutical industry. 
In this context, text-mining methods for the identification of drug side effects from free text are key for the development of up-to-date knowledge sources on drug adverse reactions. We present a new system for identification of drug side effects from the literature that combines three approaches: machine learning, rule- and knowledge-based approaches. This system has been developed to address the Task 3.B of Biocreative V challenge (BC5) dealing with Chemical-induced Disease (CID) relations. The first two approaches focus on identifying relations at the sentence-level, while the knowledge-based approach is applied both at sentence and abstract levels. The machine learning method is based on the BeFree system using two corpora as training data: the annotated data provided by the CID task organizers and a new CID corpus developed by crowdsourcing. Different combinations of results from the three strategies were selected for each run of the challenge. In the final evaluation setting, the system achieved the highest Recall of the challenge (63%). By performing an error analysis, we identified the main causes of misclassifications and areas for improving of our system, and highlighted the need of consistent gold standard data sets for advancing the state of the art in text mining of drug side effects. Database URL: https://zenodo.org/record/29887?ln=en#.VsL3yDLWR_V.",2016-06-15 +27098040,GAM: a web-service for integrated transcriptional and metabolic network analysis.,"Novel techniques for high-throughput steady-state metabolomic profiling yield information about changes of nearly thousands of metabolites. Such metabolomic profiles, when analyzed together with transcriptional profiles, can reveal novel insights about underlying biological processes. While a number of conceptual approaches have been developed for data integration, easily accessible tools for integrated analysis of mammalian steady-state metabolomic and transcriptional data are lacking. 
Here we present GAM ('genes and metabolites'): a web-service for integrated network analysis of transcriptional and steady-state metabolomic data focused on identification of the most changing metabolic subnetworks between two conditions of interest. In the web-service, we have pre-assembled metabolic networks for humans, mice, Arabidopsis and yeast and adapted exact solvers for an optimal subgraph search to work in the context of these metabolic networks. The output is the most regulated metabolic subnetwork of size controlled by false discovery rate parameters. The subnetworks are then visualized online and also can be downloaded in Cytoscape format for subsequent processing. The web-service is available at: https://artyomovlab.wustl.edu/shiny/gam/.",2016-04-20 +27302890,Systems Level Analysis of Histone H3 Post-translational Modifications (PTMs) Reveals Features of PTM Crosstalk in Chromatin Regulation.,"Histones are abundant chromatin constituents carrying numerous post-translational modifications (PTMs). Such PTMs mediate a variety of biological functions, including recruitment of enzymatic readers, writers and erasers that modulate DNA replication, transcription and repair. Individual histone molecules contain multiple coexisting PTMs, some of which exhibit crosstalk, i.e. coordinated or mutually exclusive activities. Here, we present an integrated experimental and computational systems level molecular characterization of histone PTMs and PTM crosstalk. Using wild type and engineered mouse embryonic stem cells (mESCs) knocked out in components of the Polycomb Repressive Complex 2 (PRC2, Suz12(-/-)), PRC1 (Ring1A/B(-/-)) and (Dnmt1/3a/3b(-/-)) we performed comprehensive PTM analysis of histone H3 tails (50 aa) by utilizing quantitative middle-down proteome analysis by tandem mass spectrometry. We characterized combinatorial PTM features across the four mESC lines and then applied statistical data analysis to predict crosstalk between histone H3 PTMs. 
We detected an overrepresentation of positive crosstalk (codependent marks) between adjacent mono-methylated and acetylated marks, and negative crosstalk (mutually exclusive marks) among most of the seven characterized di- and tri-methylated lysine residues in the H3 tails. We report novel features of PTM interplay involving hitherto poorly characterized arginine methylation and lysine methylation sites, including H3R2me, H3R8me and H3K37me. Integration of the H3 data with RNAseq data by coabundance clustering analysis of histone PTMs and histone modifying enzymes revealed correlations between PTM and enzyme levels. We conclude that middle-down proteomics is a powerful tool to determine conserved or dynamic interdependencies between histone marks, which paves the way for detailed investigations of the histone code. Histone H3 PTM data is publicly available in the CrossTalkDB repository at http://crosstalkdb.bmb.sdu.dk.",2016-06-14 +28061810,Pyviko: an automated Python tool to design gene knockouts in complex viruses with overlapping genes.,"

Background

Gene knockouts are a common tool used to study gene function in various organisms. However, designing gene knockouts is complicated in viruses, which frequently contain sequences that code for multiple overlapping genes. Designing mutants that can be traced by the creation of new or elimination of existing restriction sites further compounds the difficulty in experimental design of knockouts of overlapping genes. While software is available to rapidly identify restriction sites in a given nucleotide sequence, no existing software addresses experimental design of mutations involving multiple overlapping amino acid sequences in generating gene knockouts.

Results

Pyviko performed well on a test set of over 240,000 gene pairs collected from viral genomes deposited in the National Center for Biotechnology Information Nucleotide database, identifying a point mutation which added a premature stop codon within the first 20 codons of the target gene in 93.2% of all tested gene-overprinted gene pairs. This shows that Pyviko can be used successfully in a wide variety of contexts to facilitate the molecular cloning and study of viral overprinted genes.

Conclusions

Pyviko is an extensible and intuitive Python tool for designing knockouts of overlapping genes. Freely available as both a Python package and a web-based interface ( http://louiejtaylor.github.io/pyViKO/ ), Pyviko simplifies the experimental design of gene knockouts in complex viruses with overlapping genes.",2017-01-07 +23894142,MaGnET: Malaria Genome Exploration Tool.,"

Summary

The Malaria Genome Exploration Tool (MaGnET) is a software tool enabling intuitive 'exploration-style' visualization of functional genomics data relating to the malaria parasite, Plasmodium falciparum. MaGnET provides innovative integrated graphic displays for different datasets, including genomic location of genes, mRNA expression data, protein-protein interactions and more. Any selection of genes to explore made by the user is easily carried over between the different viewers for different datasets, and can be changed interactively at any point (without returning to a search).

Availability and implementation

Free online use (Java Web Start) or download (Java application archive and MySQL database; requires local MySQL installation) at http://malariagenomeexplorer.org

Contact

joanna.sharman@ed.ac.uk or dgerloff@ffame.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-27 +27412089,MEMHDX: an interactive tool to expedite the statistical validation and visualization of large HDX-MS datasets.,"

Motivation

With the continued improvement of requisite mass spectrometers and UHPLC systems, Hydrogen/Deuterium eXchange Mass Spectrometry (HDX-MS) workflows are rapidly evolving towards the investigation of more challenging biological systems, including large protein complexes and membrane proteins. The analysis of such extensive systems results in very large HDX-MS datasets for which specific analysis tools are required to speed up data validation and interpretation.

Results

We introduce a web application and a new R-package named 'MEMHDX' to help users analyze, validate and visualize large HDX-MS datasets. MEMHDX is composed of two elements. A statistical tool aids in the validation of the results by applying a mixed-effects model for each peptide, in each experimental condition, and at each time point, taking into account the time dependency of the HDX reaction and number of independent replicates. Two adjusted P-values are generated per peptide, one for the 'Change in dynamics' and one for the 'Magnitude of ΔD', and are used to classify the data by means of a 'Logit' representation. A user-friendly interface developed with Shiny by RStudio facilitates the use of the package. This interactive tool allows the user to easily and rapidly validate, visualize and compare the relative deuterium incorporation on the amino acid sequence and 3D structure, providing both spatial and temporal information.

Availability and implementation

MEMHDX is freely available as a web tool at the project home page http://memhdx.c3bi.pasteur.fr CONTACT: marie-agnes.dillies@pasteur.fr or sebastien.brier@pasteur.frSupplementary information: Supplementary data is available at Bioinformatics online.",2016-07-13 +24217915,WormQTLHD--a web database for linking human disease to natural variation data in C. elegans.,"Interactions between proteins are highly conserved across species. As a result, the molecular basis of multiple diseases affecting humans can be studied in model organisms that offer many alternative experimental opportunities. One such organism-Caenorhabditis elegans-has been used to produce much molecular quantitative genetics and systems biology data over the past decade. We present WormQTL(HD) (Human Disease), a database that quantitatively and systematically links expression Quantitative Trait Loci (eQTL) findings in C. elegans to gene-disease associations in man. WormQTL(HD), available online at http://www.wormqtl-hd.org, is a user-friendly set of tools to reveal functionally coherent, evolutionary conserved gene networks. These can be used to predict novel gene-to-gene associations and the functions of genes underlying the disease of interest. We created a new database that links C. elegans eQTL data sets to human diseases (34 337 gene-disease associations from OMIM, DGA, GWAS Central and NHGRI GWAS Catalogue) based on overlapping sets of orthologous genes associated to phenotypes in these two species. We utilized QTL results, high-throughput molecular phenotypes, classical phenotypes and genotype data covering different developmental stages and environments from WormQTL database. 
All software is available as open source, built on MOLGENIS and xQTL workbench.",2013-11-11 +29422013,First and second trimester urinary metabolic profiles and fetal growth restriction: an exploratory nested case-control study within the infant development and environment study.,"BACKGROUND:Routine prenatal care fails to identify a large proportion of women at risk of fetal growth restriction (FGR). Metabolomics, the comprehensive analysis of low molecular weight molecules (metabolites) in biological samples, can provide new and earlier biomarkers of prenatal health. Recent research has suggested possible predictive first trimester urine metabolites correlating to fetal growth restriction in the third trimester. Our objective in this current study was to examine urinary metabolic profiles in the first and second trimester of pregnancy in relation to third trimester FGR in a US population from a large, multi-center cohort study of healthy pregnant women. METHODS:We conducted a nested case-control study within The Infant Development and the Environment Study (TIDES), a population-based multi-center pregnancy cohort study. We identified 53 cases of FGR based on the AUDIPOG [Neonatal growth - AUDIPOG [Internet]. [cited 29 Nov 2016]. Available from: http://www.audipog.net/courbes_morpho.php?langue=en ] formula for birthweight percentile considering maternal height, age, and prenatal weight, as well as infant sex, gestational age, and birth rank. Cases were matched to 106 controls based on study site, maternal age (± 2 years), parity, and infant sex. NMR spectroscopy was used to assess concentrations of four urinary metabolites that have been previously associated with FGR (tyrosine, acetate, formate, and trimethylamine) in first and second trimester urine samples. We fit multivariate conditional logistic regression models to estimate the odds of FGR in relation to urinary concentrations of these individual metabolites in the first and second trimesters. 
Exploratory analyses of custom binned spectroscopy results were run to consider other potentially related metabolites. RESULTS:We found no significant association between the relative concentrations of each of the four metabolites and odds of FGR. Exploratory analyses did not reveal any significant differences in urinary metabolic profiles. Compared with controls, cases delivered earlier (38.6 vs 39.8, p < 0.001), and had lower birthweights (2527 g vs 3471 g, p < 0.001). Maternal BMI was similar between cases and controls. CONCLUSIONS:First and second trimester concentrations of urinary metabolites (acetate, formate, trimethylamine and tyrosine) did not predict FGR. This inconsistency with previous studies highlights the need for more rigorous investigation and data collection in this area before metabolomics can be clinically applied to obstetrics.",2018-02-08 +26722115,MOGEN: a tool for reconstructing 3D models of genomes from chromosomal conformation capturing data.,"

Motivation

The three-dimensional (3D) conformation of chromosomes and genomes play an important role in cellular processes such as gene regulation, DNA replication and genome methylation. Several methods have been developed to reconstruct 3D structures of individual chromosomes from chromosomal conformation capturing data such as Hi-C data. However, few methods can effectively reconstruct the 3D structures of an entire genome due to the difficulty of handling noisy and inconsistent inter-chromosomal contact data.

Results

We generalized a 3D chromosome reconstruction method to make it capable of reconstructing 3D models of genomes from both intra- and inter-chromosomal Hi-C contact data and implemented it as a software tool called MOGEN. We validated MOGEN on synthetic datasets of a polymer worm-like chain model and a yeast genome at first, and then applied it to generate an ensemble of 3D structural models of the genome of human B-cells from a Hi-C dataset. These genome models not only were validated by some known structural patterns of the human genome, such as chromosome compartmentalization, chromosome territories, co-localization of small chromosomes in the nucleus center with the exception of chromosome 18, enriched center-toward inter-chromosomal interactions between elongated or telomere regions of chromosomes, but also demonstrated the intrinsically dynamic orientations between chromosomes. Therefore, MOGEN is a useful tool for converting chromosomal contact data into 3D genome models to provide a better view into the spatial organization of genomes.

Availability and implementation

The software of MOGEN is available at: http://calla.rnet.missouri.edu/mogen/

Contact

: chengji@missouri.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-31 +27164567,IntSIM: An Integrated Simulator of Next-Generation Sequencing Data.,"

Objective

Next-generation sequencing data has been widely used for DNA variant discovery and tumor study through computational tools. Effective simulation of such data with many realistic features is very necessary for testing existing tools and guiding the development of new tools.

Methods

We present an integrated simulation system, IntSIM, to simulate common DNA variants and to generate sequencing reads for mixture genomes. IntSIM has three novel features in comparison with other simulation programs: 1) it is able to simulate both germline and somatic variants in the same sequence, 2) it deals with tumor purity so as to generate reads corresponding to heterogeneous genomes and also produce tumor-normal matched samples, and 3) it simulates correlations among SNPs, among CNVs/CNAs based on HMM models trained from real sequencing genomes, and can simulate broad and focal CNV/CNA events.

Results

The simulation data of IntSIM can reflect characteristics observed from real data and are consistent with input parameters. The IntSIM software package is freely available at http://intsim.sourceforge.net/.

Conclusion

Based on a great number of experiments, IntSIM performs better than other programs for some scenarios, such as simulation of heterozygous SNPs, CNVs/CNAs, and can achieve some functions that other programs cannot achieve.

Significance

Simulation with IntSIM can be expected to evaluate performance of methods in detecting various types of variants, analyzing tumor samples, and especially providing a realistic assessment of effect of tumor purity on identification of somatic mutations.",2016-04-29 +26986515,p53MutaGene: an online tool to estimate the effect of p53 mutational status on gene regulation in cancer.,"p53MutaGene is the first online tool for statistical validation of hypotheses regarding the effect of p53 mutational status on gene regulation in cancer. This tool is based on several large-scale clinical gene expression data sets and currently covers breast, colon and lung cancers. The tool detects differential co-expression patterns in expression data between p53 mutated versus p53 normal samples for the user-specified genes. Statistically significant differential co-expression for a gene pair is indicative that regulation of two genes is sensitive to the presence of p53 mutations. p53MutaGene can be used in 'single mode' where the user can test a specific pair of genes or in 'discovery mode' designed for analysis of several genes. Using several examples, we demonstrate that p53MutaGene is a useful tool for fast statistical validation in clinical data of p53-dependent gene regulation patterns. The tool is freely available at http://www.bioprofiling.de/tp53.",2016-03-17 +25886726,Genome-wide distribution comparative and composition analysis of the SSRs in Poaceae.,"

Background

The Poaceae family is of great importance to human beings since it comprises the cereal grasses which are the main sources for human food and animal feed. With the rapid growth of genomic data from Poaceae members, comparative genomics becomes a convenient method to study genetics of different species. The SSRs (Simple Sequence Repeats) are widely used markers in the studies of Poaceae for their high abundance and stability.

Results

In this study, using the genomic sequences of 9 Poaceae species, we detected 11,993,943 SSR loci and developed 6,799,910 SSR primer pairs. The results show that SSRs are distributed on all the genomic elements in grass. Hexamer is the most frequent motif and AT/TA is the most frequent motif in dimer. The abundance of the SSRs has a positive linear relationship with the recombination rate. SSR sequences in the coding regions involve a higher GC content in the Poaceae than that in the other species. SSRs of 70-80 bp in length showed the highest AT/GC base ratio among all of these loci. The result shows the highest polymorphism rate belongs to the SSRs ranged from 30 bp to 40 bp. Using all the SSR primers of Japonica, nineteen universal primers were selected and located on the genome of the grass family. The information of SSR loci, the SSR primers and the tools of mining and analyzing SSR are provided in the PSSRD (Poaceae SSR Database, http://biodb.sdau.edu.cn/pssrd/).

Conclusions

Our study and the PSSRD database provide a foundation for the comparative study in the Poaceae and it will accelerate the study on markers application, gene mapping and molecular breeding.",2015-02-15 +22784567,SigCS base: an integrated genetic information resource for human cerebral stroke.,"

Background

To understand how stroke risk factors mechanistically contribute to stroke, the genetic components regulating each risk factor need to be integrated and evaluated with respect to biological function and through pathway-based algorithms. This resource will provide information to researchers studying the molecular and genetic causes of stroke in terms of genomic variants, genes, and pathways.

Methods

Reported genetic variants, gene structure, phenotypes, and literature information regarding stroke were collected and extracted from publicly available databases describing variants, genome, proteome, functional annotation, and disease subtypes. Stroke related candidate pathways and etiologic genes that participate significantly in risk were analyzed in terms of canonical pathways in public biological pathway databases. These efforts resulted in a relational database of genetic signals of cerebral stroke, SigCS base, which implements an effective web retrieval system.

Results

The current version of SigCS base documents 1943 non-redundant genes with 11472 genetic variants and 165 non-redundant pathways. The web retrieval system of SigCS base consists of two principal search flows, including: 1) a gene-based variant search using gene table browsing or a keyword search, and, 2) a pathway-based variant search using pathway table browsing. SigCS base is freely accessible at http://sysbio.kribb.re.kr/sigcs.

Conclusions

SigCS base is an effective tool that can assist researchers in the identification of the genetic factors associated with stroke by utilizing existing literature information, selecting candidate genes and variants for experimental studies, and examining the pathways that contribute to the pathophysiological mechanisms of stroke.",2011-12-14 +25110036,MSCs and hyaluronan: sticking together for new therapeutic potential?,"Research involving mesenchymal multipotent/stem/progenitor/stromal/marrow cells (MSCs) have translated to clinical trials at an extraordinary pace. By the time of this review, the public clinical trials database (http://clinicaltrials.gov) has 394 clinical trials listed using MSCs for a very wide range of therapeutic applications. Unexpectedly, the explanation for the increase in clinical trials using MSCs does not lie on a well-defined therapeutic mechanism--dramatic results have been demonstrated in a variety of studies involving different animal models of diseases, often describing discrete therapeutic mechanisms exerted by MSCs. This review will focus on recent data suggesting the involvement of hyaluronic acid (HA) in the beneficial effects of MSCs, evaluate the potential of MSC as modulators of HA and the implications of this modulation for disease therapy.",2014-08-07 +28369371,EUPAN enables pan-genome studies of a large number of eukaryotic genomes.,"

Summary

Pan-genome analyses are routinely carried out for bacteria to interpret the within-species gene presence/absence variations (PAVs). However, pan-genome analyses are rare for eukaryotes due to the large sizes and higher complexities of their genomes. Here we proposed EUPAN, a eukaryotic pan-genome analysis toolkit, enabling automatic large-scale eukaryotic pan-genome analyses and detection of gene PAVs at a relatively low sequencing depth. In the previous studies, we demonstrated the effectiveness and high accuracy of EUPAN in the pan-genome analysis of 453 rice genomes, in which we also revealed widespread gene PAVs among individual rice genomes. Moreover, EUPAN can be directly applied to the current re-sequencing projects primarily focusing on single nucleotide polymorphisms.

Availability and implementation

EUPAN is implemented in Perl, R and C++. It is supported under Linux and preferred for a computer cluster with LSF and SLURM job scheduling system. EUPAN together with its standard operating procedure (SOP) is freely available for non-commercial use (CC BY-NC 4.0) at http://cgm.sjtu.edu.cn/eupan/index.html .

Contact

ccwei@sjtu.edu.cn or jianxin.shi@sjtu.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +29282047,The effects of patients initiated aggression on Chinese medical students' career planning.,"

Background

Patient initiated aggression is common among Chinese health-care workers, reaching over 10,000 incidents annually (Jinyang web. http://6d.dxy.cn/article/55497 . 2013), and the tense doctor-patient relationship generates stress among medical students. Because of the paucity of data (few surveys pay attention to the effects of violence perpetrated by patients on medical students), this study aimed to characterize patient initiated aggression against medical students.

Methods

In this cross-sectional survey conducted at a medical school in West China in 2015, 157 medical students completed a self-administered questionnaire and the Short Form-36, which assesses quality of life. The associations between patient initiated aggression exposure and medical students' career planning or quality of life were assessed using a chi-square test.

Results

Of the 157 medical students, 48 (30.6%) reported having suffered patient initiated aggression at least once during the previous year in the form of mental abuse (20.4%), offensive threat (14.6%), physical violence (8.3%), sexual harassment (verbal: 8.3% or physical: 1.6%), and extreme violence (physical violence leading to surgical treatment or hospitalization) (0.6%). Insufficient communication was the primary reason cited (27.2%). Emotional attack (mental abuse and offensive threat) occurrence differed among age groups (χ2 = 9.786, P = 0.020) and was ubiquitous among those aged >30 years old. Women were more likely than men to suffer physical violence (χ2 = 6.796, P = 0.009). Patient initiated aggression was not significantly associated with medical students' career planning or quality of life.

Conclusions

In this study, patient initiated aggression, albeit common, as in the rest of China, did not appear to be associated with medical students' career planning or quality of life. However, the characteristics described can inform policymaking and the design of programs to minimize patient initiated aggression occurrence.",2017-12-28 +29098178,Denoising of Quality Scores for Boosted Inference and Reduced Storage.,"Massive amounts of sequencing data are being generated thanks to advances in sequencing technology and a dramatic drop in the sequencing cost. Much of the raw data are comprised of nucleotides and the corresponding quality scores that indicate their reliability. The latter are more difficult to compress and are themselves noisy. Lossless and lossy compression of the quality scores has recently been proposed to alleviate the storage costs, but reducing the noise in the quality scores has remained largely unexplored. This raw data is processed in order to identify variants; these genetic variants are used in important applications, such as medical decision making. Thus improving the performance of the variant calling by reducing the noise contained in the quality scores is important. We propose a denoising scheme that reduces the noise of the quality scores and we demonstrate improved inference with this denoised data. Specifically, we show that replacing the quality scores with those generated by the proposed denoiser results in more accurate variant calling in general. Moreover, a consequence of the denoising is that the entropy of the produced quality scores is smaller, and thus significant compression can be achieved with respect to lossless compression of the original quality scores. We expect our results to provide a baseline for future research in denoising of quality scores. 
The code used in this work as well as a Supplement with all the results are available at http://web.stanford.edu/~iochoa/DCCdenoiser_CodeAndSupplement.zip.",2016-03-01 +28146594,Lyme Carditis: A Case Involving the Conduction System and Mitral Valve.,"Lyme disease is the most common tick-borne infection in the Northern hemisphere. Cardiac manifestations of Lyme disease typically include variable atrioventricular nodal block and rarely structural heart pathology. The incidence of Lyme carditis may be underestimated based on current reporting practices of confirmed cases. This case of a 59-year-old man with Lyme carditis demonstrates the unique presentation of widespread conduction system disease, mitral regurgitation, and suspected ischemic disease. Through clinical data, electrocardiograms, and cardiac imaging, we show the progression, and resolution, of a variety of cardiac symptoms attributable to infection with Lyme. [Full article available at http://rimed.org/rimedicaljournal-2017-02.asp].",2017-02-01 +28398456,GARN2: coarse-grained prediction of 3D structure of large RNA molecules by regret minimization.,"

Motivation

Predicting the 3D structure of RNA molecules is a key feature towards predicting their functions. Methods which work at atomic or nucleotide level are not suitable for large molecules. In these cases, coarse-grained prediction methods aim to predict a shape which could be refined later by using more precise methods on smaller parts of the molecule.

Results

We developed a complete method for sampling 3D RNA structure at a coarse-grained model, taking a secondary structure as input. One of the novelties of our method is that a second step extracts two best possible structures close to the native, from a set of possible structures. Although our method benefits from the first version of GARN, some of the main features on GARN2 are very different. GARN2 is much faster than the previous version and than the well-known methods of the state-of-art. Our experiments show that GARN2 can also provide better structures than the other state-of-the-art methods.

Availability and implementation

GARN2 is written in Java. It is freely distributed and available at http://garn.lri.fr/.

Contact

melanie.boudard@lri.fr or johanne.cohen@lri.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +23175615,EuPathDB: the eukaryotic pathogen database.,"EuPathDB (http://eupathdb.org) resources include 11 databases supporting eukaryotic pathogen genomic and functional genomic data, isolate data and phylogenomics. EuPathDB resources are built using the same infrastructure and provide a sophisticated search strategy system enabling complex interrogations of underlying data. Recent advances in EuPathDB resources include the design and implementation of a new data loading workflow, a new database supporting Piroplasmida (i.e. Babesia and Theileria), the addition of large amounts of new data and data types and the incorporation of new analysis tools. New data include genome sequences and annotation, strand-specific RNA-seq data, splice junction predictions (based on RNA-seq), phosphoproteomic data, high-throughput phenotyping data, single nucleotide polymorphism data based on high-throughput sequencing (HTS) and expression quantitative trait loci data. New analysis tools enable users to search for DNA motifs and define genes based on their genomic colocation, view results from searches graphically (i.e. genes mapped to chromosomes or isolates displayed on a map) and analyze data from columns in result tables (word cloud and histogram summaries of column content). The manuscript herein describes updates to EuPathDB since the previous report published in NAR in 2010.",2012-11-21 +27187201,Top-down analysis of protein samples by de novo sequencing techniques.,"

Motivation

Recent technological advances have made high-resolution mass spectrometers affordable to many laboratories, thus boosting rapid development of top-down mass spectrometry, and implying a need in efficient methods for analyzing this kind of data.

Results

We describe a method for analysis of protein samples from top-down tandem mass spectrometry data, which capitalizes on de novo sequencing of fragments of the proteins present in the sample. Our algorithm takes as input a set of de novo amino acid strings derived from the given mass spectra using the recently proposed Twister approach, and combines them into aggregated strings endowed with offsets. The former typically constitute accurate sequence fragments of sufficiently well-represented proteins from the sample being analyzed, while the latter indicate their location in the protein sequence, and also bear information on post-translational modifications and fragmentation patterns.

Availability and implementation

Freely available on the web at http://bioinf.spbau.ru/en/twister

Contact

vyatkina@spbau.ru or ppevzner@ucsd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-14 +26919047,The Implicitome: A Resource for Rationalizing Gene-Disease Associations.,"High-throughput experimental methods such as medical sequencing and genome-wide association studies (GWAS) identify increasingly large numbers of potential relations between genetic variants and diseases. Both biological complexity (millions of potential gene-disease associations) and the accelerating rate of data production necessitate computational approaches to prioritize and rationalize potential gene-disease relations. Here, we use concept profile technology to expose from the biomedical literature both explicitly stated gene-disease relations (the explicitome) and a much larger set of implied gene-disease associations (the implicitome). Implicit relations are largely unknown to, or are even unintended by the original authors, but they vastly extend the reach of existing biomedical knowledge for identification and interpretation of gene-disease associations. The implicitome can be used in conjunction with experimental data resources to rationalize both known and novel associations. We demonstrate the usefulness of the implicitome by rationalizing known and novel gene-disease associations, including those from GWAS. To facilitate the re-use of implicit gene-disease associations, we publish our data in compliance with FAIR Data Publishing recommendations [https://www.force11.org/group/fairgroup] using nanopublications. 
An online tool (http://knowledge.bio) is available to explore established and potential gene-disease associations in the context of other biomedical relations.",2016-02-26 +28044061,In silico identification and in vivo validation of miR-495 as a novel regulator of motivation for cocaine that targets multiple addiction-related networks in the nucleus accumbens.,"MicroRNAs (miRNAs) are important post-transcriptional regulators of gene expression and are implicated in the etiology of several neuropsychiatric disorders, including substance use disorders (SUDs). Using in silico genome-wide sequence analyses, we identified miR-495 as a miRNA whose predicted targets are significantly enriched in the Knowledgebase for Addiction Related Genes (ARG) database (KARG; http://karg.cbi.pku.edu.cn). This small non-coding RNA is also highly expressed within the nucleus accumbens (NAc), a pivotal brain region underlying reward and motivation. Using luciferase reporter assays, we found that miR-495 directly targeted the 3'UTRs of Bdnf, Camk2a and Arc. Furthermore, we measured miR-495 expression in response to acute cocaine in mice and found that it is downregulated rapidly and selectively in the NAc, along with concomitant increases in ARG expression. Lentiviral-mediated miR-495 overexpression in the NAc shell (NAcsh) not only reversed these cocaine-induced effects but also downregulated multiple ARG mRNAs in specific SUD-related biological pathways, including those that regulate synaptic plasticity. miR-495 expression was also downregulated in the NAcsh of rats following cocaine self-administration. Most importantly, we found that NAcsh miR-495 overexpression suppressed the motivation to self-administer and seek cocaine across progressive ratio, extinction and reinstatement testing, but had no effect on food reinforcement, suggesting that miR-495 selectively affects addiction-related behaviors. 
Overall, our in silico search for post-transcriptional regulators identified miR-495 as a novel regulator of multiple ARGs that have a role in modulating motivation for cocaine.",2017-01-03 +28045122,miRsig: a consensus-based network inference methodology to identify pan-cancer miRNA-miRNA interaction signatures.,"Decoding the patterns of miRNA regulation in diseases are important to properly realize its potential in diagnostic, prognostic, and therapeutic applications. Only a handful of studies computationally predict possible miRNA-miRNA interactions; hence, such interactions require a thorough investigation to understand their role in disease progression. In this paper, we design a novel computational pipeline to predict the common signature/core sets of miRNA-miRNA interactions for different diseases using network inference algorithms on the miRNA-disease expression profiles; the individual predictions of these algorithms were then merged using a consensus-based approach to predict miRNA-miRNA associations. We next selected the miRNA-miRNA associations across particular diseases to generate the corresponding disease-specific miRNA-interaction networks. Next, graph intersection analysis was performed on these networks for multiple diseases to identify the common signature/core sets of miRNA interactions. We applied this pipeline to identify the common signature of miRNA-miRNA interactions for cancers. The identified signatures when validated using a manual literature search from PubMed Central and the PhenomiR database, show strong relevance with the respective cancers, providing an indirect proof of the high accuracy of our methodology. We developed miRsig, an online tool for analysis and visualization of the disease-specific signature/core miRNA-miRNA interactions, available at: http://bnet.egr.vcu.edu/miRsig.",2017-01-03 +30051742,Inference of gene networks from gene expression time series using recurrent neural networks and sparse MAP estimation.,"

Background

The inference of genetic regulatory networks (GRNs) provides insight into the cellular responses to signals. A class of recurrent neural networks (RNNs) capturing the dynamics of GRN has been used as a basis for inferring small-scale GRNs from gene expression time series. The Bayesian framework facilitates incorporating the hypothesis of GRN into the model estimation to improve the accuracy of GRN inference.

Results

We present new methods for inferring small-scale GRNs based on RNNs. The weights of wires of RNN represent the strengths of gene-to-gene regulatory interactions. We use a class of automatic relevance determination (ARD) priors to enforce the sparsity in the maximum a posteriori (MAP) estimates of wire weights of RNN. A particle swarm optimization (PSO) is integrated as an optimization engine into the MAP estimation process. Likely networks of genes generated based on estimated wire weights are combined using the majority rule to determine a final estimated GRN. As an alternative, a class of Lq -norm ( q=1 ) priors is used for attaining the sparse MAP estimates of wire weights of RNN. We also infer the GRN using the maximum likelihood (ML) estimates of wire weights of RNN. The RNN-based GRN inference algorithms, ARD-RNN, Lq -RNN, and ML-RNN are tested on simulated and experimental E. coli and yeast time series containing 6-11 genes and 7-19 data points. Published GRN inference algorithms based on regressions and mutual information networks are performed on the benchmark datasets to compare performances.

Conclusion

ARD and Lq -norm priors are used for the estimation of wire weights of RNN. Results of GRN inference experiments show that ARD-RNN, Lq -RNN have similar best accuracies on the simulated time series. The ARD-RNN is more accurate than Lq -RNN, ML-RNN, and mostly more accurate than the reference algorithms on the experimental time series. The effectiveness of ARD-RNN for inferring small-scale GRNs using gene expression time series of limited length is empirically verified.",2018-04-26 +,PhyloMarker—A Tool for Mining Phylogenetic Markers Through Genome Comparison: Application of the Mouse Lemur (Genus Microcebus) Phylogeny,"Molecular phylogeny is a fundamental tool to understanding the evolution of all life forms. One common issue faced by molecular phylogeny is the lack of sufficient molecular markers. Here, we present PhyloMarker, a phylogenomic tool designed to find nuclear gene markers for the inference of phylogeny through multiple genome comparison. Around 800 candidate markers were identified by PhyloMarker through comparison of partial genomes of Microcebus and Otolemur. In experimental tests of 20 randomly selected markers, nine markers were successfully amplified by PCR and directly sequenced in all 17 nominal Microcebus species. Phylogenetic analyses of the sequence data obtained for 17 taxa and nine markers confirmed the distinct lineage inferred from previous mtDNA data. PhyloMarker has also been used by other projects including the herons (Ardeidae, Aves) phylogeny and the Wood mice (Muridae, Mammalia) phylogeny. All source code and sample data are made available at http://bioinfo-srv1.awh.unomaha.edu/phylomarker/.",2012-01-01 +28335739,Identification of long non-coding transcripts with feature selection: a comparative study.,"

Background

The unveiling of long non-coding RNAs as important gene regulators in many biological contexts has increased the demand for efficient and robust computational methods to identify novel long non-coding RNAs from transcripts assembled with high throughput RNA-seq data. Several classes of sequence-based features have been proposed to distinguish between coding and non-coding transcripts. Among them, open reading frame, conservation scores, nucleotide arrangements, and RNA secondary structure have been used with success in literature to recognize intergenic long non-coding RNAs, a particular subclass of non-coding RNAs.

Results

In this paper we perform a systematic assessment of a wide collection of features extracted from sequence data. We use most of the features proposed in the literature, and we include, as a novel set of features, the occurrence of repeats contained in transposable elements. The aim is to detect signatures (groups of features) able to distinguish long non-coding transcripts from other classes, both protein-coding and non-coding. We evaluate different feature selection algorithms, test for signature stability, and evaluate the prediction ability of a signature with a machine learning algorithm. The study reveals different signatures in human, mouse, and zebrafish, highlighting that some features are shared among species, while others tend to be species-specific. Compared to coding potential tools and similar supervised approaches, including novel signatures, such as those identified here, in a machine learning algorithm improves the prediction performance, in terms of area under precision and recall curve, by 1 to 24%, depending on the species and on the signature.

Conclusions

Understanding which features are best suited for the prediction of long non-coding RNAs allows for the development of more effective automatic annotation pipelines especially relevant for poorly annotated genomes, such as zebrafish. We provide a web tool that recognizes novel long non-coding RNAs with the obtained signatures from fasta and gtf formats. The tool is available at the following url: http://www.bioinformatics-sannio.org/software/ .",2017-03-23 +28927765,Recurrent vulvovaginal candidiasis.,"

Purpose

Recurrent vulvovaginal candidiasis (RVVC), multiple episodes of vulvovaginal candidiasis (VVC; vaginal yeast infection) within a 12-month period, adversely affects quality of life, mental health, and sexual activity. Diagnosis is not straightforward, as VVC is defined by the combination of often nonspecific vaginal symptoms and the presence of yeast-which is a common vaginal commensal. Estimating the incidence and prevalence is challenging: most VVC is diagnosed and treated empirically, the availability for purchase of effective therapies over the counter enables self-diagnosis and treatment, and the duration of the relatively benign VVC symptoms is short, introducing errors into any estimates relying on medical records or patient recall.

Methods

We evaluate current estimates of VVC and RVVC and provide new prevalence estimates using data from a 2011 seven-country (n = 7345) internet panel survey on VVC conducted by Ipsos Health (https://www.ipsos.com/en). We also evaluate information on VVC-associated visits using the National Ambulatory Medical Care Survey.

Results

The estimated probability of VVC by age 50 varied widely by country (from 23% to 49%, mean 39%), as did the estimated probability of RVVC after VVC (from 14% to 28%, mean 23%).

Conclusions

However estimated, the probability of RVVC was high suggesting RVVC is a common condition.",2017-08-15 +28811565,iSS-PC: Identifying Splicing Sites via Physical-Chemical Properties Using Deep Sparse Auto-Encoder.,"Gene splicing is one of the most significant biological processes in eukaryotic gene expression, such as RNA splicing, which can cause a pre-mRNA to produce one or more mature messenger RNAs containing the coded information with multiple biological functions. Thus, identifying splicing sites in DNA/RNA sequences is significant for both the bio-medical research and the discovery of new drugs. However, it is expensive and time consuming based only on experimental technique, so new computational methods are needed. To identify the splice donor sites and splice acceptor sites accurately and quickly, a deep sparse auto-encoder model with two hidden layers, called iSS-PC, was constructed based on minimum error law, in which we incorporated twelve physical-chemical properties of the dinucleotides within DNA into PseDNC to formulate given sequence samples via a battery of cross-covariance and auto-covariance transformations. In this paper, five-fold cross-validation test results based on the same benchmark data-sets indicated that the new predictor remarkably outperformed the existing prediction methods in this field. Furthermore, it is expected that many other related problems can be also studied by this approach. To implement classification accurately and quickly, an easy-to-use web-server for identifying slicing sites has been established for free access at: http://www.jci-bioinfo.cn/iSS-PC.",2017-08-15 +27652002,CrossDocker: a tool for performing cross-docking using Autodock Vina.,"

Background

Cross-docking is an approach to find the best holo structures among multiple structures available for a target protein.

Results

CrossDocker significantly decreases the time needed for setting parameters and inputs for performing multiple dockings, data collection and subsequent analysis.

Conclusion

CrossDocker was written in Python language and is available as executable binary for Windows operating system. It is available at http://www.pharm-sbg.com. Some example data sets were also provided.",2016-03-17 +25165094,MIRPIPE: quantification of microRNAs in niche model organisms.,"

Unlabelled

MicroRNAs (miRNAs) represent an important class of small non-coding RNAs regulating gene expression in eukaryotes. Present algorithms typically rely on genomic data to identify miRNAs and require extensive installation procedures. Niche model organisms lacking genomic sequences cannot be analyzed by such tools. Here we introduce the MIRPIPE application enabling rapid and simple browser-based miRNA homology detection and quantification. MIRPIPE features automatic trimming of raw RNA-Seq reads originating from various sequencing instruments, processing of isomiRs and quantification of detected miRNAs versus public- or user-uploaded reference databases.

Availability and implementation

The Web service is freely available at http://bioinformatics.mpi-bn.mpg.de. MIRPIPE was implemented in Perl and integrated into Galaxy. An offline version for local execution is also available from our Web site.",2014-08-26 +28546311,Evidence for Functional Networks within the Human Brain's White Matter.,"Investigation of the functional macro-scale organization of the human cortex is fundamental in modern neuroscience. Although numerous studies have identified networks of interacting functional modules in the gray-matter, limited research was directed to the functional organization of the white-matter. Recent studies have demonstrated that the white-matter exhibits blood oxygen level-dependent signal fluctuations similar to those of the gray-matter. Here we used these signal fluctuations to investigate whether the white-matter is organized as functional networks by applying a clustering analysis on resting-state functional MRI (RSfMRI) data from white-matter voxels, in 176 subjects (of both sexes). This analysis indicated the existence of 12 symmetrical white-matter functional networks, corresponding to combinations of white-matter tracts identified by diffusion tensor imaging. Six of the networks included interhemispheric commissural bridges traversing the corpus callosum. Signals in white-matter networks correlated with signals from functional gray-matter networks, providing missing knowledge on how these distributed networks communicate across large distances. These findings were replicated in an independent subject group and were corroborated by seed-based analysis in small groups and individual subjects. The identified white-matter functional atlases and analysis codes are available at http://mind.huji.ac.il/white-matter.aspx Our results demonstrate that the white-matter manifests an intrinsic functional organization as interacting networks of functional modules, similarly to the gray-matter, which can be investigated using RSfMRI. 
The discovery of functional networks within the white-matter may open new avenues of research in cognitive neuroscience and clinical neuropsychiatry.SIGNIFICANCE STATEMENT In recent years, functional MRI (fMRI) has revolutionized all fields of neuroscience, enabling identifications of functional modules and networks in the human brain. However, most fMRI studies ignored a major part of the brain, the white-matter, discarding signals from it as arising from noise. Here we use resting-state fMRI data from 176 subjects to show that signals from the human white-matter contain meaningful information. We identify 12 functional networks composed of interacting long-distance white-matter tracts. Moreover, we show that these networks are highly correlated to resting-state gray-matter networks, highlighting their functional role. Our findings enable reinterpretation of many existing fMRI datasets, and suggest a new way to explore the white-matter role in cognition and its disturbances in neuropsychiatric disorders.",2017-05-25 +26905301,Principal elementary mode analysis (PEMA).,"Principal component analysis (PCA) has been widely applied in fluxomics to compress data into a few latent structures in order to simplify the identification of metabolic patterns. These latent structures lack a direct biological interpretation due to the intrinsic constraints associated with a PCA model. Here we introduce a new method that significantly improves the interpretability of the principal components with a direct link to metabolic pathways. This method, called principal elementary mode analysis (PEMA), establishes a bridge between a PCA-like model, aimed at explaining the maximum variance in flux data, and the set of elementary modes (EMs) of a metabolic network. It provides an easy way to identify metabolic patterns in large fluxomics datasets in terms of the simplest pathways of the organism metabolism. 
The results using a real metabolic model of Escherichia coli show the ability of PEMA to identify the EMs that generated the different simulated flux distributions. Actual flux data of E. coli and Pichia pastoris cultures confirm the results observed in the simulated study, providing a biologically meaningful model to explain flux data of both organisms in terms of the EM activation. The PEMA toolbox is freely available for non-commercial purposes on http://mseg.webs.upv.es.",2016-02-01 +27572102,Fast and Accurate Protein False Discovery Rates on Large-Scale Proteomics Data Sets with Percolator 3.0.,"Percolator is a widely used software tool that increases yield in shotgun proteomics experiments and assigns reliable statistical confidence measures, such as q values and posterior error probabilities, to peptides and peptide-spectrum matches (PSMs) from such experiments. Percolator's processing speed has been sufficient for typical data sets consisting of hundreds of thousands of PSMs. With our new scalable approach, we can now also analyze millions of PSMs in a matter of minutes on a commodity computer. Furthermore, with the increasing awareness for the need for reliable statistics on the protein level, we compared several easy-to-understand protein inference methods and implemented the best-performing method-grouping proteins by their corresponding sets of theoretical peptides and then considering only the best-scoring peptide for each protein-in the Percolator package. We used Percolator 3.0 to analyze the data from a recent study of the draft human proteome containing 25 million spectra (PM:24870542). The source code and Ubuntu, Windows, MacOS, and Fedora binary packages are available from http://percolator.ms/ under an Apache 2.0 license. 
Graphical Abstract.",2016-08-29 +29220447,"MiRIAD update: using alternative polyadenylation, protein interaction network analysis and additional species to enhance exploration of the role of intragenic miRNAs and their host genes. ",http://www.miriad-database.org.,2017-01-01 +28901845,A Review of Patents on Therapeutic Potential and Delivery of Hydrogen Sulfide.,"

Background

Hydrogen sulfide (H2S) is a colorless gas with a characteristic smell of rotten eggs. Once only thought of as a toxic gas, evidence now shows that H2S plays major roles in pathological and physiological activities. These roles are being utilized to treat diseases and disorders ranging from hypertension, inflammation, edema, cardiovascular issues, chronic pain, cancer, and many more. Challenges facing the use of H2S currently involve achieving the optimum therapeutic concentrations, synthesizing chemically and physiologically stable donors, and developing clinically appropriate delivery systems.

Methods

We did an extensive literature search on therapeutic potentials and related issues of H2S which were presented in a systematic flow pattern in introduction. Patents accepted/filed on various aspects of hydrogen sulfide were searched using the United States Patent and Trademark Office database at http://patft.uspto.gov/ and google patents at https://patents.google.com/. The important search terms combined with H2S were therapeutic effect, pharmacological action, biochemistry, measurement, and delivery. We also incorporated our own experiences and publications while discussing the delivery approaches and associated challenges.

Results

In the process, researchers have discovered novel techniques in preparing the noxious gas by discovering and synthesizing H2S donors and developing controlled and predictable delivery systems. Donors utilized thus far include derivatives of anti-inflammatory drugs like H2S -aspirin, Allium sativum extracts, inorganic salts, phosphorodithioate derivatives, and thioaminoacid derivatives. Use of controlled delivery systems for H2S is critical to maintain its physiological stability, optimum therapeutic window, increase patient compliance, and make it easier to manufacture and administer. Numerous patents overcoming the challenges of using H2S therapeutically with various donors and delivery mechanisms have been reviewed.

Conclusion

The scientific knowledge gained from the last decade researches has moved H2S from a foul smelling pungent gas to the status of a gasotransmitter with many potential therapeutic applications. However, developing a suitable donor and a delivery system using that donor for providing precise and sustained release of H2S for an extended period, is critically needed for any further development towards its translation into clinical practices.",2017-01-01 +28365739,GrTEdb: the first web-based database of transposable elements in cotton (Gossypium raimondii). ,"Although several diploid and tetraploid Gossypium species genomes have been sequenced, the well annotated web-based transposable elements (TEs) database is lacking. To better understand the roles of TEs in structural, functional and evolutionary dynamics of the cotton genome, a comprehensive, specific, and user-friendly web-based database, Gossypium raimondii transposable elements database (GrTEdb), was constructed. A total of 14 332 TEs were structurally annotated and clearly categorized in G. raimondii genome, and these elements have been classified into seven distinct superfamilies based on the order of protein-coding domains, structures and/or sequence similarity, including 2929 Copia-like elements, 10 368 Gypsy-like elements, 299 L1, 12 Mutators, 435 PIF-Harbingers, 275 CACTAs and 14 Helitrons. Meanwhile, the web-based sequence browsing, searching, downloading and blast tool were implemented to help users easily and effectively to annotate the TEs or TE fragments in genomic sequences from G. raimondii and other closely related Gossypium species. GrTEdb provides resources and information related with TEs in G. raimondii, and will facilitate gene and genome analyses within or across Gossypium species, evaluating the impact of TEs on their host genomes, and investigating the potential interaction between TEs and protein-coding genes in Gossypium species. 
http://www.grtedb.org/.",2017-01-01 +28365729,The HIV oligonucleotide database (HIVoligoDB). ,"The human immunodeficiency virus (HIV) is associated with one of the most widespread infectious diseases, the acquired immunodeficiency syndrome (AIDS). The development of antiretroviral drugs and methods for virus detection requires a comprehensive analysis of the HIV genomic diversity, particularly in the binding sites of oligonucleotides. Here, we describe a versatile online database (HIVoligoDB) with oligonucleotides selected for the diagnosis of HIV and treatment of AIDS. Currently, the database provides an interface for visualization, analysis and download of 380 HIV-1 and 65 HIV-2 oligonucleotides annotated according to curated reference genomes. The database also allows the selection of the most conserved HIV genomic regions for the development of molecular diagnostic assays and sequence-based candidate therapeutics. http://portugene.com/HIVoligoDB.",2017-01-01 +28007064,The Society of Thoracic Surgeons Congenital Heart Surgery Database Public Reporting Initiative.,"Three basic principles provide the rationale for the Society of Thoracic Surgeons (STS) Congenital Heart Surgery Database (CHSD) public reporting initiative: (1) Variation in congenital and pediatric cardiac surgical outcomes exists. (2) Patients and their families have the right to know the outcomes of the treatments that they will receive. (3) It is our professional responsibility to share this information with them in a format they can understand. The STS CHSD public reporting initiative facilitates the voluntary transparent public reporting of congenital and pediatric cardiac surgical outcomes using the STS CHSD Mortality Risk Model. 
The STS CHSD Mortality Risk Model is used to calculate risk-adjusted operative mortality and adjusts for the following variables: age, primary procedure, weight (neonates and infants), prior cardiothoracic operations, non-cardiac congenital anatomic abnormalities, chromosomal abnormalities or syndromes, prematurity (neonates and infants), and preoperative factors (including preoperative/preprocedural mechanical circulatory support [intraaortic balloon pump, ventricular assist device, extracorporeal membrane oxygenation, or cardiopulmonary support], shock [persistent at time of surgery], mechanical ventilation to treat cardiorespiratory failure, renal failure requiring dialysis and/or renal dysfunction, preoperative neurological deficit, and other preoperative factors). Operative mortality is defined in all STS databases as (1) all deaths, regardless of cause, occurring during the hospitalization in which the operation was performed, even if after 30 days (including patients transferred to other acute care facilities); and (2) all deaths, regardless of cause, occurring after discharge from the hospital, but before the end of the 30th postoperative day. The STS CHSD Mortality Risk Model has good model fit and discrimination with an overall C statistics of 0.875 and 0.858 in the development sample and the validation sample, respectively. These C statistics are the highest C statistics ever seen in a pediatric cardiac surgical risk model. Therefore, the STS CHSD Mortality Risk Model provides excellent adjustment for case mix and should mitigate against risk aversive behavior. The STS CHSD Mortality Risk Model is the best available model to date for measuring outcomes after pediatric cardiac surgery. As of March 2016, 60% of participants in STS CHSD have agreed to publicly report their outcomes through the STS Public Reporting Online website (http://www.sts.org/quality-research-patient-safety/sts-public-reporting-online). 
Although several opportunities exist to improve our risk models, the current STS CHSD public reporting initiative provides the tools to report publicly, and with meaning and accuracy, the outcomes of congenital and pediatric cardiac surgery.",2017-01-01 +28675997,Cold-induced Anaphylaxis: The Case of a 9-year-old Child and Review of the Literature.,"

Background and objective

The present review investigated cold-induced anaphylaxis, a potentially life-threatening condition that occurs after exposure to cold stimuli and is characterized by respiratory distress and/or hypotension. Anaphylaxis is rarely associated with cold-induced urticaria (CU), a particular form of physical urticaria that is difficult to diagnose and manage. The incidence of cold-induced urticaria has been estimated at about 0.05%, higher in colder regions and in women; its pathological mechanisms are still unknown.

Methods

The literature was searched via the Medline/PubMed database (http://www.ncbi.nlm.nih.gov/pubmed).

Results and conclusion

Patients affected by CU should be well-informed about the risk of anaphylaxis and preventive measures. The prevention of CU is based on the avoidance of cold exposure. The most effective treatment is antihistamines symptomatic therapy. Anyway, patients should also carry with them an emergency kit containing corticosteroids, antihistamines and an epinephrine injector. Future studies are necessary to determine the CU pathophysiology so to establish a more targeted management of this important and potentially life-threatening condition.",2017-01-01 +28605769,TriatoKey: a web and mobile tool for biodiversity identification of Brazilian triatomine species. ,"Triatomines are blood-sucking insects that transmit the causative agent of Chagas disease, Trypanosoma cruzi. Despite being recognized as a difficult task, the correct taxonomic identification of triatomine species is crucial for vector control in Latin America, where the disease is endemic. In this context, we have developed a web and mobile tool based on PostgreSQL database to help healthcare technicians to overcome the difficulties to identify triatomine vectors when the technical expertise is missing. The web and mobile version makes use of real triatomine species pictures and dichotomous key method to support the identification of potential vectors that occur in Brazil. It provides a user example-driven interface with simple language. TriatoKey can also be useful for educational purposes. http://triatokey.cpqrr.fiocruz.br.",2017-01-01 +28365725,"Carotenoids Database: structures, chemical fingerprints and distribution among organisms. ","To promote understanding of how organisms are related via carotenoids, either evolutionarily or symbiotically, or in food chains through natural histories, we built the Carotenoids Database. This provides chemical information on 1117 natural carotenoids with 683 source organisms. 
For extracting organisms closely related through the biosynthesis of carotenoids, we offer a new similarity search system 'Search similar carotenoids' using our original chemical fingerprint 'Carotenoid DB Chemical Fingerprints'. These Carotenoid DB Chemical Fingerprints describe the chemical substructure and the modification details based upon International Union of Pure and Applied Chemistry (IUPAC) semi-systematic names of the carotenoids. The fingerprints also allow (i) easier prediction of six biological functions of carotenoids: provitamin A, membrane stabilizers, odorous substances, allelochemicals, antiproliferative activity and reverse MDR activity against cancer cells, (ii) easier classification of carotenoid structures, (iii) partial and exact structure searching and (iv) easier extraction of structural isomers and stereoisomers. We believe this to be the first attempt to establish fingerprints using the IUPAC semi-systematic names. For extracting close profiled organisms, we provide a new tool 'Search similar profiled organisms'. Our current statistics show some insights into natural history: carotenoids seem to have been spread largely by bacteria, as they produce C30, C40, C45 and C50 carotenoids, with the widest range of end groups, and they share a small portion of C40 carotenoids with eukaryotes. Archaea share an even smaller portion with eukaryotes. Eukaryotes then have evolved a considerable variety of C40 carotenoids. Considering carotenoids, eukaryotes seem more closely related to bacteria than to archaea aside from 16S rRNA lineage analysis. : http://carotenoiddb.jp.",2017-01-01 +28069893,Alga-PrAS (Algal Protein Annotation Suite): A Database of Comprehensive Annotation in Algal Proteomes.,"Algae are smaller organisms than land plants and offer clear advantages in research over terrestrial species in terms of rapid production, short generation time and varied commercial applications. 
Thus, studies investigating the practical development of effective algal production are important and will improve our understanding of both aquatic and terrestrial plants. In this study we estimated multiple physicochemical and secondary structural properties of protein sequences, the predicted presence of post-translational modification (PTM) sites, and subcellular localization using a total of 510,123 protein sequences from the proteomes of 31 algal and three plant species. Algal species were broadly selected from green and red algae, glaucophytes, oomycetes, diatoms and other microalgal groups. The results were deposited in the Algal Protein Annotation Suite database (Alga-PrAS; http://alga-pras.riken.jp/), which can be freely accessed online.",2017-01-01 +27910033,Cytogenetic Resources and Information.,"The main databases devoted stricto sensu to cancer cytogenetics are the ""Mitelman Database of Chromosome Aberrations and Gene Fusions in Cancer"" ( http://cgap.nci.nih.gov/Chromosomes/Mitelman ), the ""Atlas of Genetics and Cytogenetics in Oncology and Haematology"" ( http://atlasgeneticsoncology.org ), and COSMIC ( http://cancer.sanger.ac.uk/cosmic ).However, being a complex multistep process, cancer cytogenetics are broadened to ""cytogenomics,"" with complementary resources on: general databases (nucleic acid and protein sequences databases; cartography browsers: GenBank, RefSeq, UCSC, Ensembl, UniProtKB, and Entrez Gene), cancer genomic portals associated with recent international integrated programs, such as TCGA or ICGC, other fusion genes databases, array CGH databases, copy number variation databases, and mutation databases. Other resources such as the International System for Human Cytogenomic Nomenclature (ISCN), the International Classification of Diseases for Oncology (ICD-O), and the Human Gene Nomenclature Database (HGNC) allow a common language.Data within the scientific/medical community should be freely available. 
However, most of the institutional stakeholders are now gradually disengaging, and well-known databases are forced to beg or to disappear (which may happen!).",2017-01-01 +27787826,Intrinsic Disorder and Semi-disorder Prediction by SPINE-D.,"Over the past decade, it has become evident that a large proportion of proteins contain intrinsically disordered regions, which play important roles in pivotal cellular functions. Many computational tools have been developed with the aim of identifying the level and location of disorder within a protein. In this chapter, we describe a neural network based technique called SPINE-D that employs a unique three-state design and can accurately capture disordered residues in both short and long disordered regions. SPINE-D was trained on a large database of 4229 non-redundant proteins, and yielded an AUC of 0.86 on a cross-validation test and 0.89 on an independent test. SPINE-D can also detect a semi-disordered state that is associated with induced folders and aggregation-prone regions in disordered proteins and weakly stable or locally unfolded regions in structured proteins. We implement an online web service and an offline stand-alone program for SPINE-D, they are freely available at http://sparks-lab.org/SPINE-D/ . We then walk you through how to use the online and offline SPINE-D in making disorder predictions, and examine the disorder and semi-disorder prediction in a case study on the p53 protein.",2017-01-01 +27508224,Dataset for an analysis of tourism and economic growth: A study of Sri Lanka.,"We use the sample from 1978 to 2014 for the paper (doi:10.1016/j.tmp.2016.05.005). The data on GDP at constant 2005 USD (US dollar), and the gross fixed capital formation at constant 2005 USD are extracted from the World Bank (2015). The labour stock which includes direct and indirect employment and the tourism receipts (in USD) are sourced from the Sri Lanka Tourism Development Authority (http://www.sltda.lk/statistics). 
Tourism receipts as a per cent of GDP is used to measure tourism demand. The capital stock data is computed using perpetual inventory method, where a depreciation rate of 8 per cent is assumed with the initial capital stock as 1.05 times the GDP of 1969 at constant 2005 USD. The output per worker and capital per worker is computed by dividing the GDP and capital stock by the labour stock, respectively.",2016-07-06 +28605766,GeneHancer: genome-wide integration of enhancers and target genes in GeneCards. ,"A major challenge in understanding gene regulation is the unequivocal identification of enhancer elements and uncovering their connections to genes. We present GeneHancer, a novel database of human enhancers and their inferred target genes, in the framework of GeneCards. First, we integrated a total of 434 000 reported enhancers from four different genome-wide databases: the Encyclopedia of DNA Elements (ENCODE), the Ensembl regulatory build, the functional annotation of the mammalian genome (FANTOM) project and the VISTA Enhancer Browser. Employing an integration algorithm that aims to remove redundancy, GeneHancer portrays 285 000 integrated candidate enhancers (covering 12.4% of the genome), 94 000 of which are derived from more than one source, and each assigned an annotation-derived confidence score. GeneHancer subsequently links enhancers to genes, using: tissue co-expression correlation between genes and enhancer RNAs, as well as enhancer-targeted transcription factor genes; expression quantitative trait loci for variants within enhancers; and capture Hi-C, a promoter-specific genome conformation assay. The individual scores based on each of these four methods, along with gene–enhancer genomic distances, form the basis for GeneHancer’s combinatorial likelihood-based scores for enhancer–gene pairing. 
Finally, we define ‘elite’ enhancer–gene relations reflecting both a high-likelihood enhancer definition and a strong enhancer–gene association.GeneHancer predictions are fully integrated in the widely used GeneCards Suite, whereby candidate enhancers and their annotations are displayed on every relevant GeneCard. This assists in the mapping of non-coding variants to enhancers, and via the linked genes, forms a basis for variant–phenotype interpretation of whole-genome sequences in health and disease. http://www.genecards.org/.",2017-01-01 +28451973,The ProFunc Function Prediction Server.,"The ProFunc web server is a tool for helping identify the function of a given protein whose 3D coordinates have been experimentally determined or homology modeled. It uses a cocktail of both sequence- and structure-based methods to identify matches to other proteins that may, in turn, suggest the query protein's most likely function. The server was originally developed to aid the worldwide structural genomics effort at the start of the millennium. It accepts a file containing the protein's 3D coordinates in PDB format, and, when processing is complete, sends an email containing a link to the password-protected result pages. The results include an at-a-glance summary, as well as separate pages containing more detailed analyses. The server can be found at: http://www.ebi.ac.uk/thornton-srv/databases/profunc .",2017-01-01 +28365722,OCaPPI-Db: an oligonucleotide probe database for pathogen identification through hybridization capture. ,"The detection and identification of bacterial pathogens involved in acts of bio- and agroterrorism are essential to avoid pathogen dispersal in the environment and propagation within the population. Conventional molecular methods, such as PCR amplification, DNA microarrays or shotgun sequencing, are subject to various limitations when assessing environmental samples, which can lead to inaccurate findings. 
We developed a hybridization capture strategy that uses a set of oligonucleotide probes to target and enrich biomarkers of interest in environmental samples. Here, we present Oligonucleotide Capture Probes for Pathogen Identification Database (OCaPPI-Db), an online capture probe database containing a set of 1,685 oligonucleotide probes allowing for the detection and identification of 30 biothreat agents up to the species level. This probe set can be used in its entirety as a comprehensive diagnostic tool or can be restricted to a set of probes targeting a specific pathogen or virulence factor according to the user's needs. : http://ocappidb.uca.works.",2017-01-01 +27822869,Describing Sequence Variants Using HGVS Nomenclature.,"DNA sequencing is usually performed to determine the sequence of a region of interest or even the entire genome of an individual. After sequencing, the sequence obtained is compared to a reference, all differences (the variants) are recorded, and the possible consequences of the changes identified, on both the RNA and protein level, are predicted. Finally, when available, a database containing previously reported variants is consulted to determine what other studies might have revealed about the variant or other variants in the same sequence (gene) and what the functional and phenotypic consequences were for the individuals carrying the variant.To facilitate the reporting and databasing of variants a standard was developed, the HGVS recommendations for the description of sequence variants. HGVS nomenclature contains specific formats to describe the basic variant types; substitution, deletion, duplication, insertion, inversion, and conversion. The basics of how to apply the recommendations to describe sequence variants will be explained here. 
An extensive description of the current HGVS guidelines (version 15.11) is available online at http://www.HGVS.org/varnomen .",2017-01-01 +28605770,Improving biocuration of microRNAs in diseases: a case study in idiopathic pulmonary fibrosis. ,"MicroRNAs (miRNAs) are small and non-coding RNA molecules that inhibit gene expression posttranscriptionally. They play important roles in several biological processes, and in recent years there has been an interest in studying how they are related to the pathogenesis of diseases. Although there are already some databases that contain information for miRNAs and their relation with illnesses, their curation represents a significant challenge due to the amount of information that is being generated every day. In particular, respiratory diseases are poorly documented in databases, despite the fact that they are of increasing concern regarding morbidity, mortality and economic impacts. In this work, we present the results that we obtained in the BioCreative Interactive Track (IAT), using a semiautomatic approach for improving biocuration of miRNAs related to diseases. Our procedures will be useful to complement databases that contain this type of information. We adapted the OntoGene text mining pipeline and the ODIN curation system in a full-text corpus of scientific publications concerning one specific respiratory disease: idiopathic pulmonary fibrosis, the most common and aggressive of the idiopathic interstitial cases of pneumonia. We curated 823 miRNA text snippets and found a total of 246 miRNAs related to this disease based on our semiautomatic approach with the system OntoGene/ODIN. The biocuration throughput improved by a factor of 12 compared with traditional manual biocuration. A significant advantage of our semiautomatic pipeline is that it can be applied to obtain the miRNAs of all the respiratory diseases and offers the possibility to be used for other illnesses. 
http://odin.ccg.unam.mx/ODIN/bc2015-miRNA/.",2017-01-01 +28365741,TMPL: a database of experimental and theoretical transmembrane protein models positioned in the lipid bilayer. ,"Knowing the position of protein structures within the membrane is crucial for fundamental and applied research in the field of molecular biology. Only few web resources propose coordinate files of oriented transmembrane proteins, and these exclude predicted structures, although they represent the largest part of the available models. In this article, we present TMPL (http://www.dsimb.inserm.fr/TMPL/), a database of transmembrane protein structures (α-helical and β-sheet) positioned in the lipid bilayer. It is the first database to include theoretical models of transmembrane protein structures, making it a large repository with more than 11 000 entries. The TMPL database also contains experimentally solved protein structures, which are available as either atomistic or coarse-grained models. A unique feature of TMPL is the possibility for users to update the database by uploading, through an intuitive web interface, the membrane assignments they can obtain with our recent OREMPRO web server.",2017-01-01 +28365721,PCPPI: a comprehensive database for the prediction of Penicillium-crop protein-protein interactions. ,"Penicillium expansum , the causal agent of blue mold, is one of the most prevalent post-harvest pathogens, infecting a wide range of crops after harvest. In response, crops have evolved various defense systems to protect themselves against this and other pathogens. Penicillium -crop interaction is a multifaceted process and mediated by pathogen- and host-derived proteins. Identification and characterization of the inter-species protein-protein interactions (PPIs) are fundamental to elucidating the molecular mechanisms underlying infection processes between P. expansum and plant crops. 
Here, we have developed PCPPI, the Penicillium -Crop Protein-Protein Interactions database, which is constructed based on the experimentally determined orthologous interactions in pathogen-plant systems and available domain-domain interactions (DDIs) in each PPI. Thus far, it stores information on 9911 proteins, 439 904 interactions and seven host species, including apple, kiwifruit, maize, pear, rice, strawberry and tomato. Further analysis through the gene ontology (GO) annotation indicated that proteins with more interacting partners tend to execute the essential function. Significantly, semantic statistics of the GO terms also provided strong support for the accuracy of our predicted interactions in PCPPI. We believe that all the PCPPI datasets are helpful to facilitate the study of pathogen-crop interactions and freely available to the research community. : http://bdg.hfut.edu.cn/pcppi/index.html.",2017-01-01 +28111365,PlantRGDB: A Database of Plant Retrocopied Genes.,"RNA-based gene duplication, known as retrocopy, plays important roles in gene origination and genome evolution. The genomes of many plants have been sequenced, offering an opportunity to annotate and mine the retrocopies in plant genomes. However, comprehensive and unified annotation of retrocopies in these plants is still lacking. In this study I constructed the PlantRGDB (Plant Retrocopied Gene DataBase), the first database of plant retrocopies, to provide a putatively complete centralized list of retrocopies in plant genomes. The database is freely accessible at http://probes.pw.usda.gov/plantrgdb or http://aegilops.wheat.ucdavis.edu/plantrgdb. It currently integrates 49 plant species and 38,997 retrocopies along with characterization information. PlantRGDB provides a user-friendly web interface for searching, browsing and downloading the retrocopies in the database. PlantRGDB also offers graphical viewer-integrated sequence information for displaying the structure of each retrocopy. 
The attributes of the retrocopies of each species are reported using a browse function. In addition, useful tools, such as an advanced search and BLAST, are available to search the database more conveniently. In conclusion, the database will provide a web platform for obtaining valuable insight into the generation of retrocopies and will supplement research on gene duplication and genome evolution in plants.",2017-01-01 +27987177,Plant Genome Duplication Database.,"Genome duplication, widespread in flowering plants, is a driving force in evolution. Genome alignments between/within genomes facilitate identification of homologous regions and individual genes to investigate evolutionary consequences of genome duplication. PGDD (the Plant Genome Duplication Database), a public web service database, provides intra- or interplant genome alignment information. At present, PGDD contains information for 47 plants whose genome sequences have been released. Here, we describe methods for identification and estimation of dates of genome duplication and speciation by functions of PGDD.The database is freely available at http://chibba.agtec.uga.edu/duplication/.",2017-01-01 +23764453,BDgene: a genetic database for bipolar disorder and its overlap with schizophrenia and major depressive disorder.,"

Background

Bipolar disorder (BD) is a common psychiatric disorder with complex genetic architecture. It shares overlapping genetic influences with schizophrenia (SZ) and major depressive disorder (MDD). Large numbers of genetic studies of BD and cross-disorder studies between BD and SZ/MDD have accumulated numerous genetic data. There is a growing need to integrate the data to provide a comprehensive data set to facilitate the genetic study of BD and its highly relevant diseases.

Methods

BDgene database was developed to integrate BD-related genetic factors and shared ones with SZ/MDD from profound literature reading. On the basis of data from the literature, in-depth analyses were performed for further understanding of the data, including gene prioritization, pathway-based analysis, intersection analysis of multidisease candidate genes, and pathway enrichment analysis.

Results

BDgene includes multiple types of literature-reported genetic factors of BD with both positive and negative results, including 797 genes, 3119 single nucleotide polymorphisms, and 789 regions. Shared genetic factors such as single nucleotide polymorphisms, genes, and regions from published cross-disorder studies among BD and SZ/MDD were also presented. In-depth data analyses identified 43 BD core genes; 70 BD candidate pathways; and 127, 79, and 107 new potential cross-disorder genes for BD-SZ, BD-MDD, and BD-SZ-MDD, respectively.

Conclusions

As a central genetic database for BD and the first cross-disorder database for BD and SZ/MDD, BDgene provides not only a comprehensive review of current genetic research but also high-confidence candidate genes and pathways for understanding of BD mechanism and shared etiology among its relevant diseases. BDgene is freely available at http://bdgene.psych.ac.cn.",2013-06-10 +23607573,The German national registry for primary immunodeficiencies (PID).,"In 2009, a federally funded clinical and research consortium (PID-NET, http://www.pid-net.org) established the first national registry for primary immunodeficiencies (PID) in Germany. The registry contains clinical and genetic information on PID patients and is set up within the framework of the existing European Database for Primary Immunodeficiencies, run by the European Society for Primary Immunodeficiencies. Following the example of other national registries, a central data entry clerk has been employed to support data entry at the participating centres. Regulations for ethics approvals have presented a major challenge for participation of individual centres and have led to a delay in data entry in some cases. Data on 630 patients, entered into the European registry between 2004 and 2009, were incorporated into the national registry. From April 2009 to March 2012, the number of contributing centres increased from seven to 21 and 738 additional patients were reported, leading to a total number of 1368 patients, of whom 1232 were alive. The age distribution of living patients differs significantly by gender, with twice as many males than females among children, but 15% more women than men in the age group 30 years and older. 
The diagnostic delay between onset of symptoms and diagnosis has decreased for some PID over the past 20 years, but remains particularly high at a median of 4 years in common variable immunodeficiency (CVID), the most prevalent PID.",2013-08-01 +27193157,Presenting GECO: An eyetracking corpus of monolingual and bilingual sentence reading.,"This article introduces GECO, the Ghent Eye-Tracking Corpus, a monolingual and bilingual corpus of the eyetracking data of participants reading a complete novel. English monolinguals and Dutch-English bilinguals read an entire novel, which was presented in paragraphs on the screen. The bilinguals read half of the novel in their first language, and the other half in their second language. In this article, we describe the distributions and descriptive statistics of the most important reading time measures for the two groups of participants. This large eyetracking corpus is perfectly suited for both exploratory purposes and more directed hypothesis testing, and it can guide the formulation of ideas and theories about naturalistic reading processes in a meaningful context. Most importantly, this corpus has the potential to evaluate the generalizability of monolingual and bilingual language theories and models to the reading of long texts and narratives. The corpus is freely available at http://expsy.ugent.be/downloads/geco .",2017-04-01 +29095726,"High quality of evidence is uncommon in Cochrane systematic reviews in Anaesthesia, Critical Care and Emergency Medicine.","

Background

The association between the quality of evidence in systematic reviews and authors' conclusions regarding the effectiveness of interventions relevant to anaesthesia has not been examined.

Objective

The objectives of this study were: to determine the proportion of systematic reviews in which the authors made a conclusive statement about the effect of an intervention; to describe the quality of evidence derived from outcomes in reviews that used the Grades of Recommendation, Assessment, Development and Evaluation (GRADE) working group system for grading the quality of evidence; and to identify review characteristics associated with conclusiveness.

Design

Cross-sectional analysis of Cochrane systematic reviews from the Anaesthesia, Critical Care and Emergency Review Group was undertaken.

Data sources

The Cochrane webpage was used to identify reviews for inclusion (http://ace.cochrane.org/).

Eligibility criteria

New and updated versions of systematic reviews published up to 17 September 2015 were eligible. Protocols for systematic reviews were excluded.

Results

A total of 159 reviews were included. GRADE was used in 103 reviews (65%). Of these, high-level evidence for the primary outcome was identified in 11 reviews (10%). The main reasons that quality of evidence for the primary outcome was downgraded were risk of bias (n = 44; 43%) and imprecision (n = 36; 35%). Authors of 47% (n = 75) of the total number of reviews made conclusive statements about the effects of interventions. Independent predictors of conclusiveness in the subgroup of reviews with GRADE assessments were quality of evidence for the primary outcome (odds ratio 2.03; 95% confidence interval: [1.18 to 3.52]) and an increasing number of studies included in reviews (OR 1.05; 95% CI: [1.01 to 1.09]).

Conclusion

It was common for conclusive statements to be made about the effects of interventions despite evidence for the primary outcome being rated less than high quality. Improving methodological quality of trials would have the greatest impact on improving the quality of evidence.",2017-12-01 +25700118,MOAtox: A comprehensive mode of action and acute aquatic toxicity database for predictive model development.,"The mode of toxic action (MOA) has been recognized as a key determinant of chemical toxicity and as an alternative to chemical class-based predictive toxicity modeling. However, the development of quantitative structure activity relationship (QSAR) and other models has been limited by the availability of comprehensive high quality MOA and toxicity databases. The current study developed a dataset of MOA assignments for 1213 chemicals that included a diversity of metals, pesticides, and other organic compounds that encompassed six broad and 31 specific MOAs. MOA assignments were made using a combination of high confidence approaches that included international consensus classifications, QSAR predictions, and weight of evidence professional judgment based on an assessment of structure and literature information. A toxicity database of 674 acute values linked to chemical MOA was developed for fish and invertebrates. Additionally, species-specific measured or high confidence estimated acute values were developed for the four aquatic species with the most reported toxicity values: rainbow trout (Oncorhynchus mykiss), fathead minnow (Pimephales promelas), bluegill (Lepomis macrochirus), and the cladoceran (Daphnia magna). Measured acute toxicity values met strict standardization and quality assurance requirements. Toxicity values for chemicals with missing species-specific data were estimated using established interspecies correlation models and procedures (Web-ICE; http://epa.gov/ceampubl/fchain/webice/), with the highest confidence values selected. 
The resulting dataset of MOA assignments and paired toxicity values are provided in spreadsheet format as a comprehensive standardized dataset available for predictive aquatic toxicology model development.",2015-02-07 +27153707,PinSnps: structural and functional analysis of SNPs in the context of protein interaction networks.,"

Unlabelled

We present a practical computational pipeline to readily perform data analyses of protein-protein interaction networks by using genetic and functional information mapped onto protein structures. We provide a 3D representation of the available protein structure and its regions (surface, interface, core and disordered) for the selected genetic variants and/or SNPs, and a prediction of the mutants' impact on the protein as measured by a range of methods. We have mapped in total 2587 genetic disorder-related SNPs from OMIM, 587 873 cancer-related variants from COSMIC, and 1 484 045 SNPs from dbSNP. All result data can be downloaded by the user together with an R-script to compute the enrichment of SNPs/variants in selected structural regions.

Availability and implementation

PinSnps is available as open-access service at http://fraternalilab.kcl.ac.uk/PinSnps/

Contact

franca.fraternali@kcl.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-24 +24790154,dbGSH: a database of S-glutathionylation.,"

Unlabelled

S-glutathionylation, the reversible protein posttranslational modification (PTM) that generates a mixed disulfide bond between glutathione and cysteine residue, critically regulates protein activity, stability and redox regulation. Due to its importance in regulating oxidative/nitrosative stress and balance in cellular response, a number of methods have been rapidly developed to study S-glutathionylation, thus expanding the dataset of experimentally determined glutathionylation sites. However, there is currently no database dedicated to the integration of all experimentally verified S-glutathionylation sites along with their characteristics or structural or functional information. Thus, the dbGSH database has been created to integrate all available datasets and to provide the relevant structural analysis. As of January 31, 2014, dbGSH has manually collected >2200 experimentally verified S-glutathionylated peptides from 169 research articles using a text-mining approach. To solve the problem of heterogeneity of the data collected from different sources, the sequence identity of the reported S-glutathionylated peptides is mapped to UniProtKB protein entries. To delineate the structural correlations and consensus motifs of these S-glutathionylation sites, the dbGSH database also provides structural and functional analyses, including the motifs of substrate sites, solvent accessibility, protein secondary and tertiary structures, protein domains and gene ontology.

Availability and implementation

dbGSH is now freely accessible at http://csb.cse.yzu.edu.tw/dbGSH/. The database content is regularly updated with new data collected by the continuous survey of research articles.",2014-04-29 +34377946,A two-stage approach for combining gene expression and mutation with clinical data improves survival prediction in myelodysplastic syndromes and ovarian cancer. ,"Many traditional clinical prognostic factors have been known for cancer for years, but usually provide poor survival prediction. Genomic information is more easily available now which offers opportunities to build more accurate prognostic models. The challenge is how to integrate them to improve survival prediction. The common approach of jointly analyzing all type of covariates directly in one single model may not improve the prediction due to increased model complexity and cannot be easily applied to different datasets. We proposed a two-stage procedure to better combine different sources of information for survival prediction, and applied the two-stage procedure in two cancer datasets: myelodysplastic syndromes (MDS) and ovarian cancer. Our analysis suggests that the prediction performance of different data types are very different, and combining clinical, gene expression and mutation data using the two-stage procedure improves survival prediction in terms of improved concordance index and reduced prediction error. The two-stage procedure can be implemented in BhGLM package which is freely available at http://www.ssg.uab.edu/bhglm/. nyi@uab.edu.",2016-09-15 +27222864,"Using Behavioral Risk Factor Data as a surveillance tool to monitor the prevalence of initiation, continuation and completion of Human Papilloma Virus vaccination in children.","The Human Papilloma Virus (""HPV"") is a common sexually transmitted disease that has infected approximately 79 million men and women in the United States alone. 
A vaccination is available but in order to be effective it must be received prior to becoming sexually active and recipients must complete a three-dose sequence. In this article we explore the predisposing, enabling and need-based factors associated with parents' or guardians' decision to have their child initiate, continue and complete the Human Papilloma Virus (HPV) vaccine. The data file includes 5531 parents and guardians with presumptive knowledge regarding the number of HPV vaccination their child received. Data includes information on the child (e.g. child׳s age) as well as the adult respondent (e.g. health insurance status). A smaller subset of the dataset along with the code to run the model are supplied with this article. The interpretation of these data can be found in the research article published by the authors in the Journal of Preventive Medicine in 2015 http://dx.doi.org/10.1016/j.ypmed.2016.01.010[1].",2016-03-09 +23039964,GeneFriends: an online co-expression analysis tool to identify novel gene targets for aging and complex diseases.,"

Background

Although many diseases have been well characterized at the molecular level, the underlying mechanisms are often unknown. Nearly half of all human genes remain poorly studied, yet these genes may contribute to a number of disease processes. Genes involved in common biological processes and diseases are often co-expressed. Using known disease-associated genes in a co-expression analysis may help identify and prioritize novel candidate genes for further study.

Results

We have created an online tool, called GeneFriends, which identifies co-expressed genes in over 1,000 mouse microarray datasets. GeneFriends can be used to assign putative functions to poorly studied genes. Using a seed list of disease-associated genes and a guilt-by-association method, GeneFriends allows users to quickly identify novel genes and transcription factors associated with a disease or process. We tested GeneFriends using seed lists for aging, cancer, and mitochondrial complex I disease. We identified several candidate genes that have previously been predicted as relevant targets. Some of the genes identified are already being tested in clinical trials, indicating the effectiveness of this approach. Co-expressed transcription factors were investigated, identifying C/ebp genes as candidate regulators of aging. Furthermore, several novel candidate genes, that may be suitable for experimental or clinical follow-up, were identified. Two of the novel candidates of unknown function that were co-expressed with cancer-associated genes were selected for experimental validation. Knock-down of their human homologs (C1ORF112 and C12ORF48) in HeLa cells slowed growth, indicating that these genes of unknown function, identified by GeneFriends, may be involved in cancer.

Conclusions

GeneFriends is a resource for biologists to identify and prioritize novel candidate genes involved in biological processes and complex diseases. It is an intuitive online resource that will help drive experimentation. GeneFriends is available online at: http://genefriends.org/.",2012-10-06 +26937470,Illumina next generation sequencing data and expression microarrays data from retinoblastoma and medulloblastoma tissues.,"Retinoblastoma (Rb) is a pediatric intraocular malignancy and probably the most robust clinical model on which genetic predisposition to develop cancer has been demonstrated. Since deletions in chromosome 13 have been described in this tumor, we performed next generation sequencing to test whether recurrent losses could be detected in low coverage data. We used Illumina platform for 13 tumor tissue samples: two pools of 4 retinoblastoma cases each and one pool of 5 medulloblastoma cases (raw data can be found at http://www.ebi.ac.uk/ena/data/view/PRJEB6630). We first created an in silico reference profile generated from a human sequenced genome (GRCh37p5). From this data we calculated an integrity score to get an overview of gains and losses in all chromosomes; we next analyzed each chromosome in windows of 40 kb length, calculating for each window the log2 ratio between reads from tumor pool and in silico reference. Finally we generated panoramic maps with all the windows whether lost or gained along each chromosome associated to its cytogenetic bands to facilitate interpretation. Expression microarrays was done for the same samples and a list of over and under expressed genes is presented here. For this detection a significance analysis was done and a log2 fold change was chosen as significant (raw data can be found at http://www.ncbi.nlm.nih.gov/geo/accession number GSE11488). The complete research article can be found at Cancer Genetics journal (Garcia-Chequer et al., in press) [1]. 
In summary here we provide an overview with visual graphics of gains and losses chromosome by chromosome in retinoblastoma and medulloblastoma, also the integrity score analysis and a list of genes with relevant expression associated. This material can be useful to researchers that may want to explore gains and losses in other malignant tumors with this approach or compare their data with retinoblastoma.",2016-01-27 +25243597,Improved locus-specific database for OPA1 mutations allows inclusion of advanced clinical data.,"Autosomal-dominant optic atrophy (ADOA) is the most common inherited optic neuropathy, due to mutations in the optic atrophy 1 gene (OPA1) in about 60%-80% of cases. At present, the clinical heterogeneity of patients carrying OPA1 variants renders genotype-phenotype correlations difficulty. Since 2005, when we published the first locus-specific database (LSDB) dedicated to OPA1, a large amount of new clinical and genetic knowledge has emerged, prompting us to update this database. We have used the Leiden Open-Source Variation Database to develop a clinico-biological database, aiming to add clinical phenotypes related to OPA1 variants. As a first step, we validated this new database by registering several patients previously reported in the literature, as well as new patients from our own institution. Contributors may now make online submissions of clinical and molecular descriptions of phenotypes due to OPA1 variants, including detailed ophthalmological and neurological data, with due respect to patient anonymity. The updated OPA1 LSDB (http://opa1.mitodyn.org/) should prove useful for molecular diagnoses, large-scale variant statistics, and genotype-phenotype correlations in ADOA studies.",2014-12-01 +29961082,Authors' response: Letter to the Editor concerning OCRA as preferred method in ISO standards on biomechanical risk factors.,"We thank Drs. Colombini and Occhipinti for their personal reply to our Discussion Paper (1, 2). 
We share the overall goal of preventing workplace injuries and welcome a discussion of the ISO process on workplace ergonomics standards; this was the primary aim of the Discussion Paper. We hope that other members of the relevant ISO working groups will also participate in the discussion. However, Drs. Colombini and Occipinti misinterpret our paper. Our aim was not to ""addresses the scientific basis of ISO standards on biomechanical risk factors and more specifically the OCRA methodology"". The purpose was to point out that ""while the ISO process has value, it has also clear limitations when it comes to developing occupational health and safety standards that should be based on scientific principles"". It is true that our paper discussed the OCRA method, but only as an example, in a single paragraph. We noted that the OCRA method was promoted as the preferred method by the ISO working group even though there were other risk assessment methods which, at the time (and currently), were at least as scientifically valid (3). The discovery that, while on the ISO working group, Drs. Colombini and Occipinti elevated the risk assessment method that they developed (OCRA) over the other methods, demonstrates one of several limitations of the ISO process, namely, the lack of attention to conflict of interest. Finally, we would like to draw attention to the note by Drs. Colombini and Occhipinti that ""the ISO standards in question were actually developed by the working group, as mandated by ISO, over the period 2000‒2004"". This long-elapsed time, without an update to the standard, should be a concern for all scientists given the large quantity of quality scientific literature published since then (eg, 3‒6). Fourteen years is well beyond what is recommended in the ISO guidelines. References 1. Colombini D, Occhipinti E. 
Scientific basis of the OCRA method for risk assessment of biomechanical overload of the upper limb, as preferred method in ISO standards on biomechanical risk factors. Scand J Work Environ Health ‒ online first. https://doi.org.10.5271/sjweh.3746 2. Armstrong T J, Burdorf I A, Descatha A, Farioli A, Graf M, Horie S, Marras W S, Potvin J R, Rempel D, Spatari G, Takala E P, Verbeek J, Violante FS. Scientific basis of ISO standards on biomechanical risk factors. Scand J Work Environ Health ‒ online first. https://doi.org/10.5271/sjweh.3718 3. Takala EP, Pehkonen I, Forsman M, Hansson GA, Mathiassen SE, Neumann WP, Sjøgaard G, Veiersted KB, Westgaard RH, Winkel J. Systematic evaluation of observational methods assessing biomechanical exposures at work. Scand J Work Environ Health. 2010;36:3-24. https://doi.org/10.5271/sjweh.2876 4. Paulsen R, Gallu T, Gilkey D, Reiser R, Murgia L, Rosecrance J. The inter-rater reliability of Strain Index and OCRA Checklist task assessments in cheese processing. Applied Ergonomics. 2015; 51,199-204. https://doi.org/10.1016/j.apergo.2015.04.019 5. Kapellusch JM, Gerr FE, Malloy EJ, Garg A, Harris-Adamson C, Bao SS, Burt SE, Dale AM, Eisen EA, Evanoff BA, Hegmann KT, Silverstein BA, Theise MS, Rempel DM. Exposure-response relationships for the ACGIH threshold limit value for hand-activity level: results from a pooled data study of carpal tunnel syndrome. Scand J Work Environ Health. 2014;40:610-20. https://doi.org/10.5271/sjweh.3456 6. Violante FS, Farioli A, Graziosi F, Marinelli F, Curti S, Armstrong TJ, Mattioli S, Bonfiglioli R. Carpal tunnel syndrome and manual work: the OCTOPUS cohort, results of a ten-year longitudinal study. Scand J Work Environ Health. 2016;42:280-90. 
https://doi.org/10.5271/sjweh.3566.",2018-07-01 +27105844,EXPLoRA-web: linkage analysis of quantitative trait loci using bulk segregant analysis.,"Identification of genomic regions associated with a phenotype of interest is a fundamental step toward solving questions in biology and improving industrial research. Bulk segregant analysis (BSA) combined with high-throughput sequencing is a technique to efficiently identify these genomic regions associated with a trait of interest. However, distinguishing true from spuriously linked genomic regions and accurately delineating the genomic positions of these truly linked regions requires the use of complex statistical models currently implemented in software tools that are generally difficult to operate for non-expert users. To facilitate the exploration and analysis of data generated by bulked segregant analysis, we present EXPLoRA-web, a web service wrapped around our previously published algorithm EXPLoRA, which exploits linkage disequilibrium to increase the power and accuracy of quantitative trait loci identification in BSA analysis. EXPLoRA-web provides a user friendly interface that enables easy data upload and parallel processing of different parameter configurations. Results are provided graphically and as BED file and/or text file and the input is expected in widely used formats, enabling straightforward BSA data analysis. The web server is available at http://bioinformatics.intec.ugent.be/explora-web/.",2016-04-21 +25392418,ValidatorDB: database of up-to-date validation results for ligands and non-standard residues from the Protein Data Bank.,"Following the discovery of serious errors in the structure of biomacromolecules, structure validation has become a key topic of research, especially for ligands and non-standard residues. 
ValidatorDB (freely available at http://ncbr.muni.cz/ValidatorDB) offers a new step in this direction, in the form of a database of validation results for all ligands and non-standard residues from the Protein Data Bank (all molecules with seven or more heavy atoms). Model molecules from the wwPDB Chemical Component Dictionary are used as reference during validation. ValidatorDB covers the main aspects of validation of annotation, and additionally introduces several useful validation analyses. The most significant is the classification of chirality errors, allowing the user to distinguish between serious issues and minor inconsistencies. Other such analyses are able to report, for example, completely erroneous ligands, alternate conformations or complete identity with the model molecules. All results are systematically classified into categories, and statistical evaluations are performed. In addition to detailed validation reports for each molecule, ValidatorDB provides summaries of the validation results for the entire PDB, for sets of molecules sharing the same annotation (three-letter code) or the same PDB entry, and for user-defined selections of annotations or PDB entries.",2014-11-11 +28899742,ICN_Atlas: Automated description and quantification of functional MRI activation patterns in the framework of intrinsic connectivity networks.,"Generally, the interpretation of functional MRI (fMRI) activation maps continues to rely on assessing their relationship to anatomical structures, mostly in a qualitative and often subjective way. Recently, the existence of persistent and stable brain networks of functional nature has been revealed; in particular these so-called intrinsic connectivity networks (ICNs) appear to link patterns of resting state and task-related state connectivity. 
These networks provide an opportunity of functionally-derived description and interpretation of fMRI maps, that may be especially important in cases where the maps are predominantly task-unrelated, such as studies of spontaneous brain activity e.g. in the case of seizure-related fMRI maps in epilepsy patients or sleep states. Here we present a new toolbox (ICN_Atlas) aimed at facilitating the interpretation of fMRI data in the context of ICN. More specifically, the new methodology was designed to describe fMRI maps in function-oriented, objective and quantitative way using a set of 15 metrics conceived to quantify the degree of 'engagement' of ICNs for any given fMRI-derived statistical map of interest. We demonstrate that the proposed framework provides a highly reliable quantification of fMRI activation maps using a publicly available longitudinal (test-retest) resting-state fMRI dataset. The utility of the ICN_Atlas is also illustrated on a parametric task-modulation fMRI dataset, and on a dataset of a patient who had repeated seizures during resting-state fMRI, confirmed on simultaneously recorded EEG. The proposed ICN_Atlas toolbox is freely available for download at http://icnatlas.com and at http://www.nitrc.org for researchers to use in their fMRI investigations.",2017-09-09 +28028736,Fast H-DROP: A thirty times accelerated version of H-DROP for interactive SVM-based prediction of helical domain linkers.,"Efficient and rapid prediction of domain regions from amino acid sequence information alone is often required for swift structural and functional characterization of large multi-domain proteins. Here we introduce Fast H-DROP, a thirty times accelerated version of our previously reported H-DROP (Helical Domain linker pRediction using OPtimal features), which is unique in specifically predicting helical domain linkers (boundaries). 
Fast H-DROP, analogously to H-DROP, uses optimum features selected from a set of 3000 ones by combining a random forest and a stepwise feature selection protocol. We reduced the computational time from 8.5 min per sequence in H-DROP to 14 s per sequence in Fast H-DROP on an 8 Xeon processor Linux server by using SWISS-PROT instead of Genbank non-redundant (nr) database for generating the PSSMs. The sensitivity and precision of Fast H-DROP assessed by cross-validation were 33.7 and 36.2%, which were merely ~2% lower than that of H-DROP. The reduced computational time of Fast H-DROP, without affecting prediction performances, makes it more interactive and user-friendly. Fast H-DROP and H-DROP are freely available from http://domserv.lab.tuat.ac.jp/ .",2016-12-27 +24827614,MicroRNA binding sites in C. elegans 3' UTRs.,"MicroRNAs (miRNAs) are post-transcriptional regulators of gene expression. Since the discovery of lin-4, the founding member of the miRNA family, over 360 miRNAs have been identified for Caenorhabditis elegans (C. elegans). Prediction and validation of targets are essential for elucidation of regulatory functions of these miRNAs. For C. elegans, crosslinking immunoprecipitation (CLIP) has been successfully performed for the identification of target mRNA sequences bound by Argonaute protein ALG-1. In addition, reliable annotation of the 3' untranslated regions (3' UTRs) as well as developmental stage-specific expression profiles for both miRNAs and 3' UTR isoforms are available. By utilizing these data, we developed statistical models and bioinformatics tools for both transcriptome-scale and developmental stage-specific predictions of miRNA binding sites in C. elegans 3' UTRs. In performance evaluation via cross validation on the ALG-1 CLIP data, the models were found to offer major improvements over established algorithms for predicting both seed sites and seedless sites. 
In particular, our top-ranked predictions have a substantially higher true positive rate, suggesting a much higher likelihood of positive experimental validation. A gene ontology analysis of stage-specific predictions suggests that miRNAs are involved in dynamic regulation of biological functions during C. elegans development. In particular, miRNAs preferentially target genes related to development, cell cycle, trafficking, and cell signaling processes. A database for both transcriptome-scale and stage-specific predictions and software for implementing the prediction models are available through the Sfold web server at http://sfold.wadsworth.org.",2014-04-25 +26859295,ePIANNO: ePIgenomics ANNOtation tool.,"Recently, with the development of next generation sequencing (NGS), the combination of chromatin immunoprecipitation (ChIP) and NGS, namely ChIP-seq, has become a powerful technique to capture potential genomic binding sites of regulatory factors, histone modifications and chromatin accessible regions. For most researchers, additional information including genomic variations on the TF binding site, allele frequency of variation between different populations, variation associated disease, and other neighbour TF binding sites are essential to generate a proper hypothesis or a meaningful conclusion. Many ChIP-seq datasets had been deposited on the public domain to help researchers make new discoveries. However, researches are often intimidated by the complexity of data structure and largeness of data volume. Such information would be more useful if they could be combined or downloaded with ChIP-seq data. To meet such demands, we built a webtool: ePIgenomic ANNOtation tool (ePIANNO, http://epianno.stat.sinica.edu.tw/index.html). ePIANNO is a web server that combines SNP information of populations (1000 Genomes Project) and gene-disease association information of GWAS (NHGRI) with ChIP-seq (hmChIP, ENCODE, and ROADMAP epigenomics) data. 
ePIANNO has a user-friendly website interface allowing researchers to explore, navigate, and extract data quickly. We use two examples to demonstrate how users could use functions of ePIANNO webserver to explore useful information about TF related genomic variants. Users could use our query functions to search target regions, transcription factors, or annotations. ePIANNO may help users to generate hypothesis or explore potential biological functions for their studies.",2016-02-09 +24556904,In silico identification of transcription factors in Medicago sativa using available transcriptomic resources.,"Transcription factors (TFs) are proteins that govern organismal development and response to the environment by regulating gene expression. Information on the amount and diversity of TFs within individual plant species is critical for understanding of their biological roles and evolutionary history across the plant kingdom. Currently, only scattered information on separate TFs is available for alfalfa, the most extensively cultivated forage legume in the world. In the meantime, several large transcriptomic resources that can be used to identify and characterize alfalfa TF genes are freely accessible online. In this study, we have performed an in silico analysis of transcriptome data generated in our laboratory and publicly acquirable from other sources to reveal and systematize alfalfa transcription factors. Transcriptome-wide mining enabled prediction of 983 TFs along with their sequence features and putative phylogenies of the largest families. All data were assembled into a simple open-access database named AlfalfaTFDB ( http://plantpathology.ba.ars.usda.gov/alfalfatfdb.html ). Transcriptomic analysis used in this work represents an effective approach for the identification of TF genes in plants with incomplete genomes, such as alfalfa. 
Integrated TF repertoires of Medicago sativa will provide an important tool for studying regulation of gene expression in other complex non-model species of agricultural significance.",2014-02-21 +26490638,MtiBase: a database for decoding microRNA target sites located within CDS and 5'UTR regions from CLIP-Seq and expression profile datasets. ,"MicroRNAs (miRNAs) play an important role in the regulation of gene expression. Previous studies on miRNA functions mainly focused on their target sites in the 3' untranslated regions (UTRs) of mRNAs. However, increasing evidence has revealed that miRNAs can also induce mRNA degradation and mediate translational repression via complementary interactions with the coding sequence (CDS) and 5'UTR of mRNAs. In this study, we developed a novel database, MtiBase, to facilitate the comprehensive exploration of CDS- and 5'UTR-located miRNA target sites identified from cross-linking immunoprecipitation sequencing (CLIP-Seq) datasets and to uncover their regulatory effects on mRNA stability and translation from expression profile datasets. By integrating 61 Argonaute protein-binding CLIP-Seq datasets and miRNA target sites predicted by five commonly used programs, we identified approximately 4 400 000 CDS-located and 470 000 5'UTR-located miRNA target sites. Moreover, we evaluated the regulatory effects of miRNAs on mRNA stability and translation using the data from 222 gene expression profiles, and 28 ribosome-protected fragment sequencing, and six pulsed stable isotope labeling with amino acids in culture. Finally, the effects of SNPs on the functions of miRNA target sites were systematically evaluated. Our study provides a useful tool for functional studies of miRNAs in regulating physiology and pathology. Database URL: http://mtibase.sysu.edu.cn.",2015-01-01 +28039165,PBIT: Pipeline Builder for Identification of drug Targets for infectious diseases.,"

Summary

PBIT (Pipeline Builder for Identification of drug Targets) is an online webserver that has been developed for screening of microbial proteomes for critical features of human drug targets such as being non-homologous to human proteome as well as the human gut microbiota, essential for the pathogen's survival, participation in pathogen-specific pathways etc. The tool has been validated by analyzing 57 putative targets of Candida albicans documented in literature. PBIT integrates various in silico approaches known for drug target identification and will facilitate high-throughput prediction of drug targets for infectious diseases, including multi-pathogenic infections.

Availability and implementation

PBIT is freely accessible at http://www.pbit.bicnirrh.res.in/ .

Contact

thomass@nirrh.res.in.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +27164618,Assessing the Association between Thermotolerant Coliforms in Drinking Water and Diarrhea: An Analysis of Individual-Level Data from Multiple Studies.,"

Background

Fecally contaminated drinking water is believed to be a major contributor to the global burden of diarrheal disease and a leading cause of mortality among young children. However, recent systematic reviews and results from blinded studies of water quality interventions have raised questions about the risk associated with fecally contaminated water, particularly as measured by thermotolerant coliform (TTC) bacteria, a WHO-approved indicator of drinking water quality.

Objectives

We investigated the association between TTC in drinking water and diarrhea using data from seven previous studies.

Methods

We obtained individual-level data from available field studies that measured TTC levels in household-drinking water and reported prevalence of diarrhea among household members during the days prior to the visit.

Results

The combined data set included diarrhea prevalence for 26,518 individuals and 8,000 water samples from 4,017 households, yielding 45,052 observations. The odds of diarrhea increased for each log10 increase in TTC/100 mL by 18% (95% CI: 11, 26%) for children < 5 years old and 12% (95% CI: 8, 18%) for all ages. For all ages, the odds of diarrhea increased by 21%, 35% and 49% for those whose household water samples were from 11-100, 101-1,000, and > 1,000 TTC/100 mL, respectively compared to < 1 TTC/100 mL. We found no evidence of increased odds of diarrhea with contamination levels below 11 TTC/100 mL, either in adults or children.

Conclusions

Our analysis of individual-level data shows increased risk of diarrhea with increasing levels of TTC in drinking water. These results suggest an association between fecally contaminated water and diarrheal disease and provides support for health-based targets for levels of TTC in drinking water and for interventions to improve drinking water quality to prevent diarrhea.

Citation

Hodge J, Chang HH, Boisson S, Collin SM, Peletz R, Clasen T. 2016. Assessing the association between thermotolerant coliforms in drinking water and diarrhea: an analysis of individual level data from multiple studies. Environ Health Perspect 124:1560-1567; http://dx.doi.org/10.1289/EHP156.",2016-05-10 +26672061,Recommendations for Evaluating Temporal Trends of Persistent Organic Pollutants in Breast Milk.,"

Background

Biomonitoring data of persistent organic pollutants (POPs) in breast milk are increasingly collected and available for quantitative analysis of levels and time trends. A common approach is to apply log-linear regression to calculate doubling and halving times of the POP concentrations based on the temporal trend observed in breast milk. However, there are different, sometimes conflicting interpretations of these doubling and halving times.

Objectives

We provide a mechanistic understanding of doubling and halving times where possible. Five recommendations are proposed for dealing with POP concentration trends in breast milk during three distinct periods (pre-ban, transition, post-ban period).

Discussion

Using temporal trends of BDE-47 and PCB-153 in breast milk data, we show which information can be gained from the time-trend data. To this end, we analyzed time trends of hypothetical POPs for different periods with time-variant exposure and different intrinsic elimination half-lives, using a dynamic population-based pharmacokinetic model. Different pieces of information can be extracted from time-trend data from different periods. The analysis of trends of short-lived POPs is rather straightforward and facilitates extraction of the intrinsic elimination half-lives from the breast milk data. However, trends of slowly eliminated POPs only provide indications for the exposure time trend.

Conclusions

Time-trend data of rapidly eliminated POPs provide information on exposure time trends and elimination half-lives. Temporal trends of slowly eliminated POPs are more complicated to interpret, and the extraction of exposure time trends and elimination half-lives requires data sets covering several decades.

Citation

Gyalpo T, Scheringer M, Hungerbühler K. 2016. Recommendations for evaluating temporal trends of persistent organic pollutants in breast milk. Environ Health Perspect 124:881-885; http://dx.doi.org/10.1289/ehp.1510219.",2015-12-15 +28025339,iMITEdb: the genome-wide landscape of miniature inverted-repeat transposable elements in insects. ,"Miniature inverted-repeat transposable elements (MITEs) have attracted much attention due to their widespread occurrence and high copy numbers in eukaryotic genomes. However, the systematic knowledge about MITEs in insects and other animals is still lacking. In this study, we identified 6012 MITE families from 98 insect species genomes. Comparison of these MITEs with known MITEs in the NCBI non-redundant database and Repbase showed that 5701(∼95%) of 6012 MITE families are novel. The abundance of MITEs varies drastically among different insect species, and significantly correlates with genome size. In general, larger genomes contain more MITEs than small genomes. Furthermore, all identified MITEs were included in a newly constructed database (iMITEdb) (http://gene.cqu.edu.cn/iMITEdb/), which has functions such as browse, search, BLAST and download. Overall, our results not only provide insight on insect MITEs but will also improve assembly and annotation of insect genomes. More importantly, the results presented in this study will promote studies of MITEs function, evolution and application in insects. DATABASE URL: http://gene.cqu.edu.cn/iMITEdb/.",2016-12-26 +26862054,Netter: re-ranking gene network inference predictions using structural network properties.,"

Background

Many algorithms have been developed to infer the topology of gene regulatory networks from gene expression data. These methods typically produce a ranking of links between genes with associated confidence scores, after which a certain threshold is chosen to produce the inferred topology. However, the structural properties of the predicted network do not resemble those typical for a gene regulatory network, as most algorithms only take into account connections found in the data and do not include known graph properties in their inference process. This lowers the prediction accuracy of these methods, limiting their usability in practice.

Results

We propose a post-processing algorithm which is applicable to any confidence ranking of regulatory interactions obtained from a network inference method which can use, inter alia, graphlets and several graph-invariant properties to re-rank the links into a more accurate prediction. To demonstrate the potential of our approach, we re-rank predictions of six different state-of-the-art algorithms using three simple network properties as optimization criteria and show that Netter can improve the predictions made on both artificially generated data as well as the DREAM4 and DREAM5 benchmarks. Additionally, the DREAM5 E.coli. community prediction inferred from real expression data is further improved. Furthermore, Netter compares favorably to other post-processing algorithms and is not restricted to correlation-like predictions. Lastly, we demonstrate that the performance increase is robust for a wide range of parameter settings. Netter is available at http://bioinformatics.intec.ugent.be.

Conclusions

Network inference from high-throughput data is a long-standing challenge. In this work, we present Netter, which can further refine network predictions based on a set of user-defined graph properties. Netter is a flexible system which can be applied in unison with any method producing a ranking from omics data. It can be tailored to specific prior knowledge by expert users but can also be applied in general uses cases. Concluding, we believe that Netter is an interesting second step in the network inference process to further increase the quality of prediction.",2016-02-09 +25332403,"The i5k Workspace@NAL--enabling genomic data access, visualization and curation of arthropod genomes.","The 5000 arthropod genomes initiative (i5k) has tasked itself with coordinating the sequencing of 5000 insect or related arthropod genomes. The resulting influx of data, mostly from small research groups or communities with little bioinformatics experience, will require visualization, dissemination and curation, preferably from a centralized platform. The National Agricultural Library (NAL) has implemented the i5k Workspace@NAL (http://i5k.nal.usda.gov/) to help meet the i5k initiative's genome hosting needs. Any i5k member is encouraged to contact the i5k Workspace with their genome project details. Once submitted, new content will be accessible via organism pages, genome browsers and BLAST search engines, which are implemented via the open-source Tripal framework, a web interface for the underlying Chado database schema. We also implement the Web Apollo software for groups that choose to curate gene models. 
New content will add to the existing body of 35 arthropod species, which include species relevant for many aspects of arthropod genomic research, including agriculture, invasion biology, systematics, ecology and evolution, and developmental research.",2014-10-20 +30337764,A novel pathway analysis approach based on the unexplained disregulation of genes.,"A crucial step in the understanding of any phenotype is the correct identification of the signaling pathways that are significantly impacted in that phenotype. However, most current pathway analysis methods produce both false positives as well as false negatives in certain circumstances. We hypothesized that such incorrect results are due to the fact that the existing methods fail to distinguish between the primary dis-regulation of a given gene itself and the effects of signaling coming from upstream. Furthermore, a modern whole-genome experiment performed with a next-generation technology spends a great deal of effort to measure the entire set of 30,000-100,000 transcripts in the genome. This is followed by the selection of a few hundreds differentially expressed genes, step that literally discards more than 99% of the collected data. We also hypothesized that such a drastic filtering could discard many genes that play crucial roles in the phenotype. We propose a novel topology-based pathway analysis method that identifies significantly impacted pathways using the entire set of measurements, thus allowing the full use of the data provided by NGS techniques. The results obtained on 24 real data sets involving 12 different human diseases, as well as on 8 yeast knock-out data sets show that the proposed method yields significant improvements with respect to the state-of-the-art methods: SPIA, GSEA and GSA.

Availability

Primary dis-regulation analysis is implemented in R and included in ROntoTools Bioconductor package (versions ≥ 2.0.0). https://www.bioconductor.org/packages/release/bioc/html/ROntoTools.html.",2016-03-24 +27542772,STAMS: STRING-assisted module search for genome wide association studies and application to autism.,"

Motivation

Analyzing genome wide association data in the context of biological pathways helps us understand how genetic variation influences phenotype and increases power to find associations. However, the utility of pathway-based analysis tools is hampered by undercuration and reliance on a distribution of signal across all of the genes in a pathway. Methods that combine genome wide association results with genetic networks to infer the key phenotype-modulating subnetworks combat these issues, but have primarily been limited to network definitions with yes/no labels for gene-gene interactions. A recent method (EW_dmGWAS) incorporates a biological network with weighted edge probability by requiring a secondary phenotype-specific expression dataset. In this article, we combine an algorithm for weighted-edge module searching and a probabilistic interaction network in order to develop a method, STAMS, for recovering modules of genes with strong associations to the phenotype and probable biologic coherence. Our method builds on EW_dmGWAS but does not require a secondary expression dataset and performs better in six test cases.

Results

We show that our algorithm improves over EW_dmGWAS and standard gene-based analysis by measuring precision and recall of each method on separately identified associations. In the Wellcome Trust Rheumatoid Arthritis study, STAMS-identified modules were more enriched for separately identified associations than EW_dmGWAS (STAMS P-value = 3.0 × 10^-4; EW_dmGWAS P-value = 0.8). We demonstrate that the area under the Precision-Recall curve is 5.9 times higher with STAMS than EW_dmGWAS run on the Wellcome Trust Type 1 Diabetes data.

Availability and implementation

STAMS is implemented as an R package and is freely available at https://simtk.org/projects/stams CONTACT: rbaltman@stanford.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-19 +24451012,mPUMA: a computational approach to microbiota analysis by de novo assembly of operational taxonomic units based on protein-coding barcode sequences.,"

Background

Formation of operational taxonomic units (OTU) is a common approach to data aggregation in microbial ecology studies based on amplification and sequencing of individual gene targets. The de novo assembly of OTU sequences has been recently demonstrated as an alternative to widely used clustering methods, providing robust information from experimental data alone, without any reliance on an external reference database.

Results

Here we introduce mPUMA (microbial Profiling Using Metagenomic Assembly, http://mpuma.sourceforge.net), a software package for identification and analysis of protein-coding barcode sequence data. It was developed originally for Cpn60 universal target sequences (also known as GroEL or Hsp60). Using an unattended process that is independent of external reference sequences, mPUMA forms OTUs by DNA sequence assembly and is capable of tracking OTU abundance. mPUMA processes microbial profiles both in terms of the direct DNA sequence as well as in the translated amino acid sequence for protein coding barcodes. By forming OTUs and calculating abundance through an assembly approach, mPUMA is capable of generating inputs for several popular microbiota analysis tools. Using SFF data from sequencing of a synthetic community of Cpn60 sequences derived from the human vaginal microbiome, we demonstrate that mPUMA can faithfully reconstruct all expected OTU sequences and produce compositional profiles consistent with actual community structure.

Conclusions

mPUMA enables analysis of microbial communities while empowering the discovery of novel organisms through OTU assembly.",2013-08-15 +23881287,"Rat Genome Database: a unique resource for rat, human, and mouse quantitative trait locus data.","The rat has been widely used as a disease model in a laboratory setting, resulting in an abundance of genetic and phenotype data from a wide variety of studies. These data can be found at the Rat Genome Database (RGD, http://rgd.mcw.edu/), which provides a platform for researchers interested in linking genomic variations to phenotypes. Quantitative trait loci (QTLs) form one of the earliest and core datasets, allowing researchers to identify loci harboring genes associated with disease. These QTLs are not only important for those using the rat to identify genes and regions associated with disease, but also for cross-organism analyses of syntenic regions on the mouse and the human genomes to identify potential regions for study in these organisms. Currently, RGD has data on >1,900 rat QTLs that include details about the methods and animals used to determine the respective QTL along with the genomic positions and markers that define the region. RGD also curates human QTLs (>1,900) and houses>4,000 mouse QTLs (imported from Mouse Genome Informatics). Multiple ontologies are used to standardize traits, phenotypes, diseases, and experimental methods to facilitate queries, analyses, and cross-organism comparisons. QTLs are visualized in tools such as GBrowse and GViewer, with additional tools for analysis of gene sets within QTL regions. The QTL data at RGD provide valuable information for the study of mapped phenotypes and identification of candidate genes for disease associations.",2013-07-23 +23335498,The society of thoracic surgeons national database.,"

Aims

The Society of Thoracic Surgeons (STS) National Database collects detailed clinical information on patients undergoing adult cardiac, paediatric and congenital cardiac, and general thoracic surgical operations. These data are used to support risk-adjusted, nationally benchmarked performance assessment and feedback; voluntary public reporting; quality improvement initiatives; guideline development; appropriateness determination; shared decision making; research using cross-sectional and longitudinal registry linkages; comparative effectiveness studies; government collaborations including postmarket surveillance; regulatory compliance and reimbursement strategies.

Interventions

All database participants receive feedback reports which they may voluntarily share with their hospitals or payers, or publicly report. STS analyses are regularly used as the basis for local, regional and national quality improvement efforts.

Population

More than 90% of adult cardiac programmes in the USA participate, as do the majority of paediatric cardiac programmes, and general thoracic participation continues to increase. Since the inception of the Database in 1989, more than 5 million patient records have been submitted.

Baseline data

Each of the three subspecialty databases includes several hundred variables that characterise patient demographics, diagnosis, medical history, clinical risk factors and urgency of presentation, operative details and postoperative course including adverse outcomes.

Data capture

Data are entered by trained data abstractors and by the care team, using detailed data specifications for each element.

Data quality

Quality and consistency checks assure accurate and complete data, missing data are rare, and audits are performed annually of selected participant sites.

Endpoints

All major outcomes are reported including complications, status at discharge and mortality.

Data access

Applications for STS Database participants to use aggregate national data for research are available at http://www.sts.org/quality-research-patient-safety/research/publications-and-research/access-data-sts-national-database.",2013-01-18 +25480679,Marky: a tool supporting annotation consistency in multi-user and iterative document annotation projects.,"

Background and objectives

Document annotation is a key task in the development of Text Mining methods and applications. High quality annotated corpora are invaluable, but their preparation requires a considerable amount of resources and time. Although the existing annotation tools offer good user interaction interfaces to domain experts, project management and quality control abilities are still limited. Therefore, the current work introduces Marky, a new Web-based document annotation tool equipped to manage multi-user and iterative projects, and to evaluate annotation quality throughout the project life cycle.

Methods

At the core, Marky is a Web application based on the open source CakePHP framework. User interface relies on HTML5 and CSS3 technologies. Rangy library assists in browser-independent implementation of common DOM range and selection tasks, and Ajax and JQuery technologies are used to enhance user-system interaction.

Results

Marky grants solid management of inter- and intra-annotator work. Most notably, its annotation tracking system supports systematic and on-demand agreement analysis and annotation amendment. Each annotator may work over documents as usual, but all the annotations made are saved by the tracking system and may be further compared. So, the project administrator is able to evaluate annotation consistency among annotators and across rounds of annotation, while annotators are able to reject or amend subsets of annotations made in previous rounds. As a side effect, the tracking system minimises resource and time consumption.

Conclusions

Marky is a novel environment for managing multi-user and iterative document annotation projects. Compared to other tools, Marky offers a similar visually intuitive annotation experience while providing unique means to minimise annotation effort and enforce annotation quality, and therefore corpus consistency. Marky is freely available for non-commercial use at http://sing.ei.uvigo.es/marky.",2014-11-25 +29036273,SCooP: an accurate and fast predictor of protein stability curves as a function of temperature.,"

Motivation

The molecular bases of protein stability remain far from elucidated even though substantial progress has been made through both computational and experimental investigations. One of the most challenging goals is the development of accurate prediction tools of the temperature dependence of the standard folding free energy ΔG(T). Such predictors have an enormous series of potential applications, which range from drug design in the biopharmaceutical sector to the optimization of enzyme activity for biofuel production. There is thus an important demand for novel, reliable and fast predictors.

Results

We present the SCooP algorithm, which is a significant step towards accurate temperature-dependent stability prediction. This automated tool uses the protein structure and the host organism as sole entries and predicts the full T-dependent stability curve of monomeric proteins assumed to follow a two-state folding transition. Equivalently, it predicts all the thermodynamic quantities associated to the folding transition, namely the melting temperature Tm, the standard folding enthalpy ΔHm measured at Tm, and the standard folding heat capacity ΔCp. The cross-validated performances are good, with correlation coefficients between predicted and experimental values equal to [0.80, 0.83, 0.72] for ΔHm, ΔCp and Tm, respectively, which increase up to [0.88, 0.90, 0.78] upon the removal of 10% outliers. Moreover, the stability curve prediction of a target protein is very fast: it takes less than a minute. SCooP can thus potentially be applied on a structurome scale. This opens new perspectives of large-scale analyses of protein stability, which is of considerable interest for protein engineering.

Availability and implementation

The SCooP webserver is freely available at http://babylone.ulb.ac.be/SCooP.

Contact

fapucci@ulb.ac.be, jkwasigr@ulb.ac.be or mrooman@ulb.ac.be.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +27993775,GENIUS: web server to predict local gene networks and key genes for biological functions.,"

Summary

GENIUS is a user-friendly web server that uses a novel machine learning algorithm to infer functional gene networks focused on specific genes and experimental conditions that are relevant to biological functions of interest. These functions may have different levels of complexity, from specific biological processes to complex traits that involve several interacting processes. GENIUS also enriches the network with new genes related to the biological function of interest, with accuracies comparable to highly discriminative Support Vector Machine methods.

Availability and implementation

GENIUS currently supports eight model organisms and is freely available for public use at http://networks.bio.puc.cl/genius .

Contact

genius.psbl@gmail.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +26958624,"Data analysis of ""krokodil"" samples obtained by street-like synthesis.","The data described in this work is related to be the subject of an article in the Forensic Science International, titled: ""The harmful chemistry behind ""krokodil"": street-like synthesis and product analysis"" (http://dx.doi.org/10.1016/j.forsciint.2015.07.042) [1]. The data presented here provides additional description of the chemical profile of ""krokodil"". Physicochemical and organoleptic characteristics, TLC profile, UV/Vis, (1)H NMR and FTIR spectrum are presented. These data validate the proposed synthetic procedure and pathway and give further information about the contaminants present in ""krokodil"".",2015-11-28 +26615190,Biocuration of functional annotation at the European nucleotide archive.,"The European Nucleotide Archive (ENA; http://www.ebi.ac.uk/ena) is a repository for the submission, maintenance and presentation of nucleotide sequence data and related sample and experimental information. In this article we report on ENA in 2015 regarding general activity, notable published data sets and major achievements. This is followed by a focus on sustainable biocuration of functional annotation, an area which has particularly felt the pressure of sequencing growth. The importance of functional annotation, how it can be submitted and the shifting role of the biocurator in the context of increasing volumes of data are all discussed.",2015-11-28 +27148435,OBIB-a novel ontology for biobanking.,

Background

Biobanking necessitates extensive integration of data to allow data analysis and specimen sharing. Ontologies have been demonstrated to be a promising approach in fostering better semantic integration of biobank-related data. Hitherto no ontology provided the coverage needed to capture a broad spectrum of biobank user scenarios.

Methods

Based on the principles laid out by the Open Biological and Biomedical Ontologies Foundry, two biobanking ontologies have been developed. These two ontologies were merged using a modular approach consistent with the initial development principles. The merging was facilitated by the fact that both ontologies use the same Upper Ontology and re-use classes from a similar set of pre-existing ontologies.

Results

Based on the two previous ontologies, the Ontology for Biobanking (http://purl.obolibrary.org/obo/obib.owl) was created. Due to the fact that there was no overlap between the two source ontologies, the coverage of the resulting ontology is significantly larger than that of the two source ontologies. The ontology is successfully used in managing biobank information of the Penn Medicine BioBank.

Conclusions

Sharing development principles and Upper Ontologies facilitates subsequent merging of ontologies to achieve a broader coverage.,2016-05-02 +27054150,Survey data on household spatial quality and experiences of stress.,"This data article describes a dataset of 1,668 cases representing self-reported assessments of housing inadequacy and perceived housing stress. The dataset also contains person-level and household-level demographic data to contextualize the above measures. A second supplemental file contains the text of the survey instrument. Discussion of theoretical background and measures development as well as a more detailed socioeconomic profile of the sample is available in the associated research article http://dx.doi.org/10.1016/j.jenvp.2016.01.002(Campagna, 2016) [1].",2016-03-09 +29036535,pLoc-mAnimal: predict subcellular localization of animal proteins with both single and multiple sites.,"

Motivation

Cells are deemed the basic unit of life. However, many important functions of cells as well as their growth and reproduction are performed via the protein molecules located at their different organelles or locations. Facing explosive growth of protein sequences, we are challenged to develop fast and effective methods to annotate their subcellular localization. However, this is by no means an easy task. Particularly, mounting evidence has indicated that proteins have a multi-label feature, meaning that they may simultaneously exist at, or move between, two or more different subcellular location sites. Unfortunately, most of the existing computational methods can only be used to deal with the single-label proteins. Although the 'iLoc-Animal' predictor developed recently is quite powerful and can be used to deal with the animal proteins with multiple locations as well, its prediction quality needs to be improved, particularly in enhancing the absolute true rate and reducing the absolute false rate.

Results

Here we propose a new predictor called 'pLoc-mAnimal', which is superior to iLoc-Animal as shown by the compelling facts. When tested by the most rigorous cross-validation on the same high-quality benchmark dataset, the absolute true success rate achieved by the new predictor is 37% higher and the absolute false rate is four times lower in comparison with the state-of-the-art predictor.

Availability and implementation

To maximize the convenience of most experimental scientists, a user-friendly web-server for the new predictor has been established at http://www.jci-bioinfo.cn/pLoc-mAnimal/, by which users can easily get their desired results without the need to go through the complicated mathematics involved.

Contact

xxiao@gordonlifescience.org or kcchou@gordonlifescience.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-11-01 +29089455,Echocardiographic Risk Factors for Stroke and Outcomes in Patients With Atrial Fibrillation Anticoagulated With Apixaban or Warfarin.,"Few data exist on the long-term outcomes of patients with spontaneous echo contrast (SEC), left atrial/left atrial appendage (LA/LAA) thrombus, and complex aortic plaque (CAP), in patients with atrial fibrillation receiving oral anticoagulation. We explored the relationship between these 3 echocardiographic findings and clinical outcomes, and the comparative efficacy and safety of apixaban and warfarin for each finding.Patients from the ARISTOTLE trial (Apixaban for Reduction in Stroke and Other Thromboembolic Events in Atrial Fibrillation) with SEC, LA/LAA thrombus, or CAP diagnosed by either transthoracic or transesophageal echocardiography were compared with patients with none of these findings on transesophageal echocardiography.A total of 1251 patients were included: 217 had SEC, 127 had LA/LAA thrombus, 241 had CAP, and 746 had none. The rates of stroke/systemic embolism were not significantly different among patients with and without these echocardiographic findings (hazard ratio, 0.96; 95% confidence interval, 0.25-3.60 for SEC; hazard ratio, 1.27; 95% confidence interval, 0.23-6.86 for LA/LAA thrombus; hazard ratio, 2.21; 95% confidence interval, 0.71-6.85 for CAP). Rates of ischemic stroke, myocardial infarction, cardiovascular death, and all-cause death were also not different between patients with and without these findings. For patients with either SEC or CAP, there was no evidence of a differential effect of apixaban over warfarin. 
For patients with LA/LAA thrombus, there was also no significant interaction, with the exception of all-cause death and any bleeding where there was a greater benefit of apixaban compared with warfarin among patients with no LA/LAA thrombus. In anticoagulated patients with atrial fibrillation and risk factors for stroke, echocardiographic findings do not seem to add to the risk of thromboembolic events. URL: http://www.clinicaltrials.gov. Unique identifier: NCT00412984.",2017-10-31 +27067409,SuperPhy: predictive genomics for the bacterial pathogen Escherichia coli.,"

Background

Predictive genomics is the translation of raw genome sequence data into a phenotypic assessment of the organism. For bacterial pathogens, these phenotypes can range from environmental survivability, to the severity of human disease. Significant progress has been made in the development of generic tools for genomic analyses that are broadly applicable to all microorganisms; however, a fundamental missing component is the ability to analyze genomic data in the context of organism-specific phenotypic knowledge, which has been accumulated from decades of research and can provide a meaningful interpretation of genome sequence data.

Results

In this study, we present SuperPhy, an online predictive genomics platform ( http://lfz.corefacility.ca/superphy/ ) for Escherichia coli. The platform integrates the analytical tools and genome sequence data for all publicly available E. coli genomes and facilitates the upload of new genome sequences from users under public or private settings. SuperPhy provides real-time analyses of thousands of genome sequences with results that are understandable and useful to a wide community, including those in the fields of clinical medicine, epidemiology, ecology, and evolution. SuperPhy includes identification of: 1) virulence and antimicrobial resistance determinants; 2) statistical associations between genotypes, biomarkers, geospatial distribution, host, source, and phylogenetic clade; 3) the identification of biomarkers for groups of genomes based on the presence/absence of specific genomic regions and single-nucleotide polymorphisms; and 4) in silico Shiga-toxin subtype.

Conclusions

SuperPhy is a predictive genomics platform that attempts to provide an essential link between the vast amounts of genome information currently being generated and phenotypic knowledge in an organism-specific context.",2016-04-12 +26272077,The mobilize center: an NIH big data to knowledge center to advance human movement research and improve mobility.,"Regular physical activity helps prevent heart disease, stroke, diabetes, and other chronic diseases, yet a broad range of conditions impair mobility at great personal and societal cost. Vast amounts of data characterizing human movement are available from research labs, clinics, and millions of smartphones and wearable sensors, but integration and analysis of this large quantity of mobility data are extremely challenging. The authors have established the Mobilize Center (http://mobilize.stanford.edu) to harness these data to improve human mobility and help lay the foundation for using data science methods in biomedicine. The Center is organized around 4 data science research cores: biomechanical modeling, statistical learning, behavioral and social modeling, and integrative modeling. Important biomedical applications, such as osteoarthritis and weight management, will focus the development of new data science methods. By developing these new approaches, sharing data and validated software tools, and training thousands of researchers, the Mobilize Center will transform human movement research.",2015-08-13 +22871112,PKMiner: a database for exploring type II polyketide synthases.,"

Background

Bacterial aromatic polyketides are a pharmacologically important group of natural products synthesized by type II polyketide synthases (type II PKSs) in actinobacteria. Isolation of novel aromatic polyketides from microbial sources is currently impeded because of the lack of knowledge about prolific taxa for polyketide synthesis and the difficulties in finding and optimizing target microorganisms. Comprehensive analysis of type II PKSs and the prediction of possible polyketide chemotypes in various actinobacterial genomes will thus enable the discovery or synthesis of novel polyketides in the most plausible microorganisms.

Description

We performed a comprehensive computational analysis of type II PKSs and their gene clusters in actinobacterial genomes. By identifying type II PKS subclasses from the sequence analysis of 280 known type II PKSs, we developed highly accurate domain classifiers for these subclasses and derived prediction rules for aromatic polyketide chemotypes generated by different combinations of type II PKS domains. Using 319 available actinobacterial genomes, we predicted 231 type II PKSs from 40 PKS gene clusters in 25 actinobacterial genomes, and polyketide chemotypes corresponding to 22 novel PKS gene clusters in 16 genomes. These results showed that the microorganisms capable of producing aromatic polyketides are specifically distributed within a certain suborder of Actinomycetales such as Catenulisporineae, Frankineae, Micrococcineae, Micromonosporineae, Pseudonocardineae, Streptomycineae, and Streptosporangineae.

Conclusions

We could identify the novel candidates of type II PKS gene clusters and their polyketide chemotypes in actinobacterial genomes by comprehensive analysis of type II PKSs and prediction of aromatic polyketides. The genome analysis results indicated that the specific suborders in actinomycetes could be used as prolific taxa for polyketide synthesis. The chemotype-prediction rules with the suggested type II PKS modules derived using this resource can be used further for microbial engineering to produce various aromatic polyketides. All these resources, together with the results of the analysis, are organized into an easy-to-use database PKMiner, which is accessible at the following URL: http://pks.kaist.ac.kr/pkminer. We believe that this web-based tool would be useful for research in the discovery of novel bacterial aromatic polyketides.",2012-08-08 +28155723,LocExpress: a web server for efficiently estimating expression of novel transcripts.,"

Background

The temporal and spatial-specific expression pattern of a transcript in multiple tissues and cell types can indicate key clues about its function. While several gene atlases are available online as pre-computed databases for known gene models, it is still challenging to efficiently obtain expression profiles for previously uncharacterized (i.e. novel) transcripts.

Results

Here we developed LocExpress, a web server for efficiently estimating expression of novel transcripts across multiple tissues and cell types in human (20 normal tissues/cell types and 14 cell lines) as well as in mouse (24 normal tissues/cell types and nine cell lines). As a wrapper to an RNA-Seq quantification algorithm, LocExpress efficiently reduces the time cost by making abundance estimation calls increasingly within the minimum spanning bundle region of input transcripts. For a given novel gene model, such a local context-oriented strategy allows LocExpress to estimate its FPKMs in hundreds of samples within minutes on a standard Linux box, making an online web server possible.

Conclusions

To the best of our knowledge, LocExpress is the only web server to provide nearly real-time expression estimation for novel transcripts in common tissues and cell types. The server is publicly available at http://loc-express.cbi.pku.edu.cn .",2016-12-22 +21464840,A comparative protein function analysis database of different Leishmania strains.,"A complete understanding of different protein functional families and template information opens new avenues for novel drug development. Protein identification and analysis software performs a central role in the investigation of proteins and leads to the development of refined database for description of proteins of different Leishmania strains. There are certain databases for different strains that lack template information and functional family annotation. Rajendra Memorial Research Institute of Medical Sciences (RMRIMS) has developed a web-based unique database to provide information about functional families of different proteins and its template information in different Leishmania species. Based on the template information users can model the tertiary structure of protein. The database facilitates significant relationship between template information and possible protein functional families assigned to different proteins by SVMProt. This database is designed to provide comprehensive descriptions of certain important proteins found in four different species of Leishmania i.e. L. donovani, L. infantum, L. major and L. braziliensis. A specific characterization information table provides information related to species and specific functional families. This database aims to be a resource for scientists working on proteomics. 
The database is freely available at http://biomedinformri.org/calp/.",2011-03-02 +28453644,TraitRateProp: a web server for the detection of trait-dependent evolutionary rate shifts in sequence sites.,"Understanding species adaptation at the molecular level has been a central goal of evolutionary biology and genomics research. This important task becomes increasingly relevant with the constant rise in both genotypic and phenotypic data availabilities. The TraitRateProp web server offers a unique perspective into this task by allowing the detection of associations between sequence evolution rate and whole-organism phenotypes. By analyzing sequences and phenotypes of extant species in the context of their phylogeny, it identifies sequence sites in a gene/protein whose evolutionary rate is associated with shifts in the phenotype. To this end, it considers alternative histories of whole-organism phenotypic changes, which result in the extant phenotypic states. Its joint likelihood framework that combines models of sequence and phenotype evolution allows testing whether an association between these processes exists. In addition to predicting sequence sites most likely to be associated with the phenotypic trait, the server can optionally integrate structural 3D information. This integration allows a visual detection of trait-associated sequence sites that are juxtaposed in 3D space, thereby suggesting a common functional role. We used TraitRateProp to study the shifts in sequence evolution rate of the RPS8 protein upon transitions into heterotrophy in Orchidaceae. TraitRateProp is available at http://traitrate.tau.ac.il/prop.",2017-07-01 +23761454,SigniSite: Identification of residue-level genotype-phenotype correlations in protein multiple sequence alignments.,"Identifying which mutation(s) within a given genotype is responsible for an observable phenotype is important in many aspects of molecular biology. 
Here, we present SigniSite, an online application for subgroup-free residue-level genotype-phenotype correlation. In contrast to similar methods, SigniSite does not require any pre-definition of subgroups or binary classification. Input is a set of protein sequences where each sequence has an associated real number, quantifying a given phenotype. SigniSite will then identify which amino acid residues are significantly associated with the data set phenotype. As output, SigniSite displays a sequence logo, depicting the strength of the phenotype association of each residue and a heat-map identifying 'hot' or 'cold' regions. SigniSite was benchmarked against SPEER, a state-of-the-art method for the prediction of specificity determining positions (SDP) using a set of human immunodeficiency virus protease-inhibitor genotype-phenotype data and corresponding resistance mutation scores from the Stanford University HIV Drug Resistance Database, and a data set of protein families with experimentally annotated SDPs. For both data sets, SigniSite was found to outperform SPEER. SigniSite is available at: http://www.cbs.dtu.dk/services/SigniSite/.",2013-06-12 +28209129,Orthograph: a versatile tool for mapping coding nucleotide sequences to clusters of orthologous genes.,"

Background

Orthology characterizes genes of different organisms that arose from a single ancestral gene via speciation, in contrast to paralogy, which is assigned to genes that arose via gene duplication. An accurate orthology assignment is a crucial step for comparative genomic studies. Orthologous genes in two organisms can be identified by applying a so-called reciprocal search strategy, given that complete information of the organisms' gene repertoire is available. In many investigations, however, only a fraction of the gene content of the organisms under study is examined (e.g., RNA sequencing). Here, identification of orthologous nucleotide or amino acid sequences can be achieved using a graph-based approach that maps nucleotide sequences to genes of known orthology. Existing implementations of this approach, however, suffer from algorithmic issues that may cause problems in downstream analyses.

Results

We present a new software pipeline, Orthograph, that addresses and solves the above problems and implements useful features for a wide range of comparative genomic and transcriptomic analyses. Orthograph applies a best reciprocal hit search strategy using profile hidden Markov models and maps nucleotide sequences to the globally best matching cluster of orthologous genes, thus enabling researchers to conveniently and reliably delineate orthologs and paralogs from transcriptomic and genomic sequence data. We demonstrate the performance of our approach on de novo-sequenced and assembled transcript libraries of 24 species of apoid wasps (Hymenoptera: Aculeata) as well as on published genomic datasets.

Conclusion

With Orthograph, we implemented a best reciprocal hit approach to reference-based orthology prediction for coding nucleotide sequences such as RNAseq data. Orthograph is flexible, easy to use, open source and freely available at https://mptrsen.github.io/Orthograph . Additionally, we release 24 de novo-sequenced and assembled transcript libraries of apoid wasp species.",2017-02-16 +26910751,MDI-GPU: accelerating integrative modelling for genomic-scale data using GP-GPU computing.,"The integration of multi-dimensional datasets remains a key challenge in systems biology and genomic medicine. Modern high-throughput technologies generate a broad array of different data types, providing distinct--but often complementary--information. However, the large amount of data adds burden to any inference task. Flexible Bayesian methods may reduce the necessity for strong modelling assumptions, but can also increase the computational burden. We present an improved implementation of a Bayesian correlated clustering algorithm, that permits integrated clustering to be routinely performed across multiple datasets, each with tens of thousands of items. By exploiting GPU based computation, we are able to improve runtime performance of the algorithm by almost four orders of magnitude. This permits analysis across genomic-scale data sets, greatly expanding the range of applications over those originally possible. MDI is available here: http://www2.warwick.ac.uk/fac/sci/systemsbiology/research/software/.",2016-03-01 +27511941,Plasma Retinol Kinetics and β-Carotene Bioefficacy Are Quantified by Model-Based Compartmental Analysis in Healthy Young Adults with Low Vitamin A Stores.,"

Background

Model-based compartmental analysis of data on plasma retinol kinetics after administration of labeled retinol provides unique information about whole-body vitamin A metabolism. If labeled β-carotene is coadministered, its bioefficacy relative to the retinol reference dose can also be estimated.

Objectives

The objectives were to model plasma retinol kinetics after administration of labeled preformed vitamin A and provitamin A β-carotene and to determine relative β-carotene bioefficacy.

Methods

We used the Simulation, Analysis and Modeling software (WinSAAM version 3.0.8; http://www.WinSAAM.org) to analyze previously collected data on plasma [13C10]- and [13C5]retinol kinetics for 14 d after oral administration of 1 mg [13C10]retinyl acetate and 2 mg [13C10]β-carotene in oil to 30 healthy young adults of European ancestry [13 men, 17 women; mean ± SD age: 24.5 ± 4.2 y; mean ± SD body weight: 65.2 ± 10 kg; mean ± SD body mass index (in kg/m2): 22.5 ± 1.9] with moderate vitamin A intakes.

Results

A 6-component model provided the best fit to the data, including compartments for initial metabolism of vitamin A, plasma retinol, and extravascular vitamin A storage. The disposal rate was 6.7 ± 3.1 μmol/d, fractional catabolic rate was 6.0% ± 2.3%/d, and vitamin A stores were 123 ± 71 μmol. Relative β-carotene bioefficacy, based on the ratio of the areas under the fraction of dose curves calculated by WinSAAM, averaged 13.5% ± 6.02% (retinol activity equivalents = 7.7:1.0 μg). Interindividual variation in relative β-carotene bioefficacy was high (CV: 44%).

Conclusions

Vitamin A kinetics in these young adults were best described by essentially the same model that had been previously developed by using data for older adults with higher vitamin A stores; differences in parameter values reflected differences in vitamin A status. Estimated β-carotene bioefficacy was relatively low but similar to previously reported estimates obtained by graphical methods. This trial was registered at the UK Clinical Research Network as UKCRN 7413.",2016-08-10 +28873262,Reactivation of TWIST1 contributes to Ewing sarcoma metastasis. ,"Ewing sarcoma is a cancer of bone and soft tissue. Despite aggressive treatment, survival remains poor, particularly in patients with metastatic disease. Failure to treat Ewing sarcoma is due to the lack of understanding of the molecular pathways that regulate metastasis. In addition, no molecular prognostic markers have been identified for Ewing sarcoma to risk stratify patients. Ewing sarcoma patients were divided into high or low Twist1 gene expression and survival curves were generated using the R2 microarray-based Genomic Analysis platform (http://r2.amc.nl). Tumors from Ewing sarcoma patients were also evaluated for TWIST1 expression by immunohistochemistry. Ewing sarcoma xenografts were established to evaluate the role of TWIST1 in metastasis. The effects of Twist1 on migration and invasion were evaluated using migration and invasion assays in A673 and RDES cells. Twist1 expression was a negative prognostic marker for overall survival in a public Ewing sarcoma patient data set based on Twist1 mRNA levels and in patient tumor samples based on Twist1 immunohistochemistry. TWIST1 is detected in significantly higher percentage of patients with metastatic diseases than localized disease. Using Ewing sarcoma tumor xenografts in mice, we found that suppressing TWIST1 levels suppressed metastasis without affecting primary tumor development. 
Knockdown of Twist1 inhibited the migration and invasion capability, while overexpression of Twist1 promoted migration and invasion in Ewing sarcoma cells. These results suggest that TWIST1 promotes metastasis in Ewing sarcoma and could be used as a prognostic marker for treatment stratification; however, further validation is required in a larger cohort of patients.",2017-09-05 +23452691,search GenBank: interactive orchestration and ad-hoc choreography of Web services in the exploration of the biomedical resources of the National Center For Biotechnology Information.,"

Background

Due to the growing number of biomedical entries in data repositories of the National Center for Biotechnology Information (NCBI), it is difficult to collect, manage and process all of these entries in one place by third-party software developers without significant investment in hardware and software infrastructure, its maintenance and administration. Web services allow development of software applications that integrate in one place the functionality and processing logic of distributed software components, without integrating the components themselves and without integrating the resources to which they have access. This is achieved by appropriate orchestration or choreography of available Web services and their shared functions. After the successful application of Web services in the business sector, this technology can now be used to build composite software tools that are oriented towards biomedical data processing.

Results

We have developed a new tool for efficient and dynamic data exploration in GenBank and other NCBI databases. A dedicated search GenBank system makes use of NCBI Web services and a package of Entrez Programming Utilities (eUtils) in order to provide extended searching capabilities in NCBI data repositories. In search GenBank users can use one of the three exploration paths: simple data searching based on the specified user's query, advanced data searching based on the specified user's query, and advanced data exploration with the use of macros. search GenBank orchestrates calls of particular tools available through the NCBI Web service providing requested functionality, while users interactively browse selected records in search GenBank and traverse between NCBI databases using available links. On the other hand, by building macros in the advanced data exploration mode, users create choreographies of eUtils calls, which can lead to the automatic discovery of related data in the specified databases.

Conclusions

search GenBank extends standard capabilities of the NCBI Entrez search engine in querying biomedical databases. The possibility of creating and saving macros in the search GenBank is a unique feature and has a great potential. The potential will further grow in the future with the increasing density of networks of relationships between data stored in particular databases. search GenBank is available for public use at http://sgb.biotools.pl/.",2013-03-01 +28472475,Amino Acid Interaction (INTAA) web server.,"Large biomolecules-proteins and nucleic acids-are composed of building blocks which define their identity, properties and binding capabilities. In order to shed light on the energetic side of interactions of amino acids between themselves and with deoxyribonucleotides, we present the Amino Acid Interaction web server (http://bioinfo.uochb.cas.cz/INTAA/). INTAA offers the calculation of the residue Interaction Energy Matrix for any protein structure (deposited in Protein Data Bank or submitted by the user) and a comprehensive analysis of the interfaces in protein-DNA complexes. The Interaction Energy Matrix web application aims to identify key residues within protein structures which contribute significantly to the stability of the protein. The application provides an interactive user interface enhanced by 3D structure viewer for efficient visualization of pairwise and net interaction energies of individual amino acids, side chains and backbones. The protein-DNA interaction analysis part of the web server allows the user to view the relative abundance of various configurations of amino acid-deoxyribonucleotide pairs found at the protein-DNA interface and the interaction energies corresponding to these configurations calculated using a molecular mechanical force field. 
The effects of the sugar-phosphate moiety and of the dielectric properties of the solvent on the interaction energies can be studied for the various configurations.",2017-07-01 +25183354,PPDB - A tool for investigation of plants physiology based on gene ontology. ,"Representing the way forward, from functional genomics and its ontology to functional understanding and physiological model, in a computationally tractable fashion is one of the ongoing challenges faced by computational biology. To tackle the standpoint, we herein feature the applications of contemporary database management to the development of PPDB, a searching and browsing tool for the Plants Physiology Database that is based upon the mining of a large amount of gene ontology data currently available. The working principles and search options associated with the PPDB are publicly available and freely accessible on-line ( http://www.iitr.ernet.in/ajayshiv/ ) through a user friendly environment generated by means of Drupal-6.24. By knowing that genes are expressed in temporally and spatially characteristic patterns and that their functionally distinct products often reside in specific cellular compartments and may be part of one or more multi-component complexes, this sort of work is intended to be relevant for investigating the functional relationships of gene products at a system level and, thus, helps us approach to the full physiology.",2014-09-02 +27628048,"A retrospective analysis of real-world use of the eaTracker® My Goals website by adults from Ontario and Alberta, Canada.","

Background

Little is known about use of goal setting and tracking tools within online programs to support nutrition and physical activity behaviour change. In 2011, Dietitians of Canada added ""My Goals,"" a nutrition and physical activity behaviour goal setting and tracking tool to their free publicly available self-monitoring website (eaTracker® ( http://www.eaTracker.ca/ )). My Goals allows users to: a) set ""ready-made"" SMART (Specific, Measurable, Attainable, Realistic, Time-related) goals (choice of n = 87 goals from n = 13 categories) or ""write your own"" goals, and b) track progress using the ""My Goals Tracker."" The purpose of this study was to characterize: a) My Goals user demographics, b) types of goals set, and c) My Goals Tracker use.

Methods

Anonymous data on all goals set using the My Goals feature from December 6/2012-April 28/2014 by users ≥19y from Ontario and Alberta, Canada were obtained. This dataset contained: anonymous self-reported user demographic data, user set goals, and My Goals Tracker use data. Write your own goals were categorized by topic and specificity. Data were summarized using descriptive statistics. Multivariate binary logistic regression was used to determine associations between user demographics and a) goal topic areas and b) My Goals Tracker use.

Results

Overall, n = 16,511 goal statements (75.4 % ready-made; 24.6 % write your own) set by n = 8,067 adult users 19-85y (83.3 % female; mean age 41.1 ± 15.0y, mean BMI 28.8 ± 7.6 kg/m²) were included for analysis. Overall, 33.1 % of ready-made goals were from the ""Managing your Weight"" category. Of write your own goal entries, 42.3 % were solely distal goals (most related to weight management); 38.6 % addressed nutrition behaviour change (16.6 % had unspecific general eating goals); 18.1 % addressed physical activity behaviour change (47.3 % had goals without information on exercise amount and type). Many write your own goals were poor quality (e.g., non-specific (e.g., missing amounts)), and possibly unrealistic (e.g., no sugar). Few goals were tracked (<10 %). Demographic variables had statistically significant relations with goal topic areas and My Goals Tracker use.

Conclusions

eaTracker® users had high interest in goal setting and the My Goals feature, however, self-written goals were often poor quality and goal tracking was rare. Further research is needed to better support users.",2016-09-15 +26798323,Orphan Crops Browser: a bridge between model and orphan crops.,"Many important crops have received little attention by the scientific community, either because they are not considered economically important or due to their large and complex genomes. De novo transcriptome assembly, using next-generation sequencing data, is an attractive option for the study of these orphan crops. In spite of the large amount of sequencing data that can be generated, there is currently a lack of tools which can effectively help molecular breeders and biologists to mine this type of information. Our goal was to develop a tool that enables molecular breeders, without extensive bioinformatics knowledge, to efficiently study de novo transcriptome data from any orphan crop (http://www.bioinformatics.nl/denovobrowser/db/species/index). The Orphan Crops Browser has been designed to facilitate the following tasks (1) search and identification of candidate transcripts based on phylogenetic relationships between orthologous sequence data from a set of related species and (2) design specific and degenerate primers for expression studies in the orphan crop of interest. To demonstrate the usability and reliability of the browser, it was used to identify the putative orthologues of 17 known lignin biosynthetic genes from maize and sugarcane in the orphan crop Miscanthus sinensis. Expression studies in miscanthus stem internode tissue differing in maturation were subsequently carried out, to follow the expression of these genes during lignification. Our results showed a negative correlation between lignin content and gene expression. 
The present data are in agreement with recent findings in maize and other crops, and it is further discussed in this paper.",2016-01-12 +26589277,SomVarIUS: somatic variant identification from unpaired tissue samples.,"

Motivation

Somatic variant calling typically requires paired tumor-normal tissue samples. Yet, paired normal tissues are not always available in clinical settings or for archival samples.

Results

We present SomVarIUS, a computational method for detecting somatic variants using high throughput sequencing data from unpaired tissue samples. We evaluate the performance of the method using genomic data from synthetic and real tumor samples. SomVarIUS identifies somatic variants in exome-seq data of  ∼150 ×  coverage with at least 67.7% precision and 64.6% recall rates, when compared with paired-tissue somatic variant calls in real tumor samples. We demonstrate the utility of SomVarIUS by identifying somatic mutations in formalin-fixed samples, and tracking clonal dynamics of oncogenic mutations in targeted deep sequencing data from pre- and post-treatment leukemia samples.

Availability and implementation

SomVarIUS is written in Python 2.7 and available at http://www.sjdlab.org/resources/

Contact

subhajyoti.de@ucdenver.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-20 +27139377,ChromNet: Learning the human chromatin network from all ENCODE ChIP-seq data.,"A cell's epigenome arises from interactions among regulatory factors-transcription factors and histone modifications-co-localized at particular genomic regions. We developed a novel statistical method, ChromNet, to infer a network of these interactions, the chromatin network, by inferring conditional-dependence relationships among a large number of ChIP-seq data sets. We applied ChromNet to all available 1451 ChIP-seq data sets from the ENCODE Project, and showed that ChromNet revealed previously known physical interactions better than alternative approaches. We experimentally validated one of the previously unreported interactions, MYC-HCFC1. An interactive visualization tool is available at http://chromnet.cs.washington.edu.",2016-04-30 +24267899,Rat Strain Ontology: structured controlled vocabulary designed to facilitate access to strain data at RGD.,"

Background

The Rat Genome Database (RGD) ( http://rgd.mcw.edu/) is the premier site for comprehensive data on the different strains of the laboratory rat (Rattus norvegicus). The strain data are collected from various publications, direct submissions from individual researchers, and rat providers worldwide. Rat strain, substrain designation and nomenclature follow the Guidelines for Nomenclature of Mouse and Rat Strains, instituted by the International Committee on Standardized Genetic Nomenclature for Mice. While symbols and names aid in identifying strains correctly, the flat nature of this information prohibits easy search and retrieval, as well as other data mining functions. In order to improve these functionalities, particularly in ontology-based tools, the Rat Strain Ontology (RS) was developed.

Results

The Rat Strain Ontology (RS) reflects the breeding history, parental background, and genetic manipulation of rat strains. This controlled vocabulary organizes strains by type: inbred, outbred, chromosome altered, congenic, mutant and so on. In addition, under the chromosome altered category, strains are organized by chromosome, and further by type of manipulations, such as mutant or congenic. This allows users to easily retrieve strains of interest with modifications in specific genomic regions. The ontology was developed using the Open Biological and Biomedical Ontology (OBO) file format, and is organized on the Directed Acyclic Graph (DAG) structure. Rat Strain Ontology IDs are included as part of the strain report (RS: ######).

Conclusions

As rat researchers are often unaware of the number of substrains or altered strains within a breeding line, this vocabulary now provides an easy way to retrieve all substrains and accompanying information. Its usefulness is particularly evident in tools such as the PhenoMiner at RGD, where users can now easily retrieve phenotype measurement data for related strains, strains with similar backgrounds or those with similar introgressed regions. This controlled vocabulary also allows better retrieval and filtering for QTLs and in genomic tools such as the GViewer. The Rat Strain Ontology has been incorporated into the RGD Ontology Browser ( http://rgd.mcw.edu/rgdweb/ontology/view.html?acc_id=RS:0000457#s) and is available through the National Center for Biomedical Ontology ( http://bioportal.bioontology.org/ontologies/1150) or the RGD ftp site ( ftp://rgd.mcw.edu/pub/ontology/rat_strain/).",2013-11-22 +27924033,ChIPBase v2.0: decoding transcriptional regulatory networks of non-coding RNAs and protein-coding genes from ChIP-seq data.,"The abnormal transcriptional regulation of non-coding RNAs (ncRNAs) and protein-coding genes (PCGs) contributes to various biological processes and is linked with human diseases, but the underlying mechanisms remain elusive. In this study, we developed ChIPBase v2.0 (http://rna.sysu.edu.cn/chipbase/) to explore the transcriptional regulatory networks of ncRNAs and PCGs. ChIPBase v2.0 has been expanded with ∼10 200 curated ChIP-seq datasets, which represents an approximately 20-fold expansion compared with the previously released version. We identified thousands of binding motif matrices and their binding sites from ChIP-seq data of DNA-binding proteins and predicted millions of transcriptional regulatory relationships between transcription factors (TFs) and genes. We constructed 'Regulator' module to predict hundreds of TFs and histone modifications that were involved in or affected transcription of ncRNAs and PCGs. 
Moreover, we built a web-based tool, Co-Expression, to explore the co-expression patterns between DNA-binding proteins and various types of genes by integrating the gene expression profiles of ∼10 000 tumor samples and ∼9100 normal tissues and cell lines. ChIPBase also provides a ChIP-Function tool and a genome browser to predict functions of diverse genes and visualize various ChIP-seq data. This study will greatly expand our understanding of the transcriptional regulations of ncRNAs and PCGs.",2016-10-23 +28113169,The Case for Big Data: New York City's Kavli HUMAN Project Aims to Use Big Data in Resolving Big Health Questions.,"Cigarette smoking is tied to lung cancer, but people still smoke. Why do people start smoking in the first place? That is one of the many complex, interdisciplinary questions behind the Kavli HUMAN Project (http://kavlihumanproject.org), a massive data-collection endeavor with the goal of learning how everything—from biology to behavior and environment—affects the human condition.",2016-09-01 +28535189,Predicting accurate contacts in thousands of Pfam domain families using PconsC3.,"

Motivation

A few years ago it was shown that by using a maximum entropy approach to describe couplings between columns in a multiple sequence alignment it is possible to significantly increase the accuracy of residue contact predictions. For very large protein families with more than 1000 effective sequences the accuracy is sufficient to produce accurate models of proteins as well as complexes. Today, for about half of all Pfam domain families no structure is known, but unfortunately most of these families have at most a few hundred members, i.e. are too small for such contact prediction methods.

Results

To extend accurate contact predictions to the thousands of smaller protein families we present PconsC3, a fast and improved method for protein contact predictions that can be used for families with even 100 effective sequence members. PconsC3 outperforms direct coupling analysis (DCA) methods significantly, independent of family size, secondary structure content, contact range, or the number of selected contacts.

Availability and implementation

PconsC3 is available as a web server and downloadable version at http://c3.pcons.net . The downloadable version is free for all to use and licensed under the GNU General Public License, version 2. At this site contact predictions for most Pfam families are also available. We do estimate that more than 4000 contact maps for Pfam families of unknown structure have more than 50% of the top-ranked contacts predicted correctly.

Contact

arne@bioinfo.se.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +28702677,"Shortened Nonword Repetition Task (NWR-S): A Simple, Quick, and Less Expensive Outcome to Identify Children With Combined Specific Language and Reading Impairment.","

Purpose

The purpose of this research note was to validate a simplified version of the Dutch nonword repetition task (NWR; Rispens & Baker, 2012). The NWR was shortened and scoring was transformed to correct/incorrect nonwords, resulting in the shortened NWR (NWR-S).

Method

NWR-S and NWR performance were compared in the previously published data set of Rispens and Baker (2012; N = 88), who compared NWR performance in 5 participant groups: specific language impairment (SLI), reading impairment (RI), both SLI and RI, one control group matched on chronological age, and one control group matched on language age.

Results

Analyses of variance showed that children with SLI + RI performed significantly worse than other participant groups in NWR-S, just as in NWR. Logistic regression analyses showed that both tasks can predict an SLI + RI outcome. NWR-S holds a sensitivity of 82.6% and a specificity of 95.4% in identifying children with SLI + RI. The sensitivity of the original NWR is 87.0% with a specificity of 87.7%.

Conclusions

Like the original NWR, the NWR-S, comprising a subset of 22 nonwords scored with a simplified scoring system, can identify children with combined SLI and RI while saving a significant amount of the needed assessment time.

Supplemental materials

https://doi.org/10.23641/asha.5150116.",2017-08-01 +24178989,HypoxiaDB: a database of hypoxia-regulated proteins.,"There has been intense interest in the cellular response to hypoxia, and a large number of differentially expressed proteins have been identified through various high-throughput experiments. These valuable data are scattered, and there have been no systematic attempts to document the various proteins regulated by hypoxia. Compilation, curation and annotation of these data are important in deciphering their role in hypoxia and hypoxia-related disorders. Therefore, we have compiled HypoxiaDB, a database of hypoxia-regulated proteins. It is a comprehensive, manually-curated, non-redundant catalog of proteins whose expressions are shown experimentally to be altered at different levels and durations of hypoxia. The database currently contains 72 000 manually curated entries taken on 3500 proteins extracted from 73 peer-reviewed publications selected from PubMed. HypoxiaDB is distinctive from other generalized databases: (i) it compiles tissue-specific protein expression changes under different levels and duration of hypoxia. Also, it provides manually curated literature references to support the inclusion of the protein in the database and establish its association with hypoxia. (ii) For each protein, HypoxiaDB integrates data on gene ontology, KEGG (Kyoto Encyclopedia of Genes and Genomes) pathway, protein-protein interactions, protein family (Pfam), OMIM (Online Mendelian Inheritance in Man), PDB (Protein Data Bank) structures and homology to other sequenced genomes. (iii) It also provides pre-compiled information on hypoxia-proteins, which otherwise requires tedious computational analysis. This includes information like chromosomal location, identifiers like Entrez, HGNC, Unigene, Uniprot, Ensembl, Vega, GI numbers and Genbank accession numbers associated with the protein. 
These are further cross-linked to respective public databases augmenting HypoxiaDB to the external repositories. (iv) In addition, HypoxiaDB provides an online sequence-similarity search tool for users to compare their protein sequences with HypoxiaDB protein database. We hope that HypoxiaDB will enrich our knowledge about hypoxia-related biology and eventually will lead to the development of novel hypothesis and advancements in diagnostic and therapeutic activities. HypoxiaDB is freely accessible for academic and non-profit users via http://www.hypoxiadb.com.",2013-10-31 +28108305,Efficient dynamic programming algorithm with prior knowledge for protein β-strand alignment.,"One of the main tasks towards the prediction of protein β-sheet structure is to predict the native alignment of β-strands. The alignment of two β-strands defines similar regions that may reflect functional, structural, or evolutionary relationships between them. Therefore, any improvement in β-strands alignment not only reduces the computational search space but also improves β-sheet structure prediction accuracy. To define the alignment scores, previous studies utilized predicted residue-residue contacts (contact maps). However, there are two serious problems using them. First, the precision of contact map prediction techniques, especially for long-range contacts (i.e., β-residues), is still not satisfactory. Second, the residue-residue contact predictors usually utilize general properties of amino acids and disregard the structural features of β-residues. In this paper, we consider β-structure information, which is estimated from protein β-sheet data sets, as alignment scores. However, the predicted contact maps are used as a prior knowledge about residues. They are used for strengthening or weakening the alignment scores in our algorithm. Thus, we can utilize both β-residues and β-structure information in alignment of β-strands. 
The structure of dynamic programming of the alignment algorithm is changed in order to work with our prior knowledge. Moreover, the Four Russians method is applied to the proposed alignment algorithm in order to reduce the time complexity of the problem. For evaluating the proposed method, we applied it to the state-of-the-art β-sheet structure prediction methods. The experimental results on the BetaSheet916 data set showed significant improvements in the execution time, the accuracy of β-strands' alignment and consequently β-sheet structure prediction accuracy. The results are available at http://conceptsgate.com/BetaSheet.",2017-01-18 +27174940,"PASMet: a web-based platform for prediction, modelling and analyses of metabolic systems.","PASMet (Prediction, Analysis and Simulation of Metabolic networks) is a web-based platform for proposing and verifying mathematical models to understand the dynamics of metabolism. The advantages of PASMet include user-friendliness and accessibility, which enable biologists and biochemists to easily perform mathematical modelling. PASMet offers a series of user-functions to handle the time-series data of metabolite concentrations. The functions are organised into four steps: (i) Prediction of a probable metabolic pathway and its regulation; (ii) Construction of mathematical models; (iii) Simulation of metabolic behaviours; and (iv) Analysis of metabolic system characteristics. Each function contains various statistical and mathematical methods that can be used independently. Users who may not have enough knowledge of computing or programming can easily and quickly analyse their local data without software downloads, updates or installations. Users only need to upload their files in comma-separated values (CSV) format or enter their model equations directly into the website. Once the time-series data or mathematical equations are uploaded, PASMet automatically performs computation on server-side. 
Then, users can interactively view their results and directly download them to their local computers. PASMet is freely available with no login requirement at http://pasmet.riken.jp/ from major web browsers on Windows, Mac and Linux operating systems.",2016-05-12 +28187413,DaTo: an atlas of biological databases and tools.,"This work presents DaTo, a semi-automatically generated world atlas of biological databases and tools. It extracts raw information from all PubMed articles which contain exact URLs in their abstract section, followed by a manual curation of the abstract and the URL accessibility. DaTo features a user-friendly query interface, providing extensible URL-related annotations, such as the status, the location and the country of the URL. A graphical interaction network browser has also been integrated into the DaTo web interface to facilitate exploration of the relationship between different tools and databases with respect to their ontology-based semantic similarity. Using DaTo, the geographical locations, the health statuses, as well as the journal associations were evaluated with respect to the historical development of bioinformatics tools and databases over the last 20 years. We hope it will inspire the biological community to gain a systematic insight into bioinformatics resources. DaTo is accessible via http://bis.zju.edu.cn/DaTo/.",2016-12-18 +28715547,Identifying the Dimensionality of Oral Language Skills of Children With Typical Development in Preschool Through Fifth Grade.,"

Purpose

Language is a multidimensional construct from prior to the beginning of formal schooling to near the end of elementary school. The primary goals of this study were to identify the dimensionality of language and to determine whether this dimensionality was consistent in children with typical language development from preschool through 5th grade.

Method

In a large sample of 1,895 children, confirmatory factor analysis was conducted with 19-20 measures of language intended to represent 6 factors, including domains of vocabulary and syntax/grammar across modalities of expressive and receptive language, listening comprehension, and vocabulary depth.

Results

A 2-factor model with separate, highly correlated vocabulary and syntax factors provided the best fit to the data, and this model of language dimensionality was consistent from preschool through 5th grade.

Conclusion

This study found that there are fewer dimensions than are often suggested or represented by the myriad subtests in commonly used standardized tests of language. The identified 2-dimensional (vocabulary and syntax) model of language has significant implications for the conceptualization and measurement of the language skills of children in the age range from preschool to 5th grade, including the study of typical and atypical language development, the study of the developmental and educational influences of language, and classification and intervention in clinical practice.

Supplemental materials

https://doi.org/10.23641/asha.5154220.",2017-08-01 +28880980,Progressive Tinnitus Management Level 3 Skills Education: A 5-Year Clinical Retrospective.,"

Purpose

The primary purpose of this study was to determine whether progressive tinnitus management Level 3 skills education workshops conducted at the Bay Pines and Boston Veterans Affairs hospitals result in consistent use of the presented tinnitus management strategies by patients 1-5 years after completing the workshops.

Method

In fiscal year (FY) 2015, the tinnitus workshop follow-up form was mailed to all veterans who completed the Level 3 workshops between FY 2010 and FY 2014. Data were compiled to determine which, if any, of the skills taught in the workshops were being used 1-5 years after completion of the workshops and the impact on quality-of-life indicators.

Results

All self-management skills were being utilized up to 5 years postcompletion; therapeutic sound was utilized the most. The majority of patients reported an improved ability to manage reactions to tinnitus and improved quality-of-life indicators. Over 90% of patients from both sites recommended the program to others with tinnitus.

Conclusion

The self-management skills taught in the progressive tinnitus management Level 3 workshops are sustained over time even when limited resources prevent the full complement of workshops or the involvement of mental health services. The workshops can also be successfully implemented through remote delivery via videoconferencing (telehealth).

Supplemental materials

https://doi.org/10.23641/asha.5370883.",2017-09-01 +28911038,Spliceman2: a computational web server that predicts defects in pre-mRNA splicing.,"

Summary

Most pre-mRNA transcripts in eukaryotic cells must undergo splicing to remove introns and join exons, and splicing elements present a large mutational target for disease-causing mutations. Splicing elements are strongly position dependent with respect to the transcript annotations. In 2012, we presented Spliceman, an online tool that used positional dependence to predict how likely distant mutations around annotated splice sites were to disrupt splicing. Here, we present an improved version of the previous tool that will be more useful for predicting the likelihood of splicing mutations. We have added industry-standard input options (i.e. Spliceman now accepts variant call format files), which allow much larger inputs than previously available. The tool also can visualize the locations—within exons and introns—of sequence variants to be analyzed and the predicted effects on splicing of the pre-mRNA transcript. In addition, Spliceman2 integrates with RNAcompete motif libraries to provide a prediction of which trans-acting factor binding sites are disrupted/created and links out to the UCSC genome browser. In summary, the new features in Spliceman2 will allow scientists and physicians to better understand the effects of single nucleotide variations on splicing.

Availability and implementation

Freely available on the web at http://fairbrother.biomed.brown.edu/spliceman2 . Website implemented in PHP framework-Laravel 5, PostgreSQL, Apache, and Perl, with all major browsers supported.

Contact

william_fairbrother@brown.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +21450055,Rice-Map: a new-generation rice genome browser.,"

Background

The concurrent release of rice genome sequences for two subspecies (Oryza sativa L. ssp. japonica and Oryza sativa L. ssp. indica) facilitates rice studies at the whole genome level. Since the advent of high-throughput analysis, huge amounts of functional genomics data have been delivered rapidly, making an integrated online genome browser indispensable for scientists to visualize and analyze these data. Based on next-generation web technologies and high-throughput experimental data, we have developed Rice-Map, a novel genome browser for researchers to navigate, analyze and annotate rice genome interactively.

Description

More than one hundred annotation tracks (81 for japonica and 82 for indica) have been compiled and loaded into Rice-Map. These pre-computed annotations cover gene models, transcript evidences, expression profiling, epigenetic modifications, inter-species and intra-species homologies, genetic markers and other genomic features. In addition to these pre-computed tracks, registered users can interactively add comments and research notes to Rice-Map as User-Defined Annotation entries. By smoothly scrolling, dragging and zooming, users can browse various genomic features simultaneously at multiple scales. On-the-fly analysis for selected entries could be performed through dedicated bioinformatic analysis platforms such as WebLab and Galaxy. Furthermore, a BioMart-powered data warehouse ""Rice Mart"" is offered for advanced users to fetch bulk datasets based on complex criteria.

Conclusions

Rice-Map delivers abundant up-to-date japonica and indica annotations, providing a valuable resource for both computational and bench biologists. Rice-Map is publicly accessible at http://www.ricemap.org/, with all data available for free downloading.",2011-03-30 +27207943,Complex heatmaps reveal patterns and correlations in multidimensional genomic data.,"

Unlabelled

Parallel heatmaps with carefully designed annotation graphics are powerful for efficient visualization of patterns and relationships among high dimensional genomic data. Here we present the ComplexHeatmap package that provides rich functionalities for customizing heatmaps, arranging multiple parallel heatmaps and including user-defined annotation graphics. We demonstrate the power of ComplexHeatmap to easily reveal patterns and correlations among multiple sources of information with four real-world datasets.

Availability and implementation

The ComplexHeatmap package and documentation are freely available from the Bioconductor project: http://www.bioconductor.org/packages/devel/bioc/html/ComplexHeatmap.html

Contact

m.schlesner@dkfz.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-20 +24320465,Investigation of realistic PET simulations incorporating tumor patient's specificity using anthropomorphic models: creation of an oncology database.,"

Purpose

The GATE Monte Carlo simulation toolkit is used for the implementation of realistic PET simulations incorporating tumor heterogeneous activity distributions. The reconstructed patient images include noise from the acquisition process, imaging system's performance restrictions and have limited spatial resolution. For those reasons, the measured intensity cannot be simply introduced in GATE simulations, to reproduce clinical data. Investigation of the heterogeneity distribution within tumors applying partial volume correction (PVC) algorithms was assessed. The purpose of the present study was to create a simulated oncology database based on clinical data with realistic intratumor uptake heterogeneity properties.

Methods

PET/CT data of seven oncology patients were used in order to create a realistic tumor database investigating the heterogeneity activity distribution of the simulated tumors. The anthropomorphic models (NURBS based cardiac torso and Zubal phantoms) were adapted to the CT data of each patient, and the activity distribution was extracted from the respective PET data. The patient-specific models were simulated with the Monte Carlo Geant4 application for tomography emission (GATE) in three different levels for each case: (a) using homogeneous activity within the tumor, (b) using heterogeneous activity distribution in every voxel within the tumor as it was extracted from the PET image, and (c) using heterogeneous activity distribution corresponding to the clinical image following PVC. The three different types of simulated data in each case were reconstructed with two iterations and filtered with a 3D Gaussian postfilter, in order to simulate the intratumor heterogeneous uptake. Heterogeneity in all generated images was quantified using textural feature derived parameters in 3D according to the ground truth of the simulation, and compared to clinical measurements. Finally, profiles were plotted in central slices of the tumors, across lines with heterogeneous activity distribution for visual assessment.

Results

The accuracy of the simulated database was assessed against the original clinical images. The PVC simulated images matched the clinical ones best. Local, regional, and global features extracted from the PVC simulated images were closest to the clinical measurements, with the exception of the size zone variability and the mean intensity values, where heterogeneous tumors showed better reproducibility. The profiles on PVC simulated tumors after postfiltering seemed to represent the more realistic heterogeneous regions with respect to the clinical reference.

Conclusions

In this study, the authors investigated the input activity map heterogeneity in the GATE simulations of tumors with heterogeneous activity distribution. The most realistic heterogeneous tumors were obtained by inserting PVC activity distributions from the clinical image into the activity map of the simulation. Partial volume effect (PVE) can play a crucial role in the quantification of heterogeneity within tumors and have an important impact on applications such as patient follow-up during treatment and assessment of tumor response to therapy. The development of such a database incorporating patient anatomical and functional variability can be used to evaluate new image processing or analysis algorithms, while providing control of the ground truth, which is not available when dealing with clinical datasets. The database includes all images used and generated in this study, as well as the sinograms and the attenuation phantoms for further investigation. It is freely available to the interested reader of the journal at http://www.med.upatras.gr/oncobase/.",2013-11-01 +25062914,tmBioC: improving interoperability of text-mining tools with BioC. ,"The lack of interoperability among biomedical text-mining tools is a major bottleneck in creating more complex applications. Despite the availability of numerous methods and techniques for various text-mining tasks, combining different tools requires substantial efforts and time owing to heterogeneity and variety in data formats. In response, BioC is a recent proposal that offers a minimalistic approach to tool interoperability by stipulating minimal changes to existing tools and applications. BioC is a family of XML formats that define how to present text documents and annotations, and also provides easy-to-use functions to read/write documents in the BioC format. 
In this study, we introduce our text-mining toolkit, which is designed to perform several challenging and significant tasks in the biomedical domain, and repackage the toolkit into BioC to enhance its interoperability. Our toolkit consists of six state-of-the-art tools for named-entity recognition, normalization and annotation (PubTator) of genes (GenNorm), diseases (DNorm), mutations (tmVar), species (SR4GN) and chemicals (tmChem). Although developed within the same group, each tool is designed to process input articles and output annotations in a different format. We modify these tools and enable them to read/write data in the proposed BioC format. We find that, using the BioC family of formats and functions, only minimal changes were required to build the newer versions of the tools. The resulting BioC wrapped toolkit, which we have named tmBioC, consists of our tools in BioC, an annotated full-text corpus in BioC, and a format detection and conversion tool. Furthermore, through participation in the 2013 BioCreative IV Interoperability Track, we empirically demonstrate that the tools in tmBioC can be more efficiently integrated with each other as well as with external tools: Our experimental results show that using BioC reduces >60% in lines of code for text-mining tool integration. The tmBioC toolkit is publicly available at http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmTools/. Database URL: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmTools/.",2014-07-25 +24299017,Heterodimeric protein complex identification by naïve Bayes classifiers.,"

Background

Protein complexes are basic cellular entities that carry out the functions of their components. It can be found that in databases of protein complexes of yeast like CYC2008, the major type of known protein complexes is heterodimeric complexes. Although a number of methods for trying to predict sets of proteins that form arbitrary types of protein complexes simultaneously have been proposed, it can be found that they often fail to predict heterodimeric complexes.

Results

In this paper, we have designed several features characterizing heterodimeric protein complexes based on genomic data sets, and proposed a supervised-learning method for the prediction of heterodimeric protein complexes. This method learns the parameters of the features, which are embedded in the naïve Bayes classifier. The log-likelihood ratio derived from the naïve Bayes classifier with the parameter values obtained by maximum likelihood estimation gives the score of a given pair of proteins to predict whether the pair is a heterodimeric complex or not. A five-fold cross-validation shows good performance on yeast. The trained classifiers also show higher predictability than various existing algorithms on yeast data sets with approximate and exact matching criteria.

Conclusions

Heterodimeric protein complex prediction is a rather harder problem than heteromeric protein complex prediction because heterodimeric protein complexes are topologically simpler. However, it turns out that by designing features specialized for heterodimeric protein complexes, their predictability can be improved. Thus, the design of more sophisticated features for heterodimeric protein complexes as well as the accumulation of more accurate and useful genome-wide data sets will lead to higher predictability of heterodimeric protein complexes. Our tool can be downloaded from http://imi.kyushu-u.ac.jp/~om/.",2013-12-03 +25499399,Increasing fluid intake and reducing dehydration risk in older people living in long-term care: a systematic review.,"

Objective

To assess the efficacy of interventions and environmental factors on increasing fluid intake or reducing dehydration risk in older people living in long-term care facilities.

Design

Systematic review of intervention and observational studies.

Data sources

Thirteen electronic databases were searched from inception until September 2013 in all languages. References of included papers and reviews were checked.

Eligibility criteria

Intervention and observational studies investigating modifiable factors to increase fluid intake and/or reduce dehydration risk in older people (≥65 years) living in long-term care facilities who could drink orally.

Review methods

Two reviewers independently screened, selected, abstracted data, and assessed risk of bias from included studies; narrative synthesis was performed.

Results

A total of 4328 titles and abstracts were identified, 325 full-text articles were obtained and 23 were included in the review. Nineteen intervention and 4 observational studies from 7 countries investigated factors at the resident, institutional, or policy level. Overall, the studies were at high risk of bias due to selection and attrition bias and lack of valid outcome measures of fluid intake and dehydration assessment. Reported findings from 6 of the 9 intervention studies investigating the effect of multicomponent strategies on fluid intake or dehydration described a positive effect. Components included greater choice and availability of beverages, increased staff awareness, and increased staff assistance with drinking and toileting. Implementation of the US Resident Assessment Instrument reduced dehydration prevalence from 3% to 1%, P = .01. Two smaller studies reported positive effects: one on fluid intake in 9 men with Alzheimer disease using high-contrast red cups, the other involved supplementing 13 mildly dehydrated residents with oral hydration solution over 5 days to reduce dehydration. Modifications to the dining environment, advice to residents, presentation of beverages, and mode of delivery (straw vs beaker; prethickened drinks vs those thickened at the bedside) were inconclusive. Two large observational studies with good internal validity investigated effects of ownership; in Canada, for-profit ownership was associated with increased hospital admissions for dehydration; no difference was seen in dehydration prevalence between US for-profit and not-for-profit homes, although chain facilities were associated with lower odds of dehydration. This US study did not suggest any effect of staffing levels on dehydration prevalence.

Conclusions

A wide range of interventions and exposures were identified, but the efficacy of many strategies remains unproven due to the high risk of bias present in many studies. Reducing dehydration prevalence in long-term care facilities is likely to require multiple strategies involving policymakers, management, and care staff, but these require further investigation using more robust study methodologies. The review protocol was registered with the International Prospective Register of Systematic Reviews (http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42012003100).",2014-12-12 +29103755,A failure-type specific risk prediction tool for selection of head-and-neck cancer patients for experimental treatments.,"OBJECTIVES:The objective of this work was to develop a tool for decision support, providing simultaneous predictions of the risk of loco-regional failure (LRF) and distant metastasis (DM) after definitive treatment for head-and-neck squamous cell carcinoma (HNSCC). MATERIALS AND METHODS:Retrospective data for 560HNSCC patients were used to generate a multi-endpoint model, combining three cause-specific Cox models (LRF, DM and death with no evidence of disease (death NED)). The model was used to generate risk profiles of patients eligible for/included in a de-intensification study (RTOG 1016) and a dose escalation study (CONTRAST), respectively, to illustrate model predictions versus classic inclusion/exclusion criteria for clinical trials. The model is published as an on-line interactive tool (https://katrin.shinyapps.io/HNSCCmodel/). RESULTS:The final model included pre-selected clinical variables (tumor subsite, T stage, N stage, smoking status, age and performance status) and one additional variable (tumor volume). The treatment failure discrimination ability of the developed model was superior of that of UICC staging, 8th edition (AUCLRF=72.7% vs 64.2%, p<0.001 and AUCDM=70.7% vs 58.8%, p<0.001). 
Using the model for trial inclusion simulation, it was found that 14% of patients eligible for the de-intensification study had>20% risk of tumor relapse. Conversely, 9 of the 15 dose escalation trial participants had LRF risks<20%. CONCLUSION:A multi-endpoint model was generated and published as an on-line interactive tool. Its potential in decision support was illustrated by generating risk profiles for patients eligible for/included in clinical trials for HNSCC.",2017-09-28 +27587681,Gene-set association tests for next-generation sequencing data.,"

Motivation

Recently, many methods have been developed for conducting rare-variant association studies for sequencing data. These methods have primarily been based on gene-level associations but have not been proven to be as effective as expected. Gene-set-level tests have shown great advantages over gene-level tests in terms of power and robustness, because complex diseases are often caused by multiple genes that comprise biological gene sets.

Results

Here, we propose several novel gene-set tests that employ rapid and efficient dimensionality reduction. The performance of these tests was investigated using extensive simulations and application to 1058 whole-exome sequences from a Korean population. We identified some known pathways and novel pathways whose rare or common variants are associated with elevated liver enzymes and replicated the results in an independent cohort.

Availability and implementation

Source R code for our algorithm is freely available at http://statgen.snu.ac.kr/software/QTest

Contact

tspark@stats.snu.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +22934099,Predicting and analyzing protein phosphorylation sites in plants using musite.,"Although protein phosphorylation sites can be reliably identified with high-resolution mass spectrometry, the experimental approach is time-consuming and resource-dependent. Furthermore, it is unlikely that an experimental approach could catalog an entire phosphoproteome. Computational prediction of phosphorylation sites provides an efficient and flexible way to reveal potential phosphorylation sites and provide hypotheses in experimental design. Musite is a tool that we previously developed to predict phosphorylation sites based solely on protein sequence. However, it was not comprehensively applied to plants. In this study, the phosphorylation data from Arabidopsis thaliana, B. napus, G. max, M. truncatula, O. sativa, and Z. mays were collected for cross-species testing and the overall plant-specific prediction as well. The results show that the model for A. thaliana can be extended to other organisms, and the overall plant model from Musite outperforms the current plant-specific prediction tools, Plantphos, and PhosphAt, in prediction accuracy. Furthermore, a comparative study of predicted phosphorylation sites across orthologs among different plants was conducted to reveal potential evolutionary features. A bipolar distribution of isolated, non-conserved phosphorylation sites, and highly conserved ones in terms of the amino acid type was observed. It also shows that predicted phosphorylation sites conserved within orthologs do not necessarily share more sequence similarity in the flanking regions than the background, but they often inherit protein disorder, a property that does not necessitate high sequence conservation. Our analysis also suggests that the phosphorylation frequencies among serine, threonine, and tyrosine correlate with their relative proportion in disordered regions. 
Musite can be used as a web server (http://musite.net) or downloaded as an open-source standalone tool (http://musite.sourceforge.net/).",2012-08-21 +27187034,"MORTALITY OF SELECTED AVIAN ORDERS SUBMITTED TO A WILDLIFE DIAGNOSTIC LABORATORY (SOUTHEASTERN COOPERATIVE WILDLIFE DISEASE STUDY, USA): A 36-YEAR RETROSPECTIVE ANALYSIS.","To determine the relative importance of mortality factors for birds and to assess for patterns in avian mortality over time, we retrospectively examined data of birds submitted to the Southeastern Cooperative Wildlife Disease Study (SCWDS; http://vet.uga.edu/scwds ), US, from 1976 to 2012. During this period, SCWDS, a wildlife diagnostic laboratory, received 2,583 wild bird specimens, from the taxonomic orders Apodiformes, Caprimulgiformes, Cuculiformes, Passeriformes, and Piciformes, originating from 22 states. Data from 2,001 of these birds were analyzed using log-linear models to explore correlations between causes of mortality, taxonomic family, demography, geographic location, and seasonality. Toxicosis was the major cause of mortality, followed by trauma, bacterial infection, physiologic stress, viral infection, and other (mortality causes with low sample numbers and etiologies inconsistent with established categories). Birds submitted during fall and winter had a higher frequency of parasitic infections, trauma, and toxicoses, whereas birds submitted during the spring and summer were more likely to die of an infectious disease, physiologic stress, or trauma. We noted a decrease in toxicoses concurrent with an increase in bacterial infections and trauma diagnoses after the mid-1990s. Toxicosis was the most commonly diagnosed cause of death among adult birds; the majority of juveniles died from physiologic stress, trauma, or viral infections. 
Infectious agents were diagnosed more often within the families Cardinalidae and Fringilidae, whereas noninfectious etiologies were the primary diagnoses in the Bombycillidae, Parulidae, Sturnidae, Turdidae, and Icteridae. There are important inherent limitations in the examination of data from diagnostic labs, as submission of cases varies in timing, frequency, location, and species and is often influenced by several factors, including media coverage of high-profile mortality events. Notwithstanding, our data provide a rare opportunity to examine long-term, regional, and temporal patterns in causes of avian mortality, and they allow for the analysis of novel and rare mortality factors.",2016-05-17 +26819718,Prescription rate of medications potentially contributing to lower urinary tract symptoms and detection of adverse reactions by prescription sequence symmetry analysis.,"

Background

The prevalence of lower urinary tract symptoms (LUTS) increases with age, and LUTS can have a significant effect on the quality of life of the patients. Elderly patients, who are often characterized by a decline in physiological function and by polypharmacy, are susceptible to adverse drug reactions to pharmacotherapy. LUTS can also be a side effect of medication. The purpose of this study was to investigate the possible association between the initiation of LUTS-causing drug therapy and the onset of LUTS.

Methods

Drug dispensing data at the individual level were retrieved from the CISA (Platform for Clinical Information Statistical Analysis: http://www.cisa.jp) database. A retrospective study was conducted by reviewing patients with LUTS who were dispensed drugs that increased the risk of LUTS between April 2011 and March 2012. Prescription sequence symmetry analysis (PSSA) was employed to investigate the associations between the dispensing of medicines of LUTS and that of LUTS-causing drugs.

Results

LUTS-causing drugs were frequently dispensed to patients with LUTS. The use of medications potentially contributing to LUTS was associated with polypharmacy [number of prescription drugs: 12.13 ± 6.78 (user) vs. 5.67 ± 5.24 (nonuser)] but not patient age [age: 71.38 ± 13.28 (user) vs. 70.45 ± 14.80 (nonuser)]. Significant adverse drug events were observed with the use of donepezil, cyclophosphamide, antiparkinson drugs, antidepressants, diazepam, antipsychotic drugs for peptic ulcer, tiotropium bromide, and opioids.

Conclusions

The use of prescription LUTS-causing drugs was correlated with polypharmacy. The adverse drug events associated with LUTS-causing drugs were highly prevalent in elderly patients. To prevent adverse drug events in patients with LUTS, pharmacists and physicians should regularly review medication lists and reduce the prescribed medicines.",2015-02-15 +27655048,modPDZpep: a web resource for structure based analysis of human PDZ-mediated interaction networks.,"

Background

PDZ domains recognize short sequence stretches usually present in C-terminal of their interaction partners. Because of the involvement of PDZ domains in many important biological processes, several attempts have been made for developing bioinformatics tools for genome-wide identification of PDZ interaction networks. Currently available tools for prediction of interaction partners of PDZ domains utilize machine learning approach. Since, they have been trained using experimental substrate specificity data for specific PDZ families, their applicability is limited to PDZ families closely related to the training set. These tools also do not allow analysis of PDZ-peptide interaction interfaces.

Results

We have used a structure based approach to develop modPDZpep, a program to predict the interaction partners of human PDZ domains and analyze structural details of PDZ interaction interfaces. modPDZpep predicts interaction partners by using structural models of PDZ-peptide complexes and evaluating binding energy scores using residue based statistical pair potentials. Since, it does not require training using experimental data on peptide binding affinity, it can predict substrates for diverse PDZ families. Because of the use of simple scoring function for binding energy, it is also fast enough for genome scale structure based analysis of PDZ interaction networks. Benchmarking using artificial as well as real negative datasets indicates good predictive power with ROC-AUC values in the range of 0.7 to 0.9 for a large number of human PDZ domains. Another novel feature of modPDZpep is its ability to map novel PDZ mediated interactions in human protein-protein interaction networks, either by utilizing available experimental phage display data or by structure based predictions.

Conclusions

In summary, we have developed modPDZpep, a web-server for structure based analysis of human PDZ domains. It is freely available at http://www.nii.ac.in/modPDZpep.html or http://202.54.226.235/modPDZpep.html .

Reviewers

This article was reviewed by Michael Gromiha and Zoltán Gáspári.",2016-09-21 +27207945,TarPmiR: a new approach for microRNA target site prediction.,"

Motivation

The identification of microRNA (miRNA) target sites is fundamentally important for studying gene regulation. There are dozens of computational methods available for miRNA target site prediction. Despite their existence, we still cannot reliably identify miRNA target sites, partially due to our limited understanding of the characteristics of miRNA target sites. The recently published CLASH (crosslinking ligation and sequencing of hybrids) data provide an unprecedented opportunity to study the characteristics of miRNA target sites and improve miRNA target site prediction methods.

Results

Applying four different machine learning approaches to the CLASH data, we identified seven new features of miRNA target sites. Combining these new features with those commonly used by existing miRNA target prediction algorithms, we developed an approach called TarPmiR for miRNA target site prediction. Testing on two human and one mouse non-CLASH datasets, we showed that TarPmiR predicted more than 74.2% of true miRNA target sites in each dataset. Compared with three existing approaches, we demonstrated that TarPmiR is superior to these existing approaches in terms of better recall and better precision.

Availability and implementation

The TarPmiR software is freely available at http://hulab.ucf.edu/research/projects/miRNA/TarPmiR/ CONTACTS: haihu@cs.ucf.edu or xiaoman@mail.ucf.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-20 +25315903,Investigation of phosphoproteome in RAGE signaling.,"The receptor for advanced glycation end products (RAGE) is one of the most important proteins implicated in diabetes, cardiovascular diseases, neurodegenerative diseases, and cancer. It is a pattern recognition receptor by virtue of its ability to interact with multiple ligands, RAGE activates several signal transduction pathways through involvement of various kinases that phosphorylate their respective substrates. Only few substrates have been known to be phosphorylated in response to activation by RAGE (e.g., nuclear factor kappa B); however, it is possible that these kinases can phosphorylate multiple substrates depending upon their expression and localization, leading to altered cellular responses in different cell types and conditions. One such example is, glycogen synthase kinase 3 beta which is known to phosphorylate glycogen synthase, acts downstream to RAGE, and hyperphosphorylates microtubule-associated protein tau causing neuronal damage. Thus, it is important to understand the role of various RAGE-activated kinases and their substrates. Therefore, we have reviewed here the details of RAGE-activated kinases in response to different ligands and their respective phosphoproteome. Furthermore, we discuss the analysis of the data mined for known substrates of these kinases from the PhosphoSitePlus (http://www.phosphosite.org) database, and the role of some of the important substrates involved in cancer, diabetes, cardiovascular diseases, and neurodegenerative diseases. 
In summary, this review provides information on RAGE-activated kinases and their phosphoproteome, which will be helpful in understanding the possible role of RAGE and its ligands in progression of diseases.",2014-12-17 +27974309,Analytical ingredient content and variability of adult multivitamin/mineral products: national estimates for the Dietary Supplement Ingredient Database.,"

Background

Multivitamin/mineral products (MVMs) are the dietary supplements most commonly used by US adults. During manufacturing, some ingredients are added in amounts exceeding the label claims to compensate for expected losses during the shelf life. Establishing the health benefits and harms of MVMs requires accurate estimates of nutrient intake from MVMs based on measures of actual rather than labeled ingredient amounts.

Objectives

Our goals were to determine relations between analytically measured and labeled ingredient content and to compare adult MVM composition with Recommended Dietary Allowances (RDAs) and Tolerable Upper Intake Levels.

Design

Adult MVMs were purchased while following a national sampling plan and chemically analyzed for vitamin and mineral content with certified reference materials in qualified laboratories. For each ingredient, predicted mean percentage differences between analytically obtained and labeled amounts were calculated with the use of regression equations.

Results

For 12 of 18 nutrients, most products had labeled amounts at or above RDAs. The mean measured content of all ingredients (except thiamin) exceeded labeled amounts (overages). Predicted mean percentage differences exceeded labeled amounts by 1.5-13% for copper, manganese, magnesium, niacin, phosphorus, potassium, folic acid, riboflavin, and vitamins B-12, C, and E, and by ∼25% for selenium and iodine, regardless of labeled amount. In contrast, thiamin, vitamin B-6, calcium, iron, and zinc had linear or quadratic relations between the labeled and percentage differences, with ranges from -6.5% to 8.6%, -3.5% to 21%, 7.1% to 29.3%, -0.5% to 16.4%, and -1.9% to 8.1%, respectively. Analytically adjusted ingredient amounts are linked to adult MVMs reported in the NHANES 2003-2008 via the Dietary Supplement Ingredient Database (http://dsid.usda.nih.gov) to facilitate more accurate intake quantification.

Conclusions

Vitamin and mineral overages were measured in adult MVMs, most of which already meet RDAs. Therefore, nutrient overexposures from supplements combined with typical food intake may have unintended health consequences, although this would require further examination.",2016-12-14 +23894186,BGDB: a database of bivalent genes.,"Bivalent gene is a gene marked with both H3K4me3 and H3K27me3 epigenetic modification in the same area, and is proposed to play a pivotal role related to pluripotency in embryonic stem (ES) cells. Identification of these bivalent genes and understanding their functions are important for further research of lineage specification and embryo development. So far, lots of genome-wide histone modification data were generated in mouse and human ES cells. These valuable data make it possible to identify bivalent genes, but no comprehensive data repositories or analysis tools are available for bivalent genes currently. In this work, we develop BGDB, the database of bivalent genes. The database contains 6897 bivalent genes in human and mouse ES cells, which are manually collected from scientific literature. Each entry contains curated information, including genomic context, sequences, gene ontology and other relevant information. The web services of BGDB database were implemented with PHP + MySQL + JavaScript, and provide diverse query functions. Database URL: http://dailab.sysu.edu.cn/bgdb/",2013-07-26 +25093074,"Evolving BioAssay Ontology (BAO): modularization, integration and applications.","The lack of established standards to describe and annotate biological assays and screening outcomes in the domain of drug and chemical probe discovery is a severe limitation to utilize public and proprietary drug screening data to their maximum potential. 
We have created the BioAssay Ontology (BAO) project (http://bioassayontology.org) to develop common reference metadata terms and definitions required for describing relevant information of low-and high-throughput drug and probe screening assays and results. The main objectives of BAO are to enable effective integration, aggregation, retrieval, and analyses of drug screening data. Since we first released BAO on the BioPortal in 2010 we have considerably expanded and enhanced BAO and we have applied the ontology in several internal and external collaborative projects, for example the BioAssay Research Database (BARD). We describe the evolution of BAO with a design that enables modeling complex assays including profile and panel assays such as those in the Library of Integrated Network-based Cellular Signatures (LINCS). One of the critical questions in evolving BAO is the following: how can we provide a way to efficiently reuse and share among various research projects specific parts of our ontologies without violating the integrity of the ontology and without creating redundancies. This paper provides a comprehensive answer to this question with a description of a methodology for ontology modularization using a layered architecture. Our modularization approach defines several distinct BAO components and separates internal from external modules and domain-level from structural components. This approach facilitates the generation/extraction of derived ontologies (or perspectives) that can suit particular use cases or software applications. We describe the evolution of BAO related to its formal structures, engineering approaches, and content to enable modeling of complex assays and integration with other ontologies and datasets.",2014-06-03 +29410384,Ambient Air Pollution and Chronic Bronchitis in a Cohort of U.S. Women.,"

Background

Limited evidence links air pollution exposure to chronic cough and sputum production. Few reports have investigated the association between long-term exposure to air pollution and classically defined chronic bronchitis.

Objectives

Our objective was to estimate the association between long-term exposure to particulate matter (diameter <10 μm, PM10; <2.5μm, PM2.5), nitrogen dioxide (NO2), and both incident and prevalent chronic bronchitis.

Methods

We estimated annual average PM2.5, PM10, and NO2 concentrations using a national land-use regression model with spatial smoothing at home addresses of participants in a prospective nationwide U.S. cohort study of sisters of women with breast cancer. Incident chronic bronchitis and prevalent chronic bronchitis, cough and phlegm, were assessed by questionnaires.

Results

Among 47,357 individuals with complete data, 1,383 had prevalent chronic bronchitis at baseline, and 647 incident cases occurred over 5.7-y average follow-up. No associations with incident chronic bronchitis were observed. Prevalent chronic bronchitis was associated with PM10 [adjusted odds ratio (aOR) per interquartile range (IQR) difference (5.8μg/m3)=1.07; 95% confidence interval (CI): 1.01, 1.13]. In never-smokers, PM2.5 was associated with prevalent chronic bronchitis (aOR=1.18 per IQR difference; 95% CI: 1.04, 1.34), and NO2 was associated with prevalent chronic bronchitis (aOR=1.10; 95% CI=1.01, 1.20), cough (aOR=1.10; 95% CI: 1.05, 1.16), and phlegm (aOR=1.07; 95% CI: 1.01, 1.14); interaction p-values (nonsmokers vs. smokers) <0.05.

Conclusions

PM10 exposure was related to chronic bronchitis prevalence. Among never-smokers, PM2.5 and NO2 exposure was associated with chronic bronchitis and component symptoms. Results may have policy ramifications for PM10 regulation by providing evidence for respiratory health effects related to long-term PM10 exposure. https://doi.org/10.1289/EHP2199.",2018-02-06 +28831411,Whole transcriptome data of zebrafish exposed to chronic dose of depleted uranium.,"The concentration of depleted uranium (DU) in the environment is expected to increase due to anthropogenic activities, posing potential risks on ecosystems. The effects of chronic exposure to DU at concentration close to the environmental standards (0.3-30 µg DU/L) are scarcely characterised. Genomic alterations caused by low doses of pollutants can potentially propagate over generations, but how these effects may affect the health of the progeny remain uncertain for the vast majority of toxicants. The present dataset describes the transcriptomic effects of a chronic exposure to 20 µg DU/L during 10 days on adult zebrafish (Danio rerio) organs, the brain, the testis and the ovaries. The potential multigenerational effects of DU were assessed on the progeny of the adult exposed fish at the two-cells stage and after four days of development. We describe in this article the summary statistics of the differential gene expression analysis and focus on key molecular pathways affected by an exposure to a low concentration of DU. The data presented in this study supports the observation made in Armant et al. (2017) [1] (https://doi.org/10.1016/j.dib.2016.05.007) that DU can induce a molecular stress in both adult zebrafish and their progeny. 
The raw dataset has been deposited at the Gene Expression Omnibus (GEO) repository under the accession number GEO:GSE96603.",2017-07-28 +27560171,"Transcript-level expression analysis of RNA-seq experiments with HISAT, StringTie and Ballgown.","High-throughput sequencing of mRNA (RNA-seq) has become the standard method for measuring and comparing the levels of gene expression in a wide variety of species and conditions. RNA-seq experiments generate very large, complex data sets that demand fast, accurate and flexible software to reduce the raw read data to comprehensible results. HISAT (hierarchical indexing for spliced alignment of transcripts), StringTie and Ballgown are free, open-source software tools for comprehensive analysis of RNA-seq experiments. Together, they allow scientists to align reads to a genome, assemble transcripts including novel splice variants, compute the abundance of these transcripts in each sample and compare experiments to identify differentially expressed genes and transcripts. This protocol describes all the steps necessary to process a large set of raw sequencing reads and create lists of gene transcripts, expression levels, and differentially expressed genes and transcripts. The protocol's execution time depends on the computing resources, but it typically takes under 45 min of computer time. HISAT, StringTie and Ballgown are available from http://ccb.jhu.edu/software.shtml.",2016-08-11 +24972631,Insulin and risk of diabetic retinopathy in patients with type 2 diabetes mellitus: data from a meta-analysis of seven cohort studies.,"

Background

Type 2 diabetes mellitus (T2DM) is a chronic incurable disease associated with multi-systemic complications. The chronic complications related to T2DM induce growing burden to the national health system. Diabetic retinopathy (DR) is the most serious ocular complication associated with T2DM and one of the leading causes of secondary blindness. The association between insulin use and DR risk has also been reported in different studies.

Methods

In order to obtain more informative results on the relationship between insulin intake and risk of DR and to take into account more recent evidence, we conducted this meta-analysis by including all available relevant cohort studies. A systematic literature search was performed via electronic databases including Pubmed and EMBASE to identify all available relevant studies until February 2014. A total of seven cohort studies were included in this meta-analysis. In this meta-analysis, we conducted a rigorous search of all available published cohort studies to quantify the possible association between insulin use and incidental DR in individuals with type 2 diabetes.

Results

Although major heterogeneity existed in this study, the significant association between insulin use and risk of DR was detected. The subgroup analyses by study design, region, data source and adjustment of HbA1c generated similar results. Also, when the DM duration was adjusted, no result was reported with significant difference.

Conclusion

The results of this meta-analysis help to better explore the role of insulin use in DR risk development. Meanwhile, our results are statistically robust and yield important conclusions. The underlying mechanism by which insulin use increases DR risk should be explored in future in vitro and in vivo studies. Additional large-scale, well-designed studies with sufficient data are needed to confirm our findings.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2003724731291657.",2014-06-27 +27682468,Botryosphaeria dothidea: a latent pathogen of global importance to woody plant health.,"Botryosphaeria dothidea is the type species of Botryosphaeria (Botryosphaeriaceae, Botryosphaeriales). Fungi residing in this order are amongst the most widespread and important canker and dieback pathogens of trees worldwide, with B. dothidea one of the most common species on a large number of hosts. Its taxonomic circumscription has undergone substantial change in the past decade, making it difficult to interpret the large volume of literature linked to the name B. dothidea. This pathogen profile synthesizes the current understanding of B. dothidea pertaining to its distribution, host associations and role as a pathogen in managed and natural woody environments. The prolonged latent infection or endophytic phase is of particular importance, as it implies that the fungus can easily pass undetected by quarantine systems in traded living plants, fruits and other plant parts. Infections typically become obvious only under conditions of host stress, when disease symptoms develop. This study also considers the knowledge emerging from the recently sequenced B. dothidea genome, elucidating previously unknown aspects of the species, including mating and host infection strategies. Despite more than 150 years of research on B. dothidea, there is clearly much to be learned regarding this global tree pathogen. This is increasingly important given the stresses imposed on various woody hosts as a result of climate change.

Taxonomy

Botryosphaeria dothidea (Moug. ex Fr) Ces. & De Not, 1863. Kingdom Fungi, Phylum Ascomycota, Class Dothideomycetes, Order Botryosphaeriales, Family Botryosphaeriaceae, Genus Botryosphaeria, Species dothidea.

Host range

Confirmed on more than 24 host genera, including woody plants, such as Acacia (= Vachellia), Eucalyptus, Vitis and Pistachio.

Disease symptoms

Associated with twig, branch and stem cankers, tip and branch dieback, fruit rot, blue stain and plant death.

Useful websites

The Botryosphaeria site for detailed morphological descriptions (http://www.crem.fct.unl.pt/botryosphaeria_site/); Systematic Mycology and Microbiology Laboratory Fungal Database for all literature and associated hosts (https://nt.ars-grin.gov/fungaldatabases/); TreeBASE link for the combined ITS and TEF-1α tree (http://purl.org/phylo/treebase/phylows/study/TB2:S18906); DOE Joint Genome Institute, JGI Mycocosm for the Botryosphaeria dothidea genome (http://genome.jgi.doe.gov/Botdo1_1/Botdo1_1.home.html).",2016-12-13 +27976886,MIB: Metal Ion-Binding Site Prediction and Docking Server.,"The structure of a protein determines its biological function(s) and its interactions with other factors; the binding regions tend to be conserved in sequence and structure, and the interacting residues involved are usually in close 3D space. The Protein Data Bank currently contains more than 110 000 protein structures, approximately one-third of which contain metal ions. Identifying and characterizing metal ion-binding sites is thus essential for investigating a protein's function(s) and interactions. However, experimental approaches are time-consuming and costly. The web server reported here was built to predict metal ion-binding residues and to generate the predicted metal ion-bound 3D structure. Binding templates have been constructed for regions that bind 12 types of metal ion-binding residues have been used to construct binding templates. The templates include residues within 3.5 Å of the metal ion, and the fragment transformation method was used for structural comparison between query proteins and templates without any data training. Through the adjustment of scoring functions, which are based on the similarity of structure and binding residues. Twelve kinds of metal ions (Ca2+, Cu2+, Fe3+, Mg2+, Mn2+, Zn2+, Cd2+, Fe2+, Ni2+, Hg2+, Co2+, and Cu+) binding residues prediction are supported. MIB also provides the metal ions docking after prediction. 
The MIB server is available at http://bioinfo.cmu.edu.tw/MIB/ .",2016-12-15 +24279471,Prognostic value of a nine-gene signature in glioma patients based on mRNA expression profiling.,"

Introduction

Gliomas are the most common primary brain tumors in adults and a significant cause of cancer-related mortality. A 9-gene signature was identified as a novel prognostic model reflecting survival situation obviously in gliomas.

Aims

To identify an mRNA expression signature to improve outcome prediction for patients with different glioma grades.

Results

We used whole-genome mRNA expression microarray data of 220 glioma samples of all grades from the Chinese Glioma Genome Atlas (CGGA) database (http://www.cgga.org.cn) as a discovery set and data from Rembrandt and GSE16011 for validation sets. Data from every single grade were analyzed by the Kaplan-Meier method with a two-sided log-rank test. Univariate Cox regression and linear risk score formula were applied to derive a gene signature with better prognostic performance. We found that patients who had high risk score according to the signature had poor overall survival compared with patients who had low risk score. Highly expressed genes in the high-risk group were analyzed by gene ontology (GO) and gene set variation analysis (GSVA). As a result, the reason for the divisibility of gliomas was likely due to cell life processes and adhesion.

Conclusion

This 9-gene-signature prediction model provided a more accurate predictor of prognosis that denoted patients with high risk score have poor outcome. Moreover, these risk models based on defined molecular profiles showed the considerable prospect in personalized cancer management.",2013-11-27 +27153577,"Analysis of CFSE time-series data using division-, age- and label-structured population models.","

Motivation

In vitro and in vivo cell proliferation is often studied using the dye carboxyfluorescein succinimidyl ester (CFSE). The CFSE time-series data provide information about the proliferation history of populations of cells. While the experimental procedures are well established and widely used, the analysis of CFSE time-series data is still challenging. Many available analysis tools do not account for cell age and employ optimization methods that are inefficient (or even unreliable).

Results

We present a new model-based analysis method for CFSE time-series data. This method uses a flexible description of proliferating cell populations, namely, a division-, age- and label-structured population model. Efficient maximum likelihood and Bayesian estimation algorithms are introduced to infer the model parameters and their uncertainties. These methods exploit the forward sensitivity equations of the underlying partial differential equation model for efficient and accurate gradient calculation, thereby improving computational efficiency and reliability compared with alternative approaches and accelerating uncertainty analysis. The performance of the method is assessed by studying a dataset for immune cell proliferation. This revealed the importance of different factors on the proliferation rates of individual cells. Among others, the predominate effect of cell age on the division rate is found, which was not revealed by available computational methods.

Availability and implementation

The MATLAB source code implementing the models and algorithms is available from http://janhasenauer.github.io/ShAPE-DALSP/ Contact: jan.hasenauer@helmholtz-muenchen.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-08 +29320006,Clinical Effectiveness and Cost Effectiveness of Intracoronary Brachytherapy and Drug Eluting Stents,"

Background

Restenosis is one of the most important problems limiting the long-term success of coronary angioplasty. Use of stents has successfully reduced the problem of restenosis, on average from 22 % to 32 %, although with higher values for patients in high risk groups. The use of stents however, has led to the challenge of handling in-stent restenosis. In-stent restenosis is the result of a process called intima hyperplasia whereby local cell activation and inflammation stimulates growth of smooth muscle cells and deposition of extracellular matrix within the vessel lumen. Approaches to combat the problem of restenosis such as systemic or local drug administration or intracoronary brachytherapy have had modest success. A recent approach is the use of drug eluting stents (DES) that may interfere with the proliferative response leading to in-stent restenosis. Drug eluting stents provide a local drug reservoir that is released within a time period of 10-30 days, with no detectable systemic drug levels. Several antiproliferative agents added to different stents are under clinical investigation. Rapamycins, which includes the drugs Sirolimus, Everolimus and Tacrolimus, are immunosuppressive agents that inhibit proliferation of smooth muscle cells. Taxol-based drugs, such as 7-hexaonyltaxol or paclitaxel are cytotoxic drugs that interfere with cell proliferation, and are currently used in cancer chemotherapy. Following the recent approval of two types of drug-eluting stents in Europe and North America, drug eluting stents are rapidly disseminating throughout the health care systems in several countries. The uptake is advocated by great enthusiasm following positive results from randomised controlled trials.

Objectives

To assess the clinical effectiveness and cost effectiveness of intracoronary brachytherapy and drug eluting stents. To discuss possible implications of these findings for the Norwegian health care setting.

Search strategy

Eligible studies were identified by searches in Medline for the period from 1966 until March 1st 2004 with the search profile: (intracoronar* or vascular or coronar*) and (radiotherapy or radiation or brachytherapy or coat* or eluting or tacrolimus or paclitaxel or sirolimus or taxol or everolimus) and stent*. Additional searches for unpublished studies and presentations from ongoing trials were undertaken at the following web-sites: TCTmd (http://www.tctmd.com/), American college of cardiology (http://www.acc.org/) and Euro PCR (http://www.europcr.com/). In addition information and results from ongoing trials were kindly provided by Johnson and Johnson and Boston Scientific. The systematic search yielded 641 references and 57 conference presentations. 24 publications and nine conference presentations were included for drug-eluting stents and 29 publications for intracoronary brachytherapy.

Inclusion criteria

Population: patients with angina, ischemia, stenosis, restenosis, in-stent restenosis or graftstenosis Intervention: intracoronary brachytherapy or drug eluting stents Outcomes: mortality, myocardial infarction, revascularisation Angiographic measures: restenosis, diameter stenosis and late loss Study design: RCT, controlled trial, case series

Exclusions

Stents with heparin, radioactive stents, liquid-balloon based brachytherapy

Data collection

All articles were independently reviewed by at least two authors. The final set was agreed by consensus. Methodological quality was assessed according to criteria used by the Norwegian Centre for Health Technology Assessment (based on Cochrane reviews handbook and CRD guidelines) supplemented with clinical criteria defined by the review group.

Results

Evidence tables with data from included studies and results of assessment are shown in attachment 4 (in English).

Intracoronary brachytherapy

We identified nine RCTs comparing intracoronary brachytherapy with placebo treatment for patients with in-stent restenosis. Intracoronary brachytherapy (beta- or gamma radiation) reduced the risk for revascularisation by 34-44 % compared with placebo after 1 year follow up. The effect was maintained also after 5 years follow up in two gamma brachytherapy studies. The evidence regarding effect on death or myocardial infarction was insufficient for conclusions. No study or the metaanalysis of these studies had sufficient statistical power to analyse effect on clinical outcomes. Brachytherapy was associated with an increased risk of late thrombosis RR 2.18 (1.00-5.33) after 9-12 months follow up.

Drug eluting stents

We identified 13 RCTs that compared drug eluting stents with bare metal stents with over 5 000 patients included. All studies were randomised placebo controlled clinical trials. Six trials evaluated paclitaxel-eluting stents, and six trials evaluated rapamycin-eluting stents. Most studies included patients with short lesions (< 15 mm) in large vessels (> 2.8 mm) in native coronary arteries, although three RCTs included patients with long lesions and / or small coronary arteries. Except for one small study all studies included patients with diabetes. Patients with thrombus, acute myocardial infarction were excluded in these studies. Seven non-controlled trials were identified that evaluated drug eluting stents for indications not included in RCTs. Mortality is a rare event following PCI, and none of the included studies had statistical power to assess effect on mortality. The combined estimate for all-cause mortality after 9-12 months follow up was 1.1 % in DES group and 0.7 % in the BMS group, with a combined RR of 1.56 (95 % CI 0.63-3.87). After two years follow up the RR for all cause mortality was 1.39 (0.75-2.58). The combined rates for cardiac mortality was 0.8 % in DES group and 0.9 % in BMS group after 9-12 months follow up, RR 0.88 (95 % CI 0.39-1.95), with similar rates also after 2 years follow up. The relative risk for cardiac mortality was 0.83 (0.41-1.69) after 2 years follow up based on data from almost 2000 patients. While all sirolimus trials reported all cause mortality, most paclitaxel trials reported cardiac mortality only. We do not know how paclitaxel eluting stents impact on all-cause mortality, or whether the direction of effect is confirmative or opposite to what has been reported in sirolimus trials. Thus we do not have sufficient data for conclusions regarding the long term safety of drug eluting stents with respect to mortality. There was no effect on rates of myocardial infarction. 
The combined risk estimate for paclitaxel studies after 1 year follow up was RR 0.94 (95 % CI 0.62-1.44) and for rapamycin studies RR 0.98 (95 % CI 0.58-1.66) after 1 year follow up, with similar figures for 2-3 years follow up in rapamycin studies. Thrombosis has been a concern because of the increased risk of thrombosis following intracoronary brachytherapy. Thrombosis was a rare event and the metaanalysis of these studies showed apparently no difference between groups: late thrombosis was reported for 0.6 % of patients treated with DES and 0.8 % of patients given BMS, RR 0.98 (95 % CI 0.46-2.06). Drug eluting stents, whether rapamycin or paclitaxel, reduced rates of revascularisation for a follow up of 6 to 36 months. The combined results from all studies showed an absolute reduction in reintervention of 9.4 %, RR 0.37 (0.24-0.56). Importantly, this effect was shown to be maintained also after 2 years follow up. There were 147 reinterventions in the DES group (n=1801) compared with 373 reinterventions in the BMS group (n=1727), (RR 0.36 (0.25-0.50) p< 0.0001). Reintervention was reported for restenosis in the stent, lesion or target vessel, and were clinically driven according to FDAs criteria in five trials but not stated in seven trials. Several studies report subgroup analysis, with stratification of patients with diabetes, lesions in small vessel or long lesions. No study had power to analyse effect in subgroups. All studies showed reduced rates of revascularisation for patients with diabetes given DES compared with BMS with combined RR 0.37 (95 % CI 0.25-0.54) for 9-12 months follow up. Two studies stratified on lesion length and vessel diameter. Taxus IV found significantly reduced rates of restenosis for patients with lesions in vessel <3.0 mm RR 0.29 (95 % CI 0.19-0.52) but not for vessel ≧3.0 mm RR 0.43 (95 % CI 0.16-1.16). Sirius reported comparable results for small and large vessels with stratification on 2.75 mm. 
Similarly good results were reported for patients with long lesions in Taxus IV and Sirius. MACE was reported as a composite outcome of death (cardiac or all cause), myocardial infarction and revascularisation. Most studies reported significant reduction in MACE with combined RR estimated for paclitaxel studies of 0.58 (95 % CI 0.47-0.72) and rapamycin studies RR 0.34 (95 % CI 0.27-0.45) after 1 year follow up. Several ongoing or planned trials are expected to make important contributions that may influence the findings in this review. At present results from studies with approximately 5000 patients have been included in this review, in the next few years results from additional 8000 patients is expected.

Comments

Two main findings emerged from the systematic review and metaanalysis of these trials. Drug eluting stents and intracoronary brachytherapy reduced rates of revascularisation. However, possible effect on clinical outcomes such as mortality is at present insufficiently addressed. When new technologies are introduced into clinical practice, the question of clinical effectiveness and the safety of the technology need to be addressed, to ensure that patients are given efficient and safe treatment. This is especially challenging when considering fast evolving technologies such as drug eluting stents. The past history of abandoned studies calls for caution regarding the potential offset between benefit and harm. The life-span of a systematic review in a fast evolving field such as drug eluting stents is short. Several ongoing trials accounting for over 8000 patients will make important contributions regarding clinical effectiveness of this technology. The results from this systematic review may also have implications for the future reporting of outcomes from ongoing and planned clinical trials, especially the use of composite endpoints. Use of MACE as the hierarchical combination of death, myocardial infarction and revascularisation is misleading, both due to the possibility of divergent effects of individual outcomes, and due to the fact that revascularisation counts equally with mortality.",2018-01-11 +,PS1-6: The Cancer Research Network: Creating New Possibilities for Cancer Prevention and Improved Cancer Outcomes,"

Background/Aims

Prevention and early detection can greatly reduce cancer-related morbidity and mortality, yet little is known about how well many interventions perform in large populations and the extent to which implementation failures might be remediable.

Methods

The NCI-funded Cancer Research Network (CRN) (U24CA17154) continues to build infrastructure for multi-site collaborations. The CRN provides unparalleled opportunities for conducting innovative cancer prevention and cancer screening research.

Results

Advantages of the CRN include: (1) A large, diverse, membership of approximately 8.5 million individuals for studying prevention and screening across population subgroups in community-based settings. (2) Close ties to CRN health plans whose systems and providers can be intervention targets critical to the success of prevention and screening efforts. (3) Access to the primary care setting enabling study of patient-provider decision making regarding prevention and screening behaviors. (4) Longitudinal data on screening contacts, interventions, and outcomes providing a unique opportunity to evaluate “real-world” screening and prevention, and a means of identifying potentially modifiable failures. (5) Biological samples of biopsies archived for many members. (6) Nationally-recognized leaders in cancer prevention and screening. The CRN investigator-led research portfolio includes expertise in tobacco control, dietary interventions, cancer screening and early detection. (7) Long-term relationships among CRN internal and external scientists to create efficient collaborations. CRN partnerships include the HMO Research Network, NCI-designated cancer centers, federal agencies, and numerous academic institutions.

Conclusions

Achieving national goals for reducing the burden of cancer will require new knowledge about how to optimize existing strategies for prevention and screening; new research focused on biologic, behavioral, pharmacologic, and molecular risk factors; and evaluation of the interactions between behavior change, personal factors, the built environment, and health care systems. For more information about the opportunities afforded by the CRN visit http://crn.cancer.gov.",2013-09-01 +,Comparison of digital image analysis using elliptic Fourier descriptors and major dimensions to phenotype seed shape in hexaploid wheat (Triticum aestivum L.),"Digital image analysis (DIA) is widely used for describing plant organ shape. However, the various types of shape descriptors that can be generated using DIA may identify different loci in genetic analyses. The purpose of this study was to evaluate two different DIA approaches to quantifying wheat seed shape for exploring trait correlations and quantitative trait loci (QTL) mapping. Phenotypic data were produced using the software programs ImageJ (National Institutes of Health, USA, http://rsbweb.nih.gov/ij/ ) and SHAPE (Hiroyoshi Iwata, http://lbm.ab.a.u-tokyo.ac.jp/~iwata/shape/ ). ImageJ generates measures of length, width, perimeter, and area that can be used to describe dimensions of objects, whereas SHAPE generates elliptic Fourier descriptors (EFDs) to capture shape variation such as roughness, asymmetric skewing, or other two-dimensional aspects not encompassed by axes or distinctions in overall object area. There were significant differences in the results of the QTL analysis depending on the DIA software used. The use of EFDs to characterize horizontal measures of seed shape in wheat identified more QTL with higher LOD scores than length to width ratio. 
Additionally, the entire three dimensional shape of the seed described using two images in different orientations was shown to identify seed shape QTL that co-located with flour yield (FLYLD) and would go undetected based solely on a two dimensional image of the seed. Both methods identified QTL for length, width, thickness, and vertical perimeter that were co-localized with QTL for FLYLD.",2013-03-01 +27000293,Classification of gene expression data: A hubness-aware semi-supervised approach.,"

Background and objective

Classification of gene expression data is the common denominator of various biomedical recognition tasks. However, obtaining class labels for large training samples may be difficult or even impossible in many cases. Therefore, semi-supervised classification techniques are required as semi-supervised classifiers take advantage of unlabeled data.

Methods

Gene expression data is high-dimensional which gives rise to the phenomena known under the umbrella of the curse of dimensionality, one of its recently explored aspects being the presence of hubs or hubness for short. Therefore, hubness-aware classifiers have been developed recently, such as Naive Hubness-Bayesian k-Nearest Neighbor (NHBNN). In this paper, we propose a semi-supervised extension of NHBNN which follows the self-training schema. As one of the core components of self-training is the certainty score, we propose a new hubness-aware certainty score.

Results

We performed experiments on publicly available gene expression data. These experiments show that the proposed classifier outperforms its competitors. We investigated the impact of each of the components (classification algorithm, semi-supervised technique, hubness-aware certainty score) separately and showed that each of these components is relevant to the performance of the proposed approach.

Conclusions

Our results imply that our approach may increase classification accuracy and reduce computational costs (i.e., runtime). Based on the promising results presented in the paper, we envision that hubness-aware techniques will be used in various other biomedical machine learning tasks. In order to accelerate this process, we made an implementation of hubness-aware machine learning techniques publicly available in the PyHubs software package (http://www.biointelligence.hu/pyhubs) implemented in Python, one of the most popular programming languages of data science.",2016-02-11 +28663600,Enabling Interactive Measurements from Large Coverage Microscopy.,"Microscopy could be an important tool for characterizing stem cell products if quantitative measurements could be collected over multiple spatial and temporal scales. With the cells changing states over time and being several orders of magnitude smaller than cell products, modern microscopes are already capable of imaging large spatial areas, repeat imaging over time, and acquiring images over several spectra. However, characterizing stem cell products from such large image collections is challenging because of data size, required computations, and lack of interactive quantitative measurements needed to determine release criteria. We present a measurement web system consisting of available algorithms, extensions to a client-server framework using Deep Zoom, and the configuration know-how to provide the information needed for inspecting the quality of a cell product. The cell and other data sets are accessible via the prototype web-based system at http://isg.nist.gov/deepzoomweb.",2016-07-01 +28847712,Corrections for multiple comparisons in voxel-based lesion-symptom mapping.,"Voxel-based lesion-symptom mapping (VLSM) is an important method for basic and translational human neuroscience research. 
VLSM leverages modern neuroimaging analysis techniques to build on the classic approach of examining the relationship between location of brain damage and cognitive deficits. Testing an association between deficit severity and lesion status in each voxel involves very many individual tests and requires statistical correction for multiple comparisons. Several strategies have been adapted from analysis of functional neuroimaging data, though VLSM faces a more difficult trade-off between avoiding false positives and statistical power (missing true effects). We used simulated and real deficit scores from a sample of approximately 100 individuals with left hemisphere stroke to evaluate two such permutation-based approaches. Using permutation to set a minimum cluster size identified a region that systematically extended well beyond the true region, making it ill-suited to identifying brain-behavior relationships. In contrast, generalizing the standard permutation-based family-wise error correction approach provided a principled way to balance false positives and false negatives. Comparison with the widely-used parametric false discovery rate (FDR) correction showed that FDR produces anti-conservative results at smaller sample sizes (N = 30-60). An implementation of the continuous permutation-based FWER correction method described here is included in the lesymap package for lesion-symptom mapping (https://dorianps.github.io/LESYMAP/).",2017-08-26 +21605702,"miRWalk--database: prediction of possible miRNA binding sites by ""walking"" the genes of three genomes.","MicroRNAs are small, non-coding RNA molecules that can complementarily bind to the mRNA 3'-UTR region to regulate the gene expression by transcriptional repression or induction of mRNA degradation. Increasing evidence suggests a new mechanism by which miRNAs may regulate target gene expression by binding in promoter and amino acid coding regions. 
Most of the existing databases on miRNAs are restricted to mRNA 3'-UTR region. To address this issue, we present miRWalk, a comprehensive database on miRNAs, which hosts predicted as well as validated miRNA binding sites, information on all known genes of human, mouse and rat. All mRNAs, mitochondrial genes and 10 kb upstream flanking regions of all known genes of human, mouse and rat were analyzed by using a newly developed algorithm named 'miRWalk' as well as with eight already established programs for putative miRNA binding sites. An automated and extensive text-mining search was performed on PubMed database to extract validated information on miRNAs. Combined information was put into a MySQL database. miRWalk presents predicted and validated information on miRNA-target interaction. Such a resource enables researchers to validate new targets of miRNA not only on 3'-UTR, but also on the other regions of all known genes. The 'Validated Target module' is updated every month and the 'Predicted Target module' is updated every 6 months. 
miRWalk is freely available at http://mirwalk.uni-hd.de/.",2011-05-14 +28387272,Guidelines on the timing and frequency of bitewing radiography: a systematic review.,"Objectives To identify guidelines on when and how frequently bitewing radiographs should be used in dentistry for the diagnosis of caries, and to provide an objective appraisal of their quality. Data sources MEDLINE (OVID), US National Guideline Clearinghouse (www.guideline.gov) and the Royal College of Surgeons of England (https://www.rcseng.ac.uk/fds/publications-clinical-guidelines/clinical_guidelines) websites were searched using a variety of relevant search terms (2 August 2016). Data selection Publications were included if they made recommendations on the issue of when and how frequently radiographs should be used in any dentally-related specialty pertaining to the diagnosis of caries; and/or if they were aimed at the individual practitioner (any health professional working within dentistry) and/or patients. Data analysis Thirteen published guidelines were included and assessed using the AGREE II instrument. Conclusions There was a significant variation amongst the guidelines in the recommendations at what age radiography should be undertaken. There was also disagreement on the frequency of repeat radiographs and how this is influenced by the age of the patient and their caries risk.",2017-04-01 +28158457,Efficient computation of transfer free energies of amino acids in beta-barrel membrane proteins.,"

Motivation

Transmembrane beta-barrel proteins (TMBs) serve a multitude of essential cellular functions in Gram-negative bacteria, mitochondria and chloroplasts. Transfer free energies (TFEs) of residues in the transmembrane (TM) region provide fundamental quantifications of thermodynamic stabilities of TMBs, which are important for the folding and the membrane insertion processes, and may help in understanding the structure-function relationship. However, experimental measurement of TFEs of TMBs is challenging. Although a recent computational method can be used to calculate TFEs, the results of which are in excellent agreement with experimentally measured values, this method does not scale up, and is limited to small TMBs.

Results

We have developed an approximation method that calculates TFEs of TM residues in TMBs accurately, with which depth-dependent transfer free energy profiles can be derived. Our results are in excellent agreement with experimental measurements. This method is efficient and applicable to all bacterial TMBs regardless of the size of the protein.

Availability and implementation

An online webserver is available at http://tanto.bioe.uic.edu/tmb-tfe .

Contact

: jliang@uic.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +25263599,On the advantages of word frequency and contextual diversity measures extracted from subtitles: The case of Portuguese.,"We examined the potential advantage of the lexical databases using subtitles and present SUBTLEX-PT, a new lexical database for 132,710 Portuguese words obtained from a 78 million corpus based on film and television series subtitles, offering word frequency and contextual diversity measures. Additionally we validated SUBTLEX-PT with a lexical decision study involving 1920 Portuguese words (and 1920 nonwords) with different lengths in letters (M = 6.89, SD = 2.10) and syllables (M = 2.99, SD = 0.94). Multiple regression analyses on latency and accuracy data were conducted to compare the proportion of variance explained by the Portuguese subtitle word frequency measures with that accounted by the recent written-word frequency database (Procura-PALavras; P-PAL; Soares, Iriarte, et al., 2014 ). As its international counterparts, SUBTLEX-PT explains approximately 15% more of the variance in the lexical decision performance of young adults than the P-PAL database. Moreover, in line with recent studies, contextual diversity accounted for approximately 2% more of the variance in participants' reading performance than the raw frequency counts obtained from subtitles. SUBTLEX-PT is freely available for research purposes (at http://p-pal.di.uminho.pt/about/databases ).",2014-11-07 +27697219,A new biological and clinical resource for research into pregnancy complications: The Baby Bio Bank.,"About 20% of pregnancies are affected by some form of complication. Research has shown that anomalies in implantation, development, and growth of the fetus; ineffective nutrient exchange between mother and fetus due to placental dysfunction; and maternal problems such as hypertension or infection during pregnancy can all lead to adverse pregnancy outcomes. 
However, the molecular aetiology of such events remains poorly understood. Fetal growth restriction (FGR), recurrent miscarriage (RM), preterm birth (PTB), and pre-eclampsia (PE) are the most common pregnancy complications encountered in the UK and these outcomes can result in an array of morbidities in both mother and baby, and in the most severe cases in mortality. We need to know more about normal pregnancy and where the important triggers are for failure. This prompted us to collect a large set of biological samples with matching clinical data from over 2500 normal and abnormal pregnancies, for use in research into these conditions. This paper outlines the nature of these sample sets and their availability to academia and industry, with the intention that their widespread use in research will make significant contributions to the improvement of maternal and fetal health worldwide (http://www.ucl.ac.uk/tapb/sample-and-data-collections-at-ucl/biobanks-ucl/baby-biobank).",2016-08-24 +25505034,Plant Omics Data Center: an integrated web repository for interspecies gene expression networks with NLP-based curation.,"Comprehensive integration of large-scale omics resources such as genomes, transcriptomes and metabolomes will provide deeper insights into broader aspects of molecular biology. For better understanding of plant biology, we aim to construct a next-generation sequencing (NGS)-derived gene expression network (GEN) repository for a broad range of plant species. So far we have incorporated information about 745 high-quality mRNA sequencing (mRNA-Seq) samples from eight plant species (Arabidopsis thaliana, Oryza sativa, Solanum lycopersicum, Sorghum bicolor, Vitis vinifera, Solanum tuberosum, Medicago truncatula and Glycine max) from the public short read archive, digitally profiled the entire set of gene expression profiles, and drawn GENs by using correspondence analysis (CA) to take advantage of gene expression similarities. 
In order to understand the evolutionary significance of the GENs from multiple species, they were linked according to the orthology of each node (gene) among species. In addition to other gene expression information, functional annotation of the genes will facilitate biological comprehension. Currently we are improving the given gene annotations with natural language processing (NLP) techniques and manual curation. Here we introduce the current status of our analyses and the web database, PODC (Plant Omics Data Center; http://bioinf.mind.meiji.ac.jp/podc/), now open to the public, providing GENs, functional annotations and additional comprehensive omics resources.",2014-12-11 +28148240,A novel statistical approach for identification of the master regulator transcription factor.,"

Background

Transcription factors are known to play key roles in carcinogenesis and therefore, are gaining popularity as potential therapeutic targets in drug development. A 'master regulator' transcription factor often appears to control most of the regulatory activities of the other transcription factors and the associated genes. This 'master regulator' transcription factor is at the top of the hierarchy of the transcriptomic regulation. Therefore, it is important to identify and target the master regulator transcription factor for proper understanding of the associated disease process and identifying the best therapeutic option.

Methods

We present a novel two-step computational approach for identification of master regulator transcription factor in a genome. At the first step of our method we test whether there exists any master regulator transcription factor in the system. We evaluate the concordance of two ranked lists of transcription factors using a statistical measure. In case the concordance measure is statistically significant, we conclude that there is a master regulator. At the second step, our method identifies the master regulator transcription factor, if there exists one.

Results

In the simulation scenario, our method performs reasonably well in validating the existence of a master regulator when the number of subjects in each treatment group is reasonably large. In application to two real datasets, our method ensures the existence of master regulators and identifies biologically meaningful master regulators. An R code for implementing our method in a sample test data can be found in http://www.somnathdatta.org/software .

Conclusion

We have developed a screening method of identifying the 'master regulator' transcription factor just using only the gene expression data. Understanding the regulatory structure and finding the master regulator help narrowing the search space for identifying biomarkers for complex diseases such as cancer. In addition to identifying the master regulator our method provides an overview of the regulatory structure of the transcription factors which control the global gene expression profiles and consequently the cell functioning.",2017-02-02 +27940215,HTLV-1aA introduction into Brazil and its association with the trans-Atlantic slave trade.,"

Introduction

Human T-lymphotropic virus (HTLV) is an endemic virus in some parts of the world, with Africa being home to most of the viral genetic diversity. In Brazil, HTLV-1 is endemic amongst Japanese and African immigrant populations. Multiple introductions of the virus in Brazil from other epidemic foci were hypothesized. The long terminal repeat (LTR) region of HTLV-1 was used to infer the origin of the virus in Brazil, using phylogenetic analysis.

Methods

LTR sequences were obtained from the HTLV-1 database (http://htlv1db.bahia.fiocruz.br). Sequences were aligned and maximum-likelihood and Bayesian tree topologies were inferred. Brazilian specific clusters were identified and molecular-clock and coalescent models were used to estimate each cluster's time to the most recent common ancestor (tMRCA).

Results

Three Brazilian clusters were identified, with posterior probabilities ranging from 0.61 to 0.99. Molecular clock analysis of these three clusters dated their respective tMRCAs to between the year 1499 and the year 1668. Additional analysis also identified a close association between Brazilian sequences and new sequences from South Africa.

Conclusion

Our results support the hypothesis of multiple introductions of HTLV-1 into Brazil, with the majority of introductions occurring in the post-Columbian period. Our results further suggest that HTLV-1 introduction into Brazil was facilitated by the trans-Atlantic slave trade from endemic areas of Africa. The close association between southern African and Brazilian sequences also suggested that greater numbers of the southern African Bantu population might also have been part of the slave trade than previously thought.",2016-12-08 +26873924,Circular RNA profile in gliomas revealed by identification tool UROBORUS.,"Recent evidence suggests that many endogenous circular RNAs (circRNAs) may play roles in biological processes. However, the expression patterns and functions of circRNAs in human diseases are not well understood. Computationally identifying circRNAs from total RNA-seq data is a primary step in studying their expression pattern and biological roles. In this work, we have developed a computational pipeline named UROBORUS to detect circRNAs in total RNA-seq data. By applying UROBORUS to RNA-seq data from 46 gliomas and normal brain samples, we detected thousands of circRNAs supported by at least two read counts, followed by successful experimental validation on 24 circRNAs from the randomly selected 27 circRNAs. UROBORUS is an efficient tool that can detect circRNAs with low expression levels in total RNA-seq without RNase R treatment. The circRNAs expression profiling revealed more than 476 circular RNAs differentially expressed in control brain tissues and gliomas. Together with parental gene expression, we found that circRNA and its parental gene have diversified expression patterns in gliomas and control brain tissues. This study establishes an efficient and sensitive approach for predicting circRNAs using total RNA-seq data. 
The UROBORUS pipeline can be accessed freely for non-commercial purposes at http://uroborus.openbioinformatics.org/.",2016-02-11 +25324305,MoonProt: a database for proteins that are known to moonlight.,"Moonlighting proteins comprise a class of multifunctional proteins in which a single polypeptide chain performs multiple biochemical functions that are not due to gene fusions, multiple RNA splice variants or pleiotropic effects. The known moonlighting proteins perform a variety of diverse functions in many different cell types and species, and information about their structures and functions is scattered in many publications. We have constructed the manually curated, searchable, internet-based MoonProt Database (http://www.moonlightingproteins.org) with information about the over 200 proteins that have been experimentally verified to be moonlighting proteins. The availability of this organized information provides a more complete picture of what is currently known about moonlighting proteins. The database will also aid researchers in other fields, including determining the functions of genes identified in genome sequencing projects, interpreting data from proteomics projects and annotating protein sequence and structural databases. In addition, information about the structures and functions of moonlighting proteins can be helpful in understanding how novel protein functional sites evolved on an ancient protein scaffold, which can also help in the design of proteins with novel functions.",2014-10-16 +25656518,Profiling phenome-wide associations: a population-based observational study.,"

Objectives

To objectively characterize phenome-wide associations observed in the entire Taiwanese population and represent them in a meaningful, interpretable way.

Study design

In this population-based observational study, we analyzed 782 million outpatient visits and 15 394 unique phenotypes that were observed in the entire Taiwanese population of over 22 million individuals. Our data was obtained from Taiwan's National Health Insurance Research Database. Results: We stratified the population into 20 gender-age groups and generated 28.8 million and 31.8 million pairwise odds ratios from male and female subpopulations, respectively. These associations can be accessed online at http://associations.phr.tmu.edu.tw. To demonstrate the database and validate the association estimates obtained, we used correlation analysis to analyze 100 phenotypes that were observed to have the strongest positive association estimates with respect to essential hypertension. The results indicated that association patterns tended to have a strong positive correlation between adjacent age groups, while correlation estimates tended to decline as groups became more distant in age, and they diverged when assessed across gender groups.

Conclusions

The correlation analysis of pairwise disease association patterns across different age and gender groups led to outcomes that were broadly predicted before the analysis, thus confirming the validity of the information contained in the presented database. More diverse individual disease-specific analyses would lead to a better understanding of phenome-wide associations and empower physicians to provide personalized care in terms of predicting, preventing, or initiating an early management of concomitant diseases.",2015-02-05 +24949626,"IIS--Integrated Interactome System: a web-based platform for the annotation, analysis and visualization of protein-metabolite-gene-drug interactions by integrating a variety of data sources and tools.","

Background

High-throughput screening of physical, genetic and chemical-genetic interactions brings important perspectives in the Systems Biology field, as the analysis of these interactions provides new insights into protein/gene function, cellular metabolic variations and the validation of therapeutic targets and drug design. However, such analysis depends on a pipeline connecting different tools that can automatically integrate data from diverse sources and result in a more comprehensive dataset that can be properly interpreted.

Results

We describe here the Integrated Interactome System (IIS), an integrative platform with a web-based interface for the annotation, analysis and visualization of the interaction profiles of proteins/genes, metabolites and drugs of interest. IIS works in four connected modules: (i) Submission module, which receives raw data derived from Sanger sequencing (e.g. two-hybrid system); (ii) Search module, which enables the user to search for the processed reads to be assembled into contigs/singlets, or for lists of proteins/genes, metabolites and drugs of interest, and add them to the project; (iii) Annotation module, which assigns annotations from several databases for the contigs/singlets or lists of proteins/genes, generating tables with automatic annotation that can be manually curated; and (iv) Interactome module, which maps the contigs/singlets or the uploaded lists to entries in our integrated database, building networks that gather novel identified interactions, protein and metabolite expression/concentration levels, subcellular localization and computed topological metrics, GO biological processes and KEGG pathways enrichment. This module generates a XGMML file that can be imported into Cytoscape or be visualized directly on the web.

Conclusions

We have developed IIS by the integration of diverse databases following the need of appropriate tools for a systematic analysis of physical, genetic and chemical-genetic interactions. IIS was validated with yeast two-hybrid, proteomics and metabolomics datasets, but it is also extendable to other datasets. IIS is freely available online at: http://www.lge.ibi.unicamp.br/lnbio/IIS/.",2014-06-20 +24205270,Integrative pathway-based approach for genome-wide association studies: identification of new pathways for rheumatoid arthritis and type 1 diabetes.,"Genome-wide association studies (GWAS) led to the identification of numerous novel loci for a number of complex diseases. Pathway-based approaches using genotypic data provide tangible leads which cannot be identified by single marker approaches as implemented in GWAS. The available pathway analysis approaches mainly differ in the employed databases and in the applied statistics for determining the significance of the associated disease markers. So far, pathway-based approaches using GWAS data failed to consider the overlapping of genes among different pathways or the influence of protein-interactions. We performed a multistage integrative pathway (MIP) analysis on three common diseases--Crohn's disease (CD), rheumatoid arthritis (RA) and type 1 diabetes (T1D)--incorporating genotypic, pathway, protein- and domain-interaction data to identify novel associations between these diseases and pathways. Additionally, we assessed the sensitivity of our method by studying the influence of the most significant SNPs on the pathway analysis by removing those and comparing the corresponding pathway analysis results. Apart from confirming many previously published associations between pathways and RA, CD and T1D, our MIP approach was able to identify three new associations between disease phenotypes and pathways. 
This includes a relation between the influenza-A pathway and RA, as well as a relation between T1D and the phagosome and toxoplasmosis pathways. These results provide new leads to understand the molecular underpinnings of these diseases. The developed software herein used is available at http://www.cogsys.cs.uni-tuebingen.de/software/GWASPathwayIdentifier/index.htm.",2013-10-25 +29513192,SkipCPP-Pred: an improved and promising sequence-based predictor for predicting cell-penetrating peptides.,"

Background

Cell-penetrating peptides (CPPs) are short peptides (5-30 amino acids) that can enter almost any cell without significant damage. On account of their high delivery efficiency, CPPs are promising candidates for gene therapy and cancer treatment. Accordingly, techniques that correctly predict CPPs are anticipated to accelerate CPP applications in future therapeutics. Recently, computational methods have reportedly been successful in predicting CPPs. Unfortunately, the predictive performance of existing methods is not satisfactory or reliable enough to accurately identify CPPs.

Results

In this study, we propose a novel computational predictor called SkipCPP-Pred to further improve the predictive performance. The novelty of the proposed predictor is that we present a sequence-based feature representation algorithm called adaptive k-skip-n-gram that sufficiently captures the intrinsic correlation information of residues. By fusing the proposed adaptive skip features with a random forest (RF) classifier, we successfully construct the prediction model of SkipCPP-Pred. The various jackknife results demonstrate that the proposed SkipCPP-Pred is 3.6% higher than state-of-the-art CPP predictors in terms of accuracy. Moreover, we construct a high-quality benchmark dataset by reducing the data redundancy and enhancing the similarity between the positive and negative classes. Using this dataset to build prediction models, we can successfully avoid the performance bias lying in existing methods and yield a promising predictive model.

Conclusions

The proposed SkipCPP-Pred is a simple and fast sequence-based predictor featured with the adaptive k-skip-n-gram model for the improved prediction of CPPs. Currently, SkipCPP-Pred is publicly available from an online webserver ( http://server.malab.cn/SkipCPP-Pred/Index.html ).",2017-10-16 +26428289,Generalized empirical Bayesian methods for discovery of differential data in high-throughput biology.,"

Motivation

High-throughput data are now commonplace in biological research. Rapidly changing technologies and applications mean that novel methods for detecting differential behaviour that account for a 'large P, small n' setting are required at an increasing rate. The development of such methods is, in general, being done on an ad hoc basis, requiring further development cycles and leading to a lack of standardization between analyses.

Results

We present here a generalized method for identifying differential behaviour within high-throughput biological data through empirical Bayesian methods. This approach is based on our baySeq algorithm for identification of differential expression in RNA-seq data based on a negative binomial distribution, and in paired data based on a beta-binomial distribution. Here we show how the same empirical Bayesian approach can be applied to any parametric distribution, removing the need for lengthy development of novel methods for differently distributed data. Comparisons with existing methods developed to address specific problems in high-throughput biological data show that these generic methods can achieve equivalent or better performance. A number of enhancements to the basic algorithm are also presented to increase flexibility and reduce computational costs.

Availability and implementation

The methods are implemented in the R baySeq (v2) package, available on Bioconductor http://www.bioconductor.org/packages/release/bioc/html/baySeq.html.

Contact

tjh48@cam.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-01 +27179031,tRF2Cancer: A web server to detect tRNA-derived small RNA fragments (tRFs) and their expression in multiple cancers.,"tRNA-derived small RNA fragments (tRFs) are one class of small non-coding RNAs derived from transfer RNAs (tRNAs). tRFs play important roles in cellular processes and are involved in multiple cancers. High-throughput small RNA (sRNA) sequencing experiments can detect all the cellular expressed sRNAs, including tRFs. However, distinguishing genuine tRFs from RNA fragments generated by random degradation remains a major challenge. In this study, we developed an integrated web-based computing system, tRF2Cancer, to accurately identify tRFs from sRNA deep-sequencing data and evaluate their expression in multiple cancers. The binomial test was introduced to evaluate whether reads from a small RNA-seq data set represent tRFs or degraded fragments. A classification method was then used to annotate the types of tRFs based on their sites of origin in pre-tRNA or mature tRNA. We applied the pipeline to analyze 10 991 data sets from 32 types of cancers and identified thousands of expressed tRFs. A tool called 'tRFinCancer' was developed to facilitate the users to inspect the expression of tRFs across different types of cancers. Another tool called 'tRFBrowser' shows both the sites of origin and the distribution of chemical modification sites in tRFs on their source tRNA. The tRF2Cancer web server is available at http://rna.sysu.edu.cn/tRFfinder/.",2016-05-13 +29286379,Automated Robotic Liquid Handling Assembly of Modular DNA Devices. ,"Recent advances in modular DNA assembly techniques have enabled synthetic biologists to test significantly more of the available ""design space"" represented by ""devices"" created as combinations of individual genetic components. However, manual assembly of such large numbers of devices is time-intensive, error-prone, and costly. 
The increasing sophistication and scale of synthetic biology research necessitates an efficient, reproducible way to accommodate large-scale, complex, and high throughput device construction. Here, a DNA assembly protocol using the Type-IIS restriction endonuclease based Modular Cloning (MoClo) technique is automated on two liquid-handling robotic platforms. Automated liquid-handling robots require careful, often times tedious optimization of pipetting parameters for liquids of different viscosities (e.g. enzymes, DNA, water, buffers), as well as explicit programming to ensure correct aspiration and dispensing of DNA parts and reagents. This makes manual script writing for complex assemblies just as problematic as manual DNA assembly, and necessitates a software tool that can automate script generation. To this end, we have developed a web-based software tool, http://mocloassembly.com, for generating combinatorial DNA device libraries from basic DNA parts uploaded as Genbank files. We provide access to the tool, and an export file from our liquid handler software which includes optimized liquid classes, labware parameters, and deck layout. All DNA parts used are available through Addgene, and their digital maps can be accessed via the Boston University BDC ICE Registry. Together, these elements provide a foundation for other organizations to automate modular cloning experiments and similar protocols. The automated DNA assembly workflow presented here enables the repeatable, automated, high-throughput production of DNA devices, and reduces the risk of human error arising from repetitive manual pipetting. 
Sequencing data show the automated DNA assembly reactions generated from this workflow are ~95% correct and require as little as 4% as much hands-on time, compared to manual reaction preparation.",2017-12-01 +25731616,Meta-analysis of gene expression profiles indicates genes in spliceosome pathway are up-regulated in hepatocellular carcinoma (HCC).,"Hepatocellular carcinoma (HCC) is among the commonest kind of malignant tumors, which accounts for more than 500,000 cases of newly diagnosed cancer annually. Many microarray studies for identifying differentially expressed genes (DEGs) in HCC have been conducted, but results have varied across different studies. Here, we performed a meta-analysis of publicly available microarray Gene Expression Omnibus datasets, which covers five independent studies, containing 753 HCC samples and 638 non-tumor liver samples. We identified 192 DEGs that were consistently up-regulated in HCC vs. normal liver tissue. For the 192 up-regulated genes, we performed Kyoto Encyclopedia of Genes and Genomes pathway analysis. To our surprise, besides several cell growth-related pathways, spliceosome pathway was also up-regulated in HCC. For further exploring the relationship between spliceosome pathway and HCC, we investigated the expression data of spliceosome pathway genes in 15 independent studies in Nextbio database ( https://www.nextbio.com/b/nextbioCorp.nb ). It was found that many genes of spliceosome pathway such as HSPA1A, SNRPE, SF3B2, SF3B4 and TRA2A genes which we identified to be up-regulated in our meta-analysis were generally overexpressed in HCC. At last, using real-time PCR, we also found that BUD31, SF3B2, SF3B4, SNRPE, SPINK1, TPA2A and HSPA1A genes are significantly up-regulated in clinical HCC samples when compared to the corresponding non-tumorous liver tissues. Our study for the first time indicates that many genes of spliceosome pathway are up-regulated in HCC. 
This finding might provide new insights into the relationship between the spliceosome pathway and HCC.",2015-03-03 +29329548,The cost of a pediatric neurocritical care program for traumatic brain injury: a retrospective cohort study.,"

Background

Inpatient care for children with severe traumatic brain injury (sTBI) is expensive, with inpatient charges averaging over $70,000 per case (Hospital Inpatient, Children Only, National Statistics. Diagnoses- clinical classification software (CCS) principal diagnosis category 85 coma, stupor, and brain damage, and 233 intracranial injury. Diagnoses by Aggregate charges [ https://hcupnet.ahrq.gov/#setup ]). This ranks sTBI in the top quartile of pediatric conditions with the greatest inpatient costs (Hospital Inpatient, Children Only, National Statistics. Diagnoses- clinical classification software (CCS) principal diagnosis category 85 coma, stupor, and brain damage, and 233 intracranial injury. Diagnoses by Aggregate charges [ https://hcupnet.ahrq.gov/#setup ]). The Brain Trauma Foundation developed sTBI intensive care guidelines in 2003, with revisions in 2012 (Kochanek, Carney, et al. PCCM 3:S1-S2, 2012). These guidelines have been widely disseminated, and are associated with improved health outcomes (Pineda, Leonard, et al. LN 12:45-52, 2013), yet research on the cost of associated hospital care is limited. The objective of this study was to assess the costs of providing hospital care to sTBI patients through a guideline-based Pediatric Neurocritical Care Program (PNCP) implemented at St. Louis Children's Hospital, a pediatric academic medical center in the Midwest United States.

Methods

This is a retrospective cohort study. We used multi-level regression to estimate pre-/post-implementation effects of the PNCP program on inflation adjusted total cost of in-hospital sTBI care. The study population included 58 pediatric patient discharges in the pre-PNCP implementation group (July 15, 1999 - September 17, 2005), and 59 post-implementation patient discharges (September 18, 2005 - January 15, 2012).

Results

Implementation of the PNCP was associated with a non-significant difference in the cost of care between the pre- and post-implementation periods (eβ = 1.028, p = 0.687).

Conclusions

Implementation of the PNCP to support delivery of guideline-based care for children with sTBI did not change the total per-patient cost of in-hospital care. A key strength of this study was its use of hospital cost data rather than charges. Future research should consider the longitudinal post-hospitalization costs of this approach to sTBI care.",2018-01-12 +25189782,"The GOBLET training portal: a global repository of bioinformatics training materials, courses and trainers.","

Summary

Rapid technological advances have led to an explosion of biomedical data in recent years. The pace of change has inspired new collaborative approaches for sharing materials and resources to help train life scientists both in the use of cutting-edge bioinformatics tools and databases and in how to analyse and interpret large datasets. A prototype platform for sharing such training resources was recently created by the Bioinformatics Training Network (BTN). Building on this work, we have created a centralized portal for sharing training materials and courses, including a catalogue of trainers and course organizers, and an announcement service for training events. For course organizers, the portal provides opportunities to promote their training events; for trainers, the portal offers an environment for sharing materials, for gaining visibility for their work and promoting their skills; for trainees, it offers a convenient one-stop shop for finding suitable training resources and identifying relevant training events and activities locally and worldwide.

Availability and implementation

http://mygoblet.org/training-portal.",2014-09-04 +26952134,Data on Arc and Zif268 expression in the brain of the α-2A adrenergic receptor knockout mouse.,"The α2-adrenergic receptor (α2-AR) is widely distributed in the brain with distinct roles for α2-AR subtypes (A, B and C). In this article, data are provided on Activity Regulated Cytoskeleton Associated Protein (Arc) and Zif268 expression in the brain of the α2A-AR knockout (α2A-AR KO) mouse. These data are supplemental to an original research article examining Arc and Zif268 expression in rats injected with the α2-AR antagonist, RX821002 (http://dx.doi.org/10.1016/j.neulet.2015.12.002. [1]).",2016-02-10 +23599415,Preliminary evaluation of the CellFinder literature curation pipeline for gene expression in kidney cells and anatomical parts.,"Biomedical literature curation is the process of automatically and/or manually deriving knowledge from scientific publications and recording it into specialized databases for structured delivery to users. It is a slow, error-prone, complex, costly and, yet, highly important task. Previous experiences have proven that text mining can assist in its many phases, especially, in triage of relevant documents and extraction of named entities and biological events. Here, we present the curation pipeline of the CellFinder database, a repository of cell research, which includes data derived from literature curation and microarrays to identify cell types, cell lines, organs and so forth, and especially patterns in gene expression. The curation pipeline is based on freely available tools in all text mining steps, as well as the manual validation of extracted data. Preliminary results are presented for a data set of 2376 full texts from which >4500 gene expression events in cell or anatomical part have been extracted. 
Validation of half of this data resulted in a precision of ~50% of the extracted data, which indicates that we are on the right track with our pipeline for the proposed task. However, evaluation of the methods shows that there is still room for improvement in the named-entity recognition and that a larger and more robust corpus is needed to achieve a better performance for event extraction. Database URL: http://www.cellfinder.org/",2013-04-18 +25480116,LAILAPS: the plant science search engine.,"With the number of sequenced plant genomes growing, the number of predicted genes and functional annotations is also increasing. The association between genes and phenotypic traits is currently of great interest. Unfortunately, the information available today is widely scattered over a number of different databases. Information retrieval (IR) has become an all-encompassing bioinformatics methodology for extracting knowledge from complex, heterogeneous and distributed databases, and therefore can be a useful tool for obtaining a comprehensive view of plant genomics, from genes to traits. Here we describe LAILAPS (http://lailaps.ipk-gatersleben.de), an IR system designed to link plant genomic data in the context of phenotypic attributes for a detailed forward genetic research. LAILAPS comprises around 65 million indexed documents, encompassing >13 major life science databases with around 80 million links to plant genomic resources. The LAILAPS search engine allows fuzzy querying for candidate genes linked to specific traits over a loosely integrated system of indexed and interlinked genome databases. Query assistance and an evidence-based annotation system enable time-efficient and comprehensive information retrieval. An artificial neural network incorporating user feedback and behavior tracking allows relevance sorting of results. 
We fully describe LAILAPS's functionality and capabilities by comparing this system's performance with other widely used systems and by reporting both a validation in maize and a knowledge discovery use-case focusing on candidate genes in barley.",2014-12-04 +25361965,PTMcode v2: a resource for functional associations of post-translational modifications within and between proteins.,"The post-translational regulation of proteins is mainly driven by two molecular events, their modification by several types of moieties and their interaction with other proteins. These two processes are interdependent and together are responsible for the function of the protein in a particular cell state. Several databases focus on the prediction and compilation of protein-protein interactions (PPIs) and no less on the collection and analysis of protein post-translational modifications (PTMs), however, there are no resources that concentrate on describing the regulatory role of PTMs in PPIs. We developed several methods based on residue co-evolution and proximity to predict the functional associations of pairs of PTMs that we apply to modifications in the same protein and between two interacting proteins. In order to make data available for understudied organisms, PTMcode v2 (http://ptmcode.embl.de) includes a new strategy to propagate PTMs from validated modified sites through orthologous proteins. The second release of PTMcode covers 19 eukaryotic species from which we collected more than 300,000 experimentally verified PTMs (>1,300,000 propagated) of 69 types extracting the post-translational regulation of >100,000 proteins and >100,000 interactions. In total, we report 8 million associations of PTMs regulating single proteins and over 9.4 million interplays tuning PPIs.",2014-10-31 +26019122,"COINS Data Exchange: An open platform for compiling, curating, and disseminating neuroimaging data.","Neuroimaging data collection is inherently expensive. 
Maximizing the return on investment in neuroimaging studies requires that neuroimaging data be re-used whenever possible. In an effort to further scientific knowledge, the COINS Data Exchange (DX) (http://coins.mrn.org/dx) aims to make data sharing seamless and commonplace. DX takes a three-pronged approach towards improving the overall state of data sharing within the neuroscience community. The first prong is compiling data into one location that has been collected from all over the world in many different formats. The second prong is curating the data so that it can be stored in one consistent format and so that data QA/QC measures can be assured. The third prong is disseminating the data so that it is easy to consume and straightforward to interpret. This paper explains the concepts behind each prong and describes some challenges and successes that the Data Exchange has experienced.",2015-05-24 +27242836,VESPUCCI: Exploring Patterns of Gene Expression in Grapevine.,"Large-scale transcriptional studies aim to decipher the dynamic cellular responses to a stimulus, like different environmental conditions. In the era of high-throughput omics biology, the most used technologies for these purposes are microarray and RNA-Seq, whose data are usually required to be deposited in public repositories upon publication. Such repositories have the enormous potential to provide a comprehensive view of how different experimental conditions lead to expression changes, by comparing gene expression across all possible measured conditions. Unfortunately, this task is greatly impaired by differences among experimental platforms that make direct comparisons difficult. In this paper, we present the Vitis Expression Studies Platform Using COLOMBOS Compendia Instances (VESPUCCI), a gene expression compendium for grapevine which was built by adapting an approach originally developed for bacteria, and show how it can be used to investigate complex gene expression patterns. 
We integrated nearly all publicly available microarray and RNA-Seq expression data: 1608 gene expression samples from 10 different technological platforms. Each sample has been manually annotated using a controlled vocabulary developed ad hoc to ensure both human readability and computational tractability. Expression data in the compendium can be visually explored using several tools provided by the web interface or can be programmatically accessed using the REST interface. VESPUCCI is freely accessible at http://vespucci.colombos.fmach.it.",2016-05-10 +25480115,"Sinbase: an integrated database to study genomics, genetics and comparative genomics in Sesamum indicum.","Sesame (Sesamum indicum L.) is an ancient and important oilseed crop grown widely in tropical and subtropical areas. It belongs to the gigantic order Lamiales, which includes many well-known or economically important species, such as olive (Olea europaea), leonurus (Leonurus japonicus) and lavender (Lavandula spica), many of which have important pharmacological properties. Despite their importance, genetic and genomic analyses on these species have been insufficient due to a lack of reference genome information. The now available S. indicum genome will provide an unprecedented opportunity for studying both S. indicum genetic traits and comparative genomics. To deliver S. indicum genomic information to the worldwide research community, we designed Sinbase, a web-based database with comprehensive sesame genomic, genetic and comparative genomic information. Sinbase includes sequences of assembled sesame pseudomolecular chromosomes, protein-coding genes (27,148), transposable elements (372,167) and non-coding RNAs (1,748). In particular, Sinbase provides unique and valuable information on colinear regions with various plant genomes, including Arabidopsis thaliana, Glycine max, Vitis vinifera and Solanum lycopersicum. 
Sinbase also provides a useful search function and data mining tools, including a keyword search and local BLAST service. Sinbase will be updated regularly with new features, improvements to genome annotation and new genomic sequences, and is freely accessible at http://ocri-genomics.org/Sinbase/.",2014-12-04 +27919703,Prediction and mechanism elucidation of analyte retention on phospholipid stationary phases (IAM-HPLC) by in silico calculated physico-chemical descriptors.,"The present study proposes a method for an in silico calculation of phospholipophilicity. Phospholipophilicity is intended as the measure of analyte affinity for phospholipids; it is currently assessed by HPLC measures of analyte retention on phosphatidylcholine-like stationary phases (IAM - Immobilized Artificial Membrane) resulting in log kWIAM values. Due to the amphipathic and electrically charged nature of phospholipids, retention on these stationary phases results from complex mechanisms, being affected not only by lipophilicity (as measured by n-octanol/aqueous phase partition coefficients, log P) but also by the occurrence of polar and/or electrostatic intermolecular interaction forces. Differently from log P, to date no method has been proposed for in silico calculation of log kWIAM. The study is aimed both at shedding new light into the retention mechanism on IAM stationary phases and at offering a high-throughput method to achieve such values. A wide set of physico-chemical and topological properties were taken into account, yielding a robust final model including four in silico calculated parameters (lipophilicity, hydrophilic/lipophilic balance, molecular size, and molecule flexibility). 
The here presented model was based on the analysis of 205 experimentally determined values, taken from the literature and measured by a single research group to minimize the interlaboratory variability; such model is able to predict phospholipophilicity values on both the two IAM stationary phases to date marketed, i.e. IAM.PC.MG and IAM.PC.DD2, with a fairly good degree (r2=0.85) of accuracy. The present work allowed the development of a free on-line service aimed at calculating log kWIAM values of any molecule included in the PubChem database, which is freely available at http://nova.disfarm.unimi.it/logkwiam.htm.",2016-12-03 +25178365,Thoroughbred Horse Single Nucleotide Polymorphism and Expression Database: HSDB.,"Genetics is important for breeding and selection of horses but there is a lack of well-established horse-related browsers or databases. In order to better understand horses, more variants and other integrated information are needed. Thus, we construct a horse genomic variants database including expression and other information. Horse Single Nucleotide Polymorphism and Expression Database (HSDB) (http://snugenome2.snu.ac.kr/HSDB) provides the number of unexplored genomic variants still remaining to be identified in the horse genome including rare variants by using population genome sequences of eighteen horses and RNA-seq of four horses. The identified single nucleotide polymorphisms (SNPs) were confirmed by comparing them with SNP chip data and variants of RNA-seq, which showed a concordance level of 99.02% and 96.6%, respectively. Moreover, the database provides the genomic variants with their corresponding transcriptional profiles from the same individuals to help understand the functional aspects of these variants. 
The database will contribute to genetic improvement and breeding strategies of Thoroughbreds.",2014-09-01 +23599424,The mzQuantML data standard for mass spectrometry-based quantitative studies in proteomics.,"The range of heterogeneous approaches available for quantifying protein abundance via mass spectrometry (MS)(1) leads to considerable challenges in modeling, archiving, exchanging, or submitting experimental data sets as supplemental material to journals. To date, there has been no widely accepted format for capturing the evidence trail of how quantitative analysis has been performed by software, for transferring data between software packages, or for submitting to public databases. In the context of the Proteomics Standards Initiative, we have developed the mzQuantML data standard. The standard can represent quantitative data about regions in two-dimensional retention time versus mass/charge space (called features), peptides, and proteins and protein groups (where there is ambiguity regarding peptide-to-protein inference), and it offers limited support for small molecule (metabolomic) data. The format has structures for representing replicate MS runs, grouping of replicates (for example, as study variables), and capturing the parameters used by software packages to arrive at these values. The format has the capability to reference other standards such as mzML and mzIdentML, and thus the evidence trail for the MS workflow as a whole can now be described. Several software implementations are available, and we encourage other bioinformatics groups to use mzQuantML as an input, internal, or output format for quantitative software and for structuring local repositories. 
All project resources are available in the public domain from the HUPO Proteomics Standards Initiative http://www.psidev.info/mzquantml.",2013-04-18 +28498994,ThreaDomEx: a unified platform for predicting continuous and discontinuous protein domains by multiple-threading and segment assembly.,"We develop a hierarchical pipeline, ThreaDomEx, for both continuous domain (CD) and discontinuous domain (DCD) structure predictions. Starting from a query sequence, ThreaDomEx first threads it through the PDB to identify multiple structure templates, where a profile of domain conservation score (DC-score) is derived for domain-segment assignment. To further detect DCDs that consist of separated segments along the sequence, a boundary-clustering algorithm is used to refine the DCD-linker locations. In case that the templates do not contain DCDs, a domain-segment assembly process, guided by symmetry comparison, is applied for further DCD detections. ThreaDomEx was tested a set of 1111 proteins and achieved a normalized domain overlap score of 89.3% compared to experimental data, which is significantly higher than other state-of-the-art methods. It also recalls 26.7% of DCDs with 72.7% precision on the proteins for which threading failed to detect any DCDs. The server provides facilities for users to interactively refine the domain models by adjusting DC-score threshold, deleting and adding domain linkers, and assembling domain segments, which are particularly helpful for the hard targets for which current methods have a low accuracy while human-expert knowledge and experimental insights can be used for refining models. ThreaDomEX server is available at http://zhanglab.ccmb.med.umich.edu/ThreaDomEx.",2017-07-01 +25320561,"Kazusa Marker DataBase: a database for genomics, genetics, and molecular breeding in plants.","In order to provide useful genomic information for agronomical plants, we have established a database, the Kazusa Marker DataBase (http://marker.kazusa.or.jp). 
This database includes information on DNA markers, e.g., SSR and SNP markers, genetic linkage maps, and physical maps, that were developed at the Kazusa DNA Research Institute. Keyword searches for the markers, sequence data used for marker development, and experimental conditions are also available through this database. Currently, 10 plant species have been targeted: tomato (Solanum lycopersicum), pepper (Capsicum annuum), strawberry (Fragaria × ananassa), radish (Raphanus sativus), Lotus japonicus, soybean (Glycine max), peanut (Arachis hypogaea), red clover (Trifolium pratense), white clover (Trifolium repens), and eucalyptus (Eucalyptus camaldulensis). In addition, the number of plant species registered in this database will be increased as our research progresses. The Kazusa Marker DataBase will be a useful tool for both basic and applied sciences, such as genomics, genetics, and molecular breeding in crops.",2014-09-01 +28460141,HH-MOTiF: de novo detection of short linear motifs in proteins by Hidden Markov Model comparisons.,"Short linear motifs (SLiMs) in proteins are self-sufficient functional sequences that specify interaction sites for other molecules and thus mediate a multitude of functions. Computational, as well as experimental biological research would significantly benefit, if SLiMs in proteins could be correctly predicted de novo with high sensitivity. However, de novo SLiM prediction is a difficult computational task. When considering recall and precision, the performances of published methods indicate remaining challenges in SLiM discovery. We have developed HH-MOTiF, a web-based method for SLiM discovery in sets of mainly unrelated proteins. HH-MOTiF makes use of evolutionary information by creating Hidden Markov Models (HMMs) for each input sequence and its closely related orthologs. HMMs are compared against each other to retrieve short stretches of homology that represent potential SLiMs. 
These are transformed to hierarchical structures, which we refer to as motif trees, for further processing and evaluation. Our approach allows us to identify degenerate SLiMs, while still maintaining a reasonably high precision. When considering a balanced measure for recall and precision, HH-MOTiF performs better on test data compared to other SLiM discovery methods. HH-MOTiF is freely available as a web-server at http://hh-motif.biochem.mpg.de.",2017-07-01 +26651948,"Meta- and Orthogonal Integration of Influenza ""OMICs"" Data Defines a Role for UBR4 in Virus Budding.","Several systems-level datasets designed to dissect host-pathogen interactions during influenza A infection have been reported. However, apparent discordance among these data has hampered their full utility toward advancing mechanistic and therapeutic knowledge. To collectively reconcile these datasets, we performed a meta-analysis of data from eight published RNAi screens and integrated these data with three protein interaction datasets, including one generated within the context of this study. Further integration of these data with global virus-host interaction analyses revealed a functionally validated biochemical landscape of the influenza-host interface, which can be queried through a simplified and customizable web portal (http://www.metascape.org/IAV). Follow-up studies revealed that the putative ubiquitin ligase UBR4 associates with the viral M2 protein and promotes apical transport of viral proteins. 
Taken together, the integrative analysis of influenza OMICs datasets illuminates a viral-host network of high-confidence human proteins that are essential for influenza A virus replication.",2015-12-01 +24214998,COLOMBOS v2.0: an ever expanding collection of bacterial expression compendia.,"The COLOMBOS database (http://www.colombos.net) features comprehensive organism-specific cross-platform gene expression compendia of several bacterial model organisms and is supported by a fully interactive web portal and an extensive web API. COLOMBOS was originally published in PLoS One, and COLOMBOS v2.0 includes both an update of the expression data, by expanding the previously available compendia and by adding compendia for several new species, and an update of the surrounding functionality, with improved search and visualization options and novel tools for programmatic access to the database. The scope of the database has also been extended to incorporate RNA-seq data in our compendia by a dedicated analysis pipeline. We demonstrate the validity and robustness of this approach by comparing the same RNA samples measured in parallel using both microarrays and RNA-seq. As far as we know, COLOMBOS currently hosts the largest homogenized gene expression compendia available for seven bacterial model organisms.",2013-11-08 +28612167,Efficient identification of SNPs in pooled DNA samples using a dual mononucleotide addition-based sequencing method.,"Identifying single nucleotide polymorphism (SNPs) from pooled samples is critical for many studies and applications. SNPs determined by next-generation sequencing results may suffer from errors in both base calling and read mapping. Taking advantage of dual mononucleotide addition-based pyrosequencing, we present Epds, a method to efficiently identify SNPs from pooled DNA samples. 
On the basis of only five patterns of non-synchronistic extensions between the wild and mutant sequences using dual mononucleotide addition-based pyrosequencing, we employed an enumerative algorithm to infer the mutant locus and estimate the proportion of mutant sequence. According to the profiles resulting from three runs with distinct dual mononucleotide additions, Epds could recover the mutant bases. Results showed that our method had a false-positive rate of less than 3%. Series of simulations revealed that Epds outperformed the current method (PSM) in many situations. Finally, experiments based on profiles produced by real sequencing proved that our method could be successfully applied for the identification of mutants from pooled samples. The software for implementing this method and the experimental data are available at http://bioinfo.seu.edu.cn/Epds .",2017-06-13 +26666652,ChromContact: A web tool for analyzing spatial contact of chromosomes from Hi-C data.,"

Background

Hi-C analysis has revealed the three-dimensional architecture of chromosomes in the nucleus. Although Hi-C data contains valuable information on long-range interactions of chromosomes, the data is not yet widely utilized by molecular biologists because of the quantity of data.

Results

We developed a web tool, ChromContact, to utilize the information obtained by Hi-C. The web tool is designed to be simple and easy to use. By specifying a locus of interest, ChromContact calculates contact profiles and generates links to the UCSC Genome Browser, enabling users to visually examine the contact information with various annotations.

Conclusion

ChromContact provides wide-range of molecular biologists with a user-friendly means to access high-resolution Hi-C data. One of the possible applications of ChromContact is investigating novel long-range promoter-enhancer interactions. This facilitates the functional interpretation of statistically significant markers identified by GWAS or ChIP-seq peaks that are located far from any annotated genes. ChromContact is freely accessible at http://bioinfo.sls.kyushu-u.ac.jp/chromcontact/ .",2015-12-15 +28514151,DeepPPI: Boosting Prediction of Protein-Protein Interactions with Deep Neural Networks.,"The complex language of eukaryotic gene expression remains incompletely understood. Despite the importance suggested by many proteins variants statistically associated with human disease, nearly all such variants have unknown mechanisms, for example, protein-protein interactions (PPIs). In this study, we address this challenge using a recent machine learning advance-deep neural networks (DNNs). We aim at improving the performance of PPIs prediction and propose a method called DeepPPI (Deep neural networks for Protein-Protein Interactions prediction), which employs deep neural networks to learn effectively the representations of proteins from common protein descriptors. The experimental results indicate that DeepPPI achieves superior performance on the test data set with an Accuracy of 92.50%, Precision of 94.38%, Recall of 90.56%, Specificity of 94.49%, Matthews Correlation Coefficient of 85.08% and Area Under the Curve of 97.43%, respectively. Extensive experiments show that DeepPPI can learn useful features of proteins pairs by a layer-wise abstraction, and thus achieves better prediction performance than existing methods. 
The source code of our approach can be available via http://ailab.ahu.edu.cn:8087/DeepPPI/index.html .",2017-05-26 +28817627,Analysing researchers' outreach efforts and the association with publication metrics: A case study of Kudos.,"With the growth of scholarly collaboration networks and social communication platforms, members of the scholarly community are experimenting with their approach to disseminating research outputs, in an effort to increase their audience and outreach. However, from a researcher's point of view, it is difficult to determine whether efforts to make work more visible are worthwhile (in terms of the association with publication metrics) and within that, difficult to assess which platform or network is most effective for sharing work and connecting to a wider audience. We undertook a case study of Kudos (https://www.growkudos.com), a web-based service that claims to help researchers increase the outreach of their publications, to examine the most effective tools for sharing publications online, and to investigate which actions are associated with improved metrics. We extracted a dataset from Kudos of 830,565 unique publications claimed by authors, for which 20,775 had actions taken to explain or share via Kudos, and for 4,867 of these full text download data from publishers was available. Findings show that researchers are most likely to share their work on Facebook, but links shared on Twitter are more likely to be clicked on. A Mann-Whitney U test revealed that a treatment group (publications having actions in Kudos) had a significantly higher median average of 149 full text downloads (23.1% more) per publication as compared to a control group (having no actions in Kudos) with a median average of 121 full text downloads per publication. 
These findings suggest that performing actions on publications, such as sharing, explaining, or enriching, could help to increase the number of full text downloads of a publication.",2017-08-17 +25398902,YM500v2: a small RNA sequencing (smRNA-seq) database for human cancer miRNome research.,"We previously presented YM500, which is an integrated database for miRNA quantification, isomiR identification, arm switching discovery and novel miRNA prediction from 468 human smRNA-seq datasets. Here in this updated YM500v2 database (http://ngs.ym.edu.tw/ym500/), we focus on the cancer miRNome to make the database more disease-orientated. New miRNA-related algorithms developed after YM500 were included in YM500v2, and, more significantly, more than 8000 cancer-related smRNA-seq datasets (including those of primary tumors, paired normal tissues, PBMC, recurrent tumors, and metastatic tumors) were incorporated into YM500v2. Novel miRNAs (miRNAs not included in the miRBase R21) were not only predicted by three independent algorithms but also cleaned by a new in silico filtration strategy and validated by wetlab data such as Cross-Linked ImmunoPrecipitation sequencing (CLIP-seq) to reduce the false-positive rate. A new function 'Meta-analysis' is additionally provided for allowing users to identify real-time differentially expressed miRNAs and arm-switching events according to customer-defined sample groups and dozens of clinical criteria tidying up by proficient clinicians. Cancer miRNAs identified hold the potential for both basic research and biotech applications.",2014-11-14 +24681202,Gene-disease association with literature based enrichment.,"

Motivation

Gene set enrichment analysis (GSEA) annotates gene microarray data with functional information from the biomedical literature to improve gene-disease association prediction. We hypothesize that supplementing GSEA with comprehensive gene function catalogs built automatically using information extracted from the scientific literature will significantly enhance GSEA prediction quality.

Methods

Gold standard gene sets for breast cancer (BrCa) and colorectal cancer (CRC) were derived from the literature. Two gene function catalogs (CMeSH and CUMLS) were automatically generated. 1. By using Entrez Gene to associate all recorded human genes with PubMed article IDs. 2. Using the genes mentioned in each PubMed article and associating each with the article's MeSH terms (in CMeSH) and extracted UMLS concepts (in CUMLS). Microarray data from the Gene Expression Omnibus for BrCa and CRC was then annotated using CMeSH and CUMLS and, for comparison, also with several pre-existing catalogs (C2, C4 and C5 from the Molecular Signatures Database). Ranking was done using a standard GSEA implementation (GSEA-p). Gene function predictions for enriched array data were evaluated against the gold standard by measuring area under the receiver operating characteristic curve (AUC).

Results

Comparison of ranking using the literature enrichment catalogs, the pre-existing catalogs as well as five randomly generated catalogs shows that the literature derived enrichment catalogs are more effective. The AUC for BrCa using the unenriched gene expression dataset was 0.43, increasing to 0.89 after gene set enrichment with CUMLS. The AUC for CRC using the unenriched gene expression dataset was 0.54, increasing to 0.9 after enrichment with CMeSH. C2 increased AUC (BrCa 0.76, CRC 0.71) but C4 and C5 performed poorly (between 0.35 and 0.5). The randomly generated catalogs also performed poorly, equivalent to random guessing.

Discussion

Gene set enrichment significantly improved prediction of gene-disease association. Selection of enrichment catalog had a substantial effect on prediction accuracy. The literature based catalogs performed better than the MSigDB catalogs, possibly because they are more recent. Catalogs generated automatically from the literature can be kept up to date.

Conclusion

Prediction of gene-disease association is a fundamental task in biomedical research. GSEA provides a promising method when using literature-based enrichment catalogs.

Availability

The literature based catalogs generated and used in this study are available from http://www2.chi.unsw.edu.au/literature-enrichment.",2014-03-27 +27437927,Functional Dyspepsia: An Enigma in a Conundrum.,"As defined by Rome III, there are 4 abdominal pain-related functional gastrointestinal disorders in children: irritable bowel syndrome, functional dyspepsia (FD), abdominal migraine, and functional abdominal pain. Dyspepsia is a constellation of symptoms referable to the gastroduodenal region of the upper gastrointestinal tract. FD refers to dyspeptic symptoms that cannot currently be explained by an organic cause, and affects 25% to 40% of the adult population over a lifetime. In children, this condition results in increased specialist consultations, with reported prevalence between 3% and 27%. The Rome III criteria for pediatric FD include the presence or persistence of recurrent pain or discomfort centered in the upper abdomen, without evidence of organic disease or change in frequency of stools. Symptoms must be chronic, occurring at least weekly and over a period of at least 6 months. The goal of this article is to provide a narrative review of diagnosis and management of the FD in the pediatric population. A comprehensive search of published literature using the PubMed (http://www.ncbi.nlm.nih.gov/pubmed/) database was carried out to identify all articles published in English from 1998 to November 2015, using 3 key terms; ""FD,"" ""functional gastrointestinal disorders,"" and ""children.""",2016-12-01 +28147217,CCG: an integrative resource of cancer protein-coding genes and long noncoding RNAs.,"The identification of cancer genes remains a main aim of cancer research. With the advances of high-throughput sequencing technologies, thousands of novel cancer genes were identified through recurrent mutation analyses and differential expression analyses between normal tissues and tumors in large populations. 
Many databases were developed to document the cancer genes. However, no public database providing both cancer protein-coding genes and cancer lncRNAs is available presently. Here, we present the Catalogue of Cancer Genes (CCG) database (http://ccg.xingene.net), a catalogue of cancer genes. It includes both well-supported and candidate cancer protein-coding genes and cancer lncRNAs collected from literature search and public databases. In addition, uniform genomic aberration information (such as somatic mutation and copy number variation) and drug-gene interactions were assigned to cancer genes in the database. CCG represents an effort on integrative assembly of well-supported and candidate cancer protein-coding and long noncoding RNA genes and takes advantages of high-throughput sequencing results on large populations. With the help of CCG, users can easily access a comprehensive list of cancer genes as well as genomic aberration related with these genes. The availability of integrative information will facilitate the understanding of cancer mechanisms. In addition, drug-gene information in CCG provides a useful guide to the development of new anti-cancer drugs and selection of rational combination therapies.",2016-12-01 +27905517,G4IPDB: A database for G-quadruplex structure forming nucleic acid interacting proteins.,"Nucleic acid G-quadruplex structure (G4) Interacting Proteins DataBase (G4IPDB) is an important database that contains detailed information about proteins interacting with nucleic acids that forms G-quadruplex structures. G4IPDB is the first database that provides comprehensive information about this interaction at a single platform. 
This database contains more than 200 entries with details of interaction such as interacting protein name and their synonyms, their UniProt-ID, source organism, target name and its sequences, ∆Tm, binding/dissociation constants, protein gene name, protein FASTA sequence, interacting residue in protein, related PDB entries, interaction ID, graphical view, PMID, author's name and techniques that were used to detect their interactions. G4IPDB also provides an efficient web-based ""G-quadruplex predictor tool"" that searches putative G-quadruplex forming sequences simultaneously in both sense and anti-sense strands of the query nucleotide sequence and provides the predicted G score. Studying the interaction between proteins and nucleic acids forming G-quadruplex structures could be of therapeutic significance for various diseases including cancer and neurological disease, therefore, having detail information about their interactions on a single platform would be helpful for the discovery and development of novel therapeutics. G4IPDB can be routinely updated (twice in year) and freely available on http://bsbe.iiti.ac.in/bsbe/ipdb/index.php.",2016-12-01 +28588980,Addressing the challenges of diagnostics demand and supply: insights from an online global health discussion platform.,"Several barriers challenge development, adoption and scale-up of diagnostics in low and middle income countries. An innovative global health discussion platform allows capturing insights from the global health community on factors driving demand and supply for diagnostics. We conducted a qualitative content analysis of the online discussion 'Advancing Care Delivery: Driving Demand and Supply of Diagnostics' organised by the Global Health Delivery Project (GHD) (http://www.ghdonline.org/) at Harvard University. 
The discussion, driven by 12 expert panellists, explored what must be done to develop delivery systems, business models, new technologies, interoperability standards, and governance mechanisms to ensure that patients receive the right diagnostic at the right time. The GHD Online (GHDonline) platform reaches over 19 000 members from 185 countries. Participants (N=99) in the diagnostics discussion included academics, non-governmental organisations, manufacturers, policymakers, and physicians. Data was coded and overarching categories analysed using qualitative data analysis software. Participants considered technical characteristics of diagnostics as smaller barriers to effective use of diagnostics compared with operational and health system challenges, such as logistics, poor fit with user needs, cost, workforce, infrastructure, access, weak regulation and political commitment. Suggested solutions included: health system strengthening with patient-centred delivery; strengthened innovation processes; improved knowledge base; harmonised guidelines and evaluation; supply chain innovations; and mechanisms for ensuring quality and capacity. Engaging and connecting different actors involved with diagnostic development and use is paramount for improving diagnostics. While the discussion participants were not representative of all actors involved, the platform enabled a discussion between globally acknowledged experts and physicians working in different countries.",2016-12-02 +21523855,Comprehensive prediction of mRNA splicing effects of BRCA1 and BRCA2 variants.,"Variants of uncertain significance (VUS) in the BRCA1 and BRCA2 genes potentially affecting coding sequence as well as normal splicing activity have confounded predisposition testing in breast cancer. Here, we apply information theory to analyze BRCA1/2 mRNA splicing mutations categorized as VUS. 
The method was validated for 31 of 36 mutations known to cause missplicing in BRCA1/2 and all 26 that do not alter splicing. All single-nucleotide variants in the Breast Cancer Information Resource (BIC; Breast Cancer Information Core Database; http://research.nhgri.nih.gov/bic; last access June 1, 2010) were then analyzed. Information analysis is similar in sensitivity to other predictive methods; however, the thermodynamic basis of the theory also enables splice-site affinity to be determined accurately, which is important for assessing mutations that render natural splice sites partially functional and competition between cryptic and natural splice sites. We report 299 of 2,071 single-nucleotide BIC mutations that are predicted to significantly weaken natural sites and/or strengthen cryptic splice sites, 171 of which are not designated as splicing mutations in the database. Splicing alterations are predicted for 68 of 690 BRCA1 and 60 of 958 BRCA2 mutations designated as VUS. These analyses should be useful in prioritizing suspected mutations for downstream expression studies and for predicting aberrantly spliced isoforms generated by these mutations.",2011-05-05 +30245704,Atmospheric mercury concentrations observed at ground-based monitoring sites globally distributed in the framework of the GMOS network.,"Long-term monitoring of data of ambient mercury (Hg) on a global scale to assess its emission, transport, atmospheric chemistry, and deposition processes is vital to understanding the impact of Hg pollution on the environment. 
The Global Mercury Observation System (GMOS) project was funded by the European Commission (http://www.gmos.eu) and started in November 2010 with the overall goal to develop a coordinated global observing system to monitor Hg on a global scale, including a large network of ground-based monitoring stations, ad hoc periodic oceanographic cruises and measurement flights in the lower and upper troposphere as well as in the lower stratosphere. To date, more than 40 ground-based monitoring sites constitute the global network covering many regions where little to no observational data were available before GMOS. This work presents atmospheric Hg concentrations recorded worldwide in the framework of the GMOS project (2010-2015), analyzing Hg measurement results in terms of temporal trends, seasonality and comparability within the network. Major findings highlighted in this paper include a clear gradient of Hg concentrations between the Northern and Southern hemispheres, confirming that the gradient observed is mostly driven by local and regional sources, which can be anthropogenic, natural or a combination of both.",2016-09-01 +27153595,MGEScan: a Galaxy-based system for identifying retrotransposons in genomes.,"

Unlabelled

MGEScan-long terminal repeat (LTR) and MGEScan-non-LTR are successfully used programs for identifying LTRs and non-LTR retrotransposons in eukaryotic genome sequences. However, these programs are not supported by easy-to-use interfaces nor well suited for data visualization in general data formats. Here, we present MGEScan, a user-friendly system that combines these two programs with a Galaxy workflow system accelerated with MPI and Python threading on compute clusters. MGEScan and Galaxy empower researchers to identify transposable elements in a graphical user interface with ready-to-use workflows. MGEScan also visualizes the custom annotation tracks for mobile genetic elements in public genome browsers. A maximum speed-up of 3.26× is attained for execution time using concurrent processing and MPI on four virtual cores. MGEScan provides four operational modes: as a command line tool, as a Galaxy Toolshed, on a Galaxy-based web server, and on a virtual cluster on the Amazon cloud.

Availability and implementation

MGEScan tutorials and source code are available at http://mgescan.readthedocs.org/

Contact

hatang@indiana.edu or syoh@ajou.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-07 +27915018,Design and surface immobilization of short anti-biofilm peptides.,"Short antimicrobial peptides are essential to keep us healthy and their lasting potency can inspire the design of new types of antibiotics. This study reports the design of a family of eight-residue tryptophan-rich peptides (TetraF2W) obtained by converting the four phenylalanines in temporin-SHf to tryptophans. The temporin-SHf template was identified from the antimicrobial peptide database (http://aps.unmc.edu/AP). Remarkably, the double arginine variant (TetraF2W-RR) was more effective in killing methicillin-resistant Staphylococcus aureus (MRSA) USA300, but less cytotoxic to human skin HaCat and kidney HEK293 cells, than the lysine-containing dibasic combinations (KR, RK and KK). Killing kinetics and fluorescence spectroscopy suggest membrane targeting of TetraF2W-RR, making it more difficult for bacteria to develop resistance. Because established biofilms on medical devices are difficult to remove, we chose to covalently immobilize TetraF2W-RR onto the polyethylene terephthalate (PET) surface to prevent biofilm formation. The successful surface coating of the peptide is supported by FT-IR and XPS spectroscopies, chemical quantification, and antibacterial assays. This peptide-coated surface indeed prevented S. aureus biofilm formation with no cytotoxicity to human cells. In conclusion, TetraF2W-RR is a short Trp-rich peptide with demonstrated antimicrobial and anti-biofilm potency against MRSA in both the free and immobilized forms. Because these short peptides can be synthesized cost effectively, they may be developed into new antimicrobial agents or used as surface coating compounds.

Statement of significance

It is stunning that the total deaths due to methicillin-resistant Staphylococcus aureus (MRSA) infection are comparable to AIDS/HIV-1, making it urgent to explore new possibilities. This study deals with this problem by two strategies. First, we have designed a family of novel antimicrobial peptides with merely eight amino acids, making it cost effective for chemical synthesis. These peptides are potent against MRSA USA300. Our study uncovers that the high potency of the tryptophan-rich short peptide is coupled with arginines, whereas these Trp- and Arg-rich peptides are less toxic to select human cells than the lysine-containing analogs. Such a combination generates a more selective peptide. As a second strategy, we also demonstrate successful covalent immobilization of this short peptide to the polyethylene terephthalate (PET) surface by first using a chitosan linker, which is easy to obtain. Because biofilms on medical devices are difficult to remove by traditional antibiotics, we also show that the peptide coated surface can prevent biofilm formation. Although rarely demonstrated, we provide evidence that both the free and immobilized peptides target bacterial membranes, rendering it difficult for bacteria to develop resistance. Collectively, the significance of our study is the design of novel antimicrobial peptides provides a useful template for developing novel antimicrobials against MRSA. In addition, orientation-specific immobilization of the same short peptide can prevent biofilm formation on the PET surface, which is widely used in making prosthetic heart valves cuffs and other bio devices.",2016-11-30 +27246819,The BioHub Knowledge Base: Ontology and Repository for Sustainable Biosourcing.,"

Background

The motivation for the BioHub project is to create an Integrated Knowledge Management System (IKMS) that will enable chemists to source ingredients from bio-renewables, rather than from non-sustainable sources such as fossil oil and its derivatives.

Method

The BioHubKB is the data repository of the IKMS; it employs Semantic Web technologies, especially OWL, to host data about chemical transformations, bio-renewable feedstocks, co-product streams and their chemical components. Access to this knowledge base is provided to other modules within the IKMS through a set of RESTful web services, driven by SPARQL queries to a Sesame back-end. The BioHubKB re-uses several bio-ontologies and bespoke extensions, primarily for chemical feedstocks and products, to form its knowledge organisation schema.

Results

Parts of plants form feedstocks, while various processes generate co-product streams that contain certain chemicals. Both chemicals and transformations are associated with certain qualities, which the BioHubKB also attempts to capture. Of immediate commercial and industrial importance is to estimate the cost of particular sets of chemical transformations (leading to candidate surfactants) performed in sequence, and these costs too are captured. Data are sourced from companies' internal knowledge and document stores, and from the publicly available literature. Both text analytics and manual curation play their part in populating the ontology. We describe the prototype IKMS, the BioHubKB and the services that it supports for the IKMS.

Availability

The BioHubKB can be found via http://biohub.cs.manchester.ac.uk/ontology/biohub-kb.owl .",2016-06-01 +26954507,CLUSTOM-CLOUD: In-Memory Data Grid-Based Software for Clustering 16S rRNA Sequence Data in the Cloud Environment.,"High-throughput sequencing can produce hundreds of thousands of 16S rRNA sequence reads corresponding to different organisms present in the environmental samples. Typically, analysis of microbial diversity in bioinformatics starts from pre-processing followed by clustering 16S rRNA reads into relatively fewer operational taxonomic units (OTUs). The OTUs are reliable indicators of microbial diversity and greatly accelerate the downstream analysis time. However, existing hierarchical clustering algorithms that are generally more accurate than greedy heuristic algorithms struggle with large sequence datasets. To keep pace with the rapid rise in sequencing data, we present CLUSTOM-CLOUD, which is the first distributed sequence clustering program based on In-Memory Data Grid (IMDG) technology-a distributed data structure to store all data in the main memory of multiple computing nodes. The IMDG technology helps CLUSTOM-CLOUD to enhance both its capability of handling larger datasets and its computational scalability better than its ancestor, CLUSTOM, while maintaining high accuracy. Clustering speed of CLUSTOM-CLOUD was evaluated on published 16S rRNA human microbiome sequence datasets using the small laboratory cluster (10 nodes) and under the Amazon EC2 cloud-computing environments. Under the laboratory environment, it required only ~3 hours to process dataset of size 200 K reads regardless of the complexity of the human microbiome data. In turn, one million reads were processed in approximately 20, 14, and 11 hours when utilizing 20, 30, and 40 nodes on the Amazon EC2 cloud-computing environment. 
The running time evaluation indicates that CLUSTOM-CLOUD can handle much larger sequence datasets than CLUSTOM and is also a scalable distributed processing system. The comparative accuracy test using 16S rRNA pyrosequences of a mock community shows that CLUSTOM-CLOUD achieves higher accuracy than DOTUR, mothur, ESPRIT-Tree, UCLUST and Swarm. CLUSTOM-CLOUD is written in JAVA and is freely available at http://clustomcloud.kopri.re.kr.",2016-03-08 +26797640,"The Global Prevalence of Infections in Urology Study: A Long-Term, Worldwide Surveillance Study on Urological Infections. ","The Global Prevalence of Infections in Urology (GPIU) study is a worldwide-performed point prevalence study intended to create surveillance data on antibiotic resistance, type of urogenital infections, risk factors and data on antibiotic consumption, specifically in patients at urological departments with healthcare-associated urogenital infections (HAUTI). Investigators registered data through a web-based application (http://gpiu.esiu.org/). Data collection includes the practice and characteristics of the hospital and urology ward. On a certain day in November, each year, all urological patients present in the urological department at 8:00 a.m. are screened for HAUTI encompassing their full hospital course from admission to discharge. Apart from the GPIU main study, several side studies are taking place, dealing with transurethral resection of the prostate, prostate biopsy, as well as urosepsis. The GPIU study has been annually performed since 2003. Eight-hundred fifty-six urology units from 70 countries have participated so far, including 27,542 patients. A proxy for antibiotic consumption is reflected by the application rates used for antibiotic prophylaxis for urological interventions. Resistance rates of most uropathogens against antibiotics were high, especially with a note of multidrug resistance. 
The severity of HAUTI is also increasing, 25% being urosepsis in recent years.",2016-01-19 +24048470,BioC: a minimalist approach to interoperability for biomedical text processing.,"A vast amount of scientific information is encoded in natural language text, and the quantity of such text has become so great that it is no longer economically feasible to have a human as the first step in the search process. Natural language processing and text mining tools have become essential to facilitate the search for and extraction of information from text. This has led to vigorous research efforts to create useful tools and to create humanly labeled text corpora, which can be used to improve such tools. To encourage combining these efforts into larger, more powerful and more capable systems, a common interchange format to represent, store and exchange the data in a simple manner between different language processing systems and text mining tools is highly desirable. Here we propose a simple extensible mark-up language format to share text documents and annotations. The proposed annotation approach allows a large number of different annotations to be represented including sentences, tokens, parts of speech, named entities such as genes or diseases and relationships between named entities. In addition, we provide simple code to hold this data, read it from and write it back to extensible mark-up language files and perform some sample processing. We also describe completed as well as ongoing work to apply the approach in several directions. Code and data are available at http://bioc.sourceforge.net/. Database URL: http://bioc.sourceforge.net/",2013-09-18 +27716034,VennDiagramWeb: a web application for the generation of highly customizable Venn and Euler diagrams.,"

Background

Visualization of data generated by high-throughput, high-dimensionality experiments is rapidly becoming a rate-limiting step in computational biology. There is an ongoing need to quickly develop high-quality visualizations that can be easily customized or incorporated into automated pipelines. This often requires an interface for manual plot modification, rapid cycles of tweaking visualization parameters, and the generation of graphics code. To facilitate this process for the generation of highly-customizable, high-resolution Venn and Euler diagrams, we introduce VennDiagramWeb: a web application for the widely used VennDiagram R package. VennDiagramWeb is hosted at http://venndiagram.res.oicr.on.ca/ .

Results

VennDiagramWeb allows real-time modification of Venn and Euler diagrams, with parameter setting through a web interface and immediate visualization of results. It allows customization of essentially all aspects of figures, but also supports integration into computational pipelines via download of R code. Users can upload data and download figures in a range of formats, and there is exhaustive support documentation.

Conclusions

VennDiagramWeb allows the easy creation of Venn and Euler diagrams for computational biologists, and indeed many other fields. Its ability to support real-time graphics changes that are linked to downloadable code that can be integrated into automated pipelines will greatly facilitate the improved visualization of complex datasets. For application support please contact Paul.Boutros@oicr.on.ca.",2016-10-03 +28814324,Cancerouspdomains: comprehensive analysis of cancer type-specific recurrent somatic mutations in proteins and domains.,"

Background

Discriminating driver mutations from the ones that play no role in cancer is a severe bottleneck in elucidating molecular mechanisms underlying cancer development. Since protein domains are representatives of functional regions within proteins, mutations on them may disturb the protein functionality. Therefore, studying mutations at domain level may point researchers to more accurate assessment of the functional impact of the mutations.

Results

This article presents a comprehensive study to map mutations from 29 cancer types to both sequence- and structure-based domains. Statistical analysis was performed to identify candidate domains in which mutations occur with high statistical significance. For each cancer type, the corresponding type-specific domains were distinguished among all candidate domains. Subsequently, cancer type-specific domains facilitated the identification of specific proteins for each cancer type. Besides, performing interactome analysis on specific proteins of each cancer type showed high levels of interconnectivity among them, which implies their functional relationship. To evaluate the role of mitochondrial genes, stem cell-specific genes and DNA repair genes in cancer development, their mutation frequency was determined via further analysis.

Conclusions

This study has provided researchers with a publicly available data repository for studying both CATH and Pfam domain regions on protein-coding genes. Moreover, the associations between different groups of genes/domains and various cancer types have been clarified. The work is available at http://www.cancerouspdomains.ir .",2017-08-16 +27307613,A network-driven approach for genome-wide association mapping.,"

Motivation

It remains a challenge to detect associations between genotypes and phenotypes because of insufficient sample sizes and complex underlying mechanisms involved in associations. Fortunately, it is becoming more feasible to obtain gene expression data in addition to genotypes and phenotypes, giving us new opportunities to detect true genotype-phenotype associations while unveiling their association mechanisms.

Results

In this article, we propose a novel method, NETAM, that accurately detects associations between SNPs and phenotypes, as well as gene traits involved in such associations. We take a network-driven approach: NETAM first constructs an association network, where nodes represent SNPs, gene traits or phenotypes, and edges represent the strength of association between two nodes. NETAM assigns a score to each path from an SNP to a phenotype, and then identifies significant paths based on the scores. In our simulation study, we show that NETAM finds significantly more phenotype-associated SNPs than traditional genotype-phenotype association analysis under false positive control, taking advantage of gene expression data. Furthermore, we applied NETAM on late-onset Alzheimer's disease data and identified 477 significant path associations, among which we analyzed paths related to beta-amyloid, estrogen, and nicotine pathways. We also provide hypothetical biological pathways to explain our findings.

Availability and implementation

Software is available at http://www.sailing.cs.cmu.edu/

Contact

: epxing@cs.cmu.edu.",2016-06-01 +27903894,dbSAP: single amino-acid polymorphism database for protein variation detection.,"Millions of human single nucleotide polymorphisms (SNPs) or mutations have been identified so far, and these variants could be strongly correlated with phenotypic variations of traits/diseases. Among these variants, non-synonymous ones can result in amino-acid changes that are called single amino-acid polymorphisms (SAPs). Although some studies have tried to investigate the SAPs, only a small fraction of SAPs have been identified due to inadequately inferred protein variation database and the low coverage of mass spectrometry (MS) experiments. Here, we present the dbSAP database for conveniently accessing the comprehensive information and relationships of spectra, peptides and proteins of SAPs, as well as related genes, pathways, diseases and drug targets. In order to fully explore human SAPs, we built a customized protein database that contained comprehensive variant proteins by integrating and annotating the human SNPs and mutations from eight distinct databases (UniProt, Protein Mutation Database, HPMD, MSIPI, MS-CanProVar, dbSNP, Ensembl and COSMIC). After a series of quality controls, a total of 16 854 SAP peptides involving in 439 537 spectra were identified with large scale MS datasets from various human tissues and cell lines. dbSAP is freely available at http://www.megabionet.org/dbSAP/index.html.",2016-11-29 +27899672,The SWISS-MODEL Repository-new features and functionality.,"SWISS-MODEL Repository (SMR) is a database of annotated 3D protein structure models generated by the automated SWISS-MODEL homology modeling pipeline. It currently holds >400 000 high quality models covering almost 20% of Swiss-Prot/UniProtKB entries. In this manuscript, we provide an update of features and functionalities which have been implemented recently. 
We address improvements in target coverage, model quality estimates, functional annotations and improved in-page visualization. We also introduce a new update concept which includes regular updates of an expanded set of core organism models and UniProtKB-based targets, complemented by user-driven on-demand update of individual models. With the new release of the modeling pipeline, SMR has implemented a REST-API and adopted an open licencing model for accessing model coordinates, thus enabling bulk download for groups of targets fostering re-use of models in other contexts. SMR can be accessed at https://swissmodel.expasy.org/repository.",2016-11-29 +27899635,InterPro in 2017-beyond protein family and domain annotations.,"InterPro (http://www.ebi.ac.uk/interpro/) is a freely available database used to classify protein sequences into families and to predict the presence of important domains and sites. InterProScan is the underlying software that allows both protein and nucleic acid sequences to be searched against InterPro's predictive models, which are provided by its member databases. Here, we report recent developments with InterPro and its associated software, including the addition of two new databases (SFLD and CDD), and the functionality to include residue-level annotation and prediction of intrinsic disorder. These developments enrich the annotations provided by InterPro, increase the overall number of residues annotated and allow more specific functional inferences.",2016-11-29 +27899620,MRPrimerV: a database of PCR primers for RNA virus detection.,"Many infectious diseases are caused by viral infections, and in particular by RNA viruses such as MERS, Ebola and Zika. To understand viral disease, detection and identification of these viruses are essential. Although PCR is widely used for rapid virus identification due to its low cost and high sensitivity and specificity, very few online database resources have compiled PCR primers for RNA viruses. 
To effectively detect viruses, the MRPrimerV database (http://MRPrimerV.com) contains 152 380 247 PCR primer pairs for detection of 1818 viruses, covering 7144 coding sequences (CDSs), representing 100% of the RNA viruses in the most up-to-date NCBI RefSeq database. Due to rigorous similarity testing against all human and viral sequences, every primer in MRPrimerV is highly target-specific. Because MRPrimerV ranks CDSs by the penalty scores of their best primer, users need only use the first primer pair for a single-phase PCR or the first two primer pairs for two-phase PCR. Moreover, MRPrimerV provides the list of genome neighbors that can be detected using each primer pair, covering 22 192 variants of 532 RefSeq RNA viruses. We believe that the public availability of MRPrimerV will facilitate viral metagenomics studies aimed at evaluating the variability of viruses, as well as other scientific tasks.",2016-11-29 +27829219,Altered expression of miRNAs and methylation of their promoters are correlated in neuroblastoma.,"Neuroblastoma is the most common human extracranial solid tumor during infancy. Involvement of several miRNAs in its pathogenesis has been ascertained. Interestingly, most of their encoding genes reside in hypermethylated genomic regions: thus, their tumor suppressor function is normally disallowed in these tumors. To date, the therapeutic role of the demethylating agent 5'-Aza-2 deoxycytidine (5'-AZA) and its effects on miRNAome modulation in neuroblastoma have not been satisfactorily explored. Starting from a high-throughput expression profiling of 754 miRNAs and based on a proper selection, we focused on miR-29a-3p, miR-34b-3p, miR-181c-5p and miR-517a-3p as candidate miRNAs for our analysis. They resulted downregulated in four neuroblastoma cell lines with respect to normal adrenal gland. MiRNAs 29a-3p and 34b-3p also resulted downregulated in vivo in a murine neuroblastoma progression model. 
Unlike the amount of methylation of their encoding gene promoters, all these miRNAs were significantly overexpressed following treatment with 5'-AZA. Transfection with candidate miRNAs mimics significantly decreased neuroblastoma cells proliferation rate. A lower expression of miR-181c was significantly associated to a worse overall survival in a public dataset of 498 neuroblastoma samples (http://r2.amc.nl). Our data strongly suggest that CDK6, DNMT3A, DNMT3B are targets of miR-29a-3p, while CCNE2 and E2F3 are targets of miR-34b-3p. Based on all these data, we propose that miR-29a-3p, miR-34b-3p, miR-181c-5p and miR-517a-3p are disallowed tumor suppressor genes in neuroblastoma and suggest them as new therapeutic targets in neuroblastoma.",2016-12-01 +28293068,A web-based microsatellite database for the Magnaporthe oryzae genome.,"Microsatellites have been widely utilized for molecular marker development. Codominant and multiallelic nature of these simple repeats have several advantages over other types of molecular markers. Their broad applicability in the area of molecular biology like gene mapping, genome characterization, genome evolution, and gene regulation has been reported in various crop plants, animals and fungi. Considering these benefits of the SSR markers, a MMDB (Magnaporthe oryzae Microsatellite Database) was developed to help in understanding about the pathogen and its diversity at strains level of a particular geographic region, which can help us to make a proper utilization of blast resistance genes in the region. This microsatellite database is based on whole genome sequence of two M. oryzae isolates, RML-29 (2665 SSRs from 43037792 bp) and RP-2421 (3169 SSRs from 45510614 bp). Although, first M. oryzae genome (70-15) was sequenced in 2005, but this sequenced isolate is not a true field isolate of M. oryzae. Therefore, MMDB has great potential in the study of diversification and characterization of M. oryzae and other related fungi.

Availability

http://14.139.229.199/home.aspx.",2016-11-29 +27899674,CDD/SPARCLE: functional classification of proteins via subfamily domain architectures.,"NCBI's Conserved Domain Database (CDD) aims at annotating biomolecular sequences with the location of evolutionarily conserved protein domain footprints, and functional sites inferred from such footprints. An archive of pre-computed domain annotation is maintained for proteins tracked by NCBI's Entrez database, and live search services are offered as well. CDD curation staff supplements a comprehensive collection of protein domain and protein family models, which have been imported from external providers, with representations of selected domain families that are curated in-house and organized into hierarchical classifications of functionally distinct families and sub-families. CDD also supports comparative analyses of protein families via conserved domain architectures, and a recent curation effort focuses on providing functional characterizations of distinct subfamily architectures using SPARCLE: Subfamily Protein Architecture Labeling Engine. CDD can be accessed at https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml.",2016-11-29 +24143056,MAGICdb - Mango Genetic stocks Identification and Characterisation database.,"MAGICdb is a unique database that integrates the morphological, fruit quality and the marker data of most popular and widely cultivated commercially important mango cultivars. The main objective of MAGICdb is to provide the end users with an integrated dataset of each mango variety cultivated widely in Tamil Nadu. MAGICdb structure is categorized in to three domains namely Morphological Data Search, Fruit Quality Search and Marker Search which in further contains details on Tree Character, Bearing Habit, Season of fruiting, Number of inflorescence/Sq.m, Percentage of hermaphrodite flower(%), Fruit set percentage(%), Number of fruits/ tree, Fruit weight (g) and, Yield (Kg/ tree). 
This database is equipped with a user friendly interface enabling the users to retrieve the information with ease. Database is available at http://www.tnaugenomics.com/mango/index.php.",2013-09-23 +21480662,GlycoFly: a database of Drosophila N-linked glycoproteins identified using SPEG--MS techniques.,"Protein glycosylation affects cellular functions of the central nervous system (CNS). Its deficiency leads to neurological disorders such as ataxia, paralysis, learning disability, mental retardation, and memory loss. However, the glycoproteins that are responsible for these diseases are not well characterized. In this study, Drosophila melanogaster was used as a model organism to identify the N-glycosylated proteins and N-glycosylation sites of its CNS by means of proteomics. Adult fly heads were digested with chymotrypsin or trypsin and the N-linked glycopeptides were captured using solid phase extraction of N-linked glycopeptides (SPEG) technique followed by mass spectrometry (MS) analysis using LTQ OrbiTrap Velos. Three hundred and thirty new and 147 previously known glycoproteins were identified from 721 uniquely detected peptides that have 740 NXS/T glycosylation sites. The N-glycosylation sites were highly abundant in cell adhesion, ion channel, and ion binding molecules, which are important for nerve maturation, organ development, axon guidance, learning, and memory. Identification of the N-glycosylated sites of these proteins will enhance our knowledge of these proteins and serve as a basis for future studies to address the roles of these proteins in neurological function and disorders. A database for Drosophila N-linked glycopeptides ( http://betenbaugh.jhu.edu/GlycoFly ) has been established in this study as a resource for study of neurological disorders.",2011-04-25 +27899662,"KEGG: new perspectives on genomes, pathways, diseases and drugs.","KEGG (http://www.kegg.jp/ or http://www.genome.jp/kegg/) is an encyclopedia of genes and genomes. 
Assigning functional meanings to genes and genomes both at the molecular and higher levels is the primary objective of the KEGG database project. Molecular-level functions are stored in the KO (KEGG Orthology) database, where each KO is defined as a functional ortholog of genes and proteins. Higher-level functions are represented by networks of molecular interactions, reactions and relations in the forms of KEGG pathway maps, BRITE hierarchies and KEGG modules. In the past the KO database was developed for the purpose of defining nodes of molecular networks, but now the content has been expanded and the quality improved irrespective of whether or not the KOs appear in the three molecular network databases. The newly introduced addendum category of the GENES database is a collection of individual proteins whose functions are experimentally characterized and from which an increasing number of KOs are defined. Furthermore, the DISEASE and DRUG databases have been improved by systematic analysis of drug labels for better integration of diseases and drugs with the KEGG molecular networks. KEGG is moving towards becoming a comprehensive knowledge base for both functional interpretation and practical application of genomic information.",2016-11-28 +28855832,Using feature optimization-based support vector machine method to recognize the β-hairpin motifs in enzymes.,"β-Hairpins in enzyme, a kind of special protein with catalytic functions, contain many binding sites which are essential for the functions of enzyme. With the increasing number of observed enzyme protein sequences, it is of especial importance to use bioinformatics techniques to quickly and accurately identify the β-hairpin in enzyme protein for further advanced annotation of structure and function of enzyme. In this work, the proposed method was trained and tested on a non-redundant enzyme β-hairpin database containing 2818 β-hairpins and 1098 non-β-hairpins. 
With 5-fold cross-validation on the training dataset, the overall accuracy of 90.08% and Matthew's correlation coefficient (Mcc) of 0.74 were obtained, while on the independent test dataset, the overall accuracy of 88.93% and Mcc of 0.76 were achieved. Furthermore, the method was validated on 845 β-hairpins with ligand binding sites. With 5-fold cross-validation on the training dataset and independent test on the test dataset, the overall accuracies were 85.82% (Mcc of 0.71) and 84.78% (Mcc of 0.70), respectively. With an integration of mRMR feature selection and SVM algorithm, a reasonable high accuracy was achieved, indicating the method to be an effective tool for the further studies of β-hairpins in enzymes structure. Additionally, as a novelty for function prediction of enzymes, β-hairpins with ligand binding sites were predicted. Based on this work, a web server was constructed to predict β-hairpin motifs in enzymes (http://202.207.29.251:8080/).",2016-11-28 +27899583,XTalkDB: a database of signaling pathway crosstalk.,"Analysis of signaling pathways and their crosstalk is a cornerstone of systems biology. Thousands of papers have been published on these topics. Surprisingly, there is no database that carefully and explicitly documents crosstalk between specific pairs of signaling pathways. We have developed XTalkDB (http://www.xtalkdb.org) to fill this very important gap. XTalkDB contains curated information for 650 pairs of pathways from over 1600 publications. In addition, the database reports the molecular components (e.g. proteins, hormones, microRNAs) that mediate crosstalk between a pair of pathways and the species and tissue in which the crosstalk was observed. The XTalkDB website provides an easy-to-use interface for scientists to browse crosstalk information by querying one or more pathways or molecules of interest.",2016-11-28 +27994650,Prediction of reacting atoms for the major biotransformation reactions of organic xenobiotics.,"

Background

The knowledge of drug metabolite structures is essential at the early stage of drug discovery to understand the potential liabilities and risks connected with biotransformation. The determination of the site of a molecule at which a particular metabolic reaction occurs could be used as a starting point for metabolite identification. The prediction of the site of metabolism does not always correspond to the particular atom that is modified by the enzyme but rather is often associated with a group of atoms. To overcome this problem, we propose to operate with the term ""reacting atom"", corresponding to a single atom in the substrate that is modified during the biotransformation reaction. The prediction of the reacting atom(s) in a molecule for the major classes of biotransformation reactions is necessary to generate drug metabolites.

Results

Substrates of the major human cytochromes P450 and UDP-glucuronosyltransferases from the Biovia Metabolite database were divided into nine groups according to their reaction classes, which are aliphatic and aromatic hydroxylation, N- and O-glucuronidation, N-, S- and C-oxidation, and N- and O-dealkylation. Each training set consists of positive and negative examples of structures with one labelled atom. In the positive examples, the labelled atom is the reacting atom of a particular reaction that changed adjacency. Negative examples represent non-reacting atoms of a particular reaction. We used Labelled Multilevel Neighbourhoods of Atoms descriptors for the designation of reacting atoms. A Bayesian-like algorithm was applied to estimate the structure-activity relationships. The average invariant accuracy of prediction obtained in leave-one-out and 20-fold cross-validation procedures for five human isoforms of cytochrome P450 and all isoforms of UDP-glucuronosyltransferase varies from 0.86 to 0.99 (0.96 on average).

Conclusions

We report that reacting atoms may be predicted with reasonable accuracy for the major classes of metabolic reactions-aliphatic and aromatic hydroxylation, N- and O-glucuronidation, N-, S- and C-oxidation, and N- and O-dealkylation. The proposed method is implemented as a freely available web service at http://www.way2drug.com/RA and may be used for the prediction of the most probable biotransformation reaction(s) and the appropriate reacting atoms in drug-like compounds.Graphical abstract.",2016-11-28 +26981420,Genome-wide RNA-seq and ChIP-seq reveal Linc-YY1 function in regulating YY1/PRC2 activity during skeletal myogenesis.,"Little is known how lincRNAs are involved in skeletal myogenesis. Here we describe the discovery and functional annotation of Linc-YY1, a novel lincRNA originating from the promoter of the transcription factor (TF) Yin Yang 1 (YY1). Starting from whole transcriptome shotgun sequencing (a.k.a. RNA-seq) data from muscle C2C12 cells, a series of bioinformatics analysis was applied towards the identification of hundreds of high-confidence novel lincRNAs. Genome-wide approaches were then employed to demonstrate that Linc-YY1 functions to promote myogenesis through associating with YY1 and regulating YY1/PRC2 transcriptional activity in trans. Here we describe the details of the ChIP-seq, RNA-seq experiments, and data analysis procedures associated with the study published by Zhou and colleagues in the Nature Communications Journal in 2015 Zhou et al. (2015) [1]. The data was deposited on NCBI's Gene Expression Omnibus (GEO, http://www.ncbi.nlm.nih.gov/geo/) with accession number GSE74049.",2016-02-02 +23920929,Program for validation of aggregated hospital discharge data.,"Hospitals are major providers of health services and analysis of hospital activity data is of great interest for both policy makers and public health researchers. 
The WHO Regional Office for Europe disseminates the hospital discharge data from European countries through the European Hospital Morbidity Database, available on http://data.euro.who.int/hmdb. In order to ensure that reliable high quality data on hospital activities can be published in a timely manner, a program for validation of hospital discharge data has been developed using the R language for statistical computing. This program has been in use since the October 2012 version of the European Hospital Morbidity Database and its use has contributed to improved quality and comparability of data on hospital activities across Europe.",2013-01-01 +28399981,Efficacy and Safety of SGLT2 Inhibitors in Patients with Type 1 Diabetes: A Meta-analysis of Randomized Controlled Trials.,"Objective To assess the efficiency and safety of a novel sodium-glucose co-transporter 2 (SGLT2) inhibitor-SGLT2 inhibitors, in combination with insulin for type 1 diabetes mellitus (T1DM). Methods We searched Medline, Embase, and the Cochrane Collaboration Library to identify the eligible studies published between January 2010 and July 2016 without restriction of language. The Food and Drug Administration (FDA) data and ClinicalTrials (http://www.clinicaltrials.gov) were also searched. The included studies met the following criteria: randomized controlled trials; T1DM patients aged between 18 and 65 years old; patients were treated with insulin plus SGLT2 inhibitors for more than 2 weeks; patients' glycosylated hemoglobin (HbA1c) levels were between 7% and 12%. The SGLT2 inhibitors group was treated with SGLT2 inhibitors plus insulin, and the placebo group received placebo plus insulin treatment. The outcomes should include one of the following items: fasting blood glucose, HbA1c, glycosuria, or adverse effects. Data were analyzed by two physicians independently. 
The risk of bias was evaluated by using the Cochrane Collaboration's Risk of Bias tool and heterogeneity among studies was assessed using Chi-square test. Random effect model was used to analyze the treatment effects with Revman 5.3.Results Three trials including 178 patients were enrolled. As compared to the placebo group, SGLT2 inhibitor absolutely decreased fasting blood glucose [mean differences (MD) -2.47 mmol/L, 95% confidence interval (CI) -3.65 to -1.28, P<0.001] and insulin dosage (standardized MD -0.75 U, 95%CI -1.17 to -0.33, P<0.001). SGLT2 inhibitors could also increase the excretion of urine glucose (MD 131.09 g/24 h, 95%CI 91.79 to 170.39, P<0.001). There were no significant differences in the incidences of hyperglycemia [odds ratio (OR) 1.82, 95%CI 0.63 to 5.29, P=0.27], urinary tract infection (OR 0.95, 95%CI 0.19 to 4.85, P=0.95), genital tract infection (OR 0.27, 95%CI 0.01 to 7.19, P=0.43), and diabetic ketoacidosis (OR 6.03, 95%CI 0.27 to 135.99, P=0.26) between the two groups.Conclusion SGLT2 inhibitors combined with insulin might be an efficient and safe treatment modality for T1DM patients.",2017-04-01 +28453673,VEXOR: an integrative environment for prioritization of functional variants in fine-mapping analysis.,"

Motivation

The identification of the functional variants responsible for observed genome-wide association studies (GWAS) signals is one of the most challenging tasks of the post-GWAS research era. Several tools have been developed to annotate genetic variants by their genomic location and potential functional implications. Each of these tools has its own requirements and internal logic, which forces the user to become acquainted with each interface.

Results

From an awareness of the amount of work needed to analyze a single locus, we have built a flexible, versatile and easy-to-use web interface designed to help in prioritizing variants and predicting their potential functional implications. This interface acts as a single-point of entry linking association results with reference tools and relevant experiments.

Availability and implementation

VEXOR is an integrative web application implemented through the Shiny framework and available at: http://romix.genome.ulaval.ca/vexor.

Contact

arnaud.droit@crchuq.ulaval.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +28893723,Scientific Challenges in the Risk Assessment of Food Contact Materials.,"

Background

Food contact articles (FCAs) are manufactured from food contact materials (FCMs) that include plastics, paper, metal, glass, and printing inks. Chemicals can migrate from FCAs into food during storage, processing, and transportation. Food contact materials' safety is evaluated using chemical risk assessment (RA). Several challenges to the RA of FCAs exist.

Objectives

We review regulatory requirements for RA of FCMs in the United States and Europe, identify gaps in RA, and highlight opportunities for improving the protection of public health. We intend to initiate a discussion in the wider scientific community to enhance the safety of food contact articles.

Discussion

Based on our evaluation of the evidence, we conclude that current regulations are insufficient for addressing chemical exposures from FCAs. RA currently focuses on monomers and additives used in the manufacture of products, but it does not cover all substances formed in the production processes. Several factors hamper effective RA for many FCMs, including a lack of information on chemical identity, inadequate assessment of hazardous properties, and missing exposure data. Companies make decisions about the safety of some food contact chemicals (FCCs) without review by public authorities. Some chemical migration limits cannot be enforced because analytical standards are unavailable.

Conclusion

We think that exposures to hazardous substances migrating from FCAs require more attention. We recommend a) limiting the number and types of chemicals authorized for manufacture and b) developing novel approaches for assessing the safety of chemicals in FCAs, including unidentified chemicals that form during or after production. https://doi.org/10.1289/EHP644.",2017-09-11 +25428363,The BioGRID interaction database: 2015 update.,"The Biological General Repository for Interaction Datasets (BioGRID: http://thebiogrid.org) is an open access database that houses genetic and protein interactions curated from the primary biomedical literature for all major model organism species and humans. As of September 2014, the BioGRID contains 749,912 interactions as drawn from 43,149 publications that represent 30 model organisms. This interaction count represents a 50% increase compared to our previous 2013 BioGRID update. BioGRID data are freely distributed through partner model organism databases and meta-databases and are directly downloadable in a variety of formats. In addition to general curation of the published literature for the major model species, BioGRID undertakes themed curation projects in areas of particular relevance for biomedical sciences, such as the ubiquitin-proteasome system and various human disease-associated interaction networks. BioGRID curation is coordinated through an Interaction Management System (IMS) that facilitates the compilation interaction records through structured evidence codes, phenotype ontologies, and gene annotation. 
The BioGRID architecture has been improved in order to support a broader range of interaction and post-translational modification types, to allow the representation of more complex multi-gene/protein interactions, to account for cellular phenotypes through structured ontologies, to expedite curation through semi-automated text-mining approaches, and to enhance curation quality control.",2014-11-26 +26428292,Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data.,"

Motivation

Detection of random errors and systematic biases is a crucial step of a robust pipeline for processing high-throughput sequencing (HTS) data. Bioinformatics software tools capable of performing this task are available, either for general analysis of HTS data or targeted to a specific sequencing technology. However, most of the existing QC instruments only allow processing of one sample at a time.

Results

Qualimap 2 represents a next step in the QC analysis of HTS data. Along with comprehensive single-sample analysis of alignment data, it includes new modes that allow simultaneous processing and comparison of multiple samples. As with the first version, the new features are available via both graphical and command line interface. Additionally, it includes a large number of improvements proposed by the user community.

Availability and implementation

The implementation of the software along with documentation is freely available at http://www.qualimap.org.

Contact

meyer@mpiib-berlin.mpg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-01 +24130375,Sexually transmitted diseases putative drug target database: a comprehensive database of putative drug targets of pathogens identified by comparative genomics.,"

Objective

Sexually transmitted diseases (STD) are the serious public health problems and also impose a financial burden on the economy. Sexually transmitted infections are cured with single or multiple antibiotics. However, in many cases the organism showed persistence even after treatment. In the current study, the set of druggable targets in STD pathogens have been identified by comparative genomics.

Materials and methods

The subtractive genomics scheme exploits the properties of non-homology, essentiality, membrane localization and metabolic pathway uniqueness in identifying the drug targets. To achieve the effective use of data and to understand properties of drug target under single canopy, an integrated knowledge database of drug targets in STD bacteria was created. Data for each drug targets include biochemical pathway, function, cellular localization, essentiality score and structural details.

Results

The proteome of STD pathogens yielded 44 membrane associated proteins possessing unique metabolic pathways when subjected to the algorithm. The database can be accessed at http://biomedresearchasia.org/index.html.

Conclusion

Diverse data merged in the common framework of this database is expected to be valuable not only for basic studies in clinical bioinformatics, but also for basic studies in immunological, biotechnological and clinical fields.",2013-09-01 +26691694,Dintor: functional annotation of genomic and proteomic data.,"

Background

During the last decade, a great number of extremely valuable large-scale genomics and proteomics datasets have become available to the research community. In addition, dropping costs for conducting high-throughput sequencing experiments and the option to outsource them considerably contribute to an increasing number of researchers becoming active in this field. Even though various computational approaches have been developed to analyze these data, it is still a laborious task involving prudent integration of many heterogeneous and frequently updated data sources, creating a barrier for interested scientists to accomplish their own analysis.

Results

We have implemented Dintor, a data integration framework that provides a set of over 30 tools to assist researchers in the exploration of genomics and proteomics datasets. Each of the tools solves a particular task and several tools can be combined into data processing pipelines. Dintor covers a wide range of frequently required functionalities, from gene identifier conversions and orthology mappings to functional annotation of proteins and genetic variants up to candidate gene prioritization and Gene Ontology-based gene set enrichment analysis. Since the tools operate on constantly changing datasets, we provide a mechanism to unambiguously link tools with different versions of archived datasets, which guarantees reproducible results for future tool invocations. We demonstrate a selection of Dintor's capabilities by analyzing datasets from four representative publications. The open source software can be downloaded and installed on a local Unix machine. For reasons of data privacy it can be configured to retrieve local data only. In addition, the Dintor tools are available on our public Galaxy web service at http://dintor.eurac.edu .

Conclusions

Dintor is a computational annotation framework for the analysis of genomic and proteomic datasets, providing a rich set of tools that cover the most frequently encountered tasks. A major advantage is its capability to consistently handle multiple versions of tool-associated datasets, supporting the researcher in delivering reproducible results.",2015-12-21 +24259432,RefSeq: an update on mammalian reference sequences.,"The National Center for Biotechnology Information (NCBI) Reference Sequence (RefSeq) database is a collection of annotated genomic, transcript and protein sequence records derived from data in public sequence archives and from computation, curation and collaboration (http://www.ncbi.nlm.nih.gov/refseq/). We report here on growth of the mammalian and human subsets, changes to NCBI's eukaryotic annotation pipeline and modifications affecting transcript and protein records. Recent changes to NCBI's eukaryotic genome annotation pipeline provide higher throughput, and the addition of RNAseq data to the pipeline results in a significant expansion of the number of transcripts and novel exons annotated on mammalian RefSeq genomes. Recent annotation changes include reporting supporting evidence for transcript records, modification of exon feature annotation and the addition of a structured report of gene and sequence attributes of biological interest. We also describe a revised protein annotation policy for alternatively spliced transcripts with more divergent predicted proteins and we summarize the current status of the RefSeqGene project.",2013-11-19 +27685097,Resolving macromolecular structures from electron cryo-tomography data using subtomogram averaging in RELION.,"Electron cryo-tomography (cryo-ET) is a technique that is used to produce 3D pictures (tomograms) of complex objects such as asymmetric viruses, cellular organelles or whole cells from a series of tilted electron cryo-microscopy (cryo-EM) images. 
Averaging of macromolecular complexes found within tomograms is known as subtomogram averaging, and this technique allows structure determination of macromolecular complexes in situ. Subtomogram averaging is also gaining in popularity for the calculation of initial models for single-particle analysis. We describe herein a protocol for subtomogram averaging from cryo-ET data using the RELION software (http://www2.mrc-lmb.cam.ac.uk/relion). RELION was originally developed for cryo-EM single-particle analysis, and the subtomogram averaging approach presented in this protocol has been implemented in the existing workflow for single-particle analysis so that users may conveniently tap into existing capabilities of the RELION software. We describe how to calculate 3D models for the contrast transfer function (CTF) that describe the transfer of information in the imaging process, and we illustrate the results of classification and subtomogram averaging refinement for cryo-ET data of purified hepatitis B capsid particles and Saccharomyces cerevisiae 80S ribosomes. Using the steps described in this protocol, along with the troubleshooting and optimization guidelines, high-resolution maps can be obtained in which secondary structure elements are resolved subtomogram.",2016-09-29 +24122843,HTS-DB: an online resource to publish and query data from functional genomics high-throughput siRNA screening projects.,"High-throughput screening (HTS) uses technologies such as RNA interference to generate loss-of-function phenotypes on a genomic scale. As these technologies become more popular, many research institutes have established core facilities of expertise to deal with the challenges of large-scale HTS experiments. As the efforts of core facility screening projects come to fruition, focus has shifted towards managing the results of these experiments and making them available in a useful format that can be further mined for phenotypic discovery. 
The HTS-DB database provides a public view of data from screening projects undertaken by the HTS core facility at the CRUK London Research Institute. All projects and screens are described with comprehensive assay protocols, and datasets are provided with complete descriptions of analysis techniques. This format allows users to browse and search data from large-scale studies in an informative and intuitive way. It also provides a repository for additional measurements obtained from screens that were not the focus of the project, such as cell viability, and groups these data so that it can provide a gene-centric summary across several different cell lines and conditions. All datasets from our screens that can be made available can be viewed interactively and mined for further hit lists. We believe that in this format, the database provides researchers with rapid access to results of large-scale experiments that might facilitate their understanding of genes/compounds identified in their own research. DATABASE URL: http://hts.cancerresearchuk.org/db/public.",2013-10-11 +27942460,Transcription profiling data set of different states of Mycoplasma gallisepticum.,"Mycoplasma gallisepticum belongs to class Mollicutes and causes chronic respiratory disease in birds. It has a reduced genome, lack of cell wall and many metabolic pathways, and also easy to culture and non-pathogenic to humans. Aforementioned made it is a convenient model for studying of systems biology of minimal cell. Studying the transcriptomic level of M. gallisepticum is interesting for both understanding of common principles of transcription regulation of minimal cell and response to definite influence for pathogen bacteria. For rapid investigation of gene expression we developed microarray design including 3366 probes for 678 genes. They included 665 protein coding sequences and 13 antisense RNAs from 816 genes and 17 ncRNAs present in Mycoplasma gallisepticum. 
The study was performed on Agilent one-color microarray with custom design and random-T7 polymerase primer for cDNA synthesis. Here we present the data for transcription profiling of M. gallisepticum under different types of exposures: genetic knock-out mutants, cell culture exposed to sublethal concentrations of antibiotics and well-characterized heat stress effect. Mutants have transposon insertion to hypothetical membrane protein, lactate dehydrogenase, helicase with unknown function, 1-deoxy-d-xylulose 5-phosphate reductoisomerase or potential sigma factor. For inhibition of important cell systems, treatment with carbonyl cyanide m-chlorophenylhydrazone (CCCP), novobiocin or tetracycline were chosen. Data are available via NCBI Gene Expression Omnibus (GEO) with the accession number GSE85777 (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE85777).",2016-11-29 +27408916,"Glyco-centric lectin magnetic bead array (LeMBA) - proteomics dataset of human serum samples from healthy, Barrett׳s esophagus and esophageal adenocarcinoma individuals.","This data article describes serum glycoprotein biomarker discovery and qualification datasets generated using lectin magnetic bead array (LeMBA) - mass spectrometry techniques, ""Serum glycoprotein biomarker discovery and qualification pipeline reveals novel diagnostic biomarker candidates for esophageal adenocarcinoma"" [1]. Serum samples collected from healthy, metaplastic Barrett׳s esophagus (BE) and esophageal adenocarcinoma (EAC) individuals were profiled for glycoprotein subsets via differential lectin binding. The biomarker discovery proteomics dataset consisting of 20 individual lectin pull-downs for 29 serum samples with a spiked-in internal standard chicken ovalbumin protein has been deposited in the PRIDE partner repository of the ProteomeXchange Consortium with the data set identifier PRIDE: PXD002442. 
Annotated MS/MS spectra for the peptide identifications can be viewed using MS-Viewer (〈http://prospector2.ucsf.edu/prospector/cgi-bin/msform.cgi?form=msviewer〉) using search key ""jn7qafftux"". The qualification dataset contained 6-lectin pulldown-coupled multiple reaction monitoring-mass spectrometry (MRM-MS) data for 41 protein candidates, from 60 serum samples. This dataset is available as a supplemental files with the original publication [1].",2016-04-01 +28807887,An integrative system biology approach to unravel potential drug candidates for multiple age related disorders.,"Aging, though an inevitable part of life, is becoming a worldwide social and economic problem. Healthy aging is usually marked by low probability of age related disorders. Good therapeutic approaches are still in need to cure age related disorders. Occurrence of more than one ARD in an individual, expresses the need of discovery of such target proteins, which can affect multiple ARDs. Advanced scientific and medical research technologies throughout last three decades have arrived to the point where lots of key molecular determinants affect human disorders can be examined thoroughly. In this study, we designed and executed an approach to prioritize drugs that may target multiple age related disorders. Our methodology, focused on the analysis of biological pathways and protein protein interaction networks that may contribute to the pharmacology of age related disorders, included various steps such as retrieval and analysis of data, protein-protein interaction network analysis, and statistical and comparative analysis of topological coefficients, pathway, and functional enrichment analysis, and identification of drug-target proteins. We assume that the identified molecular determinants may be prioritized for further screening as novel drug targets to cure multiple ARDs. 
Based on the analysis, an online tool named as 'ARDnet' has been developed to construct and demonstrate ARD interactions at the level of PPI, ARDs and ARDs protein interaction, ARDs pathway interaction and drug-target interaction. The tool is freely made available at http://genomeinformatics.dtu.ac.in/ARDNet/Index.html.",2017-08-12 +28661960,Longitudinal study of the influence of lung function on vascular health from adolescence to early adulthood in a British multiethnic cohort.,"

Background

Vascular and lung function develop and decline over the life course; both predict cardiovascular events and mortality but little is known of how they develop over time. We analysed their relationship in a multiethnic cohort study to test whether lung function from early adolescence to young adulthood affected vascular indices.

Methods

'DASH' (http://dash.sphsu.mrc.ac.uk) included 6643 children aged 11-13 years in 2003; a representative 10% sample (n = 665) participated in a pilot follow-up in 2013. Psychosocial, anthropometric, blood pressure (BP), and lung function measures were collected in both surveys; aortic pulse wave velocity (PWV) and augmentation index (AIx) were measured at aged 21-23 years. Relationships between forced expiratory volume Z-scores in 1 s (zFEV1), after global initiative-ethnic adjustments and BP, PWV, and AIx were tested in linear regression and general estimating statistical models.

Results

In total, 488 people with complete data were included. At 11-13 years, SBP was positively associated with zFEV1 (coefficient = 1.90, 95% confidence interval 1.11-2.68, P < 0.001); but not at 21-23 years. The 10-year increase in zFEV1 was associated with rise in SBP (1.38, 0.25-1.51, P < 0.05) in mixed effect models adjusted for age, sex, ethnicity, waist to height ratio, employment, reported racism, smoking, and alcohol use but DBP change was unrelated. In fully adjusted models, neither PWV nor central AIx were associated with zFEV1 at 11-13 years or 21-23 years (P > 0.05).

Conclusion

Forced expiratory volume change is positively and independently associated with SBP change from adolescence to young adulthood, suggesting earlier lung function plays important roles in SBP development. Vascular indices were unrelated to lung function or its change.",2017-11-01 +28035026,InMoDe: tools for learning and visualizing intra-motif dependencies of DNA binding sites.,"

Summary

Recent studies have shown that the traditional position weight matrix model is often insufficient for modeling transcription factor binding sites, as intra-motif dependencies play a significant role for an accurate description of binding motifs. Here, we present the Java application InMoDe, a collection of tools for learning, leveraging and visualizing such dependencies of putative higher order. The distinguishing feature of InMoDe is a robust model selection from a class of parsimonious models, taking into account dependencies only if justified by the data while choosing for simplicity otherwise.

Availability and implementation

InMoDe is implemented in Java and is available as command line application, as application with a graphical user-interface, and as an integration into Galaxy on the project website at http://www.jstacs.de/index.php/InMoDe .

Contact

ralf.eggeling@cs.helsinki.fi.",2017-02-01 +21851431,Comparative deep transcriptional profiling of four developing oilseeds.,"Transcriptome analysis based on deep expressed sequence tag (EST) sequencing allows quantitative comparisons of gene expression across multiple species. Using pyrosequencing, we generated over 7 million ESTs from four stages of developing seeds of Ricinus communis, Brassica napus, Euonymus alatus and Tropaeolum majus, which differ in their storage tissue for oil, their ability to photosynthesize and in the structure and content of their triacylglycerols (TAG). The larger number of ESTs in these 16 datasets provided reliable estimates of the expression of acyltransferases and other enzymes expressed at low levels. Analysis of EST levels from these oilseeds revealed both conserved and distinct species-specific expression patterns for genes involved in the synthesis of glycerolipids and their precursors. Independent of the species and tissue type, ESTs for core fatty acid synthesis enzymes maintained a conserved stoichiometry and a strong correlation in temporal profiles throughout seed development. However, ESTs associated with non-plastid enzymes of oil biosynthesis displayed dissimilar temporal patterns indicative of different regulation. The EST levels for several genes potentially involved in accumulation of unusual TAG structures were distinct. Comparison of expression of members from multi-gene families allowed the identification of specific isoforms with conserved function in oil biosynthesis. In all four oilseeds, ESTs for Rubisco were present, suggesting its possible role in carbon metabolism, irrespective of light availability. Together, these data provide a resource for use in comparative and functional genomics of diverse oilseeds. 
Expression data for more than 350 genes encoding enzymes and proteins involved in lipid metabolism are available at the 'ARALIP' website (http://aralip.plantbiology.msu.edu/).",2011-10-10 +22022467,HIVsirDB: a database of HIV inhibiting siRNAs.,"

Background

Human immunodeficiency virus (HIV) is responsible for millions of deaths every year. The current treatment involves the use of multiple antiretroviral agents that may harm patients due to their toxic nature. RNA interference (RNAi) is a potent candidate for the future treatment of HIV, uses short interfering RNA (siRNA/shRNA) for silencing HIV genes. In this study, attempts have been made to create a database HIVsirDB of siRNAs responsible for silencing HIV genes.

Descriptions

HIVsirDB is a manually curated database of HIV inhibiting siRNAs that provides comprehensive information about each siRNA or shRNA. Information was collected and compiled from literature and public resources. This database contains around 750 siRNAs that includes 75 partially complementary siRNAs differing by one or more bases with the target sites and over 100 escape mutant sequences. HIVsirDB structure contains sixteen fields including siRNA sequence, HIV strain, targeted genome region, efficacy and conservation of target sequences. In order to facilitate user, many tools have been integrated in this database that includes; i) siRNAmap for mapping siRNAs on target sequence, ii) HIVsirblast for BLAST search against database, iii) siRNAalign for aligning siRNAs.

Conclusion

HIVsirDB is a freely accessible database of siRNAs which can silence or degrade HIV genes. It covers 26 types of HIV strains and 28 cell types. This database will be very useful for developing models for predicting efficacy of HIV inhibiting siRNAs. In summary this is a useful resource for researchers working in the field of siRNA based HIV therapy. HIVsirDB database is accessible at http://crdd.osdd.net/raghava/hivsir/.",2011-10-11 +,Evaluating the Efficacy of Protected Habitat Areas for the California Spotted Owl Using Long-Term Monitoring Data,"The USDA Forest Service has adopted a management strategy for the California spotted owl (Strix occidentalis occidentalis) in the Sierra Nevada that relies on protecting habitat (Protected Activity Centers [PAC]) around suspected owl territory centers. We discuss the history of the PAC concept and evaluate its efficacy by comparing owl core areas of use, derived from usage distributions based on long-term location data of territorial owls, with their associated PACs. The average size of core areas used by spotted owls (334.7 ac; SE = 40.2; N = 29; 95% usage distribution for roost and nest locations) was similar to the average PAC size (287.5 ac; SE = 4.3; N = 29; t = 1.16; P < 0.25; 28 df). The 50 and 90% usage distributions for owl use area were smaller than their corresponding PACs (t = 38.88, P < 0.0001, and 28 df; t = 2.31, P < 0.03, and 28 df, respectively). The spatial overlap between owl core areas of use and PACs was also high. The average proportions of each core area that coincided with a PAC area was 0.84, 0.70, and 0.61 for the 50, 90, and 95% usage distributions, respectively. Moreover, there were more owl locations found inside ( = 36.0; range, 8-76; SE = 2.96) than outside ( = 6.9; range, 0-26; SE = 1.03) of PACs (t = 9.289; P < 0.0001; 68 df). 
We concluded that PACs, even though derived through an ad hoc but reasoned method, appear to be a key element for conservation of California spotted owls because owls have used these areas over long periods of time (up to 24 years). We also suggest that location data collected during long-term monitoring programs may be useful for identifying core areas for habitat protection not only for spotted owls but also for other species.",2012-09-01 +23603846,PhenoMiner: quantitative phenotype curation at the rat genome database.,"The Rat Genome Database (RGD) is the premier repository of rat genomic and genetic data and currently houses >40 000 rat gene records as well as human and mouse orthologs, >2000 rat and 1900 human quantitative trait loci (QTLs) records and >2900 rat strain records. Biological information curated for these data objects includes disease associations, phenotypes, pathways, molecular functions, biological processes and cellular components. Recently, a project was initiated at RGD to incorporate quantitative phenotype data for rat strains, in addition to the currently existing qualitative phenotype data for rat strains, QTLs and genes. A specialized curation tool was designed to generate manual annotations with up to six different ontologies/vocabularies used simultaneously to describe a single experimental value from the literature. Concurrently, three of those ontologies needed extensive addition of new terms to move the curation forward. The curation interface development, as well as ontology development, was an ongoing process during the early stages of the PhenoMiner curation project. Database URL: http://rgd.mcw.edu.",2013-04-19 +27084948,mtDNA-Server: next-generation sequencing data analysis of human mitochondrial DNA in the cloud.,"Next generation sequencing (NGS) allows investigating mitochondrial DNA (mtDNA) characteristics such as heteroplasmy (i.e. intra-individual sequence variation) to a higher level of detail. 
While several pipelines for analyzing heteroplasmies exist, issues in usability, accuracy of results and interpreting final data limit their usage. Here we present mtDNA-Server, a scalable web server for the analysis of mtDNA studies of any size with a special focus on usability as well as reliable identification and quantification of heteroplasmic variants. The mtDNA-Server workflow includes parallel read alignment, heteroplasmy detection, artefact or contamination identification, variant annotation as well as several quality control metrics, often neglected in current mtDNA NGS studies. All computational steps are parallelized with Hadoop MapReduce and executed graphically with Cloudgene. We validated the underlying heteroplasmy and contamination detection model by generating four artificial sample mix-ups on two different NGS devices. Our evaluation data shows that mtDNA-Server detects heteroplasmies and artificial recombinations down to the 1% level with perfect specificity and outperforms existing approaches regarding sensitivity. mtDNA-Server is currently able to analyze the 1000G Phase 3 data (n = 2,504) in less than 5 h and is freely accessible at https://mtdna-server.uibk.ac.at.",2016-04-15 +27882706,High frequency of potential interactions between direct-acting antivirals and concomitant therapy in HIV/hepatitis C virus-coinfected patients in clinical practice.,"

Objectives

The aim of the study was to analyse the frequency and degree of potential drug-drug interactions (DDIs) between direct-acting antivirals (DAAs) and concomitant medication used by HIV/hepatitis C virus (HCV)-coinfected patients, including antiretroviral therapy (ART) and other drugs.

Methods

All patients with HIV infection and viraemic HCV genotype 1, 3 or 4 coinfection attending a tertiary care centre in Spain (November 2014 to November 2015) were included in the study. DDIs were classified as major, i.e. drugs should not be co-administered, or minor, i.e. close monitoring, dosage alteration or change in timing may be required if drugs are co-administered, following the http://www.hep-druginteractions.org database recommendations.

Results

A total of 244 patients were included in the study, of whom 224 (92%) were previous injecting drug users. Major DDIs were found for: paritaprevir-r/ombitasvir plus dasabuvir (3D), in 60 (44%) of 138 individuals with genotype 1; paritaprevir-r/ombitasvir (2D), in 22 (37%) of 60 individuals with genotype 4; sofosbuvir/ledipasvir (SOF/LDV), in four (2%) of 198 patients with genotype 1 or 4; simeprevir (SMV) plus SOF, in 160 (81%) of 198 patients with genotype 1 or 4; daclatasvir (DCV) plus SOF, in seven (3%) of 244 patients with genotype 1, 3 or 4 (P < 0.001). Minor DDIs were found for: 3D, in 123 (89%) individuals with genotype 1; 2D, in 52 (87%) individuals with genotype 4; SOF/LDV, in 154 (78%) patients with genotype 1 or 4; SMV plus SOF, in 129 (65%) patients with genotype 1 or 4; DCV plus SOF, in 149 (61%) patients with genotype 1, 3 or 4 (P < 0.001).

Conclusions

Drug-drug interactions between DAAs and ART or other commonly prescribed medications are frequently found among HIV/HCV-coinfected patients. Potential major and minor DDIs are more frequent with 3D, 2D and SMV plus SOF regimens.",2016-11-24 +24408217,"DevMouse, the mouse developmental methylome database and analysis tools.","DNA methylation undergoes dynamic changes during mouse development and plays crucial roles in embryogenesis, cell-lineage determination and genomic imprinting. Bisulfite sequencing enables profiling of mouse developmental methylomes on an unprecedented scale; however, integrating and mining these data are challenges for experimental biologists. Therefore, we developed DevMouse, which focuses on the efficient storage of DNA methylomes in temporal order and quantitative analysis of methylation dynamics during mouse development. The latest release of DevMouse incorporates 32 normalized and temporally ordered methylomes across 15 developmental stages and related genome information. A flexible query engine is developed for acquisition of methylation profiles for genes, microRNAs, long non-coding RNAs and genomic intervals of interest across selected developmental stages. To facilitate in-depth mining of these profiles, DevMouse offers online analysis tools for the quantification of methylation variation, identification of differentially methylated genes, hierarchical clustering, gene function annotation and enrichment. Moreover, a configurable MethyBrowser is provided to view the base-resolution methylomes under a genomic context. In brief, DevMouse hosts comprehensive mouse developmental methylome data and provides online tools to explore the relationships of DNA methylation and development. Database URL: http://www.devmouse.org/",2014-01-09 +28096778,"PineElm_SSRdb: a microsatellite marker database identified from genomic, chloroplast, mitochondrial and EST sequences of pineapple (Ananas comosus (L.) Merrill).","

Background

Simple Sequence Repeats or microsatellites are resourceful molecular genetic markers. There are only few reports of SSR identification and development in pineapple. Complete genome sequence of pineapple available in the public domain can be used to develop numerous novel SSRs. Therefore, an attempt was made to identify SSRs from genomic, chloroplast, mitochondrial and EST sequences of pineapple which will help in deciphering genetic makeup of its germplasm resources.

Results

A total of 359511 SSRs were identified in pineapple (356385 from genome sequence, 45 from chloroplast sequence, 249 in mitochondrial sequence and 2832 from EST sequences). The list of EST-SSR markers and their details are available in the database.

Conclusions

PineElm_SSRdb is an open source database available for non-commercial academic purpose at http://app.bioelm.com/ with a mapping tool which can develop circular maps of selected marker set. This database will be of immense use to breeders, researchers and graduates working on Ananas spp. and to others working on cross-species transferability of markers, investigating diversity, mapping and DNA fingerprinting.",2016-11-24 +27330567,"BioTriangle: a web-accessible platform for generating various molecular representations for chemicals, proteins, DNAs/RNAs and their interactions.","

Background

More and more evidences from network biology indicate that most cellular components exert their functions through interactions with other cellular components, such as proteins, DNAs, RNAs and small molecules. The rapidly increasing amount of publicly available data in biology and chemistry enables researchers to revisit interaction problems by systematic integration and analysis of heterogeneous data. Currently, some tools have been developed to represent these components. However, they have some limitations and only focus on the analysis of either small molecules or proteins or DNAs/RNAs. To the best of our knowledge, there is still a lack of freely-available, easy-to-use and integrated platforms for generating molecular descriptors of DNAs/RNAs, proteins, small molecules and their interactions.

Results

Herein, we developed a comprehensive molecular representation platform, called BioTriangle, to emphasize the integration of cheminformatics and bioinformatics into a molecular informatics platform for computational biology study. It contains a feature-rich toolkit used for the characterization of various biological molecules and complex interaction samples including chemicals, proteins, DNAs/RNAs and even their interactions. By using BioTriangle, users are able to start a full pipelining from getting molecular data, molecular representation to constructing machine learning models conveniently.

Conclusion

BioTriangle provides a user-friendly interface to calculate various features of biological molecules and complex interaction samples conveniently. The computing tasks can be submitted and performed simply in a browser without any sophisticated installation and configuration process. BioTriangle is freely available at http://biotriangle.scbdd.com. Graphical abstract: An overview of BioTriangle. A platform for generating various molecular representations for chemicals, proteins, DNAs/RNAs and their interactions.",2016-06-21 +23671338,Analysis Tool Web Services from the EMBL-EBI.,"Since 2004 the European Bioinformatics Institute (EMBL-EBI) has provided access to a wide range of databases and analysis tools via Web Services interfaces. This comprises services to search across the databases available from the EMBL-EBI and to explore the network of cross-references present in the data (e.g. EB-eye), services to retrieve entry data in various data formats and to access the data in specific fields (e.g. dbfetch), and analysis tool services, for example, sequence similarity search (e.g. FASTA and NCBI BLAST), multiple sequence alignment (e.g. Clustal Omega and MUSCLE), pairwise sequence alignment and protein functional analysis (e.g. InterProScan and Phobius). The REST/SOAP Web Services (http://www.ebi.ac.uk/Tools/webservices/) interfaces to these databases and tools allow their integration into other tools, applications, web sites, pipeline processes and analytical workflows. To get users started using the Web Services, sample clients are provided covering a range of programming languages and popular Web Service tool kits, and a brief guide to Web Services technologies, including a set of tutorials, is available for those wishing to learn more and develop their own clients. Users of the Web Services are informed of improvements and updates via a range of methods.",2013-05-13 +29162595,Predicting Outcome in Patients with Anti-GBM Glomerulonephritis.,"

Background and objectives

Large studies on long-term kidney outcome in patients with anti-glomerular basement membrane (anti-GBM) GN are lacking. This study aimed to identify clinical and histopathologic parameters that predict kidney outcome in these patients.

Design, setting, participants, & measurements

This retrospective analysis included a total of 123 patients with anti-GBM GN between 1986 and 2015 from six centers worldwide. Their kidney biopsy samples were classified according to the histopathologic classification for ANCA-associated GN. Clinical data such as details of treatment were retrieved from clinical records. The primary outcome parameter was the occurrence of ESRD. Kidney survival was analyzed using the log-rank test and Cox regression analyses.

Results

The 5-year kidney survival rate was 34%, with an improved rate observed among patients diagnosed after 2007 (P=0.01). In patients with anti-GBM GN, histopathologic class and kidney survival were associated (P<0.001). Only one of 15 patients with a focal class biopsy sample (≥50% normal glomeruli) developed ESRD. Patients with a sclerotic class biopsy sample (≥50% globally sclerotic glomeruli) and patients with 100% cellular crescents did not recover from dialysis dependency at presentation. In multivariable analysis, dialysis dependency at presentation (hazard ratio [HR], 3.17; 95% confidence interval [95% CI], 1.59 to 6.32), percentage of normal glomeruli (HR, 0.97; 95% CI, 0.95 to 0.99), and extent of interstitial infiltrate (HR, 2.02; 95% CI, 1.17 to 3.50) were predictors of ESRD during follow-up.

Conclusions

Dialysis dependency, low percentage of normal glomeruli, and large extent of interstitial infiltrate are associated with poor kidney outcome in anti-GBM GN. Kidney outcome has improved during recent years; the success rate doubled after 2007.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2017_11_21_CJASNPodcast_18_1_v.mp3.",2017-11-21 +30235992,Social Media Monitoring and Adverse Drug Reaction Reporting in Pharmacovigilance: An Overview of the Regulatory Landscape.,"In the context of the European Union's Innovative Medicines Initiative (IMI) project titled Web-Recognizing Adverse Drug Reactions (WEB-RADR; http://web-radr.eu ), which focuses on the assessment of new data sources and the optimization of the collection of information on suspected adverse reactions in pharmacovigilance, a survey was performed in 182 countries/jurisdictions in 2014 to 2015. The goal was to gather information on existing practices, guidance, and legal requirements on social media monitoring to identify potential safety issues related to medicines. The survey response rate was 100%. The results revealed that 80% of the surveyed countries do not have such necessities despite the fact that 63% of these countries have an established national pharmacovigilance system. Among the countries having an established pharmacovigilance system, only 29% have specific requirements, most countries do have similar provisions as set out in the EU guidelines on Good Pharmacovigilance Practices (GVP). A small subset of countries within the European Economic Area (EEA) have requirements that exceed those stated in GVP, namely, Italy, France, Sweden, and the UK. Outside the EEA, Turkey and the United States have also developed further guidance. The outcome of the survey will inform the development of a future policy framework on the further use of social media as new pharmacovigilance data source in the EEA. In addition, this paper elaborates on some current practical case management issues encountered by companies based on the existing regulatory guidance.",2016-09-27 +22665823,In the clinic. 
Travel medicine.,"The content of In the Clinic is drawn from the clinical information and education resources of the American College of Physicians (ACP), including PIER (Physicians’ Information and Education Resource) and MKSAP (Medical Knowledge and Self- Assessment Program). Annals of Internal Medicine editors develop In the Clinic from these primary sources in collaboration with the ACP’s Medical Education and Publishing divisions and with the assistance of science writers and physician writers. Editorial consultants from PIER and MKSAP provide expert review of the content. Readers who are interested in these primary resources for more detail can consult http://pier.acponline.org, http://www.acponline.org/products_services/mksap/15/?pr31, and other resources referenced in each issue of In the Clinic.",2012-06-01 +24932006,Inductive matrix completion for predicting gene-disease associations.,"

Motivation

Most existing methods for predicting causal disease genes rely on specific type of evidence, and are therefore limited in terms of applicability. More often than not, the type of evidence available for diseases varies-for example, we may know linked genes, keywords associated with the disease obtained by mining text, or co-occurrence of disease symptoms in patients. Similarly, the type of evidence available for genes varies-for example, specific microarray probes convey information only for certain sets of genes. In this article, we apply a novel matrix-completion method called Inductive Matrix Completion to the problem of predicting gene-disease associations; it combines multiple types of evidence (features) for diseases and genes to learn latent factors that explain the observed gene-disease associations. We construct features from different biological sources such as microarray expression data and disease-related textual data. A crucial advantage of the method is that it is inductive; it can be applied to diseases not seen at training time, unlike traditional matrix-completion approaches and network-based inference methods that are transductive.

Results

Comparison with state-of-the-art methods on diseases from the Online Mendelian Inheritance in Man (OMIM) database shows that the proposed approach is substantially better-it has close to one-in-four chance of recovering a true association in the top 100 predictions, compared to the recently proposed Catapult method (second best) that has <15% chance. We demonstrate that the inductive method is particularly effective for a query disease with no previously known gene associations, and for predicting novel genes, i.e. genes that are previously not linked to diseases. Thus the method is capable of predicting novel genes even for well-characterized diseases. We also validate the novelty of predictions by evaluating the method on recently reported OMIM associations and on associations recently reported in the literature.

Availability

Source code and datasets can be downloaded from http://bigdata.ices.utexas.edu/project/gene-disease.",2014-06-01 +26656948,The MG-RAST metagenomics database and portal in 2015.,"MG-RAST (http://metagenomics.anl.gov) is an open-submission data portal for processing, analyzing, sharing and disseminating metagenomic datasets. The system currently hosts over 200,000 datasets and is continuously updated. The volume of submissions has increased 4-fold over the past 24 months, now averaging 4 terabasepairs per month. In addition to several new features, we report changes to the analysis workflow and the technologies used to scale the pipeline up to the required throughput levels. To show possible uses for the data from MG-RAST, we present several examples integrating data and analyses from MG-RAST into popular third-party analysis tools or sequence alignment tools.",2015-12-09 +26424852,BacDive--The Bacterial Diversity Metadatabase in 2016.,"BacDive-the Bacterial Diversity Metadatabase (http://bacdive.dsmz.de) provides strain-linked information about bacterial and archaeal biodiversity. The range of data encompasses taxonomy, morphology, physiology, sampling and concomitant environmental conditions as well as molecular biology. The majority of data is manually annotated and curated. Currently (with release 9/2015), BacDive covers 53 978 strains. Newly implemented RESTful web services provide instant access to the content in machine-readable XML and JSON format. Besides an overall increase of data content, BacDive offers new data fields and features, e.g. 
the search for gene names, plasmids or 16S rRNA in the advanced search, as well as improved linkage of entries to external life science web resources.",2015-09-30 +27893392,Waterloo Exploration Database: New Challenges for Image Quality Assessment Models.,"The great content diversity of real-world digital images poses a grand challenge to image quality assessment (IQA) models, which are traditionally designed and validated on a handful of commonly used IQA databases with very limited content variation. To test the generalization capability and to facilitate the wide usage of IQA techniques in real-world applications, we establish a large-scale database named the Waterloo Exploration Database, which in its current state contains 4744 pristine natural images and 94 880 distorted images created from them. Instead of collecting the mean opinion score for each image via subjective testing, which is extremely difficult if not impossible, we present three alternative test criteria to evaluate the performance of IQA models, namely, the pristine/distorted image discriminability test, the listwise ranking consistency test, and the pairwise preference consistency test (P-test). We compare 20 well-known IQA models using the proposed criteria, which not only provide a stronger test in a more challenging testing environment for existing models, but also demonstrate the additional benefits of using the proposed database. For example, in the P-test, even for the best performing no-reference IQA model, more than 6 million failure cases against the model are ""discovered"" automatically out of over 1 billion test pairs. Furthermore, we discuss how the new database may be exploited using innovative approaches in the future, to reveal the weaknesses of existing IQA models, to provide insights on how to improve the models, and to shed light on how the next-generation IQA models may be developed. 
The database and codes are made publicly available at: https://ece.uwaterloo.ca/~k29ma/exploration/.",2016-11-22 +25414340,The Pathogen-Host Interactions database (PHI-base): additions and future developments.,"Rapidly evolving pathogens cause a diverse array of diseases and epidemics that threaten crop yield, food security as well as human, animal and ecosystem health. To combat infection greater comparative knowledge is required on the pathogenic process in multiple species. The Pathogen-Host Interactions database (PHI-base) catalogues experimentally verified pathogenicity, virulence and effector genes from bacterial, fungal and protist pathogens. Mutant phenotypes are associated with gene information. The included pathogens infect a wide range of hosts including humans, animals, plants, insects, fish and other fungi. The current version, PHI-base 3.6, available at http://www.phi-base.org, stores information on 2875 genes, 4102 interactions, 110 host species, 160 pathogenic species (103 plant, 3 fungal and 54 animal infecting species) and 181 diseases drawn from 1243 references. Phenotypic and gene function information has been obtained by manual curation of the peer-reviewed literature. A controlled vocabulary consisting of nine high-level phenotype terms permits comparisons and data analysis across the taxonomic space. PHI-base phenotypes were mapped via their associated gene information to reference genomes available in Ensembl Genomes. Virulence genes and hotspots can be visualized directly in genome browsers. Future plans for PHI-base include development of tools facilitating community-led curation and inclusion of the corresponding host target(s).",2014-11-20 +28462029,CRF: detection of CRISPR arrays using random forest.,"CRISPRs (clustered regularly interspaced short palindromic repeats) are particular repeat sequences found in wide range of bacteria and archaea genomes. 
Several tools are available for detecting CRISPR arrays in the genomes of both domains. Here we developed a new web-based CRISPR detection tool named CRF (CRISPR Finder by Random Forest). Different from other CRISPR detection tools, a random forest classifier was used in CRF to filter out invalid CRISPR arrays from all putative candidates and accordingly enhanced detection accuracy. In CRF, particularly, triplet elements that combine both sequence content and structure information were extracted from CRISPR repeats for classifier training. The classifier achieved high accuracy and sensitivity. Moreover, CRF offers a highly interactive web interface for robust data visualization that is not available among other CRISPR detection tools. After detection, the query sequence, CRISPR array architecture, and the sequences and secondary structures of CRISPR repeats and spacers can be visualized for visual examination and validation. CRF is freely available at http://bioinfolab.miamioh.edu/crf/home.php.",2017-04-25 +24234005,SeqDepot: streamlined database of biological sequences and precomputed features.,"

Unlabelled

Assembling and/or producing integrated knowledge of sequence features continues to be an onerous and redundant task despite a large number of existing resources. We have developed SeqDepot-a novel database that focuses solely on two primary goals: (i) assimilating known primary sequences with predicted feature data and (ii) providing the most simple and straightforward means to procure and readily use this information. Access to >28.5 million sequences and 300 million features is provided through a well-documented and flexible RESTful interface that supports fetching specific data subsets, bulk queries, visualization and searching by MD5 digests or external database identifiers. We have also developed an HTML5/JavaScript web application exemplifying how to interact with SeqDepot and Perl/Python scripts for use with local processing pipelines.

Availability

Freely available on the web at http://seqdepot.net/. REST access via http://seqdepot.net/api/v1. Database files and scripts may be downloaded from http://seqdepot.net/download.",2013-11-13 +27327084,E-Flux2 and SPOT: Validated Methods for Inferring Intracellular Metabolic Flux Distributions from Transcriptomic Data.,"

Background

Several methods have been developed to predict system-wide and condition-specific intracellular metabolic fluxes by integrating transcriptomic data with genome-scale metabolic models. While powerful in many settings, existing methods have several shortcomings, and it is unclear which method has the best accuracy in general because of limited validation against experimentally measured intracellular fluxes.

Results

We present a general optimization strategy for inferring intracellular metabolic flux distributions from transcriptomic data coupled with genome-scale metabolic reconstructions. It consists of two different template models called DC (determined carbon source model) and AC (all possible carbon sources model) and two different new methods called E-Flux2 (E-Flux method combined with minimization of l2 norm) and SPOT (Simplified Pearson cOrrelation with Transcriptomic data), which can be chosen and combined depending on the availability of knowledge on carbon source or objective function. This enables us to simulate a broad range of experimental conditions. We examined E. coli and S. cerevisiae as representative prokaryotic and eukaryotic microorganisms respectively. The predictive accuracy of our algorithm was validated by calculating the uncentered Pearson correlation between predicted fluxes and measured fluxes. To this end, we compiled 20 experimental conditions (11 in E. coli and 9 in S. cerevisiae), of transcriptome measurements coupled with corresponding central carbon metabolism intracellular flux measurements determined by 13C metabolic flux analysis (13C-MFA), which is the largest dataset assembled to date for the purpose of validating inference methods for predicting intracellular fluxes. In both organisms, our method achieves an average correlation coefficient ranging from 0.59 to 0.87, outperforming a representative sample of competing methods. Easy-to-use implementations of E-Flux2 and SPOT are available as part of the open-source package MOST (http://most.ccib.rutgers.edu/).

Conclusion

Our method represents a significant advance over existing methods for inferring intracellular metabolic flux from transcriptomic data. It not only achieves higher accuracy, but it also combines into a single method a number of other desirable characteristics including applicability to a wide range of experimental conditions, production of a unique solution, fast running time, and the availability of a user-friendly implementation.",2016-06-21 +26657557,Detecting actively translated open reading frames in ribosome profiling data.,"RNA-sequencing protocols can quantify gene expression regulation from transcription to protein synthesis. Ribosome profiling (Ribo-seq) maps the positions of translating ribosomes over the entire transcriptome. We have developed RiboTaper (available at https://ohlerlab.mdc-berlin.de/software/), a rigorous statistical approach that identifies translated regions on the basis of the characteristic three-nucleotide periodicity of Ribo-seq data. We used RiboTaper with deep Ribo-seq data from HEK293 cells to derive an extensive map of translation that covered open reading frame (ORF) annotations for more than 11,000 protein-coding genes. We also found distinct ribosomal signatures for several hundred upstream ORFs and ORFs in annotated noncoding genes (ncORFs). Mass spectrometry data confirmed that RiboTaper achieved excellent coverage of the cellular proteome. Although dozens of novel peptide products were validated in this manner, few of the currently annotated long noncoding RNAs appeared to encode stable polypeptides. RiboTaper is a powerful method for comprehensive de novo identification of actively used ORFs from Ribo-seq data.",2015-12-14 +27871221,Most of the tight positional conservation of transcription factor binding sites near the transcription start site reflects their co-localization within regulatory modules.,"

Background

Transcription factors (TFs) form complexes that bind regulatory modules (RMs) within DNA, to control specific sets of genes. Some transcription factor binding sites (TFBSs) near the transcription start site (TSS) display tight positional preferences relative to the TSS. Furthermore, near the TSS, RMs can co-localize TFBSs with each other and the TSS. The proportion of TFBS positional preferences due to TFBS co-localization within RMs is unknown, however. ChIP experiments confirm co-localization of some TFBSs genome-wide, including near the TSS, but they typically examine only a few TFs at a time, using non-physiological conditions that can vary from lab to lab. In contrast, sequence analysis can examine many TFs uniformly and methodically, broadly surveying the co-localization of TFBSs with tight positional preferences relative to the TSS.

Results

Our statistics found 43 significant sets of human motifs in the JASPAR TF Database with positional preferences relative to the TSS, with 38 preferences tight (±5 bp). Each set of motifs corresponded to a gene group of 135 to 3304 genes, with 42/43 (98%) gene groups independently validated by DAVID, a gene ontology database, with FDR < 0.05. Motifs corresponding to two TFBSs in a RM should co-occur more than by chance alone, enriching the intersection of the gene groups corresponding to the two TFs. Thus, a gene-group intersection systematically enriched beyond chance alone provides evidence that the two TFs participate in an RM. Of the 903 = 43*42/2 intersections of the 43 significant gene groups, we found 768/903 (85%) pairs of gene groups with significantly enriched intersections, with 564/768 (73%) intersections independently validated by DAVID with FDR < 0.05. A user-friendly web site at http://go.usa.gov/3kjsH permits biologists to explore the interaction network of our TFBSs to identify candidate subunit RMs.

Conclusions

Gene duplication and convergent evolution within a genome provide obvious biological mechanisms for replicating an RM near the TSS that binds a particular TF subunit. Of all intersections of our 43 significant gene groups, 85% were significantly enriched, with 73% of the significant enrichments independently validated by gene ontology. The co-localization of TFBSs within RMs therefore likely explains much of the tight TFBS positional preferences near the TSS.",2016-11-21 +28854595,Dormancy and activation of human oocytes from primordial and primary follicles: molecular clues to oocyte regulation.,"

Study question

Do specific transcriptome dynamics in human oocytes from primordial and primary follicles identify novel pathways in oocyte activation?

Summary answer

The transcriptomic profiles in oocytes from primordial and primary follicles, respectively, revealed several new canonical pathways as putative mediators of oocyte dormancy and activation.

What is known already

Cellular signaling pathways including PI3K/AKT and AKT/mTOR as well as TGF-β and IGF signaling are known to regulate the primordial-to-primary transition in mammalian follicle development.

Study design, size, duration

We performed a class comparison study on human oocytes from primordial (n = 436) and primary (n = 182) follicles donated by three women having ovarian tissue cryopreserved before chemotherapy.

Participants/materials, setting, methods

RNA was extracted from oocytes from primordial and primary follicles isolated by Laser Capture Microdissection, and submitted to the HiSeq Illumina platform. Data mapping, quality control, filtering and expression analysis were performed using Tophat (2.0.4), Cufflinks (2.0.2), BWA (0.6.2) and software R. Modeling of complex biological systems was performed using the IPA® software. Finally, qPCR and immunohistochemistry were employed to explore expression and localization of selected genes and products in human ovarian tissue.

Main results and the role of chance

We found 223 and 268 genes down-regulated and up-regulated, respectively, in the oocytes during the human primordial-to-primary follicle transition (P < 0.05 and/or FPKM fold-change >2). IPA® enrichment analysis revealed known pathways ('mTOR Signaling', 'PI3K/AKT Signaling' and 'PTEN Signaling') as well as enriched canonical pathways not previously associated with human ovarian follicle development such as 'ErB Signaling' and 'NGF Signaling' in the down-regulated category and 'Regulation of eIF4 and P70S6K Signaling' and 'HER-2 Signaling in Breast Cancer' in the up-regulated group. Additionally, immunohistochemistry on human ovarian tissue explored the intraovarian localization of VASA, FOXO1 and eIF4E.

Large scale data

http://users-birc.au.dk/biopv/published_data/ernst_2017/.

Limitations, reasons for caution

This is a descriptive analysis and no functional studies were performed. The study was based on a limited number of patients and the experimental design could not take into account the natural biological variance in human samples. Therefore, qPCR was used to confirm selected genes alongside immunohistochemical stainings.

Wider implications of the findings

This study shows, for the first time, a detailed molecular description of global gene transcription activities in oocytes from primordial and primary follicles, respectively. Knowing the global transcription profiles of human oocyte dormancy and activation is important in developing new clinical applications.

Study funding/competing interest(s)

E.H.E. was supported by Health Faculty, Aarhus University and Kong Christian Den Tiendes Fond. K.H. and S.F. were supported by an MRC (UK) project grant MR/M012638/1. K.L.H. was supported by grants from Fonden til Lægevidenskabens Fremme, Kong Christian Den Tiendes Fond. K.L.H. and L.S. were supported by the IDEAS grant from Aarhus University Research Foundation (AUFF). There are no conflicts of interest.",2017-08-01 +23636887,PhenoTips: patient phenotyping software for clinical and research use.,"We have developed PhenoTips: open source software for collecting and analyzing phenotypic information for patients with genetic disorders. Our software combines an easy-to-use interface, compatible with any device that runs a Web browser, with a standardized database back end. The PhenoTips' user interface closely mirrors clinician workflows so as to facilitate the recording of observations made during the patient encounter. Collected data include demographics, medical history, family history, physical and laboratory measurements, physical findings, and additional notes. Phenotypic information is represented using the Human Phenotype Ontology; however, the complexity of the ontology is hidden behind a user interface, which combines simple selection of common phenotypes with error-tolerant, predictive search of the entire ontology. PhenoTips supports accurate diagnosis by analyzing the entered data, then suggesting additional clinical investigations and providing Online Mendelian Inheritance in Man (OMIM) links to likely disorders. By collecting, classifying, and analyzing phenotypic information during the patient encounter, PhenoTips allows for streamlining of clinic workflow, efficient data entry, improved diagnosis, standardization of collected patient phenotypes, and sharing of anonymized patient phenotype data for the study of rare disorders. 
Our source code and a demo version of PhenoTips are available at http://phenotips.org.",2013-05-24 +27631059,Highlights of the 2011 Drug Abuse Warning Network (DAWN) Findings on Drug-Related Emergency Department Visits,"Background: The Drug Abuse Warning Network (DAWN) provides nationally representative patient demographic and visit-level information on emergency department (ED) visits resulting from substance misuse or abuse, adverse reactions to drugs taken as prescribed or directed, accidental ingestion of drugs, drug-related suicide attempts, and ED admissions for substance abuse treatment. 2011 is the eighth year that the Substance Abuse and Mental Health Services Administration (SAMHSA) has collected data on drug-related ED visits in the U.S. using the new sampling and study design introduced in 2004. Methods: 2011 DAWN data are compared with 2004 and 2009 for visits involving illicit drug use, misuse or abuse of pharmaceuticals, alcohol use, adverse reactions to drugs, and accidental ingestions. All changes over time and between age groups are measured by comparing the rate of ED visits per 100,000 population, not point estimates. Results: In 2011, there were 5.1 million drug-related ED visits; 49% were attributed to drug misuse or abuse with 45% attributed to adverse drug reactions. ED visits involving use of illicit drugs were relatively stable from 2004 (estimated 991,640 visits) to 2009 (974,392 visits) but increased from 2009 to 2011 (1,252,500 visits); between 2009 and 2011, the rate of visits involving illicit stimulants increased 68%, and the rate of visits involving marijuana rose 19%. ED visits involving misuse or abuse of pharmaceuticals increased from 2004 (626,470 visits) through 2011 (1,428,145 visits); the most commonly involved drugs were anti-anxiety and insomnia medications and narcotic pain relievers (160.9 and 134.8 visits per 100,000 population, respectively). 
ED visits involving adverse reactions to drugs increased from 1,250,377 visits in 2005 to 2,287,271 visits in 2009; however, no increase occurred between 2009 and 2011 (2,301,059 visits). Conclusion: A central finding of the 2011 DAWN is that the involvement of certain commonly abused pharmaceuticals in ED visits associated with drug misuse or abuse did not change from 2009 to 2011. There were no significant increases in the rates of visits involving narcotic pain relievers from 2009 to 2011. Visits involving anti-anxiety or insomnia medications increased a small amount in general, but no specific drugs in this category showed increases. No increases occurred from 2009 to 2011 for ED visits involving adverse reactions to pharmaceuticals overall. Pharmaceuticals continue to be involved at a higher rate than illicit drugs. A second critical finding is that there may be an increase in the involvement of illicit drugs. After 5 years of relative stability, an upward trend was observed between 2009 and 2011. Visits involving marijuana, illicit stimulants, and synthetic cannabinoids increased between 2009 and 2011. Involvement of legal stimulants (e.g., CNS stimulants used to treat attention deficit/hyperactivity disorder) also rose over this period. A more thorough analysis of 2011 and previous data is available at the SAMHSA website (http://www.samhsa.gov/data/), including a comprehensive set of detailed tables that display drug-specific ED visit counts by various patient demographics and visit characteristics for the U.S and select metropolitan areas.",2016-09-16 +25450223,Haemophilus influenzae Genome Database (HIGDB): a single point web resource for Haemophilus influenzae.,"

Background

Haemophilus influenzae (H. influenzae) is the causative agent of pneumonia, bacteraemia and meningitis. The organism is responsible for a large number of deaths in both developed and developing countries. Even though the first bacterial genome to be sequenced was that of H. influenzae, there is no exclusive database dedicated to H. influenzae. This prompted us to develop the Haemophilus influenzae Genome Database (HIGDB).

Methods

All data of HIGDB are stored and managed in a MySQL database. The HIGDB is hosted on a Solaris server and developed using PERL modules. Ajax and JavaScript are used for the interface development.

Results

The HIGDB contains detailed information on 42,741 proteins, 18,077 genes including 10 whole genome sequences and also 284 three dimensional structures of proteins of H. influenzae. In addition, the database provides ""Motif search"" and ""GBrowse"". The HIGDB is freely accessible through the URL: http://bioserver1.physics.iisc.ernet.in/HIGDB/.

Discussion

The HIGDB will be a single point access for bacteriological, clinical, genomic and proteomic information of H. influenzae. The database can also be used to identify DNA motifs within H. influenzae genomes and to compare gene or protein sequences of a particular strain with other strains of H. influenzae.",2014-10-14 +28137710,Dscam1 web server: online prediction of Dscam1 self- and hetero-affinity.,"

Motivation

Formation of homodimers by identical Dscam1 protein isomers on the cell surface is the key factor for the self-avoidance of growing neurites. Dscam1 immense diversity has a critical role in the formation of the arthropod neuronal circuit, showing unique evolutionary properties when compared to other cell surface proteins. Experimental measures are available for 89 self-binding and 1722 hetero-binding protein samples, out of more than 19 thousand (self-binding) and 350 million (hetero-binding) possible isomer combinations.

Results

We developed Dscam1 Web Server to quickly predict Dscam1 self- and hetero- binding affinity for batches of Dscam1 isomers. The server can help the study of Dscam1 affinity and help researchers navigate through the tens of millions of possible isomer combinations to isolate the strong-binding ones.

Availability and implementation

Dscam1 Web Server is freely available at: http://bioinformatics.tecnoparco.org/Dscam1-webserver . Web server code is available at https://gitlab.com/ne1s0n/Dscam1-binding .

Contact

simone.marini@unipv.it or guangzhong.wang@picb.ac.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +28186245,FELLS: fast estimator of latent local structure.,"

Motivation

The behavior of a protein is encoded in its sequence, which can be used to predict distinct features such as secondary structure, intrinsic disorder or amphipathicity. Integrating these and other features can help explain the context-dependent behavior of proteins. However, most tools focus on a single aspect, hampering a holistic understanding of protein structure. Here, we present Fast Estimator of Latent Local Structure (FELLS) to visualize structural features from the protein sequence. FELLS provides disorder, aggregation and low complexity predictions as well as estimated local propensities including amphipathicity. A novel fast estimator of secondary structure (FESS) is also trained to provide a fast response. The calculations required for FELLS are extremely fast and suited for large-scale analysis while providing a detailed analysis of difficult cases.

Availability and implementation

The FELLS web server is available from URL: http://protein.bio.unipd.it/fells/ . The server also exposes RESTful functionality allowing programmatic prediction requests. An executable version of FESS for Linux can be downloaded from URL: protein.bio.unipd.it/download/.

Contact

silvio.tosatto@unipd.it.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +24952385,ShrimpGPAT: a gene and protein annotation tool for knowledge sharing and gene discovery in shrimp.,"

Background

Although captured and cultivated marine shrimp constitute highly important seafood in terms of both economic value and production quantity, biologists have little knowledge of the shrimp genome and this partly hinders their ability to improve shrimp aquaculture. To help improve this situation, the Shrimp Gene and Protein Annotation Tool (ShrimpGPAT) was conceived as a community-based annotation platform for the acquisition and updating of full-length complementary DNAs (cDNAs), Expressed Sequence Tags (ESTs), transcript contigs and protein sequences of penaeid shrimp and their decapod relatives and for in-silico functional annotation and sequence analysis.

Description

ShrimpGPAT currently holds quality-filtered, molecular sequences of 14 decapod species (~500,000 records for six penaeid shrimp and eight other decapods). The database predominantly comprises transcript sequences derived by both traditional EST Sanger sequencing and more recently by massive-parallel sequencing technologies. The analysis pipeline provides putative functions in terms of sequence homologs, gene ontologies and protein-protein interactions. Data retrieval can be conducted easily either by a keyword text search or by a sequence query via BLAST, and users can save records of interest for later investigation using tools such as multiple sequence alignment and BLAST searches against pre-defined databases. In addition, ShrimpGPAT provides space for community insights by allowing functional annotation with tags and comments on sequences. Community-contributed information will allow for continuous database enrichment, for improvement of functions and for other aspects of sequence analysis.

Conclusions

ShrimpGPAT is a new, free and easily accessed service for the shrimp research community that provides a comprehensive and up-to-date database of quality-filtered decapod gene and protein sequences together with putative functional prediction and sequence analysis tools. An important feature is its community-based functional annotation capability that allows the research community to contribute knowledge and insights about the properties of molecular sequences for better, shared, functional characterization of shrimp genes. Regularly updated and expanded with data on more decapods, ShrimpGPAT is publicly available at http://shrimpgpat.sc.mahidol.ac.th/.",2014-06-21 +24994891,FindPath: a Matlab solution for in silico design of synthetic metabolic pathways.,"

Summary

Several methods and computational tools have been developed to design novel metabolic pathways. A major challenge is evaluating the metabolic efficiency of the designed pathways in the host organism. Here we present FindPath, a unified system to predict and rank possible pathways according to their metabolic efficiency in the cellular system. This tool uses a chemical reaction database to generate possible metabolic pathways and exploits constraint-based models (CBMs) to identify the most efficient synthetic pathway to achieve the desired metabolic function in a given host microorganism. FindPath can be used with common tools for CBM manipulation and uses the standard SBML format for both input and output files.

Availability and implementation

http://metasys.insa-toulouse.fr/software/findpath/.

Contact

heux@insa-toulouse.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-03 +25127889,SPARQLGraph: a web-based platform for graphically querying biological Semantic Web databases.,"

Background

Semantic Web has established itself as a framework for using and sharing data across applications and database boundaries. Here, we present a web-based platform for querying biological Semantic Web databases in a graphical way.

Results

SPARQLGraph offers an intuitive drag & drop query builder, which converts the visual graph into a query and executes it on a public endpoint. The tool integrates several publicly available Semantic Web databases, including the databases of the just recently released EBI RDF platform. Furthermore, it provides several predefined template queries for answering biological questions. Users can easily create and save new query graphs, which can also be shared with other researchers.

Conclusions

This new graphical way of creating queries for biological Semantic Web databases considerably facilitates usability as it removes the requirement of knowing specific query languages and database structures. The system is freely available at http://sparqlgraph.i-med.ac.at.",2014-08-15 +25058394,IthaGenes: an interactive database for haemoglobin variations and epidemiology.,"Inherited haemoglobinopathies are the most common monogenic diseases, with millions of carriers and patients worldwide. At present, we know several hundred disease-causing mutations on the globin gene clusters, in addition to numerous clinically important trans-acting disease modifiers encoded elsewhere and a multitude of polymorphisms with relevance for advanced diagnostic approaches. Moreover, new disease-linked variations are discovered every year that are not included in traditional and often functionally limited locus-specific databases. This paper presents IthaGenes, a new interactive database of haemoglobin variations, which stores information about genes and variations affecting haemoglobin disorders. In addition, IthaGenes organises phenotype, relevant publications and external links, while embedding the NCBI Sequence Viewer for graphical representation of each variation. Finally, IthaGenes is integrated with the companion tool IthaMaps for the display of corresponding epidemiological data on distribution maps. IthaGenes is incorporated in the ITHANET community portal and is free and publicly available at http://www.ithanet.eu/db/ithagenes.",2014-07-24 +28200016,IRaPPA: information retrieval based integration of biophysical models for protein assembly selection.,"

Motivation

In order to function, proteins frequently bind to one another and form 3D assemblies. Knowledge of the atomic details of these structures helps our understanding of how proteins work together, how mutations can lead to disease, and facilitates the designing of drugs which prevent or mimic the interaction.

Results

Atomic modeling of protein-protein interactions requires the selection of near-native structures from a set of docked poses based on their calculable properties. By considering this as an information retrieval problem, we have adapted methods developed for Internet search ranking and electoral voting into IRaPPA, a pipeline integrating biophysical properties. The approach enhances the identification of near-native structures when applied to four docking methods, resulting in a near-native appearing in the top 10 solutions for up to 50% of complexes benchmarked, and up to 70% in the top 100.

Availability and implementation

IRaPPA has been implemented in the SwarmDock server ( http://bmm.crick.ac.uk/∼SwarmDock/ ), pyDock server ( http://life.bsc.es/pid/pydockrescoring/ ) and ZDOCK server ( http://zdock.umassmed.edu/ ), with code available on request.

Contact

moal@ebi.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +28130423,IC-Finder: inferring robustly the hierarchical organization of chromatin folding.,"The spatial organization of the genome plays a crucial role in the regulation of gene expression. Recent experimental techniques like Hi-C have emphasized the segmentation of genomes into interaction compartments that constitute conserved functional domains participating in the maintenance of a proper cell identity. Here, we propose a novel method, IC-Finder, to identify interaction compartments (IC) from experimental Hi-C maps. IC-Finder is based on a hierarchical clustering approach that we adapted to account for the polymeric nature of chromatin. Based on a benchmark of realistic in silico Hi-C maps, we show that IC-Finder is one of the best methods in terms of reliability and is the most efficient numerically. IC-Finder proposes two original options: a probabilistic description of the inferred compartments and the possibility to explore the various hierarchies of chromatin organization. Applying the method to experimental data in fly and human, we show how the predicted segmentation may depend on the normalization scheme and how 3D compartmentalization is tightly associated with epigenomic information. IC-Finder provides a robust and generic 'all-in-one' tool to uncover the general principles of 3D chromatin folding and their influence on gene regulation. The software is available at http://membres-timc.imag.fr/Daniel.Jost/DJ-TIMC/Software.html.",2017-06-01 +27776539,CLIP-GENE: a web service of the condition specific context-laid integrative analysis for gene prioritization in mouse TF knockout experiments.,"

Motivation

Transcriptome data from the gene knockout experiment in mouse is widely used to investigate functions of genes and relationship to phenotypes. When a gene is knocked out, it is important to identify which genes are affected by the knockout gene. Existing methods, including differentially expressed gene (DEG) methods, can be used for the analysis. However, existing methods require cutoff values to select candidate genes, which can produce either too many false positives or false negatives. This hurdle can be addressed either by improving the accuracy of gene selection or by providing a method to rank candidate genes effectively, or both. Prioritization of candidate genes should consider the goals or context of the knockout experiment. As of now, there are no tools designed for both selecting and prioritizing genes from the mouse knockout data. Hence, the necessity of a new tool arises.

Results

In this study, we present CLIP-GENE, a web service that selects gene markers by utilizing differentially expressed genes, mouse transcription factor (TF) network, and single nucleotide variant information. Then, protein-protein interaction network and literature information are utilized to find genes that are relevant to the phenotypic differences. One of the novel features is to allow researchers to specify their contexts or hypotheses in a set of keywords to rank genes according to the contexts that the user specifies. We believe that CLIP-GENE will be useful in characterizing functions of TFs in mouse experiments.

Availability

http://epigenomics.snu.ac.kr/CLIP-GENE REVIEWERS: This article was reviewed by Dr. Lee and Dr. Pongor.",2016-10-24 +24849907,Multibiodose radiation emergency triage categorization software.,"In this note, the authors describe the MULTIBIODOSE software, which has been created as part of the MULTIBIODOSE project. The software enables doses estimated by networks of laboratories, using up to five retrospective (biological and physical) assays, to be combined to give a single estimate of triage category for each individual potentially exposed to ionizing radiation in a large scale radiation accident or incident. The MULTIBIODOSE software has been created in Java. The usage of the software is based on the MULTIBIODOSE Guidance: the program creates a link to a single SQLite database for each incident, and the database is administered by the lead laboratory. The software has been tested with Java runtime environment 6 and 7 on a number of different Windows, Mac, and Linux systems, using data from a recent intercomparison exercise. The Java program MULTIBIODOSE_1.0.jar is freely available to download from http://www.multibiodose.eu/software or by contacting the software administrator: MULTIBIODOSE-software@gmx.com.",2014-07-01 +29074515,Euthanasia and physician-assisted suicide not meeting due care criteria in the Netherlands: a qualitative review of review committee judgements.,"ObjectivesTo assess how Dutch regional euthanasia review committees (RTE) apply the euthanasia and physician-assisted suicide (EAS) due care criteria in cases where the criteria are judged not to have been met ('due care not met' (DCNM)) and to evaluate how the criteria function to set limits in Dutch EAS practice. DESIGN:A qualitative review using directed content analysis of DCNM cases in the Netherlands from 2012 to 2016 published on the RTE website (https://www.euthanasiecommissie.nl/) as of 31 January 2017. 
RESULTS:Of 33 DCNM cases identified (occurring 2012-2016), 32 cases (97%) were published online and included in the analysis. 22 cases (69%) violated only procedural criteria, relating to improper medication administration or inadequate physician consultation. 10 cases (31%) failed to meet substantive criteria, with the most common violation involving the no reasonable alternative (to EAS) criterion (seven cases). Most substantive cases involved controversial elements, such as EAS for psychiatric disorders or 'tired of life', in incapacitated patients or by physicians from advocacy organisations. Even in substantive criteria cases, the RTE's focus was procedural. The cases were more about unorthodox, unprofessional or overconfident physician behaviours and not whether patients should have received EAS. However, in some cases, physicians knowingly pushed the limits of EAS law. Physicians from euthanasia advocacy organisations were over-represented in substantive criteria cases. Trained EAS consultants tended to agree with or facilitate EAS in DCNM cases. Physicians and families had difficulty applying ambiguous advance directives of incapacitated patients. CONCLUSION:As a retrospective review of physician self-reported data, the Dutch RTEs do not focus on whether patients should have received EAS, but instead primarily gauge whether doctors conducted EAS in a thorough, professional manner. To what extent this constitutes enforcement of strict safeguards, especially when cases contain controversial features, is not clear.",2017-10-25 +26743510,Improving microRNA target prediction by modeling with unambiguously identified microRNA-target pairs from CLIP-ligation studies.,"

Motivation

MicroRNAs (miRNAs) are small non-coding RNAs that are extensively involved in many physiological and disease processes. One major challenge in miRNA studies is the identification of genes targeted by miRNAs. Currently, most researchers rely on computational programs to initially identify target candidates for subsequent validation. Although considerable progress has been made in recent years for computational target prediction, there is still significant room for algorithmic improvement.

Results

Here, we present an improved target prediction algorithm, which was developed by modeling high-throughput profiling data from recent CLIPL (crosslinking and immunoprecipitation followed by RNA ligation) sequencing studies. In these CLIPL-seq studies, the RNA sequences in each miRNA-target pair were covalently linked and unambiguously determined experimentally. By analyzing the CLIPL data, many known and novel features relevant to target recognition were identified and then used to build a computational model for target prediction. Comparative analysis showed that the new algorithm had improved performance over existing algorithms when applied to independent experimental data.

Availability and implementation

All the target prediction data as well as the prediction tool can be accessed at miRDB (http://mirdb.org).

Contact

xwang@radonc.wustl.edu.",2016-01-06 +26546515,Legume information system (LegumeInfo.org): a key component of a set of federated data resources for the legume family.,"Legume Information System (LIS), at http://legumeinfo.org, is a genomic data portal (GDP) for the legume family. LIS provides access to genetic and genomic information for major crop and model legumes. With more than two-dozen domesticated legume species, there are numerous specialists working on particular species, and also numerous GDPs for these species. LIS has been redesigned in the last three years both to better integrate data sets across the crop and model legumes, and to better accommodate specialized GDPs that serve particular legume species. To integrate data sets, LIS provides genome and map viewers, holds synteny mappings among all sequenced legume species and provides a set of gene families to allow traversal among orthologous and paralogous sequences across the legumes. To better accommodate other specialized GDPs, LIS uses open-source GMOD components where possible, and advocates use of common data templates, formats, schemas and interfaces so that data collected by one legume research community are accessible across all legume GDPs, through similar interfaces and using common APIs. This federated model for the legumes is managed as part of the 'Legume Federation' project (accessible via http://legumefederation.org), which can be thought of as an umbrella project encompassing LIS and other legume GDPs.",2015-11-05 +21311855,Comparative mapping of the Oregon Wolfe Barley using doubled haploid lines derived from female and male gametes.,"The Oregon Wolfe Barley mapping population is a resource for genetics research and instruction. Prior reports are based on a population of doubled haploid (DH) lines developed by the Hordeum bulbosum (H.b.) method, which samples female gametes. We developed new DH lines from the same cross using anther culture (A.C.), which samples male gametes. 
Linkage maps were generated in each of the two subpopulations using the same 1,328 single nucleotide polymorphism markers. The linkage maps based on DH lines derived from the products of megasporogenesis and microsporogenesis revealed minor differences in terms of estimated recombination rates. There were no differences in locus ordering. There was greater segregation distortion in the A.C.-derived subpopulation than in the H.b.-derived subpopulation, but in the region showing the greatest distortion, the cause was more likely allelic variation at the ZEO1 plant height locus rather than DH production method. The effects of segregation distortion and pleiotropy had greater impacts on estimates of quantitative trait locus effect than population size for reproductive fitness traits assayed under greenhouse conditions. The Oregon Wolfe Barley (OWB) population and data are community resources. Seed is available from three distribution centers located in North America, Europe, and Asia. Details on ordering seed sets, as well as complete genotype and phenotype data files, are available at http://wheat.pw.usda.gov/ggpages/maps/OWB/ .",2011-02-11 +25359888,Large-scale binding ligand prediction by improved patch-based method Patch-Surfer2.0.,"

Motivation

Ligand binding is a key aspect of the function of many proteins. Thus, binding ligand prediction provides important insight in understanding the biological function of proteins. Binding ligand prediction is also useful for drug design and examining potential drug side effects.

Results

We present a computational method named Patch-Surfer2.0, which predicts binding ligands for a protein pocket. By representing and comparing pockets at the level of small local surface patches that characterize physicochemical properties of the local regions, the method can identify binding pockets of the same ligand even if they do not share globally similar shapes. Properties of local patches are represented by an efficient mathematical representation, 3D Zernike Descriptor. Patch-Surfer2.0 has significant technical improvements over our previous prototype, which includes a new feature that captures approximate patch position with a geodesic distance histogram. Moreover, we constructed a large comprehensive database of ligand binding pockets that will be searched against by a query. The benchmark shows better performance of Patch-Surfer2.0 over existing methods.

Availability and implementation

http://kiharalab.org/patchsurfer2.0/ CONTACT: dkihara@purdue.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-29 +21943375,"Reptilian-transcriptome v1.0, a glimpse in the brain transcriptome of five divergent Sauropsida lineages and the phylogenetic position of turtles.","

Background

Reptiles are largely under-represented in comparative genomics despite the fact that they are substantially more diverse in many respects than mammals. Given the high divergence of reptiles from classical model species, next-generation sequencing of their transcriptomes is an approach of choice for gene identification and annotation.

Results

Here, we use 454 technology to sequence the brain transcriptome of four divergent reptilian and one reference avian species: the Nile crocodile, the corn snake, the bearded dragon, the red-eared turtle, and the chicken. Using an in-house pipeline for recursive similarity searches of >3,000,000 reads against multiple databases from 7 reference vertebrates, we compile a reptilian comparative transcriptomics dataset, with homology assignment for 20,000 to 31,000 transcripts per species and a cumulated non-redundant sequence length of 248.6 Mbases. Our approach identifies the majority (87%) of chicken brain transcripts and about 50% of de novo assembled reptilian transcripts. In addition to 57,502 microsatellite loci, we identify thousands of SNP and indel polymorphisms for population genetic and linkage analyses. We also build very large multiple alignments for Sauropsida and mammals (two million residues per species) and perform extensive phylogenetic analyses suggesting that turtles are not basal living reptiles but are rather associated with Archosaurians, hence, potentially answering a long-standing question in the phylogeny of Amniotes.

Conclusions

The reptilian transcriptome (freely available at http://www.reptilian-transcriptomes.org) should prove a useful new resource as reptiles are becoming important new models for comparative genomics, ecology, and evolutionary developmental genetics.",2011-09-26 +27706277,The Decline of the Autopsy in Rhode Island and Nationwide: Past Trends and Future Directions.,"The autopsy has long been a fundamental aspect of medical practice and research. However, in the last 50 years, the proportion of deaths for which an autopsy is performed has decreased dramatically. Here we examine some of the reasons for the decline of the autopsy, as well as several interventions that have been proposed to revive it. We also present autopsy utilization data from the Lifespan system, which mirrors nationwide trends. [Full article available at http://rimed.org/rimedicaljournal-2016-10.asp].",2016-10-04 +24948109,BrassicaTED - a public database for utilization of miniature transposable elements in Brassica species.,"

Background

MITE, TRIM and SINEs are miniature form transposable elements (mTEs) that are ubiquitous and dispersed throughout entire plant genomes. Tens of thousands of members cause insertion polymorphism at both the inter- and intra- species level. Therefore, mTEs are valuable targets and resources for development of markers that can be utilized for breeding, genetic diversity and genome evolution studies. Taking advantage of the completely sequenced genomes of Brassica rapa and B. oleracea, characterization of mTEs and building a curated database are prerequisite to extending their utilization for genomics and applied fields in Brassica crops.

Findings

We have developed BrassicaTED as a unique web portal containing detailed characterization information for mTEs of Brassica species. At present, BrassicaTED has datasets for 41 mTE families, including 5894 and 6026 members from 20 MITE families, 1393 and 1639 members from 5 TRIM families, 1270 and 2364 members from 16 SINE families in B. rapa and B. oleracea, respectively. BrassicaTED offers different sections to browse structural and positional characteristics for every mTE family. In addition, we have added data on 289 MITE insertion polymorphisms from a survey of seven Brassica relatives. Genes with internal mTE insertions are shown with detailed gene annotation and microarray-based comparative gene expression data in comparison with their paralogs in the triplicated B. rapa genome. This database also includes a novel tool, K BLAST (Karyotype BLAST), for clear visualization of the locations for each member in the B. rapa and B. oleracea pseudo-genome sequences.

Conclusions

BrassicaTED is a newly developed database of information regarding the characteristics and potential utility of mTEs including MITE, TRIM and SINEs in B. rapa and B. oleracea. The database will promote the development of desirable mTE-based markers, which can be utilized for genomics and breeding in Brassica species. BrassicaTED will be a valuable repository for scientists and breeders, promoting efficient research on Brassica species. BrassicaTED can be accessed at http://im-crop.snu.ac.kr/BrassicaTED/index.php.",2014-06-20 +27736794,Protein Synthesis during Germination: Shedding New Light on a Classical Question.,"Despite over a century of research into the mystery of bacterial spore dormancy and germination, a key question remains unresolved: is protein synthesis required for germination? The development of more sophisticated techniques for assessing and preventing protein synthesis has renewed interest in this long-standing question in recent years. In this issue, Korza et al. (G. Korza, B. Setlow, L. Rao, Q. Li, and P. Setlow, J. Bacteriol 198:3254-3264, 2016, http://dx.doi.org/10.1128/JB.00583-16) address this with a novel approach. We discuss their results in the context of recently published data.",2016-11-18 +21791102,Applications of the pipeline environment for visual informatics and genomics computations.,"

Background

Contemporary informatics and genomics research require efficient, flexible and robust management of large heterogeneous data, advanced computational tools, powerful visualization, reliable hardware infrastructure, interoperability of computational resources, and detailed data and analysis-protocol provenance. The Pipeline is a client-server distributed computational environment that facilitates the visual graphical construction, execution, monitoring, validation and dissemination of advanced data analysis protocols.

Results

This paper reports on the applications of the LONI Pipeline environment to address two informatics challenges - graphical management of diverse genomics tools, and the interoperability of informatics software. Specifically, this manuscript presents the concrete details of deploying general informatics suites and individual software tools to new hardware infrastructures, the design, validation and execution of new visual analysis protocols via the Pipeline graphical interface, and integration of diverse informatics tools via the Pipeline eXtensible Markup Language syntax. We demonstrate each of these processes using several established informatics packages (e.g., miBLAST, EMBOSS, mrFAST, GWASS, MAQ, SAMtools, Bowtie) for basic local sequence alignment and search, molecular biology data analysis, and genome-wide association studies. These examples demonstrate the power of the Pipeline graphical workflow environment to enable integration of bioinformatics resources which provide a well-defined syntax for dynamic specification of the input/output parameters and the run-time execution controls.

Conclusions

The LONI Pipeline environment http://pipeline.loni.ucla.edu provides a flexible graphical infrastructure for efficient biomedical computing and distributed informatics research. The interactive Pipeline resource manager enables the utilization and interoperability of diverse types of informatics resources. The Pipeline client-server model provides computational power to a broad spectrum of informatics investigators--experienced developers and novice users, user with or without access to advanced computational-resources (e.g., Grid, data), as well as basic and translational scientists. The open development, validation and dissemination of computational networks (pipeline workflows) facilitates the sharing of knowledge, tools, protocols and best practices, and enables the unbiased validation and replication of scientific findings by the entire community.",2011-07-26 +28350216,AtSLP2 is an intronless protein phosphatase that co-expresses with intronless mitochondrial pentatricopeptide repeat (PPR) and tetratricopeptide (TPR) protein encoding genes.,"Shewanella-like PPP family phosphatases (SLPs) are a unique lineage of eukaryote PPP-family phosphatases of bacterial origin which are not found in metazoans. 1,2 Their absence in metazoans is marked by their ancient bacterial origins and presence in plants. 1 Recently, we found that the SLP2 phosphatase ortholog of Arabidopsis thaliana localized to the mitochondrial intermembrane space (IMS) where it was determined to be activated by mitochondrial intermembrane space protein 40 (MIA40) to regulate seed germination. 3 Through examination of atslp2 knockout (accelerated germination) and 35S::AtSLP2 over-expressing (delayed germination) plants it was found that AtSLP2 influences Arabidopsis thaliana germination rates via gibberellic acid (GA) biosynthesis. 3 However, the exact mechanism by which this occurs remains unresolved. 
To identify potential partners of AtSLP2 in regulating germination through GA, we undertook a gene co-expression network analysis using RNA-sequencing data available through Genevestigator ( https://genevestigator.com/gv/ ).",2017-04-01 +28031184,SVScore: an impact prediction tool for structural variation.,"

Summary

Here we present SVScore, a tool for in silico structural variation (SV) impact prediction. SVScore aggregates per-base single nucleotide polymorphism (SNP) pathogenicity scores across relevant genomic intervals for each SV in a manner that considers variant type, gene features and positional uncertainty. We show that the allele frequency spectrum of high-scoring SVs is strongly skewed toward lower frequencies, suggesting that they are under purifying selection, and that SVScore identifies deleterious variants more effectively than alternative methods. Notably, our results also suggest that duplications are under surprisingly strong selection relative to deletions, and that there are a similar number of strongly pathogenic SVs and SNPs in the human population.

Availability and implementation

SVScore is implemented in Perl and available freely at {{ http://www.github.com/lganel/SVScore }} for use under the MIT license.

Contact

ihall@wustl.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +26568625,Using de novo protein structure predictions to measure the quality of very large multiple sequence alignments.,"

Motivation

Multiple sequence alignments (MSAs) with large numbers of sequences are now commonplace. However, current multiple alignment benchmarks are ill-suited for testing these types of alignments, as test cases either contain a very small number of sequences or are based purely on simulation rather than empirical data.

Results

We take advantage of recent developments in protein structure prediction methods to create a benchmark (ContTest) for protein MSAs containing many thousands of sequences in each test case and which is based on empirical biological data. We rank popular MSA methods using this benchmark and verify a recent result showing that chained guide trees increase the accuracy of progressive alignment packages on datasets with thousands of proteins.

Availability and implementation

Benchmark data and scripts are available for download at http://www.bioinf.ucd.ie/download/ContTest.tar.gz

Contact

des.higgins@ucd.ie

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-14 +28625475,Prediction of protein-protein interactions by label propagation with protein evolutionary and chemical information derived from heterogeneous network.,"Prediction of protein-protein interactions (PPIs) is of great significance. To achieve this, we propose a novel computational method for PPIs prediction based on a similarity network fusion (SNF) model for integrating the physical and chemical properties of proteins. Specifically, the physical and chemical properties of protein are the protein amino acid mutation rate and its hydrophobicity, respectively. The amino acid mutation rate is extracted using a BLOSUM62 matrix, which puts the protein sequence into block substitution matrix. The SNF model is exploited to fuse protein physical and chemical features of multiple data by iteratively updating each original network. Finally, the complementary features from the fused network are fed into a label propagation algorithm (LPA) for PPIs prediction. The experimental results show that the proposed method achieves promising performance and outperforms the traditional methods for the public dataset of H. pylori, Human, and Yeast. In addition, our proposed method achieves average accuracy of 76.65%, 81.98%, 84.56%, 84.01% and 84.38% on E. coli, C. elegans, H. sapien, H. pylori and M. musculus datasets, respectively. Comparison results demonstrate that the proposed method is very promising and provides a cost-effective alternative for predicting PPIs. The source code and all datasets are available at http://pan.baidu.com/s/1dF7rp7N.",2017-06-16 +27026615,PheKB: a catalog and workflow for creating electronic phenotype algorithms for transportability.,"

Objective

Health care generated data have become an important source for clinical and genomic research. Often, investigators create and iteratively refine phenotype algorithms to achieve high positive predictive values (PPVs) or sensitivity, thereby identifying valid cases and controls. These algorithms achieve the greatest utility when validated and shared by multiple health care systems. Materials and Methods: We report the current status and impact of the Phenotype KnowledgeBase (PheKB, http://phekb.org), an online environment supporting the workflow of building, sharing, and validating electronic phenotype algorithms. We analyze the most frequent components used in algorithms and their performance at authoring institutions and secondary implementation sites.

Results

As of June 2015, PheKB contained 30 finalized phenotype algorithms and 62 algorithms in development spanning a range of traits and diseases. Phenotypes have had over 3500 unique views in a 6-month period and have been reused by other institutions. International Classification of Disease codes were the most frequently used component, followed by medications and natural language processing. Among algorithms with published performance data, the median PPV was nearly identical when evaluated at the authoring institutions (n = 44; case 96.0%, control 100%) compared to implementation sites (n = 40; case 97.5%, control 100%).

Discussion

These results demonstrate that a broad range of algorithms to mine electronic health record data from different health systems can be developed with high PPV, and algorithms developed at one site are generally transportable to others.

Conclusion

By providing a central repository, PheKB enables improved development, transportability, and validity of algorithms for research-grade phenotypes using health care generated data.",2016-03-28 +28865494,Implementation and validation of the extended Hill-type muscle model with robust routing capabilities in LS-DYNA for active human body models.,"

Background

In the state of the art finite element AHBMs for car crash analysis in the LS-DYNA software material named *MAT_MUSCLE (*MAT_156) is used for active muscles modeling. It has three elements in parallel configuration, which has several major drawbacks: restraint approximation of the physical reality, complicated parameterization and absence of the integrated activation dynamics. This study presents implementation of the extended four element Hill-type muscle model with serial damping and eccentric force-velocity relation including [Formula: see text] dependent activation dynamics and internal method for physiological muscle routing.

Results

Proposed model was implemented into the general-purpose finite element (FE) simulation software LSDYNA as a user material for truss elements. This material model is verified and validated with three different sets of mammalian experimental data, taken from the literature. It is compared to the *MAT_MUSCLE (*MAT_156) Hill-type muscle model already existing in LS-DYNA, which is currently used in finite element human body models (HBMs). An application example with an arm model extracted from the FE ViVA OpenHBM is given, taking into account physiological muscle paths.

Conclusion

The simulation results show better material model accuracy, calculation robustness and improved muscle routing capability compared to *MAT_156. The FORTRAN source code for the user material subroutine dyn21.f and the muscle parameters for all simulations, conducted in the study, are given at https://zenodo.org/record/826209 under an open source license. This enables a quick application of the proposed material model in LS-DYNA, especially in active human body models (AHBMs) for applications in automotive safety.",2017-09-02 +29244000,Identification of recent cases of hepatitis C virus infection using physical-chemical properties of hypervariable region 1 and a radial basis function neural network classifier.,"

Background

Identification of acute or recent hepatitis C virus (HCV) infections is important for detecting outbreaks and devising timely public health interventions for interruption of transmission. Epidemiological investigations and chemistry-based laboratory tests are 2 main approaches that are available for identification of acute HCV infection. However, owing to complexity, both approaches are not efficient. Here, we describe a new sequence alignment-free method to discriminate between recent (R) and chronic (C) HCV infection using next-generation sequencing (NGS) data derived from the HCV hypervariable region 1 (HVR1).

Results

Using dinucleotide auto correlation (DAC), we identified physical-chemical (PhyChem) features of HVR1 variants. Significant (p < 9.58 × 10⁻⁴) differences in the means and frequency distributions of PhyChem features were found between HVR1 variants sampled from patients with recent vs chronic (R/C) infection. Moreover, the R-associated variants were found to occupy distinct and discrete PhyChem spaces. A radial basis function neural network classifier trained on the PhyChem features of intra-host HVR1 variants accurately classified R/C-HVR1 variants (classification accuracy (CA) = 94.85%; area under the ROC curve, AUROC = 0.979, in 10-fold cross-validation). The classifier was accurate in assigning individual HVR1 variants to R/C-classes in the testing set (CA = 84.15%; AUROC = 0.912) and in detection of infection duration (R/C-class) in patients (CA = 88.45%). Statistical tests and evaluation of the classifier on randomly-labeled datasets indicate that classifiers' CA is robust (p < 0.001) and unlikely due to random correlations (CA = 59.04% and AUROC = 0.50).

Conclusions

The PhyChem features of intra-host HVR1 variants are strongly associated with the duration of HCV infection. Application of the PhyChem biomarkers to models for detection of the R/C-state of HCV infection in patients offers a new opportunity for detection of outbreaks and for molecular surveillance. The method will be available at https://webappx.cdc.gov/GHOST/ to the authenticated users of Global Hepatitis Outbreak and Surveillance Technology (GHOST) for further testing and validation.",2017-12-06 +25348399,HAMAP in 2015: updates to the protein family classification and annotation system.,"HAMAP (High-quality Automated and Manual Annotation of Proteins--available at http://hamap.expasy.org/) is a system for the automatic classification and annotation of protein sequences. HAMAP provides annotation of the same quality and detail as UniProtKB/Swiss-Prot, using manually curated profiles for protein sequence family classification and expert curated rules for functional annotation of family members. HAMAP data and tools are made available through our website and as part of the UniRule pipeline of UniProt, providing annotation for millions of unreviewed sequences of UniProtKB/TrEMBL. Here we report on the growth of HAMAP and updates to the HAMAP system since our last report in the NAR Database Issue of 2013. We continue to augment HAMAP with new family profiles and annotation rules as new protein families are characterized and annotated in UniProtKB/Swiss-Prot; the latest version of HAMAP (as of 3 September 2014) contains 1983 family classification profiles and 1998 annotation rules (up from 1780 and 1720). We demonstrate how the complex logic of HAMAP rules allows for precise annotation of individual functional variants within large homologous protein families. 
We also describe improvements to our web-based tool HAMAP-Scan which simplify the classification and annotation of sequences, and the incorporation of an improved sequence-profile search algorithm.",2014-10-27 +26794641,A brain imaging repository of normal structural MRI across the life course: Brain Images of Normal Subjects (BRAINS).,"The Brain Images of Normal Subjects (BRAINS) Imagebank (http://www.brainsimagebank.ac.uk) is an integrated repository project hosted by the University of Edinburgh and sponsored by the Scottish Imaging Network: A Platform for Scientific Excellence (SINAPSE) collaborators. BRAINS provides sharing and archiving of detailed normal human brain imaging and relevant phenotypic data already collected in studies of healthy volunteers across the life-course. It particularly focusses on the extremes of age (currently older age, and in future perinatal) where variability is largest, and which are under-represented in existing databanks. BRAINS is a living imagebank where new data will be added when available. Currently BRAINS contains data from 808 healthy volunteers, from 15 to 81 years of age, from 7 projects in 3 centres. Additional completed and ongoing studies of normal individuals from 1st to 10th decades are in preparation and will be included as they become available. BRAINS holds several MRI structural sequences, including T1, T2, T2* and fluid attenuated inversion recovery (FLAIR), available in DICOM (http://dicom.nema.org/); in future Diffusion Tensor Imaging (DTI) will be added where available. Images are linked to a wide range of 'textual data', such as age, medical history, physiological measures (e.g. blood pressure), medication use, cognitive ability, and perinatal information for pre/post-natal subjects. 
The imagebank can be searched to include or exclude ranges of these variables to create better estimates of 'what is normal' at different ages.",2016-01-18 +25161236,Effects of small particle numbers on long-term behaviour in discrete biochemical systems.,"

Motivation

The functioning of many biological processes depends on the appearance of only a small number of a single molecular species. Additionally, the observation of molecular crowding leads to the insight that even a high number of copies of species do not guarantee their interaction. How single particles contribute to stabilizing biological systems is not well understood yet. Hence, we aim at determining the influence of single molecules on the long-term behaviour of biological systems, i.e. whether they can reach a steady state.

Results

We provide theoretical considerations and a tool to analyse Systems Biology Markup Language models for the possibility to stabilize because of the described effects. The theory is an extension of chemical organization theory, which we called discrete chemical organization theory. Furthermore we scanned the BioModels Database for the occurrence of discrete chemical organizations. To exemplify our method, we describe an application to the Template model of the mitotic spindle assembly checkpoint mechanism.

Availability and implementation

http://www.biosys.uni-jena.de/Services.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +28916602,Validation Study of Image-Based Fractional Flow Reserve During Coronary Angiography. ,"Fractional flow reserve (FFR), an index of the hemodynamic severity of coronary stenoses, is derived from invasive measurements and requires a pressure-monitoring guidewire and hyperemic stimulus. Angiography-derived FFR measurements (FFRangio) may have several advantages. The aim of this study is to assess the diagnostic performance and interobserver reproducibility of FFRangio in patients with stable coronary artery disease. FFRangio is a computational method based on rapid flow analysis for the assessment of FFR. FFRangio uses the patient's hemodynamic data and routine angiograms to generate a complete 3-dimensional coronary tree with color-coded FFR values at any epicardial location. Hyperemic flow ratio is derived from an automatic resistance-based lumped model of the entire coronary tree. A total of 203 lesions were analyzed in 184 patients from 4 centers. Values derived using FFRangio ranged from 0.5 to 0.97 (median 0.85) and correlated closely (Spearman ρ=0.90; P<0.001) with the invasive FFR measurements, which ranged from 0.5 to 1 (median 0.84). In Bland-Altman analyses, the 95% limits of agreement between these methods ranged from -0.096 to 0.112. Using an FFR cutoff value of 0.80, the sensitivity, specificity, and diagnostic accuracy of FFRangio were 88%, 95%, and 93%, respectively. The intraclass coefficient between 2 blinded operators was 0.962 with a 95% confidence interval from 0.950 to 0.971, P<0.001. There is a high concordance between FFRangio and invasive FFR. The color-coded display of FFR values during coronary angiography facilitates the integration of physiology and anatomy for decision making on revascularization in patients with stable coronary artery disease. URL: https://www.clinicaltrials.gov. 
Unique identifier: NCT03005028.",2017-09-01 +29216867,Protein kinase C α enhances migration of breast cancer cells through FOXC2-mediated repression of p120-catenin.,"

Background

Despite recent advances in the diagnosis and treatment of breast cancer, metastasis remains the main cause of death. Since migration of tumor cells is considered a prerequisite for tumor cell invasion and metastasis, a pressing goal in tumor biology has been to elucidate factors regulating their migratory activity. Protein kinase C alpha (PKCα) is a serine-threonine protein kinase implicated in cancer metastasis and associated with poor prognosis in breast cancer patients. In this study, we set out to define the signaling axis mediated by PKCα to promote breast cancer cell migration.

Methods

Oncomine™ overexpression analysis was used to probe for PRKCA (PKCα) and FOXC2 expression in mRNA datasets. The heat map of PRKCA, FOXC2, and CTNND1 were obtained from the UC Santa Cruz platform. Survival data were obtained by PROGgene and available at http://www.compbio.iupui.edu/proggene . Markers for EMT and adherens junction were assessed by Western blotting and quantitative polymerase chain reaction. Effects of PKCα and FOXC2 on migration and invasion were assessed in vitro by transwell migration and invasion assays respectively. Cellular localization of E-cadherin and p120-catenin was determined by immunofluorescent staining. Promoter activity of p120-catenin was determined by dual luciferase assay using a previously validated p120-catenin reporter construct. Interaction between FOXC2 and p120-catenin promoter was verified by chromatin immunoprecipitation assay.

Results

We determined that PKCα expression is necessary to maintain the migratory and invasive phenotype of both endocrine resistant and triple negative breast cancer cell lines. FOXC2 acts as a transcriptional repressor downstream of PKCα, and represses p120-catenin expression. Consequently, loss of p120-catenin leads to destabilization of E-cadherin at the adherens junction. Inhibition of either PKCα or FOXC2 is sufficient to rescue p120-catenin expression and trigger relocalization of p120-catenin and E-cadherin to the cell membrane, resulting in reduced tumor cell migration and invasion.

Conclusions

Taken together, these results suggest that breast cancer metastasis may partially be controlled through PKCα/FOXC2-dependent repression of p120-catenin and highlight the potential for PKCα signal transduction networks to be targeted for the treatment of endocrine resistant and triple negative breast cancer.",2017-12-07 +27639358,Diffusion kurtosis metrics as biomarkers of microstructural development: A comparative study of a group of children and a group of adults.,"The most common modality of diffusion MRI used in the ageing and development studies is diffusion tensor imaging (DTI) providing two key measures, fractional anisotropy and mean diffusivity. Here, we investigated diffusional changes occurring between childhood (average age 10.3 years) and middle adult age (average age 54.3 years) with the help of diffusion kurtosis imaging (DKI), a recent novel extension of DTI that provides additional metrics quantifying non-Gaussianity of water diffusion in brain tissue. We performed voxelwise statistical between-group comparison of diffusion tensor and kurtosis tensor metrics using two methods, namely, the tract-based spatial statistics (TBSS) and the atlas-based regional data analysis. For the latter, fractional anisotropy, mean diffusivity, mean diffusion kurtosis, and other scalar diffusion tensor and kurtosis tensor parameters were evaluated for white matter fibres provided by the Johns-Hopkins-University Atlas in the FSL toolkit (http://fsl.fmrib.ox.ac.uk/fsl/fslwiki/Atlases). Within the same age group, all evaluated parameters varied depending on the anatomical region. TBSS analysis showed that changes in kurtosis tensor parameters beyond adolescence are more widespread along the skeleton in comparison to the changes of the diffusion tensor metrics. The regional data analysis demonstrated considerably larger between-group changes of the diffusion kurtosis metrics than of diffusion tensor metrics in all investigated regions. 
The effect size of the parametric changes between childhood and middle adulthood was quantified using Cohen's d. We used Cohen's d related to mean diffusion kurtosis to examine heterogeneous maturation of various fibres. The largest changes of this parameter (interpreted as reflecting the lowest level of maturation by the age of children group) were observed in the association fibres, cingulum (gyrus) and cingulum (hippocampus) followed by superior longitudinal fasciculus and inferior longitudinal fasciculus. The smallest changes were observed in the commissural fibres, forceps major and forceps minor. In conclusion, our data suggest that DKI is sensitive to developmental changes in local microstructure and environment, and is particularly powerful to unravel developmental differences in major association fibres, such as the cingulum and superior longitudinal fasciculus.",2016-09-14 +28970140,MicroRNA-675 promotes glioma cell proliferation and motility by negatively regulating retinoblastoma 1.,"Previous studies indicated that microRNA (miR)-675 and its precursor lncRNA H19 were both overexpressed in glioma tissues, and H19 might play an oncogenic role. To investigate the involvement of miR-675 in gliomas and its underlying mechanisms, we here collected candidate target genes of miR-675-5p from miRTarBase (http://mirtarbase.mbc.nctu.edu.tw/, Release 6.0), which contains the experimentally validated microRNA-target interactions. Then, regulatory effects of miR-675 on its target genes were validated using clinical samples and glioma cell lines. Involvement of the miR-675-target axis deregulation in cell proliferation, migration and invasion of glioma was demonstrated by both gain- and loss-of-function experiments. As a result, retinoblastoma 1 (RB1) was identified as a candidate target gene of miR-675-5p. Expression levels of miR-675-5p in glioma tissues and cells were negatively correlated with RB1 expression at both mRNA and protein levels. 
Importantly, deregulation of the miR-675-5p-RB1 axis was significantly associated with advanced World Health Organization (WHO) grade and low Karnofsky performance score (KPS) score of glioma patients. Luciferase reporter assay verified that RB1 was a direct target gene of miR-675 in glioma cells. Functionally, miR-675 promoted glioma cell proliferation, migration and invasion. Notably, simulation of RB1 antagonized the effects induced by miR-675 up-regulation in glioma cells. In conclusion, our data suggest that miR-675 may be a key negative regulator of RB1 and the imbalance of the miR-675-RB1 axis may be clinically associated with aggressive progression of glioma patients. In addition, miR-675 may act as an oncogenic miRNA in glioma cells via regulating its target gene RB1.",2017-09-29 +24132929,gCMAP: user-friendly connectivity mapping with R.,"

Unlabelled

Connections between disease phenotypes and drug effects can be made by identifying commonalities in the associated patterns of differential gene expression. Searchable databases that record the impacts of chemical or genetic perturbations on the transcriptome--here referred to as 'connectivity maps'--permit discovery of such commonalities. We describe two R packages, gCMAP and gCMAPWeb, which provide a complete framework to construct and query connectivity maps assembled from user-defined collections of differential gene expression data. Microarray or RNAseq data are processed in a standardized way, and results can be interrogated using various well-established gene set enrichment methods. The packages also feature an easy-to-deploy web application that facilitates reproducible research through automatic generation of graphical and tabular reports.

Availability and implementation

The gCMAP and gCMAPWeb R packages are freely available for UNIX, Windows and Mac OS X operating systems at Bioconductor (http://www.bioconductor.org).",2013-10-15 +25464141,Identification of antimicrobial resistance genes in multidrug-resistant clinical Bacteroides fragilis isolates by whole genome shotgun sequencing.,"Bacteroides fragilis constitutes the most frequent anaerobic bacterium causing bacteremia in humans. The genetic background for antimicrobial resistance in B. fragilis is diverse with some genes requiring insertion sequence (IS) elements inserted upstream for increased expression. To evaluate whole genome shotgun sequencing as a method for predicting antimicrobial resistance properties, one meropenem resistant and five multidrug-resistant blood culture isolates were sequenced and antimicrobial resistance genes and IS elements identified using ResFinder 2.1 (http://cge.cbs.dtu.dk/services/ResFinder/) and a custom BLAST database. Combinations of cfxA, cepA, cfiA, nimA, nimD, nimE, nimJ, tetQ, ermB, ermF, bexB, linAn2 and mefEn2 genes were identified in the six isolates. blaOXA-347, an open reading frame predicted to be a β-lactamase (Cheng et al., 2012), was identified in one strain. Full length IS elements were identified directly upstream of four genes, but in most cases contigs terminated 100-150 bases upstream of the gene in question. Even though partial IS elements were identified in these short sequences, certain identification could not be ascertained. Full antibiograms for B. fragilis from genetic data will most likely require complete or nearly complete genomes. Current approaches to this are laborious and/or costly. Emerging technologies such as nanopore based single DNA strand sensing could perhaps provide a solution in the future.",2014-11-11 +27634098,Climate and Health Co-Benefits in Low-Income Countries: A Case Study of Carbon Financed Water Filters in Kenya and a Call for Independent Monitoring.,"

Background

The recent global climate agreement in Paris aims to mitigate greenhouse gas emissions while fostering sustainable development and establishes an international trading mechanism to meet this goal. Currently, carbon offset program implementers are allowed to collect their own monitoring data to determine the number of carbon credits to be awarded.

Objectives

We summarize reasons for mandating independent monitoring of greenhouse gas emission reduction projects. In support of our policy recommendations, we describe a case study of a program designed to earn carbon credits by distributing almost one million drinking water filters in rural Kenya to avert the use of fuel for boiling water. We compare results from an assessment conducted by our research team in the program area among households with pregnant women or caregivers in rural villages with low piped water access with the reported program monitoring data and discuss the implications.

Discussion

Our assessment in Kenya found lower levels of household water filter usage than the internal program monitoring reported estimates used to determine carbon credits; we found 19% (n = 4,041) of households reported filter usage 2-3 years after filter distribution compared to the program stated usage rate of 81% (n = 14,988) 2.7 years after filter distribution. Although carbon financing could be a financially sustainable approach to scale up water treatment and improve health in low-income settings, these results suggest program effectiveness will remain uncertain in the absence of requiring monitoring data be collected by third-party organizations.

Conclusion

Independent monitoring should be a key requirement for carbon credit verification in future international carbon trading mechanisms to ensure programs achieve benefits in line with sustainable development goals. Citation: Pickering AJ, Arnold BF, Dentz HN, Colford JM Jr., Null C. 2017. Climate and health co-benefits in low-income countries: a case study of carbon financed water filters in Kenya and a call for independent monitoring. Environ Health Perspect 125:278-283; http://dx.doi.org/10.1289/EHP342.",2016-09-16 +27849411,Systematic Reviews Published in the July 2016 Issue of the Cochrane Library.,"The Cochrane Library of Systematic Reviews is published quarterly as a DVD and monthly online ( http://www.thecochranelibrary.com ). The July 2016 issue (third DVD for 2016) contains 6963 complete reviews, 2457 protocols for reviews in production. In addition, there are citations of 945,000 randomized controlled trials, and 15,700 cited papers in the Cochrane Methodology Register. The Health Technology Assessment database contains some 16,000 citations. One hundred and twenty-one new reviews have been published in the previous three months, of which four have potential relevance for practitioners in pain and palliative medicine. The impact factor of the Cochrane Library stands at 6.1. Readers are encouraged to access the full report for any articles of interest, as only a brief commentary is provided.",2016-11-16 +27240020,Web-based ecosystem software for virtual crossmatching in transplant programs.,"

Background

The compatibilities between donors and recipients are extremely important for evaluating the immunological risks of transplants. One challenge faced by data analysis tools is the transformation of complex data into simple, intuitive, and important information that can be used to resolve contemporary problems. To address this challenge, we developed the EpViX software to perform epitope reactivity analyses and automated epitope virtual crossmatching. EpViX is a facilitator of medical decision-making regarding the identification of the best donor for a high-immunologic risk recipient. The objective of this work is to describe the computational architecture of the EpViX ecosystem (http://www.epvix.com.br).

Materials and methods

EpViX is a freeware on the web that was developed in the Ruby language. EpViX can be accessed from different platforms, e.g., PCs, tablets, and smartphones. It consists of an ecosystem of tools that are capable of integrating all of the stakeholders who are involved in a transplant process with a deceased donor.

Results

We successfully developed a program that allows people to work collaboratively and effectively during the donation process by accurately predicting negative crossmatches, saving time and other resources.

Conclusions

EpViX represents a significant breakthrough for the organ transplant process and may meet the current needs of transplant programs because it increases the chances of the allocation of low-immunologic risk donors to highly sensitized recipients and assures greater equity among the recipients on a waiting list. EpViX was duly verified and tested in terms of data security. Moreover, usability tests demonstrated that EpViX is an intuitive and easy-to-use tool.",2016-05-19 +28472273,EBT: a statistic test identifying moderate size of significant features with balanced power and precision for genome-wide rate comparisons.,"

Motivation

In genome-wide rate comparison studies, there is a big challenge for effective identification of an appropriate number of significant features objectively, since traditional statistical comparisons without multi-testing correction can generate a large number of false positives while multi-testing correction tremendously decreases the statistical power.

Results

In this study, we proposed a new exact test based on the translation of rate comparison to two binomial distributions. With modeling and real datasets, the exact binomial test (EBT) showed an advantage in balancing the statistical precision and power, by providing an appropriate size of significant features for further studies. Both correlation analysis and bootstrapping tests demonstrated that EBT is as robust as the typical rate-comparison methods, e.g. χ² test, Fisher's exact test and Binomial test. Performance comparison among machine learning models with features identified by different statistical tests further demonstrated the advantage of EBT. The new test was also applied to analyze the genome-wide somatic gene mutation rate difference between lung adenocarcinoma (LUAD) and lung squamous cell carcinoma (LUSC), two main lung cancer subtypes, and a list of new markers were identified that could be lineage-specifically associated with carcinogenesis of LUAD and LUSC, respectively. Interestingly, three cilia genes were found selectively with high mutation rates in LUSC, possibly implying the importance of cilia dysfunction in the carcinogenesis.

Availability and implementation

An R package implementing EBT could be downloaded from the website freely: http://www.szu-bioinf.org/EBT .

Contact

wangyj@szu.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +29058576,LCS-TA to identify similar fragments in RNA 3D structures.,"

Background

In modern structural bioinformatics, comparison of molecular structures aimed to identify and assess similarities and differences between them is one of the most commonly performed procedures. It gives the basis for evaluation of in silico predicted models. It constitutes the preliminary step in searching for structural motifs. In particular, it supports tracing the molecular evolution. Faced with an ever-increasing amount of available structural data, researchers need a range of methods enabling comparative analysis of the structures from either global or local perspective.

Results

Herein, we present a new, superposition-independent method which processes pairs of RNA 3D structures to identify their local similarities. The similarity is considered in the context of structure bending and bonds' rotation which are described by torsion angles. In the analyzed RNA structures, the method finds the longest continuous segments that show similar torsion within a user-defined threshold. The length of the segment is provided as local similarity measure. The method has been implemented as LCS-TA algorithm (Longest Continuous Segments in Torsion Angle space) and is incorporated into our MCQ4Structures application, freely available for download from http://www.cs.put.poznan.pl/tzok/mcq/ .

Conclusions

The presented approach ties torsion-angle-based method of structure analysis with the idea of local similarity identification by handling continuous 3D structure segments. The first method, implemented in MCQ4Structures, has been successfully utilized in RNA-Puzzles initiative. The second one, originally applied in Euclidean space, is a component of LGA (Local-Global Alignment) algorithm commonly used in assessing protein models submitted to CASP. This unique combination of concepts implemented in LCS-TA provides a new perspective on structure quality assessment in local and quantitative aspect. A series of computational experiments show the first results of applying our method to comparison of RNA 3D models. LCS-TA can be used for identifying strengths and weaknesses in the prediction of RNA tertiary structures.",2017-10-23 +28472524,I-TASSER-MR: automated molecular replacement for distant-homology proteins using iterative fragment assembly and progressive sequence truncation.,"Molecular replacement (MR) is one of the most common techniques used for solving the phase problem in X-ray crystal diffraction. The success rate of MR however drops quickly when the sequence identity between query and templates is reduced, while the I-TASSER-MR server is designed to solve the phase problem for proteins that lack close homologous templates. Starting from a sequence, it first generates full-length models using I-TASSER by iterative structural fragment reassembly. A progressive sequence truncation procedure is then used for editing the models based on local variations of the structural assembly simulations. Next, the edited models are submitted to MR-REX to search for optimal placements in the crystal unit-cells through replica-exchange Monte Carlo simulations, with the phasing results used by CNS for final atomic model refinement and selection. 
The I-TASSER-MR algorithm was tested in large-scale benchmark datasets and solved 36% more targets compared to using the best threading templates. The server takes primary sequence and raw crystal diffraction data as input, with output containing annotated phase information and refined structure models. It also allows users to choose between different methods for setting B-factors and the number of models used for phasing. The online server is freely available at http://zhanglab.ccmb.med.umich.edu/I-TASSER-MR.",2017-07-01 +28334186,RNAscClust: clustering RNA sequences using structure conservation and graph based motifs.,"

Motivation

Clustering RNA sequences with common secondary structure is an essential step towards studying RNA function. Whereas structural RNA alignment strategies typically identify common structure for orthologous structured RNAs, clustering seeks to group paralogous RNAs based on structural similarities. However, existing approaches for clustering paralogous RNAs do not take the compensatory base pair changes obtained from structure conservation in orthologous sequences into account.

Results

Here, we present RNAscClust, the implementation of a new algorithm to cluster a set of structured RNAs taking their respective structural conservation into account. For a set of multiple structural alignments of RNA sequences, each containing a paralog sequence included in a structural alignment of its orthologs, RNAscClust computes minimum free-energy structures for each sequence using conserved base pairs as prior information for the folding. The paralogs are then clustered using a graph kernel-based strategy, which identifies common structural features. We show that the clustering accuracy clearly benefits from an increasing degree of compensatory base pair changes in the alignments.

Availability and implementation

RNAscClust is available at http://www.bioinf.uni-freiburg.de/Software/RNAscClust .

Contact

gorodkin@rth.dk or backofen@informatik.uni-freiburg.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +28018423,A Meta-Analysis Based Method for Prioritizing Candidate Genes Involved in a Pre-specific Function.,"The identification of genes associated with a given biological function in plants remains a challenge, although network-based gene prioritization algorithms have been developed for Arabidopsis thaliana and many non-model plant species. Nevertheless, these network-based gene prioritization algorithms have encountered several problems; one in particular is that of unsatisfactory prediction accuracy due to limited network coverage, varying link quality, and/or uncertain network connectivity. Thus, a model that integrates complementary biological data may be expected to increase the prediction accuracy of gene prioritization. Toward this goal, we developed a novel gene prioritization method named RafSee, to rank candidate genes using a random forest algorithm that integrates sequence, evolutionary, and epigenetic features of plants. Subsequently, we proposed an integrative approach named RAP (Rank Aggregation-based data fusion for gene Prioritization), in which an order statistics-based meta-analysis was used to aggregate the rank of the network-based gene prioritization method and RafSee, for accurately prioritizing candidate genes involved in a pre-specific biological function. Finally, we showcased the utility of RAP by prioritizing 380 flowering-time genes in Arabidopsis. The ""leave-one-out"" cross-validation experiment showed that RafSee could work as a complement to a current state-of-art network-based gene prioritization system (AraNet v2). Moreover, RAP ranked 53.68% (204/380) flowering-time genes higher than AraNet v2, resulting in an 39.46% improvement in term of the first quartile rank. Further evaluations also showed that RAP was effective in prioritizing genes-related to different abiotic stresses. 
To enhance the usability of RAP for Arabidopsis and non-model plant species, an R package implementing the method is freely available at http://bioinfo.nwafu.edu.cn/software.",2016-12-15 +28334194,swga: a primer design toolkit for selective whole genome amplification.,"

Motivation

Population genomic analyses are often hindered by difficulties in obtaining sufficient numbers of genomes for analysis by DNA sequencing. Selective whole-genome amplification (SWGA) provides an efficient approach to amplify microbial genomes from complex backgrounds for sequence acquisition. However, the process of designing sets of primers for this method has many degrees of freedom and would benefit from an automated process to evaluate the vast number of potential primer sets.

Results

Here, we present swga, a program that identifies primer sets for SWGA and evaluates them for efficiency and selectivity. We used swga to design and test primer sets for the selective amplification of Wolbachia pipientis genomic DNA from infected Drosophila melanogaster and Mycobacterium tuberculosis from human blood. We identify primer sets that successfully amplify each against their backgrounds and describe a general method for using swga for arbitrary targets. In addition, we describe characteristics of primer sets that correlate with successful amplification, and present guidelines for implementation of SWGA to detect new targets.

Availability and implementation

Source code and documentation are freely available on https://www.github.com/eclarke/swga . The program is implemented in Python and C and licensed under the GNU Public License.

Contact

ecl@mail.med.upenn.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +28582569,GPCR-SSFE 2.0-a fragment-based molecular modeling web tool for Class A G-protein coupled receptors.,"G-protein coupled receptors (GPCRs) are key players in signal transduction and therefore a large proportion of pharmaceutical drugs target these receptors. Structural data of GPCRs are sparse yet important for elucidating the molecular basis of GPCR-related diseases and for performing structure-based drug design. To ameliorate this problem, GPCR-SSFE 2.0 (http://www.ssfa-7tmr.de/ssfe2/), an intuitive web server dedicated to providing three-dimensional Class A GPCR homology models has been developed. The updated web server includes 27 inactive template structures and incorporates various new functionalities. Uniquely, it uses a fingerprint correlation scoring strategy for identifying the optimal templates, which we demonstrate captures structural features that sequence similarity alone is unable to do. Template selection is carried out separately for each helix, allowing both single-template models and fragment-based models to be built. Additionally, GPCR-SSFE 2.0 stores a comprehensive set of pre-calculated and downloadable homology models and also incorporates interactive loop modeling using the tool SL2, allowing knowledge-based input by the user to guide the selection process. For visual analysis, the NGL viewer is embedded into the result pages. Finally, blind-testing using two recently published structures shows that GPCR-SSFE 2.0 performs comparably or better than other state-of-the art GPCR modeling web servers.",2017-07-01 +26833341,Integrative analysis for identifying joint modular patterns of gene-expression and drug-response data.,"

Motivation

The underlying relationship between genomic factors and the response of diverse cancer drugs still remains unclear. A number of studies showed that the heterogeneous responses to anticancer treatments of patients were partly associated with their specific changes in gene expression and somatic alterations. The emerging large-scale pharmacogenomic data provide us with valuable opportunities to improve existing therapies or to guide early-phase clinical trials of compounds under development. However, how to identify the underlying combinatorial patterns among pharmacogenomics data is still a challenging issue.

Results

In this study, we adopted a sparse network-regularized partial least square (SNPLS) method to identify joint modular patterns using large-scale pairwise gene-expression and drug-response data. We incorporated a molecular network to the (sparse) partial least square model to improve the module accuracy via a network-based penalty. We first demonstrated the effectiveness of SNPLS using a set of simulation data and compared it with two typical methods. Further, we applied it to gene expression profiles for 13 321 genes and pharmacological profiles for 98 anticancer drugs across 641 cancer cell lines consisting of diverse types of human cancers. We identified 20 gene-drug co-modules, each of which consists of 30 cell lines, 137 genes and 2 drugs on average. The majority of identified co-modules have significantly functional implications and coordinated gene-drug associations. The modular analysis here provided us new insights into the molecular mechanisms of how drugs act and suggested new drug targets for therapy of certain types of cancers.

Availability and implementation

A matlab package of SNPLS is available at http://page.amss.ac.cn/shihua.zhang/

Contact

zsh@amss.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-01 +27330136,Comprehensive analysis of high-throughput screens with HiTSeekR.,"High-throughput screening (HTS) is an indispensable tool for drug (target) discovery that currently lacks user-friendly software tools for the robust identification of putative hits from HTS experiments and for the interpretation of these findings in the context of systems biology. We developed HiTSeekR as a one-stop solution for chemical compound screens, siRNA knock-down and CRISPR/Cas9 knock-out screens, as well as microRNA inhibitor and -mimics screens. We chose three use cases that demonstrate the potential of HiTSeekR to fully exploit HTS screening data in quite heterogeneous contexts to generate novel hypotheses for follow-up experiments: (i) a genome-wide RNAi screen to uncover modulators of TNFα, (ii) a combined siRNA and miRNA mimics screen on vorinostat resistance and (iii) a small compound screen on KRAS synthetic lethality. HiTSeekR is publicly available at http://hitseekr.compbio.sdu.dk It is the first approach to close the gap between raw data processing, network enrichment and wet lab target generation for various HTS screen types.",2016-06-21 +24587080,A data-driven algorithm integrating clinical and laboratory features for the diagnosis and prognosis of necrotizing enterocolitis.,"

Background

Necrotizing enterocolitis (NEC) is a major source of neonatal morbidity and mortality. Since there is no specific diagnostic test or risk of progression model available for NEC, the diagnosis and outcome prediction of NEC is made on clinical grounds. The objective in this study was to develop and validate new NEC scoring systems for automated staging and prognostic forecasting.

Study design

A six-center consortium of university based pediatric teaching hospitals prospectively collected data on infants under suspicion of having NEC over a 7-year period. A database comprised of 520 infants was utilized to develop the NEC diagnostic and prognostic models by dividing the entire dataset into training and testing cohorts of demographically matched subjects. Developed on the training cohort and validated on the blind testing cohort, our multivariate analyses led to NEC scoring metrics integrating clinical data.

Results

Machine learning using clinical and laboratory results at the time of clinical presentation led to two NEC models: (1) an automated diagnostic classification scheme; (2) a dynamic prognostic method for risk-stratifying patients into low, intermediate and high NEC scores to determine the risk for disease progression. We submit that dynamic risk stratification of infants with NEC will assist clinicians in determining the need for additional diagnostic testing and guide potential therapies in a dynamic manner.

Algorithm availability

http://translationalmedicine.stanford.edu/cgi-bin/NEC/index.pl and smartphone application upon request.",2014-02-28 +27157157,Japanese Community Pharmacists' Dispensing Influences Medicine Price Reduction more than Prescription Numbers.,"This study examined the economic efficiency of the separation of prescription and dispensation medicines between doctors in medical institutions and pharmacists in pharmacies. The separation system in Japanese prefectures was examined with publicly available data (Ministry of Health, Labour and Welfare, 2012-2014; retrieved from http://www.mhlw.go.jp/topics/medias/year). We investigated whether the separation system reduces the number of medicines or the medication cost of a prescription because of separating the economic management between prescribing and dispensing and the effect of mutual observation between doctors and pharmacists. It is optional for Japanese medical institutions to participate in the separation system. Consequently, the spreading rate of the separation system in each administrative district is highly variable. We examined the separation system effect using the National Healthcare Insurance data for three years, 2012-2014. We tested whether the separation system ratio for each prefecture was significantly correlated to the medication price or the number of medicines on a prescription. If spreading the separation system influenced the price of prescribed daily medications or the number of medicines, the correlation would be significant. As a result, the medication price was significantly negatively correlated with the separation system ratio, but the number of medicines was not significant. Therefore, the separation system was effective in reducing daily medication cost but had little influence on reducing the number of daily medicines. This was observed over three years in Japan.",2016-09-01 +28881992,Image-based spatiotemporal causality inference for protein signaling networks.,"

Motivation

Efforts to model how signaling and regulatory networks work in cells have largely either not considered spatial organization or have used compartmental models with minimal spatial resolution. Fluorescence microscopy provides the ability to monitor the spatiotemporal distribution of many molecules during signaling events, but as of yet no methods have been described for large scale image analysis to learn a complex protein regulatory network. Here we present and evaluate methods for identifying how changes in concentration in one cell region influence concentration of other proteins in other regions.

Results

Using 3D confocal microscope movies of GFP-tagged T cells undergoing costimulation, we learned models containing putative causal relationships among 12 proteins involved in T cell signaling. The models included both relationships consistent with current knowledge and novel predictions deserving further exploration. Further, when these models were applied to the initial frames of movies of T cells that had been only partially stimulated, they predicted the localization of proteins at later times with statistically significant accuracy. The methods, consisting of spatiotemporal alignment, automated region identification, and causal inference, are anticipated to be applicable to a number of biological systems.

Availability and implementation

The source code and data are available as a Reproducible Research Archive at http://murphylab.cbd.cmu.edu/software/2017_TcellCausalModels/.

Contact

murphy@cmu.edu.",2017-07-01 +28498966,GenProBiS: web server for mapping of sequence variants to protein binding sites.,"Discovery of potentially deleterious sequence variants is important and has wide implications for research and generation of new hypotheses in human and veterinary medicine, and drug discovery. The GenProBiS web server maps sequence variants to protein structures from the Protein Data Bank (PDB), and further to protein-protein, protein-nucleic acid, protein-compound, and protein-metal ion binding sites. The concept of a protein-compound binding site is understood in the broadest sense, which includes glycosylation and other post-translational modification sites. Binding sites were defined by local structural comparisons of whole protein structures using the Protein Binding Sites (ProBiS) algorithm and transposition of ligands from the similar binding sites found to the query protein using the ProBiS-ligands approach with new improvements introduced in GenProBiS. Binding site surfaces were generated as three-dimensional grids encompassing the space occupied by predicted ligands. The server allows intuitive visual exploration of comprehensively mapped variants, such as human somatic mis-sense mutations related to cancer and non-synonymous single nucleotide polymorphisms from 21 species, within the predicted binding sites regions for about 80 000 PDB protein structures using fast WebGL graphics. The GenProBiS web server is open and free to all users at http://genprobis.insilab.org.",2017-07-01 +28472495,C-SPADE: a web-tool for interactive analysis and visualization of drug screening experiments through compound-specific bioactivity dendrograms.,"The advent of polypharmacology paradigm in drug discovery calls for novel chemoinformatic tools for analyzing compounds' multi-targeting activities. 
Such tools should provide an intuitive representation of the chemical space through capturing and visualizing underlying patterns of compound similarities linked to their polypharmacological effects. Most of the existing compound-centric chemoinformatics tools lack interactive options and user interfaces that are critical for the real-time needs of chemical biologists carrying out compound screening experiments. Toward that end, we introduce C-SPADE, an open-source exploratory web-tool for interactive analysis and visualization of drug profiling assays (biochemical, cell-based or cell-free) using compound-centric similarity clustering. C-SPADE allows the users to visually map the chemical diversity of a screening panel, explore investigational compounds in terms of their similarity to the screening panel, perform polypharmacological analyses and guide drug-target interaction predictions. C-SPADE requires only the raw drug profiling data as input, and it automatically retrieves the structural information and constructs the compound clusters in real-time, thereby reducing the time required for manual analysis in drug development or repurposing applications. The web-tool provides a customizable visual workspace that can either be downloaded as figure or Newick tree file or shared as a hyperlink with other users. C-SPADE is freely available at http://cspade.fimm.fi/.",2017-07-01 +27189542,SpreaD3: Interactive Visualization of Spatiotemporal History and Trait Evolutionary Processes.,"Model-based phylogenetic reconstructions increasingly consider spatial or phenotypic traits in conjunction with sequence data to study evolutionary processes. Alongside parameter estimation, visualization of ancestral reconstructions represents an integral part of these analyses. 
Here, we present a complete overhaul of the spatial phylogenetic reconstruction of evolutionary dynamics software, now called SpreaD3 to emphasize the use of data-driven documents, as an analysis and visualization package that primarily complements Bayesian inference in BEAST (http://beast.bio.ed.ac.uk, last accessed 9 May 2016). The integration of JavaScript D3 libraries (www.d3.org, last accessed 9 May 2016) offers novel interactive web-based visualization capacities that are not restricted to spatial traits and extend to any discrete or continuously valued trait for any organism of interest.",2016-04-23 +28669903,How to improve parameter estimates in GLM-based fMRI data analysis: cross-validated Bayesian model averaging.,"In functional magnetic resonance imaging (fMRI), model quality of general linear models (GLMs) for first-level analysis is rarely assessed. In recent work (Soch et al., 2016: ""How to avoid mismodelling in GLM-based fMRI data analysis: cross-validated Bayesian model selection"", NeuroImage, vol. 141, pp. 469-489; http://dx.doi.org/10.1016/j.neuroimage.2016.07.047), we have introduced cross-validated Bayesian model selection (cvBMS) to infer the best model for a group of subjects and use it to guide second-level analysis. While this is the optimal approach given that the same GLM has to be used for all subjects, there is a much more efficient procedure when model selection only addresses nuisance variables and regressors of interest are included in all candidate models. In this work, we propose cross-validated Bayesian model averaging (cvBMA) to improve parameter estimates for these regressors of interest by combining information from all models using their posterior probabilities. This is particularly useful as different models can lead to different conclusions regarding experimental effects and the most complex model is not necessarily the best choice. 
We find that cvBMS can prevent not detecting established effects and that cvBMA can be more sensitive to experimental effects than just using even the best model in each subject or the model which is best in a group of subjects.",2017-06-29 +26769317,Realizing privacy preserving genome-wide association studies.,"

Motivation

As genomics moves into the clinic, there has been much interest in using this medical data for research. At the same time the use of such data raises many privacy concerns. These circumstances have led to the development of various methods to perform genome-wide association studies (GWAS) on patient records while ensuring privacy. In particular, there has been growing interest in applying differentially private techniques to this challenge. Unfortunately, up until now all methods for finding high scoring SNPs in a differentially private manner have had major drawbacks in terms of either accuracy or computational efficiency.

Results

Here we overcome these limitations with a substantially modified version of the neighbor distance method for performing differentially private GWAS, and thus are able to produce a more viable mechanism. Specifically, we use input perturbation and an adaptive boundary method to overcome accuracy issues. We also design and implement a convex analysis based algorithm to calculate the neighbor distance for each SNP in constant time, overcoming the major computational bottleneck in the neighbor distance method. It is our hope that methods such as ours will pave the way for more widespread use of patient data in biomedical research.

Availability and implementation

A Python implementation is available at http://groups.csail.mit.edu/cb/DiffPriv/

Contact

bab@csail.mit.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-14 +27802571,Aquerium: A web application for comparative exploration of domain-based protein occurrences on the taxonomically clustered genome tree.,"Gene duplication and loss are major driving forces in evolution. While many important genomic resources provide information on gene presence, there is a lack of tools giving equal importance to presence and absence information as well as web platforms enabling easy visual comparison of multiple domain-based protein occurrences at once. Here, we present Aquerium, a platform for visualizing genomic presence and absence of biomolecules with a focus on protein domain architectures. The web server offers advanced domain organization querying against the database of pre-computed domains for ∼26,000 organisms and it can be utilized for identification of evolutionary events, such as fusion, disassociation, duplication, and shuffling of protein domains. The tool also allows alternative inputs of custom entries or BLASTP results for visualization. Aquerium will be a useful tool for biologists who perform comparative genomic and evolutionary analyses. The web server is freely accessible at http://aquerium.utk.edu. Proteins 2016; 85:72-77. © 2016 Wiley Periodicals, Inc.",2016-11-13 +22086960,"PrimerBank: a PCR primer database for quantitative gene expression analysis, 2012 update.","Optimization of primer sequences for polymerase chain reaction (PCR) and quantitative PCR (qPCR) and reaction conditions remains an experimental challenge. We have developed a resource, PrimerBank, which contains primers that can be used for PCR and qPCR under stringent and allele-invariant amplification conditions. A distinguishing feature of PrimerBank is the experimental validation of primer pairs covering most known mouse genes. 
Here, we describe a major update of PrimerBank that includes the design of new primers covering 17,076 and 18,086 genes for the human and mouse species, respectively. As a result of this update, PrimerBank contains 497,156 primers (an increase of 62% from the previous version) that cover 36,928 human and mouse genes, corresponding to around 94% of all known protein-coding gene sequences. An updated algorithm based on our previous approach was used to design new primers using current genomic information available from the National Center for Biotechnology Information (NCBI). PrimerBank primers work under uniform PCR conditions, and can be used for high-throughput or genome-wide qPCR. Because of their broader linear dynamic range and greater sensitivity, qPCR approaches are used to reanalyze changes in expression suggested by exploratory technologies such as microarrays and RNA-Seq. The primers and all experimental validation data can be freely accessed from the PrimerBank website, http://pga.mgh.harvard.edu/primerbank/.",2011-11-15 +23515433,TIARA genome database: update 2013.,"The Total Integrated Archive of short-Read and Array (TIARA; http://tiara.gmi.ac.kr) database stores and integrates human genome data generated from multiple technologies including next-generation sequencing and high-resolution comparative genomic hybridization array. The TIARA genome browser is a powerful tool for the analysis of personal genomic information by exploring genomic variants such as SNPs, indels and structural variants simultaneously. As of September 2012, the TIARA database provides raw data and variant information for 13 sequenced whole genomes, 16 sequenced transcriptomes and 33 high resolution array assays. Sequencing reads are available at a depth of ~30× for whole genomes and 50× for transcriptomes. Information on genomic variants includes a total of ~9.56 million SNPs, 23 025 of which are non-synonymous SNPs, and ~1.19 million indels. 
In this update, by adding high coverage sequencing of additional human individuals, the TIARA genome database now provides an extensive record of rare variants in humans. Following TIARA's fundamentally integrative approach, new transcriptome sequencing data are matched with whole-genome sequencing data in the genome browser. Users can here observe, for example, the expression levels of human genes with allele-specific quantification. Improvements to the TIARA genome browser include the intuitive display of new complex and large-scale data sets.",2013-03-20 +28407097,miRCat2: accurate prediction of plant and animal microRNAs from next-generation sequencing datasets.,"

Motivation

MicroRNAs are a class of ∼21-22 nt small RNAs which are excised from a stable hairpin-like secondary structure. They have important gene regulatory functions and are involved in many pathways including developmental timing, organogenesis and development in eukaryotes. There are several computational tools for miRNA detection from next-generation sequencing datasets. However, many of these tools suffer from high false positive and false negative rates. Here we present a novel miRNA prediction algorithm, miRCat2. miRCat2 incorporates a new entropy-based approach to detect miRNA loci, which is designed to cope with the high sequencing depth of current next-generation sequencing datasets. It has a user-friendly interface and produces graphical representations of the hairpin structure and plots depicting the alignment of sequences on the secondary structure.

Results

We test miRCat2 on a number of animal and plant datasets and present a comparative analysis with miRCat, miRDeep2, miRPlant and miReap. We also use mutants in the miRNA biogenesis pathway to evaluate the predictions of these tools. Results indicate that miRCat2 has an improved accuracy compared with other methods tested. Moreover, miRCat2 predicts several new miRNAs that are differentially expressed in wild-type versus mutants in the miRNA biogenesis pathway.

Availability and implementation

miRCat2 is part of the UEA small RNA Workbench and is freely available from http://srna-workbench.cmp.uea.ac.uk/.

Contact

v.moulton@uea.ac.uk or s.moxon@uea.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +23418187,Protein signature-based estimation of metagenomic abundances including all domains of life and viruses.,"

Motivation

Metagenome analysis requires tools that can estimate the taxonomic abundances in anonymous sequence data over the whole range of biological entities. Because there is usually no prior knowledge about the data composition, not only all domains of life but also viruses have to be included in taxonomic profiling. Such a full-range approach, however, is difficult to realize owing to the limited coverage of available reference data. In particular, archaea and viruses are generally not well represented by current genome databases.

Results

We introduce a novel approach to taxonomic profiling of metagenomes that is based on mixture model analysis of protein signatures. Our results on simulated and real data reveal the difficulties of the existing methods when measuring archaeal or viral abundances and show the overall good profiling performance of the protein-based mixture model. As an application example, we provide a large-scale analysis of data from the Human Microbiome Project. This demonstrates the utility of our method as a first instance profiling tool for a fast estimate of the community structure.

Availability

http://gobics.de/TaxyPro.

Supplementary information

Supplementary Material is available at Bioinformatics online.",2013-02-15 +24319143,MMDB and VAST+: tracking structural similarities between macromolecular complexes.,"The computational detection of similarities between protein 3D structures has become an indispensable tool for the detection of homologous relationships, the classification of protein families and functional inference. Consequently, numerous algorithms have been developed that facilitate structure comparison, including rapid searches against a steadily growing collection of protein structures. To this end, NCBI's Molecular Modeling Database (MMDB), which is based on the Protein Data Bank (PDB), maintains a comprehensive and up-to-date archive of protein structure similarities computed with the Vector Alignment Search Tool (VAST). These similarities have been recorded on the level of single proteins and protein domains, comprising in excess of 1.5 billion pairwise alignments. Here we present VAST+, an extension to the existing VAST service, which summarizes and presents structural similarity on the level of biological assemblies or macromolecular complexes. VAST+ simplifies structure neighboring results and shows, for macromolecular complexes tracked in MMDB, lists of similar complexes ranked by the extent of similarity. VAST+ replaces the previous VAST service as the default presentation of structure neighboring data in NCBI's Entrez query and retrieval system. MMDB and VAST+ can be accessed via http://www.ncbi.nlm.nih.gov/Structure.",2013-12-06 +28114089,[Non-conventional pharmacological agents for the treatment of bipolar disorder: Α systematic review of the evidence].,"Bipolar disorder (BD) has a complex and variable clinical picture which is characterized by many different phacets and phases and as a result its therapeutical options are also complex and often unsatisfactory. 
Typically the so-called ""mood stabilizers"" are used in the treatment of BD and in this class lithium and specific antiepileptics are included. The present study aimed to systematically review the literature concerning the presence of randomized double blind clinical trials of 'non conventional' pharmaceutical treatment options. The present systematic review utilized the PRISMA method and searched the MEDLINE through January 1st 2015 with the use of appropriate key words. In order to identify randomized controlled trials- RCTs a combination of the words ""bipolar"", ""manic"", ""mania"", ""manic depression"" and ""manic depressive"" with ""randomized"" was used. Webpages with lists of trials were also searched including http://clinicaltrials.gov and http://www.clinicalstudyresults.org as well as the official webpages of all pharma companies with products marketed in the treatment of BD. The reference lists of various review papers were also searched. The MEDLINE was searched with the combination of the words ""guidelines"" or ""algorithms"" with ""mania"", ""manic"", ""bipolar"", ""manicdepressive"" or ""manic depression"" in order to identify articles with treatment guidelines. The reference list of these articles were also scanned. From 3,284 papers which were initially traced, only 47 papers were included in the present study. From those agents studied in acute mania, tamoxifen is efficacious as monotherapy and as combination therapy with lithium and other mood stabilizers, however its safety profile is relatively poor. Allopurinol manifests efficacy in combination with lithium but not with other agents and its safety profile is satisfactory. Methoxyprogesterone is efficacious in combination with mood stabilizers and its safety profile is very good. In acute bipolar depression the combinations of FEWP with carbamazepine and ketamine, modafinil, pramipexole, pregnenolone and maybe armodafinil with mood stabilizers are efficacious. 
The safety profile of these combinations is medium. The use of celecoxib, lisdexamfetamine and memantine have negative data. Concerning the maintenance treatment, the data are negative for memantine and for N-acetylcysteine. Although most of the data concerning the usefulness of ""non-conventional"" pharmacotherapeutic agents in the treatment of bipolar disorder are negative, it is encouraging that those agents who have been proven efficacious probably exert their therapeutic effect through pathways which differ from usual and probably different from those classically considered in most biological models of bipolar illness. In this way they constitute new paradigms and open new horizons in the understanding of the disease.",2016-10-01 +25378316,"IMGT®, the international ImMunoGeneTics information system® 25 years on.","IMGT(®), the international ImMunoGeneTics information system(®)(http://www.imgt.org) is the global reference in immunogenetics and immunoinformatics. By its creation in 1989 by Marie-Paule Lefranc (Université de Montpellier and CNRS), IMGT(®) marked the advent of immunoinformatics, which emerged at the interface between immunogenetics and bioinformatics. IMGT(®) is specialized in the immunoglobulins (IG) or antibodies, T cell receptors (TR), major histocompatibility (MH) and proteins of the IgSF and MhSF superfamilies. IMGT(®) is built on the IMGT-ONTOLOGY axioms and concepts, which bridged the gap between genes, sequences and 3D structures. The concepts include the IMGT(®) standardized keywords (identification), IMGT(®) standardized labels (description), IMGT(®) standardized nomenclature (classification), IMGT unique numbering and IMGT Colliers de Perles (numerotation). IMGT(®) comprises 7 databases, 17 online tools and 15,000 pages of web resources, and provides a high-quality and integrated system for analysis of the genomic and expressed IG and TR repertoire of the adaptive immune responses, including NGS high-throughput data. 
Tools and databases are used in basic, veterinary and medical research, in clinical applications (mutation analysis in leukemia and lymphoma) and in antibody engineering and humanization. The IMGT/mAb-DB interface was developed for therapeutic antibodies and fusion proteins for immunological applications (FPIA). IMGT(®) is freely available at http://www.imgt.org.",2014-11-05 +25152233,ExpTreeDB: web-based query and visualization of manually annotated gene expression profiling experiments of human and mouse from GEO.,"

Motivation

Numerous public microarray datasets are valuable resources for the scientific communities. Several online tools have made great steps to use these data by querying related datasets with users' own gene signatures or expression profiles. However, dataset annotation and result exhibition still need to be improved.

Results

ExpTreeDB is a database that allows for queries on human and mouse microarray experiments from Gene Expression Omnibus with gene signatures or profiles. Compared with similar applications, ExpTreeDB pays more attention to dataset annotations and result visualization. We introduced a multiple-level annotation system to depict and organize original experiments. For example, a tamoxifen-treated cell line experiment is hierarchically annotated as 'agent→drug→estrogen receptor antagonist→tamoxifen'. Consequently, retrieved results are exhibited by an interactive tree-structured graphics, which provide an overview for related experiments and might enlighten users on key items of interest.

Availability and implementation

The database is freely available at http://biotech.bmi.ac.cn/ExpTreeDB. Web site is implemented in Perl, PHP, R, MySQL and Apache.",2014-08-24 +27468948,Gene expression classification using epigenetic features and DNA sequence composition in the human embryonic stem cell line H1.,"Epigenetic factors are known to correlate with gene expression in the existing studies. However, quantitative models that accurately classify the highly and lowly expressed genes based on epigenetic factors are currently lacking. In this study, a new machine learning method combines histone modifications, DNA methylation, DNA accessibility, transcription factors, and trinucleotide composition with support vector machines (SVM) is developed in the context of human embryonic stem cell line (H1). The results indicate that the predictive accuracy will be markedly improved when the epigenetic features are considered. The predictive accuracy and Matthews correlation coefficient of the best model are as high as 95.96% and 0.92 for 10-fold cross-validation test, and 95.58% and 0.92 for independent dataset test, respectively. Our model provides a good way to judge a gene is either highly or lowly expressed gene by using genetic and epigenetic data, when the expression data of the gene is lacking. And a web-server GECES for our analysis method is established at http://202.207.14.87:8032/fuwu/GECES/index.asp, so that other scientists can easily get their desired results by our web-server, without going through the mathematical details.",2016-07-25 +27832200,A Platform for Designing Genome-Based Personalized Immunotherapy or Vaccine against Cancer.,"Due to advancement in sequencing technology, genomes of thousands of cancer tissues or cell-lines have been sequenced. Identification of cancer-specific epitopes or neoepitopes from cancer genomes is one of the major challenges in the field of immunotherapy or vaccine development. 
This paper describes a platform Cancertope, developed for designing genome-based immunotherapy or vaccine against a cancer cell. Broadly, the integrated resources on this platform are apportioned into three precise sections. First section explains a cancer-specific database of neoepitopes generated from genome of 905 cancer cell lines. This database harbors wide range of epitopes (e.g., B-cell, CD8+ T-cell, HLA class I, HLA class II) against 60 cancer-specific vaccine antigens. Second section describes a partially personalized module developed for predicting potential neoepitopes against a user-specific cancer genome. Finally, we describe a fully personalized module developed for identification of neoepitopes from genomes of cancerous and healthy cells of a cancer-patient. In order to assist the scientific community, wide range of tools are incorporated in this platform that includes screening of epitopes against human reference proteome (http://www.imtech.res.in/raghava/cancertope/).",2016-11-10 +23127988,Adenosiland: walking through adenosine receptors landscape.,"Adenosine receptors (ARs) belong to the family of G protein-coupled receptors. Four distinct subtypes are known, termed adenosine A(1), A(2A), A(2B) and A(3). receptors and they are regulated by adenosine which is one of the most ancient and widespread chemical messengers in the animal and plant kingdoms. Moreover, ARs are widely distributed in human body and they are expressed with different density in diverse tissues. It is not surprising that they are involved in the regulation of several physiopathological processes. Adenosiland represents the first tentative of an integrated bioinformatics and chemoinformatics web-resource dedicated to adenosine receptors. This informatics platform provides a wide-ranging of structure based and ligand based query functions to facilitate the exploration of adenosine receptor structures from primary sequences to three-dimensional architectures. 
Here, we present an overview of Adenosiland platform describing the most valuable searching tools and their functionalities. Adenosiland can be freely accessed at http://mms.dsfarm.unipd.it/Adenosiland/.",2012-10-23 +28361677,Predicting protein-binding regions in RNA using nucleotide profiles and compositions.,"

Background

Motivated by the increased amount of data on protein-RNA interactions and the availability of complete genome sequences of several organisms, many computational methods have been proposed to predict binding sites in protein-RNA interactions. However, most computational methods are limited to finding RNA-binding sites in proteins instead of protein-binding sites in RNAs. Predicting protein-binding sites in RNA is more challenging than predicting RNA-binding sites in proteins. Recent computational methods for finding protein-binding sites in RNAs have several drawbacks for practical use.

Results

We developed a new support vector machine (SVM) model for predicting protein-binding regions in mRNA sequences. The model uses sequence profiles constructed from log-odds scores of mono- and di-nucleotides and nucleotide compositions. The model was evaluated by standard 10-fold cross validation, leave-one-protein-out (LOPO) cross validation and independent testing. Since actual mRNA sequences have more non-binding regions than protein-binding regions, we tested the model on several datasets with different ratios of protein-binding regions to non-binding regions. The best performance of the model was obtained in a balanced dataset of positive and negative instances. 10-fold cross validation with a balanced dataset achieved a sensitivity of 91.6%, a specificity of 92.4%, an accuracy of 92.0%, a positive predictive value (PPV) of 91.7%, a negative predictive value (NPV) of 92.3% and a Matthews correlation coefficient (MCC) of 0.840. LOPO cross validation showed a lower performance than the 10-fold cross validation, but the performance remains high (87.6% accuracy and 0.752 MCC). In testing the model on independent datasets, it achieved an accuracy of 82.2% and an MCC of 0.656. Testing of our model and other state-of-the-art methods on the same dataset showed that our model is better than the others.

Conclusions

Sequence profiles of log-odds scores of mono- and di-nucleotides were much more powerful features than nucleotide compositions in finding protein-binding regions in RNA sequences. But, a slight performance gain was obtained when using the sequence profiles along with nucleotide compositions. These are preliminary results of ongoing research, but demonstrate the potential of our approach as a powerful predictor of protein-binding regions in RNA. The program and supporting data are available at http://bclab.inha.ac.kr/RBPbinding .",2017-03-14 +28223433,Global Cancer in Women: Burden and Trends.,"This review is an abbreviated version of a report prepared for the American Cancer Society Global Health department and EMD Serono, Inc., a subsidiary of Merck KGaA, Darmstadt, Germany, which was released at the Union for International Cancer Control World Cancer Congress in Paris in November 2016. The original report can be found at https://www.cancer.org/health-care-professionals/our-global-health-work/global-cancer-burden/global-burden-of-cancer-in-women.html. Staff in the Intramural Research Department of the American Cancer Society designed and conducted the study, including analysis, interpretation, and presentation of the review. The funding sources had no involvement in the study design, data analysis and interpretation, or preparation of the reviewThere are striking disparities in the global cancer burden in women, yet few publications highlight cancer occurrence in this population, particularly for cancers that are not sex specific. This article, the first in a series of two, summarizes the current burden, trends, risk factors, prevention, early detection, and survivorship of all cancers combined and seven sites (breast, cervix, uterine corpus, ovary, colorectum, lung, and liver) that account for about 60% of the cancer burden among women worldwide, using data from the International Agency for Research on Cancer. 
Estimated 2012 overall cancer death rates in general are higher among women in low- and middle-income countries (LMICs) than high-income countries (HICs), despite their lower overall incidence rates, largely due to inadequate access to early detection and treatment. For example, the top mortality rates are in Zimbabwe (147 deaths per 100,000) and Malawi (138). Furthermore, incidence rates of cancers associated with economic development (e.g., lung, breast, colorectum) are rising in several LMICs. The burden of cancer among women could be substantially reduced in both HICs and LMICs through broad and equitable implementation of effective interventions, including tobacco control, HPV and HBV vaccination, and screening (breast, cervix, and colorectum). Cancer Epidemiol Biomarkers Prev; 26(4); 444-57. ©2017 AACRSee related article by Islami et al. in this CEBP Focus section, ""Global Cancer in Women.""",2017-02-21 +27312411,MultiQC: summarize analysis results for multiple tools and samples in a single report.,"

Motivation

Fast and accurate quality control is essential for studies involving next-generation sequencing data. Whilst numerous tools exist to quantify QC metrics, there is no common approach to flexibly integrate these across tools and large sample sets. Assessing analysis results across an entire project can be time consuming and error prone; batch effects and outlier samples can easily be missed in the early stages of analysis.

Results

We present MultiQC, a tool to create a single report visualising output from multiple tools across many samples, enabling global trends and biases to be quickly identified. MultiQC can plot data from many common bioinformatics tools and is built to allow easy extension and customization.

Availability and implementation

MultiQC is available with a GNU GPLv3 license on GitHub, the Python Package Index and Bioconda. Documentation and example reports are available at http://multiqc.info

Contact

phil.ewels@scilifelab.se.",2016-06-16 +27824078,PGAdb-builder: A web service tool for creating pan-genome allele database for molecular fine typing.,"With the advance of next generation sequencing techniques, whole genome sequencing (WGS) is expected to become the optimal method for molecular subtyping of bacterial isolates. To use WGS as a general subtyping method for disease outbreak investigation and surveillance, the layout of WGS-based typing must be comparable among laboratories. Whole genome multilocus sequence typing (wgMLST) is an approach that achieves this requirement. To apply wgMLST as a standard subtyping approach, a pan-genome allele database (PGAdb) for the population of a bacterial organism must first be established. We present a free web service tool, PGAdb-builder (http://wgmlstdb.imst.nsysu.edu.tw), for the construction of bacterial PGAdb. The effectiveness of PGAdb-builder was tested by constructing a pan-genome allele database for Salmonella enterica serovar Typhimurium, with the database being applied to create a wgMLST tree for a panel of epidemiologically well-characterized S. Typhimurium isolates. The performance of the wgMLST-based approach was as high as that of the SNP-based approach in Leekitcharoenphon's study used for discerning among epidemiologically related and non-related isolates.",2016-11-08 +27153636,Mollack: a web server for the automated creation of conformational ensembles for intrinsically disordered proteins.,"

Unlabelled

Intrinsically disordered proteins (IDPs) play central roles in many biological processes. Consequently, an accurate description of the disordered state is an important step towards a comprehensive understanding of a number of important biological functions. In this work we describe a new web server, Mollack, for the automated construction of unfolded ensembles that uses both experimental and molecular simulation data to construct models for the unfolded state. An important aspect of the method is that it calculates a quantitative estimate of the uncertainty in the constructed ensemble, thereby providing an objective measure of the quality of the final model. Overall, Mollack facilitates structure-function studies of disordered proteins.

Availability and implementation

http://cmstultz-mollack.mit.edu

Contact

cmstultz@mit.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-19 +26738481,"TRAPLINE: a standardized and automated pipeline for RNA sequencing data analysis, evaluation and annotation.","

Background

Technical advances in Next Generation Sequencing (NGS) provide a means to acquire deeper insights into cellular functions. The lack of standardized and automated methodologies poses a challenge for the analysis and interpretation of RNA sequencing data. We critically compare and evaluate state-of-the-art bioinformatics approaches and present a workflow that integrates the best performing data analysis, data evaluation and annotation methods in a Transparent, Reproducible and Automated PipeLINE (TRAPLINE) for RNA sequencing data processing (suitable for Illumina, SOLiD and Solexa).

Results

Comparative transcriptomics analyses with TRAPLINE result in a set of differentially expressed genes, their corresponding protein-protein interactions, splice variants, promoter activity, predicted miRNA-target interactions and files for single nucleotide polymorphism (SNP) calling. The obtained results are combined into a single file for downstream analysis such as network construction. We demonstrate the value of the proposed pipeline by characterizing the transcriptome of our recently described stem cell derived antibiotic selected cardiac bodies ('aCaBs').

Conclusion

TRAPLINE supports NGS-based research by providing a workflow that requires no bioinformatics skills, decreases the processing time of the analysis and works in the cloud. The pipeline is implemented in the biomedical research platform Galaxy and is freely accessible via www.sbi.uni-rostock.de/RNAseqTRAPLINE or the specific Galaxy manual page (https://usegalaxy.org/u/mwolfien/p/trapline---manual).",2016-01-06 +26732371,miRNA Digger: a comprehensive pipeline for genome-wide novel miRNA mining.,"MicroRNAs (miRNAs) are important regulators of gene expression. The recent advances in high-throughput sequencing (HTS) technique have greatly facilitated large-scale detection of the miRNAs. However, thoroughly discovery of novel miRNAs from the available HTS data sets remains a major challenge. In this study, we observed that Dicer-mediated cleavage sites for the processing of the miRNA precursors could be mapped by using degradome sequencing data in both animals and plants. In this regard, a novel tool, miRNA Digger, was developed for systematical discovery of miRNA candidates through genome-wide screening of cleavage signals based on degradome sequencing data. To test its sensitivity and reliability, miRNA Digger was applied to discover miRNAs from four organs of Arabidopsis. The results revealed that a majority of already known mature miRNAs along with their miRNA*s expressed in these four organs were successfully recovered. Notably, a total of 30 novel miRNA-miRNA* pairs that have not been registered in miRBase were discovered by miRNA Digger. After target prediction and degradome sequencing data-based validation, eleven miRNA-target interactions involving six of the novel miRNAs were identified. 
Taken together, miRNA Digger could be applied for sensitive detection of novel miRNAs and it could be freely downloaded from http://www.bioinfolab.cn/miRNA_Digger/index.html.",2016-01-06 +23482073,The HUPO proteomics standards initiative- mass spectrometry controlled vocabulary.,"Controlled vocabularies (CVs), i.e. a collection of predefined terms describing a modeling domain, used for the semantic annotation of data, and ontologies are used in structured data formats and databases to avoid inconsistencies in annotation, to have a unique (and preferably short) accession number and to give researchers and computer algorithms the possibility for more expressive semantic annotation of data. The Human Proteome Organization (HUPO)-Proteomics Standards Initiative (PSI) makes extensive use of ontologies/CVs in their data formats. The PSI-Mass Spectrometry (MS) CV contains all the terms used in the PSI MS-related data standards. The CV contains a logical hierarchical structure to ensure ease of maintenance and the development of software that makes use of complex semantics. The CV contains terms required for a complete description of an MS analysis pipeline used in proteomics, including sample labeling, digestion enzymes, instrumentation parts and parameters, software used for identification and quantification of peptides/proteins and the parameters and scores used to determine their significance. Owing to the range of topics covered by the CV, collaborative development across several PSI working groups, including proteomics research groups, instrument manufacturers and software vendors, was necessary. In this article, we describe the overall structure of the CV, the process by which it has been developed and is maintained and the dependencies on other ontologies. Database URL: http://psidev.cvs.sourceforge.net/viewvc/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo.",2013-03-12 +25606058,Systematic evaluation of connectivity map for disease indications.,"

Background

Connectivity map data and associated methodologies have become a valuable tool in understanding drug mechanism of action (MOA) and discovering new indications for drugs. One of the key ideas of connectivity map (CMAP) is to measure the connectivity between disease gene expression signatures and compound-induced gene expression profiles. Despite multiple impressive anecdotal validations, only a few systematic evaluations have assessed the accuracy of this aspect of CMAP, and most of these utilize drug-to-drug matching to transfer indications across the two drugs.

Methods

To assess CMAP methodologies in a more direct setting, namely the power of classifying known drug-disease relationships, we evaluated three CMAP-based methods on their prediction performance against a curated dataset of 890 true drug-indication pairs. The disease signatures were generated using Gene Logic BioExpress™ system and the compound profiles were derived from the Connectivity Map database (CMAP, build 02, http://www.broadinstitute.org/CMAP/).

Results

The similarity scoring algorithm called eXtreme Sum (XSum) performs better than the standard Kolmogorov-Smirnov (KS) statistic in terms of the area under curve and can achieve a four-fold enrichment at 0.01 false positive rate level, with AUC = 2.2E-4, P value = 0.0035.

Conclusion

Connectivity map can significantly enrich true positive drug-indication pairs given an effective matching algorithm.",2014-12-02 +24536078,MICdb3.0: a comprehensive resource of microsatellite repeats from prokaryotic genomes.,"The MICdb is a comprehensive relational database of perfect microsatellites extracted from completely sequenced and annotated genomes of bacteria and archaea. The current version MICdb3.0 is an updated and revised version of MICdb2.0. As compared with the previous version MICdb2.0, the current release is significantly improved in terms of much larger coverage of genomes, improved presentation of queried results, user-friendly administration module to manage Simple Sequence Repeat (SSR) data such as addition of new genomes, deletion of obsolete data, etc., and also removal of certain features deemed to be redundant. The new web-interface to the database called Microsatellite Analysis Server (MICAS) version 3.0 has been improved by the addition of powerful high-quality visualization tools to view the query results in the form of pie charts and bar graphs. All the query results and graphs can be exported in different formats so that the users can use them for further analysis. MICAS3.0 is also equipped with a unique genome comparison module using which users can do pair-wise comparison of genomes with regard to their microsatellite distribution. The advanced search module can be used to filter the repeats based on certain criteria such as filtering repeats of a particular motif/repeat size, extracting repeats of coding/non-coding regions, sort repeats, etc. The MICdb database has, therefore, been made portable to be administered by a person with the necessary administrative privileges. The MICdb3.0 database and analysis server can be accessed for free from www.cdfd.org.in/micas. 
Database URL: http://www.cdfd.org.in/micas.",2014-02-17 +27993788,MRUniNovo: an efficient tool for de novo peptide sequencing utilizing the hadoop distributed computing framework.,"

Summary

Tandem mass spectrometry-based de novo peptide sequencing is a complex and time-consuming process. The current algorithms for de novo peptide sequencing cannot rapidly and thoroughly process large mass spectrometry datasets. In this paper, we propose MRUniNovo, a novel tool for parallel de novo peptide sequencing. MRUniNovo parallelizes UniNovo based on the Hadoop compute platform. Our experimental results demonstrate that MRUniNovo significantly reduces the computation time of de novo peptide sequencing without sacrificing the correctness and accuracy of the results, and thus can process very large datasets that UniNovo cannot.

Availability and implementation

MRUniNovo is an open source software tool implemented in Java. The source code and the parameter settings are available at http://bioinfo.hupo.org.cn/MRUniNovo/index.php.

Contact

s131020002@hnu.edu.cn ; taochen1019@163.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +27261064,A Collection of Conserved Noncoding Sequences to Study Gene Regulation in Flowering Plants.,"Transcription factors (TFs) regulate gene expression by binding cis-regulatory elements, of which the identification remains an ongoing challenge owing to the prevalence of large numbers of nonfunctional TF binding sites. Powerful comparative genomics methods, such as phylogenetic footprinting, can be used for the detection of conserved noncoding sequences (CNSs), which are functionally constrained and can greatly help in reducing the number of false-positive elements. In this study, we applied a phylogenetic footprinting approach for the identification of CNSs in 10 dicot plants, yielding 1,032,291 CNSs associated with 243,187 genes. To annotate CNSs with TF binding sites, we made use of binding site information for 642 TFs originating from 35 TF families in Arabidopsis (Arabidopsis thaliana). In three species, the identified CNSs were evaluated using TF chromatin immunoprecipitation sequencing data, resulting in significant overlap for the majority of data sets. To identify ultraconserved CNSs, we included genomes of additional plant families and identified 715 binding sites for 501 genes conserved in dicots, monocots, mosses, and green algae. Additionally, we found that genes that are part of conserved mini-regulons have a higher coherence in their expression profile than other divergent gene pairs. All identified CNSs were integrated in the PLAZA 3.0 Dicots comparative genomics platform (http://bioinformatics.psb.ugent.be/plaza/versions/plaza_v3_dicots/) together with new functionalities facilitating the exploration of conserved cis-regulatory elements and their associated genes. 
The availability of this data set in a user-friendly platform enables the exploration of functional noncoding DNA to study gene regulation in a variety of plant species, including crops.",2016-06-03 +27988776,Alloionema californicum n. sp. (Nematoda: Alloionematidae): a new alloionematid from USA.,"A new species of the family Alloionematidae was isolated from a rotten winged gourd at White Crane Garden, San Francisco, USA, sampled by Christopher Nelson in November 2010, and a live culture is deposited in Félix Lab Strain Database (http://www.justbio.com/worms/index.php), IBENS, Paris, France. Specimens from the culture have been examined. Both morphologically and molecularly, the nematode described herein as Alloionema californicum n. sp. differs from the other alloionematid species, A. appendiculatum and Neoalloionema tricaudatum. It is characterised by having a narrow stoma, 2.5-3.5 or 4 times longer than broad in adults or dauer juveniles respectively. Lateral fields are not present in adults but occur as one prominent ridge in dauers. Males have no bursa, six pairs of genital papillae and one single papilla. Dauers have large apparent phasmids in the middle of the tail. The ecology of the newly described species is unknown but probably it is a saprobic bacteriophagous nematode preferring rotting organic material.",2016-11-07 +27821657,The UCL low-density lipoprotein receptor gene variant database: pathogenicity update.,"

Background

Familial hypercholesterolaemia (OMIM 143890) is most frequently caused by variations in the low-density lipoprotein receptor (LDLR) gene. Predicting whether novel variants are pathogenic may not be straightforward, especially for missense and synonymous variants. In 2013, the Association of Clinical Genetic Scientists published guidelines for the classification of variants, with categories 1 and 2 representing clearly not or unlikely pathogenic, respectively, 3 representing variants of unknown significance (VUS), and 4 and 5 representing likely to be or clearly pathogenic, respectively. Here, we update the University College London (UCL) LDLR variant database according to these guidelines.

Methods

PubMed searches and alerts were used to identify novel LDLR variants for inclusion in the database. Standard in silico tools were used to predict potential pathogenicity. Variants were designated as class 4/5 only when the predictions from the different programs were concordant and as class 3 when predictions were discordant.

Results

The updated database (http://www.lovd.nl/LDLR) now includes 2925 curated variants, representing 1707 independent events. All 129 nonsense variants, 337 small frame-shifting and 117/118 large rearrangements were classified as 4 or 5. Of the 795 missense variants, 115 were in classes 1 and 2, 605 in class 4 and 75 in class 3. 111/181 intronic variants, 4/34 synonymous variants and 14/37 promoter variants were assigned to classes 4 or 5. Overall, 112 (7%) of reported variants were class 3.

Conclusions

This study updates the LDLR variant database and identifies a number of reported VUS where additional family and in vitro studies will be required to confirm or refute their pathogenicity.",2016-11-07 +28858829,Toward Consistent Methodology to Quantify Populations in Proximity to Oil and Gas Development: A National Spatial Analysis and Review.,"

Background

Higher risk of exposure to environmental health hazards near oil and gas wells has spurred interest in quantifying populations that live in proximity to oil and gas development. The available studies on this topic lack consistent methodology and ignore aspects of oil and gas development of value to public health-relevant assessment and decision-making.

Objectives

We aim to present a methodological framework for oil and gas development proximity studies grounded in an understanding of hydrocarbon geology and development techniques.

Methods

We geospatially overlay locations of active oil and gas wells in the conterminous United States and Census data to estimate the population living in proximity to hydrocarbon development at the national and state levels. We compare our methods and findings with existing proximity studies.

Results

Nationally, we estimate that 17.6 million people live within 1,600m (∼1 mi) of at least one active oil and/or gas well. Three of the eight studies overestimate populations at risk from actively producing oil and gas wells by including wells without evidence of production or drilling completion and/or using inappropriate population allocation methods. The remaining five studies, by omitting conventional wells in regions dominated by historical conventional development, significantly underestimate populations at risk.

Conclusions

The well inventory guidelines we present provide an improved methodology for hydrocarbon proximity studies by acknowledging the importance of both conventional and unconventional well counts as well as the relative exposure risks associated with different primary production categories (e.g., oil, wet gas, dry gas) and developmental stages of wells. https://doi.org/10.1289/EHP1535.",2017-08-23 +25558364,The PREDICTS database: a global database of how local terrestrial biodiversity responds to human impacts.,"Biodiversity continues to decline in the face of increasing anthropogenic pressures such as habitat destruction, exploitation, pollution and introduction of alien species. Existing global databases of species' threat status or population time series are dominated by charismatic species. The collation of datasets with broad taxonomic and biogeographic extents, and that support computation of a range of biodiversity indicators, is necessary to enable better understanding of historical declines and to project - and avert - future declines. We describe and assess a new database of more than 1.6 million samples from 78 countries representing over 28,000 species, collated from existing spatial comparisons of local-scale biodiversity exposed to different intensities and types of anthropogenic pressures, from terrestrial sites around the world. The database contains measurements taken in 208 (of 814) ecoregions, 13 (of 14) biomes, 25 (of 35) biodiversity hotspots and 16 (of 17) megadiverse countries. The database contains more than 1% of the total number of all species described, and more than 1% of the described species within many taxonomic groups - including flowering plants, gymnosperms, birds, mammals, reptiles, amphibians, beetles, lepidopterans and hymenopterans. The dataset, which is still being added to, is therefore already considerably larger and more representative than those used by previous quantitative models of biodiversity trends and responses. 
The database is being assembled as part of the PREDICTS project (Projecting Responses of Ecological Diversity In Changing Terrestrial Systems - http://www.predicts.org.uk). We make site-level summary data available alongside this article. The full database will be publicly available in 2015.",2014-12-02 +25098641,Development of a Smartphone App for a Genetics Website: The Amyotrophic Lateral Sclerosis Online Genetics Database (ALSoD).,"

Background

The ALS Online Genetics Database (ALSoD) website holds mutation, geographical, and phenotype data on genes implicated in amyotrophic lateral sclerosis (ALS) and links to bioinformatics resources, publications, and tools for analysis. On average, there are 300 unique visits per day, suggesting a high demand from the research community. To enable wider access, we developed a mobile-friendly version of the website and a smartphone app.

Objective

We sought to compare data traffic before and after implementation of a mobile version of the website to assess utility.

Methods

We identified the most frequently viewed pages using Google Analytics and our in-house analytic monitoring. For these, we optimized the content layout of the screen, reduced image sizes, and summarized available information. We used the Microsoft .NET framework mobile detection property (HttpRequest.IsMobileDevice in the Request.Browser object in conjunction with HttpRequest.UserAgent), which returns a true value if the browser is a recognized mobile device. For app development, we used the Eclipse integrated development environment with Android plug-ins. We wrapped the mobile website version with the WebView object in Android. Simulators were downloaded to test and debug the applications.

Results

The website automatically detects access from a mobile phone and redirects pages to fit the smaller screen. Because the amount of data stored on ALSoD is very large, the available information for display using smartphone access is deliberately restricted to improve usability. Visits to the website increased from 2231 to 2820, yielding a 26% increase from the pre-mobile to post-mobile period and an increase from 103 to 340 visits (230%) using mobile devices (including tablets). The smartphone app is currently available on BlackBerry and Android devices and will be available shortly on iOS as well.

Conclusions

Further development of the ALSoD website has allowed access through smartphones and tablets, either through the website or directly through a mobile app, making genetic data stored on the database readily accessible to researchers and patients across multiple devices.",2013-09-04 +28241850,Accurate and equitable medical genomic analysis requires an understanding of demography and its influence on sample size and ratio.,"In a recent study, Petrovski and Goldstein reported that (non-Finnish) Europeans have significantly fewer nonsynonymous singletons in Online Mendelian Inheritance in Man (OMIM) disease genes compared with Africans, Latinos, South Asians, East Asians, and other unassigned non-Europeans. We use simulations of Exome Aggregation Consortium (ExAC) data to show that sample size and ratio interact to influence the number of these singletons identified in a cohort. These interactions are different across ancestries and can lead to the same number of identified singletons in both Europeans and non-Europeans without an equal number of samples. We conclude that there is a need to account for the ancestry-specific influence of demography on genomic architecture and rare variant analysis in order to address inequalities in medical genomic analysis.The authors of the original article were invited to submit a response, but declined to do so. Please see related Open Letter: http://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-1016-y.",2017-02-27 +27350137,Analysis of Transitional and Turbulent Flow Through the FDA Benchmark Nozzle Model Using Laser Doppler Velocimetry.,"Transitional and turbulent flow through a simplified medical device model is analyzed as part of the FDA's Critical Path Initiative, designed to improve the process of bringing medical products to market. 
Computational predictions are often used in the development of devices and reliable in vitro data is needed to validate computational results, particularly estimations of the Reynolds stresses that could play a role in damaging blood elements. The high spatial resolution of laser Doppler velocimetry (LDV) is used to collect two component velocity data within the FDA benchmark nozzle model. Two flow conditions are used to produce flow encompassing laminar, transitional, and turbulent regimes, and viscous stresses, principal Reynolds stresses, and turbulence intensities are calculated from the measured LDV velocities. Axial velocities and viscous stresses are compared to data from a prior inter-laboratory study conducted with particle image velocimetry. Large velocity gradients are observed near the wall in the nozzle throat and in the jet shear layer located in the expansion downstream of the throat, with axial velocity changing as much as 4.5 m/s over 200 μm. Additionally, maximum Reynolds shear stresses of 1000-2000 Pa are calculated in the high shear regions, which are an order of magnitude higher than the peak viscous shear stresses (<100 Pa). It is important to consider the effects of both viscous and turbulent stresses when simulating flow through medical devices. Reynolds stresses above commonly accepted hemolysis thresholds are measured in the nozzle model, indicating that hemolysis may occur under certain flow conditions. 
As such, the presented turbulence quantities from LDV, which are also available for download at https://fdacfd.nci.nih.gov/ , provide an ideal validation test for computational simulations that seek to characterize the flow field and to predict hemolysis within the FDA nozzle geometry.",2016-06-27 +24293645,STITCH 4: integration of protein-chemical interactions with user data.,"STITCH is a database of protein-chemical interactions that integrates many sources of experimental and manually curated evidence with text-mining information and interaction predictions. Available at http://stitch.embl.de, the resulting interaction network includes 390 000 chemicals and 3.6 million proteins from 1133 organisms. Compared with the previous version, the number of high-confidence protein-chemical interactions in human has increased by 45%, to 367 000. In this version, we added features for users to upload their own data to STITCH in the form of internal identifiers, chemical structures or quantitative data. For example, a user can now upload a spreadsheet with screening hits to easily check which interactions are already known. To increase the coverage of STITCH, we expanded the text mining to include full-text articles and added a prediction method based on chemical structures. We further changed our scheme for transferring interactions between species to rely on orthology rather than protein similarity. This improves the performance within protein families, where scores are now transferred only to orthologous proteins, but not to paralogous proteins. STITCH can be accessed with a web-interface, an API and downloadable files.",2013-11-28 +22874250,Ophiucus: RDF-based visualization tool for health simulation models.,"Simulation modeling of population health is becoming increasingly popular for epidemiology research and public health policy-making. 
However, the acceptability of population health simulation models is inhibited by their complexity and the lack of established standards to describe these models. To address this issue, we propose Ophiuchus - an RDF (Resource Description Framework: http://www.w3.org/RDF/)-based visualization tool for generating interactive 2D diagrams of population health simulation models, which describe these models in an explicit and formal manner. We present the results of a preliminary system assessment and discuss current limitations of the system.",2012-01-01 +25547877,JRC GMO-Matrix: a web application to support Genetically Modified Organisms detection strategies.,"

Background

The polymerase chain reaction (PCR) is the current state of the art technique for DNA-based detection of Genetically Modified Organisms (GMOs). A typical control strategy starts by analyzing a sample for the presence of target sequences (GM-elements) known to be present in many GMOs. Positive findings from this ""screening"" are then confirmed with GM (event) specific test methods. A reliable knowledge of which GMOs are detected by combinations of GM-detection methods is thus crucial to minimize the verification efforts.

Description

In this article, we describe a novel platform that links the information of two unique databases built and maintained by the European Union Reference Laboratory for Genetically Modified Food and Feed (EU-RL GMFF) at the Joint Research Centre (JRC) of the European Commission, one containing the sequence information of known GM-events and the other validated PCR-based detection and identification methods. The new platform compiles in silico determinations of the detection of a wide range of GMOs by the available detection methods using existing scripts that simulate PCR amplification and, when present, probe binding. The correctness of the information has been verified by comparing the in silico conclusions to experimental results for a subset of forty-nine GM events and six methods.

Conclusions

The JRC GMO-Matrix is unique for its reliance on DNA sequence data and its flexibility in integrating novel GMOs and new detection methods. Users can mine the database using a set of web interfaces that thus provide a valuable support to GMO control laboratories in planning and evaluating their GMO screening strategies. The platform is accessible at http://gmo-crl.jrc.ec.europa.eu/jrcgmomatrix/ .",2014-12-30 +28108450,NetLand: quantitative modeling and visualization of Waddington's epigenetic landscape using probabilistic potential.,"

Summary

Waddington's epigenetic landscape is a powerful metaphor for cellular dynamics driven by gene regulatory networks (GRNs). Its quantitative modeling and visualization, however, remains a challenge, especially when there are more than two genes in the network. A software tool for Waddington's landscape has not been available in the literature. We present NetLand, an open-source software tool for modeling and simulating the kinetic dynamics of GRNs, and visualizing the corresponding Waddington's epigenetic landscape in three dimensions without restriction on the number of genes in a GRN. With an interactive and graphical user interface, NetLand can facilitate the knowledge discovery and experimental design in the study of cell fate regulation (e.g. stem cell differentiation and reprogramming).

Availability and implementation

NetLand can run under operating systems including Windows, Linux and OS X. The executable files and source code of NetLand, as well as a user manual, example models, etc., can be downloaded from http://netland-ntu.github.io/NetLand/ .

Contact

zhengjie@ntu.edu.sg.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +24214964,DriverDB: an exome sequencing database for cancer driver gene identification.,"Exome sequencing (exome-seq) has aided in the discovery of a huge amount of mutations in cancers, yet challenges remain in converting oncogenomics data into information that is interpretable and accessible for clinical care. We constructed DriverDB (http://ngs.ym.edu.tw/driverdb/), a database which incorporates 6079 cases of exome-seq data, annotation databases (such as dbSNP, 1000 Genome and Cosmic) and published bioinformatics algorithms dedicated to driver gene/mutation identification. We provide two points of view, 'Cancer' and 'Gene', to help researchers to visualize the relationships between cancers and driver genes/mutations. The 'Cancer' section summarizes the calculated results of driver genes by eight computational methods for a specific cancer type/dataset and provides three levels of biological interpretation for realization of the relationships between driver genes. The 'Gene' section is designed to visualize the mutation information of a driver gene in five different aspects. Moreover, a 'Meta-Analysis' function is provided so researchers may identify driver genes in customer-defined samples. The novel driver genes/mutations identified hold potential for both basic research and biotech applications.",2013-11-07 +25355513,Dr.VIS v2.0: an updated database of human disease-related viral integration sites in the era of high-throughput deep sequencing.,"Dr.VIS is a database of human disease-related viral integration sites (VIS). The number of VIS has grown rapidly since Dr.VIS was first released in 2011, and there is growing recognition of the important role that viral integration plays in the development of malignancies. 
The updated database version, Dr.VIS v2.0 (http://www.bioinfo.org/drvis or bminfor.tongji.edu.cn/drvis_v2), represents 25 diseases, covers 3340 integration sites of eight oncogenic viruses in human chromosomes and provides more accurate information about VIS from high-throughput deep sequencing results obtained mainly after 2012. Data of VISes for three newly identified oncogenic viruses for 14 related diseases have been added to this 2015 update, which has a 5-fold increase of VISes compared to Dr.VIS v1.0. Dr.VIS v2.0 has 2244 precise integration sites, 867 integration regions and 551 junction sequences. A total of 2295 integration sites are located near 1730 involved genes. Of the VISes, 1153 are detected in the exons or introns of genes, with 294 located up to 5 kb and a further 112 located up to 10 kb away. As viral integration may alter chromosome stability and gene expression levels, characterizing VISes will contribute toward the discovery of novel oncogenes, tumor suppressor genes and tumor-associated pathways.",2014-10-29 +27825033,VirusDetect: An automated pipeline for efficient virus discovery using deep sequencing of small RNAs.,"Accurate detection of viruses in plants and animals is critical for agriculture production and human health. Deep sequencing and assembly of virus-derived small interfering RNAs has proven to be a highly efficient approach for virus discovery. Here we present VirusDetect, a bioinformatics pipeline that can efficiently analyze large-scale small RNA (sRNA) datasets for both known and novel virus identification. VirusDetect performs both reference-guided assemblies through aligning sRNA sequences to a curated virus reference database and de novo assemblies of sRNA sequences with automated parameter optimization and the option of host sRNA subtraction. 
The assembled contigs are compared to a curated and classified reference virus database for known and novel virus identification, and evaluated for their sRNA size profiles to identify novel viruses. Extensive evaluations using plant and insect sRNA datasets suggest that VirusDetect is highly sensitive and efficient in identifying known and novel viruses. VirusDetect is freely available at http://bioinfo.bti.cornell.edu/tool/VirusDetect/.",2016-11-05 +27809604,Video-assisted thoracic lobectomy for lung cancer in Italy: the 'VATS Group' Project.,"As part of the third Mediterranean Symposium in Thoracic Surgical Oncology, we introduce the Italian VATS Group ( http://vatsgroup.org/sito/index.php ). This national collaborative initiative was established in 2013 and started to recruit patients in January 2014; as of July 2016, 3680 patients have been enrolled in the database. Three different video-assisted thoracic surgery approaches have been predominantly used by Italian thoracic surgery centers, 71% of them preferentially adopting a multi-portal approach, with a 20% recorded morbidity. The majority of the cases were stage I adenocarcinomas of the lung. Conversion to open surgery occurred in 9% of the cases. The study suggests video-assisted thoracic surgery lobectomy as a 'gold standard' for the surgical treatment of early-stage lung cancer in Italy.",2016-11-04 +27883925,Factors associated with failure of oncology drugs in late-stage clinical development: A systematic review.,"

Background

We aimed to describe the reasons for failure of experimental anticancer drugs in late-stage clinical development.

Material and methods

We searched the PharmaProjects database (https://citeline.com/products/pharmaprojects/) for anticancer drugs discontinued between 01/01/2009 and 06/30/2014. Drug programs that reached phase III trials but never gained Food and Drug Administration (FDA) approval were compared to 37 anticancer drugs achieving FDA approval in this time period.

Results

Forty-two drugs fit our criteria for development failures. These failed drugs (49% targeted, 23% cytotoxics, and 28% other) were tested in 43 cancer indications (drug programs). Only 16% (7/43) of failed drug programs adopted a biomarker-driven rationale for patient selection versus 57% (21/37) of successful drug programs (P<0.001). Phase II trial information was available in 32 of 43 failed drug programs and in 32 of 37 successful programs. Nine of the 32 trials (28%) of failed drugs versus 28 of 32 trials (87%) of successful drugs (P<0.001) achieved proof of concept (single agent response rate (RR) ⩾20% or combination therapy showing a ⩾20% RR increase above the median historical RR without the experimental agent (with a minimal absolute increase of 5%) or a randomized phase II trial showing significance (P⩽0.05) for its primary outcome). No pattern of study sites, trial design or funding characteristics emerged from the failed drug analysis.

Conclusion

For drugs that reached Phase III, lack of a biomarker-driven strategy and failure to attain proof of concept in phase II are potential risk factors for later discontinuation, especially for targeted agents.",2016-11-04 +27813701,UNcleProt (Universal Nuclear Protein database of barley): The first nuclear protein database that distinguishes proteins from different phases of the cell cycle.,"Proteins are the most abundant component of the cell nucleus, where they perform a plethora of functions, including the assembly of long DNA molecules into condensed chromatin, DNA replication and repair, regulation of gene expression, synthesis of RNA molecules and their modification. Proteins are important components of nuclear bodies and are involved in the maintenance of the nuclear architecture, transport across the nuclear envelope and cell division. Given their importance, the current poor knowledge of plant nuclear proteins and their dynamics during the cell's life and division is striking. Several factors hamper the analysis of the plant nuclear proteome, but the most critical seems to be the contamination of nuclei by cytosolic material during their isolation. With the availability of an efficient protocol for the purification of plant nuclei, based on flow cytometric sorting, contamination by cytoplasmic remnants can be minimized. Moreover, flow cytometry allows the separation of nuclei in different stages of the cell cycle (G1, S, and G2). This strategy has led to the identification of large number of nuclear proteins from barley (Hordeum vulgare), thus triggering the creation of a dedicated database called UNcleProt, http://barley.gambrinus.ueb.cas.cz/ .",2016-11-04 +28886593,"Influence of Tetrabromobisphenol A, with or without Concurrent Triclosan, upon Bisphenol A and Estradiol Concentrations in Mice.","

Background

Humans are commonly exposed to multiple environmental chemicals, including tetrabromobisphenol A (TBBPA; a flame retardant), triclosan (an antimicrobial agent), and bisphenol A (BPA; polycarbonate plastics). These chemicals are readily absorbed and may interact with each other.

Objectives

We sought to determine whether TBBPA, given alone or in combination with triclosan, can modulate the concentrations of BPA and 17β-estradiol (E2).

Methods

Female and male CF-1 mice were each given a subcutaneous injection of 0-27mg TBBPA, with or without concurrent 0.33mg triclosan, followed by dietary administration of 50μg/kg body weight 14C-BPA. Radioactivity was measured in blood serum and tissues through liquid scintillation counting. In subsequent experiments, female and male CF-1 mice were each given a subcutaneous injection of 0 or 1mg TBBPA and E2 was measured in urine 2-12 h after injection.

Results

Doses as low as 1mg TBBPA significantly elevated 14C-BPA concentrations in the uterus and ovaries of females; in the testes, epididymides, vesicular-coagulating glands, and preputial glands of males; and in blood serum, heart, lungs, and kidneys of both sexes; urinary E2 concentrations were also elevated. Lower doses of TBBPA or triclosan that had no effects on their own elevated 14C-BPA concentrations when the two substances were given concurrently.

Conclusion

These data indicate that TBBPA, triclosan, and BPA interact in vivo, consistent with evidence that TBBPA and triclosan inhibit enzymes that are critical for BPA and E2 metabolism. https://doi.org/10.1289/EHP1329.",2017-08-21 +29298761,Nondepressive Psychosocial Factors and CKD Outcomes in Black Americans.,"BACKGROUND AND OBJECTIVES:Established risk factors for CKD do not fully account for risk of CKD in black Americans. We studied the association of nondepressive psychosocial factors with risk of CKD in the Jackson Heart Study. DESIGN, SETTING, PARTICIPANTS, & MEASUREMENTS:We used principal component analysis to identify underlying constructs from 12 psychosocial baseline variables (perceived daily, lifetime, and burden of lifetime discrimination; stress; anger in; anger out; hostility; pessimism; John Henryism; spirituality; perceived social status; and social support). Using multivariable models adjusted for demographics and comorbidity, we examined the association of psychosocial variables with baseline CKD prevalence, eGFR decline, and incident CKD during follow-up. RESULTS:Of 3390 (64%) Jackson Heart Study participants with the required data, 656 (19%) had prevalent CKD. Those with CKD (versus no CKD) had lower perceived daily (mean [SD] score =7.6 [8.5] versus 9.7 [9.0]) and lifetime discrimination (2.5 [2.0] versus 3.1 [2.2]), lower perceived stress (4.2 [4.0] versus 5.2 [4.4]), higher hostility (12.1 [5.2] versus 11.5 [4.8]), higher John Henryism (30.0 [4.8] versus 29.7 [4.4]), and higher pessimism (2.3 [2.2] versus 2.0 [2.1]; all P<0.05). Principal component analysis identified three factors from the 12 psychosocial variables: factor 1, life stressors (perceived discrimination, stress); factor 2, moods (anger, hostility); and, factor 3, coping strategies (John Henryism, spirituality, social status, social support). 
After adjustments, factor 1 (life stressors) was negatively associated with prevalent CKD at baseline among women only: odds ratio, 0.76 (95% confidence interval, 0.65 to 0.89). After a median follow-up of 8 years, identified psychosocial factors were not significantly associated with eGFR decline (life stressors: β=0.08; 95% confidence interval, -0.02 to 0.17; moods: β=0.03; 95% confidence interval, -0.06 to 0.13; coping: β=-0.02; 95% confidence interval, -0.12 to 0.08) or incident CKD (life stressors: odds ratio, 1.07; 95% confidence interval, 0.88 to 1.29; moods: odds ratio, 1.02; 95% confidence interval, 0.84 to 1.24; coping: odds ratio, 0.91; 95% confidence interval, 0.75 to 1.11). CONCLUSIONS:Greater life stressors were associated with lower prevalence of CKD at baseline in the Jackson Heart Study. However, psychosocial factors were not associated with risk of CKD over a median follow-up of 8 years. PODCAST:This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2018_01_03_CJASNPodcast_18_2_L.mp3.",2018-01-03 +27794556,gargammel: a sequence simulator for ancient DNA.,"

Summary

Ancient DNA has emerged as a remarkable tool to infer the history of extinct species and past populations. However, many of its characteristics, such as extensive fragmentation, damage and contamination, can influence downstream analyses. To help investigators measure how these could impact their analyses in silico, we have developed gargammel, a package that simulates ancient DNA fragments given a set of known reference genomes. Our package simulates the entire molecular process from post-mortem DNA fragmentation and DNA damage to experimental sequencing errors, and reproduces the most common biases observed in ancient DNA datasets.

Availability and implementation

The package is publicly available on github: https://grenaud.github.io/gargammel/ and released under the GPL.

Contact

gabriel.renaud@snm.ku.dk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +27148975,SpinachDB: A Well-Characterized Genomic Database for Gene Family Classification and SNP Information of Spinach.,"Spinach (Spinacia oleracea L.), which originated in central and western Asia, belongs to the family Amaranthaceae. Spinach is one of most important leafy vegetables with a high nutritional value as well as being a perfect research material for plant sex chromosome models. As the completion of genome assembly and gene prediction of spinach, we developed SpinachDB (http://222.73.98.124/spinachdb) to store, annotate, mine and analyze genomics and genetics datasets efficiently. In this study, all of 21702 spinach genes were annotated. A total of 15741 spinach genes were catalogued into 4351 families, including identification of a substantial number of transcription factors. To construct a high-density genetic map, a total of 131592 SSRs and 1125743 potential SNPs located in 548801 loci of spinach genome were identified in 11 cultivated and wild spinach cultivars. The expression profiles were also performed with RNA-seq data using the FPKM method, which could be used to compare the genes. Paralogs in spinach and the orthologous genes in Arabidopsis, grape, sugar beet and rice were identified for comparative genome analysis. Finally, the SpinachDB website contains seven main sections, including the homepage; the GBrowse map that integrates genome, genes, SSR and SNP marker information; the Blast alignment service; the gene family classification search tool; the orthologous and paralogous gene pairs search tool; and the download and useful contact information. 
SpinachDB will be continually expanded to include newly generated robust genomics and genetics data sets along with the associated data mining and analysis tools.",2016-05-05 +27153587,jSplice: a high-performance method for accurate prediction of alternative splicing events and its application to large-scale renal cancer transcriptome data.,"

Motivation

Alternative splicing represents a prime mechanism of post-transcriptional gene regulation whose misregulation is associated with a broad range of human diseases. Despite the vast availability of transcriptome data from different cell types and diseases, bioinformatics-based surveys of alternative splicing patterns remain a major challenge due to limited availability of analytical tools that combine high accuracy and rapidity.

Results

We describe here a novel junction-centric method, jSplice, that enables de novo extraction of alternative splicing events from RNA-sequencing data with high accuracy, reliability and speed. Application to clear cell renal carcinoma (ccRCC) cell lines and 65 ccRCC patients revealed experimentally validatable alternative splicing changes and signatures able to prognosticate ccRCC outcome. In the aggregate, our results propose jSplice as a key analytic tool for the derivation of cell context-dependent alternative splicing patterns from large-scale RNA-sequencing datasets.

Availability and implementation

jSplice is a standalone Python application freely available at http://www.mhs.biol.ethz.ch/research/krek/jsplice

Contact

wilhelm.krek@biol.ethz.ch

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-21 +28934097,"Respiratory, Dermal, and Eye Irritation Symptoms Associated with Corexit™ EC9527A/EC9500A following the Deepwater Horizon Oil Spill: Findings from the GuLF STUDY.","

Background

The large quantities of chemical oil dispersants used in the oil spill response and cleanup (OSRC) work following the Deepwater Horizon disaster provide an opportunity to study associations between dispersant exposure (Corexit™ EC9500A or EC9527A) and human health.

Objectives

Our objectives were to examine associations between potential exposure to the dispersants and adverse respiratory, dermal, and eye irritation symptoms.

Methods

Using data from detailed Gulf Long-term Follow-up (GuLF) Study enrollment interviews, we determined potential exposure to either dispersant from participant-reported tasks during the OSRC work. Between 27,659 and 29,468 participants provided information on respiratory, dermal, and eye irritation health. We estimated prevalence ratios (PRs) to measure associations with symptoms reported during the OSRC work and at study enrollment, adjusting for potential confounders including airborne total hydrocarbons exposure, use of cleaning chemicals, and participant demographics.

Results

Potential exposure to either of the dispersants was significantly associated with all health outcomes at the time of the OSRC, with the strongest association for burning in the nose, throat, or lungs [adjusted PR (aPR)=1.61 (95% CI: 1.42, 1.82)], tightness in chest [aPR=1.58 (95% CI: 1.37, 1.81)], and burning eyes [aPR=1.48 (95% CI: 1.35, 1.64)]. Weaker, but still significant, associations were found between dispersant exposure and symptoms present at enrollment.

Conclusions

Potential exposure to Corexit™ EC9527A or EC9500A was associated with a range of health symptoms at the time of the OSRC, as well as at the time of study enrollment, 1-3 y after the spill. https://doi.org/10.1289/EHP1677.",2017-09-15 +27797756,MIToS.jl: mutual information tools for protein sequence analysis in the Julia language.,"

Motivation

MIToS is an environment for mutual information analysis and a framework for protein multiple sequence alignments (MSAs) and protein structures (PDB) management in Julia language. It integrates sequence and structural information through SIFTS, making Pfam MSAs analysis straightforward. MIToS streamlines the implementation of any measure calculated from residue contingency tables and its optimization and testing in terms of protein contact prediction. As an example, we implemented and tested a BLOSUM62-based pseudo-count strategy in mutual information analysis.

Availability and implementation

The software is totally implemented in Julia and supported for Linux, OS X and Windows. It's freely available on GitHub under MIT license: http://mitos.leloir.org.ar .

Contacts

diegozea@gmail.com or cmb@leloir.org.ar.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +26571518,Visualizing Volcanic Clouds in the Atmosphere and Their Impact on Air Traffic.,"Volcanic eruptions are not only hazardous in the direct vicinity of a volcano, but they also affect the climate and air travel for great distances. This article sheds light on the Grímsvötn, Puyehue-Cordón Caulle, and Nabro eruptions in 2011. The authors study the agreement of the complementary satellite data, reconstruct sulfate aerosol and volcanic ash clouds, visualize endangered flight routes, minimize occlusion in particle trajectory visualizations, and focus on the main pathways of Nabro's sulfate aerosol into the stratosphere. The results here were developed for the 2014 IEEE Scientific Visualization Contest, which centers around the fusion of multiple satellite data modalities to reconstruct and assess the movement of volcanic ash and sulfate aerosol emissions. Using data from three volcanic eruptions that occurred in the span of approximately three weeks, the authors study the agreement of the complementary satellite data, reconstruct sulfate aerosol and volcanic ash clouds, visualize endangered flight routes, minimize occlusion in particle trajectory visualizations, and focus on the main pathways of sulfate aerosol into the stratosphere. This video provides animations of the reconstructed ash clouds. https://youtu.be/D9DvJ5AvZAs.",2015-11-11 +27831898,A Characteristic-Based Framework for Multiple Sequence Aligners.,"The multiple sequence alignment is a well-known bioinformatics problem that consists in the alignment of three or more biological sequences (protein or nucleic acid). In the literature, a number of tools have been proposed for dealing with this biological sequence alignment problem, such as progressive methods, consistency-based methods, or iterative methods; among others. These aligners often use a default parameter configuration for all the input sequences to align. 
However, the default configuration is not always the best choice, the alignment accuracy of the tool may be highly boosted if specific parameter configurations are used, depending on the biological characteristics of the input sequences. In this paper, we propose a characteristic-based framework for multiple sequence aligners. The idea of the framework is, given an input set of unaligned sequences, extract its characteristics and run the aligner with the best parameter configuration found for another set of unaligned sequences with similar characteristics. In order to test the framework, we have used the well-known multiple sequence comparison by log-expectation (MUSCLE) v3.8 aligner with different benchmarks, such as benchmark alignments database v3.0, protein reference alignment benchmark v4.0, and sequence alignment benchmark v1.65. The results shown that the alignment accuracy and conservation of MUSCLE might be greatly improved with the proposed framework, specially in those scenarios with a low percentage of identity. The characteristic-based framework for multiple sequence aligners is freely available for downloading at http://arco.unex.es/arl/fwk-msa/cbf-msa.zip.",2016-11-02 +27460371,SNP Discovery Using Next Generation Transcriptomic Sequencing.,"In this chapter, I will guide the user through methods to find new SNP markers from expressed sequence (RNA-Seq) data, focusing on the sample preparation and also on the bioinformatic analyses needed to sort through the immense flood of data from high-throughput sequencing machines. The general steps included are as follows: sample preparation, sequencing, quality control of data, assembly, mapping, SNP discovery, filtering, validation. The first few steps are traditional laboratory protocols, whereas steps following the sequencing are of bioinformatic nature. 
The bioinformatics described herein are by no means exhaustive, rather they serve as one example of a simple way of analyzing high-throughput sequence data to find SNP markers. Ideally, one would like to run through this protocol several times with a new dataset, while varying software parameters slightly, in order to determine the robustness of the results. The final validation step, although not described in much detail here, is also quite critical as that will be the final test of the accuracy of the assumptions made in silico.There is a plethora of downstream applications of a SNP dataset, not covered in this chapter. For an example of a more thorough protocol also including differential gene expression and functional enrichment analyses, BLAST annotation and downstream applications of SNP markers, a good starting point could be the ""Simple Fool's Guide to population genomics via RNA-Seq,"" which is available at http://sfg.stanford.edu .",2016-01-01 +24270792,Gene3D: Multi-domain annotations for protein sequence and comparative genome analysis.,"Gene3D (http://gene3d.biochem.ucl.ac.uk) is a database of protein domain structure annotations for protein sequences. Domains are predicted using a library of profile HMMs from 2738 CATH superfamilies. Gene3D assigns domain annotations to Ensembl and UniProt sequence sets including >6000 cellular genomes and >20 million unique protein sequences. This represents an increase of 45% in the number of protein sequences since our last publication. Thanks to improvements in the underlying data and pipeline, we see large increases in the domain coverage of sequences. We have expanded this coverage by integrating Pfam and SUPERFAMILY domain annotations, and we now resolve domain overlaps to provide highly comprehensive composite multi-domain architectures. 
To make these data more accessible for comparative genome analyses, we have developed novel search algorithms for searching genomes to identify related multi-domain architectures. In addition to providing domain family annotations, we have now developed a pipeline for 3D homology modelling of domains in Gene3D. This has been applied to the human genome and will be rolled out to other major organisms over the next year.",2013-11-21 +28911330,Extensive transcriptomic and epigenomic remodelling occurs during Arabidopsis thaliana germination.,"

Background

Seed germination involves progression from complete metabolic dormancy to a highly active, growing seedling. Many factors regulate germination and these interact extensively, forming a complex network of inputs that control the seed-to-seedling transition. Our understanding of the direct regulation of gene expression and the dynamic changes in the epigenome and small RNAs during germination is limited. The interactions between genome, transcriptome and epigenome must be revealed in order to identify the regulatory mechanisms that control seed germination.

Results

We present an integrated analysis of high-resolution RNA sequencing, small RNA sequencing and MethylC sequencing over ten developmental time points in Arabidopsis thaliana seeds, finding extensive transcriptomic and epigenomic transformations associated with seed germination. We identify previously unannotated loci from which messenger RNAs are expressed transiently during germination and find widespread alternative splicing and divergent isoform abundance of genes involved in RNA processing and splicing. We generate the first dynamic transcription factor network model of germination, identifying known and novel regulatory factors. Expression of both microRNA and short interfering RNA loci changes significantly during germination, particularly between the seed and the post-germinative seedling. These are associated with changes in gene expression and large-scale demethylation observed towards the end of germination, as the epigenome transitions from an embryo-like to a vegetative seedling state.

Conclusions

This study reveals the complex dynamics and interactions of the transcriptome and epigenome during seed germination, including the extensive remodelling of the seed DNA methylome from an embryo-like to vegetative-like state during the seed-to-seedling transition. Data are available for exploration in a user-friendly browser at https://jbrowse.latrobe.edu.au/germination_epigenome .",2017-09-15 +25578511,Use of Pooled State Administrative Data for Mental Health Services Research.,"State systems are a rich, albeit challenging, laboratory for policy-relevant services research studies. State mental health authorities routinely devote resources to collect data for state planning and reporting purposes. However, these data are rarely used in cross-state comparisons to inform state or federal policy development. In 2008, in response to key recommendations from the National Institute of Mental Health (NIMH) Advisory Council's ""The Road Ahead: Research Partnership to Transform Services,"" (http://www.nimh.nih.gov/about/advisory-boards-and-groups/namhc/reports/road-ahead.pdf), NIMH issued a request for applications (RFA) to support studies on the impact of state policy changes on access, cost, quality and outcomes of care for individuals with mental disorders. The purpose of the RFA was to bridge the divide between research and policy by encouraging research that used state administrative data across states, and to address significant state-defined health policy initiatives. Five projects involving eight states were selected through peer review for funding. Projects began in 2009 and were funded for 3 years. This report provides a brief description of the five projects, followed by an analysis of the impact, challenges, and lessons learned from these policy-partnered studies. 
We conclude by offering suggestions on ways to use state administrative data for informing state health policies, which is especially timely given national and state changes in the structure and financing of healthcare.",2016-01-01 +28937960,Prenatal Exposure to Nonpersistent Endocrine Disruptors and Behavior in Boys at 3 and 5 Years.,"

Background

Sex-specific associations have been reported between phthalates, bisphenol A (BPA), and child behavior. No data on large study populations are available for other phenols with possible endocrine-disrupting properties.

Objectives

We aimed to study associations between prenatal exposure to phthalates and several phenols on behavior among male infants.

Methods

We quantified 11 phthalate metabolites and nine phenols (four parabens, benzophenone-3, BPA, two dichlorophenols, triclosan) in spot urine samples collected during pregnancy among EDEN cohort mothers who delivered a boy. Mothers completed the Strength and Difficulties Questionnaire (SDQ) when their children were 3.1 (n=529) and 5.6 (n=464) y old.

Results

BPA was positively associated with the relationship problems subscale at 3 y [incidence rate ratio (IRR): 1.11; 95% confidence interval (CI): 1.03, 1.20] and the hyperactivity-inattention subscale scores at 5 y (IRR: 1.08; 95% CI: 1.01, 1.14). Mono-n-butyl phthalate (MnBP) was positively associated with internalizing behavior, relationship problem, and emotional symptom scores at 3 y. Monobenzyl phthalate (MBzP) was positively associated with internalizing behavior and relationship problems scores at 3 y. After dichotomizing SDQ scores, triclosan tended to be positively associated with emotional symptom subscales at both 3 and 5 y.

Conclusions

The observed associations between BPA, MnBP, and behavior in boys are consistent with previous findings. Further health impact assessment studies based on dose-response functions corrected for exposure misclassification are required to quantify the public health burden possibly entailed by such associations. https://doi.org/10.1289/EHP1314.",2017-09-15 +23396323,The annotation-enriched non-redundant patent sequence databases.,"The EMBL-European Bioinformatics Institute (EMBL-EBI) offers public access to patent sequence data, providing a valuable service to the intellectual property and scientific communities. The non-redundant (NR) patent sequence databases comprise two-level nucleotide and protein sequence clusters (NRNL1, NRNL2, NRPL1 and NRPL2) based on sequence identity (level-1) and patent family (level-2). Annotation from the source entries in these databases is merged and enhanced with additional information from the patent literature and biological context. Corrections in patent publication numbers, kind-codes and patent equivalents significantly improve the data quality. Data are available through various user interfaces including web browser, downloads via FTP, SRS, Dbfetch and EBI-Search. Sequence similarity/homology searches against the databases are available using BLAST, FASTA and PSI-Search. In this article, we describe the data collection and annotation and also outline major changes and improvements introduced since 2009. Apart from data growth, these changes include additional annotation for singleton clusters, the identifier versioning for tracking entry change and the entry mappings between the two-level databases. Database URL: http://www.ebi.ac.uk/patentdata/nr/",2013-02-09 +23586394,Clever generation of rich SPARQL queries from annotated relational schema: application to Semantic Web Service creation for biological databases.,"

Background

In recent years, a large amount of ""-omics"" data have been produced. However, these data are stored in many different species-specific databases that are managed by different institutes and laboratories. Biologists often need to find and assemble data from disparate sources to perform certain analyses. Searching for these data and assembling them is a time-consuming task. The Semantic Web helps to facilitate interoperability across databases. A common approach involves the development of wrapper systems that map a relational database schema onto existing domain ontologies. However, few attempts have been made to automate the creation of such wrappers.

Results

We developed a framework, named BioSemantic, for the creation of Semantic Web Services that are applicable to relational biological databases. This framework makes use of both Semantic Web and Web Services technologies and can be divided into two main parts: (i) the generation and semi-automatic annotation of an RDF view; and (ii) the automatic generation of SPARQL queries and their integration into Semantic Web Services backbones. We have used our framework to integrate genomic data from different plant databases.

Conclusions

BioSemantic is a framework that was designed to speed integration of relational databases. We present how it can be used to speed the development of Semantic Web Services for existing relational biological databases. Currently, it creates and annotates RDF views that enable the automatic generation of SPARQL queries. Web Services are also created and deployed automatically, and the semantic annotations of our Web Services are added automatically using SAWSDL attributes. BioSemantic is downloadable at http://southgreen.cirad.fr/?q=content/Biosemantic.",2013-04-15 +27776089,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline on the Role of Cranial Molding Orthosis (Helmet) Therapy for Patients With Positional Plagiocephaly.,"

Background

No evidence-based guidelines exist on the role of cranial-molding orthosis (helmet) therapy for patients with positional plagiocephaly.

Objective

To address the clinical question: ""Does helmet therapy provide effective treatment for positional plagiocephaly?"" and to make treatment recommendations based on the available evidence.

Methods

The US National Library of Medicine Medline database and the Cochrane Library were queried by using MeSH headings and key words relevant to the objective of this systematic review. Abstracts were reviewed, after which studies meeting the inclusion criteria were selected and graded according to their quality of evidence (Classes I-III). Evidentiary tables were constructed that summarized pertinent study results, and, based on the quality of the literature, recommendations were made (Levels I-III).

Results

Fifteen articles met criteria for inclusion into the evidence tables. There was 1 prospective randomized controlled trial (Class II), 5 prospective comparative studies (Class II), and 9 retrospective comparative studies (Class II).

Conclusion

There is a fairly substantive body of nonrandomized evidence that demonstrates more significant and faster improvement of cranial shape in infants with positional plagiocephaly treated with a helmet in comparison with conservative therapy, especially if the deformity is severe, provided that helmet therapy is applied during the appropriate period of infancy. Specific criteria regarding the measurement and quantification of deformity and the most appropriate time window in infancy for treatment of positional plagiocephaly with a helmet remains elusive. In general, infants with a more severe presenting deformity and infants who are helmeted early in infancy tend to have more significant correction (and even normalization) of head shape. The full guidelines document can be located at https://www.cns.org/guidelines/guidelines-management-patients-positional-plagiocephaly/Chapter_5.",2016-11-01 +27759672,Guidelines: Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline for the Diagnosis of Patients With Positional Plagiocephaly: The Role of Imaging.,"

Background

No evidence-based guidelines exist for the imaging of patients with positional plagiocephaly.

Objective

The objective of this systematic review and evidence-based guideline is to answer the question, Is imaging necessary for infants with positional plagiocephaly to make a diagnosis?

Methods

The National Library of Medicine Medline database and the Cochrane Library were queried with the use of MeSH headings and key words relevant to imaging as a means to diagnose plagiocephaly. Abstracts were reviewed, and an evidentiary table was assembled summarizing the studies and the quality of evidence (Classes I-III). Based on the quality of the literature, a recommendation was rendered (Level I, II, or III).

Results

A total of 42 full-text articles were selected for review. Of these, 10 were eliminated; thus, 32 full-text manuscripts were selected. There was no Class I evidence, but 2 Class II and 30 Class III studies were included. Three-dimensional cranial topographical imaging, ultrasound, skull x-rays, computed tomography, and magnetic resonance imaging were investigated.

Conclusion

Clinical examination is most often sufficient to diagnose plagiocephaly (quality, Class III; strength, Level III). Within the limits of this systematic review, the evidence suggests that imaging is rarely necessary and should be reserved for cases in which the clinical examination is equivocal. Many of the imaging studies were not designed to address the diagnostic utility of the imaging modality, and authors were actually assessing the utility of the imaging in longitudinal follow-up, not initial diagnosis. For this reason, some of the studies reviewed were downgraded in Level of Evidence. When needed, 3-dimensional cranial topographical photo, skull x-rays, or ultrasound imaging is almost always sufficient for definitive diagnosis. Computed tomography scanning should not be used to diagnose plagiocephaly, but it may be necessary to rule out craniosynostosis. The full guidelines document can be located at https://www.cns.org/guidelines/guidelines-management-patients-positional-plagiocephaly/Chapter_2.",2016-11-01 +27776087,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline on the Management of Patients With Positional Plagiocephaly: The Role of Repositioning.,"

Background

Plagiocephaly, involving positional deformity of the calvarium in infants, is one of the most common reasons for pediatric neurosurgical consultation.

Objective

To answer the question: ""what is the evidence for the effectiveness of repositioning for positional plagiocephaly?"" Treatment recommendations are provided based on the available evidence.

Methods

The National Library of Medicine MEDLINE database and the Cochrane Library were queried using MeSH headings and key words relevant to repositioning as a means to treat plagiocephaly and brachycephaly. Abstracts were reviewed to identify which studies met the inclusion criteria. An evidentiary table was assembled summarizing the studies and the quality of evidence (Classes I-III). Based on the quality of the literature, a recommendation was rendered (Level I, II, or III).

Results

There were 3 randomized trials (Class I), 1 prospective cohort study (Class II), and 6 retrospective cohort studies (Class III). Repositioning education was found to be equal to a repositioning device and inferior to a physical therapy program. Five of the 7 cohort studies comparing repositioning with a helmet reported helmets to be better and take less time.

Conclusion

Within the limits of this systematic review, repositioning education is effective in affording some degree of correction in virtually all infants with positional plagiocephaly or brachycephaly. Most studies suggest that a molding helmet corrects asymmetry more rapidly and to a greater degree than repositioning education. In a Class I study, repositioning education was as effective as repositioning education in conjunction with a repositioning wrap/device. Another Class I study demonstrated that a bedding pillow was superior to physical therapy for some infants. However, in keeping with the American Academy of Pediatrics' warning against the use of soft positioning pillows in the sleeping environment, the Task Force recommends physical therapy over any positioning device. The full guidelines document can be located at https://www.cns.org/guidelines/guidelines-management-patients-positional-plagiocephaly/Chapter_3.",2016-11-01 +27759675,Guidelines: Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline on the Role of Cranial Molding Orthosis (Helmet) Therapy for Patients With Positional Plagiocephaly.,"

Background

No evidence-based guidelines exist on the role of cranial-molding orthosis (helmet) therapy for patients with positional plagiocephaly.

Objective

To address the clinical question: ""Does helmet therapy provide effective treatment for positional plagiocephaly?"" and to make treatment recommendations based on the available evidence.

Methods

The US National Library of Medicine Medline database and the Cochrane Library were queried by using MeSH headings and key words relevant to the objective of this systematic review. Abstracts were reviewed, after which studies meeting the inclusion criteria were selected and graded according to their quality of evidence (Classes I-III). Evidentiary tables were constructed that summarized pertinent study results, and, based on the quality of the literature, recommendations were made (Levels I-III).

Results

Fifteen articles met criteria for inclusion into the evidence tables. There was 1 prospective randomized controlled trial (Class II), 5 prospective comparative studies (Class II), and 9 retrospective comparative studies (Class II).

Conclusion

There is a fairly substantive body of nonrandomized evidence that demonstrates more significant and faster improvement of cranial shape in infants with positional plagiocephaly treated with a helmet in comparison with conservative therapy, especially if the deformity is severe, provided that helmet therapy is applied during the appropriate period of infancy. Specific criteria regarding the measurement and quantification of deformity and the most appropriate time window in infancy for treatment of positional plagiocephaly with a helmet remains elusive. In general, infants with a more severe presenting deformity and infants who are helmeted early in infancy tend to have more significant correction (and even normalization) of head shape. The full guidelines document can be located at https://www.cns.org/guidelines/guidelines-management-patients-positional-plagiocephaly/Chapter_5.",2016-11-01 +27776086,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline for the Diagnosis of Patients With Positional Plagiocephaly: The Role of Imaging.,"

Background

No evidence-based guidelines exist for the imaging of patients with positional plagiocephaly.

Objective

The objective of this systematic review and evidence-based guideline is to answer the question, Is imaging necessary for infants with positional plagiocephaly to make a diagnosis?

Methods

The National Library of Medicine Medline database and the Cochrane Library were queried with the use of MeSH headings and key words relevant to imaging as a means to diagnose plagiocephaly. Abstracts were reviewed, and an evidentiary table was assembled summarizing the studies and the quality of evidence (Classes I-III). Based on the quality of the literature, a recommendation was rendered (Level I, II, or III).

Results

A total of 42 full-text articles were selected for review. Of these, 10 were eliminated; thus, 32 full-text were manuscripts selected. There was no Class I evidence, but 2 Class II and 30 Class III studies were included. Three-dimensional cranial topographical imaging, ultrasound, skull x-rays, computed tomography, and magnetic resonance imaging were investigated.

Conclusion

Clinical examination is most often sufficient to diagnose plagiocephaly (quality, Class III; strength, Level III). Within the limits of this systematic review, the evidence suggests that imaging is rarely necessary and should be reserved for cases in which the clinical examination is equivocal. Many of the imaging studies were not designed to address the diagnostic utility of the imaging modality, and authors were actually assessing the utility of the imaging in longitudinal follow-up, not initial diagnosis. For this reason, some of the studies reviewed were downgraded in Level of Evidence. When needed, 3-dimensional cranial topographical photo, skull x-rays, or ultrasound imaging is almost always sufficient for definitive diagnosis. Computed tomography scanning should not be used to diagnose plagiocephaly, but it may be necessary to rule out craniosynostosis. The full guidelines document can be located at https://www.cns.org/guidelines/guidelines-management-patients-positional-plagiocephaly/Chapter_2.",2016-11-01 +21656910,A database of reaction monitoring mass spectrometry assays for elucidating therapeutic response in cancer.,"

Purpose

The Quantitative Assay Database (QuAD), http://proteome.moffitt.org/QUAD/, facilitates widespread implementation of quantitative mass spectrometry in cancer biology and clinical research through sharing of methods and reagents for monitoring protein expression and modification.

Experimental design

Liquid chromatography coupled to multiple reaction monitoring (LC-MRM) mass spectrometry assays are developed using SDS-PAGE fractionated lysates from cancer cell lines. Pathway maps created using GeneGO Metacore provide the biological relationships between proteins and illustrate concepts for multiplexed analysis; each protein can be selected to examine assay development at the protein and peptide levels.

Results

The coupling of SDS-PAGE and multiple reaction monitoring mass spectrometry screening has been used to detect 876 peptides from 218 cancer-related proteins in model systems including colon, lung, melanoma, leukemias, and myeloma, which has led to the development of 95 quantitative assays including stable-isotope-labeled peptide standards. Methods are published online and peptide standards are made available to the research community. Protein expression measurements for heat shock proteins, including a comparison with ELISA and monitoring response to the HSP90 inhibitor, 17-(dimethylaminoethylamino)-17-demethoxygeldanamycin (17-DMAG), are used to illustrate the components of the QuAD and its potential utility.

Conclusions and clinical relevance

This resource enables quantitative assessment of protein components of signaling pathways and biological processes and holds promise for systematic investigation of treatment responses in cancer.",2011-06-08 +26949727,"Data for the identification of proteins and post-translational modifications of proteins associated to histones H3 and H4 in S. cerevisiae, using tandem affinity purification coupled with mass spectrometry.","Tandem affinity purification method (TAP) allows the efficient purification of native protein complexes which incorporate a target protein fused with the TAP tag. Purified multiprotein complexes can then be subjected to diverse types of proteomic analyses. Here we describe the data acquired after applying the TAP strategy on histones H3 and H4 coupled with mass spectrometry to identify associated proteins and protein post-translational modifications in the budding yeast, Saccharomyces cerevisiae. The mass spectrometry dataset described here consists of 14 files generated from four different analyses in a 5600 Triple TOF (Sciex) by information-dependent acquisition (IDA) LC-MS/MS. The above files contain information about protein identification, protein relative abundance, and PTMs identification. The instrumental raw data from these files has been also uploaded to the ProteomeXchange Consortium via the PRIDE partner repository, with the dataset identifier PRIDE: PXD002671 and http://dx.doi.org/10.6019/PXD002671. These data are discussed and interpreted in http://dx.doi.org/10.1016/j.jprot.2016.01.004. Valero et al. (2016) [1].",2016-02-05 +27759673,Guidelines: Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline on the Management of Patients With Positional Plagiocephaly: The Role of Repositioning.,"

Background

Plagiocephaly, involving positional deformity of the calvarium in infants, is one of the most common reasons for pediatric neurosurgical consultation.

Objective

To answer the question: ""what is the evidence for the effectiveness of repositioning for positional plagiocephaly?"" Treatment recommendations are provided based on the available evidence.

Methods

The National Library of Medicine MEDLINE database and the Cochrane Library were queried using MeSH headings and key words relevant to repositioning as a means to treat plagiocephaly and brachycephaly. Abstracts were reviewed to identify which studies met the inclusion criteria. An evidentiary table was assembled summarizing the studies and the quality of evidence (Classes I-III). Based on the quality of the literature, a recommendation was rendered (Level I, II, or III).

Results

There were 3 randomized trials (Class I), 1 prospective cohort study (Class II), and 6 retrospective cohort studies (Class III). Repositioning education was found to be equal to a repositioning device and inferior to a physical therapy program. Five of the 7 cohort studies comparing repositioning with a helmet reported helmets to be better and take less time.

Conclusion

Within the limits of this systematic review, repositioning education is effective in affording some degree of correction in virtually all infants with positional plagiocephaly or brachycephaly. Most studies suggest that a molding helmet corrects asymmetry more rapidly and to a greater degree than repositioning education. In a Class I study, repositioning education was as effective as repositioning education in conjunction with a repositioning wrap/device. Another Class I study demonstrated that a bedding pillow was superior to physical therapy for some infants. However, in keeping with the American Academy of Pediatrics' warning against the use of soft positioning pillows in the sleeping environment, the Task Force recommends physical therapy over any positioning device. The full guidelines document can be located at https://www.cns.org/guidelines/guidelines-management-patients-positional-plagiocephaly/Chapter_3.",2016-11-01 +28130234,Evolutionary design of multiple genes encoding the same protein.,"

Motivation

Enhancing expression levels of a target protein is an important goal in synthetic biology. A widely used strategy is to integrate multiple copies of genes encoding a target protein into a host organism genome. Integrating highly similar sequences, however, can induce homologous recombination between them, resulting in the ultimate reduction of the number of integrated genes.

Results

We propose a method for designing multiple protein-coding sequences (i.e. CDSs) that are unlikely to induce homologous recombination, while encoding the same protein. The method, which is based on multi-objective genetic algorithm, is intended to design a set of CDSs whose nucleotide sequences are as different as possible and whose codon usage frequencies are as highly adapted as possible to the host organism. We show that our method not only successfully designs a set of intended CDSs, but also provides insight into the trade-off between nucleotide differences among gene copies and codon usage frequencies.

Availability and implementation

Our method, named Tandem Designer, is available as a web-based application at http://tandem.trahed.jp/tandem/ .

Contact

: terai_goro@intec.co.jp or asai@k.u-tokyo.ac.jp.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +23180765,RiceXPro version 3.0: expanding the informatics resource for rice transcriptome.,"A wide range of resources on gene expression profiling enhance various strategies in plant molecular biology particularly in characterization of gene function. We have updated our gene expression profile database, RiceXPro (http://ricexpro.dna.affrc.go.jp/), to provide more comprehensive information on the transcriptome of rice encompassing the entire growth cycle and various experimental conditions. The gene expression profiles are currently grouped into three categories, namely, 'field/development' with 572 data corresponding to 12 data sets, 'plant hormone' with 143 data corresponding to 13 data sets and 'cell- and tissue-type' comprising of 38 microarray data. In addition to the interface for retrieving expression information of a gene/genes in each data set, we have incorporated an interface for a global approach in searching an overall view of the gene expression profiles from multiple data sets within each category. Furthermore, we have also added a BLAST search function that enables users to explore expression profile of a gene/genes with similarity to a query sequence. Therefore, the updated version of RiceXPro can be used more efficiently to survey the gene expression signature of rice in sufficient depth and may also provide clues on gene function of other cereal crops.",2012-11-23 +24990533,GALT protein database: querying structural and functional features of GALT enzyme.,"Knowledge of the impact of variations on protein structure can enhance the comprehension of the mechanisms of genetic diseases related to that protein. 
Here, we present a new version of GALT Protein Database, a Web-accessible data repository for the storage and interrogation of structural effects of variations of the enzyme galactose-1-phosphate uridylyltransferase (GALT), the impairment of which leads to classic Galactosemia, a rare genetic disease. This new version of this database now contains the models of 201 missense variants of GALT enzyme, including heterozygous variants, and it allows users not only to retrieve information about the missense variations affecting this protein, but also to investigate their impact on substrate binding, intersubunit interactions, stability, and other structural features. In addition, it allows the interactive visualization of the models of variants collected into the database. We have developed additional tools to improve the use of the database by nonspecialized users. This Web-accessible database (http://bioinformatica.isa.cnr.it/GALT/GALT2.0) represents a model of tools potentially suitable for application to other proteins that are involved in human pathologies and that are subjected to genetic variations.",2014-07-23 +24482028,Health literacy in vascular and interventional radiology: a comparative analysis of online patient education resources.,"

Purpose

The Internet is frequently accessed by patients as a resource for medical knowledge. However, the provided material is typically written at a level well above the recommended 7th grade level. A clear understanding of the capabilities, limitations, risks, and benefits of interventional radiology by patients, both current and prospective, is hindered when the textual information offered to the public is pitched at a level of sophistication too high for general comprehension.

Methods

In January 2013, all 25 patient education resources from the Cardiovascular and Interventional Radiology Society of Europe (CIRSE) Web site ( http://www.cirse.org ) and all 31 resources from the Society of Interventional Radiology (SIR) Web site ( http://www.sirweb.org ) were analyzed for their specific level of readability using ten quantitative scales: Flesch Reading Ease, Flesch-Kincaid Grade Level, Simple Measure of Gobbledygook, Gunning fog index, New Fog Count, Coleman-Liau index, FORCAST formula, Fry graph, Raygor Readability Estimate, and New Dale-Chall.

Results

Collectively, the patient education resources on the CIRSE Web site are written at the 12.3 grade level, while the resources on the SIR Web site are written at the 14.5 grade level.

Conclusion

Educational health care materials available on both the CIRSE and the SIR Web sites are presented in language in the aggregate that could be too difficult for many lay people to fully understand. Given the complex nature of vascular and interventional radiology, it may be advantageous to rewrite these educational resources at a lower reading level to increase comprehension.",2014-01-31 +26802430,Network-Based Analysis of eQTL Data to Prioritize Driver Mutations.,"In clonal systems, interpreting driver genes in terms of molecular networks helps understanding how these drivers elicit an adaptive phenotype. Obtaining such a network-based understanding depends on the correct identification of driver genes. In clonal systems, independent evolved lines can acquire a similar adaptive phenotype by affecting the same molecular pathways, a phenomenon referred to as parallelism at the molecular pathway level. This implies that successful driver identification depends on interpreting mutated genes in terms of molecular networks. Driver identification and obtaining a network-based understanding of the adaptive phenotype are thus confounded problems that ideally should be solved simultaneously. In this study, a network-based eQTL method is presented that solves both the driver identification and the network-based interpretation problem. As input the method uses coupled genotype-expression phenotype data (eQTL data) of independently evolved lines with similar adaptive phenotypes and an organism-specific genome-wide interaction network. The search for mutational consistency at pathway level is defined as a subnetwork inference problem, which consists of inferring a subnetwork from the genome-wide interaction network that best connects the genes containing mutations to differentially expressed genes. Based on their connectivity with the differentially expressed genes, mutated genes are prioritized as driver genes. 
Based on semisynthetic data and two publicly available data sets, we illustrate the potential of the network-based eQTL method to prioritize driver genes and to gain insights in the molecular mechanisms underlying an adaptive phenotype. The method is available at http://bioinformatics.intec.ugent.be/phenetic_eqtl/index.html.",2016-01-23 +27853484,Filtered circular fingerprints improve either prediction or runtime performance while retaining interpretability.,"

Background

Even though circular fingerprints have been first introduced more than 50 years ago, they are still widely used for building highly predictive, state-of-the-art (Q)SAR models. Historically, these structural fragments were designed to search large molecular databases. Hence, to derive a compact representation, circular fingerprint fragments are often folded to comparatively short bit-strings. However, folding fingerprints introduces bit collisions, and therefore adds noise to the encoded structural information and removes its interpretability. Both representations, folded as well as unprocessed fingerprints, are often used for (Q)SAR modeling.

Results

We show that it can be preferable to build (Q)SAR models with circular fingerprint fragments that have been filtered by supervised feature selection, instead of applying folded or all fragments. Compared to folded fingerprints, filtered fingerprints significantly increase predictive performance and remain unambiguous and interpretable. Compared to unprocessed fingerprints, filtered fingerprints reduce the computational effort and are a more compact and less redundant feature representation. Depending on the selected learning algorithm filtering yields about equally predictive (Q)SAR models. We demonstrate the suitability of filtered fingerprints for (Q)SAR modeling by presenting our freely available web service Collision-free Filtered Circular Fingerprints that provides rationales for predictions by highlighting important structural features in the query compound (see http://coffer.informatik.uni-mainz.de).

Conclusions

Circular fingerprints are potent structural features that yield highly predictive models and encode interpretable structural information. However, to not lose interpretability, circular fingerprints should not be folded when building prediction models. Our experiments show that filtering is a suitable option to reduce the high computational effort when working with all fingerprint fragments. Additionally, our experiments suggest that the area under precision recall curve is a more sensible statistic for validating (Q)SAR models for virtual screening than the area under ROC or other measures for early recognition.

Graphical abstract

",2016-10-31 +28436664,CPPred-RF: A Sequence-based Predictor for Identifying Cell-Penetrating Peptides and Their Uptake Efficiency.,"Cell-penetrating peptides (CPPs), have been proven as important drug-delivery vehicles, demonstrating the potential as therapeutic candidates. The past decade has witnessed a rapid growth in CPP-based research. Recently, many computational efforts have been made to develop machine-learning-based methods for identifying CPPs. Although much progress has been made, existing methods still suffer low feature representation capability that limits further performance improvement. In this study, we propose a novel predictor called CPPred-RF, in which we integrate multiple sequence-based feature descriptors to sufficiently explore distinct information embedded in CPPs, employ a well-established feature selection technique to improve the feature representation, and, for the first time, construct a two-layer prediction framework based on the random forest algorithm. The jackknife results on benchmark data sets show that the proposed CPPred-RF is at least competitive with the state-of-the-art predictors. Moreover, we establish the first online Web server in terms of predicting CPPs and their uptake efficiency simultaneously. It is freely available at http://server.malab.cn/CPPred-RF .",2017-04-26 +27917038,"Revised classification and catalogue of global Nepticulidae and Opostegidae (Lepidoptera, Nepticuloidea).","A catalogue of all named Nepticulidae and Opostegidae is presented, including fossil species. The catalogue is simultaneously published online in the scratchpad http://nepticuloidea.info/ and in Catalogue of Life (http://www.catalogueoflife.org/col/details/database/id/172). We provide a historical overview of taxonomic research on Nepticuloidea and a brief 'state of the art'. A DNA barcode dataset with 3205 barcodes is made public at the same time, providing DNA barcodes of ca. 
779 species, of which 2563 are identified as belonging to 444 validly published species. We recognise 862 extant and 18 fossil species of Nepticulidae in 22 extant genera and the fossil form genus Stigmellites. We count 192 valid Opostegidae species in 7 genera, without fossils. We also list seven dubious Nepticulidae names that cannot be placed due to absent type material and poor descriptions, 18 unavailable names in Nepticulidae that cannot be placed and we also list the 33 names (including four fossils) that once were placed as Nepticulidae or Opostegidae but are now excluded. All synonyms and previous combinations are listed. The generic classification follows the Molecular phylogeny that is published almost simultaneously. Subfamilies and tribes are not recognised, Trifurculinae Scoble, 1983 is synonymised with Nepticulidae Stainton, 1854 and Opostegoidinae Kozlov, 1987 is synonymised with Opostegidae Meyrick, 1893. The status of Casanovula Hoare, 2013, Etainia Beirne, 1945, Fomoria Beirne, 1945, Glaucolepis Braun, 1917, Menurella Hoare, 2013, Muhabbetana Koçak & Kemal, 2007 and Zimmermannia Hering, 1940 is changed from subgenus to full genus, whereas two genera are considered synonyms again: Manoneura Davis, 1979, a synonym of Enteucha Meyrick, 1915 and Levarchama Beirne, 1945, a synonym of Trifurcula Zeller, 1848. We propose 87 new combinations in Nepticulidae and 10 in Opostegidae, largely due to the new classification, and re-examination of some species. 
We propose the following 37 new synonymies for species (35 in Nepticulidae, 2 in Opostegidae): Stigmella acerifoliella Dovnar-Zapolski, 1969 (unavailable, = Stigmella acerna Puplesis, 1988), Stigmella nakamurai Kemperman & Wilkinson, 1985 (= Stigmella palionisi Puplesis, 1984), Nepticula amseli Skala, 1941 (unavailable = Stigmella birgittae Gustafsson, 1985), Stigmella cathepostis Kemperman & Wilkinson, 1985 (= Stigmella microtheriella (Stainton, 1854)), Stigmella populnea Kemperman & Wilkinson, 1985 (= Stigmella nivenburgensis (Preissecker, 1942)), Nepticula obscurella Braun, 1912 (revised synonymy, = Stigmella myricafoliella (Busck, 1900)), Nepticula mandingella Gustafsson, 1972 (= Stigmella wollofella (Gustafsson, 1972)), Stigmella rosaefoliella pectocatena Wilkinson & Scoble, 1979 (= Stigmella centifoliella (Zeller, 1848)), Micropteryx pomivorella Packard, 1870 (= Stigmella oxyacanthella (Stainton, 1854)), Stigmella crataegivora Puplesis, 1985 (= Stigmella micromelis Puplesis, 1985), Stigmella scinanella Wilkinson & Scoble, 1979 (= Stigmella purpuratella (Braun, 1917)), Stigmella palmatae Puplesis, 1984 (= Stigmella filipendulae (Wocke, 1871)), Stigmella sesplicata Kemperman & Wilkinson, 1985 (= Stigmella lediella (Schleich, 1867)), Stigmella rhododendrifolia Dovnar-Zapolski & Tomilova, 1978 (unavailable, = Stigmella lediella (Schleich, 1867)), Stigmella oa Kemperman & Wilkinson, 1985 (= Stigmella spiculifera Kemperman & Wilkinson, 1985), Stigmella gracilipae Hirano, 2014 (= Stigmella monticulella Puplesis, 1984), Nepticula chaoniella Herrich-Schäffer, 1863 (= Stigmella samiatella (Zeller, 1839)), Bohemannia piotra Puplesis, 1984 (= Bohemannia pulverosella (Stainton, 1849)), Bohemannia nipponicella Hirano, 2010 (= Bohemannia manschurella Puplesis, 1984), Sinopticula sinica Yang, 1989 (= Glaucolepis oishiella (Matsumura, 1931)), Trifurcula collinella Nel, 2012 (= Glaucolepis magna (A. Laštuvka & Z. 
Laštuvka, 1997)), Obrussa tigrinella Puplesis, 1985 (= Etainia trifasciata (Matsumura, 1931)), Microcalyptris vittatus Puplesis, 1984 and Microcalyptris arenosus Falkovitsh, 1986 (both = Acalyptris falkovitshi (Puplesis, 1984)), Ectoedemia castaneae Busck, 1913, Ectoedemia heinrichi Busck, 1914 and Ectoedemia helenella Wilkinson, 1981 (all three = Zimmermannia bosquella (Chambers, 1878)), Ectoedemia chloranthis Meyrick, 1928 and Ectoedemia acanthella Wilkinson & Newton, 1981 (both = Zimmermannia grandisella (Chambers, 1880)), Ectoedemia coruscella Wilkinson, 1981 (= Zimmermannia mesoloba (Davis, 1978)), Ectoedemia piperella Wilkinson & Newton, 1981 and Ectoedemia reneella Wilkinson, 1981 (both = Zimmermannia obrutella (Zeller, 1873)), Ectoedemia similigena Puplesis, 1994 (= Ectoedemia turbidella (Zeller, 1848)), Ectoedemia andrella Wilkinson, 1981 (= Ectoedemia ulmella (Braun, 1912)), Nepticula canadensis Braun, 1917 (= Ectoedemia minimella (Zetterstedt, 1839)), Opostega rezniki Kozlov, 1985 (= Opostega cretatella Chrétien, 1915), Pseudopostega cyrneochalcopepla Nel & Varenne, 2012 (= Pseudopostega chalcopepla (Walsingham, 1908)). Stigmella caryaefoliella (Clemens, 1861) and Zimmermannia bosquella (Chambers, 1878) are taken out of synonymy and re-instated as full species. Lectotypes are designated for Trifurcula obrutella Zeller, 1873 and Nepticula grandisella Chambers, 1880.",2016-10-31 +27153607,GenomeRunner web server: regulatory similarity and differences define the functional impact of SNP sets.,"

Motivation

The growing amount of regulatory data from the ENCODE, Roadmap Epigenomics and other consortia provides a wealth of opportunities to investigate the functional impact of single nucleotide polymorphisms (SNPs). Yet, given the large number of regulatory datasets, researchers are posed with a challenge of how to efficiently utilize them to interpret the functional impact of SNP sets.

Results

We developed the GenomeRunner web server to automate systematic statistical analysis of SNP sets within a regulatory context. Besides defining the functional impact of SNP sets, GenomeRunner implements novel regulatory similarity/differential analyses, and cell type-specific regulatory enrichment analysis. Validated against literature- and disease ontology-based approaches, analysis of 39 disease/trait-associated SNP sets demonstrated that the functional impact of SNP sets corresponds to known disease relationships. We identified a group of autoimmune diseases with SNPs distinctly enriched in the enhancers of T helper cell subpopulations, and demonstrated relevant cell type-specificity of the functional impact of other SNP sets. In summary, we show how systematic analysis of genomic data within a regulatory context can help interpreting the functional impact of SNP sets.

Availability and implementation

GenomeRunner web server is freely available at http://www.integrativegenomics.org/

Contact

mikhail.dozmorov@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-01 +28886606,High-Throughput Analysis of Ovarian Cycle Disruption by Mixtures of Aromatase Inhibitors.,"

Background

Combining computational toxicology with ExpoCast exposure estimates and ToxCast™ assay data gives us access to predictions of human health risks stemming from exposures to chemical mixtures.

Objectives

We explored, through mathematical modeling and simulations, the size of potential effects of random mixtures of aromatase inhibitors on the dynamics of women's menstrual cycles.

Methods

We simulated random exposures to millions of potential mixtures of 86 aromatase inhibitors. A pharmacokinetic model of intake and disposition of the chemicals predicted their internal concentration as a function of time (up to 2 y). A ToxCast™ aromatase assay provided concentration-inhibition relationships for each chemical. The resulting total aromatase inhibition was input to a mathematical model of the hormonal hypothalamus-pituitary-ovarian control of ovulation in women.

Results

Above 10% inhibition of estradiol synthesis by aromatase inhibitors, noticeable (eventually reversible) effects on ovulation were predicted. Exposures to individual chemicals never led to such effects. In our best estimate, ∼10% of the combined exposures simulated had mild to catastrophic impacts on ovulation. A lower bound on that figure, obtained using an optimistic exposure scenario, was 0.3%.

Conclusions

These results demonstrate the possibility to predict large-scale mixture effects for endocrine disrupters with a predictive toxicology approach that is suitable for high-throughput ranking and risk assessment. The size of the effects predicted is consistent with an increased risk of infertility in women from everyday exposures to our chemical environment. https://doi.org/10.1289/EHP742.",2017-07-19 +26458888,Unified tests for fine-scale mapping and identifying sparse high-dimensional sequence associations.,"

Motivation

In searching for genetic variants for complex diseases with deep sequencing data, genomic marker sets of high-dimensional genotypic data and sparse functional variants are quite common. Existing sequence association tests are incapable of identifying such marker sets or individual causal loci, although they appeared powerful to identify small marker sets with dense functional variants. In sequence association studies of admixed individuals, cryptic relatedness and population structure are known to confound the association analyses.

Method

We here propose a unified marker wise test (uFineMap) to accurately localize causal loci and a unified high-dimensional set based test (uHDSet) to identify high-dimensional sparse associations in deep sequencing genomic data of multi-ethnic individuals with random relatedness. These two novel tests are based on scaled sparse linear mixed regressions with Lp (0 < p < 1) norm regularization. They jointly adjust for cryptic relatedness, population structure and other confounders to prevent false discoveries and improve statistical power for identifying promising individual markers and marker sets that harbor functional genetic variants of a complex trait.

Results

With large scale simulation data and real data analyses, the proposed tests appropriately controlled Type I error rates and appeared to be more powerful than several prominent methods. We illustrated their practical utilities by the applications to DNA sequence data of Framingham Heart Study for osteoporosis. The proposed tests identified 11 novel significant genes that were missed by the prominent famSKAT and GEMMA. In particular, four out of six most significant pathways identified by the uHDSet but missed by famSKAT have been reported to be related to BMD or osteoporosis in the literature.

Availability and implementation

The computational toolkit is available for academic use: https://sites.google.com/site/shaolongscode/home/uhdset

Contact

wyp@tulane.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-12 +28886603,Review of Epidemiological Studies of Drinking-Water Turbidity in Relation to Acute Gastrointestinal Illness.,"

Background

Turbidity has been used as an indicator of microbiological contamination of drinking water in time-series studies attempting to discern the presence of waterborne gastrointestinal illness; however, the utility of turbidity as a proxy exposure measure has been questioned.

Objectives

We conducted a review of epidemiological studies of the association between turbidity of drinking-water supplies and incidence of acute gastrointestinal illness (AGI), including a synthesis of the overall weight of evidence. Our goal was to evaluate the potential for causal inference from the studies.

Methods

We identified 14 studies on the topic (distinct by region, time period and/or population). We evaluated each study with regard to modeling approaches, potential biases, and the strength of evidence. We also considered consistencies and differences in the collective results.

Discussion

Positive associations between drinking-water turbidity and AGI incidence were found in different cities and time periods, and with both unfiltered and filtered supplies. There was some evidence for a stronger association at higher turbidity levels. The studies appeared to adequately adjust for confounding. There was fair consistency in the notable lags between turbidity measurement and AGI identification, which fell between 6 and 10 d in many studies.

Conclusions

The observed associations suggest a detectable incidence of waterborne AGI from drinking water in the systems and time periods studied. However, some discrepant results indicate that the association may be context specific. Combining turbidity with seasonal and climatic factors, additional water quality measures, and treatment data may enhance predictive modeling in future studies. https://doi.org/10.1289/EHP1090.",2017-08-17 +21472436,"The Structural Biology Knowledgebase: a portal to protein structures, sequences, functions, and methods.","The Protein Structure Initiative's Structural Biology Knowledgebase (SBKB, URL: http://sbkb.org ) is an open web resource designed to turn the products of the structural genomics and structural biology efforts into knowledge that can be used by the biological community to understand living systems and disease. Here we will present examples on how to use the SBKB to enable biological research. For example, a protein sequence or Protein Data Bank (PDB) structure ID search will provide a list of related protein structures in the PDB, associated biological descriptions (annotations), homology models, structural genomics protein target status, experimental protocols, and the ability to order available DNA clones from the PSI:Biology-Materials Repository. A text search will find publication and technology reports resulting from the PSI's high-throughput research efforts. Web tools that aid in research, including a system that accepts protein structure requests from the community, will also be described. 
Created in collaboration with the Nature Publishing Group, the Structural Biology Knowledgebase monthly update also provides a research library, editorials about new research advances, news, and an events calendar to present a broader view of structural genomics and structural biology.",2011-04-07 +24185702,FunCoup 3.0: database of genome-wide functional coupling networks.,"We present an update of the FunCoup database (http://FunCoup.sbc.su.se) of functional couplings, or functional associations, between genes and gene products. Identifying these functional couplings is an important step in the understanding of higher level mechanisms performed by complex cellular processes. FunCoup distinguishes between four classes of couplings: participation in the same signaling cascade, participation in the same metabolic process, co-membership in a protein complex and physical interaction. For each of these four classes, several types of experimental and statistical evidence are combined by Bayesian integration to predict genome-wide functional coupling networks. The FunCoup framework has been completely re-implemented to allow for more frequent future updates. It contains many improvements, such as a regularization procedure to automatically downweight redundant evidences and a novel method to incorporate phylogenetic profile similarity. Several datasets have been updated and new data have been added in FunCoup 3.0. Furthermore, we have developed a new Web site, which provides powerful tools to explore the predicted networks and to retrieve detailed information about the data underlying each prediction.",2013-10-31 +27012570,Tacrolimus or clobetasol for treatment of oral lichen planus.,"

Data sources

Pubmed, the Cochrane library, Scopus, Science Direct and two publishing company journals between 1998 and 2012.

Study selection

Randomised controlled trials (RCTs) where the population included patients having OLP and the interventions were the use of clobetasol or tacrolimus compared to another intervention, while the outcome was improvement in clinical status.

Data extraction and synthesis

Two authors working independently assessed for inclusion and performed data extraction. Quality was evaluated using Critical Appraisal Skills Programme (CASP) worksheets (http://www.casp-uk.net/). The treatment effect was calculated using OR and then pooled using a fixed model since heterogeneity was calculated as very low.

Results

Ten studies were included; five studies involved clobetasol and five involved tacrolimus. Two meta-analyses were presented. The odds ratio for improvement for clobetasol was 1.21 (95%CI; 0.48 - 3.05) and 8.09 (95%CI; 3.77 - 17.38) for tacrolimus.

Conclusions

The authors concluded that using clobetasol or tacrolimus increases the odds of improvement of OLP lesions and therefore they are effective treatment for the condition, and go on to recommend tacrolimus as first-line therapy.",2016-03-01 +27885741,Zika and pregnancy: A comprehensive review. ,"Zika virus (ZIKV) infection is a well-nurtured topic for healthcare personnel nowadays. Central nervous system involvement including microcephaly and ocular involvements has already been reported in neonates of affected pregnant ladies. In this article, we have discussed these effects on the newborns of ZIKV-infected mothers. The proposed pathogenesis, modes of transmission of this infection from mothers to the fetuses, diagnosis of the cases and precaution for the pregnant ladies have also been discussed. We have gathered the recently available data on the risk of ZIKV for expectant mothers from PubMed, https://www.gov.uk/guidance/zika-virus as well as from centers for disease control and prevention websites.",2016-11-25 +27794552,LinkProt: a database collecting information about biological links.,"Protein chains are known to fold into topologically complex shapes, such as knots, slipknots or complex lassos. This complex topology of the chain can be considered as an additional feature of a protein, separate from secondary and tertiary structures. Moreover, the complex topology can be defined also as one additional structural level. The LinkProt database (http://linkprot.cent.uw.edu.pl) collects and displays information about protein links - topologically non-trivial structures made by up to four chains and complexes of chains (e.g. in capsids). The database presents deterministic links (with loops closed, e.g. by two disulfide bonds), links formed probabilistically and macromolecular links. The structures are classified according to their topology and presented using the minimal surface area method. 
The database is also equipped with basic tools which allow users to analyze the topology of arbitrary (bio)polymers.",2016-10-28 +27587686,ModuleAlign: module-based global alignment of protein-protein interaction networks.,"

Motivation

As an increasing amount of protein-protein interaction (PPI) data becomes available, their computational interpretation has become an important problem in bioinformatics. The alignment of PPI networks from different species provides valuable information about conserved subnetworks, evolutionary pathways and functional orthologs. Although several methods have been proposed for global network alignment, there is a pressing need for methods that produce more accurate alignments in terms of both topological and functional consistency.

Results

In this work, we present a novel global network alignment algorithm, named ModuleAlign, which makes use of local topology information to define a module-based homology score. Based on a hierarchical clustering of functionally coherent proteins involved in the same module, ModuleAlign employs a novel iterative scheme to find the alignment between two networks. Evaluated on a diverse set of benchmarks, ModuleAlign outperforms state-of-the-art methods in producing functionally consistent alignments. By aligning Pathogen-Human PPI networks, ModuleAlign also detects a novel set of conserved human genes that pathogens preferentially target to cause pathogenesis.

Availability

http://ttic.uchicago.edu/∼hashemifar/ModuleAlign.html

Contact

canzar@ttic.edu or j3xu@ttic.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +27347375,A curated transcriptome dataset collection to investigate the functional programming of human hematopoietic cells in early life.,"Compendia of large-scale datasets made available in public repositories provide an opportunity to identify and fill gaps in biomedical knowledge. But first, these data need to be made readily accessible to research investigators for interpretation. Here we make available a collection of transcriptome datasets to investigate the functional programming of human hematopoietic cells in early life. Thirty two datasets were retrieved from the NCBI Gene Expression Omnibus (GEO) and loaded in a custom web application called the Gene Expression Browser (GXB), which was designed for interactive query and visualization of integrated large-scale data. Quality control checks were performed. Multiple sample groupings and gene rank lists were created allowing users to reveal age-related differences in transcriptome profiles, changes in the gene expression of neonatal hematopoietic cells to a variety of immune stimulators and modulators, as well as during cell differentiation. Available demographic, clinical, and cell phenotypic information can be overlaid with the gene expression data and used to sort samples. Web links to customized graphical views can be generated and subsequently inserted in manuscripts to report novel findings. GXB also enables browsing of a single gene across projects, thereby providing new perspectives on age- and developmental stage-specific expression of a given gene across the human hematopoietic system. This dataset collection is available at: http://developmentalimmunology.gxbsidra.org/dm3/geneBrowser/list.",2016-03-30 +27153606,Drug-induced adverse events prediction with the LINCS L1000 data.,"

Motivation

Adverse drug reactions (ADRs) are a central consideration during drug development. Here we present a machine learning classifier to prioritize ADRs for approved drugs and pre-clinical small-molecule compounds by combining chemical structure (CS) and gene expression (GE) features. The GE data is from the Library of Integrated Network-based Cellular Signatures (LINCS) L1000 dataset that measured changes in GE before and after treatment of human cells with over 20 000 small-molecule compounds including most of the FDA-approved drugs. Using various benchmarking methods, we show that the integration of GE data with the CS of the drugs can significantly improve the predictability of ADRs. Moreover, transforming GE features to enrichment vectors of biological terms further improves the predictive capability of the classifiers. The most predictive biological-term features can assist in understanding the drug mechanisms of action. Finally, we applied the classifier to all  >20 000 small-molecules profiled, and developed a web portal for browsing and searching predictive small-molecule/ADR connections.

Availability and implementation

The interface for the adverse event predictions for the >20 000 LINCS compounds is available at http://maayanlab.net/SEP-L1000/. Contact: avi.maayan@mssm.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-01 +23740741,DIGEP-Pred: web service for in silico prediction of drug-induced gene expression profiles based on structural formula.,"

Summary

Experimentally found gene expression profiles are used to solve different problems in pharmaceutical studies, such as drug repositioning, resistance, toxicity and drug-drug interactions. A special web service, DIGEP-Pred, for prediction of drug-induced changes of gene expression profiles based on structural formulae of chemicals has been developed. Structure-activity relationships for prediction of drug-induced gene expression profiles were determined by Prediction of Activity Spectra for Substances (PASS) software. Comparative Toxicogenomics Database with data on the known drug-induced gene expression profiles of chemicals was used to create mRNA- and protein-based training sets. An average prediction accuracy for the training sets (ROC AUC) calculated by leave-one-out cross-validation on the basis of mRNA data (1385 compounds, 952 genes, 500 up- and 475 down-regulations) and protein data (1451 compounds, 139 genes, 93 up- and 55 down-regulations) exceeded 0.85.

Availability

Freely available on the web at http://www.way2drug.com/GE.",2013-06-05 +27903911,miRTar2GO: a novel rule-based model learning method for cell line specific microRNA target prediction that integrates Ago2 CLIP-Seq and validated microRNA-target interaction data.,"MicroRNAs (miRNAs) are ∼19-22 nucleotides (nt) long regulatory RNAs that regulate gene expression by recognizing and binding to complementary sequences on mRNAs. The key step in revealing the function of a miRNA, is the identification of miRNA target genes. Recent biochemical advances including PAR-CLIP and HITS-CLIP allow for improved miRNA target predictions and are widely used to validate miRNA targets. Here, we present miRTar2GO, which is a model, trained on the common rules of miRNA-target interactions, Argonaute (Ago) CLIP-Seq data and experimentally validated miRNA target interactions. miRTar2GO is designed to predict miRNA target sites using more relaxed miRNA-target binding characteristics. More importantly, miRTar2GO allows for the prediction of cell-type specific miRNA targets. We have evaluated miRTar2GO against other widely used miRNA target prediction algorithms and demonstrated that miRTar2GO produced significantly higher F1 and G scores. Target predictions, binding specifications, results of the pathway analysis and gene ontology enrichment of miRNA targets are freely available at http://www.mirtar2go.org.",2017-04-01 +24968257,Different infusion durations for preventing platinum-induced hearing loss in children with cancer.,"

Background

Platinum-based therapy, including cisplatin, carboplatin or oxaliplatin, or a combination of these, is used to treat a variety of paediatric malignancies. Unfortunately, one of the most important adverse effects is the occurrence of hearing loss or ototoxicity. In an effort to prevent this ototoxicity, different platinum infusion durations have been studied.

Objectives

To assess the effects of different durations of platinum infusion to prevent hearing loss or tinnitus, or both, in children with cancer. Secondary objectives were to assess possible effects of these infusion durations on: a) anti-tumour efficacy of platinum-based therapy, b) adverse effects other than hearing loss or tinnitus, and c) quality of life.

Search methods

We searched the electronic databases Cochrane Central Register of Controlled Trials (CENTRAL 2013, Issue 12), MEDLINE (PubMed) (1945 to 4 December 2013) and EMBASE (Ovid) (1980 to 4 December 2013). In addition, we handsearched reference lists of relevant articles and the conference proceedings of the International Society for Paediatric Oncology (2009 to 2013). We scanned ClinicalTrials.gov (www.clinicaltrials.gov) and the World Health Organization International Clinical Trials Registry Platform (WHO ICTRP) (http://www.who.int/ictrp/en/) for ongoing trials (both searched on 13 December 2013).

Selection criteria

Randomised controlled trials (RCTs) or controlled clinical trials (CCTs) comparing different platinum infusion durations in children with cancer. Only the platinum infusion duration could differ between the treatment groups.

Data collection and analysis

Two review authors independently performed the study selection, risk of bias assessment and GRADE assessment of included studies, and data extraction including adverse effects. Analyses were performed according to the guidelines of the Cochrane Handbook for Systematic Reviews of Interventions.

Main results

We identified one RCT and no CCTs. The RCT (total number of children = 91) evaluated the use of a continuous cisplatin infusion (N = 43) versus a one hour bolus cisplatin infusion (N = 48) in children with neuroblastoma. For the continuous infusion, cisplatin was administered on days 1 to 5 of the cycle but it is unclear if the infusion duration was a total of 5 days. Methodological limitations were present. Only results from shortly after induction therapy were provided. No clear evidence of a difference in hearing loss (defined as asymptomatic and symptomatic disease combined) between the different infusion durations was identified as results were imprecise (RR 1.39; 95% CI 0.47 to 4.13, low quality evidence). Although the numbers of children were not provided, it was stated that tumour response was equivalent in both treatment arms. With regard to adverse effects other than ototoxicity we were only able to assess toxic deaths. Again, the confidence interval of the estimated effect was too wide to exclude differences between the treatment groups (RR 1.12; 95% CI 0.07 to 17.31, low quality evidence). No data were available for the other outcomes of interest (i.e. tinnitus, overall survival, event-free survival and quality of life) or for other (combinations of) infusion durations or other platinum analogues.

Authors' conclusions

Since only one eligible RCT evaluating the use of a continuous cisplatin infusion versus a one hour bolus cisplatin infusion was found, and that had methodological limitations, no definitive conclusions can be made. It should be noted that 'no evidence of effect', as identified in this review, is not the same as 'evidence of no effect'. For other (combinations of) infusion durations and other platinum analogues no eligible studies were identified. More high quality research is needed.",2014-06-26 +28122607,A step forward in addressing cancer survivorship in the Asia-Pacific region.,"Cancer survivorship is being increasingly recognized as an important component of cancer care. This commentary reviews the key findings reported in the recent BMC Medicine publication of the ACTION study, which focuses on the health-related quality of life and psychological distress in 5249 cancer survivors in eight low- and middle-income countries in Southeast Asia. The study identified that more than one-third of survivors experience at least mild levels of anxiety and depressive symptoms and that poorer outcomes in quality of life, anxiety, and depressive symptoms are linked to a number of clinical and demographic factors. Such data provides an important foundation to inform cancer policy and service planning in Asia. Future research efforts are required to further understand the needs of cancer survivors in this region and determine interventions to improve outcomes for this population.Please see related article: http://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-016-0768-2 .",2017-01-26 +28365724,"Actionable, long-term stable and semantic web compatible identifiers for access to biological collection objects. ","With biodiversity research activities being increasingly shifted to the web, the need for a system of persistent and stable identifiers for physical collection objects becomes increasingly pressing. 
The Consortium of European Taxonomic Facilities agreed on a common system of HTTP-URI-based stable identifiers which is now rolled out to its member organizations. The system follows Linked Open Data principles and implements redirection mechanisms to human-readable and machine-readable representations of specimens facilitating seamless integration into the growing semantic web. The implementation of stable identifiers across collection organizations is supported with open source provider software scripts, best practices documentations and recommendations for RDF metadata elements facilitating harmonized access to collection information in web portals. Database URL: http://cetaf.org/cetaf-stable-identifiers.",2017-01-01 +23832245,PhosphoChain: a novel algorithm to predict kinase and phosphatase networks from high-throughput expression data.,"

Motivation

Protein phosphorylation is critical for regulating cellular activities by controlling protein activities, localization and turnover, and by transmitting information within cells through signaling networks. However, predictions of protein phosphorylation and signaling networks remain a significant challenge, lagging behind predictions of transcriptional regulatory networks into which they often feed.

Results

We developed PhosphoChain to predict kinases, phosphatases and chains of phosphorylation events in signaling networks by combining mRNA expression levels of regulators and targets with a motif detection algorithm and optional prior information. PhosphoChain correctly reconstructed ∼78% of the yeast mitogen-activated protein kinase pathway from publicly available data. When tested on yeast phosphoproteomic data from large-scale mass spectrometry experiments, PhosphoChain correctly identified ∼27% more phosphorylation sites than existing motif detection tools (NetPhosYeast and GPS2.0), and predictions of kinase-phosphatase interactions overlapped with ∼59% of known interactions present in yeast databases. PhosphoChain provides a valuable framework for predicting condition-specific phosphorylation events from high-throughput data.

Availability

PhosphoChain is implemented in Java and available at http://virgo.csie.ncku.edu.tw/PhosphoChain/ or http://aitchisonlab.com/PhosphoChain",2013-07-05 +27789706,CGDB: a database of circadian genes in eukaryotes.,"We report a database of circadian genes in eukaryotes (CGDB, http://cgdb.biocuckoo.org), containing ∼73 000 circadian-related genes in 68 animals, 39 plants and 41 fungi. Circadian rhythm is ∼24 h rhythm in behavioral and physiological processes that exists in almost all organisms on the earth. Defects in the circadian system are highly associated with a number of diseases such as cancers. Although several databases have been established for rhythmically expressed genes, a comprehensive database of cycling genes across phyla is still lacking. From the literature, we collected 1382 genes of which transcript level oscillations were validated using methods such as RT-PCR, northern blot and in situ hybridization. Given that many genes exhibit different oscillatory patterns in different tissues/cells within an organism, we have included information regarding the phase and amplitude of the oscillation, as well as the tissue/cells in which the oscillation was identified. Using these well characterized cycling genes, we have then conducted an orthologous search and identified ∼45 000 potential cycling genes from 148 eukaryotes. Given that significant effort has been devoted to identifying cycling genes by transcriptome profiling, we have also incorporated these results, a total of over 26 000 genes, into our database.",2016-10-26 +27789704,KERIS: kaleidoscope of gene responses to inflammation between species.,"A cornerstone of modern biomedical research is the use of animal models to study disease mechanisms and to develop new therapeutic approaches. 
In order to help the research community to better explore the similarities and differences of genomic response between human inflammatory diseases and murine models, we developed KERIS: kaleidoscope of gene responses to inflammation between species (available at http://www.igenomed.org/keris/). As of June 2016, KERIS includes comparisons of the genomic response of six human inflammatory diseases (burns, trauma, infection, sepsis, endotoxin and acute respiratory distress syndrome) and matched mouse models, using 2257 curated samples from the Inflammation and the Host Response to Injury Glue Grant studies and other representative studies in Gene Expression Omnibus. A researcher can browse, query, visualize and compare the response patterns of genes, pathways and functional modules across different diseases and corresponding murine models. The database is expected to help biologists choosing models when studying the mechanisms of particular genes and pathways in a disease and prioritizing the translation of findings from disease models into clinical studies.",2016-10-26 +27789689,TcoF-DB v2: update of the database of human and mouse transcription co-factors and transcription factor interactions.,"Transcription factors (TFs) play a pivotal role in transcriptional regulation, making them crucial for cell survival and important biological functions. For the regulation of transcription, interactions of different regulatory proteins known as transcription co-factors (TcoFs) and TFs are essential in forming necessary protein complexes. Although TcoFs themselves do not bind DNA directly, their influence on transcriptional regulation and initiation, although indirect, has been shown to be significant, with the functionality of TFs strongly influenced by the presence of TcoFs. In the TcoF-DB v2 database, we collect information on TcoFs. In this article, we describe updates and improvements implemented in TcoF-DB v2. 
TcoF-DB v2 provides several new features that enable exploration of the roles of TcoFs. The content of the database has significantly expanded, and is enriched with information from Gene Ontology, biological pathways, diseases and molecular signatures. TcoF-DB v2 now includes many more TFs; has substantially increased the number of human TcoFs to 958, and now includes information on mouse (418 new TcoFs). TcoF-DB v2 enables the exploration of information on TcoFs and allows investigations into their influence on transcriptional regulation in humans and mice. TcoF-DB v2 can be accessed at http://tcofdb.org/.",2016-10-26 +27789692,"WERAM: a database of writers, erasers and readers of histone acetylation and methylation in eukaryotes.","In this work, we developed a database WERAM (http://weram.biocuckoo.org/) for histone acetyltransferases, histone deacetylases, histone methyltransferases, histone demethylases and acetyl- or methyl-binding proteins, which catalyze, remove and recognize histone acetylation and methylation sites as 'writers', 'erasers' and 'readers', and synergistically determine the 'histone code'. From the scientific literature, we totally collected over 580 experimentally identified histone regulators from eight model organisms, including Homo sapiens, Mus musculus, Rattus norvegicus, Drosophila melanogaster, Caenorhabditis elegans, Arabidopsis thaliana, Schizosaccharomyces pombe and Saccharomyces cerevisiae. We also collected ∼900 site-specific regulator-histone relations from the eight species. According to the experimental evidence, known histone regulators were classified into distinct families. To computationally detect more proteins in eukaryotes, we constructed hidden Markov model (HMM) profiles for histone regulator families. For families without HMM profiles, we also conducted orthologous searches. Totally, WERAM database contained more than 20 thousand non-redundant histone regulators from 148 eukaryotes. 
The detailed annotations and classification information of histone regulators were provided, together with site-specific histone substrates if available.",2016-10-26 +24288371,Pfam: the protein families database.,"Pfam, available via servers in the UK (http://pfam.sanger.ac.uk/) and the USA (http://pfam.janelia.org/), is a widely used database of protein families, containing 14 831 manually curated entries in the current release, version 27.0. Since the last update article 2 years ago, we have generated 1182 new families and maintained sequence coverage of the UniProt Knowledgebase (UniProtKB) at nearly 80%, despite a 50% increase in the size of the underlying sequence database. Since our 2012 article describing Pfam, we have also undertaken a comprehensive review of the features that are provided by Pfam over and above the basic family data. For each feature, we determined the relevance, computational burden, usage statistics and the functionality of the feature in a website context. As a consequence of this review, we have removed some features, enhanced others and developed new ones to meet the changing demands of computational biology. Here, we describe the changes to Pfam content. Notably, we now provide family alignments based on four different representative proteome sequence data sets and a new interactive DNA search interface. We also discuss the mapping between Pfam and known 3D structures.",2013-11-27 +23493402,"CathaCyc, a metabolic pathway database built from Catharanthus roseus RNA-Seq data.","The medicinal plant Madagascar periwinkle (Catharanthus roseus) synthesizes numerous terpenoid indole alkaloids (TIAs), such as the anticancer drugs vinblastine and vincristine. The TIA pathway operates in a complex metabolic network that steers plant growth and survival. 
Pathway databases and metabolic networks reconstructed from 'omics' sequence data can help to discover missing enzymes, study metabolic pathway evolution and, ultimately, engineer metabolic pathways. To date, such databases have mainly been built for model plant species with sequenced genomes. Although genome sequence data are not available for most medicinal plant species, next-generation sequencing is now extensively employed to create comprehensive medicinal plant transcriptome sequence resources. Here we report on the construction of CathaCyc, a detailed metabolic pathway database, from C. roseus RNA-Seq data sets. CathaCyc (version 1.0) contains 390 pathways with 1,347 assigned enzymes and spans primary and secondary metabolism. Curation of the pathways linked with the synthesis of TIAs and triterpenoids, their primary metabolic precursors, and their elicitors, the jasmonate hormones, demonstrated that RNA-Seq resources are suitable for the construction of pathway databases. CathaCyc is accessible online (http://www.cathacyc.org) and offers a range of tools for the visualization and analysis of metabolic networks and 'omics' data. Overlay with expression data from publicly available RNA-Seq resources demonstrated that two well-characterized C. roseus terpenoid pathways, those of TIAs and triterpenoids, are subject to distinct regulation by both developmental and environmental cues. We anticipate that databases such as CathaCyc will become key to the study and exploitation of the metabolism of medicinal plants.",2013-03-14 +26653538,Reproducibility of Differential Proteomic Technologies in CPTAC Fractionated Xenografts.,"The NCI Clinical Proteomic Tumor Analysis Consortium (CPTAC) employed a pair of reference xenograft proteomes for initial platform validation and ongoing quality control of its data collection for The Cancer Genome Atlas (TCGA) tumors. 
These two xenografts, representing basal and luminal-B human breast cancer, were fractionated and analyzed on six mass spectrometers in a total of 46 replicates divided between iTRAQ and label-free technologies, spanning a total of 1095 LC-MS/MS experiments. These data represent a unique opportunity to evaluate the stability of proteomic differentiation by mass spectrometry over many months of time for individual instruments or across instruments running dissimilar workflows. We evaluated iTRAQ reporter ions, label-free spectral counts, and label-free extracted ion chromatograms as strategies for data interpretation (source code is available from http://homepages.uc.edu/~wang2x7/Research.htm ). From these assessments, we found that differential genes from a single replicate were confirmed by other replicates on the same instrument from 61 to 93% of the time. When comparing across different instruments and quantitative technologies, using multiple replicates, differential genes were reproduced by other data sets from 67 to 99% of the time. Projecting gene differences to biological pathways and networks increased the degree of similarity. These overlaps send an encouraging message about the maturity of technologies for proteomic differentiation.",2015-12-22 +27512621,Weighted K-means support vector machine for cancer prediction.,"To date, the support vector machine (SVM) has been widely applied to diverse bio-medical fields to address disease subtype identification and pathogenicity of genetic variants. In this paper, I propose the weighted K-means support vector machine (wKM-SVM) and weighted support vector machine (wSVM), for which I allow the SVM to impose weights to the loss term. Besides, I demonstrate the numerical relations between the objective function of the SVM and weights. Motivated by general ensemble techniques, which are known to improve accuracy, I directly adopt the boosting algorithm to the newly proposed weighted KM-SVM (and wSVM). 
For predictive performance, a range of simulation studies demonstrate that the weighted KM-SVM (and wSVM) with boosting outperforms the standard KM-SVM (and SVM) including but not limited to many popular classification rules. I applied the proposed methods to simulated data and two large-scale real applications in the TCGA pan-cancer methylation data of breast and kidney cancer. In conclusion, the weighted KM-SVM (and wSVM) increases accuracy of the classification model, and will facilitate disease diagnosis and clinical treatment decisions to benefit patients. A software package (wSVM) is publicly available at the R-project webpage (https://www.r-project.org).",2016-07-25 +28614739,"Assessment of soil water, carbon and nitrogen cycling in reseeded grassland on the North Wyke Farm Platform using a process-based model.","The North Wyke Farm Platform (NWFP) generates large volumes of temporally-indexed data that provides a valuable test-bed for agricultural mathematical models in temperate grasslands. In our study, we used the primary datasets generated from the NWFP (https://nwfp.rothamsted.ac.uk/) to validate the SPACSYS model in terms of the dynamics of water loss and forage dry matter yield estimated through cutting. The SPACSYS model is capable of simulating soil water, carbon (C) and nitrogen (N) balance in the soil-plant-atmosphere system. The validated model was then used to simulate the responses of soil water, C and N to reseeding grass cultivars with either high sugar (Lolium perenne L. cv. AberMagic) or deep rooting (Festulolium cv. Prior) traits. Simulation results demonstrated that the SPACSYS model could predict reliably soil water, C and N cycling in reseeded grassland. Compared to AberMagic, the Prior grass could fix more C in the second year following reseeding, whereas less C was lost through soil respiration in the first transition year. 
In comparison to the grass cultivar of the permanent pasture that existed before reseeding, both grasses reduced N losses through runoff and contributed to reducing water loss, especially Prior in relation to the latter. The SPACSYS model could predict these differences as supported by the rich dataset from the NWFP, providing a tool for future predictions on less characterized pasture.",2017-06-12 +27153719,MetaKTSP: a meta-analytic top scoring pair method for robust cross-study validation of omics prediction analysis.,"

Motivation

Supervised machine learning is widely applied to transcriptomic data to predict disease diagnosis, prognosis or survival. Robust and interpretable classifiers with high accuracy are usually favored for their clinical and translational potential. The top scoring pair (TSP) algorithm is an example that applies a simple rank-based algorithm to identify rank-altered gene pairs for classifier construction. Although many classification methods perform well in cross-validation of single expression profile, the performance usually greatly reduces in cross-study validation (i.e. the prediction model is established in the training study and applied to an independent test study) for all machine learning methods, including TSP. The failure of cross-study validation has largely diminished the potential translational and clinical values of the models. The purpose of this article is to develop a meta-analytic top scoring pair (MetaKTSP) framework that combines multiple transcriptomic studies and generates a robust prediction model applicable to independent test studies.

Results

We proposed two frameworks, by averaging TSP scores or by combining P-values from individual studies, to select the top gene pairs for model construction. We applied the proposed methods in simulated data sets and three large-scale real applications in breast cancer, idiopathic pulmonary fibrosis and pan-cancer methylation. The result showed superior performance of cross-study validation accuracy and biomarker selection for the new meta-analytic framework. In conclusion, combining multiple omics data sets in the public domain increases robustness and accuracy of the classification model that will ultimately improve disease understanding and clinical treatment decisions to benefit patients.

Availability and implementation

An R package MetaKTSP is available online (http://tsenglab.biostat.pitt.edu/software.htm).

Contact

ctseng@pitt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-02 +27387388,Identification and Validation of HCC-specific Gene Transcriptional Signature for Tumor Antigen Discovery.,"A novel two-step bioinformatics strategy was applied for identification of signatures with therapeutic implications in hepatitis-associated HCC. Transcriptional profiles from HBV- and HCV-associated HCC samples were compared with non-tumor liver controls. Resulting HCC modulated genes were subsequently compared with different non-tumor tissue samples. Two related signatures were identified, namely ""HCC-associated"" and ""HCC-specific"". Expression data were validated by RNA-Seq analysis carried out on unrelated HCC samples and protein expression was confirmed according to The Human Protein Atlas"" (http://proteinatlas.org/), a public repository of immunohistochemistry data. Among all, aldo-keto reductase family 1 member B10, and IGF2 mRNA-binding protein 3 were found strictly HCC-specific with no expression in 18/20 normal tissues. Target peptides for vaccine design were predicted for both proteins associated with the most prevalent HLA-class I and II alleles. The described novel strategy showed to be feasible for identification of HCC-specific proteins as highly potential target for HCC immunotherapy.",2016-07-08 +30051445,[Treatment of 21 cases of chronic radiation intestinal injury by staging ileostomy and closure operation].,"

Objective

To summarize the application of staged ileostomy and closure operation combined with nutritional support therapy in the treatment of chronic radiation intestinal injury (CRII).

Methods

Clinical data of patients with definite radiation history and pathological diagnosis of CRII receiving treatment at Department of General Surgery, Jinling Hospital from January 2012 to December 2016 were retrospectively analyzed. Patients who were diagnosed with tumor recurrence during operation or by postoperative pathology were excluded. Patients undergoing stage I ileostomy and stage II closure operation combined with nutrition support therapy were enrolled in the cohort. Detailed scheme of stage I ileostomy and therapeutic time were determined by clinical symptoms and nutritional status. While performing ileostomy, the removal of intestinal lesions depended on range and degree of intestinal injury. Nutritional support therapy and other symptom-relieving therapy were offered after surgery. Timing for stage II closure operation was decided according to nutritional status of patients. Lesions of remaining intestine were determined during operation, then necessary intestinal resection and closure operation were performed. Adhesion classification of radiation intestinal injury (total five levels) proposed by our center was adopted to evaluate the level and range of intestinal lesions. Level 0 indicated no adhesion between injured intestinal loop and surrounding organs; level 1 indicated that the adhesion and fibrosis were limited to right pelvis; level 2 indicated that the adhesion included all pelvis and the adhesion was severe and difficult to divide; level 3 was the forward extension of level 2 adhesion, which was between injured intestinal loop and anterior pelvic wall; level 4 was the upward extension of level 3 adhesion, which was between injured intestinal loop and anterior abdominal wall. Clavien-Dindo classification (lower level means milder symptom) and complication comprehensive index (CCI, lower CCI means milder symptom) calculated by on-line program (http://www.assessurgery.com) were applied to estimate postoperative complications. 
Resected intestinal length, adhesion classification of radiation intestinal injury, postoperative complications and time to total enteral nutrition (TEN) of both surgeries and nutritional status (body mass index and serum albumin) were compared between stage I ileostomy and stage II closure operation.

Results

Twenty-one patients were enrolled in the research with 2 males and 19 females. Primary tumor included 14 cervical cancers, 3 rectal cancers, 1 endometrial cancer, 1 ovarian carcinoma, 1 seminoma and 1 mixed germ cell tumor. Median interval between the end of radiation and radiation intestinal injury was 7 (2 to 91) months and median interval between the incidence of radiation intestinal injury and ileostomy was 5 (<1 to 75) months. Operative indications for ileostomy were obstruction in 14 cases (66.7%), intestinal internal fistula in 1 case (4.8%), intestinal outer fistula in 2 cases (9.5%), radiation proctitis in 3 cases (14.3%) and acute intestinal perforation in 1 case (4.8%). Average age of patients undergoing stage I ileostomy was 48 (18 to 60) years with BMI (17.0±2.7) kg/m2 and serum albumin (36.8±5.2) g/L. Patients undergoing stage II closure operation had significantly higher BMI [(18.4±2.0) kg/m2, t=-2.747, P=0.013] and higher serum albumin [(40.8±3.6) g/L, t=-3.505, P=0.002]. Average interval between stage I ileostomy and stage II closure surgery was (197±77) days. Resected intestinal length of stage I ileostomy was significantly longer than that of stage II closure surgery [(74.0±56.1) cm vs. (15.5±10.4) cm, t=4.547, P=0.000]. Abdominal adhesion classification of stage II ileostomy plus closure operation was significantly better as compared to stage I ileostomy (Z=-3.347, P=0.001). Morbidity of postoperative complications in stage I ileostomy was 52.4% (11/21), which decreased to 19.0% (4/21) in stage II operation with significant difference (χ²=5.081, P=0.024). Postoperative complication Clavien-Dindo classification and CCI scores in stage II operation were significantly lower than those in stage I operation (P=0.006 and P=0.002). Till June 2017, 17 of 21 patients (81.0%) were followed-up for (28±18) months. Except for 2 cases of relapse, 15 patients recovered to normal diet.

Conclusions

Application of staged ileostomy and closure operation combined with nutritional support therapy to CRII is in accordance with the principle of injury control surgery. Furthermore, this staged approach is safe and effective, can reduce the morbidity and the severity of complications, and can also be helpful to decide the margin for intestinal resection.",2018-07-01 +23180798,The International Nucleotide Sequence Database Collaboration.,"The International Nucleotide Sequence Database Collaboration (INSDC; http://www.insdc.org), one of the longest-standing global alliances of biological data archives, captures, preserves and provides comprehensive public domain nucleotide sequence information. Three partners of the INSDC work in cooperation to establish formats for data and metadata and protocols that facilitate reliable data submission to their databases and support continual data exchange around the world. In this article, the INSDC current status and update for the year of 2012 are presented. Among discussed items of international collaboration meeting in 2012, BioSample database and changes in submission are described as topics.",2012-11-24 +,SNPpath: Characterizing cattle SNPs by enriched pathway terms,"High‐density single nucleotide polymorphism (SNP) microarrays have made large‐scale genome‐wide association studies (GWAS) and genomic selection (GS) feasible. Valuable insight into the genetic basis underlying complex polygenic traits will likely be gained by considering functionally related sets of genes simultaneously. SNPpath, a suite of computer‐generated imagery‐based web servers has been developed to automatically annotate and characterize cattle SNPs by enriched KEGG (Kyoto Encyclopedia of Genes and Genomes) pathway terms. 
The SNPpath allows users to navigate and analyze large SNP sets and is the only web server currently providing pathway annotations of cattle SNPs in National Center for Biotechnology Information's dbSNP database and three commercial platforms. Hence, we describe SNPpath and provide details of the query options, as well as biological examples of use. The SNPpath may be favorable for the analysis of combining SNP association analysis with pathway‐driven gene set enrichment analysis and is freely available at http://klab.sjtu.edu.cn/SNPpath.",2012-04-01 +26265041,Software Analysis of Uncorrelated MS1 Peaks for Discovery of Post-Translational Modifications.,"The goal in proteomics to identify all peptides in a complex mixture has been largely addressed using various LC MS/MS approaches, such as data dependent acquisition, SRM/MRM, and data independent acquisition instrumentation. Despite these developments, many peptides remain unsequenced, often due to low abundance, poor fragmentation patterns, or data analysis difficulties. Many of the unidentified peptides exhibit strong evidence in high resolution MS(1) data and are frequently post-translationally modified, playing a significant role in biological processes. Proteomics Workbench (PWB) software was developed to automate the detection and visualization of all possible peptides in MS(1) data, reveal candidate peptides not initially identified, and build inclusion lists for subsequent MS(2) analysis to uncover new identifications. We used this software on existing data on the autophagy regulating kinase Ulk1 as a proof of concept for this method, as we had already manually identified a number of phosphorylation sites Dorsey, F. C. et al (J. Proteome. Res. 8(11), 5253-5263 (2009)). PWB found all previously identified sites of phosphorylation. The software has been made freely available at http://www.proteomicsworkbench.com . 
Graphical Abstract ᅟ.",2015-08-12 +25748288,Characterization and distribution of repetitive elements in association with genes in the human genome.,"Repetitive elements constitute more than 50% of the human genome. Recent studies implied that the complexity of living organisms is not just a direct outcome of a number of coding sequences; the repetitive elements, which do not encode proteins, may also play a significant role. Though scattered studies showed that repetitive elements in the regulatory regions of a gene control gene expression, no systematic survey has been done to report the characterization and distribution of various types of these repetitive elements in the human genome. Sequences from 5' and 3' untranslated regions and upstream and downstream of a gene were downloaded from the Ensembl database. The repetitive elements in the neighboring of each gene were identified and classified using cross-matching implemented in the RepeatMasker. The annotation and distribution of distinct classes of repetitive elements associated with individual gene were collected to characterize genes in association with different types of repetitive elements using systems biology program. We identified a total of 1,068,400 repetitive elements which belong to 37-class families and 1235 subclasses that are associated with 33,761 genes and 57,365 transcripts. In addition, we found that the tandem repeats preferentially locate proximal to the transcription start site (TSS) of genes and the major function of these genes are involved in developmental processes. On the other hand, interspersed repetitive elements showed a tendency to be accumulated at distal region from the TSS and the function of interspersed repeat-containing genes took part in the catabolic/metabolic processes. Results from the distribution analysis were collected and used to construct a gene-based repetitive element database (GBRED; http://www.binfo.ncku.edu.tw/GBRED/index.html). 
A user-friendly web interface was designed to provide the information of repetitive elements associated with any particular gene(s). This is the first study focusing on the gene-associated repetitive elements in the human genome. Our data showed distinct genes associated with different kinds of repetitive element and implied such combination may shape the function of these genes. Aside from the conventional view of these elements in genome evolution, results from this study offer a systemic review to facilitate exploitation of these elements in genome function.",2015-02-27 +23547897,RegTransBase--a database of regulatory sequences and interactions based on literature: a resource for investigating transcriptional regulation in prokaryotes.,"

Background

Due to the constantly growing number of sequenced microbial genomes, comparative genomics has been playing a major role in the investigation of regulatory interactions in bacteria. Regulon inference mostly remains a field of semi-manual examination since absence of a knowledgebase and informatics platform for automated and systematic investigation restricts opportunities for computational prediction. Additionally, confirming computationally inferred regulons by experimental data is critically important.

Description

RegTransBase is an open-access platform with a user-friendly web interface publicly available at http://regtransbase.lbl.gov. It consists of two databases - a manually collected hierarchical regulatory interactions database based on more than 7000 scientific papers which can serve as a knowledgebase for verification of predictions, and a large set of curated by experts transcription factor binding sites used in regulon inference by a variety of tools. RegTransBase captures the knowledge from published scientific literature using controlled vocabularies and contains various types of experimental data, such as: the activation or repression of transcription by an identified direct regulator; determination of the transcriptional regulatory function of a protein (or RNA) directly binding to DNA or RNA; mapping of binding sites for a regulatory protein; characterization of regulatory mutations. Analysis of the data collected from literature resulted in the creation of Putative Regulons from Experimental Data that are also available in RegTransBase.

Conclusions

RegTransBase is a powerful user-friendly platform for the investigation of regulation in prokaryotes. It uses a collection of validated regulatory sequences that can be easily extracted and used to infer regulatory interactions by comparative genomics techniques thus assisting researchers in the interpretation of transcriptional regulation data.",2013-04-02 +28083252,A Prototype Knowledge-Sharing Service for Clinical Decision Support Artifacts.,"This article, by researchers from Partners HealthCare and the RAND Corporation, primarily describes the work associated with Task 4.8 of the Advancing Clinical Decision Support (ACDS) effort, a project intended to accelerate the effective use of computer-based clinical decision support (CDS) interventions to facilitate evidence-based clinical practice and the meaningful use of health information technology. The key objectives of Task 4.8 were to develop CDS artifacts for at least 20 interventions of different types, targeted toward guidelines and clinical conditions called for in the 2011 meaningful use criteria, and to disseminate the tools, content, and materials through a knowledge-sharing service (KSS) that could potentially be deployed on a national scale. The ACDS interventions or artifacts were built utilizing the extensible markup language (XML) schema developed by the Clinical Decision Support Consortium (CDSC) project and were published on the CDSC portal (http://cdsportal.partners.org/), which functions as the ACDS KSS. While the original CDSC Level 3 XML schema adequately supported the development of the ACDS artifacts, the authors worked with the CDSC team to expand the schema to support additional intervention types (order sets, documentation templates, infobuttons, relevant data display, and value sets). Twenty-two CDS artifacts and 16 value sets were developed that cover the five CDS intervention types. Three custom style sheets were developed to render the XML files in human-readable form. 
The authors recommend investment in the foundational building blocks for shareable CDS, such as dictionaries and value sets, as these will be essential. The CDS content on the portal will need to be expanded and maintained in order for it to remain a viable resource for CDS implementers.",2012-06-01 +23508969,CistromeFinder for ChIP-seq and DNase-seq data reuse.,"

Summary

Chromatin immunoprecipitation and DNase I hypersensitivity assays with high-throughput sequencing have greatly accelerated the understanding of transcriptional and epigenetic regulation, although data reuse for the community of experimental biologists has been challenging. We created a data portal CistromeFinder that can help query, evaluate and visualize publicly available Chromatin immunoprecipitation and DNase I hypersensitivity assays with high-throughput sequencing data in human and mouse. The database currently contains 6378 samples over 4391 datasets, 313 factors and 102 cell lines or cell populations. Each dataset has gone through a consistent analysis and quality control pipeline; therefore, users could evaluate the overall quality of each dataset before examining binding sites near their genes of interest. CistromeFinder is integrated with UCSC genome browser for visualization, Primer3Plus for ChIP-qPCR primer design and CistromeMap for submitting newly available datasets. It also allows users to leave comments to facilitate data evaluation and update.

Availability

http://cistrome.org/finder.

Contact

xsliu@jimmy.harvard.edu or henry_long@dfci.harvard.edu.",2013-03-18 +23897986,EADB: an estrogenic activity database for assessing potential endocrine activity.,"Endocrine-active chemicals can potentially have adverse effects on both humans and wildlife. They can interfere with the body's endocrine system through direct or indirect interactions with many protein targets. Estrogen receptors (ERs) are one of the major targets, and many endocrine disruptors are estrogenic and affect the normal estrogen signaling pathways. However, ERs can also serve as therapeutic targets for various medical conditions, such as menopausal symptoms, osteoporosis, and ER-positive breast cancer. Because of the decades-long interest in the safety and therapeutic utility of estrogenic chemicals, a large number of chemicals have been assayed for estrogenic activity, but these data exist in various sources and different formats that restrict the ability of regulatory and industry scientists to utilize them fully for assessing risk-benefit. To address this issue, we have developed an Estrogenic Activity Database (EADB; http://www.fda.gov/ScienceResearch/BioinformaticsTools/EstrogenicActivityDatabaseEADB/default.htm) and made it freely available to the public. EADB contains 18,114 estrogenic activity data points collected for 8212 chemicals tested in 1284 binding, reporter gene, cell proliferation, and in vivo assays in 11 different species. The chemicals cover a broad chemical structure space and the data span a wide range of activities. A set of tools allow users to access EADB and evaluate potential endocrine activity of chemicals. 
As a case study, a classification model was developed using EADB for predicting ER binding of chemicals.",2013-07-28 +28542514,PanWeb: A web interface for pan-genomic analysis.,"With increased production of genomic data since the advent of next-generation sequencing (NGS), there has been a need to develop new bioinformatics tools and areas, such as comparative genomics. In comparative genomics, the genetic material of an organism is directly compared to that of another organism to better understand biological species. Moreover, the exponentially growing number of deposited prokaryote genomes has enabled the investigation of several genomic characteristics that are intrinsic to certain species. Thus, a new approach to comparative genomics, termed pan-genomics, was developed. In pan-genomics, various organisms of the same species or genus are compared. Currently, there are many tools that can perform pan-genomic analyses, such as PGAP (Pan-Genome Analysis Pipeline), Panseq (Pan-Genome Sequence Analysis Program) and PGAT (Prokaryotic Genome Analysis Tool). Among these software tools, PGAP was developed in the Perl scripting language and its reliance on UNIX platform terminals and its requirement for an extensive parameterized command line can become a problem for users without previous computational knowledge. Thus, the aim of this study was to develop a web application, known as PanWeb, that serves as a graphical interface for PGAP. In addition, using the output files of the PGAP pipeline, the application generates graphics using custom-developed scripts in the R programming language. PanWeb is freely available at http://www.computationalbiology.ufpa.br/panweb.",2017-05-24 +24214987,UniHI 7: an enhanced database for retrieval and interactive analysis of human molecular interaction networks.,"Unified Human Interactome (UniHI) (http://www.unihi.org) is a database for retrieval, analysis and visualization of human molecular interaction networks. 
Its primary aim is to provide a comprehensive and easy-to-use platform for network-based investigations to a wide community of researchers in biology and medicine. Here, we describe a major update (version 7) of the database previously featured in NAR Database Issue. UniHI 7 currently includes almost 350,000 molecular interactions between genes, proteins and drugs, as well as numerous other types of data such as gene expression and functional annotation. Multiple options for interactive filtering and highlighting of proteins can be employed to obtain more reliable and specific network structures. Expression and other genomic data can be uploaded by the user to examine local network structures. Additional built-in tools enable ready identification of known drug targets, as well as of biological processes, phenotypes and pathways enriched with network proteins. A distinctive feature of UniHI 7 is its user-friendly interface designed to be utilized in an intuitive manner, enabling researchers less acquainted with network analysis to perform state-of-the-art network-based investigations.",2013-11-08 +27747270,Data on examining the role of human capital in the energy-growth nexus across countries.,This article describes two publicly available data sources: the new generation of Penn World Table (www.ggdc.net/pwt) and the BP Statistical Review of World Energy (http://www.bp.com/statisticalreview) which can be used to examine the role of human capital in the energy-growth nexus across countries. The critical human capital measure across countries is for the first time made available in the Penn World Table 8.0 and it enables empirical researchers to conduct cross-country analysis involving human capital much easily than ever before.,2016-09-23 +27384039,The Global Food System as a Transport Pathway for Hazardous Chemicals: The Missing Link between Emissions and Exposure.,"

Background

Food is a major pathway for human exposure to hazardous chemicals. The modern food system is becoming increasingly complex and globalized, but models for food-borne exposure typically assume locally derived diets or use concentrations directly measured in foods without accounting for food origin. Such approaches may not reflect actual chemical intakes because concentrations depend on food origin, and representative analysis is seldom available. Processing, packaging, storage, and transportation also impart different chemicals to food and are not yet adequately addressed. Thus, the link between environmental emissions and realistic human exposure is effectively broken.

Objectives

We discuss the need for a fully integrated treatment of the modern industrialized food system, and we propose strategies for using existing models and relevant supporting data sources to track chemicals during production, processing, packaging, storage, and transport.

Discussion

Fate and bioaccumulation models describe how chemicals distribute in the environment and accumulate through local food webs. Human exposure models can use concentrations in food to determine body burdens based on individual or population characteristics. New models now include the impacts of processing and packaging but are far from comprehensive. We propose to close the gap between emissions and exposure by utilizing a wider variety of models and data sources, including global food trade data, processing, and packaging models.

Conclusions

A comprehensive approach that takes into account the complexity of the modern global food system is essential to enable better prediction of human exposure to chemicals in food, sound risk assessments, and more focused risk abatement strategies. Citation: Ng CA, von Goetz N. 2017. The global food system as a transport pathway for hazardous chemicals: the missing link between emissions and exposure. Environ Health Perspect 125:1-7; http://dx.doi.org/10.1289/EHP168.",2016-07-06 +23702556,Genomic region operation kit for flexible processing of deep sequencing data.,"Computational analysis of data produced in deep sequencing (DS) experiments is challenging due to large data volumes and requirements for flexible analysis approaches. Here, we present a mathematical formalism based on set algebra for frequently performed operations in DS data analysis to facilitate translation of biomedical research questions to language amenable for computational analysis. With the help of this formalism, we implemented the Genomic Region Operation Kit (GROK), which supports various DS-related operations such as preprocessing, filtering, file conversion, and sample comparison. GROK provides high-level interfaces for R, Python, Lua, and command line, as well as an extension C++ API. It supports major genomic file formats and allows storing custom genomic regions in efficient data structures such as red-black trees and SQL databases. To demonstrate the utility of GROK, we have characterized the roles of two major transcription factors (TFs) in prostate cancer using data from 10 DS experiments. GROK is freely available with a user guide from >http://csbi.ltdk.helsinki.fi/grok/.",2013-01-01 +22833353,Validating an interlingual metanorm for emotional analysis of texts.,"In this article, we present a set of 12 norms that characterize emotional terms in French, English, German, Spanish, Italian, and Finnish. 
The high correlation between the norm values in the two emotional dimensions of valence and arousal suggests an interlingual homogeneity of emotional representations and allows a significant metanorm-EMONORM-to be established with 6,383 terms characterized in valence and 4,345 terms characterized in arousal. This metanorm is a resource for creating experimental materials in studies on language and emotions. Furthermore, we perform three tests using EMONORM, with the objectives of (1) identifying basic emotions from their valence and arousal values, (2) determining the orientation of texts referring to positive and negative emotions, and (3) evaluating the intensity of emotions expressed in texts. The results are highly similar to those for human judgments. Finally, we present EMOVAL/SEMOTEX, a Web application for static and dynamic valence and arousal emotional analysis of texts using EMONORM ( http://www.semotex.fr ).",2012-12-01 +28904192,Role of Herpes Simplex Virus 1 γ34.5 in the Regulation of IRF3 Signaling. ,"During viral infection, pattern recognition receptors (PRRs) and their associated adaptors recruit TANK-binding kinase 1 (TBK1) to activate interferon regulatory factor 3 (IRF3), resulting in production of type I interferons (IFNs). ICP0 and ICP34.5 are among the proteins encoded by herpes simplex virus 1 (HSV-1) that modulate type I IFN signaling. We constructed a recombinant virus (ΔXX) that lacks amino acids 87 to 106, a portion of the previously described TBK1-binding domain of the γ34.5 gene (D. Verpooten, Y. Ma, S. Hou, Z. Yan, and B. He, J Biol Chem 284:1097-1105, 2009, https://doi.org/10.1074/JBC.M805905200). These 20 residues are outside the γ34.5 beclin1-binding domain (BBD) that interacts with beclin1 and regulates autophagy. Unexpectedly, ΔXX showed no deficit in replication in vivo in a variety of tissues and showed virulence comparable to that of wild-type and marker-rescued viruses following intracerebral infection. 
ΔXX was fully capable of mediating the dephosphorylation of eIF2α, and the virus was capable of controlling the phosphorylation of IRF3. In contrast, a null mutant in γ34.5 failed to control IRF3 phosphorylation due to an inability of the mutant to sustain expression of ICP0. Our data show that while γ34.5 regulates IRF3 phosphorylation, the TBK1-binding domain itself has no impact on IRF3 phosphorylation or on replication and pathogenesis in mice.IMPORTANCE Interferons (IFNs) are potent activators of a variety of host responses that serve to control virus infections. The Herpesviridae have evolved countermeasures to IFN responses. Herpes simplex virus 1 (HSV-1) encodes the multifunctional neurovirulence protein ICP34.5. In this study, we investigated the biological relevance of the interaction between ICP34.5 and TANK-binding kinase 1 (TBK1), an activator of IFN responses. Here, we establish that although ICP34.5 binds TBK1 under certain conditions through a TBK1-binding domain (TBD), there was no direct impact of the TBD on viral replication or virulence in mice. Furthermore, we showed that activation of IRF3, a substrate of TBK1, was independent of the TBD. Instead, we provided evidence that the ability of ICP34.5 to control IRF3 activation is through its ability to reverse translational shutoff and sustain the expression of other IFN inhibitors encoded by the virus. This work provides new insights into the immunomodulatory functions of ICP34.5.",2017-11-14 +26818838,A Bayesian approach for estimating allele-specific expression from RNA-Seq data with diploid genomes.,"

Background

RNA-sequencing (RNA-Seq) has become a popular tool for transcriptome profiling in mammals. However, accurate estimation of allele-specific expression (ASE) based on alignments of reads to the reference genome is challenging, because it contains only one allele on a mosaic haploid genome. Even with the information of diploid genome sequences, precise alignment of reads to the correct allele is difficult because of the high-similarity between the corresponding allele sequences.

Results

We propose a Bayesian approach to estimate ASE from RNA-Seq data with diploid genome sequences. In the statistical framework, the haploid choice is modeled as a hidden variable and estimated simultaneously with isoform expression levels by variational Bayesian inference. Through the simulation data analysis, we demonstrate the effectiveness of the proposed approach in terms of identifying ASE compared to the existing approach. We also show that our approach enables better quantification of isoform expression levels compared to the existing methods, TIGAR2, RSEM and Cufflinks. In the real data analysis of the human reference lymphoblastoid cell line GM12878, some autosomal genes were identified as ASE genes, and skewed paternal X-chromosome inactivation in GM12878 was identified.

Conclusions

The proposed method, called ASE-TIGAR, enables accurate estimation of gene expression from RNA-Seq data in an allele-specific manner. Our results show the effectiveness of utilizing personal genomic information for accurate estimation of ASE. An implementation of our method is available at http://nagasakilab.csml.org/ase-tigar .",2016-01-11 +26355659,Notice to Readers: The Effect of Falsified Clostridium difficile Infections Surveillance Data on Results Reported in MMWR.,"In 2012, MMWR published the report, ""Vital Signs: Preventing Clostridium difficile Infections,"" which examined Clostridium difficile infection (CDI) surveillance data. This report contained several errors pertaining to Emerging Infections Program (EIP) data. These errors occurred as a result of scientific misconduct by a former employee of the Oregon Health Authority. The Public Health Service Office of Research Integrity has determined that the former employee falsified or fabricated data for 56 Oregon EIP CDI case report forms (https://ori.hhs.gov/content/case-summary-asherin-ryan). The authors re-analyzed the EIP data to determine if the removal of all Oregon CDI cases (57 total cases) from the 10,342 cases included in the original publication altered the previously reported results. It did not. Re-analysis confirms the conclusions in the original report. Data in the original report from sources other than the Oregon Health Authority (i.e., from other EIP sites, the National Healthcare Safety Network, and Illinois, Massachusetts, and New York CDI prevention programs) were not involved in the research misconduct.Errata for the 2012 report have been published in this issue of MMWR.",2015-09-11 +28662317,"ClonEstiMate, a Bayesian method for quantifying rates of clonality of populations genotyped at two-time steps.","Partial clonality is commonly used in eukaryotes and has large consequences for their evolution and ecology. Assessing accurately the relative importance of clonal vs. 
sexual reproduction matters for studying and managing such species. Here, we proposed a Bayesian approach, ClonEstiMate, to infer rates of clonality c from populations sampled twice over a short time interval, ideally one generation time. The method relies on the likelihood of the transitions between genotype frequencies of ancestral and descendent populations, using an extended Wright-Fisher model explicitly integrating reproductive modes. Our model provides posterior probability distribution of inferred c, given the assumed rates of mutation, as well as inbreeding and selfing when occurring. Tested under various conditions, this model provided accurate inferences of c, especially when the amount of information was modest, that is low sample sizes, few loci, low polymorphism and strong linkage disequilibrium. Inferences remained robust when mutation models and rates were misinformed. However, the method was sensitive to moderate frequencies of null alleles and when the time interval between required samplings exceeding two generations. Misinformed rates on mating modes (inbreeding and selfing) also resulted in biased inferences. Our method was tested on eleven data sets covering five partially clonal species, for which the extent of clonality was formerly deciphered. It delivered highly consistent results with previous information on the biology of those species. ClonEstiMate represents a powerful tool for detecting and inferring clonality in finite populations, genotyped with SNPs or microsatellites. It is freely available at https://www6.rennes.inra.fr/igepp_eng/Productions/Software.",2017-08-08 +23327937,Evidence classification of high-throughput protocols and confidence integration in RegulonDB.,"RegulonDB provides curated information on the transcriptional regulatory network of Escherichia coli and contains both experimental data and computationally predicted objects. 
To account for the heterogeneity of these data, we introduced in version 6.0, a two-tier rating system for the strength of evidence, classifying evidence as either 'weak' or 'strong' (Gama-Castro,S., Jimenez-Jacinto,V., Peralta-Gil,M. et al. RegulonDB (Version 6.0): gene regulation model of Escherichia Coli K-12 beyond transcription, active (experimental) annotated promoters and textpresso navigation. Nucleic Acids Res., 2008;36:D120-D124.). We now add to our classification scheme the classification of high-throughput evidence, including chromatin immunoprecipitation (ChIP) and RNA-seq technologies. To integrate these data into RegulonDB, we present two strategies for the evaluation of confidence, statistical validation and independent cross-validation. Statistical validation involves verification of ChIP data for transcription factor-binding sites, using tools for motif discovery and quality assessment of the discovered matrices. Independent cross-validation combines independent evidence with the intention to mutually exclude false positives. Both statistical validation and cross-validation allow to upgrade subsets of data that are supported by weak evidence to a higher confidence level. Likewise, cross-validation of strong confidence data extends our two-tier rating system to a three-tier system by introducing a third confidence score 'confirmed'. Database URL: http://regulondb.ccg.unam.mx/",2013-01-17 +,"Phylogeny and systematics of the bee genus Osmia (Hymenoptera: Megachilidae) with emphasis on North American Melanosmia: subgenera, synonymies and nesting biology revisited","The predominantly Holarctic bee genus Osmia Panzer is species‐rich and behaviourally diverse. A robust phylogeny of this genus is important for understanding the evolution of the immense variety of morphological and behavioural traits exhibited by this group. 
We infer a phylogeny of Osmia using DNA sequence data obtained from three nuclear genes (elongation factor 1‐α, LW‐rhodopsin and CAD) and the mitochondrial gene COI. Our taxon sampling places special attention on North American members of the subgenus Melanosmia Schmiedeknecht; we discuss the novel placement of a number of species traditionally assigned to O. (Melanosmia) and examine the relative support for alternative classifications of this species‐rich subgenus. We use this new phylogeny to guide a reassessment of morphological and behavioural characters within Osmia. Our results provide support for the recognition of Osmia (Hapsidosmia), subgen.n., a monotypic subgenus containing Osmia iridis Cockerell & Titus. We synonymize Osmia (Mystacosmia) Snelling under O. (Melanosmia), syn.n. We synonymize Osmia (Acanthosmioides) Ashmead under O. (Melanosmia), syn.n., propose ‘odontogaster species group’ as a replacement for the subgeneric name Acanthosmioides, and refine the morphological characters that serve to diagnose the species group. We additionally propose ‘nigrifrons species group’ for a clade within O. (Melanosmia) containing most species formerly placed in Osmia (Centrosmia) Robertson. We demonstrate more cohesive patterns of nest substrate use in the nigrifrons and odontogaster species groups than was previously believed to occur, reconsider character polarity of aspects of the female mandible, and show that a large number of morphological characters have evolved convergently within the genus. In order to facilitate discussion of relevant taxa, we propose the following 15 new synonymies: O. bakeri Sandhouse under O. melanopleura Cockerell; O. crenulaticornis Michener under O. pinorum Cockerell; O. claremontensis Michener under O. sedula Sandhouse; O. cockerelli Sandhouse under O. dakotensis Michener; O. francisconis White under O. enixa Sandhouse; O. hurdi White under O. austromaritima Michener; O. sladeni Sandhouse under O. nifoata Cockerell; O. 
titusi Cockerell under O. phenax Cockerell; O. subtrevoris Cockerell, O. physariae Cockerell, and O. erecta Michener under O. giliarum Cockerell; and O. universitatis Cockerell, O. integrella Cockerell, O. amala Cockerell, and O. metitia Cockerell under O. nigrifrons Cresson, syn.n. We remove O. wyomingensis Michener from synonymy with O. nifoata Cockerell, stat.n., and O. pinorum Cockerell from synonymy with O. physariae Cockerell, stat.n. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:A3E7D63B‐5C4C‐4ACF‐BF33‐48E5C5DD1B0D.",2013-07-01 +26051252,Light-weight reference-based compression of FASTQ data.,"

Background

The exponential growth of next generation sequencing (NGS) data has posed big challenges to data storage, management and archive. Data compression is one of the effective solutions, where reference-based compression strategies can typically achieve superior compression ratios compared to the ones not relying on any reference.

Results

This paper presents a lossless light-weight reference-based compression algorithm namely LW-FQZip to compress FASTQ data. The three components of any given input, i.e., metadata, short reads and quality score strings, are first parsed into three data streams in which the redundancy information are identified and eliminated independently. Particularly, well-designed incremental and run-length-limited encoding schemes are utilized to compress the metadata and quality score streams, respectively. To handle the short reads, LW-FQZip uses a novel light-weight mapping model to fast map them against external reference sequence(s) and produce concise alignment results for storage. The three processed data streams are then packed together with some general purpose compression algorithms like LZMA. LW-FQZip was evaluated on eight real-world NGS data sets and achieved compression ratios in the range of 0.111-0.201. This is comparable or superior to other state-of-the-art lossless NGS data compression algorithms.

Conclusions

LW-FQZip is a program that enables efficient lossless FASTQ data compression. It contributes to the state of art applications for NGS data storage and transmission. LW-FQZip is freely available online at: http://csse.szu.edu.cn/staff/zhuzx/LWFQZip.",2015-06-09 +26543172,Fast Optimized Cluster Algorithm for Localizations (FOCAL): a spatial cluster analysis for super-resolved microscopy.,"

Motivation

Single-molecule localization microscopy (SMLM) provides images of cellular structure at a resolution an order of magnitude below what can be achieved by conventional diffraction limited techniques. The concomitantly larger data sets generated by SMLM require increasingly efficient image analysis software. Density based clustering algorithms, with the most ubiquitous being DBSCAN, are commonly used to quantitatively assess sub-cellular assemblies. DBSCAN, however, is slow, scaling with the number of localizations like O(n log (n)) at best, and its performance is highly dependent upon a subjectively selected choice of parameters.

Results

We have developed a grid-based clustering algorithm FOCAL, which explicitly accounts for several dominant artifacts arising in SMLM image reconstructions. FOCAL is fast and efficient, scaling like O(n), and only has one set parameter. We assess DBSCAN and FOCAL on experimental dSTORM data of clusters of eukaryotic RNAP II and PALM data of the bacterial protein H-NS, then provide a detailed comparison via simulation. FOCAL performs comparable and often superior to DBSCAN while yielding a significantly faster analysis. Additionally, FOCAL provides a novel method for filtering out of focus clusters from complex SMLM images.

Availability and implementation

The data and code are available at: http://www.utm.utoronto.ca/milsteinlab/resources/Software/FOCAL/ CONTACT: josh.milstein@utoronto.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-04 +28617225,A greedy alignment-free distance estimator for phylogenetic inference.,"

Background

Alignment-free sequence comparison approaches have been garnering increasing interest in various data- and compute-intensive applications such as phylogenetic inference for large-scale sequences. While k-mer based methods are predominantly used in real applications, the average common substring (ACS) approach is emerging as one of the prominent alignment-free approaches. This ACS approach has been further generalized by some recent work, either greedily or exactly, by allowing a bounded number of mismatches in the common substrings.

Results

We present ALFRED-G, a greedy alignment-free distance estimator for phylogenetic tree reconstruction based on the concept of the generalized ACS approach. In this algorithm, we have investigated a new heuristic to efficiently compute the lengths of common strings with mismatches allowed, and have further applied this heuristic to phylogeny reconstruction. Performance evaluation using real sequence datasets shows that our heuristic is able to reconstruct comparable, or even more accurate, phylogenetic tree topologies than the kmacs heuristic algorithm at highly competitive speed.

Conclusions

ALFRED-G is an alignment-free heuristic for evolutionary distance estimation between two biological sequences. This algorithm is implemented in C++ and has been incorporated into our open-source ALFRED software package ( http://alurulab.cc.gatech.edu/phylo ).",2017-06-07 +23479348,"EDAM: an ontology of bioinformatics operations, types of data and identifiers, topics and formats.","

Motivation

Advancing the search, publication and integration of bioinformatics tools and resources demands consistent machine-understandable descriptions. A comprehensive ontology allowing such descriptions is therefore required.

Results

EDAM is an ontology of bioinformatics operations (tool or workflow functions), types of data and identifiers, application domains and data formats. EDAM supports semantic annotation of diverse entities such as Web services, databases, programmatic libraries, standalone tools, interactive applications, data schemas, datasets and publications within bioinformatics. EDAM applies to organizing and finding suitable tools and data and to automating their integration into complex applications or workflows. It includes over 2200 defined concepts and has successfully been used for annotations and implementations.

Availability

The latest stable version of EDAM is available in OWL format from http://edamontology.org/EDAM.owl and in OBO format from http://edamontology.org/EDAM.obo. It can be viewed online at the NCBO BioPortal and the EBI Ontology Lookup Service. For documentation and license please refer to http://edamontology.org. This article describes version 1.2 available at http://edamontology.org/EDAM_1.2.owl.

Contact

jison@ebi.ac.uk.",2013-03-11 +28644242,Effects of Participation in Sports Programs on Walking Ability and Endurance Over Time in Children With Cerebral Palsy.,"

Objective

Children with cerebral palsy may benefit from maintaining a high level of physical fitness similar to typically developing children especially in terms of long-term physical performance, although in practice this is often difficult. The purpose of this study was to determine the effect of participation in sports programs on walking ability and endurance over time.

Design

A retrospective cohort study included participants with cerebral palsy, aged 6 to 20 yrs, who attended a summer sports program from 2004 to 2012. There were 256 participant sessions with pre/post data recorded. The participants consisted of a total of 97 children (mean age [SD] = 11.4 [3.1] yrs), many of whom attended multiple programs throughout the years. Programs were held 6 hrs/d, 5 d/wk for up to 4 wks. Outcome measures included the Timed Up and Go, modified 6-min walk, and 25-ft walk/run.

Results

The results showed significant improvements in the Timed Up and Go, modified 6-min walk distance and 25-ft walk/run over time. Children in Gross Motor Classification System level III made the largest gains.

Conclusions

Walking ability and endurance seem to improve after participation in an intensive summer sports program. Higher frequency of program attendance resulted in significant improvements in the Timed Up and Go.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) Discuss the importance of physical activity at the participation level (sports programs) for children with cerebral palsy; (2) Contrast the changes in walking ability and endurance for children in Gross Motor Function Classification System level I, II, and III after sports programs; and (3) Identify the impact of higher frequency of sports program attendance over time on walking ability.

Level

Advanced ACCREDITATION: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians. The Association of Academic Physiatrists designates this Journal-based CME activity for a maximum of 0.75 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2017-12-01 +27756204,RNAdualPF: software to compute the dual partition function with sample applications in molecular evolution theory.,"

Background

RNA inverse folding is the problem of finding one or more sequences that fold into a user-specified target structure s 0, i.e. whose minimum free energy secondary structure is identical to the target s 0. Here we consider the ensemble of all RNA sequences that have low free energy with respect to a given target s 0.

Results

We introduce the program RNAdualPF, which computes the dual partition function Z , defined as the sum of Boltzmann factors exp(-E(a,s 0)/RT) of all RNA nucleotide sequences a compatible with target structure s 0. Using RNAdualPF, we efficiently sample RNA sequences that approximately fold into s 0, where additionally the user can specify IUPAC sequence constraints at certain positions, and whether to include dangles (energy terms for stacked, single-stranded nucleotides). Moreover, since we also compute the dual partition function Z (k) over all sequences having GC-content k, the user can require that all sampled sequences have a precise, specified GC-content. Using Z , we compute the dual expected energy 〈E 〉, and use it to show that natural RNAs from the Rfam 12.0 database have higher minimum free energy than expected, thus suggesting that functional RNAs are under evolutionary pressure to be only marginally thermodynamically stable. We show that C. elegans precursor microRNA (pre-miRNA) is significantly non-robust with respect to mutations, by comparing the robustness of each wild type pre-miRNA sequence with 2000 [resp. 500] sequences of the same GC-content generated by RNAdualPF, which approximately [resp. exactly] fold into the wild type target structure. We confirm and strengthen earlier findings that precursor microRNAs and bacterial small noncoding RNAs display plasticity, a measure of structural diversity.

Conclusion

We describe RNAdualPF, which rapidly computes the dual partition function Z and samples sequences having low energy with respect to a target structure, allowing sequence constraints and specified GC-content. Using different inverse folding software, another group had earlier shown that pre-miRNA is mutationally robust, even controlling for compositional bias. Our opposite conclusion suggests a cautionary note that computationally based insights into molecular evolution may heavily depend on the software used. C/C++-software for RNAdualPF is available at http://bioinformatics.bc.edu/clotelab/RNAdualPF .",2016-10-19 +27760124,"rasbhari: Optimizing Spaced Seeds for Database Searching, Read Mapping and Alignment-Free Sequence Comparison.","Many algorithms for sequence analysis rely on word matching or word statistics. Often, these approaches can be improved if binary patterns representing match and don't-care positions are used as a filter, such that only those positions of words are considered that correspond to the match positions of the patterns. The performance of these approaches, however, depends on the underlying patterns. Herein, we show that the overlap complexity of a pattern set that was introduced by Ilie and Ilie is closely related to the variance of the number of matches between two evolutionarily related sequences with respect to this pattern set. We propose a modified hill-climbing algorithm to optimize pattern sets for database searching, read mapping and alignment-free sequence comparison of nucleic-acid sequences; our implementation of this algorithm is called rasbhari. Depending on the application at hand, rasbhari can either minimize the overlap complexity of pattern sets, maximize their sensitivity in database searching or minimize the variance of the number of pattern-based matches in alignment-free sequence comparison. We show that, for database searching, rasbhari generates pattern sets with slightly higher sensitivity than existing approaches. 
In our Spaced Words approach to alignment-free sequence comparison, pattern sets calculated with rasbhari led to more accurate estimates of phylogenetic distances than the randomly generated pattern sets that we previously used. Finally, we used rasbhari to generate patterns for short read classification with CLARK-S. Here too, the sensitivity of the results could be improved, compared to the default patterns of the program. We integrated rasbhari into Spaced Words; the source code of rasbhari is freely available at http://rasbhari.gobics.de/.",2016-10-19 +27091369,"The Next Generation of Risk Assessment Multi-Year Study-Highlights of Findings, Applications to Risk Assessment, and Future Directions.","

Background

The Next Generation (NexGen) of Risk Assessment effort is a multi-year collaboration among several organizations evaluating new, potentially more efficient molecular, computational, and systems biology approaches to risk assessment. This article summarizes our findings, suggests applications to risk assessment, and identifies strategic research directions.

Objective

Our specific objectives were to test whether advanced biological data and methods could better inform our understanding of public health risks posed by environmental exposures.

Methods

New data and methods were applied and evaluated for use in hazard identification and dose-response assessment. Biomarkers of exposure and effect, and risk characterization were also examined. Consideration was given to various decision contexts with increasing regulatory and public health impacts. Data types included transcriptomics, genomics, and proteomics. Methods included molecular epidemiology and clinical studies, bioinformatic knowledge mining, pathway and network analyses, short-duration in vivo and in vitro bioassays, and quantitative structure activity relationship modeling.

Discussion

NexGen has advanced our ability to apply new science by more rapidly identifying chemicals and exposures of potential concern, helping characterize mechanisms of action that influence conclusions about causality, exposure-response relationships, susceptibility and cumulative risk, and by elucidating new biomarkers of exposure and effects. Additionally, NexGen has fostered extensive discussion among risk scientists and managers and improved confidence in interpreting and applying new data streams.

Conclusions

While considerable uncertainties remain, thoughtful application of new knowledge to risk assessment appears reasonable for augmenting major scope assessments, forming the basis for or augmenting limited scope assessments, and for prioritization and screening of very data limited chemicals. Citation: Cote I, Andersen ME, Ankley GT, Barone S, Birnbaum LS, Boekelheide K, Bois FY, Burgoon LD, Chiu WA, Crawford-Brown D, Crofton KM, DeVito M, Devlin RB, Edwards SW, Guyton KZ, Hattis D, Judson RS, Knight D, Krewski D, Lambert J, Maull EA, Mendrick D, Paoli GM, Patel CJ, Perkins EJ, Poje G, Portier CJ, Rusyn I, Schulte PA, Simeonov A, Smith MT, Thayer KA, Thomas RS, Thomas R, Tice RR, Vandenberg JJ, Villeneuve DL, Wesselkamper S, Whelan M, Whittaker C, White R, Xia M, Yauk C, Zeise L, Zhao J, DeWoskin RS. 2016. The Next Generation of Risk Assessment multiyear study-highlights of findings, applications to risk assessment, and future directions. Environ Health Perspect 124:1671-1682; http://dx.doi.org/10.1289/EHP233.",2016-04-19 +28105921,SIMBA: a web tool for managing bacterial genome assembly generated by Ion PGM sequencing technology.,"

Background

The evolution of Next-Generation Sequencing (NGS) has considerably reduced the cost per sequenced-base, allowing a significant rise of sequencing projects, mainly in prokaryotes. However, the range of available NGS platforms requires different strategies and software to correctly assemble genomes. Different strategies are necessary to properly complete an assembly project, in addition to the installation or modification of various software. This requires users to have significant expertise in these software and command line scripting experience on Unix platforms, besides possessing the basic expertise on methodologies and techniques for genome assembly. These difficulties often delay the complete genome assembly projects.

Results

In order to overcome this, we developed SIMBA (SImple Manager for Bacterial Assemblies), a freely available web tool that integrates several component tools for assembling and finishing bacterial genomes. SIMBA provides a friendly and intuitive user interface so bioinformaticians, even with low computational expertise, can work under a centralized administrative control system of assemblies managed by the assembly center head. SIMBA guides the users to execute assembly process through simple and interactive pages. SIMBA workflow was divided in three modules: (i) projects: allows a general vision of genome sequencing projects, in addition to data quality analysis and data format conversions; (ii) assemblies: allows de novo assemblies with the software Mira, Minia, Newbler and SPAdes, also assembly quality validations using QUAST software; and (iii) curation: presents methods to finishing assemblies through tools for scaffolding contigs and close gaps. We also presented a case study that validated the efficacy of SIMBA to manage bacterial assemblies projects sequenced using Ion Torrent PGM.

Conclusion

Besides to be a web tool for genome assembly, SIMBA is a complete genome assemblies project management system, which can be useful for managing of several projects in laboratories. SIMBA source code is available to download and install in local webservers at http://ufmg-simba.sourceforge.net .",2016-12-15 +27924015,TSTMP: target selection for structural genomics of human transmembrane proteins.,"The TSTMP database is designed to help the target selection of human transmembrane proteins for structural genomics projects and structure modeling studies. Currently, there are only 60 known 3D structures among the polytopic human transmembrane proteins and about a further 600 could be modeled using existing structures. Although there are a great number of human transmembrane protein structures left to be determined, surprisingly only a small fraction of these proteins have 'selected' (or above) status according to the current version the TargetDB/TargetTrack database. This figure is even worse regarding those transmembrane proteins that would contribute the most to the structural coverage of the human transmembrane proteome. The database was built by sorting out proteins from the human transmembrane proteome with known structure and searching for suitable model structures for the remaining proteins by combining the results of a state-of-the-art transmembrane specific fold recognition algorithm and a sequence similarity search algorithm. Proteins were searched for homologues among the human transmembrane proteins in order to select targets whose successful structure determination would lead to the best structural coverage of the human transmembrane proteome. The pipeline constructed for creating the TSTMP database guarantees to keep the database up-to-date. 
The database is available at http://tstmp.enzim.ttk.mta.hu.",2016-10-18 +28185240,A staging system for correct phenotype interpretation of mouse embryos harvested on embryonic day 14 (E14.5).,"We present a simple and quick system for accurately scoring the developmental progress of mouse embryos harvested on embryonic day 14 (E14.5). Based solely on the external appearance of the maturing forelimb, we provide a convenient way to distinguish six developmental sub-stages. Using a variety of objective morphometric data obtained from the commonly used C57BL/6N mouse strain, we show that these stages correlate precisely with the growth of the entire embryo and its organs. Applying the new staging system to phenotype analyses of E14.5 embryos of 58 embryonic lethal null mutant lines from the DMDD research programme (https://dmdd.org.uk) and its pilot, we show that homozygous mutant embryos are frequently delayed in development. To demonstrate the importance of our staging system for correct phenotype interpretation, we describe stage-specific changes of the palate, heart and gut, and provide examples in which correct diagnosis of malformations relies on correct staging.",2017-02-09 +28073762,A DNA intercalation methodology for an efficient prediction of ligand binding pose and energetics.,"

Motivation

Drug intercalation is an important strategy for DNA inhibition which is often employed in cancer chemotherapy. Despite its high significance, the field is characterized by limited success in identification of novel intercalator molecules and lack of automated and dedicated drug-DNA intercalation methodology.

Results

We report here a novel intercalation methodology (christened 'Intercalate') for predicting both the structures and energetics of DNA-intercalator complexes, covering the processes of DNA unwinding and (non-covalent) binding. Given a DNA sequence and intercalation site information, Intercalate generates the 3D structure of DNA, creates the intercalation site, performs docking at the intercalation site and evaluates DNA-intercalator binding energy in an automated way. The structures and energetics of the DNA-intercalator complexes produced by the Intercalate methodology are seen to be in good agreement with experiment. The dedicated attempt made in developing a drug-DNA intercalation methodology (compatible with its mechanism) with high accuracy should prove useful in the discovery of potential intercalators for their use as anticancers, antibacterials or antivirals.

Availability and implementation

http://www.scfbio-iitd.res.in/intercalate/.

Contact

anjali@scfbio-iitd.res.in or bjayaram@chemistry.iitd.ac.in.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +23362826,Hybrid feature detection and information accumulation using high-resolution LC-MS metabolomics data.,"Feature detection is a critical step in the preprocessing of liquid chromatography-mass spectrometry (LC-MS) metabolomics data. Currently, the predominant approach is to detect features using noise filters and peak shape models based on the data at hand alone. Databases of known metabolites and historical data contain information that could help boost the sensitivity of feature detection, especially for low-concentration metabolites. However, utilizing such information in targeted feature detection may cause large number of false positives because of the high levels of noise in LC-MS data. With high-resolution mass spectrometry such as liquid chromatograph-Fourier transform mass spectrometry (LC-FTMS), high-confidence matching of peaks to known features is feasible. Here we describe a computational approach that serves two purposes. First it boosts feature detection sensitivity by using a hybrid procedure of both untargeted and targeted peak detection. New algorithms are designed to reduce the chance of false-positives by nonparametric local peak detection and filtering. Second, it can accumulate information on the concentration variation of metabolites over large number of samples, which can help find rare features and/or features with uncommon concentration in future studies. Information can be accumulated on features that are consistently found in real data even before their identities are found. We demonstrate the value of the approach in a proof-of-concept study. The method is implemented as part of the R package apLCMS at http://www.sph.emory.edu/apLCMS/ .",2013-02-12 +24996896,TAPAS: tools to assist the targeted protein quantification of human alternative splice variants.,"

Motivation

In proteomes of higher eukaryotes, many alternative splice variants can only be detected by their shared peptides. This makes it highly challenging to use peptide-centric mass spectrometry to distinguish and to quantify protein isoforms resulting from alternative splicing events.

Results

We have developed two complementary algorithms based on linear mathematical models to efficiently compute a minimal set of shared and unique peptides needed to quantify a set of isoforms and splice variants. Further, we developed a statistical method to estimate the splice variant abundances based on stable isotope labeled peptide quantities. The algorithms and databases are integrated in a web-based tool, and we have experimentally tested the limits of our quantification method using spiked proteins and cell extracts.

Availability and implementation

The TAPAS server is available at URL http://davinci.crg.es/tapas/.

Contact

luis.serrano@crg.eu or christina.kiel@crg.eu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-04 +23433187,"CartograTree: connecting tree genomes, phenotypes and environment.","Today, researchers spend a tremendous amount of time gathering, formatting, filtering and visualizing data collected from disparate sources. Under the umbrella of forest tree biology, we seek to provide a platform and leverage modern technologies to connect biotic and abiotic data. Our goal is to provide an integrated web-based workspace that connects environmental, genomic and phenotypic data via geo-referenced coordinates. Here, we connect the genomic query web-based workspace, DiversiTree and a novel geographical interface called CartograTree to data housed on the TreeGenes database. To accomplish this goal, we implemented Simple Semantic Web Architecture and Protocol to enable the primary genomics database, TreeGenes, to communicate with semantic web services regardless of platform or back-end technologies. The novelty of CartograTree lies in the interactive workspace that allows for geographical visualization and engagement of high performance computing (HPC) resources. The application provides a unique tool set to facilitate research on the ecology, physiology and evolution of forest tree species. CartograTree can be accessed at: http://dendrome.ucdavis.edu/cartogratree.",2013-02-25 +28453672,Primerize-2D: automated primer design for RNA multidimensional chemical mapping.,"

Summary

Rapid RNA synthesis of comprehensive single mutant libraries and targeted multiple mutant libraries is enabling new multidimensional chemical approaches to solve RNA structures. PCR assembly of DNA templates and in vitro transcription allow synthesis and purification of hundreds of RNA mutants in a cost-effective manner, with sharing of primers across constructs allowing significant reductions in expense. However, these protocols require organization of primer locations across numerous 96 well plates and guidance for pipetting, non-trivial tasks for which informatics and visualization tools can prevent costly errors. We report here an online tool to accelerate synthesis of large libraries of desired mutants through design and efficient organization of primers. The underlying program and graphical interface have been experimentally tested in our laboratory for RNA domains with lengths up to 300 nucleotides and libraries encompassing up to 960 variants. In addition to the freely available Primerize-2D server, the primer design code is available as a stand-alone Python package for broader applications.

Availability and implementation

http://primerize2d.stanford.edu.

Contact

rhiju@stanford.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +25623424,Re-annotation of the woodland strawberry (Fragaria vesca) genome.,"

Background

Fragaria vesca is a low-growing, small-fruited diploid strawberry species commonly called woodland strawberry. It is native to temperate regions of Eurasia and North America and while it produces edible fruits, it is most highly useful as an experimental perennial plant system that can serve as a model for the agriculturally important Rosaceae family. A draft of the F. vesca genome sequence was published in 2011 [Nat Genet 43:223,2011]. The first generation annotation (version 1.1) was developed using GeneMark-ES+ [Nuc Acids Res 33:6494,2005], which is a self-training gene prediction tool that relies primarily on the combination of ab initio predictions with mapping high confidence ESTs in addition to mapping gene deserts from transposable elements. Based on over 25 different tissue transcriptomes, we have revised the F. vesca genome annotation, thereby providing several improvements over version 1.1.

Results

The new annotation, which was achieved using Maker, describes many more predicted protein coding genes compared to the GeneMark generated annotation that is currently hosted at the Genome Database for Rosaceae ( http://www.rosaceae.org/ ). Our new annotation also results in an increase in the overall total coding length, and the number of coding regions found. The total number of gene predictions that do not overlap with the previous annotations is 2286, most of which were found to be homologous to other plant genes. We have experimentally verified one of the new gene model predictions to validate our results.

Conclusions

Using the RNA-Seq transcriptome sequences from 25 diverse tissue types, the re-annotation pipeline improved existing annotations by increasing the annotation accuracy based on extensive transcriptome data. It uncovered new genes, added exons to current genes, and extended or merged exons. This complete genome re-annotation will significantly benefit functional genomic studies of the strawberry and other members of the Rosaceae.",2015-01-27 +26962181,Intake of Total Polyphenols and Some Classes of Polyphenols Is Inversely Associated with Diabetes in Elderly People at High Cardiovascular Disease Risk.,"

Background

Higher consumption of some polyphenols has been associated with a reduced risk of diabetes. However, no studies have evaluated the relation between all polyphenol subclasses and the incidence of diabetes.

Objective

We aimed to prospectively examine the associations between the intake of total polyphenols and different groups of polyphenols (flavonoids, phenolic acids, stilbenes, lignans, and others) on the risk of incident diabetes in the PREDIMED (Prevención con Dieta Mediterránea) trial.

Methods

This was an observational cohort analysis of the nondiabetic participants in the PREDIMED trial. This study was a multicenter, controlled, randomized, parallel-group feeding trial to assess the effects of either a Mediterranean diet that was supplemented with extra-virgin olive oil or nuts or advice to adhere to a low-fat control diet on cardiovascular outcomes in elderly men and women at high cardiovascular disease risk. From the 7447 randomly assigned participants, 3430 were selected because they were free of diabetes at baseline and filled out the food-frequency questionnaires (FFQs). Polyphenol intake was calculated by matching food consumption data from repeated FFQs with the Phenol-Explorer database on the polyphenol content of each reported food. HRs and 95% CIs for diabetes according to tertiles of polyphenol intake were estimated with the use of time-dependent Cox proportional hazards models.

Results

Over a mean of 5.51 y of follow-up (18,900 person-years), there were 314 new cases of diabetes. After multivariable adjustment, we observed a 28% reduction in new-onset diabetes in the highest compared with the lowest tertile of total polyphenol intake (HR: 0.72; 95% CI: 0.52, 0.99; P-trend = 0.05). The intake of subclasses of polyphenols also was inversely associated with diabetes risk, including for total flavonoids (HR: 0.67; 95% CI: 0.48, 0.93; P-trend = 0.02), stilbenes (HR: 0.57; 95% CI: 0.38, 0.84; P-trend = 0.003), dihydroflavonols (HR: 0.59; 95% CI: 0.40, 0.88; P-trend = 0.003), and flavanones (HR: 0.69; 95% CI: 0.49, 0.97; P-trend = 0.03).

Conclusions

A high intake of total polyphenols, total flavonoids (specifically flavanones and dihydroflavonols), and stilbenes is associated with a reduced risk of diabetes in elderly persons at high risk of cardiovascular disease. This trial was registered at http://www.controlled-trials.com as ISRCTN35739639.",2015-04-01 +28692985,EmDL: Extracting miRNA-Drug Interactions from Literature.,"The microRNAs (miRNAs), regulators of post-transcriptional processes, have been found to affect the efficacy of drugs by regulating the biological processes in which the target proteins of drugs may be involved. For example, some drugs develop resistance when certain miRNAs are overexpressed. Therefore, identifying miRNAs that affect drug effects can help understand the mechanisms of drug actions and design more efficient drugs. Although some computational approaches have been developed to predict miRNA-drug associations, such associations rarely provide explicit information about which miRNAs and how they affect drug efficacy. On the other hand, there are rich information about which miRNAs affect the efficacy of which drugs in the literature. In this paper, we present a novel text mining approach, named as EmDL (Extracting miRNA-Drug interactions from Literature), to extract the relationships of miRNAs affecting drug efficacy from literature. Benchmarking on the drug-miRNA interactions manually extracted from MEDLINE and PubMed Central, EmDL outperforms traditional text mining approaches as well as other popular methods for predicting drug-miRNA associations. Specifically, EmDL can effectively identify the sentences that describe the relationships of miRNAs affecting drug effects. The drug-miRNA interactome presented here can help understand how miRNAs affect drug effects and provide insights into the mechanisms of drug actions. In addition, with the information about drug-miRNA interactions, more effective drugs or combinatorial strategies can be designed in the future. 
The data used here can be accessed at http://mtd.comp-sysbio.org/.",2017-07-06 +27749991,"Nutritional interventions or exposures in infants and children aged up to 3 years and their effects on subsequent risk of overweight, obesity and body fat: a systematic review of systematic reviews.","This study, performed as part of the international EarlyNutrition research project (http://www.project-earlynutrition.eu), provides a systematic review of systematic reviews on the effects of nutritional interventions or exposures in children (up to 3 years of age) on the subsequent risk of obesity, overweight and adiposity. Electronic databases (including MEDLINE, Embase and Cochrane Library) were searched up until September 2015. Forty systematic reviews were included. A consistent association of breastfeeding with a modest reduction in the risk of later overweight and obesity in childhood and adulthood was found (the odds decreased by 13% based on high-quality studies), but residual confounding cannot be excluded. Lowering the protein content of infant formula is a promising intervention to reduce the risk of later overweight and obesity in children. There is no consistent evidence of an association of the age of introducing complementary foods, sugar-sweetened beverage or energy intake in early childhood with later overweight/obesity, but there are some indications of an association of protein intake during the complementary feeding period with later overweight/obesity. There was inadequate evidence to determine the effects of other nutritional interventions or exposures, including modifications of infant formula composition, fat intake or consumption of different food groups.",2016-10-17 +27803746,"NEEMP: software for validation, accurate calculation and fast parameterization of EEM charges.","

Background

The concept of partial atomic charges was first applied in physical and organic chemistry and was later also adopted in computational chemistry, bioinformatics and chemoinformatics. The electronegativity equalization method (EEM) is the most frequently used approach for calculating partial atomic charges. EEM is fast and its accuracy is comparable to the quantum mechanical charge calculation method for which it was parameterized. Several EEM parameter sets for various types of molecules and QM charge calculation approaches have been published and new ones are still needed and produced. Methodologies for EEM parameterization have been described in a few articles, but a software tool for EEM parameterization and EEM parameter sets validation has not been available until now.

Results

We provide the software tool NEEMP (http://ncbr.muni.cz/NEEMP), which offers three main functionalities: EEM parameterization [via linear regression (LR) and differential evolution with local minimization (DE-MIN)]; EEM parameter set validation (i.e., validation of coverage and quality) and EEM charge calculation. NEEMP functionality is shown using a parameterization and a validation case study. The parameterization case study demonstrated that LR is an appropriate approach for smaller and homogeneous datasets and DE-MIN is a suitable solution for larger and heterogeneous datasets. The validation case study showed that EEM parameter set coverage and quality can still be problematic. Therefore, it makes sense to verify the coverage and quality of EEM parameter sets before their use, and NEEMP is an appropriate tool for such verification. Moreover, it seems from both case studies that new EEM parameterizations need to be performed and new EEM parameter sets obtained with high quality and coverage for key structural databases.

Conclusion

We provide the software tool NEEMP, which is to the best of our knowledge the only available software package that enables EEM parameterization and EEM parameter set validation. Additionally, its DE-MIN parameterization method is an innovative approach, developed by ourselves and first published in this work. In addition, we also prepared four high-quality EEM parameter sets tailored to ligand molecules. Graphical abstract.",2016-10-17 +28332203,Margins of freedom: a field-theoretic approach to class-based health dispositions and practices.,"Pierre Bourdieu's theory of practice situates social practices in the relational interplay between experiential mental phenomena (habitus), resources (capitals) and objective social structures (fields). When applied to class-based practices in particular, the overarching field of power within which social classes are potentially made manifest is the primary field of interest. Applying relational statistical techniques to original survey data from Toronto and Vancouver, Canada, we investigated whether smoking, engaging in physical activity and consuming fruit and vegetables are dispersed in a three-dimensional field of power shaped by economic and cultural capitals and cultural dispositions and practices. We find that aesthetic dispositions and flexibility of developing and established dispositions are associated with positioning in the Canadian field of power and embedded in the logics of the health practices dispersed in the field. From this field-theoretic perspective, behavioural change requires the disruption of existing relations of harmony between the habitus of agents, the fields within which the practices are enacted and the capitals that inform and enforce the mores and regularities of the fields. The three-dimensional model can be explored at: http://relational-health.ca/margins-freedom.",2017-03-23 +22044723,Generation of the first BAC-based physical map of the common carp genome.,"

Background

Common carp (Cyprinus carpio), a member of Cyprinidae, is the third most important aquaculture species in the world with an annual global production of 3.4 million metric tons, accounting for nearly 14% of the all freshwater aquaculture production in the world. Apparently genomic resources are needed for this species in order to study its performance and production traits. In spite of much progress, no physical maps have been available for common carp. The objective of this project was to generate a BAC-based physical map using fluorescent restriction fingerprinting.

Result

The first generation of common carp physical map was constructed using four-color High Information Content Fingerprinting (HICF). A total of 72,158 BAC clones were analyzed that generated 67,493 valid fingerprints (5.5 × genome coverage). These BAC clones were assembled into 3,696 contigs with the average length of 476 kb and a N50 length of 688 kb, representing approximately 1.76 Gb of the common carp genome. The largest contig contained 171 BAC clones with the physical length of 3.12 Mb. There are 761 contigs longer than the N50, and these contigs should be the most useful resource for future integrations with linkage map and whole genome sequence assembly. The common carp physical map is available at http://genomics.cafs.ac.cn/fpc/WebAGCoL/Carp/WebFPC/.

Conclusion

The reported common carp physical map is the first physical map of the common carp genome. It should be a valuable genome resource facilitating whole genome sequence assembly and characterization of position-based genes important for aquaculture traits.",2011-11-02 +32025106,Application of ray-traced tropospheric slant delays to geodetic VLBI analysis.,"The correction of tropospheric influences via so-called path delays is critical for the analysis of observations from space geodetic techniques like the very long baseline interferometry (VLBI). In standard VLBI analysis, the a priori slant path delays are determined using the concept of zenith delays, mapping functions and gradients. The a priori use of ray-traced delays, i.e., tropospheric slant path delays determined with the technique of ray-tracing through the meteorological data of numerical weather models (NWM), serves as an alternative way of correcting the influences of the troposphere on the VLBI observations within the analysis. In the presented research, the application of ray-traced delays to the VLBI analysis of sessions in a time span of 16.5 years is investigated. Ray-traced delays have been determined with program RADIATE (see Hofmeister in Ph.D. thesis, Department of Geodesy and Geophysics, Faculty of Mathematics and Geoinformation, Technische Universität Wien. http://resolver.obvsg.at/urn:nbn:at:at-ubtuw:1-3444, 2016) utilizing meteorological data provided by NWM of the European Centre for Medium-Range Weather Forecasts (ECMWF). In comparison with a standard VLBI analysis, which includes the tropospheric gradient estimation, the application of the ray-traced delays to an analysis, which uses the same parameterization except for the a priori slant path delay handling and the used wet mapping factors for the zenith wet delay (ZWD) estimation, improves the baseline length repeatability (BLR) at 55.9% of the baselines at sub-mm level. 
If no tropospheric gradients are estimated within the compared analyses, 90.6% of all baselines benefit from the application of the ray-traced delays, which leads to an average improvement of the BLR of 1 mm. The effects of the ray-traced delays on the terrestrial reference frame are also investigated. A separate assessment of the RADIATE ray-traced delays is carried out by comparison to the ray-traced delays from the National Aeronautics and Space Administration Goddard Space Flight Center (NASA GSFC) (Eriksson and MacMillan in http://lacerta.gsfc.nasa.gov/tropodelays, 2016) with respect to the analysis performances in terms of BLR results. If tropospheric gradient estimation is included in the analysis, 51.3% of the baselines benefit from the RADIATE ray-traced delays at sub-mm difference level. If no tropospheric gradients are estimated within the analysis, the RADIATE ray-traced delays deliver a better BLR at 63% of the baselines compared to the NASA GSFC ray-traced delays.",2017-02-22 +26685308,MSAcquisitionSimulator: data-dependent acquisition simulator for LC-MS shotgun proteomics.,"

Unlabelled

Data-dependent acquisition (DDA) is the most common method used to control the acquisition process of shotgun proteomics experiments. While novel DDA approaches have been proposed, their evaluation is made difficult by the need of programmatic control of a mass spectrometer. An alternative is in silico analysis, for which suitable software has been unavailable. To meet this need, we have developed MSAcquisitionSimulator-a collection of C ++ programs for simulating ground truth LC-MS data and the subsequent application of custom DDA algorithms. It provides an opportunity for researchers to test, refine and evaluate novel DDA algorithms prior to implementation on a mass spectrometer.

Availability and implementation

The software is freely available from its Github repository http://www.github.com/DennisGoldfarb/MSAcquisitionSimulator/ which contains further documentation and usage instructions.

Contact

weiwang@cs.ucla.edu or ben_major@med.unc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-17 +26877689,GPS tracking data of Lesser Black-backed Gulls and Herring Gulls breeding at the southern North Sea coast.,"In this data paper, Bird tracking - GPS tracking of Lesser Black-backed Gulls and Herring Gulls breeding at the southern North Sea coast is described, a species occurrence dataset published by the Research Institute for Nature and Forest (INBO). The dataset (version 5.5) contains close to 2.5 million occurrences, recorded by 101 GPS trackers mounted on 75 Lesser Black-backed Gulls and 26 Herring Gulls breeding at the Belgian and Dutch coast. The trackers were developed by the University of Amsterdam Bird Tracking System (UvA-BiTS, http://www.uva-bits.nl). These automatically record and transmit bird movements, which allows us and others to study their habitat use and migration behaviour in great detail. Our bird tracking network is operational since 2013. It is funded for LifeWatch by the Hercules Foundation and maintained in collaboration with UvA-BiTS and the Flanders Marine Institute (VLIZ). The recorded data are periodically released in bulk as open data (http://dataset.inbo.be/bird-tracking-gull-occurrences), and are also accessible through CartoDB and the Global Biodiversity Information Facility (GBIF).",2016-01-20 +27424761,Sequence Search and Comparative Genomic Analysis of SUMO-Activating Enzymes Using CoGe.,"The growing number of genome sequences completed during the last few years has made necessary the development of bioinformatics tools for the easy access and retrieval of sequence data, as well as for downstream comparative genomic analyses. 
Some of these are implemented as online platforms that integrate genomic data produced by different genome sequencing initiatives with data mining tools as well as various comparative genomic and evolutionary analysis possibilities. Here, we use the online comparative genomics platform CoGe ( http://www.genomevolution.org/coge/ ) (Lyons and Freeling. Plant J 53:661-673, 2008; Tang and Lyons. Front Plant Sci 3:172, 2012) (1) to retrieve the entire complement of orthologous and paralogous genes belonging to the SUMO-Activating Enzymes 1 (SAE1) gene family from a set of species representative of the Brassicaceae plant eudicot family with genomes fully sequenced, and (2) to investigate the history, timing, and molecular mechanisms of the gene duplications driving the evolutionary expansion and functional diversification of the SAE1 family in Brassicaceae.",2016-01-01 +27666114,Is there an added value of faecal calprotectin and haemoglobin in the diagnostic work-up for primary care patients suspected of significant colorectal disease? A cross-sectional diagnostic study.,"

Background

The majority of primary care patients referred for bowel endoscopy do not have significant colorectal disease (SCD), and are - in hindsight - unnecessarily exposed to a small but realistic risk of severe endoscopy-associated complications. We developed a diagnostic strategy to better exclude SCD in these patients and evaluated the value of adding a faecal calprotectin point-of-care (POC) and/or a POC faecal immunochemical test for haemoglobin (FIT) to routine clinical information.

Methods

We used data from a prospective diagnostic study in SCD-suspected patients from 266 Dutch primary care practices referred for endoscopy to develop a diagnostic model for SCD with routine clinical information, which we extended with faecal calprotectin POC (quantitatively in μg/g faeces) and/or POC FIT results (qualitatively with a 6 μg/g faeces detection limit). We defined SCD as colorectal cancer (CRC), inflammatory bowel disease, diverticulitis, or advanced adenoma (>1 cm).

Results

Of 810 patients, 141 (17.4 %) had SCD. A diagnostic model with routine clinical data discriminated between patients with and without SCD with an area under the receiver operating characteristic curve (AUC) of 0.741 (95 % CI, 0.694-0.789). This AUC increased to 0.763 (95 % CI, 0.718-0.809; P = 0.078) when adding the calprotectin POC test, to 0.831 (95 % CI, 0.791-0.872; P < 0.001) when adding the POC FIT, and to 0.837 (95 % CI, 0.798-0.876; P < 0.001) upon combined extension. At a ≥ 5.0 % SCD probability threshold for endoscopy referral, 30.4 % of the patients tested negative based on this combined POC-tests extended model (95 % CI, 25.7-35.3 %), with 96.4 % negative predictive value (95 % CI, 93.1-98.2 %) and 93.7 % sensitivity (95 % CI, 88.2-96.8 %). Excluding the calprotectin POC test from this model still yielded 30.1 % test negatives (95 % CI, 24.7-35.6 %) and 96.0 % negative predictive value (95 % CI, 92.6-97.9 %), with 93.0 % sensitivity (95 % CI, 87.4-96.4 %).

Conclusions

FIT - and to a much lesser extent calprotectin - POC testing showed incremental value for SCD diagnosis beyond standard clinical information. A diagnostic strategy with routine clinical data and a POC FIT test may safely rule out SCD and prevent unnecessary endoscopy referral in approximately one third of SCD-suspected primary care patients. Please see related article: http://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-016-0694-3 .",2016-09-26 +24174543,Hemolytik: a database of experimentally determined hemolytic and non-hemolytic peptides.,"Hemolytik (http://crdd.osdd.net/raghava/hemolytik/) is a manually curated database of experimentally determined hemolytic and non-hemolytic peptides. Data were compiled from a large number of published research articles and various databases like Antimicrobial Peptide Database, Collection of Anti-microbial Peptides, Dragon Antimicrobial Peptide Database and Swiss-Prot. The current release of Hemolytik database contains ∼3000 entries that include ∼2000 unique peptides whose hemolytic activities were evaluated on erythrocytes isolated from as many as 17 different sources. Each entry in Hemolytik provides comprehensive information about a peptide, like its name, sequence, origin, reported function, property such as chirality, types (linear and cyclic), end modifications as well as details pertaining to its hemolytic activity. In addition, tertiary structure of each peptide has been predicted, and secondary structure states have been assigned. To facilitate the scientific community, a user-friendly interface has been developed with various tools for data searching and analysis. We hope, Hemolytik will be useful for researchers working in the field of designing therapeutic peptides.",2013-10-29 +27153624,DyNet: visualization and analysis of dynamic molecular interaction networks.,"

Unlabelled

The ability to experimentally determine molecular interactions on an almost proteome-wide scale under different conditions is enabling researchers to move from static to dynamic network analysis, uncovering new insights into how interaction networks are physically rewired in response to different stimuli and in disease. Dynamic interaction data presents a special challenge in network biology. Here, we present DyNet, a Cytoscape application that provides a range of functionalities for the visualization, real-time synchronization and analysis of large multi-state dynamic molecular interaction networks enabling users to quickly identify and analyze the most 'rewired' nodes across many network states.

Availability and implementation

DyNet is available at the Cytoscape (3.2+) App Store (http://apps.cytoscape.org/apps/dynet).

Contact

david.lynn@sahmri.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-03 +28007885,Inferring Ancestral Recombination Graphs from Bacterial Genomic Data.,"Homologous recombination is a central feature of bacterial evolution, yet it confounds traditional phylogenetic methods. While a number of methods specific to bacterial evolution have been developed, none of these permit joint inference of a bacterial recombination graph and associated parameters. In this article, we present a new method which addresses this shortcoming. Our method uses a novel Markov chain Monte Carlo algorithm to perform phylogenetic inference under the ClonalOrigin model. We demonstrate the utility of our method by applying it to ribosomal multilocus sequence typing data sequenced from pathogenic and nonpathogenic Escherichia coli serotype O157 and O26 isolates collected in rural New Zealand. The method is implemented as an open source BEAST 2 package, Bacter, which is available via the project web page at http://tgvaughan.github.io/bacter.",2016-12-22 +26616242,SFINX: Straightforward Filtering Index for Affinity Purification-Mass Spectrometry Data Analysis.,"Affinity purification-mass spectrometry is one of the most common techniques for the analysis of protein-protein interactions, but inferring bona fide interactions from the resulting data sets remains notoriously difficult. We introduce SFINX, a Straightforward Filtering INdeX that identifies true-positive protein interactions in a fast, user-friendly, and highly accurate way. SFINX outperforms alternative techniques on two benchmark data sets and is available via the Web interface at http://sfinx.ugent.be/.",2015-12-17 +27405453,High type II error and interpretation inconsistencies when attempting to refute transgenerational epigenetic inheritance.,"A recently published article in Genome Biology attempts to refute important aspects of the phenomenon of transgenerational epigenetic inheritance (TEI). 
An alternative explanation of the data is offered here, showing that TEI is indeed not contradicted. Please see related Correspondence article: www.dx.doi.org/10.1186/s13059-016-0981-5 and related Research article: http://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0619-z.",2016-07-12 +29117944,Small-Molecule Sigma1 Modulator Induces Autophagic Degradation of PD-L1.,"Emerging evidence suggests that Sigma1 (SIGMAR1, also known as sigma-1 receptor) is a unique ligand-regulated integral membrane scaffolding protein that contributes to cellular protein and lipid homeostasis. Previously, we demonstrated that some small-molecule modulators of Sigma1 alter endoplasmic reticulum (ER)-associated protein homeostasis pathways in cancer cells, including the unfolded protein response and autophagy. Programmed death-ligand 1 (PD-L1) is a type I integral membrane glycoprotein that is cotranslationally inserted into the ER and is processed and transported through the secretory pathway. Once at the surface of cancer cells, PD-L1 acts as a T-cell inhibitory checkpoint molecule and suppresses antitumor immunity. Here, we demonstrate that in Sigma1-expressing triple-negative breast and androgen-independent prostate cancer cells, PD-L1 protein levels were suppressed by RNAi knockdown of Sigma1 and by small-molecule inhibition of Sigma1. Sigma1-mediated action was confirmed by pharmacologic competition between Sigma1-selective inhibitor and activator ligands. When administered alone, the Sigma1 inhibitor decreased cell surface PD-L1 expression and suppressed functional interaction of PD-1 and PD-L1 in a coculture of T cells and cancer cells. Conversely, the Sigma1 activator increased PD-L1 cell surface expression, demonstrating the ability to positively and negatively modulate Sigma1 associated PD-L1 processing.
We discovered that the Sigma1 inhibitor induced degradation of PD-L1 via autophagy, by a mechanism distinct from bulk macroautophagy or general ER stress-associated autophagy. Finally, the Sigma1 inhibitor suppressed IFNγ-induced PD-L1. Our data demonstrate that small-molecule Sigma1 modulators can be used to regulate PD-L1 in cancer cells and trigger its degradation by selective autophagy. Implications: Sigma1 modulators sequester and eliminate PD-L1 by autophagy, thus preventing functional PD-L1 expression at the cell surface. This posits Sigma1 modulators as novel therapeutic agents in PD-L1/PD-1 blockade strategies that regulate the tumor immune microenvironment. Visual Overview: http://mcr.aacrjournals.org/content/molcanres/16/2/243/F1.large.jpg Mol Cancer Res; 16(2); 243-55. ©2017 AACR.",2017-11-08 +27426920,Yeast membrane proteomics using leucine metabolic labelling: Bioinformatic data processing and exemplary application to the ER-intramembrane protease Ypf1.,"We describe in detail the usage of leucine metabolic labelling in yeast in order to monitor quantitative proteome alterations, e.g. upon removal of a protease. Since laboratory yeast strains are typically leucine auxotroph, metabolic labelling with trideuterated leucine (d3-leucine) is a straightforward, cost-effective, and ubiquitously applicable strategy for quantitative proteomic studies, similar to the widely used arginine/lysine metabolic labelling method for mammalian cells. We showcase the usage of advanced peptide quantification using the FeatureFinderMultiplex algorithm (part of the OpenMS software package) for robust and reliable quantification. Furthermore, we present an OpenMS bioinformatics data analysis workflow that combines accurate quantification with high proteome coverage.
In order to enable visualization, peptide-mapping, and sharing of quantitative proteomic data, especially for membrane-spanning and cell-surface proteins, we further developed the web-application Proteator (http://proteator.appspot.com). Due to its simplicity and robustness, we expect metabolic leucine labelling in yeast to be of great interest to the research community. As an exemplary application, we show the identification of the copper transporter Ctr1 as a putative substrate of the ER-intramembrane protease Ypf1 by yeast membrane proteomics using d3-leucine isotopic labelling.",2016-07-15 +26543174,meRanTK: methylated RNA analysis ToolKit.,"

Unlabelled

The significance and function of posttranscriptional cytosine methylation in poly(A)RNA attracts great interest but is still poorly understood. High-throughput sequencing of RNA treated with bisulfite (RNA-BSseq) or subjected to enrichment techniques like Aza-IP or miCLIP enables transcriptome wide studies of this particular modification at single base pair resolution. However, to date, there are no specialized software tools available for the analysis of RNA-BSseq or Aza-IP data. Therefore, we developed meRanTK, the first publicly available tool kit which addresses the special demands of high-throughput RNA cytosine methylation data analysis. It provides fast and easy to use splice-aware bisulfite sequencing read mapping, comprehensive methylation calling and identification of differentially methylated cytosines by statistical analysis of single- and multi-replicate experiments. Application of meRanTK to RNA-BSseq or Aza-IP data produces accurate results in standard compliant formats.

Availability and implementation

meRanTK, source code and test data are released under the GNU GPLv3+ license and are available at http://icbi.at/software/meRanTK/ CONTACT: dietmar.rieder@i-med.ac.at.",2015-11-04 +24016071,CellMinerHCC: a microarray-based expression database for hepatocellular carcinoma cell lines.,"

Background & aims

Therapeutic options for hepatocellular carcinoma (HCC) still remain limited. Development of gene targeted therapies is a promising option. A better understanding of the underlying molecular biology is gained in in vitro experiments. However, even with targeted manipulation of gene expression varying treatment responses were observed in diverse HCC cell lines. Therefore, information on gene expression profiles of various HCC cell lines may be crucial to experimental designs. To generate a publicly available database containing microarray expression profiles of diverse HCC cell lines.

Methods

Microarray data were analyzed using an individually scripted R program package. Data were stored in a PostgreSQL database with a PHP written web interface. Evaluation and comparison of individual cell line expression profiles are supported via public web interface.

Results

This database allows evaluation of gene expression profiles of 18 HCC cell lines and comparison of differential gene expression between multiple cell lines. Analysis of commonly regulated genes for signaling pathway enrichment and interactions demonstrates a liver tumor phenotype with enrichment of major cancer related KEGG signatures like 'cancer' and 'inflammatory response'. Further molecular associations of strong scientific interest, e.g. 'lipid metabolism', were also identified.

Conclusions

We have generated CellMinerHCC (http://www.medicalgenomics.org/cellminerhcc), a publicly available database containing gene expression data of 18 HCC cell lines. This database will aid in the design of in vitro experiments in HCC research, because the genetic specificities of various HCC cell lines will be considered.",2013-09-09 +23662787,Automatic chemical structure annotation of an LC-MS(n) based metabolic profile from green tea.,"Liquid chromatography coupled with multistage accurate mass spectrometry (LC-MS(n)) can generate comprehensive spectral information of metabolites in crude extracts. To support structural characterization of the many metabolites present in such complex samples, we present a novel method ( http://www.emetabolomics.org/magma ) to automatically process and annotate the LC-MS(n) data sets on the basis of candidate molecules from chemical databases, such as PubChem or the Human Metabolite Database. Multistage MS(n) spectral data is automatically annotated with hierarchical trees of in silico generated substructures of candidate molecules to explain the observed fragment ions and alternative candidates are ranked on the basis of the calculated matching score. We tested this method on an untargeted LC-MS(n) (n ≤ 3) data set of a green tea extract, generated on an LC-LTQ/Orbitrap hybrid MS system. For the 623 spectral trees obtained in a single LC-MS(n) run, a total of 116,240 candidate molecules with monoisotopic masses matching within 5 ppm mass accuracy were retrieved from the PubChem database, ranging from 4 to 1327 candidates per molecular ion. The matching scores were used to rank the candidate molecules for each LC-MS(n) component. The median and third quartile fractional ranks for 85 previously identified tea compounds were 3.5 and 7.5, respectively. The substructure annotations and rankings provided detailed structural information of the detected components, beyond annotation with elemental formula only. 
Twenty-four additional components were putatively identified by expert interpretation of the automatically annotated data set, illustrating the potential to support systematic and untargeted metabolite identification.",2013-05-31 +27980629,Homozygosity disequilibrium and its gene regulation.,"Homozygosity disequilibrium (HD) describes a nonrandom pattern of sizable runs of homozygosity (ROH) that deviated from a random distribution of homozygotes and heterozygotes in the genome. In this study, we developed a double-weight local polynomial model for estimating homozygosity intensity. This new estimation method enables considering the local property and genetic information of homozygosity in the human genome when detecting regions of HD. By using this new method, we estimated whole-genome homozygosity intensities by analyzing real whole-genome sequencing data of 959 related individuals from 20 large pedigrees provided by Genetic Analysis Workshop 19 (GAW19). Through the analysis, we derived the distribution of HD in the human genome and provided evidence for the genetic component of natural variation in HD. Generalized estimating equation analysis for 855 related individuals was performed to identify regions of HD associated with diastolic blood pressure (DBP), systolic blood pressure, and hypertension (HTN), with concomitant adjustment for age and sex. We identified one DBP-associated and 2 HTN-associated regions of HD. We also studied the gene regulation of HD by analyzing the real whole-genome transcription data of 647 individuals. A set of gene expressions regulated by the DBP- and HTN-associated regions of HD was identified. Finally, we conducted simulation studies to evaluate the performance of our homozygosity association test. The results showed that the association test had a high power and that type 1 error was controlled. 
The methods have been integrated into our developed Loss-of-Heterozygosity Analysis Suite software, which can be downloaded at http://www.stat.sinica.edu.tw/hsinchou/genetics/loh/LOHAS.htm.",2016-10-18 +28394474,Proteomic identification of rainbow trout blood plasma proteins and their relationship to seminal plasma proteins. ,"The characterisation of fish blood proteomes is important for comparative studies of seminal and blood proteins as well as for the analysis of fish immune mechanisms and pathways. In this study, LC-MS/MS and 2D-DIGE were applied to compare rainbow trout seminal (SP) and blood plasma (BP) proteomes. The 54 differentially abundant proteins identified in SP are involved in a variety of signalling pathways, including protein ubiquitination, liver X receptor/retinoid X receptor (LXR/RXR) and farnesoid X receptor activation, cell cycle and acute phase signalling. These findings may indicate the prevalence of acute phase signalling pathways in trout SP, and its essential role in protecting spermatozoa and reproductive tissues. Our study provides the first in-depth analysis of the trout BP proteome, with a total of 119 proteins identified. The major proteins of rainbow trout BP were recognised as acute phase proteins. Analysis of BP proteins indicated that acute phase response signalling, the complement system, liver X receptor/retinoid X receptor and farnesoid X receptor activation and the coagulation system are the top canonical pathways. This study enhances knowledge of the blood origin of trout SP proteins and understanding of fish reproductive biology. Our results provide new insight into blood proteins specifically important for fish physiology and innate immunity. The mass spectrometry data are available via ProteomeXchange with the identifier PXD005988 and https://doi.org/10.6019/PXD005988.",2017-06-01 +23285197,An integrative approach to inferring gene regulatory module networks.,"

Background

Gene regulatory networks (GRNs) provide insight into the mechanisms of differential gene expression at a system level. However, the methods for inference, functional analysis and visualization of gene regulatory modules and GRNs require the user to collect heterogeneous data from many sources using numerous bioinformatics tools. This makes the analysis expensive and time-consuming.

Results

In this work, the BiologicalNetworks application-the data integration and network based research environment-was extended with tools for inference and analysis of gene regulatory modules and networks. The backend database of the application integrates public data on gene expression, pathways, transcription factor binding sites, gene and protein sequences, and functional annotations. Thus, all data essential for the gene regulation analysis can be mined publicly. In addition, the user's data can either be integrated in the database and become public, or kept private within the application. The capabilities to analyze multiple gene expression experiments are also provided.

Conclusion

The generated modular networks, regulatory modules and binding sites can be visualized and further analyzed within this same application. The developed tools were applied to the mouse model of asthma and the OCT4 regulatory network in embryonic stem cells. Developed methods and data are available through the Java application from BiologicalNetworks program at http://www.biologicalnetworks.org.",2012-12-20 +24297257,NECTAR: a database of codon-centric missense variant annotations.,"NECTAR (Non-synonymous Enriched Coding muTation ARchive; http://nectarmutation.org) is a database and web application to annotate disease-related and functionally important amino acids in human proteins. A number of tools are available to facilitate the interpretation of DNA variants identified in diagnostic or research sequencing. These typically identify previous reports of DNA variation at a given genomic location, predict its effects on transcript and protein sequence and may predict downstream functional consequences. Previous reports and functional annotations are typically linked by the genomic location of the variant observed. NECTAR collates disease-causing variants and functionally important amino acid residues from a number of sources. Importantly, rather than simply linking annotations by a shared genomic location, NECTAR annotates variants of interest with details of previously reported variation affecting the same codon. This provides a much richer data set for the interpretation of a novel DNA variant. NECTAR also identifies functionally equivalent amino acid residues in evolutionarily related proteins (paralogues) and, where appropriate, transfers annotations between them. As well as accessing these data through a web interface, users can upload batches of variants in variant call format (VCF) for annotation on-the-fly. 
The database is freely available to download from the ftp site: ftp://ftp.nectarmutation.org.",2013-12-01 +28934728,Has Toxicity Testing Moved into the 21st Century? A Survey and Analysis of Perceptions in the Field of Toxicology.,"

Background

Ten years ago, leaders in the field of toxicology called for a transformation of the discipline and a shift from primarily relying on traditional animal testing to incorporating advances in biotechnology and predictive methodologies into alternative testing strategies (ATS). Governmental agencies and academic and industry partners initiated programs to support such a transformation, but a decade later, the outcomes of these efforts are not well understood.

Objectives

We aimed to assess the use of ATS and the perceived barriers and drivers to their adoption by toxicologists and by others working in, or closely linked with, the field of toxicology.

Methods

We surveyed 1,381 toxicologists and experts in associated fields regarding the viability and use of ATS and the perceived barriers and drivers of ATS for a range of applications. We performed ranking, hierarchical clustering, and correlation analyses of the survey data.

Results

Many respondents indicated that they were already using ATS, or believed that ATS were already viable approaches, for toxicological assessment of one or more end points in their primary area of interest or concern (26-86%, depending on the specific ATS/application pair). However, the proportions of respondents reporting use of ATS in the previous 12 mo were smaller (4.5-41%). Concern about regulatory acceptance was the most commonly cited factor inhibiting the adoption of ATS, and a variety of technical concerns were also cited as significant barriers to ATS viability. The factors most often cited as playing a significant role (currently or in the future) in driving the adoption of ATS were the need for expedited toxicology information, the need for reduced toxicity testing costs, demand by regulatory agencies, and ethical or moral concerns.

Conclusions

Our findings indicate that the transformation of the field of toxicology is partly implemented, but significant barriers to acceptance and adoption remain. https://doi.org/10.1289/EHP1435.",2017-08-30 +25204235,"The 5th National Audit Project (NAP5) on accidental awareness during general anaesthesia: protocol, methods and analysis of data.","Accidental awareness during general anaesthesia with recall is a potentially distressing complication of general anaesthesia that can lead to psychological harm. The 5th National Audit Project was designed to investigate the reported incidence, predisposing factors, causality and impact of accidental awareness. A nationwide network of local co-ordinators across all UK and Irish public hospitals reported all new patient reports of accidental awareness to a central database, using a system of monthly anonymised reporting over a calendar year. The database collected the details of the reported event, anaesthetic and surgical technique, and any sequelae. These reports were categorised into main types by a multidisciplinary panel, using a formalised process of analysis. The main categories of accidental awareness were: certain or probable; possible; during sedation; on or from the intensive care unit; could not be determined; unlikely; drug errors; and statement only. The degree of evidence to support the categorisation was also defined for each report. Patient experience and sequelae were categorised using current tools or modifications of such. The 5th National Audit Project methodology may be used to assess new reports of accidental awareness during general anaesthesia in a standardised manner, especially for the development of an ongoing database of case reporting. 
This paper is a shortened version describing the protocols, methods and data analysis from 5th National Audit Project - the full report can be found at http://www.nationalauditprojects.org.uk/NAP5_home#pt.",2014-10-01 +28083826,"Development of a Reference Image Collection Library for Histopathology Image Processing, Analysis and Decision Support Systems Research.","Histopathology image processing, analysis and computer-aided diagnosis have been shown as effective assisting tools towards reliable and intra-/inter-observer invariant decisions in traditional pathology. Especially for cancer patients, decisions need to be as accurate as possible in order to increase the probability of optimal treatment planning. In this study, we propose a new image collection library (HICL-Histology Image Collection Library) comprising 3831 histological images of three different diseases, for fostering research in histopathology image processing, analysis and computer-aided diagnosis. Raw data comprised 93, 116 and 55 cases of brain, breast and laryngeal cancer respectively collected from the archives of the University Hospital of Patras, Greece. The 3831 images were generated from the most representative regions of the pathology, specified by an experienced histopathologist. The HICL Image Collection is free for access under an academic license at http://medisp.bme.teiath.gr/hicl/ . Potential exploitations of the proposed library may span over a broad spectrum, such as in image processing to improve visualization, in segmentation for nuclei detection, in decision support systems for second opinion consultations, in statistical analysis for investigation of potential correlations between clinical annotations and imaging findings and, generally, in fostering research on histopathology image processing and analysis.
To the best of our knowledge, the HICL constitutes the first attempt towards creation of a reference image collection library in the field of traditional histopathology, publicly and freely available to the scientific community.",2017-06-01 +23825557,Prioritization of Copy Number Variation Loci Associated with Autism from AutDB-An Integrative Multi-Study Genetic Database.,"Copy number variants (CNVs) are thought to play an important role in the predisposition to autism spectrum disorder (ASD). However, their relatively low frequency and widespread genomic distribution complicates their accurate characterization and utilization for clinical genetics purposes. Here we present a comprehensive analysis of multi-study, genome-wide CNV data from AutDB (http://mindspec.org/autdb.html), a genetic database that accommodates detailed annotations of published scientific reports of CNVs identified in ASD individuals. Overall, we evaluated 4,926 CNVs in 2,373 ASD subjects from 48 scientific reports, encompassing ∼2.12×10(9) bp of genomic data. Remarkable variation was seen in CNV size, with duplications being significantly larger than deletions, (P  =  3×10(-105); Wilcoxon rank sum test). Examination of the CNV burden across the genome revealed 11 loci with a significant excess of CNVs among ASD subjects (P<7×10(-7)). Altogether, these loci covered 15,610 kb of the genome and contained 166 genes. Remarkable variation was seen both in locus size (20 - 4950 kb), and gene content, with seven multigenic (≥3 genes) and four monogenic loci. CNV data from control populations was used to further refine the boundaries of these ASD susceptibility loci. Interestingly, our analysis indicates that 15q11.2-13.3, a genomic region prone to chromosomal rearrangements of various sizes, contains three distinct ASD susceptibility CNV loci that vary in their genomic boundaries, CNV types, inheritance patterns, and overlap with CNVs from control populations. 
In summary, our analysis of AutDB CNV data provides valuable insights into the genomic characteristics of ASD susceptibility CNV loci and could therefore be utilized in various clinical settings and facilitate future genetic research of this disorder.",2013-06-18 +27742821,HieranoiDB: a database of orthologs inferred by Hieranoid.,"HieranoiDB (http://hieranoiDB.sbc.su.se) is a freely available on-line database for hierarchical groups of orthologs inferred by the Hieranoid algorithm. It infers orthologs at each node in a species guide tree with the InParanoid algorithm as it progresses from the leaves to the root. Here we present a database HieranoiDB with a web interface that makes it easy to search and visualize the output of Hieranoid, and to download it in various formats. Searching can be performed using protein description, identifier or sequence. In this first version, orthologs are available for the 66 Quest for Orthologs reference proteomes. The ortholog trees are shown graphically and interactively with marked speciation and duplication nodes that show the inferred evolutionary scenario, and allow for correct extraction of predicted orthologs from the Hieranoid trees.",2016-10-13 +26139636,ERGC: an efficient referential genome compression algorithm.,"

Motivation

Genome sequencing has become faster and more affordable. Consequently, the number of available complete genomic sequences is increasing rapidly. As a result, the cost to store, process, analyze and transmit the data is becoming a bottleneck for research and future medical applications. So, the need for devising efficient data compression and data reduction techniques for biological sequencing data is growing by the day. Although there exists a number of standard data compression algorithms, they are not efficient in compressing biological data. These generic algorithms do not exploit some inherent properties of the sequencing data while compressing. To exploit statistical and information-theoretic properties of genomic sequences, we need specialized compression algorithms. Five different next-generation sequencing data compression problems have been identified and studied in the literature. We propose a novel algorithm for one of these problems known as reference-based genome compression.

Results

We have done extensive experiments using five real sequencing datasets. The results on real genomes show that our proposed algorithm is indeed competitive and performs better than the best known algorithms for this problem. It achieves compression ratios that are better than those of the currently best performing algorithms. The time to compress and decompress the whole genome is also very promising.

Availability and implementation

The implementations are freely available for non-commercial purposes. They can be downloaded from http://engr.uconn.edu/~rajasek/ERGC.zip.

Contact

rajasek@engr.uconn.edu.",2015-07-02 +28858828,Arsenic and Obesity: A Comparison of Urine Dilution Adjustment Methods.,"

Introduction

A commonly used approach to adjust for urine dilution in analyses of biomarkers is to adjust for urinary creatinine. However, creatinine is a product of muscle mass and is therefore associated with body mass. In studies of urinary analytes and obesity or obesity-related outcomes, controlling for creatinine could induce collider stratification bias. We illustrate this phenomenon with an analysis of urinary arsenic.

Objective

We aimed to evaluate various approaches of adjustment for urinary dilution on the associations between urinary arsenic concentration and measures of obesity.

Methods

Using data from the National Health and Nutrition Examination Survey, we regressed body mass index (BMI) and waist-to-height ratios on urinary arsenic concentrations. We compared eight approaches to account for urine dilution, including standardization by urinary creatinine, osmolality, and flow rates, and inclusion of these metrics as independent covariates. We also used a recently proposed method known as covariate-adjusted standardization.

Results

Inverse associations between urinary arsenic concentration with BMI and waist-to-height ratio were observed when either creatinine or osmolality were used to standardize or as covariates. Not adjusting for dilution, standardizing or adjusting for urinary flow rate, and using covariate-adjusted standardization resulted in null associations observed between arsenic concentration in relation to BMI and waist-to-height ratio.

Conclusions

Our findings suggest that arsenic exposure is not associated with obesity, and that urinary creatinine and osmolality may be colliders on the causal pathway from arsenic exposure to obesity, as common descendants of hydration and body composition. In studies of urinary biomarkers and obesity or obesity-related outcomes, alternative metrics such as urinary flow rate or analytic strategies such as covariate-adjusted standardization should be considered. https://doi.org/10.1289/EHP1202.",2017-08-28 +28369161,Enhanced methods to detect haplotypic effects on gene expression.,"

Motivation

Expression quantitative trait loci (eQTLs), genetic variants associated with gene expression levels, are identified in eQTL mapping studies. Such studies typically test for an association between single nucleotide polymorphisms (SNPs) and expression under an additive model, which ignores interaction and haplotypic effects. Mismatches between the model tested and the underlying genetic architecture can lead to a loss of association power. Here we introduce a new haplotype-based test for eQTL studies that looks for haplotypic effects on expression levels. Our test is motivated by compound heterozygous architectures, a common disease model for recessive monogenic disorders, where two different alleles can have the same effect on a gene's function.

Results

When the underlying true causal architecture for a simulated gene is a compound heterozygote, our method is better able to capture the signal than the marginal SNP method. When the underlying model is a single SNP, there is no difference in the power of our method relative to the marginal SNP method. We apply our method to empirical gene expression data measured in 373 European individuals from the GEUVADIS study and find 29 more eGenes (genes with at least one association) than the standard marginal SNP method. Furthermore, in 974 of the 3529 total eGenes, our haplotype-based method results in a stronger association signal than the standard marginal SNP method. This demonstrates our method both increases power over the standard method and provides evidence of haplotypic architectures regulating gene expression.

Availability and implementation

http://bogdan.bioinformatics.ucla.edu/software/.

Contact

rob.brown@ucla.edu or pasaniuc@ucla.edu.",2017-08-01 +28655038,Kinematic Analysis of Speech Sound Sequencing Errors Induced by Delayed Auditory Feedback.,"

Purpose

Delayed auditory feedback (DAF) causes speakers to become disfluent and make phonological errors. Methods for assessing the kinematics of speech errors are lacking, with most DAF studies relying on auditory perceptual analyses, which may be problematic, as errors judged to be categorical may actually represent blends of sounds or articulatory errors.

Method

Eight typical speakers produced nonsense syllable sequences under normal and DAF (200 ms). Lip and tongue kinematics were captured with electromagnetic articulography. Time-locked acoustic recordings were transcribed, and the kinematics of utterances with and without perceived errors were analyzed with existing and novel quantitative methods.

Results

New multivariate measures showed that for 5 participants, kinematic variability for productions perceived to be error free was significantly increased under delay; these results were validated by using the spatiotemporal index measure. Analysis of error trials revealed both typical productions of a nontarget syllable and productions with articulatory kinematics that incorporated aspects of both the target and the perceived utterance.

Conclusions

This study is among the first to characterize articulatory changes under DAF and provides evidence for different classes of speech errors, which may not be perceptually salient. New methods were developed that may aid visualization and analysis of large kinematic data sets.

Supplemental material

https://doi.org/10.23641/asha.5103067.",2017-06-01 +28200120,nala: text mining natural language mutation mentions.,"

Motivation

The extraction of sequence variants from the literature remains an important task. Existing methods primarily target standard (ST) mutation mentions (e.g. 'E6V'), leaving relevant mentions natural language (NL) largely untapped (e.g. 'glutamic acid was substituted by valine at residue 6').

Results

We introduced three new corpora suggesting named-entity recognition (NER) to be more challenging than anticipated: 28-77% of all articles contained mentions only available in NL. Our new method nala captured NL and ST by combining conditional random fields with word embedding features learned unsupervised from the entire PubMed. In our hands, nala substantially outperformed the state-of-the-art. For instance, we compared all unique mentions in new discoveries correctly detected by any of three methods (SETH, tmVar, or nala ). Neither SETH nor tmVar discovered anything missed by nala , while nala uniquely tagged 33% mentions. For NL mentions the corresponding value shot up to 100% nala -only.

Availability and implementation

Source code, API and corpora freely available at: http://tagtog.net/-corpora/IDP4+ .

Contact

nala@rostlab.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +24148649,snOPY: a small nucleolar RNA orthological gene database.,"

Background

Small nucleolar RNAs (snoRNAs) are a class of non-coding RNAs that guide the modification of specific nucleotides in ribosomal RNAs (rRNAs) and small nuclear RNAs (snRNAs). Although most non-coding RNAs undergo post-transcriptional modifications prior to maturation, the functional significance of these modifications remains unknown. Here, we introduce the snoRNA orthological gene database (snOPY) as a tool for studying RNA modifications.

Findings

snOPY provides comprehensive information about snoRNAs, snoRNA gene loci, and target RNAs. It also contains data for orthologues from various species, which enables users to analyze the evolution of snoRNA genes. In total, 13,770 snoRNA genes, 10,345 snoRNA gene loci, and 133 target RNAs have been registered. Users can search and access the data efficiently using a simple web interface with a series of internal links. snOPY is freely available on the web at http://snoopy.med.miyazaki-u.ac.jp.

Conclusions

snOPY is the database that provides information about the small nucleolar RNAs and their orthologues. It will help users to study RNA modifications and snoRNA gene evolution.",2013-10-23 +26395772,APTANI: a computational tool to select aptamers through sequence-structure motif analysis of HT-SELEX data.,"

Motivation

Aptamers are synthetic nucleic acid molecules that can bind biological targets in virtue of both their sequence and three-dimensional structure. Aptamers are selected using SELEX, Systematic Evolution of Ligands by EXponential enrichment, a technique that exploits aptamer-target binding affinity. The SELEX procedure, coupled with high-throughput sequencing (HT-SELEX), creates billions of random sequences capable of binding different epitopes on specific targets. Since this technique produces enormous amounts of data, computational analysis represents a critical step to screen and select the most biologically relevant sequences.

Results

Here, we present APTANI, a computational tool to identify target-specific aptamers from HT-SELEX data and secondary structure information. APTANI builds on AptaMotif algorithm, originally implemented to analyze SELEX data; extends the applicability of AptaMotif to HT-SELEX data and introduces new functionalities, as the possibility to identify binding motifs, to cluster aptamer families or to compare output results from different HT-SELEX cycles. Tabular and graphical representations facilitate the downstream biological interpretation of results.

Availability and implementation

APTANI is available at http://aptani.unimore.it.

Contact

silvio.bicciato@unimore.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-22 +23951226,Mutationmapper: a tool to aid the mapping of protein mutation data.,"There has been a rapid increase in the amount of mutational data due to, amongst other things, an increase in single nucleotide polymorphism (SNP) data and the use of site-directed mutagenesis as a tool to help dissect out functional properties of proteins. Many manually curated databases have been developed to index point mutations but they are not sustainable with the ever-increasing volume of scientific literature. There have been considerable efforts in the automatic extraction of mutation specific information from raw text involving use of various text-mining approaches. However, one of the key problems is to link these mutations with its associated protein and to present this data in such a way that researchers can immediately contextualize it within a structurally related family of proteins. To aid this process, we have developed an application called MutationMapper. Point mutations are extracted from abstracts and are validated against protein sequences in Uniprot as far as possible. Our methodology differs in a fundamental way from the usual text-mining approach. Rather than start with abstracts, we start with protein sequences, which facilitates greatly the process of validating a potential point mutation identified in an abstract. The results are displayed as mutations mapped on to the protein sequence or a multiple sequence alignment. The latter enables one to readily pick up mutations performed at equivalent positions in related proteins. We demonstrate the use of MutationMapper against several examples including a single sequence and multiple sequence alignments. The application is available as a web-service at http://mutationmapper.bioch.ox.ac.uk.",2013-08-09 +24007337,GRASP [Genomic Resource Access for Stoichioproteomics]: comparative explorations of the atomic content of 12 Drosophila proteomes.,"

Background

""Stoichioproteomics"" relates the elemental composition of proteins and proteomes to variation in the physiological and ecological environment. To help harness and explore the wealth of hypotheses made possible under this framework, we introduce GRASP (http://www.graspdb.net), a public bioinformatic knowledgebase containing information on the frequencies of 20 amino acids and atomic composition of their side chains. GRASP integrates comparative protein composition data with annotation data from multiple public databases. Currently, GRASP includes information on proteins of 12 sequenced Drosophila (fruit fly) proteomes, which will be expanded to include increasingly diverse organisms over time. In this paper we illustrate the potential of GRASP for testing stoichioproteomic hypotheses by conducting an exploratory investigation into the composition of 12 Drosophila proteomes, testing the prediction that protein atomic content is associated with species ecology and with protein expression levels.

Results

Elements varied predictably along multivariate axes. Species were broadly similar, with the D. willistoni proteome a clear outlier. As expected, individual protein atomic content within proteomes was influenced by protein function and amino acid biochemistry. Evolution in elemental composition across the phylogeny followed less predictable patterns, but was associated with broad ecological variation in diet. Using expression data available for D. melanogaster, we found evidence consistent with selection for efficient usage of elements within the proteome: as expected, nitrogen content was reduced in highly expressed proteins in most tissues, most strongly in the gut, where nutrients are assimilated, and least strongly in the germline.

Conclusions

The patterns identified here using GRASP provide a foundation on which to base future research into the evolution of atomic composition in Drosophila and other taxa.",2013-09-04 +28349405,Quantitative Modeling and Automated Analysis of Meiotic Recombination.,"Many morphological features, in both physical and biological systems, exhibit spatial patterns that are specifically characterized by a tendency to occur with even spacing (in one, two, or three dimensions). The positions of crossover (CO) recombination events along meiotic chromosomes provide an interesting biological example of such an effect. In general, mechanisms that explain such patterns may (a) be mechanically based, (b) occur by a reaction-diffusion mechanism in which macroscopic mechanical effects are irrelevant, or (c) involve a combination of both types of effects. We have proposed that meiotic CO patterns arise by a mechanical mechanism, have developed mathematical expressions for such a process based on a particular physical system with analogous properties (the so-called beam-film model), and have shown that the beam-film model can very accurately explain experimental CO patterns as a function of the values of specific defined parameters. Importantly, the mathematical expressions of the beam-film model can apply quite generally to any mechanism, whether it involves mechanical components or not, as long as its logic and component features correspond to those of the beam-film system. Furthermore, via its various parameters, the beam-film model discretizes the patterning process into specific components. Thus, the model can be used to explore the theoretically predicted effects of various types of changes in the patterning process. Such predictions can expand detailed understanding of the bases for various biological effects. 
We present here a new MATLAB program that implements the mathematical expressions of the beam-film model with increased robustness and accessibility as compared to programs presented previously. As in previous versions, the presented program permits both (1) simulation of predicted CO positions along chromosomes of a test population and (2) easy analysis of CO positions, both for experimental data sets and for data sets resulting from simulations. The goal of the current presentation is to make these approaches more readily accessible to a wider audience of researchers. Also, the program is easily modified, and we encourage interested users to make changes to suit their specific needs. A link to the program is available on the Kleckner laboratory website: http://projects.iq.harvard.edu/kleckner_lab .",2017-01-01 +24045775,Pattern search in BioPAX models.,"

Motivation

BioPAX is a standard language for representing complex cellular processes, including metabolic networks, signal transduction and gene regulation. Owing to the inherent complexity of a BioPAX model, searching for a specific type of subnetwork can be non-trivial and difficult.

Results

We developed an open source and extensible framework for defining and searching graph patterns in BioPAX models. We demonstrate its use with a sample pattern that captures directed signaling relations between proteins. We provide search results for the pattern obtained from the Pathway Commons database and compare these results with the current data in signaling databases SPIKE and SignaLink. Results show that a pattern search in public pathway data can identify a substantial amount of signaling relations that do not exist in signaling databases.

Availability

BioPAX-pattern software was developed in Java. Source code and documentation is freely available at http://code.google.com/p/biopax-pattern under Lesser GNU Public License.",2013-09-16 +27742405,Having a BLAST: Searchable transcriptome resources for the gilthead sea bream and the European sea bass.,"The gilthead sea bream (Sparus aurata) and the European sea bass (Dicentrarchus labrax) are the most important aquaculture species in the Mediterranean Sea and since the last decade it has been seen an exponential increase in their available molecular resources. In order to improve accessibility to transcriptome resources, Expressed Sequence Tags (ESTs), mRNA sequences and raw read sequences were assembled and deposited in BLAST queryable databases. The publicly available sea bream and sea bass sequences (6.4 and 247.5 million) generated 45,094 and 68,117 assembled sequences, with, respectively, arithmetic mean size of 998 and 2125bp and N50 of 1302 and 2966bp. The assemblies will be regularly updated and new analytical tools added to the web server at http://sea.ccmar.ualg.pt.",2016-10-11 +27725713,Systems-level analysis reveals selective regulation of Aqp2 gene expression by vasopressin.,"Vasopressin-mediated regulation of renal water excretion is defective in a variety of water balance disorders in humans. It occurs in part through long-term mechanisms that regulate the abundance of the aquaporin-2 water channel in renal collecting duct cells. Here, we use deep DNA sequencing in mouse collecting duct cells to ask whether vasopressin signaling selectively increases Aqp2 gene transcription or whether it triggers a broadly targeted transcriptional network. ChIP-Seq quantification of binding sites for RNA polymerase II was combined with RNA-Seq quantification of transcript abundances to identify genes whose transcription is regulated by vasopressin. (View curated dataset at https://helixweb.nih.gov/ESBL/Database/Vasopressin/). 
The analysis revealed only 35 vasopressin-regulated genes (of 3659) including Aqp2. Increases in RNA polymerase II binding and mRNA abundances for Aqp2 far outstripped corresponding measurements for all other genes, consistent with the conclusion that vasopressin-mediated transcriptional regulation is highly selective for Aqp2. Despite the overall selectivity of the net transcriptional response, vasopressin treatment was associated with increased RNA polymerase II binding to the promoter proximal region of a majority of expressed genes, suggesting a nearly global positive regulation of transcriptional initiation with transcriptional pausing. Thus, the overall net selectivity appears to be a result of selective control of transcriptional elongation.",2016-10-11 +28437450,ESPRIT-Forest: Parallel clustering of massive amplicon sequence data in subquadratic time.,"The rapid development of sequencing technology has led to an explosive accumulation of genomic sequence data. Clustering is often the first step to perform in sequence analysis, and hierarchical clustering is one of the most commonly used approaches for this purpose. However, it is currently computationally expensive to perform hierarchical clustering of extremely large sequence datasets due to its quadratic time and space complexities. In this paper we developed a new algorithm called ESPRIT-Forest for parallel hierarchical clustering of sequences. The algorithm achieves subquadratic time and space complexity and maintains a high clustering accuracy comparable to the standard method. The basic idea is to organize sequences into a pseudo-metric based partitioning tree for sub-linear time searching of nearest neighbors, and then use a new multiple-pair merging criterion to construct clusters in parallel using multiple threads. The new algorithm was tested on the human microbiome project (HMP) dataset, currently one of the largest published microbial 16S rRNA sequence dataset. 
Our experiment demonstrated that with the power of parallel computing it is now computationally feasible to perform hierarchical clustering analysis of tens of millions of sequences. The software is available at http://www.acsu.buffalo.edu/~yijunsun/lab/ESPRIT-Forest.html.",2017-04-24 +28724145,Tense Marking in the English Narrative Retells of Dual Language Preschoolers.,"

Purpose

This longitudinal study investigated the emergence of English tense marking in young (Spanish-English) dual language learners (DLLs) over 4 consecutive academic semesters, addressing the need for longitudinal data on typical acquisition trajectories of English in DLL preschoolers.

Method

Language sample analysis was conducted on 139 English narrative retells elicited from 39 preschool-age (Spanish-English) DLLs (range = 39-65 months). Growth curve models captured within- and between-individual change in tense-marking accuracy over time. Tense-marking accuracy was indexed by the finite verb morphology composite and by 2 specifically developed adaptations. Individual tense markers were systematically described in terms of overall accuracy and specific error patterns.

Results

Tense-marking accuracy exhibited significant growth over time for each composite. Initially, irregular past-tense accuracy was higher than regular past-tense accuracy; over time, however, regular past-tense marking outpaced accuracy on irregular verbs.

Conclusions

These findings suggest that young DLLs can achieve high tense-marking accuracy assuming 2 years of immersive exposure to English. Monitoring the growth in tense-marking accuracy over time and considering productive tense-marking errors as partially correct more precisely captured the emergence of English tense marking in this population with highly variable expressive language skills.

Supplemental materials

https://doi.org/10.23641/asha.5176942.",2017-07-01 +26639183,Modeling X Chromosome Data Using Random Forests: Conquering Sex Bias.,"Machine learning methods, including Random Forests (RF), are increasingly used for genetic data analysis. However, the standard RF algorithm does not correctly model the effects of X chromosome single nucleotide polymorphisms (SNPs), leading to biased estimates of variable importance. We propose extensions of RF to correctly model X SNPs, including a stratified approach and an approach based on the process of X chromosome inactivation. We applied the new and standard RF approaches to case-control alcohol dependence data from the Study of Addiction: Genes and Environment (SAGE), and compared the performance of the alternative approaches via a simulation study. Standard RF applied to a case-control study of alcohol dependence yielded inflated variable importance estimates for X SNPs, even when sex was included as a variable, but the results of the new RF methods were consistent with univariate regression-based approaches that correctly model X chromosome data. Simulations showed that the new RF methods eliminate the bias in standard RF variable importance for X SNPs when sex is associated with the trait, and are able to detect causal autosomal and X SNPs. Even in the absence of sex effects, the new extensions perform similarly to standard RF. Thus, we provide a powerful multimarker approach for genetic analysis that accommodates X chromosome data in an unbiased way. This method is implemented in the freely available R package ""snpRF"" (http://www.cran.r-project.org/web/packages/snpRF/).",2015-12-07 +26122086,A user-friendly workflow for analysis of Illumina gene expression bead array data available at the arrayanalysis.org portal.,"

Background

Illumina whole-genome expression bead arrays are a widely used platform for transcriptomics. Most of the tools available for the analysis of the resulting data are not easily applicable by less experienced users. ArrayAnalysis.org provides researchers with an easy-to-use and comprehensive interface to the functionality of R and Bioconductor packages for microarray data analysis. As a modular open source project, it allows developers to contribute modules that provide support for additional types of data or extend workflows.

Results

To enable data analysis of Illumina bead arrays for a broad user community, we have developed a module for ArrayAnalysis.org that provides a free and user-friendly web interface for quality control and pre-processing for these arrays. This module can be used together with existing modules for statistical and pathway analysis to provide a full workflow for Illumina gene expression data analysis. The module accepts data exported from Illumina's GenomeStudio, and provides the user with quality control plots and normalized data. The outputs are directly linked to the existing statistics module of ArrayAnalysis.org, but can also be downloaded for further downstream analysis in third-party tools.

Conclusions

The Illumina bead arrays analysis module is available at http://www.arrayanalysis.org . A user guide, a tutorial demonstrating the analysis of an example dataset, and R scripts are available. The module can be used as a starting point for statistical evaluation and pathway analysis provided on the website or to generate processed input data for a broad range of applications in life sciences research.",2015-06-30 +27411809,High type I error and misrepresentations in search for transgenerational epigenetic inheritance: response to Guerrero-Bosagna.,"In a recent paper, we described our efforts in search for evidence supporting epigenetic transgenerational inheritance caused by endocrine disrupter chemicals. One aspect of our study was to compare genome-wide DNA methylation changes in the vinclozolin-exposed fetal male germ cells (n = 3) to control samples (n = 3), their counterparts in the next, unexposed, generation (n = 3 + 3) and also in adult spermatozoa (n = 2 + 2) in both generations. We reported finding zero common hits in the intersection of these four comparisons. In our interpretation, this result did not support the notion that DNA methylation provides a mechanism for a vinclozolin-induced transgenerational male infertility phenotype. In response to criticism by Guerrero-Bosagna regarding our statistical power in the above study, here we provide power calculations to clarify the statistical power of our study and to show the validity of our conclusions. 
We also explain here how our data is misinterpreted in the commentary by Guerrero-Bosagna by leaving out important data points from consideration.Please see related Correspondence article: xxx (13059_2016_982) and related Research article: http://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0619-z.",2016-07-12 +27721396,GFinisher: a new strategy to refine and finish bacterial genome assemblies.,"Despite the development in DNA sequencing technology, improving the number and the length of reads, the process of reconstruction of complete genome sequences, the so called genome assembly, is still complex. Only 13% of the prokaryotic genome sequencing projects have been completed. Draft genome sequences deposited in public databases are fragmented in contigs and may lack the full gene complement. The aim of the present work is to identify assembly errors and improve the assembly process of bacterial genomes. The biological patterns observed in genomic sequences and the application of a priori information can allow the identification of misassembled regions, and the reorganization and improvement of the overall de novo genome assembly. GFinisher starts generating a Fuzzy GC skew graphs for each contig in an assembly and follows breaking down the contigs in critical points in order to reassemble and close them using jFGap. This has been successfully applied to dataset from 96 genome assemblies, decreasing the number of contigs by up to 86%. GFinisher can easily optimize assemblies of prokaryotic draft genomes and can be used to improve the assembly programs based on nucleotide sequence patterns in the genome. The software and source code are available at http://gfinisher.sourceforge.net/.",2016-10-10 +27723837,RNAMethPre: A Web Server for the Prediction and Query of mRNA m6A Sites.,"N6-Methyladenosine (m6A) is the most common mRNA modification; it occurs in a wide range of taxon and is associated with many key biological processes. 
High-throughput experiments have identified m6A-peaks and sites across the transcriptome, but studies of m6A sites at the transcriptome-wide scale are limited to a few species and tissue types. Therefore, the computational prediction of mRNA m6A sites has become an important strategy. In this study, we integrated multiple features of mRNA (flanking sequences, local secondary structure information, and relative position information) and trained a SVM classifier to predict m6A sites in mammalian mRNA sequences. Our method achieves ideal performance in both cross-validation tests and rigorous independent dataset tests. The server also provides a comprehensive database of predicted transcriptome-wide m6A sites and curated m6A-seq peaks from the literature for both human and mouse, and these can be queried and visualized in a genome browser. The RNAMethPre web server provides a user-friendly tool for the prediction and query of mRNA m6A sites, which is freely accessible for public use at http://bioinfo.tsinghua.edu.cn/RNAMethPre/index.html.",2016-10-10 +29339430,Simultaneous Binding of Multiple EF-Tu Copies to Translating Ribosomes in Live Escherichia coli. ,"In bacteria, elongation factor Tu is a translational cofactor that forms ternary complexes with aminoacyl-tRNA (aa-tRNA) and GTP. Binding of a ternary complex to one of four flexible L7/L12 units on the ribosome tethers a charged tRNA in close proximity to the ribosomal A site. Two sequential tests for a match between the aa-tRNA anticodon and the current mRNA codon then follow. Because one elongation cycle can occur in as little as 50 ms and the vast majority of aa-tRNA copies are not cognate with the current mRNA codon, this testing must occur rapidly. 
We present a single-molecule localization and tracking study of fluorescently labeled EF-Tu in live Escherichia coli Imaging at 2 ms/frame distinguishes 60% slowly diffusing EF-Tu copies (assigned as transiently bound to translating ribosome) from 40% rapidly diffusing copies (assigned as a mixture of free ternary complexes and free EF-Tu). Combining these percentages with copy number estimates, we infer that the four L7/L12 sites are essentially saturated with ternary complexes in vivo. The results corroborate an earlier inference that all four sites can simultaneously tether ternary complexes near the A site, creating a high local concentration that may greatly enhance the rate of testing of aa-tRNAs. Our data and a combinatorial argument both suggest that the initial recognition test for a codon-anticodon match occurs in less than 1 to 2 ms per aa-tRNA copy. The results refute a recent study (A. Plochowietz, I. Farrell, Z. Smilansky, B. S. Cooperman, and A. N. Kapanidis, Nucleic Acids Res 45:926-937, 2016, https://doi.org/10.1093/nar/gkw787) of tRNA diffusion in E. coli that inferred that aa-tRNAs arrive at the ribosomal A site as bare monomers, not as ternary complexes.IMPORTANCE Ribosomes catalyze translation of the mRNA codon sequence into the corresponding sequence of amino acids within the nascent polypeptide chain. Polypeptide elongation can be as fast as 50 ms per added amino acid. Each amino acid arrives at the ribosome as a ternary complex comprising an aminoacyl-tRNA (aa-tRNA), an elongation factor called EF-Tu, and GTP. There are 43 different aa-tRNAs in use, only one of which typically matches the current mRNA codon. Thus, ternary complexes must be tested very rapidly. Here we use fluorescence-based single-molecule methods that locate and track single EF-Tu copies in E. coli Fast and slow diffusive behavior determines the fraction of EF-Tu copies that are ribosome bound. 
We infer simultaneous tethering of ~4 ternary complexes to the ribosome, which may facilitate rapid initial testing for codon matching on a time scale of less than 1 to 2 ms per aa-tRNA.",2018-01-16 +28881997,"Predicting phenotypes from microarrays using amplified, initially marginal, eigenvector regression.","

Motivation

The discovery of relationships between gene expression measurements and phenotypic responses is hampered by both computational and statistical impediments. Conventional statistical methods are less than ideal because they either fail to select relevant genes, predict poorly, ignore the unknown interaction structure between genes, or are computationally intractable. Thus, the creation of new methods which can handle many expression measurements on relatively small numbers of patients while also uncovering gene-gene relationships and predicting well is desirable.

Results

We develop a new technique for using the marginal relationship between gene expression measurements and patient survival outcomes to identify a small subset of genes which appear highly relevant for predicting survival, produce a low-dimensional embedding based on this small subset, and amplify this embedding with information from the remaining genes. We motivate our methodology by using gene expression measurements to predict survival time for patients with diffuse large B-cell lymphoma, illustrate the behavior of our methodology on carefully constructed synthetic examples, and test it on a number of other gene expression datasets. Our technique is computationally tractable, generally outperforms other methods, is extensible to other phenotypes, and also identifies different genes (relative to existing methods) for possible future study.

Availability and implementation

All of the code and data are available at http://mypage.iu.edu/∼dajmcdon/research/ .

Contact

dajmcdon@indiana.edu.

Supplementary information

Supplementary material is available at Bioinformatics online.",2017-07-01 +24912662,A flexible simulation platform to quantify and manage emergency department crowding.,"

Background

Hospital-based Emergency Departments are struggling to provide timely care to a steadily increasing number of unscheduled ED visits. Dwindling compensation and rising ED closures dictate that meeting this challenge demands greater operational efficiency.

Methods

Using techniques from operations research theory, as well as a novel event-driven algorithm for processing priority queues, we developed a flexible simulation platform for hospital-based EDs. We tuned the parameters of the system to mimic U.S. nationally average and average academic hospital-based ED performance metrics and are able to assess a variety of patient flow outcomes including patient door-to-event times, propensity to leave without being seen, ED occupancy level, and dynamic staffing and resource use.

Results

The causes of ED crowding are variable and require site-specific solutions. For example, in a nationally average ED environment, provider availability is a surprising, but persistent bottleneck in patient flow. As a result, resources expended in reducing boarding times may not have the expected impact on patient throughput. On the other hand, reallocating resources into alternate care pathways can dramatically expedite care for lower acuity patients without delaying care for higher acuity patients. In an average academic ED environment, bed availability is the primary bottleneck in patient flow. Consequently, adjustments to provider scheduling have a limited effect on the timeliness of care delivery, while shorter boarding times significantly reduce crowding. An online version of the simulation platform is available at http://spark.rstudio.com/klopiano/EDsimulation/.

Conclusion

In building this robust simulation framework, we have created a novel decision-support tool that ED and hospital managers can use to quantify the impact of proposed changes to patient flow prior to implementation.",2014-06-09 +24881812,"ITS2, 18S, 16S or any other RNA - simply aligning sequences and their individual secondary structures simultaneously by an automatic approach.","Secondary structures of RNA sequences are increasingly being used as additional information in reconstructing phylogenies and/or in distinguishing species by compensatory base change (CBC) analyses. However, in most cases just one secondary structure is used in manually correcting an automatically generated multiple sequence alignment and/or just one secondary structure is used in guiding a sequence alignment still completely generated by hand. With the advent of databases and tools offering individual RNA secondary structures, here we re-introduce a twelve letter code already implemented in 4SALE - a tool for synchronous sequence and secondary structure alignment and editing - that enables one to align RNA sequences and their individual secondary structures synchronously and fully automatic, while dramatically increasing the phylogenetic information content. We further introduce a scaled down non-GUI version of 4SALE particularly designed for big data analysis, and available at: http://4sale.bioapps.biozentrum.uni-wuerzburg.de.",2014-06-02 +28611620,ATPP: A Pipeline for Automatic Tractography-Based Brain Parcellation.,"There is a longstanding effort to parcellate brain into areas based on micro-structural, macro-structural, or connectional features, forming various brain atlases. Among them, connectivity-based parcellation gains much emphasis, especially with the considerable progress of multimodal magnetic resonance imaging in the past two decades. The Brainnetome Atlas published recently is such an atlas that follows the framework of connectivity-based parcellation. 
However, in the construction of the atlas, the deluge of high resolution multimodal MRI data and time-consuming computation poses challenges and there is still short of publically available tools dedicated to parcellation. In this paper, we present an integrated open source pipeline (https://www.nitrc.org/projects/atpp), named Automatic Tractography-based Parcellation Pipeline (ATPP) to realize the framework of parcellation with automatic processing and massive parallel computing. ATPP is developed to have a powerful and flexible command line version, taking multiple regions of interest as input, as well as a user-friendly graphical user interface version for parcellating single region of interest. We demonstrate the two versions by parcellating two brain regions, left precentral gyrus and middle frontal gyrus, on two independent datasets. In addition, ATPP has been successfully utilized and fully validated in a variety of brain regions and the human Brainnetome Atlas, showing the capacity to greatly facilitate brain parcellation.",2017-05-29 +23072312,Gene Fusion Markup Language: a prototype for exchanging gene fusion data.,"

Background

An avalanche of next generation sequencing (NGS) studies has generated an unprecedented amount of genomic structural variation data. These studies have also identified many novel gene fusion candidates with more detailed resolution than previously achieved. However, in the excitement and necessity of publishing the observations from this recently developed cutting-edge technology, no community standardization approach has arisen to organize and represent the data with the essential attributes in an interchangeable manner. As transcriptome studies have been widely used for gene fusion discoveries, the current non-standard mode of data representation could potentially impede data accessibility, critical analyses, and further discoveries in the near future.

Results

Here we propose a prototype, Gene Fusion Markup Language (GFML) as an initiative to provide a standard format for organizing and representing the significant features of gene fusion data. GFML will offer the advantage of representing the data in a machine-readable format to enable data exchange, automated analysis interpretation, and independent verification. As this database-independent exchange initiative evolves it will further facilitate the formation of related databases, repositories, and analysis tools. The GFML prototype is made available at http://code.google.com/p/gfml-prototype/.

Conclusion

The Gene Fusion Markup Language (GFML) presented here could facilitate the development of a standard format for organizing, integrating and representing the significant features of gene fusion data in an inter-operable and query-able fashion that will enable biologically intuitive access to gene fusion findings and expedite functional characterization. A similar model is envisaged for other NGS data analyses.",2012-10-16 +28077570,KTCNlncDB-a first platform to investigate lncRNAs expressed in human keratoconus and non-keratoconus corneas. ,"Keratoconus (KTCN, OMIM 148300) is a degenerative eye disorder characterized by progressive stromal thinning that leads to a conical shape of the cornea, resulting in optical aberrations and even loss of visual function. The biochemical background of the disease is poorly understood, which motivated us to perform RNA-Seq experiment, aimed at better characterizing the KTCN transcriptome and identification of long non-coding RNAs (lncRNAs) that might be involved in KTCN etiology. The in silico functional studies based on predicted lncRNA:RNA base-pairings led us to recognition of a number of lncRNAs possibly regulating genes with known or plausible links to KTCN. The lncRNA sequences and data regarding their predicted functions in controlling the RNA processing and stability are available for browse, search and download in KTCNlncDB (http://rhesus.amu.edu.pl/KTCNlncDB/), the first online platform devoted to KTCN transcriptome.Database URL: http://rhesus.amu.edu.pl/KTCNlncDB/.",2017-01-10 +28934095,Lifelong Residential Exposure to Green Space and Attention: A Population-based Prospective Study.,"

Background

Natural environments, including green spaces, may have beneficial impacts on brain development. However, longitudinal evidence of an association between long-term exposure to green spaces and cognitive development (including attention) in children is limited.

Objectives

We evaluated the association between lifelong residential exposure to green space and attention during preschool and early primary school years.

Methods

This longitudinal study was based on data from two well-established population-based birth cohorts in Spain. We assessed lifelong exposure to residential surrounding greenness and tree cover as the average of satellite-based normalized difference vegetation index and vegetation continuous fields, respectively, surrounding the child's residential addresses at birth, 4-5 y, and 7 y. Attention was characterized using two computer-based tests: Conners' Kiddie Continuous Performance Test (K-CPT) at 4-5 y (n=888) and Attentional Network Task (ANT) at 7 y (n=987). We used adjusted mixed effects models with cohort random effects to estimate associations between exposure to greenness and attention at ages 4-5 and 7 y.

Results

Higher lifelong residential surrounding greenness was associated with fewer K-CPT omission errors and lower K-CPT hit reaction time-standard error (HRT-SE) at 4-5 y and lower ANT HRT-SE at 7 y, consistent with better attention. This exposure was not associated with K-CPT commission errors or with ANT omission or commission errors. Associations with residential surrounding tree cover also were close to the null, or were negative (for ANT HRT-SE) but not statistically significant.

Conclusion

Exposure to residential surrounding greenness was associated with better scores on tests of attention at 4-5 y and 7 y of age in our longitudinal cohort. https://doi.org/10.1289/EHP694.",2017-09-18 +24078711,GPCR ontology: development and application of a G protein-coupled receptor pharmacology knowledge framework.,"

Motivation

Novel tools need to be developed to help scientists analyze large amounts of available screening data with the goal to identify entry points for the development of novel chemical probes and drugs. As the largest class of drug targets, G protein-coupled receptors (GPCRs) remain of particular interest and are pursued by numerous academic and industrial research projects.

Results

We report the first GPCR ontology to facilitate integration and aggregation of GPCR-targeting drugs and demonstrate its application to classify and analyze a large subset of the PubChem database. The GPCR ontology, based on previously reported BioAssay Ontology, depicts available pharmacological, biochemical and physiological profiles of GPCRs and their ligands. The novelty of the GPCR ontology lies in the use of diverse experimental datasets linked by a model to formally define these concepts. Using a reasoning system, GPCR ontology offers potential for knowledge-based classification of individuals (such as small molecules) as a function of the data.

Availability

The GPCR ontology is available at http://www.bioassayontology.org/bao_gpcr and the National Center for Biomedical Ontologies Web site.",2013-09-29 +27797779,JADOPPT: java based AutoDock preparing and processing tool.,"

Motivation

AutoDock is a very popular software package for docking and virtual screening. However, currently it is hard work to visualize more than one result from the virtual screening at a time. To overcome this limitation we have designed JADOPPT, a tool for automatically preparing and processing multiple ligand-protein docked poses obtained from AutoDock. It allows the simultaneous visual assessment and comparison of multiple poses through clustering methods. Moreover, it permits the representation of reference ligands with known binding modes, binding site residues, highly scoring regions for the ligand, and the calculated binding energy of the best ranked results.

Availability and implementation

JADOPPT, supplementary material (Case Studies 1 and 2) and video tutorials are available at http://visualanalytics.land/cgarcia/JADOPPT.html.

Contacts

carlosgarcia@usal.es or pelaez@usal.es.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +27727092,[Deceased donation in renal transplantation].,"

Objectives

To review epidemiologic data and medical results of deceased donation in renal transplantation.

Material and methods

Relevant publications were identified through Medline (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) database using the following keywords, alone or in association, ""brain death; cardiac arrest; deceased donation; organ procurement; transplantation"". Articles were selected according to methods, language of publication and relevance. The reference lists were used to identify additional historical studies of interest. Both prospective and retrospective series, in French and English, as well as review articles and recommendations were selected. In addition, French national transplant and health agencies (http://www.agence-biomedecine.fr and http://www.has-sante.fr) databases were screened using identical keywords. A total of 2498 articles, 8 official reports and 17 newspaper articles were identified; after careful selection 157 publications were eligible for our review.

Results

Deceased donation may involve either brain death or non-heartbeating donors (NHBD). Organ shortage led to the procurement of organs from expanded-criteria donors, with an increased age at donation and extended vascular disease, leading to inferior results after transplantation and underlining the need for careful donor management during brain death or cardiac arrest. Evolution of French legislation covering bioethics allowed procurement from Maastricht categories II and recently III non-heartbeating donors.

Conclusion

The increase of organ shortage emphasizes the need for a rigorous surgical technique during procurement to avoid loss of transplants. A history or current neoplasm in deceased-donors, requires attention to increase the pool of organs without putting the recipients at risk for cancer transmission. French NHBD program, especially from Maastricht category III, may stand for a potential source of valuable organs.",2016-10-08 +28496131,Leveraging multiple genomic data to prioritize disease-causing indels from exome sequencing data.,"The emergence of exome sequencing in recent years has enabled rapid and cost-effective detection of genetic variants in coding regions and offers a great opportunity to combine sequencing experiments with subsequent computational analysis for dissecting genetic basis of human inherited diseases. However, this strategy, though successful in practice, still faces such challenges as limited sample size and substantial number or diversity of candidate variants. To overcome these obstacles, researchers have been concentrated in the development of advanced computational methods and have recently achieved great progress for analysing single nucleotide variant. Nevertheless, it still remains unclear on how to analyse indels, another type of genetic variant that accounts for substantial proportion of known disease-causing variants. In this paper, we proposed an integrative method to effectively identify disease-causing indels from exome sequencing data. Specifically, we put forward a statistical method to combine five functional prediction scores, four genic association scores and a genic intolerance score to produce an integrated p-value, which could then be used for prioritizing candidate indels. We performed extensive simulation studies and demonstrated that our method achieved high accuracy in uncovering disease-causing indels. 
Our software is available at http://bioinfo.au.tsinghua.edu.cn/jianglab/IndelPrioritizer/.",2017-05-11 +26635364,Wasabi: An Integrated Platform for Evolutionary Sequence Analysis and Data Visualization.,"Wasabi is an open source, web-based environment for evolutionary sequence analysis. Wasabi visualizes sequence data together with a phylogenetic tree within a modern, user-friendly interface: The interface hides extraneous options, supports context sensitive menus, drag-and-drop editing, and displays additional information, such as ancestral sequences, associated with specific tree nodes. The Wasabi environment supports reproducibility by automatically storing intermediate analysis steps and includes built-in functions to share data between users and publish analysis results. For computational analysis, Wasabi supports PRANK and PAGAN for phylogeny-aware alignment and alignment extension, and it can be easily extended with other tools. Along with drag-and-drop import of local files, Wasabi can access remote data through URL and import sequence data, GeneTrees and EPO alignments directly from Ensembl. To demonstrate a typical workflow using Wasabi, we reproduce key findings from recent comparative genomics studies, including a reanalysis of the EGLN1 gene from the tiger genome study: These case studies can be browsed within Wasabi at http://wasabiapp.org:8000?id=usecases. Wasabi runs inside a web browser and does not require any installation. One can start using it at http://wasabiapp.org. All source code is licensed under the AGPLv3.",2015-12-03 +27282790,Coping with menopausal symptoms: An internet survey of Belgian postmenopausal women.,"

Objectives

An internet survey was performed to obtain data on the current use in Belgium of hormone replacement therapy and alternative treatments for the alleviation of menopausal symptoms. A supplementary aim was to assess the use of opt-in internet opinion panels (TalkToChange, http://www.talktochange.com, and GMI, http://www.gmi-mr.com/global-panel) as a potential new way to obtain data on menopausal issues.

Study design

Data were collected via an internet platform from 696 postmenopausal women aged 45-60 years.

Outcome measures

Respondents were asked questions about their socio-demographic profile, their experience of the menopause, the burden of the menopause, its impact on their quality of life and the treatment of menopausal symptoms (if any).

Results

The opt-in internet opinion panels proved a quick way (19days) to obtain reliable information with a low error margin (3.7%). The online survey collected detailed socio-demographic data. Almost all of the women (98%) had heard about the menopause before. Sixty-one percent perceived the menopause as a temporary phase (17% thought it lasted for one or two years and 44% thought it lasted for three to five years) and only 39% realized the menopause would last for the rest of their life. Twenty-three percent of the women reported any kind of impact of the menopause on their quality of life. However, for the other 77% the menopause had resulted in complaints. No differences according to the women's age, level of education or professional status were found in this respect. Sixty-nine percent of the women had 'ever' used some type of treatment for menopausal symptoms and 53% were currently using a treatment. Forty percent of those with more than three symptoms were currently untreated. Of those who were not on hormone replacement therapy (HRT), 61% would not consider taking it (54% were 'strongly opposed' and 7% simply 'opposed'), while 8% would consider asking their doctor for HRT. Among those women who were opposed to HRT, 25% indicated that they were afraid of the increased risk of breast cancer, 34% cited cardiovascular risks and 26% were worried about weight gain. In this Belgian sample, HRT was used significantly more often by French-speaking women (32%) than by Dutch-speaking women (9%) (OR 4.4, p<0.0001). The alternatives to HRT had a high satisfaction rate among users. Relaxation techniques, regular physical activity, acupuncture and avoiding stress had satisfaction rates similar to that with HRT. It was not possible to compare the alternatives in the same women. Nor was it possible to assess whether more pronounced symptoms required a specific treatment.

Conclusion

Opt-in internet opinion panels proved a quick and efficient way to gather data on menopausal issues in Belgium. Despite the high levels of awareness and knowledge, there is some confusion concerning the duration of the menopause, and its common perception as a temporary condition is likely to mean that the menopausal burden is substantially underestimated. Many symptomatic women are untreated.",2016-05-07 +24125645,"The diversity of shell matrix proteins: genome-wide investigation of the pearl oyster, Pinctada fucata.","In molluscs, shell matrix proteins are associated with biomineralization, a biologically controlled process that involves nucleation and growth of calcium carbonate crystals. Identification and characterization of shell matrix proteins are important for better understanding of the adaptive radiation of a large variety of molluscs. We searched the draft genome sequence of the pearl oyster Pinctada fucata and annotated 30 different kinds of shell matrix proteins. Of these, we could identified Perlucin, ependymin-related protein and SPARC as common genes shared by bivalves and gastropods; however, most gastropod shell matrix proteins were not found in the P. fucata genome. Glycinerich proteins were conserved in the genus Pinctada. Another important finding with regard to these annotated genes was that numerous shell matrix proteins are encoded by more than one gene; e.g., three ACCBP-like proteins, three CaLPs, five chitin synthase-like proteins, two N16 proteins (pearlins), 10 N19 proteins, two nacreins, four Pifs, nine shematrins, two prismalin-14 proteins, and 21 tyrosinases. This diversity of shell matrix proteins may be implicated in the morphological diversity of mollusc shells. The annotated genes reported here can be searched in P. fucata gene models version 1.1 and genome assembly version 1.0 ( http://marinegenomics.oist.jp/pinctada_fucata ). 
These genes should provide a useful resource for studies of the genetic basis of biomineralization and evaluation of the role of shell matrix proteins as an evolutionary toolkit among the molluscs.",2013-10-01 +,Joint Space Loss after Arthroscopic Partial Meniscectomy: Data from the Osteoarthritis Initiative,"

Objectives:

Knee osteoarthritis (OA) is a prevalent disease that causes substantial disability and use of medical resources, and knee arthroscopy is frequently performed in patients with OA or at risk of developing OA. While meniscectomy has been associated with progression of OA in multiple studies, none have assessed progression of joint space width (JSW) loss compared to matched controls. The Osteoarthritis Initiative (OAI) provides a unique cohort to enable this evaluation. We hypothesize that JSW significantly decreases in meniscectomy patients versus matched controls within a one-year period, and that joint space loss continues to be more rapid in subsequent years.

Methods:

A prospective cohort study with matching was conducted using records from the OAI public use data sets. The cohort (n=4796) contains the incidence subcohort (normal radiographs with risk of developing OA) and the progression subcohort (radiographic evidence of OA). Subjects have fixed-flexion radiographs taken at yearly intervals and validated measurements of JSW are performed. Additional details about the OAI and study design are publicly accessible at http://oai.epi-ucsf.org/datarelease/About.asp. 141 meniscectomy knees were identified and 141 controls were randomly selected while matching for subcohort, gender, study site, age, knee side, and year. Paired t-test was used to evaluate change in JSW over the first year in the 141 matched pairs. Repeated measures MANOVA with adjustment for age, gender, race, and BMI was used to assess longitudinal changes in JSW in a subset of 33 matched pairs with 4 years of JSW measurements available.

Results:

Meniscectomy and control groups were balanced with respect to age, gender, race, BMI, and baseline JSW. The JSW decrease over a 1-year period was 0.948 mm in meniscectomy knees and 0.137 mm in controls (p<0.0001). Table 1 shows similar results when stratifying by subcohort. In the crude and adjusted analyses of knees with 4 years of follow-up, the rate of JSW loss after the first year was not significantly different between meniscectomy knees and controls as shown in Figure 1.

Conclusion:

Arthroscopic partial meniscectomy is associated with increased loss of JSW during the first year after surgery in knees with OA and knees at risk of developing OA; however, the rate of JSW loss is not accelerated over the next 3 years. Immediate JSW narrowing may occur due to loss of the interposed meniscus, due to morphologic changes such as flattening and extrusion, or due to rapid degeneration of articular cartilage in response to increased tibiofemoral contact stress. Significance among both subcohorts suggests that meniscectomy causes progression of osteoarthritis independent of disease stage. Future investigation of change in cartilage and meniscal volumes on MR imaging may further explain the cause of this joint space loss.",2013-09-01 +27727091,[Chronic kidney disease and kidney transplantation].,"

Objectives

To report epidemiology and characteristics of end-stage renal disease (ESRD) patients and renal transplant candidates, and to evaluate access to waiting list and results of renal transplantation.

Material and methods

An exhaustive systematic review of the scientific literature was performed in the Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of the following keywords: ""chronic kidney disease, epidemiology, kidney transplantation, cost, survival, graft, brain death, cardiac arrest, access, allocation"". French legal documents have been reviewed using the government portal (http://www.legifrance.gouv.fr). Articles were selected according to methods, language of publication and relevance. The reference lists were used to identify additional historical studies of interest. Both prospective and retrospective series, in French and English, as well as review articles and recommendations were selected. In addition, French national transplant and health agencies (http://www.agence-biomedecine.fr and http://www.has-sante.fr) databases were screened using identical keywords. A total of 3234 articles, 6 official reports and 3 newspaper articles were identified; after careful selection 99 publications were eligible for our review.

Results

The increasing prevalence of chronic kidney disease (CKD) leads to worsen organ shortage. Renal transplantation remains the best treatment option for ESRD, providing recipients with an increased survival and quality of life, at lower costs than other renal replacement therapies. The never-ending lengthening of the waiting list raises issues regarding treatment strategies and candidates' selection, and underlines the limits of organ sharing without additional source of kidneys available for transplantation.

Conclusion

Allocation policies aim to reduce medical or geographical disparities regarding enrollment on a waiting list or access to an allotransplant.",2016-10-07 +27717309,RStrucFam: a web server to associate structure and cognate RNA for RNA-binding proteins from sequence information.,"

Background

RNA-binding proteins (RBPs) interact with their cognate RNA(s) to form large biomolecular assemblies. They are versatile in their functionality and are involved in a myriad of processes inside the cell. RBPs with similar structural features and common biological functions are grouped together into families and superfamilies. It will be useful to obtain an early understanding and association of RNA-binding property of sequences of gene products. Here, we report a web server, RStrucFam, to predict the structure, type of cognate RNA(s) and function(s) of proteins, where possible, from mere sequence information.

Results

The web server employs Hidden Markov Model scan (hmmscan) to enable association to a back-end database of structural and sequence families. The database (HMMRBP) comprises of 437 HMMs of RBP families of known structure that have been generated using structure-based sequence alignments and 746 sequence-centric RBP family HMMs. The input protein sequence is associated with structural or sequence domain families, if structure or sequence signatures exist. In case of association of the protein with a family of known structures, output features like, multiple structure-based sequence alignment (MSSA) of the query with all others members of that family is provided. Further, cognate RNA partner(s) for that protein, Gene Ontology (GO) annotations, if any and a homology model of the protein can be obtained. The users can also browse through the database for details pertaining to each family, protein or RNA and their related information based on keyword search or RNA motif search.

Conclusions

RStrucFam is a web server that exploits structurally conserved features of RBPs, derived from known family members and imprinted in mathematical profiles, to predict putative RBPs from sequence information. Proteins that fail to associate with such structure-centric families are further queried against the sequence-centric RBP family HMMs in the HMMRBP database. Further, all other essential information pertaining to an RBP, like overall function annotations, are provided. The web server can be accessed at the following link: http://caps.ncbs.res.in/rstrucfam .",2016-10-07 +28630197,Pharmacodynamics of Cefepime Combined with Tazobactam against Clinically Relevant Enterobacteriaceae in a Neutropenic Mouse Thigh Model. ,"The lack of new antibiotics has prompted investigation of the combination of two existing agents-cefepime, a broad-spectrum cephalosporin, and tazobactam-to broaden their efficacy against extended-spectrum beta-lactamase (ESBL)-producing Enterobacteriaceae We determined the pharmacokinetic (PK) and pharmacodynamic (PD) properties of the combination in a murine neutropenic thigh model in order to establish its exposure-response relationships (ERRs). The PK of cefepime were determined for five doses; that of tazobactam was determined in earlier studies (Melchers et al., Antimicrob Agents Chemother 59:3373-3376, 2015, https://doi.org/10.1128/AAC.04402-14). The PK were linear for both compounds. The estimated mean (standard deviation [SD]) half-life of cefepime was 0.33 (0.12) h, and that of tazobactam was 0.176 (0.026) h; the volumes of distribution (V) were 0.73 liters/kg and 1.14 liters/kg, respectively. PD studies of cefepime administered every 2 h (q2h) with or without tazobactam, including dose fractionation studies of tazobactam, were performed against six ESBL-producing isolates. A sigmoidal maximum-effect (Emax) model was fitted to the data. 
In the dose fractionation study, the q2h regimen was more efficacious than the q4h and q6h regimens, indicating time-dependent activity of tazobactam. The threshold concentration (CT ) best correlating with tazobactam efficacy was 0.25 mg/liter, as evidenced by the best fit of the percentage of time above the threshold concentration (%fT>CT ) and response. A mean %fT>CT of 24.6% (range, 11.4 to 36.3%) for a CT of 0.25 mg/liter was required to obtain a bacteriostatic effect. We conclude that tazobactam enhanced the effect of cefepime in otherwise resistant isolates of Enterobacteriaceae and that the %fT>CT of 0.25 mg/liter best correlated with efficacy. These studies provide the basis for the development of human dosing regimens for this combination.",2017-08-24 +28053161,GETPrime 2.0: gene- and transcript-specific qPCR primers for 13 species including polymorphisms.,"GETPrime (http://bbcftools.epfl.ch/getprime) is a database with a web frontend providing gene- and transcript-specific, pre-computed qPCR primer pairs. The primers have been optimized for genome-wide specificity and for allowing the selective amplification of one or several splice variants of most known genes. To ease selection, primers have also been ranked according to defined criteria such as genome-wide specificity (with BLAST), amplicon size, and isoform coverage. Here, we report a major upgrade (2.0) of the database: eight new species (yeast, chicken, macaque, chimpanzee, rat, platypus, pufferfish, and Anolis carolinensis) now complement the five already included in the previous version (human, mouse, zebrafish, fly, and worm). Furthermore, the genomic reference has been updated to Ensembl v81 (while keeping earlier versions for backward compatibility) as a result of re-designing the back-end database and automating the import of relevant sections of the Ensembl database in species-independent fashion. 
This also allowed us to map known polymorphisms to the primers (on average three per primer for human), with the aim of reducing experimental error when targeting specific strains or individuals. Another consequence is that the inclusion of future Ensembl releases and other species has now become a relatively straightforward task.",2016-10-07 +27229861,Prediction of peptidoglycan hydrolases- a new class of antibacterial proteins.,"

Background

The efficacy of antibiotics against bacterial infections is decreasing due to the development of resistance in bacteria, and thus, there is a need to search for potential alternatives to antibiotics. In this scenario, peptidoglycan hydrolases can be used as alternate antibacterial agents due to their unique property of cleaving peptidoglycan cell wall present in both gram-positive and gram-negative bacteria. Along with a role in maintaining overall peptidoglycan turnover in a cell and in daughter cell separation, peptidoglycan hydrolases also play crucial role in bacterial pathophysiology requiring development of a computational tool for the identification and classification of novel peptidoglycan hydrolases from genomic and metagenomic data.

Results

In this study, the known peptidoglycan hydrolases were divided into multiple classes based on their site of action and were used for the development of a computational tool 'HyPe' for identification and classification of novel peptidoglycan hydrolases from genomic and metagenomic data. Various classification models were developed using amino acid and dipeptide composition features by training and optimization of Random Forest and Support Vector Machines. Random Forest multiclass model was selected for the development of HyPe tool as it showed up to 71.12 % sensitivity, 99.98 % specificity, 99.55 % accuracy and 0.80 MCC in four different classes of peptidoglycan hydrolases. The tool was validated on 24 independent genomic datasets and showed up to 100 % sensitivity and 0.94 MCC. The ability of HyPe to identify novel peptidoglycan hydrolases was also demonstrated on 24 metagenomic datasets.

Conclusions

The present tool helps in the identification and classification of novel peptidoglycan hydrolases from complete genomic or metagenomic ORFs. To our knowledge, this is the only tool available for the prediction of peptidoglycan hydrolases from genomic and metagenomic data.

Availability

http://metagenomics.iiserb.ac.in/hype/ and http://metabiosys.iiserb.ac.in/hype/ .",2016-05-27 +27454228,Protein complexes predictions within protein interaction networks using genetic algorithms.,"

Background

Protein-protein interaction networks are receiving increased attention due to their importance in understanding life at the cellular level. A major challenge in systems biology is to understand the modular structure of such biological networks. Although clustering techniques have been proposed for clustering protein-protein interaction networks, those techniques suffer from some drawbacks. The application of earlier clustering techniques to protein-protein interaction networks in order to predict protein complexes within the networks does not yield good results due to the small-world and power-law properties of these networks.

Results

In this paper, we construct a new clustering algorithm for predicting protein complexes through the use of genetic algorithms. We design an objective function for exclusive clustering and overlapping clustering. We assess the quality of our proposed clustering algorithm using two gold-standard data sets.

Conclusions

Our algorithm can identify protein complexes that are significantly enriched in the gold-standard data sets. Furthermore, our method surpasses three competing methods: MCL, ClusterOne, and MCODE in terms of the quality of the predicted complexes. The source code and accompanying examples are freely available at http://faculty.kfupm.edu.sa/ics/eramadan/GACluster.zip .",2016-07-25 +27720628,[Simultaneous pancreas and kidney transplantation].,"

Objectives

To review the state of the art on the different aspects of pancreas transplantation such as indications, technical features, immunosuppressive strategies and outcomes of simultaneous pancreas-kidney transplantation.

Material and methods

An exhaustive systematic review of the scientific literature was performed in the Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of the following keywords (MESH) : « pancreas transplantation; kidney transplantation; simultaneous pancreas-kidney transplantation; immunosuppression ». Publications obtained were selected based on methodology, language, date of publication (last 20 years) and relevance. Prospective and retrospective studies, in English or French, review articles; meta-analysis and guidelines were selected and analyzed. This search found 2736 articles. After reading titles and abstracts, 23 were included in the text, based on their relevance.

Results

In the last few years, considerable progress has been made in optimizing the indications for pancreas transplantation, as well as in surgical technique and in the better use of immunosuppression. In the first part of this article, demographics, indication and pre-transplant evaluation will be described. The different techniques of procurement, preparation and transplantation will then be discussed. Finally, the results and outcomes of pancreas transplantation will be reported.

Conclusions

Despite its morbidity, pancreas transplantation is the optimal treatment of end stage renal disease in diabetic patients under 55. Long-term results and quality of life improvement after pancreas transplantation are excellent.

Level of evidence

NA.",2016-10-06 +21270440,Global gene expression analysis of human erythroid progenitors.,"Understanding the pattern of gene expression during erythropoiesis is crucial for a synthesis of erythroid developmental biology. Here, we isolated 4 distinct populations at successive erythropoietin-dependent stages of erythropoiesis, including the terminal, pyknotic stage. The transcriptome was determined using Affymetrix arrays. First, we demonstrated the importance of using defined cell populations to identify lineage and temporally specific patterns of gene expression. Cells sorted by surface expression profile not only express significantly fewer genes than unsorted cells but also demonstrate significantly greater differences in the expression levels of particular genes between stages than unsorted cells. Second, using standard software, we identified more than 1000 transcripts not previously observed to be differentially expressed during erythroid maturation, 13 of which are highly significantly terminally regulated, including RFXAP and SMARCA4. Third, using matched filtering, we identified 12 transcripts not previously reported to be continuously up-regulated in maturing human primary erythroblasts. Finally, using transcription factor binding site analysis, we identified potential transcription factors that may regulate gene expression during terminal erythropoiesis. Our stringent lists of differentially regulated and continuously expressed transcripts containing many genes with undiscovered functions in erythroblasts are a resource for future functional studies of erythropoiesis. Our Human Erythroid Maturation database is available at https://cellline.molbiol.ox.ac.uk/eryth/index.html. [corrected].",2011-01-26 +26436140,Entropy-scaling search of massive biological data.,"Many data sets exhibit well-defined structure that can be exploited to design faster search tools, but it is not always clear when such acceleration is possible. 
Here we introduce a framework for similarity search based on characterizing a data set's entropy and fractal dimension. We prove that searching scales in time with metric entropy (number of covering hyperspheres), if the fractal dimension of the data set is low, and scales in space with the sum of metric entropy and information-theoretic entropy (randomness of the data). Using these ideas, we present accelerated versions of standard tools, with no loss in specificity and little loss in sensitivity, for use in three domains-high-throughput drug screening (Ammolite, 150x speedup), metagenomics (MICA, 3.5x speedup of DIAMOND (3700x BLASTX)), and protein structure search (esFragBag, 10x speedup of FragBag). Our framework can be used to achieve 'compressive omics,' and the general theory can be readily applied to data science problems outside of biology. Source code: http://gems.csail.mit.edu.",2015-08-01 +27153681,OpenSegSPIM: a user-friendly segmentation tool for SPIM data.,"

Unlabelled

OpenSegSPIM is an open access and user friendly 3D automatic quantitative analysis tool for Single Plane Illumination Microscopy data. The software is designed to extract, in a user-friendly way, quantitative relevant information from SPIM image stacks, such as the number of nuclei or cells. It provides quantitative measurement (volume, sphericity, distance, intensity) on Light Sheet Fluorescent Microscopy images.

Availability and implementation

freely available from http://www.opensegspim.weebly.com Source code and binaries under BSD License.

Contact

lgole@imcb.a-star.edu.sg or wmyu@imcb.a-star.edu.sg or sohail.ahmed@imb.a-star.edu.sg

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-22 +27153660,BRAT-nova: fast and accurate mapping of bisulfite-treated reads.,"

Unlabelled

In response to increasing amounts of sequencing data, faster and faster aligners need to become available. Here, we introduce BRAT-nova, a completely rewritten and improved implementation of the mapping tool BRAT-BW for bisulfite-treated reads (BS-Seq). BRAT-nova is very fast and accurate. On the human genome, BRAT-nova is 2-7 times faster than state-of-the-art aligners, while maintaining the same percentage of uniquely mapped reads and space usage. On synthetic reads, BRAT-nova is 2-8 times faster than state-of-the-art aligners while maintaining similar mapping accuracy, methylation call accuracy, methylation level accuracy and space efficiency.

Availability and implementation

The software is available in the public domain at http://compbio.cs.ucr.edu/brat/

Contact

elenah@cs.ucr.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-23 +28062446,pHMM-tree: phylogeny of profile hidden Markov models.,"Protein families are often represented by profile hidden Markov models (pHMMs). Homology between two distant protein families can be determined by comparing the pHMMs. Here we explored the idea of building a phylogeny of protein families using the distance matrix of their pHMMs. We developed a new software and web server (pHMM-tree) to allow four major types of inputs: (i) multiple pHMM files, (ii) multiple aligned protein sequence files, (iii) mixture of pHMM and aligned sequence files and (iv) unaligned protein sequences in a single file. The output will be a pHMM phylogeny of different protein families delineating their relationships. We have applied pHMM-tree to build phylogenies for CAZyme (carbohydrate active enzyme) classes and Pfam clans, which attested its usefulness in the phylogenetic representation of the evolutionary relationship among distant protein families.

Availability and implementation

This software is implemented in C/C ++ and is available at http://cys.bios.niu.edu/pHMM-Tree/source/.

Contact

zhanghan@nankai.edu.cn or yyin@niu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +28053164,The Papillomavirus Episteme: a major update to the papillomavirus sequence database.,"The Papillomavirus Episteme (PaVE) is a database of curated papillomavirus genomic sequences, accompanied by web-based sequence analysis tools. This update describes the addition of major new features. The papillomavirus genomes within PaVE have been further annotated, and now includes the major spliced mRNA transcripts. Viral genes and transcripts can be visualized on both linear and circular genome browsers. Evolutionary relationships among PaVE reference protein sequences can be analysed using multiple sequence alignments and phylogenetic trees. To assist in viral discovery, PaVE offers a typing tool; a simplified algorithm to determine whether a newly sequenced virus is novel. PaVE also now contains an image library containing gross clinical and histopathological images of papillomavirus infected lesions. Database URL: https://pave.niaid.nih.gov/.",2016-10-05 +26936376,HSA: integrating multi-track Hi-C data for genome-scale reconstruction of 3D chromatin structure.,"Genome-wide 3C technologies (Hi-C) are being increasingly employed to study three-dimensional (3D) genome conformations. Existing computational approaches are unable to integrate accumulating data to facilitate studying 3D chromatin structure and function. We present HSA ( http://ouyanglab.jax.org/hsa/ ), a flexible tool that jointly analyzes multiple contact maps to infer 3D chromatin structure at the genome scale. HSA globally searches the latent structure underlying different cleavage footprints. Its robustness and accuracy outperform or rival existing tools on extensive simulations and orthogonal experiment validations. Applying HSA to recent in situ Hi-C data, we found the 3D chromatin structures are highly conserved across various human cell types.",2016-03-02 +27720313,[Surgical complications of renal transplantation].,"

Objective

To report the nature, incidence, diagnosis and treatment options of surgical complications after renal transplantation.

Material and methods

Relevant publications were identified through Medline (http://www.ncbi.nlm.nih.gov/) and Embase (http://www.embase.com/) database from 1960 to 2016 using the following keywords ""fistula; lymphocele; stricture; thrombosis"", in association with ""renal transplantation"" in Title/Abstract field. Articles were selected according to methods, language of publication and relevance. A total of 7618 articles were identified including specifically 981 for vascular complications, 1016 for urologic complications and 239 for lymphocele; after careful selection 190 publications were eligible for our review.

Results

Surgical complications occur in 1 to 30% of renal transplantations while being incompletely reported without consensual management. Angioplasty techniques led to a significant improvement of short- and long-term vascular complications outcome. Risk factors for transplant thrombosis are a right allotransplant, multiple renal arteries or vasculopathy in the donor, diabetes, arterial disease or thrombophilia in the recipient and hemodynamic changes during procedure. Urinary complications and lymphocele significantly impair overall outcome and recipients quality of life with no demonstrated impact on allotransplant survival. Immediate or salvage pelvi-ureterostomy is a main treatment option for ureteral strictures and fistula.

Conclusion

Prevention of surgical complications following renal transplantation relies on careful allotransplant preparation and strict respect of surgical best practices. Increasing comorbidities in recipients as well as marginal donors are significant limits for the improvement of post-transplant surgical outcome.",2016-10-05 +27688025,Completeness of Spontaneous Adverse Drug Reaction Reports Sent by General Practitioners to a Regional Pharmacovigilance Centre: A Descriptive Study.,"

Introduction

Spontaneous reporting of adverse drug reactions (ADRs) remains the cornerstone of postmarketing drug safety surveillance (pharmacovigilance); however, one of its main limitations is incomplete data, thus limiting conclusions about causality assessment.

Objective

The primary aim of this study was to assess the completeness of ADR reports sent by general practitioners (GPs) to regional pharmacovigilance centres and the secondary objective was to identify factors associated with complete ADR reports.

Methods

All ADR reports sent by GPs to the Midi-Pyrénées Regional Pharmacovigilance Center (Toulouse, France) from 1 January 2010 to 31 December 2013 were reviewed. Healthcare professionals and patients can forward an ADR using either an online form through the Pharmacology Information Bulletin website ( http://www.bip31.fr ) or 'traditional' ADR reports (i.e. email, letter or fax). According to information provided in ADR reports (i.e. patient identification, ADR, date of occurrence, clinical description, drugs, etc.), reports were classified into three groups: 'well-documented', 'slightly documented' or 'poorly documented'. A multivariate logistic regression was performed to investigate potential factors associated with a 'well-documented' ADR report.

Results

During the study period, 613 ADR reports were analysed. Among these reports, only 12.7 % were classified as 'well-documented', 68.5 % as 'slightly documented' and 18.8 % as 'poorly documented'. An association between a 'well-documented' ADR report and its 'seriousness' was found (odds ratio = 1.70 [95 % CI 1.04-2.76], p = 0.01). No association between report completeness ('well-documented' report) and GP practice location or mode of ADR reporting was found.

Conclusions

The study shows that only one out of eight ADR reports from GPs was 'well-documented'. Therefore, it appears to be important to promote further information being available regarding the data required in ADR reports to optimise the evaluation of drug causality.",2016-12-01 +24861626,A web tool for the design and management of panels of genes for targeted enrichment and massive sequencing for clinical applications.,"Disease targeted sequencing is gaining importance as a powerful and cost-effective application of high throughput sequencing technologies to the diagnosis. However, the lack of proper tools to process the data hinders its extensive adoption. Here we present TEAM, an intuitive and easy-to-use web tool that fills the gap between the predicted mutations and the final diagnostic in targeted enrichment sequencing analysis. The tool searches for known diagnostic mutations, corresponding to a disease panel, among the predicted patient's variants. Diagnostic variants for the disease are taken from four databases of disease-related variants (HGMD-public, HUMSAVAR, ClinVar and COSMIC.) If no primary diagnostic variant is found, then a list of secondary findings that can help to establish a diagnostic is produced. TEAM also provides with an interface for the definition of and customization of panels, by means of which, genes and mutations can be added or discarded to adjust panel definitions. TEAM is freely available at: http://team.babelomics.org.",2014-05-26 +28035031,AnglerFish: a webserver for defining the geometry of α-helices in membrane proteins.,"

Summary

Integral membrane proteins that form helical pores and bundles constitute major drug targets, and many of their structures have been defined by crystallography and cryo-electron microscopy. The gating of channels and ligand binding of transporters generally involve changes in orientation of one or more of the constituent helices in the structures. At present there is no standard easily accessible means for defining the orientation of a helix in a membrane protein structure. AnglerFish is a web-based tool for parameterising the angles of transmembrane helices based on PDB coordinates, with the helical orientations defined by the angles 'tilt' and 'swing'. AnglerFish is particularly useful for defining changes in structure between different states, including both symmetric and asymmetric transitions, and can be used to quantitate differences between related structures or different subunits within the same structure.

Availability and implementation

AnglerFish is freely available at http://anglerfish.cryst.bbk.ac.uk . The website is implemented in Perl-cgi and Apache and operation in all major browsers is supported. The source code is available at GitHub.

Contact

b.wallace@mail.cryst.bbk.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +27378295,Unbiased classification of spatial strategies in the Barnes maze.,"

Motivation

Spatial learning is one of the most widely studied cognitive domains in neuroscience. The Morris water maze and the Barnes maze are the most commonly used techniques to assess spatial learning and memory in rodents. Despite the fact that these tasks are well-validated paradigms for testing spatial learning abilities, manual categorization of performance into behavioral strategies is subject to individual interpretation, and thus to bias. We have previously described an unbiased machine-learning algorithm to classify spatial strategies in the Morris water maze.

Results

Here, we offer a support vector machine-based, automated, Barnes-maze unbiased strategy (BUNS) classification algorithm, as well as a cognitive score scale that can be used for memory acquisition, reversal training and probe trials. The BUNS algorithm can greatly benefit Barnes maze users as it provides a standardized method of strategy classification and cognitive scoring scale, which cannot be derived from typical Barnes maze data analysis.

Availability and implementation

Freely available on the web at http://okunlab.wix.com/okunlab as a MATLAB application.

Contact

eitan.okun@biu.ac.il Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-04 +27152837,Evaluation of OASIS QSAR Models Using ToxCast™ in Vitro Estrogen and Androgen Receptor Binding Data and Application in an Integrated Endocrine Screening Approach.,"

Background

Integrative testing strategies (ITSs) for potential endocrine activity can use tiered in silico and in vitro models. Each component of an ITS should be thoroughly assessed.

Objectives

We used the data from three in vitro ToxCast™ binding assays to assess OASIS, a quantitative structure-activity relationship (QSAR) platform covering both estrogen receptor (ER) and androgen receptor (AR) binding. For stronger binders (described here as AC50 < 1 μM), we also examined the relationship of QSAR predictions of ER or AR binding to the results from 18 ER and 10 AR transactivation assays, 72 ER-binding reference compounds, and the in vivo uterotrophic assay.

Methods

NovaScreen binding assay data for ER (human, bovine, and mouse) and AR (human, chimpanzee, and rat) were used to assess the sensitivity, specificity, concordance, and applicability domain of two OASIS QSAR models. The binding strength relative to the QSAR-predicted binding strength was examined for the ER data. The relationship of QSAR predictions of binding to transactivation- and pathway-based assays, as well as to in vivo uterotrophic responses, was examined.

Results

The QSAR models had both high sensitivity (> 75%) and specificity (> 86%) for ER as well as both high sensitivity (92-100%) and specificity (70-81%) for AR. For compounds within the domains of the ER and AR QSAR models that bound with AC50 < 1 μM, the QSAR models accurately predicted the binding for the parent compounds. The parent compounds were active in all transactivation assays where metabolism was incorporated and, except for those compounds known to require metabolism to manifest activity, all assay platforms where metabolism was not incorporated. Compounds in-domain and predicted to bind by the ER QSAR model that were positive in ToxCast™ ER binding at AC50 < 1 μM were active in the uterotrophic assay.

Conclusions

We used the extensive ToxCast™ HTS binding data set to show that OASIS ER and AR QSAR models had high sensitivity and specificity when compounds were in-domain of the models. Based on this research, we recommend a tiered screening approach wherein a) QSAR is used to identify compounds in-domain of the ER or AR binding models and predicted to bind; b) those compounds are screened in vitro to assess binding potency; and c) the stronger binders (AC50 < 1 μM) are screened in vivo. This scheme prioritizes compounds for integrative testing and risk assessment. Importantly, compounds that are not in-domain, that are predicted either not to bind or to bind weakly, that are not active in in vitro, that require metabolism to manifest activity, or for which in vivo AR testing is in order, need to be assessed differently.

Citation

Bhhatarai B, Wilson DM, Price PS, Marty S, Parks AK, Carney E. 2016. Evaluation of OASIS QSAR models using ToxCast™ in vitro estrogen and androgen receptor binding data and application in an integrated endocrine screening approach. Environ Health Perspect 124:1453-1461; http://dx.doi.org/10.1289/EHP184.",2016-05-06 +27706278,Fluid Choice Matters in Critically-ill Patients with Acute Pancreatitis: Lactated Ringer's vs. Isotonic Saline.,"

Objectives

To investigate the effect of different crystalloid solutions on clinical outcomes in critically-ill patients with acute pancreatitis (AP).

Methods

We conducted a retrospective study of patients with AP admitted to the ICU using the Multiparameter Intelligent Monitoring in Intensive Care III (MIMIC-III) database. We investigated the effect of fluid type; lactated ringer's (LR) vs. isotonic saline (IS) on hospital mortality rates, and ICU length of stay (LOS).

Results

Hospital mortality of the 198 included patients was 12%. For fluid type, 32.9% were resuscitated with LR vs. 67.1% with IS. Hospital mortality was lower in the LR group (5.8%) vs. 14.9% for IS group, odds ratio of 3.10 [P=0.041]. This effect was still observed after adjusting for confounders. However, ICU LOS was longer in LR compared to IS group; 6.2±6.9 vs. 4.2±4.49 days respectively [P= 0.020].

Conclusion

The type of fluid used for resuscitation in AP may affect the outcome. LR may have survival benefit over IS in critically-ill patients with AP. [Full article available at http://rimed.org/rimedicaljournal-2016-10.asp].",2016-10-04 +25818734,Endostar combined with chemotherapy compared with chemotherapy alone in the treatment of nonsmall lung carcinoma: A meta-analysis based on Chinese patients.,"

Introduction

Lung cancer is the leading cause of cancer-associated death world-wide. And the lung cancer is generally divided into small cell lung carcinoma and non-small cell lung cancer. For advanced NSCLC, the chemotherapy and target therapy were the important treatment modality. This meta-analysis was to evaluate the clinical efficacy and toxicity between endostar combined chemotherapy and chemotherapy alone in Chinese patients.

Materials and methods

We searched the PubMed, EMBASE, and CNKI databases to find the potential relevant articles reporting the endostar combined with chemotherapy regimen in the treatment of nonsmall cell lung cancer in Chinese patients. The tumor response and toxicity difference between the two groups were demonstrated by odds ratio (OR) and its 95% confidence interval (95% CI). All the data was pooled by Stata 11.0 (http://www.stata.com; Stata Corporation, College Station, TX) software.

Results

We included 14 studies published in Chinese or English. The pooled results showed that adding endostar to the chemotherapy regimen can significantly increase the objective response rate (OR = 2.42, 95% CI = 1.87-3.12, P = 0.00) and disease control rate (OR = 2.22, 95% CI = 1.68-2.94, P = 0.00). For toxicities, the pooled data showed no statistical difference for grade III-IV granulocytopenia risk (OR = 1.04, 95% CI = 0.74-1.44, P = 0.83), nausea and vomiting (OR = 0.93, 95% CI: 0.51-1.52, P = 0.78) or grade III-IV alopecia (OR = 0.99, 95% CI: 0.76-1.29, P = 0.95). The funnel plot showed no statistically significant publication bias.

Conclusion

Combined treatment with endostar can improve the response rate for NSCLC patients without increasing the risk of developing severe adverse event.",2014-03-01 +24851551,Multilocus sequencing typing of Pseudomonas aeruginosa isolates and analysis of potential pathogenicity of typical genotype strains from occupational oxyhelium saturation divers.,"

Background

Pseudomonas aeruginosa (P. aeruginosa) is a common microbe isolated from divers with ear and skin infections. To obtain the epidemic characters of the occurrence of the P. aeruginosa infection, multilocus sequence typing (MLST) was used to assess the genetic background of different strains isolated from divers involved in saturation diving.

Methods

A total of 64 P. aeruginosa strains from naval divers were sequenced by multilocus sequence typing using seven housekeeping genes (acsA, aroE, guaA, mutL, nuoD, ppsA and trpE). The results were analyzed based on the P. aeruginosa international MLST database to obtain the allelic profiles and sequence types (STs). MLST data were analyzed by Bionumerics 4.0 (http://pubmlst.org/mlstanalyse) using LIAN and eBURST. Twenty-eight strains with the typical genotype were selected for further analysis of pathogenic characteristics by Caenorhabditis elegans (C. elegans) fast killing model.

Results

Data from MLST revealed a high STs diversity among the strains. Of the 64 strains, 53 strains were assigned to 19 STs, and the remaining 11 clones could not be assigned. ST274 accounted for 18.5% (12/64), and ST260 accounted for 15.62% (10/64). C. elegans killing assay showed that all the test strains had distinct virulent properties as compared with the negative control group. Clone 503-1 had the highest virulence and clone 54 had the lowest virulence as compared with the positive clinical group.

Conclusion

The P. aeruginosa strains carried by the occupational diver groups in Chinese regions have characteristically dominant STs, and have a relatively strong virulence as compared with the standard strain and the clinically isolated positive control strain.",2014-03-01 +23696792,ExtremeDB: a unified web repository of extremophilic archaea and bacteria.,"Extremophiles are the microorganisms which can survive under extreme conditions of temperature, pressure, pH, salinity etc. They have gained much attention for their potential role in biotechnological and industrial applications. The large amount of experimental data in the literature is so diverse, that it becomes difficult and time consuming for the researcher to implement it in various areas of research. Therefore, a systematic arrangement of data and redirection in a similar fashion through web interface can assist researchers in analyzing the data as per their requirement. ExtremeDB is a freely available web based relational database which integrates general characteristics, genome-proteome information, industrial applications and recent scientific investigations of the seven major groups of 865 extremophillic microorganisms. The search options are user friendly and analyses tools such as Compare and Extreme BLAST have been incorporated for comparative analysis of two or more extremophiles and determining the sequence similarity of a given protein/nucleotide in relation to other extremophiles respectively. The effort put forth herein in the form of database, would open up new avenues on the potential utility of extremophiles in applied research. ExtremeDB is freely accessible via http://extrem.igib.res.in.",2013-05-16 +26772743,A computational method for genotype calling in family-based sequencing data.,"

Background

As sequencing technologies can help researchers detect common and rare variants across the human genome in many individuals, it is known that jointly calling genotypes across multiple individuals based on linkage disequilibrium (LD) can facilitate the analysis of low to modest coverage sequence data. However, genotype-calling methods for family-based sequence data, particularly for complex families beyond parent-offspring trios, are still lacking.

Results

In this study, first, we proposed an algorithm that considers both linkage disequilibrium (LD) patterns and familial transmission in nuclear and multi-generational families while retaining the computational efficiency. Second, we extended our method to incorporate external reference panels to analyze family-based sequence data with a small sample size. In simulation studies, we show that modeling multiple offspring can dramatically increase genotype calling accuracy and reduce phasing and Mendelian errors, especially at low to modest coverage. In addition, we show that using external panels can greatly facilitate genotype calling of sequencing data with a small number of individuals. We applied our method to a whole genome sequencing study of 1339 individuals at ~10X coverage from the Minnesota Center for Twin and Family Research.

Conclusions

The aggregated results show that our methods significantly outperform existing ones that ignore family constraints or LD information. We anticipate that our method will be useful for many ongoing family-based sequencing projects. We have implemented our methods efficiently in a C++ program FamLDCaller, which is available from http://www.pitt.edu/~wec47/famldcaller.html.",2016-01-16 +24517242,A consensus based template for reporting of pre-hospital major incident medical management.,"

Background

Structured reporting of major incidents has been advocated to improve the care provided at future incidents. A systematic review identified ten existing templates for reporting major incident medical management, but these templates are not in widespread use. We aimed to address this challenge by designing an open access template for uniform reporting of data from pre-hospital major incident medical management that will be tested for feasibility.

Methods

An expert group of thirteen European major incident practitioners, planners or academics participated in a four stage modified nominal group technique consensus process to design a novel reporting template. Initially, each expert proposed 30 variables. Secondly, these proposals were combined and each expert prioritized 45 variables from the total of 270. Thirdly, the expert group met in Norway to develop the template. Lastly, revisions to the final template were agreed via e-mail.

Results

The consensus process resulted in a template consisting of 48 variables divided into six categories; pre-incident data, Emergency Medical Service (EMS) background, incident characteristics, EMS response, patient characteristics and key lessons.

Conclusions

The expert group reached consensus on a set of key variables to report the medical management of pre-hospital major incidents and developed a novel reporting template. The template will be freely available for downloading and reporting on http://www.majorincidentreporting.org. This is the first global open access database for pre-hospital major incident reporting. The use of a uniform dataset will allow comparative analysis and has potential to identify areas of improvement for future responses.",2014-01-30 +27704290,Do personality traits assessed on medical school admission predict exit performance? A UK-wide longitudinal cohort study.,"Traditional methods of assessing personality traits in medical school selection have been heavily criticised. To address this at the point of selection, ""non-cognitive"" tests were included in the UK Clinical Aptitude Test, the most widely-used aptitude test in UK medical education (UKCAT: http://www.ukcat.ac.uk/ ). We examined the predictive validity of these non-cognitive traits with performance during and on exit from medical school. We sampled all students graduating in 2013 from the 30 UKCAT consortium medical schools. Analysis included: candidate demographics, UKCAT non-cognitive scores, medical school performance data-the Educational Performance Measure (EPM) and national exit situational judgement test (SJT) outcomes. We examined the relationships between these variables and SJT and EPM scores. Multilevel modelling was used to assess the relationships adjusting for confounders. The 3343 students who had taken the UKCAT non-cognitive tests and had both EPM and SJT data were entered into the analysis. There were four types of non-cognitive test: (1) libertarian-communitarian, (2) NACE-narcissism, aloofness, confidence and empathy, (3) MEARS-self-esteem, optimism, control, self-discipline, emotional-nondefensiveness (END) and faking, (4) an abridged version of 1 and 2 combined. 
Multilevel regression showed that, after correcting for demographic factors, END predicted SJT and EPM decile. Aloofness and empathy in NACE were predictive of SJT score. This is the first large-scale study examining the relationship between performance on non-cognitive selection tests and medical school exit assessments. The predictive validity of these tests was limited, and the relationships revealed do not fit neatly with theoretical expectations. This study does not support their use in selection.",2016-10-04 +26446135,HEALER: homomorphic computation of ExAct Logistic rEgRession for secure rare disease variants analysis in GWAS.,"

Motivation

Genome-wide association studies (GWAS) have been widely used in discovering the association between genotypes and phenotypes. Human genome data contain valuable but highly sensitive information. Unprotected disclosure of such information might put individual's privacy at risk. It is important to protect human genome data. Exact logistic regression is a bias-reduction method based on a penalized likelihood to discover rare variants that are associated with disease susceptibility. We propose the HEALER framework to facilitate secure rare variants analysis with a small sample size.

Results

We target at the algorithm design aiming at reducing the computational and storage costs to learn a homomorphic exact logistic regression model (i.e. evaluate P-values of coefficients), where the circuit depth is proportional to the logarithmic scale of data size. We evaluate the algorithm performance using rare Kawasaki Disease datasets.

Availability and implementation

Download HEALER at http://research.ucsd-dbmi.org/HEALER/ CONTACT: shw070@ucsd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-06 +26948029,Quantitative feature extraction from the Chinese hamster ovary bioprocess bibliome using a novel meta-analysis workflow.,"The scientific literature concerning Chinese hamster ovary (CHO) cells grows annually due to the importance of CHO cells in industrial bioprocessing of therapeutics. In an effort to start to catalogue the breadth of CHO phenotypes, or phenome, we present the CHO bibliome. This bibliographic compilation covers all published CHO cell studies from 1995 to 2015, and each study is classified by the types of phenotypic and bioprocess data contained therein. Using data from selected studies, we also present a quantitative meta-analysis of bioprocess characteristics across diverse culture conditions, yielding novel insights and addressing the validity of long held assumptions. Specifically, we show that bioprocess titers can be predicted using indicator variables derived from viable cell density, viability, and culture duration. We further identified a positive correlation between the cumulative viable cell density (VCD) and final titer, irrespective of cell line, media, and other bioprocess parameters. In addition, growth rate was negatively correlated with performance attributes, such as VCD and titer. In summary, despite assumptions that technical diversity among studies and opaque publication practices can limit research re-use in this field, we show that the statistical analysis of diverse legacy bioprocess data can provide insight into bioprocessing capabilities of CHO cell lines used in industry. The CHO bibliome can be accessed at http://lewislab.ucsd.edu/cho-bibliome/.",2016-03-03 +27553238,Exploiting Microbeams for Membrane Protein Structure Determination.,"A reproducible, and sample independent means of predictably obtaining large, well-ordered crystals has proven elusive in macromolecular crystallography. 
In the structure determination pipeline, crystallisation often proves to be a rate-limiting step, and the process of obtaining even small or badly ordered crystals can prove time-consuming and laborious. This is particularly true in the field of membrane protein crystallography and this is reflected in the limited number of unique membrane protein structures deposited in the protein data bank (less than 650 by June 2016 - http://blanco.biomol.uci.edu/mpstruc ). Over recent years the requirement for, and time and cost associated with obtaining, large crystals has been partially alleviated through the development of beamline instrumentation allowing data collection, and structure solution, from ever-smaller crystals. Advances in several areas have led to a step change in what might be considered achievable during a synchrotron trip over the last decade. This chapter will briefly review the current status of the field, the tools available to ease data collection and processing, and give some examples of exploitation of these for membrane protein microfocus macromolecular crystallography.",2016-01-01 +24234437,ClinVar: public archive of relationships among sequence variation and human phenotype.,"ClinVar (http://www.ncbi.nlm.nih.gov/clinvar/) provides a freely available archive of reports of relationships among medically important variants and phenotypes. ClinVar accessions submissions reporting human variation, interpretations of the relationship of that variation to human health and the evidence supporting each interpretation. The database is tightly coupled with dbSNP and dbVar, which maintain information about the location of variation on human assemblies. ClinVar is also based on the phenotypic descriptions maintained in MedGen (http://www.ncbi.nlm.nih.gov/medgen). Each ClinVar record represents the submitter, the variation and the phenotype, i.e. the unit that is assigned an accession of the format SCV000000000.0. 
The submitter can update the submission at any time, in which case a new version is assigned. To facilitate evaluation of the medical importance of each variant, ClinVar aggregates submissions with the same variation/phenotype combination, adds value from other NCBI databases, assigns a distinct accession of the format RCV000000000.0 and reports if there are conflicting clinical interpretations. Data in ClinVar are available in multiple formats, including html, download as XML, VCF or tab-delimited subsets. Data from ClinVar are provided as annotation tracks on genomic RefSeqs and are used in tools such as Variation Reporter (http://www.ncbi.nlm.nih.gov/variation/tools/reporter), which reports what is known about variation based on user-supplied locations.",2013-11-14 +26954922,[Quality registration is a support for more specialized palliative care. 9 out of 12 of the indicators improved significantly during one year]. ,"Systematic improvement of quality of care at the end of life is still scarce. The regional palliative care service in Skåne, which has eight units covering a population of almost 1.3 million inhabitants, has systematically used the Swedish Register of Palliative Care (SRPC, http://www.palliativ.se/ ) for care development. 9 out of 12 of the indicators improved significantly in 2014 compared to 2013. Through transparency of registered data and regular feedback to the department's units we have been able to encourage the exchange of experiences between the units, provide opportunities for internal and external benchmarking and also prioritize topics for teaching and training. The positive results must be interpreted with caution as this is a non-controlled follow-up. The exact correlation between recorded data and value for the patient is mainly unknown and requires further studies.",2016-03-08 +27833382,Polymorphisms and resistance mutations of hepatitis C virus on sequences in the European hepatitis C virus database.,"

Aim

To evaluate the occurrence of resistant mutations in treatment-naïve hepatitis C virus (HCV) sequences deposited in the European hepatitis C virus database (euHCVdb).

Methods

The sequences were downloaded from the euHCVdb (https://euhcvdb.ibcp.fr/euHCVdb/). The search was performed for full-length NS3 protease, NS5A and NS5B polymerase sequences of HCV, separated by genotypes 1a, 1b, 2a, 2b and 3a, and resulted in 798 NS3, 708 NS5A and 535 NS5B sequences from HCV genotypes 1a, 1b, 2a, 2b and 3a, after the exclusion of sequences containing errors and/or gaps or incomplete sequences, and sequences from patients previously treated with direct antiviral agents (DAA). The sequence alignment was performed with MEGA 6.06 MAC and the resulting protein sequences were then analyzed using the BioEdit 7.2.5. for mutations associated with resistance. Only positions that have been described as being associated with failure in treatment in in vivo studies, and/or as conferring a more than 2-fold change in replication in comparison to the wildtype reference strain in in vitro phenotypic assays were included in the analysis.

Results

The Q80K variant in the NS3 gene was the most prevalent mutation, being found in 44.66% of subtype 1a and 0.25% of subtype 1b. Other frequent mutations observed in more than 2% of the NS3 sequences were: I170V (3.21%) in genotype 1a, and Y56F (15.93%), V132I (23.28%) and I170V (65.20%) in genotype 1b. For the NS5A, 2.21% of the genotype 1a sequences have the P58S mutation, 5.95% of genotype 1b sequences have the R30Q mutation, 15.79% of subtypes 2a sequences have the Q30R mutation, 23.08% of subtype 2b sequences have a L31M mutation, and in subtype 3a sequences, 23.08% have the M31L resistant variants. For the NS5B, the V321L RAV was identified in 0.60% of genotype 1a and in 0.32% of genotype 1b sequences, and the N142T variant was observed in 0.32% of subtype 1b sequences. The C316Y, S556G, D559N RAV were identified in 0.33%, 7.82% and 0.32% of genotype 1b sequences, respectively, and were not observed in other genotypes.

Conclusion

HCV mutants resistant to DAAs are found in low frequency, nevertheless they could be selected and therapy could fail due to resistance substitutions in the HCV genome.",2016-10-01 +27206400,Contemporary Review of Risk-Stratified Management in Acute Uncomplicated and Complicated Diverticulitis.,"

Background

Acute colonic diverticulitis is a common clinical condition. Severity of the disease is based on clinical, laboratory, and radiological investigations and dictates the need for medical or surgical intervention. Recent clinical trials have improved the understanding of the natural history of the disease resulting in new approaches to and better evidence for the management of acute diverticulitis.

Methods

We searched the Cochrane Library (years 2004-2015), MEDLINE (years 2004-2015), and EMBASE (years 2004-2015) databases. We used the search terms ""diverticulitis, colonic"" or ""acute diverticulitis"" or ""divertic*"" in combination with the terms ""management,"" ""antibiotics,"" ""non-operative,"" or ""surgery."" Registers for clinical trials (such as the WHO registry and the https://clinicaltrials.gov/ ) were searched for ongoing, recruiting, or closed trials not yet published.

Results

Antibiotic treatment can be avoided in simple, non-complicated diverticulitis and outpatient management is safe. The management of complicated disease, ranging from a localized abscess to perforation with diffuse peritonitis, has changed towards either percutaneous or minimally invasive approaches in selected cases. The role of laparoscopic lavage without resection in perforated non-fecal diverticulitis is still debated; however, recent evidence from two randomised controlled trials has found a higher re-intervention in this group of patients.

Conclusions

A shift in management has occurred towards conservative management in acute uncomplicated disease. Those with uncomplicated acute diverticulitis may be treated without antibiotics. For complicated diverticulitis with purulent peritonitis, the use of peritoneal lavage appears to be non-superior to resection.",2016-10-01 +27635958,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline on Preoperative Imaging Assessment of Patients With Suspected Nonfunctioning Pituitary Adenomas.,"

Background

The authors reviewed published articles pertaining to the preoperative imaging evaluation of nonfunctioning pituitary adenomas (NFPAs) and formulated recommendations.

Objective

To provide an exhaustive review of published articles pertaining to the preoperative imaging evaluation of nonfunctioning pituitary adenomas.

Methods

The MEDLINE database was queried for studies investigating imaging for the preoperative evaluation of pituitary adenomas.

Results

From an initial search of 5598 articles, 122 articles were evaluated in detail and included in this article. Based on analysis of these articles, the recommendations are as follows: (1) High-resolution magnetic resonance imaging (level II) is recommended as the standard for preoperative assessment of nonfunctioning pituitary adenomas, but may be supplemented with CT (level III) and fluoroscopy (level III). (2) Although there are promising results suggesting the utility of magnetic resonance spectroscopy, magnetic resonance perfusion, positron emission tomography, and single-photon emission computed tomography, there is insufficient evidence to make formal recommendations pertaining to their clinical applications.

Conclusion

The authors identified 122 articles that form the basis of recommendations for preoperative imaging evaluation of nonfunctioning pituitary adenomas. The full guidelines document for this chapter can be located at https://www.cns.org/guidelines/guidelines-management-patients-non-functioning-pituitary-adenomas/Chapter_2.

Abbreviations

CT, computed tomography; DWI, diffusion-weighted imaging; MRI, magnetic resonance imaging; NFPA, nonfunctioning pituitary adenoma.",2016-10-01 +26876813,What do we know about homocysteine and exercise? A review from the literature.,"High total homocysteine (tHcy) concentrations contribute to an increased risk of cardiovascular diseases and neurodegenerative disorders. Several investigations have focused on the effect of exercise on tHcy concentrations, but results remain controversial. The differences among the methodologies in the investigations make difficult the interpretation of results. This review differentiates the effects of exercise on tHcy and establishes the relation with the implicated biomarkers on tHcy metabolism related to exercise. The electronic database MEDLINE (http://www.ncbi.nlm.nih.gov) was used for searching studies published between years 2002 and 2015. 'Homocysteine', 'Training ', 'Exercise', 'Physical Activity' as well as combinations out of these terms were entered in the database. Articles were grouped in: 1) Acute effect of exercise on tHcy, 2) chronic exercise and tHcy, 3) relationship of physical activity (PA) level and cardiorespiratory fitness with tHcy, and 4) biomarkers related to tHcy and exercise. From a total of 30 articles, most of the studies analyzing the acute effect of exercise showed an increase on tHcy concentrations. Studies analyzing the chronic effect on tHcy concentrations showed contradictory results and no consensus exists probably due to the differences in the methodology, exercise interventions and participants characteristics. Low cardiorespiratory fitness seems to be associated with high tHcy; in contrast, the relation of PA levels and tHcy needs further research. 
Regarding biomarkers related to tHcy and exercise, some studies showed an increase of folate, vitamin B12, and creatine after acute exercise that could to be due to requirement of protein turnover and an increased metabolic demand of vitamin-B.",2016-10-01 +22140215,RAC: Repository of Antibiotic resistance Cassettes.,"Antibiotic resistance in bacteria is often due to acquisition of resistance genes associated with different mobile genetic elements. In Gram-negative bacteria, many resistance genes are found as part of small mobile genetic elements called gene cassettes, generally found integrated into larger elements called integrons. Integrons carrying antibiotic resistance gene cassettes are often associated with mobile elements and here are designated 'mobile resistance integrons' (MRIs). More than one cassette can be inserted in the same integron to create arrays that contribute to the spread of multi-resistance. In many sequences in databases such as GenBank, only the genes within cassettes, rather than whole cassettes, are annotated and the same gene/cassette may be given different names in different entries, hampering analysis. We have developed the Repository of Antibiotic resistance Cassettes (RAC) website to provide an archive of gene cassettes that includes alternative gene names from multiple nomenclature systems and allows the community to contribute new cassettes. RAC also offers an additional function that allows users to submit sequences containing cassettes or arrays for annotation using the automatic annotation system Attacca. Attacca recognizes features (gene cassettes, integron regions) and identifies cassette arrays as patterns of features and can also distinguish minor cassette variants that may encode different resistance phenotypes (aacA4 cassettes and bla cassettes-encoding β-lactamases). Gaps in annotations are manually reviewed and those found to correspond to novel cassettes are assigned unique names. 
While there are other websites dedicated to integrons or antibiotic resistance genes, none includes a complete list of antibiotic resistance gene cassettes in MRI or offers consistent annotation and appropriate naming of all of these cassettes in submitted sequences. RAC thus provides a unique resource for researchers, which should reduce confusion and improve the quality of annotations of gene cassettes in integrons associated with antibiotic resistance. DATABASE URL: http://www2.chi.unsw.edu.au/rac.",2011-12-02 +27472655,Mortality due to Vegetation Fire-Originated PM2.5 Exposure in Europe-Assessment for the Years 2005 and 2008.,"

Background

Vegetation fires can release substantial quantities of fine particles (PM2.5), which are harmful to health. The fire smoke may be transported over long distances and can cause adverse health effects over wide areas.

Objective

We aimed to assess annual mortality attributable to short-term exposures to vegetation fire-originated PM2.5 in different regions of Europe.

Methods

PM2.5 emissions from vegetation fires in Europe in 2005 and 2008 were evaluated based on Moderate Resolution Imaging Spectroradiometer (MODIS) satellite data on fire radiative power. Atmospheric transport of the emissions was modeled using the System for Integrated modeLling of Atmospheric coMposition (SILAM) chemical transport model. Mortality impacts were estimated for 27 European countries based on a) modeled daily PM2.5 concentrations and b) population data, both presented in a 50 × 50 km2 spatial grid; c) an exposure-response function for short-term PM2.5 exposure and daily nonaccidental mortality; and d) country-level data for background mortality risk.

Results

In the 27 countries overall, an estimated 1,483 and 1,080 premature deaths were attributable to the vegetation fire-originated PM2.5 in 2005 and 2008, respectively. Estimated impacts were highest in southern and eastern Europe. However, all countries were affected by fire-originated PM2.5, and even the lower concentrations in western and northern Europe contributed substantially (~ 30%) to the overall estimate of attributable mortality.

Conclusions

Our assessment suggests that air pollution caused by PM2.5 released from vegetation fires is a notable risk factor for public health in Europe. Moreover, the risk can be expected to increase in the future as climate change proceeds. This factor should be taken into consideration when evaluating the overall health and socioeconomic impacts of these fires. Citation: Kollanus V, Prank M, Gens A, Soares J, Vira J, Kukkonen J, Sofiev M, Salonen RO, Lanki T. 2017. Mortality due to vegetation fire-originated PM2.5 exposure in Europe-assessment for the years 2005 and 2008. Environ Health Perspect 125:30-37; http://dx.doi.org/10.1289/EHP194.",2016-07-29 +23375020,FANTOM: Functional and taxonomic analysis of metagenomes.,"

Background

Interpretation of quantitative metagenomics data is important for our understanding of ecosystem functioning and assessing differences between various environmental samples. There is a need for an easy to use tool to explore the often complex metagenomics data in taxonomic and functional context.

Results

Here we introduce FANTOM, a tool that allows for exploratory and comparative analysis of metagenomics abundance data integrated with metadata information and biological databases. Importantly, FANTOM can make use of any hierarchical database and it comes supplied with NCBI taxonomic hierarchies as well as KEGG Orthology, COG, PFAM and TIGRFAM databases.

Conclusions

The software is implemented in Python, is platform independent, and is available at http://www.sysbio.se/Fantom.",2013-02-01 +24227674,MEDIPS: genome-wide differential coverage analysis of sequencing data derived from DNA enrichment experiments.,"

Motivation

DNA enrichment followed by sequencing is a versatile tool in molecular biology, with a wide variety of applications including genome-wide analysis of epigenetic marks and mechanisms. A common requirement of these diverse applications is a comparison of read coverage between experimental conditions. The amount of samples generated for such comparisons ranges from few replicates to hundreds of samples per condition for epigenome-wide association studies. Consequently, there is an urgent need for software that allows for fast and simple processing and comparison of sequencing data derived from enriched DNA.

Results

Here, we present a major update of the R/Bioconductor package MEDIPS, which allows for an arbitrary number of replicates per group and integrates sophisticated statistical methods for the detection of differential coverage between experimental conditions. Our approach can be applied to a diversity of quantitative sequencing data. In addition, our update adds novel functionality to MEDIPS, including correlation analysis between samples, and takes advantage of Bioconductor's annotation databases to facilitate annotation of specific genomic regions.

Availability and implementation

The latest version of MEDIPS is available as version 1.12.0 and part of Bioconductor 2.13. The package comes with a manual containing detailed description of its functionality and is available at http://www.bioconductor.org.",2013-11-13 +28817602,Exploring the potential of a structural alphabet-based tool for mining multiple target conformations and target flexibility insight.,"Protein flexibility is often implied in binding with different partners and is essential for protein function. The growing number of macromolecular structures in the Protein Data Bank entries and their redundancy has become a major source of structural knowledge of the protein universe. The analysis of structural variability through available redundant structures of a target, called multiple target conformations (MTC), obtained using experimental or modeling methods and under different biological conditions or different sources is one way to explore protein flexibility. This analysis is essential to improve the understanding of various mechanisms associated with protein target function and flexibility. In this study, we explored structural variability of three biological targets by analyzing different MTC sets associated with these targets. To facilitate the study of these MTC sets, we have developed an efficient tool, SA-conf, dedicated to capturing and linking the amino acid and local structure variability and analyzing the target structural variability space. The advantage of SA-conf is that it could be applied to divers sets composed of MTCs available in the PDB obtained using NMR and crystallography or homology models. This tool could also be applied to analyze MTC sets obtained by dynamics approaches. Our results showed that SA-conf tool is effective to quantify the structural variability of a MTC set and to localize the structural variable positions and regions of the target. 
By selecting adapted MTC subsets and comparing their variability detected by SA-conf, we highlighted different sources of target flexibility such as induced by binding partner, by mutation and intrinsic flexibility. Our results support the interest to mine available structures associated with a target using to offer valuable insight into target flexibility and interaction mechanisms. The SA-conf executable script, with a set of pre-compiled binaries are available at http://www.mti.univ-paris-diderot.fr/recherche/plateformes/logiciels.",2017-08-17 +25505091,JEPEG: a summary statistics based tool for gene-level joint testing of functional variants.,"

Motivation

Gene expression is influenced by variants commonly known as expression quantitative trait loci (eQTL). On the basis of this fact, researchers proposed to use eQTL/functional information univariately for prioritizing single nucleotide polymorphisms (SNPs) signals from genome-wide association studies (GWAS). However, most genes are influenced by multiple eQTLs which, thus, jointly affect any downstream phenotype. Therefore, when compared with the univariate prioritization approach, a joint modeling of eQTL action on phenotypes has the potential to substantially increase signal detection power. Nonetheless, a joint eQTL analysis is impeded by (i) not measuring all eQTLs in a gene and/or (ii) lack of access to individual genotypes.

Results

We propose joint effect on phenotype of eQTL/functional SNPs associated with a gene (JEPEG), a novel software tool which uses only GWAS summary statistics to (i) impute the summary statistics at unmeasured eQTLs and (ii) test for the joint effect of all measured and imputed eQTLs in a gene. We illustrate the behavior/performance of the developed tool by analysing the GWAS meta-analysis summary statistics from the Psychiatric Genomics Consortium Stage 1 and the Genetic Consortium for Anorexia Nervosa.

Conclusions

Applied analyses results suggest that JEPEG complements commonly used univariate GWAS tools by: (i) increasing signal detection power via uncovering (a) novel genes or (b) known associated genes in smaller cohorts and (ii) assisting in fine-mapping of challenging regions, e.g. major histocompatibility complex for schizophrenia.

Availability and implementation

JEPEG, its associated database of eQTL SNPs and usage examples are publicly available at http://code.google.com/p/jepeg/.

Contact

dlee4@vcu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-12 +26679168,Mineotaur: a tool for high-content microscopy screen sharing and visual analytics.,"High-throughput/high-content microscopy-based screens are powerful tools for functional genomics, yielding intracellular information down to the level of single-cells for thousands of genotypic conditions. However, accessing their data requires specialized knowledge and most often that data is no longer analyzed after initial publication. We describe Mineotaur ( http://www.mineotaur.org ), a open-source, downloadable web application that allows easy online sharing and interactive visualisation of large screen datasets, facilitating their dissemination and further analysis, and enhancing their impact.",2015-12-17 +25022451,Systematic review of the cost effectiveness of radiation therapy for prostate cancer from 2003 to 2013.,"

Background

Prostate cancer remains a prevalent diagnosis with a spectrum of treatment choices that offer similar oncologic outcomes but differing side effect profiles and associated costs. As the technology for prostate radiation therapy has advanced, its associated costs have escalated, thus making cost-effectiveness analyses critical to assess the value of competing treatment options, including watchful waiting, surgery, brachytherapy, intensity-modulated radiation therapy (IMRT), 3D-conformal radiation therapy (3D-CRT), proton beam therapy (PBT), and stereotactic body radiation therapy (SBRT).

Objective

The aim of this systematic review was to identify articles that performed a cost-effectiveness analysis on different radiation treatment options for localized prostate cancer, summarize their findings, and highlight the main drivers of cost effectiveness.

Methods

A literature search was performed on two databases, PubMed and the Cost-Effectiveness Analysis Registry ( https://research.tufts-nemc.org/cear4 ), using search terms that included 'prostate', 'cost effectiveness prostate radiation' and 'cost analysis comparative effectiveness prostate radiation'. Studies were included in this review if the cost data were from 2002 or later, and outcomes reported both cost and effectiveness, preferably including a cost-utility analysis with the outcome of an incremental cost-effectiveness ratio with quality-adjusted life-year (QALY) as the effectiveness measure.

Results

There were 14 articles between 2003 and 2013 that discussed cost effectiveness of prostate radiotherapy in men over the age of 65. All but four of the papers were from the US; the others were from Canada and the UK. The majority of the papers used Markov decision analysis and estimated cost from a payer's perspective, usually from Medicare reimbursement data. Assumptions for the model and utilities to calculate QALYs were estimated using published literature at the time of the analysis. Each analysis had a sensitivity analysis to compensate for the uncertainty of the model inputs. The main drivers of cost effectiveness were the cost of the radiation treatment and the differential QALYs accrued because of different treatment-related morbidities. Brachytherapy was consistently found to be more cost effective when compared with surgery and other radiation treatment options. IMRT was cost effective when compared with 3D-CRT. PBT was not found to be cost effective in any of the analyses, mostly due to the high costs of PBT. SBRT was the newest technology that was analyzed, and it was also found to be cost effective compared with IMRT and PBT.

Conclusions

Cost-effectiveness research of prostate radiation treatments allows patients, providers, and payers to better understand the true value of each treatment choice. Due to the variation in each of these analyses (e.g., costing, and disease and complication assumptions, etc.), it is difficult to generalize the results. One must be careful in drawing conclusions from these studies and extrapolating to individual patients, particularly with the clear utility dependence seen in the majority of these studies.",2014-08-01 +27398021,TMAinspiration: Decode Interdependencies in Multifactorial Tissue Microarray Data.,"There are no satisfying tools in tissue microarray (TMA) data analysis up to now to analyze the cooperative behavior of all measured markers in a multifactorial TMA approach. The developed tool TMAinspiration is not only offering an analysis option to close this gap but also offering an ecosystem consisting of quality control concepts and supporting scripts to make this approach a platform for informed practice and further research. The TMAinspiration method is specifically focusing on the demands of the TMA analysis by controlling errors and noise by a generalized regression scheme while at the same time avoiding to introduce a priori too many constraints into the analysis of the data. So, we are testing partitions of a proximity table to find an optimal support for a ranking scheme of molecular dependencies. The idea of combining several partitions to one ensemble, which is balancing the optimization process, is based on the main assumption that all these perspectives on the cellular network need to be self-consistent. Several application examples in breast cancer and one in squamous cell carcinoma demonstrate that this procedure is nicely confirming a priori knowledge on the expression characteristics of protein markers, while also integrating many new results discovered in the treasury of a bigger TMA experiment. 
The code and software are now freely available at: http://complex-systems.uni-muenster.de/tma_inspiration.html.",2016-06-29 +22908213,The UCSC genome browser and associated tools.,"The UCSC Genome Browser (http://genome.ucsc.edu) is a graphical viewer for genomic data now in its 13th year. Since the early days of the Human Genome Project, it has presented an integrated view of genomic data of many kinds. Now home to assemblies for 58 organisms, the Browser presents visualization of annotations mapped to genomic coordinates. The ability to juxtapose annotations of many types facilitates inquiry-driven data mining. Gene predictions, mRNA alignments, epigenomic data from the ENCODE project, conservation scores from vertebrate whole-genome alignments and variation data may be viewed at any scale from a single base to an entire chromosome. The Browser also includes many other widely used tools, including BLAT, which is useful for alignments from high-throughput sequencing experiments. Private data uploaded as Custom Tracks and Data Hubs in many formats may be displayed alongside the rich compendium of precomputed data in the UCSC database. The Table Browser is a full-featured graphical interface, which allows querying, filtering and intersection of data tables. The Saved Session feature allows users to store and share customized views, enhancing the utility of the system for organizing multiple trains of thought. Binary Alignment/Map (BAM), Variant Call Format and the Personal Genome Single Nucleotide Polymorphisms (SNPs) data formats are useful for visualizing a large sequencing experiment (whole-genome or whole-exome), where the differences between the data set and the reference assembly may be displayed graphically. 
Support for high-throughput sequencing extends to compact, indexed data formats, such as BAM, bigBed and bigWig, allowing rapid visualization of large datasets from RNA-seq and ChIP-seq experiments via local hosting.",2012-08-20 +27693279,[Ischemia-reperfusion. Preservation solution and hypothermic machine perfusion].,"

Aims

To describe ischemia-reperfusion mechanisms, the impact on kidney graft and strategies developed to minimize ischemia-reperfusion damages.

Material and methods

An exhaustive systematic review of the scientific literature was performed in the Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of the following keywords: ischemia-reperfusion; organ preservation; hypothermic machine perfusion; renal transplantation. Publications obtained were selected based on methodology, language, date of publication and relevance. Prospective and retrospective studies, in English or French, review articles; meta-analysis and guidelines were selected and analyzed. This search found 1293 articles. After reading titles and abstracts, 88 were included in the text, based on their relevance.

Results

Ischemia-reperfusion injuries occur when blood supply of an organ is interrupted or drastically reduced. Ischemic damage starts immediately after arterial clamping in the donor, persists during cold ischemia time, and is increased after reperfusion because of increased oxygen levels, organ warming and recipient cell infiltration. Besides its metabolic and biologic impact, IR induces a dramatic immunologic impact through immune cell activation.

Conclusions

Knowledge of IR mechanisms is crucial to improve organ storage strategies and to decrease the impact of IR on long-term graft and patient survival. Hypothermic machine perfusion was associated with prolonged graft survival versus cold storage. Principles and results of hypothermic machine perfusion will be reported.",2016-09-29 +21554709,PeakRanger: a cloud-enabled peak caller for ChIP-seq data.,"

Background

Chromatin immunoprecipitation (ChIP), coupled with massively parallel short-read sequencing (seq) is used to probe chromatin dynamics. Although there are many algorithms to call peaks from ChIP-seq datasets, most are tuned either to handle punctate sites, such as transcriptional factor binding sites, or broad regions, such as histone modification marks; few can do both. Other algorithms are limited in their configurability, performance on large data sets, and ability to distinguish closely-spaced peaks.

Results

In this paper, we introduce PeakRanger, a peak caller software package that works equally well on punctate and broad sites, can resolve closely-spaced peaks, has excellent performance, and is easily customized. In addition, PeakRanger can be run in a parallel cloud computing environment to obtain extremely high performance on very large data sets. We present a series of benchmarks to evaluate PeakRanger against 10 other peak callers, and demonstrate the performance of PeakRanger on both real and synthetic data sets. We also present real world usages of PeakRanger, including peak-calling in the modENCODE project.

Conclusions

Compared to other peak callers tested, PeakRanger offers improved resolution in distinguishing extremely closely-spaced peaks. PeakRanger has above-average spatial accuracy in terms of identifying the precise location of binding events. PeakRanger also has excellent sensitivity and specificity in all benchmarks evaluated. In addition, PeakRanger offers significant improvements in run time when running on a single processor system, and very marked improvements when allowed to take advantage of the MapReduce parallel environment offered by a cloud computing resource. PeakRanger can be downloaded at the official site of modENCODE project: http://www.modencode.org/software/ranger/",2011-05-09 +27693278,[Pediatric kidney transplantation].,"

Aims

To describe indications, surgical aspects, results and outcomes of kidney transplantation in children.

Material and methods

An exhaustive systematic review of the scientific literature was performed in the Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of the following keywords (MESH): ""kidney transplantation"", ""pediatric"", ""children"", ""outcomes"". Publications obtained were selected based on methodology, language, date of publication (last 10 years) and relevance. Prospective and retrospective studies, in English or French, review articles; meta-analysis and guidelines were selected and analyzed. This search found 2608 articles. After reading titles and abstracts, 18 were included in the text based on their relevance.

Results

Kidney transplantation is the gold-standard treatment for end stage renal kidney disease in children. The surgical procedure is well standardized with a retroperitoneal approach when child and kidney size allow it or a transperitoneal approach in child less than 15 kg and big size kidney graft. Anastomosis sites include iliac vessels in the retroperitoneal approach, and inferior vena cava and aorta in case of transperitoneal procedure. Ureteral reimplantation used most of the time a Campos Freire technique. Sometimes, particular conditions in the recipient (such as vena cava thrombosis) required procedure adaptation.

Conclusion

Graft survival dramatically increased over the past few years and is now superior to those observed in adult kidney transplantation, particularly in experienced team with microsurgery skills. Immunosuppressive treatments are similar to adults. Viral infections and post-transplant lymphoproliferative disorder are the main complications of renal transplantation in children and may lead to lethal outcomes. An increase graft loss is observed during boyhood due to immunosuppressive drugs uncompliance.",2016-09-29 +28725476,Food allergen detection by mass spectrometry: the role of systems biology.,"Food allergy prevalence is rising worldwide, motivating the development of assays that can sensitively and reliably detect trace amounts of allergens in manufactured food. Mass spectrometry (MS) is a promising alternative to commonly employed antibody-based assays owing to its ability to quantify multiple proteins in complex matrices with high sensitivity. In this review, we discuss a targeted MS workflow for the quantitation of allergenic protein in food products that employs selected reaction monitoring (SRM). We highlight the aspects of SRM method development unique to allergen quantitation and identify opportunities for simplifying the process. One promising avenue identified through a comprehensive survey of published MS literature is the use of proteotypic peptides, which are peptides whose presence appears robust to variations in food matrix, sample preparation protocol, and MS instrumentation. We conclude that proteotypic peptides exist for a subset of allergenic milk, egg, and peanut proteins. For less studied allergens such as soy, wheat, fish, shellfish, and tree nuts, we offer guidance and tools for peptide selection and specificity verification as part of an interactive web database, the Allergen Peptide Browser (http://www.AllergenPeptideBrowser.org). 
With ongoing improvements in MS instrumentation, analysis software, and strategies for targeted quantitation, we expect an increasing role of MS as an analytical tool for ensuring regulatory compliance.",2016-09-29 +24285305,NONCODEv4: exploring the world of long non-coding RNA genes.,"NONCODE (http://www.bioinfo.org/noncode/) is an integrated knowledge database dedicated to non-coding RNAs (excluding tRNAs and rRNAs). Non-coding RNAs (ncRNAs) have been implied in diseases and identified to play important roles in various biological processes. Since NONCODE version 3.0 was released 2 years ago, discovery of novel ncRNAs has been promoted by high-throughput RNA sequencing (RNA-Seq). In this update of NONCODE, we expand the ncRNA data set by collection of newly identified ncRNAs from literature published in the last 2 years and integration of the latest version of RefSeq and Ensembl. Particularly, the number of long non-coding RNA (lncRNA) has increased sharply from 73 327 to 210 831. Owing to similar alternative splicing pattern to mRNAs, the concept of lncRNA genes was put forward to help systematic understanding of lncRNAs. The 56 018 and 46 475 lncRNA genes were generated from 95 135 and 67 628 lncRNAs for human and mouse, respectively. Additionally, we present expression profile of lncRNA genes by graphs based on public RNA-seq data for human and mouse, as well as predict functions of these lncRNA genes. The improvements brought to the database also include an incorporation of an ID conversion tool from RefSeq or Ensembl ID to NONCODE ID and a service of lncRNA identification. NONCODE is also accessible through http://www.noncode.org/.",2013-11-26 +23956306,Incorporating prior knowledge into Gene Network Study.,"

Motivation

A major goal in genomic research is to identify genes that may jointly influence a biological response. From many years of intensive biomedical research, a large body of biological knowledge, or pathway information, has accumulated in available databases. There is a strong interest in leveraging these pathways to improve the statistical power and interpretability in studying gene networks associated with complex phenotypes. This prior information is a valuable complement to large-scale genomic data such as gene expression data generated from microarrays. However, it is a non-trivial task to effectively integrate available biological knowledge into gene expression data when reconstructing gene networks.

Results

In this article, we developed and applied a Lasso method from a Bayesian perspective, a method we call prior Lasso (pLasso), for the reconstruction of gene networks. In this method, we partition edges between genes into two subsets: one subset of edges is present in known pathways, whereas the other has no prior information associated. Our method assigns different prior distributions to each subset according to a modified Bayesian information criterion that incorporates prior knowledge on both the network structure and the pathway information. Simulation studies have indicated that the method is more effective in recovering the underlying network than a traditional Lasso method that does not use the prior information. We applied pLasso to microarray gene expression datasets, where we used information from the Pathway Commons (PC) and the Kyoto Encyclopedia of Genes and Genomes (KEGG) as prior information for the network reconstruction, and successfully identified network hub genes associated with clinical outcome in cancer patients.

Availability

The source code is available at http://nba.uth.tmc.edu/homepage/liu/pLasso.",2013-08-16 +27153665,DOGMA: domain-based transcriptome and proteome quality assessment.,"

Motivation

Genome studies have become cheaper and easier than ever before, due to the decreased costs of high-throughput sequencing and the free availability of analysis software. However, the quality of genome or transcriptome assemblies can vary a lot. Therefore, quality assessment of assemblies and annotations are crucial aspects of genome analysis pipelines.

Results

We developed DOGMA, a program for fast and easy quality assessment of transcriptome and proteome data based on conserved protein domains. DOGMA measures the completeness of a given transcriptome or proteome and provides information about domain content for further analysis. DOGMA provides a very fast way to do quality assessment within seconds.

Availability and implementation

DOGMA is implemented in Python and published under GNU GPL v.3 license. The source code is available on https://ebbgit.uni-muenster.de/domainWorld/DOGMA/ CONTACTS: e.dohmen@wwu.de or c.kemena@wwu.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-05 +24471365,Three-dimensional adult male head and skull contours.,"

Objective

Traumatic brain injury (TBI) is a major public health issue, affecting millions of people annually. Anthropomorphic test devices (ATDs) and finite element models (FEMs) provide a means of understanding factors leading to TBI, potentially reducing the occurrence. Thus, there is a need to ensure that these tools accurately model humans. For example, the Hybrid III was not based on 3-dimensional human head shape data. The objective of this study is to produce average head and skull contours for an average U.S. male that can be used for ATDs and FEMs.

Methods

Computed tomography (CT) scans of adult male heads were obtained from a database provided by the University of Virginia Center for Applied Biomechanics. An orthographic viewer was used to extract head and skull contours from the CT scans. Landmarks were measured graphically using HyperMesh (Altair, HyperWorks). To determine the head occipital condyle (OC) centroid, surface meshes of the OCs were made and the centroid of the surfaces was calculated. The Hybrid III contour was obtained using a MicroScribe Digitizer (Solution Technologies, Inc., Oella, MD). Comparisons of the average male and ATD contours were performed using 2 methods: (1) the midsagittal and midcoronal ATD contours relative to the OC centroid were compared to the corresponding 1 SD range of the average male contours; (2) the ATD sagittal contour was translated relative to the average male sagittal contour to minimize the area between the 2 contours.

Results

Average male head and skull contours were created. Landmark measurements were made for the dorsum sellae, nasion skin, nasion bone, infraorbital foramen, and external auditory meatus, all relative to the OC centroid. The Hybrid III midsagittal contour was outside the 1 SD range for 15.2 percent of the average male head contour but only by a maximum distance of 1.5 mm, whereas the Hybrid III midcoronal head contour was outside the 1 SD range for 12.2 percent of the average male head contour by a maximum distance of 2 mm. Minimization of the area between the midsagittal contours resulted in only 2.3 mm of translation, corroborating the good correlation between the contours established by initial comparison.

Conclusions

Three-dimensional average male head and skull contours were created and measurements of landmark locations were made. It was found that the 50th percentile male Hybrid III corresponds well to the average male head contour and validated its 3D shape. Average adult head and skull contours and landmark data are available for public research use at http://biomechanics.pratt.duke.edu/data .",2014-01-01 +26424856,smallWig: parallel compression of RNA-seq WIG files.,"

Contributions

We developed a new lossless compression method for WIG data, named smallWig, offering the best known compression rates for RNA-seq data and featuring random access functionalities that enable visualization, summary statistics analysis and fast queries from the compressed files. Our approach results in order of magnitude improvements compared with bigWig and ensures compression rates only a fraction of those produced by cWig. The key features of the smallWig algorithm are statistical data analysis and a combination of source coding methods that ensure high flexibility and make the algorithm suitable for different applications. Furthermore, for general-purpose file compression, the compression rate of smallWig approaches the empirical entropy of the tested WIG data. For compression with random query features, smallWig uses a simple block-based compression scheme that introduces only a minor overhead in the compression rate. For archival or storage space-sensitive applications, the method relies on context mixing techniques that lead to further improvements of the compression rate. Implementations of smallWig can be executed in parallel on different sets of chromosomes using multiple processors, thereby enabling desirable scaling for future transcriptome Big Data platforms.

Motivation

The development of next-generation sequencing technologies has led to a dramatic decrease in the cost of DNA/RNA sequencing and expression profiling. RNA-seq has emerged as an important and inexpensive technology that provides information about whole transcriptomes of various species and organisms, as well as different organs and cellular communities. The vast volume of data generated by RNA-seq experiments has significantly increased data storage costs and communication bandwidth requirements. Current compression tools for RNA-seq data such as bigWig and cWig either use general-purpose compressors (gzip) or suboptimal compression schemes that leave significant room for improvement. To substantiate this claim, we performed a statistical analysis of expression data in different transform domains and developed accompanying entropy coding methods that bridge the gap between theoretical and practical WIG file compression rates.

Results

We tested different variants of the smallWig compression algorithm on a number of integer- and real- (floating point) valued RNA-seq WIG files generated by the ENCODE project. The results reveal that, on average, smallWig offers 18-fold compression rate improvements, up to 2.5-fold compression time improvements, and 1.5-fold decompression time improvements when compared with bigWig. On the tested files, the memory usage of the algorithm never exceeded 90 KB. When more elaborate context mixing compressors were used within smallWig, the obtained compression rates were as much as 23 times better than those of bigWig. For smallWig used in the random query mode, which also supports retrieval of the summary statistics, an overhead in the compression rate of roughly 3-17% was introduced depending on the chosen system parameters. An increase in encoding and decoding time of 30% and 55% represents an additional performance loss caused by enabling random data access. We also implemented smallWig using multi-processor programming. This parallelization feature decreases the encoding delay 2-3.4 times compared with that of a single-processor implementation, with the number of processors used ranging from 2 to 8; in the same parameter regime, the decoding delay decreased 2-5.2 times.

Availability and implementation

The smallWig software can be downloaded from: http://stanford.edu/~zhiyingw/smallWig/smallwig.html, http://publish.illinois.edu/milenkovic/, http://web.stanford.edu/~tsachy/.

Contact

zhiyingw@stanford.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-30 +28515314,"The mzIdentML Data Standard Version 1.2, Supporting Advances in Proteome Informatics.","The first stable version of the Proteomics Standards Initiative mzIdentML open data standard (version 1.1) was published in 2012-capturing the outputs of peptide and protein identification software. In the intervening years, the standard has become well-supported in both commercial and open software, as well as a submission and download format for public repositories. Here we report a new release of mzIdentML (version 1.2) that is required to keep pace with emerging practice in proteome informatics. New features have been added to support: (1) scores associated with localization of modifications on peptides; (2) statistics performed at the level of peptides; (3) identification of cross-linked peptides; and (4) support for proteogenomics approaches. In addition, there is now improved support for the encoding of de novo sequencing of peptides, spectral library searches, and protein inference. As a key point, the underlying XML schema has only undergone very minor modifications to simplify as much as possible the transition from version 1.1 to version 1.2 for implementers, but there have been several notable updates to the format specification, implementation guidelines, controlled vocabularies and validation software. mzIdentML 1.2 can be described as backwards compatible, in that reading software designed for mzIdentML 1.1 should function in most cases without adaptation. We anticipate that these developments will provide a continued stable base for software teams working to implement the standard. All the related documentation is accessible at http://www.psidev.info/mzidentml.",2017-05-17 +24269041,Convergence of decreasing male and increasing female incidence rates in major tobacco-related cancers in Europe in 1988-2010.,"

Introduction

Smoking prevalence has been declining in men all over Europe, while the trend varies in European regions among women. To study the impact of past smoking prevalence, we present a comprehensive overview of the most recent trends in incidence, during 1988-2010, in 26 countries, of four of the major cancers in the respiratory and upper gastro-intestinal tract associated with tobacco smoking.

Methods

Data from 47 population-based cancer registries for lung, laryngeal, oral cavity and pharyngeal, and oesophageal cancer cases were obtained from the newly developed data repository within the European Cancer Observatory (http://eco.iarc.fr/). Truncated age-standardised incidence rates (35-74 years) by calendar year, average annual percentage change in incidence over 1998-2007 were calculated. Smoking prevalence in selected countries was extracted from the Organisation for Economic Co-operation and Development and the World Health Organization databases.

Results

There remained great but changing variation in the incidence rates of tobacco-related cancers by European region. Generally, the high rates among men have been declining, while the lower rates among women are increasing, resulting in convergence of the rates. Female lung cancer rates were above male rates in Denmark, Iceland and Sweden (35-64 years). In lung and laryngeal cancers, where smoking is the main risk factor, rates were highest in central and eastern Europe, southern Europe and the Baltic countries. Despite a lowering of female smoking prevalence, female incidence rates of lung, laryngeal and oral cavity cancers increased in most parts of Europe, but were stable in the Baltic countries. Mixed trends emerged in oesophageal cancer, probably explained by differing risk factors for the two main histological subtypes.

Conclusions

This data repository offers the opportunity to show the variety of incidence trends by sex among European countries. The diverse patterns of trends reflect varied exposure to risk factors. Given the heavy cancer burden attributed to tobacco and the fact that tobacco use is entirely preventable, tobacco control remains a top priority in Europe. Prevention efforts should be intensified in central and eastern Europe, southern Europe and the Baltic countries.",2013-11-20 +29135438,Exposure to Perfluoroalkyl Substances and Metabolic Outcomes in Pregnant Women: Evidence from the Spanish INMA Birth Cohorts.,"

Background

Exposure to perfluoroalkyl substances (PFASs) may increase risk for metabolic diseases; however, epidemiologic evidence is lacking at the present time. Pregnancy is a period of enhanced tissue plasticity for the fetus and the mother and may be a critical window of PFAS exposure susceptibility.

Objective

We evaluated the associations between PFAS exposures and metabolic outcomes in pregnant women.

Methods

We analyzed 1,240 pregnant women from the Spanish INMA [Environment and Childhood Project (INfancia y Medio Ambiente)] birth cohort study (recruitment period: 2003-2008) with measured first pregnancy trimester plasma concentrations of four PFASs (in nanograms/milliliter). We used logistic regression models to estimate associations of PFASs (log10-transformed and categorized into quartiles) with impaired glucose tolerance (IGT) and gestational diabetes mellitus (GDM), and we used linear regression models to estimate associations with first-trimester serum levels of triglycerides, total cholesterol, and C-reactive protein (CRP).

Results

Perfluorooctane sulfonate (PFOS) and perfluorohexane sulfonate (PFHxS) were positively associated with IGT (137 cases) [OR per log10-unit increase=1.99 (95% CI: 1.06, 3.78) and OR=1.65 (95% CI: 0.99, 2.76), respectively]. PFOS and PFHxS associations with GDM (53 cases) were in a similar direction, but less precise. PFOS and perfluorononanoate (PFNA) were negatively associated with triglyceride levels [percent median change per log10-unit increase=-5.86% (95% CI: -9.91%, -1.63%) and percent median change per log10-unit increase=-4.75% (95% CI: -8.16%, -0.61%), respectively], whereas perfluorooctanoate (PFOA) was positively associated with total cholesterol [percent median change per log10-unit increase=1.26% (95% CI: 0.01%, 2.54%)]. PFASs were not associated with CRP in the subset of the population with available data (n=640).

Conclusions

Although further confirmation is required, the findings from this study suggest that PFAS exposures during pregnancy may influence lipid metabolism and glucose tolerance and thus may impact the health of the mother and her child. https://doi.org/10.1289/EHP1062.",2017-11-13 +25037307,RTeQTL: Real-Time Online Engine for Expression Quantitative Trait Loci Analyses. ,"Our database tool, called Real-Time Engine for Expression Quantitative Trait Loci Analyses (RTeQTL), can efficiently provide eQTL association results that are not available in existing eQTL databases browsers. These functions include (i) single SNP (single-nucleotide polymorphism) and (ii) two-SNP conditional eQTL effects on gene expression regardless of the magnitude of P-values. The database is based on lymphoblastoid cell lines from >900 samples with global gene expression and genome-wide genotyped and imputed SNP data. The detailed result for any pairs of gene and SNPs can be efficiently computed and browsed online, as well as downloaded in batch mode. This is the only tool that can assess the independent effect of a disease- or trait-associated SNP on gene expression conditioning on other SNPs of interest, such as the top eQTL of the same gene. It is also useful to identify eQTLs for candidate genes, which are often missed in existing eQTL browsers, which only store results with genome-wide significant P-value. Additional analyses stratifying by gender can also be easily achieved by this tool. Database URL: http://eqtl.rc.fas.harvard.edu/.",2014-07-18 +27670643,HydDB: A web tool for hydrogenase classification and analysis.,"H2 metabolism is proposed to be the most ancient and diverse mechanism of energy-conservation. The metalloenzymes mediating this metabolism, hydrogenases, are encoded by over 60 microbial phyla and are present in all major ecosystems. We developed a classification system and web tool, HydDB, for the structural and functional analysis of these enzymes. 
We show that hydrogenase function can be predicted by primary sequence alone using an expanded classification scheme (comprising 29 [NiFe], 8 [FeFe], and 1 [Fe] hydrogenase classes) that defines 11 new classes with distinct biological functions. Using this scheme, we built a web tool that rapidly and reliably classifies hydrogenase primary sequences using a combination of k-nearest neighbors' algorithms and CDD referencing. Demonstrating its capacity, the tool reliably predicted hydrogenase content and function in 12 newly-sequenced bacteria, archaea, and eukaryotes. HydDB provides the capacity to browse the amino acid sequences of 3248 annotated hydrogenase catalytic subunits and also contains a detailed repository of physiological, biochemical, and structural information about the 38 hydrogenase classes defined here. The database and classifier are freely and publicly available at http://services.birc.au.dk/hyddb/.",2016-09-27 +27670777,FAMSA: Fast and accurate multiple sequence alignment of huge protein families.,"Rapid development of modern sequencing platforms has contributed to the unprecedented growth of protein families databases. The abundance of sets containing hundreds of thousands of sequences is a formidable challenge for multiple sequence alignment algorithms. The article introduces FAMSA, a new progressive algorithm designed for fast and accurate alignment of thousands of protein sequences. Its features include the utilization of the longest common subsequence measure for determining pairwise similarities, a novel method of evaluating gap costs, and a new iterative refinement scheme. What matters is that its implementation is highly optimized and parallelized to make the most of modern computer platforms. Thanks to the above, quality indicators, i.e. sum-of-pairs and total-column scores, show FAMSA to be superior to competing algorithms, such as Clustal Omega or MAFFT for datasets exceeding a few thousand sequences. 
Quality does not compromise on time or memory requirements, which are an order of magnitude lower than those in the existing solutions. For example, a family of 415519 sequences was analyzed in less than two hours and required no more than 8 GB of RAM. FAMSA is available for free at http://sun.aei.polsl.pl/REFRESH/famsa.",2016-09-27 +27587701,Bayesian parameter estimation for the Wnt pathway: an infinite mixture models approach.,"

Motivation

Likelihood-free methods, like Approximate Bayesian Computation (ABC), have been extensively used in model-based statistical inference with intractable likelihood functions. When combined with Sequential Monte Carlo (SMC) algorithms they constitute a powerful approach for parameter estimation and model selection of mathematical models of complex biological systems. A crucial step in the ABC-SMC algorithms, significantly affecting their performance, is the propagation of a set of parameter vectors through a sequence of intermediate distributions using Markov kernels.

Results

In this article, we employ Dirichlet process mixtures (DPMs) to design optimal transition kernels and we present an ABC-SMC algorithm with DPM kernels. We illustrate the use of the proposed methodology using real data for the canonical Wnt signaling pathway. A multi-compartment model of the pathway is developed and it is compared to an existing model. The results indicate that DPMs are more efficient in the exploration of the parameter space and can significantly improve ABC-SMC performance. In comparison to alternative sampling schemes that are commonly used, the proposed approach can bring potential benefits in the estimation of complex multimodal distributions. The method is used to estimate the parameters and the initial state of two models of the Wnt pathway and it is shown that the multi-compartment model fits better the experimental data.

Availability and implementation

Python scripts for the Dirichlet Process Gaussian Mixture model and the Gibbs sampler are available at https://sites.google.com/site/kkoutroumpas/software

Contact

konstantinos.koutroumpas@ecp.fr.",2016-09-01 +26978842,Prioritizing Environmental Chemicals for Obesity and Diabetes Outcomes Research: A Screening Approach Using ToxCast™ High-Throughput Data.,"

Background

Diabetes and obesity are major threats to public health in the United States and abroad. Understanding the role that chemicals in our environment play in the development of these conditions is an emerging issue in environmental health, although identifying and prioritizing chemicals for testing beyond those already implicated in the literature is challenging. This review is intended to help researchers generate hypotheses about chemicals that may contribute to diabetes and to obesity-related health outcomes by summarizing relevant findings from the U.S. Environmental Protection Agency (EPA) ToxCast™ high-throughput screening (HTS) program.

Objectives

Our aim was to develop new hypotheses around environmental chemicals of potential interest for diabetes- or obesity-related outcomes using high-throughput screening data.

Methods

We identified ToxCast™ assay targets relevant to several biological processes related to diabetes and obesity (insulin sensitivity in peripheral tissue, pancreatic islet and β cell function, adipocyte differentiation, and feeding behavior) and presented chemical screening data against those assay targets to identify chemicals of potential interest.

Discussion

The results of this screening-level analysis suggest that the spectrum of environmental chemicals to consider in research related to diabetes and obesity is much broader than indicated by research papers and reviews published in the peer-reviewed literature. Testing hypotheses based on ToxCast™ data will also help assess the predictive utility of this HTS platform.

Conclusions

More research is required to put these screening-level analyses into context, but the information presented in this review should facilitate the development of new hypotheses.

Citation

Auerbach S, Filer D, Reif D, Walker V, Holloway AC, Schlezinger J, Srinivasan S, Svoboda D, Judson R, Bucher JR, Thayer KA. 2016. Prioritizing environmental chemicals for obesity and diabetes outcomes research: a screening approach using ToxCast™ high-throughput data. Environ Health Perspect 124:1141-1154; http://dx.doi.org/10.1289/ehp.1510456.",2016-03-15 +25204695,"5th National Audit Project (NAP5) on accidental awareness during general anaesthesia: protocol, methods, and analysis of data.","

Background

Accidental awareness during general anaesthesia (AAGA) with recall is a potentially distressing complication of general anaesthesia that can lead to psychological harm. The 5th National Audit Project (NAP5) was designed to investigate the reported incidence, predisposing factors, causality, and impact of accidental awareness.

Methods

A nationwide network of local co-ordinators across all the UK and Irish public hospitals reported all new patient reports of accidental awareness to a central database, using a system of monthly anonymized reporting over a calendar year. The database collected the details of the reported event, anaesthetic and surgical technique, and any sequelae. These reports were categorized into main types by a multidisciplinary panel, using a formalized process of analysis.

Results

The main categories of accidental awareness were: certain or probable; possible; during sedation; on or from the intensive care unit; could not be determined; unlikely; drug errors; and statement only. The degree of evidence to support the categorization was also defined for each report. Patient experience and sequelae were categorized using current tools or modifications of such.

Conclusions

The NAP5 methodology may be used to assess new reports of AAGA in a standardized manner, especially for the development of an ongoing database of case reporting. This paper is a shortened version describing the protocols, methods, and data analysis from NAP5--the full report can be found at http://www.nationalauditprojects.org.uk/NAP5_home.",2014-09-09 +28090381,LTRclassifier: A website for fast structural LTR retrotransposons classification in plants.,"Automatic classification of LTR retrotransposons is a big challenge in the area of massive genomics. Many tools were developed to detect them but automatic classification is somehow challenging. Here we propose a simple approach, LTRclassifier, based on HMM recognition followed by BLAST analyses (i) to classify plant LTR retrotransposons in their respective superfamily, and (ii) to provide automatically a basic functional annotation of these elements. The method was tested on various TE databases, and shown to be robust and fast. This tool is available as a web service implemented at IRD bioinformatics facility, http://LTRclassifier.ird.fr/.",2016-09-26 +27990263,"Lies, irony, and contradiction - an annotation of semantic conflict in the movie ""Forrest Gump"".","Here we extend the information on the structure of the core stimulus of the studyforrest project (http://studyforrest.org) with a description of semantic conflict in the ""Forrest Gump"" movie. Three observers annotated the movie independently regarding episodes with portrayal of lies, irony or sarcasm. We present frequency statistics, and inter-observer reliability measures that qualify and quantify semantic conflict in the stimulus. 
While the number of identified events is limited, this annotation nevertheless enriches the knowledge about the complex high-level structure of this stimulus, and can help to evaluate its utility for future studies, and the usability of the existing brain imaging data regarding this aspect of cognition.",2016-09-26 +27853580,MultispeQ Beta: a tool for large-scale plant phenotyping connected to the open PhotosynQ network.,"Large-scale high-throughput plant phenotyping (sometimes called phenomics) is becoming increasingly important in plant biology and agriculture and is essential to cutting-edge plant breeding and management approaches needed to meet the food and fuel needs for the next century. Currently, the application of these approaches is severely limited by the availability of appropriate instrumentation and by the ability to communicate experimental protocols, results and analyses. To address these issues, we have developed a low-cost, yet sophisticated open-source scientific instrument designed to enable communities of researchers, plant breeders, educators, farmers and citizen scientists to collect high-quality field data on a large scale. The MultispeQ provides measurements in the field or laboratory of both, environmental conditions (light intensity and quality, temperature, humidity, CO2 levels, time and location) and useful plant phenotypes, including photosynthetic parameters-photosystem II quantum yield (ΦII), non-photochemical exciton quenching (NPQ), photosystem II photoinhibition, light-driven proton translocation and thylakoid proton motive force, regulation of the chloroplast ATP synthase and potentially many others-and leaf chlorophyll and other pigments. 
Plant phenotype data are transmitted from the MultispeQ to mobile devices, laptops or desktop computers together with key metadata that gets saved to the PhotosynQ platform (https://photosynq.org) and provides a suite of web-based tools for sharing, visualization, filtering, dissemination and analyses. We present validation experiments, comparing MultispeQ results with established platforms, and show that it can be usefully deployed in both laboratory and field settings. We present evidence that MultispeQ can be used by communities of researchers to rapidly measure, store and analyse multiple environmental and plant properties, allowing for deeper understanding of the complex interactions between plants and their environment.",2016-10-26 +27267125,PhenoImageShare: an image annotation and query infrastructure.,"

Background

High throughput imaging is now available to many groups and it is possible to generate a large quantity of high quality images quickly. Managing this data, consistently annotating it, or making it available to the community are all challenges that come with these methods.

Results

PhenoImageShare provides an ontology-enabled lightweight image data query, annotation service and a single point of access backed by a Solr server for programmatic access to an integrated image collection enabling improved community access. PhenoImageShare also provides an easy to use online image annotation tool with functionality to draw regions of interest on images and to annotate them with terms from an autosuggest-enabled ontology-lookup widget. The provenance of each image, and annotation, is kept and links to original resources are provided. The semantic and intuitive search interface is species and imaging technology neutral. PhenoImageShare now provides access to annotation for over 100,000 images for 2 species.

Conclusion

The PhenoImageShare platform provides underlying infrastructure for both programmatic access and user-facing tools for biologists enabling the query and annotation of federated images. PhenoImageShare is accessible online at http://www.phenoimageshare.org .",2016-06-07 +21731011,The MSM program: web-based statistics package for estimating usual dietary intake using the Multiple Source Method.,"

Background/objectives

The Multiple Source Method (MSM) is a new statistical method for estimating usual dietary intake including episodically consumed foods on the basis of two or more short-term measurements such as 24-h dietary recalls. Optional information regarding habitual use or non-use of a food can be included as a covariate in the model estimating the intake, as well as a parameter for identifying consumers and non-consumers. The objective was to implement the MSM algorithms into an easy-to-use statistical program package.

Subjects/methods

The implementation was realized as a web-based application using the Perl application framework Catalyst. As the engine for the statistical calculations, the R system was used. To allow simultaneous use of the program by different users, a multiuser system with a resource bag pattern design was implemented.

Results

We established a software program that implements the algorithms of the MSM and allows interactive usage of the method, using standard web technologies. The program is hosted on a website established at the DIFE and can be accessed at https://nugo.dife.de/msm. The communication between users and the program web site is encrypted, securing transmitted data against unauthorized use. Users can interactively import several data sets, define the analysis model, review and export results and graphs. The use of the program is supported by online help and a user guide.

Conclusions

The MSM website provides a program package that allows nutritional scientists to calculate usual dietary intakes by combining short-term and long-term measurements (multiple sources). It promotes simple access to the MSM to estimate usual food intake for individuals and populations.",2011-07-01 +29187166,What is the safest mode of delivery for extremely preterm cephalic/non-cephalic twin pairs? A systematic review and meta-analyses.,"

Background

Given the controversy around mode of delivery, our objective was to assess the evidence regarding the safest mode of delivery for actively resuscitated extremely preterm cephalic/non-cephalic twin pairs before 28 weeks of gestation.

Methods

We searched Cochrane CENTRAL, MEDLINE, EMBASE and http://clinicaltrials.gov from January 1994 to January 2017. Two reviewers independently screened titles, abstracts and full text articles, extracted data and assessed risk of bias. We included randomized controlled trials and observational studies. Our primary outcome was a composite of neonatal death (<28 days of life) and severe brain injury in survivors (intraventricular hemorrhage grade ≥ 3 or periventricular leukomalacia). We performed random-effects meta-analyses, generating odds ratios with 95% confidence intervals for the first and second twin separately, and for both twins together. We assessed the risk of bias using a modified Newcastle Ottawa Scale (NOS) for observational studies and used the Grading of Recommendations Assessment, Development and Evaluation (GRADE) approach.

Results

Our search generated 2695 articles, and after duplicate removal, we screened 2051 titles and abstracts, selecting 113 articles for full-text review. We contacted 36 authors, and ultimately, three observational studies met our inclusion criteria. In cephalic/non-cephalic twin pairs delivered by caesarean section compared to vaginal birth at 24+0-27+6 weeks the odds ratio for our composite outcome of neonatal death and severe brain injury for the cephalic first twin was 0.35 (95% CI 0.00-92.61, two studies, I2 = 76%), 1.69 for the non-cephalic second twin (95% CI 0.04-72.81, two studies, I2 = 55%) and 0.83 for both twins (95% CI 0.05-13.43, two studies, I2 = 56%). According to the modified Newcastle Ottawa Scale we assessed individual study quality as being at high risk of bias and according to GRADE the overall evidence for our primary outcomes was very low.

Conclusion

Our systematic review on the safest mode of delivery for extremely preterm cephalic/non-cephalic twin pairs found very limited existing evidence, without significant differences in neonatal death and severe brain injury by mode of delivery.",2017-11-29 +22216218,RNA deep sequencing reveals differential microRNA expression during development of sea urchin and sea star.,"microRNAs (miRNAs) are small (20-23 nt), non-coding single stranded RNA molecules that act as post-transcriptional regulators of mRNA gene expression. They have been implicated in regulation of developmental processes in diverse organisms. The echinoderms, Strongylocentrotus purpuratus (sea urchin) and Patiria miniata (sea star) are excellent model organisms for studying development with well-characterized transcriptional networks. However, to date, nothing is known about the role of miRNAs during development in these organisms, except that the genes that are involved in the miRNA biogenesis pathway are expressed during their developmental stages. In this paper, we used Illumina Genome Analyzer (Illumina, Inc.) to sequence small RNA libraries in mixed stage population of embryos from one to three days after fertilization of sea urchin and sea star (total of 22,670,000 reads). Analysis of these data revealed the miRNA populations in these two species. We found that 47 and 38 known miRNAs are expressed in sea urchin and sea star, respectively, during early development (32 in common). We also found 13 potentially novel miRNAs in the sea urchin embryonic library. miRNA expression is generally conserved between the two species during development, but 7 miRNAs are highly expressed in only one species. We expect that our two datasets will be a valuable resource for everyone working in the field of developmental biology and the regulatory networks that affect it. 
The computational pipeline to analyze Illumina reads is available at http://www.benoslab.pitt.edu/services.html.",2011-12-28 +27003708,MaxReport: An Enhanced Proteomic Result Reporting Tool for MaxQuant.,"MaxQuant is a proteomic software widely used for large-scale tandem mass spectrometry data. We have designed and developed an enhanced result reporting tool for MaxQuant, named as MaxReport. This tool can optimize the results of MaxQuant and provide additional functions for result interpretation. MaxReport can generate report tables for protein N-terminal modifications. It also supports isobaric labelling based relative quantification at the protein, peptide or site level. To obtain an overview of the results, MaxReport performs general descriptive statistical analyses for both identification and quantification results. The output results of MaxReport are well organized and therefore helpful for proteomic users to better understand and share their data. The script of MaxReport, which is freely available at http://websdoor.net/bioinfo/maxreport/, is developed using Python code and is compatible across multiple systems including Windows and Linux.",2016-03-22 +24259431,Updates on the web-based VIOLIN vaccine database and analysis system.,"The integrative Vaccine Investigation and Online Information Network (VIOLIN) vaccine research database and analysis system (http://www.violinet.org) curates, stores, analyses and integrates various vaccine-associated research data. Since its first publication in NAR in 2008, significant updates have been made. Starting from 211 vaccines annotated at the end of 2007, VIOLIN now includes over 3240 vaccines for 192 infectious diseases and eight noninfectious diseases (e.g. cancers and allergies). Under the umbrella of VIOLIN, >10 relatively independent programs are developed. For example, Protegen stores over 800 protective antigens experimentally proven valid for vaccine development. 
VirmugenDB annotated over 200 'virmugens', a term coined by us to represent those virulence factor genes that can be mutated to generate successful live attenuated vaccines. Specific patterns were identified from the genes collected in Protegen and VirmugenDB. VIOLIN also includes Vaxign, the first web-based vaccine candidate prediction program based on reverse vaccinology. VIOLIN collects and analyzes different vaccine components including vaccine adjuvants (Vaxjo) and DNA vaccine plasmids (DNAVaxDB). VIOLIN includes licensed human vaccines (Huvax) and veterinary vaccines (Vevax). The Vaccine Ontology is applied to standardize and integrate various data in VIOLIN. VIOLIN also hosts the Ontology of Vaccine Adverse Events (OVAE) that logically represents adverse events associated with licensed human vaccines.",2013-11-19 +29309632,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on Surgical Resection for the Treatment of Patients With Vestibular Schwannomas.,"QUESTION 1:What surgical approaches for vestibular schwannomas (VS) are best for complete resection and facial nerve (FN) preservation when serviceable hearing is present? RECOMMENDATION:There is insufficient evidence to support the superiority of either the middle fossa (MF) or the retrosigmoid (RS) approach for complete VS resection and FN preservation when serviceable hearing is present. QUESTION 2:Which surgical approach (RS or translabyrinthine [TL]) for VS is best for complete resection and FN preservation when serviceable hearing is not present? RECOMMENDATION:There is insufficient evidence to support the superiority of either the RS or the TL approach for complete VS resection and FN preservation when serviceable hearing is not present. QUESTION 3:Does VS size matter for facial and vestibulocochlear nerve preservation with surgical resection? 
RECOMMENDATION:Level 3: Patients with larger VS tumor size should be counseled about the greater than average risk of loss of serviceable hearing. QUESTION 4:Should small intracanalicular tumors (<1.5 cm) be surgically resected? RECOMMENDATION:There are insufficient data to support a firm recommendation that surgery be the primary treatment for this subclass of VSs. QUESTION 5:Is hearing preservation routinely possible with VS surgical resection when serviceable hearing is present? RECOMMENDATION:Level 3: Hearing preservation surgery via the MF or the RS approach may be attempted in patients with small tumor size (<1.5 cm) and good preoperative hearing. QUESTION 6:When should surgical resection be the initial treatment in patients with neurofibromatosis type 2 (NF2)? RECOMMENDATION:There is insufficient evidence that surgical resection should be the initial treatment in patients with NF2. QUESTION 7:Does a multidisciplinary team, consisting of neurosurgery and neurotology, provides the best outcomes of complete resection and facial/vestibulocochlear nerve preservation for patients undergoing resection of VSs? RECOMMENDATION:There is insufficient evidence to support stating that a multidisciplinary team, usually consisting of a neurosurgeon and a neurotologist, provides superior outcomes compared to either subspecialist working alone. QUESTION 8:Does a subtotal surgical resection of a VS followed by stereotactic radiosurgery (SRS) to the residual tumor provide comparable hearing and FN preservation to patients who undergo a complete surgical resection? RECOMMENDATION:There is insufficient evidence to support subtotal resection (STR) followed by SRS provides comparable hearing and FN preservation to patients who undergo a complete surgical resection. QUESTION 9:Does surgical resection of VS treat preoperative balance problems more effectively than SRS? 
RECOMMENDATION:There is insufficient evidence to support either surgical resection or SRS for treatment of preoperative balance problems. QUESTION 10:Does surgical resection of VS treat preoperative trigeminal neuralgia more effectively than SRS? RECOMMENDATION:Level 3: Surgical resection of VSs may be used to better relieve symptoms of trigeminal neuralgia than SRS. QUESTION 11:Is surgical resection of VSs more difficult (associated with higher facial neuropathies and STR rates) after initial treatment with SRS? RECOMMENDATION:Level 3: If microsurgical resection is necessary after SRS, it is recommended that patients be counseled that there is an increased likelihood of a STR and decreased FN function.  The full guideline can be found at: https://www.cns.org/guidelines/guidelines-management-patients-vestibular-schwannoma/chapter_8.",2018-02-01 +25117763,Structure and mechanism of the unique C2 domain of Aida.,"Axin interactor, dorsalization-associated (Aida) was identified as a regulatory factor that utilizes its C-terminal region to interact with axis formation inhibitor (Axin). Aida abrogates the Axin-mediated Jun N-terminal kinase activation required for proper dorsalization during zebrafish embryonic development, and thus functions as a proventralization factor. Here, we report the structure of Aida C-terminal fragments, which adopt a conventional C2 domain topology. We also demonstrate that Aida can specifically bind to phosphoinositides in a Ca(2+) -independent manner, and is able to associate with the cell membrane via a novel positively charged surface, namely a basic loop. Mutation of the positively charged patch on the basic loop leads to destabilization of the Aida-membrane association or disruption of the Aida-Axin interaction, resulting in impaired Jun N-terminal kinase inhibition. Together, our findings provide a molecular basis for C2 domain-mediated Aida-membrane and Aida-Axin associations.

Database

The atomic coordinates and structure factors of the mouse Aida C2 domain (code: 2QZ5) and the zebrafish Aida C2 domain (code: 2QZQ) have been deposited in the Protein Data Bank (http://www.rcsb.org/)

Structured digital abstract

 AIDA physically interacts with Axin by anti tag coimmunoprecipitation (View interaction).",2014-09-06 +28934094,Estimated Changes in Life Expectancy and Adult Mortality Resulting from Declining PM2.5 Exposures in the Contiguous United States: 1980-2010.,"

Background

PM2.5 precursor emissions have declined over the course of several decades, following the implementation of local, state, and federal air quality policies. Estimating the corresponding change in population exposure and PM2.5-attributable risk of death prior to the year 2000 is made difficult by the lack of PM2.5 monitoring data.

Objectives

We used a new technique to estimate historical PM2.5 concentrations, and estimated the effects of changes in PM2.5 population exposures on mortality in adults (age ≥30y), and on life expectancy at birth, in the contiguous United States during 1980-2010.

Methods

We estimated annual mean county-level PM2.5 concentrations in 1980, 1990, 2000, and 2010 using universal kriging incorporating geographic variables. County-level death rates and national life tables for each year were obtained from the U.S. Census and Centers for Disease Control and Prevention. We used log-linear and nonlinear concentration-response coefficients from previous studies to estimate changes in the numbers of deaths and in life years and life expectancy at birth, attributable to changes in PM2.5.

Results

Between 1980 and 2010, population-weighted PM2.5 exposures fell by about half, and the estimated number of excess deaths declined by about a third. The States of California, Virginia, New Jersey, and Georgia had some of the largest estimated reductions in PM2.5-attributable deaths. Relative to a counterfactual population with exposures held constant at 1980 levels, we estimated that people born in 2050 would experience an ∼1-y increase in life expectancy at birth, and that there would be a cumulative gain of 4.4 million life years among adults ≥30y of age.

Conclusions

Our estimates suggest that declines in PM2.5 exposures between 1980 and 2010 have benefitted public health. https://doi.org/10.1289/EHP507.",2017-09-06 +27669239,Identification of Protein-Protein Interactions via a Novel Matrix-Based Sequence Representation Model with Amino Acid Contact Information. ,"Identification of protein-protein interactions (PPIs) is a difficult and important problem in biology. Since experimental methods for predicting PPIs are both expensive and time-consuming, many computational methods have been developed to predict PPIs and interaction networks, which can be used to complement experimental approaches. However, these methods have limitations to overcome. They need a large number of homology proteins or literature to be applied in their method. In this paper, we propose a novel matrix-based protein sequence representation approach to predict PPIs, using an ensemble learning method for classification. We construct the matrix of Amino Acid Contact (AAC), based on the statistical analysis of residue-pairing frequencies in a database of 6323 protein-protein complexes. We first represent the protein sequence as a Substitution Matrix Representation (SMR) matrix. Then, the feature vector is extracted by applying algorithms of Histogram of Oriented Gradient (HOG) and Singular Value Decomposition (SVD) on the SMR matrix. Finally, we feed the feature vector into a Random Forest (RF) for judging interaction pairs and non-interaction pairs. Our method is applied to several PPI datasets to evaluate its performance. On the S. cerevisiae dataset, our method achieves 94.83% accuracy and 92.40% sensitivity. Compared with existing methods, the accuracy of our method is increased by 0.11 percentage points. On the H. pylori dataset, our method achieves 89.06% accuracy and 88.15% sensitivity, and the accuracy of our method is increased by 0.76%. On the Human PPI dataset, our method achieves 97.60% accuracy and 96.37% sensitivity, and the accuracy of our method is increased by 1.30%. In addition, we test our method on a very important PPI network, and it achieves 92.71% accuracy. In the Wnt-related network, the accuracy of our method is increased by 16.67%. The source code and all datasets are available at https://figshare.com/s/580c11dce13e63cb9a53.",2016-09-24 +27899594,ECOD: new developments in the evolutionary classification of domains.,"Evolutionary Classification Of protein Domains (ECOD) (http://prodata.swmed.edu/ecod) comprehensively classifies protein with known spatial structures maintained by the Protein Data Bank (PDB) into evolutionary groups of protein domains. ECOD relies on a combination of automatic and manual weekly updates to achieve its high accuracy and coverage with a short update cycle. ECOD classifies the approximately 120 000 depositions of the PDB into more than 500 000 domains in ∼3400 homologous groups. We show the performance of the weekly update pipeline since the release of ECOD, describe improvements to the ECOD website and available search options, and discuss novel structures and homologous groups that have been classified in the recent updates. Finally, we discuss the future directions of ECOD and further improvements planned for the hierarchy and update process.",2016-11-29 +27670824,[Immunology and immunosuppression in kidney transplantation. ABO and HLA incompatible kidney transplantation].,"

Objectives

To perform a state of the art about immunological features in renal transplantation, immunosuppressive drugs and their mechanisms of action and immunologically high risk transplantations such as ABO and HLA-incompatible transplantation.

Material and methods

An exhaustive systematic review of the scientific literature was performed in the Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of the following keywords (MESH): ""allogenic response; allograft; immunosuppression; ABO incompatible transplantation; donor specific antibodies; HLA incompatible; desensitization; kidney transplantation"". Publications obtained were selected based on methodology, language, date of publication (last 10 years) and relevance. Prospective and retrospective studies, in English or French, review articles; meta-analysis and guidelines were selected and analyzed. This search found 4717 articles. After reading titles and abstracts, 141 were included in the text, based on their relevance.

Results

The considerable step in comprehension and knowledge allogeneic response this last few years allowed a better used of immunosuppression and the discover of news immunosuppressive drugs. In the first part of this article, the allogeneic response will be described. The different classes of immunosuppressive drugs will be presented and the actual management of immunosuppression will be discussed. Eventually, the modalities and results of immunologically high-risk transplantations such as ABO and HLA incompatible transplantations will be reported.

Conclusions

The knowledge and the control of allogeneic response to allogeneic graft allowed the development of renal transplantation.",2016-09-23 +28011776,SarConfoCal: simultaneous sarcomere length and cytoplasmic calcium measurements for laser scanning confocal microscopy images.,"

Summary

Simultaneous recordings of myocytes contractility and their cytoplasmic calcium concentration allow powerful studies, particularly on heart failure and other cardiac dysfunctions. Such studies require dedicated and expensive experimental devices that are difficult to use. Thus we propose SarConfoCal, the first and only software to simultaneously analyse both cytoplasmic calcium variations (from fluorescence signal) and myocytes contractility (from sarcomere length measurement) on laser scanning confocal microscopy images. SarConfoCal is easy to set up and use, especially by people without programming skills.

Availability and implementation

The software is freely distributed under the GNU General Public License. Download and setup instructions are available at http://pccv.univ-tours.fr/ImageJ/SarConfoCal . It is provided as a toolset for ImageJ (the open-source program for image analysis provided by the National Institutes of Health). SarConfoCal has been tested under Windows, Mac and Linux operating systems.

Contact

come.pasqualin@univ-tours.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +28610126,"Taxonomic description and 3D modelling of a new species of myzostomid (Annelida, Myzostomida) associated with black corals from Madagascar.","Eenymeenymyzostoma nigrocorallium n. sp. is the first species of myzostomid worm associated with black corals to be described. Endoparasitic specimens of E. nigrocorallium were found associated with three species of antipatharians on the Great Reef of Toliara. Individuals inhabit the gastrovascular ducts of their hosts and evidence of infestation is, most of the time, not visible externally. Phylogenetic analyses based on 18S rDNA, 16S rDNA and COI data indicate a close relation to Eenymeenymyzostoma cirripedium, the only other species of the genus. The morphology of E. nigrocorallium is very unusual compared to that of the more conventional E. cirripedium. The new species has five pairs of extremely reduced parapodia located on the body margin and no introvert, cirri or lateral organs. Individuals are hermaphroditic, with the male and female gonads both being located dorsally in the trunk. It also has a highly developed parenchymo-muscular layer on the ventral side, and the digestive system lies in the middle part of the trunk. A three-dimensional digital model of this worm's body plan has been constructed whereby the external morphology and in toto views of the observed organ systems (nervous, digestive and reproductive) can be viewed on-screen: http://doi.org/10.13140/RG.2.2.17911.21923.",2017-03-19 +28610618,Relatively frequent switching of transcription start sites during cerebellar development.,"

Background

Alternative transcription start site (TSS) usage plays important roles in transcriptional control of mammalian gene expression. The growing interest in alternative TSSs and their role in genome diversification spawned many single-gene studies on differential usages of tissue-specific or temporal-specific alternative TSSs. However, exploration of the switching usage of alternative TSS usage on a genomic level, especially in the central nervous system, is largely lacking.

Results

In this study, We have prepared a unique set of time-course data for the developing cerebellum, as part of the FANTOM5 consortium ( http://fantom.gsc.riken.jp/5/ ) that uses their innovative capturing of 5' ends of all transcripts followed by Helicos next generation sequencing. We analyzed the usage of all transcription start sites (TSSs) at each time point during cerebellar development that provided information on multiple RNA isoforms that emerged from the same gene. We developed a mathematical method that systematically compares the expression of different TSSs of a gene to identify temporal crossover and non-crossover switching events. We identified 48,489 novel TSS switching events in 5433 genes during cerebellar development. This includes 9767 crossover TSS switching events in 1511 genes, where the dominant TSS shifts over time.

Conclusions

We observed a relatively high prevalence of TSS switching in cerebellar development where the resulting temporally-specific gene transcripts and protein products can play important regulatory and functional roles.",2017-06-13 +,Assembly and Characterization of the European Hazelnut ‘Jefferson’ Transcriptome,"European hazelnut (Corylus avellana L.) is of worldwide agricultural significance, with breeding efforts focused on combining high nut yield and nut quality with resistance to diseases such as eastern filbert blight (EFB), a cause of severe crop loss in much of the United States. Oregon State University recently released a resistant cultivar, ‘Jefferson’ (OSU 703.007), that was chosen for transcriptome sequencing to establish further genomic resources for C. avellana L. We used Illumina ribonucleic acid sequencing (RNA-seq) to characterize complementary DNA (cDNA) libraries from four hazelnut tissues, including leaves, catkins, bark, and whole seedlings. The 6.8 Gb of hazelnut transcriptome data was assembled de novo into 28,255 contigs with an average length of 532 bp and an N50 (the minimum contig length necessary such that all contigs of equal or greater length will equal half of the bases of the assembly) of 961 bp. Sequence comparisons using BLASTX and gene ontology (GO) classifications were used to generate automated descriptive function annotations. High similarity of the predicted proteins to sequences in related plants demonstrates the validity of the transcript contigs, with 80.8% having similarity to grape (Vitis vinifera L.), poplar (Populus trichocarpa Torr. & A. Gray), and castor bean (Ricinus communis L.) sequences in the public domain. A survey of GO terms enriched among tissue-specific transcripts further validates the assembly. A basic local alignment search tool (BLAST) portal and web resources (http://hazelnut.cgrb.oregonstate.edu [accessed 8 Jan. 
2010]) are available and will be of importance to breeders for marker-assisted breeding efforts.",2012-11-01 +27605104,TRI_tool: a web-tool for prediction of protein-protein interactions in human transcriptional regulation.,"The TRI_tool, a sequence-based web tool for prediction of protein interactions in the human transcriptional regulation, is intended for biomedical investigators who work on understanding the regulation of gene expression. It has an improved predictive performance due to the training on updated, human specific, experimentally validated datasets. The TRI_tool is designed to test up to 100 potential interactions with no time delay and to report both probabilities and binarized predictions.

Availability and implementation

http://www.vin.bg.ac.rs/180/tools/tfpred.php CONTACT: vladaper@vinca.rs; nevenav@vinca.rsSupplementary information: Supplementary data are available at Bioinformatics online.",2016-09-07 +25541944,Allele Workbench: transcriptome pipeline and interactive graphics for allele-specific expression.,"Sequencing the transcriptome can answer various questions such as determining the transcripts expressed in a given species for a specific tissue or condition, evaluating differential expression, discovering variants, and evaluating allele-specific expression. Differential expression evaluates the expression differences between different strains, tissues, and conditions. Allele-specific expression evaluates expression differences between parental alleles. Both differential expression and allele-specific expression have been studied for heterosis (hybrid vigor), where the hybrid has improved performance over the parents for one or more traits. The Allele Workbench software was developed for a heterosis study that evaluated allele-specific expression for a mouse F1 hybrid using libraries from multiple tissues with biological replicates. This software has been made into a distributable package, which includes a pipeline, a Java interface to build the database, and a Java interface for query and display of the results. The required input is a reference genome, annotation file, and one or more RNA-Seq libraries with optional replicates. It evaluates allelic imbalance at the SNP and transcript level and flags transcripts with significant opposite directional allele-specific expression. The Java interface allows the user to view data from libraries, replicates, genes, transcripts, exons, and variants, including queries on allele imbalance for selected libraries. To determine the impact of allele-specific SNPs on protein folding, variants are annotated with their effect (e.g., missense), and the parental protein sequences may be exported for protein folding analysis. 
The Allele Workbench processing results in transcript files and read counts that can be used as input to the previously published Transcriptome Computational Workbench, which has a new algorithm for determining a trimmed set of gene ontology terms. The software with demo files is available from https://code.google.com/p/allele-workbench. Additionally, all software is ready for immediate use from an Atmosphere Virtual Machine Image available from the iPlant Collaborative (www.iplantcollaborative.org).",2014-12-26 +28712912,MultiXplore: Visual exploration platform for multimodal neuroimaging data.,"

Background

Construction of brain functional and structural networks by neuroimaging methods facilitates inter-modal studies. These type of studies often demand exploration tools to carry out functional-structural discoveries and answer questions regarding the anatomical basis of brain networks.

New method

This paper describes the design and development of a software module for interactive visualization and exploration of dual-modal brain networks. Our objective was to equip the user with a research tool to investigate brain connectivity matrices while visualizing relevant anatomical landmarks within a 3D volumetric view. In order to create this view, MultiXplore was designed to load data from both structural and diffusion MRI and connectivity matrices.

Results

Once user starts to select desired cells through an interactive matrix unit, associated axonal fiber pathways and grey matter regions are generated and displayed. Integration and visualization of functional and structural networks in this 3D interactive framework was successfully implemented and tested.

Comparison with existing method(s)

MultiXplore contributes to the transition of connectivity visualization techniques from node-link format to an anatomically more realistic graphical form and assists scientists in relating connectivity matrices to their anatomical correlates. This module also benefits from additional novel functionalities to annotate and differentiate fibers in a large bundle. Unlike traditional graph displays, interactive functionality helps in the inspection and visualization of relevant structures without cluttering the scene with excessive items.

Conclusion

This module was designed and developed as a plugin to 3D Slicer imaging platform and is accessible for neuroimaging researchers through NITRC (http://www.nitrc.org/projects/multixplore/).",2017-07-13 +27665408,"[Sexuality, fertility and pregnancy after kidney transplantation].","

Aims

To describe sexuality and fertility alterations secondary to chronic kidney disease and their outcomes after renal transplantation.

Material and methods

An exhaustive systematic review of the scientific literature was performed in the Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of the following keywords: erectile dysfunction; impotence; sexuality; pregnancy; fertility; renal transplantation. Publications obtained were selected based on methodology, language, date of publication (last 10 years) and relevance. Prospective and retrospective studies, in English or French, review articles; meta-analysis and guidelines were selected and analyzed. This search found 706 articles. After reading titles and abstracts, 76 were included in the text, based on their relevance.

Results

The observed prevalence of erectile dysfunction is high in men with chronic kidney disease. The causes of erectile dysfunction are numbers and its origin is often multifactorial. Most of the time, kidney transplantation improves sexuality and the management of erectile dysfunction in transplanted men is similar to the general population. Improvement in sexuality in men and women after kidney transplantation may conduct to pregnancy. The outcomes of pregnancy after transplantation are quite good in absence of risk factors such as time to pregnancy less than 1 year after transplantation, uncontrolled high blood pressure, and decreased renal function of the graft. Adaptation of immunosuppression may be required to avoid any teratogenicity for the fetus.

Conclusion

Kidney transplantation improves sexuality and fertility in men and women with chronic kidney disease.",2016-09-21 +27665407,[Urologic malignancies in renal transplant candidates and recipients].,"

Objective

To review epidemiology and management of urologic neoplasms in renal transplant candidates and recipients.

Material and methods

Relevant publications were identified through Medline (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) database using the following keywords, alone or in association, ""neoplasms""; ""prostate cancer""; ""renal carcinoma""; ""renal transplantation""; ""transitional carcinoma""; ""waiting list"". Articles were selected according to methods, language of publication and relevance. A total of 7730 articles were identified including 781 for solid tumors, 1565 for renal cell carcinoma (RCC), 2674 for prostate cancer (Pca), 385 for transitional carcinoma (TC) and 56 for testicular cancer; after careful selection, 221 publications were eligible for our review.

Results

Renal transplant candidates and recipients are at higher risk of urologic neoplasms than general population, but prostate cancer has similar features. Thus, all therapeutic options are valid. Conversely to radiation therapy, radical prostatectomy provides precise staging and immediate affirmation of therapeutic success. Lymph nodes dissection needs to be discussed; systematic screening using PSA level and digital rectal examination should be offered in this specific population. RCC arising in native kidneys are usually low grade and stage and require total nephrectomy. In transplant candidates, there is no need to delay transplantation after treatment of low risk RCC according to published predictive nomograms. RCC of the allograft are rare, with a prevalence of 0.2 to 05% with a dialysis free survival ranging from 40 to 75% at 21.5 to 43 months. Treatment options are nephron sparing surgery, percutaneous ablation and immediate or deferred transplantectomy. Conversely to RCC or PCa, TC present with more unfavorable features as general population. Their management faces specific difficulties such as lower efficacy of BCG instillation or the technical challenge of urinary diversion.

Conclusion

Application of appropriate indication for transplantectomy relies on benefit-risk balance between the interruption of immunosuppressive agents versus survival and quality of life impairment after returning to dialysis. No robust recommendation exists regarding switch of immunosuppressive drugs. Cancer predictive factors and access to a subsequent transplantation are key decisive elements.",2016-09-21 +27665406,[Renal transplantation in 2046: Future and perspectives].,"

Objectives

To report major findings that may build the future of kidney transplantation.

Material and methods

Relevant publications were identified through Medline (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) database from 1960 to 2016 using the following keywords, in association, ""bio-engineering; heterotransplantation; immunomodulation; kidney; regenerative medicine; xenotransplantation"". Articles were selected according to methods, language of publication and relevance. A total of 5621 articles were identified including 2264 for xenotransplantation, 1058 for regenerative medicine and 2299 for immunomodulation; after careful selection, 86 publications were eligible for our review.

Results

Despite genetic constructs, xenotransplantation faces the inevitable obstacle of species barrier. Uncertainty regarding xenograft acceptance by recipients as well as ethical considerations due to the debatable utilization of animal lives, are major limits for its future. Regenerative medicine and tridimensional bioprinting allow successful implantation of organs. Bioengineering, using decellularized tissue matrices or synthetic scaffold, seeded with pluripotent cells and assembled using bioreactors, provide exciting results but remain far for reconstituting renal complexity and vascular patency. Immune tolerance may be achieved through a tough initial T-cell depletion or a combined haplo-identical bone marrow transplant leading to lymphohematopoietic chimerism.

Conclusion

Current researches aim to increase the pool of organs available for transplantation (xenotransplants and bio-artificial kidneys) and to increase allograft survival through the induction of immune tolerance. Reported results suggest the onset of a thrilling new era for renal transplantation providing end-stage renal disease-patients with an improved survival and quality of life.",2016-09-21 +27665410,[Polycystic kidney disease and kidney transplantation].,"

Objectives

To perform a state of the art about autosomal dominant polykystic kidney disease (ADPKD), management of its urological complications and end stage renal disease treatment modalities.

Material and methods

An exhaustive systematic review of the scientific literature was performed in the Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of the following keywords (MESH): ""autosomal dominant polykystic kidney disease"", ""complications"", ""native nephrectomy"", ""kidney transplantation"". Publications obtained were selected based on methodology, language, date of publication (last 10 years) and relevance. Prospective and retrospective studies, in English or French, review articles; meta-analysis and guidelines were selected and analyzed. This search found 3779 articles. After reading titles and abstracts, 52 were included in the text, based on their relevance.

Results

ADPKD is the most inherited renal disease, leading to end stage renal disease requiring dialysis or renal transplantation in about 50% of the patients. Many urological complications (gross hematuria, cysts infection, renal pain, lithiasis) of ADPKD required urological management. The pretransplant evaluation will ask the challenging question of native nephrectomy only in case of recurrent kidney complications or large kidney not allowing graft implantation. The optimum timing for native nephrectomy will depend on many factors (dialysis or preemptive transplantation, complication severity, anuria, easy access to transplantation, potential living donor).

Conclusion

Pretransplant management of ADPKD is challenging. A conservative strategy should be promoted to avoid anuria (and its metabolic complications) and to preserve a functioning low urinary tract and quality of life. When native nephrectomy should be performed, surgery remains the gold standard but renal arterial embolization may be a safe option due to its low morbidity.",2016-09-21 +27654850,Species From Feces: Order-Wide Identification of Chiroptera From Guano and Other Non-Invasive Genetic Samples.,"Bat guano is a relatively untapped reservoir of information, having great utility as a DNA source because it is often available at roosts even when bats are not and is an easy type of sample to collect from a difficult-to-study mammalian order. Recent advances from microbial community studies in primer design, sequencing, and analysis enable fast, accurate, and cost-effective species identification. Here, we borrow from this discipline to develop an order-wide DNA mini-barcode assay (Species from Feces) based on a segment of the mitochondrial gene cytochrome c oxidase I (COI). The assay works effectively with fecal DNA and is conveniently transferable to low-cost, high-throughput Illumina MiSeq technology that also allows simultaneous pairing with other markers. Our PCR primers target a region of COI that is highly discriminatory among Chiroptera (92% species-level identification of barcoded species), and are sufficiently degenerate to allow hybridization across diverse bat taxa. We successfully validated our system with 54 bat species across both suborders. Despite abundant arthropod prey DNA in guano, our primers were highly specific to bats; no arthropod DNA was detected in thousands of feces run on Sanger and Illumina platforms. 
The assay is extendable to fecal pellets of unknown age as well as individual and pooled guano, to allow for individual (using singular fecal pellets) and community (using combined pellets collected from across long-term roost sites) analyses. We developed a searchable database (http://nau.edu/CEFNS/Forestry/Research/Bats/Search-Tool/) that allows users to determine the discriminatory capability of our markers for bat species of interest. Our assay has applications worldwide for examining disease impacts on vulnerable species, determining species assemblages within roosts, and assessing the presence of bat species that are vulnerable or facing extinction. The development and analytical pathways are rapid, reliable, and inexpensive, and can be applied to ecology and conservation studies of other taxa.",2016-09-21 +25058807,Online quantitative proteomics p-value calculator for permutation-based statistical testing of peptide ratios.,"The utility of high-throughput quantitative proteomics to identify differentially abundant proteins en-masse relies on suitable and accessible statistical methodology, which remains mostly an unmet need. We present a free web-based tool, called Quantitative Proteomics p-value Calculator (QPPC), designed for accessibility and usability by proteomics scientists and biologists. Being an online tool, there is no requirement for software installation. Furthermore, QPPC accepts generic peptide ratio data generated by any mass spectrometer and database search engine. Importantly, QPPC utilizes the permutation test that we recently found to be superior to other methods for analysis of peptide ratios because it does not assume normal distributions.1 QPPC assists the user in selecting significantly altered proteins based on numerical fold change, or standard deviation from the mean or median, together with the permutation p-value. 
Output is in the form of comma separated values files, along with graphical visualization using volcano plots and histograms. We evaluate the optimal parameters for use of QPPC, including the permutation level and the effect of outlier and contaminant peptides on p-value variability. The optimal parameters defined are deployed as default for the web-tool at http://qppc.di.uq.edu.au/ .",2014-07-30 +23023984,InterMine: a flexible data warehouse system for the integration and analysis of heterogeneous biological data.,"

Summary

InterMine is an open-source data warehouse system that facilitates the building of databases with complex data integration requirements and a need for a fast customizable query facility. Using InterMine, large biological databases can be created from a range of heterogeneous data sources, and the extensible data model allows for easy integration of new data types. The analysis tools include a flexible query builder, genomic region search and a library of 'widgets' performing various statistical analyses. The results can be exported in many commonly used formats. InterMine is a fully extensible framework where developers can add new tools and functionality. Additionally, there is a comprehensive set of web services, for which client libraries are provided in five commonly used programming languages.

Availability

Freely available from http://www.intermine.org under the LGPL license.

Contact

g.micklem@gen.cam.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-27 +28161745,Serious fungal infections in Canada.,"There are currently no nationwide epidemiological data on fungal infections in Canada. We estimated the burden of serious fungal diseases using literature review and modeling, as per a methodology previously described by the LIFE program ( http://www.LIFE-worldwide.org ). Among the population of Canada (35.5 million in 2014), it was estimated that approximately 1.8% are affected by a serious fungal infection. Recurrent vulvovaginal candidiasis, severe asthma with fungal sensitization, and allergic bronchopulmonary aspergillosis are the most frequent infections, with population prevalences of 498,688 (1403/100,000), 73,344 (206/100,000), and 61,854 (174/100,000) cases, respectively. Over 3000 invasive fungal infections are estimated to occur annually, with incidences of 2068 cases (5.8/100,000) of invasive candidiasis, 566 cases (1.6/100,000) of invasive aspergillosis, 252 cases (0.71/100,000) of Pneumocystis pneumonia, 99 cases (0.28/100,000) of endemic mycoses, and 63 cases (0.18/100,000) of cryptococcosis. These estimates warrant validation through more formal epidemiological studies in Canada.",2017-02-04 +28101574,NEK2 serves as a prognostic biomarker for hepatocellular carcinoma.,"Never in mitosis gene A (NIMA)-related kinase 2 (NEK2) is a microtubule-associated protein that regulates spindle assembly in human cells and is overexpressed in various malignancies. However, the role of NEK2 in hepatocellular carcinoma (HCC) remains undetermined. We performed RNA-seq of the HCC cell line SMMC-7721 and the normal liver cell line HL-7702 using the Ion Proton System. NEK2 expression was detected using quantitative reverse transcription polymerase chain reaction in two cell lines and 5 matched HCC and adjacent non-tumorous liver tissues. 
The correlation between survival and NEK2 expression was analyzed in 359 patients with HCC using RNASeqV2 data available from The Cancer Genome Atlas (TCGA) website (https://tcga-data.nci.nih.gov/tcga/). The expression of NEK2, phospho-AKT and MMP-2 was evaluated by immunohistochemistry in 63 cases of HCC and matched adjacent non-tumorous liver tissues. Relationships between protein expression and clinicopathological parameters were assessed, and the correlations between NEK2 with phospho-AKT and MMP-2 expressions were evaluated. A total of 610 differentially expressed genes (DEGs) were revealed in the transcriptome comparison, 297 of which were upregulated and 313 were downregulated in HCC. NEK2, as the most obviously different DEG in cells and tissues from the RNA-seq data, was listed as an HCC candidate biomarker for further verification. NEK2 was overexpressed in HCC cells and tissues (P=0.002, P=0.013) and HCC patients with a high expression of NEK2 had a poor prognosis (P=0.0145). Clinical analysis indicated that the overexpression of NEK2 in HCC was significantly correlated with diolame complete (P<0.001), tumor nodule number (P=0.012) and recurrence (P=0.004). NEK2 expression was positively correlated with the expression of phospho-AKT (r=0.883, P<0.01) and MMP-2 (r=0.781, P<0.01). Overexpression of NEK2 was associated with clinicopathological characteristics and poor patient outcomes, suggesting that NEK2 serves as a prognostic biomarker for HCC. Alteration of NEK2 protein levels may contribute to invasion and metastasis of HCC, which may occur through activation of AKT signaling and promotion of MMP-2 expression.",2017-01-03 +27651524,Does Whole-Body Vibration Improve the Functional Exercise Capacity of Subjects With COPD? 
A Meta-Analysis.,"Whole-body vibration (WBV) is considered a type of physical activity based on the assumption that it results in an increase in muscle strength and performance and, therefore, may be a promising way to exercise patients with COPD. A comprehensive database search (PubMed/MEDLINE, LILACS, CINAHL, Web of Science, Scopus, and COCHRANE Library) for randomized trials, including original articles, that compared WBV groups versus control groups was conducted and studies were selected for comparison. The effect of WBV treatment was compared for minimum clinically important differences. The statistical heterogeneity among the studies was assessed using the I2 statistic; the results are expressed as percentages. Inconsistencies of up to 25% were considered low, those between 50 and 75% were considerate moderate, and those > 75% were considered high. Risk of bias was classified based on the Cochrane Collaboration tool, the meta-analysis was conducted using RevMan 5.3 software, and the level of evidence was assessed using the GRADE system. The primary outcome was functional exercise capacity. Secondary outcomes were quality of life, performance in activities of daily living, muscle strength of the lower limbs, and possible adverse effects assessed clinically or by subject reports. We included 4 articles involving 185 subjects for analysis. All subjects in the groups undergoing WBV showed improvement in distance walked in the 6-min walk test compared with the control group (57.85 m, 95% CI 16.36-99.33 m). Regarding the secondary end points, just one article reported improved quality of life and activities of daily living. The only article that assessed muscle strength found no difference between the groups. The quality of evidence for functional exercise capacity outcome was considered moderate. WBV seems to benefit subjects with COPD by improving their functional exercise capacity, without producing adverse effects. 
The quality of evidence is moderate, but the degree of recommendation is strong. (International Prospective Register of Systematic Reviews, http://www.crd.york.ac.uk/prospero, 2015:CRD42015027659.).",2016-09-20 +27179030,DeAnnIso: a tool for online detection and annotation of isomiRs from small RNA sequencing data.,"Small RNA (sRNA) Sequencing technology has revealed that microRNAs (miRNAs) are capable of exhibiting frequent variations from their canonical sequences, generating multiple variants: the isoforms of miRNAs (isomiRs). However, integrated tool to precisely detect and systematically annotate isomiRs from sRNA sequencing data is still in great demand. Here, we present an online tool, DeAnnIso (Detection and Annotation of IsomiRs from sRNA sequencing data). DeAnnIso can detect all the isomiRs in an uploaded sample, and can extract the differentially expressing isomiRs from paired or multiple samples. Once the isomiRs detection is accomplished, detailed annotation information, including isomiRs expression, isomiRs classification, SNPs in miRNAs and tissue specific isomiR expression are provided to users. Furthermore, DeAnnIso provides a comprehensive module of target analysis and enrichment analysis for the selected isomiRs. Taken together, DeAnnIso is convenient for users to screen for isomiRs of their interest and useful for further functional studies. The server is implemented in PHP + Perl + R and available to all users for free at: http://mcg.ustc.edu.cn/bsc/deanniso/ and http://mcg2.ustc.edu.cn/bsc/deanniso/.",2016-05-13 +26440803,The REporting of studies Conducted using Observational Routinely-collected health Data (RECORD) statement.,"Routinely collected health data, obtained for administrative and clinical purposes without specific a priori research goals, are increasingly used for research. 
The rapid evolution and availability of these data have revealed issues not addressed by existing reporting guidelines, such as Strengthening the Reporting of Observational Studies in Epidemiology (STROBE). The REporting of studies Conducted using Observational Routinely collected health Data (RECORD) statement was created to fill these gaps. RECORD was created as an extension to the STROBE statement to address reporting items specific to observational studies using routinely collected health data. RECORD consists of a checklist of 13 items related to the title, abstract, introduction, methods, results, and discussion section of articles, and other information required for inclusion in such research reports. This document contains the checklist and explanatory and elaboration information to enhance the use of the checklist. Examples of good reporting for each RECORD checklist item are also included herein. This document, as well as the accompanying website and message board (http://www.record-statement.org), will enhance the implementation and understanding of RECORD. Through implementation of RECORD, authors, journals editors, and peer reviewers can encourage transparency of research reporting.",2015-10-06 +27111507,Automated structure modeling of large protein assemblies using crosslinks as distance restraints.,"Crosslinking mass spectrometry is increasingly used for structural characterization of multisubunit protein complexes. Chemical crosslinking captures conformational heterogeneity, which typically results in conflicting crosslinks that cannot be satisfied in a single model, making detailed modeling a challenging task. 
Here we introduce an automated modeling method dedicated to large protein assemblies ('XL-MOD' software is available at http://aria.pasteur.fr/supplementary-data/x-links) that (i) uses a form of spatial restraints that realistically reflects the distribution of experimentally observed crosslinked distances; (ii) automatically deals with ambiguous and/or conflicting crosslinks and identifies alternative conformations within a Bayesian framework; and (iii) allows subunit structures to be flexible during conformational sampling. We demonstrate our method by testing it on known structures and available crosslinking data. We also crosslinked and modeled the 17-subunit yeast RNA polymerase III at atomic resolution; the resulting model agrees remarkably well with recently published cryoelectron microscopy structures and provides additional insights into the polymerase structure.",2016-04-25 +25122033,An education in contrast: state-by-state assessment of school immunization records requirements.,"

Objectives

We reviewed the complexities of school-related immunization policies, their relation to immunization information systems (IIS) and immunization registries, and the historical context to better understand this convoluted policy system.

Methods

We used legal databases (Lexis-Nexis and Westlaw) to identify school immunization records policies for 50 states, 5 cities, and the District of Columbia (Centers for Disease Control and Prevention ""grantees""). The original search took place from May to September 2010 (cross-referenced in July 2013 with the list on http://www.immunize.org/laws ). We describe the requirements, agreement with IIS policies, and penalties for policy violations.

Results

We found a complex web of public health, medical, and education-directed policies, which complicates immunization data sharing. Most (79%) require records of immunizations for children to attend school or for a child-care institution licensure, but only a few (11%) require coordination between IIS and schools or child-care facilities.

Conclusions

To realize the full benefit of IIS investment, including improved immunization and school health program efficiencies, IIS and school immunization records policies must be better coordinated. States with well-integrated policies may serve as models for effective harmonization.",2014-08-14 +23584085,A tutorial for software development in quantitative proteomics using PSI standard formats.,"The Human Proteome Organisation - Proteomics Standards Initiative (HUPO-PSI) has been working for ten years on the development of standardised formats that facilitate data sharing and public database deposition. In this article, we review three HUPO-PSI data standards - mzML, mzIdentML and mzQuantML, which can be used to design a complete quantitative analysis pipeline in mass spectrometry (MS)-based proteomics. In this tutorial, we briefly describe the content of each data model, sufficient for bioinformaticians to devise proteomics software. We also provide guidance on the use of recently released application programming interfaces (APIs) developed in Java for each of these standards, which makes it straightforward to read and write files of any size. We have produced a set of example Java classes and a basic graphical user interface to demonstrate how to use the most important parts of the PSI standards, available from http://code.google.com/p/psi-standard-formats-tutorial. This article is part of a Special Issue entitled: Computational Proteomics in the Post-Identification Era. Guest Editors: Martin Eisenacher and Christian Stephan.",2013-04-12 +26800544,Predicting Protein Function via Semantic Integration of Multiple Networks.,"Determining the biological functions of proteins is one of the key challenges in the post-genomic era. The rapidly accumulated large volumes of proteomic and genomic data drives to develop computational models for automatically predicting protein function in large scale. 
Recent approaches focus on integrating multiple heterogeneous data sources and they often get better results than methods that use single data source alone. In this paper, we investigate how to integrate multiple biological data sources with the biological knowledge, i.e., Gene Ontology (GO), for protein function prediction. We propose a method, called SimNet, to Semantically integrate multiple functional association Networks derived from heterogenous data sources. SimNet firstly utilizes GO annotations of proteins to capture the semantic similarity between proteins and introduces a semantic kernel based on the similarity. Next, SimNet constructs a composite network, obtained as a weighted summation of individual networks, and aligns the network with the kernel to get the weights assigned to individual networks. Then, it applies a network-based classifier on the composite network to predict protein function. Experiment results on heterogenous proteomic data sources of Yeast, Human, Mouse, and Fly show that, SimNet not only achieves better (or comparable) results than other related competitive approaches, but also takes much less time. The Matlab codes of SimNet are available at https://sites.google.com/site/guoxian85/simnet.",2015-07-22 +28763388,Health Impact Assessments and Extreme Weather-Challenges for Environmental Health.,"

Background

The Florida Department of Health, Environmental Public Health Tracking Program, in collaboration with the Escambia County Health Department and the University of West Florida, used the Health Impact Assessment Framework to examine adverse health outcomes that may be related to an extreme flood event in Pensacola, Florida (Escambia County) during April 29 to May 3, 2014. In this 2014 flood event, portions of Pensacola received more than 15.5 in of rain in a single day. Infrastructure impacts from this extreme event included destroyed bridges and roads and the failure of many sewage lift stations.

Objective

To determine whether there were associated increases in injury, illness, and death, data on reportable diseases, hospitalizations, emergency department (ED) visits, and deaths that occurred during the impact period in 2014 were compared with a control period in 2008.

Design

We used an ecological design to compare impact and control periods and examined the proportion of hospitalizations, ED visits, and deaths potentially attributable to the extreme flood event.

Results

The results of this comparison were mixed, with some Escambia County zip codes showing increased hospitalizations and ED visits, and some zip codes showing a decrease. However, countywide, there were increases in the proportion of both injury- and respiratory-related hospitalizations and ED visits during the impact period.

Conclusions

It is challenging to characterize human health impacts from natural disasters such as extreme floods. Still, it is believed that specific policy changes could result in fewer health impacts during future flood events. For example, this study recommended raising the electric panels on lift stations above the flood elevation to keep them operational during extreme rainfall events. For more maps and tables, consult the complete project report available online at http://www.floridatracking.com/HealthTrackFL/document/Escambia_HIA_Report.pdf.",2017-09-01 +28867092,"Development, features and application of DIET ASSESS & PLAN (DAP) software in supporting public health nutrition research in Central Eastern European Countries (CEEC).","In order to meet growing public health nutrition challenges in Central Eastern European Countries (CEEC) and Balkan countries, development of a Research Infrastructure (RI) and availability of an effective nutrition surveillance system are a prerequisite. The building block of this RI is an innovative tool called DIET ASSESS & PLAN (DAP), which is a platform for standardized and harmonized food consumption collection, comprehensive dietary intake assessment and nutrition planning. Its unique structure enables application of national food composition databases (FCDBs) from the European food composition exchange platform (28 national FCDBs) developed by EuroFIR (http://www.eurofir.org/) and in addition allows communication with other tools. DAP is used for daily menu and/or long-term diet planning in diverse public sector settings, foods design/reformulation, food labelling, nutrient intake assessment and calculation of the dietary diversity indicator, Minimum Dietary Diversity-Women (MDD-W). As a validated tool in different national and international projects, DAP represents an important RI in public health nutrition epidemiology in the CEEC region.",2016-09-19 +21736759,The differential disease regulome.,"

Background

Transcription factors in disease-relevant pathways represent potential drug targets, by impacting a distinct set of pathways that may be modulated through gene regulation. The influence of transcription factors is typically studied on a per disease basis, and no current resources provide a global overview of the relations between transcription factors and disease. Furthermore, existing pipelines for related large-scale analysis are tailored for particular sources of input data, and there is a need for generic methodology for integrating complementary sources of genomic information.

Results

We here present a large-scale analysis of multiple diseases versus multiple transcription factors, with a global map of over- and under-representation of 446 transcription factors in 1010 diseases. This map, referred to as the differential disease regulome, provides a first global statistical overview of the complex interrelationships between diseases, genes and controlling elements. The map is visualized using the Google map engine, due to its very large size, and provides a range of detailed information in a dynamic presentation format. The analysis is achieved through a novel methodology that performs a pairwise, genome-wide comparison on the cartesian product of two distinct sets of annotation tracks, e.g. all combinations of one disease and one TF. The methodology was also used to extend with maps using alternative data sets related to transcription and disease, as well as data sets related to Gene Ontology classification and histone modifications. We provide a web-based interface that allows users to generate other custom maps, which could be based on precisely specified subsets of transcription factors and diseases, or, in general, on any categorical genome annotation tracks as they are improved or become available.

Conclusion

We have created a first resource that provides a global overview of the complex relations between transcription factors and disease. As the accuracy of the disease regulome depends mainly on the quality of the input data, forthcoming ChIP-seq based binding data for many TFs will provide improved maps. We further believe our approach to genome analysis could allow an advance from the current typical situation of one-time integrative efforts to reproducible and upgradable integrative analysis. The differential disease regulome and its associated methodology is available at http://hyperbrowser.uio.no.",2011-07-07 +24812336,fastGapFill: efficient gap filling in metabolic networks.,"

Motivation

Genome-scale metabolic reconstructions summarize current knowledge about a target organism in a structured manner and as such highlight missing information. Such gaps can be filled algorithmically. Scalability limitations of available algorithms for gap filling hinder their application to compartmentalized reconstructions.

Results

We present fastGapFill, a computationally efficient tractable extension to the COBRA toolbox that permits the identification of candidate missing knowledge from a universal biochemical reaction database (e.g. Kyoto Encyclopedia of Genes and Genomes) for a given (compartmentalized) metabolic reconstruction. The stoichiometric consistency of the universal reaction database and of the metabolic reconstruction can be tested for permitting the computation of biologically more relevant solutions. We demonstrate the efficiency and scalability of fastGapFill on a range of metabolic reconstructions.

Availability and implementation

fastGapFill is freely available from http://thielelab.eu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-07 +27307639,SHARAKU: an algorithm for aligning and clustering read mapping profiles of deep sequencing in non-coding RNA processing.,"

Motivation

Deep sequencing of the transcripts of regulatory non-coding RNA generates footprints of post-transcriptional processes. After obtaining sequence reads, the short reads are mapped to a reference genome, and specific mapping patterns can be detected called read mapping profiles, which are distinct from random non-functional degradation patterns. These patterns reflect the maturation processes that lead to the production of shorter RNA sequences. Recent next-generation sequencing studies have revealed not only the typical maturation process of miRNAs but also the various processing mechanisms of small RNAs derived from tRNAs and snoRNAs.

Results

We developed an algorithm termed SHARAKU to align two read mapping profiles of next-generation sequencing outputs for non-coding RNAs. In contrast with previous work, SHARAKU incorporates the primary and secondary sequence structures into an alignment of read mapping profiles to allow for the detection of common processing patterns. Using a benchmark simulated dataset, SHARAKU exhibited superior performance to previous methods for correctly clustering the read mapping profiles with respect to 5'-end processing and 3'-end processing from degradation patterns and in detecting similar processing patterns in deriving the shorter RNAs. Further, using experimental data of small RNA sequencing for the common marmoset brain, SHARAKU succeeded in identifying the significant clusters of read mapping profiles for similar processing patterns of small derived RNA families expressed in the brain.

Availability and implementation

The source code of our program SHARAKU is available at http://www.dna.bio.keio.ac.jp/sharaku/, and the simulated dataset used in this work is available at the same link. Accession code: The sequence data from the whole RNA transcripts in the hippocampus of the left brain used in this work is available from the DNA DataBank of Japan (DDBJ) Sequence Read Archive (DRA) under the accession number DRA004502.

Contact

yasu@bio.keio.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +27651482,PDB2CD: a web-based application for the generation of circular dichroism spectra from protein atomic coordinates.,"

Motivation

Circular dichroism (CD) spectroscopy is extensively utilized for determining the percentages of secondary structure content present in proteins. However, although a large contributor, secondary structure is not the only factor that influences the shape and magnitude of the CD spectrum produced. Other structural features can make contributions so an entire protein structural conformation can give rise to a CD spectrum. There is a need for an application capable of generating protein CD spectra from atomic coordinates. However, no empirically derived method to do this currently exists.

Results

PDB2CD has been created as an empirical-based approach to the generation of protein CD spectra from atomic coordinates. The method utilizes a combination of structural features within the conformation of a protein; not only its percentage secondary structure content, but also the juxtaposition of these structural components relative to one another, and the overall structure similarity of the query protein to proteins in our dataset, the SP175 dataset, the 'gold standard' set obtained from the Protein Circular Dichroism Data Bank (PCDDB). A significant number of the CD spectra associated with the 71 proteins in this dataset have been produced with excellent accuracy using a leave-one-out cross-validation process. The method also creates spectra in good agreement with those of a test set of 14 proteins from the PCDDB. The PDB2CD package provides a web-based, user friendly approach to enable researchers to produce CD spectra from protein atomic coordinates.

Availability and implementation

http://pdb2cd.cryst.bbk.ac.uk CONTACT: r.w.janes@qmul.ac.uk Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-20 +23674503,The PhosphoGRID Saccharomyces cerevisiae protein phosphorylation site database: version 2.0 update.,"PhosphoGRID is an online database that curates and houses experimentally verified in vivo phosphorylation sites in the Saccharomyces cerevisiae proteome (www.phosphogrid.org). Phosphosites are annotated with specific protein kinases and/or phosphatases, along with the condition(s) under which the phosphorylation occurs and/or the effects on protein function. We report here an updated data set, including nine additional high-throughput (HTP) mass spectrometry studies. The version 2.0 data set contains information on 20 177 unique phosphorylated residues, representing a 4-fold increase from version 1.0, and includes 1614 unique phosphosites derived from focused low-throughput (LTP) studies. The overlap between HTP and LTP studies represents only ∼3% of the total unique sites, but importantly 45% of sites from LTP studies with defined function were discovered in at least two independent HTP studies. The majority of new phosphosites in this update occur on previously documented proteins, suggesting that coverage of phosphoproteins in the yeast proteome is approaching saturation. We will continue to update the PhosphoGRID data set, with the expectation that the integration of information from LTP and HTP studies will enable the development of predictive models of phosphorylation-based signaling networks. 
Database URL: http://www.phosphogrid.org/",2013-05-13 +22814884,"Pyrosequencing-based transcriptomic resources in the pond snail Lymnaea stagnalis, with a focus on genes involved in molecular response to diquat-induced stress.","Due to their ability to explore whole genome response to drugs and stressors, omics-based approaches are widely used in toxicology and ecotoxicology, and identified as powerful tools for future ecological risk assessment and environmental monitoring programs. Understanding the long-term effects of contaminants may indeed benefit from the coupling of genomics and eco-evolutionary hypotheses. Next-generation sequencing provides a new way to investigate pollutants impact, by targeting early responses, screening chemicals, and directly quantifying gene expression, even in organisms without reference genome. Lymnaea stagnalis is a freshwater mollusk in which access to genomic resources is critical for many scientific issues, especially in ecotoxicology. We used 454-pyrosequencing to obtain new transcriptomic resources in L. stagnalis and to preliminarily explore gene expression response to a redox-cycling pesticide, diquat. We obtained 151,967 and 128,945 high-quality reads from control and diquat-exposed individuals, respectively. Sequence assembly provided 141,999 contigs, of which 124,387 were singletons. BlastX search revealed significant match for 34.6 % of the contigs (21.2 % protein hits). KEGG annotation showed a predominance of hits with genes involved in energy metabolism and circulatory system, and revealed more than 400 putative genes involved in oxidative stress, cellular/molecular stress and signaling pathways, apoptosis, and metabolism of xenobiotics. Results also suggest that diquat may have a great diversity of molecular effects. Moreover, new genetic markers (putative SNPs) were discovered. 
We also created a Ensembl-like web-tool for data-mining ( http://genotoul-contigbrowser.toulouse.inra.fr:9095/Lymnaea_stagnalis/index.html ). This resource is expected to be relevant for any genomic approach aimed at understanding the molecular basis of physiological and evolutionary responses to environmental stress in L. stagnalis.",2012-07-20 +26389570,Comparing the performance of biomedical clustering methods.,"Identifying groups of similar objects is a popular first step in biomedical data analysis, but it is error-prone and impossible to perform manually. Many computational methods have been developed to tackle this problem. Here we assessed 13 well-known methods using 24 data sets ranging from gene expression to protein domains. Performance was judged on the basis of 13 common cluster validity indices. We developed a clustering analysis platform, ClustEval (http://clusteval.mpi-inf.mpg.de), to promote streamlined evaluation, comparison and reproducibility of clustering results in the future. This allowed us to objectively evaluate the performance of all tools on all data sets with up to 1,000 different parameter sets each, resulting in a total of more than 4 million calculated cluster validity indices. We observed that there was no universal best performer, but on the basis of this wide-ranging comparison we were able to develop a short guideline for biomedical clustering tasks. ClustEval allows biomedical researchers to pick the appropriate tool for their data type and allows method developers to compare their tool to the state of the art.",2015-09-21 +21177659,TMPad: an integrated structural database for helix-packing folds in transmembrane proteins.,"α-helical transmembrane (TM) proteins play an important role in many critical and diverse biological processes, and specific associations between TM helices are important determinants for membrane protein folding, dynamics and function. 
In order to gain insights into the above phenomena, it is necessary to investigate different types of helix-packing modes and interactions. However, such information is difficult to obtain because of the experimental impediment and a lack of a well-annotated source of helix-packing folds in TM proteins. We have developed the TMPad (TransMembrane Protein Helix-Packing Database) which addresses the above issues by integrating experimentally observed helix-helix interactions and related structural information of membrane proteins. Specifically, the TMPad offers pre-calculated geometric descriptors at the helix-packing interface including residue backbone/side-chain contacts, interhelical distances and crossing angles, helical translational shifts and rotational angles. The TMPad also includes the corresponding sequence, topology, lipid accessibility, ligand-binding information and supports structural classification, schematic diagrams and visualization of the above structural features of TM helix-packing. Through detailed annotations and visualizations of helix-packing, this online resource can serve as an information gateway for deciphering the relationship between helix-helix interactions and higher levels of organization in TM protein structure and function. The website of the TMPad is freely accessible to the public at http://bio-cluster.iis.sinica.edu.tw/TMPad.",2011-01-01 +28934723,Neural Mechanisms Underlying the Disruption of Male Courtship Behavior by Adult Exposure to Di(2-ethylhexyl) Phthalate in Mice.,"

Background

Courtship behavior plays a critical role in attracting females and reproduction success. However, the effects of exposure to a ubiquitous contaminant di(2-ethylhexyl) phthalate (DEHP) on these behaviors and, in particular, on courtship vocalizations have not been examined.

Objective

The effects of adult exposure to DEHP on courtship and mating behaviors and gonadotropic axis and neural mechanisms involved in DEHP-induced effects were analyzed in male mice.

Methods

Adult C57BL/6J males were orally exposed to DEHP (0, 0.5, 5, and 50μg/kg/d) for 4 wk. Olfactory preference, ultrasonic vocalizations (USVs), partner preference and mating, as well as locomotor activity and motor coordination, were measured. The kisspeptin system and testosterone levels were analyzed. Proteomic and molecular studies were conducted on the hypothalamic preoptic nucleus, the key region involved in sexual motivation to vocalize and mate.

Results

DEHP at 50μg/kg/d reduced the emission of USVs, whereas lower doses changed the ratio of syllable categories. This was associated with diminished sexual interest of female partners toward males exposed to 5 or 50μg/kg/d and increased latency to mate, despite normal olfactory preference. The kisspeptin system and circulating testosterone levels were unaffected. In DEHP-exposed males, proteomic analysis of the preoptic nucleus identified differentially expressed proteins connected to the androgen receptor (AR). Indeed, exposure to 5 or 50μg/kg/d of DEHP induced selective AR downregulation in this nucleus and upstream chemosensory regions. The involvement of AR changes in the observed alterations was further supported by the reduced emission of courtship vocalizations in males with disrupted neural AR expression.

Conclusions

These data demonstrate the critical role of neural AR in courtship vocalizations and raise the possibility that the vulnerability of this signaling pathway to exposure to endocrine disrupters may be detrimental for courtship communication and mating in several species. https://doi.org/10.1289/EHP1443.",2017-09-01 +28430949,"Capturing non-local interactions by long short-term memory bidirectional recurrent neural networks for improving prediction of protein secondary structure, backbone angles, contact numbers and solvent accessibility.","

Motivation

The accuracy of predicting protein local and global structural properties such as secondary structure and solvent accessible surface area has been stagnant for many years because of the challenge of accounting for non-local interactions between amino acid residues that are close in three-dimensional structural space but far from each other in their sequence positions. All existing machine-learning techniques relied on a sliding window of 10-20 amino acid residues to capture some 'short to intermediate' non-local interactions. Here, we employed Long Short-Term Memory (LSTM) Bidirectional Recurrent Neural Networks (BRNNs) which are capable of capturing long range interactions without using a window.

Results

We showed that the application of LSTM-BRNN to the prediction of protein structural properties makes the most significant improvement for residues with the most long-range contacts (|i-j| >19) over a previous window-based, deep-learning method SPIDER2. Capturing long-range interactions allows the accuracy of three-state secondary structure prediction to reach 84% and the correlation coefficient between predicted and actual solvent accessible surface areas to reach 0.80, plus a reduction of 5%, 10%, 5% and 10% in the mean absolute error for backbone ϕ , ψ , θ and τ angles, respectively, from SPIDER2. More significantly, 27% of 182724 40-residue models directly constructed from predicted C α atom-based θ and τ have similar structures to their corresponding native structures (6Å RMSD or less), which is 3% better than models built by ϕ and ψ angles. We expect the method to be useful for assisting protein structure and function prediction.

Availability and implementation

The method is available as a SPIDER3 server and standalone package at http://sparks-lab.org .

Contact

yaoqi.zhou@griffith.edu.au or yuedong.yang@griffith.edu.au.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-09-01 +26346985,VING: a software for visualization of deep sequencing signals.,"

Background

Next generation sequencing (NGS) data treatment often requires mapping sequenced reads onto a reference genome for further analysis. Mapped data are commonly visualized using genome browsers. However, such software is not suited for a publication-ready and versatile representation of NGS data coverage, especially when multiple experiments are simultaneously treated.

Results

We developed 'VING', a stand-alone R script that takes as input NGS mapping files and genome annotations to produce accurate snapshots of the NGS coverage signal for any specified genomic region. VING offers multiple viewing options, including strand-specific views and a special heatmap mode for representing multiple experiments in a single figure.

Conclusions

VING produces high-quality figures for NGS data representation in a genome region of interest. It is available at http://vm-gb.curie.fr/ving/. We also developed a Galaxy wrapper, available in the Galaxy tool shed with installation and usage instructions.",2015-09-07 +25767303,Global differential expression of genes located in the Down Syndrome Critical Region in normal human brain.,"

Background

Gene expression information obtained from databases has made possible the extraction and analysis of data related to several molecular processes involved not only in brain homeostasis but also in its disruption in some neuropathologies, principally Down syndrome and Alzheimer disease.

Objective

To correlate the levels of transcription of 19 genes located in the Down Syndrome Critical Region (DSCR) with their expression in several substructures of normal human brain.

Methods

Expression profiles of 19 DSCR genes in 42 brain substructures were obtained from gene expression values available at the database of the human brain of the Brain Atlas of the Allen Institute for Brain Sciences (http://human.brain-map.org/). The co-expression patterns of DSCR genes in brain were calculated by using multivariate statistical methods.

Results

The highest levels of gene expression were registered in the caudate nucleus, nucleus accumbens and putamen among central areas of the cerebral cortex. Increased expression levels were recorded for RCAN1, which encodes a protein involved in signal transduction processes of the CNS; for PCP4, which participates in binding to calmodulin; and for TTC3, a protein that is associated with the differentiation of neurons. These previously identified brain structures play a crucial role in the learning process, in different classes of memory and in motor skills.

Conclusion

The precise regulation of DSCR gene expression is crucial to maintain the brain homeostasis, especially in those areas with high levels of gene expression associated with a remarkable process of learning and cognition.",2014-10-01 +24321274,StrainInfo introduces electronic passports for microorganisms.,"Microbiology builds upon biological material deposited in biological resource centers (BRCs) as a reference framework for collaborative research. BRCs assign so-called strain numbers to label the deposited material and are responsible for long-term preservation and worldwide distribution of the material. Cultured microorganisms can be deposited into multiple BRCs and BRCs also mutually exchange their holdings. As a result, many different strain numbers can be attached to biological material that stems from the same isolate. In practice, this material is considered equivalent and used interchangeably. This implies that finding information on given biological material requires all equivalent strain numbers to be used when searching. StrainInfo introduces strain passports for microorganisms: a uniform overview of information known about a given microbial strain. It contains all known equivalent strain numbers and information on the exchange history, sequences and related literature of the strain. Each passport has an associated strain browser that gives direct access to the underlying BRC catalog entries on which the passport was based. Taxon, sequence and literature passports are implemented in a similar manner. In addition to web pages that serve human users, integrated information is also offered in machine readable formats useful for automated, large-scale analysis. StrainInfo is envisioned to be an open platform integrating microbial information. This platform can form the basis for new methods of microbiological research, leveraging the vast amount of electronic information available online. 
StrainInfo is available from http://www.StrainInfo.net.",2013-12-08 +25766308,Genome-wide characterization of developmental stage- and tissue-specific transcription factors in wheat.,"

Background

Wheat (Triticum aestivum) is one of the most important cereal crops, providing food for humans and feed for other animals. However, its productivity is challenged by various biotic and abiotic stresses such as fungal diseases, insects, drought, salinity, and cold. Transcription factors (TFs) regulate gene expression in different tissues and at various developmental stages in plants and animals, and they can be identified and classified into families according to their structural and specialized DNA-binding domains (DBDs). Transcription factors are important regulatory components of the genome, and are the main targets for engineering stress tolerance.

Results

In total, 2407 putative TFs were identified from wheat expressed sequence tags, and then classified into 63 families by using Hmm searches against hidden Markov model (HMM) profiles. In this study, 2407 TFs represented approximately 2.22% of all genes in the wheat genome, a smaller proportion than those reported for other cereals in PlantTFDB V3.0 (3.33%-5.86%) and PlnTFDB (4.30%-6.46%). We assembled information from the various databases for individual TFs, including annotations and details of their developmental stage- and tissue-specific expression patterns. Based on this information, we identified 1257 developmental stage-specific TFs and 1104 tissue-specific TFs, accounting for 52.22% and 45.87% of the 2407 wheat TFs, respectively. We identified 338, 269, 262, 175, 49, and 18 tissue-specific TFs in the flower, seed, root, leaf, stem, and crown, respectively. There were 100, 6, 342, 141, 390, and 278 TFs specifically expressed at the dormant seed, germinating seed, reproductive, ripening, seedling, and vegetative stages, respectively. We constructed a comprehensive database of wheat TFs, designated as WheatTFDB ( http://xms.sicau.edu.cn/wheatTFDB/ ).

Conclusions

Approximately 2.22% (2407 genes) of all genes in the wheat genome were identified as TFs, and were clustered into 63 TF families. We identified 1257 developmental stage-specific TFs and 1104 tissue-specific TFs, based on information about their developmental- and tissue-specific expression patterns obtained from publicly available gene expression databases. The 2407 wheat TFs and their annotations are summarized in our database, WheatTFDB. These data will be useful identifying target TFs involved in the stress response at a particular stage of development.",2015-02-25 +26694379,MicroRNA-Target Network Inference and Local Network Enrichment Analysis Identify Two microRNA Clusters with Distinct Functions in Head and Neck Squamous Cell Carcinoma.,"MicroRNAs represent ~22 nt long endogenous small RNA molecules that have been experimentally shown to regulate gene expression post-transcriptionally. One main interest in miRNA research is the investigation of their functional roles, which can typically be accomplished by identification of mi-/mRNA interactions and functional annotation of target gene sets. We here present a novel method ""miRlastic"", which infers miRNA-target interactions using transcriptomic data as well as prior knowledge and performs functional annotation of target genes by exploiting the local structure of the inferred network. For the network inference, we applied linear regression modeling with elastic net regularization on matched microRNA and messenger RNA expression profiling data to perform feature selection on prior knowledge from sequence-based target prediction resources. The novelty of miRlastic inference originates in predicting data-driven intra-transcriptome regulatory relationships through feature selection. With synthetic data, we showed that miRlastic outperformed commonly used methods and was suitable even for low sample sizes. 
To gain insight into the functional role of miRNAs and to determine joint functional properties of miRNA clusters, we introduced a local enrichment analysis procedure. The principle of this procedure lies in identifying regions of high functional similarity by evaluating the shortest paths between genes in the network. We can finally assign functional roles to the miRNAs by taking their regulatory relationships into account. We thoroughly evaluated miRlastic on a cohort of head and neck cancer (HNSCC) patients provided by The Cancer Genome Atlas. We inferred an mi-/mRNA regulatory network for human papilloma virus (HPV)-associated miRNAs in HNSCC. The resulting network best enriched for experimentally validated miRNA-target interaction, when compared to common methods. Finally, the local enrichment step identified two functional clusters of miRNAs that were predicted to mediate HPV-associated dysregulation in HNSCC. Our novel approach was able to characterize distinct pathway regulations from matched miRNA and mRNA data. An R package of miRlastic was made available through: http://icb.helmholtz-muenchen.de/mirlastic.",2015-12-18 +27307607,A novel method for discovering local spatial clusters of genomic regions with functional relationships from DNA contact maps.,"

Motivation

The three-dimensional structure of genomes makes it possible for genomic regions not adjacent in the primary sequence to be spatially proximal. These DNA contacts have been found to be related to various molecular activities. Previous methods for analyzing DNA contact maps obtained from Hi-C experiments have largely focused on studying individual interactions, forming spatial clusters composed of contiguous blocks of genomic locations, or classifying these clusters into general categories based on some global properties of the contact maps.

Results

Here, we describe a novel computational method that can flexibly identify small clusters of spatially proximal genomic regions based on their local contact patterns. Using simulated data that highly resemble Hi-C data obtained from real genome structures, we demonstrate that our method identifies spatial clusters that are more compact than methods previously used for clustering genomic regions based on DNA contact maps. The clusters identified by our method enable us to confirm functionally related genomic regions previously reported to be spatially proximal in different species. We further show that each genomic region can be assigned a numeric affinity value that indicates its degree of participation in each local cluster, and these affinity values correlate quantitatively with DNase I hypersensitivity, gene expression, super enhancer activities and replication timing in a cell type specific manner. We also show that these cluster affinity values can precisely define boundaries of reported topologically associating domains, and further define local sub-domains within each domain.

Availability and implementation

The source code of BNMF and tutorials on how to use the software to extract local clusters from contact maps are available at http://yiplab.cse.cuhk.edu.hk/bnmf/

Contact

kevinyip@cse.cuhk.edu.hk

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +27172192,New Software for the Fast Estimation of Population Recombination Rates (FastEPRR) in the Genomic Era.,"Genetic recombination is a very important evolutionary mechanism that mixes parental haplotypes and produces new raw material for organismal evolution. As a result, information on recombination rates is critical for biological research. In this paper, we introduce a new extremely fast open-source software package (FastEPRR) that uses machine learning to estimate recombination rate [Formula: see text] (=[Formula: see text]) from intraspecific DNA polymorphism data. When [Formula: see text] and the number of sampled diploid individuals is large enough ([Formula: see text]), the variance of [Formula: see text] remains slightly smaller than that of [Formula: see text] The new estimate [Formula: see text] (calculated by averaging [Formula: see text] and [Formula: see text]) has the smallest variance of all cases. When estimating [Formula: see text], the finite-site model was employed to analyze cases with a high rate of recurrent mutations, and an additional method is proposed to consider the effect of variable recombination rates within windows. Simulations encompassing a wide range of parameters demonstrate that different evolutionary factors, such as demography and selection, may not increase the false positive rate of recombination hotspots. Overall, accuracy of FastEPRR is similar to the well-known method, LDhat, but requires far less computation time. Genetic maps for each human population (YRI, CEU, and CHB) extracted from the 1000 Genomes OMNI data set were obtained in less than 3 d using just a single CPU core. The Pearson Pairwise correlation coefficient between the [Formula: see text] and [Formula: see text] maps is very high, ranging between 0.929 and 0.987 at a 5-Mb scale. 
Considering that sample sizes for these kinds of data are increasing dramatically with advances in next-generation sequencing technologies, FastEPRR (freely available at http://www.picb.ac.cn/evolgen/) is expected to become a widely used tool for establishing genetic maps and studying recombination hotspots in the population genomic era.",2016-06-01 +24106090,Laminin-database v.2.0: an update on laminins in health and neuromuscular disorders.,"The laminin (LM)-database, hosted at http://www.lm.lncc.br, was published in the NAR database 2011 edition. It was the first database that provided comprehensive information concerning a non-collagenous family of extracellular matrix proteins, the LMs. In its first version, this database contained a large amount of information concerning LMs related to health and disease, with particular emphasis on the haemopoietic system. Users can easily access several tabs for LMs and LM-related molecules, as well as LM nomenclatures and direct links to PubMed. The LM-database version 2.0 integrates data from several publications to achieve a more comprehensive knowledge of LMs in health and disease. The novel features include the addition of two new tabs, 'Neuromuscular Disorders' and 'miRNA--LM Relationship'. More specifically, in this updated version, an expanding set of data has been displayed concerning the role of LMs in neuromuscular and neurodegenerative diseases, as well as the putative involvement of microRNAs. 
Given the importance of LMs in several biological processes, such as cell adhesion, proliferation, differentiation, migration and cell death, this upgraded version expands for users a panoply of information, regarding complex molecular circuitries that involve LMs in health and disease, including neuromuscular and neurodegenerative disorders.",2013-10-07 +25426929,"ALLocator: an interactive web platform for the analysis of metabolomic LC-ESI-MS datasets, enabling semi-automated, user-revised compound annotation and mass isotopomer ratio analysis.","Adduct formation, fragmentation events and matrix effects impose special challenges to the identification and quantitation of metabolites in LC-ESI-MS datasets. An important step in compound identification is the deconvolution of mass signals. During this processing step, peaks representing adducts, fragments, and isotopologues of the same analyte are allocated to a distinct group, in order to separate peaks from coeluting compounds. From these peak groups, neutral masses and pseudo spectra are derived and used for metabolite identification via mass decomposition and database matching. Quantitation of metabolites is hampered by matrix effects and nonlinear responses in LC-ESI-MS measurements. A common approach to correct for these effects is the addition of a U-13C-labeled internal standard and the calculation of mass isotopomer ratios for each metabolite. Here we present a new web-platform for the analysis of LC-ESI-MS experiments. ALLocator covers the workflow from raw data processing to metabolite identification and mass isotopomer ratio analysis. The integrated processing pipeline for spectra deconvolution ""ALLocatorSD"" generates pseudo spectra and automatically identifies peaks emerging from the U-13C-labeled internal standard. Information from the latter improves mass decomposition and annotation of neutral losses. 
ALLocator provides an interactive and dynamic interface to explore and enhance the results in depth. Pseudo spectra of identified metabolites can be stored in user- and method-specific reference lists that can be applied on succeeding datasets. The potential of the software is exemplified in an experiment, in which abundance fold-changes of metabolites of the l-arginine biosynthesis in C. glutamicum type strain ATCC 13032 and l-arginine producing strain ATCC 21831 are compared. Furthermore, the capability for detection and annotation of uncommon large neutral losses is shown by the identification of (γ-)glutamyl dipeptides in the same strains. ALLocator is available online at: https://allocator.cebitec.uni-bielefeld.de. A login is required, but freely available.",2014-11-26 +27647651,[Urinary lithiasis in renal transplant recipient].,"

Objectives

To report epidemiology and characteristics of urinary lithiasis and its management in kidney allograft at the time of organ procurement or after kidney transplantation.

Material and methods

An exhaustive systematic review of the scientific literature was performed in the Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of the following keywords (MESH): urinary lithiasis, stone, kidney transplantation. Publications obtained were selected based on methodology, language, date of publication (last 10 years) and relevance. Prospective and retrospective studies in English or French, review articles, meta-analyses and guidelines were selected and analyzed. This search found 58 articles. After reading, 37 were included in the text based on their relevance.

Results

The frequency of urinary lithiasis in renal transplant recipients is similar to that observed in the general population. Generally, urinary lithiasis of the graft is asymptomatic because of renal denervation after organ procurement and transplantation. Nevertheless, this situation may carry a high risk due to the immunosuppressed state of the recipient with a single functioning kidney. Most of the time, the diagnosis is incidental during routine post-transplantation follow-up. Management of urolithiasis in renal transplant recipients is similar to that performed in the general population.

Conclusion

Due to its potential severity in transplanted immunosuppressed patients with a sole kidney, urolithiasis requires expert urological management.",2016-09-16 +23550212,The switches.ELM resource: a compendium of conditional regulatory interaction interfaces.,"Short linear motifs (SLiMs) are protein interaction sites that play an important role in cell regulation by controlling protein activity, localization, and local abundance. The functionality of a SLiM can be modulated in a context-dependent manner to induce a gain, loss, or exchange of binding partners, which will affect the function of the SLiM-containing protein. As such, these conditional interactions underlie molecular decision-making in cell signaling. We identified multiple types of pre- and posttranslational switch mechanisms that can regulate the function of a SLiM and thereby control its interactions. The collected examples of experimentally characterized SLiM-based switch mechanisms were curated in the freely accessible switches.ELM resource (http://switches.elm.eu.org). On the basis of these examples, we defined and integrated rules to analyze SLiMs for putative regulatory switch mechanisms. We applied these rules to known validated SLiMs, providing evidence that more than half of these are likely to be pre- or posttranslationally regulated. In addition, we showed that posttranslationally modified sites are enriched around SLiMs, which enables cooperative and integrative regulation of protein interaction interfaces. We foresee switches.ELM complementing available resources to extend our knowledge of the molecular mechanisms underlying cell signaling.",2013-04-02 +26882475,ZBIT Bioinformatics Toolbox: A Web-Platform for Systems Biology and Expression Data Analysis.,"Bioinformatics analysis has become an integral part of research in biology. However, installation and use of scientific software can be difficult and often requires technical expert knowledge. 
Reasons are dependencies on certain operating systems or required third-party libraries, missing graphical user interfaces and documentation, or nonstandard input and output formats. In order to make bioinformatics software easily accessible to researchers, we here present a web-based platform. The Center for Bioinformatics Tuebingen (ZBIT) Bioinformatics Toolbox provides web-based access to a collection of bioinformatics tools developed for systems biology, protein sequence annotation, and expression data analysis. Currently, the collection encompasses software for conversion and processing of community standards SBML and BioPAX, transcription factor analysis, and analysis of microarray data from transcriptomics and proteomics studies. All tools are hosted on a customized Galaxy instance and run on a dedicated computation cluster. Users only need a web browser and an active internet connection in order to benefit from this service. The web platform is designed to facilitate the usage of the bioinformatics tools for researchers without advanced technical background. Users can combine tools for complex analyses or use predefined, customizable workflows. All results are stored persistently and reproducible. For each tool, we provide documentation, tutorials, and example data to maximize usability. The ZBIT Bioinformatics Toolbox is freely available at https://webservices.cs.uni-tuebingen.de/.",2016-02-16 +26508761,openBIS ELN-LIMS: an open-source database for academic laboratories.,"

Unlabelled

The open-source platform openBIS (open Biology Information System) offers an Electronic Laboratory Notebook and a Laboratory Information Management System (ELN-LIMS) solution suitable for academic life science laboratories. openBIS ELN-LIMS allows researchers to efficiently document their work, to describe materials and methods and to collect raw and analyzed data. The system comes with a user-friendly web interface where data can be added, edited, browsed and searched.

Availability and implementation

The openBIS software, a user guide and a demo instance are available at https://openbis-eln-lims.ethz.ch. The demo instance contains some data from our laboratory as an example to demonstrate the possibilities of the ELN-LIMS (Ottoz et al., 2014). For rapid local testing, a VirtualBox image of the ELN-LIMS is also available.",2015-10-27 +28315408,StemTextSearch: Stem cell gene database with evidence from abstracts.,"

Background

Previous studies have used many methods to find biomarkers in stem cells, including text mining, experimental data and image storage. However, no text-mining methods have yet been developed which can identify whether a gene plays a positive or negative role in stem cells.

Description

StemTextSearch identifies the role of a gene in stem cells by using a text-mining method to find combinations of gene regulation, stem-cell regulation and cell processes in the same sentences of biomedical abstracts.

Conclusions

The dataset includes 5797 genes, with 1534 genes having positive roles in stem cells, 1335 genes having negative roles, 1654 genes with both positive and negative roles, and 1274 with an uncertain role. The precision of gene role in StemTextSearch is 0.66, and the recall is 0.78. StemTextSearch is a web-based engine with queries that specify (i) gene, (ii) category of stem cell, (iii) gene role, (iv) gene regulation, (v) cell process, (vi) stem-cell regulation, and (vii) species. StemTextSearch is available through http://bio.yungyun.com.tw/StemTextSearch.aspx.",2017-03-14 +,Genome-scale identification of resistance gene analogs and the development of their intron length polymorphism markers in maize,"As introns are vulnerable to changes such as insertions and deletions when exposed to various evolutionary forces, they constitute a repository for developing genetic markers based on intron length polymorphisms (ILP). This study developed a set of genetic markers that use the potential intron length polymorphism in resistance gene analogs (RGAs) in Zea mays. By searching the genome of Zea mays B73 for the homologs of 73 R genes which have already been identified in plants, we found 861 RGAs, 632 of which have at least one intron that can serve as putative markers targeting the intron length polymorphism in RGAs (RGA-ILP). We developed 1972 candidate markers via electronic PCR (e-PCR) with primer pairs designed in each pair of exonic regions that flank an intron. Furthermore, the performance of RGA-ILP among four maize inbred lines (Huangzao4, B73, Mo17, and Dan340) was evaluated with 69 pairs of randomly selected primers. Of them, 46.4% showed bands that had discriminating length polymorphism, and between any two of the inbred lines the proportion of polymorphism ranged from 23.2 to 31.9%. 
To make it convenient to use these markers for those interested in molecular breeding of disease-resistant maize, we provide all related information in a web-based database named MaizeRGA, which is available at http://www.sicau.edu.cn/web/yms/rga/maizeRGA.html .",2012-02-01 +25118648,PDB explorer -- a web based algorithm for protein annotation viewer and 3D visualization.,"The PDB file format, is a text format characterizing the three dimensional structures of macro molecules available in the Protein Data Bank (PDB). Determined protein structure are found in coalition with other molecules or ions such as nucleic acids, water, ions, Drug molecules and so on, which therefore can be described in the PDB format and have been deposited in PDB database. PDB is a machine generated file, it's not human readable format, to read this file we need any computational tool to understand it. The objective of our present study is to develop a free online software for retrieval, visualization and reading of annotation of a protein 3D structure which is available in PDB database. Main aim is to create PDB file in human readable format, i.e., the information in PDB file is converted in readable sentences. It displays all possible information from a PDB file including 3D structure of that file. Programming languages and scripting languages like Perl, CSS, Javascript, Ajax, and HTML have been used for the development of PDB Explorer. The PDB Explorer directly parses the PDB file, calling methods for parsed element secondary structure element, atoms, coordinates etc. 
PDB Explorer is freely available at http://www.pdbexplorer.eminentbio.com/home with no requirement of log-in.",2014-08-09 +27751943,"Untangling the relatedness among correlations, Part II: Inter-subject correlation group analysis through linear mixed-effects modeling.","It has been argued that naturalistic conditions in FMRI studies provide a useful paradigm for investigating perception and cognition through a synchronization measure, inter-subject correlation (ISC). However, one analytical stumbling block has been the fact that the ISC values associated with each single subject are not independent, and our previous paper (Chen et al., 2016) used simulations and analyses of real data to show that the methodologies adopted in the literature do not have the proper control for false positives. In the same paper, we proposed nonparametric subject-wise bootstrapping and permutation testing techniques for one and two groups, respectively, which account for the correlation structure, and these greatly outperformed the prior methods in controlling the false positive rate (FPR); that is, subject-wise bootstrapping (SWB) worked relatively well for both cases with one and two groups, and subject-wise permutation (SWP) testing was virtually ideal for group comparisons. Here we seek to explicate and adopt a parametric approach through linear mixed-effects (LME) modeling for studying the ISC values, building on the previous correlation framework, with the benefit that the LME platform offers wider adaptability, more powerful interpretations, and quality control checking capability than nonparametric methods. We describe both theoretical and practical issues involved in the modeling and the manner in which LME with crossed random effects (CRE) modeling is applied. A data-doubling step further allows us to conveniently track the subject index, and achieve easy implementations. 
We pit the LME approach against the best nonparametric methods, and find that the LME framework achieves proper control for false positives. The new LME methodologies are shown to be both efficient and robust, and they will be publicly available in AFNI (http://afni.nimh.nih.gov).",2016-10-15 +27896305,Measuring resilience to financial instability: A new dataset.,"In recognition of the severe consequences of the recent international financial crisis, the topic of macroprudential policy has elicited considerable research effort. The data set reports, for 46 economies around the globe, an index of the capacity to deploy macroprudential policies. The index aims to represent the essence of what constitutes a macroprudential regime is developed and used in http://dx.doi.org/10.1016/j.jfs.2016.08.007 (D. Lombardi, P.L. Siklos, 2016) [1]. Specifically, the index quantifies: (1) how existing macroprudential frameworks are organized; and (2) how far a particular jurisdiction is from reaching the goals established by the Group of Twenty (G20) and the Financial Stability Board (FSB). The latter is a benchmark that has not been considered in the burgeoning literature that seeks to quantify the role of macroprudential policies.",2016-11-09 +28480171,Pathogenicity in POLG syndromes: DNA polymerase gamma pathogenicity prediction server and database.,"DNA polymerase gamma (POLG) is the replicative polymerase responsible for maintaining mitochondrial DNA (mtDNA). Disorders related to its functionality are a major cause of mitochondrial disease. The clinical spectrum of POLG syndromes includes Alpers-Huttenlocher syndrome (AHS), childhood myocerebrohepatopathy spectrum (MCHS), myoclonic epilepsy myopathy sensory ataxia (MEMSA), the ataxia neuropathy spectrum (ANS) and progressive external ophthalmoplegia (PEO). 
We have collected all publicly available POLG-related patient data and analyzed it using our pathogenic clustering model to provide a new research and clinical tool in the form of an online server. The server evaluates the pathogenicity of both previously reported and novel mutations. There are currently 176 unique point mutations reported and found in mitochondrial patients in the gene encoding the catalytic subunit of POLG, POLG. The mutations are distributed nearly uniformly along the length of the primary amino acid sequence of the gene. Our analysis shows that most of the mutations are recessive, and that the reported dominant mutations cluster within the polymerase active site in the tertiary structure of the POLG enzyme. The POLG Pathogenicity Prediction Server (http://polg.bmb.msu.edu) is targeted at clinicians and scientists studying POLG disorders, and aims to provide the most current available information regarding the pathogenicity of POLG mutations.",2017-04-18 +23812995,Compressive genomics for protein databases.,"

Motivation

The exponential growth of protein sequence databases has increasingly made the fundamental question of searching for homologs a computational bottleneck. The amount of unique data, however, is not growing nearly as fast; we can exploit this fact to greatly accelerate homology search. Acceleration of programs in the popular PSI/DELTA-BLAST family of tools will not only speed-up homology search directly but also the huge collection of other current programs that primarily interact with large protein databases via precisely these tools.

Results

We introduce a suite of homology search tools, powered by compressively accelerated protein BLAST (CaBLASTP), which are significantly faster than and comparably accurate with all known state-of-the-art tools, including HHblits, DELTA-BLAST and PSI-BLAST. Further, our tools are implemented in a manner that allows direct substitution into existing analysis pipelines. The key idea is that we introduce a local similarity-based compression scheme that allows us to operate directly on the compressed data. Importantly, CaBLASTP's runtime scales almost linearly in the amount of unique data, as opposed to current BLASTP variants, which scale linearly in the size of the full protein database being searched. Our compressive algorithms will speed-up many tasks, such as protein structure prediction and orthology mapping, which rely heavily on homology search.

Availability

CaBLASTP is available under the GNU Public License at http://cablastp.csail.mit.edu/

Contact

bab@mit.edu.",2013-07-01 +24715218,CancerEST: a web-based tool for automatic meta-analysis of public EST data.,"The identification of cancer-restricted biomarkers is fundamental to the development of novel cancer therapies and diagnostic tools. The construction of comprehensive profiles to define tissue- and cancer-specific gene expression has been central to this. To this end, the exploitation of the current wealth of 'omic'-scale databases can be facilitated by automated approaches, allowing researchers to directly address specific biological questions. Here we present CancerEST, a user-friendly and intuitive web-based tool for the automated identification of candidate cancer markers/targets, for examining tissue specificity as well as for integrated expression profiling. CancerEST operates by means of constructing and meta-analyzing expressed sequence tag (EST) profiles of user-supplied gene sets across an EST database supporting 36 tissue types. Using a validation data set from the literature, we show the functionality and utility of CancerEST. DATABASE URL: http://www.cancerest.org.uk.",2014-04-07 +25257241,Relationship between homocysteine level and diabetic retinopathy: a systematic review and meta-analysis.,"

Background

The relationship between homocysteine (Hcy) and diabetic retinopathy (DR) remains unclear to date. Therefore, a systematic review and meta-analysis was performed on the relationship between Hcy level and DR.

Methods

Studies were identified by searching PubMed, Embase, and Web of Science databases until 5 May, 2014.

Results

A total of 31 studies involving 6,394 participants were included in the meta-analysis. After pooling the data from each included study, the blood Hcy concentration in the DR group was observed to be higher than that in the control group [WMD=2.55; 95% confidence interval (CI), 1.70-3.40], and diabetes mellitus (DM) patients with hyperhomocysteinemia were at a risk for DR [odds ratio (OR)=1.93; 95% CI, 1.46-2.53]. Considering the different DM types, hyperhomocysteinemia in T1DM (OR=1.83, 95% CI, 1.28-2.62) was associated with DR rather than in T2DM (OR=1.59, 95% CI, 0.72-3.51). Considerable statistical heterogeneity in the overall summary estimates was partly explained by the geographical differences.

Conclusions

Results from this current meta-analysis indicate that hyperhomocysteinemia is a risk factor for DR, especially proliferative DR. Differences between geographical regions were observed in the relationship between hyperhomocysteinemia and T1DM risk. Given the heterogeneous results, the relationship between high Hcy and DR needs further investigation.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_167.",2014-09-26 +24850848,iSimp in BioC standard format: enhancing the interoperability of a sentence simplification system. ,"This article reports the use of the BioC standard format in our sentence simplification system, iSimp, and demonstrates its general utility. iSimp is designed to simplify complex sentences commonly found in the biomedical text, and has been shown to improve existing text mining applications that rely on the analysis of sentence structures. By adopting the BioC format, we aim to make iSimp readily interoperable with other applications in the biomedical domain. To examine the utility of iSimp in BioC, we implemented a rule-based relation extraction system that uses iSimp as a preprocessing module and BioC for data exchange. Evaluation on the training corpus of BioNLP-ST 2011 GENIA Event Extraction (GE) task showed that iSimp sentence simplification improved the recall by 3.2% without reducing precision. The iSimp simplification-annotated corpora, both our previously used corpus and the GE corpus in the current study, have been converted into the BioC format and made publicly available at the project's Web site: http://research.bioinformatics.udel.edu/isimp/. Database URL:http://research.bioinformatics.udel.edu/isimp/",2014-05-21 +23772942,Pregnancy and birth cohort resources in europe: a large opportunity for aetiological child health research.,"

Background

During the past 25 years, many pregnancy and birth cohorts have been established. Each cohort provides unique opportunities for examining associations of early-life exposures with child development and health. However, to fully exploit the large amount of available resources and to facilitate cross-cohort collaboration, it is necessary to have accessible information on each cohort and its individual characteristics. The aim of this work was to provide an overview of European pregnancy and birth cohorts registered in a freely accessible database located at http://www.birthcohorts.net.

Methods

European pregnancy and birth cohorts initiated in 1980 or later with at least 300 mother-child pairs enrolled during pregnancy or at birth, and with postnatal data, were eligible for inclusion. Eligible cohorts were invited to provide information on the data and biological samples collected, as well as the timing of data collection.

Results

In total, 70 cohorts were identified. Of these, 56 fulfilled the inclusion criteria encompassing a total of more than 500,000 live-born European children. The cohorts represented 19 countries with the majority of cohorts located in Northern and Western Europe. Some cohorts were general with multiple aims, whilst others focused on specific health or exposure-related research questions.

Conclusion

This work demonstrates a great potential for cross-cohort collaboration addressing important aspects of child health. The web site, http://www.birthcohorts.net, proved to be a useful tool for accessing information on European pregnancy and birth cohorts and their characteristics.",2013-07-01 +27835645,PoCos: Population Covering Locus Sets for Risk Assessment in Complex Diseases.,"Susceptibility loci identified by GWAS generally account for a limited fraction of heritability. Predictive models based on identified loci also have modest success in risk assessment and therefore are of limited practical use. Many methods have been developed to overcome these limitations by incorporating prior biological knowledge. However, most of the information utilized by these methods is at the level of genes, limiting analyses to variants that are in or proximate to coding regions. We propose a new method that integrates protein protein interaction (PPI) as well as expression quantitative trait loci (eQTL) data to identify sets of functionally related loci that are collectively associated with a trait of interest. We call such sets of loci ""population covering locus sets"" (PoCos). The contributions of the proposed approach are three-fold: 1) We consider all possible genotype models for each locus, thereby enabling identification of combinatorial relationships between multiple loci. 2) We develop a framework for the integration of PPI and eQTL into a heterogenous network model, enabling efficient identification of functionally related variants that are associated with the disease. 3) We develop a novel method to integrate the genotypes of multiple loci in a PoCo into a representative genotype to be used in risk assessment. We test the proposed framework in the context of risk assessment for seven complex diseases, type 1 diabetes (T1D), type 2 diabetes (T2D), psoriasis (PS), bipolar disorder (BD), coronary artery disease (CAD), hypertension (HT), and multiple sclerosis (MS). 
Our results show that the proposed method significantly outperforms individual variant based risk assessment models as well as the state-of-the-art polygenic score. We also show that incorporation of eQTL data improves the performance of identified POCOs in risk assessment. We also assess the biological relevance of PoCos for three diseases that have similar biological mechanisms and identify novel candidate genes. The resulting software is publicly available at http://compbio.case.edu/pocos/.",2016-11-11 +24403538,Allerdictor: fast allergen prediction using text classification techniques.,"

Motivation

Accurately identifying and eliminating allergens from biotechnology-derived products are important for human health. From a biomedical research perspective, it is also important to identify allergens in sequenced genomes. Many allergen prediction tools have been developed during the past years. Although these tools have achieved certain levels of specificity, when applied to large-scale allergen discovery (e.g. at a whole-genome scale), they still yield many false positives and thus low precision (even at low recall) due to the extreme skewness of the data (allergens are rare). Moreover, the most accurate tools are relatively slow because they use protein sequence alignment to build feature vectors for allergen classifiers. Additionally, only web server implementations of the current allergen prediction tools are publicly available and are without the capability of large batch submission. These weaknesses make large-scale allergen discovery ineffective and inefficient in the public domain.

Results

We developed Allerdictor, a fast and accurate sequence-based allergen prediction tool that models protein sequences as text documents and uses support vector machine in text classification for allergen prediction. Test results on multiple highly skewed datasets demonstrated that Allerdictor predicted allergens with high precision over high recall at fast speed. For example, Allerdictor only took ∼6 min on a single core PC to scan a whole Swiss-Prot database of ∼540 000 sequences and identified <1% of them as allergens.

Availability and implementation

Allerdictor is implemented in Python and available as standalone and web server versions at http://allerdictor.vbi.vt.edu CONTACT: lawrence@vbi.vt.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2014-01-07 +26819472,Unipept web services for metaproteomics analysis.,

Unlabelled

Unipept is an open source web application that is designed for metaproteomics analysis with a focus on interactive data visualization. It is underpinned by a fast index built from UniProtKB and the NCBI taxonomy that enables quick retrieval of all UniProt entries in which a given tryptic peptide occurs. Unipept version 2.4 introduced web services that provide programmatic access to the metaproteomics analysis features. This enables integration of Unipept functionality in custom applications and data processing pipelines.

Availability and implementation

The web services are freely available at http://api.unipept.ugent.be and are open sourced under the MIT license.

Contact

Unipept@ugent.be

Supplementary information

Supplementary data are available at Bioinformatics online.,2016-01-27 +26035493,ProPairs: A Data Set for Protein-Protein Docking.,"ProPairs is a data set of crystal structures of protein complexes defined as biological assemblies in the protein data bank (PDB), which are classified as legitimate protein-protein docking complexes by also identifying the corresponding unbound protein structures in the PDB. The underlying program selecting suitable protein complexes, also called ProPairs, is an automated method to extract structures of legitimate protein docking complexes and their unbound partner proteins from the PDB which fulfill specific criteria. In this way a total of 5,642 protein complexes have been identified with 11,600 different decompositions in unbound protein pairs yielding legitimate protein docking partners. After removing sequence redundancy (requiring a sequence identity of the residues in the interface of less than 40%), 2,070 different legitimate protein docking complexes remain. For 810 of these protein docking complexes, both docking partners possess corresponding unbound structures in the PDB. From the 2,070 nonredundant protein docking complexes there are 417 which possess a cofactor at the interface. From the 176 protein docking complexes of the Protein-Protein Docking Benchmark 4.0 (DB4.0) data set, 13 differ from the ProPairs data set. Twelve of them differ with respect to the composition of the unbound structures but are contained in the large redundant ProPairs data set. One protein docking complex of the DB4.0 data set is not contained in ProPairs since the biological assembly specified in the PDB is wrong (PDB id 1d6r ). For one protein complex (PDB id 1bgx ) the DB4.0 data set uses a fabricated unbound structure. 
For public use interactive online access is provided to the ProPairs data set of nonredundant protein docking complexes along with the source code of the underlying method [ http://propairs.github.io].",2015-06-15 +26333403,FASTR: A novel data format for concomitant representation of RNA sequence and secondary structure information.,"Given the importance of RNA secondary structures in defining their biological role, it would be convenient for researchers seeking RNA data if both sequence and structural information pertaining to RNA molecules are made available together. Current nucleotide data repositories archive only RNA sequence data. Furthermore, storage formats which can frugally represent RNA sequence as well as structure data in a single file, are currently unavailable. This article proposes a novel storage format, 'FASTR', for concomitant representation of RNA sequence and structure. The storage efficiency of the proposed FASTR format has been evaluated using RNA data from various microorganisms. Results indicate that the size of FASTR formatted files (containing both RNA sequence as well as structure information) are equivalent to that of FASTA-format files, which contain only RNA sequence information. RNA secondary structure is typically represented using a combination of a string of nucleotide characters along with the corresponding dot-bracket notation indicating structural attributes. 'FASTR' - the novel storage format proposed in the present study enables a frugal representation of both RNA sequence and structural information in the form of a single string. In spite of having a relatively smaller storage footprint, the resultant 'fastr' string(s) retain all sequence as well as secondary structural information that could be stored using a dot-bracket notation. An implementation of the 'FASTR' methodology is available for download at http://metagenomics.atc.tcs.com/compression/fastr.",2015-09-01 +28061747,GTB - an online genome tolerance browser.,"

Background

Accurate methods capable of predicting the impact of single nucleotide variants (SNVs) are assuming ever increasing importance. There exists a plethora of in silico algorithms designed to help identify and prioritize SNVs across the human genome for further investigation. However, no tool exists to visualize the predicted tolerance of the genome to mutation, or the similarities between these methods.

Results

We present the Genome Tolerance Browser (GTB, http://gtb.biocompute.org.uk ): an online genome browser for visualizing the predicted tolerance of the genome to mutation. The server summarizes several in silico prediction algorithms and conservation scores: including 13 genome-wide prediction algorithms and conservation scores, 12 non-synonymous prediction algorithms and four cancer-specific algorithms.

Conclusion

The GTB enables users to visualize the similarities and differences between several prediction algorithms and to upload their own data as additional tracks; thereby facilitating the rapid identification of potential regions of interest.",2017-01-06 +28796045,With a little help from a computer: discriminating between bacterial and viral meningitis based on dominance-based rough set approach analysis.,"Differential Diagnosis of bacterial and viral meningitis remains an important clinical problem. A number of methods to assist in the diagnoses of meningitis have been developed, but none of them have been found to have high specificity with 100% sensitivity. We conducted a retrospective analysis of the medical records of 148 children hospitalized in St. Joseph Children's Hospital in Poznań. In this study, we applied for the first time the original methodology of dominance-based rough set approach (DRSA) to diagnostic patterns of meningitis data and represented them by decision rules useful in discriminating between bacterial and viral meningitis. The induction algorithm is called VC-DomLEM; it has been implemented as software package called jMAF (http://www.cs.put.poznan.pl/jblaszczynski/Site/jRS.html), based on java Rough Set (jRS) library. In the studied group, there were 148 patients (78 boys and 70 girls), and the mean age was 85 months. We analyzed 14 attributes, of which only 4 were used to generate the 6 rules, with C-reactive protein (CRP) being the most valuable. Factors associated with bacterial meningitis were: CRP level ≥86 mg/L, number of leukocytes in cerebrospinal fluid (CSF) ≥4481 μL, symptoms duration no longer than 2 days, or age less than 1 month. Factors associated with viral meningitis were CRP level not higher than 19 mg/L, or CRP level not higher than 84 mg/L in a patient older than 11 months with no more than 1100 μL leukocytes in CSF. We established the minimum set of attributes significant for classification of patients with meningitis. 
This is a new set of rules, which, although intuitively anticipated by some clinicians, has not been formally demonstrated until now.",2017-08-01 +28383656,PreMosa: extracting 2D surfaces from 3D microscopy mosaics.,"

Motivation

A significant focus of biological research is to understand the development, organization and function of tissues. A particularly productive area of study is on single layer epithelial tissues in which the adherence junctions of cells form a 2D manifold that is fluorescently labeled. Given the size of the tissue, a microscope must collect a mosaic of overlapping 3D stacks encompassing the stained surface. Downstream interpretation is greatly simplified by preprocessing such a dataset as follows: (i) extracting and mapping the stained manifold in each stack into a single 2D projection plane, (ii) correcting uneven illumination artifacts, (iii) stitching the mosaic planes into a single, large 2D image and (iv) adjusting the contrast.

Results

We have developed PreMosa, an efficient, fully automatic pipeline to perform the four preprocessing tasks above resulting in a single 2D image of the stained manifold across which contrast is optimized and illumination is even. Notable features are as follows. First, the 2D projection step employs a specially developed algorithm that actually finds the manifold in the stack based on maximizing contrast, intensity and smoothness. Second, the projection step comes first, implying all subsequent tasks are more rapidly solved in 2D. And last, the mosaic melding employs an algorithm that globally adjusts contrasts amongst the 2D tiles so as to produce a seamless, high-contrast image. We conclude with an evaluation using ground-truth datasets and present results on datasets from Drosophila melanogaster wings and Schmidtea mediterranea ciliary components.

Availability and implementation

PreMosa is available under https://cblasse.github.io/premosa.

Contact

blasse@mpi-cbg.de or myers@mpi-cbg.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +29265030,A simplified multiplex PCR-based typing method for common Salmonella enterica serovars supported by online server-based detection system.,"

Background & objectives

A rapid and simple alternative method is needed to replace the laborious, time-consuming Salmonella serotyping. The objective of the present study was to improve and simplify a previously reported multiplex polymerase chain reaction (PCR)-based method and to create an online server to enable rapid determination of serovars.

Methods

A method of multiplex PCR-based genome typing (MPGT) was standardized using 59 Salmonella isolates of 31 serovars. Several previously reported primers were modified to obtain a more accurate performance. The screen was separated into four different multiplex reactions distinguishable on standard electrophoresis. A blind study was subsequently performed with 81 isolates of 10 serovars most prevalent in India. Whole genome information from 440 Salmonella isolates was used to confirm the usefulness of this method and concurrence of in silico predictions and PCR results were investigated. A public server (http://www.mpgt-salmonella.res.in) was established for data storage and determination of closest previously observed Salmonella isolates based on obtained MPGT patterns.

Results

The 16 target genes amplified showed variability in their presence in strains from different serotypes. Hence, identical amplification patterns suggested genetic relatedness of strains and usually identical serological behaviour. The observed absence/presence patterns of genes were converted to an MPGT code. Altogether, 83 different codes were predicted in silico based on the whole genome information of 440 strains. Results confirmed that major serovars usually displayed unique MPGT codes.

Interpretation & conclusions

The multiplex PCR assay resulted in specific binary codes for isolates from each of the 31 Salmonella serovars tested. The online server allowed the user to compare obtained PCR results with stored previous patterns. Simplicity, speed and cost-effectiveness make this tool useful for quick outbreak management.",2017-08-01 +26699225,Network-Based Isoform Quantification with RNA-Seq Data for Cancer Transcriptome Analysis.,"High-throughput mRNA sequencing (RNA-Seq) is widely used for transcript quantification of gene isoforms. Since RNA-Seq data alone is often not sufficient to accurately identify the read origins from the isoforms for quantification, we propose to explore protein domain-domain interactions as prior knowledge for integrative analysis with RNA-Seq data. We introduce a Network-based method for RNA-Seq-based Transcript Quantification (Net-RSTQ) to integrate protein domain-domain interaction network with short read alignments for transcript abundance estimation. Based on our observation that the abundances of the neighboring isoforms by domain-domain interactions in the network are positively correlated, Net-RSTQ models the expression of the neighboring transcripts as Dirichlet priors on the likelihood of the observed read alignments against the transcripts in one gene. The transcript abundances of all the genes are then jointly estimated with alternating optimization of multiple EM problems. In simulation Net-RSTQ effectively improved isoform transcript quantifications when isoform co-expressions correlate with their interactions. qRT-PCR results on 25 multi-isoform genes in a stem cell line, an ovarian cancer cell line, and a breast cancer cell line also showed that Net-RSTQ estimated more consistent isoform proportions with RNA-Seq data. 
In the experiments on the RNA-Seq data in The Cancer Genome Atlas (TCGA), the transcript abundances estimated by Net-RSTQ are more informative for patient sample classification of ovarian cancer, breast cancer and lung cancer. All experimental results collectively support that Net-RSTQ is a promising approach for isoform quantification. Net-RSTQ toolbox is available at http://compbio.cs.umn.edu/Net-RSTQ/.",2015-12-23 +23772554,PRIMOS: an integrated database of reassessed protein-protein interactions providing web-based access to in silico validation of experimentally derived data.,"Steady improvements in proteomics present a bioinformatic challenge to retrieve, store, and process the accumulating and often redundant amount of information. In particular, a large-scale comparison and analysis of protein-protein interaction (PPI) data requires tools for data interpretation as well as validation. At this juncture, the Protein Interaction and Molecule Search (PRIMOS) platform represents a novel web portal that unifies six primary PPI databases (BIND, Biomolecular Interaction Network Database; DIP, Database of Interacting Proteins; HPRD, Human Protein Reference Database; IntAct; MINT, Molecular Interaction Database; and MIPS, Munich Information Center for Protein Sequences) into a single consistent repository, which currently includes more than 196,700 redundancy-removed PPIs. PRIMOS supports three advanced search strategies centering on disease-relevant PPIs, on inter- and intra-organismal crosstalk relations (e.g., pathogen-host interactions), and on highly connected protein nodes analysis (""hub"" identification). The main novelties distinguishing PRIMOS from other secondary PPI databases are the reassessment of known PPIs, and the capacity to validate personal experimental data by our peer-reviewed, homology-based validation. 
This article focuses on definite PRIMOS use cases (presentation of embedded biological concepts, example applications) to demonstrate its broad functionality and practical value. PRIMOS is publicly available at http://primos.fh-hagenberg.at.",2013-06-01 +24136999,RiceWiki: a wiki-based database for community curation of rice genes.,"Rice is the most important staple food for a large part of the world's human population and also a key model organism for biological studies of crops as well as other related plants. Here we present RiceWiki (http://ricewiki.big.ac.cn), a wiki-based, publicly editable and open-content platform for community curation of rice genes. Most existing related biological databases are based on expert curation; with the exponentially exploding volume of rice knowledge and other relevant data, however, expert curation becomes increasingly laborious and time-consuming to keep knowledge up-to-date, accurate and comprehensive, struggling with the flood of data and requiring a large number of people getting involved in rice knowledge curation. Unlike extant relevant databases, RiceWiki features harnessing collective intelligence in community curation of rice genes, quantifying users' contributions in each curated gene and providing explicit authorship for each contributor in any given gene, with the aim to exploit the full potential of the scientific community for rice knowledge curation. Based on community curation, RiceWiki bears the potential to make it possible to build a rice encyclopedia by and for the scientific community that harnesses community intelligence for collaborative knowledge curation, covers all aspects of biological knowledge and keeps evolving with novel knowledge.",2013-10-16 +26509669,Toward Atomistic Resolution Structure of Phosphatidylcholine Headgroup and Glycerol Backbone at Different Ambient Conditions.,"Phospholipids are essential building blocks of biological membranes. 
Despite a vast amount of very accurate experimental data, the atomistic resolution structures sampled by the glycerol backbone and choline headgroup in phosphatidylcholine bilayers are not known. Atomistic resolution molecular dynamics simulations have the potential to resolve the structures, and to give an arrestingly intuitive interpretation of the experimental data, but only if the simulations reproduce the data within experimental accuracy. In the present work, we simulated phosphatidylcholine (PC) lipid bilayers with 13 different atomistic models, and compared simulations with NMR experiments in terms of the highly structurally sensitive C-H bond vector order parameters. Focusing on the glycerol backbone and choline headgroups, we showed that the order parameter comparison can be used to judge the atomistic resolution structural accuracy of the models. Accurate models, in turn, allow molecular dynamics simulations to be used as an interpretation tool that translates these NMR data into a dynamic three-dimensional representation of biomolecules in biologically relevant conditions. In addition to lipid bilayers in fully hydrated conditions, we reviewed previous experimental data for dehydrated bilayers and cholesterol-containing bilayers, and interpreted them with simulations. Although none of the existing models reached experimental accuracy, by critically comparing them we were able to distill relevant chemical information: (1) increase of choline order parameters indicates the P-N vector tilting more parallel to the membrane, and (2) cholesterol induces only minor changes to the PC (glycerol backbone) structure. This work has been done as a fully open collaboration, using nmrlipids.blogspot.fi as a communication platform; all the scientific contributions were made publicly on this blog. 
During the open research process, the repository holding our simulation trajectories and files ( https://zenodo.org/collection/user-nmrlipids ) has become the most extensive publicly available collection of molecular dynamics simulation trajectories of lipid bilayers.",2015-11-25 +27303626,A curated transcriptome dataset collection to investigate the development and differentiation of the human placenta and its associated pathologies.,"Compendia of large-scale datasets made available in public repositories provide a precious opportunity to discover new biomedical phenomena and to fill gaps in our current knowledge. In order to foster novel insights it is necessary to ensure that these data are made readily accessible to research investigators in an interpretable format. Here we make a curated, public, collection of transcriptome datasets relevant to human placenta biology available for further analysis and interpretation via an interactive data browsing interface. We identified and retrieved a total of 24 datasets encompassing 759 transcriptome profiles associated with the development of the human placenta and associated pathologies from the NCBI Gene Expression Omnibus (GEO) and present them in a custom web-based application designed for interactive query and visualization of integrated large-scale datasets ( http://placentalendocrinology.gxbsidra.org/dm3/landing.gsp). We also performed quality control checks using relevant biological markers. Multiple sample groupings and rank lists were subsequently created to facilitate data query and interpretation. Via this interface, users can create web-links to customized graphical views which may be inserted into manuscripts for further dissemination, or e-mailed to collaborators for discussion. The tool also enables users to browse a single gene across different projects, providing a mechanism for  developing new perspectives on the role of a molecule of interest across multiple biological states. 
The dataset collection we created here is available at: http://placentalendocrinology.gxbsidra.org/dm3.",2016-03-09 +23527209,"SNPflow: a lightweight application for the processing, storing and automatic quality checking of genotyping assays.","Single nucleotide polymorphisms (SNPs) play a prominent role in modern genetics. Current genotyping technologies such as Sequenom iPLEX, ABI TaqMan and KBioscience KASPar made the genotyping of huge SNP sets in large populations straightforward and allow the generation of hundreds of thousands of genotypes even in medium sized labs. While data generation is straightforward, the subsequent data conversion, storage and quality control steps are time-consuming, error-prone and require extensive bioinformatic support. In order to ease this tedious process, we developed SNPflow. SNPflow is a lightweight, intuitive and easily deployable application, which processes genotype data from Sequenom MassARRAY (iPLEX) and ABI 7900HT (TaqMan, KASPar) systems and is extendible to other genotyping methods as well. SNPflow automatically converts the raw output files to ready-to-use genotype lists, calculates all standard quality control values such as call rate, expected and real amount of replicates, minor allele frequency, absolute number of discordant replicates, discordance rate and the p-value of the HWE test, checks the plausibility of the observed genotype frequencies by comparing them to HapMap/1000-Genomes, provides a module for the processing of SNPs, which allow sex determination for DNA quality control purposes and, finally, stores all data in a relational database. SNPflow runs on all common operating systems and comes as both stand-alone version and multi-user version for laboratory-wide use. 
The software, a user manual, screenshots and a screencast illustrating the main features are available at http://genepi-snpflow.i-med.ac.at.",2013-03-19 +26298294,Automatically visualise and analyse data on pathways using PathVisioRPC from any programming environment.,"

Background

Biological pathways are descriptive diagrams of biological processes widely used for functional analysis of differentially expressed genes or proteins. Primary data analysis, such as quality control, normalisation, and statistical analysis, is often performed in scripting languages like R, Perl, and Python. Subsequent pathway analysis is usually performed using dedicated external applications. Workflows involving manual use of multiple environments are time consuming and error prone. Therefore, tools are needed that enable pathway analysis directly within the same scripting languages used for primary data analyses. Existing tools have limited capability in terms of available pathway content, pathway editing and visualisation options, and export file formats. Consequently, making the full-fledged pathway analysis tool PathVisio available from various scripting languages will benefit researchers.

Results

We developed PathVisioRPC, an XMLRPC interface for the pathway analysis software PathVisio. PathVisioRPC enables creating and editing biological pathways, visualising data on pathways, performing pathway statistics, and exporting results in several image formats in multiple programming environments. We demonstrate PathVisioRPC functionalities using examples in Python. Subsequently, we analyse a publicly available NCBI GEO gene expression dataset studying tumour bearing mice treated with cyclophosphamide in R. The R scripts demonstrate how calls to existing R packages for data processing and calls to PathVisioRPC can directly work together. To further support R users, we have created RPathVisio simplifying the use of PathVisioRPC in this environment. We have also created a pathway module for the microarray data analysis portal ArrayAnalysis.org that calls the PathVisioRPC interface to perform pathway analysis. This module allows users to use PathVisio functionality online without having to download and install the software and exemplifies how the PathVisioRPC interface can be used by data analysis pipelines for functional analysis of processed genomics data.

Conclusions

PathVisioRPC enables data visualisation and pathway analysis directly from within various analytical environments used for preliminary analyses. It supports the use of existing pathways from WikiPathways or pathways created using the RPC itself. It also enables automation of tasks performed using PathVisio, making it useful to PathVisio users performing repeated visualisation and analysis tasks. PathVisioRPC is freely available for academic and commercial use at http://projects.bigcat.unimaas.nl/pathvisiorpc.",2015-08-23 +27625338,"Alternative Splicing, Internal Promoter, Nonsense-Mediated Decay, or All Three: Explaining the Distribution of Truncation Variants in Titin.","

Background

Truncating mutations in the giant sarcomeric gene Titin are the most common type of genetic alteration in dilated cardiomyopathy. Detailed studies have amassed a wealth of information about truncating variant position in cases and controls. Nonetheless, considerable confusion exists as to how to interpret the pathogenicity of these variants, hindering our ability to make useful recommendations to patients.

Methods and results

Building on our recent discovery of a conserved internal promoter within the Titin gene, we sought to develop an integrative statistical model to explain the observed pattern of Titin truncation variants in patients with dilated cardiomyopathy and population controls. We amassed Titin truncation mutation information from 1714 human dilated cardiomyopathy cases and >69 000 controls and found 3 factors explaining the distribution of Titin mutations: (1) alternative splicing, (2) whether the internal promoter Cronos isoform was disrupted, and (3) whether the distal C terminus was targeted (in keeping with the observation that truncation variants in this region escape nonsense-mediated decay and continue to be incorporated in the sarcomere). A model using these 3 factors had strong predictive performance with an area under the receiver operating characteristic curve of 0.81. Accordingly, individuals with either the most severe form of dilated cardiomyopathy or whose mutations demonstrated clear family segregation experienced the highest risk profile across all 3 components.

Conclusions

We conclude that quantitative models derived from large-scale human genetic and phenotypic data can be applied to help overcome the ever-growing challenges of genetic data interpretation. Results of our approach can be found at http://cvri.ucsf.edu/~deo/TTNtruncationvariant.html.",2016-09-13 +23651452,Reconstituting protein interaction networks using parameter-dependent domain-domain interactions.,"

Background

We can describe protein-protein interactions (PPIs) as sets of distinct domain-domain interactions (DDIs) that mediate the physical interactions between proteins. Experimental data confirm that DDIs are more consistent than their corresponding PPIs, lending support to the notion that analyses of DDIs may improve our understanding of PPIs and lead to further insights into cellular function, disease, and evolution. However, currently available experimental DDI data cover only a small fraction of all existing PPIs and, in the absence of structural data, determining which particular DDI mediates any given PPI is a challenge.

Results

We present two contributions to the field of domain interaction analysis. First, we introduce a novel computational strategy to merge domain annotation data from multiple databases. We show that when we merged yeast domain annotations from six annotation databases we increased the average number of domains per protein from 1.05 to 2.44, bringing it closer to the estimated average value of 3. Second, we introduce a novel computational method, parameter-dependent DDI selection (PADDS), which, given a set of PPIs, extracts a small set of domain pairs that can reconstruct the original set of protein interactions, while attempting to minimize false positives. Based on a set of PPIs from multiple organisms, our method extracted 27% more experimentally detected DDIs than existing computational approaches.

Conclusions

We have provided a method to merge domain annotation data from multiple sources, ensuring large and consistent domain annotation for any given organism. Moreover, we provided a method to extract a small set of DDIs from the underlying set of PPIs and we showed that, in contrast to existing approaches, our method was not biased towards DDIs with low or high occurrence counts. Finally, we used these two methods to highlight the influence of the underlying annotation density on the characteristics of extracted DDIs. Although increased annotations greatly expanded the possible DDIs, the lack of knowledge of the true biological false positive interactions still prevents an unambiguous assignment of domain interactions responsible for all protein network interactions. Executable files and examples are given at: http://www.bhsai.org/downloads/padds/",2013-05-07 +24772376,An Accurate Scalable Template-based Alignment Algorithm.,"The rapid determination of nucleic acid sequences is increasing the number of sequences that are available. Inherent in a template or seed alignment is the culmination of structural and functional constraints that are selecting those mutations that are viable during the evolution of the RNA. While we might not understand these structural and functional constraints, template-based alignment programs utilize the patterns of sequence conservation to encapsulate the characteristics of viable RNA sequences that are aligned properly. We have developed a program that utilizes the different dimensions of information in rCAD, a large RNA informatics resource, to establish a profile for each position in an alignment. The most significant include sequence identity and column composition in different phylogenetic taxa. We have compared our methods with a maximum of eight alternative alignment methods on different sets of 16S and 23S rRNA sequences with sequence percent identities ranging from 50% to 100%. 
The results showed that CRWAlign outperformed the other alignment methods in both speed and accuracy. A web-based alignment server is available at http://www.rna.ccbb.utexas.edu/SAE/2F/CRWAlign.",2012-12-01 +23949335,An informatics approach to integrating genetic and neurological data in speech and language neuroscience.,"A number of heritable disorders impair the normal development of speech and language processes and occur in large numbers within the general population. While candidate genes and loci have been identified, the gap between genotype and phenotype is vast, limiting current understanding of the biology of normal and disordered processes. This gap exists not only in our scientific knowledge, but also in our research communities, where genetics researchers and speech, language, and cognitive scientists tend to operate independently. Here we describe a web-based, domain-specific, curated database that represents information about genotype-phenotype relations specific to speech and language disorders, as well as neuroimaging results demonstrating focal brain differences in relevant patients versus controls. Bringing these two distinct data types into a common database ( http://neurospeech.org/sldb ) is a first step toward bringing molecular level information into cognitive and computational theories of speech and language function. One bridge between these data types is provided by densely sampled profiles of gene expression in the brain, such as those provided by the Allen Brain Atlases. Here we present results from exploratory analyses of human brain gene expression profiles for genes implicated in speech and language disorders, which are annotated in our database. We then discuss how such datasets can be useful in the development of computational models that bridge levels of analysis, necessary to provide a mechanistic understanding of heritable language disorders. 
We further describe our general approach to information integration, discuss important caveats and considerations, and offer a specific but speculative example based on genes implicated in stuttering and basal ganglia function in speech motor control.",2014-01-01 +26861822,Measuring the spatial correlations of protein binding sites.,"

Motivation

Understanding the interactions of different DNA binding proteins is a crucial first step toward deciphering gene regulatory mechanism. With advances of high-throughput sequencing technology such as ChIP-seq, the genome-wide binding sites of many proteins have been profiled under different biological contexts. It is of great interest to quantify the spatial correlations of the binding sites, such as their overlaps, to provide information for the interactions of proteins. Analyses of the overlapping patterns of binding sites have been widely performed, mostly based on ad hoc methods. Due to the heterogeneity and the tremendous size of the genome, such methods often lead to biased or even erroneous results.

Results

In this work, we discover a Simpson's paradox phenomenon in assessing the genome-wide spatial correlation of protein binding sites. Leveraging information from publicly available data, we propose a testing procedure for evaluating the significance of overlapping from a pair of proteins, which accounts for background artifacts and genome heterogeneity. Real data analyses demonstrate that the proposed method provides more biologically meaningful results.

Availability and implementation

An R package is available at http://www.sta.cuhk.edu.hk/YWei/ChIPCor.html

Contacts

ywei@sta.cuhk.edu.hk or hao.wu@emory.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-09 +29149877,Key bottlenecks to the provision of safe obstetric anaesthesia in low- income countries; a cross-sectional survey of 64 hospitals in Uganda.,"

Background

Despite recent advances in surgery and anaesthesia which significantly improve safety, many health facilities in low-and middle-income countries (LMICs) remain chronically under-resourced with inability to cope effectively with serious obstetric complications (Knight et al., PLoS One 8:e63846, 2013). As a result many of these countries still have unacceptably high maternal and neonatal mortality rates. Recent data at the national referral hospitals in East Africa reported that none of the national referral hospitals met the World Federation of Societies of Anesthesiologists (WFSA) international standards required to provide safe obstetric anaesthesia (Epiu I: Challenges of Anesthesia in Low-and Middle-Income Countries. WFSA; 2014 http://wfsa.newsweaver.com/Newsletter/p8c8ta4ri7a1wsacct9y3u?a=2&p=47730565&t=27996496 ). In spite of this evidence, factors contributing to maternal mortality related to anaesthesia in LMICs and the magnitude of these issues have not been comprehensively studied. We therefore set out to assess regional referral, district, private for profit and private not-for profit hospitals in Uganda.

Methods

We conducted a cross-sectional survey at 64 government and private hospitals in Uganda using pre-set questionnaires to the anaesthetists and hospital directors. Access to the minimum requirements for safe obstetric anaesthesia according to WFSA guidelines were also checked using a checklist for operating and recovery rooms.

Results

Response rate was 100% following personal interviews of anaesthetists, and hospital directors. Only 3 of the 64 (5%) of the hospitals had all requirements available to meet the WFSA International guidelines for safe anaesthesia. Additionally, 54/64 (84%) did not have a trained physician anaesthetist and 5/64 (8%) had no trained providers for anaesthesia at all. Frequent shortages of drugs were reported for regional/neuroaxial anaesthesia, and other essential drugs were often lacking such as antacids and antihypertensives. We noted that many of the anaesthesia machines present were obsolete models without functional safety alarms and/or mechanical ventilators. Continuous ECG was only available in 3/64 (5%) of hospitals.

Conclusion

We conclude that there is a significant lack of essential equipment for the delivery of safe anaesthesia across this region. This is compounded by the shortage of trained providers and inadequate supervision. It is therefore essential to strengthen anaesthesia services by addressing these specific deficiencies. This will include improved training of associate clinicians, training more physician anaesthetists and providing the basic equipment required to provide safe and effective care. These services are key components of comprehensive emergency obstetric care and anaesthetists are crucial in managing critically ill mothers and ensuring good surgical outcomes.",2017-11-17 +27412087,sBWT: memory efficient implementation of the hardware-acceleration-friendly Schindler transform for the fast biological sequence mapping.,"

Motivation

The Full-text index in Minute space (FM-index) derived from the Burrows-Wheeler transform (BWT) is broadly used for fast string matching in large genomes or a huge set of sequencing reads. Several graphic processing unit (GPU) accelerated aligners based on the FM-index have been proposed recently; however, the construction of the index is still handled by central processing unit (CPU), only parallelized in data level (e.g. by performing blockwise suffix sorting in GPU), or not scalable for large genomes.

Results

To fulfill the need for a more practical, hardware-parallelizable indexing and matching approach, we herein propose sBWT based on a BWT variant (i.e. Schindler transform) that can be built with highly simplified hardware-acceleration-friendly algorithms and still suffices accurate and fast string matching in repetitive references. In our tests, the implementation achieves significant speedups in indexing and searching compared with other BWT-based tools and can be applied to a variety of domains.

Availability and implementation

sBWT is implemented in C++ with CPU-only and GPU-accelerated versions. sBWT is open-source software and is available at http://jhhung.github.io/sBWT/ . Supplementary information: Supplementary data are available at Bioinformatics online.

Contact

chyee@ntu.edu.tw or jhhung@nctu.edu.tw (also juihunghung@gmail.com).",2016-07-13 +23970444,High-dose chemotherapy and autologous haematopoietic stem cell rescue for children with high-risk neuroblastoma.,"

Background

Despite the development of new treatment options, the prognosis of high-risk neuroblastoma patients is still poor; more than half of patients experience disease recurrence. High-dose chemotherapy and haematopoietic stem cell rescue (i.e. myeloablative therapy) might improve survival. This review is an update of a previously published Cochrane review.

Objectives

The primary objective was to compare the efficacy of myeloablative therapy with conventional therapy in children with high-risk neuroblastoma. Secondary objectives were to determine possible effects of these interventions on adverse events, late effects and quality of life.

Search methods

We searched the electronic databases CENTRAL (The Cochrane Library 2012, issue 6), MEDLINE/PubMed (1966 to June 2012) and EMBASE/Ovid (1980 to June 2012). In addition, we searched reference lists of relevant articles and the conference proceedings of the International Society for Paediatric Oncology (SIOP) (from 2002 to 2011), American Society for Pediatric Hematology and Oncology (ASPHO) (from 2002 to 2012), Advances in Neuroblastoma Research (ANR) (from 2002 to 2012) and American Society for Clinical Oncology (ASCO) (from 2008 to 2012). We searched for ongoing trials by scanning the ISRCTN register and the National Institute of Health Register (http://www.controlled-trials.com; both screened July 2012).

Selection criteria

Randomised controlled trials (RCTs) comparing the efficacy of myeloablative therapy with conventional therapy in high-risk neuroblastoma patients.

Data collection and analysis

Two authors independently performed study selection, data extraction and risk of bias assessment. If appropriate, we pooled studies. The risk ratio (RR) and 95% confidence interval (CI) was calculated for dichotomous outcomes. For the assessment of survival data, we calculated the hazard ratio (HR) and 95% CI. We used Parmar's method if hazard ratios were not reported in the study. We used a random-effects model.

Main results

We identified three RCTs including 739 children. They all used an age of one year as the cut-off point for pre-treatment risk stratification. The updated search identified a manuscript reporting additional follow-up data for one of these RCTs. There was a statistically significant difference in event-free survival in favour of myeloablative therapy over conventional chemotherapy or no further treatment (3 studies, 739 patients; HR 0.78, 95% CI 0.67 to 0.90). There was a statistically significant difference in overall survival in favour of myeloablative therapy over conventional chemotherapy or no further treatment (2 studies, 360 patients; HR 0.74, 95% CI 0.57 to 0.98). However, when additional follow-up data were included in the analyses the difference in event-free survival remained statistically significant (3 studies, 739 patients; HR 0.79, 95% CI 0.70 to 0.90), but the difference in overall survival was no longer statistically significant (2 studies, 360 patients; HR 0.86, 95% CI 0.73 to 1.01). The meta-analysis of secondary malignant disease and treatment-related death did not show any statistically significant differences between the treatment groups. Data from one study (379 patients) showed a significantly higher incidence of renal effects, interstitial pneumonitis and veno-occlusive disease in the myeloablative group compared to conventional chemotherapy, whereas for serious infections and sepsis no significant difference between the treatment groups was identified. No information on quality of life was reported. In the individual studies we evaluated different subgroups, but the results were not univocal in all studies. All studies had some methodological limitations.

Authors' conclusions

Based on the currently available evidence, myeloablative therapy seems to work in terms of event-free survival. For overall survival there is currently no evidence of effect when additional follow-up data are included. No definitive conclusions can be made regarding adverse effects and quality of life, although possible higher levels of adverse effects should be kept in mind. A definitive conclusion regarding the effect of myeloablative therapy in different subgroups is not possible. This systematic review only allows a conclusion on the concept of myeloablative therapy; no conclusions can be made regarding the best treatment strategy. Future trials on the use of myeloablative therapy for high-risk neuroblastoma should focus on identifying the most optimal induction and/or myeloablative regimen. The best study design to answer these questions is a RCT. These RCTs should be performed in homogeneous study populations (e.g. stage of disease and patient age) and have a long-term follow-up. Different risk groups, using the most recent definitions, should be taken into account. It should be kept in mind that recently the age cut-off for high risk disease was changed from one year to 18 months. As a result it is possible that patients with what is now classified as intermediate-risk disease have been included in the high-risk groups. Consequently the relevance of the results of these studies to the current practice can be questioned. Survival rates may be overestimated due to the inclusion of patients with intermediate-risk disease.",2013-08-22 +23282057,IntPath--an integrated pathway gene relationship database for model organisms and important pathogens.,"

Background

Pathway data are important for understanding the relationship between genes, proteins and many other molecules in living organisms. Pathway gene relationships are crucial information for guidance, prediction, reference and assessment in biochemistry, computational biology, and medicine. Many well-established databases--e.g., KEGG, WikiPathways, and BioCyc--are dedicated to collecting pathway data for public access. However, the effectiveness of these databases is hindered by issues such as incompatible data formats, inconsistent molecular representations, inconsistent molecular relationship representations, inconsistent referrals to pathway names, and incomprehensive data from different databases.

Results

In this paper, we overcome these issues through extraction, normalization and integration of pathway data from several major public databases (KEGG, WikiPathways, BioCyc, etc). We build a database that not only hosts our integrated pathway gene relationship data for public access but also maintains the necessary updates in the long run. This public repository is named IntPath (Integrated Pathway gene relationship database for model organisms and important pathogens). Four organisms--S. cerevisiae, M. tuberculosis H37Rv, H. Sapiens and M. musculus--are included in this version (V2.0) of IntPath. IntPath uses the ""full unification"" approach to ensure no deletion and no introduced noise in this process. Therefore, IntPath contains much richer pathway-gene and pathway-gene pair relationships and much larger number of non-redundant genes and gene pairs than any of the single-source databases. The gene relationships of each gene (measured by average node degree) per pathway are significantly richer. The gene relationships in each pathway (measured by average number of gene pairs per pathway) are also considerably richer in the integrated pathways. Moderate manual curation is involved to get rid of errors and noise from source data (e.g., the gene ID errors in WikiPathways and relationship errors in KEGG). We turn complicated and incompatible xml data formats and inconsistent gene and gene relationship representations from different source databases into normalized and unified pathway-gene and pathway-gene pair relationships neatly recorded in simple tab-delimited text format and MySQL tables, which facilitates convenient automatic computation and large-scale referencing in many related studies. IntPath data can be downloaded in text format or MySQL dump. IntPath data can also be retrieved and analyzed conveniently through web service by local programs or through web interface by mouse clicks. Several useful analysis tools are also provided in IntPath.

Conclusions

We have overcome in IntPath the issues of compatibility, consistency, and comprehensiveness that often hamper effective use of pathway databases. We have included four organisms in the current release of IntPath. Our methodology and programs described in this work can be easily applied to other organisms; and we will include more model organisms and important pathogens in future releases of IntPath. IntPath maintains regular updates and is freely available at http://compbio.ddns.comp.nus.edu.sg:8080/IntPath.",2012-12-12 +24515476,"Antibody V and C domain sequence, structure, and interaction analysis with special reference to IMGT®.","IMGT(®), the international ImMunoGeneTics information system(®) (http://www.imgt.org), created in 1989 (Centre National de la Recherche Scientifique, Montpellier University), is acknowledged as the global reference in immunogenetics and immunoinformatics. The accuracy and the consistency of the IMGT(®) data are based on IMGT-ONTOLOGY which bridges the gap between genes, sequences, and three-dimensional (3D) structures. Thus, receptors, chains, and domains are characterized with the same IMGT(®) rules and standards (IMGT standardized labels, IMGT gene and allele nomenclature, IMGT unique numbering, IMGT Collier de Perles), independently from the molecule type (genomic DNA, complementary DNA, transcript, or protein) or from the species. More particularly, IMGT(®) tools and databases provide a highly standardized analysis of the immunoglobulin (IG) or antibody and T cell receptor (TR) V and C domains. IMGT/V-QUEST analyzes the V domains of IG or TR rearranged nucleotide sequences, integrates the IMGT/JunctionAnalysis and IMGT/Automat tools, and provides IMGT Collier de Perles. IMGT/HighV-QUEST analyzes sequences from high-throughput sequencing (HTS) (up to 150,000 sequences per batch) and performs statistical analysis on up to 450,000 results, with the same resolution and high quality as IMGT/V-QUEST online. 
IMGT/DomainGapAlign analyzes amino acid sequences of V and C domains and IMGT/3Dstructure-DB and associated tools provide information on 3D structures, contact analysis, and paratope/epitope interactions. These IMGT(®) tools and databases, and the IMGT/mAb-DB interface with access to therapeutical antibody data, provide an invaluable help for antibody engineering and antibody humanization.",2014-01-01 +25077800,Fast and sensitive alignment of microbial whole genome sequencing reads to large sequence datasets on a desktop PC: application to metagenomic datasets and pathogen identification.,"Next generation sequencing (NGS) of metagenomic samples is becoming a standard approach to detect individual species or pathogenic strains of microorganisms. Computer programs used in the NGS community have to balance between speed and sensitivity and as a result, species or strain level identification is often inaccurate and low abundance pathogens can sometimes be missed. We have developed Taxoner, an open source, taxon assignment pipeline that includes a fast aligner (e.g. Bowtie2) and a comprehensive DNA sequence database. We tested the program on simulated datasets as well as experimental data from Illumina, IonTorrent, and Roche 454 sequencing platforms. We found that Taxoner performs as well as, and often better than BLAST, but requires two orders of magnitude less running time meaning that it can be run on desktop or laptop computers. Taxoner is slower than the approaches that use small marker databases but is more sensitive due to the comprehensive reference database. In addition, it can be easily tuned to specific applications using small tailored databases. When applied to metagenomic datasets, Taxoner can provide a functional summary of the genes mapped and can provide strain level identification. Taxoner is written in C for Linux operating systems. 
The code and documentation are available for research applications at http://code.google.com/p/taxoner.",2014-07-31 +28451982,Multi-Algorithm Particle Simulations with Spatiocyte.,"As quantitative biologists get more measurements of spatially regulated systems such as cell division and polarization, simulation of reaction and diffusion of proteins using the data is becoming increasingly relevant to uncover the mechanisms underlying the systems. Spatiocyte is a lattice-based stochastic particle simulator for biochemical reaction and diffusion processes. Simulations can be performed at single molecule and compartment spatial scales simultaneously. Molecules can diffuse and react in 1D (filament), 2D (membrane), and 3D (cytosol) compartments. The implications of crowded regions in the cell can be investigated because each diffusing molecule has spatial dimensions. Spatiocyte adopts multi-algorithm and multi-timescale frameworks to simulate models that simultaneously employ deterministic, stochastic, and particle reaction-diffusion algorithms. Comparison of light microscopy images to simulation snapshots is supported by Spatiocyte microscopy visualization and molecule tagging features. Spatiocyte is open-source software and is freely available at http://spatiocyte.org .",2017-01-01 +28365726,Boechera microsatellite website: an online portal for species identification and determination of hybrid parentage. ,"Boechera (Brassicaceae) has many features to recommend it as a model genus for ecological and evolutionary research, including species richness, ecological diversity, experimental tractability and close phylogenetic proximity to Arabidopsis . However, efforts to realize the full potential of this model system have been thwarted by the frequent inability of researchers to identify their samples and place them in a broader evolutionary context. 
Here we present the Boechera Microsatellite Website (BMW), a portal that archives over 55 000 microsatellite allele calls from 4471 specimens (including 133 nomenclatural types). The portal includes analytical tools that utilize data from 15 microsatellite loci as a highly effective DNA barcoding system. The BMW facilitates the accurate identification of Boechera samples and the investigation of reticulate evolution among the ±83 sexual diploid taxa in the genus, thereby greatly enhancing Boechera 's potential as a model system. http://sites.biology.duke.edu/windhamlab/.",2017-01-01 +27609420,Multi-species Identification of Polymorphic Peptide Variants via Propagation in Spectral Networks.,"Peptide and protein identification remains challenging in organisms with poorly annotated or rapidly evolving genomes, as are commonly encountered in environmental or biofuels research. Such limitations render tandem mass spectrometry (MS/MS) database search algorithms ineffective as they lack corresponding sequences required for peptide-spectrum matching. We address this challenge with the spectral networks approach to (1) match spectra of orthologous peptides across multiple related species and then (2) propagate peptide annotations from identified to unidentified spectra. We here present algorithms to assess the statistical significance of spectral alignments (Align-GF), reduce the impurity in spectral networks, and accurately estimate the error rate in propagated identifications. Analyzing three related Cyanothece species, a model organism for biohydrogen production, spectral networks identified peptides from highly divergent sequences from networks with dozens of variant peptides, including thousands of peptides in species lacking a sequenced genome. Our analysis further detected the presence of many novel putative peptides even in genomically characterized species, thus suggesting the possibility of gaps in our understanding of their proteomic and genomic expression. 
A web-based pipeline for spectral networks analysis is available at http://proteomics.ucsd.edu/software.",2016-09-08 +26758513,CANEapp: a user-friendly application for automated next generation transcriptomic data analysis.,"

Background

Next generation sequencing (NGS) technologies are indispensable for molecular biology research, but data analysis represents the bottleneck in their application. Users need to be familiar with computer terminal commands, the Linux environment, and various software tools and scripts. Analysis workflows have to be optimized and experimentally validated to extract biologically meaningful data. Moreover, as larger datasets are being generated, their analysis requires use of high-performance servers.

Results

To address these needs, we developed CANEapp (application for Comprehensive automated Analysis of Next-generation sequencing Experiments), a unique suite that combines a Graphical User Interface (GUI) and an automated server-side analysis pipeline that is platform-independent, making it suitable for any server architecture. The GUI runs on a PC or Mac and seamlessly connects to the server to provide full GUI control of RNA-sequencing (RNA-seq) project analysis. The server-side analysis pipeline contains a framework that is implemented on a Linux server through completely automated installation of software components and reference files. Analysis with CANEapp is also fully automated and performs differential gene expression analysis and novel noncoding RNA discovery through alternative workflows (Cuffdiff and R packages edgeR and DESeq2). We compared CANEapp to other similar tools, and it significantly improves on previous developments. We experimentally validated CANEapp's performance by applying it to data derived from different experimental paradigms and confirming the results with quantitative real-time PCR (qRT-PCR). CANEapp adapts to any server architecture by effectively using available resources and thus handles large amounts of data efficiently. CANEapp performance has been experimentally validated on various biological datasets. CANEapp is available free of charge at http://psychiatry.med.miami.edu/research/laboratory-of-translational-rna-genomics/CANE-app .

Conclusions

We believe that CANEapp will serve both biologists with no computational experience and bioinformaticians as a simple, timesaving but accurate and powerful tool to analyze large RNA-seq datasets and will provide foundations for future development of integrated and automated high-throughput genomics data analysis tools. Due to its inherently standardized pipeline and combination of automated analysis and platform-independence, CANEapp is ideal for large-scale collaborative RNA-seq projects between different institutions and research groups.",2016-01-13 +28200021,Statistical database analysis of the role of loop dynamics for protein-protein complex formation and allostery.,"

Motivation

Protein loops show rich conformational dynamics properties on a wide range of timescales as they play an essential role for many cellular functions during protein-protein interactions and recognition processes. However, little is known about the detailed behavior of loops upon protein binding including allostery.

Results

We report the loop motions and their dominant timescales for a library of 230 proteins that form protein-protein complexes using the ToeLoop predictor of loop dynamics. We applied the analysis to proteins in both their complex and free state and relate specific loop properties to their role in protein recognition. We observe a strong tendency of loops that move on relatively slow timescales of tens of ns to sub-μs to be directly involved in binding and recognition processes. Complex formation leads to a significant reduction in loop flexibility at the binding interface, but in a number of cases it can also trigger increased flexibility in distal loops in response to allosteric conformational changes. The importance of loop dynamics and allostery is highlighted by a case study of an antibody-antigen complex. Furthermore, we explored the relationship between loop dynamics and experimental binding affinities and found that a prevalence of high loop rigidity at the binding interface is an indicator of increased binding strength.

Availability and implementation

http://spin.ccic.ohio-state.edu/index.php/toeloopppi.

Contact

bruschweiler.1@osu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +28158612,Accurate prediction of human essential genes using only nucleotide composition and association information.,"

Motivation

Previously constructed classifiers in predicting eukaryotic essential genes integrated a variety of features including experimental ones. If we can obtain satisfactory prediction using only nucleotide (sequence) information, it would be more promising. Three groups recently identified essential genes in human cancer cell lines using wet experiments and it provided wonderful opportunity to accomplish our idea. Here we improved the Z curve method into the λ-interval form to denote nucleotide composition and association information and used it to construct the SVM classifying model.

Results

Our model accurately predicted human gene essentiality with an AUC higher than 0.88 both for 5-fold cross-validation and jackknife tests. These results demonstrated that the essentiality of human genes could be reliably reflected by only sequence information. We re-predicted the negative dataset by our Pheg server and 118 genes were additionally predicted as essential. Among them, 20 were found to be homologues in mouse essential genes, indicating that some of the 118 genes were indeed essential, however previous experiments overlooked them. As the first available server, Pheg could predict essentiality for anonymous gene sequences of human. It is also hoped the λ-interval Z curve method could be effectively extended to classification issues of other DNA elements.

Availability and implementation

http://cefg.uestc.edu.cn/Pheg.

Contact

fbguo@uestc.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-06-01 +28226024,[Guidelines for Accurate and Transparent Health Estimates Reporting: the GATHER Statement].,"Measurements of health indicators are rarely available for every population and period of interest, and available data may not be comparable. The Guidelines for Accurate and Transparent Health Estimates Reporting (GATHER) define best reporting practices for studies that calculate health estimates for multiple populations (in time or space) using multiple information sources. Health estimates that fall within the scope of GATHER include all quantitative population-level estimates (including global, regional, national, or subnational estimates) of health indicators, including indicators of health status, incidence and prevalence of diseases, injuries, and disability and functioning; and indicators of health determinants, including health behaviours and health exposures. GATHER comprises a checklist of 18 items that are essential for best reporting practice. A more detailed explanation and elaboration document, describing the interpretation and rationale of each reporting item along with examples of good reporting, is available on the GATHER website (http://gather-statement.org).",2017-01-01 +27616707,DM-BLD: differential methylation detection using a hierarchical Bayesian model exploiting local dependency.,"

Motivation

The advent of high-throughput DNA methylation profiling techniques has enabled the possibility of accurate identification of differentially methylated genes for cancer research. The large number of measured loci facilitates whole genome methylation study, yet posing great challenges for differential methylation detection due to the high variability in tumor samples.

Results

We have developed a novel probabilistic approach, Differential Methylation detection using a hierarchical Bayesian model exploiting Local Dependency (DM-BLD), to detect differentially methylated genes based on a Bayesian framework. The DM-BLD approach features a joint model to capture both the local dependency of measured loci and the dependency of methylation change in samples. Specifically, the local dependency is modeled by Leroux conditional autoregressive structure; the dependency of methylation changes is modeled by a discrete Markov random field. A hierarchical Bayesian model is developed to fully take into account the local dependency for differential analysis, in which differential states are embedded as hidden variables. Simulation studies demonstrate that DM-BLD outperforms existing methods for differential methylation detection, particularly when the methylation change is moderate and the variability of methylation in samples is high. DM-BLD has been applied to breast cancer data to identify important methylated genes (such as polycomb target genes and genes involved in transcription factor activity) associated with breast cancer recurrence.

Availability and implementation

A Matlab package of DM-BLD is available at http://www.cbil.ece.vt.edu/software.htm CONTACT: Xuan@vt.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-11 +23958731,A fast Peptide Match service for UniProt Knowledgebase.,"

Summary

We have developed a new web application for peptide matching using Apache Lucene-based search engine. The Peptide Match service is designed to quickly retrieve all occurrences of a given query peptide from UniProt Knowledgebase (UniProtKB) with isoforms. The matched proteins are shown in summary tables with rich annotations, including matched sequence region(s) and links to corresponding proteins in a number of proteomic/peptide spectral databases. The results are grouped by taxonomy and can be browsed by organism, taxonomic group or taxonomy tree. The service supports queries where isobaric leucine and isoleucine are treated equivalent, and an option for searching UniRef100 representative sequences, as well as dynamic queries to major proteomic databases. In addition to the web interface, we also provide RESTful web services. The underlying data are updated every 4 weeks in accordance with the UniProt releases.

Availability

http://proteininformationresource.org/peptide.shtml.

Contact

chenc@udel.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-08-19 +30272889,[Length of Meristematic and Fully Elongated Root Cells Related to Haploid DNA Content].,"The lengths of meristematic (l(m)) and fully-elongated cells (l(e)) were measured in the roots of 118 +monocot and dicot species of herbaceous plants from 20 angiosperm families. The results were analyzed using +the data on haploid DNA content (C(val)) for the same species from the website (http://data.kew.org/cvalues). +The distribution range of l(m), l(e), and C(val) was wider in monocot plants compared to dicots. Values of l(m) and l(e) +in monocot and l(m) in dicot species correlated positively with C(val). Dependence of l(m) and l(e) on C(val) was similar +in diploid and polyploid species, both monocots and dicots. The average length of root cells differed less than +the root length.",2016-11-01 +27614385,[History of kidney transplantation surgery].,"

Objectives

To perform a state of the art about the history of kidney transplantation.

Material and methods

An exhaustive systematic review of the scientific literature was performed in the Medline database (http://www.ncbi.nlm.nih.gov) and Embase (http://www.embase.com) using different associations of the following keywords (MESH): kidney transplantation, history, vascular anastomosis.

Results

From the first vascular ligations to the discovery of ciclosporin, the history of organ transplantation was made of surgical bets and medical discoveries, such as blood group, HLA-system, immunity, etc. The audacity of some surgeons led to the onset of renal transplantation as the treatment of choice for end stage renal disease.

Conclusion

This article aims to describe the first surgical methods for vascular anastomosis and renal transplantation. Through a comprehensive search within the archives of the French National Library, the authors provide a precise description of the first renal transplantations performed, the techniques that have been used and their authors.",2016-09-07 +27603513,SMEpred workbench: A web server for predicting efficacy of chemically modified siRNAs.,"Chemical modifications have been extensively exploited to circumvent shortcomings in therapeutic applications of small interfering RNAs (siRNAs). However, experimental designing and testing of these siRNAs or chemically modified siRNAs (cm-siRNAs) involves enormous resources. Therefore, in-silico intervention in designing cm-siRNAs would be of utmost importance. We developed SMEpred workbench to predict the efficacy of normal siRNAs as well as cm-siRNAs using 3031 heterogeneous cm-siRNA sequences from siRNAmod database. These include 30 frequently used chemical modifications on different positions of either siRNA strand. Support Vector Machine (SVM) was employed to develop predictive models utilizing various sequence features namely mono-, di-nucleotide composition, binary pattern and their hybrids. We achieved highest Pearson Correlation Coefficient (PCC) of 0.80 during 10-fold cross validation and similar PCC value in independent validation. We have provided the algorithm in the 'SMEpred' pipeline to predict the normal siRNAs from the gene or mRNA sequence. For multiple modifications, we have assembled 'MultiModGen' module to design multiple modifications and further process them to evaluate their predicted efficacies. SMEpred webserver will be useful to scientific community engaged in use of RNAi-based technology as well as for therapeutic development. 
Web server is available for public use at following URL address: http://bioinfo.imtech.res.in/manojk/smepred .",2016-09-07 +27606012,Ligand-based virtual screening interface between PyMOL and LiSiCA.,"Ligand-based virtual screening of large small-molecule databases is an important step in the early stages of drug development. It is based on the similarity principle and is used to reduce the chemical space of large databases to a manageable size where chosen ligands can be experimentally tested. Ligand-based virtual screening can also be used to identify bioactive molecules with different basic scaffolds compared to already known bioactive molecules, thus having the potential to increase the structural variability of compounds. Here, we present an interface between the popular molecular graphics system PyMOL and the ligand-based virtual screening software LiSiCA available at http://insilab.org/lisica-plugin and demonstrate how this interface can be used in the early stages of drug discovery process.Graphical AbstractLigand-based virtual screening interface between PyMOL and LiSiCA.",2016-09-07 +27603020,DIANA-TarBase and DIANA Suite Tools: Studying Experimentally Supported microRNA Targets.,"microRNAs (miRNAs) are short non-coding RNAs (∼22 nts) present in animals, plants, and viruses. They are considered central post-transcriptional regulators of gene expression and are key components in a great number of physiological and pathological conditions. The accurate characterization of their targets is considered essential to a series of applications and basic or applied research settings. DIANA-TarBase (http://www.microrna.gr/tarbase) was initially launched in 2006. It is a reference repository indexing experimentally derived miRNA-gene interactions in different cell types, tissues, and conditions across numerous species. 
This unit focuses on the study of experimentally supported miRNA-gene interactions, as well as their functional interpretation through the use of available tools in the DIANA suite (http://www.microrna.gr). The proposed use-case scenarios are presented in protocols, describing how to utilize the DIANA-TarBase database and DIANA-microT-CDS server and perform miRNA-targeted pathway analysis with DIANA-miRPath-v3. All analyses are directly invoked or initiated from DIANA-TarBase. © 2016 by John Wiley & Sons, Inc.",2016-09-07 +26607428,A Bayesian model for detection of high-order interactions among genetic variants in genome-wide association studies.,"

Background

A central question for disease studies and crop improvements is how genetic variants drive phenotypes. Genome Wide Association Study (GWAS) provides a powerful tool for characterizing the genotype-phenotype relationships in complex traits and diseases. Epistasis (gene-gene interaction), including high-order interaction among more than two genes, often plays important roles in complex traits and diseases, but current GWAS analysis usually just focuses on additive effects of single nucleotide polymorphisms (SNPs). The lack of effective computational modelling of high-order functional interactions often leads to significant under-utilization of GWAS data.

Results

We have developed a novel Bayesian computational method with a Markov Chain Monte Carlo (MCMC) search, and implemented the method as a Bayesian High-order Interaction Toolkit (BHIT) for detecting epistatic interactions among SNPs. BHIT first builds a Bayesian model on both continuous data and discrete data, which is capable of detecting high-order interactions in SNPs related to case--control or quantitative phenotypes. We also developed a pipeline that enables users to apply BHIT on different species in different use cases.

Conclusions

Using both simulation data and soybean nutritional seed composition studies on oil content and protein content, BHIT effectively detected some high-order interactions associated with phenotypes, and it outperformed a number of other available tools. BHIT is freely available for academic users at http://digbio.missouri.edu/BHIT/.",2015-11-25 +27787834,"CX, DPX, and PCW: Web Servers for the Visualization of Interior and Protruding Regions of Protein Structures in 3D and 1D.","The CX and DPX web-based servers at http://pongor.itk.ppke.hu/bioinfoservices are dedicated to the analysis of protein 3D structures submitted by the users as Protein Data Bank (PDB) files. CX computes an atomic protrusion index, cx that makes it possible to highlight the protruding atoms within a protein 3D structure. DPX calculates a depth index, dpx for buried atoms, and allows one to visualize the distribution of buried residues. CX and DPX visualize 3D structures colored according to the calculated indices and return PDB files that can be visualized using standard programs. A combined server site, the Protein Core Workbench allows visualization of dpx, cx, solvent-accessible area as well as the number of atomic contacts as 3D plots and 1D sequence plots. Online visualization of the 3D structures and 1D sequence plots are available in all three servers. Mirror sites are available at http://hydra.icgeb.trieste.it/protein/ .",2017-01-01 +23667249,A Toolkit to assess health needs for congenital disorders in low- and middle-income countries: an instrument for public health action.,"

Background

In 2010 the World Health Assembly called for action to improve the care and prevention of congenital disorders, noting that technical guidance would be required for this task, especially in low- and middle-income countries. Responding to this call, we have developed a freely available web-accessible Toolkit for assessing health needs for congenital disorders.

Methods

Materials for the Toolkit website (http://toolkit.phgfoundation.org) were prepared by an iterative process of writing, discussion and modification by the project team, with advice from external experts. A customized database was developed using epidemiological, demographic, socio-economic and health-services data from a range of validated sources. Document-processing and data integration software combines data from the database with a template to generate topic- and country-specific Calculator documents for quantitative analysis.

Results

The Toolkit guides users through selection of topics (including both clinical conditions and relevant health services), assembly and evaluation of qualitative and quantitative information, assessment of the potential effects of selected interventions, and planning and prioritization of actions to reduce the risk or prevalence of congenital disorders.

Conclusions

The Toolkit enables users without epidemiological or public health expertise to undertake health needs assessment as a prerequisite for strategic planning in relation to congenital disorders in their country or region.",2013-05-10 +27711033,"Update: Interim Guidance for Preconception Counseling and Prevention of Sexual Transmission of Zika Virus for Persons with Possible Zika Virus Exposure - United States, September 2016.","CDC has updated its interim guidance for persons with possible Zika virus exposure who are planning to conceive (1) and interim guidance to prevent transmission of Zika virus through sexual contact (2), now combined into a single document. Guidance for care for pregnant women with possible Zika virus exposure was previously published (3). Possible Zika virus exposure is defined as travel to or residence in an area of active Zika virus transmission (http://www.cdc.gov/zika/geo/index.html), or sex* without a condom with a partner who traveled to or lived in an area of active transmission. Based on new though limited data, CDC now recommends that all men with possible Zika virus exposure who are considering attempting conception with their partner, regardless of symptom status,§ wait to conceive until at least 6 months after symptom onset (if symptomatic) or last possible Zika virus exposure (if asymptomatic). Recommendations for women planning to conceive remain unchanged: women with possible Zika virus exposure are recommended to wait to conceive until at least 8 weeks after symptom onset (if symptomatic) or last possible Zika virus exposure (if asymptomatic). Couples with possible Zika virus exposure, who are not pregnant and do not plan to become pregnant, who want to minimize their risk for sexual transmission of Zika virus should use a condom or abstain from sex for the same periods for men and women described above. 
Women of reproductive age who have had or anticipate future Zika virus exposure who do not want to become pregnant should use the most effective contraceptive method that can be used correctly and consistently. These recommendations will be further updated when additional data become available.",2016-10-07 +21414985,A wholly defined Agilent microarray spike-in dataset.,"

Motivation

Spike-in datasets provide a valuable resource for assessing and comparing among competing microarray analysis strategies. Our previous wholly defined spike-in datasets, the Golden and Platinum Spikes, have provided insights for the analysis of Affymetrix GeneChips. However, a similar dataset, in which all cRNA identities and relative levels are known prospectively, has not been available for two-color platforms.

Results

We have generated a wholly defined spike-in dataset for Agilent microarrays consisting of 12 arrays with more than 2000 differentially expressed, and approximately 3600 background, cRNAs. The composition of this 'Ag Spike' dataset is identical to that of our previous Platinum Spike dataset and therefore allows direct cross-platform comparison. We demonstrate here the utility of the Ag Spike dataset for evaluating different analysis methods designed for two-color arrays. Comparison between the Ag Spike and Platinum Spike studies shows high agreement between results obtained using the Affymetrix and Agilent platforms.

Availability

The Ag Spike raw data can be accessed at http://www.ccr.buffalo.edu/halfon/spike/index.html and through NCBI's Gene Expression Omnibus (GEO; accession GSE24866).",2011-03-16 +23095476,The Hawaiian Freshwater Algal Database (HfwADB): a laboratory LIMS and online biodiversity resource.,"

Background

Biodiversity databases serve the important role of highlighting species-level diversity from defined geographical regions. Databases that are specially designed to accommodate the types of data gathered during regional surveys are valuable in allowing full data access and display to researchers not directly involved with the project, while serving as a Laboratory Information Management System (LIMS). The Hawaiian Freshwater Algal Database, or HfwADB, was modified from the Hawaiian Algal Database to showcase non-marine algal specimens collected from the Hawaiian Archipelago by accommodating the additional level of organization required for samples including multiple species.

Description

The Hawaiian Freshwater Algal Database is a comprehensive and searchable database containing photographs and micrographs of samples and collection sites, geo-referenced collecting information, taxonomic data and standardized DNA sequence data. All data for individual samples are linked through unique 10-digit accession numbers (""Isolate Accession""), the first five of which correspond to the collection site (""Environmental Accession""). Users can search online for sample information by accession number, various levels of taxonomy, habitat or collection site. HfwADB is hosted at the University of Hawaii, and was made publicly accessible in October 2011. At the present time the database houses data for over 2,825 samples of non-marine algae from 1,786 collection sites from the Hawaiian Archipelago. These samples include cyanobacteria, red and green algae and diatoms, as well as lesser representation from some other algal lineages.

Conclusions

HfwADB is a digital repository that acts as a Laboratory Information Management System for Hawaiian non-marine algal data. Users can interact with the repository through the web to view relevant habitat data (including geo-referenced collection locations) and download images of collection sites, specimen photographs and micrographs, and DNA sequences. It is publicly available at http://algae.manoa.hawaii.edu/hfwadb/.",2012-10-25 +23203881,"BRENDA in 2013: integrated reactions, kinetic data, enzyme function data, improved disease classification: new options and contents in BRENDA.","The BRENDA (BRaunschweig ENzyme DAtabase) enzyme portal (http://www.brenda-enzymes.org) is the main information system of functional biochemical and molecular enzyme data and provides access to seven interconnected databases. BRENDA contains 2.7 million manually annotated data on enzyme occurrence, function, kinetics and molecular properties. Each entry is connected to a reference and the source organism. Enzyme ligands are stored with their structures and can be accessed via their names, synonyms or via a structure search. FRENDA (Full Reference ENzyme DAta) and AMENDA (Automatic Mining of ENzyme DAta) are based on text mining methods and represent a complete survey of PubMed abstracts with information on enzymes in different organisms, tissues or organelles. The supplemental database DRENDA provides more than 910 000 new EC number-disease relations in more than 510 000 references from automatic search and a classification of enzyme-disease-related information. KENDA (Kinetic ENzyme DAta), a new amendment extracts and displays kinetic values from PubMed abstracts. The integration of the EnzymeDetector offers an automatic comparison, evaluation and prediction of enzyme function annotations for prokaryotic genomes. 
The biochemical reaction database BKM-react contains non-redundant enzyme-catalysed and spontaneous reactions and was developed to facilitate and accelerate the construction of biochemical models.",2012-11-29 +28930515,Changes in Transportation-Related Air Pollution Exposures by Race-Ethnicity and Socioeconomic Status: Outdoor Nitrogen Dioxide in the United States in 2000 and 2010.,"

Background

Disparities in exposure to air pollution by race-ethnicity and by socioeconomic status have been documented in the United States, but the impacts of declining transportation-related air pollutant emissions on disparities in exposure have not been studied in detail.

Objective

This study was designed to estimate changes over time (2000 to 2010) in disparities in exposure to outdoor concentrations of a transportation-related air pollutant, nitrogen dioxide (NO2), in the United States.

Methods

We combined annual average NO2 concentration estimates from a temporal land use regression model with Census demographic data to estimate outdoor exposures by race-ethnicity, socioeconomic characteristics (income, age, education), and by location (region, state, county, urban area) for the contiguous United States in 2000 and 2010.

Results

Estimated annual average NO2 concentrations decreased from 2000 to 2010 for all of the race-ethnicity and socioeconomic status groups, including a decrease from 17.6 ppb to 10.7 ppb (-6.9 ppb) in nonwhite [non-(white alone, non-Hispanic)] populations, and 12.6 ppb to 7.8 ppb (-4.7 ppb) in white (white alone, non-Hispanic) populations. In 2000 and 2010, disparities in NO2 concentrations were larger by race-ethnicity than by income. Although the national nonwhite-white mean NO2 concentration disparity decreased from a difference of 5.0 ppb in 2000 to 2.9 ppb in 2010, estimated mean NO2 concentrations remained 37% higher for nonwhites than whites in 2010 (40% higher in 2000), and nonwhites were 2.5 times more likely than whites to live in a block group with an average NO2 concentration above the WHO annual guideline in 2010 (3.0 times more likely in 2000).

Conclusions

Findings suggest that absolute NO2 exposure disparities by race-ethnicity decreased from 2000 to 2010, but relative NO2 exposure disparities persisted, with higher NO2 concentrations for nonwhites than whites in 2010. https://doi.org/10.1289/EHP959.",2017-09-14 +29084634,Associations between Ambient Fine Particulate Oxidative Potential and Cardiorespiratory Emergency Department Visits.,"

Background

Oxidative potential (OP) has been proposed as a measure of toxicity of ambient particulate matter (PM).

Objectives

Our goal was to address an important research gap by using daily OP measurements to conduct population-level analysis of the health effects of measured ambient OP.

Methods

A semi-automated dithiothreitol (DTT) analytical system was used to measure daily average OP (OPDTT) in water-soluble fine PM at a central monitor site in Atlanta, Georgia, over eight sampling periods (a total of 196 d) during June 2012-April 2013. Data on emergency department (ED) visits for selected cardiorespiratory outcomes were obtained for the five-county Atlanta metropolitan area. Poisson log-linear regression models controlling for temporal confounders were used to conduct time-series analyses of the relationship between daily counts of ED visits and either the 3-d moving average (lag 0-2) of OPDTT or same-day OPDTT. Bipollutant regression models were run to estimate the health associations of OPDTT while controlling for other pollutants.

Results

OPDTT was measured for 196 d (mean=0.32 nmol/min/m3, interquartile range=0.21). Lag 0-2 OPDTT was associated with ED visits for respiratory disease (RR=1.03, 95% confidence interval (CI): 1.00, 1.05 per interquartile range increase in OPDTT), asthma (RR=1.12, 95% CI: 1.03, 1.22), and ischemic heart disease (RR=1.19, 95% CI: 1.03, 1.38). Same-day OPDTT was not associated with ED visits for any outcome. Lag 0-2 OPDTT remained a significant predictor of asthma and ischemic heart disease in most bipollutant models.

Conclusions

Lag 0-2 OPDTT was associated with ED visits for multiple cardiorespiratory outcomes, providing support for the utility of OPDTT as a measure of fine particle toxicity. https://doi.org/10.1289/EHP1545.",2017-10-26 +27796336,Rawcopy: Improved copy number analysis with Affymetrix arrays.,"Microarray data is subject to noise and systematic variation that negatively affects the resolution of copy number analysis. We describe Rawcopy, an R package for processing of Affymetrix CytoScan HD, CytoScan 750k and SNP 6.0 microarray raw intensities (CEL files). Noise characteristics of a large number of reference samples are used to estimate log ratio and B-allele frequency for total and allele-specific copy number analysis. Rawcopy achieves better signal-to-noise ratio and higher proportion of validated alterations than commonly used free and proprietary alternatives. In addition, Rawcopy visualizes each microarray sample for assessment of technical quality, patient identity and genome-wide absolute copy number states. Software and instructions are available at http://rawcopy.org.",2016-10-31 +26787660,Multi-omics enrichment analysis using the GeneTrail2 web service.,"

Motivation

Gene set analysis has revolutionized the interpretation of high-throughput transcriptomic data. Nowadays, with comprehensive studies that measure multiple -omics from the same sample, powerful tools for the integrative analysis of multi-omics datasets are required.

Results

Here, we present GeneTrail2, a web service allowing the integrated analysis of transcriptomic, miRNomic, genomic and proteomic datasets. It offers multiple statistical tests, a large number of predefined reference sets, as well as a comprehensive collection of biological categories and enables direct comparisons between the computed results. We used GeneTrail2 to explore pathogenic mechanisms of Wilms tumors. We not only succeeded in revealing signaling cascades that may contribute to the malignancy of blastemal subtype tumors but also identified potential biomarkers for nephroblastoma with adverse prognosis. The presented use-case demonstrates that GeneTrail2 is well equipped for the integrative analysis of comprehensive -omics data and may help to shed light on complex pathogenic mechanisms in cancer and other diseases.

Availability and implementation

GeneTrail2 can be freely accessed under https://genetrail2.bioinf.uni-sb.de

Contact

: dstoeckel@bioinf.uni-sb.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-18 +,Visualization of molecular processes associated with seed dormancy and germination using MapMan,"Seed dormancy and germination involve the concerted operation of molecular and biochemical programmes. It has become feasible to study these processes in great detail, using the current methods for transcriptome, proteome and metabolome analysis. Yet, the large amounts of data generated by these methods are often dazzling and demand efficient tools for data visualization. We have used the freely available PageMan/MapMan package (http://MapMan.gabipd.org) to visualize transcriptome and metabolome changes in Arabidopsis thaliana seeds during dormancy and germination. Using this package we developed two seed-specific MapMan pathways, which efficiently capture the most important molecular processes in seeds. The results demonstrated the usefulness of the PageMan/MapMan package for seed research.",2011-06-01 +22701464,"pep2pro: the high-throughput proteomics data processing, analysis, and visualization tool.","The pep2pro database was built to support effective high-throughput proteome data analysis. Its database schema allows the coherent integration of search results from different database-dependent search algorithms and filtering of the data including control for unambiguous assignment of peptides to proteins. The capacity of the pep2pro database has been exploited in data analysis of various Arabidopsis proteome datasets. The diversity of the datasets and the associated scientific questions required thorough querying of the data. This was supported by the relational format structure of the data that links all information on the sample, spectrum, search database, and algorithm to peptide and protein identifications and their post-translational modifications. After publication of datasets they are made available on the pep2pro website at www.pep2pro.ethz.ch. 
Further, the pep2pro data analysis pipeline also handles data export do the PRIDE database (http://www.ebi.ac.uk/pride) and data retrieval by the MASCP Gator (http://gator.masc-proteomics.org/). The utility of pep2pro will continue to be used for analysis of additional datasets and as a data warehouse. The capacity of the pep2pro database for proteome data analysis has now also been made publicly available through the release of pep2pro4all, which consists of a database schema and a script that will populate the database with mass spectrometry data provided in mzIdentML format.",2012-06-11 +23180786,WormQTL--public archive and analysis web portal for natural variation data in Caenorhabditis spp.,"Here, we present WormQTL (http://www.wormqtl.org), an easily accessible database enabling search, comparative analysis and meta-analysis of all data on variation in Caenorhabditis spp. Over the past decade, Caenorhabditis elegans has become instrumental for molecular quantitative genetics and the systems biology of natural variation. These efforts have resulted in a valuable amount of phenotypic, high-throughput molecular and genotypic data across different developmental worm stages and environments in hundreds of C. elegans strains. WormQTL provides a workbench of analysis tools for genotype-phenotype linkage and association mapping based on but not limited to R/qtl (http://www.rqtl.org). All data can be uploaded and downloaded using simple delimited text or Excel formats and are accessible via a public web user interface for biologists and R statistic and web service interfaces for bioinformaticians, based on open source MOLGENIS and xQTL workbench software. WormQTL welcomes data submissions from other worm researchers.",2012-11-24 +27286002,Male Breast Cancer Incidence and Mortality Risk in the Japanese Atomic Bomb Survivors - Differences in Excess Relative and Absolute Risk from Female Breast Cancer.,"

Background

There are well-known associations of ionizing radiation with female breast cancer, and emerging evidence also for male breast cancer. In the United Kingdom, female breast cancer following occupational radiation exposure is among that set of cancers eligible for state compensation and consideration is currently being given to an extension to include male breast cancer.

Objectives

We compare radiation-associated excess relative and absolute risks of male and female breast cancers.

Methods

Breast cancer incidence and mortality data in the Japanese atomic-bomb survivors were analyzed using relative and absolute risk models via Poisson regression.

Results

We observed significant (p ≤ 0.01) dose-related excess risk for male breast cancer incidence and mortality. For incidence and mortality data, there are elevations by factors of approximately 15 and 5, respectively, of relative risk for male compared with female breast cancer incidence, the former borderline significant (p = 0.050). In contrast, for incidence and mortality data, there are elevations by factors of approximately 20 and 10, respectively, of female absolute risk compared with male, both statistically significant (p < 0.001). There are no indications of differences between the sexes in age/time-since-exposure/age-at-exposure modifications to the relative or absolute excess risk. The probability of causation of male breast cancer following radiation exposure exceeds by at least a factor of 5 that of many other malignancies.

Conclusions

There is evidence of much higher radiation-associated relative risk for male than for female breast cancer, although absolute excess risks for males are much less than for females. However, the small number of male cases and deaths suggests a degree of caution in interpretation of this finding. Citation: Little MP, McElvenny DM. 2017. Male breast cancer incidence and mortality risk in the Japanese atomic bomb survivors - differences in excess relative and absolute risk from female breast cancer. Environ Health Perspect 125:223-229; http://dx.doi.org/10.1289/EHP151.",2016-06-10 +26685309,Fast integration-based prediction bands for ordinary differential equation models.,"

Motivation

To gain a deeper understanding of biological processes and their relevance in disease, mathematical models are built upon experimental data. Uncertainty in the data leads to uncertainties of the model's parameters and in turn to uncertainties of predictions. Mechanistic dynamic models of biochemical networks are frequently based on nonlinear differential equation systems and feature a large number of parameters, sparse observations of the model components and lack of information in the available data. Due to the curse of dimensionality, classical and sampling approaches propagating parameter uncertainties to predictions are hardly feasible and insufficient. However, for experimental design and to discriminate between competing models, prediction and confidence bands are essential. To circumvent the hurdles of the former methods, an approach to calculate a profile likelihood on arbitrary observations for a specific time point has been introduced, which provides accurate confidence and prediction intervals for nonlinear models and is computationally feasible for high-dimensional models.

Results

In this article, reliable and smooth point-wise prediction and confidence bands to assess the model's uncertainty on the whole time-course are achieved via explicit integration with elaborate correction mechanisms. The corresponding system of ordinary differential equations is derived and tested on three established models for cellular signalling. An efficiency analysis is performed to illustrate the computational benefit compared with repeated profile likelihood calculations at multiple time points.

Availability and implementation

The integration framework and the examples used in this article are provided with the software package Data2Dynamics, which is based on MATLAB and freely available at http://www.data2dynamics.org

Contact

helge.hass@fdm.uni-freiburg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-17 +24369152,Discriminative motif optimization based on perceptron training.,"

Motivation

Generating accurate transcription factor (TF) binding site motifs from data generated using the next-generation sequencing, especially ChIP-seq, is challenging. The challenge arises because a typical experiment reports a large number of sequences bound by a TF, and the length of each sequence is relatively long. Most traditional motif finders are slow in handling such enormous amount of data. To overcome this limitation, tools have been developed that compromise accuracy with speed by using heuristic discrete search strategies or limited optimization of identified seed motifs. However, such strategies may not fully use the information in input sequences to generate motifs. Such motifs often form good seeds and can be further improved with appropriate scoring functions and rapid optimization.

Results

We report a tool named discriminative motif optimizer (DiMO). DiMO takes a seed motif along with a positive and a negative database and improves the motif based on a discriminative strategy. We use area under receiver-operating characteristic curve (AUC) as a measure of discriminating power of motifs and a strategy based on perceptron training that maximizes AUC rapidly in a discriminative manner. Using DiMO, on a large test set of 87 TFs from human, drosophila and yeast, we show that it is possible to significantly improve motifs identified by nine motif finders. The motifs are generated/optimized using training sets and evaluated on test sets. The AUC is improved for almost 90% of the TFs on test sets and the magnitude of increase is up to 39%.

Availability and implementation

DiMO is available at http://stormo.wustl.edu/DiMO",2013-12-24 +23707967,The banana genome hub.,"Banana is one of the world's favorite fruits and one of the most important crops for developing countries. The banana reference genome sequence (Musa acuminata) was recently released. Given the taxonomic position of Musa, the completed genomic sequence has particular comparative value to provide fresh insights about the evolution of the monocotyledons. The study of the banana genome has been enhanced by a number of tools and resources that allows harnessing its sequence. First, we set up essential tools such as a Community Annotation System, phylogenomics resources and metabolic pathways. Then, to support post-genomic efforts, we improved banana existing systems (e.g. web front end, query builder), we integrated available Musa data into generic systems (e.g. markers and genetic maps, synteny blocks), we have made interoperable with the banana hub, other existing systems containing Musa data (e.g. transcriptomics, rice reference genome, workflow manager) and finally, we generated new results from sequence analyses (e.g. SNP and polymorphism analysis). Several uses cases illustrate how the Banana Genome Hub can be used to study gene families. Overall, with this collaborative effort, we discuss the importance of the interoperability toward data integration between existing information systems. Database URL: http://banana-genome.cirad.fr/",2013-05-23 +28749778,Serum Vaccine Antibody Concentrations in Adolescents Exposed to Perfluorinated Compounds.,"

Background

Postnatal exposure to perfluorinated alkylate substances (PFASs) is associated with lower serum concentrations of specific antibodies against certain childhood vaccines at 7 y.

Objectives

We prospectively followed a Faroese birth cohort to determine these associations at 13 y.

Methods

In 516 subjects (79% of eligible cohort members) who were 13 years old, serum concentrations of PFASs and of antibodies against diphtheria and tetanus were measured and were compared with data from the previous examination at 7 y. Multiple regression analyses and structural equation models were applied to determine the association between postnatal PFAS exposures and antibody concentrations.

Results

Serum concentrations of PFASs and antibodies generally declined from 7 y to 13 y. However, 68 subjects had visited the emergency room and had likely received a vaccination booster, and a total of 202 children showed higher vaccine antibody concentrations at 13 y than at 7 y. Therefore, separate analyses were conducted after exclusion of these two subgroups. Diphtheria antibody concentrations decreased at elevated PFAS concentrations at 13 y and 7 y; the associations were statistically significant for perfluorodecanoate (PFDA) at 7 y and for perfluorooctanoate (PFOA) at 13 y, both suggesting a decrease by ∼25% for each doubling of exposure. Structural equation models showed that a doubling in PFAS exposure at 7 y was associated with losses in diphtheria antibody concentrations at 13 y of 10–30% for the five PFASs. Few associations were observed for anti-tetanus concentrations.

Conclusions

These results are in accord with previous findings of PFAS immunotoxicity at current exposure levels. https://doi.org/10.1289/EHP275.",2017-07-26 +23680785,Catalytic site identification--a web server to identify catalytic site structural matches throughout PDB.,"The catalytic site identification web server provides the innovative capability to find structural matches to a user-specified catalytic site among all Protein Data Bank proteins rapidly (in less than a minute). The server also can examine a user-specified protein structure or model to identify structural matches to a library of catalytic sites. Finally, the server provides a database of pre-calculated matches between all Protein Data Bank proteins and the library of catalytic sites. The database has been used to derive a set of hypothesized novel enzymatic function annotations. In all cases, matches and putative binding sites (protein structure and surfaces) can be visualized interactively online. The website can be accessed at http://catsid.llnl.gov.",2013-05-16 +27286763,Evaluation of RPE-Select: A Web-Based Respiratory Protective Equipment Selector Tool.,"This article describes the evaluation of an open-access web-based respiratory protective equipment selector tool (RPE-Select, accessible at http://www.healthyworkinglives.com/rpe-selector). This tool is based on the principles of the COSHH-Essentials (C-E) control banding (CB) tool, which was developed for the exposure risk management of hazardous chemicals in the workplace by small and medium sized enterprises (SMEs) and general practice H&S professionals. RPE-Select can be used for identifying adequate and suitable RPE for dusts, fibres, mist (solvent, water, and oil based), sprays, volatile solids, fumes, gases, vapours, and actual or potential oxygen deficiency. 
It can be applied for substances and products with safety data sheets as well as for a large number of commonly encountered process-generated substances (PGS), such as poultry house dusts or welding fume. Potential international usability has been built-in by using the Hazard Statements developed for the Globally Harmonised System (GHS) and providing recommended RPE in picture form as well as with a written specification. Illustration helps to compensate for the variabilities in assigned protection factors across the world. RPE-Select uses easily understandable descriptions/explanations and an interactive stepwise flow for providing input/answers at each step. The output of the selection process is a report summarising the user input data and a selection of RPE, including types of filters where applicable, from which the user can select the appropriate one for each wearer. In addition, each report includes 'Dos' and 'Don'ts' for the recommended RPE. RPE-Select outcomes, based on up to 20 hypothetical use scenarios, were evaluated in comparison with other available RPE selection processes and tools, and by 32 independent users with a broad range of familiarities with industrial use scenarios in general and respiratory protection in particular. For scenarios involving substances having safety data sheets, 87% of RPE-Select outcomes resulted in a 'safe' RPE selection, while 98% 'safe' outcomes were achieved for scenarios involving process-generated substances. Reasons for the outliers were examined. User comments and opinions on the mechanics and usability of RPE-Select are also presented.",2016-06-10 +26038725,PANDA: pathway and annotation explorer for visualizing and interpreting gene-centric data.,"Objective. Bringing together genomics, transcriptomics, proteomics, and other -omics technologies is an important step towards developing highly personalized medicine. 
However, instrumentation has advances far beyond expectations and now we are able to generate data faster than it can be interpreted. Materials and Methods. We have developed PANDA (Pathway AND Annotation) Explorer, a visualization tool that integrates gene-level annotation in the context of biological pathways to help interpret complex data from disparate sources. PANDA is a web-based application that displays data in the context of well-studied pathways like KEGG, BioCarta, and PharmGKB. PANDA represents data/annotations as icons in the graph while maintaining the other data elements (i.e., other columns for the table of annotations). Custom pathways from underrepresented diseases can be imported when existing data sources are inadequate. PANDA also allows sharing annotations among collaborators. Results. In our first use case, we show how easy it is to view supplemental data from a manuscript in the context of a user's own data. Another use-case is provided describing how PANDA was leveraged to design a treatment strategy from the somatic variants found in the tumor of a patient with metastatic sarcomatoid renal cell carcinoma. Conclusion. PANDA facilitates the interpretation of gene-centric annotations by visually integrating this information with context of biological pathways. The application can be downloaded or used directly from our website: http://bioinformaticstools.mayo.edu/research/panda-viewer/.",2015-05-19 +24244640,CNVannotator: a comprehensive annotation server for copy number variation in the human genome.,"Copy number variation (CNV) is one of the most prevalent genetic variations in the genome, leading to an abnormal number of copies of moderate to large genomic regions. High-throughput technologies such as next-generation sequencing often identify thousands of CNVs involved in biological or pathological processes. 
Despite the growing demand to filter and classify CNVs by factors such as frequency in population, biological features, and function, surprisingly, no online web server for CNV annotations has been made available to the research community. Here, we present CNVannotator, a web server that accepts an input set of human genomic positions in a user-friendly tabular format. CNVannotator can perform genomic overlaps of the input coordinates using various functional features, including a list of the reported 356,817 common CNVs, 181,261 disease CNVs, as well as, 140,342 SNPs from genome-wide association studies. In addition, CNVannotator incorporates 2,211,468 genomic features, including ENCODE regulatory elements, cytoband, segmental duplication, genome fragile site, pseudogene, promoter, enhancer, CpG island, and methylation site. For cancer research community users, CNVannotator can apply various filters to retrieve a subgroup of CNVs pinpointed in hundreds of tumor suppressor genes and oncogenes. In total, 5,277,234 unique genomic coordinates with functional features are available to generate an output in a plain text format that is free to download. In summary, we provide a comprehensive web resource for human CNVs. The annotated results along with the server can be accessed at http://bioinfo.mc.vanderbilt.edu/CNVannotator/.",2013-11-14 +26568627,Uncovering multiloci-ordering by algebraic property of Laplacian matrix and its Fiedler vector.,"

Motivation

The loci-ordering, based on two-point recombination fractions for a pair of loci, is the most important step in constructing a reliable and fine genetic map.

Results

Using the concept from complex graph theory, here we propose a Laplacian ordering approach which uncovers the loci-ordering of multiloci simultaneously. The algebraic property for a Fiedler vector of a Laplacian matrix, constructed from the recombination fraction of the loci-ordering for 26 loci of barley chromosome IV, 846 loci of Arabidopsis thaliana and 1903 loci of Malus domestica, together with the variable threshold uncovers their loci-orders. It offers an alternative yet robust approach for ordering multiloci.

Availability and implementation

Source code program with data set is available as supplementary data and also in a software category of the website (http://biophysics.dgist.ac.kr)

Contact

crkim@pusan.ac.kr or iksoochang@dgist.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-14 +28943782,The BSM-AI project: SUSY-AI-generalizing LHC limits on supersymmetry with machine learning.,"A key research question at the Large Hadron Collider is the test of models of new physics. Testing if a particular parameter set of such a model is excluded by LHC data is a challenge: it requires time consuming generation of scattering events, simulation of the detector response, event reconstruction, cross section calculations and analysis code to test against several hundred signal regions defined by the ATLAS and CMS experiments. In the BSM-AI project we approach this challenge with a new idea. A machine learning tool is devised to predict within a fraction of a millisecond if a model is excluded or not directly from the model parameters. A first example is SUSY-AI, trained on the phenomenological supersymmetric standard model (pMSSM). About 300, 000 pMSSM model sets - each tested against 200 signal regions by ATLAS - have been used to train and validate SUSY-AI. The code is currently able to reproduce the ATLAS exclusion regions in 19 dimensions with an accuracy of at least [Formula: see text]. It has been validated further within the constrained MSSM and the minimal natural supersymmetric model, again showing high accuracy. SUSY-AI and its future BSM derivatives will help to solve the problem of recasting LHC results for any model of new physics. SUSY-AI can be downloaded from http://susyai.hepforge.org/. An on-line interface to the program for quick testing purposes can be found at http://www.susy-ai.org/.",2017-04-24 +23828784,Automated QuantMap for rapid quantitative molecular network topology analysis.,"

Summary

The previously disclosed QuantMap method for grouping chemicals by biological activity used online services for much of the data gathering and some of the numerical analysis. The present work attempts to streamline this process by using local copies of the databases and in-house analysis. Using computational methods similar or identical to those used in the previous work, a qualitatively equivalent result was found in just a few seconds on the same dataset (collection of 18 drugs). We use the user-friendly Galaxy framework to enable users to analyze their own datasets. Hopefully, this will make the QuantMap method more practical and accessible and help achieve its goals to provide substantial assistance to drug repositioning, pharmacology evaluation and toxicology risk assessment.

Availability

http://galaxy.predpharmtox.org

Contact

mats.gustafsson@medsci.uu.se or ola.spjuth@farmbio.uu.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-04 +23143105,MonarchBase: the monarch butterfly genome database.,"The monarch butterfly (Danaus plexippus) is emerging as a model organism to study the mechanisms of circadian clocks and animal navigation, and the genetic underpinnings of long-distance migration. The initial assembly of the monarch genome was released in 2011, and the biological interpretation of the genome focused on the butterfly's migration biology. To make the extensive data associated with the genome accessible to the general biological and lepidopteran communities, we established MonarchBase (available at http://monarchbase.umassmed.edu). The database is an open-access, web-available portal that integrates all available data associated with the monarch butterfly genome. Moreover, MonarchBase provides access to an updated version of genome assembly (v3) upon which all data integration is based. These include genes with systematic annotation, as well as other molecular resources, such as brain expressed sequence tags, migration expression profiles and microRNAs. MonarchBase utilizes a variety of retrieving methods to access data conveniently and for integrating biological interpretations.",2012-11-09 +23601403,The new on-line Czech Food Composition Database.,"The new on-line Czech Food Composition Database (FCDB) was launched on http://www.czfcdb.cz in December 2010 as a main freely available channel for dissemination of Czech food composition data. The application is based on a complied FCDB documented according to the EuroFIR standardised procedure for full value documentation and indexing of foods by the LanguaL™ Thesaurus. A content management system was implemented for administration of the website and performing data export (comma-separated values or EuroFIR XML transport package formats) by a compiler. 
Reference/s are provided for each published value with linking to available freely accessible on-line sources of data (e.g. full texts, EuroFIR Document Repository, on-line national FCDBs). LanguaL™ codes are displayed within each food record as searchable keywords of the database. A photo (or a photo gallery) is used as a visual descriptor of a food item. The application is searchable on foods, components, food groups, alphabet and a multi-field advanced search.",2013-02-11 +27581938,Insights into horizontal acquisition patterns of dormancy and reactivation regulon genes in mycobacterial species using a partitioning-based framework.,"Horizontal Gene Transfer (HGT) events, initially thought to be rare in Mycobacterium tuberculosis, have recently been shown to be involved in the acquisition of virulence operons in M. tuberculosis. We have developed a new partitioning framework based HGT prediction algorithm, called Grid3M, and applied the same for the prediction of HGTs in Mycobacteria. Validation and testing using simulated and real microbial genomes indicated better performance of Grid3M as compared with other widely used HGT prediction methods. Specific analysis of the genes belonging to dormancy/reactivation regulons across 14 mycobacterial genomes indicated that horizontal acquisition is specifically restricted to important accessory proteins. The results also revealed Burkholderia species to be a probable source of HGT genes belonging to these regulons. The current study provides a basis for similar analyses investigating the functional/evolutionary aspects of HGT genes in other pathogens. A database of Grid3M predicted HGTs in completely sequenced genomes is available at https://metagenomics.atc.tcs.com/Grid3M/.",2016-09-01 +27587666,"PanTools: representation, storage and exploration of pan-genomic data.","

Motivation

Next-generation sequencing technology is generating a wealth of highly similar genome sequences for many species, paving the way for a transition from single-genome to pan-genome analyses. Accordingly, genomics research is going to switch from reference-centric to pan-genomic approaches. We define the pan-genome as a comprehensive representation of multiple annotated genomes, facilitating analyses on the similarity and divergence of the constituent genomes at the nucleotide, gene and genome structure level. Current pan-genomic approaches do not thoroughly address scalability, functionality and usability.

Results

We introduce a generalized De Bruijn graph as a pan-genome representation, as well as an online algorithm to construct it. This representation is stored in a Neo4j graph database, which makes our approach scalable to large eukaryotic genomes. Besides the construction algorithm, our software package, called PanTools, currently provides functionality for annotating pan-genomes, adding sequences, grouping genes, retrieving gene sequences or genomic regions, reconstructing genomes and comparing and querying pan-genomes. We demonstrate the performance of the tool using datasets of 62 E. coli genomes, 93 yeast genomes and 19 Arabidopsis thaliana genomes.

Availability and implementation

The Java implementation of PanTools is publicly available at http://www.bif.wur.nl

Contact

sandra.smit@wur.nl.",2016-09-01 +25925574,"i-cisTarget 2015 update: generalized cis-regulatory enrichment analysis in human, mouse and fly.","i-cisTarget is a web tool to predict regulators of a set of genomic regions, such as ChIP-seq peaks or co-regulated/similar enhancers. i-cisTarget can also be used to identify upstream regulators and their target enhancers starting from a set of co-expressed genes. Whereas the original version of i-cisTarget was focused on Drosophila data, the 2015 update also provides support for human and mouse data. i-cisTarget detects transcription factor motifs (position weight matrices) and experimental data tracks (e.g. from ENCODE, Roadmap Epigenomics) that are enriched in the input set of regions. As experimental data tracks we include transcription factor ChIP-seq data, histone modification ChIP-seq data and open chromatin data. The underlying processing method is based on a ranking-and-recovery procedure, allowing accurate determination of enrichment across heterogeneous datasets, while also discriminating direct from indirect target regions through a 'leading edge' analysis. We illustrate i-cisTarget on various Ewing sarcoma datasets to identify EWS-FLI1 targets starting from ChIP-seq, differential ATAC-seq, differential H3K27ac and differential gene expression data. Use of i-cisTarget is free and open to all, and there is no login requirement. Address: http://gbiomed.kuleuven.be/apps/lcb/i-cisTarget.",2015-04-29 +25619353,[Diagnosis of MECP2 duplication syndrome with molecular genetic techniques].,"

Objective

To investigate whether the four boys with delayed motor development and intellectual disability suffer from MECP 2 duplication syndrome.

Method

Blood specimens and clinical data of four patients and mothers of patient 2 and patient 4 were collected. Genomic DNA was extracted from peripheral blood using DNA extraction kit. At first multiplex ligation-dependent probe amplification (MLPA) was employed in 4 patients, two distinct kits SALSA P036 and P070 for sub-telomere screening, and SALSA P245 for the 22 common microdeletion and microduplication syndromes. Then array-CGH analysis was carried out. Two mothers of patients were tested by array- comparative genomic hybridization (CGH) and X chromosome inactivation analysis.

Result

All the 4 patients presented with severe hypotonia, delayed motor development, intellectual disability and absent or limited language. Three patients manifested recurrent pneumonia in infancy except patient 2. Four patients had duplication on chromosome Xq28 with MLPA kit SALSA P245. Array-CGH identified the size of each duplication on Xq28. The precise size of each duplication was different in the four patients: patient 1, 14.931 Mb, patient 2, 0.393 Mb, patient 3, 0.482 Mb and patient 4, 0.299 Mb. To compare Xq28 duplications with UCSC database (http://genome.ucsc.edu/) revealed that each duplication harbors the MECP 2 and HCFC 1 gene. Mothers of patient 2 and patient 4 also carried microduplication on Xq28. X chromosome inactivation analysis demonstrated completely skewed inactivation (0: 100) and it is the inactive allele that passed on to the patients.

Conclusion

For patients that present with delayed motor development, intellectual disability, hypotonia, absent or limited language and recurrent infection, the combination of MLPA and array-CGH is an effective and specific diagnostic method for MECP 2 duplication syndrome.",2014-12-01 +26656006,Fitchi: haplotype genealogy graphs based on the Fitch algorithm.,"

Unlabelled

: In population genetics and phylogeography, haplotype genealogy graphs are important tools for the visualization of population structure based on sequence data. In this type of graph, node sizes are often drawn in proportion to haplotype frequencies and edge lengths represent the minimum number of mutations separating adjacent nodes. I here present Fitchi, a new program that produces publication-ready haplotype genealogy graphs based on the Fitch algorithm.

Availability and implementation

http://www.evoinformatics.eu/fitchi.htm

Contact

: michaelmatschiner@mac.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-09 +26529777,Ontology-Based Search of Genomic Metadata.,"The Encyclopedia of DNA Elements (ENCODE) is a huge and still expanding public repository of more than 4,000 experiments and 25,000 data files, assembled by a large international consortium since 2007; unknown biological knowledge can be extracted from these huge and largely unexplored data, leading to data-driven genomic, transcriptomic, and epigenomic discoveries. Yet, search of relevant datasets for knowledge discovery is limitedly supported: metadata describing ENCODE datasets are quite simple and incomplete, and not described by a coherent underlying ontology. Here, we show how to overcome this limitation, by adopting an ENCODE metadata searching approach which uses high-quality ontological knowledge and state-of-the-art indexing technologies. Specifically, we developed S.O.S. GeM (http://www.bioinformatics.deib.polimi.it/SOSGeM/), a system supporting effective semantic search and retrieval of ENCODE datasets. First, we constructed a Semantic Knowledge Base by starting with concepts extracted from ENCODE metadata, matched to and expanded on biomedical ontologies integrated in the well-established Unified Medical Language System. We prove that this inference method is sound and complete. Then, we leveraged the Semantic Knowledge Base to semantically search ENCODE data from arbitrary biologists' queries. This allows correctly finding more datasets than those extracted by a purely syntactic search, as supported by the other available systems. We empirically show the relevance of found datasets to the biologists' queries.",2015-10-26 +27580923,Skeleton Genetics: a comprehensive database for genes and mutations related to genetic skeletal disorders. 
,"Genetic skeletal disorders (GSD) involving the skeletal system arise through disturbances in the complex processes of skeletal development, growth and homeostasis and remain a diagnostic challenge because of their clinical heterogeneity and genetic variety. Over the past decades, tremendous efforts have been made to explore the complex heterogeneity, and massive new genes and mutations have been identified in different GSD, but the information supplied by literature is still limited and it is hard to meet the further needs of scientists and clinicians. In this study, combined with Nosology and Classification of genetic skeletal disorders, we developed the first comprehensive and annotated genetic skeletal disorders database, named 'SkeletonGenetics', which contains information about all GSD-related knowledge including 8225 mutations in 357 genes, with detailed information associated with 481 clinical diseases (2260 clinical phenotype) classified in 42 groups defined by molecular, biochemical and/or radiographic criteria from 1698 publications. Further annotations were performed to each entry including Gene Ontology, pathways analysis, protein-protein interaction, mutation annotations, disease-disease clustering and gene-disease networking. Furthermore, using concise search methods, intuitive graphical displays, convenient browsing functions and constantly updatable features, 'SkeletonGenetics' could serve as a central and integrative database for unveiling the genetic and pathways pre-dispositions of GSD. Database URL: http://101.200.211.232/skeletongenetics/.",2016-08-31 +27585568,GRASPx: efficient homolog-search of short peptide metagenome database through simultaneous alignment and assembly.,"

Background

Metagenomics is a cultivation-independent approach that enables the study of the genomic composition of microbes present in an environment. Metagenomic samples are routinely sequenced using next-generation sequencing technologies that generate short nucleotide reads. Proteins identified from these reads are mostly of partial length. On the other hand, de novo assembly of a large metagenomic dataset is computationally demanding and the assembled contigs are often fragmented, resulting in the identification of protein sequences that are also of partial length and incomplete. Annotation of an incomplete protein sequence often proceeds by identifying its homologs in a database of reference sequences. Identifying the homologs of incomplete sequences is a challenge and can result in substandard annotation of proteins from metagenomic datasets. To address this problem, we recently developed a homology detection algorithm named GRASP (Guided Reference-based Assembly of Short Peptides) that identifies the homologs of a given reference protein sequence in a database of short peptide metagenomic sequences. GRASP was developed to implement a simultaneous alignment and assembly algorithm for annotation of short peptides identified on metagenomic reads. The program achieves significantly improved recall rate at the cost of computational efficiency. In this article, we adopted three techniques to speed up the original version of GRASP, including the pre-construction of extension links, local assembly of individual seeds, and the implementation of query-level parallelism.

Results

The resulting new program, GRASPx, achieves >30X speedup compared to its predecessor GRASP. At the same time, we show that the performance of GRASPx is consistent with that of GRASP, and that both of them significantly outperform other popular homology-search tools including the BLAST and FASTA suites. GRASPx was also applied to a human saliva metagenome dataset and shows superior performance for both recall and precision rates.

Conclusions

In this article we present GRASPx, a fast and accurate homology-search program implementing a simultaneous alignment and assembly framework. GRASPx can be used for more comprehensive and accurate annotation of short peptides. GRASPx is freely available at http://graspx.sourceforge.net/ .",2016-08-31 +24146773,The AERO system: a 3D-like approach for recording gene expression patterns in the whole mouse embryo.,"We have recently constructed a web-based database of gene expression in the mouse whole embryo, EMBRYS (http://embrys.jp/embrys/html/MainMenu.html). To allow examination of gene expression patterns to the fullest extent possible, this database provides both photo images and annotation data. However, since embryos develop via an intricate process of morphogenesis, it would be of great value to track embryonic gene expression from a three dimensional perspective. In fact, several methods have been developed to achieve this goal, but highly laborious procedures and specific operational skills are generally required. We utilized a novel microscopic technique that enables the easy capture of rotational, 3D-like images of the whole embryo. In this method, a rotary head equipped with two mirrors that are designed to obtain an image tilted at 45 degrees to the microscope stage captures serial images at 2-degree intervals. By a simple operation, 180 images are automatically collected. These 2D images obtained at multiple angles are then used to reconstruct 3D-like images, termed AERO images. By means of this system, over 800 AERO images of 191 gene expression patterns were captured. These images can be easily rotated on the computer screen using the EMBRYS database so that researchers can view an entire embryo by a virtual viewing on a computer screen in an unbiased or non-predetermined manner. 
The advantages afforded by this approach make it especially useful for generating data viewed in public databases.",2013-10-16 +27687984,MITK-OpenIGTLink for combining open-source toolkits in real-time computer-assisted interventions.,"

Purpose

Due to rapid developments in the research areas of medical imaging, medical image processing and robotics, computer-assisted interventions (CAI) are becoming an integral part of modern patient care. From a software engineering point of view, these systems are highly complex and research can benefit greatly from reusing software components. This is supported by a number of open-source toolkits for medical imaging and CAI such as the medical imaging interaction toolkit (MITK), the public software library for ultrasound imaging research (PLUS) and 3D Slicer. An independent inter-toolkit communication such as the open image-guided therapy link (OpenIGTLink) can be used to combine the advantages of these toolkits and enable an easier realization of a clinical CAI workflow.

Methods

MITK-OpenIGTLink is presented as a network interface within MITK that allows easy to use, asynchronous two-way messaging between MITK and clinical devices or other toolkits. Performance and interoperability tests with MITK-OpenIGTLink were carried out considering the whole CAI workflow from data acquisition over processing to visualization.

Results

We present how MITK-OpenIGTLink can be applied in different usage scenarios. In performance tests, tracking data were transmitted with a frame rate of up to 1000 Hz and a latency of 2.81 ms. Transmission of images with typical ultrasound (US) and greyscale high-definition (HD) resolutions of [Formula: see text] and [Formula: see text] is possible at up to 512 and 128 Hz, respectively.

Conclusion

With the integration of OpenIGTLink into MITK, this protocol is now supported by all established open-source toolkits in the field. This eases interoperability between MITK and toolkits such as PLUS or 3D Slicer and facilitates cross-toolkit research collaborations. MITK and its submodule MITK-OpenIGTLink are provided open source under a BSD-style licence ( http://mitk.org ).",2016-09-29 +26680022,MethGo: a comprehensive tool for analyzing whole-genome bisulfite sequencing data.,"

Background

DNA methylation is a major epigenetic modification regulating several biological processes. A standard approach to measure DNA methylation is bisulfite sequencing (BS-Seq). BS-Seq couples bisulfite conversion of DNA with next-generation sequencing to profile genome-wide DNA methylation at single base resolution. The analysis of BS-Seq data involves the use of customized aligners for mapping bisulfite converted reads and the bioinformatic pipelines for downstream data analysis.

Results

Here we developed MethGo, a software tool designed for the analysis of data from whole-genome bisulfite sequencing (WGBS) and reduced representation bisulfite sequencing (RRBS). MethGo provides both genomic and epigenomic analyses including: 1) coverage distribution of each cytosine; 2) global cytosine methylation level; 3) cytosine methylation level distribution; 4) cytosine methylation level of genomic elements; 5) chromosome-wide cytosine methylation level distribution; 6) Gene-centric cytosine methylation level; 7) cytosine methylation levels at transcription factor binding sites (TFBSs); 8) single nucleotide polymorphism (SNP) calling, and 9) copy number variation (CNV) calling.

Conclusions

MethGo is a simple and effective tool for the analysis of BS-Seq data including both WGBS and RRBS. It contains 9 analyses in 5 major modules to profile the (epi)genome. It profiles genome-wide DNA methylation at both the global and the gene level. It can also analyze the methylation pattern around the transcription factor binding sites, and assess genetic variations such as SNPs and CNVs. MethGo is coded in Python and is publicly available at http://paoyangchen-laboratory.github.io/methgo/.",2015-12-09 +27587672,Fast genotyping of known SNPs through approximate k-mer matching.,"

Motivation

As the volume of next-generation sequencing (NGS) data increases, faster algorithms become necessary. Although speeding up individual components of a sequence analysis pipeline (e.g. read mapping) can reduce the computational cost of analysis, such approaches do not take full advantage of the particulars of a given problem. One problem of great interest, genotyping a known set of variants (e.g. dbSNP or Affymetrix SNPs), is important for characterization of known genetic traits and causative disease variants within an individual, as well as the initial stage of many ancestral and population genomic pipelines (e.g. GWAS).

Results

We introduce lightweight assignment of variant alleles (LAVA), an NGS-based genotyping algorithm for a given set of SNP loci, which takes advantage of the fact that approximate matching of mid-size k-mers (with k = 32) can typically uniquely identify loci in the human genome without full read alignment. LAVA accurately calls the vast majority of SNPs in dbSNP and Affymetrix's Genome-Wide Human SNP Array 6.0 up to about an order of magnitude faster than standard NGS genotyping pipelines. For Affymetrix SNPs, LAVA has significantly higher SNP calling accuracy than existing pipelines while using as low as ∼5 GB of RAM. As such, LAVA represents a scalable computational method for population-level genotyping studies as well as a flexible NGS-based replacement for SNP arrays.

Availability and implementation

LAVA software is available at http://lava.csail.mit.edu

Contact

bab@mit.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +28535831,CSOLNP: Numerical Optimization Engine for Solving Non-linearly Constrained Problems.,"We introduce the optimizer CSOLNP, which is a C++ implementation of the R package RSOLNP (Ghalanos & Theussl, 2012, Rsolnp: General non-linear optimization using augmented Lagrange multiplier method. R package version, 1) alongside some improvements. CSOLNP solves non-linearly constrained optimization problems using a Sequential Quadratic Programming (SQP) algorithm. CSOLNP, NPSOL (a very popular implementation of SQP method in FORTRAN (Gill et al., 1986, User's guide for NPSOL (version 4.0): A Fortran package for nonlinear programming (No. SOL-86-2). Stanford, CA: Stanford University Systems Optimization Laboratory), and SLSQP (another SQP implementation available as part of the NLOPT collection (Johnson, 2014, The NLopt nonlinear-optimization package. Retrieved from http://ab-initio.mit.edu/nlopt)) are three optimizers available in OpenMx package. These optimizers are compared in terms of runtimes, final objective values, and memory consumption. A Monte Carlo analysis of the performance of the optimizers was performed on ordinal and continuous models with five variables and one or two factors. While the relative difference between the objective values is less than 0.5%, CSOLNP is in general faster than NPSOL and SLSQP for ordinal analysis. As for continuous data, none of the optimizers performs consistently faster than the others. In terms of memory usage, we used Valgrind's heap profiler tool, called Massif, on one-factor threshold models. 
CSOLNP and NPSOL consume the same amount of memory, while SLSQP uses 71 MB more memory than the other two optimizers.",2017-05-24 +23749449,Phylogenetic portrait of the Saccharomyces cerevisiae functional genome.,"The genome of budding yeast (Saccharomyces cerevisiae) contains approximately 5800 protein-encoding genes, the majority of which are associated with some known biological function. Yet the extent of amino acid sequence conservation of these genes over all phyla has only been partially examined. Here we provide a more comprehensive overview and visualization of the conservation of yeast genes and a means for browsing and exploring the data in detail, down to the individual yeast gene, at http://yeast-phylogroups.princeton.edu. We used data from the OrthoMCL database, which has defined orthologs from approximately 150 completely sequenced genomes, including diverse representatives of the archeal, bacterial, and eukaryotic domains. By clustering genes based on similar patterns of conservation, we organized and visualized all the protein-encoding genes in yeast as a single heat map. Most genes fall into one of eight major clusters, called ""phylogroups."" Gene ontology analysis of the phylogroups revealed that they were associated with specific, distinct trends in gene function, generalizations likely to be of interest to a wide range of biologists.",2013-08-07 +27729159,"Universal neonatal hearing screening program in Shanghai, China: An inter-regional and international comparison.","

Objective

By comparing the Universal Neonatal Hearing Screening (UNHS) program as implemented in Shanghai and other regions in China and countries around the world, this study makes an assessment of the Shanghai model and summarizes the experiences implementing the UNHS program, so as to provide a valuable reference for other countries or regions to carry out UNHS more effectively. Since Shanghai is one of the most developed regions in China, we also examined the relationship between economic development and the UNHS starting year and coverage rate.

Methods

The study conducted a systematic review of published studies in Chinese and English on the program status of neonatal hearing screening to compare and analyze the implementation of the UNHS program in 20 cities or provinces in China and 24 regions or countries around the world. The literature search in Chinese was conducted in the three most authoritative publication databases, CNKI (China National Knowledge Infrastructure), WANFANGDATA, and CQVIP (http://www.cqvip.com/). We searched all publications in those databases with the keywords ""neonatal hearing screening"" (in Chinese) between 2005 and 2014. English literature was searched using the same keywords (in English). The publication database included Medline and Web of Science, and the search time period was 2000-2014.

Results

Shanghai was one of the first regions in China to implement UNHS, and its coverage rate was among the top regions by international comparison. The starting time of the UNHS program had no relationship with the Gross Domestic Product (GDP) per capita in the same year. Economic level serves as a threshold for carrying out UNHS but is not a linear contributor to the exact starting time of such a program. The screening coverage rate generally showed a rising trend with the increasing GDP per capita in China, but it had no relationship with the area's GDP per capita in selected regions and countries around the world. The system design of UNHS is the key factor influencing screening coverage. Policy makers, program administrators, and cost-sharing structures are important factors that influence the coverage rates of UNHS.

Conclusion

When to carry out a UNHS program is determined by the willingness and preference of the local government, which is influenced by the area's social, political and cultural conditions. Mandatory hearing screening and minimal-cost to no-cost intervention are two pillars for a good coverage rate of UNHS. In terms of system design, decision-making, implementation, funding and the concrete implementation plan are all important factors affecting the implementation of the UNHS.",2016-08-29 +27775165,Predicting Prostate Cancer Recurrence After Radical Prostatectomy.,"

Background

Prostate cancer prognosis is variable, and management decisions involve balancing patients' risks of recurrence and recurrence-free death. Moreover, the roles of body mass index (BMI) and race in risk of recurrence are controversial [1,2]. To address these issues, we developed and cross-validated RAPS (Risks After Prostate Surgery), a personal prediction model for biochemical recurrence (BCR) within 10 years of radical prostatectomy (RP) that includes BMI and race as possible predictors, and recurrence-free death as a competing risk.

Methods

RAPS uses a patient's risk factors at surgery to assign him a recurrence probability based on statistical learning methods applied to a cohort of 1,276 patients undergoing RP at the University of Pennsylvania. We compared the performance of RAPS to that of an existing model with respect to calibration (by comparing observed and predicted outcomes), and discrimination (using the area under the receiver operating characteristic curve (AUC)).

Results

RAPS' cross-validated BCR predictions provided better calibration than those of an existing model that underestimated patients' risks. Discrimination was similar for the two models, with BCR AUCs of 0.793, 95% confidence interval (0.766-0.820) for RAPS, and 0.780 (0.745-0.815) for the existing model. RAPS' most important BCR predictors were tumor grade, preoperative prostate-specific antigen (PSA) level and BMI; race was less important [3]. RAPS' predictions can be obtained online at https://predict.shinyapps.io/raps.

Conclusion

RAPS' cross-validated BCR predictions were better calibrated than those of an existing model, and BMI information contributed substantially to these predictions. RAPS predictions for recurrence-free death were limited by lack of co-morbidity data; however the model provides a simple framework for extension to include such data. Its use and extension should facilitate decision strategies for post-RP prostate cancer management. Prostate 77:291-298, 2017. © 2016 Wiley Periodicals, Inc.",2016-10-24 +24723577,Modeling the time--varying subjective quality of HTTP video streams with rate adaptations.,"Newly developed hypertext transfer protocol (HTTP)-based video streaming technologies enable flexible rate-adaptation under varying channel conditions. Accurately predicting the users' quality of experience (QoE) for rate-adaptive HTTP video streams is thus critical to achieve efficiency. An important aspect of understanding and modeling QoE is predicting the up-to-the-moment subjective quality of a video as it is played, which is difficult due to hysteresis effects and nonlinearities in human behavioral responses. This paper presents a Hammerstein-Wiener model for predicting the time-varying subjective quality (TVSQ) of rate-adaptive videos. To collect data for model parameterization and validation, a database of longer duration videos with time-varying distortions was built and the TVSQs of the videos were measured in a large-scale subjective study. The proposed method is able to reliably predict the TVSQ of rate adaptive videos. Since the Hammerstein-Wiener model has a very simple structure, the proposed method is suitable for online TVSQ prediction in HTTP-based streaming.",2014-05-01 +22813584,Classification of rhodopsin structures by modern methods of structural bioinformatics.,"We report a classification of the crystallographic structures of bovine and squid rhodopsins corresponding to different stages of their photocycles. 
Using the resource Protein (Structure) Comparison, Knowledge, Similarity, and Information server (ProCKSI, http://www.procksi.net/), selected spatial structures were compared on the basis of classification schemes (dendrograms). To compare the spatial structures of transmembrane proteins, optimal consensus was developed from methods implemented in ProCKSI. Structures were also clustered using principal component analysis, resulting in good agreement with the classification based on the ProCKSI consensus method. Analysis of the results revealed the basic movements of individual transmembrane domains of these proteins that we were able to relate to different stages of the photoactivation of rhodopsin. A combination of methods identified in this study can be used as an up-to-date analytical tool to study the conformational dynamics of membrane receptors.",2012-05-01 +27230879,Computing tumor trees from single cells.,Computational methods have been developed to reconstruct evolutionary lineages from tumors using single-cell genomic data. The resulting tumor trees have important applications in cancer research and clinical oncology.Please see related Research articles: http://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0929-9 and http://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0936-x .,2016-05-26 +23163785,Binomial probability distribution model-based protein identification algorithm for tandem mass spectrometry utilizing peak intensity information.,"Mass spectrometry has become one of the most important technologies in proteomic analysis. Tandem mass spectrometry (LC-MS/MS) is a major tool for the analysis of peptide mixtures from protein samples. The key step of MS data processing is the identification of peptides from experimental spectra by searching public sequence databases. Although a number of algorithms to identify peptides from MS/MS data have been already proposed, e.g. 
Sequest, OMSSA, X!Tandem, Mascot, etc., they are mainly based on statistical models considering only peak-matches between experimental and theoretical spectra, but not peak intensity information. Moreover, different algorithms gave different results from the same MS data, implying their probable incompleteness and questionable reproducibility. We developed a novel peptide identification algorithm, ProVerB, based on a binomial probability distribution model of protein tandem mass spectrometry combined with a new scoring function, making full use of peak intensity information and, thus, enhancing the ability of identification. Compared with Mascot, Sequest, and SQID, ProVerB identified significantly more peptides from LC-MS/MS data sets than the current algorithms at 1% False Discovery Rate (FDR) and provided more confident peptide identifications. ProVerB is also compatible with various platforms and experimental data sets, showing its robustness and versatility. The open-source program ProVerB is available at http://bioinformatics.jnu.edu.cn/software/proverb/ .",2012-11-29 +25883147,The ReproGenomics Viewer: an integrative cross-species toolbox for the reproductive science community.,"We report the development of the ReproGenomics Viewer (RGV), a multi- and cross-species working environment for the visualization, mining and comparison of published omics data sets for the reproductive science community. The system currently embeds 15 published data sets related to gametogenesis from nine model organisms. Data sets have been curated and conveniently organized into broad categories including biological topics, technologies, species and publications. RGV's modular design for both organisms and genomic tools enables users to upload and compare their data with that from the data sets embedded in the system in a cross-species manner. 
The RGV is freely available at http://rgv.genouest.org.",2015-04-16 +28065899,Statistical inference of protein structural alignments using information and compression.,"

Motivation

Structural molecular biology depends crucially on computational techniques that compare protein three-dimensional structures and generate structural alignments (the assignment of one-to-one correspondences between subsets of amino acids based on atomic coordinates). Despite its importance, the structural alignment problem has not been formulated, much less solved, in a consistent and reliable way. To overcome these difficulties, we present here a statistical framework for the precise inference of structural alignments, built on the Bayesian and information-theoretic principle of Minimum Message Length (MML). The quality of any alignment is measured by its explanatory power-the amount of lossless compression achieved to explain the protein coordinates using that alignment.

Results

We have implemented this approach in MMLigner , the first program able to infer statistically significant structural alignments. We also demonstrate the reliability of MMLigner 's alignment results when compared with the state of the art. Importantly, MMLigner can also discover different structural alignments of comparable quality, a challenging problem for oligomers and protein complexes.

Availability and implementation

Source code, binaries and an interactive web version are available at http://lcb.infotech.monash.edu.au/mmligner .

Contact

arun.konagurthu@monash.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +29989551,Long-Term Air Pollution Exposure and Amyotrophic Lateral Sclerosis in Netherlands: A Population-based Case-control Study.,"

Background

Recently, there has been increasing evidence that exposure to air pollution is linked to neurodegenerative diseases, but little is known about the association with amyotrophic lateral sclerosis (ALS).

Objectives

We investigated the association between long-term exposure to air pollution and risk of developing ALS.

Methods

A population-based case-control study was conducted in Netherlands from 1 January 2006 to 1 January 2013. Data from 917 ALS patients and 2,662 controls were analyzed. Annual mean air pollution concentrations were assessed by land use regression (LUR) models developed as part of the European Study of Cohorts for Air Pollution Effects (ESCAPE). Exposure estimates included nitrogen oxides (NO2, NOx), particulate matter (PM) with diameters of <2.5 μm (PM2.5), <10 μm (PM10), between 10 μm and 2.5 μm (PMcoarse), and PM2.5 absorbance. We performed conditional logistic regression analysis using two different multivariate models (model 1 adjusted for age, gender, education, smoking status, alcohol use, body mass index, and socioeconomic status; model 2 additionally adjusted for urbanization degree).

Results

Risk of ALS was significantly increased for individuals in the upper exposure quartile of PM2.5 absorbance [OR=1.67; 95% confidence interval (CI): 1.27, 2.18], NO2 (OR=1.74; 95% CI: 1.32, 2.30), and NOx concentrations (OR=1.38; 95% CI: 1.07, 1.77). These results, except for NOx, remained significant after adjusting additionally for urbanization degree.

Conclusions

Based on a large population-based case-control study, we report evidence for the association between long-term exposure to traffic-related air pollution and increased susceptibility to ALS. Our findings further support the necessity for regulatory public health interventions to combat air pollution levels and provide additional insight into the potential pathophysiology of ALS. https://doi.org/10.1289/EHP1115.",2017-09-27 +26714481,Circlator: automated circularization of genome assemblies using long sequencing reads.,"The assembly of DNA sequence data is undergoing a renaissance thanks to emerging technologies capable of producing reads tens of kilobases long. Assembling complete bacterial and small eukaryotic genomes is now possible, but the final step of circularizing sequences remains unsolved. Here we present Circlator, the first tool to automate assembly circularization and produce accurate linear representations of circular sequences. Using Pacific Biosciences and Oxford Nanopore data, Circlator correctly circularized 26 of 27 circularizable sequences, comprising 11 chromosomes and 12 plasmids from bacteria, the apicoplast and mitochondrion of Plasmodium falciparum and a human mitochondrion. Circlator is available at http://sanger-pathogens.github.io/circlator/ .",2015-12-29 +26446133,SIMToolbox: a MATLAB toolbox for structured illumination fluorescence microscopy.,"

Unlabelled

SIMToolbox is an open-source, modular set of functions for MATLAB equipped with a user-friendly graphical interface and designed for processing two-dimensional and three-dimensional data acquired by structured illumination microscopy (SIM). Both optical sectioning and super-resolution applications are supported. The software is also capable of maximum a posteriori probability image estimation (MAP-SIM), an alternative method for reconstruction of structured illumination images. MAP-SIM can potentially reduce reconstruction artifacts, which commonly occur due to refractive index mismatch within the sample and to imperfections in the illumination.

Availability and implementation

SIMToolbox, example data and the online documentation are freely accessible at http://mmtg.fel.cvut.cz/SIMToolbox.

Contact

ghagen@uccs.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-06 +26424727,Fast Dating Using Least-Squares Criteria and Algorithms.,"Phylogenies provide a useful way to understand the evolutionary history of genetic samples, and data sets with more than a thousand taxa are becoming increasingly common, notably with viruses (e.g., human immunodeficiency virus (HIV)). Dating ancestral events is one of the first, essential goals with such data. However, current sophisticated probabilistic approaches struggle to handle data sets of this size. Here, we present very fast dating algorithms, based on a Gaussian model closely related to the Langley-Fitch molecular-clock model. We show that this model is robust to uncorrelated violations of the molecular clock. Our algorithms apply to serial data, where the tips of the tree have been sampled through times. They estimate the substitution rate and the dates of all ancestral nodes. When the input tree is unrooted, they can provide an estimate for the root position, thus representing a new, practical alternative to the standard rooting methods (e.g., midpoint). Our algorithms exploit the tree (recursive) structure of the problem at hand, and the close relationships between least-squares and linear algebra. We distinguish between an unconstrained setting and the case where the temporal precedence constraint (i.e., an ancestral node must be older that its daughter nodes) is accounted for. With rooted trees, the former is solved using linear algebra in linear computing time (i.e., proportional to the number of taxa), while the resolution of the latter, constrained setting, is based on an active-set method that runs in nearly linear time. With unrooted trees the computing time becomes (nearly) quadratic (i.e., proportional to the square of the number of taxa). In all cases, very large input trees (>10,000 taxa) can easily be processed and transformed into time-scaled trees. 
We compare these algorithms to standard methods (root-to-tip, r8s version of Langley-Fitch method, and BEAST). Using simulated data, we show that their estimation accuracy is similar to that of the most sophisticated methods, while their computing time is much faster. We apply these algorithms on a large data set comprising 1194 strains of Influenza virus from the pdm09 H1N1 Human pandemic. Again the results show that these algorithms provide a very fast alternative with results similar to those of other computer programs. These algorithms are implemented in the LSD software (least-squares dating), which can be downloaded from http://www.atgc-montpellier.fr/LSD/, along with all our data sets and detailed results. An Online Appendix, providing additional algorithm descriptions, tables, and figures can be found in the Supplementary Material available on Dryad at http://dx.doi.org/10.5061/dryad.968t3.",2015-09-30 +26608174,Reads2Type: a web application for rapid microbial taxonomy identification.,"

Background

Identification of bacteria may be based on sequencing and molecular analysis of a specific locus such as 16S rRNA, or a set of loci such as in multilocus sequence typing. In the near future, healthcare institutions and routine diagnostic microbiology laboratories may need to sequence the entire genome of microbial isolates. Therefore we have developed Reads2Type, a web-based tool for taxonomy identification based on whole bacterial genome sequence data.

Results

Raw sequencing data provided by the user are mapped against a set of marker probes that are derived from currently available bacteria complete genomes. Using a dataset of 1003 whole genome sequenced bacteria from various sequencing platforms, Reads2Type was able to identify the species with 99.5 % accuracy and on the minutes time scale.

Conclusions

In comparison with other tools, Reads2Type offers the advantage of not needing to transfer sequencing files, as the entire computational analysis is done on the computer of whom utilizes the web application. This also prevents data privacy issues to arise. The Reads2Type tool is available at http://www.cbs.dtu.dk/~dhany/reads2type.html.",2015-11-25 +23245293,SNPTrack™ : an integrated bioinformatics system for genetic association studies.,"A genetic association study is a complicated process that involves collecting phenotypic data, generating genotypic data, analyzing associations between genotypic and phenotypic data, and interpreting genetic biomarkers identified. SNPTrack is an integrated bioinformatics system developed by the US Food and Drug Administration (FDA) to support the review and analysis of pharmacogenetics data resulting from FDA research or submitted by sponsors. The system integrates data management, analysis, and interpretation in a single platform for genetic association studies. Specifically, it stores genotyping data and single-nucleotide polymorphism (SNP) annotations along with study design data in an Oracle database. It also integrates popular genetic analysis tools, such as PLINK and Haploview. SNPTrack provides genetic analysis capabilities and captures analysis results in its database as SNP lists that can be cross-linked for biological interpretation to gene/protein annotations, Gene Ontology, and pathway analysis data. With SNPTrack, users can do the entire stream of bioinformatics jobs for genetic association studies. SNPTrack is freely available to the public at http://www.fda.gov/ScienceResearch/BioinformaticsTools/SNPTrack/default.htm.",2012-07-05 +27330994,RNA-Seq reveals changes in the Staphylococcus aureus transcriptome following blue light illumination.,"In an effort to better understand the mechanism by which blue light inhibits the growth of Staphylococcus aureus in culture, a whole transcriptome analysis of S. 
aureus isolate BUSA2288 was performed using RNA-Seq to analyze the differential gene expression in response to blue light exposure. RNA was extracted from S. aureus cultures pooled from 24 1 ml well samples that were each illuminated with a dose of 250 J/cm(2) of 465 nm blue light and from control cultures grown in the dark. Complementary DNA libraries were generated from enriched mRNA samples and sequenced using the Illumina MiSeq Next Generation Sequencer. Here we report one type of analysis that identified 32 candidate genes for further investigation. Blue light has been shown to be bactericidal against S. aureus and is a potential alternative therapy for antibiotic resistant organisms. The mechanism for the inactivation of bacteria is hypothesized to involve reactive oxygen species. These RNA-Seq results provide data that may be used to test this hypothesis. The RNA-Seq data generated by these experiments is deposited in Gene Expression Omnibus (Gene accession GSE62055) and may be found at NCBI (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE62055).",2016-05-26 +23109553,ECMDB: the E. coli Metabolome Database.,"The Escherichia coli Metabolome Database (ECMDB, http://www.ecmdb.ca) is a comprehensively annotated metabolomic database containing detailed information about the metabolome of E. coli (K-12). Modelled closely on the Human and Yeast Metabolome Databases, the ECMDB contains >2600 metabolites with links to ∼1500 different genes and proteins, including enzymes and transporters. The information in the ECMDB has been collected from dozens of textbooks, journal articles and electronic databases. 
Each metabolite entry in the ECMDB contains an average of 75 separate data fields, including comprehensive compound descriptions, names and synonyms, chemical taxonomy, compound structural and physicochemical data, bacterial growth conditions and substrates, reactions, pathway information, enzyme data, gene/protein sequence data and numerous hyperlinks to images, references and other public databases. The ECMDB also includes an extensive collection of intracellular metabolite concentration data compiled from our own work as well as other published metabolomic studies. This information is further supplemented with thousands of fully assigned reference nuclear magnetic resonance and mass spectrometry spectra obtained from pure E. coli metabolites that we (and others) have collected. Extensive searching, relational querying and data browsing tools are also provided that support text, chemical structure, spectral, molecular weight and gene/protein sequence queries. Because of E. coli's importance as a model organism for biologists and as a biofactory for industry, we believe this kind of database could have considerable appeal not only to metabolomics researchers but also to molecular biologists, systems biologists and individuals in the biotechnology industry.",2012-10-29 +28174599,DEApp: an interactive web interface for differential expression analysis of next generation sequence data.,"

Background

A growing trend in the biomedical community is the use of Next Generation Sequencing (NGS) technologies in genomics research. The complexity of downstream differential expression (DE) analysis is however still challenging, as it requires sufficient computer programing and command-line knowledge. Furthermore, researchers often need to evaluate and visualize interactively the effect of using differential statistical and error models, assess the impact of selecting different parameters and cutoffs, and finally explore the overlapping consensus of cross-validated results obtained with different methods. This represents a bottleneck that slows down or impedes the adoption of NGS technologies in many labs.

Results

We developed DEApp, an interactive and dynamic web application for differential expression analysis of count based NGS data. This application enables models selection, parameter tuning, cross validation and visualization of results in a user-friendly interface.

Conclusions

DEApp enables labs with no access to full time bioinformaticians to exploit the advantages of NGS applications in biomedical research. This application is freely available at https://yanli.shinyapps.io/DEApp and https://gallery.shinyapps.io/DEApp.",2017-02-03 +25950236,"NetworkAnalyst for statistical, visual and network-based meta-analysis of gene expression data.","Meta-analysis of gene expression data sets is increasingly performed to help identify robust molecular signatures and to gain insights into underlying biological processes. The complicated nature of such analyses requires both advanced statistics and innovative visualization strategies to support efficient data comparison, interpretation and hypothesis generation. NetworkAnalyst (http://www.networkanalyst.ca) is a comprehensive web-based tool designed to allow bench researchers to perform various common and complex meta-analyses of gene expression data via an intuitive web interface. By coupling well-established statistical procedures with state-of-the-art data visualization techniques, NetworkAnalyst allows researchers to easily navigate large complex gene expression data sets to determine important features, patterns, functions and connections, thus leading to the generation of new biological hypotheses. This protocol provides a step-wise description of how to effectively use NetworkAnalyst to perform network analysis and visualization from gene lists; to perform meta-analysis on gene expression data while taking into account multiple metadata parameters; and, finally, to perform a meta-analysis of multiple gene expression data sets. NetworkAnalyst is designed to be accessible to biologists rather than to specialist bioinformaticians. The complete protocol can be executed in ∼1.5 h. Compared with other similar web-based tools, NetworkAnalyst offers a unique visual analytics experience that enables data analysis within the context of protein-protein interaction networks, heatmaps or chord diagrams. 
All of these analysis methods provide the user with supporting statistical and functional evidence.",2015-05-07 +27098035,GIANT API: an application programming interface for functional genomics.,"GIANT API provides biomedical researchers programmatic access to tissue-specific and global networks in humans and model organisms, and associated tools, which includes functional re-prioritization of existing genome-wide association study (GWAS) data. Using tissue-specific interaction networks, researchers are able to predict relationships between genes specific to a tissue or cell lineage, identify the changing roles of genes across tissues and uncover disease-gene associations. Additionally, GIANT API enables computational tools like NetWAS, which leverages tissue-specific networks for re-prioritization of GWAS results. The web services covered by the API include 144 tissue-specific functional gene networks in human, global functional networks for human and six common model organisms and the NetWAS method. GIANT API conforms to the REST architecture, which makes it stateless, cacheable and highly scalable. It can be used by a diverse range of clients including web browsers, command terminals, programming languages and standalone apps for data analysis and visualization. The API is freely available for use at http://giant-api.princeton.edu.",2016-04-20 +23155064,MetalPDB: a database of metal sites in biological macromolecular structures.,"We present here MetalPDB (freely accessible at http://metalweb.cerm.unifi.it), a novel resource aimed at conveying the information available on the three-dimensional (3D) structures of metal-binding biological macromolecules in a consistent and effective manner. This is achieved through the systematic and automated representation of metal-binding sites in proteins and nucleic acids by way of Minimal Functional Sites (MFSs). 
MFSs are 3D templates that describe the local environment around the metal(s) independently of the larger context of the macromolecular structure embedding the site(s), and are the central objects of MetalPDB design. MFSs are grouped into equistructural (broadly defined as sites found in corresponding positions in similar structures) and equivalent sites (equistructural sites that contain the same metals), allowing users to easily analyse similarities and variations in metal-macromolecule interactions, and to link them to functional information. The web interface of MetalPDB allows access to a comprehensive overview of metal-containing biological structures, providing a basis to investigate the basic principles governing the properties of these systems. MetalPDB is updated monthly in an automated manner.",2012-11-15 +27354696,pSumo-CD: predicting sumoylation sites in proteins with covariance discriminant algorithm by incorporating sequence-coupled effects into general PseAAC.,"

Motivation

Sumoylation is a post-translational modification (PTM) process, in which small ubiquitin-related modifier (SUMO) is attaching by covalent bonds to substrate protein. It is critical to many different biological processes such as replicating genome, expressing gene, localizing and stabilizing proteins; unfortunately, it is also involved with many major disorders including Alzheimer's and Parkinson's diseases. Therefore, for both basic research and drug development, it is important to identify the sumoylation sites in proteins.

Results

To address such a problem, we developed a predictor called pSumo-CD by incorporating the sequence-coupled information into the general pseudo-amino acid composition (PseAAC) and introducing the covariance discriminant (CD) algorithm, in which a bias-adjustment term, which has the function to automatically adjust the errors caused by the bias due to the imbalance of training data, had been incorporated. Rigorous cross-validations indicated that the new predictor remarkably outperformed the existing state-of-the-art prediction method for the same purpose.

Availability and implementation

For the convenience of most experimental scientists, a user-friendly web-server for pSumo-CD has been established at http://www.jci-bioinfo.cn/pSumo-CD, by which users can easily obtain their desired results without the need to go through the complicated mathematical equations involved.

Contact

jjia@gordonlifescience.org, xxiao@gordonlifescience.org or kcchou@gordonlifescience.org. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-06-26 +23822816,Large-scale integrative network-based analysis identifies common pathways disrupted by copy number alterations across cancers.,"

Background

Many large-scale studies analyzed high-throughput genomic data to identify altered pathways essential to the development and progression of specific types of cancer. However, no previous study has been extended to provide a comprehensive analysis of pathways disrupted by copy number alterations across different human cancers. Towards this goal, we propose a network-based method to integrate copy number alteration data with human protein-protein interaction networks and pathway databases to identify pathways that are commonly disrupted in many different types of cancer.

Results

We applied our approach to a data set of 2,172 cancer patients across 16 different types of cancers, and discovered a set of commonly disrupted pathways, which are likely essential for tumor formation in majority of the cancers. We also identified pathways that are only disrupted in specific cancer types, providing molecular markers for different human cancers. Analysis with independent microarray gene expression datasets confirms that the commonly disrupted pathways can be used to identify patient subgroups with significantly different survival outcomes. We also provide a network view of disrupted pathways to explain how copy number alterations affect pathways that regulate cell growth, cycle, and differentiation for tumorigenesis.

Conclusions

In this work, we demonstrated that the network-based integrative analysis can help to identify pathways disrupted by copy number alterations across 16 types of human cancers, which are not readily identifiable by conventional overrepresentation-based and other pathway-based methods. All the results and source code are available at http://compbio.cs.umn.edu/NetPathID/.",2013-07-03 +25025376,tropiTree: an NGS-based EST-SSR resource for 24 tropical tree species.,"The development of genetic tools for non-model organisms has been hampered by cost, but advances in next-generation sequencing (NGS) have created new opportunities. In ecological research, this raises the prospect for developing molecular markers to simultaneously study important genetic processes such as gene flow in multiple non-model plant species within complex natural and anthropogenic landscapes. Here, we report the use of bar-coded multiplexed paired-end Illumina NGS for the de novo development of expressed sequence tag-derived simple sequence repeat (EST-SSR) markers at low cost for a range of 24 tree species. Each chosen tree species is important in complex tropical agroforestry systems where little is currently known about many genetic processes. An average of more than 5,000 EST-SSRs was identified for each of the 24 sequenced species, whereas prior to analysis 20 of the species had fewer than 100 nucleotide sequence citations. To make results available to potential users in a suitable format, we have developed an open-access, interactive online database, tropiTree (http://bioinf.hutton.ac.uk/tropiTree), which has a range of visualisation and search facilities, and which is a model for the efficient presentation and application of NGS data.",2014-07-15 +26801960,Structured sparse canonical correlation analysis for brain imaging genetics: an improved GraphNet method.,"

Motivation

Structured sparse canonical correlation analysis (SCCA) models have been used to identify imaging genetic associations. These models either use group lasso or graph-guided fused lasso to conduct feature selection and feature grouping simultaneously. The group lasso based methods require prior knowledge to define the groups, which limits the capability when prior knowledge is incomplete or unavailable. The graph-guided methods overcome this drawback by using the sample correlation to define the constraint. However, they are sensitive to the sign of the sample correlation, which could introduce undesirable bias if the sign is wrongly estimated.

Results

We introduce a novel SCCA model with a new penalty, and develop an efficient optimization algorithm. Our method has a strong upper bound for the grouping effect for both positively and negatively correlated features. We show that our method performs better than or equally to three competing SCCA models on both synthetic and real data. In particular, our method identifies stronger canonical correlations and better canonical loading patterns, showing its promise for revealing interesting imaging genetic associations.

Availability and implementation

The Matlab code and sample data are freely available at http://www.iu.edu/∼shenlab/tools/angscca/

Contact

shenli@iu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-21 +28077696,A New Development in Trypanosoma cruzi Detection.,"Chagas disease is caused by the parasite Trypanosoma cruzi and is an important cause of morbidity and mortality in areas of Latin America where Chagas disease is endemic and among infected individuals who have migrated to nonendemic areas of North America and Europe. There are many diagnostic tests that are employed in the serological diagnosis of this infection. In this issue of the Journal of Clinical Microbiology, Bautista-López et al. provide characterization of excretory vesicles (EVs) from Vero cells infected with T. cruzi and provide data on the EVs produced by trypomastigotes and amastigotes (N. L. Bautista-López et al., J Clin Microbiol 55:744-758, 2017, https://doi.org/10.1128/JCM.01649-16). Their proteomic study defines potential targets to evaluate for improved diagnostic tests, effects on host cell biology that contribute to the pathogenesis of infection, and vaccine candidates. If any of the EV-associated proteins identified were to be correlated to cure of infection, this would be a major advance.",2017-01-11 +26800861,"EchinoDB, an application for comparative transcriptomics of deeply-sampled clades of echinoderms.","

Background

One of our goals for the echinoderm tree of life project (http://echinotol.org) is to identify orthologs suitable for phylogenetic analysis from next-generation transcriptome data. The current dataset is the largest assembled for echinoderm phylogeny and transcriptomics. We used RNA-Seq to profile adult tissues from 42 echinoderm specimens from 24 orders and 37 families. In order to achieve sampling members of clades that span key evolutionary divergence, many of our exemplars were collected from deep and polar seas.

Description

A small fraction of the transcriptome data we produced is being used for phylogenetic reconstruction. Thus to make a larger dataset available to researchers with a wide variety of interests, we made a web-based application, EchinoDB (http://echinodb.uncc.edu). EchinoDB is a repository of orthologous transcripts from echinoderms that is searchable via keywords and sequence similarity.

Conclusions

From transcripts we identified 749,397 clusters of orthologous loci. We have developed the information technology to manage and search the loci their annotations with respect to the Sea Urchin (Strongylocentrotus purpuratus) genome. Several users have already taken advantage of these data for spin-off projects in developmental biology, gene family studies, and neuroscience. We hope others will search EchinoDB to discover datasets relevant to a variety of additional questions in comparative biology.",2016-01-22 +23566368,QSAR as a random event: modeling of nanoparticles uptake in PaCa2 cancer cells.,"Quantitative structure-property/activity relationships (QSPRs/QSARs) are a tool to predict various endpoints for various substances. The ""classic"" QSPR/QSAR analysis is based on the representation of the molecular structure by the molecular graph. However, simplified molecular input-line entry system (SMILES) gradually becomes most popular representation of the molecular structure in the databases available on the Internet. Under such circumstances, the development of molecular descriptors calculated directly from SMILES becomes attractive alternative to ""classic"" descriptors. The CORAL software (http://www.insilico.eu/coral) is provider of SMILES-based optimal molecular descriptors which are aimed to correlate with various endpoints. We analyzed data set on nanoparticles uptake in PaCa2 pancreatic cancer cells. The data set includes 109 nanoparticles with the same core but different surface modifiers (small organic molecules). The concept of a QSAR as a random event is suggested in opposition to ""classic"" QSARs which are based on the only one distribution of available data into the training and the validation sets. In other words, five random splits into the ""visible"" training set and the ""invisible"" validation set were examined. 
The SMILES-based optimal descriptors (obtained by the Monte Carlo technique) for these splits are calculated with the CORAL software. The statistical quality of all these models is good.",2013-04-06 +27459855,Identification of independent association signals and putative functional variants for breast cancer risk through fine-scale mapping of the 12p11 locus.,"

Background

Multiple recent genome-wide association studies (GWAS) have identified a single nucleotide polymorphism (SNP), rs10771399, at 12p11 that is associated with breast cancer risk.

Method

We performed a fine-scale mapping study of a 700 kb region including 441 genotyped and more than 1300 imputed genetic variants in 48,155 cases and 43,612 controls of European descent, 6269 cases and 6624 controls of East Asian descent and 1116 cases and 932 controls of African descent in the Breast Cancer Association Consortium (BCAC; http://bcac.ccge.medschl.cam.ac.uk/ ), and in 15,252 BRCA1 mutation carriers in the Consortium of Investigators of Modifiers of BRCA1/2 (CIMBA). Stepwise regression analyses were performed to identify independent association signals. Data from the Encyclopedia of DNA Elements project (ENCODE) and the Cancer Genome Atlas (TCGA) were used for functional annotation.

Results

Analysis of data from European descendants found evidence for four independent association signals at 12p11, represented by rs7297051 (odds ratio (OR) = 1.09, 95 % confidence interval (CI) = 1.06-1.12; P = 3 × 10(-9)), rs805510 (OR = 1.08, 95 % CI = 1.04-1.12, P = 2 × 10(-5)), and rs1871152 (OR = 1.04, 95 % CI = 1.02-1.06; P = 2 × 10(-4)) identified in the general populations, and rs113824616 (P = 7 × 10(-5)) identified in the meta-analysis of BCAC ER-negative cases and BRCA1 mutation carriers. SNPs rs7297051, rs805510 and rs113824616 were also associated with breast cancer risk at P < 0.05 in East Asians, but none of the associations were statistically significant in African descendants. Multiple candidate functional variants are located in putative enhancer sequences. Chromatin interaction data suggested that PTHLH was the likely target gene of these enhancers. Of the six variants with the strongest evidence of potential functionality, rs11049453 was statistically significantly associated with the expression of PTHLH and its nearby gene CCDC91 at P < 0.05.

Conclusion

This study identified four independent association signals at 12p11 and revealed potentially functional variants, providing additional insights into the underlying biological mechanism(s) for the association observed between variants at 12p11 and breast cancer risk.",2016-06-21 +28113627,Reduced-Reference Quality Assessment Based on the Entropy of DWT Coefficients of Locally Weighted Gradient Magnitudes.,"Perceptual image quality assessment (IQA) attempts to use computational models to estimate the image quality in accordance with subjective evaluations. Reduced-reference (RR) image quality assessment (IQA) methods make use of partial information or features extracted from the reference image for estimating the quality of distorted images. Finding a balance between the number of RR features and accuracy of the estimated image quality is essential and important in IQA. In this paper we propose a training-free low-cost RRIQA method that requires a very small number of RR features (6 RR features). The proposed RRIQA algorithm is based on the discrete wavelet transform (DWT) of locally weighted gradient magnitudes. We apply human visual system's contrast sensitivity and neighborhood gradient information to weight the gradient magnitudes in a locally adaptive manner. The RR features are computed by measuring the entropy of each DWT subband, for each scale, and pooling the subband entropies along all orientations, resulting in L RR features (one average entropy per scale) for an L-level DWT. Extensive experiments performed on seven large-scale benchmark databases demonstrate that the proposed RRIQA method delivers highly competitive performance as compared to the state-of-the-art RRIQA models as well as full reference ones for both natural and texture images. 
The MATLAB source code of REDLOG and the evaluation results are publicly available online at http://lab.engineering.asu.edu/ivulab/software/redlog/.",2016-08-24 +26662617,Three-Month Real-Time Dengue Forecast Models: An Early Warning System for Outbreak Alerts and Policy Decision Support in Singapore.,"

Background

With its tropical rainforest climate, rapid urbanization, and changing demography and ecology, Singapore experiences endemic dengue; the last large outbreak in 2013 culminated in 22,170 cases. In the absence of a vaccine on the market, vector control is the key approach for prevention.

Objectives

We sought to forecast the evolution of dengue epidemics in Singapore to provide early warning of outbreaks and to facilitate the public health response to moderate an impending outbreak.

Methods

We developed a set of statistical models using least absolute shrinkage and selection operator (LASSO) methods to forecast the weekly incidence of dengue notifications over a 3-month time horizon. This forecasting tool used a variety of data streams and was updated weekly, including recent case data, meteorological data, vector surveillance data, and population-based national statistics. The forecasting methodology was compared with alternative approaches that have been proposed to model dengue case data (seasonal autoregressive integrated moving average and step-down linear regression) by fielding them on the 2013 dengue epidemic, the largest on record in Singapore.

Results

Operationally useful forecasts were obtained at a 3-month lag using the LASSO-derived models. Based on the mean average percentage error, the LASSO approach provided more accurate forecasts than the other methods we assessed. We demonstrate its utility in Singapore's dengue control program by providing a forecast of the 2013 outbreak for advance preparation of outbreak response.

Conclusions

Statistical models built using machine learning methods such as LASSO have the potential to markedly improve forecasting techniques for recurrent infectious disease outbreaks such as dengue.

Citation

Shi Y, Liu X, Kok SY, Rajarethinam J, Liang S, Yap G, Chong CS, Lee KS, Tan SS, Chin CK, Lo A, Kong W, Ng LC, Cook AR. 2016. Three-month real-time dengue forecast models: an early warning system for outbreak alerts and policy decision support in Singapore. Environ Health Perspect 124:1369-1375; http://dx.doi.org/10.1289/ehp.1509981.",2015-12-11 +28164797,An introduction to mixture item response theory models.,"Mixture item response theory (IRT) allows one to address situations that involve a mixture of latent subpopulations that are qualitatively different but within which a measurement model based on a continuous latent variable holds. In this modeling framework, one can characterize students by both their location on a continuous latent variable as well as by their latent class membership. For example, in a study of risky youth behavior this approach would make it possible to estimate an individual's propensity to engage in risky youth behavior (i.e., on a continuous scale) and to use these estimates to identify youth who might be at the greatest risk given their class membership. Mixture IRT can be used with binary response data (e.g., true/false, agree/disagree, endorsement/not endorsement, correct/incorrect, presence/absence of a behavior), Likert response scales, partial correct scoring, nominal scales, or rating scales. In the following, we present mixture IRT modeling and two examples of its use. Data needed to reproduce analyses in this article are available as supplemental online materials at http://dx.doi.org/10.1016/j.jsp.2016.01.002.",2016-04-16 +28705239,"NAP: The Network Analysis Profiler, a web tool for easier topological analysis and comparison of medium-scale biological networks.","

Objective

Nowadays, due to the technological advances of high-throughput techniques, Systems Biology has seen a tremendous growth of data generation. With network analysis, looking at biological systems at a higher level in order to better understand a system, its topology and the relationships between its components is of a great importance. Gene expression, signal transduction, protein/chemical interactions, biomedical literature co-occurrences, are few of the examples captured in biological network representations where nodes represent certain bioentities and edges represent the connections between them. Today, many tools for network visualization and analysis are available. Nevertheless, most of them are standalone applications that often (i) burden users with computing and calculation time depending on the network's size and (ii) focus on handling, editing and exploring a network interactively. While such functionality is of great importance, limited efforts have been made towards the comparison of the topological analysis of multiple networks.

Results

Network Analysis Provider (NAP) is a comprehensive web tool to automate network profiling and intra/inter-network topology comparison. It is designed to bridge the gap between network analysis, statistics, graph theory and partially visualization in a user-friendly way. It is freely available and aims to become a very appealing tool for the broader community. It hosts a great plethora of topological analysis methods such as node and edge rankings. Few of its powerful characteristics are: its ability to enable easy profile comparisons across multiple networks, find their intersection and provide users with simplified, high quality plots of any of the offered topological characteristics against any other within the same network. It is written in R and Shiny, it is based on the igraph library and it is able to handle medium-scale weighted/unweighted, directed/undirected and bipartite graphs. NAP is available at http://bioinformatics.med.uoc.gr/NAP .",2017-07-14 +25075616,SNP@lincTFBS: an integrated database of polymorphisms in human LincRNA transcription factor binding sites.,"Large intergenic non-coding RNAs (lincRNAs) are a new class of functional transcripts, and aberrant expression of lincRNAs was associated with several human diseases. The genetic variants in lincRNA transcription factor binding sites (TFBSs) can change lincRNA expression, thereby affecting the susceptibility to human diseases. To identify and annotate these functional candidates, we have developed a database SNP@lincTFBS, which is devoted to the exploration and annotation of single nucleotide polymorphisms (SNPs) in potential TFBSs of human lincRNAs. We identified 6,665 SNPs in 6,614 conserved TFBSs of 2,423 human lincRNAs. In addition, with ChIPSeq dataset, we identified 139,576 SNPs in 304,517 transcription factor peaks of 4,813 lincRNAs. We also performed comprehensive annotation for these SNPs using 1000 Genomes Project datasets across 11 populations. 
Moreover, one of the distinctive features of SNP@lincTFBS is the collection of disease-associated SNPs in the lincRNA TFBSs and SNPs in the TFBSs of disease-associated lincRNAs. The web interface enables both flexible data searches and downloads. Quick search can be query of lincRNA name, SNP identifier, or transcription factor name. SNP@lincTFBS provides significant advances in identification of disease-associated lincRNA variants and improved convenience to interpret the discrepant expression of lincRNAs. The SNP@lincTFBS database is available at http://bioinfo.hrbmu.edu.cn/SNP_lincTFBS.",2014-07-30 +27601976,TS-EUROTRAIN: A European-Wide Investigation and Training Network on the Etiology and Pathophysiology of Gilles de la Tourette Syndrome.,"Gilles de la Tourette Syndrome (GTS) is characterized by the presence of multiple motor and phonic tics with a fluctuating course of intensity, frequency, and severity. Up to 90% of patients with GTS present with comorbid conditions, most commonly attention-deficit/hyperactivity disorder (ADHD), and obsessive-compulsive disorder (OCD), thus providing an excellent model for the exploration of shared etiology across disorders. TS-EUROTRAIN (FP7-PEOPLE-2012-ITN, Grant Agr.No. 316978) is a Marie Curie Initial Training Network (http://ts-eurotrain.eu) that aims to elucidate the complex etiology of the onset and clinical course of GTS, investigate the neurobiological underpinnings of GTS and related disorders, translate research findings into clinical applications, and establish a pan-European infrastructure for the study of GTS. 
This includes the challenges of (i) assembling a large genetic database for the evaluation of the genetic architecture with high statistical power; (ii) exploring the role of gene-environment interactions including the effects of epigenetic phenomena; (iii) employing endophenotype-based approaches to understand the shared etiology between GTS, OCD, and ADHD; (iv) establishing a developmental animal model for GTS; (v) gaining new insights into the neurobiological mechanisms of GTS via cross-sectional and longitudinal neuroimaging studies; and (vi) partaking in outreach activities including the dissemination of scientific knowledge about GTS to the public. Fifteen partners from academia and industry and 12 PhD candidates pursue the project. Here, we aim to share the design of an interdisciplinary project, showcasing the potential of large-scale collaborative efforts in the field of GTS. Our ultimate aims are to elucidate the complex etiology and neurobiological underpinnings of GTS, translate research findings into clinical applications, and establish Pan-European infrastructure for the study of GTS and associated disorders.",2016-08-23 +27025440,GenoMatrix: A Software Package for Pedigree-Based and Genomic Prediction Analyses on Complex Traits.,"Genomic and pedigree-based best linear unbiased prediction methodologies (G-BLUP and P-BLUP) have proven themselves efficient for partitioning the phenotypic variance of complex traits into its components, estimating the individuals' genetic merits, and predicting unobserved (or yet-to-be observed) phenotypes in many species and fields of study. The GenoMatrix software, presented here, is a user-friendly package to facilitate the process of using genome-wide marker data and parentage information for G-BLUP and P-BLUP analyses on complex traits. 
It provides users with a collection of applications which help them on a set of tasks from performing quality control on data to constructing and manipulating the genomic and pedigree-based relationship matrices and obtaining their inverses. Such matrices will be then used in downstream analyses by other statistical packages. The package also enables users to obtain predicted values for unobserved individuals based on the genetic values of observed related individuals. GenoMatrix is available to the research community as a Windows 64bit executable and can be downloaded free of charge at: http://compbio.ufl.edu/software/genomatrix/.",2016-03-29 +24759728,Sample sequencing of vascular plants demonstrates widespread conservation and divergence of microRNAs.,"Small RNAs are pivotal regulators of gene expression that guide transcriptional and post-transcriptional silencing mechanisms in eukaryotes, including plants. Here we report a comprehensive atlas of sRNA and miRNA from 3 species of algae and 31 representative species across vascular plants, including non-model plants. We sequence and quantify sRNAs from 99 different tissues or treatments across species, resulting in a data set of over 132 million distinct sequences. Using miRBase mature sequences as a reference, we identify the miRNA sequences present in these libraries. We apply diverse profiling methods to examine critical sRNA and miRNA features, such as size distribution, tissue-specific regulation and sequence conservation between species, as well as to predict putative new miRNA sequences. We also develop database resources, computational analysis tools and a dedicated website, http://smallrna.udel.edu/. 
This study provides new insights on plant sRNAs and miRNAs, and a foundation for future studies.",2014-04-23 +27272119,FamPipe: An Automatic Analysis Pipeline for Analyzing Sequencing Data in Families for Disease Studies.,"In disease studies, family-based designs have become an attractive approach to analyzing next-generation sequencing (NGS) data for the identification of rare mutations enriched in families. Substantial research effort has been devoted to developing pipelines for automating sequence alignment, variant calling, and annotation. However, fewer pipelines have been designed specifically for disease studies. Most of the current analysis pipelines for family-based disease studies using NGS data focus on a specific function, such as identifying variants with Mendelian inheritance or identifying shared chromosomal regions among affected family members. Consequently, some other useful family-based analysis tools, such as imputation, linkage, and association tools, have yet to be integrated and automated. We developed FamPipe, a comprehensive analysis pipeline, which includes several family-specific analysis modules, including the identification of shared chromosomal regions among affected family members, prioritizing variants assuming a disease model, imputation of untyped variants, and linkage and association tests. We used simulation studies to compare properties of some modules implemented in FamPipe, and based on the results, we provided suggestions for the selection of modules to achieve an optimal analysis strategy. The pipeline is under the GNU GPL License and can be downloaded for free at http://fampipe.sourceforge.net.",2016-06-06 +27589091,Maths anxiety and medication dosage calculation errors: A scoping review.,"A student's accuracy on drug calculation tests may be influenced by maths anxiety, which can impede one's ability to understand and complete mathematic problems. 
It is important for healthcare students to overcome this barrier when calculating drug dosages in order to avoid administering the incorrect dose to a patient when in the clinical setting. The aim of this study was to examine the effects of maths anxiety on healthcare students' ability to accurately calculate drug dosages by performing a scoping review of the existing literature. This review utilised a six-stage methodology using the following databases; CINAHL, Embase, Medline, Scopus, PsycINFO, Google Scholar, Trip database (http://www.tripdatabase.com/) and Grey Literature report (http://www.greylit.org/). After an initial title/abstract review of relevant papers, and then full text review of the remaining papers, six articles were selected for inclusion in this study. Of the six articles included, there were three experimental studies, two quantitative studies and one mixed method study. All studies addressed nursing students and the presence of maths anxiety. No relevant studies from other disciplines were identified in the existing literature. Three studies took place in the U.S, the remainder in Canada, Australia and United Kingdom. Upon analysis of these studies, four factors including maths anxiety were identified as having an influence on a student's drug dosage calculation abilities. Ultimately, the results from this review suggest more research is required in nursing and other relevant healthcare disciplines regarding the effects of maths anxiety on drug dosage calculations. This additional knowledge will be important to further inform development of strategies to decrease the potentially serious effects of errors in drug dosage calculation to patient safety.",2016-08-22 +23496976,AUREA: an open-source software system for accurate and user-friendly identification of relative expression molecular signatures.,"

Background

Public databases such as the NCBI Gene Expression Omnibus contain extensive and exponentially increasing amounts of high-throughput data that can be applied to molecular phenotype characterization. Collectively, these data can be analyzed for such purposes as disease diagnosis or phenotype classification. One family of algorithms that has proven useful for disease classification is based on relative expression analysis and includes the Top-Scoring Pair (TSP), k-Top-Scoring Pairs (k-TSP), Top-Scoring Triplet (TST) and Differential Rank Conservation (DIRAC) algorithms. These relative expression analysis algorithms hold significant advantages for identifying interpretable molecular signatures for disease classification, and have been implemented previously on a variety of computational platforms with varying degrees of usability. To increase the user-base and maximize the utility of these methods, we developed the program AUREA (Adaptive Unified Relative Expression Analyzer)-a cross-platform tool that has a consistent application programming interface (API), an easy-to-use graphical user interface (GUI), fast running times and automated parameter discovery.

Results

Herein, we describe AUREA, an efficient, cohesive, and user-friendly open-source software system that comprises a suite of methods for relative expression analysis. AUREA incorporates existing methods, while extending their capabilities and bringing uniformity to their interfaces. We demonstrate that combining these algorithms and adaptively tuning parameters on the training sets makes these algorithms more consistent in their performance and demonstrate the effectiveness of our adaptive parameter tuner by comparing accuracy across diverse datasets.

Conclusions

We have integrated several relative expression analysis algorithms and provided a unified interface for their implementation while making data acquisition, parameter fixing, data merging, and results analysis 'point-and-click' simple. The unified interface and the adaptive parameter tuning of AUREA provide an effective framework in which to investigate the massive amounts of publicly available data by both 'in silico' and 'bench' scientists. AUREA can be found at http://price.systemsbiology.net/AUREA/.",2013-03-05 +23311574,MAPI: a software framework for distributed biomedical applications.,"

Unlabelled

Background

The amount of web-based resources (databases, tools etc.) in biomedicine has increased, but the integrated usage of those resources is complex due to differences in access protocols and data formats. However, distributed data processing is becoming inevitable in several domains, in particular in biomedicine, where researchers face rapidly increasing data sizes. This big data is difficult to process locally because of the large processing, memory and storage capacity required.

Results

This manuscript describes a framework, called MAPI, which provides a uniform representation of resources available over the Internet, in particular for Web Services. The framework enhances their interoperability and collaborative use by enabling a uniform and remote access. The framework functionality is organized in modules that can be combined and configured in different ways to fulfil concrete development requirements.

Conclusions

The framework has been tested in the biomedical application domain where it has been a base for developing several clients that are able to integrate different web resources. The MAPI binaries and documentation are freely available at http://www.bitlab-es.com/mapi under the Creative Commons Attribution-No Derivative Works 2.5 Spain License. The MAPI source code is available by request (GPL v3 license).",2013-01-11 +28728139,A Longitudinal Analysis of the Influence of the Neighborhood Environment on Recreational Walking within the Neighborhood: Results from RESIDE.,"

Background

There is limited longitudinal evidence confirming the role of neighborhood environment attributes in encouraging people to walk more or if active people simply choose to live in activity-friendly neighborhoods. Natural experiments of policy changes to create more walkable communities provide stronger evidence for a causal effect of neighborhood environments on residents' walking.

Objectives

We aimed to investigate longitudinal associations between objective and perceived neighborhood environment measures and neighborhood recreational walking.

Methods

We analyzed longitudinal data collected over 8 yr (four surveys) from the RESIDential Environments (RESIDE) Study (Perth, Australia, 2003-2012). At each time point, participants reported the frequency and total minutes of recreational walking/week within their neighborhood and neighborhood environment perceptions. Objective measures of the neighborhood environment were generated using a Geographic Information System (GIS).

Results

Local recreational walking was influenced by objectively measured access to a medium-/large-size park, beach access, and higher street connectivity, which was reduced when adjusted for neighborhood perceptions. In adjusted models, positive perceptions of access to a park and beach, higher street connectivity, neighborhood esthetics, and safety from crime were independent determinants of increased neighborhood recreational walking. Local recreational walking increased by 9 min/wk (12% increase in frequency) for each additional perceived neighborhood attribute present.

Conclusions

Our findings provide urban planners and policy makers with stronger causal evidence of the positive impact of well-connected neighborhoods and access to local parks of varying sizes on local residents' recreational walking and health. https://doi.org/10.1289/EHP823.",2017-07-12 +27131783,Candidate gene prioritization with Endeavour.,"Genomic studies and high-throughput experiments often produce large lists of candidate genes among which only a small fraction are truly relevant to the disease, phenotype or biological process of interest. Gene prioritization tackles this problem by ranking candidate genes by profiling candidates across multiple genomic data sources and integrating this heterogeneous information into a global ranking. We describe an extended version of our gene prioritization method, Endeavour, now available for six species and integrating 75 data sources. The performance (Area Under the Curve) of Endeavour on cross-validation benchmarks using 'gold standard' gene sets varies from 88% (for human phenotypes) to 95% (for worm gene function). In addition, we have also validated our approach using a time-stamped benchmark derived from the Human Phenotype Ontology, which provides a setting close to prospective validation. With this benchmark, using 3854 novel gene-phenotype associations, we observe a performance of 82%. Altogether, our results indicate that this extended version of Endeavour efficiently prioritizes candidate genes. The Endeavour web server is freely available at https://endeavour.esat.kuleuven.be/.",2016-04-30 +27334001,multiDE: a dimension reduced model based statistical method for differential expression analysis using RNA-sequencing data with multiple treatment conditions.,"

Background

The growing complexity of biological experiment design based on high-throughput RNA sequencing (RNA-seq) is calling for more accommodative statistical tools. We focus on differential expression (DE) analysis using RNA-seq data in the presence of multiple treatment conditions.

Results

We propose a novel method, multiDE, for facilitating DE analysis using RNA-seq read count data with multiple treatment conditions. The read count is assumed to follow a log-linear model incorporating two factors (i.e., condition and gene), where an interaction term is used to quantify the association between gene and condition. The number of the degrees of freedom is reduced to one through the first order decomposition of the interaction, leading to a dramatic power improvement in testing DE genes when the number of conditions is greater than two. In our simulation situations, multiDE outperformed the benchmark methods (i.e. edgeR and DESeq2) even if the underlying model was severely misspecified, and the power gain was increasing in the number of conditions. In the application to two real datasets, multiDE identified more biologically meaningful DE genes than the benchmark methods. An R package implementing multiDE is available publicly at http://homepage.fudan.edu.cn/zhangh/softwares/multiDE .

Conclusions

When the number of conditions is two, multiDE performs comparably with the benchmark methods. When the number of conditions is greater than two, multiDE outperforms the benchmark methods.",2016-06-22 +27143047,Elevated AQP1 Expression Is Associated With Unfavorable Oncologic Outcome in Patients With Hilar Cholangiocarcinoma.,"

Background

Hilar cholangiocarcinomas are malignant tumors with a poor prognosis. An early prediction of prognosis for patients may help us determine treatment strategies. Aquaporin 1 is a cell membrane channel involved in water transport, cell motility, and proliferation. Increasing evidence shows that aquaporin 1 plays a role in tumor prognosis and diagnosis. The purpose of this study is to evaluate the role of aquaporin 1 in hilar cholangiocarcinoma.

Methods

Here, we analyzed messenger RNA expression data of genes function as bile secretion in a data set of 169 samples using the R2 bioinformatic platform ( http://r2.amc.nl ). Quantitative polymerase chain reaction was performed to verify the gene expression in 17 hilar cholangiocarcinoma samples. Immunohistochemistry was also performed in a series of specimens from 62 hilar cholangiocarcinoma tissues, and its clinical significance was assessed by clinical correlation and Kaplan-Meier analyses.

Results

All data were analyzed using the R2 web application, aquaporin 1 was selected for further analysis. The significant expression variation of aquaporin 1 among 17 cases with cholangiocarcinoma was also found using quantitative polymerase chain reaction. The expression level of aquaporin 1 protein significantly correlated with tumor-node-metastasis stage ( P = .002) and overall survival time ( P = .010). Higher aquaporin 1 expression indicated poor prognostic outcomes ( P <.05, log-rank test). Multivariate analysis also showed strong aquaporin 1 protein expression was an independent adverse prognosticator in hilar cholangiocarcinoma ( P = .002).

Conclusion

This study highlighted the prognostic value of aquaporin 1 in hilar cholangiocarcinoma. Strong aquaporin 1 expression predicts poor survival, regardless of pathological features. Immunohistochemical detection of aquaporin 1, as a prognostic marker, may contribute to predicting clinical outcome for patients with hilar cholangiocarcinoma.",2016-05-02 +21781326,PromBase: a web resource for various genomic features and predicted promoters in prokaryotic genomes.,"

Background

As more and more genomes are being sequenced, an overview of their genomic features and annotation of their functional elements, which control the expression of each gene or transcription unit of the genome, is a fundamental challenge in genomics and bioinformatics.

Findings

Relative stability of DNA sequence has been used to predict promoter regions in 913 microbial genomic sequences with GC-content ranging from 16.6% to 74.9%. Irrespective of the genome GC-content the relative stability based promoter prediction method has already been proven to be robust in terms of recall and precision. The predicted promoter regions for the 913 microbial genomes have been accumulated in a database called PromBase. Promoter search can be carried out in PromBase either by specifying the gene name or the genomic position. Each predicted promoter region has been assigned to a reliability class (low, medium, high, very high and highest) based on the difference between its average free energy and the downstream region. The recall and precision values for each class are shown graphically in PromBase. In addition, PromBase provides detailed information about base composition, CDS and CG/TA skews for each genome and various DNA sequence dependent structural properties (average free energy, curvature and bendability) in the vicinity of all annotated translation start sites (TLS).

Conclusion

PromBase is a database, which contains predicted promoter regions and detailed analysis of various genomic features for 913 microbial genomes. PromBase can serve as a valuable resource for comparative genomics study and help the experimentalist to rapidly access detailed information on various genomic features and putative promoter regions in any given genome. This database is freely accessible for academic and non-academic users via the worldwide web http://nucleix.mbu.iisc.ernet.in/prombase/.",2011-07-22 +24353116,Antibiotic use for irreversible pulpitis.,"

Background

Irreversible pulpitis, which is characterised by acute and intense pain, is one of the most frequent reasons that patients attend for emergency dental care. Apart from removal of the tooth, the customary way of relieving the pain of irreversible pulpitis is by drilling into the tooth, removing the inflamed pulp (nerve) and cleaning the root canal. However, a significant number of dentists continue to prescribe antibiotics to stop the pain of irreversible pulpitis.

Objectives

To assess the effects of systemic antibiotics for irreversible pulpitis.

Search methods

We searched the Cochrane Oral Health Group's Trials Register (to 5 September 2013); the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library 2013, Issue 9); MEDLINE via OVID (1946 to 5 September 2013); EMBASE via OVID (1980 to 5 September 2013) and the US National Institutes of Health Trials Register (http://clinicaltrials.gov). There were no language restrictions in the searches of the electronic databases.

Selection criteria

Randomised controlled trials which compared pain relief with systemic antibiotics and analgesics, against placebo and analgesics in the acute preoperative phase of irreversible pulpitis.

Data collection and analysis

Two review authors screened studies and extracted data independently. We assessed the quality of the evidence of included studies using GRADEPro software. Pooling of data was not possible and a descriptive summary is presented.

Main results

One trial assessed at low risk of bias, involving 40 participants was included in this update of the review. The quality of the body of evidence was rated low for the different outcomes. There was a close parallel distribution of the pain ratings in both the intervention and placebo groups over the seven-day study period. There was insufficient evidence to claim or refute a benefit for penicillin for pain intensity. There was no significant difference in the mean total number of ibuprofen tablets over the study period: 9.2 (standard deviation (SD) 6.02) in the penicillin group versus 9.6 (SD 6.34) in the placebo group; mean difference -0.40 (95% confidence interval (CI) -4.23 to 3.43; P value = 0.84). This applied equally for the mean total number of Tylenol tablets: 6.9 (SD 6.87) used in the penicillin group versus 4.45 (SD 4.82) in the placebo group; mean difference 2.45 (95% CI -1.23 to 6.13; P value = 0.19). Our secondary outcome on reporting of adverse events was not addressed in this study.

Authors' conclusions

This systematic review which was based on one low powered small sample trial assessed as a low risk of bias, illustrates that there is insufficient evidence to determine whether antibiotics reduce pain or not compared to not having antibiotics. The results of this review confirm the necessity for further larger sample and methodologically sound trials that can provide additional evidence as to whether antibiotics, prescribed in the preoperative phase, can affect treatment outcomes for irreversible pulpitis.",2013-12-19 +28851273,EL_PSSM-RT: DNA-binding residue prediction by integrating ensemble learning with PSSM Relation Transformation.,"

Background

Prediction of DNA-binding residue is important for understanding the protein-DNA recognition mechanism. Many computational methods have been proposed for the prediction, but most of them do not consider the relationships of evolutionary information between residues.

Results

In this paper, we first propose a novel residue encoding method, referred to as the Position Specific Score Matrix (PSSM) Relation Transformation (PSSM-RT), to encode residues by utilizing the relationships of evolutionary information between residues. PDNA-62 and PDNA-224 are used to evaluate PSSM-RT and two existing PSSM encoding methods by five-fold cross-validation. Performance evaluations indicate that PSSM-RT is more effective than previous methods. This validates the point that the relationship of evolutionary information between residues is indeed useful in DNA-binding residue prediction. An ensemble learning classifier (EL_PSSM-RT) is also proposed by combining ensemble learning model and PSSM-RT to better handle the imbalance between binding and non-binding residues in datasets. EL_PSSM-RT is evaluated by five-fold cross-validation using PDNA-62 and PDNA-224 as well as two independent datasets TS-72 and TS-61. Performance comparisons with existing predictors on the four datasets demonstrate that EL_PSSM-RT is the best-performing method among all the predicting methods with improvement between 0.02-0.07 for MCC, 4.18-21.47% for ST and 0.013-0.131 for AUC. Furthermore, we analyze the importance of the pair-relationships extracted by PSSM-RT and the results validate the usefulness of PSSM-RT for encoding DNA-binding residues.

Conclusions

We propose a novel prediction method for the prediction of DNA-binding residue with the inclusion of relationship of evolutionary information and ensemble learning. Performance evaluation shows that the relationship of evolutionary information between residues is indeed useful in DNA-binding residue prediction and ensemble learning can be used to address the data imbalance issue between binding and non-binding residues. A web service of EL_PSSM-RT ( http://hlt.hitsz.edu.cn:8080/PSSM-RT_SVM/ ) is provided for free access to the biological research community.",2017-08-29 +27327771,A Bacterial Analysis Platform: An Integrated System for Analysing Bacterial Whole Genome Sequencing Data for Clinical Diagnostics and Surveillance.,"Recent advances in whole genome sequencing have made the technology available for routine use in microbiological laboratories. However, a major obstacle for using this technology is the availability of simple and automatic bioinformatics tools. Based on previously published and already available web-based tools we developed a single pipeline for batch uploading of whole genome sequencing data from multiple bacterial isolates. The pipeline will automatically identify the bacterial species and, if applicable, assemble the genome, identify the multilocus sequence type, plasmids, virulence genes and antimicrobial resistance genes. A short printable report for each sample will be provided and an Excel spreadsheet containing all the metadata and a summary of the results for all submitted samples can be downloaded. The pipeline was benchmarked using datasets previously used to test the individual services. The reported results enable a rapid overview of the major results, and comparing that to the previously found results showed that the platform is reliable and able to correctly predict the species and find most of the expected genes automatically. 
In conclusion, a combined bioinformatics platform was developed and made publicly available, providing easy-to-use automated analysis of bacterial whole genome sequencing data. The platform may be of immediate relevance as a guide for investigators using whole genome sequencing for clinical diagnostics and surveillance. The platform is freely available at: https://cge.cbs.dtu.dk/services/CGEpipeline-1.1 and it is the intention that it will continue to be expanded with new features as these become available.",2016-06-21 +27334476,SERAPHIM: studying environmental rasters and phylogenetically informed movements.,"SERAPHIM (""Studying Environmental Rasters and PHylogenetically Informed Movements"") is a suite of computational methods developed to study phylogenetic reconstructions of spatial movement in an environmental context. SERAPHIM extracts the spatio-temporal information contained in estimated phylogenetic trees and uses this information to calculate summary statistics of spatial spread and to visualize dispersal history. Most importantly, SERAPHIM enables users to study the impact of customized environmental variables on the spread of the study organism. Specifically, given an environmental raster, SERAPHIM computes environmental ""weights"" for each phylogeny branch, which represent the degree to which the environmental variable impedes (or facilitates) lineage movement. Correlations between movement duration and these environmental weights are then assessed, and the statistical significances of these correlations are evaluated using null distributions generated by a randomization procedure. SERAPHIM can be applied to any phylogeny whose nodes are annotated with spatial and temporal information. At present, such phylogenies are most often found in the field of emerging infectious diseases, but will become increasingly common in other biological disciplines as population genomic data grows.

Availability and implementation

SERAPHIM 1.0 is freely available from http://evolve.zoo.ox.ac.uk/. R package, source code, example files, tutorials and a manual are also available from this website.

Contact

simon.dellicour@kuleuven.be or oliver.pybus@zoo.ox.ac.uk Supplementary information: Supplementary data are available at Bioinformatics online.",2016-06-22 +27541203,Systematic Reviews Published in the April 2016 Issue of the Cochrane Library.,"The Cochrane Library of Systematic Reviews is published quarterly as a DVD and monthly online ( http://www.thecochranelibrary.com ). The April 2016 issue (2nd DVD for 2016) contains 6875 complete reviews, 2417 protocols for reviews in production, and 36,600 short summaries of systematic reviews published in the general medical literature (this short summary database is no longer being updated). In addition, there are citations of 934,000 randomized controlled trials, and 15,700 cited papers in the Cochrane Methodology Register. The Health Technology Assessment database contains some 16,000 citations. One hundred and twenty-nine new reviews have been published in the previous 3 months, of which three have potential relevance for practitioners in pain and palliative medicine. The impact factor of the Cochrane Library stands at 5.939. Readers are encouraged to access the full report for any articles of interest, as only a brief commentary is provided.",2016-08-19 +28627465,Differential proteome analysis of hippocampus and temporal cortex using label-free based 2D-LC-MS/MS.,"Hippocampus and temporal cortex are important brain regions, which play distinct, but complimentary roles in mediating learning and memory. Herein, we utilized label-free differential proteome strategy to explore function of normal human hippocampus and temporal cortex in learning and memory. As a result, a total of 5529 and 5702 proteins were identified in hippocampus and temporal cortex, respectively, 516 of which were significantly differential expressed, with abundance span 5 orders of magnitudes. 
Pathways analysis showed that temporal cortex was involved in growth of axons growth and synapse density regulation, through which could regulate long-term potentiation and long-term retention of trace memory. Hippocampus was involved in regulation of cell survival and cell viability, and regulates neurons proliferation by actin dynamics changes, through which involved in both short-term memory and long-term memory. Four selected differential proteins were further validated by Western blot and immunohistochemistry. For the first time, we identified proteins and associated pathways of hippocampus and temporal cortex in human cognition process using proteomic strategy, which would provide references for generating corresponding insights in hippocampus and temporal cortex-related cognitive function. The original data files can be downloaded at http://211.102.209.254/page/PSV023.html;?url=1489542083729AFHp (password: kYxh).

Significance

This study explored the potential molecular mechanism of hippocampus and temporal cortex in human cognition function using proteomics strategy, which will offer a baseline reference for further cognitive disorders study and reveal insights into physiology of temporal cortex and hippocampus.",2017-06-13 +21326606,dPORE-miRNA: polymorphic regulation of microRNA genes.,"

Background

MicroRNAs (miRNAs) are short non-coding RNA molecules that act as post-transcriptional regulators and affect the regulation of protein-coding genes. Mostly transcribed by PolII, miRNA genes are regulated at the transcriptional level similarly to protein-coding genes. In this study we focus on human miRNAs. These miRNAs are involved in a variety of pathways and can affect many diseases. Our interest is on possible deregulation of the transcription initiation of the miRNA encoding genes, which is facilitated by variations in the genomic sequence of transcriptional control regions (promoters).

Methodology

Our aim is to provide an online resource to facilitate the investigation of the potential effects of single nucleotide polymorphisms (SNPs) on miRNA gene regulation. We analyzed SNPs overlapped with predicted transcription factor binding sites (TFBSs) in promoters of miRNA genes. We also accounted for the creation of novel TFBSs due to polymorphisms not present in the reference genome. The resulting changes in the original TFBSs and potential creation of new TFBSs were incorporated into the Dragon Database of Polymorphic Regulation of miRNA genes (dPORE-miRNA).

Conclusions

The dPORE-miRNA database enables researchers to explore potential effects of SNPs on the regulation of miRNAs. dPORE-miRNA can be interrogated with regards to: a/miRNAs (their targets, or involvement in diseases, or biological pathways), b/SNPs, or c/transcription factors. dPORE-miRNA can be accessed at http://cbrc.kaust.edu.sa/dpore and http://apps.sanbi.ac.za/dpore/. Its use is free for academic and non-profit users.",2011-02-04 +27491037,Improving tRNAscan-SE Annotation Results via Ensemble Classifiers.,"tRNAScan-SE is a tRNA detection program that is widely used for tRNA annotation; however, the false positive rate of tRNAScan-SE is unacceptable for large sequences. Here, we used a machine learning method to try to improve the tRNAScan-SE results. A new predictor, tRNA-Predict, was designed. We obtained real and pseudo-tRNA sequences as training data sets using tRNAScan-SE and constructed three different tRNA feature sets. We then set up an ensemble classifier, LibMutil, to predict tRNAs from the training data. The positive data set of 623 tRNA sequences was obtained from tRNAdb 2009 and the negative data set was the false positive tRNAs predicted by tRNAscan-SE. Our in silico experiments revealed a prediction accuracy rate of 95.1 % for tRNA-Predict using 10-fold cross-validation. tRNA-Predict was developed to distinguish functional tRNAs from pseudo-tRNAs rather than to predict tRNAs from a genome-wide scan. However, tRNA-Predict can work with the output of tRNAscan-SE, which is a genome-wide scanning method, to improve the tRNAscan-SE annotation results. The tRNA-Predict web server is accessible at http://datamining.xmu.edu.cn/~gjs/tRNA-Predict.",2015-09-14 +28845508,Impact of renal denervation on tissue Na+ content in treatment-resistant hypertension.,"

Objectives

Renal denervation (RDN) has been introduced for reducing blood pressure (BP) in treatment-resistant hypertension (TRH). The precise mechanisms by which RDN exerts its BP-lowering effects are not yet fully understood. It is widely accepted that sodium (Na+) plays a crucial role in the pathogenesis of hypertensive disease. However, there is increasing evidence of osmotically inactive Na+ storage. We investigated the impact of RDN on Na+ homeostasis using estimation of salt intake, and measurement of tissue Na+ content.

Methods

In a study 41 patients with TRH (office BP ≥140/90 mmHg and diagnosis confirmed by 24-h ambulatory BP monitoring) underwent RDN. Tissue Na+ content was assessed non-invasively with 3.0 T magnetic resonance imaging before and 6 months after RDN. In addition, 24-h urinary Na+ excretion as an estimate of salt intake and spot urine Na+/K+ excretion were assessed. The study was registered at http://www.clinicaltrials.gov (ID: NCT01687725).

Results

There was a significant fall in BP (office: -17 ± 20/-10 ± 12 mmHg; 24-h: -11 ± 13/-6 ± 9 mmHg, all p < 0.001) 6 months after RDN. In contrast, tissue Na+ content of the muscle (20.1 ± 3.9 vs. 20.7 ± 4.0 mmol/L, p = 0.229) and skin (24.4 ± 6.5 vs. 24.8 ± 6.6 mmol/L, p = 0.695) did not change after RDN. Moreover, there was also no change in salt intake after RDN, whereas Na+/K+ ratio only acutely increased.

Conclusions

Although RDN resulted in a substantial reduction of BP, tissue Na+ content of the muscle and skin was not mobilized and reduced. These data indicate that the BP reduction after RDN is unrelated to Na+ homeostasis.",2017-08-28 +27321817,OncoScape: Exploring the cancer aberration landscape by genomic data fusion.,"Although large-scale efforts for molecular profiling of cancer samples provide multiple data types for many samples, most approaches for finding candidate cancer genes rely on somatic mutations and DNA copy number only. We present a new method, OncoScape, which exploits five complementary data types across 11 cancer types to identify new candidate cancer genes. We find many rarely mutated genes that are strongly affected by other aberrations. We retrieve the majority of known cancer genes but also new candidates such as STK31 and MSRA with very high confidence. Several genes show a dual oncogene- and tumor suppressor-like behavior depending on the tumor type. Most notably, the well-known tumor suppressor RB1 shows strong oncogene-like signal in colon cancer. We applied OncoScape to cell lines representing ten cancer types, providing the most comprehensive comparison of aberrations in cell lines and tumor samples to date. This revealed that glioblastoma, breast and colon cancer show strong similarity between cell lines and tumors, while head and neck squamous cell carcinoma and bladder cancer, exhibit very little similarity between cell lines and tumors. 
To facilitate exploration of the cancer aberration landscape, we created a web portal enabling interactive analysis of OncoScape results (http://ccb.nki.nl/software/oncoscape).",2016-06-20 +27736829,"Summary of Notifiable Infectious Diseases and Conditions - United States, 2014.","The Summary of Notifiable Infectious Diseases and Conditions-United States, 2014 (hereafter referred to as the summary) contains the official statistics, in tabular and graphic form, for the reported occurrence of nationally notifiable infectious diseases and conditions in the United States for 2014. Unless otherwise noted, data are final totals for 2014 reported as of June 30, 2015. These statistics are collected and compiled from reports sent by U.S. state and territory, New York City, and District of Columbia health departments to the National Notifiable Diseases Surveillance System (NNDSS), which is operated by CDC in collaboration with the Council of State and Territorial Epidemiologists (CSTE). This summary is available at http://www.cdc.gov/mmwr/mmwr_nd/index.html. This site also includes summary publications from previous years.",2016-10-14 +26589448,Programmatic access to logical models in the Cell Collective modeling environment via a REST API.,"

Unlabelled

Cell Collective (www.cellcollective.org) is a web-based interactive environment for constructing, simulating and analyzing logical models of biological systems. Herein, we present a Web service to access models, annotations, and simulation data in the Cell Collective platform through the Representational State Transfer (REST) Application Programming Interface (API). The REST API provides a convenient method for obtaining Cell Collective data through almost any programming language. To ensure easy processing of the retrieved data, the request output from the API is available in a standard JSON format.

Availability and implementation

The Cell Collective REST API is freely available at http://thecellcollective.org/tccapi. All public models in Cell Collective are available through the REST API. Users interested in creating and accessing their own models through the REST API first need to create an account in Cell Collective (http://thecellcollective.org).

Contact

thelikar2@unl.edu.

Supplementary information

Technical user documentation: https://goo.gl/U52GWo.",2015-11-14 +27540266,Higher classification sensitivity of short metagenomic reads with CLARK-S.,"The growing number of metagenomic studies in medicine and environmental sciences is creating increasing demands on the computational infrastructure designed to analyze these very large datasets. Often, the construction of ultra-fast and precise taxonomic classifiers can compromise on their sensitivity (i.e. the number of reads correctly classified). Here we introduce CLARK-S, a new software tool that can classify short reads with high precision, high sensitivity and high speed.

Availability and implementation

CLARK-S is freely available at http://clark.cs.ucr.edu/ CONTACT: stelo@cs.ucr.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-18 +24666037,"NRLiSt BDB, the manually curated nuclear receptors ligands and structures benchmarking database.","Nuclear receptors (NRs) constitute an important class of drug targets. We created the most exhaustive NR-focused benchmarking database to date, the NRLiSt BDB (NRs ligands and structures benchmarking database). The 9905 compounds and 339 structures of the NRLiSt BDB are ready for structure-based and ligand-based virtual screening. In the present study, we detail the protocol used to generate the NRLiSt BDB and its features. We also give some examples of the errors that we found in ChEMBL that convinced us to manually review all original papers. Since extensive and manually curated experimental data about NR ligands and structures are provided in the NRLiSt BDB, it should become a powerful tool to assess the performance of virtual screening methods on NRs, to assist the understanding of NR's function and modulation, and to support the discovery of new drugs targeting NRs. NRLiSt BDB is freely available online at http://nrlist.drugdesign.fr .",2014-03-25 +27124399,Inferring Intra-Community Microbial Interaction Patterns from Metagenomic Datasets Using Associative Rule Mining Techniques.,"The nature of inter-microbial metabolic interactions defines the stability of microbial communities residing in any ecological niche. Deciphering these interaction patterns is crucial for understanding the mode/mechanism(s) through which an individual microbial community transitions from one state to another (e.g. from a healthy to a diseased state). Statistical correlation techniques have been traditionally employed for mining microbial interaction patterns from taxonomic abundance data corresponding to a given microbial community. 
In spite of their efficiency, these correlation techniques can capture only 'pair-wise interactions'. Moreover, their emphasis on statistical significance can potentially result in missing out on several interactions that are relevant from a biological standpoint. This study explores the applicability of one of the earliest association rule mining algorithm i.e. the 'Apriori algorithm' for deriving 'microbial association rules' from the taxonomic profile of given microbial community. The classical Apriori approach derives association rules by analysing patterns of co-occurrence/co-exclusion between various '(subsets of) features/items' across various samples. Using real-world microbiome data, the efficiency/utility of this rule mining approach in deciphering multiple (biologically meaningful) association patterns between 'subsets/subgroups' of microbes (constituting microbiome samples) is demonstrated. As an example, association rules derived from publicly available gut microbiome datasets indicate an association between a group of microbes (Faecalibacterium, Dorea, and Blautia) that are known to have mutualistic metabolic associations among themselves. Application of the rule mining approach on gut microbiomes (sourced from the Human Microbiome Project) further indicated similar microbial association patterns in gut microbiomes irrespective of the gender of the subjects. A Linux implementation of the Association Rule Mining (ARM) software (customised for deriving 'microbial association rules' from microbiome data) is freely available for download from the following link: http://metagenomics.atc.tcs.com/arm.",2016-04-28 +28074633,A computational interactome for prioritizing genes associated with complex agronomic traits in rice (Oryza sativa).,"Rice (Oryza sativa) is one of the most important staple foods for more than half of the global population. Many rice traits are quantitative, complex and controlled by multiple interacting genes. 
Thus, a full understanding of genetic relationships will be critical to systematically identify genes controlling agronomic traits. We developed a genome-wide rice protein-protein interaction network (RicePPINet, http://netbio.sjtu.edu.cn/riceppinet) using machine learning with structural relationship and functional information. RicePPINet contained 708 819 predicted interactions for 16 895 non-transposable element related proteins. The power of the network for discovering novel protein interactions was demonstrated through comparison with other publicly available protein-protein interaction (PPI) prediction methods, and by experimentally determined PPI data sets. Furthermore, global analysis of domain-mediated interactions revealed RicePPINet accurately reflects PPIs at the domain level. Our studies showed the efficiency of the RicePPINet-based method in prioritizing candidate genes involved in complex agronomic traits, such as disease resistance and drought tolerance, was approximately 2-11 times better than random prediction. RicePPINet provides an expanded landscape of computational interactome for the genetic dissection of agronomically important traits in rice.",2017-03-04 +24147765,"Rice DB: an Oryza Information Portal linking annotation, subcellular location, function, expression, regulation, and evolutionary information for rice and Arabidopsis.","Omics research in Oryza sativa (rice) relies on the use of multiple databases to obtain different types of information to define gene function. We present Rice DB, an Oryza information portal that is a functional genomics database, linking gene loci to comprehensive annotations, expression data and the subcellular location of encoded proteins. Rice DB has been designed to integrate the direct comparison of rice with Arabidopsis (Arabidopsis thaliana), based on orthology or 'expressology', thus using and combining available information from two pre-eminent plant models. 
To establish Rice DB, gene identifiers (more than 40 types) and annotations from a variety of sources were compiled, functional information based on large-scale and individual studies was manually collated, hundreds of microarrays were analysed to generate expression annotations, and the occurrences of potential functional regulatory motifs in promoter regions were calculated. A range of computational subcellular localization predictions were also run for all putative proteins encoded in the rice genome, and experimentally confirmed protein localizations have been collated, curated and linked to functional studies in rice. A single search box allows anything from gene identifiers (for rice and/or Arabidopsis), motif sequences, subcellular location, to keyword searches to be entered, with the capability of Boolean searches (such as AND/OR). To demonstrate the utility of Rice DB, several examples are presented including a rice mitochondrial proteome, which draws on a variety of sources for subcellular location data within Rice DB. Comparisons of subcellular location, functional annotations, as well as transcript expression in parallel with Arabidopsis reveals examples of conservation between rice and Arabidopsis, using Rice DB (http://ricedb.plantenergy.uwa.edu.au).",2013-11-29 +23696674,Clinical genomic database.,"Technological advances have greatly increased the availability of human genomic sequencing. However, the capacity to analyze genomic data in a clinically meaningful way lags behind the ability to generate such data. To help address this obstacle, we reviewed all conditions with genetic causes and constructed the Clinical Genomic Database (CGD) (http://research.nhgri.nih.gov/CGD/), a searchable, freely Web-accessible database of conditions based on the clinical utility of genetic diagnosis and the availability of specific medical interventions. 
The CGD currently includes a total of 2,616 genes organized clinically by affected organ systems and interventions (including preventive measures, disease surveillance, and medical or surgical interventions) that could be reasonably warranted by the identification of pathogenic mutations. To aid independent analysis and optimize new data incorporation, the CGD also includes all genetic conditions for which genetic knowledge may affect the selection of supportive care, informed medical decision-making, prognostic considerations, reproductive decisions, and allow avoidance of unnecessary testing, but for which specific interventions are not otherwise currently available. For each entry, the CGD includes the gene symbol, conditions, allelic conditions, clinical categorization (for both manifestations and interventions), mode of inheritance, affected age group, description of interventions/rationale, links to other complementary databases, including databases of variants and presumed pathogenic mutations, and links to PubMed references (>20,000). The CGD will be regularly maintained and updated to keep pace with scientific discovery. Further content-based expert opinions are actively solicited. Eventually, the CGD may assist the rapid curation of individual genomes as part of active medical care.",2013-05-21 +23729471,BeEP Server: Using evolutionary information for quality assessment of protein structure models.,"The BeEP Server (http://www.embnet.qb.fcen.uba.ar/embnet/beep.php) is an online resource aimed to help in the endgame of protein structure prediction. It is able to rank submitted structural models of a protein through an explicit use of evolutionary information, a criterion differing from structural or energetic considerations commonly used in other assessment programs. 
The idea behind BeEP (Best Evolutionary Pattern) is to benefit from the substitution pattern derived from structural constraints present in a set of homologous proteins adopting a given protein conformation. The BeEP method uses a model of protein evolution that takes into account the structure of a protein to build site-specific substitution matrices. The suitability of these substitution matrices is assessed through maximum likelihood calculations from which position-specific and global scores can be derived. These scores estimate how well the structural constraints derived from each structural model are represented in a sequence alignment of homologous proteins. Our assessment on a subset of proteins from the Critical Assessment of techniques for protein Structure Prediction (CASP) experiment has shown that BeEP is capable of discriminating the models and selecting one or more native-like structures. Moreover, BeEP is not explicitly parameterized to find structural similarities between models and given targets, potentially helping to explore the conformational ensemble of the native state.",2013-05-31 +27872588,Conversation Therapy with People with Aphasia and Conversation Partners using Video Feedback: A Group and Case Series Investigation of Changes in Interaction.,"Conversation therapies employing video for feedback and to facilitate outcome measurement are increasingly used with people with post-stroke aphasia and their conversation partners; however the evidence base for change in everyday interaction remains limited. We investigated the effect of Better Conversations with Aphasia (BCA), an intervention that is freely available online at https://extend.ucl.ac.uk/. Eight people with chronic agrammatic aphasia, and their regular conversation partners participated in the tailored 8 week program involving significant video feedback. 
We explored changes in: (i) conversation facilitators (such as multi-modal turns by people with aphasia); and (ii) conversation barriers (such as use of test questions by conversation partners). The outcome of intervention was evaluated directly by measuring change in video-recorded everyday conversations. The study employed a pre-post design with multiple 5 minute samples of conversation before and after intervention, scored by trained raters blind to the point of data collection. Group level analysis showed no significant increase in conversation facilitators. There was, however, a significant reduction in the number of conversation barriers. The case series data revealed variability in conversation behaviors across occasions for the same dyad and between different dyads. Specifically, post-intervention there was a significant increase in facilitator behaviors for two dyads, a decrease for one and no significant change for five dyads. There was a significant decrease in barrier behaviors for five dyads and no significant change for three dyads. The reduction in barrier behaviors was considerable; on average change from over eight to fewer than three barrier behaviors in 5 minutes of conversation. The pre-post design has the limitation of no comparison group. However, change occurs in targeted conversational behaviors and in people with chronic aphasia and their partners. The findings suggest change can occur after eight therapy sessions and have implications for clinical practice. A reduction in barrier behaviors may be easier to obtain, although the controlled case series results demonstrate a significant increase in conversation facilitators is also possible. The rehabilitation tool is available online and video technology was central to delivering intervention and evaluating change.",2016-11-07 +24994456,BorreliaBase: a phylogeny-centered browser of Borrelia genomes.,"

Background

The bacterial genus Borrelia (phylum Spirochaetes) consists of two groups of pathogens represented respectively by B. burgdorferi, the agent of Lyme borreliosis, and B. hermsii, the agent of tick-borne relapsing fever. The number of publicly available Borrelia genomic sequences is growing rapidly with the discovery and sequencing of Borrelia strains worldwide. There is however a lack of dedicated online databases to facilitate comparative analyses of Borrelia genomes.

Description

We have developed BorreliaBase, an online database for comparative browsing of Borrelia genomes. The database is currently populated with sequences from 35 genomes of eight Lyme-borreliosis (LB) group Borrelia species and 7 Relapsing-fever (RF) group Borrelia species. Distinct from genome repositories and aggregator databases, BorreliaBase serves manually curated comparative-genomic data including genome-based phylogeny, genome synteny, and sequence alignments of orthologous genes and intergenic spacers.

Conclusions

With a genome phylogeny at its center, BorreliaBase allows online identification of hypervariable lipoprotein genes, potential regulatory elements, and recombination footprints by providing evolution-based expectations of sequence variability at each genomic locus. The phylo-centric design of BorreliaBase (http://borreliabase.org) is a novel model for interactive browsing and comparative analysis of bacterial genomes online.",2014-07-03 +22976082,An RNA Mapping DataBase for curating RNA structure mapping experiments.,"

Summary

We have established an RNA mapping database (RMDB) to enable structural, thermodynamic and kinetic comparisons across single-nucleotide-resolution RNA structure mapping experiments. The volume of structure mapping data has greatly increased since the development of high-throughput sequencing techniques, accelerated software pipelines and large-scale mutagenesis. For scientists wishing to infer relationships between RNA sequence/structure and these mapping data, there is a need for a database that is curated, tagged with error estimates and interfaced with tools for sharing, visualization, search and meta-analysis. Through its on-line front-end, the RMDB allows users to explore single-nucleotide-resolution mapping data in heat-map, bar-graph and colored secondary structure graphics; to leverage these data to generate secondary structure hypotheses; and to download the data in standardized and computer-friendly files, including the RDAT and community-consensus SNRNASM formats. At the time of writing, the database houses 53 entries, describing more than 2848 experiments of 1098 RNA constructs in several solution conditions and is growing rapidly.

Availability

Freely available on the web at http://rmdb.stanford.edu.

Contact

rhiju@stanford.edu.

Supplementary information

Supplementary data are available at Bioinformatics Online.",2012-09-12 +26356345,Predicting Protein Relationships to Human Pathways through a Relational Learning Approach Based on Simple Sequence Features.,"

Unlabelled

Biological pathways are important elements of systems biology and in the past decade, an increasing number of pathway databases have been set up to document the growing understanding of complex cellular processes. Although more genome-sequence data are becoming available, a large fraction of it remains functionally uncharacterized. Thus, it is important to be able to predict the mapping of poorly annotated proteins to original pathway models.

Results

We have developed a Relational Learning-based Extension (RLE) system to investigate pathway membership through a function prediction approach that mainly relies on combinations of simple properties attributed to each protein. RLE searches for proteins with molecular similarities to specific pathway components. Using RLE, we associated 383 uncharacterized proteins to 28 pre-defined human Reactome pathways, demonstrating relative confidence after proper evaluation. Indeed, in specific cases manual inspection of the database annotations and the related literature supported the proposed classifications. Examples of possible additional components of the Electron transport system, Telomere maintenance and Integrin cell surface interactions pathways are discussed in detail.

Availability

All the human predicted proteins in the 2009 and 2012 releases 30 and 40 of Reactome are available at http://rle.bioinfo.cnio.es.",2014-07-01 +26501966,Genomics Virtual Laboratory: A Practical Bioinformatics Workbench for the Cloud.,"

Background

Analyzing high throughput genomics data is a complex and compute intensive task, generally requiring numerous software tools and large reference data sets, tied together in successive stages of data transformation and visualisation. A computational platform enabling best practice genomics analysis ideally meets a number of requirements, including: a wide range of analysis and visualisation tools, closely linked to large user and reference data sets; workflow platform(s) enabling accessible, reproducible, portable analyses, through a flexible set of interfaces; highly available, scalable computational resources; and flexibility and versatility in the use of these resources to meet demands and expertise of a variety of users. Access to an appropriate computational platform can be a significant barrier to researchers, as establishing such a platform requires a large upfront investment in hardware, experience, and expertise.

Results

We designed and implemented the Genomics Virtual Laboratory (GVL) as a middleware layer of machine images, cloud management tools, and online services that enable researchers to build arbitrarily sized compute clusters on demand, pre-populated with fully configured bioinformatics tools, reference datasets and workflow and visualisation options. The platform is flexible in that users can conduct analyses through web-based (Galaxy, RStudio, IPython Notebook) or command-line interfaces, and add/remove compute nodes and data resources as required. Best-practice tutorials and protocols provide a path from introductory training to practice. The GVL is available on the OpenStack-based Australian Research Cloud (http://nectar.org.au) and the Amazon Web Services cloud. The principles, implementation and build process are designed to be cloud-agnostic.

Conclusions

This paper provides a blueprint for the design and implementation of a cloud-based Genomics Virtual Laboratory. We discuss scope, design considerations and technical and logistical constraints, and explore the value added to the research community through the suite of services and resources provided by our implementation.",2015-10-26 +27914054,Computational Protein Design Through Grafting and Stabilization.,"Computational grafting of target residues onto existing protein scaffolds is a powerful method for the design of proteins with novel function. In the grafting method side chain mutations are introduced into a preexisting protein scaffold to recreate a target functional motif. The success of this approach relies on two primary criteria: (1) the availability of compatible structural scaffolds, and (2) the introduction of mutations that do not affect the protein structure or stability. To identify compatible structural motifs we use the Erebus webserver, to search the protein data bank (PDB) for user-defined structural scaffolds. To identify potential design mutations we use the Eris webserver, which accurately predicts changes in protein stability resulting from mutations. Mutations that increase the protein stability are more likely to maintain the protein structure and therefore produce the desired function. Together these tools provide effective methods for identifying existing templates and guiding further design experiments. The software tools for scaffold searching and design are available at http://dokhlab.org .",2017-01-01 +27914057,BindML/BindML+: Detecting Protein-Protein Interaction Interface Propensity from Amino Acid Substitution Patterns.,"Prediction of protein-protein interaction sites in a protein structure provides important information for elucidating the mechanism of protein function and can also be useful in guiding a modeling or design procedures of protein complex structures. 
Since prediction methods essentially assess the propensity of amino acids that are likely to be part of a protein docking interface, they can help in designing protein-protein interactions. Here, we introduce BindML and BindML+ protein-protein interaction sites prediction methods. BindML predicts protein-protein interaction sites by identifying mutation patterns found in known protein-protein complexes using phylogenetic substitution models. BindML+ is an extension of BindML for distinguishing permanent and transient types of protein-protein interaction sites. We developed an interactive web-server that provides a convenient interface to assist in structural visualization of protein-protein interactions site predictions. The input data for the web-server are a tertiary structure of interest. BindML and BindML+ are available at http://kiharalab.org/bindml/ and http://kiharalab.org/bindml/plus/ .",2017-01-01 +28419290,SVMQA: support-vector-machine-based protein single-model quality assessment.,"

Motivation

The accurate ranking of predicted structural models and selecting the best model from a given candidate pool remain as open problems in the field of structural bioinformatics. The quality assessment (QA) methods used to address these problems can be grouped into two categories: consensus methods and single-model methods. Consensus methods in general perform better and attain higher correlation between predicted and true quality measures. However, these methods frequently fail to generate proper quality scores for native-like structures which are distinct from the rest of the pool. Conversely, single-model methods do not suffer from this drawback and are better suited for real-life applications where many models from various sources may not be readily available.

Results

In this study, we developed a support-vector-machine-based single-model global quality assessment (SVMQA) method. For a given protein model, the SVMQA method predicts TM-score and GDT_TS score based on a feature vector containing statistical potential energy terms and consistency-based terms between the actual structural features (extracted from the three-dimensional coordinates) and predicted values (from primary sequence). We trained SVMQA using CASP8, CASP9 and CASP10 targets and determined the machine parameters by 10-fold cross-validation. We evaluated the performance of our SVMQA method on various benchmarking datasets. Results show that SVMQA outperformed the existing best single-model QA methods both in ranking provided protein models and in selecting the best model from the pool. According to the CASP12 assessment, SVMQA was the best method in selecting good-quality models from decoys in terms of GDTloss.

Availability and implementation

SVMQA method can be freely downloaded from http://lee.kias.re.kr/SVMQA/SVMQA_eval.tar.gz.

Contact

jlee@kias.re.kr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-08-01 +28320317,NaviGO: interactive tool for visualization and functional similarity and coherence analysis with gene ontology.,"

Background

The number of genomics and proteomics experiments is growing rapidly, producing an ever-increasing amount of data that are awaiting functional interpretation. A number of function prediction algorithms were developed and improved to enable fast and automatic function annotation. With the well-defined structure and manual curation, Gene Ontology (GO) is the most frequently used vocabulary for representing gene functions. To understand relationship and similarity between GO annotations of genes, it is important to have a convenient pipeline that quantifies and visualizes the GO function analyses in a systematic fashion.

Results

NaviGO is a web-based tool for interactive visualization, retrieval, and computation of functional similarity and associations of GO terms and genes. Similarity of GO terms and gene functions is quantified with six different scores including protein-protein interaction and context based association scores we have developed in our previous works. Interactive navigation of the GO function space provides intuitive and effective real-time visualization of functional groupings of GO terms and genes as well as statistical analysis of enriched functions.

Conclusions

We developed NaviGO, which visualizes and analyses functional similarity and associations of GO terms and genes. The NaviGO webserver is freely available at: http://kiharalab.org/web/navigo .",2017-03-20 +23497177,SymbioGBR: a web-based database of Symbiodinium associated with cnidarian hosts on the Great Barrier Reef.,"

Background

The algal endosymbionts (genus Symbiodinium) associated with scleractinian corals (and other reef invertebrates) have received a lot of research attention in the past decade, particularly as certain host-symbiont associations appear more affected by increasing seawater temperatures than others. With the rapid accumulation of information on the diversity of Symbiodinium, it is becoming increasingly difficult to compare newly acquired Symbiodinium data with existing data to detect patterns of host-symbiont specificity on broader spatial scales. The lack of a general consensus on the classification of Symbiodinium species coupled with the variety of different markers used to identify the genus Symbiodinium (ITS1, ITS2, LSU D1/D2, chloroplast 23S rDNA and psbA minicircle) further complicate direct comparison.

Description

The SymbioGBR database compiles all currently available Symbiodinium sequences and associated host information of data collected from the Great Barrier Reef into a single relational database that is accessible via a user-friendly, searchable web-based application (http://www.SymbioGBR.org). SymbioGBR allows users to query Symbiodinium types or sequences sourced from various genetic markers (e.g. ITS1, ITS2, LSU D1/D2 and chloroplast 23S) and invertebrate host species to explore their reported associations. In addition, as the database includes sequence information of multiple genetic markers, it allows cross-referencing between conventional (e.g. ITS2 region) and novel markers that exhibit low intragenomic variability (e.g. psbA region). Finally, the database is based on the collection details of individual specimens. Such host-symbiont associations can be assessed quantitatively and viewed in relation to their environmental and geographic context.

Conclusions

The SymbioGBR database provides a comprehensive overview of Symbiodinium diversity and host-associations on the Great Barrier Reef. It provides a quick, user-friendly means to compare newly acquired data on Symbiodinium (e.g. raw sequences or characterized Symbiodinium types) with previous data on the diversity of invertebrate host-symbiont associations on the GBR. The inclusion of psbAncr sequence information allows for validation of widely used ITS1/ITS2 markers and their ability to accurately identify relevant sequences. Most importantly, centralization of sequence information from multiple genetic markers will aid the classification of Symbiodinium species diversity and allow researchers to easily compare patterns of host-Symbiodinium associations.",2013-03-13 +27797773,LRFragLib: an effective algorithm to identify fragments for de novo protein structure prediction.,"

Motivation

The quality of fragment library determines the efficiency of fragment assembly, an approach that is widely used in most de novo protein-structure prediction algorithms. Conventional fragment libraries are constructed mainly based on the identities of amino acids, sometimes facilitated by predicted information including dihedral angles and secondary structures. However, it remains challenging to identify near-native fragment structures with low sequence homology.

Results

We introduce a novel fragment-library-construction algorithm, LRFragLib, to improve the detection of near-native low-homology fragments of 7-10 residues, using a multi-stage, flexible selection protocol. Based on logistic regression scoring models, LRFragLib outperforms existing techniques by achieving a significantly higher precision and a comparable coverage on recent CASP protein sets in sampling near-native structures. The method also has a comparable computational efficiency to the fastest existing techniques with substantially reduced memory usage.

Availability and implementation

The source code is available for download at http://166.111.152.91/Downloads.html.

Contact

hgong@tsinghua.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +27524807,PIPE: a protein-protein interaction passage extraction module for BioCreative challenge. ,"Identifying the interactions between proteins mentioned in biomedical literatures is one of the frequently discussed topics of text mining in the life science field. In this article, we propose PIPE, an interaction pattern generation module used in the Collaborative Biocurator Assistant Task at BioCreative V (http://www.biocreative.org/) to capture frequent protein-protein interaction (PPI) patterns within text. We also present an interaction pattern tree (IPT) kernel method that integrates the PPI patterns with convolution tree kernel (CTK) to extract PPIs. Methods were evaluated on LLL, IEPA, HPRD50, AIMed and BioInfer corpora using cross-validation, cross-learning and cross-corpus evaluation. Empirical evaluations demonstrate that our method is effective and outperforms several well-known PPI extraction methods. DATABASE URL.",2016-08-14 +26679776,Properly defining the targets of a transcription factor significantly improves the computational identification of cooperative transcription factor pairs in yeast.,"

Background

Transcriptional regulation of gene expression in eukaryotes is usually accomplished by cooperative transcription factors (TFs). Computational identification of cooperative TF pairs has become a hot research topic and many algorithms have been proposed in the literature. A typical algorithm for predicting cooperative TF pairs has two steps. (Step 1) Define the targets of each TF under study. (Step 2) Design a measure for calculating the cooperativity of a TF pair based on the targets of these two TFs. While different algorithms have distinct sophisticated cooperativity measures, the targets of a TF are usually defined using ChIP-chip data. However, there is an inherent weakness in using ChIP-chip data to define the targets of a TF. ChIP-chip analysis can only identify the binding targets of a TF but it cannot distinguish the true regulatory from the binding but non-regulatory targets of a TF.

Results

This work is the first study which aims to investigate whether the performance of computational identification of cooperative TF pairs could be improved by using a more biologically relevant way to define the targets of a TF. For this purpose, we propose four simple algorithms, all of which consist of two steps. (Step 1) Define the targets of a TF using (i) ChIP-chip data in the first algorithm, (ii) TF binding data in the second algorithm, (iii) TF perturbation data in the third algorithm, and (iv) the intersection of TF binding and TF perturbation data in the fourth algorithm. Compared with the first three algorithms, the fourth algorithm uses a more biologically relevant way to define the targets of a TF. (Step 2) Measure the cooperativity of a TF pair by the statistical significance of the overlap of the targets of these two TFs using the hypergeometric test. By adopting four existing performance indices, we show that the fourth proposed algorithm (PA4) significantly outperforms the other three proposed algorithms. This suggests that the computational identification of cooperative TF pairs is indeed improved when using a more biologically relevant way to define the targets of a TF. Strikingly, the prediction results of our simple PA4 are more biologically meaningful than those of the 12 existing sophisticated algorithms in the literature, all of which used ChIP-chip data to define the targets of a TF. This suggests that properly defining the targets of a TF may be more important than designing sophisticated cooperativity measures. In addition, our PA4 has the power to predict several experimentally validated cooperative TF pairs, which have not been successfully predicted by any existing algorithms in the literature.

Conclusions

This study shows that the performance of computational identification of cooperative TF pairs could be improved by using a more biologically relevant way to define the targets of a TF. The main contribution of this study is not to propose another new algorithm but to provide a new thinking for the research of computational identification of cooperative TF pairs. Researchers should put more effort on properly defining the targets of a TF (i.e. Step 1) rather than totally focus on designing sophisticated cooperativity measures (i.e. Step 2). The lists of TF target genes, the Matlab codes and the prediction results of the four proposed algorithms could be downloaded from our companion website http://cosbi3.ee.ncku.edu.tw/TFI/.",2015-12-09 +26306699,"MEXPRESS: visualizing expression, DNA methylation and clinical TCGA data.","

Background

In recent years, increasing amounts of genomic and clinical cancer data have become publicly available through large-scale collaborative projects such as The Cancer Genome Atlas (TCGA). However, as long as these datasets are difficult to access and interpret, they are essentially useless for a major part of the research community and their scientific potential will not be fully realized. To address these issues we developed MEXPRESS, a straightforward and easy-to-use web tool for the integration and visualization of the expression, DNA methylation and clinical TCGA data on a single-gene level ( http://mexpress.be ).

Results

In comparison to existing tools, MEXPRESS allows researchers to quickly visualize and interpret the different TCGA datasets and their relationships for a single gene, as demonstrated for GSTP1 in prostate adenocarcinoma. We also used MEXPRESS to reveal the differences in the DNA methylation status of the PAM50 marker gene MLPH between the breast cancer subtypes and how these differences were linked to the expression of MLPH.

Conclusions

We have created a user-friendly tool for the visualization and interpretation of TCGA data, offering clinical researchers a simple way to evaluate the TCGA data for their genes or candidate biomarkers of interest.",2015-08-26 +26063840,EXIMS: an improved data analysis pipeline based on a new peak picking method for EXploring Imaging Mass Spectrometry data.,"

Motivation

Matrix Assisted Laser Desorption Ionization-Imaging Mass Spectrometry (MALDI-IMS) in 'omics' data acquisition generates detailed information about the spatial distribution of molecules in a given biological sample. Various data processing methods have been developed for exploring the resultant high volume data. However, most of these methods process data in the spectral domain and do not make the most of the important spatial information available through this technology. Therefore, we propose a novel streamlined data analysis pipeline specifically developed for MALDI-IMS data utilizing significant spatial information for identifying hidden significant molecular distribution patterns in these complex datasets.

Methods

The proposed unsupervised algorithm uses Sliding Window Normalization (SWN) and a new spatial distribution based peak picking method developed based on Gray level Co-Occurrence (GCO) matrices followed by clustering of biomolecules. We also use gist descriptors and an improved version of GCO matrices to extract features from molecular images and minimum medoid distance to automatically estimate the number of possible groups.

Results

We evaluated our algorithm using a new MALDI-IMS metabolomics dataset of a plant (Eucalypt) leaf. The algorithm revealed hidden significant molecular distribution patterns in the dataset, which the current Component Analysis and Segmentation Map based approaches failed to extract. We further demonstrate the performance of our peak picking method over other traditional approaches by using a publicly available MALDI-IMS proteomics dataset of a rat brain. Although SWN did not show any significant improvement as compared with using no normalization, the visual assessment showed an improvement as compared to using the median normalization.

Availability and implementation

The source code and sample data are freely available at http://exims.sourceforge.net/.

Contact

awgcdw@student.unimelb.edu.au or chalini_w@live.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-10 +24424778,Global genomic diversity of Oryza sativa varieties revealed by comparative physical mapping.,"Bacterial artificial chromosome (BAC) physical maps embedding a large number of BAC end sequences (BESs) were generated for Oryza sativa ssp. indica varieties Minghui 63 (MH63) and Zhenshan 97 (ZS97) and were compared with the genome sequences of O. sativa spp. japonica cv. Nipponbare and O. sativa ssp. indica cv. 93-11. The comparisons exhibited substantial diversities in terms of large structural variations and small substitutions and indels. Genome-wide BAC-sized and contig-sized structural variations were detected, and the shared variations were analyzed. In the expansion regions of the Nipponbare reference sequence, in comparison to the MH63 and ZS97 physical maps, as well as to the previously constructed 93-11 physical map, the amounts and types of the repeat contents, and the outputs of gene ontology analysis, were significantly different from those of the whole genome. Using the physical maps of four wild Oryza species from OMAP (http://www.omap.org) as a control, we detected many conserved and divergent regions related to the evolution process of O. sativa. Between the BESs of MH63 and ZS97 and the two reference sequences, a total of 1532 polymorphic simple sequence repeats (SSRs), 71,383 SNPs, 1767 multiple nucleotide polymorphisms, 6340 insertions, and 9137 deletions were identified. This study provides independent whole-genome resources for intra- and intersubspecies comparisons and functional genomics studies in O. sativa. 
Both the comparative physical maps and the GBrowse, which integrated the QTL and molecular markers from GRAMENE (http://www.gramene.org) with our physical maps and analysis results, are open to the public through our Web site (http://gresource.hzau.edu.cn/resource/resource.html).",2014-01-14 +26671443,BMDExpress Data Viewer - a visualization tool to analyze BMDExpress datasets.,"Regulatory agencies increasingly apply benchmark dose (BMD) modeling to determine points of departure for risk assessment. BMDExpress applies BMD modeling to transcriptomic datasets to identify transcriptional BMDs. However, graphing and analytical capabilities within BMDExpress are limited, and the analysis of output files is challenging. We developed a web-based application, BMDExpress Data Viewer (http://apps.sciome.com:8082/BMDX_Viewer/), for visualizing and graphing BMDExpress output files. The application consists of ""Summary Visualization"" and ""Dataset Exploratory"" tools. Through analysis of transcriptomic datasets of the toxicants furan and 4,4'-methylenebis(N,N-dimethyl)benzenamine, we demonstrate that the ""Summary Visualization Tools"" can be used to examine distributions of gene and pathway BMD values, and to derive a potential point of departure value based on summary statistics. By applying filters on enrichment P-values and minimum number of significant genes, the ""Functional Enrichment Analysis"" tool enables the user to select biological processes or pathways that are selectively perturbed by chemical exposure and identify the related BMD. The ""Multiple Dataset Comparison"" tool enables comparison of gene and pathway BMD values across multiple experiments (e.g., across timepoints or tissues). The ""BMDL-BMD Range Plotter"" tool facilitates the observation of BMD trends across biological processes or pathways. Through our case studies, we demonstrate that BMDExpress Data Viewer is a useful tool to visualize, explore and analyze BMDExpress output files. 
Visualizing the data in this manner enables rapid assessment of data quality, model fit, doses of peak activity, most sensitive pathway perturbations and other metrics that will be useful in applying toxicogenomics in risk assessment. © 2015 Her Majesty the Queen in Right of Canada. Journal of Applied Toxicology published by John Wiley & Sons, Ltd.",2015-12-15 +24840677,Atopic disorders are more common in childhood migraine and correlated headache phenotype.,"

Background

The supportive clinical and pathophysiological data about the correlation between migraine and atopic disorders are far from a coincidence. In order to determine and investigate the correlates of atopic disorders in a specific dataset, we performed this retrospective cross-sectional clinical-based study.

Methods

The dataset was composed from three tertiary center web-based databases (http://www.childhoodheadache.org). Headache diagnosis and differential diagnosis were made according to the International Classification of Headache Disorders, 2nd version and the Diagnostic Statistical Manual of Mental Disorders, 5th edition. Migraine with aura, migraine without aura, chronic migraine and episodic and chronic tension type headache (TTH) patients were included. All other causes of headache disorders, including comorbid headache disorders like migraine plus TTH or ""possible"" causes of headache, were excluded.

Results

The study included 438 patients with migraine and 357 patients with TTH, whose age and sex distribution were identical. After descriptive statistics accordingly, 80 migraine (18.2%) and 23 TTH (6.4%) patients were found to have specific atopic disorders (P < 0.001). Atopic disorders are more commonly reported in patients with migraine with aura (21.6%) than those with migraine without aura and TTH (P < 0.001). The most common atopic disorders were seasonal rhinitis, conjunctivitis and asthma. There was also a close correlation between TTH with atopic disorders and psychiatric comorbid disorders of the patients.

Conclusions

Although the International Classification of Headache Disorders, 2nd version, does not specify, atopic disorders should be suspected in all migraine patients and their relatives, not only for accurate diagnosis but also for planning prophylactic medications, such as β-blockers.",2014-10-15 +26540723,Inference With Collaborative Model for Interactive Tumor Segmentation in Medical Image Sequences.,"Segmenting organisms or tumors from medical data (e.g., computed tomography volumetric images, ultrasound, or magnetic resonance imaging images/image sequences) is one of the fundamental tasks in medical image analysis and diagnosis, and has received long-term attentions. This paper studies a novel computational framework of interactive segmentation for extracting liver tumors from image sequences, and it is suitable for different types of medical data. The main contributions are twofold. First, we propose a collaborative model to jointly formulate the tumor segmentation from two aspects: 1) region partition and 2) boundary presence. The two terms are complementary but simultaneously competing: the former extracts the tumor based on its appearance/texture information, while the latter searches for the palpable tumor boundary. Moreover, in order to adapt the data variations, we allow the model to be discriminatively trained based on both the seed pixels traced by the Lucas-Kanade algorithm and the scribbles placed by the user. Second, we present an effective inference algorithm that iterates to: 1) solve tumor segmentation using the augmented Lagrangian method and 2) propagate the segmentation across the image sequence by searching for distinctive matches between images. We keep the collaborative model updated during the inference in order to well capture the tumor variations over time. We have verified our system for segmenting liver tumors from a number of clinical data, and have achieved very promising results. 
The software developed with this paper can be found at http://vision.sysu.edu.cn/projects/med-interactive-seg/.",2015-10-29 +23452239,Establishment of the Lotus japonicus Gene Expression Atlas (LjGEA) and its use to explore legume seed maturation.,"Lotus japonicus is a model species for legume genomics. To accelerate legume functional genomics, we developed a Lotus japonicus Gene Expression Atlas (LjGEA), which provides a global view of gene expression in all organ systems of this species, including roots, nodules, stems, petioles, leaves, flowers, pods and seeds. Time-series data covering multiple stages of developing pod and seed are included in the LjGEA. In addition, previously published L. japonicus Affymetrix data are included in the database, making it a 'one-stop shop' for transcriptome analysis of this species. The LjGEA web server (http://ljgea.noble.org/) enables flexible, multi-faceted analyses of the transcriptome. Transcript data may be accessed using the Affymetrix probe identification number, DNA sequence, gene name, functional description in natural language, and GO and KEGG annotation terms. Genes may be discovered through co-expression or differential expression analysis. Users may select a subset of experiments and visualize and compare expression profiles of multiple genes simultaneously. Data may be downloaded in a tabular form compatible with common analytical and visualization software. To illustrate the power of LjGEA, we explored the transcriptome of developing seeds. Genes represented by 36 474 probe sets were expressed at some stage during seed development, and almost half of these genes displayed differential expression during development. 
Among the latter were 624 transcription factor genes, some of which are orthologs of transcription factor genes that are known to regulate seed development in other species, while most are novel and represent attractive targets for reverse genetics approaches to determine their roles in this important organ.",2013-03-04 +29479215,Assessment of WINROP algorithm as screening tool for preterm infants in Manitoba to detect retinopathy of prematurity.,"

Objective

Developing less invasive methods for early detection of retinopathy of prematurity (ROP) is vital to minimizing blindness in premature infants. Lofqvist and colleagues developed a computer-based ROP risk algorithm (WINROP) (https://winrop.com), which detects downtrends in postnatal weight gain that correlate with the development of sight-threatening ROP. The aim of this study is to investigate the sensitivity and specificity of the WINROP algorithm to detect vision-threatening ROP.

Methods

This is a retrospective chart review study between January 2008 and December 2013. This study was conducted in the neonatal intensive care unit in Children's Hospital at Health Sciences Centre, Winnipeg, Manitoba, Canada. The study included preterm infants, less than 32 weeks' gestation, who were admitted to the hospital during the study period. The included 215 infants were eligible for ROP screening and had sufficient data to be entered into the WINROP algorithm. Infants were screened by a paediatric ophthalmologist for retinopathy of prematurity. The body weight of infants was measured weekly and entered into the WINROP algorithm; the sensitivity and the specificity of the WINROP algorithm were assessed.

Results

The mean gestational age was 28.6 ± 1.8 weeks. The mean body weight was 1244 ± 294 g. The sensitivity of the WINROP algorithm to detect vision-threatening retinopathy of prematurity in our cohort was 90% (P=0.021) with a specificity of 60% (P=0.002).

Conclusion

The WINROP algorithm lacks sufficient sensitivity to be used clinically in our population. The algorithm needs to be reassessed in contemporary populations.",2017-05-08 +27522608,HOMCOS: an updated server to search and model complex 3D structures.,"The HOMCOS server ( http://homcos.pdbj.org ) was updated for both searching and modeling the 3D complexes for all molecules in the PDB. As compared to the previous HOMCOS server, the current server targets all of the molecules in the PDB including proteins, nucleic acids, small compounds and metal ions. Their binding relationships are stored in the database. Five services are available for users. For the services ""Modeling a Homo Protein Multimer"" and ""Modeling a Hetero Protein Multimer"", a user can input one or two proteins as the queries, while for the service ""Protein-Compound Complex"", a user can input one chemical compound and one protein. The server searches similar molecules by BLAST and KCOMBU. Based on each similar complex found, a simple sequence-replaced model is quickly generated by replacing the residue names and numbers with those of the query protein. A target compound is flexibly superimposed onto the template compound using the program fkcombu. If monomeric 3D structures are input as the query, then template-based docking can be performed. For the service ""Searching Contact Molecules for a Query Protein"", a user inputs one protein sequence as the query, and then the server searches for its homologous proteins in PDB and summarizes their contacting molecules as the predicted contacting molecules. The results are summarized in ""Summary Bars"" or ""Site Table"" display. The latter shows the results as a one-site-one-row table, which is useful for annotating the effects of mutations. The service ""Searching Contact Molecules for a Query Compound"" is also available.",2016-08-13 +27522084,Extensive complementarity between gene function prediction methods.,"

Motivation

The number of sequenced genomes rises steadily but we still lack the knowledge about the biological roles of many genes. Automated function prediction (AFP) is thus a necessity. We hypothesized that AFP approaches that draw on distinct genome features may be useful for predicting different types of gene functions, motivating a systematic analysis of the benefits gained by obtaining and integrating such predictions.

Results

Our pipeline amalgamates 5 133 543 genes from 2071 genomes in a single massive analysis that evaluates five established genomic AFP methodologies. While 1227 Gene Ontology (GO) terms yielded reliable predictions, the majority of these functions were accessible to only one or two of the methods. Moreover, different methods tend to assign a GO term to non-overlapping sets of genes. Thus, inferences made by diverse genomic AFP methods display a striking complementarity, both gene-wise and function-wise. Because of this, a viable integration strategy is to rely on a single most-confident prediction per gene/function, rather than enforcing agreement across multiple AFP methods. Using an information-theoretic approach, we estimate that current databases contain 29.2 bits/gene of known Escherichia coli gene functions. This can be increased by up to 5.5 bits/gene using individual AFP methods or by 11 additional bits/gene upon integration, thereby providing a highly-ranking predictor on the Critical Assessment of Function Annotation 2 community benchmark. Availability of more sequenced genomes boosts the predictive accuracy of AFP approaches and also the benefit from integrating them.

Availability and implementation

The individual and integrated GO predictions for the complete set of genes are available from http://gorbi.irb.hr/ CONTACT: fran.supek@irb.hr. Supplementary information: Supplementary materials are available at Bioinformatics online.",2016-08-13 +26072479,A hierarchical Bayesian model for flexible module discovery in three-way time-series data.,"

Motivation

Detecting modules of co-ordinated activity is fundamental in the analysis of large biological studies. For two-dimensional data (e.g. genes × patients), this is often done via clustering or biclustering. More recently, studies monitoring patients over time have added another dimension. Analysis is much more challenging in this case, especially when time measurements are not synchronized. New methods that can analyze three-way data are thus needed.

Results

We present a new algorithm for finding coherent and flexible modules in three-way data. Our method can identify both core modules that appear in multiple patients and patient-specific augmentations of these core modules that contain additional genes. Our algorithm is based on a hierarchical Bayesian data model and Gibbs sampling. The algorithm outperforms extant methods on simulated and on real data. The method successfully dissected key components of septic shock response from time series measurements of gene expression. Detected patient-specific module augmentations were informative for disease outcome. In analyzing brain functional magnetic resonance imaging time series of subjects at rest, it detected the pertinent brain regions involved.

Availability and implementation

R code and data are available at http://acgt.cs.tau.ac.il/twigs/.",2015-06-01 +22102885,"Novel SSR markers from BAC-end sequences, DArT arrays and a comprehensive genetic map with 1,291 marker loci for chickpea (Cicer arietinum L.).","Chickpea (Cicer arietinum L.) is the third most important cool season food legume, cultivated in arid and semi-arid regions of the world. The goal of this study was to develop novel molecular markers such as microsatellite or simple sequence repeat (SSR) markers from bacterial artificial chromosome (BAC)-end sequences (BESs) and diversity arrays technology (DArT) markers, and to construct a high-density genetic map based on recombinant inbred line (RIL) population ICC 4958 (C. arietinum)×PI 489777 (C. reticulatum). A BAC-library comprising 55,680 clones was constructed and 46,270 BESs were generated. Mining of these BESs provided 6,845 SSRs, and primer pairs were designed for 1,344 SSRs. In parallel, DArT arrays with ca. 15,000 clones were developed, and 5,397 clones were found polymorphic among 94 genotypes tested. Screening of newly developed BES-SSR markers and DArT arrays on the parental genotypes of the RIL mapping population showed polymorphism with 253 BES-SSR markers and 675 DArT markers. Segregation data obtained for these polymorphic markers and 494 markers data compiled from published reports or collaborators were used for constructing the genetic map. As a result, a comprehensive genetic map comprising 1,291 markers on eight linkage groups (LGs) spanning a total of 845.56 cM distance was developed (http://cmap.icrisat.ac.in/cmap/sm/cp/thudi/). The number of markers per linkage group ranged from 68 (LG 8) to 218 (LG 3) with an average inter-marker distance of 0.65 cM. 
While the developed resource of molecular markers will be useful for genetic diversity, genetic mapping and molecular breeding applications, the comprehensive genetic map with integrated BES-SSR markers will facilitate its anchoring to the physical map (under construction) to accelerate map-based cloning of genes in chickpea and comparative genome evolution studies in legumes.",2011-11-15 +28231383,The pangenome of hexaploid bread wheat.,"There is an increasing understanding that variation in gene presence-absence plays an important role in the heritability of agronomic traits; however, there have been relatively few studies on variation in gene presence-absence in crop species. Hexaploid wheat is one of the most important food crops in the world and intensive breeding has reduced the genetic diversity of elite cultivars. Major efforts have produced draft genome assemblies for the cultivar Chinese Spring, but it is unknown how well this represents the genome diversity found in current modern elite cultivars. In this study we build an improved reference for Chinese Spring and explore gene diversity across 18 wheat cultivars. We predict a pangenome size of 140 500 ± 102 genes, a core genome of 81 070 ± 1631 genes and an average of 128 656 genes in each cultivar. Functional annotation of the variable gene set suggests that it is enriched for genes that may be associated with important agronomic traits. In addition to variation in gene presence, more than 36 million intervarietal single nucleotide polymorphisms were identified across the pangenome. This study of the wheat pangenome provides insight into genome diversity in elite wheat as a basis for genomics-based improvement of this important crop. 
A wheat pangenome, GBrowse, is available at http://appliedbioinformatics.com.au/cgi-bin/gb2/gbrowse/WheatPan/, and data are available to download from http://wheatgenome.info/wheat_genome_databases.php.",2017-04-05 +28465078,Joint involvement in systemic lupus erythematosus: From pathogenesis to clinical assessment.,"

Objective

In the present review, the different phenotypes, clinimetric and imaging tools able to assess joint involvement in patients affected by Systemic Lupus Erythematosus (SLE) have been described and summarized. Furthermore, the current knowledge about the pathogenic mechanism and the potential biomarkers of this feature is reported.

Methods

A literature search was done in PubMed, accessed via the National Library of Medicine PubMed interface (http://www.ncbi.nlm.nih.gov/pubmed). Firstly, PubMed was searched using the term ""systemic lupus erythematosus"" OR ""lupus"" in combination with (AND) ""joint"" OR ""articular"". Secondly, the same PubMed research was combined with other terms, such as ""pathogenesis"" OR ""genetic"" OR ""antibodies"" OR ""biomarkers"" OR ""cytokines"" OR ""imaging"" OR ""ultrasonography"" OR ""magnetic resonance"" OR ""clinimetry"".

Results

After a stringent selection, we evaluated in the present review 13 papers concerning clinical phenotypes of SLE joint involvement, 14 concerning clinimetric assessment, 20 concerning imaging, and finally, 28 concerning pathogenesis and biomarkers. Further relevant data were obtained from the reference lists of articles returned using these search terms and from authors own experience and knowledge of the literature.

Conclusion

Despite the prevalence and severity of SLE joint involvement, more awareness and a deeper evaluation of the clinical heterogeneity of this manifestation are mandatory. Moreover, longitudinal studies are needed to assess the progression of this manifestation and to provide standard definitions and examination/recording protocols.",2017-04-04 +26139633,A multivariate Bernoulli model to predict DNaseI hypersensitivity status from haplotype data.,"

Motivation

Haplotype models enjoy a wide range of applications in population inference and disease gene discovery. The hidden Markov models traditionally used for haplotypes are hindered by the dubious assumption that dependencies occur only between consecutive pairs of variants. In this article, we apply the multivariate Bernoulli (MVB) distribution to model haplotype data. The MVB distribution relies on interactions among all sets of variants, thus allowing for the detection and exploitation of long-range and higher-order interactions. We discuss penalized estimation and present an efficient algorithm for fitting sparse versions of the MVB distribution to haplotype data. Finally, we showcase the benefits of the MVB model in predicting DNaseI hypersensitivity (DH) status--an epigenetic mark describing chromatin accessibility--from population-scale haplotype data.

Results

We fit the MVB model to real data from 59 individuals on whom both haplotypes and DH status in lymphoblastoid cell lines are publicly available. The model allows prediction of DH status from genetic data (prediction R2=0.12 in cross-validations). Comparisons of prediction under the MVB model with prediction under linear regression (best linear unbiased prediction) and logistic regression demonstrate that the MVB model achieves about 10% higher prediction R2 than the two competing methods in empirical data.

Availability and implementation

Software implementing the method described can be downloaded at http://bogdan.bioinformatics.ucla.edu/software/.

Contact

shihuwenbo@ucla.edu or pasaniuc@ucla.edu.",2015-07-02 +27285588,Effects of Ambient Air Pollution Exposure on Olfaction: A Review.,"

Background

Olfactory dysfunction affects millions of people worldwide. This sensory impairment is associated with neurodegenerative disease and significantly decreased quality of life. Exposure to airborne pollutants has been implicated in olfactory decline, likely due to the anatomic susceptibility of the olfactory nerve to the environment. Historically, studies have focused on occupational exposures, but more recent studies have considered effects from exposure to ambient air pollutants.

Objectives

To examine all relevant human data evaluating a link between ambient pollution exposure and olfaction and to review supporting animal data in order to examine potential mechanisms for pollution-associated olfactory loss.

Methods

We identified and reviewed relevant articles from 1950 to 2015 using PubMed and Web of Science and focusing on human epidemiologic and pathophysiologic studies. Animal studies were included only to support pertinent data on humans. We reviewed findings from these studies evaluating a relationship between environmental pollutant exposure and olfactory function.

Results

We identified and reviewed 17 articles, with 1 additional article added from a bibliography search, for a total of 18 human studies. There is evidence in human epidemiologic and pathologic studies that increased exposure to ambient air pollutants is associated with olfactory dysfunction. However, most studies have used proxies for pollution exposure in small samples of convenience. Human pathologic studies, with supporting animal work, have also shown that air pollution can contact the olfactory epithelium, translocate to the olfactory bulb, and migrate to the olfactory cortex. Pollutants can deposit at each location, causing direct damage and disruption of tissue morphology or inducing local inflammation and cellular stress responses.

Conclusions

Ambient air pollution may impact human olfactory function. Additional studies are needed to examine air pollution-related olfactory impacts on the general population using measured pollution exposures and to link pollution exposure with olfactory dysfunction and related pathology. Citation: Ajmani GS, Suh HH, Pinto JM. 2016. Effects of ambient air pollution exposure on olfaction: a review. Environ Health Perspect 124:1683-1693; http://dx.doi.org/10.1289/EHP136.",2016-06-10 +29026509,Development and evaluation of a speech-generating AAC mobile app for minimally verbal children with autism spectrum disorder in Mainland China.,"

Background

Mobile touchscreen devices are currently being used as speech-generating devices (SGDs) and have been shown to promote the communication skills, particularly the requesting skills of children with autism spectrum disorders (ASD) who have limited spoken language. However, no augmentative and alternative communication (AAC) mobile app has been developed and evaluated in the Chinese language in Mainland China.

Methods

We developed an AAC mobile app, which is the first in Mainland China, to our knowledge, named Yuudee (Chinese name (xiaoyudi)). Yuudee was developed using the Objective-C and Java programming languages. A five-phase training protocol for making requests using Yuudee was developed based on the Picture Exchange Communication System. We trained ten minimally verbal children with ASD to make requests using Yuudee and evaluated the effectiveness of the training.

Results

Yuudee has a built-in library of over 400 pictures with corresponding spoken phrases that are divided into 39 categories ranging from making simple requests to expressing emotions. An additional important feature of Yuudee is its customization functions that allow a parent or trainer to easily select pictures and phrases to display, create new pictures and phrases, and change the layouts and orders of the pictures to fit the personal needs of each child. Yuudee is freely available in an iOS version from the iTunes App Store (https://itunes.apple.com/cn/app/xiao-yu-di/id794832934?mt=8) and in an Android version from Google Play (https://play.google.com/store/apps/details?id=com.supersuperstar.yuudee.vue) and domestic Chinese Android App stores. Three consecutive unprompted successful responses, which were defined as an initial training success, were achieved in at least three of the five phases for all ten of the evaluated children. The accuracy rate of a given phase was calculated for each child who achieved three consecutive unprompted successful responses in the phase. Seven children achieved at least 50% accuracy in at least two of the five phases. The other three children achieved at least 50% accuracy in only one phase. Two children achieved at least 50% accuracy in all of the phases in which they were trained.

Conclusions

Our data suggest that Yuudee is a useful tool for helping minimally verbal children with ASD make requests.",2017-10-03 +28885978,In Vitro Evaluation of Mitochondrial Function and Estrogen Signaling in Cell Lines Exposed to the Antiseptic Cetylpyridinium Chloride.,"

Background

Quaternary ammonium salts (QUATS), such as cetylpyridinium chloride (CPC) and benzalkonium chloride (BAK), are frequently used in antiseptic formulations, including toothpastes, mouthwashes, lozenges, throat and nasal sprays, and as biocides. Although in a recent ruling, the U.S. Food and Drug Administration (FDA) banned CPC from certain products and requested more data on BAK's efficacy and safety profile, QUATS, in general, and CPC and BAK, in particular, continue to be used in personal health care, food, and pharmaceutical and cleaning industries.

Objectives

We aimed to assess CPC's effects on mitochondrial toxicity and endocrine disruption in vitro.

Method

Mitochondrial O2 consumption and adenosine triphosphate (ATP) synthesis rates of osteosarcoma cybrid cells were measured before and after CPC and BAK treatment. Antiestrogenic effects of the compounds were measured by a luciferase-based assay using recombinant human breast carcinoma cells (VM7Luc4E2, ERalpha-positive).

Results

CPC inhibited both mitochondrial O2 consumption [half maximal inhibitory concentration (IC50): 3.8μM] and ATP synthesis (IC50: 0.9μM), and additional findings supported inhibition of mitochondrial complex 1 as the underlying mechanism for these effects. In addition, CPC showed concentration-dependent antiestrogenic activity [half maximal effective concentration (EC50): 4.5μM]. BAK, another antimicrobial QUATS that is structurally similar to CPC, and the pesticide rotenone, a known complex 1 inhibitor, also showed mitochondrial inhibitory and antiestrogenic effects. In all three cases, there was overlap of the antiestrogenic activity with the mitochondrial inhibitory activity.

Conclusions

Mitochondrial inhibition in vitro occurred at a CPC concentration that may be relevant to human exposures. The antiestrogenic activity of CPC, BAK, rotenone, and triclosan may be related to their mitochondrial inhibitory activity. Our findings support the need for additional research on the mitochondrial inhibitory and antiestrogenic effects of QUATS, including CPC and BAK. https://doi.org/10.1289/EHP1404.",2017-08-22 +28598584,CaMELS: In silico prediction of calmodulin binding proteins and their binding sites.,"Due to Ca2+ -dependent binding and the sequence diversity of Calmodulin (CaM) binding proteins, identifying CaM interactions and binding sites in the wet-lab is tedious and costly. Therefore, computational methods for this purpose are crucial to the design of such wet-lab experiments. We present an algorithm suite called CaMELS (CalModulin intEraction Learning System) for predicting proteins that interact with CaM as well as their binding sites using sequence information alone. CaMELS offers state of the art accuracy for both CaM interaction and binding site prediction and can aid biologists in studying CaM binding proteins. For CaM interaction prediction, CaMELS uses protein sequence features coupled with a large-margin classifier. CaMELS models the binding site prediction problem using multiple instance machine learning with a custom optimization algorithm which allows more effective learning over imprecisely annotated CaM-binding sites during training. CaMELS has been extensively benchmarked using a variety of data sets, mutagenic studies, proteome-wide Gene Ontology enrichment analyses and protein structures. Our experiments indicate that CaMELS outperforms simple motif-based search and other existing methods for interaction and binding site prediction. We have also found that the whole sequence of a protein, rather than just its binding site, is important for predicting its interaction with CaM. 
Using the machine learning model in CaMELS, we have identified important features of protein sequences for CaM interaction prediction as well as characteristic amino acid sub-sequences and their relative position for identifying CaM binding sites. Python code for training and evaluating CaMELS together with a webserver implementation is available at the URL: http://faculty.pieas.edu.pk/fayyaz/software.html#camels.",2017-07-03 +24044748,Noncontiguous atom matching structural similarity function.,"Measuring similarity between molecules is a fundamental problem in cheminformatics. Given that similar molecules tend to have similar physical, chemical, and biological properties, the notion of molecular similarity plays an important role in the exploration of molecular data sets, query-retrieval in molecular databases, and in structure-property/activity modeling. Various methods to define structural similarity between molecules are available in the literature, but so far none has been used with consistent and reliable results for all situations. We propose a new similarity method based on atom alignment for the analysis of structural similarity between molecules. This method is based on the comparison of the bonding profiles of atoms on comparable molecules, including features that are seldom found in other structural or graph matching approaches like chirality or double bond stereoisomerism. The similarity measure is then defined on the annotated molecular graph, based on an iterative directed graph similarity procedure and optimal atom alignment between atoms using a pairwise matching algorithm. With the proposed approach the similarities detected are more intuitively understood because similar atoms in the molecules are explicitly shown. 
This noncontiguous atom matching structural similarity method (NAMS) was tested and compared with one of the most widely used similarity methods (fingerprint-based similarity) using three difficult data sets with different characteristics. Despite having a higher computational cost, the method performed well being able to distinguish either different or very similar hydrocarbons that were indistinguishable using a fingerprint-based approach. NAMS also verified the similarity principle using a data set of structurally similar steroids with differences in the binding affinity to the corticosteroid binding globulin receptor by showing that pairs of steroids with a high degree of similarity (>80%) tend to have smaller differences in the absolute value of binding activity. Using a highly diverse set of compounds with information about the monoamine oxidase inhibition level, the method was also able to recover a significantly higher average fraction of active compounds when the seed is active for different cutoff threshold values of similarity. Particularly, for the cutoff threshold values of 86%, 93%, and 96.5%, NAMS was able to recover a fraction of actives of 0.57, 0.63, and 0.83, respectively, while the fingerprint-based approach was able to recover a fraction of actives of 0.41, 0.40, and 0.39, respectively. NAMS is made available freely for the whole community in a simple Web based tool as well as the Python source code at http://nams.lasige.di.fc.ul.pt/.",2013-10-08 +21460088,Complete genome sequence of the Thermophilic Bacterium Exiguobacterium sp. AT1b.,"Here we present the genome of strain Exiguobacterium sp. AT1b, a thermophilic member of the genus Exiguobacterium whose representatives were isolated from various environments along a thermal and physicochemical gradient. 
This genome was sequenced to be a comparative resource for the study of thermal adaptation with a psychroactive representative of the genus, Exiguobacterium sibiricum strain 255-15, that was previously sequenced by the U.S. Department of Energy's (DOE's) Joint Genome Institute (JGI) (http://genome.ornl.gov/microbial/exig/).",2011-04-01 +27515825,Onco-Regulon: an integrated database and software suite for site specific targeting of transcription factors of cancer genes. ,"Transcription factors (TFs) bind at multiple sites in the genome and regulate expression of many genes. Regulating TF binding in a gene specific manner remains a formidable challenge in drug discovery because the same binding motif may be present at multiple locations in the genome. Here, we present Onco-Regulon (http://www.scfbio-iitd.res.in/software/onco/NavSite/index.htm), an integrated database of regulatory motifs of cancer genes clubbed with Unique Sequence-Predictor (USP) a software suite that identifies unique sequences for each of these regulatory DNA motifs at the specified position in the genome. USP works by extending a given DNA motif, in 5'→3', 3' →5' or both directions by adding one nucleotide at each step, and calculates the frequency of each extended motif in the genome by Frequency Counter programme. This step is iterated till the frequency of the extended motif becomes unity in the genome. Thus, for each given motif, we get three possible unique sequences. Closest Sequence Finder program predicts off-target drug binding in the genome. Inclusion of DNA-Protein structural information further makes Onco-Regulon a highly informative repository for gene specific drug development. 
We believe that Onco-Regulon will help researchers to design drugs which will bind to an exclusive site in the genome with no off-target effects, theoretically.Database URL: http://www.scfbio-iitd.res.in/software/onco/NavSite/index.htm.",2016-08-10 +23193287,GenBank.,"GenBank® (http://www.ncbi.nlm.nih.gov) is a comprehensive database that contains publicly available nucleotide sequences for almost 260 000 formally described species. These sequences are obtained primarily through submissions from individual laboratories and batch submissions from large-scale sequencing projects, including whole-genome shotgun (WGS) and environmental sampling projects. Most submissions are made using the web-based BankIt or standalone Sequin programs, and GenBank staff assigns accession numbers upon data receipt. Daily data exchange with the European Nucleotide Archive (ENA) and the DNA Data Bank of Japan (DDBJ) ensures worldwide coverage. GenBank is accessible through the NCBI Entrez retrieval system, which integrates data from the major DNA and protein sequence databases along with taxonomy, genome, mapping, protein structure and domain information, and the biomedical journal literature via PubMed. BLAST provides sequence similarity searches of GenBank and other sequence databases. Complete bimonthly releases and daily updates of the GenBank database are available by FTP. To access GenBank and its related retrieval and analysis services, begin at the NCBI home page: www.ncbi.nlm.nih.gov.",2012-11-27 +27153644,OREMPRO web server: orientation and assessment of atomistic and coarse-grained structures of membrane proteins.,"

Unlabelled

The experimental determination of membrane protein orientation within the lipid bilayer is extremely challenging, such that computational methods are most often the only solution. Moreover, obtaining all-atom 3D structures of membrane proteins is also technically difficult, and many of the available data are either experimental low-resolution structures or theoretical models, whose structural quality needs to be evaluated. Here, to address these two crucial problems, we propose OREMPRO, a web server capable of both (i) positioning α-helical and β-sheet transmembrane domains in the lipid bilayer and (ii) assessing their structural quality. Most importantly, OREMPRO uses the sole alpha carbon coordinates, which makes it the only web server compatible with both high and low structural resolutions. Finally, OREMPRO is also interesting in its ability to process coarse-grained protein models, by using coordinates of backbone beads in place of alpha carbons.

Availability and implementation

http://www.dsimb.inserm.fr/OREMPRO/ CONTACT: : guillaume.postic@univ-paris-diderot.fr or jean-christophe.gelly@univ-paris-diderot.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-19 +28881980,Alignment of dynamic networks.,"

Motivation

Network alignment (NA) aims to find a node mapping that conserves similar regions between compared networks. NA is applicable to many fields, including computational biology, where NA can guide the transfer of biological knowledge from well- to poorly-studied species across aligned network regions. Existing NA methods can only align static networks. However, most complex real-world systems evolve over time and should thus be modeled as dynamic networks. We hypothesize that aligning dynamic network representations of evolving systems will produce superior alignments compared to aligning the systems' static network representations, as is currently done.

Results

For this purpose, we introduce the first ever dynamic NA method, DynaMAGNA++. This proof-of-concept dynamic NA method is an extension of a state-of-the-art static NA method, MAGNA++. Even though both MAGNA++ and DynaMAGNA++ optimize edge as well as node conservation across the aligned networks, MAGNA++ conserves static edges and similarity between static node neighborhoods, while DynaMAGNA++ conserves dynamic edges (events) and similarity between evolving node neighborhoods. For this purpose, we introduce the first ever measure of dynamic edge conservation and rely on our recent measure of dynamic node conservation. Importantly, the two dynamic conservation measures can be optimized with any state-of-the-art NA method and not just MAGNA++. We confirm our hypothesis that dynamic NA is superior to static NA, on synthetic and real-world networks, in computational biology and social domains. DynaMAGNA++ is parallelized and has a user-friendly graphical interface.

Availability and implementation

http://nd.edu/~cone/DynaMAGNA++/ .

Contact

tmilenko@nd.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +25748005,Safety Profile Assessment: An online tool to gauge safety-critical performance in radiation oncology.,"

Purpose

It is challenging for the radiation oncology practitioner to manage and implement the plethora of recently generated recommendations on quality and safety improvement. The online Safety Profile Assessment (SPA) tool uses an easy-to-use question-and-answer format to assess safety/quality within a clinic, provide a way to benchmark against peers, and facilitate improvement. This report describes the design and development of the SPA and experience from the first year of use.

Methods

Performance indicators for the SPA were derived from 4 foundations: the Agency for Healthcare Research and Quality, a review of 7 recent authoritative documents specific to radiation oncology, a recent American Association of Physicists in Medicine report on incident learning, and the American College of Radiology-American Society for Radiation Oncology accreditation system as of 2011. After pilot testing, the free-access tool was launched through the American Association of Physicists in Medicine website (http://spa.aapm.org) in July 2013. Questionnaire data were collected to assess the experience of users.

Results

The SPA tool consists of 92 indicators designed to probe safety and quality. A clinic's performance is benchmarked against all other responses in the database, and aided by a downloadable log, quality/safety improvement strategies can be developed and tracked over time. At the time this paper was written, 279 individuals had registered, and 107 had completed the SPA. On average, the SPA required 1.3 hours to complete. The majority of respondents to the questionnaire (56%) completed the SPA with a multidisciplinary group of 4 people on average. Respondents noted that the SPA was easy or very easy to use (70%) and that they would definitely or very probably complete it again (63%).

Conclusions

SPA provides a straightforward means of gauging a clinic's performance in key safety-critical areas and has been evaluated favorably by the first cohort of users. The tool has been qualified by the American Board of Radiology (ABR) as meeting the criteria for Practice Quality Improvement requirements of the ABR Maintenance of Certification Program.",2014-11-03 +26209432,A mutation profile for top-k patient search exploiting Gene-Ontology and orthogonal non-negative matrix factorization.,"

Motivation

As the quantity of genomic mutation data increases, the likelihood of finding patients with similar genomic profiles, for various disease inferences, increases. However, so does the difficulty in identifying them. Similarity search based on patient mutation profiles can solve various translational bioinformatics tasks, including prognostics and treatment efficacy predictions for better clinical decision making through large volume of data. However, this is a challenging problem due to heterogeneous and sparse characteristics of the mutation data as well as their high dimensionality.

Results

To solve this problem we introduce a compact representation and search strategy based on Gene-Ontology and orthogonal non-negative matrix factorization. Statistical significance between the identified cancer subtypes and their clinical features are computed for validation; results show that our method can identify and characterize clinically meaningful tumor subtypes comparable or better in most datasets than the recently introduced Network-Based Stratification method while enabling real-time search. To the best of our knowledge, this is the first attempt to simultaneously characterize and represent somatic mutational data for efficient search purposes.

Availability

The implementations are available at: https://sites.google.com/site/postechdm/research/implementation/orgos.

Contact

sael@cs.stonybrook.edu or hwanjoyu@postech.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-23 +26130741,Cohort Profile Update: Australian Longitudinal Study on Women's Health.,"In 1996 the Australian Longitudinal Study on Women's Health recruited a nationally representative sample of more than 40,000 women in three age cohorts, born in 1973-78, 1946-51 and 1921-26. At least six waves of 3-yearly surveys have been completed. Although the focus remains on factors affecting the health and well-being of women and their access to and use of health services across urban, rural and remote areas of Australia, the study has now been considerably expanded by linkage to other health data sets. For most women who have ever participated in the study, linked records are now available for: government-subsidized non-hospital services (e.g. all general practitioner visits); pharmaceutical prescriptions filled; national death index, including codes for multiple causes of death; aged care assessments and services; cancer registries; and, for most states and territories, hospital admissions and perinatal data. Additionally, a large cohort of women born in 1989-95 have been recruited. The data are available to approved collaborators, with more than 780 researchers using the data so far. Full details of the study materials and data access procedures are available at [http://www.alswh.org.au/].",2015-06-30 +28881972,Abundance estimation and differential testing on strain level in metagenomics data.,"

Motivation

Current metagenomics approaches allow analyzing the composition of microbial communities at high resolution. Important changes to the composition are known to even occur on strain level and to go hand in hand with changes in disease or ecological state. However, specific challenges arise for strain level analysis due to highly similar genome sequences present. Only a limited number of tools approach taxa abundance estimation beyond species level and there is a strong need for dedicated tools for strain resolution and differential abundance testing.

Methods

We present DiTASiC (Differential Taxa Abundance including Similarity Correction) as a novel approach for quantification and differential assessment of individual taxa in metagenomics samples. We introduce a generalized linear model for the resolution of shared read counts which cause a significant bias on strain level. Further, we capture abundance estimation uncertainties, which play a crucial role in differential abundance analysis. A novel statistical framework is built, which integrates the abundance variance and infers abundance distributions for differential testing sensitive to strain level.

Results

As a result, we obtain highly accurate abundance estimates down to sub-strain level and enable fine-grained resolution of strain clusters. We demonstrate the relevance of read ambiguity resolution and integration of abundance uncertainties for differential analysis. Accurate detections of even small changes are achieved and false-positives are significantly reduced. Superior performance is shown on latest benchmark sets of various complexities and in comparison to existing methods.

Availability and implementation

DiTASiC code is freely available from https://rki_bioinformatics.gitlab.io/ditasic .

Contact

renardB@rki.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +28203713,SANA: simulated annealing far outperforms many other search algorithms for biological network alignment.,"

Summary

Every alignment algorithm consists of two orthogonal components: an objective function M measuring the quality of an alignment, and a search algorithm that explores the space of alignments looking for ones scoring well according to M . We introduce a new search algorithm called SANA (Simulated Annealing Network Aligner) and apply it to protein-protein interaction networks using S 3 as the topological measure. Compared against 12 recent algorithms, SANA produces 5-10 times as many correct node pairings as the others when the correct answer is known. We expose an anti-correlation in many existing aligners between their ability to produce good topological vs. functional similarity scores, whereas SANA usually outscores other methods in both measures. If given the perfect objective function encoding the identity mapping, SANA quickly converges to the perfect solution while many other algorithms falter. We observe that when aligning networks with a known mapping and optimizing only S 3 , SANA creates alignments that are not perfect and yet whose S 3 scores match that of the perfect alignment. We call this phenomenon saturation of the topological score . Saturation implies that a measure's correlation with alignment correctness falters before the perfect alignment is reached. This, combined with SANA's ability to produce the perfect alignment if given the perfect objective function, suggests that better objective functions may lead to dramatically better alignments. We conclude that future work should focus on finding better objective functions, and offer SANA as the search algorithm of choice.

Availability and implementation

Software available at http://sana.ics.uci.edu .

Contact

whayes@uci.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-07-01 +26680279,A new peak detection algorithm for MALDI mass spectrometry data based on a modified Asymmetric Pseudo-Voigt model.,"

Background

Mass Spectrometry (MS) is a ubiquitous analytical tool in biological research and is used to measure the mass-to-charge ratio of bio-molecules. Peak detection is the essential first step in MS data analysis. Precise estimation of peak parameters such as peak summit location and peak area are critical to identify underlying bio-molecules and to estimate their abundances accurately. We propose a new method to detect and quantify peaks in mass spectra. It uses dual-tree complex wavelet transformation along with Stein's unbiased risk estimator for spectra smoothing. Then, a new method, based on the modified Asymmetric Pseudo-Voigt (mAPV) model and hierarchical particle swarm optimization, is used for peak parameter estimation.

Results

Using simulated data, we demonstrated the benefit of using the mAPV model over Gaussian, Lorentz and Bi-Gaussian functions for MS peak modelling. The proposed mAPV model achieved the best fitting accuracy for asymmetric peaks, with lower percentage errors in peak summit location estimation, which were 0.17% to 4.46% less than that of the other models. It also outperformed the other models in peak area estimation, delivering lower percentage errors, which were about 0.7% less than its closest competitor - the Bi-Gaussian model. In addition, using data generated from a MALDI-TOF computer model, we showed that the proposed overall algorithm outperformed the existing methods mainly in terms of sensitivity. It achieved a sensitivity of 85%, compared to 77% and 71% of the two benchmark algorithms, continuous wavelet transformation based method and Cromwell respectively.

Conclusions

The proposed algorithm is particularly useful for peak detection and parameter estimation in MS data with overlapping peak distributions and asymmetric peaks. The algorithm is implemented using MATLAB and the source code is freely available at http://mapv.sourceforge.net.",2015-12-09 +28579402,MIDAS: Mining differentially activated subpaths of KEGG pathways from multi-class RNA-seq data.,"Pathway based analysis of high throughput transcriptome data is a widely used approach to investigate biological mechanisms. Since a pathway consists of multiple functions, the recent approach is to determine condition specific sub-pathways or subpaths. However, there are several challenges. First, few existing methods utilize explicit gene expression information from RNA-seq. More importantly, subpath activity is usually an average of statistical scores, e.g., correlations, of edges in a candidate subpath, which fails to reflect gene expression quantity information. In addition, none of existing methods can handle multiple phenotypes. To address these technical problems, we designed and implemented an algorithm, MIDAS, that determines condition specific subpaths, each of which has different activities across multiple phenotypes. MIDAS utilizes gene expression quantity information fully and the network centrality information to determine condition specific subpaths. To test performance of our tool, we used TCGA breast cancer RNA-seq gene expression profiles with five molecular subtypes. 36 differentially activate subpaths were determined. The utility of our method, MIDAS, was demonstrated in four ways. All 36 subpaths are well supported by the literature information. Subsequently, we showed that these subpaths had a good discriminant power for five cancer subtype classification and also had a prognostic power in terms of survival analysis. 
Finally, in a performance comparison of MIDAS to a recent subpath prediction method, PATHOME, our method identified more subpaths and much more genes that are well supported by the literature information.

Availability

http://biohealth.snu.ac.kr/software/MIDAS/.",2017-06-01 +26481357,WikiPathways: capturing the full diversity of pathway knowledge.,"WikiPathways (http://www.wikipathways.org) is an open, collaborative platform for capturing and disseminating models of biological pathways for data visualization and analysis. Since our last NAR update, 4 years ago, WikiPathways has experienced massive growth in content, which continues to be contributed by hundreds of individuals each year. New aspects of the diversity and depth of the collected pathways are described from the perspective of researchers interested in using pathway information in their studies. We provide updates on extensions and services to support pathway analysis and visualization via popular standalone tools, i.e. PathVisio and Cytoscape, web applications and common programming environments. We introduce the Quick Edit feature for pathway authors and curators, in addition to new means of publishing pathways and maintaining custom pathway collections to serve specific research topics and communities. In addition to the latest milestones in our pathway collection and curation effort, we also highlight the latest means to access the content as publishable figures, as standard data files, and as linked data, including bulk and programmatic access.",2015-10-19 +28229511,What explains the correlation between growth in vocabulary and grammar? New evidence from latent change score analyses of simultaneous bilingual development. ,"A close relationship between children's vocabulary size and the grammatical complexity of their speech is well attested but not well understood. The present study used latent change score modeling to examine the dynamic relationships between vocabulary and grammar growth within and across languages in longitudinal data from 90 simultaneous Spanish-English bilingual children who were assessed at 6-month intervals between 30 and 48 months. 
Slopes of vocabulary and grammar growth were strongly correlated within each language and showed moderate or nonsignificant relationships across languages. There was no evidence that vocabulary level predicted subsequent grammar growth or that the level of grammatical development predicted subsequent vocabulary growth. We propose that a common influence of properties of input on vocabulary and grammatical development is the source of their correlated but uncoupled growth. An unanticipated across-language finding was a negative relationship between level of English skill and subsequent Spanish growth. We propose that the cultural context of Spanish-English bilingualism in the US is the reason that strong English skills jeopardize Spanish language growth, while Spanish skills do not affect English growth. A video abstract of this article can be viewed at: https://youtu.be/qEHSQ0yRre0.",2017-02-22 +21519393,Digital atlas of anatomical subdivisions and boundaries of the rat hippocampal region.,"The rat hippocampal region is frequently studied in relation to learning and memory processes and brain diseases. The region is complex, consisting of multiple subdivisions that are challenging to delineate anatomically. Published atlases of the rat brain typically lack the underlying histological criteria necessary to identify boundaries, and textbooks descriptions of the region are often inadequately illustrated and thus difficult to relate to experimental data. An overview of both anatomical features and criteria used to delineate boundaries is required to assign location to experimental material from the hippocampal region. To address this issue, we have developed a web-based atlas application in which images of histological sections are integrated with new and up-to-date criteria for subdividing the rat hippocampus formation, fasciola, and associated parahippocampal regions. 
The atlas application consists of an interactive image viewer with high-resolution images of an extensive series of sections stained for NeuN, calbindin, and parvalbumin, and an index of structures with detailed descriptions of the criteria used to define the boundaries. Images can be inspected with a graphical overlay of selected subregions. Bi-directional links between images and the index of structures are provided. In summary, we provide a novel content-rich digital atlas resource facilitating identification of morphological features relevant for delineating the anatomical subdivisions of the rat hippocampal region. The atlas application is available at http://www.rbwb.org.",2011-04-08 +28963089,Residential Air Pollution and Associations with Wheeze and Shortness of Breath in Adults: A Combined Analysis of Cross-Sectional Data from Two Large European Cohorts.,"

Background

Research examining associations between air pollution exposure and respiratory symptoms in adults has generally been inconclusive. This may be related in part to sample size issues, which also preclude analysis in potentially vulnerable subgroups.

Objectives

We estimated associations between air pollution exposures and the prevalence of wheeze and shortness of breath using harmonized baseline data from two very large European cohorts, Lifelines (2006-2013) and UK Biobank (2006-2010). Our aim was also to determine whether the relationship between air pollution and respiratory symptom prevalence differed between individuals with different characteristics.

Methods

Cross-sectional analyses explored associations between prevalence of self-reported wheeze and shortness of breath and annual mean particulate matter with aerodynamic diameter <2.5μm, 2.5-10μm, and <10μm (PM2.5, PMcoarse, and PM10, respectively) and nitrogen dioxide (NO2) concentrations at place of residence using logistic regression. Subgroup analyses and tests for interaction were performed for age, sex, smoking status, household income, obesity status, and asthma status.

Results

All PM exposures were associated with respiratory symptoms based on single-pollutant models, with the largest associations seen for PM2.5 with prevalence of wheezing {odds ratio (OR)=1.16 per 5μg/m³ [95% confidence interval (CI): 1.11, 1.21]} and shortness of breath [OR=1.61 per 5μg/m³ (95% CI: 1.45, 1.78)]. The association between shortness of breath and a 5-μg/m³ increment in PM2.5 was significantly higher for individuals from lower- [OR=1.73 (95% CI: 1.52, 1.97)] versus higher-income households [OR=1.31 (95% CI: 1.11, 1.55); p-interaction=0.005], whereas the association between PM2.5 and wheeze was limited to lower-income participants [OR=1.30 (95% CI: 1.22, 1.38) vs. OR=1.02 (95% CI: 0.96, 1.08); p-interaction<0.001]. Exposure to NO2 also showed positive associations with wheeze and shortness of breath.

Conclusion

Exposure to PM and NO2 air pollution was associated with the prevalence of wheeze and shortness of breath in this large study, with stronger associations between PM2.5 and both outcomes among lower- versus higher-income participants. https://doi.org/10.1289/EHP1353.",2017-09-29 +26755623,MTG2: an efficient algorithm for multivariate linear mixed model analysis based on genomic information.,"

Unlabelled

We have developed an algorithm for genetic analysis of complex traits using genome-wide SNPs in a linear mixed model framework. Compared to current standard REML software based on the mixed model equation, our method is substantially faster. The advantage is largest when there is only a single genetic covariance structure. The method is particularly useful for multivariate analysis, including multi-trait models and random regression models for studying reaction norms. We applied our proposed method to publicly available mice and human data and discuss the advantages and limitations.

Availability and implementation

MTG2 is available at https://sites.google.com/site/honglee0707/mtg2 . Contact: hong.lee@une.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-10 +23537399,Cell death proteomics database: consolidating proteomics data on cell death.,"Programmed cell death is a ubiquitous process of utmost importance for the development and maintenance of multicellular organisms. More than 10 different types of programmed cell death forms have been discovered. Several proteomics analyses have been performed to gain insight in proteins involved in the different forms of programmed cell death. To consolidate these studies, we have developed the cell death proteomics (CDP) database, which comprehends data from apoptosis, autophagy, cytotoxic granule-mediated cell death, excitotoxicity, mitotic catastrophe, paraptosis, pyroptosis, and Wallerian degeneration. The CDP database is available as a web-based database to compare protein identifications and quantitative information across different experimental setups. The proteomics data of 73 publications were integrated and unified with protein annotations from UniProt-KB and gene ontology (GO). Currently, more than 6,500 records of more than 3,700 proteins are included in the CDP. Comparing apoptosis and autophagy using overrepresentation analysis of GO terms, the majority of enriched processes were found in both, but also some clear differences were perceived. Furthermore, the analysis revealed differences and similarities of the proteome between autophagosomal and overall autophagy. The CDP database represents a useful tool to consolidate data from proteome analyses of programmed cell death and is available at http://celldeathproteomics.uio.no.",2013-04-10 +28155685,Sig2GRN: a software tool linking signaling pathway with gene regulatory network for dynamic simulation.,"

Background

Linking computational models of signaling pathways to predicted cellular responses such as gene expression regulation is a major challenge in computational systems biology. In this work, we present Sig2GRN, a Cytoscape plugin that is able to simulate time-course gene expression data given the user-defined external stimuli to the signaling pathways.

Methods

A generalized logical model is used in modeling the upstream signaling pathways. Then a Boolean model and a thermodynamics-based model are employed to predict the downstream changes in gene expression based on the simulated dynamics of transcription factors in signaling pathways.

Results

Our empirical case studies show that the simulation of Sig2GRN can predict changes in gene expression patterns induced by DNA damage signals and drug treatments.

Conclusions

As a software tool for modeling cellular dynamics, Sig2GRN can facilitate studies in systems biology by hypotheses generation and wet-lab experimental design.

Availability

http://histone.scse.ntu.edu.sg/Sig2GRN/.",2016-12-23 +28963088,Traffic-Related Air Pollution and All-Cause Mortality during Tuberculosis Treatment in California.,"

Background

Ambient air pollution and tuberculosis (TB) have an impact on public health worldwide, yet associations between the two remain uncertain.

Objective

We determined the impact of residential traffic on mortality during treatment of active TB.

Methods

From 2000-2012, we enrolled 32,875 patients in California with active TB and followed them throughout treatment. We obtained patient data from the California Tuberculosis Registry and calculated traffic volumes and traffic densities in 100- to 400-m radius buffers around residential addresses. We used Cox models to determine mortality hazard ratios, controlling for demographic, socioeconomic, and clinical potential confounders. We categorized traffic exposures as quintiles and determined trends using Wald tests.

Results

Participants contributed 22,576 person-years at risk. There were 2,305 deaths during treatment for a crude mortality rate of 1,021 deaths per 10,000 person-years. Traffic volumes and traffic densities in all buffers around patient residences were associated with increased mortality during TB treatment, although the findings were not statistically significant in all buffers. As the buffer size decreased, fifth-quintile mortality hazards increased, and trends across quintiles of traffic exposure became more statistically significant. Increasing quintiles of nearest-road traffic volumes in the 100-m buffer were associated with 3%, 14%, 19%, and 28% increased risk of death during TB treatment [first quintile, referent; second quintile hazard ratio (HR)=1.03 [95% confidence interval (CI): 0.86, 1.25]; third quintile HR=1.14 (95% CI: 0.95, 1.37); fourth quintile HR=1.19 (95% CI: 0.99, 1.43); fifth quintile HR=1.28 (95% CI: 1.07, 1.53), respectively; p-trend=0.002].

Conclusions

Residential proximity to road traffic volumes and traffic density were associated with increased all-cause mortality in patients undergoing treatment for active tuberculosis even after adjusting for multiple demographic, socioeconomic, and clinical factors, suggesting that TB patients are susceptible to the adverse health effects of traffic-related air pollution. https://doi.org/10.1289/EHP1699.",2017-09-29 +28219434,When is proton pump inhibitor use appropriate?,"Proton pump inhibitor (PPI) therapy is commonly used outside of Food and Drug Administration indication for a broad range of conditions such as extra-esophageal reflux and PPI-responsive esophageal eosinophilia. While this may be appropriate in some scenarios, it has also resulted in widespread inappropriate PPI use. At the same time, data suggesting adverse effects of long-term PPI therapy are multiplying, albeit mainly from low quality studies. The systematic review by Scarpignato et al. (BMC Med 14:179, 2016) addresses this dilemma with a comprehensive analysis of the risks and benefits of PPI use. The authors concluded that, while PPIs are highly efficacious in erosive acid-peptic disorders, efficacy is not equaled in other conditions. In some instances, they found no supportive evidence of benefit. With respect to side effects, they indicated that the questionable harms associated with PPI therapy do not outweigh the benefits afforded by appropriate PPI use. However, inappropriate PPI use results in increased healthcare costs and unnecessary exposure to potential adverse effects. 
Ideally, PPI therapy should be personalized, based on indication, effectiveness, patient preference, and risk assessment. Please see related article: http://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-016-0718-z .",2017-02-21 +24665023,TP53 mutations in human cancer: database reassessment and prospects for the next decade.,"More than 50% of human tumors carry TP53 gene mutations and in consequence more than 45,000 somatic and germline mutations have been gathered in the UMD TP53 database (http://p53.fr). Analyses of these mutations have been invaluable for bettering our knowledge on the structure-function relationships within the TP53 protein and the high degree of heterogeneity of the various TP53 mutants in human cancer. In this review, we discuss how with the release of the sequences of thousands of tumor genomes issued from high-throughput sequencing, the description of novel TP53 mutants is now reaching a plateau indicating that we are close to the full set of mutants that target the elusive tumor-suppressive activity of this protein. We performed an extensive and thorough analysis of the TP53 mutation database, focusing particularly on specific sets of mutations that were overlooked in the past because of their low frequencies, for example, synonymous mutations, splice mutations, or mutations-targeting residues subject to posttranslational modifications. We also discuss the evolution of the statistical methods used to differentiate TP53 passenger mutations and artifactual data from true mutations, a process vital to the release of an accurate TP53 mutation database that will in turn be an invaluable tool for both clinicians and researchers.",2014-06-01 +28812812,A Cost Analysis of the Jan Aushadhi Scheme in India.,"Medicines constitute a substantial proportion of out-of-pocket (OOP) expenses in Indian households. 
In order to address this issue, the Government of India launched the Jan Aushadhi (Medicine for the Masses) Scheme (JAS) to provide cheap generic medicines to the patients (http://janaushadhi.gov.in/about_jan_aushadhi.html). These medicines are provided through the Jan Aushadhi stores established across the country. The objective of this study was to do a quick assessment for policy-makers regarding the objective of the JAS. Implications on cost savings for patients and policy implications of the scheme were analyzed. Secondary data sources were used to obtain prices of medicines under the JAS and prices of branded medicines of the same formulations. A cost analysis design was used. There are substantial differences between the JAS price and the cheapest branded medicine available in the market. However, not all JAS prices are lower than branded medicines. For example, the cheapest branded cefuroxime axetil (500 mg) (antibiotic) in the market is almost three times cheaper than its JAS price. Hence, there are cheaper brands available for some commonly prescribed medicines. From the policy perspective, it raises serious questions regarding the pricing of medicines in the JAS and its overarching goal. Since patients are dependent on physicians for medicine prescriptions and have little knowledge of the price variations among branded and generic medicines, the JAS may not provide the cheapest alternative for the patients. Hence, the government should urgently review the JAS prices to achieve its goal of providing low-cost affordable medicines.",2017-05-01 +22829745,CyanoEXpress: A web database for exploration and visualisation of the integrated transcriptome of cyanobacterium Synechocystis sp. PCC6803.,"

Unlabelled

Synechocystis sp. PCC6803 is one of the best studied cyanobacteria and an important model organism for our understanding of photosynthesis. The early availability of its complete genome sequence initiated numerous transcriptome studies, which have generated a wealth of expression data. Analysis of the accumulated data can be a powerful tool to study transcription in a comprehensive manner and to reveal underlying regulatory mechanisms, as well as to annotate genes whose functions are yet unknown. However, use of divergent microarray platforms, as well as distributed data storage make meta-analyses of Synechocystis expression data highly challenging, especially for researchers with limited bioinformatic expertise and resources. To facilitate utilisation of the accumulated expression data for a wider research community, we have developed CyanoEXpress, a web database for interactive exploration and visualisation of transcriptional response patterns in Synechocystis. CyanoEXpress currently comprises expression data for 3073 genes and 178 environmental and genetic perturbations obtained in 31 independent studies. At present, CyanoEXpress constitutes the most comprehensive collection of expression data available for Synechocystis and can be freely accessed.

Availability

The database is available for free at http://cyanoexpress.sysbiolab.eu.",2012-07-06 +24143170,Can inferred provenance and its visualisation be used to detect erroneous annotation? A case study using UniProtKB.,"A constant influx of new data poses a challenge in keeping the annotation in biological databases current. Most biological databases contain significant quantities of textual annotation, which often contains the richest source of knowledge. Many databases reuse existing knowledge; during the curation process annotations are often propagated between entries. However, this is often not made explicit. Therefore, it can be hard, potentially impossible, for a reader to identify where an annotation originated from. Within this work we attempt to identify annotation provenance and track its subsequent propagation. Specifically, we exploit annotation reuse within the UniProt Knowledgebase (UniProtKB), at the level of individual sentences. We describe a visualisation approach for the provenance and propagation of sentences in UniProtKB which enables a large-scale statistical analysis. Initially levels of sentence reuse within UniProtKB were analysed, showing that reuse is heavily prevalent, which enables the tracking of provenance and propagation. By analysing sentences throughout UniProtKB, a number of interesting propagation patterns were identified, covering over [Formula: see text] sentences. Over [Formula: see text] sentences remain in the database after they have been removed from the entries where they originally occurred. Analysing a subset of these sentences suggest that approximately [Formula: see text] are erroneous, whilst [Formula: see text] appear to be inconsistent. These results suggest that being able to visualise sentence propagation and provenance can aid in the determination of the accuracy and quality of textual annotation. 
Source code and supplementary data are available from the authors website at http://homepages.cs.ncl.ac.uk/m.j.bell1/sentence_analysis/.",2013-10-15 +26922377,Targeted alignment and end repair elimination increase alignment and methylation measure accuracy for reduced representation bisulfite sequencing data.,"

Background

DNA methylation is an important epigenetic modification involved in many biological processes. Reduced representation bisulfite sequencing (RRBS) is a cost-effective method for studying DNA methylation at single base resolution. Although several tools are available for RRBS data processing and analysis, it is not clear which strategy performs the best and there has not been much attention to the contamination issue from artificial cytosines incorporated during the end repair step of library preparation. To address these issues, we describe a new method, Targeted Alignment and Artificial Cytosine Elimination for RRBS (TRACE-RRBS), which aligns bisulfite sequence reads to MSP1 digitally digested reference and specifically removes the end repair cytosines. We compared this approach on a simulated and a real dataset with 7 other RRBS analysis tools and Illumina 450 K microarray platform.

Results

TRACE-RRBS aligns sequence reads to a small fraction of the genome where RRBS protocol targets on and was demonstrated as the fastest, most sensitive and specific tool for the simulated dataset. For the real dataset, TRACE-RRBS took about the same time as RRBSMAP, a third to a sixth of time needed for BISMARK and NOVOALIGN. TRACE-RRBS aligned more reads uniquely than other tools and achieved the highest correlation with 450 k microarray data. The end repair artificial cytosine removal increased correlation between nearby CpGs and accuracy of methylation quantification.

Conclusions

TRACE-RRBS is fast and more accurate tool for RRBS data analysis. It is freely available for academic use at http://bioinformaticstools.mayo.edu/.",2016-02-27 +23175610,"The mouse genome database: genotypes, phenotypes, and models of human disease.","The laboratory mouse is the premier animal model for studying human biology because all life stages can be accessed experimentally, a completely sequenced reference genome is publicly available and there exists a myriad of genomic tools for comparative and experimental research. In the current era of genome scale, data-driven biomedical research, the integration of genetic, genomic and biological data are essential for realizing the full potential of the mouse as an experimental model. The Mouse Genome Database (MGD; http://www.informatics.jax.org), the community model organism database for the laboratory mouse, is designed to facilitate the use of the laboratory mouse as a model system for understanding human biology and disease. To achieve this goal, MGD integrates genetic and genomic data related to the functional and phenotypic characterization of mouse genes and alleles and serves as a comprehensive catalog for mouse models of human disease. Recent enhancements to MGD include the addition of human ortholog details to mouse Gene Detail pages, the inclusion of microRNA knockouts to MGD's catalog of alleles and phenotypes, the addition of video clips to phenotype images, providing access to genotype and phenotype data associated with quantitative trait loci (QTL) and improvements to the layout and display of Gene Ontology annotations.",2012-11-21 +25794139,CAPER 3.0: A Scalable Cloud-Based System for Data-Intensive Analysis of Chromosome-Centric Human Proteome Project Data Sets.,"The Chromosome-centric Human Proteome Project (C-HPP) aims to catalog genome-encoded proteins using a chromosome-by-chromosome strategy. 
As the C-HPP proceeds, the increasing requirement for data-intensive analysis of the MS/MS data poses a challenge to the proteomic community, especially small laboratories lacking computational infrastructure. To address this challenge, we have updated the previous CAPER browser into a higher version, CAPER 3.0, which is a scalable cloud-based system for data-intensive analysis of C-HPP data sets. CAPER 3.0 uses cloud computing technology to facilitate MS/MS-based peptide identification. In particular, it can use both public and private cloud, facilitating the analysis of C-HPP data sets. CAPER 3.0 provides a graphical user interface (GUI) to help users transfer data, configure jobs, track progress, and visualize the results comprehensively. These features enable users without programming expertise to easily conduct data-intensive analysis using CAPER 3.0. Here, we illustrate the usage of CAPER 3.0 with four specific mass spectral data-intensive problems: detecting novel peptides, identifying single amino acid variants (SAVs) derived from known missense mutations, identifying sample-specific SAVs, and identifying exon-skipping events. CAPER 3.0 is available at http://prodigy.bprc.ac.cn/caper3.",2015-03-27 +28172415,BASIC: BCR assembly from single cells.,"

Motivation

The B-cell receptor enables individual B cells to identify diverse antigens, including bacterial and viral proteins. While advances in RNA-sequencing (RNA-seq) have enabled high throughput profiling of transcript expression in single cells, the unique task of assembling the full-length heavy and light chain sequences from single cell RNA-seq (scRNA-seq) in B cells has been largely unstudied.

Results

We developed a new software tool, BASIC, which allows investigators to use scRNA-seq for assembling BCR sequences at single-cell resolution. To demonstrate the utility of our software, we subjected nearly 200 single human B cells to scRNA-seq, assembled the full-length heavy and the light chains, and experimentally confirmed these results by using single-cell primer-based nested PCRs and Sanger sequencing.

Availability and implementation

http://ttic.uchicago.edu/~aakhan/BASIC

Contact

aakhan@ttic.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +28477785,Allergenic extracts to diagnose and treat sensitivity to insect venoms and inhaled allergens.,"

Objective

To review allergenic extracts used to diagnose or treat insect allergies, including how the extracts are manufactured and their measurements of potency or concentration.

Data sources

Peer-reviewed articles derived from searching PubMed (National Center for Biotechnology Information) about insect allergies and extract preparation. Encyclopedia of Life (http://www.eol.org/) and http://allergome.org/ were also referenced for background information on insects and associated allergens.

Study selections

Search terms used for the PubMed searches included insect allergens and allergies, Apidae, Vespidae, fire ants, cockroach allergies, insect allergen extract preparation, and standardization.

Results

Humans may be sensitized to insect allergens by inhalation or through stings. Cockroaches and moths are predominantly responsible for inhalation insect allergy and are a major indoor allergen in urban settings. Bees, fire ants, and wasps are responsible for sting allergy. In the United States, there are multiple insect allergen products commercially available that are regulated by the US Food and Drug Administration. Of those extracts, honeybee venom and insect venom proteins are standardized with measurements of potency. The remaining insect allergen extracts are nonstandardized products that do not have potency measurements.

Conclusion

Sensitization to inhalational and stinging insect allergens is reported worldwide. Crude insect allergen extracts are used for diagnosis and specific immunotherapy. A variety of source materials are used by different manufacturers to prepare these extracts, which may result in qualitative differences that are not reflected in measurements of potency or protein concentration.",2017-05-01 +26361498,miRTarVis: an interactive visual analysis tool for microRNA-mRNA expression profile data.,"

Background

MicroRNAs (miRNAs) are short nucleotides that down-regulate their target genes. Various miRNA target prediction algorithms have used sequence complementarity between miRNAs and their targets. Recently, other algorithms tried to improve sequence-based miRNA target prediction by exploiting miRNA-mRNA expression profile data. Some web-based tools have also been introduced to help researchers predict targets of miRNAs from miRNA-mRNA expression profile data. A demand for a miRNA-mRNA visual analysis tool that features novel miRNA prediction algorithms and more interactive visualization techniques exists.

Results

We designed and implemented miRTarVis, which is an interactive visual analysis tool that predicts targets of miRNAs from miRNA-mRNA expression profile data and visualizes the resulting miRNA-target interaction network. miRTarVis has intuitive interface design in accordance with the analysis procedure of load, filter, predict, and visualize. It predicts targets of miRNA by adopting Bayesian inference and MINE analyses, as well as conventional correlation and mutual information analyses. It visualizes a resulting miRNA-mRNA network in an interactive Treemap, as well as a conventional node-link diagram. miRTarVis is available at http://hcil.snu.ac.kr/~rati/miRTarVis/index.html.

Conclusions

We reported findings from miRNA-mRNA expression profile data of asthma patients using miRTarVis in a case study. miRTarVis helps to predict and understand targets of miRNA from miRNA-mRNA expression profile data.",2015-08-13 +26623179,miRMOD: a tool for identification and analysis of 5' and 3' miRNA modifications in Next Generation Sequencing small RNA data.,"In the past decade, the microRNAs (miRNAs) have emerged to be important regulators of gene expression across various species. Several studies have confirmed different types of post-transcriptional modifications at terminal ends of miRNAs. The reports indicate that miRNA modifications are conserved and functionally significant as it may affect miRNA stability and ability to bind mRNA targets, hence affecting target gene repression. Next Generation Sequencing (NGS) of the small RNA (sRNA) provides an efficient and reliable method to explore miRNA modifications. The need for dedicated software, especially for users with little knowledge of computers, to determine and analyze miRNA modifications in sRNA NGS data, motivated us to develop miRMOD. miRMOD is a user-friendly, Microsoft Windows and Graphical User Interface (GUI) based tool for identification and analysis of 5' and 3' miRNA modifications (non-templated nucleotide additions and trimming) in sRNA NGS data. In addition to identification of miRNA modifications, the tool also predicts and compares the targets of query and modified miRNAs. In order to compare binding affinities for the same target, miRMOD utilizes minimum free energies of the miRNA:target and modified-miRNA:target interactions. Comparisons of the binding energies may guide experimental exploration of miRNA post-transcriptional modifications. The tool is available as a stand-alone package to overcome large data transfer problems commonly faced in web-based high-throughput (HT) sequencing data analysis tools. 
miRMOD package is freely available at http://bioinfo.icgeb.res.in/miRMOD.",2015-10-20 +27497657,Identification of a dominant Chlamydia trachomatis strain in patients attending sexual transmitted infection clinic and female sex workers in Tunisia using a high resolution typing method.,"

Background

The distribution of Chlamydia trachomatis genotypes in Tunisia was previously studied using the reverse hybridization method. In this study, we used multilocus sequence typing (MLST) to describe Chlamydia trachomatis genetic diversity among heterosexual populations in Tunisia. The obtained sequence types (STs) were compared with those from a heterosexual population from Amsterdam, the Netherlands.

Methods

Clinical Tunisian patients and female sex workers provided 107 Chlamydia trachomatis positive samples that were used for MLST. Samples from 256 heterosexuals visiting the Amsterdam STI clinic were included as a reference group. Six highly variable genetic regions including the ompA gene were amplified and sequenced. The ST numbers were derived from a Chlamydia typing database (http://mlstdb.uu.se) and used to draw minimum spanning trees.

Results

ompA sequencing detected 7 genotypes among the Tunisian populations of which genotype E was the most prevalent (66.3%). This genotype E resolved into 23 different STs and among these the ST3 was predominant (53.5%). MLST displayed 43 STs, of which 28 (65%) were new in the database. Minimum spanning tree analysis of all Tunisian samples identified 4 clusters of which one formed a clonal cluster with samples presenting the most prevalent ST3. When comparing samples from the Tunisian and Dutch populations in one minimum spanning tree, there was little overlap between the Chlamydia trachomatis samples.

Conclusion

The CT-hrMLST scheme allowed us to identify that the Tunisian distribution was dominated by one genotype E (ST3) strain which is also highly prevalent in many other countries worldwide.",2016-08-04 +27596405,Proteomic profiling of camel and cow milk proteins under heat treatment.,"Cow and camel milk proteins before and after heat treatment at 80°C for 60min were identified using LC/MS and LC-MS/MS following monodimensional electrophoresis. The database used for the identification of camel and cow proteins was set from http://www.uniprot.org/. The obtained results showed that, after heating, camel milk at 80°C for 60min, camel α-lactalbumin (α-la) and peptidoglycan recognition protein (PGRP) were not detected while camel serum albumin (CSA) was significantly diminished. When heating cow milk at 80°C for 60min, α-lactalbumin (α-la) and β-lactoglobulin (β-lg) were not significantly detected. Moreover, 19 protein bands from SDS-PAGE were analyzed and a total of 45 different proteins were identified by LC-MS/MS. Casein fractions were kept intact under a heat treatment of 80°C during 60min of both camel and cow milks. Camel and bovine whey proteins were affected by a heat treatment of 80°C for 60min.",2016-08-04 +24572313,Brillouin-zone database on the Bilbao Crystallographic Server.,"The Brillouin-zone database of the Bilbao Crystallographic Server (http://www.cryst.ehu.es) offers k-vector tables and figures which form the background of a classification of the irreducible representations of all 230 space groups. The symmetry properties of the wavevectors are described by the so-called reciprocal-space groups and this classification scheme is compared with the classification of Cracknell et al. [Kronecker Product Tables, Vol. 1, General Introduction and Tables of Irreducible Representations of Space Groups (1979). New York: IFI/Plenum]. 
The compilation provides a solution to the problems of uniqueness and completeness of space-group representations by specifying the independent parameter ranges of general and special k vectors. Guides to the k-vector tables and figures explain the content and arrangement of the data. Recent improvements and modifications of the Brillouin-zone database, including new tables and figures for the trigonal, hexagonal and monoclinic space groups, are discussed in detail and illustrated by several examples.",2014-02-12 +28666758,The changing epidemiology of invasive Haemophilus influenzae disease: Emergence and global presence of serotype a strains that may require a new vaccine for control.,"

Background

More than two decades after the implementation of the Hib conjugate vaccine in North America, Haemophilus influenzae serotype a (Hia) has emerged as a significant cause of invasive disease in Indigenous communities. However, little is known about the global presence of this pathogen.

Methods

We interrogated the H. influenzae Multi-Locus Sequence Typing (MLST) website (https://pubmlst.org/hinfluenzae/) by selecting for serotype a records. We also updated our previous literature review on this subject matter.

Results

Hia has been reported from at least 35 countries on six major continents. However, most Hia diseases were associated with Indigenous communities. Clonal analysis identified two clonal populations with one typified as ST-23 responsible for most invasive disease in North America and being the predominant clone described on the H. influenzae MLST website. Incidence of invasive Hia disease in Indigenous communities in North America is similar to the rates of Hib disease reported prior to the Hib conjugate vaccine era. Hia causes severe clinical diseases, such as meningitis, septicaemia, pneumonia, and septic arthritis with case-fatality rates between 5.6% and 33% depending on the age of the patient and the genetic makeup of the Hia strain.

Conclusion

Although invasive Hia disease can be found globally, the current epidemiological data suggest that this infection predominantly affects Indigenous communities in North America. The clinical disease of Hia and the clonal nature of the bacteria resemble that of Hib. The high incidence of invasive Hia disease in Indigenous communities, along with potential fatality and severe sequelae causing long-term disability in survivors, may support the development of a new Hia conjugate vaccine for protection against this infection similar in design to the one introduced in the 1990s to control invasive Hib disease.",2017-06-27 +26134183,Integrating Microarray Data and GRNs.,"With the completion of the Human Genome Project and the emergence of high-throughput technologies, a vast amount of molecular and biological data are being produced. Two of the most important and significant data sources come from microarray gene-expression experiments and respective databanks (e,g., Gene Expression Omnibus-GEO (http://www.ncbi.nlm.nih.gov/geo)), and from molecular pathways and Gene Regulatory Networks (GRNs) stored and curated in public (e.g., Kyoto Encyclopedia of Genes and Genomes-KEGG (http://www.genome.jp/kegg/pathway.html), Reactome (http://www.reactome.org/ReactomeGWT/entrypoint.html)) as well as in commercial repositories (e.g., Ingenuity IPA (http://www.ingenuity.com/products/ipa)). The association of these two sources aims to give new insight in disease understanding and reveal new molecular targets in the treatment of specific phenotypes.Three major research lines and respective efforts that try to utilize and combine data from both of these sources could be identified, namely: (1) de novo reconstruction of GRNs, (2) identification of Gene-signatures, and (3) identification of differentially expressed GRN functional paths (i.e., sub-GRN paths that distinguish between different phenotypes). 
In this chapter, we give an overview of the existing methods that support the different types of gene-expression and GRN integration with a focus on methodologies that aim to identify phenotype-discriminant GRNs or subnetworks, and we also present our methodology.",2016-01-01 +23093600,The Comparative Toxicogenomics Database: update 2013.,"The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) provides information about interactions between environmental chemicals and gene products and their relationships to diseases. Chemical-gene, chemical-disease and gene-disease interactions manually curated from the literature are integrated to generate expanded networks and predict many novel associations between different data types. CTD now contains over 15 million toxicogenomic relationships. To navigate this sea of data, we added several new features, including DiseaseComps (which finds comparable diseases that share toxicogenomic profiles), statistical scoring for inferred gene-disease and pathway-chemical relationships, filtering options for several tools to refine user analysis and our new Gene Set Enricher (which provides biological annotations that are enriched for gene sets). To improve data visualization, we added a Cytoscape Web view to our ChemComps feature, included color-coded interactions and created a 'slim list' for our MEDIC disease vocabulary (allowing diseases to be grouped for meta-analysis, visualization and better data management). CTD continues to promote interoperability with external databases by providing content and cross-links to their sites. Together, this wealth of expanded chemical-gene-disease data, combined with novel ways to analyze and view content, continues to help users generate testable hypotheses about the molecular mechanisms of environmental diseases.",2012-10-23 +26159465,WebDISCO: a web service for distributed cox model learning without patient-level data sharing.,"

Objective

The Cox proportional hazards model is a widely used method for analyzing survival data. To achieve sufficient statistical power in a survival analysis, it usually requires a large amount of data. Data sharing across institutions could be a potential workaround for providing this added power.

Methods and materials

The authors develop a web service for distributed Cox model learning (WebDISCO), which focuses on the proof-of-concept and algorithm development for federated survival analysis. The sensitive patient-level data can be processed locally and only the less-sensitive intermediate statistics are exchanged to build a global Cox model. Mathematical derivation shows that the proposed distributed algorithm is identical to the centralized Cox model.

Results

The authors evaluated the proposed framework at the University of California, San Diego (UCSD), Emory, and Duke. The experimental results show that both distributed and centralized models result in near-identical model coefficients with differences in the range [Formula: see text] to [Formula: see text]. The results confirm the mathematical derivation and show that the implementation of the distributed model can achieve the same results as the centralized implementation.

Limitation

The proposed method serves as a proof of concept, in which a publicly available dataset was used to evaluate the performance. The authors do not intend to suggest that this method can resolve policy and engineering issues related to the federated use of institutional data, but they should serve as evidence of the technical feasibility of the proposed approach.Conclusions WebDISCO (Web-based Distributed Cox Regression Model; https://webdisco.ucsd-dbmi.org:8443/cox/) provides a proof-of-concept web service that implements a distributed algorithm to conduct distributed survival analysis without sharing patient level data.",2015-07-09 +28319463,Solid Cancer Incidence among the Life Span Study of Atomic Bomb Survivors: 1958-2009.,"This is the third analysis of solid cancer incidence among the Life Span Study (LSS) cohort of atomic bomb survivors in Hiroshima and Nagasaki, adding eleven years of follow-up data since the previously reported analysis. For this analysis, several changes and improvements were implemented, including updated dose estimates (DS02R1) and adjustment for smoking. Here, we focus on all solid cancers in aggregate. The eligible cohort included 105,444 subjects who were alive and had no known history of cancer at the start of follow-up. A total of 80,205 subjects had individual dose estimates and 25,239 were not in either city at the time of the bombings. The follow-up period was 1958-2009, providing 3,079,484 person-years of follow-up. Cases were identified by linkage with population-based Hiroshima and Nagasaki Cancer Registries. Poisson regression methods were used to elucidate the nature of the radiation-associated risks per Gy of weighted absorbed colon dose using both excess relative risk (ERR) and excess absolute risk (EAR) models adjusted for smoking. Risk estimates were reported for a person exposed at age 30 years with attained age of 70 years. 
In this study, 22,538 incident first primary solid cancer cases were identified, of which 992 were associated with radiation exposure. There were 5,918 cases (26%) that occurred in the 11 years (1999-2009) since the previously reported study. For females, the dose response was consistent with linearity with an estimated ERR of 0.64 per Gy (95% CI: 0.52 to 0.77). For males, significant upward curvature over the full dose range as well as restricted dose ranges was observed and therefore, a linear-quadratic model was used, which resulted in an ERR of 0.20 (95% CI: 0.12 to 0.28) at 1 Gy and an ERR of 0.010 (95% CI: -0.0003 to 0.021) at 0.1 Gy. The shape of the ERR dose response was significantly different among males and females (P = 0.02). While there was a significant decrease in the ERR with increasing attained age, this decrease was more rapid in males compared to females. The lowest dose range that showed a statistically significant dose response using the sex-averaged, linear ERR model was 0-100 mGy (P = 0.038). In conclusion, this analysis demonstrates that solid cancer risks remain elevated more than 60 years after exposure. Sex-averaged upward curvature was observed in the dose response independent of adjustment for smoking. Findings from the current analysis regarding the dose-response shape were not fully consistent with those previously reported, raising unresolved questions. At this time, uncertainties in the shape of the dose response preclude definitive conclusions to confidently guide radiation protection policies. Upcoming results from a series of analyses focusing on the radiation risks for specific organs or organ families, as well as continued follow-up are needed to fully understand the nature of radiation-related cancer risk and its public health significance. 
Data and analysis scripts are available for download at: http://www.rerf.or.jp .",2017-03-20 +26072513,Reconstructing gene regulatory dynamics from high-dimensional single-cell snapshot data.,"

Motivation

High-dimensional single-cell snapshot data are becoming widespread in the systems biology community, as a means to understand biological processes at the cellular level. However, as temporal information is lost with such data, mathematical models have been limited to capturing only static features of the underlying cellular mechanisms.

Results

Here, we present a modular framework which allows to recover the temporal behaviour from single-cell snapshot data and reverse engineer the dynamics of gene expression. The framework combines a dimensionality reduction method with a cell time-ordering algorithm to generate pseudo time-series observations. These are in turn used to learn transcriptional ODE models and do model selection on structural network features. We apply it on synthetic data and then on real hematopoietic stem cells data, to reconstruct gene expression dynamics during differentiation pathways and infer the structure of a key gene regulatory network.

Availability and implementation

C++ and Matlab code available at https://www.helmholtz-muenchen.de/fileadmin/ICB/software/inferenceSnapshot.zip.",2015-06-01 +28152521,LncSubpathway: a novel approach for identifying dysfunctional subpathways associated with risk lncRNAs by integrating lncRNA and mRNA expression profiles and pathway topologies.,"Long non-coding RNAs (lncRNAs) play important roles in various biological processes, including the development of many diseases. Pathway analysis is a valuable aid for understanding the cellular functions of these transcripts. We have developed and characterized LncSubpathway, a novel method that integrates lncRNA and protein coding gene (PCG) expression with interactome data to identify disease risk subpathways that functionally associated with risk lncRNAs. LncSubpathway identifies the most relevance regions which are related with risk lncRNA set and implicated with study conditions through simultaneously considering the dysregulation extent of lncRNAs, PCGs and their correlations. Simulation studies demonstrated that the sensitivity and false positive rates of LncSubpathway were within acceptable ranges, and that LncSubpathway could accurately identify dysregulated regions that related with disease risk lncRNAs within pathways. When LncSubpathway was applied to colorectal carcinoma and breast cancer subtype datasets, it identified cancer type- and breast cancer subtype-related meaningful subpathways. Further, analysis of its robustness and reproducibility indicated that LncSubpathway was a reliable means of identifying subpathways that functionally associated with lncRNAs. LncSubpathway is freely available at http://www.bio-bigdata.com/lncSubpathway/.",2017-02-01 +27587690,Patterns of amino acid conservation in human and animal immunodeficiency viruses.,"

Motivation

Due to their high genomic variability, RNA viruses and retroviruses present a unique opportunity for detailed study of molecular evolution. Lentiviruses, with HIV being a notable example, are one of the best studied viral groups: hundreds of thousands of sequences are available together with experimentally resolved three-dimensional structures for most viral proteins. In this work, we use these data to study specific patterns of evolution of the viral proteins, and their relationship to protein interactions and immunogenicity.

Results

We propose a method for identification of two types of surface residues clusters with abnormal conservation: extremely conserved and extremely variable clusters. We identify them on the surface of proteins from HIV and other animal immunodeficiency viruses. Both types of clusters are overrepresented on the interaction interfaces of viral proteins with other proteins, nucleic acids or low molecular-weight ligands, both in the viral particle and between the virus and its host. In the immunodeficiency viruses, the interaction interfaces are not more conserved than the corresponding proteins on an average, and we show that extremely conserved clusters coincide with protein-protein interaction hotspots, predicted as the residues with the largest energetic contribution to the interaction. Extremely variable clusters have been identified here for the first time. In the HIV-1 envelope protein gp120, they overlap with known antigenic sites. These antigenic sites also contain many residues from extremely conserved clusters, hence representing a unique interacting interface enriched both in extremely conserved and in extremely variable clusters of residues. This observation may have important implication for antiretroviral vaccine development.

Availability and implementation

A Python package is available at https://bioinf.mpi-inf.mpg.de/publications/viral-ppi-pred/

Contact

voitenko@mpi-inf.mpg.de or kalinina@mpi-inf.mpg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +24670875,Herceptin resistance database for understanding mechanism of resistance in breast cancer patients.,"Monoclonal antibody Trastuzumab/Herceptin is considered as frontline therapy for Her2-positive breast cancer patients. However, it is not effective against several patients due to acquired or de novo resistance. In last one decade, several assays have been performed to understand the mechanism of Herceptin resistance with/without supplementary drugs. This manuscript describes a database HerceptinR, developed for understanding the mechanism of resistance at genetic level. HerceptinR maintains information about 2500 assays performed against various breast cancer cell lines (BCCs), for improving sensitivity of Herceptin with or without supplementary drugs. In order to understand Herceptin resistance at genetic level, we integrated genomic data of BCCs that include expression, mutations and copy number variations in different cell lines. HerceptinR will play a vital role in i) designing biomarkers to identify patients eligible for Herceptin treatment and ii) identification of appropriate supplementary drug for a particular patient. HerceptinR is available at http://crdd.osdd.net/raghava/herceptinr/.",2014-03-27 +25352730,SoyProLow: A protein database enriched in low abundant soybean proteins.,"

Unlabelled

Soybeans are an important legume crop that contain 2 major storage proteins, β-conglycinin and glycinin, which account for about 70-80% of total seed proteins. These abundant proteins hinder the isolation and characterization of several low abundant proteins in soybean seeds. Several protein extraction methodologies were developed in our laboratory to decrease these abundant storage proteins in seed extracts and to also decrease the amount of ribulose-1,5-bisphosphate carboxylase/oxygenase (RuBisCO), which is normally very abundant in leaf extracts. One of the extraction methodologies used 40% isopropanol and was more effective in depleting soybean storage proteins and enhancing low abundant seed proteins than similar methods using 10-80% isopropanol. Extractions performed with 40% isopropanol decreased the amount of storage proteins and revealed 107 low abundant proteins when using the combined approaches of two-dimensional polyacrylamide gel electrophoresis (2D-PAGE) and Mass Spectrometry (MS). The separation of proteins was achieved by iso-electric focusing (IEF) and 2D-PAGE. The proteins were analyzed with MS techniques to provide amino acid sequence. The proteins were identified by comparing their amino acid sequences with those in different databases including NCBI-non redundant, UniprotKB and MSDB databases. In this investigation, previously published results on low abundant soybean seed proteins were used to create an online database (SoyProLow) to provide a data repository that can be used as a reference to identify and characterize low abundance proteins. This database is freely accessible to individuals using similar techniques and can be used for the subsequent genetic manipulation to produce value added soybean traits. An intuitive user interface based on dynamic HTML enables users to browse the network and the profiles of the low abundant proteins.

Availability

http://bioinformatics.towson.edu/Soybean_low_abundance_proteins_2D_Gel_DB/Gel1.aspx.",2014-09-30 +23348786,RAMP: a bioinformatics framework for researching imaging agents through molecular pathways.,"Signaling pathways are the fundamental grammar of cellular communication, yet few frameworks are available to analyze molecular imaging probes in the context of signaling pathways. Such a framework would aid in the design and selection of imaging probes for measuring specific signaling pathways and, vice versa, help illuminate which pathways are being assayed by a given probe. RAMP (Researching imaging Agents through Molecular Pathways) is a bioinformatics framework for connecting signaling pathways and imaging probes using a controlled vocabulary of the imaging targets. RAMP contains signaling pathway data from MetaCore, the Kyoto Encyclopedia of Genes and Genomes, and the Gene Ontology project; imaging probe data from the Molecular Imaging and Contrast Agent Database (MICAD); and tissue protein expression data from The Human Protein Atlas. The RAMP search tool is available at . Examples are presented to demonstrate the utility of RAMP for pathway-based searches of molecular imaging probes.",2013-01-01 +27510874,[Application of the computer-based respiratory sound analysis system based on Mel-frequency cepstral coefficient and dynamic time warping in healthy children].,"

Objective

We designed a computer-based respiratory sound analysis system to identify pediatric normal lung sounds, and aimed to verify the validity of this system.

Method

First, we downloaded the standard lung sounds from the network database (website: http://www.easyauscultation.com/lung-sounds-reference-guide) and recorded 3 samples of abnormal loud sound (rhonchi, wheeze and crackles) from three patients of the Department of Pediatrics, the First Affiliated Hospital of Xiamen University. We regarded such lung sounds as ""reference lung sounds"". The ""test lung sounds"" were recorded from 29 children from the Kindergarten of Xiamen University. We recorded lung sounds with a portable electronic stethoscope, and valid lung sounds were selected by manual identification. We used Mel-frequency cepstral coefficients (MFCC) to extract lung sound features and dynamic time warping (DTW) for signal classification.

Result

We had 39 standard lung sounds and recorded 58 test lung sounds. The computer-based respiratory sound analysis system was applied to the 58 test lung sounds, yielding 52 correct identifications and 6 incorrect identifications. Accuracy was 89.7%.

Conclusion

Based on MFCC and DTW, our computer-based respiratory sound analysis system can effectively identify healthy lung sounds of children (accuracy can reach 89.7%), fully embodies the reliability of the lung sounds analysis system.",2016-08-01 +27479659,TepiTool: A Pipeline for Computational Prediction of T Cell Epitope Candidates.,"Computational prediction of T cell epitope candidates is currently being used in several applications including vaccine discovery studies, development of diagnostics, and removal of unwanted immune responses against protein therapeutics. There have been continuous improvements in the performance of MHC binding prediction tools, but their general adoption by immunologists has been slow due to the lack of user-friendly interfaces and guidelines. Current tools only provide minimal advice on what alleles to include, what lengths to consider, how to deal with homologous peptides, and what cutoffs should be considered relevant. This protocol provides step-by-step instructions with necessary recommendations for prediction of the best T cell epitope candidates with the newly developed online tool called TepiTool. TepiTool, which is part of the Immune Epitope Database (IEDB), provides some of the top MHC binding prediction algorithms for number of species including humans, chimpanzees, bovines, gorillas, macaques, mice, and pigs. The TepiTool is freely accessible at http://tools.iedb.org/tepitool/. © 2016 by John Wiley & Sons, Inc.",2016-08-01 +27563447,A genetic database can be utilized to identify potential biomarkers for biphenotypic hepatocellular carcinoma-cholangiocarcinoma.,"

Background

Biphenotypic hepatocellular carcinoma-cholangiocarcinoma (HCC-CC) is an uncommon primary liver neoplasm. Due to limitations in radiologic imaging for the diagnosis of this condition, biopsy is a common method for diagnosis, which is invasive and holds potential complications. To identify alternative means for obtaining the diagnosis and assessing the prognosis of this condition, we evaluated biomarkers for biphenotypic HCC-CC using a genetic database.

Methods

To evaluate the genetic associations with each variable we utilized GeneCards(®), The Human Gene Compendium (http://www.genecards.org). The results of our search were entered into the Pathway Interaction Database from the National Cancer Institute (PID-NCI) (http://pid.nci.nih.gov), to generate a biomolecule interaction map.

Results

The results of our query yielded 690 genes for HCC, 98 genes for CC and 50 genes for HCC-CC. Genes depicted in this analysis demonstrate the role of hormonal regulation, embryonic development, cell surface adhesion, cytokeratin stability, mucin production, metalloproteinase regulation, Ras signaling, metabolism and apoptosis. Examples of previously described markers included hepatocyte growth factor (HGF), mesenchymal epithelial transition (MET) and Kirsten rat sarcoma viral oncogene homolog (KRAS). Novel markers included phosphatidylinositol-4,5-bisphosphate 3-kinase, catalytic subunit alpha (PIK3CA), GPC3, choline kinase alpha (CHKA), prostaglandin-endoperoxide synthase 2 (PTGS2), telomerase reverse transcriptase (TERT), myeloid cell leukemia 1 (MCL1) and N-acetyltransferase 2 (NAT2).

Conclusions

GeneCards is a useful research tool in the genetic analysis of low frequency malignancies. Utilizing this tool we identified several biomarkers are methods for diagnosing HCC-CC. Finally, utilizing these methods, HCC-CC was found to be predominantly a subtype of CC.",2016-08-01 +26452296,Fine-Scale Exposure to Allergenic Pollen in the Urban Environment: Evaluation of Land Use Regression Approach.,"

Background

Despite the recent developments in physically and chemically based analysis of atmospheric particles, no models exist for resolving the spatial variability of pollen concentration at urban scale.

Objectives

We developed a land use regression (LUR) approach for predicting spatial fine-scale allergenic pollen concentrations in the Helsinki metropolitan area, Finland, and evaluated the performance of the models against available empirical data.

Methods

We used grass pollen data monitored at 16 sites in an urban area during the peak pollen season and geospatial environmental data. The main statistical method was generalized linear model (GLM).

Results

GLM-based LURs explained 79% of the spatial variation in the grass pollen data based on all samples, and 47% of the variation when samples from two sites with very high concentrations were excluded. In model evaluation, prediction errors ranged from 6% to 26% of the observed range of grass pollen concentrations. Our findings support the use of geospatial data-based statistical models to predict the spatial variation of allergenic grass pollen concentrations at intra-urban scales. A remote sensing-based vegetation index was the strongest predictor of pollen concentrations for exposure assessments at local scales.

Conclusions

The LUR approach provides new opportunities to estimate the relations between environmental determinants and allergenic pollen concentration in human-modified environments at fine spatial scales. This approach could potentially be applied to estimate retrospectively pollen concentrations to be used for long-term exposure assessments.

Citation

Hjort J, Hugg TT, Antikainen H, Rusanen J, Sofiev M, Kukkonen J, Jaakkola MS, Jaakkola JJ. 2016. Fine-scale exposure to allergenic pollen in the urban environment: evaluation of land use regression approach. Environ Health Perspect 124:619-626; http://dx.doi.org/10.1289/ehp.1509761.",2015-10-09 +28071710,MicroPattern: a web-based tool for microbe set enrichment analysis and disease similarity calculation based on a list of microbes.,"The microbiota colonized on human body is renowned as ""a forgotten organ"" due to its big impacts on human health and disease. Recently, microbiome studies have identified a large number of microbes differentially regulated in a variety of conditions, such as disease and diet. However, methods for discovering biological patterns in the differentially regulated microbes are still limited. For this purpose, here, we developed a web-based tool named MicroPattern to discover biological patterns for a list of microbes. In addition, MicroPattern implemented and integrated an algorithm we previously presented for the calculation of disease similarity based on disease-microbe association data. MicroPattern first grouped microbes into different sets based on the associated diseases and the colonized positions. Then, for a given list of microbes, MicroPattern performed enrichment analysis of the given microbes on all of the microbe sets. Moreover, using MicroPattern, we can also calculate disease similarity based on the shared microbe associations. Finally, we confirmed the accuracy and usefulness of MicroPattern by applying it to the changed microbes under the animal-based diet condition. MicroPattern is freely available at http://www.cuilab.cn/micropattern.",2017-01-10 +26576654,Prioritizing hypothesis tests for high throughput data.,"

Motivation

The advent of high throughput data has led to a massive increase in the number of hypothesis tests conducted in many types of biological studies and a concomitant increase in stringency of significance thresholds. Filtering methods, which use independent information to eliminate less promising tests and thus reduce multiple testing, have been widely and successfully applied. However, key questions remain about how to best apply them: When is filtering beneficial and when is it detrimental? How good does the independent information need to be in order for filtering to be effective? How should one choose the filter cutoff that separates tests that pass the filter from those that don't?

Result

We quantify the effect of the quality of the filter information, the filter cutoff and other factors on the effectiveness of the filter and show a number of results: If the filter has a high probability (e.g. 70%) of ranking true positive features highly (e.g. top 10%), then filtering can lead to dramatic increase (e.g. 10-fold) in discovery probability when there is high redundancy in information between hypothesis tests. Filtering is less effective when there is low redundancy between hypothesis tests and its benefit decreases rapidly as the quality of the filter information decreases. Furthermore, the outcome is highly dependent on the choice of filter cutoff. Choosing the cutoff without reference to the data will often lead to a large loss in discovery probability. However, naïve optimization of the cutoff using the data will lead to inflated type I error. We introduce a data-based method for choosing the cutoff that maintains control of the family-wise error rate via a correction factor to the significance threshold. Application of this approach offers as much as a several-fold advantage in discovery probability relative to no filtering, while maintaining type I error control. We also introduce a closely related method of P-value weighting that further improves performance.

Availability and implementation

R code for calculating the correction factor is available at http://www.stat.uga.edu/people/faculty/paul-schliekelman

Contact

pdschlie@stat.uga.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-16 +27587662,Drug response prediction by inferring pathway-response associations with kernelized Bayesian matrix factorization.,"

Motivation

A key goal of computational personalized medicine is to systematically utilize genomic and other molecular features of samples to predict drug responses for a previously unseen sample. Such predictions are valuable for developing hypotheses for selecting therapies tailored for individual patients. This is especially valuable in oncology, where molecular and genetic heterogeneity of the cells has a major impact on the response. However, the prediction task is extremely challenging, raising the need for methods that can effectively model and predict drug responses.

Results

In this study, we propose a novel formulation of multi-task matrix factorization that allows selective data integration for predicting drug responses. To solve the modeling task, we extend the state-of-the-art kernelized Bayesian matrix factorization (KBMF) method with component-wise multiple kernel learning. In addition, our approach exploits the known pathway information in a novel and biologically meaningful fashion to learn the drug response associations. Our method quantitatively outperforms the state of the art on predicting drug responses in two publicly available cancer datasets as well as on a synthetic dataset. In addition, we validated our model predictions with lab experiments using an in-house cancer cell line panel. We finally show the practical applicability of the proposed method by utilizing prior knowledge to infer pathway-drug response associations, opening up the opportunity for elucidating drug action mechanisms. We demonstrate that pathway-response associations can be learned by the proposed model for the well-known EGFR and MEK inhibitors.

Availability and implementation

The source code implementing the method is available at http://research.cs.aalto.fi/pml/software/cwkbmf/

Contacts

muhammad.ammad-ud-din@aalto.fi or samuel.kaski@aalto.fi

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +25161239,Identification of structural features in chemicals associated with cancer drug response: a systematic data-driven analysis.,"

Motivation

Analysis of relationships of drug structure to biological response is key to understanding off-target and unexpected drug effects, and for developing hypotheses on how to tailor drug therapies. New methods are required for integrated analyses of a large number of chemical features of drugs against the corresponding genome-wide responses of multiple cell models.

Results

In this article, we present the first comprehensive multi-set analysis on how the chemical structure of drugs impacts on genome-wide gene expression across several cancer cell lines [Connectivity Map (CMap) database]. The task is formulated as searching for drug response components across multiple cancers to reveal shared effects of drugs and the chemical features that may be responsible. The components can be computed with an extension of a recent approach called Group Factor Analysis. We identify 11 components that link the structural descriptors of drugs with specific gene expression responses observed in the three cell lines and identify structural groups that may be responsible for the responses. Our method quantitatively outperforms the limited earlier methods on CMap and identifies both the previously reported associations and several interesting novel findings, by taking into account multiple cell lines and advanced 3D structural descriptors. The novel observations include: previously unknown similarities in the effects induced by 15-delta prostaglandin J2 and HSP90 inhibitors, which are linked to the 3D descriptors of the drugs; and the induction by simvastatin of leukemia-specific response, resembling the effects of corticosteroids.

Availability and implementation

Source Code implementing the method is available at: http://research.ics.aalto.fi/mi/software/GFAsparse.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +27480116,"Gene, Environment and Methylation (GEM): a tool suite to efficiently navigate large scale epigenome wide association studies and integrate genotype and interaction between genotype and environment.","

Background

The interplay among genetic, environment and epigenetic variation is not fully understood. Advances in high-throughput genotyping methods, high-density DNA methylation detection and well-characterized sample collections, enable epigenetic association studies at the genomic and population levels (EWAS). The field has extended to interrogate the interaction of environmental and genetic (GxE) influences on epigenetic variation. Also, the detection of methylation quantitative trait loci (methQTLs) and their association with health status has enhanced our knowledge of epigenetic mechanisms in disease trajectory. However analysis of this type of data brings computational challenges and there are few practical solutions to enable large scale studies in standard computational environments.

Results

GEM is a highly efficient R tool suite for performing epigenome wide association studies (EWAS). GEM provides three major functions named GEM_Emodel, GEM_Gmodel and GEM_GxEmodel to study the interplay of Gene, Environment and Methylation (GEM). Within GEM, the pre-existing ""Matrix eQTL"" package is utilized and extended to study methylation quantitative trait loci (methQTL) and the interaction of genotype and environment (GxE) to determine DNA methylation variation, using matrix based iterative correlation and memory-efficient data analysis. Benchmarking presented here on a publicly available dataset, demonstrated that GEM can facilitate reliable genome-wide methQTL and GxE analysis on a standard laptop computer within minutes.

Conclusions

The GEM package facilitates efficient EWAS study in large cohorts. It is written in R code and can be freely downloaded from Bioconductor at https://www.bioconductor.org/packages/GEM/ .",2016-08-02 +23125366,Xenbase: expansion and updates of the Xenopus model organism database.,"Xenbase (http://www.xenbase.org) is a model organism database that provides genomic, molecular, cellular and developmental biology content to biomedical researchers working with the frog, Xenopus and Xenopus data to workers using other model organisms. As an amphibian Xenopus serves as a useful evolutionary bridge between invertebrates and more complex vertebrates such as birds and mammals. Xenbase content is collated from a variety of external sources using automated and semi-automated pipelines then processed via a combination of automated and manual annotation. A link-matching system allows for the wide variety of synonyms used to describe biological data on unique features, such as a gene or an anatomical entity, to be used by the database in an equivalent manner. Recent updates to the database include the Xenopus laevis genome, a new Xenopus tropicalis genome build, epigenomic data, collections of RNA and protein sequences associated with genes, more powerful gene expression searches, a community and curated wiki, an extensive set of manually annotated gene expression patterns and a new database module that contains data on over 700 antibodies that are useful for exploring Xenopus cell and developmental biology.",2012-11-03 +,The first Nitrogen Index app for mobile devices: Using portable technology for smart agricultural management,"Nitrogen fertilizer from organic and inorganic sources is used across the world’s agroecosystems. It contributes to higher yields and higher economic returns to farmers, and is essential for food security. However, when more is applied than necessary, significant amounts of nitrogen can exit the system and impact the environment. 
Appropriate nitrogen management, where the right amounts of nitrogen are applied and best practices are used to ensure higher use efficiency, is important for conservation. A recent study from the USDA reported that about 66% of US cropland was not meeting all of three best management practices for nitrogen: best rate, best time of application, and best method of application (http://www.ers.usda.gov/publications/err-economic-research-report/err127.aspx). Nitrogen tools can help us assess the risk of nitrogen losses from a field to the environment and increase nitrogen use efficiencies (http://www.ars.usda.gov/is/AR/archive/sep11/nitrogen0911.htm). We need quick and mobile nitrogen management risk assessment tools that can be carried to the field and elsewhere. The first smartphone/tablet application (or “app”) of the Nitrogen Index was released on February 26th, 2012. A PC® version [Footnote 1: Trade and manufacturer’s names are necessary to report factually on available data; however, the USDA neither guarantees nor warrants the standard of the product, and the use of the name by USDA implies no approval of the product to the exclusion of others that may also be suitable.] of the Nitrogen Index, which can be used on PC desktop and laptop computers, is also available, so users can email the results from their Nitrogen Index app developed for smartphones/tablets in the field to their computer back at the office (or a farmer’s or consultant’s computer) using their portable device’s internet service. The Nitrogen Index smartphone/tablet application can be downloaded at no cost from the Google Play™ website (https://play.google.com/store) to any phone that has the Android™ system. To find the application, the user simply needs to do a search at the Google Play website using “Nitrogen Index” as the search term. 
This new advancement in portable technology will enable the use of small devices such as smartphones to conduct an assessment on site and visit with a farmer at any given field site where a wireless connection is available. The Nitrogen Index is a quick tool, and its assessments of nitrogen loss risk are correlated with observed values. This is the first Nitrogen Index app in the world and it is the beginning of more advances to come in the field of portable agricultural technology. New advances in software (e.g., apps) and technology are bringing us to a new frontier of technology transfer, and portable technologies (e.g., smartphones, tablets) are making possible the development of ‘smart agriculture’.",2013-02-01 +28340552,StrAuto: automation and parallelization of STRUCTURE analysis.,"

Background

Population structure inference using the software STRUCTURE has become an integral part of population genetic studies covering a broad spectrum of taxa including humans. The ever-expanding size of genetic data sets poses computational challenges for this analysis. Although at least one tool currently implements parallel computing to reduce computational overload of this analysis, it does not fully automate the use of replicate STRUCTURE analysis runs required for downstream inference of optimal K. There is a pressing need for a tool that can deploy population structure analysis on high performance computing clusters.

Results

We present an updated version of the popular Python program StrAuto, to streamline population structure analysis using parallel computing. StrAuto implements a pipeline that combines STRUCTURE analysis with the Evanno Δ K analysis and visualization of results using STRUCTURE HARVESTER. Using benchmarking tests, we demonstrate that StrAuto significantly reduces the computational time needed to perform iterative STRUCTURE analysis by distributing runs over two or more processors.

Conclusion

StrAuto is the first tool to integrate STRUCTURE analysis with post-processing using a pipeline approach in addition to implementing parallel computation - a set up ideal for deployment on computing clusters. StrAuto is distributed under the GNU GPL (General Public License) and available to download from http://strauto.popgen.org .",2017-03-24 +25958393,NaviCell Web Service for network-based data visualization.,"Data visualization is an essential element of biological research, required for obtaining insights and formulating new hypotheses on mechanisms of health and disease. NaviCell Web Service is a tool for network-based visualization of 'omics' data which implements several data visual representation methods and utilities for combining them together. NaviCell Web Service uses Google Maps and semantic zooming to browse large biological network maps, represented in various formats, together with different types of the molecular data mapped on top of them. For achieving this, the tool provides standard heatmaps, barplots and glyphs as well as the novel map staining technique for grasping large-scale trends in numerical values (such as whole transcriptome) projected onto a pathway map. The web service provides a server mode, which allows automating visualization tasks and retrieving data from maps via RESTful (standard HTTP) calls. Bindings to different programming languages are provided (Python and R). We illustrate the purpose of the tool with several case studies using pathway maps created by different research groups, in which data visualization provides new insights into molecular mechanisms involved in systemic diseases such as cancer and neurodegenerative diseases.",2015-05-09 +27587691,PEPSI-Dock: a detailed data-driven protein-protein interaction potential accelerated by polar Fourier correlation.,"

Motivation

Docking prediction algorithms aim to find the native conformation of a complex of proteins from knowledge of their unbound structures. They rely on a combination of sampling and scoring methods, adapted to different scales. Polynomial Expansion of Protein Structures and Interactions for Docking (PEPSI-Dock) improves the accuracy of the first stage of the docking pipeline, which will sharpen up the final predictions. Indeed, PEPSI-Dock benefits from the precision of a very detailed data-driven model of the binding free energy used with a global and exhaustive rigid-body search space. As well as being accurate, our computations are among the fastest by virtue of the sparse representation of the pre-computed potentials and FFT-accelerated sampling techniques. Overall, this is the first demonstration of a FFT-accelerated docking method coupled with an arbitrary-shaped distance-dependent interaction potential.

Results

First, we present a novel learning process to compute data-driven distance-dependent pairwise potentials, adapted from our previous method used for rescoring of putative protein-protein binding poses. The potential coefficients are learned by combining machine-learning techniques with physically interpretable descriptors. Then, we describe the integration of the deduced potentials into a FFT-accelerated spherical sampling provided by the Hex library. Overall, on a training set of 163 heterodimers, PEPSI-Dock achieves a success rate of 91% mid-quality predictions in the top-10 solutions. On a subset of the protein docking benchmark v5, it achieves 44.4% mid-quality predictions in the top-10 solutions when starting from bound structures and 20.5% when starting from unbound structures. The method runs in 5-15 min on a modern laptop and can easily be extended to other types of interactions.

Availability and implementation

https://team.inria.fr/nano-d/software/PEPSI-Dock

Contact

sergei.grudinin@inria.fr.",2016-09-01 +26037908,The mzqLibrary--An open source Java library supporting the HUPO-PSI quantitative proteomics standard.,"The mzQuantML standard has been developed by the Proteomics Standards Initiative for capturing, archiving and exchanging quantitative proteomic data, derived from mass spectrometry. It is a rich XML-based format, capable of representing data about two-dimensional features from LC-MS data, and peptides, proteins or groups of proteins that have been quantified from multiple samples. In this article we report the development of an open source Java-based library of routines for mzQuantML, called the mzqLibrary, and associated software for visualising data called the mzqViewer. The mzqLibrary contains routines for mapping (peptide) identifications on quantified features, inference of protein (group)-level quantification values from peptide-level values, normalisation and basic statistics for differential expression. These routines can be accessed via the command line, via a Java programming interface access or a basic graphical user interface. The mzqLibrary also contains several file format converters, including import converters (to mzQuantML) from OpenMS, Progenesis LC-MS and MaxQuant, and exporters (from mzQuantML) to other standards or useful formats (mzTab, HTML, csv). The mzqViewer contains in-built routines for viewing the tables of data (about features, peptides or proteins), and connects to the R statistical library for more advanced plotting options. The mzqLibrary and mzqViewer packages are available from https://code.google.com/p/mzq-lib/.",2015-07-14 +26374744,mAPKL: R/ Bioconductor package for detecting gene exemplars and revealing their characteristics.,"

Background

So far many algorithms have been proposed towards the detection of significant genes in microarray analysis problems. Several of those approaches are freely available as R-packages though their engagement in gene expression analysis by non-bioinformaticians is usually a frustrating task. Besides, only some of those packages offer a complete suite of tools starting from initial data import and ending to analysis report. Here we present an R/Bioconductor package that implements a hybrid gene selection method along with a bunch of functions to facilitate a thorough and convenient gene expression profiling analysis.

Results

mAPKL is an open-source R/Bioconductor package that implements the mAP-KL hybrid gene selection method. The advantage of this method is that it selects a small number of gene exemplars while achieving comparable classification results to other well established algorithms on a variety of datasets and dataset sizes. The mAPKL package is accompanied by extra functionalities including (i) solid data import; (ii) data sampling following a user-defined proportion; (iii) preprocessing through several normalization and transformation alternatives; (iv) classification with the aid of SVM and performance evaluation; (v) network analysis of the significant genes (exemplars), including degree of centrality, closeness, betweenness, clustering coefficient as well as the construction of an edge list table; (vi) gene annotation analysis, (vii) pathway analysis and (viii) auto-generated analysis reporting.

Conclusions

Users are able to run a thorough gene expression analysis in a timely manner starting from raw data and concluding to network characteristics of the selected gene exemplars. Detailed instructions and example data are provided in the R package, which is freely available at Bioconductor under the GPL-2 or later license http://www.bioconductor.org/packages/3.1/bioc/html/mAPKL.html.",2015-09-15 +23626918,AnsNGS: An Annotation System to Sequence Variations of Next Generation Sequencing Data for Disease-Related Phenotypes.,"

Objectives

Next-generation sequencing (NGS) data in the identification of disease-causing genes provides a promising opportunity in the diagnosis of disease. Beyond the previous efforts for NGS data alignment, variant detection, and visualization, developing a comprehensive annotation system supported by multiple layers of disease phenotype-related databases is essential for deciphering the human genome. To satisfy the impending need to decipher the human genome, it is essential to develop a comprehensive annotation system supported by multiple layers of disease phenotype-related databases.

Methods

AnsNGS (Annotation system of sequence variations for next-generation sequencing data) is a tool for contextualizing variants related to diseases and examining their functional consequences. The AnsNGS integrates a variety of annotation databases to attain multiple levels of annotation.

Results

The AnsNGS assigns biological functions to variants, and provides gene (or disease)-centric queries for finding disease-causing variants. The AnsNGS also connects those genes harbouring variants and the corresponding expression probes for downstream analysis using expression microarrays. Here, we demonstrate its ability to identify disease-related variants in the human genome.

Conclusions

The AnsNGS can give a key insight into which of these variants is already known to be involved in a disease-related phenotype or located in or near a known regulatory site. The AnsNGS is available free of charge to academic users and can be obtained from http://snubi.org/software/AnsNGS/.",2013-03-31 +,Streamside Management Zones Affect Movement of Silvicultural Nitrogen and Phosphorus Fertilizers to Piedmont Streams,"Forestry best management practices (BMP) recommendations for streamside management zones (SMZs) are based on limited data regarding SMZ width, partial harvests, and nutrient movements after forest fertilization. Agricultural fertilization is commonly linked to increased stream nutrients. However, less is known about effectiveness of SMZ options for controlling nutrient movements after silvicultural fertilization. Diammonium phosphate and urea were applied to 12 subwatersheds in 3-year-old loblolly pine (Pinus taeda L.) plantations in the Virginia Piedmont. Three replicates of four SMZ treatments were superimposed on 12 subwatersheds in a previous SMZ harvest sediment study (7.6-m SMZ, 15.2-m SMZ thin, 15.2-m SMZ, and 30.5-m SMZ). Surface, near-surface, subsurface, and stream water samples were collected monthly for 1 year and analyzed for nitrate (NO₃⁻), ammonium (NH₄⁺), and orthophosphate (ortho-P). Transected measurements from streamside to fertilized plantations allowed interpretations of spatial nutrient measurements across SMZs. When compared with wider SMZs, 7.6-m SMZs had 3-10× surface water NO₃⁻, 3-6× near-surface water NO₃⁻, and 1-2× more stream water NO₃⁻. No significant differences were detected for NH₄⁺ for any SMZ treatment. The 15.2-m SMZ thin had small but significant increases (2-8×) in surface runoff for ortho-P relative to other SMZ treatments, perhaps because of increased surface water movement along thinning corridors. 
Across all SMZ treatments, comparisons of stream edges with fertilized stands indicated NO₃⁻ reductions of 33-98%, NH₄⁺ reductions of 68-97%, and ortho-P reductions of 70-98%. A 39% rainfall deficit during the study influenced results, but conventional SMZs ≥ 15.2 m protected streams from fertilization nutrient increases.",2013-02-01 +27621538,"Nonproliferative and Proliferative Lesions of the Rat and Mouse Skeletal Tissues (Bones, Joints, and Teeth).","The INHAND (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) Project (www.toxpath.org/inhand.asp) is an initiative of the Societies of Toxicological Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP) and North America (STP) to develop an internationally accepted nomenclature for proliferative and nonproliferative lesions in laboratory animals. The purpose of this publication is to provide a standardized nomenclature for classifying microscopic lesions observed in the skeletal tissues and teeth of laboratory rats and mice, with color photomicrographs illustrating examples of many common lesions. The standardized nomenclature presented in this document is also available on the internet (http://www.goreni.org/). Sources of material were databases from government, academic and industrial laboratories throughout the world.",2016-07-29 +28327601,BCIP: a gene-centered platform for identifying potential regulatory genes in breast cancer.,"Breast cancer is a disease with high heterogeneity. Many issues on tumorigenesis and progression are still elusive. It is critical to identify genes that play important roles in the progression of tumors, especially for tumors with poor prognosis such as basal-like breast cancer and tumors in very young women. To facilitate the identification of potential regulatory or driver genes, we present the Breast Cancer Integrative Platform (BCIP, http://omics.bmi.ac.cn/bcancer/). 
BCIP maintains multi-omics data selected with strict quality control and processed with uniform normalization methods, including gene expression profiles from 9,005 tumor and 376 normal tissue samples, copy number variation information from 3,035 tumor samples, microRNA-target interactions, co-expressed genes, KEGG pathways, and mammary tissue-specific gene functional networks. This platform provides a user-friendly interface integrating comprehensive and flexible analysis tools on differential gene expression, copy number variation, and survival analysis. The prominent characteristic of BCIP is that users can perform analysis by customizing subgroups with single or combined clinical features, including subtypes, histological grades, pathologic stages, metastasis status, lymph node status, ER/PR/HER2 status, TP53 mutation status, menopause status, age, tumor size, therapy responses, and prognosis. BCIP will help to identify regulatory or driver genes and candidate biomarkers for further research in breast cancer.",2017-03-22 +27153718,Multilevel biological characterization of exomic variants at the protein level significantly improves the identification of their deleterious effects.,"

Motivation

There are now many predictors capable of identifying the likely phenotypic effects of single nucleotide variants (SNVs) or short in-frame Insertions or Deletions (INDELs) on the increasing amount of genome sequence data. Most of these predictors focus on SNVs and use a combination of features related to sequence conservation, biophysical, and/or structural properties to link the observed variant to either neutral or disease phenotype. Despite notable successes, the mapping between genetic variants and their phenotypic effects is riddled with levels of complexity that are not yet fully understood and that are often not taken into account in the predictions, despite their promise of significantly improving the prediction of deleterious mutants.

Results

We present DEOGEN, a novel variant effect predictor that can handle both missense SNVs and in-frame INDELs. By integrating information from different biological scales and mimicking the complex mixture of effects that lead from the variant to the phenotype, we obtain significant improvements in the variant-effect prediction results. Next to the typical variant-oriented features based on the evolutionary conservation of the mutated positions, we added a collection of protein-oriented features that are based on functional aspects of the gene affected. We cross-validated DEOGEN on 36 825 polymorphisms, 20 821 deleterious SNVs, and 1038 INDELs from SwissProt. The multilevel contextualization of each (variant, protein) pair in DEOGEN provides a 10% improvement of MCC with respect to current state-of-the-art tools.

Availability and implementation

The software and the data presented here are publicly available at http://ibsquare.be/deogen

Contact

wvranken@vub.ac.be

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-18 +27174170,A homology-based pipeline for global prediction of post-translational modification sites.,"The pathways of protein post-translational modifications (PTMs) have been shown to play particularly important roles for almost any biological process. Identification of PTM substrates along with information on the exact sites is fundamental for fully understanding or controlling biological processes. Alternative computational strategies would help to annotate PTMs in a high-throughput manner. Traditional algorithms are suited for identifying the common organisms and tissues that have a complete PTM atlas or extensive experimental data. While annotation of rare PTMs in most organisms is a clear challenge. In this work, to this end we have developed a novel homology-based pipeline named PTMProber that allows identification of potential modification sites for most of the proteomes lacking PTMs data. Cross-promotion E-value (CPE) as stringent benchmark has been used in our pipeline to evaluate homology to known modification sites. Independent-validation tests show that PTMProber achieves over 58.8% recall with high precision by CPE benchmark. Comparisons with other machine-learning tools show that PTMProber pipeline performs better on general predictions. In addition, we developed a web-based tool to integrate this pipeline at http://bioinfo.ncu.edu.cn/PTMProber/index.aspx. In addition to pre-constructed prediction models of PTM, the website provides an extensional functionality to allow users to customize models.",2016-05-13 +24203711,DrugBank 4.0: shedding new light on drug metabolism.,"DrugBank (http://www.drugbank.ca) is a comprehensive online database containing extensive biochemical and pharmacological information about drugs, their mechanisms and their targets. 
Since it was first described in 2006, DrugBank has rapidly evolved, both in response to user requests and in response to changing trends in drug research and development. Previous versions of DrugBank have been widely used to facilitate drug and in silico drug target discovery. The latest update, DrugBank 4.0, has been further expanded to contain data on drug metabolism, absorption, distribution, metabolism, excretion and toxicity (ADMET) and other kinds of quantitative structure activity relationships (QSAR) information. These enhancements are intended to facilitate research in xenobiotic metabolism (both prediction and characterization), pharmacokinetics, pharmacodynamics and drug design/discovery. For this release, >1200 drug metabolites (including their structures, names, activity, abundance and other detailed data) have been added along with >1300 drug metabolism reactions (including metabolizing enzymes and reaction types) and dozens of drug metabolism pathways. Another 30 predicted or measured ADMET parameters have been added to each DrugCard, bringing the average number of quantitative ADMET values for Food and Drug Administration-approved drugs close to 40. Referential nuclear magnetic resonance and MS spectra have been added for almost 400 drugs as well as spectral and mass matching tools to facilitate compound identification. This expanded collection of drug information is complemented by a number of new or improved search tools, including one that provides a simple analyses of drug-target, -enzyme and -transporter associations to provide insight on drug-drug interactions.",2013-11-06 +27870245,The Art of Compiling Protein Binding Site Ensembles.,"Structure-based drug design starts with the collection, preparation, and initial analysis of protein structures. With more than 115,000 structures publically available in the Protein Data Bank (PDB), fully automated processes reliably performing these important preprocessing steps are needed. 
Several tools are available for these tasks, however, most of them do not address the special needs of scientists interested in protein-ligand interactions. In this paper, we summarize our research activities towards an automated processing pipeline from raw PDB data towards ready-to-use protein binding site ensembles. Starting from a single protein structure, the pipeline covers the following phases: Extracting structurally related binding sites from the PDB, aligning disconnected binding site sequences, resolving tautomeric forms and protonation, orienting hydrogens and flippable side-chains, structurally aligning the multitude of binding sites, and performing a reasonable reduction of ensemble structures. The pipeline, named SIENA, creates protein-structural ensembles for the analysis of protein flexibility, molecular design efforts like docking or de novo design within seconds. For the first time, we are able to process the whole PDB in order to create a large collection of protein binding site ensembles. SIENA is available as part of the ZBH ProteinsPlus webserver under http://proteinsplus.zbh.uni-hamburg.de.",2016-05-30 +26704599,MCAST: scanning for cis-regulatory motif clusters.,"

Unlabelled

Precise regulatory control of genes, particularly in eukaryotes, frequently requires the joint action of multiple sequence-specific transcription factors. A cis-regulatory module (CRM) is a genomic locus that is responsible for gene regulation and that contains multiple transcription factor binding sites in close proximity. Given a collection of known transcription factor binding motifs, many bioinformatics methods have been proposed over the past 15 years for identifying within a genomic sequence candidate CRMs consisting of clusters of those motifs.

Results

The MCAST algorithm uses a hidden Markov model with a P-value-based scoring scheme to identify candidate CRMs. Here, we introduce a new version of MCAST that offers improved graphical output, a dynamic background model, statistical confidence estimates based on false discovery rate estimation and, most significantly, the ability to predict CRMs while taking into account epigenomic data such as DNase I sensitivity or histone modification data. We demonstrate the validity of MCAST's statistical confidence estimates and the utility of epigenomic priors in identifying CRMs.

Availability and implementation

MCAST is part of the MEME Suite software toolkit. A web server and source code are available at http://meme-suite.org and http://alternate.meme-suite.org

Contact

t.bailey@imb.uq.edu.au or william-noble@uw.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-24 +25394492,MicroRNA-7 directly targets insulin-like growth factor 1 receptor to inhibit cellular growth and glucose metabolism in gliomas.,"

Background

Recent studies observed that altered energy metabolism has become widespread in cancer cells along with other cancer-associated traits that have been accepted as hallmarks of cancer. Akt signaling pathway is involved in the aerobic glycolysis program. However, mechanisms underlying the regulation of aerobic glycolysis and Akt activity in gliomas remain unclear. MicroRNAs are a group of small non-coding RNAs that can function as endogenous RNA interference to regulate expression of targeted genes. This study was conducted to detect the function of miR-7 targeting insulin-like growth factor 1 receptor (IGF-1R), which is an upstream regulator of Akt.

Methods

MicroRNA expression data for gliomas and normal controls were downloaded from The Cancer Genome Atlas (TCGA) database. Quantitative real-time PCR was used to measure the microRNA-7 (miR-7) expression level, and Western blot was performed to detect protein expression in U87 and U251 cells. Colony formation assay and glycolysis stress test were also conducted. Luciferase reporter assay was used to identify the mechanism of IGF-1R and miR-7 regulation.

Results

miR-7 was downregulated in human glioma tissues based on TCGA database. Forced expression of miR-7 or IGF-1R knockdown inhibited colony formation and glucose metabolic capabilities of glioma cells in vitro and decreased the p-Akt expression level. Bioinformatics analysis results indicated that IGF-1R could be a target of miR-7. Western blot and luciferase reporter assays showed that miR-7 modulated IGF-1R expression by directly targeting the binding site within the 3'-untranslated region.

Conclusions

This study provides the first evidence that miR-7 inhibits cellular growth and glucose metabolism in gliomas, at least partially, by regulating the IGF-1R/Akt signaling pathway. Therefore, miR-7 is a promising molecular drug for glioma treatment.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_211.",2014-11-14 +26936044,Quantitative Prediction of Drug Interactions Caused by CYP1A2 Inhibitors and Inducers.,"

Background

A simple method to predict drug-drug interactions mediated by cytochrome P450 enzymes (CYPs) on the basis of in vivo data has been previously applied for several CYP isoforms but not for CYP1A2. The objective of this study was to extend this method to drug interactions caused by CYP1A2 inhibitors and inducers.

Methods

First, initial estimates of the model parameters were obtained using data from the literature. Then, an external validation of these initial estimates was performed by comparing model-based predicted area under the concentration-time curve (AUC) ratios with observations not used in the initial estimation. Third, refined estimates of the model parameters were obtained by Bayesian orthogonal regression using Winbugs software, and predicted AUC ratios were compared with all available observations. Finally, predicted AUC ratios for all possible substrates-inhibitors and substrates-inducers were computed.

Results

A total of 100 AUC ratios were retrieved from the literature. Model parameters were estimated for 19 CYP1A2 substrate drugs, 26 inhibitors and seven inducers, including tobacco smoking. In the external validation, the mean prediction error of the AUC ratios was -0.22, while the mean absolute error was 0.97 (37 %). After the Bayesian estimation step, the mean prediction error was 0.11, while the mean absolute error was 0.43 (22 %). The AUC ratios for 625 possible interactions were computed.

Conclusion

This analysis provides insights into the interaction profiles of drugs poorly studied so far and can help to identify and manage significant interactions in clinical practice. Those results are now available to the community via a web tool ( http://www.ddi-predictor.org ).",2016-08-01 +27404214,ATLAS of Biochemistry: A Repository of All Possible Biochemical Reactions for Synthetic Biology and Metabolic Engineering Studies.,"Because the complexity of metabolism cannot be intuitively understood or analyzed, computational methods are indispensable for studying biochemistry and deepening our understanding of cellular metabolism to promote new discoveries. We used the computational framework BNICE.ch along with cheminformatic tools to assemble the whole theoretical reactome from the known metabolome through expansion of the known biochemistry presented in the Kyoto Encyclopedia of Genes and Genomes (KEGG) database. We constructed the ATLAS of Biochemistry, a database of all theoretical biochemical reactions based on known biochemical principles and compounds. ATLAS includes more than 130 000 hypothetical enzymatic reactions that connect two or more KEGG metabolites through novel enzymatic reactions that have never been reported to occur in living organisms. Moreover, ATLAS reactions integrate 42% of KEGG metabolites that are not currently present in any KEGG reaction into one or more novel enzymatic reactions. The generated repository of information is organized in a Web-based database ( http://lcsb-databases.epfl.ch/atlas/ ) that allows the user to search for all possible routes from any substrate compound to any product. The resulting pathways involve known and novel enzymatic steps that may indicate unidentified enzymatic activities and provide potential targets for protein engineering. 
Our approach of introducing novel biochemistry into pathway design and associated databases will be important for synthetic biology and metabolic engineering.",2016-07-28 +23110173,"SEED servers: high-performance access to the SEED genomes, annotations, and metabolic models.","The remarkable advance in sequencing technology and the rising interest in medical and environmental microbiology, biotechnology, and synthetic biology resulted in a deluge of published microbial genomes. Yet, genome annotation, comparison, and modeling remain a major bottleneck to the translation of sequence information into biological knowledge, hence computational analysis tools are continuously being developed for rapid genome annotation and interpretation. Among the earliest, most comprehensive resources for prokaryotic genome analysis, the SEED project, initiated in 2003 as an integration of genomic data and analysis tools, now contains >5,000 complete genomes, a constantly updated set of curated annotations embodied in a large and growing collection of encoded subsystems, a derived set of protein families, and hundreds of genome-scale metabolic models. Until recently, however, maintaining current copies of the SEED code and data at remote locations has been a pressing issue. To allow high-performance remote access to the SEED database, we developed the SEED Servers (http://www.theseed.org/servers): four network-based servers intended to expose the data in the underlying relational database, support basic annotation services, offer programmatic access to the capabilities of the RAST annotation server, and provide access to a growing collection of metabolic models that support flux balance analysis. The SEED servers offer open access to regularly updated data, the ability to annotate prokaryotic genomes, the ability to create metabolic reconstructions and detailed models of metabolism, and access to hundreds of existing metabolic models. 
This work offers and supports a framework upon which other groups can build independent research efforts. Large integrations of genomic data represent one of the major intellectual resources driving research in biology, and programmatic access to the SEED data will provide significant utility to a broad collection of potential users.",2012-10-24 +23621914,CoDP: predicting the impact of unclassified genetic variants in MSH6 by the combination of different properties of the protein.,"

Background

Lynch syndrome is a hereditary cancer predisposition syndrome caused by a mutation in one of the DNA mismatch repair (MMR) genes. About 24% of the mutations identified in Lynch syndrome are missense substitutions and the frequency of missense variants in MSH6 is the highest amongst these MMR genes. Because of this high frequency, the genetic testing was not effectively used in MSH6 so far. We, therefore, developed CoDP (Combination of the Different Properties), a bioinformatics tool to predict the impact of missense variants in MSH6.

Methods

We integrated the prediction results of three methods, namely MAPP, PolyPhen-2 and SIFT. Two other structural properties, namely solvent accessibility and the change in the number of heavy atoms of amino acids in the MSH6 protein, were further combined explicitly. MSH6 germline missense variants classified by their associated clinical and molecular data were used to fit the parameters for the logistic regression model and to assess the prediction. The performance of CoDP was compared with those of other conventional tools, namely MAPP, SIFT, PolyPhen-2 and PON-MMR.

Results

A total of 294 germline missense variants were collected from the variant databases and literature. Of them, 34 variants were available for the parameter training and the prediction performance test. We integrated the prediction results of MAPP, PolyPhen-2 and SIFT, and two other structural properties, namely solvent accessibility and the change in the number of heavy atoms of amino acids in the MSH6 protein, were further combined explicitly. Variants data classified by their associated clinical and molecular data were used to fit the parameters for the logistic regression model and to assess the prediction. The values of the positive predictive value (PPV), the negative predictive value (NPV), sensitivity, specificity and accuracy of the tools were compared on the whole data set. PPV of CoDP was 93.3% (14/15), NPV was 94.7% (18/19), specificity was 94.7% (18/19), sensitivity was 93.3% (14/15) and accuracy was 94.1% (32/34). Area under the curve of CoDP was 0.954, that of MAPP for MSH6 was 0.919, of SIFT was 0.864 and of PolyPhen-2 HumVar was 0.819. The power to distinguish between pathogenic and non-pathogenic variants of these methods was tested by Wilcoxon rank sum test (p < 8.9 × 10(-6) for CoDP, p < 3.3 × 10(-5) for MAPP, p < 3.1 × 10(-4) for SIFT and p < 1.2 × 10(-3) for PolyPhen-2 HumVar), and CoDP was shown to outperform other conventional methods.

Conclusion

In this paper, we provide a human curated data set for MSH6 missense variants, and CoDP, the prediction tool, which achieved better accuracy for predicting the impact of missense variants in MSH6 than any other known tools. CoDP is available at http://cib.cf.ocha.ac.jp/CoDP/.",2013-04-28 +25649616,GenoMetric Query Language: a novel approach to large-scale genomic data management.,"

Motivation

Improvement of sequencing technologies and data processing pipelines is rapidly providing sequencing data, with associated high-level features, of many individual genomes in multiple biological and clinical conditions. They allow for data-driven genomic, transcriptomic and epigenomic characterizations, but require state-of-the-art 'big data' computing strategies, with abstraction levels beyond available tool capabilities.

Results

We propose a high-level, declarative GenoMetric Query Language (GMQL) and a toolkit for its use. GMQL operates downstream of raw data preprocessing pipelines and supports queries over thousands of heterogeneous datasets and samples; as such it is key to genomic 'big data' analysis. GMQL leverages a simple data model that provides both abstractions of genomic region data and associated experimental, biological and clinical metadata and interoperability between many data formats. Based on Hadoop framework and Apache Pig platform, GMQL ensures high scalability, expressivity, flexibility and simplicity of use, as demonstrated by several biological query examples on ENCODE and TCGA datasets.

Availability and implementation

The GMQL toolkit is freely available for non-commercial use at http://www.bioinformatics.deib.polimi.it/GMQL/.",2015-02-03 +23732275,Relating genes to function: identifying enriched transcription factors using the ENCODE ChIP-Seq significance tool.,"

Motivation

Biological analysis has shifted from identifying genes and transcripts to mapping these genes and transcripts to biological functions. The ENCODE Project has generated hundreds of ChIP-Seq experiments spanning multiple transcription factors and cell lines for public use, but tools for a biomedical scientist to analyze these data are either non-existent or tailored to narrow biological questions. We present the ENCODE ChIP-Seq Significance Tool, a flexible web application leveraging public ENCODE data to identify enriched transcription factors in a gene or transcript list for comparative analyses.

Implementation

The ENCODE ChIP-Seq Significance Tool is written in JavaScript on the client side and has been tested on Google Chrome, Apple Safari and Mozilla Firefox browsers. Server-side scripts are written in PHP and leverage R and a MySQL database. The tool is available at http://encodeqt.stanford.edu.

Contact

abutte@stanford.edu

Supplementary information

Supplementary material is available at Bioinformatics online.",2013-06-03 +27465130,NTTMUNSW BioC modules for recognizing and normalizing species and gene/protein mentions. ,"In recent years, the number of published biomedical articles has increased as researchers have focused on biological domains to investigate the functions of biological objects, such as genes and proteins. However, the ambiguous nature of genes and their products have rendered the literature more complex for readers and curators of molecular interaction databases. To address this challenge, a normalization technique that can link variants of biological objects to a single, standardized form was applied. In this work, we developed a species normalization module, which recognizes species names and normalizes them to NCBI Taxonomy IDs. Unlike most previous work, which ignored the prefix of a gene name that represents an abbreviation of the species name to which the gene belongs, the recognition results of our module include the prefixed species. The developed species normalization module achieved an overall F-score of 0.954 on an instance-level species normalization corpus. For gene normalization, two separate modules were respectively employed to recognize gene mentions and normalize those mentions to their Entrez Gene IDs by utilizing a multistage normalization algorithm developed for processing full-text articles. All of the developed modules are BioC-compatible .NET framework libraries and are publicly available from the NuGet gallery.Database URL: https://sites.google.com/site/hjdairesearch/Projects/isn-corpus.",2016-07-27 +23448274,PASmiR: a literature-curated database for miRNA molecular regulation in plant response to abiotic stress.,"

Background

Over 200 published studies of more than 30 plant species have reported a role for miRNAs in regulating responses to abiotic stresses. However, data from these individual reports has not been collected into a single database. The lack of a curated database of stress-related miRNAs limits research in this field, and thus a cohesive database system should necessarily be constructed for data deposit and further application.

Description

PASmiR, a literature-curated and web-accessible database, was developed to provide detailed, searchable descriptions of miRNA molecular regulation in different plant abiotic stresses. PASmiR currently includes data from ~200 published studies, representing 1038 regulatory relationships between 682 miRNAs and 35 abiotic stresses in 33 plant species. PASmiR's interface allows users to retrieve miRNA-stress regulatory entries by keyword search using plant species, abiotic stress, and miRNA identifier. Each entry upon keyword query contains detailed regulation information for a specific miRNA, including species name, miRNA identifier, stress name, miRNA expression pattern, detection method for miRNA expression, a reference literature, and target gene(s) of the miRNA extracted from the corresponding reference or miRBase. Users can also contribute novel regulatory entries by using a web-based submission page. The PASmiR database is freely accessible from the two URLs of http://hi.ustc.edu.cn:8080/PASmiR, and http://pcsb.ahau.edu.cn:8080/PASmiR.

Conclusion

The PASmiR database provides a solid platform for collection, standardization, and searching of miRNA-abiotic stress regulation data in plants. As such this database will be a comprehensive repository for miRNA regulatory mechanisms involved in plant response to abiotic stresses for the plant stress physiology community.",2013-03-01 +28781523,The influence of seasonality and weather changes on premature birth incidence.,"

Background

Although the effects of meteorological factors on the general population health are widely documented, little is known about their influence on human pregnancy and birth. The present study aims at analyzing the influence of the atmospheric conditions on premature births.

Method

One hundred and eighty-nine cases of premature birth were included in the study, with a gestational age between 24 and 37 weeks of amenorrhea. Cases with antepartum fetal death and those with uncertain gestational age were excluded. Daily weather data were obtained from the http://www.wunderground.com site. A Pearson's product-moment correlation was run to assess the relationship between weekly preterm birth incidence and the total number of premature births and the mean maximum and minimum temperature (Tmax, Tmin), maximum and minimum average humidity (Umax, Umin), maximum and minimum atmospheric pressure mean (P max, P min), average wind speed and average precipitation quantity, calculated for one calendar week.

Results

Approximately 7.7% of all births during the study period occurred before 37 weeks of gestation, the main reason for hospitalization being premature rupture of membranes (45%). The analysis revealed a moderate positive correlation between weekly preterm birth incidence and the average temperature (r=0.306, n=52, p=0.027) and a moderate positive correlation between weekly preterm birth incidence and temperature variation (r=0.307, n=52, p=0.007). Our study found no statistically significant correlation between preterm birth incidence and humidity variation, pressure variation, or wind speed.

Conclusions

The incidence of premature births can be influenced by variations of specific weather factors, especially during the weeks characterized by large fluctuations in temperature. The results obtained might inspire the construction of multicenter studies to investigate more thoroughly the adverse effects of some meteorological factors that can influence the outcomes of human pregnancy.",2017-07-15 +,Assessing Tropical Forests' Climatic Sensitivities with Long-term Data,"Analyses relating long-term records of tree growth to interannual climatic variation at La Selva, Costa Rica have revealed marked forest sensitivities to both temperature and dry-season intensity (Clark et al. 2010). The tropical-forest biome is certain to become warmer, and many areas may become drier. Testing the generality of the La Selva findings with similar analyses of field data from diverse forests across the biome will be a valuable next step. Based on our experiences during the La Selva studies, we propose that such assessments will need to address three issues. One is the number of repeat forest measurements. Short series of re-censuses can be an unreliable basis for assessing climatic sensitivities. For some key climatic factors (e.g., temperature), records consisting of fewer than 10-12 re-censuses can span limited climatic ranges, producing erratic and largely nonsignificant correlations. Multiyear census intervals exacerbate these data limitations. Second, different types of forest-growth data call for different analysis approaches. Cohort and tree-ring records need to be adjusted for ontogenetic growth changes, while stand-level data require taking into account potentially confounding influences from forest compositional changes, as from succession. Third, a reliable meteorological record is critical. Poor-quality or internally inconsistent climatic records can fatally corrupt assessments of forest sensitivities. 
To be usable in such analyses, the meteorological record requires data quality control, gap filling, and adjustments to maintain the record's internal consistency in the face of commonly occurring methods changes (instruments, siting). We illustrate these issues using analyses of the long-term La Selva records. Abstract in Spanish is available at http://www.blackwell-synergy.com/loi/btp.",2011-01-01 +26888080,Inference and Analysis of Population Structure Using Genetic Data and Network Theory.,"Clustering individuals to subpopulations based on genetic data has become commonplace in many genetic studies. Inference about population structure is most often done by applying model-based approaches, aided by visualization using distance-based approaches such as multidimensional scaling. While existing distance-based approaches suffer from a lack of statistical rigor, model-based approaches entail assumptions of prior conditions such as that the subpopulations are at Hardy-Weinberg equilibria. Here we present a distance-based approach for inference about population structure using genetic data by defining population structure using network theory terminology and methods. A network is constructed from a pairwise genetic-similarity matrix of all sampled individuals. The community partition, a partition of a network to dense subgraphs, is equated with population structure, a partition of the population to genetically related groups. Community-detection algorithms are used to partition the network into communities, interpreted as a partition of the population to subpopulations. The statistical significance of the structure can be estimated by using permutation tests to evaluate the significance of the partition's modularity, a network theory measure indicating the quality of community partitions. To further characterize population structure, a new measure of the strength of association (SA) for an individual to its assigned community is presented. 
The strength of association distribution (SAD) of the communities is analyzed to provide additional population structure characteristics, such as the relative amount of gene flow experienced by the different subpopulations and identification of hybrid individuals. Human genetic data and simulations are used to demonstrate the applicability of the analyses. The approach presented here provides a novel, computationally efficient model-free method for inference about population structure that does not entail assumption of prior conditions. The method is implemented in the software NetStruct (available at https://giligreenbaum.wordpress.com/software/).",2016-02-17 +23209672,Effectiveness of cognitive behavioral therapy for depression in patients receiving disability benefits: a systematic review and individual patient data meta-analysis.,"

Objectives

To systematically summarize the randomized trial evidence regarding the relative effectiveness of cognitive behavioural therapy (CBT) in patients with depression in receipt of disability benefits in comparison to those not receiving disability benefits.

Data sources

All relevant RCTs from a database of randomized controlled and comparative studies examining the effects of psychotherapy for adult depression (http://www.evidencebasedpsychotherapies.org), electronic databases (MEDLINE, EMBASE, PSYCINFO, AMED, CINAHL and CENTRAL) to June 2011, and bibliographies of all relevant articles. STUDY ELIGIBILITY CRITERIA, PARTICIPANTS AND INTERVENTION: Adult patients with major depression, randomly assigned to CBT versus minimal/no treatment or care-as-usual.

Study appraisal and synthesis methods

Three teams of reviewers, independently and in duplicate, completed title and abstract screening, full text review and data extraction. We performed an individual patient data meta-analysis to summarize data.

Results

Of 92 eligible trials, 70 provided author contact information; of these 56 (80%) were successfully contacted to establish if they captured receipt of benefits as a baseline characteristic; 8 recorded benefit status, and 3 enrolled some patients in receipt of benefits, of which 2 provided individual patient data. Including both patients receiving and not receiving disability benefits, 2 trials (227 patients) suggested a possible reduction in depression with CBT, as measured by the Beck Depression Inventory, mean difference [MD] (95% confidence interval [CI]) = -2.61 (-5.28, 0.07), p = 0.06; minimally important difference of 5. The effect appeared larger, though not significantly, in those in receipt of benefits (34 patients) versus not receiving benefits (193 patients); MD (95% CI) = -4.46 (-12.21, 3.30), p = 0.26.

Conclusions

Our data does not support the hypothesis that CBT has smaller effects in depressed patients receiving disability benefits versus other patients. Given that the confidence interval is wide, a decreased effect is still possible, though if the difference exists, it is likely to be small.",2012-11-29 +25879845,ClusTrack: feature extraction and similarity measures for clustering of genome-wide data sets.,"Clustering is a popular technique for explorative analysis of data, as it can reveal subgroupings and similarities between data in an unsupervised manner. While clustering is routinely applied to gene expression data, there is a lack of appropriate general methodology for clustering of sequence-level genomic and epigenomic data, e.g. ChIP-based data. We here introduce a general methodology for clustering data sets of coordinates relative to a genome assembly, i.e. genomic tracks. By defining appropriate feature extraction approaches and similarity measures, we allow biologically meaningful clustering to be performed for genomic tracks using standard clustering algorithms. An implementation of the methodology is provided through a tool, ClusTrack, which allows fine-tuned clustering analyses to be specified through a web-based interface. We apply our methods to the clustering of occupancy of the H3K4me1 histone modification in samples from a range of different cell types. The majority of samples form meaningful subclusters, confirming that the definitions of features and similarity capture biological, rather than technical, variation between the genomic tracks. Input data and results are available, and can be reproduced, through a Galaxy Pages document at http://hyperbrowser.uio.no/hb/u/hb-superuser/p/clustrack. 
The clustering functionality is available as a Galaxy tool, under the menu option ""Specialized analyzis of tracks"", and the submenu option ""Cluster tracks based on genome level similarity"", at the Genomic HyperBrowser server: http://hyperbrowser.uio.no/hb/.",2015-04-16 +23599837,The impact on emergency department visits for respiratory illness during the southern california wildfires.,"

Introduction

In 2007 wildfires ravaged Southern California resulting in the largest evacuation due to a wildfire in American history. We report how these wildfires affected emergency department (ED) visits for respiratory illness.

Methods

We extracted data from a Kaiser Permanente database for a single metropolitan community ED. We compared the number of visits due to respiratory illness at time intervals of 2 weeks before and during the time when the fires were burning. We counted the total number of patients with chief complaint of dyspnea, cough, and asthma and final international classification of disease 9 coding diagnosis of asthma, bronchitis, chronic obstructive pulmonary disease and respiratory syndrome, and analyzed data for both total number and proportion of ED visits. We evaluated the data using Early Aberration Reporting System software to determine significant single-visit increases compared to expected counts. We also analyzed the average length of ED stay. Data on air quality were extracted from the http://www.airnow.gov site.

Results

There were significant differences between pre-fire and fire period average visit counts for the chief complaints of dyspnea and asthma. Dyspnea complaints increased by 3.2 visits per day. During the fire the diagnoses of asthma increased significantly by 2.6 patients per day. Air quality reached air quality index values of 300, indicating very unhealthy conditions. Average ED length of stay times remained unchanged during the fire period compared to the pre-fire period.

Conclusion

The 2007 Southern California wildfires caused significant surges in the volume of ED patients seeking treatment for respiratory illness. Disaster plans should prepare for these surges when future wildfires occur.",2013-03-01 +22036828,Macrophages and neutrophils in SLE-An online molecular catalog.,"Systemic Lupus Erythematosus (SLE) is a heterogeneous group of autoimmune disorders defined by a consensus of clinical and laboratory criteria. Much of the pathophysiology and therapy of SLE has focused on autoimmune B and T cells of the adaptive immune system. Recently, focus has shifted to the role of myeloid cells like neutrophils and macrophages - part of the innate immune system - in SLE pathogenesis. These cells have altered molecular profiles affecting multiple pathways, but no salient overview has been undertaken to broadly define and categorize this dysregulation. Here we endeavor to provide this overview and build a new freely accessible online resource for this purpose (http://www.mohanlab.org/SLE_BASE/myeloid_cells/).",2011-10-19 +27457921,DisSim: an online system for exploring significant similar diseases and exhibiting potential therapeutic drugs.,"The similarity of pair-wise diseases reveals the molecular relationships between them. For example, similar diseases have the potential to be treated by common therapeutic chemicals (TCs). In this paper, we introduced DisSim, an online system for exploring similar diseases, and comparing corresponding TCs. Currently, DisSim implemented five state-of-the-art methods to measure the similarity between Disease Ontology (DO) terms and provide the significance of the similarity score. Furthermore, DisSim integrated TCs of diseases from the Comparative Toxicogenomics Database (CTD), which can help to identify potential relationships between TCs and similar diseases. 
The system can be accessed from http://123.59.132.21:8080/DisSim.",2016-07-26 +22934074,Bridging the phenotypic and genetic data useful for integrated breeding through a data annotation using the Crop Ontology developed by the crop communities of practice.,"The Crop Ontology (CO) of the Generation Challenge Program (GCP) (http://cropontology.org/) is developed for the Integrated Breeding Platform (IBP) (http://www.integratedbreeding.net/) by several centers of The Consultative Group on International Agricultural Research (CGIAR): bioversity, CIMMYT, CIP, ICRISAT, IITA, and IRRI. Integrated breeding necessitates that breeders access genotypic and phenotypic data related to a given trait. The CO provides validated trait names used by the crop communities of practice (CoP) for harmonizing the annotation of phenotypic and genotypic data and thus supporting data accessibility and discovery through web queries. The trait information is completed by the description of the measurement methods and scales, and images. The trait dictionaries used to produce the Integrated Breeding (IB) fieldbooks are synchronized with the CO terms for an automatic annotation of the phenotypic data measured in the field. The IB fieldbook provides breeders with direct access to the CO to get additional descriptive information on the traits. Ontologies and trait dictionaries are online for cassava, chickpea, common bean, groundnut, maize, Musa, potato, rice, sorghum, and wheat. Online curation and annotation tools facilitate (http://cropontology.org) direct maintenance of the trait information and production of trait dictionaries by the crop communities. An important feature is the cross referencing of CO terms with the Crop database trait ID and with their synonyms in Plant Ontology (PO) and Trait Ontology (TO). 
Web links between cross referenced terms in CO provide online access to data annotated with similar ontological terms, particularly the genetic data in Gramene (University of Cornell) or the evaluation and climatic data in the Global Repository of evaluation trials of the Climate Change, Agriculture and Food Security programme (CCAFS). Cross-referencing and annotation will be further applied in the IBP.",2012-08-25 +28011780,FUEL-mLoc: feature-unified prediction and explanation of multi-localization of cellular proteins in multiple organisms.,"Although many web-servers for predicting protein subcellular localization have been developed, they often have the following drawbacks: (i) lack of interpretability or interpreting results with heterogenous information which may confuse users; (ii) ignoring multi-location proteins and (iii) only focusing on specific organism. To tackle these problems, we present an interpretable and efficient web-server, namely FUEL-mLoc, using Feature-Unified prediction and Explanation of multi-Localization of cellular proteins in multiple organisms. Compared to conventional localization predictors, FUEL-mLoc has the following advantages: (i) using unified features (i.e. essential GO terms) to interpret why a prediction is made; (ii) being capable of predicting both single- and multi-location proteins and (iii) being able to handle proteins of multiple organisms, including Eukaryota, Homo sapiens, Viridiplantae, Gram-positive Bacteria, Gram-negative Bacteria and Virus. Experimental results demonstrate that FUEL-mLoc outperforms state-of-the-art subcellular-localization predictors.

Availability and implementation

http://bioinfo.eie.polyu.edu.hk/FUEL-mLoc/.

Contacts

shibiao.wan@princeton.edu or enmwmak@polyu.edu.hk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +26671800,CoGI: Towards Compressing Genomes as an Image.,"Genomic science is now facing an explosive increase of data thanks to the fast development of sequencing technology. This situation poses serious challenges to genomic data storage and transferring. It is desirable to compress data to reduce storage and transferring cost, and thus to boost data distribution and utilization efficiency. Up to now, a number of algorithms / tools have been developed for compressing genomic sequences. Unlike the existing algorithms, most of which treat genomes as one-dimensional text strings and compress them based on dictionaries or probability models, this paper proposes a novel approach called CoGI (the abbreviation of Compressing Genomes as an Image) for genome compression, which transforms the genomic sequences to a two-dimensional binary image (or bitmap), then applies a rectangular partition coding algorithm to compress the binary image. CoGI can be used as either a reference-based compressor or a reference-free compressor. For the former, we develop two entropy-based algorithms to select a proper reference genome. Performance evaluation is conducted on various genomes. Experimental results show that the reference-based CoGI significantly outperforms two state-of-the-art reference-based genome compressors GReEn and RLZ-opt in both compression ratio and compression efficiency. It also achieves comparable compression ratio but two orders of magnitude higher compression efficiency in comparison with XM--one state-of-the-art reference-free genome compressor. Furthermore, our approach performs much better than Gzip--a general-purpose and widely-used compressor, in both compression speed and compression ratio. So, CoGI can serve as an effective and practical genome compressor. 
The source code and other related documents of CoGI are available at: http://admis.fudan.edu.cn/projects/cogi.htm.",2015-11-01 +26909367,Quantitative analysis by next generation sequencing of hematopoietic stem and progenitor cells (LSK) and of splenic B cells transcriptomes from wild-type and Usp3-knockout mice.,"The data described here provide genome-wide expression profiles of murine primitive hematopoietic stem and progenitor cells (LSK) and of B cell populations, obtained by high throughput sequencing. Cells are derived from wild-type mice and from mice deficient for the ubiquitin-specific protease 3 (USP3; Usp3Δ/Δ). Modification of histone proteins by ubiquitin plays a crucial role in the cellular response to DNA damage (DDR) (Jackson and Durocher, 2013) [1]. USP3 is a histone H2A deubiquitinating enzyme (DUB) that regulates ubiquitin-dependent DDR in response to DNA double-strand breaks (Nicassio et al., 2007; Doil et al., 2008) [2], [3]. Deletion of USP3 in mice increases the incidence of spontaneous tumors and affects hematopoiesis [4]. In particular, Usp3-knockout mice show progressive loss of B and T cells and decreased functional potential of hematopoietic stem cells (HSCs) during aging. USP3-deficient cells, including HSCs, display enhanced histone ubiquitination, accumulate spontaneous DNA damage and are hypersensitive to ionizing radiation (Lancini et al., 2014) [4]. To address whether USP3 loss leads to deregulation of specific molecular pathways relevant to HSC homeostasis and/or B cell development, we have employed the RNA-sequencing technology and investigated transcriptional differences between wild-type and Usp3Δ/Δ LSK, naïve B cells or in vitro activated B cells. The data relate to the research article ""Tight regulation of ubiquitin-mediated DNA damage response by USP3 preserves the functional integrity of hematopoietic stem cells"" (Lancini et al., 2014) [4]. 
The RNA-sequencing and analysis data sets have been deposited in NCBI's Gene Expression Omnibus (Edgar et al., 2002) [5] and are accessible through GEO Series accession number GSE58495 (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE58495). With this article, we present validation of the RNA-seq data set through quantitative real-time PCR and comparative analysis.",2016-01-08 +27389579,What is the publication rate for presentations given at the British Academic Conference in Otolaryngology (BACO)?,"

Objectives

The publication rate of some large academic meetings such as the American Academy of Otolaryngology-Head and Neck Surgery has been reported as 32%. We aimed to compare the rate of publication at the British Academic Conference in Otolaryngology (BACO) to allow surveillance of research activity in the United Kingdom (UK).

Design and setting

The abstract records of both BACO 2009 and 2012 were examined. The MEDLINE database was searched using PubMed (http://www.ncbi.nlm.nih.gov/pubmed) and an iterative approach. We recorded time to publication as well as the authors' region and journal.

Main outcome measures

Publication rate by conference, region and journal.

Results

Twice the number of presentations were made at BACO 2012 (n = 814) compared to BACO 2009 (n = 387). Absolute numbers of publications were 158 in 2012 and 92 in 2009. Overall, the publication rate dropped from 24% in 2009 to 19% in 2012. This difference in proportions was not significant (P = 0.08). The number of abstracts accepted for BACO 2012 doubled from BACO 2009 in nearly every subspecialty category, except the general/training category, which trebled. For both conferences, head and neck was the largest subspecialty abstract category, as well as the largest subspecialty publication category.

Conclusions

This study showed that the majority of abstracts presented at BACO 2009 and 2012 did not progress to publication. The rate of publication was similar to that seen in other general ENT meetings but does not compare favourably to the 69% rate seen for presentations made at the Otorhinolaryngological Research Society (ORS). The large increase in accepted abstracts at BACO 2012 may reflect growing competition for entry to specialist training.",2016-07-25 +22102568,The Gene Ontology: enhancements for 2011.,"The Gene Ontology (GO) (http://www.geneontology.org) is a community bioinformatics resource that represents gene product function through the use of structured, controlled vocabularies. The number of GO annotations of gene products has increased due to curation efforts among GO Consortium (GOC) groups, including focused literature-based annotation and ortholog-based functional inference. The GO ontologies continue to expand and improve as a result of targeted ontology development, including the introduction of computable logical definitions and development of new tools for the streamlined addition of terms to the ontology. The GOC continues to support its user community through the use of e-mail lists, social media and web-based resources.",2011-11-18 +26206304,TEtranscripts: a package for including transposable elements in differential expression analysis of RNA-seq datasets.,"

Motivation

Most RNA-seq data analysis software packages are not designed to handle the complexities involved in properly apportioning short sequencing reads to highly repetitive regions of the genome. These regions are often occupied by transposable elements (TEs), which make up between 20 and 80% of eukaryotic genomes. They can contribute a substantial portion of transcriptomic and genomic sequence reads, but are typically ignored in most analyses.

Results

Here, we present a method and software package for including both gene- and TE-associated ambiguously mapped reads in differential expression analysis. Our method shows improved recovery of TE transcripts over other published expression analysis methods, in both synthetic data and qPCR/NanoString-validated published datasets.

Availability and implementation

The source code, associated GTF files for TE annotation, and testing data are freely available at http://hammelllab.labsites.cshl.edu/software.

Contact

mhammell@cshl.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-23 +27466621,Simultaneous gene finding in multiple genomes.,"

Motivation

As the tree of life is populated with sequenced genomes ever more densely, the new challenge is the accurate and consistent annotation of entire clades of genomes. We address this problem with a new approach to comparative gene finding that takes a multiple genome alignment of closely related species and simultaneously predicts the location and structure of protein-coding genes in all input genomes, thereby exploiting negative selection and sequence conservation. The model prefers potential gene structures in the different genomes that are in agreement with each other, or-if not-where the exon gains and losses are plausible given the species tree. We formulate the multi-species gene finding problem as a binary labeling problem on a graph. The resulting optimization problem is NP hard, but can be efficiently approximated using a subgradient-based dual decomposition approach.

Results

The proposed method was tested on whole-genome alignments of 12 vertebrate and 12 Drosophila species. The accuracy was evaluated for human, mouse and Drosophila melanogaster and compared to competing methods. Results suggest that our method is well-suited for annotation of (a large number of) genomes of closely related species within a clade, in particular, when RNA-Seq data are available for many of the genomes. The transfer of existing annotations from one genome to another via the genome alignment is more accurate than previous approaches that are based on protein-spliced alignments, when the genomes are at close to medium distances.

Availability and implementation

The method is implemented in C++ as part of Augustus and available open source at http://bioinf.uni-greifswald.de/augustus/ CONTACT: stefaniekoenig@ymail.com or mario.stanke@uni-greifswald.de. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-27 +28443990,MicroRNA-340-5p modulates cisplatin resistance by targeting LPAATβ in osteosarcoma.,"MicroRNAs (miRNAs) play an important role in drug resistance and modulate the efficiency of chemotherapy. A recent study indicated that miR-340 functions as a tumor suppressor in various types of cancer. However, the role of miR-340 in chemotherapy has not been reported yet. In this study, we found that miR-340 enhanced cisplatin (CDDP)-induced cell death. Induction of miR-340-5p expression decreased the IC50 of CDDP and increased the apoptosis of CDDP-resistant MG-63 and Saos-2 cells. Moreover, miR-340-5p decreased the accumulation of MRP1 and MDR1. We further explored the mechanism underlying the promoting effects of miR-340-5p on CDDP-induced cell death. We identified a potential target of miR-340 in the 3' untranslated region of lysophosphatidic acid acyltransferase (LPAATβ) using the online program Targetscan (http://www.microrna.org). Luciferase reporter assays showed that miR-340 binds to the 3'UTR of LPAATβ. Enforced expression of miR-340-5p decreased the accumulation of LPAATβ in both MG-63 and Saos-2 cells. Silencing LPAATβ decreased the IC50 of CDDP and increased the apoptosis of CDDP-resistant MG-63 and Saos-2 cells, which is consistent with the effect of miR-340-5p on CDDP-induced cell death. Moreover, induced expression of LPAATβ compromised the effects of miR-340-5p on CDDP-induced cell death and accumulation of MRP1 and MDR1. 
Taken together, our data indicated that miR-340-5p enhanced the sensitivity to CDDP by targeting LPAATβ.",2017-04-20 +26487736,RNA structure framework: automated transcriptome-wide reconstruction of RNA secondary structures from high-throughput structure probing data.,"

Summary

The rapidly increasing number of discovered non-coding RNAs makes the understanding of their structure a key feature toward a deeper comprehension of gene expression regulation. Various enzymatic- and chemically- based approaches have been recently developed to allow whole-genome studies of RNA secondary structures. Several methods have been recently presented that allow high-throughput RNA structure probing (CIRS-seq, Structure-seq, SHAPE-seq, PARS, etc.) and unbiased structural inference of residues within RNAs in their native conformation. We here present an analysis toolkit, named RNA Structure Framework (RSF), which allows fast and fully-automated analysis of high-throughput structure probing data, from data pre-processing to whole-transcriptome RNA structure inference.

Availability and implementation

RSF is written in Perl and is freely available under the GPLv3 license from http://rsf.hugef-research.org.

Contact

salvatore.oliviero@hugef-torino.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-19 +28431529,HirBin: high-resolution identification of differentially abundant functions in metagenomes.,"

Background

Gene-centric analysis of metagenomics data provides information about the biochemical functions present in a microbiome under a certain condition. The ability to identify significant differences in functions between metagenomes is dependent on accurate classification and quantification of the sequence reads (binning). However, biological effects acting on specific functions may be overlooked if the classes are too general.

Methods

Here we introduce High-Resolution Binning (HirBin), a new method for gene-centric analysis of metagenomes. HirBin combines supervised annotation with unsupervised clustering to bin sequence reads at a higher resolution. The supervised annotation is performed by matching sequence fragments to genes using well-established protein domains, such as TIGRFAM, PFAM or COGs, followed by unsupervised clustering where each functional domain is further divided into sub-bins based on sequence similarity. Finally, differential abundance of the sub-bins is statistically assessed.

Results

We show that HirBin is able to identify biological effects that are only present at more specific functional levels. Furthermore we show that changes affecting more specific functional levels are often diluted at the more general level and therefore overlooked when analyzed using standard binning approaches.

Conclusions

HirBin improves the resolution of the gene-centric analysis of metagenomes and facilitates the biological interpretation of the results. HirBin is implemented as a Python package and is freely available for download at http://bioinformatics.math.chalmers.se/hirbin .",2017-04-21 +28623886,EPSILON-CP: using deep learning to combine information from multiple sources for protein contact prediction.,"

Background

Accurately predicted contacts make it possible to compute the 3D structure of a protein. Since the solution space of native residue-residue contact pairs is very large, it is necessary to leverage information to identify relevant regions of the solution space, i.e. correct contacts. Every additional source of information can contribute to narrowing down candidate regions. Therefore, recent methods combined evolutionary and sequence-based information as well as evolutionary and physicochemical information. We develop a new contact predictor (EPSILON-CP) that goes beyond current methods by combining evolutionary, physicochemical, and sequence-based information. The problems resulting from the increased dimensionality and complexity of the learning problem are combated with a careful feature analysis, which results in a drastically reduced feature set. The different information sources are combined using deep neural networks.

Results

On 21 hard CASP11 FM targets, EPSILON-CP achieves a mean precision of 35.7% for top-L/10 predicted long-range contacts, which is 11% better than the CASP11 winning version of MetaPSICOV. The improvement on 1.5L is 17%. Furthermore, in this study we find that the amino acid composition, a commonly used feature, is rendered ineffective in the context of meta approaches. The size of the refined feature set decreased by 75%, enabling a significant increase in training data for machine learning, contributing significantly to the observed improvements.

Conclusions

Exploiting as much and diverse information as possible is key to accurate contact prediction. Simply merging the information introduces new challenges. Our study suggests that critical feature analysis can improve the performance of contact prediction methods that combine multiple information sources. EPSILON-CP is available as a webservice: http://compbio.robotics.tu-berlin.de/epsilon/.",2017-06-17 +26110022,DNA microarray integromics analysis platform.,"

Background

The study of interactions between molecules belonging to different biochemical families (such as lipids and nucleic acids) requires specialized data analysis methods. This article describes the DNA Microarray Integromics Analysis Platform, a unique web application that focuses on computational integration and analysis of ""multi-omics"" data. Our tool supports a range of complex analyses, including - among others - low- and high-level analyses of DNA microarray data, integrated analysis of transcriptomics and lipidomics data and the ability to infer miRNA-mRNA interactions.

Results

We demonstrate the characteristics and benefits of the DNA Microarray Integromics Analysis Platform using two different test cases. The first test case involves the analysis of the nutrimouse dataset, which contains measurements of the expression of genes involved in nutritional problems and the concentrations of hepatic fatty acids. The second test case involves the analysis of miRNA-mRNA interactions in polysaccharide-stimulated human dermal fibroblasts infected with porcine endogenous retroviruses.

Conclusions

The DNA Microarray Integromics Analysis Platform is a web-based graphical user interface for ""multi-omics"" data management and analysis. Its intuitive nature and wide range of available workflows make it an effective tool for molecular biology research. The platform is hosted at https://lifescience.plgrid.pl/.",2015-06-25 +26589589,Integrating heterogeneous genomic data to accurately identify disease subtypes.,"

Background

High-throughput biotechnologies have been widely used to characterize clinical samples from various perspectives e.g., epigenomics, genomics and transcriptomics. However, because of the heterogeneity of these technologies and their outputs, individual analysis of the various types of data is hard to create a comprehensive view of disease subtypes. Integrative methods are of pressing need.

Methods

In this study, we evaluated the possible issues that hamper integrative analysis of the heterogeneous disease data types, and proposed iBFE, an effective and efficient computational method to subvert those issues from a feature extraction perspective.

Results

Strict experiments on both simulated and real datasets demonstrated that iBFE can easily overcome issues caused by scale conflicts, noise conflicts, incompleteness of patient relationships, and conflicts between patient relationships, and that iBFE can effectively combine the merits of DNA methylation, mRNA expression and microRNA (miRNA) expression datasets to accurately identify disease subtypes of significantly different prognosis.

Conclusions

iBFE is an effective and efficient method for integrative analysis of heterogeneous genomic data to accurately identify disease subtypes. The Matlab code of iBFE is freely available from http://zhangroup.aporc.org/iBFE.",2015-11-20 +23661693,CMAP: Complement Map Database.,"

Summary

The human complement system is increasingly perceived as an intricate protein network of effectors, inhibitors and regulators that drives critical processes in health and disease and extensively communicates with associated physiological pathways ranging from immunity and inflammation to homeostasis and development. A steady stream of experimental data reveals new fascinating connections at a rapid pace; although opening unique opportunities for research discoveries, the comprehensiveness and large diversity of experimental methods, nomenclatures and publication sources renders it highly challenging to keep up with the essential findings. With the Complement Map Database (CMAP), we have created a novel and easily accessible research tool to assist the complement community and scientists from related disciplines in exploring the complement network and discovering new connections.

Availability

http://www.complement.us/cmap.

Contact

lambris@upenn.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-09 +21882445,Banking Tissue for Neurodegenerative Research,"Human brain banking has become an essential part of the research landscape in neurodegenerative disorders and neurobiology. The demand for high quality banked tissue has been on a steady rise for quite some time. Advanced research studies, including proteomics, metabolomics, m-RNA micro arrays, and genomics, are fast becoming the standard in neuroscience investigations. Since many investigators study human diseases or biological processes, it is therefore not a surprise that human tissue is in high demand to verify findings from animal models of disease. Many leading neuroscientists are focusing a large component of their research on techniques that require the collection of the highest quality of human brain tissue. The Kathleen Price Bryan Brain Bank (KPBBB; http://adrc. mc.duke.edu/BB.htm) at Duke University Medical Center (DUMC) in Durham, North Carolina, has over 20 years of experience with this process (1). Successful human brain banking requires not only attention to the users' needs for the highest quality of tissue, but it is also imperative for brain bankers to ensure that the donor’s wishes are honored. There have been great strides on a national level to facilitate the availability and distribution of these resources to the ever-growing demand in the neuroscience community. The National Institute on Aging (NIA) through the National Alzheimer’s Coordinating Center (NACC) has created an infrastructure and informatics network to support collaboration among the individual NIA-funded Alzheimer’s Disease Centers (ADCs) and to serve as a resource for the neuroscience research community. The banked tissue thus obtained is an invaluable resource available to qualified researchers. 
This chapter describes general concepts concerning proper acquisition, storage, and distribution of brain tissue for neurodegenerative research.",2011-09-02 +29366842,Histologic Factors Associated With Need for Surgery in Patients With Pedunculated T1 Colorectal Carcinomas.,"

Background & aims

Most patients with pedunculated T1 colorectal tumors referred for surgery are not found to have lymph node metastases, and were therefore unnecessarily placed at risk for surgery-associated complications. We aimed to identify histologic factors associated with need for surgery in patients with pedunculated T1 colorectal tumors.

Methods

We performed a cohort-nested matched case-control study of 708 patients diagnosed with pedunculated T1 colorectal tumors at 13 hospitals in The Netherlands, from January 1, 2000 through December 31, 2014, followed for a median of 44 months (interquartile range, 20-80 months). We identified 37 patients (5.2%) who required surgery (due to lymph node, intramural, or distant metastases). These patients were matched with patients with pedunculated T1 colorectal tumors without a need for surgery (no metastases, controls, n = 111). Blinded pathologists analyzed specimens from each tumor, stained with H&E. We evaluated associations between histologic factors and patient need for surgery using univariable conditional logistic regression analysis. We used multivariable least absolute shrinkage and selection operator (LASSO; an online version of the LASSO model is available at: http://t1crc.com/calculator/) regression to develop models for identification of patients with tumors requiring surgery, and tested the accuracy of our model by projecting our case-control data toward the entire cohort (708 patients). We compared our model with previously developed strategies to identify high-risk tumors: conventional model 1 (based on poor differentiation, lymphovascular invasion, or Haggitt level 4) and conventional model 2 (based on poor differentiation, lymphovascular invasion, Haggitt level 4, or tumor budding).

Results

We identified 5 histologic factors that differentiated cases from controls: lymphovascular invasion, Haggitt level 4 invasion, muscularis mucosae type B (incompletely or completely disrupted), poorly differentiated clusters and tumor budding, which identified patients who required surgery with an area under the curve (AUC) value of 0.83 (95% confidence interval, 0.76-0.90). When we used a clinically plausible predicted probability threshold of ≥4.0%, 67.5% (478 of 708) of patients were predicted to not need surgery. This threshold identified patients who required surgery with 83.8% sensitivity (95% confidence interval, 68.0%-93.8%) and 70.3% specificity (95% confidence interval, 60.9%-78.6%). Conventional models 1 and 2 identified patients who required surgery with lower AUC values (AUC, 0.67; 95% CI, 0.60-0.74; P = .002 and AUC, 0.64; 95% CI, 0.58-0.70; P < .001, respectively) than our LASSO model. When we applied our LASSO model with a predicted probability threshold of ≥4.0%, the percentage of missed cases (tumors mistakenly assigned as low risk) was comparable (6 of 478 [1.3%]) to that of conventional model 1 (4 of 307 [1.3%]) and conventional model 2 (3 of 244 [1.2%]). However, the percentage of patients referred for surgery based on our LASSO model was much lower (32.5%, n = 230) than that for conventional model 1 (56.6%, n = 401) or conventional model 2 (65.5%, n = 464).

Conclusions

In a cohort-nested matched case-control study of 708 patients with pedunculated T1 colorectal carcinomas, we developed a model based on histologic features of tumors that identifies patients who require surgery (due to high risk of metastasis) with greater accuracy than previous models. Our model might be used to identify patients most likely to benefit from adjuvant surgery.",2018-01-31 +25432597,"Identification of candidate genes, regions and markers for pre-harvest sprouting resistance in wheat (Triticum aestivum L.).","

Background

Pre-harvest sprouting (PHS) of wheat grain leads to a reduction in grain yield and quality. The availability of markers for marker-assisted selection (MAS) of PHS resistance will serve to enhance breeding selection and advancement of lines for cultivar development. The aim of this study was to identify candidate regions and develop molecular markers for PHS resistance in wheat. This was achieved via high density mapping of single nucleotide polymorphism (SNP) markers from an Illumina 90 K Infinium Custom Beadchip in a doubled haploid (DH) population derived from a RL4452/'AC Domain' cross and subsequent detection of quantitative trait loci (QTL) for PHS related traits (falling number [FN], germination index [GI] and sprouting index [SI]). SNP marker sequences flanking QTL were used to locate colinear regions in Brachypodium and rice, and identify genic markers associated with PHS resistance that can be utilized for MAS in wheat.

Results

A linkage map spanning 2569.4 cM was constructed with a total of 12,201 SNP, simple sequence repeat (SSR), diversity arrays technology (DArT) and expressed sequence tag (EST) markers. QTL analyses using Multiple Interval Mapping (MIM) identified four QTL for PHS resistance traits on chromosomes 3B, 4A, 7B and 7D. Sequences of SNPs flanking these QTL were subject to a BLASTN search on the International Wheat Genome Sequencing Consortium (IWGSC) database (http://wheat-urgi.versailles.inra.fr/Seq-Repository). Best survey sequence hits were subject to a BLASTN search on Gramene (www.gramene.org) against both Brachypodium and rice databases, and candidate genes and regions for PHS resistance were identified. A total of 18 SNP flanking sequences on chromosomes 3B, 4A, 7B and 7D were converted to KASP markers and validated with matching genotype calls of Infinium SNP data.

Conclusions

Our study identified candidate genes involved in abscissic acid (ABA) and gibberellin (GA) metabolism, and flowering time in four genomic regions of Brachypodium and rice respectively, in addition to 18 KASP markers for PHS resistance in wheat. These markers can be deployed in future genetic studies of PHS resistance and might also be useful in the evaluation of PHS in germplasm and breeding material.",2014-11-29 +28261470,Using a multiscale image processing method to characterize the periodic growth patterns on scallop shells.,"The fine periodic growth patterns on shell surfaces have been widely used for studies in the ecology and evolution of scallops. Modern X-ray CT scanners and digital cameras can provide high-resolution image data that contain abundant information such as the shell formation rate, ontogenetic age, and life span of shellfish organisms. We introduced a novel multiscale image processing method based on matched filters with Gaussian kernels and partial differential equation (PDE) multiscale hierarchical decomposition to segment the small tubular and periodic structures in scallop shell images. The periodic patterns of structures (consisting of bifurcation points, crossover points of the rings and ribs, and the connected lines) could be found by our Space-based Depth-First Search (SDFS) algorithm. We created a MATLAB package to implement our method of periodic pattern extraction and pattern matching on the CT and digital scallop images available in this study. The results confirmed the hypothesis that the shell cyclic structure patterns encompass genetically specific information that can be used as an effective invariable biomarker for biological individual recognition. The package is available with a quick-start guide and includes three examples: http://mgb.ouc.edu.cn/novegene/html/code.php.",2017-02-09 +25052703,APADB: a database for alternative polyadenylation and microRNA regulation events. 
,"Alternative polyadenylation (APA) is a widespread mechanism that contributes to the sophisticated dynamics of gene regulation. Approximately 50% of all protein-coding human genes harbor multiple polyadenylation (PA) sites; their selective and combinatorial use gives rise to transcript variants with differing length of their 3' untranslated region (3'UTR). Shortened variants escape UTR-mediated regulation by microRNAs (miRNAs), especially in cancer, where global 3'UTR shortening accelerates disease progression, dedifferentiation and proliferation. Here we present APADB, a database of vertebrate PA sites determined by 3' end sequencing, using massive analysis of complementary DNA ends. APADB provides (A)PA sites for coding and non-coding transcripts of human, mouse and chicken genes. For human and mouse, several tissue types, including different cancer specimens, are available. APADB records the loss of predicted miRNA binding sites and visualizes next-generation sequencing reads that support each PA site in a genome browser. The database tables can either be browsed according to organism and tissue or alternatively searched for a gene of interest. APADB is the largest database of APA in human, chicken and mouse. The stored information provides experimental evidence for thousands of PA sites and APA events. APADB combines 3' end sequencing data with prediction algorithms of miRNA binding sites, allowing to further improve prediction algorithms. Current databases lack correct information about 3'UTR lengths, especially for chicken, and APADB provides necessary information to close this gap. Database URL: http://tools.genxpro.net/apadb/.",2014-07-22 +28489205,Profile of the appendectomies performed in the Brazilian Public Health System.,"

Objective

to analyze the profile of appendectomies performed in the Brazilian Public Health System (SUS) and to compare the laparoscopic and laparotomic techniques of appendectomy.

Methods

This work used information from DataSus from 2008 to 2014 (http://datasus.saude.gov.br). We compared the data of patients submitted to laparotomic appendectomy with those submitted to laparoscopic one.

Results

when comparing the total growth of appendectomies, the laparoscopic route increased 279.7%, while the increase in laparotomic surgery was 25% (p <0.001) in the study period. With regard to medical and hospital costs, laparoscopic appendectomy accounted for only 2.6% of the total expenditure on appendectomies performed by the Unified Health System (SUS) hospitals, with an average cost 7.6% lower than that of laparotomy procedures, but without statistical significance. The mortality rate was 57.1% lower in the laparoscopic approach when compared with laparotomy.

Conclusion

there has been a significant increase in the laparoscopic route in the treatment of appendicitis, but the method is still rarely used in SUS patients. The costs of laparoscopic appendectomy were similar to those observed in laparotomic access.",2017-01-01 +27810777,GPView: A program for wave function analysis and visualization.,"In this manuscript, we will introduce a recently developed program GPView, which can be used for wave function analysis and visualization. The wave function analysis module can calculate and generate 3D cubes for various types of molecular orbitals and electron density of electronic excited states, such as natural orbitals, natural transition orbitals, natural difference orbitals, hole-particle density, detachment-attachment density and transition density. The visualization module of GPView can display molecular and electronic (iso-surfaces) structures. It is also able to animate single trajectories of molecular dynamics and non-adiabatic excited state molecular dynamics using the data stored in existing files. There are also other utilities to extract and process the output of quantum chemistry calculations. The GPView provides full graphic user interface (GUI), so it very easy to use. It is available from website http://life-tp.com/gpview.",2016-10-24 +26575079,Marginal regression models for clustered count data based on zero-inflated Conway-Maxwell-Poisson distribution with applications.,"Community water fluoridation is an important public health measure to prevent dental caries, but it continues to be somewhat controversial. The Iowa Fluoride Study (IFS) is a longitudinal study on a cohort of Iowa children that began in 1991. The main purposes of this study (http://www.dentistry.uiowa.edu/preventive-fluoride-study) were to quantify fluoride exposures from both dietary and nondietary sources and to associate longitudinal fluoride exposures with dental fluorosis (spots on teeth) and dental caries (cavities). 
We analyze a subset of the IFS data by a marginal regression model with a zero-inflated version of the Conway-Maxwell-Poisson distribution for count data exhibiting excessive zeros and a wide range of dispersion patterns. In general, we introduce two estimation methods for fitting a ZICMP marginal regression model. Finite sample behaviors of the estimators and the resulting confidence intervals are studied using extensive simulation studies. We apply our methodologies to the dental caries data. Our novel modeling incorporating zero inflation, clustering, and overdispersion sheds some new light on the effect of community water fluoridation and other factors. We also include a second application of our methodology to a genomic (next-generation sequencing) dataset that exhibits underdispersion.",2015-11-17 +26515824,SeqSIMLA2_exact: simulate multiple disease sites in large pedigrees with given disease status for diseases with low prevalence.,"

Unlabelled

It is difficult for current simulation tools to simulate sequence data in a pre-specified pedigree structure and pre-specified affection status. Previously, we developed a flexible tool, SeqSIMLA2, for simulating sequence data in either unrelated case-control or family samples with different disease and quantitative trait models. Here we extended the tool to efficiently simulate sequences with multiple disease sites in large pedigrees with a given disease status for each pedigree member, assuming that the disease prevalence is low.

Availability and implementation

SeqSIMLA2_exact is implemented with C++ and is available at http://seqsimla.sourceforge.net.",2015-10-29 +27397138,Dynamic Bayesian Network for Accurate Detection of Peptides from Tandem Mass Spectra.,"A central problem in mass spectrometry analysis involves identifying, for each observed tandem mass spectrum, the corresponding generating peptide. We present a dynamic Bayesian network (DBN) toolkit that addresses this problem by using a machine learning approach. At the heart of this toolkit is a DBN for Rapid Identification (DRIP), which can be trained from collections of high-confidence peptide-spectrum matches (PSMs). DRIP's score function considers fragment ion matches using Gaussians rather than fixed fragment-ion tolerances and also finds the optimal alignment between the theoretical and observed spectrum by considering all possible alignments, up to a threshold that is controlled using a beam-pruning algorithm. This function not only yields state-of-the art database search accuracy but also can be used to generate features that significantly boost the performance of the Percolator postprocessor. The DRIP software is built upon a general purpose DBN toolkit (GMTK), thereby allowing a wide variety of options for user-specific inference tasks as well as facilitating easy modifications to the DRIP model in future work. DRIP is implemented in Python and C++ and is available under Apache license at http://melodi-lab.github.io/dripToolkit .",2016-07-22 +24940876,3D face recognition based on multiple keypoint descriptors and sparse representation.,"Recent years have witnessed a growing interest in developing methods for 3D face recognition. However, 3D scans often suffer from the problems of missing parts, large facial expressions, and occlusions. To be useful in real-world applications, a 3D face recognition approach should be able to handle these challenges. 
In this paper, we propose a novel general approach to deal with the 3D face recognition problem by making use of multiple keypoint descriptors (MKD) and the sparse representation-based classification (SRC). We call the proposed method 3DMKDSRC for short. Specifically, with 3DMKDSRC, each 3D face scan is represented as a set of descriptor vectors extracted from keypoints by meshSIFT. Descriptor vectors of gallery samples form the gallery dictionary. Given a probe 3D face scan, its descriptors are extracted at first and then its identity can be determined by using a multitask SRC. The proposed 3DMKDSRC approach does not require the pre-alignment between two face scans and is quite robust to the problems of missing data, occlusions and expressions. Its superiority over the other leading 3D face recognition schemes has been corroborated by extensive experiments conducted on three benchmark databases, Bosphorus, GavabDB, and FRGC2.0. The Matlab source code for 3DMKDSRC and the related evaluation results are publicly available at http://sse.tongji.edu.cn/linzhang/3dmkdsrcface/3dmkdsrc.htm.",2014-06-18 +24632500,comTAR: a web tool for the prediction and characterization of conserved microRNA targets in plants.,"

Motivation

MicroRNAs (miRNAs) are major regulators of gene expression in plants and animals. They recognize their target messenger RNAs (mRNAs) by sequence complementarity and guide them to cleavage or translational arrest. So far, the prediction of plant miRNA-target pairs generally relies on the use of empirical parameters deduced from known miRNA-target interactions.

Results

We developed comTAR, a web tool for the prediction of miRNA targets that is mainly based on the conservation of the potential regulation in different species. We used data generated from a pipeline applied to transcript datasets of 33 angiosperms that was used to build a database of potential miRNA targets of different plant species. The database contains information describing each miRNA-target pair, their function and evolutionary conservation, while the results are displayed in a user-friendly interface. The tool also allows the search using new miRNAs.

Availability and implementation

The Web site is free to all users, with no login requirements, at http://rnabiology.ibr-conicet.gov.ar/comtar.",2014-03-14 +26618088,Splicing Express: a software suite for alternative splicing analysis using next-generation sequencing data.,"Motivation. Alternative splicing events (ASEs) are prevalent in the transcriptome of eukaryotic species and are known to influence many biological phenomena. The identification and quantification of these events are crucial for a better understanding of biological processes. Next-generation DNA sequencing technologies have allowed deep characterization of transcriptomes and made it possible to address these issues. ASEs analysis, however, represents a challenging task especially when many different samples need to be compared. Some popular tools for the analysis of ASEs are known to report thousands of events without annotations and/or graphical representations. A new tool for the identification and visualization of ASEs is here described, which can be used by biologists without a solid bioinformatics background. Results. A software suite named Splicing Express was created to perform ASEs analysis from transcriptome sequencing data derived from next-generation DNA sequencing platforms. Its major goal is to serve the needs of biomedical researchers who do not have bioinformatics skills. Splicing Express performs automatic annotation of transcriptome data (GTF files) using gene coordinates available from the UCSC genome browser and allows the analysis of data from all available species. The identification of ASEs is done by a known algorithm previously implemented in another tool named Splooce. As a final result, Splicing Express creates a set of HTML files composed of graphics and tables designed to describe the expression profile of ASEs among all analyzed samples. 
By using RNA-Seq data from the Illumina Human Body Map and the Rat Body Map, we show that Splicing Express is able to perform all tasks in a straightforward way, identifying well-known specific events. Availability and Implementation. Splicing Express is written in Perl and is suitable to run only in UNIX-like systems. More details can be found at: http://www.bioinformatics-brazil.org/splicingexpress.",2015-11-19 +23203882,The PRoteomics IDEntifications (PRIDE) database and associated tools: status in 2013.,"The PRoteomics IDEntifications (PRIDE, http://www.ebi.ac.uk/pride) database at the European Bioinformatics Institute is one of the most prominent data repositories of mass spectrometry (MS)-based proteomics data. Here, we summarize recent developments in the PRIDE database and related tools. First, we provide up-to-date statistics in data content, splitting the figures by groups of organisms and species, including peptide and protein identifications, and post-translational modifications. We then describe the tools that are part of the PRIDE submission pipeline, especially the recently developed PRIDE Converter 2 (new submission tool) and PRIDE Inspector (visualization and analysis tool). We also give an update about the integration of PRIDE with other MS proteomics resources in the context of the ProteomeXchange consortium. Finally, we briefly review the quality control efforts that are ongoing at present and outline our future plans.",2012-11-29 +23196990,PLI: a web-based tool for the comparison of protein-ligand interactions observed on PDB structures.,"

Motivation

A large fraction of the entries contained in the Protein Data Bank describe proteins in complex with low molecular weight molecules such as physiological compounds or synthetic drugs. In many cases, the same molecule is found in distinct protein-ligand complexes. There is an increasing interest in Medicinal Chemistry in comparing protein binding sites to get insight on interactions that modulate the binding specificity, as this structural information can be correlated with other experimental data of biochemical or physiological nature and may help in rational drug design.

Results

The web service protein-ligand interaction presented here provides a tool to analyse and compare the binding pockets of homologous proteins in complex with a selected ligand. The information is deduced from protein-ligand complexes present in the Protein Data Bank and stored in the underlying database.

Availability

Freely accessible at http://bioinformatics.istge.it/pli/.",2012-11-29 +23650175,The comprehensive antibiotic resistance database.,"The field of antibiotic drug discovery and the monitoring of new antibiotic resistance elements have yet to fully exploit the power of the genome revolution. Despite the fact that the first genomes sequenced of free living organisms were those of bacteria, there have been few specialized bioinformatic tools developed to mine the growing amount of genomic data associated with pathogens. In particular, there are few tools to study the genetics and genomics of antibiotic resistance and how it impacts bacterial populations, ecology, and the clinic. We have initiated development of such tools in the form of the Comprehensive Antibiotic Research Database (CARD; http://arpcard.mcmaster.ca). The CARD integrates disparate molecular and sequence data, provides a unique organizing principle in the form of the Antibiotic Resistance Ontology (ARO), and can quickly identify putative antibiotic resistance genes in new unannotated genome sequences. This unique platform provides an informatic tool that bridges antibiotic resistance concerns in health care, agriculture, and the environment.",2013-05-06 +23193296,LUCApedia: a database for the study of ancient life.,"Organisms represented by the root of the universal evolutionary tree were most likely complex cells with a sophisticated protein translation system and a DNA genome encoding hundreds of genes. The growth of bioinformatics data from taxonomically diverse organisms has made it possible to infer the likely properties of early life in greater detail. Here we present LUCApedia, (http://eeb.princeton.edu/lucapedia), a unified framework for simultaneously evaluating multiple data sets related to the Last Universal Common Ancestor (LUCA) and its predecessors. This unification is achieved by mapping eleven such data sets onto UniProt, KEGG and BioCyc IDs. 
LUCApedia may be used to rapidly acquire evidence that a certain gene or set of genes is ancient, to examine the early evolution of metabolic pathways, or to test specific hypotheses related to ancient life by corroborating them against the rest of the database.",2012-11-27 +26109357,Gene Model Annotations for Drosophila melanogaster: Impact of High-Throughput Data.,"We report the current status of the FlyBase annotated gene set for Drosophila melanogaster and highlight improvements based on high-throughput data. The FlyBase annotated gene set consists entirely of manually annotated gene models, with the exception of some classes of small non-coding RNAs. All gene models have been reviewed using evidence from high-throughput datasets, primarily from the modENCODE project. These datasets include RNA-Seq coverage data, RNA-Seq junction data, transcription start site profiles, and translation stop-codon read-through predictions. New annotation guidelines were developed to take into account the use of the high-throughput data. We describe how this flood of new data was incorporated into thousands of new and revised annotations. FlyBase has adopted a philosophy of excluding low-confidence and low-frequency data from gene model annotations; we also do not attempt to represent all possible permutations for complex and modularly organized genes. This has allowed us to produce a high-confidence, manageable gene annotation dataset that is available at FlyBase (http://flybase.org). Interesting aspects of new annotations include new genes (coding, non-coding, and antisense), many genes with alternative transcripts with very long 3' UTRs (up to 15-18 kb), and a stunning mismatch in the number of male-specific genes (approximately 13% of all annotated gene models) vs. female-specific genes (less than 1%). The number of identified pseudogenes and mutations in the sequenced strain also increased significantly. 
We discuss remaining challenges, for instance, identification of functional small polypeptides and detection of alternative translation starts.",2015-06-24 +25887233,Kernel approaches for differential expression analysis of mass spectrometry-based metabolomics data.,"

Background

Data generated from metabolomics experiments are different from other types of ""-omics"" data. For example, a common phenomenon in mass spectrometry (MS)-based metabolomics data is that the data matrix frequently contains missing values, which complicates some quantitative analyses. One way to tackle this problem is to treat them as absent. Hence there are two types of information that are available in metabolomics data: presence/absence of a metabolite and a quantitative value of the abundance level of a metabolite if it is present. Combining these two layers of information poses challenges to the application of traditional statistical approaches in differential expression analysis.

Results

In this article, we propose a novel kernel-based score test for the metabolomics differential expression analysis. In order to simultaneously capture both the continuous pattern and discrete pattern in metabolomics data, two new kinds of kernels are designed. One is the distance-based kernel and the other is the stratified kernel. While we initially describe the procedures in the case of single-metabolite analysis, we extend the methods to handle metabolite sets as well.

Conclusions

Evaluation based on both simulated data and real data from a liver cancer metabolomics study indicates that our kernel method has a better performance than some existing alternatives. An implementation of the proposed kernel method in the R statistical computing environment is available at http://works.bepress.com/debashis_ghosh/60/ .",2015-03-11 +25316076,Ten years of change: National Library of Medicine TOXMAP gets a new look.,"The United States National Library of Medicine (NLM) TOXNET® databases < http://toxnet.nlm.nih.gov > provide broad coverage of environmental health information covering a wide variety of topics, including access to the U.S. Environment Protection Agency (EPA)'s Toxics Release Inventory (TRI) data. The NLM web-based geographic information system (GIS), TOXMAP® < http://toxmap.nlm.nih.gov/ >, provides interactive maps which show where TRI chemicals are released into the environment and links to TOXNET for information about these chemicals. TOXMAP also displays locations of Superfund sites on the EPA National Priority List, as well as information about the chemical contaminants at these sites. This column focuses on a new version of TOXMAP which brings it up to date with current web GIS technologies and user expectations.",2014-01-01 +24112409,Towards a unified paradigm for sequence-based identification of fungi.,"The nuclear ribosomal internal transcribed spacer (ITS) region is the formal fungal barcode and in most cases the marker of choice for the exploration of fungal diversity in environmental samples. Two problems are particularly acute in the pursuit of satisfactory taxonomic assignment of newly generated ITS sequences: (i) the lack of an inclusive, reliable public reference data set and (ii) the lack of means to refer to fungal species, for which no Latin name is available in a standardized stable way. 
Here, we report on progress in these regards through further development of the UNITE database (http://unite.ut.ee) for molecular identification of fungi. All fungal species represented by at least two ITS sequences in the international nucleotide sequence databases are now given a unique, stable name of the accession number type (e.g. Hymenoscyphus pseudoalbidus|GU586904|SH133781.05FU), and their taxonomic and ecological annotations were corrected as far as possible through a distributed, third-party annotation effort. We introduce the term 'species hypothesis' (SH) for the taxa discovered in clustering on different similarity thresholds (97-99%). An automatically or manually designated sequence is chosen to represent each such SH. These reference sequences are released (http://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and in the QIIME pipeline. The system and the data will be updated automatically as the number of public fungal ITS sequences grows. We invite everybody in the position to improve the annotation or metadata associated with their particular fungal lineages of expertise to do so through the new Web-based sequence management system in UNITE.",2013-09-24 +27134731,A curated transcriptome dataset collection to investigate the immunobiology of HIV infection.,"Compendia of large-scale datasets available in public repositories provide an opportunity to identify and fill current gaps in biomedical knowledge. But first, these data need to be readily accessible to research investigators for interpretation. Here, we make available a collection of transcriptome datasets relevant to HIV infection. 
A total of 2717 unique transcriptional profiles distributed among 34 datasets were identified, retrieved from the NCBI Gene Expression Omnibus (GEO), and loaded in a custom web application, the Gene Expression Browser (GXB), designed for interactive query and visualization of integrated large-scale data. Multiple sample groupings and rank lists were created to facilitate dataset query and interpretation via this interface. Web links to customized graphical views can be generated by users and subsequently inserted in manuscripts reporting novel findings, such as discovery notes. The tool also enables browsing of a single gene across projects, which can provide new perspectives on the role of a given molecule across biological systems. This curated dataset collection is available at: http://hiv.gxbsidra.org/dm3/geneBrowser/list.",2016-03-11 +26040454,FinisherSC: a repeat-aware tool for upgrading de novo assembly using long reads.,"

Unlabelled

We introduce FinisherSC, a repeat-aware and scalable tool for upgrading de novo assembly using long reads. Experiments with real data suggest that FinisherSC can provide longer and higher quality contigs than existing tools while maintaining high concordance.

Availability and implementation

The tool and data are available and will be maintained at http://kakitone.github.io/finishingTool/

Contact

dntse@stanford.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-03 +27334474,Vecuum: identification and filtration of false somatic variants caused by recombinant vector contamination.,"

Motivation

Advances in sequencing technologies have remarkably lowered the detection limit of somatic variants to a low frequency. However, calling mutations at this range is still confounded by many factors including environmental contamination. Vector contamination is a continuously occurring issue and is especially problematic since vector inserts are hardly distinguishable from the sample sequences. Such inserts, which may harbor polymorphisms and engineered functional mutations, can result in calling false variants at corresponding sites. Numerous vector-screening methods have been developed, but none could handle contamination from inserts because they are focusing on vector backbone sequences alone.

Results

We developed a novel method-Vecuum-that identifies vector-originated reads and resultant false variants. Since vector inserts are generally constructed from intron-less cDNAs, Vecuum identifies vector-originated reads by inspecting the clipping patterns at exon junctions. False variant calls are further detected based on the biased distribution of mutant alleles to vector-originated reads. Tests on simulated and spike-in experimental data validated that Vecuum could detect 93% of vector contaminants and could remove up to 87% of variant-like false calls with 100% precision. Application to public sequence datasets demonstrated the utility of Vecuum in detecting false variants resulting from various types of external contamination.

Availability and implementation

Java-based implementation of the method is available at http://vecuum.sourceforge.net/ CONTACT: swkim@yuhs.ac Supplementary information: Supplementary data are available at Bioinformatics online.",2016-06-22 +29657279,oncoNcRNA: A Web Portal for Exploring the Non-Coding RNAs with Oncogenic Potentials in Human Cancers. ,"Non-coding RNAs (ncRNAs) have been shown to contribute to tumorigenesis and progression. However, the functions of the majority of ncRNAs remain unclear. Through integrating published large-scale somatic copy number alterations (SCNAs) data from various human cancer types, we have developed oncoNcRNA, a user-friendly web portal to explore ncRNAs with oncogenic potential in human cancers. The portal characterizes the SCNAs of over 58,000 long non-coding RNAs (lncRNAs), 34,000 piwi-interacting RNAs (piRNAs), 2700 microRNAs (miRNAs), 600 transfer RNAs (tRNAs) and 400 small nucleolar RNAs (snoRNAs) in 64 human cancer types. It enables researchers to rapidly and intuitively analyze the oncogenic potential of ncRNAs of interest. Indeed, we have discovered a large number of ncRNAs which are frequently amplified or deleted within and across tumor types. Moreover, we built a web-based tool, Correlations, to explore the relationships between gene expression and copy number from ~10,000 tumor samples in 36 cancer types identified by The Cancer Genome Atlas (TCGA). oncoNcRNA is a valuable tool for investigating the function and clinical relevance of ncRNAs in human cancers. oncoNcRNA is freely available at http://rna.sysu.edu.cn/onconcrna/.",2017-02-07 +23180796,Animal QTLdb: an improved database tool for livestock animal QTL/association data dissemination in the post-genome era.,"The Animal QTL database (QTLdb; http://www.animalgenome.org/QTLdb) is designed to house all publicly available QTL and single-nucleotide polymorphism/gene association data on livestock animal species. 
An earlier version was published in the Nucleic Acids Research Database issue in 2007. Since then, we have continued our efforts to develop new and improved database tools to allow more data types, parameters and functions. Our efforts have transformed the Animal QTLdb into a tool that actively serves the research community as a quality data repository and more importantly, a provider of easily accessible tools and functions to disseminate QTL and gene association information. The QTLdb has been heavily used by the livestock genomics community since its first public release in 2004. To date, there are 5920 cattle, 3442 chicken, 7451 pigs, 753 sheep and 88 rainbow trout data points in the database, and at least 290 publications that cite use of the database. The rapid advancement in genomic studies of cattle, chicken, pigs, sheep and other livestock animals has presented us with challenges, as well as opportunities for the QTLdb to meet the evolving needs of the research community. Here, we report our progress over the recent years and highlight new functions and services available to the general public.",2012-11-24 +28796632,"The Association of Arsenic Metabolism with Cancer, Cardiovascular Disease, and Diabetes: A Systematic Review of the Epidemiological Evidence.","

Background

The available evidence on the role of arsenic metabolism in individual susceptibility to the development of cancer, cardiovascular disease, and diabetes has not been formally and comprehensively reviewed.

Objectives

Our goal was to systematically investigate the association of arsenic metabolism with cancer, cardiovascular disease, and diabetes-related outcomes in epidemiologic studies. As a secondary objective, we characterized the variation of arsenic metabolism in different populations worldwide.

Methods

We searched Medline/PubMed and EMBASE from inception to January 2016 and applied predetermined exclusion criteria. Compositional data analysis was used to describe the distribution of arsenic metabolism biomarkers and evaluate the association between arsenic exposure and metabolism.

Results

Twenty-eight studies met the inclusion criteria, 12 on cancer, nine on cardiovascular disease, and seven on diabetes-related outcomes. The median (interquartile range) for mean iAs%, MMA%, and DMA% was 11.2 (7.8-14.9)%, 13.0 (10.4-13.6)%, and 74.9 (69.8-80.0)%, respectively. Findings across studies suggested that higher arsenic exposure levels were associated with higher iAs% and lower DMA% and not associated with MMA%. For cancer, most studies found a pattern of higher MMA% and lower DMA% associated with higher risk of all-site, urothelial, lung, and skin cancers. For cardiovascular disease, higher MMA% was generally associated with higher risk of carotid atherosclerosis and clinical cardiovascular disease but not with hypertension. For diabetes-related outcomes, the pattern of lower MMA% and higher DMA% was associated with higher risk of metabolic syndrome and diabetes.

Conclusions

Population level of iAs% and DMA%, but not MMA%, were associated with arsenic exposure levels. Overall, study findings suggest that higher MMA% was associated with an increased risk of cancer and cardiovascular disease, while lower MMA% was associated with an increased risk of diabetes and metabolic syndrome. Additional population-based studies and experimental studies are needed to further evaluate and understand the role of arsenic exposure in arsenic metabolism and the role of arsenic metabolism in disease development. https://doi.org/10.1289/EHP577.",2017-08-01 +28361674,"BATVI: Fast, sensitive and accurate detection of virus integrations.","

Background

The study of virus integrations in human genome is important since virus integrations were shown to be associated with diseases. In the literature, few methods have been proposed that predict virus integrations using next generation sequencing datasets. Although they work, they are slow and are not very sensitive.

Results and discussion

This paper introduces a new method BatVI to predict viral integrations. Our method uses a fast screening method to filter out chimeric reads containing possible viral integrations. Next, sensitive alignments of these candidate chimeric reads are called by BLAST. Chimeric reads that are co-localized in the human genome are clustered. Finally, by assembling the chimeric reads in each cluster, high-confidence virus integration sites are extracted.

Conclusion

We compared the performance of BatVI with existing methods VirusFinder and VirusSeq using both simulated and real-life datasets of liver cancer patients. BatVI ran an order of magnitude faster and was able to predict almost twice the number of true positives compared to other methods while maintaining a false positive rate less than 1%. For the liver cancer datasets, BatVI uncovered novel integrations to two important genes TERT and MLL4, which were missed by previous studies. Through gene expression data, we verified the correctness of these additional integrations. BatVI can be downloaded from http://biogpu.ddns.comp.nus.edu.sg/~ksung/batvi/index.html .",2017-03-14 +27998283,A genotypic method for determining HIV-2 coreceptor usage enables epidemiological studies and clinical decision support.,"

Background

CCR5-coreceptor antagonists can be used for treating HIV-2 infected individuals. Before initiating treatment with coreceptor antagonists, viral coreceptor usage should be determined to ensure that the virus can use only the CCR5 coreceptor (R5) and cannot evade the drug by using the CXCR4 coreceptor (X4-capable). However, until now, no online tool for the genotypic identification of HIV-2 coreceptor usage had been available. Furthermore, there is a lack of knowledge on the determinants of HIV-2 coreceptor usage. Therefore, we developed a data-driven web service for the prediction of HIV-2 coreceptor usage from the V3 loop of the HIV-2 glycoprotein and used the tool to identify novel discriminatory features of X4-capable variants.

Results

Using 10 runs of tenfold cross validation, we selected a linear support vector machine (SVM) as the model for geno2pheno[coreceptor-hiv2], because it outperformed the other SVMs with an area under the ROC curve (AUC) of 0.95. We found that SVMs were highly accurate in identifying HIV-2 coreceptor usage, attaining sensitivities of 73.5% and specificities of 96% during tenfold nested cross validation. The predictive performance of SVMs was not significantly different (p value 0.37) from an existing rules-based approach. Moreover, geno2pheno[coreceptor-hiv2] achieved a predictive accuracy of 100% and outperformed the existing approach on an independent data set containing nine new isolates with corresponding phenotypic measurements of coreceptor usage. geno2pheno[coreceptor-hiv2] could not only reproduce the established markers of CXCR4-usage, but also revealed novel markers: the substitutions 27K, 15G, and 8S were significantly predictive of CXCR4 usage. Furthermore, SVMs trained on the amino-acid sequences of the V1 and V2 loops were also quite accurate in predicting coreceptor usage (AUCs of 0.84 and 0.65, respectively).

Conclusions

In this study, we developed geno2pheno[coreceptor-hiv2], the first online tool for the prediction of HIV-2 coreceptor usage from the V3 loop. Using our method, we identified novel amino-acid markers of X4-capable variants in the V3 loop and found that HIV-2 coreceptor usage is also influenced by the V1/V2 region. The tool can aid clinicians in deciding whether coreceptor antagonists such as maraviroc are a treatment option and enables epidemiological studies investigating HIV-2 coreceptor usage. geno2pheno[coreceptor-hiv2] is freely available at http://coreceptor-hiv2.geno2pheno.org .",2016-12-20 +27391578,SwissSimilarity: A Web Tool for Low to Ultra High Throughput Ligand-Based Virtual Screening.,"SwissSimilarity is a new web tool for rapid ligand-based virtual screening of small to unprecedented ultralarge libraries of small molecules. Screenable compounds include drugs, bioactive and commercial molecules, as well as 205 million of virtual compounds readily synthesizable from commercially available synthetic reagents. Predictions can be carried out on-the-fly using six different screening approaches, including 2D molecular fingerprints as well as superpositional and fast nonsuperpositional 3D similarity methodologies. SwissSimilarity is part of a large initiative of the SIB Swiss Institute of Bioinformatics to provide online tools for computer-aided drug design, such as SwissDock, SwissBioisostere or SwissTargetPrediction with which it can interoperate, and is linked to other well-established online tools and databases. User interface and backend have been designed for simplicity and ease of use, to provide proficient virtual screening capabilities to specialists and nonexperts in the field. 
SwissSimilarity is accessible free of charge or login at http://www.swisssimilarity.ch .",2016-07-19 +26046924,Empirical gradient threshold technique for automated segmentation across image modalities and cell lines.,"New microscopy technologies are enabling image acquisition of terabyte-sized data sets consisting of hundreds of thousands of images. In order to retrieve and analyze the biological information in these large data sets, segmentation is needed to detect the regions containing cells or cell colonies. Our work with hundreds of large images (each 21,000×21,000 pixels) requires a segmentation method that: (1) yields high segmentation accuracy, (2) is applicable to multiple cell lines with various densities of cells and cell colonies, and several imaging modalities, (3) can process large data sets in a timely manner, (4) has a low memory footprint and (5) has a small number of user-set parameters that do not require adjustment during the segmentation of large image sets. None of the currently available segmentation methods meet all these requirements. Segmentation based on image gradient thresholding is fast and has a low memory footprint. However, existing techniques that automate the selection of the gradient image threshold do not work across image modalities, multiple cell lines, and a wide range of foreground/background densities (requirement 2) and all failed the requirement for robust parameters that do not require re-adjustment with time (requirement 5). We present a novel and empirically derived image gradient threshold selection method for separating foreground and background pixels in an image that meets all the requirements listed above. We quantify the difference between our approach and existing ones in terms of accuracy, execution speed, memory usage and number of adjustable parameters on a reference data set. 
This reference data set consists of 501 validation images with manually determined segmentations and image sizes ranging from 0.36 Megapixels to 850 Megapixels. It includes four different cell lines and two image modalities: phase contrast and fluorescent. Our new technique, called Empirical Gradient Threshold (EGT), is derived from this reference data set with a 10-fold cross-validation method. EGT segments cells or colonies with resulting Dice accuracy index measurements above 0.92 for all cross-validation data sets. EGT results have also been visually verified on a much larger data set that includes bright field and Differential Interference Contrast (DIC) images, 16 cell lines and 61 time-sequence data sets, for a total of 17,479 images. This method is implemented as an open-source plugin to ImageJ as well as a standalone executable that can be downloaded from the following link: https://isg.nist.gov/.",2015-06-05 +27547538,Gall-ID: tools for genotyping gall-causing phytopathogenic bacteria.,"Understanding the population structure and genetic diversity of plant pathogens, as well as the effect of agricultural practices on pathogen evolution, is important for disease management. Developments in molecular methods have contributed to increase the resolution for accurate pathogen identification, but those based on analysis of DNA sequences can be less straightforward to use. To address this, we developed Gall-ID, a web-based platform that uses DNA sequence information from 16S rDNA, multilocus sequence analysis and whole genome sequences to group disease-associated bacteria to their taxonomic units. Gall-ID was developed with a particular focus on gall-forming bacteria belonging to Agrobacterium, Pseudomonas savastanoi, Pantoea agglomerans, and Rhodococcus. Members of these groups of bacteria cause growth deformation of plants, and some are capable of infecting many species of field, orchard, and nursery crops. 
Gall-ID also enables the use of high-throughput sequencing reads to search for evidence for homologs of characterized virulence genes, and provides downloadable software pipelines for automating multilocus sequence analysis, analyzing genome sequences for average nucleotide identity, and constructing core genome phylogenies. Lastly, additional databases were included in Gall-ID to help determine the identity of other plant pathogenic bacteria that may be in microbial communities associated with galls or causative agents in other diseased tissues of plants. The URL for Gall-ID is http://gall-id.cgrb.oregonstate.edu/.",2016-07-19 +25033462,Transcriptome sequencing and developmental regulation of gene expression in Anopheles aquasalis.,"

Background

Anopheles aquasalis is a major malaria vector in coastal areas of South and Central America where it breeds preferentially in brackish water. This species is very susceptible to Plasmodium vivax and it has been already incriminated as responsible vector in malaria outbreaks. There has been no high-throughput investigation into the sequencing of An. aquasalis genes, transcripts and proteins despite its epidemiological relevance. Here we describe the sequencing, assembly and annotation of the An. aquasalis transcriptome.

Methodology/principal findings

A total of 419 thousand cDNA sequence reads, encompassing 164 million nucleotides, were assembled in 7544 contigs of ≥ 2 sequences, and 1999 singletons. The majority of the An. aquasalis transcripts encode proteins with their closest counterparts in another neotropical malaria vector, An. darlingi. Several analyses in different protein databases were used to annotate and predict the putative functions of the deduced An. aquasalis proteins. Larval and adult-specific transcripts were represented by 121 and 424 contig sequences, respectively. Fifty-one transcripts were only detected in blood-fed females. The data also reveal a list of transcripts up- or down-regulated in adult females after a blood meal. Transcripts associated with immunity, signaling networks and blood feeding and digestion are discussed.

Conclusions/significance

This study represents the first large-scale effort to sequence the transcriptome of An. aquasalis. It provides valuable information that will facilitate studies on the biology of this species and may lead to novel strategies to reduce malaria transmission on the South American continent. The An. aquasalis transcriptome is accessible at http://exon.niaid.nih.gov/transcriptome/An_aquasalis/Anaquexcel.xlsx.",2014-07-17 +25816229,MAFCO: a compression tool for MAF files.,"In the last decade, the cost of genomic sequencing has been decreasing so much that researchers all over the world accumulate huge amounts of data for present and future use. These genomic data need to be efficiently stored, because storage cost is not decreasing as fast as the cost of sequencing. In order to overcome this problem, the most popular general-purpose compression tool, gzip, is usually used. However, these tools were not specifically designed to compress this kind of data, and often fall short when the intention is to reduce the data size as much as possible. There are several compression algorithms available, even for genomic data, but very few have been designed to deal with Whole Genome Alignments, containing alignments between entire genomes of several species. In this paper, we present a lossless compression tool, MAFCO, specifically designed to compress MAF (Multiple Alignment Format) files. Compared to gzip, the proposed tool attains a compression gain from 34% to 57%, depending on the data set. When compared to a recent dedicated method, which is not compatible with some data sets, the compression gain of MAFCO is about 9%. Both source-code and binaries for several operating systems are freely available for non-commercial use at: http://bioinformatics.ua.pt/software/mafco.",2015-03-27 +28784687,Health-Related Quality of Life Outcomes in PARADIGM-HF. 
,"Patients with heart failure and reduced ejection fraction have impaired health-related quality of life (HRQL) with variable responses to therapies that target mortality and heart failure hospitalizations. In PARADIGM-HF trial (Prospective Comparison of ARNI [Angiotensin Receptor-Neprilysin Inhibitor] With ACEI [Angiotensin-Converting-Enzyme Inhibitor] to Determine Impact on Global Mortality and Morbidity in Heart Failure), sacubitril/valsartan reduced morbidity and mortality compared with enalapril. Another major treatment goal is to improve HRQL. Given improvements in mortality with sacubitril/valsartan, this analysis provides comprehensive assessment of impact of therapy on HRQL in survivors only. Patients (after run-in phase) completed disease-specific HRQL using Kansas City Cardiomyopathy Questionnaire (KCCQ) at randomization, 4 month, 8 month, and annual visits. Changes in KCCQ scores were calculated using repeated measures analysis of covariance model that adjusted for treatment and baseline values (principal efficacy prespecified at 8 months). Among the 8399 patients enrolled in PARADIGM-HF, 7623 (91%) completed KCCQ scores at randomization with complete data at 8 months for 6881 patients (90% of baseline). At 8 months, sacubitril/valsartan group noted improvements in both KCCQ clinical summary score (+0.64 versus -0.29; P=0.008) and KCCQ overall summary score (+1.13 versus -0.14; P<0.001) in comparison to enalapril group and significantly less proportion of patients with deterioration (≥5 points decrease) of both KCCQ scores (27% versus 31%; P=0.01). Adjusted change scores demonstrated consistent improvements in sacubitril/valsartan compared with enalapril through 36 months. Change scores in KCCQ clinical summary scores and KCCQ overall summary scores were better in patients treated with sacubitril/valsartan compared with those treated with enalapril, with consistency in most domains, and persist during follow-up beyond 8 months. 
These findings demonstrate that sacubitril/valsartan leads to better HRQL in surviving patients with heart failure. URL: http://www.clinicaltrials.gov. Unique identifier: NCT01035255.",2017-08-01 +26243018,Correcting systematic bias and instrument measurement drift with mzRefinery.,"

Motivation

Systematic bias in mass measurement adversely affects data quality and negates the advantages of high precision instruments.

Results

We introduce the mzRefinery tool for calibration of mass spectrometry data files. Using confident peptide spectrum matches, three different calibration methods are explored and the optimal transform function is chosen. After calibration, systematic bias is removed and the mass measurement errors are centered at 0 ppm. Because it is part of the ProteoWizard package, mzRefinery can read and write a wide variety of file formats.

Availability and implementation

The mzRefinery tool is part of msConvert, available with the ProteoWizard open source package at http://proteowizard.sourceforge.net/

Contact

samuel.payne@pnnl.gov.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-04 +26754666,The Dynamics of Transcript Abundance during Cellularization of Developing Barley Endosperm.,"Within the cereal grain, the endosperm and its nutrient reserves are critical for successful germination and in the context of grain utilization. The identification of molecular determinants of early endosperm development, particularly regulators of cell division and cell wall deposition, would help predict end-use properties such as yield, quality, and nutritional value. Custom microarray data have been generated using RNA isolated from developing barley grain endosperm 3 d to 8 d after pollination (DAP). Comparisons of transcript abundance over time revealed 47 gene expression modules that can be clustered into 10 broad groups. Superimposing these modules upon cytological data allowed patterns of transcript abundance to be linked with key stages of early grain development. Here, attention was focused on how the datasets could be mined to explore and define the processes of cell wall biosynthesis, remodeling, and degradation. Using a combination of spatial molecular network and gene ontology enrichment analyses, it is shown that genes involved in cell wall metabolism are found in multiple modules, but cluster into two main groups that exhibit peak expression at 3 DAP to 4 DAP and 5 DAP to 8 DAP. The presence of transcription factor genes in these modules allowed candidate genes for the control of wall metabolism during early barley grain development to be identified. 
The data are publicly available through a dedicated web interface (https://ics.hutton.ac.uk/barseed/), where they can be used to interrogate co- and differential expression for any other genes, groups of genes, or transcription factors expressed during early endosperm development.",2016-01-11 +23812996,GeneScissors: a comprehensive approach to detecting and correcting spurious transcriptome inference owing to RNA-seq reads misalignment.,"

Motivation

RNA-seq techniques provide an unparalleled means for exploring a transcriptome with deep coverage and base pair level resolution. Various analysis tools have been developed to align and assemble RNA-seq data, such as the widely used TopHat/Cufflinks pipeline. A common observation is that a sizable fraction of the fragments/reads align to multiple locations of the genome. These multiple alignments pose substantial challenges to existing RNA-seq analysis tools. Inappropriate treatment may result in reporting spurious expressed genes (false positives) and missing the real expressed genes (false negatives). Such errors impact the subsequent analysis, such as differential expression analysis. In our study, we observe that ~3.5% of transcripts reported by TopHat/Cufflinks pipeline correspond to annotated nonfunctional pseudogenes. Moreover, ~10.0% of reported transcripts are not annotated in the Ensembl database. These genes could be either novel expressed genes or false discoveries.

Results

We examine the underlying genomic features that lead to multiple alignments and investigate how they generate systematic errors in RNA-seq analysis. We develop a general tool, GeneScissors, which exploits machine learning techniques guided by biological knowledge to detect and correct spurious transcriptome inference by existing RNA-seq analysis methods. In our simulated study, GeneScissors can predict spurious transcriptome calls owing to misalignment with an accuracy close to 90%. It provides substantial improvement over the widely used TopHat/Cufflinks or MapSplice/Cufflinks pipelines in both precision and F-measurement. On real data, GeneScissors reports 53.6% less pseudogenes and 0.97% more expressed and annotated transcripts, when compared with the TopHat/Cufflinks pipeline. In addition, among the 10.0% unannotated transcripts reported by TopHat/Cufflinks, GeneScissors finds that >16.3% of them are false positives.

Availability

The software can be downloaded at http://csbio.unc.edu/genescissors/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +26452192,Nano-QSAR: Model of mutagenicity of fullerene as a mathematical function of different conditions.,"The experimental data on the bacterial reverse mutation test (under various conditions) on C60 nanoparticles for the cases (i) TA100, and (ii) WP2uvrA/pkM101 are examined as endpoints. By means of the optimal descriptors calculated with the Monte Carlo method a mathematical model of these endpoints has been built up. The models are a mathematical function of eclectic data such as (i) dose (g/plate); (ii) metabolic activation (i.e. with mix S9 or without mix S9); and (iii) illumination (i.e. darkness or irradiation). The eclectic data on different conditions were represented by so-called quasi-SMILES. In contrast to the traditional SMILES which are representation of molecular structure, the quasi-SMILES are representation of conditions by sequence of symbols. The calculations were carried out with the CORAL software, available on the Internet at http://www.insilico.eu/coral. The main idea of the suggested descriptors is the accumulation of all available eclectic information in the role of logical and digital basis for building up a model. The computational experiments have shown that the described approach can be a tool to build up models of mutagenicity of fullerene under different conditions.",2015-10-13 +21605717,Graves' disease: diagnostic and therapeutic challenges (multimedia activity).,"Graves' disease is the most common cause of hyperthyroidism in the United States. Graves' disease occurs more often in women with a female:male ratio of 5:1 and a population prevalence of 1% to 2%. A genetic determinant to the susceptibility to Graves' disease is suspected because of familial clustering of the disease, a high sibling recurrence risk, the familial occurrence of thyroid autoantibodies, and the 30% concordance in disease status between identical twins. 
Graves' disease is an autoimmune thyroid disorder characterized by the infiltration of immune effector cells and thyroid antigen-specific T cells into the thyroid and thyroid-stimulating hormone receptor expressing tissues, with the production of autoantibodies to well-defined thyroidal antigens, such as thyroid peroxidase, thyroglobulin, and the thyroid-stimulating hormone receptor. The thyroid-stimulating hormone receptor is central to the regulation of thyroid growth and function. Stimulatory autoantibodies in Graves' disease activate the thyroid-stimulating hormone receptor leading to thyroid hyperplasia and unregulated thyroid hormone production and secretion. Below-normal levels of baseline serum thyroid-stimulating hormone receptor, normal to elevated serum levels of T4, elevated serum levels of T3 and thyroid-stimulating hormone receptor autoantibodies, and a diffusely enlarged, heterogeneous, hypervascular (increased Doppler flow) thyroid gland confirm diagnosis of Graves' disease (available at: http://supplements.amjmed.com/2010/hyperthyroid/faculty.php). This Resource Center is also available through the website of The American Journal of Medicine (www.amjmed.com). Click on the “Thyroid/Graves' Disease” link in the “Resource Centers” section, found on the right side of the Journal homepage.",2011-06-01 +27822524,From Sample to Multi-Omics Conclusions in under 48 Hours. ,"Multi-omics methods have greatly advanced our understanding of the biological organism and its microbial associates. However, they are not routinely used in clinical or industrial applications, due to the length of time required to generate and analyze omics data. Here, we applied a novel integrated omics pipeline for the analysis of human and environmental samples in under 48 h. 
Human subjects that ferment their own foods provided swab samples from skin, feces, oral cavity, fermented foods, and household surfaces to assess the impact of home food fermentation on their microbial and chemical ecology. These samples were analyzed with 16S rRNA gene sequencing, inferred gene function profiles, and liquid chromatography-tandem mass spectrometry (LC-MS/MS) metabolomics through the Qiita, PICRUSt, and GNPS pipelines, respectively. The human sample microbiomes clustered with the corresponding sample types in the American Gut Project (http://www.americangut.org), and the fermented food samples produced a separate cluster. The microbial communities of the household surfaces were primarily sourced from the fermented foods, and their consumption was associated with increased gut microbial diversity. Untargeted metabolomics revealed that human skin and fermented food samples had separate chemical ecologies and that stool was more similar to fermented foods than to other sample types. Metabolites from the fermented foods, including plant products such as procyanidin and pheophytin, were present in the skin and stool samples of the individuals consuming the foods. Some food metabolites were modified during digestion, and others were detected in stool intact. This study represents a first-of-its-kind analysis of multi-omics data that achieved time intervals matching those of classic microbiological culturing. IMPORTANCE Polymicrobial infections are difficult to diagnose due to the challenge in comprehensively cultivating the microbes present. Omics methods, such as 16S rRNA sequencing, metagenomics, and metabolomics, can provide a more complete picture of a microbial community and its metabolite production, without the biases and selectivity of microbial culture. However, these advanced methods have not been applied to clinical or industrial microbiology or other areas where complex microbial dysbioses require immediate intervention. 
The reason for this is the length of time required to generate and analyze omics data. Here, we describe the development and application of a pipeline for multi-omics data analysis in time frames matching those of the culture-based approaches often used for these applications. This study applied multi-omics methods effectively in clinically relevant time frames and sets a precedent toward their implementation in clinical medicine and industrial microbiology.",2016-03-01 +26383495,MELLO: Medical lifelog ontology for data terms from self-tracking and lifelog devices.,"

Objective

The increasing use of health self-tracking devices is making the integration of heterogeneous data and shared decision-making more challenging. Computational analysis of lifelog data has been hampered by the lack of semantic and syntactic consistency among lifelog terms and related ontologies. Medical lifelog ontology (MELLO) was developed by identifying lifelog concepts and relationships between concepts, and it provides clear definitions by following ontology development methods. MELLO aims to support the classification and semantic mapping of lifelog data from diverse health self-tracking devices.

Methods

MELLO was developed using the General Formal Ontology method with a manual iterative process comprising five steps: (1) defining the scope of lifelog data, (2) identifying lifelog concepts, (3) assigning relationships among MELLO concepts, (4) developing MELLO properties (e.g., synonyms, preferred terms, and definitions) for each MELLO concept, and (5) evaluating representative layers of the ontology content. An evaluation was performed by classifying 11 devices into 3 classes by subjects, and performing pairwise comparisons of lifelog terms among 5 devices in each class as measured using the Jaccard similarity index.

Results

MELLO represents a comprehensive knowledge base of 1998 lifelog concepts, with 4996 synonyms for 1211 (61%) concepts and 1395 definitions for 926 (46%) concepts. The MELLO Browser and MELLO Mapper provide convenient access and annotating non-standard proprietary terms with MELLO (http://mello.snubi.org/). MELLO covers 88.1% of lifelog terms from 11 health self-tracking devices and uses simple string matching to match semantically similar terms provided by various devices that are not yet integrated. The results from the comparisons of Jaccard similarities between simple string matching and MELLO matching revealed increases of 2.5, 2.2, and 5.7 folds for physical activity,body measure, and sleep classes, respectively.

Conclusions

MELLO is the first ontology for representing health-related lifelog data with rich contents including definitions, synonyms, and semantic relationships. MELLO fills the semantic gap between heterogeneous lifelog terms that are generated by diverse health self-tracking devices. The unified representation of lifelog terms facilitated by MELLO can help describe an individual's lifestyle and environmental factors, which can be included with user-generated data for clinical research and thereby enhance data integration and sharing.",2015-08-17 +23220694,The plant ontology as a tool for comparative plant anatomy and genomic analyses.,"The Plant Ontology (PO; http://www.plantontology.org/) is a publicly available, collaborative effort to develop and maintain a controlled, structured vocabulary ('ontology') of terms to describe plant anatomy, morphology and the stages of plant development. The goals of the PO are to link (annotate) gene expression and phenotype data to plant structures and stages of plant development, using the data model adopted by the Gene Ontology. From its original design covering only rice, maize and Arabidopsis, the scope of the PO has been expanded to include all green plants. The PO was the first multispecies anatomy ontology developed for the annotation of genes and phenotypes. Also, to our knowledge, it was one of the first biological ontologies that provides translations (via synonyms) in non-English languages such as Japanese and Spanish. As of Release #18 (July 2012), there are about 2.2 million annotations linking PO terms to >110,000 unique data objects representing genes or gene models, proteins, RNAs, germplasm and quantitative trait loci (QTLs) from 22 plant species. In this paper, we focus on the plant anatomical entity branch of the PO, describing the organizing principles, resources available to users and examples of how the PO is integrated into other plant genomics databases and web portals. 
We also provide two examples of comparative analyses, demonstrating how the ontology structure and PO-annotated data can be used to discover the patterns of expression of the LEAFY (LFY) and terpene synthase (TPS) gene homologs.",2012-12-05 +27437975,JOURNALS ANALYSIS IN UROLOGY AND PLASTIC SURGERY.,"

Objective

To identify the main journals used in Urology and Plastic Surgery.

Methods

The WebQualis database was consulted by selecting the ""consult"" option, then ""rating"", and finally ""journal title."" The following keywords were also cross-searched: urology, urologic, urological, prostate, prostatic, plastic, reconstructive, aesthetic. The journals classified in the field of Capes Medicine III were selected, and their respective strata were recorded. To confirm the 2014 impact factor, the http://www.impactfactorsearch.com/ database was consulted; simply typing the journal title makes its impact factor appear automatically.

Results

A total of 23 journals were found in Urology and 12 in Plastic Surgery. The average impact factor of the urological journals was 2.256 and that of the Plastic Surgery journals was 1.060. Among the urological journals, seven (30.4%) were in the WebQualis A stratum, and among the Plastic Surgery journals only one (8.3%) was found in this stratum.

Conclusion

There are quantitative and qualitative differences between journals in Urology and Plastic Surgery. These data can help to develop appropriate assessment methods for each specialty, considering the different features of the presented papers.

Objetivo

Procurar destacar os principais periódicos utilizados na urologia e na cirurgia plástica.

Métodos

Foi consultada a base de dados WebQualis. Selecionou-se a opção ""consultar"", depois a opção ""classificação"" e por fim por ""título do periódico"". Cruzaram-se também os seguintes descritores: urology, urologic, urological, prostate, prostatic, plastic, reconstructive, aesthetic. Os periódicos classificados na área da Medicina III da Capes foram selecionados, e seus respectivos estratos registrados. Para confirmação do fator de impacto de 2014, consultou-se a base de dados http://www.impactfactorsearch.com/, onde a digitação do nome do periódico revela automaticamente seu impacto.

Resultados

Foram encontrados 23 periódicos urológicos e 12 na cirurgia plástica. O fator de impacto médio dos urológicos foi de 2.256 e o da cirurgia plástica de 1.060. Entre os periódicos urológicos, sete (30,4%) encontravam-se no estrato A do Qualis e entre os da cirurgia plástica apenas um (8,3%) encontrava-se neste estrato.

Conclusão

Existem diferenças quantitativas e qualitativas entre os periódicos urológicos e os da cirurgia plástica. Estes dados podem auxiliar na elaboração de métodos de avaliação adequados para cada especialidade, considerando-se as diferentes características dos periódicos apresentados.",2015-01-01 +27103878,Catalogue of Texas spiders.,"This catalogue lists 1,084 species of spiders (three identified to genus only) in 311 genera from 53 families currently recorded from Texas and is based on the ""Bibliography of Texas Spiders"" published by Bea Vogel in 1970. The online list of species can be found at http://pecanspiders.tamu.edu/spidersoftexas.htm. Many taxonomic revisions have since been published, particularly in the families Araneidae, Gnaphosidae and Leptonetidae. Many genera in other families have been revised. The Anyphaenidae, Ctenidae, Hahniidae, Nesticidae, Sicariidae and Tetragnathidae were also revised. Several families have been added and others split up. Several genera of Corinnidae were transferred to Phrurolithidae and Trachelidae. Two genera from Miturgidae were transferred to Eutichuridae. Zoridae was synonymized under Miturgidae. A single species formerly in Amaurobiidae is now in the Family Amphinectidae. Some trapdoor spiders in the family Ctenizidae have been transferred to Euctenizidae. Gertsch and Mulaik started a list of Texas spiders in 1940. In a letter from Willis J. Gertsch dated October 20, 1982, he stated ""Years ago a first listing of the Texas fauna was published by me based largely on Stanley Mulaik material, but it had to be abandoned because of other tasks."" This paper is a compendium of the spiders of Texas with distribution, habitat, collecting method and other data available from revisions and collections. This includes many records and unpublished data (including data from three unpublished studies). One of these studies included 16,000 adult spiders belonging to 177 species in 29 families. 
All specimens in that study were measured and results are in the appendix. Hidalgo County has 340 species recorded with Brazos County at 323 and Travis County at 314 species. These reflect the amount of collecting in the area.",2016-03-02 +24038259,Lipidomics technologies at the end of the first decade and the beginning of the next.,"The lipidome is composed of all of the biomolecules defined as lipids, which encompass compounds of amazing structural diversity and complexity. It has been ∼1 decade since the study of ""lipidomics"" was begun in earnest, and the technologies and tools for data analysis have advanced considerably over this period. This workshop summarized the scope of the lipidome and technologies for its analysis, lipidomics databases and other online tools, and examples of the application of lipidomics to nutritional research. The slides from the workshop, online lipidomics tools, and databases are available at http://www.lipidmaps.org.",2013-09-01 +24904731,GenderMedDB: an interactive database of sex and gender-specific medical literature.,"

Background

Searches for sex and gender-specific publications are complicated by the absence of a specific algorithm within search engines and by the lack of adequate archives to collect the retrieved results. We previously addressed this issue by initiating the first systematic archive of medical literature containing sex and/or gender-specific analyses. This initial collection has now been greatly enlarged and re-organized as a free user-friendly database with multiple functions: GenderMedDB (http://gendermeddb.charite.de).

Description

GenderMedDB retrieves the included publications from the PubMed database. Manuscripts containing sex and/or gender-specific analysis are continuously screened and the relevant findings organized systematically into disciplines and diseases. Publications are furthermore classified by research type, subject and participant numbers. More than 11,000 abstracts are currently included in the database, after screening more than 40,000 publications. The main functions of the database include searches by publication data or content analysis based on pre-defined classifications. In addition, registrants are enabled to upload relevant publications, access descriptive publication statistics and interact in an open user forum.

Conclusions

Overall, GenderMedDB offers the advantages of a discipline-specific search engine as well as the functions of a participative tool for the gender medicine community.",2014-05-23 +26002886,Diffusion maps for high-dimensional single-cell analysis of differentiation data.,"

Motivation

Single-cell technologies have recently gained popularity in cellular differentiation studies regarding their ability to resolve potential heterogeneities in cell populations. Analyzing such high-dimensional single-cell data has its own statistical and computational challenges. Popular multivariate approaches are based on data normalization, followed by dimension reduction and clustering to identify subgroups. However, in the case of cellular differentiation, we would not expect clear clusters to be present but instead expect the cells to follow continuous branching lineages.

Results

Here, we propose the use of diffusion maps to deal with the problem of defining differentiation trajectories. We adapt this method to single-cell data by adequate choice of kernel width and inclusion of uncertainties or missing measurement values, which enables the establishment of a pseudotemporal ordering of single cells in a high-dimensional gene expression space. We expect this output to reflect cell differentiation trajectories, where the data originates from intrinsic diffusion-like dynamics. Starting from a pluripotent stage, cells move smoothly within the transcriptional landscape towards more differentiated states with some stochasticity along their path. We demonstrate the robustness of our method with respect to extrinsic noise (e.g. measurement noise) and sampling density heterogeneities on simulated toy data as well as two single-cell quantitative polymerase chain reaction datasets (i.e. mouse haematopoietic stem cells and mouse embryonic stem cells) and an RNA-Seq data of human pre-implantation embryos. We show that diffusion maps perform considerably better than Principal Component Analysis and are advantageous over other techniques for non-linear dimension reduction such as t-distributed Stochastic Neighbour Embedding for preserving the global structures and pseudotemporal ordering of cells.

Availability and implementation

The Matlab implementation of diffusion maps for single-cell data is available at https://www.helmholtz-muenchen.de/icb/single-cell-diffusion-map.

Contact

fbuettner.phys@gmail.com, fabian.theis@helmholtz-muenchen.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-21 +26740524,Bayesian variable selection for binary outcomes in high-dimensional genomic studies using non-local priors.,"

Motivation

The advent of new genomic technologies has resulted in the production of massive data sets. Analyses of these data require new statistical and computational methods. In this article, we propose one such method that is useful in selecting explanatory variables for prediction of a binary response. Although this problem has recently been addressed using penalized likelihood methods, we adopt a Bayesian approach that utilizes a mixture of non-local prior densities and point masses on the binary regression coefficient vectors.

Results

The resulting method, which we call iMOMLogit, provides improved performance in identifying true models and reducing estimation and prediction error in a number of simulation studies. More importantly, its application to several genomic datasets produces predictions that have high accuracy using far fewer explanatory variables than competing methods. We also describe a novel approach for setting prior hyperparameters by examining the total variation distance between the prior distributions on the regression parameters and the distribution of the maximum likelihood estimator under the null distribution. Finally, we describe a computational algorithm that can be used to implement iMOMLogit in ultrahigh-dimensional settings ([Formula: see text]) and provide diagnostics to assess the probability that this algorithm has identified the highest posterior probability model.

Availability and implementation

Software to implement this method can be downloaded at: http://www.stat.tamu.edu/~amir/code.html

Contact

wwang7@mdanderson.org or vjohnson@stat.tamu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-06 +23185330,"CyanoPhyChe: a database for physico-chemical properties, structure and biochemical pathway information of cyanobacterial proteins.","CyanoPhyChe is a user friendly database that one can browse through for physico-chemical properties, structure and biochemical pathway information of cyanobacterial proteins. We downloaded all the protein sequences from the cyanobacterial genome database for calculating the physico-chemical properties, such as molecular weight, net charge of protein, isoelectric point, molar extinction coefficient, canonical variable for solubility, grand average hydropathy, aliphatic index, and number of charged residues. Based on the physico-chemical properties, we provide the polarity, structural stability and probability of a protein entering in to an inclusion body (PEPIB). We used the data generated on physico-chemical properties, structure and biochemical pathway information of all cyanobacterial proteins to construct CyanoPhyChe. The data can be used for optimizing methods of expression and characterization of cyanobacterial proteins. Moreover, the 'Search' and data export options provided will be useful for proteome analysis. Secondary structure was predicted for all the cyanobacterial proteins using PSIPRED tool and the data generated is made accessible to researchers working on cyanobacteria. In addition, external links are provided to biological databases such as PDB and KEGG for molecular structure and biochemical pathway information, respectively. External links are also provided to different cyanobacterial databases. CyanoPhyChe can be accessed from the following URL: http://bif.uohyd.ac.in/cpc.",2012-11-21 +26231426,MICC: an R package for identifying chromatin interactions from ChIA-PET data.,"

Unlabelled

ChIA-PET is rapidly emerging as an important experimental approach to detect chromatin long-range interactions at high resolution. Here, we present Model based Interaction Calling from ChIA-PET data (MICC), an easy-to-use R package to detect chromatin interactions from ChIA-PET sequencing data. By applying a Bayesian mixture model to systematically remove random ligation and random collision noise, MICC could identify chromatin interactions with a significantly higher sensitivity than existing methods at the same false discovery rate.

Availability and implementation

http://bioinfo.au.tsinghua.edu.cn/member/xwwang/MICCusage

Contact

michael.zhang@utdallas.edu or xwwang@tsinghua.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-31 +26681544,Detection of differentially methylated regions from bisulfite-seq data by hidden Markov models incorporating genome-wide methylation level distributions.,"

Background

Detection of differential methylation between biological samples is an important task in bisulfite-seq data analysis. Several studies have attempted de novo finding of differentially methylated regions (DMRs) using hidden Markov models (HMMs). However, there is room for improvement in the design of HMMs, especially on emission functions that evaluate the likelihood of differential methylation at each cytosine site.

Results

We describe a new HMM for DMR detection from bisulfite-seq data. Our method utilizes emission functions that combine binomial models for aligned read counts, and beta mixtures for incorporating genome-wide methylation level distributions. We also develop unsupervised learning algorithms to adjust parameters of the beta-binomial models depending on differential methylation types (up, down, and not changed). In experiments on both simulated and real datasets, the new HMM improves DMR detection accuracy compared with HMMs in our previous study. Furthermore, our method achieves better accuracy than other methods using Fisher's exact test and methylation level smoothing.

Conclusions

Our method enables accurate DMR detection from bisulfite-seq data. The implementation of our method is named ComMet, and distributed as a part of Bisulfighter package, which is available at http://epigenome.cbrc.jp/bisulfighter.",2015-12-09 +27240256,Data-driven hypothesis weighting increases detection power in genome-scale multiple testing.,"Hypothesis weighting improves the power of large-scale multiple testing. We describe independent hypothesis weighting (IHW), a method that assigns weights using covariates independent of the P-values under the null hypothesis but informative of each test's power or prior probability of the null hypothesis (http://www.bioconductor.org/packages/IHW). IHW increases power while controlling the false discovery rate and is a practical approach to discovering associations in genomics, high-throughput biology and other large data sets.",2016-05-30 +28171511,DEF: an automated dead-end filling approach based on quasi-endosymbiosis.,"

Motivation

Gap filling in the reconstruction of metabolic networks aims to restore the connectivity of metabolites by finding high-confidence reactions that may be missing in the target organism. Current methods for gap filling either fall into the network topology or have limited capability in finding missing reactions that are indirectly related to dead-end metabolites but of biological importance to the target model.

Results

We present an automated dead-end filling (DEF) approach, which is derived from the wisdom of endosymbiosis theory, to fill gaps by finding the most efficient dead-end utilization paths in a constructed quasi-endosymbiosis model. The recalls of reactions and dead ends of DEF reach around 73% and 86%, respectively. This method is capable of finding indirectly dead-end-related reactions with biological importance for the target organism and is applicable to any given metabolic model. In the E. coli iJR904 model, for instance, about 42% of the dead-end metabolites were fixed by our proposed method.

Availability and implementation

DEF is publicly available at http://bis.zju.edu.cn/DEF/.

Contact

mchen@zju.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +28104964,MFPPI - Multi FASTA ProtParam Interface.,"Physico-chemical properties reflect the functional and structural characteristics of a protein. The comparative study of the physicochemical properties is important to know role of a protein in exploring its molecular evolution. A number of online and offline tools are available for calculating the physico-chemical properties of a single protein sequence. However, a tool is not available for a comparative study with graphical visualization of Multi-FASTA sequences. Hence, we describe the development and utility of MFPPI V.1.0 (a web interface developed in JAVA platform) to input each FASTA sequence from Multi-FASTA file into the ProtParam web server for the calculation of physico-chemical properties. MFPPI V.1.0 calculates different physico-chemical properties for a given set of proteins in a single run and saves the data in the MSExcel sheet. Furthermore, it provides a graphical representation of protein physico-chemical properties for analysis and visualization of data in a user-friendly manner. Therefore, the output from the analysis helps to understand compositional changes and functional relationship in evolution among organisms. We have demonstrated the utility of MFPPI V.1.0 using 17 mtATP6 protein sequences from different mammalian species. It is available for free at http://insilicogenomics.in/mfpcalc/mfppi.html.",2016-04-10 +28119819,Topoisomerase I inhibition leads to length-dependent gene expression changes in human primary astrocytes.,"Topoisomerase I is required for the proper expression of long genes (> 100 kb) in mouse and human cortical neurons, including many candidate genes for autism spectrum disorder (ASD) [1]. Given the important role of astrocytes in brain development [2], we investigated whether long genes, including autism susceptibility genes, also require topoisomerase I expression in human primary astrocytes. 
We carried genome-wide expression profiling of cultured human primary astrocytes following treatment with the topoisomerase I inhibitor Topotecan, using Illumina microarrays. We identified several thousands of differentially expressed genes and confirmed that topoisomerase I inhibition affects gene expression in human primary astrocytes in a length-dependent manner. We also identified over 20 ASD-associated genes that show topoisomerase-dependent gene expression in human primary astrocytes but have not been previously reported as topoisomerase-I-dependent in neurons. The microarray data have been deposited in NCBI GEO (https://www.ncbi.nlm.nih.gov/geo/) under accession number GSE90052.",2016-12-23 +23161694,Genenames.org: the HGNC resources in 2013.,"The HUGO Gene Nomenclature Committee situated at the European Bioinformatics Institute assigns unique symbols and names to human genes. Since 2011, the data within our database has expanded largely owing to an increase in naming pseudogenes and non-coding RNA genes, and we now have >33,500 approved symbols. Our gene families and groups have also increased to nearly 500, with ∼45% of our gene entries associated to at least one family or group. We have also redesigned the HUGO Gene Nomenclature Committee website http://www.genenames.org creating a constant look and feel across the site and improving usability and readability for our users. The site provides a public access portal to our database with no restrictions imposed on access or the use of the data. Within this article, we review our online resources and data with particular emphasis on the updates to our website.",2012-11-17 +28597489,"Development of a web-based application and multicountry analysis framework for assessing interdicted infections and cost-utility of screening donated blood for HIV, HCV and HBV.","

Background and objectives

Most countries test donations for HIV, HCV and HBV using serology with or without nucleic acid testing (NAT). Cost-utility analyses provide information on the relative value of different screening options. The aim of this project was to develop an open access risk assessment and cost-utility analysis web-tool for assessing HIV, HCV and HBV screening options (http://www.isbtweb.org/working-parties/transfusion-transmitted-infectious-diseases/). An analysis for six countries (Brazil, Ghana, the Netherlands, South Africa, Thailand and USA) was conducted.

Materials and methods

Four strategies can be evaluated using the tool: (1) antibody assays (Abs) for HIV and HCV + HBsAg, (2) antibody assays that include antigens for HIV and HCV (Combo) + HBsAg, (3) NAT in minipools of variable size (MP NAT) and (4) individual donation (ID) NAT. Country-specific data on donors, donation testing results, recipient outcomes and costs are entered using the online interface. Results obtained include the number of infections interdicted using each screening option, and the (incremental and average) cost-utility of the options.

Results

In each of the six countries evaluated, the use of antibody assays is cost effective or even cost saving. NAT has varying cost-utility depending on the setting, and where adopted, the incremental cost-utility exceeds any previously defined or proposed threshold in each country.

Conclusion

The web-tool allows an assessment of infectious units interdicted and value for money of different testing strategies. Regardless of gross national income (GNI) per capita, countries appear willing to dedicate healthcare resources to blood supply safety in excess of that for other sectors of health care.",2017-06-08 +22611296,A mass spectrometry proteomics data management platform.,"Mass spectrometry-based proteomics is increasingly being used in biomedical research. These experiments typically generate a large volume of highly complex data, and the volume and complexity are only increasing with time. There exist many software pipelines for analyzing these data (each typically with its own file formats), and as technology improves, these file formats change and new formats are developed. Files produced from these myriad software programs may accumulate on hard disks or tape drives over time, with older files being rendered progressively more obsolete and unusable with each successive technical advancement and data format change. Although initiatives exist to standardize the file formats used in proteomics, they do not address the core failings of a file-based data management system: (1) files are typically poorly annotated experimentally, (2) files are ""organically"" distributed across laboratory file systems in an ad hoc manner, (3) files formats become obsolete, and (4) searching the data and comparing and contrasting results across separate experiments is very inefficient (if possible at all). Here we present a relational database architecture and accompanying web application dubbed Mass Spectrometry Data Platform that is designed to address the failings of the file-based mass spectrometry data management approach. The database is designed such that the output of disparate software pipelines may be imported into a core set of unified tables, with these core tables being extended to support data generated by specific pipelines. 
Because the data are unified, they may be queried, viewed, and compared across multiple experiments using a common web interface. Mass Spectrometry Data Platform is open source and freely available at http://code.google.com/p/msdapl/.",2012-05-18 +24130305,ORMAN: optimal resolution of ambiguous RNA-Seq multimappings in the presence of novel isoforms.,"

Motivation

RNA-Seq technology is promising to uncover many novel alternative splicing events, gene fusions and other variations in RNA transcripts. For an accurate detection and quantification of transcripts, it is important to resolve the mapping ambiguity for those RNA-Seq reads that can be mapped to multiple loci: >17% of the reads from mouse RNA-Seq data and 50% of the reads from some plant RNA-Seq data have multiple mapping loci. In this study, we show how to resolve the mapping ambiguity in the presence of novel transcriptomic events such as exon skipping and novel indels towards accurate downstream analysis. We introduce ORMAN (Optimal Resolution of Multimapping Ambiguity of RNA-Seq Reads), which aims to compute the minimum number of potential transcript products for each gene and to assign each multimapping read to one of these transcripts based on the estimated distribution of the region covering the read. ORMAN achieves this objective through a combinatorial optimization formulation, which is solved through well-known approximation algorithms, integer linear programs and heuristics.

Results

On a simulated RNA-Seq dataset including a random subset of transcripts from the UCSC database, the performance of several state-of-the-art methods for identifying and quantifying novel transcripts, such as Cufflinks, IsoLasso and CLIIQ, is significantly improved through the use of ORMAN. Furthermore, in an experiment using real RNA-Seq reads, we show that ORMAN is able to resolve multimapping to produce coverage values that are similar to the original distribution, even in genes with highly non-uniform coverage.

Availability

ORMAN is available at http://orman.sf.net",2013-10-15 +28367405,Synaptic vesicles isolated from the electric organ of Torpedo californica and from the central nervous system of Mus musculus contain small ribonucleic acids (sRNAs).,"Synaptic vesicles (SVs) are presynaptic organelles that load and release small molecule neurotransmitters at chemical synapses. In addition to classic neurotransmitters, we have demonstrated that SVs isolated from the Peripheral Nervous Systems (PNS) of the electric organ of Torpedo californica, a model cholinergic synapse, and SVs isolated from the Central Nervous System (CNS) of Mus musculus (mouse) contain small ribonucleic acids (sRNAs; ≤ 50 nucleotides) (Scientific Reports, 5:1-14(14918) Li et al. (2015) [1]). Our previous publication provided the five most abundant sequences associated with the T. californica SVs, and the ten most abundant sequences associated with the mouse SVs, representing 59% and 39% of the total sRNA reads sequenced, respectively). We provide here a full repository of the SV sRNAs sequenced from T. californica and the mouse deposited in the NCBI as biosamples. Three data studies are included: SVs isolated from the electric organ of T. californica using standard techniques, SVs isolated from the electric organ of T. californica using standard techniques with an additional affinity purification step, and finally, SVs isolated from the CNS of mouse. The three biosamples are available at https://www.ncbi.nlm.nih.gov/biosample/ SRS1523467, SRS1523466, and SRS1523472 respectively.",2017-03-08 +26204236,MI-PVT: A Tool for Visualizing the Chromosome-Centric Human Proteome.,"We have developed the web-based Michigan Proteome Visualization Tool (MI-PVT) to visualize and compare protein expression and isoform-level function across human chromosomes and tissues (http://guanlab.ccmb.med.umich.edu/mipvt). As proof of principle, we have populated the tool with Human Proteome Map (HPM) data. 
We were able to observe many biologically interesting features. From the vantage point of our chromosome 17 team, for example, we found more than 300 proteins from chromosome 17 expressed in each of the 30 tissues and cell types studied, with the highest number of expressed proteins being 685 in testis. Comparisons of expression levels across tissues showed low numbers of proteins expressed in esophagus, but esophagus had 12 cytoskeletal proteins coded on chromosome 17 with very high expression (>1000 spectral counts). This customized MI-PVT should be helpful for biologists to browse and study specific proteins and protein data sets across tissues and chromosomes. Users can upload any data of interest in MI-PVT for visualization. Our aim is to integrate extensive mass-spectrometric proteomic data into the tool to facilitate finding chromosome-centric protein expression and correlation across tissues.",2015-08-03 +25569221,A RESTful API for accessing microbial community data for MG-RAST.,"Metagenomic sequencing has produced significant amounts of data in recent years. For example, as of summer 2013, MG-RAST has been used to annotate over 110,000 data sets totaling over 43 Terabases. With metagenomic sequencing finding even wider adoption in the scientific community, the existing web-based analysis tools and infrastructure in MG-RAST provide limited capability for data retrieval and analysis, such as comparative analysis between multiple data sets. Moreover, although the system provides many analysis tools, it is not comprehensive. By opening MG-RAST up via a web services API (application programmers interface) we have greatly expanded access to MG-RAST data, as well as provided a mechanism for the use of third-party analysis tools with MG-RAST data. This RESTful API makes all data and data objects created by the MG-RAST pipeline accessible as JSON objects. 
As part of the DOE Systems Biology Knowledgebase project (KBase, http://kbase.us) we have implemented a web services API for MG-RAST. This API complements the existing MG-RAST web interface and constitutes the basis of KBase's microbial community capabilities. In addition, the API exposes a comprehensive collection of data to programmers. This API, which uses a RESTful (Representational State Transfer) implementation, is compatible with most programming environments and should be easy to use for end users and third parties. It provides comprehensive access to sequence data, quality control results, annotations, and many other data types. Where feasible, we have used standards to expose data and metadata. Code examples are provided in a number of languages both to show the versatility of the API and to provide a starting point for users. We present an API that exposes the data in MG-RAST for consumption by our users, greatly enhancing the utility of the MG-RAST service.",2015-01-08 +27274834,Renal replacement therapy in Europe: a summary of the 2013 ERA-EDTA Registry Annual Report with a focus on diabetes mellitus.,"

Background

This article provides a summary of the 2013 European Renal Association-European Dialysis and Transplant Association (ERA-EDTA) Registry Annual Report (available at http://www.era-edta-reg.org), with a focus on patients with diabetes mellitus (DM) as the cause of end-stage renal disease (ESRD).

Methods

In 2015, the ERA-EDTA Registry received data on renal replacement therapy (RRT) for ESRD from 49 national or regional renal registries in 34 countries in Europe and bordering the Mediterranean Sea. Individual patient data were provided by 31 registries, while 18 registries provided aggregated data. The total population covered by the participating registries comprised 650 million people.

Results

In total, 72 933 patients started RRT for ESRD within the countries and regions reporting to the ERA-EDTA Registry, resulting in an overall incidence of 112 per million population (pmp). The overall prevalence on 31 December 2013 was 738 pmp (n = 478 990). Patients with DM as the cause of ESRD comprised 24% of the incident RRT patients (26 pmp) and 17% of the prevalent RRT patients (122 pmp). When compared with the USA, the incidence of patients starting RRT pmp secondary to DM in Europe was five times lower and the incidence of RRT due to other causes of ESRD was two times lower. Overall, 19 426 kidney transplants were performed (30 pmp). The 5-year adjusted survival for all RRT patients was 60.9% [95% confidence interval (CI) 60.5-61.3] and 50.6% (95% CI 49.9-51.2) for patients with DM as the cause of ESRD.",2016-01-31 +27734896,3Disease Browser: A Web server for integrating 3D genome and disease-associated chromosome rearrangement data.,"Chromosomal rearrangement (CR) events have been implicated in many tumor and non-tumor human diseases. CR events lead to their associated diseases by disrupting gene and protein structures. Also, they can lead to diseases through changes in chromosomal 3D structure and gene expression. In this study, we search for CR-associated diseases potentially caused by chromosomal 3D structure alteration by integrating Hi-C and ChIP-seq data. Our algorithm rediscovers experimentally verified disease-associated CRs (polydactyly diseases) that alter gene expression by disrupting chromosome 3D structure. Interestingly, we find that intellectual disability may be a candidate disease caused by 3D chromosome structure alteration. 
We also develop a Web server (3Disease Browser, http://3dgb.cbi.pku.edu.cn/disease/) for integrating and visualizing disease-associated CR events and chromosomal 3D structure.",2016-10-13 +27403208,"bSiteFinder, an improved protein-binding sites prediction server based on structural alignment: more accurate and less time-consuming.","

Motivation

Protein-binding sites prediction lays a foundation for functional annotation of protein and structure-based drug design. As the number of available protein structures increases, structural alignment based algorithm becomes the dominant approach for protein-binding sites prediction. However, the present algorithms underutilize the ever increasing numbers of three-dimensional protein-ligand complex structures (bound protein), and it could be improved on the process of alignment, selection of templates and clustering of template. Herein, we built so far the largest database of bound templates with stringent quality control. And on this basis, bSiteFinder as a protein-binding sites prediction server was developed.

Results

By introducing Homology Indexing, Chain Length Indexing, Stability of Complex and Optimized Multiple-Templates Clustering into our algorithm, the efficiency of our server has been significantly improved. Further, the accuracy was approximately 2-10 % higher than that of other algorithms for the test with either bound dataset or unbound dataset. For 210 bound dataset, bSiteFinder achieved high accuracies up to 94.8 % (MCC 0.95). For another 48 bound/unbound dataset, bSiteFinder achieved high accuracies up to 93.8 % for bound proteins (MCC 0.95) and 85.4 % for unbound proteins (MCC 0.72). Our bSiteFinder server is freely available at http://binfo.shmtu.edu.cn/bsitefinder/, and the source code is provided at the methods page.

Conclusion

An online bSiteFinder server is freely available at http://binfo.shmtu.edu.cn/bsitefinder/. Our work lays a foundation for functional annotation of protein and structure-based drug design. With ever increasing numbers of three-dimensional protein-ligand complex structures, our server should be more accurate and less time-consuming. Graphical Abstract: bSiteFinder (http://binfo.shmtu.edu.cn/bsitefinder/) as a protein-binding sites prediction server was developed based on the largest database of bound templates so far with stringent quality control. By introducing Homology Indexing, Chain Length Indexing, Stability of Complex and Optimized Multiple-Templates Clustering into our algorithm, the efficiency of our server has been significantly improved. What's more, the accuracy was approximately 2-10 % higher than that of other algorithms for the test with either bound dataset or unbound dataset.",2016-07-11 +28885976,Quantitative Microbial Risk Assessment for Spray Irrigation of Dairy Manure Based on an Empirical Fate and Transport Model.,"

Background

Spray irrigation for land-applying livestock manure is increasing in the United States as farms become larger and economies of scale make manure irrigation affordable. Human health risks from exposure to zoonotic pathogens aerosolized during manure irrigation are not well understood.

Objectives

We aimed to a) estimate human health risks due to aerosolized zoonotic pathogens downwind of spray-irrigated dairy manure; and b) determine which factors (e.g., distance, weather conditions) have the greatest influence on risk estimates.

Methods

We sampled downwind air concentrations of manure-borne fecal indicators and zoonotic pathogens during 21 full-scale dairy manure irrigation events at three farms. We fit these data to hierarchical empirical models and used model outputs in a quantitative microbial risk assessment (QMRA) to estimate risk [probability of acute gastrointestinal illness (AGI)] for individuals exposed to spray-irrigated dairy manure containing Campylobacter jejuni, enterohemorrhagic Escherichia coli (EHEC), or Salmonella spp.

Results

Median risk estimates from Monte Carlo simulations ranged from $10^{-5}$ to $10^{-2}$ and decreased with distance from the source. Risk estimates for Salmonella or EHEC-related AGI were most sensitive to the assumed level of pathogen prevalence in dairy manure, while risk estimates for C. jejuni were not sensitive to any single variable. Airborne microbe concentrations were negatively associated with distance and positively associated with wind speed, both of which were retained in models as a significant predictor more often than relative humidity, solar irradiation, or temperature.

Conclusions

Our model-based estimates suggest that reducing pathogen prevalence and concentration in source manure would reduce the risk of AGI from exposure to manure irrigation, and that increasing the distance from irrigated manure (i.e., setbacks) and limiting irrigation to times of low wind speed may also reduce risk. https://doi.org/10.1289/EHP283.",2017-08-16 +27153621,Memdock: an α-helical membrane protein docking algorithm.,"

Motivation

A wide range of fundamental biological processes are mediated by membrane proteins. Despite their large number and importance, less than 1% of all 3D protein structures deposited in the Protein Data Bank are of membrane proteins. This is mainly due to the challenges of crystallizing such proteins or performing NMR spectroscopy analyses. All the more so, there is only a small number of membrane protein-protein complexes with known structure. Therefore, developing computational tools for docking membrane proteins is crucial. Numerous methods for docking globular proteins exist, however few have been developed especially for membrane proteins and designed to address docking within the lipid bilayer environment.

Results

We present a novel algorithm, Memdock, for docking α-helical membrane proteins which takes into consideration the lipid bilayer environment for docking as well as for refining and ranking the docking candidates. We show that our algorithm improves both the docking accuracy and the candidates ranking compared to a standard protein-protein docking algorithm.

Availability and implementation

http://bioinfo3d.cs.tau.ac.il/Memdock/

Contacts

namih@tau.ac.il or wolfson@tau.ac.il

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-08 +24903516,Assisted curation of regulatory interactions and growth conditions of OxyR in E. coli K-12. ,"Given the current explosion of data within original publications generated in the field of genomics, a recognized bottleneck is the transfer of such knowledge into comprehensive databases. We have for years organized knowledge on transcriptional regulation reported in the original literature of Escherichia coli K-12 into RegulonDB (http://regulondb.ccg.unam.mx), our database that is currently supported by >5000 papers. Here, we report a first step towards the automatic biocuration of growth conditions in this corpus. Using the OntoGene text-mining system (http://www.ontogene.org), we extracted and manually validated regulatory interactions and growth conditions in a new approach based on filters that enable the curator to select informative sentences from preprocessed full papers. Based on a set of 48 papers dealing with oxidative stress by OxyR, we were able to retrieve 100% of the OxyR regulatory interactions present in RegulonDB, including the transcription factors and their effect on target genes. Our strategy was designed to extract, as we did, their growth conditions. This result provides a proof of concept for a more direct and efficient curation process, and enables us to define the strategy of the subsequent steps to be implemented for a semi-automatic curation of original literature dealing with regulation of gene expression in bacteria. This project will enhance the efficiency and quality of the curation of knowledge present in the literature of gene regulation, and contribute to a significant increase in the encoding of the regulatory network of E. coli. RegulonDB Database URL: http://regulondb.ccg.unam.mx OntoGene URL: http://www.ontogene.org.",2014-06-04 +24340000,CyTargetLinker: a cytoscape app to integrate regulatory interactions in network analysis.,"

Introduction

The high complexity and dynamic nature of the regulation of gene expression, protein synthesis, and protein activity pose a challenge to fully understand the cellular machinery. By deciphering the role of important players, including transcription factors, microRNAs, or small molecules, a better understanding of key regulatory processes can be obtained. Various databases contain information on the interactions of regulators with their targets for different organisms, data recently being extended with the results of the ENCODE (Encyclopedia of DNA Elements) project. A systems biology approach integrating our understanding on different regulators is essential in interpreting the regulation of molecular biological processes.

Implementation

We developed CyTargetLinker (http://projects.bigcat.unimaas.nl/cytargetlinker), a Cytoscape app, for integrating regulatory interactions in network analysis. Recently we released CyTargetLinker as one of the first apps for Cytoscape 3. It provides a user-friendly and flexible interface to extend biological networks with regulatory interactions, such as microRNA-target, transcription factor-target and/or drug-target. Importantly, CyTargetLinker employs identifier mapping to combine various interaction data resources that use different types of identifiers.

Results

Three case studies demonstrate the strength and broad applicability of CyTargetLinker, (i) extending a mouse molecular interaction network, containing genes linked to diabetes mellitus, with validated and predicted microRNAs, (ii) enriching a molecular interaction network, containing DNA repair genes, with ENCODE transcription factor and (iii) building a regulatory meta-network in which a biological process is extended with information on transcription factor, microRNA and drug regulation.

Conclusions

CyTargetLinker provides a simple and extensible framework for biologists and bioinformaticians to integrate different regulatory interactions into their network analysis approaches. Visualization options enable biological interpretation of complex regulatory networks in a graphical way. Importantly the incorporation of our tool into the Cytoscape framework allows the application of CyTargetLinker in combination with a wide variety of other apps for state-of-the-art network analysis.",2013-12-05 +26656569,SpotCaliper: fast wavelet-based spot detection with accurate size estimation.,"

Motivation

SpotCaliper is a novel wavelet-based image-analysis software providing a fast automatic detection scheme for circular patterns (spots), combined with the precise estimation of their size. It is implemented as an ImageJ plugin with a friendly user interface. The user is allowed to edit the results by modifying the measurements (in a semi-automated way), extract data for further analysis. The fine tuning of the detections includes the possibility of adjusting or removing the original detections, as well as adding further spots.

Results

The main advantage of the software is its ability to capture the size of spots in a fast and accurate way.

Availability and implementation

http://bigwww.epfl.ch/algorithms/spotcaliper/

Contact

zsuzsanna.puspoki@epfl.ch

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-10 +28679524,Relationship Between Serum Inflammatory Marker Levels and the Dynamic Changes in Coronary Plaque Characteristics After Statin Therapy. ,"The mechanism of statin for atheroma stabilization remains unclear. We aimed to assess the relationship between on-treatment changes in serum inflammatory biomarker levels and plaque composition in differed nonculprit coronary lesions. The changes in serum biochemical values, and intravascular ultrasound data were evaluated in 218 patients with virtual histology (VH)-intravascular ultrasound-defined fibroatheroma-containing segments after 12-month rosuvastatin treatment. When stratifying patients into quartiles according to the change in high-sensitivity C-reactive protein (hsCRP), there was a significant positive linear relationship for the changes in %necrotic core (coefficient, 1.31; standard error, 0.54) and %dense calcium volumes (coefficient, 0.80; standard error, 0.27), but a negative linear relationship for the changes in %fibrous (coefficient, -0.94; standard error, 0.45) and %fibrofatty volumes (coefficient, -1.17; standard error, 0.56; all P<0.05). The decrease in hsCRP (-1.2±3.9 versus 0.5±3.4 mg/L; P=0.02) was greater in those without VH-defined thin-cap fibroatheroma (TCFA, defined as >30° of necrotic core abutting the lumen in 3 consecutive slices) than those with VH-TCFA at follow-up. Diabetes mellitus, a larger normalized total atheroma volume, and the presence of VH-TCFA at baseline predicted the presence of VH-TCFA at follow-up (odds ratio, 4.01, 1.18, and 9.21, respectively; all P<0.05), whereas the change in hsCRP showed a trend (odds ratio, 1.19; P=0.07). The change in low-density lipoprotein-cholesterol had no relationship with the changes in hsCRP or plaque compositions. 
With 12-month rosuvastatin therapy, a greater hsCRP reduction (not low-density lipoprotein-cholesterol) was associated with a greater decrease in %necrotic core volume and the absence of VH-TCFA, indicating a link between the anti-inflammatory action of statin and plaque stabilization by reducing NC and reinforcing fibrous cap. URL: https://www.clinicaltrials.gov. Unique identifier: NCT00997880.",2017-07-01 +23046922,Parallel-META: efficient metagenomic data analysis based on high-performance computation.,"

Background

Metagenomics method directly sequences and analyses genome information from microbial communities. There are usually more than hundreds of genomes from different microbial species in the same community, and the main computational tasks for metagenomic data analyses include taxonomical and functional component examination of all genomes in the microbial community. Metagenomic data analysis is both data- and computation- intensive, which requires extensive computational power. Most of the current metagenomic data analysis softwares were designed to be used on a single computer or single computer clusters, which could not match with the fast increasing number of large metagenomic projects' computational requirements. Therefore, advanced computational methods and pipelines have to be developed to cope with such need for efficient analyses.

Result

In this paper, we proposed Parallel-META, a GPU- and multi-core-CPU-based open-source pipeline for metagenomic data analysis, which enabled the efficient and parallel analysis of multiple metagenomic datasets and the visualization of the results for multiple samples. In Parallel-META, the similarity-based database search was parallelized based on GPU computing and multi-core CPU computing optimization. Experiments have shown that Parallel-META has at least 15 times speed-up compared to traditional metagenomic data analysis method, with the same accuracy of the results http://www.computationalbioenergy.org/parallel-meta.html.

Conclusion

The parallel processing of current metagenomic data would be very promising: with current speed up of 15 times and above, binning would not be a very time-consuming process any more. Therefore, some deeper analysis of the metagenomic data, such as the comparison of different samples, would be feasible in the pipeline, and some of these functionalities have been included into the Parallel-META pipeline.",2012-07-16 +25267795,CarrotDB: a genomic and transcriptomic database for carrot. ,"Carrot (Daucus carota L.) is an economically important vegetable worldwide and is the largest source of carotenoids and provitamin A in the human diet. Given the importance of this vegetable to humans, research and breeding communities on carrot should obtain useful genomic and transcriptomic information. The first whole-genome sequences of 'DC-27' carrot were de novo assembled and analyzed. Transcriptomic sequences of 14 carrot genotypes were downloaded from the Sequence Read Archive (SRA) database of National Center for Biotechnology Information (NCBI) and mapped to the whole-genome sequence before assembly. Based on these data sets, the first Web-based genomic and transcriptomic database for D. carota (CarrotDB) was developed (database homepage: http://apiaceae.njau.edu.cn/carrotdb). CarrotDB offers the tools of Genome Map and Basic Local Alignment Search Tool. Using these tools, users can search certain target genes and simple sequence repeats along with designed primers of 'DC-27'. Assembled transcriptomic sequences along with fragments per kilobase of transcript sequence per millions base pairs sequenced information (FPKM) information of 14 carrot genotypes are also provided. Users can download de novo assembled whole-genome sequences, putative gene sequences and putative protein sequences of 'DC-27'. Users can also download transcriptome sequence assemblies of 14 carrot genotypes along with their FPKM information. 
A total of 2826 transcription factor (TF) genes classified into 57 families were identified in the entire genome sequences. These TF genes were embedded in CarrotDB as an interface. The 'GERMPLASM' part of CarrotDB also offers taproot photos of 45 carrot genotypes and a table containing accession numbers, names, countries of origin and colors of cortex, phloem and xylem parts of taproots corresponding to each carrot genotype. CarrotDB will be continuously updated with new information. Database URL: http://apiaceae.njau.edu.cn/carrotdb/",2014-09-29 +27797406,The impact of fraudulent and irreproducible data to the translational research crisis - solutions and implementation.,"One of the aims of basic neuroscience research is ultimately the development of therapeutics to cure diseases. Funders granting money to research institutions increasingly express interest into how their financial resources are used and look for successful translation in clinical practice. Disappointingly, many findings that started out promising in basic research projects and phase I trials did not live up to the promise of therapeutic efficacy in later phase II or III trials. An inordinately high amount of time and money is thus spent on research that does not always have the required human impact. Potential reasons for these problems are numerous. Although research misconduct occurs and contributes to this shortcoming, it is not the only important factor. Frequently, basic science results turn out to be irreproducible. Irreproducibility, outside of malfeasance, is multifactorial and can include poor experimental design, conduct, statistical analysis, reporting standards, and conceptual flaws. Further confounding problems include an insufficient transferability of animal to human physiology, as well as intersubject group variability, for example, sexual dimorphisms. 
While the causes of poor data reproducibility are therefore numerous, equally there are many groups that can contribute to improvements in how basic science is reported. Here, we will review how the Journal of Neurochemistry can contribute to increasing the value of preclinical and translational research. Despite a vast amount of very promising basic research findings, these failed to successfully translate into the clinical practice so far. The reasons for this 'data reproducibility crisis' are numerous, for example, rooting in insufficient experimental design, conceptual flaws, incorrect statistical planning and evaluation, incomplete model system that do not adequately reproduce the human physiology, and further reasons discussed in this Review with the aim to present practical solutions that can be implemented by researchers, journals editors, and reviewers. We will also explain measures the Journal of Neurochemistry have implemented to overcome these issues and weaknesses in preclinical research. These includes adherence to the ARRIVE ( www.nc3rs.org) guidelines, NINDS standards (doi: 10.1038/nature11556), and The Transparency and Openness Promotion Guidelines (TOP) Committee guidelines (https://cos.io/top/#TOP). This article is part of the 60th Anniversary special issue.",2016-10-01 +23193266,The TissueNet database of human tissue protein-protein interactions.,"Knowledge of protein-protein interactions (PPIs) is important for identifying the functions of proteins and the processes they are involved in. Although data of human PPIs are easily accessible through several public databases, these databases do not specify the human tissues in which these PPIs take place. The TissueNet database of human tissue PPIs (http://netbio.bgu.ac.il/tissuenet/) associates each interaction with human tissues that express both pair mates. 
This was achieved by integrating current data of experimentally detected PPIs with extensive data of gene and protein expression across 16 main human tissues. Users can query TissueNet using a protein and retrieve its PPI partners per tissue, or using a PPI and retrieve the tissues expressing both pair mates. The graphical representation of the output highlights tissue-specific and tissue-wide PPIs. Thus, TissueNet provides a unique platform for assessing the roles of human proteins and their interactions across tissues.",2012-11-27 +24997640,Partial least squares based gene expression analysis in renal failure.,"

Background

Preventive and therapeutic options for renal failure are still limited. Gene expression profile analysis is powerful in the identification of biological differences between end stage renal failure patients and healthy controls. Previous studies mainly used variance/regression analysis without considering various biological, environmental factors. The purpose of this study is to investigate the gene expression difference between end stage renal failure patients and healthy controls with partial least squares (PLS) based analysis.

Methods

With gene expression data from the Gene Expression Omnibus database, we performed PLS analysis to identify differentially expressed genes. Enrichment and network analyses were also carried out to capture the molecular signatures of renal failure.

Results

We acquired 573 differentially expressed genes. Pathway and Gene Ontology items enrichment analysis revealed over-representation of dysregulated genes in various biological processes. Network analysis identified seven hub genes with degrees higher than 10, including CAND1, CDK2, TP53, SMURF1, YWHAE, SRSF1, and RELA. Proteins encoded by CDK2, TP53, and RELA have been associated with the progression of renal failure in previous studies.

Conclusions

Our findings shed light on expression character of renal failure patients with the hope to offer potential targets for future therapeutic studies.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1450799302127207.",2014-07-05 +23321019,Modeling of non-additive mixture properties using the Online CHEmical database and Modeling environment (OCHEM).,"The Online Chemical Modeling Environment (OCHEM, http://ochem.eu) is a web-based platform that provides tools for automation of typical steps necessary to create a predictive QSAR/QSPR model. The platform consists of two major subsystems: a database of experimental measurements and a modeling framework. So far, OCHEM has been limited to the processing of individual compounds. In this work, we extended OCHEM with a new ability to store and model properties of binary non-additive mixtures. The developed system is publicly accessible, meaning that any user on the Web can store new data for binary mixtures and develop models to predict their non-additive properties. The database already contains almost 10,000 data points for the density, bubble point, and azeotropic behavior of binary mixtures. For these data, we developed models for both qualitative (azeotrope/zeotrope) and quantitative endpoints (density and bubble points) using different learning methods and specially developed descriptors for mixtures. The prediction performance of the models was similar to or more accurate than results reported in previous studies. Thus, we have developed and made publicly available a powerful system for modeling mixtures of chemical compounds on the Web.",2013-01-15 +27597880,Functional networks inference from rule-based machine learning models.,"

Background

Functional networks play an important role in the analysis of biological processes and systems. The inference of these networks from high-throughput (-omics) data is an area of intense research. So far, the similarity-based inference paradigm (e.g. gene co-expression) has been the most popular approach. It assumes a functional relationship between genes which are expressed at similar levels across different samples. An alternative to this paradigm is the inference of relationships from the structure of machine learning models. These models are able to capture complex relationships between variables, that often are different/complementary to the similarity-based methods.

Results

We propose a protocol to infer functional networks from machine learning models, called FuNeL. It assumes, that genes used together within a rule-based machine learning model to classify the samples, might also be functionally related at a biological level. The protocol is first tested on synthetic datasets and then evaluated on a test suite of 8 real-world datasets related to human cancer. The networks inferred from the real-world data are compared against gene co-expression networks of equal size, generated with 3 different methods. The comparison is performed from two different points of view. We analyse the enriched biological terms in the set of network nodes and the relationships between known disease-associated genes in a context of the network topology. The comparison confirms both the biological relevance and the complementary character of the knowledge captured by the FuNeL networks in relation to similarity-based methods and demonstrates its potential to identify known disease associations as core elements of the network. Finally, using a prostate cancer dataset as a case study, we confirm that the biological knowledge captured by our method is relevant to the disease and consistent with the specialised literature and with an independent dataset not used in the inference process.

Availability

The implementation of our network inference protocol is available at: http://ico2s.org/software/funel.html.",2016-09-05 +26686274,"The Thuringian registry for bloodstream infections, antibiotic resistance and the practice of blood culture sampling--AlertsNet.","Evidence-based blood culture (BC) testing is of utmost importance for intensive care unit (ICU) patients suspected for sepsis. Knowledge of the aetiological agent and its susceptibility to anti-infective agents enables the clinician to initiate appropriate antimicrobial therapy and guides diagnostic procedures. This has been shown to reduce mortality, ICU stay and antibiotic overuse. Whereas microbiological laboratory practice has been highly standardised, shortfalls in pre-analytic procedures in the ICU have a significant effect on the diagnostic yield. Currently, surveillance data on BC practice lack hospital-, patient- and laboratory-based denominator data. Supporting information on differences in the clinical practice of BC testing, differences in the characteristics of the institution and the case-mix on specific wards, as well as differences in the availability of microbiological laboratories is demanded on a population basis. A population-based survey on BC practice has been established for the German Federal State of Thuringia connecting both hospitals and microbiological laboratories within an electronic registry for immediate enrolment of BC findings (AlertsNet; http://www.alertsnet.de). The registry includes microbiological results and clinical data as well as institutional variables (e.g. case severity indices) from all patients with clinically relevant positive BCs at the participating centres. 
The main objectives are to sustain and expand a population-based surveillance and warning system for the assessment of diagnosis, risk factors, treatment and outcomes of hospitalised patients and to improve outcomes of patients with bloodstream infections.",2015-12-01 +27935621,"Letter in reference to: ""Short-term effects of night shift work on breast cancer risk: a cohort study of payroll data"".","There are major flaws with the analyses in the Vistisen et al (1) cohort study examining if night shift work is a short-term risk factor for breast cancer. The crucial problem is the potential for exposure misclassification, which is very high. The authors' definition of day shift is ""≥3 hours of work between 06:00-20:00 hours"". This means that a worker on an 8-hour shift that begins at 03:00 hours would be classified as a day rather than night shift worker because he/she worked only two hours between 24:00-05:00 hours. Similarly, a second shifter might start work at 17:00 but not get off until 01:00 and yet still be classified as a ""day shift"" worker. This does not make sense as a baseline comparison group ""unexposed"" to work during the night hours. A sensible classification system would be to define ""day shift"" as any shift that begins after 07:00 and ends before 18:00 hours. This is straightforward and avoids all of the ambiguities inherent in the definition used by the authors. In addition, the authors claim that the ""inception population"" is less likely to have had past prior non-day work hours. However, this group has an average age of >35 years. It is inconceivable that all of these women were new graduates who started a public health sector job for the first time. Rather, the majority must surely have worked elsewhere for many years but then started in the regions covered only after 2006. This topic is too important, and this cohort too valuable, not to carefully define the baseline comparison group of ""day workers"" in a sensible manner. 
All the inferences rely crucially on this definition. The authors have the data to define the day-working baseline group in a way that avoids these obvious biases. That is why it is so frustrating that the authors chose to conduct the analyses as they did, with a highly flawed definition of ""day work"", when they could have done so much better. A highly flawed epidemiological report is worse than no report at all because it misleads the scientific community and the public. Reference 1. Vistisen HT, Garde AH, Frydenberg M, Christiansen P, Hansen ÅM, Hansen J, Bonde JPE, Kolstad HA. Short-term effects of night shift work on breast cancer risk: a cohort study of payroll data. Scand J Work Environ Health - online first. http://dx.doi.org/10.5271/sjweh.3603.",2016-12-09 +27175225,OmniSearch: a semantic search system based on the Ontology for MIcroRNA Target (OMIT) for microRNA-target gene interaction data.,"As a special class of non-coding RNAs (ncRNAs), microRNAs (miRNAs) perform important roles in numerous biological and pathological processes. The realization of miRNA functions depends largely on how miRNAs regulate specific target genes. It is therefore critical to identify, analyze, and cross-reference miRNA-target interactions to better explore and delineate miRNA functions. Semantic technologies can help in this regard. We previously developed a miRNA domain-specific application ontology, Ontology for MIcroRNA Target (OMIT), whose goal was to serve as a foundation for semantic annotation, data integration, and semantic search in the miRNA field. In this paper we describe our continuing effort to develop the OMIT, and demonstrate its use within a semantic search system, OmniSearch, designed to facilitate knowledge capture of miRNA-target interaction data. Important changes in the current version OMIT are summarized as: (1) following a modularized ontology design (with 2559 terms imported from the NCRO ontology); (2) encoding all 1884 human miRNAs (vs. 
300 in previous versions); and (3) setting up a GitHub project site along with an issue tracker for more effective community collaboration on the ontology development. The OMIT ontology is free and open to all users, accessible at: http://purl.obolibrary.org/obo/omit.owl. The OmniSearch system is also free and open to all users, accessible at: http://omnisearch.soc.southalabama.edu/index.php/Software.",2016-05-10 +23737449,antiSMASH 2.0--a versatile platform for genome mining of secondary metabolite producers.,"Microbial secondary metabolites are a potent source of antibiotics and other pharmaceuticals. Genome mining of their biosynthetic gene clusters has become a key method to accelerate their identification and characterization. In 2011, we developed antiSMASH, a web-based analysis platform that automates this process. Here, we present the highly improved antiSMASH 2.0 release, available at http://antismash.secondarymetabolites.org/. For the new version, antiSMASH was entirely re-designed using a plug-and-play concept that allows easy integration of novel predictor or output modules. antiSMASH 2.0 now supports input of multiple related sequences simultaneously (multi-FASTA/GenBank/EMBL), which allows the analysis of draft genomes comprising multiple contigs. Moreover, direct analysis of protein sequences is now possible. antiSMASH 2.0 has also been equipped with the capacity to detect additional classes of secondary metabolites, including oligosaccharide antibiotics, phenazines, thiopeptides, homo-serine lactones, phosphonates and furans. The algorithm for predicting the core structure of the cluster end product is now also covering lantipeptides, in addition to polyketides and non-ribosomal peptides. The antiSMASH ClusterBlast functionality has been extended to identify sub-clusters involved in the biosynthesis of specific chemical building blocks. 
The new features currently make antiSMASH 2.0 the most comprehensive resource for identifying and analyzing novel secondary metabolite biosynthetic pathways in microorganisms.",2013-06-03 +24932005,Using association rule mining to determine promising secondary phenotyping hypotheses.,"

Motivation

Large-scale phenotyping projects such as the Sanger Mouse Genetics project are ongoing efforts to help identify the influences of genes and their modification on phenotypes. Gene-phenotype relations are crucial to the improvement of our understanding of human heritable diseases as well as the development of drugs. However, given that there are ∼20 000 genes in higher vertebrate genomes and the experimental verification of gene-phenotype relations requires a lot of resources, methods are needed that determine good candidates for testing.

Results

In this study, we applied an association rule mining approach to the identification of promising secondary phenotype candidates. The predictions rely on a large gene-phenotype annotation set that is used to find occurrence patterns of phenotypes. Applying an association rule mining approach, we could identify 1967 secondary phenotype hypotheses that cover 244 genes and 136 phenotypes. Using two automated and one manual evaluation strategies, we demonstrate that the secondary phenotype candidates possess biological relevance to the genes they are predicted for. From the results we conclude that the predicted secondary phenotypes constitute good candidates to be experimentally tested and confirmed.

Availability

The secondary phenotype candidates can be browsed through at http://www.sanger.ac.uk/resources/databases/phenodigm/gene/secondaryphenotype/list.

Contact

ao5@sanger.ac.uk or ds5@sanger.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-06-01 +28723463,The Global Prevalence of Infections in Urology (GPUI) Study: A Worldwide Surveillance Study in Urology Patients.,"The GPIU study is currently the only study registering health care-associated urogenital tract infections, especially in urology patients, in an ongoing surveillance protocol that can help to deliver data on adequate empirical antibiotic therapy in hospitalised urology patients according to guideline recommendations. The annual GPIU study will continue to be performed in November of each year under the URL http://gpiu.esiu.org/.",2016-04-01 +26170239,BatAlign: an incremental method for accurate alignment of sequencing reads.,"Structural variations (SVs) play a crucial role in genetic diversity. However, the alignments of reads near/across SVs are made inaccurate by the presence of polymorphisms. BatAlign is an algorithm that integrated two strategies called 'Reverse-Alignment' and 'Deep-Scan' to improve the accuracy of read-alignment. In our experiments, BatAlign was able to obtain the highest F-measures in read-alignments on mismatch-aberrant, indel-aberrant, concordantly/discordantly paired and SV-spanning data sets. On real data, the alignments of BatAlign were able to recover 4.3% more PCR-validated SVs with 73.3% less callings. These suggest BatAlign to be effective in detecting SVs and other polymorphic-variants accurately using high-throughput data. BatAlign is publicly available at https://goo.gl/a6phxB.",2015-07-13 +27497442,A novel copy number variants kernel association test with application to autism spectrum disorders studies.,"

Motivation

Copy number variants (CNVs) have been implicated in a variety of neurodevelopmental disorders, including autism spectrum disorders, intellectual disability and schizophrenia. Recent advances in high-throughput genomic technologies have enabled rapid discovery of many genetic variants including CNVs. As a result, there is increasing interest in studying the role of CNVs in the etiology of many complex diseases. Despite the availability of an unprecedented wealth of CNV data, methods for testing association between CNVs and disease-related traits are still under-developed due to the low prevalence and complicated multi-scale features of CNVs.

Results

We propose a novel CNV kernel association test (CKAT) in this paper. To address the low prevalence, CNVs are first grouped into CNV regions (CNVR). Then, taking into account the multi-scale features of CNVs, we first design a single-CNV kernel which summarizes the similarity between two CNVs, and next aggregate the single-CNV kernel to a CNVR kernel which summarizes the similarity between two CNVRs. Finally, association between CNVR and disease-related traits is assessed by comparing the kernel-based similarity with the similarity in the trait using a score test for variance components in a random effect model. We illustrate the proposed CKAT using simulations and show that CKAT is more powerful than existing methods, while always being able to control the type I error. We also apply CKAT to a real dataset examining the association between CNV and autism spectrum disorders, which demonstrates the potential usefulness of the proposed method.

Availability and implementation

An R package to implement the proposed CKAT method is available at http://works.bepress.com/debashis_ghosh/ CONTACTS: xzhan@fhcrc.org or debashis.ghosh@ucdenver.edu. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-06 +26714661,"Treelink: data integration, clustering and visualization of phylogenetic trees.","

Background

Phylogenetic trees are central to a wide range of biological studies. In many of these studies, tree nodes need to be associated with a variety of attributes. For example, in studies concerned with viral relationships, tree nodes are associated with epidemiological information, such as location, age and subtype. Gene trees used in comparative genomics are usually linked with taxonomic information, such as functional annotations and events. A wide variety of tree visualization and annotation tools have been developed in the past, however none of them are intended for an integrative and comparative analysis.

Results

Treelink is a platform-independent software for linking datasets and sequence files to phylogenetic trees. The application allows an automated integration of datasets to trees for operations such as classifying a tree based on a field or showing the distribution of selected data attributes in branches and leaves. Genomic and proteomic sequences can also be linked to the tree and extracted from internal and external nodes. A novel clustering algorithm to simplify trees and display the most divergent clades was also developed, where validation can be achieved using the data integration and classification function. Integrated geographical information allows ancestral character reconstruction for phylogeographic plotting based on parsimony and likelihood algorithms.

Conclusion

Our software can successfully integrate phylogenetic trees with different data sources, and perform operations to differentiate and visualize those differences within a tree. File support includes the most popular formats such as newick and csv. Exporting visualizations as images, cluster outputs and genomic sequences is supported. Treelink is available as a web and desktop application at http://www.treelinkapp.com .",2015-12-29 +26072510,Reconstruction of clonal trees and tumor composition from multi-sample sequencing data.,"

Motivation

DNA sequencing of multiple samples from the same tumor provides data to analyze the process of clonal evolution in the population of cells that give rise to a tumor.

Results

We formalize the problem of reconstructing the clonal evolution of a tumor using single-nucleotide mutations as the variant allele frequency (VAF) factorization problem. We derive a combinatorial characterization of the solutions to this problem and show that the problem is NP-complete. We derive an integer linear programming solution to the VAF factorization problem in the case of error-free data and extend this solution to real data with a probabilistic model for errors. The resulting AncesTree algorithm is better able to identify ancestral relationships between individual mutations than existing approaches, particularly in ultra-deep sequencing data when high read counts for mutations yield high confidence VAFs.

Availability and implementation

An implementation of AncesTree is available at: http://compbio.cs.brown.edu/software.",2015-06-01 +23314754,"STIFDB2: an updated version of plant stress-responsive transcription factor database with additional stress signals, stress-responsive transcription factor binding sites and stress-responsive genes in Arabidopsis and rice.","Understanding the principles of abiotic and biotic stress responses, tolerance and adaptation remains important in plant physiology research to develop better varieties of crop plants. Better understanding of plant stress response mechanisms and application of knowledge derived from integrated experimental and bioinformatics approaches are gaining importance. Earlier, we showed that compiling a database of stress-responsive transcription factors and their corresponding target binding sites in the form of Hidden Markov models at promoter, untranslated and upstream regions of stress-up-regulated genes from expression analysis can help in elucidating various aspects of the stress response in Arabidopsis. In addition to the extensive content in the first version, STIFDB2 is now updated with 15 stress signals, 31 transcription factors and 5,984 stress-responsive genes from three species (Arabidopsis thaliana, Oryza sativa subsp. japonica and Oryza sativa subsp. indica). We have employed an integrated biocuration and genomic data mining approach to characterize the data set of transcription factors and consensus binding sites from literature mining and stress-responsive genes from the Gene Expression Omnibus. STIFDB2 currently has 38,798 associations of stress signals, stress-responsive genes and transcription factor binding sites predicted using the Stress-responsive Transcription Factor (STIF) algorithm, along with various functional annotation data. 
As a unique plant stress regulatory genomics data platform, STIFDB2 can be utilized for targeted as well as high-throughput experimental and computational studies to unravel principles of the stress regulome in dicots and gramineae. STIFDB2 is available from the URL: http://caps.ncbs.res.in/stifdb2.",2013-01-10 +24840658,Accessibility and quality of online information for pediatric orthopaedic surgery fellowships.,"

Background

Pediatric orthopaedic fellowship applicants commonly use online-based resources for information on potential programs. Two primary sources are the San Francisco Match (SF Match) database and the Pediatric Orthopaedic Society of North America (POSNA) database. We sought to determine the accessibility and quality of information that could be obtained by using these 2 sources.

Methods

The online databases of the SF Match and POSNA were reviewed to determine the availability of embedded program links or external links for the included programs. If not available in the SF Match or POSNA data, Web sites for listed programs were located with a Google search. All identified Web sites were analyzed for accessibility, content volume, and content quality.

Results

At the time of online review, 50 programs, offering 68 positions, were listed in the SF Match database. Although 46 programs had links included with their information, 36 (72%) of them simply listed http://www.sfmatch.org as their unique Web site. Ten programs (20%) had external links listed, but only 2 (4%) linked directly to the fellowship web page. The POSNA database does not list any links to the 47 programs it lists, which offer 70 positions. On the basis of a Google search of the 50 programs listed in the SF Match database, web pages were found for 35. Of programs with independent web pages, all had a description of the program and 26 (74%) described their application process. Twenty-nine (83%) listed research requirements, 22 (63%) described the rotation schedule, and 12 (34%) discussed the on-call expectations. A contact telephone number and/or email address was provided by 97% of programs. Twenty (57%) listed both the coordinator and fellowship director, 9 (26%) listed the coordinator only, 5 (14%) listed the fellowship director only, and 1 (3%) had no contact information given.

Conclusions

The SF Match and POSNA databases provide few direct links to fellowship Web sites, and individual program Web sites either do not exist or do not effectively convey information about the programs.

Clinical relevance

Improved accessibility and accurate information online would allow potential applicants to obtain information about pediatric fellowships in a more efficient manner.",2014-12-01 +24997741,Current treatment of psoriatic arthritis: update based on a systematic literature review to establish French Society for Rheumatology (SFR) recommendations for managing spondyloarthritis.,"OBJECTIVE:The latest recommendations on managing psoriatic arthritis (PsA) were issued in 2007 by the French Society for Rheumatology (SFR) and in 2012 by the European League against Rheumatism (EULAR). A panel of spondyloarthritis experts developed new recommendations on the management of spondyloarthritides, including PsA, based on a literature review and expert opinion. METHODS:The relevant literature published between December 1, 2009 and March 31, 2013 was reviewed by searching Medline; Embase; the Cochrane database; abstracts from meetings held by the SFR, EULAR, and American College of Rheumatology (ACR) between 2010 and 2012; and the therapeutic trials registered on http://www.clinicaltrials.gov. RESULTS:No studies assessed nonsteroidal anti-inflammatory drugs or glucocorticoids (given systemically or intraarticularly) in PsA. The efficacy of methotrexate was evaluated versus a placebo in the randomized MIPA trial. TNFα antagonists (the soluble receptor etanercept, chimeric monoclonal antibody [mAb] infliximab, humanized mAbs adalimumab and golimumab, and PEGylated mAb certolizumab) are the reference-standard biotherapies in PsA. The treat-to-target approach should be used, with the target being a remission or minimal disease activity. Registry data leave room for controversy about the potential benefits of combining methotrexate and a TNFα antagonist. Switching to an alternative TNFα antagonist when the first drug fails is effective, although the initial response and drug continuation rate may be decreased. New drugs such as apremilast and ustekinumab are being developed. 
CONCLUSION:This systematic literature review allowed the development of new SFR recommendations on the treatment of PsA.",2014-07-02 +27801919,The Role of the State Health Laboratories in Advancing Health Equity.,"While laboratories play an important and recognized role in many public health programs that require surveillance of disease spread or monitoring of environmental conditions, the role of public laboratories in assessing and advancing health equity is not well understood. Yet, public laboratories collect, provide or generate much of the data used to determine health equity status and monitor health equity trends in multiple settings and disciplines. RI State Health Laboratories, a division of the RI Department of Health, operates programs that help measure and address health disparities. Health equity themes are present in laboratory programs that measure environmental determinants of health and assure equal access to laboratory screening and diagnostic services. This article will review the role of laboratory programs in advancing health equity in the state. Specific examples of laboratory contributions to health equity programs will be provided and examined. Future trends and unmet needs will also be discussed. [Full article available at http://rimed.org/rimedicaljournal-2016-11.asp].",2016-11-01 +27385285,Effects of Neonicotinoid Pesticide Exposure on Human Health: A Systematic Review.,"

Background

Numerous studies have identified detectable levels of neonicotinoids (neonics) in the environment, adverse effects of neonics in many species, including mammals, and pathways through which human exposure to neonics could occur, yet little is known about the human health effects of neonic exposure.

Objective

In this systematic review, we sought to identify human population studies on the health effects of neonics.

Methods

Studies published in English between 2005 and 2015 were searched using PubMed, Scopus, and Web of Science databases. No restrictions were placed on the type of health outcome assessed. Risk of bias was assessed using guidance developed by the National Toxicology Program's Office of Health Assessment and Translation.

Results

Eight studies investigating the human health effects of exposure to neonics were identified. Four examined acute exposure: Three neonic poisoning studies reported two fatalities (n = 1,280 cases) and an occupational exposure study of 19 forestry workers reported no adverse effects. Four general population studies reported associations between chronic neonic exposure and adverse developmental or neurological outcomes, including tetralogy of Fallot (AOR 2.4, 95% CI: 1.1, 5.4), anencephaly (AOR 2.9, 95% CI: 1.0, 8.2), autism spectrum disorder [AOR 1.3, 95% credible interval (CrI): 0.78, 2.2], and a symptom cluster including memory loss and finger tremor (OR 14, 95% CI: 3.5, 57). Reported odds ratios were based on exposed compared to unexposed groups.

Conclusions

The studies conducted to date were limited in number with suggestive but methodologically weak findings related to chronic exposure. Given the wide-scale use of neonics, more studies are needed to fully understand their effects on human health. Citation: Cimino AM, Boyles AL, Thayer KA, Perry MJ. 2017. Effects of neonicotinoid pesticide exposure on human health: a systematic review. Environ Health Perspect 125:155-162; http://dx.doi.org/10.1289/EHP515.",2016-07-06 +27389614,"Ebola Surveillance - Guinea, Liberia, and Sierra Leone.","Developing a surveillance system during a public health emergency is always challenging but is especially so in countries with limited public health infrastructure. Surveillance for Ebola virus disease (Ebola) in the West African countries heavily affected by Ebola (Guinea, Liberia, and Sierra Leone) faced numerous impediments, including insufficient numbers of trained staff, community reticence to report cases and contacts, limited information technology resources, limited telephone and Internet service, and overwhelming numbers of infected persons. Through the work of CDC and numerous partners, including the countries' ministries of health, the World Health Organization, and other government and nongovernment organizations, functional Ebola surveillance was established and maintained in these countries. CDC staff were heavily involved in implementing case-based surveillance systems, sustaining case surveillance and contact tracing, and interpreting surveillance data. In addition to helping the ministries of health and other partners understand and manage the epidemic, CDC's activities strengthened epidemiologic and data management capacity to improve routine surveillance in the countries affected, even after the Ebola epidemic ended, and enhanced local capacity to respond quickly to future public health emergencies. 
However, the many obstacles overcome during development of these Ebola surveillance systems highlight the need to have strong public health, surveillance, and information technology infrastructure in place before a public health emergency occurs. Intense, long-term focus on strengthening public health surveillance systems in developing countries, as described in the Global Health Security Agenda, is needed. The activities summarized in this report would not have been possible without collaboration with many U.S. and international partners (http://www.cdc.gov/vhf/ebola/outbreaks/2014-west-africa/partners.html).",2016-07-08 +27153672,RCP: a novel probe design bias correction method for Illumina Methylation BeadChip.,"

Motivation

The Illumina HumanMethylation450 BeadChip has been extensively utilized in epigenome-wide association studies. This array and its successor, the MethylationEPIC array, use two types of probes-Infinium I (type I) and Infinium II (type II)-in order to increase genome coverage but differences in probe chemistries result in different type I and II distributions of methylation values. Ignoring the difference in distributions between the two probe types may bias downstream analysis.

Results

Here, we developed a novel method, called Regression on Correlated Probes (RCP), which uses the existing correlation between pairs of nearby type I and II probes to adjust the beta values of all type II probes. We evaluate the effect of this adjustment on reducing probe design type bias, reducing technical variation in duplicate samples, improving accuracy of measurements against known standards, and retention of biological signal. We find that RCP is statistically significantly better than unadjusted data or adjustment with alternative methods including SWAN and BMIQ.

Availability

We incorporated the method into the R package ENmix, which is freely available from the Bioconductor website (https://www.bioconductor.org/packages/release/bioc/html/ENmix.html).

Contact

niulg@ucmail.uc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-05 +26778510,An evaluation of the accuracy and speed of metagenome analysis tools.,"Metagenome studies are becoming increasingly widespread, yielding important insights into microbial communities covering diverse environments from terrestrial and aquatic ecosystems to human skin and gut. With the advent of high-throughput sequencing platforms, the use of large scale shotgun sequencing approaches is now commonplace. However, a thorough independent benchmark comparing state-of-the-art metagenome analysis tools is lacking. Here, we present a benchmark where the most widely used tools are tested on complex, realistic data sets. Our results clearly show that the most widely used tools are not necessarily the most accurate, that the most accurate tool is not necessarily the most time consuming, and that there is a high degree of variability between available tools. These findings are important as the conclusions of any metagenomics study are affected by errors in the predicted community composition and functional capacity. Data sets and results are freely available from http://www.ucbioinformatics.org/metabenchmark.html.",2016-01-18 +27998936,CellSort: a support vector machine tool for optimizing fluorescence-activated cell sorting and reducing experimental effort.,"

Motivation

High throughput screening by fluorescence activated cell sorting (FACS) is a common task in protein engineering and directed evolution. It can also be a rate-limiting step if high false positive or negative rates necessitate multiple rounds of enrichment. Current FACS software requires the user to define sorting gates by intuition and is practically limited to two dimensions. In cases when multiple rounds of enrichment are required, the software cannot forecast the enrichment effort required.

Results

We have developed CellSort, a support vector machine (SVM) algorithm that identifies optimal sorting gates based on machine learning using positive and negative control populations. CellSort can take advantage of more than two dimensions to enhance the ability to distinguish between populations. We also present a Bayesian approach to predict the number of sorting rounds required to enrich a population from a given library size. This Bayesian approach allowed us to determine strategies for biasing the sorting gates in order to reduce the required number of enrichment rounds. This algorithm should be generally useful for improving sorting outcomes and reducing effort when using FACS.

Availability and implementation

Source code available at http://tyolab.northwestern.edu/tools/ . k-tyo@northwestern.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +26568631,CHRONOS: a time-varying method for microRNA-mediated subpathway enrichment analysis.,"

Motivation

In the era of network medicine and the rapid growth of paired time series mRNA/microRNA expression experiments, there is an urgent need for pathway enrichment analysis methods able to capture the time- and condition-specific 'active parts' of the biological circuitry as well as the microRNA impact. Current methods ignore the multiple dynamical 'themes'-in the form of enriched biologically relevant microRNA-mediated subpathways-that determine the functionality of signaling networks across time.

Results

To address these challenges, we developed time-vaRying enriCHment integrOmics Subpathway aNalysis tOol (CHRONOS) by integrating time series mRNA/microRNA expression data with KEGG pathway maps and microRNA-target interactions. Specifically, microRNA-mediated subpathway topologies are extracted and evaluated based on the temporal transition and the fold change activity of the linked genes/microRNAs. Further, we provide measures that capture the structural and functional features of subpathways in relation to the complete organism pathway atlas. Our application to synthetic and real data shows that CHRONOS outperforms current subpathway-based methods in unraveling the inherent dynamic properties of pathways.

Availability and implementation

CHRONOS is freely available at http://biosignal.med.upatras.gr/chronos/

Contact

tassos.bezerianos@nus.edu.sg

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-14 +22823407,Kinetic modelling of central carbon metabolism in Escherichia coli.,"

Unlabelled

In the present study, we developed a detailed kinetic model of Escherichia coli central carbon metabolism. The main model assumptions were based on the results of metabolic and regulatory reconstruction of the system and thorough model verification with experimental data. The development and verification of the model included several stages, which allowed us to take into account both in vitro and in vivo experimental data and avoid the ambiguity that frequently occurs in detailed models of biochemical pathways. The choice of the level of detail for the mathematical description of enzymatic reaction rates and the evaluation of parameter values were based on available published data. Validation of the complete model of the metabolic pathway describing specific physiological states was based on fluxomics and metabolomics data. In particular, we developed a model that describes aerobic growth of E. coli in continuous culture with a limiting concentration of glucose. Such modification of the model was used to integrate experimental metabolomics data obtained in steady-state conditions for wild-type E. coli and genetically modified strains, e.g. knockout of the pyruvate kinase gene (pykA). Following analysis of the model behaviour, and comparison of the coincidence between predicted and experimental data, it was possible to investigate the functional and regulatory properties of E. coli central carbon metabolism. For example, a novel metabolic regulatory mechanism for 6-phosphogluconate dehydrogenase inhibition by phosphoenolpyruvate was hypothesized, and the flux ratios between the reactions catalysed by enzyme isoforms were predicted.

Database

The mathematical model described here has been submitted to the JWS Online Cellular Systems Modelling Database and can be accessed at http://jjj.biochem.sun.ac.za/database/peskov/index.html",2012-09-01 +23314752,UniVIO: a multiple omics database with hormonome and transcriptome data from rice.,"Plant hormones play important roles as signaling molecules in the regulation of growth and development by controlling the expression of downstream genes. Since the hormone signaling system represents a complex network involving functional cross-talk through the mutual regulation of signaling and metabolism, a comprehensive and integrative analysis of plant hormone concentrations and gene expression is important for a deeper understanding of hormone actions. We have developed a database named Uniformed Viewer for Integrated Omics (UniVIO: http://univio.psc.riken.jp/), which displays hormone-metabolome (hormonome) and transcriptome data in a single formatted (uniformed) heat map. At the present time, hormonome and transcriptome data obtained from 14 organ parts of rice plants at the reproductive stage and seedling shoots of three gibberellin signaling mutants are included in the database. The hormone concentration and gene expression data can be searched by substance name, probe ID, gene locus ID or gene description. A correlation search function has been implemented to enable users to obtain information of correlated substance accumulation and gene expression. In the correlation search, calculation method, range of correlation coefficient and plant samples can be selected freely.",2013-01-10 +28062441,ESA-UbiSite: accurate prediction of human ubiquitination sites by identifying a set of effective negatives.,"

Motivation

Numerous ubiquitination sites remain undiscovered because of the limitations of mass spectrometry-based methods. Existing prediction methods use randomly selected non-validated sites as non-ubiquitination sites to train ubiquitination site prediction models.

Results

We propose an evolutionary screening algorithm (ESA) to select effective negatives among non-validated sites and an ESA-based prediction method, ESA-UbiSite, to identify human ubiquitination sites. The ESA selects non-validated sites least likely to be ubiquitination sites as training negatives. Moreover, the ESA and ESA-UbiSite use a set of well-selected physicochemical properties together with a support vector machine for accurate prediction. Experimental results show that ESA-UbiSite with effective negatives achieved 0.92 test accuracy and a Matthews's correlation coefficient of 0.48, better than existing prediction methods. The ESA increased ESA-UbiSite's test accuracy from 0.75 to 0.92 and can improve other post-translational modification site prediction methods.

Availability and implementation

An ESA-UbiSite-based web server has been established at http://iclab.life.nctu.edu.tw/iclab_webtools/ESAUbiSite/ .

Contact

syho@mail.nctu.edu.tw.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +28190219,Temporal changes of microRNA gga-let-7b and gga-let-7i expression in chickens challenged with subgroup J avian leukosis virus.,"Two important microRNAs, gga-let-7b and gga-let-7i were examined for the relative expression in liver and bone marrow tissues from specific pathogen free chickens that were challenged either with GD1109 or NX0101 strain of subgroup J avian leukosis virus (ALV-J). The GD1109 strain of ALV-J reportedly causes hemangioma (HE) and NX0101 reportedly causes myeloma (ML) in susceptible chickens. Temporal changes of both gga-let-7b and gga-let-7i expression in ALV-J infected chickens were observed in contrast to its counterpart of a non-infected negative control group of chickens (P < 0.05 or P < 0.01) during the first 120 days post infection. Use of the web-based computational DIANA-mirPath software (available at http://microrna.gr/mirpath ), it was predicted that both gga-let-7b and gga-let-7i were involved in multiple pathways including signaling pathways, such as MAPK, TGF-beta, Notch, Wnt, mTOR, Cell cycle, P53 and Jak-STAT. Combining our experimental data with reports on the microRNAs, we suggest that both gga-let-7i and gga-let-7b may also act as tumor suppressors in chicken, especially play a critical role in tumorigenesis induced by ALV-J.",2017-02-11 +27381294,"After All, Only Millions? ","An update on the census of species of Archaea and Bacteria published recently in mBio (P. D. Schloss, R. A. Girard, T. Martin, J. Edwards, and J. C. Thrash, mBio 7:e00201-16, 2016, http://dx.doi.org/10.1128/mBio.00201-16) showed again that, despite ever-increasing sequencing efforts, the PCR-based retrieval of 16S rRNA genes is approaching saturation. On average, 95% of the genes analyzed today are identical to those present in public databases, with rarefaction analysis indicating that about one-third of the bacterial and archaeal diversity has already been covered. 
Therefore, despite estimates of up to 10^12 microbial species, the option should be considered that the census of Archaea and Bacteria on planet Earth might yield only millions of species after all.",2016-07-05 +27587688,AUCpreD: proteome-level protein disorder prediction by AUC-maximized deep convolutional neural fields.,"

Motivation

Protein intrinsically disordered regions (IDRs) play an important role in many biological processes. Two key properties of IDRs are (i) the occurrence is proteome-wide and (ii) the ratio of disordered residues is about 6%, which makes it challenging to accurately predict IDRs. Most IDR prediction methods use sequence profile to improve accuracy, which prevents its application to proteome-wide prediction since it is time-consuming to generate sequence profiles. On the other hand, the methods without using sequence profile fare much worse than using sequence profile.

Method

This article formulates IDR prediction as a sequence labeling problem and employs a new machine learning method called Deep Convolutional Neural Fields (DeepCNF) to solve it. DeepCNF is an integration of deep convolutional neural networks (DCNN) and conditional random fields (CRF); it can model not only complex sequence-structure relationship in a hierarchical manner, but also correlation among adjacent residues. To deal with highly imbalanced order/disorder ratio, instead of training DeepCNF by widely used maximum-likelihood, we develop a novel approach to train it by maximizing area under the ROC curve (AUC), which is an unbiased measure for class-imbalanced data.

Results

Our experimental results show that our IDR prediction method AUCpreD outperforms existing popular disorder predictors. More importantly, AUCpreD works very well even without sequence profile, comparing favorably to or even outperforming many methods using sequence profile. Therefore, our method works for proteome-wide disorder prediction while yielding similar or better accuracy than the others.

Availability and implementation

http://raptorx2.uchicago.edu/StructurePropertyPred/predict/

Contact

wangsheng@uchicago.edu, jinboxu@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +23161675,ChIPBase: a database for decoding the transcriptional regulation of long non-coding RNA and microRNA genes from ChIP-Seq data.,"Long non-coding RNAs (lncRNAs) and microRNAs (miRNAs) represent two classes of important non-coding RNAs in eukaryotes. Although these non-coding RNAs have been implicated in organismal development and in various human diseases, surprisingly little is known about their transcriptional regulation. Recent advances in chromatin immunoprecipitation with next-generation DNA sequencing (ChIP-Seq) have provided methods of detecting transcription factor binding sites (TFBSs) with unprecedented sensitivity. In this study, we describe ChIPBase (http://deepbase.sysu.edu.cn/chipbase/), a novel database that we have developed to facilitate the comprehensive annotation and discovery of transcription factor binding maps and transcriptional regulatory relationships of lncRNAs and miRNAs from ChIP-Seq data. The current release of ChIPBase includes high-throughput sequencing data that were generated by 543 ChIP-Seq experiments in diverse tissues and cell lines from six organisms. By analysing millions of TFBSs, we identified tens of thousands of TF-lncRNA and TF-miRNA regulatory relationships. Furthermore, two web-based servers were developed to annotate and discover transcriptional regulatory relationships of lncRNAs and miRNAs from ChIP-Seq data. In addition, we developed two genome browsers, deepView and genomeView, to provide integrated views of multidimensional data. 
Moreover, our web implementation supports diverse query types and the exploration of TFs, lncRNAs, miRNAs, gene ontologies and pathways.",2012-11-17 +25863133,PlantMirnaT: miRNA and mRNA integrated analysis fully utilizing characteristics of plant sequencing data.,"miRNA is known to regulate up to several hundreds coding genes, thus the integrated analysis of miRNA and mRNA expression data is an important problem. Unfortunately, the integrated analysis is challenging since it needs to consider expression data of two different types, miRNA and mRNA, and target relationship between miRNA and mRNA is not clear, especially when microarray data is used. Fortunately, due to the low sequencing cost, small RNA and RNA sequencing are routinely processed and we may be able to infer regulation relationships between miRNAs and mRNAs more accurately by using sequencing data. However, no method is developed specifically for sequencing data. Thus we developed PlantMirnaT, a new miRNA-mRNA integrated analysis system. To fully leverage the power of sequencing data, three major features are developed and implemented in PlantMirnaT. First, we implemented a plant-specific short read mapping tool based on recent discoveries on miRNA target relationship in plant. Second, we designed and implemented an algorithm considering miRNA targets in the full intragenic region, not just 3' UTR. Lastly but most importantly, our algorithm is designed to consider quantity of miRNA expression and its distribution on target mRNAs. The new algorithm was used to characterize rice under drought condition using our proprietary data. Our algorithm successfully discovered that two miRNAs, miRNA1425-5p, miRNA 398b, that are involved in suppression of glucose pathway in a naturally drought resistant rice, Vandana. The system can be downloaded at https://sites.google.com/site/biohealthinformaticslab/resources.",2015-04-08 +27993780,RBPPred: predicting RNA-binding proteins from sequence using SVM.,"

Motivation

Detection of RNA-binding proteins (RBPs) is essential since the RNA-binding proteins play critical roles in post-transcriptional regulation and have diverse roles in various biological processes. Moreover, identifying RBPs by computational prediction is much more efficient than experimental methods and may have guiding significance on the experiment design.

Results

In this study, we present the RBPPred (an RNA-binding protein predictor), a new method based on the support vector machine, to predict whether a protein binds RNAs, based on a comprehensive feature representation. By integrating the physicochemical properties with the evolutionary information of protein sequences, the new approach RBPPred performed much better than state-of-the-art methods. The results show that RBPPred correctly predicted 83% of 2780 RBPs and 96% out of 7093 non-RBPs with MCC of 0.808 using the 10-fold cross validation. Furthermore, we achieved a sensitivity of 84%, specificity of 97% and MCC of 0.788 on the testing set of human proteome. In addition we tested the capability of RBPPred to identify new RBPs, which further confirmed the practicability and predictability of the method.

Availability and implementation

RBPPred program can be accessed at: http://rnabinding.com/RBPPred.html .

Contact

liushiyong@gmail.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +28724643,A review of the characteristics of dietary fibers relevant to appetite and energy intake outcomes in human intervention trials.,"Background: Many intervention studies have tested the effect of dietary fibers (DFs) on appetite-related outcomes, with inconsistent results. However, DFs comprise a wide range of compounds with diverse properties, and the specific contribution of these to appetite control is not well characterized. Objective: The influence of specific DF characteristics [i.e., viscosity, gel-forming capacity, fermentability, or molecular weight (MW)] on appetite-related outcomes was assessed in healthy humans. Design: Controlled human intervention trials that tested the effects of well-characterized DFs on appetite ratings or energy intake were identified from a systematic search of literature. Studies were included only if they reported 1) DF name and origin and 2) data on viscosity, gelling properties, fermentability, or MW of the DF materials or DF-containing matrixes. Results: A high proportion of the potentially relevant literature was excluded because of lack of adequate DF characterization. In total, 49 articles that met these criteria were identified, which reported 90 comparisons of various DFs in foods, beverages, or supplements in acute or sustained-exposure trials. In 51 of the 90 comparisons, the DF-containing material of interest was efficacious for ≥1 appetite-related outcome. Reported differences in material viscosity, MW, or fermentability did not clearly correspond to differences in efficacy, whereas gel-forming DF sources were consistently efficacious (but with very few comparisons). Conclusions: The overall inconsistent relations of DF properties with respect to efficacy may reflect variation in measurement methodology, nature of the DF preparation and matrix, and study designs. 
Methods of DF characterization, incorporation, and study design are too inconsistent to allow generalized conclusions about the effects of DF properties on appetite and preclude the development of reliable, predictive, structure-function relations. Improved standards for characterization and reporting of DF sources and DF-containing materials are strongly recommended for future studies on the effects of DF on human physiology. This trial was registered at http://www.crd.york.ac.uk/PROSPERO as CRD42015015336.",2017-07-19 +28572074,Characterization of Variability in Toxicokinetics and Toxicodynamics of Tetrachloroethylene Using the Collaborative Cross Mouse Population.,"

Background

Evaluation of interindividual variability is a challenging step in risk assessment. For most environmental pollutants, including perchloroethylene (PERC), experimental data are lacking, resulting in default assumptions being used to account for variability in toxicokinetics and toxicodynamics.

Objective

We quantitatively examined the relationship between PERC toxicokinetics and toxicodynamics at the population level to test whether individuals with increased oxidative metabolism are more sensitive to hepatotoxicity following PERC exposure.

Methods

Male mice from 45 strains of the Collaborative Cross (CC) were orally administered a single dose of PERC (1,000 mg/kg) or vehicle (Alkamuls-EL620) and euthanized at various time points (n = 1/strain/time). Concentration–time profiles were generated for PERC and its primary oxidative metabolite trichloroacetate (TCA) in multiple tissues. Toxicodynamic phenotyping was also performed.

Results

Significant variability among strains was observed in toxicokinetics of PERC and TCA in every tissue examined. Based on area under the curve (AUC), the range of liver TCA levels spanned nearly an order of magnitude (~8-fold). Expression of liver cytochrome P4502E1 did not correlate with TCA levels. Toxicodynamic phenotyping revealed an effect of PERC on bodyweight loss, induction of peroxisome proliferator activated receptor-alpha (PPARα)-regulated genes, and dysregulation of hepatic lipid homeostasis. Clustering was observed among a) liver levels of PERC, TCA, and triglycerides; b) TCA levels in liver and kidney; and c) TCA levels in serum, brain, fat, and lung.

Conclusions

Using the CC mouse population model, we have demonstrated a complex and highly variable relationship between PERC and TCA toxicokinetics and toxicodynamics at the population level. https://doi.org/10.1289/EHP788.",2017-05-30 +25422159,JSparklines: making tabular proteomics data come alive.,"Perhaps the most common way of presenting proteomics data, and indeed life sciences data in general, is by using some form of tabular data. And while tables can be very informative and contain lots of information, the format can be challenging to interpret visually. An elegant and efficient solution is to extend the textual and numerical information with an additional visual layer, referred to as sparklines, making it intuitive to draw inferences about the properties of the underlying data. We here present a free and open source Java library called JSparklines (http://jsparklines.googlecode.com) that allows straightforward addition of a substantial list of customizable sparklines to tabular data representations, and we show examples of how these sparklines greatly simplify the interpretation of the tabular data.",2015-01-19 +28031186,ACTG: novel peptide mapping onto gene models.,"

Summary

In many proteogenomic applications, mapping peptide sequences onto genome sequences can be very useful, because it allows us to understand origins of the gene products. Existing software tools either take the genomic position of a peptide start site as an input or assume that the peptide sequence exactly matches the coding sequence of a given gene model. In case of novel peptides resulting from genomic variations, especially structural variations such as alternative splicing, these existing tools cannot be directly applied unless users supply information about the variant, either its genomic position or its transcription model. Mapping potentially novel peptides to genome sequences, while allowing certain genomic variations, requires introducing novel gene models when aligning peptide sequences to gene structures. We have developed a new tool called ACTG (Amino aCids To Genome), which maps peptides to genome, assuming all possible single exon skipping, junction variation allowing three edit distances from the original splice sites, exon extension and frame shift. In addition, it can also consider SNVs (single nucleotide variations) during mapping phase if a user provides the VCF (variant call format) file as an input.

Availability and implementation

Available at http://prix.hanyang.ac.kr/ACTG/search.jsp .

Contact

eunokpaek@hanyang.ac.kr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +28886602,"Heat Wave and Mortality: A Multicountry, Multicommunity Study.","

Background

Few studies have examined variation in the associations between heat waves and mortality in an international context.

Objectives

We aimed to systematically examine the impacts of heat waves on mortality with lag effects internationally.

Methods

We collected daily data of temperature and mortality from 400 communities in 18 countries/regions and defined 12 types of heat waves by combining community-specific daily mean temperature ≥90th, 92.5th, 95th, and 97.5th percentiles of temperature with duration ≥2, 3, and 4 d. We used time-series analyses to estimate the community-specific heat wave-mortality relation over lags of 0-10 d. Then, we applied meta-analysis to pool heat wave effects at the country level for cumulative and lag effects for each type of heat wave definition.

Results

Heat waves of all definitions had significant cumulative associations with mortality in all countries, but varied by community. The higher the temperature threshold used to define heat waves, the higher heat wave associations on mortality. However, heat wave duration did not modify the impacts. The association between heat waves and mortality appeared acutely and lasted for 3 and 4 d. Heat waves had higher associations with mortality in moderate cold and moderate hot areas than cold and hot areas. There were no added effects of heat waves on mortality in all countries/regions, except for Brazil, Moldova, and Taiwan. Heat waves defined by daily mean and maximum temperatures produced similar heat wave-mortality associations, but not daily minimum temperature.

Conclusions

Results indicate that high temperatures create a substantial health burden, and effects of high temperatures over consecutive days are similar to what would be experienced if high temperature days occurred independently. People living in moderate cold and moderate hot areas are more sensitive to heat waves than those living in cold and hot areas. Daily mean and maximum temperatures had similar ability to define heat waves rather than minimum temperature. https://doi.org/10.1289/EHP1026.",2017-08-10 +23084778,NSort/DB: an intranuclear compartment protein database.,"Distinct substructures within the nucleus are associated with a wide variety of important nuclear processes. Structures such as chromatin and nuclear pores have specific roles, while others such as Cajal bodies are more functionally varied. Understanding the roles of these membraneless intra-nuclear compartments requires extensive data sets covering nuclear and compartment-associated proteins. NSort/DB is a database providing access to intra- or sub-nuclear compartment associations for the mouse nuclear proteome. Based on resources ranging from large-scale curated data sets to detailed experiments, this data set provides a high-quality set of annotations of non-exclusive association of nuclear proteins with structures such as promyelocytic leukaemia bodies and chromatin. The database is searchable by protein identifier or compartment, and has a documented web service API. The search interface, web service and data download are all freely available online at http://www.nsort.org/db/. Availability of this data set will enable systematic analyses of the protein complements of nuclear compartments, improving our understanding of the diverse functional repertoire of these structures.",2012-07-25 +27380939,Evaluation of off-target and on-target scoring algorithms and integration into the guide RNA selection tool CRISPOR.,"

Background

The success of the CRISPR/Cas9 genome editing technique depends on the choice of the guide RNA sequence, which is facilitated by various websites. Despite the importance and popularity of these algorithms, it is unclear to which extent their predictions are in agreement with actual measurements.

Results

We conduct the first independent evaluation of CRISPR/Cas9 predictions. To this end, we collect data from eight SpCas9 off-target studies and compare them with the sites predicted by popular algorithms. We identify problems in one implementation but found that sequence-based off-target predictions are very reliable, identifying most off-targets with mutation rates superior to 0.1 %, while the number of false positives can be largely reduced with a cutoff on the off-target score. We also evaluate on-target efficiency prediction algorithms against available datasets. The correlation between the predictions and the guide activity varied considerably, especially for zebrafish. Together with novel data from our labs, we find that the optimal on-target efficiency prediction model strongly depends on whether the guide RNA is expressed from a U6 promoter or transcribed in vitro. We further demonstrate that the best predictions can significantly reduce the time spent on guide screening.

Conclusions

To make these guidelines easily accessible to anyone planning a CRISPR genome editing experiment, we built a new website ( http://crispor.org ) that predicts off-targets and helps select and clone efficient guide sequences for more than 120 genomes using different Cas9 proteins and the eight efficiency scoring systems evaluated here.",2016-07-05 +27374119,neXtA5: accelerating annotation of articles via automated approaches in neXtProt. ,"The rapid increase in the number of published articles poses a challenge for curated databases to remain up-to-date. To help the scientific community and database curators deal with this issue, we have developed an application, neXtA5, which prioritizes the literature for specific curation requirements. Our system, neXtA5, is a curation service composed of three main elements. The first component is a named-entity recognition module, which annotates MEDLINE over some predefined axes. This report focuses on three axes: Diseases, the Molecular Function and Biological Process sub-ontologies of the Gene Ontology (GO). The automatic annotations are then stored in a local database, BioMed, for each annotation axis. Additional entities such as species and chemical compounds are also identified. The second component is an existing search engine, which retrieves the most relevant MEDLINE records for any given query. The third component uses the content of BioMed to generate an axis-specific ranking, which takes into account the density of named-entities as stored in the Biomed database. The two ranked lists are ultimately merged using a linear combination, which has been specifically tuned to support the annotation of each axis. The fine-tuning of the coefficients is formally reported for each axis-driven search. 
Compared with PubMed, which is the system used by most curators, the improvement is the following: +231% for Diseases, +236% for Molecular Functions and +3153% for Biological Process when measuring the precision of the top-returned PMID (P0 or mean reciprocal rank). The current search methods significantly improve the search effectiveness of curators for three important curation axes. Further experiments are being performed to extend the curation types, in particular protein-protein interactions, which require specific relationship extraction capabilities. In parallel, user-friendly interfaces powered with a set of JSON web services are currently being implemented into the neXtProt annotation pipeline. Available on: http://babar.unige.ch:8082/neXtA5 Database URL: http://babar.unige.ch:8082/neXtA5/fetcher.jsp.",2016-07-03 +26292041,The Presence of Oxalobacter formigenes in the Microbiome of Healthy Young Adults.,"

Purpose

Oxalobacter formigenes, a member of the human colonic microbiota with a major role in net colonic oxalate transport and secretion, is protective against the formation of calcium oxalate kidney stones. We describe the prevalence, relative abundance and stability of O. formigenes in healthy young adults in the United States.

Materials and methods

We used HMP (Human Microbiome Project) data on fecal samples from 242 healthy young adults who had 1 to 3 study visits. Samples underwent whole genomic shotgun sequencing and/or 16S rRNA sequencing. Three data sets available from the processed sequence data were studied, including whole genomic shotgun metagenomic analysis by alignment to reference genomes using shotgun community profiling, or MetaPhlAn (http://huttenhower.sph.harvard.edu/metaphlan) or QIIME (http://qiime.org/) analysis of the V1-3 or V3-5 16S sequences.

Results

O. formigenes was detected in fecal samples using whole genomic shotgun and 16S rRNA data. Analysis of the whole genomic shotgun data set using shotgun community profiling showed that 29 of 94 subjects (31%) were O. formigenes positive. V1-3 and V3-5 analyses were less sensitive for O. formigenes detection. When present, O. formigenes relative abundance varied over 3 log10 and was normally distributed. All assays agreed in 58 of 66 samples (88%) studied by all 3 methods. Of 14 subjects who were O. formigenes positive at baseline 13 (93%) were positive at the followup visit, indicating the stability of colonization.

Conclusions

O. formigenes appears to be stably present in fewer than half of healthy young adults in the United States. It is most sensitively detected by whole genomic shotgun.",2015-08-17 +26773131,FINEMAP: efficient variable selection using summary data from genome-wide association studies.,"

Motivation

The goal of fine-mapping in genomic regions associated with complex diseases and traits is to identify causal variants that point to molecular mechanisms behind the associations. Recent fine-mapping methods using summary data from genome-wide association studies rely on exhaustive search through all possible causal configurations, which is computationally expensive.

Results

We introduce FINEMAP, a software package to efficiently explore a set of the most important causal configurations of the region via a shotgun stochastic search algorithm. We show that FINEMAP produces accurate results in a fraction of processing time of existing approaches and is therefore a promising tool for analyzing growing amounts of data produced in genome-wide association studies and emerging sequencing projects.

Availability and implementation

FINEMAP v1.0 is freely available for Mac OS X and Linux at http://www.christianbenner.com

Contact

christian.benner@helsinki.fi or matti.pirinen@helsinki.fi.",2016-01-14 +26072483,Integrative random forest for gene regulatory network inference.,"

Motivation

Gene regulatory network (GRN) inference based on genomic data is one of the most actively pursued computational biological problems. Because different types of biological data usually provide complementary information regarding the underlying GRN, a model that integrates big data of diverse types is expected to increase both the power and accuracy of GRN inference. Towards this goal, we propose a novel algorithm named iRafNet: integrative random forest for gene regulatory network inference.

Results

iRafNet is a flexible, unified integrative framework that allows information from heterogeneous data, such as protein-protein interactions, transcription factor (TF)-DNA-binding, gene knock-down, to be jointly considered for GRN inference. Using test data from the DREAM4 and DREAM5 challenges, we demonstrate that iRafNet outperforms the original random forest based network inference algorithm (GENIE3), and is highly comparable to the community learning approach. We apply iRafNet to construct GRN in Saccharomyces cerevisiae and demonstrate that it improves the performance in predicting TF-target gene regulations and provides additional functional insights to the predicted gene regulations.

Availability and implementation

The R code of iRafNet implementation and a tutorial are available at: http://research.mssm.edu/tulab/software/irafnet.html",2015-06-01 +26446134,Rust-Bio: a fast and safe bioinformatics library.,"

Summary

We present Rust-Bio, the first general purpose bioinformatics library for the innovative Rust programming language. Rust-Bio leverages the unique combination of speed, memory safety and high-level syntax offered by Rust to provide a fast and safe set of bioinformatics algorithms and data structures with a focus on sequence analysis.

Availability and implementation

Rust-Bio is available open source under the MIT license at https://rust-bio.github.io.

Contact

koester@jimmy.harvard.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-06 +27013647,MuSERA: Multiple Sample Enriched Region Assessment.,"Enriched region (ER) identification is a fundamental step in several next-generation sequencing (NGS) experiment types. Yet, although NGS experimental protocols recommend producing replicate samples for each evaluated condition and their consistency is usually assessed, typically pipelines for ER identification do not consider available NGS replicates. This may alter genome-wide descriptions of ERs, hinder significance of subsequent analyses on detected ERs and eventually preclude biological discoveries that evidence in replicate could support. MuSERA is a broadly useful stand-alone tool for both interactive and batch analysis of combined evidence from ERs in multiple ChIP-seq or DNase-seq replicates. Besides rigorously combining sample replicates to increase statistical significance of detected ERs, it also provides quantitative evaluations and graphical features to assess the biological relevance of each determined ER set within its genomic context; they include genomic annotation of determined ERs, nearest ER distance distribution, global correlation assessment of ERs and an integrated genome browser. We review MuSERA rationale and implementation, and illustrate how sets of significant ERs are expanded by applying MuSERA on replicates for several types of NGS data, including ChIP-seq of transcription factors or histone marks and DNase-seq hypersensitive sites. We show that MuSERA can determine a new, enhanced set of ERs for each sample by locally combining evidence on replicates, and prove how the easy-to-use interactive graphical displays and quantitative evaluations that MuSERA provides effectively support thorough inspection of obtained results and evaluation of their biological content, facilitating their understanding and biological interpretations. 
MuSERA is freely available at http://www.bioinformatics.deib.polimi.it/MuSERA/.",2017-05-01 +27183440,Automated mapping of phenotype space with single-cell data.,"Accurate identification of cell subsets in complex populations is key to discovering novelty in multidimensional single-cell experiments. We present X-shift (http://web.stanford.edu/~samusik/vortex/), an algorithm that processes data sets using fast k-nearest-neighbor estimation of cell event density and arranges populations by marker-based classification. X-shift enables automated cell-subset clustering and access to biological insights that 'prior knowledge' might prevent the researcher from discovering.",2016-05-16 +28062450,The structural bioinformatics library: modeling in biomolecular science and beyond.,"

Motivation

Software in structural bioinformatics has mainly been application driven. To favor practitioners seeking off-the-shelf applications, but also developers seeking advanced building blocks to develop novel applications, we undertook the design of the Structural Bioinformatics Library ( SBL , http://sbl.inria.fr ), a generic C++/python cross-platform software library targeting complex problems in structural bioinformatics. Its tenet is based on a modular design offering a rich and versatile framework allowing the development of novel applications requiring well specified complex operations, without compromising robustness and performances.

Results

The SBL involves four software components (1-4 thereafter). For end-users, the SBL provides ready to use, state-of-the-art (1) applications to handle molecular models defined by unions of balls, to deal with molecular flexibility, to model macro-molecular assemblies. These applications can also be combined to tackle integrated analysis problems. For developers, the SBL provides a broad C++ toolbox with modular design, involving core (2) algorithms, (3) biophysical models and (4) modules, the latter being especially suited to develop novel applications. The SBL comes with a thorough documentation consisting of user and reference manuals, and a bugzilla platform to handle community feedback.

Availability and implementation

The SBL is available from http://sbl.inria.fr.

Contact

Frederic.Cazals@inria.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +24316578,RefSeq microbial genomes database: new representation and annotation strategy.,"The source of the microbial genomic sequences in the RefSeq collection is the set of primary sequence records submitted to the International Nucleotide Sequence Database public archives. These can be accessed through the Entrez search and retrieval system at http://www.ncbi.nlm.nih.gov/genome. Next-generation sequencing has enabled researchers to perform genomic sequencing at rates that were unimaginable in the past. Microbial genomes can now be sequenced in a matter of hours, which has led to a significant increase in the number of assembled genomes deposited in the public archives. This huge increase in DNA sequence data presents new challenges for the annotation, analysis and visualization bioinformatics tools. New strategies have been developed for the annotation and representation of reference genomes and sequence variations derived from population studies and clinical outbreaks.",2013-12-06 +25910699,CiVi: circular genome visualization with unique features to analyze sequence elements.,"

Unlabelled

We have developed CiVi, a user-friendly web-based tool to create custom circular maps to aid the analysis of microbial genomes and sequence elements. Sequence related data such as gene-name, COG class, PFAM domain, GC%, and subcellular location can be comprehensively viewed. Quantitative gene-related data (e.g. expression ratios or read counts) as well as predicted sequence elements (e.g. regulatory sequences) can be uploaded and visualized. CiVi accommodates the analysis of genomic elements by allowing a visual interpretation in the context of: (i) their genome-wide distribution, (ii) provided experimental data and (iii) the local orientation and location with respect to neighboring genes. CiVi thus enables both experts and non-experts to conveniently integrate public genome data with the results of genome analyses in circular genome maps suitable for publication.

Contact

L.Overmars@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.

Availability and implementation

CiVi is freely available at http://civi.cmbi.ru.nl.",2015-04-24 +22416047,"Observ-OM and Observ-TAB: Universal syntax solutions for the integration, search, and exchange of phenotype and genotype information.","Genetic and epidemiological research increasingly employs large collections of phenotypic and molecular observation data from high quality human and model organism samples. Standardization efforts have produced a few simple formats for exchange of these various data, but a lightweight and convenient data representation scheme for all data modalities does not exist, hindering successful data integration, such as assignment of mouse models to orphan diseases and phenotypic clustering for pathways. We report a unified system to integrate and compare observation data across experimental projects, disease databases, and clinical biobanks. The core object model (Observ-OM) comprises only four basic concepts to represent any kind of observation: Targets, Features, Protocols (and their Applications), and Values. An easy-to-use file format (Observ-TAB) employs Excel to represent individual and aggregate data in straightforward spreadsheets. The systems have been tested successfully on human biobank, genome-wide association studies, quantitative trait loci, model organism, and patient registry data using the MOLGENIS platform to quickly setup custom data portals. Our system will dramatically lower the barrier for future data sharing and facilitate integrated search across panels and species. All models, formats, documentation, and software are available for free and open source (LGPLv3) at http://www.observ-om.org.",2012-04-04 +25663356,Streaming visualisation of quantitative mass spectrometry data based on a novel raw signal decomposition method.,"As data rates rise, there is a danger that informatics for high-throughput LC-MS becomes more opaque and inaccessible to practitioners. 
It is therefore critical that efficient visualisation tools are available to facilitate quality control, verification, validation, interpretation, and sharing of raw MS data and the results of MS analyses. Currently, MS data is stored as contiguous spectra. Recall of individual spectra is quick but panoramas, zooming and panning across whole datasets necessitates processing/memory overheads impractical for interactive use. Moreover, visualisation is challenging if significant quantification data is missing due to data-dependent acquisition of MS/MS spectra. In order to tackle these issues, we leverage our seaMass technique for novel signal decomposition. LC-MS data is modelled as a 2D surface through selection of a sparse set of weighted B-spline basis functions from an over-complete dictionary. By ordering and spatially partitioning the weights with an R-tree data model, efficient streaming visualisations are achieved. In this paper, we describe the core MS1 visualisation engine and overlay of MS/MS annotations. This enables the mass spectrometrist to quickly inspect whole runs for ionisation/chromatographic issues, MS/MS precursors for coverage problems, or putative biomarkers for interferences, for example. The open-source software is available from http://seamass.net/viz/.",2015-03-09 +25527095,Empowering biologists with multi-omics data: colorectal cancer as a paradigm.,"

Motivation

Recent completion of the global proteomic characterization of The Cancer Genome Atlas (TCGA) colorectal cancer (CRC) cohort resulted in the first tumor dataset with complete molecular measurements at DNA, RNA and protein levels. Using CRC as a paradigm, we describe the application of the NetGestalt framework to provide easy access and interpretation of multi-omics data.

Results

The NetGestalt CRC portal includes genomic, epigenomic, transcriptomic, proteomic and clinical data for the TCGA CRC cohort, data from other CRC tumor cohorts and cell lines, and existing knowledge on pathways and networks, giving a total of more than 17 million data points. The portal provides features for data query, upload, visualization and integration. These features can be flexibly combined to serve various needs of the users, maximizing the synergy among omics data, human visualization and quantitative analysis. Using three case studies, we demonstrate that the portal not only provides user-friendly data query and visualization but also enables efficient data integration within a single omics data type, across multiple omics data types, and over biological networks.

Availability and implementation

The NetGestalt CRC portal can be freely accessed at http://www.netgestalt.org.

Contact

bing.zhang@vanderbilt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-18 +27421400,Structure diagram of binary Lennard-Jones clusters.,"We analyze the structure diagram for binary clusters of Lennard-Jones particles by means of a global optimization approach for a large range of cluster sizes, compositions, and interaction energies and present a publicly accessible database of 180 000 minimal energy structures (http://softmattertheory.lu/clusters.html). We identify a variety of structures such as core-shell clusters, Janus clusters, and clusters in which the minority species is located at the vertices of icosahedra. Such clusters can be synthesized from nanoparticles in agglomeration experiments and used as building blocks in colloidal molecules or crystals. We discuss the factors that determine the formation of clusters with specific structures.",2016-07-01 +27329603,Network-based identification of microRNAs as potential pharmacogenomic biomarkers for anticancer drugs.,"As the recent development of high-throughput technologies in cancer pharmacogenomics, there is an urgent need to develop new computational approaches for comprehensive identification of new pharmacogenomic biomarkers, such as microRNAs (miRNAs). In this study, a network-based framework, namely the SMiR-NBI model, was developed to prioritize miRNAs as potential biomarkers characterizing treatment responses of anticancer drugs on the basis of a heterogeneous network connecting drugs, miRNAs and genes. A high area under the receiver operating characteristic curve of 0.820 ± 0.013 was yielded during 10-fold cross validation. In addition, high performance was further validated in identifying new anticancer mechanism-of-action for natural products and non-steroidal anti-inflammatory drugs. Finally, the newly predicted miRNAs for tamoxifen and metformin were experimentally validated in MCF-7 and MDA-MB-231 breast cancer cell lines via qRT-PCR assays. 
High success rates of 60% and 65% were yielded for tamoxifen and metformin, respectively. Specifically, 11 oncomiRNAs (e.g. miR-20a-5p, miR-27a-3p, miR-29a-3p, and miR-146a-5p) from the top 20 predicted miRNAs were experimentally verified as new pharmacogenomic biomarkers for metformin in MCF-7 or MDA-MB-231 cell lines. In summary, the SMiR-NBI model would provide a powerful tool to identify potential pharmacogenomic biomarkers characterized by miRNAs in the emerging field of precision cancer medicine, which is available at http://lmmd.ecust.edu.cn/database/smir-nbi/.",2016-07-01 +27504574,Responding to change in a challenging climate: 2015 five-year report of the Policy and Planning Board.,"The American Psychological Association (APA) Bylaws Article XI.7 (http://www.apa.org/about/governance/bylaws/article-11.aspx) requires that the Policy and Planning Board report annually by publication to the membership and review the structure and function of the association as a whole every fifth year. This report details the board's 5-year review, including APA's challenges and achievements from 2011 through 2015 within the context of broader social and environmental changes. Recommended priorities for future change are offered. (PsycINFO Database Record",2016-07-01 +27044653,SoftPanel: a website for grouping diseases and related disorders for generation of customized panels.,"

Background

Targeted next-generation sequencing is playing an increasingly important role in biological research and clinical diagnosis by allowing researchers to sequence high priority genes at much higher depths and at a fraction of the cost of whole genome or exome sequencing. However, in designing the panel of genes to be sequenced, investigators need to consider the tradeoff between the better sensitivity of a broad panel and the higher specificity of a potentially more relevant panel. Although tools to prioritize candidate disease genes have been developed, the great majority of these require prior knowledge and a set of seed genes as input, which is only possible for diseases with a known genetic etiology.

Results

To meet the demands of both researchers and clinicians, we have developed a user-friendly website called SoftPanel. This website is intended to serve users by allowing them to input a single disorder or a disorder group and generate a panel of genes predicted to underlie the disorder of interest. Various methods of retrieval including a keyword search, browsing of an arborized list of International Classification of Diseases, 10th revision (ICD-10) codes or using disorder phenotypic similarities can be combined to define a group of disorders and the genes known to be associated with them. Moreover, SoftPanel enables users to expand or refine a gene list by utilizing several biological data resources. In addition to providing users with the facility to create a ""hard"" panel that contains an exact gene list for targeted sequencing, SoftPanel also enables generation of a ""soft"" panel of genes, which may be used to further filter a significantly altered set of genes identified through whole genome or whole exome sequencing. The service and data provided by SoftPanel can be accessed at http://www.isb.pku.edu.cn/SoftPanel/ . A tutorial page is included for trying out sample data and interpreting results.

Conclusion

SoftPanel provides a convenient and powerful tool for creating a targeted panel of potential disease genes while supporting different forms of input. SoftPanel may be utilized in both genomics research and personalized medicine.",2016-04-05 +24271398,NeXO Web: the NeXO ontology database and visualization platform.,"The Network-extracted Ontology (NeXO) is a gene ontology inferred directly from large-scale molecular networks. While most ontologies are constructed through manual expert curation, NeXO uses a principled computational approach which integrates evidence from hundreds of thousands of individual gene and protein interactions to construct a global hierarchy of cellular components and processes. Here, we describe the development of the NeXO Web platform (http://www.nexontology.org)-an online database and graphical user interface for visualizing, browsing and performing term enrichment analysis using NeXO and the gene ontology. The platform applies state-of-the-art web technology and visualization techniques to provide an intuitive framework for investigating biological machinery captured by both data-driven and manually curated ontologies.",2013-11-23 +26322270,Analysis of transcript changes in a heme-deficient mutant of Escherichia coli in response to CORM-3 [Ru(CO)3Cl(glycinate)].,"This article describes in extended detail the methodology applied for acquisition of transcriptomic data, and subsequent statistical data modelling, published by Wilson et al. (2015) in a study of the effects of carbon monoxide-releasing molecule-3 (CORM-3 [Ru(CO)3Cl(glycinate)]) on heme-deficient bacteria. The objective was to identify non-heme targets of CORM action. Carbon monoxide (CO) interacts with heme-containing proteins, in particular respiratory cytochromes; however, CORMs have been shown to elicit multifaceted effects in bacteria, suggesting that the compounds may have additional targets. 
We therefore sought to elucidate the activity of CORM-3, the first water-soluble CORM and one of the most characterised CORMs to date, in bacteria devoid of heme synthesis. Importantly, we also tested inactive CORM-3 (iCORM-3), a ruthenium co-ligand fragment that does not release CO, in order to differentiate between CO- and compound-related effects. A well-established hemA mutant of Escherichia coli was used for the study and, for comparison, parallel experiments were performed on the corresponding wild-type strain. Global transcriptomic changes induced by CORM-3 and iCORM-3 were evaluated using a Two-Color Microarray-Based Prokaryote Analysis (FairPlay III Labeling) by Agilent Technologies (Inc. 2009). Data acquisition was carried out using Agilent Feature Extraction software (v6.5) and data normalisation, as well as information about gene products and their function was obtained from GeneSpring GX v7.3 (Agilent Technologies). Functional category lists were created using KEGG (Kyoto Encyclopedia of Genes and Genomes). Relevant regulatory proteins for each gene were identified, where available, using regulonDB and EcoCyc (World Wide Web). Statistical data modelling was performed on the gene expression data to infer transcription factor activities. The transcriptomic data can be accessed through NCBI's Gene Expression Omnibus (GEO): series accession number GSE55097 (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE55097).",2015-06-13 +26132000,Using the PhenX Toolkit to Add Standard Measures to a Study.,"The PhenX (consensus measures for Phenotypes and eXposures) Toolkit (https://www.phenxtoolkit.org/) offers high-quality, well-established measures of phenotypes and exposures for use by the scientific community. The goal is to promote the use of standard measures, enhance data interoperability, and help investigators identify opportunities for collaborative and translational research. 
The Toolkit contains 395 measures drawn from 22 research domains (fields of research), along with additional collections of measures for Substance Abuse and Addiction (SAA) research, Mental Health Research (MHR), and Tobacco Regulatory Research (TRR). Additional measures for TRR that are expected to be released in 2015 include Obesity, Eating Disorders, and Sickle Cell Disease. Measures are selected by working groups of domain experts using a consensus process that includes input from the scientific community. The Toolkit provides a description of each PhenX measure, the rationale for including it in the Toolkit, protocol(s) for collecting the measure, and supporting documentation. Users can browse measures in the Toolkit or can search the Toolkit using the Smart Query Tool or a full text search. PhenX Toolkit users select measures of interest to add to their Toolkit. Registered Toolkit users can save their Toolkit and return to it later to revise or complete. They then have options to download a customized Data Collection Worksheet that specifies the data to be collected, and a Data Dictionary that describes each variable included in the Data Collection Worksheet. The Toolkit also has a Register Your Study feature that facilitates cross-study collaboration by allowing users to find other investigators using the same PhenX measures.",2015-07-01 +23792403,Systemic nucleoside antiviral agents may be effective in prevention of recurrent herpes labialis.,"

Data sources

The Cochrane Central Register of Controlled Trials, Cochrane Database of Systematic Reviews, Medline and Embase databases were searched together with the reference lists of primary studies, commentaries and reviews. Grey literature resources including the System for Information on Grey Literature in Europe, the Scopus Web and Patent searches, Proquest Dissertations and Theses Fulltext, the Index to Scientific and Technical Proceedings and the clinical trials registry (http://clinicaltrials.gov) were also searched.

Study selection

Randomised controlled trials (RCTs) involving nucleoside antiviral agents for the prevention of recurrent oral herpes in healthy immunocompetent subjects ≥12 years old were included. No language restrictions were applied. Study quality was assessed following Cochrane guidelines.

Data extraction and synthesis

Data were abstracted using a standardised data extraction form and analysed with meta-analysis carried out only with studies that reported the same outcome measure.

Results

Ten studies were included, only one study was considered to have a low risk of bias, five an unclear risk and four a high risk of bias. Oral acyclovir (800-1,600 mg daily) and valacyclovir (500 mg daily for four months) were shown to be effective in the prevention of RHL when taken prior to the appearance of any symptoms or exposure to triggers.

Conclusions

This review found support for the use of systemic acyclovir and valacyclovir for the prevention of RHL. However, the findings from this review should be interpreted with caution, because the methodologic assessment of the quality of the included studies showed an unclear risk of bias in five out of the ten included papers, and a high risk of bias in four studies.",2013-01-01 +23681907,From manual curation to visualization of gene families and networks across Solanaceae plant species.,"High-quality manual annotation methods and practices need to be scaled to the increased rate of genomic data production. Curation based on gene families and gene networks is one approach that can significantly increase both curation efficiency and quality. The Sol Genomics Network (SGN; http://solgenomics.net) is a comparative genomics platform, with genetic, genomic and phenotypic information of the Solanaceae family and its closely related species that incorporates a community-based gene and phenotype curation system. In this article, we describe a manual curation system for gene families aimed at facilitating curation, querying and visualization of gene interaction patterns underlying complex biological processes, including an interface for efficiently capturing information from experiments with large data sets reported in the literature. Well-annotated multigene families are useful for further exploration of genome organization and gene evolution across species. As an example, we illustrate the system with the multigene transcription factor families, WRKY and Small Auxin Up-regulated RNA (SAUR), which both play important roles in responding to abiotic stresses in plants. Database URL: http://solgenomics.net/",2013-05-15 +23226127,The UCLA multimodal connectivity database: a web-based platform for brain connectivity matrix sharing and analysis.,"Brain connectomics research has rapidly expanded using functional MRI (fMRI) and diffusion-weighted MRI (dwMRI). 
A common product of these varied analyses is a connectivity matrix (CM). A CM stores the connection strength between any two regions (""nodes"") in a brain network. This format is useful for several reasons: (1) it is highly distilled, with minimal data size and complexity, (2) graph theory can be applied to characterize the network's topology, and (3) it retains sufficient information to capture individual differences such as age, gender, intelligence quotient (IQ), or disease state. Here we introduce the UCLA Multimodal Connectivity Database (http://umcd.humanconnectomeproject.org), an openly available website for brain network analysis and data sharing. The site is a repository for researchers to publicly share CMs derived from their data. The site also allows users to select any CM shared by another user, compute graph theoretical metrics on the site, visualize a report of results, or download the raw CM. To date, users have contributed over 2000 individual CMs, spanning different imaging modalities (fMRI, dwMRI) and disorders (Alzheimer's, autism, Attention Deficit Hyperactive Disorder). To demonstrate the site's functionality, whole brain functional and structural connectivity matrices are derived from 60 subjects' (ages 26-45) resting state fMRI (rs-fMRI) and dwMRI data and uploaded to the site. The site is utilized to derive graph theory global and regional measures for the rs-fMRI and dwMRI networks. Global and nodal graph theoretical measures between functional and structural networks exhibit low correspondence. This example demonstrates how this tool can enhance the comparability of brain networks from different imaging modalities and studies. The existence of this connectivity-based repository should foster broader data sharing and enable larger-scale meta-analyses comparing networks across imaging modality, age group, and disease state.",2012-11-28 +22323457,iSyTE: integrated Systems Tool for Eye gene discovery.,"

Purpose

To facilitate the identification of genes associated with cataract and other ocular defects, the authors developed and validated a computational tool termed iSyTE (integrated Systems Tool for Eye gene discovery; http://bioinformatics.udel.edu/Research/iSyTE). iSyTE uses a mouse embryonic lens gene expression data set as a bioinformatics filter to select candidate genes from human or mouse genomic regions implicated in disease and to prioritize them for further mutational and functional analyses.

Methods

Microarray gene expression profiles were obtained for microdissected embryonic mouse lens at three key developmental time points in the transition from the embryonic day (E)10.5 stage of lens placode invagination to E12.5 lens primary fiber cell differentiation. Differentially regulated genes were identified by in silico comparison of lens gene expression profiles with those of whole embryo body (WB) lacking ocular tissue.

Results

Gene set analysis demonstrated that this strategy effectively removes highly expressed but nonspecific housekeeping genes from lens tissue expression profiles, allowing identification of less highly expressed lens disease-associated genes. Among 24 previously mapped human genomic intervals containing genes associated with isolated congenital cataract, the mutant gene is ranked within the top two iSyTE-selected candidates in approximately 88% of cases. Finally, in situ hybridization confirmed lens expression of several novel iSyTE-identified genes.

Conclusions

iSyTE is a publicly available Web resource that can be used to prioritize candidate genes within mapped genomic intervals associated with congenital cataract for further investigation. Extension of this approach to other ocular tissue components will facilitate eye disease gene discovery.",2012-03-21 +26379782,RRegrs: an R package for computer-aided model selection with multiple regression models.,"

Background

Predictive regression models can be created with many different modelling approaches. Choices need to be made for data set splitting, cross-validation methods, specific regression parameters and best model criteria, as they all affect the accuracy and efficiency of the produced predictive models, and therefore, raising model reproducibility and comparison issues. Cheminformatics and bioinformatics are extensively using predictive modelling and exhibit a need for standardization of these methodologies in order to assist model selection and speed up the process of predictive model development. A tool accessible to all users, irrespectively of their statistical knowledge, would be valuable if it tests several simple and complex regression models and validation schemes, produce unified reports, and offer the option to be integrated into more extensive studies. Additionally, such methodology should be implemented as a free programming package, in order to be continuously adapted and redistributed by others.

Results

We propose an integrated framework for creating multiple regression models, called RRegrs. The tool offers the option of ten simple and complex regression methods combined with repeated 10-fold and leave-one-out cross-validation. Methods include Multiple Linear regression, Generalized Linear Model with Stepwise Feature Selection, Partial Least Squares regression, Lasso regression, and Support Vector Machines Recursive Feature Elimination. The new framework is an automated fully validated procedure which produces standardized reports to quickly oversee the impact of choices in modelling algorithms and assess the model and cross-validation results. The methodology was implemented as an open source R package, available at https://www.github.com/enanomapper/RRegrs, by reusing and extending on the caret package.

Conclusion

The universality of the new methodology is demonstrated using five standard data sets from different scientific fields. Its efficiency in cheminformatics and QSAR modelling is shown with three use cases: proteomics data for surface-modified gold nanoparticles, nano-metal oxides descriptor data, and molecular descriptors for acute aquatic toxicity data. The results show that for all data sets RRegrs reports models with equal or better performance for both training and test sets than those reported in the original publications. Its good performance as well as its adaptability in terms of parameter optimization could make RRegrs a popular framework to assist the initial exploration of predictive models, and with that, the design of more comprehensive in silico screening applications. Graphical abstract: RRegrs is a computer-aided model selection framework for R multiple regression models; this is a fully validated procedure with application to QSAR modelling.",2015-09-15 +27307609,Influence maximization in time bounded network identifies transcription factors regulating perturbed pathways.,"

Motivation

To understand the dynamic nature of the biological process, it is crucial to identify perturbed pathways in an altered environment and also to infer regulators that trigger the response. Current time-series analysis methods, however, are not powerful enough to identify perturbed pathways and regulators simultaneously. Widely used methods include methods to determine gene sets such as differentially expressed genes or gene clusters and these genes sets need to be further interpreted in terms of biological pathways using other tools. Most pathway analysis methods are not designed for time series data and they do not consider gene-gene influence on the time dimension.

Results

In this article, we propose a novel time-series analysis method TimeTP for determining transcription factors (TFs) regulating pathway perturbation, which narrows the focus to perturbed sub-pathways and utilizes the gene regulatory network and protein-protein interaction network to locate TFs triggering the perturbation. TimeTP first identifies perturbed sub-pathways that propagate the expression changes along the time. Starting points of the perturbed sub-pathways are mapped into the network and the most influential TFs are determined by influence maximization technique. The analysis result is visually summarized in TF-PATHWAY MAP IN TIME CLOCK: TimeTP was applied to PIK3CA knock-in dataset and found significant sub-pathways and their regulators relevant to the PIP3 signaling pathway.

Availability and implementation

TimeTP is implemented in Python and available at http://biohealth.snu.ac.kr/software/TimeTP/ Supplementary information: Supplementary data are available at Bioinformatics online.

Contact

sunkim.bioinfo@snu.ac.kr.",2016-06-01 +27295644,Parallel and Space-Efficient Construction of Burrows-Wheeler Transform and Suffix Array for Big Genome Data.,"Next-generation sequencing technologies have led to the sequencing of more and more genomes, propelling related research into the era of big data. In this paper, we present ParaBWT, a parallelized Burrows-Wheeler transform (BWT) and suffix array construction algorithm for big genome data. In ParaBWT, we have investigated a progressive construction approach to constructing the BWT of single genome sequences in linear space complexity, but with a small constant factor. This approach has been further parallelized using multi-threading based on a master-slave coprocessing model. After gaining the BWT, the suffix array is constructed in a memory-efficient manner. The performance of ParaBWT has been evaluated using two sequences generated from two human genome assemblies: the Ensembl Homo sapiens assembly and the human reference genome. Our performance comparison to FMD-index and Bwt-disk reveals that on 12 CPU cores, ParaBWT runs up to 2.2× faster than FMD-index and up to 99.0× faster than Bwt-disk. BWT construction algorithms for very long genomic sequences are time consuming and (due to their incremental nature) inherently difficult to parallelize. Thus, their parallelization is challenging and even relatively small speedups like the ones of our method over FMD-index are of high importance to research. ParaBWT is written in C++, and is freely available at http://parabwt.sourceforge.net.",2016-05-01 +26498826,HiFive: a tool suite for easy and efficient HiC and 5C data analysis.,"The chromatin interaction assays 5C and HiC have advanced our understanding of genomic spatial organization, but analysis approaches for these data are limited by usability and flexibility. The HiFive tool suite provides efficient data handling and a variety of normalization approaches for easy, fast analysis and method comparison. 
Integration of MPI-based parallelization allows scalability and rapid processing time. In addition to single-command analysis of an entire experiment from mapped reads to interaction values, HiFive has been integrated into the open-source, web-based platform Galaxy to connect users with computational resources and a graphical interface. HiFive is open-source software available from http://taylorlab.org/software/hifive/ .",2015-10-24 +28596779,Characterization of Aldehyde Oxidase (AO) Genes Involved in the Accumulation of Carotenoid Pigments in Wheat Grain.,"Aldehyde Oxidase (AO) enzyme (EC 1.2.3.1) catalyzes the final steps of carotenoid catabolism and it is a key enzyme in the abscisic acid (ABA) biosynthesis. AO isoforms are located in the cytosolic compartment of tissues in many plants, where they induce the oxidation of aldehydes into carboxylic acid, and in addition, catalyze the hydroxylation of some heterocycles. The goal of the present study was to characterize the AO genes involved in the accumulation of carotenoid pigments in wheat grain, an important quantitative trait controlled by multiple genes. The cDNAs corresponding to the four AO isoforms from Arabidopsis thaliana and five AO isoforms from Brachypodium distachyon were used as query in 454 sequence assemblies data for Triticum aestivum cv. Chinese Spring (https://urgi.versailles.inra.fr/blast/blast.php) to obtain the partial or whole orthologous wheat AO sequences. Three wheat isoforms, designated AO1, AO2, and AO3 were located on the chromosome groups 2, 5, and 7, respectively, and mapped on two consensus wheat maps by SNP markers located within the AO gene sequences. To validate the possible relationships between AO3 genes and carotenoid accumulation in wheat, the expression levels of AO-A3 and AO-B3 gene were determined during the kernel maturation stage of two durum wheat cultivars, Ciccio and Svevo, characterized by a low and high carotenoid content, respectively. 
Different AO-A3 gene expression values were observed between the two cultivars indicating that the AO-A3 allele present in Ciccio was more active in carotenoid degradation. A gene marker was developed and can be used for marker-assisted selection in wheat breeding programs.",2017-05-24 +24078710,A user-oriented web crawler for selectively acquiring online content in e-health research.,"

Motivation

Life stories of diseased and healthy individuals are abundantly available on the Internet. Collecting and mining such online content can offer many valuable insights into patients' physical and emotional states throughout the pre-diagnosis, diagnosis, treatment and post-treatment stages of the disease compared with those of healthy subjects. However, such content is widely dispersed across the web. Using traditional query-based search engines to manually collect relevant materials is rather labor intensive and often incomplete due to resource constraints in terms of human query composition and result parsing efforts. The alternative option, blindly crawling the whole web, has proven inefficient and unaffordable for e-health researchers.

Results

We propose a user-oriented web crawler that adaptively acquires user-desired content on the Internet to meet the specific online data source acquisition needs of e-health researchers. Experimental results on two cancer-related case studies show that the new crawler can substantially accelerate the acquisition of highly relevant online content compared with the existing state-of-the-art adaptive web crawling technology. For the breast cancer case study using the full training set, the new method achieves a cumulative precision between 74.7 and 79.4% after 5 h of execution till the end of the 20-h long crawling session as compared with the cumulative precision between 32.8 and 37.0% using the peer method for the same time period. For the lung cancer case study using the full training set, the new method achieves a cumulative precision between 56.7 and 61.2% after 5 h of execution till the end of the 20-h long crawling session as compared with the cumulative precision between 29.3 and 32.4% using the peer method. Using the reduced training set in the breast cancer case study, the cumulative precision of our method is between 44.6 and 54.9%, whereas the cumulative precision of the peer method is between 24.3 and 26.3%; for the lung cancer case study using the reduced training set, the cumulative precisions of our method and the peer method are, respectively, between 35.7 and 46.7% versus between 24.1 and 29.6%. These numbers clearly show a consistently superior accuracy of our method in discovering and acquiring user-desired online content for e-health research.

Availability and implementation

The implementation of our user-oriented web crawler is freely available to non-commercial users via the following Web site: http://bsec.ornl.gov/AdaptiveCrawler.shtml. The Web site provides a step-by-step guide on how to execute the web crawler implementation. In addition, the Web site provides the two study datasets including manually labeled ground truth, initial seeds and the crawling results reported in this article.",2013-09-29 +23725466,PMTED: a plant microRNA target expression database.,"

Background

MicroRNAs (miRNAs) are identified in nearly all plants where they play important roles in development and stress responses by target mRNA cleavage or translation repression. MiRNAs exert their functions by sequence complementation with target genes and hence their targets can be predicted using bioinformatics algorithms. In the past two decades, microarray technology has been employed to study genes involved in important biological processes such as biotic response, abiotic response, and specific tissues and developmental stages, many of which are miRNA targets. Despite their value in assisting research work for plant biologists, miRNA target genes are difficult to access without pre-processing and assistance of necessary analytical and visualization tools because they are embedded in a large body of microarray data that are scattered around in public databases.

Description

Plant MiRNA Target Expression Database (PMTED) is designed to retrieve and analyze expression profiles of miRNA targets represented in the plethora of existing microarray data that are manually curated. It provides a Basic Information query function for miRNAs and their target sequences, gene ontology, and differential expression profiles. It also provides searching and browsing functions for a global Meta-network among species, bioprocesses, conditions, and miRNAs, meta-terms curated from well annotated microarray experiments. Networks are displayed through a Cytoscape Web-based graphical interface. In addition to conserved miRNAs, PMTED provides a target prediction portal for user-defined novel miRNAs and corresponding target expression profile retrieval. Hypotheses that are suggested by miRNA-target networks should provide starting points for further experimental validation.

Conclusions

PMTED exploits value-added microarray data to study the contextual significance of miRNA target genes and should assist functional investigation for both miRNAs and their targets. PMTED will be updated over time and is freely available for non-commercial use at http://pmted.agrinome.org.",2013-06-03 +26286719,A convex formulation for joint RNA isoform detection and quantification from multiple RNA-seq samples.,"

Background

Detecting and quantifying isoforms from RNA-seq data is an important but challenging task. The problem is often ill-posed, particularly at low coverage. One promising direction is to exploit several samples simultaneously.

Results

We propose a new method for solving the isoform deconvolution problem jointly across several samples. We formulate a convex optimization problem that allows to share information between samples and that we solve efficiently. We demonstrate the benefits of combining several samples on simulated and real data, and show that our approach outperforms pooling strategies and methods based on integer programming.

Conclusion

Our convex formulation to jointly detect and quantify isoforms from RNA-seq data of multiple related samples is a computationally efficient approach to leverage the hypotheses that some isoforms are likely to be present in several samples. The software and source code are available at http://cbio.ensmp.fr/flipflop.",2015-08-19 +27354697,Cyclo-lib: a database of computational molecular dynamics simulations of cyclodextrins.,"

Motivation

Cyclodextrins (CDs) are amongst the most versatile/multi-functional molecules used in molecular research and chemical applications. They are natural cyclic oligosaccharides typically employed to encapsulate hydrophobic groups in their central cavity. This allows solubilizing, protecting or reducing the toxicity of a large variety of different molecules including drugs, dyes and surfactant agents. In spite of their great potential, atomic level information of these molecules, which is key for their function, is really scarce. Computational Molecular Dynamics (MD) simulations have the potential to efficiently fill this gap, providing structural-dynamic information at atomic level in time scales ranging from ps to μs.

Results

Cyclo-lib is a database with a publicly accessible web-interface containing structural and dynamic analysis obtained from computational MD simulation trajectories (250 ns long) of native and modified CDs in explicit water molecules. Cyclo-lib currently includes 70 CDs typically employed for fundamental and industrial research. Tools for comparative analysis between different CDs, as well as to restrict the analysis to specific time-segments within the trajectories are also available. Cyclo-lib provides atomic resolution information aimed to complement experimental results performed with the same molecules.

Availability and implementation

The database is freely available under http://cyclo-lib.mduse.com/ CONTACT: Angel.Pineiro@usc.es.",2016-06-27 +27446111,PlantFuncSSR: Integrating First and Next Generation Transcriptomics for Mining of SSR-Functional Domains Markers.,"Analysis of repetitive DNA sequence content and divergence among the repetitive functional classes is a well-accepted approach for estimation of inter- and intra-generic differences in plant genomes. Among these elements, microsatellites, or Simple Sequence Repeats (SSRs), have been widely demonstrated as powerful genetic markers for species and varieties discrimination. We present PlantFuncSSRs platform having more than 364 plant species with more than 2 million functional SSRs. They are provided with detailed annotations for easy functional browsing of SSRs and with information on primer pairs and associated functional domains. PlantFuncSSRs can be leveraged to identify functional-based genic variability among the species of interest, which might be of particular interest in developing functional markers in plants. This comprehensive on-line portal unifies mining of SSRs from first and next generation sequencing datasets, corresponding primer pairs and associated in-depth functional annotation such as gene ontology annotation, gene interactions and its identification from reference protein databases. PlantFuncSSRs is freely accessible at: http://www.bioinfocabd.upo.es/plantssr.",2016-06-27 +23468181,EsPal: one-stop shopping for Spanish word properties.,"This article introduces EsPal: a Web-accessible repository containing a comprehensive set of properties of Spanish words. EsPal is based on an extensible set of data sources, beginning with a 300 million token written database and a 460 million token subtitle database. Properties available include word frequency, orthographic structure and neighborhoods, phonological structure and neighborhoods, and subjective ratings such as imageability. 
Subword structure properties are also available in terms of bigrams and trigrams, biphones, and bisyllables. Lemma and part-of-speech information and their corresponding frequencies are also indexed. The website enables users either to upload a set of words to receive their properties or to receive a set of words matching constraints on the properties. The properties themselves are easily extensible and will be added over time as they become available. It is freely available from the following website: http://www.bcbl.eu/databases/espal/ .",2013-12-01 +27922074,Resistance gene identification from Larimichthys crocea with machine learning techniques.,"The research on resistance genes (R-gene) plays a vital role in bioinformatics as it has the capability of coping with adverse changes in the external environment, which can form the corresponding resistance protein by transcription and translation. It is meaningful to identify and predict R-gene of Larimichthys crocea (L.Crocea). It is friendly for breeding and the marine environment as well. Large amounts of L.Crocea's immune mechanisms have been explored by biological methods. However, much about them is still unclear. In order to break the limited understanding of the L.Crocea's immune mechanisms and to detect new R-gene and R-gene-like genes, this paper came up with a more useful combination prediction method, which is to extract and classify the feature of available genomic data by machine learning. The effectiveness of feature extraction and classification methods to identify potential novel R-gene was evaluated, and different statistical analyzes were utilized to explore the reliability of prediction method, which can help us further understand the immune mechanisms of L.Crocea against pathogens. In this paper, a webserver called LCRG-Pred is available at http://server.malab.cn/rg_lc/.",2016-12-06 +27001666,Leveraging protein quaternary structure to identify oncogenic driver mutations.,"

Background

Identifying key ""driver"" mutations which are responsible for tumorigenesis is critical in the development of new oncology drugs. Due to multiple pharmacological successes in treating cancers that are caused by such driver mutations, a large body of methods have been developed to differentiate these mutations from the benign ""passenger"" mutations which occur in the tumor but do not further progress the disease. Under the hypothesis that driver mutations tend to cluster in key regions of the protein, the development of algorithms that identify these clusters has become a critical area of research.

Results

We have developed a novel methodology, QuartPAC (Quaternary Protein Amino acid Clustering), that identifies non-random mutational clustering while utilizing the protein quaternary structure in 3D space. By integrating the spatial information in the Protein Data Bank (PDB) and the mutational data in the Catalogue of Somatic Mutations in Cancer (COSMIC), QuartPAC is able to identify clusters which are otherwise missed in a variety of proteins. The R package is available on Bioconductor at: http://bioconductor.jp/packages/3.1/bioc/html/QuartPAC.html .

Conclusion

QuartPAC provides a unique tool to identify mutational clustering while accounting for the complete folded protein quaternary structure.",2016-03-22 +27914557,Refining the relevant population in forensic voice comparison - A response to Hicks et alii (2015) The importance of distinguishing information from evidence/observations when formulating propositions.,"Hicks et alii [Sci. Just. 55 (2015) 520-525. http://dx.doi.org/10.1016/j.scijus.2015.06.008] propose that forensic speech scientists not use the accent of the speaker of questioned identity to refine the relevant population. This proposal is based on a lack of understanding of the realities of forensic voice comparison. If it were implemented, it would make data-based forensic voice comparison analysis within the likelihood ratio framework virtually impossible. We argue that it would also lead forensic speech scientists to present invalid unreliable strength of evidence statements, and not allow them to conduct the tests that would make them aware of this problem.",2016-07-11 +23667806,Protein co-migration database (PCoM -DB) for Arabidopsis thylakoids and Synechocystis cells.,"Protein-protein interactions are critical for most cellular processes; however, many remain to be identified. Here, to comprehensively identify protein complexes in photosynthetic organisms, we applied the recently developed approach of blue native PAGE (BN-PAGE) coupled with LC-MS/MS to the thylakoid proteins of Arabidopsis thaliana and the whole cell proteins of Synechocystis sp. PCC 6803. We identified 245 proteins from the purified Arabidopsis thylakoid membranes and 1,458 proteins from the whole cells of Synechocystis using the method. Next, we generated protein migration profiles that were assessed by plotting the label-free estimations of protein abundances versus migration distance in BN-PAGE. 
Comparisons between the migration profiles of the major photosynthetic complexes and their band patterns showed that the protein migration profiles were well correlated. Thus, the protein migration profiles allowed us to estimate the molecular size of each protein complex and to identify co-migrated proteins with the proteins of interest by determining the protein pairs that contained peaks in the same gel slice. Finally, we built the protein co-migration database for photosynthetic organisms (PCoM-DB: http://pcomdb.lowtem.hokudai.ac.jp/proteins/top) to make our data publicly accessible online, which stores the analyzed data with a user-friendly interface to compare the migration profiles of proteins of interest. It helps users to find unidentified protein complexes in Arabidopsis thylakoids and Synechocystis cells. The accumulation of the data from the BN-PAGE coupled with LC-MS/MS should reveal unidentified protein complexes and should aid in understanding the adaptation and the evolution of photosynthetic organisms.",2013-04-08 +25673340,MESMER: minimal ensemble solutions to multiple experimental restraints.,"

Motivation

Macromolecular structures and interactions are intrinsically heterogeneous, temporally adopting a range of configurations that can confound the analysis of data from bulk experiments. To obtain quantitative insights into heterogeneous systems, an ensemble-based approach can be employed, in which predicted data computed from a collection of models is compared to the observed experimental results. By simultaneously fitting orthogonal structural data (e.g. small-angle X-ray scattering, nuclear magnetic resonance residual dipolar couplings, dipolar electron-electron resonance spectra), the range and population of accessible macromolecule structures can be probed.

Results

We have developed MESMER, software that enables the user to identify ensembles that can recapitulate experimental data by refining thousands of component collections selected from an input pool of potential structures. The MESMER suite includes a powerful graphical user interface (GUI) to streamline usage of the command-line tools, calculate data from structure libraries and perform analyses of conformational and structural heterogeneity. To allow for incorporation of other data types, modular Python plugins enable users to compute and fit data from nearly any type of quantitative experimental data.

Results

Conformational heterogeneity in three macromolecular systems was analyzed with MESMER, demonstrating the utility of the streamlined, user-friendly software.

Availability and implementation

https://code.google.com/p/mesmer/",2015-02-10 +26454277,I-PV: a CIRCOS module for interactive protein sequence visualization.,"

Summary

Today's genome browsers and protein databanks supply vast amounts of information about proteins. The challenge is to concisely bring together this information in an interactive and easy to generate format.

Availability and implementation

We have developed an interactive CIRCOS module called i-PV to visualize user supplied protein sequence, conservation and SNV data in a live presentable format. I-PV can be downloaded from http://www.i-pv.org.

Contact

ibrahim.tanyalcin@i-pv.org, itanyalc@vub.ac.be or support@i-pv.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-10 +26667885,The Evolving Scale and Profile of Cancer Worldwide: Much Ado About Everything.,"Today, cancer is responsible for one in three premature deaths from noncommunicable diseases worldwide, and the number of annual cancer diagnoses will rise to well over 20 million by the year 2030. That cancer is of profound importance to future global health reflects both recent gains in human development as well as mortality transitions that are centuries old. Still, cancer is complex, and the extensive geographical and temporal heterogeneity alerts us to the need for targeted, local approaches to cancer control. The study of trends in specific cancer types remains essential in monitoring and evaluating such strategies and as a descriptive tool for hypothesizing possible contributory factors. Of greatest necessity is an expansion of the availability of high-quality data. To improve the limited cancer incidence data available in low- and middle-income countries (LMIC), the Global Initiative for Cancer Registry Development (http://gicr.iarc.fr) is an international partnership supporting countries to redraw the surveillance map.",2015-12-14 +25505093,diXa: a data infrastructure for chemical safety assessment.,"

Motivation

The field of toxicogenomics (the application of '-omics' technologies to risk assessment of compound toxicities) has expanded in the last decade, partly driven by new legislation, aimed at reducing animal testing in chemical risk assessment but mainly as a result of a paradigm change in toxicology towards the use and integration of genome wide data. Many research groups worldwide have generated large amounts of such toxicogenomics data. However, there is no centralized repository for archiving and making these data and associated tools for their analysis easily available.

Results

The Data Infrastructure for Chemical Safety Assessment (diXa) is a robust and sustainable infrastructure storing toxicogenomics data. A central data warehouse is connected to a portal with links to chemical information and molecular and phenotype data. diXa is publicly available through a user-friendly web interface. New data can be readily deposited into diXa using guidelines and templates available online. Analysis descriptions and tools for interrogating the data are available via the diXa portal.

Availability and implementation

http://www.dixa-fp7.eu

Contact

d.hendrickx@maastrichtuniversity.nl; info@dixa-fp7.eu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-12 +26040457,A web application for the unspecific detection of differentially expressed DNA regions in strand-specific expression data.,"

Unlabelled

Genomic technologies allow laboratories to produce large-scale data sets, either through the use of next-generation sequencing or microarray platforms. To explore these data sets and obtain maximum value from the data, researchers view their results alongside all the known features of a given reference genome. To study transcriptional changes that occur under a given condition, researchers search for regions of the genome that are differentially expressed between different experimental conditions. In order to identify these regions several algorithms have been developed over the years, along with some bioinformatic platforms that enable their use. However, currently available applications for comparative microarray analysis exclusively focus on changes in gene expression within known transcribed regions of predicted protein-coding genes, the changes that occur in non-predictable genetic elements, such as non-coding RNAs. Here, we present a web application for the visualization of strand-specific tiling microarray or next-generation sequencing data that allows customized detection of differentially expressed regions all along the genome in an unspecific manner, that allows identification of all RNA sequences, predictable or not.

Availability and implementation

The web application is freely accessible at http://tilingscan.uv.es/. TilingScan is implemented in PHP and JavaScript.

Contact

vicente.arnau@uv.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-02 +23396300,pseudoMap: an innovative and comprehensive resource for identification of siRNA-mediated mechanisms in human transcribed pseudogenes.,"RNA interference (RNAi) is a gene silencing process within living cells, which is controlled by the RNA-induced silencing complex with a sequence-specific manner. In flies and mice, the pseudogene transcripts can be processed into short interfering RNAs (siRNAs) that regulate protein-coding genes through the RNAi pathway. Following these findings, we construct an innovative and comprehensive database to elucidate siRNA-mediated mechanism in human transcribed pseudogenes (TPGs). To investigate TPG producing siRNAs that regulate protein-coding genes, we mapped the TPGs to small RNAs (sRNAs) that were supported by publicly deep sequencing data from various sRNA libraries and constructed the TPG-derived siRNA-target interactions. In addition, we also presented that TPGs can act as a target for miRNAs that actually regulate the parental gene. To enable the systematic compilation and updating of these results and additional information, we have developed a database, pseudoMap, capturing various types of information, including sequence data, TPG and cognate annotation, deep sequencing data, RNA-folding structure, gene expression profiles, miRNA annotation and target prediction. As our knowledge, pseudoMap is the first database to demonstrate two mechanisms of human TPGs: encoding siRNAs and decoying miRNAs that target the parental gene. pseudoMap is freely accessible at http://pseudomap.mbc.nctu.edu.tw/. Database URL: http://pseudomap.mbc.nctu.edu.tw/",2013-02-08 +27342196,Systematic identification and analysis of frequent gene fusion events in metabolic pathways.,"

Background

Gene fusions are the most powerful type of in silico-derived functional associations. However, many fusion compilations were made when <100 genomes were available, and algorithms for identifying fusions need updating to handle the current avalanche of sequenced genomes. The availability of a large fusion dataset would help probe functional associations and enable systematic analysis of where and why fusion events occur.

Results

Here we present a systematic analysis of fusions in prokaryotes. We manually generated two training sets: (i) 121 fusions in the model organism Escherichia coli; (ii) 131 fusions found in B vitamin metabolism. These sets were used to develop a fusion prediction algorithm that captured the training set fusions with only 7 % false negatives and 50 % false positives, a substantial improvement over existing approaches. This algorithm was then applied to identify 3.8 million potential fusions across 11,473 genomes. The results of the analysis are available in a searchable database at http://modelseed.org/projects/fusions/ . A functional analysis identified 3,000 reactions associated with frequent fusion events and revealed areas of metabolism where fusions are particularly prevalent.

Conclusions

Customary definitions of fusions were shown to be ambiguous, and a stricter one was proposed. Exploring the genes participating in fusion events showed that they most commonly encode transporters, regulators, and metabolic enzymes. The major rationales for fusions between metabolic genes appear to be overcoming pathway bottlenecks, avoiding toxicity, controlling competing pathways, and facilitating expression and assembly of protein complexes. Finally, our fusion dataset provides powerful clues to decipher the biological activities of domains of unknown function.",2016-06-24 +24948510,A novel multi-alignment pipeline for high-throughput sequencing data. ,"Mapping reads to a reference sequence is a common step when analyzing allele effects in high-throughput sequencing data. The choice of reference is critical because its effect on quantitative sequence analysis is non-negligible. Recent studies suggest aligning to a single standard reference sequence, as is common practice, can lead to an underlying bias depending on the genetic distances of the target sequences from the reference. To avoid this bias, researchers have resorted to using modified reference sequences. Even with this improvement, various limitations and problems remain unsolved, which include reduced mapping ratios, shifts in read mappings and the selection of which variants to include to remove biases. To address these issues, we propose a novel and generic multi-alignment pipeline. Our pipeline integrates the genomic variations from known or suspected founders into separate reference sequences and performs alignments to each one. By mapping reads to multiple reference sequences and merging them afterward, we are able to rescue more reads and diminish the bias caused by using a single common reference. 
Moreover, the genomic origin of each read is determined and annotated during the merging process, providing a better source of information to assess differential expression than simple allele queries at known variant positions. Using RNA-seq of a diallel cross, we compare our pipeline with the single-reference pipeline and demonstrate our advantages of more aligned reads and a higher percentage of reads with assigned origins. Database URL: http://csbio.unc.edu/CCstatus/index.py?run=Pseudo.",2014-06-18 +23389821,RenalTube: a network tool for clinical and genetic diagnosis of primary tubulopathies.,"

Unlabelled

The main purpose was to build a database while facilitating access to genotyping in order to improve the clinical and molecular knowledge of primary tubulopathies. Three tertiary referral centers of Spain collect clinical data through the site http://www.renaltube.com , while offering the analysis of 22 genes corresponding to 23 primary tubulopathies. There are three ways of collaboration: option 1 consists of adding patients to the database with clinical and biochemical information and requesting for genetic study, option 2 requires the payment of a fee for genetic analysis exclusively, and option 3 allows the enrollment of patients with a previously confirmed mutation. After 2 years of activity, RenalTube has collected data from 222 patients, the majority from Spain and Latin America (85.3 %). The most common tubulopathies are distal renal tubular acidosis (22.5 %) and classical Bartter syndrome (19.3 %) followed by familial hypomagnesemia with hypercalciuria and nephrocalcinosis (15.7 %) and Gitelman syndrome (15 %). Option 1 is the collaborating method preferred by doctors (62.3 %) followed by option 3 (36.3 %).

Conclusion

RenalTube is a network-based registry that can be easily reached and filled out worldwide. A web-based approach with a multilateral collaboration scheme enhances the recruitment of data and promotes the understanding of underlying mechanisms of rare inherited diseases, defines more accurate diagnostic and follow-up criteria, develops new molecular techniques and will improve the overall care of the patients.",2013-02-07 +28263393,VoroMQA: Assessment of protein structure quality using interatomic contact areas.,"In the absence of experimentally determined protein structure many biological questions can be addressed using computational structural models. However, the utility of protein structural models depends on their quality. Therefore, the estimation of the quality of predicted structures is an important problem. One of the approaches to this problem is the use of knowledge-based statistical potentials. Such methods typically rely on the statistics of distances and angles of residue-residue or atom-atom interactions collected from experimentally determined structures. Here, we present VoroMQA (Voronoi tessellation-based Model Quality Assessment), a new method for the estimation of protein structure quality. Our method combines the idea of statistical potentials with the use of interatomic contact areas instead of distances. Contact areas, derived using Voronoi tessellation of protein structure, are used to describe and seamlessly integrate both explicit interactions between protein atoms and implicit interactions of protein atoms with solvent. VoroMQA produces scores at atomic, residue, and global levels, all in the fixed range from 0 to 1. The method was tested on the CASP data and compared to several other single-model quality assessment methods. 
VoroMQA showed strong performance in the recognition of the native structure and in the structural model selection tests, thus demonstrating the efficacy of interatomic contact areas in estimating protein structure quality. The software implementation of VoroMQA is freely available as a standalone application and as a web server at http://bioinformatics.lt/software/voromqa. Proteins 2017; 85:1131-1145. © 2017 Wiley Periodicals, Inc.",2017-03-24 +24244721,"Meta-analysis using a novel database, miRStress, reveals miRNAs that are frequently associated with the radiation and hypoxia stress-responses.","Organisms are often exposed to environmental pressures that affect homeostasis, so it is important to understand the biological basis of stress-response. Various biological mechanisms have evolved to help cells cope with potentially cytotoxic changes in their environment. miRNAs are small non-coding RNAs which are able to regulate mRNA stability. It has been suggested that miRNAs may tip the balance between continued cytorepair and induction of apoptosis in response to stress. There is a wealth of data in the literature showing the effect of environmental stress on miRNAs, but it is scattered in a large number of disparate publications. Meta-analyses of this data would produce added insight into the molecular mechanisms of stress-response. To facilitate this we created and manually curated the miRStress database, which describes the changes in miRNA levels following an array of stress types in eukaryotic cells. Here we describe this database and validate the miRStress tool for analysing miRNAs that are regulated by stress. To validate the database we performed a cross-species analysis to identify miRNAs that respond to radiation. The analysis tool confirms miR-21 and miR-34a as frequently deregulated in response to radiation, but also identifies novel candidates as potentially important players in this stress response, including miR-15b, miR-19b, and miR-106a. 
Similarly, we used the miRStress tool to analyse hypoxia-responsive miRNAs. The most frequently deregulated miRNAs were miR-210 and miR-21, as expected. Several other miRNAs were also found to be associated with hypoxia, including miR-181b, miR-26a/b, miR-106a, miR-213 and miR-192. Therefore the miRStress tool has identified miRNAs with hitherto unknown or under-appreciated roles in the response to specific stress types. The miRStress tool, which can be used to uncover new insight into the biological roles of miRNAs, and also has the potential to unearth potential biomarkers for therapeutic response, is freely available at http://mudshark.brookes.ac.uk/MirStress.",2013-11-14 +23193278,The Rice Genome Knowledgebase (RGKbase): an annotation database for rice comparative genomics and evolutionary biology.,"Over the past 10 years, genomes of cultivated rice cultivars and their wild counterparts have been sequenced although most efforts are focused on genome assembly and annotation of two major cultivated rice (Oryza sativa L.) subspecies, 93-11 (indica) and Nipponbare (japonica). To integrate information from genome assemblies and annotations for better analysis and application, we now introduce a comparative rice genome database, the Rice Genome Knowledgebase (RGKbase, http://rgkbase.big.ac.cn/RGKbase/). RGKbase is built to have three major components: (i) integrated data curation for rice genomics and molecular biology, which includes genome sequence assemblies, transcriptomic and epigenomic data, genetic variations, quantitative trait loci (QTLs) and the relevant literature; (ii) User-friendly viewers, such as Gbrowse, GeneBrowse and Circos, for genome annotations and evolutionary dynamics and (iii) Bioinformatic tools for compositional and synteny analyses, gene family classifications, gene ontology terms and pathways and gene co-expression networks. 
RGKbase current includes data from five rice cultivars and species: Nipponbare (japonica), 93-11 (indica), PA64s (indica), the African rice (Oryza glaberrima) and a wild rice species (Oryza brachyantha). We are also constantly introducing new datasets from variety of public efforts, such as two recent releases-sequence data from ∼1000 rice varieties, which are mapped into the reference genome, yielding ample high-quality single-nucleotide polymorphisms and insertions-deletions.",2012-11-28 +27057769,Online Calculator to Improve Counseling of Short-Term Neonatal Morbidity and Mortality Outcomes at Extremely Low Gestational Age (23-28 Weeks).,"Objective Extremely low gestational age (ELGA) infants are at high risk of perinatal and neonatal morbidity and mortality. Accurate and relevant data are essential for developing a health care plan and providing realistic estimates of infants' outcomes. Study Design Retrospective analysis of all infants delivered between 23(0/7) and 28(6/7) weeks' gestation over 11 years at a single center. Using logistic regression analysis, gestational age (GA)-specific mortality and morbidity rates, and the effects of gender, antenatal corticosteroids, multiple gestation, and birth weight (BW) were determined. Results Of the 766 study infants, 644 (84.1%) were admitted to the neonatal intensive care unit, of which 502 (75.8%) survived to discharge. GA, antenatal corticosteroids, and BW were significant predictors of survival (GA: odds ratio [OR] = 1.83, 95% confidence interval [CI] = 1.64-2.04; corticosteroids: OR = 7.62, 95% CI = 5.19-11.18; BW: OR = 1.56, 95% CI = 1.44-1.69). Increasing BW correlated with a decreasing mortality rate. Conclusion This study provides recent outcome data of ELGA infants delivered at a tertiary level center. 
The results have been translated into an online counseling tool (http://murmuring-brook-6600.herokuapp.com/ELGA.html).",2016-04-08 +27258798,Treatment of bacterial vaginosis in pregnancy in order to reduce the risk of spontaneous preterm delivery - a clinical recommendation.,"

Introduction

Bacterial vaginosis (BV) is characterized by a dysbiosis of the vaginal microbiota with a depletion of Lactobacillus spp. In pregnancy, prevalences between 7 and 30% have been reported depending on the study population and the definition. BV may be associated with an increased risk of spontaneous preterm delivery (sPTD). However, it is controversial whether or not BV-positive pregnant women will benefit from treatment to reduce the risk of sPTD. We could not identify any good-quality guideline addressing this issue. Consequently we aimed to produce this clinical recommendation based on GRADE.

Material and methods

Systematic literature searches were conducted in the following databases: Guidelines International Network: G-I-N, Medline, Embase, The Cochrane Database of Systematic Reviews, Web of Science and http://www.clinicaltrials.gov from 1999 to 3 October 2014. Hence, nine guidelines, 34 reviews, 18 randomized controlled trials and 12 observational studies were included.

Results

The GRADE quality of evidence was consistently low or very low, primarily because none of the risk ratios (RR) for the risk of sPTD at <37 weeks were statistically significant. Concerning treatment with metronidazole, RR was 1.11 (95% CI 0.93-1.34) in low-risk pregnancies and 0.96 (95% CI 0.78-1.18) in high risk pregnancies. Concerning treatment with clindamycin at any gestational age, the RR was 0.87 (95% CI 0.73-1.05).

Conclusion

This systematic review gives a strong recommendation against treatment with metronidazole and a weak recommendation against treatment with clindamycin to reduce the sPTD rate in both high-risk and low-risk pregnancies with BV.",2016-06-23 +26335208,T-REx: Transcriptome analysis webserver for RNA-seq Expression data.,"

Background

Transcriptomics analyses of bacteria (and other organisms) provide global as well as detailed information on gene expression levels and, consequently, on other processes in the cell. RNA sequencing (RNA-seq) has over the past few years become the most accurate method for global transcriptome measurements and for the identification of novel RNAs. This development has been accompanied by advances in the bioinformatics methods, tools and software packages that deal with the analysis of the large data sets resulting from RNA-seq efforts.

Results

Based on years of experience in analyzing transcriptome data, we developed a user-friendly webserver that performs the statistical analysis on the gene expression values generated by RNA-seq. It also provides the user with a whole range of data plots. We benchmarked our RNA-seq pipeline, T-REx, using a case study of CodY mutants of Bacillus subtilis and show that it could easily and automatically reproduce the statistical analysis of the cognate publication. Furthermore, by mining the correlation matrices, k-means clusters and heatmaps generated by T-REx we observed interesting gene-behavior and identified sub-groups in the CodY regulon.

Conclusion

T-REx is a parameter-free statistical analysis pipeline for RNA-seq gene expression data that is dedicated for use by biologists and bioinformaticians alike. The tables and figures produced by T-REx are in most cases sufficient to accurately mine the statistical results. In addition to the stand-alone version, we offer a user-friendly webserver that only needs basic input ( http://genome2d.molgenrug.nl ).",2015-09-03 +22434838,PRIDE: quality control in a proteomics data repository.,"The PRoteomics IDEntifications (PRIDE) database is a large public proteomics data repository, containing over 270 million mass spectra (by November 2011). PRIDE is an archival database, providing the proteomics data supporting specific scientific publications in a computationally accessible manner. While PRIDE faces rapid increases in data deposition size as well as number of depositions, the major challenge is to ensure a high quality of data depositions in the context of highly diverse proteomics work flows and data representations. Here, we describe the PRIDE curation pipeline and its practical application in quality control of complex data depositions. DATABASE URL: http://www.ebi.ac.uk/pride/.",2012-03-20 +24271397,KBDOCK 2013: a spatial classification of 3D protein domain family interactions.,"Comparing, classifying and modelling protein structural interactions can enrich our understanding of many biomolecular processes. This contribution describes Kbdock (http://kbdock.loria.fr/), a database system that combines the Pfam domain classification with coordinate data from the PDB to analyse and model 3D domain-domain interactions (DDIs). Kbdock can be queried using Pfam domain identifiers, protein sequences or 3D protein structures. For a given query domain or pair of domains, Kbdock retrieves and displays a non-redundant list of homologous DDIs or domain-peptide interactions in a common coordinate frame. 
Kbdock may also be used to search for and visualize interactions involving different, but structurally similar, Pfam families. Thus, structural DDI templates may be proposed even when there is little or no sequence similarity to the query domains.",2013-11-23 +23696374,HAPLOFIND: a new method for high-throughput mtDNA haplogroup assignment.,"Deep sequencing technologies are completely revolutionizing the approach to DNA analysis. Mitochondrial DNA (mtDNA) studies entered in the ""postgenomic era"": the burst in sequenced samples observed in nuclear genomics is expected also in mitochondria, a trend that can already be detected checking complete mtDNA sequences database submission rate. Tools for the analysis of these data are available, but they fail in throughput or in easiness of use. We present here a new pipeline based on previous algorithms, inherited from the ""nuclear genomic toolbox,"" combined with a newly developed algorithm capable of efficiently and easily classify new mtDNA sequences according to PhyloTree nomenclature. Detected mutations are also annotated using data collected from publicly available databases. Thanks to the analysis of all freely available sequences with known haplogroup obtained from GenBank, we were able to produce a PhyloTree-based weighted tree, taking into account each haplogroup pattern conservation. The combination of a highly efficient aligner, coupled with our algorithm and massive usage of asynchronous parallel processing, allowed us to build a high-throughput pipeline for the analysis of mtDNA sequences that can be quickly updated to follow the ever-changing nomenclature. HaploFind is freely accessible at the following Web address: https://haplofind.unibo.it.",2013-06-12 +22272252,LipidXplorer: a software for consensual cross-platform lipidomics.,"LipidXplorer is the open source software that supports the quantitative characterization of complex lipidomes by interpreting large datasets of shotgun mass spectra. 
LipidXplorer processes spectra acquired on any type of tandem mass spectrometers; it identifies and quantifies molecular species of any ionizable lipid class by considering any known or assumed molecular fragmentation pathway independently of any resource of reference mass spectra. It also supports any shotgun profiling routine, from high throughput top-down screening for molecular diagnostic and biomarker discovery to the targeted absolute quantification of low abundant lipid species. Full documentation on installation and operation of LipidXplorer, including tutorial, collection of spectra interpretation scripts, FAQ and user forum are available through the wiki site at: https://wiki.mpi-cbg.de/wiki/lipidx/index.php/Main_Page.",2012-01-17 +26870755,Proteome-wide dataset supporting the study of ancient metazoan macromolecular complexes.,"Our analysis examines the conservation of multiprotein complexes among metazoa through use of high resolution biochemical fractionation and precision mass spectrometry applied to soluble cell extracts from 5 representative model organisms Caenorhabditis elegans, Drosophila melanogaster, Mus musculus, Strongylocentrotus purpuratus, and Homo sapiens. The interaction network obtained from the data was validated globally in 4 distant species (Xenopus laevis, Nematostella vectensis, Dictyostelium discoideum, Saccharomyces cerevisiae) and locally by targeted affinity-purification experiments. Here we provide details of our massive set of supporting biochemical fractionation data available via ProteomeXchange (PXD002319-PXD002328), PPIs via BioGRID (185267); and interaction network projections via (http://metazoa.med.utoronto.ca) made fully accessible to allow further exploration. The datasets here are related to the research article on metazoan macromolecular complexes in Nature [1].",2015-12-12 +26072512,Misassembly detection using paired-end sequence reads and optical mapping data.,"

Motivation

A crucial problem in genome assembly is the discovery and correction of misassembly errors in draft genomes. We develop a method called misSEQuel that enhances the quality of draft genomes by identifying misassembly errors and their breakpoints using paired-end sequence reads and optical mapping data. Our method also fulfills the critical need for open source computational methods for analyzing optical mapping data. We apply our method to various assemblies of the loblolly pine, Francisella tularensis, rice and budgerigar genomes. We generated and used simulated optical mapping data for loblolly pine and F.tularensis and used real optical mapping data for rice and budgerigar.

Results

Our results demonstrate that we detect more than 54% of extensively misassembled contigs and more than 60% of locally misassembled contigs in assemblies of F.tularensis and between 31% and 100% of extensively misassembled contigs and between 57% and 73% of locally misassembled contigs in assemblies of loblolly pine. Using the real optical mapping data, we correctly identified 75% of extensively misassembled contigs and 100% of locally misassembled contigs in rice, and 77% of extensively misassembled contigs and 80% of locally misassembled contigs in budgerigar.

Availability and implementation

misSEQuel can be used as a post-processing step in combination with any genome assembler and is freely available at http://www.cs.colostate.edu/seq/.",2015-06-01 +27330139,RNAlien - Unsupervised RNA family model construction.,"Determining the function of a non-coding RNA requires costly and time-consuming wet-lab experiments. For this reason, computational methods which ascertain the homology of a sequence and thereby deduce functionality and family membership are often exploited. In this fashion, newly sequenced genomes can be annotated in a completely computational way. Covariance models are commonly used to assign novel RNA sequences to a known RNA family. However, to construct such models several examples of the family have to be already known. Moreover, model building is the work of experts who manually edit the necessary RNA alignment and consensus structure. Our method, RNAlien, starting from a single input sequence collects potential family member sequences by multiple iterations of homology search. RNA family models are fully automatically constructed for the found sequences. We have tested our method on a subset of the Rfam RNA family database. RNAlien models are a starting point to construct models of comparable sensitivity and specificity to manually curated ones from the Rfam database. RNAlien Tool and web server are available at http://rna.tbi.univie.ac.at/rnalien/.",2016-06-21 +28518173,Simultaneous quantification of N- and O-glycans using a solid-phase method.,"Glycosylation has a pivotal role in a diverse range of biological activities, modulating the structure and function of proteins. Glycogens coupled to the nitrogen atom (N-linked) of asparagine side chains or to the oxygen atom (O-linked) of serine and threonine side chains represent the two major protein glycosylation forms. N-glycans can be released by glycosidases, whereas O-glycans are often cleaved by chemical reaction. 
However, it is challenging to combine these enzymatic and chemical reactions in order to analyze both N- and O-glycans. We recently developed a glycoprotein immobilization for glycan extraction (GIG) method that allows for the simultaneous analysis of N- and O-glycans on a solid support. GIG enables quantitative analysis of N-glycans and O-glycans from a single specimen and can be applied to a high-throughput automated platform. Here we provide a step-by-step GIG protocol that includes procedures for (i) protein immobilization on an aldehyde-active solid support by reductive amination; (ii) stabilization of fragile sialic acids by carbodiimide coupling; (iii) release of N-glycans by PNGase F digestion; (iv) release of O-glycans by β-elimination using ammonia in the presence of 1-phenyl-3-methyl-5-pyrazolone (PMP) to prevent alditol peeling from O-glycans; (v) mass spectrometry (MS) analysis; and (vi) data analysis for identification of glycans using in-house developed software (GIG Tool; free to download via http://www.biomarkercenter.org/gigtool). The GIG tool extracts precursor masses, oxonium ions and glycan fragments from tandem (liquid chromatography (LC)-MS/MS) mass spectra for glycan identification, and reporter ions from quaternary amine containing isobaric tag for glycan (QUANTITY) isobaric tags are used for quantification of the relative abundance of N-glycans. The GIG protocol takes ∼3 d.",2017-05-18 +27319297,ORION: a web server for protein fold recognition and structure prediction using evolutionary hybrid profiles.,"Protein structure prediction based on comparative modeling is the most efficient way to produce structural models when it can be performed. ORION is a dedicated webserver based on a new strategy that performs this task. The identification by ORION of suitable templates is performed using an original profile-profile approach that combines sequence and structure evolution information. 
Structure evolution information is encoded into profiles using structural features, such as solvent accessibility and local conformation -with Protein Blocks-, which give an accurate description of the local protein structure. ORION has recently been improved, increasing by 5% the quality of its results. The ORION web server accepts a single protein sequence as input and searches homologous protein structures within minutes. Various databases such as PDB, SCOP and HOMSTRAD can be mined to find an appropriate structural template. For the modeling step, a protein 3D structure can be directly obtained from the selected template by MODELLER and displayed with global and local quality model estimation measures. The sequence and the predicted structure of 4 examples from the CAMEO server and a recent CASP11 target from the 'Hard' category (T0818-D1) are shown as pertinent examples. Our web server is accessible at http://www.dsimb.inserm.fr/ORION/.",2016-06-20 +26231214,POTION: an end-to-end pipeline for positive Darwinian selection detection in genome-scale data through phylogenetic comparison of protein-coding genes.,"

Background

Detection of genes evolving under positive Darwinian evolution in genome-scale data is nowadays a prevailing strategy in comparative genomics studies to identify genes potentially involved in adaptation processes. Despite the large number of studies aiming to detect and contextualize such gene sets, there is virtually no software available to perform this task in a general, automatic, large-scale and reliable manner. This certainly occurs due to the computational challenges involved in this task, such as the appropriate modeling of data under analysis, the computation time to perform several of the required steps when dealing with genome-scale data and the highly error-prone nature of the sequence and alignment data structures needed for genome-wide positive selection detection.

Results

We present POTION, an open source, modular and end-to-end software for genome-scale detection of positive Darwinian selection in groups of homologous coding sequences. Our software represents a key step towards genome-scale, automated detection of positive selection, from predicted coding sequences and their homology relationships to high-quality groups of positively selected genes. POTION reduces false positives through several sophisticated sequence and group filters based on numeric, phylogenetic, quality and conservation criteria to remove spurious data and through multiple hypothesis corrections, and considerably reduces computation time thanks to a parallelized design. Our software achieved a high classification performance when used to evaluate a curated dataset of Trypanosoma brucei paralogs previously surveyed for positive selection. When used to analyze predicted groups of homologous genes of 19 strains of Mycobacterium tuberculosis as a case study we demonstrated the filters implemented in POTION to remove sources of errors that commonly inflate errors in positive selection detection. A thorough literature review found no other software similar to POTION in terms of customization, scale and automation.

Conclusion

To the best of our knowledge, POTION is the first tool to allow users to construct and check hypotheses regarding the occurrence of site-based evidence of positive selection in non-curated, genome-scale data within a feasible time frame and with no human intervention after initial configuration. POTION is available at http://www.lmb.cnptia.embrapa.br/share/POTION/.",2015-08-01 +27995669,PERCH: A Unified Framework for Disease Gene Prioritization.,"To interpret genetic variants discovered from next-generation sequencing, integration of heterogeneous information is vital for success. This article describes a framework named PERCH (Polymorphism Evaluation, Ranking, and Classification for a Heritable trait), available at http://BJFengLab.org/. It can prioritize disease genes by quantitatively unifying a new deleteriousness measure called BayesDel, an improved assessment of the biological relevance of genes to the disease, a modified linkage analysis, a novel rare-variant association test, and a converted variant call quality score. It supports data that contain various combinations of extended pedigrees, trios, and case-controls, and allows for a reduced penetrance, an elevated phenocopy rate, liability classes, and covariates. BayesDel is more accurate than PolyPhen2, SIFT, FATHMM, LRT, Mutation Taster, Mutation Assessor, PhyloP, GERP++, SiPhy, CADD, MetaLR, and MetaSVM. The overall approach is faster and more powerful than the existing quantitative method pVAAST, as shown by the simulations of challenging situations in finding the missing heritability of a complex disease. This framework can also classify variants of unknown significance (variants of uncertain significance) by quantitatively integrating allele frequencies, deleteriousness, association, and co-segregation. 
PERCH is a versatile tool for gene prioritization in gene discovery research and variant classification in clinical genetic testing.",2017-01-28 +,ADDENDUM,"The Spiegel Online (http://www.spiegel.de/international/) by Philip Bethge on May 5, 2012, was titled “A Future of Self-Surveillance? Tech Pioneers Track Bodily Functions Day and Night.” Accordingly, the authors of the foregoing article (Halberg et al) trust that those concerned with self-tracking will include the self-surveillance of blood pressure and heart rate, analyzed chronobiologically in repeated passes over the accumulating data at systematically selected times during their entire lifespans. In this endeavor, they can focus on vascular variability disorders as a first step. Thereby, one immediately gauges undue loads (ie, strain) and is informed as to their associations that can provide clues for stress relief; one also detects changes in the risk of severe disease and disease-related events, such as stroke (within 6 years, from <5% to near 100%, in one cited study); those treated for high blood pressure will not fly blind to risks induced by the medication (Figure A) that cannot be detected by office visits.",2012-05-01 +27223594,Patients with KCNJ11-related diabetes frequently have neuropsychological impairments compared with sibling controls.,"

Aims

KCNJ11-related diabetes is the most common form of permanent neonatal diabetes and has been associated with a spectrum of neurodevelopmental problems. We compared neurodevelopmental outcomes in patients with KCNJ11 mutations and their sibling controls.

Methods

Through our Monogenic Diabetes Registry (http://monogenicdiabetes.uchicago.edu/), we evaluated 23 patients with KCNJ11 mutations with (n = 9) and without (n = 14) global developmental delay successfully treated with sulfonylurea and 20 healthy sibling controls, using a battery of targeted neuropsychological and behavioural assessments with scaled scores that are comparable across a wide range of ages.

Results

Patients with KCNJ11-related diabetes without global developmental delay had significant differences compared with sibling controls on a range of assessments including IQ, measures of academic achievement and executive function. KCNJ11 patients with global delay exhibited significant differences in behavioural symptoms with a tendency to avoid social contact and displayed a reduced ability to adapt to new circumstances. Parents reported more immature behaviour, gross mood swings, bizarre thoughts, other unusual and severe behaviours, and there were also significant deficits in all subdomains of daily living skills.

Conclusions

This series represents the largest and most comprehensive study of neuropsychological and behavioural dysfunction of individuals with KCNJ11 diabetes and is the first to compare outcome with sibling controls. Our data demonstrate the variety of neurodevelopmental problems seen in those with KCNJ11 mutations, even in those without recognized global developmental delays. These data can be used to counsel families and guide structured neurodevelopmental assessments and treatments based on the initial genetic diagnosis in patients with neonatal diabetes.",2016-06-22 +26244889,FROG - Fingerprinting Genomic Variation Ontology.,"Genetic variations play a crucial role in differential phenotypic outcomes. Given the complexity in establishing this correlation and the enormous data available today, it is imperative to design machine-readable, efficient methods to store, label, search and analyze this data. A semantic approach, FROG: ""FingeRprinting Ontology of Genomic variations"" is implemented to label variation data, based on its location, function and interactions. FROG has six levels to describe the variation annotation, namely, chromosome, DNA, RNA, protein, variations and interactions. Each level is a conceptual aggregation of logically connected attributes each of which comprises of various properties for the variant. For example, in chromosome level, one of the attributes is location of variation and which has two properties, allosomes or autosomes. Another attribute is variation kind which has four properties, namely, indel, deletion, insertion, substitution. Likewise, there are 48 attributes and 278 properties to capture the variation annotation across six levels. Each property is then assigned a bit score which in turn leads to generation of a binary fingerprint based on the combination of these properties (mostly taken from existing variation ontologies). 
FROG is a novel and unique method designed for the purpose of labeling the entire variation data generated till date for efficient storage, search and analysis. A web-based platform is designed as a test case for users to navigate sample datasets and generate fingerprints. The platform is available at http://ab-openlab.csir.res.in/frog.",2015-08-05 +25910696,Data-dependent bucketing improves reference-free compression of sequencing reads.,"

Motivation

The storage and transmission of high-throughput sequencing data consumes significant resources. As our capacity to produce such data continues to increase, this burden will only grow. One approach to reduce storage and transmission requirements is to compress this sequencing data.

Results

We present a novel technique to boost the compression of sequencing that is based on the concept of bucketing similar reads so that they appear nearby in the file. We demonstrate that, by adopting a data-dependent bucketing scheme and employing a number of encoding ideas, we can achieve substantially better compression ratios than existing de novo sequence compression tools, including other bucketing and reordering schemes. Our method, Mince, achieves up to a 45% reduction in file sizes (28% on average) compared with existing state-of-the-art de novo compression schemes.

Availability and implementation

Mince is written in C++11, is open source and has been made available under the GPLv3 license. It is available at http://www.cs.cmu.edu/∼ckingsf/software/mince.

Contact

carlk@cs.cmu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-24 +27115029,Longitudinal data on cortical thickness before and after working memory training.,"The data and supplementary information provided in this article relate to our research article ""Task complexity and location specific changes of cortical thickness in executive and salience networks after working memory training"" (Metzler-Baddeley et al., 2016) [1]. We provide cortical thickness and subcortical volume data derived from parieto-frontal cortical regions and the basal ganglia with the FreeSurfer longitudinal analyses stream (http://surfer.nmr.mgh.harvard.edu [2]) before and after Cogmed working memory training (Cogmed and Cogmed Working Memory Training, 2012) [3]. This article also provides supplementary information to the research article, i.e., within-group comparisons between baseline and outcome cortical thickness and subcortical volume measures, between-group tests of performance changes in cognitive benchmark tests (www.cambridgebrainsciences.com [4]), correlation analyses between performance changes in benchmark tests and training-related structural changes, correlation analyses between the time spent training and structural changes, a scatterplot of the relationship between cortical thickness measures derived from the occipital lobe as control region and the chronological order of the MRI sessions to assess potential scanner drift effects and a post-hoc vertex-wise whole brain analysis with FreeSurfer Qdec (https://surfer.nmr.mgh.harvard.edu/fswiki/Qdec [5]).",2016-04-02 +24912499,REGNET: mining context-specific human transcription networks using composite genomic information.,"

Background

Genome-wide expression profiles reflect the transcriptional networks specific to the given cell context. However, most statistical models try to estimate the average connectivity of the networks from a collection of gene expression data, and are unable to characterize the context-specific transcriptional regulations. We propose an approach for mining context-specific transcription networks from a large collection of gene expression fold-change profiles and composite gene-set information.

Results

Using a composite gene-set analysis method, we combine the information of transcription factor binding sites, Gene Ontology or pathway gene sets and gene expression fold-change profiles for a variety of cell conditions. We then collected all the significant patterns and constructed a database of context-specific transcription networks for human (REGNET). As a result, context-specific roles of transcription factors as well as their functional targets are readily explored. To validate the approach, nine predicted targets of E2F1 in HeLa cells were tested using chromatin immunoprecipitation assay. Among them, five (Gadd45b, Dusp6, Mll5, Bmp2 and E2f3) were successfully bound by E2F1. c-JUN and the EMT transcription networks were also validated from literature.

Conclusions

REGNET is a useful tool for exploring the ternary relationships among the transcription factors, their functional targets and the corresponding cell conditions. It is able to provide useful clues for novel cell-specific transcriptional regulations. The REGNET database is available at http://mgrc.kribb.re.kr/regnet.",2014-06-09 +24146757,SIDD: a semantically integrated database towards a global view of human disease.,"

Background

A number of databases have been developed to collect disease-related molecular, phenotypic and environmental features (DR-MPEs), such as genes, non-coding RNAs, genetic variations, drugs, phenotypes and environmental factors. However, each of current databases focused on only one or two DR-MPEs. There is an urgent demand to develop an integrated database, which can establish semantic associations among disease-related databases and link them to provide a global view of human disease at the biological level. This database, once developed, will facilitate researchers to query various DR-MPEs through disease, and investigate disease mechanisms from different types of data.

Methodology

To establish an integrated disease-associated database, disease vocabularies used in different databases are mapped to Disease Ontology (DO) through semantic match. 4,284 and 4,186 disease terms from Medical Subject Headings (MeSH) and Online Mendelian Inheritance in Man (OMIM) respectively are mapped to DO. Then, the relationships between DR-MPEs and diseases are extracted and merged from different source databases for reducing the data redundancy.

Conclusions

A semantically integrated disease-associated database (SIDD) is developed, which integrates 18 disease-associated databases, for researchers to browse multiple types of DR-MPEs in a view. A web interface allows easy navigation for querying information through browsing a disease ontology tree or searching a disease term. Furthermore, a network visualization tool using Cytoscape Web plugin has been implemented in SIDD. It enhances the SIDD usage when viewing the relationships between diseases and DR-MPEs. The current version of SIDD (Jul 2013) documents 4,465,131 entries relating to 139,365 DR-MPEs, and to 3,824 human diseases. The database can be freely accessed from: http://mlg.hit.edu.cn/SIDD.",2013-10-11 +25815061,Bacterial rose garden for metagenomic SNP-based phylogeny visualization.,"

Background

One of the most challenging tasks in genomic analysis nowadays is metagenomics. Biomedical applications of metagenomics give rise to datasets containing hundreds and thousands of samples from various body sites for hundreds of patients. Inherently metagenome is by far more complex than a single genome as it varies in time by the amount of bacteria comprising it. Other levels of data complexity include geography of the samples and phylogenetic distance between the genomes of the same operational taxonomic unit (OTU). We have developed the visualization concept for the representation of multilayer metagenomics data - the bacterial rose garden. The approach allows to display the taxonomic distance between the representatives of the same OTU in different samples and use variety of the metadata for display.

Results

We have developed the principle of visualization allowing for multilayer information representation. We have incorporated data on OTU diversity across metagenomes and origin of the samples. The visual representation we have called ""rose"" is focused on the phylogenetic distance between the representatives of the same OTU. The visual representation is realized as interactive data chart which allows user to interact with data and explore variables. It is known that classical representation of the taxonomic tree is a reduction of information from original pairwise distance matrix. The visualization presented is a way to save all the information available through projection of distance matrix into single dimensional space of one sample. It could serve as a basis for further more complex information representation. We have used the principle proposed for visualization of 101 bacterial OTUs phylogenetic distances, finally we provide open code for the web page generation.

Conclusions

Bacterial rose garden is a versatile visualization principle coping with the major difficulties of metagenomic big-data visualization without loss of data. The method proposed is showing the interconnectedness of variables and is realized as user-friendly web page allowing for dynamic data exploration. The concept provided serves as one of the original approaches for metagenomic data representation and sharing. Full functional prototype could be found at http://rosegarden.datalaboratory.ru.",2015-03-21 +26787664,Piecewise parameter estimation for stochastic models in COPASI.,"

Motivation

Computational modeling is widely used for deepening the understanding of biological processes. Parameterizing models to experimental data needs computationally efficient techniques for parameter estimation. Challenges for parameter estimation include in general the high dimensionality of the parameter space with local minima and in specific for stochastic modeling the intrinsic stochasticity.

Results

We implemented the recently suggested multiple shooting for stochastic systems (MSS) objective function for parameter estimation in stochastic models into COPASI. This MSS objective function can be used for parameter estimation in stochastic models but also shows beneficial properties when used for ordinary differential equation models. The method can be applied with all of COPASI's optimization algorithms, and can be used for SBML models as well.

Availability and implementation

The methodology is available in COPASI as of version 4.15.95 and can be downloaded from http://www.copasi.org

Contact

frank.bergmann@bioquant.uni-heidelberg.de or fbergman@caltech.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-18 +23071747,TRIP database 2.0: a manually curated information hub for accessing TRP channel interaction network.,"Transient receptor potential (TRP) channels are a family of Ca(2+)-permeable cation channels that play a crucial role in biological and disease processes. To advance TRP channel research, we previously created the TRIP (TRansient receptor potential channel-Interacting Protein) Database, a manually curated database that compiles scattered information on TRP channel protein-protein interactions (PPIs). However, the database needs to be improved for information accessibility and data utilization. Here, we present the TRIP Database 2.0 (http://www.trpchannel.org) in which many helpful, user-friendly web interfaces have been developed to facilitate knowledge acquisition and inspire new approaches to studying TRP channel functions: 1) the PPI information found in the supplementary data of referred articles was curated; 2) the PPI summary matrix enables users to intuitively grasp overall PPI information; 3) the search capability has been expanded to retrieve information from 'PubMed' and 'PIE the search' (a specialized search engine for PPI-related articles); and 4) the PPI data are available as sif files for network visualization and analysis using 'Cytoscape'. Therefore, our TRIP Database 2.0 is an information hub that works toward advancing data-driven TRP channel research.",2012-10-11 +25326082,"A PDB-wide, evolution-based assessment of protein-protein interfaces.","

Background

Thanks to the growth in sequence and structure databases, more than 50 million sequences are now available in UniProt and 100,000 structures in the PDB. Rich information about protein-protein interfaces can be obtained by a comprehensive study of protein contacts in the PDB, their sequence conservation and geometric features.

Results

An automated computational pipeline was developed to run our Evolutionary Protein-Protein Interface Classifier (EPPIC) software on the entire PDB and store the results in a relational database, currently containing > 800,000 interfaces. This allows the analysis of interface data on a PDB-wide scale. Two large benchmark datasets of biological interfaces and crystal contacts, each containing about 3000 entries, were automatically generated based on criteria thought to be strong indicators of interface type. The BioMany set of biological interfaces includes NMR dimers solved as crystal structures and interfaces that are preserved across diverse crystal forms, as catalogued by the Protein Common Interface Database (ProtCID) from Xu and Dunbrack. The second dataset, XtalMany, is derived from interfaces that would lead to infinite assemblies and are therefore crystal contacts. BioMany and XtalMany were used to benchmark the EPPIC approach. The performance of EPPIC was also compared to classifications from the Protein Interfaces, Surfaces, and Assemblies (PISA) program on a PDB-wide scale, finding that the two approaches give the same call in about 88% of PDB interfaces. By comparing our safest predictions to the PDB author annotations, we provide a lower-bound estimate of the error rate of biological unit annotations in the PDB. Additionally, we developed a PyMOL plugin for direct download and easy visualization of EPPIC interfaces for any PDB entry. Both the datasets and the PyMOL plugin are available at http://www.eppic-web.org/ewui/#downloads.

Conclusions

Our computational pipeline allows us to analyze protein-protein contacts and their sequence conservation across the entire PDB. Two new benchmark datasets are provided, which are over an order of magnitude larger than existing manually curated ones. These tools enable the comprehensive study of several aspects of protein-protein contacts in the PDB and represent a basis for future, even larger scale studies of protein-protein interactions.",2014-10-18 +27013430,"Influenza during pregnancy: Incidence, vaccination coverage and attitudes toward vaccination in the French web-based cohort G-GrippeNet.","

Introduction

Pregnancy is a risk factor for severe influenza. However, data on influenza incidence during pregnancy are scarce. Likewise, no data are available on influenza vaccine coverage in France since national recommendation in 2012. We aimed to assess these points using a novel nationwide web-based surveillance system, G-GrippeNet.

Methods

During the 2014/2015 influenza season, pregnant women living in metropolitan France were enrolled through a web platform (https://www.grippenet.fr/). Throughout the season, participants were asked to report, on a weekly basis, if they had experienced symptoms of influenza-like-illness (ILI). ILI episodes reported were used to calculate incidence density rates based on period of participation from each participant. Vaccination coverage was estimated after weighing on age and education level from national data on pregnant women. Factors associated with higher vaccination coverage were obtained through a logistic regression with Odds Ratio (OR) corrected with the Zhang and Yu method.

Results

A total of 153 women were enrolled. ILI incidence density rate was 1.8 per 100 person-week (95% CI, 1.5-2.1). This rate was higher in women older than 40 years (RR = 3.0, 95% CI [1.1-8.3], p = 0.03) and during first/second trimesters compared to third trimester (RR = 4.0, 95% CI [1.4-12.0], p = 0.01). Crude vaccination coverage was 39% (95% CI, 31-47) and weighted vaccination coverage was estimated at 26% (95% CI, 20-34). Health care provider recommendation for vaccination (corrected OR = 7.8; 95% CI [3.0-17.1]) and non-smoking status (cOR = 2.1; 95% CI [1.2-6.9]) were associated with higher vaccine uptake.

Conclusion

This original web based longitudinal surveillance study design proved feasible in pregnant women population. First results are of interest and underline that public health policies should emphasize the vaccination promotion through health care providers.",2016-03-21 +24406170,PeptiSite: a structural database of peptide binding sites in 4D.,"We developed PeptiSite, a comprehensive and reliable database of biologically and structurally characterized peptide-binding sites, in which each site is represented by an ensemble of its complexes with protein, peptide and small molecule partners. The unique features of the database include: (1) the ensemble site representation that provides a fourth dimension to the otherwise three dimensional data, (2) comprehensive characterization of the binding site architecture that may consist of a multimeric protein assembly with cofactors and metal ions and (3) analysis of consensus interaction motifs within the ensembles and identification of conserved determinants of these interactions. Currently the database contains 585 proteins with 650 peptide-binding sites. http://peptisite.ucsd.edu/ link allows searching for the sites of interest and interactive visualization of the ensembles using the ActiveICM web-browser plugin. This structural database for protein-peptide interactions enables understanding of structural principles of these interactions and may assist the development of an efficient peptide docking benchmark.",2014-01-06 +25797358,NIH/NCATS/GRDR® Common Data Elements: A leading force for standardized data collection.,"The main goal of the NIH/NCATS GRDR® program is to serve as a central web-based global data repository to integrate de-identified patient clinical data from rare disease registries, and other data sources, in a standardized manner, to be available to researchers for conducting various biomedical studies, including clinical trials and to support analyses within and across diseases. 
The aim of the program is to advance research for many rare diseases. One of the first tasks toward achieving this goal was the development of a set of Common Data Elements (CDEs), which are controlled terminologies that represent collected data. A list of 75 CDEs was developed by a national committee and was validated and implemented during a period of 2 year proof of concept. Access to GRDR CDEs is freely available at: https://grdr.ncats.nih.gov/index.php?option=com_content&view=article&id=3&Itemid=5. The GRDR CDEs have been the cornerstone of the GRDR repository, as well as of several other national and international patient registries. The establishment of the GRDR program has elevated the issue of data standardization and interoperability for rare disease patient registries, to international attention, resulting in a global dialog and significant change in the mindset of registry developers, patient advocacy groups, and other national and international organizations.",2015-03-20 +27315278,BLAT2DOLite: An Online System for Identifying Significant Relationships between Genetic Sequences and Diseases.,"The significantly related diseases of sequences could play an important role in understanding the functions of these sequences. In this paper, we introduced BLAT2DOLite, an online system for annotating human genes and diseases and identifying the significant relationships between sequences and diseases. Currently, BLAT2DOLite integrates Entrez Gene database and Disease Ontology Lite (DOLite), which contain loci of gene and relationships between genes and diseases. It utilizes hypergeometric test to calculate P-values between genes and diseases of DOLite. The system can be accessed from: http://123.59.132.21:8080/BLAT2DOLite. 
The corresponding web service is described in: http://123.59.132.21:8080/BLAT2DOLite/BLAT2DOLiteIDMappingPort?wsdl.",2016-06-17 +26937474,Data on megakaryocytes in the bone marrow of mice exposed to formaldehyde.,"Previously, we reported that occupational exposure to formaldehyde (FA) exposure in factory workers reduced platelet counts, http://dx.doi.org/10.1158/1055-9965.EPI-09-0762[1], while exposure in mice increased platelet counts http://dx.doi.org/10.1371/journal.pone.0074974[2]. Bone marrow megakaryocyte (MK) numbers were also increased in exposed mice, as determined qualitatively. The data presented here are from a quantitative evaluation of MK numbers in the bone marrow histopathological slides from the previous FA exposure experiments in mice. Bone marrow slides were prepared using a single 5 μm section of femur from 2 mice randomly selected from each exposure group (n=9) treated with 0, 0.5 and 3.0 mg/m(3) FA by nose-only inhalation. MKs were systemically counted and average MK frequency was calculated as the total MK per slide divided by the number of fields evaluated. Data are presented visually as microscopy views and graphically as MK frequency.",2016-02-05 +28187714,TipMT: Identification of PCR-based taxon-specific markers.,"

Background

Molecular genetic markers are one of the most informative and widely used genome features in clinical and environmental diagnostic studies. A polymerase chain reaction (PCR)-based molecular marker is very attractive because it is suitable to high throughput automation and confers high specificity. However, the design of taxon-specific primers may be difficult and time consuming due to the need to identify appropriate genomic regions for annealing primers and to evaluate primer specificity.

Results

Here, we report the development of a Tool for Identification of Primers for Multiple Taxa (TipMT), which is a web application to search and design primers for genotyping based on genomic data. The tool identifies and targets single sequence repeats (SSR) or orthologous/taxa-specific genes for genotyping using Multiplex PCR. This pipeline was applied to the genomes of four species of Leishmania (L. amazonensis, L. braziliensis, L. infantum and L. major) and validated by PCR using artificial genomic DNA mixtures of the Leishmania species as templates. This experimental validation demonstrates the reliability of TipMT because amplification profiles showed discrimination of genomic DNA samples from Leishmania species.

Conclusions

The TipMT web tool allows for large-scale identification and design of taxon-specific primers and is freely available to the scientific community at http://200.131.37.155/tipMT/ .",2017-02-11 +26187896,QoRTs: a comprehensive toolset for quality control and data processing of RNA-Seq experiments.,"

Background

High-throughput next-generation RNA sequencing has matured into a viable and powerful method for detecting variations in transcript expression and regulation. Proactive quality control is of critical importance as unanticipated biases, artifacts, or errors can potentially drive false associations and lead to flawed results.

Results

We have developed the Quality of RNA-Seq Toolset, or QoRTs, a comprehensive, multifunction toolset that assists in quality control and data processing of high-throughput RNA sequencing data.

Conclusions

QoRTs generates an unmatched variety of quality control metrics, and can provide cross-comparisons of replicates contrasted by batch, biological sample, or experimental condition, revealing any outliers and/or systematic issues that could drive false associations or otherwise compromise downstream analyses. In addition, QoRTs simultaneously replaces the functionality of numerous other data-processing tools, and can quickly and efficiently generate quality control metrics, coverage counts (for genes, exons, and known/novel splice-junctions), and browser tracks. These functions can all be carried out as part of a single unified data-processing/quality control run, greatly reducing both the complexity and the total runtime of the analysis pipeline. The software, source code, and documentation are available online at http://hartleys.github.io/QoRTs.",2015-07-19 +25730491,Genome sequence-independent identification of RNA editing sites.,"RNA editing generates post-transcriptional sequence changes that can be deduced from RNA-seq data, but detection typically requires matched genomic sequence or multiple related expression data sets. We developed the GIREMI tool (genome-independent identification of RNA editing by mutual information; https://www.ibp.ucla.edu/research/xiao/GIREMI.html) to predict adenosine-to-inosine editing accurately and sensitively from a single RNA-seq data set of modest sequencing depth. Using GIREMI on existing data, we observed tissue-specific and evolutionary patterns in editing sites in the human population.",2015-03-02 +27330550,SV-STAT accurately detects structural variation via alignment to reference-based assemblies.,"

Background

Genomic deletions, inversions, and other rearrangements known collectively as structural variations (SVs) are implicated in many human disorders. Technologies for sequencing DNA provide a potentially rich source of information in which to detect breakpoints of structural variations at base-pair resolution. However, accurate prediction of SVs remains challenging, and existing informatics tools predict rearrangements with significant rates of false positives or negatives.

Results

To address this challenge, we developed 'Structural Variation detection by STAck and Tail' (SV-STAT) which implements a novel scoring metric. The software uses this statistic to quantify evidence for structural variation in genomic regions suspected of harboring rearrangements. To demonstrate SV-STAT, we used targeted and genome-wide approaches. First, we applied a custom capture array followed by Roche/454 and SV-STAT to three pediatric B-lineage acute lymphoblastic leukemias, identifying five structural variations joining known and novel breakpoint regions. Next, we detected SVs genome-wide in paired-end Illumina data collected from additional tumor samples. SV-STAT showed predictive accuracy as high as or higher than leading alternatives. The software is freely available under the terms of the GNU General Public License version 3 at https://gitorious.org/svstat/svstat.

Conclusions

SV-STAT works across multiple sequencing chemistries, paired and single-end technologies, targeted or whole-genome strategies, and it complements existing SV-detection software. The method is a significant advance towards accurate detection and genotyping of genomic rearrangements from DNA sequencing data.",2016-06-18 +23446294,Effectively addressing complex proteomic search spaces with peptide spectrum matching.,"

Summary

Protein identification by mass spectrometry is commonly accomplished using a peptide sequence matching search algorithm, whose sensitivity varies inversely with the size of the sequence database and the number of post-translational modifications considered. We present the Spectrum Identification Machine, a peptide sequence matching tool that capitalizes on the high-intensity b1-fragment ion of tandem mass spectra of peptides coupled in solution with phenylisothiocyanate to confidently sequence the first amino acid and ultimately reduce the search space. We demonstrate that in complex search spaces, a gain of some 120% in sensitivity can be achieved.

Availability

All data generated and the software are freely available for academic use at http://proteomics.fiocruz.br/software/sim.

Contact

paulo@pcarvalho.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-27 +25697821,Extending P450 site-of-metabolism models with region-resolution data.,"

Motivation

Cytochrome P450s are a family of enzymes responsible for the metabolism of approximately 90% of FDA-approved drugs. Medicinal chemists often want to know which atoms of a molecule—its metabolized sites—are oxidized by Cytochrome P450s in order to modify their metabolism. Consequently, there are several methods that use literature-derived, atom-resolution data to train models that can predict a molecule's sites of metabolism. There is, however, much more data available at a lower resolution, where the exact site of metabolism is not known, but the region of the molecule that is oxidized is known. Until now, no site-of-metabolism models made use of region-resolution data.

Results

Here, we describe XenoSite-Region, the first reported method for training site-of-metabolism models with region-resolution data. Our approach uses the Expectation Maximization algorithm to train a site-of-metabolism model. Region-resolution metabolism data was simulated from a large site-of-metabolism dataset, containing 2000 molecules with 3400 metabolized and 30 000 un-metabolized sites and covering nine Cytochrome P450 isozymes. When training on the same molecules (but with only region-level information), we find that this approach yields models almost as accurate as models trained with atom-resolution data. Moreover, we find that atom-resolution trained models are more accurate when also trained with region-resolution data from additional molecules. Our approach, therefore, opens up a way to extend the applicable domain of site-of-metabolism models into larger regions of chemical space. This meets a critical need in drug development by tapping into underutilized data commonly available in most large drug companies.

Availability and implementation

The algorithm, data and a web server are available at http://swami.wustl.edu/xregion.",2015-02-19 +26236573,"SOCR data dashboard: an integrated big data archive mashing medicare, labor, census and econometric information. ","Intuitive formulation of informative and computationally-efficient queries on big and complex datasets present a number of challenges. As data collection is increasingly streamlined and ubiquitous, data exploration, discovery and analytics get considerably harder. Exploratory querying of heterogeneous and multi-source information is both difficult and necessary to advance our knowledge about the world around us. We developed a mechanism to integrate dispersed multi-source data and service the mashed information via human and machine interfaces in a secure, scalable manner. This process facilitates the exploration of subtle associations between variables, population strata, or clusters of data elements, which may be opaque to standard independent inspection of the individual sources. This new platform includes a device agnostic tool (Dashboard webapp, http://socr.umich.edu/HTML5/Dashboard/) for graphical querying, navigating and exploring the multivariate associations in complex heterogeneous datasets. The paper illustrates this core functionality and service-oriented infrastructure using healthcare data (e.g., US data from the 2010 Census, Demographic and Economic surveys, Bureau of Labor Statistics, and Center for Medicare Services) as well as Parkinson's Disease neuroimaging data. Both the back-end data archive and the front-end dashboard interfaces are continuously expanded to include additional data elements and new ways to customize the human and machine interactions. A client-side data import utility allows for easy and intuitive integration of user-supplied datasets. 
This completely open-science framework may be used for exploratory analytics, confirmatory analyses, meta-analyses, and education and training purposes in a wide variety of fields.",2015-01-01 +26556386,contamDE: differential expression analysis of RNA-seq data for contaminated tumor samples.,"

Motivation

Accurate detection of differentially expressed genes between tumor and normal samples is a primary approach of cancer-related biomarker identification. Due to the infiltration of tumor surrounding normal cells, the expression data derived from tumor samples would always be contaminated with normal cells. Ignoring such cellular contamination would deflate the power of detecting DE genes and further confound the biological interpretation of the analysis results. For the time being, there does not exist any differential expression analysis approach for RNA-seq data in literature that can properly account for the contamination of tumor samples.

Results

Without appealing to any extra information, we develop a new method 'contamDE' based on a novel statistical model that associates RNA-seq expression levels with cell types. It is demonstrated through simulation studies that contamDE could be much more powerful than the existing methods that ignore the contamination. In the application to two cancer studies, contamDE uniquely found several potential therapy and prognostic biomarkers of prostate cancer and non-small cell lung cancer.

Availability and implementation

An R package contamDE is freely available at http://homepage.fudan.edu.cn/zhangh/softwares/

Contact

zhanghfd@fudan.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-09 +28599263,"Environmental Exposures to Lead, Mercury, and Cadmium and Hearing Loss in Adults and Adolescents: KNHANES 2010-2012.","

Background

The prevalence of hearing loss increases rapidly with aging. Hearing loss is common in all age groups, even in young adults and adolescents. A growing body of evidence has suggested that heavy metals have ototoxic effects, yet few epidemiological studies have investigated the association between heavy metals and hearing loss in a general population that includes adults and adolescents.

Objectives

We examined the association between environmental exposures to lead, mercury, and cadmium and the risk of hearing loss in adults and adolescents while controlling for potential confounding factors, including noise exposures and clinical factors.

Methods

We analyzed cross-sectional data from 5,187 adults and 853 adolescents in the Korean National Health and Nutrition Examination Survey 2010-2012. Pure-tone average (PTA) of hearing thresholds at high frequency (3, 4, and 6 kHz) were computed, and hearing loss was defined as a PTA>25 dB in adults and PTA>15 dB in adolescents.

Results

In adults, the highest (vs. lowest) quartiles of blood lead and cadmium were associated with 1.70 (95% CI: 1.25, 2.31) and 1.47 (95% CI: 1.05, 2.05) odds ratios for high-frequency hearing loss (p-trend<0.001 and=0.007), respectively. In adolescents, the highest quartile (vs. lowest) of blood cadmium had an odds ratio of 3.03 (95% CI: 1.44, 6.40) for high-frequency hearing loss (p-trend=0.003), but blood lead was not associated with hearing loss. No significant association between blood mercury and hearing loss was suggested in either adults or adolescents.

Conclusions

The results of the present study suggest that exposure to environmental lead and cadmium in adults and exposure to environmental cadmium in adolescents may play a role in the risk of hearing loss. https://doi.org/10.1289/EHP565.",2017-06-08 +28187713,Novel methods to optimize gene and statistic test for evaluation - an application for Escherichia coli.,"

Background

Since the recombinant protein was discovered, it has become more popular in many aspects of life science. The value of global pharmaceutical market was $87 billion in 2008 and the sales for industrial enzyme exceeded $4 billion in 2012. This is strong evidence showing the great potential of recombinant protein. However, native genes introduced into a host can cause incompatibility of codon usage bias, GC content, repeat region, Shine-Dalgarno sequence with host's expression system, so the yields can fall down significantly. Hence, we propose novel methods for gene optimization based on neural network, Bayesian theory, and Euclidian distance.

Result

The correlation coefficients of our neural network are 0.86, 0.73, and 0.90 in training, validation, and testing process. In addition, genes optimized by our methods seem to associate with highly expressed genes and give reasonable codon adaptation index values. Furthermore, genes optimized by the proposed methods are highly matched with the previous experimental data.

Conclusion

The proposed methods have high potential for gene optimization and further researches in gene expression. We built a demonstrative program using Matlab R2014a under Mac OS X. The program was published in both standalone executable program and Matlab function files. The developed program can be accessed from http://www.math.hcmus.edu.vn/~ptbao/paper_soft/GeneOptProg/ .",2017-02-10 +,"Links between the Environment, Abundance and Diversity of Andean Moths","Ideas on the spatial variation of biodiversity often imply a causal link between the abundance and species richness of organisms. We investigated this ‘more individuals hypothesis' using light-trapping data of three unrelated groups of moths (Arctiidae, Geometridae and Pyraloidea) from the Ecuadorian Andes. We analyzed environmental correlates of specimen densities found in different habitats, finding effects of temperature, moonlight, forest succession, elevation and season. We corrected abundance data for light-trapping artefacts, and we measured species diversity with various metrics known to be unbiased by undersampling. We found significant positive correlations between abundance and species diversity for all three taxonomic groups. We discuss implications for a general evaluation of species-energy theory as well as for a better understanding of ecological processes in montane habitats of the Andes. 
Abstract in Spanish is available at http://www.blackwell-synergy.com/loi/btp.",2011-03-01 +24698187,[Centre de référence sur les agents tératogènes (CRAT): a pioneer center].,"Le Centre de référence sur les agents tératogènes (CRAT), founded in 1975, is the first national and international public organization especially involved in the problem of drugs during pregnancy, and during this period of time has been responsible for many initiatives in this field: health care providers information and counsel service, innovating risk assessment methodology, new method for clinical data collection leading to a database including more than 50 000 exposed pregnancies, innovative free access internet website (http://www.lecrat.org), multidisciplinary expert group in French medicines agencies (Afssaps/ANSM) and foundation of a European network ""European network teratology information service"" (ENTIS). All these innovations represent consequent advances and contribute to a better management of exposed pregnant women and their newborns, as well as the survey and signal detection during pregnancy. The CRAT is also involved in the field of drugs on fertility and on paternal exposures.",2014-01-01 +27301453,ProInflam: a webserver for the prediction of proinflammatory antigenicity of peptides and proteins.,"

Background

Proinflammatory immune response involves a complex series of molecular events leading to inflammatory reaction at a site, which enables host to combat plurality of infectious agents. It can be initiated by specific stimuli such as viral, bacterial, parasitic or allergenic antigens, or by non-specific stimuli such as LPS. On encounter with such antigens, the complex interaction of antigen presenting cells, T cells and inflammatory mediators like IL1α, IL1β, TNFα, IL12, IL18 and IL23 lead to proinflammatory immune response and further clearance of infection. In this study, we have tried to establish a relation between amino acid sequence of antigen and induction of proinflammatory response.

Results

A total of 729 experimentally-validated proinflammatory and 171 non-proinflammatory epitopes were obtained from IEDB database. The A, F, I, L and V amino acids and AF, FA, FF, PF, IV, IN dipeptides were observed as preferred residues in proinflammatory epitopes. Using the compositional and motif-based features of proinflammatory and non-proinflammatory epitopes, we have developed machine learning-based models for prediction of proinflammatory response of peptides. The hybrid of motifs and dipeptide-based features displayed best performance with MCC = 0.58 and an accuracy of 87.6 %.

Conclusion

The amino acid sequence-based features of peptides were used to develop a machine learning-based prediction tool for the prediction of proinflammatory epitopes. This is a unique tool for the computational identification of proinflammatory peptide antigen/candidates and provides leads for experimental validations. The prediction model and tools for epitope mapping and similarity search are provided as a comprehensive web server which is freely available at http://metagenomics.iiserb.ac.in/proinflam/ and http://metabiosys.iiserb.ac.in/proinflam/ .",2016-06-14 +25271282,A comprehensive functional map of the hepatitis C virus genome provides a resource for probing viral proteins.,"

Unlabelled

Pairing high-throughput sequencing technologies with high-throughput mutagenesis enables genome-wide investigations of pathogenic organisms. Knowledge of the specific functions of protein domains encoded by the genome of the hepatitis C virus (HCV), a major human pathogen that contributes to liver disease worldwide, remains limited to insight from small-scale studies. To enhance the capabilities of HCV researchers, we have obtained a high-resolution functional map of the entire viral genome by combining transposon-based insertional mutagenesis with next-generation sequencing. We generated a library of 8,398 mutagenized HCV clones, each containing one 15-nucleotide sequence inserted at a unique genomic position. We passaged this library in hepatic cells, recovered virus pools, and simultaneously assayed the abundance of mutant viruses in each pool by next-generation sequencing. To illustrate the validity of the functional profile, we compared the genetic footprints of viral proteins with previously solved protein structures. Moreover, we show the utility of these genetic footprints in the identification of candidate regions for epitope tag insertion. In a second application, we screened the genetic footprints for phenotypes that reflected defects in later steps of the viral life cycle. We confirmed that viruses with insertions in a region of the nonstructural protein NS4B had a defect in infectivity while maintaining genome replication. Overall, our genome-wide HCV mutant library and the genetic footprints obtained by high-resolution profiling represent valuable new resources for the research community that can direct the attention of investigators toward unidentified roles of individual protein domains.

Importance

Our insertional mutagenesis library provides a resource that illustrates the effects of relatively small insertions on local protein structure and HCV viability. We have also generated complementary resources, including a website (http://hangfei.bol.ucla.edu) and a panel of epitope-tagged mutant viruses that should enhance the research capabilities of investigators studying HCV. Researchers can now detect epitope-tagged viral proteins by established antibodies, which will allow biochemical studies of HCV proteins for which antibodies are not readily available. Furthermore, researchers can now quickly look up genotype-phenotype relationships and base further mechanistic studies on the residue-by-residue information from the functional profile. More broadly, this approach offers a general strategy for the systematic functional characterization of viruses on the genome scale.",2014-09-30 +27378875,"Mindfulness-Based Stress Reduction, Fear Conditioning, and The Uncinate Fasciculus: A Pilot Study.","Mindfulness has been suggested to impact emotional learning, but research on these processes is scarce. The classical fear conditioning/extinction/extinction retention paradigm is a well-known method for assessing emotional learning. The present study tested the impact of mindfulness training on fear conditioning and extinction memory and further investigated whether changes in white matter fiber tracts might support such changes. The uncinate fasciculus (UNC) was of particular interest in the context of emotional learning. In this pilot study, 46 healthy participants were quasi-randomized to a Mindfulness-Based Stress Reduction (MBSR, N = 23) or waitlist control (N = 23) group and underwent a two-day fear conditioning, extinction learning, and extinction memory protocol before and after the course or control period. Skin conductance response (SCR) data served to measure the physiological response during conditioning and extinction memory phases. 
Diffusion tensor imaging (DTI) data were analyzed with probabilistic tractography and analyzed for changes of fractional anisotropy in the UNC. During conditioning, participants were able to maintain a differential response to conditioned vs. not conditioned stimuli following the MBSR course (i.e., higher sensitivity to the conditioned stimuli), while controls dropped the response. Extinction memory results were not interpretable due to baseline differences. MBSR participants showed a significant increase in fractional anisotropy in the UNC, while controls did not (group by time interaction missed significance). Pre-post changes in UNC were correlated with changes in the response to the conditioned stimuli. The findings suggest effects of mindfulness practice on the maintenance of sensitivity of emotional responses and suggest underlying neural plasticity. (ClinicalTrials.gov, Identifier NCT01320969, https://clinicaltrials.gov/ct2/show/NCT01320969).",2016-06-15 +23175605,The EBI enzyme portal.,"The availability of comprehensive information about enzymes plays an important role in answering questions relevant to interdisciplinary fields such as biochemistry, enzymology, biofuels, bioengineering and drug discovery. At the EMBL European Bioinformatics Institute, we have developed an enzyme portal (http://www.ebi.ac.uk/enzymeportal) to provide this wealth of information on enzymes from multiple in-house resources addressing particular data classes: protein sequence and structure, reactions, pathways and small molecules. The fact that these data reside in separate databases makes information discovery cumbersome. 
The main goal of the portal is to simplify this process for end users.",2012-11-21 +25948659,Data Resource Profile: The European Union Statistics on Income and Living Conditions (EU-SILC).,"Social and economic policies are inextricably linked with population health outcomes in Europe, yet few datasets are able to fully explore and compare this relationship across European countries. The European Union Statistics on Income and Living Conditions (EU-SILC) survey aims to address this gap using microdata on income, living conditions and health. EU-SILC contains both cross-sectional and longitudinal elements, with nationally representative samples of individuals 16 years and older in 28 European Union member states as well as Iceland, Norway and Switzerland. Data collection began in 2003 in Belgium, Denmark, Ireland, Greece, Luxembourg and Austria, with subsequent expansion across Europe. By 2011, all 28 EU member states, plus three others, were included in the dataset. Although EU-SILC is administered by Eurostat, the data are output-harmonized so that countries are required to collect specified data items but are free to determine sampling strategies for data collection purposes. EU-SILC covers approximately 500,000 European residents for its cross-sectional survey annually. Whereas aggregated data from EU-SILC are publicly available [http://ec.europa.eu/eurostat/web/income-and-living-conditions/data/main-tables], microdata are only available to research organizations subject to approval by Eurostat. Please refer to [http://epp.eurostat.ec.europa.eu/portal/page/portal/microdata/eu_silc] for further information regarding microdata access.",2015-04-01 +24244913,"BioNames: linking taxonomy, texts, and trees.","BioNames is a web database of taxonomic names for animals, linked to the primary literature and, wherever possible, to phylogenetic trees. 
It aims to provide a taxonomic ""dashboard"" where at a glance we can see a summary of the taxonomic and phylogenetic information we have for a given taxon and hence provide a quick answer to the basic question ""what is this taxon?"" BioNames combines classifications from the Global Biodiversity Information Facility (GBIF) and GenBank, images from the Encyclopedia of Life (EOL), animal names from the Index of Organism Names (ION), and bibliographic data from multiple sources including the Biodiversity Heritage Library (BHL) and CrossRef. The user interface includes display of full text articles, interactive timelines of taxonomic publications, and zoomable phylogenies. It is available at http://bionames.org.",2013-10-29 +27354160,BioNSi: A Discrete Biological Network Simulator Tool.,"Modeling and simulation of biological networks is an effective and widely used research methodology. The Biological Network Simulator (BioNSi) is a tool for modeling biological networks and simulating their discrete-time dynamics, implemented as a Cytoscape App. BioNSi includes a visual representation of the network that enables researchers to construct, set the parameters, and observe network behavior under various conditions. To construct a network instance in BioNSi, only partial, qualitative biological data suffices. The tool is aimed for use by experimental biologists and requires no prior computational or mathematical expertise. BioNSi is freely available at http://bionsi.wix.com/bionsi , where a complete user guide and a step-by-step manual can also be found.",2016-07-13 +25712692,specL--an R/Bioconductor package to prepare peptide spectrum matches for use in targeted proteomics.,"Targeted data extraction methods are attractive ways to obtain quantitative peptide information from a proteomics experiment. 
Sequential Window Acquisition of all Theoretical Spectra (SWATH) and Data Independent Acquisition (DIA) methods increase reproducibility of acquired data because the classical precursor selection is omitted and all present precursors are fragmented. However, especially for targeted data extraction, MS coordinates (retention time information precursor and fragment masses) are required for the particular entities (peptide ions). These coordinates are usually generated in a so-called discovery experiment earlier on in the project if not available in public spectral library repositories. The quality of the assay panel is crucial to ensure appropriate downstream analysis. For that, a method is needed to create spectral libraries and to export customizable assay panels. Here, we present a versatile set of functions to generate assay panels from spectral libraries for use in targeted data extraction methods (SWATH/DIA) in the area of proteomics. specL is implemented in the R language and available under an open-source license (GPL-3) in Bioconductor since BioC 3.0 (R-3.1) http://www.bioconductor.org (Trachsel et al., 2015). A vignette with a complete tutorial describing data import/export and analysis is included in the package and can also be found as supplement material of this article. cp@fgcz.ethz.ch or jg@fgcz.ethz.ch Supplementary data are available at Bioinformatics online.",2015-02-23 +27787832,Prediction of Protein Phosphorylation Sites by Integrating Secondary Structure Information and Other One-Dimensional Structural Properties.,"Studies on phosphorylation are important but challenging for both wet-bench experiments and computational studies, and accurate non-kinase-specific prediction tools are highly desirable for whole-genome annotation in a wide variety of species. 
Here, we describe a phosphorylation site prediction webserver, PhosphoSVM, that employs Support Vector Machine to combine protein secondary structure information and seven other one-dimensional structural properties, including Shannon entropy, relative entropy, predicted protein disorder information, predicted solvent accessible area, amino acid overlapping properties, averaged cumulative hydrophobicity, and subsequence k-nearest neighbor profiles. This method achieved AUC values of 0.8405/0.8183/0.7383 for serine (S), threonine (T), and tyrosine (Y) phosphorylation sites, respectively, in animals with a tenfold cross-validation. The model trained by the animal phosphorylation sites was also applied to a plant phosphorylation site dataset as an independent test. The AUC values for the independent test data set were 0.7761/0.6652/0.5958 for S/T/Y phosphorylation sites, respectively. This algorithm with the optimally trained model was implemented as a webserver. The webserver, trained model, and all datasets used in the current study are available at http://sysbio.unl.edu/PhosphoSVM .",2017-01-01 +26126977,An efficiency analysis of high-order combinations of gene-gene interactions using multifactor-dimensionality reduction.,"

Background

Multifactor dimensionality reduction (MDR) is widely used to analyze interactions of genes to determine the complex relationship between diseases and polymorphisms in humans. However, the astronomical number of high-order combinations makes MDR a highly time-consuming process which can be difficult to implement for multiple tests to identify more complex interactions between genes. This study proposes a new framework, named fast MDR (FMDR), which is a greedy search strategy based on the joint effect property.

Results

Six models with different minor allele frequencies (MAFs) and different sample sizes were used to generate the six simulation data sets. A real data set was obtained from the mitochondrial D-loop of chronic dialysis patients. Comparison of results from the simulation data and real data sets showed that FMDR identified significant gene-gene interaction with less computational complexity than the MDR in high-order interaction analysis.

Conclusion

FMDR improves the MDR difficulties associated with the computational loading of high-order SNPs and can be used to evaluate the relative effects of each individual SNP on disease susceptibility. FMDR is freely available at http://bioinfo.kmu.edu.tw/FMDR.rar .",2015-07-01 +23812978,CAMPways: constrained alignment framework for the comparative analysis of a pair of metabolic pathways.,"

Motivation

Given a pair of metabolic pathways, an alignment of the pathways corresponds to a mapping between similar substructures of the pair. Successful alignments may provide useful applications in phylogenetic tree reconstruction, drug design and overall may enhance our understanding of cellular metabolism.

Results

We consider the problem of providing one-to-many alignments of reactions in a pair of metabolic pathways. We first provide a constrained alignment framework applicable to the problem. We show that the constrained alignment problem even in a primitive setting is computationally intractable, which justifies efforts for designing efficient heuristics. We present our Constrained Alignment of Metabolic Pathways (CAMPways) algorithm designed for this purpose. Through extensive experiments involving a large pathway database, we demonstrate that when compared with a state-of-the-art alternative, the CAMPways algorithm provides better alignment results on metabolic networks as far as measures based on same-pathway inclusion and biochemical significance are concerned. The execution speed of our algorithm constitutes yet another important improvement over alternative algorithms.

Availability

Open source codes, executable binary, useful scripts, all the experimental data and the results are freely available as part of the Supplementary Material at http://code.google.com/p/campways/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +23255149,"Exploring genetic, genomic, and phenotypic data at the rat genome database.","The laboratory rat, Rattus norvegicus, is an important model of human health and disease, and experimental findings in the rat have relevance to human physiology and disease. The Rat Genome Database (RGD, http://rgd.mcw.edu) is a model organism database that provides access to a wide variety of curated rat data including disease associations, phenotypes, pathways, molecular functions, biological processes, and cellular components for genes, quantitative trait loci, and strains. We present an overview of the database followed by specific examples that can be used to gain experience in employing RGD to explore the wealth of functional data available for the rat.",2012-12-01 +25940630,Galahad: a web server for drug effect analysis from gene expression.,"Galahad (https://galahad.esat.kuleuven.be) is a web-based application for analysis of drug effects. It provides an intuitive interface to be used by anybody interested in leveraging microarray data to gain insights into the pharmacological effects of a drug, mainly identification of candidate targets, elucidation of mode of action and understanding of off-target effects. The core of Galahad is a network-based analysis method of gene expression. As an input, Galahad takes raw Affymetrix human microarray data from treatment versus control experiments and provides quality control and data exploration tools, as well as computation of differential expression. Alternatively, differential expression values can be uploaded directly. Using these differential expression values, drug target prioritization and both pathway and disease enrichment can be calculated and visualized. Drug target prioritization is based on the integration of the gene expression data with a functional protein association network. 
The web site is free and open to all and there is no login requirement.",2015-05-04 +26556387,"BioNetFit: a fitting tool compatible with BioNetGen, NFsim and distributed computing environments.","

Unlabelled

Rule-based models are analyzed with specialized simulators, such as those provided by the BioNetGen and NFsim open-source software packages. Here, we present BioNetFit, a general-purpose fitting tool that is compatible with BioNetGen and NFsim. BioNetFit is designed to take advantage of distributed computing resources. This feature facilitates fitting (i.e. optimization of parameter values for consistency with data) when simulations are computationally expensive.

Availability and implementation

BioNetFit can be used on stand-alone Mac, Windows/Cygwin, and Linux platforms and on Linux-based clusters running SLURM, Torque/PBS, or SGE. The BioNetFit source code (Perl) is freely available (http://bionetfit.nau.edu).

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

bionetgen.help@gmail.com.",2015-11-09 +24101916,FunGene: the functional gene pipeline and repository.,"Ribosomal RNA genes have become the standard molecular markers for microbial community analysis for good reasons, including universal occurrence in cellular organisms, availability of large databases, and ease of rRNA gene region amplification and analysis. As markers, however, rRNA genes have some significant limitations. The rRNA genes are often present in multiple copies, unlike most protein-coding genes. The slow rate of change in rRNA genes means that multiple species sometimes share identical 16S rRNA gene sequences, while many more species share identical sequences in the short 16S rRNA regions commonly analyzed. In addition, the genes involved in many important processes are not distributed in a phylogenetically coherent manner, potentially due to gene loss or horizontal gene transfer. While rRNA genes remain the most commonly used markers, key genes in ecologically important pathways, e.g., those involved in carbon and nitrogen cycling, can provide important insights into community composition and function not obtainable through rRNA analysis. However, working with ecofunctional gene data requires some tools beyond those required for rRNA analysis. To address this, our Functional Gene Pipeline and Repository (FunGene; http://fungene.cme.msu.edu/) offers databases of many common ecofunctional genes and proteins, as well as integrated tools that allow researchers to browse these collections and choose subsets for further analysis, build phylogenetic trees, test primers and probes for coverage, and download aligned sequences. Additional FunGene tools are specialized to process coding gene amplicon data. For example, FrameBot produces frameshift-corrected protein and DNA sequences from raw reads while finding the most closely related protein reference sequence. 
These tools can help provide better insight into microbial communities by directly studying key genes involved in important ecological processes.",2013-10-01 +24243689,Protein interactome analysis of 12 mitogen-activated protein kinase kinase kinase in rice using a yeast two-hybrid system.,"The mitogen-activated protein kinase (MAPK) cascade is composed at least of MAP3K (for MAPK kinase kinase), MAP2K, and MAPK family modules. These components together play a central role in mediating extracellular signals to the cell and vice versa by interacting with their partner proteins. However, the MAP3K-interacting proteins remain poorly investigated in plants. Here, we utilized a yeast two-hybrid system and bimolecular fluorescence complementation in the model crop rice (Oryza sativa) to map MAP3K-interacting proteins. We identified 12 novel nonredundant interacting protein pairs (IPPs) representing 11 nonredundant interactors using 12 rice MAP3Ks (available as full-length cDNA in the rice KOME (http://cdna01.dna.affrc.go.jp/cDNA/) at the time of experimental design and execution) as bait and a rice seedling cDNA library as prey. Of the 12 MAP3Ks, only six had interacting protein partners. The established MAP3K interactome consisted of two kinases, three proteases, two forkhead-associated domain-containing proteins, two expressed proteins, one E3 ligase, one regulatory protein, and one retrotransposon protein. Notably, no MAP3K showed physical interaction with either MAP2K or MAPK. Seven IPPs (58.3%) were confirmed in vivo by bimolecular fluorescence complementation. Subcellular localization of 14 interactors, together involved in nine IPPs (75%) further provide prerequisite for biological significance of the IPPs. Furthermore, GO of identified interactors predicted their involvement in diverse physiological responses, which were supported by a literature survey. 
These findings increase our knowledge of the MAP3K-interacting proteins, help in proposing a model of MAPK modules, provide a valuable resource for developing a complete map of the rice MAPK interactome, and allow discussion for translating the interactome knowledge to rice crop improvement against environmental factors.",2014-01-01 +26587579,Arsenic and Environmental Health: State of the Science and Future Research Opportunities.,"

Background

Exposure to inorganic and organic arsenic compounds is a major public health problem that affects hundreds of millions of people worldwide. Exposure to arsenic is associated with cancer and noncancer effects in nearly every organ in the body, and evidence is mounting for health effects at lower levels of arsenic exposure than previously thought. Building from a tremendous knowledge base with > 1,000 scientific papers published annually with ""arsenic"" in the title, the question becomes, what questions would best drive future research directions?

Objectives

The objective is to discuss emerging issues in arsenic research and identify data gaps across disciplines.

Methods

The National Institutes of Health's National Institute of Environmental Health Sciences Superfund Research Program convened a workshop to identify emerging issues and research needs to address the multi-faceted challenges related to arsenic and environmental health. This review summarizes information captured during the workshop.

Discussion

More information about aggregate exposure to arsenic is needed, including the amount and forms of arsenic found in foods. New strategies for mitigating arsenic exposures and related health effects range from engineered filtering systems to phytogenetics and nutritional interventions. Furthermore, integration of omics data with mechanistic and epidemiological data is a key step toward the goal of linking biomarkers of exposure and susceptibility to disease mechanisms and outcomes.

Conclusions

Promising research strategies and technologies for arsenic exposure and adverse health effect mitigation are being pursued, and future research is moving toward deeper collaborations and integration of information across disciplines to address data gaps.

Citation

Carlin DJ, Naujokas MF, Bradham KD, Cowden J, Heacock M, Henry HF, Lee JS, Thomas DJ, Thompson C, Tokar EJ, Waalkes MP, Birnbaum LS, Suk WA. 2016. Arsenic and environmental health: state of the science and future research opportunities. Environ Health Perspect 124:890-899; http://dx.doi.org/10.1289/ehp.1510209.",2015-11-20 +27494258,NIAS-Server: Neighbors Influence of Amino acids and Secondary Structures in Proteins.,"The exponential growth in the number of experimentally determined three-dimensional protein structures provide a new and relevant knowledge about the conformation of amino acids in proteins. Only a few of probability densities of amino acids are publicly available for use in structure validation and prediction methods. NIAS (Neighbors Influence of Amino acids and Secondary structures) is a web-based tool used to extract information about conformational preferences of amino acid residues and secondary structures in experimental-determined protein templates. This information is useful, for example, to characterize folds and local motifs in proteins, molecular folding, and can help the solution of complex problems such as protein structure prediction, protein design, among others. The NIAS-Server and supplementary data are available at http://sbcb.inf.ufrgs.br/nias .",2016-08-05 +23593466,"Exploiting the transcriptome of Euphrates Poplar, Populus euphratica (Salicaceae) to develop and characterize new EST-SSR markers and construct an EST-SSR database.","

Background

Microsatellite markers or Simple Sequence Repeats (SSRs) are the most popular markers in population/conservation genetics. However, the development of novel microsatellite markers has been impeded by high costs, a lack of available sequence data and technical difficulties. New species-specific microsatellite markers were required to investigate the evolutionary history of the Euphratica tree, Populus euphratica, the only tree species found in the desert regions of Western China and adjacent Central Asian countries.

Methodology/principal findings

A total of 94,090 non-redundant Expressed Sequence Tags (ESTs) from P. euphratica comprising around 63 Mb of sequence data were searched for SSRs. 4,202 SSRs were found in 3,839 ESTs, with 311 ESTs containing multiple SSRs. The most common motif types were trinucleotides (37%) and hexanucleotides (33%) repeats. We developed primer pairs for all of the identified EST-SSRs (eSSRs) and selected 673 of these pairs at random for further validation. 575 pairs (85%) gave successful amplification, of which, 464 (80.7%) were polymorphic in six to 24 individuals from natural populations across Northern China. We also tested the transferability of the polymorphic eSSRs to nine other Populus species. In addition, to facilitate the use of these new eSSR markers by other researchers, we mapped them onto Populus trichocarpa scaffolds in silico and compiled our data into a web-based database (http://202.205.131.253:8080/poplar/resources/static_page/index.html).

Conclusions

The large set of validated eSSRs identified in this work will have many potential applications in studies on P. euphratica and other poplar species, in fields such as population genetics, comparative genomics, linkage mapping, QTL, and marker-assisted breeding. Their use will be facilitated by their incorporation into a user-friendly web-based database.",2013-04-11 +28469375,Numericware i: Identical by State Matrix Calculator.,"We introduce software, Numericware i, to compute identical by state (IBS) matrix based on genotypic data. Calculating an IBS matrix with a large dataset requires large computer memory and takes lengthy processing time. Numericware i addresses these challenges with 2 algorithmic methods: multithreading and forward chopping. The multithreading allows computational routines to concurrently run on multiple central processing unit (CPU) processors. The forward chopping addresses memory limitation by dividing a dataset into appropriately sized subsets. Numericware i allows calculation of the IBS matrix for a large genotypic dataset using a laptop or a desktop computer. For comparison with different software, we calculated genetic relationship matrices using Numericware i, SPAGeDi, and TASSEL with the same genotypic dataset. Numericware i calculates IBS coefficients between 0 and 2, whereas SPAGeDi and TASSEL produce different ranges of values including negative values. The Pearson correlation coefficient between the matrices from Numericware i and TASSEL was high at .9972, whereas SPAGeDi showed low correlation with Numericware i (.0505) and TASSEL (.0587). With a high-dimensional dataset of 500 entities by 10 000 000 SNPs, Numericware i spent 382 minutes using 19 CPU threads and 64 GB memory by dividing the dataset into 3 pieces, whereas SPAGeDi and TASSEL failed with the same dataset. 
Numericware i is freely available for Windows and Linux under CC-BY 4.0 license at https://figshare.com/s/f100f33a8857131eb2db.",2017-03-10 +26092859,NCC-AUC: an AUC optimization method to identify multi-biomarker panel for cancer prognosis from genomic and clinical data.,"

Motivation

In prognosis and survival studies, an important goal is to identify multi-biomarker panels with predictive power using molecular characteristics or clinical observations. Such analysis is often challenged by censored, small-sample-size, but high-dimensional genomic profiles or clinical data. Therefore, sophisticated models and algorithms are in pressing need.

Results

In this study, we propose a novel Area Under Curve (AUC) optimization method for multi-biomarker panel identification named Nearest Centroid Classifier for AUC optimization (NCC-AUC). Our method is motivated by the connection between AUC score for classification accuracy evaluation and Harrell's concordance index in survival analysis. This connection allows us to convert the survival time regression problem to a binary classification problem. Then an optimization model is formulated to directly maximize AUC and meanwhile minimize the number of selected features to construct a predictor in the nearest centroid classifier framework. NCC-AUC shows its great performance by validating both in genomic data of breast cancer and clinical data of stage IB Non-Small-Cell Lung Cancer (NSCLC). For the genomic data, NCC-AUC outperforms Support Vector Machine (SVM) and Support Vector Machine-based Recursive Feature Elimination (SVM-RFE) in classification accuracy. It tends to select a multi-biomarker panel with low average redundancy and enriched biological meanings. Also NCC-AUC is more significant in separation of low and high risk cohorts than widely used Cox model (Cox proportional-hazards regression model) and L1-Cox model (L1 penalized in Cox model). These performance gains of NCC-AUC are quite robust across 5 subtypes of breast cancer. Further in an independent clinical data, NCC-AUC outperforms SVM and SVM-RFE in predictive accuracy and is consistently better than Cox model and L1-Cox model in grouping patients into high and low risk categories.

Conclusion

In summary, NCC-AUC provides a rigorous optimization framework to systematically reveal multi-biomarker panel from genomic and clinical data. It can serve as a useful tool to identify prognostic biomarkers for survival analysis.

Availability and implementation

NCC-AUC is available at http://doc.aporc.org/wiki/NCC-AUC.

Contact

ywang@amss.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-18 +26965075,The impact of varicella vaccination on varicella-related hospitalization rates: global data review.,"

Objective

to describe the impact of varicella vaccination on varicella-related hospitalization rates in countries that implemented universal vaccination against the disease.

Data source

we identified countries that implemented universal vaccination against varicella at the http://apps.who.int/immunization_monitoring/globalsummary/schedules site of the World Health Organization and selected articles in Pubmed describing the changes (pre/post-vaccination) in the varicella-related hospitalization rates in these countries, using the Keywords ""varicella"", ""vaccination/vaccine"" and ""children"" (or) ""hospitalization"". Publications in English published between January 1995 and May 2015 were included.

Data synthesis

24 countries with universal vaccination against varicella and 28 articles describing the impact of the vaccine on varicella-associated hospitalizations rates in seven countries were identified. The US had 81.4% -99.2% reduction in hospitalization rates in children younger than four years after 6-14 years after the onset of universal vaccination (1995), with vaccination coverage of 90%; Uruguay: 94% decrease (children aged 1-4 years) in six years, vaccination coverage of 90%; Canada: 93% decrease (age 1-4 years) in 10 years, coverage of 93%; Germany: 62.4% decrease (age 1-4 years) in 8 years, coverage of 78.2%; Australia: 76.8% decrease (age 1-4 years) in 5 years, coverage of 90%; Spain: 83.5% decrease (age <5 years) in four years, coverage of 77.2% and Italy 69.7% -73.8% decrease (general population), coverage of 60%-95%.

Conclusions

The publications showed variations in the percentage of decrease in varicella-related hospitalization rates after universal vaccination in the assessed countries; the results probably depend on the time since the implementation of universal vaccination, differences in the studied age group, hospital admission criteria, vaccination coverage and strategy, which does not allow direct comparison between data.",2016-02-17 +23511729,pepgrep: A tool for peptide MS/MS pattern matching.,"Typically, detection of protein sequences in collision-induced dissociation (CID) tandem MS (MS2) dataset is performed by mapping identified peptide ions back to protein sequence by using the protein database search (PDS) engine. Finding a particular peptide sequence of interest in CID MS2 records very often requires manual evaluation of the spectrum, regardless of whether the peptide-associated MS2 scan is identified by PDS algorithm or not. We have developed a compact cross-platform database-free command-line utility, pepgrep, which helps to find an MS2 fingerprint for a selected peptide sequence by pattern-matching of modelled MS2 data using Peptide-to-MS2 scoring algorithm. pepgrep can incorporate dozens of mass offsets corresponding to a variety of post-translational modifications (PTMs) into the algorithm. Decoy peptide sequences are used with the tested peptide sequence to reduce false-positive results. The engine is capable of screening an MS2 data file at a high rate when using a cluster computing environment. The matched MS2 spectrum can be displayed by using built-in graphical application programming interface (API) or optionally recorded to file. Using this algorithm, we were able to find extra peptide sequences in studied CID spectra that were missed by PDS identification. Also we found pepgrep especially useful for examining a CID of small fractions of peptides resulting from, for example, affinity purification techniques. 
The peptide sequences in such samples are less likely to be positively identified by using routine protein-centric algorithm implemented in PDS. The software is freely available at http://bsproteomics.essex.ac.uk:8080/data/download/pepgrep-1.4.tgz.",2013-03-16 +25140992,Profile hidden Markov models for the detection of viruses within metagenomic sequence data.,"Rapid, sensitive, and specific virus detection is an important component of clinical diagnostics. Massively parallel sequencing enables new diagnostic opportunities that complement traditional serological and PCR based techniques. While massively parallel sequencing promises the benefits of being more comprehensive and less biased than traditional approaches, it presents new analytical challenges, especially with respect to detection of pathogen sequences in metagenomic contexts. To a first approximation, the initial detection of viruses can be achieved simply through alignment of sequence reads or assembled contigs to a reference database of pathogen genomes with tools such as BLAST. However, recognition of highly divergent viral sequences is problematic, and may be further complicated by the inherently high mutation rates of some viral types, especially RNA viruses. In these cases, increased sensitivity may be achieved by leveraging position-specific information during the alignment process. Here, we constructed HMMER3-compatible profile hidden Markov models (profile HMMs) from all the virally annotated proteins in RefSeq in an automated fashion using a custom-built bioinformatic pipeline. We then tested the ability of these viral profile HMMs (""vFams"") to accurately classify sequences as viral or non-viral. Cross-validation experiments with full-length gene sequences showed that the vFams were able to recall 91% of left-out viral test sequences without erroneously classifying any non-viral sequences into viral protein clusters. 
Thorough reanalysis of previously published metagenomic datasets with a set of the best-performing vFams showed that they were more sensitive than BLAST for detecting sequences originating from more distant relatives of known viruses. To facilitate the use of the vFams for rapid detection of remote viral homologs in metagenomic data, we provide two sets of vFams, comprising more than 4,000 vFams each, in the HMMER3 format. We also provide the software necessary to build custom profile HMMs or update the vFams as more viruses are discovered (http://derisilab.ucsf.edu/software/vFam).",2014-08-20 +27286952,Comparison of multilocus sequence typing and multilocus typing microarray of Chlamydia trachomatis strains from Argentina and Chile.,"This study compared conventional ompA genotyping of Chlamydia trachomatis with multilocus sequence typing (MLST) and multilocus typing (MLT) DNA microarray. + +DNA extracts of 104 C. trachomatis positive specimens were analyzed by ompA sequencing and MLST and of these 76 by MLT array. Obtained MLST sequence types (STs) were compared to sequences in the database http://mlstdb.uu.se. The resolution obtained for MLST (35 STs) was 2.1 higher than for ompA sequencing (17 variants) and 1.3 higher than MLT array (27 MLT groups). Among the 104 samples the predominant genotype E could be divided into 5 ompA variants and 23 STs of which 16 had not been reported in previous studies. The most common STs, ST3 and ST56, were identified as founders and are common in several countries on a global scale. The MLST and the MLT array provided similar strain discrimination capacity and showed considerably higher resolution than conventional ompA sequencing.",2016-06-07 +26515818,EMDomics: a robust and powerful method for the identification of genes differentially expressed between heterogeneous classes.,"

Motivation

A major goal of biomedical research is to identify molecular features associated with a biological or clinical class of interest. Differential expression analysis has long been used for this purpose; however, conventional methods perform poorly when applied to data with high within class heterogeneity.

Results

To address this challenge, we developed EMDomics, a new method that uses the Earth mover's distance to measure the overall difference between the distributions of a gene's expression in two classes of samples and uses permutations to obtain q-values for each gene. We applied EMDomics to the challenging problem of identifying genes associated with drug resistance in ovarian cancer. We also used simulated data to evaluate the performance of EMDomics, in terms of sensitivity and specificity for identifying differentially expressed genes in classes with high within class heterogeneity. In both the simulated and real biological data, EMDomics outperformed competing approaches for the identification of differentially expressed genes, and EMDomics was significantly more powerful than conventional methods for the identification of drug resistance-associated gene sets. EMDomics represents a new approach for the identification of genes differentially expressed between heterogeneous classes and has utility in a wide range of complex biomedical conditions in which sample classes show within class heterogeneity.

Availability and implementation

The R package is available at http://www.bioconductor.org/packages/release/bioc/html/EMDomics.html.",2015-10-29 +27227420,Simulation of 2D NMR Spectra of Carbohydrates Using GODESS Software.,"Glycan Optimized Dual Empirical Spectrum Simulation (GODESS) is a web service, which has been recently shown to be one of the most accurate tools for simulation of (1)H and (13)C 1D NMR spectra of natural carbohydrates and their derivatives. The new version of GODESS supports visualization of the simulated (1)H and (13)C chemical shifts in the form of most 2D spin correlation spectra commonly used in carbohydrate research, such as (1)H-(1)H TOCSY, COSY/COSY-DQF/COSY-RCT, and (1)H-(13)C edHSQC, HSQC-COSY, HSQC-TOCSY, and HMBC. Peaks in the simulated 2D spectra are color-coded and labeled according to the signal assignment and can be exported in JCAMP-DX format. Peak widths are estimated empirically from the structural features. GODESS is available free of charge via the Internet at the platform of the Carbohydrate Structure Database project ( http://csdb.glycoscience.ru ).",2016-06-06 +22581768,PEP-FOLD: an updated de novo structure prediction server for both linear and disulfide bonded cyclic peptides.,"In the context of the renewed interest of peptides as therapeutics, it is important to have an on-line resource for 3D structure prediction of peptides with well-defined structures in aqueous solution. We present an updated version of PEP-FOLD allowing the treatment of both linear and disulphide bonded cyclic peptides with 9-36 amino acids. The server makes possible to define disulphide bonds and any residue-residue proximity under the guidance of the biologists. Using a benchmark of 34 cyclic peptides with one, two and three disulphide bonds, the best PEP-FOLD models deviate by an average RMS of 2.75 Å from the full NMR structures. Using a benchmark of 37 linear peptides, PEP-FOLD locates lowest-energy conformations deviating by 3 Å RMS from the NMR rigid cores. 
The evolution of PEP-FOLD comes as a new on-line service to supersede the previous server. The server is available at: http://bioserv.rpbs.univ-paris-diderot.fr/PEP-FOLD.",2012-05-11 +28497124,"PARV4 prevalence, phylogeny, immunology and coinfection with HIV, HBV and HCV in a multicentre African cohort.","Background: The seroprevalence of human parvovirus-4 (PARV4) varies considerably by region. In sub-Saharan Africa, seroprevalence is high in the general population, but little is known about the transmission routes or the prevalence of coinfection with blood-borne viruses, HBV, HCV and HIV.  Methods: To further explore the characteristics of PARV4 in this setting, with a particular focus on the prevalence and significance of coinfection, we screened a cohort of 695 individuals recruited from Durban and Kimberley (South Africa) and Gaborone (Botswana) for PARV4 IgG and DNA, as well as documenting HIV, HBV and HCV status.  Results: Within these cohorts, 69% of subjects were HIV-positive. We identified no cases of HCV by PCR, but 7.4% were positive for HBsAg. PARV4 IgG was positive in 42%; seroprevalence was higher in adults (69%) compared to children (21%) (p<0.0001) and in HIV-positive (52%) compared to HIV-negative individuals (24%) (p<0.0001), but there was no association with HBsAg status. We developed an on-line tool to allow visualization of coinfection data (https://purl.oclc.org/coinfection-viz). We identified five subjects who were PCR-positive for PARV4 genotype-3. Ex vivo CD8+ T cell responses spanned the entire PARV4 proteome and we propose a novel HLA-B*57:03-restricted epitope within the NS protein.  
Conclusions: This characterisation of PARV4 infection provides enhanced insights into the epidemiology of infection and co-infection in African cohorts, and provides the foundations for planning further focused studies to elucidate transmission pathways, immune responses, and the clinical significance of this organism.",2017-04-07 +28388646,Bayesian evaluation of effect size after replicating an original study.,"The vast majority of published results in the literature is statistically significant, which raises concerns about their reliability. The Reproducibility Project Psychology (RPP) and Experimental Economics Replication Project (EE-RP) both replicated a large number of published studies in psychology and economics. The original study and replication were statistically significant in 36.1% in RPP and 68.8% in EE-RP suggesting many null effects among the replicated studies. However, evidence in favor of the null hypothesis cannot be examined with null hypothesis significance testing. We developed a Bayesian meta-analysis method called snapshot hybrid that is easy to use and understand and quantifies the amount of evidence in favor of a zero, small, medium and large effect. The method computes posterior model probabilities for a zero, small, medium, and large effect and adjusts for publication bias by taking into account that the original study is statistically significant. We first analytically approximate the methods performance, and demonstrate the necessity to control for the original study's significance to enable the accumulation of evidence for a true zero effect. Then we applied the method to the data of RPP and EE-RP, showing that the underlying effect sizes of the included studies in EE-RP are generally larger than in RPP, but that the sample sizes of especially the included studies in RPP are often too small to draw definite conclusions about the true effect size. 
We also illustrate how snapshot hybrid can be used to determine the required sample size of the replication akin to power analysis in null hypothesis significance testing and present an easy to use web application (https://rvanaert.shinyapps.io/snapshot/) and R code for applying the method.",2017-04-07 +24564552,Systematically profiling and annotating long intergenic non-coding RNAs in human embryonic stem cell.,"

Background

While more and more long intergenic non-coding RNAs (lincRNAs) were identified to take important roles in both maintaining pluripotency and regulating differentiation, how these lincRNAs may define and drive cell fate decisions on a global scale is still mostly elusive. Systematic profiling and comprehensive annotation of embryonic stem cells lincRNAs may not only bring a clearer big picture of these novel regulators but also shed light on their functionalities.

Results

Based on multiple RNA-Seq datasets, we systematically identified 300 human embryonic stem cell lincRNAs (hES lincRNAs). Of which, one fourth (78 out of 300) hES lincRNAs were further identified to be biasedly expressed in human ES cells. Functional analysis showed that they were preferentially involved in several early-development related biological processes. Comparative genomics analysis further suggested that around half of the identified hES lincRNAs were conserved in mouse. To facilitate further investigation of these hES lincRNAs, we constructed an online portal for biologists to access all their sequences and annotations interactively. In addition to navigation through a genome browse interface, users can also locate lincRNAs through an advanced query interface based on both keywords and expression profiles, and analyze results through multiple tools.

Conclusions

By integrating multiple RNA-Seq datasets, we systematically characterized and annotated 300 hES lincRNAs. A full functional web portal is available freely at http://scbrowse.cbi.pku.edu.cn. As the first global profiling and annotating of human embryonic stem cell lincRNAs, this work aims to provide a valuable resource for both experimental biologists and bioinformaticians.",2013-10-16 +27392337,Full-Body Musculoskeletal Model for Muscle-Driven Simulation of Human Gait.,"

Objective

Musculoskeletal models provide a non-invasive means to study human movement and predict the effects of interventions on gait. Our goal was to create an open-source 3-D musculoskeletal model with high-fidelity representations of the lower limb musculature of healthy young individuals that can be used to generate accurate simulations of gait.

Methods

Our model includes bony geometry for the full body, 37 degrees of freedom to define joint kinematics, Hill-type models of 80 muscle-tendon units actuating the lower limbs, and 17 ideal torque actuators driving the upper body. The model's musculotendon parameters are derived from previous anatomical measurements of 21 cadaver specimens and magnetic resonance images of 24 young healthy subjects. We tested the model by evaluating its computational time and accuracy of simulations of healthy walking and running.

Results

Generating muscle-driven simulations of normal walking and running took approximately 10 minutes on a typical desktop computer. The differences between our muscle-generated and inverse dynamics joint moments were within 3% (RMSE) of the peak inverse dynamics joint moments in both walking and running, and our simulated muscle activity showed qualitative agreement with salient features from experimental electromyography data.

Conclusion

These results suggest that our model is suitable for generating muscle-driven simulations of healthy gait. We encourage other researchers to further validate and apply the model to study other motions of the lower extremity.

Significance

The model is implemented in the open-source software platform OpenSim. The model and data used to create and test the simulations are freely available at https://simtk.org/home/full_body/, allowing others to reproduce these results and create their own simulations.",2016-07-07 +28643908,Jaw mechanics in dolichofacial and brachyfacial phenotypes: A longitudinal cephalometric-based study.,"

Objectives

To determine whether dolichofacial (Frankfort horizontal mandibular plane angle (FHMPA) ≥30°) vs brachyfacial (FHMPA ≤22°) phenotypes differ in temporomandibular joint (TMJ) loads and whether these differences correlate longitudinally with mandibular ramus height (Condylion-Gonion, Co-Go).

Setting and sample population

Lateral and posteroanterior cephalographs from ten dolichofacial and ten brachyfacial individuals made at average ages of 6 (T1), 12 (T2) and 18 (T3) years and available online (http://www.aaoflegacycollection.org/aaof_home.html) were used.

Materials and methods

Three-dimensional anatomical data were derived from cephalographs and used in numerical models to predict TMJ loads for a range of biting angles on incisors, canines and first molars. Two criteria were used to define clinically important between-group TMJ load differences: statistical significance was defined with a two-group t-test, and where differences were also ≥20%. A statistical approach called response surface analysis was used to assess correlation between TMJ loads and its predictors considered in this study.

Results

The two phenotypes had significantly different FHMPA at all ages (P<.05). No differences in TMJ loads were found at T1. Ipsilateral and contralateral TMJ loads at T2 and T3 were significant and ≥20% larger in dolichofacial than brachyfacial phenotypes for specific biting angles (all adjusted P<.05). Regression analysis indicated age and ramus height contribute 53% of the variability in normalized values of TMJ loads. At higher ages, dolichofacial phenotypes had significantly higher TMJ loads which were correlated with shorter ramus heights compared to brachyfacial phenotypes.

Conclusions

Craniofacial mechanics may explain, in part, mandibular growth differences between dolichofacial and brachyfacial phenotypes.",2017-06-01 +23567054,Sire predicted transmitting ability for conformation and yield traits and previous lactation incidence of foot lesions as risk factors for the incidence of foot lesions in Holstein cows.,"The aims of the present study were (1) to investigate the repeatability of foot lesions [sole ulcers (SU), white line disease (WLD), and digital dermatitis (DD)] across multiple lactations, (2) to evaluate the effect of foot lesions on cow survivability and milk production across multiple lactations, and (3) to investigate the role of sire predicted transmitting ability (PTA) for conformation and production traits as risk factors for the incidence of SU, WLD, and DD. Data were collected from a dairy farm located in Cayuga County, New York. A total of 11,442 cows having first calved during the period from May 13, 2001, to March 26, 2012, were enrolled in the study. Data regarding sire genetic evaluations were obtained from DairyBulls.com (http://www.DairyBulls.com). Lameness was detected and treated and lesions were recorded into a dairy record database (DairyCOMP 305; Valley Agricultural Software, Tulare, CA) by trained farm employees. All demographic, production, and foot lesion data were extracted from DairyCOMP 305 and merged with the sires' PTA information to form a unique database. Mixed logistic regression, general linear mixed, and multivariable Cox proportional hazards models were used to analyze the data. Sole ulcers, WLD, and DD incidence was significantly higher for cows affected with SU, WLD, or DD in previous lactations. Cows affected with WLD or DD during the first lactation had significantly higher WLD or DD incidence during the second and the third lactations. 
Cows affected with SU or WLD during their first lactation had significantly lower milk production during the second lactation and cows diagnosed with SU, WLD, or DD during their second lactation had higher second-lactation mature-equivalent 305-d milk yield. Sire PTA for milk and protein yield were significantly associated with the incidence of SU, WLD, and DD and incidence of SU and WLD, respectively. Sire PTA for several conformation traits were found to be associated with SU, WLD, and DD incidence. Cows that were affected with SU or WLD during their first lactation were at 1.18 or 1.43 higher hazard of culling than unaffected cows, respectively. In summary, we conclude that SU, WLD, and DD are highly repeatable across lactations and that high genetic merit for milk and protein production as well as milk production (phenotype) were significantly associated with higher risk of SU, WLD, and DD. Additionally, sire PTA for several conformation traits were significantly associated with their daughters' foot lesion incidence.",2013-04-05 +26923200,The Transcriptional Landscape of the Photosynthetic Model Cyanobacterium Synechocystis sp. PCC6803.,"Cyanobacteria exhibit a great capacity to adapt to different environmental conditions through changes in gene expression. Although this plasticity has been extensively studied in the model cyanobacterium Synechocystis sp. PCC 6803, a detailed analysis of the coordinated transcriptional adaption across varying conditions is lacking. Here, we report a meta-analysis of 756 individual microarray measurements conducted in 37 independent studies-the most comprehensive study of the Synechocystis transcriptome to date. Using stringent statistical evaluation, we characterized the coordinated adaptation of Synechocystis' gene expression on systems level. Evaluation of the data revealed that the photosynthetic apparatus is subjected to greater changes in expression than other cellular components. 
Nevertheless, network analyses indicated a significant degree of transcriptional coordination of photosynthesis and various metabolic processes, and revealed the tight co-regulation of components of photosystems I, II and phycobilisomes. Detailed inspection of the integrated data led to the discovery of a variety of regulatory patterns and novel putative photosynthetic genes. Intriguingly, global clustering analyses suggested contrasting transcriptional response of metabolic and regulatory genes to stress conditions. The integrated Synechocystis transcriptome can be accessed and interactively analyzed via the CyanoEXpress website (http://cyanoexpress.sysbiolab.eu).",2016-02-29 +27270713,HITSZ_CDR: an end-to-end chemical and disease relation extraction system for BioCreative V. ,"In this article, an end-to-end system was proposed for the challenge task of disease named entity recognition (DNER) and chemical-induced disease (CID) relation extraction in BioCreative V, where DNER includes disease mention recognition (DMR) and normalization (DN). Evaluation on the challenge corpus showed that our system achieved the highest F1-scores 86.93% on DMR, 84.11% on DN, 43.04% on CID relation extraction, respectively. The F1-score on DMR is higher than our previous one reported by the challenge organizers (86.76%), the highest F1-score of the challenge. Database URL: http://database.oxfordjournals.org/content/2016/baw077.",2016-06-05 +28166503,PhosPred-RF: A Novel Sequence-Based Predictor for Phosphorylation Sites Using Sequential Information Only.,"Many recent efforts have been made for the development of machine learning-based methods for fast and accurate phosphorylation site prediction. Currently, a majority of well-performing methods are based on hybrid information to build prediction models, such as evolutionary information, disorder information, and so on. 
Unfortunately, this type of method suffers from two major limitations: one is that it is not of much help for protein phosphorylation site prediction when no obvious homology is detected; the other is that computing such complicated information is time-consuming, which probably limits the usage of predictors in practical applications. In this paper, we present a simple, fast, and powerful feature representation algorithm, which sufficiently explores the sequential information from multiple perspectives only based on primary sequences, and successfully captures the differences between true phosphorylation sites and non-phosphorylation sites. Using the proposed features, we propose a random forest-based predictor named PhosPred-RF in the prediction of protein phosphorylation sites from proteins. We evaluate and compare the proposed predictor with the state-of-the-art predictors on some benchmark data sets. The experimental results show that PhosPred-RF outperforms other existing predictors, demonstrating its potential to be a useful tool for protein phosphorylation site prediction. Currently, the proposed PhosPred-RF is freely accessible to the public through the user-friendly webserver http://server.malab.cn/PhosPred-RF.",2017-01-31 +26909368,"Structural, thermal and photo-physical data of azo-aromatic TEMPO derivatives before and after their grafting to polyolefins.","The data reported in this paper are complementary to the characterization of 4-(phenylazo)-benzoyl-2,2,6,6-tetramethylpiperidine-1-oxyl radical (AzO-TEMPO) and of the 4-(2-thienylazo)-benzoyl-2,2,6,6-tetramethylpiperidine-1-oxyl radical (ThiO-TEMPO) before and after their grafting to two polyethylene matrices (a copolymer ethylene/α-olefin (co-EO) and a high density polyethylene (HDPE)). 
Particularly the data reported in this paper confirm the structure (FT-IR analysis), the thermal (TGA and EPR) and the photo-physical (UV-vis) properties of the RO-TEMPO derivatives before and after their grafting. Herein, the FT-IR spectrum and TGA thermogram of ThiO-TEMPO were compared with those of AzO-TEMPO. Moreover, the superimposition of UV-vis spectra collected during the irradiation under 366 or 254 nm emitting lamp of AzO-TEMPO and ThiO-TEMPO in acetonitrile solution are reported. Finally, a complete DSC characterization of the functionalized POs is shown. DOI of original article: 〈http://dx.doi.org/10.1016/j.polymer.2015.11.018〉 [1].",2016-01-06 +28035027,QAcon: single model quality assessment using protein structural and contact information with machine learning techniques.,"

Motivation

Protein model quality assessment (QA) plays a very important role in protein structure prediction. It can be divided into two groups of methods: single model and consensus QA method. The consensus QA methods may fail when there is a large portion of low quality models in the model pool.

Results

In this paper, we develop a novel single-model quality assessment method QAcon utilizing structural features, physicochemical properties, and residue contact predictions. We apply residue-residue contact information predicted by two protein contact prediction methods PSICOV and DNcon to generate a new score as feature for quality assessment. This novel feature and other 11 features are used as input to train a two-layer neural network on CASP9 datasets to predict the quality of a single protein model. We blindly benchmarked our method QAcon on CASP11 dataset as the MULTICOM-CLUSTER server. Based on the evaluation, our method is ranked as one of the top single model QA methods. The good performance of the features based on contact prediction illustrates the value of using contact information in protein quality assessment.

Availability and implementation

The web server and the source code of QAcon are freely available at: http://cactus.rnet.missouri.edu/QAcon.

Contact

chengji@missouri.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +29202050,One Size Doesn't Fit All: Measuring Individual Privacy in Aggregate Genomic Data.,"Even in the aggregate, genomic data can reveal sensitive information about individuals. We present a new model-based measure, PrivMAF, that provides provable privacy guarantees for aggregate data (namely minor allele frequencies) obtained from genomic studies. Unlike many previous measures that have been designed to measure the total privacy lost by all participants in a study, PrivMAF gives an individual privacy measure for each participant in the study, not just an average measure. These individual measures can then be combined to measure the worst case privacy loss in the study. Our measure also allows us to quantify the privacy gains achieved by perturbing the data, either by adding noise or binning. Our findings demonstrate that both perturbation approaches offer significant privacy gains. Moreover, we see that these privacy gains can be achieved while minimizing perturbation (and thus maximizing the utility) relative to stricter notions of privacy, such as differential privacy. We test PrivMAF using genotype data from the Wellcome Trust Case Control Consortium, providing a more nuanced understanding of the privacy risks involved in an actual genome-wide association studies. Interestingly, our analysis demonstrates that the privacy implications of releasing MAFs from a study can differ greatly from individual to individual. An implementation of our method is available at http://privmaf.csail.mit.edu.",2015-07-20 +22693220,"CellBase, a comprehensive collection of RESTful web services for retrieving relevant biological information from heterogeneous sources.","During the past years, the advances in high-throughput technologies have produced an unprecedented growth in the number and size of repositories and databases storing relevant biological data. 
Today, there is more biological information than ever but, unfortunately, the current status of many of these repositories is far from being optimal. Some of the most common problems are that the information is spread out in many small databases; frequently there are different standards among repositories and some databases are no longer supported or they contain too specific and unconnected information. In addition, data size is increasingly becoming an obstacle when accessing or storing biological data. All these issues make it very difficult to extract and integrate information from different sources, to analyze experiments or to access and query this information in a programmatic way. CellBase provides a solution to the growing necessity of integration by easing the access to biological data. CellBase implements a set of RESTful web services that query a centralized database containing the most relevant biological data sources. The database is hosted on our servers and is regularly updated. CellBase documentation can be found at http://docs.bioinfo.cipf.es/projects/cellbase.",2012-06-12 +26890920,Prediction and Validation of Disease Genes Using HeteSim Scores.,"Deciphering the gene disease association is an important goal in biomedical research. In this paper, we use a novel relevance measure, called HeteSim, to prioritize candidate disease genes. Two methods based on heterogeneous networks constructed using protein-protein interaction, gene-phenotype associations, and phenotype-phenotype similarity, are presented. In HeteSim_MultiPath (HSMP), HeteSim scores of different paths are combined with a constant that dampens the contributions of longer paths. In HeteSim_SVM (HSSVM), HeteSim scores are combined with a machine learning method. 
The 3-fold experiments show that our non-machine learning method HSMP performs better than the existing non-machine learning methods, and that our machine learning method HSSVM obtains accuracy similar to that of the best existing machine learning method CATAPULT. From the analysis of the top 10 predicted genes for different diseases, we found that HSSVM avoids the disadvantage of the existing machine learning based methods, which always predict similar genes for different diseases. The data sets and Matlab code for the two methods are freely available for download at http://lab.malab.cn/data/HeteSim/index.jsp.",2016-02-12 +26217800,"Metagenomic data of fungal internal transcribed Spacer and 18S rRNA gene sequences from Lonar lake sediment, India.","The data in this article contains the sequences of fungal Internal Transcribed Spacer (ITS) and 18S rRNA gene from a metagenome of Lonar soda lake, India. Sequences were amplified using fungal specific primers, which amplified the amplicon lined between the 18S and 28S rRNA genes. Data were obtained using Fungal tag-encoded FLX amplicon pyrosequencing (fTEFAP) technique and used to analyze fungal profile by the culture-independent method. Primary analysis using PlutoF 454 pipeline suggests the Lonar lake mycobiome contained 29 different fungal species. The raw sequencing data used to perform this analysis along with FASTQ file are located in the NCBI Sequence Read Archive (SRA) under accession No. SRX889598 (http://www.ncbi.nlm.nih.gov/sra/SRX889598).",2015-06-18 +27730161,Factors that affect simulated driving in patients with obstructive sleep apnoea. ,Objective data for advising sleep apnoea sufferers whether they are at increased risk of an accident when driving http://ow.ly/TWPgm.,2015-10-01 +27259657,The Orthology Ontology: development and applications.,"

Background

Computational comparative analysis of multiple genomes provides valuable opportunities to biomedical research. In particular, orthology analysis can play a central role in comparative genomics; it guides establishing evolutionary relations among genes of organisms and allows functional inference of gene products. However, the wide variations in current orthology databases necessitate the research toward the shareability of the content that is generated by different tools and stored in different structures. Exchanging the content with other research communities requires making the meaning of the content explicit.

Description

The need for a common ontology has led to the creation of the Orthology Ontology (ORTH) following the best practices in ontology construction. Here, we describe our model and major entities of the ontology that is implemented in the Web Ontology Language (OWL), followed by the assessment of the quality of the ontology and the application of the ORTH to existing orthology datasets. This shareable ontology enables the possibility to develop Linked Orthology Datasets and a meta-predictor of orthology through standardization for the representation of orthology databases. The ORTH is freely available in OWL format to all users at http://purl.org/net/orth .

Conclusions

The Orthology Ontology can serve as a framework for the semantic standardization of orthology content and it will contribute to a better exploitation of orthology resources in biomedical research. The results demonstrate the feasibility of developing shareable datasets using this ontology. Further applications will maximize the usefulness of this ontology.",2016-06-04 +27694198,bTSSfinder: a novel tool for the prediction of promoters in cyanobacteria and Escherichia coli.,"

Motivation

The computational search for promoters in prokaryotes remains an attractive problem in bioinformatics. Despite the attention it has received for many years, the problem has not been addressed satisfactorily. In any bacterial genome, the transcription start site is chosen mostly by the sigma (σ) factor proteins, which control the gene activation. The majority of published bacterial promoter prediction tools target σ70 promoters in Escherichia coli. Moreover, no σ-specific classification of promoters is available for prokaryotes other than for E. coli.

Results

Here, we introduce bTSSfinder, a novel tool that predicts putative promoters for five classes of σ factors in Cyanobacteria (σA, σC, σH, σG and σF) and for five classes of sigma factors in E. coli (σ70, σ38, σ32, σ28 and σ24). Compared with currently available tools, bTSSfinder achieves higher accuracy (MCC = 0.86, F1-score = 0.93, versus MCC = 0.59, F1-score = 0.79 for the next best tool) and covers multiple classes of promoters.

Availability and implementation

bTSSfinder is available standalone and online at http://www.cbrc.kaust.edu.sa/btssfinder .

Contacts

ilham.shahmuradov@kaust.edu.sa or vladimir.bajic@kaust.edu.sa.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +26459872,Prioritization Of Nonsynonymous Single Nucleotide Variants For Exome Sequencing Studies Via Integrative Learning On Multiple Genomic Data.,"The rapid advancement of next generation sequencing technology has greatly accelerated the progress for understanding human inherited diseases via such innovations as exome sequencing. Nevertheless, the identification of causative variants from sequencing data remains a great challenge. Traditional statistical genetics approaches such as linkage analysis and association studies have limited power in analyzing exome sequencing data, while relying on simply filtration strategies and predicted functional implications of mutations to pinpoint pathogenic variants are prone to produce false positives. To overcome these limitations, we herein propose a supervised learning approach, termed snvForest, to prioritize candidate nonsynonymous single nucleotide variants for a specific type of disease by integrating 11 functional scores at the variant level and 8 association scores at the gene level. We conduct a series of large-scale in silico validation experiments, demonstrating the effectiveness of snvForest across 2,511 diseases of different inheritance styles and the superiority of our approach over two state-of-the-art methods. We further apply snvForest to three real exome sequencing data sets of epileptic encephalophathies and intellectual disability to show the ability of our approach to identify causative de novo mutations for these complex diseases. The online service and standalone software of snvForest are found at http://bioinfo.au.tsinghua.edu.cn/jianglab/snvforest.",2015-10-13 +25913208,phylogeo: an R package for geographic analysis and visualization of microbiome data.,"

Motivation

We have created an R package named phylogeo that provides a set of geographic utilities for sequencing-based microbial ecology studies. Although the geographic location of samples is an important aspect of environmental microbiology, none of the major software packages used in processing microbiome data include utilities that allow users to map and explore the spatial dimension of their data. phylogeo solves this problem by providing a set of plotting and mapping functions that can be used to visualize the geographic distribution of samples, to look at the relatedness of microbiomes using ecological distance, and to map the geographic distribution of particular sequences. By extending the popular phyloseq package and using the same data structures and command formats, phylogeo allows users to easily map and explore the geographic dimensions of their data from the R programming language.

Availability and implementation

phylogeo is documented and freely available at http://zachcp.github.io/phylogeo

Contact

: zcharlop@rockefeller.edu.",2015-04-25 +26668005,DNAshapeR: an R/Bioconductor package for DNA shape prediction and feature encoding.,"

Unlabelled

DNAshapeR predicts DNA shape features in an ultra-fast, high-throughput manner from genomic sequencing data. The package takes either nucleotide sequence or genomic coordinates as input and generates various graphical representations for visualization and further analysis. DNAshapeR further encodes DNA sequence and shape features as user-defined combinations of k-mer and DNA shape features. The resulting feature matrices can be readily used as input of various machine learning software packages for further modeling studies.

Availability and implementation

The DNAshapeR software package was implemented in the statistical programming language R and is freely available through the Bioconductor project at https://www.bioconductor.org/packages/devel/bioc/html/DNAshapeR.html and at the GitHub developer site, http://tsupeichiu.github.io/DNAshapeR/ CONTACT: rohs@usc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-14 +25977808,"A multi-subject, multi-modal human neuroimaging dataset.","We describe data acquired with multiple functional and structural neuroimaging modalities on the same nineteen healthy volunteers. The functional data include Electroencephalography (EEG), Magnetoencephalography (MEG) and functional Magnetic Resonance Imaging (fMRI) data, recorded while the volunteers performed multiple runs of hundreds of trials of a simple perceptual task on pictures of familiar, unfamiliar and scrambled faces during two visits to the laboratory. The structural data include T1-weighted MPRAGE, Multi-Echo FLASH and Diffusion-weighted MR sequences. Though only from a small sample of volunteers, these data can be used to develop methods for integrating multiple modalities from multiple runs on multiple participants, with the aim of increasing the spatial and temporal resolution above that of any one modality alone. They can also be used to integrate measures of functional and structural connectivity, and as a benchmark dataset to compare results across the many neuroimaging analysis packages. The data are freely available from https://openfmri.org/.",2015-01-20 +27003505,The Ebb and Flow of Airborne Pathogens: Monitoring and Use in Disease Management Decisions.,"Perhaps the earliest form of monitoring the regional spread of plant disease was a group of growers gathering together at the market and discussing what they see in their crops. This type of reporting continues to this day through regional extension blogs, by crop consultants and more formal scouting of sentential plots in the IPM PIPE network (http://www.ipmpipe.org/). As our knowledge of plant disease epidemiology has increased, we have also increased our ability to detect and monitor the presence of pathogens and use this information to make management decisions in commercial production systems. 
The advent of phylogenetics, next-generation sequencing, and nucleic acid amplification technologies has allowed for development of sensitive and accurate assays for pathogen inoculum detection and quantification. The application of these tools is beginning to change how we manage diseases with airborne inoculum by allowing for the detection of pathogen movement instead of assuming it and by targeting management strategies to the early phases of the epidemic development when there is the greatest opportunity to reduce the rate of disease development. While there are numerous advantages to using data on inoculum presence to aid management decisions, there are limitations in what the data represent that are often unrecognized. In addition, our understanding of where and how to effectively monitor airborne inoculum is limited. There is a strong need to improve our knowledge of the mechanisms that influence inoculum dispersion across scales as particles move from leaf to leaf, and everything in between.",2016-04-05 +25701573,Oasis: online analysis of small RNA deep sequencing data.,"

Unlabelled

Oasis is a web application that allows for the fast and flexible online analysis of small-RNA-seq (sRNA-seq) data. It was designed for the end user in the lab, providing an easy-to-use web frontend including video tutorials, demo data and best practice step-by-step guidelines on how to analyze sRNA-seq data. Oasis' exclusive selling points are a differential expression module that allows for the multivariate analysis of samples, a classification module for robust biomarker detection and an advanced programming interface that supports the batch submission of jobs. Both modules include the analysis of novel miRNAs, miRNA targets and functional analyses including GO and pathway enrichment. Oasis generates downloadable interactive web reports for easy visualization, exploration and analysis of data on a local system. Finally, Oasis' modular workflow enables for the rapid (re-) analysis of data.

Availability and implementation

Oasis is implemented in Python, R, Java, PHP, C++ and JavaScript. It is freely available at http://oasis.dzne.de.

Contact

stefan.bonn@dzne.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-19 +28858830,Exposure to Low Levels of Lead in Utero and Umbilical Cord Blood DNA Methylation in Project Viva: An Epigenome-Wide Association Study.,"

Background

Early-life exposure to lead is associated with deficits in neurodevelopment and with hematopoietic system toxicity. DNA methylation may be one of the underlying mechanisms for the adverse effects of prenatal lead on the offspring, but epigenome-wide methylation data for low levels of prenatal lead exposure are lacking.

Objectives

We investigated the association between prenatal maternal lead exposure and epigenome-wide DNA methylation in umbilical cord blood nucleated cells in Project Viva, a prospective U.S.-based prebirth cohort with relatively low levels of lead exposure.

Methods

Among 268 mother-infant pairs, we measured lead concentrations in red blood cells (RBC) from prenatal maternal blood samples, and using HumanMethylation450 Bead Chips, we measured genome-wide methylation levels at 482,397 CpG loci in umbilical cord blood and retained 394,460 loci after quality control. After adjustment for batch effects, cell types, and covariates, we used robust linear regression models to examine associations of prenatal lead exposure with DNA methylation in cord blood at epigenome-wide significance level [false discovery rate (FDR)<0.05].

Results

The mean [standard deviation (SD)] maternal RBC lead level was 1.22 (0.63) μg/dL. CpG cg10773601 showed an epigenome-wide significant negative association with prenatal lead exposure (-1.4% per doubling increase in lead exposure; p=2.3×10^{-7}) and was annotated to C-Type Lectin Domain Family 11, Member A (CLEC11A), which functions as a growth factor for primitive hematopoietic progenitor cells. In sex-specific analyses, we identified more CpGs with FDR<0.05 among female infants (n=38) than among male infants (n=2). One CpG (cg24637308), which showed a strong negative association with prenatal lead exposure among female infants (-4.3% per doubling increase in lead exposure; p=1.1×10^{-6}), was annotated to Dynein Heavy Chain Domain 1 gene (DNHD1) which is highly expressed in human brain. Interestingly, there were strong correlations between blood and brain methylation for CpG (cg24637308) based on another independent set of samples with a high proportion of female participants.

Conclusion

Prenatal low-level lead exposure was associated with newborn DNA methylation, particularly in female infants. https://doi.org/10.1289/EHP1246.",2017-08-25 +27378297,The SMAL web server: global multiple network alignment from pairwise alignments.,"

Motivation

Alignments of protein-protein interaction networks (PPIN) can be used to predict protein function, study conserved aspects of the interactome, and to establish evolutionary correspondences. Within this problem context, determining multiple network alignments (MNA) is a significant challenge that involves high computational complexity. A limited number of public MNA implementations are available currently and the majority of the pairwise network alignment (PNA) algorithms do not have MNA counterparts. Furthermore, current MNA algorithms do not allow choosing a specific PPIN relative to which an MNA could be constructed. Also, once an MNA is obtained, it cannot easily be modified, such as through addition of a new network, without expensive re-computation of the entire MNA.

Results

SMAL (Scaffold-Based Multiple Network Aligner) is a public, open-source, web-based application for determining MNAs from existing PNAs that addresses all the aforementioned challenges. With SMAL, PNAs can be combined rapidly to obtain an MNA. The software also supports visualization and user-data interactions to facilitate exploratory analysis and sensemaking. SMAL is especially useful when multiple alignments relative to a particular PPIN are required; furthermore, SMAL alignments are persistent in that existing correspondences between networks (obtained during PNA or MNA) are not lost as new networks are added. In comparative studies alongside existent MNA techniques, SMAL MNAs were found to be superior per a number of measures, such as the total number of identified homologs and interologs as well as the fraction of all identified correspondences that are functionally similar or homologous to the scaffold. While directed primarily at PPIN-alignment, SMAL is a generic network aligner and may be applied to arbitrary networks. Availability information: The SMAL web server and source code is available at: http://haddock6.sfsu.edu/smal/ CONTACT: rahul@sfsu.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-04 +22568821,PHYLOViZ: phylogenetic inference and data visualization for sequence based typing methods.,"

Background

With the decrease of DNA sequencing costs, sequence-based typing methods are rapidly becoming the gold standard for epidemiological surveillance. These methods provide reproducible and comparable results needed for a global scale bacterial population analysis, while retaining their usefulness for local epidemiological surveys. Online databases that collect the generated allelic profiles and associated epidemiological data are available but this wealth of data remains underused and is frequently poorly annotated since no user-friendly tool exists to analyze and explore it.

Results

PHYLOViZ is platform independent Java software that allows the integrated analysis of sequence-based typing methods, including SNP data generated from whole genome sequence approaches, and associated epidemiological data. goeBURST and its Minimum Spanning Tree expansion are used for visualizing the possible evolutionary relationships between isolates. The results can be displayed as an annotated graph overlaying the query results of any other epidemiological data available.

Conclusions

PHYLOViZ is a user-friendly software that allows the combined analysis of multiple data sources for microbial epidemiological and population studies. It is freely available at http://www.phyloviz.net.",2012-05-08 +26491177,Cost-Effectiveness of 30- Compared to 20-Milliliter Blood Cultures: a Retrospective Study.,"The importance of blood culture (BC) volume for detection of bloodstream infections (BSIs) is documented. Recently, improved diagnostic sensitivity was demonstrated for 30- versus 20-ml BCs in adults (Cockerill FR, Wilson JW, Vetter EA, Goodman KM, Torgerson CA, Harmsen WS, Schleck CD, IIstrup DM, Washington JA, Wilson WR. Clin Infect Dis 38:1724-1730, 2004, http://dx.doi.org/10.1128/JCM.01314-11). Hospitals receive higher reimbursement for patients with documented septicemia. We determined the cost-effectiveness of 30-ml versus 20-ml BCs using results from our institution and previously published data. Positive BC results from 292 bacteremic episodes were reviewed. The costs of the reagents, equipment, phlebotomist, and technologist time were determined. The medical records department provided Medicare reimbursement (MR) data for patients with selected ICD-9 codes. These data provided an estimate of the annualized increase in MR versus costs associated with conversion to 30-ml BCs. MR for 464 annual primary BSIs was $24,808/episode. An expected 7.2% increase in BSIs detected using 30-ml BCs would add 34 additional cases annually and increase MR by $843,472. Comparative MR data for cases where septicemia complicated another diagnosis were available for 4 International Classification of Diseases, Ninth Revision (ICD-9) codes: laparoscopic cholecystectomy, biliary tract disorders, pneumonia, and cellulitis. The mean incremental MR was $9,667 per episode, which projected to a $483,350 revenue increase annually. The annual cost associated with conversion to 30-ml BCs was estimated to be $157,798. 
Thus, the potential net increase in hospital revenue would be $1,169,031 for 30-ml versus 20-ml BCs. Our results suggest that conversion to 30-ml BCs may not only improve patient care by detecting more BSIs but also increase hospital revenue substantially.",2015-10-21 +25995232,When less is more: 'slicing' sequencing data improves read decoding accuracy and de novo assembly quality.,"

Motivation

Since the invention of DNA sequencing in the 70s, computational biologists have had to deal with the problem of de novo genome assembly with limited (or insufficient) depth of sequencing. In this work, we investigate the opposite problem, that is, the challenge of dealing with excessive depth of sequencing.

Results

We explore the effect of ultra-deep sequencing data in two domains: (i) the problem of decoding reads to bacterial artificial chromosome (BAC) clones (in the context of the combinatorial pooling design we have recently proposed), and (ii) the problem of de novo assembly of BAC clones. Using real ultra-deep sequencing data, we show that when the depth of sequencing increases over a certain threshold, sequencing errors make these two problems harder and harder (instead of easier, as one would expect with error-free data), and as a consequence the quality of the solution degrades with more and more data. For the first problem, we propose an effective solution based on 'divide and conquer': we 'slice' a large dataset into smaller samples of optimal size, decode each slice independently, and then merge the results. Experimental results on over 15 000 barley BACs and over 4000 cowpea BACs demonstrate a significant improvement in the quality of the decoding and the final assembly. For the second problem, we show for the first time that modern de novo assemblers cannot take advantage of ultra-deep sequencing data.

Availability and implementation

Python scripts to process slices and resolve decoding conflicts are available from http://goo.gl/YXgdHT; software Hashfilter can be downloaded from http://goo.gl/MIyZHs

Contact

stelo@cs.ucr.edu or timothy.close@ucr.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-20 +24626233,"rAvis: an R-package for downloading information stored in Proyecto AVIS, a citizen science bird project.","Citizen science projects store an enormous amount of information about species distribution, diversity and characteristics. Researchers are now beginning to make use of this rich collection of data. However, access to these databases is not always straightforward. Apart from the largest and international projects, citizen science repositories often lack specific Application Programming Interfaces (APIs) to connect them to the scientific environments. Thus, it is necessary to develop simple routines to allow researchers to take advantage of the information collected by smaller citizen science projects, for instance, programming specific packages to connect them to popular scientific environments (like R). Here, we present rAvis, an R-package to connect R-users with Proyecto AVIS (http://proyectoavis.com), a Spanish citizen science project with more than 82,000 bird observation records. We develop several functions to explore the database, to plot the geographic distribution of the species occurrences, and to generate personal queries to the database about species occurrences (number of individuals, distribution, etc.) and birdwatcher observations (number of species recorded by each collaborator, UTMs visited, etc.). This new R-package will allow scientists to access this database and to exploit the information generated by Spanish birdwatchers over the last 40 years.",2014-03-13 +28464826,microTaboo: a general and practical solution to the k-disjoint problem.,"

Background

A common challenge in bioinformatics is to identify short sub-sequences that are unique in a set of genomes or reference sequences, which can efficiently be achieved by k-mer (k consecutive nucleotides) counting. However, there are several areas that would benefit from a more stringent definition of ""unique"", requiring that these sub-sequences of length W differ by more than k mismatches (i.e. a Hamming distance greater than k) from any other sub-sequence, which we term the k-disjoint problem. Examples include finding sequences unique to a pathogen for probe-based infection diagnostics; reducing off-target hits for re-sequencing or genome editing; detecting sequence (e.g. phage or viral) insertions; and multiple substitution mutations. Since both sensitivity and specificity are critical, an exhaustive, yet efficient solution is desirable.

Results

We present microTaboo, a method that allows for efficient and extensive sequence mining of unique (k-disjoint) sequences of up to 100 nucleotides in length. On a number of simulated and real data sets ranging from microbe- to mammalian-size genomes, we show that microTaboo is able to efficiently find all sub-sequences of a specified length W that do not occur within a threshold of k mismatches in any other sub-sequence. We exemplify that microTaboo has many practical applications, including point substitution detection, sequence insertion detection, padlock probe target search, and candidate CRISPR target mining.

Conclusions

microTaboo implements a solution to the k-disjoint problem in an alignment- and assembly free manner. microTaboo is available for Windows, Mac OS X, and Linux, running Java 7 and higher, under the GNU GPLv3 license, at: https://MohammedAlJaff.github.io/microTaboo.",2017-05-02 +23881731,Mass media interventions for reducing mental health-related stigma.,"

Background

Mental health-related stigma is widespread and has major adverse effects on the lives of people with mental health problems. Its two major components are discrimination (being treated unfairly) and prejudice (stigmatising attitudes). Anti-stigma initiatives often include mass media interventions, and such interventions can be expensive. It is important to know if mass media interventions are effective.

Objectives

To assess the effects of mass media interventions on reducing stigma (discrimination and prejudice) related to mental ill health compared to inactive controls, and to make comparisons of effectiveness based on the nature of the intervention (e.g. number of mass media components), the content of the intervention (e.g. type of primary message), and the type of media (e.g. print, internet).

Search methods

We searched eleven databases: the Cochrane Central Register of Controlled Trials (CENTRAL, The Cochrane Library, Issue 7, 2011); MEDLINE (OvidSP),1966 to 15 August 2011; EMBASE (OvidSP),1947 to 15 August 2011; PsycINFO (OvidSP), 1806 to 15 August 2011; CINAHL (EBSCOhost) 1981 to 16 August 2011; ERIC (CSA), 1966 to 16 August 2011; Social Science Citation Index (ISI), 1956 to 16 August 2011; OpenSIGLE (http://www.opengrey.eu/), 1980 to 18 August 2012; Worldcat Dissertations and Theses (OCLC), 1978 to 18 August 2011; metaRegister of Controlled Trials (http://www.controlled-trials.com/mrct/mrct_about.asp), 1973 to 18 August 2011; and Ichushi (OCLC), 1903 to 11 November 2011. We checked references from articles and reviews, and citations from included studies. We also searched conference abstracts and websites, and contacted researchers.

Selection criteria

Randomised controlled trials (RCTs), cluster RCTs or interrupted time series studies of mass media interventions compared to inactive controls in members of the general public or any of its constituent groups (excluding studies in which all participants were people with mental health problems), with mental health as a subject of the intervention and discrimination or prejudice outcome measures.

Data collection and analysis

Two authors independently extracted data and assessed the risk of bias of included studies. We contacted study authors for missing information. Information about adverse effects was collected from study reports. Primary outcomes were discrimination and prejudice, and secondary outcomes were knowledge, cost, reach, recall, and awareness of interventions, duration/sustainability of media effects, audience reactions to media content, and unforeseen adverse effects. We calculated standardised mean differences and odds ratios. We conducted a primarily narrative synthesis due to the heterogeneity of included studies. Subgroup analyses were undertaken to examine the effects of the nature, content and type of mass media intervention.

Main results

We included 22 studies involving 4490 participants. All were randomised trials (3 were cluster RCTs), and 19 of the 22 studies had analysable outcome data. Seventeen of the studies had student populations. Most of the studies were at unclear or high risk of bias for all forms of bias except detection bias. Findings from the five trials with discrimination outcomes (n = 1196) were mixed, with effects showing a reduction, increase or consistent with no evidence of effect. The median standardised mean difference (SMD) for the three trials (n = 394) with continuous outcomes was -0.25, with SMDs ranging from -0.85 (95% confidence interval (CI) -1.39 to -0.31) to -0.17 (95% CI -0.53 to 0.20). Odds ratios (OR) for the two studies (n = 802) with dichotomous discrimination outcomes showed no evidence of effect: results were 1.30 (95% CI 0.53 to 3.19) and 1.19 (95% CI 0.85 to 1.65). The 19 trials (n = 3176) with prejudice outcomes had median SMDs favouring the intervention, at the three following time periods: -0.38 (immediate), -0.38 (1 week to 2 months) and -0.49 (6 to 9 months). SMDs for prejudice outcomes across all studies ranged from -2.94 (95% CI -3.52 to -2.37) to 2.40 (95% CI 0.62 to 4.18). The median SMDs indicate that mass media interventions may have a small to medium effect in decreasing prejudice, and are equivalent to reducing the level of prejudice from that associated with schizophrenia to that associated with major depression. The studies were very heterogeneous, statistically, in their populations, interventions and outcomes, and only two meta-analyses within two subgroups were warranted. Data on secondary outcomes were sparse. Cost data were provided on request for three studies (n = 416), were highly variable, and did not address cost-effectiveness. Two studies (n = 455) contained statements about adverse effects and neither reported finding any.

Authors' conclusions

Mass media interventions may reduce prejudice, but there is insufficient evidence to determine their effects on discrimination. Very little is known about costs, adverse effects or other outcomes. Our review found few studies in middle- and low-income countries, or with employers or health professionals as the target group, and none targeted at children or adolescents. The findings are limited by the quality of the evidence, which was low for the primary outcomes for discrimination and prejudice, low for adverse effects and very low for costs. More research is required to establish the effects of mass media interventions on discrimination, to better understand which types of mass media intervention work best, to provide evidence about cost-effectiveness, and to fill evidence gaps about types of mass media not covered in this review. Such research should use robust methods, report data more consistently with reporting guidelines and be less reliant on student populations.",2013-07-23 +23703214,PiDNA: Predicting protein-DNA interactions with structural models.,"Predicting binding sites of a transcription factor in the genome is an important, but challenging, issue in studying gene regulation. In the past decade, a large number of protein-DNA co-crystallized structures available in the Protein Data Bank have facilitated the understanding of interacting mechanisms between transcription factors and their binding sites. Recent studies have shown that both physics-based and knowledge-based potential functions can be applied to protein-DNA complex structures to deliver position weight matrices (PWMs) that are consistent with the experimental data. 
To further use the available structural models, the proposed Web server, PiDNA, aims at first constructing reliable PWMs by applying an atomic-level knowledge-based scoring function on numerous in silico mutated complex structures, and then using the PWM constructed by the structure models with small energy changes to predict the interaction between proteins and DNA sequences. With PiDNA, the users can easily predict the relative preference of all the DNA sequences with limited mutations from the native sequence co-crystallized in the model in a single run. More predictions on sequences with unlimited mutations can be realized by additional requests or file uploading. Three types of information can be downloaded after prediction: (i) the ranked list of mutated sequences, (ii) the PWM constructed by the favourable mutated structures, and (iii) any mutated protein-DNA complex structure models specified by the user. This study first shows that the constructed PWMs are similar to the annotated PWMs collected from databases or literature. Second, the prediction accuracy of PiDNA in detecting relatively high-specificity sites is evaluated by comparing the ranked lists against in vitro experiments from protein-binding microarrays. Finally, PiDNA is shown to be able to select the experimentally validated binding sites from 10,000 random sites with high accuracy. With PiDNA, the users can design biological experiments based on the predicted sequence specificity and/or request mutated structure models for further protein design. As well, it is expected that PiDNA can be incorporated with chromatin immunoprecipitation data to refine large-scale inference of in vivo protein-DNA interactions. PiDNA is available at: http://dna.bime.ntu.edu.tw/pidna.",2013-05-22 +27613545,[Comprehensibility of online-based patient education material in ophthalmology].,"

Background

Investigations have shown that the internet as a source of information in medical issues is increasing in importance. For most patients information delivered or supported by hospitals and universities is considered to be the most reliable, however, the comprehensibility of available information is often considered to be wanting. Comprehensibility scores are formulae allowing a quantitative value for the readability of a document to be calculated.

Objective

The purpose of this study was to assess data by analyzing the comprehensibility of medical information published on the websites of departments for ophthalmology of German university hospitals. We investigated and analyzed medical information dealing with three eye diseases with potentially severe irreversible damage.

Methods

The websites of 32 departments for ophthalmology of German university hospitals were investigated. Information regarding cataracts, glaucoma and retinal detachment (amotio retinae) were identified and analyzed. All information was systematically analyzed regarding comprehensibility by using the analysis program Text-Lab ( http://www.text-lab.de ) by calculation of five readability scores: the Hohenheim comprehensibility index (HVI), the Amstad index, the simple measure of gobbledygook (G-SMOG) index, the Vienna non-fictional text formula (W-STX) and the readability index (LIX).

Results

In 59 cases (61.46 %) useful text information from the homepage of the institutions could be detected and analyzed. On average the comprehensibility of the information was identified as being poor (HVI 7.91 ± 3.94, Amstad index 35.45 ± 11.85, Vienna formula 11.19 ± 1.93, G‑SMOG 9.77 ± 1.42 and the LIX 54.53 ± 6.67).

Conclusion

In most of the cases patient information material was written far above the literacy level of the average population. It must be assumed that the presented information is difficult to read for the majority of the patients. A critical evaluation of accessible information material seems to be desirable and available texts should be amended.",2017-05-01 +24271393,ExoLocator--an online view into genetic makeup of vertebrate proteins.,"ExoLocator (http://exolocator.eopsf.org) collects in a single place information needed for comparative analysis of protein-coding exons from vertebrate species. The main source of data--the genomic sequences, and the existing exon and homology annotation--is the ENSEMBL database of completed vertebrate genomes. To these, ExoLocator adds the search for ostensibly missing exons in orthologous protein pairs across species, using an extensive computational pipeline to narrow down the search region for the candidate exons and find a suitable template in the other species, as well as state-of-the-art implementations of pairwise alignment algorithms. The resulting complements of exons are organized in a way currently unique to ExoLocator: multiple sequence alignments, both on the nucleotide and on the peptide levels, clearly indicating the exon boundaries. The alignments can be inspected in the web-embedded viewer, downloaded or used on the spot to produce an estimate of conservation within orthologous sets, or functional divergence across paralogues.",2013-11-23 +28632011,Essential Public Health Competencies for Medical Students: Establishing a Consensus in Family Medicine.,"Phenomenon: The integration of public health (PH) competency training into medical education, and further integration of PH and primary care, has been urged by the U.S. Institute of Medicine. However, PH competencies are numerous, and no consensus exists over which competencies are most important for adoption by current trainees. 
Our objective was to conduct a group concept mapping exercise with stakeholders identifying the most important and feasible PH skills to incorporate in medical and residency curricula.

Approach

We utilized a group concept mapping technique via the Concept System Global Max ( http://www.conceptsystems.com ), where family medicine educators and PH professionals completed the phrase, ""A key Public Health competency for physicians-in-training to learn is …"" with 1-10 statements. The statement list was edited for duplication and other issues; stakeholders then sorted the statements and rated them for importance and feasibility of integration. Multidimensional scaling and cluster analysis were used to create a two-dimensional point map of domains of PH training, allowing visual comparison of groupings of related ideas and relative importance of these ideas.

Findings

There were 116 nonduplicative statements (225 total) suggested by 120 participants. Three metacategories of competencies emerged: Clinic, Community & Culture, Health System Understanding, and Population Health Science & Data. Insights: We identified and organized a set of topics that serve as a foundation for the integration of family medicine and PH education. Incorporating these topics into medical education is viewed as important and feasible by family medicine educators and PH professions.",2017-03-02 +28498903,Efficacy and safety of inhaled anaesthetic for postoperative sedation during mechanical ventilation in adult cardiac surgery patients: a systematic review and meta-analysis.,"The aim was to evaluate the efficacy and safety of volatile anaesthetic for postoperative sedation in adult cardiac surgery patients through a systematic review and meta-analysis. We retrieved randomized controlled trials from MEDLINE, EMBASE, CENTRAL, Web of Science, clinical trials registries, conference proceedings, and reference lists of included articles. Independent reviewers extracted data, including patient characteristics, type of intraoperative anaesthesia, inhaled anaesthetic used, comparator sedation, and outcomes of interest, using pre-piloted forms. We assessed risk of bias using the Cochrane Tool and evaluated the strength of the evidence using the GRADE approach. Eight studies enrolling 610 patients were included. Seven had a high and one a low risk of bias. The times to extubation after intensive care unit (ICU) admission and sedation discontinuation were, respectively, 76 [95% confidence interval (CI) -150 to - 2, I2=79%] and 74 min (95% CI - 126 to - 23, I2=96%) less in patients who were sedated using volatile anaesthetic. There was no difference in ICU or hospital length of stay. Patients who received volatile anaesthetic sedation had troponin concentrations that were 0.71 ng ml-1 (95% CI 0.23-1.2) lower than control patients. 
Reporting on other outcomes was varied and not suitable for meta-analysis. Volatile anaesthetic sedation may be associated with a shorter time to extubation after cardiac surgery but no change in ICU or hospital length of stay. It is associated with a significantly lower postoperative troponin concentration, but the impact of this on adverse cardiovascular outcomes is uncertain. Blinded randomized trials using intention-to-treat analysis are required. PROSPERO registry number: 2016:CRD42016033874. Available from http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42016033874.",2017-05-01 +28073761,Seeing the trees through the forest: sequence-based homo- and heteromeric protein-protein interaction sites prediction using random forest.,"

Motivation

Genome sequencing is producing an ever-increasing amount of associated protein sequences. Few of these sequences have experimentally validated annotations, however, and computational predictions are becoming increasingly successful in producing such annotations. One key challenge remains the prediction of the amino acids in a given protein sequence that are involved in protein-protein interactions. Such predictions are typically based on machine learning methods that take advantage of the properties and sequence positions of amino acids that are known to be involved in interaction. In this paper, we evaluate the importance of various features using Random Forest (RF), and include as a novel feature backbone flexibility predicted from sequences to further optimise protein interface prediction.

Results

We observe that there is no single sequence feature that enables pinpointing interacting sites in our Random Forest models. However, combining different properties does increase the performance of interface prediction. Our homomeric-trained RF interface predictor is able to distinguish interface from non-interface residues with an area under the ROC curve of 0.72 in a homomeric test-set. The heteromeric-trained RF interface predictor performs better than existing predictors on an independent heteromeric test-set. We trained a more general predictor on the combined homomeric and heteromeric dataset, and show that in addition to predicting homomeric interfaces, it is also able to pinpoint interface residues in heterodimers. This suggests that our random forest model and the features included capture common properties of both homodimer and heterodimer interfaces.

Availability and implementation

The predictors and test datasets used in our analyses are freely available ( http://www.ibi.vu.nl/downloads/RF_PPI/ ).

Contact

k.a.feenstra@vu.nl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +24516327,PPS: A computing engine to find Palindromes in all Protein sequences.,"

Unlabelled

The primary structure of a protein molecule comprises a linear chain of amino acid residues. Certain parts of this linear chain are unique in nature and function. They can be classified under different categories and their roles studied in detail. Two such unique categories are the palindromic sequences and the Single Amino Acid Repeats (SAARs), which play a major role in the structure, function and evolution of the protein molecule. In spite of their presence in various protein sequences, palindromes have not yet been investigated in detail. Thus, to enable a comprehensive understanding of these sequences, a computing engine, PPS, has been developed. The users can search the occurrences of palindromes and SAARs in all the protein sequences available in various databases and can view the three-dimensional structures (in case it is available in the known three-dimensional protein structures deposited to the Protein Data Bank) using the graphics plug-in Jmol. The proposed server is the first of its kind and can be freely accessed through the World Wide Web.

Availability

URL http://pranag.physics.iisc.ernet.in/pps/",2014-01-29 +27031943,"Update: Interim Guidance for Health Care Providers Caring for Women of Reproductive Age with Possible Zika Virus Exposure--United States, 2016.","CDC has updated its interim guidance for U.S. health care providers caring for women of reproductive age with possible Zika virus exposure to include recommendations on counseling women and men with possible Zika virus exposure who are interested in conceiving. This guidance is based on limited available data on persistence of Zika virus RNA in blood and semen. Women who have Zika virus disease should wait at least 8 weeks after symptom onset to attempt conception, and men with Zika virus disease should wait at least 6 months after symptom onset to attempt conception. Women and men with possible exposure to Zika virus but without clinical illness consistent with Zika virus disease should wait at least 8 weeks after exposure to attempt conception. Possible exposure to Zika virus is defined as travel to or residence in an area of active Zika virus transmission ( http://www.cdc.gov/zika/geo/active-countries.html), or sex (vaginal intercourse, anal intercourse, or fellatio) without a condom with a man who traveled to or resided in an area of active transmission. Women and men who reside in areas of active Zika virus transmission should talk with their health care provider about attempting conception. This guidance also provides updated recommendations on testing of pregnant women with possible Zika virus exposure. These recommendations will be updated when additional data become available.",2016-04-01 +23180575,Vivaldi: visualization and validation of biomacromolecular NMR structures from the PDB.,"We describe Vivaldi (VIsualization and VALidation DIsplay; http://pdbe.org/vivaldi), a web-based service for the analysis, visualization, and validation of NMR structures in the Protein Data Bank (PDB). 
Vivaldi provides access to model coordinates and several types of experimental NMR data using interactive visualization tools, augmented with structural annotations and model-validation information. The service presents information about the modeled NMR ensemble, validation of experimental chemical shifts, residual dipolar couplings, distance and dihedral angle constraints, as well as validation scores based on empirical knowledge and databases. Vivaldi was designed for both expert NMR spectroscopists and casual non-expert users who wish to obtain a better grasp of the information content and quality of NMR structures in the public archive.",2013-01-15 +23688397,PLANEX: the plant co-expression database.,"

Background

The PLAnt co-EXpression database (PLANEX) is a new internet-based database for plant gene analysis. PLANEX (http://planex.plantbioinformatics.org) contains publicly available GeneChip data obtained from the Gene Expression Omnibus (GEO) of the National Center for Biotechnology Information (NCBI). PLANEX is a genome-wide co-expression database, which allows for the functional identification of genes from a wide variety of experimental designs. It can be used for the characterization of genes for functional identification and analysis of a gene's dependency among other genes. Gene co-expression databases have been developed for other species, but gene co-expression information for plants is currently limited.

Description

We constructed PLANEX as a list of co-expressed genes and functional annotations for Arabidopsis thaliana, Glycine max, Hordeum vulgare, Oryza sativa, Solanum lycopersicum, Triticum aestivum, Vitis vinifera and Zea mays. PLANEX reports Pearson's correlation coefficients (PCCs; r-values) that distribute from a gene of interest for a given microarray platform set corresponding to a particular organism. To support PCCs, PLANEX performs an enrichment test of Gene Ontology terms and Cohen's Kappa value to compare functional similarity for all genes in the co-expression database. PLANEX draws a cluster network with co-expressed genes, which is estimated using the k-mean method. To construct PLANEX, a variety of datasets were interpreted by the IBM supercomputer Advanced Interactive eXecutive (AIX) in a supercomputing center.

Conclusion

PLANEX provides a correlation database, a cluster network and an interpretation of enrichment test results for eight plant species. A typical co-expressed gene generates lists of co-expression data that contain hundreds of genes of interest for enrichment analysis. Also, co-expressed genes can be identified and cataloged in terms of comparative genomics by using the 'Co-expression gene compare' feature. This type of analysis will help interpret experimental data and determine whether there is a common term among genes of interest.",2013-05-20 +25853652,DGEclust: differential expression analysis of clustered count data.,"We present a statistical methodology, DGEclust, for differential expression analysis of digital expression data. Our method treats differential expression as a form of clustering, thus unifying these two concepts. Furthermore, it simultaneously addresses the problem of how many clusters are supported by the data and uncertainty in parameter estimation. DGEclust successfully identifies differentially expressed genes under a number of different scenarios, maintaining a low error rate and an excellent control of its false discovery rate with reasonable computational requirements. It is formulated to perform particularly well on low-replicated data and be applicable to multi-group data. DGEclust is available at http://dvav.github.io/dgeclust/.",2015-02-20 +27412086,MorphoLibJ: integrated library and plugins for mathematical morphology with ImageJ.,"

Motivation

Mathematical morphology (MM) provides many powerful operators for processing 2D and 3D images. However, most MM plugins currently implemented for the popular ImageJ/Fiji platform are limited to the processing of 2D images.

Results

The MorphoLibJ library proposes a large collection of generic tools based on MM to process binary and grey-level 2D and 3D images, integrated into user-friendly plugins. We illustrate how MorphoLibJ can facilitate the exploitation of 3D images of plant tissues.

Availability and implementation

MorphoLibJ is freely available at http://imagej.net/MorphoLibJ CONTACT: david.legland@nantes.inra.fr Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-13 +28088762,The super-n-motifs model: a novel alignment-free approach for representing and comparing RNA secondary structures.,"

Motivation

Comparing ribonucleic acid (RNA) secondary structures of arbitrary size uncovers structural patterns that can provide a better understanding of RNA functions. However, performing fast and accurate secondary structure comparisons is challenging when we take into account the RNA configuration (i.e. linear or circular), the presence of pseudoknot and G-quadruplex (G4) motifs and the increasing number of secondary structures generated by high-throughput probing techniques. To address this challenge, we propose the super-n-motifs model based on a latent analysis of enhanced motifs comprising not only basic motifs but also adjacency relations. The super-n-motifs model computes a vector representation of secondary structures as linear combinations of these motifs.

Results

We demonstrate the accuracy of our model for comparison of secondary structures from linear and circular RNA while also considering pseudoknot and G4 motifs. We show that the super-n-motifs representation effectively captures the most important structural features of secondary structures, as compared to other representations such as ordered tree, arc-annotated and string representations. Finally, we demonstrate the time efficiency of our model, which is alignment free and capable of performing large-scale comparisons of 10 000 secondary structures with an efficiency up to 4 orders of magnitude faster than existing approaches.

Availability and implementation

The super-n-motifs model was implemented in C++. Source code and Linux binary are freely available at http://jpsglouzon.github.io/supernmotifs/ .

Contact

Shengrui.Wang@Usherbrooke.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +28073758,"Resolution and reconciliation of non-binary gene trees with transfers, duplications and losses.","

Summary

Gene trees reconstructed from sequence alignments contain poorly supported branches when the phylogenetic signal in the sequences is insufficient to determine them all. When a species tree is available, the signal of gains and losses of genes can be used to correctly resolve the unsupported parts of the gene history. However finding a most parsimonious binary resolution of a non-binary tree obtained by contracting the unsupported branches is NP-hard if transfer events are considered as possible gene scale events, in addition to gene origination, duplication and loss. We propose an exact, parameterized algorithm to solve this problem in single-exponential time, where the parameter is the number of connected branches of the gene tree that show low support from the sequence alignment or, equivalently, the maximum number of children of any node of the gene tree once the low-support branches have been collapsed. This improves on the best known algorithm by an exponential factor. We propose a way to choose among optimal solutions based on the available information. We show the usability of this principle on several simulated and biological datasets. The results are comparable in quality to several other tested methods having similar goals, but our approach provides a lower running time and a guarantee that the produced solution is optimal.

Availability and implementation

Our algorithm has been integrated into the ecceTERA phylogeny package, available at http://mbb.univ-montp2.fr/MBB/download_sources/16__ecceTERA and which can be run online at http://mbb.univ-montp2.fr/MBB/subsection/softExec.php?soft=eccetera .

Contact

celine.scornavacca@umontpellier.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +28682968,Prognostic Impact of Intra-abdominal/Pelvic Inflammation After Radical Surgery for Locally Recurrent Rectal Cancer.,"

Background

The influence of postoperative infectious complications, such as anastomotic leakage, on survival has been reported for various cancers, including colorectal cancer. However, it remains unclear whether intra-abdominal/pelvic inflammation after radical surgery for locally recurrent rectal cancer is relevant to its prognosis.

Objective

The purpose of this study was to evaluate factors associated with survival after radical surgery for locally recurrent rectal cancer.

Design

The prospectively collected data of patients were retrospectively evaluated.

Settings

This study was conducted at a single-institution tertiary care cancer center.

Patients

Between 1983 and 2012, patients who underwent radical surgery for locally recurrent rectal cancer with curative intent at the National Cancer Center Hospital were reviewed.

Main outcome measures

Factors associated with overall and relapse-free survival were evaluated.

Results

During the study period, a total of 180 patients were eligible for analyses. Median blood loss and operation time for locally recurrent rectal cancer were 2022 mL and 634 minutes. Five-year overall and 3-year relapse-free survival rates were 38.6% and 26.7%. Age (p = 0.002), initial tumor stage (p = 0.03), pain associated with locally recurrent rectal cancer (p = 0.03), CEA level (p = 0.004), resection margin (p < 0.001), intra-abdominal/pelvic inflammation (p < 0.001), and surgery period (p = 0.045) were independent prognostic factors associated with overall survival, whereas CEA level (p = 0.01), resection margin (p = 0.002), and intra-abdominal/pelvic inflammation (p = 0.001) were associated with relapse-free survival. Intra-abdominal/pelvic inflammation was observed in 45 patients (25.0%). A large amount of perioperative blood loss was the only factor associated with the occurrence of intra-abdominal/pelvic inflammation (p = 0.007).

Limitations

This study was limited by its retrospective nature and heterogeneous population.

Conclusions

Intra-abdominal/pelvic inflammation after radical surgery for locally recurrent rectal cancer is associated with poor prognosis. See Video Abstract at http://journals.lww.com/dcrjournal/Pages/videogallery.aspx.",2017-08-01 +25592880,Fast individual ancestry inference from DNA sequence data leveraging allele frequencies for multiple populations.,"

Background

Estimation of individual ancestry from genetic data is useful for the analysis of disease association studies, understanding human population history and interpreting personal genomic variation. New, computationally efficient methods are needed for ancestry inference that can effectively utilize existing information about allele frequencies associated with different human populations and can work directly with DNA sequence reads.

Results

We describe a fast method for estimating the relative contribution of known reference populations to an individual's genetic ancestry. Our method utilizes allele frequencies from the reference populations and individual genotype or sequence data to obtain a maximum likelihood estimate of the global admixture proportions using the BFGS optimization algorithm. It accounts for the uncertainty in genotypes present in sequence data by using genotype likelihoods and does not require individual genotype data from external reference panels. Simulation studies and application of the method to real datasets demonstrate that our method is significantly times faster than previous methods and has comparable accuracy. Using data from the 1000 Genomes project, we show that estimates of the genome-wide average ancestry for admixed individuals are consistent between exome sequence data and whole-genome low-coverage sequence data. Finally, we demonstrate that our method can be used to estimate admixture proportions using pooled sequence data making it a valuable tool for controlling for population stratification in sequencing based association studies that utilize DNA pooling.

Conclusions

Our method is an efficient and versatile tool for estimating ancestry from DNA sequence data and is available from https://sites.google.com/site/vibansal/software/iAdmix .",2015-01-16 +28557711,Prenatal Residential Proximity to Agricultural Pesticide Use and IQ in 7-Year-Old Children.,"

Background

Residential proximity to agricultural pesticide use has been associated with neural tube defects and autism, but more subtle outcomes such as cognition have not been studied.

Objectives

We evaluated the relationship between prenatal residential proximity to agricultural use of potentially neurotoxic pesticides and neurodevelopment in 7-year-old children.

Methods

Participants included mothers and children (n=283) living in the agricultural Salinas Valley of California enrolled in the Center for the Health Assessment of Mothers and Children of Salinas (CHAMACOS) study. We estimated agricultural pesticide use within 1 km of maternal residences during pregnancy using a geographic information system, residential location, and California’s comprehensive agricultural Pesticide Use Report data. We used regression models to evaluate prenatal residential proximity to agricultural use of five potentially neurotoxic pesticide groups (organophosphates, carbamates, pyrethroids, neonicotinoids, and manganese fungicides) and five individual organophosphates (acephate, chlorpyrifos, diazinon, malathion, and oxydemeton-methyl) and cognition in 7-year-old children. All models included prenatal urinary dialkyl phosphate metabolite concentrations.

Results

We observed a decrease of 2.2 points [95% confidence interval (CI): −3.9, −0.5] in Full-Scale IQ and 2.9 points (95% CI: −4.4, −1.3) in Verbal Comprehension for each standard deviation increase in toxicity-weighted use of organophosphate pesticides. In separate models, we observed similar decrements in Full-Scale IQ with each standard deviation increase of use for two organophosphates (acephate and oxydemeton-methyl) and three neurotoxic pesticide groups (pyrethroids, neonicotinoids, and manganese fungicides).

Conclusions

This study identified potential relationships between maternal residential proximity to agricultural use of neurotoxic pesticides and poorer neurodevelopment in children. https://doi.org/10.1289/EHP504.",2017-05-25 +27307625,Finding correct protein-protein docking models using ProQDock.,"

Motivation

Protein-protein interactions are a key in virtually all biological processes. For a detailed understanding of the biological processes, the structure of the protein complex is essential. Given the current experimental techniques for structure determination, the vast majority of all protein complexes will never be solved by experimental techniques. In lack of experimental data, computational docking methods can be used to predict the structure of the protein complex. A common strategy is to generate many alternative docking solutions (atomic models) and then use a scoring function to select the best. The success of the computational docking technique is, to a large degree, dependent on the ability of the scoring function to accurately rank and score the many alternative docking models.

Results

Here, we present ProQDock, a scoring function that predicts the absolute quality of docking model measured by a novel protein docking quality score (DockQ). ProQDock uses support vector machines trained to predict the quality of protein docking models using features that can be calculated from the docking model itself. By combining different types of features describing both the protein-protein interface and the overall physical chemistry, it was possible to improve the correlation with DockQ from 0.25 for the best individual feature (electrostatic complementarity) to 0.49 for the final version of ProQDock. ProQDock performed better than the state-of-the-art methods ZRANK and ZRANK2 in terms of correlations, ranking and finding correct models on an independent test set. Finally, we also demonstrate that it is possible to combine ProQDock with ZRANK and ZRANK2 to improve performance even further.

Availability and implementation

http://bioinfo.ifm.liu.se/ProQDock

Contact

bjornw@ifm.liu.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +23666626,Graphite Web: Web tool for gene set analysis exploiting pathway topology.,"Graphite web is a novel web tool for pathway analyses and network visualization for gene expression data of both microarray and RNA-seq experiments. Several pathway analyses have been proposed either in the univariate or in the global and multivariate context to tackle the complexity and the interpretation of expression results. These methods can be further divided into 'topological' and 'non-topological' methods according to their ability to gain power from pathway topology. Biological pathways are, in fact, not only gene lists but can be represented through a network where genes and connections are, respectively, nodes and edges. To this day, the most used approaches are non-topological and univariate although they miss the relationship among genes. On the contrary, topological and multivariate approaches are more powerful, but difficult to be used by researchers without bioinformatic skills. Here we present Graphite web, the first public web server for pathway analysis on gene expression data that combines topological and multivariate pathway analyses with an efficient system of interactive network visualizations for easy results interpretation. Specifically, Graphite web implements five different gene set analyses on three model organisms and two pathway databases. Graphite Web is freely available at http://graphiteweb.bio.unipd.it/.",2013-05-10 +27307608,Convolutional neural network architectures for predicting DNA-protein binding.,"

Motivation

Convolutional neural networks (CNN) have outperformed conventional methods in modeling the sequence specificity of DNA-protein binding. Yet inappropriate CNN architectures can yield poorer performance than simpler models. Thus an in-depth understanding of how to match CNN architecture to a given task is needed to fully harness the power of CNNs for computational biology applications.

Results

We present a systematic exploration of CNN architectures for predicting DNA sequence binding using a large compendium of transcription factor datasets. We identify the best-performing architectures by varying CNN width, depth and pooling designs. We find that adding convolutional kernels to a network is important for motif-based tasks. We show the benefits of CNNs in learning rich higher-order sequence features, such as secondary motifs and local sequence context, by comparing network performance on multiple modeling tasks ranging in difficulty. We also demonstrate how careful construction of sequence benchmark datasets, using approaches that control potentially confounding effects like positional or motif strength bias, is critical in making fair comparisons between competing methods. We explore how to establish the sufficiency of training data for these learning tasks, and we have created a flexible cloud-based framework that permits the rapid exploration of alternative neural network architectures for problems in computational biology.

Availability and implementation

All the models analyzed are available at http://cnn.csail.mit.edu

Contact

gifford@mit.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +28011771,Improving protein disorder prediction by deep bidirectional long short-term memory recurrent neural networks.,"

Motivation

Capturing long-range interactions between structural but not sequence neighbors of proteins is a long-standing challenging problem in bioinformatics. Recently, long short-term memory (LSTM) networks have significantly improved the accuracy of speech and image classification problems by remembering useful past information in long sequential events. Here, we have implemented deep bidirectional LSTM recurrent neural networks in the problem of protein intrinsic disorder prediction.

Results

The new method, named SPOT-Disorder, has steadily improved over a similar method using a traditional, window-based neural network (SPINE-D) in all datasets tested without separate training on short and long disordered regions. Independent tests on four other datasets including the datasets from critical assessment of structure prediction (CASP) techniques and >10 000 annotated proteins from MobiDB, confirmed SPOT-Disorder as one of the best methods in disorder prediction. Moreover, initial studies indicate that the method is more accurate in predicting functional sites in disordered regions. These results highlight the usefulness combining LSTM with deep bidirectional recurrent neural networks in capturing non-local, long-range interactions for bioinformatics applications.

Availability and implementation

SPOT-disorder is available as a web server and as a standalone program at: http://sparks-lab.org/server/SPOT-disorder/index.php .

Contact

j.hanson@griffith.edu.au or yuedong.yang@griffith.edu.au or yaoqi.zhou@griffith.edu.au.

Supplementary information

Supplementary data is available at Bioinformatics online.",2017-03-01 +28364324,Primary hyperparathyroidism: insights from the Indian PHPT registry.,"The presentation of primary hyperparathyroidism (PHPT) is variable throughout the world. The present study explored retrospective data submitted to the Indian PHPT registry ( http://www.indianphptregistry.com ) between July 2005 and June 2015 from 5 centres covering four different geographical regions. The clinical, biochemical, radiological and histopathological characteristics of PHPT patients across India were analysed for similarity and variability across the centres. A total of 464 subjects (137 men and 327 women) with histopathologically proven PHPT were analysed. The mean age was 41 ± 14 years with a female:male ratio of 2.4:1. The majority (95%) of patients were symptomatic. Common clinical manifestations among all the centres were weakness and fatigability (58.7%), bone pain (56%), renal stone disease (31%), pancreatitis (12.3%) and gallstone disease (11%). Mean serum calcium, parathyroid hormone and inorganic phosphorus levels were 11.9 ± 1.6 mg/dL, 752.4 ± 735.2 pg/mL and 2.8 ± 0.9 mg/dL, respectively. Sestamibi scanning had better sensitivity than ultrasonography in the localisation of parathyroid adenoma; however, when these two modalities were combined, 93% of the cases were correctly localised. Mean parathyroid adenoma weight was 5.6 ± 6.5 g (0.1-54 g). It was concluded that the majority of PHPT patients within India are still mainly symptomatic with >50% of patients presenting with bone disease and one-third with renal impairment. Compared to Western countries, Indian patients with PHPT are younger, biochemical abnormalities are more severe, and adenoma weight is higher. 
As our observation is largely derived from a tertiary care hospital (no routine screening of serum calcium level), the results do not reflect racial differences in susceptibility to PHPT.",2017-03-31 +27723026,OIEBTLABNET: the web-based network of the OIE Bluetongue Reference Laboratories.,"Bluetongue (BT) is a mild to severe disease of domestic and wild ruminants caused by the Bluetongue virus (BTV) and generally transmitted by Culicoides biting midges. Its occurrence also determines a livestock trade ban in affected countries with severe economic consequences on national and international trade. For this reason, in May 2011, the OIE encouraged the OIE Reference Laboratories to establish and maintain a BT network to provide expertise and training to the OIE and OIE Member Countries for BT diagnosis, surveillance and control. The network is constantly sustained by world leading scientists in the field of virology, epidemiology, serology, entomology and vaccine development. The website, available at http://oiebtnet.izs.it/btlabnet/, hosts an Information System containing data on BTV outbreaks and strains and a WebGIS that distributes maps on BTV occurrence. In this paper we describe the applications and present the benefits derived from the use of the WebGIS in the context of BT international surveillance network.",2016-09-01 +29504178,Tissue characterization of uterine fibroids with an intravoxel incoherent motion model: The need for T2 correction.,"

Background

Diminished signal intensity of uterine fibroids in T2 -weighted images is routinely used as a qualitative marker of fibroid hypoperfusion. However, quantitative classification of fibroid perfusion with intravoxel incoherent motion (IVIM) model-based metrics is not yet clinically accepted.

Purpose

To investigate the influence of T2 correction on the estimation of IVIM model parameters for characterizing uterine fibroid tissue.

Study type

Prospective.

Population

Fourteen women with 41 fibroids (12 Type I and 29 Type II, per Funaki classification) underwent diffusion-weighted imaging and T2 mapping.

Field strength

Diffusion-weighted images (b values: 0, 20, 40, 60, 100, 200, 400, 600, 800, 1000 s/mm2 ) and T2 maps were obtained at 1.5T.

Assessment

The effect of uterine fibroid T2 variation on IVIM model parameters (diffusion coefficient, perfusion coefficient, and perfusion volume fraction) were numerically modeled and experimentally evaluated without (D, D*, f) and with (Dc, Dc*, fc) T2 correction. The relationship of T2 with D and the T2 -corrected perfusion volume fraction (fc ) was also examined.

Statistical test

D-values and f-values estimated with and without T2 correction were compared by using a two-tailed Student's t-test.

Results

Type II fibroids had higher D and f than Type I fibroids, but the differences were not significant (Type I vs. Type II, D: 0.83 ± 0.20 vs. 0.80 ± 0.25 mm2 /s, P = 0.78; f: 23.64 ± 4.87% vs. 25.27 ± 7.46%, P = 0.49). For Type I and Type II fibroids, fc was lower than f, and fc of Type II fibroids was significantly higher than that of Type I fibroids (Type I vs. Type II, fc : 7.80 ± 1.88% vs. 11.82 ± 4.13%, P = 0.003). Both D and fc exponentially increased with the increase of fibroid T2 as functions: $D_c(T_2) = -1.52 \times 10^{-3}\, e^{-3.42\, T_2/290} + 1.84 \times 10^{-3}$ and $f_c(T_2) = -0.2336\, e^{-3.217\, T_2/290} + 0.2269$, respectively. D asymptotically approached $1.79 \times 10^{-3}$ mm$^2$/s, and fc approached 21.74%.

Data conclusion

T2 correction is important when using IVIM-based models to characterize uterine fibroid tissue.

Level of evidence

2 Technical Efficacy: Stage 1 J. Magn. Reson. Imaging 2018;48:994-1001.",2018-03-05 +24488861,Expanding the mutational spectrum of CRLF1 in Crisponi/CISS1 syndrome.,"Crisponi syndrome (CS) and cold-induced sweating syndrome type 1 (CISS1) share clinical characteristics, such as dysmorphic features, muscle contractions, scoliosis, and cold-induced sweating, with CS patients showing a severe clinical course in infancy involving hyperthermia associated with death in most cases in the first years of life. To date, 24 distinct CRLF1 mutations have been found either in homozygosity or in compound heterozygosity in CS/CISS1 patients, with the highest prevalence in Sardinia, Turkey, and Spain. By reporting 11 novel CRLF1 mutations, here we expand the mutational spectrum of CRLF1 in the CS/CISS1 syndrome to a total of 35 variants and present an overview of the different molecular and clinical features of all of them. To catalog all the 35 mutations, we created a CRLF1 mutations database, based on the Leiden Open (source) Variation Database (LOVD) system (https://grenada.lumc.nl/LOVD2/mendelian_genes/variants). Overall, the available functional and clinical data support the fact that both syndromes actually represent manifestations of the same autosomal-recessive disorder caused by mutations in the CRLF1 gene. Therefore, we propose to rename the two overlapping entities with the broader term of Crisponi/CISS1 syndrome.",2014-03-06 +26072514,De novo meta-assembly of ultra-deep sequencing data.,"

Unlabelled

We introduce a new divide and conquer approach to deal with the problem of de novo genome assembly in the presence of ultra-deep sequencing data (i.e. coverage of 1000x or higher). Our proposed meta-assembler Slicembler partitions the input data into optimal-sized 'slices' and uses a standard assembly tool (e.g. Velvet, SPAdes, IDBA_UD and Ray) to assemble each slice individually. Slicembler uses majority voting among the individual assemblies to identify long contigs that can be merged to the consensus assembly. To improve its efficiency, Slicembler uses a generalized suffix tree to identify these frequent contigs (or fraction thereof). Extensive experimental results on real ultra-deep sequencing data (8000x coverage) and simulated data show that Slicembler significantly improves the quality of the assembly compared with the performance of the base assembler. In fact, most of the times, Slicembler generates error-free assemblies. We also show that Slicembler is much more resistant against high sequencing error rate than the base assembler.

Availability and implementation

Slicembler can be accessed at http://slicembler.cs.ucr.edu/.",2015-06-01 +26072499,A generic methodological framework for studying single cell motility in high-throughput time-lapse data.,"

Motivation

Motility is a fundamental cellular attribute, which plays a major part in processes ranging from embryonic development to metastasis. Traditionally, single cell motility is often studied by live cell imaging. Yet, such studies were so far limited to low throughput. To systematically study cell motility at a large scale, we need robust methods to quantify cell trajectories in live cell imaging data.

Results

The primary contribution of this article is to present Motility study Integrated Workflow (MotIW), a generic workflow for the study of single cell motility in high-throughput time-lapse screening data. It is composed of cell tracking, cell trajectory mapping to an original feature space and hit detection according to a new statistical procedure. We show that this workflow is scalable and demonstrate its power by application to simulated data, as well as large-scale live cell imaging data. This application enables the identification of an ontology of cell motility patterns in a fully unsupervised manner.

Availability and implementation

Python code and examples are available online (http://cbio.ensmp.fr/~aschoenauer/motiw.html)",2015-06-01 +28453561,A WebGIS platform for the monitoring of Farm Animal Genetic Resources (GENMON).,"

Background

In 2007, the Food and Agriculture Organization of the United Nations (FAO) initiated the Global plan of action for Farm Animal Genetic Resources (FAnGR). The main goal of this plan is to reduce further loss of genetic diversity in farm animals, so as to protect and promote the diversity of farm animal resources. An important step to reach this goal is to monitor and prioritize endangered breeds in the context of conservation programs.

Methodology/web portal implementation

The GENMON WebGIS platform is able to monitor FAnGR and to evaluate the degree of endangerment of livestock breeds. The system takes into account pedigree and introgression information, the geographical concentration of animals, the cryo-conservation plan and the sustainability of breeding activities based on socio-economic data as well as present and future land use conditions. A multi-criteria decision tool supports the aggregation of the multi-thematic indices mentioned above using the MACBETH method, which is based on a weighted average using satisfaction thresholds. GENMON is a monitoring tool to reach subjective decisions made by a government agency. It relies on open source software and is available at http://lasigsrv2.epfl.ch/genmon-ch.

Results/significance

GENMON allows users to upload pedigree-information (animal ID, parents, birthdate, sex, location and introgression) from a specific livestock breed and to define species and/or region-specific weighting parameters and thresholds. The program then completes a pedigree analysis and derives several indices that are used to calculate an integrated score of conservation prioritization for the breeds under investigation. The score can be visualized on a geographic map and allows a fast, intuitive and regional identification of breeds in danger. Appropriate conservation actions and breeding programs can thus be undertaken in order to promote the recovery of the genetic diversity in livestock breeds in need. The use of the platform is illustrated by means of an example based on three local livestock breeds from different species in Switzerland.",2017-04-28 +26314578,MGFM: a novel tool for detection of tissue and cell specific marker genes from microarray gene expression data.,"

Background

Identification of marker genes associated with a specific tissue/cell type is a fundamental challenge in genetic and cell research. Marker genes are of great importance for determining cell identity, and for understanding tissue specific gene function and the molecular mechanisms underlying complex diseases.

Results

We have developed a new bioinformatics tool called MGFM (Marker Gene Finder in Microarray data) to predict marker genes from microarray gene expression data. Marker genes are identified through the grouping of samples of the same type with similar marker gene expression levels. We verified our approach using two microarray data sets from the NCBI's Gene Expression Omnibus public repository encompassing samples for similar sets of five human tissues (brain, heart, kidney, liver, and lung). Comparison with another tool for tissue-specific gene identification and validation with literature-derived established tissue markers established functionality, accuracy and simplicity of our tool. Furthermore, top ranked marker genes were experimentally validated by reverse transcriptase-polymerase chain reaction (RT-PCR). The sets of predicted marker genes associated with the five selected tissues comprised well-known genes of particular importance in these tissues. The tool is freely available from the Bioconductor web site, and it is also provided as an online application integrated into the CellFinder platform ( http://cellfinder.org/analysis/marker ).

Conclusions

MGFM is a useful tool to predict tissue/cell type marker genes using microarray gene expression data. The implementation of the tool as an R-package as well as an application within CellFinder facilitates its use.",2015-08-28 +27153658,APPAGATO: an APproximate PArallel and stochastic GrAph querying TOol for biological networks.,"

Motivation

Biological network querying is a problem requiring a considerable computational effort to be solved. Given a target and a query network, it aims to find occurrences of the query in the target by considering topological and node similarities (i.e. mismatches between nodes, edges, or node labels). Querying tools that deal with similarities are crucial in biological network analysis because they provide meaningful results also in case of noisy data. In addition, as the size of available networks increases steadily, existing algorithms and tools are becoming unsuitable. This is raising new challenges for the design of more efficient and accurate solutions.

Results

This paper presents APPAGATO, a stochastic and parallel algorithm to find approximate occurrences of a query network in biological networks. APPAGATO handles node, edge and node label mismatches. Thanks to its randomized and parallel nature, it applies to large networks and, compared with existing tools, it provides higher performance as well as statistically significantly more accurate results. Tests have been performed on protein-protein interaction networks annotated with synthetic and real gene ontology terms. Case studies have been done by querying protein complexes among different species and tissues.

Availability and implementation

APPAGATO has been developed on top of the CUDA-C++ Toolkit 7.0 framework. The software is available online at http://profs.sci.univr.it/~bombieri/APPAGATO Contact: rosalba.giugno@univr.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-29 +24596151,A proteomic chronology of gene expression through the cell cycle in human myeloid leukemia cells.,"Technological advances have enabled the analysis of cellular protein and RNA levels with unprecedented depth and sensitivity, allowing for an unbiased re-evaluation of gene regulation during fundamental biological processes. Here, we have chronicled the dynamics of protein and mRNA expression levels across a minimally perturbed cell cycle in human myeloid leukemia cells using centrifugal elutriation combined with mass spectrometry-based proteomics and RNA-Seq, avoiding artificial synchronization procedures. We identify myeloid-specific gene expression and variations in protein abundance, isoform expression and phosphorylation at different cell cycle stages. We dissect the relationship between protein and mRNA levels for both bulk gene expression and for over ∼6000 genes individually across the cell cycle, revealing complex, gene-specific patterns. This data set, one of the deepest surveys to date of gene expression in human cells, is presented in an online, searchable database, the Encyclopedia of Proteome Dynamics (http://www.peptracker.com/epd/). DOI: http://dx.doi.org/10.7554/eLife.01630.001.",2014-01-01 +27130479,IPED: a highly efficient denoising tool for Illumina MiSeq Paired-end 16S rRNA gene amplicon sequencing data.,"

Background

The development of high-throughput sequencing technologies has revolutionized the field of microbial ecology via the sequencing of phylogenetic marker genes (e.g. 16S rRNA gene amplicon sequencing). Denoising, the removal of sequencing errors, is an important step in preprocessing amplicon sequencing data. The increasing popularity of the Illumina MiSeq platform for these applications requires the development of appropriate denoising methods.

Results

The newly proposed denoising algorithm IPED includes a machine learning method which predicts potentially erroneous positions in sequencing reads based on a combination of quality metrics. Subsequently, this information is used to group those error-containing reads with correct reads, resulting in error-free consensus reads. This is achieved by masking potentially erroneous positions during this clustering step. Compared to the second best algorithm available, IPED detects double the amount of errors. Reducing the error rate had a positive effect on the clustering of reads in operational taxonomic units, with an almost perfect correspondence between the number of clusters and the theoretical number of species present in the mock communities.

Conclusion

Our algorithm IPED is a powerful denoising tool for correcting sequencing errors in Illumina MiSeq 16S rRNA gene amplicon sequencing data. Apart from significantly reducing the error rate of the sequencing reads, it has also a beneficial effect on their clustering into operational taxonomic units. IPED is freely available at http://science.sckcen.be/en/Institutes/EHS/MCB/MIC/Bioinformatics/ .",2016-04-29 +23203871,"STRING v9.1: protein-protein interaction networks, with increased coverage and integration.","Complete knowledge of all direct and indirect interactions between proteins in a given cell would represent an important milestone towards a comprehensive description of cellular mechanisms and functions. Although this goal is still elusive, considerable progress has been made-particularly for certain model organisms and functional systems. Currently, protein interactions and associations are annotated at various levels of detail in online resources, ranging from raw data repositories to highly formalized pathway databases. For many applications, a global view of all the available interaction data is desirable, including lower-quality data and/or computational predictions. The STRING database (http://string-db.org/) aims to provide such a global perspective for as many organisms as feasible. Known and predicted associations are scored and integrated, resulting in comprehensive protein networks covering >1100 organisms. Here, we describe the update to version 9.1 of STRING, introducing several improvements: (i) we extend the automated mining of scientific texts for interaction information, to now also include full-text articles; (ii) we entirely re-designed the algorithm for transferring interactions from one model organism to the other; and (iii) we provide users with statistical information on any functional enrichment observed in their networks.",2012-11-29 +26079350,Investigating microbial co-occurrence patterns based on metagenomic compositional data.,"

Motivation

The high-throughput sequencing technologies have provided a powerful tool to study the microbial organisms living in various environments. Characterizing microbial interactions can give us insights into how they live and work together as a community. Metagenomic data are usually summarized in a compositional fashion due to varying sampling/sequencing depths from one sample to another. We study the co-occurrence patterns of microbial organisms using their relative abundance information. Analyzing compositional data using conventional correlation methods has been shown to be prone to bias that leads to artifactual correlations.

Results

We propose a novel method, regularized estimation of the basis covariance based on compositional data (REBACCA), to identify significant co-occurrence patterns by finding sparse solutions to a system with a deficient rank. To be specific, we construct the system using log ratios of count or proportion data and solve the system using the l1-norm shrinkage method. Our comprehensive simulation studies show that REBACCA (i) achieves higher accuracy in general than the existing methods when a sparse condition is satisfied; (ii) controls the false positives at a pre-specified level, while other methods fail in various cases and (iii) runs considerably faster than the existing comparable method. REBACCA is also applied to several real metagenomic datasets.

Availability and implementation

The R codes for the proposed method are available at http://faculty.wcas.northwestern.edu/~hji403/REBACCA.htm

Contact

hongmei@northwestern.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-16 +27227521,"The Madrid Affective Database for Spanish (MADS): Ratings of Dominance, Familiarity, Subjective Age of Acquisition and Sensory Experience.","The current study presents ratings by 540 Spanish native speakers for dominance, familiarity, subjective age of acquisition (AoA), and sensory experience (SER) for the 875 Spanish words included in the Madrid Affective Database for Spanish (MADS). The norms can be downloaded as supplementary materials for this manuscript from https://figshare.com/s/8e7b445b729527262c88 These ratings may be of potential relevance to researchers who are interested in characterizing the interplay between language and emotion. Additionally, with the aim of investigating how the affective features interact with the lexicosemantic properties of words, we performed correlational analyses between norms for familiarity, subjective AoA and SER, and scores for those affective variables which are currently included in the MADS. A distinct pattern of significant correlations with affective features was found for different lexicosemantic variables. These results show that familiarity, subjective AoA and SERs may have independent effects on the processing of emotional words. They also suggest that these psycholinguistic variables should be fully considered when formulating theoretical approaches to the processing of affective language.",2016-05-26 +27294689,HIV incidence and associated risk factors in men who have sex with men in Mainland China: an updated systematic review and meta-analysis. ,"Background: The national annually reported proportion of men who have sex with men (MSM) among people living with HIV (PLWH) is growing in China. To better inform the public health sector how to improve HIV prevention strategies, it is necessary to understand the current level of HIV incidence and its correlates. 
Methods: Google Scholar, PubMed, Web of Science and three major Chinese electronic publication databases (http://qikan.cqvip.com/, http://g.wanfangdata.com.cn/, http://www.cnki.net/, respectively) were searched for studies reporting HIV incidence. Comprehensive Meta-Analysis (CMA) 2.0 statistical software (Biostat, Inc. Englewood, NJ, USA) was used to calculate the pooled HIV incidence and perform subgroup-analysis to find correlates for HIV seroconversion in Chinese MSM. Random effects modelling was then conducted. Results: Twenty-five eligible studies were included in this meta-analysis. The calculated pooled HIV incidence was 5.61/100 person years (PY), with an increasing trend over time (3.24/100PY, 5.29/100PY, 5.50/100PY in 2005-2008, 2009-2011, 2012-2014 respectively, χ2 test for trend P = 0.04). Subgroup analyses indicated that age <25 years (rate ratio (RR) = 1.85), junior college education and below (RR = 1.87), having ≥ 2 male sexual partners in past 6 months (RR = 2.50), baseline syphilis infection (RR = 2.99), homosexual orientation (RR = 1.91), preferred bottom/versatile roles in anal sexual intercourse (RR = 2.33), and having unprotected anal intercourse in the past 6 months (RR = 2.16) significantly increased the risk for HIV seroconversion (each P < 0.05). Uncircumcised MSM had a marginal statistically significant higher HIV incidence (RR = 3.35, P = 0.051). Conclusion: HIV incidence is still alarmingly high among Chinese MSM. Stronger HIV intervention strategies should be implemented, in particular targeting young, less educated and syphilis-infected MSM.",2016-05-26 +22730453,"MuteinDB: the mutein database linking substrates, products and enzymatic reactions directly with genetic variants of enzymes.","Mutational events as well as the selection of the optimal variant are essential steps in the evolution of living organisms. 
The same principle is used in laboratory to extend the natural biodiversity to obtain better catalysts for applications in biomanufacturing or for improved biopharmaceuticals. Furthermore, single mutation in genes of drug-metabolizing enzymes can also result in dramatic changes in pharmacokinetics. These changes are a major cause of patient-specific drug responses and are, therefore, the molecular basis for personalized medicine. MuteinDB systematically links laboratory-generated enzyme variants (muteins) and natural isoforms with their biochemical properties including kinetic data of catalyzed reactions. Detailed information about kinetic characteristics of muteins is available in a systematic way and searchable for known mutations and catalyzed reactions as well as their substrates and known products. MuteinDB is broadly applicable to any known protein and their variants and makes mutagenesis and biochemical data searchable and comparable in a simple and easy-to-use manner. For the import of new mutein data, a simple, standardized, spreadsheet-based data format has been defined. To demonstrate the broad applicability of the MuteinDB, first data sets have been incorporated for selected cytochrome P450 enzymes as well as for nitrilases and peroxidases. Database URL: http://www.MuteinDB.org.",2012-06-21 +23208789,The EU-ADR Web Platform: delivering advanced pharmacovigilance tools.,"

Purpose

Pharmacovigilance methods have advanced greatly during the last decades, making post-market drug assessment an essential drug evaluation component. These methods mainly rely on the use of spontaneous reporting systems and health information databases to collect expertise from huge amounts of real-world reports. The EU-ADR Web Platform was built to further facilitate accessing, monitoring and exploring these data, enabling an in-depth analysis of adverse drug reactions risks.

Methods

The EU-ADR Web Platform exploits the wealth of data collected within a large-scale European initiative, the EU-ADR project. Millions of electronic health records, provided by national health agencies, are mined for specific drug events, which are correlated with literature, protein and pathway data, resulting in a rich drug-event dataset. Next, advanced distributed computing methods are tailored to coordinate the execution of data-mining and statistical analysis tasks. This permits obtaining a ranked drug-event list, removing spurious entries and highlighting relationships with high risk potential.

Results

The EU-ADR Web Platform is an open workspace for the integrated analysis of pharmacovigilance datasets. Using this software, researchers can access a variety of tools provided by distinct partners in a single centralized environment. Besides performing standalone drug-event assessments, they can also control the pipeline for an improved batch analysis of custom datasets. Drug-event pairs can be substantiated and statistically analysed within the platform's innovative working environment.

Conclusions

A pioneering workspace that helps in explaining the biological path of adverse drug reactions was developed within the EU-ADR project consortium. This tool, targeted at the pharmacovigilance community, is available online at https://bioinformatics.ua.pt/euadr/.",2012-12-04 +27224906,PredictSNP2: A Unified Platform for Accurately Evaluating SNP Effects by Exploiting the Different Characteristics of Variants in Distinct Genomic Regions.,"An important message taken from human genome sequencing projects is that the human population exhibits approximately 99.9% genetic similarity. Variations in the remaining parts of the genome determine our identity, trace our history and reveal our heritage. The precise delineation of phenotypically causal variants plays a key role in providing accurate personalized diagnosis, prognosis, and treatment of inherited diseases. Several computational methods for achieving such delineation have been reported recently. However, their ability to pinpoint potentially deleterious variants is limited by the fact that their mechanisms of prediction do not account for the existence of different categories of variants. Consequently, their output is biased towards the variant categories that are most strongly represented in the variant databases. Moreover, most such methods provide numeric scores but not binary predictions of the deleteriousness of variants or confidence scores that would be more easily understood by users. We have constructed three datasets covering different types of disease-related variants, which were divided across five categories: (i) regulatory, (ii) splicing, (iii) missense, (iv) synonymous, and (v) nonsense variants. These datasets were used to develop category-optimal decision thresholds and to evaluate six tools for variant prioritization: CADD, DANN, FATHMM, FitCons, FunSeq2 and GWAVA. This evaluation revealed some important advantages of the category-based approach. 
The results obtained with the five best-performing tools were then combined into a consensus score. Additional comparative analyses showed that in the case of missense variations, protein-based predictors perform better than DNA sequence-based predictors. A user-friendly web interface was developed that provides easy access to the five tools' predictions, and their consensus scores, in a user-understandable format tailored to the specific features of different categories of variations. To enable comprehensive evaluation of variants, the predictions are complemented with annotations from eight databases. The web server is freely available to the community at http://loschmidt.chemi.muni.cz/predictsnp2.",2016-05-25 +23203888,Interferome v2.0: an updated database of annotated interferon-regulated genes.,"Interferome v2.0 (http://interferome.its.monash.edu.au/interferome/) is an update of an earlier version of the Interferome DB published in the 2009 NAR database edition. Vastly improved computational infrastructure now enables more complex and faster queries, and supports more data sets from types I, II and III interferon (IFN)-treated cells, mice or humans. Quantitative, MIAME compliant data are collected, subjected to thorough, standardized, quantitative and statistical analyses and then significant changes in gene expression are uploaded. Comprehensive manual collection of metadata in v2.0 allows flexible, detailed search capacity including the parameters: range of -fold change, IFN type, concentration and time, and cell/tissue type. There is no limit to the number of genes that can be used to search the database in a single query. Secondary analysis such as gene ontology, regulatory factors, chromosomal location or tissue expression plots of IFN-regulated genes (IRGs) can be performed in Interferome v2.0, or data can be downloaded in convenient text formats compatible with common secondary analysis programs. 
Given the importance of IFN to innate immune responses in infectious, inflammatory diseases and cancer, this upgrade of the Interferome to version 2.0 will facilitate the identification of gene signatures of importance in the pathogenesis of these diseases.",2012-11-29 +27153720,PON-Sol: prediction of effects of amino acid substitutions on protein solubility.,"

Motivation

Solubility is one of the fundamental protein properties. It is of great interest because of its relevance to protein expression. Reduced solubility and protein aggregation are also associated with many diseases.

Results

We collected from literature the largest experimentally verified solubility affecting amino acid substitution (AAS) dataset and used it to train a predictor called PON-Sol. The predictor can distinguish both solubility decreasing and increasing variants from those not affecting solubility. PON-Sol has normalized correct prediction ratio of 0.491 on cross-validation and 0.432 for independent test set. The performance of the method was compared both to solubility and aggregation predictors and found to be superior. PON-Sol can be used for the prediction of effects of disease-related substitutions, effects on heterologous recombinant protein expression and enhanced crystallizability. One application is to investigate effects of all possible AASs in a protein to aid protein engineering.

Availability and implementation

PON-Sol is freely available at http://structure.bmc.lu.se/PON-Sol The training and test data are available at http://structure.bmc.lu.se/VariBench/ponsol.php

Contact

mauno.vihinen@med.lu.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-19 +26663632,SECOND-STAGE TREATMENTS FOR RELATIVE NONRESPONDERS TO COGNITIVE BEHAVIORAL THERAPY (CBT) FOR PANIC DISORDER WITH OR WITHOUT AGORAPHOBIA-CONTINUED CBT VERSUS SSRI: A RANDOMIZED CONTROLLED TRIAL.,"

Background

Cognitive behavioral therapy (CBT) and pharmacotherapy are efficacious for the short-term treatment of panic disorder. Less is known about the efficacy of these therapies for individuals who do not respond fully to short-term CBT.

Method

The current trial is a second-step stratified randomized design comparing two treatment conditions-selective serotonin reuptake inhibitor (SSRI; paroxetine or citalopram; n = 34) and continued CBT (n = 24)-in a sample of individuals classified as treatment nonresponders to an initial course of CBT for panic disorder. Participants were randomized to 3 months of treatment and then followed for an additional 9 months. Only treatment responders after 3 months were maintained on the treatment until 12-month follow-up. Data analysis focused on panic disorder symptoms and achievement of response status across the first 3 months of treatment. Final follow-up data are presented descriptively.

Results

Participants in the SSRI condition showed significantly lower panic disorder symptoms as compared to continued CBT at 3 months. Results were similar when excluding individuals with comorbid major depression or analyzing the entire intent-to-treat sample. Group differences disappeared during 9-month naturalistic follow-up, although there was significant attrition and use of nonstudy therapies in both arms.

Conclusions

These data suggest greater improvement in panic disorder symptoms when switching to SSRI after failure to fully respond to an initial course of CBT. Future studies should further investigate relapse following treatment discontinuation for nonresponders who became responders. Clinicaltrials.gov Identifier: NCT00000368; https://clinicaltrials.gov/show/NCT00000368.",2015-12-10 +26315913,HapCol: accurate and memory-efficient haplotype assembly from long reads.,"

Motivation

Haplotype assembly is the computational problem of reconstructing haplotypes in diploid organisms and is of fundamental importance for characterizing the effects of single-nucleotide polymorphisms on the expression of phenotypic traits. Haplotype assembly highly benefits from the advent of 'future-generation' sequencing technologies and their capability to produce long reads at increasing coverage. Existing methods are not able to deal with such data in a fully satisfactory way, either because accuracy or performances degrade as read length and sequencing coverage increase or because they are based on restrictive assumptions.

Results

By exploiting a feature of future-generation technologies-the uniform distribution of sequencing errors-we designed an exact algorithm, called HapCol, that is exponential in the maximum number of corrections for each single-nucleotide polymorphism position and that minimizes the overall error-correction score. We performed an experimental analysis, comparing HapCol with the current state-of-the-art combinatorial methods both on real and simulated data. On a standard benchmark of real data, we show that HapCol is competitive with state-of-the-art methods, improving the accuracy and the number of phased positions. Furthermore, experiments on realistically simulated datasets revealed that HapCol requires significantly less computing resources, especially memory. Thanks to its computational efficiency, HapCol can overcome the limits of previous approaches, allowing to phase datasets with higher coverage and without the traditional all-heterozygous assumption.

Availability and implementation

Our source code is available under the terms of the GNU General Public License at http://hapcol.algolab.eu/

Contact

bonizzoni@disco.unimib.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-26 +23193264,Database resources of the National Center for Biotechnology Information.,"In addition to maintaining the GenBank® nucleic acid sequence database, the National Center for Biotechnology Information (NCBI, http://www.ncbi.nlm.nih.gov) provides analysis and retrieval resources for the data in GenBank and other biological data made available through the NCBI web site. NCBI resources include Entrez, the Entrez Programming Utilities, MyNCBI, PubMed, PubMed Central, Gene, the NCBI Taxonomy Browser, BLAST, BLAST Link (BLink), Primer-BLAST, COBALT, Splign, RefSeq, UniGene, HomoloGene, ProtEST, dbMHC, dbSNP, dbVar, Epigenomics, the Genetic Testing Registry, Genome and related tools, the Map Viewer, Model Maker, Evidence Viewer, Trace Archive, Sequence Read Archive, BioProject, BioSample, Retroviral Genotyping Tools, HIV-1/Human Protein Interaction Database, Gene Expression Omnibus, Probe, Online Mendelian Inheritance in Animals, the Molecular Modeling Database, the Conserved Domain Database, the Conserved Domain Architecture Retrieval Tool, Biosystems, Protein Clusters and the PubChem suite of small molecule databases. Augmenting many of the web applications are custom implementations of the BLAST program optimized to search specialized data sets. All of these resources can be accessed through the NCBI home page.",2012-11-27 +24304892,miRTarBase update 2014: an information resource for experimentally validated miRNA-target interactions.,"MicroRNAs (miRNAs) are small non-coding RNA molecules capable of negatively regulating gene expression to control many cellular mechanisms. The miRTarBase database (http://mirtarbase.mbc.nctu.edu.tw/) provides the most current and comprehensive information of experimentally validated miRNA-target interactions. 
The database was launched in 2010 with data sources for >100 published studies in the identification of miRNA targets, molecular networks of miRNA targets and systems biology, and the current release (2013, version 4) includes significant expansions and enhancements over the initial release (2010, version 1). This article reports the current status of and recent improvements to the database, including (i) a 14-fold increase to miRNA-target interaction entries, (ii) a miRNA-target network, (iii) expression profile of miRNA and its target gene, (iv) miRNA target-associated diseases and (v) additional utilities including an upgrade reminder and an error reporting/user feedback system.",2013-12-04 +22945789,JEnsembl: a version-aware Java API to Ensembl data systems.,"

Motivation

The Ensembl Project provides release-specific Perl APIs for efficient high-level programmatic access to data stored in various Ensembl database schema. Although Perl scripts are perfectly suited for processing large volumes of text-based data, Perl is not ideal for developing large-scale software applications nor embedding in graphical interfaces. The provision of a novel Java API would facilitate type-safe, modular, object-orientated development of new Bioinformatics tools with which to access, analyse and visualize Ensembl data.

Results

The JEnsembl API implementation provides basic data retrieval and manipulation functionality from the Core, Compara and Variation databases for all species in Ensembl and EnsemblGenomes and is a platform for the development of a richer API to Ensembl datasources. The JEnsembl architecture uses a text-based configuration module to provide evolving, versioned mappings from database schema to code objects. A single installation of the JEnsembl API can therefore simultaneously and transparently connect to current and previous database instances (such as those in the public archive) thus facilitating better analysis repeatability and allowing 'through time' comparative analyses to be performed.

Availability

Project development, released code libraries, Maven repository and documentation are hosted at SourceForge (http://jensembl.sourceforge.net).",2012-09-03 +23330685,BioSM: metabolomics tool for identifying endogenous mammalian biochemical structures in chemical structure space.,"The structural identification of unknown biochemical compounds in complex biofluids continues to be a major challenge in metabolomics research. Using LC/MS, there are currently two major options for solving this problem: searching small biochemical databases, which often do not contain the unknown of interest or searching large chemical databases which include large numbers of nonbiochemical compounds. Searching larger chemical databases (larger chemical space) increases the odds of identifying an unknown biochemical compound, but only if nonbiochemical structures can be eliminated from consideration. In this paper we present BioSM; a cheminformatics tool that uses known endogenous mammalian biochemical compounds (as scaffolds) and graph matching methods to identify endogenous mammalian biochemical structures in chemical structure space. The results of a comprehensive set of empirical experiments suggest that BioSM identifies endogenous mammalian biochemical structures with high accuracy. In a leave-one-out cross validation experiment, BioSM correctly predicted 95% of 1388 Kyoto Encyclopedia of Genes and Genomes (KEGG) compounds as endogenous mammalian biochemicals using 1565 scaffolds. Analysis of two additional biological data sets containing 2330 human metabolites (HMDB) and 2416 plant secondary metabolites (KEGG) resulted in biochemical annotations of 89% and 72% of the compounds, respectively. When a data set of 3895 drugs (DrugBank and USAN) was tested, 48% of these structures were predicted to be biochemical. However, when a set of synthetic chemical compounds (Chembridge and Chemsynthesis databases) were examined, only 29% of the 458,207 structures were predicted to be biochemical. 
Moreover, BioSM predicted that 34% of 883,199 randomly selected compounds from PubChem were biochemical. We then expanded the scaffold list to 3927 biochemical compounds and reevaluated the above data sets to determine whether scaffold number influenced model performance. Although there were significant improvements in model sensitivity and specificity using the larger scaffold list, the data set comparison results were very similar. These results suggest that additional biochemical scaffolds will not further improve our representation of biochemical structure space and that the model is reasonably robust. BioSM provides a qualitative (yes/no) and quantitative (ranking) method for endogenous mammalian biochemical annotation of chemical space and, thus, will be useful in the identification of unknown biochemical structures in metabolomics. BioSM is freely available at http://metabolomics.pharm.uconn.edu.",2013-02-27 +28110985,A Profile of Indian Health Service Emergency Departments.,"

Study objective

The Indian Health Service provides health care to eligible American Indians and Alaskan Natives. No published data exist on emergency services offered by this unique health care system. We seek to determine the characteristics and capabilities of Indian Health Service emergency departments (EDs).

Methods

All Indian Health Service EDs were surveyed about demographics and operational characteristics for 2014 with the National Emergency Department Inventory survey (available at http://www.emnet-nedi.org/).

Results

Of the forty eligible sites, there were 34 respondents (85% response rate). Respondents reported a total of 637,523 ED encounters, ranging from 521 to 63,200 visits per site. Overall, 85% (95% confidence interval 70% to 94%) had continuous physician coverage. Of all physicians staffing the ED, a median of 13% (interquartile range 0% to 50%) were board certified or board prepared in emergency medicine. Overall, 50% (95% confidence interval 34% to 66%) of respondents reported that their ED was operating over capacity.

Conclusion

Indian Health Service EDs varied widely in visit volume, with many operating over capacity. Most were not staffed by board-certified or -prepared emergency physicians. Most lacked access to specialty consultation and telemedicine capabilities.",2017-01-19 +23193292,NPIDB: Nucleic acid-Protein Interaction DataBase.,"The Nucleic acid-Protein Interaction DataBase (http://npidb.belozersky.msu.ru/) contains information derived from structures of DNA-protein and RNA-protein complexes extracted from the Protein Data Bank (3846 complexes in October 2012). It provides a web interface and a set of tools for extracting biologically meaningful characteristics of nucleoprotein complexes. The content of the database is updated weekly. The current version of the Nucleic acid-Protein Interaction DataBase is an upgrade of the version published in 2007. The improvements include a new web interface, new tools for calculation of intermolecular interactions, a classification of SCOP families that contains DNA-binding protein domains and data on conserved water molecules on the DNA-protein interface.",2012-11-27 +26265270,Continuous prediction of secondary progression in the individual course of multiple sclerosis.,"

Background

Prediction of the course of multiple sclerosis (MS) was traditionally based on features close to onset.

Objective

To evaluate predictors of the individual risk of secondary progression (SP) identified at any time during relapsing-remitting MS.

Methods

We analysed a database comprising an untreated MS incidence cohort (n=306) with five decades of follow-up. Data regarding predictors of all attacks (n=749) and demographics from patients (n=157) with at least one distinct second attack were included as covariates in a Poisson regression analysis with SP as outcome.

Results

The average hazard function of transition to SPMS was 0.046 events per patient year, showing a maximum at age 33. Three covariates were significant predictors: age, a descriptor of the most recent relapse, and the interaction between the descriptor and time since the relapse. A hazard function termed ""prediction score"" estimated the risk of SP as number of transition events per patient year (range <0.01 to >0.15).

Conclusions

The insights gained from this study are that the risk of transition to SP varies over time in individual patients, that the risk of SP is linked to previous relapses, that predictors in the later stages of the course are more effective than the traditional onset predictors, and that the number of potential predictors can be reduced to a few (three in this study) essential items. This advanced simplification facilitates adaption of the ""prediction score"" to other (more recent, benign or treated) materials, and allows for compact web-based applications (http://msprediction.com).",2014-05-09 +27186799,Greazy: Open-Source Software for Automated Phospholipid Tandem Mass Spectrometry Identification.,"Lipid identification from data produced with high-throughput technologies is essential to the elucidation of the roles played by lipids in cellular function and disease. Software tools for identifying lipids from tandem mass (MS/MS) spectra have been developed, but they are often costly or lack the sophistication of their proteomics counterparts. We have developed Greazy, an open source tool for the automated identification of phospholipids from MS/MS spectra, that utilizes methods similar to those developed for proteomics. From user-supplied parameters, Greazy builds a phospholipid search space and associated theoretical MS/MS spectra. Experimental spectra are scored against search space lipids with similar precursor masses using a peak score based on the hypergeometric distribution and an intensity score utilizing the percentage of total ion intensity residing in matching peaks. The LipidLama component filters the results via mixture modeling and density estimation. We assess Greazy's performance against the NIST 2014 metabolomics library, observing high accuracy in a search of multiple lipid classes. 
We compare Greazy/LipidLama against the commercial lipid identification software LipidSearch and show that the two platforms differ considerably in the sets of identified spectra while showing good agreement on those spectra identified by both. Lastly, we demonstrate the utility of Greazy/LipidLama with different instruments. We searched data from replicates of alveolar type 2 epithelial cells obtained with an Orbitrap and from human serum replicates generated on a quadrupole-time-of-flight (Q-TOF). These findings substantiate the application of proteomics derived methods to the identification of lipids. The software is available from the ProteoWizard repository: http://tiny.cc/bumbershoot-vc12-bin64 .",2016-05-24 +27613899,"How US Smokers Refer to E-cigarettes: An Examination of User-Generated Posts From a Web-Based Smoking Cessation Intervention, 2008-2015.","

Introduction

A challenge in Electronic Nicotine Delivery System (ENDS) research is how to refer to these devices in ways that are meaningful to current or potential users. The objectives of this study were to: (1) describe the frequency of ENDS terms in a web-based smoking cessation intervention; and (2) determine whether terms vary by US geographic region and date.

Methods

Data were drawn from public posts between 2008-2015 on http://BecomeAnEX.org and limited to US users. We conducted ""exact"" and ""fuzzy"" searches to find posts containing ENDS keywords using custom Python scripts, and extracted geocoding data and date for each post. We examined counts and frequencies of ENDS terms by unique user, by unique user and region, and by unique user and date.

Results

We identified 1023 unique US website users who had written a post containing one or more ENDS keywords. Posters were majority female (79%), educated (78% attended at least some college), and had a median age of 47 years. Overall, 92% of ENDS posters employed the term ""e-cigarette"" or a derivation. Derivations of ""vape"" became increasingly popular in 2013, whereas ""NJoy"" and ""blu"" were employed by fewer than 2% of posters. We found no variation in frequency of ENDS terms by US region.

Conclusions

Researchers may have confidence that ""e-cigarette"" and ""vape"" are recognizable terms among US treatment-seeking smokers. Conversely, terms such as ""ENDS,"" commonly employed by researchers and public health advocates, are not used by smokers and may be an impediment to tobacco control research.

Implications

Researchers may have confidence that ""e-cigarette,"" and, to a lesser extent, ""vape"" are recognizable terms among US adult smokers referring to ENDS (including accessories, brand names, and actions). Conversely, terms such as ""electronic nicotine delivery systems,"" commonly employed by researchers and public health advocates, are not used by US smokers and may be an impediment to tobacco control research and practice.",2016-08-16 +27268407,Tracking medical genetic literature through machine learning.,"There has been remarkable progress in identifying the causes of genetic conditions as well as understanding how changes in specific genes cause disease. Though difficult (and often superficial) to parse, an interesting tension involves emphasis on basic research aimed to dissect normal and abnormal biology versus more clearly clinical and therapeutic investigations. To examine one facet of this question and to better understand progress in Mendelian-related research, we developed an algorithm that classifies medical literature into three categories (Basic, Clinical, and Management) and conducted a retrospective analysis. We built a supervised machine learning classification model using the Azure Machine Learning (ML) Platform and analyzed the literature (1970-2014) from NCBI's Entrez Gene2Pubmed Database (http://www.ncbi.nlm.nih.gov/gene) using genes from the NHGRI's Clinical Genomics Database (http://research.nhgri.nih.gov/CGD/). We applied our model to 376,738 articles: 288,639 (76.6%) were classified as Basic, 54,178 (14.4%) as Clinical, and 24,569 (6.5%) as Management. The average classification accuracy was 92.2%. The rate of Clinical publication was significantly higher than Basic or Management. 
The rate of publication of article types differed significantly when divided into key eras: Human Genome Project (HGP) planning phase (1984-1990); HGP launch (1990) to publication (2001); following HGP completion to the ""Next Generation"" advent (2009); the era following 2009. In conclusion, in addition to the findings regarding the pace and focus of genetic progress, our algorithm produced a database that can be used in a variety of contexts including automating the identification of management-related literature.",2016-05-22 +23180790,DDBJ new system and service refactoring.,"The DNA data bank of Japan (DDBJ, http://www.ddbj.nig.ac.jp) maintains a primary nucleotide sequence database and provides analytical resources for biological information to researchers. This database content is exchanged with the US National Center for Biotechnology Information (NCBI) and the European Bioinformatics Institute (EBI) within the framework of the International Nucleotide Sequence Database Collaboration (INSDC). Resources provided by the DDBJ include traditional nucleotide sequence data released in the form of 27 316 452 entries or 16 876 791 557 base pairs (as of June 2012), and raw reads of new generation sequencers in the sequence read archive (SRA). A Japanese researcher published his own genome sequence via DDBJ-SRA on 31 July 2012. To cope with the ongoing genomic data deluge, in March 2012, our computer previous system was totally replaced by a commodity cluster-based system that boasts 122.5 TFlops of CPU capacity and 5 PB of storage space. During this upgrade, it was considered crucial to replace and refactor substantial portions of the DDBJ software systems as well. As a result of the replacement process, which took more than 2 years to perform, we have achieved significant improvements in system performance.",2012-11-24 +27273670,MetalPredator: a web server to predict iron-sulfur cluster binding proteomes.,"

Motivation

The prediction of the iron-sulfur proteome is highly desirable for biomedical and biological research but a freely available tool to predict iron-sulfur proteins has not been developed yet.

Results

We developed a web server to predict iron-sulfur proteins from protein sequence(s). This tool, called MetalPredator, is able to process complete proteomes rapidly with high recall and precision.

Availability and implementation

The web server is freely available at: http://metalweb.cerm.unifi.it/tools/metalpredator/

Contact

andreini@cerm.unifi.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-06 +24081580,3did: a catalog of domain-based interactions of known three-dimensional structure.,"The database of 3D interacting domains (3did, available online for browsing and bulk download at http://3did.irbbarcelona.org) is a catalog of protein-protein interactions for which a high-resolution 3D structure is known. 3did collects and classifies all structural templates of domain-domain interactions in the Protein Data Bank, providing molecular details for such interactions. The current version also includes a pipeline for the discovery and annotation of novel domain-motif interactions. For every interaction, 3did identifies and groups different binding modes by clustering similar interfaces into 'interaction topologies'. By maintaining a constantly updated collection of domain-based structural interaction templates, 3did is a reference source of information for the structural characterization of protein interaction networks. 3did is updated every 6 months.",2013-09-29 +23180783,"SwissRegulon, a database of genome-wide annotations of regulatory sites: recent updates.","Identification of genomic regulatory elements is essential for understanding the dynamics of cellular processes. This task has been substantially facilitated by the availability of genome sequences for many species and high-throughput data of transcripts and transcription factor (TF) binding. However, rigorous computational methods are necessary to derive accurate genome-wide annotations of regulatory sites from such data. SwissRegulon (http://swissregulon.unibas.ch) is a database containing genome-wide annotations of regulatory motifs, promoters and TF binding sites (TFBSs) in promoter regions across model organisms. 
Its binding site predictions were obtained with rigorous Bayesian probabilistic methods that operate on orthologous regions from related genomes, and use explicit evolutionary models to assess the evidence of purifying selection on each site. New in the current version of SwissRegulon is a curated collection of 190 mammalian regulatory motifs associated with ∼340 TFs, and TFBS annotations across a curated set of ∼35 000 promoters in both human and mouse. Predictions of TFBSs for Saccharomyces cerevisiae have also been significantly extended and now cover 158 of yeast's ∼180 TFs. All data are accessible through both an easily navigable genome browser with search functions, and as flat files that can be downloaded for further analysis.",2012-11-24 +22408366,Using the Spinal Cord Injury Common Data Elements.,"International Spinal Cord Injury (SCI) Data Sets include core, basic, and extended data sets. To date, 13 data sets have been published on the Web site of the International Spinal Cord Injury Society (ISCoS; www.iscos.org.uk), and several more are forthcoming. The data sets are constituted of data elements, which may be appropriate to use in trials conducted to test novel therapeutic candidates including neuroprotective drugs, various cell types, and rehabilitative strategies and devices. The National Institute of Neurological Disorders and Stroke (NINDS), the National Institutes of Health (NIH), embarked on a Common Data Element (CDE) Project 5 years ago. The mission of the NINDS CDE Project is to develop data standards for clinical research. The NINDS CDE team has since developed variable names and database structures for the International SCI Data Sets (ie, the SCI CDEs; http://www.commondataelements.ninds.nih.gov/SCI.aspx). Dataset variable names and database structure are exemplified with the International SCI Core Data Set and the International SCI Cardiovascular Function Basic Data Set. 
The consistency of the data sets and the CDE format may improve the ability to transfer critical medical information electronically from one center to another. The goals of the SCI CDE initiative are to increase the efficiency and effectiveness of clinical research studies and clinical treatment, increase data quality, facilitate data sharing, and help educate new clinical investigators. Pilot testing the SCI CDEs is an important step to ensure the SCI CDE effort achieves its goals.",2012-01-01 +24907353,miRror-Suite: decoding coordinated regulation by microRNAs. ,"MicroRNAs (miRNAs) are short, non-coding RNAs that negatively regulate post-transcriptional mRNA levels. Recent data from cross-linking and immunoprecipitation technologies confirmed the combinatorial nature of the miRNA regulation. We present the miRror-Suite platform, developed to yield a robust and concise explanation for miRNA regulation from a large collection of differentially expressed transcripts and miRNAs. The miRror-Suite platform includes the miRror2.0 and Probability Supported Iterative miRror (PSI-miRror) tools. Researchers who performed large-scale transcriptomics or miRNA profiling experiments from cells and tissues will benefit from miRror-Suite. Our platform provides a concise, plausible explanation for the regulation of miRNAs in such complex settings. The input for miRror2.0 may include hundreds of differentially expressed genes or miRNAs. In the case of miRNAs as input, the algorithm seeks the statistically most likely set of genes regulated by this input. Alternatively, for a set of genes, the miRror algorithm seeks a collection of miRNAs that best explains their regulation. The miRror-Suite algorithm designates statistical criteria that were uniformly applied to a dozen miRNA-target prediction databases. 
Users select the preferred databases for predictions and numerous optional filters/parameters that restrict the search to the desired tissues, cell lines, level of expression and predictor scores. PSI-miRror is an advanced application for refining the input set by gradually enhancing the degree of pairing of the sets of miRNAs with the sets of targets. The iterations of PSI-miRror probe the interlinked nature of miRNAs and targets within cells. miRror-Suite serves experimentalists in facilitating the understanding of miRNA regulation through combinatorial- cooperative activity. The platform applies to human, mouse, rat, fly, worm and zebrafish. Database URL: http://www.mirrorsuite.cs.huji.ac.il.",2014-06-06 +27212003,A review of instruments to measure interprofessional team-based primary care.,"Interprofessional team-based care is increasingly regarded as an important feature of delivery systems redesigned to provide more efficient and higher quality care, including primary care. Measurement of the functioning of such teams might enable improvement of team effectiveness and could facilitate research on team-based primary care. Our aims were to develop a conceptual framework of high-functioning primary care teams to identify and review instruments that measure the constructs identified in the framework, and to create a searchable, web-based atlas of such instruments (available at: http://primarycaremeasures.ahrq.gov/team-based-care/ ). Our conceptual framework was developed from existing frameworks, the teamwork literature, and expert input. The framework is based on an Input-Mediator-Output model and includes 12 constructs to which we mapped both instruments as a whole, and individual instrument items. Instruments were also reviewed for relevance to measuring team-based care, and characterized. Instruments were identified from peer-reviewed and grey literature, measure databases, and expert input. 
From nearly 200 instruments initially identified, we found 48 to be relevant to measuring team-based primary care. The majority of instruments were surveys (n = 44), and the remainder (n = 4) were observational checklists. Most instruments had been developed/tested in healthcare settings (n = 30) and addressed multiple constructs, most commonly communication (n = 42), heedful interrelating (n = 42), respectful interactions (n = 40), and shared explicit goals (n = 37). The majority of instruments had some reliability testing (n = 39) and over half included validity testing (n = 29). Currently available instruments offer promise to researchers and practitioners to assess teams' performance, but additional work is needed to adapt these instruments for primary care settings.",2016-05-21 +27243002,Efficient Synergistic Single-Cell Genome Assembly.,"As the vast majority of all microbes are unculturable, single-cell sequencing has become a significant method to gain insight into microbial physiology. Single-cell sequencing methods, currently powered by multiple displacement genome amplification (MDA), have passed important milestones such as finishing and closing the genome of a prokaryote. However, the quality and reliability of genome assemblies from single cells are still unsatisfactory due to uneven coverage depth and the absence of scattered chunks of the genome in the final collection of reads caused by MDA bias. In this work, our new algorithm Hybrid De novo Assembler (HyDA) demonstrates the power of coassembly of multiple single-cell genomic data sets through significant improvement of the assembly quality in terms of predicted functional elements and length statistics. Coassemblies contain significantly more base pairs and protein coding genes, cover more subsystems, and consist of longer contigs compared to individual assemblies by the same algorithm as well as state-of-the-art single-cell assemblers SPAdes and IDBA-UD. 
Hybrid De novo Assembler (HyDA) is also able to avoid chimeric assemblies by detecting and separating shared and exclusive pieces of sequence for input data sets. By replacing one deep single-cell sequencing experiment with a few single-cell sequencing experiments of lower depth, the coassembly method can hedge against the risk of failure and loss of the sample, without significantly increasing sequencing cost. Application of the single-cell coassembler HyDA to the study of three uncultured members of an alkane-degrading methanogenic community validated the usefulness of the coassembly concept. HyDA is open source and publicly available at http://chitsazlab.org/software.html, and the raw reads are available at http://chitsazlab.org/research.html.",2016-05-23 +23368677,Identifying cross-category relations in gene ontology and constructing genome-specific term association networks.,"

Background

Gene Ontology (GO) has been widely used in biological databases, annotation projects, and computational analyses. Although the three GO categories are structured as independent ontologies, the biological relationships across the categories are not negligible for biological reasoning and knowledge integration. However, the existing cross-category ontology term similarity measures are either developed by utilizing the GO data only or based on manually curated term name similarities, ignoring the fact that GO is evolving quickly and the gene annotations are far from complete.

Results

In this paper we introduce a new cross-category similarity measurement called CroGO by incorporating genome-specific gene co-function network data. The performance study showed that our measurement outperforms the existing algorithms. We also generated genome-specific term association networks for yeast and human. An enrichment based test showed our networks are better than those generated by the other measures.

Conclusions

The genome-specific term association networks constructed using CroGO provided a platform to enable a more consistent use of GO. In the networks, the frequently occurring MF-centered hub indicates that a molecular function may be shared by different genes in multiple biological processes, or a set of genes with the same functions may participate in distinct biological processes. And common subgraphs in multiple organisms also revealed conserved GO term relationships. Software and data are available online at http://www.msu.edu/~jinchen/CroGO.",2013-01-21 +28039503,"Does Postoperative Erythropoietin Reduce Transfusions and Hemodynamic Instability Following Liposuction, Either Alone or Associated with Abdominoplasty or Mammaplasty? A Comparative, Prospective Study of 50 Consecutive Patients.","

Introduction

Erythropoietin (EPO) is a hematopoietic growth factor and an alternative to avoid blood transfusion in high-blood-loss surgeries. We evaluate EPO efficacy to reduce clinically relevant anemia and dehydration in patients undergoing liposuction.

Methods

We prospectively evaluated 50 consecutive patients subjected to liposuction greater than 2.5 L and alternately assigned into two comparable groups (25 patients each), except for the postoperative administration of erythropoietin (4000 IU per day subcutaneously) during five consecutive days. Incidence data for blood transfusion or parenteral hydration were collected. Statistical analyses were performed with significance at p value <5%.

Results

There was no significant difference between groups related to any preoperative feature or the incidence of dehydration (p = 0.1099) or transfusion (p = 1.0).

Conclusion

Postoperative erythropoietin administration was not effective in preventing blood transfusion for anemia or parenteral hydration for hemodynamic instability in patients undergoing major liposuction.

Level of evidence III

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266.",2016-12-30 +27203418,Nanoengineered Templated Polymer Particles: Navigating the Biological Realm.,"Nanoengineered materials offer tremendous promise for developing the next generation of therapeutics. We are transitioning from simple research questions, such as ""can this particle eradicate cancer cells?"" to more sophisticated ones like ""can we design a particle to preferentially deliver cargo to a specific cancer cell type?"" These developments are poised to usher in a new era of nanoengineered drug delivery systems. We primarily work with templating methods for engineering polymer particles and investigate their biological interactions. Templates are scaffolds that facilitate the formation of particles with well-controlled size, shape, structure, stiffness, stability, and surface chemistry. In the past decade, breakthroughs in engineering new templates, combined with advances in coating techniques, including layer-by-layer (LbL) assembly, surface polymerization, and metal-phenolic network (MPN) coordination chemistry, have enabled particles with specific physicochemical properties to be engineered. While materials science offers an ever-growing number of new synthesis techniques, a central challenge of therapeutic delivery has become understanding how nanoengineered materials interact with biological systems. Increased collaboration between chemists, biologists, and clinicians has resulted in a vast research output on bio-nano interactions. Our understanding of cell-particle interactions has grown considerably, but conventional in vitro experimentation provides limited information, and understanding how to bridge the in vitro/in vivo gap is a continuing challenge. 
As has been demonstrated in other fields, there is now a growing interest in applying computational approaches to advance this area. A considerable knowledge base is now emerging, and with it comes new and exciting opportunities that are already being capitalized on through the translation of materials into the clinic. In this Account, we outline our perspectives gained from a decade of work at the interface between polymer particle engineering and bio-nano interactions. We divide our research into three areas: (i) biotrafficking, including cellular association, intracellular transport, and biodistribution; (ii) biodegradation and how to achieve controlled, responsive release of therapeutics; and (iii) applications, including drug delivery, controlling immunostimulatory responses, biosensing, and microreactors. There are common challenges in these areas for groups developing nanoengineered therapeutics. A key ""lesson-learned"" has been the considerable challenge of staying informed about the developments relevant to this field. There are a number of reasons for this, most notably the interdisciplinary nature of the work, the large numbers of researchers and research outputs, and the limited standardization in technique nomenclature. Additionally, a large body of work is being generated with limited central archiving, other than vast general databases. To help address these points, we have created a web-based tool to organize our past, present, and future work [Bio-nano research knowledgebase, http://bionano.eng.unimelb.edu.au/knowledge_base/ (accessed May 2, 2016)]. This tool is intended to serve as a first step toward organizing results in this large, complex area. We hope that this will inspire researchers, both in generating new ideas and also in collecting, collating, and sharing their experiences to guide future research.",2016-05-20 +27153635,TreeQTL: hierarchical error control for eQTL findings.,

Unlabelled

Commonly used multiplicity adjustments fail to control the error rate for reported findings in many expression quantitative trait loci (eQTL) studies. TreeQTL implements a hierarchical multiple testing procedure which allows control of appropriate error rates defined relative to a grouping of the eQTL hypotheses.

Availability and implementation

The R package TreeQTL is available for download at http://bioinformatics.org/treeqtl

Contact

sabatti@stanford.edu

Supplementary information

Supplementary data are available at Bioinformatics online.,2016-04-19 +27211858,A Genome-Scale Database and Reconstruction of Caenorhabditis elegans Metabolism.,"We present a genome-scale model of Caenorhabditis elegans metabolism along with the public database ElegCyc (http://elegcyc.bioinf.uni-jena.de:1100), which represents a reference for metabolic pathways in the worm and allows for the visualization as well as analysis of omics datasets. Our model reflects the metabolic peculiarities of C. elegans that make it distinct from other higher eukaryotes and mammals, including mice and humans. We experimentally verify one of these peculiarities by showing that the lifespan-extending effect of L-tryptophan supplementation is dose dependent (hormetic). Finally, we show the utility of our model for analyzing omics datasets through predicting changes in amino acid concentrations after genetic perturbations and analyzing metabolic changes during normal aging as well as during two distinct, reactive oxygen species (ROS)-related lifespan-extending treatments. Our analyses reveal a notable similarity in metabolic adaptation between distinct lifespan-extending interventions and point to key pathways affecting lifespan in nematodes.",2016-05-19 +22707153,Proteomic analysis of human hippocampus shows differential protein expression in the different hippocampal subfields.,"In the current investigation, we aimed to characterize the differential protein expression in each of the hippocampal subregions in healthy control samples (n = 20). We used laser-assisted microdissection and difference in-gel electrophoresis to enrich for these tissues and to compare protein profiles. Image analysis was carried out using Progenesis SameSpots. Samples with a false discovery rate smaller than 5%, a p-value of < 0.01, and an expression of at least ± 1.2 were considered significant. Proteins were identified using LC-ESI-MS/MS. The raw mass spectral data were analyzed using DataAnalysis software. 
Data were searched against the Swissprot database using MASCOT. Samples were grouped according to the different subregions and we found 182 spots to be differentially expressed between the different hippocampal subregions. These have been made available as part of the UCD-2DPAGE database at http://proteomics-portal.ucd.ie:8082. The associated MS data have been submitted to PRIDE (Accession numbers 21593-21745). This baseline data will be helpful in helping us to understand the central role of the hippocampus in health and the evidence that particular hippocampal subregions are differentially affected in disease.",2012-08-01 +28732463,Common and phylogenetically widespread coding for peptides by bacterial small RNAs.,"

Background

While eukaryotic noncoding RNAs have recently received intense scrutiny, it is becoming clear that bacterial transcription is at least as pervasive. Bacterial small RNAs and antisense RNAs (sRNAs) are often assumed to be noncoding, due to their lack of long open reading frames (ORFs). However, there are numerous examples of sRNAs encoding for small proteins, whether or not they also have a regulatory role at the RNA level.

Methods

Here, we apply flexible machine learning techniques based on sequence features and comparative genomics to quantify the prevalence of sRNA ORFs under natural selection to maintain protein-coding function in 14 phylogenetically diverse bacteria. Importantly, we quantify uncertainty in our predictions, and follow up on them using mass spectrometry proteomics and comparison to datasets including ribosome profiling.

Results

A majority of annotated sRNAs have at least one ORF between 10 and 50 amino acids long, and we conservatively predict that 409±191.7 unannotated sRNA ORFs are under selection to maintain coding (mean estimate and 95% confidence interval), an average of 29 per species considered here. This implies that overall at least 10.3±0.5% of sRNAs have a coding ORF, and in some species around 20% do. 165±69 of these novel coding ORFs have some antisense overlap to annotated ORFs. As experimental validation, many of our predictions are translated in published ribosome profiling data and are identified via mass spectrometry shotgun proteomics. B. subtilis sRNAs with coding ORFs are enriched for high expression in biofilms and confluent growth, and S. pneumoniae sRNAs with coding ORFs are involved in virulence. sRNA coding ORFs are enriched for transmembrane domains and many are predicted novel components of type I toxin/antitoxin systems.

Conclusions

We predict over two dozen new protein-coding genes per bacterial species, but crucially also quantified the uncertainty in this estimate. Our predictions for sRNA coding ORFs, along with predicted novel type I toxins and tools for sorting and visualizing genomic context, are freely available in a user-friendly format at http://disco-bac.web.pasteur.fr. We expect these easily-accessible predictions to be a valuable tool for the study not only of bacterial sRNAs and type I toxin-antitoxin systems, but also of bacterial genetics and genomics.",2017-07-21 +27192614,Bayesian Top-Down Protein Sequence Alignment with Inferred Position-Specific Gap Penalties.,"We describe a Bayesian Markov chain Monte Carlo (MCMC) sampler for protein multiple sequence alignment (MSA) that, as implemented in the program GISMO and applied to large numbers of diverse sequences, is more accurate than the popular MSA programs MUSCLE, MAFFT, Clustal-Ω and Kalign. Features of GISMO central to its performance are: (i) It employs a ""top-down"" strategy with a favorable asymptotic time complexity that first identifies regions generally shared by all the input sequences, and then realigns closely related subgroups in tandem. (ii) It infers position-specific gap penalties that favor insertions or deletions (indels) within each sequence at alignment positions in which indels are invoked in other sequences. This favors the placement of insertions between conserved blocks, which can be understood as making up the proteins' structural core. (iii) It uses a Bayesian statistical measure of alignment quality based on the minimum description length principle and on Dirichlet mixture priors. Consequently, GISMO aligns sequence regions only when statistically justified. This is unlike methods based on the ad hoc, but widely used, sum-of-the-pairs scoring system, which will align random sequences. 
(iv) It defines a system for exploring alignment space that provides natural avenues for further experimentation through the development of new sampling strategies for more efficiently escaping from suboptimal traps. GISMO's superior performance is illustrated using 408 protein sets containing, on average, 235 sequences. These sets correspond to NCBI Conserved Domain Database alignments, which have been manually curated in the light of available crystal structures, and thus provide a means to assess alignment accuracy. GISMO fills a different niche than other MSA programs, namely identifying and aligning a conserved domain present within a large, diverse set of full length sequences. The GISMO program is available at http://gismo.igs.umaryland.edu/.",2016-05-18 +27209127,Explorations in genome-wide association studies and network analyses with dairy cattle fertility traits.,"The objective of this study was to identify single nucleotide polymorphisms and gene networks associated with 3 fertility traits in dairy cattle-daughter pregnancy rate, heifer conception rate, and cow conception rate-using different approaches. Deregressed predicted transmitting abilities were available for approximately 24,000 Holstein bulls and 36,000 Holstein cows sampled from the National Dairy Database with high-density genotypes. Of those, 1,732 bulls and 375 cows had been genotyped with the Illumina BovineHD Genotyping BeadChip (Illumina Inc., San Diego, CA). The remaining animals were genotyped with various chips of lower density that were imputed to high density. Univariate and trivariate genome-wide association studies (GWAS) with both medium- (60,671 markers) and high-density (312,614 markers) panels were performed for daughter pregnancy rate, heifer conception rate, and cow conception rate using GEMMA (version 0.94; http://www.xzlab.org/software.html). Analyses were conducted using bulls only, cows only, and a sample of both bulls and cows. 
The partial correlation and information theory algorithm was used to develop gene interaction networks. The most significant markers were further investigated to identify putatively associated genes. Little overlap in associated genes could be found between GWAS using different reference populations of bulls only, cows only, and combined bulls and cows. The partial correlation and information theory algorithm was able to identify several genes that were not identified by ordinary GWAS. The results obtained herein will aid in further dissecting the complex biology underlying fertility traits in dairy cattle, while also providing insight into the nuances of GWAS.",2016-05-18 +27092947,Rapid Prediction of Bacterial Heterotrophic Fluxomics Using Machine Learning and Constraint Programming.,"13C metabolic flux analysis (13C-MFA) has been widely used to measure in vivo enzyme reaction rates (i.e., metabolic flux) in microorganisms. Mining the relationship between environmental and genetic factors and metabolic fluxes hidden in existing fluxomic data will lead to predictive models that can significantly accelerate flux quantification. In this paper, we present a web-based platform MFlux (http://mflux.org) that predicts the bacterial central metabolism via machine learning, leveraging data from approximately 100 13C-MFA papers on heterotrophic bacterial metabolisms. Three machine learning methods, namely Support Vector Machine (SVM), k-Nearest Neighbors (k-NN), and Decision Tree, were employed to study the sophisticated relationship between influential factors and metabolic fluxes. We performed a grid search of the best parameter set for each algorithm and verified their performance through 10-fold cross validations. SVM yields the highest accuracy among all three algorithms. Further, we employed quadratic programming to adjust flux profiles to satisfy stoichiometric constraints. 
Multiple case studies have shown that MFlux can reasonably predict fluxomes as a function of bacterial species, substrate types, growth rate, oxygen conditions, and cultivation methods. Due to the interest of studying model organism under particular carbon sources, bias of fluxome in the dataset may limit the applicability of machine learning models. This problem can be resolved after more papers on 13C-MFA are published for non-model species.",2016-04-19 +25429434,Droperidol for treatment of nausea and vomiting in palliative care patients.,"

Background

This is an updated version of the original Cochrane review published in Issue 10, 2010, on droperidol for the treatment of nausea and vomiting in palliative care patients. Nausea and vomiting are common symptoms in patients with terminal illness and can be very unpleasant and distressing. There are several different types of antiemetic treatments that can be used to control these symptoms. Droperidol is an antipsychotic drug and has been used and studied as an antiemetic in the management of postoperative and chemotherapy nausea and vomiting.

Objectives

To evaluate the efficacy and adverse events (both minor and serious) associated with the use of droperidol for the treatment of nausea and vomiting in palliative care patients.

Search methods

We searched electronic databases including CENTRAL, MEDLINE (1950-), EMBASE (1980-), CINAHL (1981-) and AMED (1985-), using relevant search terms and synonyms. The basic search strategy was (""droperidol"" OR ""butyrophenone"") AND (""nausea"" OR ""vomiting""), modified for each database. We updated the search on 2 December 2009. We performed updated searches of MEDLINE, EMBASE, CENTRAL and AMED 2009 to 2013 on 19 November 2013 and of CINAHL on 20 November 2013. We also searched trial registers (metaRegister of controlled trials (www.controlled-trials.com/mrct), clinicaltrials.gov (www.clinicaltrials.gov) and the WHO International Clinical Trials Registry Platform (ICTRP) (http://apps.who.int/trialsearch/)) on 22 November 2013, using the keyword ""droperidol"".

Selection criteria

Randomised controlled trials (RCTs) of droperidol for the treatment of nausea or vomiting, or both, in adults receiving palliative care or suffering from an incurable progressive medical condition.

Data collection and analysis

We judged the potential relevance of studies based on their titles and abstracts, and obtained studies that we anticipated might meet the inclusion criteria. Two review authors independently reviewed the abstracts for the initial review and four review authors reviewed the abstracts for the update to assess suitability for inclusion. We discussed discrepancies to achieve consensus.

Main results

The 2010 search strategy identified 1664 abstracts (and 827 duplicates) of which we obtained 23 studies in full as potentially meeting the inclusion criteria. On review of the full papers, we identified no studies that met the inclusion criteria. The updated searches carried out in November 2013 identified 304 abstracts (261 excluding duplicates) of which we obtained 18 references in full as potentially meeting the inclusion criteria. On review of the full papers, we identified no studies that met the inclusion criteria, therefore there were no included studies in this review. We found no registered trials of droperidol for the management of nausea or vomiting in palliative care.

Authors' conclusions

Since first publication of this review, no new studies were found. There is insufficient evidence to advise on the use of droperidol for the management of nausea and vomiting in palliative care. Studies of antiemetics in palliative care settings are needed to identify which agents are most effective, with minimum side effects.",2014-11-27 +27306882,CHiCAGO: robust detection of DNA looping interactions in Capture Hi-C data.,"Capture Hi-C (CHi-C) is a method for profiling chromosomal interactions involving targeted regions of interest, such as gene promoters, globally and at high resolution. Signal detection in CHi-C data involves a number of statistical challenges that are not observed when using other Hi-C-like techniques. We present a background model and algorithms for normalisation and multiple testing that are specifically adapted to CHi-C experiments. We implement these procedures in CHiCAGO ( http://regulatorygenomicsgroup.org/chicago ), an open-source package for robust interaction detection in CHi-C. We validate CHiCAGO by showing that promoter-interacting regions detected with this method are enriched for regulatory features and disease-associated SNPs.",2016-06-15 +23837517,Implementing DICOM structured reporting in a large-scale telemedicine network.,"

Introduction

Large-scale asynchronous telemedicine networks can offer a unique opportunity for the acquisition of detailed epidemiological information if the data are acquired and handled in an appropriate way. In this work, an approach is presented for the integration of medical reports in the Digital Imaging and Communications in Medicine (DICOM) Structured Reporting standard in telemedicine networks using structured vocabularies.

Materials and methods

The use of these structured vocabularies is extended beyond radiology, and a case study in telecardiology is presented. The approach was applied in the context of a real-world statewide public telemedicine network; nowadays on average 470 written electrocardiographic structured reports daily are being performed. Cardiologists provided more than 220,000 written structured reports, and these reports are stored into a central database.

Results

This study was performed during a 12-month period, and it was possible to examine possible associations between a list of co-morbidities and cardiac risk factors with a diagnosis that indicates the presence of cardiac ischemia, cardiac injury, or possible necrosis by using DICOM Structured Reporting. Our application is responsible for coordinating the process of issuance of reports through various technologies and devices. The system works as a library in an HTTP server, which accesses information from studies in DICOM format from the database and from structured vocabularies.

Conclusions

Results indicate that traceability of morbidity, diagnoses, and patient clinical information can be achieved, resulting in an efficient data mining-friendly framework. A multidevice application for Web-based and smartphone-based platforms showed that it is a viable solution for applying the DICOM Structured Reporting standard in telemedicine networks.",2013-07-01 +27189607,Argo: enabling the development of bespoke workflows and services for disease annotation. ,"Argo (http://argo.nactem.ac.uk) is a generic text mining workbench that can cater to a variety of use cases, including the semi-automatic annotation of literature. It enables its technical users to build their own customised text mining solutions by providing a wide array of interoperable and configurable elementary components that can be seamlessly integrated into processing workflows. With Argo's graphical annotation interface, domain experts can then make use of the workflows' automatically generated output to curate information of interest. With the continuously rising need to understand the aetiology of diseases as well as the demand for their informed diagnosis and personalised treatment, the curation of disease-relevant information from medical and clinical documents has become an indispensable scientific activity. In the Fifth BioCreative Challenge Evaluation Workshop (BioCreative V), there was substantial interest in the mining of literature for disease-relevant information. Apart from a panel discussion focussed on disease annotations, the chemical-disease relations (CDR) track was also organised to foster the sharing and advancement of disease annotation tools and resources. This article presents the application of Argo's capabilities to the literature-based annotation of diseases. As part of our participation in BioCreative V's User Interactive Track (IAT), we demonstrated and evaluated Argo's suitability to the semi-automatic curation of chronic obstructive pulmonary disease (COPD) phenotypes. 
Furthermore, the workbench facilitated the development of some of the CDR track's top-performing web services for normalising disease mentions against the Medical Subject Headings (MeSH) database. In this work, we highlight Argo's support for developing various types of bespoke workflows ranging from ones which enabled us to easily incorporate information from various databases, to those which train and apply machine learning-based concept recognition models, through to user-interactive ones which allow human curators to manually provide their corrections to automatically generated annotations. Our participation in the BioCreative V challenges shows Argo's potential as an enabling technology for curating disease and phenotypic information from literature. Database URL: http://argo.nactem.ac.uk.",2016-05-17 +26342233,"ResiCon: a method for the identification of dynamic domains, hinges and interfacial regions in proteins.","

Motivation

Structure of most proteins is flexible. Identification and analysis of intramolecular motions is a complex problem. Breaking a structure into relatively rigid parts, the so-called dynamic domains, may help comprehend the complexity of protein's mobility. We propose a new approach called ResiCon (Residue Contacts analysis), which performs this task by applying a data-mining analysis of an ensemble of protein configurations and recognizes dynamic domains, hinges and interfacial regions, by considering contacts between residues.

Results

Dynamic domains found by ResiCon are more compact than those identified by two other popular methods: PiSQRD and GeoStaS. The current analysis was carried out using a known reference set of 30 NMR protein structures, as well as molecular dynamics simulation data of flap opening events in HIV-1 protease. The more detailed analysis of HIV-1 protease dataset shows that ResiCon identified dynamic domains involved in structural changes of functional importance.

Availability and implementation

The ResiCon server is available at URL: http://dworkowa.imdik.pan.pl/EP/ResiCon.

Contact

pawel@bioexploratorium.pl

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-05 +25855807,The EBI Search engine: providing search and retrieval functionality for biological data from EMBL-EBI.,"The European Bioinformatics Institute (EMBL-EBI-https://www.ebi.ac.uk) provides free and unrestricted access to data across all major areas of biology and biomedicine. Searching and extracting knowledge across these domains requires a fast and scalable solution that addresses the requirements of domain experts as well as casual users. We present the EBI Search engine, referred to here as 'EBI Search', an easy-to-use fast text search and indexing system with powerful data navigation and retrieval capabilities. API integration provides access to analytical tools, allowing users to further investigate the results of their search. The interconnectivity that exists between data resources at EMBL-EBI provides easy, quick and precise navigation and a better understanding of the relationship between different data types including sequences, genes, gene products, proteins, protein domains, protein families, enzymes and macromolecular structures, together with relevant life science literature.",2015-04-08 +26353840,Reveel: large-scale population genotyping using low-coverage sequencing data.,"

Motivation

Population low-coverage whole-genome sequencing is rapidly emerging as a prominent approach for discovering genomic variation and genotyping a cohort. This approach combines substantially lower cost than full-coverage sequencing with whole-genome discovery of low-allele frequency variants, to an extent that is not possible with array genotyping or exome sequencing. However, a challenging computational problem arises of jointly discovering variants and genotyping the entire cohort. Variant discovery and genotyping are relatively straightforward tasks on a single individual that has been sequenced at high coverage, because the inference decomposes into the independent genotyping of each genomic position for which a sufficient number of confidently mapped reads are available. However, in low-coverage population sequencing, the joint inference requires leveraging the complex linkage disequilibrium (LD) patterns in the cohort to compensate for sparse and missing data in each individual. The potentially massive computation time for such inference, as well as the missing data that confound low-frequency allele discovery, need to be overcome for this approach to become practical.

Results

Here, we present Reveel, a novel method for single nucleotide variant calling and genotyping of large cohorts that have been sequenced at low coverage. Reveel introduces a novel technique for leveraging LD that deviates from previous Markov-based models, and which is aimed at computational efficiency as well as accuracy in capturing LD patterns present in rare haplotypes. We evaluate Reveel's performance through extensive simulations as well as real data from the 1000 Genomes Project, and show that it achieves higher accuracy in low-frequency allele discovery and substantially lower computation cost than previous state-of-the-art methods.

Availability and implementation

http://reveel.stanford.edu/

Contact

serafim@cs.stanford.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-09 +23888101,Design and development of portal for biological database in agriculture.,"

Unlabelled

The application of novel and modern techniques in genetic engineering and genomics has resulted in an information explosion in genomics. Three major genome databases under the International Nucleotide Sequence Database Collaboration (NCBI, DDBJ and EMBL) have been providing a convenient platform for submission of sequences which they share among themselves. Many institutes in India under the Indian Council of Agricultural Research have scientists working on biotechnology and bioinformatics research. The various studies conducted by them generate massive data related to biological information of plants, animals, insects, microbes and fisheries. These scientists are dependent on NCBI, EMBL, DDBJ and other portals for their sequence submissions, analysis and other data mining tasks. Various limitations imposed on these sites, together with poor connectivity, prevent them from conducting their studies on these open domain databases. The valued information generated by them needs to be shared by the scientific communities to eliminate the duplication of efforts and expedite their knowledge extended towards new findings. A secured common submission portal system with user-friendly interfaces, integrated help and error checking facilities has been developed in such a way that the database at the backend consists of a union of the items available on the above mentioned databases. Standard database management concepts have been employed for their systematic storage management. Extensive hardware resources in the form of a high performance computing facility are being installed for deployment of this portal.

Availability

http://cabindb.iasri.res.in:8080/sequence_portal/",2013-06-29 +25505006,AtCAST3.0 update: a web-based tool for analysis of transcriptome data by searching similarities in gene expression profiles.,"In transcriptome experiments, the experimental conditions (e.g. mutants and/or treatments) cause transcriptional changes. Identifying experimental conditions that induce similar or opposite transcriptional changes can be useful to identify experimental conditions that affect the same biological process. AtCAST (http://atpbsmd.yokohama-cu.ac.jp) is a web-based tool to analyze the relationship between experimental conditions among transcriptome data. Users can analyze 'user's transcriptome data' of a new mutant or a new chemical compound whose function remains unknown to generate novel biological hypotheses. This tool also allows for mining of related 'experimental conditions' from the public microarray data, which are pre-included in AtCAST. This tool extracts a set of genes (i.e. module) that show significant transcriptional changes and generates a network graph to present related transcriptome data. The updated AtCAST now contains data on >7,000 microarrays, including experiments on various stresses, mutants and chemical treatments. Gene ontology term enrichment (GOE) analysis is introduced to assist the characterization of transcriptome data. The new AtCAST supports input from multiple platforms, including the 'Arabidopsis gene 1.1 ST array', a new microarray chip from Affymetrix and RNA sequencing (RNA-seq) data obtained using next-generation sequencing (NGS). As a pilot study, we conducted microarray analysis of Arabidopsis under auxin treatment using the new Affymetrix chip, and then analyzed the data in AtCAST. We also analyzed RNA-seq data of the pifq mutant using AtCAST. 
These new features will facilitate analysis of associations between transcriptome data obtained using different platforms.",2014-12-09 +28019059,Impact of genotyping errors on statistical power of association tests in genomic analyses: A case study.,"A key step in genomic studies is to assess high throughput measurements across millions of markers for each participant's DNA, either using microarrays or sequencing techniques. Accurate genotype calling is essential for downstream statistical analysis of genotype-phenotype associations, and next generation sequencing (NGS) has recently become a more common approach in genomic studies. How the accuracy of variant calling in NGS-based studies affects downstream association analysis has not, however, been studied using empirical data in which both microarrays and NGS were available. In this article, we investigate the impact of variant calling errors on the statistical power to identify associations between single nucleotides and disease, and on associations between multiple rare variants and disease. Both differential and nondifferential genotyping errors are considered. Our results show that the power of burden tests for rare variants is strongly influenced by the specificity in variant calling, but is rather robust with regard to sensitivity. By using the variant calling accuracies estimated from a substudy of a Cooperative Studies Program project conducted by the Department of Veterans Affairs, we show that the power of association tests is mostly retained with commonly adopted variant calling pipelines. 
An R package, GWAS.PC, is provided to accommodate power analysis that takes account of genotyping errors (http://zhaocenter.org/software/).",2016-12-26 +23203983,The 2013 Nucleic Acids Research Database Issue and the online molecular biology database collection.,"The 20th annual Database Issue of Nucleic Acids Research includes 176 articles, half of which describe new online molecular biology databases and the other half provide updates on the databases previously featured in NAR and other journals. This year's highlights include two databases of DNA repeat elements; several databases of transcriptional factors and transcriptional factor-binding sites; databases on various aspects of protein structure and protein-protein interactions; databases for metagenomic and rRNA sequence analysis; and four databases specifically dedicated to Escherichia coli. The increased emphasis on using the genome data to improve human health is reflected in the development of the databases of genomic structural variation (NCBI's dbVar and EBI's DGVa), the NIH Genetic Testing Registry and several other databases centered on the genetic basis of human disease, potential drugs, their targets and the mechanisms of protein-ligand binding. Two new databases present genomic and RNAseq data for monkeys, providing wealth of data on our closest relatives for comparative genomics purposes. The NAR online Molecular Biology Database Collection, available at http://www.oxfordjournals.org/nar/database/a/, has been updated and currently lists 1512 online databases. The full content of the Database Issue is freely available online on the Nucleic Acids Research website (http://nar.oxfordjournals.org/).",2012-11-30 +28084397,Altered Pathway Analyzer: A gene expression dataset analysis tool for identification and prioritization of differentially regulated and network rewired pathways.,"Gene connection rewiring is an essential feature of gene network dynamics. 
Apart from its normal functional role, it may also lead to dysregulated functional states by disturbing pathway homeostasis. Very few computational tools measure rewiring within gene co-expression and its corresponding regulatory networks in order to identify and prioritize altered pathways which may or may not be differentially regulated. We have developed Altered Pathway Analyzer (APA), a microarray dataset analysis tool for identification and prioritization of altered pathways, including those which are differentially regulated by TFs, by quantifying rewired sub-network topology. Moreover, APA also helps in re-prioritization of APA shortlisted altered pathways enriched with context-specific genes. We performed APA analysis of simulated datasets and p53 status NCI-60 cell line microarray data to demonstrate potential of APA for identification of several case-specific altered pathways. APA analysis reveals several altered pathways not detected by other tools evaluated by us. APA analysis of unrelated prostate cancer datasets identifies sample-specific as well as conserved altered biological processes, mainly associated with lipid metabolism, cellular differentiation and proliferation. APA is designed as a cross platform tool which may be transparently customized to perform pathway analysis in different gene expression datasets. APA is freely available at http://bioinfo.icgeb.res.in/APA.",2017-01-13 +28905132,Transethnic insight into the genetics of glycaemic traits: fine-mapping results from the Population Architecture using Genomics and Epidemiology (PAGE) consortium.,"

Aims/hypothesis

Elevated levels of fasting glucose and fasting insulin in non-diabetic individuals are markers of dysregulation of glucose metabolism and are strong risk factors for type 2 diabetes. Genome-wide association studies have discovered over 50 SNPs associated with these traits. Most of these loci were discovered in European populations and have not been tested in a well-powered multi-ethnic study. We hypothesised that a large, ancestrally diverse, fine-mapping genetic study of glycaemic traits would identify novel and population-specific associations that were previously undetectable by European-centric studies.

Methods

In a multiethnic study, up to 26,760 unrelated individuals without diabetes, of predominantly Hispanic/Latino and African ancestries, were genotyped using the Metabochip. Transethnic meta-analyses of racial/ethnic-specific linear regression analyses were performed for fasting glucose and fasting insulin. We attempted to replicate 39 fasting glucose and 17 fasting insulin loci. Genetic fine-mapping was performed through sequential conditional analyses in 15 regions that included both the initially reported SNP association(s) and denser coverage of SNP markers. In addition, Metabochip-wide analyses were performed to discover novel fasting glucose and fasting insulin loci. The most significant SNP associations were further examined using bioinformatic functional annotation.

Results

Previously reported SNP associations were significantly replicated (p ≤ 0.05) in 31/39 fasting glucose loci and 14/17 fasting insulin loci. Eleven glycaemic trait loci were refined to a smaller list of potentially causal variants through transethnic meta-analysis. Stepwise conditional analysis identified two loci with independent secondary signals (G6PC2-rs477224 and GCK-rs2908290), which had not previously been reported. Population-specific conditional analyses identified an independent signal in G6PC2 tagged by the rare variant rs77719485 in African ancestry. Further Metabochip-wide analysis uncovered one novel fasting insulin locus at SLC17A2-rs75862513.

Conclusions/interpretation

These findings suggest that while glycaemic trait loci often have generalisable effects across the studied populations, transethnic genetic studies help to prioritise likely functional SNPs, identify novel associations that may be population-specific and in turn have the potential to influence screening efforts or therapeutic discoveries.

Data availability

The summary statistics from each of the ancestry-specific and transethnic (combined ancestry) results can be found under the PAGE study on dbGaP here: https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=phs000356.v1.p1.",2017-09-13 +24558441,Human transporter database: comprehensive knowledge and discovery tools in the human transporter genes.,"Transporters are essential in homeostatic exchange of endogenous and exogenous substances at the systematic, organic, cellular, and subcellular levels. Gene mutations of transporters are often related to pharmacogenetics traits. Recent developments in high throughput technologies on genomics, transcriptomics and proteomics allow in depth studies of transporter genes in normal cellular processes and diverse disease conditions. The flood of high throughput data have resulted in urgent need for an updated knowledgebase with curated, organized, and annotated human transporters in an easily accessible way. Using a pipeline with the combination of automated keywords query, sequence similarity search and manual curation on transporters, we collected 1,555 human non-redundant transporter genes to develop the Human Transporter Database (HTD) (http://htd.cbi.pku.edu.cn). Based on the extensive annotations, global properties of the transporter genes were illustrated, such as expression patterns and polymorphisms in relationships with their ligands. We noted that the human transporters were enriched in many fundamental biological processes such as oxidative phosphorylation and cardiac muscle contraction, and significantly associated with Mendelian and complex diseases such as epilepsy and sudden infant death syndrome. 
Overall, HTD provides a well-organized interface to facilitate research communities to search detailed molecular and genetic information of transporters for development of personalized medicine.",2014-02-18 +28583127,CMOST: an open-source framework for the microsimulation of colorectal cancer screening strategies.,"

Background

Colorectal cancer (CRC) is a leading cause of cancer-related mortality. CRC incidence and mortality can be reduced by several screening strategies, including colonoscopy, but randomized CRC prevention trials face significant obstacles such as the need for large study populations with long follow-up. Therefore, CRC screening strategies will likely be designed and optimized based on computer simulations. Several computational microsimulation tools have been reported for estimating efficiency and cost-effectiveness of CRC prevention. However, none of these tools is publicly available. There is a need for an open source framework to answer practical questions including testing of new screening interventions and adapting findings to local conditions.

Methods

We developed and implemented a new microsimulation model, Colon Modeling Open Source Tool (CMOST), for modeling the natural history of CRC, simulating the effects of CRC screening interventions, and calculating the resulting costs. CMOST facilitates automated parameter calibration against epidemiological adenoma prevalence and CRC incidence data.

Results

Predictions of CMOST were highly similar compared to a large endoscopic CRC prevention study as well as predictions of existing microsimulation models. We applied CMOST to calculate the optimal timing of a screening colonoscopy. CRC incidence and mortality are reduced most efficiently by a colonoscopy between the ages of 56 and 59; while discounted life years gained (LYG) is maximal at 49-50 years. With a dwell time of 13 years, the most cost-effective screening is at 59 years, at $17,211 discounted USD per LYG. While cost-efficiency varied according to dwell time it did not influence the optimal time point of screening interventions within the tested range.

Conclusions

Predictions of CMOST are highly similar compared to a randomized CRC prevention trial as well as those of other microsimulation tools. This open source tool will enable health-economics analyses in for various countries, health-care scenarios and CRC prevention strategies. CMOST is freely available under the GNU General Public License at https://gitlab.com/misselwb/CMOST.",2017-06-05 +26999001,Strain-level microbial epidemiology and population genomics from shotgun metagenomics.,"Identifying microbial strains and characterizing their functional potential is essential for pathogen discovery, epidemiology and population genomics. We present pangenome-based phylogenomic analysis (PanPhlAn; http://segatalab.cibio.unitn.it/tools/panphlan), a tool that uses metagenomic data to achieve strain-level microbial profiling resolution. PanPhlAn recognized outbreak strains, produced the largest strain-level population genomic study of human-associated bacteria and, in combination with metatranscriptomics, profiled the transcriptional activity of strains in complex communities.",2016-03-21 +26491386,"SIFlore, a dataset of geographical distribution of vascular plants covering five centuries of knowledge in France: Results of a collaborative project coordinated by the Federation of the National Botanical Conservatories.","UNLABELLED:More than 20 years ago, the French Muséum National d'Histoire Naturelle (MNHN, Secretariat of the Fauna and Flora) published the first part of an atlas of the flora of France at a 20km spatial resolution, accounting for 645 taxa (Dupont 1990). Since then, at the national level, there has not been any work on this scale relating to flora distribution, despite the obvious need for a better understanding. In 2011, in response to this need, the Federation des Conservatoires Botaniques Nationaux (FCBN, http://www.fcbn.fr) launched an ambitious collaborative project involving eleven national botanical conservatories of France. 
The project aims to establish a formal procedure and standardized system for data hosting, aggregation and publication for four areas: flora, fungi, vegetation and habitats. In 2014, the first phase of the project led to the development of the national flora dataset: SIFlore. As it includes about 21 million records of flora occurrences, this is currently the most comprehensive dataset on the distribution of vascular plants (Tracheophyta) in the French territory. SIFlore contains information for about 15'454 plant taxa occurrences (indigenous and alien taxa) in metropolitan France and Reunion Island, from 1545 until 2014. The data records were originally collated from inventories, checklists, literature and herbarium records. SIFlore was developed by assembling flora datasets from the regional to the national level. At the regional level, source records are managed by the national botanical conservatories that are responsible for flora data collection and validation. In order to present our results, a geoportal was developed by the Fédération des conservatoires botaniques nationaux that allows the SIFlore dataset to be publically viewed. This portal is available at: http://siflore.fcbn.fr. As the FCBN belongs to the Information System for Nature and Landscapes' (SINP), a governmental program, the dataset is also accessible through the websites of the National Inventory of Natural Heritage (http://www.inpn.fr) and the Global Biodiversity Information Facility (http://www.gbif.fr). SIFlore is regularly updated with additional data records. It is also planned to expand the scope of the dataset to include information about taxon biology, phenology, ecology, chorology, frequency, conservation status and seed banks. A map showing an estimation of the dataset completeness (based on Jackknife 1 estimator) is presented and included as a numerical appendix. 
PURPOSE:SIFlore aims to make the data of the flora of France available at the national level for conservation, policy management and scientific research. Such a dataset will provide enough information to allow for macro-ecological reviews of species distribution patterns and, coupled with climatic or topographic datasets, the identification of determinants of these patterns. This dataset can be considered as the primary indicator of the current state of knowledge of flora distribution across France. At a policy level, and in the context of global warming, this should promote the adoption of new measures aiming to improve and intensify flora conservation and surveys.",2015-09-29 +21655319,Confidence interval based parameter estimation--a new SOCR applet and activity.,"Many scientific investigations depend on obtaining data-driven, accurate, robust and computationally-tractable parameter estimates. In the face of unavoidable intrinsic variability, there are different algorithmic approaches, prior assumptions and fundamental principles for computing point and interval estimates. Efficient and reliable parameter estimation is critical in making inference about observable experiments, summarizing process characteristics and prediction of experimental behaviors. In this manuscript, we demonstrate simulation, construction, validation and interpretation of confidence intervals, under various assumptions, using the interactive web-based tools provided by the Statistics Online Computational Resource (http://www.SOCR.ucla.edu). Specifically, we present confidence interval examples for population means, with known or unknown population standard deviation; population variance; population proportion (exact and approximate), as well as confidence intervals based on bootstrapping or the asymptotic properties of the maximum likelihood estimates. Like all SOCR resources, these confidence interval resources may be openly accessed via an Internet-connected Java-enabled browser. 
The SOCR confidence interval applet enables the user to empirically explore and investigate the effects of the confidence-level, the sample-size and parameter of interest on the corresponding confidence interval. Two applications of the new interval estimation computational library are presented. The first one is a simulation of confidence interval estimating the US unemployment rate and the second application demonstrates the computations of point and interval estimates of hippocampal surface complexity for Alzheimers disease patients, mild cognitive impairment subjects and asymptomatic controls.",2011-05-31 +27080514,CDROM: Classification of Duplicate gene RetentiOn Mechanisms.,"

Background

Gene duplication is a major source of new genes that is thought to play an important role in phenotypic innovation. Though several mechanisms have been hypothesized to drive the functional evolution and long-term retention of duplicate genes, there are currently no software tools for assessing their genome-wide contributions. Thus, the evolutionary mechanisms by which duplicate genes acquire novel functions remain unclear in a number of taxa.

Results

In a recent study, researchers developed a phylogenetic approach that uses gene expression data from two species to classify the mechanisms underlying the retention of duplicate genes (Proc Natl Acad Sci USA 110:17409–17414, 2013). We have implemented their classification method, as well as a more generalized method, in the R package CDROM, enabling users to apply these methods to their data and gain insights into the origin of novel biological functions after gene duplication. The CDROM R package, source code, and user manual for the R package are available for download from CRAN at https://cran.rstudio.com/web/packages/CDROM/ . Additionally, the CDROM R source code, user manual for running CDROM from the source code, and sample dataset used in this manuscript can be accessed at www.personal.psu.edu/rua15/software.html .

Conclusions

CDROM is the first software package that enables genome-wide classification of the mechanisms driving the long-term retention of duplicate genes. It is user-friendly and flexible, providing researchers with a tool for studying the functional evolution of duplicate genes in a variety of taxa.",2016-04-14 +25795417,miRBoost: boosting support vector machines for microRNA precursor classification.,"Identification of microRNAs (miRNAs) is an important step toward understanding post-transcriptional gene regulation and miRNA-related pathology. Difficulties in identifying miRNAs through experimental techniques combined with the huge amount of data from new sequencing technologies have made in silico discrimination of bona fide miRNA precursors from non-miRNA hairpin-like structures an important topic in bioinformatics. Among various techniques developed for this classification problem, machine learning approaches have proved to be the most promising. However these approaches require the use of training data, which is problematic due to an imbalance in the number of miRNAs (positive data) and non-miRNAs (negative data), which leads to a degradation of their performance. In order to address this issue, we present an ensemble method that uses a boosting technique with support vector machine components to deal with imbalanced training data. Classification is performed following a feature selection on 187 novel and existing features. The algorithm, miRBoost, performed better in comparison with state-of-the-art methods on imbalanced human and cross-species data. It also showed the highest ability among the tested methods for discovering novel miRNA precursors. In addition, miRBoost was over 1400 times faster than the second most accurate tool tested and was significantly faster than most of the other tools. miRBoost thus provides a good compromise between prediction efficiency and execution time, making it highly suitable for use in genome-wide miRNA precursor prediction. 
The software miRBoost is available on our web server http://EvryRNA.ibisc.univ-evry.fr.",2015-03-20 +23482505,Genome-wide SNP genotyping to infer the effects on gene functions in tomato.,"The genotype data of 7054 single nucleotide polymorphism (SNP) loci in 40 tomato lines, including inbred lines, F1 hybrids, and wild relatives, were collected using Illumina's Infinium and GoldenGate assay platforms, the latter of which was utilized in our previous study. The dendrogram based on the genotype data corresponded well to the breeding types of tomato and wild relatives. The SNPs were classified into six categories according to their positions in the genes predicted on the tomato genome sequence. The genes with SNPs were annotated by homology searches against the nucleotide and protein databases, as well as by domain searches, and they were classified into the functional categories defined by the NCBI's eukaryotic orthologous groups (KOG). To infer the SNPs' effects on the gene functions, the three-dimensional structures of the 843 proteins that were encoded by the genes with SNPs causing missense mutations were constructed by homology modelling, and 200 of these proteins were considered to carry non-synonymous amino acid substitutions in the predicted functional sites. The SNP information obtained in this study is available at the Kazusa Tomato Genomics Database (http://plant1.kazusa.or.jp/tomato/).",2013-03-12 +26547152,Estimation and uncertainty of reversible Markov models.,"Reversibility is a key concept in Markov models and master-equation models of molecular kinetics. The analysis and interpretation of the transition matrix encoding the kinetic properties of the model rely heavily on the reversibility property. The estimation of a reversible transition matrix from simulation data is, therefore, crucial to the successful application of the previously developed theory. 
In this work, we discuss methods for the maximum likelihood estimation of transition matrices from finite simulation data and present a new algorithm for the estimation if reversibility with respect to a given stationary vector is desired. We also develop new methods for the Bayesian posterior inference of reversible transition matrices with and without given stationary vector taking into account the need for a suitable prior distribution preserving the meta-stable features of the observed process during posterior inference. All algorithms here are implemented in the PyEMMA software--http://pyemma.org--as of version 2.0.",2015-11-01 +27274402,Hydrocephalus caused by unilateral foramen of Monro obstruction: A review on terminology.,"

Background

Hydrocephalus caused by unilateral foramen of Monro (FM) obstruction has been referred to in literature by many different terminologies. Precise terminology describing hydrocephalus confined to just one lateral ventricle has a very important prognostic value and determines whether or not the patient can be shunt free after an endoscopic procedure.

Methods

Aiming to define the best term for unilateral FM obstruction, 19 terms were employed on PubMed database (http://www.ncbi.nlm.nih.gov/pubmed) as quoted phrases.

Results

A total of 194 articles were found. Four patterns of hydrocephalus were discriminated as a result of our research term query and were divided by types for didactic purpose. Type A - partial dilation of the lateral ventricle; Type B - pure unilateral obstruction of the FM; Type C - previously shunted patients with secondary obstruction of the FM; and Type D - asymmetric lateral ventricles with patent FM.

Conclusion

In unilateral FM obstruction hydrocephalus, an in-depth review on terminology application is critical to avoid mistakes that may compromise comparisons among different series. This terminology review suggests that Type B hydrocephalus, i.e., the hydrocephalus confined to just one lateral ventricle with no other sites of cerebrospinal fluid circulation blockage, are best described by the terms unilateral hydrocephalus (UH) and monoventricular hydrocephalus, the first being by far the most popular. Type A hydrocephalus is best represented in the literature by the terms uniloculated hydrocephalus and loculated ventricle; Type C hydrocephalus by the terms isolated lateral ventricle and isolated UH; and Type D hydrocephalus by the term asymmetric hydrocephalus.",2016-05-13 +24771658,RegPhos 2.0: an updated resource to explore protein kinase-substrate phosphorylation networks in mammals.,"Protein phosphorylation catalyzed by kinases plays crucial roles in regulating a variety of intracellular processes. Owing to an increasing number of in vivo phosphorylation sites that have been identified by mass spectrometry (MS)-based proteomics, the RegPhos, available online at http://csb.cse.yzu.edu.tw/RegPhos2/, was developed to explore protein phosphorylation networks in human. In this update, we not only enhance the data content in human but also investigate kinase-substrate phosphorylation networks in mouse and rat. The experimentally validated phosphorylation sites as well as their catalytic kinases were extracted from public resources, and MS/MS phosphopeptides were manually curated from research articles. RegPhos 2.0 aims to provide a more comprehensive view of intracellular signaling networks by integrating the information of metabolic pathways and protein-protein interactions. 
A case study shows that analyzing the phosphoproteome profile of time-dependent cell activation obtained from Liquid chromatography-mass spectrometry (LC-MS/MS) analysis, the RegPhos deciphered not only the consistent scheme in B cell receptor (BCR) signaling pathway but also novel regulatory molecules that may involve in it. With an attempt to help users efficiently identify the candidate biomarkers in cancers, 30 microarray experiments, including 39 cancerous versus normal cells, were analyzed for detecting cancer-specific expressed genes coding for kinases and their substrates. Furthermore, this update features an improved web interface to facilitate convenient access to the exploration of phosphorylation networks for a group of genes/proteins. Database URL: http://csb.cse.yzu.edu.tw/RegPhos2/",2014-04-25 +27917504,Constructing and enacting kinship in sister-to-sister egg donation families: a multi-family member interview study.,"Although intra-familial egg donation has been practiced for more than 15 years in several countries, little is known about family relationships in this family type. Framed within the new kinship studies, this article focuses on the experiential dimension of kinship in sister-to-sister egg donation families: how is kinship 'unpacked' and 'reconstructed' in this specific family constellation? Qualitative data analysis of interviews with receiving parents, their donating sisters and the donor children revealed six themes: (1) being connected as an extended family; (2) disambiguating motherhood; (3) giving and receiving as structuring processes; (4) acknowledging and managing the 'special' link between donor and child; (5) making sense of the union between father and donor; and (6) kinship constructions being challenged. This study showed the complex and continuous balancing of meanings related to the mother-child dyad, the donor-child dyad and the donor-father dyad. 
What stood out was the complexity of, on the one hand cherishing the genetic link with the child allowed by the sisters' egg donation, while, on the other, managing the meanings related to this link, by, for instance, acknowledging, downsizing, symbolising, and differentiating it from the mother-child bond. (A Virtual Abstract of this paper can be accessed at: https://www.youtube.com/channel/UC_979cmCmR9rLrKuD7z0ycA).",2016-12-05 +27171895,Systematic Reviews Published in the January 2016 Issue of the Cochrane Library.,"The Cochrane Library of Systematic Reviews is published quarterly as a DVD and monthly online ( http://www.thecochranelibrary.com ). The January 2016 issue (first DVD for 2016) contains 6746 complete reviews, 2445 protocols for reviews in production, and 36,600 short summaries of systematic reviews published in the general medical literature (this short summary database is no longer being updated). In addition, there are citations of 921,000 randomized controlled trials, and 15,700 cited papers in the Cochrane Methodology Register. The Health Technology Assessment database contains some 15,000 citations. One hundred and twenty-four new reviews have been published in the previous 3 months, of which just two have potential relevance for practitioners in pain and palliative medicine. The impact factor of the Cochrane Library stands at 5.939. Readers are encouraged to access the full report for any articles of interest, as only a brief commentary is provided.",2016-05-12 +28155714,Pretata: predicting TATA binding proteins with novel features and dimensionality reduction strategy.,"

Background

It is necessary and essential to discover protein function from the novel primary sequences. Wet lab experimental procedures are not only time-consuming, but also costly, so predicting protein structure and function reliably based only on amino acid sequence has significant value. TATA-binding protein (TBP) is a kind of DNA binding protein, which plays a key role in the transcription regulation. Our study proposed an automatic approach for identifying TATA-binding proteins efficiently, accurately, and conveniently. This method would serve as a guide for identifying special proteins with computational intelligence strategies.

Results

Firstly, we proposed novel fingerprint features for TBP based on pseudo amino acid composition, physicochemical properties, and secondary structure. Secondly, hierarchical feature dimensionality reduction strategies were employed to further improve the performance. Currently, Pretata achieves 92.92% TATA-binding protein prediction accuracy, which is better than all other existing methods.

Conclusions

The experiments demonstrate that our method could greatly improve the prediction accuracy and speed, thus allowing large-scale NGS data prediction to be practical. A web server is developed to facilitate the other researchers, which can be accessed at http://server.malab.cn/preTata/ .",2016-12-23 +27174934,HotSpot Wizard 2.0: automated design of site-specific mutations and smart libraries in protein engineering.,"HotSpot Wizard 2.0 is a web server for automated identification of hot spots and design of smart libraries for engineering proteins' stability, catalytic activity, substrate specificity and enantioselectivity. The server integrates sequence, structural and evolutionary information obtained from 3 databases and 20 computational tools. Users are guided through the processes of selecting hot spots using four different protein engineering strategies and optimizing the resulting library's size by narrowing down a set of substitutions at individual randomized positions. The only required input is a query protein structure. The results of the calculations are mapped onto the protein's structure and visualized with a JSmol applet. HotSpot Wizard lists annotated residues suitable for mutagenesis and can automatically design appropriate codons for each implemented strategy. Overall, HotSpot Wizard provides comprehensive annotations of protein structures and assists protein engineers with the rational design of site-specific mutations and focused libraries. It is freely available at http://loschmidt.chemi.muni.cz/hotspotwizard.",2016-05-12 +28409440,Repair of primary and incisional hernias using composite mesh fixed with absorbable tackers: preliminary experience of a laparoscopic approach with a newly designed mesh in 29 cases.,"Outcome of primary and incisional hernia repair is still affected by clinical complications in terms of recurrences, pain and discomfort. 
Factors like surgical approach, prosthesis characteristics and method of fixation might influence the outcome. We evaluated in a prospective observational study a cohort population which underwent primary and incisional laparoscopic hernia repair, with the use of a composite mesh in polypropylene fixed with absorbable devices. We focused on assessing the feasibility and safety of these procedures; they were always performed by an experienced laparoscopic surgeon, analyzing data from our patients through the EuraHS registry. Seventy nine procedures of primary and incisional hernia repair were performed from July 2013 to November 2015 at Santa Maria Regina degli Angeli Hospital in Adria (RO). All cases have been registered at the EuraHS registry ( http://www.eurahs.eu ); among them, we analyzed 29 procedures performed using a new composite polypropylene mesh (CMC, Clear Composite Mesh, DIPROMED srl San Mauro Torinese, Turin, Italy), fixed with absorbable tackers (ETHICON, Ethicon LLC Guaynabo, Puerto Rico 00969). We performed 23 incisional hernia repairs, 4 primary hernia repairs (1 umbilical, 2 epigastric and 1 lumbar hernia) and 2 parastomal hernia repairs. The median operation time was 65.1 min for elective and 81.4 min for urgent procedures (three cases). We had two post-operative complications (6.89%), one case of bleeding and another case of prolonged ileus successfully treated with conservative management. We had no recurrences at follow-up. According to QoL, at 12 months patients do not complain about any pain or discomfort for esthetic result. 
Laparoscopic treatment of primary and incisional hernia with the use of composite mesh in polypropylene fixed with absorbable devices is feasible and safe.",2017-04-13 +21882442,Knowledge-Based Analysis of Protein Interaction Networks in Neurodegenerative Diseases,"The large-scale datasets generated by gene sequencing, proteomics, and other high-throughput experimental technologies are the bases for understanding life as a molecular system and for developing medical, industrial, and other practical applications. In order to facilitate bioinformatics analysis of such large-scale datasets, it is essential to organize our knowledge on higher levels of systemic functions in a computable form, so that it can be used as a reference for inferring molecular systems from the information contained in the building blocks. Thus, we have been developing the KEGG (Kyoto Encyclopedia of Genes and Genomes) database (http://www.genome.jp/kegg/), an integrated resource of about 20 databases (1). The main component is the KEGG PATHWAY database, consisting of manually drawn graphical diagrams of molecular networks, called pathway maps, and representing various cellular processes and organism behaviors. KEGG PATHWAY is a reference database for pathway mapping, which is the process to match, for example, a genomic or transcriptomic content of genes against KEGG reference pathway maps to infer systemic functions of the cell or the organism. As part of the KEGG PATHWAY database, we organize disease pathway maps representing our knowledge of causative genes and molecular networks related to them for human diseases, including cancers, immune disorders, neurodegenerative diseases, metabolic disorders, and infectious diseases. Here we focus on neurodegenerative diseases, which were among the first to be made available on the KEGG PATHWAY database. A diverse range of neurodegenerative diseases is commonly characterized by the accumulation of abnormal protein aggregates. 
Causative genes, including those that produce abnormal proteins, have been identified in various neurodegenerative diseases. The current information is not sufficient to find common molecular mechanisms of the diseases. In this chapter we first present an overview of KEGG, including the KEGG DISEASE and KEGG DRUG databases, and describe the KEGG PATHWAY maps for six neurodegenerative diseases: Alzheimer’s disease (AD), Parkinson’s disease (PD), amyotrophic lateral sclerosis (ALS), Huntington’s disease (HD), dentatorubropallidoluysian atrophy (DRPLA), and prion diseases (PRION). We then present bioinformatics analysis to combine and expand these pathway maps toward identification of common proteins and common interactions, which may lead to a better understanding of common molecular pathogenic mechanisms (2).",2011-09-02 +27179867,Multilocus sequence typing of Lactobacillus casei isolates from naturally fermented foods in China and Mongolia.,"Lactobacillus casei is a lactic acid bacterium used in manufacturing of many fermented food products. To investigate the genetic diversity and population biology of this food-related bacterium, 224 Lb. casei isolates and 5 reference isolates were examined by multilocus sequence typing (MLST). Among them, 224 Lb. casei isolates were isolated from homemade fermented foods, including naturally fermented dairy products, acidic gruel, and Sichuan pickles from 38 different regions in China and Mongolia. The MLST scheme was developed based on the analysis of 10 selected housekeeping genes (carB, clpX, dnaA, groEL, murE, pyrG, pheS, recA, rpoC, and uvrC). All 229 isolates could be allocated to 171 unique sequence types, including 25 clonal complexes and 71 singletons. The high index of association value (1.3524) and standardized index of association value (0.1503) indicate the formation of an underlying clonal population by all the isolates. 
However, split-decomposition, relative frequency of occurrence of recombination and mutation, and relative effect of recombination and mutation in the diversification values confirm that recombination may have occurred, and were more frequent than mutation during the evolution of Lb. casei. Results from Structure analyses (version 2.3; http://pritch.bsd.uchicago.edu/structure.html) demonstrated that there were 5 lineages in the Lb. casei isolates, and the overall relatedness built by minimum spanning tree showed no clear relationship between the clonal complexes with either the isolation sources or sampling locations of the isolates. Our newly developed MLST scheme of Lb. casei was an easy and valuable tool that, together with the construction of an MLST database, will contribute to further detailed studies on the evolution and population genetics of Lb. casei from various niches.",2016-05-11 +27167218,MiasDB: A Database of Molecular Interactions Associated with Alternative Splicing of Human Pre-mRNAs.,"Alternative splicing (AS) is pervasive in human multi-exon genes and is a major contributor to expansion of the transcriptome and proteome diversity. The accurate recognition of alternative splice sites is regulated by information contained in networks of protein-protein and protein-RNA interactions. However, the mechanisms leading to splice site selection are not fully understood. Although numerous databases have been built to describe AS, molecular interaction databases associated with AS have only recently emerged. In this study, we present a new database, MiasDB, that provides a description of molecular interactions associated with human AS events. This database covers 938 interactions between human splicing factors, RNA elements, transcription factors, kinases and modified histones for 173 human AS events. 
Every entry includes the interaction partners, interaction type, experimental methods, AS type, tissue specificity or disease-relevant information, a simple description of the functionally tested interaction in the AS event and references. The database can be queried easily using a web server (http://47.88.84.236/Miasdb). We display some interaction figures for several genes. With this database, users can view the regulation network describing AS events for 12 given genes.",2016-05-11 +28004786,GPS-PAIL: prediction of lysine acetyltransferase-specific modification sites from protein sequences.,"Protein acetylation catalyzed by specific histone acetyltransferases (HATs) is an essential post-translational modification (PTM) and involved in the regulation a broad spectrum of biological processes in eukaryotes. Although several ten thousands of acetylation sites have been experimentally identified, the upstream HATs for most of the sites are unclear. Thus, the identification of HAT-specific acetylation sites is fundamental for understanding the regulatory mechanisms of protein acetylation. In this work, we first collected 702 known HAT-specific acetylation sites of 205 proteins from the literature and public data resources, and a motif-based analysis demonstrated that different types of HATs exhibit similar but considerably distinct sequence preferences for substrate recognition. Using 544 human HAT-specific sites for training, we constructed a highly useful tool of GPS-PAIL for the prediction of HAT-specific sites for up to seven HATs, including CREBBP, EP300, HAT1, KAT2A, KAT2B, KAT5 and KAT8. The prediction accuracy of GPS-PAIL was critically evaluated, with a satisfying performance. Using GPS-PAIL, we also performed a large-scale prediction of potential HATs for known acetylation sites identified from high-throughput experiments in nine eukaryotes. 
Both online service and local packages were implemented, and GPS-PAIL is freely available at: http://pail.biocuckoo.org.",2016-12-22 +27170286,BioC-compatible full-text passage detection for protein-protein interactions using extended dependency graph. ,"There has been a large growth in the number of biomedical publications that report experimental results. Many of these results concern detection of protein-protein interactions (PPI). In BioCreative V, we participated in the BioC task and developed a PPI system to detect text passages with PPIs in the full-text articles. By adopting the BioC format, the output of the system can be seamlessly added to the biocuration pipeline with little effort required for the system integration. A distinctive feature of our PPI system is that it utilizes extended dependency graph, an intermediate level of representation that attempts to abstract away syntactic variations in text. As a result, we are able to use only a limited set of rules to extract PPI pairs in the sentences, and additional rules to detect additional passages for PPI pairs. For evaluation, we used the 95 articles that were provided for the BioC annotation task. We retrieved the unique PPIs from the BioGRID database for these articles and show that our system achieves a recall of 83.5%. In order to evaluate the detection of passages with PPIs, we further annotated Abstract and Results sections of 20 documents from the dataset and show that an f-value of 80.5% was obtained. To evaluate the generalizability of the system, we also conducted experiments on AIMed, a well-known PPI corpus. 
We achieved an f-value of 76.1% for sentence detection and an f-value of 64.7% for unique PPI detection.Database URL: http://proteininformationresource.org/iprolink/corpora.",2016-05-11 +24533055,Binding sites analyser (BiSA): software for genomic binding sites archiving and overlap analysis.,"Genome-wide mapping of transcription factor binding and histone modification reveals complex patterns of interactions. Identifying overlaps in binding patterns by different factors is a major objective of genomic studies, but existing methods to archive large numbers of datasets in a personalised database lack sophistication and utility. Therefore we have developed transcription factor DNA binding site analyser software (BiSA), for archiving of binding regions and easy identification of overlap with or proximity to other regions of interest. Analysis results can be restricted by chromosome or base pair overlap between regions or maximum distance between binding peaks. BiSA is capable of reporting overlapping regions that share common base pairs; regions that are nearby; regions that are not overlapping; and average region sizes. BiSA can identify genes located near binding regions of interest, genomic features near a gene or locus of interest and statistical significance of overlapping regions can also be reported. Overlapping results can be visualized as Venn diagrams. A major strength of BiSA is that it is supported by a comprehensive database of publicly available transcription factor binding sites and histone modifications, which can be directly compared to user data. The documentation and source code are available on http://bisa.sourceforge.net.",2014-02-12 +26962089,Complicated Urinary Tract Infections: What's a Lab To Do?,"The article by Price et al. in this issue (T. K. 
Price et al., J Clin Microbiol 54:1216-1222, 2016, http://dx.doi.org/10.1128/JCM.00044-16) advocates for the use of a larger inoculum when culturing urine obtained by ""in-and-out"" catheterization in a selected female population. Their findings and the resulting challenges will afford clinical microbiologists and specialty physicians an opportunity to review what will or should be done with the additional microbiological culture data.",2016-03-09 +25969447,ClustVis: a web tool for visualizing clustering of multivariate data using Principal Component Analysis and heatmap.,"The Principal Component Analysis (PCA) is a widely used method of reducing the dimensionality of high-dimensional data, often followed by visualizing two of the components on the scatterplot. Although widely used, the method is lacking an easy-to-use web interface that scientists with little programming skills could use to make plots of their own data. The same applies to creating heatmaps: it is possible to add conditional formatting for Excel cells to show colored heatmaps, but for more advanced features such as clustering and experimental annotations, more sophisticated analysis tools have to be used. We present a web tool called ClustVis that aims to have an intuitive user interface. Users can upload data from a simple delimited text file that can be created in a spreadsheet program. It is possible to modify data processing methods and the final appearance of the PCA and heatmap plots by using drop-down menus, text boxes, sliders etc. Appropriate defaults are given to reduce the time needed by the user to specify input parameters. As an output, users can download PCA plot and heatmap in one of the preferred file formats. This web server is freely available at http://biit.cs.ut.ee/clustvis/.",2015-05-12 +24519378,SurvMicro: assessment of miRNA-based prognostic signatures for cancer clinical outcomes by multivariate survival analysis.,"

Unlabelled

MicroRNAs (miRNAs) play a key role in post-transcriptional regulation of mRNA levels. Their function in cancer has been studied by high-throughput methods generating valuable sources of public information. Thus, miRNA signatures predicting cancer clinical outcomes are emerging. An important step to propose miRNA-based biomarkers before clinical validation is their evaluation in independent cohorts. Although it can be carried out using public data, such a task is time-consuming and requires a specialized analysis. Therefore, to aid and simplify the evaluation of prognostic miRNA signatures in cancer, we developed SurvMicro, a free and easy-to-use web tool that assesses miRNA signatures from publicly available miRNA profiles using multivariate survival analysis. SurvMicro is composed of a wide and updated database of >40 cohorts in different tissues and a web tool where survival analysis can be done in minutes. We presented evaluations to portray the straightforward functionality of SurvMicro in liver and lung cancer. To our knowledge, SurvMicro is the only bioinformatic tool that aids the evaluation of multivariate prognostic miRNA signatures in cancer.

Availability and implementation

SurvMicro and its tutorial are freely available at http://bioinformatica.mty.itesm.mx/SurvMicro.",2014-02-11 +28718769,Pesticide Exposure and Risk of Rheumatoid Arthritis among Licensed Male Pesticide Applicators in the Agricultural Health Study.,"

Background

The occupation of farming has been associated with rheumatoid arthritis (RA); pesticides may account for this association, but there are few studies.

Objectives

We investigated associations between RA and use of pesticides in the Agricultural Health Study.

Methods

The study sample was drawn from male pesticide applicators enrolled in 1993–1997 who provided questionnaire data at baseline and at least once during follow-up (over a median 18 y; interquartile range 16–19). Incident RA cases (n=220), confirmed by physicians or by self-reported use of disease-modifying antirheumatic drugs, were compared with noncases (n=26,134) who did not report RA. Odds ratios (ORs) and 95% confidence intervals (CIs) were estimated using logistic regression, adjusting for enrollment age, state, smoking pack-years, and education. We evaluated the association of RA with the use of 46 pesticides and across 4 levels (never use and tertiles) of lifetime days of use for 16 pesticides with OR≥1.2 for ever use.

Results

Incident RA was associated with ever use of fonofos (OR = 1.70; 95% CI: 1.22, 2.37), carbaryl (OR = 1.51; 95% CI: 1.03, 2.23), and chlorimuron ethyl (OR = 1.45; 95% CI: 1.01, 2.07) compared with never use. Statistically significant exposure–response trends in association with RA were observed for lifetime days of use of atrazine [ORtertile3= 1.62 (95% CI: 1.09, 2.40); ptrend=0.01] and toxaphene [ORtertile3= 2.42 (95% CI: 1.03, 5.68); ptrend=0.02]. Exposure–response was nonlinear for fonofos [ORtertile1= 2.27 (95% CI: 1.44, 3.57); ORtertile2= 0.98 (95% CI: 0.54, 1.80); ORtertile3= 2.10 (95% CI: 1.32, 3.36); ptrend=0.005] and suggestive for carbaryl (ptrend=0.053).

Conclusions

Our results provide novel evidence of associations between exposure to some pesticides and RA in male farmers. https://doi.org/10.1289/EHP1013.",2017-07-14 +27596864,CRISPRdigger: detecting CRISPRs with better direct repeat annotations.,"Clustered regularly interspaced short palindromic repeats (CRISPRs) are important genetic elements in many bacterial and archaeal genomes, and play a key role in prokaryote immune systems' fight against invasive foreign elements. The CRISPR system has also been engineered to facilitate target gene editing in eukaryotic genomes. Using the common features of mis-annotated CRISPRs in prokaryotic genomes, this study proposed an accurate de novo CRISPR annotation program CRISPRdigger, which can take a partially assembled genome as its input. A comprehensive comparison with the three existing programs demonstrated that CRISPRdigger can recover more Direct Repeats (DRs) for CRISPRs and achieve a higher accuracy for a query genome. The program was implemented by Perl and all the parameters had default values, so that a user could annotate CRISPRs in a query genome by supplying only a genome sequence in the FASTA format. All the supplementary data are available at http://www.healthinformaticslab.org/supp/.",2016-09-06 +26083032,QuorUM: An Error Corrector for Illumina Reads.,"

Motivation

Illumina Sequencing data can provide high coverage of a genome by relatively short (most often 100 bp to 150 bp) reads at a low cost. Even with low (advertised 1%) error rate, 100 × coverage Illumina data on average has an error in some read at every base in the genome. These errors make handling the data more complicated because they result in a large number of low-count erroneous k-mers in the reads. However, there is enough information in the reads to correct most of the sequencing errors, thus making subsequent use of the data (e.g. for mapping or assembly) easier. Here we use the term ""error correction"" to denote the reduction in errors due to both changes in individual bases and trimming of unusable sequence. We developed an error correction software called QuorUM. QuorUM is mainly aimed at error correcting Illumina reads for subsequent assembly. It is designed around the novel idea of minimizing the number of distinct erroneous k-mers in the output reads and preserving the most true k-mers, and we introduce a composite statistic π that measures how successful we are at achieving this dual goal. We evaluate the performance of QuorUM by correcting actual Illumina reads from genomes for which a reference assembly is available.

Results

We produce trimmed and error-corrected reads that result in assemblies with longer contigs and fewer errors. We compared QuorUM against several published error correctors and found that it is the best performer in most metrics we use. QuorUM is efficiently implemented making use of current multi-core computing architectures and it is suitable for large data sets (1 billion bases checked and corrected per day per core). We also demonstrate that a third-party assembler (SOAPdenovo) benefits significantly from using QuorUM error-corrected reads. QuorUM error corrected reads result in a factor of 1.1 to 4 improvement in N50 contig size compared to using the original reads with SOAPdenovo for the data sets investigated.

Availability

QuorUM is distributed as an independent software package and as a module of the MaSuRCA assembly software. Both are available under the GPL open source license at http://www.genome.umd.edu.

Contact

gmarcais@umd.edu.",2015-06-17 +27161010,ToxEvaluator: an integrated computational platform to aid the interpretation of toxicology study-related findings. ,"Attempts are frequently made to investigate adverse findings from preclinical toxicology studies in order to better understand underlying toxicity mechanisms. These efforts often begin with limited information, including a description of the adverse finding, knowledge of the structure of the chemical associated with its cause and the intended pharmacological target. ToxEvaluator was developed jointly by Pfizer and the Comparative Toxicogenomics Database (http://ctdbase.org) team at North Carolina State University as an in silico platform to facilitate interpretation of toxicity findings in light of prior knowledge. Through the integration of a diverse set of in silico tools that leverage a number of public and proprietary databases, ToxEvaluator streamlines the process of aggregating and interrogating diverse sources of information. The user enters compound and target identifiers, and selects adverse event descriptors from a safety lexicon and mapped MeSH disease terms. ToxEvaluator provides a summary report with multiple distinct areas organized according to what target or structural aspects have been linked to the adverse finding, including primary pharmacology, structurally similar proprietary compounds, structurally similar public domain compounds, predicted secondary (i.e. off-target) pharmacology and known secondary pharmacology. Similar proprietary compounds and their associated in vivo toxicity findings are reported, along with a link to relevant supporting documents. For similar public domain compounds and interacting targets, ToxEvaluator integrates relationships curated in Comparative Toxicogenomics Database, returning all direct and inferred linkages between them. 
As an example of its utility, we demonstrate how ToxEvaluator rapidly identified direct (primary pharmacology) and indirect (secondary pharmacology) linkages between cerivastatin and myopathy.",2016-05-09 +25971740,CAPRI: efficient inference of cancer progression models from cross-sectional data.,"

Unlabelled

We devise a novel inference algorithm to effectively solve the cancer progression model reconstruction problem. Our empirical analysis of the accuracy and convergence rate of our algorithm, CAncer PRogression Inference (CAPRI), shows that it outperforms the state-of-the-art algorithms addressing similar problems.

Motivation

Several cancer-related genomic data have become available (e.g. The Cancer Genome Atlas, TCGA) typically involving hundreds of patients. At present, most of these data are aggregated in a cross-sectional fashion providing all measurements at the time of diagnosis. Our goal is to infer cancer 'progression' models from such data. These models are represented as directed acyclic graphs (DAGs) of collections of 'selectivity' relations, where a mutation in a gene A 'selects' for a later mutation in a gene B. Gaining insight into the structure of such progressions has the potential to improve both the stratification of patients and personalized therapy choices.

Results

The CAPRI algorithm relies on a scoring method based on a probabilistic theory developed by Suppes, coupled with bootstrap and maximum likelihood inference. The resulting algorithm is efficient, achieves high accuracy and has good complexity, also, in terms of convergence properties. CAPRI performs especially well in the presence of noise in the data, and with limited sample sizes. Moreover, CAPRI, in contrast to other approaches, robustly reconstructs different types of confluent trajectories despite irregularities in the data. We also report on an ongoing investigation using CAPRI to study atypical Chronic Myeloid Leukemia, in which we uncovered non-trivial selectivity relations and exclusivity patterns among key genomic events.

Availability and implementation

CAPRI is part of the TRanslational ONCOlogy R package and is freely available on the web at: http://bimib.disco.unimib.it/index.php/Tronco

Contact

daniele.ramazzotti@disco.unimib.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-13 +22543945,"[The age of Gutenberg is over: a consideration of medical education--past, present and future].","Education is the basis for reliable medical care and medical progress. Our medical knowledge has increased more in the past 50 years than in the 500 years before. The spatial and human resource capacity of our universities cannot cope with the existing academic structures and needs. Part of the problem can be solved by ""blended learning"", that is a combination of traditional teaching methods (frontal lectures, courses, bedside teaching) with supplementary web-based e-learning. In addition to conveying a sound basic knowledge, the ability to cope with modern media and prepare for lifelong learning must also be taught. Out of the large number of e-learning platforms for undergraduate students offered in the internet, we present the program DOIT (Dermatology Online with Interactive Technology; http://www.swisdom.org) and the program Dermokrates (http://www.Dermokrates.com) of the German, Austrian and Swiss Dermatological Societies for postgraduate Continuing Medical Education (CME). The biggest obstacle in the implementation of new developments is the stubborn adherence to traditional structures.",2012-04-01 +22859501,SKEMPI: a Structural Kinetic and Energetic database of Mutant Protein Interactions and its use in empirical models.,"

Motivation

Empirical models for the prediction of how changes in sequence alter protein-protein binding kinetics and thermodynamics can garner insights into many aspects of molecular biology. However, such models require empirical training data and proper validation before they can be widely applied. Previous databases contained few stabilizing mutations and no discussion of their inherent biases or how this impacts model construction or validation.

Results

We present SKEMPI, a database of 3047 binding free energy changes upon mutation assembled from the scientific literature, for protein-protein heterodimeric complexes with experimentally determined structures. This represents over four times more data than previously collected. Changes in 713 association and dissociation rates and 127 enthalpies and entropies were also recorded. The existence of biases towards specific mutations, residues, interfaces, proteins and protein families is discussed in the context of how the data can be used to construct predictive models. Finally, a cross-validation scheme is presented which is capable of estimating the efficacy of derived models on future data in which these biases are not present.

Availability

The database is available online at http://life.bsc.es/pid/mutation_database/.",2012-08-01 +27332507,From Static to Interactive: Transforming Data Visualization to Improve Transparency.,"Data presentation for scientific publications in small sample size studies has not changed substantially in decades. It relies on static figures and tables that may not provide sufficient information for critical evaluation, particularly of the results from small sample size studies. Interactive graphics have the potential to transform scientific publications from static reports of experiments into interactive datasets. We designed an interactive line graph that demonstrates how dynamic alternatives to static graphics for small sample size studies allow for additional exploration of empirical datasets. This simple, free, web-based tool (http://statistika.mfub.bg.ac.rs/interactive-graph/) demonstrates the overall concept and may promote widespread use of interactive graphics.",2016-06-22 +28933120,[Explore anti-atherosclerotic mechanism of component compatibility of Danshen and Shanzha based on network pharmacology and cell level].,"To explore the anti-atherosclerotic mechanism of active component compatibility of Danshen and Shanzha (SC121) based on network pharmacology and in vitro research validation with cell model. On one hand, according to the chemical structures and pharmacological activities of the compounds reported in Danshen and Shanzha, 5 compounds, i.e., salvianolic acid B, tanshinone ⅡA, tanshinol, epicatechin and procyanidin B2 were chosen and used for network pharmacology analysis. Then the TCMSP(http://lsp.nwsuaf.edu.cn/tcmsp.php)was used for finding the network targets for 5 compounds from SC121. The signaling pathway associated with cardiovascular disease was analyzed by KEGG mapping, the biological process associated with cardiovascular disease was analyzed by Uniprot. And, the mechanism of SC121 was predicted by network pharmacology. 
In vitro cell model was subsequently performed for validation. HUVEC and RAW264.7 cell injuries and foam cell formation were constructed by ox-LDL, and the intervention effects of SC121 were assayed. The result showed that SC121 not only alleviated the damage of HUVEC and RAW264.7, lowered the ROS level, but also decreased the area of foam cell in a dose-dependent manner, which indicated that SC121 could inhibit the damage of endothelial cells and lower the oxidative stress. The experimental data validated the prediction of network pharmacology, and elucidated the mechanism of SC121's effect on AS.",2016-12-01 +24717071,Discovery of novel genes and gene isoforms by integrating transcriptomic and proteomic profiling from mouse liver.,"Comprehensively identifying gene expression in both transcriptomic and proteomic levels of one tissue is a prerequisite for a deeper understanding of its biological functions. Alternative splicing and RNA editing, two main forms of transcriptional processing, play important roles in transcriptome and proteome diversity and result in multiple isoforms for one gene, which are hard to identify by mass spectrometry (MS)-based proteomics approach due to the relative lack of isoform information in standard protein databases. In our study, we employed MS and RNA-Seq in parallel into mouse liver tissue and captured a considerable catalogue of both transcripts and proteins that, respectively, covered 60 and 34% of protein-coding genes in Ensembl. We then developed a bioinformatics workflow for building a customized protein database that for the first time included new splicing-derived peptides and RNA-editing-caused peptide variants, allowing us to more completely identify protein isoforms. Using this experimentally determined database, we totally identified 150 peptides not present in standard biological databases at false discovery rate of <1%, corresponding to 72 novel splicing isoforms, 43 new genetic regions, and 15 RNA-editing sites. 
Of these, 11 randomly selected novel events passed experimental verification by PCR and Sanger sequencing. New discoveries of gene products with high confidence in two omics levels demonstrated the robustness and effectiveness of our approach and its potential application into improve genome annotation. All the MS data have been deposited to the iProx ( http://ww.iprox.org ) with the identifier IPX00003601.",2014-04-18 +22685160,"Improving links between literature and biological data with text mining: a case study with GEO, PDB and MEDLINE.","High-throughput experiments and bioinformatics techniques are creating an exploding volume of data that are becoming overwhelming to keep track of for biologists and researchers who need to access, analyze and process existing data. Much of the available data are being deposited in specialized databases, such as the Gene Expression Omnibus (GEO) for microarrays or the Protein Data Bank (PDB) for protein structures and coordinates. Data sets are also being described by their authors in publications archived in literature databases such as MEDLINE and PubMed Central. Currently, the curation of links between biological databases and the literature mainly relies on manual labour, which makes it a time-consuming and daunting task. Herein, we analysed the current state of link curation between GEO, PDB and MEDLINE. We found that the link curation is heterogeneous depending on the sources and databases involved, and that overlap between sources is low, <50% for PDB and GEO. Furthermore, we showed that text-mining tools can automatically provide valuable evidence to help curators broaden the scope of articles and database entries that they review. As a result, we made recommendations to improve the coverage of curated links, as well as the consistency of information available from different databases while maintaining high-quality curation. 
Database URLs: http://www.ncbi.nlm.nih.gov/PubMed, http://www.ncbi.nlm.nih.gov/geo/, http://www.rcsb.org/pdb/",2012-06-08 +23161677,G4LDB: a database for discovering and studying G-quadruplex ligands.,"The G-quadruplex ligands database (G4LDB, http://www.g4ldb.org) provides a unique collection of reported G-quadruplex ligands to streamline ligand/drug discovery targeting G-quadruplexes. G-quadruplexes are guanine-rich nucleic acid sequences in human telomeres and gene promoter regions. There is a growing recognition for their profound roles in a wide spectrum of diseases, such as cancer, diabetes and cardiovascular disease. Ligands that affect the structure and activity of G-quadruplexes can shed light on the search for G-quadruplex-targeting drugs. Therefore, we built the G4LDB to (i) compile a data set covering various physical properties and 3D structure of G-quadruplex ligands; (ii) provide Web-based tools for G-quadruplex ligand design; and (iii) to facilitate the discovery of novel therapeutic and diagnostic agents targeting G-quadruplexes. G4LDB currently contains >800 G-quadruplex ligands with ∼4000 activity records, which, to our knowledge, is the most extensive collection of its kind. It offers a user friendly interface that can meet a variety of data inquiries from researchers. For example, ligands can be searched for by name, molecular properties, structures, ligand activities and so on. Building on the reported data, the database also provides an online ligand design module that can predict ligand binding affinity in real time.",2012-11-17 +26612782,"Expression profiles of human epididymis epithelial cells reveal the functional diversity of caput, corpus and cauda regions.","

Study hypothesis

Region-specific transcriptional profiling of tissues and cultured epithelial cells from the human epididymis will predict functional specialization along the duct.

Study finding

We identified the molecular signature driving functions of the caput, corpus and cauda epithelium, and determined how these differ to establish the regional differentiation of the organ.

What is known already

The epithelium lining the human male genital ducts has a critical role in fertility. In particular, it controls the luminal environment in the epididymis, which is required for normal sperm maturation and reproductive competence. Studies in many animal species have largely informed our understanding of the molecular basis of epididymis function. However, there are substantial differences between species.

Study design, samples/materials, methods

Using RNA sequencing on biological replicates, we described gene expression profiles for tissue from each region of the epididymis and cultured epithelial cells derived from these regions. Bioinformatic tools were then utilized to identify differentially expressed genes (DEGs) between tissues and cells from the caput, corpus and cauda.

Main results and the role of chance

The data showed that the caput is functionally divergent from the corpus and cauda, which have very similar transcriptomes. Interrogation of DEGs using gene ontology process enrichment analyses showed that processes of ion transport, response to hormone stimulus and urogenital tract development are more evident in the caput, while defense response processes are more important in the corpus/cauda. Consistent with these regional differences in epididymis function, we observed differential expression of transcription factors in the caput and corpus/cauda.

Limitations, reasons for caution

Cultured caput, corpus and cauda cells may not faithfully represent the same cells in the intact organ, due to loss of hormonal signals from the testis and communication from other cell types.

Wider implications of the findings

Our data provide a molecular characterization that will facilitate advances in understanding human epididymis epithelium biology in health and disease. They may also reveal the mechanisms coordinating epididymis luminal environment and sperm maturation.

Large scale data

Data deposited at http://www.ncbi.nlm.nih.gov/geo/GSE72986.

Study funding and competing interests

This work was supported by the National Institutes of Health: R01HD068901 (PI: A.H.). The authors declare no conflict of interest.",2015-11-26 +25687422,Mega2: validated data-reformatting for linkage and association analyses.,"BACKGROUND:In a typical study of the genetics of a complex human disease, many different analysis programs are used, to test for linkage and association. This requires extensive and careful data reformatting, as many of these analysis programs use differing input formats. Writing scripts to facilitate this can be tedious, time-consuming, and error-prone. To address these issues, the open source Mega2 data reformatting program provides validated and tested data conversions from several commonly-used input formats to many output formats. RESULTS:Mega2, the Manipulation Environment for Genetic Analysis, facilitates the creation of analysis-ready datasets from data gathered as part of a genetic study. It transparently allows users to process genetic data for family-based or case/control studies accurately and efficiently. In addition to data validation checks, Mega2 provides analysis setup capabilities for a broad choice of commonly-used genetic analysis programs. First released in 2000, Mega2 has recently been significantly improved in a number of ways. We have rewritten it in C++ and have reduced its memory requirements. Mega2 now can read input files in LINKAGE, PLINK, and VCF/BCF formats, as well as its own specialized annotated format. It supports conversion to many commonly-used formats including SOLAR, PLINK, Merlin, Mendel, SimWalk2, Cranefoot, IQLS, FBAT, MORGAN, BEAGLE, Eigenstrat, Structure, and PLINK/SEQ. When controlled by a batch file, Mega2 can be used non-interactively in data reformatting pipelines. Support for genetic data from several other species besides humans has been added. 
CONCLUSIONS:By providing tested and validated data reformatting, Mega2 facilitates more accurate and extensive analyses of genetic data, avoiding the need to write, debug, and maintain one's own custom data reformatting scripts. Mega2 is freely available at https://watson.hgen.pitt.edu/register/.",2014-12-05 +22280360,The EnzymeTracker: an open-source laboratory information management system for sample tracking.,"

Background

In many laboratories, researchers store experimental data on their own workstation using spreadsheets. However, this approach poses a number of problems, ranging from sharing issues to inefficient data-mining. Standard spreadsheets are also error-prone, as data do not undergo any validation process. To overcome spreadsheets' inherent limitations, a number of proprietary systems have been developed, which laboratories need to pay expensive license fees for. Those costs are usually prohibitive for most laboratories and prevent scientists from benefiting from more sophisticated data management systems.

Results

In this paper, we propose the EnzymeTracker, a web-based laboratory information management system for sample tracking, as an open-source and flexible alternative that aims at facilitating entry, mining and sharing of experimental biological data. The EnzymeTracker features online spreadsheets and tools for monitoring numerous experiments conducted by several collaborators to identify and characterize samples. It also provides libraries of shared data such as protocols, and administration tools for data access control using OpenID and user/team management. Our system relies on a database management system for efficient data indexing and management and a user-friendly AJAX interface that can be accessed over the Internet. The EnzymeTracker facilitates data entry by dynamically suggesting entries and providing smart data-mining tools to effectively retrieve data. Our system features a number of tools to visualize and annotate experimental data, and export highly customizable reports. It also supports QR matrix barcoding to facilitate sample tracking.

Conclusions

The EnzymeTracker was designed to be easy to use and offers many benefits over spreadsheets, thus presenting the characteristics required to facilitate acceptance by the scientific community. It has been successfully used for 20 months on a daily basis by over 50 scientists. The EnzymeTracker is freely available online at http://cubique.fungalgenomics.ca/enzymedb/index.html under the GNU GPLv3 license.",2012-01-26 +28728135,Prostate Cancer Risk and DNA Methylation Signatures in Aging Rats following Developmental BPA Exposure: A Dose-Response Analysis.,"

Background

Previous studies have uncovered heightened prostatic susceptibility to hormone-induced neoplasia from early-life exposure to low-dose bisphenol A (BPA). However, significant data gaps remain that are essential to address for biological relevance and necessary risk assessment.

Objectives

A complete BPA dose-response analysis of prostate lesions across multiple prostatic lobes was conducted that included internal BPA dosimetry, progression to adenocarcinoma with aging and mechanistic connections to epigenetically reprogramed genes.

Methods

Male neonatal Sprague-Dawley rats were briefly exposed to 0.1 to 5,000 μg BPA/kg BW on postnatal days (PND) 1, 3, and 5. Individual prostate lobes plus periurethral prostatic ducts were evaluated at 7 mo or 1 y of age without or with adult testosterone plus estradiol (T+E) to promote carcinogenesis. DNA methylation of five genes was quantified by bisulfite genomic sequencing in d-200 dorsal prostates across BPA doses. Serum free-BPA and BPA-glucuronide were quantitated in sera of individual PND 3 pups collected 1 hr postexposure utilizing ultra-high-pressure tandem mass spectrometry (UHPLC-MS-MS).

Results

The lowest BPA dose initiated maximal hormonal carcinogenesis in lateral prostates despite undetectable free BPA 1 hr postexposure. Further, prostatic intraepithelial neoplasia (PIN) progressed to carcinoma in rats given neonatal low-dose BPA with adult T+E but not in rats given adult T+E alone. The dorsal and ventral lobes and periurethral prostatic ducts exhibited a nonmonotonic dose response with peak PIN, proliferation and apoptotic values at 10–100 μg/kg BW. This was paralleled by nonmonotonic and dose-specific DNA hypomethylation of genes that confer carcinogenic risk, with greatest hypomethylation at the lowest BPA doses.

Conclusions

Developmental BPA exposures heighten prostate cancer susceptibility in a complex dose- and lobe-specific manner. Importantly, elevated carcinogenic risk is found at doses that yield undetectable serum free BPA. Dose-specific epigenetic modifications of selected genes provide a mechanistic framework that may connect early-life BPA to later-life predisposition to prostate carcinogenesis. https://doi.org/10.1289/EHP1050.",2017-07-11 +27154270,Synteny Portal: a web-based application portal for synteny block analysis.,"Recent advances in next-generation sequencing technologies and genome assembly algorithms have enabled the accumulation of a huge volume of genome sequences from various species. This has provided new opportunities for large-scale comparative genomics studies. Identifying and utilizing synteny blocks, which are genomic regions conserved among multiple species, is key to understanding genomic architecture and the evolutionary history of genomes. However, the construction and visualization of such synteny blocks from multiple species are very challenging, especially for biologists with a lack of computational skills. Here, we present Synteny Portal, a versatile web-based application portal for constructing, visualizing and browsing synteny blocks. With Synteny Portal, users can easily (i) construct synteny blocks among multiple species by using prebuilt alignments in the UCSC genome browser database, (ii) visualize and download syntenic relationships as high-quality images, (iii) browse synteny blocks with genetic information and (iv) download the details of synteny blocks to be used as input for downstream synteny-based analyses, all in an intuitive and easy-to-use web-based interface. We believe that Synteny Portal will serve as a highly valuable tool that will enable biologists to easily perform comparative genomics studies by compensating limitations of existing tools. 
Synteny Portal is freely available at http://bioinfo.konkuk.ac.kr/synteny_portal.",2016-05-06 +26220962,MetTailor: dynamic block summary and intensity normalization for robust analysis of mass spectrometry data in metabolomics.,"

Motivation

Accurate cross-sample peak alignment and reliable intensity normalization are critical steps for robust quantitative analysis in untargeted metabolomics since tandem mass spectrometry (MS/MS) is rarely used for compound identification. Therefore shortcomings in the data processing steps can easily introduce false positives due to misalignments and erroneous normalization adjustments in large sample studies.

Results

In this work, we developed a software package MetTailor featuring two novel data preprocessing steps to remedy drawbacks in the existing processing tools. First, we propose a novel dynamic block summarization (DBS) method for correcting misalignments from peak alignment algorithms, which alleviates missing data problem due to misalignments. For the purpose of verifying correct re-alignments, we propose to use the cross-sample consistency in isotopic intensity ratios as a quality metric. Second, we developed a flexible intensity normalization procedure that adjusts normalizing factors against the temporal variations in total ion chromatogram (TIC) along the chromatographic retention time (RT). We first evaluated the DBS algorithm using a curated metabolomics dataset, illustrating that the algorithm identifies misaligned peaks and correctly realigns them with good sensitivity. We next demonstrated the DBS algorithm and the RT-based normalization procedure in a large-scale dataset featuring >100 sera samples in primary Dengue infection study. Although the initial alignment was successful for the majority of peaks, the DBS algorithm still corrected ∼7000 misaligned peaks in this data and many recovered peaks showed consistent isotopic patterns with the peaks they were realigned to. In addition, the RT-based normalization algorithm efficiently removed visible local variations in TIC along the RT, without sacrificing the sensitivity of detecting differentially expressed metabolites.

Availability and implementation

The R package MetTailor is freely available at the SourceForge website http://mettailor.sourceforge.net/.

Contact

hyung_won_choi@nuhs.edu.sg

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-27 +24225318,Selectome update: quality control and computational improvements to a database of positive selection.,"Selectome (http://selectome.unil.ch/) is a database of positive selection, based on a branch-site likelihood test. This model estimates the number of nonsynonymous substitutions (dN) and synonymous substitutions (dS) to evaluate the variation in selective pressure (dN/dS ratio) over branches and over sites. Since the original release of Selectome, we have benchmarked and implemented a thorough quality control procedure on multiple sequence alignments, aiming to provide minimum false-positive results. We have also improved the computational efficiency of the branch-site test implementation, allowing larger data sets and more frequent updates. Release 6 of Selectome includes all gene trees from Ensembl for Primates and Glires, as well as a large set of vertebrate gene trees. A total of 6810 gene trees have some evidence of positive selection. Finally, the web interface has been improved to be more responsive and to facilitate searches and browsing.",2013-11-12 +23138916,Predictive models and computational toxicology.,"Understanding the potential health risks posed by environmental chemicals is a significant challenge elevated by the large number of diverse chemicals with generally uncharacterized exposures, mechanisms, and toxicities. The ToxCast computational toxicology research program was launched by EPA in 2007 and is part of the federal Tox21 consortium to develop a cost-effective approach for efficiently prioritizing the toxicity testing of thousands of chemicals and the application of this information to assessing human toxicology. 
ToxCast addresses this problem through an integrated workflow using high-throughput screening (HTS) of chemical libraries across more than 650 in vitro assays including biochemical assays, human cells and cell lines, and alternative models such as mouse embryonic stem cells and zebrafish embryo development. The initial phase of ToxCast profiled a library of 309 environmental chemicals, mostly pesticidal actives having rich in vivo data from guideline studies that include chronic/cancer bioassays in mice and rats, multigenerational reproductive studies in rats, and prenatal developmental toxicity endpoints in rats and rabbits. The first phase of ToxCast was used to build models that aim to determine how well in vivo animal effects can be predicted solely from the in vitro data. Phase I is now complete and both the in vitro data (ToxCast) and anchoring in vivo database (ToxRefDB) have been made available to the public (http://actor.epa.gov/). As Phase II of ToxCast is now underway, the purpose of this chapter is to review progress to date with ToxCast predictive modeling, using specific examples on developmental and reproductive effects in rats and rabbits with lessons learned during Phase I.",2013-01-01 +25810432,"Integration of somatic mutation, expression and functional data reveals potential driver genes predictive of breast cancer survival.","

Motivation

Genome and transcriptome analyses can be used to explore cancers comprehensively, and it is increasingly common to have multiple omics data measured from each individual. Furthermore, there are rich functional data such as predicted impact of mutations on protein coding and gene/protein networks. However, integration of the complex information across the different omics and functional data is still challenging. Clinical validation, particularly based on patient outcomes such as survival, is important for assessing the relevance of the integrated information and for comparing different procedures.

Results

An analysis pipeline is built for integrating genomic and transcriptomic alterations from whole-exome and RNA sequence data and functional data from protein function prediction and gene interaction networks. The method accumulates evidence for the functional implications of mutated potential driver genes found within and across patients. A driver-gene score (DGscore) is developed to capture the cumulative effect of such genes. To contribute to the score, a gene has to be frequently mutated, with high or moderate mutational impact at protein level, exhibiting an extreme expression and functionally linked to many differentially expressed neighbors in the functional gene network. The pipeline is applied to 60 matched tumor and normal samples of the same patient from The Cancer Genome Atlas breast-cancer project. In clinical validation, patients with high DGscores have worse survival than those with low scores (P = 0.001). Furthermore, the DGscore outperforms the established expression-based signatures MammaPrint and PAM50 in predicting patient survival. In conclusion, integration of mutation, expression and functional data allows identification of clinically relevant potential driver genes in cancer.

Availability and implementation

The documented pipeline including annotated sample scripts can be found in http://fafner.meb.ki.se/biostatwiki/driver-genes/.

Contact

yudi.pawitan@ki.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-03-24 +24386107,Endogenous carbamylation of renal medullary proteins.,"Protein carbamylation is a post-translational modification that can occur in the presence of urea. In solution, urea is in equilibrium with ammonium cyanate, and carbamylation occurs when cyanate ions react with the amino groups of lysines, arginines, protein N-termini, as well as sulfhydryl groups of cysteines. The concentration of urea is elevated in the renal inner medulla compared with other tissues. Due to the high urea concentration, we hypothesized that carbamylation can occur endogenously within the rat inner medulla. Using immunoblotting of rat kidney cortical and medullary homogenates with a carbamyl-lysine specific antibody, we showed that carbamylation is present in a large number of inner medullary proteins. Using protein mass spectrometry (LC-MS/MS) of rat renal inner medulla, we identified 456 unique carbamylated sites in 403 proteins, including many that play important physiological roles in the renal medulla [Data can be accessed at https://helixweb.nih.gov/ESBL/Database/Carbamylation/Carbamylation_peptide_sorted.html]. We conclude that protein carbamylation occurs endogenously in the kidney, modifying many physiologically important proteins.",2013-12-26 +24343855,Antibacterial agents in composite restorations for the prevention of dental caries.,"

Background

Dental caries is a multifactorial disease in which the fermentation of food sugars by bacteria from the biofilm (dental plaque) leads to localised demineralisation of tooth surfaces, which may ultimately result in cavity formation. Resin composites are widely used in dentistry to restore teeth. These restorations can fail for a number of reasons, such as secondary caries, and restorative material fracture and other minor reasons. From these, secondary caries, which are caries lesions developed adjacent to restorations, is the main cause for restorations replacement. The presence of antibacterials in both the filling material and the bonding systems would theoretically be able to affect the initiation and progression of caries adjacent to restorations. This is an update of the Cochrane review published in 2009.

Objectives

To assess the effects of antibacterial agents incorporated into composite restorations for the prevention of dental caries.

Search methods

We searched the following electronic databases: the Cochrane Oral Health Group's Trials Register (to 23 July 2013), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library 2013, Issue 6), MEDLINE via OVID (1946 to 23 July 2013) and EMBASE via OVID (1980 to 23 July 2013). We searched the US National Institutes of Health Trials Register (http://clinicaltrials.gov), the metaRegister of Controlled Trials (www.controlled-trials.com) and the World Health Organization International Clinical Trials Registry platform (www.who.int/trialsearch) for ongoing trials. No restrictions were placed on the language or date of publication when searching the electronic databases.

Selection criteria

Randomised controlled trials comparing resin composite restorations containing antibacterial agents with composite restorations not containing antibacterial agents.

Data collection and analysis

Two review authors conducted screening of studies in duplicate and independently, and although no eligible trials were identified, the two authors had planned to extract data independently and assess trial quality using standard Cochrane Collaboration methodologies.

Main results

We retrieved 308 references to studies, none of which matched the inclusion criteria for this review and all of which were excluded.

Authors' conclusions

We were unable to identify any randomised controlled trials on the effects of antibacterial agents incorporated into composite restorations for the prevention of dental caries. The absence of high level evidence for the effectiveness of this intervention emphasises the need for well designed, adequately powered, randomised controlled clinical trials. Thus, conclusions remain the same as the previously published review, with no included clinical trials.",2013-12-17 +23161674,Voronoia4RNA--a database of atomic packing densities of RNA structures and their complexes.,"Voronoia4RNA (http://proteinformatics.charite.de/voronoia4rna/) is a structural database storing precalculated atomic volumes, atomic packing densities (PDs) and coordinates of internal cavities for currently 1869 RNAs and RNA-protein complexes. Atomic PDs are a measure for van der Waals interactions. Regions of low PD, containing water-sized internal cavities, refer to local structure flexibility or compressibility. RNA molecules build up the skeleton of large molecular machineries such as ribosomes or form smaller flexible structures such as riboswitches. The wealth of structural data on RNAs and their complexes allows setting up representative data sets and analysis of their structural features. We calculated atomic PDs from atomic volumes determined by the Voronoi cell method and internal cavities analytically by Delaunay triangulation. Reference internal PD values were derived from a non-redundant sub-data set of buried atoms. Comparison of internal PD values shows that RNA is more tightly packed than proteins. Finally, the relation between structure size, resolution and internal PD of the Voronoia4RNA entries is discussed. RNA, protein structures and their complexes can be visualized by the Jmol-based viewer Provi. Variations in PD are depicted by a color code. 
Internal cavities are represented by their molecular boundaries or schematically as balls.",2012-11-17 +26076068,Quest for Missing Proteins: Update 2015 on Chromosome-Centric Human Proteome Project.,"This paper summarizes the recent activities of the Chromosome-Centric Human Proteome Project (C-HPP) consortium, which develops new technologies to identify yet-to-be annotated proteins (termed ""missing proteins"") in biological samples that lack sufficient experimental evidence at the protein level for confident protein identification. The C-HPP also aims to identify new protein forms that may be caused by genetic variability, post-translational modifications, and alternative splicing. Proteogenomic data integration forms the basis of the C-HPP's activities; therefore, we have summarized some of the key approaches and their roles in the project. We present new analytical technologies that improve the chemical space and lower detection limits coupled to bioinformatics tools and some publicly available resources that can be used to improve data analysis or support the development of analytical assays. Most of this paper's content has been compiled from posters, slides, and discussions presented in the series of C-HPP workshops held during 2014. All data (posters, presentations) used are available at the C-HPP Wiki (http://c-hpp.webhosting.rug.nl/) and in the Supporting Information.",2015-07-23 +23161688,SwissBioisostere: a database of molecular replacements for ligand design.,"The SwissBioisostere database (http://www.swissbioisostere.ch) contains information on molecular replacements and their performance in biochemical assays. It is meant to provide researchers in drug discovery projects with ideas for bioisosteric modifications of their current lead molecule, as well as to give interested scientists access to the details on particular molecular replacements. 
As of August 2012, the database contains 21,293,355 datapoints corresponding to 5,586,462 unique replacements that have been measured in 35,039 assays against 1948 molecular targets representing 30 target classes. The accessible data were created through detection of matched molecular pairs and mining bioactivity data in the ChEMBL database. The SwissBioisostere database is hosted by the Swiss Institute of Bioinformatics and available via a web-based interface.",2012-11-17 +28890846,Prediction of endoplasmic reticulum resident proteins using fragmented amino acid composition and support vector machine.,"

Background

The endoplasmic reticulum plays an important role in many cellular processes, which includes protein synthesis, folding and post-translational processing of newly synthesized proteins. It is also the site for quality control of misfolded proteins and entry point of extracellular proteins to the secretory pathway. Hence at any given point of time, endoplasmic reticulum contains two different cohorts of proteins, (i) proteins involved in endoplasmic reticulum-specific function, which reside in the lumen of the endoplasmic reticulum, called as endoplasmic reticulum resident proteins and (ii) proteins which are in process of moving to the extracellular space. Thus, endoplasmic reticulum resident proteins must somehow be distinguished from newly synthesized secretory proteins, which pass through the endoplasmic reticulum on their way out of the cell. Approximately only 50% of the proteins used in this study as training data had endoplasmic reticulum retention signal, which shows that these signals are not essentially present in all endoplasmic reticulum resident proteins. This also strongly indicates the role of additional factors in retention of endoplasmic reticulum-specific proteins inside the endoplasmic reticulum.

Methods

This is a support vector machine based method, where we had used different forms of protein features as inputs for support vector machine to develop the prediction models. During training leave-one-out approach of cross-validation was used. Maximum performance was obtained with a combination of amino acid compositions of different part of proteins.

Results

In this study, we have reported a novel support vector machine based method for predicting endoplasmic reticulum resident proteins, named as ERPred. During training we achieved a maximum accuracy of 81.42% with leave-one-out approach of cross-validation. When evaluated on independent dataset, ERPred did prediction with sensitivity of 72.31% and specificity of 83.69%. We have also annotated six different proteomes to predict the candidate endoplasmic reticulum resident proteins in them. A webserver, ERPred, was developed to make the method available to the scientific community, which can be accessed at http://proteininformatics.org/mkumar/erpred/index.html.

Discussion

We found that out of 124 proteins of the training dataset, only 66 proteins had endoplasmic reticulum retention signals, which shows that these signals are not an absolute necessity for endoplasmic reticulum resident proteins to remain inside the endoplasmic reticulum. This observation also strongly indicates the role of additional factors in retention of proteins inside the endoplasmic reticulum. Our proposed predictor, ERPred, is a signal independent tool. It is tuned for the prediction of endoplasmic reticulum resident proteins, even if the query protein does not contain specific ER-retention signal.",2017-09-04 +23161678,Gene Ontology annotations and resources.,"The Gene Ontology (GO) Consortium (GOC, http://www.geneontology.org) is a community-based bioinformatics resource that classifies gene product function through the use of structured, controlled vocabularies. Over the past year, the GOC has implemented several processes to increase the quantity, quality and specificity of GO annotations. First, the number of manual, literature-based annotations has grown at an increasing rate. Second, as a result of a new 'phylogenetic annotation' process, manually reviewed, homology-based annotations are becoming available for a broad range of species. Third, the quality of GO annotations has been improved through a streamlined process for, and automated quality checks of, GO annotations deposited by different annotation groups. Fourth, the consistency and correctness of the ontology itself has increased by using automated reasoning tools. Finally, the GO has been expanded not only to cover new areas of biology through focused interaction with experts, but also to capture greater specificity in all areas of the ontology using tools for adding new combinatorial terms. The GOC works closely with other ontology developers to support integrated use of terminologies. 
The GOC supports its user community through the use of e-mail lists, social media and web-based resources.",2012-11-17 +21479735,XCEDE: an extensible schema for biomedical data.,"The XCEDE (XML-based Clinical and Experimental Data Exchange) XML schema, developed by members of the BIRN (Biomedical Informatics Research Network), provides an extensive metadata hierarchy for storing, describing and documenting the data generated by scientific studies. Currently at version 2.0, the XCEDE schema serves as a specification for the exchange of scientific data between databases, analysis tools, and web services. It provides a structured metadata hierarchy, storing information relevant to various aspects of an experiment (project, subject, protocol, etc.). Each hierarchy level also provides for the storage of data provenance information allowing for a traceable record of processing and/or changes to the underlying data. The schema is extensible to support the needs of various data modalities and to express types of data not originally envisioned by the developers. The latest version of the XCEDE schema and manual are available from http://www.xcede.org/ .",2012-01-01 +27189556,RiboDB Database: A Comprehensive Resource for Prokaryotic Systematics.,"Ribosomal proteins (r-proteins) are increasingly used as an alternative to ribosomal rRNA for prokaryotic systematics. However, their routine use is difficult because r-proteins are often not or wrongly annotated in complete genome sequences, and there is currently no dedicated exhaustive database of r-proteins. RiboDB aims at fulfilling this gap. This weekly updated comprehensive database allows the fast and easy retrieval of r-protein sequences from publicly available complete prokaryotic genome sequences. The current version of RiboDB contains 90 r-proteins from 3,750 prokaryotic complete genomes encompassing 38 phyla/major classes and 1,759 different species. 
RiboDB is accessible at http://ribodb.univ-lyon1.fr and through ACNUC interfaces.",2016-05-05 +27151202,CSM-lig: a web server for assessing and comparing protein-small molecule affinities.,"Determining the affinity of a ligand for a given protein is a crucial component of drug development and understanding their biological effects. Predicting binding affinities is a challenging and difficult task, and despite being regarded as poorly predictive, scoring functions play an important role in the analysis of molecular docking results. Here, we present CSM-Lig (http://structure.bioc.cam.ac.uk/csm_lig), a web server tailored to predict the binding affinity of a protein-small molecule complex, encompassing both protein and small-molecule complementarity in terms of shape and chemistry via graph-based structural signatures. CSM-Lig was trained and evaluated on different releases of the PDBbind databases, achieving a correlation of up to 0.86 on 10-fold cross validation and 0.80 in blind tests, performing as well as or better than other widely used methods. The web server allows users to rapidly and automatically predict binding affinities of collections of structures and assess the interactions made. We believe CSM-lig would be an invaluable tool for helping assess docking poses, the effects of multiple mutations, including insertions, deletions and alternative splicing events, in protein-small molecule affinity, unraveling important aspects that drive protein-compound recognition.",2016-05-05 +28751859,"Probabilistic White Matter Atlases of Human Auditory, Basal Ganglia, Language, Precuneus, Sensorimotor, Visual and Visuospatial Networks.","Background: Despite the popularity of functional connectivity analyses and the well-known topology of several intrinsic cortical networks, relatively little is known about the white matter regions (i.e., structural connectivity) underlying these networks. 
In the current study, we have therefore performed fMRI-guided diffusion tensor imaging (DTI) tractography to create probabilistic white matter atlases for eight previously identified functional brain networks, including the Auditory, Basal Ganglia, Language, Precuneus, Sensorimotor, Primary Visual, Higher Visual and Visuospatial Networks. Methods: Whole-brain diffusion imaging data were acquired from a cohort of 32 healthy volunteers, and were warped to the ICBM template using a two-stage, high-dimensional, non-linear spatial normalization procedure. Deterministic tractography, with fractional anisotropy (FA) ≥0.15 and deviation angle <50°, was then performed using the Fiber Association by Continuous Tracking (FACT) algorithm, and a multi-ROI approach to identify tracts of interest. Regions-of-interest (ROIs) for each of the eight networks were taken from a pre-existing atlas of functionally defined regions to explore all ROI-to-ROI connections within each network, and all resulting streamlines were saved as binary masks to create probabilistic atlases (across participants) for tracts between each ROI-to-ROI pair. Results: The resulting functionally-defined white matter atlases (i.e., for each tract and each network as a whole) were saved as NIFTI images in stereotaxic ICBM coordinates, and have been added to the UManitoba-JHU Functionally-Defined Human White Matter Atlas (http://www.nitrc.org/projects/uofm_jhu_atlas/). Conclusion: To the best of our knowledge, this work represents the first attempt to comprehensively identify and map white matter connectomes for the Auditory, Basal Ganglia, Language, Precuneus, Sensorimotor, Primary Visual, Higher Visual and Visuospatial Networks. 
Therefore, the resulting probabilistic atlases represent a unique tool for future neuroimaging studies wishing to ascribe voxel-wise or ROI-based changes (i.e., in DTI or other quantitative white matter imaging signals) to these functional brain networks.",2017-06-19 +23275696,DB Dehydrogenase: an online integrated structural database on enzyme dehydrogenase.,"

Unlabelled

Dehydrogenase enzymes are almost inevitable for metabolic processes. Shortage or malfunctioning of dehydrogenases often leads to several acute diseases like cancers, retinal diseases, diabetes mellitus, Alzheimer, hepatitis B & C etc. With advancement in modern-day research, huge amount of sequential, structural and functional data are generated everyday and widens the gap between structural attributes and its functional understanding. DB Dehydrogenase is an effort to relate the functionalities of dehydrogenase with its structures. It is a completely web-based structural database, covering almost all dehydrogenases [~150 enzyme classes, ~1200 entries from ~160 organisms] whose structures are known. It is created by extracting and integrating various online resources to provide the true and reliable data and implemented by MySQL relational database through user friendly web interfaces using CGI Perl. Flexible search options are there for data extraction and exploration. To summarize, sequence, structure, function of all dehydrogenases in one place along with the necessary option of cross-referencing; this database will be utile for researchers to carry out further work in this field.

Availability

The database is available for free at http://www.bifku.in/DBD/",2012-10-13 +28043950,Freeze-quenched maize mesophyll and bundle sheath separation uncovers bias in previous tissue-specific RNA-Seq data.,"The high efficiency of C4 photosynthesis relies on spatial division of labor, classically with initial carbon fixation in the mesophyll and carbon reduction in the bundle sheath. By employing grinding and serial filtration over liquid nitrogen, we enriched C4 tissues along a developing leaf gradient. This method treats both C4 tissues in an integrity-preserving and consistent manner, while allowing complementary measurements of metabolite abundance and enzyme activity, thus providing a comprehensive data set. Meta-analysis of this and the previous studies highlights the strengths and weaknesses of different C4 tissue separation techniques. While the method reported here achieves the least enrichment, it is the only one that shows neither strong 3' (degradation) bias, nor different severity of 3' bias between samples. The meta-analysis highlighted previously unappreciated observations, such as an accumulation of evidence that aspartate aminotransferase is more mesophyll specific than expected from the current NADP-ME C4 cycle model, and a shift in enrichment of protein synthesis genes from bundle sheath to mesophyll during development. The full comparative dataset is available for download, and a web visualization tool (available at http://www.plant-biochemistry.hhu.de/resources.html) facilitates comparison of the Z. mays bundle sheath and mesophyll studies, their consistencies and their conflicts.",2017-01-02 +22465442,EmaxDB: Availability of a first draft genome sequence for the apicomplexan Eimeria maxima.,"Apicomplexan parasites are serious pathogens of animals and man that cause diseases including coccidiosis, malaria and toxoplasmosis. 
The importance of these parasites has prompted the establishment of genomic resources in support of developing effective control strategies. For the Eimeria species resources have developed most rapidly for the reference Eimeria tenella Houghton strain (http://www.genedb.org/Homepage/Etenella). The value of these resources can be enhanced by comparison with related parasites. The well characterised immunogenicity and genetic diversity associated with Eimeria maxima promote its use in genetics-led studies on coccidiosis and recommended its selection for sequencing. Using a combination of sequencing technologies a first draft assembly and annotation has been produced for an E. maxima Houghton strain-derived clone (EmaxDB; http://www.genomemalaysia.gov.my/emaxdb/). The assembly of a draft genome sequence for E. maxima provides a resource for comparative studies with Eimeria and related parasites as demonstrated here through the identification of genes predicted to encode microneme proteins in E. maxima.",2012-03-23 +28506979,Impact of Ezetimibe on the Rate of Cardiovascular-Related Hospitalizations and Associated Costs Among Patients With a Recent Acute Coronary Syndrome: Results From the IMPROVE-IT Trial (Improved Reduction of Outcomes: Vytorin Efficacy International Trial). ,"Ezetimibe, when added to simvastatin therapy, reduces cardiovascular events after recent acute coronary syndrome. However, the impact of ezetimibe on cardiovascular-related hospitalizations and associated costs is unknown. We used patient-level data from the IMPROVE-IT (Improved Reduction of Outcomes: Vytorin Efficacy International Trial) to examine the impact of simvastatin-ezetimibe versus simvastatin-placebo on cardiovascular-related hospitalizations and related costs (excluding drug costs) over 7 years follow-up. Medicare Severity-Diagnosis Related Groups were assigned to all cardiovascular hospitalizations. Hospital costs were estimated using Medicare reimbursement rates for 2013. 
Associated physician costs were estimated as a percentage of hospital costs. The impact of treatment assignment on hospitalization rates and costs was estimated using Poisson and linear regression, respectively. There was a significantly lower cardiovascular hospitalization rate with ezetimibe compared with placebo (risk ratio, 0.95; 95% confidence interval, 0.90-0.99; P=0.031), mainly attributable to fewer hospitalizations for percutaneous coronary intervention, angina, and stroke. Consequently, cardiovascular-related hospitalization costs over 7 years were $453 per patient lower with ezetimibe (95% confidence interval, -$38 to -$869; P=0.030). Although all prespecified subgroups had lower cost with ezetimibe therapy, patients with diabetes mellitus, patients aged ≥75 years, and patients at higher predicted risk for recurrent ischemic events had even greater cost offsets. Addition of ezetimibe to statin therapy in patients with a recent acute coronary syndrome leads to reductions in cardiovascular-related hospitalizations and associated costs, with the greatest cost offsets in high-risk patients. These cost reductions may completely offset the cost of the drug once ezetimibe becomes generic, and may lead to cost savings from the perspective of the healthcare system, if treatment with ezetimibe is targeted to high-risk patients. URL: https://www.clinicaltrials.gov. Unique Identifier: NCT00202878.",2017-05-01 +27164589,CVD2014-A Database for Evaluating No-Reference Video Quality Assessment Algorithms.,"In this paper, we present a new video database: CVD2014-Camera Video Database. In contrast to previous video databases, this database uses real cameras rather than introducing distortions via post-processing, which results in a complex distortion space in regard to the video acquisition process. CVD2014 contains a total of 234 videos that are recorded using 78 different cameras. 
Moreover, this database contains the observer-specific quality evaluation scores rather than only providing mean opinion scores. We have also collected open-ended quality descriptions that are provided by the observers. These descriptions were used to define the quality dimensions for the videos in CVD2014. The dimensions included sharpness, graininess, color balance, darkness, and jerkiness. At the end of this paper, a performance study of image and video quality algorithms for predicting the subjective video quality is reported. For this performance study, we proposed a new performance measure that accounts for observer variance. The performance study revealed that there is room for improvement regarding the video quality assessment algorithms. The CVD2014 video database has been made publicly available for the research community. All video sequences and corresponding subjective ratings can be obtained from the CVD2014 project page (http://www.helsinki.fi/psychology/groups/visualcognition/).",2016-05-03 +26130132,BioMaS: a modular pipeline for Bioinformatic analysis of Metagenomic AmpliconS.,"

Background

Substantial advances in microbiology, molecular evolution and biodiversity have been carried out in recent years thanks to Metagenomics, which allows to unveil the composition and functions of mixed microbial communities in any environmental niche. If the investigation is aimed only at the microbiome taxonomic structure, a target-based metagenomic approach, here also referred as Meta-barcoding, is generally applied. This approach commonly involves the selective amplification of a species-specific genetic marker (DNA meta-barcode) in the whole taxonomic range of interest and the exploration of its taxon-related variants through High-Throughput Sequencing (HTS) technologies. The accessibility to proper computational systems for the large-scale bioinformatic analysis of HTS data represents, currently, one of the major challenges in advanced Meta-barcoding projects.

Results

BioMaS (Bioinformatic analysis of Metagenomic AmpliconS) is a new bioinformatic pipeline designed to support biomolecular researchers involved in taxonomic studies of environmental microbial communities by a completely automated workflow, comprehensive of all the fundamental steps, from raw sequence data upload and cleaning to final taxonomic identification, that are absolutely required in an appropriately designed Meta-barcoding HTS-based experiment. In its current version, BioMaS allows the analysis of both bacterial and fungal environments starting directly from the raw sequencing data from either Roche 454 or Illumina HTS platforms, following two alternative paths, respectively. BioMaS is implemented into a public web service available at https://recasgateway.ba.infn.it/ and is also available in Galaxy at http://galaxy.cloud.ba.infn.it:8080 (only for Illumina data).

Conclusion

BioMaS is a friendly pipeline for Meta-barcoding HTS data analysis specifically designed for users without particular computing skills. A comparative benchmark, carried out by using a simulated dataset suitably designed to broadly represent the currently known bacterial and fungal world, showed that BioMaS outperforms QIIME and MOTHUR in terms of extent and accuracy of deep taxonomic sequence assignments.",2015-07-01 +27147719,Cutting to the Core of the Issue: Emerging Strategies To Reduce Prostate Biopsy-Related Infections.,"Over 1 million men undergo biopsy in the United States each year to evaluate for prostate cancer (S. Loeb, H. B. Carter, S. I. Berndt, W. Ricker, and E. M. Schaeffer, J Urol 186:1830-1834, 2011, http://dx.doi.org/10.1016/j.juro.2011.06.057). In recent years, there has been a rise in infectious complications related to these procedures. This review aims to provide an overview of the guidelines that direct transrectal prostate biopsy, to describe associated infection, and to evaluate the published data driving the current trend toward prebiopsy screening for resistant organisms.",2016-05-04 +27357721,PhyloToAST: Bioinformatics tools for species-level analysis and visualization of complex microbial datasets.,"The 16S rRNA gene is widely used for taxonomic profiling of microbial ecosystems; and recent advances in sequencing chemistry have allowed extremely large numbers of sequences to be generated from minimal amounts of biological samples. Analysis speed and resolution of data to species-level taxa are two important factors in large-scale explorations of complex microbiomes using 16S sequencing. We present here new software, Phylogenetic Tools for Analysis of Species-level Taxa (PhyloToAST), that completely integrates with the QIIME pipeline to improve analysis speed, reduce primer bias (requiring two sequencing primers), enhance species-level analysis, and add new visualization tools. 
The code is free and open source, and can be accessed at http://phylotoast.org.",2016-06-30 +27259542,Group-combined P-values with applications to genetic association studies.,"

Motivation

In large-scale genetic association studies with tens of hundreds of single nucleotide polymorphisms (SNPs) genotyped, the traditional statistical framework of logistic regression using maximum likelihood estimator (MLE) to infer the odds ratios of SNPs may not work appropriately. This is because a large number of odds ratios need to be estimated, and the MLEs may be not stable when some of the SNPs are in high linkage disequilibrium. Under this situation, the P-value combination procedures seem to provide good alternatives as they are constructed on the basis of single-marker analysis.

Results

The commonly used P-value combination methods (such as the Fisher's combined test, the truncated product method, the truncated tail strength and the adaptive rank truncated product) may lose power when the significance level varies across SNPs. To tackle this problem, a group combined P-value method (GCP) is proposed, where the P-values are divided into multiple groups and then are combined at the group level. With this strategy, the significance values are integrated at different levels, and the power is improved. Simulation shows that the GCP can effectively control the type I error rates and have additional power over the existing methods-the power increase can be as high as over 50% under some situations. The proposed GCP method is applied to data from the Genetic Analysis Workshop 16. Among all the methods, only the GCP and ARTP can give the significance to identify a genomic region covering gene DSC3 being associated with rheumatoid arthritis, but the GCP provides smaller P-value.

Availability and implementation

http://www.statsci.amss.ac.cn/yjscy/yjy/lqz/201510/t20151027_313273.html

Contact

liqz@amss.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-03 +27357172,Accounting for pairwise distance restraints in FFT-based protein-protein docking.,"ClusPro is a heavily used protein-protein docking server based on the fast Fourier transform (FFT) correlation approach. While FFT enables global docking, accounting for pairwise distance restraints using penalty terms in the scoring function is computationally expensive. We use a different approach and directly select low energy solutions that also satisfy the given restraints. As expected, accounting for restraints generally improves the rank of near native predictions, while retaining or even improving the numerical efficiency of FFT based docking.

Availability and implementation

The software is freely available as part of the ClusPro web-based server at http://cluspro.org/nousername.php CONTACT: midas@laufercenter.org or vajda@bu.edu. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-06-29 +26566394,CyNetworkBMA: a Cytoscape app for inferring gene regulatory networks.,"

Background

Inference of gene networks from expression data is an important problem in computational biology. Many algorithms have been proposed for solving the problem efficiently. However, many of the available implementations are programming libraries that require users to write code, which limits their accessibility.

Results

We have developed a tool called CyNetworkBMA for inferring gene networks from expression data that integrates with Cytoscape. Our application offers a graphical user interface for networkBMA, an efficient implementation of Bayesian Model Averaging methods for network construction. The client-server architecture of CyNetworkBMA makes it possible to distribute or centralize computation depending on user needs.

Conclusions

CyNetworkBMA is an easy-to-use tool that makes network inference accessible to non-programmers through seamless integration with Cytoscape. CyNetworkBMA is available on the Cytoscape App Store at http://apps.cytoscape.org/apps/cynetworkbma.",2015-11-11 +27986495,"Current strategies for the long-term assessment, monitoring, and management of cystic fibrosis patients treated with CFTR modulator therapy.","The content for this activity is based on the satellite symposium, ""Current Strategies for the Long-term Assessment, Monitoring, and Management for Cystic Fibrosis Patients Treated with CFTR Modulator Therapy"" that was presented at the 39th European Cystic Fibrosis Society Conference on June 10, 2016 (Online access: http://courses.elseviercme.com/ecfs2016e/619e). The emergence of novel targeted agents, that directly correct CFTR loss function alleles, has created new treatment opportunities for patients with cystic fibrosis with advanced disease. Knowledge of the role of these agents in the clinical setting is quickly evolving and will require physicians to stay acquainted with the latest data as well as evidence-based treatment guidelines in order to achieve optimized cystic fibrosis patient care. Ideally, after diagnosis, a personalized approach would be adapted and tailored to the patient through genome-informed medicine. However, due to the relative recentness of genomic-based therapeutics, physicians may have a limited knowledge base regarding these new treatment options and how to best incorporate these agents into patient management plans. Although cystic fibrosis is still largely regarded as a pediatric disease, the median survival for patients is 35 years of age. 
Consequently, pediatric-to-adult cystic fibrosis care programs would allow suitable preparation time for this transition and develop a standardized group of self-care and management skills.",2016-12-13 +28669936,Exposure to Traffic-Related Air Pollution and Serum Inflammatory Cytokines in Children.,"

Background

Long-term exposure to ambient air pollution can lead to adverse health effects in children; however, underlying biological mechanisms are not fully understood.

Objectives

We evaluated the effect of air pollution exposure during different time periods on mRNA expression as well as circulating levels of inflammatory cytokines in children.

Methods

We measured a panel of 10 inflammatory markers in peripheral blood samples from 670 8-y-old children in the Barn/Child, Allergy, Milieu, Stockholm, Epidemiology (BAMSE) birth cohort. Outdoor concentrations of nitrogen dioxide (NO2) and particulate matter (PM) with aerodynamic diameter <10 μm (PM10) from road traffic were estimated for residential, daycare, and school addresses using dispersion modeling. Time-weighted average exposures during infancy and at biosampling were linked to serum cytokine levels using linear regression analysis. Furthermore, gene expression data from 16-year-olds in BAMSE (n=238) were used to evaluate links between air pollution exposure and expression of genes coding for the studied inflammatory markers.

Results

A 10 μg/m3 increase of NO2 exposure during infancy was associated with a 13.6% (95% confidence interval (CI): 0.8; 28.1%) increase in interleukin-6 (IL-6) levels, as well as with a 27.8% (95% CI: 4.6, 56.2%) increase in IL-10 levels, the latter limited to children with asthma. However, no clear associations were observed for current exposure. Results were similar using PM10, which showed a high correlation with NO2. The functional analysis identified several differentially expressed genes in response to air pollution exposure during infancy, including IL10, IL13, and TNF.

Conclusion

Our results indicate alterations in systemic inflammatory markers in 8-y-old children in relation to early-life exposure to traffic-related air pollution. https://doi.org/10.1289/EHP460.",2017-06-16 +22426342,BioQ: tracing experimental origins in public genomic databases using a novel data provenance model.,"

Unlabelled

Public genomic databases, which are often used to guide genetic studies of human disease, are now being applied to genomic medicine through in silico integrative genomics. These databases, however, often lack tools for systematically determining the experimental origins of the data.

Results

We introduce a new data provenance model that we have implemented in a public web application, BioQ, for assessing the reliability of the data by systematically tracing its experimental origins to the original subjects and biologics. BioQ allows investigators to both visualize data provenance as well as explore individual elements of experimental process flow using precise tools for detailed data exploration and documentation. It includes a number of human genetic variation databases such as the HapMap and 1000 Genomes projects.

Availability and implementation

BioQ is freely available to the public at http://bioq.saclab.net.",2012-03-16 +25540184,J-Circos: an interactive Circos plotter.,"

Summary

Circos plots are graphical outputs that display three dimensional chromosomal interactions and fusion transcripts. However, the Circos plot tool is not an interactive visualization tool, but rather a figure generator. For example, it does not enable data to be added dynamically nor does it provide information for specific data points interactively. Recently, an R-based Circos tool (RCircos) has been developed to integrate Circos to R, but similarly, RCircos can only be used to generate plots. Thus, we have developed a Circos plot tool (J-Circos) that is an interactive visualization tool that can plot Circos figures, as well as being able to dynamically add data to the figure, and providing information for specific data points using mouse hover display and zoom in/out functions. J-Circos uses the Java computer language to enable it to be used on most operating systems (Windows, MacOS, Linux). Users can input data into J-Circos using flat data formats, as well as from the Graphical user interface (GUI). J-Circos will enable biologists to better study more complex chromosomal interactions and fusion transcripts that are otherwise difficult to visualize from next-generation sequencing data.

Availability and implementation

J-circos and its manual are freely available at http://www.australianprostatecentre.org/research/software/jcircos

Contact

j.an@qut.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-24 +27131070,Drug-induced Brugada syndrome: Clinical characteristics and risk factors.,"

Background

Cardiac arrest may result from seemingly innocuous medications that do not necessarily have cardiac indications. The best-known example is the drug-induced long QT syndrome. A less known but not necessarily less important form of drug-induced proarrhythmia is the drug-induced Brugada syndrome.

Objective

The purpose of this study was to identify clinical and ECG risk markers for drug-induced Brugada syndrome.

Methods

Reports of drug-induced Brugada syndrome recounted by an international database (http://www.brugadadrugs.org) were reviewed to define characteristics that identify patients prone to developing this complication. For each patient with drug-induced Brugada syndrome who had an ECG recorded in the absence of drugs, we included 5 healthy controls matched by gender and age. All ECGs were evaluated for Brugada-like abnormalities.

Results

Seventy-four cases of drug-induced Brugada syndrome from noncardiac medications were identified: 77% were male, and drug toxicity was involved in 46%. Drug-induced Brugada syndrome from oral medications generally occurred weeks after the initiation of therapy. Mortality was 13%. By definition, all cases had a type I Brugada pattern during drug therapy. Nevertheless, their ECG in the absence of drugs was more frequently abnormal than the ECG of controls (56% vs 33%, P = .04).

Conclusion

Drug-induced Brugada syndrome from noncardiac drugs occurs predominantly in adult males, is frequently due to drug toxicity, and occurs late after the onset of therapy. Minor changes are frequently noticeable on baseline ECG, but screening is impractical because of a prohibitive false-positive rate.",2016-05-01 +26249809,ASSIsT: an automatic SNP scoring tool for in- and outbreeding species.,"

Unlabelled

ASSIsT (Automatic SNP ScorIng Tool) is a user-friendly customized pipeline for efficient calling and filtering of SNPs from Illumina Infinium arrays, specifically devised for custom genotyping arrays. Illumina has developed an integrated software for SNP data visualization and inspection called GenomeStudio (GS). ASSIsT builds on GS-derived data and identifies those markers that follow a bi-allelic genetic model and show reliable genotype calls. Moreover, ASSIsT re-edits SNP calls with null alleles or additional SNPs in the probe annealing site. ASSIsT can be employed in the analysis of different population types such as full-sib families and mating schemes used in the plant kingdom (backcross, F1, F2), and unrelated individuals. The final result can be directly exported in the format required by the most common software for genetic mapping and marker-trait association analysis. ASSIsT is developed in Python and runs in Windows and Linux.

Availability and implementation

The software, example data sets and tutorials are freely available at http://compbiotoolbox.fmach.it/assist/.

Contact

eric.vandeweg@wur.nl.",2015-08-06 +26338769,Discovering hospital admission patterns using models learnt from electronic hospital records.,"

Motivation

Electronic medical records, nowadays routinely collected in many developed countries, open a new avenue for medical knowledge acquisition. In this article, this vast amount of information is used to develop a novel model for hospital admission type prediction.

Results

I introduce a novel model for hospital admission-type prediction based on the representation of a patient's medical history in the form of a binary history vector. This representation is motivated using empirical evidence from previous work and validated using a large data corpus of medical records from a local hospital. The proposed model allows exploration, visualization and patient-specific prognosis making in an intuitive and readily understood manner. Its power is demonstrated using a large, real-world data corpus collected by a local hospital on which it is shown to outperform previous state-of-the-art in the literature, achieving over 82% accuracy in the prediction of the first future diagnosis. The model was vastly superior for long-term prognosis as well, outperforming previous work in 82% of the cases, while producing comparable performance in the remaining 18% of the cases.

Availability and implementation

Full Matlab source code is freely available for download at: http://ognjen-arandjelovic.t15.org/data/dprog.zip.",2015-09-03 +26357328,RBioCloud: A Light-Weight Framework for Bioconductor and R-based Jobs on the Cloud.,"Large-scale ad hoc analytics of genomic data is popular using the R-programming language supported by over 700 software packages provided by Bioconductor. More recently, analytical jobs are benefitting from on-demand computing and storage, their scalability and their low maintenance cost, all of which are offered by the cloud. While biologists and bioinformaticists can take an analytical job and execute it on their personal workstations, it remains challenging to seamlessly execute the job on the cloud infrastructure without extensive knowledge of the cloud dashboard. How analytical jobs can not only with minimum effort be executed on the cloud, but also how both the resources and data required by the job can be managed is explored in this paper. An open-source light-weight framework for executing R-scripts using Bioconductor packages, referred to as `RBioCloud', is designed and developed. RBioCloud offers a set of simple command-line tools for managing the cloud resources, the data and the execution of the job. Three biological test cases validate the feasibility of RBioCloud. The framework is available from http://www.rbiocloud.com.",2015-07-01 +22554788,Directly e-mailing authors of newly published papers encourages community curation.,"Much of the data within Model Organism Databases (MODs) comes from manual curation of the primary research literature. Given limited funding and an increasing density of published material, a significant challenge facing all MODs is how to efficiently and effectively prioritize the most relevant research papers for detailed curation. Here, we report recent improvements to the triaging process used by FlyBase. 
We describe an automated method to directly e-mail corresponding authors of new papers, requesting that they list the genes studied and indicate ('flag') the types of data described in the paper using an online tool. Based on the author-assigned flags, papers are then prioritized for detailed curation and channelled to appropriate curator teams for full data extraction. The overall response rate has been 44% and the flagging of data types by authors is sufficiently accurate for effective prioritization of papers. In summary, we have established a sustainable community curation program, with the result that FlyBase curators now spend less time triaging and can devote more effort to the specialized task of detailed data extraction. Database URL: http://flybase.org/",2012-05-02 +27636407,Biological Therapy in Pediatric Inflammatory Bowel Disease: A Systematic Review.,"The incidence of inflammatory bowel disease (IBD) has increased steadily worldwide, both in adult and in children; approximately 25% of IBD patients are diagnosed before the age of 18. The natural history of IBD is usually more severe in children than in adults, and can be associated with linear growth impairment, delayed puberty onset, reduced bone mass index, malnutrition, and the need for surgery. Biological therapies, especially blocking tumor necrosis factor-α (TNFα), have radically modified the treatment strategies and disease course of IBD in children. In particular, drugs such as Infliximab and Adalimumab are routinely used in the treatment of pediatric IBD. The role of Infliximab and Adalimumab in the management of pediatric IBD has been recently updated in the Consensus guidelines of ECCO/ESPGHAN. Data regarding short-term and long-term efficacy and safety of these drugs in children, and the effects of ""top-down"" and ""step-up"" strategies, are lacking. 
In this paper, the authors will review current indications, efficacy, and safety of biological therapy in pediatric IBD patients, evaluating all articles published after ECCO/ESPGHAN guidelines publication. The authors carried out a systematic search through MEDLINE through PubMed (http://www.ncbi.nlm.nih.gov/pubmed/) Embase, CINAHL, Cochrane Library, and gray literature, from January 2013 to January 2016. Anti-TNFα has been shown to be effective and safe to maintain remission and to achieve mucosal healing. Multicenter trials based on large sample size cohorts are needed to better clarify long-term efficacy of anti-TNFα and the real incidence of treatment-related complications in pediatric IBD.",2017-02-01 +27307615,DrugE-Rank: improving drug-target interaction prediction of new candidate drugs or targets by ensemble learning to rank.,"

Motivation

Identifying drug-target interactions is an important task in drug discovery. To reduce heavy time and financial cost in experimental way, many computational approaches have been proposed. Although these approaches have used many different principles, their performance is far from satisfactory, especially in predicting drug-target interactions of new candidate drugs or targets.

Methods

Approaches based on machine learning for this problem can be divided into two types: feature-based and similarity-based methods. Learning to rank is the most powerful technique in the feature-based methods. Similarity-based methods are well accepted, due to their idea of connecting the chemical and genomic spaces, represented by drug and target similarities, respectively. We propose a new method, DrugE-Rank, to improve the prediction performance by nicely combining the advantages of the two different types of methods. That is, DrugE-Rank uses LTR, for which multiple well-known similarity-based methods can be used as components of ensemble learning.

Results

The performance of DrugE-Rank is thoroughly examined by three main experiments using data from DrugBank: (i) cross-validation on FDA (US Food and Drug Administration) approved drugs before March 2014; (ii) independent test on FDA approved drugs after March 2014; and (iii) independent test on FDA experimental drugs. Experimental results show that DrugE-Rank outperforms competing methods significantly, especially achieving more than 30% improvement in Area under Prediction Recall curve for FDA approved new drugs and FDA experimental drugs.

Availability

http://datamining-iip.fudan.edu.cn/service/DrugE-Rank

Contact

zhusf@fudan.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +24478461,Mutational landscape of the essential autophagy gene BECN1 in human cancers.,"

Unlabelled

Evidence suggests that the catabolic process of macroautophagy (autophagy hereafter) can either suppress or promote cancer. The essential autophagy gene ATG6/BECN1 encoding the Beclin1 protein has been implicated as a haploinsufficient tumor suppressor in breast, ovarian, and prostate cancers. The proximity of BECN1 to the known breast and ovarian tumor suppressor breast cancer 1, early onset, BRCA1, on chromosome 17q21, has made this determination equivocal. Here, the mutational status of BECN1 was assessed in human tumor sequencing data from The Cancer Genome Atlas (TCGA) and other databases. Large deletions encompassing both BRCA1 and BECN1, and deletions of only BRCA1 but not BECN1, were found in breast and ovarian cancers, consistent with BRCA1 loss being a primary driver mutation in these cancers. Furthermore, there was no evidence for BECN1 mutation or loss in any other cancer, casting doubt on whether BECN1 is a tumor suppressor in most human cancers.

Implications

Contrary to previous reports, BECN1 is not significantly mutated in human cancer and not a tumor-suppressor gene, as originally thought. VISUAL OVERVIEW: http://mcr.aacrjournals.org/content/early/2014/04/01/1541-7786.MCR-13-0614/F1.large.jpg.",2014-01-29 +25726569,Development of data representation standards by the human proteome organization proteomics standards initiative.,"

Objective

To describe the goals of the Proteomics Standards Initiative (PSI) of the Human Proteome Organization, the methods that the PSI has employed to create data standards, the resulting output of the PSI, lessons learned from the PSI's evolution, and future directions and synergies for the group.

Materials and methods

The PSI has 5 categories of deliverables that have guided the group. These are minimum information guidelines, data formats, controlled vocabularies, resources and software tools, and dissemination activities. These deliverables are produced via the leadership and working group organization of the initiative, driven by frequent workshops and ongoing communication within the working groups. Official standards are subjected to a rigorous document process that includes several levels of peer review prior to release.

Results

We have produced and published minimum information guidelines describing what information should be provided when making data public, either via public repositories or other means. The PSI has produced a series of standard formats covering mass spectrometer input, mass spectrometer output, results of informatics analysis (both qualitative and quantitative analyses), reports of molecular interaction data, and gel electrophoresis analyses. We have produced controlled vocabularies that ensure that concepts are uniformly annotated in the formats and engaged in extensive software development and dissemination efforts so that the standards can efficiently be used by the community. Conclusion: In its first dozen years of operation, the PSI has produced many standards that have accelerated the field of proteomics by facilitating data exchange and deposition to data repositories. We look to the future to continue developing standards for new proteomics technologies and workflows and mechanisms for integration with other omics data types. Our products facilitate the translation of genomics and proteomics findings to clinical and biological phenotypes. The PSI website can be accessed at http://www.psidev.info.",2015-02-28 +28073754,Fast and accurate phylogeny reconstruction using filtered spaced-word matches.,"

Motivation

Word-based or 'alignment-free' algorithms are increasingly used for phylogeny reconstruction and genome comparison, since they are much faster than traditional approaches that are based on full sequence alignments. Existing alignment-free programs, however, are less accurate than alignment-based methods.

Results

We propose Filtered Spaced Word Matches (FSWM), a fast alignment-free approach to estimate phylogenetic distances between large genomic sequences. For a pre-defined binary pattern of match and don't-care positions, FSWM rapidly identifies spaced word-matches between input sequences, i.e. gap-free local alignments with matching nucleotides at the match positions and with mismatches allowed at the don't-care positions. We then estimate the number of nucleotide substitutions per site by considering the nucleotides aligned at the don't-care positions of the identified spaced-word matches. To reduce the noise from spurious random matches, we use a filtering procedure where we discard all spaced-word matches for which the overall similarity between the aligned segments is below a threshold. We show that our approach can accurately estimate substitution frequencies even for distantly related sequences that cannot be analyzed with existing alignment-free methods; phylogenetic trees constructed with FSWM distances are of high quality. A program run on a pair of eukaryotic genomes of a few hundred Mb each takes a few minutes.

Availability and implementation

The program source code for FSWM including a documentation, as well as the software that we used to generate artificial genome sequences are freely available at http://fswm.gobics.de/.

Contact

chris.leimeister@stud.uni-goettingen.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +26338770,Benchmark analysis of algorithms for determining and quantifying full-length mRNA splice forms from RNA-seq data.,"

Motivation

Because of the advantages of RNA sequencing (RNA-Seq) over microarrays, it is gaining widespread popularity for highly parallel gene expression analysis. For example, RNA-Seq is expected to be able to provide accurate identification and quantification of full-length splice forms. A number of informatics packages have been developed for this purpose, but short reads make it a difficult problem in principle. Sequencing error and polymorphisms add further complications. It has become necessary to perform studies to determine which algorithms perform best and which if any algorithms perform adequately. However, there is a dearth of independent and unbiased benchmarking studies. Here we take an approach using both simulated and experimental benchmark data to evaluate their accuracy.

Results

We conclude that most methods are inaccurate even using idealized data, and that no method is highly accurate once multiple splice forms, polymorphisms, intron signal, sequencing errors, alignment errors, annotation errors and other complicating factors are present. These results point to the pressing need for further algorithm development.

Availability and implementation

Simulated datasets and other supporting information can be found at http://bioinf.itmat.upenn.edu/BEERS/bp2.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-03 +28172617,iATC-mISF: a multi-label classifier for predicting the classes of anatomical therapeutic chemicals.,"

Motivation

Given a compound, can we predict which anatomical therapeutic chemical (ATC) class/classes it belongs to? It is a challenging problem since the information thus obtained can be used to deduce its possible active ingredients, as well as its therapeutic, pharmacological and chemical properties. And hence the pace of drug development could be substantially expedited. But this problem is by no means an easy one. Particularly, some drugs or compounds may belong to two or more ATC classes.

Results

To address it, a multi-label classifier, called iATC-mISF, was developed by incorporating the information of chemical–chemical interaction, the information of the structural similarity, and the information of the fingerprintal similarity. Rigorous cross-validations showed that the proposed predictor achieved remarkably higher prediction quality than its cohorts for the same purpose, particularly in the absolute true rate, the most important and harsh metrics for the multi-label systems.

Availability and implementation

The web-server for iATC-mISF is accessible at http://www.jci-bioinfo.cn/iATC-mISF. Furthermore, to maximize the convenience for most experimental scientists, a step-by-step guide was provided, by which users can easily get their desired results without needing to go through the complicated mathematical equations. Their inclusion in this article is just for the integrity of the new method and stimulating more powerful methods to deal with various multi-label systems in biology.

Contact

xxiao@gordonlifescience.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-02-01 +28854979,The effect of exercise training on clinical outcomes in patients with the metabolic syndrome: a systematic review and meta-analysis.,"

Background

Purpose: to establish if exercise training improves clinical outcomes in people with metabolic syndrome (MetS). Registered with PROSPERO international prospective register of systematic reviews ( https://www.crd.york.ac.uk/PROSPERO/Identifier:CRD42017055491 ).

Data sources

studies were identified through a MEDLINE search strategy (1985 to Jan 12, 2017), Cochrane controlled trials registry, CINAHL and SPORTDiscus.

Study selection

prospective randomized or controlled trials of exercise training in humans with metabolic syndrome, lasting 12 weeks or more.

Results

We included 16 studies with 23 intervention groups; 77,000 patient-hours of exercise training. In analyses of aerobic exercise studies versus control: body mass index was significantly reduced, mean difference (MD) -0.29 (kg m-2) (95% CI -0.44, -0.15, p < 0.0001); body mass was significantly reduced, MD -1.16 kg (95% CI -1.83, -0.48, p = 0.0008); waist circumference was significantly reduced MD -1.37 cm (95% CI -2.02, -0.71, p < 0.0001), peak VO2 was significantly improved MD 3.00 mL kg-1 min-1 (95% CI 1.92, 4.08, p < 0.000001); systolic blood pressure and diastolic blood pressure were significantly reduced, MD -2.54 mmHg (95% CI -4.34, -0.75, p = 0.006), and, MD -2.27 mmHg (95% CI -3.47, -1.06, p = 0.0002) respectively; fasting blood glucose was significantly reduced MD -0.16 mmol L-1 (95% CI -0.32, -0.01, p = 0.04); triglycerides were significantly reduced MD -0.21 mmol L-1 (95% CI -0.29, -0.13, p < 0.00001); and low density lipoprotein was significantly reduced MD -0.03 mmol L-1 (95% CI -0.05, -0.00, p = 0.02). In analyses of combined exercise versus control: waist circumference, MD -3.80 cm (95% CI -5.65, -1.95, p < 0.0001); peak VO2 MD 4.64 mL kg-1 min-1 (95% CI 2.42, 6.87, p < 0.0001); systolic blood pressure MD -3.79 mmHg (95% CI -6.18, -1.40, p = 0.002); and high density lipoprotein (HDL) MD 0.14 (95% CI 0.04, 0.25, p = 0.009) were all significantly improved. We found no significant differences between outcome measures between the two exercise interventions.

Conclusions

Exercise training improves body composition, cardiovascular, and, metabolic outcomes in people with metabolic syndrome. For some outcome measures, isolated aerobic exercise appears optimal.",2017-08-30 +23203982,SINEBase: a database and tool for SINE analysis.,"SINEBase (http://sines.eimb.ru) integrates the revisited body of knowledge about short interspersed elements (SINEs). A set of formal definitions concerning SINEs was introduced. All available sequence data were screened through these definitions and the genetic elements misidentified as SINEs were discarded. As a result, 175 SINE families have been recognized in animals, flowering plants and green algae. These families were classified by the modular structure of their nucleotide sequences and the frequencies of different patterns were evaluated. These data formed the basis for the database of SINEs. The SINEBase website can be used in two ways: first, to explore the database of SINE families, and second, to analyse candidate SINE sequences using specifically developed tools. This article presents an overview of the database and the process of SINE identification and analysis.",2012-11-30 +24599084,The CO-Regulation Database (CORD): a tool to identify coordinately expressed genes.,"

Background

Meta-analysis of gene expression array databases has the potential to reveal information about gene function. The identification of gene-gene interactions may be inferred from gene expression information but such meta-analysis is often limited to a single microarray platform. To address this limitation, we developed a gene-centered approach to analyze differential expression across thousands of gene expression experiments and created the CO-Regulation Database (CORD) to determine which genes are correlated with a queried gene.

Results

Using the GEO and ArrayExpress database, we analyzed over 120,000 group by group experiments from gene microarrays to determine the correlating genes for over 30,000 different genes or hypothesized genes. CORD output data is presented for sample queries with focus on genes with well-known interaction networks including p16 (CDKN2A), vimentin (VIM), MyoD (MYOD1). CDKN2A, VIM, and MYOD1 all displayed gene correlations consistent with known interacting genes.

Conclusions

We developed a facile, web-enabled program to determine gene-gene correlations across different gene expression microarray platforms. Using well-characterized genes, we illustrate how CORD's identification of co-expressed genes contributes to a better understanding a gene's potential function. The website is found at http://cord-db.org.",2014-03-05 +25717197,RepExplore: addressing technical replicate variance in proteomics and metabolomics data analysis.,"

Unlabelled

High-throughput omics datasets often contain technical replicates included to account for technical sources of noise in the measurement process. Although summarizing these replicate measurements by using robust averages may help to reduce the influence of noise on downstream data analysis, the information on the variance across the replicate measurements is lost in the averaging process and therefore typically disregarded in subsequent statistical analyses. We introduce RepExplore, a web-service dedicated to exploit the information captured in the technical replicate variance to provide more reliable and informative differential expression and abundance statistics for omics datasets. The software builds on previously published statistical methods, which have been applied successfully to biomedical omics data but are difficult to use without prior experience in programming or scripting. RepExplore facilitates the analysis by providing a fully automated data processing and interactive ranking tables, whisker plot, heat map and principal component analysis visualizations to interpret omics data and derived statistics.

Availability and implementation

Freely available at http://www.repexplore.tk

Contact

enrico.glaab@uni.lu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-25 +23740839,Correlating transcriptional networks to breast cancer survival: a large-scale coexpression analysis.,"Weighted gene coexpression network analysis (WGCNA) is a powerful 'guilt-by-association'-based method to extract coexpressed groups of genes from large heterogeneous messenger RNA expression data sets. We have utilized WGCNA to identify 11 coregulated gene clusters across 2342 breast cancer samples from 13 microarray-based gene expression studies. A number of these transcriptional modules were found to be correlated to clinicopathological variables (e.g. tumor grade), survival endpoints for breast cancer as a whole (disease-free survival, distant disease-free survival and overall survival) and also its molecular subtypes (luminal A, luminal B, HER2+ and basal-like). Examples of findings arising from this work include the identification of a cluster of proliferation-related genes that when upregulated correlated to increased tumor grade and were associated with poor survival in general. The prognostic potential of novel genes, for example, ubiquitin-conjugating enzyme E2S (UBE2S) within this group was confirmed in an independent data set. In addition, gene clusters were also associated with survival for breast cancer molecular subtypes including a cluster of genes that was found to correlate with prognosis exclusively for basal-like breast cancer. The upregulation of several single genes within this coexpression cluster, for example, the potassium channel, subfamily K, member 5 (KCNK5) was associated with poor outcome for the basal-like molecular subtype. 
We have developed an online database to allow user-friendly access to the coexpression patterns and the survival analysis outputs uncovered in this study (available at http://glados.ucd.ie/Coexpression/).",2013-06-05 +28031185,MetaDCN: meta-analysis framework for differential co-expression network detection with an application in breast cancer.,"

Motivation

Gene co-expression network analysis from transcriptomic studies can elucidate gene-gene interactions and regulatory mechanisms. Differential co-expression analysis helps further detect alterations of regulatory activities in case/control comparison. Co-expression networks estimated from single transcriptomic study is often unstable and not generalizable due to cohort bias and limited sample size. With the rapid accumulation of publicly available transcriptomic studies, co-expression analysis combining multiple transcriptomic studies can provide more accurate and robust results.

Results

In this paper, we propose a meta-analytic framework for detecting differentially co-expressed networks (MetaDCN). Differentially co-expressed seed modules are first detected by optimizing an energy function via simulated annealing. Basic modules sharing common pathways are merged into pathway-centric supermodules and a Cytoscape plug-in (MetaDCNExplorer) is developed to visualize and explore the findings. We applied MetaDCN to two breast cancer applications: ER+/ER- comparison using five training and three testing studies, and ILC/IDC comparison with two training and two testing studies. We identified 20 and 4 supermodules for ER+/ER- and ILC/IDC comparisons, respectively. Ranking atop are 'immune response pathway' and 'complement cascades pathway' for ER comparison, and 'extracellular matrix pathway' for ILC/IDC comparison. Without the need for prior information, the results from MetaDCN confirm existing as well as discover novel disease mechanisms in a systems manner.

Availability and implementation

R package 'MetaDCN' and Cytoscape App 'MetaDCNExplorer' are available at http://tsenglab.biostat.pitt.edu/software.htm .

Contact

ctseng@pitt.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-04-01 +26283178,MVDA: a multi-view genomic data integration methodology.,"

Background

Multiple high-throughput molecular profiling by omics technologies can be collected for the same individuals. Combining these data, rather than exploiting them separately, can significantly increase the power of clinically relevant patients subclassifications.

Results

We propose a multi-view approach in which the information from different data layers (views) is integrated at the levels of the results of each single view clustering iterations. It works by factorizing the membership matrices in a late integration manner. We evaluated the effectiveness and the performance of our method on six multi-view cancer datasets. In all the cases, we found patient sub-classes with statistical significance, identifying novel sub-groups previously not emphasized in literature. Our method performed better as compared to other multi-view clustering algorithms and, unlike other existing methods, it is able to quantify the contribution of single views on the final results.

Conclusion

Our observations suggest that integration of prior information with genomic features in the subtyping analysis is an effective strategy in identifying disease subgroups. The methodology is implemented in R and the source code is available online at http://neuronelab.unisa.it/a-multi-view-genomic-data-integration-methodology/ .",2015-08-19 +27031083,Phenotype-driven gene target definition in clinical genome-wide sequencing data interpretation.,"

Purpose

Genome-wide sequencing approaches are increasingly being used in place of disease gene panel sequencing approaches. Despite the well-recognized benefits of these approaches, they also carry with them an increased burden of analyzing overwhelmingly large gene targets and an increased possibility of detecting incidental findings.

Methods

We propose a novel approach for design of individualized phenotype gene panels using the set of signs and symptoms observed and selecting relevant genes on the basis of known phenotype-gene associations.

Results

We used results of diagnostic exome sequencing in 405 cases submitted to our institution to show retrospectively that using the phenotype gene panel increases the sensitivity of masked exome analysis (increase from 25.4 to 29.7% in overall diagnostic yield). We also show that such a strategy enables the possibility of masked analysis of genome-wide sequencing data in patients with poorly defined and multifaceted clinical presentations. Ultimately, we show that this approach enables control over the incidental findings rate (0.25% in phenotype gene panels). Finally, we provide a Web tool for customized phenotype panel creation (available at http://www.kimg.eu/generator).

Conclusion

In conclusion, we present a novel approach to a phenotype-driven diagnostic process of genome scale sequencing data that harnesses the sensitivity of these approaches while restricting the analysis to genes relevant to clinical presentation in patient.Genet Med 18 11, 1102-1110.",2016-03-31 +28028797,"GLUT1, MCT1/4 and CD147 overexpression supports the metabolic reprogramming in papillary renal cell carcinoma.","Papillary Renal Cell carcinoma (pRCC) is the second most common type of RCC, accounting for about 15% of all RCCs. Surgical excision is the main treatment option. Still, 10 - 15 % of clinically localized tumours will recur and/or develop metastasis early after surgery, and no reliable prognostic biomarkers are available to identify them. It is known that pRCC cells rely on high rates of aerobic glycolysis, characterized by the up-regulation of many proteins and enzymes related with the glycolytic pathway. However, a metabolic signature enabling the identification of advanced pRCC tumours remains to be discovered. The aim of this study was to characterize the metabolic phenotype of pRCCs (subtypes 1-pRCC1 and 2-pRCC2) by evaluating the expression pattern of the glucose transporters (GLUTs) 1 and 4 and the monocarboxylate transporters (MCTs) 1 and 4, as well as their chaperon CD147. We analysed the clinico-pathological data and the protein and mRNA expression of GLUT1, GLUT4 and MCT1, MCT4 and CD147 in tumours from Porto and TCGA series (http://cancergenome.nih.gov/), respectively. With the exception of GLUT4, plasma membrane expression of all proteins was frequently observed in pRCCs. GLUT1 and MCT1 membrane overexpression was significantly higher in pRCC2 and significantly associated with higher pN-stage and higher Fuhrman grade. Overexpression of GLUT1, MCT1/4 and CD147, supports the metabolic reprograming in pRCCs. 
MCT1 expression was associated with pRCC aggressiveness, regardless of the tumour histotype.",2016-12-28 +27173522,PepPSy: a web server to prioritize gene products in experimental and biocuration workflows. ,"Among the 20 000 human gene products predicted from genome annotation, about 3000 still lack validation at protein level. We developed PepPSy, a user-friendly gene expression-based prioritization system, to help investigators to determine in which human tissues they should look for an unseen protein. PepPSy can also be used by biocurators to revisit the annotation of specific categories of proteins based on the 'omics' data housed by the system. In this study, it was used to prioritize 21 dubious protein-coding genes among the 616 annotated in neXtProt for reannotation. PepPSy is freely available at http://peppsy.genouest.org. Database URL: http://peppsy.genouest.org.",2016-05-12 +26501006,Have Smartphones Contributed in the Clinical Progress of Oral and Maxillofacial Surgery?,"BACKGROUND:Dental surgeons who encounter complex situations, such as those in unscheduled care, often have limited resources to provide a structured and specialty care. Therefore, there is always a need for cost-effective, easy to handle, easy to carry ""Smartphones"". OBJECTIVE:The purpose of this paper was to undertake a review of literature on ""Smartphone in Oral and Maxillofacial Surgery"" online data-base and discuss the case series with emphasis on the role of attending dental surgeon and the maxillofacial surgeon. MATERIALS AND METHODS:The available literature relevant to oral and maxillofacial surgery in online data-base of the United States National Library of Medicine: PubMed (http://www.ncbi.nlm.nih.gov/pubmed/) was searched. The inclusion criterion was to review the published clinical papers, abstracts and evidence based reviews on 'Uses of Smartphone in Oral and Maxillofacial Surgery'. 
RESULTS:Six articles were found with the search term ""Smartphone in Oral and Maxillofacial Surgery"" in the literature searched. Five articles met the inclusion criteria for the study. The relevant data was extracted, tabulated, and reviewed to draw evidence-based conclusions for uses of smartphone in oral and maxillofacial surgery. CONCLUSION:Utilization of smartphones in oral and maxillofacial surgery facilitate in differential diagnosis, treatment, follow up, prevention of the disease further and thereby improve the quality of patient care without requiring the presence of the maxillofacial surgeon in remote areas.",2015-09-01 +22953731,CTen: a web-based platform for identifying enriched cell types from heterogeneous microarray data.,"

Background

Interpreting in vivo sampled microarray data is often complicated by changes in the cell population demographics. To put gene expression into its proper biological context, it is necessary to distinguish differential gene transcription from artificial gene expression induced by changes in the cellular demographics.

Results

CTen (cell type enrichment) is a web-based analytical tool which uses our highly expressed, cell specific (HECS) gene database to identify enriched cell types in heterogeneous microarray data. The web interface is designed for differential expression and gene clustering studies, and the enrichment results are presented as heatmaps or downloadable text files.

Conclusions

In this work, we use an independent, cell-specific gene expression data set to assess CTen's performance in accurately identifying the appropriate cell type and provide insight into the suggested level of enrichment to optimally minimize the number of false discoveries. We show that CTen, when applied to microarray data developed from infected lung tissue, can correctly identify the cell signatures of key lymphocytes in a highly heterogeneous environment and compare its performance to another popular bioinformatics tool. Furthermore, we discuss the strong implications cell type enrichment has in the design of effective microarray workflow strategies and show that, by combining CTen with gene expression clustering, we may be able to determine the relative changes in the number of key cell types. CTen is available at http://www.influenza-x.org/~jshoemaker/cten/",2012-09-06 +27608769,Where's WALY? : A proof of concept study of the 'wellbeing adjusted life year' using secondary analysis of cross-sectional survey data.,"

Background

The Quality-Adjusted Life Year (QALY) is a measure that combines life extension and health improvement in a single score, reflecting preferences around different types of health gain. It can therefore be used to inform decision-making around allocation of health care resources to mutually exclusive options that would produce qualitatively different health benefits. A number of quality-of-life instruments can be used to calculate QALYs. The EQ-5D is one of the most commonly used, and is the preferred option for submissions to NICE ( https://www.nice.org.uk/process/pmg9/ ). However, it has limitations that might make it unsuitable for use in areas such as public and mental health where interventions may aim to improve well-being. One alternative to the QALY is a Wellbeing-Adjusted Life Year. In this study we explore the need for a Wellbeing-Adjusted Life Year measure by examining the extent to which a measure of wellbeing (the Warwick-Edinburgh Mental Well-being Scale) maps onto the EQ-5D-3L.

Methods

Secondary analyses were conducted on data from the Coventry Household Survey in which 7469 participants completed the EQ-5D-3L, Warwick-Edinburgh Mental Well-being Scale, and a measure of self-rated health. Data were analysed using descriptive statistics, Pearson's and Spearman's correlations, linear regression, and receiver operating characteristic curves.

Results

Approximately 75 % of participants scored the maximum on the EQ-5D-3L. Those with maximum EQ-5D-3L scores reported a wide range of levels of mental wellbeing. Both the Warwick-Edinburgh Mental Well-being Scale and the EQ-5D-3L were able to detect differences between those with higher and lower levels of self-reported health. Linear regression indicated that scores on the Warwick-Edinburgh Mental Well-being Scale and the EQ-5D-3L were weakly, positively correlated (with R(2) being 0.104 for the index and 0.141 for the visual analogue scale).

Conclusion

The Warwick-Edinburgh Mental Well-being Scale maps onto the EQ-5D-3L to only a limited extent. Levels of mental wellbeing varied greatly amongst participants who had the maximum score on the EQ-5D-3L. To evaluate the relative effectiveness of interventions that impact on mental wellbeing, a new measure - a Wellbeing Adjusted Life Year - is needed.",2016-09-08 +26231428,MEPSA: minimum energy pathway analysis for energy landscapes.,"

Unlabelled

From conformational studies to atomistic descriptions of enzymatic reactions, potential and free energy landscapes can be used to describe biomolecular systems in detail. However, extracting the relevant data of complex 3D energy surfaces can sometimes be laborious. In this article, we present MEPSA (Minimum Energy Path Surface Analysis), a cross-platform user friendly tool for the analysis of energy landscapes from a transition state theory perspective. Some of its most relevant features are: identification of all the barriers and minima of the landscape at once, description of maxima edge profiles, detection of the lowest energy path connecting two minima and generation of transition state theory diagrams along these paths. In addition to a built-in plotting system, MEPSA can save most of the generated data into easily parseable text files, allowing more versatile uses of MEPSA's output such as the generation of molecular dynamics restraints from a calculated path.

Availability and implementation

MEPSA is freely available (under GPLv3 license) at: http://bioweb.cbm.uam.es/software/MEPSA/ CONTACT: pagomez@cbm.csic.es.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-31 +24475134,ASDCD: antifungal synergistic drug combination database.,"Finding effective drugs to treat fungal infections has important clinical significance based on high mortality rates, especially in an immunodeficient population. Traditional antifungal drugs with single targets have been reported to cause serious side effects and drug resistance. Nowadays, however, drug combinations, particularly with respect to synergistic interaction, have attracted the attention of researchers. In fact, synergistic drug combinations could simultaneously affect multiple subpopulations, targets, and diseases. Therefore, a strategy that employs synergistic antifungal drug combinations could eliminate the limitations noted above and offer the opportunity to explore this emerging bioactive chemical space. However, it is first necessary to build a powerful database in order to facilitate the analysis of drug combinations. To address this gap in our knowledge, we have built the first Antifungal Synergistic Drug Combination Database (ASDCD), including previously published synergistic antifungal drug combinations, chemical structures, targets, target-related signaling pathways, indications, and other pertinent data. Its current version includes 210 antifungal synergistic drug combinations and 1225 drug-target interactions, involving 105 individual drugs from more than 12,000 references. ASDCD is freely available at http://ASDCD.amss.ac.cn.",2014-01-24 +23874394,"LifeMap Discovery™: the embryonic development, stem cells, and regenerative medicine research portal.","LifeMap Discovery™ provides investigators with an integrated database of embryonic development, stem cell biology and regenerative medicine. The hand-curated reconstruction of cell ontology with stem cell biology; including molecular, cellular, anatomical and disease-related information, provides efficient and easy-to-use, searchable research tools. 
The database collates in vivo and in vitro gene expression and guides translation from in vitro data to the clinical utility, and thus can be utilized as a powerful tool for research and discovery in stem cell biology, developmental biology, disease mechanisms and therapeutic discovery. LifeMap Discovery is freely available to academic nonprofit institutions at http://discovery.lifemapsc.com.",2013-07-17 +21453524,Reliability analysis of the Ahringer Caenorhabditis elegans RNAi feeding library: a guide for genome-wide screens.,"

Background

The Ahringer C. elegans RNAi feeding library prepared by cloning genomic DNA fragments has been widely used in genome-wide analysis of gene function. However, the library has not been thoroughly validated by direct sequencing, and there are potential errors, including: 1) mis-annotation (the clone with the retired gene name should be remapped to the actual target gene); 2) nonspecific PCR amplification; 3) cross-RNAi; 4) mis-operation such as sample loading error, etc.

Results

Here we performed a reliability analysis on the Ahringer C. elegans RNAi feeding library, which contains 16,256 bacterial strains, using a bioinformatics approach. Results demonstrated that most (98.3%) of the bacterial strains in the library are reliable. However, we also found that 2,851 (17.54%) bacterial strains need to be re-annotated even though they are reliable. Most of these bacterial strains are the clones having the retired gene names. Besides, 28 strains are grouped into unreliable category and 226 strains are marginal because of probably expressing unrelated double-stranded RNAs (dsRNAs). The accuracy of the prediction was further confirmed by direct sequencing analysis of 496 bacterial strains. Finally, a freely accessible database named CelRNAi (http://biocompute.bmi.ac.cn/CelRNAi/) was developed as a valuable complement resource for the feeding RNAi library by providing the predicted information on all bacterial strains. Moreover, submission of the direct sequencing result or any other annotations for the bacterial strains to the database is allowed and will be integrated into the CelRNAi database to improve the accuracy of the library. In addition, we provide five candidate primer sets for each of the unreliable and marginal bacterial strains for users to construct an alternative vector for their own RNAi studies.

Conclusions

Because of the potential unreliability of the Ahringer C. elegans RNAi feeding library, we strongly suggest the user examine the reliability information of the bacterial strains in the CelRNAi database before performing RNAi experiments, as well as the post-RNAi experiment analysis.",2011-03-31 +28957412,Cohort profile: The Canadian HIV Women's Sexual and Reproductive Health Cohort Study (CHIWOS).,"Globally, women are at increased vulnerability to HIV due to biological, social, structural, and political reasons. Women living with HIV also experience unique issues related to their medical and social healthcare, which makes a clinical care model specific to their needs worthy of exploration. Furthermore, there is a dearth of research specific to women living with HIV. Research for this population has often been narrowly focused on pregnancy-related issues without considering their complex structural inequalities, social roles, and healthcare and biological needs. For these reasons, we have come together, as researchers, clinicians and community members in Canada, to develop the Canadian HIV Women's Sexual and Reproductive Health Cohort Study (CHIWOS) to investigate the concept of women-centred HIV care (WCHC) and its impact on the overall, HIV, women's, mental, sexual, and reproductive health outcomes of women living with HIV. Here, we present the CHIWOS cohort profile, which describes the cohort and presents preliminary findings related to perceived WCHC. CHIWOS is a prospective, observational cohort study of women living with HIV in British Columbia (BC), Ontario, and Quebec. Two additional Canadian provinces, Saskatchewan and Manitoba, will join the cohort in 2018. Using community-based research principles, CHIWOS engages women living with HIV throughout the entire research process meeting the requirements of the 'Greater Involvement of People living with HIV/AIDS'. 
Study data are collected through an interviewer-administered questionnaire that uses a web-based platform. From August 2013 to May 2015, a total of 1422 women living with HIV in BC, Ontario, and Quebec were enrolled and completed the baseline visit. Follow-up interviews are being conducted at 18-month intervals. Of the 1422 participants at baseline, 356 were from BC (25%), 713 from Ontario (50%), 353 from Quebec (25%). The median age of the participants at baseline was 43 years (range, 16-74). 22% identified as Indigenous, 30% as African, Caribbean or Black, 41% as Caucasian/White, and 7% as other ethnicities. Overall, 83% of women were taking antiretroviral therapy at the time of the baseline interview and of them, 87% reported an undetectable viral load. Of the 1326 women who received HIV medical care in the previous year and responded to corresponding questions, 57% (95% CI: 54%-60%) perceived that the care they received from their primary HIV doctor had been women-centred. There were provincial and age differences among women who indicated that they received WCHC versus not; women from BC or Ontario were more likely to report WCHC compared to participants in Quebec. They were also more likely to be younger. CHIWOS will be an important tool to develop care models specific for women living with HIV. Moreover, CHIWOS is collecting extensive information on socio-demographics, social determinants of health, psychological factors, and sexual and reproductive health and offers an important platform to answer many relevant research questions for and with women living with HIV. Information on the cohort can be found on the study website (http://www.chiwos.ca).",2017-09-28 +27998934,Xenolog classification.,"

Motivation

Orthology analysis is a fundamental tool in comparative genomics. Sophisticated methods have been developed to distinguish between orthologs and paralogs and to classify paralogs into subtypes depending on the duplication mechanism and timing, relative to speciation. However, no comparable framework exists for xenologs: gene pairs whose history, since their divergence, includes a horizontal transfer. Further, the diversity of gene pairs that meet this broad definition calls for classification of xenologs with similar properties into subtypes.

Results

We present a xenolog classification that uses phylogenetic reconciliation to assign each pair of genes to a class based on the event responsible for their divergence and the historical association between genes and species. Our classes distinguish between genes related through transfer alone and genes related through duplication and transfer. Further, they separate closely-related genes in distantly-related species from distantly-related genes in closely-related species. We present formal rules that assign gene pairs to specific xenolog classes, given a reconciled gene tree with an arbitrary number of duplications and transfers. These xenology classification rules have been implemented in software and tested on a collection of ∼13 000 prokaryotic gene families. In addition, we present a case study demonstrating the connection between xenolog classification and gene function prediction.

Availability and implementation

The xenolog classification rules have been implemented in Notung 2.9, a freely available phylogenetic reconciliation software package. http://www.cs.cmu.edu/~durand/Notung . Gene trees are available at http://dx.doi.org/10.7488/ds/1503 .

Contact

durand@cmu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +25854485,PhyResSE: a Web Tool Delineating Mycobacterium tuberculosis Antibiotic Resistance and Lineage from Whole-Genome Sequencing Data.,"Antibiotic-resistant tuberculosis poses a global threat, causing the deaths of hundreds of thousands of people annually. While whole-genome sequencing (WGS), with its unprecedented level of detail, promises to play an increasingly important role in diagnosis, data analysis is a daunting challenge. Here, we present a simple-to-use web service (free for academic use at http://phyresse.org). Delineating both lineage and resistance, it provides state-of-the-art methodology to life scientists and physicians untrained in bioinformatics. It combines elaborate data processing and quality control, as befits human diagnostics, with a treasure trove of validated resistance data collected from well-characterized samples in-house and worldwide.",2015-04-08 +26046293,FunPat: function-based pattern analysis on RNA-seq time series data.,"

Background

Dynamic expression data, nowadays obtained using high-throughput RNA sequencing, are essential to monitor transient gene expression changes and to study the dynamics of their transcriptional activity in the cell or response to stimuli. Several methods for data selection, clustering and functional analysis are available; however, these steps are usually performed independently, without exploiting and integrating the information derived from each step of the analysis.

Methods

Here we present FunPat, an R package for time series RNA sequencing data that integrates gene selection, clustering and functional annotation into a single framework. FunPat exploits functional annotations by performing for each functional term, e.g. a Gene Ontology term, an integrated selection-clustering analysis to select differentially expressed genes that share, besides annotation, a common dynamic expression profile.

Results

FunPat performance was assessed on both simulated and real data. With respect to a stand-alone selection step, the integration of the clustering step is able to improve the recall without altering the false discovery rate. FunPat also shows high precision and recall in detecting the correct temporal expression patterns; in particular, the recall is significantly higher than hierarchical, k-means and a model-based clustering approach specifically designed for RNA sequencing data. Moreover, when biological replicates are missing, FunPat is able to provide reproducible lists of significant genes. The application to real time series expression data shows the ability of FunPat to select differentially expressed genes with high reproducibility, indirectly confirming high precision and recall in gene selection. Moreover, the expression patterns obtained as output allow an easy interpretation of the results.

Conclusions

A novel analysis pipeline was developed to search the main temporal patterns in classes of genes similarly annotated, improving the sensitivity of gene selection by integrating the statistical evidence of differential expression with the information on temporal profiles and the functional annotations. Significant genes are associated to both the most informative functional terms, avoiding redundancy of information, and the most representative temporal patterns, thus improving the readability of the results. FunPat package is provided in R/Bioconductor at link: http://sysbiobig.dei.unipd.it/?q=node/79.",2015-06-01 +26895947,MetaCRAM: an integrated pipeline for metagenomic taxonomy identification and compression.,"

Background

Metagenomics is a genomics research discipline devoted to the study of microbial communities in environmental samples and human and animal organs and tissues. Sequenced metagenomic samples usually comprise reads from a large number of different bacterial communities and hence tend to result in large file sizes, typically ranging between 1-10 GB. This leads to challenges in analyzing, transferring and storing metagenomic data. In order to overcome these data processing issues, we introduce MetaCRAM, the first de novo, parallelized software suite specialized for FASTA and FASTQ format metagenomic read processing and lossless compression.

Results

MetaCRAM integrates algorithms for taxonomy identification and assembly, and introduces parallel execution methods; furthermore, it enables genome reference selection and CRAM based compression. MetaCRAM also uses novel reference-based compression methods designed through extensive studies of integer compression techniques and through fitting of empirical distributions of metagenomic read-reference positions. MetaCRAM is a lossless method compatible with standard CRAM formats, and it allows for fast selection of relevant files in the compressed domain via maintenance of taxonomy information. The performance of MetaCRAM as a stand-alone compression platform was evaluated on various metagenomic samples from the NCBI Sequence Read Archive, suggesting 2- to 4-fold compression ratio improvements compared to gzip. On average, the compressed file sizes were 2-13 percent of the original raw metagenomic file sizes.

Conclusions

We described the first architecture for reference-based, lossless compression of metagenomic data. The compression scheme proposed offers significantly improved compression ratios as compared to off-the-shelf methods such as zip programs. Furthermore, it enables running different components in parallel and it provides the user with taxonomic and assembly information generated during execution of the compression pipeline.

Availability

The MetaCRAM software is freely available at http://web.engr.illinois.edu/~mkim158/metacram.html. The website also contains a README file and other relevant instructions for running the code. Note that to run the code one needs a minimum of 16 GB of RAM. In addition, virtual box is set up on a 4GB RAM machine for users to run a simple demonstration.",2016-02-19 +27855145,"Potentially Preventable Deaths Among the Five Leading Causes of Death - United States, 2010 and 2014.","Death rates by specific causes vary across the 50 states and the District of Columbia.* Information on differences in rates for the leading causes of death among states might help state health officials determine prevention goals, priorities, and strategies. CDC analyzed National Vital Statistics System data to provide national and state-specific estimates of potentially preventable deaths among the five leading causes of death in 2014 and compared these estimates with estimates previously published for 2010. Compared with 2010, the estimated number of potentially preventable deaths changed (supplemental material at https://stacks.cdc.gov/view/cdc/42472); cancer deaths decreased 25% (from 84,443 to 63,209), stroke deaths decreased 11% (from 16,973 to 15,175), heart disease deaths decreased 4% (from 91,757 to 87,950), chronic lower respiratory disease (CLRD) (e.g., asthma, bronchitis, and emphysema) deaths increased 1% (from 28,831 to 29,232), and deaths from unintentional injuries increased 23% (from 36,836 to 45,331). A better understanding of progress made in reducing potentially preventable deaths in the United States might inform state and regional efforts targeting the prevention of premature deaths from the five leading causes in the United States.",2016-11-18 +25913207,DockStar: a novel ILP-based integrative method for structural modeling of multimolecular protein complexes.,"

Motivation

Atomic resolution modeling of large multimolecular assemblies is a key task in Structural Cell Biology. Experimental techniques can provide atomic resolution structures of single proteins and small complexes, or low resolution data of large multimolecular complexes.

Results

We present a novel integrative computational modeling method, which integrates both low and high resolution experimental data. The algorithm accepts as input atomic resolution structures of the individual subunits obtained from X-ray, NMR or homology modeling, and interaction data between the subunits obtained from mass spectrometry. The optimal assembly of the individual subunits is formulated as an Integer Linear Programming task. The method was tested on several representative complexes, both in the bound and unbound cases. It placed correctly most of the subunits of multimolecular complexes of up to 16 subunits and significantly outperformed the CombDock and Haddock multimolecular docking methods.

Availability and implementation

http://bioinfo3d.cs.tau.ac.il/DockStar

Contact

naamaamir@mail.tau.ac.il or wolfson@tau.ac.il

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-25 +27845998,Bioresorbable scaffolds compared with everolimus-eluting stents for the treatment of chronic coronary total occlusion: clinical and angiographic results of a matched paired comparison.,"

Objectives

Data on bioresorbable vascular scaffolds for recanalization of chronic total occlusions (CTOs) are limited. We compared the bioresorbable scaffold Absorb with everolimus-eluting stents for the treatment of true CTO.

Methods

After recanalization of CTO, 15 lesions treated with the bioresorbable scaffold Absorb were matched with 15 lesions receiving everolimus-eluting stent (EES) (http://www.clinicaltrials.gov NCT02162082). Match criteria were presence of diabetes mellitus, total device length and maximal device diameter. Angiographic follow-up was scheduled after 9 months and clinical follow-up after 12 months. Dual antiplatelet therapy was administered for 12 months. Quantitative coronary analysis was carried out before and after implantation and at angiographic follow-up. All lesions were predilated. The Absorb scaffolds and drug-eluting stents were carefully placed and postdilatated with high-pressure balloons. Patients received dual antiplatelet therapy for 12 months. The baseline characteristics were similar between both the groups. The mean scaffold length was 81.7±29.1 versus 79.3±27.4 mm for the mean stent length (P=0.82). In-device late lumen loss at the 9-month follow-up was 0.38±0.47 versus 0.46±0.60 mm (P=0.69). The device-oriented composite endpoint was similar in both groups, with 6.7% in the Absorb-group versus 13.3% in the EES group because of target lesion revascularization (P=0.54).

Conclusion

In CTOs, the use of a bioresorbable scaffold Absorb after recanalization showed similar 9-month angiographic and 12-month clinical results compared with an EES with 12 months of dual antiplatelet therapy.",2017-03-01 +23088274,Computational tools and resources for metabolism-related property predictions. 2. Application to prediction of half-life time in human liver microsomes.,"

Background

The most important factor affecting metabolic excretion of compounds from the body is their half-life time. This provides an indication of compound stability of, for example, drug molecules. We report on our efforts to develop QSAR models for metabolic stability of compounds, based on in vitro half-life assay data measured in human liver microsomes.

Method

A variety of QSAR models generated using different statistical methods and descriptor sets implemented in both open-source and commercial programs (KNIME, GUSAR and StarDrop) were analyzed. The models obtained were compared using four different external validation sets from public and commercial data sources, including two smaller sets of in vivo half-life data in humans.

Conclusion

In many cases, the accuracy of prediction achieved on one external test set did not correspond to the results achieved with another test set. The most predictive models were used for predicting the metabolic stability of compounds from the open NCI database, the results of which are publicly available on the NCI/CADD Group web server ( http://cactus.nci.nih.gov ).",2012-10-01 +23092397,admetSAR: a comprehensive source and free tool for assessment of chemical ADMET properties.,"Absorption, distribution, metabolism, excretion, and toxicity (ADMET) properties play key roles in the discovery/development of drugs, pesticides, food additives, consumer products, and industrial chemicals. This information is especially useful when to conduct environmental and human hazard assessment. The most critical rate limiting step in the chemical safety assessment workflow is the availability of high quality data. This paper describes an ADMET structure-activity relationship database, abbreviated as admetSAR. It is an open source, text and structure searchable, and continually updated database that collects, curates, and manages available ADMET-associated properties data from the published literature. In admetSAR, over 210,000 ADMET annotated data points for more than 96,000 unique compounds with 45 kinds of ADMET-associated properties, proteins, species, or organisms have been carefully curated from a large number of diverse literatures. The database provides a user-friendly interface to query a specific chemical profile, using either CAS registry number, common name, or structure similarity. In addition, the database includes 22 qualitative classification and 5 quantitative regression models with highly predictive accuracy, allowing to estimate ecological/mammalian ADMET properties for novel chemicals. 
AdmetSAR is accessible free of charge at http://www.admetexp.org.",2012-11-01 +26207740,A Methodology for the Development of RESTful Semantic Web Services for Gene Expression Analysis.,"Gene expression studies are generally performed through multi-step analysis processes, which require the integrated use of a number of analysis tools. In order to facilitate tool/data integration, an increasing number of analysis tools have been developed as or adapted to semantic web services. In recent years, some approaches have been defined for the development and semantic annotation of web services created from legacy software tools, but these approaches still present many limitations. In addition, to the best of our knowledge, no suitable approach has been defined for the functional genomics domain. Therefore, this paper aims at defining an integrated methodology for the implementation of RESTful semantic web services created from gene expression analysis tools and the semantic annotation of such services. We have applied our methodology to the development of a number of services to support the analysis of different types of gene expression data, including microarray and RNASeq. All developed services are publicly available in the Gene Expression Analysis Services (GEAS) Repository at http://dcm.ffclrp.usp.br/lssb/geas. Additionally, we have used a number of the developed services to create different integrated analysis scenarios to reproduce parts of two gene expression studies documented in the literature. The first study involves the analysis of one-color microarray data obtained from multiple sclerosis patients and healthy donors. The second study comprises the analysis of RNA-Seq data obtained from melanoma cells to investigate the role of the remodeller BRG1 in the proliferation and morphology of these cells. Our methodology provides concrete guidelines and technical details in order to facilitate the systematic development of semantic web services. 
Moreover, it encourages the development and reuse of these services for the creation of semantically integrated solutions for gene expression analysis.",2015-07-24 +26608751,Detecting differentially expressed genes by smoothing effect of gene length on variance estimation.,"Next-generation sequencing technologies are widely used in genome research, and RNA sequencing (RNA-Seq) is becoming the main application for gene expression profiling. A large number of computational methods have been developed for analyzing differentially expressed (DE) genes in RNA-Seq data. However, most existing algorithms prefer to call long genes as DE. Short DE genes are rarely detected. In this work, we set out to gain insight into the influence of gene length on RNA-Seq data analysis and to figure out the effect of gene length on variance estimation of RNA-Seq read counts, which is important for statistic test to identify DE genes. We proposed a balanced method of hunting for short DE genes with significance by smoothing a gene length factor. Computational experiments indicate that our method performs well. Software available: http://www.iipl.fudan.edu.cn/lenseq/.",2015-10-11 +22610856,GENIES: gene network inference engine based on supervised analysis.,"Gene network inference engine based on supervised analysis (GENIES) is a web server to predict unknown part of gene network from various types of genome-wide data in the framework of supervised network inference. The originality of GENIES lies in the construction of a predictive model using partially known network information and in the integration of heterogeneous data with kernel methods. The GENIES server accepts any 'profiles' of genes or proteins (e.g. gene expression profiles, protein subcellular localization profiles and phylogenetic profiles) or pre-calculated gene-gene similarity matrices (or 'kernels') in the tab-delimited file format. 
As a training data set to learn a predictive model, the users can choose either known molecular network information in the KEGG PATHWAY database or their own gene network data. The user can also select an algorithm of supervised network inference, choose various parameters in the method, and control the weights of heterogeneous data integration. The server provides the list of newly predicted gene pairs, maps the predicted gene pairs onto the associated pathway diagrams in KEGG PATHWAY and indicates candidate genes for missing enzymes in organism-specific metabolic pathways. GENIES (http://www.genome.jp/tools/genies/) is publicly available as one of the genome analysis tools in GenomeNet.",2012-05-18 +24270789,Protein Ontology: a controlled structured network of protein entities.,"The Protein Ontology (PRO; http://proconsortium.org) formally defines protein entities and explicitly represents their major forms and interrelations. Protein entities represented in PRO corresponding to single amino acid chains are categorized by level of specificity into family, gene, sequence and modification metaclasses, and there is a separate metaclass for protein complexes. All metaclasses also have organism-specific derivatives. PRO complements established sequence databases such as UniProtKB, and interoperates with other biomedical and biological ontologies such as the Gene Ontology (GO). PRO relates to UniProtKB in that PRO's organism-specific classes of proteins encoded by a specific gene correspond to entities documented in UniProtKB entries. PRO relates to the GO in that PRO's representations of organism-specific protein complexes are subclasses of the organism-agnostic protein complex terms in the GO Cellular Component Ontology. The past few years have seen growth and changes to the PRO, as well as new points of access to the data and new applications of PRO in immunology and proteomics. 
Here we describe some of these developments.",2013-11-21 +23118479,SpliceAid-F: a database of human splicing factors and their RNA-binding sites.,"A comprehensive knowledge of all the factors involved in splicing, both proteins and RNAs, and of their interaction network is crucial for reaching a better understanding of this process and its functions. A large part of relevant information is buried in the literature or collected in various different databases. By hand-curated screenings of literature and databases, we retrieved experimentally validated data on 71 human RNA-binding splicing regulatory proteins and organized them into a database called 'SpliceAid-F' (http://www.caspur.it/SpliceAidF/). For each splicing factor (SF), the database reports its functional domains, its protein and chemical interactors and its expression data. Furthermore, we collected experimentally validated RNA-SF interactions, including relevant information on the RNA-binding sites, such as the genes where these sites lie, their genomic coordinates, the splicing effects, the experimental procedures used, as well as the corresponding bibliographic references. We also collected information from experiments showing no RNA-SF binding, at least in the assayed conditions. In total, SpliceAid-F contains 4227 interactions, 2590 RNA-binding sites and 1141 'no-binding' sites, including information on cellular contexts and conditions where binding was tested. The data collected in SpliceAid-F can provide significant information to explain an observed splicing pattern as well as the effect of mutations in functional regulatory elements.",2012-10-30 +27128735,Trends in urological stone disease: a 5-year update of hospital episode statistics.,"

Objective

To provide a 5-year follow-on update on the changes in prevalence and treatment of upper urinary tract (UUT) stone disease in England.

Methods

Data from the Hospital Episode Statistics (HES) website (http://www.hesonline.nhs.uk) were extracted, summarised, analysed, and presented.

Results

The total number of UUT stone hospital episodes increased slightly from 83 050 in 2009-2010 to 86 742 in 2014-2015 (4.4% increase). The use of shockwave lithotripsy (SWL) for treating all UUT stones remained stable over the 5-year study period following a significant increase in previous years. There was a 49.6% increase in the number of ureteroscopic stone treatments from 12 062 in 2009-2010 to 18 055 in 2014-2015. Increase in ureterorenoscopy (flexible ureteroscopy) showed the most rapid increase from 3 267 to 6 631 cases in the 5-year study period (103% increase). The gap between the total number of ureteroscopies and SWL treatments continues to narrow. Open stone surgery continued to decline with only 30 reported cases in 2014-2015. Due to the continued rapid increase in the number of ureteroscopies performed, treatment for stone disease has continued to increase significantly in comparison to other urological activity.

Conclusion

This study provides an update on the changing landscape of the management of UUT stones in England. It shows a sustained high prevalence of stone disease commensurate with levels in other developed countries. This study reveals a trend in the last 5 years to surgically intervene on a higher proportion of patients with stones. As in other countries, there is a significant increase in the use of ureteroscopy (particularly intrarenal flexible ureteroscopy) in England. These data have important implications for work-force planning, training, service delivery, and research in the field of urolithiasis.",2016-05-26 +23066841,MetNet Online: a novel integrated resource for plant systems biology.,"

Background

Plants are important as foods, pharmaceuticals, biorenewable chemicals, fuel resources, bioremediation tools and general tools for recombinant technology. The study of plant biological pathways is advanced by easy access to integrated data sources. Today, various plant data sources are scattered throughout the web, making it increasingly complicated to build comprehensive datasets.

Results

MetNet Online is a web-based portal that provides access to a regulatory and metabolic plant pathway database. The database and portal integrate Arabidopsis, soybean (Glycine max) and grapevine (Vitis vinifera) data. Pathways are enriched with known or predicted information on sub cellular location. MetNet Online enables pathways, interactions and entities to be browsed or searched by multiple categories such as sub cellular compartment, pathway ontology, and GO term. In addition to this, the ""My MetNet"" feature allows registered users to bookmark content and track, import and export customized lists of entities. Users can also construct custom networks using existing pathways and/or interactions as building blocks.

Conclusion

The site can be reached at http://www.metnetonline.org. Extensive video tutorials on how to use the site are available through http://www.metnetonline.org/tutorial/.",2012-10-15 +27846630,"A First Evaluation of OMNI®, A New Device for Continuous Renal Replacement Therapy.","

Background

Omni® (B. Braun, Germany) is a new-generation, continuous renal replacement therapy (CRRT) machine designed to improve user interface, minimize downtime and optimize renal dose delivery. It was never tested in humans.

Methods

We used Omni® to provide CRRT in 10 critically ill patients. We collected therapy data, metabolic parameters and evaluated user's satisfaction with a survey.

Results

CRRT was delivered using Omni® in CVVH-heparin (6 patients) and CVVHD-citrate (4 patients) modes for a total duration of 617.7 h. No adverse event was observed. The mean filter life was 22.8 (CVVH-heparin) and 33.5 (CVVHD-citrate) h. Alarms-related downtime corresponded to 5.9% of total therapy time. Delivered renal dose was 96.6% of prescribed. Satisfactory metabolic control and fluid removal were achieved. Overall, users evaluated interface, design and usability as excellent.

Conclusion

CRRT in CVVH-heparin and CVVHD-citrate modes was provided using Omni® in a safe and efficient way for 10 critically ill patients. Video Journal Club 'Cappuccino with Claudio Ronco' at http://www.karger.com/?doi=451053.",2016-11-16 +27257615,"Bran data of total flavonoid and total phenolic contents, oxygen radical absorbance capacity, and profiles of proanthocyanidins and whole grain physical traits of 32 red and purple rice varieties.","Phytochemicals in red and purple bran rice have potential health benefit to humans. We determined the phytochemicals in brans of 32 red and purple global rice varieties. The description of the origin and physical traits of the whole grain (color, length, width, thickness and 100-kernel weight) of this germplasm collection are provided along with data of total flavonoid and total phenolic contents, oxygen radical absorbance capacity and total proanthocyanidin contents. The contents and proportions of individual oligomers, from degree of polymerization of monomers to 14-mers, and polymers in bran of these 32 rice varieties are presented (DOI: http://dx.doi.org/10.1016/j.foodchem.2016.04.004) [1].",2016-05-10 +27110275,Sparse RNA folding revisited: space-efficient minimum free energy structure prediction.,"

Background

RNA secondary structure prediction by energy minimization is the central computational tool for the analysis of structural non-coding RNAs and their interactions. Sparsification has been successfully applied to improve the time efficiency of various structure prediction algorithms while guaranteeing the same result; however, for many such folding problems, space efficiency is of even greater concern, particularly for long RNA sequences. So far, space-efficient sparsified RNA folding with fold reconstruction was solved only for simple base-pair-based pseudo-energy models.

Results

Here, we revisit the problem of space-efficient free energy minimization. Whereas the space-efficient minimization of the free energy has been sketched before, the reconstruction of the optimum structure has not even been discussed. We show that this reconstruction is not possible in trivial extension of the method for simple energy models. Then, we present the time- and space-efficient sparsified free energy minimization algorithm SparseMFEFold that guarantees MFE structure prediction. In particular, this novel algorithm provides efficient fold reconstruction based on dynamically garbage-collected trace arrows. The complexity of our algorithm depends on two parameters, the number of candidates Z and the number of trace arrows T; both are bounded by [Formula: see text], but are typically much smaller. The time complexity of RNA folding is reduced from [Formula: see text] to [Formula: see text]; the space complexity, from [Formula: see text] to [Formula: see text]. Our empirical results show more than 80 % space savings over RNAfold [Vienna RNA package] on the long RNAs from the RNA STRAND database (≥2500 bases).

Conclusions

The presented technique is intentionally generalizable to complex prediction algorithms; due to their high space demands, algorithms like pseudoknot prediction and RNA-RNA-interaction prediction are expected to profit even stronger than ""standard"" MFE folding. SparseMFEFold is free software, available at http://www.bioinf.uni-leipzig.de/~will/Software/SparseMFEFold.",2016-04-23 +22222089,ABrowse--a customizable next-generation genome browser framework.,"

Background

With the rapid growth of genome sequencing projects, genome browser is becoming indispensable, not only as a visualization system but also as an interactive platform to support open data access and collaborative work. Thus a customizable genome browser framework with rich functions and flexible configuration is needed to facilitate various genome research projects.

Results

Based on next-generation web technologies, we have developed a general-purpose genome browser framework ABrowse which provides interactive browsing experience, open data access and collaborative work support. By supporting Google-map-like smooth navigation, ABrowse offers end users highly interactive browsing experience. To facilitate further data analysis, multiple data access approaches are supported for external platforms to retrieve data from ABrowse. To promote collaborative work, an online user-space is provided for end users to create, store and share comments, annotations and landmarks. For data providers, ABrowse is highly customizable and configurable. The framework provides a set of utilities to import annotation data conveniently. To build ABrowse on existing annotation databases, data providers could specify SQL statements according to database schema. And customized pages for detailed information display of annotation entries could be easily plugged in. For developers, new drawing strategies could be integrated into ABrowse for new types of annotation data. In addition, standard web service is provided for data retrieval remotely, providing underlying machine-oriented programming interface for open data access.

Conclusions

ABrowse framework is valuable for end users, data providers and developers by providing rich user functions and flexible customization approaches. The source code is published under GNU Lesser General Public License v3.0 and is accessible at http://www.abrowse.org/. To demonstrate all the features of ABrowse, a live demo for Arabidopsis thaliana genome has been built at http://arabidopsis.cbi.edu.cn/.",2012-01-05 +27105317,A Prospective Analysis of Meat Mutagens and Colorectal Cancer in the Nurses' Health Study and Health Professionals Follow-up Study.,"

Background

Heterocyclic amines (HCAs) in cooked meats may play a role in colorectal cancer (CRC) development.

Objectives

We aimed to prospectively examine the association between estimated intakes of HCAs and meat-derived mutagenicity (MDM) in two cohorts of health professionals, the Health Professionals Follow-up Study (HPFS) and the Nurses' Health Study (NHS).

Methods

In 29,615 men and 65,875 women, intake of the HCAs 2-amino-3,8-dimethylimidazo(4,5-j)quinoxaline (MeIQx), 2-amino-1-methyl-6-phenylimidazo(4,5-b)pyridine (PhIP), 2-amino-3,4,8-trimethylimidazo(4,5-f)quinoxaline (DiMeIQx), and MDM was estimated using a 1996 cooking questionnaire, the 1994 food frequency questionnaire, and an online database. Cox proportional hazards models were used to estimate hazard ratios (HRs) and 95% confidence intervals (CIs) and to adjust for potential confounders. Estimates for both cohorts were pooled using random-effects meta-analysis.

Results

Between 1996 and 2010, 418 male and 790 female CRC cases were identified. Meat mutagen intake was not statistically significantly associated with risk of CRC [highest vs. lowest quintile, pooled HR (95% CI) for MeIQx: 1.12 (0.93, 1.34), p for trend 0.23; PhIP: 1.10 (0.90, 1.33), p for trend 0.35; MDM: 1.03 (0.86, 1.24), p for trend 0.75] or subtypes of CRC defined by tumor location (proximal or distal colon, or rectum). When analyzed by source of meat, PhIP from red but not from white meat was nonsignificantly positively associated with CRC and significantly positively associated with proximal cancers [HR (95% CI) per standard deviation increase of log-transformed intake: PhIP red meat: CRC: 1.06 (0.99, 1.12), proximal: 1.11 (1.02, 1.21); PhIP white meat: CRC: 0.99 (0.94, 1.04), proximal: 1.00 (0.93, 1.09)].

Conclusions

Estimated intakes of meat mutagens were not significantly associated with CRC risk over 14 years of follow-up in the NHS and HPFS cohorts. Results for PhIP from red but not from white meat warrant further investigation.

Citation

Le NT, Michels FA, Song M, Zhang X, Bernstein AM, Giovannucci EL, Fuchs CS, Ogino S, Chan AT, Sinha R, Willett WC, Wu K. 2016. A prospective analysis of meat mutagens and colorectal cancer in the Nurses' Health Study and Health Professionals Follow-up Study. Environ Health Perspect 124:1529-1536; http://dx.doi.org/10.1289/EHP238.",2016-04-22 +28749367,Genome-wide Association Study of Susceptibility to Particulate Matter-Associated QT Prolongation.,"

Background

Ambient particulate matter (PM) air pollution exposure has been associated with increases in QT interval duration (QT). However, innate susceptibility to PM-associated QT prolongation has not been characterized.

Objective

To characterize genetic susceptibility to PM-associated QT prolongation in a multi-racial/ethnic, genome-wide association study (GWAS).

Methods

Using repeated electrocardiograms (1986–2004), longitudinal data on PM<10 μm in diameter (PM10), and generalized estimating equations methods adapted for low-prevalence exposure, we estimated approximately 2.5×10^6 SNP×PM10 interactions among nine Women’s Health Initiative clinical trials and Atherosclerosis Risk in Communities Study subpopulations (n=22,158), then combined subpopulation-specific results in a fixed-effects, inverse variance-weighted meta-analysis.

Results

A common variant (rs1619661; coded allele: T) significantly modified the QT-PM10 association (p=2.11×10^−8). At PM10 concentrations >90th percentile, QT increased 7 ms across the CC and TT genotypes: 397 (95% confidence interval: 396, 399) to 404 (403, 404) ms. However, QT changed minimally across rs1619661 genotypes at lower PM10 concentrations. The rs1619661 variant is on chromosome 10, 132 kilobase (kb) downstream from CXCL12, which encodes a chemokine, stromal cell-derived factor 1, that is expressed in cardiomyocytes and decreases calcium influx across the L-type Ca2+ channel.

Conclusions

The findings suggest that biologically plausible genetic factors may alter susceptibility to PM10-associated QT prolongation in populations protected by the U.S. Environmental Protection Agency’s National Ambient Air Quality Standards. Independent replication and functional characterization are necessary to validate our findings. https://doi.org/10.1289/EHP347",2017-06-08 +27106060,PSI/TM-Coffee: a web server for fast and accurate multiple sequence alignments of regular and transmembrane proteins using homology extension on reduced databases.,"The PSI/TM-Coffee web server performs multiple sequence alignment (MSA) of proteins by combining homology extension with a consistency based alignment approach. Homology extension is performed with Position Specific Iterative (PSI) BLAST searches against a choice of redundant and non-redundant databases. The main novelty of this server is to allow databases of reduced complexity to rapidly perform homology extension. This server also gives the possibility to use transmembrane proteins (TMPs) reference databases to allow even faster homology extension on this important category of proteins. Aside from an MSA, the server also outputs topological prediction of TMPs using the HMMTOP algorithm. Previous benchmarking of the method has shown this approach outperforms the most accurate alignment methods such as MSAProbs, Kalign, PROMALS, MAFFT, ProbCons and PRALINE™. The web server is available at http://tcoffee.crg.cat/tmcoffee.",2016-04-22 +25038819,GACT: a Genome build and Allele definition Conversion Tool for SNP imputation and meta-analysis in genetic association studies.,"

Background

Genome-wide association studies (GWAS) have successfully identified genes associated with complex human diseases. Although much of the heritability remains unexplained, combining single nucleotide polymorphism (SNP) genotypes from multiple studies for meta-analysis will increase the statistical power to identify new disease-associated variants. Meta-analysis requires same allele definition (nomenclature) and genome build among individual studies. Similarly, imputation, commonly-used prior to meta-analysis, requires the same consistency. However, the genotypes from various GWAS are generated using different genotyping platforms, arrays or SNP-calling approaches, resulting in use of different genome builds and allele definitions. Incorrect assumptions of identical allele definition among combined GWAS lead to a large portion of discarded genotypes or incorrect association findings. There is no published tool that predicts and converts among all major allele definitions.

Results

In this study, we have developed a tool, GACT, which stands for Genome build and Allele definition Conversion Tool, that predicts and inter-converts between any of the common SNP allele definitions and between the major genome builds. In addition, we assessed several factors that may affect imputation quality, and our results indicated that inclusion of singletons in the reference had detrimental effects while ambiguous SNPs had no measurable effect. Unexpectedly, exclusion of genotypes with missing rate > 0.001 (40% of study SNPs) showed no significant decrease of imputation quality (even significantly higher when compared to the imputation with singletons in the reference), especially for rare SNPs.

Conclusion

GACT is a new, powerful, and user-friendly tool with both command-line and interactive online versions that can accurately predict, and convert between any of the common allele definitions and between genome builds for genome-wide meta-analysis and imputation of genotypes from SNP-arrays or deep-sequencing, particularly for data from the dbGaP and other public databases.

Gact software

http://www.uvm.edu/genomics/software/gact.",2014-07-19 +26774327,Decision support systems for personalized and participative radiation oncology.,"A paradigm shift from current population based medicine to personalized and participative medicine is underway. This transition is being supported by the development of clinical decision support systems based on prediction models of treatment outcome. In radiation oncology, these models 'learn' using advanced and innovative information technologies (ideally in a distributed fashion - please watch the animation: http://youtu.be/ZDJFOxpwqEA) from all available/appropriate medical data (clinical, treatment, imaging, biological/genetic, etc.) to achieve the highest possible accuracy with respect to prediction of tumor response and normal tissue toxicity. In this position paper, we deliver an overview of the factors that are associated with outcome in radiation oncology and discuss the methodology behind the development of accurate prediction models, which is a multi-faceted process. Subsequent to initial development/validation and clinical introduction, decision support systems should be constantly re-evaluated (through quality assurance procedures) in different patient datasets in order to refine and re-optimize the models, ensuring the continuous utility of the models. In the reasonably near future, decision support systems will be fully integrated within the clinic, with data and knowledge being shared in a standardized, dynamic, and potentially global manner enabling truly personalized and participative medicine.",2016-01-14 +22584067,AncestrySNPminer: a bioinformatics tool to retrieve and develop ancestry informative SNP panels.,"A wealth of genomic information is available in public and private databases. However, this information is underutilized for uncovering population specific and functionally relevant markers underlying complex human traits. 
Given the huge amount of SNP data available from the annotation of human genetic variation, data mining is a faster and cost effective approach for investigating the number of SNPs that are informative for ancestry. In this study, we present AncestrySNPminer, the first web-based bioinformatics tool specifically designed to retrieve Ancestry Informative Markers (AIMs) from genomic data sets and link these informative markers to genes and ontological annotation classes. The tool includes an automated and simple ""scripting at the click of a button"" functionality that enables researchers to perform various population genomics statistical analyses methods with user friendly querying and filtering of data sets across various populations through a single web interface. AncestrySNPminer can be freely accessed at https://research.cchmc.org/mershalab/AncestrySNPminer/login.php.",2012-05-11 +27105845,Companion: a web server for annotation and analysis of parasite genomes.,"Currently available sequencing technologies enable quick and economical sequencing of many new eukaryotic parasite (apicomplexan or kinetoplastid) species or strains. Compared to SNP calling approaches, de novo assembly of these genomes enables researchers to additionally determine insertion, deletion and recombination events as well as to detect complex sequence diversity, such as that seen in variable multigene families. However, there currently are no automated eukaryotic annotation pipelines offering the required range of results to facilitate such analyses. A suitable pipeline needs to perform evidence-supported gene finding as well as functional annotation and pseudogene detection up to the generation of output ready to be submitted to a public database. Moreover, no current tool includes quick yet informative comparative analyses and a first pass visualization of both annotation and analysis results. 
To overcome those needs we have developed the Companion web server (http://companion.sanger.ac.uk) providing parasite genome annotation as a service using a reference-based approach. We demonstrate the use and performance of Companion by annotating two Leishmania and Plasmodium genomes as typical parasite cases and evaluate the results compared to manually annotated references.",2016-04-21 +27105847,SL2: an interactive webtool for modeling of missing segments in proteins.,"SuperLooper2 (SL2) (http://proteinformatics.charite.de/sl2) is the updated version of our previous web-server SuperLooper, a fragment based tool for the prediction and interactive placement of loop structures into globular and helical membrane proteins. In comparison to our previous version, SL2 benefits from both a considerably enlarged database of fragments derived from high-resolution 3D protein structures of globular and helical membrane proteins, and the integration of a new protein viewer. The database, now with double the content, significantly improved the coverage of fragment conformations and prediction quality. The employment of the NGL viewer for visualization of the protein under investigation and interactive selection of appropriate loops makes SL2 independent of third-party plug-ins and additional installations.",2016-04-21 +27098585,Identification of genomic sites for CRISPR/Cas9-based genome editing in the Vitis vinifera genome.,"

Background

CRISPR/Cas9 has been recently demonstrated as an effective and popular genome editing tool for modifying genomes of humans, animals, microorganisms, and plants. Success of such genome editing is highly dependent on the availability of suitable target sites in the genomes to be edited. Many specific target sites for CRISPR/Cas9 have been computationally identified for several annual model and crop species, but such sites have not been reported for perennial, woody fruit species. In this study, we identified and characterized five types of CRISPR/Cas9 target sites in the widely cultivated grape species Vitis vinifera and developed a user-friendly database for editing grape genomes in the future.

Results

A total of 35,767,960 potential CRISPR/Cas9 target sites were identified from grape genomes in this study. Among them, 22,597,817 target sites were mapped to specific genomic locations and 7,269,788 were found to be highly specific. Protospacers and PAMs were found to distribute uniformly and abundantly in the grape genomes. They were present in all the structural elements of genes with the coding region having the highest abundance. Five PAM types, TGG, AGG, GGG, CGG and NGG, were observed. With the exception of the NGG type, they were abundantly present in the grape genomes. Synteny analysis of similar genes revealed that the synteny of protospacers matched the synteny of homologous genes. A user-friendly database containing protospacers and detailed information of the sites was developed and is available for public use at the Grape-CRISPR website ( http://biodb.sdau.edu.cn/gc/index.html ).

Conclusion

Grape genomes harbour millions of potential CRISPR/Cas9 target sites. These sites are widely distributed among and within chromosomes with predominant abundance in the coding regions of genes. We developed a publicly-accessible Grape-CRISPR database for facilitating the use of the CRISPR/Cas9 system as a genome editing tool for functional studies and molecular breeding of grapes. Among other functions, the database allows users to identify and select multi-protospacers for editing similar sequences in grape genomes simultaneously.",2016-04-21 +22829726,"Actinobase: Database on molecular diversity, phylogeny and biocatalytic potential of salt tolerant alkaliphilic actinomycetes.","

Unlabelled

Actinobase is a relational database of molecular diversity, phylogeny and biocatalytic potential of haloalkaliphilic actinomycetes. The main objective of this data base is to provide easy access to range of information, data storage, comparison and analysis apart from reduced data redundancy, data entry, storage, retrieval costs and improve data security. Information related to habitat, cell morphology, Gram reaction, biochemical characterization and molecular features would allow researchers in understanding identification and stress adaptation of the existing and new candidates belonging to salt tolerant alkaliphilic actinomycetes. The PHP front end helps to add nucleotides and protein sequence of reported entries which directly help researchers to obtain the required details. Analysis of the genus wise status of the salt tolerant alkaliphilic actinomycetes indicated 6 different genera among the 40 classified entries of the salt tolerant alkaliphilic actinomycetes. The results represented wide spread occurrence of salt tolerant alkaliphilic actinomycetes belonging to diverse taxonomic positions. Entries and information related to actinomycetes in the database are publicly accessible at http://www.actinobase.in. On clustalW/X multiple sequence alignment of the alkaline protease gene sequences, different clusters emerged among the groups. The narrow search and limit options of the constructed database provided comparable information. The user friendly access to PHP front end facilitates would facilitate addition of sequences of reported entries.

Availability

The database is available for free at http://www.actinobase.in.",2012-06-16 +27792167,Predicting Protein-Protein Interaction Sites Using Sequence Descriptors and Site Propensity of Neighboring Amino Acids. ,"Information about the interface sites of Protein-Protein Interactions (PPIs) is useful for many biological research works. However, despite the advancement of experimental techniques, the identification of PPI sites still remains as a challenging task. Using a statistical learning technique, we proposed a computational tool for predicting PPI interaction sites. As an alternative to similar approaches requiring structural information, the proposed method takes all of the input from protein sequences. In addition to typical sequence features, our method takes into consideration that interaction sites are not randomly distributed over the protein sequence. We characterized this positional preference using protein complexes with known structures, proposed a numerical index to estimate the propensity and then incorporated the index into a learning system. The resulting predictor, without using structural information, yields an area under the ROC curve (AUC) of 0.675, recall of 0.597, precision of 0.311 and accuracy of 0.583 on a ten-fold cross-validation experiment. This performance is comparable to the previous approach in which structural information was used. Upon introducing the B-factor data to our predictor, we demonstrated that the AUC can be further improved to 0.750. The tool is accessible at http://bsaltools.ym.edu.tw/predppis.",2016-10-26 +27098042,g:Profiler-a web server for functional interpretation of gene lists (2016 update).,"Functional enrichment analysis is a key step in interpreting gene lists discovered in diverse high-throughput experiments. g:Profiler studies flat and ranked gene lists and finds statistically significant Gene Ontology terms, pathways and other gene function related terms. 
Translation of hundreds of gene identifiers is another core feature of g:Profiler. Since its first publication in 2007, our web server has become a popular tool of choice among basic and translational researchers. Timeliness is a major advantage of g:Profiler as genome and pathway information is synchronized with the Ensembl database in quarterly updates. g:Profiler supports 213 species including mammals and other vertebrates, plants, insects and fungi. The 2016 update of g:Profiler introduces several novel features. We have added further functional datasets to interpret gene lists, including transcription factor binding site predictions, Mendelian disease annotations, information about protein expression and complexes and gene mappings of human genetic polymorphisms. Besides the interactive web interface, g:Profiler can be accessed in computational pipelines using our R package, Python interface and BioJS component. g:Profiler is freely available at http://biit.cs.ut.ee/gprofiler/.",2016-04-20 +26072511,Robust reconstruction of gene expression profiles from reporter gene data using linear inversion.,"

Motivation

Time-series observations from reporter gene experiments are commonly used for inferring and analyzing dynamical models of regulatory networks. The robust estimation of promoter activities and protein concentrations from primary data is a difficult problem due to measurement noise and the indirect relation between the measurements and quantities of biological interest.

Results

We propose a general approach based on regularized linear inversion to solve a range of estimation problems in the analysis of reporter gene data, notably the inference of growth rate, promoter activity, and protein concentration profiles. We evaluate the validity of the approach using in silico simulation studies, and observe that the methods are more robust and less biased than indirect approaches usually encountered in the experimental literature based on smoothing and subsequent processing of the primary data. We apply the methods to the analysis of fluorescent reporter gene data acquired in kinetic experiments with Escherichia coli. The methods are capable of reliably reconstructing time-course profiles of growth rate, promoter activity and protein concentration from weak and noisy signals at low population volumes. Moreover, they capture critical features of those profiles, notably rapid changes in gene expression during growth transitions.

Availability and implementation

The methods described in this article are made available as a Python package (LGPL license) and also accessible through a web interface. For more information, see https://team.inria.fr/ibis/wellinverter.",2015-06-01 +27096425,A Web-Based Platform for Designing Vaccines against Existing and Emerging Strains of Mycobacterium tuberculosis.,"

Unlabelled

Development of an effective vaccine against drug-resistant Mycobacterium tuberculosis (Mtb) is crucial for saving millions of premature deaths every year due to tuberculosis. This paper describes a web portal developed for assisting researchers in designing vaccines against emerging Mtb strains using traditional and modern approaches. Firstly, we annotated 59 genomes of Mycobacterium species to understand similarity/dissimilarity between tuberculoid, non-tuberculoid and vaccine strains at genome level. Secondly, antigen-based vaccine candidates have been predicted in each Mtb strain. Thirdly, epitopes-based vaccine candidates were predicted/discovered in above antigen-based vaccine candidates that can stimulate all arms of immune system. Finally, a database of predicted vaccine candidates at epitopes as well at antigen level has been developed for above strains. In order to design vaccine against a newly sequenced genome of Mtb strain, server integrates three modules for identification of strain-, antigen-, epitope-specific vaccine candidates. We observed that 103,522 unique peptides (9mers) had the potential to induce an antibody response and/or promiscuous binder to MHC alleles and/or have the capability to stimulate T lymphocytes. In summary, this web-portal will be useful for researchers working on designing vaccines against Mtb including drug-resistant strains.

Availability

The database is available freely at http://crdd.osdd.net/raghava/mtbveb/.",2016-04-20 +27511937,A Retinol Isotope Dilution Equation Predicts Both Group and Individual Total Body Vitamin A Stores in Adults Based on Data from an Early Postdosing Blood Sample.,"

Background

Retinol isotope dilution (RID) is used to determine vitamin A total body stores (TBS) after an oral dose of a vitamin A stable isotope. The generally accepted prediction equation proposed by Olson's group in 1989 (Furr et al. Am J Clin Nutr 1989;49:713-6) includes factors related to dose absorption and retention, isotope equilibration in plasma compared with stores, catabolism during the mixing period, and the optimal time for measuring plasma isotope enrichment.

Objectives

The objectives were 1) to develop a modified RID equation and identify an earlier sampling time for predicting TBS and 2) to improve prediction in individuals as well as groups.

Methods

To develop a modified RID equation, we used results of model-based compartmental analysis [the Simulation, Analysis and Modeling software (WinSAAM version 3.0.8; http://www.WinSAAM.org)] of plasma [13C10]retinol kinetic data from 32 previously studied, healthy young adults of European ancestry who had moderate vitamin A intakes and who ingested 2.95 μmol [13C10]retinyl acetate.

Results

We examined the time dependence of factors in the prediction equation related to absorption/retention (Fa) and isotope equilibration (S) and determined that 4 or 5 d postdosing was the optimal sampling time. TBS calculated by the equation TBS = Fa x S x (1/SAp), where SAp is plasma retinol specific activity (fraction of dose/μmol), were highly correlated with model-predicted TBS (r = 0.95 and 0.96 for 4 and 5 d, respectively; P < 0.001); predictions for individuals were also highly correlated (Rs = 0.94 and 0.94; P < 0.001).

Conclusion

The equation TBS ≈ 0.5 × (1/SAp) accurately predicted vitamin A TBS in this group of 32 healthy young adults and its individual members with the use of data from 1 blood sample taken 4 d after isotope administration.",2016-08-10 +27095195,Pharmit: interactive exploration of chemical space.,"Pharmit (http://pharmit.csb.pitt.edu) provides an online, interactive environment for the virtual screening of large compound databases using pharmacophores, molecular shape and energy minimization. Users can import, create and edit virtual screening queries in an interactive browser-based interface. Queries are specified in terms of a pharmacophore, a spatial arrangement of the essential features of an interaction, and molecular shape. Search results can be further ranked and filtered using energy minimization. In addition to a number of pre-built databases of popular compound libraries, users may submit their own compound libraries for screening. Pharmit uses state-of-the-art sub-linear algorithms to provide interactive screening of millions of compounds. Queries typically take a few seconds to a few minutes depending on their complexity. This allows users to iteratively refine their search during a single session. The easy access to large chemical datasets provided by Pharmit simplifies and accelerates structure-based drug design. Pharmit is available under a dual BSD/GPL open-source license.",2016-04-19 +27153640,e23D: database and visualization of A-to-I RNA editing sites mapped to 3D protein structures.,"

Unlabelled

e23D, a database of A-to-I RNA editing sites from human, mouse and fly mapped to evolutionary related protein 3D structures, is presented. Genomic coordinates of A-to-I RNA editing sites are converted to protein coordinates and mapped onto 3D structures from PDB or theoretical models from ModBase. e23D allows visualization of the protein structure, modeling of recoding events and orientation of the editing with respect to nearby genomic functional sites from databases of disease causing mutations and genomic polymorphism.

Availability and implementation

http://www.sheba-cancer.org.il/e23D CONTACT: oz.solomon@live.biu.ac.il or Eran.Eyal@sheba.health.gov.il.",2016-04-19 +27091488,"Cooking Coal Use and All-Cause and Cause-Specific Mortality in a Prospective Cohort Study of Women in Shanghai, China.","

Background

Nearly 4.3 million deaths worldwide were attributable to exposure to household air pollution in 2012. However, household coal use remains widespread.

Objectives

We investigated the association of cooking coal and all-cause and cause-specific mortality in a prospective cohort of primarily never-smoking women in Shanghai, China.

Methods

A cohort of 74,941 women were followed from 1996 through 2009 with annual linkage to the Shanghai vital statistics database. Cause-specific mortality was identified through 2009. Use of household coal for cooking was assessed through a residential history questionnaire. Cox proportional hazards models estimated the risk of mortality associated with household coal use.

Results

In this cohort, 63% of the women ever used coal (n = 46,287). Compared with never coal use, ever use of coal was associated with mortality from all causes [hazard ratio (HR) = 1.12; 95% confidence interval (CI): 1.05, 1.21], cancer (HR = 1.14; 95% CI: 1.03, 1.27), and ischemic heart disease (overall HR = 1.61; 95% CI: 1.14, 2.27; HR for myocardial infarction specifically = 1.80; 95% CI: 1.16, 2.79). The risk of cardiovascular mortality increased with increasing duration of coal use, compared with the risk in never users. The association between coal use and ischemic heart disease mortality diminished with increasing years since cessation of coal use.

Conclusions

Evidence from this study suggests that past use of coal among women in Shanghai is associated with excess all-cause mortality, and from cardiovascular diseases in particular. The decreasing association with cardiovascular mortality as the time since last use of coal increased emphasizes the importance of reducing use of household coal where use is still widespread.

Citation

Kim C, Seow WJ, Shu XO, Bassig BA, Rothman N, Chen BE, Xiang YB, Hosgood HD III, Ji BT, Hu W, Wen C, Chow WH, Cai Q, Yang G, Gao YT, Zheng W, Lan Q. 2016. Cooking coal use and all-cause and cause-specific mortality in a prospective cohort study of women in Shanghai, China. Environ Health Perspect 124:1384-1389; http://dx.doi.org/10.1289/EHP236.",2016-04-19 +27090940,Prioritizing functional phosphorylation sites based on multiple feature integration.,"Protein phosphorylation is an important type of post-translational modification that is involved in a variety of biological activities. Most phosphorylation events occur on serine, threonine and tyrosine residues in eukaryotes. In recent years, many phosphorylation sites have been identified as a result of advances in mass-spectrometric techniques. However, a large percentage of phosphorylation sites may be non-functional. Systematically prioritizing functional sites from a large number of phosphorylation sites will be increasingly important for the study of their biological roles. This study focused on exploring the intrinsic features of functional phosphorylation sites to predict whether a phosphosite is likely to be functional. We found significant differences in the distribution of evolutionary conservation, kinase association, disorder score, and secondary structure between known functional and background phosphorylation datasets. We built four different types of classifiers based on the most representative features and found that their performances were similar. We also prioritized 213,837 human phosphorylation sites from a variety of phosphorylation databases, which will be helpful for subsequent functional studies. 
All predicted results are available for query and download on our website (Predict Functional Phosphosites, PFP, http://pfp.biosino.org/).",2016-04-19 +26007697,MSI.R scripts reveal volatile and semi-volatile features in low-temperature plasma mass spectrometry imaging (LTP-MSI) of chilli (Capsicum annuum).,"In cartography, the combination of colour and contour lines is used to express a three-dimensional landscape on a two-dimensional map. We transferred this concept to the analysis of mass spectrometry imaging (MSI) data and developed a collection of R scripts for the efficient evaluation of .imzML archives in a four-step strategy: (1) calculation of the density distribution of mass-to-charge ratio (m/z) signals in the .imzML file and assembling of a pseudo-master spectrum with peak list, (2) automated generation of mass images for a defined scan range and subsequent visual inspection, (3) visualisation of individual ion distributions and export of relevant .mzML spectra and (4) creation of overlay graphics of ion images and photographies. The use of a Hue-Chroma-Luminance (HCL) colour model in MSI graphics takes into account the human perception for colours and supports the correct evaluation of signal intensities. Further, readers with colour blindness are supported. Contour maps promote the visual recognition of patterns in MSI data, which is particularly useful for noisy data sets. We demonstrate the scalability of MSI.R scripts by running them on different systems: on a personal computer, on Amazon Web Services (AWS) instances and on an institutional cluster. By implementing a parallel computing strategy, the execution speed for .imzML data scanning with image generation could be improved by more than an order of magnitude. Applying our MSI.R scripts ( http://www.bioprocess.org/MSI.R ) to low-temperature plasma (LTP)-MSI data shows the localisation of volatile and semi-volatile compounds in the cross-cut of a chilli (Capsicum annuum) fruit. 
The subsequent identification of compounds by gas and liquid chromatography coupled to mass spectrometry (GC-MS, LC-MS) proves that LTP-MSI enables the direct measurement of volatile organic compound (VOC) distributions from biological tissues.",2015-05-26 +25957350,BinDNase: a discriminatory approach for transcription factor binding prediction using DNase I hypersensitivity data.,"

Motivation

Transcription factors (TFs) are a class of DNA-binding proteins that have a central role in regulating gene expression. To reveal mechanisms of transcriptional regulation, a number of computational tools have been proposed for predicting TF-DNA interaction sites. Recent studies have shown that genome-wide sequencing data on open chromatin sites from a DNase I hypersensitivity experiments (DNase-seq) has a great potential to map putative binding sites of all transcription factors in a single experiment. Thus, computational methods for analysing DNase-seq to accurately map TF-DNA interaction sites are highly needed.

Results

Here, we introduce a novel discriminative algorithm, BinDNase, for predicting TF-DNA interaction sites using DNase-seq data. BinDNase implements an efficient method for selecting and extracting informative features from DNase I signal for each TF, either at single nucleotide resolution or for larger regions. The method is applied to 57 transcription factors in cell line K562 and 31 transcription factors in cell line HepG2 using data from the ENCODE project. First, we show that BinDNase compares favourably to other supervised and unsupervised methods developed for TF-DNA interaction prediction using DNase-seq data. We demonstrate the importance to model each TF with a separate prediction model, reflecting TF-specific DNA accessibility around the TF-DNA interaction site. We also show that a highly standardised DNase-seq data (pre)processing is a requisite for accurate TF binding predictions and that sequencing depth has on average only a moderate effect on prediction accuracy. Finally, BinDNase's binding predictions generalise to other cell types, thus making BinDNase a versatile tool for accurate TF binding prediction.

Availability and implementation

R implementation of the algorithm is available in: http://research.ics.aalto.fi/csb/software/bindnase/.

Contact

juhani.kahara@aalto.fi

Supplementary information

Supplemental data are available at Bioinformatics online.",2015-05-07 +30014872,Italian pediatric nutrition survey.,"

Introduction

The prevalence of malnutrition in children and its impact on clinical outcomes are underrecognized by clinicians in Italy as well as worldwide. A novel definition of pediatric malnutrition has been recently proposed by a working group of the Academy of Nutrition and Dietetics and American Society for Parenteral and Enteral Nutrition (A.S.P.E.N.), based on the correlation between illness and the use of z-scores of anthropometric measurements.

Aim

to investigate the prevalence of malnutrition and related nutritional support among hospitalized children in Italy, in a nationwide survey performed in a single day (16/4/2015).

Methods

An open access website (http://nday.biomedia.net) was used to collect data from 73 hospitals and 101 wards in 14 Italian regions (1994 patients). Anonymous information was collected on hospitals' characteristics, patients' anthropometry, admission diagnosis, presence of chronic diseases and use of nutritional support: oral nutritional supplements (ONS), enteral nutrition (EN) or parenteral nutrition (PN). Z-scores of anthropometric measurements, calculated with Epi Info 7.1.5, defined nutritional status: wasting was identified by BMI or Weight-for-Length z-score (<-1 mild, <-2 moderate, <-3 severe), stunting by Height-for-Age Z-score <-2. WHO 2006 and CDC 2000 growth charts were used respectively for children younger and older than 2 years old.

Results

1790 complete records were obtained for hospitalized patients aged 0-20 years, with median age 6.16 (0.1-20 years and 53.3% males). 52.9% were aged 0-6 years and 58.8% of children suffered from chronic diseases. Wasting was detected in 28.7% of the total sample with higher occurrence observed in age ranges 0-6 and 14-20 years, while 17.3% of patients showed stunting; surprisingly almost 27% of them were aged 0-2. A ranking of the admission diagnoses with the highest rate of malnutrition was compiled. The prevalence of wasting was significantly (p < 0.005) higher amongst children with chronic diseases (34.1% vs. 27.1%); stunting prevalence tripled in patients with chronic disease (24.5% vs. 8.3%). Only 23.5% of malnourished children (17%, 25.6% and 36.7%, respectively mild, moderate and severe malnutrition) received nutritional support: 11.7% received oral nutrition supplements (ONS, modular or complete), 11.5% enteral nutrition (EN, 6.4% via nasogastric tube, 5.1% via gastrostomy) and 6.8% received parenteral nutrition (PN); in some patients a combination of two. Nutritional support is more commonly used among stunting patients, 39.5% of children under treatment.

Conclusion

Malnutrition of any grade was observed in nearly 1/3 and stunting in 17% of the reported hospitalized children, and it is likely to be underrecognized as the nutritional support reached only a small part of the malnourished children.",2017-08-21 +22928022,Transcriptome analysis of the silkworm (Bombyx mori) by high-throughput RNA sequencing.,"The domestic silkworm, Bombyx mori, is a model insect with important economic value for silk production that also acts as a bioreactor for biomaterial production. The functional complexity of the silkworm transcriptome has not yet been fully elucidated, although genomic sequencing and other tools have been widely used in its study. We explored the transcriptome of silkworm at different developmental stages using high-throughput paired-end RNA sequencing. A total of about 3.3 gigabases (Gb) of sequence was obtained, representing about a 7-fold coverage of the B. mori genome. From the reads that were mapped to the genome sequence; 23,461 transcripts were obtained, 5,428 of them were novel. Of the 14,623 predicted protein-coding genes in the silkworm genome database, 11,884 of them were found to be expressed in the silkworm transcriptome, giving a coverage of 81.3%. A total of 13,195 new exons were detected, of which, 5,911 were found in the annotated genes in the Silkworm Genome Database (SilkDB). An analysis of alternative splicing in the transcriptome revealed that 3,247 genes had undergone alternative splicing. To help with the data analysis, a transcriptome database that integrates our transcriptome data with the silkworm genome data was constructed and is publicly available at http://124.17.27.136/gbrowse2/. To our knowledge, this is the first study to elucidate the silkworm transcriptome using high-throughput RNA sequencing technology. Our data indicate that the transcriptome of silkworm is much more complex than previously anticipated. 
This work provides tools and resources for the identification of new functional elements and paves the way for future functional genomics studies.",2012-08-23 +23894279,SVM-based prediction of propeptide cleavage sites in spider toxins identifies toxin innovation in an Australian tarantula.,"Spider neurotoxins are commonly used as pharmacological tools and are a popular source of novel compounds with therapeutic and agrochemical potential. Since venom peptides are inherently toxic, the host spider must employ strategies to avoid adverse effects prior to venom use. It is partly for this reason that most spider toxins encode a protective proregion that upon enzymatic cleavage is excised from the mature peptide. In order to identify the mature toxin sequence directly from toxin transcripts, without resorting to protein sequencing, the propeptide cleavage site in the toxin precursor must be predicted bioinformatically. We evaluated different machine learning strategies (support vector machines, hidden Markov model and decision tree) and developed an algorithm (SpiderP) for prediction of propeptide cleavage sites in spider toxins. Our strategy uses a support vector machine (SVM) framework that combines both local and global sequence information. Our method is superior or comparable to current tools for prediction of propeptide sequences in spider toxins. Evaluation of the SVM method on an independent test set of known toxin sequences yielded 96% sensitivity and 100% specificity. Furthermore, we sequenced five novel peptides (not used to train the final predictor) from the venom of the Australian tarantula Selenotypus plumipes to test the accuracy of the predictor and found 80% sensitivity and 99.6% 8-mer specificity. Finally, we used the predictor together with homology information to predict and characterize seven groups of novel toxins from the deeply sequenced venom gland transcriptome of S. 
plumipes, which revealed structural complexity and innovations in the evolution of the toxins. The precursor prediction tool (SpiderP) is freely available on ArachnoServer (http://www.arachnoserver.org/spiderP.html), a web portal to a comprehensive relational database of spider toxins. All training data, test data, and scripts used are available from the SpiderP website.",2013-07-22 +24311564,RepeatsDB: a database of tandem repeat protein structures.,"RepeatsDB (http://repeatsdb.bio.unipd.it/) is a database of annotated tandem repeat protein structures. Tandem repeats pose a difficult problem for the analysis of protein structures, as the underlying sequence can be highly degenerate. Several repeat types have been studied over the years, but their annotation was done on a case-by-case basis, thus making large-scale analysis difficult. We developed RepeatsDB to fill this gap. Using state-of-the-art repeat detection methods and manual curation, we systematically annotated the Protein Data Bank, predicting 10,745 repeat structures. In all, 2797 structures were classified according to a recently proposed classification schema, which was expanded to accommodate new findings. In addition, detailed annotations were performed in a subset of 321 proteins. These annotations feature information on start and end positions for the repeat regions and units. RepeatsDB is an ongoing effort to systematically classify and annotate structural protein repeats in a consistent way. It provides users with the possibility to access and download high-quality datasets either interactively or programmatically through web services.",2013-12-05 +26508754,Omokage search: shape similarity search service for biomolecular structures in both the PDB and EMDB.,"

Unlabelled

Omokage search is a service to search the global shape similarity of biological macromolecules and their assemblies, in both the Protein Data Bank (PDB) and Electron Microscopy Data Bank (EMDB). The server compares global shapes of assemblies independent of sequence order and number of subunits. As a search query, the user inputs a structure ID (PDB ID or EMDB ID) or uploads an atomic model or 3D density map to the server. The search is performed usually within 1 min, using one-dimensional profiles (incremental distance rank profiles) to characterize the shapes. Using the gmfit (Gaussian mixture model fitting) program, the found structures are fitted onto the query structure and their superimposed structures are displayed on the Web browser. Our service provides new structural perspectives to life science researchers.

Availability and implementation

Omokage search is freely accessible at http://pdbj.org/omokage/.",2015-10-27 +22839745,Laparoscopic versus open catheter placement in peritoneal dialysis patients: a systematic review and meta-analysis.,"

Background

Peritoneal dialysis has been proven to be a safe and effective mode of renal replacement therapy for patients with end-stage renal disease. The usage of laparoscopic catheter placement technique was increased in recent years. But the advantages and disadvantages between the laparoscopic catheter placement technique and open laparotomy technique were still in controversy. The objective of this study is to assess the operation-related data and complications of catheter placement for peritoneal dialysis (PD) patients, and then to determine the better method for catheter insertion.

Methods

We performed a systematic review and meta-analysis on published studies identified by the databases PubMed, EMBASE, Highwire, and the Cochrane Library. Analysis was performed using the statistical software Review Manager Version 5.0.

Results

We assessed the operation-related data and complications of four randomized controlled trials (RCTs) and ten observational studies. The available data showed that laparoscope prolonged the time for catheter insertion in PD patients, however, the two groups did not significantly differ in hospital stays, early and late complications, including infection, dialysate leaks, catheter migration, pericannular bleeding, blockage and hernia.

Conclusions

The data showed that Laparoscopic catheter placement had no superiority to open surgery. However, this treatment still needs to be confirmed in a large, multi-center, well-designed RCT.",2012-07-27 +27197815,PERMANOVA-S: association test for microbial community composition that accommodates confounders and multiple distances.,"

Motivation

Recent advances in sequencing technology have made it possible to obtain high-throughput data on the composition of microbial communities and to study the effects of dysbiosis on the human host. Analysis of pairwise intersample distances quantifies the association between the microbiome diversity and covariates of interest (e.g. environmental factors, clinical outcomes, treatment groups). In the design of these analyses, multiple choices for distance metrics are available. Most distance-based methods, however, use a single distance and are underpowered if the distance is poorly chosen. In addition, distance-based tests cannot flexibly handle confounding variables, which can result in excessive false-positive findings.

Results

We derive presence-weighted UniFrac to complement the existing UniFrac distances for more powerful detection of the variation in species richness. We develop PERMANOVA-S, a new distance-based method that tests the association of microbiome composition with any covariates of interest. PERMANOVA-S improves the commonly-used Permutation Multivariate Analysis of Variance (PERMANOVA) test by allowing flexible confounder adjustments and ensembling multiple distances. We conducted extensive simulation studies to evaluate the performance of different distances under various patterns of association. Our simulation studies demonstrate that the power of the test relies on how well the selected distance captures the nature of the association. The PERMANOVA-S unified test combines multiple distances and achieves good power regardless of the patterns of the underlying association. We demonstrate the usefulness of our approach by reanalyzing several real microbiome datasets.

Availability and implementation

miProfile software is freely available at https://medschool.vanderbilt.edu/tang-lab/software/miProfile

Contact

z.tang@vanderbilt.edu or g.chen@vanderbilt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-19 +26401542,What is the effect of reduced street lighting on crime and road traffic injuries at night? A mixed-methods study,"

Background

Some local authorities have reduced street lighting at night to save energy, but little is known about impacts on public health or about public concerns about impacts on well-being.

Aim

To evaluate the effect of reduced street lighting on crime and road traffic injuries.

Design

A mixed-methods study comprising a rapid appraisal, a controlled interrupted time series analysis and a cost–benefit analysis (CBA).

Setting

England and Wales.

Target population

Residents and workers in eight case study areas; road traffic casualties and victims of crime.

Interventions evaluated

Switch-off (i.e. lights permanently turned off), part-night lighting (e.g. lights switched off between 12 a.m. and 6 a.m.), dimming lights and white lights/light-emitting diodes (LEDs).

Outcomes

Public views about implications on well-being; road traffic injury data (STATS19: http://data.gov.uk/dataset/road-accidents-safety-data) obtained for the period 2000–13; crime data (Police.uk: data.police.uk) obtained for the period December 2010–December 2013. Detailed crime data were obtained from one police force for a methodological study of the spatial level at which Police.uk data are valid for analysis.

Statistical methods

Road traffic collisions were analysed at street segment level. Regression models were used to estimate changes in daytime and night-time collision rates associated with lighting interventions. The ratio of night-time and daytime changes was considered the best estimate of change in night-time collisions following each lighting intervention. Police.uk crime data were found to be reliable when analysed at middle super output area (MSOA) level. For crime, the analysis used the proportion of total km of road in each MSOA with each lighting intervention. Regression models controlled for yearly and monthly trends and were fitted in each geographical region and police force. Effect estimates were pooled in random-effects meta-analyses.

Results

Public concerns centred on personal security, road safety, crime, fear of crime, sleep quality and being able to see the night sky. Street lighting reductions went largely unnoticed or had only marginal impacts on well-being, but for a minority of people switch-off and part-night lighting elicited concerns about fear of the dark, modernity and local governance. Street lighting data were obtained from 62 local authorities. There was no evidence that reduced street lighting was associated with road traffic collisions at night. There was significant heterogeneity in the estimated effects on crime at police force level. Overall, there was no evidence that reduced street lighting was associated with crime. There was weak evidence for a reduction in crime associated with dimming [rate ratio (RR) 0.84, 95% confidence interval (CI) 0.70 to 1.02] and white light (RR 0.89, 95% CI 0.77 to 1.03). The CBA suggests that part-night lighting may represent a net benefit to local authorities.

Limitations

The study did not account for the impacts of other safety/crime prevention initiatives (e.g. improved road markings; closed-circuit television), and so associations may be partly attributable to these initiatives. The CBA was unable to include potentially important impacts such as fear of crime and reduced mobility.

Conclusion

This study found little evidence of harmful effects of switch-off, part-night lighting, dimming or changes to white light/LEDs on levels of road traffic collisions or crime in England and Wales. However, the public were also concerned about other health outcomes. Research is needed to understand how lighting affects opportunities for crime prevention and how these vary by context. Research is needed also on other public health impacts of light at night.

Funding

The National Institute for Health Research Public Health Research programme.",2015-09-25 +21423752,"HMMerThread: detecting remote, functional conserved domains in entire genomes by combining relaxed sequence-database searches with fold recognition.","Conserved domains in proteins are one of the major sources of functional information for experimental design and genome-level annotation. Though search tools for conserved domain databases such as Hidden Markov Models (HMMs) are sensitive in detecting conserved domains in proteins when they share sufficient sequence similarity, they tend to miss more divergent family members, as they lack a reliable statistical framework for the detection of low sequence similarity. We have developed a greatly improved HMMerThread algorithm that can detect remotely conserved domains in highly divergent sequences. HMMerThread combines relaxed conserved domain searches with fold recognition to eliminate false positive, sequence-based identifications. With an accuracy of 90%, our software is able to automatically predict highly divergent members of conserved domain families with an associated 3-dimensional structure. We give additional confidence to our predictions by validation across species. We have run HMMerThread searches on eight proteomes including human and present a rich resource of remotely conserved domains, which adds significantly to the functional annotation of entire proteomes. We find ∼4500 cross-species validated, remotely conserved domain predictions in the human proteome alone. As an example, we find a DNA-binding domain in the C-terminal part of the A-kinase anchor protein 10 (AKAP10), a PKA adaptor that has been implicated in cardiac arrhythmias and premature cardiac death, which upon stress likely translocates from mitochondria to the nucleus/nucleolus. Based on our prediction, we propose that with this HLH-domain, AKAP10 is involved in the transcriptional control of stress response. 
Further remotely conserved domains we discuss are examples from areas such as sporulation, chromosome segregation and signalling during immune response. The HMMerThread algorithm is able to automatically detect the presence of remotely conserved domains in proteins based on weak sequence similarity. Our predictions open up new avenues for biological and medical studies. Genome-wide HMMerThread domains are available at http://vm1-hmmerthread.age.mpg.de.",2011-03-10 +25908942,HLAreporter: a tool for HLA typing from next generation sequencing data.,"Human leukocyte antigen (HLA) typing from next generation sequencing (NGS) data has the potential for widespread applications. Here we introduce a novel tool (HLAreporter) for HLA typing from NGS data based on read-mapping using a comprehensive reference panel containing all known HLA alleles, followed by de novo assembly of the gene-specific short reads. Accurate HLA typing at high-digit resolution was achieved when it was tested on publicly available NGS data, outperforming other newly developed tools such as HLAminer and PHLAT. HLAreporter can be downloaded from http://paed.hku.hk/genome/.",2015-03-16 +26087747,Installing a Local Copy of the Reactome Web Site and Knowledgebase.,"The Reactome project builds, maintains, and publishes a knowledgebase of biological pathways. The information in the knowledgebase is gathered from the experts in the field, peer reviewed and edited by Reactome editorial staff, and then published to the Reactome Web site, http://www.reactome.org. The Reactome software is open source and builds on top of other open-source or freely available software. Reactome data and code can be freely downloaded in its entirety and the Web site installed locally. 
This allows for more flexible interrogation of the data and also makes it possible to add one's own information to the knowledgebase.",2015-06-19 +27899649,Protein Ontology (PRO): enhancing and scaling up the representation of protein entities.,"The Protein Ontology (PRO; http://purl.obolibrary.org/obo/pr) formally defines and describes taxon-specific and taxon-neutral protein-related entities in three major areas: proteins related by evolution; proteins produced from a given gene; and protein-containing complexes. PRO thus serves as a tool for referencing protein entities at any level of specificity. To enhance this ability, and to facilitate the comparison of such entities described in different resources, we developed a standardized representation of proteoforms using UniProtKB as a sequence reference and PSI-MOD as a post-translational modification reference. We illustrate its use in facilitating an alignment between PRO and Reactome protein entities. We also address issues of scalability, describing our first steps into the use of text mining to identify protein-related entities, the large-scale import of proteoform information from expert curated resources, and our ability to dynamically generate PRO terms. Web views for individual terms are now more informative about closely-related terms, including for example an interactive multiple sequence alignment. Finally, we describe recent improvement in semantic utility, with PRO now represented in OWL and as a SPARQL endpoint. These developments will further support the anticipated growth of PRO and facilitate discoverability of and allow aggregation of data relating to protein entities.",2016-11-28 +26452124,Sampling and counting genome rearrangement scenarios.,"

Background

Even for moderate size inputs, there are a tremendous number of optimal rearrangement scenarios, regardless what the model is and which specific question is to be answered. Therefore giving one optimal solution might be misleading and cannot be used for statistical inferring. Statistically well funded methods are necessary to sample uniformly from the solution space and then a small number of samples are sufficient for statistical inferring.

Contribution

In this paper, we give a mini-review about the state-of-the-art of sampling and counting rearrangement scenarios, focusing on the reversal, DCJ and SCJ models. Above that, we also give a Gibbs sampler for sampling most parsimonious labeling of evolutionary trees under the SCJ model. The method has been implemented and tested on real life data. The software package together with example data can be downloaded from http://www.renyi.hu/~miklosi/SCJ-Gibbs/.",2015-10-02 +28335568,An Electricity Price-Aware Open-Source Smart Socket for the Internet of Energy. ,"The Internet of Energy (IoE) represents a novel paradigm where electrical power systems work cooperatively with smart devices to increase the visibility of energy consumption and create safer, cleaner and sustainable energy systems. The implementation of IoE services involves the use of multiple components, like embedded systems, power electronics or sensors, which are an essential part of the infrastructure dedicated to the generation and distribution energy and the one required by the final consumer. This article focuses on the latter and presents a smart socket system that collects the information about energy price and makes use of sensors and actuators to optimize home energy consumption according to the user preferences. Specifically, this article provides three main novel contributions. First, what to our knowledge is the first hardware prototype that manages in a practical real-world scenario the price values obtained from a public electricity operator is presented. The second contribution is related to the definition of a novel wireless sensor network communications protocol based on Wi-Fi that allows for creating an easy-to-deploy smart plug system that self-organizes and auto-configures to collect the sensed data, minimizing user intervention. 
Third, it is provided a thorough description of the design of one of the few open-source smart plug systems, including its communications architecture, the protocols implemented, the main sensing and actuation components and the most relevant pieces of the software. Moreover, with the aim of illustrating the capabilities of the smart plug system, the results of different experiments performed are shown. Such experiments evaluate in real-world scenarios the system's ease of use, its communications range and its performance when using HTTPS. Finally, the economic savings are estimated for different appliances, concluding that, in the practical situation proposed, the smart plug system allows certain energy-demanding appliances to save almost €70 per year.",2017-03-21 +28241391,Princeton_TIGRESS 2.0: High refinement consistency and net gains through support vector machines and molecular dynamics in double-blind predictions during the CASP11 experiment.,"Protein structure refinement is the challenging problem of operating on any protein structure prediction to improve its accuracy with respect to the native structure in a blind fashion. Although many approaches have been developed and tested during the last four CASP experiments, a majority of the methods continue to degrade models rather than improve them. Princeton_TIGRESS (Khoury et al., Proteins 2014;82:794-814) was developed previously and utilizes separate sampling and selection stages involving Monte Carlo and molecular dynamics simulations and classification using an SVM predictor. The initial implementation was shown to consistently refine protein structures 76% of the time in our own internal benchmarking on CASP 7-10 targets. In this work, we improved the sampling and selection stages and tested the method in blind predictions during CASP11. 
We added a decomposition of physics-based and hybrid energy functions, as well as a coordinate-free representation of the protein structure through distance-binning Cα-Cα distances to capture fine-grained movements. We performed parameter estimation to optimize the adjustable SVM parameters to maximize precision while balancing sensitivity and specificity across all cross-validated data sets, finding enrichment in our ability to select models from the populations of similar decoys generated for targets in CASPs 7-10. The MD stage was enhanced such that larger structures could be further refined. Among refinement methods that are currently implemented as web-servers, Princeton_TIGRESS 2.0 demonstrated the most consistent and most substantial net refinement in blind predictions during CASP11. The enhanced refinement protocol Princeton_TIGRESS 2.0 is freely available as a web server at http://atlas.engr.tamu.edu/refinement/. Proteins 2017; 85:1078-1098. © 2017 Wiley Periodicals, Inc.",2017-03-21 +26401099,BioImg.org: A Catalog of Virtual Machine Images for the Life Sciences.,"Virtualization is becoming increasingly important in bioscience, enabling assembly and provisioning of complete computer setups, including operating system, data, software, and services packaged as virtual machine images (VMIs). We present an open catalog of VMIs for the life sciences, where scientists can share information about images and optionally upload them to a server equipped with a large file system and fast Internet connection. Other scientists can then search for and download images that can be run on the local computer or in a cloud computing environment, providing easy access to bioinformatics environments. We also describe applications where VMIs aid life science research, including distributing tools and data, supporting reproducible analysis, and facilitating education. 
BioImg.org is freely available at: https://bioimg.org.",2015-09-10 +27084944,MBROLE 2.0-functional enrichment of chemical compounds.,"Metabolites Biological Role (MBROLE) is a server that performs functional enrichment analysis of a list of chemical compounds derived from a metabolomics experiment, which allows this list to be interpreted in biological terms. Since its release in 2011, MBROLE has been used by different groups worldwide to analyse metabolomics experiments from a variety of organisms. Here we present the latest version of the system, MBROLE2, accessible at http://csbg.cnb.csic.es/mbrole2 MBROLE2 has been supplemented with 10 databases not available in the previous version, which allow analysis over a larger, richer set of vocabularies including metabolite-protein and drug-protein interactions. This new version performs automatic conversion of compound identifiers from different databases, thus simplifying usage. In addition, the user interface has been redesigned to generate an interactive, more intuitive representation of the results.",2016-04-15 +27084939,RBscore&NBench: a high-level web server for nucleic acid binding residues prediction with a large-scale benchmarking database.,"RBscore&NBench combines a web server, RBscore and a database, NBench. RBscore predicts RNA-/DNA-binding residues in proteins and visualizes the prediction scores and features on protein structures. The scoring scheme of RBscore directly links feature values to nucleic acid binding probabilities and illustrates the nucleic acid binding energy funnel on the protein surface. To avoid dataset, binding site definition and assessment metric biases, we compared RBscore with 18 web servers and 3 stand-alone programs on 41 datasets, which demonstrated the high and stable accuracy of RBscore. A comprehensive comparison led us to develop a benchmark database named NBench. 
The web server is available on: http://ahsoka.u-strasbg.fr/rbscorenbench/.",2016-04-15 +27092486,Profilings of MicroRNAs in the Liver of Common Carp (Cyprinus carpio) Infected with Flavobacterium columnare.,"MicroRNAs (miRNAs) play important roles in regulation of many biological processes in eukaryotes, including pathogen infection and host interactions. Flavobacterium columnare (FC) infection can cause great economic loss of common carp (Cyprinus carpio) which is one of the most important cultured fish in the world. However, miRNAs in response to FC infection in common carp has not been characterized. To identify specific miRNAs involved in common carp infected with FC, we performed microRNA sequencing using livers of common carp infected with and without FC. A total of 698 miRNAs were identified, including 142 which were identified and deposited in the miRbase database (Available online: http://www.mirbase.org/) and 556 had only predicted miRNAs. Among the deposited miRNAs, eight miRNAs were first identified in common carp. Thirty of the 698 miRNAs were differentially expressed miRNAs (DIE-miRNAs) between the FC infected and control samples. From the DIE-miRNAs, seven were selected randomly and their expression profiles were confirmed to be consistent with the microRNA sequencing results using RT-PCR and qRT-PCR. In addition, a total of 27,363 target genes of the 30 DIE-miRNAs were predicted. The target genes were enriched in five Kyoto Encyclopedia of Genes and Genomes (KEGG) pathways, including focal adhesion, extracellular matrix (ECM)-receptor interaction, erythroblastic leukemia viral oncogene homolog (ErbB) signaling pathway, regulation of actin cytoskeleton, and adherent junction. 
The miRNA expression profile of the liver of common carp infected with FC will pave the way for the development of effective strategies to fight against FC infection.",2016-04-15 +22789590,CBrowse: a SAM/BAM-based contig browser for transcriptome assembly visualization and analysis.,"

Summary

To address the impending need for exploring rapidly increased transcriptomics data generated for non-model organisms, we developed CBrowse, an AJAX-based web browser for visualizing and analyzing transcriptome assemblies and contigs. Designed in a standard three-tier architecture with a data pre-processing pipeline, CBrowse is essentially a Rich Internet Application that offers many seamlessly integrated web interfaces and allows users to navigate, sort, filter, search and visualize data smoothly. The pre-processing pipeline takes the contig sequence file in FASTA format and its relevant SAM/BAM file as the input; detects putative polymorphisms, simple sequence repeats and sequencing errors in contigs and generates image, JSON and database-compatible CSV text files that are directly utilized by different web interfaces. CBrowse is a generic visualization and analysis tool that facilitates close examination of assembly quality, genetic polymorphisms, sequence repeats and/or sequencing errors in transcriptome sequencing projects.

Availability

CBrowse is distributed under the GNU General Public License, available at http://bioinfolab.muohio.edu/CBrowse/

Contact

liangc@muohio.edu or liangc.mu@gmail.com; glji@xmu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-12 +28488169,Cerebral oxygen desaturation occurs frequently in patients with hypertension undergoing major abdominal surgery.,"Hypertensive patients are more likely to experience latent cerebral ischemia causing regional cerebral oxygen saturation (rSO2) decrease during general anesthesia. The aim of this prospective observational study was to assess the incidence of decreased rSO2 in hypertensive patients undergoing major abdominal surgery and the perioperative factors affecting this change in rSO2. A total of 41 hypertensive patients were enrolled and stratified according to their hypertension as controlled and uncontrolled. The intraoperative rSO2 and physiological data were routinely collected. The Mini-Mental State Exam (MMSE) was used to test cognitive function before surgery and after 4 days. Cerebral desaturation was defined as a decrease in rSO2 of more than 20% of the baseline value. There were 20 patients (49%) suffering intraoperative cerebral desaturation classified into cerebral desaturation group (group D) and those 21 without intraoperative desaturation classified into normal group (group N). The area under the curve below 90 and 80% of baseline (AUCrSO2 <90% of baseline and AUCrSO2 <80% of baseline) was lower in patients of group N (2752.4 ± 1453.3 min% and 0.0 min%) than in patients of group D (6264.9 ± 1832.3 min% and 4486.5 ± 1664.9 min%, P < 0.001). Comparing the two groups, the number of uncontrolled hypertensive individuals in group D (12/20) was significantly more than group N (4/21) (P = 0.007). A significant correlation was observed between relative decrease in MAP and relative decrease in rSO2 (r2 = 0.495, P < 0.001). Moreover, nine patients (45%) in group D occurred early postoperative cognitive function decline were more than three patients (14.3%) in group N (P = 0.031). 
This pilot study showed a large proportion of hypertensive patient experienced cerebral desaturation during major abdominal surgery and uncontrolled hypertension predisposed to this desaturation. NCT02147275 (registered at http://www.clinicaltrials.gov ).",2017-05-09 +26226130,Node sampling for protein complex estimation in bait-prey graphs.,"In cellular biology, node-and-edge graph or ""network"" data collection often uses bait-prey technologies such as co-immunoprecipitation (CoIP). Bait-prey technologies assay relationships or ""interactions"" between protein pairs, with CoIP specifically measuring protein complex co-membership. Analyses of CoIP data frequently focus on estimating protein complex membership. Due to budgetary and other constraints, exhaustive assay of the entire network using CoIP is not always possible. We describe a stratified sampling scheme to select baits for CoIP experiments when protein complex estimation is the main goal. Expanding upon the classic framework in which nodes represent proteins and edges represent pairwise interactions, we define generalized nodes as sets of adjacent nodes with identical adjacency outside the set and use these as strata from which to select the next set of baits. Strata are redefined at each round of sampling to incorporate accumulating data. This scheme maintains user-specified quality thresholds for protein complex estimates and, relative to simple random sampling, leads to a marked increase in the number of correctly estimated complexes at each round of sampling. The R package seqSample contains all source code and is available at http://vault.northwestern.edu/~dms877/Rpacks/.",2015-08-01 +28345145,Air pollution and short-term clinical outcomes of patients with acute myocardial infarction.,"Ambient air pollution is well-known to be a serious risk factor for cardiovascular diseases, stroke, and death. 
However, the association between air pollutants (AP) exposure and short-term clinical outcomes in acute myocardial infarction (AMI) patients (pts) has not been elucidated well. In the present study, 37 880 AMI pts were enrolled from October 2005 to December 2013 in a nationwide large-scale, prospective, multicentre Korea AMI registry (KAMIR registry; http://www.kamir.or.kr). We obtained data on AP (e.g., NO2 , SO2 , CO, O3 and PM10 ) from the Korean National Institute of Environmental Research (NIER; http://www.nier.go.kr). Clinical endpoints included death, recurrent myocardial infarction (Re-MI), any revascularization and composite of all-cause death and Re-MI. Exposure to AP is defined as the average exposure to AP within 24 hours before AMI admission. We observed that a 0.01 part per million (ppm) increase in NO2 concentration, 0.001 ppm increase in SO2 concentration, and 0.1 ppm increase in CO concentration each increased the risk of total death by 9.7% (95% CI, 6.2%-13.4%), 1.9% (95% CI, 0.3%-3.6%), and 2.1% (95% CI, 0.5%-3.9%), respectively. Exceptionally, O3 decreased the risk of total death by 0.6% (95% CI -0.2% to -1.0%) per 0.01 ppm increase. PM10 was not related to any cardiovascular events. AP were each stratified into five quintiles according to ranges of AP levels. After adjusting analysis for risk variables, only high quintiles (Q4, Q5) of NO2 were positively associated with total death, cardiac death and MI, while SO2 , CO, O3 and PM10 were shown to be not related to any cardiovascular events at all levels. In AMI patients, each AP and its concentration has shown a different effect to short-term mortality and cardiovascular events.",2017-06-01 +25725494,GeLL: a generalized likelihood library for phylogenetic models.,"

Unlabelled

Phylogenetic models are an important tool in molecular evolution allowing us to study the pattern and rate of sequence change. The recent influx of new sequence data in the biosciences means that to address evolutionary questions, we need a means for rapid and easy model development and implementation. Here we present GeLL, a Java library that lets users use text to quickly and efficiently define novel forms of discrete data and create new substitution models that describe how those data change on a phylogeny. GeLL allows users to define general substitution models and data structures in a way that is not possible in other existing libraries, including mixture models and non-reversible models. Classes are provided for calculating likelihoods, optimizing model parameters and branch lengths, ancestral reconstruction and sequence simulation.

Availability and implementation

http://phylo.bio.ku.edu/GeLL under a GPL v3 license.",2015-02-27 +23203874,The cell: an image library-CCDB: a curated repository of microscopy data.,"The cell: an image library-CCDB (CIL-CCDB) (http://www.cellimagelibrary.org) is a searchable database and archive of cellular images. As a repository for microscopy data, it accepts all forms of cell imaging from light and electron microscopy, including multi-dimensional images, Z- and time stacks in a broad variety of raw-data formats, as well as movies and animations. The software design of CIL-CCDB was intentionally designed to allow easy incorporation of new technologies and image formats as they are developed. Currently, CIL-CCDB contains over 9250 images from 358 different species. Images are evaluated for quality and annotated with terms from 14 different ontologies in 16 different fields as well as a basic description and technical details. Since its public launch on 9 August 2010, it has been designed to serve as not only an archive but also an active site for researchers and educators.",2012-11-29 +27071849,Fast and sensitive taxonomic classification for metagenomics with Kaiju.,"Metagenomics emerged as an important field of research not only in microbial ecology but also for human health and disease, and metagenomic studies are performed on increasingly larger scales. While recent taxonomic classification programs achieve high speed by comparing genomic k-mers, they often lack sensitivity for overcoming evolutionary divergence, so that large fractions of the metagenomic reads remain unclassified. Here we present the novel metagenome classifier Kaiju, which finds maximum (in-)exact matches on the protein-level using the Burrows-Wheeler transform. We show in a genome exclusion benchmark that Kaiju classifies reads with higher sensitivity and similar precision compared with current k-mer-based classifiers, especially in genera that are underrepresented in reference databases. 
We also demonstrate that Kaiju classifies up to 10 times more reads in real metagenomes. Kaiju can process millions of reads per minute and can run on a standard PC. Source code and web server are available at http://kaiju.binf.ku.dk.",2016-04-13 +27085416,A profile of sphingolipids and related compounds tentatively identified in yak milk.,"This work characterized a fraction of constituents in yak milk within the realm of approximately 1,000 to 3,000 Da using matrix-assisted laser desorption/ionization (MALDI) time-of-flight mass spectrometry. Eleven samples of yak milk powder from the Sichuan province of China were received by the Department of Food Science, University of Wisconsin-Madison, and stored at room temperature until analysis. Sample preparation involved delipidation and deproteinization of yak milk samples and cold ethanol precipitation. Subsequently, MALDI time-of-flight mass spectrometry was performed in positive ion, reflector mode (AB Sciex TOF/TOF 4800 MALDI; AB Sciex, Foster City, CA). The instrument was first calibrated with the manufacturer's 6-peptide mixture, and each spectrum was internally calibrated using the accurate mass of ACTH Fragment 18-39 standard peptide (protonated mass at m/z 2464.199) present in each sample. Laser power was adjusted for the calibration standards and for each sample so that the signal obtained for the most-abundant ion in each spectrum could be maximized, or kept below ~2×10(4) to preserve spectral quality. Structure and name based on mass were matched using the Metlin metabolite database (https://metlin.scripps.edu/index.php). Results of the current work for yak milk powder showed a large variety of sphingolipid structures with clusters around 1,200, 1,600, and 2,000 Da. 
The profiling matched several glycosphingolipids, such as gangliosides GA1, GD1a, GD1b, GD3, GM1, GM2, GM3, and GT2 and several other unique moieties, including deaminated neuraminic acid (KDN) oligosaccharides, and fucose containing gangliosides. Matrix preparation and MALDI time-of-flight parameters were important factors established in this work to allow high resolution profiling of complex sphingolipids in yak powder milk.",2016-04-13 +26329719,CompGO: an R package for comparing and visualizing Gene Ontology enrichment differences between DNA binding experiments.,"

Background

Gene ontology (GO) enrichment is commonly used for inferring biological meaning from systems biology experiments. However, determining differential GO and pathway enrichment between DNA-binding experiments or using the GO structure to classify experiments has received little attention.

Results

Herein, we present a bioinformatics tool, CompGO, for identifying Differentially Enriched Gene Ontologies, called DiEGOs, and pathways, through the use of a z-score derivation of log odds ratios, and visualizing these differences at GO and pathway level. Through public experimental data focused on the cardiac transcription factor NKX2-5, we illustrate the problems associated with comparing GO enrichments between experiments using a simple overlap approach.

Conclusions

We have developed an R/Bioconductor package, CompGO, which implements a new statistic normally used in epidemiological studies for performing comparative GO analyses and visualizing comparisons from .BED data containing genomic coordinates as well as gene lists as inputs. We justify the statistic through inclusion of experimental data and compare to the commonly used overlap method. CompGO is freely available as a R/Bioconductor package enabling easy integration into existing pipelines and is available at: http://www.bioconductor.org/packages/release/bioc/html/CompGO.html.",2015-09-02 +28636529,"Bladder Cancer and Water Disinfection By-product Exposures through Multiple Routes: A Population-Based Case-Control Study (New England, USA).","

Background

Ingestion of disinfection byproducts has been associated with bladder cancer in multiple studies. Although associations with other routes of exposure have been suggested, epidemiologic evidence is limited.

Objectives

We evaluated the relationship between bladder cancer and total, chlorinated, and brominated trihalomethanes (THMs) through various exposure routes.

Methods

In a population-based case–control study in New England (n=1,213 cases; n=1,418 controls), we estimated lifetime exposure to THMs from ingestion, showering/bathing, and hours of swimming pool use. We calculated odds ratios (ORs) and 95% confidence intervals (CIs) using unconditional logistic regression adjusted for confounders.

Results

Adjusted ORs for bladder cancer comparing participants with exposure above the 95th percentile with those in the lowest quartile of exposure (based on the distribution in controls) were statistically significant for average daily intake mg/d of total THMs [OR=1.53 (95% CI: 1.01, 2.32), p-trend=0.16] and brominated THMs [OR=1.98 (95% CI: 1.19, 3.29), p-trend=0.03]. For cumulative intake mg, the OR at the 95th percentile of total THMs was 1.45 (95% CI: 0.95, 2.2), p-trend=0.13; the ORs at the 95th percentile for chlorinated and brominated THMs were 1.77 (95% CI: 1.05, 2.99), p-trend=0.07 and 1.78 (95% CI: 1.05, 3.00), p-trend=0.02, respectively. The OR in the highest category of showering/bathing for brominated THMs was 1.43 (95% CI: 0.80, 2.42), p-trend=0.10. We found no evidence of an association for bladder cancer and hours of swimming pool use.

Conclusions

We observed a modest association between ingestion of water with higher THMs (>95th percentile vs.<25th percentile) and bladder cancer. Brominated THMs have been a particular concern based on toxicologic evidence, and our suggestive findings for multiple metrics require further study in a population with higher levels of these exposures. Data from this population do not support an association between swimming pool use and bladder cancer. https://doi.org/10.1289/EHP89.",2017-06-21 +23436708,Scanning of novel cancer/testis proteins by human testis proteomic analysis.,"The testes are where spermatogenesis, the sperm-generating process that is unique to men, occurs. Importantly, human spermatogenesis and tumorigenesis share key similarities. Until now, only a few proteins in the human testis have been identified due to limitations of available technology. In this paper, using an advanced proteomics platform, we have identified 7346 unique proteins within the human testis with a high degree of confidence. Immunohistochemistry data from the Human Protein Atlas database show over 90% (1833/2020) of identified proteins can be detected in the human testis using specific antibodies. To make the data widely available to the scientific community, an online Human Testis Proteome Database (HTPD, http://reprod.njmu.edu.cn/htpd/) was built. Many of the identified human testicular proteins are associated with human infertility, especially human testicular predominantly expressed proteins. We characterized six novel cancer/testis genes (TMPRSS12, TPPP2, PRSS55, DMRT1, PIWIL1, HEMGN), which map to cancer-associated genetic variants positions, in both the cancer and testis tissues using genome-wide analyses. 
Our results provide a molecular connection between spermatogenesis and tumorigenesis and broaden the range of cancer antigen choice available for immunotherapy.",2013-03-06 +26231429,PBAP: a pipeline for file processing and quality control of pedigree data with dense genetic markers.,"

Motivation

Huge genetic datasets with dense marker panels are now common. With the availability of sequence data and recognition of importance of rare variants, smaller studies based on pedigrees are again also common. Pedigree-based samples often start with a dense marker panel, a subset of which may be used for linkage analysis to reduce computational burden and to limit linkage disequilibrium between single-nucleotide polymorphisms (SNPs). Programs attempting to select markers for linkage panels exist but lack flexibility.

Results

We developed a pedigree-based analysis pipeline (PBAP) suite of programs geared towards SNPs and sequence data. PBAP performs quality control, marker selection and file preparation. PBAP sets up files for MORGAN, which can handle analyses for small and large pedigrees, typically human, and results can be used with other programs and for downstream analyses. We evaluate and illustrate its features with two real datasets.

Availability and implementation

PBAP scripts may be downloaded from http://faculty.washington.edu/wijsman/software.shtml.

Contact

wijsman@uw.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-30 +23564845,Network predicting drug's anatomical therapeutic chemical code.,"

Motivation

Discovering drug's Anatomical Therapeutic Chemical (ATC) classification rules at molecular level is of vital importance to understand a vast majority of drugs action. However, few studies attempt to annotate drug's potential ATC-codes by computational approaches.

Results

Here, we introduce drug-target network to computationally predict drug's ATC-codes and propose a novel method named NetPredATC. Starting from the assumption that drugs with similar chemical structures or target proteins share common ATC-codes, our method, NetPredATC, aims to assign drug's potential ATC-codes by integrating chemical structures and target proteins. Specifically, we first construct a gold-standard positive dataset from drugs' ATC-code annotation databases. Then we characterize ATC-code and drug by their similarity profiles and define kernel function to correlate them. Finally, we use a kernel method, support vector machine, to automatically predict drug's ATC-codes. Our method was validated on four drug datasets with various target proteins, including enzymes, ion channels, G-protein couple receptors and nuclear receptors. We found that both drug's chemical structure and target protein are predictive, and target protein information has better accuracy. Further integrating these two data sources revealed more experimentally validated ATC-codes for drugs. We extensively compared our NetPredATC with SuperPred, which is a chemical similarity-only based method. Experimental results showed that our NetPredATC outperforms SuperPred not only in predictive coverage but also in accuracy. In addition, database search and functional annotation analysis support that our novel predictions are worthy of future experimental validation.

Conclusion

In conclusion, our new method, NetPredATC, can predict drug's ATC-codes more accurately by incorporating drug-target network and integrating data, which will promote drug mechanism understanding and drug repositioning and discovery.

Availability

NetPredATC is available at http://doc.aporc.org/wiki/NetPredATC.

Contact

ycwang@nwipb.cas.cn or ywang@amss.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-05 +23677943,Updating RNA-Seq analyses after re-annotation.,"

Unlabelled

The estimation of isoform abundances from RNA-Seq data requires a time-intensive step of mapping reads to either an assembled or previously annotated transcriptome, followed by an optimization procedure for deconvolution of multi-mapping reads. These procedures are essential for downstream analysis such as differential expression. In cases where it is desirable to adjust the underlying annotation, for example, on the discovery of novel isoforms or errors in existing annotations, current pipelines must be rerun from scratch. This makes it difficult to update abundance estimates after re-annotation, or to explore the effect of changes in the transcriptome on analyses. We present a novel efficient algorithm for updating abundance estimates from RNA-Seq experiments on re-annotation that does not require re-analysis of the entire dataset. Our approach is based on a fast partitioning algorithm for identifying transcripts whose abundances may depend on the added or deleted isoforms, and on a fast follow-up approach to re-estimating abundances for all transcripts. We demonstrate the effectiveness of our methods by showing how to synchronize RNA-Seq abundance estimates with the daily RefSeq incremental updates. Thus, we provide a practical approach to maintaining relevant databases of RNA-Seq derived abundance estimates even as annotations are being constantly revised.

Availability and implementation

Our methods are implemented in software called ReXpress and are freely available, together with source code, at http://bio.math.berkeley.edu/ReXpress/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-14 +24194598,JASPAR 2014: an extensively expanded and updated open-access database of transcription factor binding profiles.,"JASPAR (http://jaspar.genereg.net) is the largest open-access database of matrix-based nucleotide profiles describing the binding preference of transcription factors from multiple species. The fifth major release greatly expands the heart of JASPAR-the JASPAR CORE subcollection, which contains curated, non-redundant profiles-with 135 new curated profiles (74 in vertebrates, 8 in Drosophila melanogaster, 10 in Caenorhabditis elegans and 43 in Arabidopsis thaliana; a 30% increase in total) and 43 older updated profiles (36 in vertebrates, 3 in D. melanogaster and 4 in A. thaliana; a 9% update in total). The new and updated profiles are mainly derived from published chromatin immunoprecipitation-seq experimental datasets. In addition, the web interface has been enhanced with advanced capabilities in browsing, searching and subsetting. Finally, the new JASPAR release is accompanied by a new BioPython package, a new R tool package and a new R/Bioconductor data package to facilitate access for both manual and automated methods.",2013-11-04 +22764159,BasyLiCA: a tool for automatic processing of a Bacterial Live Cell Array.,"

Unlabelled

Live Cell Array (LCA) technology allows the acquisition of high-resolution time-course profiles of bacterial gene expression by the systematic assessment of fluorescence in living cells carrying either transcriptional or translational fluorescent protein fusion. However, the direct estimation of promoter activities by time-dependent derivation of the fluorescence datasets generates high levels of noise. Here, we present BasyLiCA, a user-friendly open-source interface and database dedicated to the automatic storage and standardized treatment of LCA data. Data quality reports are generated automatically. Growth rates and promoter activities are calculated by tunable discrete Kalman filters that can be set to incorporate data from biological replicates, significantly reducing the impact of noise measurement in activity estimations.

Availability

The BasyLiCA software and the related documentation are available at http://genome.jouy.inra.fr/basylica.",2012-07-04 +23299413,ERISdb: a database of plant splice sites and splicing signals.,"Splicing is one of the major contributors to observed spatiotemporal diversification of transcripts and proteins in metazoans. There are numerous factors that affect the process, but splice sites themselves along with the adjacent splicing signals are critical here. Unfortunately, there is still little known about splicing in plants and, consequently, further research in some fields of plant molecular biology will encounter difficulties. Keeping this in mind, we performed a large-scale analysis of splice sites in eight plant species, using novel algorithms and tools developed by us. The analyses included identification of orthologous splice sites, polypyrimidine tracts and branch sites. Additionally we identified putative intronic and exonic cis-regulatory motifs, U12 introns as well as splice sites in 45 microRNA genes in five plant species. We also provide experimental evidence for plant splice sites in the form of expressed sequence tag and RNA-Seq data. All the data are stored in a novel database called ERISdb and are freely available at http://lemur.amu.edu.pl/share/ERISdb/.",2013-01-07 +,"Overexpression of OsRDCP1, a rice RING domain-containing E3 ubiquitin ligase, increased tolerance to drought stress in rice (Oryza sativa L.)","CaRma1H1 was previously identified as a hot pepper drought-induced RING E3 Ub ligase. We have identified five putative proteins that display a significant sequence identity with CaRma1H1 in the rice genome database (http://signal.salk.edu/cgi-bin/RiceGE). These five rice paralogs possess a single RING motif in their N-terminal regions, consistent with the notion that RING proteins are encoded by a multi-gene family. Therefore, these proteins were named OsRDCPs (Oryza sativa RING domain-containing proteins). 
Among these paralogs, OsRDCP1 was induced by drought stress, whereas the other OsRDCP members were constitutively expressed, with OsRDCP4 transcripts expressed at the highest level in rice seedlings. osrdcp1 loss-of-function knockout mutant and OsRDCP1-overexpressing transgenic rice plants were developed. Phenotypic analysis showed that wild-type plants and the homozygous osrdcp1 G2 mutant line displayed similar phenotypes under normal growth conditions and in response to drought stress. This may be due to complementation by other OsRDCP paralogs. In contrast, 35S:OsRDCP1 T2 transgenic rice plants exhibited improved tolerance to severe water deficits. Although the physiological function of OsRDCP1 remains unclear, there are several possible mechanisms for its involvement in a subset of physiological responses to counteract dehydration stress in rice plants.",2011-06-01 +26797014,Predicting DNA Methylation State of CpG Dinucleotide Using Genome Topological Features and Deep Networks.,"The hypo- or hyper-methylation of the human genome is one of the epigenetic features of leukemia. However, experimental approaches have only determined the methylation state of a small portion of the human genome. We developed deep learning based (stacked denoising autoencoders, or SdAs) software named ""DeepMethyl"" to predict the methylation state of DNA CpG dinucleotides using features inferred from three-dimensional genome topology (based on Hi-C) and DNA sequence patterns. We used the experimental data from immortalised myelogenous leukemia (K562) and healthy lymphoblastoid (GM12878) cell lines to train the learning models and assess prediction performance. We have tested various SdA architectures with different configurations of hidden layer(s) and amount of pre-training data and compared the performance of deep networks relative to support vector machines (SVMs). 
Using the methylation states of sequentially neighboring regions as one of the learning features, an SdA achieved a blind test accuracy of 89.7% for GM12878 and 88.6% for K562. When the methylation states of sequentially neighboring regions are unknown, the accuracies are 84.82% for GM12878 and 72.01% for K562. We also analyzed the contribution of genome topological features inferred from Hi-C. DeepMethyl can be accessed at http://dna.cs.usm.edu/deepmethyl/.",2016-01-22 +23418188,Chemical rule-based filtering of MS/MS spectra.,"

Motivation

Identification of proteins by mass spectrometry-based proteomics requires automated interpretation of peptide tandem mass spectrometry spectra. The effectiveness of peptide identification can be greatly improved by filtering out extraneous noise peaks before the subsequent database searching steps.

Results

Here we present a novel chemical rule-based filtering algorithm, termed CRF, which makes use of the predictable patterns (rules) of collision-induced peptide fragmentation. The algorithm selects peak pairs that obey the common fragmentation rules within plausible limits of mass tolerance as well as peak intensity and produces spectra that can be subsequently submitted to any search engine. CRF increases the positive predictive value and decreases the number of random matches and thus improves performance by 15-20% in terms of peptide annotation using search engines, such as X!Tandem. Importantly, the algorithm also achieves data compression rates of ∼75%.

Availability

The MATLAB source code and a web server are available at http://hydrax.icgeb.trieste.it/CRFilter/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-15 +28104956,MPDB: Molecular Pathways Brain Database.,"Molecular Pathways Brain Database (MPDB), is a novel database for molecular information of the brain pathways and is an initiative to provide an organized platform for researchers in the field of neuro-informatics. The database currently has information from 1850 molecules for three different sensory pathways namely olfactory transduction, photo transduction and long-term potentiation. The usefulness of the database is demonstrated by an analysis of the olfactory transduction pathway which helps understand their olfactory specificity and further indicates that some of the molecules have evolved independently among these organisms as per the need of time and function. The database is available for free at http://pranag.physics.iisc.ernet.in/mpdb/.",2016-04-10 +28196472,PreMeta: a tool to facilitate meta-analysis of rare-variant associations.,"

Background

Meta-analysis is essential to the discovery of rare variants that influence complex diseases and traits. Four major software packages, namely MASS, MetaSKAT, RAREMETAL, and seqMeta, have been developed to perform meta-analysis of rare-variant associations. These packages first generate summary statistics for each study and then perform the meta-analysis by combining the summary statistics. Because of incompatible file formats and non-equivalent summary statistics, the output files from the study-level analysis of one package cannot be directly used to perform meta-analysis in another package.

Results

We developed a computationally efficient software program, PreMeta, to resolve the non-compatibility of the four software packages and to facilitate meta-analysis of large-scale sequencing studies in a consortium setting. PreMeta reformats the output files of study-level summary statistics generated by the four packages (text files produced by MASS and RAREMETAL, binary files produced by MetaSKAT, and R data files produced by seqMeta) and translates the summary statistics from one form to another, such that the summary statistics from any package can be used to perform meta-analysis in any other package. With this tool, consortium members are not required to use the same software for study-level analyses. In addition, PreMeta checks for allele mismatches, corrects summary statistics, and allows the rescaled inverse normal transformation to be performed at the meta-analysis stage by rescaling summary statistics.

Conclusions

PreMeta processes summary statistics from the four packages to make them compatible and avoids the need to redo study-level analyses. PreMeta documentation and executable are available at: http://dlin.web.unc.edu/software/premeta .",2017-02-14 +27283952,TaggerOne: joint named entity recognition and normalization with semi-Markov Models.,"

Motivation

Text mining is increasingly used to manage the accelerating pace of the biomedical literature. Many text mining applications depend on accurate named entity recognition (NER) and normalization (grounding). While high performing machine learning methods trainable for many entity types exist for NER, normalization methods are usually specialized to a single entity type. NER and normalization systems are also typically used in a serial pipeline, causing cascading errors and limiting the ability of the NER system to directly exploit the lexical information provided by the normalization.

Methods

We propose the first machine learning model for joint NER and normalization during both training and prediction. The model is trainable for arbitrary entity types and consists of a semi-Markov structured linear classifier, with a rich feature approach for NER and supervised semantic indexing for normalization. We also introduce TaggerOne, a Java implementation of our model as a general toolkit for joint NER and normalization. TaggerOne is not specific to any entity type, requiring only annotated training data and a corresponding lexicon, and has been optimized for high throughput.

Results

We validated TaggerOne with multiple gold-standard corpora containing both mention- and concept-level annotations. Benchmarking results show that TaggerOne achieves high performance on diseases (NCBI Disease corpus, NER f-score: 0.829, normalization f-score: 0.807) and chemicals (BioCreative 5 CDR corpus, NER f-score: 0.914, normalization f-score 0.895). These results compare favorably to the previous state of the art, notwithstanding the greater flexibility of the model. We conclude that jointly modeling NER and normalization greatly improves performance.

Availability and implementation

The TaggerOne source code and an online demonstration are available at: http://www.ncbi.nlm.nih.gov/bionlp/taggerone

Contact

zhiyong.lu@nih.gov

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-09 +25790783,"FQC: A novel approach for efficient compression, archival, and dissemination of fastq datasets.","Sequence data repositories archive and disseminate fastq data in compressed format. In spite of having relatively lower compression efficiency, data repositories continue to prefer GZIP over available specialized fastq compression algorithms. Ease of deployment, high processing speed and portability are the reasons for this preference. This study presents FQC, a fastq compression method that, in addition to providing significantly higher compression gains over GZIP, incorporates features necessary for universal adoption by data repositories/end-users. This study also proposes a novel archival strategy which allows sequence repositories to simultaneously store and disseminate lossless as well as (multiple) lossy variants of fastq files, without necessitating any additional storage requirements. For academic users, Linux, Windows, and Mac implementations (both 32 and 64-bit) of FQC are freely available for download at: https://metagenomics.atc.tcs.com/compression/FQC .",2015-02-08 +22434830,YeastMine--an integrated data warehouse for Saccharomyces cerevisiae data as a multipurpose tool-kit.,"The Saccharomyces Genome Database (SGD; http://www.yeastgenome.org/) provides high-quality curated genomic, genetic, and molecular information on the genes and their products of the budding yeast Saccharomyces cerevisiae. To accommodate the increasingly complex, diverse needs of researchers for searching and comparing data, SGD has implemented InterMine (http://www.InterMine.org), an open source data warehouse system with a sophisticated querying interface, to create YeastMine (http://yeastmine.yeastgenome.org). YeastMine is a multifaceted search and retrieval environment that provides access to diverse data types. 
Searches can be initiated with a list of genes, a list of Gene Ontology terms, or lists of many other data types. The results from queries can be combined for further analysis and saved or downloaded in customizable file formats. Queries themselves can be customized by modifying predefined templates or by creating a new template to access a combination of specific data types. YeastMine offers multiple scenarios in which it can be used such as a powerful search interface, a discovery tool, a curation aid and also a complex database presentation format. DATABASE URL: http://yeastmine.yeastgenome.org.",2012-03-20 +27273671,MIA: non-targeted mass isotopolome analysis.,"

Unlabelled

MIA detects and visualizes isotopic enrichment in gas chromatography electron ionization mass spectrometry (GC-EI-MS) datasets in a non-targeted manner. It provides an easy-to-use graphical user interface that allows for visual mass isotopomer distribution analysis across multiple datasets. MIA helps to reveal changes in metabolic fluxes, visualizes metabolic proximity of isotopically enriched compounds and shows the fate of the applied stable isotope labeled tracer.

Availability and implementation

Linux and Windows binaries, documentation, and sample data are freely available for download at http://massisotopolomeanalyzer.lu. MIA is a stand-alone application implemented in C++ and based on Qt5, NTFD and the MetaboliteDetector framework.

Contact

karsten.hiller@uni.lu.",2016-06-06 +27157862,"The effects of family, dentition, and dental caries on the salivary microbiome.","

Purpose

Family members share genes, environment, and microbial communities. If there is a strong effect of family on the salivary microbiota, controlling for family will enhance identification of microbial communities associated with cariogenesis. The present study was designed to assess the similarity of the salivary microbiome among families and the association between the salivary microbiome and dental decay taking age into account.

Methods

We selected families (n = 49) participating in the cohort study of oral health conducted by the Center for Oral Health Research in Appalachia. All families where at least two children and at least one parent gave a saliva sample (n = 173) were included. Saliva samples were collected at least 1 hour after eating or drinking. After DNA extraction, the V6 region of the 16s rRNA gene was sequenced. Paired ends were joined using fast length adjustment of short reads, sequences were demultiplexed and filtered using Quantitative Insights Into Microbial Ecology 1.9.0, and taxonomy was assigned using the Ribosomal Database Project (RDP; http://rdp.cme.msu.edu/) classifier and sequences aligned with the CORE database using PyNAST.

Results

The salivary microbiome changed with age and was more similar within families than between families. There was no difference in the diversity of the salivary microbiome by dental decay. After taking into account age and family, signals of dental decay were weak in the saliva, whether examined at the phyla, genus, or operational taxonomic level.

Conclusions

The salivary microbiome does not appear to be a good indicator of dental caries.",2016-04-09 +25874114,Analysis and visualisation of movement: an interdisciplinary review.,"The processes that cause and influence movement are one of the main points of enquiry in movement ecology. However, ecology is not the only discipline interested in movement: a number of information sciences are specialising in analysis and visualisation of movement data. The recent explosion in availability and complexity of movement data has resulted in a call in ecology for new appropriate methods that would be able to take full advantage of the increasingly complex and growing data volume. One way in which this could be done is to form interdisciplinary collaborations between ecologists and experts from information sciences that analyse movement. In this paper we present an overview of new movement analysis and visualisation methodologies resulting from such an interdisciplinary research network: the European COST Action ""MOVE - Knowledge Discovery from Moving Objects"" (http://www.move-cost.info). This international network evolved over four years and brought together some 140 researchers from different disciplines: those that collect movement data (out of which the movement ecology was the largest represented group) and those that specialise in developing methods for analysis and visualisation of such data (represented in MOVE by computational geometry, geographic information science, visualisation and visual analytics). 
We present MOVE achievements and at the same time put them in ecological context by exploring relevant ecological themes to which MOVE studies do or potentially could contribute.",2015-03-10 +25987413,A Scalable Approach for Protein False Discovery Rate Estimation in Large Proteomic Data Sets.,"Calculating the number of confidently identified proteins and estimating false discovery rate (FDR) is a challenge when analyzing very large proteomic data sets such as entire human proteomes. Biological and technical heterogeneity in proteomic experiments further add to the challenge and there are strong differences in opinion regarding the conceptual validity of a protein FDR and no consensus regarding the methodology for protein FDR determination. There are also limitations inherent to the widely used classic target-decoy strategy that particularly show when analyzing very large data sets and that lead to a strong over-representation of decoy identifications. In this study, we investigated the merits of the classic, as well as a novel target-decoy-based protein FDR estimation approach, taking advantage of a heterogeneous data collection comprised of ∼19,000 LC-MS/MS runs deposited in ProteomicsDB (https://www.proteomicsdb.org). The ""picked"" protein FDR approach treats target and decoy sequences of the same protein as a pair rather than as individual entities and chooses either the target or the decoy sequence depending on which receives the highest score. We investigated the performance of this approach in combination with q-value based peptide scoring to normalize sample-, instrument-, and search engine-specific differences. The ""picked"" target-decoy strategy performed best when protein scoring was based on the best peptide q-value for each protein yielding a stable number of true positive protein identifications over a wide range of q-value thresholds. 
We show that this simple and unbiased strategy eliminates a conceptual issue in the commonly used ""classic"" protein FDR approach that causes overprediction of false-positive protein identification in large data sets. The approach scales from small to very large data sets without losing performance, consistently increases the number of true-positive protein identifications and is readily implemented in proteomics analysis software.",2015-05-17 +26309201,MAFsnp: A Multi-Sample Accurate and Flexible SNP Caller Using Next-Generation Sequencing Data.,"Most existing statistical methods developed for calling single nucleotide polymorphisms (SNPs) using next-generation sequencing (NGS) data are based on Bayesian frameworks, and there does not exist any SNP caller that produces p-values for calling SNPs in a frequentist framework. To fill in this gap, we develop a new method MAFsnp, a Multiple-sample based Accurate and Flexible algorithm for calling SNPs with NGS data. MAFsnp is based on an estimated likelihood ratio test (eLRT) statistic. In practical situation, the involved parameter is very close to the boundary of the parametric space, so the standard large sample property is not suitable to evaluate the finite-sample distribution of the eLRT statistic. Observing that the distribution of the test statistic is a mixture of zero and a continuous part, we propose to model the test statistic with a novel two-parameter mixture distribution. Once the parameters in the mixture distribution are estimated, p-values can be easily calculated for detecting SNPs, and the multiple-testing corrected p-values can be used to control false discovery rate (FDR) at any pre-specified level. With simulated data, MAFsnp is shown to have much better control of FDR than the existing SNP callers. Through the application to two real datasets, MAFsnp is also shown to outperform the existing SNP callers in terms of calling accuracy. 
An R package ""MAFsnp"" implementing the new SNP caller is freely available at http://homepage.fudan.edu.cn/zhangh/softwares/.",2015-08-26 +28550171,β-Adrenergic receptor stimulation inhibits proarrhythmic alternans in postinfarction border zone cardiomyocytes: a computational analysis.,"The border zone (BZ) of the viable myocardium adjacent to an infarct undergoes extensive autonomic and electrical remodeling and is prone to repolarization alternans-induced cardiac arrhythmias. BZ remodeling processes may promote or inhibit Ca2+ and/or repolarization alternans and may differentially affect ventricular arrhythmogenesis. Here, we used a detailed computational model of the canine ventricular cardiomyocyte to study the determinants of alternans in the BZ and their regulation by β-adrenergic receptor (β-AR) stimulation. The BZ model developed Ca2+ transient alternans at slower pacing cycle lengths than the control model, suggesting that the BZ may promote spatially heterogeneous alternans formation in an infarcted heart. β-AR stimulation abolished alternans. By evaluating all combinations of downstream β-AR stimulation targets, we identified both direct (via ryanodine receptor channels) and indirect [via sarcoplasmic reticulum (SR) Ca2+ load] modulation of SR Ca2+ release as critical determinants of Ca2+ transient alternans. These findings were confirmed in a human ventricular cardiomyocyte model. Cell-to-cell coupling indirectly modulated the likelihood of alternans by affecting the action potential upstroke, reducing the trigger for SR Ca2+ release in one-dimensional strand simulations. However, β-AR stimulation inhibited alternans in both single and multicellular simulations. 
Taken together, these data highlight a potential antiarrhythmic role of sympathetic hyperinnervation in the BZ by reducing the likelihood of alternans and provide new insights into the underlying mechanisms controlling Ca2+ transient and repolarization alternans.NEW & NOTEWORTHY We integrated, for the first time, postmyocardial infarction electrical and autonomic remodeling in a detailed, validated computer model of β-adrenergic stimulation in ventricular cardiomyocytes. Here, we show that β-adrenergic stimulation inhibits alternans and provide novel insights into underlying mechanisms, adding to a recent controversy about pro-/antiarrhythmic effects of postmyocardial infarction hyperinnervation.Listen to this article's corresponding podcast at http://ajpheart.podbean.com/e/%CE%B2-ar-stimulation-and-alternans-in-border-zone-cardiomyocytes/.",2017-05-26 +25688256,MRM-DIFF: data processing strategy for differential analysis in large scale MRM-based lipidomics studies.,"Based on theoretically calculated comprehensive lipid libraries, in lipidomics as many as 1000 multiple reaction monitoring (MRM) transitions can be monitored for each single run. On the other hand, lipid analysis from each MRM chromatogram requires tremendous manual efforts to identify and quantify lipid species. Isotopic peaks differing by up to a few atomic masses further complicate analysis. To accelerate the identification and quantification process we developed novel software, MRM-DIFF, for the differential analysis of large-scale MRM assays. It supports a correlation optimized warping (COW) algorithm to align MRM chromatograms and utilizes quality control (QC) sample datasets to automatically adjust the alignment parameters. Moreover, user-defined reference libraries that include the molecular formula, retention time, and MRM transition can be used to identify target lipids and to correct peak abundances by considering isotopic peaks. 
Here, we demonstrate the software pipeline and introduce key points for MRM-based lipidomics research to reduce the mis-identification and overestimation of lipid profiles. The MRM-DIFF program, example data set and the tutorials are downloadable at the ""Standalone software"" section of the PRIMe (Platform for RIKEN Metabolomics, http://prime.psc.riken.jp/) database website.",2014-01-01 +23323138,Current status of the diagnosis and management of amyotrophic lateral sclerosis in Korea: a multi-center cross-sectional study.,"

Background and purpose

Recently published, evidence-based guidelines should alter the management of amyotrophic lateral sclerosis (ALS)/motor neuron disease (MND). However, the newest recommendations for ALS/MND therapy are not reflected in actual clinical practice. We sought to evaluate the current status of the diagnosis and management of ALS in Korea.

Methods

The Korean ALS/MND research group was organized in 2010, involving more than 50 neurologists from neuromuscular centers in Korea. Participating centers collected data from April to September 2010 on the diagnosis and management of patients with ALS. Data forms from the ALS patient care database, which is a component of the ALS clinical assessment, research, and education program (http://www.outcomes-umassmed.org/ALS/), were modified and used for data collection.

Results

In total, 373 sporadic ALS cases from 35 centers were enrolled. The demographic features and clinical findings were similar to those in previous reports from other countries. The mean age at onset was 50-60 years, and a slight male predominance was observed. The enrolled patients predominantly showed focal onset of cervical or lumbosacral symptoms. Only about one-half of the indicated patients (31.4%) received a physician's recommendation for a parenteral gastrostomy, and 18.1% underwent the procedure. Noninvasive ventilation was recommended in 23% of patients, but applied in only 9.5% of them. Tracheostomy was performed in 12.7% of patients.

Conclusions

The demographic and clinical features of the diagnosis and management of ALS in Korea are similar to those reported in other countries; however, supportive management, as recommended in evidence-based guidelines, are not yet widely recommended or performed for patients with ALS in Korea.",2012-12-21 +24619174,"Combined analysis of gene expression, DNA copy number, and mutation profiling data to display biological process anomalies in individual breast cancers.","The goal of this analysis was to develop a computational tool that integrates the totality of gene expression, DNA copy number, and sequence abnormalities in individual cancers in the framework of biological processes. We used the hierarchical structure of the gene ontology (GO) database to create a reference network and projected mRNA expression, DNA copy number and mutation anomalies detected in single samples into this space. We applied our method to 59 breast cancers where all three types of molecular data were available. Each cancer had a large number of disturbed biological processes. Locomotion, multicellular organismal process, and signal transduction pathways were the most commonly affected GO terms, but the individual molecular events were different from case-to-case. Estrogen receptor-positive and -negative cancers had different repertoire of anomalies. We tested the functional impact of 27 mRNAs that had overexpression in cancer with variable frequency (<2-42 %) using an siRNA screen. Each of these genes inhibited cell growth in at least some of 18 breast cancer cell lines. We developed a free, on-line software tool ( http://netgoplot.org ) to display the complex genomic abnormalities in individual cancers in the biological framework of the GO biological processes. Each cancer harbored a variable number of pathway anomalies and the individual molecular events that caused an anomaly varied from case-to-case. 
Our in vitro experiments indicate that rare case-specific molecular abnormalities can play a functional role and driver events may vary from case-to-case depending on the constellation of other molecular anomalies.",2014-03-12 +28092552,Discriminative Elastic-Net Regularized Linear Regression.,"In this paper, we aim at learning compact and discriminative linear regression models. Linear regression has been widely used in different problems. However, most of the existing linear regression methods exploit the conventional zero-one matrix as the regression targets, which greatly narrows the flexibility of the regression model. Another major limitation of these methods is that the learned projection matrix fails to precisely project the image features to the target space due to their weak discriminative capability. To this end, we present an elastic-net regularized linear regression (ENLR) framework, and develop two robust linear regression models which possess the following special characteristics. First, our methods exploit two particular strategies to enlarge the margins of different classes by relaxing the strict binary targets into a more feasible variable matrix. Second, a robust elastic-net regularization of singular values is introduced to enhance the compactness and effectiveness of the learned projection matrix. Third, the resulting optimization problem of ENLR has a closed-form solution in each iteration, which can be solved efficiently. Finally, rather than directly exploiting the projection matrix for recognition, our methods employ the transformed features as the new discriminate representations to make final image classification. Compared with the traditional linear regression model and some of its variants, our method is much more accurate in image classification. Extensive experiments conducted on publicly available data sets well demonstrate that the proposed framework can outperform the state-of-the-art methods. 
The MATLAB codes of our methods can be available at http://www.yongxu.org/lunwen.html.",2017-01-11 +27801918,"Diabetes and BMI: Health Equity through Early Intervention on Dysglycemia, and How Providers Can Help.","Like most states in the U.S., Rhode Island's rate of type 2 Diabetes Mellitus (DM) is rising as its population has both aged and become heavier. Risk of both BMI>=30 and DM has risen across almost all demographics, but disparities continue to exist in both conditions. We analyzed state health survey data to assess race/ethnicity-stratified DM and BMI and the age-adjusted rate of DM by weight status relative to the late 1990s. The prevalence of obesity increased across almost all demographic groups relative to 15 years ago, but the rise was greatest among non-Hispanic whites. The age-adjusted rate of DM had a similar increase across racial/ethnic categories where BMI>=30, but black adults were still at higher risk of DM even at a BMI<30. In sum, non-Hispanic whites and Hispanics are ""catching up"" to blacks' historically higher prevalence of obesity and DM, but disparities remain in both conditions. We describe two ways providers can collaborate with the Department of Health to address these growing health problems. [Full article available at http://rimed.org/rimedicaljournal-2016-11.asp].",2016-11-01 +27052996,Transcription Factor Information System (TFIS): A Tool for Detection of Transcription Factor Binding Sites.,"Transcription factors are trans-acting proteins that interact with specific nucleotide sequences known as transcription factor binding site (TFBS), and these interactions are implicated in regulation of the gene expression. Regulation of transcriptional activation of a gene often involves multiple interactions of transcription factors with various sequence elements. Identification of these sequence elements is the first step in understanding the underlying molecular mechanism(s) that regulate the gene expression. 
For in silico identification of these sequence elements, we have developed an online computational tool named transcription factor information system (TFIS) for detecting TFBS for the first time using a collection of JAVA programs and is mainly based on TFBS detection using position weight matrix (PWM). The database used for obtaining position frequency matrices (PFM) is JASPAR and HOCOMOCO, which is an open-access database of transcription factor binding profiles. Pseudo-counts are used while converting PFM to PWM, and TFBS detection is carried out on the basis of percent score taken as threshold value. TFIS is equipped with advanced features such as direct sequence retrieving from NCBI database using gene identification number and accession number, detecting binding site for common TF in a batch of gene sequences, and TFBS detection after generating PWM from known raw binding sequences in addition to general detection methods. TFIS can detect the presence of potential TFBSs in both the directions at the same time. This feature increases its efficiency. And the results for this dual detection are presented in different colors specific to the orientation of the binding site. Results obtained by the TFIS are more detailed and specific to the detected TFs as integration of more informative links from various related web servers are added in the result pages like Gene Ontology, PAZAR database and Transcription Factor Encyclopedia in addition to NCBI and UniProt. Common TFs like SP1, AP1 and NF-KB of the Amyloid beta precursor gene is easily detected using TFIS along with multiple binding sites. In another scenario of embryonic developmental process, TFs of the FOX family (FOXL1 and FOXC1) were also identified. TFIS is platform-independent which is publicly available along with its support and documentation at http://tfistool.appspot.com and http://www.bioinfoplus.com/tfis/ . 
TFIS is licensed under the GNU General Public License, version 3 (GPL-3.0).",2016-04-06 +27050421,The Functional Human C-Terminome.,"All translated proteins end with a carboxylic acid commonly called the C-terminus. Many short functional sequences (minimotifs) are located on or immediately proximal to the C-terminus. However, information about the function of protein C-termini has not been consolidated into a single source. Here, we built a new ""C-terminome"" database and web system focused on human proteins. Approximately 3,600 C-termini in the human proteome have a minimotif with an established molecular function. To help evaluate the function of the remaining C-termini in the human proteome, we inferred minimotifs identified by experimentation in rodent cells, predicted minimotifs based upon consensus sequence matches, and predicted novel highly repetitive sequences in C-termini. Predictions can be ranked by enrichment scores or Gene Evolutionary Rate Profiling (GERP) scores, a measurement of evolutionary constraint. By searching for new anchored sequences on the last 10 amino acids of proteins in the human proteome with lengths between 3-10 residues and up to 5 degenerate positions in the consensus sequences, we have identified new consensus sequences that predict instances in the majority of human genes. All of this information is consolidated into a database that can be accessed through a C-terminome web system with search and browse functions for minimotifs and human proteins. A known consensus sequence-based predicted function is assigned to nearly half the proteins in the human proteome. 
Weblink: http://cterminome.bio-toolkit.com.",2016-04-06 +28621586,The COOLER Code: A Novel Analytical Approach to Calculate Subcellular Energy Deposition by Internal Electron Emitters.,"COmputation Of Local Electron Release (COOLER), a software program has been designed for dosimetry assessment at the cellular/subcellular scale, with a given distribution of administered low-energy electron-emitting radionuclides in cellular compartments, which remains a critical step in risk/benefit analysis for advancements in internal radiotherapy. The software is intended to overcome the main limitations of the medical internal radiation dose (MIRD) formalism for calculations of cellular S-values (i.e., dose to a target region in the cell per decay in a given source region), namely, the use of the continuous slowing down approximation (CSDA) and the assumption of a spherical cell geometry. To this aim, we developed an analytical approach, entrusted to a MATLAB-based program, using as input simulated data for electron spatial energy deposition directly derived from full Monte Carlo track structure calculations with PARTRAC. Results from PARTRAC calculations on electron range, stopping power and residual energy versus traveled distance curves are presented and, when useful for implementation in COOLER, analytical fit functions are given. Example configurations for cells in different culture conditions (V79 cells in suspension or adherent culture) with realistic geometrical parameters are implemented for use in the tool. Finally, cellular S-value predictions by the newly developed code are presented for different cellular geometries and activity distributions (uniform activity in the nucleus, in the entire cell or on the cell surface), validated against full Monte Carlo calculations with PARTRAC, and compared to MIRD standards, as well as results based on different track structure calculations (Geant4-DNA). 
The largest discrepancies between COOLER and MIRD predictions were generally found for electrons between 25 and 30 keV, where the magnitude of disagreement in S-values can vary from 50 to 100%, depending on the activity distribution. In calculations for activity distribution on the cell surface, MIRD predictions appeared to fail the most. The proposed method is suitable for Auger-cascade electrons, but can be extended to any energy of interest and to beta spectra; as an example, the 3H case is also discussed. COOLER is intended to be accessible to everyone (preclinical and clinical researchers included), and may provide important information for the selection of radionuclides, the interpretation of radiobiological or preclinical results, and the general establishment of doses in any scenario, e.g., with cultured cells in the laboratory or with therapeutic or diagnostic applications. The software will be made available for download from the DTU-Nutech website: http://www.nutech.dtu.dk/ .",2017-06-16 +27050120,A novel non-thermostable deuterolysin from Aspergillus oryzae.,"Three putative deuterolysin (EC 3.4.24.29) genes (deuA, deuB, and deuC) were found in the Aspergillus oryzae genome database ( http://www.bio.nite.go.jp/dogan/project/view/AO ). One of these genes, deuA, was corresponding to NpII gene, previously reported. DeuA and DeuB were overexpressed by recombinant A. oryzae and were purified. The degradation profiles against protein substrates of both enzymes were similar, but DeuB showed wider substrate specificity against peptidyl MCA-substrates compared with DeuA. Enzymatic profiles of DeuB except for thermostability also resembled those of DeuA. DeuB was inactivated by heat treatment above 80° C, different from thermostable DeuA. Transcription analysis in wild type A. oryzae showed only deuB was expressed in liquid culture, and the addition of the proteinous substrate upregulated the transcription. 
Furthermore, the NaNO3 addition seems to eliminate the effect of proteinous substrate for the transcription of deuB.",2016-04-06 +28547293,Nonsurgical Facial Rejuvenation: Outcomes and Safety of Neuromodulator and Soft-Tissue Filler Procedures Performed in a Resident Cosmetic Clinic.,"

Background

The ability to perform nonsurgical facial rejuvenation procedures is a core competency requirement for plastic surgery residents. However, limited data exist on training models to achieve competency in nonsurgical facial rejuvenation and on outcomes of these procedures performed by residents. The purpose here is to evaluate patient-reported outcomes and safety of nonsurgical facial rejuvenation procedures performed by plastic surgery residents.

Methods

We prospectively enrolled 50 patients undergoing neuromodulator and/or soft-tissue filler injections in a resident cosmetic clinic between April and August 2016. Patients completed FACE-Q modules pre-procedure, and at 1 week and 1 month post-procedure. Paired t-tests were used to calculate statistical significance of changes between pre- and post-procedure scores. Effect sizes were calculated to assess clinical improvement from pre- to post-procedure. The magnitude of change was interpreted using Cohen's arbitrary criteria (small 0.20, moderate 0.50, large 0.80).

Results

Forty-five patients completed the study. Patients experienced significant improvements (p < 0.001) in all FACE-Q domains, including aging appearance appraisal (improved from 49.7 ± 29.4 to 70.1 ± 21.6, effect size 0.79), psychological well-being (44.0 ± 14.6-78.6 ± 20.7, effect size 1.93), social functioning (48.6 ± 16.6-75.5 ± 21.7, effect size 1.20), and satisfaction with facial appearance (50.1 ± 13.7-66.2 ± 19.7, effect size 0.95). At 1 month, overall satisfaction with outcome and decision were 75.8 ± 20.7 and 81.1 ± 20.4, respectively. No patients experienced complications.

Conclusions

Nonsurgical facial rejuvenation procedures performed by residents can improve patients' quality of life and provide high satisfaction without compromising safety.

Level of evidence iv

This journal requires that authors assign a level of evidence to each submission to which Evidence-Based Medicine rankings are applicable. This excludes Review Articles, Book Reviews, and manuscripts that concern Basic Science, Animal Studies, Cadaver Studies, and Experimental Studies. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266 .",2017-05-25 +27504010,Crowdsourcing and curation: perspectives from biology and natural language processing. ,"Crowdsourcing is increasingly utilized for performing tasks in both natural language processing and biocuration. Although there have been many applications of crowdsourcing in these fields, there have been fewer high-level discussions of the methodology and its applicability to biocuration. This paper explores crowdsourcing for biocuration through several case studies that highlight different ways of leveraging 'the crowd'; these raise issues about the kind(s) of expertise needed, the motivations of participants, and questions related to feasibility, cost and quality. The paper is an outgrowth of a panel session held at BioCreative V (Seville, September 9-11, 2015). The session consisted of four short talks, followed by a discussion. In their talks, the panelists explored the role of expertise and the potential to improve crowd performance by training; the challenge of decomposing tasks to make them amenable to crowdsourcing; and the capture of biological data and metadata through community editing.Database URL: http://www.mitre.org/publications/technical-papers/crowdsourcing-and-curation-perspectives.",2016-08-07 +27154141,High-performance web services for querying gene and variant annotation.,"Efficient tools for data management and integration are essential for many aspects of high-throughput biology. 
In particular, annotations of genes and human genetic variants are commonly used but highly fragmented across many resources. Here, we describe MyGene.info and MyVariant.info, high-performance web services for querying gene and variant annotation information. These web services are currently accessed more than three million times per month. They also demonstrate a generalizable cloud-based model for organizing and querying biological annotation information. MyGene.info and MyVariant.info are provided as high-performance web services, accessible at http://mygene.info and http://myvariant.info . Both are offered free of charge to the research community.",2016-05-06 +28093407,Protein multiple sequence alignment benchmarking through secondary structure prediction.,"

Motivation

Multiple sequence alignment (MSA) is commonly used to analyze sets of homologous protein or DNA sequences. This has led to the development of many methods and packages for MSA over the past 30 years. Being able to compare different methods has been problematic and has relied on gold standard benchmark datasets of 'true' alignments or on MSA simulations. A number of protein benchmark datasets have been produced which rely on a combination of manual alignment and/or automated superposition of protein structures. These are either restricted to very small MSAs with few sequences or require manual alignment which can be subjective. In both cases, it remains very difficult to properly test MSAs of more than a few dozen sequences. PREFAB and HomFam both rely on using a small subset of sequences of known structure and do not fairly test the quality of a full MSA.

Results

In this paper we describe QuanTest, a fully automated and highly scalable test system for protein MSAs which is based on using secondary structure prediction accuracy (SSPA) to measure alignment quality. This is based on the assumption that better MSAs will give more accurate secondary structure predictions when we include sequences of known structure. SSPA measures the quality of an entire alignment however, not just the accuracy on a handful of selected sequences. It can be scaled to alignments of any size but here we demonstrate its use on alignments of either 200 or 1000 sequences. This allows the testing of slow accurate programs as well as faster, less accurate ones. We show that the scores from QuanTest are highly correlated with existing benchmark scores. We also validate the method by comparing a wide range of MSA alignment options and by including different levels of mis-alignment into MSA, and examining the effects on the scores.

Availability and implementation

QuanTest is available from http://www.bioinf.ucd.ie/download/QuanTest.tgz.

Contact

quan.le@ucd.ie.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-05-01 +28803016,"Association between ambient temperature and humidity, vaginal temperature, and automatic activity monitoring on induced estrus in lactating cows.","The objective of this study was to determine the association between ambient temperature and humidity, vaginal temperature, and automated activity monitoring in synchronized cows. Lactating Holstein cows (n = 641; 41.5 ± 9.4 kg of milk/d) were fitted with leg-mounted pedometers, resulting in 843 evaluated activity episodes of estrus. Vaginal temperature was monitored using thermometers attached to an intravaginal device as part of a timed artificial insemination (TAI) protocol; vaginal temperature was recorded every 10 min for 3 d. Ambient temperature and relative humidity were monitored using an external thermometer placed in the center of each pen. Milk production and body condition score (BCS) data were collected at the time of thermometer insertion. All statistical analysis was performed in R (https://www.r-project.org/) using Pearson correlation, analysis of variance, and logistic regression. Heat stress was calculated based on the percentage of time the cow spent with a vaginal temperature ≥39.1°C (PCT39) 9 to 11 d before TAI, and was classified as high (≥22.9%) or low (<22.9%). The mean vaginal temperature was 38.9 ± 0.2°C, and the mean maximum and minimum vaginal temperatures were 39.7 ± 0.5°C and 38.0 ± 0.8°C, respectively, with an average amplitude of 1.71 ± 0.9°C. Mean relative increase (RI) of estrus walking activity was 237.0 ± 160%. Animals with low BCS had a lower RI compared with cows with medium BCS (260.31 ± 17.45% vs. 296.42 ± 6.62%). Cows in early lactation showed lower RI compared with mid- and late-lactation animals (265.40 ± 9.90% vs. 288.36 ± 11.58% vs. 295.75 ± 11.29% for early, mid, and late lactation, respectively). 
Temperature-humidity index (THI) conditions categorized as low (THI ≤65) were associated with greater RI compared with medium (>65 to <70) and high THI (≥70). We detected no significant effect of PCT39 or milk production on RI, whereas parity exhibited a tendency. Cows that displayed greater RI at estrus had greater pregnancies per artificial insemination (P/AI) than cows with low RI (27 vs. 20%) or no RI (27 vs. 12%). Primiparous cows had greater P/AI than multiparous cows (27 vs. 20%), and cows in early and mid lactation had improved P/AI than those in late lactation (26 vs. 22 vs. 16% for early, mid, and late lactation, respectively). An interaction was observed between PCT39 and THI on P/AI, where a subpopulation of cows with high PCT39 had decreased P/AI under high THI conditions, but no differences in P/AI were observed for high PCT39 cows under medium or low THI conditions (13 vs. 24 vs. 26%). Future research should aim to refine variables related to hyperthermia and to understand the effects of body temperature on estrus expression and pregnancy rates.",2017-08-09 +28495897,"Geographical Difference of the Interaction of Sex With Treatment Strategy in Patients With Multivessel Disease and Left Main Disease: A Meta-Analysis From SYNTAX (Synergy Between PCI With Taxus and Cardiac Surgery), PRECOMBAT (Bypass Surgery Versus Angioplasty Using Sirolimus-Eluting Stent in Patients With Left Main Coronary Artery Disease), and BEST (Bypass Surgery and Everolimus-Eluting Stent Implantation in the Treatment of Patients With Multivessel Coronary Artery Disease) Randomized Controlled Trials. ","The impact of sex on clinical outcomes of percutaneous coronary intervention and coronary artery bypass graft for patients with multivessel coronary disease and unprotected left main disease could be dissimilar between Western and Asian populations. 
To assess clinical outcomes after percutaneous coronary intervention or coronary artery bypass graft in women and men with multivessel coronary disease and unprotected left main disease, a pooled analysis (n=3280) was performed using the patient-level data from 3 large randomized trials: SYNTAX (Synergy between PCI with Taxus and Cardiac Surgery), PRECOMBAT (Bypass Surgery Versus Angioplasty Using Sirolimus-Eluting Stent in Patients With Left Main Coronary Artery Disease), and BEST (Bypass Surgery and Everolimus-Eluting Stent Implantation in the Treatment of Patients with Multivessel Coronary Artery Disease) trials. The primary end point was all-cause death. Of 3280 patients, 794 patients (24.2%) were women. The median follow-up period was 1806 days (1611-1837 days). In women, a high heterogeneity of the treatment effect among the 3 trials was found for all-cause death (I2>50%), whereas in men, it was consistent across the 3 trials. In the Western trial (SYNTAX), female sex favored coronary artery bypass graft compared with percutaneous coronary intervention (hazard ratio(percutaneous coronary intervention) 2.213; 95% confidence interval, 1.242-3.943; P=0.007), whereas in the Asian women (PRECOMBAT and BEST), the treatment effect was neutral between both strategies. Sex interaction with treatment strategy was evident in the Western trial (Pinteraction=0.019) but not in the Asian trials (PRECOMBAT Pinteraction=0.469 and BEST Pinteraction=0.472; I2=58%). The present meta-analysis suggested the presence of the heterogeneous sex-treatment interaction across Asian and Western trials. Considering the ongoing globalization of our medical practice, the heterogeneity of the sex-treatment interaction needs to be well recognized and taken into account during the decision making of the treatment strategy. URL: https://www.clinicaltrials.gov. 
Unique identifiers: NCT00114972, NCT00997828, NCT00422968.",2017-05-01 +24253302,MultitaskProtDB: a database of multitasking proteins.,"We have compiled MultitaskProtDB, available online at http://wallace.uab.es/multitask, to provide a repository where the many multitasking proteins found in the literature can be stored. Multitasking or moonlighting is the capability of some proteins to execute two or more biological functions. Usually, multitasking proteins are experimentally revealed by serendipity. This ability of proteins to perform multitasking functions helps us to understand one of the ways used by cells to perform many complex functions with a limited number of genes. Even so, the study of this phenomenon is complex because, among other things, there is no database of moonlighting proteins. The existence of such a tool facilitates the collection and dissemination of these important data. This work reports the database, MultitaskProtDB, which is designed as a friendly user web page containing >288 multitasking proteins with their NCBI and UniProt accession numbers, canonical and additional biological functions, monomeric/oligomeric states, PDB codes when available and bibliographic references. This database also serves to gain insight into some characteristics of multitasking proteins such as frequencies of the different pairs of functions, phylogenetic conservation and so forth.",2013-11-18 +28495896,Percutaneous Coronary Intervention of Saphenous Vein Graft. ,"Percutaneous coronary intervention (PCI) of saphenous vein grafts (SVGs) has historically been associated with a high risk of adverse ischemic events, but there is a paucity of contemporary data on the second-generation drug-eluting stent use within SVG, and the relative importance of high platelet reactivity (HPR) in SVG PCI versus native lesion PCI is unknown. We studied ischemic and bleeding events after SVG PCI and their association with HPR. 
Subjects in the prospective, multicenter ADAPT-DES study (Assessment of Dual Antiplatelet Therapy With Drug-Eluting Stents) were stratified according to whether they had PCI of an SVG or a non-SVG lesion. Two-year outcomes were compared between groups using univariate and multivariable Cox proportional hazards models. HPR was defined as on-clopidogrel P2Y12 platelet reaction units >208 as measured by the VerifyNow assay; major adverse cardiac events were defined as the composite of cardiac death, myocardial infarction, or stent thrombosis. Among 8582 subjects in ADAPT-DES, 405 (4.7%) had SVG PCI. SVG PCI was independently associated with a higher 2-year risk of major adverse cardiac events (adjusted hazard ratio, 2.34; 95% confidence interval, 1.69-3.23; P<0.0001), ischemia-driven target vessel revascularization (adjusted hazard ratio, 1.82; 95% confidence interval, 1.37-2.42; P<0.0001), and stent thrombosis (adjusted hazard ratio, 2.26; 95% confidence interval, 1.42-3.59; P=0.0006), but not of bleeding (adjusted hazard ratio, 0.99; 95% confidence interval, 0.68-1.46; P=0.97). There was no statistical interaction between HPR and SVG PCI in regard to major adverse cardiac events (adjusted Pinteraction=0.99). SVG PCI is associated with a considerably higher risk of 2-year adverse ischemic events, with HPR conferring similar risk in SVG and non-SVG PCI. More potent and longer antiplatelet therapy may be beneficial for patients undergoing SVG PCI. URL: http://www.clinicaltrials.gov. Unique identifier: NCT00638794.",2017-05-01 +24454733,Deriving a mutation index of carcinogenicity using protein structure and protein interfaces.,"With the advent of Next Generation Sequencing the identification of mutations in the genomes of healthy and diseased tissues has become commonplace. 
While much progress has been made to elucidate the aetiology of disease processes in cancer, the contributions to disease that many individual mutations make remain to be characterised and their downstream consequences on cancer phenotypes remain to be understood. Missense mutations commonly occur in cancers and their consequences remain challenging to predict. However, this knowledge is becoming more vital, for both assessing disease progression and for stratifying drug treatment regimes. Coupled with structural data, comprehensive genomic databases of mutations such as the 1000 Genomes project and COSMIC give an opportunity to investigate general principles of how cancer mutations disrupt proteins and their interactions at the molecular and network level. We describe a comprehensive comparison of cancer and neutral missense mutations; by combining features derived from structural and interface properties we have developed a carcinogenicity predictor, InCa (Index of Carcinogenicity). Upon comparison with other methods, we observe that InCa can predict mutations that might not be detected by other methods. We also discuss general limitations shared by all predictors that attempt to predict driver mutations and discuss how this could impact high-throughput predictions. A web interface to a server implementation is publicly available at http://inca.icr.ac.uk/.",2014-01-15 +27259540,"UniCon3D: de novo protein structure prediction using united-residue conformational search via stepwise, probabilistic sampling.","

Motivation

Recent experimental studies have suggested that proteins fold via stepwise assembly of structural units named 'foldons' through the process of sequential stabilization. Alongside, latest developments on computational side based on probabilistic modeling have shown promising direction to perform de novo protein conformational sampling from continuous space. However, existing computational approaches for de novo protein structure prediction often randomly sample protein conformational space as opposed to experimentally suggested stepwise sampling.

Results

Here, we develop a novel generative, probabilistic model that simultaneously captures local structural preferences of backbone and side chain conformational space of polypeptide chains in a united-residue representation and performs experimentally motivated conditional conformational sampling via stepwise synthesis and assembly of foldon units that minimizes a composite physics and knowledge-based energy function for de novo protein structure prediction. The proposed method, UniCon3D, has been found to (i) sample lower energy conformations with higher accuracy than traditional random sampling in a small benchmark of 6 proteins; (ii) perform comparably with the top five automated methods on 30 difficult target domains from the 11th Critical Assessment of Protein Structure Prediction (CASP) experiment and on 15 difficult target domains from the 10th CASP experiment; and (iii) outperform two state-of-the-art approaches and a baseline counterpart of UniCon3D that performs traditional random sampling for protein modeling aided by predicted residue-residue contacts on 45 targets from the 10th edition of CASP.

Availability and implementation

Source code, executable versions, manuals and example data of UniCon3D for Linux and OSX are freely available to non-commercial users at http://sysbio.rnet.missouri.edu/UniCon3D/ CONTACT: chengji@missouri.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-03 +25962835,A multi-view genomic data simulator.,"

Background

OMICs technologies allow to assay the state of a large number of different features (e.g., mRNA expression, miRNA expression, copy number variation, DNA methylation, etc.) from the same samples. The objective of these experiments is usually to find a reduced set of significant features, which can be used to differentiate the conditions assayed. In terms of development of novel feature selection computational methods, this task is challenging for the lack of fully annotated biological datasets to be used for benchmarking. A possible way to tackle this problem is generating appropriate synthetic datasets, whose composition and behaviour are fully controlled and known a priori.

Results

Here we propose a novel method centred on the generation of networks of interactions among different biological molecules, especially involved in regulating gene expression. Synthetic datasets are obtained from ordinary differential equations based models with known parameters. Our results show that the generated datasets are well mimicking the behaviour of real data, for popular data analysis methods are able to selectively identify existing interactions.

Conclusions

The proposed method can be used in conjunction with real biological datasets in the assessment of data mining techniques. The main strength of this method consists in the full control over the simulated data while retaining coherence with the real biological processes. The R package MVBioDataSim is freely available to the scientific community at http://neuronelab.unisa.it/?p=1722.",2015-05-12 +23390137,ELECANS--an integrated model development environment for multiscale cancer systems biology.,"

Motivation

Computational multiscale models help cancer biologists to study the spatiotemporal dynamics of complex biological systems and to reveal the underlying mechanism of emergent properties.

Results

To facilitate the construction of such models, we have developed a next generation modelling platform for cancer systems biology, termed 'ELECANS' (electronic cancer system). It is equipped with a graphical user interface-based development environment for multiscale modelling along with a software development kit such that hierarchically complex biological systems can be conveniently modelled and simulated by using the graphical user interface/software development kit combination. Associated software accessories can also help users to perform post-processing of the simulation data for visualization and further analysis. In summary, ELECANS is a new modelling platform for cancer systems biology and provides a convenient and flexible modelling and simulation environment that is particularly useful for those without an intensive programming background.

Availability and implementation

ELECANS, its associated software accessories, demo examples, documentation and issues database are freely available at http://sbie.kaist.ac.kr/sub_0204.php.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-06 +24884676,Genome-wide identification of heat shock proteins (Hsps) and Hsp interactors in rice: Hsp70s as a case study.,"

Background

Heat shock proteins (Hsps) perform a fundamental role in protecting plants against abiotic stresses. Although researchers have made great efforts on the functional analysis of individual family members, Hsps have not been fully characterized in rice (Oryza sativa L.) and little is known about their interactors.

Results

In this study, we combined orthology-based approach with expression association data to screen rice Hsps for the expression patterns of which strongly correlated with that of heat responsive probe-sets. Twenty-seven Hsp candidates were identified, including 12 small Hsps, six Hsp70s, three Hsp60s, three Hsp90s, and three clpB/Hsp100s. Then, using a combination of interolog and expression profile-based methods, we inferred 430 interactors of Hsp70s in rice, and validated the interactions by co-localization and function-based methods. Subsequent analysis showed 13 interacting domains and 28 target motifs were over-represented in Hsp70s interactors. Twenty-four GO terms of biological processes and five GO terms of molecular functions were enriched in the positive interactors, whose expression levels were positively associated with Hsp70s. Hsp70s interaction network implied that Hsp70s were involved in macromolecular translocation, carbohydrate metabolism, innate immunity, photosystem II repair and regulation of kinase activities.

Conclusions

Twenty-seven Hsps in rice were identified and 430 interactors of Hsp70s were inferred and validated, then the interacting network of Hsp70s was induced and the function of Hsp70s was analyzed. Furthermore, two databases named Rice Heat Shock Proteins (RiceHsps) and Rice Gene Expression Profile (RGEP), and one online tool named Protein-Protein Interaction Predictor (PPIP), were constructed and could be accessed at http://bioinformatics.fafu.edu.cn/.",2014-05-07 +25864936,An approach to improve kernel-based Protein-Protein Interaction extraction by learning from large-scale network data.,"Protein-Protein Interaction extraction (PPIe) from biomedical literatures is an important task in biomedical text mining and has achieved desirable results on the annotated datasets. However, the traditional machine learning methods on PPIe suffer badly from vocabulary gap and data sparseness, which weakens classification performance. In this work, an approach capturing external information from the web-based data is introduced to address these problems and boost the existing methods. The approach involves three kinds of word representation techniques: distributed representation, vector clustering and Brown clusters. Experimental results show that our method outperforms the state-of-the-art methods on five publicly available corpora. Our code and data are available at: http://chaoslog.com/improving-kernel-based-protein-protein-interaction-extraction-by-unsupervised-word-representation-codes-and-data.html.",2015-04-09 +26217690,Data set for the genome-wide transcriptome analysis of human epidermal melanocytes.,"The data in this article contains data related to the research article entitled Genome-wide transcriptome analysis of human epidermal melanocytes. This data article contains a complete list of gene and transcript isoform expression in human epidermal melanocytes. 
Transcript isoforms that are differentially expressed in lightly versus darkly pigmented melanocytes are identified. We also provide data showing the gene expression profiles of cell signaling gene families (receptors, ion channels, and transcription factors) in melanocytes. The raw sequencing data used to perform this transcriptome analysis is located in the NCBI Sequence Read Archive under Accession No. SRP039354 http://dx.doi.org/10.7301/Z0MW2F2N.",2014-10-27 +24608172,YTRP: a repository for yeast transcriptional regulatory pathways.,"Regulatory targets of transcription factors (TFs) can be identified by the TF perturbation experiments, which reveal the expression changes owing to the perturbation (deletion or overexpression) of TFs. But the identified targets of a given TF consist of both direct and indirect regulatory targets. It has been shown that most of the TFPE-identified regulatory targets are indirect, indicating that TF-gene regulation is mainly through transcriptional regulatory pathways (TRPs) consisting of intermediate TFs. Without identification of these TRPs, it is not easy to understand how a TF regulates its indirect targets. Because there is no such database depositing the potential TRPs for Saccharomyces cerevisiae now, this motivates us to construct the YTRP (Yeast Transcriptional Regulatory Pathway) database. For each TF-gene regulatory pair under different experimental conditions, all possible TRPs in two underlying networks (constructed using experimentally verified TF-gene binding pairs and TF-gene regulatory pairs from the literature) for the specified experimental conditions were automatically enumerated by TRP mining procedures developed from the graph theory. The enumerated TRPs of a TF-gene regulatory pair provide experimentally testable hypotheses for the molecular mechanisms behind a TF and its regulatory target. YTRP is available online at http://cosbi3.ee.ncku.edu.tw/YTRP/. 
We believe that the TRPs deposited in this database will greatly improve the usefulness of TFPE data for yeast biologists to study the regulatory mechanisms between a TF and its knocked-out targets. Database URL: http://cosbi3.ee.ncku.edu.tw/YTRP/.",2014-03-07 +22863766,"An R package suite for microarray meta-analysis in quality control, differentially expressed gene analysis and pathway enrichment detection.","

Summary

With the rapid advances and prevalence of high-throughput genomic technologies, integrating information of multiple relevant genomic studies has brought new challenges. Microarray meta-analysis has become a frequently used tool in biomedical research. Little effort, however, has been made to develop a systematic pipeline and user-friendly software. In this article, we present MetaOmics, a suite of three R packages MetaQC, MetaDE and MetaPath, for quality control, differentially expressed gene identification and enriched pathway detection for microarray meta-analysis. MetaQC provides a quantitative and objective tool to assist study inclusion/exclusion criteria for meta-analysis. MetaDE and MetaPath were developed for candidate marker and pathway detection, which provide choices of marker detection, meta-analysis and pathway analysis methods. The system allows flexible input of experimental data, clinical outcome (case-control, multi-class, continuous or survival) and pathway databases. It allows missing values in experimental data and utilizes multi-core parallel computing for fast implementation. It generates informative summary output and visualization plots, operates on different operation systems and can be expanded to include new algorithms or combine different types of genomic data. This software suite provides a comprehensive tool to conveniently implement and compare various genomic meta-analysis pipelines.

Availability

http://www.biostat.pitt.edu/bioinfo/software.htm

Contact

ctseng@pitt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-08-03 +27485442,MetaCoMET: a web platform for discovery and visualization of the core microbiome.,"

Motivation

A key component of the analysis of microbiome datasets is the identification of OTUs shared between multiple experimental conditions, commonly referred to as the core microbiome.

Results

We present a web platform named MetaCoMET that enables the discovery and visualization of the core microbiome and provides a comparison of the relative abundance and diversity patterns between subsets of samples within a microbiome dataset. MetaCoMET provides an efficient and interactive graphical interface for analyzing each subset defined by the union or disjunction of groups within the Venn diagram, and includes a graphical taxonomy summary, alpha diversity metrics, Principal Coordinate analysis, abundance-based heatmaps, and a chart indicating the geographic distribution of each sample.

Availability and implementation

MetaCoMET is a user-friendly and efficient web platform freely accessible at http://probes.pw.usda.gov/MetaCoMET or http://aegilops.wheat.ucdavis.edu/MetaCoMET CONTACT: devin.coleman-derr@ars.usda.gov Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-02 +27038623,Transcriptome dynamics in the asexual cycle of the chordate Botryllus schlosseri.,"

Background

We performed an analysis of the transcriptome during the blastogenesis of the chordate Botryllus schlosseri, focusing in particular on genes involved in cell death by apoptosis. The tunicate B. schlosseri is an ascidian forming colonies characterized by the coexistence of three blastogenetic generations: filter-feeding adults, buds on adults, and budlets on buds. Cyclically, adult tissues undergo apoptosis and are progressively resorbed and replaced by their buds originated by asexual reproduction. This is a feature of colonial tunicates, the only known chordates that can reproduce asexually.

Results

Thanks to a newly developed web-based platform ( http://botryllus.cribi.unipd.it ), we compared the transcriptomes of the mid-cycle, the pre-take-over, and the take-over phases of the colonial blastogenetic cycle. The platform is equipped with programs for comparative analysis and allows to select the statistical stringency. We enriched the genome annotation with 11,337 new genes; 581 transcripts were resolved as complete open reading frames, translated in silico into amino acid sequences and then aligned onto the non-redundant sequence database. Significant differentially expressed genes were classified within the gene ontology categories. Among them, we recognized genes involved in apoptosis activation, de-activation, and regulation.

Conclusions

With the current work, we contributed to the improvement of the first released B. schlosseri genome assembly and offer an overview of the transcriptome changes during the blastogenetic cycle, showing up- and down-regulated genes. These results are important for the comprehension of the events underlying colony growth and regression, cell proliferation, colony homeostasis, and competition among different generations.",2016-04-02 +27214906,txCoords: A Novel Web Application for Transcriptomic Peak Re-Mapping.,"Since the development of new technologies such as RIP-Seq and m6A-seq, peak calling has become an important step in transcriptomic sequencing data analysis. However, many of the reported genomic coordinates of transcriptomic peaks are incorrect owing to negligence of the introns. There is currently a lack of a convenient tool to address this problem. Here, we present txCoords, a novel and easy-to-use web application for transcriptomic peak re-mapping. txCoords can be used to correct the incorrectly reported transcriptomic peaks and retrieve the true sequences. It also supports visualization of the re-mapped peaks in a schematic figure or from the UCSC Genome Browser. Our web server is freely available at http://www.bioinfo.tsinghua.edu.cn/txCoords.",2016-05-16 +24691332,The Stanford Microsurgery and Resident Training (SMaRT) Scale: validation of an on-line global rating scale for technical assessment.,"INTRODUCTION: We previously reported results of our on-line microsurgery training program, showing that residents who had access to our website significantly improved their cognitive and technical skills. In this study, we report an objective means for expert evaluators to reliably rate trainees' technical skills under the microscope, with the use of our novel global rating scale. METHODS: ""Microsurgery Essentials"" (http://smartmicrosurgery.com) is our on-line training curriculum. 
Residents were randomly divided into 2 groups: 1 group reviewed this online resource and the other did not. Pre- and post-tests consisted of videotaped microsurgical sessions in which the trainee performed ""microsurgery"" on 3 different models: latex glove, penrose drain, and the dorsal vessel of a chicken foot. The SMaRT (Stanford Microsurgery and Resident Training) scale, consisting of 9 categories graded on a 5-point Likert scale, was used to assess the trainees. Results were analyzed with ANOVA and Student t test, with P less than 0.05 indicating statistical significance. RESULTS: Seventeen residents participated in the study. The SMaRT scale adequately differentiated the performance of more experienced senior residents (PGY-4 to PGY-6, total average score=3.43) from less experienced junior residents (PGY-1 to PGY-3, total average score=2.10, P<0.0001). Residents who viewed themselves as being confident received a higher score on the SMaRT scale (average score 3.5), compared to residents who were not as confident (average score 2.1) (P<0.001). There were no significant differences in scoring among all 3 evaluators (P>0.05). Additionally, junior residents who had access to our website showed a significant increase in their graded technical performance by 0.7 points when compared to residents who did not have access to the website who showed an improvement of only 0.2 points (P=0.01). CONCLUSIONS: Our SMaRT scale is valid and reliable in assessing the microsurgical skills of residents and other trainees. Current trainees are more likely to use self-directed on-line education because of its easy accessibility and interactive format. 
Our global rating scale can help ensure residents are achieving appropriate technical milestones.",2014-05-01 +27713481,Computational prediction shines light on type III secretion origins.,"Type III secretion system is a key bacterial symbiosis and pathogenicity mechanism responsible for a variety of infectious diseases, ranging from food-borne illnesses to the bubonic plague. In many Gram-negative bacteria, the type III secretion system transports effector proteins into host cells, converting resources to bacterial advantage. Here we introduce a computational method that identifies type III effectors by combining homology-based inference with de novo predictions, reaching up to 3-fold higher performance than existing tools. Our work reveals that signals for recognition and transport of effectors are distributed over the entire protein sequence instead of being confined to the N-terminus, as was previously thought. Our scan of hundreds of prokaryotic genomes identified previously unknown effectors, suggesting that type III secretion may have evolved prior to the archaea/bacteria split. Crucially, our method performs well for short sequence fragments, facilitating evaluation of microbial communities and rapid identification of bacterial pathogenicity - no genome assembly required. pEffect and its data sets are available at http://services.bromberglab.org/peffect.",2016-10-07 +22139939,ccPDB: compilation and creation of data sets from Protein Data Bank.,"ccPDB (http://crdd.osdd.net/raghava/ccpdb/) is a database of data sets compiled from the literature and Protein Data Bank (PDB). First, we collected and compiled data sets from the literature used for developing bioinformatics methods to annotate the structure and function of proteins. Second, data sets were derived from the latest release of PDB using standard protocols. Third, we developed a powerful module for creating a wide range of customized data sets from the current release of PDB. 
This is a flexible module that allows users to create data sets using a simple six step procedure. In addition, a number of web services have been integrated in ccPDB, which include submission of jobs on PDB-based servers, annotation of protein structures and generation of patterns. This database maintains >30 types of data sets such as secondary structure, tight-turns, nucleotide interacting residues, metals interacting residues, DNA/RNA binding residues and so on.",2011-12-01 +27660889,MINFIT: A Spreadsheet-Based Tool for Parameter Estimation in an Equilibrium Speciation Software Program.,"Determination of equilibrium constants describing chemical reactions in the aqueous phase and at solid-water interface relies on inverse modeling and parameter estimation. Although there are existing tools available, the steep learning curve prevents the wider community of environmental engineers and chemists to adopt those tools. Stemming from classical chemical equilibrium codes, MINEQL+ has been one of the most widely used chemical equilibrium software programs. We developed a spreadsheet-based tool, which we are calling MINFIT, that interacts with MINEQL+ to perform parameter estimations that optimize model fits to experimental data sets. MINFIT enables automatic and convenient screening of a large number of parameter sets toward the optimal solutions by calling MINEQL+ to perform iterative forward calculations following either exhaustive equidistant grid search or randomized search algorithms. The combined use of the two algorithms can securely guide the searches for the global optima. We developed interactive interfaces so that the optimization processes are transparent. Benchmark examples including both aqueous and surface complexation problems illustrate the parameter estimation and associated sensitivity analysis. 
MINFIT is accessible at http://minfit.strikingly.com .",2016-10-07 +25861966,"AVIA v2.0: annotation, visualization and impact analysis of genomic variants and genes.","

Unlabelled

As sequencing becomes cheaper and more widely available, there is a greater need to quickly and effectively analyze large-scale genomic data. While the functionality of AVIA v1.0, whose implementation was based on ANNOVAR, was comparable with other annotation web servers, AVIA v2.0 represents an enhanced web-based server that extends genomic annotations to cell-specific transcripts and protein-level functional annotations. With AVIA's improved interface, users can better visualize their data, perform comprehensive searches and categorize both coding and non-coding variants.

Availability and implementation

AVIA is freely available through the web at http://avia.abcc.ncifcrf.gov.

Contact

Hue.Vuong@fnlcr.nih.gov

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-09 +23028969,Transcriptome tomography for brain analysis in the web-accessible anatomical space.,"Increased information on the encoded mammalian genome is expected to facilitate an integrated understanding of complex anatomical structure and function based on the knowledge of gene products. Determination of gene expression-anatomy associations is crucial for this understanding. To elicit the association in the three-dimensional (3D) space, we introduce a novel technique for comprehensive mapping of endogenous gene expression into a web-accessible standard space: Transcriptome Tomography. The technique is based on conjugation of sequential tissue-block sectioning, all fractions of which are used for molecular measurements of gene expression densities, and the block-face imaging, which are used for 3D reconstruction of the fractions. To generate a 3D map, tissues are serially sectioned in each of three orthogonal planes and the expression density data are mapped using a tomographic technique. This rapid and unbiased mapping technique using a relatively small number of original data points allows researchers to create their own expression maps in the broad anatomical context of the space. In the first instance we generated a dataset of 36,000 maps, reconstructed from data of 61 fractions measured with microarray, covering the whole mouse brain (ViBrism: http://vibrism.riken.jp/3dviewer/ex/index.html) in one month. After computational estimation of the mapping accuracy we validated the dataset against existing data with respect to the expression location and density. To demonstrate the relevance of the framework, we showed disease related expression of Huntington's disease gene and Bdnf. 
Our tomographic approach is applicable to analysis of any biological molecules derived from frozen tissues, organs and whole embryos, and the maps are spatially isotropic and well suited to the analysis in the standard space (e.g. Waxholm Space for brain-atlas databases). This will facilitate research creating and using open-standards for a molecular-based understanding of complex structures; and will contribute to new insights into a broad range of biological and medical questions.",2012-09-19 +27027346,Towards understanding the lifespan extension by reduced insulin signaling: bioinformatics analysis of DAF-16/FOXO direct targets in Caenorhabditis elegans.,"DAF-16, the C. elegans FOXO transcription factor, is an important determinant in aging and longevity. In this work, we manually curated FOXODB http://lyh.pkmu.cn/foxodb/, a database of FOXO direct targets. It now covers 208 genes. Bioinformatics analysis on 109 DAF-16 direct targets in C. elegans found interesting results. (i) DAF-16 and transcription factor PQM-1 co-regulate some targets. (ii) Seventeen targets directly regulate lifespan. (iii) Four targets are involved in lifespan extension induced by dietary restriction. And (iv) DAF-16 direct targets might play global roles in lifespan regulation.",2016-04-01 +25972773,MarVis-Pathway: integrative and exploratory pathway analysis of non-targeted metabolomics data.,"A central aim in the evaluation of non-targeted metabolomics data is the detection of intensity patterns that differ between experimental conditions as well as the identification of the underlying metabolites and their association with metabolic pathways. In this context, the identification of metabolites based on non-targeted mass spectrometry data is a major bottleneck. In many applications, this identification needs to be guided by expert knowledge and interactive tools for exploratory data analysis can significantly support this process. 
Additionally, the integration of data from other omics platforms, such as DNA microarray-based transcriptomics, can provide valuable hints and thereby facilitate the identification of metabolites via the reconstruction of related metabolic pathways. We here introduce the MarVis-Pathway tool, which allows the user to identify metabolites by annotation of pathways from cross-omics data. The analysis is supported by an extensive framework for pathway enrichment and meta-analysis. The tool allows the mapping of data set features by ID, name, and accurate mass, and can incorporate information from adduct and isotope correction of mass spectrometry data. MarVis-Pathway was integrated in the MarVis-Suite (http://marvis.gobics.de), which features the seamless highly interactive filtering, combination, clustering, and visualization of omics data sets. The functionality of the new software tool is illustrated using combined mass spectrometry and DNA microarray data. This application confirms jasmonate biosynthesis as important metabolic pathway that is upregulated during the wound response of Arabidopsis plants.",2014-10-10 +26964602,Resuscitation in the dental practice.,"The Resuscitation Council (UK) published new resuscitation guidelines in October 2015. The aim of this article is to understand these new guidelines and how dental practices should implement them. A 'resuscitation in the dental practice poster' has been designed which incorporates the new Resuscitation Council (UK) adult basic life support algorithm. This poster, endorsed by the British Dental Association, is included with this issue of the British Dental Journal. 
Further copies can be downloaded from: https://www.walsallhealthcare.nhs.uk/Data/Sites/1/media/documents/health-and-safety/resus.pdf.",2016-03-01 +22581653,Paralogous annotation of disease-causing variants in long QT syndrome genes.,"Discriminating between rare benign and pathogenic variation is a key challenge in clinical genetics, particularly as increasing numbers of nonsynonymous single-nucleotide polymorphisms (SNPs) are identified in resequencing studies. Here, we describe an approach for the functional annotation of nonsynonymous variants that identifies functionally important, disease-causing residues across protein families using multiple sequence alignment. We applied the methodology to long QT syndrome (LQT) genes, which cause sudden death, and their paralogues, which largely cause neurological disease. This approach accurately classified known LQT disease-causing variants (positive predictive value = 98.4%) with a better performance than established bioinformatic methods. The analysis also identified 1078 new putative disease loci, which we incorporated along with known variants into a comprehensive and freely accessible long QT resource (http://cardiodb.org/Paralogue_Annotation/), based on newly created Locus Reference Genomic sequences (http://www.lrg-sequence.org/). We propose that paralogous annotation is widely applicable for Mendelian human disease genes.",2012-06-07 +23299411,Rice Annotation Project Database (RAP-DB): an integrative and interactive database for rice genomics.,"The Rice Annotation Project Database (RAP-DB, http://rapdb.dna.affrc.go.jp/) has been providing a comprehensive set of gene annotations for the genome sequence of rice, Oryza sativa (japonica group) cv. Nipponbare. Since the first release in 2005, RAP-DB has been updated several times along with the genome assembly updates. Here, we present our newest RAP-DB based on the latest genome assembly, Os-Nipponbare-Reference-IRGSP-1.0 (IRGSP-1.0), which was released in 2011. 
We detected 37,869 loci by mapping transcript and protein sequences of 150 monocot species. To provide plant researchers with highly reliable and up to date rice gene annotations, we have been incorporating literature-based manually curated data, and 1,626 loci currently incorporate literature-based annotation data, including commonly used gene names or gene symbols. Transcriptional activities are shown at the nucleotide level by mapping RNA-Seq reads derived from 27 samples. We also mapped the Illumina reads of a Japanese leading japonica cultivar, Koshihikari, and a Chinese indica cultivar, Guangluai-4, to the genome and show alignments together with the single nucleotide polymorphisms (SNPs) and gene functional annotations through a newly developed browser, Short-Read Assembly Browser (S-RAB). We have developed two satellite databases, Plant Gene Family Database (PGFD) and Integrative Database of Cereal Gene Phylogeny (IDCGP), which display gene family and homologous gene relationships among diverse plant species. RAP-DB and the satellite databases offer simple and user-friendly web interfaces, enabling plant and genome researchers to access the data easily and facilitating a broad range of plant research topics.",2013-01-07 +27031623,QTL Mapping of Low-Temperature Germination Ability in the Maize IBM Syn4 RIL Population.,"Low temperature is the primary factor to affect maize sowing in early spring. It is, therefore, vital for maize breeding programs to improve tolerance to low temperatures at seed germination stage. However, little is known about maize QTL involved in low-temperature germination ability. 243 lines of the intermated B73×Mo17 (IBM) Syn4 recombinant inbred line (RIL) population was used for QTL analysis of low-temperature germination ability. There were significant differences in germination-related traits under both conditions of low temperature (12°C/16 h, 18°C/8 h) and optimum temperature (28°C/24 h) between the parental lines. 
Only three QTL were identified for controlling optimum-temperature germination rate. Six QTL controlling low-temperature germination rate were detected on chromosome 4, 5, 6, 7 and 9, and contribution rate of single QTL explained between 3.39%~11.29%. In addition, six QTL controlling low-temperature primary root length were detected in chromosome 4, 5, 6, and 9, and the contribution rate of single QTL explained between 3.96%~8.41%. Four pairs of QTL were located at the same chromosome position and together controlled germination rate and primary root length under low temperature condition. The nearest markers apart from the corresponding QTL (only 0.01 cM) were umc1303 (265.1 cM) on chromosome 4, umc1 (246.4 cM) on chromosome 5, umc62 (459.1 cM) on chromosome 6, bnl14.28a (477.4 cM) on chromosome 9, respectively. A total of 3155 candidate genes were extracted from nine separate intervals based on the Maize Genetics and Genomics Database (http://www.maizegdb.org). Five candidate genes were selected for analysis as candidates putatively affecting seed germination and seedling growth at low temperature. The results provided a basis for further fine mapping, molecular marker assisted breeding and functional study of cold-tolerance at the stage of seed germination in maize.",2016-03-31 +24387861,Tracking the blue: a MLST approach to characterise the Pseudomonas fluorescens group.,"The Pseudomonas fluorescens group comprises several closely related species that are involved in food contamination and spoilage. Specifically, the interest in P. fluorescens as a spoiler of dairy products increased after the cases of ""blue mozzarella"" that occurred in Italy in 2010. 
A Multilocus Sequence Typing (MLST) scheme was developed and applied to characterise 136 isolates (reference strains and food borne isolates) at strain level, to reveal the genetic relationships among them and to disclose any possible genetic clustering of phenotypic markers involved in food spoilage (protease, lipase, lecithinase activities and pigmented or fluorescent molecule production). The production of dark blue diffusible pigment was evaluated on several bacterial culture media and directly on mozzarella cheese. The MLST scheme provided precise genotyping at the strain level, and the population analyses of the concatenated sequences allowed major taxa to be defined. This approach was revealed to be suitable for tracking the strains according to their origin, such as dairy plants or food matrices. The genetic analysis revealed the presence of a connection between the blue pigment production and a specific phylogenetic cluster. The development of the online database specific to the P. fluorescens group (http://pubmlst.org/pfluorescens) will facilitate the application of the scheme and the sharing of the data.",2013-12-03 +25953800,Identification of C2H2-ZF binding preferences from ChIP-seq data using RCADE.,"

Unlabelled

Current methods for motif discovery from chromatin immunoprecipitation followed by sequencing (ChIP-seq) data often identify non-targeted transcription factor (TF) motifs, and are even further limited when peak sequences are similar due to common ancestry rather than common binding factors. The latter aspect particularly affects a large number of proteins from the Cys2His2 zinc finger (C2H2-ZF) class of TFs, as their binding sites are often dominated by endogenous retroelements that have highly similar sequences. Here, we present recognition code-assisted discovery of regulatory elements (RCADE) for motif discovery from C2H2-ZF ChIP-seq data. RCADE combines predictions from a DNA recognition code of C2H2-ZFs with ChIP-seq data to identify models that represent the genuine DNA binding preferences of C2H2-ZF proteins. We show that RCADE is able to identify generalizable binding models even from peaks that are exclusively located within the repeat regions of the genome, where state-of-the-art motif finding approaches largely fail.

Availability and implementation

RCADE is available as a webserver and also for download at http://rcade.ccbr.utoronto.ca/.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

t.hughes@utoronto.ca.",2015-05-06 +23143109,HemaExplorer: a database of mRNA expression profiles in normal and malignant haematopoiesis.,"The HemaExplorer (http://servers.binf.ku.dk/hemaexplorer) is a curated database of processed mRNA Gene expression profiles (GEPs) that provides an easy display of gene expression in haematopoietic cells. HemaExplorer contains GEPs derived from mouse/human haematopoietic stem and progenitor cells as well as from more differentiated cell types. Moreover, data from distinct subtypes of human acute myeloid leukemia is included in the database allowing researchers to directly compare gene expression of leukemic cells with those of their closest normal counterpart. Normalization and batch correction lead to full integrity of the data in the database. The HemaExplorer has comprehensive visualization interface that can make it useful as a daily tool for biologists and cancer researchers to assess the expression patterns of genes encountered in research or literature. HemaExplorer is relevant for all research within the fields of leukemia, immunology, cell differentiation and the biology of the haematopoietic system.",2012-11-09 +24788324,Widely applicable MATLAB routines for automated analysis of saccadic reaction times.,"Saccadic reaction time (SRT) is a widely used dependent variable in eye-tracking studies of human cognition and its disorders. SRTs are also frequently measured in studies with special populations, such as infants and young children, who are limited in their ability to follow verbal instructions and remain in a stable position over time. In this article, we describe a library of MATLAB routines (Mathworks, Natick, MA) that are designed to (1) enable completely automated implementation of SRT analysis for multiple data sets and (2) cope with the unique challenges of analyzing SRTs from eye-tracking data collected from poorly cooperating participants. The library includes preprocessing and SRT analysis routines. 
The preprocessing routines (i.e., moving median filter and interpolation) are designed to remove technical artifacts and missing samples from raw eye-tracking data. The SRTs are detected by a simple algorithm that identifies the last point of gaze in the area of interest, but, critically, the extracted SRTs are further subjected to a number of postanalysis verification checks to exclude values contaminated by artifacts. Example analyses of data from 5- to 11-month-old infants demonstrated that SRTs extracted with the proposed routines were in high agreement with SRTs obtained manually from video records, robust against potential sources of artifact, and exhibited moderate to high test-retest stability. We propose that the present library has wide utility in standardizing and automating SRT-based cognitive testing in various populations. The MATLAB routines are open source and can be downloaded from http://www.uta.fi/med/icl/methods.html .",2015-06-01 +26368549,SATRAP: SOLiD Assembler TRAnslation Program.,"SOLiD DNA sequences are typically analyzed using a reference genome, while they are not recommended for de novo assembly of genomes or transcriptomes. This is mainly due to the difficulty in translating the SOLiD color-space data into normal base-space sequences. In fact, the nature of color-space is such that any misinterpreted color leads to a chain of further translation errors, producing totally wrong results. Here we describe SATRAP, a computer program designed to efficiently translate de novo assembled color-space sequences into a base-space format. The program was tested and validated using simulated and real transcriptomic data; its modularity allows an easy integration into more complex pipelines, such as Oases for RNA-seq de novo assembly. 
SATRAP is available at http://satrap.cribi.unipd.it, either as a multi-step pipeline incorporating several tools for RNA-seq assembly or as an individual module for use with the Oases package.",2015-09-14 +24588870,Sputum myeloperoxidase in chronic obstructive pulmonary disease.,"

Background

Airway inflammation, especially neutrophilic airway inflammation, is a cardinal pathophysiologic feature in chronic obstructive pulmonary disease (COPD) patients. The ideal biomarkers characterizing the inflammation might have important potential clinical applications in disease assessment and therapeutic intervention. Sputum myeloperoxidase (MPO) is recognized as a marker of neutrophil activity. The purpose of this meta-analysis is to determine whether sputum MPO levels could reflect disease status or be regulated by regular medications for COPD.

Methods

Studies were identified by searching PubMed, Embase, the Cochrane Database, CINAHL and http://www.controlled-trials.com for relevant reports published before September 2012. Observational studies comparing sputum MPO in COPD patients and healthy subjects or asthmatics, or within the COPD group, and studies comparing sputum MPO before and after treatment were all included. Data were independently extracted by two investigators and analyzed using STATA 10.0 software.

Results

A total of 24 studies were included in the meta-analysis. Sputum MPO levels were increased in stable COPD patients when compared with normal controls, and this increase was especially pronounced during exacerbations as compared with MPO levels during the stable state. Theophylline treatment was able to reduce MPO levels in COPD patients, while glucocorticoid treatment failed to achieve the same result.

Conclusion

Sputum MPO might be a promising biomarker for guiding COPD management; however, further investigations are needed to confirm this.",2014-03-03 +22828716,Construction of an open-access QT database for detecting the proarrhythmia potential of marketed drugs: ECG-ViEW.,"Information about the QT interval from surface electrocardiograms (ECGs) is essential for surveillance of the proarrhythmia potential of marketed drugs. However, ECG records obtained in daily practice cannot be easily used for this purpose without labor-intensive manual effort. This study was aimed at constructing an open-access QT database, the Electrocardiogram Vigilance with Electronic Data Warehouse (ECG-ViEW). This longitudinal observational database contains 710,369 measurements of QT and associated clinical data from 371,401 patients. The de-identified database is freely available at http://www.ecgview.org.",2012-07-25 +22936402,General olfactory sensitivity database (GOSdb): candidate genes and their genomic variations.,"Genetic variations in olfactory receptors likely contribute to the diversity of odorant-specific sensitivity phenotypes. Our working hypothesis is that genetic variations in auxiliary olfactory genes, including those mediating transduction and sensory neuronal development, may constitute the genetic basis for general olfactory sensitivity (GOS) and congenital general anosmia (CGA). We thus performed a systematic exploration for auxiliary olfactory genes and their documented variation. This included a literature survey, seeking relevant functional in vitro studies, mouse gene knockouts and human disorders with olfactory phenotypes, as well as data mining in published transcriptome and proteome data for genes expressed in olfactory tissues. In addition, we performed next-generation transcriptome sequencing (RNA-seq) of human olfactory epithelium and mouse olfactory epithelium and bulb, so as to identify sensory-enriched transcripts. 
Employing a global score system based on attributes of the 11 data sources utilized, we identified a list of 1,680 candidate auxiliary olfactory genes, of which 450 are shortlisted as having higher probability of a functional role. For the top-scoring 136 genes, we identified genomic variants (probably damaging single nucleotide polymorphisms, indels, and copy number deletions) gleaned from public variation repositories. This database of genes and their variants should assist in rationalizing the great interindividual variation in human overall olfactory sensitivity (http://genome.weizmann.ac.il/GOSdb).",2012-10-11 +22759918,Text-mining applied to autoimmune disease research: the Sjögren's syndrome knowledge base.,"

Background

Sjögren's syndrome is a tissue-specific autoimmune disease that affects exocrine tissues, especially salivary glands and lacrimal glands. Despite a large body of evidence gathered over the past 60 years, significant gaps still exist in our understanding of Sjögren's syndrome. The goal of this study was to develop a database that collects and organizes gene and protein expression data from the existing literature for comparative analysis with future gene expression and proteomic studies of Sjögren's syndrome.

Description

To catalog the existing knowledge in the field, we used text mining to generate the Sjögren's Syndrome Knowledge Base (SSKB) of published gene/protein data, which were extracted from PubMed using text mining of over 7,700 abstracts and listing approximately 500 potential genes/proteins. The raw data were manually evaluated to remove duplicates and false-positives and assign gene names. The database was manually curated to 477 entries, including 377 potential functional genes, which were used for enrichment and pathway analysis using gene ontology and KEGG pathway analysis.

Conclusions

The Sjögren's syndrome knowledge base ( http://sskb.umn.edu) can form the foundation for an informed search of existing knowledge in the field as new potential therapeutic targets are identified by conventional or high throughput experimental techniques.",2012-07-03 +24293654,The SEED and the Rapid Annotation of microbial genomes using Subsystems Technology (RAST).,"In 2004, the SEED (http://pubseed.theseed.org/) was created to provide consistent and accurate genome annotations across thousands of genomes and as a platform for discovering and developing de novo annotations. The SEED is a constantly updated integration of genomic data with a genome database, web front end, API and server scripts. It is used by many scientists for predicting gene functions and discovering new pathways. In addition to being a powerful database for bioinformatics research, the SEED also houses subsystems (collections of functionally related protein families) and their derived FIGfams (protein families), which represent the core of the RAST annotation engine (http://rast.nmpdr.org/). When a new genome is submitted to RAST, genes are called and their annotations are made by comparison to the FIGfam collection. If the genome is made public, it is then housed within the SEED and its proteins populate the FIGfam collection. This annotation cycle has proven to be a robust and scalable solution to the problem of annotating the exponentially increasing number of genomes. To date, >12 000 users worldwide have annotated >60 000 distinct genomes using RAST. Here we describe the interconnectedness of the SEED database and RAST, the RAST annotation pipeline and updates to both resources.",2013-11-29 +27023095,Integrative NMR for biomolecular research.,"NMR spectroscopy is a powerful technique for determining structural and functional features of biomolecules in physiological solution as well as for observing their intermolecular interactions in real-time. 
However, complex steps associated with its practice have made the approach daunting for non-specialists. We introduce an NMR platform that makes biomolecular NMR spectroscopy much more accessible by integrating tools, databases, web services, and video tutorials that can be launched by simple installation of NMRFAM software packages or using a cross-platform virtual machine that can be run on any standard laptop or desktop computer. The software package can be downloaded freely from the NMRFAM software download page ( http://pine.nmrfam.wisc.edu/download_packages.html ), and detailed instructions are available from the Integrative NMR Video Tutorial page ( http://pine.nmrfam.wisc.edu/integrative.html ).",2016-03-29 +27382076,Reannotation of Yersinia pestis Strain 91001 Based on Omics Data.,"Yersinia pestis is among the most dangerous human pathogens, and systematic research of this pathogen is important in bacterial pathogenomics research. To fully interpret the biological functions, physiological characteristics, and pathogenesis of Y. pestis, a comprehensive annotation of its entire genome is necessary. The emergence of omics-based research has brought new opportunities to better annotate the genome of this pathogen. Here, the complete genome of Y. pestis strain 91001 was reannotated using genomics and proteogenomics data. One hundred and thirty-seven unreliable coding sequences were removed, and 41 homologous genes were relocated with their translational initiation sites, while the functions of seven pseudogenes and 392 hypothetical genes were revised. Moreover, annotations of noncoding RNAs, repeat sequences, and transposable elements have also been incorporated. The reannotated results are freely available at http://tody.bmi.ac.cn.",2016-07-05 +28690676,Radioguided localisation of non-palpable lesions of the breast in Costa Rica: review of results of our first 800 patients in private practice.,"

Background

Surgical treatment of non-palpable breast lesions is controversial. At the European Institute of Oncology in Milan, Italy, Prof Umberto Veronesi introduced a new technique called the radioguided occult lesion localisation (ROLL) in 1996 to replace conventional methods and their disadvantages (Zurrida S, Galimberti V, and Monti S et al (1998) Radioguided localization of occult breast lesions. Breast 7:11-13 https://doi.org/10.1016/S0960-9776(98)90044-3). Given the success experienced in that institution, the method became the technique of choice for the early diagnosis of breast cancer. In this paper, we will examine the technical aspects of ROLL and the results from a large series of patients treated in our private practice in Costa Rica.

Methods

We analysed the first 816 patients with different non-palpable breast lesions detected by ultrasound or mammography within our private practice in Costa Rica. In 774 patients, technetium 99m labelled with human serum albumin (7-10 MBq) in 0.2 ml of saline solution was injected into the lesion under mammographic or ultrasound guidance. The excisional biopsy was done by means of a gamma-probe and complete excision of the lesion was verified by X-ray on the specimen in lesions that were visible by mammography and ultrasound 4 months after surgery. In the remaining 42 patients, the localisation of the lesion was carried out by wire.

Results

The tracer was correctly positioned in the first attempt in 772/816 (94.6%) of cases and in the second attempt in two other cases. In 42/816 (5.1%) cases, the localisation of the lesion had to be performed with the traditional method. X-rays showed that the lesion was entirely removed in 770/772 (99.74%) of cases.

Conclusion

The ROLL is a simple and excellent option for the removal of hidden breast lesions in clinical practice. It offers the advantage of making resections safer and with tumour-free margins, in addition to reducing the number of reinterventions. Since it makes it possible to specify to the pathologist the exact site where the lesion is located, we can guarantee a better diagnosis. The rate of success with the use of this technique corresponds to the available scientific data, so we conclude that it is a procedure that we can routinely perform in private practice in Costa Rica.",2017-06-08 +23236458,CINPER: an interactive web system for pathway prediction for prokaryotes.,"We present a web-based network-construction system, CINPER (CSBL INteractive Pathway BuildER), to assist a user to build a user-specified gene network for a prokaryotic organism in an intuitive manner. CINPER builds a network model based on different types of information provided by the user and stored in the system. CINPER's prediction process has four steps: (i) collection of template networks based on (partially) known pathways of related organism(s) from the SEED or BioCyc database and the published literature; (ii) construction of an initial network model based on the template networks using the P-Map program; (iii) expansion of the initial model, based on the association information derived from operons, protein-protein interactions, co-expression modules and phylogenetic profiles; and (iv) computational validation of the predicted models based on gene expression data. To facilitate easy applications, CINPER provides an interactive visualization environment for a user to enter, search and edit relevant data and for the system to display (partial) results and prompt for additional data. 
Evaluation of CINPER on 17 well-studied pathways in the MetaCyc database shows that the program achieves an average recall rate of 76% and an average precision rate of 90% on the initial models; and a higher average recall rate at 87% and an average precision rate at 28% on the final models. The reduced precision rate in the final models versus the initial models reflects the reality that the final models have large numbers of novel genes that have no experimental evidences and hence are not yet collected in the MetaCyc database. To demonstrate the usefulness of this server, we have predicted an iron homeostasis gene network of Synechocystis sp. PCC6803 using the server. The predicted models along with the server can be accessed at http://csbl.bmb.uga.edu/cinper/.",2012-12-07 +26013811,DeAnnCNV: a tool for online detection and annotation of copy number variations from whole-exome sequencing data.,"With the decrease in costs, whole-exome sequencing (WES) has become a very popular and powerful tool for the identification of genetic variants underlying human diseases. However, integrated tools to precisely detect and systematically annotate copy number variations (CNVs) from WES data are still in great demand. Here, we present an online tool, DeAnnCNV (Detection and Annotation of Copy Number Variations from WES data), to meet the current demands of WES users. Upon submitting the file generated from WES data by an in-house tool that can be downloaded from our server, DeAnnCNV can detect CNVs in each sample and extract the shared CNVs among multiple samples. DeAnnCNV also provides additional useful supporting information for the detected CNVs and associated genes to help users to find the potential candidates for further experimental study. The web server is implemented in PHP + Perl + MATLAB and is online available to all users for free at http://mcg.ustc.edu.cn/db/cnv/.",2015-05-26 +25501775,Cardiac Fatalities in Firefighters: An Analysis of the U.S. 
Fire Administration Database.,"Cardiac fatalities are the leading cause of death among all firefighters. Increasing age has been linked to increased cardiac fatalities in firefighters; however, circumstances surrounding in-line-of-duty cardiac firefighter deaths can also increase the risk of a cardiac death. The authors hypothesize that cardiac fatalities among firefighters will be related to the type of duty and level of physical exertion. The authors analyzed the Firefighter Fatalities and Statistics data collected by the U.S. Fire Administration (http://apps.usfa.fema.gov/firefighter-fatalities/fatalityData/statistics) from January 2002 to December 2012. Data were analyzed for associations between age, firefighter classification, duty-type, and cause of fatal cardiac event. A total of 1153 firefighter fatalities occurred during the 10-year period reviewed. Of these, 47% were cardiac fatalities. Mean age was significantly higher in firefighters who suffered a cardiac fatality (52.0 ± 11.4 vs 40.8 ± 14.7 years; P < .05). Volunteer firefighters suffered a significantly higher proportion of cardiac fatalities (62%; P < .05) followed by career firefighters (32%). Additionally, cardiac fatalities were the leading cause of death for volunteer firefighters (54%; P < .05). The highest proportion of cardiac fatalities occurred on-the-scene (29%; P < .05) followed by after-duty fatalities (25%). Stress and overexertion accounted for 98% of the cause of cardiac fatalities. Adjusting for rank and firefighter classification, age (odds ratio, 1.06; 95% confidence interval, 1.05-1.08) and stress or overexertion (odds ratio, 11.9; 95% confidence interval, 1.7-83.4) were independent predictors of a firefighter cardiac fatality. Both career and volunteer firefighters are at significantly higher risk of a fatal cardiac event as they age. These fatalities occur in a significant proportion on-the-scene. 
National efforts should be aimed at these high-risk populations to improve cardiovascular health.",2016-05-01 +25691913,Clustering of reads with alignment-free measures and quality values.,"BACKGROUND:The data volume generated by Next-Generation Sequencing (NGS) technologies is growing at a pace that is now challenging the storage and data processing capacities of modern computer systems. In this context an important aspect is the reduction of data complexity by collapsing redundant reads in a single cluster to improve the run time, memory requirements, and quality of post-processing steps like assembly and error correction. Several alignment-free measures, based on k-mers counts, have been used to cluster reads. Quality scores produced by NGS platforms are fundamental for various analysis of NGS data like reads mapping and error detection. Moreover future-generation sequencing platforms will produce long reads but with a large number of erroneous bases (up to 15 %). RESULTS:In this scenario it will be fundamental to exploit quality value information within the alignment-free framework. To the best of our knowledge this is the first study that incorporates quality value information and k-mers counts, in the context of alignment-free measures, for the comparison of reads data. Based on this principles, in this paper we present a family of alignment-free measures called D (q) -type. A set of experiments on simulated and real reads data confirms that the new measures are superior to other classical alignment-free statistics, especially when erroneous reads are considered. Also results on de novo assembly and metagenomic reads classification show that the introduction of quality values improves over standard alignment-free measures. 
These statistics are implemented in a software called QCluster (http://www.dei.unipd.it/~ciompin/main/qcluster.html).",2015-01-28 +26909376,Dataset concerning GroEL chaperonin interaction with proteins.,"GroEL chaperonin is well-known to interact with a wide variety of polypeptide chains. Here we show the data related to our previous work (http://dx.doi.org/10.1016/j.pep.2015.11.020[1]), and concerning the interaction of GroEL with native (lysozyme, α-lactalbumin) and denatured (lysozyme, α-lactalbumin and pepsin) proteins in solution. The use of affinity chromatography on the base of denatured pepsin for GroEL purification from fluorescent impurities is represented as well.",2016-01-13 +26370285,Reference-free compression of high throughput sequencing data with a probabilistic de Bruijn graph.,"

Background

The volume of data generated by next-generation sequencing (NGS) technologies is now a major concern for both data storage and transmission. This triggered the need for more efficient methods than general-purpose compression tools, such as the widely used gzip method.

Results

We present a novel reference-free method meant to compress data issued from high throughput sequencing technologies. Our approach, implemented in the software LEON, employs techniques derived from existing assembly principles. The method is based on a reference probabilistic de Bruijn Graph, built de novo from the set of reads and stored in a Bloom filter. Each read is encoded as a path in this graph, by memorizing an anchoring kmer and a list of bifurcations. The same probabilistic de Bruijn Graph is used to perform a lossy transformation of the quality scores, which makes it possible to obtain higher compression rates without losing pertinent information for downstream analyses.

Conclusions

LEON was run on various real sequencing datasets (whole genome, exome, RNA-seq or metagenomics). In all cases, LEON showed higher overall compression ratios than state-of-the-art compression software. On a C. elegans whole genome sequencing dataset, LEON divided the original file size by more than 20. LEON is an open source software, distributed under GNU affero GPL License, available for download at http://gatb.inria.fr/software/leon/.",2015-09-14 +29369592,[Functional linear models for region-based association analysis].,"Regional association analysis is one of the most powerful tools for gene mapping because instead analysis of individual variants it simultaneously considers all variants in the region. Recent development of the models for regional association analysis involves functional data analysis approach. In the framework of this approach, genotypes of variants within region as well as their effects are described by continuous functions. Such approach allows us to use information about both linkage and linkage disequilibrium and reduce the influence of noise and/or observation errors. Here we define a functional linear mixed model to test association on independent and structured samples. We demonstrate how to test fixed and random effects of a set of genetic variants in the region on quantitative trait. Estimation of statistical properties of new methods shows that type I errors are in accordance with declared values and power is high especially for models with fixed effects of genotypes. We suppose that new functional regression linear models facilitate identification of rare genetic variants controlling complex human and animal traits. New methods are implemented in computer software FREGAT which is available for free download at http://mga.bionet.nsc.ru/soft/FREGAT/.",2016-10-01 +23297035,"Assessing identity, redundancy and confounds in Gene Ontology annotations over time.","

Motivation

The Gene Ontology (GO) is heavily used in systems biology, but the potential for redundancy, confounds with other data sources and problems with stability over time have been little explored.

Results

We report that GO annotations are stable over short periods, with 3% of genes not being most semantically similar to themselves between monthly GO editions. However, we find that genes can alter their 'functional identity' over time, with 20% of genes not matching to themselves (by semantic similarity) after 2 years. We further find that annotation bias in GO, in which some genes are more characterized than others, has declined in yeast, but generally increased in humans. Finally, we discovered that many entries in protein interaction databases are owing to the same published reports that are used for GO annotations, with 66% of assessed GO groups exhibiting this confound. We provide a case study to illustrate how this information can be used in analyses of gene sets and networks.

Availability

Data available at http://chibi.ubc.ca/assessGO.",2013-01-06 +22691961,PredSulSite: prediction of protein tyrosine sulfation sites with multiple features and analysis.,"Tyrosine sulfation is a ubiquitous posttranslational modification that regulates extracellular protein-protein interactions, intracellular protein transportation modulation, and protein proteolytic process. However, identifying tyrosine sulfation sites remains a challenge due to the lability of sulfation sequences. In this study, we developed a method called PredSulSite that incorporates protein secondary structure, physicochemical properties of amino acids, and residue sequence order information based on support vector machine to predict sulfotyrosine sites. Three types of encoding algorithms-secondary structure, grouped weight, and autocorrelation function-were applied to mine features from tyrosine sulfation proteins. The prediction model with multiple features achieved an accuracy of 92.89% in 10-fold cross-validation. Feature analysis showed that the coil structure, acidic amino acids, and residue interactions around the tyrosine sulfation sites all contributed to the sulfation site determination. The detailed feature analysis in this work can help us to understand the sulfation mechanism and provide guidance for the related experimental validation. PredSulSite is available as a community resource at http://www.bioinfo.ncu.edu.cn/inquiries_PredSulSite.aspx.",2012-06-09 +26248465,BioWardrobe: an integrated platform for analysis of epigenomics and transcriptomics data.,"High-throughput sequencing has revolutionized biology by enhancing our ability to perform genome-wide studies. However, due to lack of bioinformatics expertise, modern technologies are still beyond the capabilities of many laboratories. 
Herein, we present the BioWardrobe platform, which allows users to store, visualize and analyze epigenomics and transcriptomics data using a biologist-friendly web interface, without the need for programming expertise. Predefined pipelines allow users to download data, visualize results on a genome browser, calculate RPKMs (reads per kilobase per million) and identify peaks. Advanced capabilities include differential gene expression and binding analysis, and creation of average tag -density profiles and heatmaps. BioWardrobe can be found at http://biowardrobe.com .",2015-08-07 +23193294,CTCFBSDB 2.0: a database for CTCF-binding sites and genome organization.,"CTCF is a highly conserved transcriptional regulator protein that performs diverse functions such as regulating gene expression and organizing the 3D structure of the genome. Here, we describe recent updates to a database of CTCF-binding sites, CTCFBSDB (http://insulatordb.uthsc.edu/), which now contains almost 15 million CTCF-binding sequences in 10 species. Since the original publication of the database, studies of the 3D structure of the genome, such as those provided by Hi-C experiments, have suggested that CTCF plays an important role in mediating intra- and inter-chromosomal interactions. To reflect this important progress, we have integrated CTCF-binding sites with genomic topological domains defined using Hi-C data. Additionally, the updated database includes new features enabled by new CTCF-binding site data, including binding site occupancy and the ability to visualize overlapping CTCF-binding sites determined in separate experiments.",2012-11-27 +27433060,PreFRP: Prediction and visualization of fluctuation residues in proteins.,"

Aim

The PreFRP web server extracts sequence and basic information of a protein structure and groups amino acid residues in a protein into three important types such as high, moderate, and weak fluctuating residues.

Materials and methods

The server takes a protein data bank file or an amino acid sequence as input and prints the probability of amino acid residues to fluctuate. The server also provides a link to Jmol, a molecular visualization program to visualize the high, moderate, and weak fluctuating residues in three different colors.

Results

Prediction and visualization of fluctuating amino acid residues in proteins may help to understand the complex three-dimensional structure of proteins and may further help in docking and mutation experiments.

Availability

The web server is freely accessible through the web page of the author's institution http://www.mpi.edu.in/prefrp/link.html.",2016-07-01 +23378291,"PhenoDB: a new web-based tool for the collection, storage, and analysis of phenotypic features.","To interpret whole exome/genome sequence data for clinical and research purposes, comprehensive phenotypic information, knowledge of pedigree structure, and results of previous clinical testing are essential. With these requirements in mind and to meet the needs of the Centers for Mendelian Genomics project, we have developed PhenoDB (http://phenodb.net), a secure, Web-based portal for entry, storage, and analysis of phenotypic and other clinical information. The phenotypic features are organized hierarchically according to the major headings and subheadings of the Online Mendelian Inheritance in Man (OMIM®) clinical synopses, with further subdivisions according to structure and function. Every string allows for a free-text entry. All of the approximately 2,900 features use the preferred term from Elements of Morphology and are fully searchable and mapped to the Human Phenotype Ontology and Elements of Morphology. The PhenoDB allows for ascertainment of relevant information from a case in a family or cohort, which is then searchable by family, OMIM number, phenotypic feature, mode of inheritance, genes screened, and so on. The database can also be used to format phenotypic data for submission to dbGaP for appropriately consented individuals. PhenoDB was built using Django, an open source Web development tool, and is freely available through the Johns Hopkins McKusick-Nathans Institute of Genetic Medicine (http://phenodb.net).",2013-03-04 +26535051,Digital imaging of root traits (DIRT): a high-throughput computing and collaboration platform for field-based root phenomics.,"

Background

Plant root systems are key drivers of plant function and yield. They are also under-explored targets to meet global food and energy demands. Many new technologies have been developed to characterize crop root system architecture (CRSA). These technologies have the potential to accelerate the progress in understanding the genetic control and environmental response of CRSA. Putting this potential into practice requires new methods and algorithms to analyze CRSA in digital images. Most prior approaches have solely focused on the estimation of root traits from images, yet no integrated platform exists that allows easy and intuitive access to trait extraction and analysis methods from images combined with storage solutions linked to metadata. Automated high-throughput phenotyping methods are increasingly used in laboratory-based efforts to link plant genotype with phenotype, whereas similar field-based studies remain predominantly manual low-throughput.

Description

Here, we present an open-source phenomics platform ""DIRT"", as a means to integrate scalable supercomputing architectures into field experiments and analysis pipelines. DIRT is an online platform that enables researchers to store images of plant roots, measure dicot and monocot root traits under field conditions, and share data and results within collaborative teams and the broader community. The DIRT platform seamlessly connects end-users with large-scale compute ""commons"" enabling the estimation and analysis of root phenotypes from field experiments of unprecedented size.

Conclusion

DIRT is an automated high-throughput computing and collaboration platform for field based crop root phenomics. The platform is accessible at http://www.dirt.iplantcollaborative.org/ and hosted on the iPlant cyber-infrastructure using high-throughput grid computing resources of the Texas Advanced Computing Center (TACC). DIRT is a high volume central depository and high-throughput RSA trait computation platform for plant scientists working on crop roots. It enables scientists to store, manage and share crop root images with metadata and compute RSA traits from thousands of images in parallel. It makes high-throughput RSA trait computation available to the community with just a few button clicks. As such it enables plant scientists to spend more time on science rather than on technology. All stored and computed data is easily accessible to the public and broader scientific community. We hope that easy data accessibility will attract new tool developers and spur creative data usage that may even be applied to other fields of science.",2015-11-02 +23800136,MetaPathways: a modular pipeline for constructing pathway/genome databases from environmental sequence information.,"

Background

A central challenge to understanding the ecological and biogeochemical roles of microorganisms in natural and human engineered ecosystems is the reconstruction of metabolic interaction networks from environmental sequence information. The dominant paradigm in metabolic reconstruction is to assign functional annotations using BLAST. Functional annotations are then projected onto symbolic representations of metabolism in the form of KEGG pathways or SEED subsystems.

Results

Here we present MetaPathways, an open source pipeline for pathway inference that uses the PathoLogic algorithm to map functional annotations onto the MetaCyc collection of reactions and pathways, and construct environmental Pathway/Genome Databases (ePGDBs) compatible with the editing and navigation features of Pathway Tools. The pipeline accepts assembled or unassembled nucleotide sequences, performs quality assessment and control, predicts and annotates noncoding genes and open reading frames, and produces inputs to PathoLogic. In addition to constructing ePGDBs, MetaPathways uses MLTreeMap to build phylogenetic trees for selected taxonomic anchor and functional gene markers, converts General Feature Format (GFF) files into concatenated GenBank files for ePGDB construction based on third-party annotations, and generates useful file formats including Sequin files for direct GenBank submission and gene feature tables summarizing annotations, MLTreeMap trees, and ePGDB pathway coverage summaries for statistical comparisons.

Conclusions

MetaPathways provides users with a modular annotation and analysis pipeline for predicting metabolic interaction networks from environmental sequence information using an alternative to KEGG pathways and SEED subsystems mapping. It is extensible to genomic and transcriptomic datasets from a wide range of sequencing platforms, and generates useful data products for microbial community structure and function analysis. The MetaPathways software package, installation instructions, and example data can be obtained from http://hallam.microbiology.ubc.ca/MetaPathways.",2013-06-21 +23703212,CLIP-based prediction of mammalian microRNA binding sites.,"Prediction and validation of microRNA (miRNA) targets are essential for understanding functions of miRNAs in gene regulation. Crosslinking immunoprecipitation (CLIP) allows direct identification of a huge number of Argonaute-bound target sequences that contain miRNA binding sites. By analysing data from CLIP studies, we identified a comprehensive list of sequence, thermodynamic and target structure features that are essential for target binding by miRNAs in the 3' untranslated region (3' UTR), coding sequence (CDS) region and 5' untranslated region (5' UTR) of target messenger RNA (mRNA). The total energy of miRNA:target hybridization, a measure of target structural accessibility, is the only essential feature common for both seed and seedless sites in all three target regions. Furthermore, evolutionary conservation is an important discriminating feature for both seed and seedless sites. These features enabled us to develop novel statistical models for the predictions of both seed sites and broad classes of seedless sites. Through both intra-dataset validation and inter-dataset validation, our approach showed major improvements over established algorithms for predicting seed sites and a class of seedless sites. 
Furthermore, we observed good performance from cross-species validation, suggesting that our prediction framework can be valuable for broad application to other mammalian species and beyond. Transcriptome-wide binding site predictions enabled by our approach will greatly complement the available CLIP data, which only cover small fractions of transcriptomes and known miRNAs due to non-detectable levels of expression. Software and database tools based on the prediction models have been developed and are available through Sfold web server at http://sfold.wadsworth.org.",2013-05-22 +26363189,"Dynamic spatiotemporal brain analyses using high-performance electrical neuroimaging, Part II: A step-by-step tutorial.","Our recently published analytic toolbox (Cacioppo et al., 2014), running under MATLAB environment and Brainstorm, offered a theoretical framework and set of validation studies for the automatic detection of event-related changes in the global pattern and global field power of electrical brain activity. Here, we provide a step-by-step tutorial of this toolbox along with a detailed description of analytical plans (aka the Chicago Electrical Neuroimaging Analytics, CENA) for the statistical analysis of brain microstate configuration and global field power in within and between-subject designs. 
Available CENA functions include: (1) a difference wave function; (2) a high-performance microsegmentation suite (HPMS), which consists of three specific analytic tools: (i) a root mean square error (RMSE) metric for identifying stable states and transition states across discrete event-related brain microstates; (ii) a similarity metric based on cosine distance in n dimensional sensor space to determine whether template maps for successive brain microstates differ in configuration of brain activity, and (iii) global field power (GFP) metrics for identifying changes in the overall level of activation of the brain; (3) a bootstrapping function for assessing the extent to which the solutions identified in the HPMS are robust (reliable, generalizable) and for empirically deriving additional experimental hypotheses; and (4) step-by-step procedures for performing a priori contrasts for data analysis. CENA is freely available for brain data spatiotemporal analyses at https://hpenlaboratory.uchicago.edu/page/cena, with sample data, user tutorial videos, and documentation.",2015-09-10 +24678954,Visual ModuleOrganizer: a graphical interface for the detection and comparative analysis of repeat DNA modules.,"

Background

DNA repeats, such as transposable elements, minisatellites and palindromic sequences, are abundant in sequences and have been shown to have significant and functional roles in the evolution of the host genomes. In a previous study, we introduced the concept of a repeat DNA module, a flexible motif present in at least two occurences in the sequences. This concept was embedded into ModuleOrganizer, a tool allowing the detection of repeat modules in a set of sequences. However, its implementation remains difficult for larger sequences.

Results

Here we present Visual ModuleOrganizer, a Java graphical interface that enables a new and optimized version of the ModuleOrganizer tool. To implement this version, it was recoded in C++ with compressed suffix tree data structures. This leads to less memory usage (at least 120-fold decrease in average) and decreases by at least four the computation time during the module detection process in large sequences. Visual ModuleOrganizer interface allows users to easily choose ModuleOrganizer parameters and to graphically display the results. Moreover, Visual ModuleOrganizer dynamically handles graphical results through four main parameters: gene annotations, overlapping modules with known annotations, location of the module in a minimal number of sequences, and the minimal length of the modules. As a case study, the analysis of FoldBack4 sequences clearly demonstrated that our tools can be extended to comparative and evolutionary analyses of any repeat sequence elements in a set of genomic sequences. With the increasing number of sequences available in public databases, it is now possible to perform comparative analyses of repeated DNA modules in a graphic and friendly manner within a reasonable time period.

Availability

Visual ModuleOrganizer interface and the new version of the ModuleOrganizer tool are freely available at: http://lcb.cnrs-mrs.fr/spip.php?rubrique313.",2014-03-28 +22194913,ProKinO: an ontology for integrative analysis of protein kinases in cancer.,"

Background

Protein kinases are a large and diverse family of enzymes that are genomically altered in many human cancers. Targeted cancer genome sequencing efforts have unveiled the mutational profiles of protein kinase genes from many different cancer types. While mutational data on protein kinases is currently catalogued in various databases, integration of mutation data with other forms of data on protein kinases such as sequence, structure, function and pathway is necessary to identify and characterize key cancer causing mutations. Integrative analysis of protein kinase data, however, is a challenge because of the disparate nature of protein kinase data sources and data formats.

Results

Here, we describe ProKinO, a protein kinase-specific ontology, which provides a controlled vocabulary of terms, their hierarchy, and relationships unifying sequence, structure, function, mutation and pathway information on protein kinases. The conceptual representation of such diverse forms of information in one place not only allows rapid discovery of significant information related to a specific protein kinase, but also enables large-scale integrative analysis of protein kinase data in ways not possible through other kinase-specific resources. We have performed several integrative analyses of ProKinO data and, as an example, found that a large number of somatic mutations (∼288 distinct mutations) associated with the haematopoietic neoplasm cancer type map to only 8 kinases in the human kinome. This is in contrast to glioma, where the mutations are spread over 82 distinct kinases. We also provide examples of how ontology-based data analysis can be used to generate testable hypotheses regarding cancer mutations.

Conclusion

We present an integrated framework for large-scale integrative analysis of protein kinase data. Navigation and analysis of ontology data can be performed using the ontology browser available at: http://vulcan.cs.uga.edu/prokino.",2011-12-14 +27016700,CD-REST: a system for extracting chemical-induced disease relation in literature. ,"Mining chemical-induced disease relations embedded in the vast biomedical literature could facilitate a wide range of computational biomedical applications, such as pharmacovigilance. The BioCreative V organized a Chemical Disease Relation (CDR) Track regarding chemical-induced disease relation extraction from biomedical literature in 2015. We participated in all subtasks of this challenge. In this article, we present our participation system Chemical Disease Relation Extraction SysTem (CD-REST), an end-to-end system for extracting chemical-induced disease relations in biomedical literature. CD-REST consists of two main components: (1) a chemical and disease named entity recognition and normalization module, which employs the Conditional Random Fields algorithm for entity recognition and a Vector Space Model-based approach for normalization; and (2) a relation extraction module that classifies both sentence-level and document-level candidate drug-disease pairs by support vector machines. Our system achieved the best performance on the chemical-induced disease relation extraction subtask in the BioCreative V CDR Track, demonstrating the effectiveness of our proposed machine learning-based approaches for automatic extraction of chemical-induced disease relations in biomedical literature. The CD-REST system provides web services using HTTP POST request. The web services can be accessed fromhttp://clinicalnlptool.com/cdr The online CD-REST demonstration system is available athttp://clinicalnlptool.com/cdr/cdr.html. 
Database URL:http://clinicalnlptool.com/cdr;http://clinicalnlptool.com/cdr/cdr.html.",2016-03-25 +27374611,An automated system for evaluation of the potential functionome: MAPLE version 2.1.0.,"Metabolic and physiological potential evaluator (MAPLE) is an automatic system that can perform a series of steps used in the evaluation of potential comprehensive functions (functionome) harboured in the genome and metagenome. MAPLE first assigns KEGG Orthology (KO) to the query gene, maps the KO-assigned genes to the Kyoto Encyclopedia of Genes and Genomes (KEGG) functional modules, and then calculates the module completion ratio (MCR) of each functional module to characterize the potential functionome in the user's own genomic and metagenomic data. In this study, we added two more useful functions to calculate module abundance and Q-value, which indicate the functional abundance and statistical significance of the MCR results, respectively, to the new version of MAPLE for more detailed comparative genomic and metagenomic analyses. Consequently, MAPLE version 2.1.0 reported significant differences in the potential functionome, functional abundance, and diversity of contributors to each function among four metagenomic datasets generated by the global ocean sampling expedition, one of the most popular environmental samples to use with this system. MAPLE version 2.1.0 is now available through the web interface (http://www.genome.jp/tools/maple/) 17 June 2016, date last accessed.",2016-10-01 +26803164,Computational approaches to define a human milk metaglycome.,"

Motivation

The goal of deciphering the human glycome has been hindered by the lack of high-throughput sequencing methods for glycans. Although mass spectrometry (MS) is a key technology in glycan sequencing, MS alone provides limited information about the identification of monosaccharide constituents, their anomericity and their linkages. These features of individual, purified glycans can be partly identified using well-defined glycan-binding proteins, such as lectins and antibodies that recognize specific determinants within glycan structures.

Results

We present a novel computational approach to automate the sequencing of glycans using metadata-assisted glycan sequencing, which combines MS analyses with glycan structural information from glycan microarray technology. Success in this approach was aided by the generation of a 'virtual glycome' to represent all potential glycan structures that might exist within a metaglycomes based on a set of biosynthetic assumptions using known structural information. We exploited this approach to deduce the structures of soluble glycans within the human milk glycome by matching predicted structures based on experimental data against the virtual glycome. This represents the first meta-glycome to be defined using this method and we provide a publically available web-based application to aid in sequencing milk glycans.

Availability and implementation

http://glycomeseq.emory.edu

Contact

sagravat@bidmc.harvard.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-23 +28013218,Editor's Highlight: Genetic Targets of Acute Toluene Inhalation in Drosophila melanogaster.,"Interpretation and use of data from high-throughput assays for chemical toxicity require links between effects at molecular targets and adverse outcomes in whole animals. The well-characterized genome of Drosophila melanogaster provides a potential model system by which phenotypic responses to chemicals can be mapped to genes associated with those responses, which may in turn suggest adverse outcome pathways associated with those genes. To determine the utility of this approach, we used the Drosophila Genetics Reference Panel (DGRP), a collection of ∼200 homozygous lines of fruit flies whose genomes have been sequenced. We quantified toluene-induced suppression of motor activity in 123 lines of these flies during exposure to toluene, a volatile organic compound known to induce narcosis in mammals via its effects on neuronal ion channels. We then applied genome-wide association analyses on this effect of toluene using the DGRP web portal (http://dgrp2.gnets.ncsu.edu), which identified polymorphisms in candidate genes associated with the variation in response to toluene exposure. We tested ∼2 million variants and found 82 polymorphisms located in or near 66 candidate genes that were associated with phenotypic variation for sensitivity to toluene at P < 5 × 10-5, and human orthologs for 52 of these candidate Drosophila genes. None of these orthologs are known to be involved in canonical pathways for mammalian neuronal ion channels, including GABA, glutamate, dopamine, glycine, serotonin, and voltage sensitive calcium channels. Thus this analysis did not reveal a genetic signature consistent with processes previously shown to be involved in toluene-induced narcosis in mammals. 
The list of the human orthologs included Gene Ontology terms associated with signaling, nervous system development and embryonic morphogenesis; these orthologs may provide insight into potential new pathways that could mediate the narcotic effects of toluene.",2017-03-01 +26914653,The 2015 Bioinformatics Open Source Conference (BOSC 2015).,"The Bioinformatics Open Source Conference (BOSC) is organized by the Open Bioinformatics Foundation (OBF), a nonprofit group dedicated to promoting the practice and philosophy of open source software development and open science within the biological research community. Since its inception in 2000, BOSC has provided bioinformatics developers with a forum for communicating the results of their latest efforts to the wider research community. BOSC offers a focused environment for developers and users to interact and share ideas about standards; software development practices; practical techniques for solving bioinformatics problems; and approaches that promote open science and sharing of data, results, and software. BOSC is run as a two-day special interest group (SIG) before the annual Intelligent Systems in Molecular Biology (ISMB) conference. BOSC 2015 took place in Dublin, Ireland, and was attended by over 125 people, about half of whom were first-time attendees. Session topics included ""Data Science;"" ""Standards and Interoperability;"" ""Open Science and Reproducibility;"" ""Translational Bioinformatics;"" ""Visualization;"" and ""Bioinformatics Open Source Project Updates"". In addition to two keynote talks and dozens of shorter talks chosen from submitted abstracts, BOSC 2015 included a panel, titled ""Open Source, Open Door: Increasing Diversity in the Bioinformatics Open Source Community,"" that provided an opportunity for open discussion about ways to increase the diversity of participants in BOSC in particular, and in open source bioinformatics in general. 
The complete program of BOSC 2015 is available online at http://www.open-bio.org/wiki/BOSC_2015_Schedule.",2016-02-25 +28138060,RNA-Puzzles Round III: 3D RNA structure prediction of five riboswitches and one ribozyme.,"RNA-Puzzles is a collective experiment in blind 3D RNA structure prediction. We report here a third round of RNA-Puzzles. Five puzzles, 4, 8, 12, 13, 14, all structures of riboswitch aptamers and puzzle 7, a ribozyme structure, are included in this round of the experiment. The riboswitch structures include biological binding sites for small molecules (S-adenosyl methionine, cyclic diadenosine monophosphate, 5-amino 4-imidazole carboxamide riboside 5'-triphosphate, glutamine) and proteins (YbxF), and one set describes large conformational changes between ligand-free and ligand-bound states. The Varkud satellite ribozyme is the most recently solved structure of a known large ribozyme. All puzzles have established biological functions and require structural understanding to appreciate their molecular mechanisms. Through the use of fast-track experimental data, including multidimensional chemical mapping, and accurate prediction of RNA secondary structure, a large portion of the contacts in 3D have been predicted correctly leading to similar topologies for the top ranking predictions. Template-based and homology-derived predictions could predict structures to particularly high accuracies. However, achieving biological insights from de novo prediction of RNA 3D structures still depends on the size and complexity of the RNA. Blind computational predictions of RNA structures already appear to provide useful structural information in many cases. Similar to the previous RNA-Puzzles Round II experiment, the prediction of non-Watson-Crick interactions and the observed high atomic clash scores reveal a notable need for an algorithm of improvement. 
All prediction models and assessment results are available at http://ahsoka.u-strasbg.fr/rnapuzzles/.",2017-01-30 +23180793,IPD--the Immuno Polymorphism Database.,"The Immuno Polymorphism Database (IPD), http://www.ebi.ac.uk/ipd/ is a set of specialist databases related to the study of polymorphic genes in the immune system. The IPD project works with specialist groups or nomenclature committees who provide and curate individual sections before they are submitted to IPD for online publication. The IPD project stores all the data in a set of related databases. IPD currently consists of four databases: IPD-KIR, contains the allelic sequences of killer-cell immunoglobulin-like receptors, IPD-MHC, a database of sequences of the major histocompatibility complex of different species; IPD-HPA, alloantigens expressed only on platelets; and IPD-ESTDAB, which provides access to the European Searchable Tumour Cell-Line Database, a cell bank of immunologically characterized melanoma cell lines. The data is currently available online from the website and FTP directory. This article describes the latest updates and additional tools added to the IPD project.",2012-11-24 +26752768,Predictable tuning of protein expression in bacteria.,We comprehensively assessed the contribution of the Shine-Dalgarno sequence to protein expression and used the data to develop EMOPEC (Empirical Model and Oligos for Protein Expression Changes; http://emopec.biosustain.dtu.dk). EMOPEC is a free tool that makes it possible to modulate the expression level of any Escherichia coli gene by changing only a few bases. 
Measured protein levels for 91% of our designed sequences were within twofold of the desired target level.,2016-01-11 +,Hotspot-Mutation Analysis of the EGFR/KRAS/BRAF Pathway Using Mutation Surveyor® Software,"Hotspot-mutation analysis of the EGFR/KRAS/BRAF pathway (or other clinically relevant pathway) can quickly genotype patients as candidates who may respond favorably to specific drug treatments and therapies or into other groups where treatment options are limited and less favorable. Sanger sequencing analysis using Mutation Surveyor software provides high-throughput, high-sensitivity variation detection. Increased efficiency can be achieved using flexible and customizable reporting-sequencing results can be organized by patient identifiers, variation type (reported or unreported, pathogenic or benign or drug sensitive), by gene/exon/amplicon, or quality metrics, and other options. GenBank sequence files from NCBI for EGFR exons 18, 19, 20, and 21; KRAS exons 2 and 3; and BRAF exon 15 were edited to contain reported variations. These reported variations included polymorphisms from dbSNP (downloaded with the GenBank file), pathogenic and drug-sensitivity variations for EGFR (obtained from http://www.egfr.org/), activating mutations for KRAS, and constitutive mutations for BRAF. Bidirectional sequencing data for twelve, simulated (mutations obtained from sequencing reports in the scientific literature), patients were developed and compared to the customized GenBank sequences. Sequencing analysis results were grouped by patient-specific identifiers. Any unmatched or low quality data files are identified in the report, indicating which samples require resequencing. Mutations that match reported variations added to the GenBank sequences are highlighted-SNP identifiers or color coding of SNP type quickly indicate which variations are pathogenic or drug-sensitive or reported in dbSNP. 
Unreported variations are not highlighted and may be benign or variations of unknown significance. The gene column displays the gene and accession number for that gene used for the analysis. The exon column displays the exon number of the gene, and accession numbers of the mRNA and protein used for the analysis. High-throughput, high-sensitivity variation detection coupled with personalized reporting provides robust and economical genotyping of patients.",2011-10-01 +25502379,AutoWeka: toward an automated data mining software for QSAR and QSPR studies.,"

Unlabelled

In biology and chemistry, a key goal is to discover novel compounds affording potent biological activity or chemical properties. This could be achieved through a chemical intuition-driven trial-and-error process or via data-driven predictive modeling. The latter is based on the concept of quantitative structure-activity/property relationship (QSAR/QSPR) when applied in modeling the biological activity and chemical properties, respectively, of compounds. Data mining is a powerful technology underlying QSAR/QSPR as it harnesses knowledge from large volumes of high-dimensional data via multivariate analysis. Although extremely useful, the technicalities of data mining may overwhelm potential users, especially those in the life sciences. Herein, we aim to lower the barriers to access and utilization of data mining software for QSAR/QSPR studies. AutoWeka is an automated data mining software tool that is powered by the widely used machine learning package Weka. The software provides a user-friendly graphical interface along with an automated parameter search capability. It employs two robust and popular machine learning methods: artificial neural networks and support vector machines. This chapter describes the practical usage of AutoWeka and relevant tools in the development of predictive QSAR/QSPR models.

Availability

The software is freely available at http://www.mt.mahidol.ac.th/autoweka.",2015-01-01 +26543886,Dataset for the quantitative proteomics analysis of the primary hepatocellular carcinoma with single and multiple lesions.,"Hepatocellular Carcinoma (HCC) is one of the most common malignant tumor, which is causing the second leading cancer-related death worldwide. The tumor tissues and the adjacent noncancerous tissues obtained from HCC patients with single and multiple lesions were quantified using iTRAQ. A total of 5513 proteins (FDR of 1%) were identified which correspond to roughly 27% of the total liver proteome. And 107 and 330 proteins were dysregulated in HCC tissue with multiple lesions (MC group) and HCC tissue with a single lesion (SC group), compared with their noncancerous tissue (MN and SN group) respectively. Bioinformatics analysis (GO, KEGG and IPA) allowed these data to be organized into distinct categories. The data accompanying the manuscript on this approach (Xing et al., J. Proteomics (2015), http://dx.doi.org/10.1016/j.jprot.2015.08.007[1]) have been deposited to the iProX with identifier IPX00037601.",2015-09-08 +26799713,"Development of a RAD-Seq Based DNA Polymorphism Identification Software, AgroMarker Finder, and Its Application in Rice Marker-Assisted Breeding.","Rapid and accurate genome-wide marker detection is essential to the marker-assisted breeding and functional genomics studies. In this work, we developed an integrated software, AgroMarker Finder (AMF: http://erp.novelbio.com/AMF), for providing graphical user interface (GUI) to facilitate the recently developed restriction-site associated DNA (RAD) sequencing data analysis in rice. By application of AMF, a total of 90,743 high-quality markers (82,878 SNPs and 7,865 InDels) were detected between rice varieties JP69 and Jiaoyuan5A. The density of the identified markers is 0.2 per Kb for SNP markers, and 0.02 per Kb for InDel markers. 
Sequencing validation revealed that the accuracy of genome-wide marker detection by AMF is 93%. In addition, a validated subset of 82 SNPs and 31 InDels were found to be closely linked to 117 important agronomic trait genes, providing a basis for subsequent marker-assisted selection (MAS) and variety identification. Furthermore, we selected 12 markers from 31 validated InDel markers to identify seed authenticity of variety Jiaoyuanyou69, and we also identified 10 markers closely linked to the fragrant gene BADH2 to minimize linkage drag for Wuxiang075 (BADH2 donor)/Jiachang1 recombinants selection. Therefore, this software provides an efficient approach for marker identification from RAD-seq data, and it would be a valuable tool for plant MAS and variety protection.",2016-01-22 +27259543,DBSI server: DNA binding site identifier.,"

Unlabelled

: Protein-nucleic acid interactions are among the most important intermolecular interactions in the regulation of cellular events. Identifying residues involved in these interactions from protein structure alone is an important challenge. Here we introduce the webserver interface to DNA Binding Site Identifier (DBSI), a powerful structure-based SVM model for the prediction and visualization of DNA binding sites on protein structures. DBSI has been shown to be a top-performing model to predict DNA binding sites on the surface of a protein or peptide and shows promise in predicting RNA binding sites.

Availability and implementation

Server is available at http://dbsi.mitchell-lab.org

Contact

jcmitchell@wisc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-03 +26217790,Sequencing data and MLPA analysis data in support of the effectiveness and reliability of an asymmetric PCR-Based approach in preparing long MLPA probes.,"ABI PRISM 3100 Genetic Analyzer, a multi-color fluorescence-based DNA analysis system with 16 capillaries operating in parallel, was ideal tool both for DNA sequencing and DNA fragment analysis [1,2]. To demonstrate the effectiveness and reliability of an asymmetric PCR-Based approach (X.Y. Ling, G.M. Zhang, G. Pan, H. Long, Y.H. Cheng, C.Y. Xiang, L. Kang, F. Chen, Z.N. Chen, Preparing long probes by an asymmetric PCR-based approach for multiplex ligation-dependent probe amplification (MLPA), Anal. Biochem. (2015), http://dx.doi.org/10.1016/j.ab.2015.03.031, in press) in preparing the long MLPA probes that were generated with a M13-based method before [4], some prepared long MLPA probes were sequenced and then tested in MLPA analysis. Sequencing data shows that the long MLPA probes were identical to the designed ones, indicating the long probes can be easily prepared with the new method, and the MPLA analysis data shows that the results of MPLA analysis with these long probes were as same accurate and specific as with ones prepared with other methods. The sequencing data was not presented in the research article (X.Y. Ling, G.M. Zhang, G. Pan, H. Long, Y.H. Cheng, C.Y. Xiang, L. Kang, F. Chen, Z.N. Chen, Preparing long probes by an asymmetric PCR-based approach for multiplex ligation-dependent probe amplification (MLPA), Anal. Biochem. (2015), 10.1016/j.ab.2015.03.031, in press), but the MLPA analysis data was converted into figure 4 and figure 5 of the research article.",2015-05-27 +27009626,IntSplice: prediction of the splicing consequences of intronic single-nucleotide variations in the human genome.,"Precise spatiotemporal regulation of splicing is mediated by splicing cis-elements on pre-mRNA. 
Single-nucleotide variations (SNVs) affecting intronic cis-elements possibly compromise splicing, but no efficient tool has been available to identify them. Following an effect-size analysis of each intronic nucleotide on annotated alternative splicing, we extracted 105 parameters that could affect the strength of the splicing signals. However, we could not generate reliable support vector regression models to predict the percent-splice-in (PSI) scores for normal human tissues. Next, we generated support vector machine (SVM) models using 110 parameters to directly differentiate pathogenic SNVs in the Human Gene Mutation Database and normal SNVs in the dbSNP database, and we obtained models with a sensitivity of 0.800±0.041 (mean and s.d.) and a specificity of 0.849±0.021. Our IntSplice models were more discriminating than SVM models that we generated with Shapiro-Senapathy score and MaxEntScan::score3ss. We applied IntSplice to a naturally occurring and nine artificial intronic mutations in RAPSN causing congenital myasthenic syndrome. IntSplice correctly predicted the splicing consequences for nine of the ten mutants. We created a web service program, IntSplice (http://www.med.nagoya-u.ac.jp/neurogenetics/IntSplice) to predict splicing-affecting SNVs at intronic positions from -50 to -3.",2016-03-24 +28292754,Cardiac Resynchronization Therapy Reduces Ventricular Arrhythmias in Primary but Not Secondary Prophylactic Implantable Cardioverter Defibrillator Patients: Insight From the Resynchronization in Ambulatory Heart Failure Trial. ,"The RAFT (Resynchronization in Ambulatory Heart Failure Trial) demonstrated that cardiac resynchronization therapy (CRT) reduced both mortality and heart failure hospitalizations in patients with functional class II or III heart failure and widened QRS. We examined the influence of CRT on ventricular arrhythmias in patients with primary versus secondary prophylaxis defibrillator indications. 
All ventricular arrhythmias among RAFT study participants were downloaded and adjudicated by 2 blinded reviewers with an overreader for disagreements and committee review for remaining discrepancies. Incidence of ventricular arrhythmias among patients randomized to CRT-D versus implantable cardioverter defibrillator (ICD) were compared within the groups of patients treated for primary prophylaxis and for secondary prophylaxis. Of 1798 enrolled patients, 1764 had data available for adjudication and were included. Of these, 1531 patients were implanted for primary prophylaxis, while 233 patients were implanted for secondary prophylaxis; 884 patients were randomized to ICD and 880 to CRT-D. During 5953.6 patient-years of follow-up, there were 11 278 appropriate ICD detections of ventricular arrhythmias. In the primary prophylaxis group, CRT-D significantly reduced incidence ventricular arrhythmias in comparison to ICD (hazard ratio, 0.86; 95% confidence interval, 0.74-0.99; P=0.044). This effect was not seen in the secondary prophylaxis group (hazard ratio, 1.14; 95% confidence interval, 0.82-1.58; P=0.45). CRT-D was not associated with significant differences in overall ventricular arrhythmia burden in either group. CRT reduced the rate of onset of new ventricular arrhythmias detected by ICDs in patients without a history of prior ventricular arrhythmias. This effect was not observed among patients who had prior ventricular arrhythmias. URL: http://www.clinicaltrials.gov. Unique identifier: NCT00251251.",2017-03-01 +25392408,The UCSC Cancer Genomics Browser: update 2015.,"The UCSC Cancer Genomics Browser (https://genome-cancer.ucsc.edu/) is a web-based application that integrates relevant data, analysis and visualization, allowing users to easily discover and share their research observations. 
Users can explore the relationship between genomic alterations and phenotypes by visualizing various -omic data alongside clinical and phenotypic features, such as age, subtype classifications and genomic biomarkers. The Cancer Genomics Browser currently hosts 575 public datasets from genome-wide analyses of over 227,000 samples, including datasets from TCGA, CCLE, Connectivity Map and TARGET. Users can download and upload clinical data, generate Kaplan-Meier plots dynamically, export data directly to Galaxy for analysis, plus generate URL bookmarks of specific views of the data to share with others.",2014-11-11 +25505085,Identifying cancer-related microRNAs based on gene expression data.,"

Motivation

MicroRNAs (miRNAs) are short non-coding RNAs that play important roles in post-transcriptional regulations as well as other important biological processes. Recently, accumulating evidences indicate that miRNAs are extensively involved in cancer. However, it is a big challenge to identify which miRNAs are related to which cancer considering the complex processes involved in tumors, where one miRNA may target hundreds or even thousands of genes and one gene may regulate multiple miRNAs. Despite integrative analysis of matched gene and miRNA expression data can help identify cancer-associated miRNAs, such kind of data is not commonly available. On the other hand, there are huge amount of gene expression data that are publicly accessible. It will significantly improve the efficiency of characterizing miRNA's function in cancer if we can identify cancer miRNAs directly from gene expression data.

Results

We present a novel computational framework to identify the cancer-related miRNAs based solely on gene expression profiles without requiring either miRNA expression data or the matched gene and miRNA expression data. The results on multiple cancer datasets show that our proposed method can effectively identify cancer-related miRNAs with higher precision compared with other popular approaches. Furthermore, some of our novel predictions are validated by both differentially expressed miRNAs and evidences from literature, implying the predictive power of our proposed method. In addition, we construct a cancer-miRNA-pathway network, which can help explain how miRNAs are involved in cancer.

Availability and implementation

The R code and data files for the proposed method are available at http://comp-sysbio.org/miR_Path/

Contact

liukeq@gmail.com

Supplementary information

supplementary data are available at Bioinformatics online.",2014-12-12 +24910813,Sparse Representation for Prediction of HIV-1 Protease Drug Resistance.,"HIV rapidly evolves drug resistance in response to antiviral drugs used in AIDS therapy. Estimating the specific resistance of a given strain of HIV to individual drugs from sequence data has important benefits for both the therapy of individual patients and the development of novel drugs. We have developed an accurate classification method based on the sparse representation theory, and demonstrate that this method is highly effective with HIV-1 protease. The protease structure is represented using our newly proposed encoding method based on Delaunay triangulation, and combined with the mutated amino acid sequences of known drug-resistant strains to train a machine-learning algorithm both for classification and regression of drug-resistant mutations. An overall cross-validated classification accuracy of 97% is obtained when trained on a publically available data base of approximately 1.5×10^4 known sequences (Stanford HIV database http://hivdb.stanford.edu/cgi-bin/GenoPhenoDS.cgi). Resistance to four FDA approved drugs is computed and comparisons with other algorithms demonstrate that our method shows significant improvements in classification accuracy.",2013-01-01 +28113865,Pubcast and Genecast: Browsing and Exploring Publications and Associated Curated Content in Biology Through Mobile Devices.,"Services such as Facebook, Amazon, and eBay were once solely accessed from stationary computers. These web services are now being used increasingly on mobile devices. We acknowledge this new reality by providing users a way to access publications and a curated cancer mutation database on their mobile device with daily automated updates.

Availability

http://hive.biochemistry.gwu.edu/tools/HivePubcast.",2016-03-23 +27606011,"Rocker: Open source, easy-to-use tool for AUC and enrichment calculations and ROC visualization.","

Abstract

Receiver operating characteristics (ROC) curve with the calculation of area under curve (AUC) is a useful tool to evaluate the performance of biomedical and chemoinformatics data. For example, in virtual drug screening ROC curves are very often used to visualize the efficiency of the used application to separate active ligands from inactive molecules. Unfortunately, most of the available tools for ROC analysis are implemented into commercially available software packages, or are plugins in statistical software, which are not always the easiest to use. Here, we present Rocker, a simple ROC curve visualization tool that can be used for the generation of publication quality images. Rocker also includes an automatic calculation of the AUC for the ROC curve and Boltzmann-enhanced discrimination of ROC (BEDROC). Furthermore, in virtual screening campaigns it is often important to understand the early enrichment of active ligand identification, for this Rocker offers automated calculation routine. To enable further development of Rocker, it is freely available (MIT-GPL license) for use and modifications from our web-site (http://www.jyu.fi/rocker).",2016-09-07 +27016142,tRNAmodpred: A computational method for predicting posttranscriptional modifications in tRNAs.,"tRNA molecules contain numerous chemically altered nucleosides, which are formed by enzymatic modification of the primary transcripts during the complex tRNA maturation process. Some of the modifications are introduced by single reactions, while other require complex series of reactions carried out by several different enzymes. The location and distribution of various types of modifications vary greatly between different tRNA molecules, organisms and organelles. We have developed a computational method tRNAmodpred, for predicting modifications in tRNA sequences. Briefly, our method takes as an input one or more unmodified tRNA sequences and a set of protein sequences corresponding to a proteome of a cell. 
Subsequently it identifies homologs of known tRNA modification enzymes in the proteome, predicts tRNA modification activities and maps them onto known pathways of RNA modification from the MODOMICS database. Thereby, theoretically possible modification pathways are identified, and products of these modification reactions are proposed for query tRNAs. This method allows for predicting modification patterns for newly sequenced genomes as well as for checking tentative modification status of tRNAs from one species treated with enzymes from another source, e.g. to predict the possible modifications of eukaryotic tRNAs expressed in bacteria. tRNAmodpred is freely available as a web server at http://genesilico.pl/trnamodpred/.",2016-03-23 +27748631,Quantitative structure-permeability relationships at various pH values for neutral and amphoteric drugs and drug-like compounds.,"Human intestinal absorption is a key property for orally administered drugs and is dependent on pH. This study focuses on neutral and amphoteric compounds and their membrane permeabilities across the range of pH values found in the human intestine. The membrane permeability values for 15 neutral and 60 amphoteric compounds at pH 3, 5, 7.4 and 9 were measured using the parallel artificial membrane permeability assay (PAMPA). For each data series the quantitative structure-permeability relationships were developed and analysed. The results show that the membrane permeability of neutral compounds is attributed to a single structural characteristic, the hydrogen bond donor ability. Amphoteric compounds are more complex because of their chemical constitution, and therefore require three-parameter models to describe and predict membrane permeability. Analysis of the models for amphoteric compounds reveals that membrane permeability depends on multiple structural characteristics: the partition coefficient, hydrogen bond properties and the shape of the molecules. 
In addition to conventional validation strategies, two external compounds (isradipine and omeprazole) were tested and revealed very good agreement of pH profiles between experimental and predicted membrane permeability for all of the developed models. Selected QSAR models are available at the QsarDB repository (http://dx.doi.org/10.15152/QDB.184).",2016-10-17 +23118488,HEXEvent: a database of Human EXon splicing Events.,"HEXEvent (http://hexevent.mmg.uci.edu) is a new database that permits the user to compile genome-wide exon data sets of human internal exons showing selected splicing events. User queries can be customized based on the type and the frequency of alternative splicing events. For each splicing version of an exon, an ESTs count is given, specifying the frequency of the event. A user-specific definition of constitutive exons can be entered to designate an exon exclusion level still acceptable for an exon to be considered as constitutive. Similarly, the user has the option to define a maximum inclusion level for an exon to be called an alternatively spliced exon. Unlike other existing splicing databases, HEXEvent permits the user to easily extract alternative splicing information for individual, multiple or genome-wide human internal exons. Importantly, the generated data sets are downloadable for further analysis.",2012-10-31 +23486613,Comparative genome analysis and gene finding in Candida species using CGOB.,"The Candida Gene Order Browser (CGOB) was developed as a tool to visualize and analyze synteny relationships in multiple Candida species, and to provide an accurate, manually curated set of orthologous Candida genes for evolutionary analyses. Here, we describe major improvements to CGOB. The underlying structure of the database has been changed significantly. 
Genomic features are now based directly on genome annotations rather than on protein sequences, which allows non-protein features such as centromere locations in Candida albicans and tRNA genes in all species to be included. The data set has been expanded to 13 species, including genomes of pathogens (C. albicans, C. parapsilosis, C. tropicalis, and C. orthopsilosis), and those of xylose-degrading species with important biotechnological applications (C. tenuis, Scheffersomyces stipitis, and Spathaspora passalidarum). Updated annotations of C. parapsilosis, C. dubliniensis, and Debaryomyces hansenii have been incorporated. We discovered more than 1,500 previously unannotated genes among the 13 genomes, ranging in size from 29 to 3,850 amino acids. Poorly conserved and rapidly evolving genes were also identified. Re-analysis of the mating type loci of the xylose degraders suggests that C. tenuis is heterothallic, whereas both Spa. passalidarum and S. stipitis are homothallic. As well as hosting the browser, the CGOB website (http://cgob.ucd.ie) gives direct access to all the underlying genome annotations, sequences, and curated orthology data.",2013-03-13 +26243019,ERC analysis: web-based inference of gene function via evolutionary rate covariation.,"

Unlabelled

The recent explosion of comparative genomics data presents an unprecedented opportunity to construct gene networks via the evolutionary rate covariation (ERC) signature. ERC is used to identify genes that experienced similar evolutionary histories, and thereby draws functional associations between them. The ERC Analysis website allows researchers to exploit genome-wide datasets to infer novel genes in any biological function and to explore deep evolutionary connections between distinct pathways and complexes. The website provides five analytical methods, graphical output, statistical support and access to an increasing number of taxonomic groups.

Availability and implementation

Analyses and data at http://csb.pitt.edu/erc_analysis/

Contact

nclark@pitt.edu.",2015-08-04 +22836712,A comparative cellular and molecular biology of longevity database.,"Discovering key cellular and molecular traits that promote longevity is a major goal of aging and longevity research. One experimental strategy is to determine which traits have been selected during the evolution of longevity in naturally long-lived animal species. This comparative approach has been applied to lifespan research for nearly four decades, yielding hundreds of datasets describing aspects of cell and molecular biology hypothesized to relate to animal longevity. Here, we introduce a Comparative Cellular and Molecular Biology of Longevity Database, available at ( http://genomics.brocku.ca/ccmbl/ ), as a compendium of comparative cell and molecular data presented in the context of longevity. This open access database will facilitate the meta-analysis of amalgamated datasets using standardized maximum lifespan (MLSP) data (from AnAge). The first edition contains over 800 data records describing experimental measurements of cellular stress resistance, reactive oxygen species metabolism, membrane composition, protein homeostasis, and genome homeostasis as they relate to vertebrate species MLSP. The purpose of this review is to introduce the database and briefly demonstrate its use in the meta-analysis of combined datasets.",2012-07-27 +23161672,APPRIS: annotation of principal and alternative splice isoforms.,"Here, we present APPRIS (http://appris.bioinfo.cnio.es), a database that houses annotations of human splice isoforms. APPRIS has been designed to provide value to manual annotations of the human genome by adding reliable protein structural and functional data and information from cross-species conservation. The visual representation of the annotations provided by APPRIS for each gene allows annotators and researchers alike to easily identify functional changes brought about by splicing events. 
In addition to collecting, integrating and analyzing reliable predictions of the effect of splicing events, APPRIS also selects a single reference sequence for each gene, here termed the principal isoform, based on the annotations of structure, function and conservation for each transcript. APPRIS identifies a principal isoform for 85% of the protein-coding genes in the GENCODE 7 release for ENSEMBL. Analysis of the APPRIS data shows that at least 70% of the alternative (non-principal) variants would lose important functional or structural information relative to the principal isoform.",2012-11-17 +26093148,LFQC: a lossless compression algorithm for FASTQ files.,"

Motivation

Next Generation Sequencing (NGS) technologies have revolutionized genomic research by reducing the cost of whole genome sequencing. One of the biggest challenges posed by modern sequencing technology is economic storage of NGS data. Storing raw data is infeasible because of its enormous size and high redundancy. In this article, we address the problem of storage and transmission of large FASTQ files using innovative compression techniques.

Results

We introduce a new lossless non-reference based FASTQ compression algorithm named Lossless FASTQ Compressor. We have compared our algorithm with other state of the art big data compression algorithms namely gzip, bzip2, fastqz (Bonfield and Mahoney, 2013), fqzcomp (Bonfield and Mahoney, 2013), Quip (Jones et al., 2012), DSRC2 (Roguski and Deorowicz, 2014). This comparison reveals that our algorithm achieves better compression ratios on LS454 and SOLiD datasets.

Availability and implementation

The implementations are freely available for non-commercial purposes. They can be downloaded from http://engr.uconn.edu/rajasek/lfqc-v1.1.zip.

Contact

rajasek@engr.uconn.edu.",2015-06-20 +28082885,A Supervoxel-Based Method for Groupwise Whole Brain Parcellation with Resting-State fMRI Data.,"Node definition is a very important issue in human brain network analysis and functional connectivity studies. Typically, the atlases generated from meta-analysis, random criteria, and structural criteria are utilized as nodes in related applications. However, these atlases are not originally designed for such purposes and may not be suitable. In this study, we combined normalized cut (Ncut) and a supervoxel method called simple linear iterative clustering (SLIC) to parcellate whole brain resting-state fMRI data in order to generate appropriate brain atlases. Specifically, Ncut was employed to extract features from connectivity matrices, and then SLIC was applied on the extracted features to generate parcellations. To obtain group level parcellations, two approaches named mean SLIC and two-level SLIC were proposed. The cluster number varied in a wide range in order to generate parcellations with multiple granularities. The two SLIC approaches were compared with three state-of-the-art approaches under different evaluation metrics, which include spatial contiguity, functional homogeneity, and reproducibility. Both the group-to-group reproducibility and the group-to-subject reproducibility were evaluated in our study. The experimental results showed that the proposed approaches obtained relatively good overall clustering performances in different conditions that included different weighting functions, different sparsifying schemes, and several confounding factors. Therefore, the generated atlases are appropriate to be utilized as nodes for network analysis. The generated atlases and major source codes of this study have been made publicly available at http://www.nitrc.org/projects/slic/.",2016-12-27 +28685036,Mapping the global research landscape and knowledge gaps on multimorbidity: a bibliometric study.,"

Background

To summarize global research trends and activities on multimorbidity; then to assess the knowledge gaps and to identify implications for knowledge exchange between high income countries (HICs) and low- and middle- income countries (LMICs).

Methods

A comprehensive search was conducted to identify research publications on multimorbidity in the Web of Science™, as well as diabetes, depression, hypertension, and Chronic Obstructive Pulmonary Disease (COPD). The time frame for the search was from 1900 to June, 2016. Information (such as publication date, subject category, author, country of origin, title, abstract, and keywords) were extracted and the full texts were obtained for the co-citation analysis. Data were linked with the life expectancy at birth (years) and Gross National Income (GNI). Co-citation and hierarchal clustering analysis was used to map the trends and research networks with CiteSpace II (JAVA freeware, copyright Chaomei Chen, http://cluster.cis.drexel.edu/~cchen/citespace/).

Findings

We identified 2864 relevant publications as at June 2016, with the first paper on this topic indexed in 1974 from Germany, but 80% were published after 2010. Further analysis yielded two knowledge gaps: (1) compared with single conditions (diabetes, hypertension, depression, and COPD), there is a mismatch between the high prevalence of multimorbidity and its research outputs (ratio of articles on multimorbidity vs other four single conditions is 1:13-150); (2) although a total of 76 countries have contributed to this research area, only 5% of research originated from LMICs where 73% of non-communicable disease (NCD) related deaths had occurred. Additional analysis showed the median year of first publication occurred 15 years later in the LMICs compared with HICs (2010 vs 1995); and longer life expectancy was associated with exponentially higher publication outputs (Pearson correlation coefficient r = 0.95) at the global level. The life expectancy at the median year (1994) of first publication was 66.1, with the gap between LMICs and HICs 7.9 (68.4 vs 76.3).

Conclusions

This study confirms substantial knowledge gaps in the research agenda on multimorbidity, with input urgently needed to move us forward worldwide, especially for and in LMICs. There is the possibility that LMICs can learn from and collaborate with HICs in this area.",2017-06-01 +26787662,EPI-peptide designer: a tool for designing peptide ligand libraries based on epitope-paratope interactions.,"

Motivation

Antibodies are an important class of biological drugs, but with limitations, such as inadequate pharmacokinetics, adverse immunogenicity and high production costs. Synthetic peptides for the desired target represent an important alternative to antibodies. However, no computational tool exists to guide the design of these peptides.

Results

To identify the interacting residues in a given antibody-antigen (Ab-Ag) interface we used Interface Interacting Residue (I2R), a selection method based on computed molecular interactions. The aggregation of all the molecular interactions between epitope and paratope residues allowed us to transform the 3D Ab-Ag complex structures into interface graphs. Based on these data and the probability of molecular interaction we developed EPI-Peptide Designer tool that uses predicted paratope residues for an epitope of interest to generate targeted peptide ligand libraries. EPI-Peptide Designer successfully predicted 301 peptides able to bind to LiD1 target protein (65% of the experimentally tested peptides), an enrichment of 22% compared to randomly generated peptides. This tool should enable the development of a new generation of synthetic interacting peptides that could be very useful in the biosensor, diagnostic and therapeutic fields.

Availability and implementation

All software developed in this work are available at http://www.biocomp.icb.ufmg.br/biocomp/

Contact

liza@icb.ufmg.br

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-18 +27196673,Geno2pheno[HCV] - A Web-based Interpretation System to Support Hepatitis C Treatment Decisions in the Era of Direct-Acting Antiviral Agents.,"The face of hepatitis C virus (HCV) therapy is changing dramatically. Direct-acting antiviral agents (DAAs) specifically targeting HCV proteins have been developed and entered clinical practice in 2011. However, despite high sustained viral response (SVR) rates of more than 90%, a fraction of patients do not eliminate the virus and in these cases treatment failure has been associated with the selection of drug resistance mutations (RAMs). RAMs may be prevalent prior to the start of treatment, or can be selected under therapy, and furthermore they can persist after cessation of treatment. Additionally, certain DAAs have been approved only for distinct HCV genotypes and may even have subtype specificity. Thus, sequence analysis before start of therapy is instrumental for managing DAA-based treatment strategies. We have created the interpretation system geno2pheno[HCV] (g2p[HCV]) to analyse HCV sequence data with respect to viral subtype and to predict drug resistance. Extensive reviewing and weighting of literature related to HCV drug resistance was performed to create a comprehensive list of drug resistance rules for inhibitors of the HCV protease in non-structural protein 3 (NS3-protease: Boceprevir, Paritaprevir, Simeprevir, Asunaprevir, Grazoprevir and Telaprevir), the NS5A replicase factor (Daclatasvir, Ledipasvir, Elbasvir and Ombitasvir), and the NS5B RNA-dependent RNA polymerase (Dasabuvir and Sofosbuvir). Upon submission of up to eight sequences, g2p[HCV] aligns the input sequences, identifies the genomic region(s), predicts the HCV geno- and subtypes, and generates for each DAA a drug resistance prediction report. 
g2p[HCV] offers easy-to-use and fast subtype and resistance analysis of HCV sequences, is continuously updated and freely accessible under http://hcv.geno2pheno.org/index.php. The system was partially validated with respect to the NS3-protease inhibitors Boceprevir, Telaprevir and Simeprevir by using data generated with recombinant, phenotypic cell culture assays obtained from patients' virus variants.",2016-05-19 +22121220,The IntAct molecular interaction database in 2012.,"IntAct is an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions. Two levels of curation are now available within the database, with both IMEx-level annotation and less detailed MIMIx-compatible entries currently supported. As from September 2011, IntAct contains approximately 275,000 curated binary interaction evidences from over 5000 publications. The IntAct website has been improved to enhance the search process and in particular the graphical display of the results. New data download formats are also available, which will facilitate the inclusion of IntAct's data in the Semantic Web. IntAct is an active contributor to the IMEx consortium (http://www.imexconsortium.org). IntAct source code and data are freely available at http://www.ebi.ac.uk/intact.",2011-11-24 +22345621,RNA-Seq Atlas--a reference database for gene expression profiling in normal tissue by next-generation sequencing.,"

Motivation

Next-generation sequencing technology enables an entirely new perspective for clinical research and will speed up personalized medicine. In contrast to microarray-based approaches, RNA-Seq analysis provides a much more comprehensive and unbiased view of gene expression. Although the perspective is clear and the long-term success of this new technology obvious, bioinformatics resources making these data easily available especially to the biomedical research community are still evolving.

Results

We have generated RNA-Seq Atlas, a web-based repository of RNA-Seq gene expression profiles and query tools. The website offers open and easy access to RNA-Seq gene expression profiles and tools to both compare tissues and find genes with specific expression patterns. To enlarge the scope of the RNA-Seq Atlas, the data were linked to common functional and genetic databases, in particular offering information on the respective gene, signaling pathway analysis and evaluation of biological functions by means of gene ontologies. Additionally, data were linked to several microarray gene profiles, including BioGPS normal tissue profiles and NCI60 cancer cell line expression data. Our data search interface allows an integrative detailed comparison between our RNA-Seq data and the microarray information. This is the first database providing data mining tools and open access to large scale RNA-Seq expression profiles. Its applications will be versatile, as it will be beneficial in identifying tissue specific genes and expression profiles, comparison of gene expression profiles among diverse tissues, but also systems biology approaches linking tissue function to gene expression changes.

Availability and implementation

http://medicalgenomics.org/rna_seq_atlas.",2012-02-17 +,A Dated Phylogeny Complements Macroecological Analysis to Explain the Diversity Patterns in Geonoma (Arecaceae),"Integrating phylogenetic data into macroecological studies of biodiversity patterns may complement the information provided by present-day spatial patterns. In the present study, we used range map data for all Geonoma (Arecaceae) species to assess whether Geonoma species composition forms spatially coherent floristic clusters. We then evaluated the extent to which the spatial variation in species composition reflects present-day environmental variation vs. nonenvironmental spatial effects, as expected if the pattern reflects historical biogeography. We also examined the degree of geographic structure in the Geonoma phylogeny. Finally, we used a dated phylogeny to assess whether species richness within the floristic clusters was constrained by a specific historical biogeographic driver, namely time-for-diversification. A cluster analysis identified six spatially coherent floristic clusters, four of which were used to reveal a significant geographic phylogenetic structure. Variation partitioning analysis showed that 56 percent of the variation in species composition could be explained by spatial variables alone, consistent with historical factors having played a major role in generating the Geonoma diversity pattern. To test for a time-for-diversification effect, we correlated four different species richness measures with the diversification time of the earliest large lineage that is characteristic of each cluster. In support of this hypothesis, we found that geographic areas with higher richness contained older radiations. 
We conclude that current geographic diversity patterns in Geonoma reflect the present-day climate, but to a larger extent are related to nonenvironmental spatial constraints linked to colonization time, dispersal limitation, and geological history, followed by within-area evolutionary diversification. Abstract in Spanish is available at http://www.blackwell-synergy.com/loi/btp.",2011-05-01 +24843384,Mental health and psychiatric care in Bolivia: what do we know?,"

Background

Recently Bolivia has implemented a universal health system, but their mental health policy is still emerging.

Objectives

To investigate the current state of the mental health care system in Bolivia and discuss challenges for structuring a coordinated network of services that can effectively meet the needs of the Bolivian population.

Methods

This review was conducted by searching for scholarly articles through the databases Lilacs, Medline OPS, HISA and IBECS REPIDISCA via the search portal in the Virtual Health Library - NLM (http://www.bireme.br).

Results

Bolivia has a National Mental Health Plan that is intended to guide mental health promotion, prevention, treatment and rehabilitation of mental illness, but the resources for this area of health are limited. There are 1.06 psychiatrists and 0.46 psychologists per 100, 000 inhabitants. Information on psychiatric morbidity in Bolivia and the impact of mental disorders on the global burden of disease is scarce. Admission statistics reported by psychiatric hospitals in the country show that the main cause of hospitalization is substance abuse (30%). Alcohol consumption is responsible for 90% of these admissions, in addition to being a major cause of deaths in traffic and one of the main risk factors for domestic violence. Almost one in two women in Bolivia (47%) experienced some form of violence from their partner in the last year. Nineteen percent of women living with a partner reported being physically abused, while 7% were sexually abused by their partners. Isolated studies report that suicide rates are disproportionately high in Bolivia.

Conclusions

Although there is a shortage of epidemiological data in Bolivia, it is clear the impact of alcohol addiction in psychiatric admissions, domestic violence and traffic accidents. Violence against women and suicides are important issues to be tackled. Among the proposed strategies to afford human resources for mental health in Bolivia, ""task shifting"", the delegation of tasks to non-specialists should be extensively adopted in the country to improve mental health care.",2014-05-15 +25935040,Identification Exon Skipping Events From High-Throughput RNA Sequencing Data.,"The emergence of next-generation high-throughput RNA sequencing (RNA-Seq) provides tremendous opportunities for researchers to analyze alternative splicing on a genome-wide scale. However, accurate identification of alternative splicing events from RNA-Seq data has remained an unresolved challenge in next-generation sequencing (NGS) studies. Identifying exon skipping (ES) events is an essential part in genome-wide alternative splicing event identification. In this paper, we propose a novel method ESFinder, a random forest classifier to identify ES events from RNA-Seq data. ESFinder conducts thorough studies on predicting features and figures out proper features according to their relevance for ES event identification. Experimental results on real human skeletal muscle and brain RNA-Seq data show that ESFinder could effectively predict ES events with high predictive accuracy. The codes of ESFinder are available at http://mlg.hit.edu.cn/ybai/ES/ESFinder.html.",2015-04-29 +26994911,Assessing the state of the art in biomedical relation extraction: overview of the BioCreative V chemical-disease relation (CDR) task. ,"Manually curating chemicals, diseases and their relationships is significantly important to biomedical research, but it is plagued by its high cost and the rapid growth of the biomedical literature. 
In recent years, there has been a growing interest in developing computational approaches for automatic chemical-disease relation (CDR) extraction. Despite these attempts, the lack of a comprehensive benchmarking dataset has limited the comparison of different techniques in order to assess and advance the current state-of-the-art. To this end, we organized a challenge task through BioCreative V to automatically extract CDRs from the literature. We designed two challenge tasks: disease named entity recognition (DNER) and chemical-induced disease (CID) relation extraction. To assist system development and assessment, we created a large annotated text corpus that consisted of human annotations of chemicals, diseases and their interactions from 1500 PubMed articles. 34 teams worldwide participated in the CDR task: 16 (DNER) and 18 (CID). The best systems achieved an F-score of 86.46% for the DNER task--a result that approaches the human inter-annotator agreement (0.8875)--and an F-score of 57.03% for the CID task, the highest results ever reported for such tasks. When combining team results via machine learning, the ensemble system was able to further improve over the best team results by achieving 88.89% and 62.80% in F-score for the DNER and CID task, respectively. Additionally, another novel aspect of our evaluation is to test each participating system's ability to return real-time results: the average response time for each team's DNER and CID web service systems were 5.6 and 9.3 s, respectively. Most teams used hybrid systems for their submissions based on machine learning. Given the level of participation and results, we found our task to be successful in engaging the text-mining research community, producing a large annotated corpus and improving the results of automatic disease recognition and CDR extraction. 
Database URL: http://www.biocreative.org/tasks/biocreative-v/track-3-cdr/.",2016-03-19 +27107898,Induction of OAS gene family in HIV monocyte infected patients with high and low viral load.,"

Background

The innate immunity plays a predominant role in the early control of HIV infection, before the induction of adaptive immune responses. The cytokine secretion operated by the CD4(+) T helper cells is able to induce a response in the innate immunity cells and significantly affect HIV-1 persistence and replication. One of the pathways activated by monocytes to restrain viral infection is the 2' -5' -oligoadenylate synthetase (OAS)/RNase L pathway. OAS is activated by dsRNA and IFNs to produce 2' -5' oligoadenylates, which are activators of RNase L. This enzyme degrades viral and cellular RNAs, thus restricting viral infection.

Materials and methods

We analyzed a microarray dataset obtained from the NCBI Gene Expression Omnibus (GEO, http://www.ncbi.nlm.nih.gov/geo/) databank (accession number GSE18464) in order to verify the modulation of the OAS gene family in CD14 (+) monocytes isolated from 55 subjects, 22 with HIV-1 HVL (high viral load), and 22 with HIV-1 LVL (low viral load), as well as in 11 HIV-1 seronegative controls. We have validated the data on the expression levels of the OAS genes by performing real-time PCR on monocyte from a cohort of HIV infected patients (n = 20), with clinical characteristics similar to those of the patients recruited in the study present in the microarray.

Results

Microarray analysis showed that OAS gene family are significantly upregulated in monocyte of HIV-1 patients with HVL, as compared to LVL patients and to healthy donors. Furthermore, we showed a significant correlation between the OAS gene family and the log2 viral load and CD4 count. These results were confirmed by the in vitro validation.

Conclusions

Data from this study suggest an involvement for the OAS gene family in the control of HIV-1 infection.",2016-04-20 +27000774,Mem-ADSVM: A two-layer multi-label predictor for identifying multi-functional types of membrane proteins.,"Identifying membrane proteins and their multi-functional types is an indispensable yet challenging topic in proteomics and bioinformatics. However, most of the existing membrane-protein predictors have the following problems: (1) they do not predict whether a given protein is a membrane protein or not; (2) they are limited to predicting membrane proteins with single-label functional types but ignore those with multi-functional types; and (3) there is still much room for improvement for their performance. To address these problems, this paper proposes a two-layer multi-label predictor, namely Mem-ADSVM, which can identify membrane proteins (Layer I) and their multi-functional types (Layer II). Specifically, given a query protein, its associated gene ontology (GO) information is retrieved by searching a compact GO-term database with its homologous accession number. Subsequently, the GO information is classified by a binary support vector machine (SVM) classifier to determine whether it is a membrane protein or not. If yes, it will be further classified by a multi-label multi-class SVM classifier equipped with an adaptive-decision (AD) scheme to determine to which functional type(s) it belongs. Experimental results show that Mem-ADSVM significantly outperforms state-of-the-art predictors in terms of identifying both membrane proteins and their multi-functional types. This paper also suggests that the two-layer prediction architecture is better than the one-layer for prediction performance. For reader's convenience, the Mem-ADSVM server is available online at http://bioinfo.eie.polyu.edu.hk/MemADSVMServer/.",2016-03-19 +26597459,Coev-web: a web platform designed to simulate and evaluate coevolving positions along a phylogenetic tree.,"

Background

Available methods to simulate nucleotide or amino acid data typically use Markov models to simulate each position independently. These approaches are not appropriate to assess the performance of combinatorial and probabilistic methods that look for coevolving positions in nucleotide or amino acid sequences.

Results

We have developed a web-based platform that gives user-friendly access to two phylogenetic-based methods implementing the Coev model: the evaluation of coevolving scores and the simulation of coevolving positions. We have also extended the capabilities of the Coev model to allow for the generalization of the alphabet used in the Markov model, which can now analyse both nucleotide and amino acid data sets. The simulation of coevolving positions is novel and builds upon the developments of the Coev model. It allows users to simulate pairs of dependent nucleotide or amino acid positions.

Conclusions

The main focus of our paper is the new simulation method we present for coevolving positions. The implementation of this method is embedded within the web platform Coev-web that is freely accessible at http://coev.vital-it.ch/, and was tested in most modern web browsers.",2015-11-23 +25411328,jNMFMA: a joint non-negative matrix factorization meta-analysis of transcriptomics data.,"

Motivation

Tremendous amount of omics data being accumulated poses a pressing challenge of meta-analyzing the heterogeneous data for mining new biological knowledge. Most existing methods deal with each gene independently, thus often resulting in high false positive rates in detecting differentially expressed genes (DEG). To our knowledge, no or little effort has been devoted to methods that consider dependence structures underlying transcriptomics data for DEG identification in meta-analysis context.

Results

This article proposes a new meta-analysis method for identification of DEGs based on joint non-negative matrix factorization (jNMFMA). We mathematically extend non-negative matrix factorization (NMF) to a joint version (jNMF), which is used to simultaneously decompose multiple transcriptomics data matrices into one common submatrix plus multiple individual submatrices. By the jNMF, the dependence structures underlying transcriptomics data can be interrogated and utilized, while the high-dimensional transcriptomics data are mapped into a low-dimensional space spanned by metagenes that represent hidden biological signals. jNMFMA finally identifies DEGs as genes that are associated with differentially expressed metagenes. The ability of extracting dependence structures makes jNMFMA more efficient and robust to identify DEGs in meta-analysis context. Furthermore, jNMFMA is also flexible to identify DEGs that are consistent among various types of omics data, e.g. gene expression and DNA methylation. Experimental results on both simulation data and real-world cancer data demonstrate the effectiveness of jNMFMA and its superior performance over other popular approaches.

Availability and implementation

R code for jNMFMA is available for non-commercial use via http://micblab.iim.ac.cn/Download/.

Contact

hqwang@ustc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-16 +26722119,Robust quantitative scratch assay.,"

Unlabelled

The wound healing assay (or scratch assay) is a technique frequently used to quantify the dependence of cell motility--a central process in tissue repair and evolution of disease--subject to various treatment conditions. However, processing the resulting data is a laborious task due to its high throughput and variability across images. This Robust Quantitative Scratch Assay algorithm introduced statistical outputs where migration rates are estimated, cellular behaviour is distinguished and outliers are identified among groups of unique experimental conditions. Furthermore, the RQSA decreased measurement errors and increased accuracy in the wound boundary at comparable processing times compared to the previously developed method (TScratch).

Availability and implementation

The RQSA is freely available at: http://ophid.utoronto.ca/RQSA/RQSA_Scripts.zip The image sets used for training and validation and results are available at: (http://ophid.utoronto.ca/RQSA/trainingSet.zip, http://ophid.utoronto.ca/RQSA/validationSet.zip, http://ophid.utoronto.ca/RQSA/ValidationSetResults.zip, http://ophid.utoronto.ca/RQSA/ValidationSet_H1975.zip, http://ophid.utoronto.ca/RQSA/ValidationSet_H1975Results.zip, http://ophid.utoronto.ca/RQSA/RobustnessSet.zip, http://ophid.utoronto.ca/RQSA/RobustnessSet.zip). Supplementary Material is provided for detailed description of the development of the RQSA.

Contact

juris@ai.utoronto.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-31 +22080565,modMine: flexible access to modENCODE data.,"In an effort to comprehensively characterize the functional elements within the genomes of the important model organisms Drosophila melanogaster and Caenorhabditis elegans, the NHGRI model organism Encyclopaedia of DNA Elements (modENCODE) consortium has generated an enormous library of genomic data along with detailed, structured information on all aspects of the experiments. The modMine database (http://intermine.modencode.org) described here has been built by the modENCODE Data Coordination Center to allow the broader research community to (i) search for and download data sets of interest among the thousands generated by modENCODE; (ii) access the data in an integrated form together with non-modENCODE data sets; and (iii) facilitate fine-grained analysis of the above data. The sophisticated search features are possible because of the collection of extensive experimental metadata by the consortium. Interfaces are provided to allow both biologists and bioinformaticians to exploit these rich modENCODE data sets now available via modMine.",2011-11-12 +27801916,Preventing Foodborne and Enteric Illnesses Among At-Risk Populations in the United States and Rhode Island.,"One out of every six people in the United States is estimated to become sick each year from pathogens that can cause foodborne illness. The groups at greatest risk for serious illness, hospitalization, or death include young children, older adults, people with chronic conditions, and pregnant women. Such health disparities must be considered along with those disparities that may exist among racial and ethnic groups and among groups of varying socioeconomic status. We analyzed risk profiles for enteric disease using data from Rhode Island and the nation as a whole, exploring disparities among groups defined by demographic and health characteristics. 
As expected, disparities in the burden of enteric illnesses are not limited to racial or ethnic differences in disease burden, or in differences otherwise attributable to socioeconomic status. Age is an especially important determinant of risk, as is residential status. Other groups found to be especially vulnerable to foodborne and enteric illnesses in Rhode Island include pregnant women and those with certain health conditions (e.g., cancer, liver disease or immunosuppression). By understanding what groups are at increased risk, providers can more effectively counsel their patients to mitigate risk and effectively treat these conditions. [Full article available at http://rimed.org/rimedicaljournal-2016-11.asp].",2016-11-01 +,Reflections on the surface energy imbalance problem,"The ‘energy imbalance problem’ in micrometeorology arises because at most flux measurement sites the sum of eddy fluxes of sensible and latent heat (H+λE) is less than the available energy (A). Either eddy fluxes are underestimated or A is overestimated. Reasons for the imbalance are: (1) a failure to satisfy the fundamental assumption of one-dimensional transport that is necessary for measurements on a single tower to represent spatially-averaged fluxes to/from the underlying surface, and (2) measurement errors in eddy fluxes, net radiation and changes in energy storage in soils, air and biomass below the measurement height. Radiometer errors are unlikely to overestimate A significantly, but phase lags caused by incorrect estimates of the energy storage terms can explain why H+λE systematically underestimates A at half-hourly time scales. Energy closure is observed at only 8% of flux sites in the La Thuile dataset (http://www.fluxdata.org/DataInfo/default.aspx) with half-hourly averages but this increases to 45% of sites using 24h averages because energy entering the soil, air and biomass in the morning is returned in the afternoon and evening. 
Unrealistically large and positive horizontal gradients in temperature and humidity are needed for advective flux divergences to explain the energy imbalance at half-hourly time scales. Imbalances between H+λE and A still occur in daily averages but the small residual energy imbalances are explicable by horizontal and vertical advective flux divergences. Systematic underestimates of the vertical heat flux also occur if horizontal u′T′ covariances contaminate the vertical w′T′ signal due to incorrect coordinate rotations. Closure of the energy balance is possible at half-hourly time scales by careful attention to all sources of measurement and data processing errors in the eddy covariance system and by accurate measurement of net radiation and every energy storage term needed to calculate available energy.",2012-04-01 +24651476,Integer programming-based method for designing synthetic metabolic networks by Minimum Reaction Insertion in a Boolean model.,"In this paper, we consider the Minimum Reaction Insertion (MRI) problem for finding the minimum number of additional reactions from a reference metabolic network to a host metabolic network so that a target compound becomes producible in the revised host metabolic network in a Boolean model. Although a similar problem for larger networks is solvable in a flux balance analysis (FBA)-based model, the solution of the FBA-based model tends to include more reactions than that of the Boolean model. However, solving MRI using the Boolean model is computationally more expensive than using the FBA-based model since the Boolean model needs more integer variables. Therefore, in this study, to solve MRI for larger networks in the Boolean model, we have developed an efficient Integer Programming formalization method in which the number of integer variables is reduced by the notion of feedback vertex set and minimal valid assignment. As a result of computer experiments conducted using the data of metabolic networks of E. 
coli and reference networks downloaded from the Kyoto Encyclopedia of Genes and Genomes (KEGG) database, we have found that the developed method can appropriately solve MRI in the Boolean model and is applicable to large scale-networks for which an exhaustive search does not work. We have also compared the developed method with the existing connectivity-based methods and FBA-based methods, and show the difference between the solutions of our method and the existing methods. A theoretical analysis of MRI is also conducted, and the NP-completeness of MRI is proved in the Boolean model. Our developed software is available at ""http://sunflower.kuicr.kyoto-u.ac.jp/~rogi/minRect/minRect.html.""",2014-03-20 +27590733,Identifying N 6-methyladenosine sites in the Arabidopsis thaliana transcriptome.,"N 6-Methyladenosine (m6A) plays important roles in many biological processes. The knowledge of the distribution of m6A is helpful for understanding its regulatory roles. Although the experimental methods have been proposed to detect m6A, the resolutions of these methods are still unsatisfying especially for Arabidopsis thaliana. Benefitting from the experimental data, in the current work, a support vector machine-based method was proposed to identify m6A sites in A. thaliana transcriptome. The proposed method was validated on a benchmark dataset using jackknife test and was also validated by identifying strain-specific m6A sites in A. thaliana. The obtained predictive results indicate that the proposed method is quite promising. For the convenience of experimental biologists, an online webserver for the proposed method was built, which is freely available at http://lin.uestc.edu.cn/server/M6ATH . These results indicate that the proposed method holds a potential to become an elegant tool in identifying m6A site in A. 
thaliana.",2016-09-02 +24243844,FireDB: a compendium of biological and pharmacologically relevant ligands.,"FireDB (http://firedb.bioinfo.cnio.es) is a curated inventory of catalytic and biologically relevant small ligand-binding residues culled from the protein structures in the Protein Data Bank. Here we present the important new additions since the publication of FireDB in 2007. The database now contains an extensive list of manually curated biologically relevant compounds. Biologically relevant compounds are informative because of their role in protein function, but they are only a small fraction of the entire ligand set. For the remaining ligands, the FireDB provides cross-references to the annotations from publicly available biological, chemical and pharmacological compound databases. FireDB now has external references for 95% of contacting small ligands, making FireDB a more complete database and providing the scientific community with easy access to the pharmacological annotations of PDB ligands. In addition to the manual curation of ligands, FireDB also provides insights into the biological relevance of individual binding sites. Here, biological relevance is calculated from the multiple sequence alignments of related binding sites that are generated from all-against-all comparison of each FireDB binding site. The database can be accessed by RESTful web services and is available for download via MySQL.",2013-11-15 +28275370,PBHMDA: Path-Based Human Microbe-Disease Association Prediction.,"With the advance of sequencing technology and microbiology, the microorganisms have been found to be closely related to various important human diseases. The increasing identification of human microbe-disease associations offers important insights into the underlying disease mechanism understanding from the perspective of human microbes, which are greatly helpful for investigating pathogenesis, promoting early diagnosis and improving precision medicine. 
However, the current knowledge in this domain is still limited and far from complete. Here, we present the computational model of Path-Based Human Microbe-Disease Association prediction (PBHMDA) based on the integration of known microbe-disease associations and the Gaussian interaction profile kernel similarity for microbes and diseases. A special depth-first search algorithm was implemented to traverse all possible paths between microbes and diseases for inferring the most possible disease-related microbes. As a result, PBHMDA obtained a reliable prediction performance with AUCs (The area under ROC curve) of 0.9169 and 0.8767 in the frameworks of both global and local leave-one-out cross validations, respectively. Based on 5-fold cross validation, average AUCs of 0.9082 ± 0.0061 further demonstrated the efficiency of the proposed model. For the case studies of liver cirrhosis, type 1 diabetes, and asthma, 9, 7, and 9 out of predicted microbes in the top 10 have been confirmed by previously published experimental literatures, respectively. We have publicly released the prioritized microbe-disease associations, which may help to select the most potential pairs for further guiding the experimental confirmation. In conclusion, PBHMDA may have potential to boost the discovery of novel microbe-disease associations and aid future research efforts toward microbe involvement in human disease mechanism. The code and data of PBHMDA is freely available at http://www.escience.cn/system/file?fileId=85214.",2017-02-22 +25872217,An Efficient Algorithm for Discovering Motifs in Large DNA Data Sets.,"The planted (l,d) motif discovery has been successfully used to locate transcription factor binding sites in dozens of promoter sequences over the past decade. 
However, there has not been enough work done in identifying (l,d) motifs in the next-generation sequencing (ChIP-seq) data sets, which contain thousands of input sequences and thereby bring new challenge to make a good identification in reasonable time. To cater this need, we propose a new planted (l,d) motif discovery algorithm named MCES, which identifies motifs by mining and combining emerging substrings. Specially, to handle larger data sets, we design a MapReduce-based strategy to mine emerging substrings distributedly. Experimental results on the simulated data show that i) MCES is able to identify (l,d) motifs efficiently and effectively in thousands to millions of input sequences, and runs faster than the state-of-the-art (l,d) motif discovery algorithms, such as F-motif and TraverStringsR; ii) MCES is able to identify motifs without known lengths, and has a better identification accuracy than the competing algorithm CisFinder. Also, the validity of MCES is tested on real data sets. MCES is freely available at http://sites.google.com/site/feqond/mces.",2015-04-09 +24163105,PolymiRTS Database 3.0: linking polymorphisms in microRNAs and their target sites with human diseases and biological pathways.,"Polymorphisms in microRNAs (miRNAs) and their target sites (PolymiRTS) are known to disrupt miRNA function, leading to the development of disease and variation in physiological and behavioral phenotypes. Here, we describe recent updates to the PolymiRTS database (http://compbio.uthsc.edu/miRSNP), an integrated platform for analyzing the functional impact of genetic polymorphisms in miRNA seed regions and miRNA target sites. Recent advances in genomic technologies have made it possible to identify miRNA-mRNA binding sites from direct mapping experiments such as CLASH (cross linking, ligation and sequencing of hybrids). We have integrated data from CLASH experiments in the PolymiRTS database to provide more complete and accurate miRNA-mRNA interactions. 
Other significant new features include (i) small insertions and deletions in miRNA seed regions and miRNA target sites, (ii) TargetScan context + score differences for assessing the impact of polymorphic miRNA-mRNA interactions and (iii) biological pathways. The browse and search pages of PolymiRTS allow users to explore the relations between the PolymiRTSs and gene expression traits, physiological and behavioral phenotypes, human diseases and biological pathways.",2013-10-24 +26989152,From one to many: expanding the Saccharomyces cerevisiae reference genome panel. ,"In recent years, thousands of Saccharomyces cerevisiae genomes have been sequenced to varying degrees of completion. The Saccharomyces Genome Database (SGD) has long been the keeper of the original eukaryotic reference genome sequence, which was derived primarily from S. cerevisiae strain S288C. Because new technologies are pushing S. cerevisiae annotation past the limits of any system based exclusively on a single reference sequence, SGD is actively working to expand the original S. cerevisiae systematic reference sequence from a single genome to a multi-genome reference panel. We first commissioned the sequencing of additional genomes and their automated analysis using the AGAPE pipeline. Here we describe our curation strategy to produce manually reviewed high-quality genome annotations in order to elevate 11 of these additional genomes to Reference status. Database URL: http://www.yeastgenome.org/.",2016-03-17 +27857164,miRNAsong: a web-based tool for generation and testing of miRNA sponge constructs in silico.,"MicroRNA (miRNA) sponges are RNA transcripts containing multiple high-affinity binding sites that associate with and sequester specific miRNAs to prevent them from interacting with their target messenger (m)RNAs. Due to the high specificity of miRNA sponges and strong inhibition of target miRNAs, these molecules have become increasingly applied in miRNA loss-of-function studies. 
However, improperly designed sponge constructs may sequester off-target miRNAs; thus, it has become increasingly important to develop a tool for miRNA sponge construct design and testing. In this study, we introduce microRNA sponge generator and tester (miRNAsong), a freely available web-based tool for generation and in silico testing of miRNA sponges. This tool generates miRNA sponge constructs for specific miRNAs and miRNA families/clusters and tests them for potential binding to miRNAs in selected organisms. Currently, miRNAsong allows for testing of sponge constructs in 219 species covering 35,828 miRNA sequences. Furthermore, we also provide an example, supplemented with experimental data, of how to use this tool. Using miRNAsong, we designed and tested a sponge for miR-145 inhibition, and cloned the sequence into an inducible lentiviral vector. We found that established cell lines expressing miR-145 sponge strongly inhibited miR-145, thus demonstrating the usability of miRNAsong tool for sponge generation. URL: http://www.med.muni.cz/histology/miRNAsong/.",2016-11-18 +24157837,"MEROPS: the database of proteolytic enzymes, their substrates and inhibitors.","Peptidases, their substrates and inhibitors are of great relevance to biology, medicine and biotechnology. The MEROPS database (http://merops.sanger.ac.uk) aims to fulfill the need for an integrated source of information about these. The database has hierarchical classifications in which homologous sets of peptidases and protein inhibitors are grouped into protein species, which are grouped into families, which are in turn grouped into clans. Recent developments include the following. A community annotation project has been instigated in which acknowledged experts are invited to contribute summaries for peptidases. Software has been written to provide an Internet-based data entry form. Contributors are acknowledged on the relevant web page. 
A new display showing the intron/exon structures of eukaryote peptidase genes and the phasing of the junctions has been implemented. It is now possible to filter the list of peptidases from a completely sequenced bacterial genome for a particular strain of the organism. The MEROPS filing pipeline has been altered to circumvent the restrictions imposed on non-interactive blastp searches, and a HMMER search using specially generated alignments to maximize the distribution of organisms returned in the search results has been added.",2013-10-23 +,Mistletoes Play Different Roles in a Modular Host–Parasite Network,"Antagonistic interactions between host plants and mistletoes often form complex networks of interacting species. Adequate characterization of network organization requires a combination of qualitative and quantitative data. Therefore, we assessed the distribution of interactions between mistletoes and hosts in the Brazilian Pantanal and characterized the network structure in relation to nestedness and modularity. Interactions were highly asymmetric, with mistletoes presenting low host specificity (i.e., weak dependence) and with hosts being highly susceptible to mistletoe‐specific infections. We found a non‐nested and modular pattern of interactions, wherein each mistletoe species interacted with a particular set of host species. Psittacanthus spp. infected more species and individuals and also caused a high number of infections per individual, whereas the other mistletoes showed a more specialized pattern of infection. For this reason, Psittacanthus spp. were regarded as module hubs while the other mistletoe species showed a peripheral role. We hypothesize that this pattern is primarily the result of different seed dispersal systems. Although all mistletoe species in our study are bird dispersed, the frugivorous assemblage of Psittacanthus spp. is composed of a larger suite of birds, whereas Phoradendron are mainly dispersed by Euphonia species. 
The larger assemblage of bird species dispersing Psittacanthus seeds may also increase the number of hosts colonized and, consequently, its dominance in the study area. Nevertheless, other restrictions on the interactions among species, such as the differential capacity of mistletoe infections, defense strategies of hosts and habitat types, can also generate or enhance the observed pattern. Abstract in Portuguese is available at http://www.blackwell‐synergy.com/loi/btp.",2012-03-01 +25728529,The Quantification of Representative Sequences pipeline for amplicon sequencing: case study on within-population ITS1 sequence variation in a microparasite infecting Daphnia.,"Next generation sequencing (NGS) platforms are replacing traditional molecular biology protocols like cloning and Sanger sequencing. However, accuracy of NGS platforms has rarely been measured when quantifying relative frequencies of genotypes or taxa within populations. Here we developed a new bioinformatic pipeline (QRS) that pools similar sequence variants and estimates their frequencies in NGS data sets from populations or communities. We tested whether the estimated frequency of representative sequences, generated by 454 amplicon sequencing, differs significantly from that obtained by Sanger sequencing of cloned PCR products. This was performed by analysing sequence variation of the highly variable first internal transcribed spacer (ITS1) of the ichthyosporean Caullerya mesnili, a microparasite of cladocerans of the genus Daphnia. This analysis also serves as a case example of the usage of this pipeline to study within-population variation. Additionally, a public Illumina data set was used to validate the pipeline on community-level data. Overall, there was a good correspondence in absolute frequencies of C. mesnili ITS1 sequences obtained from Sanger and 454 platforms. Furthermore, analyses of molecular variance (amova) revealed that population structure of C. 
mesnili differs across lakes and years independently of the sequencing platform. Our results support not only the usefulness of amplicon sequencing data for studies of within-population structure but also the successful application of the QRS pipeline on Illumina-generated data. The QRS pipeline is freely available together with its documentation under GNU Public Licence version 3 at http://code.google.com/p/quantification-representative-sequences.",2015-03-05 +28725481,Identifying biological mechanisms for favorable cancer prognosis using non-hypothesis-driven iterative survival analysis.,"Survival analyses based on the Kaplan-Meier estimate have been pervasively used to support or validate the relevance of biological mechanisms in cancer research. Recently, with the appearance of gene expression high-throughput technologies, this kind of analysis has been applied to tumor transcriptomics data. In a 'bottom-up' approach, gene-expression profiles that are associated with a deregulated pathway hypothetically involved in cancer progression are first identified and then subsequently correlated with a survival effect, which statistically supports or requires the rejection of such a hypothesis. In this work, we propose a 'top-down' approach, in which the clinical outcome (survival) is the starting point that guides the identification of deregulated biological mechanisms in cancer by a non-hypothesis-driven iterative survival analysis. We show that the application of our novel method to a population of ~2,000 breast cancer patients of the METABRIC consortium allows the identification of several well-known cancer mechanisms, such as ERBB4, HNF3A and TGFB pathways, and the investigation of their paradoxical dual effect. In addition, several novel biological mechanisms are proposed as potentially involved in cancer progression. 
The proposed exploratory methodology can be considered both alternative and complementary to classical 'bottom-up' approaches for validation of biological hypotheses. We propose that our method may be used to better characterize cancer, and may therefore impact the future design of therapies that are truly molecularly tailored to individual patients. The method, named SURCOMED, was implemented as a web-based tool, which is publicly available at http://surcomed.vital-it.ch. R scripts are also available at http://surcomed.sourceforge.net.",2016-12-22 +26716751,[ProteoCat: a tool for planning of proteomic experiments].,"ProteoCat is a computer program designed to help researchers in the planning of large-scale proteomic experiments. The central part of this program is the subprogram of hydrolysis simulation that supports 4 proteases (trypsin, lysine C, endoproteinases AspN and GluC). For the peptides obtained after virtual hydrolysis or loaded from data file a number of properties important in mass-spectrometric experiments can be calculated or predicted. The data can be analyzed or filtered to reduce a set of peptides. The program is using new and improved modification of our methods developed to predict pI and probability of peptide detection; pI can also be predicted for a number of popular pKa's scales, proposed by other investigators. The algorithm for prediction of peptide retention time was realized similar to the algorithm used in the program SSRCalc. ProteoCat can estimate the coverage of amino acid sequences of proteins under defined limitation on peptides detection, as well as the possibility of assembly of peptide fragments with user-defined size of ""sticky"" ends. The program has a graphical user interface, written in Java and available at http://www.ibmc.msk.ru/LPCIT/ProteoCat.",2015-11-01 +27423894,ntHash: recursive nucleotide hashing.,"

Motivation

Hashing has been widely used for indexing, querying and rapid similarity search in many bioinformatics applications, including sequence alignment, genome and transcriptome assembly, k-mer counting and error correction. Hence, expediting hashing operations would have a substantial impact in the field, making bioinformatics applications faster and more efficient.

Results

We present ntHash, a hashing algorithm tuned for processing DNA/RNA sequences. It performs the best when calculating hash values for adjacent k-mers in an input sequence, operating an order of magnitude faster than the best performing alternatives in typical use cases.

Availability and implementation

ntHash is available online at http://www.bcgsc.ca/platform/bioinfo/software/nthash and is free for academic use.

Contacts

hmohamadi@bcgsc.ca or ibirol@bcgsc.ca. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-16 +26975833,REGene: a literature-based knowledgebase of animal regeneration that bridge tissue regeneration and cancer.,"Regeneration is a common phenomenon across multiple animal phyla. Regeneration-related genes (REGs) are critical for fundamental cellular processes such as proliferation and differentiation. Identification of REGs and elucidating their functions may help to further develop effective treatment strategies in regenerative medicine. So far, REGs have been largely identified by small-scale experimental studies and a comprehensive characterization of the diverse biological processes regulated by REGs is lacking. Therefore, there is an ever-growing need to integrate REGs at the genomics, epigenetics, and transcriptome level to provide a reference list of REGs for regeneration and regenerative medicine research. Towards achieving this, we developed the first literature-based database called REGene (REgeneration Gene database). In the current release, REGene contains 948 human (929 protein-coding and 19 non-coding genes) and 8445 homologous genes curated from gene ontology and extensive literature examination. Additionally, the REGene database provides detailed annotations for each REG, including: gene expression, methylation sites, upstream transcription factors, and protein-protein interactions. An analysis of the collected REGs reveals strong links to a variety of cancers in terms of genetic mutation, protein domains, and cellular pathways. We have prepared a web interface to share these regeneration genes, supported by refined browsing and searching functions at http://REGene.bioinfo-minzhao.org/.",2016-03-15 +26980517,HerDing: herb recommendation system to treat diseases using genes and chemicals. 
,"In recent years, herbs have been researched for new drug candidates because they have a long empirical history of treating diseases and are relatively free from side effects. Studies to scientifically prove the medical efficacy of herbs for target diseases often spend a considerable amount of time and effort in choosing candidate herbs and in performing experiments to measure changes of marker genes when treating herbs. A computational approach to recommend herbs for treating diseases might be helpful to promote efficiency in the early stage of such studies. Although several databases related to traditional Chinese medicine have been already developed, there is no specialized Web tool yet recommending herbs to treat diseases based on disease-related genes. Therefore, we developed a novel search engine, HerDing, focused on retrieving candidate herb-related information with user search terms (a list of genes, a disease name, a chemical name or an herb name). HerDing was built by integrating public databases and by applying a text-mining method. The HerDing website is free and open to all users, and there is no login requirement. Database URL: http://combio.gist.ac.kr/herding.",2016-03-15 +26441427,A Cascade Random Forests Algorithm for Predicting Protein-Protein Interaction Sites.,"Protein-protein interactions exist ubiquitously and play important roles in the life cycles of living cells. The interaction sites (residues) are essential to understanding the underlying mechanisms of protein-protein interactions. Previous research has demonstrated that the accurate identification of protein-protein interaction sites (PPIs) is helpful for developing new therapeutic drugs because many drugs will interact directly with those residues. Because of its significant potential in biological research and drug development, the prediction of PPIs has become an important topic in computational biology. 
However, a severe data imbalance exists in the PPIs prediction problem, where the number of the majority class samples (non-interacting residues) is far larger than that of the minority class samples (interacting residues). Thus, we developed a novel cascade random forests algorithm (CRF) to address the serious data imbalance that exists in the PPIs prediction problem. The proposed CRF resolves the negative effect of data imbalance by connecting multiple random forests in a cascade-like manner, each of which is trained with a balanced training subset that includes all minority samples and a subset of majority samples using an effective ensemble protocol. Based on the proposed CRF, we implemented a new sequence-based PPIs predictor, called CRF-PPI, which takes the combined features of position-specific scoring matrices, averaged cumulative hydropathy, and predicted relative solvent accessibility as model inputs. Benchmark experiments on both the cross validation and independent validation datasets demonstrated that the proposed CRF-PPI outperformed the state-of-the-art sequence-based PPIs predictors. The source code for CRF-PPI and the benchmark datasets are available online at http://csbio.njust.edu.cn/bioinf/CRF-PPI for free academic use.",2015-09-28 +24108511,Antibiotics for the prophylaxis of bacterial endocarditis in dentistry.,"

Background

Infective endocarditis is a severe infection arising in the lining of the chambers of the heart with a high mortality rate. Many dental procedures cause bacteraemia and it was believed that this may lead to bacterial endocarditis (BE) in a few people. Guidelines in many countries have recommended that prior to invasive dental procedures antibiotics are administered to people at high risk of endocarditis. However, recent guidance by the National Institute for Health and Care Excellence (NICE) in England and Wales has recommended that antibiotics are not required.

Objectives

To determine whether prophylactic antibiotic administration, compared to no such administration or placebo, before invasive dental procedures in people at risk or at high risk of bacterial endocarditis influences mortality, serious illness or the incidence of endocarditis.

Search methods

The following electronic databases were searched: the Cochrane Oral Health Group's Trials Register (to 21 January 2013), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library 2012, Issue 12), MEDLINE via OVID (1946 to 21 January 2013) and EMBASE via OVID (1980 to 21 January 2013). We searched for ongoing trials in the US National Institutes of Health Trials Register (http://clinicaltrials.gov) and the metaRegister of Controlled Trials (http://www.controlled-trials.com/mrct/). No restrictions were placed on the language or date of publication when searching the electronic databases.

Selection criteria

Due to the low incidence of BE it was anticipated that few if any trials would be located. For this reason, cohort and case-control studies were included where suitably matched control or comparison groups had been studied. The intervention was the administration of antibiotic, compared to no such administration, before a dental procedure in people with an increased risk of BE. Cohort studies would need to follow those individuals at increased risk and assess outcomes following any invasive dental procedures, grouping by whether prophylaxis was received or not. Included case-control studies would need to match people who had developed endocarditis (and who were known to be at increased risk before undergoing an invasive dental procedure preceding the onset of endocarditis) with those at similar risk but who had not developed endocarditis. Outcomes of interest were mortality or serious adverse events requiring hospital admission; development of endocarditis following any dental procedure in a defined time period; development of endocarditis due to other non-dental causes; any recorded adverse events to the antibiotics; and cost implications of the antibiotic provision for the care of those patients who developed endocarditis.

Data collection and analysis

Two review authors independently selected studies for inclusion then assessed risk of bias and extracted data from the included study.

Main results

No randomised controlled trials (RCTs), controlled clinical trials (CCTs) or cohort studies were included. One case-control study met the inclusion criteria. It collected all the cases of endocarditis in the Netherlands over two years, finding a total of 24 people who developed endocarditis within 180 days of an invasive dental procedure, definitely requiring prophylaxis according to current guidelines, and who were at increased risk of endocarditis due to a pre-existing cardiac problem. This study included participants who died because of the endocarditis (using proxies). Controls attended local cardiology outpatient clinics for similar cardiac problems, had undergone an invasive dental procedure within the past 180 days, and were matched by age with the cases. No significant effect of penicillin prophylaxis on the incidence of endocarditis could be seen. No data were found on other outcomes.

Authors' conclusions

There remains no evidence about whether antibiotic prophylaxis is effective or ineffective against bacterial endocarditis in people at risk who are about to undergo an invasive dental procedure. It is not clear whether the potential harms and costs of antibiotic administration outweigh any beneficial effect. Ethically, practitioners need to discuss the potential benefits and harms of antibiotic prophylaxis with their patients before a decision is made about administration.",2013-10-09 +28066557,Stability of fruit quality traits in diverse watermelon cultivars tested in multiple environments.,"Lycopene is a naturally occurring red carotenoid compound that is found in watermelon. Lycopene has antioxidant properties. Lycopene content, sugar content and hollowheart resistance are subject to significant genotype×environment interaction (G×E), which makes breeding for these fruit quality traits difficult. The objectives of this study were to (i) evaluate the influence of years and locations on lycopene content, sugar content and hollowheart resistance for a set of watermelon genotypes, and (ii) identify genotypes with high stability for lycopene, sugar, and hollowheart resistance. A diverse set of 40 genotypes was tested over 3 years and 8 locations across the southern United States in replicated, multi-harvest trials. Lycopene was tested in a subset of 10 genotypes. Data were analyzed using univariate and multivariate stability statistics (BLUP-GGE biplot) using SASGxE and RGxE programs. There were strong effects of environment as well as G×E interaction on watermelon quality traits. On the basis of stability measures, genotypes were classified as stable or unstable for each quality trait. 'Crimson Sweet' is an inbred line with high quality trait performance as well as trait stability. 'Stone Mountain', 'Tom Watson', 'Crimson Sweet' and 'Minilee' were among the best genotypes for lycopene content, sugar content and hollowheart resistance. 
We developed a stability chart based on marketable yield and average ranking generated from different stability measures for yield attributes and quality traits. The chart will assist in choosing parents for improvement of watermelon cultivars. See http://cuke.hort.ncsu.edu/cucurbit/wmelon/wmelonmain.html.",2016-12-21 +25414366,Biological Dynamics Markup Language (BDML): an open format for representing quantitative biological dynamics data.,"

Motivation

Recent progress in live-cell imaging and modeling techniques has resulted in generation of a large amount of quantitative data (from experimental measurements and computer simulations) on spatiotemporal dynamics of biological objects such as molecules, cells and organisms. Although many research groups have independently dedicated their efforts to developing software tools for visualizing and analyzing these data, these tools are often not compatible with each other because of different data formats.

Results

We developed an open unified format, Biological Dynamics Markup Language (BDML; current version: 0.2), which provides a basic framework for representing quantitative biological dynamics data for objects ranging from molecules to cells to organisms. BDML is based on Extensible Markup Language (XML). Its advantages are machine and human readability and extensibility. BDML will improve the efficiency of development and evaluation of software tools for data visualization and analysis.

Availability and implementation

A specification and a schema file for BDML are freely available online at http://ssbd.qbic.riken.jp/bdml/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-19 +24303032,Prioritization of cancer marker candidates based on the immunohistochemistry staining images deposited in the human protein atlas.,"Cancer marker discovery is an emerging topic in high-throughput quantitative proteomics. However, the omics technology usually generates a long list of marker candidates that requires a labor-intensive filtering process in order to screen for potentially useful markers. Specifically, various parameters, such as the level of overexpression of the marker in the cancer type of interest, which is related to sensitivity, and the specificity of the marker among cancer groups, are the most critical considerations. Protein expression profiling on the basis of immunohistochemistry (IHC) staining images is a technique commonly used during such filtering procedures. To systematically investigate the protein expression in different cancer versus normal tissues and cell types, the Human Protein Atlas is a most comprehensive resource because it includes millions of high-resolution IHC images with expert-curated annotations. To facilitate the filtering of potential biomarker candidates from large-scale omics datasets, in this study we have proposed a scoring approach for quantifying IHC annotation of paired cancerous/normal tissues and cancerous/normal cell types. We have comprehensively calculated the scores of all the 17219 tested antibodies deposited in the Human Protein Atlas based on their accumulated IHC images and obtained 457110 scores covering 20 different types of cancers. Statistical tests demonstrate the ability of the proposed scoring approach to prioritize cancer-specific proteins. Top 100 potential marker candidates were prioritized for the 20 cancer types with statistical significance. 
In addition, a model study was carried out of 1482 membrane proteins identified from a quantitative comparison of paired cancerous and adjacent normal tissues from patients with colorectal cancer (CRC). The proposed scoring approach demonstrated successful prioritization and identified four CRC markers, including two of the most widely used, namely CEACAM5 and CEACAM6. These results demonstrate the potential of this scoring approach in terms of cancer marker discovery and development. All the calculated scores are available at http://bal.ym.edu.tw/hpa/.",2013-11-26 +23554723,Active motif finder - a bio-tool based on mutational structures in DNA sequences.,"Active Motif Finder (AMF) is a novel algorithmic tool, designed based on mutations in DNA sequences. Tools available at present for finding motifs are based on matching a given motif in the query sequence. AMF describes a new algorithm that identifies the occurrences of patterns which possess all kinds of mutations like insertion, deletion and mismatch. The algorithm is mainly based on the Alignment Score Matrix (ASM) computation by comparing input motif with full length sequence. Much of the effort in bioinformatics is directed to identify these motifs in the sequences of newly discovered genes. The proposed bio-tool serves as an open resource for analysis and useful for studying polymorphisms in DNA sequences. AMF can be searched via a user-friendly interface. This tool is intended to serve the scientific community working in the areas of chemical and structural biology, and is freely available to all users, at http://www.sastra.edu/scbt/amf/.",2011-11-01 +26975196,Sequencing and comparative analyses of the genomes of zoysiagrasses.,"Zoysia is a warm-season turfgrass, which comprises 11 allotetraploid species (2n = 4x = 40), each possessing different morphological and physiological traits. 
To characterize the genetic systems of Zoysia plants and to analyse their structural and functional differences in individual species and accessions, we sequenced the genomes of Zoysia species using HiSeq and MiSeq platforms. As a reference sequence of Zoysia species, we generated a high-quality draft sequence of the genome of Z. japonica accession 'Nagirizaki' (334 Mb) in which 59,271 protein-coding genes were predicted. In parallel, draft genome sequences of Z. matrella 'Wakaba' and Z. pacifica 'Zanpa' were also generated for comparative analyses. To investigate the genetic diversity among the Zoysia species, genome sequence reads of three additional accessions, Z. japonica 'Kyoto', Z. japonica 'Miyagi' and Z. matrella 'Chiba Fair Green', were accumulated, and aligned against the reference genome of 'Nagirizaki' along with those from 'Wakaba' and 'Zanpa'. As a result, we detected 7,424,163 single-nucleotide polymorphisms and 852,488 short indels among these species. The information obtained in this study will be valuable for basic studies on zoysiagrass evolution and genetics as well as for the breeding of zoysiagrasses, and is made available in the 'Zoysia Genome Database' at http://zoysia.kazusa.or.jp.",2016-03-14 +25031655,Obesity gene atlas in mammals.,"Obesity in humans has increased at an alarming rate over the past two decades and has become one of the leading public health problems worldwide. Studies have revealed a large number of genes/markers that are associated with obesity and/or obesity-related phenotypes, indicating an urgent need to develop a central database for helping the community understand the genetic complexity of obesity. In the present study, we collected a total of 1,736 obesity associated loci and created a freely available obesity database, including 1,515 protein-coding genes and 221 microRNAs (miRNAs) collected from four mammalian species: human, cattle, rat, and mouse. 
These loci were integrated as orthologs on comparative genomic views in human, cattle, and mouse. The database and genomic views are freely available online at: http://www.integratomics-time.com/fat_deposition. Bioinformatics analyses of the collected data revealed some potential novel obesity related molecular markers which represent focal points for testing more targeted hypotheses and designing experiments for further studies. We believe that this centralized database on obesity and adipogenesis will facilitate development of comparative systems biology approaches to address this important health issue in human and their potential applications in animals.",2013-12-01 +27852242,AnnoLnc: a web server for systematically annotating novel human lncRNAs.,"

Background

Long noncoding RNAs (lncRNAs) have been shown to play essential roles in almost every important biological process through multiple mechanisms. Although the repertoire of human lncRNAs has rapidly expanded, their biological function and regulation remain largely elusive, calling for a systematic and integrative annotation tool.

Results

Here we present AnnoLnc ( http://annolnc.cbi.pku.edu.cn ), a one-stop portal for systematically annotating novel human lncRNAs. Based on more than 700 data sources and various tool chains, AnnoLnc enables a systematic annotation covering genomic location, secondary structure, expression patterns, transcriptional regulation, miRNA interaction, protein interaction, genetic association and evolution. An intuitive web interface is available for interactive analysis through both desktops and mobile devices, and programmers can further integrate AnnoLnc into their pipeline through standard JSON-based Web Service APIs.

Conclusions

To the best of our knowledge, AnnoLnc is the only web server to provide on-the-fly and systematic annotation for newly identified human lncRNAs. Compared with similar tools, the annotation generated by AnnoLnc covers a much wider spectrum with intuitive visualization. Case studies demonstrate the power of AnnoLnc in not only rediscovering known functions of human lncRNAs but also inspiring novel hypotheses.",2016-11-16 +27354938,SeeHaBITaT: A server on bioinformatics applications for Tospoviruses and other species.,"Plant viruses are important limiting factors in agricultural productivity. Tospovirus is one of the severe plant pathogens, causing damage to economically important food and ornamental crops worldwide through thrips as vectors. Database application resources exclusively on this virus would help to design better control measures, which aren't available. SeeHaBITaT is a unique and exclusive web based server providing work bench to perform computational research on tospoviruses and its species. SeeHaBITaT hosts Tospoviruses specific database Togribase, MOLBIT, SRMBIT and SS with PDB. These applications would be of immense help to the Tospovirus scientific community. The server could be accessed at http://bit.srmuniv.ac.in/.",2016-03-12 +27153675,TreeDom: a graphical web tool for analysing domain architecture evolution.,"

Unlabelled

We present TreeDom, a web tool for graphically analysing the evolutionary history of domains in multi-domain proteins. Individual domains on the same protein chain may have distinct evolutionary histories, which is important to grasp in order to understand protein function. For instance, it may be important to know whether a domain was duplicated recently or long ago, to know the origin of inserted domains, or to know the pattern of domain loss within a protein family. TreeDom uses the Pfam database as the source of domain annotations, and displays these on a sequence tree. An advantage of TreeDom is that the user can limit the analysis to N sequences that are most similar to a query, or provide a list of sequence IDs to include. Using the Pfam alignment of the selected sequences, a tree is built and displayed together with the domain architecture of each sequence. Availability and implementation: http://TreeDom.sbc.su.se

Contact

Erik.Sonnhammer@scilifelab.se.",2016-03-12 +27118584,Trimming Surface Sugars Protects Histoplasma from Immune Attack.,"Dectin-1 is an essential innate immune receptor that recognizes β-glucans in fungal cell walls. Its importance is underscored by the mechanisms that fungal pathogens have evolved to avoid detection by this receptor. One such pathogen is Histoplasma capsulatum, and in a recent article in mBio, Rappleye's group presented data showing that yeasts of this organism secrete a β-glucanase, Eng1, which acts to prune β-glucans that are exposed on the fungal cell surface [A. L. Garfoot et al., mBio 7(2):e01388-15, 2016, http://dx.doi.org/10.1128/mBio.01388-15]. The trimming of these sugars reduces immune recognition through Dectin-1 and subsequent inflammatory responses, enhancing the pathogenesis of H. capsulatum.",2016-04-26 +27566531,The efficiency of chronic disease care in sub-Saharan Africa.,"The number of people needing chronic disease care is projected to increase in sub-Saharan Africa as a result of expanding human immunodeficiency virus (HIV) treatment coverage, rising life expectancies, and lifestyle changes. Using nationally representative data of healthcare facilities, Di Giorgio et al. found that many HIV clinics in Kenya, Uganda, and Zambia appear to have considerable untapped capacity to provide care for additional patients. These findings highlight the potential for increasing the efficiency of clinical processes for chronic disease care at the facility level. Important questions for future research are how estimates of comparative technical efficiency across facilities change, when they are adjusted for quality of care and the composition of patients by care complexity. 
Looking ahead, substantial research investment will be needed to ensure that we do not forgo the opportunity to learn how efficiency changes, as chronic care is becoming increasingly differentiated by patient type and integrated across diseases and health systems functions.Please see related article: http://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-016-0653-z.",2016-08-26 +26177965,"Karect: accurate correction of substitution, insertion and deletion errors for next-generation sequencing data.","

Motivation

Next-generation sequencing generates large amounts of data affected by errors in the form of substitutions, insertions or deletions of bases. Error correction based on the high-coverage information, typically improves de novo assembly. Most existing tools can correct substitution errors only; some support insertions and deletions, but accuracy in many cases is low.

Results

We present Karect, a novel error correction technique based on multiple alignment. Our approach supports substitution, insertion and deletion errors. It can handle non-uniform coverage as well as moderately covered areas of the sequenced genome. Experiments with data from Illumina, 454 FLX and Ion Torrent sequencing machines demonstrate that Karect is more accurate than previous methods, both in terms of correcting individual-bases errors (up to 10% increase in accuracy gain) and post de novo assembly quality (up to 10% increase in NGA50). We also introduce an improved framework for evaluating the quality of error correction.

Availability and implementation

Karect is available at: http://aminallam.github.io/karect.

Contact

amin.allam@kaust.edu.sa

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-14 +24385242,Disturbance of Arabidopsis thaliana microRNA-regulated pathways by Xcc bacterial effector proteins.,"Plants are continuously subjected to infection by pathogens, including bacteria and viruses. Bacteria can inject a variety of effector proteins into the host to reprogram host defense mechanism. It is known that microRNAs participate in plant disease resistance to bacterial pathogens and previous studies have suggested that some bacterial effectors have evolved to disturb the host's microRNA-regulated pathways; and so enabling infection. In this study, the inter-species interaction between an Xanthomonas campestris pv campestris (Xcc) pathogen effector and Arabidopsis thaliana microRNA transcription promoter was investigated using three methods: (1) interolog, (2) alignment based on using transcription factor binding site profile matrix, and (3) the web-based binding site prediction tool, PATSER. Furthermore, we integrated another two data sets from our previous study into the present web-based system. These are (1) microRNA target genes and their downstream effects mediated by protein-protein interaction (PPI), and (2) the Xcc-Arabidopsis PPI information. This present work is probably the first comprehensive study of constructing pathways that comprises effector, microRNA, target genes and PPI for the study of pathogen-host interactions. It is expected that this study may help to elucidate the role of pathogen-host interplay in a plant's immune system. The database is freely accessible at: http://ppi.bioinfo.asia.edu.tw/EDMRP .",2014-01-04 +27771556,Weighted edge based clustering to identify protein complexes in protein-protein interaction networks incorporating gene expression profile.,"Protein complex detection from protein-protein interaction (PPI) network has received a lot of focus in recent years. 
A number of methods identify protein complexes as dense sub-graphs using network information while several other methods detect protein complexes based on topological information. While the methods based on identifying dense sub-graphs are more effective in identifying protein complexes, not all protein complexes have high density. Moreover, existing methods focus more on static PPI networks and usually overlook the dynamic nature of protein complexes. Here, we propose a new method, Weighted Edge based Clustering (WEC), to identify protein complexes based on the weight of the edge between two interacting proteins, where the weight is defined by the edge clustering coefficient and the gene expression correlation between the interacting proteins. Our WEC method is capable of detecting highly inter-connected and co-expressed protein complexes. The experimental results of WEC on three real life data shows that our method can detect protein complexes effectively in comparison with other highly cited existing methods.

Availability

The WEC tool is available at http://agnigarh.tezu.ernet.in/~rosy8/shared.html.",2016-10-08 +26969678,PDP-CON: prediction of domain/linker residues in protein sequences using a consensus approach.,"The prediction of domain/linker residues in protein sequences is a crucial task in the functional classification of proteins, homology-based protein structure prediction, and high-throughput structural genomics. In this work, a novel consensus-based machine-learning technique was applied for residue-level prediction of the domain/linker annotations in protein sequences using ordered/disordered regions along protein chains and a set of physicochemical properties. Six different classifiers-decision tree, Gaussian naïve Bayes, linear discriminant analysis, support vector machine, random forest, and multilayer perceptron-were exhaustively explored for the residue-level prediction of domain/linker regions. The protein sequences from the curated CATH database were used for training and cross-validation experiments. Test results obtained by applying the developed PDP-CON tool to the mutually exclusive, independent proteins of the CASP-8, CASP-9, and CASP-10 databases are reported. An n-star quality consensus approach was used to combine the results yielded by different classifiers. The average PDP-CON accuracy and F-measure values for the CASP targets were found to be 0.86 and 0.91, respectively. The dataset, source code, and all supplementary materials for this work are available at https://cmaterju.org/cmaterbioinfo/ for noncommercial use.",2016-03-11 +24250117,BBGD454: A database for transcriptome analysis of blueberry using 454 sequences.,"

Unlabelled

Blueberry is an economically and nutritionally important small fruit crop, native to North America. As with many crops, extreme low temperature can affect blueberry crop yield negatively and cause major losses to growers. For this reason, blueberry breeding programs have focused on developing improved cultivars with broader climatic adaptation. To help achieve this goal, the blueberry genomic database (BBGD454) was developed to provide the research community with valuable resources to identify genes that play an important role in flower bud and fruit development, cold acclimation and chilling accumulation in blueberry. The database was developed using SQLServer2008 to house 454 transcript sequences, annotations and gene expression profiles of blueberry genes. BBGD454 can be accessed publicly from a web-based interface; this website provides search and browse functionalities to allow scientists to access and search the data in order to correlate gene expression with gene function in different stages of blueberry fruit ripening, at different stages of cold acclimation of flower buds, and in leaves.

Availability

It can be accessed from http://bioinformatics.towson.edu/BBGD454/",2013-10-16 +24920231,Estonian folk traditional experiences on natural anticancer remedies: from past to the future.,"

Context

Despite diagnostic and therapeutic advancements, the burden of cancer is still increasing worldwide. Toxicity of current chemotherapeutics to normal cells and their resistance to tumor cells highlights the urgent need for new drugs with minimal adverse side effects. The use of natural anticancer agents has entered into the area of cancer research and increased efforts are being made to isolate bioactive products from medicinal plants.

Objective

To lead the search for plants with potential cytotoxic activity, ethnopharmacological knowledge can give a great contribution. Therefore, the attention of this review is devoted to the natural remedies traditionally used for the cancer treatment by Estonian people over a period of almost 150 years.

Methods

Two massive databases, the first one stored in the Estonian Folklore Archives and the second one in the electronic database HERBA ( http://herba.folklore.ee/ ), containing altogether more than 30 000 ethnomedicinal texts were systematically reviewed to compile data about the Estonian folk traditional experiences on natural anticancer remedies.

Results and conclusion

As a result, 44 different plants with potential anticancer properties were elicited, 5 of which [Angelica sylvestris L. (Apiaceae), Anthemis tinctoria L. (Asteraceae), Pinus sylvestris L. (Pinaceae), Sorbus aucuparia L. (Rosaceae), and Prunus padus L. (Rosaceae)] have not been previously described with respect to their tumoricidal activities in the scientific literature, suggesting thus the potential herbal materials for further investigations of natural anticancer compounds.",2014-02-07 +27638400,MSAProbs-MPI: parallel multiple sequence aligner for distributed-memory systems.,"MSAProbs is a state-of-the-art protein multiple sequence alignment tool based on hidden Markov models. It can achieve high alignment accuracy at the expense of relatively long runtimes for large-scale input datasets. In this work we present MSAProbs-MPI, a distributed-memory parallel version of the multithreaded MSAProbs tool that is able to reduce runtimes by exploiting the compute capabilities of common multicore CPU clusters. Our performance evaluation on a cluster with 32 nodes (each containing two Intel Haswell processors) shows reductions in execution time of over one order of magnitude for typical input datasets. Furthermore, MSAProbs-MPI using eight nodes is faster than the GPU-accelerated QuickProbs running on a Tesla K20. Another strong point is that MSAProbs-MPI can deal with large datasets for which MSAProbs and QuickProbs might fail due to time and memory constraints, respectively.

Availability and implementation

Source code in C ++ and MPI running on Linux systems as well as a reference manual are available at http://msaprobs.sourceforge.net CONTACT: jgonzalezd@udc.esSupplementary information: Supplementary data are available at Bioinformatics online.",2016-09-16 +28348848,PhagePhisher: a pipeline for the discovery of covert viral sequences in complex genomic datasets.,"Obtaining meaningful viral information from large sequencing datasets presents unique challenges distinct from prokaryotic and eukaryotic sequencing efforts. The difficulties surrounding this issue can be ascribed in part to the genomic plasticity of viruses themselves as well as the scarcity of existing information in genomic databases. The open-source software PhagePhisher (http://www.putonti-lab.com/phagephisher) has been designed as a simple pipeline to extract relevant information from complex and mixed datasets, and will improve the examination of bacteriophages, viruses, and virally related sequences, in a range of environments. Key aspects of the software include speed and ease of use; PhagePhisher can be used with limited operator knowledge of bioinformatics on a standard workstation. As a proof-of-concept, PhagePhisher was successfully implemented with bacteria-virus mixed samples of varying complexity. Furthermore, viral signals within microbial metagenomic datasets were easily and quickly identified by PhagePhisher, including those from prophages as well as lysogenic phages, an important and often neglected aspect of examining phage populations in the environment. PhagePhisher resolves viral-related sequences which may be obscured by or imbedded in bacterial genomes.",2016-03-10 +26912756,Revisiting the Roles of Culture and Culture-Independent Detection Tests for Campylobacter.,"Culture-independent detection tests (CIDTs) for Campylobacter have become an area of intense controversy and confusion among laboratorians in the field of clinical microbiology. 
To date, the true analytical and clinical performance of stool antigen CIDTs versus truly optimized culture conditions is unknown. In this issue of the Journal of Clinical Microbiology, Fitzgerald and colleagues (C. Fitzgerald et al., J Clin Microbiol 54:1209-1215, 2016, http://dx.doi.org/10.1128/JCM.01925-15) report comprehensive performance data for four Campylobacter stool antigen CIDTs versus culture and molecular diagnostics.",2016-02-24 +23396302,The YeastGenome app: the Saccharomyces Genome Database at your fingertips.,"The Saccharomyces Genome Database (SGD) is a scientific database that provides researchers with high-quality curated data about the genes and gene products of Saccharomyces cerevisiae. To provide instant and easy access to this information on mobile devices, we have developed YeastGenome, a native application for the Apple iPhone and iPad. YeastGenome can be used to quickly find basic information about S. cerevisiae genes and chromosomal features regardless of internet connectivity. With or without network access, you can view basic information and Gene Ontology annotations about a gene of interest by searching gene names and gene descriptions or by browsing the database within the app to find the gene of interest. With internet access, the app provides more detailed information about the gene, including mutant phenotypes, references and protein and genetic interactions, as well as provides hyperlinks to retrieve detailed information by showing SGD pages and views of the genome browser. SGD provides online help describing basic ways to navigate the mobile version of SGD, highlights key features and answers frequently asked questions related to the app. The app is available from iTunes (http://itunes.com/apps/yeastgenome). 
The YeastGenome app is provided freely as a service to our community, as part of SGD's mission to provide free and open access to all its data and annotations.",2013-02-08 +26701675,Discriminative Transfer Subspace Learning via Low-Rank and Sparse Representation.,"In this paper, we address the problem of unsupervised domain transfer learning in which no labels are available in the target domain. We use a transformation matrix to transfer both the source and target data to a common subspace, where each target sample can be represented by a combination of source samples such that the samples from different domains can be well interlaced. In this way, the discrepancy of the source and target domains is reduced. By imposing joint low-rank and sparse constraints on the reconstruction coefficient matrix, the global and local structures of data can be preserved. To enlarge the margins between different classes as much as possible and provide more freedom to diminish the discrepancy, a flexible linear classifier (projection) is obtained by learning a non-negative label relaxation matrix that allows the strict binary label matrix to relax into a slack variable matrix. Our method can avoid a potentially negative transfer by using a sparse matrix to model the noise and, thus, is more robust to different types of noise. We formulate our problem as a constrained low-rankness and sparsity minimization problem and solve it by the inexact augmented Lagrange multiplier method. Extensive experiments on various visual domain adaptation tasks show the superiority of the proposed method over the state-of-the art methods. The MATLAB code of our method will be publicly available at http://www.yongxu.org/lunwen.html.",2015-12-18 +27777244,Disease named entity recognition by combining conditional random fields and bidirectional recurrent neural networks. 
,"The recognition of disease and chemical named entities in scientific articles is a very important subtask in information extraction in the biomedical domain. Due to the diversity and complexity of disease names, the recognition of named entities of diseases is rather tougher than those of chemical names. Although there are some remarkable chemical named entity recognition systems available online such as ChemSpot and tmChem, the publicly available recognition systems of disease named entities are rare. This article presents a system for disease named entity recognition (DNER) and normalization. First, two separate DNER models are developed. One is based on conditional random fields model with a rule-based post-processing module. The other one is based on the bidirectional recurrent neural networks. Then the named entities recognized by each of the DNER model are fed into a support vector machine classifier for combining results. Finally, each recognized disease named entity is normalized to a medical subject heading disease name by using a vector space model based method. Experimental results show that using 1000 PubMed abstracts for training, our proposed system achieves an F1-measure of 0.8428 at the mention level and 0.7804 at the concept level, respectively, on the testing data of the chemical-disease relation task in BioCreative V.Database URL: http://219.223.252.210:8080/SS/cdr.html.",2016-10-24 +25048120,IMGT/HLA and the Immuno Polymorphism Database.,"The IMGT/HLA Database (http://www.ebi.ac.uk/ipd/imgt/hla/) was first released over 15 years ago, providing the HLA community with a searchable repository of highly curated HLA sequences. The HLA complex is located within the 6p21.3 region of human chromosome 6 and contains more than 220 genes of diverse function. Many of the genes encode proteins of the immune system and are highly polymorphic, with some genes currently having over 3,000 known allelic variants. 
The Immuno Polymorphism Database (IPD) (http://www.ebi.ac.uk/ipd/) expands on this model, with a further set of specialist databases related to the study of polymorphic genes in the immune system. The IPD project works with specialist groups or nomenclature committees who provide and curate individual sections before they are submitted to IPD for online publication. IPD currently consists of four databases: IPD-KIR contains the allelic sequences of killer-cell immunoglobulin-like receptors; IPD-MHC is a database of sequences of the major histocompatibility complex of different species; IPD-HPA, alloantigens expressed only on platelets; and IPD-ESTDAB, which provides access to the European Searchable Tumour Cell-Line Database, a cell bank of immunologically characterized melanoma cell lines. Through the work of the HLA Informatics Group and in collaboration with the European Bioinformatics Institute we are able to provide public access to this data through the website http://www.ebi.ac.uk/ipd/.",2014-01-01 +26953092,A Web Server and Mobile App for Computing Hemolytic Potency of Peptides.,"Numerous therapeutic peptides do not enter the clinical trials just because of their high hemolytic activity. Recently, we developed a database, Hemolytik, for maintaining experimentally validated hemolytic and non-hemolytic peptides. The present study describes a web server and mobile app developed for predicting, and screening of peptides having hemolytic potency. Firstly, we generated a dataset HemoPI-1 that contains 552 hemolytic peptides extracted from Hemolytik database and 552 random non-hemolytic peptides (from Swiss-Prot). The sequence analysis of these peptides revealed that certain residues (e.g., L, K, F, W) and motifs (e.g., ""FKK"", ""LKL"", ""KKLL"", ""KWK"", ""VLK"", ""CYCR"", ""CRR"", ""RFC"", ""RRR"", ""LKKL"") are more abundant in hemolytic peptides. 
Therefore, we developed models for discriminating hemolytic and non-hemolytic peptides using various machine learning techniques and achieved more than 95% accuracy. We also developed models for discriminating peptides having high and low hemolytic potential on different datasets called HemoPI-2 and HemoPI-3. In order to serve the scientific community, we developed a web server, mobile app and JAVA-based standalone software (http://crdd.osdd.net/raghava/hemopi/).",2016-03-08 +24381135,Related genes and potential biomarkers for early diagnosis of Alzheimer's disease: a preliminary study based on DNA microarray.,"

Aim

The aim of this study is to extend our understanding of the molecular mechanism of Alzheimer's disease (AD).

Methods

We downloaded the gene expression profile GSE18309 from Gene Expression Omnibus database, which includes 3 genechips from patients with mild cognitive impairment (MCI), 3 genechips from patients with AD, and 3 genechips from normal controls (NC). Linear Models for Microarray Data package was used to identify differentially expressed genes (DEGs) in MCI versus NC group and AD versus NC group. Then, we extracted the overlapping DEGs of 2 groups for functional and pathway enrichment analysis using FuncAssociate software accompanied by gene ontology and expressing analysis systematic explorer, respectively. Further, AutoDock4 (http://autodock.scripps.edu/) was used to predict the docking site between small molecule ligands and proteins of a key DEG.

Results

A total of 60 DEGs were identified. Biological processes associated with nutrient response and muscle development were significantly dysregulated in AD and MCI. In addition, we identified 2 active binding sites (A5 and L30) on protein structure of cholecystokinin A receptor (CCKAR) for drug design.

Conclusion

The DEGs including CCKAR might be used as biomarkers for early diagnosis of AD. However, further experimental studies are needed to confirm our results.",2013-12-30 +24023856,Analysis of essential Arabidopsis nuclear genes encoding plastid-targeted proteins.,"The Chloroplast 2010 Project (http://www.plastid.msu.edu/) identified and phenotypically characterized homozygous mutants in over three thousand genes, the majority of which encode plastid-targeted proteins. Despite extensive screening by the community, no homozygous mutant alleles were available for several hundred genes, suggesting that these might be enriched for genes of essential function. Attempts were made to generate homozygotes in ~1200 of these lines and 521 of the homozygous viable lines obtained were deposited in the Arabidopsis Biological Resource Center (http://abrc.osu.edu/). Lines that did not yield a homozygote in soil were tested as potentially homozygous lethal due to defects either in seed or seedling development. Mutants were characterized at four stages of development: developing seed, mature seed, at germination, and developing seedlings. To distinguish seed development or seed pigment-defective mutants from seedling development mutants, development of seeds was assayed in siliques from heterozygous plants. Segregating seeds from heterozygous parents were sown on supplemented media in an attempt to rescue homozygous seedlings that could not germinate or survive in soil. Growth of segregating seeds in air and air enriched to 0.3% carbon dioxide was compared to discover mutants potentially impaired in photorespiration or otherwise responsive to CO2 supplementation. Chlorophyll fluorescence measurements identified CO2-responsive mutants with altered photosynthetic parameters. Examples of genes with a viable mutant allele and one or more putative homozygous-lethal alleles were documented. 
RT-PCR of homozygotes for potentially weak alleles revealed that essential genes may remain undiscovered because of the lack of a true null mutant allele. This work revealed 33 genes with two or more lethal alleles and 73 genes whose essentiality was not confirmed with an independent lethal mutation, although in some cases second leaky alleles were identified.",2013-09-04 +25181531,RTCGAToolbox: a new tool for exporting TCGA Firehose data.,"

Background & objective

Managing data from large-scale projects (such as The Cancer Genome Atlas (TCGA)) for further analysis is an important and time consuming step for research projects. Several efforts, such as the Firehose project, make TCGA pre-processed data publicly available via web services and data portals, but this information must be managed, downloaded and prepared for subsequent steps. We have developed an open source and extensible R based data client for pre-processed data from the Firehose, and demonstrate its use with sample case studies. Results show that our RTCGAToolbox can facilitate data management for researchers interested in working with TCGA data. The RTCGAToolbox can also be integrated with other analysis pipelines for further data processing.

Availability and implementation

The RTCGAToolbox is open-source and licensed under the GNU General Public License Version 2.0. All documentation and source code for RTCGAToolbox is freely available at http://mksamur.github.io/RTCGAToolbox/ for Linux and Mac OS X operating systems.",2014-09-02 +24336806,NetCoffee: a fast and accurate global alignment approach to identify functionally conserved proteins in multiple networks.,"

Motivation

Owing to recent advancements in high-throughput technologies, protein-protein interaction networks of more and more species become available in public databases. The question of how to identify functionally conserved proteins across species attracts a lot of attention in computational biology. Network alignments provide a systematic way to solve this problem. However, most existing alignment tools encounter limitations in tackling this problem. Therefore, the demand for faster and more efficient alignment tools is growing.

Results

We present a fast and accurate algorithm, NetCoffee, which allows to find a global alignment of multiple protein-protein interaction networks. NetCoffee searches for a global alignment by maximizing a target function using simulated annealing on a set of weighted bipartite graphs that are constructed using a triplet approach similar to T-Coffee. To assess its performance, NetCoffee was applied to four real datasets. Our results suggest that NetCoffee remedies several limitations of previous algorithms, outperforms all existing alignment tools in terms of speed and nevertheless identifies biologically meaningful alignments.

Availability

The source code and data are freely available for download under the GNU GPL v3 license at https://code.google.com/p/netcoffee/.",2013-12-13 +27294022,The need to redefine genomic data sharing: A focus on data accessibility.,"DNAdigest's mission is to investigate and address the issues hindering efficient and ethical genomic data sharing in the human genomics research community. We conducted contextual interviews with human genomics researchers in clinical, academic or industrial R&D settings about their experience with accessing and sharing human genomic data. The qualitative interviews were followed by an online survey which provided quantitative support for our findings. Here we present the generalised workflow for accessing human genomic data through both public and restricted-access repositories and discuss reported points of frustration and their possible improvements. We discuss how data discoverability and accessibility are lacking in current mechanisms and how these are the prerequisites for adoption of best practices in the research community. We summarise current initiatives related to genomic data discovery and present a new data discovery platform available at http://nucleobase.co.uk.",2014-09-28 +29787190,Culture of Care: Organizational Responsibilities,"Animal use in research has contributed significantly to advances in science and medicine, and the role of laboratory animal professionals in this process is pivotal (AALAS 2001; Medina 2008). While it is desirable to use alternatives to live animals for this process, the use of animals continues to be necessary to protect human and animal health and the environment (EU 2010). To preserve the privilege to use animals in research, a strong program of animal care and use becomes important for several reasons: regulatory compliance, quality of scientific results, addressing public sensitivities, managing staff sensitivities, and moral obligations to the animals themselves. 
Regulations impacting on the care and use of research animals are covered in greater detail elsewhere in this text. The most commonly referenced regulatory standards include the Guide for the Care and Use of Laboratory Animals (Guide) (recognized internationally as setting standards for animal care and use), European Union (EU) Directive 2010/63/EU, and the World Organization for Animal Health (OIE). The Guide states that “all who care for, use, or produce animals for research, testing or teaching must assume responsibility for their well-being,” and that “both researchers and institutions have affirmative duties of humane care and use” of research animals, which is later defined as “those actions taken to ensure that laboratory animals are treated according to high ethical and scientific standards” (NRC 2011). The Guide further states that “it is the institution’s responsibility to put into place policies, procedures, standards, organizational structure, staffing, facilities, and practices to ensure the humane care and use of laboratory animals throughout the institution” (NRC 2011). The EU Directive states that animals have intrinsic value that must be respected and that “animal welfare considerations should be given the highest priority ” that each use is carefully evaluated,” and that principles of replacement, reduction, and refinement (the 3Rs) should be considered systematically when using animals in research (EU 2010). The OIE, comprised of more than 170 member countries, has eight guiding principles on animal welfare outlined in its Animal Health Code. These principles also support incorporation of the 3Rs and state “that the use of animals carries with it an ethical responsibility to ensure their welfare to the greatest extent practicable” (OIE 2008). Biomedical progress depends, fundamentally, on scientific excellence, which is dependent on quality animal care (Ad Hoc Committee to Revise the International Guiding Principles 2012; Friese 2013). 
The provision of excellent care also addresses some of the ethical and moral concerns of the general public regarding the use of animals in research. Animal care and use carries with it the responsibility to ensure that high ethical and scientific standards (NRC 2011) are met, and the public is reassured by knowing how much effort is expended by animal caregiving staff on behalf of the animals, to adhere to the intent as well as the scope of the laws that protect research animals (Medina 2008; EU 2010; Coleman 2011). Strong animal care and use programs also address the sensitivities of staff, who often choose careers in animal research because of their love and compassion for animals (Coleman 2011; Davies and Horst 2015). Institutional culture influences the productivity and performance of many enterprises (Simone 2009; Ng’ang’a and Nyongesa 2012; Uddin et al. 2013), and cultures that promote caring for the animals and people supporting animal care and use programs can provide a basis for an exceptional animal care and use program. This culture, often referred to as the “culture of caring” or “culture of care,” promotes compassion and respect for laboratory animals and the people who work with them. In discussing a “strong culture” at a successful large technology company, Kunda focuses on the “self-conscious and tireless celebration of the company’s strong culture—one in which employees are creative, committed, entrepreneurial, independent, and moral.” This involves not only employees’ intellectual skills and physical presence, but also their emotions, moral sense, and personal loyalties (Kunda 2006). Care is less something to be rigidly defined than a style of thinking. It “directs attention to what was once rendered invisible within scientific research ” as opposed to the calculable and controllable” (Mol et al. 2010). 
Davies and Horst (2015) write about the relationship between “craft” (or skills) and “care” and reflect on the potential implications of the promotion of a culture of care in a research setting. They propose a model of craft as a caring practice “which brings together skill, a focus on utility or purpose and a particular emotional orientation (care, passion and commitment).” In their analysis of numerous research labs globally, they found that “a happy group was understood as a productive one,” and the strongest leaders accommodated different individuals and viewed treating people well as vital, “both because it is the right thing to do and because it is, ultimately, good for science.” A culture of care goes beyond being compliant with applicable rules and regulations and strives to meet the full intent of established rules and regulations—excellent animal welfare and reproducible scientific results. Many of the laws and guidelines surrounding animal care and use allow for the use of professional judgment (Klein and Bayne 2007). This should not be interpreted to support a minimalistic approach that just meets the letter of the law, but instead should be applied to working with animals in a manner that strives to provide the best possible care for the animals, thus producing the highest-quality scientific results (Medina 2008). A culture of care often starts with an institutional mission and value statement that clearly states the institution’s commitment to the humane care and use of animals (Phanuel Kofi Darbi 2012). This mission statement frequently refers to the advancement of knowledge, the development of life-saving procedures and drugs, improving the quality of life for humans and animals, or some similar goal. The corresponding value statement, often referred to as “core values,” articulates the institution’s commitment to animal welfare, the humane care and use of laboratory animals, and/or the implementation of the 3Rs. 
Examples include: “[Our Institution] is committed to the humane care of the research animals we produce and work with in all of our activities” (http://www.criver.com/about-us/humane-care/best-practices). “We are committed to reducing our reliance on animal testing methods, and promoting the development, validation and use of non-animal testing models. [The Institution] requires that where animals have been or may be used for research or testing, that we abide by the principles of the 3Rs of animal research” (http://www.bms.com/sustainability/environmental_performance/Pages/product_stewardship.aspx). “[Our Institution] is committed to ensuring the humane care and use of laboratory animals in the company’s research and development programs. We recognize that high quality science and humane animal care are inseparable. In addition to complying with applicable legislation and regulations, [Our Institution’s] laboratory animal research programs and facilities aim to exceed regulatory agency standards” (http://www.abbvie.com/responsibility/transparency-policies/home.html#). A culture of care usually includes: Strong institutional commitment to provide the resources and leadership necessary, such as ongoing communication from management that reinforces the commitment to animal welfare for all institutional stakeholders (scientists, technicians, shareholders, and the public). Creation of an environment where staff feel empowered to come forward with any concerns or suggestions they have to improve the animal care and use program and that respects and nurtures staff compassion. Mechanisms to support open communications on all aspects of the program. A well-defined program of training on aspects of animal care and use, including ethics for all employees (from animal care technicians to top research scientists) and mechanisms to ensure competency. Programs that recognize excellence in animal care and use. 
Empowerment of animal welfare oversight committees, such as the Institutional Animal Care and Use Committee (IACUC), Ethics Committees (ECs), and Animal Welfare Bodies (AWBs). Commitment to, and proactive implementation of, the 3Rs. The productivity of any enterprise is, ultimately, dependent on the culture established to drive its success (Kunda 2006). Biomedical advances in a research culture are a significant aspect of their measure of productivity, and as a result, these advances continue to improve and save human and animal lives. There are still many unmet medical needs for both people and animals. Therapeutic discoveries are necessary to address these various diseases and disorders. The research community cannot provide the cures and treatments needed without collecting scientific data in both preclinical animal studies and human clinical trials, which both must adhere to high scientific and ethical standards as described in good laboratory practice (GLP) and good clinical practice (GCP) regulations, respectively (FDA 2001). The miracles of tomorrow depend on ongoing innovations in biomedical research progress today (Brouwers et al. 2011). Caring for research animals can present a variety of emotional challenges for research and laboratory animal professionals (AALAS 2013); however, a strong culture of care that supports the overall well-being of all the animals and people involved in biomedical discovery may drive productivity in unprecedented ways. 
This chapter elaborates on the important components described above, providing examples and suggestions for how a culture of care can be incorporated into any animal care and use program, regardless of size or scientific mission.",2018-05-23 +22623287,Software for analysing ion mobility mass spectrometry data to improve peptide identification.,"The development of ion mobility (IM) MS instruments has the capability to provide an added dimension to peptide analysis pipelines in proteomics, but, as yet, there are few software tools available for analysing such data. IM can be used to provide additional separation of parent ions or product ions following fragmentation. In this work, we have created a set of software tools that are capable of converting three dimensional IM data generated from analysis of fragment ions into a variety of formats used in proteomics. We demonstrate that IM can be used to calculate the charge state of a fragment ion, demonstrating the potential to improve peptide identification by excluding non-informative ions from a database search. We also provide preliminary evidence of structural differences between b and y ions for certain peptide sequences but not others. All software tools and data sets are made available in the public domain at http://code.google.com/p/ion-mobility-ms-tools/.",2012-06-01 +26292701,Tobacco companies' efforts to undermine ingredient disclosure: the Massachusetts benchmark study.,"

Objectives

To assess the 'Massachusetts Benchmark Study' (MBS) that the tobacco companies presented to the Massachusetts Department of Public Health (MDPH) in 1999 in response to ingredient disclosure regulations in the state. This case study can inform future ingredient disclosure regulations, including implementation of Articles 9 and 10 of the WHO Framework Convention on Tobacco Control (FCTC).

Methods

We analysed documents available at http://legacy.library.ucsf.edu to identify internal communications regarding the design and execution of the MBS and internal studies on the relationship between tar, nicotine and carbon monoxide and smoke constituents and reviewed publications that further evaluated data published as part of the MBS.

Results

The companies conducted extensive studies of cigarette design factors and ingredients that significantly impacted the levels of constituents. While this study asserted that by-brand emissions could be estimated reliably from published tar, nicotine, and carbon monoxide levels, the tobacco companies were well aware that factors beyond tar, nicotine and carbon monoxide influenced levels of constituents included in the study. This severely limited the potential usefulness of the MBS predictor equations.

Conclusions

Despite promises to provide data that would allow regulators to predict constituent data for all brands on the market, the final MBS results offered no useful predictive information to inform regulators, the scientific community or consumers. When implementing FCTC Articles 9 and 10, regulatory agencies should demand detailed by-brand information on tobacco product constituents and toxin deliveries to users.",2015-08-20 +22135302,DiseaseMeth: a human disease methylation database.,"DNA methylation is an important epigenetic modification for genomic regulation in higher organisms that plays a crucial role in the initiation and progression of diseases. The integration and mining of DNA methylation data by methylation-specific PCR and genome-wide profiling technology could greatly assist the discovery of novel candidate disease biomarkers. However, this is difficult without a comprehensive DNA methylation repository of human diseases. Therefore, we have developed DiseaseMeth, a human disease methylation database (http://bioinfo.hrbmu.edu.cn/diseasemeth). Its focus is the efficient storage and statistical analysis of DNA methylation data sets from various diseases. Experimental information from over 14,000 entries and 175 high-throughput data sets from a wide number of sources have been collected and incorporated into DiseaseMeth. The latest release incorporates the gene-centric methylation data of 72 human diseases from a variety of technologies and platforms. To facilitate data extraction, DiseaseMeth supports multiple search options such as gene ID and disease name. DiseaseMeth provides integrated gene methylation data based on cross-data set analysis for disease and normal samples. These can be used for in-depth identification of differentially methylated genes and the investigation of gene-disease relationship.",2011-12-01 +21252074,PhyloPro: a web-based tool for the generation and visualization of phylogenetic profiles across Eukarya.,"

Summary

With increasing numbers of eukaryotic genome sequences, phylogenetic profiles of eukaryotic genes are becoming increasingly informative. Here, we introduce a new web-tool Phylopro (http://compsysbio.org/phylopro/), which uses the 120 available eukaryotic genome sequences to visualize the evolutionary trajectories of user-defined subsets of model organism genes. Applied to pathways or complexes, PhyloPro allows the user to rapidly identify core conserved elements of biological processes together with those that may represent lineage-specific innovations. PhyloPro thus provides a valuable resource for the evolutionary and comparative studies of biological systems.",2011-01-19 +27224847,The INTEGRATE project: Delivering solutions for efficient multi-centric clinical research and trials.,"The objective of the INTEGRATE project (http://www.fp7-integrate.eu/) that has recently concluded successfully was the development of innovative biomedical applications focused on streamlining the execution of clinical research, on enabling multidisciplinary collaboration, on management and large-scale sharing of multi-level heterogeneous datasets, and on the development of new methodologies and of predictive multi-scale models in cancer. In this paper, we present the way the INTEGRATE consortium has approached important challenges such as the integration of multi-scale biomedical data in the context of post-genomic clinical trials, the development of predictive models and the implementation of tools to facilitate the efficient execution of postgenomic multi-centric clinical trials in breast cancer. Furthermore, we provide a number of key ""lessons learned"" during the process and give directions for further future research and development.",2016-05-17 +28286068,Efficacy of bacillus Calmette-Guérin Strains for Treatment of Nonmuscle Invasive Bladder Cancer: A Systematic Review and Network Meta-Analysis.,"

Purpose

We sought to determine the efficacy of genetically distinct bacillus Calmette-Guérin strains in preventing disease recurrence in patients with nonmuscle invasive bladder cancer.

Materials and methods

We conducted a systematic review and network meta-analysis of trials evaluating bacillus Calmette-Guérin strains against all possible comparators (different bacillus Calmette-Guérin strains, chemotherapy and nonbacillus Calmette-Guérin biological therapies) with intravesical chemotherapy as the common comparator. MEDLINE® (http://www.ncbi.nlm.nih.gov/pubmed) served as the primary data source, with the search from inception to October 2016 for clinical trials involving patients with nonmuscle invasive bladder cancer receiving bacillus Calmette-Guérin. Primary outcome measure was bladder cancer recurrence, defined as recurrent bladder tumor of any grade or stage. Random effect network meta-analysis provided estimates for outcomes and is presented as odds ratios.

Results

Across all possible comparators (65 trials, 12,246 patients, 9 strains) there were 2,177 recurrences in 5,642 treated patients (38.6%) and 2,316 recurrences in 5,441 comparators (42.6%). With chemotherapy as the common comparator (28 trials, 5,757 patients, 5 strains) Tokyo-172 (OR 0.39, 95% CI 0.16-0.93), Pasteur (OR 0.49, 95% CI 0.28-0.86) and TICE® (OR 0.61, 95% CI 0.40-0.93) strains were significantly better than chemotherapy at preventing recurrence. No bacillus Calmette-Guérin strain demonstrated significant superiority when compared to any other strain at preventing recurrence in the network meta-analysis.

Conclusions

Bacillus Calmette-Guérin strains exhibited significant differences in efficacy compared to chemotherapy. However, no definitive conclusions could be reached regarding strain superiority, and head-to-head trials are greatly needed to further understand the importance of strain selection in determining bacillus Calmette-Guérin efficacy.",2017-03-10 +26353838,SHAPE directed RNA folding.,"

Summary

Chemical mapping experiments allow for nucleotide resolution assessment of RNA structure. We demonstrate that different strategies of integrating probing data with thermodynamics-based RNA secondary structure prediction algorithms can be implemented by means of soft constraints. This amounts to incorporating suitable pseudo-energies into the standard energy model for RNA secondary structures. As a showcase application for this new feature of the ViennaRNA Package we compare three distinct, previously published strategies to utilize SHAPE reactivities for structure prediction. The new tool is benchmarked on a set of RNAs with known reference structure.

Availability and implementation

The capability for SHAPE directed RNA folding is part of the upcoming release of the ViennaRNA Package 2.2, for which a preliminary release is already freely available at http://www.tbi.univie.ac.at/RNA.

Contact

michael.wolfinger@univie.ac.at

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-09 +24340041,POGs2: a web portal to facilitate cross-species inferences about protein architecture and function in plants.,"The Putative orthologous Groups 2 Database (POGs2) (http://pogs.uoregon.edu/) integrates information about the inferred proteomes of four plant species (Arabidopsis thaliana, Zea mays, Oryza sativa, and Populus trichocarpa) in a display that facilitates comparisons among orthologs and extrapolation of annotations among species. A single-page view collates key functional data for members of each Putative Orthologous Group (POG): graphical representations of InterPro domains, predicted and established intracellular locations, and imported gene descriptions. The display incorporates POGs predicted by two different algorithms as well as gene trees, allowing users to evaluate the validity of POG memberships. The web interface provides ready access to sequences and alignments of POG members, as well as sequences, alignments, and domain architectures of closely-related paralogs. A simple and flexible search interface permits queries by BLAST and by any combination of gene identifier, keywords, domain names, InterPro identifiers, and intracellular location. The concurrent display of domain architectures for orthologous proteins highlights errors in gene models and false-negatives in domain predictions. The POGs2 layout is also useful for exploring candidate genes identified by transposon tagging, QTL mapping, map-based cloning, and proteomics, and for navigating between orthologous groups that belong to the same gene family.",2013-12-10 +26612788,Meta-analysis in Stata using gllamm.,"There are several user-written programs for performing meta-analysis in Stata (Stata Statistical Software: College Station, TX: Stata Corp LP). These include metan, metareg, mvmeta, and glst. However, there are several cases for which these programs do not suffice. 
For instance, there is no software for performing univariate meta-analysis with correlated estimates, for multilevel or hierarchical meta-analysis, or for meta-analysis of longitudinal data. In this work, we show with practical applications that many disparate models, including but not limited to the ones mentioned earlier, can be fitted using gllamm. The software is very versatile and can handle a wide variety of models with applications in a wide range of disciplines. The method presented here takes advantage of these modeling capabilities and makes use of appropriate transformations, based on the Cholesky decomposition of the inverse of the covariance matrix, known as generalized least squares, in order to handle correlated data. The models described earlier can be thought of as special instances of a general linear mixed-model formulation, but to the author's knowledge, a general exposition in order to incorporate all the available models for meta-analysis as special cases and the instructions to fit them in Stata has not been presented so far. Source code is available at http://www.compgen.org/tools/gllamm.",2015-11-27 +26178880,Joint haplotype assembly and genotype calling via sequential Monte Carlo algorithm.,"

Background

Genetic variations predispose individuals to hereditary diseases, play important role in the development of complex diseases, and impact drug metabolism. The full information about the DNA variations in the genome of an individual is given by haplotypes, the ordered lists of single nucleotide polymorphisms (SNPs) located on chromosomes. Affordable high-throughput DNA sequencing technologies enable routine acquisition of data needed for the assembly of single individual haplotypes. However, state-of-the-art high-throughput sequencing platforms generate data that is erroneous, which induces uncertainty in the SNP and genotype calling procedures and, ultimately, adversely affect the accuracy of haplotyping. When inferring haplotype phase information, the vast majority of the existing techniques for haplotype assembly assume that the genotype information is correct. This motivates the development of methods capable of joint genotype calling and haplotype assembly.

Results

We present a haplotype assembly algorithm, ParticleHap, that relies on a probabilistic description of the sequencing data to jointly infer genotypes and assemble the most likely haplotypes. Our method employs a deterministic sequential Monte Carlo algorithm that associates single nucleotide polymorphisms with haplotypes by exhaustively exploring all possible extensions of the partial haplotypes. The algorithm relies on genotype likelihoods rather than on often erroneously called genotypes, thus ensuring a more accurate assembly of the haplotypes. Results on both the 1000 Genomes Project experimental data as well as simulation studies demonstrate that the proposed approach enables highly accurate solutions to the haplotype assembly problem while being computationally efficient and scalable, generally outperforming existing methods in terms of both accuracy and speed.

Conclusions

The developed probabilistic framework and sequential Monte Carlo algorithm enable joint haplotype assembly and genotyping in a computationally efficient manner. Our results demonstrate fast and highly accurate haplotype assembly aided by the re-examination of erroneously called genotypes. A C code implementation of ParticleHap will be available for download from https://sites.google.com/site/asynoeun/particlehap.",2015-07-16 +25144213,Transcatheter aortic valve replacement: establishing a comprehensive program model for hybrid cardiac catheterization laboratories in the Department of Veterans Affairs.,"Aortic valve disease, especially aortic stenosis, becomes progressively debilitating and carries a high mortality risk if it is categorized as severe and symptomatic (J Thorac Cardiovas Surg. 2012;144(3):e29-e84). In the past, the only treatment for aortic stenosis was surgical aortic valve replacement. Surgical treatment may require several hours of cardioplegia, and if the patient has comorbidities, such as renal failure or chronic obstructive pulmonary disease, their operative mortality percentage increases. In 2011, the US Food and Drug Administration approved the use of a transcatheter aortic valve replacement (TAVR) procedure for patients who were deemed high risk or inoperative for the routine surgical aortic valve replacement surgery. More than 20,000 TAVRs have been performed in patients worldwide since 2002 when Dr Alain Cribier performed the first-in-man TAVR (Arch Cardiovasc Dis. 2012;105(3):145-152). The Edwards Lifesciences SAPIEN XT valve and the Medtronic CoreValve are commercially available. The clinical findings and economic statistics have supported the expansion of the TAVR procedure. However, there has been considerable controversy over where the procedure is to occur and who is directly responsible for directing the TAVR care. This debate has identified barriers to the implementation of a TAVR program. 
The operating rooms and a cardiac catheterization laboratory are underprepared for the hybrid valve replacement therapy. Because of the barriers identified, the Department of Veterans Affairs determined a need for a systematic approach to review the programs that applied for this structural heart disease program. A centralized team was developed to ensure room readiness and staff competency. The use of the Health Failure Mode and Effects Analysis can define high-risk clinical processes and conduct a hazard analysis. Worksheets can show potential failure modes and their probabilities, along with actions and outcome measures, team collaboration, extensive screening, and selection process. The TAVR program begins implementation with data entry with each case into CART-CL (Cardiovascular Assessment, Reporting and Tracking System for Cath Labs, Veteran Administration database for interventional cardiology procedures). If an untoward event occurs, within 24 hours the CART-CL Quality Assessment Team is activated to begin the review process. This provides real-time review and feedback to the local facility in an expeditious manner. Cardiac catheterization laboratories have been inundated with rapidly changing technological advances in the past decade. The era for structural heart repair is rapidly mobilizing from a surgical/operating room setting to a transcatheter/hybrid catheterization laboratory suite. The use of the new hybrid catheterization laboratories will continue to expand as the approval of future transcatheter therapies evolve. Editor's note: Due to the volume of important information presented in each table, only the first table is included in the print version of the article, however, all tables may be viewed in their entirety free of charge on the online version of this article: http://journals.lww.com/dccnjournal/pages/default.aspx.",2014-09-01 +22827703,Short read sequence typing (SRST): multi-locus sequence types from short reads.,"

Background

Multi-locus sequence typing (MLST) has become the gold standard for population analyses of bacterial pathogens. This method focuses on the sequences of a small number of loci (usually seven) to divide the population and is simple, robust and facilitates comparison of results between laboratories and over time. Over the last decade, researchers and population health specialists have invested substantial effort in building up public MLST databases for nearly 100 different bacterial species, and these databases contain a wealth of important information linked to MLST sequence types such as time and place of isolation, host or niche, serotype and even clinical or drug resistance profiles. Recent advances in sequencing technology mean it is increasingly feasible to perform bacterial population analysis at the whole genome level. This offers massive gains in resolving power and genetic profiling compared to MLST, and will eventually replace MLST for bacterial typing and population analysis. However given the wealth of data currently available in MLST databases, it is crucial to maintain backwards compatibility with MLST schemes so that new genome analyses can be understood in their proper historical context.

Results

We present a software tool, SRST, for quick and accurate retrieval of sequence types from short read sets, using inputs easily downloaded from public databases. SRST uses read mapping and an allele assignment score incorporating sequence coverage and variability, to determine the most likely allele at each MLST locus. Analysis of over 3,500 loci in more than 500 publicly accessible Illumina read sets showed SRST to be highly accurate at allele assignment. SRST output is compatible with common analysis tools such as eBURST, Clonal Frame or PhyloViz, allowing easy comparison between novel genome data and MLST data. Alignment, fastq and pileup files can also be generated for novel alleles.

Conclusions

SRST is a novel software tool for accurate assignment of sequence types using short read data. Several uses for the tool are demonstrated, including quality control for high-throughput sequencing projects, plasmid MLST and analysis of genomic data during outbreak investigation. SRST is open-source, requires Python, BWA and SamTools, and is available from http://srst.sourceforge.net.",2012-07-24 +26944514,"Wickerhamomyces spegazzinii sp. nov., an ascomycetous yeast isolated from the fungus garden of Acromyrmex lundii nest (Hymenoptera: Formicidae).","A novel ascomycetous yeast species in the genus Wickerhamomyces was isolated from the fungus garden of an attine ant nest, Acromyrmex lundii (Hymenoptera: Formicidae), from Santa Fe province, Argentina. Pairwise sequence alignment of D1/D2 sequences in the GenBank (http://www.ncbi.nlm.nih.gov) database revealed that the novel species is related most closely to Wickerhamomyces subpelliculosus, Wickerhamomyces linferdii, Wickerhamomyces anomalus, Wickerhamomyces siamensis and Wickerhamomyces ciferrii with 96% similarity to the first four. The species name Wickerhamomyces spegazzinii sp. nov. is proposed to accommodate this novel strain, which differs from the above species in melibiose, 5-keto-D-gluconate, succinate, and DL-lactate assimilation among others. The type strain is JLU025T (=CBS 12756T=CBMAI 1619T).",2016-03-04 +23160412,Opportunities for text mining in the FlyBase genetic literature curation workflow.,"FlyBase is the model organism database for Drosophila genetic and genomic information. Over the last 20 years, FlyBase has had to adapt and change to keep abreast of advances in biology and database design. We are continually looking for ways to improve curation efficiency and efficacy. Genetic literature curation focuses on the extraction of genetic entities (e.g. genes, mutant alleles, transgenic constructs) and their associated phenotypes and Gene Ontology terms from the published literature. 
Over 2000 Drosophila research articles are now published every year. These articles are becoming ever more data-rich and there is a growing need for text mining to shoulder some of the burden of paper triage and data extraction. In this article, we describe our curation workflow, along with some of the problems and bottlenecks therein, and highlight the opportunities for text mining. We do so in the hope of encouraging the BioCreative community to help us to develop effective methods to mine this torrent of information. DATABASE URL: http://flybase.org",2012-11-17 +25941090,The Cambridge MRI database for animal models of Huntington disease.,We describe the Cambridge animal brain magnetic resonance imaging repository comprising 400 datasets to date from mouse models of Huntington disease. The data include raw images as well as segmented grey and white matter images with maps of cortical thickness. All images and phenotypic data for each subject are freely-available without restriction from (http://www.dspace.cam.ac.uk/handle/1810/243361/). Software and anatomical population templates optimised for animal brain analysis with MRI are also available from this site.,2015-05-02 +25717193,Normalization and noise reduction for single cell RNA-seq experiments.,

Unlabelled

A major roadblock towards accurate interpretation of single cell RNA-seq data is large technical noise resulted from small amount of input materials. The existing methods mainly aim to find differentially expressed genes rather than directly de-noise the single cell data. We present here a powerful but simple method to remove technical noise and explicitly compute the true gene expression levels based on spike-in ERCC molecules.

Availability and implementation

The software is implemented by R and the download version is available at http://wanglab.ucsd.edu/star/GRM.

Contact

wei-wang@ucsd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.,2015-02-24 +26886731,GeneNetFinder2: Improved Inference of Dynamic Gene Regulatory Relations with Multiple Regulators.,"A gene involved in complex regulatory interactions may have multiple regulators since gene expression in such interactions is often controlled by more than one gene. Another thing that makes gene regulatory interactions complicated is that regulatory interactions are not static, but change over time during the cell cycle. Most research so far has focused on identifying gene regulatory relations between individual genes in a particular stage of the cell cycle. In this study we developed a method for identifying dynamic gene regulations of several types from the time-series gene expression data. The method can find gene regulations with multiple regulators that work in combination or individually as well as those with single regulators. The method has been implemented as the second version of GeneNetFinder (hereafter called GeneNetFinder2) and tested on several gene expression datasets. Experimental results with gene expression data revealed the existence of genes that are not regulated by individual genes but rather by a combination of several genes. Such gene regulatory relations cannot be found by conventional methods. Our method finds such regulatory relations as well as those with multiple, independent regulators or single regulators, and represents gene regulatory relations as a dynamic network in which different gene regulatory relations are shown in different stages of the cell cycle. GeneNetFinder2 is available at http://bclab.inha.ac.kr/GeneNetFinder and will be useful for modeling dynamic gene regulations with multiple regulators.",2016-01-01 +28725467,Pathways on demand: automated reconstruction of human signaling networks.,"Signaling pathways are a cornerstone of systems biology. 
Several databases store high-quality representations of these pathways that are amenable for automated analyses. Despite painstaking and manual curation, these databases remain incomplete. We present PATHLINKER, a new computational method to reconstruct the interactions in a signaling pathway of interest. PATHLINKER efficiently computes multiple short paths from the receptors to transcriptional regulators (TRs) in a pathway within a background protein interaction network. We use PATHLINKER to accurately reconstruct a comprehensive set of signaling pathways from the NetPath and KEGG databases. We show that PATHLINKER has higher precision and recall than several state-of-the-art algorithms, while also ensuring that the resulting network connects receptor proteins to TRs. PATHLINKER's reconstruction of the Wnt pathway identified CFTR, an ABC class chloride ion channel transporter, as a novel intermediary that facilitates the signaling of Ryk to Dab2, which are known components of Wnt/β-catenin signaling. In HEK293 cells, we show that the Ryk-CFTR-Dab2 path is a novel amplifier of β-catenin signaling specifically in response to Wnt 1, 2, 3, and 3a of the 11 Wnts tested. PATHLINKER captures the structure of signaling pathways as represented in pathway databases better than existing methods. PATHLINKER's success in reconstructing pathways from NetPath and KEGG databases point to its applicability for complementing manual curation of these databases. PATHLINKER may serve as a promising approach for prioritizing proteins and interactions for experimental study, as illustrated by its discovery of a novel pathway in Wnt/β-catenin signaling. 
Our supplementary website at http://bioinformatics.cs.vt.edu/~murali/supplements/2016-sys-bio-applications-pathlinker/ provides links to the PATHLINKER software, input datasets, PATHLINKER reconstructions of NetPath pathways, and links to interactive visualizations of these reconstructions on GraphSpace.",2016-03-03 +22102583,Mouse Phenome Database (MPD).,"The Mouse Phenome Project was launched a decade ago to complement mouse genome sequencing efforts by promoting new phenotyping initiatives under standardized conditions and collecting the data in a central public database, the Mouse Phenome Database (MPD; http://phenome.jax.org). MPD houses a wealth of strain characteristics data to facilitate the use of the laboratory mouse in translational research for human health and disease, helping alleviate problems involving experimentation in humans that cannot be done practically or ethically. Data sets are voluntarily contributed by researchers from a variety of institutions and settings, or in some cases, retrieved by MPD staff from public sources. MPD maintains a growing collection of standardized reference data that assists investigators in selecting mouse strains for research applications; houses treatment/control data for drug studies and other interventions; offers a standardized platform for discovering genotype-phenotype relationships; and provides tools for hypothesis testing. MPD improvements and updates since our last NAR report are presented, including the addition of new tools and features to facilitate navigation and data mining as well as the acquisition of new data (phenotypic, genotypic and gene expression).",2011-11-18 +22747692,A national study of chaplaincy services and end-of-life outcomes.,"

Background

Medicine has long acknowledged the role of chaplains in healthcare, but there is little research on the relationship between chaplaincy care and health outcomes. The present study examines the association between chaplaincy services and end-of-life care service choices.

Methods

HealthCare Chaplaincy purchased the AHA survey database from the American Hospital Association. The Dartmouth Atlas of Health Care database was provided to HealthCare Chaplaincy by The Dartmouth Institute for Health Policy & Clinical Practice, with the permission of Dartmouth Atlas Co-Principal Investigator Elliot S. Fisher, M.D., M.P.H. The Dartmouth Atlas of Health Care is available interactively on-line at http://www.dartmouthatlas.org/. Patient data are aggregated at the hospital level in the Dartmouth Atlas of Health Care. IRB approval was not sought for the project because the data are available to the public through one means or another, and neither database contains data about individual patients, i.e. all the variables are measures of hospital characteristics. We combined and analyzed data from the American Hospital Association's Annual Survey and outcome data from The Dartmouth Atlas of Health Care in a cross-sectional study of 3,585 hospitals. Two outcomes were examined: the percent of patients who (1) died in the hospital, and (2) were enrolled in hospice. Ordinary least squares regression was used to measure the association between the provision of chaplaincy services and each of the outcomes, controlling for six factors associated with hospital death rates.

Results and discussion

The analyses found significantly lower rates of hospital deaths (β = .04, p < .05) and higher rates of hospice enrollment (β = .06, p < .001) for patients cared for in hospitals that provided chaplaincy services compared to hospitals that did not.

Conclusions

The findings suggest that chaplaincy services may play a role in increasing hospice enrollment. This may be attributable to chaplains' assistance to patients and families in making decisions about care at the end-of-life, perhaps by aligning their values and wishes with actual treatment plans. Additional research is warranted.",2012-07-02 +26747636,Disentangling the influence of cell phone usage in the dilemma zone: An econometric approach.,"This paper focuses on developing an analysis framework to study the impact of cell phone treatment (cell phone type and call status) on driver behavior in the presence of a dilemma zone. Specifically, we examine how the treatment influences the driver maneuver decision at the intersection (stop or cross) and the eventual success of the maneuver. For a stop maneuver, success is defined as stopping before the stop line. Similarly, for a cross maneuver, success is defined as clearing the intersection safely before the light turns red. The eventual success or failure of the driver's decision process is dependent on the factors that affected the maneuver decision. Hence it is important to recognize the interconnectedness of the stop or cross decision with its eventual success (or failure). Toward this end, we formulate and estimate a joint framework to analyze the stop/cross decision with its eventual success (or failure) simultaneously. The study is conducted based on driving simulator data provided online for the 2014 Transportation Research Board Data Contest at http://depts.washington.edu/hfsm/upload.php. The model is estimated to analyze drivers' behavior at the onset of yellow by employing exogenous variables from three broad categories: driver characteristics, cell phone attributes and driving attributes. We also generate probability surfaces to identify dilemma zone distribution associated with different cell phone treatment types. 
The plots clearly illustrate the impact of various cellphone treatments on driver dilemma zone behavior.",2015-12-31 +23758607,Linking the potato genome to the conserved ortholog set (COS) markers.,"

Background

Conserved ortholog set (COS) markers are an important functional genomics resource that has greatly improved orthology detection in Asterid species. A comprehensive list of these markers is available at Sol Genomics Network (http://solgenomics.net/) and many of these have been placed on the genetic maps of a number of solanaceous species.

Results

We amplified over 300 COS markers from eight potato accessions involving two diploid landraces of Solanum tuberosum Andigenum group (formerly classified as S. goniocalyx, S. phureja), and a dihaploid clone derived from a modern tetraploid cultivar of S. tuberosum and the wild species S. berthaultii, S. chomatophilum, and S. paucissectum. By BLASTn (Basic Local Alignment Search Tool of the NCBI, National Center for Biotechnology Information) algorithm we mapped the DNA sequences of these markers into the potato genome sequence. Additionally, we mapped a subset of these markers genetically in potato and present a comparison between the physical and genetic locations of these markers in potato and in comparison with the genetic location in tomato. We found that most of the COS markers are single-copy in the reference genome of potato and that the genetic location in tomato and physical location in potato sequence are mostly in agreement. However, we did find some COS markers that are present in multiple copies and those that map in unexpected locations. Sequence comparisons between species show that some of these markers may be paralogs.

Conclusions

The sequence-based physical map becomes helpful in identification of markers for traits of interest thereby reducing the number of markers to be tested for applications like marker assisted selection, diversity, and phylogenetic studies.",2013-06-08 +22563069,A holistic in silico approach to predict functional sites in protein structures.,"

Motivation

Proteins execute and coordinate cellular functions by interacting with other biomolecules. Among these interactions, protein-protein (including peptide-mediated), protein-DNA and protein-RNA interactions cover a wide range of critical processes and cellular functions. The functional characterization of proteins requires the description and mapping of functional biomolecular interactions and the identification and characterization of functional sites is an important step towards this end.

Results

We have developed a novel computational method, Multi-VORFFIP (MV), a tool to predicts protein-, peptide-, DNA- and RNA-binding sites in proteins. MV utilizes a wide range of structural, evolutionary, experimental and energy-based information that is integrated into a common probabilistic framework by means of a Random Forest ensemble classifier. While remaining competitive when compared with current methods, MV is a centralized resource for the prediction of functional sites and is interfaced by a powerful web application tailored to facilitate the use of the method and analysis of predictions to non-expert end-users.

Availability

http://www.bioinsilico.org/MVORFFIP",2012-05-04 +25348213,DIANA--algorithmic improvements for analysis of data-independent acquisition MS data.,"

Motivation

Data independent acquisition mass spectrometry has emerged as a reproducible and sensitive alternative in quantitative proteomics, where parsing the highly complex tandem mass spectra requires dedicated algorithms. Recently, targeted data extraction was proposed as a novel analysis strategy for this type of data, but it is important to further develop these concepts to provide quality-controlled, interference-adjusted and sensitive peptide quantification.

Results

We here present the algorithm DIANA and the classifier PyProphet, which are based on new probabilistic sub-scores to classify the chromatographic peaks in targeted data-independent acquisition data analysis. The algorithm is capable of providing accurate quantitative values and increased recall at a controlled false discovery rate, in a complex gold standard dataset. Importantly, we further demonstrate increased confidence gained by the use of two complementary data-independent acquisition targeted analysis algorithms, as well as increased numbers of quantified peptide precursors in complex biological samples.

Availability and implementation

DIANA is implemented in scala and python and available as open source (Apache 2.0 license) or pre-compiled binaries from http://quantitativeproteomics.org/diana. PyProphet can be installed from PyPi (https://pypi.python.org/pypi/pyprophet).

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-27 +26984843,Controversial opinion: evaluation of EGR1 and LAMA2 loci for high myopia in Chinese populations.,"Functional studies have suggested the important role of early growth response 1 (EGR1) and Laminin α2-chain (LAMA2) in human eye development. Genetic studies have reported a significant association of the single nucleotide polymorphism (SNP) in the LAMA2 gene with myopia. This study aimed to evaluate the association of the tagging SNPs (tSNPs) in the EGR1 and LAMA2 genes with high myopia in two independent Han Chinese populations. Four tSNPs (rs11743810 in the EGR1 gene; rs2571575, rs9321170, and rs1889891 in the LAMA2 gene) were selected, according to the HapMap database (http://hapmap.ncbi.nlm.nih.gov), and were genotyped using the ligase detection reaction (LDR) approach for 167 Han Chinese nuclear families with extremely highly myopic offspring (<-10.0 diopters) and an independent group with 485 extremely highly myopic cases (<-10.0 diopters) and 499 controls. Direct sequencing was used to confirm the LDR results in twenty randomly selected subjects. Family-based association analysis was performed using the family-based association test (FBAT) software package (Version 1.5.5). Population-based association analysis was performed using the Chi-square test. The association analysis power was estimated using online software (http://design.cs.ucla.edu). The FBAT demonstrated that all four tSNPs tested did not show association with high myopia (P>0.05). Haplotype analysis of tSNPs in the LAMA2 genes also did not show a significant association (P>0.05). Meanwhile, population-based association analysis also showed no significant association results with high myopia (P>0.05). 
On the basis of our family- and population-based analyses for the Han Chinese population, we did not find positive association signals of the four SNPs in the LAMA2 and EGR1 genes with high myopia.",2016-03-01 +26093149,Chimira: analysis of small RNA sequencing data and microRNA modifications.,"

Unlabelled

Chimira is a web-based system for microRNA (miRNA) analysis from small RNA-Seq data. Sequences are automatically cleaned, trimmed, size selected and mapped directly to miRNA hairpin sequences. This generates count-based miRNA expression data for subsequent statistical analysis. Moreover, it is capable of identifying epi-transcriptomic modifications in the input sequences. Supported modification types include multiple types of 3'-modifications (e.g. uridylation, adenylation), 5'-modifications and also internal modifications or variation (ADAR editing or single nucleotide polymorphisms). Besides cleaning and mapping of input sequences to miRNAs, Chimira provides a simple and intuitive set of tools for the analysis and interpretation of the results (see also Supplementary Material). These allow the visual study of the differential expression between two specific samples or sets of samples, the identification of the most highly expressed miRNAs within sample pairs (or sets of samples) and also the projection of the modification profile for specific miRNAs across all samples. Other tools have already been published in the past for various types of small RNA-Seq analysis, such as UEA workbench, seqBuster, MAGI, OASIS and CAP-miRSeq, CPSS for modifications identification. A comprehensive comparison of Chimira with each of these tools is provided in the Supplementary Material. Chimira outperforms all of these tools in total execution speed and aims to facilitate simple, fast and reliable analysis of small RNA-Seq data allowing also, for the first time, identification of global microRNA modification profiles in a simple intuitive interface.

Availability and implementation

Chimira has been developed as a web application and it is accessible here: http://www.ebi.ac.uk/research/enright/software/chimira.

Contact

aje@ebi.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-20 +24304899,"SCOPe: Structural Classification of Proteins--extended, integrating SCOP and ASTRAL data and classification of new structures.","Structural Classification of Proteins-extended (SCOPe, http://scop.berkeley.edu) is a database of protein structural relationships that extends the SCOP database. SCOP is a manually curated ordering of domains from the majority of proteins of known structure in a hierarchy according to structural and evolutionary relationships. Development of the SCOP 1.x series concluded with SCOP 1.75. The ASTRAL compendium provides several databases and tools to aid in the analysis of the protein structures classified in SCOP, particularly through the use of their sequences. SCOPe extends version 1.75 of the SCOP database, using automated curation methods to classify many structures released since SCOP 1.75. We have rigorously benchmarked our automated methods to ensure that they are as accurate as manual curation, though there are many proteins to which our methods cannot be applied. SCOPe is also partially manually curated to correct some errors in SCOP. SCOPe aims to be backward compatible with SCOP, providing the same parseable files and a history of changes between all stable SCOP and SCOPe releases. SCOPe also incorporates and updates the ASTRAL database. The latest release of SCOPe, 2.03, contains 59 514 Protein Data Bank (PDB) entries, increasing the number of structures classified in SCOP by 55% and including more than 65% of the protein structures in the PDB.",2013-12-03 +25630311,A psycholinguistic database for traditional Chinese character naming.,"In this study, we aimed to provide a large-scale set of psycholinguistic norms for 3,314 traditional Chinese characters, along with their naming reaction times (RTs), collected from 140 Chinese speakers. 
The lexical and semantic variables in the database include frequency, regularity, familiarity, consistency, number of strokes, homophone density, semantic ambiguity rating, phonetic combinability, semantic combinability, and the number of disyllabic compound words formed by a character. Multiple regression analyses were conducted to examine the predictive powers of these variables for the naming RTs. The results demonstrated that these variables could account for a significant portion of variance (55.8%) in the naming RTs. An additional multiple regression analysis was conducted to demonstrate the effects of consistency and character frequency. Overall, the regression results were consistent with the findings of previous studies on Chinese character naming. This database should be useful for research into Chinese language processing, Chinese education, or cross-linguistic comparisons. The database can be accessed via an online inquiry system (http://ball.ling.sinica.edu.tw/namingdatabase/index.html).",2016-03-01 +23055621,SALMONELLABASE - An online database of druggable targets of Salmonella species.,"

Unlabelled

Salmonellosis is one of the most common and widely distributed food borne diseases caused by Salmonella serovars. The emergence of multi drug resistant strains has become a threatening public health problem and targeting unique effectors of this pathogen can be considered as a powerful strategy for drug design. SalmonellaBase is an online web portal serving as an integrated source of information about Salmonella serovars with the data required for the structural and functional studies and the analysis of druggable targets in Salmonella. We have identified several target proteins, which helps in the pathogenicity of the organism and predicted their structures. The database will have the information on completely sequenced genomes of Salmonella species with the complete set of protein sequences of the respective strains, determined structures, predicted protein structures and biochemical pathways of the respective strains. In addition, we have provided information about name and source of the protein, Uniprot and Protein Data Bank codes and literature information. Furthermore, SalmonellaBase is linked to related databases and other resources. We have set up a web interface with different search and display options so that users have the ability to get the data in several ways. SalmonellaBase is a freely available database.

Availability

http://www.salmonellabase.com/",2012-08-03 +27009818,[Comparison of Clinical Outcomes of VATS and SBRT in the Treatment of NSCLC].,"

Background and objective

More and more chest physicians chose video-assisted thoracoscopic surgery (VATS) to treat early stage non-small cell lung cancer (NSCLC). In recent years, there is still lack of a random trial comparing the clinical outcomes of VATS and stereotactic body radiotherapy (SBRT) in treating NSCLC. To provide a reference for the choice between VATS and SBRT, in the current meta-analysis, we compared the clinical outcomes of these two therapies in treating NSCLC.

Methods

Five major medical databases, CNKI, CPVIP (http://www.cqvip.com/), PubMed, Embase, and ISI web of science were systematically searched to identify all studies from January 2010 to February 2016 on VATS and SBRT therapies. Finally, original English or Chinese publications of stage I and II NSCLC with adequate patients and adequate SBRT doses were enrolled. A multivariate random effects model was used to perform a meta-analysis to compare overall survival and disease free survival between VATS and SBRT while adjusting for median age and operable patient numbers.

Results

Fourteen VATS studies (included 3,482 patients) and nineteen SBRT studies (included 3,997 patients) published in the same period were eligible. The median age and follow-up duration were 64 years and 43.4 months for VATS patients and 74 years and 29.5 months for SBRT patients, respectively. The mean unadjusted overall survival rates at 1, 2, 3, and 5 years with VATS were 93.5%, 84.9%, 77.0% and 76.3% compared to 89.0% 73.3% 59.0% and 36.7% with SBRT. The mean unadjusted disease free survival rates at 1, 2, 3, and 5 years with VATS were 93.6%, 88.6%, 85.6% and 75.6% compared to 79.3%, 72.1%, 64.9% and 58.9% with SBRT. While, after adjusted for proportion of operable patients and median age, the estimate overall survival rates at 1, 2, 3, and 5 years with VATS were 94%, 92%, 84% and 71% compared to 98%, 95%, 87% and 83% with SBRT. And the estimate disease free survival rates at 1, 2, 3, and 5 years with VATS were 97%, 94%, 85% and 75% compared to 88%, 81%, 74% and 63% with SBRT.

Conclusion

Before adjustment, the SBRT group showed worse clinical outcomes (overall survival and disease free survival) than VATS group. When take consider of median age and operability, the patients with SBRT differ substantially from patients treated with VATS. After adjustment of median age and operability, there are no significant differences between these two therapy in treating NSCLC.",2016-03-01 +24763918,PLIC: protein-ligand interaction clusters.,"Most of the biological processes are governed through specific protein-ligand interactions. Discerning different components that contribute toward a favorable protein- ligand interaction could contribute significantly toward better understanding protein function, rationalizing drug design and obtaining design principles for protein engineering. The Protein Data Bank (PDB) currently hosts the structure of ∼68 000 protein-ligand complexes. Although several databases exist that classify proteins according to sequence and structure, a mere handful of them annotate and classify protein-ligand interactions and provide information on different attributes of molecular recognition. In this study, an exhaustive comparison of all the biologically relevant ligand-binding sites (84 846 sites) has been conducted using PocketMatch: a rapid, parallel, in-house algorithm. PocketMatch quantifies the similarity between binding sites based on structural descriptors and residue attributes. A similarity network was constructed using binding sites whose PocketMatch scores exceeded a high similarity threshold (0.80). The binding site similarity network was clustered into discrete sets of similar sites using the Markov clustering (MCL) algorithm. Furthermore, various computational tools have been used to study different attributes of interactions within the individual clusters. 
The attributes can be roughly divided into (i) binding site characteristics including pocket shape, nature of residues and interaction profiles with different kinds of atomic probes, (ii) atomic contacts consisting of various types of polar, hydrophobic and aromatic contacts along with binding site water molecules that could play crucial roles in protein-ligand interactions and (iii) binding energetics involved in interactions derived from scoring functions developed for docking. For each ligand-binding site in each protein in the PDB, site similarity information, clusters they belong to and description of site attributes are provided as a relational database-protein-ligand interaction clusters (PLIC). Database URL: http://proline.biochem.iisc.ernet.in/PLIC.",2014-04-23 +25630312,LSE-Sign: A lexical database for Spanish Sign Language.,"The LSE-Sign database is a free online tool for selecting Spanish Sign Language stimulus materials to be used in experiments. It contains 2,400 individual signs taken from a recent standardized LSE dictionary, and a further 2,700 related nonsigns. Each entry is coded for a wide range of grammatical, phonological, and articulatory information, including handshape, location, movement, and non-manual elements. The database is accessible via a graphically based search facility which is highly flexible both in terms of the search options available and the way the results are displayed. LSE-Sign is available at the following website: http://www.bcbl.eu/databases/lse/.",2016-03-01 +26703974,MixChIP: a probabilistic method for cell type specific protein-DNA binding analysis.,"

Background

Transcription factors (TFs) are proteins that bind to DNA and regulate gene expression. To understand details of gene regulation, characterizing TF binding sites in different cell types, diseases and among individuals is essential. However, sometimes TF binding can only be measured from biological samples that contain multiple cell or tissue types. Sample heterogeneity can have a considerable effect on TF binding site detection. While manual separation techniques can be used to isolate a cell type of interest from heterogeneous samples, such techniques are challenging and can change intra-cellular interactions, including protein-DNA binding. Computational deconvolution methods have emerged as an alternative strategy to study heterogeneous samples and numerous methods have been proposed to analyze gene expression. However, no computational method exists to deconvolve cell type specific TF binding from heterogeneous samples.

Results

We present a probabilistic method, MixChIP, to identify cell type specific TF binding sites from heterogeneous chromatin immunoprecipitation sequencing (ChIP-seq) data. Our method simultaneously estimates the binding strength in different cell types as well as the proportions of different cell types in each sample when only partial prior information about cell type composition is available. We demonstrate the utility of MixChIP by analyzing ChIP-seq data from two cell lines which we artificially mix to generate (simulated) heterogeneous samples and by analyzing ChIP-seq data from breast cancer patients measuring oestrogen receptor (ER) binding in primary breast cancer tissues. We show that MixChIP is more accurate in detecting TF binding sites from multiple heterogeneous ChIP-seq samples than the standard methods which do not account for sample heterogeneity.

Conclusions

Our results show that MixChIP can estimate cell-type proportions and identify cell type specific TF binding sites from heterogeneous ChIP-seq samples. Thus, MixChIP can be an invaluable tool in analyzing heterogeneous ChIP-seq samples, such as those originating from cancer studies. R implementation is available at http://research.ics.aalto.fi/csb/software/mixchip/ .",2015-12-24 +24919658,Web services-based text-mining demonstrates broad impacts for interoperability and process simplification. ,"The Critical Assessment of Information Extraction systems in Biology (BioCreAtIvE) challenge evaluation tasks collectively represent a community-wide effort to evaluate a variety of text-mining and information extraction systems applied to the biological domain. The BioCreative IV Workshop included five independent subject areas, including Track 3, which focused on named-entity recognition (NER) for the Comparative Toxicogenomics Database (CTD; http://ctdbase.org). Previously, CTD had organized document ranking and NER-related tasks for the BioCreative Workshop 2012; a key finding of that effort was that interoperability and integration complexity were major impediments to the direct application of the systems to CTD's text-mining pipeline. This underscored a prevailing problem with software integration efforts. Major interoperability-related issues included lack of process modularity, operating system incompatibility, tool configuration complexity and lack of standardization of high-level inter-process communications. One approach to potentially mitigate interoperability and general integration issues is the use of Web services to abstract implementation details; rather than integrating NER tools directly, HTTP-based calls from CTD's asynchronous, batch-oriented text-mining pipeline could be made to remote NER Web services for recognition of specific biological terms using BioC (an emerging family of XML formats) for inter-process communications. 
To test this concept, participating groups developed Representational State Transfer /BioC-compliant Web services tailored to CTD's NER requirements. Participants were provided with a comprehensive set of training materials. CTD evaluated results obtained from the remote Web service-based URLs against a test data set of 510 manually curated scientific articles. Twelve groups participated in the challenge. Recall, precision, balanced F-scores and response times were calculated. Top balanced F-scores for gene, chemical and disease NER were 61, 74 and 51%, respectively. Response times ranged from fractions-of-a-second to over a minute per article. We present a description of the challenge and summary of results, demonstrating how curation groups can effectively use interoperable NER technologies to simplify text-mining pipeline implementation. Database URL: http://ctdbase.org/",2014-06-10 +24756107,"Large-scale determination of sequence, structure, and function relationships in cytosolic glutathione transferases across the biosphere.","The cytosolic glutathione transferase (cytGST) superfamily comprises more than 13,000 nonredundant sequences found throughout the biosphere. Their key roles in metabolism and defense against oxidative damage have led to thousands of studies over several decades. Despite this attention, little is known about the physiological reactions they catalyze and most of the substrates used to assay cytGSTs are synthetic compounds. A deeper understanding of relationships across the superfamily could provide new clues about their functions. To establish a foundation for expanded classification of cytGSTs, we generated similarity-based subgroupings for the entire superfamily. Using the resulting sequence similarity networks, we chose targets that broadly covered unknown functions and report here experimental results confirming GST-like activity for 82 of them, along with 37 new 3D structures determined for 27 targets. 
These new data, along with experimentally known GST reactions and structures reported in the literature, were painted onto the networks to generate a global view of their sequence-structure-function relationships. The results show how proteins of both known and unknown function relate to each other across the entire superfamily and reveal that the great majority of cytGSTs have not been experimentally characterized or annotated by canonical class. A mapping of taxonomic classes across the superfamily indicates that many taxa are represented in each subgroup and highlights challenges for classification of superfamily sequences into functionally relevant classes. Experimental determination of disulfide bond reductase activity in many diverse subgroups illustrate a theme common for many reaction types. Finally, sequence comparison between an enzyme that catalyzes a reductive dechlorination reaction relevant to bioremediation efforts with some of its closest homologs reveals differences among them likely to be associated with evolution of this unusual reaction. Interactive versions of the networks, associated with functional and other types of information, can be downloaded from the Structure-Function Linkage Database (SFLD; http://sfld.rbvi.ucsf.edu).",2014-04-22 +22135418,Metscape 2 bioinformatics tool for the analysis and visualization of metabolomics and gene expression data.,"

Motivation

Metabolomics is a rapidly evolving field that holds promise to provide insights into genotype-phenotype relationships in cancers, diabetes and other complex diseases. One of the major informatics challenges is providing tools that link metabolite data with other types of high-throughput molecular data (e.g. transcriptomics, proteomics), and incorporate prior knowledge of pathways and molecular interactions.

Results

We describe a new, substantially redesigned version of our tool Metscape that allows users to enter experimental data for metabolites, genes and pathways and display them in the context of relevant metabolic networks. Metscape 2 uses an internal relational database that integrates data from KEGG and EHMN databases. The new version of the tool allows users to identify enriched pathways from expression profiling data, build and analyze the networks of genes and metabolites, and visualize changes in the gene/metabolite data. We demonstrate the applications of Metscape to annotate molecular pathways for human and mouse metabolites implicated in the pathogenesis of sepsis-induced acute lung injury, for the analysis of gene expression and metabolite data from pancreatic ductal adenocarcinoma, and for identification of the candidate metabolites involved in cancer and inflammation.

Availability

Metscape is part of the National Institutes of Health-supported National Center for Integrative Biomedical Informatics (NCIBI) suite of tools, freely available at http://metscape.ncibi.org. It can be downloaded from http://cytoscape.org or installed via Cytoscape plugin manager.

Contact

metscape-help@umich.edu; akarnovs@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-30 +21357576,Automatically identifying and annotating mouse embryo gene expression patterns.,"

Motivation

Deciphering the regulatory and developmental mechanisms for multicellular organisms requires detailed knowledge of gene interactions and gene expressions. The availability of large datasets with both spatial and ontological annotation of the spatio-temporal patterns of gene expression in mouse embryo provides a powerful resource to discover the biological function of embryo organization. Ontological annotation of gene expressions consists of labelling images with terms from the anatomy ontology for mouse development. If the spatial genes of an anatomical component are expressed in an image, the image is then tagged with a term of that anatomical component. The current annotation is done manually by domain experts, which is both time consuming and costly. In addition, the level of detail is variable, and inevitably errors arise from the tedious nature of the task. In this article, we present a new method to automatically identify and annotate gene expression patterns in the mouse embryo with anatomical terms.

Results

The method takes images from in situ hybridization studies and the ontology for the developing mouse embryo, it then combines machine learning and image processing techniques to produce classifiers that automatically identify and annotate gene expression patterns in these images. We evaluate our method on image data from the EURExpress study, where we use it to automatically classify nine anatomical terms: humerus, handplate, fibula, tibia, femur, ribs, petrous part, scapula and head mesenchyme. The accuracy of our method lies between 70% and 80% with few exceptions. We show that other known methods have lower classification performance than ours. We have investigated the images misclassified by our method and found several cases where the original annotation was not correct. This shows our method is robust against this kind of noise.

Availability

The annotation result and the experimental dataset in the article can be freely accessed at http://www2.docm.mmu.ac.uk/STAFF/L.Han/geneannotation/.

Contact

l.han@mmu.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-25 +25361575,BiobankConnect: software to rapidly connect data elements for pooled analysis across biobanks using ontological and lexical indexing.,"

Objective

Pooling data across biobanks is necessary to increase statistical power, reveal more subtle associations, and synergize the value of data sources. However, searching for desired data elements among the thousands of available elements and harmonizing differences in terminology, data collection, and structure, is arduous and time consuming.

Materials and methods

To speed up biobank data pooling we developed BiobankConnect, a system to semi-automatically match desired data elements to available elements by: (1) annotating the desired elements with ontology terms using BioPortal; (2) automatically expanding the query for these elements with synonyms and subclass information using OntoCAT; (3) automatically searching available elements for these expanded terms using Lucene lexical matching; and (4) shortlisting relevant matches sorted by matching score.

Results

We evaluated BiobankConnect using human curated matches from EU-BioSHaRE, searching for 32 desired data elements in 7461 available elements from six biobanks. We found 0.75 precision at rank 1 and 0.74 recall at rank 10 compared to a manually curated set of relevant matches. In addition, best matches chosen by BioSHaRE experts ranked first in 63.0% and in the top 10 in 98.4% of cases, indicating that our system has the potential to significantly reduce manual matching work.

Conclusions

BiobankConnect provides an easy user interface to significantly speed up the biobank harmonization process. It may also prove useful for other forms of biomedical data integration. All the software can be downloaded as a MOLGENIS open source app from http://www.github.com/molgenis, with a demo available at http://www.biobankconnect.org.",2014-10-31 +26453823,Whole-exome sequencing enhances prognostic classification of myeloid malignancies.,"

Purpose

To date the standard nosology and prognostic schemes for myeloid neoplasms have been based on morphologic and cytogenetic criteria. We sought to test the hypothesis that a comprehensive, unbiased analysis of somatic mutations may allow for an improved classification of these diseases to predict outcome (overall survival).

Experimental design

We performed whole-exome sequencing (WES) of 274 myeloid neoplasms, including myelodysplastic syndrome (MDS, N=75), myelodysplastic/myeloproliferative neoplasia (MDS/MPN, N=33), and acute myeloid leukemia (AML, N=22), augmenting the resulting mutational data with public WES results from AML (N=144). We fit random survival forests (RSFs) to the patient survival and clinical/cytogenetic data, with and without gene mutation information, to build prognostic classifiers. A targeted sequencing assay was used to sequence predictor genes in an independent cohort of 507 patients, whose accompanying data were used to evaluate performance of the risk classifiers.

Results

We show that gene mutations modify the impact of standard clinical variables on patient outcome, and therefore their incorporation hones the accuracy of prediction. The mutation-based classification scheme robustly predicted patient outcome in the validation set (log rank P=6.77 × 10(-21); poor prognosis vs. good prognosis categories HR 10.4, 95% CI 3.21-33.6). The RSF-based approach also compares favorably with recently-published efforts to incorporate mutational information for MDS prognosis.

Conclusion

The results presented here support the inclusion of mutational information in prognostic classification of myeloid malignancies. Our classification scheme is implemented in a publicly available web-based tool (http://myeloid-risk.case.edu/).",2015-10-08 +24220091,RNA Bricks--a database of RNA 3D motifs and their interactions.,"The RNA Bricks database (http://iimcb.genesilico.pl/rnabricks), stores information about recurrent RNA 3D motifs and their interactions, found in experimentally determined RNA structures and in RNA-protein complexes. In contrast to other similar tools (RNA 3D Motif Atlas, RNA Frabase, Rloom) RNA motifs, i.e. 'RNA bricks' are presented in the molecular environment, in which they were determined, including RNA, protein, metal ions, water molecules and ligands. All nucleotide residues in RNA bricks are annotated with structural quality scores that describe real-space correlation coefficients with the electron density data (if available), backbone geometry and possible steric conflicts, which can be used to identify poorly modeled residues. The database is also equipped with an algorithm for 3D motif search and comparison. The algorithm compares spatial positions of backbone atoms of the user-provided query structure and of stored RNA motifs, without relying on sequence or secondary structure information. This enables the identification of local structural similarities among evolutionarily related and unrelated RNA molecules. Besides, the search utility enables searching 'RNA bricks' according to sequence similarity, and makes it possible to identify motifs with modified ribonucleotide residues at specific positions.",2013-11-12 +27134638,ExactSearch: a web-based plant motif search tool.,"

Background

Plant biologists frequently need to examine if a sequence motif bound by a specific transcription or translation factor is present in the proximal promoters or 3' untranslated regions (3' UTR) of a set of plant genes of interest. To achieve such a task, plant biologists have to not only identify an appropriate algorithm for motif searching, but also manipulate the large volume of sequence data, making it burdensome to carry out or fulfill.

Result

In this study, we developed a web portal that enables plant molecular biologists to search for DNA motifs especially degenerate ones in custom sequences or the flanking regions of all genes in the 50 plant species whose genomes have been sequenced. A web tool like this is demanded to meet a variety of needs of plant biologists for identifying the potential gene regulatory relationships. We implemented a suffix tree algorithm to accelerate the searching process of a group of motifs in a multitude of target genes. The motifs to be searched can be in the degenerate bases in addition to adenine (A), cytosine (C), guanine (G), and thymine (T). The target sequences to be searched can be custom sequences or the selected proximal gene sequences from any one of the 50 sequenced plant species. The web portal also contains the functionality to facilitate the search of motifs that are represented by position probability matrix in above-mentioned species. Currently, the algorithm can accomplish an exhaust search of 100 motifs in 35,000 target sequences of 2 kb long in 4.2 min. However, the runtime may change in the future depending on the space availability, number of running jobs, network traffic, data loading, and output packing and delivery through electronic mailing.

Conclusion

A web portal was developed to facilitate searching of motifs presents in custom sequences or the proximal promoters or 3' UTR of 50 plant species with the sequenced genomes. This web tool is accessible by using this URL: http://sys.bio.mtu.edu/motif/index.php.",2016-04-28 +24525374,lncRNAMap: a map of putative regulatory functions in the long non-coding transcriptome.,"

Background

Recent studies have demonstrated the importance of long non-coding RNAs (lncRNAs) in chromatin remodeling, and in transcriptional and post-transcriptional regulation. However, only a few specific lncRNAs are well understood, whereas others are completely uncharacterized. To address this, there is a need for user-friendly platform to studying the putative regulatory functions of human lncRNAs.

Description

lncRNAMap is an integrated and comprehensive database relating to exploration of the putative regulatory functions of human lncRNAs with two mechanisms of regulation, by encoding siRNAs and by acting as miRNA decoys. To investigate lncRNAs producing siRNAs that regulate protein-coding genes, lncRNAMap integrated small RNAs (sRNAs) that were supported by publicly available deep sequencing data from various sRNA libraries and constructed lncRNA-derived siRNA-target interactions. In addition, lncRNAMap demonstrated that lncRNAs can act as targets for miRNAs that would otherwise regulate protein-coding genes. Previously studies indicated that intergenic lncRNAs (lincRNAs) either positive or negative regulated neighboring genes, therefore, lncRNAMap surveyed neighboring genes within a 1Mb distance from the genomic location of specific lncRNAs and provided the expression profiles of lncRNA and its neighboring genes. The gene expression profiles may supply the relationship between lncRNA and its neighboring genes.

Conclusions

lncRNAMap is a powerful user-friendly platform for the investigation of putative regulatory functions of human lncRNAs with producing siRNAs and acting as miRNA decoy. lncRNAMap is freely available on the web at http://lncRNAMap.mbc.nctu.edu.tw/.",2014-01-23 +22139937,NRG-CING: integrated validation reports of remediated experimental biomolecular NMR data and coordinates in wwPDB.,"For many macromolecular NMR ensembles from the Protein Data Bank (PDB) the experiment-based restraint lists are available, while other experimental data, mainly chemical shift values, are often available from the BioMagResBank. The accuracy and precision of the coordinates in these macromolecular NMR ensembles can be improved by recalculation using the available experimental data and present-day software. Such efforts, however, generally fail on half of all NMR ensembles due to the syntactic and semantic heterogeneity of the underlying data and the wide variety of formats used for their deposition. We have combined the remediated restraint information from our NMR Restraints Grid (NRG) database with available chemical shifts from the BioMagResBank and the Common Interface for NMR structure Generation (CING) structure validation reports into the weekly updated NRG-CING database (http://nmr.cmbi.ru.nl/NRG-CING). Eleven programs have been included in the NRG-CING production pipeline to arrive at validation reports that list for each entry the potential inconsistencies between the coordinates and the available experimental NMR data. The longitudinal validation of these data in a publicly available relational database yields a set of indicators that can be used to judge the quality of every macromolecular structure solved with NMR. The remediated NMR experimental data sets and validation reports are freely available online.",2011-12-01 +26177966,A Bayesian approach for structure learning in oscillating regulatory networks.,"

Motivation

Oscillations lie at the core of many biological processes, from the cell cycle, to circadian oscillations and developmental processes. Time-keeping mechanisms are essential to enable organisms to adapt to varying conditions in environmental cycles, from day/night to seasonal. Transcriptional regulatory networks are one of the mechanisms behind these biological oscillations. However, while identifying cyclically expressed genes from time series measurements is relatively easy, determining the structure of the interaction network underpinning the oscillation is a far more challenging problem.

Results

Here, we explicitly leverage the oscillatory nature of the transcriptional signals and present a method for reconstructing network interactions tailored to this special but important class of genetic circuits. Our method is based on projecting the signal onto a set of oscillatory basis functions using a Discrete Fourier Transform. We build a Bayesian Hierarchical model within a frequency domain linear model in order to enforce sparsity and incorporate prior knowledge about the network structure. Experiments on real and simulated data show that the method can lead to substantial improvements over competing approaches if the oscillatory assumption is met, and remains competitive also in cases it is not.

Availability

DSS, experiment scripts and data are available at http://homepages.inf.ed.ac.uk/gsanguin/DSS.zip.

Contact

d.trejo-banos@sms.ed.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-14 +26920945,Unraveling the pectinolytic function of Bacteroides xylanisolvens using a RNA-seq approach and mutagenesis.,"

Background

Diet and particularly dietary fibres have an impact on the gut microbiome and play an important role in human health and disease. Pectin is a highly consumed dietary fibre found in fruits and vegetables and is also a widely used additive in the food industry. Yet there is no information on the effect of pectin on the human gut microbiome. Likewise, little is known on gut pectinolytic bacteria and their enzyme systems. This study was undertaken to investigate the mechanisms of pectin degradation by the prominent human gut symbiont Bacteroides xylanisolvens.

Results

Transcriptomic analyses of B. xylanisolvens XB1A grown on citrus and apple pectins at mid- and late-log phases highlighted six polysaccharide utilization loci (PUL) that were overexpressed on pectin relative to glucose. The PUL numbers used in this report are those given by Terrapon et al. (Bioinformatics 31(5):647-55, 2015) and found in the PUL database: http://www.cazy.org/PULDB/. Based on their CAZyme composition, we propose that PUL 49 and 50, the most overexpressed PULs on both pectins and at both growth phases, are involved in homogalacturonan (HG) and type I rhamnogalacturonan (RGI) degradation, respectively. PUL 13 and PUL 2 could be involved in the degradation of arabinose-containing side chains and of type II rhamnogalacturonan (RGII), respectively. Considering that HG is the most abundant moiety (>70%) within pectin, the importance of PUL 49 was further investigated by insertion mutagenesis into the susC-like gene. The insertion blocked transcription of the susC-like and the two downstream genes (susD-like/FnIII). The mutant showed strong growth reduction, thus confirming that PUL 49 plays a major role in pectin degradation.

Conclusion

This study shows the existence of six PULs devoted to pectin degradation by B. xylanisolvens, one of them being particularly important in this function. Hence, this species deploys a very complex enzymatic machinery that probably reflects the structural complexity of pectin. Our findings also highlight the metabolic plasticity of B. xylanisolvens towards dietary fibres that contributes to its competitive fitness within the human gut ecosystem. Wider functional and ecological studies are needed to understand how dietary fibers and especially plant cell wall polysaccharides drive the composition and metabolism of the fibrolytic and non-fibrolytic community within the gut microbial ecosystem.",2016-02-27 +27923350,ConEVA: a toolbox for comprehensive assessment of protein contacts.,"

Background

In recent years, successful contact prediction methods and contact-guided ab initio protein structure prediction methods have highlighted the importance of incorporating contact information into protein structure prediction methods. It is also observed that for almost all globular proteins, the quality of contact prediction dictates the accuracy of structure prediction. Hence, like many existing evaluation measures for evaluating 3D protein models, various measures are currently used to evaluate predicted contacts, with the most popular ones being precision, coverage and distance distribution score (Xd).

Results

We have built a web application and a downloadable tool, ConEVA, for comprehensive assessment and detailed comparison of predicted contacts. Besides implementing existing measures for contact evaluation we have implemented new and useful methods of contact visualization using chord diagrams and comparison using Jaccard similarity computations. For a set (or sets) of predicted contacts, the web application runs even when a native structure is not available, visualizing the contact coverage and similarity between predicted contacts. We applied the tool on various contact prediction data sets and present our findings and insights we obtained from the evaluation of effective contact assessments. ConEVA is publicly available at http://cactus.rnet.missouri.edu/coneva/ .

Conclusion

ConEVA is useful for a range of contact related analysis and evaluations including predicted contact comparison, investigation of individual protein folding using predicted contacts, and analysis of contacts in a structure of interest.",2016-12-07 +25581801,Targeted exploration and analysis of large cross-platform human transcriptomic compendia.,"We present SEEK (search-based exploration of expression compendia; http://seek.princeton.edu/), a query-based search engine for very large transcriptomic data collections, including thousands of human data sets from many different microarray and high-throughput sequencing platforms. SEEK uses a query-level cross-validation-based algorithm to automatically prioritize data sets relevant to the query and a robust search approach to identify genes, pathways and processes co-regulated with the query. SEEK provides multigene query searching with iterative metadata-based search refinement and extensive visualization-based analysis options.",2015-01-12 +26878124,A multilevel ant colony optimization algorithm for classical and isothermic DNA sequencing by hybridization with multiplicity information available.,"The classical sequencing by hybridization takes into account a binary information about sequence composition. A given element from an oligonucleotide library is or is not a part of the target sequence. However, the DNA chip technology has been developed and it enables to receive a partial information about multiplicity of each oligonucleotide the analyzed sequence consist of. Currently, it is not possible to assess the exact data of such type but even partial information should be very useful. Two realistic multiplicity information models are taken into consideration in this paper. The first one, called ""one and many"" assumes that it is possible to obtain information if a given oligonucleotide occurs in a reconstructed sequence once or more than once. 
According to the second model, called ""one, two and many"", one is able to receive from biochemical experiment information if a given oligonucleotide is present in an analyzed sequence once, twice or at least three times. An ant colony optimization algorithm has been implemented to verify the above models and to compare with existing algorithms for sequencing by hybridization which utilize the additional information. The proposed algorithm solves the problem with any kind of hybridization errors. Computational experiment results confirm that using even the partial information about multiplicity leads to increased quality of reconstructed sequences. Moreover, they also show that the more precise model enables to obtain better solutions and the ant colony optimization algorithm outperforms the existing ones. Test data sets and the proposed ant colony optimization algorithm are available on: http://bioserver.cs.put.poznan.pl/download/ACO4mSBH.zip.",2016-01-28 +25115331,KiMoSys: a web-based repository of experimental data for KInetic MOdels of biological SYStems.,"

Background

The kinetic modeling of biological systems is mainly composed of three steps that proceed iteratively: model building, simulation and analysis. In the first step, it is usually required to set initial metabolite concentrations, and to assign kinetic rate laws, along with estimating parameter values using kinetic data through optimization when these are not known. Although the rapid development of high-throughput methods has generated much omics data, experimentalists present only a summary of obtained results for publication, the experimental data files are not usually submitted to any public repository, or simply not available at all. In order to automatize as much as possible the steps of building kinetic models, there is a growing requirement in the systems biology community for easily exchanging data in combination with models, which represents the main motivation of KiMoSys development.

Description

KiMoSys is a user-friendly platform that includes a public data repository of published experimental data, containing concentration data of metabolites and enzymes and flux data. It was designed to ensure data management, storage and sharing for a wider systems biology community. This community repository offers a web-based interface and upload facility to turn available data into publicly accessible, centralized and structured-format data files. Moreover, it compiles and integrates available kinetic models associated with the data. KiMoSys also integrates some tools to facilitate the kinetic model construction process of large-scale metabolic networks, especially when the systems biologists perform computational research.

Conclusions

KiMoSys is a web-based system that integrates a public data and associated model(s) repository with computational tools, providing the systems biology community with a novel application facilitating data storage and sharing, thus supporting construction of ODE-based kinetic models and collaborative research projects. The web application implemented using Ruby on Rails framework is freely available for web access at http://kimosys.org, along with its full documentation.",2014-08-13 +25924931,plethy: management of whole body plethysmography data in R.,"

Background

Characterization of respiratory phenotypes can enhance complex trait and genomic studies involving allergic/autoimmune and infectious diseases. Many aspects of respiration can be measured using devices known as plethysmographs that can measure thoracic movement. One such approach (the Buxco platform) performs unrestrained whole body plethysmography on mice which infers thoracic movements from pressure differences from the act of inhalation and exhalation. While proprietary software is available to perform basic statistical analysis as part of machine's bundled software, it is desirable to be able to incorporate these analyses into high-throughput pipelines and integrate them with other data types, as well as leverage the wealth of analytic and visualization approaches provided by the R statistical computing environment.

Results

This manuscript describes the plethy package which is an R/Bioconductor framework for pre-processing and analysis of plethysmography data with emphasis on larger scale longitudinal experiments. The plethy package was designed to facilitate quality control and exploratory data analysis. We provide a demonstration of the features of plethy using a dataset assessing the respiratory effects over time of SARS and Influenza infection in mice.

Conclusion

The plethy package provides functionality for users to import, perform quality assessment and exploratory data analysis in a manner that allows interoperability with existing modelling tools. Our package is implemented in R and is freely available as part of the Bioconductor project http://www.bioconductor.org/packages/release/bioc/html/plethy.html .",2015-04-29 +26921390,"ETE 3: Reconstruction, Analysis, and Visualization of Phylogenomic Data.","The Environment for Tree Exploration (ETE) is a computational framework that simplifies the reconstruction, analysis, and visualization of phylogenetic trees and multiple sequence alignments. Here, we present ETE v3, featuring numerous improvements in the underlying library of methods, and providing a novel set of standalone tools to perform common tasks in comparative genomics and phylogenetics. The new features include (i) building gene-based and supermatrix-based phylogenies using a single command, (ii) testing and visualizing evolutionary models, (iii) calculating distances between trees of different size or including duplications, and (iv) providing seamless integration with the NCBI taxonomy database. ETE is freely available at http://etetoolkit.org.",2016-02-26 +25897133,Babelomics 5.0: functional interpretation for new generations of genomic data.,"Babelomics has been running for more than one decade offering a user-friendly interface for the functional analysis of gene expression and genomic data. Here we present its fifth release, which includes support for Next Generation Sequencing data including gene expression (RNA-seq), exome or genome resequencing. Babelomics has simplified its interface, being now more intuitive. Improved visualization options, such as a genome viewer as well as an interactive network viewer, have been implemented. New technical enhancements at both, client and server sides, makes the user experience faster and more dynamic. 
Babelomics offers user-friendly access to a full range of methods that cover: (i) primary data analysis, (ii) a variety of tests for different experimental designs and (iii) different enrichment and network analysis algorithms for the interpretation of the results of such tests in the proper functional context. In addition to the public server, local copies of Babelomics can be downloaded and installed. Babelomics is freely available at: http://www.babelomics.org.",2015-04-20 +25708300,Discovering monotonic stemness marker genes from time-series stem cell microarray data.,"

Background

Identification of genes with ascending or descending monotonic expression patterns over time or stages of stem cells is an important issue in time-series microarray data analysis. We propose a method named Monotonic Feature Selector (MFSelector) based on a concept of total discriminating error (DEtotal) to identify monotonic genes. MFSelector considers various time stages in stage order (i.e., Stage One vs. other stages, Stages One and Two vs. remaining stages and so on) and computes DEtotal of each gene. MFSelector can successfully identify genes with monotonic characteristics.

Results

We have demonstrated the effectiveness of MFSelector on two synthetic data sets and two stem cell differentiation data sets: embryonic stem cell neurogenesis (ESCN) and embryonic stem cell vasculogenesis (ESCV) data sets. We have also performed extensive quantitative comparisons of the three monotonic gene selection approaches. Some of the monotonic marker genes such as OCT4, NANOG, BLBP, discovered from the ESCN dataset exhibit consistent behavior with that reported in other studies. The role of monotonic genes found by MFSelector in either stemness or differentiation is validated using information obtained from Gene Ontology analysis and other literature. We justify and demonstrate that descending genes are involved in the proliferation or self-renewal activity of stem cells, while ascending genes are involved in differentiation of stem cells into variant cell lineages.

Conclusions

We have developed a novel system, easy to use even with no pre-existing knowledge, to identify gene sets with monotonic expression patterns in multi-stage as well as in time-series genomics matrices. The case studies on ESCN and ESCV have helped to get a better understanding of stemness and differentiation. The novel monotonic marker genes discovered from a data set are found to exhibit consistent behavior in another independent data set, demonstrating the utility of the proposed method. The MFSelector R function and data sets can be downloaded from: http://microarray.ym.edu.tw/tools/MFSelector/.",2015-01-21 +22494395,InterStoreDB: a generic integration resource for genetic and genomic data.,"Associating phenotypic traits and quantitative trait loci (QTL) to causative regions of the underlying genome is a key goal in agricultural research. InterStoreDB is a suite of integrated databases designed to assist in this process. The individual databases are species independent and generic in design, providing access to curated datasets relating to plant populations, phenotypic traits, genetic maps, marker loci and QTL, with links to functional gene annotation and genomic sequence data. Each component database provides access to associated metadata, including data provenance and parameters used in analyses, thus providing users with information to evaluate the relative worth of any associations identified. The databases include CropStoreDB, for management of population, genetic map, QTL and trait measurement data, SeqStoreDB for sequence-related data and AlignStoreDB, which stores sequence alignment information, and allows navigation between genetic and genomic datasets. Genetic maps are visualized and compared using the CMAP tool, and functional annotation from sequenced genomes is provided via an EnsEMBL-based genome browser. 
This framework facilitates navigation of the multiple biological domains involved in genetics and genomics research in a transparent manner within a single portal. We demonstrate the value of InterStoreDB as a tool for Brassica research. InterStoreDB is available from: http://www.interstoredb.org.",2012-05-01 +27570674,PALME: PAtients Like My gEnome.,"PAtients Like My gEnome (PALME) is a webservice that matches patients based on their genome and healthcare profiles. We support two types of inputs: (1) dual query (a variant + phenotype), and (2) genome sequences. For the first type of queries, we will show the patient profile matching the inputs. For the second type of queries, we will calculate similarity (based on Hamming distance) and show the distribution of phenotypes of similar patients given the input sequences of a target patient. Using the publicly available Personal Genome Project (PGP) dataset, we retrieved 4,360 patients' profiles along with their genome data, medical conditions, and treatments. We used a subset of these profiles to build PALME to be an interactive system to support healthcare profile matching. PALME is designed not only for biomedical researchers to support their studies on human genome but also for individuals to explore their own genetics and health. The webservice is accessible at (http://pgp.ucsd-dbmi.org:3838/GenAnaly/PatientGen/#) and the demo videos are available at (https://youtu.be/ycP0rXQizlc).",2016-07-20 +27318206,STRUM: structure-based prediction of protein stability changes upon single-point mutation.,"

Motivation

Mutations in human genome are mainly through single nucleotide polymorphism, some of which can affect stability and function of proteins, causing human diseases. Several methods have been proposed to predict the effect of mutations on protein stability; but most require features from experimental structure. Given the fast progress in protein structure prediction, this work explores the possibility to improve the mutation-induced stability change prediction using low-resolution structure modeling.

Results

We developed a new method (STRUM) for predicting stability change caused by single-point mutations. Starting from wild-type sequences, 3D models are constructed by the iterative threading assembly refinement (I-TASSER) simulations, where physics- and knowledge-based energy functions are derived on the I-TASSER models and used to train STRUM models through gradient boosting regression. STRUM was assessed by 5-fold cross validation on 3421 experimentally determined mutations from 150 proteins. The Pearson correlation coefficient (PCC) between predicted and measured changes of Gibbs free-energy gap, ΔΔG, upon mutation reaches 0.79 with a root-mean-square error 1.2 kcal/mol in the mutation-based cross-validations. The PCC reduces if separating training and test mutations from non-homologous proteins, which reflects inherent correlations in the current mutation sample. Nevertheless, the results significantly outperform other state-of-the-art methods, including those built on experimental protein structures. Detailed analyses show that the most sensitive features in STRUM are the physics-based energy terms on I-TASSER models and the conservation scores from multiple-threading template alignments. However, the ΔΔG prediction accuracy has only a marginal dependence on the accuracy of protein structure models as long as the global fold is correct. These data demonstrate the feasibility to use low-resolution structure modeling for high-accuracy stability change prediction upon point mutations.

Availability and implementation

http://zhanglab.ccmb.med.umich.edu/STRUM/ CONTACT: qiang@suda.edu.cn and zhng@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-17 +26114781,Xylitol and caries prevention.,"

Data sources

Cochrane Oral Health Group Trials Register, the Cochrane Central Register of Controlled Trials (CENTRAL), Medline, Embase, CINAHL, Web of Science Conference Proceedings, Proquest Dissertations and Theses, US National Institutes of Health Trials Register (http://clinicaltrials.gov) and the WHO Clinical Trials Registry Platform for ongoing trials. No language or year restrictions were used.

Study selection

Randomised controlled trials assessing the effects of xylitol products on dental caries in children and adults.

Data extraction and synthesis

Two review authors independently screened the results of the electronic searches, extracted data and assessed the risk of bias of the included studies. Authors were contacted where possible for missing data or clarification where feasible. For continuous outcomes, means and standard deviations were used to obtain the mean difference and 95% confidence interval (CI). Continuous data was used to calculate prevented fractions (PF) and 95% CIs to summarise the percentage reduction in caries. For dichotomous outcomes, reported risk ratios (RR) and 95% CIs were used. As there were fewer than four studies included in the meta-analysis, a fixed effect model was used.

Results

Ten studies were included with a total of 5903 participants. One study was assessed as being at low risk of bias, two were assessed as unclear risk of bias with seven at high risk of bias. Over 2.5–3 years, low quality evidence demonstrated that with 4216 children analysed, a fluoride toothpaste with 10% xylitol (exact dosage unsure) reduced caries by 13% when compared to a fluoride only toothpaste (PF −0.13, 95% CI −0.18 to −0.08). Remaining evidence of the use of xylitol in children has risk of bias and uncertainty of effect and was therefore insufficient to determine a benefit from xylitol. Four studies reported that there were no adverse effects from any of the interventions. Two studies reported similar rates of adverse effects between study arms. The remaining studies either mentioned adverse effects but did not report any usable data, or did not mention them at all. Adverse effects include sores in the mouth, cramps, bloating, constipation, flatulence and loose stool or diarrhoea.

Conclusions

Low quality evidence suggested that fluoride toothpaste containing xylitol may be more effective than fluoride-only toothpaste for preventing caries in the permanent teeth of children. The effect estimate should be interpreted with caution due to high risk of bias and the fact that it was derived from two studies that were carried out by the same authors in the same population. The remaining evidence was low to very low quality and is insufficient to determine whether any other xylitol-containing products can prevent caries in infants, older children or adults.",2015-06-01 +28490512,Preconceptional and gestational weight trajectories and risk of delivering a small-for-gestational-age baby in rural Gambia.,"Background: Maternal nutritional status is a key determinant of small for gestational age (SGA), but some knowledge gaps remain, particularly regarding the role of the energy balance entering pregnancy.Objective: We investigated how preconceptional and gestational weight trajectories (summarized by individual-level traits) are associated with SGA risk in rural Gambia.Design: The sample comprised 670 women in a trial with serial weight data (7310 observations) that were available before and during pregnancy. Individual trajectories from 6 mo before conception to 30 wk of gestation were produced with the use of multilevel modeling. Summary traits were expressed as weight z scores [weight z score at 3 mo preconception (zwt-3 mo), weight z score at conception, weight z score at 3 mo postconception, weight z score at 7 mo postconception (zwt+7 mo), and conditional measures that represented the change from the preceding time] and were related to SGA risk with the use of Poisson regression with confounder adjustment; linear splines were used to account for nonlinearity.Results: Maternal weight at each time point had a consistent nonlinear relation with SGA risk. 
For example, the zwt-3 mo estimate was stronger in women with values ≤0.5 (RR: 0.736; 95% CI: 0.594, 0.910) than in women with values >0.5 (RR: 0.920; 95% CI: 0.682, 1.241). The former group had the highest observed SGA prevalence. Focusing on weight change, only conditional zwt+7 mo was associated with SGA and only in women with values >-0.5 (RR: 0.579; 95% CI: 0.463, 0.724).Conclusions: Protection against delivering an SGA neonate offered by greater preconceptional or gestational weight may be most pronounced in more undernourished and vulnerable women. Independent of this possibility, greater second- and third-trimester weight gain beyond a threshold may be protective. This trial was registered at http://www.isrctn.com/ as ISRCTN49285450.",2017-05-10 +27153670,WeSME: uncovering mutual exclusivity of cancer drivers and beyond.,"

Motivation

Mutual exclusivity is a widely recognized property of many cancer drivers. Knowledge about these relationships can provide important insights into cancer drivers, cancer-driving pathways and cancer subtypes. It can also be used to predict new functional interactions between cancer driving genes and uncover novel cancer drivers. Currently, most mutual exclusivity analyses are performed focusing on a limited set of genes in part due to the computational cost required to rigorously compute P -values.

Results

To reduce the computing cost and perform less restricted mutual exclusivity analysis, we developed an efficient method to estimate P -values while controlling the mutation rates of individual patients and genes similar to the permutation test. A comprehensive mutual exclusivity analysis allowed us to uncover mutually exclusive pairs, some of which may have relatively low mutation rates. These pairs often included likely cancer drivers that have been missed in previous analyses. More importantly, our results demonstrated that mutual exclusivity can also provide information that goes beyond the interactions between cancer drivers and can, for example, elucidate different mutagenic processes in different cancer groups. In particular, including frequently mutated, long genes such as TTN in our analysis allowed us to observe interesting patterns of APOBEC activity in breast cancer and identify a set of related driver genes that are highly predictive of patient survival. In addition, we utilized our mutual exclusivity analysis in support of a previously proposed model where APOBEC activity is the underlying process that causes TP53 mutations in a subset of breast cancer cases.

Availability and implementation

http://www.ncbi.nlm.nih.gov/CBBresearch/Przytycka/index.cgi#wesme.

Contact

przytyck@ncbi.nlm.nih.gov.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +27150809,KeyPathwayMinerWeb: online multi-omics network enrichment.,"We present KeyPathwayMinerWeb, the first online platform for de novo pathway enrichment analysis directly in the browser. Given a biological interaction network (e.g. protein-protein interactions) and a series of molecular profiles derived from one or multiple OMICS studies (gene expression, for instance), KeyPathwayMiner extracts connected sub-networks containing a high number of active or differentially regulated genes (proteins, metabolites) in the molecular profiles. The web interface at (http://keypathwayminer.compbio.sdu.dk) implements all core functionalities of the KeyPathwayMiner tool set such as data integration, input of background knowledge, batch runs for parameter optimization and visualization of extracted pathways. In addition to an intuitive web interface, we also implemented a RESTful API that now enables other online developers to integrate network enrichment as a web service into their own platforms.",2016-05-05 +24647562,Disease prediction models and operational readiness.,"The objective of this manuscript is to present a systematic review of biosurveillance models that operate on select agents and can forecast the occurrence of a disease event. We define a disease event to be a biological event with focus on the One Health paradigm. These events are characterized by evidence of infection and or disease condition. We reviewed models that attempted to predict a disease event, not merely its transmission dynamics and we considered models involving pathogens of concern as determined by the US National Select Agent Registry (as of June 2011). 
We searched commercial and government databases and harvested Google search results for eligible models, using terms and phrases provided by public health analysts relating to biosurveillance, remote sensing, risk assessments, spatial epidemiology, and ecological niche modeling. After removal of duplications and extraneous material, a core collection of 6,524 items was established, and these publications along with their abstracts are presented in a semantic wiki at http://BioCat.pnnl.gov. As a result, we systematically reviewed 44 papers, and the results are presented in this analysis. We identified 44 models, classified as one or more of the following: event prediction (4), spatial (26), ecological niche (28), diagnostic or clinical (6), spread or response (9), and reviews (3). The model parameters (e.g., etiology, climatic, spatial, cultural) and data sources (e.g., remote sensing, non-governmental organizations, expert opinion, epidemiological) were recorded and reviewed. A component of this review is the identification of verification and validation (V&V) methods applied to each model, if any V&V method was reported. All models were classified as either having undergone Some Verification or Validation method, or No Verification or Validation. We close by outlining an initial set of operational readiness level guidelines for disease prediction models based upon established Technology Readiness Level definitions.",2014-03-19 +28542123,"Malaria Surveillance - United States, 2014.","

Problem/condition

Malaria in humans is caused by intraerythrocytic protozoa of the genus Plasmodium. These parasites are transmitted by the bite of an infective female Anopheles mosquito. The majority of malaria infections in the United States occur among persons who have traveled to regions with ongoing malaria transmission. However, malaria is occasionally acquired by persons who have not traveled out of the country through exposure to infected blood products, congenital transmission, laboratory exposure, or local mosquitoborne transmission. Malaria surveillance in the United States is conducted to identify episodes of local transmission and to guide prevention recommendations for travelers.

Period covered

This report summarizes cases in persons with onset of illness in 2014 and trends during previous years.

Description of system

Malaria cases diagnosed by blood film, polymerase chain reaction, or rapid diagnostic tests are reported to local and state health departments by health care providers or laboratory staff. Case investigations are conducted by local and state health departments, and reports are transmitted to CDC through the National Malaria Surveillance System, National Notifiable Diseases Surveillance System, or direct CDC consultations. CDC conducts antimalarial drug resistance marker testing on blood samples submitted by health care providers or local or state health departments. Data from these reporting systems serve as the basis for this report.

Results

CDC received reports of 1,724 confirmed malaria cases, including one congenital case and two cryptic cases, with onset of symptoms in 2014 among persons in the United States. The number of confirmed cases in 2014 is consistent with the number of confirmed cases reported in 2013 (n = 1,741; this number has been updated from a previous publication to account for delayed reporting for persons with symptom onset occurring in late 2013). Plasmodium falciparum, P. vivax, P. ovale, and P. malariae were identified in 66.1%, 13.3%, 5.2%, and 2.7% of cases, respectively. Less than 1.0% of patients were infected with two species. The infecting species was unreported or undetermined in 11.7% of cases. CDC provided diagnostic assistance for 14.2% of confirmed cases and tested 12.0% of P. falciparum specimens for antimalarial resistance markers. Of patients who reported purpose of travel, 57.5% were visiting friends and relatives (VFR). Among U.S. residents for whom information on chemoprophylaxis use and travel region was known, 7.8% reported that they initiated and adhered to a chemoprophylaxis drug regimen recommended by CDC for the regions to which they had traveled. Thirty-two cases were among pregnant women, none of whom had adhered to chemoprophylaxis. Among all reported cases, 17.0% were classified as severe illness, and five persons with malaria died. CDC received 137 P. falciparum-positive samples for the detection of antimalarial resistance markers (although some loci for chloroquine and mefloquine were untestable for up to nine samples). Of the 137 samples tested, 131 (95.6%) had genetic polymorphisms associated with pyrimethamine drug resistance, 96 (70.0%) with sulfadoxine resistance, 77 (57.5%) with chloroquine resistance, three (2.3%) with mefloquine drug resistance, one (<1.0%) with atovaquone resistance, and two (1.4%) with artemisinin resistance.

Interpretation

The overall trend of malaria cases has been increasing since 1973; the number of cases reported in 2014 is the fourth highest annual total since then. Despite progress in reducing global prevalence of malaria, the disease remains endemic in many regions and use of appropriate prevention measures by travelers is still inadequate.

Public health action

Completion of data elements on the malaria case report form increased slightly in 2014 compared with 2013, but still remains unacceptably low. In 2014, at least one essential element (i.e., species, travel history, or resident status) was missing in 21.3% of case report forms. Incomplete reporting compromises efforts to examine trends in malaria cases and prevent infections. VFR travelers continue to be a difficult population to reach with effective malaria prevention strategies. Evidence-based prevention strategies that effectively target VFR travelers need to be developed and implemented to have a substantial impact on the number of imported malaria cases in the United States. Fewer U.S. resident patients reported taking chemoprophylaxis in 2014 (27.2%) compared with 2013 (28.6%), and adherence was poor among those who did take chemoprophylaxis. Proper use of malaria chemoprophylaxis will prevent the majority of malaria illnesses and reduce risk for severe disease (https://www.cdc.gov/malaria/travelers/drugs.html). Malaria infections can be fatal if not diagnosed and treated promptly with antimalarial medications appropriate for the patient's age and medical history, likely country of malaria acquisition, and previous use of antimalarial chemoprophylaxis. Recent molecular laboratory advances have enabled CDC to identify and conduct molecular surveillance of antimalarial drug resistance markers (https://www.cdc.gov/malaria/features/ars.html) and improve the ability of CDC to track, guide treatment, and manage drug resistance in malaria parasites both domestically and globally. For this effort to be successful, specimens should be submitted for all cases diagnosed in the United States. Clinicians should consult CDC Guidelines for Treatment of Malaria in the United States and contact the CDC Malaria Hotline for case management advice, when needed. 
Malaria treatment recommendations can be obtained online at https://www.cdc.gov/malaria/diagnosis_treatment/ or by calling the Malaria Hotline at 770-488-7788 or toll-free at 855-856-4713.",2017-05-26 +27331114,Data on the kinetics of in vitro assembled chromatin.,"Here, we use LC-MS/MS and SWATH-MS to describe the kinetics of in vitro assembled chromatin supported by an embryo extract prepared from preblastoderm Drosophila melanogaster embryos (DREX). This system allows easy manipulation of distinct aspects of chromatin assembly such as post-translational histone modifications, the levels of histone chaperones and the concentration of distinct DNA binding factors. In total, 480 proteins have been quantified as chromatin enriched factors and their binding kinetics have been monitored in the time course of 15 min, 1 h and 4 h of chromatin assembly. The data accompanying the manuscript on this approach, Völker-Albert et al., 2016 ""A quantitative proteomic analysis of in vitro assembled chromatin"" [1], has been deposited to the ProteomeXchange Consortium (http://www.proteomexchange.org) via the PRIDE partner repository with the dataset identifier submission number PRIDE: PXD002537 and PRIDE: PXD003445.",2016-06-01 +24893568,Association between OGG1 Ser326Cys and APEX1 Asp148Glu polymorphisms and breast cancer risk: a meta-analysis.,"

Background

The base excision repair (BER) pathway removes DNA damage caused by ionizing radiation, reactive oxidative species and methylating agents. OGG1 and APE1 are two important genes in the BER pathway. Many epidemiological studies have evaluated the association between polymorphisms in the two BER genes (OGG1 Ser326Cys and APE1 Asp148Glu) and breast cancer risk. However, the results are inconsistent.

Methods

We searched the electronic databases including PubMed, Embase and Cochrane library for all eligible studies for the period up to February 2014. Data were extracted by two independent authors and pooled odds ratios (ORs) with corresponding 95% confidence intervals (CIs) were used to assess the strength of the association.

Results

A total of 17 studies including 9,040 cases and 10,042 controls were available for OGG1 Ser326Cys polymorphism and 7 studies containing 2,979 cases and 3,111 controls were included for APE1 Asp148Glu polymorphism. With respect to OGG1 Ser326Cys polymorphism, we did not find a significant association with breast cancer risk when all eligible studies were pooled into the meta-analysis. However, in subgroup analyses by ethnicity and menopausal status, a statistically significant increased breast cancer risk was found in Asian populations (Cys/Cys vs. Ser/Ser: OR=1.157, 95% CI 1.013-1.321, P=0.011; Cys/Cys vs. Ser/Cys+Ser/Ser: OR=1.113, 95% CI 1.009-1.227, P=0.014) and postmenopausal patients (Cys/Cys vs. Ser/Cys+Ser/Ser: OR=1.162, 95% CI 1.003-1.346, P=0.024). In subgroup analysis according to quality score, source of control, and HWE in controls, no significant association was detected. With respect to APE1 Asp148Glu polymorphism, no significant association with breast cancer risk was demonstrated in the overall and stratified analyses.

Conclusions

The present meta-analysis suggests that the OGG1 Ser326Cys polymorphism may be a risk factor for breast cancer in Asians and postmenopausal patients. Further large and well-designed studies are needed to confirm this association.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1156934297124915.",2014-06-03 +24137012,The Global Genome Biodiversity Network (GGBN) Data Portal.,"The Global Genome Biodiversity Network (GGBN) was formed in 2011 with the principal aim of making high-quality well-documented and vouchered collections that store DNA or tissue samples of biodiversity, discoverable for research through a networked community of biodiversity repositories. This is achieved through the GGBN Data Portal (http://data.ggbn.org), which links globally distributed databases and bridges the gap between biodiversity repositories, sequence databases and research results. Advances in DNA extraction techniques combined with next-generation sequencing technologies provide new tools for genome sequencing. Many ambitious genome sequencing projects with the potential to revolutionize biodiversity research consider access to adequate samples to be a major bottleneck in their workflow. This is linked not only to accelerating biodiversity loss and demands to improve conservation efforts but also to a lack of standardized methods for providing access to genomic samples. Biodiversity biobank-holding institutions urgently need to set a standard of collaboration towards excellence in collections stewardship, information access and sharing and responsible and ethical use of such collections. GGBN meets these needs by enabling and supporting accessibility and the efficient coordinated expansion of biodiversity biobanks worldwide.",2013-10-16 +25468824,Cohort Profile: The Australian Longitudinal Study of Ageing (ALSA).,"In response to the expressed need for more sophisticated and multidisciplinary data concerning ageing of the Australian population, the Australian Longitudinal Study of Ageing (ALSA) was established some two decades ago in Adelaide, South Australia. 
At Baseline in 1992, 2087 participants living in the community or in residential care (ranging in age from 65 to 103 years) were interviewed in their place of residence (1031 or 49% women), including 565 couples. By 2013, 12 Waves had been completed; both face-to-face and telephone personal interviews were conducted. Data collected included self-reports of demographic details, health, depression, morbid conditions, hospitalization, gross mobility, physical performance, activities of daily living, lifestyle activities, social resources, exercise, education and income. Objective performance data for physical and cognitive function were also collected. The ALSA data are held at the Flinders Centre for Ageing Studies, Flinders University. Procedures for data access, information on collaborations, publications and other details can be found at [http://flinders.edu.au/sabs/fcas/].",2014-12-01 +28039166,An ensemble approach to protein fold classification by integration of template-based assignment and support vector machine classifier.,"

Motivation

Protein fold classification is a critical step in protein structure prediction. There are two possible ways to classify protein folds. One is through template-based fold assignment and the other is ab-initio prediction using machine learning algorithms. Combination of both solutions to improve the prediction accuracy was never explored before.

Results

We developed two algorithms, HH-fold and SVM-fold for protein fold classification. HH-fold is a template-based fold assignment algorithm using the HHsearch program. SVM-fold is a support vector machine-based ab-initio classification algorithm, in which a comprehensive set of features are extracted from three complementary sequence profiles. These two algorithms are then combined, resulting in the ensemble approach TA-fold. We performed a comprehensive assessment for the proposed methods by comparing with ab-initio methods and template-based threading methods on six benchmark datasets. An accuracy of 0.799 was achieved by TA-fold on the DD dataset that consists of proteins from 27 folds. This represents improvement of 5.4-11.7% over ab-initio methods. After updating this dataset to include more proteins in the same folds, the accuracy increased to 0.971. In addition, TA-fold achieved >0.9 accuracy on a large dataset consisting of 6451 proteins from 184 folds. Experiments on the LE dataset show that TA-fold consistently outperforms other threading methods at the family, superfamily and fold levels. The success of TA-fold is attributed to the combination of template-based fold assignment and ab-initio classification using features from complementary sequence profiles that contain rich evolution information.

Availability and implementation

http://yanglab.nankai.edu.cn/TA-fold/.

Contact

yangjy@nankai.edu.cn or mhb-506@163.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2017-03-01 +26345175,A robust algorithm for optimizing protein structures with NMR chemical shifts.,"Over the past decade, a number of methods have been developed to determine the approximate structure of proteins using minimal NMR experimental information such as chemical shifts alone, sparse NOEs alone or a combination of comparative modeling data and chemical shifts. However, there have been relatively few methods that allow these approximate models to be substantively refined or improved using the available NMR chemical shift data. Here, we present a novel method, called Chemical Shift driven Genetic Algorithm for biased Molecular Dynamics (CS-GAMDy), for the robust optimization of protein structures using experimental NMR chemical shifts. The method incorporates knowledge-based scoring functions and structural information derived from NMR chemical shifts via a unique combination of multi-objective MD biasing, a genetic algorithm, and the widely used XPLOR molecular modelling language. Using this approach, we demonstrate that CS-GAMDy is able to refine and/or fold models that are as much as 10 Å (RMSD) away from the correct structure using only NMR chemical shift data. CS-GAMDy is also able to refine of a wide range of approximate or mildly erroneous protein structures to more closely match the known/correct structure and the known/correct chemical shifts. We believe CS-GAMDy will allow protein models generated by sparse restraint or chemical-shift-only methods to achieve sufficiently high quality to be considered fully refined and ""PDB worthy"". The CS-GAMDy algorithm is explained in detail and its performance is compared over a range of refinement scenarios with several commonly used protein structure refinement protocols. 
The program has been designed to be easily installed and easily used and is available at http://www.gamdy.ca.",2015-09-07 +22748168,"Rational drug repositioning guided by an integrated pharmacological network of protein, disease and drug.","

Background

The process of drug discovery and development is time-consuming and costly, and the probability of success is low. Therefore, there is rising interest in repositioning existing drugs for new medical indications. When successful, this process reduces the risk of failure and costs associated with de novo drug development. However, in many cases, new indications of existing drugs have been found serendipitously. Thus there is a clear need for establishment of rational methods for drug repositioning.

Results

In this study, we have established a database we call ""PharmDB"" which integrates data associated with disease indications, drug development, and associated proteins, and known interactions extracted from various established databases. To explore linkages of known drugs to diseases of interest from within PharmDB, we designed the Shared Neighborhood Scoring (SNS) algorithm. And to facilitate exploration of tripartite (Drug-Protein-Disease) network, we developed a graphical data visualization software program called phExplorer, which allows us to browse PharmDB data in an interactive and dynamic manner. We validated this knowledge-based tool kit, by identifying a potential application of a hypertension drug, benzthiazide (TBZT), to induce lung cancer cell death.

Conclusions

By combining PharmDB, an integrated tripartite database, with Shared Neighborhood Scoring (SNS) algorithm, we developed a knowledge platform to rationally identify new indications for known FDA approved drugs, which can be customized to specific projects using manual curation. The data in PharmDB is open access and can be easily explored with phExplorer and accessed via BioMart web service (http://www.i-pharm.org/, http://biomart.i-pharm.org/).",2012-07-02 +24623726,What do relatives experience when supporting someone in early psychosis?,"

Objectives

In the United Kingdom (UK), the government has set out priorities to support relatives and carers. Despite this, many relatives of people experiencing psychosis continue to feel unsupported by mental health services. This may be due to lack of funding, high caseloads for mental health professionals, or due to a lack of understanding of what relatives experience as a result of their family member's psychosis. This research aimed to explore relatives' experiences of supporting a relative in early psychosis.

Design

Thematic analysis was used to conduct an in-depth study of relatives' experiences of supporting a family member in early psychosis.

Methods

Eligible individuals were recruited via local National Health Service Early Intervention Teams and other carer support agencies. Four focus groups were conducted, each with a range of five to seven participants.

Results

Four key themes reflecting relatives' understanding and management of psychosis were identified: 'Psychosis from the relatives' perspective'; 'Relatives' fight with the mental health 'system'; 'Is anybody listening? Does anyone understand?'; and 'Relatives' coping'. Clinical implications of these themes are discussed.

Conclusions

This study has clear implications for improvement in how relatives are supported in the United Kingdom, such as clearer guidance for staff about confidentiality, treating relatives as partners in care and providing better quality information for relatives.

Practitioner points

Continue to improve the Care Plan Approach process to include relatives as partners in care. Information available about psychosis needs to be clear and, where possible, clarify the processes and protocols by which services operate and how to access appropriate help. Move away from simplistic rules about confidentiality and formalise procedures to allow relatives and carers access to the information they need, without impeding service users' rights. For example, providing additional training for professionals such as Rethink's 'Carers and Confidentiality' online resource (http://www.carersandconfidentiality.org.uk/). Improved support, supervision and training are needed for staff to deal with relatives' distress and the impact of psychosis. Relatives' experiences of services are more positive in specialist Early Interventions Services for psychosis, than in other health service teams.",2014-03-12 +24428888,OncomiRdbB: a comprehensive database of microRNAs and their targets in breast cancer.,"

Background

Given the estimate that 30% of our genes are controlled by microRNAs, it is essential that we understand the precise relationship between microRNAs and their targets. OncomiRs are microRNAs (miRNAs) that have been frequently shown to be deregulated in cancer. However, although several oncomiRs have been identified and characterized, there is as yet no comprehensive compilation of this data which has rendered it underutilized by cancer biologists. There is therefore an unmet need in generating bioinformatic platforms to speed the identification of novel therapeutic targets.

Description

We describe here OncomiRdbB, a comprehensive database of oncomiRs mined from different existing databases for mouse and humans along with novel oncomiRs that we have validated in human breast cancer samples. The database also lists their respective predicted targets, identified using miRanda, along with their IDs, sequences, chromosome location and detailed description. This database facilitates querying by search strings including microRNA name, sequence, accession number, target genes and organisms. The microRNA networks and their hubs with respective targets at 3'UTR, 5'UTR and exons of different pathway genes were also deciphered using the 'R' algorithm.

Conclusion

OncomiRdbB is a comprehensive and integrated database of oncomiRs and their targets in breast cancer with multiple query options which will help enhance both understanding of the biology of breast cancer and the development of new and innovative microRNA based diagnostic tools and targets of therapeutic significance. OncomiRdbB is freely available for download through the URL link http://tdb.ccmb.res.in/OncomiRdbB/index.htm.",2014-01-15 +25883139,Assessing the impact of mutations found in next generation sequencing data over human signaling pathways.,"Modern sequencing technologies produce increasingly detailed data on genomic variation. However, conventional methods for relating either individual variants or mutated genes to phenotypes present known limitations given the complex, multigenic nature of many diseases or traits. Here we present PATHiVar, a web-based tool that integrates genomic variation data with gene expression tissue information. PATHiVar constitutes a new generation of genomic data analysis methods that allow studying variants found in next generation sequencing experiment in the context of signaling pathways. Simple Boolean models of pathways provide detailed descriptions of the impact of mutations in cell functionality so as, recurrences in functionality failures can easily be related to diseases, even if they are produced by mutations in different genes. Patterns of changes in signal transmission circuits, often unpredictable from individual genes mutated, correspond to patterns of affected functionalities that can be related to complex traits such as disease progression, drug response, etc. PATHiVar is available at: http://pathivar.babelomics.org.",2015-04-16 +26910004,Mini-Review: Monosomy 1p36 syndrome: reviewing the correlation between deletion sizes and phenotypes. ,"The major clinical features of monosomy 1p36 deletion are developmental delay and hypotonia associated with short stature and craniofacial dysmorphisms. 
The objective of this study was to review the cases of 1p36 deletion that was reported between 1999 and 2014, in order to identify a possible correlation between the size of the 1p36-deleted segment and the clinical phenotype of the disease. Scientific articles published in the (National Center for Biotechnology Information; NCBI http://www.ncbi.nlm.nih.gov/pubmed) and Scientific Electronic Library Online (www.scielo.com.br) databases were searched using key word combinations, such as ""1p36 deletion"", ""monosomy 1p36 deletion"", and ""1p36 deletion syndrome"". Articles in English or Spanish reporting the correlation between deletion sizes and the respective clinical phenotypes were retrieved, while letters, reviews, guidelines, and studies with mouse models were excluded. Among the 746 retrieved articles, only 17 (12 case reports and 5 series of cases), comprising 29 patients (9 males and 20 females, aged 0 months (neonate) to 22 years) bearing the 1p36 deletions and whose clinical phenotypes were described, met the inclusion criteria. The genotype-phenotype correlation in monosomy 1p36 is a challenge because of the variability in the size of the deleted segment, as well as in the clinical manifestations of similar size deletions. Therefore, the severity of the clinical features was not always associated with the deletion size, possibly because of the other influences, such as stochastic factors, epigenetic events, or reduced penetration of the deleted genes.",2016-02-22 +28139848,Continent catheterizable tubes/stomas in adult neuro-urological patients: A systematic review.,"

Aims

To systematically review all available evidence on the effectiveness and complications of continent cutaneous stoma or tube (CCS/T) to treat bladder-emptying difficulties in adult neuro-urological patients.

Methods

The search strategy and studies selection were performed on Medline, Embase, and Cochrane using the PICOS method according to the PRISMA statement (CRD42015019212; http://www.crd.york.ac.uk/PROSPERO).

Results

After screening 3,634 abstracts, 11 studies (all retrospective, enrolling 213 patients) were included in a narrative synthesis. Mean follow-up ranged from 21.6 months to 8.7 years (median: 36 months, IQR 28.5-44). At last follow-up, the ability to catheterize rate was ≥84% (except in one study: 58.3%) and the continence rate at stoma was >75%. Data comparing health-related quality-of-life before and after surgery were not available in any study. Overall, 85/213 postoperative events required reoperation: 7 events (7 patients) occurring ≤3 months postoperatively, 22 events (16 patients) >3 months, and 56 events (55 patients) for which the time after surgery was not specified. Sixty additional complications (60 patients) were reported but did not require surgical treatment. Tube stenosis occurred in 4-32% of the cases (median: 14%, IQR 9-24). Complications related to concomitant procedures (augmentation cystoplasty, pouch) included neovesicocutaneous fistulae, bladder stones, and bladder perforations. Risk of bias and confounding was high in all studies.

Conclusions

CCS/T appears to be an effective treatment option in adult neuro-urological patients unable to perform intermittent self-catheterization through the urethra. However, the complication rate is meaningful and the quality of evidence is low, especially in terms of long-term outcomes including the impact on the quality-of-life.",2017-01-31 +24014230,Supraglottic airway devices versus tracheal intubation for airway management during general anaesthesia in obese patients.,"

Background

The number of obese patients requiring general anaesthesia is likely to increase in coming years, and obese patients pose considerable challenges to the anaesthetic team. Tracheal intubation may be more difficult and risk of aspiration of gastric contents into the lungs is increased in obese patients. Supraglottic airway devices (SADs) offer an alternative airway to traditional tracheal intubation with potential benefits, including ease of fit and less airway disturbance. Although SADs are now widely used, clinical concerns remain that their use for airway management in obese patients may increase the risk of serious complications.

Objectives

We wished to examine whether supraglottic airway devices can be used as a safe and effective alternative to tracheal intubation in securing the airway during general anaesthesia in obese patients (with a body mass index (BMI) > 30 kg/m(2)).

Search methods

We searched for eligible trials in the following databases: Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library, Issue 8, 2012), MEDLINE via Ovid (from 1985 to 9 September 2012) and EMBASE via Ovid (from 1985 to 9 September 2012). The Cochrane highly sensitive filter for randomized controlled trials was applied in MEDLINE and EMBASE. We also searched trial registers such as www.clinicaltrials.gov and the Current Controlled Clinical Trials Website (http://www.controlled-trials.com/) for ongoing trials. The start date of these searches was limited to 1985, shortly before the first SAD was introduced, in 1988. We undertook forward and backward citation tracing for key review articles and eligible articles identified through the electronic resources.

Selection criteria

We considered all randomized controlled trials of participants aged 16 years and older with a BMI > 30 kg/m(2) undergoing general anaesthesia. We compared the use of any model of SAD with the use of tracheal tubes (TTs) of any design.

Data collection and analysis

We used standard methodological procedures expected by The Cochrane Collaboration. Two review authors independently assessed trial quality and extracted data, including information on adverse events. We contacted study authors for additional information. If sufficient data were available, results were presented as pooled risk ratios (RRs) with 95% confidence intervals (CIs) based on random-effects models (inverse variance method). We employed the Chi(2) test and calculated the I(2) statistic to investigate study heterogeneity.

Main results

We identified two eligible studies, both comparing the use of one model of SAD, the ProSeal laryngeal mask airway (PLMA) with a TT, with a total study population of 232. One study population underwent laparoscopic surgery. The included studies were generally of high quality, but there was an unavoidable high risk of bias in the main airway variables, such as change of device or laryngospasm, as the intubator could not be blinded. Many outcomes included data from one study only. A total of 5/118 (4.2%) participants randomly assigned to PLMA across both studies were changed to TT insertion because of failed or unsatisfactory placement of the device. Postoperative episodes of hypoxaemia (oxygen saturation < 92% whilst breathing air) were less common in the PLMA groups (RR 0.27, 95% CI 0.10 to 0.72). We found a significant postoperative difference in mean oxygen saturation, with saturation 2.54% higher in the PLMA group (95% CI 1.09% to 4.00%). This analysis showed high levels of heterogeneity between results (I(2) = 71%). The leak fraction was significantly higher in the PLMA group, with the largest difference seen during abdominal insufflation-a 6.4% increase in the PLMA group (95% CI 3.07% to 9.73%). No cases of pulmonary aspiration of gastric contents, mortality or serious respiratory complications were reported in either study. We are therefore unable to present effect estimates for these outcomes. In all, 2/118 participants with a PLMA suffered laryngospasm or bronchospasm compared with 4/114 participants with a TT. The pooled estimate shows a non-significant reduction in laryngospasm in the PLMA group (RR 0.48, 95% CI 0.09 to 2.59). Postoperative coughing was less common in the PLMA group (RR 0.10, 95% CI 0.03 to 0.31), and there was no significant difference in the risk of sore throat or dysphonia (RR 0.25, 95% CI 0.03 to 2.13). On average, PLMA placement took 5.9 seconds longer than TT placement (95% CI 3 seconds to 8.8 seconds). 
There was no significant difference in the proportion of successful first placements of a device, with 33/35 (94.2%) first-time successes in the PLMA group and 32/35 (91.4%) in the TT group.

Authors' conclusions

We have inadequate information to draw conclusions about safety, and we can only comment on one design of SAD (the PLMA) in obese patients. We conclude that during routine and laparoscopic surgery, PLMAs may take a few seconds longer to insert, but this is unlikely to be a matter of clinical importance. A failure rate of 3% to 5% can be anticipated in obese patients. However, once fitted, PLMAs provide at least as good oxygenation, with the caveat that the leak fraction may increase, although in the included studies, this did not affect ventilation. We found significant improvement in oxygenation during and after surgery, indicating better pulmonary performance of the PLMA, and reduced postoperative coughing, suggesting better recovery for patients.",2013-09-09 +28228207,Complications and Mortality Associated with Temporary Abdominal Closure Techniques: A Systematic Review and Meta-Analysis.,"Temporary abdominal closure (TAC) techniques are routinely used in the open abdomen. Ideally, they should prevent evisceration, aid in removal of unwanted fluid from the peritoneal cavity, facilitate in achieving safe definitive fascial closure, as well as prevent the development of intra-abdominal complications. TAC techniques used in the open abdomen were compared with negative pressure wound therapy (NPWT) to identify which was superior. A systematic review was conducted using the Preferred Reporting Items for Systematic Reviews and Meta-Analysis guidelines involving Medline, Excerpta Medica, Cochrane Central Register of Controlled Trials, Cumulative Index to Nursing and Allied Health Literature, and Clinicaltrials.gov. All studies describing TAC technique use in the open abdomen were eligible for inclusion. Data were analyzed per TAC technique in the form of a meta-analysis. A total of 225 articles were included in the final analysis. 
A meta-analysis involving only randomized controlled trials showed that NPWT with continuous fascial closure was superior to NPWT alone for definitive fascial closure [mean difference (MD): 35% ± 23%; P = 0.0044]. A subsequent meta-analysis involving all included studies confirmed its superiority across outcomes for definitive fascial closure (MD: 19% ± 3%; P < 0.0001), perioperative (MD: -4.0% ± 2.4%; P = 0.0013) and in-hospital (MD: -5.0% ± 2.9%; P = 0.0013) mortality, entero-atmospheric fistula (MD: -2.0% ± 1.8%; P = 0.0041), ventral hernia (MD: -4.0% ± 2.4%; P = 0.0010), and intra-abdominal abscess (MD: -3.1% ± 2.1%; P = 0.0044). Therefore, it was concluded that NPWT with continuous fascial traction is superior to NPWT alone.",2017-02-01 +25777524,WGBSSuite: simulating whole-genome bisulphite sequencing data and benchmarking differential DNA methylation analysis tools.,"

Motivation

As the number of studies looking at differences between DNA methylation increases, there is a growing demand to develop and benchmark statistical methods to analyse these data. To date no objective approach for the comparison of these methods has been developed and as such it remains difficult to assess which analysis tool is most appropriate for a given experiment. As a result, there is an unmet need for a DNA methylation data simulator that can accurately reproduce a wide range of experimental setups, and can be routinely used to compare the performance of different statistical models.

Results

We have developed WGBSSuite, a flexible stochastic simulation tool that generates single-base resolution DNA methylation data genome-wide. Several simulator parameters can be derived directly from real datasets provided by the user in order to mimic real case scenarios. Thus, it is possible to choose the most appropriate statistical analysis tool for a given simulated design. To show the usefulness of our simulator, we also report a benchmark of commonly used methods for differential methylation analysis.

Availability and implementation

WGBS code and documentation are available under GNU licence at http://www.wgbssuite.org.uk/

Contact

owen.rackham@imperial.ac.uk or l.bottolo@imperial.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-03-15 +27012785,A novel comparative pattern analysis approach identifies chronic alcohol mediated dysregulation of transcriptomic dynamics during liver regeneration.,

Background

Liver regeneration is inhibited by chronic ethanol consumption and this impaired repair response may contribute to the risk for alcoholic liver disease. We developed and applied a novel data analysis approach to assess the effect of chronic ethanol intake in the mechanisms responsible for liver regeneration. We performed a time series transcriptomic profiling study of the regeneration response after 2/3rd partial hepatectomy (PHx) in ethanol-fed and isocaloric control rats.

Results

We developed a novel data analysis approach focusing on comparative pattern counts (COMPACT) to exhaustively identify the dominant and subtle differential expression patterns. Approximately 6500 genes were differentially regulated in Ethanol or Control groups within 24 h after PHx. Adaptation to chronic ethanol intake significantly altered the immediate early gene expression patterns and nearly completely abrogated the cell cycle induction in hepatocytes post PHx. The patterns highlighted by COMPACT analysis contained several non-parenchymal cell specific markers indicating their aberrant transcriptional response as a novel mechanism through which chronic ethanol intake deregulates the integrated liver tissue response.

Conclusions

Our novel comparative pattern analysis revealed new insights into ethanol-mediated molecular changes in non-parenchymal liver cells as a possible contribution to the defective liver regeneration phenotype. The results revealed for the first time an ethanol-induced shift of hepatic stellate cells from a pro-regenerative phenotype to that of an anti-regenerative state after PHx. Our results can form the basis for novel interventions targeting the non-parenchymal cells in normalizing the dysfunctional repair response process in alcoholic liver disease. Our approach is illustrated online at http://compact.jefferson.edu .,2016-03-25 +25729900,"Who tweets? Deriving the demographic characteristics of age, occupation and social class from twitter user meta-data.","This paper specifies, designs and critically evaluates two tools for the automated identification of demographic data (age, occupation and social class) from the profile descriptions of Twitter users in the United Kingdom (UK). Meta-data data routinely collected through the Collaborative Social Media Observatory (COSMOS: http://www.cosmosproject.net/) relating to UK Twitter users is matched with the occupational lookup tables between job and social class provided by the Office for National Statistics (ONS) using SOC2010. Using expert human validation, the validity and reliability of the automated matching process is critically assessed and a prospective class distribution of UK Twitter users is offered with 2011 Census baseline comparisons. The pattern matching rules for identifying age are explained and enacted following a discussion on how to minimise false positives. The age distribution of Twitter users, as identified using the tool, is presented alongside the age distribution of the UK population from the 2011 Census. 
The automated occupation detection tool reliably identifies certain occupational groups, such as professionals, for which job titles cannot be confused with hobbies or are used in common parlance within alternative contexts. An alternative explanation on the prevalence of hobbies is that the creative sector is overrepresented on Twitter compared to 2011 Census data. The age detection tool illustrates the youthfulness of Twitter users compared to the general UK population as of the 2011 Census according to proportions, but projections demonstrate that there is still potentially a large number of older platform users. It is possible to detect ""signatures"" of both occupation and age from Twitter meta-data with varying degrees of accuracy (particularly dependent on occupational groups) but further confirmatory work is needed.",2015-03-02 +27306945,"A Mixture of 3 Bifidobacteria Decreases Abdominal Pain and Improves the Quality of Life in Children With Irritable Bowel Syndrome: A Multicenter, Randomized, Double-Blind, Placebo-Controlled, Crossover Trial.","

Goals

We assessed the efficacy of a probiotic mixture of Bifidobacterium infantis M-63, breve M-16V, and longum BB536 in improving abdominal pain (AP) and quality of life (QoL) in children with irritable bowel syndrome (IBS) and functional dyspepsia (FD).

Background

AP-associated functional gastrointestinal disorders, particularly IBS and FD, are common in pediatrics, and no well-established treatment is currently available. Although probiotics have shown promising results in adults, data in children are heterogeneous.

Study

Forty-eight children with IBS (median age, 11.2 y; range, 8 to 17.9 y) and 25 with FD (age, 11.6 y; range, 8 to 16.6 y) were randomized to receive either a mixture of 3 Bifidobacteria or a placebo for 6 weeks. After a 2-week ""washout"" period, each patient was switched to the other group and followed up for further 6 weeks. At baseline and follow-up, patients completed a symptom diary and a QoL questionnaire. AP resolution represented the primary outcome parameter.

Results

In IBS, but not in FD, Bifidobacteria determined a complete resolution of AP in a significantly higher proportion of children, when compared with placebo (P=0.006), and significantly improved AP frequency (P=0.02). The proportion of IBS children with an improvement in QoL was significantly higher after probiotics than after placebo (48% vs. 17%, P=0.001), but this finding was not confirmed in FD.

Conclusions

In children with IBS a mixture of Bifidobacterium infantis M-63, breve M-16V, and longum BB536 is associated with improvement in AP and QoL. These findings were not confirmed in FD subjects. Trial identifier: NCT02566876 (http://www.clinicaltrial.gov).",2017-01-01 +26055961,Optimization of sequence alignments according to the number of sequences vs. number of sites trade-off.,"

Background

Comparative analysis of homologous sequences enables the understanding of evolutionary patterns at the molecular level, unraveling the functional constraints that shaped the underlying genes. Bioinformatic pipelines for comparative sequence analysis typically include procedures for (i) alignment quality assessment and (ii) control of sequence redundancy. An additional, underassessed step is the control of the amount and distribution of missing data in sequence alignments. While the number of sequences available for a given gene typically increases with time, the site-specific coverage of each alignment position remains highly variable because of differences in sequencing and annotation quality, or simply because of biological variation. For any given alignment-based analysis, the selection of sequences thus defines a trade-off between the species representation and the quantity of sites with sufficient coverage to be included in the subsequent analyses.

Results

We introduce an algorithm for the optimization of sequence alignments according to the number of sequences vs. number of sites trade-off. The algorithm uses a guide tree to compute scores for each bipartition of the alignment, allowing the recursive selection of sequence subsets with optimal combinations of sequence and site numbers. By applying our methods to two large data sets of several thousands of gene families, we show that significant site-specific coverage increases can be achieved while controlling for the species representation.

Conclusions

The algorithm introduced in this work allows the control of the distribution of missing data in any sequence alignment by removing sequences to increase the number of sites with a defined minimum coverage. We advocate that our missing data optimization procedure is an important step which should be considered in comparative analysis pipelines, together with alignment quality assessment and control of sampled diversity. An open source C++ implementation is available at http://bioweb.me/physamp.",2015-06-09 +27782103,The Southampton-York Natural Scenes (SYNS) dataset: Statistics of surface attitude.,"Recovering 3D scenes from 2D images is an under-constrained task; optimal estimation depends upon knowledge of the underlying scene statistics. Here we introduce the Southampton-York Natural Scenes dataset (SYNS: https://syns.soton.ac.uk), which provides comprehensive scene statistics useful for understanding biological vision and for improving machine vision systems. In order to capture the diversity of environments that humans encounter, scenes were surveyed at random locations within 25 indoor and outdoor categories. Each survey includes (i) spherical LiDAR range data (ii) high-dynamic range spherical imagery and (iii) a panorama of stereo image pairs. We envisage many uses for the dataset and present one example: an analysis of surface attitude statistics, conditioned on scene category and viewing elevation. Surface normals were estimated using a novel adaptive scale selection algorithm. Across categories, surface attitude below the horizon is dominated by the ground plane (0° tilt). Near the horizon, probability density is elevated at 90°/270° tilt due to vertical surfaces (trees, walls). Above the horizon, probability density is elevated near 0° slant due to overhead structure such as ceilings and leaf canopies. 
These structural regularities represent potentially useful prior assumptions for human and machine observers, and may predict human biases in perceived surface attitude.",2016-10-26 +24334617,"Legius syndrome, an Update. Molecular pathology of mutations in SPRED1.","Multiple café-au-lait macules (CALMs) are the hallmark of Von Recklinghausen disease, or neurofibromatosis type 1 (NF1). In 2007 we reported that some individuals with multiple CALMs have a heterozygous mutation in the SPRED1 gene and have NF1-like syndrome, or Legius syndrome. Individuals with Legius syndrome have multiple CALMs with or without freckling, but they do not show the typical NF1-associated tumors such as neurofibromas or optic pathway gliomas. NF1-associated bone abnormalities and Lisch nodules are also not reported in patients with Legius syndrome. Consequently, individuals with Legius syndrome require less intense medical surveillance than those with NF1. The SPRED1 gene was identified in 2001 and codes for a protein that downregulates the RAS-mitogen activated protein kinase (RAS-MAPK) pathway; as does neurofibromin, the protein encoded by the NF1 gene. It is estimated that about 1-4% of individuals with multiple CALMs have a heterozygous SPRED1 mutation. Mutational and clinical data on 209 patients with Legius syndrome are tabulated in an online database (http://www.lovd.nl/SPRED1). Mice with homozygous knockout of the Spred1 gene show learning deficits and decreased synaptic plasticity in hippocampal neurons similar to those seen in Nf1 heterozygous mice, underlining the importance of the RAS-MAPK pathway for learning and memory. Recently, specific binding between neurofibromin and SPRED1 was demonstrated. SPRED1 seems to play an important role in recruiting neurofibromin to the plasma membrane.",2013-12-10 +26896844,EXTRACT: interactive extraction of environment metadata and term suggestion for metagenomic sample annotation. 
,"The microbial and molecular ecology research communities have made substantial progress on developing standards for annotating samples with environment metadata. However, sample manual annotation is a highly labor intensive process and requires familiarity with the terminologies used. We have therefore developed an interactive annotation tool, EXTRACT, which helps curators identify and extract standard-compliant terms for annotation of metagenomic records and other samples. Behind its web-based user interface, the system combines published methods for named entity recognition of environment, organism, tissue and disease terms. The evaluators in the BioCreative V Interactive Annotation Task found the system to be intuitive, useful, well documented and sufficiently accurate to be helpful in spotting relevant text passages and extracting organism and environment terms. Comparison of fully manual and text-mining-assisted curation revealed that EXTRACT speeds up annotation by 15-25% and helps curators to detect terms that would otherwise have been missed. Database URL: https://extract.hcmr.gr/.",2016-02-20 +24421117,Proteomics of extracellular vesicles: Exosomes and ectosomes.,"Almost all bacteria, archaea, and eukaryotic cells shed extracellular vesicles either constitutively or in a regulated manner. These nanosized membrane vesicles are spherical, bilayered proteolipids that harbor specific subsets of proteins, DNAs, RNAs, and lipids. Recent research has facilitated conceptual advancements in this emerging field that indicate that extracellular vesicles act as intercellular communicasomes by transferring signals to their target cell via surface ligands and delivering receptors and functional molecules. 
Recent progress in mass spectrometry-based proteomic analyses of mammalian extracellular vesicles derived from diverse cell types and body fluids has resulted in the identification of several thousand vesicular proteins that provide us with essential clues to the molecular mechanisms involved in vesicle cargo sorting and biogenesis. Furthermore, cell-type- or disease-specific vesicular proteins help us to understand the pathophysiological functions of extracellular vesicles and contribute to the discovery of diagnostic and therapeutic target proteins. This review focuses on the high-throughput mass spectrometry-based proteomic analyses of mammalian extracellular vesicles (i.e., exosomes and ectosomes), EVpedia (a free web-based integrated database of high-throughput data for systematic analyses of extracellular vesicles; http://evpedia.info), and the intravesicular protein-protein interaction network analyses of mammalian extracellular vesicles. The goal of this article is to encourage further studies to construct a comprehensive proteome database for extracellular vesicles that will help us to not only decode the biogenesis and cargo-sorting mechanisms during vesicle formation but also elucidate the pathophysiological roles of these complex extracellular organelles.",2014-01-14 +24321360,"Exploring the ""dark matter"" of a mammalian proteome by protein structure and function modeling.","

Background

A growing body of evidence shows that gene products encoded by short open reading frames play key roles in numerous cellular processes. Yet, they are generally overlooked in genome assembly, escaping annotation because small protein-coding genes are difficult to predict computationally. Consequently, there are still a considerable number of small proteins whose functions are yet to be characterized.

Results

To address this issue, we apply a collection of structural bioinformatics algorithms to infer molecular function of putative small proteins from the mouse proteome. Specifically, we construct 1,743 confident structure models of small proteins, which reveal a significant structural diversity with a noticeably high helical content. A subsequent structure-based function annotation of small protein models exposes 178,745 putative protein-protein interactions with the remaining gene products in the mouse proteome, 1,100 potential binding sites for small organic molecules and 987 metal-binding signatures.

Conclusions

These results strongly indicate that many small proteins adopt three-dimensional structures and are fully functional, playing important roles in transcriptional regulation, cell signaling and metabolism. Data collected through this work is freely available to the academic community at http://www.brylinski.org/content/databases to support future studies oriented on elucidating the functions of hypothetical small proteins.",2013-12-09 +26941783,shRNA target prediction informed by comprehensive enquiry (SPICE): a supporting system for high-throughput screening of shRNA library.,"RNA interference (RNAi) screening is extensively used in the field of reverse genetics. RNAi libraries constructed using random oligonucleotides have made this technology affordable. However, the new methodology requires exploration of the RNAi target gene information after screening because the RNAi library includes non-natural sequences that are not found in genes. Here, we developed a web-based tool to support RNAi screening. The system performs short hairpin RNA (shRNA) target prediction that is informed by comprehensive enquiry (SPICE). SPICE automates several tasks that are laborious but indispensable to evaluate the shRNAs obtained by RNAi screening. SPICE has four main functions: (i) sequence identification of shRNA in the input sequence (the sequence might be obtained by sequencing clones in the RNAi library), (ii) searching the target genes in the database, (iii) demonstrating biological information obtained from the database, and (iv) preparation of search result files that can be utilized in a local personal computer (PC). Using this system, we demonstrated that genes targeted by random oligonucleotide-derived shRNAs were not different from those targeted by organism-specific shRNA. The system facilitates RNAi screening, which requires sequence analysis after screening. 
The SPICE web application is available at http://www.spice.sugysun.org/.",2016-02-19 +22821489,SpectraBank: an open access tool for rapid microbial identification by MALDI-TOF MS fingerprinting.,"MALDI-TOF MS has proved to be an accurate, rapid, and cost-effective technique for microbial identification in which the spectral fingerprint of an unknown strain can be compared to a database of spectra from reference strains. Most of the existing databases are private and often costly to access, and little spectral information is shared among researchers. The objective of the present communication is to introduce the SpectraBank database (http://www.spectrabank.org), which provides open access MALDI-TOF mass spectra from a variety of microorganisms. This work aims to familiarize readers with the SpectraBank database, from the sample preparation, data collection, and data analysis to how the spectral reference data can be used for microbial species identification. The database currently includes more than 200 MALDI-TOF MS spectra from more than 70 bacterial species and links to the freely available web-based application SPECLUST (http://bioinfo.thep.lu.se/speclust.html) to allow comparisons of the obtained peak mass lists and evaluate phyloproteomic relationships. The SpectraBank database is intended to be expanded by the addition of new spectra from microbial strains, obtained in our laboratory and by other researchers.",2012-07-01 +23162083,PTID: an integrated web resource and computational tool for agrochemical discovery.,"

Summary

Although in silico drug discovery approaches are crucial for the development of pharmaceuticals, their potential advantages in agrochemical industry have not been realized. The challenge for computer-aided methods in agrochemical arena is a lack of sufficient information for both pesticides and their targets. Therefore, it is important to establish such knowledge repertoire that contains comprehensive pesticides' profiles, which include physicochemical properties, environmental fates, toxicities and mode of actions. Here, we present an integrated platform called Pesticide-Target interaction database (PTID), which comprises a total of 1347 pesticides with rich annotation of ecotoxicological and toxicological data as well as 13 738 interactions of pesticide-target and 4245 protein terms via text mining. Additionally, through the integration of ChemMapper, an in-house computational approach to polypharmacology, PTID can be used as a computational platform to identify pesticides targets and design novel agrochemical products.

Availability

http://lilab.ecust.edu.cn/ptid/.

Contact

hlli@ecust.edu.cn; xhqian@ecust.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-18 +21935468,"CDinFusion--submission-ready, on-line integration of sequence and contextual data.","State of the art (DNA) sequencing methods applied in ""Omics"" studies grant insight into the 'blueprints' of organisms from all domains of life. Sequencing is carried out around the globe and the data is submitted to the public repositories of the International Nucleotide Sequence Database Collaboration. However, the context in which these studies are conducted often gets lost, because experimental data, as well as information about the environment are rarely submitted along with the sequence data. If these contextual or metadata are missing, key opportunities of comparison and analysis across studies and habitats are hampered or even impossible. To address this problem, the Genomic Standards Consortium (GSC) promotes checklists and standards to better describe our sequence data collection and to promote the capturing, exchange and integration of sequence data with contextual data. In a recent community effort the GSC has developed a series of recommendations for contextual data that should be submitted along with sequence data. To support the scientific community to significantly enhance the quality and quantity of contextual data in the public sequence data repositories, specialized software tools are needed. In this work we present CDinFusion, a web-based tool to integrate contextual and sequence data in (Multi)FASTA format prior to submission. The tool is open source and available under the Lesser GNU Public License 3. A public installation is hosted and maintained at the Max Planck Institute for Marine Microbiology at http://www.megx.net/cdinfusion. 
The tool may also be installed locally using the open source code available at http://code.google.com/p/cdinfusion.",2011-09-13 +23601383,FAO/INFOODS food composition database for biodiversity.,"Nutrient content can vary as much between different varieties of the same foods, as they do among different foods. Knowledge of varietal differences can therefore mean the difference between nutrient adequacy and inadequacy. The FAO/INFOODS food composition database for biodiversity has been developed with analytical data for foods described at the level of variety, cultivar and breed, and for underutilized and wild foods. It contains 6411 food entries and values for 451 components together with the bibliographic references and other information. The database is in MS Excel format and can be downloaded free-of-charge from the INFOODS website http://www.fao.org/infoods/biodiversity/index_en.stm. It is intended to annually publish new editions, making these data available for national and regional food composition databases. This database could be used to raise the awareness, promote and investigate food biodiversity and help to better estimate nutrient intakes.",2012-09-13 +26569599,Parmbsc1: a refined force field for DNA simulations.,"We present parmbsc1, a force field for DNA atomistic simulation, which has been parameterized from high-level quantum mechanical data and tested for nearly 100 systems (representing a total simulation time of ∼ 140 μs) covering most of DNA structural space. Parmbsc1 provides high-quality results in diverse systems. Parameters and trajectories are available at http://mmb.irbbarcelona.org/ParmBSC1/.",2015-11-16 +25725090,SimSeq: a nonparametric approach to simulation of RNA-sequence datasets.,"

Motivation

RNA sequencing analysis methods are often derived by relying on hypothetical parametric models for read counts that are not likely to be precisely satisfied in practice. Methods are often tested by analyzing data that have been simulated according to the assumed model. This testing strategy can result in an overly optimistic view of the performance of an RNA-seq analysis method.

Results

We develop a data-based simulation algorithm for RNA-seq data. The vector of read counts simulated for a given experimental unit has a joint distribution that closely matches the distribution of a source RNA-seq dataset provided by the user. We conduct simulation experiments based on the negative binomial distribution and our proposed nonparametric simulation algorithm. We compare performance between the two simulation experiments over a small subset of statistical methods for RNA-seq analysis available in the literature. We use as a benchmark the ability of a method to control the false discovery rate. Not surprisingly, methods based on parametric modeling assumptions seem to perform better with respect to false discovery rate control when data are simulated from parametric models rather than using our more realistic nonparametric simulation strategy.

Availability and implementation

The nonparametric simulation algorithm developed in this article is implemented in the R package SimSeq, which is freely available under the GNU General Public License (version 2 or later) from the Comprehensive R Archive Network (http://cran.r-project.org/).

Contact

sgbenidt@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-26 +26040460,quantro: a data-driven approach to guide the choice of an appropriate normalization method.,"Normalization is an essential step in the analysis of high-throughput data. Multi-sample global normalization methods, such as quantile normalization, have been successfully used to remove technical variation. However, these methods rely on the assumption that observed global changes across samples are due to unwanted technical variability. Applying global normalization methods has the potential to remove biologically driven variation. Currently, it is up to the subject matter experts to determine if the stated assumptions are appropriate. Here, we propose a data-driven alternative. We demonstrate the utility of our method (quantro) through examples and simulations. A software implementation is available from http://www.bioconductor.org/packages/release/bioc/html/quantro.html .",2015-06-04 +24319146,The Catalytic Site Atlas 2.0: cataloging catalytic sites and residues identified in enzymes.,"Understanding which are the catalytic residues in an enzyme and what function they perform is crucial to many biology studies, particularly those leading to new therapeutics and enzyme design. The original version of the Catalytic Site Atlas (CSA) (http://www.ebi.ac.uk/thornton-srv/databases/CSA) published in 2004, which catalogs the residues involved in enzyme catalysis in experimentally determined protein structures, had only 177 curated entries and employed a simplistic approach to expanding these annotations to homologous enzyme structures. Here we present a new version of the CSA (CSA 2.0), which greatly expands the number of both curated (968) and automatically annotated catalytic sites in enzyme structures, utilizing a new method for annotation transfer. 
The curated entries are used, along with the variation in residue type from the sequence comparison, to generate 3D templates of the catalytic sites, which in turn can be used to find catalytic sites in new structures. To ease the transfer of CSA annotations to other resources a new ontology has been developed: the Enzyme Mechanism Ontology, which has permitted the transfer of annotations to Mechanism, Annotation and Classification in Enzymes (MACiE) and UniProt Knowledge Base (UniProtKB) resources. The CSA database schema has been re-designed and both the CSA data and search capabilities are presented in a new modern web interface.",2013-12-06 +26792120,Protein Sequence Annotation Tool (PSAT): a centralized web-based meta-server for high-throughput sequence annotations.,"

Background

Here we introduce the Protein Sequence Annotation Tool (PSAT), a web-based, sequence annotation meta-server for performing integrated, high-throughput, genome-wide sequence analyses. Our goals in building PSAT were to (1) create an extensible platform for integration of multiple sequence-based bioinformatics tools, (2) enable functional annotations and enzyme predictions over large input protein fasta data sets, and (3) provide a web interface for convenient execution of the tools.

Results

In this paper, we demonstrate the utility of PSAT by annotating the predicted peptide gene products of Herbaspirillum sp. strain RV1423, importing the results of PSAT into EC2KEGG, and using the resulting functional comparisons to identify a putative catabolic pathway, thereby distinguishing RV1423 from a well annotated Herbaspirillum species. This analysis demonstrates that high-throughput enzyme predictions, provided by PSAT processing, can be used to identify metabolic potential in an otherwise poorly annotated genome.

Conclusions

PSAT is a meta server that combines the results from several sequence-based annotation and function prediction codes, and is available at http://psat.llnl.gov/psat/. PSAT stands apart from other sequence-based genome annotation systems in providing a high-throughput platform for rapid de novo enzyme predictions and sequence annotations over large input protein sequence data sets in FASTA. PSAT is most appropriately applied in annotation of large protein FASTA sets that may or may not be associated with a single genome.",2016-01-20 +23095498,"The human ""magnesome"": detecting magnesium binding sites on human proteins.","

Background

Magnesium research is increasing in molecular medicine due to the relevance of this ion in several important biological processes and associated molecular pathogeneses. It is still difficult to predict from the protein covalent structure whether a human chain is or not involved in magnesium binding. This is mainly due to little information on the structural characteristics of magnesium binding sites in proteins and protein complexes. Magnesium binding features, differently from those of other divalent cations such as calcium and zinc, are elusive. Here we address a question that is relevant in protein annotation: how many human proteins can bind Mg2+? Our analysis is performed taking advantage of the recently implemented Bologna Annotation Resource (BAR-PLUS), a non hierarchical clustering method that relies on the pair wise sequence comparison of about 14 millions proteins from over 300.000 species and their grouping into clusters where annotation can safely be inherited after statistical validation.

Results

After cluster assignment of the latest version of the human proteome, the total number of human proteins for which we can assign putative Mg binding sites is 3,751. Among these proteins, 2,688 inherit annotation directly from human templates and 1,063 inherit annotation from templates of other organisms. Protein structures are highly conserved inside a given cluster. Transfer of structural properties is possible after alignment of a given sequence with the protein structures that characterise a given cluster as obtained with a Hidden Markov Model (HMM) based procedure. Interestingly a set of 370 human sequences inherit Mg2+ binding sites from templates sharing less than 30% sequence identity with the template.

Conclusion

We describe and deliver the ""human magnesome"", a set of proteins of the human proteome that inherit putative binding of magnesium ions. With our BAR-hMG, 251 clusters including 1,341 magnesium binding protein structures corresponding to 387 sequences are sufficient to annotate some 13,689 residues in 3,751 human sequences as ""magnesium binding"". Protein structures act therefore as three dimensional seeds for structural and functional annotation of human sequences. The data base collects specifically all the human proteins that can be annotated according to our procedure as ""magnesium binding"", the corresponding structures and BAR+ clusters from where they derive the annotation (http://bar.biocomp.unibo.it/mg).",2012-09-07 +26363178,"DIVAS: a centralized genetic variant repository representing 150,000 individuals from multiple disease cohorts.","

Motivation

A plethora of sequenced and genotyped disease cohorts is available to the biomedical research community, spread across many portals and represented in various formats.

Results

We have gathered several large studies, including GERA and GRU, and computed population- and disease-specific genetic variant frequencies. In total, our portal provides fast access to genetic variants observed in 84,928 individuals from 39 disease populations. We also include 66,335 controls, such as the 1000 Genomes and Scripps Wellderly.

Conclusion

Combining multiple studies helps validate disease-associated variants in each underlying data set, detect potential false positives using frequencies of control populations, and identify novel candidate disease-causing alterations in known or suspected genes.

Availability and implementation

https://rvs.u.hpc.mssm.edu/divas

Contact

rong.chen@mssm.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-12 +27343609,From continuous flow analysis to programmable Flow Injection techniques. A history and tutorial of emerging methodologies.,"Automation of reagent based assays, also known as Flow Analysis, is based on sample processing, in which a sample flows towards and through a detector for monitoring of its components. The Achilles heel of this methodology is that the majority of FA techniques use constant continuous forward flow to transport the sample - an approach which continually consumes reagents and generates chemical waste. Therefore the purpose of this report is to highlight recent developments of flow programming that not only save reagents, but also lead by means of advanced sample processing to selective and sensitive assays based on stop flow measurement. Flow programming combined with a novel approach to data harvesting yields a novel approach to single standard calibration, and avoids interference caused by refractive index. Finally, flow programming is useful for sample preparation, such as rapid, extensive sample dilution. The principles are illustrated by selected references to an available online tutorial http://www.flowinjectiontutorial.com/.",2016-05-26 +26677965,DeNovo: virus-host sequence-based protein-protein interaction prediction.,"

Motivation

Can we predict protein-protein interactions (PPIs) of a novel virus with its host? Three major problems arise: the lack of known PPIs for that virus to learn from, the cost of learning about its proteins and the sequence dissimilarity among viral families that makes most methods inapplicable or inefficient. We develop DeNovo, a sequence-based negative sampling and machine learning framework that learns from PPIs of different viruses to predict for a novel one, exploiting the shared host proteins. We tested DeNovo on PPIs from different domains to assess generalization.

Results

By solving the challenge of generating less noisy negative interactions, DeNovo achieved accuracy up to 81 and 86% when predicting PPIs of viral proteins that have no and distant sequence similarity to the ones used for training, respectively. This result is comparable to the best achieved in single virus-host and intra-species PPI prediction cases. Thus, we can now predict PPIs for virtually any virus infecting human. DeNovo generalizes well; it achieved near optimal accuracy when tested on bacteria-human interactions.

Availability and implementation

Code, data and additional supplementary materials needed to reproduce this study are available at: https://bioinformatics.cs.vt.edu/~alzahraa/denovo

Contact

alzahraa@vt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-16 +27768687,PreTIS: A Tool to Predict Non-canonical 5' UTR Translational Initiation Sites in Human and Mouse.,"Translation of mRNA sequences into proteins typically starts at an AUG triplet. In rare cases, translation may also start at alternative non-AUG codons located in the annotated 5' UTR which leads to an increased regulatory complexity. Since ribosome profiling detects translational start sites at the nucleotide level, the properties of these start sites can then be used for the statistical evaluation of functional open reading frames. We developed a linear regression approach to predict in-frame and out-of-frame translational start sites within the 5' UTR from mRNA sequence information together with their translation initiation confidence. Predicted start codons comprise AUG as well as near-cognate codons. The underlying datasets are based on published translational start sites for human HEK293 and mouse embryonic stem cells that were derived by the original authors from ribosome profiling data. The average prediction accuracy of true vs. false start sites for HEK293 cells was 80%. When applied to mouse mRNA sequences, the same model predicted translation initiation sites observed in mouse ES cells with an accuracy of 76%. Moreover, we illustrate the effect of in silico mutations in the flanking sequence context of a start site on the predicted initiation confidence. Our new webservice PreTIS visualizes alternative start sites and their respective ORFs and predicts their ability to initiate translation. Solely, the mRNA sequence is required as input. 
PreTIS is accessible at http://service.bioinformatik.uni-saarland.de/pretis.",2016-10-21 +27317420,How important is gametocyte clearance after malaria therapy?,"There has been increasing interest in the role of malaria drugs in preventing malaria transmission from humans to mosquitoes, which would help augment malaria control and elimination strategies. Nevertheless, only one stage in the malaria parasite life cycle, the gametocyte, is infectious to mosquitoes. The Worldwide Antimalarial Resistance Network (WWARN) have analyzed data from 48,840 patients from 141 clinical trials in order to define the nature and determinants of gametocyte clearance following artemisinin combination treatment (ACT) for symptomatic malaria infections. However, the presence of gametocytes does not always predict their infectivity, meaning that the microscopy-based methods used by the WWARN investigators represent an imperfect surrogate marker of transmissibility. Their findings, that some ACTs clear gametocytes faster than others, should be interpreted in light of these limitations and important gaps in our understanding of the biology and epidemiology of malaria transmission.Please see related article: https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-016-0621-7.",2016-06-18 +24358444,Oligotyping: Differentiating between closely related microbial taxa using 16S rRNA gene data. ,"Bacteria comprise the most diverse domain of life on Earth, where they occupy nearly every possible ecological niche and play key roles in biological and chemical processes. Studying the composition and ecology of bacterial ecosystems and understanding their function is of prime importance. High-throughput sequencing technologies enable nearly comprehensive descriptions of bacterial diversity through 16S ribosomal RNA gene amplicons. 
Analyses of these communities generally rely upon taxonomic assignments through reference databases, or clustering approaches using de facto sequence similarity thresholds to identify operational taxonomic units. However, these methods often fail to resolve ecologically meaningful differences between closely related organisms in complex microbial datasets. In this paper we describe oligotyping, a novel supervised computational method that allows researchers to investigate the diversity of closely related but distinct bacterial organisms in final operational taxonomic units identified in environmental datasets through 16S ribosomal RNA gene data by the canonical approaches. Our analysis of two datasets from two distinct environments demonstrates the capacity of oligotyping at discriminating distinct microbial populations of ecological importance. Oligotyping can resolve the distribution of closely related organisms across environments and unveil previously overlooked ecological patterns for microbial communities. The URL http://oligotyping.org offers an open-source software pipeline for oligotyping.",2013-12-01 +26106259,MITOSCISSOR: A Useful Tool for Auto-Assembly of Mitogenomic Datasets in the Evolutionary Analysis of Fishes.,"As a result of the development of rapid and efficient sequencing technologies, complete sequences of numerous mitochondrial genomes are now available. Mitochondrial genomes have been widely used to evaluate relationships between species in several fields, including evolutionary and population genetics, as well as in forensic identification and in the study of mitochondrial diseases in humans. However, the creation of mitochondrial genomes is extremely time consuming. In this paper, we present a new tool, MITOSCISSOR, which is a rapid method for parsing and formatting dozens of complete mitochondrial genome sequences. 
With the aid of MITOSCISSOR, complete mitochondrial genome sequences of 103 species from Tetraodontiformes (a difficult-to-classify order of fish) were easily parsed and formatted. It typically takes several days to produce similar results when relying upon manual editing. This tool could open the .gb file of Genbank directly and help us to use existing mitogenomic data. In the present study, we established the first clear and robust molecular phylogeny of 103 tetraodontiform fishes, a goal that has long eluded ichthyologists. MITOSCISSOR greatly increases the efficiency with which DNA data files can be parsed and annotated, and thus has the potential to greatly facilitate evolutionary analysis using mitogenomic data. This software is freely available for noncommercial users at http://www.filedropper.com/mitoscissor.",2015-06-14 +26476781,A-DaGO-Fun: an adaptable Gene Ontology semantic similarity-based functional analysis tool.,"

Summary

Gene Ontology (GO) semantic similarity measures are being used for biological knowledge discovery based on GO annotations by integrating biological information contained in the GO structure into data analyses. To empower users to quickly compute, manipulate and explore these measures, we introduce A-DaGO-Fun (ADaptable Gene Ontology semantic similarity-based Functional analysis). It is a portable software package integrating all known GO information content-based semantic similarity measures and relevant biological applications associated with these measures. A-DaGO-Fun has the advantage not only of handling datasets from the current high-throughput genome-wide applications, but also allowing users to choose the most relevant semantic similarity approach for their biological applications and to adapt a given module to their needs.

Availability and implementation

A-DaGO-Fun is freely available to the research community at http://web.cbio.uct.ac.za/ITGOM/adagofun. It is implemented in Linux using Python under free software (GNU General Public Licence).

Contact

gmazandu@cbio.uct.ac.za or Nicola.Mulder@uct.ac.za

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-17 +23193280,StreptomeDB: a resource for natural compounds isolated from Streptomyces species.,"Bacteria from the genus Streptomyces are very important for the production of natural bioactive compounds such as antibiotic, antitumour or immunosuppressant drugs. Around two-thirds of all known natural antibiotics are produced by these bacteria. An enormous quantity of crucial data related to this genus has been generated and published, but so far no freely available and comprehensive database exists. Here, we present StreptomeDB (http://www.pharmaceutical-bioinformatics.de/streptomedb/). To the best of our knowledge, this is the largest database of natural products isolated from Streptomyces. It contains >2400 unique and diverse compounds from >1900 different Streptomyces strains and substrains. In addition to names and molecular structures of the compounds, information about source organisms, references, biological role, activities and synthesis routes (e.g. polyketide synthase derived and non-ribosomal peptides derived) is included. Data can be accessed through queries on compound names, chemical structures or organisms. Extraction from the literature was performed through automatic text mining of thousands of articles from PubMed, followed by manual curation. All annotated compound structures can be downloaded from the website and applied for in silico screenings for identifying new active molecules with undiscovered properties.",2012-11-28 +23865838,A fast weak motif-finding algorithm based on community detection in graphs.,"

Background

Identification of transcription factor binding sites (also called 'motif discovery') in DNA sequences is a basic step in understanding genetic regulation. Although many successful programs have been developed, the problem is far from being solved on account of diversity in gene expression/regulation and the low specificity of binding sites. State-of-the-art algorithms have their own constraints (e.g., high time or space complexity for finding long motifs, low precision in identification of weak motifs, or the OOPS constraint: one occurrence of the motif instance per sequence) which limit their scope of application.

Results

In this paper, we present a novel and fast algorithm we call TFBSGroup. It is based on community detection from a graph and is used to discover long and weak (l,d) motifs under the ZOMOPS constraint (zero, one or multiple occurrence(s) of the motif instance(s) per sequence), where l is the length of a motif and d is the maximum number of mutations between a motif instance and the motif itself. Firstly, TFBSGroup transforms the (l, d) motif search in sequences to focus on the discovery of dense subgraphs within a graph. It identifies these subgraphs using a fast community detection method for obtaining coarse-grained candidate motifs. Next, it greedily refines these candidate motifs towards the true motif within their own communities. Empirical studies on synthetic (l, d) samples have shown that TFBSGroup is very efficient (e.g., it can find true (18, 6), (24, 8) motifs within 30 seconds). More importantly, the algorithm has succeeded in rapidly identifying motifs in a large data set of prokaryotic promoters generated from the Escherichia coli database RegulonDB. The algorithm has also accurately identified motifs in ChIP-seq data sets for 12 mouse transcription factors involved in ES cell pluripotency and self-renewal.

Conclusions

Our novel heuristic algorithm, TFBSGroup, is able to quickly identify nearly exact matches for long and weak (l, d) motifs in DNA sequences under the ZOMOPS constraint. It is also capable of finding motifs in real applications. The source code for TFBSGroup can be obtained from http://bioinformatics.bioengr.uic.edu/TFBSGroup/.",2013-07-17 +26973378,"Nonproliferative and Proliferative Lesions of the Gastrointestinal Tract, Pancreas and Salivary Glands of the Rat and Mouse.","The INHAND (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) project is a joint initiative of the Societies of Toxicologic Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP), and North America (STP) to develop an internationally accepted nomenclature and diagnostic criteria for nonproliferative and proliferative lesions in laboratory animals. The purpose of this publication is to provide a standardized nomenclature and diagnostic criteria for classifying lesions in the digestive system including the salivary glands and the exocrine pancreas of laboratory rats and mice. Most lesions are illustrated by color photomicrographs. The standardized nomenclature, the diagnostic criteria, and the photomicrographs are also available electronically on the Internet (http://www.goreni.org/). Sources of material included histopathology databases from government, academia, and industrial laboratories throughout the world. Content includes spontaneous and age related lesions as well as lesions induced by exposure to test items. Relevant infectious and parasitic lesions are included as well. 
A widely accepted and utilized international harmonization of nomenclature and diagnostic criteria for the digestive system will decrease misunderstandings among regulatory and scientific research organizations in different countries and provide a common language to increase and enrich international exchanges of information among toxicologists and pathologists.",2016-02-13 +32355508,"A Galaxy-based bioinformatics pipeline for optimised, streamlined microsatellite development from Illumina next-generation sequencing data.","Microsatellites are useful tools for ecologists and conservationist biologists, but are taxa-specific and traditionally expensive and time-consuming to develop. New methods using next-generation sequencing (NGS) have reduced these problems, but the plethora of software available for processing NGS data may cause confusion and difficulty for researchers new to the field of bioinformatics. We developed a bioinformatics pipeline for microsatellite development from Illumina paired-end sequences, which is packaged in the open-source bioinformatics tool Galaxy. This optimises and streamlines the design of a microsatellite panel and provides a user-friendly graphical user interface. The pipeline utilises existing programs along with our own novel program and wrappers to: quality-filter and trim reads (Trimmomatic); generate sequence quality reports (FastQC); identify potentially-amplifiable microsatellite loci (Pal_finder); design primers (Primer3); assemble pairs of reads to enhance marker amplification success rates (PANDAseq); and filter optimal loci (Pal_filter). The complete pipeline is freely available for use via a pre-configured Galaxy instance, accessible at https://palfinder.ls.manchester.ac.uk.",2016-08-02 +23245398,GWIDD: a comprehensive resource for genome-wide structural modeling of protein-protein interactions.,"Protein-protein interactions are a key component of life processes. 
The knowledge of the three-dimensional structure of these interactions is important for understanding protein function. Genome-Wide Docking Database (http://gwidd.bioinformatics.ku.edu) offers an extensive source of data for structural studies of protein-protein complexes on genome scale. The current release of the database combines the available experimental data on the structure and characteristics of protein interactions with structural modeling of protein complexes for 771 organisms spanned over the entire universe of life from viruses to humans. The interactions are stored in a relational database with user-friendly interface that includes various search options. The search results can be interactively previewed; the structures, downloaded, along with the interaction characteristics.",2012-07-11 +27402703,"Pan-Specific Prediction of Peptide-MHC Class I Complex Stability, a Correlate of T Cell Immunogenicity.","Binding of peptides to MHC class I (MHC-I) molecules is the most selective event in the processing and presentation of Ags to CTL, and insights into the mechanisms that govern peptide-MHC-I binding should facilitate our understanding of CTL biology. Peptide-MHC-I interactions have traditionally been quantified by the strength of the interaction, that is, the binding affinity, yet it has been shown that the stability of the peptide-MHC-I complex is a better correlate of immunogenicity compared with binding affinity. In this study, we have experimentally analyzed peptide-MHC-I complex stability of a large panel of human MHC-I allotypes and generated a body of data sufficient to develop a neural network-based pan-specific predictor of peptide-MHC-I complex stability. Integrating the neural network predictors of peptide-MHC-I complex stability with state-of-the-art predictors of peptide-MHC-I binding is shown to significantly improve the prediction of CTL epitopes. 
The method is publicly available at http://www.cbs.dtu.dk/services/NetMHCstabpan.",2016-07-08 +25042682,Bringing biocuration to China.,"Biocuration involves adding value to biomedical data by the processes of standardization, quality control and information transferring (also known as data annotation). It enhances data interoperability and consistency, and is critical in translating biomedical data into scientific discovery. Although China is becoming a leading scientific data producer, biocuration is still very new to the Chinese biomedical data community. In fact, there currently lacks an equivalent acknowledged word in Chinese for the word ""curation"". Here we propose its Chinese translation as (Pinyin) ""shěn biān"", based on its implied meanings taken by biomedical data community. The 8th International Biocuration Conference to be held in China (http://biocuration2015.tilsi.org) next year bears the potential to raise the general awareness in China of the significant role of biocuration in scientific discovery. However, challenges are ahead in its implementation.",2014-07-17 +24297252,eggNOG v4.0: nested orthology inference across 3686 organisms.,"With the increasing availability of various 'omics data, high-quality orthology assignment is crucial for evolutionary and functional genomics studies. We here present the fourth version of the eggNOG database (available at http://eggnog.embl.de) that derives nonsupervised orthologous groups (NOGs) from complete genomes, and then applies a comprehensive characterization and analysis pipeline to the resulting gene families. Compared with the previous version, we have more than tripled the underlying species set to cover 3686 organisms, keeping track with genome project completions while prioritizing the inclusion of high-quality genomes to minimize error propagation from incomplete proteome sets. 
Major technological advances include (i) a robust and scalable procedure for the identification and inclusion of high-quality genomes, (ii) provision of orthologous groups for 107 different taxonomic levels compared with 41 in eggNOGv3, (iii) identification and annotation of particularly closely related orthologous groups, facilitating analysis of related gene families, (iv) improvements of the clustering and functional annotation approach, (v) adoption of a revised tree building procedure based on the multiple alignments generated during the process and (vi) implementation of quality control procedures throughout the entire pipeline. As in previous versions, eggNOGv4 provides multiple sequence alignments and maximum-likelihood trees, as well as broad functional annotation. Users can access the complete database of orthologous groups via a web interface, as well as through bulk download.",2013-12-01 +22621763,The Ontology for Parasite Lifecycle (OPL): towards a consistent vocabulary of lifecycle stages in parasitic organisms.,"

Background

Genome sequencing of many eukaryotic pathogens and the volume of data available on public resources have created a clear requirement for a consistent vocabulary to describe the range of developmental forms of parasites. Consistent labeling of experimental data and external data, in databases and the literature, is essential for integration, cross database comparison, and knowledge discovery. The primary objective of this work was to develop a dynamic and controlled vocabulary that can be used for various parasites. The paper describes the Ontology for Parasite Lifecycle (OPL) and discusses its application in parasite research.

Results

The OPL is based on the Basic Formal Ontology (BFO) and follows the rules set by the OBO Foundry consortium. The first version of the OPL models complex life cycle stage details of a range of parasites, such as Trypanosoma sp., Leishmania sp., Plasmodium sp., and Schistosoma sp. In addition, the ontology also models necessary contextual details, such as host information, vector information, and anatomical locations. OPL is primarily designed to serve as a reference ontology for parasite life cycle stages that can be used for database annotation purposes and in the lab for data integration or information retrieval as exemplified in the application section below.

Conclusion

OPL is freely available at http://purl.obolibrary.org/obo/opl.owl and has been submitted to the BioPortal site of NCBO and to the OBO Foundry. We believe that database and phenotype annotations using OPL will help run fundamental queries on databases to know more about gene functions and to find intervention targets for various parasites. The OPL is under continuous development and new parasites and/or terms are being added.",2012-05-23 +24217912,The Human Phenotype Ontology project: linking molecular biology and disease through phenotype data.,"The Human Phenotype Ontology (HPO) project, available at http://www.human-phenotype-ontology.org, provides a structured, comprehensive and well-defined set of 10,088 classes (terms) describing human phenotypic abnormalities and 13,326 subclass relations between the HPO classes. In addition we have developed logical definitions for 46% of all HPO classes using terms from ontologies for anatomy, cell types, function, embryology, pathology and other domains. This allows interoperability with several resources, especially those containing phenotype information on model organisms such as mouse and zebrafish. Here we describe the updated HPO database, which provides annotations of 7,278 human hereditary syndromes listed in OMIM, Orphanet and DECIPHER to classes of the HPO. Various meta-attributes such as frequency, references and negations are associated with each annotation. Several large-scale projects worldwide utilize the HPO for describing phenotype information in their datasets. We have therefore generated equivalence mappings to other phenotype vocabularies such as LDDB, Orphanet, MedDRA, UMLS and phenoDB, allowing integration of existing datasets and interoperability with multiple biomedical resources. We have created various ways to access the HPO database content using flat files, a MySQL database, and Web-based tools. 
All data and documentation on the HPO project can be found online.",2013-11-11 +27626500,iOri-Human: identify human origin of replication by incorporating dinucleotide physicochemical properties into pseudo nucleotide composition.,"The initiation of replication is an extremely important process in DNA life cycle. Given an uncharacterized DNA sequence, can we identify where its origin of replication (ORI) is located? It is no doubt a fundamental problem in genome analysis. Particularly, with the rapid development of genome sequencing technology that results in a huge amount of sequence data, it is highly desired to develop computational methods for rapidly and effectively identifying the ORIs in these genomes. Unfortunately, by means of the existing computational methods, such as sequence alignment or kmer strategies, it could hardly achieve decent success rates. To address this problem, we developed a predictor called ""iOri-Human"". Rigorous jackknife tests have shown that its overall accuracy and stability in identifying human ORIs are over 75% and 50%, respectively. In the predictor, it is through the pseudo nucleotide composition (an extension of pseudo amino acid composition) that 96 physicochemical properties for the 16 possible constituent dinucleotides have been incorporated to reflect the global sequence patterns in DNA as well as its local sequence patterns. Moreover, a user-friendly web-server for iOri-Human has been established at http://lin.uestc.edu.cn/server/iOri-Human.html, by which users can easily get their desired results without the need to through the complicated mathematics involved.",2016-10-01 +,The Supramap project: linking pathogen genomes with geography to fight emergent infectious diseases,"Novel pathogens have the potential to become critical issues of national security, public health and economic welfare. 
As demonstrated by the response to Severe Acute Respiratory Syndrome (SARS) and influenza, genomic sequencing has become an important method for diagnosing agents of infectious disease. Despite the value of genomic sequences in characterizing novel pathogens, raw data on their own do not provide the information needed by public health officials and researchers. One must integrate knowledge of the genomes of pathogens with host biology and geography to understand the etiology of epidemics. To these ends, we have created an application called Supramap (http://supramap.osu.edu) to put information on the spread of pathogens and key mutations across time, space and various hosts into a geographic information system (GIS). To build this application, we created a web service for integrated sequence alignment and phylogenetic analysis as well as methods to describe the tree, mutations, and host shifts in Keyhole Markup Language (KML). We apply the application to 239 sequences of the polymerase basic 2 (PB2) gene of recent isolates of avian influenza (H5N1). We map a mutation, glutamic acid to lysine at position 627 in the PB2 protein (E627K), in H5N1 influenza that allows for increased replication of the virus in mammals. We use a statistical test to support the hypothesis of a correlation of E627K mutations with avian-mammalian host shifts but reject the hypothesis that lineages with E627K are moving westward. Data, instructions for use, and visualizations are included as supplemental materials at: http://supramap.osu.edu/sm/supramap/publications. © The Willi Hennig Society 2010.",2011-02-01 +26663558,Understanding continuous professional development participation and choice of mid-career general dental practitioners.,"

Objective

Participating in continuing professional development (CPD) activities is a requirement for dental practitioners to keep their skills and knowledge up to date. Understanding the ways dental practitioners engage with professional development and the impact on practice is not fully known (Eaton et al. 2011, http://www.gdc-uk.org/Aboutus/policy/Documents/Impact%20Of%20CPD%20In%20Dentistry.pdf). The aim of this study was to gain insights into the ways that dentists reflect on their professional development and what may be influencing their choices.

Method

Empirical qualitative data were collected by semi-structured interviewing of five mid-career dentists. Using grounded theory, the data were analysed for themes about CPD choice and participation.

Results

Three themes were identified as influences to dentists' choices of CPD with pragmatic considerations of how new learning could benefit their patients and their practices. Dental practitioners were influenced by the requirements of external regulatory bodies which they did not consider to necessarily improve practice.

Conclusion

Dentists working in primary care in the UK are undertaking CPD which is influenced by the pragmatic requirements of running a small business and to meet regulatory requirements. In this sample, dentists are not critically reflecting on their education needs when choosing their CPD activity. Protected learning time and organisational feedback and support are recommended as a way to promote more meaningful reflection on learning and to improve professional development.",2015-12-10 +26935399,Protein inference: A protein quantification perspective.,"In mass spectrometry-based shotgun proteomics, protein quantification and protein identification are two major computational problems. To quantify the protein abundance, a list of proteins must be firstly inferred from the raw data. Then the relative or absolute protein abundance is estimated with quantification methods, such as spectral counting. Until now, most researchers have been dealing with these two processes separately. In fact, the protein inference problem can be regarded as a special protein quantification problem in the sense that truly present proteins are those proteins whose abundance values are not zero. Some recent published papers have conceptually discussed this possibility. However, there is still a lack of rigorous experimental studies to test this hypothesis. In this paper, we investigate the feasibility of using protein quantification methods to solve the protein inference problem. Protein inference methods aim to determine whether each candidate protein is present in the sample or not. Protein quantification methods estimate the abundance value of each inferred protein. Naturally, the abundance value of an absent protein should be zero. Thus, we argue that the protein inference problem can be viewed as a special protein quantification problem in which one protein is considered to be present if its abundance is not zero. 
Based on this idea, our paper tries to use three simple protein quantification methods to solve the protein inference problem effectively. The experimental results on six data sets show that these three methods are competitive with previous protein inference algorithms. This demonstrates that it is plausible to model the protein inference problem as a special protein quantification task, which opens the door of devising more effective protein inference algorithms from a quantification perspective. The source codes of our methods are available at: http://code.google.com/p/protein-inference/.",2016-02-13 +22102587,SABIO-RK--database for biochemical reaction kinetics.,"SABIO-RK (http://sabio.h-its.org/) is a web-accessible database storing comprehensive information about biochemical reactions and their kinetic properties. SABIO-RK offers standardized data manually extracted from the literature and data directly submitted from lab experiments. The database content includes kinetic parameters in relation to biochemical reactions and their biological sources with no restriction on any particular set of organisms. Additionally, kinetic rate laws and corresponding equations as well as experimental conditions are represented. All the data are manually curated and annotated by biological experts, supported by automated consistency checks. SABIO-RK can be accessed via web-based user interfaces or automatically via web services that allow direct data access by other tools. Both interfaces support the export of the data together with its annotations in SBML (Systems Biology Markup Language), e.g. for import in modelling tools.",2011-11-18 +26537300,"CellProfiler Tracer: exploring and validating high-throughput, time-lapse microscopy image data.","

Background

Time-lapse analysis of cellular images is an important and growing need in biology. Algorithms for cell tracking are widely available; what researchers have been missing is a single open-source software package to visualize standard tracking output (from software like CellProfiler) in a way that allows convenient assessment of track quality, especially for researchers tuning tracking parameters for high-content time-lapse experiments. This makes quality assessment and algorithm adjustment a substantial challenge, particularly when dealing with hundreds of time-lapse movies collected in a high-throughput manner.

Results

We present CellProfiler Tracer, a free and open-source tool that complements the object tracking functionality of the CellProfiler biological image analysis package. Tracer allows multi-parametric morphological data to be visualized on object tracks, providing visualizations that have already been validated within the scientific community for time-lapse experiments, and combining them with simple graph-based measures for highlighting possible tracking artifacts.

Conclusions

CellProfiler Tracer is a useful, free tool for inspection and quality control of object tracking data, available from http://www.cellprofiler.org/tracer/.",2015-11-04 +26656649,Gene discovery for Mendelian conditions via social networking: de novo variants in KDM1A cause developmental delay and distinctive facial features.,"

Purpose

The pace of Mendelian gene discovery is slowed by the ""n-of-1 problem""-the difficulty of establishing the causality of a putatively pathogenic variant in a single person or family. Identification of an unrelated person with an overlapping phenotype and suspected pathogenic variant in the same gene can overcome this barrier, but it is often impeded by lack of a convenient or widely available way to share data on candidate variants/genes among families, clinicians, and researchers.

Methods

Social networking among families, clinicians, and researchers was used to identify three children with variants of unknown significance in KDM1A and similar phenotypes.

Results

De novo variants in KDM1A underlie a new syndrome characterized by developmental delay and distinctive facial features.

Conclusion

Social networking is a potentially powerful strategy to discover genes for rare Mendelian conditions, particularly those with nonspecific phenotypic features. To facilitate the efforts of families to share phenotypic and genomic information with each other, clinicians, and researchers, we developed the Repository for Mendelian Genomics Family Portal (RMD-FP; http://uwcmg.org/#/family). Design and development of MyGene2 (http://www.mygene2.org), a Web-based tool that enables families, clinicians, and researchers to search for gene matches based on analysis of phenotype and exome data deposited into the RMD-FP, is under way.Genet Med 18 8, 788-795.",2015-12-10 +,First Report of Fusarium Wilt Caused by Fusarium oxysporum f. sp. canariensis on Canary Island Date Palm in Texas and South Carolina,"Canary Island date palm (Phoenix canariensis) is native to the Canary Islands and widely grown throughout the world as an ornamental. At a home site in Austin, TX in May 2008 and a commercial site near Charleston, SC in December 2009, declining Canary Island date palms were observed. Symptoms included individual leaves with chlorotic or necrotic leaflets on one side of the leaf blade (one-sided wilt or death) and a distinct reddish brown stripe along the petiole and rachis. Cross-sections through the petiole or rachis exhibited discoloration of internal tissue. Fusarium oxysporum was isolated from the internal petiole or rachis tissue of each palm sample onto one-quarter-strength potato dextrose agar (PDA). Typical macroconidia in pale orange sporodochia, microconidia in false heads on short monophialides, and chlamydospores were observed (2). Macroconidia were mostly 3-septate, slightly curved, and ranged from 3.8 to 4.2 × 42.9 to 46.5 μm. Microconidia were single cell, oval to reniform, and ranged from 2.5 to 2.9 × 7.2 to 7.8 μm. 
Single-spore isolates grown on full-strength PDA (12-h light and 26°C) produced abundant white-to-pale lavender mycelia with a purple pigment in the agar. One isolate from each location (PLM-385B from Texas and PLM-511A from South Carolina) was selected for pathogenicity tests and molecular characterization. The translation elongation factor 1-α gene (EF-1α) was amplified in each isolate by PCR using the ef1 and ef2 primers (1). Products were sequenced and queried for similarity against the NCBI database and the FUSARIUM-ID database (http://isolate.fusariumdb.org/index.php) (1) using the BLAST search tool. In both databases, both isolates matched F. oxysporum f. sp. canariensis strain NRRL 26035 (GenBank Accession No. AF008485; FD_01211) at 100% sequence similarity. Sequences for PLM-385B and PLM-511A have been deposited in the NCBI database (GenBank Accession Nos. HM591537 and HM591538, respectively). Pathogenicity of these two isolates was tested on three-leaf Canary Island date palm seedlings. There were five replicate palms per isolate and control treatment. All potting mix was shaken from the roots and three groups of five seedlings were placed in small buckets. Twenty-five milliliters of a 10^6 conidia ml^-1 suspension was pipetted down among the leaf bases and the excess drained onto the roots. Control palms received sterile water. Seedlings were covered with plastic for 48 h and then transplanted into separate growing containers. Ten weeks after inoculation, initial symptoms of a leaf wilt (off-color and folded over) were observed on some of the inoculated palms. After 4 months, all palms inoculated with PLM-511A were dead and three of the five palms inoculated with PLM-385B were dead. The pathogen was reisolated from diseased palms. All five control palms remained healthy. 
While the symptomatic palm in Texas had been in the home site approximately 2 years, which implied the palm could have been already infected when transplanted, the palm in South Carolina had been planted in 1990. To our knowledge, this is the first report of Fusarium wilt of Canary Island date palm in Texas and South Carolina. Previously in the United States, the disease had only been noted in California, Florida, and Nevada.",2011-03-01 +26868053,The Corvids Literature Database--500 years of ornithological research from a crow's perspective. ,"Corvids (Corvidae) play a major role in ornithological research. Because of their worldwide distribution, diversity and adaptiveness, they have been studied extensively. The aim of the Corvids Literature Database (CLD, http://www.corvids.de/cld) is to record all publications (citation format) on all extant and extinct Crows, Ravens, Jays and Magpies worldwide and tag them with specific keywords making them available for researchers worldwide. The self-maintained project started in 2006 and today comprises 8000 articles, spanning almost 500 years. The CLD covers publications from 164 countries, written in 36 languages and published by 8026 authors in 1503 journals (plus books, theses and other publications). Forty-nine percent of all records are available online as full-text documents or deposited in the physical CLD archive. The CLD contains 442 original corvid descriptions. Here, we present a metadata assessment of articles recorded in the CLD including a gap analysis and prospects for future research. Database URL: http://www.corvids.de/cld.",2016-02-11 +23239846,RadishBase: a database for genomics and genetics of radish.,"Radish is an economically important vegetable crop. During the past several years, large-scale genomics and genetics resources have been accumulated for this species. 
To store, query, analyze and integrate these radish resources efficiently, we have developed RadishBase (http://bioinfo.bti.cornell.edu/radish), a genomics and genetics database of radish. Currently the database contains radish mitochondrial genome sequences, expressed sequence tag (EST) and unigene sequences and annotations, biochemical pathways, EST-derived single nucleotide polymorphism (SNP) and simple sequence repeat (SSR) markers, and genetic maps. RadishBase is designed to enable users easily to retrieve and visualize biologically important information through a set of efficient query interfaces and analysis tools, including the BLAST search and unigene annotation query interfaces, and tools to classify unigenes functionally, to identify enriched gene ontology (GO) terms and to visualize genetic maps. A database containing radish pathways predicted from unigene sequences is also included in RadishBase. The tools and interfaces in RadishBase allow efficient mining of recently released and continually expanding large-scale radish genomics and genetics data sets, including the radish genome sequences and RNA-seq data sets.",2012-12-13 +26753741,MS3ALIGN: an efficient molecular surface aligner using the topology of surface curvature.,"

Background

Aligning similar molecular structures is an important step in the process of bio-molecular structure and function analysis. Molecular surfaces are simple representations of molecular structure that are easily constructed from various forms of molecular data such as 3D atomic coordinates (PDB) and Electron Microscopy (EM) data.

Methods

We present a Multi-Scale Morse-Smale Molecular-Surface Alignment tool, MS3ALIGN, which aligns molecular surfaces based on significant protrusions on the molecular surface. The input is a pair of molecular surfaces represented as triangle meshes. A key advantage of MS3ALIGN is computational efficiency that is achieved because it processes only a few carefully chosen protrusions on the molecular surface. Furthermore, the alignments are partial in nature and therefore allow for inexact surfaces to be aligned.

Results

The method is evaluated in four settings. First, we establish performance using known alignments with varying overlap and noise values. Second, we compare the method with SurfComp, an existing surface alignment method. We show that we are able to determine alignments reported by SurfComp, as well as report relevant alignments not found by SurfComp. Third, we validate the ability of MS3ALIGN to determine alignments in the case of structurally dissimilar binding sites. Fourth, we demonstrate the ability of MS3ALIGN to align iso-surfaces derived from cryo-electron microscopy scans.

Conclusions

We have presented an algorithm that aligns Molecular Surfaces based on the topology of surface curvature. A webserver and standalone software implementation of the algorithm available at http://vgl.serc.iisc.ernet.in/ms3align.",2016-01-12 +22139929,BioProject and BioSample databases at NCBI: facilitating capture and organization of metadata.,"As the volume and complexity of data sets archived at NCBI grow rapidly, so does the need to gather and organize the associated metadata. Although metadata has been collected for some archival databases, previously, there was no centralized approach at NCBI for collecting this information and using it across databases. The BioProject database was recently established to facilitate organization and classification of project data submitted to NCBI, EBI and DDBJ databases. It captures descriptive information about research projects that result in high volume submissions to archival databases, ties together related data across multiple archives and serves as a central portal by which to inform users of data availability. Concomitantly, the BioSample database is being developed to capture descriptive information about the biological samples investigated in projects. BioProject and BioSample records link to corresponding data stored in archival repositories. Submissions are supported by a web-based Submission Portal that guides users through a series of forms for input of rich metadata describing their projects and samples. Together, these databases offer improved ways for users to query, locate, integrate and interpret the masses of data held in NCBI's archival repositories. The BioProject and BioSample databases are available at http://www.ncbi.nlm.nih.gov/bioproject and http://www.ncbi.nlm.nih.gov/biosample, respectively.",2011-12-01 +28361694,An empirical fuzzy multifactor dimensionality reduction method for detecting gene-gene interactions.,"

Background

Detection of gene-gene interaction (GGI) is a key challenge towards solving the problem of missing heritability in genetics. The multifactor dimensionality reduction (MDR) method has been widely studied for detecting GGIs. MDR reduces the dimensionality of multi-factor by means of binary classification into high-risk (H) or low-risk (L) groups. Unfortunately, this simple binary classification does not reflect the uncertainty of H/L classification. Thus, we proposed Fuzzy MDR to overcome limitations of binary classification by introducing the degree of membership of two fuzzy sets H/L. While Fuzzy MDR demonstrated higher power than that of MDR, its performance is highly dependent on several tuning parameters. In real applications, it is not easy to choose appropriate tuning parameter values.

Result

In this work, we propose an empirical fuzzy MDR (EF-MDR) which does not require specifying tuning parameter values. Here, we propose an empirical approach to estimating the membership degree that can be directly estimated from the data. In EF-MDR, the membership degree is estimated by the maximum likelihood estimator of the proportion of cases (controls) in each genotype combination. We also show that the balanced accuracy measure derived from this new membership function is a linear function of the standard chi-square statistics. This relationship allows us to perform the standard significance test using p-values in the MDR framework without permutation. Through two simulation studies, the power of the proposed EF-MDR is shown to be higher than those of MDR and Fuzzy MDR. We illustrate the proposed EF-MDR by analyzing Crohn's disease (CD) and bipolar disorder (BD) in the Wellcome Trust Case Control Consortium (WTCCC) dataset.

Conclusion

We propose an empirical Fuzzy MDR for detecting GGI using the maximum likelihood of the proportion of cases(controls) as the membership degree of the genotype combination. The program written in R for EF-MDR is available at http://statgen.snu.ac.kr/software/EF-MDR .",2017-03-14 +26528569,Systems Biology Markup Language (SBML) Level 2 Version 5: Structures and Facilities for Model Definitions.,"Computational models can help researchers to interpret data, understand biological function, and make quantitative predictions. The Systems Biology Markup Language (SBML) is a file format for representing computational models in a declarative form that can be exchanged between different software systems. SBML is oriented towards describing biological processes of the sort common in research on a number of topics, including metabolic pathways, cell signaling pathways, and many others. By supporting SBML as an input/output format, different tools can all operate on an identical representation of a model, removing opportunities for translation errors and assuring a common starting point for analyses and simulations. This document provides the specification for Version 5 of SBML Level 2. The specification defines the data structures prescribed by SBML as well as their encoding in XML, the eXtensible Markup Language. This specification also defines validation rules that determine the validity of an SBML document, and provides many examples of models in SBML form. Other materials and software are available from the SBML project web site, http://sbml.org.",2015-09-04 +21854616,A second-generation anchored genetic linkage map of the tammar wallaby (Macropus eugenii).,"

Background

The tammar wallaby, Macropus eugenii, a small kangaroo used for decades for studies of reproduction and metabolism, is the model Australian marsupial for genome sequencing and genetic investigations. The production of a more comprehensive cytogenetically-anchored genetic linkage map will significantly contribute to the deciphering of the tammar wallaby genome. It has great value as a resource to identify novel genes and for comparative studies, and is vital for the ongoing genome sequence assembly and gene ordering in this species.

Results

A second-generation anchored tammar wallaby genetic linkage map has been constructed based on a total of 148 loci. The linkage map contains the original 64 loci included in the first-generation map, plus an additional 84 microsatellite loci that were chosen specifically to increase coverage and assist with the anchoring and orientation of linkage groups to chromosomes. These additional loci were derived from (a) sequenced BAC clones that had been previously mapped to tammar wallaby chromosomes by fluorescence in situ hybridization (FISH), (b) End sequence from BACs subsequently FISH-mapped to tammar wallaby chromosomes, and (c) tammar wallaby genes orthologous to opossum genes predicted to fill gaps in the tammar wallaby linkage map as well as three X-linked markers from a published study. Based on these 148 loci, eight linkage groups were formed. These linkage groups were assigned (via FISH-mapped markers) to all seven autosomes and the X chromosome. The sex-pooled map size is 1402.4 cM, which is estimated to provide 82.6% total coverage of the genome, with an average interval distance of 10.9 cM between adjacent markers. The overall ratio of female/male map length is 0.84, which is comparable to the ratio of 0.78 obtained for the first-generation map.

Conclusions

Construction of this second-generation genetic linkage map is a significant step towards complete coverage of the tammar wallaby genome and considerably extends that of the first-generation map. It will be a valuable resource for ongoing tammar wallaby genetic research and assembling the genome sequence. The sex-pooled map is available online at http://compldb.angis.org.au/.",2011-08-19 +,#wheezing: A Content Analysis of Asthma-Related Tweets,"

Objective

We present a Content Analysis project using Natural Language Processing to aid in Twitter-based syndromic surveillance of Asthma.

Introduction

Recently, a growing number of studies have made use of Twitter to track the spread of infectious disease. These investigations show that there are reliable spikes in traffic related to keywords associated with the spread of infectious diseases like Influenza [1], as well as other Syndromes [2]. However, little research has been done using Social Media to monitor chronic conditions like Asthma, which do not spread from sufferer to sufferer. We therefore test the feasibility of using Twitter for Asthma surveillance, using techniques from NLP and machine learning to achieve a deeper understanding of what users Tweet about Asthma, rather than relying only on keyword search.

Methods

We retrieved a large volume of Tweets from the Twitter API. Search terms included “asthma,” and several misspellings of that word; terms for common medical devices associated with Asthma such as “inhaler” and “nebulizer”; and names of prescription drugs used to treat the condition, including “albuterol” and “Singulair.” A randomly sampled subset of these Tweets (N=3511) was annotated for content, based on an annotation scheme that coded for the following elements: the Experiencer of Asthma symptoms (Self, Family, Friend, Named Other, Unidentified, and All-Non-Self, which was the union of these last four categories); aspects of the type of information being conveyed by each Tweet (Medication, Triggers, Physical Activity, Contacting of a Medical Practitioner, Allergies, Questions, Suggestions, Information, News, Spam); as well as Negative Sentiment, Future temporality, and Non-English content. Further details on the annotation scheme used can be found at http://idiom.ucsd.edu/∼ggilling/annotation.pdf. Inter-annotator agreement on a subset of the Tweets (N=403) fell in an acceptable range for all categories (Cohen’s Kappa >0.6). Once annotation was complete, the Tweets’ texts were stemmed and converted into vectors of unigram and bigram counts. These were then stripped of sparse terms (all those words appearing in fewer than 1 in 200 Tweets), which left multi-dimensional vectors consisting of the counts of the remaining words in all Tweets. Statistical machine-learning classifiers including K-nearest neighbors, Naive Bayes and Support Vector Machines were then trained on the unigram and bigram models.

Results

SVM with 10-fold cross-validation achieved greatest prediction accuracy with the unigram model, as shown in Table 1. Categories that showed the greatest reduction in classification error using the unigram model were Non-English, Self, All-Non-Self, Medication, Symptoms and Spam. The majority of these categories showed very high Precision, as well as fairly high Recall for the unigram model. Unexpectedly, the bigram model fared far worse than the Unigram model, which suggests that individual words in these Tweets were more reliably predictive of content than pairs of words, which occurred less frequently.

Conclusions

Text-classification increases the utility of Twitter as a data-source for studying chronic conditions such as Asthma. Using these methods, we can automatically reject Tweets that are non-English or Spam. We can also determine who is experiencing symptoms: the Twitter user or another individual. Fairly simple models are able to predict with good certainty whether a user is talking about their Symptoms, their Medication, or Triggers for their Asthma, as well as whether they are expressing Negative sentiment about their condition. We demonstrate that Social Media such as Twitter is a promising means by which to conduct surveillance for chronic conditions such as Asthma.",2013-01-01 +28155654,Exploiting the recognition code for elucidating the mechanism of zinc finger protein-DNA interactions.,"

Background

Engineering zinc finger protein motifs for specific binding to double-stranded DNA is critical for targeted genome editing. Most existing tools for predicting DNA-binding specificity in zinc fingers are trained on data obtained from naturally occurring proteins, thereby skewing the predictions. Moreover, these mostly neglect the cooperativity exhibited by zinc fingers.

Methods

Here, we present an ab-initio method that is based on mutation of the key α-helical residues of individual fingers of the parent template for Zif-268 and its consensus sequence (PDB ID: 1AAY). In an attempt to elucidate the mechanism of zinc finger protein-DNA interactions, we evaluated and compared three approaches, differing in the amino acid mutations introduced in the Zif-268 parent template, and the mode of binding they try to mimic, i.e., modular and synergistic mode of binding.

Results

Comparative evaluation of the three strategies reveals that the synergistic mode of binding appears to mimic the ideal mechanism of DNA-zinc finger protein binding. Analysis of the predictions made by all three strategies indicates a strong dependence of zinc finger binding specificity on the amino acid propensity and the position of a 3-bp DNA sub-site in the target DNA sequence. Moreover, the binding affinity of the individual zinc fingers was found to increase in the order Finger 1 < Finger 2 < Finger 3, thus confirming the cooperative effect.

Conclusions

Our analysis offers novel insights into the prediction of ZFPs for target DNA sequences and the approaches have been made available as an easy to use web server at http://web.iitd.ac.in/~sundar/zifpredict_ihbe.",2016-12-22 +22962466,EnrichNet: network-based gene set enrichment analysis.,"

Motivation

Assessing functional associations between an experimentally derived gene or protein set of interest and a database of known gene/protein sets is a common task in the analysis of large-scale functional genomics data. For this purpose, a frequently used approach is to apply an over-representation-based enrichment analysis. However, this approach has four drawbacks: (i) it can only score functional associations of overlapping gene/proteins sets; (ii) it disregards genes with missing annotations; (iii) it does not take into account the network structure of physical interactions between the gene/protein sets of interest and (iv) tissue-specific gene/protein set associations cannot be recognized.

Results

To address these limitations, we introduce an integrative analysis approach and web-application called EnrichNet. It combines a novel graph-based statistic with an interactive sub-network visualization to accomplish two complementary goals: improving the prioritization of putative functional gene/protein set associations by exploiting information from molecular interaction networks and tissue-specific gene expression data and enabling a direct biological interpretation of the results. By using the approach to analyse sets of genes with known involvement in human diseases, new pathway associations are identified, reflecting a dense sub-network of interactions between their corresponding proteins.

Availability

EnrichNet is freely available at http://www.enrichnet.org.

Contact

Natalio.Krasnogor@nottingham.ac.uk, reinhard.schneider@uni.lu or avalencia@cnio.es

Supplementary information

Supplementary data are available at Bioinformatics Online.",2012-09-01 +28289827,Correlation of Prediction and Actual Outcome of Three-Dimensional Simulation in Breast Augmentation Using a Cloud-Based Program.,"

Background

Breast augmentation is among the most frequently performed cosmetic plastic surgeries. Providing patients with ""realistic"" 3D simulations of breast augmentation outcomes is becoming increasingly common. Until recently, such programs were costly and required significant equipment, training, and office space. New simple user-friendly cloud-based programs have been developed, but to date there remains a paucity of objective evidence comparing these 3D simulations with the post-operative outcomes.

Objectives

To determine the aesthetic similarity between pre-operative 3D simulation generated by Crisalix and real post-operative outcomes.

Methods

A retrospective review of 20 patients receiving bilateral breast augmentation was conducted comparing 6-month post-operative outcomes with 3D simulation using Crisalix software. Similarities between post-operative and simulated images were measured by three attending plastic surgeons and ten plastic surgery residents using a series of parameters.

Results

Assessment reveals similarity between the 3D simulation and 6-month post-operative images for overall appearance, breast height, breast width, breast volume, breast projection, and nipple correction. Crisalix software generated more representative simulations for symmetric breasts than for tuberous or ptotic breasts. Comparison of overall aesthetic outcome to simulation showed that the post-operative outcome was more appealing for the symmetric and tuberous breasts and less appealing for the ptotic breasts.

Conclusions

Our data suggest that Crisalix offers a good overall 3D simulated image of post-operative breast augmentation outcomes. Improvements to the simulation of the post-operative outcomes for ptotic and tuberous breasts would result in greater predictive capabilities of Crisalix. Collectively, Crisalix offers good predictive simulations for symmetric breasts.

Level of evidence iv

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266 .",2017-03-13 +20949389,The AIDS and Cancer Specimen Resource.,"The AIDS and Cancer Specimen Resource (ACSR) is a cooperative agreement among the United States National Cancer Institute (NCI) (Office of the Director, Office of HIV and AIDS Malignancy (OHAM)) and regional US consortia, University of California, San Francisco (West Coast), George Washington University (East Coast), and The Ohio State University (Mid-Region). The ACSR's main objective is to collect, preserve, and disperse HIV-related tissues and biologic fluids along with clinical data to qualified investigators with a focus on HIV/AIDS-related malignancies. The ACSR biorepository has more than 265,000 human HIV-positive and control samples available from 39 processing types, 16 specimen types, and 52 anatomical site types. These HIV-infected biological fluids and tissues are made available to funded approved investigators at no fee. Technical support such as HIV DNA identification in tissues and tissue microarray (TMA) blocks are available to assist approved investigators. Research needs may be filled through ACSR cooperative arrangements when not met by currently banked material. Those participating with the ACSR are expected to share their research findings with the scientific community. Some 117 abstract/poster and podium reports at national and international scientific meetings and 94 publications have been contributed to the scientific literature (as of 2010). 
Investigators can browse the ACSR Internet site at http://acsr.ucsf.edu for biospecimens to support their scientific initiatives, including basic, translational, biomarker discovery, and molecular epidemiology studies.",2011-01-01 +23651891,Has improved hand hygiene compliance reduced the risk of hospital-acquired infections among hospitalized patients in Ontario? Analysis of publicly reported patient safety data from 2008 to 2011.,"

Design

 Prospective, observational, ecological, time series, cross-sectional study examining the association between hand hygiene compliance (HHC) rates and the incidence of hospital-acquired infections.

Setting

 Acute care hospitals (N = 166) located in the province of Ontario, Canada.

Methods

 All data were extracted from the Ontario patient safety indicator database ( http://www.hqontario.ca/public-reporting/patient-safety). Complete data were available for 166 acute care hospitals from October 1, 2008, to December 31, 2011. The rates of Clostridium difficile infection (CDI) are reported monthly, methicillin-resistant Staphylococcus aureus (MRSA) bacteremia quarterly, and HHC rates yearly. Trends and associations for each indicator were evaluated by ordinary least squares regression (HHC), zero-inflated Poisson regression (MRSA bacteremia), or Poisson regression (CDI). Dependent variables included in the regression analyses were extracted from the same database and included year, healthcare region, and type of hospital (teaching or small or large community).

Results

Compared to those in 2008, reported HHC rates improved every year both before and after environment/patient contact (range, 10.6%-25.3%). Compared to those in 2008, there was no corresponding change in the rates of MRSA bacteremia; however, the rates of CDI decreased in 2009 but were not statistically significantly decreased from baseline in either 2010 or 2011. No consistent association was demonstrated between changes in the rates of HHC and these two healthcare-associated infections (HAIs).

Conclusions

 Despite significant improvements in reported rates of HHC among healthcare personnel in Ontario's hospitals, we could not demonstrate a positive ecological impact on rates of these two HAIs.",2013-04-23 +23060433,Dr. PIAS 2.0: an update of a database of predicted druggable protein-protein interactions.,"Druggable Protein-protein Interaction Assessment System (Dr. PIAS) is a database of druggable protein-protein interactions (PPIs) predicted by our support vector machine (SVM)-based method. Since the first publication of this database, Dr. PIAS has been updated to version 2.0. PPI data have been increased considerably, from 71,500 to 83,324 entries. As the new positive instances in our method, 4 PPIs and 10 tertiary structures have been added. This addition increases the prediction accuracy of our SVM classifier in comparison with the previous classifier, despite the number of added PPIs and structures is small. We have introduced the novel concept of 'similar positives' of druggable PPIs, which will help researchers discover small compounds that can inhibit predicted druggable PPIs. Dr. PIAS will aid the effective search for druggable PPIs from a mine of interactome data being rapidly accumulated. Dr. PIAS 2.0 is available at http://www.drpias.net.",2012-10-10 +25328540,NGS-Logistics: federated analysis of NGS sequence variants across multiple locations.,"As many personal genomes are being sequenced, collaborative analysis of those genomes has become essential. However, analysis of personal genomic data raises important privacy and confidentiality issues. We propose a methodology for federated analysis of sequence variants from personal genomes. Specific base-pair positions and/or regions are queried for samples to which the user has access but also for the whole population. The statistics results do not breach data confidentiality but allow further exploration of the data; researchers can negotiate access to relevant samples through pseudonymous identifiers. 
This approach minimizes the impact on data confidentiality while enabling powerful data analysis by gaining access to important rare samples. Our methodology is implemented in an open source tool called NGS-Logistics, freely available at https://ngsl.esat.kuleuven.be.",2014-09-17 +28092522,Active Self-Paced Learning for Cost-Effective and Progressive Face Identification.,"This paper aims to develop a novel cost-effective framework for face identification, which progressively maintains a batch of classifiers with the increasing face images of different individuals. By naturally combining two recently rising techniques: active learning (AL) and self-paced learning (SPL), our framework is capable of automatically annotating new instances and incorporating them into training under weak expert recertification. We first initialize the classifier using a few annotated samples for each individual, and extract image features using the convolutional neural nets. Then, a number of candidates are selected from the unannotated samples for classifier updating, in which we apply the current classifiers ranking the samples by the prediction confidence. In particular, our approach utilizes the high-confidence and low-confidence samples in the self-paced and the active user-query way, respectively. The neural nets are later fine-tuned based on the updated classifiers. Such heuristic implementation is formulated as solving a concise active SPL optimization problem, which also advances the SPL development by supplementing a rational dynamic curriculum constraint. The new model finely accords with the ""instructor-student-collaborative"" learning mode in human education. The advantages of this proposed framework are two-folds: i) The required number of annotated samples is significantly decreased while the comparable performance is guaranteed. A dramatic reduction of user effort is also achieved over other state-of-the-art active learning techniques. 
ii) The mixture of SPL and AL effectively improves not only the classifier accuracy compared to existing AL/SPL methods but also the robustness against noisy data. We evaluate our framework on two challenging datasets, which include hundreds of persons under diverse conditions, and demonstrate very promising results. Please find the code of this project at: http://hcp.sysu.edu.cn/projects/aspl/.",2017-01-16 +23164367,Estimating relative abundances of proteins from shotgun proteomics data.,"

Background

Spectral counting methods provide an easy means of identifying proteins with differing abundances between complex mixtures using shotgun proteomics data. The crux spectral-counts command, implemented as part of the Crux software toolkit, implements four previously reported spectral counting methods, the spectral index (SI(N)), the exponentially modified protein abundance index (emPAI), the normalized spectral abundance factor (NSAF), and the distributed normalized spectral abundance factor (dNSAF).

Results

We compared the reproducibility and the linearity relative to each protein's abundance of the four spectral counting metrics. Our analysis suggests that NSAF yields the most reproducible counts across technical and biological replicates, and both SI(N) and NSAF achieve the best linearity.

Conclusions

With the crux spectral-counts command, Crux provides open-source modular methods to analyze mass spectrometry data for identifying and now quantifying peptides and proteins. The C++ source code, compiled binaries, spectra and sequence databases are available at http://noble.gs.washington.edu/proj/crux-spectral-counts.",2012-11-19 +26578606,BubbleTree: an intuitive visualization to elucidate tumoral aneuploidy and clonality using next generation sequencing data.,"Tumors are characterized by properties of genetic instability, heterogeneity, and significant oligoclonality. Elucidating this intratumoral heterogeneity is challenging but important. In this study, we propose a framework, BubbleTree, to characterize the tumor clonality using next generation sequencing (NGS) data. BubbleTree simultaneously elucidates the complexity of a tumor biopsy, estimating cancerous cell purity, tumor ploidy, allele-specific copy number, and clonality and represents this in an intuitive graph. We further developed a three-step heuristic method to automate the interpretation of the BubbleTree graph, using a divide-and-conquer strategy. In this study, we demonstrated the performance of BubbleTree with comparisons to similar commonly used tools such as THetA2, ABSOLUTE, AbsCN-seq and ASCAT, using both simulated and patient-derived data. BubbleTree outperformed these tools, particularly in identifying tumor subclonal populations and polyploidy. We further demonstrated BubbleTree's utility in tracking clonality changes from patients' primary to metastatic tumor and dating somatic single nucleotide and copy number variants along the tumor clonal evolution. Overall, the BubbleTree graph and corresponding model is a powerful approach to provide a comprehensive spectrum of the heterogeneous tumor karyotype in human tumors. 
BubbleTree is R-based and freely available to the research community (https://www.bioconductor.org/packages/release/bioc/html/BubbleTree.html).",2015-11-17 +26030926,SDMdata: A Web-Based Software Tool for Collecting Species Occurrence Records.,"It is important to easily and efficiently obtain high quality species distribution data for predicting the potential distribution of species using species distribution models (SDMs). There is a need for a powerful software tool to automatically or semi-automatically assist in identifying and correcting errors. Here, we use Python to develop a web-based software tool (SDMdata) to easily collect occurrence data from the Global Biodiversity Information Facility (GBIF) and check species names and the accuracy of coordinates (latitude and longitude). It is an open source software (GNU Affero General Public License/AGPL licensed) allowing anyone to access and manipulate the source code. SDMdata is available online free of charge from .",2015-06-01 +22701463,Medicago PhosphoProtein Database: a repository for Medicago truncatula phosphoprotein data.,"The ability of legume crops to fix atmospheric nitrogen via a symbiotic association with soil rhizobia makes them an essential component of many agricultural systems. Initiation of this symbiosis requires protein phosphorylation-mediated signaling in response to rhizobial signals named Nod factors. Medicago truncatula (Medicago) is the model system for studying legume biology, making the study of its phosphoproteome essential. Here, we describe the Medicago PhosphoProtein Database (MPPD; http://phospho.medicago.wisc.edu), a repository built to house phosphoprotein, phosphopeptide, and phosphosite data specific to Medicago. Currently, the MPPD holds 3,457 unique phosphopeptides that contain 3,404 non-redundant sites of phosphorylation on 829 proteins. Through the web-based interface, users are allowed to browse identified proteins or search for proteins of interest. 
Furthermore, we allow users to conduct BLAST searches of the database using both peptide sequences and phosphorylation motifs as queries. The data contained within the database are available for download to be investigated at the user's discretion. The MPPD will be updated continually with novel phosphoprotein and phosphopeptide identifications, with the intent of constructing an unparalleled compendium of large-scale Medicago phosphorylation data.",2012-06-11 +27845358,Recommendations for the National Institute for Neurologic Disorders and Stroke spinal cord injury common data elements for children and youth with SCI.,"

Study design

In 2014, the adult spinal cord injury (SCI) common data element (CDE) recommendations were made available. This project was a review of the adult SCI CDE for relevance to children and youth with SCI.

Objectives

The objective of this study was to review the National Institute of Neurologic Disorders and Stroke (NINDS) adult SCI CDEs for relevance to children and youth with SCI.

Setting

International.

Methods

The pediatric working group consisted of international members with varied fields of expertise related to pediatric SCI. The group convened biweekly meetings for 6 months in 2015. All of the adult SCI CDEs were reviewed, evaluated and modified/created for four age groups: 0-5 years, 6-12 years, 13-15 years and 16-18 years. Whenever possible, results of published research studies were used to guide recommendations. In the absence of empirical support, grey literature and international content expert consensus were garnered. Existing pediatric NINDS CDEs and new CDEs were developed in areas where adult recommendations were not appropriate. After internal working group review of domain recommendations, these pediatric CDEs were vetted during a public review from November through December 2015.

Results

Version 1.0 of the pediatric SCI CDEs was posted in February 2016.

Conclusion

The pediatric SCI CDEs are incorporated directly into the NINDS SCI CDE sets and can be found at https://commondataelements.ninds.nih.gov.",2016-11-15 +26223200,Knowledge-based reasoning to annotate noncoding RNA using multi-agent system.,"Noncoding RNAs (ncRNAs) have been the focus of intense research over the last few years. Since characteristics and signals of ncRNAs are not entirely known, researchers use different computational tools together with their biological knowledge to predict putative ncRNAs. In this context, this work presents ncRNA-Agents, a multi-agent system to annotate ncRNAs based on the output of different tools, using inference rules to simulate biologists' reasoning. Experiments with data from the fungus Saccharomyces cerevisiae allowed to measure the performance of ncRNA-Agents, with better sensibility, when compared to Infernal, a widely used tool for annotating ncRNA. Besides, data of the Schizosaccharomyces pombe and Paracoccidioides brasiliensis fungi identified novel putative ncRNAs, which demonstrated the usefulness of our approach. NcRNA-Agents can be found at: http://www.biomol.unb.br/ncrna-agents.",2015-06-24 +27587700,Logical model specification aided by model-checking techniques: application to the mammalian cell cycle regulation.,"

Motivation

Understanding the temporal behaviour of biological regulatory networks requires the integration of molecular information into a formal model. However, the analysis of model dynamics faces a combinatorial explosion as the number of regulatory components and interactions increases.

Results

We use model-checking techniques to verify sophisticated dynamical properties resulting from the model regulatory structure in the absence of kinetic assumption. We demonstrate the power of this approach by analysing a logical model of the molecular network controlling mammalian cell cycle. This approach enables a systematic analysis of model properties, the delineation of model limitations, and the assessment of various refinements and extensions based on recent experimental observations. The resulting logical model accounts for the main irreversible transitions between cell cycle phases, the sequential activation of cyclins, and the inhibitory role of Skp2, and further emphasizes the multifunctional role for the cell cycle inhibitor Rb.

Availability and implementation

The original and revised mammalian cell cycle models are available in the model repository associated with the public modelling software GINsim (http://ginsim.org/node/189).

Contact

thieffry@ens.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +28075490,Soluble transferrin receptor and risk of type 2 diabetes in the obese and nonobese.,"

Background

Studies evaluating the relationship between soluble transferrin receptor (sTfR), a biomarker inversely related to body iron stores, and risk of type 2 diabetes mellitus (T2DM) are scarce and inconclusive. Furthermore, sTfR concentrations have been observed to be significantly higher in obese than in nonobese individuals. Therefore, the aim of this study was to assess the relationship between sTfR and the risk of T2DM in obese and nonobese subjects.

Design

A nested case-control study of 153 cases of newly diagnosed diabetic subjects, 73 obese and 80 nonobese, and 306 individually matched controls, 138 obese and 166 nonobese, who did not develop T2DM for a median 6-year follow-up (interquartile range: 3·9-6·5) was conducted using data from the PREvention with MEDiterranean Diet (PREDIMED) cohort (http://www.controlled-trials.com/ISRCTN35739639). Cases and controls were matched for age (≤ 67 vs. > 67 years), gender, dietary intervention group and BMI (≤ 27 vs. > 27 kg/m2 ).

Results

Waist circumference is the main determinant of sTfR concentrations in the whole sample (β = 0·476, P < 0·001), in the obese (β = 0·802, P < 0·001) and the nonobese (β = 0·455, P = 0·003). Furthermore, sTfR is directly associated with the risk of T2DM in obese individuals (OR = 2·79; 95% CI: 1·35-5·77, P = 0·005) and inversely associated in nonobese individuals (OR = 0·40; 95% CI: 0·20-0·79, P = 0·015).

Conclusions

The association between sTfR levels and risk of T2DM in a population at high cardiovascular risk depend on the presence or absence of obesity. While in nonobese subjects elevated sTfR levels are associated with a decreased risk of developing T2DM, in obese subjects the risk increases. This suggests that obesity alters the relationship between sTfR and T2DM incidence.",2017-02-11 +25583448,De novo assembly of bacterial transcriptomes from RNA-seq data.,"Transcriptome assays are increasingly being performed by high-throughput RNA sequencing (RNA-seq). For organisms whose genomes have not been sequenced and annotated, transcriptomes must be assembled de novo from the RNA-seq data. Here, we present novel algorithms, specific to bacterial gene structures and transcriptomes, for analysis of bacterial RNA-seq data and de novo transcriptome assembly. The algorithms are implemented in an open source software system called Rockhopper 2. We find that Rockhopper 2 outperforms other de novo transcriptome assemblers and offers accurate and efficient analysis of bacterial RNA-seq data. Rockhopper 2 is available at http://cs.wellesley.edu/~btjaden/Rockhopper .",2015-01-13 +22144203,DeepBase: annotation and discovery of microRNAs and other noncoding RNAs from deep-sequencing data.,"Recent advances in high-throughput deep-sequencing technology have produced large numbers of short and long RNA sequences and enabled the detection and profiling of known and novel microRNAs (miRNAs) and other noncoding RNAs (ncRNAs) at unprecedented sensitivity and depth. In this chapter, we describe the use of deepBase, a database that we have developed to integrate all public deep-sequencing data and to facilitate the comprehensive annotation and discovery of miRNAs and other ncRNAs from these data. 
deepBase provides an integrative, interactive, and versatile web graphical interface to evaluate miRBase-annotated miRNA genes and other known ncRNAs, explores the expression patterns of miRNAs and other ncRNAs, and discovers novel miRNAs and other ncRNAs from deep-sequencing data. deepBase also provides a deepView genome browser to comparatively analyze these data at multiple levels. deepBase is available at http://deepbase.sysu.edu.cn/.",2012-01-01 +22434847,Ontology searching and browsing at the Rat Genome Database.,"The Rat Genome Database (RGD) is the premier repository of rat genomic and genetic data and currently houses over 40 000 rat gene records, as well as human and mouse orthologs, 1857 rat and 1912 human quantitative trait loci (QTLs) and 2347 rat strains. Biological information curated for these data objects includes disease associations, phenotypes, pathways, molecular functions, biological processes and cellular components. RGD uses more than a dozen different ontologies to standardize annotation information for genes, QTLs and strains. That means a lot of time can be spent searching and browsing ontologies for the appropriate terms needed both for curating and mining the data. RGD has upgraded its ontology term search to make it more versatile and more robust. A term search result is connected to a term browser so the user can fine-tune the search by viewing parent and children terms. Most publicly available term browsers display a hierarchical organization of terms in an expandable tree format. RGD has replaced its old tree browser format with a 'driller' type of browser that allows quicker drilling up and down through the term branches, which has been confirmed by testing. The RGD ontology report pages have also been upgraded. Expanded functionality allows more choice in how annotations are displayed and what subsets of annotations are displayed. 
The new ontology search, browser and report features have been designed to enhance both manual data curation and manual data extraction. DATABASE URL: http://rgd.mcw.edu/rgdweb/ontology/search.html.",2012-03-20 +26002692,SurgiCal Obesity Treatment Study (SCOTS): protocol for a national prospective cohort study of patients undergoing bariatric surgery in Scotland.,"

Introduction

The efficacy of bariatric surgery for large-scale, long-term weight loss is well established. However, many questions remain over the continual benefits and cost-effectiveness of that weight loss for overall health, particularly when accounting for potential complications and adverse events of surgery. Health research institutes in the UK and the USA have called for high-quality longitudinal cohort studies of patients undergoing bariatric surgery, assessing outcomes such as surgical complications, mortality, diabetes remission, microvascular complications, cardiovascular events, mental health, cost and healthcare use.

Methods and analysis

SurgiCal Obesity Treatment Study (SCOTS) is a national, prospective, observational, cohort study of patients undergoing primary bariatric surgical procedures in Scotland. This study aims to recruit 2000 patients and conduct a follow-up for 10 years postbariatric surgery using multiple data collection methods: surgeon-recorded data, electronic health record linkage, and patient-reported outcome measures. Outcomes measured will include: mortality, weight change, diabetes, surgical, cardiovascular, cancer, behavioural, reproductive/urological and nutritional variables. Healthcare utilisation and economic productivity will be collected to inform cost-effectiveness analysis.

Ethics and dissemination

The study has received a favourable ethical opinion from the West of Scotland Research Ethics committee. All publications arising from this cohort study will be published in open-access peer-reviewed journals. All SCOTS investigators (all members of the research team at every recruiting site) will have the ability to propose research suggestions and potential publications using SCOTS data; a publications committee will approve all requests for use of SCOTS data and propose writing committees and timelines. Lay-person summaries of all research findings will be published simultaneously on the SCOTS website (http://www.scotsurgeystudy.org.uk).",2015-05-22 +26216453,BioCluster: tool for identification and clustering of Enterobacteriaceae based on biochemical data.,"Presumptive identification of different Enterobacteriaceae species is routinely achieved based on biochemical properties. Traditional practice includes manual comparison of each biochemical property of the unknown sample with known reference samples and inference of its identity based on the maximum similarity pattern with the known samples. This process is labor-intensive, time-consuming, error-prone, and subjective. Therefore, automation of sorting and similarity in calculation would be advantageous. Here we present a MATLAB-based graphical user interface (GUI) tool named BioCluster. This tool was designed for automated clustering and identification of Enterobacteriaceae based on biochemical test results. In this tool, we used two types of algorithms, i.e., traditional hierarchical clustering (HC) and the Improved Hierarchical Clustering (IHC), a modified algorithm that was developed specifically for the clustering and identification of Enterobacteriaceae species. IHC takes into account the variability in result of 1-47 biochemical tests within this Enterobacteriaceae family. This tool also provides different options to optimize the clustering in a user-friendly way. 
Using computer-generated synthetic data and some real data, we have demonstrated that BioCluster has high accuracy in clustering and identifying enterobacterial species based on biochemical test data. This tool can be freely downloaded at http://microbialgen.du.ac.bd/biocluster/.",2015-06-01 +23626002,KGVDB: a population-based genomic map of CNVs tagged by SNPs in Koreans.,"

Summary

Despite a growing interest in a correlation between copy number variations (CNVs) and flanking single nucleotide polymorphisms, few databases provide such information. In particular, most information on CNV available so far was obtained in Caucasian and Yoruba populations, and little is known about CNV in Asian populations. This article presents a database that provides CNV regions tagged by single nucleotide polymorphisms in about 4700 Koreans, which were detected under strict quality control, manually curated and experimentally validated.

Availability

KGVDB is freely available for non-commercial use at http://biomi.cdc.go.kr/KGVDB.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-26 +25485661,What can 1 billion trials tell us about visual search?,"Mobile technology (e.g., smartphones and tablets) has provided psychologists with a wonderful opportunity: through careful design and implementation, mobile applications can be used to crowd source data collection. By garnering massive amounts of data from a wide variety of individuals, it is possible to explore psychological questions that have, to date, been out of reach. Here we discuss 2 examples of how data from the mobile game Airport Scanner (Kedlin Co., http://www.airportscannergame.com) can be used to address questions about the nature of visual search that pose intractable problems for laboratory-based research. Airport Scanner is a successful mobile game with millions of unique users and billions of individual trials, which allows for examining nuanced visual search questions. The goals of the current Observation Report were to highlight the growing opportunity that mobile technology affords psychological research and to provide an example roadmap of how to successfully collect usable data.",2014-12-08 +21720920,"The British Lexicon Project: lexical decision data for 28,730 monosyllabic and disyllabic English words.","We present a new database of lexical decision times for English words and nonwords, for which two groups of British participants each responded to 14,365 monosyllabic and disyllabic words and the same number of nonwords for a total duration of 16 h (divided over multiple sessions). This database, called the British Lexicon Project (BLP), fills an important gap between the Dutch Lexicon Project (DLP; Keuleers, Diependaele, & Brysbaert, Frontiers in Language Sciences. Psychology, 1, 174, 2010) and the English Lexicon Project (ELP; Balota et al., 2007), because it applies the repeated measures design of the DLP to the English language. 
The high correlation between the BLP and ELP data indicates that a high percentage of variance in lexical decision data sets is systematic variance, rather than noise, and that the results of megastudies are rather robust with respect to the selection and presentation of the stimuli. Because of its design, the BLP makes the same analyses possible as the DLP, offering researchers with a new interesting data set of word-processing times for mixed effects analyses and mathematical modeling. The BLP data are available at http://crr.ugent.be/blp and as Electronic Supplementary Materials.",2012-03-01 +22903636,"PACSY, a relational database management system for protein structure and chemical shift analysis.","PACSY (Protein structure And Chemical Shift NMR spectroscopY) is a relational database management system that integrates information from the Protein Data Bank, the Biological Magnetic Resonance Data Bank, and the Structural Classification of Proteins database. PACSY provides three-dimensional coordinates and chemical shifts of atoms along with derived information such as torsion angles, solvent accessible surface areas, and hydrophobicity scales. PACSY consists of six relational table types linked to one another for coherence by key identification numbers. Database queries are enabled by advanced search functions supported by an RDBMS server such as MySQL or PostgreSQL. PACSY enables users to search for combinations of information from different database sources in support of their research. Two software packages, PACSY Maker for database creation and PACSY Analyzer for database analysis, are available from http://pacsy.nmrfam.wisc.edu.",2012-08-19 +25649620,Methods for the detection and assembly of novel sequence in high-throughput sequencing data.,"

Motivation

Large insertions of novel sequence are an important type of structural variants. Previous studies used traditional de novo assemblers for assembling non-mapping high-throughput sequencing (HTS) or capillary reads and then tried to anchor them in the reference using paired read information.

Results

We present approaches for detecting insertion breakpoints and targeted assembly of large insertions from HTS paired data: BASIL and ANISE. On near identity repeats that are hard for assemblers, ANISE employs a repeat resolution step. This results in far better reconstructions than obtained by the compared methods. On simulated data, we found our insert assembler to be competitive with the de novo assemblers ABYSS and SGA while yielding already anchored inserted sequence as opposed to unanchored contigs as from ABYSS/SGA. On real-world data, we detected novel sequence in a human individual and thoroughly validated the assembled sequence. ANISE was found to be superior to the competing tool MindTheGap on both simulated and real-world data.

Availability and implementation

ANISE and BASIL are available for download at http://www.seqan.de/projects/herbarium under a permissive open source license.",2015-02-02 +26065494,Carotta: Revealing Hidden Confounder Markers in Metabolic Breath Profiles.,"Computational breath analysis is a growing research area aiming at identifying volatile organic compounds (VOCs) in human breath to assist medical diagnostics of the next generation. While inexpensive and non-invasive bioanalytical technologies for metabolite detection in exhaled air and bacterial/fungal vapor exist and the first studies on the power of supervised machine learning methods for profiling of the resulting data were conducted, we lack methods to extract hidden data features emerging from confounding factors. Here, we present Carotta, a new cluster analysis framework dedicated to uncovering such hidden substructures by sophisticated unsupervised statistical learning methods. We study the power of transitivity clustering and hierarchical clustering to identify groups of VOCs with similar expression behavior over most patient breath samples and/or groups of patients with a similar VOC intensity pattern. This enables the discovery of dependencies between metabolites. On the one hand, this allows us to eliminate the effect of potential confounding factors hindering disease classification, such as smoking. On the other hand, we may also identify VOCs associated with disease subtypes or concomitant diseases. Carotta is an open source software with an intuitive graphical user interface promoting data handling, analysis and visualization. The back-end is designed to be modular, allowing for easy extensions with plugins in the future, such as new clustering methods and statistics. It does not require much prior knowledge or technical skills to operate. We demonstrate its power and applicability by means of one artificial dataset. We also apply Carotta exemplarily to a real-world example dataset on chronic obstructive pulmonary disease (COPD). 
While the artificial data are utilized as a proof of concept, we will demonstrate how Carotta finds candidate markers in our real dataset associated with confounders rather than the primary disease (COPD) and bronchial carcinoma (BC). Carotta is publicly available at http://carotta.compbio.sdu.dk [1].",2015-06-10 +28455332,Absence of ppGpp Leads to Increased Mobilization of Intermediately Accumulated Poly(3-Hydroxybutyrate) in Ralstonia eutropha H16. ,"In this study, we constructed a set of Ralstonia eutropha H16 strains with single, double, or triple deletions of the (p)ppGpp synthase/hydrolase (spoT1), (p)ppGpp synthase (spoT2), and/or polyhydroxybutyrate (PHB) depolymerase (phaZa1 or phaZa3) gene, and we determined the impact on the levels of (p)ppGpp and on accumulated PHB. Mutants with deletions of both the spoT1 and spoT2 genes were unable to synthesize detectable amounts of (p)ppGpp and accumulated only minor amounts of PHB, due to PhaZa1-mediated depolymerization of PHB. In contrast, unusually high levels of PHB were found in strains in which the (p)ppGpp concentration was increased by the overexpression of (p)ppGpp synthase (SpoT2) and the absence of (p)ppGpp hydrolase. Determination of (p)ppGpp levels in wild-type R. eutropha under different growth conditions and induction of the stringent response by amino acid analogs showed that the concentrations of (p)ppGpp during the growth phase determine the amount of PHB remaining in later growth phases by influencing the efficiency of the PHB mobilization system in stationary growth. The data reported for a previously constructed ΔspoT2 strain (C. J. Brigham, D. R. Speth, C. Rha, and A. J. Sinskey, Appl Environ Microbiol 78:8033-8044, 2012, https://doi.org/10.1128/AEM.01693-12) were identified as due to an experimental error in strain construction, and our results are in contrast to the previous indication that the spoT2 gene product is essential for PHB accumulation in R. 
eutropha. IMPORTANCE Polyhydroxybutyrate (PHB) is an important intracellular carbon and energy storage compound in many prokaryotes and helps cells survive periods of starvation and other stress conditions. Research activities in several laboratories over the past 3 decades have shown that both PHB synthase and PHB depolymerase are constitutively expressed in most PHB-accumulating bacteria, such as Ralstonia eutropha. This implies that PHB synthase and depolymerase activities must be well regulated in order to avoid a futile cycle of simultaneous PHB synthesis and PHB degradation (mobilization). Previous reports suggested that the stringent response in Rhizobium etli and R. eutropha is involved in the regulation of PHB metabolism. However, the levels of (p)ppGpp and the influence of those levels on PHB accumulation and PHB mobilization have not yet been determined for any PHB-accumulating species. In this study, we optimized a (p)ppGpp extraction procedure and a high-performance liquid chromatography-mass spectrometry (HPLC-MS)-based detection method for the quantification of (p)ppGpp in R. eutropha. This enabled us to study the relationship between the concentrations of (p)ppGpp and the accumulated levels of PHB in the wild type and in several constructed mutant strains. We show that overproduction of the alarmone (p)ppGpp correlated with reduced growth and massive overproduction of PHB. In contrast, in the absence of (p)ppGpp, mobilization of PHB was dramatically enhanced.",2017-06-16 +23536820,Comparative GO: a web application for comparative gene ontology and gene ontology-based gene selection in bacteria.,"

Unlabelled

The primary means of classifying new functions for genes and proteins relies on Gene Ontology (GO), which defines genes/proteins using a controlled vocabulary in terms of their Molecular Function, Biological Process and Cellular Component. The challenge is to present this information to researchers to compare and discover patterns in multiple datasets using visually comprehensible and user-friendly statistical reports. Importantly, while there are many GO resources available for eukaryotes, there are none suitable for simultaneous, graphical and statistical comparison between multiple datasets. In addition, none of them supports comprehensive resources for bacteria. By using Streptococcus pneumoniae as a model, we identified and collected GO resources including genes, proteins, taxonomy and GO relationships from NCBI, UniProt and GO organisations. Then, we designed database tables in PostgreSQL database server and developed a Java application to extract data from source files and loaded into database automatically. We developed a PHP web application based on Model-View-Control architecture, used a specific data structure as well as current and novel algorithms to estimate GO graphs parameters. We designed different navigation and visualization methods on the graphs and integrated these into graphical reports. This tool is particularly significant when comparing GO groups between multiple samples (including those of pathogenic bacteria) from different sources simultaneously. Comparing GO protein distribution among up- or down-regulated genes from different samples can improve understanding of biological pathways, and mechanism(s) of infection. It can also aid in the discovery of genes associated with specific function(s) for investigation as a novel vaccine or therapeutic targets.

Availability

http://turing.ersa.edu.au/BacteriaGO.",2013-03-11 +24363380,BiPACE 2D--graph-based multiple alignment for comprehensive 2D gas chromatography-mass spectrometry.,"

Motivation

Comprehensive 2D gas chromatography-mass spectrometry is an established method for the analysis of complex mixtures in analytical chemistry and metabolomics. It produces large amounts of data that require semiautomatic, but preferably automatic handling. This involves the location of significant signals (peaks) and their matching and alignment across different measurements. To date, there exist only a few openly available algorithms for the retention time alignment of peaks originating from such experiments that scale well with increasing sample and peak numbers, while providing reliable alignment results.

Results

We describe BiPACE 2D, an automated algorithm for retention time alignment of peaks from 2D gas chromatography-mass spectrometry experiments and evaluate it on three previously published datasets against the mSPA, SWPA and Guineu algorithms. We also provide a fourth dataset from an experiment studying the H2 production of two different strains of Chlamydomonas reinhardtii that is available from the MetaboLights database together with the experimental protocol, peak-detection results and manually curated multiple peak alignment for future comparability with newly developed algorithms.

Availability and implementation

BiPACE 2D is contained in the freely available Maltcms framework, version 1.3, hosted at http://maltcms.sf.net, under the terms of the L-GPL v3 or Eclipse Open Source licenses. The software used for the evaluation along with the underlying datasets is available at the same location. The C.reinhardtii dataset is freely available at http://www.ebi.ac.uk/metabolights/MTBLS37.",2013-12-20 +23203868,COXPRESdb: a database of comparative gene coexpression networks of eleven species for mammals.,"Coexpressed gene databases are valuable resources for identifying new gene functions or functional modules in metabolic pathways and signaling pathways. Although coexpressed gene databases are a fundamental platform in the field of plant biology, their use in animal studies is relatively limited. The COXPRESdb (http://coxpresdb.jp) provides coexpression relationships for multiple animal species, as comparisons of coexpressed gene lists can enhance the reliability of gene coexpression determinations. Here, we report the updates of the database, mainly focusing on the following two points. First, we updated our coexpression data by including recent microarray data for the previous seven species (human, mouse, rat, chicken, fly, zebrafish and nematode) and adding four new species (monkey, dog, budding yeast and fission yeast), along with a new human microarray platform. A reliability scoring function was also implemented, based on coexpression conservation to filter out coexpression with low reliability. Second, the network drawing function was updated, to implement automatic cluster analyses with enrichment analyses in Gene Ontology and in cis elements, along with interactive network analyses with Cytoscape Web. 
With these updates, COXPRESdb will become a more powerful tool for analyses of functional and regulatory networks of genes in a variety of animal species.",2012-11-29 +23143107,"ChiTaRS: a database of human, mouse and fruit fly chimeric transcripts and RNA-sequencing data.","Chimeric RNAs that comprise two or more different transcripts have been identified in many cancers and among the Expressed Sequence Tags (ESTs) isolated from different organisms; they might represent functional proteins and produce different disease phenotypes. The ChiTaRS database of Chimeric Transcripts and RNA-Sequencing data (http://chitars.bioinfo.cnio.es/) collects more than 16 000 chimeric RNAs from humans, mice and fruit flies, 233 chimeras confirmed by RNA-seq reads and ∼2000 cancer breakpoints. The database indicates the expression and tissue specificity of these chimeras, as confirmed by RNA-seq data, and it includes mass spectrometry results for some human entries at their junctions. Moreover, the database has advanced features to analyze junction consistency and to rank chimeras based on the evidence of repeated junction sites. Finally, 'Junction Search' screens through the RNA-seq reads found at the chimeras' junction sites to identify putative junctions in novel sequences entered by users. Thus, ChiTaRS is an extensive catalog of human, mouse and fruit fly chimeras that will extend our understanding of the evolution of chimeric transcripts in eukaryotes and can be advantageous in the analysis of human cancer breakpoints.",2012-11-09 +28669938,Individual and Joint Effects of Early-Life Ambient Exposure and Maternal Prepregnancy Obesity on Childhood Overweight or Obesity.,"

Background

Although previous studies suggest that exposure to traffic-related pollution during childhood increases the risk of childhood overweight or obesity (COWO), the role of early life exposure to fine particulate matter (aerodynamic diameter <2.5 μm; PM2.5) and its joint effect with the mother’s prepregnancy body mass index (MPBMI) on COWO remain unclear.

Objectives

The present study was conducted to examine the individual and joint effects of ambient PM2.5 exposures and MPBMI on the risk of COWO.

Methods

We estimated exposures to ambient PM2.5 in utero and during the first 2 y of life (F2YL), using data from the U.S. Environmental Protection Agency’s (EPA’s) Air Quality System matched to residential address, in 1,446 mother–infant pairs who were recruited at birth from 1998 and followed up prospectively through 2012 at the Boston Medical Center in Massachusetts. We quantified the individual and joint effects of PM2.5 exposure with MPBMI on COWO, defined as the child’s age- and sex-specific BMI z-score ≥85th percentile at the last well-child care visit between 2 and 9 y of age. Additivity was assessed by estimating the reduced excess risk due to interaction.

Results

Comparing the highest and lowest quartiles of PM2.5, the adjusted relative risks (RRs) [95% confidence intervals (CIs)] of COWO were 1.3 (95% CI: 1.1, 1.5), 1.2 (95% CI: 1.0, 1.4), 1.2 (95% CI: 1.0, 1.4), 1.3 (95% CI: 1.1, 1.6), 1.3 (95% CI: 1.1, 1.5) and 1.3 (1.1, 1.5) during preconception; the first, second, and third trimesters; the entire period of pregnancy; and F2YL, respectively. Spline regression showed a dose–response relationship between PM2.5 levels and COWO after a threshold near the median exposure (10.46 μg/m3–10.89 μg/m3). Compared with their counterparts, children of obese mothers exposed to high levels of PM2.5 had the highest risk of COWO [RR≥2.0, relative excess risk due to interaction (RERI) not significant].

Conclusions

In the present study, we observed that early life exposure to PM2.5 may play an important role in the early life origins of COWO and may increase the risk of COWO in children of mothers who were overweight or obese before pregnancy beyond the risk that can be attributed to MPBMI alone. Our findings emphasize the clinical and public health policy relevance of early life PM2.5 exposure. https://doi.org/10.1289/EHP261",2017-06-14 +26955310,"Validation of acute physiologic and chronic health evaluation II scoring system software developed at The Aga Khan University, Pakistan.","

Objective

To assess the predictive performance of Acute Physiologic and Chronic Health Evaluation II (APACHE II) software available on the hospital intranet and analyze interrater reliability of calculating the APACHE II score by the gold standard manual method or automatically using the software.

Materials and methods

An expert scorer not involved in the data collection had calculated APACHE II score of 213 patients admitted to surgical Intensive Care Unit using the gold standard manual method for a previous study performed in the department. The same data were entered into the computer software available on the hospital intranet (http://intranet/apacheii) to recalculate the APACHE II score automatically along with the predicted mortality. Receiver operating characteristic curve (ROC), Hosmer-Lemeshow goodness-of-fit statistical test and Pearson's correlation coefficient were computed.

Results

The 213 patients had an average APACHE II score of 17.20 ± 8.24, the overall mortality rate was 32.8% and standardized mortality ratio was 1.00. The area under the ROC curve of 0.827 was significantly >0.5 (P < 0.01) and had confidence interval of 0.77-0.88. The goodness-of-fit test showed a good calibration (H = 5.46, P = 0.71). Interrater reliability using Pearson's product moment correlations demonstrated a strong positive relationship between the computer and the manual expert scorer (r = 0.98, P = 0.0005).

Conclusion

APACHE II software available on the hospital's intranet has satisfactory calibration and discrimination and interrater reliability is good when compared with the gold standard manual method.",2016-01-01 +26827237,"DOMMINO 2.0: integrating structurally resolved protein-, RNA-, and DNA-mediated macromolecular interactions. ","Macromolecular interactions are formed between proteins, DNA and RNA molecules. Being a principle building block in macromolecular assemblies and pathways, the interactions underlie most of cellular functions. Malfunctioning of macromolecular interactions is also linked to a number of diseases. Structural knowledge of the macromolecular interaction allows one to understand the interaction's mechanism, determine its functional implications and characterize the effects of genetic variations, such as single nucleotide polymorphisms, on the interaction. Unfortunately, until now the interactions mediated by different types of macromolecules, e.g. protein-protein interactions or protein-DNA interactions, are collected into individual and unrelated structural databases. This presents a significant obstacle in the analysis of macromolecular interactions. For instance, the homogeneous structural interaction databases prevent scientists from studying structural interactions of different types but occurring in the same macromolecular complex. Here, we introduce DOMMINO 2.0, a structural Database Of Macro-Molecular INteractiOns. Compared to DOMMINO 1.0, a comprehensive database on protein-protein interactions, DOMMINO 2.0 includes the interactions between all three basic types of macromolecules extracted from PDB files. DOMMINO 2.0 is automatically updated on a weekly basis. It currently includes ∼1,040,000 interactions between two polypeptide subunits (e.g. domains, peptides, termini and interdomain linkers), ∼43,000 RNA-mediated interactions, and ∼12,000 DNA-mediated interactions. 
All protein structures in the database are annotated using SCOP and SUPERFAMILY family annotation. As a result, protein-mediated interactions involving protein domains, interdomain linkers, C- and N- termini, and peptides are identified. Our database provides an intuitive web interface, allowing one to investigate interactions at three different resolution levels: whole subunit network, binary interaction and interaction interface. Database URL: http://dommino.org.",2016-01-30 +26827236,OGDD (Olive Genetic Diversity Database): a microsatellite markers' genotypes database of worldwide olive trees for cultivar identification and virgin olive oil traceability. ,"Olive (Olea europaea), whose importance is mainly due to nutritional and health features, is one of the most economically significant oil-producing trees in the Mediterranean region. Unfortunately, the increasing market demand towards virgin olive oil could often result in its adulteration with less expensive oils, which is a serious problem for the public and quality control evaluators of virgin olive oil. Therefore, to avoid frauds, olive cultivar identification and virgin olive oil authentication have become a major issue for the producers and consumers of quality control in the olive chain. Presently, genetic traceability using SSR is the cost effective and powerful marker technique that can be employed to resolve such problems. However, to identify an unknown monovarietal virgin olive oil cultivar, a reference system has become necessary. Thus, an Olive Genetic Diversity Database (OGDD) (http://www.bioinfo-cbs.org/ogdd/) is presented in this work. It is a genetic, morphologic and chemical database of worldwide olive tree and oil having a double function. 
In fact, besides being a reference system generated for the identification of unknown olive or virgin olive oil cultivars based on their microsatellite allele size(s), it provides users additional morphological and chemical information for each identified cultivar. Currently, OGDD is designed to enable users to easily retrieve and visualize biologically important information (SSR markers, and olive tree and oil characteristics of about 200 cultivars worldwide) using a set of efficient query interfaces and analysis tools. It can be accessed through a web service from any modern programming language using a simple hypertext transfer protocol call. The web site is implemented in Java, JavaScript, PHP, HTML and Apache with all major browsers supported. Database URL: http://www.bioinfo-cbs.org/ogdd/.",2016-01-30 +23193271,"GenomeRNAi: a database for cell-based and in vivo RNAi phenotypes, 2013 update.","RNA interference (RNAi) represents a powerful method to systematically study loss-of-function phenotypes on a large scale with a wide variety of biological assays, constituting a rich source for the assignment of gene function. The GenomeRNAi database (http://www.genomernai.org) makes available RNAi phenotype data extracted from the literature for human and Drosophila. It also provides RNAi reagent information, along with an assessment as to their efficiency and specificity. This manuscript describes an update of the database previously featured in the NAR Database Issue. The new version has undergone a complete re-design of the user interface, providing an intuitive, flexible framework for additional functionalities. Screen information and gene-reagent-phenotype associations are now available for download. The integration with other resources has been improved by allowing in-links via GenomeRNAi screen IDs, or external gene or reagent identifiers. 
A distributed annotation system (DAS) server enables the visualization of the phenotypes and reagents in the context of a genome browser. We have added a page listing 'frequent hitters', i.e. genes that show a phenotype in many screens, which might guide on-going RNAi studies. Structured annotation guidelines have been established to facilitate consistent curation, and a submission template for direct submission by data producers is available for download.",2012-11-27 +26840333,Regulation of Expression and Evolution of Genes in Plastids of Rhodophytic Branch. ,A novel algorithm and original software were used to cluster all proteins encoded in plastids of 72 species of the rhodophytic branch. The results are publicly available at http://lab6.iitp.ru/ppc/redline72/ in a database that allows fast identification of clusters (protein families) both by a fragment of an amino acid sequence and by a phylogenetic profile of a protein. No such integral clustering with the corresponding functions can be found in the public domain. The putative regulons of the transcription factors Ycf28 and Ycf29 encoded in the plastids were identified using the clustering and the database. A regulation of translation initiation was proposed for the ycf24 gene in plastids of certain red algae and apicomplexans as well as a regulation of a putative gene in apicoplasts of Babesia spp. and Theileria parva. The conserved regulation of the ycf24 gene expression and specificity alternation of the transcription factor Ycf28 were shown in the plastids. A phylogenetic tree of plastids was generated for the rhodophytic branch. The hypothesis of the origin of apicoplasts from the common ancestor of all apicomplexans from plastids of red algae was confirmed.,2016-01-29 +26909938,Regulation of bolting and identification of the α-tubulin gene family in Brassica rapa L. ssp pekinensis. 
,"Microtubules are important components of eukaryotic cells, and they play vital roles in cell morphogenesis, carrying of signaling molecules, transport of materials, and establishing the cell polarity. During bolting of biennial plants, cell division and elongation are involved, and cell elongation inevitably involves the microtubules arrangement and expression of related genes. So we deduce that it is of great significance to figure out the mechanism of bolting and flowering in which TUA genes are involved. In the present study, bioinformatic methods were used to predict and identify the α-tubulin gene family (BrTUAs) in Brassica rapa L. ssp pekinensis (Chinese cabbage) through the alignment of AtTUA gene sequence from Arabidopsis thaliana with the B. rapa genome database (http://brassicadb.org/brad/) using the basic local alignment search tool. The change in the structure and functions of BrTUAs during the process of evolution, cis-acting elements in the promoter sequences of BrTUAs, and the expression of the identified genes was also analyzed. Twelve members of the α-tubulin gene family were identified from Chinese cabbage. The gene length, intron, exon, and promoter regions were determined to have changed significantly during the genome evolution. Only five of the 12 members were encoded completely and were observed to differ in their spatial and temporal expression. The five BrTUA promoter sequences contained different numbers of cis-elements responsive to light and low-temperature response, cis-elements responsive among which hormonal responses were significantly different. We also report that the BrTUAs were involved in the regulation of the bolting in Chinese cabbage, and propose that this process could be controlled by regulating the expression of BrTUAs.",2016-01-29 +27547274,Neutrophil-to-lymphocyte ratio in occlusive vascular diseases: the literature review of the past 10 years.,"

Background

This study aims to evaluate the results of studies investigating neutrophil-to-lymphocyte ratio (NLR) and to identify the prognostic and diagnostic value of NLR in occlusive vascular diseases.

Methods

With the aim of identifying the studies related to NLR, a search was performed on http://www.ncbi.nlm.nih.gov/pubmed by using the key words ""neutrophil lymphocyte ratio"" between January 2005 and December 2014. All of the original articles were evaluated according to date of publications, countries, clinics and topics. Studies about occlusive vascular diseases were evaluated according to their qualifications, review methods and results. SPSS for Windows 16.0 was used in data analysis and data were expressed as mean, standard deviation and percentage.

Results

A total of 735 original research articles were investigated. The number of publications has shown a regular logarithmic increase over the years. Thirty-two percent of all publications were performed by clinics in Turkey and 56.4% were performed by general-oncological surgery and cardiology clinics. A total of 107 publications were identified to be about occlusive vascular diseases; 80.3% of these publications were found to be prognostic and 19.6% to be diagnostic; 82.2% of them were found to be planned as retrospective and 17.7% as prospective. In 95.3% of prognostic publications, there was a positive correlation between high NLR values at admission and poor prognosis. In 95.3% of diagnostic publications, high NLR values at admission were identified to be significant diagnostically.

Conclusion

Elevated neutrophil-to-lymphocyte ratio at admission, could be used as a diagnostic and/or prognostic parameter in occlusive vascular diseases.",2016-01-01 +23193289,"PANTHER in 2013: modeling the evolution of gene function, and other gene attributes, in the context of phylogenetic trees.","The data and tools in PANTHER-a comprehensive, curated database of protein families, trees, subfamilies and functions available at http://pantherdb.org-have undergone continual, extensive improvement for over a decade. Here, we describe the current PANTHER process as a whole, as well as the website tools for analysis of user-uploaded data. The main goals of PANTHER remain essentially unchanged: the accurate inference (and practical application) of gene and protein function over large sequence databases, using phylogenetic trees to extrapolate from the relatively sparse experimental information from a few model organisms. Yet the focus of PANTHER has continually shifted toward more accurate and detailed representations of evolutionary events in gene family histories. The trees are now designed to represent gene family evolution, including inference of evolutionary events, such as speciation and gene duplication. Subfamilies are still curated and used to define HMMs, but gene ontology functional annotations can now be made at any node in the tree, and are designed to represent gain and loss of function by ancestral genes during evolution. Finally, PANTHER now includes stable database identifiers for inferred ancestral genes, which are used to associate inferred gene attributes with particular genes in the common ancestral genomes of extant species.",2012-11-27 +22900683,HTRIdb: an open-access database for experimentally verified human transcriptional regulation interactions.,"

Background

The modeling of interactions among transcription factors (TFs) and their respective target genes (TGs) into transcriptional regulatory networks is important for the complete understanding of regulation of biological processes. In the case of experimentally verified human TF-TG interactions, there is no database at present that explicitly provides such information even though many databases containing human TF-TG interaction data have been available. In an effort to provide researchers with a repository of experimentally verified human TF-TG interactions from which such interactions can be directly extracted, we present here the Human Transcriptional Regulation Interactions database (HTRIdb).

Description

The HTRIdb is an open-access database that can be searched via a user-friendly web interface and the retrieved TF-TG interactions data and the associated protein-protein interactions can be downloaded or interactively visualized as a network through the web version of the popular Cytoscape visualization tool, the Cytoscape Web. Moreover, users can improve the database quality by uploading their own interactions and indicating inconsistencies in the data. So far, HTRIdb has been populated with 284 TFs that regulate 18302 genes, totaling 51871 TF-TG interactions. HTRIdb is freely available at http://www.lbbc.ibb.unesp.br/htri.

Conclusions

HTRIdb is a powerful user-friendly tool from which human experimentally validated TF-TG interactions can be easily extracted and used to construct transcriptional regulation interaction networks enabling researchers to decipher the regulation of biological processes.",2012-08-17 +23192551,Viral genome analysis and knowledge management.,"One of the challenges of genetic data analysis is to combine information from sources that are distributed around the world and accessible through a wide array of different methods and interfaces. The HIV database and its footsteps, the hepatitis C virus (HCV) and hemorrhagic fever virus (HFV) databases, have made it their mission to make different data types easily available to their users. This involves a large amount of behind-the-scenes processing, including quality control and analysis of the sequences and their annotation. Gene and protein sequences are distilled from the sequences that are stored in GenBank; to this end, both submitter annotation and script-generated sequences are used. Alignments of both nucleotide and amino acid sequences are generated, manually curated, distilled into an alignment model, and regenerated in an iterative cycle that results in ever better new alignments. Annotation of epidemiological and clinical information is parsed, checked, and added to the database. User interfaces are updated, and new interfaces are added based upon user requests. Vital for its success, the database staff are heavy users of the system, which enables them to fix bugs and find opportunities for improvement. In this chapter we describe some of the infrastructure that keeps these heavily used analysis platforms alive and vital after nearly 25 years of use. 
The database/analysis platforms described in this chapter can be accessed at http://hiv.lanl.gov http://hcv.lanl.gov http://hfv.lanl.gov.",2013-01-01 +27331109,Aggregation kinetic dataset to determine the stability of the purified and refolded recombinant ppTvCP4 protein of Trichomonas vaginalis.,"The recombinant ppTvCP4 (ppTvCP4r) protein, a specific inhibitor of the proteolytic activity and virulence properties of Trichomonas vaginalis, depending on cathepsin L-like cysteine proteinases (CPs) (http:dx.doi.org/ 10.1016/j.biocel.2014.12.001[1], http:dx.doi.org/ 10.1016/j.micinf.2013.09.002[2], http:dx.doi.org/ 10.1155/2015/946787[3]) was stable in the elution buffer up to two months at 4 °C. However, it was prone to aggregate in PBS (functional assay buffer) [1]. Therefore, before functional assays, the aggregation kinetic of refolded ppTvCP4r was determined after the exchange to PBS. Samples of purified and refolded ppTvCP4r (0.15 mg/ml) in PBS were incubated for 0-24 h at 4 and 25 °C, spun down, measured the protein concentration in the supernatant and checked for the presence of aggregated protein in the pellet. The concentration of protein progressively decreased in the supernatant through time at both temperatures as the protein aggregated. Data in this article are related to the research paper [1].",2016-06-02 +23193293,Human Ageing Genomic Resources: integrated databases and tools for the biology and genetics of ageing.,"The Human Ageing Genomic Resources (HAGR, http://genomics.senescence.info) is a freely available online collection of research databases and tools for the biology and genetics of ageing. 
HAGR features now several databases with high-quality manually curated data: (i) GenAge, a database of genes associated with ageing in humans and model organisms; (ii) AnAge, an extensive collection of longevity records and complementary traits for >4000 vertebrate species; and (iii) GenDR, a newly incorporated database, containing both gene mutations that interfere with dietary restriction-mediated lifespan extension and consistent gene expression changes induced by dietary restriction. Since its creation about 10 years ago, major efforts have been undertaken to maintain the quality of data in HAGR, while further continuing to develop, improve and extend it. This article briefly describes the content of HAGR and details the major updates since its previous publications, in terms of both structure and content. The completely redesigned interface, more intuitive and more integrative of HAGR resources, is also presented. Altogether, we hope that through its improvements, the current version of HAGR will continue to provide users with the most comprehensive and accessible resources available today in the field of biogerontology.",2012-11-27 +23180781,InnateDB: systems biology of innate immunity and beyond--recent updates and continuing curation.,"InnateDB (http://www.innatedb.com) is an integrated analysis platform that has been specifically designed to facilitate systems-level analyses of mammalian innate immunity networks, pathways and genes. In this article, we provide details of recent updates and improvements to the database. InnateDB now contains >196 000 human, mouse and bovine experimentally validated molecular interactions and 3000 pathway annotations of relevance to all mammalian cellular systems (i.e. not just immune relevant pathways and interactions). 
In addition, the InnateDB team has, to date, manually curated in excess of 18 000 molecular interactions of relevance to innate immunity, providing unprecedented insight into innate immunity networks, pathways and their component molecules. More recently, InnateDB has also initiated the curation of allergy- and asthma-related interactions. Furthermore, we report a range of improvements to our integrated bioinformatics solutions including web service access to InnateDB interaction data using Proteomics Standards Initiative Common Query Interface, enhanced Gene Ontology analysis for innate immunity, and the availability of new network visualizations tools. Finally, the recent integration of bovine data makes InnateDB the first integrated network analysis platform for this agriculturally important model organism.",2012-11-24 +23242389,"Safety of tumor necrosis factor inhibitors use for rheumatoid arthritis and ankylosing spondylitis in Africa, the Middle East, and Asia: focus on severe infections and tuberculosis.","Multiple studies of patients in Western countries with rheumatoid arthritis (RA) and ankylosing spondylitis (AS) have indicated increased risk for active tuberculosis (TB) and other infections among these individuals. It has also been consistently reported that patients receiving tumor necrosis factor (TNF) inhibitors for these conditions have higher rates of active TB and other infections than RA or AS patients not receiving these medications. These issues have been studied less extensively in the Asia and Africa-Middle East regions, and information from these regions is important because of higher rates of TB in the general population. This paper reviews studies of RA and AS patients from Asia, Africa, and the Middle East who received TNF inhibitors. A literature search was conducted using http://www.ncbi.nlm.nih.gov/pubmed to collect and report these data. The years included in the PubMed literature search ranged from January 2000 to October 2011. 
Additionally, information from the China Hospital Knowledge Database was used to report data from Chinese patients with RA and AS treated with TNF inhibitors. Results from these studies indicate that the risk for active TB and other infections in AS and RA patients from Asia, Africa, and the Middle East are increased in patients receiving TNF inhibitors and that the risk is higher among those treated with monoclonal antibodies versus soluble TNF receptor.",2012-12-15 +27347003,Log::ProgramInfo: A Perl module to collect and log data for bioinformatics pipelines.,"

Background

To reproduce and report a bioinformatics analysis, it is important to be able to determine the environment in which a program was run. It can also be valuable when trying to debug why different executions are giving unexpectedly different results.

Results

Log::ProgramInfo is a Perl module that writes a log file at the termination of execution of the enclosing program, to document useful execution characteristics. This log file can be used to re-create the environment in order to reproduce an earlier execution. It can also be used to compare the environments of two executions to determine whether there were any differences that might affect (or explain) their operation.

Availability

The source is available on CPAN (Macdonald and Boutros, Log-ProgramInfo. http://search.cpan.org/~boutroslb/Log-ProgramInfo/).

Conclusion

Using Log::ProgramInfo in programs creating result data for publishable research, and including the Log::ProgramInfo output log as part of the publication of that research is a valuable method to assist others to duplicate the programming environment as a precursor to validating and/or extending that research.",2016-06-24 +22290570,VAR-MD: a tool to analyze whole exome-genome variants in small human pedigrees with mendelian inheritance.,"The analysis of variants generated by exome sequencing (ES) of families with rare Mendelian diseases is a time-consuming, manual process that represents one barrier to applying the technology routinely. To address this issue, we have developed a software tool, VAR-MD (http://research.nhgri.nih.gov/software/var-md/), for analyzing the DNA sequence variants produced by human ES. VAR-MD generates a ranked list of variants using predicted pathogenicity, Mendelian inheritance models, genotype quality, and population variant frequency data. VAR-MD was tested using two previously solved data sets and one unsolved data set. In the solved cases, the correct variant was listed at the top of VAR-MD's variant ranking. In the unsolved case, the correct variant was highly ranked, allowing for subsequent identification and validation. We conclude that VAR-MD has the potential to enhance mutation identification using family based, annotated next generation sequencing data. Moreover, we predict an incremental advancement in software performance as the reference databases, such as Single Nucleotide Polymorphism Database and Human Gene Mutation Database, continue to improve.",2012-02-24 +27565583,PseKRAAC: a flexible web server for generating pseudo K-tuple reduced amino acids composition.,"The reduced amino acids perform powerful ability for both simplifying protein complexity and identifying functional conserved regions. However, dealing with different protein problems may need different kinds of cluster methods. 
Encouraged by the success of pseudo-amino acid composition algorithm, we developed a freely available web server, called PseKRAAC (the pseudo K-tuple reduced amino acids composition). By implementing reduced amino acid alphabets, the protein complexity can be significantly simplified, which leads to decrease chance of overfitting, lower computational handicap and reduce information redundancy. PseKRAAC delivers more capability for protein research by incorporating three crucial parameters that describes protein composition. Users can easily generate many different modes of PseKRAAC tailored to their needs by selecting various reduced amino acids alphabets and other characteristic parameters. It is anticipated that the PseKRAAC web server will become a very useful tool in computational proteomics and protein sequence analysis.

Availability and implementation

Freely available on the web at http://bigdata.imu.edu.cn/psekraac CONTACTS: yczuo@imu.edu.cn or imu.hema@foxmail.com or yanglei_hmu@163.com. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-26 +24150944,HRaP: database of occurrence of HomoRepeats and patterns in proteomes.,"We focus our attention on multiple repeats of one amino acid (homorepeats) and create a new database (named HRaP, at http://bioinfo.protres.ru/hrap/) of occurrence of homorepeats and disordered patterns in different proteomes. HRaP is aimed at understanding the amino acid tandem repeat function in different proteomes. Therefore, the database includes 122 proteomes, 97 eukaryotic and 25 bacterial ones that can be divided into 9 kingdoms and 5 phyla of bacteria. The database includes 1,449,561 protein sequences and 771,786 sequences of proteins with GO annotations. We have determined homorepeats and patterns that are associated with some function. Through our web server, the user can do the following: (i) search for proteins with the given homorepeat in 122 proteomes, including GO annotation for these proteins; (ii) search for proteins with the given disordered pattern from the library of disordered patterns constructed on the clustered Protein Data Bank in 122 proteomes, including GO annotations for these proteins; (iii) analyze lengths of homorepeats in different proteomes; (iv) investigate disordered regions in the chosen proteins in 122 proteomes; (v) study the coupling of different homorepeats in one protein; (vi) determine longest runs for each amino acid inside each proteome; and (vii) download the full list of proteins with the given length of a homorepeat.",2013-10-22 +24313344,"Protannotator: a semiautomated pipeline for chromosome-wise functional annotation of the ""missing"" human proteome.","The chromosome-centric human proteome project (C-HPP) aims to define the complete set of proteins encoded in each human chromosome. 
The neXtProt database (September 2013) lists 20,128 proteins for the human proteome, of which 3831 human proteins (∼19%) are considered ""missing"" according to the standard metrics table (released September 27, 2013). In support of the C-HPP initiative, we have extended the annotation strategy developed for human chromosome 7 ""missing"" proteins into a semiautomated pipeline to functionally annotate the ""missing"" human proteome. This pipeline integrates a suite of bioinformatics analysis and annotation software tools to identify homologues and map putative functional signatures, gene ontology, and biochemical pathways. From sequential BLAST searches, we have primarily identified homologues from reviewed nonhuman mammalian proteins with protein evidence for 1271 (33.2%) ""missing"" proteins, followed by 703 (18.4%) homologues from reviewed nonhuman mammalian proteins and subsequently 564 (14.7%) homologues from reviewed human proteins. Functional annotations for 1945 (50.8%) ""missing"" proteins were also determined. To accelerate the identification of ""missing"" proteins from proteomics studies, we generated proteotypic peptides in silico. Matching these proteotypic peptides to ENCODE proteogenomic data resulted in proteomic evidence for 107 (2.8%) of the 3831 ""missing"" proteins, while evidence from a recent membrane proteomic study supported the existence of another 15 ""missing"" proteins. The chromosome-wise functional annotation of all ""missing"" proteins is freely available to the scientific community through our web server (http://biolinfo.org/protannotator).",2013-12-13 +25652394,Standardized evaluation of algorithms for computer-aided diagnosis of dementia based on structural MRI: the CADDementia challenge.,"Algorithms for computer-aided diagnosis of dementia based on structural MRI have demonstrated high performance in the literature, but are difficult to compare as different data sets and methodology were used for evaluation. 
In addition, it is unclear how the algorithms would perform on previously unseen data, and thus, how they would perform in clinical practice when there is no real opportunity to adapt the algorithm to the data at hand. To address these comparability, generalizability and clinical applicability issues, we organized a grand challenge that aimed to objectively compare algorithms based on a clinically representative multi-center data set. Using clinical practice as the starting point, the goal was to reproduce the clinical diagnosis. Therefore, we evaluated algorithms for multi-class classification of three diagnostic groups: patients with probable Alzheimer's disease, patients with mild cognitive impairment and healthy controls. The diagnosis based on clinical criteria was used as reference standard, as it was the best available reference despite its known limitations. For evaluation, a previously unseen test set was used consisting of 354 T1-weighted MRI scans with the diagnoses blinded. Fifteen research teams participated with a total of 29 algorithms. The algorithms were trained on a small training set (n=30) and optionally on data from other sources (e.g., the Alzheimer's Disease Neuroimaging Initiative, the Australian Imaging Biomarkers and Lifestyle flagship study of aging). The best performing algorithm yielded an accuracy of 63.0% and an area under the receiver-operating-characteristic curve (AUC) of 78.8%. In general, the best performances were achieved using feature extraction based on voxel-based morphometry or a combination of features that included volume, cortical thickness, shape and intensity. The challenge is open for new submissions via the web-based framework: http://caddementia.grand-challenge.org.",2015-01-31 +23557111,CUDASW++ 3.0: accelerating Smith-Waterman protein database search by coupling CPU and GPU SIMD instructions.,"

Background

The maximal sensitivity for local alignments makes the Smith-Waterman algorithm a popular choice for protein sequence database search based on pairwise alignment. However, the algorithm is compute-intensive due to a quadratic time complexity. Corresponding runtimes are further compounded by the rapid growth of sequence databases.

Results

We present CUDASW++ 3.0, a fast Smith-Waterman protein database search algorithm, which couples CPU and GPU SIMD instructions and carries out concurrent CPU and GPU computations. For the CPU computation, this algorithm employs SSE-based vector execution units as accelerators. For the GPU computation, we have investigated for the first time a GPU SIMD parallelization, which employs CUDA PTX SIMD video instructions to gain more data parallelism beyond the SIMT execution model. Moreover, sequence alignment workloads are automatically distributed over CPUs and GPUs based on their respective compute capabilities. Evaluation on the Swiss-Prot database shows that CUDASW++ 3.0 gains a performance improvement over CUDASW++ 2.0 up to 2.9 and 3.2, with a maximum performance of 119.0 and 185.6 GCUPS, on a single-GPU GeForce GTX 680 and a dual-GPU GeForce GTX 690 graphics card, respectively. In addition, our algorithm has demonstrated significant speedups over other top-performing tools: SWIPE and BLAST+.

Conclusions

CUDASW++ 3.0 is written in CUDA C++ and PTX assembly languages, targeting GPUs based on the Kepler architecture. This algorithm obtains significant speedups over its predecessor: CUDASW++ 2.0, by benefiting from the use of CPU and GPU SIMD instructions as well as the concurrent execution on CPUs and GPUs. The source code and the simulated data are available at http://cudasw.sourceforge.net.",2013-04-04 +21698393,"A library of cortical morphology analysis tools to study development, aging and genetics of cerebral cortex.","Sharing of analysis techniques and tools is among the main driving forces of modern neuroscience. We describe a library of tools developed to quantify global and regional differences in cortical anatomy in high resolution structural MR images. This library is distributed as a plug-in application for popular structural analysis software, BrainVisa (BV). It contains tools to measure global and regional gyrification, gray matter thickness and sulcal and gyral white matter spans. We provide a description of each tool and examples for several case studies to demonstrate their use. These examples show how the BV library was used to study cortical folding process during antenatal development and recapitulation of this process during cerebral aging. Further, the BV library was used to perform translation research in humans and non-human primates on the genetics of cerebral gyrification. 
This library, including source code and self-contained binaries for popular computer platforms, is available from the NIH-Neuroimaging Informatics Tools and Resources Clearinghouse (NITRC) resource ( http://www.nitrc.org/projects/brainvisa_ext ).",2012-01-01 +26602691,PSORTdb: expanding the bacteria and archaea protein subcellular localization database to better reflect diversity in cell envelope structures.,"Protein subcellular localization (SCL) is important for understanding protein function, genome annotation, and has practical applications such as identification of potential vaccine components or diagnostic/drug targets. PSORTdb (http://db.psort.org) comprises manually curated SCLs for proteins which have been experimentally verified (ePSORTdb), as well as pre-computed SCL predictions for deduced proteomes from bacterial and archaeal complete genomes available from NCBI (cPSORTdb). We now report PSORTdb 3.0. It features improvements increasing user-friendliness, and further expands both ePSORTdb and cPSORTdb with a focus on improving protein SCL data in cases where it is most difficult-proteins associated with non-classical Gram-positive/Gram-negative/Gram-variable cell envelopes. ePSORTdb data curation was expanded, including adding in additional cell envelope localizations, and incorporating markers for cPSORTdb to automatically computationally identify if new genomes to be analysed fall into certain atypical cell envelope categories (i.e. Deinococcus-Thermus, Thermotogae, Corynebacteriales/Corynebacterineae, including Mycobacteria). The number of predicted proteins in cPSORTdb has increased from 3,700,000 when PSORTdb 2.0 was released to over 13,000,000 currently. 
PSORTdb 3.0 will be of wider use to researchers studying a greater diversity of monoderm or diderm microbes, including medically, agriculturally and industrially important species that have non-classical outer membranes or other cell envelope features.",2015-11-23 +27567225,An automated and objective method for age partitioning of reference intervals based on continuous centile curves.,"Reference intervals are the most commonly used decision support tool when interpreting quantitative laboratory results. They may require partitioning to better describe subpopulations that display significantly different reference values. Partitioning by age is particularly important for the paediatric population since there are marked physiological changes associated with growth and maturation. However, most partitioning methods are either technically complex or require prior knowledge of the underlying physiology/biological variation of the population. There is growing interest in the use of continuous centile curves, which provides seamless laboratory reference values as a child grows, as an alternative to rigidly described fixed reference intervals. However, the mathematical functions that describe these curves can be complex and may not be easily implemented in laboratory information systems. Hence, the use of fixed reference intervals is expected to continue for a foreseeable time. We developed a method that objectively proposes optimised age partitions and reference intervals for quantitative laboratory data (http://research.sph.nus.edu.sg/pp/ppResult.aspx), based on the sum of gradient that best describes the underlying distribution of the continuous centile curves. 
It is hoped that this method may improve the selection of age intervals for partitioning, which is receiving increasing attention in paediatric laboratory medicine.",2016-08-25 +23392425,Systematic review of the accuracy of dual-source cardiac CT for detection of arterial stenosis in difficult to image patient groups.,"

Purpose

To assess the diagnostic performance of dual-source cardiac (DSC) computed tomography (CT) newer-generation CT instruments for identifying anatomically significant coronary artery disease (CAD) in patients who are difficult to image by using 64-section CT.

Materials and methods

A literature search comprised bibliographic databases (January 1, 2000, to March 22, 2011, with a pragmatic update on September 6, 2012), trial registries, and conference proceedings. Only studies using invasive coronary angiography as reference standard were included. Risk of bias was assessed (QUADAS-2). Results were stratified according to patient group on the basis of clinical characteristics. Summary estimates of sensitivity and specificity of DSC CT for detecting 50% or greater arterial stenosis were calculated by using a bivariate summary receiver operating characteristic or random-effects model.

Results

Twenty-five studies reported accuracy of DSC CT for diagnosing CAD in difficult to image patients; in 22 studies, one of two CT units of the same manufacturer (Somatom Definition or Somatom Definition Flash) was used, and in the remaining three, a different CT unit of another manufacturer (Aquilion One) was used. The pooled, per-patient estimates of sensitivity were 97.7% (95% confidence interval [CI]: 88.0%, 99.9%) and 97.7% (95% CI: 93.2%, 99.3%) for patients with arrhythmias and high heart rates, respectively. The corresponding pooled estimates of specificity were 81.7% (95% CI: 71.6%, 89.4%) and 86.3% (95% CI: 80.2%, 90.7%), respectively. All data were acquired by using Somatom Definition. In two studies with Somatom and one study with Aquilion One, sensitivity estimates of 90% or greater were reported in patients with previous stent implantations; specificities were 81.7% and 89.5% for Somatom and 81.0% for Aquilion One. In patients with high coronary calcium scores, previous bypass grafts, or obesity, only per-segment or per-artery data were available. Sensitivity estimates remained high (>90% in all but one study), and specificities ranged from 79.1% to 100%. All data were acquired by using Somatom Definition.

Conclusion

DSC CT may be sufficiently accurate to diagnose clinically significant CAD in some or all difficult to image patients.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.13121136/-/DC1.",2013-02-07 +26804550,Molecular mechanisms associated with breast cancer based on integrated gene expression profiling by bioinformatics analysis.,"In this study, we aimed to gain more insights into the underlying molecular mechanisms responsible for breast cancer (BC) progression. Three gene expression profiles of human BC were integrated and used to screen the differentially expressed genes (DEGs) between healthy breast samples and BC samples. Protein-protein interaction (PPI) network of DEGs was constructed by mapping DEGs into the Search Tool for the Retrieval of Interacting Genes (STRING) database; then the subnetworks of PPI were constructed with plug-in, MCODE and DEGs in Subnetwork 1 were analysed based on Kyoto Encyclopaedia of Genes and Genomes (KEGG) pathway database ( http://www.genome.jp/kegg /). In addition, co-expression network of DEGs was established using the Cytoscape. In total, 931 DEGs were selected, including 340 up-regulated genes and 591 down-regulated genes. KEGG pathway analysis for DEGs in Subnetwork 1 showed that the pathogenesis of BC was associated with cell cycle, oocyte meiosis, progesterone-mediated oocyte maturation and p53 signalling pathways. Meanwhile, the most significant-related DEGs were found by co-expression network analysis of DEGs. In conclusion, CCNG1 might be involved in the progression of BC via inhibiting cell proliferation, and ADAMTS1 might play a crucial role in BC development through the regulation of angiogenesis.",2016-01-25 +27054153,Atypical carcinoid and large cell neuroendocrine carcinoma of the lung: a proteomic dataset from formalin-fixed archival samples.,"Here we present a dataset generated using formalin-fixed paraffin-embedded archival samples from two rare lung neuroendocrine tumor subtypes (namely, two atypical carcinoids, ACs, and two large-cell neuroendocrine carcinomas, LCNECs). 
Samples were subjected to a shotgun proteomics pipeline, comprising full-length protein extraction, SDS removal through spin columns, in solution trypsin digestion, long gradient liquid chromatography peptide separation and LTQ-Orbitrap mass spectrometry analysis. A total of 1260 and 2436 proteins were identified in the AC and LCNEC samples, respectively, with FDR <1%. MS data are available in the PeptideAtlas repository at http://www.peptideatlas.org/PASS/PASS00375.",2016-03-09 +27634882,Subpathway-LNCE: Identify dysfunctional subpathways competitively regulated by lncRNAs through integrating lncRNA-mRNA expression profile and pathway topologies.,"Recently, studies have reported that long noncoding RNAs (lncRNAs) can act as modulators of mRNAs through competitively binding to microRNAs (miRNAs) and have relevance to tumorigenesis as well as other diseases. Identify lncRNA competitively regulated subpathway not only can gain insight into the initiation and progression of disease, but also help for understanding the functional roles of lncRNAs in the disease context. Here, we present an effective method, Subpathway-LNCE, which was specifically designed to identify lncRNAs competitively regulated functions and the functional roles of these competitive regulation lncRNAs have not be well characterized in diseases. Moreover, the method integrated lncRNA-mRNA expression profile and pathway topologies. Using prostate cancer datasets and LUAD data sets, we confirmed the effectiveness of our method in identifying disease associated dysfunctional subpathway that regulated by lncRNAs. By analyzing kidney renal clear cell carcinoma related lncRNA competitively regulated subpathway network, we show that Subpathway-LNCE can help uncover disease key lncRNAs. Furthermore, we demonstrated that our method is reproducible and robust. 
Subpathway-LNCE provide a flexible tool to identify lncRNA competitively regulated signal subpathways underlying certain condition, and help to expound the functional roles of lncRNAs in various status. Subpathway-LNCE has been developed as an R package freely available at https://cran.rstudio.com/web/packages/SubpathwayLNCE/.",2016-10-01 +24214955,TFBSshape: a motif database for DNA shape features of transcription factor binding sites.,"Transcription factor binding sites (TFBSs) are most commonly characterized by the nucleotide preferences at each position of the DNA target. Whereas these sequence motifs are quite accurate descriptions of DNA binding specificities of transcription factors (TFs), proteins recognize DNA as a three-dimensional object. DNA structural features refine the description of TF binding specificities and provide mechanistic insights into protein-DNA recognition. Existing motif databases contain extensive nucleotide sequences identified in binding experiments based on their selection by a TF. To utilize DNA shape information when analysing the DNA binding specificities of TFs, we developed a new tool, the TFBSshape database (available at http://rohslab.cmb.usc.edu/TFBSshape/), for calculating DNA structural features from nucleotide sequences provided by motif databases. The TFBSshape database can be used to generate heat maps and quantitative data for DNA structural features (i.e., minor groove width, roll, propeller twist and helix twist) for 739 TF datasets from 23 different species derived from the motif databases JASPAR and UniPROBE. As demonstrated for the basic helix-loop-helix and homeodomain TF families, our TFBSshape database can be used to compare, qualitatively and quantitatively, the DNA binding specificities of closely related TFs and, thus, uncover differential DNA binding specificities that are not apparent from nucleotide sequence alone.",2013-11-07 +22943222,Evaluation of internet derived patient information.,"

Introduction

The internet is a widely used, powerful resource for patients to research medical conditions. There is an extensive amount of information available on the internet. It is important for patient information to be accurate and in an easily accessible format. This article aims to assess the quality of patient information on hydrocephalus and compares the findings with recent evaluations in other surgical specialties.

Methods

The term 'hydrocephalus' was searched for on the search engines http://www.google.com/, http://www.bing.com/ and http://www.yahoo.com/. The top 20 results of these searches were assessed using the University of Michigan consumer health website evaluation checklist.

Results

The quality of patient information websites on hydrocephalus is highly variable. Websites rarely provide sufficient authorship information, do not review their information regularly enough and only reference material occasionally. The background of the provider was found to influence the quality of the website, with academic and care providers creating the best websites.

Conclusions

On comparing our findings with those of recent studies from other surgical specialties, it was found that there was often a conflict of interest between the background of the provider and the information supplied. It is recommended that clinicians personally research material for their patients to be able to guide them to suitable, accurate websites.",2012-07-01 +24507667,Kassiopeia: a database and web application for the analysis of mutually exclusive exomes of eukaryotes.,"

Background

Alternative splicing is an important process in higher eukaryotes that allows obtaining several transcripts from one gene. A specific case of alternative splicing is mutually exclusive splicing, in which exactly one exon out of a cluster of neighbouring exons is spliced into the mature transcript. Recently, a new algorithm for the prediction of these exons has been developed based on the preconditions that the exons of the cluster have similar lengths, sequence homology, and conserved splice sites, and that they are translated in the same reading frame.

Description

In this contribution we introduce Kassiopeia, a database and web application for the generation, storage, and presentation of genome-wide analyses of mutually exclusive exomes. Currently, Kassiopeia provides access to the mutually exclusive exomes of twelve Drosophila species, the thale cress Arabidopsis thaliana, the flatworm Caenorhabditis elegans, and human. Mutually exclusive spliced exons (MXEs) were predicted based on gene reconstructions from Scipio. Based on the standard prediction values, with which 83.5% of the annotated MXEs of Drosophila melanogaster were reconstructed, the exomes contain surprisingly more MXEs than previously supposed and identified. The user can search Kassiopeia using BLAST or browse the genes of each species optionally adjusting the parameters used for the prediction to reveal more divergent or only very similar exon candidates.

Conclusions

We developed a pipeline to predict MXEs in the genomes of several model organisms and a web interface, Kassiopeia, for their visualization. For each gene Kassiopeia provides a comprehensive gene structure scheme, the sequences and predicted secondary structures of the MXEs, and, if available, further evidence for MXE candidates from cDNA/EST data, predictions of MXEs in homologous genes of closely related species, and RNA secondary structure predictions. Kassiopeia can be accessed at http://www.motorprotein.de/kassiopeia.",2014-02-10 +26433280,FAO/INFOODS e-Learning Course on Food Composition Data.,"The FAO/INFOODS e-Learning Course on Food Composition Data was developed to close existing knowledge gaps on food composition of professionals working with those data. It covers the important aspects of food composition, is based on instructional design, is highly interactive and comprises 14 lessons of approximate 10h duration. It was developed primarily for usage in universities, but also suits self-paced learning and blended learning programmes. It is available at: http://www.fao.org/infoods/infoods/training/en/ free-of-charge in English, as on-line version or CD-ROM. Feedback from users was very positive and universities start to incorporate it into their curricula. The translation into other languages and the implementation of a certification and assessment programme are envisaged. e-Learning is cost-effective and reaches a wide audience. The course is expected to contribute to the improved data quality, usage, generation, publication and appreciation of food composition data.",2014-11-15 +25979726,Data Resource Profile: German Health Update (GEDA)--the health interview survey for adults in Germany.,"The German Health Update (GEDA) study is one component of the recently established nationwide health monitoring system administered by the Robert Koch Institute. 
The repeated cross-sectional GEDA surveys aim to provide current data on health and disease, health determinants and time trends in health and morbidity in the adult population in Germany. This forms the basis for planning requirements and recommendations for public health policy. Between 2008 and 2013, three GEDA waves were carried out, involving a total of 62,606 computer-assisted telephone interviews with adults in Germany, living in private household, and reachable via landline. A core set of indicators was used in all GEDA waves to gather information on subjective health and health-related quality of life, chronic diseases, injuries, impairment to health and disabilities, mental health, health behaviours, social determinants, use of health services and socio-demographic characteristics. The data from the GEDA surveys are provided for public use and epidemiological research. After submitting an application form, the data are accessible from: [http://www.rki.de/EN/Content/Health_Monitoring/Public_Use_Files/public_use_file_node.htm].",2015-04-01 +22903802,VariBench: a benchmark database for variations.,"Several computational methods have been developed for predicting the effects of rapidly expanding variation data. Comparison of the performance of tools has been very difficult as the methods have been trained and tested with different datasets. Until now, unbiased and representative benchmark datasets have been missing. We have developed a benchmark database suite, VariBench, to overcome this problem. VariBench contains datasets of experimentally verified high-quality variation data carefully chosen from literature and relevant databases. It provides the mapping of variation position to different levels (protein, RNA and DNA sequences, protein three-dimensional structure), along with identifier mapping to relevant databases. 
VariBench contains the first benchmark datasets for variation effect analysis, a field which is of high importance and where many developments are currently going on. VariBench datasets can be used, for example, to test performance of prediction tools as well as to train novel machine learning-based tools. New datasets will be included and the community is encouraged to submit high-quality datasets to the service. VariBench is freely available at http://structure.bmc.lu.se/VariBench.",2012-10-11 +26341477,A comprehensive catalogue of the coding and non-coding transcripts of the human inner ear.,"The mammalian inner ear consists of the cochlea and the vestibular labyrinth (utricle, saccule, and semicircular canals), which participate in both hearing and balance. Proper development and life-long function of these structures involves a highly complex coordinated system of spatial and temporal gene expression. The characterization of the inner ear transcriptome is likely important for the functional study of auditory and vestibular components, yet, primarily due to tissue unavailability, detailed expression catalogues of the human inner ear remain largely incomplete. We report here, for the first time, comprehensive transcriptome characterization of the adult human cochlea, ampulla, saccule and utricle of the vestibule obtained from patients without hearing abnormalities. Using RNA-Seq, we measured the expression of >50,000 predicted genes corresponding to approximately 200,000 transcripts, in the adult inner ear and compared it to 32 other human tissues. First, we identified genes preferentially expressed in the inner ear, and unique either to the vestibule or cochlea. Next, we examined expression levels of specific groups of potentially interesting RNAs, such as genes implicated in hearing loss, long non-coding RNAs, pseudogenes and transcripts subject to nonsense mediated decay (NMD). 
We uncover the spatial specificity of expression of these RNAs in the hearing/balance system, and reveal evidence of tissue specific NMD. Lastly, we investigated the non-syndromic deafness loci to which no gene has been mapped, and narrow the list of potential candidates for each locus. These data represent the first high-resolution transcriptome catalogue of the adult human inner ear. A comprehensive identification of coding and non-coding RNAs in the inner ear will enable pathways of auditory and vestibular function to be further defined in the study of hearing and balance. Expression data are freely accessible at https://www.tgen.org/home/research/research-divisions/neurogenomics/supplementary-data/inner-ear-transcriptome.aspx.",2015-09-01 +26543893,A structural group-connectome in standard stereotactic (MNI) space.,"A group connectome of 20 subjects has been normalized into standard stereotactic (MNI) space. Data has been processed using the Gibbs' Tracking approach (Reisert et al., 2011) [11] and normalized into standard space using DARTEL (Ashburner, 2007) [1]. All data has been acquired within the scope of the study A. Horn, D. Ostwald, M. Reisert, F. Blankenburg, The structural-functional connectome and the default mode network of the human brain, NeuroImage 102 (2013) 142-151. http://doi.org/10.1016/j.neuroimage.2013.09.069. The utility of this dataset can be described by the following points: In medical studies in which subject-specific dMRI is not available, a standardized connectome may help to gain some canonical insight into white-matter connectivity. The dataset enables scientists who use different modalities (like EEG, MEG etc.) without access to MRI, to combine studies obtained using other methodology with insights from the brain's inner structural formation. 
The dataset could also extend possible claims made by meta-analyzes/literature-based studies.",2015-09-07 +22948725,The Human Gene Mutation Database (HGMD) and its exploitation in the fields of personalized genomics and molecular evolution.,"The Human Gene Mutation Database (HGMD) constitutes a comprehensive core collection of data on germ-line mutations in nuclear genes underlying or associated with human inherited disease (http://www.hgmd.org). Data cataloged include single-base-pair substitutions in coding, regulatory, and splicing-relevant regions, micro-deletions and micro-insertions, indels, and triplet repeat expansions, as well as gross gene deletions, insertions, duplications, and complex rearrangements. Each mutation is entered into HGMD only once, in order to avoid confusion between recurrent and identical-by-descent lesions. By March 2012, the database contained in excess of 123,600 different lesions (HGMD Professional release 2012.1) detected in 4,514 different nuclear genes, with new entries currently accumulating at a rate in excess of 10,000 per annum. ∼6,000 of these entries constitute disease-associated and functional polymorphisms. HGMD also includes cDNA reference sequences for more than 98% of the listed genes.",2012-09-01 +27551106,PPI4DOCK: large scale assessment of the use of homology models in free docking over more than 1000 realistic targets.,"

Motivation

Protein-protein docking methods are of great importance for understanding interactomes at the structural level. It has become increasingly appealing to use not only experimental structures but also homology models of unbound subunits as input for docking simulations. So far we are missing a large scale assessment of the success of rigid-body free docking methods on homology models.

Results

We explored how we could benefit from comparative modelling of unbound subunits to expand docking benchmark datasets. Starting from a collection of 3157 non-redundant, high X-ray resolution heterodimers, we developed the PPI4DOCK benchmark containing 1417 docking targets based on unbound homology models. Rigid-body docking by Zdock showed that for 1208 cases (85.2%), at least one correct decoy was generated, emphasizing the efficiency of rigid-body docking in generating correct assemblies. Overall, the PPI4DOCK benchmark contains a large set of realistic cases and provides new ground for assessing docking and scoring methodologies.

Availability and implementation

Benchmark sets can be downloaded from http://biodev.cea.fr/interevol/ppi4dock/ CONTACT: guerois@cea.fr Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-22 +25495116,AKE - the Accelerated k-mer Exploration web-tool for rapid taxonomic classification and visualization.,"

Background

With the advent of low cost, fast sequencing technologies metagenomic analyses are made possible. The large data volumes gathered by these techniques and the unpredictable diversity captured in them are still, however, a challenge for computational biology.

Results

In this paper we address the problem of rapid taxonomic assignment with small and adaptive data models (< 5 MB) and present the accelerated k-mer explorer (AKE). Acceleration in AKE's taxonomic assignments is achieved by a special machine learning architecture, which is well suited to model data collections that are intrinsically hierarchical. We report classification accuracy reasonably well for ranks down to order, observed on a study on real world data (Acid Mine Drainage, Cow Rumen).

Conclusion

We show that the execution time of this approach is orders of magnitude shorter than competitive approaches and that accuracy is comparable. The tool is presented to the public as a web application (url: https://ani.cebitec.uni-bielefeld.de/ake/ , username: bmc, password: bmcbioinfo).",2014-12-13 +23216677,MALINA: a web service for visual analytics of human gut microbiota whole-genome metagenomic reads.,"MALINA is a web service for bioinformatic analysis of whole-genome metagenomic data obtained from human gut microbiota sequencing. As input data, it accepts metagenomic reads of various sequencing technologies, including long reads (such as Sanger and 454 sequencing) and next-generation (including SOLiD and Illumina). It is the first metagenomic web service that is capable of processing SOLiD color-space reads, to authors' knowledge. The web service allows phylogenetic and functional profiling of metagenomic samples using coverage depth resulting from the alignment of the reads to the catalogue of reference sequences which are built into the pipeline and contain prevalent microbial genomes and genes of human gut microbiota. The obtained metagenomic composition vectors are processed by the statistical analysis and visualization module containing methods for clustering, dimension reduction and group comparison. Additionally, the MALINA database includes vectors of bacterial and functional composition for human gut microbiota samples from a large number of existing studies allowing their comparative analysis together with user samples, namely datasets from Russian Metagenome project, MetaHIT and Human Microbiome Project (downloaded from http://hmpdacc.org). MALINA is made freely available on the web at http://malina.metagenome.ru. 
The website is implemented in JavaScript (using Ext JS), Microsoft .NET Framework, MS SQL, Python, with all major browsers supported.",2012-12-07 +27228152,LncRNApred: Classification of Long Non-Coding RNAs and Protein-Coding Transcripts by the Ensemble Algorithm with a New Hybrid Feature.,"As a novel class of noncoding RNAs, long noncoding RNAs (lncRNAs) have been verified to be associated with various diseases. As large scale transcripts are generated every year, it is significant to accurately and quickly identify lncRNAs from thousands of assembled transcripts. To accurately discover new lncRNAs, we develop a classification tool of random forest (RF) named LncRNApred based on a new hybrid feature. This hybrid feature set includes three new proposed features, which are MaxORF, RMaxORF and SNR. LncRNApred is effective for classifying lncRNAs and protein coding transcripts accurately and quickly. Moreover, our RF model only requests the training using data on human coding and non-coding transcripts. Other species can also be predicted by using LncRNApred. The result shows that our method is more effective compared with the Coding Potential Calculate (CPC). The web server of LncRNApred is available for free at http://mm20132014.wicp.net:57203/LncRNApred/home.jsp.",2016-05-26 +25759670,The Open Physiology workflow: modeling processes over physiology circuitboards of interoperable tissue units.,"A key challenge for the physiology modeling community is to enable the searching, objective comparison and, ultimately, re-use of models and associated data that are interoperable in terms of their physiological meaning. In this work, we outline the development of a workflow to modularize the simulation of tissue-level processes in physiology. In particular, we show how, via this approach, we can systematically extract, parcellate and annotate tissue histology data to represent component units of tissue function. 
These functional units are semantically interoperable, in terms of their physiological meaning. In particular, they are interoperable with respect to [i] each other and with respect to [ii] a circuitboard representation of long-range advective routes of fluid flow over which to model long-range molecular exchange between these units. We exemplify this approach through the combination of models for physiology-based pharmacokinetics and pharmacodynamics to quantitatively depict biological mechanisms across multiple scales. Links to the data, models and software components that constitute this workflow are found at http://open-physiology.org/.",2015-02-24 +26528564,The Systems Biology Markup Language (SBML): Language Specification for Level 3 Version 1 Core.,"Computational models can help researchers to interpret data, understand biological function, and make quantitative predictions. The Systems Biology Markup Language (SBML) is a file format for representing computational models in a declarative form that can be exchanged between different software systems. SBML is oriented towards describing biological processes of the sort common in research on a number of topics, including metabolic pathways, cell signaling pathways, and many others. By supporting SBML as an input/output format, different tools can all operate on an identical representation of a model, removing opportunities for translation errors and assuring a common starting point for analyses and simulations. This document provides the specification for Version 1 of SBML Level 3 Core. The specification defines the data structures prescribed by SBML as well as their encoding in XML, the eXtensible Markup Language. This specification also defines validation rules that determine the validity of an SBML document, and provides many examples of models in SBML form. 
Other materials and software are available from the SBML project web site, http://sbml.org/.",2015-09-04 +23368680,"Genomic differences between cultivated soybean, G. max and its wild relative G. soja.","

Background

Glycine max is an economically important crop and many different varieties of soybean exist around the world. The first draft sequences and gene models of G. max (domesticated soybean) as well as G. soja (wild soybean), both became available in 2010. This opened the door for comprehensive comparative genomics studies between the two varieties.

Results

We have further analysed the sequences and identified the 425 genes that are unique to G. max and unavailable in G. soja. We further studied the genes with significant number of non-synonymous SNPs in their upstream regions. 12 genes involved in seed development, 3 in oil and 6 in protein concentration are unique to G. max. A significant number of unique genes are seen to overlap with the QTL regions of the three traits including seed, oil and protein. We have also developed a graphical chromosome visualizer as part of the Soybean Knowledge Base (SoyKB) tools for molecular breeding, which was used in the analysis and visualization of overlapping QTL regions for multiple traits with the deletions and SNPs in G. soja.

Conclusions

The comparisons between genome sequences of G. max and G. soja show significant differences between the genomic compositions of the two. The differences also highlight the phenotypic differences between the two in terms of seed development, oil and protein traits. These significant results have been integrated into the SoyKB resource and are publicly available for users to browse at http://soykb.org/GSoja.",2013-01-21 +21536137,"Proteopedia: a status report on the collaborative, 3D web-encyclopedia of proteins and other biomolecules.","Proteopedia is a collaborative, 3D web-encyclopedia of protein, nucleic acid and other biomolecule structures. Created as a means for communicating biomolecule structures to a diverse scientific audience, Proteopedia (http://www.proteopedia.org) presents structural annotation in an intuitive, interactive format and allows members of the scientific community to easily contribute their own annotations. Here, we provide a status report on Proteopedia by describing advances in the web resource since its inception three and a half years ago, focusing on features of potential direct use to the scientific community. We discuss its progress as a collaborative 3D-encyclopedia of structures as well as its use as a complement to scientific publications and PowerPoint presentations. We also describe Proteopedia's use for 3D visualization in structure-related pedagogy.",2011-04-23 +23256928,"First proteomic exploration of protein-encoding genes on chromosome 1 in human liver, stomach, and colon.","The launch of the Chromosome-Centric Human Proteome Project provides an opportunity to gain insight into the human proteome. The Chinese Human Chromosome Proteome Consortium has initiated proteomic exploration of protein-encoding genes on human chromosomes 1, 8, and 20. 
Collaboration within the consortium has generated a comprehensive proteome data set using normal and carcinomatous tissues from human liver, stomach, and colon and 13 cell lines originating in these organs. We identified 12,101 proteins (59.8% coverage against Swiss-Prot human entries) with a protein false discovery rate of less than 1%. On chromosome 1, 1,252 proteins mapping to 1,227 genes, representing 60.9% of Swiss-Prot entries, were identified; however, 805 proteins remain unidentified, suggesting that analysis of more diverse samples using more advanced proteomic technologies is required. Genes encoding the unidentified proteins were concentrated in seven blocks, located at p36, q12-21, and q42-44, partly consistent with correlation of these blocks with cancers of the liver, stomach, and colon. Combined transcriptome, proteome, and cofunctionality analyses confirmed 23 coexpression clusters containing 165 genes. Biological information, including chromosome structure, GC content, and protein coexpression pattern was analyzed using multilayered, circular visualization and tabular visualization. Details of data analysis and updates are available in the Chinese Chromosome-Centric Human Proteome Database ( http://proteomeview.hupo.org.cn/chromosome/ ).",2012-12-20 +27275538,Post-transcriptional knowledge in pathway analysis increases the accuracy of phenotypes classification.,"

Motivation

Prediction of phenotypes from high-dimensional data is a crucial task in precision biology and medicine. Many technologies employ genomic biomarkers to characterize phenotypes. However, such elements are not sufficient to explain the underlying biology. To improve this, pathway analysis techniques have been proposed. Nevertheless, such methods have shown lack of accuracy in phenotypes classification.

Results

Here we propose a novel methodology called MITHrIL (Mirna enrIched paTHway Impact anaLysis) for the analysis of signaling pathways, which extends the work of Tarca et al., 2009. MITHrIL augments pathways with missing regulatory elements, such as microRNAs, and their interactions with genes. The method takes as input the expression values of genes and/or microRNAs and returns a list of pathways sorted according to their degree of deregulation, together with the corresponding statistical significance (p-values). Our analysis shows that MITHrIL outperforms its competitors even in the worst case. In addition, our method is able to correctly classify sets of tumor samples drawn from TCGA.

Availability

MITHrIL is freely available at the following URL: http://alpha.dmi.unict.it/mithril/.",2016-08-01 +26721429,Effect of k-tuple length on sample-comparison with high-throughput sequencing data.,"The high-throughput metagenomic sequencing offers a powerful technique to compare the microbial communities. Without requiring extra reference sequences, alignment-free models with short k-tuple (k = 2-10 bp) yielded promising results. Short k-tuples describe the overall statistical distribution, but is hard to capture the specific characteristics inside one microbial community. Longer k-tuple contains more abundant information. However, because the frequency vector of long k-tuple(k ≥ 30 bp) is sparse, the statistical measures designed for short k-tuples are not applicable. In our study, we considered each tuple as a meaningful word and then each sequencing data as a document composed of the words. Therefore, the comparison between two sequencing data is processed as ""topic analysis of documents"" in text mining. We designed a pipeline with long k-tuple features to compare metagenomic samples combined using algorithms from text mining and pattern recognition. The pipeline is available at http://culotuple.codeplex.com/. Experiments show that our pipeline with long k-tuple features: ①separates genomes with high similarity; ②outperforms short k-tuple models in all experiments. When k ≥ 12, the short k-tuple measures are not applicable anymore. When k is between 20 and 40, long k-tuple pipeline obtains much better grouping results; ③is free from the effect of sequencing platforms/protocols. 
③We obtained meaningful and supported biological results on the 40-tuples selected for comparison.",2015-12-22 +,Investigación de Especies Invasoras para Satisfacer las Necesidades del Manejo y Planificación de Recursos,"As zebra mussels (Dreissena polymorpha) continue to spread among inland lakes of the United States and Canada, there is growing interest from professionals, citizens, and other stakeholders to know which lakes are likely to be colonized by zebra mussels. Thus, we developed a classification of lake suitability for zebra mussels on the basis of measured or estimated concentrations of dissolved calcium in lake water and applied the classification to >11,500 lakes in Wisconsin and the Upper Peninsula of Michigan. The majority of lakes (58%) were classified as unsuitable (<10 mg/L Ca) for survival and reproduction of zebra mussels, 27% were identified as suitable (≥21 mg/L Ca), and 15% were classified as borderline suitable (≥10 and <21 mg/L Ca). Of the 77 inland lakes with confirmed zebra mussel records for which data on dissolved calcium were available, our method classified 74 as suitable and 3 as borderline suitable. To communicate this lake‐specific suitability information and to help prioritize regional efforts to monitor and prevent the expansion of zebra mussels and other invasive species, we developed a web‐based interface (available from http://www.aissmartprevention.wisc.edu/). Although we are still uncertain of how access to suitability information ultimately affects decision making, we believe this is a useful case study of building communication channels among researchers, practitioners, and the public.",2011-10-01 +22342955,Online tools for polyphasic analysis of Mycobacterium tuberculosis complex genotyping data: now and next.,"Molecular diagnostics and genotyping of pathogens have become indispensable tools in clinical microbiology and disease surveillance. 
For isolates of the Mycobacterium tuberculosis complex (MTBC, causative agents of tuberculosis), multilocus variable number tandem repeat analysis (MLVA) targeting mycobacterial interspersed repetitive units (MIRU) has been internationally adopted as the new standard, portable, reproducible, and discriminatory typing method. Here, we review new sets of specialized web based bioinformatics tools that have become available for analyzing MLVA data especially in combination with other, complementary genotyping markers (polyphasic analysis). Currently, there are only two databases available that are not restricted to store one kind of genotyping data only, namely SITVIT/SpolDB4 and MIRU-VNTRplus. SITVIT/SpolDB4 (http://www.pasteur-guadeloupe.fr:8081/SITVITDemo) contains spoligotyping data from a large number of strains of diverse origin. However, besides options to query the data, the actual version of SITVIT/SpolDB4 offers no functionality for more complex analysis e.g. tree-based analysis. In comparison, the MIRU-VNTRplus web application (http://www.miru-vntrplus.org), represents a freely accessible service that enables users to analyze genotyping data of their strains alone or in comparison with a currently limited but well characterized reference database of strains representing the major MTBC lineages. Data (MLVA-, spoligotype-, large sequence polymorphism, and single nucleotide polymorphism) can be visualized and analyzed using just one genotyping method or a weighted combination of several markers. A variety of analysis tools are available such as creation of phylogenetic and minimum spanning trees, semi-automated phylogenetic lineage identification based on comparison with the reference database and mapping of geographic information. To facilitate scientific communication, a universal, expanding genotype nomenclature (MLVA MtbC15-9 type) service that can be queried via a web- or a SOAP-interface has been implemented. 
An extensive documentation guides users through all application functions. Perspectives for future development, including generalization to other bacterial species, are presented.",2012-02-10 +27402906,CRISPR-DO for genome-wide CRISPR design and optimization.,"

Motivation

Despite the growing popularity in using CRISPR/Cas9 technology for genome editing and gene knockout, its performance still relies on well-designed single guide RNAs (sgRNA). In this study, we propose a web application for the Design and Optimization (CRISPR-DO) of guide sequences that target both coding and non-coding regions in spCas9 CRISPR system across human, mouse, zebrafish, fly and worm genomes. CRISPR-DO uses a computational sequence model to predict sgRNA efficiency, and employs a specificity scoring function to evaluate the potential of off-target effect. It also provides information on functional conservation of target sequences, as well as the overlaps with exons, putative regulatory sequences and single-nucleotide polymorphisms (SNPs). The web application has a user-friendly genome-browser interface to facilitate the selection of the best target DNA sequences for experimental design.

Availability and implementation

CRISPR-DO is available at http://cistrome.org/crispr/ CONTACT: qiliu@tongji.edu.cn or hanxu@jimmy.harvard.edu or xsliu@jimmy.harvard.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-10 +26139637,"Error filtering, pair assembly and error correction for next-generation sequencing reads.","

Motivation

Next-generation sequencing produces vast amounts of data with errors that are difficult to distinguish from true biological variation when coverage is low.

Results

We demonstrate large reductions in error frequencies, especially for high-error-rate reads, by three independent means: (i) filtering reads according to their expected number of errors, (ii) assembling overlapping read pairs and (iii) for amplicon reads, by exploiting unique sequence abundances to perform error correction. We also show that most published paired read assemblers calculate incorrect posterior quality scores.

Availability and implementation

These methods are implemented in the USEARCH package. Binaries are freely available at http://drive5.com/usearch.

Contact

robert@drive5.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-02 +27244371,Clinical results of single and multiple bioresorbable drug-eluting scaffolds for treatment of de-novo coronary artery disease.,"

Objectives

Data on multiple bioresorbable vascular scaffolds (BVS) for the treatment of coronary lesions are limited. We compared clinical results after implantation of single or multiple BVS for the treatment of de-novo coronary artery disease.

Methods

We enrolled 236 patients with 311 lesions treated with Absorb BVS. Quantitative coronary angiography before and after scaffold implantation was performed. All lesions were predilated. Absorb was implanted with slow inflation and 81% were postdilated with a high-pressure balloon. Patients received dual antiplatelet therapy for 6 months for stable angina pectoris and for 12 months for acute coronary syndrome. Patients were clinically followed for 12 months. Acute gain was 1.39±0.47 mm. Multiple scaffolds per lesion were implanted in 23.8% (N=74/311 lesions). The mean scaffold length was 21 mm for single and 48 mm (range 28-112 mm) for multiple BVS. Periprocedural myocardial infarction (13.5 vs. 4.6%, P<0.013) and target lesion revascularization (6.8 vs. 0.8%; P=0.003) were significantly higher in the multiple-scaffold group compared with the single-scaffold group. There was no definite scaffold thrombosis. (http://www.clinicaltrials.gov, NCT02162056).

Conclusion

Target lesion revascularization within 12 months and periprocedural myocardial infarction were higher for lesions treated with multiple scaffolds compared with lesions treated with single BVS.",2016-11-01 +25707295,IRcall and IRclassifier: two methods for flexible detection of intron retention events from RNA-Seq data.,"

Background

The emergence of next-generation RNA sequencing (RNA-Seq) provides tremendous opportunities for researchers to analyze alternative splicing on a genome-wide scale. However, accurate detection of intron retention (IR) events from RNA-Seq data has remained an unresolved challenge in next-generation sequencing (NGS) studies.

Results

We propose two new methods: IRcall and IRclassifier to detect IR events from RNA-Seq data. Our methods combine together gene expression information, read coverage within an intron, and read counts (within introns, within flanking exons, supporting splice junctions, and overlapping with 5' splice site/ 3' splice site), employing ranking strategy and classifiers to detect IR events. We applied our approaches to one published RNA-Seq data on contrasting skip mutant and wild-type in Arabidopsis thaliana. Compared with three state-of-the-art methods, IRcall and IRclassifier could effectively filter out false positives, and predict more accurate IR events.

Availability

The data and codes of IRcall and IRclassifier are available at http://mlg.hit.edu.cn/ybai/IR/IRcallAndIRclass.html.",2015-01-21 +28508029,A B-Cell Gene Signature Correlates With the Extent of Gluten-Induced Intestinal Injury in Celiac Disease.,"

Background & aims

Celiac disease (CeD) provides an opportunity to study autoimmunity and the transition in immune cells as dietary gluten induces small intestinal lesions.

Methods

Seventy-three celiac disease patients on a long-term, gluten-free diet ingested a known amount of gluten daily for 6 weeks. A peripheral blood sample and intestinal biopsy specimens were taken before and 6 weeks after initiating the gluten challenge. Biopsy results were reported on a continuous numeric scale that measured the villus-height-to-crypt-depth ratio to quantify gluten-induced intestinal injury. Pooled B and T cells were isolated from whole blood, and RNA was analyzed by DNA microarray looking for changes in peripheral B- and T-cell gene expression that correlated with changes in villus height to crypt depth, as patients maintained a relatively healthy intestinal mucosa or deteriorated in the face of a gluten challenge.

Results

Gluten-dependent intestinal damage from baseline to 6 weeks varied widely across all patients, ranging from no change to extensive damage. Genes differentially expressed in B cells correlated strongly with the extent of intestinal damage. A relative increase in B-cell gene expression correlated with a lack of sensitivity to gluten whereas their relative decrease correlated with gluten-induced mucosal injury. A core B-cell gene module, representing a subset of B-cell genes analyzed, accounted for the correlation with intestinal injury.

Conclusions

Genes comprising the core B-cell module showed a net increase in expression from baseline to 6 weeks in patients with little to no intestinal damage, suggesting that these individuals may have mounted a B-cell immune response to maintain mucosal homeostasis and circumvent inflammation. DNA microarray data were deposited at the GEO repository (accession number: GSE87629; available: https://www.ncbi.nlm.nih.gov/geo/).",2017-01-28 +25886979,FastMotif: spectral sequence motif discovery.,"

Motivation

Sequence discovery tools play a central role in several fields of computational biology. In the framework of Transcription Factor binding studies, most of the existing motif finding algorithms are computationally demanding, and they may not be able to support the increasingly large datasets produced by modern high-throughput sequencing technologies.

Results

We present FastMotif, a new motif discovery algorithm that is built on a recent machine learning technique referred to as Method of Moments. Based on spectral decompositions, our method is robust to model misspecifications and is not prone to locally optimal solutions. We obtain an algorithm that is extremely fast and designed for the analysis of big sequencing data. On HT-Selex data, FastMotif extracts motif profiles that match those computed by various state-of-the-art algorithms, but one order of magnitude faster. We provide a theoretical and numerical analysis of the algorithm's robustness and discuss its sensitivity with respect to the free parameters.

Availability and implementation

The Matlab code of FastMotif is available from http://lcsb-portal.uni.lu/bioinformatics.

Contact

vlassis@adobe.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-16 +27801917,Statewide Assessment of Cost-Related Healthcare Access Barriers in Rhode Island.,"Although co-payments and deductibles are means of keeping health expenditures low, they have also been cited as barriers that inhibit patients from accessing necessary healthcare. We aimed to evaluate Rhode Island residents' experiences with cost-related access challenges within the state's healthcare system. We conducted a cross-sectional survey of resident experiences with healthcare in Rhode Island. Our survey instrument was composed of the RAND Corporation ""Short-Form Patient Satisfaction Questionnaire (PSQ-18)"", questions developed by the Rhode Island Office of the Health Insurance Commissioner, and ranking of health priorities based on prior community assessments conducted by the Rhode Island Department of Health. Data were collected at venues across the state as part of the Rhode Island Department of Health 2015 Statewide Health Inventory. From July to August 2015, 404 surveys were completed. We found that 40% of respondents had a co-pay of $20-$50, while 35.7% of respondents had a deductible of greater than $500. Further, one-third of respondents delayed receiving care due to financial barriers. This decision resulted in a worsening condition or hospital visit for nearly half of those respondents. Co-pays and deductibles pose challenges to Rhode Islanders accessing health care. Cost-related barriers to healthcare access should continue to be addressed, especially in the context of preventive care services, which are now being built into health insurance premiums through the Patient Protection and Affordable Care Act. [Full article available at http://rimed.org/rimedicaljournal-2016-11.asp].",2016-11-01 +23630576,CMS: a web-based system for visualization and analysis of genome-wide methylation data of human cancers.,"

Background

DNA methylation of promoter CpG islands is associated with gene suppression, and its unique genome-wide profiles have been linked to tumor progression. Coupled with high-throughput sequencing technologies, it can now efficiently determine genome-wide methylation profiles in cancer cells. Also, experimental and computational technologies make it possible to find the functional relationship between cancer-specific methylation patterns and their clinicopathological parameters.

Methodology/principal findings

Cancer methylome system (CMS) is a web-based database application designed for the visualization, comparison and statistical analysis of human cancer-specific DNA methylation. Methylation intensities were obtained from MBDCap-sequencing, pre-processed and stored in the database. 191 patient samples (169 tumor and 22 normal specimen) and 41 breast cancer cell-lines are deposited in the database, comprising about 6.6 billion uniquely mapped sequence reads. This provides comprehensive and genome-wide epigenetic portraits of human breast cancer and endometrial cancer to date. Two views are proposed for users to better understand methylation structure at the genomic level or systemic methylation alteration at the gene level. In addition, a variety of annotation tracks are provided to cover genomic information. CMS includes important analytic functions for interpretation of methylation data, such as the detection of differentially methylated regions, statistical calculation of global methylation intensities, multiple gene sets of biologically significant categories, interactivity with UCSC via custom-track data. We also present examples of discoveries utilizing the framework.

Conclusions/significance

CMS provides visualization and analytic functions for cancer methylome datasets. A comprehensive collection of datasets, a variety of embedded analytic functions and extensive applications with biological and translational significance make this system powerful and unique in cancer methylation research. CMS is freely accessible at: http://cbbiweb.uthscsa.edu/KMethylomes/.",2013-04-22 +26958625,Nucleotide sequence alignment of hdcA from Gram-positive bacteria.,"The decarboxylation of histidine -carried out mainly by some gram-positive bacteria- yields the toxic dietary biogenic amine histamine (Ladero et al. 2010 〈10.2174/157340110791233256〉 [1], Linares et al. 2016 〈http://dx.doi.org/10.1016/j.foodchem.2015.11.013〉 [2]). The reaction is catalyzed by a pyruvoyl-dependent histidine decarboxylase (Linares et al. 2011 〈10.1080/10408398.2011.582813〉 [3]), which is encoded by the gene hdcA. In order to locate conserved regions in the hdcA gene of Gram-positive bacteria, this article provides a nucleotide sequence alignment of all the hdcA sequences from Gram-positive bacteria present in databases. For further utility and discussion, see 〈http://dx.doi.org/10.1016/j.foodcont.2015.11.035〉 [4].",2016-01-18 +26700057,Systemic Analysis of Regulated Functional Networks.,"In biological and medical sciences, high throughput analytical methods are now commonly used to investigate samples of different conditions, e.g., patients versus controls. Systemic functional analyses emerged as a reference method to go beyond a list of regulated compounds, and identify activated or inactivated biological functions. This approach holds the promise for a better understanding of biological systems, of the mechanisms involved in disease progression, and thus improved diagnosis, prognosis, and treatment. 
In this chapter, we present a simple workflow to conduct pathway analyses on biological data using the freely available Reactome platform (http://www.reactome.org).",2016-01-01 +25979725,Health & Demographic Surveillance System Profile: The Ifakara Rural and Urban Health and Demographic Surveillance System (Ifakara HDSS).,"The Ifakara Rural HDSS (125,000 people) was set up in 1996 for a trial of the effectiveness of social marketing of bed nets on morbidity and mortality of children aged under 5 years, whereas the Ifakara Urban HDSS (45,000 people) since 2007 has provided demographic indicators for a typical small urban centre setting. Jointly they form the Ifakara HDSS (IHDSS), located in the Kilombero valley in south-east Tanzania. Socio-demographic data are collected twice a year. Current malaria work focuses on phase IV studies for antimalarials and on determinants of fine-scale variation of pathogen transmission risk, to inform malaria elimination strategies. The IHDSS is also used to describe the epidemiology and health system aspects of maternal, neonatal and child health and for intervention trials at individual and health systems levels. More recently, IHDSS researchers have studied epidemiology, health-seeking and national programme effectiveness for chronic health problems of adults and older people, including for HIV, tuberculosis and non-communicable diseases. A focus on understanding vulnerability and designing methods to enhance equity in access to services are cross-cutting themes in our work. Unrestricted access to core IHDSS data is in preparation, through INDEPTH iSHARE [www.indepth-ishare.org] and the IHI data portal [http://data.ihi.or.tz/index.php/catalog/central].",2015-05-15 +23282181,PCDq: human protein complex database with quality index which summarizes different levels of evidences of protein complexes predicted from h-invitational protein-protein interactions integrative dataset.,"

Background

Proteins interact with other proteins or biomolecules in complexes to perform cellular functions. Existing protein-protein interaction (PPI) databases and protein complex databases for human proteins are not organized to provide protein complex information or facilitate the discovery of novel subunits. Data integration of PPIs focused specifically on protein complexes, subunits, and their functions. Predicted candidate complexes or subunits are also important for experimental biologists.

Description

Based on integrated PPI data and literature, we have developed a human protein complex database with a complex quality index (PCDq), which includes both known and predicted complexes and subunits. We integrated six PPI data (BIND, DIP, MINT, HPRD, IntAct, and GNP_Y2H), and predicted human protein complexes by finding densely connected regions in the PPI networks. They were curated with the literature so that missing proteins were complemented and some complexes were merged, resulting in 1,264 complexes comprising 9,268 proteins with 32,198 PPIs. The evidence level of each subunit was assigned as a categorical variable. This indicated whether it was a known subunit, and a specific function was inferable from sequence or network analysis. To summarize the categories of all the subunits in a complex, we devised a complex quality index (CQI) and assigned it to each complex. We examined the proportion of consistency of Gene Ontology (GO) terms among protein subunits of a complex. Next, we compared the expression profiles of the corresponding genes and found that many proteins in larger complexes tend to be expressed cooperatively at the transcript level. The proportion of duplicated genes in a complex was evaluated. Finally, we identified 78 hypothetical proteins that were annotated as subunits of 82 complexes, which included known complexes. Of these hypothetical proteins, after our prediction had been made, four were reported to be actual subunits of the assigned protein complexes.

Conclusions

We constructed a new protein complex database PCDq including both predicted and curated human protein complexes. CQI is a useful source of experimentally confirmed information about protein complexes and subunits. The predicted protein complexes can provide functional clues about hypothetical proteins. PCDq is freely available at http://h-invitational.jp/hinv/pcdq/.",2012-12-12 +23193285,GenColors-based comparative genome databases for small eukaryotic genomes.,"Many sequence data repositories can give a quick and easily accessible overview on genomes and their annotations. Less widespread is the possibility to compare related genomes with each other in a common database environment. We have previously described the GenColors database system (http://gencolors.fli-leibniz.de) and its applications to a number of bacterial genomes such as Borrelia, Legionella, Leptospira and Treponema. This system has an emphasis on genome comparison. It combines data from related genomes and provides the user with an extensive set of visualization and analysis tools. Eukaryote genomes are normally larger than prokaryote genomes and thus pose additional challenges for such a system. We have, therefore, adapted GenColors to also handle larger datasets of small eukaryotic genomes and to display eukaryotic gene structures. Further recent developments include whole genome views, genome list options and, for bacterial genome browsers, the display of horizontal gene transfer predictions. Two new GenColors-based databases for two fungal species (http://fgb.fli-leibniz.de) and for four social amoebas (http://sacgb.fli-leibniz.de) were set up. Both new resources open up a single entry point for related genomes for the amoebozoa and fungal research communities and other interested users. 
Comparative genomics approaches are greatly facilitated by these resources.",2012-11-28 +22039101,Newt-omics: a comprehensive repository for omics data from the newt Notophthalmus viridescens.,"Notophthalmus viridescens, a member of the salamander family is an excellent model organism to study regenerative processes due to its unique ability to replace lost appendages and to repair internal organs. Molecular insights into regenerative events have been severely hampered by the lack of genomic, transcriptomic and proteomic data, as well as an appropriate database to store such novel information. Here, we describe 'Newt-omics' (http://newt-omics.mpi-bn.mpg.de), a database, which enables researchers to locate, retrieve and store data sets dedicated to the molecular characterization of newts. Newt-omics is a transcript-centred database, based on an Expressed Sequence Tag (EST) data set from the newt, covering ~50,000 Sanger sequenced transcripts and a set of high-density microarray data, generated from regenerating hearts. Newt-omics also contains a large set of peptides identified by mass spectrometry, which was used to validate 13,810 ESTs as true protein coding. Newt-omics is open to implement additional high-throughput data sets without changing the database structure. Via a user-friendly interface Newt-omics allows access to a huge set of molecular data without the need for prior bioinformatical expertise.",2011-10-27 +27258598,Temperature Variability and Mortality: A Multi-Country Study.,"

Background

The evidence and method are limited for the associations between mortality and temperature variability (TV) within or between days.

Objectives

We developed a novel method to calculate TV and investigated TV-mortality associations using a large multicountry data set.

Methods

We collected daily data for temperature and mortality from 372 locations in 12 countries/regions (Australia, Brazil, Canada, China, Japan, Moldova, South Korea, Spain, Taiwan, Thailand, the United Kingdom, and the United States). We calculated TV from the standard deviation of the minimum and maximum temperatures during the exposure days. Two-stage analyses were used to assess the relationship between TV and mortality. In the first stage, a Poisson regression model allowing over-dispersion was used to estimate the community-specific TV-mortality relationship, after controlling for potential confounders. In the second stage, a meta-analysis was used to pool the effect estimates within each country.

Results

There was a significant association between TV and mortality in all countries, even after controlling for the effects of daily mean temperature. In stratified analyses, TV was still significantly associated with mortality in cold, hot, and moderate seasons. Mortality risks related to TV were higher in hot areas than in cold areas when using short TV exposures (0-1 days), whereas TV-related mortality risks were higher in moderate areas than in cold and hot areas when using longer TV exposures (0-7 days).

Conclusions

The results indicate that more attention should be paid to unstable weather conditions in order to protect health. These findings may have implications for developing public health policies to manage health risks of climate change.

Citation

Guo Y, Gasparrini A, Armstrong BG, Tawatsupa B, Tobias A, Lavigne E, Coelho MS, Pan X, Kim H, Hashizume M, Honda Y, Guo YL, Wu CF, Zanobetti A, Schwartz JD, Bell ML, Overcenco A, Punnasiri K, Li S, Tian L, Saldiva P, Williams G, Tong S. 2016. Temperature variability and mortality: a multi-country study. Environ Health Perspect 124:1554-1559; http://dx.doi.org/10.1289/EHP149.",2016-06-03 +27793091,CoSpliceNet: a framework for co-splicing network inference from transcriptomics data.,"

Background

Alternative splicing has been proposed to increase transcript diversity and protein plasticity in eukaryotic organisms, but the extent to which this is the case is currently unclear, especially with regard to the diversification of molecular function. Eukaryotic splicing involves complex interactions of splicing factors and their targets. Inference of co-splicing networks capturing these types of interactions is important for understanding this crucial, highly regulated post-transcriptional process at the systems level.

Results

First, several transcript and protein attributes, including coding potential of transcripts and differences in functional domains of proteins, were compared between splice variants and protein isoforms to assess transcript and protein diversity in a biological system. Alternative splicing was shown to increase transcript and function-related protein diversity in developing Arabidopsis embryos. Second, CoSpliceNet, which integrates co-expression and motif discovery at splicing regulatory regions to infer co-splicing networks, was developed. CoSpliceNet was applied to temporal RNA sequencing data to identify candidate regulators of splicing events and predict RNA-binding motifs, some of which are supported by prior experimental evidence. Analysis of inferred splicing factor targets revealed an unexpected role for the unfolded protein response in embryo development.

Conclusions

The methods presented here can be used in any biological system to assess transcript diversity and protein plasticity and to predict candidate regulators, their targets, and RNA-binding motifs for splicing factors. CoSpliceNet is freely available at http://delasa.github.io/co-spliceNet/ .",2016-10-28 +23599502,INstruct: a database of high-quality 3D structurally resolved protein interactome networks.,"

Unlabelled

INstruct is a database of high-quality, 3D, structurally resolved protein interactome networks in human and six model organisms. INstruct combines the scale of available high-quality binary protein interaction data with the specificity of atomic-resolution structural information derived from co-crystal evidence using a tested interaction interface inference method. Its web interface is designed to allow for flexible search based on standard and organism-specific protein and gene-naming conventions, visualization of protein architecture highlighting interaction interfaces and viewing and downloading custom 3D structurally resolved interactome datasets.

Availability

INstruct is freely available on the web at http://instruct.yulab.org with all major browsers supported.",2013-04-18 +,Real-time weather analysis reveals the adaptability of direct sea-crossing by raptors,"Many animals seasonally travel between their breeding and wintering grounds. With their advanced mobility, birds often migrate over thousands of kilometres. Recently, satellite-tracking studies have revealed peculiar migration routes for some avian species at a global scale. However, the adaptability of such migration routes has not been clearly demonstrated. Using satellite-tracking data for 33 individuals, we show that the Japanese population of Oriental honey-buzzards (Pernis ptilorhynchus) directly crosses the 650-km-wide East China Sea during their autumn migration, although they fly a longer route around the sea rather than directly crossing it during their spring migration. By applying aerodynamic theory, we show that the buzzards could cross the sea by soaring and gliding flight. Moreover, using a high-resolution meteorological-prediction analysis, we demonstrate that the migratory trajectory of the birds strongly depends on the wind direction at their estimated locations. In the area, northeastern tailwinds blow stably only during autumn. Thermals were abundant ca. 500–1,000 m over the East China Sea in autumn, but that was not the case in spring. We suggest that the autumn-migration route across the East China Sea is likely to have evolved in response to the specific weather conditions over the sea. 
Animations showing movements of Oriental honey-buzzards and temporal change in weather conditions are available at: http://www.momo-p.com/showdetail-e.php?movieid=momo110822oh01a , http://www.momo-p.com/showdetail-e.php?movieid=momo110822oh02a , http://www.momo-p.com/showdetail-e.php?movieid=momo110822oh04a , http://www.momo-p.com/showdetail-e.php?movieid=momo110822oh05a , http://www.momo-p.com/showdetail-e.php?movieid=momo110822oh06a , and http://www.momo-p.com/showdetail-e.php?movieid=momo110822oh07a .",2012-01-01 +25070634,Informative Bayesian Model Selection: a method for identifying interactions in genome-wide data.,"In high-dimensional genome-wide (GWA) data, a key challenge is to detect genomic variants that interact in a nonlinear fashion in their association with disease. Identifying such genomic interactions is important for elucidating the inheritance of complex phenotypes and diseases. In this paper, we introduce a new computational method called Informative Bayesian Model Selection (IBMS) that leverages correlation among variants in GWA data due to the linkage disequilibrium to identify interactions accurately in a computationally efficient manner. IBMS combines several statistical methods including canonical correlation analysis, logistic regression analysis, and a Bayesians statistical measure of evaluating interactions. Compared to BOOST and BEAM that are two widely used methods for detecting genomic interactions, IBMS had significantly higher power when evaluated on synthetic data. Furthermore, when applied to Alzheimer's disease GWA data, IBMS identified previously reported interactions. IBMS is a useful method for identifying variants in GWA data, and software that implements IBMS is freely available online from http://lbb.ut.ac.ir/Download/LBBsoft/IBMS.",2014-10-01 +24162468,Anatomical entity mention recognition at literature scale.,"

Motivation

Anatomical entities ranging from subcellular structures to organ systems are central to biomedical science, and mentions of these entities are essential to understanding the scientific literature. Despite extensive efforts to automatically analyze various aspects of biomedical text, there have been only few studies focusing on anatomical entities, and no dedicated methods for learning to automatically recognize anatomical entity mentions in free-form text have been introduced.

Results

We present AnatomyTagger, a machine learning-based system for anatomical entity mention recognition. The system incorporates a broad array of approaches proposed to benefit tagging, including the use of Unified Medical Language System (UMLS)- and Open Biomedical Ontologies (OBO)-based lexical resources, word representations induced from unlabeled text, statistical truecasing and non-local features. We train and evaluate the system on a newly introduced corpus that substantially extends on previously available resources, and apply the resulting tagger to automatically annotate the entire open access scientific domain literature. The resulting analyses have been applied to extend services provided by the Europe PubMed Central literature database.

Availability and implementation

All tools and resources introduced in this work are available from http://nactem.ac.uk/anatomytagger.

Contact

sophia.ananiadou@manchester.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-10-25 +26498621,Perl One-Liners: Bridging the Gap Between Large Data Sets and Analysis Tools.,"Computational analyses of biological data are becoming increasingly powerful, and researchers intending on carrying out their own analyses can often choose from a wide array of tools and resources. However, their application might be obstructed by the wide variety of different data formats that are in use, from standard, commonly used formats to output files from high-throughput analysis platforms. The latter are often too large to be opened, viewed, or edited by standard programs, potentially leading to a bottleneck in the analysis. Perl one-liners provide a simple solution to quickly reformat, filter, and merge data sets in preparation for downstream analyses. This chapter presents example code that can be easily adjusted to meet individual requirements. An online version is available at http://bioinf.gen.tcd.ie/pol.",2015-01-01 +25692236,Comprehensive analysis of human small RNA sequencing data provides insights into expression profiles and miRNA editing.,"MicroRNAs (miRNAs) play key regulatory roles in various biological processes and diseases. A comprehensive analysis of large scale small RNA sequencing data (smRNA-seq) will be very helpful to explore tissue or disease specific miRNA markers and uncover miRNA variants. Here, we systematically analyzed 410 human smRNA-seq datasets, which samples are from 24 tissue/disease/cell lines. We tested the mapping strategies and found that it was necessary to make multiple-round mappings with different mismatch parameters. miRNA expression profiles revealed that on average ∼70% of known miRNAs were expressed at low level or not expressed (RPM < 1) in a sample and only ∼9% of known miRNAs were relatively highly expressed (RPM > 100). About 30% known miRNAs were not expressed in all of our used samples. 
The miRNA expression profiles were compiled into an online database (HMED, http://bioinfo.life.hust.edu.cn/smallRNA/). Dozens of tissue/disease specific miRNAs, disease/control dysregulated miRNAs and miRNAs with arm switching events were discovered. Further, we identified some highly confident editing sites including 24 A-to-I sites and 23 C-to-U sites. About half of them were widespread miRNA editing sites in different tissues. We characterized that the 2 types of editing sites have different features with regard to location, editing level and frequency. Our analyses for expression profiles, specific miRNA markers, arm switching, and editing sites, may provide valuable information for further studies of miRNA function and biomarker finding.",2014-01-01 +25372567,HemI: a toolkit for illustrating heatmaps.,"Recent high-throughput techniques have generated a flood of biological data in all aspects. The transformation and visualization of multi-dimensional and numerical gene or protein expression data in a single heatmap can provide a concise but comprehensive presentation of molecular dynamics under different conditions. In this work, we developed an easy-to-use tool named HemI (Heat map Illustrator), which can visualize either gene or protein expression data in heatmaps. Additionally, the heatmaps can be recolored, rescaled or rotated in a customized manner. In addition, HemI provides multiple clustering strategies for analyzing the data. Publication-quality figures can be exported directly. We propose that HemI can be a useful toolkit for conveniently visualizing and manipulating heatmaps. The stand-alone packages of HemI were implemented in Java and can be accessed at http://hemi.biocuckoo.org/down.php.",2014-11-05 +25766039,REDHORSE-REcombination and Double crossover detection in Haploid Organisms using next-geneRation SEquencing data.,"

Background

Next-generation sequencing technology provides a means to study genetic exchange at a higher resolution than was possible using earlier technologies. However, this improvement presents challenges as the alignments of next generation sequence data to a reference genome cannot be directly used as input to existing detection algorithms, which instead typically use multiple sequence alignments as input. We therefore designed a software suite called REDHORSE that uses genomic alignments, extracts genetic markers, and generates multiple sequence alignments that can be used as input to existing recombination detection algorithms. In addition, REDHORSE implements a custom recombination detection algorithm that makes use of sequence information and genomic positions to accurately detect crossovers. REDHORSE is a portable and platform independent suite that provides efficient analysis of genetic crosses based on Next-generation sequencing data.

Results

We demonstrated the utility of REDHORSE using simulated data and real Next-generation sequencing data. The simulated dataset mimicked recombination between two known haploid parental strains and allowed comparison of detected break points against known true break points to assess performance of recombination detection algorithms. A newly generated NGS dataset from a genetic cross of Toxoplasma gondii allowed us to demonstrate our pipeline. REDHORSE successfully extracted the relevant genetic markers and was able to transform the read alignments from NGS to the genome to generate multiple sequence alignments. Recombination detection algorithm in REDHORSE was able to detect conventional crossovers and double crossovers typically associated with gene conversions whilst filtering out artifacts that might have been introduced during sequencing or alignment. REDHORSE outperformed other commonly used recombination detection algorithms in finding conventional crossovers. In addition, REDHORSE was the only algorithm that was able to detect double crossovers.

Conclusion

REDHORSE is an efficient analytical pipeline that serves as a bridge between genomic alignments and existing recombination detection algorithms. Moreover, REDHORSE is equipped with a recombination detection algorithm specifically designed for Next-generation sequencing data. REDHORSE is portable, platform independent Java based utility that provides efficient analysis of genetic crosses based on Next-generation sequencing data. REDHORSE is available at http://redhorse.sourceforge.net/ .",2015-02-26 +27659453,Evolutionary conservation of Ebola virus proteins predicts important functions at residue level.,"

Motivation

The recent outbreak of Ebola virus disease (EVD) resulted in a large number of human deaths. Due to this devastation, the Ebola virus has attracted renewed interest as model for virus evolution. Recent literature on Ebola virus (EBOV) has contributed substantially to our understanding of the underlying genetics and its scope with reference to the 2014 outbreak. But no study yet, has focused on the conservation patterns of EBOV proteins.

Results

We analyzed the evolution of functional regions of EBOV and highlight the function of conserved residues in protein activities. We apply an array of computational tools to dissect the functions of EBOV proteins in detail: (i) protein sequence conservation, (ii) protein-protein interactome analysis, (iii) structural modeling and (iv) kinase prediction. Our results suggest the presence of novel post-translational modifications in EBOV proteins and their role in the modulation of protein functions and protein interactions. Moreover, on the basis of the presence of ATM recognition motifs in all EBOV proteins we postulate a role of DNA damage response pathways and ATM kinase in EVD. The ATM kinase is put forward, for further evaluation, as novel potential therapeutic target.

Availability and implementation

http://www.biw.kuleuven.be/CSB/EBOV-PTMs CONTACT: vera.vannoort@biw.kuleuven.be Supplementary information: Supplementary data are available at Bioinformatics online.",2016-09-21 +26831908,CIDANE: comprehensive isoform discovery and abundance estimation.,"We present CIDANE, a novel framework for genome-based transcript reconstruction and quantification from RNA-seq reads. CIDANE assembles transcripts efficiently with significantly higher sensitivity and precision than existing tools. Its algorithmic core not only reconstructs transcripts ab initio, but also allows the use of the growing annotation of known splice sites, transcription start and end sites, or full-length transcripts, which are available for most model organisms. CIDANE supports the integrated analysis of RNA-seq and additional gene-boundary data and recovers splice junctions that are invisible to other methods. CIDANE is available at http://ccb.jhu.edu/software/cidane/.",2016-01-30 +28019040,A small-sample multivariate kernel machine test for microbiome association studies.,"High-throughput sequencing technologies have enabled large-scale studies of the role of the human microbiome in health conditions and diseases. Microbial community level association test, as a critical step to establish the connection between overall microbiome composition and an outcome of interest, has now been routinely performed in many studies. However, current microbiome association tests all focus on a single outcome. It has become increasingly common for a microbiome study to collect multiple, possibly related, outcomes to maximize the power of discovery. As these outcomes may share common mechanisms, jointly analyzing these outcomes can amplify the association signal and improve statistical power to detect potential associations. 
We propose the multivariate microbiome regression-based kernel association test (MMiRKAT) for testing association between multiple continuous outcomes and overall microbiome composition, where the kernel used in MMiRKAT is based on Bray-Curtis or UniFrac distance. MMiRKAT directly regresses all outcomes on the microbiome profiles via a semiparametric kernel machine regression framework, which allows for covariate adjustment and evaluates the association via a variance-component score test. Because most of the current microbiome studies have small sample sizes, a novel small-sample correction procedure is implemented in MMiRKAT to correct for the conservativeness of the association test when the sample size is small or moderate. The proposed method is assessed via simulation studies and an application to a real data set examining the association between host gene expression and mucosal microbiome composition. We demonstrate that MMiRKAT is more powerful than large sample based multivariate kernel association test, while controlling the type I error. A free implementation of MMiRKAT in R language is available at http://research.fhcrc.org/wu/en.html.",2016-12-26 +25640302,Mapping global cropland and field size.,"A new 1 km global IIASA-IFPRI cropland percentage map for the baseline year 2005 has been developed which integrates a number of individual cropland maps at global to regional to national scales. The individual map products include existing global land cover maps such as GlobCover 2005 and MODIS v.5, regional maps such as AFRICOVER and national maps from mapping agencies and other organizations. The different products are ranked at the national level using crowdsourced data from Geo-Wiki to create a map that reflects the likelihood of cropland. Calibration with national and subnational crop statistics was then undertaken to distribute the cropland within each country and subnational unit. 
The new IIASA-IFPRI cropland product has been validated using very high-resolution satellite imagery via Geo-Wiki and has an overall accuracy of 82.4%. It has also been compared with the EarthStat cropland product and shows a lower root mean square error on an independent data set collected from Geo-Wiki. The first ever global field size map was produced at the same resolution as the IIASA-IFPRI cropland map based on interpolation of field size data collected via a Geo-Wiki crowdsourcing campaign. A validation exercise of the global field size map revealed satisfactory agreement with control data, particularly given the relatively modest size of the field size data set used to create the map. Both are critical inputs to global agricultural monitoring in the frame of GEOGLAM and will serve the global land modelling and integrated assessment community, in particular for improving land use models that require baseline cropland information. These products are freely available for downloading from the http://cropland.geo-wiki.org website.",2015-01-16 +26056424,TRUFA: A User-Friendly Web Server for de novo RNA-seq Analysis Using Cluster Computing.,"Application of next-generation sequencing (NGS) methods for transcriptome analysis (RNA-seq) has become increasingly accessible in recent years and are of great interest to many biological disciplines including, eg, evolutionary biology, ecology, biomedicine, and computational biology. Although virtually any research group can now obtain RNA-seq data, only a few have the bioinformatics knowledge and computation facilities required for transcriptome analysis. Here, we present TRUFA (TRanscriptome User-Friendly Analysis), an open informatics platform offering a web-based interface that generates the outputs commonly used in de novo RNA-seq analysis and comparative transcriptomics. 
TRUFA provides a comprehensive service that allows performing dynamically raw read cleaning, transcript assembly, annotation, and expression quantification. Due to the computationally intensive nature of such analyses, TRUFA is highly parallelized and benefits from accessing high-performance computing resources. The complete TRUFA pipeline was validated using four previously published transcriptomic data sets. TRUFA's results for the example datasets showed globally similar results when comparing with the original studies, and performed particularly better when analyzing the green tea dataset. The platform permits analyzing RNA-seq data in a fast, robust, and user-friendly manner. Accounts on TRUFA are provided freely upon request at https://trufa.ifca.es.",2015-05-24 +25502381,GENN: a GEneral Neural Network for learning tabulated data with examples from protein structure prediction.,"We present a GEneral Neural Network (GENN) for learning trends from existing data and making predictions of unknown information. The main novelty of GENN is in its generality, simplicity of use, and its specific handling of windowed input/output. Its main strength is its efficient handling of the input data, enabling learning from large datasets. GENN is built on a two-layered neural network and has the option to use separate inputs-output pairs or window-based data using data structures to efficiently represent input-output pairs. The program was tested on predicting the accessible surface area of globular proteins, scoring proteins according to similarity to native, predicting protein disorder, and has performed remarkably well. In this paper we describe the program and its use. Specifically, we give as an example the construction of a similarity to native protein scoring function that was constructed using GENN. 
The source code and Linux executables for GENN are available from Research and Information Systems at http://mamiris.com and from the Battelle Center for Mathematical Medicine at http://mathmed.org. Bugs and problems with the GENN program should be reported to EF.",2015-01-01 +23342000,Inferring hierarchical orthologous groups from orthologous gene pairs.,"Hierarchical orthologous groups are defined as sets of genes that have descended from a single common ancestor within a taxonomic range of interest. Identifying such groups is useful in a wide range of contexts, including inference of gene function, study of gene evolution dynamics and comparative genomics. Hierarchical orthologous groups can be derived from reconciled gene/species trees but, this being a computationally costly procedure, many phylogenomic databases work on the basis of pairwise gene comparisons instead (""graph-based"" approach). To our knowledge, there is only one published algorithm for graph-based hierarchical group inference, but both its theoretical justification and performance in practice are as of yet largely uncharacterised. We establish a formal correspondence between the orthology graph and hierarchical orthologous groups. Based on that, we devise GETHOGs (""Graph-based Efficient Technique for Hierarchical Orthologous Groups""), a novel algorithm to infer hierarchical groups directly from the orthology graph, thus without needing gene tree inference nor gene/species tree reconciliation. GETHOGs is shown to correctly reconstruct hierarchical orthologous groups when applied to perfect input, and several extensions with stringency parameters are provided to deal with imperfect input data. We demonstrate its competitiveness using both simulated and empirical data. GETHOGs is implemented as a part of the freely-available OMA standalone package (http://omabrowser.org/standalone). 
Furthermore, hierarchical groups inferred by GETHOGs (""OMA HOGs"") on >1,000 genomes can be interactively queried via the OMA browser (http://omabrowser.org).",2013-01-14 +26810761,Forty-eight novel mutations causing biotinidase deficiency.,"Biotinidase deficiency is an autosomal recessively inherited disorder that results in the inability to recycle the vitamin biotin and is characterized by neurological and cutaneous symptoms. The symptoms can be ameliorated or prevented by administering pharmacological doses of biotin. Since 2008, approximately 300 samples have been submitted to ARUP's Molecular Sequencing Laboratory for biotinidase mutation analysis. Of these, 48 novel alterations in the biotinidase gene have been identified. Correlating the individual's serum enzymatic activity with the genotype, we have been able to determine the effect of the novel alteration on enzyme activity and, thereby, determine its likelihood of being pathogenic in 44 of these individuals. The novel mutations and uncertain alterations have been added to the database established by ARUP (http://arup.utah.edu/database/BTD/BTD_welcome.phps) to help clinicians make decisions about management and to better counsel their patients based on their genotypes.",2016-01-12 +27589962,BioCreative V BioC track overview: collaborative biocurator assistant task for BioGRID. ,"BioC is a simple XML format for text, annotations and relations, and was developed to achieve interoperability for biomedical text processing. Following the success of BioC in BioCreative IV, the BioCreative V BioC track addressed a collaborative task to build an assistant system for BioGRID curation. In this paper, we describe the framework of the collaborative BioC task and discuss our findings based on the user survey. This track consisted of eight subtasks including gene/protein/organism named entity recognition, protein-protein/genetic interaction passage identification and annotation visualization. 
Using BioC as their data-sharing and communication medium, nine teams, world-wide, participated and contributed either new methods or improvements of existing tools to address different subtasks of the BioC track. Results from different teams were shared in BioC and made available to other teams as they addressed different subtasks of the track. In the end, all submitted runs were merged using a machine learning classifier to produce an optimized output. The biocurator assistant system was evaluated by four BioGRID curators in terms of practical usability. The curators' feedback was overall positive and highlighted the user-friendly design and the convenient gene/protein curation tool based on text mining. Database URL: http://www.biocreative.org/tasks/biocreative-v/track-1-bioc/.",2016-09-01 +27587668,A program for verification of phylogenetic network models.,"

Motivation

Genetic material is transferred in a non-reproductive manner across species more frequently than commonly thought, particularly in the bacteria kingdom. On one hand, extant genomes are thus more properly considered as a fusion product of both reproductive and non-reproductive genetic transfers. This has motivated researchers to adopt phylogenetic networks to study genome evolution. On the other hand, a gene's evolution is usually tree-like and has been studied for over half a century. Accordingly, the relationships between phylogenetic trees and networks are the basis for the reconstruction and verification of phylogenetic networks. One important problem in verifying a network model is determining whether or not certain existing phylogenetic trees are displayed in a phylogenetic network. This problem is formally called the tree containment problem. It is NP-complete even for binary phylogenetic networks.

Results

We design an exponential time but efficient method for determining whether or not a phylogenetic tree is displayed in an arbitrary phylogenetic network. It is developed on the basis of the so-called reticulation-visible property of phylogenetic networks.

Availability and implementation

A C-program is available for download on http://www.math.nus.edu.sg/~matzlx/tcp_package

Contact

matzlx@nus.edu.sg

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +28113758,Graph-Driven Diffusion and Random Walk Schemes for Image Segmentation.,"We propose graph-driven approaches to image segmentation by developing diffusion processes defined on arbitrary graphs. We formulate a solution to the image segmentation problem modeled as the result of infectious wavefronts propagating on an image-driven graph where pixels correspond to nodes of an arbitrary graph. By relating the popular Susceptible - Infected - Recovered epidemic propagation model to the Random Walker algorithm, we develop the Normalized Random Walker and a lazy random walker variant. The underlying iterative solutions of these methods are derived as the result of infections transmitted on this arbitrary graph. The main idea is to incorporate a degree-aware term into the original Random Walker algorithm in order to account for the node centrality of every neighboring node and to weigh the contribution of every neighbor to the underlying diffusion process. Our lazy random walk variant models the tendency of patients or nodes to resist changes in their infection status. We also show how previous work can be naturally extended to take advantage of this degree-aware term which enables the design of other novel methods. Through an extensive experimental analysis, we demonstrate the reliability of our approach, its small computational burden and the dimensionality reduction capabilities of graph-driven approaches. Without applying any regular grid constraint, the proposed graph clustering scheme allows us to consider pixel-level, node-level approaches and multidimensional input data by naturally integrating the importance of each node to the final clustering or segmentation solution. 
A software release containing implementations of this work and supplementary material can be found at: http://cvsp.cs.ntua.gr/research/GraphClustering/.",2016-10-26 +26437641,Mass-Up: an all-in-one open software application for MALDI-TOF mass spectrometry knowledge discovery.,"

Background

Mass spectrometry is one of the most important techniques in the field of proteomics. MALDI-TOF mass spectrometry has become popular during the last decade due to its high speed and sensitivity for detecting proteins and peptides. MALDI-TOF-MS can be also used in combination with Machine Learning techniques and statistical methods for knowledge discovery. Although there are many software libraries and tools that can be combined for this kind of analysis, there is still a need for all-in-one solutions with graphical user-friendly interfaces and avoiding the need of programming skills.

Results

Mass-Up, an open software multiplatform application for MALDI-TOF-MS knowledge discovery is herein presented. Mass-Up software allows data preprocessing, as well as subsequent analysis including (i) biomarker discovery, (ii) clustering, (iii) biclustering, (iv) three-dimensional PCA visualization and (v) classification of large sets of spectra data.

Conclusions

Mass-Up brings knowledge discovery within reach of MALDI-TOF-MS researchers. Mass-Up is distributed under license GPLv3 and it is open and free to all users at http://sing.ei.uvigo.es/mass-up.",2015-10-05 +26307543,Sandcastle: software for revealing latent information in multiple experimental ChIP-chip datasets via a novel normalisation procedure.,"ChIP-chip is a microarray based technology for determining the genomic locations of chromatin bound factors of interest, such as proteins. Standard ChIP-chip analyses employ peak detection methodologies to generate lists of genomic binding sites. No previously published method exists to enable comparative analyses of enrichment levels derived from datasets examining different experimental conditions. This restricts the use of the technology to binary comparisons of presence or absence of features between datasets. Here we present the R package Sandcastle — Software for the Analysis and Normalisation of Data from ChIP-chip AssayS of Two or more Linked Experiments — which allows for comparative analyses of data from multiple experiments by normalising all datasets to a common background. Relative changes in binding levels between experimental datasets can thus be determined, enabling the extraction of latent information from ChIP-chip experiments. Novel enrichment detection and peak calling algorithms are also presented, with a range of graphical tools, which facilitate these analyses. The software and documentation are available for download from http://reedlab.cardiff.ac.uk/sandcastle.",2015-08-26 +23342084,Early growth response 3 (Egr3) is highly over-expressed in non-relapsing prostate cancer but not in relapsing prostate cancer.,"Members of the early growth response (EGR) family of transcription factors play diverse functions in response to many cellular stimuli, including growth, stress, and inflammation. 
Egr3 has gone relatively unstudied, but here through use of the SPECS (Strategic Partners for the Evaluation of Predictive Signatures of Prostate Cancer) Affymetrix whole genome gene expression database we report that Egr3 mRNA is significantly over-expressed in prostate cancer compared to normal prostate tissue (5-fold). The Human Protein Atlas (http://www.proteinatlas.org), a database of tissue microarrays labeled with antibodies against over 11,000 human proteins, was utilized to quantify Egr3 protein expression in normal prostate and prostate cancer patients. In agreement with the SPECS data, we found that Egr3 protein is significantly increased in prostate cancer. The SPECS database has the benefit of extensive clinical follow up for the prostate cancer patients. Analysis of Egr3 mRNA expression in relation to the relapse status reveals that Egr3 mRNA expression is increased in tumor cells of non-relapsed samples (n = 63) compared to normal prostate cells, but is significantly lower in relapsed samples (n = 38) compared to non-relapse. The observations were confirmed using an independent data set. A list of genes correlating with this unique expression pattern was determined. These Egr3-correlated genes were enriched with Egr binding sites in their promoters. The gene list contains inflammatory genes such as IL-6, IL-8, IL1β and COX-2, which have extensive connections to prostate cancer.",2013-01-14 +27347883,CsSNP: A Web-Based Tool for the Detecting of Comparative Segments SNPs.,"SNP (single nucleotide polymorphism) is a popular tool for the study of genetic diversity, evolution, and other areas. Therefore, it is necessary to develop a convenient, utility, robust, rapid, and open source detecting-SNP tool for all researchers. Since the detection of SNPs needs special software and series steps including alignment, detection, analysis and present, the study of SNPs is limited for nonprofessional users. 
CsSNP (Comparative segments SNP, http://biodb.sdau.edu.cn/cssnp/ ) is a freely available web tool based on the Blat, Blast, and Perl programs to detect comparative segments SNPs and to show the detail information of SNPs. The results are filtered and presented in the statistics figure and a Gbrowse map. This platform contains the reference genomic sequences and coding sequences of 60 plant species, and also provides new opportunities for the users to detect SNPs easily. CsSNP is provided a convenient tool for nonprofessional users to find comparative segments SNPs in their own sequences, and give the users the information and the analysis of SNPs, and display these data in a dynamic map. It provides a new method to detect SNPs and may accelerate related studies.",2016-07-01 +22064855,YMDB: the Yeast Metabolome Database.,"The Yeast Metabolome Database (YMDB, http://www.ymdb.ca) is a richly annotated 'metabolomic' database containing detailed information about the metabolome of Saccharomyces cerevisiae. Modeled closely after the Human Metabolome Database, the YMDB contains >2000 metabolites with links to 995 different genes/proteins, including enzymes and transporters. The information in YMDB has been gathered from hundreds of books, journal articles and electronic databases. In addition to its comprehensive literature-derived data, the YMDB also contains an extensive collection of experimental intracellular and extracellular metabolite concentration data compiled from detailed Mass Spectrometry (MS) and Nuclear Magnetic Resonance (NMR) metabolomic analyses performed in our lab. This is further supplemented with thousands of NMR and MS spectra collected on pure, reference yeast metabolites. 
Each metabolite entry in the YMDB contains an average of 80 separate data fields including comprehensive compound description, names and synonyms, structural information, physico-chemical data, reference NMR and MS spectra, intracellular/extracellular concentrations, growth conditions and substrates, pathway information, enzyme data, gene/protein sequence data, as well as numerous hyperlinks to images, references and other public databases. Extensive searching, relational querying and data browsing tools are also provided that support text, chemical structure, spectral, molecular weight and gene/protein sequence queries. Because of S. cerevisiae's importance as a model organism for biologists and as a biofactory for industry, we believe this kind of database could have considerable appeal not only to metabolomics researchers, but also to yeast biologists, systems biologists, the industrial fermentation industry, as well as the beer, wine and spirit industry.",2011-11-07 +26685281,Prenatal Exposure to Organophosphorous Pesticides and Fetal Growth: Pooled Results from Four Longitudinal Birth Cohort Studies.,"

Background

Organophosphorous (OP) pesticides are associated with reduced fetal growth in animals, but human studies are inconsistent.

Objectives

We pooled data from four cohorts to examine associations of prenatal OP exposure with birth weight (n = 1,169), length (n = 1,152), and head circumference (n = 1,143).

Methods

Data were from the CHAMACOS, HOME, Columbia, and Mount Sinai birth cohorts. Concentrations of three diethyl phosphate (ΣDEP) and three dimethyl phosphate (ΣDMP) metabolites of OP pesticides [summed to six dialkyl phosphates (ΣDAPs)] were measured in maternal urine. Linear regression and mixed-effects models were used to examine associations with birth outcomes.

Results

We found no significant associations of ΣDEP, ΣDMP, or ΣDAPs with birth weight, length, or head circumference overall. However, among non-Hispanic black women, increasing urinary ΣDAP and ΣDMP concentrations were associated with decreased birth length (β = -0.4 cm; 95% CI: -0.9, 0.0 and β = -0.4 cm; 95% CI: -0.8, 0.0, respectively, for each 10-fold increase in metabolite concentration). Among infants with the PON1192RR genotype, ΣDAP and ΣDMP were negatively associated with length (β = -0.4 cm; 95% CI: -0.9, 0.0 and β = -0.5 cm; 95% CI: -0.9, -0.1).

Conclusions

This study confirms previously reported associations of prenatal OP exposure among black women with decreased infant size at birth, but finds no evidence of smaller birth weight, length, or head circumference among whites or Hispanics. Contrary to our hypothesis, we found stronger inverse associations of DAPs and birth outcome in infants with the less susceptible PON1192RR genotype. The large pooled data set facilitated exploration of interactions by race/ethnicity and PON1 genotype, but was limited by differences in study populations.

Citation

Harley KG, Engel SM, Vedar MG, Eskenazi B, Whyatt RM, Lanphear BP, Bradman A, Rauh VA, Yolton K, Hornung RW, Wetmur JG, Chen J, Holland NT, Barr DB, Perera FP, Wolff MS. 2016. Prenatal exposure to organophosphorous pesticides and fetal growth: pooled results from four longitudinal birth cohort studies. Environ Health Perspect 124:1084-1092; http://dx.doi.org/10.1289/ehp.1409362.",2015-12-18 +25914405,What does Medicare pay rhinologists? An analysis of Medicare payment data.,"

Background

Information about charges and payments for physician services continues to be scrutinized. Recently, the Centers for Medicare and Medicaid Services (CMS) released data regarding Medicare charges and payments to all physicians for calendar year 2012. The purpose of this study was to investigate the variability and patterns in Medicare charges and payments among a large sample of rhinologists.

Methods

Charge and payment data were obtained from publicly available CMS datasets (http://www.cms.gov). Data for all otolaryngologists and rhinology subsets were extracted. Charges, payments, fee multipliers, and total submitted claims were compared. Unequal variance 2-tailed t tests were used for analysis.

Results

Mean submitted charges for rhinologists were $291,464 compared to $211,209 for all otolaryngologists (p = 0.0014). Mean payments to rhinologists were $70,172 compared to $77,275 for all otolaryngologists (p = 0.24). Fees for services ranged from 1.33 to 14.29 times Medicare reimbursement rates (mean = 4.47). The fee multiplier was significantly higher for operating room-based codes compared to office-based codes (9.43 vs 3.44, p < 0.001). Academic rhinologists submitted fewer claims and had a higher fee multiplier than private rhinologists (p < 0.001). Academic and private rhinologists had no difference in submitted charges (p = 0.28).

Conclusion

The wide availability of Medicare payment information makes it important for physicians to understand how their individual data compares to that of their colleagues. Medicare payments to rhinologists were comparable to otolaryngologists as a whole. Charges for services commonly performed by rhinologists vary widely. Academic rhinologists submitted fewer claims than their private colleagues, but overall charges and payments were comparable between the 2 groups.",2015-04-25 +24717371,Iron supplementation benefits physical performance in women of reproductive age: a systematic review and meta-analysis.,"Animal and human observational studies suggest that iron deficiency impairs physical exercise performance, but findings from randomized trials on the effects of iron are equivocal. Iron deficiency and anemia are especially common in women of reproductive age (WRA). Clear evidence of benefit from iron supplementation would inform clinical and public health guidelines. Therefore, we performed a systematic review and meta-analysis to determine the effect of iron supplementation compared with control on exercise performance in WRA. We searched the Cochrane Central Register of Clinical Trials, MEDLINE, Scopus (comprising Embase and MEDLINE), WHO regional databases, and other sources in July 2013. Randomized controlled trials that measured exercise outcomes in WRA randomized to daily oral iron supplementation vs. control were eligible. Random-effects meta-analysis was used to calculate mean differences (MDs) and standardized MDs (SMDs). Risk of bias was assessed using the Cochrane risk-of-bias tool. Of 6757 titles screened, 24 eligible studies were identified, 22 of which contained extractable data. Only 3 studies were at overall low risk of bias. 
Iron supplementation improved both maximal exercise performance, demonstrated by an increase in maximal oxygen consumption (VO2 max) [for relative VO2 max, MD: 2.35 mL/(kg ⋅ min); 95% CI: 0.82, 3.88; P = 0.003, 18 studies; for absolute VO2 max, MD: 0.11 L/min; 95% CI: 0.03, 0.20; P = 0.01, 9 studies; for overall VO2 max, SMD: 0.37; 95% CI: 0.11, 0.62; P = 0.005, 20 studies], and submaximal exercise performance, demonstrated by a lower heart rate (MD: -4.05 beats per minute; 95% CI: -7.25, -0.85; P = 0.01, 6 studies) and proportion of VO2 max (MD: -2.68%; 95% CI: -4.94, -0.41; P = 0.02, 6 studies) required to achieve defined workloads. Daily iron supplementation significantly improves maximal and submaximal exercise performance in WRA, providing a rationale to prevent and treat iron deficiency in this group. This trial was registered with PROSPERO (http://www.crd.york.ac.uk/PROSPERO/prospero.asp) as CRD42013005166.",2014-04-09 +27583344,Data on CUX1 isoforms in idiopathic pulmonary fibrosis lung and systemic sclerosis skin tissue sections.,"This data article contains complementary figures related to the research article entitled, ""Transforming growth factor-β-induced CUX1 isoforms are associated with fibrosis in systemic sclerosis lung fibroblasts"" (Ikeda et al. (2016) [2], http://dx.doi.org/10.1016/j.bbrep.2016.06.022), which presents that TGF-β increased CUX1 binding in the proximal promoter and enhancer of the COL1A2 and regulated COL1. Further, in the scleroderma (SSc) lung and diffuse alveolar damage lung sections, CUX1 localized within the α- smooth muscle actin (α-SMA) positive cells (Fragiadaki et al., 2011) [1], ""High doses of TGF-beta potently suppress type I collagen via the transcription factor CUX1"" (Ikeda et al., 2016) [2]. Here we show that CUX1 isoforms are localized within α-smooth muscle actin-positive cells in SSc skin and idiopathic pulmonary fibrosis (IPF) lung tissue sections. 
In particular, at the granular and prickle cell layers in the SSc skin sections, CUX1 and α-SMA are co-localized. In addition, at the fibrotic loci in the IPF lung tissue sections, CUX1 localized within the α-smooth muscle actin (α-SMA) positive cells.",2016-08-10 +23118487,CellLineNavigator: a workbench for cancer cell line analysis.,"The CellLineNavigator database, freely available at http://www.medicalgenomics.org/celllinenavigator, is a web-based workbench for large scale comparisons of a large collection of diverse cell lines. It aims to support experimental design in the fields of genomics, systems biology and translational biomedical research. Currently, this compendium holds genome wide expression profiles of 317 different cancer cell lines, categorized into 57 different pathological states and 28 individual tissues. To enlarge the scope of CellLineNavigator, the database was furthermore closely linked to commonly used bioinformatics databases and knowledge repositories. To ensure easy data access and search ability, a simple data and an intuitive querying interface were implemented. It allows the user to explore and filter gene expression, focusing on pathological or physiological conditions. For a more complex search, the advanced query interface may be used to query for (i) differentially expressed genes; (ii) pathological or physiological conditions; or (iii) gene names or functional attributes, such as Kyoto Encyclopaedia of Genes and Genomes pathway maps. These queries may also be combined. Finally, CellLineNavigator allows additional advanced analysis of differentially regulated genes by a direct link to the Database for Annotation, Visualization and Integrated Discovery (DAVID) Bioinformatics Resources.",2012-10-31 +26873932,An empirical Bayes change-point model for identifying 3' and 5' alternative splicing by next-generation RNA sequencing.,"

Motivation

Next-generation RNA sequencing (RNA-seq) has been widely used to investigate alternative isoform regulations. Among them, alternative 3' splice site (SS) and 5' SS account for more than 30% of all alternative splicing (AS) events in higher eukaryotes. Recent studies have revealed that they play important roles in building complex organisms and have a critical impact on biological functions which could cause disease. Quite a few analytical methods have been developed to facilitate alternative 3' SS and 5' SS studies using RNA-seq data. However, these methods have various limitations and their performances may be further improved.

Results

We propose an empirical Bayes change-point model to identify alternative 3' SS and 5' SS. Compared with previous methods, our approach has several unique merits. First of all, our model does not rely on annotation information. Instead, it provides for the first time a systematic framework to integrate various information when available, in particular the useful junction read information, in order to obtain better performance. Second, we utilize an empirical Bayes model to efficiently pool information across genes to improve detection efficiency. Third, we provide a flexible testing framework in which the user can choose to address different levels of questions, namely, whether alternative 3' SS or 5' SS happens, and/or where it happens. Simulation studies and real data application have demonstrated that our method is powerful and accurate.

Availability and implementation

The software is implemented in Java and can be freely downloaded from http://ebchangepoint.sourceforge.net/

Contact

zhiwei@njit.edu.",2016-02-11 +25242914,dxtbx: the diffraction experiment toolbox.,"Data formats for recording X-ray diffraction data continue to evolve rapidly to accommodate new detector technologies developed in response to more intense light sources. Processing the data from single-crystal X-ray diffraction experiments therefore requires the ability to read, and correctly interpret, image data and metadata from a variety of instruments employing different experimental representations. Tools that have previously been developed to address this problem have been limited either by a lack of extensibility or by inconsistent treatment of image metadata. The dxtbx software package provides a consistent interface to both image data and experimental models, while supporting a completely generic user-extensible approach to reading the data files. The library is written in a mixture of C++ and Python and is distributed as part of the cctbx under an open-source licence at http://cctbx.sourceforge.net.",2014-07-19 +27885428,Normative biometry of the fetal brain using magnetic resonance imaging.,"The fetal brain shows accelerated growth in the latter half of gestation, and these changes can be captured by 2D and 3D biometry measurements. The aim of this study was to quantify brain growth in normal fetuses using Magnetic Resonance Imaging (MRI) and to produce reference biometry data and a freely available centile calculator ( https://www.developingbrain.co.uk/fetalcentiles/ ). A total of 127 MRI examinations (1.5 T) of fetuses with a normal brain appearance (21-38 gestational weeks) were included in this study. 
2D and 3D biometric parameters were measured from slice-to-volume reconstructed images, including 3D measurements of supratentorial brain tissue, lateral ventricles, cortex, cerebellum and extra-cerebral CSF and 2D measurements of brain biparietal diameter and fronto-occipital length, skull biparietal diameter and occipitofrontal diameter, head circumference, transverse cerebellar diameter, extra-cerebral CSF, ventricular atrial diameter, and vermis height, width, and area. Centiles were constructed for each measurement. All participants were invited for developmental follow-up. All 2D and 3D measurements, except for atrial diameter, showed a significant positive correlation with gestational age. There was a sex effect on left and total lateral ventricular volumes and the degree of ventricular asymmetry. The 5th, 50th, and 95th centiles and a centile calculator were produced. Developmental follow-up was available for 73.1% of cases [mean chronological age 27.4 (±10.2) months]. We present normative reference charts for fetal brain MRI biometry at 21-38 gestational weeks. Developing growth trajectories will aid in the better understanding of normal fetal brain growth and subsequently of deviations from typical development in high-risk pregnancies or following premature delivery.",2016-11-24 +26087185,Virus-Clip: a fast and memory-efficient viral integration site detection tool at single-base resolution with annotation capability.,"Viral integration into the human genome upon infection is an important risk factor for various human malignancies. We developed viral integration site detection tool called Virus-Clip, which makes use of information extracted from soft-clipped sequencing reads to identify exact positions of human and virus breakpoints of integration events. With initial read alignment to virus reference genome and streamlined procedures, Virus-Clip delivers a simple, fast and memory-efficient solution to viral integration site detection. 
Moreover, it can also automatically annotate the integration events with the corresponding affected human genes. Virus-Clip has been verified using whole-transcriptome sequencing data and its detection was validated to have satisfactory sensitivity and specificity. Marked advancement in performance was detected, compared to existing tools. It is applicable to versatile types of data including whole-genome sequencing, whole-transcriptome sequencing, and targeted sequencing. Virus-Clip is available at http://web.hku.hk/~dwhho/Virus-Clip.zip.",2015-08-01 +26658340,Burden Calculator: a simple and open analytical tool for estimating the population burden of injuries.,"

Background

Burden of disease and injury methods can be used to summarise and compare the effects of conditions in terms of disability-adjusted life years (DALYs). Burden estimation methods are not inherently complex. However, as commonly implemented, the methods include complex modelling and estimation.

Objectives

To provide a simple and open-source software tool that allows estimation of incidence-DALYs due to injury, given data on incidence of deaths and non-fatal injuries. The tool includes a default set of estimation parameters, which can be replaced by users.

Development of the software tool

The tool was written in Microsoft Excel. All calculations and values can be seen and altered by users. The parameter sets currently used in the tool are based on published sources.

Using the software tool

The tool is available without charge online at http://calculator.globalburdenofinjuries.org. To use the tool with the supplied parameter sets, users need to only paste a table of population and injury case data organised by age, sex and external cause of injury into a specified location in the tool. Estimated DALYs can be read or copied from tables and figures in another part of the tool.

Conclusions

In some contexts, a simple and user-modifiable burden calculator may be preferable to undertaking a more complex study to estimate the burden of disease. The tool and the parameter sets required for its use can be improved by user innovation, by studies comparing DALYs estimates calculated in this way and in other ways, and by shared experience of its use.",2015-12-09 +26748106,EpimiRBase: a comprehensive database of microRNA-epilepsy associations.,"

Unlabelled

MicroRNAs are short non-coding RNA which function to fine-tune protein levels in all cells. This is achieved mainly by sequence-specific binding to 3' untranslated regions of target mRNA. The result is post-transcriptional interference in gene expression which reduces protein levels either by promoting destabilisation of mRNA or translational repression. Research published since 2010 shows that microRNAs are important regulators of gene expression in epilepsy. A series of microRNA profiling studies in rodent and human tissue has revealed that epilepsy is associated with wide ranging changes to microRNA levels in the brain. These are thought to influence processes including cell death, inflammation and re-wiring of neuronal networks. MicroRNAs have also been identified in the blood after injury to the brain and therefore may serve as biomarkers of epilepsy. EpimiRBase is a manually curated database for researchers interested in the role of microRNAs in epilepsy. The fully searchable database includes information on up- and down-regulated microRNAs in the brain and blood, as well as functional studies, and covers both rodent models and human epilepsy.

Availability and implementation

EpimiRBase is available at http://www.epimirbase.eu

Contact

catherinemooney@rcsi.ie.",2016-01-08 +27274534,Whole transcriptome data analysis of zebrafish mutants affecting muscle development.,"Formation of the contractile myofibril of the skeletal muscle is a complex process which when perturbed leads to muscular dystrophy. Herein, we provide a mRNAseq dataset on three different zebrafish mutants affecting muscle organization during embryogenesis. These comprise the myosin folding chaperone unc45b (unc45b-/-), heat shock protein 90aa1.1 (hsp90aa1.1-/-) and the acetylcholine esterase (ache-/-) gene. The transcriptome analysis was performed in duplicate experiments at 72 h post-fertilization (hpf) for all three mutants, with two additional times of development (24 hpf and 48 hpf) for unc45b-/-. A total of 20 samples were analyzed by hierarchical clustering for differential gene expression. The data from this study support the observation made in Etard et al. (2015) [1] (http://dx.doi.org/10.1186/s13059-015-0825-8) that a failure to fold myosin activates a unique transcriptional program in the skeletal muscles that is different from that induced in stressed muscle cells.",2016-05-12 +26861823,Predicting physiologically relevant SH3 domain mediated protein-protein interactions in yeast.,"

Motivation

Many intracellular signaling processes are mediated by interactions involving peptide recognition modules such as SH3 domains. These domains bind to small, linear protein sequence motifs which can be identified using high-throughput experimental screens such as phage display. Binding motif patterns can then be used to computationally predict protein interactions mediated by these domains. While many protein-protein interaction prediction methods exist, most do not work with peptide recognition module mediated interactions or do not consider many of the known constraints governing physiologically relevant interactions between two proteins.

Results

A novel method for predicting physiologically relevant SH3 domain-peptide mediated protein-protein interactions in S. cerevisiae using phage display data is presented. Like some previous similar methods, this method uses position weight matrix models of protein linear motif preference for individual SH3 domains to scan the proteome for potential hits and then filters these hits using a range of evidence sources related to sequence-based and cellular constraints on protein interactions. The novelty of this approach is the large number of evidence sources used and the method of combination of sequence based and protein pair based evidence sources. By combining different peptide and protein features using multiple Bayesian models we are able to predict high confidence interactions with an overall accuracy of 0.97.

Availability and implementation

Domain-Motif Mediated Interaction Prediction (DoMo-Pred) command line tool and all relevant datasets are available under GNU LGPL license for download from http://www.baderlab.org/Software/DoMo-Pred. The DoMo-Pred command line tool is implemented using Python 2.7 and C++.

Contact

gary.bader@utoronto.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-09 +25297050,"Meeting new challenges: The 2014 HUPO-PSI/COSMOS Workshop: 13-15 April 2014, Frankfurt, Germany.","The Annual 2014 Spring Workshop of the Proteomics Standards Initiative (PSI) of the Human Proteome Organization (HUPO) was held this year jointly with the metabolomics COordination of Standards in MetabOlomicS (COSMOS) group. The range of existing MS standards (mzML, mzIdentML, mzQuantML, mzTab, TraML) was reviewed and updated in the light of new methodologies and advances in technologies. Adaptations to meet the needs of the metabolomics community were incorporated and a new data format for NMR, nmrML, was presented. The molecular interactions workgroup began work on a new version of the existing XML data interchange format. PSI-MI XML3.0 will enable the capture of more abstract data types such as protein complex topology derived from experimental data, allosteric binding, and dynamic interactions. Further information about the work of the HUPO-PSI can be found at http://www.psidev.info.",2014-10-09 +23757396,FSRD: fungal stress response database.,"Adaptation to different types of environmental stress is a common part of life for today's fungi. A deeper understanding of the organization, regulation and evolution of fungal stress response systems may lead to the development of novel antifungal drugs and technologies or the engineering of industrial strains with elevated stress tolerance. Here we present the Fungal Stress Response Database (http://internal.med.unideb.hu/fsrd) aimed to stimulate further research on stress biology of fungi. The database incorporates 1985 fungal stress response proteins with verified physiological function(s) and their orthologs identified and annotated in 28 species including human and plant pathogens, as well as important industrial fungi. The database will be extended continuously to cover other fully sequenced fungal species. 
Our database, as a starting point for future stress research, facilitates the analysis of literature data on stress and the identification of ortholog groups of stress response proteins in newly sequenced fungal genomes. Database URL: http://internal.med.unideb.hu/fsrd",2013-06-11 +26235816,Template-based identification of protein-protein interfaces using eFindSitePPI.,"Protein-protein interactions orchestrate virtually all cellular processes, therefore, their exhaustive exploration is essential for the comprehensive understanding of cellular networks. A reliable identification of interfacial residues is vital not only to infer the function of individual proteins and their assembly into biological complexes, but also to elucidate the molecular and physicochemical basis of interactions between proteins. With the exponential growth of protein sequence data, computational approaches for detecting protein interface sites have drawn an increased interest. In this communication, we discuss the major features of eFindSite(PPI), a recently developed template-based method for interface residue prediction available at http://brylinski.cct.lsu.edu/efindsiteppi. We describe the requirements and installation procedures for the stand-alone version, and explain the content and format of output data. Furthermore, the functionality of the eFindSite(PPI) web application that is designed to provide a simple and convenient access for the scientific community is presented with illustrative examples. Finally, we discuss common problems encountered in predicting protein interfaces and set forth directions for the future development of eFindSite(PPI).",2015-07-30 +27318147,Comparison of Gene Expression Patterns Between Mouse Models of Nonalcoholic Fatty Liver Disease and Liver Tissues From Patients.,"

Background & aims

Nonalcoholic fatty liver disease (NAFLD) is the most common chronic liver disorder in industrialized countries. Mouse models of NAFLD have been used in studies of pathogenesis and treatment, and have certain features of the human disease. We performed a systematic transcriptome-wide analysis of liver tissues from patients at different stages of NAFLD progression (ranging from healthy obese individuals to those with steatosis), as well as rodent models of NAFLD, to identify those that most closely resemble human disease progression in terms of gene expression patterns.

Methods

We performed a systematic evaluation of genome-wide messenger RNA expression using liver tissues collected from mice fed a standard chow diet (controls) and 9 mouse models of NAFLD: mice on a high-fat diet (with or without fructose), mice on a Western-type diet, mice on a methionine- and choline-deficient diet, mice on a high-fat diet given streptozotocin, and mice with disruption of Pten in hepatocytes. We compared gene expression patterns with those of liver tissues from 25 patients with nonalcoholic steatohepatitis (NASH), 27 patients with NAFLD, 15 healthy obese individuals, and 39 healthy nonobese individuals (controls). Liver samples were obtained from patients undergoing liver biopsy for suspected NAFLD or NASH, or during liver or bariatric surgeries. Data sets were analyzed using the limma R-package. Overlap of functional profiles was analyzed by gene set enrichment analysis profiles.

Results

We found differences between human and mouse transcriptomes to be significantly larger than differences between disease stages or models. Of the 65 genes with significantly altered expression in patients with NASH and 177 genes with significantly altered expression in patients with NAFLD, compared with controls, only 1-18 of these genes also differed significantly in expression between mouse models of NAFLD and control mice. However, expression of genes that regulate pathways associated with the development of NAFLD were altered in some mouse models (such as pathways associated with lipid metabolism). On a pathway level, gene expression patterns in livers of mice on the high-fat diet were associated more closely with human fatty liver disease than other models.

Conclusions

In comparing gene expression profiles between liver tissues from different mouse models of NAFLD and patients with different stages of NAFLD, we found very little overlap. Our data set is available for studies of pathways that contribute to the development of NASH and NAFLD and selection of the most applicable mouse models (http://www.nash-profiler.com).",2016-06-16 +25959493,obitools: a unix-inspired software package for DNA metabarcoding.,"DNA metabarcoding offers new perspectives in biodiversity research. This recently developed approach to ecosystem study relies heavily on the use of next-generation sequencing (NGS) and thus calls upon the ability to deal with huge sequence data sets. The obitools package satisfies this requirement thanks to a set of programs specifically designed for analysing NGS data in a DNA metabarcoding context. Their capacity to filter and edit sequences while taking into account taxonomic annotation helps to set up tailor-made analysis pipelines for a broad range of DNA metabarcoding applications, including biodiversity surveys or diet analyses. The obitools package is distributed as an open source software available on the following website: http://metabarcoding.org/obitools. A Galaxy wrapper is available on the GenOuest core facility toolshed: http://toolshed.genouest.org.",2015-05-26 +22669907,Cscan: finding common regulators of a set of genes by using a collection of genome-wide ChIP-seq datasets.,"The regulation of transcription of eukaryotic genes is a very complex process, which involves interactions between transcription factors (TFs) and DNA, as well as other epigenetic factors like histone modifications, DNA methylation, and so on, which nowadays can be studied and characterized with techniques like ChIP-Seq. Cscan is a web resource that includes a large collection of genome-wide ChIP-Seq experiments performed on TFs, histone modifications, RNA polymerases and others. 
Enriched peak regions from the ChIP-Seq experiments are crossed with the genomic coordinates of a set of input genes, to identify which of the experiments present a statistically significant number of peaks within the input genes' loci. The input can be a cluster of co-expressed genes, or any other set of genes sharing a common regulatory profile. Users can thus single out which TFs are likely to be common regulators of the genes, and their respective correlations. Also, by examining results on promoter activation, transcription, histone modifications, polymerase binding and so on, users can investigate the effect of the TFs (activation or repression of transcription) as well as of the cell or tissue specificity of the genes' regulation and expression. The web interface is free for use, and there is no login requirement. Available at: http://www.beaconlab.it/cscan.",2012-06-04 +26740458,A novel semi-supervised algorithm for the taxonomic assignment of metagenomic reads.,"

Background

Taxonomic assignment is a crucial step in a metagenomic project which aims to identify the origin of sequences in an environmental sample. Among the existing methods, since composition-based algorithms are not sufficient for classifying short reads, recent algorithms use only the feature of similarity, or similarity-based combined features. However, those algorithms suffer from the computational expense because the task of similarity search is very time-consuming. Besides, the lack of similarity information between reads and reference sequences due to the length of short reads reduces significantly the classification quality.

Results

This paper presents a novel taxonomic assignment algorithm, called SeMeta, which is based on semi-supervised learning to produce a fast and highly accurate classification of short-length reads with sufficient mutual overlap. The proposed algorithm firstly separates reads into clusters using their composition feature. It then labels the clusters with the support of an efficient filtering technique on results of the similarity search between their reads and reference databases. Furthermore, instead of performing the similarity search for all reads in the clusters, SeMeta only does for reads in their subgroups by utilizing the information of sequence overlapping. The experimental results demonstrate that SeMeta outperforms two other similarity-based algorithms on different aspects.

Conclusions

By using a semi-supervised method as well as taking the advantages of various features, the proposed algorithm is able not only to achieve high classification quality, but also to reduce much computational cost. The source codes of the algorithm can be downloaded at http://it.hcmute.edu.vn/bioinfo/metapro/SeMeta.html.",2016-01-06 +23820583,The organization of the quorum sensing luxI/R family genes in Burkholderia.,"Members of the Burkholderia genus of Proteobacteria are capable of living freely in the environment and can also colonize human, animal and plant hosts. Certain members are considered to be clinically important from both medical and veterinary perspectives and furthermore may be important modulators of the rhizosphere. Quorum sensing via N-acyl homoserine lactone signals (AHL QS) is present in almost all Burkholderia species and is thought to play important roles in lifestyle changes such as colonization and niche invasion. Here we present a census of AHL QS genes retrieved from public databases and indicate that the local arrangement (topology) of QS genes, their location within chromosomes and their gene neighborhoods show characteristic patterns that differ between the known Burkholderia clades. In sequence phylogenies, AHL QS genes seem to cluster according to the local gene topology rather than according to the species, which suggests that the basic topology types were present prior to the appearance of current Burkholderia species. The data are available at http://net.icgeb.org/burkholderia/.",2013-07-02 +26286809,svviz: a read viewer for validating structural variants.,"

Unlabelled

Visualizing read alignments is the most effective way to validate candidate structural variants (SVs) with existing data. We present svviz, a sequencing read visualizer for SVs that sorts and displays only reads relevant to a candidate SV. svviz works by searching input bam(s) for potentially relevant reads, realigning them against the inferred sequence of the putative variant allele as well as the reference allele and identifying reads that match one allele better than the other. Separate views of the two alleles are then displayed in a scrollable web browser view, enabling a more intuitive visualization of each allele, compared with the single reference genome-based view common to most current read browsers. The browser view facilitates examining the evidence for or against a putative variant, estimating zygosity, visualizing affected genomic annotations and manual refinement of breakpoints. svviz supports data from most modern sequencing platforms.

Availability and implementation

svviz is implemented in python and freely available from http://svviz.github.io/.",2015-08-18 +25995230,Trans-species learning of cellular signaling systems with bimodal deep belief networks.,"

Motivation

Model organisms play critical roles in biomedical research of human diseases and drug development. An imperative task is to translate information/knowledge acquired from model organisms to humans. In this study, we address a trans-species learning problem: predicting human cell responses to diverse stimuli, based on the responses of rat cells treated with the same stimuli.

Results

We hypothesized that rat and human cells share a common signal-encoding mechanism but employ different proteins to transmit signals, and we developed a bimodal deep belief network and a semi-restricted bimodal deep belief network to represent the common encoding mechanism and perform trans-species learning. These 'deep learning' models include hierarchically organized latent variables capable of capturing the statistical structures in the observed proteomic data in a distributed fashion. The results show that the models significantly outperform two current state-of-the-art classification algorithms. Our study demonstrated the potential of using deep hierarchical models to simulate cellular signaling systems.

Availability and implementation

The software is available at the following URL: http://pubreview.dbmi.pitt.edu/TransSpeciesDeepLearning/. The data are available through SBV IMPROVER website, https://www.sbvimprover.com/challenge-2/overview, upon publication of the report by the organizers.

Contact

xinghua@pitt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-20 +26731790,Childhood Exposure to Ambient Air Pollutants and the Onset of Asthma: An Administrative Cohort Study in Québec.,"

Background

Although it is well established that air pollutants can exacerbate asthma, the link with new asthma onset in children is less clear.

Objective

We assessed the association between the onset of childhood asthma with both time of birth and time-varying exposures to outdoor air pollutants.

Method

An open cohort of children born in the province of Québec, Canada, was created using linked medical-administrative databases. New cases of asthma were defined as one hospital discharge with a diagnosis of asthma or two physician claims for asthma within a 2 year period. Annual ozone (O3) levels were estimated at the child's residence for all births 1999-2010, and nitrogen dioxide (NO2) levels during 1996-2006 were estimated for births on the Montreal Island. Satellite based concentrations of fine particles (PM2.5) were estimated at a 10 km × 10 km resolution and assigned to residential postal codes throughout the province (1996-2011). Hazard ratios (HRs) were assessed with Cox models for the exposure at the birth address and for the time-dependent exposure. We performed an indirect adjustment for secondhand smoke (SHS).

Results

We followed 1,183,865 children (7,752,083 person-years), of whom 162,752 became asthmatic. After controlling for sex and material and social deprivation, HRs for an interquartile range increase in exposure at the birth address to NO2 (5.45 ppb), O3 (3.22 ppb), and PM2.5 (6.50 μg/m3) were 1.04 (95% CI: 1.02, 1.05), 1.11 (95% CI: 1.10, 1.12), and 1.31 (95% CI: 1.28, 1.33), respectively. Effects of O3 and PM2.5 estimated with time-varying Cox models were similar to those estimated using exposure at birth, whereas the effect of NO2 was slightly stronger (HR = 1.07; 95% CI: 1.05, 1.09).

Conclusions

Asthma onset in children appears to be associated with residential exposure to PM2.5, O3 and NO2.

Citation

Tétreault LF, Doucet M, Gamache P, Fournier M, Brand A, Kosatsky T, Smargiassi A. 2016. Childhood exposure to ambient air pollutants and the onset of asthma: an administrative cohort study in Québec. Environ Health Perspect 124:1276-1282; http://dx.doi.org/10.1289/ehp.1509838.",2016-01-05 +25973533,NGS-based approach to determine the presence of HPV and their sites of integration in human cancer genome.,"

Background

Human papilloma virus (HPV) accounts for the most common cause of all virus-associated human cancers. Here, we describe the first graphic user interface (GUI)-based automated tool 'HPVDetector', for non-computational biologists, exclusively for detection and annotation of the HPV genome based on next-generation sequencing data sets.

Methods

We developed a custom-made reference genome that comprises of human chromosomes along with annotated genome of 143 HPV types as pseudochromosomes. The tool runs on a dual mode as defined by the user: a 'quick mode' to identify presence of HPV types and an 'integration mode' to determine genomic location for the site of integration. The input data can be a paired-end whole-exome, whole-genome or whole-transcriptome data set. The HPVDetector is available in public domain for download: http://www.actrec.gov.in/pi-webpages/AmitDutt/HPVdetector/HPVDetector.html.

Results

On the basis of our evaluation of 116 whole-exome, 23 whole-transcriptome and 2 whole-genome data, we were able to identify presence of HPV in 20 exomes and 4 transcriptomes of cervical and head and neck cancer tumour samples. Using the inbuilt annotation module of HPVDetector, we found predominant integration of viral gene E7, a known oncogene, at known 17q21, 3q27, 7q35, Xq28 and novel sites of integration in the human genome. Furthermore, co-infection with high-risk HPVs such as 16 and 31 were found to be mutually exclusive compared with low-risk HPV71.

Conclusions

HPVDetector is a simple yet precise and robust tool for detecting HPV from tumour samples using variety of next-generation sequencing platforms including whole genome, whole exome and transcriptome. Two different modes (quick detection and integration mode) along with a GUI widen the usability of HPVDetector for biologists and clinicians with minimal computational knowledge.",2015-05-14 +,The Switchgrass Genome: Tools and Strategies,"Switchgrass (Panicum virgatum L.) is a perennial grass species receiving significant focus as a potential bioenergy crop. In the last 5 yr the switchgrass research community has produced a genetic linkage map, an expressed sequence tag (EST) database, a set of single nucleotide polymorphism (SNP) markers that are distributed across the 18 linkage groups, 4x sampling of the P. virgatum AP13 genome in 400-bp reads, and bacterial artificial chromosome (BAC) libraries containing over 200,000 clones. These studies have revealed close collinearity of the switchgrass genome with those of sorghum [Sorghum bicolor (L.) Moench], rice (Oryza sativa L.), and Brachypodium distachyon (L.) P. Beauv. Switchgrass researchers have also developed several microarray technologies for gene expression studies. Switchgrass genomic resources will accelerate the ability of plant breeders to enhance productivity, pest resistance, and nutritional quality. Because switchgrass is a relative newcomer to the genomics world, many secrets of the switchgrass genome have yet to be revealed. To continue to efficiently explore basic and applied topics in switchgrass, it will be critical to capture and exploit the knowledge of plant geneticists and breeders on the next logical steps in the development and utilization of genomic resources for this species. To this end, the community has established a switchgrass genomics executive committee and work group (http://switchgrassgenomics.org/ [verified 28 Oct. 
2011]).",2011-01-01 +26772592,3DIANA: 3D Domain Interaction Analysis: A Toolbox for Quaternary Structure Modeling.,"Electron microscopy (EM) is experiencing a revolution with the advent of a new generation of Direct Electron Detectors, enabling a broad range of large and flexible structures to be resolved well below 1 nm resolution. Although EM techniques are evolving to the point of directly obtaining structural data at near-atomic resolution, for many molecules the attainable resolution might not be enough to propose high-resolution structural models. However, accessing information on atomic coordinates is a necessary step toward a deeper understanding of the molecular mechanisms that allow proteins to perform specific tasks. For that reason, methods for the integration of EM three-dimensional maps with x-ray and NMR structural data are being developed, a modeling task that is normally referred to as fitting, resulting in the so called hybrid models. In this work, we present a novel application-3DIANA-specially targeted to those cases in which the EM map resolution is medium or low and additional experimental structural information is scarce or even lacking. In this way, 3DIANA statistically evaluates proposed/potential contacts between protein domains, presents a complete catalog of both structurally resolved and predicted interacting regions involving these domains and, finally, suggests structural templates to model the interaction between them. The evaluation of the proposed interactions is computed with DIMERO, a new method that scores physical binding sites based on the topology of protein interaction networks, which has recently shown the capability to increase by 200% the number of domain-domain interactions predicted in interactomes as compared to previous approaches. 
The new application displays the information at a sequence and structural level and is accessible through a web browser or as a Chimera plugin at http://3diana.cnb.csic.es.",2016-01-07 +26282778,Enhancing Medical Decision-Making Evaluations: Introduction of Normative Data for the Capacity to Consent to Treatment Instrument.,"A number of measures have been developed to assess medical decision-making capacity (MDC) in adults. However, their clinical utility is limited by a lack of available normative data. In the current study, we introduce age-independent and age-adjusted normative data for a measure of MDC: the Capacity to Consent to Treatment Instrument. The sample consisted of 308 cognitively normal, community-dwelling adults ranging in age from 19 to 86 years. For age-adjusted norms, individual raw scores were first converted to age-corrected scaled scores based on position within a cumulative frequency distribution and then grouped according to empirically supported age ranges. For age-independent norms, the same method was utilized but without age-corrections being applied or participants being grouped into age ranges. This study has the potential to enhance MDC evaluations by allowing clinicians to compare a patient's performance on the Capacity to Consent to Treatment Instrument with that of adults regardless of age as well as to same age peers. Tables containing normative corrections are supplementary material available online at http://asm.sagepub.com/supplemental.",2015-08-17 +22661982,Soybean Proteome Database 2012: update on the comprehensive data repository for soybean proteomics.,"The Soybean Proteome Database (SPD) was created to provide a data repository for functional analyses of soybean responses to flooding stress, thought to be a major constraint for establishment and production of this plant. 
Since the last publication of the SPD, we thoroughly enhanced the contents of database, particularly protein samples and their annotations from several organelles. The current release contains 23 reference maps of soybean (Glycine max cv. Enrei) proteins collected from several organs, tissues, and organelles including the maps for plasma membrane, cell wall, chloroplast, and mitochondrion, which were analyzed by two-dimensional polyacrylamide gels. Furthermore, the proteins analyzed with gel-free proteomics technique have been added and are available online. In addition to protein fluctuations under flooding, those of salt and drought stress have been included in the current release. A case analysis employing a portion of those newly released data was conducted, and the results will be shown. An 'omics table has also been provided to reveal relationships among mRNAs, proteins, and metabolites with a unified temporal-profile tag in order to facilitate retrieval of the data based on the temporal profiles. An intuitive user interface based on dynamic HTML enables users to browse the network as well as the profiles of the multiple ""omes"" in an integrated fashion. The SPD is available at: http://proteome.dc.affrc.go.jp/Soybean/",2012-05-30 +23283513,EuLoc: a web-server for accurately predict protein subcellular localization in eukaryotes by incorporating various features of sequence segments into the general form of Chou's PseAAC.,"The function of a protein is generally related to its subcellular localization. Therefore, knowing its subcellular localization is helpful in understanding its potential functions and roles in biological processes. This work develops a hybrid method for computationally predicting the subcellular localization of eukaryotic protein. 
The method is called EuLoc and incorporates the Hidden Markov Model (HMM) method, homology search approach and the support vector machines (SVM) method by fusing several new features into Chou's pseudo-amino acid composition. The proposed SVM module overcomes the shortcoming of the homology search approach in predicting the subcellular localization of a protein which only finds low-homologous or non-homologous sequences in a protein subcellular localization annotated database. The proposed HMM modules overcome the shortcoming of SVM in predicting subcellular localizations using few data on protein sequences. Several features of a protein sequence are considered, including the sequence-based features, the biological features derived from PROSITE, NLSdb and Pfam, the post-transcriptional modification features and others. The overall accuracy and location accuracy of EuLoc are 90.5 and 91.2 %, respectively, revealing a better predictive performance than obtained elsewhere. Although the amounts of data of the various subcellular location groups in benchmark dataset differ markedly, the accuracies of 12 subcellular localizations of EuLoc range from 82.5 to 100 %, indicating that this tool is much more balanced than other tools. EuLoc offers a high, balanced predictive power for each subcellular localization. EuLoc is now available on the web at http://euloc.mbc.nctu.edu.tw/.",2013-01-03 +23143270,The ConsensusPathDB interaction database: 2013 update.,"Knowledge of the various interactions between molecules in the cell is crucial for understanding cellular processes in health and disease. Currently available interaction databases, being largely complementary to each other, must be integrated to obtain a comprehensive global map of the different types of interactions. We have previously reported the development of an integrative interaction database called ConsensusPathDB (http://ConsensusPathDB.org) that aims to fulfill this task. 
In this update article, we report its significant progress in terms of interaction content and web interface tools. ConsensusPathDB has grown mainly due to the integration of 12 further databases; it now contains 215 541 unique interactions and 4601 pathways from overall 30 databases. Binary protein interactions are scored with our confidence assessment tool, IntScore. The ConsensusPathDB web interface allows users to take advantage of these integrated interaction and pathway data in different contexts. Recent developments include pathway analysis of metabolite lists, visualization of functional gene/metabolite sets as overlap graphs, gene set analysis based on protein complexes and induced network modules analysis that connects a list of genes through various interaction types. To facilitate the interactive, visual interpretation of interaction and pathway data, we have re-implemented the graph visualization feature of ConsensusPathDB using the Cytoscape.js library.",2012-11-11 +26742457,The effects of air pollution and weather conditions on the incidence of acute myocardial infarction.,"

Objective

In this retrospective study, we investigated the association between air pollution and weather conditions with the incidence of acute myocardial infarction (AMI) in the city of Kutahya.

Methods

A total of 402 patients who were admitted with acute ST segment elevation MI and non-ST segment elevation MI were included in the study in 1 year. Daily maximum, minimum, and mean ambient temperature and mean barometric pressure data were obtained from the Kutahya Meteorology Department. Daily air pollution data were obtained from the Web site of National Air Quality Observation Network (http://www.havaizleme.gov.tr).

Results

Increase in ambient air temperature in the day of MI and 2 days before the day of MI according to their control days was correlated with increase in number of MI cases. When we grouped the patients according to ages as 30-54, 55-65, and >65 years, we found that there was a relation between sulfur dioxide (SO2) and the occurrence of AMI for the age group of 30-54 for the same day (D0) (P<.017). The number of AMIs was the lowest in fall season, whereas the number of AMIs was the highest in winter season.

Conclusion

There was no statistically significant association between the particulates with diameter ≤10 μm, SO2 concentrations, air pressure, and the risk of AMI, but there was statistically significant relation between occurrence of MI and SO2 for the patients under age of 55 years. The number of AMIs was the lowest in fall season, whereas the number of AMIs was the highest in winter season.",2015-12-04 +23203875,TCMID: Traditional Chinese Medicine integrative database for herb molecular mechanism analysis.,"As an alternative to modern western medicine, Traditional Chinese Medicine (TCM) is receiving increasingly attention worldwide. Great efforts have been paid to TCM's modernization, which tries to bridge the gap between TCM and modern western medicine. As TCM and modern western medicine share a common aspect at molecular level that the compound(s) perturb human's dysfunction network and restore human normal physiological condition, the relationship between compounds (in herb, refer to ingredients) and their targets (proteins) should be the key factor to connect TCM and modern medicine. Accordingly, we construct this Traditional Chinese Medicine Integrated Database (TCMID, http://www.megabionet.org/tcmid/), which records TCM-related information collected from different resources and through text-mining method. To enlarge the scope of the TCMID, the data have been linked to common drug and disease databases, including Drugbank, OMIM and PubChem. Currently, our TCMID contains ∼47 000 prescriptions, 8159 herbs, 25 210 compounds, 6828 drugs, 3791 diseases and 17 521 related targets, which is the largest data set for related field. 
Our web-based software displays a network for integrative relationships between herbs and their treated diseases, the active ingredients and their targets, which will facilitate the study of combination therapy and understanding of the underlying mechanisms for TCM at molecular level.",2012-11-29 +25494997,Masking as an effective quality control method for next-generation sequencing data analysis.,"

Background

Next generation sequencing produces base calls with low quality scores that can affect the accuracy of identifying simple nucleotide variation calls, including single nucleotide polymorphisms and small insertions and deletions. Here we compare the effectiveness of two data preprocessing methods, masking and trimming, and the accuracy of simple nucleotide variation calls on whole-genome sequence data from Caenorhabditis elegans. Masking substitutes low quality base calls with 'N's (undetermined bases), whereas trimming removes low quality bases, which results in shorter read lengths.

Results

We demonstrate that masking is more effective than trimming in reducing the false-positive rate in single nucleotide polymorphism (SNP) calling. However, both of the preprocessing methods did not affect the false-negative rate in SNP calling with statistical significance compared to the data analysis without preprocessing. False-positive rate and false-negative rate for small insertions and deletions did not show differences between masking and trimming.

Conclusions

We recommend masking over trimming as a more effective preprocessing method for next generation sequencing data analysis since masking reduces the false-positive rate in SNP calling without sacrificing the false-negative rate although trimming is more commonly used currently in the field. The perl script for masking is available at http://code.google.com/p/subn/. The sequencing data used in the study were deposited in the Sequence Read Archive (SRX450968 and SRX451773).",2014-12-13 +23193267,The Protist Ribosomal Reference database (PR2): a catalog of unicellular eukaryote small sub-unit rRNA sequences with curated taxonomy.,"The interrogation of genetic markers in environmental meta-barcoding studies is currently seriously hindered by the lack of taxonomically curated reference data sets for the targeted genes. The Protist Ribosomal Reference database (PR(2), http://ssu-rrna.org/) provides a unique access to eukaryotic small sub-unit (SSU) ribosomal RNA and DNA sequences, with curated taxonomy. The database mainly consists of nuclear-encoded protistan sequences. However, metazoans, land plants, macrosporic fungi and eukaryotic organelles (mitochondrion, plastid and others) are also included because they are useful for the analysis of high-troughput sequencing data sets. Introns and putative chimeric sequences have been also carefully checked. Taxonomic assignation of sequences consists of eight unique taxonomic fields. In total, 136 866 sequences are nuclear encoded, 45 708 (36 501 mitochondrial and 9657 chloroplastic) are from organelles, the remaining being putative chimeric sequences. The website allows the users to download sequences from the entire and partial databases (including representative sequences after clustering at a given level of similarity). Different web tools also allow searches by sequence similarity. 
The presence of both rRNA and rDNA sequences, taking into account introns (crucial for eukaryotic sequences), a normalized eight terms ranked-taxonomy and updates of new GenBank releases were made possible by a long-term collaboration between experts in taxonomy and computer scientists.",2012-11-27 +23946503,omiRas: a Web server for differential expression analysis of miRNAs derived from small RNA-Seq data.,"

Summary

Small RNA deep sequencing is widely used to characterize non-coding RNAs (ncRNAs) differentially expressed between two conditions, e.g. healthy and diseased individuals and to reveal insights into molecular mechanisms underlying condition-specific phenotypic traits. The ncRNAome is composed of a multitude of RNAs, such as transfer RNA, small nucleolar RNA and microRNA (miRNA), to name few. Here we present omiRas, a Web server for the annotation, comparison and visualization of interaction networks of ncRNAs derived from next-generation sequencing experiments of two different conditions. The Web tool allows the user to submit raw sequencing data and results are presented as: (i) static annotation results including length distribution, mapping statistics, alignments and quantification tables for each library as well as lists of differentially expressed ncRNAs between conditions and (ii) an interactive network visualization of user-selected miRNAs and their target genes based on the combination of several miRNA-mRNA interaction databases.

Availability and implementation

The omiRas Web server is implemented in Python, PostgreSQL, R and can be accessed at: http://tools.genxpro.net/omiras/.",2013-08-13 +24358291,"Overweight people have low levels of implicit weight bias, but overweight nations have high levels of implicit weight bias.","Although a greater degree of personal obesity is associated with weaker negativity toward overweight people on both explicit (i.e., self-report) and implicit (i.e., indirect behavioral) measures, overweight people still prefer thin people on average. We investigated whether the national and cultural context - particularly the national prevalence of obesity - predicts attitudes toward overweight people independent of personal identity and weight status. Data were collected from a total sample of 338,121 citizens from 71 nations in 22 different languages on the Project Implicit website (https://implicit.harvard.edu/) between May 2006 and October 2010. We investigated the relationship of the explicit and implicit weight bias with the obesity both at the individual (i.e., across individuals) and national (i.e., across nations) level. Explicit weight bias was assessed with self-reported preference between overweight and thin people; implicit weight bias was measured with the Implicit Association Test (IAT). The national estimates of explicit and implicit weight bias were obtained by averaging the individual scores for each nation. Obesity at the individual level was defined as Body Mass Index (BMI) scores, whereas obesity at the national level was defined as three national weight indicators (national BMI, national percentage of overweight and underweight people) obtained from publicly available databases. Across individuals, greater degree of obesity was associated with weaker implicit negativity toward overweight people compared to thin people. 
Across nations, in contrast, a greater degree of national obesity was associated with stronger implicit negativity toward overweight people compared to thin people. This result indicates a different relationship between obesity and implicit weight bias at the individual and national levels.",2013-12-17 +23209799,The duplicated genes database: identification and functional annotation of co-localised duplicated genes across genomes.,"

Background

There has been a surge in studies linking genome structure and gene expression, with special focus on duplicated genes. Although initially duplicated from the same sequence, duplicated genes can diverge strongly over evolution and take on different functions or regulated expression. However, information on the function and expression of duplicated genes remains sparse. Identifying groups of duplicated genes in different genomes and characterizing their expression and function would therefore be of great interest to the research community. The 'Duplicated Genes Database' (DGD) was developed for this purpose.

Methodology

Nine species were included in the DGD. For each species, BLAST analyses were conducted on peptide sequences corresponding to the genes mapped on the same chromosome. Groups of duplicated genes were defined based on these pairwise BLAST comparisons and the genomic location of the genes. For each group, Pearson correlations between gene expression data and semantic similarities between functional GO annotations were also computed when the relevant information was available.

Conclusions

The Duplicated Gene Database provides a list of co-localised and duplicated genes for several species with the available gene co-expression level and semantic similarity value of functional annotation. Adding these data to the groups of duplicated genes provides biological information that can prove useful to gene expression analyses. The Duplicated Gene Database can be freely accessed through the DGD website at http://dgd.genouest.org.",2012-11-28 +26842355,"ReX: A suite of computational tools for the design, visualization, and analysis of chimeric protein libraries.","Directed evolution has greatly facilitated protein engineering and provided new insights into protein structure-function relationships. DNA shuffling using restriction enzymes is a particularly simple and cost-effective means of recombinatorial evolution that is well within the capability of most molecular biologists, but tools for the design and analysis of such experiments are limited. Here we introduce a suite of freely available online tools to make the construction and analysis of chimeric libraries readily accessible to the novice. REcut (http://qpmf.rx.umaryland.edu/REcut.html) facilitates the choice of DNA fragmentation strategy, while Xover (http://qpmf.rx.umaryland.edu/Xover.html) analyzes chimeric mutants to reveal recombination patterns and extract quantitative data.",2016-02-01 +25963975,Species Tree Inference Using a Mixture Model.,"Species tree reconstruction has been a subject of substantial research due to its central role across biology and medicine. A species tree is often reconstructed using a set of gene trees or by directly using sequence data. In either of these cases, one of the main confounding phenomena is the discordance between a species tree and a gene tree due to evolutionary events such as duplications and losses. 
Probabilistic methods can resolve the discordance by coestimating gene trees and the species tree but this approach poses a scalability problem for larger data sets. We present MixTreEM-DLRS: A two-phase approach for reconstructing a species tree in the presence of gene duplications and losses. In the first phase, MixTreEM, a novel structural expectation maximization algorithm based on a mixture model is used to reconstruct a set of candidate species trees, given sequence data for monocopy gene families from the genomes under study. In the second phase, PrIME-DLRS, a method based on the DLRS model (Åkerborg O, Sennblad B, Arvestad L, Lagergren J. 2009. Simultaneous Bayesian gene tree reconstruction and reconciliation analysis. Proc Natl Acad Sci U S A. 106(14):5714-5719), is used for selecting the best species tree. PrIME-DLRS can handle multicopy gene families since DLRS, apart from modeling sequence evolution, models gene duplication and loss using a gene evolution model (Arvestad L, Lagergren J, Sennblad B. 2009. The gene evolution model and computing its associated probabilities. J ACM. 56(2):1-44). We evaluate MixTreEM-DLRS using synthetic and biological data, and compare its performance with a recent genome-scale species tree reconstruction method PHYLDOG (Boussau B, Szöllősi GJ, Duret L, Gouy M, Tannier E, Daubin V. 2013. Genome-scale coestimation of species and gene trees. Genome Res. 23(2):323-330) as well as with a fast parsimony-based algorithm Duptree (Wehe A, Bansal MS, Burleigh JG, Eulenstein O. 2008. Duptree: a program for large-scale phylogenetic analyses using gene tree parsimony. Bioinformatics 24(13):1540-1541). Our method is competitive with PHYLDOG in terms of accuracy and runs significantly faster and our method outperforms Duptree in accuracy. The analysis constituted by MixTreEM without DLRS may also be used for selecting the target species tree, yielding a fast and yet accurate algorithm for larger data sets. 
MixTreEM is freely available at http://prime.scilifelab.se/mixtreem/.",2015-05-11 +27665600,Modeling Small Noncanonical RNA Motifs with the Rosetta FARFAR Server.,"Noncanonical RNA motifs help define the vast complexity of RNA structure and function, and in many cases, these loops and junctions are on the order of only ten nucleotides in size. Unfortunately, despite their small size, there is no reliable method to determine the ensemble of lowest energy structures of junctions and loops at atomic accuracy. This chapter outlines straightforward protocols using a webserver for Rosetta Fragment Assembly of RNA with Full Atom Refinement (FARFAR) ( http://rosie.rosettacommons.org/rna_denovo/submit ) to model the 3D structure of small noncanonical RNA motifs for use in visualizing motifs and for further refinement or filtering with experimental data such as NMR chemical shifts.",2016-01-01 +22473445,In the clinic. Eating disorders.,"This issue provides a clinical overview of eating disorders focusing on prevention, diagnosis, treatment, practice improvement, and patient information. Readers can complete the accompanying CME quiz for 1.5 credits. Only ACP members and individual subscribers can access the electronic features of In the Clinic. Non-subscribers who wish to access this issue of In the Clinic can elect ""Pay for View."" Subscribers can receive 1.5 category 1 CME credits by completing the CME quiz that accompanies this issue of In the Clinic. The content of In the Clinic is drawn from the clinical information and education resources of the American College of Physicians (ACP), including PIER (Physicians' Information and Education Resource) and MKSAP (Medical Knowledge and Self Assessment Program). Annals of Internal Medicine editors develop In the Clinic from these primary sources in collaboration with the ACP's Medical Education and Publishing division and with assistance of science writers and physician writers. 
Editorial consultants from PIER and MKSAP provide expert review of the content. Readers who are interested in these primary resources for more detail can consult www.acponline.org, http://pier.acponline.org, and other resources referenced within each issue of In the Clinic.",2012-04-01 +26661785,A Sequence-Based Dynamic Ensemble Learning System for Protein Ligand-Binding Site Prediction.,"

Background

Proteins have the fundamental ability to selectively bind to other molecules and perform specific functions through such interactions, such as protein-ligand binding. Accurate prediction of protein residues that physically bind to ligands is important for drug design and protein docking studies. Most of the successful protein-ligand binding predictions were based on known structures. However, structural information is not largely available in practice due to the huge gap between the number of known protein sequences and that of experimentally solved structures.

Results

This paper proposes a dynamic ensemble approach to identify protein-ligand binding residues by using sequence information only. To avoid problems resulting from highly imbalanced samples between the ligand-binding sites and non ligand-binding sites, we constructed several balanced data sets and we trained a random forest classifier for each of them. We dynamically selected a subset of classifiers according to the similarity between the target protein and the proteins in the training data set. The combination of the predictions of the classifier subset to each query protein target yielded the final predictions. The ensemble of these classifiers formed a sequence-based predictor to identify protein-ligand binding sites.

Conclusions

Experimental results on two Critical Assessment of protein Structure Prediction datasets and the ccPDB dataset demonstrated that our proposed method compared favorably with the state-of-the-art.

Availability

http://www2.ahu.edu.cn/pchen/web/LigandDSES.htm.",2015-12-03 +27165405,Insights into Antimicrobial Peptides from Spiders and Scorpions.,"The venoms of spiders and scorpions contain a variety of chemical compounds. Antimicrobial peptides (AMPs) from these organisms were first discovered in the 1990s. As of May 2015, there were 42 spider's and 63 scorpion's AMPs in the Antimicrobial Peptide Database (http://aps.unmc.edu/AP). These peptides have demonstrated broad or narrow-spectrum activities against bacteria, fungi, viruses, and parasites. In addition, they can be toxic to cancer cells, insects and erythrocytes. To provide insight into such an activity spectrum, this article discusses the discovery, classification, structure and activity relationships, bioinformatics analysis, and potential applications of spider and scorpion AMPs. Our analysis reveals that, in the case of linear peptides, spiders use both glycine-rich and helical peptide models for defense, whereas scorpions use two distinct helical peptide models with different amino acid compositions to exert the observed antimicrobial activities and hemolytic toxicity. Our structural bioinformatics study improves the knowledge in the field and can be used to design more selective peptides to combat tumors, parasites, and viruses.",2016-01-01 +26828034,AmphibiaChina: an online database of Chinese Amphibians.,"AmphibiaChina, an open-access, web-based database, is designed to provide comprehensive and up-to-date information on Chinese amphibians. It offers an integrated module with six major sections. Compared to other known databases including AmphibiaWeb and Amphibian Species of the World, AmphibiaChina has the following new functions: (1) online species identification based on DNA barcode sequences; (2) comparisons and discussions of different major taxonomic systems; and (3) phylogenetic progress on Chinese amphibians. 
This database offers a window for the world to access available information of Chinese amphibians. AmphibiaChina with its Chinese version can be accessed at http://www.amphibiachina.org.",2016-01-01 +27634207,Anti-inflammatory Action of Green Tea.,"

Background

Green tea has been shown to have beneficial effects against a variety of diseases such as cancer, obesity, diabetes, cardiovascular disease, and neurodegenerative diseases. Through cellular, animal, and human experiments, green tea and its major component, epigallocatechin-3-gallate (EGCG) have been demonstrated to have anti-inflammatory effects. Our previous findings have indicated that green tea and EGCG suppress the gene and/or protein expression of inflammatory cytokines and inflammation-related enzymes.

Methods

Using bibliographic databases, particularly PubMed (provided by the http://www.ncbi.nlm.nih.gov/pubmed, US National Library of Medicine, National Institutes of Health, United States), we examined the potential usefulness of green tea/EGCG for the prevention and treatment of inflammatory diseases in human clinical and epidemiological studies. We also reviewed results from cellular and animal experiments and proposed action mechanisms.

Results

Most of the results from the human studies indicated the beneficial effects of green tea and tea catechins against inflammatory diseases. The cellular and animal studies also provided evidence for the favorable effects of green tea/EGCG. These results are compatible with our previous findings and can be largely explained by a mechanism wherein green tea/EGCG acts as an antioxidant to scavenge reactive oxygen species, leading to attenuation of nuclear factor-κB activity.

Conclusion

Since green tea and EGCG have multiple targets and act in a pleiotropic manner, we may consider their usage to improve the quality of life in patients with inflammatory disease. Green tea and EGCG have beneficial health effects and no severe adverse effects; however, care should be taken to avoid overdosage, which may induce deleterious effects including hepatic injury.",2016-01-01 +26415912,Mesenchymal Stem Cells and Cell Therapy for Bone Repair.,"Mesenchymal stem cells (MSCs) represent a new therapeutic paradigm for a number of diseases because they possess unique biological characteristics such as multipotency, immunomodulation and production of cytokines. Currently, 425 MSC based clinical trials have been conducted for at least 12 kinds of pathological conditions, with many completed trials demonstrating the safety and efficacy of MSCs. Here, we provide an overview of the clinical status of MSCs by searching the public clinical trials database http://clinicaltrials.gov. Particularly, the role of MSCs in clinical trials to treat bone defects and injuries is highlighted.",2016-01-01 +27577426,Implementation of an Execution Engine for SNOMED CT Expression Constraint Language.,"The need to achieve high levels of semantic interoperability in the health domain is regarded as a crucial issue. Nowadays, one of the weaknesses when working in this direction is the lack of a coordinated use of information and terminological models to define the meaning and content of clinical data. IHTSDO is aware of this problem and has recently developed the SNOMED CT Expression Constraint Language to specify subsets of concepts. In this paper, we describe an implementation of an execution engine of this language. Our final objective is to allow advanced terminological binding between archetypes and SNOMED CT as a fundamental pillar to get semantically interoperable systems. 
The execution engine is available at http://snquery.veratech.es.",2016-01-01 +27007584,Systematic Reviews Published in the October 2015 Issue of the Cochrane Library.,"The Cochrane Library of Systematic Reviews is published quarterly as a DVD and monthly online (http://www.thecochranelibrary.com). The October 2015 issue (fourth DVD for 2015) contains 6622 complete reviews, 2429 protocols for reviews in production, and 36,600 short summaries of systematic reviews published in the general medical literature (this short summary database is no longer being updated). In addition, there are citations of 848,000 randomized controlled trials, and 15,700 cited papers in the Cochrane Methodology Register. The Health Technology Assessment database contains some 15,000 citations. One hundred and nine new reviews have been published in the previous 3 months, of which six have potential relevance for practitioners in pain and palliative medicine. The impact factor of the Cochrane Library stands at 5.939. Readers are encouraged to access the full report for any articles of interest as only a brief commentary is provided.",2016-01-01 +25861770,MetaMirClust: Discovery and Exploration of Evolutionarily Conserved miRNA Clusters.,"Recent emerging studies suggest that a substantial fraction of microRNA (miRNA) genes is likely to form clusters in terms of evolutionary conservation and biological implications, posing a significant challenge for the research community and shifting the bottleneck of scientific discovery from miRNA singletons to miRNA clusters. In addition, the advance in molecular sequencing technique such as next-generation sequencing (NGS) has facilitated researchers to comprehensively characterize miRNAs with low abundance on genome-wide scale in multiple species. Taken together, a large scale, cross-species survey of grouped miRNAs based on genomic location would be valuable for investigating their biological functions and regulations in an evolutionary perspective. 
In the present chapter, we describe the application of effective and efficient bioinformatics tools on the identification of clustered miRNAs and illustrate how to use the recently developed Web-based database, MetaMirClust (http://fgfr.ibms.sinic.aedu.tw/MetaMirClust) to discover evolutionarily conserved pattern of miRNA clusters across metazoans.",2016-01-01 +27644652,Sex Steroids Regulate Expression of Genes Containing Long Interspersed Elements-1s in Breast Cancer Cells.,"Long interspersed elements-1s (LINE-1s) are dispersed all over the human genome. There is evidence that hypomethylation of LINE-1s and levels of sex steroids regulate gene expression leading to cancer development. Here, we compared mRNA levels of genes containing an intragenic LINE-1 in breast cancer cells treated with various sex steroids from Gene Expression Omnibus (GEO), with the gene expression database using chi-square analysis (http://www.ncbi.nlm.nih.gov/geo). We evaluated whether sex steroids influence expression of genes containing an intragenic LINE-1. Three sex steroids at various concentrations, 1 and 10 nM estradiol (E2), 10 nM progesterone (PG) and 10 nM androgen (AN), were assessed. In breast cancer cells treated with 1 or 10 nM E2, a significant percentage of genes containing an intragenic LINE-1 were down-regulated. A highly significant percentage of E2-regulated genes containing an intragenic LINE-1 was down-regulated in cells treated with 1 nM E2 for 3 hours (<3.70E-25; OR=1.91; 95% CI=2.16-1.69). Similarly, high percentages of PG or AN- regulated genes containing an intragenic LINE-1 were also down-regulated in cells treated with 10 nM PG or 10 nM AN for 16 hr (p=9.53E-06; OR=1.65; 95% CI=2.06-1.32 and p=3.81E-14; OR=2.01; 95% CI=2.42-1.67). Interestingly, a significant percentage of AN-regulated genes containing an intragenic LINE-1 was up-regulated in cells treated with 10 nM AN for 16 hr (p=4.03E-02; OR=1.40; 95% CI=1.95-1.01). 
These findings suggest that intragenic LINE-1s may play roles in sex steroid mediated gene expression in breast cancer cells, which could have significant implications for the development and progression of sex steroid-dependent cancers.",2016-01-01 +25219235,[WEB-based medical data mining integration].,"An integration of medical data management system based on WEB and data mining tool is reportedly in this paper. In the application process of this system, web-based medical data mining user sends requests to the server by using client browser with http protocol, the commands are then received by the server and the server calls the data mining tools remote object for data processing, and the results are sent back to the customer browser through the http protocol and presented to the user. In order to prove the feasibility of the proposed solution, the test is done under the NET platform by using SAS and SPSS, and the detail steps are given. By the practical test, it was proved that the web-based data mining tool integration solutions proposed in this paper would have its broad prospects for development, which would open up a new route to the development of medical data mining.",2014-06-01 +28259404,Genetic and functional analysis of the bovine uterine microbiota. Part I: Metritis versus healthy cows.,"Metritis is a uterine disease that affects 10 to 30% of all lactating dairy cows and has detrimental effects on reproductive performance, milk production, and survival. Data regarding the identity and abundance of bacterial genes governing traits such as virulence, antibiotic resistance, and stress responses could enable identification of previously unknown agents that play a role in metritis pathogenesis. Moreover, such knowledge could lead to the development of improved treatments or preventive methods. 
Therefore, the objectives of this study were to characterize the uterine microbial population and to differentiate, for the first time, the microbial functional diversity in cows with metritis versus healthy cows. In addition, we aimed to identify relationships between microbial genes and postpartum uterine health. Uterine swabs were collected from 24 cows within 3 to 12 d in milk; 12 cows were diagnosed with metritis and the other 12 were healthy. Metritis was defined as a watery, reddish or brownish uterine discharge having a fetid smell, and rectal temperature greater than 39.5°C. Cows with a clear and viscous uterine discharge, not fetid or mucopurulent, were classified as healthy. Microbial metagenomic DNA from uterine swab samples was subjected to whole-genome shotgun sequencing on the Illumina MiSeq platform (Illumina Inc., San Diego, CA). The MG-RAST server (metagenomic rapid annotations using subsystems technology; http://metagenomics.anl.gov/) and STAMP software (http://kiwi.cs.dal.ca/Software/STAMP) were used to detect statistically significant differences in the abundance of taxonomic and functional features between the uterine microbial metagenomes of metritic and healthy cows. Our results showed an increased abundance of Fusobacteria and Bacteroidetes in metritic cows, confirming the potential role of those 2 taxa in the pathogenesis of metritis. The MG-RAST analysis revealed a significantly higher abundance of genes for protein transport across the cytoplasmic membrane and type VI bacterial secretion systems in the metritic microbiota. Additionally, genes coding for resistance to acid stress were exclusive to the metritis microbiota, suggesting that microbial resistance to acid stress is important for microbial survival in the infected uterus. 
On the other hand, genes coding for adhesion molecules, bacteriocins, and antibacterial peptides were significantly associated with the uterine microbiota of healthy cows, as was tolerance to colicin E2.",2017-03-02 +25886980,StructureFold: genome-wide RNA secondary structure mapping and reconstruction in vivo.,"

Motivation

RNAs fold into complex structures that are integral to the diverse mechanisms underlying RNA regulation of gene expression. Recent development of transcriptome-wide RNA structure profiling through the application of structure-probing enzymes or chemicals combined with high-throughput sequencing has opened a new field that greatly expands the amount of in vitro and in vivo RNA structural information available. The resultant datasets provide the opportunity to investigate RNA structural information on a global scale. However, the analysis of high-throughput RNA structure profiling data requires considerable computational effort and expertise.

Results

We present a new platform, StructureFold, that provides an integrated computational solution designed specifically for large-scale RNA structure mapping and reconstruction across any transcriptome. StructureFold automates the processing and analysis of raw high-throughput RNA structure profiling data, allowing the seamless incorporation of wet-bench structural information from chemical probes and/or ribonucleases to restrain RNA secondary structure prediction via the RNAstructure and ViennaRNA package algorithms. StructureFold performs reads mapping and alignment, normalization and reactivity derivation, and RNA structure prediction in a single user-friendly web interface or via local installation. The variation in transcript abundance and length that prevails in living cells and consequently causes variation in the counts of structure-probing events between transcripts is accounted for. Accordingly, StructureFold is applicable to RNA structural profiling data obtained in vivo as well as to in vitro or in silico datasets. StructureFold is deployed via the Galaxy platform.

Availability and implementation

StructureFold is freely available as a component of Galaxy available at: https://usegalaxy.org/.

Contact

yxt148@psu.edu or sma3@psu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-16 +25951947,Performance of a blockwise approach in variable selection using linkage disequilibrium information.,"

Background

Genome-wide association studies (GWAS) aim at finding genetic markers that are significantly associated with a phenotype of interest. Single nucleotide polymorphism (SNP) data from the entire genome are collected for many thousands of SNP markers, leading to high-dimensional regression problems where the number of predictors greatly exceeds the number of observations. Moreover, these predictors are statistically dependent, in particular due to linkage disequilibrium (LD). We propose a three-step approach that explicitly takes advantage of the grouping structure induced by LD in order to identify common variants which may have been missed by single marker analyses (SMA). In the first step, we perform a hierarchical clustering of SNPs with an adjacency constraint using LD as a similarity measure. In the second step, we apply a model selection approach to the obtained hierarchy in order to define LD blocks. Finally, we perform Group Lasso regression on the inferred LD blocks. We investigate the efficiency of this approach compared to state-of-the-art regression methods: haplotype association tests, SMA, and Lasso and Elastic-Net regressions.

Results

Our results on simulated data show that the proposed method performs better than state-of-the-art approaches as soon as the number of causal SNPs within an LD block exceeds 2. Our results on semi-simulated data and a previously published HIV data set illustrate the relevance of the proposed method and its robustness to a real LD structure. The method is implemented in the R package BALD (Blockwise Approach using Linkage Disequilibrium), available from http://www.math-evry.cnrs.fr/publications/logiciels .

Conclusions

Our results show that the proposed method is efficient not only at the level of LD blocks by inferring well the underlying block structure but also at the level of individual SNPs. Thus, this study demonstrates the importance of tailored integration of biological knowledge in high-dimensional genomic studies such as GWAS.",2015-05-08 +22301388,"Analysis of high accuracy, quantitative proteomics data in the MaxQB database.","MS-based proteomics generates rapidly increasing amounts of precise and quantitative information. Analysis of individual proteomic experiments has made great strides, but the crucial ability to compare and store information across different proteome measurements still presents many challenges. For example, it has been difficult to avoid contamination of databases with low quality peptide identifications, to control for the inflation in false positive identifications when combining data sets, and to integrate quantitative data. Although, for example, the contamination with low quality identifications has been addressed by joint analysis of deposited raw data in some public repositories, we reasoned that there should be a role for a database specifically designed for high resolution and quantitative data. Here we describe a novel database termed MaxQB that stores and displays collections of large proteomics projects and allows joint analysis and comparison. We demonstrate the analysis tools of MaxQB using proteome data of 11 different human cell lines and 28 mouse tissues. The database-wide false discovery rate is controlled by adjusting the project specific cutoff scores for the combined data sets. The 11 cell line proteomes together identify proteins expressed from more than half of all human genes. For each protein of interest, expression levels estimated by label-free quantification can be visualized across the cell lines. Similarly, the expression rank order and estimated amount of each protein within each proteome are plotted. 
We used MaxQB to calculate the signal reproducibility of the detected peptides for the same proteins across different proteomes. Spearman rank correlation between peptide intensity and detection probability of identified proteins was greater than 0.8 for 64% of the proteome, whereas a minority of proteins have negative correlation. This information can be used to pinpoint false protein identifications, independently of peptide database scores. The information contained in MaxQB, including high resolution fragment spectra, is accessible to the community via a user-friendly web interface at http://www.biochem.mpg.de/maxqb.",2012-02-02 +23813641,GoSynthetic database tool to analyse natural and engineered molecular processes.,"An essential topic for synthetic biologists is to understand the structure and function of biological processes and involved proteins and plan experiments accordingly. Remarkable progress has been made in recent years towards this goal. However, efforts to collect and present all information on processes and functions are still cumbersome. The database tool GoSynthetic provides a new, simple and fast way to analyse biological processes applying a hierarchical database. Four different search modes are implemented. Furthermore, protein interaction data, cross-links to organism-specific databases (17 organisms including six model organisms and their interactions), COG/KOG, GO and IntAct are warehoused. The built in connection to technical and engineering terms enables a simple switching between biological concepts and concepts from engineering, electronics and synthetic biology. The current version of GoSynthetic covers more than one million processes, proteins, COGs and GOs. It is illustrated by various application examples probing process differences and designing modifications. 
Database URL: http://gosyn.bioapps.biozentrum.uni-wuerzburg.de.",2013-06-27 +25755929,Language workbench user interfaces for data analysis.,"Biological data analysis is frequently performed with command line software. While this practice provides considerable flexibility for computationally savvy individuals, such as investigators trained in bioinformatics, this also creates a barrier to the widespread use of data analysis software by investigators trained as biologists and/or clinicians. Workflow systems such as Galaxy and Taverna have been developed to try and provide generic user interfaces that can wrap command line analysis software. These solutions are useful for problems that can be solved with workflows, and that do not require specialized user interfaces. However, some types of analyses can benefit from custom user interfaces. For instance, developing biomarker models from high-throughput data is a type of analysis that can be expressed more succinctly with specialized user interfaces. Here, we show how Language Workbench (LW) technology can be used to model the biomarker development and validation process. We developed a language that models the concepts of Dataset, Endpoint, Feature Selection Method and Classifier. These high-level language concepts map directly to abstractions that analysts who develop biomarker models are familiar with. We found that user interfaces developed in the Meta-Programming System (MPS) LW provide convenient means to configure a biomarker development project, to train models and view the validation statistics. We discuss several advantages of developing user interfaces for data analysis with a LW, including increased interface consistency, portability and extension by language composition. 
The language developed during this experiment is distributed as an MPS plugin (available at http://campagnelab.org/software/bdval-for-mps/).",2015-02-24 +27141529,"Digital data for quick response (QR) codes of alkalophilic Bacillus pumilus to identify and to compare bacilli isolated from Lonar Crator Lake, India.","Microbiologists are routinely engaged isolation, identification and comparison of isolated bacteria for their novelty. 16S rRNA sequences of Bacillus pumilus were retrieved from NCBI repository and generated QR codes for sequences (FASTA format and full Gene Bank information). 16SrRNA were used to generate quick response (QR) codes of Bacillus pumilus isolated from Lonar Crator Lake (19° 58' N; 76° 31' E), India. Bacillus pumilus 16S rRNA gene sequences were used to generate CGR, FCGR and PCA. These can be used for visual comparison and evaluation respectively. The hyperlinked QR codes, CGR, FCGR and PCA of all the isolates are made available to the users on a portal https://sites.google.com/site/bhagwanrekadwad/. This generated digital data helps to evaluate and compare any Bacillus pumilus strain, minimizes laboratory efforts and avoid misinterpretation of the species.",2016-04-09 +25638023,SIM-XL: A powerful and user-friendly tool for peptide cross-linking analysis.,"Chemical cross-linking has emerged as a powerful approach for the structural characterization of proteins and protein complexes. However, the correct identification of covalently linked (cross-linked or XL) peptides analyzed by tandem mass spectrometry is still an open challenge. Here we present SIM-XL, a software tool that can analyze data generated through commonly used cross-linkers (e.g., BS3/DSS). Our software introduces a new paradigm for search-space reduction, which ultimately accounts for its increase in speed and sensitivity. Moreover, our search engine is the first to capitalize on reporter ions for selecting tandem mass spectra derived from cross-linked peptides. 
It also makes available a 2D interaction map and a spectrum-annotation tool unmatched by any of its kind. We show SIM-XL to be more sensitive and faster than a competing tool when analyzing a data set obtained from the human HSP90. The software is freely available for academic use at http://patternlabforproteomics.org/sim-xl. A video demonstrating the tool is available at http://patternlabforproteomics.org/sim-xl/video. SIM-XL is the first tool to support XL data in the mzIdentML format; all data are thus available from the ProteomeXchange consortium (identifier PXD001677). This article is part of a Special Issue entitled: Computational Proteomics.",2015-01-29 +22984411,Theoretical prediction and experimental verification of protein-coding genes in plant pathogen genome Agrobacterium tumefaciens strain C58.,"Agrobacterium tumefaciens strain C58 is a Gram-negative soil bacterium capable of inducing tumors (crown galls) on many dicotyledonous plants. The genome of A. tumefaciens strain C58 was re-annotated based on the Z-curve method. First, all the 'hypothetical genes' were re-identified, and 29 originally annotated 'hypothetical genes' were recognized to be non-coding open reading frames (ORFs). Theoretical evidence obtained from principal component analysis, clusters of orthologous groups of proteins occupation, and average length distribution showed that these non-coding ORFs were highly unlikely to encode proteins. Results from the reverse transcription-polymerase chain reaction (RT-PCR) experiments on three different growth stages of A. tumefaciens C58 confirmed that 23 (79%) of the identified non-coding ORFs have no transcripts in these growth stages. In addition, using theoretical prediction, 19 potential protein-coding genes were predicted to be new protein-coding genes. Fifteen (79%) of these genes were verified with RT-PCR experiments. 
The RT-PCR experimental results confirmed the reliability of our theoretical prediction, indicating that false-positive prediction and missing genes always exist in the annotation of A. tumefaciens C58 genome. The improved annotation will serve as a valuable resource for the research of the lifestyle, metabolism, and pathogenicity of A. tumefaciens C58. The re-annotation of A. tumefaciens C58 can be obtained from http://211.69.128.148/Atum/.",2012-09-11 +23448259,Whole human genome proteogenomic mapping for ENCODE cell line data: identifying protein-coding regions.,"

Background

Proteogenomic mapping is an approach that uses mass spectrometry data from proteins to directly map protein-coding genes and could aid in locating translational regions in the human genome. In concert with the ENcyclopedia of DNA Elements (ENCODE) project, we applied proteogenomic mapping to produce proteogenomic tracks for the UCSC Genome Browser, to explore which putative translational regions may be missing from the human genome.

Results

We generated ~1 million high-resolution tandem mass (MS/MS) spectra for Tier 1 ENCODE cell lines K562 and GM12878 and mapped them against the UCSC hg19 human genome, and the GENCODE V7 annotated protein and transcript sets. We then compared the results from the three searches to identify the best-matching peptide for each MS/MS spectrum, thereby increasing the confidence of the putative new protein-coding regions found via the whole genome search. At a 1% false discovery rate, we identified 26,472, 24,406, and 13,128 peptides from the protein, transcript, and whole genome searches, respectively; of these, 481 were found solely via the whole genome search. The proteogenomic mapping data are available on the UCSC Genome Browser at http://genome.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=wgEncodeUncBsuProt.

Conclusions

The whole genome search revealed that ~4% of the uniquely mapping identified peptides were located outside GENCODE V7 annotated exons. The comparison of the results from the disparate searches also identified 15% more spectra than would have been found solely from a protein database search. Therefore, whole genome proteogenomic mapping is a complementary method for genome annotation when performed in conjunction with other searches.",2013-02-28 +22106335,Data-driven information retrieval in heterogeneous collections of transcriptomics data links SIM2s to malignant pleural mesothelioma.,"

Motivation

Genome-wide measurement of transcript levels is an ubiquitous tool in biomedical research. As experimental data continues to be deposited in public databases, it is becoming important to develop search engines that enable the retrieval of relevant studies given a query study. While retrieval systems based on meta-data already exist, data-driven approaches that retrieve studies based on similarities in the expression data itself have a greater potential of uncovering novel biological insights.

Results

We propose an information retrieval method based on differential expression. Our method deals with arbitrary experimental designs and performs competitively with alternative approaches, while making the search results interpretable in terms of differential expression patterns. We show that our model yields meaningful connections between biological conditions from different studies. Finally, we validate a previously unknown connection between malignant pleural mesothelioma and SIM2s suggested by our method, via real-time polymerase chain reaction in an independent set of mesothelioma samples.

Availability

Supplementary data and source code are available from http://www.ebi.ac.uk/fg/research/rex.",2011-11-20 +23860041,The Moroccan Genetic Disease Database (MGDD): a database for DNA variations related to inherited disorders and disease susceptibility.,"National and ethnic mutation databases provide comprehensive information about genetic variations reported in a population or an ethnic group. In this paper, we present the Moroccan Genetic Disease Database (MGDD), a catalogue of genetic data related to diseases identified in the Moroccan population. We used the PubMed, Web of Science and Google Scholar databases to identify available articles published until April 2013. The Database is designed and implemented on a three-tier model using Mysql relational database and the PHP programming language. To date, the database contains 425 mutations and 208 polymorphisms found in 301 genes and 259 diseases. Most Mendelian diseases in the Moroccan population follow autosomal recessive mode of inheritance (74.17%) and affect endocrine, nutritional and metabolic physiology. The MGDD database provides reference information for researchers, clinicians and health professionals through a user-friendly Web interface. Its content should be useful to improve researches in human molecular genetics, disease diagnoses and design of association studies. MGDD can be publicly accessed at http://mgdd.pasteur.ma.",2013-07-17 +23951102,Dizeez: an online game for human gene-disease annotation.,"Structured gene annotations are a foundation upon which many bioinformatics and statistical analyses are built. However the structured annotations available in public databases are a sparse representation of biological knowledge as a whole. The rate of biomedical data generation is such that centralized biocuration efforts struggle to keep up. New models for gene annotation need to be explored that expand the pace at which we are able to structure biomedical knowledge. 
Recently, online games have emerged as an effective way to recruit, engage and organize large numbers of volunteers to help address difficult biological challenges. For example, games have been successfully developed for protein folding (Foldit), multiple sequence alignment (Phylo) and RNA structure design (EteRNA). Here we present Dizeez, a simple online game built with the purpose of structuring knowledge of gene-disease associations. Preliminary results from game play online and at scientific conferences suggest that Dizeez is producing valid gene-disease annotations not yet present in any public database. These early results provide a basic proof of principle that online games can be successfully applied to the challenge of gene annotation. Dizeez is available at http://genegames.org.",2013-08-07 +26722120,Characterizing rate limiting steps in transcription from RNA production times in live cells.,"

Motivation

Single-molecule measurements of live Escherichia coli transcription dynamics suggest that this process ranges from sub- to super-Poissonian, depending on the conditions and on the promoter. For its accurate quantification, we propose a model that accommodates all these settings, and statistical methods to estimate the model parameters and to select the relevant components.

Results

The new methodology has improved accuracy and avoids overestimating the transcription rate due to finite measurement time, by exploiting unobserved data and by accounting for the effects of discrete sampling. First, we use Monte Carlo simulations of models based on measurements to show that the methods are reliable and offer substantial improvements over previous methods. Next, we apply the methods on measurements of transcription intervals of different promoters in live E. coli, and show that they produce significantly different results, both in low- and high-noise settings, and that, in the latter case, they even lead to qualitatively different results. Finally, we demonstrate that the methods can be generalized for other similar purposes, such as for estimating gene activation kinetics. In this case, the new methods allow quantifying the inducer uptake dynamics as opposed to just comparing them between cases, which was not previously possible. We expect this new methodology to be a valuable tool for functional analysis of cellular processes using single-molecule or single-event microscopy measurements in live cells.

Availability and implementation

Source code is available under Mozilla Public License at http://www.cs.tut.fi/%7Ehakkin22/censored/

Contact

andre.ribeiro@tut.fi or andre.sanchesribeiro@tut.fi

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-31 +26715946,Smiles2Monomers: a link between chemical and biological structures for polymers.,"

Background

The monomeric composition of polymers is powerful for structure comparison and synthetic biology, among others. Many databases give access to the atomic structure of compounds but the monomeric structure of polymers is often lacking. We have designed a smart algorithm, implemented in the tool Smiles2Monomers (s2m), to infer efficiently and accurately the monomeric structure of a polymer from its chemical structure.

Results

Our strategy is divided into two steps: first, monomers are mapped on the atomic structure by an efficient subgraph-isomorphism algorithm ; second, the best tiling is computed so that non-overlapping monomers cover all the structure of the target polymer. The mapping is based on a Markovian index built by a dynamic programming algorithm. The index enables s2m to search quickly all the given monomers on a target polymer. After, a greedy algorithm combines the mapped monomers into a consistent monomeric structure. Finally, a local branch and cut algorithm refines the structure. We tested this method on two manually annotated databases of polymers and reconstructed the structures de novo with a sensitivity over 90 %. The average computation time per polymer is 2 s.

Conclusion

s2m automatically creates de novo monomeric annotations for polymers, efficiently in terms of time computation and sensitivity. s2m allowed us to detect annotation errors in the tested databases and to easily find the accurate structures. So, s2m could be integrated into the curation process of databases of small compounds to verify the current entries and accelerate the annotation of new polymers. The full method can be downloaded or accessed via a website for peptide-like polymers at http://bioinfo.lifl.fr/norine/smiles2monomers.jsp.Graphical abstract:.",2015-12-29 +27222868,Effect of osmolytes on the conformation and aggregation of some amyloid peptides: CD spectroscopic data.,"Protein misfolding and aggregation are responsible for a large number of diseases called protein conformational diseases or disorders that include Alzheimer׳s disease, Huntington׳s diseases, Prion related encephalopathies and type-II diabetes (http://dx.doi.org/10.1038/35041139) (Kopito and Ron, 2000) [1]. A variety of studies have shown that some small organic molecules, known as osmolytes have the ability to stabilize native conformation of proteins and prevent misfolding and aggregation (http://www.la-press.com/article.php?article_id=447) (Zhao et al., 2008) [2]. It has been shown that certain short segment or fragment of respective proteins can also form amyloids, and the segments also promote the aggregation in the full-length protein (http://dx.doi.org/10.2174/0929867023369187) (Gazit, 2002) [3]. This article presents circular dichroism spectroscopic data on conformational analysis and effect of osmolytes on Aβ peptide fragments, different lengths of polyglutamine peptide and the amyloidogenic segment of islet amyloid polypeptide.",2016-05-04 +22139941,MMMDB: Mouse Multiple Tissue Metabolome Database.,"The Mouse Multiple Tissue Metabolome Database (MMMDB) provides comprehensive and quantitative metabolomic information for multiple tissues from single mice. 
Manually curated databases that integrate literature-based individual metabolite information have been available so far. However, data sets on the absolute concentration of a single metabolite integrated from multiple resources are often difficult to be used when different metabolomic studies are compared because the relative balance of the multiple metabolite concentrations in the metabolic pathways as a snapshot of a dynamic system is more important than the absolute concentration of a single metabolite. We developed MMMDB by performing non-targeted analyses of cerebra, cerebella, thymus, spleen, lung, liver, kidney, heart, pancreas, testis and plasma using capillary electrophoresis time-of-flight mass spectrometry and detected 428 non-redundant features from which 219 metabolites were successfully identified. Quantified concentrations of the individual metabolites and the corresponding processed raw data; for example, the electropherograms and mass spectra with their annotations, such as isotope and fragment information, are stored in the database. MMMDB is designed to normalize users' data, which can be submitted online and used to visualize overlaid electropherograms. Thus, MMMDB allows newly measured data to be compared with the other data in the database. MMMDB is available at: http://mmmdb.iab.keio.ac.jp.",2011-12-01 +25974010,Computational study of binding affinity to nuclear receptors for some cosmetic ingredients.,"We studied the ingredients of cosmetic products as potential endocrine disruptors (ED) by in silico methods (docking). The structures of 14 human nuclear receptors have been retrieved from the protein data bank (PDB). We only considered the mechanism linked with direct binding to nuclear receptors with well-defined crystal structures. Predictions were performed using the Endocrine Disruptome docking program http://endocrinedisruptome.ki.si/ (Kolšek et al., 2013). 
122 compounds were estimated to be possible endocrine disruptors that bind to at least one of the receptors, 21 of which are predicted to be probable toxicants for endocrine disruption as they bind to more than five receptors simultaneously. According to the literature survey and lack of experimental data it remains a challenge to prove or disprove the in silico results experimentally also for other potential endocrine disruptors.",2015-05-15 +28717395,Brain imaging before primary lung cancer resection: a controversial topic.,"

Objective

International and national recommendations for brain imaging in patients planned to undergo potentially curative resection of non-small-cell lung cancer (NSCLC) are variably implemented throughout the United Kingdom [Hudson BJ, Crawford MB, and Curtin J et al (2015) Brain imaging in lung cancer patients without symptoms of brain metastases: a national survey of current practice in England. Clin Radiol https://doi.org/10.1016/j.crad.2015.02.007]. However, the recommendations are not based on high-quality evidence and do not take into account cost implications and local resources. Our aim was to determine local practice based on historic outcomes in this patient cohort.

Methods

This retrospective study took place in a regional thoracic surgical centre in the United Kingdom. Pathology records for all patients who had undergone lung resection with curative intent during the time period January 2012-December 2014 were analysed in October 2015. Electronic pathology and radiology reports were accessed for each patient and data collected about their histological findings, TNM stage, resection margins, and the presence of brain metastases on either pre-operative or post-operative imaging. From the dates given on imaging, we calculated the number of days post-resection that the brain metastases were detected.

Results

585 patients were identified who had undergone resection of their lung cancer. Of these, 471 had accessible electronic radiology records to assess for the radiological evidence of brain metastases. When their electronic records were evaluated, 25/471 (5.3%) patients had radiological evidence of brain metastasis. Of these, five patients had been diagnosed with a brain metastasis at initial presentation and had undergone primary resection of the brain metastasis followed by resection of the lung primary. One patient had been diagnosed with both a primary lung and a primary bowel adenocarcinoma; on review of the case, it was felt that the brain metastasis was more likely to have originated from the bowel cancer. One had been clinically diagnosed with a cerebral abscess while the radiology had been reported as showing a metastatic deposit. Of the remaining 18/471 (3.8%) patients who presented with brain metastases after their surgical resection, 12 patients had adenocarcinoma, four patients had squamous cell carcinoma, one had basaloid, and one had large-cell neuroendocrine. The mean number of days post-resection that the brain metastases were identified was 371 days, range 14-1032 days, median 295 days (date of metastases not available for two patients).

Conclusion

The rate of brain metastases identified in this study was similar to previous studies. This would suggest that preoperative staging of the central nervous system may change the management pathway in a small group of patients. However, for this group of patients, the change would be significant either sparing them non-curative surgery or allowing aggressive management of oligometastatic disease. Therefore, we would recommend pre-operative brain imaging with MRI for all patients undergoing potentially curative lung resection.",2017-06-20 +26156781,MOTIFSIM: A web tool for detecting similarity in multiple DNA motif datasets.,"Currently, there are a number of motif detection tools available that possess unique functionality. These tools often report different motifs, and therefore use of multiple tools is generally advised since common motifs reported by multiple tools are more likely to be biologically significant. However, results produced by these different tools need to be compared and existing similarity detection tools only allow comparison between two data sets. Here, we describe a motif similarity detection tool (MOTIFSIM) possessing a web-based, user-friendly interface that is capable of detecting similarity from multiple DNA motif data sets concurrently. Results can either be viewed online or downloaded. Users may also download and run MOTIFSIM as a command-line tool in stand-alone mode. The web tool, along with its command-line version, user manuals, and source codes, are freely available at http://biogrid-head.engr.uconn.edu/motifsim/.",2015-07-01 +25736862,GWIPS-viz as a tool for exploring ribosome profiling evidence supporting the synthesis of alternative proteoforms.,"The boundaries of protein coding sequences are more difficult to define at the 5' end than at the 3' end due to potential multiple translation initiation sites (TISs). 
Even in the presence of phylogenetic data, the use of sequence information only may not be sufficient for the accurate identification of TISs. Traditional proteomics approaches may also fail because the N-termini of newly synthesized proteins are often processed. Thus ribosome profiling (ribo-seq), producing a snapshot of the ribosome distribution across the entire transcriptome, is an attractive experimental technique for the purpose of TIS location exploration. The GWIPS-viz (Genome Wide Information on Protein Synthesis visualized) browser (http://gwips.ucc.ie) provides free access to the genomic alignments of ribo-seq data and corresponding mRNA-seq data along with relevant annotation tracks. In this brief, we illustrate how GWIPS-viz can be used to explore the ribosome occupancy at the 5' ends of protein coding genes to assess the activity of AUG and non-AUG TISs responsible for the synthesis of proteoforms with alternative or heterogeneous N-termini. The presence of ribo-seq tracks for various organisms allows for cross-species comparison of orthologous genes and the availability of datasets from multiple laboratories permits the assessment of the technical reproducibility of the ribosome densities.",2015-04-23 +25643400,Knowledge-Assisted Ranking: A Visual Analytic Application for Sports Event Data.,"Organizing sports video data for performance analysis can be challenging, especially in cases involving multiple attributes and when the criteria for sorting frequently changes depending on the user's task. The proposed visual analytic system enables users to specify a sort requirement in a flexible manner without depending on specific knowledge about individual sort keys. The authors use regression techniques to train different analytical models for different types of sorting requirements and use visualization to facilitate knowledge discovery at different stages of the process. 
They demonstrate the system with a rugby case study to find key instances for analyzing team and player performance. Organizing sports video data for performance analysis can be challenging in cases with multiple attributes, and when sorting frequently changes depending on the user's task. As this video shows, the proposed visual analytic system allows interactive data sorting and exploration. https://youtu.be/Cs6SLtPVDQQ.",2015-01-26 +22747501,PePPER: a webserver for prediction of prokaryote promoter elements and regulons.,"

Background

Accurate prediction of DNA motifs that are targets of RNA polymerases, sigma factors and transcription factors (TFs) in prokaryotes is a difficult mission mainly due to as yet undiscovered features in DNA sequences or structures in promoter regions. Improved prediction and comparison algorithms are currently available for identifying transcription factor binding sites (TFBSs) and their accompanying TFs and regulon members.

Results

We here extend the current databases of TFs, TFBSs and regulons with our knowledge on Lactococcus lactis and developed a webserver for prediction, mining and visualization of prokaryote promoter elements and regulons via a novel concept. This new approach includes an all-in-one method of data mining for TFs, TFBSs, promoters, and regulons for any bacterial genome via a user-friendly webserver. We demonstrate the power of this method by mining WalRK regulons in Lactococci and Streptococci and, vice versa, use L. lactis regulon data (CodY) to mine closely related species.

Conclusions

The PePPER webserver offers, besides the all-in-one analysis method, a toolbox for mining for regulons, promoters and TFBSs and accommodates a new L. lactis regulon database in addition to already existing regulon data. Identification of putative regulons and full annotation of intergenic regions in any bacterial genome on the basis of existing knowledge on a related organism can now be performed by biologists and it can be done for a wide range of regulons. On the basis of the PePPER output, biologist can design experiments to further verify the existence and extent of the proposed regulons. The PePPER webserver is freely accessible at http://pepper.molgenrug.nl.",2012-07-02 +24203708,SMPDB 2.0: big improvements to the Small Molecule Pathway Database.,"The Small Molecule Pathway Database (SMPDB, http://www.smpdb.ca) is a comprehensive, colorful, fully searchable and highly interactive database for visualizing human metabolic, drug action, drug metabolism, physiological activity and metabolic disease pathways. SMPDB contains >600 pathways with nearly 75% of its pathways not found in any other database. All SMPDB pathway diagrams are extensively hyperlinked and include detailed information on the relevant tissues, organs, organelles, subcellular compartments, protein cofactors, protein locations, metabolite locations, chemical structures and protein quaternary structures. Since its last release in 2010, SMPDB has undergone substantial upgrades and significant expansion. In particular, the total number of pathways in SMPDB has grown by >70%. Additionally, every previously entered pathway has been completely redrawn, standardized, corrected, updated and enhanced with additional molecular or cellular information. Many SMPDB pathways now include transporter proteins as well as much more physiological, tissue, target organ and reaction compartment data. 
Thanks to the development of a standardized pathway drawing tool (called PathWhiz) all SMPDB pathways are now much more easily drawn and far more rapidly updated. PathWhiz has also allowed all SMPDB pathways to be saved in a BioPAX format. Significant improvements to SMPDB's visualization interface now make the browsing, selection, recoloring and zooming of pathways far easier and far more intuitive. Because of its utility and breadth of coverage, SMPDB is now integrated into several other databases including HMDB and DrugBank.",2013-11-06 +26240227,Prediction of Allogeneic Hematopoietic Stem-Cell Transplantation Mortality 100 Days After Transplantation Using a Machine Learning Algorithm: A European Group for Blood and Marrow Transplantation Acute Leukemia Working Party Retrospective Data Mining Study.,"

Purpose

Allogeneic hematopoietic stem-cell transplantation (HSCT) is potentially curative for acute leukemia (AL), but carries considerable risk. Machine learning algorithms, which are part of the data mining (DM) approach, may serve for transplantation-related mortality risk prediction.

Patients and methods

This work is a retrospective DM study on a cohort of 28,236 adult HSCT recipients from the AL registry of the European Group for Blood and Marrow Transplantation. The primary objective was prediction of overall mortality (OM) at 100 days after HSCT. Secondary objectives were estimation of nonrelapse mortality, leukemia-free survival, and overall survival at 2 years. Donor, recipient, and procedural characteristics were analyzed. The alternating decision tree machine learning algorithm was applied for model development on 70% of the data set and validated on the remaining data.

Results

OM prevalence at day 100 was 13.9% (n=3,936). Of the 20 variables considered, 10 were selected by the model for OM prediction, and several interactions were discovered. By using a logistic transformation function, the crude score was transformed into individual probabilities for 100-day OM (range, 3% to 68%). The model's discrimination for the primary objective performed better than the European Group for Blood and Marrow Transplantation score (area under the receiver operating characteristics curve, 0.701 v 0.646; P<.001). Calibration was excellent. Scores assigned were also predictive of secondary objectives.

Conclusion

The alternating decision tree model provides a robust tool for risk evaluation of patients with AL before HSCT, and is available online (http://bioinfo.lnx.biu.ac.il/∼bondi/web1.html). It is presented as a continuous probabilistic score for the prediction of day 100 OM, extending prediction to 2 years. The DM method has proved useful for clinical prediction in HSCT.",2015-08-03 +26159261,How to Diagnose and Exclude Drug-Induced Liver Injury.,"The diagnosis of drug-induced liver injury (DILI) is largely a diagnosis of exclusion because, with the possible exception of protein:drug adducts in paracetamol overdose, there are no laboratory, biopsy or imaging tests that alone are capable of establishing an unequivocal diagnosis of DILI. However, it is increasingly appreciated that drugs that cause DILI typically have characteristic clinical presentations or 'signatures' that can be very useful in the diagnosis of DILI. Indeed, knowing a drug's DILI signature (or sometimes signatures) and the incidence rate of DILI during treatment with that drug are perhaps the most useful pieces of historical information in arriving at the diagnosis of DILI. Components of the signature include the typical latency from the onset of treatment, whether there are extrahepatic manifestations, whether the injury is hepatocellular, cholestatic or mixed, and sometimes characteristic features on biopsy or serological testing (e.g. liver autoantibodies). A major advance has been the establishment of the LiverTox website (http://livertox.nih.gov/) which provides open access to standardized entries for over 600 different drugs, including the characteristic clinical presentations of DILI when known. LiverTox will also calculate the causality score for individual cases using the RUCAM instrument and case-specific data entered by the site user. However, the problem with standard diagnostic instruments such as the RUCAM is that DILI signatures are not incorporated into the scoring system. 
The person entering data must therefore subjectively weigh the RUCAM score with the characteristic DILI signature(s) of the drug to arrive at a diagnosis. In the future, it should be possible to construct improved diagnostic instruments that objectively incorporate DILI signatures, data-based estimates of the incidence rates of DILI from each implicated drug, and perhaps genetic variants associated with the risk of DILI.",2015-07-06 +21609420,GiSAO.db: a database for ageing research.,"

Background

Age-related gene expression patterns of Homo sapiens as well as of model organisms such as Mus musculus, Saccharomyces cerevisiae, Caenorhabditis elegans and Drosophila melanogaster are a basis for understanding the genetic mechanisms of ageing. For an effective analysis and interpretation of expression profiles it is necessary to store and manage huge amounts of data in an organized way, so that these data can be accessed and processed easily.

Description

GiSAO.db (Genes involved in senescence, apoptosis and oxidative stress database) is a web-based database system for storing and retrieving ageing-related experimental data. Expression data of genes and miRNAs, annotation data like gene identifiers and GO terms, orthologs data and data of follow-up experiments are stored in the database. A user-friendly web application provides access to the stored data. KEGG pathways were incorporated and links to external databases augment the information in GiSAO.db. Search functions facilitate retrieval of data which can also be exported for further processing.

Conclusions

We have developed a centralized database that is very well suited for the management of data for ageing research. The database can be accessed at https://gisao.genome.tugraz.at and all the stored data can be viewed with a guest account.",2011-05-24 +26819315,Methylthioadenosine (MTA) Regulates Liver Cells Proteome and Methylproteome: Implications in Liver Biology and Disease.,"Methylthioadenosine phosphorylase (MTAP), a key enzyme in the adenine and methionine salvage pathways, catalyzes the hydrolysis of methylthioadenosine (MTA), a compound suggested to affect pivotal cellular processes in part through the regulation of protein methylation. MTAP is expressed in a wide range of cell types and tissues, and its deletion is common to cancer cells and in liver injury. The aim of this study was to investigate the proteome and methyl proteome alterations triggered by MTAP deficiency in liver cells to define novel regulatory mechanisms that may explain the pathogenic processes of liver diseases. iTRAQ analysis resulted in the identification of 216 differential proteins (p < 0.05) that suggest deregulation of cellular pathways as those mediated by ERK or NFκB. R-methyl proteome analysis led to the identification of 74 differentially methylated proteins between SK-Hep1 and SK-Hep1+ cells, including 116 new methylation sites. Restoring normal MTA levels in SK-Hep1+ cells parallels the specific methylation of 56 proteins, including KRT8, TGF, and CTF8A, which provides a novel regulatory mechanism of their activity with potential implications in carcinogenesis. Inhibition of RNA-binding proteins methylation is especially relevant upon accumulation of MTA. As an example, methylation of quaking protein in Arg(242) and Arg(256) in SK-Hep1+ cells may play a pivotal role in the regulation of its activity as indicated by the up-regulation of its target protein p27(kip1). The phenotype associated with a MTAP deficiency was further verified in the liver of MTAP± mice. 
Our data support that MTAP deficiency leads to MTA accumulation and deregulation of central cellular pathways, increasing proliferation and decreasing the susceptibility to chemotherapeutic drugs, which involves differential protein methylation. Data are available via ProteomeXchange with identifier PXD002957 (http://www.ebi.ac.uk/pride/archive/projects/PXD002957).",2016-01-27 +25099602,MCentridFS: a tool for identifying module biomarkers for multi-phenotypes from high-throughput data.,"Systematically identifying biomarkers, in particular, network biomarkers, from high-throughput data is an important and challenging task, and many methods for two-class comparison have been developed to exploit information of high-throughput data. However, as the high-throughput data with multi-phenotypes are available, there is a great need to develop effective multi-classification models. In this study, we proposed a novel approach, called MCentridFS (Multi-class Centroid Feature Selection), to systematically identify responsive modules or network biomarkers for classifying multi-phenotypes from high-throughput data. MCentridFS formulated the multi-classification model by network modules as a binary integer linear programming problem, which can be solved efficiently and effectively in an accurate manner. The approach is evaluated with respect to two diseases, i.e., multi-stages HCV-induced dysplasia and hepatocellular carcinoma and multi-tissues breast cancer, both of which demonstrated the high classification rate and the cross-validation rate of the approach. The computational results of the five-fold cross-validation of the two data show that MCentridFS outperforms the state-of-the-art multi-classification methods. We further verified the effectiveness of MCentridFS to characterize the multi-phenotype processes using module biomarkers by two independent datasets. 
In addition, functional enrichment analysis revealed that the identified network modules are strongly related to the corresponding biological processes and pathways. All these results suggest that it can serve as a useful tool for module biomarker detection in multiple biological processes or multi-classification problems by exploring both big biological data and network information. The Matlab code for MCentridFS is freely available from http://www.sysbio.ac.cn/cb/chenlab/images/MCentridFS.rar.",2014-11-01 +27965137,Osteoarthritis year in review 2016: imaging.,"

Purpose

The current narrative review covers original research related to imaging in osteoarthritis (OA) in humans published in English between April 1st 2015 and March 31st 2016, in peer reviewed journals available in Medline via PubMed (http://www.ncbi.nlm.nih.gov/pubmed/).

Methods

Relevant studies in humans, subjectively decided by the authors, contributing significantly to the OA imaging field, were selected from an extensive Medline search using the terms ""Osteoarthritis"" in combination with ""MRI"", ""Imaging"", ""Radiography"", ""X-rays"", ""Ultrasound"", ""Computed tomography"", ""Nuclear medicine"", ""PET-CT"", ""PET-MRI"", ""Scintigraphy"", ""SPECT"". Publications were sorted according to relevance for the OA imaging research community with an emphasis on high impact special interest journals using the software for systematic reviews www.covidence.org.

Results

An overview of newly published studies compared to studies reported previous years is presented, followed by a review of selected imaging studies of primarily knee, hip and hand OA focussing on (1) results for detection of OA and OA-related pathology (2) studies dealing with treatments and (3) studies focussing on prognosis of disease progression or joint replacement. A record high number of 1420 articles were published, among others, of new technologies and tools for improved morphological and pathophysiological understanding of OA-related changes in joints. Also, imaging data were presented of monitoring treatment effect and prognosis of OA progression, primarily using established radiographic, magnetic resonance imaging (MRI), and ultrasound (US) methods.

Conclusion

Imaging continues to play an important role in OA research, where several exciting new technologies and computer aided analysis methods are emerging to complement the conventional imaging approaches.",2016-12-10 +25979472,Seq2pathway: an R/Bioconductor package for pathway analysis of next-generation sequencing data.,"

Unlabelled

Seq2pathway is an R/Python wrapper for pathway (or functional gene-set) analysis of genomic loci, adapted for advances in genome research. Seq2pathway associates the biological significance of genomic loci with their target transcripts and then summarizes the quantified values on the gene-level into pathway scores. It is designed to isolate systematic disturbances and common biological underpinnings from next-generation sequencing (NGS) data. Seq2pathway offers Bioconductor users enhanced capability in discovering collective pathway effects caused by both coding genes and cis-regulation of non-coding elements.

Availability and implementation

The package is freely available at http://www.bioconductor.org/packages/release/bioc/html/seq2pathway.html.

Contact

xyang2@uchicago.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-15 +23782616,"ChroMoS: an integrated web tool for SNP classification, prioritization and functional interpretation.","

Unlabelled

Genome-wide association studies and re-sequencing projects are revealing an increasing number of disease-associated SNPs, a large fraction of which are non-coding. Although they could have relevance for disease susceptibility and progression, the lack of information about regulatory regions impedes the assessment of their functionality. Here we present a web server, ChroMoS (Chromatin Modified SNPs), which combines genetic and epigenetic data with the goal of facilitating SNPs' classification, prioritization and prediction of their functional consequences. ChroMoS uses a large database of SNPs and chromatin states, but allows a user to provide his/her own genetic information. Based on the SNP classification and interactive prioritization, a user can compute the functional impact of multiple SNPs using two prediction tools, one for differential analysis of transcription factor binding (sTRAP) and another for SNPs with potential impact on binding of miRNAs (MicroSNiPer).

Availability

Web server, ChroMoS, is freely available at http://epicenter.immunbio.mpg.de/services/chromos.",2013-06-19 +26395253,Risk factors for delirium after on-pump cardiac surgery: a systematic review.,"

Introduction

As evidence-based effective treatment protocols for delirium after cardiac surgery are lacking, efforts should be made to identify risk factors for preventive interventions. Moreover, knowledge of these risk factors could increase validity of etiological studies in which adjustments need to be made for confounding variables. This review aims to systematically identify risk factors for delirium after cardiac surgery and to grade the evidence supporting these associations.

Method

A prior registered systematic review was performed using EMBASE, CINAHL, MEDLINE and Cochrane from 1990 till January 2015 ( http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42014007371 ). All studies evaluating patients for delirium after cardiac surgery with cardiopulmonary bypass (CPB) using either randomization or multivariable data analyses were included. Data was extracted and quality was scored in duplicate. Heterogeneity impaired pooling of the data; instead a semi-quantitative approach was used in which the strength of the evidence was graded based on the number of investigations, the quality of studies, and the consistency of the association reported across studies.

Results

In total 1462 unique references were screened and 34 were included in this review, of which 16 (47 %) were graded as high quality. A strong level of evidence for an association with the occurrence of postoperative delirium was found for age, previous psychiatric conditions, cerebrovascular disease, pre-existent cognitive impairment, type of surgery, peri-operative blood product transfusion, administration of risperidone, postoperative atrial fibrillation and mechanical ventilation time. Postoperative oxygen saturation and renal insufficiency were supported by a moderate level of evidence, and there is no evidence that gender, education, CPB duration, pre-existent cardiac disease or heart failure are risk factors.

Conclusion

Of many potential risk factors for delirium after cardiac surgery, for only 11 there is a strong or moderate level of evidence. These risk factors should be taken into consideration when designing future delirium prevention strategies trials or when controlling for confounding in future etiological studies.",2015-09-23 +26473631,Using ToxCast™ Data to Reconstruct Dynamic Cell State Trajectories and Estimate Toxicological Points of Departure.,"

Background

High-content imaging (HCI) allows simultaneous measurement of multiple cellular phenotypic changes and is an important tool for evaluating the biological activity of chemicals.

Objectives

Our goal was to analyze dynamic cellular changes using HCI to identify the ""tipping point"" at which the cells did not show recovery towards a normal phenotypic state.

Methods

HCI was used to evaluate the effects of 967 chemicals (in concentrations ranging from 0.4 to 200 μM) on HepG2 cells over a 72-hr exposure period. The HCI end points included p53, c-Jun, histone H2A.x, α-tubulin, histone H3, alpha tubulin, mitochondrial membrane potential, mitochondrial mass, cell cycle arrest, nuclear size, and cell number. A computational model was developed to interpret HCI responses as cell-state trajectories.

Results

Analysis of cell-state trajectories showed that 336 chemicals produced tipping points and that HepG2 cells were resilient to the effects of 334 chemicals up to the highest concentration (200 μM) and duration (72 hr) tested. Tipping points were identified as concentration-dependent transitions in system recovery, and the corresponding critical concentrations were generally between 5 and 15 times (25th and 75th percentiles, respectively) lower than the concentration that produced any significant effect on HepG2 cells. The remaining 297 chemicals require more data before they can be placed in either of these categories.

Conclusions

These findings show the utility of HCI data for reconstructing cell state trajectories and provide insight into the adaptation and resilience of in vitro cellular systems based on tipping points. Cellular tipping points could be used to define a point of departure for risk-based prioritization of environmental chemicals.

Citation

Shah I, Setzer RW, Jack J, Houck KA, Judson RS, Knudsen TB, Liu J, Martin MT, Reif DM, Richard AM, Thomas RS, Crofton KM, Dix DJ, Kavlock RJ. 2016. Using ToxCast™ data to reconstruct dynamic cell state trajectories and estimate toxicological points of departure. Environ Health Perspect 124:910-919; http://dx.doi.org/10.1289/ehp.1409029.",2015-10-16 +28193678,Aspirin Desensitization in Patients With Coronary Artery Disease: Results of the Multicenter ADAPTED Registry (Aspirin Desensitization in Patients With Coronary Artery Disease). ,"There are limited data on aspirin (ASA) desensitization for patients with coronary artery disease. The aim of the present study was to assess the safety and efficacy of a standard rapid desensitization protocol in patients with ASA sensitivity undergoing coronary angiography. This is a prospective, multicenter, observational study including 7 Italian centers including patients with a history of ASA sensitivity undergoing coronary angiography with intent to undergo percutaneous coronary intervention. A total of 330 patients with history of ASA sensitivity with known/suspected stable coronary artery disease or presenting with an acute coronary syndrome, including ST-segment-elevation myocardial infarction were enrolled. Adverse effects to aspirin included urticaria (n=177, 53.6%), angioedema (n=69, 20.9%), asthma (n=65, 19.7%), and anaphylactic reaction (n=19, 5.8%). Among patients with urticaria/angioedema, 13 patients (3.9%) had a history of idiopathic chronic urticaria. All patients underwent a rapid ASA (5.5 hours) desensitization procedure. The desensitization procedure was performed before cardiac catheterization in all patients, except for those (n=78, 23.6%) presenting with ST-segment-elevation myocardial infarction who underwent the desensitization after primary percutaneous coronary intervention. Percutaneous coronary intervention was performed in 235 patients (71%) of the overall study population. 
The desensitization procedure was successful in 315 patients (95.4%) and in all patients with a history of anaphylactic reaction. Among the 15 patients (4.6%) who did not successfully respond to the desensitization protocol, adverse reactions were minor and responded to treatment with corticosteroids and antihistamines. Among patients with successful in-hospital ASA desensitization, 253 patients (80.3%) continued ASA for at least 12 months. Discontinuation of ASA in the 62 patients (19.7%) who had responded to the desensitization protocol was because of medical decision and not because of hypersensitivity reactions. A standard rapid desensitization protocol is safe and effective across a broad spectrum of patients, irrespective of the type of aspirin sensitivity manifestation, with indications to undergo coronary angiography with intent to perform percutaneous coronary intervention. URL: http://www.clinicaltrials.gov. Unique identifier: NCT02848339.",2017-02-01 +27309738,"Training in High-Throughput Sequencing: Common Guidelines to Enable Material Sharing, Dissemination, and Reusability.","The advancement of high-throughput sequencing (HTS) technologies and the rapid development of numerous analysis algorithms and pipelines in this field has resulted in an unprecedentedly high demand for training scientists in HTS data analysis. Embarking on developing new training materials is challenging for many reasons. Trainers often do not have prior experience in preparing or delivering such materials and struggle to keep them up to date. A repository of curated HTS training materials would support trainers in materials preparation, reduce the duplication of effort by increasing the usage of existing materials, and allow for the sharing of teaching experience among the HTS trainers' community. To achieve this, we have developed a strategy for materials' curation and dissemination. 
Standards for describing training materials have been proposed and applied to the curation of existing materials. A Git repository has been set up for sharing annotated materials that can now be reused, modified, or incorporated into new courses. This repository uses Git; hence, it is decentralized and self-managed by the community and can be forked/built-upon by all users. The repository is accessible at http://bioinformatics.upsc.se/htmr.",2016-06-16 +27930499,Prevalence and Risk Factors of Prolonged Corrected QT Interval Among Children and Adolescents Treated With Antipsychotic Medications: A Long-Term Follow-Up in a Real-World Population.,"

Purpose

This study aimed to describe the prevalence of corrected QT (QTc) interval disorders and the possible predisposing factors in children and adolescents treated with antipsychotic (AP) medications in a real-world population with a long-term follow-up.

Methods

Data were obtained from the SafEty of NeurolepTics in Infancy and Adolescence (SENTIA) registry (https://sentia.es). The SENTIA includes patients younger than 18 years who are currently taking or initiating treatment with AP medications and have agreed to participate in the registry. The SENTIA's follow-up includes an electrocardiogram (ECG) assessment before starting treatment and at 1, 3, and 6 months after treatment initiation or after any changes in the patient's AP medication treatment. Thereafter, all participants undergo an ECG every 6 months. A QTc interval more than 450 milliseconds, increases in QTc interval of 60 milliseconds or more, or QTc dispersion more than 100 milliseconds were considered abnormal.

Results

Since January 1, 2011, 101 patients have been enrolled in SENTIA and have had at least 1 ECG assessment. The mean age at inclusion was 11.5 years; 75% of the patients were men. The mean follow-up time was 20.0 ± 15.1 months. The most frequently prescribed AP medications were risperidone (52.2%) and aripiprazole (45.5%). Seven patients (6.9%) had abnormal changes in QTc. No patient had a QTc interval more than 500 milliseconds. All patients were asymptomatic. The QTc changes were observed at different times of exposure, with a range of 1 to 39 months after beginning AP treatment. Concomitant use of attention deficit and hyperactivity disorder drugs seemed a possible factor associated with QTc disorders.

Conclusions

Patients should undergo a baseline ECG assessment before starting AP medication treatment, particularly patients with concomitant use of attention deficit and hyperactivity disorder drugs or a family/personal history of heart disease.",2017-02-01 +22668810,National survey of emergency departments in Denmark.,"

Objectives

Emergency departments (EDs) are the basic unit of emergency medicine, but often differ in fundamental features. We sought to describe and characterize EDs in Denmark.

Methods

All EDs open 24/7 to the general public were surveyed using the National ED Inventories survey instrument (http://www.emnet-nedi.org). ED staff were asked about ED characteristics with reference to the calendar year 2008.

Results

Twenty-eight EDs participated (82% response). All were located in hospitals. Less than half [43%, 95% confidence interval (CI) 24-63%] were independent departments. Thirty-nine percent (95% CI 22-59%) had a contiguous layout, with medical and surgical care provided in one area. The vast majority of EDs saw both adults and children; only 10% saw adults only and none saw children only. The median number of annual visits was 32 000 (interquartile range, 14 700-47 000). The majority (68%, 95% CI 47-89%) believed that their ED was at good balance or capacity, with 22% responding that they were under capacity and 9% reporting overcapacity. Technological resources were generally available, with the exception of dedicated computed tomography scanners and negative-pressure rooms. Almost all common emergencies were identified as being treatable 24/7 in the EDs.

Conclusion

Although there is some variation in their layout and characteristics, most Danish EDs have a high degree of resource availability and are able to treat common emergencies. As Denmark seeks to reform emergency care through ED consolidation, this national survey helps to establish a benchmark for future comparisons.",2013-06-01 +33456621,Pathway crosstalk effects: Shrinkage and disentanglement using a Bayesian hierarchical model.,"Identifying the biological pathways that are related to various clinical phenotypes is an important concern in biomedical research. Based on estimated expression levels and/or p-values, over-representation analysis (ORA) methods provide rankings of pathways, but they are tainted because pathways overlap. This crosstalk phenomenon has not been rigorously studied and classical ORA does not take into consideration: (i) that crosstalk effects in cases of overlapping pathways can cause incorrect rankings of pathways, (ii) that crosstalk effects can cause both excess type I errors and type II errors, (iii) that rankings of small pathways are unreliable and (iv) that type I error rates can be inflated due to multiple comparisons of pathways. We develop a Bayesian hierarchical model that addresses these problems, providing sensible estimates and rankings, and reducing error rates. We show, on both real and simulated data, that the results of our method are more accurate than the results produced by the classical over-representation analysis, providing a better understanding of the underlying biological phenomena involved in the phenotypes under study. The R code and the binary datasets for implementing the analyses described in this article are available online at: http://www.eng.wayne.edu/page.php?id=6402.",2016-07-26 +27568654,"EAU-ESTRO-SIOG Guidelines on Prostate Cancer. Part 1: Screening, Diagnosis, and Local Treatment with Curative Intent.","

Objective

To present a summary of the 2016 version of the European Association of Urology (EAU) - European Society for Radiotherapy & Oncology (ESTRO) - International Society of Geriatric Oncology (SIOG) Guidelines on screening, diagnosis, and local treatment with curative intent of clinically localised prostate cancer (PCa).

Evidence acquisition

The working panel performed a literature review of the new data (2013-2015). The guidelines were updated and the levels of evidence and/or grades of recommendation were added based on a systematic review of the evidence.

Evidence synthesis

BRCA2 mutations have been added as risk factors for early and aggressive disease. In addition to the Gleason score, the five-tier 2014 International Society of Urological Pathology grading system should now be provided. Systematic screening is still not recommended. Instead, an individual risk-adapted strategy following a detailed discussion and taking into account the patient's wishes and life expectancy must be considered. An early prostate-specific antigen test, the use of a risk calculator, or one of the promising biomarker tools are being investigated and might be able to limit the overdetection of insignificant PCa. Breaking the link between diagnosis and treatment may lower the overtreatment risk. Multiparametric magnetic resonance imaging using standardised reporting cannot replace systematic biopsy, but robustly nested within the diagnostic work-up, it has a key role in local staging. Active surveillance always needs to be discussed with very low-risk patients. The place of surgery in high-risk disease and the role of lymph node dissection have been clarified, as well as the management of node-positive patients. Radiation therapy using dose-escalated intensity-modulated technology is a key treatment modality with recent improvement in the outcome based on increased doses as well as combination with hormonal treatment. Moderate hypofractionation is safe and effective, but longer-term data are still lacking. Brachytherapy represents an effective way to increase the delivered dose. Focal therapy remains experimental while cryosurgery and HIFU are still lacking long-term convincing results.

Conclusions

The knowledge in the field of diagnosis, staging, and treatment of localised PCa is evolving rapidly. The 2016 EAU-ESTRO-SIOG Guidelines on PCa summarise the most recent findings and advice for the use in clinical practice. These are the first PCa guidelines endorsed by the European Society for Radiotherapy and Oncology and the International Society of Geriatric Oncology and reflect the multidisciplinary nature of PCa management. A full version is available from the EAU office and online (http://uroweb.org/guideline/prostate-cancer/).

Patient summary

The 2016 EAU-ESTRO-SIOG Prostate Cancer (PCa) Guidelines present updated information on the diagnosis and treatment of clinically localised prostate cancer. In Northern and Western Europe, the number of men diagnosed with PCa has been on the rise. This may be due to an increase in opportunistic screening, but other factors may also be involved (eg, diet, sexual behaviour, low exposure to ultraviolet radiation). We propose that men who are potential candidates for screening should be engaged in a discussion with their clinician (also involving their families and caregivers) so that an informed decision may be made as part of an individualised risk-adapted approach.",2016-08-25 +23592298,"iMole, a web based image retrieval system from biomedical literature.","iMole is a platform that automatically extracts images and captions from biomedical literature. Images are tagged with terms contained in figure captions by means of a sophisticated text-mining tool. Moreover, iMole allows the user to upload directly their own images within the database and manually tag images by curated dictionary. Using iMole the researchers can develop a proper biomedical image database, storing the images extracted from paper of interest, image found on the web repositories, and their own experimental images. In order to show the functioning of the platform, we used iMole to build a 2DE database. Briefly, tagged 2DE gel images were collected and stored in a searchable 2DE gel database, available to users through an interactive web interface. Images were obtained by automatically parsing 16,608 proteomic publications, which yielded more than 16,500 images. The database can be further expanded by users with images of interest through a manual uploading process. iMole is available with a preloaded set of 2DE gel data at http://imole.biodigitalvalley.com.",2013-06-17 +27717304,"Je, a versatile suite to handle multiplexed NGS libraries with unique molecular identifiers.","

Background

The yield obtained from next generation sequencers has increased almost exponentially in recent years, making sample multiplexing common practice. While barcodes (known sequences of fixed length) primarily encode the sample identity of sequenced DNA fragments, barcodes made of random sequences (Unique Molecular Identifier or UMIs) are often used to distinguish between PCR duplicates and transcript abundance in, for example, single-cell RNA sequencing (scRNA-seq). In paired-end sequencing, different barcodes can be inserted at each fragment end to either increase the number of multiplexed samples in the library or to use one of the barcodes as UMI. Alternatively, UMIs can be combined with the sample barcodes into composite barcodes, or with standard Illumina® indexing. Subsequent analysis must take read duplicates and sample identity into account, by identifying UMIs.

Results

Existing tools do not support these complex barcoding configurations and custom code development is frequently required. Here, we present Je, a suite of tools that accommodates complex barcoding strategies, extracts UMIs and filters read duplicates taking UMIs into account. Using Je on publicly available scRNA-seq and iCLIP data containing UMIs, the number of unique reads increased by up to 36 %, compared to when UMIs are ignored.

Conclusions

Je is implemented in JAVA and uses the Picard API. Code, executables and documentation are freely available at http://gbcs.embl.de/Je . Je can also be easily installed in Galaxy through the Galaxy toolshed.",2016-10-08 +26704598,OfftargetFinder: a web tool for species-specific RNAi design.,

Motivation

RNA interference (RNAi) technology is being developed as a weapon for pest insect control. To maximize the specificity that such an approach affords we have developed a bioinformatic web tool that searches the ever-growing arthropod transcriptome databases so that pest-specific RNAi sequences can be identified. This will help technology developers finesse the design of RNAi sequences and suggests which non-target species should be assessed in the risk assessment process.

Availability and implementation

http://rnai.specifly.org

Contact

crobin@unimelb.edu.au.,2015-12-24 +26112292,damidseq_pipeline: an automated pipeline for processing DamID sequencing datasets.,"

Unlabelled

DamID is a powerful technique for identifying regions of the genome bound by a DNA-binding (or DNA-associated) protein. Currently, no method exists for automatically processing next-generation sequencing DamID (DamID-seq) data, and the use of DamID-seq datasets with normalization based on read-counts alone can lead to high background and the loss of bound signal. DamID-seq thus presents novel challenges in terms of normalization and background minimization. We describe here damidseq_pipeline, a software pipeline that performs automatic normalization and background reduction on multiple DamID-seq FASTQ datasets.

Availability and implementation

Open-source and freely available from http://owenjm.github.io/damidseq_pipeline. The damidseq_pipeline is implemented in Perl and is compatible with any Unix-based operating system (e.g. Linux, Mac OSX).

Contact

o.marshall@gurdon.cam.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-25 +26481351,"Expression Atlas update--an integrated database of gene and protein expression in humans, animals and plants.","Expression Atlas (http://www.ebi.ac.uk/gxa) provides information about gene and protein expression in animal and plant samples of different cell types, organism parts, developmental stages, diseases and other conditions. It consists of selected microarray and RNA-sequencing studies from ArrayExpress, which have been manually curated, annotated with ontology terms, checked for high quality and processed using standardised analysis methods. Since the last update, Atlas has grown seven-fold (1572 studies as of August 2015), and incorporates baseline expression profiles of tissues from Human Protein Atlas, GTEx and FANTOM5, and of cancer cell lines from ENCODE, CCLE and Genentech projects. Plant studies constitute a quarter of Atlas data. For genes of interest, the user can view baseline expression in tissues, and differential expression for biologically meaningful pairwise comparisons-estimated using consistent methodology across all of Atlas. Our first proteomics study in human tissues is now displayed alongside transcriptomics data in the same tissues. Novel analyses and visualisations include: 'enrichment' in each differential comparison of GO terms, Reactome, Plant Reactome pathways and InterPro domains; hierarchical clustering (by baseline expression) of most variable genes and experimental conditions; and, for a given gene-condition, distribution of baseline expression across biological replicates.",2015-10-19 +26600239,SINCERA: A Pipeline for Single-Cell RNA-Seq Profiling Analysis.,"A major challenge in developmental biology is to understand the genetic and cellular processes/programs driving organ formation and differentiation of the diverse cell types that comprise the embryo. 
While recent studies using single cell transcriptome analysis illustrate the power to measure and understand cellular heterogeneity in complex biological systems, processing large amounts of RNA-seq data from heterogeneous cell populations creates the need for readily accessible tools for the analysis of single-cell RNA-seq (scRNA-seq) profiles. The present study presents a generally applicable analytic pipeline (SINCERA: a computational pipeline for SINgle CEll RNA-seq profiling Analysis) for processing scRNA-seq data from a whole organ or sorted cells. The pipeline supports the analysis for: 1) the distinction and identification of major cell types; 2) the identification of cell type specific gene signatures; and 3) the determination of driving forces of given cell types. We applied this pipeline to the RNA-seq analysis of single cells isolated from embryonic mouse lung at E16.5. Through the pipeline analysis, we distinguished major cell types of fetal mouse lung, including epithelial, endothelial, smooth muscle, pericyte, and fibroblast-like cell types, and identified cell type specific gene signatures, bioprocesses, and key regulators. SINCERA is implemented in R, licensed under the GNU General Public License v3, and freely available from CCHMC PBGE website, https://research.cchmc.org/pbge/sincera.html.",2015-11-24 +26703557,Extended Functional Groups (EFG): An Efficient Set for Chemical Characterization and Structure-Activity Relationship Studies of Chemical Compounds.,"The article describes a classification system termed ""extended functional groups"" (EFG), which are an extension of a set previously used by the CheckMol software, that covers in addition heterocyclic compound classes and periodic table groups. The functional groups are defined as SMARTS patterns and are available as part of the ToxAlerts tool (http://ochem.eu/alerts) of the On-line CHEmical database and Modeling (OCHEM) environment platform. 
The article describes the motivation and the main ideas behind this extension and demonstrates that EFG can be efficiently used to develop and interpret structure-activity relationship models.",2015-12-23 +24393765,NCBI disease corpus: a resource for disease name recognition and concept normalization.,"Information encoded in natural language in biomedical literature publications is only useful if efficient and reliable ways of accessing and analyzing that information are available. Natural language processing and text mining tools are therefore essential for extracting valuable information, however, the development of powerful, highly effective tools to automatically detect central biomedical concepts such as diseases is conditional on the availability of annotated corpora. This paper presents the disease name and concept annotations of the NCBI disease corpus, a collection of 793 PubMed abstracts fully annotated at the mention and concept level to serve as a research resource for the biomedical natural language processing community. Each PubMed abstract was manually annotated by two annotators with disease mentions and their corresponding concepts in Medical Subject Headings (MeSH®) or Online Mendelian Inheritance in Man (OMIM®). Manual curation was performed using PubTator, which allowed the use of pre-annotations as a pre-step to manual annotations. Fourteen annotators were randomly paired and differing annotations were discussed for reaching a consensus in two annotation phases. In this setting, a high inter-annotator agreement was observed. Finally, all results were checked against annotations of the rest of the corpus to assure corpus-wide consistency. The public release of the NCBI disease corpus contains 6892 disease mentions, which are mapped to 790 unique disease concepts. Of these, 88% link to a MeSH identifier, while the rest contain an OMIM identifier. 
We were able to link 91% of the mentions to a single disease concept, while the rest are described as a combination of concepts. In order to help researchers use the corpus to design and test disease identification methods, we have prepared the corpus as training, testing and development sets. To demonstrate its utility, we conducted a benchmarking experiment where we compared three different knowledge-based disease normalization methods with a best performance in F-measure of 63.7%. These results show that the NCBI disease corpus has the potential to significantly improve the state-of-the-art in disease name recognition and normalization research, by providing a high-quality gold standard thus enabling the development of machine-learning based approaches for such tasks. The NCBI disease corpus, guidelines and other associated resources are available at: http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/DISEASE/.",2014-01-03 +22114206,Gene expression databases for kidney epithelial cells.,"The 21st century has seen an explosion of new high-throughput data from transcriptomic and proteomic studies. These data are highly relevant to the design and interpretation of modern physiological studies but are not always readily accessible to potential users in user-friendly, searchable formats. Data from our own studies involving transcriptomic and proteomic profiling of renal tubule epithelia have been made available on a variety of online databases. Here, we provide a roadmap to these databases and illustrate how they may be useful in the design and interpretation of physiological studies. The databases can be accessed through http://helixweb.nih.gov/ESBL/Database.",2011-11-23 +26108529,Hyperscape: visualization for complex biological networks.,"

Motivation

Network biology has emerged as a powerful tool to uncover the organizational properties of living systems through the application of graph theoretic approaches. However, due to limitations in underlying data models and visualization software, knowledge relating to large molecular assemblies and biologically active fragments is poorly represented.

Results

Here, we demonstrate a novel hypergraph implementation that better captures hierarchical structures, using components of elastic fibers and chromatin modification as models. These reveal unprecedented views of the biology of these systems, demonstrating the unique capacity of hypergraphs to resolve overlaps and uncover new insights into the subfunctionalization of variant complexes.

Availability and implementation

Hyperscape is available as a web application at http://www.compsysbio.org/hyperscape. Source code, examples and a tutorial are freely available under a GNU license.

Contacts

john.parkinson@utoronto.ca or graham.cromar@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-24 +24647393,Dietary intake of vitamin K is inversely associated with mortality risk.,"Vitamin K has been related to cardiovascular disease and cancer risk. However, data on total mortality are scarce. The aim of the present study was to assess the association between the dietary intake of different types of vitamin K and mortality in a Mediterranean population at high cardiovascular disease risk. A prospective cohort analysis was conducted in 7216 participants from the PREDIMED (Prevención con Dieta Mediterránea) study (median follow-up of 4.8 y). Energy and nutrient intakes were evaluated using a validated 137-item food frequency questionnaire. Dietary vitamin K intake was calculated annually using the USDA food composition database and other published sources. Deaths were ascertained by an end-point adjudication committee unaware of the dietary habits of participants after they had reviewed medical records and linked up to the National Death Index. Cox proportional hazard models were fitted to assess the RR of mortality. Energy-adjusted baseline dietary phylloquinone intake was inversely associated with a significantly reduced risk of cancer and all-cause mortality after controlling for potential confounders (HR: 0.54; 95% CI: 0.30, 0.96; and HR: 0.64; 95% CI: 0.45, 0.90, respectively). In longitudinal assessments, individuals who increased their intake of phylloquinone or menaquinone during follow-up had a lower risk of cancer (HR: 0.64; 95% CI: 0.43, 0.95; and HR: 0.41; 95% CI: 0.26, 0.64, respectively) and all-cause mortality (HR: 0.57; 95% CI: 0.44, 0.73; and HR: 0.55; 95% CI: 0.42, 0.73, respectively) than individuals who decreased or did not change their intake. Also, individuals who increased their intake of dietary phylloquinone had a lower risk of cardiovascular mortality risk (HR: 0.52; 95% CI: 0.31, 0.86). 
However, no association between changes in menaquinone intake and cardiovascular mortality was observed (HR: 0.76; 95% CI: 0.44, 1.29). An increase in dietary intake of vitamin K is associated with a reduced risk of cardiovascular, cancer, or all-cause mortality in a Mediterranean population at high cardiovascular disease risk. This trial was registered at http://www.controlled-trials.com as ISRCTN35739639.",2014-03-19 +27587692,Simulated linear test applied to quantitative proteomics.,"

Motivation

Omics studies aim to find significant changes due to biological or functional perturbation. However, gene and protein expression profiling experiments contain inherent technical variation. In discovery proteomics studies where the number of samples is typically small, technical variation plays an important role because it contributes considerably to the observed variation. Previous methods place both technical and biological variations in tightly integrated mathematical models that are difficult to adapt for different technological platforms. Our aim is to derive a statistical framework that allows the inclusion of a wide range of technical variability.

Results

We introduce a new method called the simulated linear test, or the s-test, that is easy to implement and easy to adapt for different models of technical variation. It generates virtual data points from the observed values according to a pre-defined technical distribution and subsequently employs linear modeling for significance analysis. We demonstrate the flexibility of the proposed approach by deriving a new significance test for quantitative discovery proteomics for which missing values have been a major issue for traditional methods such as the t-test. We evaluate the result on two label-free (phospho) proteomics datasets based on ion-intensity quantitation.

Availability and implementation

Available at http://www.oncoproteomics.nl/software/stest.html

Contact

t.pham@vumc.nl.",2016-09-01 +27472178,Contracts and Contracting: A Primer.,"

Purpose/objectives

The underlying guiding principles of case management services and practices of the Case Management Body of Knowledge include the following: ""Case managers must possess the education, skills, knowledge, competencies, and experiences needed to effectively render appropriate, safe, and quality services to clients/support systems"" and ""Case management services are offered according to the clients' benefits as stipulated in their health insurance plans"" (http://www.cmbodyofknowledge.com/content/case-management-knowledge-2). Fulfilling these principles requires that the case manager engage in negotiating and contract execution. This article explores the concepts of negotiation and some of the many ways case managers contribute to the contracting process.

Primary practice setting

Acute care hospitals, individual practice, managed care.

Findings and conclusions

Case managers can provide valuable information during the contracting process, in many settings. In the managed care arena, case management can help identify the types of services needed by the population the organization serves. The same understanding of data can assist during the payer contracting process in the acute care setting and ensure that the hospital is fairly reimbursed by third party payers. The independent practitioners will, undoubtedly, face the need to negotiate for themselves as well as their clients.

Implications for case management

The case manager, regardless of the setting, benefits from an understanding of the principles and processes associated with negotiation and contracting.",2016-09-01 +25711446,An analytical framework for optimizing variant discovery from personal genomes.,"The standardization and performance testing of analysis tools is a prerequisite to widespread adoption of genome-wide sequencing, particularly in the clinic. However, performance testing is currently complicated by the paucity of standards and comparison metrics, as well as by the heterogeneity in sequencing platforms, applications and protocols. Here we present the genome comparison and analytic testing (GCAT) platform to facilitate development of performance metrics and comparisons of analysis tools across these metrics. Performance is reported through interactive visualizations of benchmark and performance testing data, with support for data slicing and filtering. The platform is freely accessible at http://www.bioplanet.com/gcat.",2015-02-25 +27581337,ge-CRISPR - An integrated pipeline for the prediction and analysis of sgRNAs genome editing efficiency for CRISPR/Cas system.,"Genome editing by sgRNA a component of CRISPR/Cas system emerged as a preferred technology for genome editing in recent years. However, activity and stability of sgRNA in genome targeting is greatly influenced by its sequence features. In this endeavor, a few prediction tools have been developed to design effective sgRNAs but these methods have their own limitations. Therefore, we have developed ""ge-CRISPR"" using high throughput data for the prediction and analysis of sgRNAs genome editing efficiency. Predictive models were employed using SVM for developing pipeline-1 (classification) and pipeline-2 (regression) using 2090 and 4139 experimentally verified sgRNAs respectively from Homo sapiens, Mus musculus, Danio rerio and Xenopus tropicalis. 
During 10-fold cross validation we have achieved accuracy and Matthew's correlation coefficient of 87.70% and 0.75 for pipeline-1 on training dataset (T(1840)) while it performed equally well on independent dataset (V(250)). In pipeline-2 we attained Pearson correlation coefficient of 0.68 and 0.69 using best models on training (T(3169)) and independent dataset (V(520)) correspondingly. ge-CRISPR (http://bioinfo.imtech.res.in/manojk/gecrispr/) for a given genomic region will identify potent sgRNAs, their qualitative as well as quantitative efficiencies along with potential off-targets. It will be useful to scientific community engaged in CRISPR research and therapeutics development.",2016-09-01 +25886982,"GREGOR: evaluating global enrichment of trait-associated variants in epigenomic features using a systematic, data-driven approach.","

Motivation

The majority of variation identified by genome wide association studies falls in non-coding genomic regions and is hypothesized to impact regulatory elements that modulate gene expression. Here we present a statistically rigorous software tool GREGOR (Genomic Regulatory Elements and Gwas Overlap algoRithm) for evaluating enrichment of any set of genetic variants with any set of regulatory features. Using variants from five phenotypes, we describe a data-driven approach to determine the tissue and cell types most relevant to a trait of interest and to identify the subset of regulatory features likely impacted by these variants. Last, we experimentally evaluate six predicted functional variants at six lipid-associated loci and demonstrate significant evidence for allele-specific impact on expression levels. GREGOR systematically evaluates enrichment of genetic variation with the vast collection of regulatory data available to explore novel biological mechanisms of disease and guide us toward the functional variant at trait-associated loci.

Availability and implementation

GREGOR, including source code, documentation, examples, and executables, is available at http://genome.sph.umich.edu/wiki/GREGOR.

Contact

cristen@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-16 +26690544,CancerNet: a database for decoding multilevel molecular interactions across diverse cancer types.,"Protein-protein interactions (PPIs) and microRNA (miRNA)-target interactions are important for deciphering the mechanisms of tumorigenesis. However, current PPI databases do not support cancer-specific analysis. Also, no available databases can be used to retrieve cancer-associated miRNA-target interactions. As the pathogenesis of human cancers is affected by several miRNAs rather than a single miRNA, it is needed to uncover miRNA synergism in a systems level. Here for each cancer type, we constructed a miRNA-miRNA functionally synergistic network based on the functions of miRNA targets and their topological features in that cancer PPI network. And for the first time, we report the cancer-specific database CancerNet (http://bis.zju.edu.cn/CancerNet), which contains information about PPIs, miRNA-target interactions and functionally synergistic miRNA-miRNA pairs across 33 human cancer types. In addition, PPI information across 33 main normal tissues and cell types are included. Flexible query methods are allowed to retrieve cancer molecular interactions. Network viewer can be used to visualize interactions that users are interested in. Enrichment analysis tool was designed to detect significantly overrepresented Gene Ontology categories of miRNA targets. Thus, CancerNet serves as a comprehensive platform for assessing the roles of proteins and miRNAs, as well as their interactions across human cancers.",2015-12-21 +27153682,RIPPER: a framework for MS1 only metabolomics and proteomics label-free relative quantification.,"

Unlabelled

RIPPER is a framework for mass-spectrometry-based label-free relative quantification for proteomics and metabolomics studies. RIPPER combines a series of previously described algorithms for pre-processing, analyte quantification, retention time alignment, and analyte grouping across runs. It is also the first software framework to implement proximity-based intensity normalization. RIPPER produces lists of analyte signals with their unnormalized and normalized intensities that can serve as input to statistical and directed mass spectrometry (MS) methods for detecting quantitative differences between biological samples using MS.

Availability and implementation

http://www.z.umn.edu/ripper

Contact

vanr0014@umn.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-18 +27072648,"Recent Fast Food Consumption and Bisphenol A and Phthalates Exposures among the U.S. Population in NHANES, 2003-2010.","

Background

Phthalates and bisphenol A (BPA) are widely used industrial chemicals that may adversely impact human health. Human exposure is ubiquitous and can occur through diet, including consumption of processed or packaged food.

Objective

To examine associations between recent fast food intake and BPA and urinary metabolites of di(2-ethylhexyl) phthalate (ΣDEHPm) and diisononyl phthalate (DiNPm) among the U.S. population.

Methods

We combined data on 8,877 participants from the National Health and Nutrition Examination Survey (NHANES 2003-2010). Using 24-hr dietary recall data, we quantified: a) fast food intake [percent of total energy intake (TEI) from fast food]; b) fast food-derived fat intake (percent of TEI from fat in fast food); and c) fast food intake by food group (dairy, eggs, grains, meat, and other). We examined associations between dietary exposures and urinary chemical concentrations using multivariate linear regression.

Results

We observed evidence of a positive, dose-response relationship between fast food intake and exposure to phthalates (p-trend < 0.0001) but not BPA; participants with high consumption (≥ 34.9% TEI from fast food) had 23.8% (95% CI: 11.9%, 36.9%) and 39.0% (95% CI: 21.9%, 58.5%) higher levels of ΣDEHPm and DiNPm, respectively, than nonconsumers. Fast food-derived fat intake was also positively associated with ΣDEHPm and DiNPm (p-trend < 0.0001). After adjusting for other food groups, ΣDEHPm was associated with grain and other intake, and DiNPm was associated with meat and grain intake.

Conclusion

Fast food may be a source of exposure to DEHP and DiNP. These results, if confirmed, could inform individual and regulatory exposure reduction strategies.

Citation

Zota AR, Phillips CA, Mitro SD. 2016. Recent fast food consumption and bisphenol A and phthalates exposures among the U.S. population in NHANES, 2003-2010. Environ Health Perspect 124:1521-1528; http://dx.doi.org/10.1289/ehp.1510803.",2016-04-13 +27582876,A biologically informed method for detecting rare variant associations.,"

Background

BioBin is a bioinformatics software package developed to automate the process of binning rare variants into groups for statistical association analysis using a biological knowledge-driven framework. BioBin collapses variants into biological features such as genes, pathways, evolutionary conserved regions (ECRs), protein families, regulatory regions, and others based on user-designated parameters. BioBin provides the infrastructure to create complex and interesting hypotheses in an automated fashion thereby circumventing the necessity for advanced and time consuming scripting.

Purpose of the study

In this manuscript, we describe the software package for BioBin, along with type I error and power simulations to demonstrate the strengths and various customizable features and analysis options of this variant binning tool.

Results

Simulation testing highlights the utility of BioBin as a fast, comprehensive and expandable tool for the biologically-inspired binning and analysis of low-frequency variants in sequence data.

Conclusions and potential implications

The BioBin software package has the capability to transform and streamline the analysis pipelines for researchers analyzing rare variants. This automated bioinformatics tool minimizes the manual effort of creating genomic regions for binning such that time can be spent on the much more interesting task of statistical analyses. This software package is open source and freely available from http://ritchielab.com/software/biobin-download.",2016-08-30 +26791367,HIV Whole-Genome Sequencing Now: Answering Still-Open Questions.,"Diversity, evolution, and epidemiology of HIV are directly relevant to HIV transmission and pathogenesis; hence, they play a key role in antiretroviral treatment and vaccine design. Global HIV whole-genome sequencing would provide a treasure chest of data to answer many questions still open in these fields. An article by Berg et al. in this issue of theJournal of Clinical Microbiologydescribes a universal strategy to amplify and sequence heterogeneous HIV whole genomes (M. G. Berg, J. Yamaguchi, E. Alessandri-Gradt, R. W. Tell, J.-C. Plantier, and C. A. Brennan, J Clin Microbiol 54:868-882, 2016,http://dx.doi.org/10.1128/JCM.02479-15).",2016-01-20 +22574683,"FINDSITE(X): a structure-based, small molecule virtual screening approach with application to all identified human GPCRs.","We have developed FINDSITE(X), an extension of FINDSITE, a protein threading based algorithm for the inference of protein binding sites, biochemical function and virtual ligand screening, that removes the limitation that holo protein structures (those containing bound ligands) of a sufficiently large set of distant evolutionarily related proteins to the target be solved; rather, predicted protein structures and experimental ligand binding information are employed. 
To provide the predicted protein structures, a fast and accurate version of our recently developed TASSER(VMT), TASSER(VMT)-lite, for template-based protein structural modeling applicable up to 1000 residues is developed and tested, with comparable performance to the top CASP9 servers. Then, a hybrid approach that combines structure alignments with an evolutionary similarity score for identifying functional relationships between target and proteins with binding data has been developed. By way of illustration, FINDSITE(X) is applied to 998 identified human G-protein coupled receptors (GPCRs). First, TASSER(VMT)-lite provides updates of all human GPCR structures previously modeled in our lab. We then use these structures and the new function similarity detection algorithm to screen all human GPCRs against the ZINC8 nonredundant (TC < 0.7) ligand set combined with ligands from the GLIDA database (a total of 88,949 compounds). Testing (excluding GPCRs whose sequence identity > 30% to the target from the binding data library) on a 168 human GPCR set with known binding data, the average enrichment factor in the top 1% of the compound library (EF(0.01)) is 22.7, whereas EF(0.01) by FINDSITE is 7.1. For virtual screening when just the target and its native ligands are excluded, the average EF(0.01) reaches 41.4. We also analyze off-target interactions for the 168 protein test set. All predicted structures, virtual screening data and off-target interactions for the 998 human GPCRs are available at http://cssb.biology.gatech.edu/skolnick/webservice/gpcr/index.html .",2012-05-21 +22698731,The insulin-like growth factor mutation database (IGFmdb).,"Insulin-like growth factors (IGF-I and IGF-II), and insulin are evolutionarily conserved hormonal regulators of eukaryotic growth and development. 
Through interactions with their cognate receptors, all three molecules can influence cellular growth, proliferation, differentiation, migration, and survival, as well as metabolic processes. As such, perturbations in signaling by IGFs and insulin are a well-documented cause of altered growth, development and survival during both embryonic and post-natal life. A key approach in understanding how IGFs and insulin elicit their biological effects has been through identifying structural features of the ligands that influence their receptor interactions. Over the years, the study of many hundreds of specifically engineered IGF and insulin analogues has provided a wealth of knowledge about how specific residues of these ligands contribute to ligand:receptor interactions. Some analogues have even provided the basis for designing therapeutic agents for the treatment of IGF and insulin-related diseases. As the list of IGF and insulin analogues continues to grow we find that, while many have been produced and studied, it would be of considerable value to have a central repository from which information about specific analogues and their receptor binding data were readily available in an easily searchable and comparable format. To address this, we have created the ""Insulin-like growth factor mutation database"" (IGFmdb). The IGFmdb is a web-based curated database of annotated ligand analogues and their receptor binding affinities that can be accessed via http://www.adelaide.edu.au/igfmutation. Currently the IGFmdb contains receptor-binding data for 67 IGF-II analogues that were publicly accessible prior to 2012, as well as 67 IGF-I analogues, including all of those produced and characterised in our laboratory. A small number of these are IGF species homologues. There are also 32 insulin analogues within IGFmdb that were reported within the included IGF analogue studies, representing only a small fraction of existing insulin mutants. 
Future developments of the IGFmdb will incorporate receptor-binding data for all publicly accessible IGF-I analogues and the data will be expanded to include IGF-binding protein (IGFBP) binding affinities.",2012-06-13 +22943976,[Information needs and internet use in patients with breast cancer in Spain].,"

Objective

To analyze information needs and search strategies among women with breast cancer in Spain. An additional aim was to explore how the internet, as a source of health information, influences the autonomy and active management of this disease among patients. The research was conducted in 2010 and 2011.

Method

This study forms part of a broader qualitative study that focuses on describing patients' experiences of breast cancer and the trajectory of the disease, with the aim of creating a platform of integrated information resources for patients, relatives and healthcare professionals (PyDEsalud: http://www.pydesalud.com). We carried out 41 in-depth, semi-structured interviews with breast cancer patients in different stages of the disease, who were aged between 32 and 69 years. The interviewees were selected by intentional sampling, which included 15 Spanish regions. The field work was carried out from June to August, 2010. The interviews were recorded on videotape or audio. Based on patients' narratives of their disease, a thematic-inductive analysis was performed of the information gathered.

Results

The findings show the importance of the internet as a source of health information. Moreover, the internet is a resource that is able to promote the empowerment process among patients and, consequently, to aid improvement in disease management.

Conclusions

Users need access to web sites with high quality health information, adapted to their needs and objectives.",2012-09-01 +25184280,Cheburator software for automatically calculating drug inhibitory concentrations from in vitro screening assays.,"In the process of new cancer drug development, as the first step of their assessment, their activities are usually studied in vitro against a panel of cancer cell lines. The results of these in vitro drug screening assays are commonly expressed as inhibitory concentration 50% (IC50): the concentration of the tested agent that inhibits the proliferation of the cancer cell population to 50% of the theoretically possible effect (absolute IC50) or maximum effect practically achieved by the drug (relative IC50). The currently available software for calculating IC50 values requires manual data entry, is time consuming, and is prone to calculation errors. Thus, we have developed open source, free, easy-to-use software for performing standardized data evaluations and automatically calculating the IC50. This software eliminates the laborious and error-prone manual entry of data, substantially reduces the amount of time spent for data analysis. It has been extensively used in our department as the main tool for in vitro data processing during the past several years and can be useful for other research groups working in the area of anticancer drug discovery, either alone or combined with other software packages. The current version of our program, Cheburator, together with sample data, source code, and documentation, is freely available at the following URL: http://www.cheburator.nevozhay.com (it is free for academic use, but a license is required for commercial use).",2014-09-03 +27833741,The 1st Baltic Osseointegration Academy and Lithuanian University of Health Sciences Consensus Conference 2016. Summary and Consensus Statements: Group III - Peri-Implantitis Treatment.,"

Introduction

The task of Group 3 was to review and update the existing data concerning non-surgical, surgical non-regenerative and surgical regenerative treatment of peri-implantitis. Special interest was paid to the preventive and supporting therapy in case of peri-implantitis.

Material and methods

The main areas of interest were as follows: effect of smoking and history of periodontitis, prosthetic treatment mistakes, excess cement, overloading, general diseases influence on peri-implantitis development. The systematic review and/or meta-analysis were registered in PROSPERO, an international prospective register of systematic reviews: http://www.crd.york.ac.uk/PROSPERO/. The literature in the corresponding areas of interest was searched and reported using the PRISMA (Preferred Reporting Item for Systematic Review and Meta-Analysis) Statement: http://www.prisma-statement.org/. The method of preparation of systematic reviews of the literature based on comprehensive search strategies was discussed and standardized. The summary of the materials and methods employed by the authors in preparing the systematic review and/or meta-analysis is presented in Preface chapter.

Results

The results and conclusions of the review process are presented in the respective papers. The group's general commentaries, consensus statements, clinical recommendations and implications for research are presented in this article.",2016-07-01 +21531983,Integrating diverse databases into an unified analysis framework: a Galaxy approach.,"Recent technological advances have lead to the ability to generate large amounts of data for model and non-model organisms. Whereas, in the past, there have been a relatively small number of central repositories that serve genomic data, an increasing number of distinct specialized data repositories and resources have been established. Here, we describe a generic approach that provides for the integration of a diverse spectrum of data resources into a unified analysis framework, Galaxy (http://usegalaxy.org). This approach allows the simplified coupling of external data resources with the data analysis tools available to Galaxy users, while leveraging the native data mining facilities of the external data resources. DATABASE URL: http://usegalaxy.org.",2011-04-29 +22465851,Enhancing a Pathway-Genome Database (PGDB) to capture subcellular localization of metabolites and enzymes: the nucleotide-sugar biosynthetic pathways of Populus trichocarpa.,"Understanding how cellular metabolism works and is regulated requires that the underlying biochemical pathways be adequately represented and integrated with large metabolomic data sets to establish a robust network model. Genetically engineering energy crops to be less recalcitrant to saccharification requires detailed knowledge of plant polysaccharide structures and a thorough understanding of the metabolic pathways involved in forming and regulating cell-wall synthesis. Nucleotide-sugars are building blocks for synthesis of cell wall polysaccharides. 
The biosynthesis of nucleotide-sugars is catalyzed by a multitude of enzymes that reside in different subcellular organelles, and precise representation of these pathways requires accurate capture of this biological compartmentalization. The lack of simple localization cues in genomic sequence data and annotations however leads to missing compartmentalization information for eukaryotes in automatically generated databases, such as the Pathway-Genome Databases (PGDBs) of the SRI Pathway Tools software that drives much biochemical knowledge representation on the internet. In this report, we provide an informal mechanism using the existing Pathway Tools framework to integrate protein and metabolite sub-cellular localization data with the existing representation of the nucleotide-sugar metabolic pathways in a prototype PGDB for Populus trichocarpa. The enhanced pathway representations have been successfully used to map SNP abundance data to individual nucleotide-sugar biosynthetic genes in the PGDB. The manually curated pathway representations are more conducive to the construction of a computational platform that will allow the simulation of natural and engineered nucleotide-sugar precursor fluxes into specific recalcitrant polysaccharide(s). Database URL: The curated Populus PGDB is available in the BESC public portal at http://cricket.ornl.gov/cgi-bin/beocyc_home.cgi and the nucleotide-sugar biosynthetic pathways can be directly accessed at http://cricket.ornl.gov:1555/PTR/new-image?object=SUGAR-NUCLEOTIDES.",2012-03-31 +24198712,"Database of Vascular Plants of Canada (VASCAN): a community contributed taxonomic checklist of all vascular plants of Canada, Saint Pierre and Miquelon, and Greenland.","The Database of Vascular Plants of Canada or VASCAN (http://data.canadensys.net/vascan) is a comprehensive and curated checklist of all vascular plants reported in Canada, Greenland (Denmark), and Saint Pierre and Miquelon (France). 
VASCAN was developed at the Université de Montréal Biodiversity Centre and is maintained by a group of editors and contributors. For every core taxon in the checklist (species, subspecies, or variety), VASCAN provides the accepted scientific name, the accepted French and English vernacular names, and their synonyms/alternatives in Canada, as well as the distribution status (native, introduced, ephemeral, excluded, extirpated, doubtful or absent) of the plant for each province or territory, and the habit (tree, shrub, herb and/or vine) of the plant in Canada. For reported hybrids (nothotaxa or hybrid formulas) VASCAN also provides the hybrid parents, except if the parents of the hybrid do not occur in Canada. All taxa are linked to a classification. VASCAN refers to a source for all name, classification and distribution information. All data have been released to the public domain under a CC0 waiver and are available through Canadensys and the Global Biodiversity Information Facility (GBIF). VASCAN is a service to the scientific community and the general public, including administrations, companies, and non-governmental organizations.",2013-07-24 +23582740,LandCaRe DSS--an interactive decision support system for climate change impact assessment and the analysis of potential agricultural land use adaptation strategies.,"Decision support to develop viable climate change adaptation strategies for agriculture and regional land use management encompasses a wide range of options and issues. Up to now, only a few suitable tools and methods have existed for farmers and regional stakeholders that support the process of decision-making in this field. The interactive model-based spatial information and decision support system LandCaRe DSS attempts to close the existing methodical gap. 
This system supports interactive spatial scenario simulations, multi-ensemble and multi-model simulations at the regional scale, as well as the complex impact assessment of potential land use adaptation strategies at the local scale. The system is connected to a local geo-database and via the internet to a climate data server. LandCaRe DSS uses a multitude of scale-specific ecological impact models, which are linked in various ways. At the local scale (farm scale), biophysical models are directly coupled with a farm economy calculator. New or alternative simulation models can easily be added, thanks to the innovative architecture and design of the DSS. Scenario simulations can be conducted with a reasonable amount of effort. The interactive LandCaRe DSS prototype also offers a variety of data analysis and visualisation tools, a help system for users and a farmer information system for climate adaptation in agriculture. This paper presents the theoretical background, the conceptual framework, and the structure and methodology behind LandCaRe DSS. Scenario studies at the regional and local scale for the two Eastern German regions of Uckermark (dry lowlands, 2600 km(2)) and Weißeritz (humid mountain area, 400 km(2)) were conducted in close cooperation with stakeholders to test the functionality of the DSS prototype. The system is gradually being transformed into a web version (http://www.landcare-dss.de) to ensure the broadest possible distribution of LandCaRe DSS to the public. The system will be continuously developed, updated and used in different research projects and as a learning and knowledge-sharing tool for students. The main objective of LandCaRe DSS is to provide information on the complex long-term impacts of climate change and on potential management options for adaptation by answering ""what-if"" type questions.",2013-04-10 +26685285,A Systems Biology Approach Reveals Converging Molecular Mechanisms that Link Different POPs to Common Metabolic Diseases.,"

Background

A number of epidemiological studies have identified statistical associations between persistent organic pollutants (POPs) and metabolic diseases, but testable hypotheses regarding underlying molecular mechanisms to explain these linkages have not been published.

Objectives

We assessed the underlying mechanisms of POPs that have been associated with metabolic diseases; three well-known POPs [2,3,7,8-tetrachlorodibenzodioxin (TCDD), 2,2´,4,4´,5,5´-hexachlorobiphenyl (PCB 153), and 4,4´-dichlorodiphenyldichloroethylene (p,p´-DDE)] were studied. We used advanced database search tools to delineate testable hypotheses and to guide laboratory-based research studies into underlying mechanisms by which this POP mixture could produce or exacerbate metabolic diseases.

Methods

For our searches, we used proprietary systems biology software (MetaCore™/MetaDrug™) to conduct advanced search queries for the underlying interactions database, followed by directional network construction to identify common mechanisms for these POPs within two or fewer interaction steps downstream of their primary targets. These common downstream pathways belong to various cytokine and chemokine families with experimentally well-documented causal associations with type 2 diabetes.

Conclusions

Our systems biology approach allowed identification of converging pathways leading to activation of common downstream targets. To our knowledge, this is the first study to propose an integrated global set of step-by-step molecular mechanisms for a combination of three common POPs using a systems biology approach, which may link POP exposure to diseases. Experimental evaluation of the proposed pathways may lead to development of predictive biomarkers of the effects of POPs, which could translate into disease prevention and effective clinical treatment strategies.

Citation

Ruiz P, Perlina A, Mumtaz M, Fowler BA. 2016. A systems biology approach reveals converging molecular mechanisms that link different POPs to common metabolic diseases. Environ Health Perspect 124:1034-1041; http://dx.doi.org/10.1289/ehp.1510308.",2015-12-18 +24463183,G-BLASTN: accelerating nucleotide alignment by graphics processors.,"

Motivation

Since 1990, the basic local alignment search tool (BLAST) has become one of the most popular and fundamental bioinformatics tools for sequence similarity searching, receiving extensive attention from the research community. The two pioneering papers on BLAST have received over 96 000 citations. Given the huge population of BLAST users and the increasing size of sequence databases, an urgent topic of study is how to improve the speed. Recently, graphics processing units (GPUs) have been widely used as low-cost, high-performance computing platforms. The existing GPU-BLAST is a promising software tool that uses a GPU to accelerate protein sequence alignment. Unfortunately, there is still no GPU-accelerated software tool for BLAST-based nucleotide sequence alignment.

Results

We developed G-BLASTN, a GPU-accelerated nucleotide alignment tool based on the widely used NCBI-BLAST. G-BLASTN can produce exactly the same results as NCBI-BLAST, and it has very similar user commands. Compared with the sequential NCBI-BLAST, G-BLASTN can achieve an overall speedup of 14.80X under 'megablast' mode. More impressively, it achieves an overall speedup of 7.15X over the multithreaded NCBI-BLAST running on 4 CPU cores. When running under 'blastn' mode, the overall speedups are 4.32X (against 1-core) and 1.56X (against 4-core). G-BLASTN also supports a pipeline mode that further improves the overall performance by up to 44% when handling a batch of queries as a whole. Currently G-BLASTN is best optimized for databases with long sequences. We plan to optimize its performance on short database sequences in our future work.

Availability

http://www.comp.hkbu.edu.hk/∼chxw/software/G-BLASTN.html

Contact

chxw@comp.hkbu.edu.hk

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-01-24 +23106637,Metabolic analysis of the cutaneous fungi Malassezia globosa and M. restricta for insights on scalp condition and dandruff.,"Dandruff is a global consumer problem, characterized by flaking and scaling of the scalp, accompanied by itch and irritancy. However, the aetiology of the condition remains poorly understood, although there is a strong consensus that the cutaneous fungi Malassezia globosa and M. restricta are a major contributory factor. Although there is a paucity of understanding on how these commensal microorganisms adopt a pathogenic phenotype, a rich source of potential insights now exists in the shape of the recently published whole-genome sequence of M. globosa, a functional annotation and metabolic reconstruction of which is freely accessible via the integrated microbial genomes (IMG) online community resource (http://www.hmpdacc-resources.org/cgi-bin/imgm_hmp/main.cgi). In these studies, we have taken a combined in-silico and in-vitro approach to investigate aspects of lipid and amino acid metabolism by M. globosa and M. restricta that have the potential to impact on scalp condition and dandruff. The IMG platform was employed to analyse the metabolism of triacylglycerols and fatty acids, as well as the aromatic amino acid tryptophan, by M. globosa, to investigate pro-inflammatory pathways linked in the literature to dandruff and pityriasis versicolour, respectively. Results were equivocal, leaving question marks over the ability of M. globosa to fully degrade unsaturated fatty acids and metabolize tryptophan to indole-3-pyruvic acid. In-vitro assay systems were then developed to study the biotransformation of these metabolites by both M. globosa and M. 
restricta, as well as their effect on human keratinocytes, and the results here indicated that neither unsaturated fatty acids nor indole derivatives are likely to be major aetiological factors in dandruff.",2012-11-29 +22713124,Down-weighting overlapping genes improves gene set analysis.,"

Background

The identification of gene sets that are significantly impacted in a given condition based on microarray data is a crucial step in current life science research. Most gene set analysis methods treat genes equally, regardless how specific they are to a given gene set.

Results

In this work we propose a new gene set analysis method that computes a gene set score as the mean of absolute values of weighted moderated gene t-scores. The gene weights are designed to emphasize the genes appearing in few gene sets, versus genes that appear in many gene sets. We demonstrate the usefulness of the method when analyzing gene sets that correspond to the KEGG pathways, and hence we called our method Pathway Analysis with Down-weighting of Overlapping Genes (PADOG). Unlike most gene set analysis methods which are validated through the analysis of 2-3 data sets followed by a human interpretation of the results, the validation employed here uses 24 different data sets and a completely objective assessment scheme that makes minimal assumptions and eliminates the need for possibly biased human assessments of the analysis results.

Conclusions

PADOG significantly improves gene set ranking and boosts sensitivity of analysis using information already available in the gene expression profiles and the collection of gene sets to be analyzed. The advantages of PADOG over other existing approaches are shown to be stable to changes in the database of gene sets to be analyzed. PADOG was implemented as an R package available at: http://bioinformaticsprb.med.wayne.edu/PADOG/ or http://www.bioconductor.org.",2012-06-19 +22151536,Empirical comparison of cross-platform normalization methods for gene expression data.,"

Background

Simultaneous measurement of gene expression on a genomic scale can be accomplished using microarray technology or by sequencing based methods. Researchers who perform high throughput gene expression assays often deposit their data in public databases, but heterogeneity of measurement platforms leads to challenges for the combination and comparison of data sets. Researchers wishing to perform cross platform normalization face two major obstacles. First, a choice must be made about which method or methods to employ. Nine are currently available, and no rigorous comparison exists. Second, software for the selected method must be obtained and incorporated into a data analysis workflow.

Results

Using two publicly available cross-platform testing data sets, cross-platform normalization methods are compared based on inter-platform concordance and on the consistency of gene lists obtained with transformed data. Scatter and ROC-like plots are produced and new statistics based on those plots are introduced to measure the effectiveness of each method. Bootstrapping is employed to obtain distributions for those statistics. The consistency of platform effects across studies is explored theoretically and with respect to the testing data sets.

Conclusions

Our comparisons indicate that four methods, DWD, EB, GQ, and XPN, are generally effective, while the remaining methods do not adequately correct for platform effects. Of the four successful methods, XPN generally shows the highest inter-platform concordance when treatment groups are equally sized, while DWD is most robust to differently sized treatment groups and consistently shows the smallest loss in gene detection. We provide an R package, CONOR, capable of performing the nine cross-platform normalization methods considered. The package can be downloaded at http://alborz.sdsu.edu/conor and is available from CRAN.",2011-12-07 +28462382,GATA4 Is Sufficient to Establish Jejunal Versus Ileal Identity in the Small Intestine.,"

Background & aims

Patterning of the small intestinal epithelium along its cephalocaudal axis establishes three functionally distinct regions: duodenum, jejunum, and ileum. Efficient nutrient assimilation and growth depend on the proper spatial patterning of specialized digestive and absorptive functions performed by duodenal, jejunal, and ileal enterocytes. When enterocyte function is disrupted by disease or injury, intestinal failure can occur. One approach to alleviate intestinal failure would be to restore lost enterocyte functions. The molecular mechanisms determining regionally defined enterocyte functions, however, are poorly delineated. We previously showed that GATA binding protein 4 (GATA4) is essential to define jejunal enterocytes. The goal of this study was to test the hypothesis that GATA4 is sufficient to confer jejunal identity within the intestinal epithelium.

Methods

To test this hypothesis, we generated a novel Gata4 conditional knock-in mouse line and expressed GATA4 in the ileum, where it is absent.

Results

We found that GATA4-expressing ileum lost ileal identity. The global gene expression profile of GATA4-expressing ileal epithelium aligned more closely with jejunum and duodenum rather than ileum. Focusing on jejunal vs ileal identity, we defined sets of jejunal and ileal genes likely to be regulated directly by GATA4 to suppress ileal identity and promote jejunal identity. Furthermore, our study implicates GATA4 as a transcriptional repressor of fibroblast growth factor 15 (Fgf15), which encodes an enterokine that has been implicated in an increasing number of human diseases.

Conclusions

Overall, this study refines our understanding of an important GATA4-dependent molecular mechanism to pattern the intestinal epithelium along its cephalocaudal axis by elaborating on GATA4's function as a crucial dominant molecular determinant of jejunal enterocyte identity. Microarray data from this study have been deposited into NCBI Gene Expression Omnibus (http://www.ncbi.nlm.nih.gov/geo) and are accessible through GEO series accession number GSE75870.",2017-01-24 +26776203,A BAYESIAN NONPARAMETRIC MODEL FOR RECONSTRUCTING TUMOR SUBCLONES BASED ON MUTATION PAIRS.,"We present a feature allocation model to reconstruct tumor subclones based on mutation pairs. The key innovation lies in the use of a pair of proximal single nucleotide variants (SNVs) for the subclone reconstruction as opposed to a single SNV. Using the categorical extension of the Indian buffet process (cIBP) we define the subclones as a vector of categorical matrices corresponding to a set of mutation pairs. Through Bayesian inference we report posterior probabilities of the number, genotypes and population frequencies of subclones in one or more tumor sample. We demonstrate the proposed methods using simulated and real-world data. A free software package is available at http://www.compgenome.org/pairclone.",2016-01-01 +26160885,Count ratio model reveals bias affecting NGS fold changes.,"Various biases affect high-throughput sequencing read counts. Contrary to the general assumption, we show that bias does not always cancel out when fold changes are computed and that bias affects more than 20% of genes that are called differentially regulated in RNA-seq experiments with drastic effects on subsequent biological interpretation. Here, we propose a novel approach to estimate fold changes. Our method is based on a probabilistic model that directly incorporates count ratios instead of read counts. 
It provides a theoretical foundation for pseudo-counts and can be used to estimate fold change credible intervals as well as normalization factors that outperform currently used normalization methods. We show that fold change estimates are significantly improved by our method by comparing RNA-seq derived fold changes to qPCR data from the MAQC/SEQC project as a reference and analyzing random barcoded sequencing data. Our software implementation is freely available from the project website http://www.bio.ifi.lmu.de/software/lfc.",2015-07-08 +26528177,"Cognitive training with and without additional physical activity in healthy older adults: cognitive effects, neurobiological mechanisms, and prediction of training success.","Data is inconsistent concerning the question whether cognitive-physical training (CPT) yields stronger cognitive gains than cognitive training (CT). Effects of additional counseling, neurobiological mechanisms, and predictors have scarcely been studied. Healthy older adults were trained with CT (n = 20), CPT (n = 25), or CPT with counseling (CPT+C; n = 23). Cognition, physical fitness, BDNF, IGF-1, and VEGF were assessed at pre- and post-test. No interaction effects were found except for one effect showing that CPT+C led to stronger gains in verbal fluency than CPT (p = 0.03). However, this superiority could not be assigned to additional physical training gains. Low baseline cognitive performance and BDNF, not carrying apoE4, gains in physical fitness and the moderation of gains in physical fitness × gains in BDNF predicted training success. Although all types of interventions seem successful to enhance cognition, our data do not support the hypotheses that CPT shows superior CT gains compared to CT or that CPT+C adds merit to CPT. However, as CPT leads to additional gains in physical fitness which in turn is known to have positive impact on cognition in the long-term, CPT seems more beneficial. 
Training success can partly be predicted by neuropsychological, neurobiological, and genetic parameters. Unique Identifier: WHO ICTRP (http://www.who.int/ictrp); ID: DRKS00005194.",2015-10-13 +26624790,SpinCouple: Development of a Web Tool for Analyzing Metabolite Mixtures via Two-Dimensional J-Resolved NMR Database.,"A new Web-based tool, SpinCouple, which is based on the accumulation of a two-dimensional (2D) (1)H-(1)H J-resolved NMR database from 598 metabolite standards, has been developed. The spectra include both J-coupling and (1)H chemical shift information; those are applicable to a wide array of spectral annotation, especially for metabolic mixture samples that are difficult to label through the attachment of (13)C isotopes. In addition, the user-friendly application includes an absolute-quantitative analysis tool. Good agreement was obtained between known concentrations of 20-metabolite mixtures versus the calibration curve-based quantification results obtained from 2D-Jres spectra. We have examined the web tool availability using nine series of biological extracts, obtained from animal gut and waste treatment microbiota, fish, and plant tissues. This web-based tool is publicly available via http://emar.riken.jp/spincpl.",2015-12-16 +27095192,Interactive tree of life (iTOL) v3: an online tool for the display and annotation of phylogenetic and other trees.,"Interactive Tree Of Life (http://itol.embl.de) is a web-based tool for the display, manipulation and annotation of phylogenetic trees. It is freely available and open to everyone. The current version was completely redesigned and rewritten, utilizing current web technologies for speedy and streamlined processing. Numerous new features were introduced and several new data types are now supported. Trees with up to 100,000 leaves can now be efficiently displayed. 
Full interactive control over precise positioning of various annotation features and an unlimited number of datasets allow the easy creation of complex tree visualizations. iTOL 3 is the first tool which supports direct visualization of the recently proposed phylogenetic placements format. Finally, iTOL's account system has been redesigned to simplify the management of trees in user-defined workspaces and projects, as it is heavily used and currently handles already more than 500,000 trees from more than 10,000 individual users.",2016-04-19 +26574340,"PyEMMA 2: A Software Package for Estimation, Validation, and Analysis of Markov Models.","Markov (state) models (MSMs) and related models of molecular kinetics have recently received a surge of interest as they can systematically reconcile simulation data from either a few long or many short simulations and allow us to analyze the essential metastable structures, thermodynamics, and kinetics of the molecular system under investigation. However, the estimation, validation, and analysis of such models is far from trivial and involves sophisticated and often numerically sensitive methods. In this work we present the open-source Python package PyEMMA ( http://pyemma.org ) that provides accurate and efficient algorithms for kinetic model construction. PyEMMA can read all common molecular dynamics data formats, helps in the selection of input features, provides easy access to dimension reduction algorithms such as principal component analysis (PCA) and time-lagged independent component analysis (TICA) and clustering algorithms such as k-means, and contains estimators for MSMs, hidden Markov models, and several other models. Systematic model validation and error calculation methods are provided. PyEMMA offers a wealth of analysis functions such that the user can conveniently compute molecular observables of interest. 
We have derived a systematic and accurate way to coarse-grain MSMs to few states and to illustrate the structures of the metastable states of the system. Plotting functions to produce a manuscript-ready presentation of the results are available. In this work, we demonstrate the features of the software and show new methodological concepts and results produced by PyEMMA.",2015-10-14 +27503225,RNAcommender: genome-wide recommendation of RNA-protein interactions.,"

Motivation

Information about RNA-protein interactions is a vital pre-requisite to tackle the dissection of RNA regulatory processes. Despite the recent advances of the experimental techniques, the currently available RNA interactome involves a small portion of the known RNA binding proteins. The importance of determining RNA-protein interactions, coupled with the scarcity of the available information, calls for in silico prediction of such interactions.

Results

We present RNAcommender, a recommender system capable of suggesting RNA targets to unexplored RNA binding proteins, by propagating the available interaction information taking into account the protein domain composition and the RNA predicted secondary structure. Our results show that RNAcommender is able to successfully suggest RNA interactors for RNA binding proteins using little or no interaction evidence. RNAcommender was tested on a large dataset of human RBP-RNA interactions, showing a good ranking performance (average AUC ROC of 0.75) and significant enrichment of correct recommendations for 75% of the tested RBPs. RNAcommender can be a valid tool to assist researchers in identifying potential interacting candidates for the majority of RBPs with uncharacterized binding preferences.

Availability and implementation

The software is freely available at http://rnacommender.disi.unitn.it CONTACT: gianluca.corrado@unitn.it or andrea.passerini@unitn.it Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-08 +27847486,Enhancing Executive Function and Neural Health in Bipolar Disorder through Reasoning Training.,"Cognitive deficits in executive function and memory among individuals with bipolar disorder (BD) are well-documented; however, only recently have efforts begun to address whether such cognitive deficits can be ameliorated through cognitive training. This pilot study examined the effects of a top-down, cognitive reasoning training program in adults with BD on both brain and cognitive measures. Twenty-seven participants (11 males, 16 females), aged 21-70 years old, completed the study. Participants completed neurocognitive testing and functional magnetic resonance imaging (fMRI) before and after training, consisting of 8 h (2 h/week) of training in small groups. The training delivered information processing strategies that were implemented and applicable to a variety of daily living contexts. Results indicated that participants showed significant gains in the primary outcome measure of complex abstraction, also referred to as gist reasoning, as well as in untrained domains of executive function and memory. We found a significant increase in resting cerebral blood flow (CBF) in left inferior frontal gyrus after cognitive training. We also found that resting CBF in the right frontal middle gyrus correlated positively with performance on the measure of complex abstraction. This feasibility study provides promising evidence that short-term reasoning training can enhance cognitive performance and brain health in adults with BD. These data motivate further efforts to explore adjuvant therapeutics to improve cognitive performance and underlying brain systems in bipolar, as well as other psychiatric disorders. 
Clinicaltrials.gov Identifier: NCT02843282, http://www.clinicaltrials.gov/ct2/show/NCT02843282.",2016-11-01 +27448381,SnapVideo: Personalized Video Generation for a Sightseeing Trip.,"Leisure tourism is an indispensable activity in urban people's life. Due to the popularity of intelligent mobile devices, a large number of photos and videos are recorded during a trip. Therefore, the ability to vividly and interestingly display these media data is a useful technique. In this paper, we propose SnapVideo, a new method that intelligently converts a personal album describing of a trip into a comprehensive, aesthetically pleasing, and coherent video clip. The proposed framework contains three main components. The scenic spot identification model first personalizes the video clips based on multiple prespecified audience classes. We then search for some auxiliary related videos from YouTube1 according to the selected photos. To comprehensively describe a scenery, the view generation module clusters the crawled video frames into a number of views. Finally, a probabilistic model is developed to fit the frames from multiple views into an aesthetically pleasing and coherent video clip, which optimally captures the semantics of a sightseeing trip. Extensive user studies demonstrated the competitiveness of our method from an aesthetic point of view. Moreover, quantitative analysis reflects that semantically important spots are well preserved in the final video clip.1https://www.youtube.com/.",2016-07-19 +25359890,Measuring the wisdom of the crowds in network-based gene function inference.,"

Motivation

Network-based gene function inference methods have proliferated in recent years, but measurable progress remains elusive. We wished to better explore performance trends by controlling data and algorithm implementation, with a particular focus on the performance of aggregate predictions.

Results

Hypothesizing that popular methods would perform well without hand-tuning, we used well-characterized algorithms to produce verifiably 'untweaked' results. We find that most state-of-the-art machine learning methods obtain 'gold standard' performance as measured in critical assessments in defined tasks. Across a broad range of tests, we see close alignment in algorithm performances after controlling for the underlying data being used. We find that algorithm aggregation provides only modest benefits, with a 17% increase in area under the ROC (AUROC) above the mean AUROC. In contrast, data aggregation gains are enormous with an 88% improvement in mean AUROC. Altogether, we find substantial evidence to support the view that additional algorithm development has little to offer for gene function prediction.

Availability and implementation

The supplementary information contains a description of the algorithms, the network data parsed from different biological data resources and a guide to the source code (available at: http://gillislab.cshl.edu/supplements/).",2014-10-29 +26666970,MycoCAP - Mycobacterium Comparative Analysis Platform.,"Mycobacterium spp. are renowned for being the causative agent of diseases like leprosy, Buruli ulcer and tuberculosis in human beings. With more and more mycobacterial genomes being sequenced, any knowledge generated from comparative genomic analysis would provide better insights into the biology, evolution, phylogeny and pathogenicity of this genus, thus helping in better management of diseases caused by Mycobacterium spp.With this motivation, we constructed MycoCAP, a new comparative analysis platform dedicated to the important genus Mycobacterium. This platform currently provides information of 2108 genome sequences of at least 55 Mycobacterium spp. A number of intuitive web-based tools have been integrated in MycoCAP particularly for comparative analysis including the PGC tool for comparison between two genomes, PathoProT for comparing the virulence genes among the Mycobacterium strains and the SuperClassification tool for the phylogenic classification of the Mycobacterium strains and a specialized classification system for strains of Mycobacterium abscessus. We hope the broad range of functions and easy-to-use tools provided in MycoCAP makes it an invaluable analysis platform to speed up the research discovery on mycobacteria for researchers. Database URL: http://mycobacterium.um.edu.my.",2015-12-15 +23462700,FindZebra: a search engine for rare diseases.,"

Background

The web has become a primary information resource about illnesses and treatments for both medical and non-medical users. Standard web search is by far the most common interface to this information. It is therefore of interest to find out how well web search engines work for diagnostic queries and what factors contribute to successes and failures. Among diseases, rare (or orphan) diseases represent an especially challenging and thus interesting class to diagnose as each is rare, diverse in symptoms and usually has scattered resources associated with it.

Methods

We design an evaluation approach for web search engines for rare disease diagnosis which includes 56 real life diagnostic cases, performance measures, information resources and guidelines for customising Google Search to this task. In addition, we introduce FindZebra, a specialized (vertical) rare disease search engine. FindZebra is powered by open source search technology and uses curated freely available online medical information.

Results

FindZebra outperforms Google Search in both default set-up and customised to the resources used by FindZebra. We extend FindZebra with specialized functionalities exploiting medical ontological information and UMLS medical concepts to demonstrate different ways of displaying the retrieved results to medical experts.

Conclusions

Our results indicate that a specialized search engine can improve the diagnostic quality without compromising the ease of use of the currently widely popular standard web search. The proposed evaluation approach can be valuable for future development and benchmarking. The FindZebra search engine is available at http://www.findzebra.com/.",2013-02-23 +26673716,The Pfam protein families database: towards a more sustainable future.,"In the last two years the Pfam database (http://pfam.xfam.org) has undergone a substantial reorganisation to reduce the effort involved in making a release, thereby permitting more frequent releases. Arguably the most significant of these changes is that Pfam is now primarily based on the UniProtKB reference proteomes, with the counts of matched sequences and species reported on the website restricted to this smaller set. Building families on reference proteomes sequences brings greater stability, which decreases the amount of manual curation required to maintain them. It also reduces the number of sequences displayed on the website, whilst still providing access to many important model organisms. Matches to the full UniProtKB database are, however, still available and Pfam annotations for individual UniProtKB sequences can still be retrieved. Some Pfam entries (1.6%) which have no matches to reference proteomes remain; we are working with UniProt to see if sequences from them can be incorporated into reference proteomes. Pfam-B, the automatically-generated supplement to Pfam, has been removed. The current release (Pfam 29.0) includes 16 295 entries and 559 clans. 
The facility to view the relationship between families within a clan has been improved by the introduction of a new tool.",2015-12-15 +21983993,The ProteoRed MIAPE web toolkit: a user-friendly framework to connect and share proteomics standards.,"The development of the HUPO-PSI's (Proteomics Standards Initiative) standard data formats and MIAPE (Minimum Information About a Proteomics Experiment) guidelines should improve proteomics data sharing within the scientific community. Proteomics journals have encouraged the use of these standards and guidelines to improve the quality of experimental reporting and ease the evaluation and publication of manuscripts. However, there is an evident lack of bioinformatics tools specifically designed to create and edit standard file formats and reports, or embed them within proteomics workflows. In this article, we describe a new web-based software suite (The ProteoRed MIAPE web toolkit) that performs several complementary roles related to proteomic data standards. First, it can verify that the reports fulfill the minimum information requirements of the corresponding MIAPE modules, highlighting inconsistencies or missing information. Second, the toolkit can convert several XML-based data standards directly into human readable MIAPE reports stored within the ProteoRed MIAPE repository. Finally, it can also perform the reverse operation, allowing users to export from MIAPE reports into XML files for computational processing, data sharing, or public database submission. The toolkit is thus the first application capable of automatically linking the PSI's MIAPE modules with the corresponding XML data exchange standards, enabling bidirectional conversions. 
This toolkit is freely available at http://www.proteored.org/MIAPE/.",2011-10-01 +26057674,"DATASW, a tool for HPLC-SAXS data analysis.","Small-angle X-ray scattering (SAXS) in solution is a common low-resolution method which can efficiently complement the high-resolution information obtained by crystallography or NMR. Sample monodispersity is key to reliable SAXS data interpretation and model building. Beamline setups with inline high-performance liquid chromatography (HPLC) are particularly useful for accurate profiling of heterogeneous samples. The program DATASW performs averaging of individual data frames from HPLC-SAXS experiments using a sliding window of a user-specified size, calculates overall parameters [I(0), Rg, Dmax and molecular weight] and predicts the folding state (folded/unfolded) of the sample. Applications of DATASW are illustrated for several proteins with various oligomerization behaviours recorded on different beamlines. DATASW binaries for major operating systems can be downloaded from http://datasw.sourceforge.net/.",2015-05-23 +27565432,G23D: Online tool for mapping and visualization of genomic variants on 3D protein structures.,"

Background

Evaluation of the possible implications of genomic variants is an increasingly important task in the current high throughput sequencing era. Structural information however is still not routinely exploited during this evaluation process. The main reasons can be attributed to the partial structural coverage of the human proteome and the lack of tools which conveniently convert genomic positions, which are the frequent output of genomic pipelines, to proteins and structure coordinates.

Results

We present G23D, a tool for conversion of human genomic coordinates to protein coordinates and protein structures. G23D allows mapping of genomic positions/variants on evolutionary related (and not only identical) protein three dimensional (3D) structures as well as on theoretical models. By doing so it significantly extends the space of variants for which structural insight is feasible. To facilitate interpretation of the variant consequence, pathogenic variants, functional sites and polymorphism sites are displayed on protein sequence and structure diagrams alongside the input variants. G23D also provides modeling of the mutant structure, analysis of intra-protein contacts and instant access to functional predictions and predictions of thermo-stability changes. G23D is available at http://www.sheba-cancer.org.il/G23D .

Conclusions

G23D extends the fraction of variants for which structural analysis is applicable and provides better and faster accessibility for structural data to biologists and geneticists who routinely work with genomic information.",2016-08-26 +26076722,REDEMPTION: reduced dimension ensemble modeling and parameter estimation.,"

Unlabelled

Here, we present REDEMPTION (REduced Dimension Ensemble Modeling and Parameter estimaTION), a toolbox for parameter estimation and ensemble modeling of ordinary differential equations (ODEs) using time-series data. For models with more reactions than measured species, a common scenario in biological modeling, the parameter estimation is formulated as a nested optimization problem based on incremental parameter estimation strategy. REDEMPTION also includes a tool for the identification of an ensemble of parameter combinations that provide satisfactory goodness-of-fit to the data. The functionalities of REDEMPTION are accessible through a MATLAB user interface (UI), as well as through programming script. For computational speed-up, REDEMPTION provides a numerical parallelization option using MATLAB Parallel Computing toolbox.

Availability and implementation

REDEMPTION can be downloaded from http://www.cabsel.ethz.ch/tools/redemption.

Contact

rudi.gunawan@chem.ethz.ch.",2015-06-14 +25266226,Hybrid Bayesian-rank integration approach improves the predictive power of genomic dataset aggregation.,"

Motivation

Modern molecular technologies allow the collection of large amounts of high-throughput data on the functional attributes of genes. Often multiple technologies and study designs are used to address the same biological question such as which genes are overexpressed in a specific disease state. Consequently, there is considerable interest in methods that can integrate across datasets to present a unified set of predictions.

Results

An important aspect of data integration is being able to account for the fact that datasets may differ in how accurately they capture the biological signal of interest. While many methods to address this problem exist, they always rely either on dataset internal statistics, which reflect data structure and not necessarily biological relevance, or external gold standards, which may not always be available. We present a new rank aggregation method for data integration that requires neither external standards nor internal statistics but relies on Bayesian reasoning to assess dataset relevance. We demonstrate that our method outperforms established techniques and significantly improves the predictive power of rank-based aggregations. We show that our method, which does not require an external gold standard, provides reliable estimates of dataset relevance and allows the same set of data to be integrated differently depending on the specific signal of interest.

Availability

The method is implemented in R and is freely available at http://www.pitt.edu/~mchikina/BIRRA/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-29 +28243306,Canal Configuration of Mesiobuccal Roots in Permanent Maxillary First Molars in Iranian Population: A Systematic Review.,"

Objectives

It is essential for clinicians to have adequate knowledge about root canal configurations; although its morphology varies largely in different ethnicities and even in different individuals with the same ethnic background. The current study aims to review the root canal configurations of mesiobuccal roots of maxillary first molars in an Iranian population based on different epidemiological studies.

Materials and methods

A comprehensive search was conducted to retrieve articles related to root canal configuration and prevalence of each type of root canal based on Vertucci's classification for the mesiobuccal root of maxillary first molars. An electronic search was conducted in Medline, Scopus and Google Scholar from January 1984 to September 2015. The articles were evaluated and methods, population, number of teeth and percentage of each root canal type evaluated in each study were summarized in the data table. Websites such as http://www.magiran.com/ , http://health.barakatkns.com/journal-internal-list and www.sid.ir were used to search all related studies published in Persian.

Results

Totally, out of nine studies conducted on the Iranian populations in nine provinces of Iran and 798 teeth, the Vertucci's type I was the most common type (35.70%), followed by type II (30.37%), type IV (16.66%), type III (7.93%) and type V (2.61%).

Conclusions

From this review article, it is concluded that the root canal morphology of mesiobuccal roots of maxillary first molars in the Iranian population predominantly has more than one canal. Therefore, careful evaluation of radiographs and anatomy of the pulp chamber is essential in order to achieve a successful root canal therapy.",2016-11-01 +25503062,ViVar: a comprehensive platform for the analysis and visualization of structural genomic variation.,"Structural genomic variations play an important role in human disease and phenotypic diversity. With the rise of high-throughput sequencing tools, mate-pair/paired-end/single-read sequencing has become an important technique for the detection and exploration of structural variation. Several analysis tools exist to handle different parts and aspects of such sequencing based structural variation analyses pipelines. A comprehensive analysis platform to handle all steps, from processing the sequencing data, to the discovery and visualization of structural variants, is missing. The ViVar platform is built to handle the discovery of structural variants, from Depth Of Coverage analysis, aberrant read pair clustering to split read analysis. ViVar provides you with powerful visualization options, enables easy reporting of results and better usability and data management. The platform facilitates the processing, analysis and visualization, of structural variation based on massive parallel sequencing data, enabling the rapid identification of disease loci or genes. ViVar allows you to scale your analysis with your work load over multiple (cloud) servers, has user access control to keep your data safe and is easy expandable as analysis techniques advance. URL: https://www.cmgg.be/vivar/",2014-12-12 +27187205,PRESS: PRotEin S-Sulfenylation server.,"

Motivation

Transient S-sulfenylation of cysteine thiols mediated by reactive oxygen species plays a critical role in pathology, physiology and cell signaling. Therefore, discovery of new S-sulfenylated sites in proteins is of great importance towards understanding how protein function is regulated upon redox conditions.

Results

We developed PRESS (PRotEin S-Sulfenylation) web server, a server which can effectively predict the cysteine thiols of a protein that could undergo S-sulfenylation under redox conditions. We envisage that this server will boost and facilitate the discovery of new and currently unknown functions of proteins triggered upon redox conditions, signal regulation and transduction, thus uncovering the role of S-sulfenylation in human health and disease.

Availability and implementation

The PRESS web server is freely available at http://press-sulfenylation.cse.uoi.gr/

Contacts

agtzakos@gmail.com or gtzortzi@cs.uoi.gr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-13 +23481321,"First report on predictive chemometric modeling, 3D-toxicophore mapping and in silico screening of in vitro basal cytotoxicity of diverse organic chemicals.","Classification and regression based quantitative structure-toxicity relationship (QSTR) as well as toxicophore models were developed for the first time on basal cytotoxicity data (in vitro 3T3 neutral red uptake data) of a diverse series of chemicals (including drugs and environmental pollutants) collected from the ACuteTox database (http://www.acutetox.eu/). Statistically significant QSTR models were obtained using linear discriminant analysis (classification) and partial least squares (regression) methodologies. Generated toxicophore models showed four important features responsible for basal cytotoxicity: (i) two hydrophobic aliphatic groups (HYD Aliphatic), (ii) ring aromatic group (RA) and (iii) hydrogen bond donor (HBD). The most predictive hypothesis (Hypo 1) had a correlation coefficient of 0.932 for the training set, a low rms deviation of 1.105, and an acceptable cost difference of 62.8 bits, which represents a true correlation and a good predictivity. QSTR and toxicophore models were rigorously validated internally as well as externally along with the randomization test to nullify the possibilities of chance correlation. Our in silico models enable to identify the essential structural attributes and quantify the prime molecular pre-requisites which were chiefly responsible for in vitro basal cytotoxicity. 
The developed models were also implemented to screen basal cytotoxicity for huge number DrugBank database (http://www.drugbank.ca/) compounds.",2012-11-02 +26484149,Whole transcriptome RNA sequencing data from blood leukocytes derived from Parkinson's disease patients prior to and following deep brain stimulation treatment.,"Recent evidence demonstrates the power of RNA sequencing (RNA-Seq) for identifying valuable and urgently needed blood biomarkers and advancing both early and accurate detection of neurological diseases, and in particular Parkinson's disease (PD). RNA sequencing technology enables non-biased, high throughput, probe-independent inspection of expression data and high coverage and both quantification of global transcript levels as well as the detection of expressed exons and junctions given a sufficient sequencing depth (coverage). However, the analysis of sequencing data frequently presents a bottleneck. Tools for quantification of alternative splicing from sequenced libraries hardly exist at the present time, and methods that support multiple sequencing platforms are especially lacking. Here, we describe in details a whole RNA-Seq transcriptome dataset produced from PD patient's blood leukocytes. The samples were taken prior to, and following deep brain stimulation (DBS) treatment while being on stimulation and following 1 h of complete electrical stimulation cessation and from healthy control volunteers. We describe in detail the methodology applied for analyzing the RNA-Seq data including differential expression of long noncoding RNAs (lncRNAs). We also provide details of the corresponding analysis of in-depth splice isoform data from junction and exon reads, with the use of the software AltAnalyze. 
Both the RNA-Seq raw (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE42608) and analyzed data (https://www.synapse.org/#!Synapse:syn2805267) may be found valuable towards detection of novel blood biomarkers for PD.",2014-11-22 +22843981,Bayesian ontology querying for accurate and noise-tolerant semantic searches.,"

Motivation

Ontologies provide a structured representation of the concepts of a domain of knowledge as well as the relations between them. Attribute ontologies are used to describe the characteristics of the items of a domain, such as the functions of proteins or the signs and symptoms of disease, which opens the possibility of searching a database of items for the best match to a list of observed or desired attributes. However, naive search methods do not perform well on realistic data because of noise in the data, imprecision in typical queries and because individual items may not display all attributes of the category they belong to.

Results

We present a method for combining ontological analysis with Bayesian networks to deal with noise, imprecision and attribute frequencies and demonstrate an application of our method as a differential diagnostic support system for human genetics.

Availability

We provide an implementation for the algorithm and the benchmark at http://compbio.charite.de/boqa/.

Contact

Sebastian.Bauer@charite.de or Peter.Robinson@charite.de

Supplementary information

Supplementary Material for this article is available at Bioinformatics online.",2012-07-26 +27264959,Prediction of change in protein unfolding rates upon point mutations in two state proteins.,"Studies on protein unfolding rates are limited and challenging due to the complexity of unfolding mechanism and the larger dynamic range of the experimental data. Though attempts have been made to predict unfolding rates using protein sequence-structure information there is no available method for predicting the unfolding rates of proteins upon specific point mutations. In this work, we have systematically analyzed a set of 790 single mutants and developed a robust method for predicting protein unfolding rates upon mutations (Δlnku) in two-state proteins by combining amino acid properties and knowledge-based classification of mutants with multiple linear regression technique. We obtain a mean absolute error (MAE) of 0.79/s and a Pearson correlation coefficient (PCC) of 0.71 between predicted unfolding rates and experimental observations using jack-knife test. We have developed a web server for predicting protein unfolding rates upon mutation and it is freely available at https://www.iitm.ac.in/bioinfo/proteinunfolding/unfoldingrace.html. Prominent features that determine unfolding kinetics as well as plausible reasons for the observed outliers are also discussed.",2016-06-02 +25392413,The coffee genome hub: a resource for coffee genomes.,"The whole genome sequence of Coffea canephora, the perennial diploid species known as Robusta, has been recently released. In the context of the C. canephora genome sequencing project and to support post-genomics efforts, we developed the Coffee Genome Hub (http://coffee-genome.org/), an integrative genome information system that allows centralized access to genomics and genetics data and analysis tools to facilitate translational and applied research in coffee. We provide the complete genome sequence of C. 
canephora along with gene structure, gene product information, metabolism, gene families, transcriptomics, syntenic blocks, genetic markers and genetic maps. The hub relies on generic software (e.g. GMOD tools) for easy querying, visualizing and downloading research data. It includes a Genome Browser enhanced by a Community Annotation System, enabling the improvement of automatic gene annotation through an annotation editor. In addition, the hub aims at developing interoperability among other existing South Green tools managing coffee data (phylogenomics resources, SNPs) and/or supporting data analyses with the Galaxy workflow manager.",2014-11-11 +23618375,SNVDis: a proteome-wide analysis service for evaluating nsSNVs in protein functional sites and pathways.,"Amino acid changes due to non-synonymous variation are included as annotations for individual proteins in UniProtKB/Swiss-Prot and RefSeq which present biological data in a protein- or gene-centric fashion. Unfortunately, proteome-wide analysis of non-synonymous single-nucleotide variations (nsSNVs) is not easy to perform because information on nsSNVs and functionally important sites are not well integrated both within and between databases and their search engines. We have developed SNVDis that allows evaluation of proteome-wide nsSNV distribution in functional sites, domains and pathways. More specifically, we have integrated human-specific data from major variation databases (UniProtKB, dbSNP and COSMIC), comprehensive sequence feature annotation from UniProtKB, Pfam, RefSeq, Conserved Domain Database (CDD) and pathway information from Protein ANalysis THrough Evolutionary Relationships (PANTHER) and mapped all of them in a uniform and comprehensive way to the human reference proteome provided by UniProtKB/Swiss-Prot. 
Integrated information of active sites, pathways, binding sites, domains, which are extracted from a number of different sources, provides a detailed overview of how nsSNVs are distributed over the human proteome and pathways and how they intersect with functional sites of proteins. Additionally, it is possible to find out whether there is an over- or under-representation of nsSNVs in specific domains, pathways or user-defined protein lists. The underlying datasets are updated once every 3 months. SNVDis is freely available at http://hive.biochemistry.gwu.edu/tool/snvdis.",2012-12-05 +22731501,GRACE and the development of an education and training curriculum.,"Antimicrobial resistance is a serious threat and compromises the management of infectious disease. This has particular significance in relation to infections of the respiratory tract, which are the lead cause of antibiotic prescribing. Education is fundamental to the correct use of antibiotics. A novel open access curriculum has been developed in the context of a European Union funded research project Genomics to combat Resistance against Antibiotics in Community-acquired lower respiratory tract infections in Europe (GRACE http://www.grace-lrti.org). The curriculum was developed in modular format and populated with clinical and scientific topics relevant to community-acquired lower respiratory tract infections. This curriculum informed the content of a series of postgraduate courses and workshops and permitted the creation of an open access e-Learning portal. A total of 153 presentations matching the topics within the curriculum together with slide material and handouts and 104 webcasts are available through the GRACE e-Learning portal, which is fully searchable using a 'mindmap' to navigate the contents. Metrics of access provided a means for assessing usage. 
The GRACE project has permitted the development of a unique on-line open access curriculum that comprehensively addresses the issues relevant to community-acquired lower respiratory tract infections and has provided a resource not only for personal learning, but also to support independent teaching activities such as lectures, workshops, seminars and course work.",2012-06-25 +24341590,A BAC based physical map and genome survey of the rice false smut fungus Villosiclava virens.,"

Background

Rice false smut caused by Villosiclava virens is a devastating fungal disease that spreads in major rice-growing regions throughout the world. However, the genomic information for this fungal pathogen is limited and the pathogenic mechanism of this disease is still not clear. To facilitate genetic, molecular and genomic studies of this fungal pathogen, we constructed the first BAC-based physical map and performed the first genome survey for this species.

Results

High molecular weight genomic DNA was isolated from young mycelia of the Villosiclava virens strain UV-8b and a high-quality, large-insert and deep-coverage Bacterial Artificial Chromosome (BAC) library was constructed with the restriction enzyme HindIII. The BAC library consisted of 5,760 clones, which covers 22.7-fold of the UV-8b genome, with an average insert size of 140 kb and an empty clone rate of lower than 1%. BAC fingerprinting generated successful fingerprints for 2,290 BAC clones. Using the fingerprints, a whole genome-wide BAC physical map was constructed that contained 194 contigs (2,035 clones) spanning 51.2 Mb in physical length. Bidirectional-end sequencing of 4,512 BAC clones generated 6,560 high quality BAC end sequences (BESs), with a total length of 3,030,658 bp, representing 8.54% of the genome sequence. Analysis of the BESs revealed general genome information, including 51.52% GC content, 22.51% repetitive sequences, 376.12/Mb simple sequence repeat (SSR) density and approximately 36.01% coding regions. Sequence comparisons to other available fungal genome sequences through BESs showed high similarities to Metarhizium anisopliae, Trichoderma reesei, Nectria haematococca and Cordyceps militaris, which were generally in agreement with the 18S rRNA gene analysis results.

Conclusion

This study provides the first BAC-based physical map and genome information for the important rice fungal pathogen Villosiclava virens. The BAC clones, physical map and genome information will serve as fundamental resources to accelerate the genetic, molecular and genomic studies of this pathogen, including positional cloning, comparative genomic analysis and whole genome sequencing. The BAC library and physical map have been opened to researchers as public genomic resources (http://gresource.hzau.edu.cn/resource/resource.html).",2013-12-16 +23368412,A computational approach for identifying microRNA-target interactions using high-throughput CLIP and PAR-CLIP sequencing.,"

Background

MicroRNAs (miRNAs) play a critical role in down-regulating gene expression. By coupling with Argonaute family proteins, miRNAs bind to target sites on mRNAs and employ translational repression. A large amount of miRNA-target interactions (MTIs) have been identified by the crosslinking and immunoprecipitation (CLIP) and the photoactivatable-ribonucleoside-enhanced CLIP (PAR-CLIP) along with the next-generation sequencing (NGS). PAR-CLIP shows high efficiency of RNA co-immunoprecipitation, but it also leads to T to C conversion in miRNA-RNA-protein crosslinking regions. This artificial error obviously reduces the mappability of reads. However, a specific tool to analyze CLIP and PAR-CLIP data that takes T to C conversion into account is still needed.

Results

We herein propose the first CLIP and PAR-CLIP sequencing analysis platform specifically for miRNA target analysis, namely miRTarCLIP. From scratch, it automatically removes adaptor sequences from raw reads, filters low quality reads, reverts C to T, aligns reads to 3'UTRs, scans for read clusters, identifies high confidence miRNA target sites, and provides annotations from external databases. With multi-threading techniques and our novel C to T reversion procedure, miRTarCLIP greatly reduces the running time comparing to conventional approaches. In addition, miRTarCLIP serves with a web-based interface to provide better user experiences in browsing and searching targets of interested miRNAs. To demonstrate the superior functionality of miRTarCLIP, we applied miRTarCLIP to two public available CLIP and PAR-CLIP sequencing datasets. miRTarCLIP not only shows comparable results to that of other existing tools in a much faster speed, but also reveals interesting features among these putative target sites. Specifically, we used miRTarCLIP to disclose that T to C conversion within position 1-7 and that within position 8-14 of miRNA target sites are significantly different (p value = 0.02), and even more significant when focusing on sites targeted by top 102 highly expressed miRNAs only (p value = 0.01). These results comply with previous findings and further suggest that combining miRNA expression and PAR-CLIP data can improve accuracy of the miRNA target prediction.

Conclusion

To sum up, we devised a systematic approach for mining miRNA-target sites from CLIP-seq and PAR-CLIP sequencing data, and integrated the workflow with a graphical web-based browser, which provides a user friendly interface and detailed annotations of MTIs. We also showed through real-life examples that miRTarCLIP is a powerful tool for understanding miRNAs. Our integrated tool can be accessed online freely at http://miRTarCLIP.mbc.nctu.edu.tw.",2013-01-21 +26449930,HapFlow: visualizing haplotypes in sequencing data.,"

Summary

HapFlow is a python application for visualizing haplotypes present in sequencing data. It identifies variant profiles present in reads and creates an abstract visual representation of these profiles to make haplotypes easier to identify.

Availability and implementation

HapFlow is freely available (under a GPL license) for download (for Mac OS X, Unix and Microsoft Windows) from github (http://mjsull.github.io/HapFlow).

Contact

apolking@usc.edu.au.",2015-10-07 +27334475,PEP_scaffolder: using (homologous) proteins to scaffold genomes.,"

Motivation

Recovering the gene structures is one of the important goals of genome assembly. In low-quality assemblies, and even some high-quality assemblies, certain gene regions are still incomplete; thus, novel scaffolding approaches are required to complete gene regions.

Results

We developed an efficient and fast genome scaffolding method called PEP_scaffolder, using proteins to scaffold genomes. The pipeline aims to recover protein-coding gene structures. We tested the method on human contigs; using human UniProt proteins as guides, the improvement on N50 size was 17% increase with an accuracy of ∼97%. PEP_scaffolder improved the proportion of fully covered proteins among all proteins, which was close to the proportion in the finished genome. The method provided a high accuracy of 91% using orthologs of distant species. Tested on simulated fly contigs, PEP_scaffolder outperformed other scaffolders, with the shortest running time and the highest accuracy.

Availability and implementation

The software is freely available at http://www.fishbrowser.org/software/PEP_scaffolder/ CONTACT: lijt@cafs.ac.cn Supplementary information: Supplementary data are available at Bioinformatics online.",2016-06-22 +24040164,Genome-wide discovery and information resource development of DNA polymorphisms in cassava.,"Cassava (Manihot esculenta Crantz) is an important crop that provides food security and income generation in many tropical countries, and is known for its adaptability to various environmental conditions. Its draft genome sequence and many expressed sequence tags are now publicly available, allowing the development of cassava polymorphism information. Here, we describe the genome-wide discovery of cassava DNA polymorphisms. Using the alignment of predicted transcribed sequences from the cassava draft genome sequence and ESTs from GenBank, we discovered 10,546 single-nucleotide polymorphisms and 647 insertions and deletions. To facilitate molecular marker development for cassava, we designed 9,316 PCR primer pairs to amplify the genomic region around each DNA polymorphism. Of the discovered SNPs, 62.7% occurred in protein-coding regions. Disease-resistance genes were found to have a significantly higher ratio of nonsynonymous-to-synonymous substitutions. We identified 24 read-through (changes of a stop codon to a coding codon) and 38 premature stop (changes of a coding codon to a stop codon) single-nucleotide polymorphisms, and found that the 5 gene ontology terms in biological process were significantly different in genes with read-through single-nucleotide polymorphisms compared with all cassava genes. All data on the discovered DNA polymorphisms were organized into the Cassava Online Archive database, which is available at http://cassava.psc.riken.jp/.",2013-09-11 +26427375,Inferring synthetic lethal interactions from mutual exclusivity of genetic events in cancer.,"

Background

Synthetic lethality (SL) refers to the genetic interaction between two or more genes where only their co-alteration (e.g. by mutations, amplifications or deletions) results in cell death. In recent years, SL has emerged as an attractive therapeutic strategy against cancer: by targeting the SL partners of altered genes in cancer cells, these cells can be selectively killed while sparing the normal cells. Consequently, a number of studies have attempted prediction of SL interactions in human, a majority by extrapolating SL interactions inferred through large-scale screens in model organisms. However, these predicted SL interactions either do not hold in human cells or do not include genes that are (frequently) altered in human cancers, and are therefore not attractive in the context of cancer therapy.

Results

Here, we develop a computational approach to infer SL interactions directly from frequently altered genes in human cancers. It is based on the observation that pairs of genes that are altered in a (significantly) mutually exclusive manner in cancers are likely to constitute lethal combinations. Using genomic copy-number and gene-expression data from four cancers, breast, prostate, ovarian and uterine (total 3980 samples) from The Cancer Genome Atlas, we identify 718 genes that are frequently amplified or upregulated, and are likely to be synthetic lethal with six key DNA-damage response (DDR) genes in these cancers. By comparing with published data on gene essentiality (~16000 genes) from ten DDR-deficient cancer cell lines, we show that our identified genes are enriched among the top quartile of essential genes in these cell lines, implying that our inferred genes are highly likely to be (synthetic) lethal upon knockdown in these cell lines. Among the inferred targets are tousled-like kinase 2 (TLK2) and the deubiquitinating enzyme ubiquitin-specific-processing protease 7 (USP7) whose overexpression correlates with poor survival in cancers.

Conclusion

Mutual exclusivity between frequently occurring genetic events identifies synthetic lethal combinations in cancers. These identified genes are essential in cell lines, and are potential candidates for targeted cancer therapy. Availability: http://bioinformatics.org.au/tools-data/underMutExSL",2015-10-01 +22127867,FlyBase 101--the basics of navigating FlyBase.,"FlyBase (http://flybase.org) is the leading database and web portal for genetic and genomic information on the fruit fly Drosophila melanogaster and related fly species. Whether you use the fruit fly as an experimental system or want to apply Drosophila biological knowledge to another field of study, FlyBase can help you successfully navigate the wealth of available Drosophila data. Here, we review the FlyBase web site with novice and less-experienced users of FlyBase in mind and point out recent developments stemming from the availability of genome-wide data from the modENCODE project. The first section of this paper explains the organization of the web site and describes the report pages available on FlyBase, focusing on the most popular, the Gene Report. The next section introduces some of the search tools available on FlyBase, in particular, our heavily used and recently redesigned search tool QuickSearch, found on the FlyBase homepage. The final section concerns genomic data, including recent modENCODE (http://www.modencode.org) data, available through our Genome Browser, GBrowse.",2011-11-29 +22140115,"IPAVS: Integrated Pathway Resources, Analysis and Visualization System.","Integrated Pathway Resources, Analysis and Visualization System (iPAVS) is an integrated biological pathway database designed to support pathway discovery in the fields of proteomics, transcriptomics, metabolomics and systems biology. The key goal of IPAVS is to provide biologists access to expert-curated pathways from experimental data belonging to specific biological contexts related to cell types, tissues, organs and diseases. 
IPAVS currently integrates over 500 human pathways (consisting of 24,574 interactions) that include metabolic-, signaling- and disease-related pathways, drug-action pathways and several large process maps collated from other pathway resources. The IPAVS web interface allows biologists to browse and search pathway resources and provides tools for data import, management, visualization and analysis to support the interpretation of biological data in light of cellular processes. Systems Biology Graphical Notations (SBGN) and Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway notations are used for the visual display of pathway information. The integrated datasets in IPAVS are made available in several standard data formats that can be downloaded. IPAVS is available at: http://ipavs.cidms.org.",2011-12-02 +26818118,Identifying micro-inversions using high-throughput sequencing reads.,"

Background

The identification of inversions of DNA segments shorter than read length (e.g., 100 bp), defined as micro-inversions (MIs), remains challenging for next-generation sequencing reads. It is acknowledged that MIs are important genomic variation and may play roles in causing genetic disease. However, current alignment methods are generally insensitive to detect MIs. Here we develop a novel tool, MID (Micro-Inversion Detector), to identify MIs in human genomes using next-generation sequencing reads.

Results

The algorithm of MID is designed based on a dynamic programming path-finding approach. What makes MID different from other variant detection tools is that MID can handle small MIs and multiple breakpoints within an unmapped read. Moreover, MID improves reliability in low coverage data by integrating multiple samples. Our evaluation demonstrated that MID outperforms Gustaf, which can currently detect inversions from 30 bp to 500 bp.

Conclusions

To our knowledge, MID is the first method that can efficiently and reliably identify MIs from unmapped short next-generation sequencing reads. MID is reliable on low coverage data, which is suitable for large-scale projects such as the 1000 Genomes Project (1KGP). MID identified previously unknown MIs from the 1KGP that overlap with genes and regulatory elements in the human genome. We also identified MIs in cancer cell lines from Cancer Cell Line Encyclopedia (CCLE). Therefore our tool is expected to be useful to improve the study of MIs as a type of genetic variant in the human genome. The source code can be downloaded from: http://cqb.pku.edu.cn/ZhuLab/MID .",2016-01-11 +25720129,GenePEN: analysis of network activity alterations in complex diseases via the pairwise elastic net.,"Complex diseases are often characterized by coordinated expression alterations of genes and proteins which are grouped together in a molecular network. Identifying such interconnected and jointly altered gene/protein groups from functional omics data and a given molecular interaction network is a key challenge in bioinformatics. We describe GenePEN, a penalized logistic regression approach for sample classification via convex optimization, using a newly designed Pairwise Elastic Net penalty that favors the selection of discriminative genes/proteins according to their connectedness in a molecular interaction graph. An efficient implementation of the method finds provably optimal solutions on high-dimensional omics data in a few seconds and is freely available at http://lcsb-portal.uni.lu/bioinformatics.",2015-04-01 +26050742,DT-Web: a web-based application for drug-target interaction and drug combination prediction through domain-tuned network-based inference.,"

Background

The identification of drug-target interactions (DTI) is a costly and time-consuming step in drug discovery and design. Computational methods capable of predicting reliable DTI play an important role in the field. Algorithms may aim to design new therapies based on a single approved drug or a combination of them. Recently, recommendation methods relying on network-based inference in connection with knowledge coming from the specific domain have been proposed.

Description

Here we propose a web-based interface to the DT-Hybrid algorithm, which applies a recommendation technique based on bipartite network projection implementing resources transfer within the network. This technique combined with domain-specific knowledge expressing drugs and targets similarity is used to compute recommendations for each drug. Our web interface allows the users: (i) to browse all the predictions inferred by the algorithm; (ii) to upload their custom data on which they wish to obtain a prediction through a DT-Hybrid based pipeline; (iii) to help in the early stages of drug combinations, repositioning, substitution, or resistance studies by finding drugs that can act simultaneously on multiple targets in a multi-pathway environment. Our system is periodically synchronized with DrugBank and updated accordingly. The website is free, open to all users, and available at http://alpha.dmi.unict.it/dtweb/.

Conclusions

Our web interface allows users to search and visualize information on drugs and targets eventually providing their own data to compute a list of predictions. The user can visualize information about the characteristics of each drug, a list of predicted and validated targets, associated enzymes and transporters. A table containing key information and GO classification allows the users to perform their own analysis on our data. A special interface for data submission allows the execution of a pipeline, based on DT-Hybrid, predicting new targets with the corresponding p-values expressing the reliability of each group of predictions. Finally, it is also possible to specify a list of genes tracking down all the drugs that may have an indirect influence on them based on a multi-drug, multi-target, multi-pathway analysis, which aims to discover drugs for future follow-up studies.",2015-06-01 +26678286,Precise genotyping and recombination detection of Enterovirus.,"Enteroviruses (EV) with different genotypes cause diverse infectious diseases in humans and mammals. A correct EV typing result is crucial for effective medical treatment and disease control; however, the emergence of novel viral strains has impaired the performance of available diagnostic tools. Here, we present a web-based tool, named EVIDENCE (EnteroVirus In DEep conception, http://symbiont.iis.sinica.edu.tw/evidence), for EV genotyping and recombination detection. We introduce the idea of using mixed-ranking scores to evaluate the fitness of prototypes based on relatedness and on the genome regions of interest. Using phylogenetic methods, the most possible genotype is determined based on the closest neighbor among the selected references. To detect possible recombination events, EVIDENCE calculates the sequence distance and phylogenetic relationship among sequences of all sliding windows scanning over the whole genome. 
Detected recombination events are plotted in an interactive figure for viewing of fine details. In addition, all EV sequences available in GenBank were collected and revised using the latest classification and nomenclature of EV in EVIDENCE. These sequences are built into the database and are retrieved in an indexed catalog, or can be searched for by keywords or by sequence similarity. EVIDENCE is the first web-based tool containing pipelines for genotyping and recombination detection, with updated, built-in, and complete reference sequences to improve sensitivity and specificity. The use of EVIDENCE can accelerate genotype identification, aiding clinical diagnosis and enhancing our understanding of EV evolution.",2015-12-09 +26677931,SCMMTP: identifying and characterizing membrane transport proteins using propensity scores of dipeptides.,"

Background

Identifying putative membrane transport proteins (MTPs) and understanding the transport mechanisms involved remain important challenges for the advancement of structural and functional genomics. However, the transporter characters are mainly acquired from MTP crystal structures which are hard to crystalize. Therefore, it is desirable to develop bioinformatics tools for the effective large-scale analysis of available sequences to identify novel transporters and characterize such transporters.

Results

This work proposes a novel method (SCMMTP) based on the scoring card method (SCM) using dipeptide composition to identify and characterize MTPs from an existing dataset containing 900 MTPs and 660 non-MTPs which are separated into a training dataset consisting of 1,380 proteins and an independent dataset consisting of 180 proteins. The SCMMTP produced estimating propensity scores for amino acids and dipeptides as MTPs. The SCMMTP training and test accuracy levels respectively reached 83.81% and 76.11%. The test accuracy of support vector machine (SVM) using a complicated classification method with a low possibility for biological interpretation and position-specific substitution matrix (PSSM) as a protein feature is 80.56%, thus SCMMTP is comparable to SVM-PSSM. To identify MTPs, SCMMTP is applied to three datasets including: 1) human transmembrane proteins, 2) a photosynthetic protein dataset, and 3) a human protein database. The finding that MTPs show α-helix rich structure agrees with previous studies. The MTPs used residues with low hydration energy. It is hypothesized that, after filtering substrates, the hydrated water molecules need to be released from the pore regions.

Conclusions

SCMMTP yields estimating propensity scores for amino acids and dipeptides as MTPs, which can be used to identify novel MTPs and characterize transport mechanisms for use in further experiments.

Availability

http://iclab.life.nctu.edu.tw/iclab_webtools/SCMMTP/.",2015-12-09 +25055743,Novel non-parametric models to estimate evolutionary rates and divergence times from heterochronous sequence data.,"

Background

Early methods for estimating divergence times from gene sequence data relied on the assumption of a molecular clock. More sophisticated methods were created to model rate variation and used auto-correlation of rates, local clocks, or the so called ""uncorrelated relaxed clock"" where substitution rates are assumed to be drawn from a parametric distribution. In the case of Bayesian inference methods the impact of the prior on branching times is not clearly understood, and if the amount of data is limited the posterior could be strongly influenced by the prior.

Results

We develop a maximum likelihood method--Physher--that uses local or discrete clocks to estimate evolutionary rates and divergence times from heterochronous sequence data. Using two empirical data sets we show that our discrete clock estimates are similar to those obtained by other methods, and that Physher outperformed some methods in the estimation of the root age of an influenza virus data set. A simulation analysis suggests that Physher can outperform a Bayesian method when the real topology contains two long branches below the root node, even when evolution is strongly clock-like.

Conclusions

These results suggest it is advisable to use a variety of methods to estimate evolutionary rates and divergence times from heterochronous sequence data. Physher and the associated data sets used here are available online at http://code.google.com/p/physher/.",2014-07-24 +26680271,Prediction of linear B-cell epitopes of hepatitis C virus for vaccine development.,"

Background

High genetic heterogeneity in the hepatitis C virus (HCV) is the major challenge in the development of an effective vaccine. Existing studies for developing HCV vaccines have mainly focused on T-cell immune response. However, identification of linear B-cell epitopes that can stimulate B-cell response is one of the major tasks of peptide-based vaccine development. Owing to the variability in B-cell epitope length, the prediction of B-cell epitopes is much more complex than that of T-cell epitopes. Furthermore, the motifs of linear B-cell epitopes in different pathogens are quite different (e.g., HCV and hepatitis B virus). To cope with this challenge, this work aims to propose an HCV-customized sequence-based prediction method to identify B-cell epitopes of HCV.

Results

This work establishes an experimentally verified dataset comprising the B-cell response of HCV dataset consisting of 774 linear B-cell epitopes and 774 non B-cell epitopes from the Immune Epitope Database. An interpretable rule mining system of B-cell epitopes (IRMS-BE) is proposed to select informative physicochemical properties (PCPs) and then extracts several if-then rule-based knowledge for identifying B-cell epitopes. A web server Bcell-HCV was implemented using an SVM with the 34 informative PCPs, which achieved a training accuracy of 79.7% and test accuracy of 70.7% better than the SVM-based methods for identifying B-cell epitopes of HCV and the two general-purpose methods. This work performs advanced analysis of the 34 informative properties, and the results indicate that the most effective property is the alpha-helix structure of epitopes, which influences the connection between host cells and the E2 proteins of HCV. Furthermore, 12 interpretable rules are acquired from top-five PCPs and achieve a sensitivity of 75.6% and specificity of 71.3%. Finally, a conserved promising vaccine candidate, PDREMVLYQE, is identified for inclusion in a vaccine against HCV.

Conclusions

This work proposes an interpretable rule mining system IRMS-BE for extracting interpretable rules using informative physicochemical properties and a web server Bcell-HCV for predicting linear B-cell epitopes of HCV. IRMS-BE may also apply to predict B-cell epitopes for other viruses, which benefits the improvement of vaccines development of these viruses without significant modification. Bcell-HCV is useful for identifying B-cell epitopes of HCV antigen to help vaccine development, which is available at http://e045.life.nctu.edu.tw/BcellHCV.",2015-12-09 +26656949,An updated version of NPIDB includes new classifications of DNA-protein complexes and their families.,"The recent upgrade of nucleic acid-protein interaction database (NPIDB, http://npidb.belozersky.msu.ru/) includes a newly elaborated classification of complexes of protein domains with double-stranded DNA and a classification of families of related complexes. Our classifications are based on contacting structural elements of both DNA: the major groove, the minor groove and the backbone; and protein: helices, beta-strands and unstructured segments. We took into account both hydrogen bonds and hydrophobic interaction. The analyzed material contains 1942 structures of protein domains from 748 PDB entries. We have identified 97 interaction modes of individual protein domain-DNA complexes and 17 DNA-protein interaction classes of protein domain families. We analyzed the sources of diversity of DNA-protein interaction modes in different complexes of one protein domain family. The observed interaction mode is sometimes influenced by artifacts of crystallization or diversity in secondary structure assignment. The interaction classes of domain families are more stable and thus possess more biological sense than a classification of single complexes. Integration of the classification into NPIDB allows the user to browse the database according to the interacting structural elements of DNA and protein molecules. 
For each family, we present average DNA shape parameters in contact zones with domains of the family.",2015-12-09 +27665746,The motility of esophageal sphincters during liquid and solid bolus swallows: a multicenter normative value study of high-resolution manometry in China. ,"It is gradually accepted that solid bolus swallow needs to be added to the procedure of manometry. The motility differences in the upper esophageal sphincter (UES) and lower esophageal sphincter (LES) were not well described. Sierra Scientific Instruments solid-state high-resolution manometry (HRM) system, the most popular HRM system in China, lacks the Chinese normative values for both liquid and solid bolus swallow parameters. The esophageal HRM data of 88 healthy volunteers were analyzed. The parameters of both sphincters in resting stage were summarized and those during solid and liquid swallows were compared. Normative HRM values of sphincter parameters in solid and liquid bolus swallows in China were established. The UES residual pressure of solid bolus swallows was lower than that of liquid bolus (0.3±5.5 mm Hg vs 4.8±5.9 mm Hg, P=.000). The time parameters of UES relaxation between two types of bolus swallows were similar. In solid bolus swallows, the intrabolus pressure (IBP) (13.8±5.1 mm Hg vs 10.9±5.7 mm Hg, P=.000) and LES relaxation time (11.0±2.1 seconds vs 8.7±1.3 seconds, P=.000) were higher. The 4-second integrated relaxation pressure between both bolus swallows was similar. The function of the UES and LES between solid and liquid bolus swallows is different. Chinese HRM parameters are different from the Chicago Classification (http://www.chictr.org.cn, Number ChiCTR-EOC-15007147).",2016-09-25 +27203131,Reducing Emergency Department Visits for Acute Gastrointestinal Illnesses in North Carolina (USA) by Extending Community Water Service.,"

Background

Previous analyses have suggested that unregulated private drinking water wells carry a higher risk of exposure to microbial contamination than regulated community water systems. In North Carolina, ~35% of the state's population relies on private wells, but the health impact associated with widespread reliance on such unregulated drinking water sources is unknown.

Objectives

We estimated the total number of emergency department visits for acute gastrointestinal illness (AGI) attributable to microbial contamination in private wells in North Carolina per year, the costs of those visits, and the potential health benefits of extending regulated water service to households currently relying on private wells for their drinking water.

Methods

We developed a population intervention model using 2007-2013 data from all 122 North Carolina emergency departments along with microbial contamination data for all 2,120 community water systems and for 16,138 private well water samples collected since 2008.

Results

An estimated 29,400 (95% CI: 26,600, 32,200) emergency department visits per year for acute gastrointestinal illness were attributable to microbial contamination in drinking water, constituting approximately 7.3% (95% CI: 6.6, 7.9%) of all AGI-related visits. Of these attributable cases, 99% (29,200; 95% CI: 26,500, 31,900) were associated with private well contamination. The estimated statewide annual cost of emergency department visits attributable to microbiological contamination of drinking water is 40.2 million USD (95% CI: 2.58 million USD, 193 million USD), of which 39.9 million USD (95% CI: 2.56 million USD, 192 million USD) is estimated to arise from private well contamination. An estimated 2,920 (95% CI: 2,650, 3,190) annual emergency department visits could be prevented by extending community water service to 10% of the population currently relying on private wells.

Conclusions

This research provides new evidence that extending regulated community water service to populations currently relying on private wells may decrease the population burden of acute gastrointestinal illness.

Citation

DeFelice NB, Johnston JE, Gibson JM. 2016. Reducing emergency department visits for acute gastrointestinal illnesses in North Carolina (USA) by extending community water service. Environ Health Perspect 124:1583-1591; http://dx.doi.org/10.1289/EHP160.",2016-05-20 +24130308,FFAS-3D: improving fold recognition by including optimized structural features and template re-ranking.,"

Motivation

Homology detection enables grouping proteins into families and prediction of their structure and function. The range of application of homology-based predictions can be significantly extended by using sequence profiles and incorporation of local structural features. However, incorporation of the latter terms varies a lot between existing methods, and together with many examples of distant relations not recognized even by the best methods, suggests that further improvements are still possible.

Results

Here we describe recent improvements to the fold and function assignment system (FFAS) method, including adding optimized structural features (experimental or predicted), 'symmetrical' Z-score calculation and re-ranking the templates with a neural network. The alignment accuracy in the new FFAS-3D is now 11% higher than the original and comparable with the most accurate template-based structure prediction algorithms. At the same time, FFAS-3D has high success rate at the Structural Classification of Proteins (SCOP) family, superfamily and fold levels. Importantly, FFAS-3D results are not highly correlated with other programs suggesting that it may significantly improve meta-predictions. FFAS-3D does not require 3D structures of the templates, as using predicted features instead of structure-derived does not lead to the decrease of accuracy. Because of that, FFAS-3D can be used for databases other than Protein Data Bank (PDB) such as Protein families database or Clusters of orthologous groups thus extending its applications to functional annotations of genomes and protein families.

Availability and implementation

FFAS-3D is available at http://ffas.godziklab.org.",2013-10-15 +25622107,Inferring biological tasks using Pareto analysis of high-dimensional data.,"We present the Pareto task inference method (ParTI; http://www.weizmann.ac.il/mcb/UriAlon/download/ParTI) for inferring biological tasks from high-dimensional biological data. Data are described as a polytope, and features maximally enriched closest to the vertices (or archetypes) allow identification of the tasks the vertices represent. We demonstrate that human breast tumors and mouse tissues are well described by tetrahedrons in gene expression space, with specific tumor types and biological functions enriched at each of the vertices, suggesting four key tasks.",2015-01-26 +26388142,Estimation of isoform expression in RNA-seq data using a hierarchical Bayesian model.,"Estimation of gene or isoform expression is a fundamental step in many transcriptome analysis tasks, such as differential expression analysis, eQTL (or sQTL) studies, and biological network construction. RNA-seq technology enables us to monitor the expression on genome-wide scale at single base pair resolution and offers the possibility of accurately measuring expression at the level of isoform. However, challenges remain because of non-uniform read sampling and the presence of various biases in RNA-seq data. In this paper, we present a novel hierarchical Bayesian method to estimate isoform expression. While most of the existing methods treat gene expression as a by-product, we incorporate it into our model and explicitly describe its relationship with corresponding isoform expression using a Multinomial distribution. In this way, gene and isoform expression are included in a unified framework and it helps us achieve a better performance over other state-of-the-art algorithms for isoform expression estimation. 
The effectiveness of the proposed method is demonstrated using both simulated data with known ground truth and two real RNA-seq datasets from MAQC project. The codes are available at http://www.math.pku.edu.cn/teachers/dengmh/GIExp/.",2015-08-11 +25309735,"SkateBase, an elasmobranch genome project and collection of molecular resources for chondrichthyan fishes.","Chondrichthyan fishes are a diverse class of gnathostomes that provide a valuable perspective on fundamental characteristics shared by all jawed and limbed vertebrates. Studies of phylogeny, species diversity, population structure, conservation, and physiology are accelerated by genomic, transcriptomic and protein sequence data. These data are widely available for many sarcopterygii (coelacanth, lungfish and tetrapods) and actinoptergii (ray-finned fish including teleosts) taxa, but limited for chondrichthyan fishes.  In this study, we summarize available data for chondrichthyes and describe resources for one of the largest projects to characterize one of these fish, Leucoraja erinacea, the little skate.  SkateBase ( http://skatebase.org) serves as the skate genome project portal linking data, research tools, and teaching resources.",2014-08-12 +21685050,Genomic data integration using guided clustering.,"

Motivation

In biomedical research transcriptomic, proteomic or metabolomic profiles of patient samples are often combined with genomic profiles from experiments in cell lines or animal models. Integrating experimental data with patient data is still a challenging task due to the lack of tailored statistical tools.

Results

Here we introduce guided clustering, a new data integration strategy that combines experimental and clinical high-throughput data. Guided clustering identifies sets of genes that stand out in experimental data while at the same time display coherent expression in clinical data. We report on two potential applications: The integration of clinical microarray data with (i) genome-wide chromatin immunoprecipitation assays and (ii) with cell perturbation assays. Unlike other analysis strategies, guided clustering does not analyze the two datasets sequentially but instead in a single joint analysis. In a simulation study and in several biological applications, guided clustering performs favorably when compared with sequential analysis approaches.

Availability

Guided clustering is available as a R-package from http://compdiag.uni-regensburg.de/software/guidedClustering.shtml. Documented R code of all our analysis is included in the Supplementary Materials. All newly generated data are available at the GEO database (GSE29700).

Contact

rainer.spang@klinik.uni-regensburg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-17 +28043519,Increased expression of efflux pump genes in extensively drug-resistant isolates of Mycobacterium tuberculosis.,"

Introduction

Extensively drug-resistant tuberculosis (XDR-TB) is defined as tuberculosis (TB) caused by Mycobacterium tuberculosis (MTB) strains that are multidrug resistant (MDR) and also resistant to a fluoroquinolone and to one injectable aminoglycoside or capreomycin. Whilst resistance in MTB has been associated with single nucleotide polymorphisms (SNPs), efflux pumps are thought to play a role in conferring resistance to MTB but little is known about them.

Methods

We studied XDR MTB (n=10) strains characterized by whole genome sequencing (WGS; http://www.ebi.ac.uk/ena/data/view/PRJEB7798). Phenotypic susceptibility testing was performed by the MGIT 960 (Becton, Dickinson and Co., NJ, USA) method. All XDR MTB strains were resistant to at least seven drugs whilst one XDR MTB strain, X54 was resistant to isoniazid, rifampicin, pyrazinamide, streptomycin, ethambutol, fluoroquinolones, capreomycin, kanamycin, amikacin, and ethionamide. The mRNA expression of efflux candidate genes Rv0194, Rv2688c, Rv1634, drrA, and drrB was determined in XDR MTB strains as compared with the ATCC reference strain, H37Rv, and drug-susceptible (DS) MTB (n=9) strains using the relative quantification method normalized to 16S rRNA.

Results

The mRNA expression levels of efflux genes Rv2688c (p=0.0037), Rv1634 (p=0.0042), drrA (p=0.0078) and drrB (p=0.0003) were upregulated in XDR-TB strains as compared with DS MTB strains.

Conclusion

The differences between XDR-TB and drug-susceptible isolates suggest that the increased expression levels of MTB efflux pump genes may contribute to drug resistance in extensively drug-resistant tuberculosis. Future studies are needed to determine whether combining efflux pump inhibitors to antitubercular drugs would be effective to treat resistant tuberculosis.",2016-11-25 +23846596,T2D@ZJU: a knowledgebase integrating heterogeneous connections associated with type 2 diabetes mellitus.,"Type 2 diabetes mellitus (T2D), affecting >90% of the diabetic patients, is one of the major threats to human health. A comprehensive understanding of the mechanisms of T2D at molecular level is essential to facilitate the related translational research. Here, we introduce a comprehensive and up-to-date knowledgebase for T2D, i.e. T2D@ZJU. T2D@ZJU contains three levels of heterogeneous connections associated with T2D, which is retrieved from pathway databases, protein-protein interaction databases and literature, respectively. In current release, T2D@ZJU contains 1078 T2D related entities such as proteins, protein complexes, drugs and others together with their corresponding relationships, which include 3069 manually curated connections, 14,893 protein-protein interactions and 26,716 relationships identified by text-mining technology. Moreover, T2D@ZJU provides a user-friendly web interface for users to browse and search data. A Cytoscape Web-based interactive network browser is available to visualize the corresponding network relationships between T2D-related entities. The functionality of T2D@ZJU is shown by means of several case studies. Database URL: http://tcm.zju.edu.cn/t2d.",2013-07-11 +26545820,Probabilistic models of genetic variation in structured populations applied to global human studies.,"

Motivation

Modern population genetics studies typically involve genome-wide genotyping of individuals from a diverse network of ancestries. An important problem is how to formulate and estimate probabilistic models of observed genotypes that account for complex population structure. The most prominent work on this problem has focused on estimating a model of admixture proportions of ancestral populations for each individual. Here, we instead focus on modeling variation of the genotypes without requiring a higher-level admixture interpretation.

Results

We formulate two general probabilistic models, and we propose computationally efficient algorithms to estimate them. First, we show how principal component analysis can be utilized to estimate a general model that includes the well-known Pritchard-Stephens-Donnelly admixture model as a special case. Noting some drawbacks of this approach, we introduce a new 'logistic factor analysis' framework that seeks to directly model the logit transformation of probabilities underlying observed genotypes in terms of latent variables that capture population structure. We demonstrate these advances on data from the Human Genome Diversity Panel and 1000 Genomes Project, where we are able to identify SNPs that are highly differentiated with respect to structure while making minimal modeling assumptions.

Availability and implementation

A Bioconductor R package called lfa is available at http://www.bioconductor.org/packages/release/bioc/html/lfa.html

Contact

jstorey@princeton.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-06 +22772971,Discovering subgroups using descriptive models of adverse outcomes in medical care.,"

Objectives

Hospital discharge databases store hundreds of thousands of patients. These datasets are usually used by health insurance companies to process claims from hospitals, but they also represent a rich source of information about the patterns of medical care. The proposed subgroup discovery method aims to improve the efficiency of detecting interpretable subgroups in data.

Methods

Supervised descriptive rule discovery techniques can prove inefficient in cases when target class samples represent only an extremely small amount of all available samples. Our approach aims to balance the number of samples in target and control groups prior to subgroup discovery process. Additionally, we introduce some improvements to an existing subgroup discovery algorithm enhancing the user experience and making the descriptive data mining process and visualization of rules more user friendly.

Results

Instance-based subspace subgroup discovery introduced in this paper is demonstrated on hospital discharge data with focus on medical errors. In general, the number of patients with a recorded diagnosis related to a medical error is relatively small in comparison to patients where medical errors did not occur. The ability to produce comprehensible and simple models with high degree of confidence, support, and predictive power using the proposed method is demonstrated.

Conclusions

This paper introduces a subspace subgroup discovery process that can be applied in all settings where a large number of samples with relatively small number of target class samples are present. The proposed method is implemented in Weka machine learning environment and is available at http://ri.fzv.uni-mb.si/ssd.",2012-07-05 +26644460,"The Vigna Genome Server, 'VigGS': A Genomic Knowledge Base of the Genus Vigna Based on High-Quality, Annotated Genome Sequence of the Azuki Bean, Vigna angularis (Willd.) Ohwi & Ohashi.","The genus Vigna includes legume crops such as cowpea, mungbean and azuki bean, as well as >100 wild species. A number of the wild species are highly tolerant to severe environmental conditions including high-salinity, acid or alkaline soil; drought; flooding; and pests and diseases. These features of the genus Vigna make it a good target for investigation of genetic diversity in adaptation to stressful environments; however, a lack of genomic information has hindered such research in this genus. Here, we present a genome database of the genus Vigna, Vigna Genome Server ('VigGS', http://viggs.dna.affrc.go.jp), based on the recently sequenced azuki bean genome, which incorporates annotated exon-intron structures, along with evidence for transcripts and proteins, visualized in GBrowse. VigGS also facilitates user construction of multiple alignments between azuki bean genes and those of six related dicot species. In addition, the database displays sequence polymorphisms between azuki bean and its wild relatives and enables users to design primer sequences targeting any variant site. VigGS offers a simple keyword search in addition to sequence similarity searches using BLAST and BLAT. To incorporate up to date genomic information, VigGS automatically receives newly deposited mRNA sequences of pre-set species from the public database once a week. 
Users can refer to not only gene structures mapped on the azuki bean genome on GBrowse but also relevant literature of the genes. VigGS will contribute to genomic research into plant biotic and abiotic stresses and to the future development of new stress-tolerant crops.",2015-12-07 +27402902,New quality measure for SNP array based CNV detection.,"

Motivation

Only a few large systematic studies have evaluated the impact of copy number variants (CNVs) on common diseases. Several million individuals have been genotyped on single nucleotide variation arrays, which could be used for genome-wide CNVs association studies. However, CNV calls remain prone to false positives and only empirical filtering strategies exist in the literature. To overcome this issue, we defined a new quality score (QS) estimating the probability of a CNV called by PennCNV to be confirmed by other software.

Results

Out-of-sample comparison showed that the correlation between the consensus CNV status and the QS is twice as high as it is for any previously proposed CNV filters. ROC curves displayed an AUC higher than 0.8 and simulations showed an increase up to 20% in statistical power when using QS in comparison to other filtering strategies. Superior performance was confirmed also for alternative consensus CNV definition and through improving known CNV-trait associations.

Availability and implementation

http://goo.gl/T6yuFM CONTACT: zoltan.kutalik@unil.ch or aurelien@mace@unil.ch Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-10 +23702710,CYCLONE--a utility for de novo sequencing of microbial cyclic peptides.,"We have developed a de novo sequencing software tool (CYCLONE) and applied it for determination of cyclic peptides. The program uses a non-redundant database of 312 nonribosomal building blocks identified to date in bacteria and fungi (more than 230 additional residues in the database list were isobaric). The software was used to fully characterize the tandem mass spectrum of several cyclic peptides and provide sequence tags. The general strategy of the script was based on fragment ion pre-characterization to accomplish unambiguous b-ion series assignments. Showcase examples were a cyclic tetradepsipeptide beauverolide, a cyclic hexadepsipeptide roseotoxin A, a lasso-like hexapeptide pseudacyclin A, and a cyclic undecapeptide cyclosporin A. The extent of ion scrambling in smaller peptides was as low as 5 % of total ion current; this demonstrated the feasibility of CYCLONE de novo sequencing. The robustness of the script was also tested against database sets of various sizes and isotope-containing data. It can be downloaded from the http://ms.biomed.cas.cz/MSTools/ website.",2013-05-24 +30731816,"First Report of a Bionectria sp. Associated with a Stem Rot of Cardon Cactus (Pachycereus pringlei) in Baja California Sur, Mexico.","Giant cardon (Pachycereus pringlei ((S.Watson) Britton & Rose) is the most common cactus in northwestern Mexico and is endemic to the Baja California Peninsula and Sonora Desert. A large part of the peninsula (El Vizcaino Biosphere Reserve and Gulf of California) now consists of protected areas and is classified as a World Heritage site by UNESCO ( http://whc.unesco.org/en/list/1182 ). 
Cardon cactus is an important ecological resource for indigenous people and is used as feed for range cattle. Since 2000, in the central and southern part of the State of Baja California Sur, an apical stem rot has spread to ~17% of the natural cardon population around San Pedro (23°29'N, 110°12'W), La Paz (24°08'N, 110°18'W), and El Comitán (24°05'N, 110°21'W). Affected cacti display necrosis of apical branches, dry rot, cracks in the stem and branches, bronzing of mature spines surrounding the affected area, and reddish brown gummy exudate. Thirty samples from the edges of symptomatic lesions were surface disinfected for 2 min in 0.8% (wt/vol) NaOCl and ethanol (70%), rinsed in sterile, distilled water, and grown on potato dextrose agar at 27°C. A cottony, brownish fungus was consistently isolated from affected tissues. Koch's postulates were performed in pots of 10 cm in diameter with 5-year-old cacti inoculated (9-day-old mycelia) and incubated (15 days) at room temperature (26°C). The rough, dry, brownish, circular lesions that appeared were the same as those observed in the field. Healthy cacti inoculated with potato dextrose agar plugs were symptomless. The fungus was always reisolated from infected cacti and morphological examinations (2) were performed: one-septate, olive-green, smooth, ellipsoidal conidium and two-celled ascospores (15 to 20 × 5 to 7 μm) were present. Also present were conidial masses from monomorphic, penicillate conidiophores in sporodochia. Cottony and white-to-light yellow PDA colonies were observed. Genomic DNA was extracted from lyophilized hyphae using the method described by O'Donnell (1) or with a DNeasy Plant Mini Kit (Qiagen, Hilden, Germany). The internal transcribed spacer (ITS) regions 1 and 2 of the 5.8, 18, and 28S ribosomal RNA genes were amplified with the primer pairs ITS1 and ITS4 (3). 
The expected amplicon of 571 bp was sequenced and compared with fungal sequences available from the GenBank-EMBL database using the BlastN and CLUSTAL programs (MegAlign, DNASTAR, Madison, WI). The closest nucleotide similarity had 99% identity with a Bionectria sp. (GenBank Accession No. HM849058.1). To our knowledge, on the basis of morphological characteristics, DNA comparisons, and pathogenicity tests, this is the first report of a Bionectria sp. causing an apical stem rot disease in cardon cacti in Mexico. Since there are no control measures in Mexico there is a permanent risk that the disease will spread to healthy areas. References: (1) K. O'Donell et al. Mycologia 92:919, 2000. (2) H. J. Schroers. Stud. Mycol. 46:1, 2001. (3) T. J. White et al. PCR Protocols: A Guide to Methods and Applications. Academic Press, San Diego, 1990.",2012-02-01 +27920726,HRVanalysis: A Free Software for Analyzing Cardiac Autonomic Activity.,"Since the pioneering studies of the 1960s, heart rate variability (HRV) has become an increasingly used non-invasive tool for examining cardiac autonomic functions and dysfunctions in various populations and conditions. Many calculation methods have been developed to address these issues, each with their strengths and weaknesses. Although, its interpretation may remain difficult, this technique provides, from a non-invasive approach, reliable physiological information that was previously inaccessible, in many fields including death and health prediction, training and overtraining, cardiac and respiratory rehabilitation, sleep-disordered breathing, large cohort follow-ups, children's autonomic status, anesthesia, or neurophysiological studies. In this context, we developed HRVanalysis, a software to analyse HRV, used and improved for over 20 years and, thus, designed to meet laboratory requirements. The main strength of HRVanalysis is its wide application scope. 
In addition to standard analysis over short and long periods of RR intervals, the software allows time-frequency analysis using wavelet transform as well as analysis of autonomic nervous system status on surrounding scored events and on preselected labeled areas. Moreover, the interface is designed for easy study of large cohorts, including batch mode signal processing to avoid running repetitive operations. Results are displayed as figures or saved in TXT files directly employable in statistical softwares. Recordings can arise from RR or EKG files of different types such as cardiofrequencemeters, holters EKG, polygraphs, and data acquisition systems. HRVanalysis can be downloaded freely from the Web page at: https://anslabtools.univ-st-etienne.fr HRVanalysis is meticulously maintained and developed for in-house laboratory use. In this article, after a brief description of the context, we present an overall view of HRV analysis and we describe the methodological approach of the different techniques provided by the software.",2016-11-22 +26671572,Analysis of scientific output by spine surgeons from Japan: January 2000 to December 2013.,"

Background

Over the last decade, the growing body of work on spine pathology has led to developments and refinements in the areas of basic science, diagnosis and treatment of a variety of spine conditions. Scientific publications have a global impact on the international scientific community as they share vital information that can be applied by physicians worldwide to solve their everyday medical problems. The historical background of scientific publication in journals in Japan on the subject of spine is unclear.

Methods

We performed a literature search for publications by Japanese spine surgeons regarding spine or spinal cord topics using an online database: Pubmed.gov (http://www.ncbi.nlm.nih.gov/pubmed/). The results were stored and analyzed at the Laboratory of Clinical Studies and Basic Models of Spinal Disorders of the University of Caxias do Sul. Results were limited to articles published from January 2000 to December 2013. The search terms used were ""Japan"" AND (""spine"" OR ""spinal diseases"" OR ""spinal cord"" OR ""spinal cord diseases"" OR ""vertebroplasty"" OR ""arthrodesis"" OR ""discectomy"" OR ""foraminotomy"" OR ""laminectomy"" OR ""denervation"" OR ""back injuries""). Japanese spine surgeons were defined as spine surgeons from orthopedic or neurosurgical specialties where the publication was affiliated with Japanese services.

Results

A total of 16,140 articles were identified by the Medline search. Most of the articles were excluded based on information provided in the title and abstract as they were not related to spine surgery. This study comprised 1768 articles published in the Medline database by Japanese spine surgeons from 2000 to 2013. The number of publications rose in a linear fashion, with the number of papers published increasing by 5.4 per year (p = 0.038). In recent years the publications were increasingly performed in conjunction with the neurosurgery and orthopedics specialties.

Conclusions

This study showed a clear increase in publications (on Medline) by Japanese spine surgeons over the last 14 years. While this is a positive development, there is also cause for concern as there is some evidence that the number of young scientists is declining in Japan. Special attention to educating researchers and improving resources for research is crucial to further increase the number and quality of Japanese publications.",2015-12-06 +26436497,Screening of crucial long non-coding RNAs in oral epithelial dysplasia by serial analysis of gene expression.,"Oral epithelial dysplasia (OED) is a premalignant lesion of the oral mucosa. Considering the poor 5-year survival rate of oral cancer, further investigation is needed in order to determine the pathogenesis of OED. In the present study, serial analysis of gene expression (SAGE) data from patients with OED were compared to normal controls to identify differentially expressed genes (DEGs). SAGE data were obtained from the Gene Expression Omnibus, and included samples from patients with mild, moderate, or severe dysplasia. The DEGs were identified using the edgeR software package and functional-enrichment analysis was performed with the DAVID (https://david.ncifcrf.gov/) software program. The co-expression network was constructed using the CoExpress software and target genes of long non-coding RNAs (lncRNAs) were predicted according to the proximity between the lncRNAs and mRNAs in the genome. A total of 517 DEGs were identified, including 409 mRNAs and 108 lncRNAs. Functional-enrichment analysis showed that mRNAs and lncRNAs involved in epithelial cell differentiation, epithelium development, and epidermal cell differentiation were significantly enriched in the DEGs. Thirty-eight potential regulatory relationships were unveiled between lncRNAs and mRNAs, and two subnetworks were discovered by analyzing the topological properties of the co-expression network. 
In conclusion, we have identified key mRNAs and lncRNAs in OED, and these findings may aid in understanding the pathogenesis of OED and advance potential future treatments.",2015-10-02 +27153619,ProtPOS: a python package for the prediction of protein preferred orientation on a surface.,"

Unlabelled

Atomistic molecular dynamics simulation is a promising technique to investigate the energetics and dynamics in the protein-surface adsorption process which is of high relevance to modern biotechnological applications. To increase the chance of success in simulating the adsorption process, favorable orientations of the protein at the surface must be determined. Here, we present ProtPOS which is a lightweight and easy-to-use python package that can predict low-energy protein orientations on a surface of interest. It combines a fast conformational sampling algorithm with the energy calculation of GROMACS. The advantage of ProtPOS is it allows users to select any force fields suitable for the system at hand and provide structural output readily available for further simulation studies.

Availability and implementation

ProtPOS is freely available for academic and non-profit uses at http://cbbio.cis.umac.mo/software/protpos

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

shirleysiu@umac.mo.",2016-04-10 +27778106,"The telencephalon of the Göttingen minipig, cytoarchitecture and cortical surface anatomy.","During the last 20 years pigs have become increasingly popular in large animal translational neuroscience research as an economical and ethical feasible substitute to non-human primates. The anatomy of the pig telencephalon is, however, not well known. We present, accordingly, a detailed description of the surface anatomy and cytoarchitecture of the Göttingen minipig telencephalon based on macrophotos and consecutive high-power microphotographs of 15 μm thick paraffin embedded Nissl-stained coronal sections. In 1-year-old specimens the formalin perfused brain measures approximately 55 × 47 × 36 mm (length, width, height) and weighs around 69 g. The telencephalic part of the Göttingen minipig cerebrum covers a large surface area, which can be divided into a neocortical gyrencephalic part located dorsal to the rhinal fissure, and a ventral subrhinal part dominated by olfactory, amygdaloid, septal, and hippocampal structures. This part of the telencephalon is named the subrhinal lobe, and based on cytoarchitectural and sulcal anatomy, can be discerned from the remaining dorsally located neocortical perirhinal/insular, pericallosal, frontal, parietal, temporal, and occipital lobes. The inner subcortical structure of the minipig telencephalon is dominated by a prominent ventricular system and large basal ganglia, wherein the putamen and the caudate nucleus posterior and dorsally are separated into two entities by the internal capsule, whereas both structures ventrally fuse into a large accumbens nucleus. The presented anatomical data is accompanied by surface renderings and high-power macrophotographs illustrating the telencephalic sulcal pattern, and the localization of the identified lobes and cytoarchitectonic areas. 
Additionally, 24 representative Nissl-stained telencephalic coronal sections are presented as supplementary material in atlas form on http://www.cense.dk/minipig_atlas/index.html and referred to as S1-S24 throughout the manuscript.",2016-10-24 +26357091,Predicting Protein Function Using Multiple Kernels.,"High-throughput experimental techniques provide a wide variety of heterogeneous proteomic data sources. To exploit the information spread across multiple sources for protein function prediction, these data sources are transformed into kernels and then integrated into a composite kernel. Several methods first optimize the weights on these kernels to produce a composite kernel, and then train a classifier on the composite kernel. As such, these approaches result in an optimal composite kernel, but not necessarily in an optimal classifier. On the other hand, some approaches optimize the loss of binary classifiers and learn weights for the different kernels iteratively. For multi-class or multi-label data, these methods have to solve the problem of optimizing weights on these kernels for each of the labels, which are computationally expensive and ignore the correlation among labels. In this paper, we propose a method called Predicting Protein Function using Multiple Kernels (ProMK). ProMK iteratively optimizes the phases of learning optimal weights and reduces the empirical loss of multi-label classifier for each of the labels simultaneously. ProMK can integrate kernels selectively and downgrade the weights on noisy kernels. We investigate the performance of ProMK on several publicly available protein function prediction benchmarks and synthetic datasets. We show that the proposed approach performs better than previously proposed protein function prediction approaches that integrate multiple data sources and multi-label multiple kernel learning methods. 
The codes of our proposed method are available at https://sites.google.com/site/guoxian85/promk.",2015-01-01 +27531102,iRSpot-EL: identify recombination spots with an ensemble learning approach.,"

Motivation

Coexisting in a DNA system, meiosis and recombination are two indispensable aspects for cell reproduction and growth. With the avalanche of genome sequences emerging in the post-genomic age, it is an urgent challenge to acquire the information of DNA recombination spots because it can timely provide very useful insights into the mechanism of meiotic recombination and the process of genome evolution.

Results

To address such a challenge, we have developed a predictor, called iRSpot-EL, by fusing different modes of pseudo K-tuple nucleotide composition and mode of dinucleotide-based auto-cross covariance into an ensemble classifier of clustering approach. Five-fold cross tests on a widely used benchmark dataset have indicated that the new predictor remarkably outperforms its existing counterparts. Particularly, far beyond their reach, the new predictor can be easily used to conduct the genome-wide analysis and the results obtained are quite consistent with the experimental map.

Availability and implementation

For the convenience of most experimental scientists, a user-friendly web-server for iRSpot-EL has been established at http://bioinformatics.hitsz.edu.cn/iRSpot-EL/, by which users can easily obtain their desired results without the need to go through the complicated mathematical equations involved.

Contact

bliu@gordonlifescience.org or bliu@insun.hit.edu.cn Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-16 +26527720,MetaNetX/MNXref--reconciliation of metabolites and biochemical reactions to bring together genome-scale metabolic networks.,"MetaNetX is a repository of genome-scale metabolic networks (GSMNs) and biochemical pathways from a number of major resources imported into a common namespace of chemical compounds, reactions, cellular compartments--namely MNXref--and proteins. The MetaNetX.org website (http://www.metanetx.org/) provides access to these integrated data as well as a variety of tools that allow users to import their own GSMNs, map them to the MNXref reconciliation, and manipulate, compare, analyze, simulate (using flux balance analysis) and export the resulting GSMNs. MNXref and MetaNetX are regularly updated and freely available.",2015-11-02 +27389463,"Incident Management Systems and Building Emergency Management Capacity during the 2014-2016 Ebola Epidemic - Liberia, Sierra Leone, and Guinea.","Establishing a functional incident management system (IMS) is important in the management of public health emergencies. In response to the 2014-2016 Ebola virus disease (Ebola) epidemic in West Africa, CDC established the Emergency Management Development Team (EMDT) to coordinate technical assistance for developing emergency management capacity in Guinea, Liberia, and Sierra Leone. EMDT staff, deployed staff, and partners supported each country to develop response goals and objectives, identify gaps in response capabilities, and determine strategies for coordinating response activities. To monitor key programmatic milestones and assess changes in emergency management and response capacities over time, EMDT implemented three data collection methods in country: coordination calls, weekly written situation reports, and an emergency management dashboard tool. 
On the basis of the information collected, EMDT observed improvements in emergency management capacity over time in all three countries. The collaborations in each country yielded IMS structures that streamlined response and laid the foundation for long-term emergency management programs. The activities summarized in this report would not have been possible without collaboration with many U.S. and international partners (http://www.cdc.gov/vhf/ebola/outbreaks/2014-west-africa/partners.html).",2016-07-08 +22110034,Comparative interactomics with Funcoup 2.0.,"FunCoup (http://FunCoup.sbc.su.se) is a database that maintains and visualizes global gene/protein networks of functional coupling that have been constructed by Bayesian integration of diverse high-throughput data. FunCoup achieves high coverage by orthology-based integration of data sources from different model organisms and from different platforms. We here present release 2.0 in which the data sources have been updated and the methodology has been refined. It contains a new data type Genetic Interaction, and three new species: chicken, dog and zebra fish. As FunCoup extensively transfers functional coupling information between species, the new input datasets have considerably improved both coverage and quality of the networks. The number of high-confidence network links has increased dramatically. For instance, the human network has more than eight times as many links above confidence 0.5 as the previous release. FunCoup provides facilities for analysing the conservation of subnetworks in multiple species. We here explain how to do comparative interactomics on the FunCoup website.",2011-11-21 +27145195,A pointing facilitation system for motor-impaired users combining polynomial smoothing and time-weighted gradient target prediction models.,"As computers become more and more essential for everyday life, people who cannot use them are missing out on an important tool. 
The predominant method of interaction with a screen is a mouse, and difficulty in using a mouse can be a huge obstacle for people who would otherwise gain great value from using a computer. If mouse pointing were to be made easier, then a large number of users may be able to begin using a computer efficiently where they may previously have been unable to. The present article aimed to improve pointing speeds for people with arm or hand impairments. The authors investigated different smoothing and prediction models on a stored data set involving 25 people, and the best of these algorithms were chosen. A web-based prototype was developed combining a polynomial smoothing algorithm with a time-weighted gradient target prediction model. The adapted interface gave an average improvement of 13.5% in target selection times in a 10-person study of representative users of the system. A demonstration video of the system is available at https://youtu.be/sAzbrKHivEY.",2016-05-04 +23180769,Collaborative biocuration--text-mining development task for document prioritization for curation.,"The Critical Assessment of Information Extraction systems in Biology (BioCreAtIvE) challenge evaluation is a community-wide effort for evaluating text mining and information extraction systems for the biological domain. The 'BioCreative Workshop 2012' subcommittee identified three areas, or tracks, that comprised independent, but complementary aspects of data curation in which they sought community input: literature triage (Track I); curation workflow (Track II) and text mining/natural language processing (NLP) systems (Track III). Track I participants were invited to develop tools or systems that would effectively triage and prioritize articles for curation and present results in a prototype web interface. 
Training and test datasets were derived from the Comparative Toxicogenomics Database (CTD; http://ctdbase.org) and consisted of manuscripts from which chemical-gene-disease data were manually curated. A total of seven groups participated in Track I. For the triage component, the effectiveness of participant systems was measured by aggregate gene, disease and chemical 'named-entity recognition' (NER) across articles; the effectiveness of 'information retrieval' (IR) was also measured based on 'mean average precision' (MAP). Top recall scores for gene, disease and chemical NER were 49, 65 and 82%, respectively; the top MAP score was 80%. Each participating group also developed a prototype web interface; these interfaces were evaluated based on functionality and ease-of-use by CTD's biocuration project manager. In this article, we present a detailed description of the challenge and a summary of the results.",2012-11-22 +26678663,EC: an efficient error correction algorithm for short reads.,"

Background

In highly parallel next-generation sequencing (NGS) techniques millions to billions of short reads are produced from a genomic sequence in a single run. Due to the limitation of the NGS technologies, there could be errors in the reads. The error rate of the reads can be reduced with trimming and by correcting the erroneous bases of the reads. It helps to achieve high quality data and the computational complexity of many biological applications will be greatly reduced if the reads are first corrected. We have developed a novel error correction algorithm called EC and compared it with four other state-of-the-art algorithms using both real and simulated sequencing reads.

Results

We have done extensive and rigorous experiments that reveal that EC is indeed an effective, scalable, and efficient error correction tool. Real reads that we have employed in our performance evaluation are Illumina-generated short reads of various lengths. Six experimental datasets we have utilized are taken from sequence and read archive (SRA) at NCBI. The simulated reads are obtained by picking substrings from random positions of reference genomes. To introduce errors, some of the bases of the simulated reads are changed to other bases with some probabilities.

Conclusions

Error correction is a vital problem in biology especially for NGS data. In this paper we present a novel algorithm, called Error Corrector (EC), for correcting substitution errors in biological sequencing reads. We plan to investigate the possibility of employing the techniques introduced in this research paper to handle insertion and deletion errors also.

Software availability

The implementation is freely available for non-commercial purposes. It can be downloaded from: http://engr.uconn.edu/~rajasek/EC.zip.",2015-12-07 +21712248,BRISK--research-oriented storage kit for biology-related data.,"

Motivation

In genetic science, large-scale international research collaborations represent a growing trend. These collaborations have demanding and challenging database, storage, retrieval and communication needs. These studies typically involve demographic and clinical data, in addition to the results from numerous genomic studies (omics studies) such as gene expression, eQTL, genome-wide association and methylation studies, which present numerous challenges, thus the need for data integration platforms that can handle these complex data structures. Inefficient methods of data transfer and access control still plague research collaboration. As science becomes more and more collaborative in nature, the need for a system that adequately manages data sharing becomes paramount.

Results

Biology-Related Information Storage Kit (BRISK) is a package of several web-based data management tools that provide a cohesive data integration and management platform. It was specifically designed to provide the architecture necessary to promote collaboration and expedite data sharing between scientists.

Availability and implementation

The software, documentation, Java source code and demo are available at http://genapha.icapture.ubc.ca/brisk/index.jsp. BRISK was developed in Java, and tested on an Apache Tomcat 6 server with a MySQL database.

Contact

denise.daley@hli.ubc.ca.",2011-06-27 +27466626,Drug drug interaction extraction from biomedical literature using syntax convolutional neural network.,"

Motivation

Detecting drug-drug interaction (DDI) has become a vital part of public health safety. Therefore, using text mining techniques to extract DDIs from biomedical literature has received great attention. However, this research is still at an early stage and its performance has much room to improve.

Results

In this article, we present a syntax convolutional neural network (SCNN) based DDI extraction method. In this method, a novel word embedding, syntax word embedding, is proposed to employ the syntactic information of a sentence. Then the position and part of speech features are introduced to extend the embedding of each word. Later, an auto-encoder is introduced to encode the traditional bag-of-words feature (sparse 0-1 vector) as a dense real-valued vector. Finally, a combination of embedding-based convolutional features and traditional features is fed to the softmax classifier to extract DDIs from biomedical literature. Experimental results on the DDIExtraction 2013 corpus show that SCNN obtains a better performance (an F-score of 0.686) than other state-of-the-art methods.

Availability and implementation

The source code is available for academic use at http://202.118.75.18:8080/DDI/SCNN-DDI.zip CONTACT: yangzh@dlut.edu.cn Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-27 +22719214,Techniques for estimating health care costs with censored data: an overview for the health services researcher.,"

Objective

The aim of this study was to review statistical techniques for estimating the mean population cost using health care cost data that, because of the inability to achieve complete follow-up until death, are right censored. The target audience is health service researchers without an advanced statistical background.

Methods

Data were sourced from longitudinal heart failure costs from Ontario, Canada, and administrative databases were used for estimating costs. The dataset consisted of 43,888 patients, with follow-up periods ranging from 1 to 1538 days (mean 576 days). The study was designed so that mean health care costs over 1080 days of follow-up were calculated using naïve estimators such as full-sample and uncensored case estimators. Reweighted estimators - specifically, the inverse probability weighted estimator - were calculated, as was phase-based costing. Costs were adjusted to 2008 Canadian dollars using the Bank of Canada consumer price index (http://www.bankofcanada.ca/en/cpi.html).

Results

Over the restricted follow-up of 1080 days, 32% of patients were censored. The full-sample estimator was found to underestimate mean cost ($30,420) compared with the reweighted estimators ($36,490). The phase-based costing estimate of $37,237 was similar to that of the simple reweighted estimator.

Conclusion

The authors recommend against the use of full-sample or uncensored case estimators when censored data are present. In the presence of heavy censoring, phase-based costing is an attractive alternative approach.",2012-06-01 +26537797,CFAssay: statistical analysis of the colony formation assay.,"

Background

Colony formation assay is the gold standard to determine cell reproductive death after treatment with ionizing radiation, applied for different cell lines or in combination with other treatment modalities. Associated linear-quadratic cell survival curves can be calculated with different methods. For easy code exchange and methodological standardisation among collaborating laboratories a software package CFAssay for R (R Core Team, R: A Language and Environment for Statistical Computing, 2014) was established to perform thorough statistical analysis of linear-quadratic cell survival curves after treatment with ionizing radiation and of two-way designs of experiments with chemical treatments only.

Methods

CFAssay offers maximum likelihood and related methods by default and the least squares or weighted least squares method can be optionally chosen. A test for comparison of cell survival curves and an ANOVA test for experimental two-way designs are provided.

Results

For the two presented examples estimated parameters do not differ much between maximum-likelihood and least squares. However, the dispersion parameter of the quasi-likelihood method is much more sensitive for statistical variation in the data than the multiple R² coefficient of determination from the least squares method.

Conclusion

The dispersion parameter for goodness of fit and different plot functions in CFAssay help to evaluate experimental data quality. As open source software interlaboratory code sharing between users is facilitated.

Availability

The package is available at http://www.bioconductor.org/packages/release/bioc/html/CFAssay.html .",2015-11-04 +27832215,Avoiding Pandemic Fears in the Subway and Conquering the Platypus. ,"Metagenomics is increasingly used not just to show patterns of microbial diversity but also as a culture-independent method to detect individual organisms of intense clinical, epidemiological, conservation, forensic, or regulatory interest. A widely reported metagenomic study of the New York subway suggested that the pathogens Yersinia pestis and Bacillus anthracis were part of the ""normal subway microbiome."" In their article in mSystems, Hsu and collaborators (mSystems 1(3):e00018-16, 2016, http://dx.doi.org/10.1128/mSystems.00018-16) showed that microbial communities on transit surfaces in the Boston subway system are maintained from a metapopulation of human skin commensals and environmental generalists and that reanalysis of the New York subway data with appropriate methods did not detect the pathogens. We note that commonly used software pipelines can produce results that lack prima facie validity (e.g., reporting widespread distribution of notorious endemic species such as the platypus or the presence of pathogens) but that appropriate use of inclusion and exclusion sets can avoid this issue.",2016-05-01 +26635394,RBP-Var: a database of functional variants involved in regulation mediated by RNA-binding proteins.,"Transcription factors bind to the genome by forming specific contacts with the primary DNA sequence; however, RNA-binding proteins (RBPs) have greater scope to achieve binding specificity through the RNA secondary structure. 
It has been revealed that single nucleotide variants (SNVs) that alter RNA structure, also known as RiboSNitches, exhibit 3-fold greater local structure changes than replicates of the same DNA sequence, demonstrated by the fact that depletion of RiboSNitches could result in the alteration of specific RNA shapes at thousands of sites, including 3' UTRs, binding sites of microRNAs and RBPs. However, the network between SNVs and post-transcriptional regulation remains unclear. Here, we developed RBP-Var, a database freely available at http://www.rbp-var.biols.ac.cn/, which provides annotation of functional variants involved in post-transcriptional interaction and regulation. RBP-Var provides an easy-to-use web interface that allows users to rapidly find whether SNVs of interest can transform the secondary structure of RNA and identify RBPs whose binding may be subsequently disrupted. RBP-Var integrates DNA and RNA biology to understand how various genetic variants and post-transcriptional mechanisms cooperate to orchestrate gene expression. In summary, RBP-Var is useful in selecting candidate SNVs for further functional studies and exploring causal SNVs underlying human diseases.",2015-12-03 +26633127,SIFT missense predictions for genomes.,"The SIFT (sorting intolerant from tolerant) algorithm helps bridge the gap between mutations and phenotypic variations by predicting whether an amino acid substitution is deleterious. SIFT has been used in disease, mutation and genetic studies, and a protocol for its use has been previously published with Nature Protocols. This updated protocol describes SIFT 4G (SIFT for genomes), which is a faster version of SIFT that enables practical computations on reference genomes. Users can get predictions for single-nucleotide variants from their organism of interest using the SIFT 4G annotator with SIFT 4G's precomputed databases. The scope of genomic predictions is expanded, with predictions available for more than 200 organisms. 
Users can also run the SIFT 4G algorithm themselves. SIFT predictions can be retrieved for 6.7 million variants in 4 min once the database has been downloaded. If precomputed predictions are not available, the SIFT 4G algorithm can compute predictions at a rate of 2.6 s per protein sequence. SIFT 4G is available from http://sift-dna.org/sift4g.",2015-12-03 +22659196,"Alkamid database: Chemistry, occurrence and functionality of plant N-alkylamides.","

Ethnopharmacological relevance

N-Alkylamides (NAAs) are a promising group of bioactive compounds, which are anticipated to act as important lead compounds for plant protection and biocidal products, functional food, cosmeceuticals and drugs in the next decennia. These molecules, currently found in more than 25 plant families and with a wide structural diversity, exert a variety of biological-pharmacological effects and are of high ethnopharmacological importance. However, information is scattered in literature, with different, often unstandardized, pharmacological methodologies being used. Therefore, a comprehensive NAA database (acronym: Alkamid) was constructed to collect the available structural and functional NAA data, linked to their occurrence in plants (family, tribe, species, genus).

Materials and methods

For loading information in the database, literature data was gathered over the period 1950-2010, by using several search engines. In order to represent the collected information about NAAs, the plants in which they occur and the functionalities for which they have been examined, a relational database is constructed and implemented on a MySQL back-end.

Results

The database is supported by describing the NAA plant-, functional- and chemical-space. The chemical space includes a NAA classification, according to their fatty acid and amine structures.

Conclusions

The Alkamid database (publicly available on the website http://alkamid.ugent.be/) is not only a central information point, but can also function as a useful tool to prioritize the NAA choice in the evaluation of their functionality, to perform data mining leading to quantitative structure-property relationships (QSPRs), functionality comparisons, clustering, plant biochemistry and taxonomic evaluations.",2012-05-30 +26543175,A hidden Markov random field-based Bayesian method for the detection of long-range chromosomal interactions in Hi-C data.,"

Motivation

Advances in chromosome conformation capture and next-generation sequencing technologies are enabling genome-wide investigation of dynamic chromatin interactions. For example, Hi-C experiments generate genome-wide contact frequencies between pairs of loci by sequencing DNA segments ligated from loci in close spatial proximity. One essential task in such studies is peak calling, that is, detecting non-random interactions between loci from the two-dimensional contact frequency matrix. Successful fulfillment of this task has many important implications including identifying long-range interactions that assist interpreting a sizable fraction of the results from genome-wide association studies. The task - distinguishing biologically meaningful chromatin interactions from massive numbers of random interactions - poses great challenges both statistically and computationally. Model-based methods to address this challenge are still lacking. In particular, no statistical model exists that takes the underlying dependency structure into consideration.

Results

In this paper, we propose a hidden Markov random field (HMRF) based Bayesian method to rigorously model interaction probabilities in the two-dimensional space based on the contact frequency matrix. By borrowing information from neighboring loci pairs, our method demonstrates superior reproducibility and statistical power in both simulation studies and real data analysis.

Availability and implementation

The Source codes can be downloaded at: http://www.unc.edu/~yunmli/HMRFBayesHiC CONTACT: ming.hu@nyumc.org or yunli@med.unc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-04 +22985190,Accuracy validation of adjuvant! online in Taiwanese breast cancer patients--a 10-year analysis.,"

Background

Adjuvant! Online (http://www.adjuvantonline.com) is an Internet-based software program that allows clinicians to make predictions about the benefits of adjuvant therapy and 10-year survival probability for early-stage breast cancer patients. This model has been validated in Western countries such as the United States, United Kingdom, Canada, Germany, and Holland. The aim of our study was to investigate the performance and accuracy of Adjuvant! Online in a cohort of Taiwanese breast cancer patients.

Methods

Data on the prognostic factors and clinical outcomes of 559 breast cancer patients diagnosed at the National Cheng Kung University Hospital in Tainan between 1992 and 2001 were enrolled in the study. Comprehensive demographic, clinical outcome data, and adjuvant treatment data were entered into the Adjuvant! Online program. The outcome prediction at 10 years was compared with the observed and predicted outcomes using Adjuvant! Online.

Results

Comparison between low- and high-risk breast cancer patient subgroups showed significant differences in tumor grading, tumor size, and lymph node status (p < 0.0001). The mean 10-year predicted death probability in 559 patients was 19.44%, and the observed death probability was 15.56%. Comparison with the Adjuvant! Online-predicted breast cancer-specific survival (BCSS) showed significant differences in the whole cohort (p < 0.001). In the low-risk subgroup, the predicted and observed outcomes did not differ significantly (3.69% and 3.85%, respectively). In high-risk patients, Adjuvant! Online overestimated breast cancer-specific survival (p = 0.016); the predicted and observed outcomes were 21.99% and 17.46%, respectively.

Conclusions

Adjuvant! Online accurately predicted 10-year outcomes and assisted in decision making about adjuvant treatment in low-risk breast cancer patients in our study, although the results were less accurate in the high-risk subgroup. Development of a prognostic program based on a national database should be considered, especially for high-risk breast cancer patients in Taiwan.",2012-09-17 +26430619,Korea Community Health Survey Data Profiles.,"In 2008, Korea Centers for Disease Control and Prevention initiated the first nationwide survey, Korea Community Health Survey (KCHS), to provide data that could be used to plan, implement, monitor, and evaluate community health promotion and disease prevention programs. This community-based cross-sectional survey has been conducted by 253 community health centers, 35 community universities, and 1500 interviewers. The KCHS standardized questionnaire was developed jointly by the Korea Centers for Disease Control and Prevention staff, a working group of health indicators standardization subcommittee, and 16 metropolitan cities and provinces with 253 regional sites. The questionnaire covers a variety of topics related to health behaviors and prevention, which is used to assess the prevalence of personal health practices and behaviors related to the leading causes of disease, including smoking, alcohol use, drinking and driving, high blood pressure control, physical activity, weight control, quality of life (European Quality of Life-5 Dimensions, European Quality of Life-Visual Analogue Scale, Korean Instrumental Activities of Daily Living ), medical service, accident, injury, etc. The KCHS was administered by trained interviewers, and the quality control of the KCHS was improved by the introduction of a computer-assisted personal interview in 2010. The KCHS data allow a direct comparison of the differences of health issues among provinces. 
Furthermore, the provinces can use these data for their own cost-effective health interventions to improve health promotion and disease prevention. For users and researchers throughout the world, microdata (in the form of SAS files) and analytic guidelines can be downloaded from the KCHS website (http://KCHS.cdc.go.kr/) in Korean.",2015-06-10 +26633997,High-quality and universal empirical atomic charges for chemoinformatics applications.,"

Background

Partial atomic charges describe the distribution of electron density in a molecule and therefore provide clues to the chemical behaviour of molecules. Recently, these charges have become popular in chemoinformatics, as they are informative descriptors that can be utilised in pharmacophore design, virtual screening, similarity searches etc. Especially conformationally-dependent charges perform very successfully. In particular, their fast and accurate calculation via the Electronegativity Equalization Method (EEM) seems very promising for chemoinformatics applications. Unfortunately, published EEM parameter sets include only parameters for basic atom types and they often miss parameters for halogens, phosphorus, sulphur, triple bonded carbon etc. Therefore their applicability for drug-like molecules is limited.

Results

We have prepared six EEM parameter sets which enable the user to calculate EEM charges in a quality comparable to quantum mechanics (QM) charges based on the most common charge calculation schemes (i.e., MPA, NPA and AIM) and a robust QM approach (HF/6-311G, B3LYP/6-311G). The calculated EEM parameters exhibited very good quality on a training set ([Formula: see text]) and also on a test set ([Formula: see text]). They are applicable for at least 95 % of molecules in key drug databases (DrugBank, ChEMBL, Pubchem and ZINC) compared to less than 60 % of the molecules from these databases for which currently used EEM parameters are applicable.

Conclusions

We developed EEM parameters enabling the fast calculation of high-quality partial atomic charges for almost all drug-like molecules. In parallel, we provide a software solution for their easy computation (http://ncbr.muni.cz/eem_parameters). It enables the direct application of EEM in chemoinformatics.",2015-12-02 +22415763,Rett networked database: an integrated clinical and genetic network of Rett syndrome databases.,"Rett syndrome (RTT) is a neurodevelopmental disorder with one principal phenotype and several distinct, atypical variants (Zappella, early seizure onset and congenital variants). Mutations in MECP2 are found in most cases of classic RTT but at least two additional genes, CDKL5 and FOXG1, can underlie some (usually variant) cases. There is only limited correlation between genotype and phenotype. The Rett Networked Database (http://www.rettdatabasenetwork.org/) has been established to share clinical and genetic information. Through an ""adaptor"" process of data harmonization, a set of 293 clinical items and 16 genetic items was generated; 62 clinical and 7 genetic items constitute the core dataset; 23 clinical items contain longitudinal information. The database contains information on 1838 patients from 11 countries (December 2011), with or without mutations in known genes. These numbers can expand indefinitely. Data are entered by a clinician in each center who supervises accuracy. This network was constructed to make available pooled international data for the study of RTT natural history and genotype-phenotype correlation and to indicate the proportion of patients with specific clinical features and mutations. We expect that the network will serve for the recruitment of patients into clinical trials and for developing quality measures to drive up standards of medical management.",2012-04-13 +27153575,mDCC_tools: characterizing multi-modal atomic motions in molecular dynamics trajectories.,"

Unlabelled

We previously reported the multi-modal Dynamic Cross Correlation (mDCC) method for analyzing molecular dynamics trajectories. This method quantifies the correlation coefficients of atomic motions with complex multi-modal behaviors by using a Bayesian-based pattern recognition technique that can effectively capture transiently formed, unstable interactions. Here, we present an open source toolkit for performing the mDCC analysis, including pattern recognitions, complex network analyses and visualizations. We include a tutorial document that thoroughly explains how to apply this toolkit for an analysis, using the example trajectory of the 100 ns simulation of an engineered endothelin-1 peptide dimer.

Availability and implementation

The source code is available for free at http://www.protein.osaka-u.ac.jp/rcsfp/pi/mdcctools/, implemented in C++ and Python, and supported on Linux.

Contact

kota.kasahara@protein.osaka-u.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-07 +25965340,Convex clustering: an attractive alternative to hierarchical clustering.,"The primary goal in cluster analysis is to discover natural groupings of objects. The field of cluster analysis is crowded with diverse methods that make special assumptions about data and address different scientific aims. Despite its shortcomings in accuracy, hierarchical clustering is the dominant clustering method in bioinformatics. Biologists find the trees constructed by hierarchical clustering visually appealing and in tune with their evolutionary perspective. Hierarchical clustering operates on multiple scales simultaneously. This is essential, for instance, in transcriptome data, where one may be interested in making qualitative inferences about how lower-order relationships like gene modules lead to higher-order relationships like pathways or biological processes. The recently developed method of convex clustering preserves the visual appeal of hierarchical clustering while ameliorating its propensity to make false inferences in the presence of outliers and noise. The solution paths generated by convex clustering reveal relationships between clusters that are hidden by static methods such as k-means clustering. The current paper derives and tests a novel proximal distance algorithm for minimizing the objective function of convex clustering. The algorithm separates parameters, accommodates missing data, and supports prior information on relationships. Our program CONVEXCLUSTER incorporating the algorithm is implemented on ATI and nVidia graphics processing units (GPUs) for maximal speed. Several biological examples illustrate the strengths of convex clustering and the ability of the proximal distance algorithm to handle high-dimensional problems. 
CONVEXCLUSTER can be freely downloaded from the UCLA Human Genetics web site at http://www.genetics.ucla.edu/software/.",2015-05-12 +27153583,FRODOCK 2.0: fast protein-protein docking server.,

Unlabelled

The prediction of protein-protein complexes from the structures of unbound components is a challenging and powerful strategy to decipher the mechanism of many essential biological processes. We present a user-friendly protein-protein docking server based on an improved version of FRODOCK that includes a complementary knowledge-based potential. The web interface provides a very effective tool to explore and select protein-protein models and interactively screen them against experimental distance constraints. The competitive success rates and efficiency achieved allow the retrieval of reliable potential protein-protein binding conformations that can be further refined with more computationally demanding strategies.

Availability and implementation

The server is free and open to all users with no login requirement at http://frodock.chaconlab.org

Contact

pablo@chaconlab.org

Supplementary information

Supplementary data are available at Bioinformatics online.,2016-03-12 +25417204,"Genomation: a toolkit to summarize, annotate and visualize genomic intervals.","

Unlabelled

Biological insights can be obtained through computational integration of genomics data sets consisting of diverse types of information. The integration is often hampered by a large variety of existing file formats, often containing similar information, and the necessity to use complicated tools to achieve the desired results. We have built an R package, genomation, to expedite the extraction of biological information from high throughput data. The package works with a variety of genomic interval file types and enables easy summarization and annotation of high throughput data sets with given genomic annotations.

Availability and implementation

The software is currently distributed under MIT artistic license and freely available at http://bioinformatics.mdc-berlin.de/genomation, and through the Bioconductor framework.",2014-11-21 +24988444,Twenty years of high-resolution sea surface temperature imagery around Australia: inter-annual and annual variability.,"The physical climate defines a significant portion of the habitats in which biological communities and species reside. It is important to quantify these environmental conditions, and how they have changed, as this will inform future efforts to study many natural systems. In this article, we present the results of a statistical summary of the variability in sea surface temperature (SST) time-series data for the waters surrounding Australia, from 1993 to 2013. We partition variation in the SST series into annual trends, inter-annual trends, and a number of components of random variation. We utilise satellite data and validate the statistical summary from these data to summaries of data from long-term monitoring stations and from the global drifter program. The spatially dense results, available as maps from the Australian Oceanographic Data Network's data portal (http://www.cmar.csiro.au/geonetwork/srv/en/metadata.show?id=51805), show clear trends that associate with oceanographic features. Noteworthy oceanographic features include: average warming was greatest off southern West Australia and off eastern Tasmania, where the warming was around 0.6°C per decade for a twenty year study period, and insubstantial warming in areas dominated by the East Australian Current, but this area did exhibit high levels of inter-annual variability (long-term trend increases and decreases but does not increase on average). 
The results of the analyses can be directly incorporated into (biogeographic) models that explain variation in biological data where both biological and environmental data are on a fine scale.",2014-07-02 +26635139,An integrative somatic mutation analysis to identify pathways linked with survival outcomes across 19 cancer types.,"

Motivation

Identification of altered pathways that are clinically relevant across human cancers is a key challenge in cancer genomics. Precise identification and understanding of these altered pathways may provide novel insights into patient stratification, therapeutic strategies and the development of new drugs. However, a challenge remains in accurately identifying pathways altered by somatic mutations across human cancers, due to the diverse mutation spectrum. We developed an innovative approach to integrate somatic mutation data with gene networks and pathways, in order to identify pathways altered by somatic mutations across cancers.

Results

We applied our approach to The Cancer Genome Atlas (TCGA) dataset of somatic mutations in 4790 cancer patients with 19 different types of tumors. Our analysis identified cancer-type-specific altered pathways enriched with known cancer-relevant genes and targets of currently available drugs. To investigate the clinical significance of these altered pathways, we performed consensus clustering for patient stratification using member genes in the altered pathways coupled with gene expression datasets from 4870 patients from TCGA, and multiple independent cohorts confirmed that the altered pathways could be used to stratify patients into subgroups with significantly different clinical outcomes. Of particular significance, certain patient subpopulations with poor prognosis were identified because they had specific altered pathways for which there are available targeted therapies. These findings could be used to tailor and intensify therapy in these patients, for whom current therapy is suboptimal.

Availability and implementation

The code is available at: http://www.taehyunlab.org

Contact

jhcheong@yuhs.ac or taehyun.hwang@utsouthwestern.edu or taehyun.cs@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-03 +25435002,Network-based modular latent structure analysis.,"

Background

High-throughput expression data, such as gene expression and metabolomics data, exhibit modular structures. Groups of features in each module follow a latent factor model, while between modules, the latent factors are quasi-independent. Recovering the latent factors can shed light on the hidden regulation patterns of the expression. The difficulty in detecting such modules and recovering the latent factors lies in the high dimensionality of the data, and the lack of knowledge in module membership.

Methods

Here we describe a method based on community detection in the co-expression network. It consists of inference-based network construction, module detection, and interacting latent factor detection from modules.

Results

In simulations, the method outperformed projection-based modular latent factor discovery when the input signals were not Gaussian. We also demonstrate the method's value in real data analysis.

Conclusions

The new method nMLSA (network-based modular latent structure analysis) is effective in detecting latent structures, and is easy to extend to non-linear cases. The method is available as R code at http://web1.sph.emory.edu/users/tyu8/nMLSA/.",2014-11-13 +27153676,Isoform-level ribosome occupancy estimation guided by transcript abundance with Ribomap.,"

Unlabelled

: Ribosome profiling is a recently developed high-throughput sequencing technique that captures approximately 30 bp long ribosome-protected mRNA fragments during translation. Because of alternative splicing and repetitive sequences, a ribosome-protected read may map to many places in the transcriptome, leading to discarded or arbitrary mappings when standard approaches are used. We present a technique and software that addresses this problem by assigning reads to potential origins proportional to estimated transcript abundance. This yields a more accurate estimate of ribosome profiles compared with a naïve mapping.

Availability and implementation

Ribomap is available as open source at http://www.cs.cmu.edu/~ckingsf/software/ribomap

Contact

carlk@cs.cmu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-15 +21438073,"An informatics project and online ""Knowledge Centre"" supporting modern genotype-to-phenotype research.","Explosive growth in the generation of genotype-to-phenotype (G2P) data necessitates a concerted effort to tackle the logistical and informatics challenges this presents. The GEN2PHEN Project represents one such effort, with a broad strategy of uniting disparate G2P resources into a hybrid centralized-federated network. This is achieved through a holistic strategy focussed on three overlapping areas: data input standards and pipelines through which to submit and collect data (data in); federated, independent, extendable, yet interoperable database platforms on which to store and curate widely diverse datasets (data storage); and data formats and mechanisms with which to exchange, combine, and extract data (data exchange and output). To fully leverage this data network, we have constructed the ""G2P Knowledge Centre"" (http://www.gen2phen.org). This central platform provides holistic searching of the G2P data domain allied with facilities for data annotation and user feedback, access to extensive G2P and informatics resources, and tools for constructing online working communities centered on the G2P domain. Through the efforts of GEN2PHEN, and through combining data with broader community-derived knowledge, the Knowledge Centre opens up exciting possibilities for organizing, integrating, sharing, and interpreting new waves of G2P data in a collaborative fashion.",2011-03-22 +26420835,Ultra-fast local-haplotype variant calling using paired-end DNA-sequencing data reveals somatic mosaicism in tumor and normal blood samples.,"Somatic mosaicism refers to the existence of somatic mutations in a fraction of somatic cells in a single biological sample. 
Its importance has mainly been discussed in theory although experimental work has started to emerge linking somatic mosaicism to disease diagnosis. Through novel statistical modeling of paired-end DNA-sequencing data using blood-derived DNA from healthy donors as well as DNA from tumor samples, we present an ultra-fast computational pipeline, LocHap that searches for multiple single nucleotide variants (SNVs) that are scaffolded by the same reads. We refer to scaffolded SNVs as local haplotypes (LH). When an LH exhibits more than two genotypes, we call it a local haplotype variant (LHV). The presence of LHVs is considered evidence of somatic mosaicism because a genetically homogeneous cell population will not harbor LHVs. Applying LocHap to whole-genome and whole-exome sequence data in DNA from normal blood and tumor samples, we find wide-spread LHVs across the genome. Importantly, we find more LHVs in tumor samples than in normal samples, and more in older adults than in younger ones. We confirm the existence of LHVs and somatic mosaicism by validation studies in normal blood samples. LocHap is publicly available at http://www.compgenome.org/lochap.",2015-09-29 +27762428,De novo synthesis and functional analysis of the phosphatase-encoding gene acI-B of uncultured Actinobacteria from Lake Stechlin (NE Germany).,"The National Center for Biotechnology Information [http://www.ncbi.nlm.nih.gov/guide/taxonomy/] database enlists more than 15,500 bacterial species. But this also includes a plethora of uncultured bacterial representations. Owing to their metabolism, they directly influence biogeochemical cycles, which underscores the important status of bacteria on our planet. To study the function of a gene from an uncultured bacterium, we have undertaken a de novo gene synthesis approach. 
Actinobacteria of the acI-B subcluster are important but yet uncultured members of the bacterioplankton in temperate lakes of the northern hemisphere such as oligotrophic Lake Stechlin (NE Germany). This lake is relatively poor in phosphate (P) and harbors on average ~1.3 x 10^6 bacterial cells/ml, whereby Actinobacteria of the ac-I lineage can contribute to almost half of the entire bacterial community depending on seasonal variability. Single cell genome analysis of Actinobacterium SCGC AB141-P03, a member of the acI-B tribe in Lake Stechlin has revealed several phosphate-metabolizing genes. The genome of acI-B Actinobacteria indicates potential to degrade polyphosphate compound. To test for this genetic potential, we targeted the exoP-annotated gene potentially encoding polyphosphatase and synthesized it artificially to examine its biochemical role. Heterologous overexpression of the gene in Escherichia coli and protein purification revealed phosphatase activity. Comparative genome analysis suggested that homologs of this gene should be also present in other Actinobacteria of the acI lineages. This strategic retention of specialized genes in their genome provides a metabolic advantage over other members of the aquatic food web in a P-limited ecosystem. [Int Microbiol 2016; 19(1):39-47].",2015-12-01 +26663055,[Analysis of genomic copy number variations in two sisters with primary amenorrhea and hyperandrogenism].,"

Objective

To analyze genomic copy number variations (CNVs) in two sisters with primary amenorrhea and hyperandrogenism.

Methods

G-banding was performed for karyotype analysis. The whole genome of the two sisters were scanned and analyzed by array-based comparative genomic hybridization (array-CGH). The results were confirmed with real-time quantitative PCR (RT-qPCR).

Results

No abnormality was found by conventional G-banded chromosome analysis. Array-CGH has identified 11 identical CNVs from the sisters which, however, overlapped with CNVs reported by the Database of Genomic Variants (http://projects.tcag.ca/variation/). Therefore, they are likely to be benign. In addition, a ~8.44 Mb 9p11.1-p13.1 duplication (38,561,587-47,002,387 bp, hg18) and a ~80.9 kb 4q13.2 deletion (70,183,990-70,264,889 bp, hg18) were also detected in the elder and younger sister, respectively. The relationship between such CNVs and primary amenorrhea and hyperandrogenism was however uncertain. RT-qPCR results were in accordance with array-CGH.

Conclusion

Two CNVs were detected in two sisters by array-CGH, for which further studies are needed to clarify their correlation with primary amenorrhea and hyperandrogenism.",2015-12-01 +25381023,Adaptation of the International Affective Picture System (IAPS) for European Portuguese.,"This study presents the results of the adaptation of the International Affective Picture System (IAPS) for European Portuguese (EP). Following the original procedure of Lang et al., 2000 native speakers of EP rated the 1,182 pictures of the last version of the IAPS set on the three affective dimensions of valence, arousal, and dominance, using the Self-Assessment Manikin (SAM). Results showed that the normative values of the IAPS for EP are properly distributed in the affective space of valence and arousal, showing the typical boomerang-shaped distribution observed in previous studies. Results also point to important differences in the way Portuguese females and males react to affective pictures that should be taken into consideration when planning and conducting research with Portuguese samples. Furthermore, the results from the cross-cultural comparisons between the EP ratings and the ratings from the American, Spanish, Brazilian, Belgian, Chilean, Indian, and Bosnian-Herzegovinian standardizations, showed that in spite of the fact that IAPS stimuli elicited affective responses that are similar across countries and cultures (at least in Western cultures), there are differences in the way Portuguese individuals react to IAPS pictures that strongly recommend the use of the normative values presented in this work. 
They can be downloaded as a supplemental archive at http://brm.psychonomic-journals.org/content/supplemental or at http://p-pal.di.uminho.pt/about/databases.",2015-12-01 +26624019,orthoFind Facilitates the Discovery of Homologous and Orthologous Proteins.,"Finding homologous and orthologous protein sequences is often the first step in evolutionary studies, annotation projects, and experiments of functional complementation. Despite all currently available computational tools, there is a requirement for easy-to-use tools that provide functional information. Here, a new web application called orthoFind is presented, which allows a quick search for homologous and orthologous proteins given one or more query sequences, allowing a recurrent and exhaustive search against reference proteomes, and being able to include user databases. It addresses the protein multidomain problem, searching for homologs with the same domain architecture, and gives a simple functional analysis of the results to help in the annotation process. orthoFind is easy to use and has been proven to provide accurate results with different datasets. Availability: http://www.bioinfocabd.upo.es/orthofind/.",2015-12-01 +25928379,Identification of sample-specific regulations using integrative network level analysis.,"

Background

Histologically similar tumors even from the same anatomical position may still show high variability at molecular level hindering analysis of genome-wide data. Leveling the analysis to a gene regulatory network instead of focusing on single genes has been suggested to overcome the heterogeneity issue although the majority of the network methods require large datasets. Network methods that are able to function at a single sample level are needed to overcome the heterogeneity and sample size issues.

Methods

We present a novel network method, Differentially Expressed Regulation Analysis (DERA) that integrates expression data to biological network information at a single sample level. The sample-specific networks are subsequently used to discover samples with similar molecular functions by identification of regulations that are shared between samples or are specific for a subgroup.

Results

We applied DERA to identify key regulations in triple negative breast cancer (TNBC), which is characterized by lack of estrogen receptor, progesterone receptor and HER2 expression and has poorer prognosis than the other breast cancer subtypes. DERA identified 110 core regulations consisting of 28 disconnected subnetworks for TNBC. These subnetworks are related to oncogenic activity, proliferation, cancer survival, invasiveness and metastasis. Our analysis further revealed 31 regulations specific for TNBC as compared to the other breast cancer subtypes and thus form a basis for understanding TNBC. We also applied DERA to high-grade serous ovarian cancer (HGS-OvCa) data and identified several common regulations between HGS-OvCa and TNBC. The performance of DERA was compared to two pathway analysis methods GSEA and SPIA and our results show better reproducibility and higher sensitivity in a small sample set.

Conclusions

We present a novel method called DERA to identify subnetworks that are similarly active for a group of samples. DERA was applied to breast cancer and ovarian cancer data showing our method is able to identify reliable and potentially important regulations with high reproducibility. R package is available at http://csbi.ltdk.helsinki.fi/pub/czliu/DERA/.",2015-04-28 +25872185,Network-constrained forest for regularized classification of omics data.,"Contemporary molecular biology deals with wide and heterogeneous sets of measurements to model and understand underlying biological processes including complex diseases. Machine learning provides a frequent approach to build such models. However, the models built solely from measured data often suffer from overfitting, as the sample size is typically much smaller than the number of measured features. In this paper, we propose a random forest-based classifier that reduces this overfitting with the aid of prior knowledge in the form of a feature interaction network. We illustrate the proposed method in the task of disease classification based on measured mRNA and miRNA profiles complemented by the interaction network composed of the miRNA-mRNA target relations and mRNA-mRNA interactions corresponding to the interactions between their encoded proteins. We demonstrate that the proposed network-constrained forest employs prior knowledge to increase learning bias and consequently to improve classification accuracy, stability and comprehensibility of the resulting model. The experiments are carried out in the domain of myelodysplastic syndrome that we are concerned about in the long term. We validate our approach in the public domain of ovarian carcinoma, with the same data form. We believe that the idea of a network-constrained forest can straightforwardly be generalized towards arbitrary omics data with an available and non-trivial feature interaction network. 
The proposed method is publicly available in terms of miXGENE system (http://mixgene.felk.cvut.cz), the workflow that implements the myelodysplastic syndrome experiments is presented as a dedicated case study.",2015-04-11 +26912953,Mammalian Mitochondrial ncRNA Database.,"

Unlabelled

Mammalian Mitochondrial ncRNA is a web-based database, which provides specific information on non-coding RNA in mammals. This database includes easy searching, comparing with BLAST and retrieving information on predicted structure and its function about mammalian ncRNAs.

Availability

The database is available for free at http://www.iitm.ac.in/bioinfo/mmndb/.",2015-11-30 +26912951,RDIS: The Rabies Disease Information System.,"

Unlabelled

Rabies is a deadly viral disease causing acute inflammation or encephalitis of the brain in human beings and other mammals. Therefore, it is of interest to collect information related to the disease from several sources including known literature databases for further analysis and interpretation. Hence, we describe the development of a database called the Rabies Disease Information System (RDIS) for this purpose. The online database describes the etiology, epidemiology, pathogenesis and pathology of the disease using diagrammatic representations. It provides information on several carriers of the rabies viruses like dog, bat, fox and civet, and their distributions around the world. Information related to the urban and sylvatic cycles of transmission of the virus is also made available. The database also contains information related to available diagnostic methods and vaccines for human and other animals. This information is of use to medical, veterinary and paramedical practitioners, students, researchers, pet owners, animal lovers, livestock handlers, travelers and many others.

Availability

The database is available for free http://rabies.mscwbif.org/home.html.",2015-11-30 +23516335,DEBDOM: Database Exploring Banana Diversity of Manipur.,"

Unlabelled

: Being poor man's apple, banana has a wide popularity worldwide. It's one of the important horticultural crops used irrespective of rich and poor alike. Manipur along with the other states of Northeast India harboured with plenty of wild and cultivated species of banana that are not fully explored. A data base named DEBDOM has been developed here describing the diversity of banana resources of Manipur and it comprises twenty eight genotypes of Musaceae. The database DEBDOM provides a sophisticated web base access to the details of the taxonomy, morphological characteristics, utility as well as sites of collection of Musa genotypes, and it would have contribute as a potential gene pool sources for the conservation, sustainability as well as for crop improvement in the future breeding programmes.

Availability

http://ibsd.gov.in/debdom/",2013-03-02 +26484274,Piwi proteins and piRNAs in mammalian oocytes and early embryos: From sample to sequence.,"The role of the Piwi/piRNA pathway during mammalian oogenesis has remained enigmatic thus far, especially since experiments with Piwi knockout mice did not reveal any phenotypic defects in female individuals. This is in striking contrast with results obtained from other species including flies and zebrafish. In mouse oocytes, however, only low levels of piRNAs are found and they are not required for their function. We recently demonstrated dynamic expression of PIWIL1, PIWIL2, and PIWIL3 during mammalian oogenesis and early embryogenesis. In addition, small RNA analysis of human, crab-eating macaque and cattle revealed that piRNAs are also expressed in the female germline and closely resemble piRNAs from testis. Here, we thoroughly describe the experimental and computational methods that we applied for the generation, processing and analyses of next generation sequencing (NGS) data associated with our study on Piwi proteins and piRNAs in mammalian oocytes and embryos (Roovers et al., 2015). The complete sequence data is available at NCBI's Gene Expression Omnibus (http://www.ncbi.nlm.nih.gov/geo/) under the accession GSE64942.",2015-07-10 +21755952,Construction and test of ligand decoy sets using MDock: community structure-activity resource benchmarks for binding mode prediction.,"Two sets of ligand binding decoys have been constructed for the community structure-activity resource (CSAR) benchmark by using the MDock and DOCK programs for rigid- and flexible-ligand docking, respectively. The decoys generated for each complex in the benchmark thoroughly cover the binding site and also contain a certain number of near-native binding modes. A few scoring functions have been evaluated using the ligand binding decoy sets for their abilities of predicting near-native binding modes. 
Among them, ITScore achieved a success rate of 86.7% for the rigid-ligand decoys and 79.7% for the flexible-ligand decoys, under the common definition of a successful prediction as root-mean-square deviation <2.0 Å from the native structure if the top-scored binding mode was considered. The decoy sets may serve as benchmarks for binding mode prediction of a scoring function, which are available at the CSAR Web site ( http://www.csardock.org/).",2011-08-03 +23412913,Detecting sequence homology at the gene cluster level with MultiGeneBlast.,"The genes encoding many biomolecular systems and pathways are genomically organized in operons or gene clusters. With MultiGeneBlast, we provide a user-friendly and effective tool to perform homology searches with operons or gene clusters as basic units, instead of single genes. The contextualization offered by MultiGeneBlast allows users to get a better understanding of the function, evolutionary history, and practical applications of such genomic regions. The tool is fully equipped with applications to generate search databases from GenBank or from the user's own sequence data. Finally, an architecture search mode allows searching for gene clusters with novel configurations, by detecting genomic regions with any user-specified combination of genes. Sources, precompiled binaries, and a graphical tutorial of MultiGeneBlast are freely available from http://multigeneblast.sourceforge.net/.",2013-02-14 +22672646,Biomine: predicting links between biological entities using network models of heterogeneous databases.,"

Background

Biological databases contain large amounts of data concerning the functions and associations of genes and proteins. Integration of data from several such databases into a single repository can aid the discovery of previously unknown connections spanning multiple types of relationships and databases.

Results

Biomine is a system that integrates cross-references from several biological databases into a graph model with multiple types of edges, such as protein interactions, gene-disease associations and gene ontology annotations. Edges are weighted based on their type, reliability, and informativeness. We present Biomine and evaluate its performance in link prediction, where the goal is to predict pairs of nodes that will be connected in the future, based on current data. In particular, we formulate protein interaction prediction and disease gene prioritization tasks as instances of link prediction. The predictions are based on a proximity measure computed on the integrated graph. We consider and experiment with several such measures, and perform a parameter optimization procedure where different edge types are weighted to optimize link prediction accuracy. We also propose a novel method for disease-gene prioritization, defined as finding a subset of candidate genes that cluster together in the graph. We experimentally evaluate Biomine by predicting future annotations in the source databases and prioritizing lists of putative disease genes.

Conclusions

The experimental results show that Biomine has strong potential for predicting links when a set of selected candidate links is available. The predictions obtained using the entire Biomine dataset are shown to clearly outperform ones obtained using any single source of data alone, when different types of links are suitably weighted. In the gene prioritization task, an established reference set of disease-associated genes is useful, but the results show that under favorable conditions, Biomine can also perform well when no such information is available. The Biomine system is a proof of concept. Its current version contains 1.1 million entities and 8.1 million relations between them, with focus on human genetics. Some of its functionalities are available in a public query interface at http://biomine.cs.helsinki.fi, allowing searching for and visualizing connections between given biological entities.",2012-06-06 +27441584,Combining Human Computing and Machine Learning to Make Sense of Big (Aerial) Data for Disaster Response.,"Aerial imagery captured via unmanned aerial vehicles (UAVs) is playing an increasingly important role in disaster response. Unlike satellite imagery, aerial imagery can be captured and processed within hours rather than days. In addition, the spatial resolution of aerial imagery is an order of magnitude higher than the imagery produced by the most sophisticated commercial satellites today. Both the United States Federal Emergency Management Agency (FEMA) and the European Commission's Joint Research Center (JRC) have noted that aerial imagery will inevitably present a big data challenge. The purpose of this article is to get ahead of this future challenge by proposing a hybrid crowdsourcing and real-time machine learning solution to rapidly process large volumes of aerial data for disaster response in a time-sensitive manner. 
Crowdsourcing can be used to annotate features of interest in aerial images (such as damaged shelters and roads blocked by debris). These human-annotated features can then be used to train a supervised machine learning system to learn to recognize such features in new unseen images. In this article, we describe how this hybrid solution for image analysis can be implemented as a module (i.e., Aerial Clicker) to extend an existing platform called Artificial Intelligence for Disaster Response (AIDR), which has already been deployed to classify microblog messages during disasters using its Text Clicker module and in response to Cyclone Pam, a category 5 cyclone that devastated Vanuatu in March 2015. The hybrid solution we present can be applied to both aerial and satellite imagery and has applications beyond disaster response such as wildlife protection, human rights, and archeological exploration. As a proof of concept, we recently piloted this solution using very high-resolution aerial photographs of a wildlife reserve in Namibia to support rangers with their wildlife conservation efforts (SAVMAP project, http://lasig.epfl.ch/savmap ). The results suggest that the platform we have developed to combine crowdsourcing and machine learning to make sense of large volumes of aerial images can be used for disaster response.",2016-02-26 +23668932,The zebrafish CreZoo: an easy-to-handle database for novel CreER(T2)-driver lines.,"We report a new open access database, the zebrafish CreZoo ( http://crezoo.crt-dresden.de ), which contains novel CreER(T2)-driver lines that express Cre fused to the mutated human ligand-binding domain of the estrogen receptor (CreER(T2)) in several tissues. Recently, the conditional Cre/loxP technology has been added to the toolbox for the precise manipulation of the zebrafish genome, but currently the number of CreER(T2)-driver lines is limited. 
To enlarge the pool of existing CreER(T2)-driver lines, we conducted a genome-wide screen using a gene trap cassette comprising a splice acceptor and an mCherry-tagged variant of CreER(T2). All molecular and expression data obtained in this screen are summarized in the CreZoo database, which currently comprises an inventory of about 47 Cre-driver lines expressing CreER(T2) in a cell- and tissue-specific manner during development and adulthood. Combined with other Cre-dependent effector lines, the CreZoo will be a great tool to manipulate the zebrafish genome.",2013-05-13 +26019177,"INGA: protein function prediction combining interaction networks, domain assignments and sequence similarity.","Identifying protein functions can be useful for numerous applications in biology. The prediction of gene ontology (GO) functional terms from sequence remains however a challenging task, as shown by the recent CAFA experiments. Here we present INGA, a web server developed to predict protein function from a combination of three orthogonal approaches. Sequence similarity and domain architecture searches are combined with protein-protein interaction network data to derive consensus predictions for GO terms using functional enrichment. The INGA server can be queried both programmatically through RESTful services and through a web interface designed for usability. The latter provides output supporting the GO term predictions with the annotating sequences. INGA is validated on the CAFA-1 data set and was recently shown to perform consistently well in the CAFA-2 blind test. The INGA web server is available from URL: http://protein.bio.unipd.it/inga.",2015-05-27 +27928018,Expression of the Antisense-to-Latency Transcript Long Noncoding RNA in Kaposi's Sarcoma-Associated Herpesvirus. ,"The regulation of latency is central to herpesvirus biology. 
Recent transcriptome-wide surveys have uncovered evidence for promiscuous transcription across the entirety of the Kaposi's sarcoma-associated herpesvirus (KSHV) genome and postulated the existence of multiple viral long noncoding RNAs (lncRNAs). Next-generation sequencing studies are highly dependent on the specific experimental approach and particular algorithms of analysis and therefore benefit from independent confirmation of the results. The antisense-to-latency transcript (ALT) lncRNA was discovered by genome-tiling microarray (Chandriani et al., J Virol 86:7934-7942, 2010, https://doi.org/10.1128/JVI.00645-10). To characterize ALT in detail, we physically isolated this lncRNA by a strand-specific hybrid capture assay and then employed transcriptome sequencing and novel reverse transcription-PCR (RT-PCR) assays to distinguish all RNA species in the KSHV latency region. These methods confirm that ALT initiates at positions 120739/121012 and encodes a single splice site, which is shared with the 3'-coterminal K14-vGPCR/ORF74 mRNA, terminating at 130873 (GenBank accession number GQ994935), resulting in an ∼10,000-nucleotide transcript. No shorter ALT isoforms were identified. This study also identified a novel intron within the LANA 5' untranslated region using a splice acceptor at 127888. In summary, ALT joins PAN/nut1/T1.1 as a bona fide lncRNA of KSHV with potentially important roles in viral gene regulation and pathogenesis. Increasing data support the importance of noncoding RNAs (ncRNAs), including microRNAs (miRNAs) and lncRNAs, which have been shown to exert critical regulatory functions without coding for recognizable proteins. Defining the sequences of these ncRNAs is essential for future studies aiming to functionally characterize a specific ncRNA. Most lncRNA studies are highly dependent on high-throughput sequencing and bioinformatic analyses, few studies follow up on the initial predictions, and analyses are at times discordant. 
The manuscript characterizes one key viral lncRNA, ALT, by physically isolating ALT and by a sequencing-independent assay. It provides for a simple assay to monitor lncRNA expression in experimental and clinical samples. ALT is expressed antisense to the major viral latency transcripts encoding LANA as well as the viral miRNAs and thus has the potential to regulate this key part of the viral life cycle.",2017-01-31 +27830001,PLK1 promotes epithelial-mesenchymal transition and metastasis of gastric carcinoma cells.,"Cancer cell epithelial-mesenchymal transition (EMT) is the crucial event for cancer progression and plays a vital role in the metastasis of cancer cells. Activation of Polo-like kinase 1 (PLK1) signaling has been implicated as the critical event in several tumor metastasis and EMT, however, whether PLK1 participates in gastric carcinoma metastasis and EMT still remains unclear. For this study, we elucidated the potential physiological function of PLK1 in the metastasis of gastric tumors, as well its distinct role in cells EMT and subsequently determined the mechanism involved in PLK1 regulated. Immunoblotting assay and Oncomine data mining analysis indicated that PLK1 expression was highly up-regulated in gastric carcinoma. Kaplan-Meier survival analysis for the relationship between survival outcomes and PLK1 expression in gastric carcinoma was performed with an online Kaplan-Meier plotter (http://kmplot.com/analysis/). Over-expression of PLK1 in gastric cancer cells SGC-7901 and MKN-28 significantly promoted cells profound morphological changes and enhanced metastatic ability of tumor cells. On the contrary, silencing of PLK1 induced mesenchymal epithelial transition (MET)-like morphological and inhibited the metastatic process. Furthermore, we found that the metastatic characters promoting effects of PLK1 in gastric carcinoma was related to the activation of protein kinase B (AKT). 
Our mechanistic investigations revealed that AKT inhibition reversed PLK1-induced EMT, blocked gastric carcinoma cells invasiveness and metastasis. Additionally, over-expression of AKT promoted the migratory and invasion ability of the two cell lines, which was disrupted by PLK1 down-regulation. To conclude, our findings demonstrate that PLK1 accelerates the metastasis and epithelial-mesenchyme transition of gastric cancer cells through regulating the AKT pathway.",2016-10-15 +27833736,The 1st Baltic Osseointegration Academy and Lithuanian University of Health Sciences Consensus Conference 2016. Summary and Consensus Statements: Group II - Peri-Implantitis Diagnostics and Decision Tree.,"

Introduction

The task of Group 2 was to review and update the existing data concerning clinical and genetic methods of diagnostics of peri-implantitis. Special interest was paid to the peri-implant crevicular fluid (PICF) overview including analysis of enzymes and biomarkers and microbial profiles from implants.

Material and methods

The main areas of interest were as follows: effect of smoking and history of periodontitis, prosthetic treatment mistakes, excess cement, overloading, general diseases influence on peri-implantitis development. The systematic review and/or meta-analysis were registered in PROSPERO, an international prospective register of systematic reviews: http://www.crd.york.ac.uk/PROSPERO/. The literature in the corresponding areas of interest was searched and reported using the PRISMA (Preferred Reporting Item for Systematic Review and Meta-Analysis) Statement: http://www.prisma-statement.org/. The method of preparation of systematic reviews of the literature based on comprehensive search strategies was discussed and standardized. The summary of the materials and methods employed by the authors in preparing the systematic review and/or meta-analysis is presented in Preface chapter.

Results

The results and conclusions of the review process are presented in the respective papers. The group's general commentaries, consensus statements, clinical recommendations and implications for research are presented in this article.",2016-07-01 +27747791,A Descriptive Study of Hot Aches: a Previously Unreported Winter Climbing Phenomenon.,"

Background

Hot aches, also known as the screaming barfies in North America, are a recognised phenomenon amongst winter climbers, assumed to be triggered by the reperfusion of cold peripheries which then rapidly progresses to a systemic vasodilatory syndrome. Symptoms experienced in the hands include pain, numbness and throbbing followed by systemic symptoms such as nausea, irritability, dizziness and in extreme cases a transient loss of vision and hearing. Despite being well known amongst the winter climbing community, there are no publications in the scientific literature characterising the hot aches.

Methods

A survey was posted online at http://www.ukclimbing.com between the dates of 28th September 2014 to 1st December 2014. Data was collected and analysed offline using Microsoft excel.

Results

This is a descriptive epidemiological study of UK winter climbers and their experience of hot aches. We found that hot aches are experienced by 96 % of these climbers. They generally last 1-5 min, and 75 % rate them as being 3-4 (out of 5) on a pain scale. The most common local symptoms are pain (87 %), throbbing (70 %) and tingling (52 %). The most common systemic symptoms are nausea (44 %), irritability (32 %) and dizziness (20 %). Twenty percent of climbers experience hot aches in locations other than their hands.

Conclusions

The hot aches are a highly predictable and consistent experience for almost all winter climbers. This study has characterised, for the first time, a recognised but previously unreported phenomenon that occurs in extreme winter climbers. The short- and long-term consequences are currently unknown and warrant further investigation.",2016-09-12 +27114926,BDA: A novel method for identifying defects in body-centered cubic crystals.,"The accurate and fast identification of crystallographic defects plays a key role for the analysis of atomistic simulation output data. For face-centered cubic (fcc) metals, most existing structure analysis tools allow for the direct distinction of common defects, such as stacking faults or certain low-index surfaces. For body-centered cubic (bcc) metals, on the other hand, a robust way to identify such defects is currently not easily available. We therefore introduce a new method for analyzing atomistic configurations of bcc metals, the BCC Defect Analysis (BDA). It uses existing structure analysis algorithms and combines their results to uniquely distinguish between typical defects in bcc metals. In essence, the BDA method offers the following features:•Identification of typical defect structures in bcc metals.•Reduction of erroneously identified defects by iterative comparison to the defects in the atom's neighborhood.•Availability as ready-to-use Python script for the widespread visualization tool OVITO [http://ovito.org].",2016-03-31 +23840562,Predicting Drug-Target Interactions for New Drug Compounds Using a Weighted Nearest Neighbor Profile.,"In silico discovery of interactions between drug compounds and target proteins is of core importance for improving the efficiency of the laborious and costly experimental determination of drug-target interaction. Drug-target interaction data are available for many classes of pharmaceutically useful target proteins including enzymes, ion channels, GPCRs and nuclear receptors. 
However, current drug-target interaction databases contain a small number of drug-target pairs which are experimentally validated interactions. In particular, for some drug compounds (or targets) there is no available interaction. This motivates the need for developing methods that predict interacting pairs with high accuracy also for these 'new' drug compounds (or targets). We show that a simple weighted nearest neighbor procedure is highly effective for this task. We integrate this procedure into a recent machine learning method for drug-target interaction we developed in previous work. Results of experiments indicate that the resulting method predicts true interactions with high accuracy also for new drug compounds and achieves results comparable or better than those of recent state-of-the-art algorithms. Software is publicly available at http://cs.ru.nl/~tvanlaarhoven/drugtarget2013/.",2013-06-26 +26508762,"ancGWAS: a post genome-wide association study method for interaction, pathway and ancestry analysis in homogeneous and admixed populations.","

Motivation

Despite numerous successful Genome-wide Association Studies (GWAS), detecting variants that have low disease risk still poses a challenge. GWAS may miss disease genes with weak genetic effects or strong epistatic effects due to the single-marker testing approach commonly used. GWAS may thus generate false negative or inconclusive results, suggesting the need for novel methods to combine effects of single nucleotide polymorphisms within a gene to increase the likelihood of fully characterizing the susceptibility gene.

Results

We developed ancGWAS, an algebraic graph-based centrality measure that accounts for linkage disequilibrium in identifying significant disease sub-networks by integrating the association signal from GWAS data sets into the human protein-protein interaction (PPI) network. We validated ancGWAS using an association study result from a breast cancer data set and the simulation of interactive disease loci in the simulation of a complex admixed population, as well as pathway-based GWAS simulation. This new approach holds promise for deconvoluting the interactions between genes underlying the pathogenesis of complex diseases. Results obtained yield a novel central breast cancer sub-network of the human interactome implicated in the proteoglycan syndecan-mediated signaling events pathway which is known to play a major role in mesenchymal tumor cell proliferation, thus providing further insights into breast cancer pathogenesis.

Availability and implementation

The ancGWAS package and documents are available at http://www.cbio.uct.ac.za/~emile/software.html.",2015-10-27 +25953799,IonGAP: integrative bacterial genome analysis for Ion Torrent sequence data.,"

Unlabelled

We introduce IonGAP, a publicly available Web platform designed for the analysis of whole bacterial genomes using Ion Torrent sequence data. Besides assembly, it integrates a variety of comparative genomics, annotation and bacterial classification routines, based on the widely used FASTQ, BAM and SRA file formats. Benchmarking with different datasets evidenced that IonGAP is a fast, powerful and simple-to-use bioinformatics tool. By releasing this platform, we aim to translate low-cost bacterial genome analysis for microbiological prevention and control in healthcare, agroalimentary and pharmaceutical industry applications.

Availability and implementation

IonGAP is hosted by the ITER's Teide-HPC supercomputer and is freely available on the Web for non-commercial use at http://iongap.hpc.iter.es.

Contact

mcolesan@ull.edu.es or cflores@ull.edu.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-06 +27334471,CAGEd-oPOSSUM: motif enrichment analysis from CAGE-derived TSSs.,"

Unlabelled

With the emergence of large-scale Cap Analysis of Gene Expression (CAGE) datasets from individual labs and the FANTOM consortium, one can now analyze the cis-regulatory regions associated with gene transcription at an unprecedented level of refinement. By coupling transcription factor binding site (TFBS) enrichment analysis with CAGE-derived genomic regions, CAGEd-oPOSSUM can identify TFs that act as key regulators of genes involved in specific mammalian cell and tissue types. The webtool allows for the analysis of CAGE-derived transcription start sites (TSSs) either provided by the user or selected from ∼1300 mammalian samples from the FANTOM5 project with pre-computed TFBS predicted with JASPAR TF binding profiles. The tool helps power insights into the regulation of genes through the study of the specific usage of TSSs within specific cell types and/or under specific conditions.

Availability and implementation

The CAGEd-oPOSUM web tool is implemented in Perl, MySQL and Apache and is available at http://cagedop.cmmt.ubc.ca/CAGEd_oPOSSUM CONTACTS: anthony.mathelier@ncmm.uio.no or wyeth@cmmt.ubc.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-09 +26072515,Inferring orthologous gene regulatory networks using interspecies data fusion.,"

Motivation

The ability to jointly learn gene regulatory networks (GRNs) in, or leverage GRNs between related species would allow the vast amount of legacy data obtained in model organisms to inform the GRNs of more complex, or economically or medically relevant counterparts. Examples include transferring information from Arabidopsis thaliana into related crop species for food security purposes, or from mice into humans for medical applications. Here we develop two related Bayesian approaches to network inference that allow GRNs to be jointly inferred in, or leveraged between, several related species: in one framework, network information is directly propagated between species; in the second hierarchical approach, network information is propagated via an unobserved 'hypernetwork'. In both frameworks, information about network similarity is captured via graph kernels, with the networks additionally informed by species-specific time series gene expression data, when available, using Gaussian processes to model the dynamics of gene expression.

Results

Results on in silico benchmarks demonstrate that joint inference, and leveraging of known networks between species, offers better accuracy than standalone inference. The direct propagation of network information via the non-hierarchical framework is more appropriate when there are relatively few species, while the hierarchical approach is better suited when there are many species. Both methods are robust to small amounts of mislabelling of orthologues. Finally, the use of Saccharomyces cerevisiae data and networks to inform inference of networks in the budding yeast Schizosaccharomyces pombe predicts a novel role in cell cycle regulation for Gas1 (SPAC19B12.02c), a 1,3-beta-glucanosyltransferase.

Availability and implementation

MATLAB code is available from http://go.warwick.ac.uk/systemsbiology/software/.",2015-06-01 +21751369,Reactome pathway analysis to enrich biological discovery in proteomics data sets.,"Reactome (http://www.reactome.org) is an open-source, expert-authored, peer-reviewed, manually curated database of reactions, pathways and biological processes. We provide an intuitive web-based user interface to pathway knowledge and a suite of data analysis tools. The Pathway Browser is a Systems Biology Graphical Notation-like visualization system that supports manual navigation of pathways by zooming, scrolling and event highlighting, and that exploits PSI Common Query Interface web services to overlay pathways with molecular interaction data from the Reactome Functional Interaction Network and interaction databases such as IntAct, ChEMBL and BioGRID. Pathway and expression analysis tools employ web services to provide ID mapping, pathway assignment and over-representation analysis of user-supplied data sets. By applying Ensembl Compara to curated human proteins and reactions, Reactome generates pathway inferences for 20 other species. The Species Comparison tool provides a summary of results for each of these species as a table showing numbers of orthologous proteins found by pathway from which users can navigate to inferred details for specific proteins and reactions. Reactome's diverse pathway knowledge and suite of data analysis tools provide a platform for data mining, modeling and analysis of large-scale proteomics data sets. This Tutorial is part of the International Proteomics Tutorial Programme (IPTP 8).",2011-09-01 +23734660,Query enhancement through the practical application of ontology: the IEDB and OBI.,"Ontologies categorize entities, express relationships between them, and provide standardized definitions. Thus, they can be used to present and enforce the specific relationships between database components. 
The Immune Epitope Database (IEDB, http://www.iedb.org) utilizes the Ontology for Biomedical Investigations (OBI) and several additional ontologies to represent immune epitope mapping experiments. Here, we describe our experiences utilizing this representation in order to provide enhanced database search functionality. We applied a simple approach to incorporate the benefits of the information captured in a formal ontology directly into the user web interface, resulting in an improved user experience with minimal changes to the database itself. The integration is easy to maintain, provides standardized terms and definitions, and allows for subsumption queries. In addition to these immediate benefits, our long-term goal is to enable true semantic integration of data and knowledge in the biomedical domain. We describe our progress towards that goal and what we perceive as the main obstacles.",2013-04-15 +26615214,Accurate continuous geographic assignment from low- to high-density SNP data.,"

Motivation

Large-scale genotype datasets can help track the dispersal patterns of epidemiological outbreaks and predict the geographic origins of individuals. Such genetically-based geographic assignments also show a range of possible applications in forensics for profiling both victims and criminals, and in wildlife management, where poaching hotspot areas can be located. They, however, require fast and accurate statistical methods to handle the growing amount of genetic information made available from genotype arrays and next-generation sequencing technologies.

Results

We introduce a novel statistical method for geopositioning individuals of unknown origin from genotypes. Our method is based on a geostatistical model trained with a dataset of georeferenced genotypes. Statistical inference under this model can be implemented within the theoretical framework of Integrated Nested Laplace Approximation, which represents one of the major recent breakthroughs in statistics, as it does not require Monte Carlo simulations. We compare the performance of our method and an alternative method for geospatial inference, SPA in a simulation framework. We highlight the accuracy and limits of continuous spatial assignment methods at various scales by analyzing genotype datasets from a diversity of species, including Florida Scrub-jay birds Aphelocoma coerulescens, Arabidopsis thaliana and humans, representing 41-197,146 SNPs. Our method appears to be best suited for the analysis of medium-sized datasets (a few tens of thousands of loci), such as reduced-representation sequencing data that become increasingly available in ecology.

Availability and implementation

http://www2.imm.dtu.dk/∼gigu/Spasiba/

Contact

gilles.b.guillot@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-28 +24166131,Klebsiella spp. in endoscopy-associated infections: we may only be seeing the tip of the iceberg.,"

Purpose

Two endoscopy-associated nosocomial outbreaks caused by carbapenemase-producing Klebsiella pneumoniae (CPKP) were recently observed in two German hospitals. In this study, we performed a systematic search of the medical literature in order to elucidate the epidemiology of Klebsiella spp. in endoscopy-associated outbreaks.

Methods

Medline, the Outbreak Database ( http://www.outbreak-database.com ) and reference lists of articles extracted from these databases were screened for descriptions of endoscopy-associated nosocomial outbreaks. The data extracted and analysed were: (1) the type of medical department affected; (2) characterisation of pathogen to species and conspicuous resistance patterns (if applicable); (3) type of endoscope and the grade of its contamination; (4) number and the types of infections; (5) actual cause of the outbreak.

Results

A total of seven nosocomial outbreaks were identified, of which six were outbreaks of endoscopic retrograde cholangiopancreatography-related infections and caused by contaminated duodenoscopes. Including our own outbreaks in the analysis, we identified one extended-spectrum beta-lactamase-producing K. pneumoniae strain and six CPKP strains. Insufficient reprocessing after the use of the endoscope was the main reason for subsequent pathogen transmission.

Conclusions

There were only two reports of nosocomial outbreaks due to Klebsiella spp. in the first three decades of endoscopic procedures, but seven additional outbreaks of this kind have been reported within the last 4 years. It is very likely that many of such outbreaks have been missed in the past because this pathogen belongs to the physiological gut flora. However, with the emergence of highly resistant (carbapenemase-producing) strains, strict adherence to infection control guidelines is more important than ever.",2013-10-29 +23071556,MK4MDD: a multi-level knowledge base and analysis platform for major depressive disorder.,"

Background

Major depressive disorder (MDD) is a complex neuropsychiatric syndrome with high heterogeneity. There are different levels of biological components that underlie MDD and interact with each other. To uncover the disease mechanism, large numbers of studies at different levels have been conducted. There is a growing need to integrate data from multiple levels of research into a database to provide a systematic review of current research results. The cross level integration will also help bridge gaps of different research levels for further understanding on MDD. So far, there has been no such effort for MDD.

Descriptions

We offer researchers a Multi-level Knowledge base for MDD (MK4MDD) to study the interesting interplay of components in the pathophysiological cascade of MDD from genetic variations to diagnostic syndrome. MK4MDD contains 2,341 components and 5,206 relationships between components based on reported experimental results obtained by diligent literature reading with manual curation. All components were well classified with careful curation and supplementary annotation. The powerful search and visualization tools make all data in MK4MDD form a cross-linked network to be applied to a broad range of both basic and applied research.

Conclusions

MK4MDD aims to provide researchers with a central knowledge base and analysis platform for MDD etiological and pathophysiological mechanisms research. MK4MDD is freely available at http://mdd.psych.ac.cn.",2012-10-05 +25183748,Cohort Profile: The National Academy of Sciences-National Research Council Twin Registry (NAS-NRC Twin Registry).,"The National Academy of Sciences-National Research Council Twin Registry (NAS-NRC Twin Registry) is a comprehensive registry of White male twin pairs born in the USA between 1917 and 1927, both of the twins having served in the military. The purpose was medical research and ultimately improved clinical care. The cohort was assembled in the early 1960s with identification of approximately 16,000 twin pairs, review of service records, a brief mailed questionnaire assessing zygosity, and a health survey largely comparable to questionnaires used at that time with Scandinavian twin registries. Subsequent large-scale data collection occurred in 1974, 1985 and 1998, repeating the health survey and including information on education, employment history and earnings. Self-reported data have been supplemented with mortality, disability and medical data through record linkage. Potential collaborators should access the study website [http://www.iom.edu/Activities/Veterans/TwinsStudy.aspx] or e-mail the Medical Follow-up Agency at [Twins@nas.edu]. Questionnaire data are being prepared for future archiving with the National Archive of Computerized Data on Aging (NACDA) at the Inter-University Consortium for Political and Social Research (ICPSR), University of Michigan, MI.",2014-09-01 +26617077,"Human gut endogenous proteins as a potential source of angiotensin-I-converting enzyme (ACE-I)-, renin inhibitory and antioxidant peptides.","It is well known that endogenous bioactive proteins and peptides play a substantial role in the body's first line of immunological defence, immune-regulation and normal body functioning. 
Further, the peptides derived from the luminal digestion of proteins are also important for body function. For example, within the peptide database BIOPEP (http://www.uwm.edu.pl/biochemia/index.php/en/biopep) 12 endogenous antimicrobial and 64 angiotensin-I-converting enzyme (ACE-I) inhibitory peptides derived from human milk and plasma proteins are listed. The antimicrobial peptide database (http://aps.unmc.edu/AP/main.php) lists over 111 human host-defence peptides. Several endogenous proteins are secreted in the gut and are subject to the same gastrointestinal digestion processes as food proteins derived from the diet. The human gut endogenous proteins (GEP) include mucins, serum albumin, digestive enzymes, hormones, and proteins from sloughed off epithelial cells and gut microbiota, and numerous other secreted proteins. To date, much work has been carried out regarding the health altering effects of food-derived bioactive peptides but little attention has been paid to the possibility that GEP may also be a source of bioactive peptides. In this review, we discuss the potential of GEP to constitute a gut cryptome from which bioactive peptides such as ACE-I inhibitory, renin inhibitory and antioxidant peptides may be derived.",2015-11-23 +26272983,Mutadelic: mutation analysis using description logic inferencing capabilities.,"

Motivation

As next generation sequencing gains a foothold in clinical genetics, there is a need for annotation tools to characterize increasing amounts of patient variant data for identifying clinically relevant mutations. While existing informatics tools provide efficient bulk variant annotations, they often generate excess information that may limit their scalability.

Results

We propose an alternative solution based on description logic inferencing to generate workflows that produce only those annotations that will contribute to the interpretation of each variant. Workflows are dynamically generated using a novel abductive reasoning framework called a basic framework for abductive workflow generation (AbFab). Criteria for identifying disease-causing variants in Mendelian blood disorders were identified and implemented as AbFab services. A web application was built allowing users to run workflows generated from the criteria to analyze genomic variants. Significant variants are flagged and explanations provided for why they match or fail to match the criteria.

Availability and implementation

The Mutadelic web application is available for use at http://krauthammerlab.med.yale.edu/mutadelic.

Contact

michael.krauthammer@yale.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-12 +26602694,APD3: the antimicrobial peptide database as a tool for research and education.,"The antimicrobial peptide database (APD, http://aps.unmc.edu/AP/) is an original database initially online in 2003. The APD2 (2009 version) has been regularly updated and further expanded into the APD3. This database currently focuses on natural antimicrobial peptides (AMPs) with defined sequence and activity. It includes a total of 2619 AMPs with 261 bacteriocins from bacteria, 4 AMPs from archaea, 7 from protists, 13 from fungi, 321 from plants and 1972 animal host defense peptides. The APD3 contains 2169 antibacterial, 172 antiviral, 105 anti-HIV, 959 antifungal, 80 antiparasitic and 185 anticancer peptides. Newly annotated are AMPs with antibiofilm, antimalarial, anti-protist, insecticidal, spermicidal, chemotactic, wound healing, antioxidant and protease inhibiting properties. We also describe other searchable annotations, including target pathogens, molecule-binding partners, post-translational modifications and animal models. Amino acid profiles or signatures of natural AMPs are important for peptide classification, prediction and design. Finally, we summarize various database applications in research and education.",2015-11-23 +25352556,The eSNV-detect: a computational system to identify expressed single nucleotide variants from transcriptome sequencing data.,"Rapid development of next generation sequencing technology has enabled the identification of genomic alterations from short sequencing reads. There are a number of software pipelines available for calling single nucleotide variants from genomic DNA but, no comprehensive pipelines to identify, annotate and prioritize expressed SNVs (eSNVs) from non-directional paired-end RNA-Seq data. 
We have developed the eSNV-Detect, a novel computational system, which utilizes data from multiple aligners to call, even at low read depths, and rank variants from RNA-Seq. Multi-platform comparisons with the eSNV-Detect variant candidates were performed. The method was first applied to RNA-Seq from a lymphoblastoid cell-line, achieving 99.7% precision and 91.0% sensitivity in the expressed SNPs for the matching HumanOmni2.5 BeadChip data. Comparison of RNA-Seq eSNV candidates from 25 ER+ breast tumors from The Cancer Genome Atlas (TCGA) project with whole exome coding data showed 90.6-96.8% precision and 91.6-95.7% sensitivity. Contrasting single-cell mRNA-Seq variants with matching traditional multicellular RNA-Seq data for the MD-MB231 breast cancer cell-line delineated variant heterogeneity among the single-cells. Further, Sanger sequencing validation was performed for an ER+ breast tumor with paired normal adjacent tissue validating 29 out of 31 candidate eSNVs. The source code and user manuals of the eSNV-Detect pipeline for Sun Grid Engine and virtual machine are available at http://bioinformaticstools.mayo.edu/research/esnv-detect/.",2014-10-28 +25406415,PCP-ML: protein characterization package for machine learning.,"

Background

Machine Learning (ML) has a number of demonstrated applications in protein prediction tasks such as protein structure prediction. To speed further development of machine learning based tools and their release to the community, we have developed a package which characterizes several aspects of a protein commonly used for protein prediction tasks with machine learning.

Findings

A number of software libraries and modules exist for handling protein related data. The package we present in this work, PCP-ML, is unique in its small footprint and emphasis on machine learning. Its primary focus is on characterizing various aspects of a protein through sets of numerical data. The generated data can then be used with machine learning tools and/or techniques. PCP-ML is very flexible in how the generated data is formatted and as a result is compatible with a variety of existing machine learning packages. Given its small size, it can be directly packaged and distributed with community developed tools for protein prediction tasks.

Conclusions

Source code and example programs are available under a BSD license at http://mlid.cps.cmich.edu/eickh1jl/tools/PCPML/. The package is implemented in C++ and accessible as a Python module.",2014-11-18 +26921398,"Structural analyses to identify selective inhibitors of glyceraldehyde 3-phosphate dehydrogenase-S, a sperm-specific glycolytic enzyme.","

Study hypothesis

Detailed structural comparisons of sperm-specific glyceraldehyde 3-phosphate dehydrogenase, spermatogenic (GAPDHS) and the somatic glyceraldehyde 3-phosphate dehydrogenase (GAPDH) isozyme should facilitate the identification of selective GAPDHS inhibitors for contraceptive development.

Study finding

This study identified a small-molecule GAPDHS inhibitor with micromolar potency and >10-fold selectivity that exerts the expected inhibitory effects on sperm glycolysis and motility.

What is known already

Glycolytic ATP production is required for sperm motility and male fertility in many mammalian species. Selective inhibition of GAPDHS, one of the glycolytic isozymes with restricted expression during spermatogenesis, is a potential strategy for the development of a non-hormonal contraceptive that directly blocks sperm function.

Study design, samples/materials, methods

Homology modeling and x-ray crystallography were used to identify structural features that are conserved in GAPDHS orthologs in mouse and human sperm, but distinct from the GAPDH orthologs present in somatic tissues. We identified three binding pockets surrounding the substrate and cofactor in these isozymes and conducted a virtual screen to identify small-molecule compounds predicted to bind more tightly to GAPDHS than to GAPDH. Following the production of recombinant human and mouse GAPDHS, candidate compounds were tested in dose-response enzyme assays to identify inhibitors that blocked the activity of GAPDHS more effectively than GAPDH. The effects of a selective inhibitor on the motility of mouse and human sperm were monitored by computer-assisted sperm analysis, and sperm lactate production was measured to assess inhibition of glycolysis in the target cell.

Main results and the role of chance

Our studies produced the first apoenzyme crystal structures for human and mouse GAPDHS and a 1.73 Å crystal structure for NAD(+)-bound human GAPDHS, facilitating the identification of unique structural features of this sperm isozyme. In dose-response assays T0501_7749 inhibited human GAPDHS with an IC50 of 1.2 μM compared with an IC50 of 38.5 μM for the somatic isozyme. This compound caused significant reductions in mouse sperm lactate production (P= 0.017 for 100 μM T0501_7749 versus control) and in the percentage of motile mouse and human sperm (P values from <0.05 to <0.0001, depending on incubation conditions).

Limitations, reasons for caution

The chemical properties of T0501_7749, including limited solubility and nonspecific protein binding, are not optimal for drug development.

Wider implications of the findings

This study provides proof-of-principle evidence that GAPDHS can be selectively inhibited, causing significant reductions in sperm glycolysis and motility. These results highlight the utility of structure-based drug design and support further exploration of GAPDHS, and perhaps other sperm-specific isozymes in the glycolytic pathway, as contraceptive targets.

Large scale data

None. Coordinates and data files for three GAPDHS crystal structures were deposited in the RCSB Protein Data Bank (http://www.rcsb.org).

Study funding and competing interests

This work was supported by grants from the National Institutes of Health (NIH), USA, including U01 HD060481 and cooperative agreement U54 HD35041 as part of the Specialized Cooperative Centers Program in Reproduction and Infertility Research from the Eunice Kennedy Shriver National Institute of Child Health and Human Development, and TW/HD00627 from the NIH Fogarty International Center. Additional support was provided by subproject CIG-05-109 from CICCR, a program of CONRAD, Eastern Virginia Medical School, USA. There are no conflicts of interest.",2016-02-26 +28062795,Stable Isotope Labeling with Amino Acids (SILAC)-Based Proteomics of Primary Human Kidney Cells Reveals a Novel Link between Male Sex Hormones and Impaired Energy Metabolism in Diabetic Kidney Disease.,"Male sex predisposes to many kidney diseases. Considering that androgens exert deleterious effects in a variety of cell types within the kidney, we hypothesized that dihydrotestosterone (DHT) would alter the biology of the renal tubular cell by inducing changes in the proteome. We employed stable isotope labeling with amino acids (SILAC) in an indirect spike-in fashion to accurately quantify the proteome in DHT- and 17β-estradiol (EST)-treated human proximal tubular epithelial cells (PTEC). Of the 5043 quantified proteins, 76 were differentially regulated. Biological processes related to energy metabolism were significantly enriched among DHT-regulated proteins. SILAC ratios of 3 candidates representing glycolysis, N-acetylglucosamine metabolism and fatty acid β-oxidation, namely glucose-6-phosphate isomerase (GPI), glucosamine-6-phosphate-N-acetyltransferase 1 (GNPNAT1), and mitochondrial trifunctional protein subunit alpha (HADHA), were verified in vitro. In vivo, renal GPI and HADHA protein expression was significantly increased in males. Furthermore, male sex was associated with significantly higher GPI, GNPNAT1, and HADHA kidney protein expression in two different murine models of diabetes. 
Enrichment analysis revealed a link between our DHT-regulated proteins and oxidative stress within the diabetic kidney. This finding was validated in vivo, as we observed increased oxidative stress levels in control and diabetic male kidneys, compared with females. This in depth quantitative proteomics study of human primary PTEC response to sex hormone administration suggests that male sex hormone stimulation results in perturbed energy metabolism in kidney cells, and that this perturbation results in increased oxidative stress in the renal cortex. The proteome-level changes associated with androgens may play a crucial role in the development of structural and functional changes in the diseased kidney. With our findings, we propose a possible link between diabetic and non-diabetic kidney disease progression and male sex hormone levels. Data are available via ProteomeXchange (https://www.ebi.ac.uk/pride/archive/) with identifier PXD003811.",2017-01-04 +23846747,CoDNaS: a database of conformational diversity in the native state of proteins.,"

Motivation

Conformational diversity is a key concept in the understanding of different issues related with protein function such as the study of catalytic processes in enzymes, protein-protein recognition, protein evolution and the origins of new biological functions. Here, we present a database of proteins with different degrees of conformational diversity. Conformational Diversity of Native State (CoDNaS) is a redundant collection of three-dimensional structures for the same protein derived from protein data bank. Structures for the same protein obtained under different crystallographic conditions have been associated with snapshots of protein dynamism and consequently could characterize protein conformers. CoDNaS allows the user to explore global and local structural differences among conformers as a function of different parameters such as presence of ligand, post-translational modifications, changes in oligomeric states and differences in pH and temperature. Additionally, CoDNaS contains information about protein taxonomy and function, disorder level and structural classification offering useful information to explore the underlying mechanism of conformational diversity and its close relationship with protein function. Currently, CoDNaS has 122 122 structures integrating 12 684 entries, with an average of 9.63 conformers per protein.

Availability

The database is freely available at http://www.codnas.com.ar/.",2013-07-11 +23821158,Adjustable versus non-adjustable sutures for strabismus.,"

Background

Strabismus, or squint, can be defined as a deviation from perfect ocular alignment and can be classified in many ways according to its aetiology and presentation. Treatment can be broadly divided into medical and surgical options, with a variety of surgical techniques being available, including the use of adjustable or non-adjustable sutures for the extraocular muscles. There exists an uncertainty as to which of these techniques produces a better surgical outcome, and also an opinion that the adjustable suture technique may be of greater benefit in certain situations.

Objectives

To examine whether adjustable or non-adjustable sutures are associated with a more accurate long-term ocular alignment following strabismus surgery and to identify any specific situations in which it would be of benefit to use a particular method.

Search methods

We searched CENTRAL (which contains the Cochrane Eyes and Vision Group Trials Register) (The Cochrane Library 2012, Issue 12), Ovid MEDLINE, Ovid MEDLINE In-Process and Other Non-Indexed Citations, Ovid MEDLINE Daily, Ovid OLDMEDLINE, (January 1950 to January 2013), EMBASE (January 1980 to January 2013), Latin American and Caribbean Literature on Health Sciences (LILACS) (January 1982 to January 2013), the metaRegister of Controlled Trials (mRCT) (www.controlled-trials.com), ClinicalTrials.gov (http://clinicaltrials.gov) and the WHO International Clinical Trials Registry Platform (ICTRP) (www.who.int/ictrp/search/en). We did not use any date or language restrictions in the electronic searches for trials. We last searched the electronic databases on 17 January 2013. We also contacted experts in the field for further information.

Selection criteria

We planned to include only randomised controlled trials (RCTs) comparing adjustable to non-adjustable sutures for strabismus surgery.

Data collection and analysis

We did not find any studies that met the inclusion criteria for this review.

Main results

We did not find any studies that met the inclusion criteria for this review, therefore none were included for analysis. Results of non-randomised studies that compared these techniques are reported.

Authors' conclusions

No reliable conclusions could be reached regarding which technique (adjustable or non-adjustable sutures) produces a more accurate long-term ocular alignment following strabismus surgery or in which specific situations one technique is of greater benefit than the other. High quality RCTs are needed to obtain clinically valid results and to clarify these issues. Such trials should ideally a) recruit participants with any type of strabismus or specify the subgroup of participants to be studied, for example, thyroid, paralytic, non-paralytic, paediatric; b) randomise all consenting participants to have either adjustable or non-adjustable surgery prospectively; c) have at least six months of follow-up data; and d) include re-operation rates as a primary outcome measure.",2013-07-02 +22286086,Using ensemble methods to deal with imbalanced data in predicting protein-protein interactions.,"In proteins, the number of interacting pairs is usually much smaller than the number of non-interacting ones. So the imbalanced data problem will arise in the field of protein-protein interactions (PPIs) prediction. In this article, we introduce two ensemble methods to solve the imbalanced data problem. These ensemble methods combine the based-cluster under-sampling technique and the fusion classifiers. And then we evaluate the ensemble methods using a dataset from Database of Interacting Proteins (DIP) with 10-fold cross validation. All the prediction models achieve area under the receiver operating characteristic curve (AUC) value about 95%. Our results show that the ensemble classifiers are quite effective in predicting PPIs; we also gain some valuable conclusions on the performance of ensemble methods for PPIs in imbalanced data. The prediction software and all dataset employed in the work can be obtained for free at http://cic.scu.edu.cn/bioinformatics/Ensemble_PPIs/index.html.",2012-01-03 +22938150,"Introducing the Forensic Research/Reference on Genetics knowledge base, FROG-kb.","

Background

Online tools and databases based on multi-allelic short tandem repeat polymorphisms (STRPs) are actively used in forensic teaching, research, and investigations. The Fst value of each CODIS marker tends to be low across the populations of the world and most populations typically have all the common STRP alleles present diminishing the ability of these systems to discriminate ethnicity. Recently, considerable research is being conducted on single nucleotide polymorphisms (SNPs) to be considered for human identification and description. However, online tools and databases that can be used for forensic research and investigation are limited.

Methods

The back end DBMS (Database Management System) for FROG-kb is Oracle version 10. The front end is implemented with specific code using technologies such as Java, Java Servlet, JSP, JQuery, and GoogleCharts.

Results

We present an open access web application, FROG-kb (Forensic Research/Reference on Genetics-knowledge base, http://frog.med.yale.edu), that is useful for teaching and research relevant to forensics and can serve as a tool facilitating forensic practice. The underlying data for FROG-kb are provided by the already extensively used and referenced ALlele FREquency Database, ALFRED (http://alfred.med.yale.edu). In addition to displaying data in an organized manner, computational tools that use the underlying allele frequencies with user-provided data are implemented in FROG-kb. These tools are organized by the different published SNP/marker panels available. This web tool currently has implemented general functions possible for two types of SNP panels, individual identification and ancestry inference, and a prediction function specific to a phenotype informative panel for eye color.

Conclusion

The current online version of FROG-kb already provides new and useful functionality. We expect FROG-kb to grow and expand in capabilities and welcome input from the forensic community in identifying datasets and functionalities that will be most helpful and useful. Thus, the structure and functionality of FROG-kb will be revised in an ongoing process of improvement. This paper describes the state as of early June 2012.",2012-09-01 +26927478,A polymer dataset for accelerated property prediction and design.,"Emerging computation- and data-driven approaches are particularly useful for rationally designing materials with targeted properties. Generally, these approaches rely on identifying structure-property relationships by learning from a dataset of sufficiently large number of relevant materials. The learned information can then be used to predict the properties of materials not already in the dataset, thus accelerating the materials design. Herein, we develop a dataset of 1,073 polymers and related materials and make it available at http://khazana.uconn.edu/. This dataset is uniformly prepared using first-principles calculations with structures obtained either from other sources or by using structure search methods. Because the immediate target of this work is to assist the design of high dielectric constant polymers, it is initially designed to include the optimized structures, atomization energies, band gaps, and dielectric constants. It will be progressively expanded by accumulating new materials and including additional properties calculated for the optimized structures provided.",2016-03-01 +26590254,UET: a database of evolutionarily-predicted functional determinants of protein sequences that cluster as functional sites in protein structures.,"The structure and function of proteins underlie most aspects of biology and their mutational perturbations often cause disease. 
To identify the molecular determinants of function as well as targets for drugs, it is central to characterize the important residues and how they cluster to form functional sites. The Evolutionary Trace (ET) achieves this by ranking the functional and structural importance of the protein sequence positions. ET uses evolutionary distances to estimate functional distances and correlates genotype variations with those in the fitness phenotype. Thus, ET ranks are worse for sequence positions that vary among evolutionarily closer homologs but better for positions that vary mostly among distant homologs. This approach identifies functional determinants, predicts function, guides the mutational redesign of functional and allosteric specificity, and interprets the action of coding sequence variations in proteins, people and populations. Now, the UET database offers pre-computed ET analyses for the protein structure databank, and on-the-fly analysis of any protein sequence. A web interface retrieves ET rankings of sequence positions and maps results to a structure to identify functionally important regions. This UET database integrates several ways of viewing the results on the protein sequence or structure and can be found at http://mammoth.bcm.tmc.edu/uet/.",2015-11-20 +23842463,A guide to best practices for Gene Ontology (GO) manual annotation.,"The Gene Ontology Consortium (GOC) is a community-based bioinformatics project that classifies gene product function through the use of structured controlled vocabularies. A fundamental application of the Gene Ontology (GO) is in the creation of gene product annotations, evidence-based associations between GO definitions and experimental or sequence-based analysis. Currently, the GOC disseminates 126 million annotations covering >374,000 species including all the kingdoms of life. 
This number includes two classes of GO annotations: those created manually by experienced biocurators reviewing the literature or by examination of biological data (1.1 million annotations covering 2226 species) and those generated computationally via automated methods. As manual annotations are often used to propagate functional predictions between related proteins within and between genomes, it is critical to provide accurate consistent manual annotations. Toward this goal, we present here the conventions defined by the GOC for the creation of manual annotation. This guide represents the best practices for manual annotation as established by the GOC project over the past 12 years. We hope this guide will encourage research communities to annotate gene products of their interest to enhance the corpus of GO annotations available to all. DATABASE URL: http://www.geneontology.org.",2013-07-09 +23874379,Re-annotation of protein-coding genes in the genome of saccharomyces cerevisiae based on support vector machines.,"The annotation of the well-studied organism, Saccharomyces cerevisiae, has been improving over the past decade while there are unresolved debates over the amount of biologically significant open reading frames (ORFs) in yeast genome. We revisited the total count of protein-coding genes in S. cerevisiae S288c genome using a theoretical approach by combining the Support Vector Machine (SVM) method with six widely used measurements of sequence statistical features. The accuracy of our method is over 99.5% in 10-fold cross-validation. Based on the annotation data in Saccharomyces Genome Database (SGD), we studied the coding capacity of all 1744 ORFs which lack experimental results and suggested that the overall number of chromosomal ORFs encoding proteins in yeast should be 6091 by removing 488 spurious ORFs. The importance of the present work lies in at least two aspects. 
First, cross-validation and retrospective examination showed the fidelity of our method in recognizing ORFs that likely encode proteins. Second, we have provided a web service that can be accessed at http://cobi.uestc.edu.cn/services/yeast/, which enables the prediction of protein-coding ORFs of the genus Saccharomyces with a high accuracy.",2013-07-10 +28003221,Regional Longitudinal Deformation Improves Prediction of Ventricular Tachyarrhythmias in Patients With Heart Failure With Reduced Ejection Fraction: A MADIT-CRT Substudy (Multicenter Automatic Defibrillator Implantation Trial-Cardiac Resynchronization Therapy). ,"Left ventricular dysfunction is a known predictor of ventricular arrhythmias. We hypothesized that measures of regional longitudinal deformation by speckle-tracking echocardiography predict ventricular tachyarrhythmias and provide incremental prognostic information over clinical and conventional echocardiographic characteristics. We studied 1064 patients enrolled in the MADIT-CRT trial (Multicenter Automatic Defibrillator Implantation Trial-Cardiac Resynchronization Therapy) with speckle-tracking data available. Peak longitudinal strain was obtained for the septal, lateral, anterior, and inferior myocardial walls at baseline. The end point was the first event of ventricular tachycardia (VT) or fibrillation (VF). During the median follow-up of 2.9 years, 254 (24%) patients developed VT/VF. Patients with VT/VF had significantly lower left ventricular ejection fraction (28.3% versus 29.5%; P<0.001) and longitudinal strain in all myocardial walls compared with patients without VT/VF (anterior-strain, -7.7% versus -8.8%; P<0.001; lateral-strain, -7.3% versus -7.9%; P=0.022; inferior-strain, -8.3% versus -9.9%; P<0.001; septal-strain, -9.1% versus -10.0%; P<0.001). 
After multivariate adjustment, only anterior and inferior longitudinal strain remained independent predictors of VT/VF (anterior: hazard ratio, 1.08 [1.03-1.13]; P=0.001; inferior: hazard ratio, 1.08 [1.04-1.12]; P<0.001; per 1% absolute decrease for both). When including B-type natriuretic peptide in the model, only a decreasing myocardial function in the inferior myocardial wall predicted VT/VF (hazard ratio, 1.05 [1.00-1.11]; P=0.039). Only strain obtained from the inferior myocardial wall provided incremental prognostic information for VT/VF over clinical and echocardiographic parameters (C statistic 0.71 versus 0.69; P=0.005). Assessment of regional longitudinal myocardial deformation in the inferior region provided incremental prognostic information over clinical and echocardiographic risk factors in predicting ventricular tachyarrhythmias. URL: http://www.clinicaltrials.gov. Unique identifier: NCT00180271.",2017-01-01 +26209430,"CRISPR-ERA: a comprehensive design tool for CRISPR-mediated gene editing, repression and activation.","

Unlabelled

The CRISPR/Cas9 system was recently developed as a powerful and flexible technology for targeted genome engineering, including genome editing (altering the genetic sequence) and gene regulation (without altering the genetic sequence). These applications require the design of single guide RNAs (sgRNAs) that are efficient and specific. However, this remains challenging, as it requires the consideration of many criteria. Several sgRNA design tools have been developed for gene editing, but currently there is no tool for the design of sgRNAs for gene regulation. With accumulating experimental data on the use of CRISPR/Cas9 for gene editing and regulation, we implement a comprehensive computational tool based on a set of sgRNA design rules summarized from these published reports. We report a genome-wide sgRNA design tool and provide an online website for predicting sgRNAs that are efficient and specific. We name the tool CRISPR-ERA, for clustered regularly interspaced short palindromic repeat-mediated editing, repression, and activation (ERA).

Availability and implementation

http://CRISPR-ERA.stanford.edu.

Contact

stanley.qi@stanford.edu or xwwang@tsinghua.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-23 +28201982,Transcriptome profile of rat genes in injured spinal cord at different stages by RNA-sequencing.,"

Background

Spinal cord injury (SCI) results in fatal damage and currently has no effective treatment. The pathological mechanisms of SCI remain unclear. In this study, genome-wide transcriptional profiling of spinal cord samples from injured rats at different time points after SCI was performed by RNA-Sequencing (RNA-Seq). The transcriptomes were systematically characterized to identify the critical genes and pathways that are involved in SCI pathology.

Results

RNA-Seq results were obtained from total RNA harvested from the spinal cords of sham control rats and rats in the acute, subacute, and chronic phases of SCI (1 day, 6 days and 28 days after injury, respectively; n = 3 in every group). Compared with the sham-control group, the number of differentially expressed genes was 1797 in the acute phase (1223 upregulated and 574 downregulated), 6590 in the subacute phase (3460 upregulated and 3130 downregulated), and 3499 in the chronic phase (1866 upregulated and 1633 downregulated), with an adjusted P-value <0.05 by DESeq. Gene ontology (GO) enrichment analysis showed that differentially expressed genes were most enriched in immune response, MHC protein complex, antigen processing and presentation, translation-related genes, structural constituent of ribosome, ion gated channel activity, small GTPase mediated signal transduction and cytokine and/or chemokine activity. Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway analysis showed that the most enriched pathways included ribosome, antigen processing and presentation, retrograde endocannabinoid signaling, axon guidance, dopaminergic synapses, glutamatergic synapses, GABAergic synapses, TNF, HIF-1, Toll-like receptor, NF-kappa B, NOD-like receptor, cAMP, calcium, oxytocin, Rap1, B cell receptor and chemokine signaling pathway.

Conclusions

This study has not only characterized changes in global gene expression through various stages of SCI progression in rats, but has also systematically identified the critical genes and signaling pathways in SCI pathology. These results will expand our understanding of the complex molecular mechanisms involved in SCI and provide a foundation for future studies of spinal cord tissue damage and repair. The sequence data from this study have been deposited into Sequence Read Archive ( http://www.ncbi.nlm.nih.gov/sra ; accession number PRJNA318311).",2017-02-15 +25344500,A coalescent-based method for population tree inference with haplotypes.,"

Motivation

Population trees represent past population divergence histories. The inference of population trees can be useful for the study of population evolution. With the size of data increases in large-scale population genetic projects, such as the 1000 Genomes Project, there are new computational challenges for ancestral population inference, including population tree inference. Existing methods for population tree inference are mainly designed for unlinked genetic variants (e.g. single nucleotide polymorphisms or SNPs). There is a potential loss of information by not considering the haplotypes.

Results

In this article, we propose a new population tree inference method (called STELLSH) based on coalescent likelihood. The likelihood is for haplotypes over multiple SNPs within a non-recombining region, not unlinked variants. Unlike many existing ancestral inference methods, STELLSH does not use Monte Carlo approaches when computing the likelihood. For efficient computation, the likelihood model is approximated but still retains much information about population divergence history. STELLSH can find the maximum likelihood population tree based on the approximate likelihood. We show through simulation data and the 1000 Genomes Project data that STELLSH gives reasonably accurate inference results. STELLSH is reasonably efficient for data of current interest and can scale to handle whole-genome data.

Availability and implementation

The population tree inference method STELLSH has been implemented as part of the STELLS program: http://www.engr.uconn.edu/~ywu/STELLS.html.",2014-10-24 +26527189,GITIRBio: A Semantic and Distributed Service Oriented-Architecture for Bioinformatics Pipeline.,"The need to process large quantities of data generated from genomic sequencing has resulted in a difficult task for life scientists who are not familiar with the use of command-line operations or developments in high performance computing and parallelization. This knowledge gap, along with unfamiliarity with necessary processes, can hinder the execution of data processing tasks. Furthermore, many of the commonly used bioinformatics tools for the scientific community are presented as isolated, unrelated entities that do not provide an integrated, guided, and assisted interaction with the scheduling facilities of computational resources or distribution, processing and mapping with runtime analysis. This paper presents the first approximation of a Web Services platform-based architecture (GITIRBio) that acts as a distributed front-end system for autonomous and assisted processing of parallel bioinformatics pipelines that has been validated using multiple sequences. Additionally, this platform allows integration with semantic repositories of genes for search annotations. GITIRBio is available at: http://c-head.ucaldas.edu.co:8080/gitirbio.",2015-05-20 +26586797,DIANA-miRGen v3.0: accurate characterization of microRNA promoters and their regulators.,"microRNAs (miRNAs) are small non-coding RNAs that actively fine-tune gene expression. The accurate characterization of the mechanisms underlying miRNA transcription regulation will further expand our knowledge regarding their implication in homeostatic and pathobiological networks. 
Aim of DIANA-miRGen v3.0 (http://www.microrna.gr/mirgen) is to provide for the first time accurate cell-line-specific miRNA gene transcription start sites (TSSs), coupled with genome-wide maps of transcription factor (TF) binding sites in order to unveil the mechanisms of miRNA transcription regulation. To this end, more than 7.3 billion RNA-, ChIP- and DNase-Seq next generation sequencing reads were analyzed/assembled and combined with state-of-the-art miRNA TSS prediction and TF binding site identification algorithms. The new database schema and web interface facilitates user interaction, provides advanced queries and innate connection with other DIANA resources for miRNA target identification and pathway analysis. The database currently supports 276 miRNA TSSs that correspond to 428 precursors and >19M binding sites of 202 TFs on a genome-wide scale in nine cell-lines and six tissues of Homo sapiens and Mus musculus.",2015-11-19 +26507355,Mandibulofacial Dysostosis with Microcephaly: Mutation and Database Update.,"Mandibulofacial dysostosis with microcephaly (MFDM) is a multiple malformation syndrome comprising microcephaly, craniofacial anomalies, hearing loss, dysmorphic features, and, in some cases, esophageal atresia. Haploinsufficiency of a spliceosomal GTPase, U5-116 kDa/EFTUD2, is responsible. Here, we review the molecular basis of MFDM in the 69 individuals described to date, and report mutations in 38 new individuals, bringing the total number of reported individuals to 107 individuals from 94 kindreds. Pathogenic EFTUD2 variants comprise 76 distinct mutations and seven microdeletions. Among point mutations, missense substitutions are infrequent (14 out of 76; 18%) relative to stop-gain (29 out of 76; 38%), and splicing (33 out of 76; 43%) mutations. Where known, mutation origin was de novo in 48 out of 64 individuals (75%), dominantly inherited in 12 out of 64 (19%), and due to proven germline mosaicism in four out of 64 (6%). 
Highly penetrant clinical features include, microcephaly, first and second arch craniofacial malformations, and hearing loss; esophageal atresia is present in an estimated ∼27%. Microcephaly is virtually universal in childhood, with some adults exhibiting late ""catch-up"" growth and normocephaly at maturity. Occasionally reported anomalies, include vestibular and ossicular malformations, reduced mouth opening, atrophy of cerebral white matter, structural brain malformations, and epibulbar dermoid. All reported EFTUD2 mutations can be found in the EFTUD2 mutation database (http://databases.lovd.nl/shared/genes/EFTUD2).",2015-11-19 +30708887,First Report of Downy Mildew Caused by Plasmopara halstedii on Black-eyed Susan (Rudbeckia fulgida cv. 'Goldsturm') in Maryland.,"The North American perennial black-eyed Susan (Rudbeckia fulgida cv. Goldsturm) is an important nursery crop, prized by gardeners and landscapers for its persistent bloom and ease of cultivation. In September 2013, disease symptoms characteristic of downy mildew were observed from multiple R. fulgida plants at two commercial nurseries in the Maryland counties of Howard and Anne Arundel. Over 100 R. fulgida were affected by this disease in both nurseries, rendering the plants unmarketable and causing a substantial financial loss. Plants exhibited dark necrotic lesions on the adaxial leaf surface, and sporulating masses of white mycelium on the abaxial leaf surface and on the adaxial in extreme infections. Plants were stunted with a reduced number of blooms. Microscopic visualization showed coenocytic mycelium, hyaline sporangiophores (length 261 to 904 μm; mean = 557 μm; n = 20) that were straight and monopodially branched at right angles with several terminal branchlets. Sporangia were hyaline, ovoid to elliptical with smooth surfaces (mean = 31 × 28 μm; n = 50). Based on morphological data, the organism was identified as Plasmopara halstedii (Farl.) Berl. & De Toni in Sacc (2). 
Voucher specimens were deposited in the U.S. National Fungus Collections (BPI 892792 to 892794). Molecular identification was conducted by extracting genomic DNA from sporangiophores and mycelium tweezed from the surface of three infected plants, with extractions performed using the QIAGEN Plant DNA kit (QIAGEN, Gaithersburg, MD). The large subunit of the nuclear rDNA was amplified by PCR using primers LROR and LR7 (3) and sequenced bidirectionally. BLASTn searches of NCBI GenBank showed that the resultant rDNA sequences (accessions KF927152 to KF927154) shared 99% nucleotide identity with curated P. halstedii sequences, consistent with morphological identification. To confirm pathogenicity, three 3.78-liter (1 gallon) containerized R. fulgida cv. Goldsturm plants were inoculated with a sporangial suspension of 2.4 × 104 sporangia/ml and sprayed until both the upper and lower surface of the leaves were completely covered. One negative control plant was sprayed with deionized water. Plants were placed in clear plastic bags in a growth chamber (20°C, 12-h photoperiod). Disease symptoms were observed 3 days post inoculation on all plants. The control plant was symptomless. Morphological features of the pathogen on the surface of inoculated plants were identical to those observed from the original infected plants. Although P. halstedii on R. fulgida cv. Goldsturm has been previously reported in Virginia in 2006 and Florida in 2004, to our knowledge, this is the first report on R. fulgida cv. Goldsturm in Maryland (1). Black-eyed Susans are widely distributed throughout Maryland's landscape and are a staple plant for gardeners, nurserymen and landscape professionals. Given the destructive nature of this disease, downy mildew has the potential to cause considerable economic losses to the state's ornamental crop industry. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Syst. Mycol. Microbiol. Lab., ARS, USDA. 
Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , November 18, 2013. (2) P. A. Saccardo. Syllogue Fungorum 7:242, 1888. (3) R. Vilgalys and M. Hester. J. Bacteriol. 172:4238, 1990.",2014-07-01 +26595909,Factorized Graph Matching.,"Graph matching (GM) is a fundamental problem in computer science, and it plays a central role to solve correspondence problems in computer vision. GM problems that incorporate pairwise constraints can be formulated as a quadratic assignment problem (QAP). Although widely used, solving the correspondence problem through GM has two main limitations: (1) the QAP is NP-hard and difficult to approximate; (2) GM algorithms do not incorporate geometric constraints between nodes that are natural in computer vision problems. To address aforementioned problems, this paper proposes factorized graph matching (FGM). FGM factorizes the large pairwise affinity matrix into smaller matrices that encode the local structure of each graph and the pairwise affinity between edges. Four are the benefits that follow from this factorization: (1) There is no need to compute the costly (in space and time) pairwise affinity matrix; (2) The factorization allows the use of a path-following optimization algorithm, that leads to improved optimization strategies and matching performance; (3) Given the factorization, it becomes straight-forward to incorporate geometric transformations (rigid and non-rigid) to the GM problem. (4) Using a matrix formulation for the GM problem and the factorization, it is easy to reveal commonalities and differences between different GM methods. The factorization also provides a clean connection with other matching algorithms such as iterative closest point; Experimental results on synthetic and real databases illustrate how FGM outperforms state-of-the-art algorithms for GM. 
The code is available at http://humansensing.cs.cmu.edu/fgm.",2015-11-19 +26586804,: a database of ciliate genome rearrangements.,"Ciliated protists exhibit nuclear dimorphism through the presence of somatic macronuclei (MAC) and germline micronuclei (MIC). In some ciliates, DNA from precursor segments in the MIC genome rearranges to form transcriptionally active genes in the mature MAC genome, making these ciliates model organisms to study the process of somatic genome rearrangement. Similar broad scale, somatic rearrangement events occur in many eukaryotic cells and tumors. The (http://oxytricha.princeton.edu/mds_ies_db) is a database of genome recombination and rearrangement annotations, and it provides tools for visualization and comparative analysis of precursor and product genomes. The database currently contains annotations for two completely sequenced ciliate genomes: Oxytricha trifallax and Tetrahymena thermophila.",2015-11-19 +30708643,First Report of Nigrospora Leaf Blight on Sesame Caused by Nigrospora sphaerica in China.,"Sesame (Sesamum indicum L.) is an important oilseed crop widely grown in the central regions of China. A new leaf blight has increasingly been observed in sesame fields in Anhui, Hubei, and Henan provinces since 2010. Approximately 30 to 40% of the plants were symptomatic in the affected fields. Initial symptoms were yellow to brown, irregularly shaped lesions. Lesions later expanded and the affected leaves tuned grayish to dark brown and wilted, with a layer of whitish mycelial growth on the underside. Severe blighting caused the center of lesions to fall out, leaving holes in the leaves. Sections of symptomatic leaf tissues were surface-sterilized in 75% ethanol for 30 s, then in 1% HgCl2 for 30 s, rinsed three times in sterile distilled water, and plated onto potato dextrose agar (PDA). The resulting fungal colonies were initially white, and then became grayish-brown with sporulation. 
Conidia were single-celled, black, smooth, spherical, 14.2 to 19.8 μm (average 17.1 μm) in diameter, and borne on a hyaline vesicle at the tip of each conidiophore. Morphological characteristics of the isolates were similar to those of Nigrospora sphaerica (1). To verify the identification based on morphological features, the ITS1-5.8S-ITS2 region of the ribosomal RNA was amplified using ITS1 (5'-TCCGTAGGTGAACCTGCGG-3') and ITS4 (5'-TCCTCCGCTTATTGATATGC-3') primers (3), and then sequenced and compared to the GenBank database through a BLAST search. Comparison of the sequence revealed 100% similarity to N. sphaerica (GenBank Accession No. JF817271.1). On the basis of morphological data and the ITS rDNA sequence, the isolate was determined to be N. sphaerica. Pathogenicity tests were conducted using fresh and healthy sesame leaves of 10 plants. A conidial suspension (106 conidia/ml) collected from a 7-day-old culture on PDA was used for inoculation. Leaves of 10 plants were spray-inoculated with the spore suspension at the 6-week-old growth stage, and an additional 10 plants were sprayed with sterile water. Inoculated plants were covered with polyethylene bags to maintain high humidity. Plants were kept at 28°C and observed for symptom every day. Ten to 15 days after inoculation, inoculated leaves developed blight symptoms similar to those observed on naturally infected leaves. No symptoms were observed on the control leaves. N. sphaerica was re-isolated from the inoculated leaves, thus fulfilling Koch's postulates. N. sphaerica has been reported as a leaf pathogen on several hosts worldwide (2). To our knowledge, this is the first report of Nigrospora leaf blight on sesame caused by N. sphaerica in China. References: (1) M. B. Ellis. Dematiaceous Hyphomycetes. CMI, Kew, Surrey, UK, 1971. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ . 
July 01, 2013. (3) M. A. Innis et al. PCR Protocols: A Guide to Methods and Applications. Academic Press, San Diego, CA, 1990.",2014-06-01 +26581425,Development and evaluation of double locus sequence typing for molecular epidemiological investigations of Clostridium difficile.,"Despite the development of novel typing methods based on whole genome sequencing, most laboratories still rely on classical molecular methods for outbreak investigation or surveillance. Reference methods for Clostridium difficile include ribotyping and pulsed-field gel electrophoresis, which are band-comparing methods often difficult to establish and which require reference strain collections. Here, we present the double locus sequence typing (DLST) scheme as a tool to analyse C. difficile isolates. Using a collection of clinical C. difficile isolates recovered during a 1-year period, we evaluated the performance of DLST and compared the results to multilocus sequence typing (MLST), a sequence-based method that has been used to study the structure of bacterial populations and highlight major clones. DLST had a higher discriminatory power compared to MLST (Simpson's index of diversity of 0.979 versus 0.965) and successfully identified all isolates of the study (100 % typeability). Previous studies showed that the discriminatory power of ribotyping was comparable to that of MLST; thus, DLST might be more discriminatory than ribotyping. DLST is easy to establish and provides several advantages, including absence of DNA extraction [polymerase chain reaction (PCR) is performed on colonies], no specific instrumentation, low cost and unambiguous definition of types. 
Moreover, the implementation of a DLST typing scheme on an Internet database, such as that previously done for Staphylococcus aureus and Pseudomonas aeruginosa ( http://www.dlst.org ), will allow users to easily obtain the DLST type by submitting directly sequencing files and will avoid problems associated with multiple databases.",2015-11-18 +25376663,MLGO: phylogeny reconstruction and ancestral inference from gene-order data.,"

Background

The rapid accumulation of whole-genome data has renewed interest in the study of using gene-order data for phylogenetic analyses and ancestral reconstruction. Current software and web servers typically do not support duplication and loss events along with rearrangements.

Results

MLGO (Maximum Likelihood for Gene-Order Analysis) is a web tool for the reconstruction of phylogeny and/or ancestral genomes from gene-order data. MLGO is based on likelihood computation and shows advantages over existing methods in terms of accuracy, scalability and flexibility.

Conclusions

To the best of our knowledge, it is the first web tool for analysis of large-scale genomic changes including not only rearrangements but also gene insertions, deletions and duplications. The web tool is available from http://www.geneorder.org/server.php .",2014-11-08 +25496126,Kiwi: a tool for integration and visualization of network topology and gene-set analysis.,"

Background

The analysis of high-throughput data in biology is aided by integrative approaches such as gene-set analysis. Gene-sets can represent well-defined biological entities (e.g. metabolites) that interact in networks (e.g. metabolic networks), to exert their function within the cell. Data interpretation can benefit from incorporating the underlying network, but there are currently no optimal methods that link gene-set analysis and network structures.

Results

Here we present Kiwi, a new tool that processes output data from gene-set analysis and integrates them with a network structure such that the inherent connectivity between gene-sets, i.e. not simply the gene overlap, becomes apparent. In two case studies, we demonstrate that standard gene-set analysis points at metabolites regulated in the interrogated condition. Nevertheless, only the integration of the interactions between these metabolites provides an extra layer of information that highlights how they are tightly connected in the metabolic network.

Conclusions

Kiwi is a tool that enhances interpretability of high-throughput data. It allows the users not only to discover a list of significant entities or processes as in gene-set analysis, but also to visualize whether these entities or processes are isolated or connected by means of their biological interaction. Kiwi is available as a Python package at http://www.sysbio.se/kiwi and an online tool in the BioMet Toolbox at http://www.biomet-toolbox.org.",2014-12-11 +25928663,Elviz - exploration of metagenome assemblies with an interactive visualization tool.,"

Background

Metagenomics, the sequencing of DNA collected from an entire microbial community, enables the study of natural microbial consortia in their native habitats. Metagenomics studies produce huge volumes of data, including both the sequences themselves and metadata describing their abundance, assembly, predicted functional characteristics and environmental parameters. The ability to explore these data visually is critically important to meaningful biological interpretation. Current genomics applications cannot effectively integrate sequence data, assembly metadata, and annotation to support both genome and community-level inquiry.

Results

Elviz (Environmental Laboratory Visualization) is an interactive web-based tool for the visual exploration of assembled metagenomes and their complex metadata. Elviz allows scientists to navigate metagenome assemblies across multiple dimensions and scales, plotting parameters such as GC content, relative abundance, phylogenetic affiliation and assembled contig length. Furthermore Elviz enables interactive exploration using real-time plot navigation, search, filters, axis selection, and the ability to drill from a whole-community profile down to individual gene annotations. Thus scientists engage in a rapid feedback loop of visual pattern identification, hypothesis generation, and hypothesis testing.

Conclusions

Compared to the current alternative of generating a succession of static figures, Elviz can greatly accelerate the speed of metagenome analysis. Elviz can be used to explore both user-submitted datasets and numerous metagenome studies publicly available at the Joint Genome Institute (JGI). Elviz is freely available at http://genome.jgi.doe.gov/viz and runs on most current web-browsers.",2015-04-28 +26516035,Prostanoids in patients with peripheral arterial disease: A meta-analysis of placebo-controlled randomized clinical trials.,"

Aims

Prostanoids are indicated in the treatment of peripheral arterial disease (PAD). Available trials suggest that these compounds could reduce the symptoms of intermittent claudication, even though the quality of studies is poor. The present meta-analysis is aimed at verifying the effects of prostanoids on amputation rate and ulcer healing in patients with lower limb PAD.

Materials and methods

The review protocol was published on http://www.crd.york.ac.uk/prospero (CRD42015020258). A comprehensive search was performed for published and unpublished trials comparing iloprost, alprostadil, prostaglandin-E1, epoprostenol, or taprostene with placebo/no therapy on amputation rate in patients with PAD and ulcer healing rate in patients with concomitant foot ulcers. Mantel-Haenszel odds ratio (MH-OR) was calculated with random effect models for the chosen endpoints.

Results

A total of 18 trials, enrolling 3,077 and 2,763 patients in the prostanoid and comparator groups, respectively were included in the analysis. Only 11 and 10 of those trials reported data on total and major amputations, respectively. Prostanoids were associated with a significantly lower risk of major (MH-OR [95% confidence interval] was 0.77 [0.63; 0.93], p=0.007), but not total, amputations. Healing rate (available only in 7 trials) was not significantly augmented by prostanoid treatment.

Conclusions

Available data are not sufficient to support an extensive use of prostanoids in patients with critical limb ischemia, as an adjunct to revascularization or as an alternative to major amputation in cases which cannot undergo revascularization.",2015-09-12 +26315910,Identification of hierarchical chromatin domains.,"MOTIVATION:The three-dimensional structure of the genome is an important regulator of many cellular processes including differentiation and gene regulation. Recently, technologies such as Hi-C that combine proximity ligation with high-throughput sequencing have revealed domains of self-interacting chromatin, called topologically associating domains (TADs), in many organisms. Current methods for identifying TADs using Hi-C data assume that TADs are non-overlapping, despite evidence for a nested structure in which TADs and sub-TADs form a complex hierarchy. RESULTS:We introduce a model for decomposition of contact frequencies into a hierarchy of nested TADs. This model is based on empirical distributions of contact frequencies within TADs, where positions that are far apart have a greater enrichment of contacts than positions that are close together. We find that the increase in contact enrichment with distance is stronger for the inner TAD than for the outer TAD in a TAD/sub-TAD pair. Using this model, we develop the TADtree algorithm for detecting hierarchies of nested TADs. TADtree compares favorably with previous methods, finding TADs with a greater enrichment of chromatin marks such as CTCF at their boundaries. 
AVAILABILITY AND IMPLEMENTATION:A python implementation of TADtree is available at http://compbio.cs.brown.edu/software/ CONTACT:braphael@cs.brown.edu SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",2015-08-26 +26578597,HGTree: database of horizontally transferred genes determined by tree reconciliation.,"The HGTree database provides putative genome-wide horizontal gene transfer (HGT) information for 2472 completely sequenced prokaryotic genomes. This task is accomplished by reconstructing approximate maximum likelihood phylogenetic trees for each orthologous gene and corresponding 16S rRNA reference species sets and then reconciling the two trees under parsimony framework. The tree reconciliation method is generally considered to be a reliable way to detect HGT events but its practical use has remained limited because the method is computationally intensive and conceptually challenging. In this regard, HGTree (http://hgtree.snu.ac.kr) represents a useful addition to the biological community and enables quick and easy retrieval of information for HGT-acquired genes to better understand microbial taxonomy and evolution. The database is freely available and can be easily scaled and updated to keep pace with the rapid rise in genomic information.",2015-11-17 +26578586,GREENC: a Wiki-based database of plant lncRNAs.,"Long non-coding RNAs (lncRNAs) are functional non-translated molecules greater than 200 nt. Their roles are diverse and they are usually involved in transcriptional regulation. LncRNAs still remain largely uninvestigated in plants with few exceptions. Experimentally validated plant lncRNAs have been shown to regulate important agronomic traits such as phosphate starvation response, flowering time and interaction with symbiotic organisms, making them of great interest in plant biology and in breeding. 
There is still a lack of lncRNAs in most sequenced plant species, and in those where they have been annotated, different methods have been used, so making the lncRNAs less useful in comparisons within and between species. We developed a pipeline to annotate lncRNAs and applied it to 37 plant species and six algae, resulting in the annotation of more than 120 000 lncRNAs. To facilitate the study of lncRNAs for the plant research community, the information gathered is organised in the Green Non-Coding Database (GreeNC, http://greenc.sciencedesigners.com/).",2015-11-17 +26578585,Gene3D: expanding the utility of domain assignments.,"Gene3D http://gene3d.biochem.ucl.ac.uk is a database of domain annotations of Ensembl and UniProtKB protein sequences. Domains are predicted using a library of profile HMMs representing 2737 CATH superfamilies. Gene3D has previously featured in the Database issue of NAR and here we report updates to the website and database. The current Gene3D (v14) release has expanded its domain assignments to ∼ 20,000 cellular genomes and over 43 million unique protein sequences, more than doubling the number of protein sequences since our last publication. Amongst other updates, we have improved our Functional Family annotation method. We have also improved the quality and coverage of our 3D homology modelling pipeline of predicted CATH domains. Additionally, the structural models have been expanded to include an extra model organism (Drosophila melanogaster). We also document a number of additional visualization tools in the Gene3D website.",2015-11-17 +25638391,Sensitive and highly resolved identification of RNA-protein interaction sites in PAR-CLIP data.,"

Background

PAR-CLIP is a recently developed Next Generation Sequencing-based method enabling transcriptome-wide identification of interaction sites between RNA and RNA-binding proteins. The PAR-CLIP procedure induces specific base transitions that originate from sites of RNA-protein interactions and can therefore guide the identification of binding sites. However, additional sources of transitions, such as cell type-specific SNPs and sequencing errors, challenge the inference of binding sites and suitable statistical approaches are crucial to control false discovery rates. In addition, a highly resolved delineation of binding sites followed by an extensive downstream analysis is necessary for a comprehensive characterization of the protein binding preferences and the subsequent design of validation experiments.

Results

We present a statistical and computational framework for PAR-CLIP data analysis. We developed a sensitive transition-centered algorithm specifically designed to resolve protein binding sites at high resolution in PAR-CLIP data. Our method employs a Bayesian network approach to associate posterior log-odds with the observed transitions, providing an overall quantification of the confidence in RNA-protein interaction. We use published PAR-CLIP data to demonstrate the advantages of our approach, which compares favorably with alternative algorithms. Lastly, by integrating RNA-Seq data we compute conservative experimentally-based false discovery rates of our method and demonstrate the high precision of our strategy.

Conclusions

Our method is implemented in the R package wavClusteR 2.0. The package is distributed under the GPL-2 license and is available from BioConductor at http://www.bioconductor.org/packages/devel/bioc/html/wavClusteR.html .",2015-02-01 +26476779,GERV: a statistical method for generative evaluation of regulatory variants for transcription factor binding.,"

Motivation

The majority of disease-associated variants identified in genome-wide association studies reside in noncoding regions of the genome with regulatory roles. Thus being able to interpret the functional consequence of a variant is essential for identifying causal variants in the analysis of genome-wide association studies.

Results

We present GERV (generative evaluation of regulatory variants), a novel computational method for predicting regulatory variants that affect transcription factor binding. GERV learns a k-mer-based generative model of transcription factor binding from ChIP-seq and DNase-seq data, and scores variants by computing the change of predicted ChIP-seq reads between the reference and alternate allele. The k-mers learned by GERV capture more sequence determinants of transcription factor binding than a motif-based approach alone, including both a transcription factor's canonical motif and associated co-factor motifs. We show that GERV outperforms existing methods in predicting single-nucleotide polymorphisms associated with allele-specific binding. GERV correctly predicts a validated causal variant among linked single-nucleotide polymorphisms and prioritizes the variants previously reported to modulate the binding of FOXA1 in breast cancer cell lines. Thus, GERV provides a powerful approach for functionally annotating and prioritizing causal variants for experimental follow-up analysis.

Availability and implementation

The implementation of GERV and related data are available at http://gerv.csail.mit.edu/.",2015-10-17 +22659240,CancerProView: a graphical image database of cancer-related genes and proteins.,"We have developed a graphical image database CancerProView (URL: http://cancerproview.dmb.med.keio.ac.jp/php/cpv.html) to assist the search for alterations of the motifs/domains in the cancer-related proteins that are caused by mutations in the corresponding genes. For the CancerProView, we have collected various kinds of data on 180 cancer-related proteins in terms of the motifs/domains, genomic structures of corresponding genes, and 109 charts of the protein interaction pathways. Moreover, we have collected the relevant data on 1041 reference genes including 197 non-cancer disease-associated genes, and the nucleotide sequences for 2011 full-length cDNA's and the alternatively spliced transcript variants. Thus, the CancerProView database system would provide valuable information to facilitate basic cancer research as well as for designing new molecular diagnosis and drug discovery for cancers. The CancerProView database can be operated via Internet with any Web browser, and the system is freely available to interested users without ID and password.",2012-05-31 +24499703,The pathway ontology - updates and applications.,"

Background

The Pathway Ontology (PW) developed at the Rat Genome Database (RGD), covers all types of biological pathways, including altered and disease pathways and captures the relationships between them within the hierarchical structure of a directed acyclic graph. The ontology allows for the standardized annotation of rat, and of human and mouse genes to pathway terms. It also constitutes a vehicle for easy navigation between gene and ontology report pages, between reports and interactive pathway diagrams, between pathways directly connected within a diagram and between those that are globally related in pathway suites and suite networks. Surveys of the literature and the development of the Pathway and Disease Portals are important sources for the ongoing development of the ontology. User requests and mapping of pathways in other databases to terms in the ontology further contribute to increasing its content. Recently built automated pipelines use the mapped terms to make available the annotations generated by other groups.

Results

The two released pipelines - the Pathway Interaction Database (PID) Annotation Import Pipeline and the Kyoto Encyclopedia of Genes and Genomes (KEGG) Annotation Import Pipeline, make available over 7,400 and 31,000 pathway gene annotations, respectively. Building the PID pipeline led to the addition of new terms within the signaling node, also augmented by the release of the RGD ""Immune and Inflammatory Disease Portal"" at that time. Building the KEGG pipeline led to a substantial increase in the number of disease pathway terms, such as those within the 'infectious disease pathway' parent term category. The 'drug pathway' node has also seen increases in the number of terms as well as a restructuring of the node. Literature surveys, disease portal deployments and user requests have contributed and continue to contribute additional new terms across the ontology. Since first presented, the content of PW has increased by over 75%.

Conclusions

Ongoing development of the Pathway Ontology and the implementation of pipelines promote an enriched provision of pathway data. The ontology is freely available for download and use from the RGD ftp site at ftp://rgd.mcw.edu/pub/ontology/pathway/ or from the National Center for Biomedical Ontology (NCBO) BioPortal website at http://bioportal.bioontology.org/ontologies/PW.",2014-02-05 +27247972,Etiology of symptomatic urethritis in men and association with sexual behaviors.,"

Introduction

Gonorrhea and chlamydia are sexually transmitted infections (STI) that are the most common causes of urethritis in men. The role of specific sexual behaviors and presentation of urethritis is often overlooked.

Methods

Data was retrospectively reviewed on all men presenting at the major STI clinic in Providence, Rhode Island. Predictors of gonorrhea and chlamydia infection were modeled using a generalized model assuming a binary distribution.

Results

Of the men with urethritis, 27% had chlamydia, 13% gonorrhea, 3% both, and 63% neither (non-gonococcal, non-chlamydial urethritis). MSM were more likely to test positive for gonorrhea than MSW (25% of MSM versus 6% of MSW; p<0.01).

Conclusions

MSM with urethritis were much more likely to test positive for gonorrhea which may be due to increased risk behaviors and spread within concentrated sexual networks. A large number of both MSM and MSW had non-gonococcal, non-chlamydial urethritis, which suggests the need for improved diagnostic testing. [Full article available at http://rimed.org/rimedicaljournal-2016-06.asp, free with no login].",2016-06-01 +26304587,CADBURE: A generic tool to evaluate the performance of spliced aligners on RNA-Seq data.,"The fundamental task in RNA-Seq-based transcriptome analysis is alignment of millions of short reads to the reference genome or transcriptome. Choosing the right tool for the dataset in hand from many existent RNA-Seq alignment packages remains a critical challenge for downstream analysis. To facilitate this choice, we designed a novel tool for comparing alignment results of user data based on the relative reliability of uniquely aligned reads (CADBURE). CADBURE can easily evaluate different aligners, or different parameter sets using the same aligner, and selects the best alignment result for any RNA-Seq dataset. Strengths of CADBURE include the ability to compare alignment results without the need for synthetic data such as simulated genomes, alignment regeneration and randomly subsampled datasets. The benefit of a CADBURE selected alignment result was supported by differentially expressed gene (DEG) analysis. We demonstrated that the use of CADBURE to select the best alignment from a number of different alignment results could change the number of DEGs by as much as 10%. In particular, the CADBURE selected alignment result favors fewer false positives in the DEG analysis. We also verified differential expression of eighteen genes with RT-qPCR validation experiments. 
CADBURE is an open source tool (http://cadbure.sourceforge.net/).",2015-08-25 +26727238,"Variation in Adult Day Services Center Participant Characteristics, by Center Ownership: United States, 2014.","More than one-quarter million participants were enrolled in adult day services centers in the United States on the day of data collection in 2014. The number of for-profit adult day services centers has grown in recent years. In 2012, 40% of adult day services centers were for-profit, serving more than one-half of all participants. This report presents the most current national estimates of selected characteristics of participants in adult day services centers and compares these characteristics by center ownership type. State-level estimates for the characteristics presented in this report are available online at http://www.cdc.gov/nchs/nsltcp/nsltcp_products.htm.",2015-12-01 +27324196,A simple model predicts UGT-mediated metabolism.,"

Motivation

Uridine diphosphate glucuronosyltransferases (UGTs) metabolize 15% of FDA approved drugs. Lead optimization efforts benefit from knowing how candidate drugs are metabolized by UGTs. This paper describes a computational method for predicting sites of UGT-mediated metabolism on drug-like molecules.

Results

XenoSite correctly predicts test molecules' sites of glucuronidation in the Top-1 or Top-2 predictions at a rate of 86 and 97%, respectively. In addition to predicting common sites of UGT conjugation, like hydroxyl groups, it can also accurately predict the glucuronidation of atypical sites, such as carbons. We also describe a simple heuristic model for predicting UGT-mediated sites of metabolism that performs nearly as well (with, respectively, 80 and 91% Top-1 and Top-2 accuracy), and can identify the most challenging molecules to predict on which to assess more complex models. Compared with prior studies, this model is more generally applicable, more accurate and simpler (not requiring expensive quantum modeling).

Availability and implementation

The UGT metabolism predictor developed in this study is available at http://swami.wustl.edu/xenosite/p/ugt CONTACT: : swamidass@wustl.eduSupplementary information: Supplementary data are available at Bioinformatics online.",2016-06-20 +25489863,How accurately can we predict the melting points of drug-like compounds?,"This article contributes a highly accurate model for predicting the melting points (MPs) of medicinal chemistry compounds. The model was developed using the largest published data set, comprising more than 47k compounds. The distributions of MPs in drug-like and drug lead sets showed that >90% of molecules melt within [50,250]°C. The final model calculated an RMSE of less than 33 °C for molecules from this temperature interval, which is the most important for medicinal chemistry users. This performance was achieved using a consensus model that performed calculations to a significantly higher accuracy than the individual models. We found that compounds with reactive and unstable groups were overrepresented among outlying compounds. These compounds could decompose during storage or measurement, thus introducing experimental errors. While filtering the data by removing outliers generally increased the accuracy of individual models, it did not significantly affect the results of the consensus models. Three analyzed distance to models did not allow us to flag molecules, which had MP values fell outside the applicability domain of the model. We believe that this negative result and the public availability of data from this article will encourage future studies to develop better approaches to define the applicability domain of models. 
The final model, MP data, and identified reactive groups are available online at http://ochem.eu/article/55638.",2014-12-09 +24142393,A systematic review and meta-analysis of clinical variables used in Huntington disease research.,"Treatment effect in Huntington disease (HD) clinical trials has relied on primary outcome measures such as total motor score or functional rating scales. However, these measures have limited sensitivity, particularly in pre- to early stages of the disease. We performed a systematic review of HD clinical studies to identify endpoints that correlate with disease severity. Using standard HD keywords and terms, we identified 749 published studies from 1993 to 2011 based on the availability of demographic, biochemical, and clinical measures. The average and variability of each measure was abstracted and stratified according to pre-far, pre-close, early, mild, moderate, and severe HD stages. A fixed-effect meta-analysis on selected variables was conducted at various disease stages. A total of 1,801 different clinical variables and treatment outcomes were identified. Unified Huntington Disease Rating Scale (UHDRS) Motor, UHDRS Independence, and Trail B showed a trend toward separation between HD stages. Other measures, such as UHDRS Apathy, Verbal Fluency, and Symbol Digit, could only distinguish between pre- and early stages of disease and later stages, whereas other measures showed little correlation with increasing HD stages. Using cross-sectional data from published HD clinical trials, we have identified potential endpoints that could be used to track HD disease progression and treatment effect. Longitudinal studies, such as TRACK-HD, are critical for assessing the value of potential markers of disease progression for use in future HD therapeutic trials. 
A list of variables, references used in this meta-analysis, and database is available at http://www.cmmt.ubc.ca/research/investigators/leavitt/publications.",2013-10-18 +22096227,"MINT, the molecular interaction database: 2012 update.","The Molecular INTeraction Database (MINT, http://mint.bio.uniroma2.it/mint/) is a public repository for protein-protein interactions (PPI) reported in peer-reviewed journals. The database grows steadily over the years and at September 2011 contains approximately 235,000 binary interactions captured from over 4750 publications. The web interface allows the users to search, visualize and download interactions data. MINT is one of the members of the International Molecular Exchange consortium (IMEx) and adopts the Molecular Interaction Ontology of the Proteomics Standard Initiative (PSI-MI) standards for curation and data exchange. MINT data are freely accessible and downloadable at http://mint.bio.uniroma2.it/mint/download.do. We report here the growth of the database, the major changes in curation policy and a new algorithm to assign a confidence to each interaction.",2011-11-16 +26585406,BlastKOALA and GhostKOALA: KEGG Tools for Functional Characterization of Genome and Metagenome Sequences.,"BlastKOALA and GhostKOALA are automatic annotation servers for genome and metagenome sequences, which perform KO (KEGG Orthology) assignments to characterize individual gene functions and reconstruct KEGG pathways, BRITE hierarchies and KEGG modules to infer high-level functions of the organism or the ecosystem. Both servers are made freely available at the KEGG Web site (http://www.kegg.jp/blastkoala/). In BlastKOALA, the KO assignment is performed by a modified version of the internally used KOALA algorithm after the BLAST search against a non-redundant dataset of pangenome sequences at the species, genus or family level, which is generated from the KEGG GENES database by retaining the KO content of each taxonomic category. 
In GhostKOALA, which utilizes more rapid GHOSTX for database search and is suitable for metagenome annotation, the pangenome dataset is supplemented with Cd-hit clusters including those for viral genes. The result files may be downloaded and manipulated for further KEGG Mapper analysis, such as comparative pathway analysis using multiple BlastKOALA results.",2015-11-14 +23579300,Prevention and treatment of dry socket.,"

Data sources

Cochrane Oral Health Group Trials Register, Cochrane Central Register of Controlled Trials (CENTRAL), Medline and Embase databases were searched together with reference lists of identified articles. Topic experts and organisations were also contacted.

Study selection

Only randomised controlled trials were considered and there were no restrictions regarding language or date of publication.

Data extraction and synthesis

Data abstraction and risk of bias assessment were conducted in duplicate and Cochrane statistical guidelines were followed. The GRADE tool was used to assess the quality of the body of evidence.

Results

Twenty-one trials with 2570 participants were included. Eighteen trials (2376 participants) related to prevention and three to treatment (194 participants). Six studies were at high risk of bias, 14 of unclear risk and one study at low risk. There was moderate evidence (four trials, 750 participants) that chlorhexidine mouthrinses (0.12% and 0.2% concentrations) both before and after extraction(s) prevented approximately 42% of dry socket(s) with a RR of 0.58 (95% CI 0.43 to 0.78; P < 0.001). The number of patients needed to be treated (0.12% and 0.2%) with chlorhexidine rinse to prevent one patient having dry socket (NNT) was 232 (95% CI 176 to 417), 47 (95% CI 35 to 84) and 8 (95% CI 6 to 14) at prevalences of dry socket of 1%, 5% and 30% respectively. Thee was moderate evidence (two trials, in 133 participants) that placing chlorhexidine gel (0.2%) after extractions prevented approximately 58% of dry socket(s) with a RR of 0.42 (95% CI 0.21 to 0.87; P = 0.02) with NNT of 173 (95% CI 127 to 770), 35 (95% CI 25 to 154) and 6 (95% CI 5 to 26) at prevalences of dry socket of 1%, 5% and 30% respectively. There was insufficient evidence to determine the effects of other intrasocket preventive interventions or interventions to treat dry socket.

Conclusions

There is some evidence that rinsing with chlorhexidine (0.12% and 0.2%) or placing chlorhexidine gel (0.2%) in the sockets of extracted teeth, provides a benefit in preventing dry socket. There was insufficient evidence to determine the effects of the other 10 preventative interventions each evaluated in single studies. There was insufficient evidence to determine the effects of any of the interventions to treat dry socket. The present review found some evidence for the association of minor adverse reactions with use of 0.12%, 0.2% and 2% chlorhexidine mouthrinses, though most studies were not designed to detect the presence of hypersensitivity reactions to mouthwash as part of the study protocol. No adverse events were reported in relation to the use of 0.2% chlorhexidine gel placed directly into a socket (though previous allergy to chlorhexidine was an exclusion criterion in these trials). In view of recent reports in the UK of two cases of serious adverse events associated with irrigation of dry socket with chlorhexidine mouthrinse, it is recommended that all members of the dental team prescribing chlorhexidine products are aware of the potential for both minor and serious adverse side effects.

Review

It is beyond the scope of this review to describe and detail The Cochrane Collaboration. The reader can seek out more information at http://www.cochrane.org/. In brief, a systematic review supported and published by the Cochrane group represents the gold standard to support clinical decision-making.",2013-03-01 +21948792,BμG@Sbase--a microbial gene expression and comparative genomic database.,"The reducing cost of high-throughput functional genomic technologies is creating a deluge of high volume, complex data, placing the burden on bioinformatics resources and tool development. The Bacterial Microarray Group at St George's (BμG@S) has been at the forefront of bacterial microarray design and analysis for over a decade and while serving as a hub of a global network of microbial research groups has developed BμG@Sbase, a microbial gene expression and comparative genomic database. BμG@Sbase (http://bugs.sgul.ac.uk/bugsbase/) is a web-browsable, expertly curated, MIAME-compliant database that stores comprehensive experimental annotation and multiple raw and analysed data formats. Consistent annotation is enabled through a structured set of web forms, which guide the user through the process following a set of best practices and controlled vocabulary. The database currently contains 86 expertly curated publicly available data sets (with a further 124 not yet published) and full annotation information for 59 bacterial microarray designs. The data can be browsed and queried using an explorer-like interface; integrating intuitive tree diagrams to present complex experimental details clearly and concisely. 
Furthermore the modular design of the database will provide a robust platform for integrating other data types beyond microarrays into a more Systems analysis based future.",2011-09-24 +22737589,Fungal genome resources at NCBI.,"The National Center for Biotechnology Information (NCBI) is well known for the nucleotide sequence archive, GenBank and sequence analysis tool BLAST. However, NCBI integrates many types of biomolecular data from variety of sources and makes it available to the scientific community as interactive web resources as well as organized releases of bulk data. These tools are available to explore and compare fungal genomes. Searching all databases with Fungi [organism] at http://www.ncbi.nlm.nih.gov/ is the quickest way to find resources of interest with fungal entries. Some tools though are resources specific and can be indirectly accessed from a particular database in the Entrez system. These include graphical viewers and comparative analysis tools such as TaxPlot, TaxMap and UniGene DDD (found via UniGene Homepage). Gene and BioProject pages also serve as portals to external data such as community annotation websites, BioGrid and UniProt. There are many different ways of accessing genomic data at NCBI. Depending on the focus and goal of research projects or the level of interest, a user would select a particular route for accessing genomic databases and resources. This review article describes methods of accessing fungal genome data and provides examples that illustrate the use of analysis tools.",2011-09-01 +25300367,Spotlite: web application and augmented algorithms for predicting co-complexed proteins from affinity purification--mass spectrometry data.,"Protein-protein interactions defined by affinity purification and mass spectrometry (APMS) suffer from high false discovery rates. 
Consequently, lists of potential interactions must be pruned of contaminants before network construction and interpretation, historically an expensive, time-intensive, and error-prone task. In recent years, numerous computational methods were developed to identify genuine interactions from the hundreds of candidates. Here, comparative analysis of three popular algorithms, HGSCore, CompPASS, and SAINT, revealed complementarity in their classification accuracies, which is supported by their divergent scoring strategies. We improved each algorithm by an average area under a receiver operating characteristics curve increase of 16% by integrating a variety of indirect data known to correlate with established protein-protein interactions, including mRNA coexpression, gene ontologies, domain-domain binding affinities, and homologous protein interactions. Each APMS scoring approach was incorporated into a separate logistic regression model along with the indirect features; the resulting three classifiers demonstrate improved performance on five diverse APMS data sets. To facilitate APMS data scoring within the scientific community, we created Spotlite, a user-friendly and fast web application. Within Spotlite, data can be scored with the augmented classifiers, annotated, and visualized ( http://cancer.unc.edu/majorlab/software.php ). The utility of the Spotlite platform to reveal physical, functional, and disease-relevant characteristics within APMS data is established through a focused analysis of the KEAP1 E3 ubiquitin ligase.",2014-10-20 +25078893,Validation and assessment of variant calling pipelines for next-generation sequencing.,"

Background

The processing and analysis of the large scale data generated by next-generation sequencing (NGS) experiments is challenging and is a burgeoning area of new methods development. Several new bioinformatics tools have been developed for calling sequence variants from NGS data. Here, we validate the variant calling of these tools and compare their relative accuracy to determine which data processing pipeline is optimal.

Results

We developed a unified pipeline for processing NGS data that encompasses four modules: mapping, filtering, realignment and recalibration, and variant calling. We processed 130 subjects from an ongoing whole exome sequencing study through this pipeline. To evaluate the accuracy of each module, we conducted a series of comparisons between the single nucleotide variant (SNV) calls from the NGS data and either gold-standard Sanger sequencing on a total of 700 variants or array genotyping data on a total of 9,935 single-nucleotide polymorphisms. A head to head comparison showed that Genome Analysis Toolkit (GATK) provided more accurate calls than SAMtools (positive predictive value of 92.55% vs. 80.35%, respectively). Realignment of mapped reads and recalibration of base quality scores before SNV calling proved to be crucial to accurate variant calling. GATK HaplotypeCaller algorithm for variant calling outperformed the UnifiedGenotype algorithm. We also showed a relationship between mapping quality, read depth and allele balance, and SNV call accuracy. However, if best practices are used in data processing, then additional filtering based on these metrics provides little gains and accuracies of >99% are achievable.

Conclusions

Our findings will help to determine the best approach for processing NGS data to confidently call variants for downstream analyses. To enable others to implement and replicate our results, all of our codes are freely available at http://metamoodics.org/wes.",2014-07-30 +28292858,Predictive Value of Age- and Sex-Specific Nomograms of Global Plaque Burden on Coronary Computed Tomography Angiography for Major Cardiac Events. ,"Age-adjusted coronary artery disease (CAD) burden identified on coronary computed tomography angiography predicts major adverse cardiovascular event (MACE) risk; however, it seldom contributes to clinical decision making because of a lack of nomographic data. We aimed to develop clinically pragmatic age- and sex-specific nomograms of CAD burden using coronary computed tomography angiography and to validate their prognostic use. Patients prospectively enrolled in phase I of the CONFIRM registry (Coronary CT Angiography Evaluation for Clinical Outcomes) were included (derivation cohort: n=21,132; 46% female) to develop CAD nomograms based on age-sex percentiles of segment involvement score (SIS) at each year of life (40-79 years). The relationship between SIS age-sex percentiles (SIS%) and MACE (all-cause death, myocardial infarction, unstable angina, and late revascularization) was tested in a nonoverlapping validation cohort (phase II, CONFIRM registry; n=3030, 44% female) by stratifying patients into 3 SIS% groups (≤50th, 51-75th, and >75th) and comparing annualized MACE rates and time to MACE using multivariable Cox proportional hazards models adjusting for Framingham risk and chest pain typicality. Age-sex percentiles were well fitted to second-order polynomial curves (men: R2=0.86±0.12; women: R2=0.86±0.14). Using the nomograms, there were 1576, 965, and 489 patients, respectively, in the ≤50th, 51-75th, and >75th SIS% groups. 
Annualized event rates were higher among patients with greater CAD burden (2.1% [95% confidence interval: 1.7%-2.7%], 3.9% [95% confidence interval: 3.0%-5.1%], and 7.2% [95% confidence interval: 5.4%-9.6%] in ≤50th, 51-75th, and >75th SIS% groups, respectively; P<0.001). Adjusted MACE risk was significantly increased among patients in SIS% groups above the median compared with patients below the median (hazard ratio [95% confidence interval]: 1.9 [1.3-2.8] for 51-75th SIS% group and 3.4 [2.3-5.0] for >75th SIS% group; P<0.01 for both). We have developed clinically pragmatic age- and sex-specific nomograms of CAD prevalence using coronary computed tomography angiography findings. Global plaque burden measured using SIS% is predictive of cardiac events independent of traditional risk assessment. URL: https://www.clinicaltrials.gov. Unique identifier: NCT01443637.",2017-03-01 +27811151,Computed Tomographic Perfusion Improves Diagnostic Power of Coronary Computed Tomographic Angiography in Women: Analysis of the CORE320 Trial (Coronary Artery Evaluation Using 320-Row Multidetector Computed Tomography Angiography and Myocardial Perfusion) According to Gender. ,"Coronary computed tomographic angiography (CTA) and myocardial perfusion imaging (CTP) is a validated approach for detection and exclusion of flow-limiting coronary artery disease (CAD), but little data are available on gender-specific performance of these modalities. In this study, we aimed to evaluate the diagnostic accuracy of combined coronary CTA and CTP in detecting flow-limiting CAD in women compared with men. Three hundred and eighty-one patients who underwent both CTA-CTP and single-photon emission computed tomography myocardial perfusion imaging preceding invasive coronary angiography as part of the CORE320 multicenter study (Coronary Artery Evaluation Using 320-row Multidetector Computed Tomography Angiography and Myocardial Perfusion) were included in this ancillary study. 
All 4 image modalities were analyzed in blinded, independent core laboratories. Prevalence of flow-limiting CAD defined by invasive coronary angiography equal to 50% or greater with an associated single-photon emission computed tomography myocardial perfusion imaging defect was 45% (114/252) and 23% (30/129) in males and females, respectively. Patient-based diagnostic accuracy defined by the area under the receiver operating curve for detecting flow-limiting CAD by CTA alone in females was 0.83 (0.75-0.89) and for CTA-CTP was 0.92 (0.86-0.97; P=0.003) compared with men where the area under the receiver operating curve for detecting flow-limiting CAD by CTA alone was 0.82 (0.77-0.87) and for CTA-CTP was 0.84 (0.80-0.89; P=0.29). The combination of CTA-CTP was performed similarly in men and women for identifying flow-limiting coronary stenosis; however, in women, CTP had incremental value over CTA alone, which was not the case in men. URL: http://www.clinicaltrials.gov. Unique identifier: NCT00934037.",2016-11-01 +22139942,RecountDB: a database of mapped and count corrected transcribed sequences.,"The field of gene expression analysis continues to benefit from next-generation sequencing generated data, which enables transcripts to be measured with unmatched accuracy and resolution. But the high-throughput reads from these technologies also contain many errors, which can compromise the ability to accurately detect and quantify rare transcripts. Fortunately, techniques exist to ameliorate the affects of sequencer error. We present RecountDB, a secondary database derived from primary data in NCBI's short read archive. RecountDB holds sequence counts from RNA-seq and 5' capped transcription start site experiments, corrected and mapped to the relevant genome. Via a searchable and browseable interface users can obtain corrected data in formats useful for transcriptomic analysis. 
The database is currently populated with 2265 entries from 45 organisms and continuously growing. RecountDB is publicly available at: http://recountdb.cbrc.jp.",2011-12-01 +24026199,"RAB38 confers a poor prognosis, associated with malignant progression and subtype preference in glioma.","RAB38 is a new member of the RAB small G protein family that regulates intracellular vesicle trafficking. RAB38 is expressed in melanocytes and it has been shown that a point mutation in the postulated GTP-binding domain of RAB38 is the gene responsible for human Hermansky-Pudlak syndrome. However, the prognostic and molecular features of tumors with RAB38 expression is still unclear, as well as glioma. Whole genome mRNA expression microarray data on 220 glioma samples from the Chinese glioma genome atlas (CGGA) database (http://www.cgga.org.cn) was applied as discovery set. Each grade of glioma patients was analyzed by the Kaplan-Meier method. To determine the protein expression levels of RAB38, further 82 glioma tissues were stained by immunohistochemistry. Three additional datasets (TCGA, GSE16011 and Rembrandt) were obtained as validation sets. The functional annotation of RAB38 was analyzed by Gene ontology (GO) analysis and Gene set variation analysis (GSVA) in 89 glioblastomas (GBMs). High RAB38 expression was mainly increased in high-grade gliomas, and high RAB38 expression also conferred high mortality of glioma in the CGGA cohort. RAB38 showed a mesenchymal subtype, G3 subtype and isocitrate dehydrogenase 1 (IDH1) wild-type preference. GO and GSVA analysis showed that RAB38 was significantly correlated with migration. These results were validated in other 3 datasets. The expression levels of RAB38 were significantly associated with grade progression as well as prognosis in gliomas. 
RAB38 is an important prognostic biomarker and potential therapeutic target in gliomas.",2013-09-10 +27308685,"Health, United States, 2015: With Special Feature on Racial and Ethnic Health Disparities","Health, United States, 2015 is the 39th report on the health status of the nation and is submitted by the Secretary of the Department of Health and Human Services to the President and the Congress of the United States in compliance with Section 308 of the Public Health Service Act. This report was compiled by the Centers for Disease Control and Prevention's (CDC) National Center for Health Statistics (NCHS). The Health, United States series presents an annual overview of national trends in health statistics. The report contains a Chartbook that assesses the nation's health by presenting trends and current information on selected measures of morbidity, mortality, health care utilization and access, health risk factors, prevention, health insurance, and personal health care expenditures. This year's Chartbook includes a Special Feature on racial and ethnic health disparities. The report also contains 114 Trend Tables organized around four major subject areas: health status and determinants, health care utilization, health care resources, and health care expenditures. A companion report—Health, United States: In Brief—features information extracted from the full report. The complete report, In Brief, and related data products are available on the Health, United States website at: http://www.cdc.gov/nchs/hus.htm.",2016-06-17 +27145998,KinasePA: Phosphoproteomics data annotation using hypothesis driven kinase perturbation analysis.,"Mass spectrometry (MS)-based quantitative phosphoproteomics has become a key approach for proteome-wide profiling of phosphorylation in tissues and cells. 
Traditional experimental design often compares a single treatment with a control, whereas increasingly more experiments are designed to compare multiple treatments with respect to a control. To this end, the development of bioinformatic tools that can integrate multiple treatments and visualise kinases and substrates under combinatorial perturbations is vital for dissecting concordant and/or independent effects of each treatment. Here, we propose a hypothesis driven kinase perturbation analysis (KinasePA) to annotate and visualise kinases and their substrates that are perturbed by various combinatorial effects of treatments in phosphoproteomics experiments. We demonstrate the utility of KinasePA through its application to two large-scale phosphoproteomics datasets and show its effectiveness in dissecting kinases and substrates within signalling pathways driven by unique combinations of cellular stimuli and inhibitors. We implemented and incorporated KinasePA as part of the ""directPA"" R package available from the comprehensive R archive network (CRAN). Furthermore, KinasePA also has an interactive web interface that can be readily applied to annotate user provided phosphoproteomics data (http://kinasepa.pengyiyang.org).",2016-05-27 +22564364,"A SNPshot of PubMed to associate genetic variants with drugs, diseases, and adverse reactions.","

Motivation

Genetic factors determine differences in pharmacokinetics, drug efficacy, and drug responses between individuals and sub-populations. Wrong dosages of drugs can lead to severe adverse drug reactions in individuals whose drug metabolism drastically differs from the ""assumed average"". Databases such as PharmGKB are excellent sources of pharmacogenetic information on enzymes, genetic variants, and drug response affected by changes in enzymatic activity. Here, we seek to aid researchers, database curators, and clinicians in their search for relevant information by automatically extracting these data from literature.

Approach

We automatically populate a repository of information on genetic variants, relations to drugs, occurrence in sub-populations, and associations with disease. We mine textual data from PubMed abstracts to discover such genotype-phenotype associations, focusing on SNPs that can be associated with variations in drug response. The overall repository covers relations found between genes, variants, alleles, drugs, diseases, adverse drug reactions, populations, and allele frequencies. We cross-reference these data to EntrezGene, PharmGKB, PubChem, and others.

Results

The performance regarding entity recognition and relation extraction yields a precision of 90-92% for the major entity types (gene, drug, disease), and 76-84% for relations involving these types. Comparison of our repository to PharmGKB reveals a coverage of 93% of gene-drug associations in PharmGKB and 97% of the gene-variant mappings based on 180,000 PubMed abstracts.

Availability

http://bioai4core.fulton.asu.edu/snpshot.",2012-04-30 +22760305,AraPath: a knowledgebase for pathway analysis in Arabidopsis.,"

Unlabelled

Studying plants using high-throughput genomics technologies is becoming routine, but interpretation of genome-wide expression data in terms of biological pathways remains a challenge, partly due to the lack of pathway databases. To create a knowledgebase for plant pathway analysis, we collected 1683 lists of differentially expressed genes from 397 gene-expression studies, which constitute a molecular signature database of various genetic and environmental perturbations of Arabidopsis. In addition, we extracted 1909 gene sets from various sources such as Gene Ontology, KEGG, AraCyc, Plant Ontology, predicted target genes of microRNAs and transcription factors, and computational gene clusters defined by meta-analysis. With this knowledgebase, we applied Gene Set Enrichment Analysis to an expression profile of cold acclimation and identified expected functional categories and pathways. Our results suggest that the AraPath database can be used to generate specific, testable hypotheses regarding plant molecular pathways from gene expression data.

Availability

http://bioinformatics.sdstate.edu/arapath/.",2012-07-03 +22013167,mirEX: a platform for comparative exploration of plant pri-miRNA expression data.,"mirEX is a comprehensive platform for comparative analysis of primary microRNA expression data. RT-qPCR-based gene expression profiles are stored in a universal and expandable database scheme and wrapped by an intuitive user-friendly interface. A new way of accessing gene expression data in mirEX includes a simple mouse operated querying system and dynamic graphs for data mining analyses. In contrast to other publicly available databases, the mirEX interface allows a simultaneous comparison of expression levels between various microRNA genes in diverse organs and developmental stages. Currently, mirEX integrates information about the expression profile of 190 Arabidopsis thaliana pri-miRNAs in seven different developmental stages: seeds, seedlings and various organs of mature plants. Additionally, by providing RNA structural models, publicly available deep sequencing results, experimental procedure details and careful selection of auxiliary data in the form of web links, mirEX can function as a one-stop solution for Arabidopsis microRNA information. A web-based mirEX interface can be accessed at http://bioinfo.amu.edu.pl/mirex.",2011-10-19 +25572709,PSHREG: a SAS macro for proportional and nonproportional subdistribution hazards regression.,"We present a new SAS macro %pshreg that can be used to fit a proportional subdistribution hazards model for survival data subject to competing risks. Our macro first modifies the input data set appropriately and then applies SAS's standard Cox regression procedure, PROC PHREG, using weights and counting-process style of specifying survival times to the modified data set. The modified data set can also be used to estimate cumulative incidence curves for the event of interest. 
The application of PROC PHREG has several advantages, e.g., it directly enables the user to apply the Firth correction, which has been proposed as a solution to the problem of undefined (infinite) maximum likelihood estimates in Cox regression, frequently encountered in small sample analyses. Deviation from proportional subdistribution hazards can be detected by both inspecting Schoenfeld-type residuals and testing correlation of these residuals with time, or by including interactions of covariates with functions of time. We illustrate application of these extended methods for competing risk regression using our macro, which is freely available at: http://cemsiis.meduniwien.ac.at/en/kb/science-research/software/statistical-software/pshreg, by means of analysis of a real chronic kidney disease study. We discuss differences in features and capabilities of %pshreg and the recent (January 2014) SAS PROC PHREG implementation of proportional subdistribution hazards modelling.",2014-12-03 +26658293,EPIMIC: A Simple Homemade Computer Program for Real-Time EPIdemiological Surveillance and Alert Based on MICrobiological Data.,"

Background and aims

Infectious diseases (IDs) are major causes of morbidity and mortality and their surveillance is critical. In 2002, we implemented a simple and versatile homemade tool, named EPIMIC, for the real-time systematic automated surveillance of IDs at Marseille university hospitals, based on the data from our clinical microbiology laboratory, including clinical samples, tests and diagnoses.

Methods

This tool was specifically designed to detect abnormal events as IDs are rarely predicted and modeled. EPIMIC operates using Microsoft Excel software and requires no particular computer skills or resources. An abnormal event corresponds to an increase above, or a decrease below threshold values calculated based on the mean of historical data plus or minus 2 standard deviations, respectively.

Results

Between November 2002 and October 2013 (11 years), 293 items were surveyed weekly, including 38 clinical samples, 86 pathogens, 79 diagnosis tests, and 39 antibacterial resistance patterns. The mean duration of surveillance was 7.6 years (range, 1 month-10.9 years). A total of 108,427 Microsoft Excel file cells were filled with counts of clinical samples, and 110,017 cells were filled with counts of diagnoses. A total of 1,390,689 samples were analyzed. Among them, 172,180 were found to be positive for a pathogen. EPIMIC generated a mean number of 0.5 alert/week on abnormal events.

Conclusions

EPIMIC proved to be efficient for real-time automated laboratory-based surveillance and alerting at our university hospital clinical microbiology laboratory-scale. It is freely downloadable from the following URL: http://www.mediterranee-infection.com/article.php?larub=157&titre=bulletin-epidemiologique (last accessed: 20/11/2015).",2015-12-14 +26460164,Implementation and assessment of a yeast orphan gene research project: involving undergraduates in authentic research experiences and progressing our understanding of uncharacterized open reading frames.,"Saccharomyces cerevisiae was the first eukaryotic organism to be sequenced; however, little progress has been made in recent years in furthering our understanding of all open reading frames (ORFs). From October 2012 to May 2015 the number of verified ORFs had only risen from 75.31% to 78%, while the number of uncharacterized ORFs had decreased from 12.8% to 11% (representing > 700 genes still left in this category; http://www.yeastgenome.org/genomesnapshot). Course-based research has been shown to increase student learning while providing experience with real scientific investigation; however, implementation in large, multi-section courses presents many challenges. This study sought to test the feasibility and effectiveness of incorporating authentic research into a core genetics course, with multiple instructors, to increase student learning and progress our understanding of uncharacterized ORFs. We generated a module-based annotation toolkit and utilized easily accessible bioinformatics tools to predict gene function for uncharacterized ORFs within the Saccharomyces Genome Database (SGD). Students were each assigned an uncharacterized ORF, which they annotated using contemporary comparative genomics methodologies, including multiple sequence alignment, conserved domain identification, signal peptide prediction and cellular localization algorithms. 
Student learning outcomes were measured by quizzes, project reports and presentations, as well as a post-project questionnaire. Our results indicate that the authentic research experience had positive impacts on students' perception of their learning and their confidence to conduct future research. Furthermore, we believe that creation of an online repository and adoption and/or adaptation of this project across multiple researchers and institutions could speed the process of gene function prediction.",2015-11-10 +24564858,HMPAS: Human Membrane Protein Analysis System.,"

Background

Membrane proteins perform essential roles in diverse cellular functions and are regarded as major pharmaceutical targets. The significance of membrane proteins has led to the development of dozens of resources related to membrane proteins. However, most of these resources are built for specific well-known membrane protein groups, making it difficult to find common and specific features of various membrane protein groups.

Methods

We collected human membrane proteins from the dispersed resources and predicted novel membrane protein candidates by using ortholog information and our membrane protein classifiers. The membrane proteins were classified according to the type of interaction with the membrane, subcellular localization, and molecular function. We also made a new feature dataset to characterize the membrane proteins in various aspects including membrane protein topology, domain, biological process, disease, and drug. Moreover, protein structure and ICD-10-CM based integrated disease and drug information was newly included. To analyze the comprehensive information of membrane proteins, we implemented analysis tools to identify novel sequence and functional features of the classified membrane protein groups and to extract features from protein sequences.

Results

We constructed HMPAS with 28,509 collected known membrane proteins and 8,076 newly predicted candidates. This system provides integrated information of human membrane proteins individually and in groups organized by 45 subcellular locations and 1,401 molecular functions. As a case study, we identified associations between the membrane proteins and diseases and present that membrane proteins are promising targets for diseases related with nervous system and circulatory system. A web-based interface of this system was constructed to facilitate researchers not only to retrieve organized information of individual proteins but also to use the tools to analyze the membrane proteins.

Conclusions

HMPAS provides comprehensive information about human membrane proteins including specific features of certain membrane protein groups. In this system, user can acquire the information of individual proteins and specified groups focused on their conserved sequence features, involved cellular processes, and diseases. HMPAS may contribute as a valuable resource for the inference of novel cellular mechanisms and pharmaceutical targets associated with the human membrane proteins. HMPAS is freely available at http://fcode.kaist.ac.kr/hmpas.",2013-11-07 +26252071,"Associations Between Selected Xenobiotics and Antinuclear Antibodies in the National Health and Nutrition Examination Survey, 1999-2004.","

Background

Potential associations between background environmental chemical exposures and autoimmunity are understudied.

Objectives

Our exploratory study investigated exposure to individual environmental chemicals and selected mixtures in relation to the presence of antinuclear antibodies (ANA), a widely used biomarker of autoimmunity, in a representative sample of the U.S. population.

Methods

This cross-sectional analysis used data on 4,340 participants from the National Health and Nutrition Examination Survey (1999-2004), of whom 14% were ANA positive, to explore associations between ANA and concentrations of dioxins, dibenzofurans, polychlorinated biphenyls, organochlorines, organophosphates, phenols, metals, and other environmental exposures and metabolites measured in participants' serum, whole blood, or urine. For dioxin-like compounds with toxic equivalency factors, we developed and applied a new statistical approach to study selected mixtures. Lognormal models and censored-data methods produced estimates of chemical associations with ANA in males, nulliparous females, and parous females; these estimates were adjusted for confounders and accommodated concentrations below detectable levels.

Results

Several associations between chemical concentration and ANA positivity were observed, but only the association in males exposed to triclosan remained statistically significant after correcting for multiple comparisons (mean concentration ratio = 2.8; 95% CI: 1.8, 4.5; p < 0.00001).

Conclusions

These data suggest that background levels of most xenobiotic exposures typical in the U.S. population are not strongly associated with ANA. Future studies should ideally reduce exposure misclassification by including prospective measurement of the chemicals of concern and should track changes in ANA and other autoantibodies over time.

Citation

Dinse GE, Jusko TA, Whitt IZ, Co CA, Parks CG, Satoh M, Chan EKL, Rose KM, Walker NJ, Birnbaum LS, Zeldin DC, Weinberg CR, Miller FW. 2016. Associations between selected xenobiotics and antinuclear antibodies in the National Health and Nutrition Examination Survey, 1999-2004. Environ Health Perspect 124:426-436; http://dx.doi.org/10.1289/ehp.1409345.",2015-08-07 +21975939,Using the PhenX Toolkit to Add Standard Measures to a Study.,"The PhenX (consensus measures for Phenotypes and eXposures) Toolkit (https://www.phenxtoolkit.org/) offers high-quality, well-established measures of phenotypes and exposures for use by the scientific community. The Toolkit contains 295 measures drawn from 21 research domains (fields of research). The measures were selected by Working Groups of domain experts using a consensus process that included input from the scientific community. The Toolkit provides a description of each PhenX measure, the rationale for including it in the Toolkit, protocol(s) for collecting the measure, and supporting documentation. Users can browse by measures, domains, or collections, or can search the Toolkit using the Smart Query Tool. Once users have selected some measures, they can download a customized Data Collection Worksheet that specifies what information needs to be collected, and a Data Dictionary that describes each variable included in their Data Collection Worksheet. To help researchers find studies with comparable data, PhenX measures and variables are being mapped to studies in the database of Genotypes and Phenotypes (dbGaP).",2011-10-01 +27448251,The Vigna unguiculata Gene Expression Atlas (VuGEA) from de novo assembly and quantification of RNA-seq data provides insights into seed maturation mechanisms.,"Legume research and cultivar development are important for sustainable food production, especially of high-protein seed. 
Thanks to the development of deep-sequencing technologies, crop species have been taken to the front line, even without completion of their genome sequences. Black-eyed pea (Vigna unguiculata) is a legume species widely grown in semi-arid regions, which has high potential to provide stable seed protein production in a broad range of environments, including drought conditions. The black-eyed pea reference genotype has been used to generate a gene expression atlas of the major plant tissues (i.e. leaf, root, stem, flower, pod and seed), with a developmental time series for pods and seeds. From these various organs, 27 cDNA libraries were generated and sequenced, resulting in more than one billion reads. Following filtering, these reads were de novo assembled into 36 529 transcript sequences that were annotated and quantified across the different tissues. A set of 24 866 unique transcript sequences, called Unigenes, was identified. All the information related to transcript identification, annotation and quantification were stored into a gene expression atlas webserver (http://vugea.noble.org), providing a user-friendly interface and necessary tools to analyse transcript expression in black-eyed pea organs and to compare data with other legume species. Using this gene expression atlas, we inferred details of molecular processes that are active during seed development, and identified key putative regulators of seed maturation. Additionally, we found evidence for conservation of regulatory mechanisms involving miRNA in plant tissues subjected to drought and seeds undergoing desiccation.",2016-10-01 +23292976,Pyteomics--a Python framework for exploratory data analysis and rapid software prototyping in proteomics.,"Pyteomics is a cross-platform, open-source Python library providing a rich set of tools for MS-based proteomics. 
It provides modules for reading LC-MS/MS data, search engine output, protein sequence databases, theoretical prediction of retention times, electrochemical properties of polypeptides, mass and m/z calculations, and sequence parsing. Pyteomics is available under Apache license; release versions are available at the Python Package Index http://pypi.python.org/pyteomics, the source code repository at http://hg.theorchromo.ru/pyteomics, documentation at http://packages.python.org/pyteomics. Pyteomics.biolccc documentation is available at http://packages.python.org/pyteomics.biolccc/. Questions on installation and usage can be addressed to pyteomics mailing list: pyteomics@googlegroups.com.",2013-01-05 +26246889,pSTR Finder: a rapid method to discover polymorphic short tandem repeat markers from whole-genome sequences.,"

Background

Whole-genome sequencing is performed routinely as a means to identify polymorphic genetic loci such as short tandem repeat loci. We have developed a simple tool, called pSTR Finder, which is freely available as a means of identifying putative polymorphic short tandem repeat (STR) loci from data generated from genome-wide sequences. The program performs cross comparisons on the STR sequences generated using the Tandem Repeats Finder based on multiple-genome samples in a FASTA format. These comparisons generate reports listing identical, polymorphic, and different STR loci when comparing two samples.

Methods

The web site http://forensic.mc.ntu.edu.tw:9000/PSTRWeb/Default has been developed as a means to identify polymorphic STR loci within complex mass genome sequences. The program was developed to generate a series of user-friendly reports.

Results

As proof of concept for the program, four FASTA genome sequence samples of human chromosome X (AC_000155.1, CM000685.1, NC_018934.2, and CM000274.1) were obtained from GenBank and were analyzed for the presence of putative STR regions. The sequences within AC-000155.1 were used as an initial reference sequence from which there were 5443 identical and 4305 polymorphic STR loci identified using a repeat unit of 1-6 and 10 bp as the flanking sequence either side of the putative STR loci. A reliability test was used to compare five FASTA samples, which had sections of DNA sequence removed to mimic partial or fragmented DNA sequences, to determine whether pSTR Finder can efficiently and consistently find identical, polymorphic, and different STR loci.

Conclusions

From the mass of DNA sequence data, the project was found to reproducibly identify polymorphic STR loci and generate user-friendly reports detailing the number and location of these potential polymorphic loci. This freely available program was found to be a useful tool to find polymorphic STR within whole-genome sequence data in forensic genetic studies.",2015-08-05 +27376574,Inter-individual variability and genetic influences on cytokine responses to bacteria and fungi.,"Little is known about the inter-individual variation of cytokine responses to different pathogens in healthy individuals. To systematically describe cytokine responses elicited by distinct pathogens and to determine the effect of genetic variation on cytokine production, we profiled cytokines produced by peripheral blood mononuclear cells from 197 individuals of European origin from the 200 Functional Genomics (200FG) cohort in the Human Functional Genomics Project (http://www.humanfunctionalgenomics.org), obtained over three different years. We compared bacteria- and fungi-induced cytokine profiles and found that most cytokine responses were organized around a physiological response to specific pathogens, rather than around a particular immune pathway or cytokine. We then correlated genome-wide single-nucleotide polymorphism (SNP) genotypes with cytokine abundance and identified six cytokine quantitative trait loci (QTLs). Among them, a cytokine QTL at the NAA35-GOLM1 locus markedly modulated interleukin (IL)-6 production in response to multiple pathogens and was associated with susceptibility to candidemia. Furthermore, the cytokine QTLs that we identified were enriched among SNPs previously associated with infectious diseases and heart diseases. 
These data reveal and begin to explain the variability in cytokine production by human immune cells in response to pathogens.",2016-07-04 +25477382,Integrative analysis of public ChIP-seq experiments reveals a complex multi-cell regulatory landscape.,"The large collections of ChIP-seq data rapidly accumulating in public data warehouses provide genome-wide binding site maps for hundreds of transcription factors (TFs). However, the extent of the regulatory occupancy space in the human genome has not yet been fully apprehended by integrating public ChIP-seq data sets and combining it with ENCODE TFs map. To enable genome-wide identification of regulatory elements we have collected, analysed and retained 395 available ChIP-seq data sets merged with ENCODE peaks covering a total of 237 TFs. This enhanced repertoire complements and refines current genome-wide occupancy maps by increasing the human genome regulatory search space by 14% compared to ENCODE alone, and also increases the complexity of the regulatory dictionary. As a direct application we used this unified binding repertoire to annotate variant enhancer loci (VELs) from H3K4me1 mark in two cancer cell lines (MCF-7, CRC) and observed enrichments of specific TFs involved in biological key functions to cancer development and proliferation. Those enrichments of TFs within VELs provide a direct annotation of non-coding regions detected in cancer genomes. Finally, full access to this catalogue is available online together with the TFs enrichment analysis tool (http://tagc.univ-mrs.fr/remap/).",2014-12-03 +23808388,Testing evolutionary models to explain the process of nucleotide substitution in gut bacterial 16S rRNA gene sequences.,"The 16S rRNA gene has been widely used as a marker of gut bacterial diversity and phylogeny, yet we do not know the model of evolution that best explains the differences in its nucleotide composition within and among taxa. 
Over 46 000 good-quality near-full-length 16S rRNA gene sequences from five bacterial phyla were obtained from the ribosomal database project (RDP) by study and, when possible, by within-study characteristics (e.g. anatomical region). Using alignments (RDPX and MUSCLE) of unique sequences, the FINDMODEL tool available at http://www.hiv.lanl.gov/ was utilized to find the model of character evolution (28 models were available) that best describes the input sequence data, based on the Akaike information criterion. The results showed variable levels of agreement (from 33% to 100%) in the chosen models between the RDP-based and the MUSCLE-based alignments among the taxa. Moreover, subgroups of sequences (using either alignment method) from the same study were often explained by different models. Nonetheless, the different representatives of the gut microbiota were explained by different proportions of the available models. This is the first report using evolutionary models to explain the process of nucleotide substitution in gut bacterial 16S rRNA gene sequences.",2013-07-15 +26560340,SynFind: Compiling Syntenic Regions across Any Set of Genomes on Demand.,"The identification of conserved syntenic regions enables discovery of predicted locations for orthologous and homeologous genes, even when no such gene is present. This capability means that synteny-based methods are far more effective than sequence similarity-based methods in identifying true-negatives, a necessity for studying gene loss and gene transposition. However, the identification of syntenic regions requires complex analyses which must be repeated for pairwise comparisons between any two species. Therefore, as the number of published genomes increases, there is a growing demand for scalable, simple-to-use applications to perform comparative genomic analyses that cater to both gene family studies and genome-scale studies. We implemented SynFind, a web-based tool that addresses this need. 
Given one query genome, SynFind is capable of identifying conserved syntenic regions in any set of target genomes. SynFind is capable of reporting per-gene information, useful for researchers studying specific gene families, as well as genome-wide data sets of syntenic gene and predicted gene locations, critical for researchers focused on large-scale genomic analyses. Inference of syntenic homologs provides the basis for correlation of functional changes around genes of interests between related organisms. Deployed on the CoGe online platform, SynFind is connected to the genomic data from over 15,000 organisms from all domains of life as well as supporting multiple releases of the same organism. SynFind makes use of a powerful job execution framework that promises scalability and reproducibility. SynFind can be accessed at http://genomevolution.org/CoGe/SynFind.pl. A video tutorial of SynFind using Phytophthrora as an example is available at http://www.youtube.com/watch?v=2Agczny9Nyc.",2015-11-11 +26559507,BRAKER1: Unsupervised RNA-Seq-Based Genome Annotation with GeneMark-ET and AUGUSTUS.,"

Motivation

Gene finding in eukaryotic genomes is notoriously difficult to automate. The task is to design a work flow with a minimal set of tools that would reach state-of-the-art performance across a wide range of species. GeneMark-ET is a gene prediction tool that incorporates RNA-Seq data into unsupervised training and subsequently generates ab initio gene predictions. AUGUSTUS is a gene finder that usually requires supervised training and uses information from RNA-Seq reads in the prediction step. Complementary strengths of GeneMark-ET and AUGUSTUS provided motivation for designing a new combined tool for automatic gene prediction.

Results

We present BRAKER1, a pipeline for unsupervised RNA-Seq-based genome annotation that combines the advantages of GeneMark-ET and AUGUSTUS. As input, BRAKER1 requires a genome assembly file and a file in bam-format with spliced alignments of RNA-Seq reads to the genome. First, GeneMark-ET performs iterative training and generates initial gene structures. Second, AUGUSTUS uses predicted genes for training and then integrates RNA-Seq read information into final gene predictions. In our experiments, we observed that BRAKER1 was more accurate than MAKER2 when it is using RNA-Seq as sole source for training and prediction. BRAKER1 does not require pre-trained parameters or a separate expert-prepared training step.

Availability and implementation

BRAKER1 is available for download at http://bioinf.uni-greifswald.de/bioinf/braker/ and http://exon.gatech.edu/GeneMark/

Contact

katharina.hoff@uni-greifswald.de or borodovsky@gatech.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-11 +21632604,Semantic-JSON: a lightweight web service interface for Semantic Web contents integrating multiple life science databases.,"Global cloud frameworks for bioinformatics research databases become huge and heterogeneous; solutions face various diametric challenges comprising cross-integration, retrieval, security and openness. To address this, as of March 2011 organizations including RIKEN published 192 mammalian, plant and protein life sciences databases having 8.2 million data records, integrated as Linked Open or Private Data (LOD/LPD) using SciNetS.org, the Scientists' Networking System. The huge quantity of linked data this database integration framework covers is based on the Semantic Web, where researchers collaborate by managing metadata across public and private databases in a secured data space. This outstripped the data query capacity of existing interface tools like SPARQL. Actual research also requires specialized tools for data analysis using raw original data. To solve these challenges, in December 2009 we developed the lightweight Semantic-JSON interface to access each fragment of linked and raw life sciences data securely under the control of programming languages popularly used by bioinformaticians such as Perl and Ruby. Researchers successfully used the interface across 28 million semantic relationships for biological applications including genome design, sequence processing, inference over phenotype databases, full-text search indexing and human-readable contents like ontology and LOD tree viewers. Semantic-JSON services of SciNetS.org are provided at http://semanticjson.org.",2011-06-01 +27447888,MBMC: An Effective Markov Chain Approach for Binning Metagenomic Reads from Environmental Shotgun Sequencing Projects.,"Metagenomics is a next-generation omics field currently impacting postgenomic life sciences and medicine. 
Binning metagenomic reads is essential for the understanding of microbial function, compositions, and interactions in given environments. Despite the existence of dozens of computational methods for metagenomic read binning, it is still very challenging to bin reads. This is especially true for reads from unknown species, from species with similar abundance, and/or from low-abundance species in environmental samples. In this study, we developed a novel taxonomy-dependent and alignment-free approach called MBMC (Metagenomic Binning by Markov Chains). Different from all existing methods, MBMC bins reads by measuring the similarity of reads to the trained Markov chains for different taxa instead of directly comparing reads with known genomic sequences. By testing on more than 24 simulated and experimental datasets with species of similar abundance, species of low abundance, and/or unknown species, we report here that MBMC reliably grouped reads from different species into separate bins. Compared with four existing approaches, we demonstrated that the performance of MBMC was comparable with existing approaches when binning reads from sequenced species, and superior to existing approaches when binning reads from unknown species. MBMC is a pivotal tool for binning metagenomic reads in the current era of Big Data and postgenomic integrative biology. The MBMC software can be freely downloaded at http://hulab.ucf.edu/research/projects/metagenomics/MBMC.html .",2016-07-22 +26550779,Moving to a Highly Walkable Neighborhood and Incidence of Hypertension: A Propensity-Score Matched Cohort Study.,"

Background

The impact of moving to a neighborhood more conducive to utilitarian walking on the risk of incident hypertension is uncertain.

Objective

Our study aimed to examine the effect of moving to a highly walkable neighborhood on the risk of incident hypertension.

Methods

A population-based propensity-score matched cohort study design was used based on the Ontario population from the Canadian Community Health Survey (2001-2010). Participants were adults ≥ 20 years of age who moved from a low-walkability neighborhood (defined as any neighborhood with a Walk Score < 90) to either a high- (Walk Score ≥ 90) or another low-walkability neighborhood. The incidence of hypertension was assessed by linking the cohort to administrative health databases using a validated algorithm. Propensity-score matched Cox proportional hazard models were used. Annual health examination was used as a control event.

Results

Among the 1,057 propensity-score matched pairs there was a significantly lower risk of incident hypertension in the low to high vs. the low to low-walkability groups [hazard ratio = 0.46; 95% CI, 0.26, 0.81, p < 0.01]. The crude hypertension incidence rates were 18.0 per 1,000 person-years (95% CI: 11.6, 24.8) among the low- to low-walkability movers compared with 8.6 per 1,000 person-years (95% CI: 5.3, 12.7) among the low- to high-walkability movers (p < 0.001). There were no significant differences in the hazard of annual health examination between the two mover groups.

Conclusions

Moving to a highly walkable neighborhood was associated with a significantly lower risk of incident hypertension. Future research should assess whether specific attributes of walkable neighborhoods (e.g., amenities, density, land-use mix) may be driving this relationship.

Citation

Chiu M, Rezai MR, Maclagan LC, Austin PC, Shah BR, Redelmeier DA, Tu JV. 2016. Moving to a highly walkable neighborhood and incidence of hypertension: a propensity-score matched cohort study. Environ Health Perspect 124:754-760; http://dx.doi.org/10.1289/ehp.1510425.",2015-11-08 +26803161,PAA: an R/bioconductor package for biomarker discovery with protein microarrays.,"

Unlabelled

The R/Bioconductor package Protein Array Analyzer (PAA) facilitates a flexible analysis of protein microarrays for biomarker discovery (esp., ProtoArrays). It provides a complete data analysis workflow including preprocessing and quality control, uni- and multivariate feature selection as well as several different plots and results tables to outline and evaluate the analysis results. As a main feature, PAA's multivariate feature selection methods are based on recursive feature elimination (e.g. SVM-recursive feature elimination, SVM-RFE) with stability ensuring strategies such as ensemble feature selection. This enables PAA to detect stable and reliable biomarker candidate panels.

Availability and implementation

PAA is freely available (BSD 3-clause license) from http://www.bioconductor.org/packages/PAA/ CONTACT: michael.turewicz@rub.de or martin.eisenacher@rub.de.",2016-01-22 +26553809,The Degradome database: expanding roles of mammalian proteases in life and disease.,"Since the definition of the degradome as the complete repertoire of proteases in a given organism, the combined effort of numerous laboratories has greatly expanded our knowledge of its roles in biology and pathology. Once the genomic sequences of several important model organisms were made available, we presented the Degradome database containing the curated sets of known protease genes in human, chimpanzee, mouse and rat. Here, we describe the updated Degradome database, featuring 81 new protease genes and 7 new protease families. Notably, in this short time span, the number of known hereditary diseases caused by mutations in protease genes has increased from 77 to 119. This increase reflects the growing interest on the roles of the degradome in multiple diseases, including cancer and ageing. Finally, we have leveraged the widespread adoption of new webtools to provide interactive graphic views that show information about proteases in the global context of the degradome. The Degradome database can be accessed through its web interface at http://degradome.uniovi.es.",2015-11-08 +25378466,flowDensity: reproducing manual gating of flow cytometry data by automated density-based cell population identification.,"

Summary

flowDensity facilitates reproducible, high-throughput analysis of flow cytometry data by automating a predefined manual gating approach. The algorithm is based on a sequential bivariate gating approach that generates a set of predefined cell populations. It chooses the best cut-off for individual markers using characteristics of the density distribution. The Supplementary Material is linked to the online version of the manuscript.

Availability and implementation

R source code freely available through BioConductor (http://master.bioconductor.org/packages/devel/bioc/html/flowDensity.html). Data available from FlowRepository.org (dataset FR-FCM-ZZBW).

Contact

rbrinkman@bccrc.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-16 +21602510,"GProX, a user-friendly platform for bioinformatics analysis and visualization of quantitative proteomics data.","Recent technological advances have made it possible to identify and quantify thousands of proteins in a single proteomics experiment. As a result of these developments, the analysis of data has become the bottleneck of proteomics experiment. To provide the proteomics community with a user-friendly platform for comprehensive analysis, inspection and visualization of quantitative proteomics data we developed the Graphical Proteomics Data Explorer (GProX)(1). The program requires no special bioinformatics training, as all functions of GProX are accessible within its graphical user-friendly interface which will be intuitive to most users. Basic features facilitate the uncomplicated management and organization of large data sets and complex experimental setups as well as the inspection and graphical plotting of quantitative data. These are complemented by readily available high-level analysis options such as database querying, clustering based on abundance ratios, feature enrichment tests for e.g. GO terms and pathway analysis tools. A number of plotting options for visualization of quantitative proteomics data is available and most analysis functions in GProX create customizable high quality graphical displays in both vector and bitmap formats. The generic import requirements allow data originating from essentially all mass spectrometry platforms, quantitation strategies and software to be analyzed in the program. GProX represents a powerful approach to proteomics data analysis providing proteomics experimenters with a toolbox for bioinformatics analysis of quantitative proteomics data. 
The program is released as open-source and can be freely downloaded from the project webpage at http://gprox.sourceforge.net.",2011-05-20 +26553798,InterRNA: a database of base interactions in RNA structures.,"A major component of RNA structure stabilization are the hydrogen bonded interactions between the base residues. The importance and biological relevance for large clusters of base interactions can be much more easily investigated when their occurrences have been systematically detected, catalogued and compared. In this paper, we describe the database InterRNA (INTERactions in RNA structures database-http://mfrlab.org/interrna/) that contains records of known RNA 3D motifs as well as records for clusters of bases that are interconnected by hydrogen bonds. The contents of the database were compiled from RNA structural annotations carried out by the NASSAM (http://mfrlab.org/grafss/nassam) and COGNAC (http://mfrlab.org/grafss/cognac) computer programs. An analysis of the database content and comparisons with the existing corpus of knowledge regarding RNA 3D motifs clearly show that InterRNA is able to provide an extension of the annotations for known motifs as well as able to provide novel interactions for further investigations.",2015-11-08 +22889332,"Avogadro: an advanced semantic chemical editor, visualization, and analysis platform.","

Unlabelled

Background

The Avogadro project has developed an advanced molecule editor and visualizer designed for cross-platform use in computational chemistry, molecular modeling, bioinformatics, materials science, and related areas. It offers flexible, high quality rendering, and a powerful plugin architecture. Typical uses include building molecular structures, formatting input files, and analyzing output of a wide variety of computational chemistry packages. By using the CML file format as its native document type, Avogadro seeks to enhance the semantic accessibility of chemical data types.

Results

The work presented here details the Avogadro library, which is a framework providing a code library and application programming interface (API) with three-dimensional visualization capabilities; and has direct applications to research and education in the fields of chemistry, physics, materials science, and biology. The Avogadro application provides a rich graphical interface using dynamically loaded plugins through the library itself. The application and library can each be extended by implementing a plugin module in C++ or Python to explore different visualization techniques, build/manipulate molecular structures, and interact with other programs. We describe some example extensions, one which uses a genetic algorithm to find stable crystal structures, and one which interfaces with the PackMol program to create packed, solvated structures for molecular dynamics simulations. The 1.0 release series of Avogadro is the main focus of the results discussed here.

Conclusions

Avogadro offers a semantic chemical builder and platform for visualization and analysis. For users, it offers an easy-to-use builder, integrated support for downloading from common databases such as PubChem and the Protein Data Bank, extracting chemical data from a wide variety of formats, including computational chemistry output, and native, semantic support for the CML file format. For developers, it can be easily extended via a powerful plugin mechanism to support new features in organic chemistry, inorganic complexes, drug design, materials, biomolecules, and simulations. Avogadro is freely available under an open-source license from http://avogadro.openmolecules.net.",2012-08-13 +22064852,eQuilibrator--the biochemical thermodynamics calculator.,"The laws of thermodynamics constrain the action of biochemical systems. However, thermodynamic data on biochemical compounds can be difficult to find and is cumbersome to perform calculations with manually. Even simple thermodynamic questions like 'how much Gibbs energy is released by ATP hydrolysis at pH 5?' are complicated excessively by the search for accurate data. To address this problem, eQuilibrator couples a comprehensive and accurate database of thermodynamic properties of biochemical compounds and reactions with a simple and powerful online search and calculation interface. The web interface to eQuilibrator (http://equilibrator.weizmann.ac.il) enables easy calculation of Gibbs energies of compounds and reactions given arbitrary pH, ionic strength and metabolite concentrations. The eQuilibrator code is open-source and all thermodynamic source data are freely downloadable in standard formats. Here we describe the database characteristics and implementation and demonstrate its use.",2011-11-07 +27131786,Evolview v2: an online visualization and management tool for customized and annotated phylogenetic trees.,"Evolview is an online visualization and management tool for customized and annotated phylogenetic trees. 
It allows users to visualize phylogenetic trees in various formats, customize the trees through built-in functions and user-supplied datasets and export the customization results to publication-ready figures. Its 'dataset system' contains not only the data to be visualized on the tree, but also 'modifiers' that control various aspects of the graphical annotation. Evolview is a single-page application (like Gmail); its carefully designed interface allows users to upload, visualize, manipulate and manage trees and datasets all in a single webpage. Developments since the last public release include a modern dataset editor with keyword highlighting functionality, seven newly added types of annotation datasets, collaboration support that allows users to share their trees and datasets and various improvements of the web interface and performance. In addition, we included eleven new 'Demo' trees to demonstrate the basic functionalities of Evolview, and five new 'Showcase' trees inspired by publications to showcase the power of Evolview in producing publication-ready figures. Evolview is freely available at: http://www.evolgenius.info/evolview/.",2016-04-30 +27131358,"MANORAA (Mapping Analogous Nuclei Onto Residue And Affinity) for identifying protein-ligand fragment interaction, pathways and SNPs.","Protein-ligand interaction analysis is an important step of drug design and protein engineering in order to predict the binding affinity and selectivity between ligands to the target proteins. To date, there are more than 100 000 structures available in the Protein Data Bank (PDB), of which ∼30% are protein-ligand (MW below 1000 Da) complexes. We have developed the integrative web server MANORAA (Mapping Analogous Nuclei Onto Residue And Affinity) with the aim of providing a user-friendly web interface to assist structural study and design of protein-ligand interactions. 
In brief, the server allows the users to input the chemical fragments and present all the unique molecular interactions to the target proteins with available three-dimensional structures in the PDB. The users can also link the ligands of interest to assess possible off-target proteins, human variants and pathway information using our all-in-one integrated tools. Taken together, we envisage that the server will facilitate and improve the study of protein-ligand interactions by allowing observation and comparison of ligand interactions with multiple proteins at the same time. (http://manoraa.org).",2016-04-29 +26449916,Be-CoDiS: A Mathematical Model to Predict the Risk of Human Diseases Spread Between Countries--Validation and Application to the 2014-2015 Ebola Virus Disease Epidemic.,"Ebola virus disease is a lethal human and primate disease that currently requires a particular attention from the international health authorities due to important outbreaks in some Western African countries and isolated cases in the UK, the USA and Spain. Regarding the emergency of this situation, there is a need for the development of decision tools, such as mathematical models, to assist the authorities to focus their efforts in important factors to eradicate Ebola. In this work, we propose a novel deterministic spatial-temporal model, called Between-Countries Disease Spread (Be-CoDiS), to study the evolution of human diseases within and between countries. The main interesting characteristics of Be-CoDiS are the consideration of the movement of people between countries, the control measure effects and the use of time-dependent coefficients adapted to each country. First, we focus on the mathematical formulation of each component of the model and explain how its parameters and inputs are obtained. Then, in order to validate our approach, we consider two numerical experiments regarding the 2014-2015 Ebola epidemic. 
The first one studies the ability of the model in predicting the EVD evolution between countries starting from the index cases in Guinea in December 2013. The second one consists of forecasting the evolution of the epidemic by using some recent data. The results obtained with Be-CoDiS are compared to real data and other model outputs found in the literature. Finally, a brief parameter sensitivity analysis is done. A free MATLAB version of Be-CoDiS is available at: http://www.mat.ucm.es/momat/software.htm.",2015-09-01 +21959868,Tripal: a construction toolkit for online genome databases.,"As the availability, affordability and magnitude of genomics and genetics research increases so does the need to provide online access to resulting data and analyses. Availability of a tailored online database is the desire for many investigators or research communities; however, managing the Information Technology infrastructure needed to create such a database can be an undesired distraction from primary research or potentially cost prohibitive. Tripal provides simplified site development by merging the power of Drupal, a popular web Content Management System with that of Chado, a community-derived database schema for storage of genomic, genetic and other related biological data. Tripal provides an interface that extends the content management features of Drupal to the data housed in Chado. Furthermore, Tripal provides a web-based Chado installer, genomic data loaders, web-based editing of data for organisms, genomic features, biological libraries, controlled vocabularies and stock collections. Also available are Tripal extensions that support loading and visualizations of NCBI BLAST, InterPro, Kyoto Encyclopedia of Genes and Genomes and Gene Ontology analyses, as well as an extension that provides integration of Tripal with GBrowse, a popular GMOD tool. 
An Application Programming Interface is available to allow creation of custom extensions by site developers, and the look-and-feel of the site is completely customizable through Drupal-based PHP template files. Addition of non-biological content and user-management is afforded through Drupal. Tripal is an open source and freely available software package found at http://tripal.sourceforge.net.",2011-09-29 +21906294,diArk 2.0 provides detailed analyses of the ever increasing eukaryotic genome sequencing data.,"

Background

Nowadays, the sequencing of even the largest mammalian genomes has become a question of days with current next-generation sequencing methods. It comes as no surprise that dozens of genome assemblies are released per month now. Since the number of next-generation sequencing machines increases worldwide and new major sequencing plans are announced, a further increase in the speed of releasing genome assemblies is expected. Thus it becomes increasingly important to get an overview as well as detailed information about available sequenced genomes. The different sequencing and assembly methods have specific characteristics that need to be known to evaluate the various genome assemblies before performing subsequent analyses.

Results

diArk has been developed to provide fast and easy access to all sequenced eukaryotic genomes worldwide. Currently, diArk 2.0 contains information about more than 880 species and more than 2350 genome assembly files. Many meta-data like sequencing and read-assembly methods, sequencing coverage, GC-content, extended lists of alternatively used scientific names and common species names, and various kinds of statistics are provided. To intuitively approach the data the web interface makes extensive usage of modern web techniques. A number of search modules and result views facilitate finding and judging the data of interest. Subscribing to the RSS feed is the easiest way to stay up-to-date with the latest genome data.

Conclusions

diArk 2.0 is the most up-to-date database of sequenced eukaryotic genomes compared to databases like GOLD, NCBI Genome, NHGRI, and ISC. It is different in that only those projects are stored for which genome assembly data or considerable amounts of cDNA data are available. Projects in planning stage or in the process of being sequenced are not included. The user can easily search through the provided data and directly access the genome assembly files of the sequenced genome of interest. diArk 2.0 is available at http://www.diark.org.",2011-09-09 +22447338,Review of toxicological effects caused by episodic stressor exposure.,"Water quality monitoring tools that rely on data from stress-response tests with continuous exposure at constant concentrations are not always appropriately protective when stressor exposure in the field is episodic in nature. The present study identifies various approaches that have attempted to account for episodic stressor exposure, describes the development of a toxicological effects database of episodic stressor exposure collated from published scientific literature, and discusses whether any discernible trends are evident when these data are reviewed. The episodic stressor exposure literature indicated that few generalizations can be made regarding associated biological responses. Instead, when attempting to characterize the hazard of a certain episodic pollution event, the following situation-specific information is required: the specific species affected and its age, the specific stressor and its concentration, the number of exposures to the stressor, the duration of exposure to the stressor, and the recovery time after each exposure. 
The present study identifies four main challenges to the inclusion of episodic toxicity data in environmental water quality management: varying stressor concentration profiles, defining episodic stressor concentration levels, variation resulting from routes of exposure and modes of action, and species-specific responses to episodic stressor exposure. The database, available at http://iwr.ru.ac.za/iwr/download, could be particularly useful for site-specific risk assessments related to episodic exposures.",2012-03-23 +27288493,Meshable: searching PubMed abstracts by utilizing MeSH and MeSH-derived topical terms.,"

Unlabelled

Medical Subject Headings (MeSH(®)) is a controlled vocabulary for indexing and searching biomedical literature. MeSH terms and subheadings are organized in a hierarchical structure and are used to indicate the topics of an article. Biologists can use either MeSH terms as queries or the MeSH interface provided in PubMed(®) for searching PubMed abstracts. However, these are rarely used, and there is no convenient way to link standardized MeSH terms to user queries. Here, we introduce a web interface which allows users to enter queries to find MeSH terms closely related to the queries. Our method relies on co-occurrence of text words and MeSH terms to find keywords that are related to each MeSH term. A query is then matched with the keywords for MeSH terms, and candidate MeSH terms are ranked based on their relatedness to the query. The experimental results show that our method achieves the best performance among several term extraction approaches in terms of topic coherence. Moreover, the interface can be effectively used to find full names of abbreviations and to disambiguate user queries.

Availability and implementation

https://www.ncbi.nlm.nih.gov/IRET/MESHABLE/

Contact

sun.kim@nih.gov

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-10 +25542927,Modified screening and ranking algorithm for copy number variation detection.,"

Motivation

Copy number variation (CNV) is a type of structural variation, usually defined as genomic segments that are 1 kb or larger, which present variable copy numbers when compared with a reference genome. The screening and ranking algorithm (SaRa) was recently proposed as an efficient approach for multiple change-points detection, which can be applied to CNV detection. However, some practical issues arise from application of SaRa to single nucleotide polymorphism data.

Results

In this study, we propose a modified SaRa on CNV detection to address these issues. First, we use the quantile normalization on the original intensities to guarantee that the normal mean model-based SaRa is a robust method. Second, a novel normal mixture model coupled with a modified Bayesian information criterion is proposed for candidate change-point selection and further clustering the potential CNV segments to copy number states. Simulations revealed that the modified SaRa became a robust method for identifying change-points and achieved better performance than the circular binary segmentation (CBS) method. By applying the modified SaRa to real data from the HapMap project, we illustrated its performance on detecting CNV segments. In conclusion, our modified SaRa method improves SaRa theoretically and numerically, for identifying CNVs with high-throughput genotyping data.

Availability and implementation

The modSaRa package is implemented in R program and freely available at http://c2s2.yale.edu/software/modSaRa.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-25 +26251825,Microarray analysis of the in vivo response of microglia to Aβ peptides in mice with conditional deletion of the prostaglandin EP2 receptor.,"Amyloid-β (Aβ) peptides accumulate in the brains of patients with Alzheimer's disease (AD), where they generate a persistent inflammatory response from microglia, the innate immune cells of the brain. The immune modulatory cyclooxygenase/prostaglandin E2 (COX/PGE2) pathway has been implicated in preclinical AD development, both in human epidemiology studies1 and in transgenic rodent models of AD2, 3. PGE2 signals through four G-protein-coupled receptors, including the EP2 receptor that has been investigated for its role in mediating the inflammatory and phagocytic responses to Aβ4. To identify transcriptional differences in microglia lacking the EP2 receptor, we examined mice with EP2 conditionally deleted in Cd11b-expressing immune cells. We injected Aβ peptides or saline vehicle into the brains of adult mice, isolated primary microglia, and analyzed RNA expression by microarray. The resulting datasets were analyzed in two studies5, 6, one describing the basal status of microglia with or without EP2 deletion, and the second study analyzing the microglial response to Aβ. Here we describe in detail the experimental design and data analyses. The raw data from these studies are deposited in GEO, accession GSE57181 (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE57181).",2015-09-01 +22540519,Automatic generation of causal networks linking growth factor stimuli to functional cell state changes.,"Despite the increasing number of growth factor-related signalling networks, their lack of logical and causal connection to factual changes in cell states frequently impairs the functional interpretation of microarray data. 
We present a novel method enabling the automatic inference of causal multi-layer networks from such data, allowing the functional interpretation of growth factor stimulation experiments using pathway databases. Our environment of evaluation was hepatocyte growth factor-stimulated cell migration and proliferation in a keratinocyte-fibroblast co-culture. The network for this system was obtained by applying the steps: (a) automatic integration of the comprehensive set of all known cellular networks from the Pathway Interaction Database into a master structure; (b) retrieval of an active-network from the master structure, where the network edges that connect nodes with an absent mRNA level were excluded; and (c) reduction of the active-network complexity to a causal subnetwork from a set of seed nodes specific for the microarray experiment. The seed nodes comprised the receptors stimulated in the experiment, the consequently differentially expressed genes, and the expected cell states. The resulting network shows how well-known players, in the context of hepatocyte growth factor stimulation, are mechanistically linked in a pathway triggering functional cell state changes. Using BIOQUALI, we checked and validated the consistency of the network with respect to microarray data by computational simulation. The network has properties that can be classified into different functional layers because it not only shows signal processing down to the transcriptional level, but also the modulation of the network structure by the preceding stimulation. The software for generating computable objects from the Pathway Interaction Database, as well as the generated networks, are freely available at: http://www.tiga.uni-hd.de/supplements/inferringFromPID.html.",2012-05-22 +22531214,NuST: analysis of the interplay between nucleoid organization and gene expression.,"

Unlabelled

Different experimental results suggest the presence of an interplay between global transcriptional regulation and chromosome spatial organization in bacteria. The identification and clear visualization of spatial clusters of contiguous genes targeted by specific DNA-binding proteins or sensitive to nucleoid perturbations can elucidate links between nucleoid structure and gene expression patterns. Similarly, statistical analysis to assess correlations between results from independent experiments can provide the integrated analysis needed in this line of research. NuST (Nucleoid Survey tools), based on the Escherichia coli genome, gives the non-expert the possibility to analyze the aggregation of genes or loci sets along the genome coordinate, at different scales of observation. It is useful to discover correlations between different sources of data (e.g. expression, binding or genomic data) and genome organization. A user can use it on datasets in the form of gene lists coming from his/her own experiments or bioinformatic analyses, but also make use of the internal database, which collects data from many published studies.

Availability and implementation

NuST is a web server (available at http://www.lgm.upmc.fr/nust/). The website is implemented in PHP, SQLite and Ajax, with all major browsers supported, while the core algorithms are optimized and implemented in C. NuST has an extensive help page and provides a direct visualization of results as well as different downloadable file formats. A template Perl code for automated access to the web server can be downloaded at http://www.lgm.upmc.fr/nust/downloads/, in order to allow the users to use NuST in systematic bioinformatic analyses.",2012-04-23 +26379697,Detecting modules in biological networks by edge weight clustering and entropy significance.,"Detection of the modular structure of biological networks is of interest to researchers adopting a systems perspective for the analysis of omics data. Computational systems biology has provided a rich array of methods for network clustering. To date, the majority of approaches address this task through a network node classification based on topological or external quantifiable properties of network nodes. Conversely, numerical properties of network edges are underused, even though the information content which can be associated with network edges has augmented due to steady advances in molecular biology technology over the last decade. Properly accounting for network edges in the development of clustering approaches can become crucial to improve quantitative interpretation of omics data, finally resulting in more biologically plausible models. In this study, we present a novel technique for network module detection, named WG-Cluster (Weighted Graph CLUSTERing). 
WG-Cluster's notable features, compared to current approaches, lie in: (1) the simultaneous exploitation of network node and edge weights to improve the biological interpretability of the connected components detected, (2) the assessment of their statistical significance, and (3) the identification of emerging topological properties in the detected connected components. WG-Cluster utilizes three major steps: (i) an unsupervised version of k-means edge-based algorithm detects sub-graphs with similar edge weights, (ii) a fast-greedy algorithm detects connected components which are then scored and selected according to the statistical significance of their scores, and (iii) an analysis of the convolution between sub-graph mean edge weight and connected component score provides a summarizing view of the connected components. WG-Cluster can be applied to directed and undirected networks of different types of interacting entities and scales up to large omics data sets. Here, we show that WG-Cluster can be successfully used in the differential analysis of physical protein-protein interaction (PPI) networks. Specifically, applying WG-Cluster to a PPI network weighted by measurements of differential gene expression permits to explore the changes in network topology under two distinct (normal vs. tumor) conditions. WG-Cluster code is available at https://sites.google.com/site/paolaleccapersonalpage/.",2015-08-27 +25600654,Sample size calculation in metabolic phenotyping studies.,"The number of samples needed to identify significant effects is a key question in biomedical studies, with consequences on experimental designs, costs and potential discoveries. In metabolic phenotyping studies, sample size determination remains a complex step. This is due particularly to the multiple hypothesis-testing framework and the top-down hypothesis-free approach, with no a priori known metabolic target. Until now, there was no standard procedure available to address this purpose. 
In this review, we discuss sample size estimation procedures for metabolic phenotyping studies. We release an automated implementation of the Data-driven Sample size Determination (DSD) algorithm for MATLAB and GNU Octave. Original research concerning DSD was published elsewhere. DSD allows the determination of an optimized sample size in metabolic phenotyping studies. The procedure uses analytical data only from a small pilot cohort to generate an expanded data set. The statistical recoupling of variables procedure is used to identify metabolic variables, and their intensity distributions are estimated by Kernel smoothing or log-normal density fitting. Statistically significant metabolic variations are evaluated using the Benjamini-Yekutieli correction and processed for data sets of various sizes. Optimal sample size determination is achieved in a context of biomarker discovery (at least one statistically significant variation) or metabolic exploration (a maximum of statistically significant variations). DSD toolbox is encoded in MATLAB R2008A (Mathworks, Natick, MA) for Kernel and log-normal estimates, and in GNU Octave for log-normal estimates (Kernel density estimates are not robust enough in GNU octave). It is available at http://www.prabi.fr/redmine/projects/dsd/repository, with a tutorial at http://www.prabi.fr/redmine/projects/dsd/wiki.",2015-01-19 +26546518,The Transporter Classification Database (TCDB): recent advances.,"The Transporter Classification Database (TCDB; http://www.tcdb.org) is a freely accessible reference database for transport protein research, which provides structural, functional, mechanistic, evolutionary and disease/medical information about transporters from organisms of all types. TCDB is the only transport protein classification database adopted by the International Union of Biochemistry and Molecular Biology (IUBMB). 
It consists of more than 10,000 non-redundant transport systems with more than 11 000 reference citations, classified into over 1000 transporter families. Transporters in TCDB can be single or multi-component systems, categorized in a functional/phylogenetic hierarchical system of classes, subclasses, families, subfamilies and transport systems. TCDB also includes updated software designed to analyze the distinctive features of transport proteins, extending its usefulness. Here we present a comprehensive update of the database contents and features and summarize recent discoveries recorded in TCDB.",2015-11-05 +23843252,dbNSFP v2.0: a database of human non-synonymous SNVs and their functional predictions and annotations.,"dbNSFP is a database developed for functional prediction and annotation of all potential non-synonymous single-nucleotide variants (nsSNVs) in the human genome. This database significantly facilitates the process of querying predictions and annotations from different databases/web-servers for large amounts of nsSNVs discovered in exome-sequencing studies. Here we report a recent major update of the database to version 2.0. We have rebuilt the SNV collection based on GENCODE 9 and currently the database includes 87,347,043 nsSNVs and 2,270,742 essential splice site SNVs (an 18% increase compared to dbNSFP v1.0). For each nsSNV dbNSFP v2.0 has added two prediction scores (MutationAssessor and FATHMM) and two conservation scores (GERP++ and SiPhy). The original five prediction and conservation scores in v1.0 (SIFT, Polyphen2, LRT, MutationTaster and PhyloP) have been updated. Rich functional annotations for SNVs and genes have also been added into the new version, including allele frequencies observed in the 1000 Genomes Project phase 1 data and the NHLBI Exome Sequencing Project, various gene IDs from different databases, functional descriptions of genes, gene expression and gene interaction information, among others. 
dbNSFP v2.0 is freely available for download at http://sites.google.com/site/jpopgen/dbNSFP.",2013-07-10 +24122928,Elucidating protein secondary structure with circular dichroism and a neural network.,"Circular dichroism spectroscopy is a quick method for determining the average secondary structures of proteins, probing their interactions with their environment, and aiding drug discovery. This article describes the development of a self-organising map structure-fitting methodology named secondary structure neural network (SSNN) to aid this process and reduce the level of expertise required. SSNN uses a database of spectra from proteins with known X-ray structures; prediction of structures for new proteins is then possible. It has been designed as 3 units: SSNN1 takes spectra for known proteins, clusters them into a map, and SSNN2 creates a matching structure map. SSNN3 places unknown spectra on the map and gives them structure vectors. SSNN3 output illustrates the process and results obtained. We detail the strengths and weaknesses of SSNN and compare it with widely accepted structure fitting programs. Current input format is Δε per amino acid residue from 240 to 190 nm in 1 nm steps for the known and unknown proteins and a vector summarizing the secondary structure elements of the known proteins. The format is readily modified to include input data with, for example, extended wavelength ranges or different assignment of secondary structures. SSNN can be used either pretrained with a reference set from the CDPro web site (direct application of SSNN3, with the provided output from SSNN1 and SSNN2) or all three modules can be used as required. 
SSNN3 is available trained (with the reference set of the 48-spectra set used in this work complemented by five additional spectra) at http://www2.warwick.ac.uk/fac/sci/chemistry/research/arodger/arodgergroup/research_intro/instrumentation/ssnn/.",2013-10-05 +25661704,Xlink Analyzer: software for analysis and visualization of cross-linking data in the context of three-dimensional structures.,"Structural characterization of large multi-subunit protein complexes often requires integrating various experimental techniques. Cross-linking mass spectrometry (XL-MS) identifies proximal protein residues and thus is increasingly used to map protein interactions and determine the relative orientation of subunits within the structure of protein complexes. To fully adapt XL-MS as a structure characterization technique, we developed Xlink Analyzer, a software tool for visualization and analysis of XL-MS data in the context of the three-dimensional structures. Xlink Analyzer enables automatic visualization of cross-links, identifies cross-links violating spatial restraints, calculates violation statistics, maps chemically modified surfaces, and allows interactive manipulations that facilitate analysis of XL-MS data and aid designing new experiments. We demonstrate these features by mapping interaction sites within RNA polymerase I and the Rvb1/2 complex. Xlink Analyzer is implemented as a plugin to UCSF Chimera, a standard structural biology software tool, and thus enables seamless integration of XL-MS data with, e.g. fitting of X-ray structures to EM maps. Xlink Analyzer is available for download at http://www.beck.embl.de/XlinkAnalyzer.html.",2015-02-07 +23898041,CrusView: a Java-based visualization platform for comparative genomics analyses in Brassicaceae species.,"In plants and animals, chromosomal breakage and fusion events based on conserved syntenic genomic blocks lead to conserved patterns of karyotype evolution among species of the same family. 
However, karyotype information has not been well utilized in genomic comparison studies. We present CrusView, a Java-based bioinformatic application utilizing Standard Widget Toolkit/Swing graphics libraries and a SQLite database for performing visualized analyses of comparative genomics data in Brassicaceae (crucifer) plants. Compared with similar software and databases, one of the unique features of CrusView is its integration of karyotype information when comparing two genomes. This feature allows users to perform karyotype-based genome assembly and karyotype-assisted genome synteny analyses with preset karyotype patterns of the Brassicaceae genomes. Additionally, CrusView is a local program, which gives its users high flexibility when analyzing unpublished genomes and allows users to upload self-defined genomic information so that they can visually study the associations between genome structural variations and genetic elements, including chromosomal rearrangements, genomic macrosynteny, gene families, high-frequency recombination sites, and tandem and segmental duplications between related species. This tool will greatly facilitate karyotype, chromosome, and genome evolution studies using visualized comparative genomics approaches in Brassicaceae species. CrusView is freely available at http://www.cmbb.arizona.edu/CrusView/.",2013-07-29 +26462614,Mutation Update of ARSA and PSAP Genes Causing Metachromatic Leukodystrophy.,"Metachromatic leukodystrophy is a neurodegenerative disorder characterized by progressive demyelination. The disease is caused by variants in the ARSA gene, which codes for the lysosomal enzyme arylsulfatase A, or, more rarely, in the PSAP gene, which codes for the activator protein saposin B. In this Mutation Update, an extensive review of all the ARSA- and PSAP-causative variants published in the literature to date, accounting for a total of 200 ARSA and 10 PSAP allele types, is presented. 
The detailed ARSA and PSAP variant lists are freely available on the Leiden Online Variation Database (LOVD) platform at http://www.LOVD.nl/ARSA and http://www.LOVD.nl/PSAP, respectively.",2015-11-04 +27878809,Telephone-assisted self-help for parents of children with attention-deficit/hyperactivity disorder who have residual functional impairment despite methylphenidate treatment: a randomized controlled trial.,"

Background

Self-help parenting interventions have been shown to be effective in the management of children with attention-deficit/hyperactivity disorder (ADHD) and may be useful when there are barriers to face-to-face therapist-led parent trainings. Previous studies indicate that behavioral interventions might be a useful adjunct to medication in children with residual ADHD symptoms, and regarding comorbid oppositional symptoms and multiple domains of functional impairment. In the present study, we examined whether a telephone-assisted self-help (TASH) parenting behavioral intervention (written materials plus telephone counseling) enhanced the effects of methylphenidate treatment in children with ADHD.

Methods

In this randomized controlled trial, parents of 103 school-aged children with ADHD and residual functional impairment despite methylphenidate treatment were randomly assigned to either the enhancement group, which received the TASH intervention as adjunct to routine clinical care (including continued medication), or to the active control group, which received routine clinical care only (including continued medication). Parent-completed outcome measures at baseline and at 12 months (postassessment) included functional impairment, ADHD symptoms, oppositional defiant disorder (ODD) symptoms, parenting behavior, and parental satisfaction with the intervention (ClinicalTrials.gov: NCT01660425; URL: https://clinicaltrials.gov/ct2/show/NCT01660425).

Results

Intention-to-treat analyses of covariance (ANCOVAs), which controlled for baseline data, revealed significant and moderate intervention effects for ODD symptoms and negative parenting behavior at the postassessment, whereas per-protocol analyses additionally showed significant and moderate effects on functional impairment (primary outcome). Parents expressed high satisfaction with the program.

Conclusions

The TASH program enhances effects of methylphenidate treatment in families who complete the intervention. The discontinuation rate of about 30% and comparison between completing and discontinuing families suggest that the program may be more suitable for families with a higher educational level and fewer additional stresses.",2016-11-23 +26680734,Light-RCV: a lightweight read coverage viewer for next generation sequencing data.,"

Background

Next-generation sequencing (NGS) technologies have brought an unprecedented amount of genomic data for analysis. Unlike array-based profiling technologies, NGS can reveal the expression profile across a transcript at the base level. Such a base-level read coverage provides further insights for alternative mRNA splicing, single-nucleotide polymorphism (SNP), novel transcript discovery, etc. However, to the best of our knowledge, none of the existing NGS viewers can visualize genome-wide base-level read coverages in a timely manner in an interactive environment.

Results

This study proposes an efficient visualization pipeline and implements a lightweight read coverage viewer, Light-RCV, with the proposed pipeline. Light-RCV consists of four featured designs on the path from raw NGS data to the final visualized read coverage: i) read coverage construction algorithm, ii) multi-resolution profiles, iii) two-stage architecture and iv) storage format. With these designs, Light-RCV achieves a < 0.5s response time on any scale of genomic ranges, including whole chromosomes. Finally, a case study was performed to demonstrate the importance of visualizing base-level read coverage and the value of Light-RCV.

Conclusions

Compared with multi-functional genome viewers such as Artemis, Savant, Tablet and Integrative Genomics Viewer (IGV), Light-RCV is designed only for visualization. Therefore, it does not provide advanced analyses. However, its backend technology provides an efficient kernel of base-level visualization that can be easily embedded to other viewers. This viewer is the first to provide timely visualization of genome-wide read coverage at the base level in an interactive environment. The software is available for free at http://lightrcv.ee.ncku.edu.tw.",2015-12-09 +25161257,Microarray R-based analysis of complex lysate experiments with MIRACLE.,"

Motivation

Reverse-phase protein arrays (RPPAs) allow sensitive quantification of relative protein abundance in thousands of samples in parallel. Typical challenges involved in this technology are antibody selection, sample preparation and optimization of staining conditions. The issue of combining effective sample management and data analysis, however, has been widely neglected.

Results

This motivated us to develop MIRACLE, a comprehensive and user-friendly web application bridging the gap between spotting and array analysis by conveniently keeping track of sample information. Data processing includes correction of staining bias, estimation of protein concentration from response curves, normalization for total protein amount per sample and statistical evaluation. Established analysis methods have been integrated with MIRACLE, offering experimental scientists an end-to-end solution for sample management and for carrying out data analysis. In addition, experienced users have the possibility to export data to R for more complex analyses. MIRACLE thus has the potential to further spread utilization of RPPAs as an emerging technology for high-throughput protein analysis.

Availability

Project URL: http://www.nanocan.org/miracle/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +23825543,LAMP: A Database Linking Antimicrobial Peptides.,"The frequent emergence of drug-resistant bacteria has created an urgent demand for new antimicrobial agents. Traditional methods of novel antibiotic development are almost obsolete. Antimicrobial peptides (AMPs) are now regarded as a potential solution to revive the traditional methods of antibiotic development, although, until now, many AMPs have failed in clinical trials. A comprehensive database of AMPs with information about their antimicrobial activity and cytotoxicity will help promote the process of finding novel AMPs with improved antimicrobial activity and reduced cytotoxicity and eventually accelerate the speed of translating the discovery of new AMPs into clinical or preclinical trials. LAMP, a database linking AMPs, serves as a tool to aid the discovery and design of AMPs as new antimicrobial agents. The current version of LAMP has 5,547 entries, comprising 3,904 natural AMPs and 1,643 synthetic peptides. The database can be queried using either simply keywords or combinatorial conditions searches. Equipped with the detailed antimicrobial activity and cytotoxicity data, the cross-linking and top similar AMPs functions implemented in LAMP will help enhance our current understanding of AMPs and this may speed up the development of new AMPs for medical applications. LAMP is freely available at: http://biotechlab.fudan.edu.cn/database/lamp.",2013-06-18 +27551104,Predicting the errors of predicted local backbone angles and non-local solvent- accessibilities of proteins by deep neural networks.,"

Motivation

Backbone structures and solvent accessible surface area of proteins are benefited from continuous real value prediction because it removes the arbitrariness of defining boundary between different secondary-structure and solvent-accessibility states. However, lacking the confidence score for predicted values has limited their applications. Here we investigated whether or not we can make a reasonable prediction of absolute errors for predicted backbone torsion angles, Cα-atom-based angles and torsion angles, solvent accessibility, contact numbers and half-sphere exposures by employing deep neural networks.

Results

We found that angle-based errors can be predicted most accurately with Spearman correlation coefficient (SPC) between predicted and actual errors at about 0.6. This is followed by solvent accessibility (SPC∼0.5). The errors on contact-based structural properties are most difficult to predict (SPC between 0.2 and 0.3). We showed that predicted errors are significantly better error indicators than the average errors based on secondary-structure and amino-acid residue types. We further demonstrated the usefulness of predicted errors in model quality assessment. These error or confidence indicators are expected to be useful for prediction, assessment, and refinement of protein structures.

Availability and implementation

The method is available at http://sparks-lab.org as a part of SPIDER2 package.

Contact

yuedong.yang@griffith.edu.au or yaoqi.zhou@griffith.edu.au Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-22 +27198219,The RING 2.0 web server for high quality residue interaction networks.,"Residue interaction networks (RINs) are an alternative way of representing protein structures where nodes are residues and arcs physico-chemical interactions. RINs have been extensively and successfully used for analysing mutation effects, protein folding, domain-domain communication and catalytic activity. Here we present RING 2.0, a new version of the RING software for the identification of covalent and non-covalent bonds in protein structures, including π-π stacking and π-cation interactions. RING 2.0 is extremely fast and generates both intra and inter-chain interactions including solvent and ligand atoms. The generated networks are very accurate and reliable thanks to a complex empirical re-parameterization of distance thresholds performed on the entire Protein Data Bank. By default, RING output is generated with optimal parameters but the web server provides an exhaustive interface to customize the calculation. The network can be visualized directly in the browser or in Cytoscape. Alternatively, the RING-Viz script for Pymol allows visualizing the interactions at atomic level in the structure. The web server and RING-Viz, together with an extensive help and tutorial, are available from URL: http://protein.bio.unipd.it/ring.",2016-05-19 +26059716,DISTMIX: direct imputation of summary statistics for unmeasured SNPs from mixed ethnicity cohorts.,"

Motivation

To increase the signal resolution for large-scale meta-analyses of genome-wide association studies, genotypes at unmeasured single nucleotide polymorphisms (SNPs) are commonly imputed using large multi-ethnic reference panels. However, the ever increasing size and ethnic diversity of both reference panels and cohorts makes genotype imputation computationally challenging for moderately sized computer clusters. Moreover, genotype imputation requires subject-level genetic data, which unlike summary statistics provided by virtually all studies, is not publicly available. While there are much less demanding methods which avoid the genotype imputation step by directly imputing SNP statistics, e.g. Directly Imputing summary STatistics (DIST) proposed by our group, their implicit assumptions make them applicable only to ethnically homogeneous cohorts.

Results

To decrease computational and access requirements for the analysis of cosmopolitan cohorts, we propose DISTMIX, which extends DIST capabilities to the analysis of mixed ethnicity cohorts. The method uses a relevant reference panel to directly impute unmeasured SNP statistics based only on statistics at measured SNPs and estimated/user-specified ethnic proportions. Simulations show that the proposed method adequately controls the Type I error rates. The 1000 Genomes panel imputation of summary statistics from the ethnically diverse Psychiatric Genetic Consortium Schizophrenia Phase 2 suggests that, when compared to genotype imputation methods, DISTMIX offers comparable imputation accuracy for only a fraction of computational resources.

Availability and implementation

DISTMIX software, its reference population data, and usage examples are publicly available at http://code.google.com/p/distmix.

Contact

dlee4@vcu.edu

Supplementary information

Supplementary Data are available at Bioinformatics online.",2015-06-09 +27662842,Identification of control targets in Boolean molecular network models via computational algebra.,"

Background

Many problems in biomedicine and other areas of the life sciences can be characterized as control problems, with the goal of finding strategies to change a disease or otherwise undesirable state of a biological system into another, more desirable, state through an intervention, such as a drug or other therapeutic treatment. The identification of such strategies is typically based on a mathematical model of the process to be altered through targeted control inputs. This paper focuses on processes at the molecular level that determine the state of an individual cell, involving signaling or gene regulation. The mathematical model type considered is that of Boolean networks. The potential control targets can be represented by a set of nodes and edges that can be manipulated to produce a desired effect on the system.

Results

This paper presents a method for the identification of potential intervention targets in Boolean molecular network models using algebraic techniques. The approach exploits an algebraic representation of Boolean networks to encode the control candidates in the network wiring diagram as the solutions of a system of polynomial equations, and then uses computational algebra techniques to find such controllers. The control methods in this paper are validated through the identification of combinatorial interventions in the signaling pathways of previously reported control targets in two well studied systems, a p53-mdm2 network and a blood T cell lymphocyte granular leukemia survival signaling network. Supplementary data is available online and our code in Macaulay2 and Matlab is available via http://www.ms.uky.edu/~dmu228/ControlAlg .

Conclusions

This paper presents a novel method for the identification of intervention targets in Boolean network models. The results in this paper show that the proposed methods are useful and efficient for moderately large networks.",2016-09-23 +26527729,sORFs.org: a repository of small ORFs identified by ribosome profiling.,"With the advent of ribosome profiling, a next generation sequencing technique providing a ""snap-shot'' of translated mRNA in a cell, many short open reading frames (sORFs) with ribosomal activity were identified. Follow-up studies revealed the existence of functional peptides, so-called micropeptides, translated from these 'sORFs', indicating a new class of bio-active peptides. Over the last few years, several micropeptides exhibiting important cellular functions were discovered. However, ribosome occupancy does not necessarily imply an actual function of the translated peptide, leading to the development of various tools assessing the coding potential of sORFs. Here, we introduce sORFs.org (http://www.sorfs.org), a novel database for sORFs identified using ribosome profiling. Starting from ribosome profiling, sORFs.org identifies sORFs, incorporates state-of-the-art tools and metrics and stores results in a public database. Two query interfaces are provided, a default one enabling quick lookup of sORFs and a BioMart interface providing advanced query and export possibilities. At present, sORFs.org harbors 263 354 sORFs that demonstrate ribosome occupancy, originating from three different cell lines: HCT116 (human), E14_mESC (mouse) and S2 (fruit fly). sORFs.org aims to provide an extensive sORFs database accessible to researchers with limited bioinformatics knowledge, thus enabling easy integration into personal projects.",2015-11-02 +26521937,WU-CRISPR: characteristics of functional guide RNAs for the CRISPR/Cas9 system.,"The CRISPR/Cas9 system has been rapidly adopted for genome editing. 
However, one major issue with this system is the lack of robust bioinformatics tools for design of single guide RNA (sgRNA), which determines the efficacy and specificity of genome editing. To address this pressing need, we analyze CRISPR RNA-seq data and identify many novel features that are characteristic of highly potent sgRNAs. These features are used to develop a bioinformatics tool for genome-wide design of sgRNAs with improved efficiency. These sgRNAs as well as the design tool are freely accessible via a web server, WU-CRISPR ( http://crispr.wustl.edu ).",2015-11-02 +26527717,"Twenty years of the MEROPS database of proteolytic enzymes, their substrates and inhibitors.","The MEROPS database (http://merops.sanger.ac.uk) is an integrated source of information about peptidases, their substrates and inhibitors, which are of great relevance to biology, medicine and biotechnology. The hierarchical classification of the database is as follows: homologous sets of sequences are grouped into a protein species; protein species are grouped into a family; families are grouped into clans. There is a type example for each protein species (known as a 'holotype'), family and clan, and each protein species, family and clan has its own unique identifier. Pages to show the involvement of peptidases and peptidase inhibitors in biological pathways have been created. Each page shows the peptidases and peptidase inhibitors involved in the pathway, along with the known substrate cleavages and peptidase-inhibitor interactions, and a link to the KEGG database of biological pathways. Links have also been established with the IUPHAR Guide to Pharmacology. A new service has been set up to allow the submission of identified substrate cleavages so that conservation of the cleavage site can be assessed. 
This should help establish whether or not a cleavage site is physiologically relevant on the basis that such a cleavage site is likely to be conserved.",2015-11-02 +27133378,[Role of radiotherapy in the management of non-Hodgkin lymphomas].,"The purpose of this review was to summarize recent data about the latest retrospective and prospective studies dealing with radiotherapy of non-Hodgkin lymphoma, in order to clarify the schedule and the role of this treatment. A systematic review was done by searching studies on the website http://www.pubmed.gov (Medline) using the following keywords: radiotherapy, radiation therapy, non-Hodgkin lymphoma. The management of non-Hodgkin lymphoma varies a lot according to the histological type and stage. The dose of radiotherapy has been studied in only one randomized trial, which concluded that there was no difference between the low dose and the high dose arms. Radiotherapy is a very good option in follicular, cutaneous, digestive or orbital non-Hodgkin lymphoma. A recent post hoc analysis of randomized trials on radiotherapy for high-grade non-Hodgkin lymphoma strongly suggested a benefit of additional radiotherapy after chemotherapy in some situations. Radiotherapy of low-grade non-Hodgkin lymphoma is a very good option, while its use on high-grade non-Hodgkin lymphoma is sometimes recommended but further randomized trials are ongoing to better understand its role.",2016-04-25 +27189211,Improving microRNA target prediction with gene expression profiles.,"

Background

Mammalian genomes encode for thousands of microRNAs, which can potentially regulate the majority of protein-coding genes. They have been implicated in development and disease, leading to great interest in understanding their function, with computational methods being widely used to predict their targets. Most computational methods rely on sequence features, thermodynamics, and conservation filters; essentially scanning the whole transcriptome to predict one set of targets for each microRNA. This has the limitation of not considering that the same microRNA could have different sets of targets, and thus different functions, when expressed in different types of cells.

Results

To address this problem, we combine popular target prediction methods with expression profiles, via machine learning, to produce a new predictor: TargetExpress. Using independent data from microarrays and high-throughput sequencing, we show that TargetExpress outperforms existing methods, and that our predictions are enriched in functions that are coherent with the added expression profile and literature reports.

Conclusions

Our method should be particularly useful for anyone studying the functions and targets of miRNAs in specific tissues or cells. TargetExpress is available at: http://targetexpress.ceiabreulab.org/ .",2016-05-17 +26671801,BMExpert: Mining MEDLINE for Finding Experts in Biomedical Domains Based on Language Model.,"With the rapid development of biomedical sciences, a great number of documents have been published to report new scientific findings and advance the process of knowledge discovery. By the end of 2013, the largest biomedical literature database, MEDLINE, has indexed over 23 million abstracts. It is thus not easy for scientific professionals to find experts on a certain topic in the biomedical domain. In contrast to the existing services that use some ad hoc approaches, we developed a novel solution to biomedical expert finding, BMExpert, based on the language model. For finding biomedical experts, who are the most relevant to a specific topic query, BMExpert mines MEDLINE documents by considering three important factors: relevance of documents to the query topic, importance of documents, and associations between documents and experts. The performance of BMExpert was evaluated on a benchmark dataset, which was built by collecting the program committee members of ISMB in the past three years (2012-2014) on 14 different topics. Experimental results show that BMExpert outperformed three existing biomedical expert finding services: JANE, GoPubMed, and eTBLAST, with respect to both MAP (mean average precision) and P@50 (Precision). BMExpert is freely accessed at http://datamining-iip.fudan.edu.cn/service/BMExpert/.",2015-11-01 +26684462,Predicting Protein-Protein Interaction Sites with a Novel Membership Based Fuzzy SVM Classifier.,"Predicting residues that participate in protein-protein interactions (PPI) helps to identify, which amino acids are located at the interface. 
In this paper, we show that the performance of the classical support vector machine (SVM) algorithm can further be improved with the use of a custom-designed fuzzy membership function, for the partner-specific PPI interface prediction problem. We evaluated the performances of both classical SVM and fuzzy SVM (F-SVM) on the PPI databases of three different model proteomes of Homo sapiens, Escherichia coli and Saccharomyces Cerevisiae and calculated the statistical significance of the developed F-SVM over classical SVM algorithm. We also compared our performance with the available state-of-the-art fuzzy methods in this domain and observed significant performance improvements. To predict interaction sites in protein complexes, local composition of amino acids together with their physico-chemical characteristics are used, where the F-SVM based prediction method exploits the membership function for each pair of sequence fragments. The average F-SVM performance (area under ROC curve) on the test samples in 10-fold cross validation experiment are measured as 77.07, 78.39, and 74.91 percent for the aforementioned organisms respectively. Performances on independent test sets are obtained as 72.09, 73.24 and 82.74 percent respectively. The software is available for free download from http://code.google.com/p/cmater-bioinfo.",2015-11-01 +26588855,[Importance of nerve-sparing surgical technique in the treatment of deep infiltrating endometriosis].,"

Introduction

Traditional surgeries performed in cases of deep infiltrating endometriosis lead to impaired quality of life.

Aim

To summarize the postoperative outcome and to compare the rate of postoperative complications after different therapeutic approaches applied in deep infiltrating endometriosis.

Method

The authors analyzed the articles published between March 31, 2004 and March 31, 2015, in the database http://www.pubmed.org using the following keywords: endometriosis, deep infiltrating, nerve sparing, surgery.

Results

Non-nerve sparing surgery resulted in temporary urinary dysfunction in 19.1-38.5% of patients, while it occurred in 0.61-33.3% of patients after nerve-sparing surgery. Non-nerve sparing surgical technique resulted in an average of 121 days of need for self-catheterisation. When nerve-sparing surgeries were performed the duration of self-catheterisation varied between 7 and 39.8 days. After nerve sparing surgeries, permanent bladder dysfunction was not detected in any case.

Conclusions

Because of the successful treatment of the patients' symptoms and the lower postoperative complication rate, nerve-sparing surgical technique leads to a significant improvement of the quality of life.",2015-11-01 +21884636,FISH Oracle: a web server for flexible visualization of DNA copy number data in a genomic context.,"

Background

The rapidly growing amount of array CGH data requires improved visualization software supporting the process of identifying candidate cancer genes. Optimally, such software should work across multiple microarray platforms, should be able to cope with data from different sources and should be easy to operate.

Results

We have developed a web-based software FISH Oracle to visualize data from multiple array CGH experiments in a genomic context. Its fast visualization engine and advanced web and database technology supports highly interactive use. FISH Oracle comes with a convenient data import mechanism, powerful search options for genomic elements (e.g. gene names or karyobands), quick navigation and zooming into interesting regions, and mechanisms to export the visualization into different high quality formats. These features make the software especially suitable for the needs of life scientists.

Conclusions

FISH Oracle offers a fast and easy to use visualization tool for array CGH and SNP array data. It allows for the identification of genomic regions representing minimal common changes based on data from one or more experiments. FISH Oracle will be instrumental to identify candidate onco and tumor suppressor genes based on the frequency and genomic position of DNA copy number changes. The FISH Oracle application and an installed demo web server are available at http://www.zbh.uni-hamburg.de/fishoracle.",2011-07-28 +26956591,Staphylococcus aureus Clumping Factor A Remains a Viable Vaccine Target for Prevention of S. aureus Infection.,"In a recent article, X. Li et al. [mBio 7(1):e02232-15, 2016, http://dx.doi.org/10.1128/mBio.02232-15] investigate the utility of a vaccine composed of the Staphylococcus aureus protein clumping factor A (ClfA) in protecting mice from S. aureus infection. ClfA, one of the first proteins to be identified as a potential vaccine antigen for S. aureus prophylaxis, is currently a component of several investigational vaccines. The authors conclude that ClfA may not be effective for S. aureus prophylaxis. In contrast, previously published papers reporting positive data suggested that ClfA was potentially an important vaccine target to prevent invasive S. aureus disease. This commentary addresses the observed differences between the findings of Li et al. and those from other publications, highlighting the importance for preclinical vaccine antigen assessments to reflect the biological role of said antigen in virulence and, consequently, the importance of choosing appropriate preclinical disease models to test such antigens.",2016-03-08 +25712691,EpiToolKit--a web-based workbench for vaccine design.,"

Unlabelled

EpiToolKit is a virtual workbench for immunological questions with a focus on vaccine design. It offers an array of immunoinformatics tools covering MHC genotyping, epitope and neo-epitope prediction, epitope selection for vaccine design, and epitope assembly. In its recently re-implemented version 2.0, EpiToolKit provides a range of new functionality and for the first time allows combining tools into complex workflows. For inexperienced users it offers simplified interfaces to guide the users through the analysis of complex immunological data sets.

Availability and implementation

http://www.epitoolkit.de

Contact

schubert@informatik.uni-tuebingen.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-20 +22712534,From steady-state to synchronized yeast glycolytic oscillations I: model construction.,"

Unlabelled

An existing detailed kinetic model for the steady-state behavior of yeast glycolysis was tested for its ability to simulate dynamic behavior. Using a small subset of experimental data, the original model was adapted by adjusting its parameter values in three optimization steps. Only small adaptations to the original model were required for realistic simulation of experimental data for limit-cycle oscillations. The greatest changes were required for parameter values for the phosphofructokinase reaction. The importance of ATP for the oscillatory mechanism and NAD(H) for inter-and intra-cellular communications and synchronization was evident in the optimization steps and simulation experiments. In an accompanying paper [du Preez F et al. (2012) FEBS J279, 2823-2836], we validate the model for a wide variety of experiments on oscillatory yeast cells. The results are important for re-use of detailed kinetic models in modular modeling approaches and for approaches such as that used in the Silicon Cell initiative.

Database

The mathematical models described here have been submitted to the JWS Online Cellular Systems Modelling Database and can be accessed at http://jjj.biochem.sun.ac.za/database/dupreez/index.html.",2012-07-09 +22102570,ProPortal: a resource for integrated systems biology of Prochlorococcus and its phage.,"ProPortal (http://proportal.mit.edu/) is a database containing genomic, metagenomic, transcriptomic and field data for the marine cyanobacterium Prochlorococcus. Our goal is to provide a source of cross-referenced data across multiple scales of biological organization--from the genome to the ecosystem--embracing the full diversity of ecotypic variation within this microbial taxon, its sister group, Synechococcus and phage that infect them. The site currently contains the genomes of 13 Prochlorococcus strains, 11 Synechococcus strains and 28 cyanophage strains that infect one or both groups. Cyanobacterial and cyanophage genes are clustered into orthologous groups that can be accessed by keyword search or through a genome browser. Users can also identify orthologous gene clusters shared by cyanobacterial and cyanophage genomes. Gene expression data for Prochlorococcus ecotypes MED4 and MIT9313 allow users to identify genes that are up or downregulated in response to environmental stressors. In addition, the transcriptome in synchronized cells grown on a 24-h light-dark cycle reveals the choreography of gene expression in cells in a 'natural' state. Metagenomic sequences from the Global Ocean Survey from Prochlorococcus, Synechococcus and phage genomes are archived so users can examine the differences between populations from diverse habitats. Finally, an example of cyanobacterial population data from the field is included.",2011-11-18 +26130249,YBYRÁ facilitates comparison of large phylogenetic trees.,"

Background

The number and size of tree topologies that are being compared by phylogenetic systematists is increasing due to technological advancements in high-throughput DNA sequencing. However, we still lack tools to facilitate comparison among phylogenetic trees with a large number of terminals.

Results

The ""YBYRÁ"" project integrates software solutions for data analysis in phylogenetics. It comprises tools for (1) topological distance calculation based on the number of shared splits or clades, (2) sensitivity analysis and automatic generation of sensitivity plots and (3) clade diagnoses based on different categories of synapomorphies. YBYRÁ also provides (4) an original framework to facilitate the search for potential rogue taxa based on how much they affect average matching split distances (using MSdist).

Conclusions

YBYRÁ facilitates comparison of large phylogenetic trees and outperforms competing software in terms of usability and time efficiency, specially for large data sets. The programs that comprises this toolkit are written in Python, hence they do not require installation and have minimum dependencies. The entire project is available under an open-source licence at http://www.ib.usp.br/grant/anfibios/researchSoftware.html .",2015-07-01 +26052282,Text mining for neuroanatomy using WhiteText with an updated corpus and a new web application.,"We describe the WhiteText project, and its progress towards automatically extracting statements of neuroanatomical connectivity from text. We review progress to date on the three main steps of the project: recognition of brain region mentions, standardization of brain region mentions to neuroanatomical nomenclature, and connectivity statement extraction. We further describe a new version of our manually curated corpus that adds 2,111 connectivity statements from 1,828 additional abstracts. Cross-validation classification within the new corpus replicates results on our original corpus, recalling 67% of connectivity statements at 51% precision. The resulting merged corpus provides 5,208 connectivity statements that can be used to seed species-specific connectivity matrices and to better train automated techniques. Finally, we present a new web application that allows fast interactive browsing of the over 70,000 sentences indexed by the system, as a tool for accessing the data and assisting in further curation. Software and data are freely available at http://www.chibi.ubc.ca/WhiteText/.",2015-05-21 +27039396,Primaquine plus artemisinin combination therapy for reduction of malaria transmission: promise and risk.,"Reduction of gametocyte transmission from humans to mosquitoes is a key component of malaria elimination. 
The study by Gonçalves and colleagues provides valuable new data on how the addition of low-dose primaquine to artemether-lumefantrine affects reduction of gametocytemia and transmission of gametocytes to mosquitoes in asymptomatically Plasmodium falciparum-infected children without G6PD deficiency, and on the degree to which low-dose primaquine affects hemoglobin levels in these children. The study sets the stage for future research required for consideration of an artemisinin combination therapy (ACT)-primaquine regimen in mass drug administration campaigns. Future studies will need to evaluate toxicity in adults and G6PD deficient persons, assess gametocyte transmission from adults, evaluate different ACT drugs with primaquine, and assess the implications of ""rare"" toxicities in large treatment populations, such as hemolysis requiring blood transfusion. The study highlights both the promise and the potential risk of ACT-primaquine treatment in malaria elimination campaigns.Please see related article: https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-016-0581-y .",2016-04-01 +27342282,NCBI prokaryotic genome annotation pipeline.,"Recent technological advances have opened unprecedented opportunities for large-scale sequencing and analysis of populations of pathogenic species in disease outbreaks, as well as for large-scale diversity studies aimed at expanding our knowledge across the whole domain of prokaryotes. To meet the challenge of timely interpretation of structure, function and meaning of this vast genetic information, a comprehensive approach to automatic genome annotation is critically needed. In collaboration with Georgia Tech, NCBI has developed a new approach to genome annotation that combines alignment based methods with methods of predicting protein-coding and RNA genes and other functional elements directly from sequence. 
A new gene finding tool, GeneMarkS+, uses the combined evidence of protein and RNA placement by homology as an initial map of annotation to generate and modify ab initio gene predictions across the whole genome. Thus, the new NCBI's Prokaryotic Genome Annotation Pipeline (PGAP) relies more on sequence similarity when confident comparative data are available, while it relies more on statistical predictions in the absence of external evidence. The pipeline provides a framework for generation and analysis of annotation on the full breadth of prokaryotic taxonomy. For additional information on PGAP see https://www.ncbi.nlm.nih.gov/genome/annotation_prok/ and the NCBI Handbook, https://www.ncbi.nlm.nih.gov/books/NBK174280/.",2016-06-24 +27822515,Open-Source Sequence Clustering Methods Improve the State Of the Art. ,"Sequence clustering is a common early step in amplicon-based microbial community analysis, when raw sequencing reads are clustered into operational taxonomic units (OTUs) to reduce the run time of subsequent analysis steps. Here, we evaluated the performance of recently released state-of-the-art open-source clustering software products, namely, OTUCLUST, Swarm, SUMACLUST, and SortMeRNA, against current principal options (UCLUST and USEARCH) in QIIME, hierarchical clustering methods in mothur, and USEARCH's most recent clustering algorithm, UPARSE. All the latest open-source tools showed promising results, reporting up to 60% fewer spurious OTUs than UCLUST, indicating that the underlying clustering algorithm can vastly reduce the number of these derived OTUs. Furthermore, we observed that stringent quality filtering, such as is done in UPARSE, can cause a significant underestimation of species abundance and diversity, leading to incorrect biological results. Swarm, SUMACLUST, and SortMeRNA have been included in the QIIME 1.9.0 release. 
IMPORTANCE Massive collections of next-generation sequencing data call for fast, accurate, and easily accessible bioinformatics algorithms to perform sequence clustering. A comprehensive benchmark is presented, including open-source tools and the popular USEARCH suite. Simulated, mock, and environmental communities were used to analyze sensitivity, selectivity, species diversity (alpha and beta), and taxonomic composition. The results demonstrate that recent clustering algorithms can significantly improve accuracy and preserve estimated diversity without the application of aggressive filtering. Moreover, these tools are all open source, apply multiple levels of multithreading, and scale to the demands of modern next-generation sequencing data, which is essential for the analysis of massive multidisciplinary studies such as the Earth Microbiome Project (EMP) (J. A. Gilbert, J. K. Jansson, and R. Knight, BMC Biol 12:69, 2014, http://dx.doi.org/10.1186/s12915-014-0069-1).",2016-01-01 +27592624,Regulatory ecotoxicity testing of nanomaterials - proposed modifications of OECD test guidelines based on laboratory experience with silver and titanium dioxide nanoparticles.,"Regulatory ecotoxicity testing of chemicals is of societal importance and a large effort is undertaken at the OECD to ensure that OECD test guidelines (TGs) for nanomaterials (NMs) are available. Significant progress to support the adaptation of selected TGs to NMs was achieved in the context of the project MARINA ( http://www.marina-fp7.eu/ ) funded within the 7th European Framework Program. Eight OECD TGs were adapted based on the testing of at least one ion-releasing NM (Ag) and two inert NMs (TiO2). With the materials applied, two main variants of NMs (ion releasing vs. inert NMs) were addressed. As the modifications of the test guidelines refer to general test topics (e.g. 
test duration or measuring principle), we assume that the described approaches and modifications will be suitable for the testing of further NMs with other chemical compositions. Firm proposals for modification of protocols with scientific justification(s) are presented for the following tests: growth inhibition using the green algae Raphidocelis subcapitata (formerly: Pseudokirchneriella subcapitata; TG 201), acute toxicity with the crustacean Daphnia magna (TG 202), development toxicity with the fish Danio rerio (TG 210), reproduction of the sediment-living worm Lumbriculus variegatus (TG 225), activity of soil microflora (TGs 216, 217), and reproduction of the invertebrates (Enchytraeus crypticus, Eisenia fetida, TGs 220, 222). Additionally, test descriptions for two further test systems (root elongation of plants in hydroponic culture; test on fish cells) are presented. Ecotoxicological data obtained with the modified test guidelines for TiO2 NMs and Ag NM and detailed method descriptions are available.",2016-09-20 +22190598,AnnotCompute: annotation-based exploration and meta-analysis of genomics experiments.,"The ever-increasing scale of biological data sets, particularly those arising in the context of high-throughput technologies, requires the development of rich data exploration tools. In this article, we present AnnotCompute, an information discovery platform for repositories of functional genomics experiments such as ArrayExpress. Our system leverages semantic annotations of functional genomics experiments with controlled vocabulary and ontology terms, such as those from the MGED Ontology, to compute conceptual dissimilarities between pairs of experiments. These dissimilarities are then used to support two types of exploratory analysis-clustering and query-by-example. We show that our proposed dissimilarity measures correspond to a user's intuition about conceptual dissimilarity, and can be used to support effective query-by-example. 
We also evaluate the quality of clustering based on these measures. While AnnotCompute can support a richer data exploration experience, its effectiveness is limited in some cases, due to the quality of available annotations. Nonetheless, tools such as AnnotCompute may provide an incentive for richer annotations of experiments. Database URL: http://www.cbil.upenn.edu/annotCompute/",2011-12-21 +26515641,Mouse polyQ database: a new online resource for research using mouse models of neurodegenerative diseases.,"

Background

The polyglutamine (polyQ) family of disorders comprises 9 genetic diseases, including several types of ataxia and Huntington disease. Approximately two decades of investigation and the creation of more than 130 mouse models of polyQ disorders have revealed many similarities between these diseases. The disorders share common mutation types, neurological characteristics and certain aspects of pathogenesis, including morphological and physiological neuronal alterations. All of the diseases still remain incurable.

Description

The large volume of information collected as a result of the investigation of polyQ models currently represents a great potential for searching, comparing and translating pathogenesis and therapeutic information between diseases. Therefore, we generated a public database comprising the polyQ mouse models, phenotypes and therapeutic interventions tested in vivo. The database is available at http://conyza.man.poznan.pl/ .

Conclusion

The use of the database in the field of polyQ diseases may accelerate research on these and other neurodegenerative diseases and provide new perspectives for future investigation.",2015-10-29 +22533604,"Strategies to work with HLA data in human populations for histocompatibility, clinical transplantation, epidemiology and population genetics: HLA-NET methodological recommendations.","HLA-NET (a European COST Action) aims at networking researchers working in bone marrow transplantation, epidemiology and population genetics to improve the molecular characterization of the HLA genetic diversity of human populations, with an expected strong impact on both public health and fundamental research. Such improvements involve finding consensual strategies to characterize human populations and samples and report HLA molecular typings and ambiguities; proposing user-friendly access to databases and computer tools and defining minimal requirements related to ethical aspects. The overall outcome is the provision of population genetic characterizations and comparisons in a standard way by all interested laboratories. This article reports the recommendations of four working groups (WG1-4) of the HLA-NET network at the mid-term of its activities. WG1 (Population definitions and sampling strategies for population genetics' analyses) recommends avoiding outdated racial classifications and population names (e.g. 'Caucasian') and using instead geographic and/or cultural (e.g. linguistic) criteria to describe human populations (e.g. 'pan-European'). A standard 'HLA-NET POPULATION DATA QUESTIONNAIRE' has been finalized and is available for the whole HLA community. WG2 (HLA typing standards for population genetics analyses) recommends retaining maximal information when reporting HLA typing results. 
Rather than using the National Marrow Donor Program coding system, all ambiguities should be provided by listing all allele pairs required to explain each genotype, according to the formats proposed in 'HLA-NET GUIDELINES FOR REPORTING HLA TYPINGS'. The group also suggests taking into account a preliminary list of alleles defined by polymorphisms outside the peptide-binding sites that may affect population genetic statistics because of significant frequencies. WG3 (Bioinformatic strategies for HLA population data storage and analysis) recommends the use of programs capable of dealing with ambiguous data, such as the 'gene[rate]' computer tools to estimate frequencies, test for Hardy-Weinberg equilibrium and selective neutrality on data containing any number and kind of ambiguities. WG4 (Ethical issues) proposes to adopt thorough general principles for any HLA population study to ensure that it conforms to (inter)national legislation or recommendations/guidelines. All HLA-NET guidelines and tools are available through its website http://hla-net.eu.",2012-04-26 +25420514,ANGSD: Analysis of Next Generation Sequencing Data.,"

Background

High-throughput DNA sequencing technologies are generating vast amounts of data. Fast, flexible and memory efficient implementations are needed in order to facilitate analyses of thousands of samples simultaneously.

Results

We present a multithreaded program suite called ANGSD. This program can calculate various summary statistics, and perform association mapping and population genetic analyses utilizing the full information in next generation sequencing data by working directly on the raw sequencing data or by using genotype likelihoods.

Conclusions

The open source c/c++ program ANGSD is available at http://www.popgen.dk/angsd . The program is tested and validated on GNU/Linux systems. The program facilitates multiple input formats including BAM and imputed beagle genotype probability files. The program allow the user to choose between combinations of existing methods and can perform analysis that is not implemented elsewhere.",2014-11-25 +25409689,Significant distinct branches of hierarchical trees: a framework for statistical analysis and applications to biological data.,"

Background

One of the most common goals of hierarchical clustering is finding those branches of a tree that form quantifiably distinct data subtypes. Achieving this goal in a statistically meaningful way requires (a) a measure of distinctness of a branch and (b) a test to determine the significance of the observed measure, applicable to all branches and across multiple scales of dissimilarity.

Results

We formulate a method termed Tree Branches Evaluated Statistically for Tightness (TBEST) for identifying significantly distinct tree branches in hierarchical clusters. For each branch of the tree a measure of distinctness, or tightness, is defined as a rational function of heights, both of the branch and of its parent. A statistical procedure is then developed to determine the significance of the observed values of tightness. We test TBEST as a tool for tree-based data partitioning by applying it to five benchmark datasets, one of them synthetic and the other four each from a different area of biology. For each dataset there is a well-defined partition of the data into classes. In all test cases TBEST performs on par with or better than the existing techniques.

Conclusions

Based on our benchmark analysis, TBEST is a tool of choice for detection of significantly distinct branches in hierarchical trees grown from biological data. An R language implementation of the method is available from the Comprehensive R Archive Network: http://www.cran.r-project.org/web/packages/TBEST/index.html.",2014-11-19 +25068440,A collaborative visual analytics suite for protein folding research.,"Molecular dynamics (MD) simulation is a crucial tool for understanding principles behind important biochemical processes such as protein folding and molecular interaction. With the rapidly increasing power of modern computers, large-scale MD simulation experiments can be performed regularly, generating huge amounts of MD data. An important question is how to analyze and interpret such massive and complex data. One of the (many) challenges involved in analyzing MD simulation data computationally is the high-dimensionality of such data. Given a massive collection of molecular conformations, researchers typically need to rely on their expertise and prior domain knowledge in order to retrieve certain conformations of interest. It is not easy to make and test hypotheses as the data set as a whole is somewhat ""invisible"" due to its high dimensionality. In other words, it is hard to directly access and examine individual conformations from a sea of molecular structures, and to further explore the entire data set. There is also no easy and convenient way to obtain a global view of the data or its various modalities of biochemical information. To this end, we present an interactive, collaborative visual analytics tool for exploring massive, high-dimensional molecular dynamics simulation data sets. The most important utility of our tool is to provide a platform where researchers can easily and effectively navigate through the otherwise ""invisible"" simulation data sets, exploring and examining molecular conformations both as a whole and at individual levels. 
The visualization is based on the concept of a topological landscape, which is a 2D terrain metaphor preserving certain topological and geometric properties of the high dimensional protein energy landscape. In addition to facilitating easy exploration of conformations, this 2D terrain metaphor also provides a platform where researchers can visualize and analyze various properties (such as contact density) overlayed on the top of the 2D terrain. Finally, the software provides a collaborative environment where multiple researchers can assemble observations and biochemical events into storyboards and share them in real time over the Internet via a client-server architecture. The software is written in Scala and runs on the cross-platform Java Virtual Machine. Binaries and source code are available at http://www.aylasoftware.org and have been released under the GNU General Public License.",2014-07-14 +25644271,Transposome: a toolkit for annotation of transposable element families from unassembled sequence reads.,"

Motivation

Transposable elements (TEs) can be found in virtually all eukaryotic genomes and have the potential to produce evolutionary novelty. Despite the broad taxonomic distribution of TEs, the evolutionary history of these sequences is largely unknown for many taxa due to a lack of genomic resources and identification methods. Given that most TE annotation methods are designed to work on genome assemblies, we sought to develop a method to provide a fine-grained classification of TEs from DNA sequence reads. Here, we present a toolkit for the efficient annotation of TE families from low-coverage whole-genome shotgun (WGS) data, enabling the rapid identification of TEs in a large number of taxa. We compared our software, Transposome, with other approaches for annotating repeats from WGS data, and we show that it offers significant improvements in run time and produces more precise estimates of genomic repeat abundance. Transposome may also be used as a general toolkit for working with Next Generation Sequencing (NGS) data, and for constructing custom genome analysis pipelines.

Availability and implementation

The source code for Transposome is freely available (http://sestaton.github.io/Transposome), implemented in Perl and is supported on Linux.",2015-02-01 +22075992,OGEE: an online gene essentiality database.,"OGEE is an Online GEne Essentiality database. Its main purpose is to enhance our understanding of the essentiality of genes. This is achieved by collecting not only experimentally tested essential and non-essential genes, but also associated gene features such as expression profiles, duplication status, conservation across species, evolutionary origins and involvement in embryonic development. We focus on large-scale experiments and complement our data with text-mining results. Genes are organized into data sets according to their sources. Genes with variable essentiality status across data sets are tagged as conditionally essential, highlighting the complex interplay between gene functions and environments. Linked tools allow the user to compare gene essentiality among different gene groups, or compare features of essential genes to non-essential genes, and visualize the results. OGEE is freely available at http://ogeedb.embl.de.",2011-11-10 +22213510,In the clinic. Plantar fasciitis.,"This issue provides a clinical overview of plantar fasciitis focusing on prevention, diagnosis, treatment, practice improvement, and patient information. Readers can complete the accompanying CME quiz for 1.5 credits. Only ACP members and individual subscribers can access the electronic features of In the Clinic. Non-subscribers who wish to access this issue of In the Clinic can elect ""Pay for View."" Subscribers can receive 1.5 category 1 CME credits by completing the CME quiz that accompanies this issue of In the Clinic. The content of In the Clinic is drawn from the clinical information and education resources of the American College of Physicians (ACP), including PIER (Physicians' Information and Education Resource) and MKSAP (Medical Knowledge and Self Assessment Program). 
Annals of Internal Medicine editors develop In the Clinic from these primary sources in collaboration with the ACP's Medical Education and Publishing division and with assistance of science writers and physician writers. Editorial consultants from PIER and MKSAP provide expert review of the content. Readers who are interested in these primary resources for more detail can consult www.acponline.org, http://pier.acponline.org, and other resources referenced within each issue of In the Clinic.",2012-01-01 +25849257,Machine learning assisted design of highly active peptides for drug discovery.,"The discovery of peptides possessing high biological activity is very challenging due to the enormous diversity for which only a minority have the desired properties. To lower cost and reduce the time to obtain promising peptides, machine learning approaches can greatly assist in the process and even partly replace expensive laboratory experiments by learning a predictor with existing data or with a smaller amount of data generation. Unfortunately, once the model is learned, selecting peptides having the greatest predicted bioactivity often requires a prohibitive amount of computational time. For this combinatorial problem, heuristics and stochastic optimization methods are not guaranteed to find adequate solutions. We focused on recent advances in kernel methods and machine learning to learn a predictive model with proven success. For this type of model, we propose an efficient algorithm based on graph theory, that is guaranteed to find the peptides for which the model predicts maximal bioactivity. We also present a second algorithm capable of sorting the peptides of maximal bioactivity. Extensive analyses demonstrate how these algorithms can be part of an iterative combinatorial chemistry procedure to speed up the discovery and the validation of peptide leads. 
Moreover, the proposed approach does not require the use of known ligands for the target protein since it can leverage recent multi-target machine learning predictors where ligands for similar targets can serve as initial training data. Finally, we validated the proposed approach in vitro with the discovery of new cationic antimicrobial peptides. Source code freely available at http://graal.ift.ulaval.ca/peptide-design/.",2015-04-07 +23686313,PPISURV: a novel bioinformatics tool for uncovering the hidden role of specific genes in cancer survival outcome.,"Multiple clinical studies have correlated gene expression with survival outcome in cancer on a genome-wide scale. However, in many cases, no obvious correlation between expression of well-known tumour-related genes (that is, p53, p73 and p21) and survival rates of patients has been observed. This can be mainly explained by the complex molecular mechanisms involved in cancer, which mask the clinical relevance of a gene with multiple functions if only gene expression status is considered. As we demonstrate here, in many such cases, the expression of the gene interaction partners (gene 'interactome') correlates significantly with cancer survival and is indicative of the role of that gene in cancer. On the basis of this principle, we have implemented a free online datamining tool (http://www.bioprofiling.de/PPISURV). PPISURV automatically correlates expression of an input gene interactome with survival rates on >40 publicly available clinical expression data sets covering various tumours involving about 8000 patients in total. To derive the query gene interactome, PPISURV employs several public databases including protein-protein interactions, regulatory and signalling pathways and protein post-translational modifications.",2013-05-20 +24813211,A framework for installable external tools in Skyline.,"

Unlabelled

Skyline is a Windows client application for targeted proteomics method creation and quantitative data analysis. The Skyline document model contains extensive mass spectrometry data from targeted proteomics experiments performed using selected reaction monitoring, parallel reaction monitoring and data-independent and data-dependent acquisition methods. Researchers have developed software tools that perform statistical analysis of the experimental data contained within Skyline documents. The new external tools framework allows researchers to integrate their tools into Skyline without modifying the Skyline codebase. Installed tools provide point-and-click access to downstream statistical analysis of data processed in Skyline. The framework also specifies a uniform interface to format tools for installation into Skyline. Tool developers can now easily share their tools with proteomics researchers using Skyline.

Availability and implementation

Skyline is available as a single-click self-updating web installation at http://skyline.maccosslab.org. This Web site also provides access to installable external tools and documentation.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-09 +25847006,Stratifying tumour subtypes based on copy number alteration profiles using next-generation sequence data.,"

Motivation

The role of personalized medicine and target treatment in the clinical management of cancer patients has become increasingly important in recent years. This has made the task of precise histological substratification of cancers crucial. Increasingly, genomic data are being seen as a valuable classifier. Specifically, copy number alteration (CNA) profiles generated by next-generation sequencing (NGS) can become a determinant for tumour subtyping. The principal purpose of this study is to devise a model with good prediction capability for the tumour histological subtypes as a function of both the patients' covariates and their genome-wide CNA profiles from NGS data.

Results

We investigate a logistic regression for modelling tumour histological subtypes as a function of the patients' covariates and their CNA profiles, in a mixed model framework. The covariates, such as age and gender, are considered as fixed predictors and the genome-wide CNA profiles are considered as random predictors. We illustrate the application of this model in lung and oral cancer datasets, and the results indicate that the tumour histological subtypes can be modelled with a good fit. Our cross-validation indicates that the logistic regression exhibits the best prediction relative to other classification methods we considered in this study. The model also exhibits the best agreement in the prediction between smooth-segmented and circular binary-segmented CNA profiles.

Availability and implementation

An R package to run a logistic regression is available in http://www1.maths.leeds.ac.uk/~arief/R/CNALR/.

Contact

a.gusnanto@leeds.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-05 +25195035,"DNA-LCEB: a high-capacity and mutation-resistant DNA data-hiding approach by employing encryption, error correcting codes, and hybrid twofold and fourfold codon-based strategy for synonymous substitution in amino acids.","Data-hiding in deoxyribonucleic acid (DNA) sequences can be used to develop an organic memory and to track parent genes in an offspring as well as in genetically modified organism. However, the main concerns regarding data-hiding in DNA sequences are the survival of organism and successful extraction of watermark from DNA. This implies that the organism should live and reproduce without any functional disorder even in the presence of the embedded data. Consequently, performing synonymous substitution in amino acids for watermarking becomes a primary option. In this regard, a hybrid watermark embedding strategy that employs synonymous substitution in both twofold and fourfold codons of amino acids is proposed. This work thus presents a high-capacity and mutation-resistant watermarking technique, DNA-LCEB, for hiding secret information in DNA of living organisms. By employing the different types of synonymous codons of amino acids, the data storage capacity has been significantly increased. It is further observed that the proposed DNA-LCEB employing a combination of synonymous substitution, lossless compression, encryption, and Bose-Chaudary-Hocquenghem coding is secure and performs better in terms of both capacity and robustness compared to existing DNA data-hiding schemes. The proposed DNA-LCEB is tested against different mutations, including silent, miss-sense, and non-sense mutations, and provides substantial improvement in terms of mutation detection/correction rate and bits per nucleotide. 
A web application for DNA-LCEB is available at http://111.68.99.218/DNA-LCEB.",2014-09-07 +25344302,"FastMG: a simple, fast, and accurate maximum likelihood procedure to estimate amino acid replacement rate matrices from large data sets.","

Background

Amino acid replacement rate matrices are a crucial component of many protein analysis systems such as sequence similarity search, sequence alignment, and phylogenetic inference. Ideally, the rate matrix reflects the mutational behavior of the actual data under study; however, estimating amino acid replacement rate matrices requires large protein alignments and is computationally expensive and complex. As a compromise, sub-optimal pre-calculated generic matrices are typically used for protein-based phylogeny. Sequence availability has now grown to a point where problem-specific rate matrices can often be calculated if the computational cost can be controlled.

Results

The most time consuming step in estimating rate matrices by maximum likelihood is building maximum likelihood phylogenetic trees from protein alignments. We propose a new procedure, called FastMG, to overcome this obstacle. The key innovation is the alignment-splitting algorithm that splits alignments with many sequences into non-overlapping sub-alignments prior to estimating amino acid replacement rates. Experiments with different large data sets showed that the FastMG procedure was an order of magnitude faster than without splitting. Importantly, there was no apparent loss in matrix quality if an appropriate splitting procedure is used.

Conclusions

FastMG is a simple, fast and accurate procedure to estimate amino acid replacement rate matrices from large data sets. It enables researchers to study the evolutionary relationships for specific groups of proteins or taxa with optimized, data-specific amino acid replacement rate matrices. The programs, data sets, and the new mammalian mitochondrial protein rate matrix are available at http://fastmg.codeplex.com.",2014-10-24 +26484244,Time-series analysis of the transcriptome of the re-establishment of desiccation tolerance by ABA in germinated Arabidopsis thaliana seeds.,"Expression analyses of time series have become a very popular method for studying the dynamics of a wide range of biological processes. Here, we present expression analysis of a time series with the help of microarrays used to study the re-establishment of desiccation tolerance (DT) in germinated Arabidopsis thaliana seeds. Mature seeds of A. thaliana are desiccation tolerant (survive the loss of most of their water content), but they become desiccation sensitive while progressing to germination. Yet, there is a small developmental window during which DT can be re-established by treatment with the plant hormone abscisic acid (ABA). We studied germinated A. thaliana seeds at the stage of radicle protrusion during ABA incubation for 0 h, 2 h, 12 h, 24 h and 72 h. We describe in detail the methodology applied for generating and analyzing this expression data of time series. 
The microarray raw data (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE62876) may be valuable for further studies on this experimental system, such as the construction of a gene co-expression network [1].",2015-06-06 +25392053,"Popular epidemiology and ""fracking"": citizens' concerns regarding the economic, environmental, health and social impacts of unconventional natural gas drilling operations.","Pennsylvania sits atop the Marcellus Shale, a reservoir of natural gas that was untapped until the 2004 introduction of unconventional natural gas drilling operations (UNGDO) in the state. Colloquially known as fracking, UNGDO is a controversial process that employs large volumes of water to fracture the shale and capture gas; it has become a multi-billion dollar industry in Pennsylvania. We analyzed letters to the editor of the most widely circulated local newspaper in the most heavily drilled county in Pennsylvania (Bradford County) in order to characterize residents' concerns and their involvement in popular epidemiology--the process by which citizens investigate risks associated with a perceived environmental threat. We reviewed 215 letters to the editor that referenced natural gas operations and were published by The Daily Review between January 1, 2008 and June 8, 2013. We used NVivo 10 to code and analyze letters and identify major themes. Nvivo is qualitative data analysis software (http://www.qsrinternational.com/products_nvivo.aspx) that allows researchers to code and analyze ""unstructured"" data, including text files of any type (e.g., interview transcripts, news articles, letters, archival materials) as well as photographs and videos. Nvivo can be used to classify, sort, query, comment on, and share data across a research group. 
Letters demonstrated citizen engagement in beginning and intermediate stages of lay epidemiology, as well as discord and stress regarding four main issues: socio-economic impacts, perceived threats to water, population growth and implications, and changes to the rural landscape. Residents called for stronger scientific evidence and a balance of economic development and health and environmental protections. Citizens' distress regarding UNGDO appeared to be exacerbated by a dearth of information to guide economic growth and health, environmental, and social concerns. This analysis proposes locally informed questions to guide future surveillance and research.",2015-06-01 +26535646,A rare PAX6 mutation in a Chinese family with congenital aniridia.,"Aniridia is an autosomal dominant disorder characterized by the complete or partial loss of the iris and is almost associated with mutations in the paired box gene 6 (PAX6). We examined three generations of a Chinese family with congenital aniridia and observed genetic defects. Exons of PAX6 from 12 family members were amplified by polymerase chain reaction, sequenced, and compared with reference sequences in NCBI reference sequence database (http://www.ncbi.nlm.nih.gov/nuccore/NG_008679.1?from=5001&to=38170&report=genbank). A rare mutation c.2T>A (M1K) in exon 4 of PAX6 was identified in all affected family members but not in unaffected family members. Our results suggest that the c.2T>A (M1K) mutation may be responsible for the pathogenesis of congenital aniridia in this family. To our knowledge, this is the first report of the M1K mutation in PAX6 in a Chinese family with this disease and the second report worldwide.",2015-10-27 +23494302,One stop shop for everything Dictyostelium: dictyBase and the Dicty Stock Center in 2012.,"dictyBase (http://dictybase.org), the model organism database for Dictyostelium discoideum, includes the complete genome sequence and expression data for this organism. 
Relevant literature is integrated into the database, and gene models and functional annotation are manually curated from experimental results and comparative multigenome analyses. dictyBase has recently expanded to include the genome sequences of three additional Dictyostelids and has added new software tools to facilitate multigenome comparisons. The Dicty Stock Center, a strain and plasmid repository for Dictyostelium research, has relocated to Northwestern University in 2009. This allowed us integrating all Dictyostelium resources to better serve the research community. In this chapter, we will describe how to navigate the Web site and highlight some of our newer improvements.",2013-01-01 +26316313,Consolidation of proteomics data in the Cancer Proteomics database.,"Cancer is a class of diseases characterized by abnormal cell growth and one of the major reasons for human deaths. Proteins are involved in the molecular mechanisms leading to cancer, furthermore they are affected by anti-cancer drugs, and protein biomarkers can be used to diagnose certain cancer types. Therefore, it is important to explore the proteomics background of cancer. In this report, we developed the Cancer Proteomics database to re-interrogate published proteome studies investigating cancer. The database is divided in three sections related to cancer processes, cancer types, and anti-cancer drugs. Currently, the Cancer Proteomics database contains 9778 entries of 4118 proteins extracted from 143 scientific articles covering all three sections: cell death (cancer process), prostate cancer (cancer type) and platinum-based anti-cancer drugs including carboplatin, cisplatin, and oxaliplatin (anti-cancer drugs). 
The detailed information extracted from the literature includes basic information about the articles (e.g., PubMed ID, authors, journal name, publication year), information about the samples (type, study/reference, prognosis factor), and the proteomics workflow (Subcellular fractionation, protein, and peptide separation, mass spectrometry, quantification). Useful annotations such as hyperlinks to UniProt and PubMed were included. In addition, many filtering options were established as well as export functions. The database is freely available at http://cancerproteomics.uio.no.",2015-10-26 +28046017,Development and External Validation of the Korean Prostate Cancer Risk Calculator for High-Grade Prostate Cancer: Comparison with Two Western Risk Calculators in an Asian Cohort.,"

Purpose

We developed the Korean Prostate Cancer Risk Calculator for High-Grade Prostate Cancer (KPCRC-HG) that predicts the probability of prostate cancer (PC) of Gleason score 7 or higher at the initial prostate biopsy in a Korean cohort (http://acl.snu.ac.kr/PCRC/RISC/). In addition, KPCRC-HG was validated and compared with internet-based Western risk calculators in a validation cohort.

Materials and methods

Using a logistic regression model, KPCRC-HG was developed based on the data from 602 previously unscreened Korean men who underwent initial prostate biopsies. Using 2,313 cases in a validation cohort, KPCRC-HG was compared with the European Randomized Study of Screening for PC Risk Calculator for high-grade cancer (ERSPCRC-HG) and the Prostate Cancer Prevention Trial Risk Calculator 2.0 for high-grade cancer (PCPTRC-HG). The predictive accuracy was assessed using the area under the receiver operating characteristic curve (AUC) and calibration plots.

Results

PC was detected in 172 (28.6%) men, 120 (19.9%) of whom had PC of Gleason score 7 or higher. Independent predictors included prostate-specific antigen levels, digital rectal examination findings, transrectal ultrasound findings, and prostate volume. The AUC of the KPCRC-HG (0.84) was higher than that of the PCPTRC-HG (0.79, p<0.001) but not different from that of the ERSPCRC-HG (0.83) on external validation. Calibration plots also revealed better performance of KPCRC-HG and ERSPCRC-HG than that of PCPTRC-HG on external validation. At a cut-off of 5% for KPCRC-HG, 253 of the 2,313 men (11%) would not have been biopsied, and 14 of the 614 PC cases with Gleason score 7 or higher (2%) would not have been diagnosed.

Conclusions

KPCRC-HG is the first web-based high-grade prostate cancer prediction model in Korea. It had higher predictive accuracy than PCPTRC-HG in a Korean population and showed similar performance with ERSPCRC-HG in a Korean population. This prediction model could help avoid unnecessary biopsy and reduce overdiagnosis and overtreatment in clinical settings.",2017-01-03 +27826955,Vitamin D supplementation for preventing infections in children under five years of age.,"

Background

Vitamin D is a micronutrient important for bone growth and immune function. Deficiency can lead to rickets and has been linked to various infections, including respiratory infections. The evidence on the effects of supplementation on infections in children has not been assessed systematically.

Objectives

To evaluate the role of vitamin D supplementation in preventing pneumonia, tuberculosis (TB), diarrhoea, and malaria in children under five years of age. This includes high-, middle-, and low-income countries.

Search methods

We searched the Cochrane Infectious Diseases Group Specialized Register, the Cochrane Central Register of Controlled Trials (CENTRAL), the Cochrane Library, MEDLINE, EMBASE, LILACS, the WHO International Clinical Trials Registry Platform (ICTRP; http://www.who.int/ictrp/en/), ClinicalTrials.gov and the ISRCTN registry (http://www.isrctn.com/) up to 16 June 2016.

Selection criteria

We included randomized controlled trials (RCTs) that evaluated preventive supplementation of vitamin D (versus placebo or no intervention) in children under five years of age.

Data collection and analysis

Two review authors independently screened the titles and abstracts, extracted the data, and assessed the risk of bias of included trials.

Main results

Four trials met the inclusion criteria, with a total of 3198 children under five years of age, and were conducted in Afghanistan, Spain, and the USA. Prevalence of vitamin D deficiency varied widely in these populations (range: 73.1% in Afghanistan, 10 to 12% in USA, and 6.2% in Spain). The included trials evaluated mortality (two trials), pneumonia incidence (two trials), diarrhoea incidence (two trials), hospitalization (two trials), and mean serum vitamin D concentrations (four trials). We do not know whether vitamin D supplementation impacts on all-cause mortality because this outcome was underpowered due to few events (risk ratio (RR) 1.43, 95% confidence interval (CI) 0.54 to 3.74; one trial, 3046 participants, low quality evidence). For pneumonia, episodes of 'radiologically confirmed' first or only episode of pneumonia were little different in the supplemented and unsupplemented group (Rate Ratio: 1.06, 95% confidence interval (CI) 0.89 to 1.26; two trials, 3134 participants, moderate quality evidence), and similarly for children with confirmed or unconfirmed pneumonia (RR 0.95, 95% CI 0.87 to 1.04; one trial, 3046 participants). 
In these two trials there were no obvious differences between supplemented and unsupplemented children regarding episodes of diarrhoea. In the single large trial from Afghanistan, the trial authors reported that vitamin D supplementation was associated with an increase in repeat episodes of pneumonia confirmed by chest radiograph (RR 1.69, 95% CI 1.28 to 2.21; one trial, 3046 participants), but not reflected in the outcome of confirmed or unconfirmed pneumonia (RR 1.06, 95% CI 1.00 to 1.13; one trial, 3046 participants). For hospital admission measured in one small trial, there was no difference detected (RR 0.86, 95% CI 0.20 to 3.62; one trial, 88 participants; very low quality evidence). The mean serum vitamin D concentrations were higher in supplemented compared to unsupplemented children at the end of supplementation (MD 7.72 ng/mL, 95% CI 0.50 to 14.93; four trials, 266 participants, low quality evidence). These results were driven primarily by two smaller trials with large magnitudes of effect. In the other two bigger trials, serum vitamin D concentrations were elevated in the intervention group for most of the trial duration but not at the end of supplementation. This may be due to time elapsed at measurement from the last dose, incomplete compliance, or increased need of vitamin D with infant age. We did not find any trial that reported on the incidence of TB, malaria or febrile illness, duration of pneumonia, duration of diarrhoea, severity of infection, and cause-specific mortality (due to TB, diarrhoea, or malaria).

Authors' conclusions

Evidence from one large trial did not demonstrate benefit of vitamin D supplementation on the incidence of pneumonia or diarrhoea in children under five years. To our knowledge, trials that evaluated supplementation for preventing other infections, including TB and malaria, have not been performed.",2016-11-09 +,Patterns and Determinants of Floristic Variation across Lowland Forests of Bolivia,"Floristic variation is high in the Neotropics, but little is known about the factors shaping this variation at the mesoscale. We examined floristic composition and its relationship with environmental factors across 220 1-ha permanent plots in tropical lowland Bolivia. For each plot, abundance of 100 species (93 tree and 7 palm species ≥10 cm diam) was obtained. Climatic data, related to rainfall seasonality and temperature, were interpolated from all available weather stations in the region, and soil properties, related to texture and fertility, were obtained for each plot. Floristic variation was strongly associated with differences in water availability and temperature, and therefore the climatic gradient shaped floristic variation more strongly than the edaphic gradient. Detrended correspondence analysis ordination divided lowland Bolivia primarily into two major groups (Southern Chiquitano region vs. the Amazon region) and a multiple response permutation procedure distinguished five floristic regions. Overall, the tested environmental variables differed significantly among the five regions. Using indicator species analysis, we distinguished 82 strong indicator species, which had significant environmental preferences for one floristic region. These species can be used as indicators of environmental conditions or to determine which floristic region a certain forest belongs. Given the predicted decreases in rainfall and increases in temperature for tropical lowland forests, our gradient approach suggests that species composition may shift drastically with climate change. 
Abstract in Spanish is available at http://www.blackwell-synergy.com/loi/btp.",2011-07-01 +26108279,GDC 2: Compression of large collections of genomes.,"The fall of prices of the high-throughput genome sequencing changes the landscape of modern genomics. A number of large scale projects aimed at sequencing many human genomes are in progress. Genome sequencing also becomes an important aid in the personalized medicine. One of the significant side effects of this change is a necessity of storage and transfer of huge amounts of genomic data. In this paper we deal with the problem of compression of large collections of complete genomic sequences. We propose an algorithm that is able to compress the collection of 1092 human diploid genomes about 9,500 times. This result is about 4 times better than what is offered by the other existing compressors. Moreover, our algorithm is very fast as it processes the data with speed 200 MB/s on a modern workstation. In a consequence the proposed algorithm allows storing the complete genomic collections at low cost, e.g., the examined collection of 1092 human genomes needs only about 700 MB when compressed, what can be compared to about 6.7 TB of uncompressed FASTA files. The source code is available at http://sun.aei.polsl.pl/REFRESH/index.php?page=projects&project=gdc&subpage=about.",2015-06-25 +27166375,ConSurf 2016: an improved methodology to estimate and visualize evolutionary conservation in macromolecules.,"The degree of evolutionary conservation of an amino acid in a protein or a nucleic acid in DNA/RNA reflects a balance between its natural tendency to mutate and the overall need to retain the structural integrity and function of the macromolecule. The ConSurf web server (http://consurf.tau.ac.il), established over 15 years ago, analyses the evolutionary pattern of the amino/nucleic acids of the macromolecule to reveal regions that are important for structure and/or function. 
Starting from a query sequence or structure, the server automatically collects homologues, infers their multiple sequence alignment and reconstructs a phylogenetic tree that reflects their evolutionary relations. These data are then used, within a probabilistic framework, to estimate the evolutionary rates of each sequence position. Here we introduce several new features into ConSurf, including automatic selection of the best evolutionary model used to infer the rates, the ability to homology-model query proteins, prediction of the secondary structure of query RNA molecules from sequence, the ability to view the biological assembly of a query (in addition to the single chain), mapping of the conservation grades onto 2D RNA models and an advanced view of the phylogenetic tree that enables interactively rerunning ConSurf with the taxa of a sub-tree.",2016-05-10 +26375780,Identification of protein-protein binding sites by incorporating the physicochemical properties and stationary wavelet transforms into pseudo amino acid composition.,"With the explosive growth of protein sequences entering into protein data banks in the post-genomic era, it is highly demanded to develop automated methods for rapidly and effectively identifying the protein-protein binding sites (PPBSs) based on the sequence information alone. To address this problem, we proposed a predictor called iPPBS-PseAAC, in which each amino acid residue site of the proteins concerned was treated as a 15-tuple peptide segment generated by sliding a window along the protein chains with its center aligned with the target residue. The working peptide segment is further formulated by a general form of pseudo amino acid composition via the following procedures: (1) it is converted into a numerical series via the physicochemical properties of amino acids; (2) the numerical series is subsequently converted into a 20-D feature vector by means of the stationary wavelet transform technique. 
Formed by many individual ""Random Forest"" classifiers, the operation engine to run prediction is a two-layer ensemble classifier, with the 1st-layer voting out the best training data-set from many bootstrap systems and the 2nd-layer voting out the most relevant one from seven physicochemical properties. Cross-validation tests indicate that the new predictor is very promising, meaning that many important key features, which are deeply hidden in complicated protein sequences, can be extracted via the wavelets transform approach, quite consistent with the facts that many important biological functions of proteins can be elucidated with their low-frequency internal motions. The web server of iPPBS-PseAAC is accessible at http://www.jci-bioinfo.cn/iPPBS-PseAAC , by which users can easily acquire their desired results without the need to follow the complicated mathematical equations involved.",2015-10-29 +25431330,"SNiPA: an interactive, genetic variant-centered annotation browser.","

Motivation

Linking genes and functional information to genetic variants identified by association studies remains difficult. Resources containing extensive genomic annotations are available but often not fully utilized due to heterogeneous data formats. To enhance their accessibility, we integrated many annotation datasets into a user-friendly webserver.

Availability and implementation

http://www.snipa.org/

Contact

g.kastenmueller@helmholtz-muenchen.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-26 +30731980,First Report of a Natural Infection of Stevia rebaudiana by a Group 16SrXXIV Phytoplasma in India.,"Stevia rebaudiana Bertoni (Asteraceae) is one of the most important commercial crops in the world (4). It is known to produce glycosides that are as much as 300 times sweeter than sucrose and do not affect blood sugar levels. Unlike artificial sweeteners like saccharin, they are noncarcinogenic and safe for diabetics. An unknown disease emerged during the summers of 2007 to 2009 in a field of S. rebaudiana at CIMAP Lucknow, India, where more than 20% of the plants exhibited symptoms typical of phytoplasma infection including leaf yellowing, reduced size of leaves, shoot proliferation, flower bud deficiency, as well as bushy and stunted growth. Some of these plants were potted and kept in a glasshouse for investigation. Affected plants in the field expressed a quick decline consisting of growth cessation, bronzing of mature leaves, wilting, and death, resulting in a significant reduction in biomass and quality. Typical phytoplasma-like (pleomorphic) bodies ranging from 450 to 900 nm were observed in the phloem cells of infected plants by transmission electron microscopy (1). These bodies were always found in diseased plants, but not in asymptomatic ones. No other microorganisms were noted. Total DNA was extracted from symptomatic as well as asymptomatic plants by a CTAB method. PCR was carried out with the universal phytoplasma primers P1/P6 (P1, 5'-AAGAGTTTGATCCTGGCTCAGGATT-3'; P6, 5'-CGGTAGGGATACCTTGTTACGACTTA-3') (2) followed by nested primers R16F2n/R16R2 (R16F2n, 5'-GAAACGACTGCTAAGACTGG-3'; R16R2, 5'-TGACGGGCGGTGTGTACAAACCCCG-3') targeting the 16S rRNA gene sequence (3). The P1/P6 and R16F2n/R16R2 primers produced the expected 1.5- and 1.2-kb amplicons, respectively, from the symptomatic plants and not from the asymptomatic ones. 
Seventeen symptomatic and eight asymptomatic samples were analyzed through PCR. Nested PCR products were ligated into the plasmid vector using the TOPO TA Cloning Kit (Invitrogen, Carlsbad, CA). Transformation and selection of recombinant clones was carried out according to the manufacturer's recommended protocol. The sequence obtained from the final PCR product was deposited in the GenBank database (No. JF970603). It was analyzed through the iPhyClassifier ( http://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi ) online tool and found to share 98.2% similarity with that of the 'Sorghum bunchy shoot phytoplasma' reference strain (GenBank No. AF509322) that belongs to 16SrXXIV-A subgroup. The virtual restriction fragment length polymorphism pattern of the S. rebaudiana phytoplasma 16S rRNA gene sequence showed maximum similarity to the reference pattern of AF509322 (similarity coefficient of 0.85). Although a number of phytoplasmas have been detected on a wide range of plants in India, little is known about the leafhopper that presumably transmits them to S. rebaudiana and other medicinal crops. Infections by diverse phytoplasma strains/species underscore the need for phytoplasma-free planting stock and intensification of research efforts to reduce ecological and economic impacts of these phytoplasmas. To our knowledge, this is the first report of a natural infection of S. rebaudiana by a group of 16SrXXIV-A phytoplasma. References: (1) P. V. Ajayakumar et al. Aust. Plant Dis. Notes 2:67, 2007. (2) S. Deng and C. Hiruki. J. Microbiol. Methods 14:53, 1991. (3) D. E. Gundersen and I. M. Lee. Phytopathol. Mediterr. 35:144, 1996. (4) S. M. Savita et al. J. Hum. Ecol. 15:261, 2004.",2011-12-01 +25161229,Personalized identification of altered pathways in cancer using accumulated normal tissue data.,"

Motivation

Identifying altered pathways in an individual is important for understanding disease mechanisms and for the future application of custom therapeutic decisions. Existing pathway analysis techniques are mainly focused on discovering altered pathways between normal and cancer groups and are not suitable for identifying the pathway aberrance that may occur in an individual sample. A simple way to identify individual's pathway aberrance is to compare normal and tumor data from the same individual. However, the matched normal data from the same individual are often unavailable in clinical situation. Therefore, we suggest a new approach for the personalized identification of altered pathways, making special use of accumulated normal data in cases when a patient's matched normal data are unavailable. The philosophy behind our method is to quantify the aberrance of an individual sample's pathway by comparing it with accumulated normal samples. We propose and examine personalized extensions of pathway statistics, overrepresentation analysis and functional class scoring, to generate individualized pathway aberrance score.

Results

Collected microarray data of normal tissue of lung and colon mucosa are served as reference to investigate a number of cancer individuals of lung adenocarcinoma (LUAD) and colon cancer, respectively. Our method concurrently captures known facts of cancer survival pathways and identifies the pathway aberrances that represent cancer differentiation status and survival. It also provides more improved validation rate of survival-related pathways than when a single cancer sample is interpreted in the context of cancer-only cohort. In addition, our method is useful in classifying unknown samples into cancer or normal groups. Particularly, we identified 'amino acid synthesis and interconversion' pathway is a good indicator of LUAD (Area Under the Curve (AUC) 0.982 at independent validation). Clinical importance of the method is providing pathway interpretation of single cancer, even though its matched normal data are unavailable.

Availability and implementation

The method was implemented using the R software, available at our Web site: http://bibs.snu.ac.kr/ipas.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +26506440,Penetrance of pathogenic mutations in haploinsufficient genes for intellectual disability and related disorders.,"De novo loss of function (LOF) mutations in the ASXL3 gene cause Bainbridge-Ropers syndrome, a severe form of intellectual disability (ID) and developmental delay, but there is evidence that they also occur in healthy individuals. This has prompted us to look for non-pathogenic LOF variants in other ID genes. Heterozygous LOF mutations in ASXL1, a paralog of ASXL3, are known to cause Bohring-Opitz syndrome (BOS), and benign LOF mutations in this gene have not been published to date. Therefore, we were surprised to find 56 ASXL1 LOF variants in the ExAC database (http://exac.broadinstitute.org), comprising exomes from 60,706 individuals who had been selected to exclude severe genetic childhood disorders. 4 of these variants have been described as disease-causing in patients with BOS, which rules out the possibility that pathogenic and clinically neutral LOF variants in this gene are functionally distinct. Apparently benign LOF variants were also detected in several other genes for ID and related disorders, including CDH15, KATNAL2, DEPDC5, ARID1B and AUTS2, both in the ExAC database and in the 6,500 exomes of the Exome Variant Server (http://evs.gs.washington.edu/EVS/). These observations argue for low penetrance of LOF mutations in ASXL1 and other genes for ID and related disorders, which could have far-reaching implications for genetic counseling and research.",2015-10-24 +,Host Plant Specialization and Species Turnover of Caterpillars Among Hosts in the Brazilian Cerrado,"Decrease in the species composition similarity of herbivore assemblages with increasing phylogenetic distance between host plants is a widespread pattern. 
Here we used data for caterpillars in the Brazilian Cerrado to investigate how the similarity in caterpillar species composition decreases as the taxonomic level and genetic distance (GD) of their host plants increases. In addition, we elucidate the plant taxonomic level that provides the greatest contribution to turnover in the caterpillar species composition among host taxa. Adult Lepidoptera were reared from caterpillars collected from 52 plants over 13 yr in the same area, with each host plant sampled for 1 yr. Most species were specialists, with 66 percent of genus specialists among the nonsingleton species. The similarity in caterpillar species composition across plant taxa decreased from host species to genera, and from host genera to orders. Above this level, the similarity was consistently low. The GD between plants explained 82 percent of the variation in the similarity of caterpillar species composition. The contribution of caterpillar species turnover among host orders from the same superorder and among host superorders from the same subclass explained 70 percent of the caterpillar species richness as a whole. Our results lend support to the view that most tropical caterpillars are host specialists. Our findings further indicate that the number of orders and superorders of plants provide the greatest contribution to the total caterpillar richness compared with all of the other host taxonomic levels combined. Abstract in Portuguese is available at http://www.blackwell-synergy.com/loi/btp.",2011-07-01 +,E-Manuscript Article Summaries*,"E-JOURNAL LINKED ABSTRACT URL http://www.current-oncology.com/index.php/oncology/article/view/851/

Background:

Of all mastectomy patients, 90% will use an external prosthesis where the standard of care uses a stock prosthesis that is purchased “off the shelf.” Our objectives were to determine patient demand for and perceived value of a custom breast prosthesis. The information obtained will influence future research and program direction.

Methods:

We asked 65 women who had undergone lumpectomy or mastectomy to participate before exploring rehabilitation options. The quantitative outcome measures were the European Organisation for Research and Treatment of Cancer qlq-C30 general and -BR23 breast cancer–specific quality of life questionnaires, and the Ambulatory Oncology Patients Satisfaction Tool. The qlq results were analyzed using the Mann–Whitney U-test. Results of the satisfaction tool were compared using the Fisher exact and chi-square tests. A descriptive qualitative approach—involving in-depth interviews exploring the experiences of the women—was used to establish the perceived value of the services to the patients. The analysis of the interview transcripts was conducted using a standardized content method to describe the experiences of the women.

Results:

All the women had had previous experiences with a conventional prosthesis, and they reported that wearing a custom prosthesis was more satisfying for them. They reported comfort and ease in wearing it, coupled with a sense of feeling less like a victim. Comparison of the qlq and patient satisfaction scores showed no significant difference between the women wearing the conventional prosthesis and those wearing the custom prosthesis.

Conclusions:

The willingness of women to pay for a prosthesis and the qualitative results from the present study demonstrate that there is demand for a custom approach to treatment. However, if a mixed-methods approach had not been applied during this initial exploration of women’s experiences with custom breast prostheses, the essence of the perceived value of the custom prosthesis would have been lost. Quantitative measures suggest that there is no difference between custom and conventional breast prostheses, but the qualitative data captured in the study provide a sense of aspects of care that a standardized outcome measure cannot capture. Further research with a larger sample size is needed to determine if real differences from a quantitative perspective are possible. Suggestions for improvements in the device and in program operations were gathered and will influence the future development and implementation of a breast prosthesis service, but financial assistance will most likely be needed to make such a service universally accessible.",2012-04-01 +23837963,"Standardized evaluation framework for evaluating coronary artery stenosis detection, stenosis quantification and lumen segmentation algorithms in computed tomography angiography.","Though conventional coronary angiography (CCA) has been the standard of reference for diagnosing coronary artery disease in the past decades, computed tomography angiography (CTA) has rapidly emerged, and is nowadays widely used in clinical practice. Here, we introduce a standardized evaluation framework to reliably evaluate and compare the performance of the algorithms devised to detect and quantify the coronary artery stenoses, and to segment the coronary artery lumen in CTA data. 
The objective of this evaluation framework is to demonstrate the feasibility of dedicated algorithms to: (1) (semi-)automatically detect and quantify stenosis on CTA, in comparison with quantitative coronary angiography (QCA) and CTA consensus reading, and (2) (semi-)automatically segment the coronary lumen on CTA, in comparison with expert's manual annotation. A database consisting of 48 multicenter multivendor cardiac CTA datasets with corresponding reference standards are described and made available. The algorithms from 11 research groups were quantitatively evaluated and compared. The results show that (1) some of the current stenosis detection/quantification algorithms may be used for triage or as a second-reader in clinical practice, and that (2) automatic lumen segmentation is possible with a precision similar to that obtained by experts. The framework is open for new submissions through the website, at http://coronary.bigr.nl/stenoses/.",2013-06-04 +26395770,Exploring peptide/MHC detachment processes using hierarchical natural move Monte Carlo.,"

Motivation

The binding between a peptide and a major histocompatibility complex (MHC) is one of the most important processes for the induction of an adaptive immune response. Many algorithms have been developed to predict peptide/MHC (pMHC) binding. However, no approach has yet been able to give structural insight into how peptides detach from the MHC.

Results

In this study, we used a combination of coarse graining, hierarchical natural move Monte Carlo and stochastic conformational optimization to explore the detachment processes of 32 different peptides from HLA-A*02:01. We performed 100 independent repeats of each stochastic simulation and found that the presence of experimentally known anchor amino acids affects the detachment trajectories of our peptides. Comparison with experimental binding affinity data indicates the reliability of our approach (area under the receiver operating characteristic curve 0.85). We also compared to a 1000 ns molecular dynamics simulation of a non-binding peptide (AAAKTPVIV) and HLA-A*02:01. Even in this simulation, the longest published for pMHC, the peptide does not fully detach. Our approach is orders of magnitude faster and as such allows us to explore pMHC detachment processes in a way not possible with all-atom molecular dynamics simulations.

Availability and implementation

The source code is freely available for download at http://www.cs.ox.ac.uk/mosaics/.

Contact

bernhard.knapp@stats.ox.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-22 +26836933,Sex-Specific Prediction Models for Sleep Apnea From the Hispanic Community Health Study/Study of Latinos.,"

Objective

We developed and validated the first-ever sleep apnea (SA) risk calculator in a large population-based cohort of Hispanic/Latino subjects.

Methods

Cross-sectional data on adults from the Hispanic Community Health Study/Study of Latinos (2008-2011) were analyzed. Subjective and objective sleep measurements were obtained. Clinically significant SA was defined as an apnea-hypopnea index ≥ 15 events per hour. Using logistic regression, four prediction models were created: three sex-specific models (female-only, male-only, and a sex × covariate interaction model to allow differential predictor effects), and one overall model with sex included as a main effect only. Models underwent 10-fold cross-validation and were assessed by using the C statistic. SA and its predictive variables; a total of 17 variables were considered.

Results

A total of 12,158 participants had complete sleep data available; 7,363 (61%) were women. The population-weighted prevalence of SA (apnea-hypopnea index ≥ 15 events per hour) was 6.1% in female subjects and 13.5% in male subjects. Male-only (C statistic, 0.808) and female-only (C statistic, 0.836) prediction models had the same predictor variables (ie, age, BMI, self-reported snoring). The sex-interaction model (C statistic, 0.836) contained sex, age, age × sex, BMI, BMI × sex, and self-reported snoring. The final overall model (C statistic, 0.832) contained age, BMI, snoring, and sex. We developed two websites for our SA risk calculator: one in English (https://www.montefiore.org/sleepapneariskcalc.html) and another in Spanish (http://www.montefiore.org/sleepapneariskcalc-es.html).

Conclusions

We created an internally validated, highly discriminating, well-calibrated, and parsimonious prediction model for SA. Contrary to the study hypothesis, the variables did not have different predictive magnitudes in male and female subjects.",2016-01-23 +27385121,Self-Tracking: Reflections from the BodyTrack Project.,"Based on the author's experiences the practice of self-tracking can empower individuals to explore and address issues in their lives. This work is inspired by examples of people who have reclaimed their wellness through an iterative process of noticing patterns of ups and downs, trying out new ideas and strategies, and observing the results. In some cases, individuals have realized that certain foods, environmental exposures, or practices have unexpected effects for them, and that adopting custom strategies can greatly improve quality of life, overcoming chronic problems. Importantly, adopting the role of investigator of their own situation appears to be transformative: people who embarked on this path changed their relationship to their health situation even before making discoveries that helped lead to symptom improvement. The author co-founded the BodyTrack project in 2010 with the goal of empowering a broader set of people to embrace this investigator role in their own lives and better address their health and wellness concerns, particularly those with complex environmental or behavioral components. The core of the BodyTrack system is an open source web service called Fluxtream ( https://fluxtream.org ) that allows users to aggregate, visualize, and reflect on data from myriad sources on a common timeline. 
The project is also working to develop and spread peer coaching practices to help transfer the culture and skills of self-tracking while mentoring individuals in how to self-assess their own situation and guide the process for themselves.",2016-07-06 +26242973,Novel genetic advances in schizophrenia: an interview with Michael O'Donovan.,"In this podcast, we talk to Professor Michael O'Donovan about the latest genetic advances in schizophrenia based on research data from the Schizophrenia Working Group of the Psychiatric Genomics Consortium. Functional and prediction studies from the identified genetic loci are described together with future directions in psychiatric genetics and its interplay with the environment. The podcast for this interview is available at http://media.biomedcentral.com/content/movies/supplementary/s12916-015-0417-1-s1.mp3.",2015-08-05 +25563415,Bioinformatic Suggestions on MiSeq-Based Microbial Community Analysis.,"Recent sequencing technology development has revolutionized fields of microbial ecology. MiSeq-based microbial community analysis allows us to sequence more than a few hundred samples at a time, which is far more cost-effective than pyrosequencing. The approach, however, has not been preferably used owing to computational difficulties of processing huge amounts of data as well as known Illumina-derived artefact problems with amplicon sequencing. The choice of assembly software to take advantage of paired-end sequencing and methods to remove Illumina artefacts sequences are discussed. The protocol we suggest not only removed erroneous reads, but also dramatically reduced computational workload, which allows even a typical desktop computer to process a huge amount of sequence data generated with Illumina sequencers. We also developed a Web interface (http://biotech.jejunu.ac.kr/~abl/16s/) that allows users to conduct fastq-merging and mothur batch creation. 
The study presented here should provide technical advantages and supports in applying MiSeq-based microbial community analysis.",2015-06-01 +26496950,OpenTein: a database of digital whole-slide images of stem cell-derived teratomas.,"Human stem cells are promising sources for regenerative therapy. To ensure safety of future therapeutic applications, the differentiation potency of stem cells has to be tested and be widely opened to the public. The potency is generally assessed by teratoma formation comprising differentiated cells from all three germ layers, and the teratomas can be inspected through high-quality digital images. The teratoma assay, however, lacks consistency in transplantation protocols and even in interpretation, which needs community-based efforts for improving the assay quality. Here, we have developed a novel database OpenTein (Open Teratoma Investigation, http://opentein.hgc.jp/) to archive and freely distribute high-resolution whole-slide images and relevant records. OpenTein has been designed as a searchable, zoomable and annotatable web-based repository system. We have deposited 468 images of teratomas derived by our transplantation of human stem cells, and users can freely access and process such digital teratoma images. Approximately, the current version of OpenTein responds within 11.2 min for processing 2.03 gigapixel teratoma images. Our system offers valuable tools and resources in the new era of stem cell biology.",2015-10-22 +26455268,Peptidase specificity from the substrate cleavage collection in the MEROPS database and a tool to measure cleavage site conservation.,"One peptidase can usually be distinguished from another biochemically by its action on proteins, peptides and synthetic substrates. Since 1996, the MEROPS database (http://merops.sanger.ac.uk) has accumulated a collection of cleavages in substrates that now amounts to 66,615 cleavages. 
The total number of peptidases for which at least one cleavage is known is 1700 out of a total of 2457 different peptidases. This paper describes how the cleavages are obtained from the scientific literature, how they are annotated and how cleavages in peptides and proteins are cross-referenced to entries in the UniProt protein sequence database. The specificity profiles of 556 peptidases are shown for which ten or more substrate cleavages are known. However, it has been proposed that at least 40 cleavages in disparate proteins are required for specificity analysis to be meaningful, and only 163 peptidases (6.6%) fulfil this criterion. Also described are the various displays shown on the website to aid with the understanding of peptidase specificity, which are derived from the substrate cleavage collection. These displays include a logo, distribution matrix, and tables to summarize which amino acids or groups of amino acids are acceptable (or not acceptable) in each substrate binding pocket. For each protein substrate, there is a display to show how it is processed and degraded. Also described are tools on the website to help with the assessment of the physiological relevance of cleavages in a substrate. These tools rely on the hypothesis that a cleavage site that is conserved in orthologues is likely to be physiologically relevant, and alignments of substrate protein sequences are made utilizing the UniRef50 database, in which in each entry sequences are 50% or more identical. Conservation in this case means substitutions are permitted only if the amino acid is known to occupy the same substrate binding pocket from at least one other substrate cleaved by the same peptidase.",2015-10-21 +27189565,TriLoNet: Piecing Together Small Networks to Reconstruct Reticulate Evolutionary Histories.,"Phylogenetic networks are a generalization of evolutionary trees that can be used to represent reticulate processes such as hybridization and recombination. 
Here, we introduce a new approach called TriLoNet (Trinet Level-one Network algorithm) to construct such networks directly from sequence alignments which works by piecing together smaller phylogenetic networks. More specifically, using a bottom up approach similar to Neighbor-Joining, TriLoNet constructs level-1 networks (networks that are somewhat more general than trees) from smaller level-1 networks on three taxa. In simulations, we show that TriLoNet compares well with Lev1athan, a method for reconstructing level-1 networks from three-leaved trees. In particular, in simulations we find that Lev1athan tends to generate networks that overestimate the number of reticulate events as compared with those generated by TriLoNet. We also illustrate TriLoNet's applicability using simulated and real sequence data involving recombination, demonstrating that it has the potential to reconstruct informative reticulate evolutionary histories. TriLoNet has been implemented in JAVA and is freely available at https://www.uea.ac.uk/computing/TriLoNet.",2016-04-15 +26227144,SynLinker: an integrated system for designing linkers and synthetic fusion proteins.,"

Unlabelled

Synthetic fusion proteins have shown great potential in various biotechnological and (bio)pharmaceutical applications. They usually contain more than two protein domains joined by a linker peptide sequence which is often selected intuitively or in an ad hoc manner. Thus, we developed an integrated web-based system, SynLinker, to provide appropriate linker candidates for constructing fusion proteins. We compiled a total of 2260 linker sequences comprising natural linkers extracted from a set of non-redundant multi-domain proteins in the Protein Data Bank and artificial/empirical linkers collected from the literature and patents. A multiple-query interface allows users to search for the desired linker candidates based on selection criteria and their preferences. In addition, a selected linker can be combined with two domain structures which are uploaded and appended at its N and C terminals, thereby predicting a de novo structure of the fusion protein. Hence, SynLinker can serve as a systematic tool for researchers who are interested in designing synthetic fusion proteins.

Availability and implementation

SynLinker is freely available at http://bioinfo.bti.a-star.edu.sg/synlinker.

Contact

cheld@nus.edu.sg

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-29 +23499870,Analysis of the NCI-60 dataset for cancer-related microRNA and mRNA using expression profiles.,"

Background

Recent studies have indicated that microRNA (miRNA) may play an oncogenic or tumor suppressor role in human cancer. To study the regulatory role of miRNAs in tumorigenesis, an integrated platform has been set up to provide a user friendly interface for query. The main advantage of the present platform is that all the miRNA target genes' information and disease records are drawn from experimentally verified or high confidence records.

Results

MiRNA target gene results are annotated with reference to the disease gene as well as the pathway database. The correlation strength between miRNA and target gene expression profile is quantified by computing the correlation coefficient using the NCI-60 expression profiling data. Comprehensive analysis of the NCI-60 data found that the cumulative percentage of negative correlation coefficients for cleavage regulation is slightly higher than its positive counterpart; which indicated that the mRNA degradation mechanism is slightly dominant. In addition, the RNAHybrid and TargetScans scores are computed which potentially served as quantitative estimators for miRNA-mRNA binding events. Three scores are defined for each miRNA-mRNA pair, which are based on the disease gene and pathway information. These three scores allow user to sort out high confidence cancer-related miRNA-mRNA pairs. Statistical tests were applied to investigate the relations of three chromosomal features, i.e., CpG island, fragile site, and miRNA cluster, with cancer-related miRNAs. A web-based interface has been set up for query, which can be accessed at: http://ppi.bioinfo.asia.edu.tw/mirna_target/

Conclusions

The main advantage of the present platform on miRNA-mRNA targeting information is that all the target genes' information and disease records are experimentally verified. Although this may limit the number of miRNA-mRNA relationships, the results provided here are more solid and have fewer false positive events. Certain novel cancer-related miRNA-mRNA pairs are identified and confirmed in the literature. Fisher's exact test suggests that CpG island and fragile site associated miRNAs tend to associate with cancer formation. In summary, the present platform provides an easy means of investigating cancer-related miRNAs.",2013-02-19 +25100688,Drug/Cell-line Browser: interactive canvas visualization of cancer drug/cell-line viability assay datasets.,"

Summary

Recently, several high profile studies collected cell viability data from panels of cancer cell lines treated with many drugs applied at different concentrations. Such drug sensitivity data for cancer cell lines provide suggestive treatments for different types and subtypes of cancer. Visualization of these datasets can reveal patterns that may not be obvious by examining the data without such efforts. Here we introduce Drug/Cell-line Browser (DCB), an online interactive HTML5 data visualization tool for interacting with three of the recently published datasets of cancer cell lines/drug-viability studies. DCB uses clustering and canvas visualization of the drugs and the cell lines, as well as a bar graph that summarizes drug effectiveness for the tissue of origin or the cancer subtypes for single or multiple drugs. DCB can help in understanding drug response patterns and prioritizing drug/cancer cell line interactions by tissue of origin or cancer subtype.

Availability and implementation

DCB is an open source Web-based tool that is freely available at: http://www.maayanlab.net/LINCS/DCB

Contact

avi.maayan@mssm.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-08-06 +23010363,Proteins with an alpha/beta hydrolase fold: Relationships between subfamilies in an ever-growing superfamily.,"Alpha/beta hydrolases function as hydrolases, lyases, transferases, hormone precursors or transporters, chaperones or routers of other proteins. The amount of structural and functional available data related to this protein superfamily expands exponentially, as does the number of proteins classified as alpha/beta hydrolases despite poor sequence similarity and lack of experimental data. However the superfamily can be rationally divided according to sequence or structural homologies, leading to subfamilies of proteins with potentially similar functions. Since the discovery of proteins homologous to cholinesterases but devoid of enzymatic activity (e.g., the neuroligins), divergent functions have been ascribed to members of other subfamilies (e.g., lipases, dipeptidylaminopeptidase IV, etc.). To study the potentially moonlighting properties of alpha/beta hydrolases, the ESTHER database (for ESTerase and alpha/beta Hydrolase Enzymes and Relatives; http://bioweb.ensam.inra.fr/esther), which collects, organizes and disseminates structural and functional information related to alpha/beta hydrolases, has been updated with new tools and the web server interface has been upgraded. A new Overall Table along with a new Tree based on HMM models has been included to tentatively group subfamilies. These tools provide starting points for phylogenetic studies aimed at pinpointing the origin of duplications leading to paralogous genes (e.g., acetylcholinesterase versus butyrylcholinesterase, or neuroligin versus carboxylesterase). 
Another of our goals is to implement new tools to distinguish catalytically active enzymes from non-catalytic proteins in poorly studied or annotated subfamilies.",2012-09-23 +26490961,5SRNAdb: an information resource for 5S ribosomal RNAs.,"Ribosomal 5S RNA (5S rRNA) is the ubiquitous RNA component found in the large subunit of ribosomes in all known organisms. Due to its small size, abundance and evolutionary conservation 5S rRNA for many years now is used as a model molecule in studies on RNA structure, RNA-protein interactions and molecular phylogeny. 5SRNAdb (http://combio.pl/5srnadb/) is the first database that provides a high quality reference set of ribosomal 5S RNAs (5S rRNA) across three domains of life. Here, we give an overview of new developments in the database and associated web tools since 2002, including updates to database content, curation processes and user web interfaces.",2015-10-20 +24564647,Extracting rate changes in transcriptional regulation from MEDLINE abstracts.,"

Background

Time delays are important factors that are often neglected in gene regulatory network (GRN) inference models. Validating time delays from knowledge bases is a challenge since the vast majority of biological databases do not record temporal information of gene regulations. Biological knowledge and facts on gene regulations are typically extracted from bio-literature with specialized methods that depend on the regulation task. In this paper, we mine evidences for time delays related to the transcriptional regulation of yeast from the PubMed abstracts.

Results

Since the vast majority of abstracts lack quantitative time information, we can only collect qualitative evidences of time delays. Specifically, the speed-up or delay in transcriptional regulation rate can provide evidences for time delays (shorter or longer) in GRN. Thus, we focus on deriving events related to rate changes in transcriptional regulation. A corpus of yeast regulation related abstracts was manually labeled with such events. In order to capture these events automatically, we create an ontology of sub-processes that are likely to result in transcription rate changes by combining textual patterns and biological knowledge. We also propose effective feature extraction methods based on the created ontology to identify the direct evidences with specific details of these events. Our ontologies outperform existing state-of-the-art gene regulation ontologies in the automatic rule learning method applied to our corpus. The proposed deterministic ontology rule-based method can achieve comparable performance to the automatic rule learning method based on decision trees. This demonstrates the effectiveness of our ontology in identifying rate-changing events. We also tested the effectiveness of the proposed feature mining methods on detecting direct evidence of events. Experimental results show that the machine learning method on these features achieves an F1-score of 71.43%.

Conclusions

The manually labeled corpus of events relating to rate changes in transcriptional regulation for yeast is available in https://sites.google.com/site/wentingntu/data. The created ontologies summarized both biological causes of rate changes in transcriptional regulation and corresponding positive and negative textual patterns from the corpus. They are demonstrated to be effective in identifying rate-changing events, which shows the benefits of combining textual patterns and biological knowledge on extracting complex biological events.",2014-01-24 +22084200,AH-DB: collecting protein structure pairs before and after binding.,"This work presents the Apo-Holo DataBase (AH-DB, http://ahdb.ee.ncku.edu.tw/ and http://ahdb.csbb.ntu.edu.tw/), which provides corresponding pairs of protein structures before and after binding. Conformational transitions are commonly observed in various protein interactions that are involved in important biological functions. For example, copper-zinc superoxide dismutase (SOD1), which destroys free superoxide radicals in the body, undergoes a large conformational transition from an 'open' state (apo structure) to a 'closed' state (holo structure). Many studies have utilized collections of apo-holo structure pairs to investigate the conformational transitions and critical residues. However, the collection process is usually complicated, varies from study to study and produces a small-scale data set. AH-DB is designed to provide an easy and unified way to prepare such data, which is generated by identifying/mapping molecules in different Protein Data Bank (PDB) entries. Conformational transitions are identified based on a refined alignment scheme to overcome the challenge that many structures in the PDB database are only protein fragments and not complete proteins. There are 746,314 apo-holo pairs in AH-DB, which is about 30 times those in the second largest collection of similar data. 
AH-DB provides sophisticated interfaces for searching apo-holo structure pairs and exploring conformational transitions from apo structures to the corresponding holo structures.",2011-11-13 +25717192,Guidance for RNA-seq co-expression network construction and analysis: safety in numbers.,"

Motivation

RNA-seq co-expression analysis is in its infancy and reasonable practices remain poorly defined. We assessed a variety of RNA-seq expression data to determine factors affecting functional connectivity and topology in co-expression networks.

Results

We examine RNA-seq co-expression data generated from 1970 RNA-seq samples using a Guilt-By-Association framework, in which genes are assessed for the tendency of co-expression to reflect shared function. Minimal experimental criteria to obtain performance on par with microarrays were >20 samples with read depth >10 M per sample. While the aggregate network constructed shows good performance (area under the receiver operator characteristic curve ∼0.71), the dependency on number of experiments used is nearly identical to that present in microarrays, suggesting thousands of samples are required to obtain 'gold-standard' co-expression. We find a major topological difference between RNA-seq and microarray co-expression in the form of low overlaps between hub-like genes from each network due to changes in the correlation of expression noise within each technology.

Contact

jgillis@cshl.edu or sballouz@cshl.edu

Supplementary information

Networks are available at: http://gillislab.labsites.cshl.edu/supplements/rna-seq-networks/ and supplementary data are available at Bioinformatics online.",2015-02-24 +21554668,PHENOPSIS DB: an information system for Arabidopsis thaliana phenotypic data in an environmental context.,"

Background

Renewed interest in plant×environment interactions has risen in the post-genomic era. In this context, high-throughput phenotyping platforms have been developed to create reproducible environmental scenarios in which the phenotypic responses of multiple genotypes can be analysed in a reproducible way. These platforms benefit hugely from the development of suitable databases for storage, sharing and analysis of the large amount of data collected. In the model plant Arabidopsis thaliana, most databases available to the scientific community contain data related to genetic and molecular biology and are characterised by an inadequacy in the description of plant developmental stages and experimental metadata such as environmental conditions. Our goal was to develop a comprehensive information system for sharing of the data collected in PHENOPSIS, an automated platform for Arabidopsis thaliana phenotyping, with the scientific community.

Description

PHENOPSIS DB is a publicly available (URL: http://bioweb.supagro.inra.fr/phenopsis/) information system developed for storage, browsing and sharing of online data generated by the PHENOPSIS platform and offline data collected by experimenters and experimental metadata. It provides modules coupled to a Web interface for (i) the visualisation of environmental data of an experiment, (ii) the visualisation and statistical analysis of phenotypic data, and (iii) the analysis of Arabidopsis thaliana plant images.

Conclusions

Firstly, data stored in the PHENOPSIS DB are of interest to the Arabidopsis thaliana community, particularly in allowing phenotypic meta-analyses directly linked to environmental conditions on which publications are still scarce. Secondly, data or image analysis modules can be downloaded from the Web interface for direct usage or as the basis for modifications according to new requirements. Finally, the structure of PHENOPSIS DB provides a useful template for the development of other similar databases related to genotype×environment interactions.",2011-05-09 +26824624,Antiviral and Cytotoxic Isocoumarin Derivatives from an Endophytic Fungus Aspergillus oryzae.,"Oryzaeins A-D (1-4), four new isocoumarin derivatives, along with five known ones (5-9) were isolated from solid cultures of an endophytic fungus Aspergillus oryzae. Their structures were elucidated by detailed spectroscopic analysis and by comparison with reported data of related derivatives. Among them, compounds 1 and 2 represent the first examples of isocoumarins possessing an unusual 2-oxopropyl group and a rare 3-hydroxypropyl group. Compounds 1 and 2 displayed moderate anti-tobacco mosaic virus activities with inhibition rates of 28.4% and 30.6%, respectively, at the concentration of 20 µM. The new compounds showed moderate inhibitory activities against several human tumor cell lines with IC50 values in the range of 2.8-8.8 µM. Supporting information available online at http://www.thieme-connect.de/products.",2016-01-29 +24684958,FISH Oracle 2: a web server for integrative visualization of genomic data in cancer research.,"

Background

A comprehensive view on all relevant genomic data is instrumental for understanding the complex patterns of molecular alterations typically found in cancer cells. One of the most effective ways to rapidly obtain an overview of genomic alterations in large amounts of genomic data is the integrative visualization of genomic events.

Results

We developed FISH Oracle 2, a web server for the interactive visualization of different kinds of downstream processed genomics data typically available in cancer research. A powerful search interface and a fast visualization engine provide a highly interactive visualization for such data. High quality image export enables the life scientist to easily communicate their results. A comprehensive data administration allows to keep track of the available data sets. We applied FISH Oracle 2 to published data and found evidence that, in colorectal cancer cells, the gene TTC28 may be inactivated in two different ways, a fact that has not been published before.

Conclusions

The interactive nature of FISH Oracle 2 and the possibility to store, select and visualize large amounts of downstream processed data support life scientists in generating hypotheses. The export of high quality images supports explanatory data visualization, simplifying the communication of new biological findings. A FISH Oracle 2 demo server and the software is available at http://www.zbh.uni-hamburg.de/fishoracle.",2014-03-31 +26381817,SeqMule: automated pipeline for analysis of human exome/genome sequencing data.,"Next-generation sequencing (NGS) technology has greatly helped us identify disease-contributory variants for Mendelian diseases. However, users are often faced with issues such as software compatibility, complicated configuration, and no access to high-performance computing facility. Discrepancies exist among aligners and variant callers. We developed a computational pipeline, SeqMule, to perform automated variant calling from NGS data on human genomes and exomes. SeqMule integrates computational-cluster-free parallelization capability built on top of the variant callers, and facilitates normalization/intersection of variant calls to generate consensus set with high confidence. SeqMule integrates 5 alignment tools, 5 variant calling algorithms and accepts various combinations all by one-line command, therefore allowing highly flexible yet fully automated variant calling. In a modern machine (2 Intel Xeon X5650 CPUs, 48 GB memory), when fast turn-around is needed, SeqMule generates annotated VCF files in a day from a 30X whole-genome sequencing data set; when more accurate calling is needed, SeqMule generates consensus call set that improves over single callers, as measured by both Mendelian error rate and consistency. SeqMule supports Sun Grid Engine for parallel processing, offers turn-key solution for deployment on Amazon Web Services, allows quality check, Mendelian error check, consistency evaluation, HTML-based reports. 
SeqMule is available at http://seqmule.openbioinformatics.org.",2015-09-18 +25609367,SimTCM: A human patient simulator with application to diagnostic accuracy studies of Chinese medicine.,"

Objective

The aim of this work is to develop and implement the SimTCM, an advanced computational model that incorporates relevant aspects from traditional Chinese medicine (TCM) theory as well as advanced statistical and epidemiological techniques for simulation and analysis of human patients.

Methods

SimTCM presents five major attributes for simulation: representation of true and false profiles for any single pattern; variable count of manifestations for each manifestation profile; empirical distributions of patterns and manifestations in a disease-specific population; incorporation of uncertainty in clinical data; and the combination of the four examinations. The proposed model is strengthened by following international standards for reporting diagnostic accuracy studies, and incorporates these standards in its treatment of study population, sample size calculation, data collection of manifestation profiles, exclusion criteria and missing data handling, reference standards, randomization and blinding, and test reproducibility.

Results

Simulations using data from patients diagnosed with hypertension and post-stroke sensory-motor impairments yielded no significant differences between expected and simulated frequencies of patterns (P=0.22 or higher). Time for convergence of simulations varied from 9.90 s (9.80, 10.27) to 28.31 s (26.33, 29.52). The ratio iteration profile necessary for convergence varied between 1:1 and 5:1.

Conclusion

This model is directly connected to forthcoming models in a large project to design and implement the SuiteTCM: ProntTCM, SciTCM, DiagTCM, StudentTCM, ResearchTCM, HerbsTCM, AcuTCM, and DataTCM. It is expected that the continuity of the SuiteTCM project enhances the evidence-based practice of Chinese medicine. The software is freely available for download at: http://suitetcm.unisuam.edu.br.",2015-01-01 +25206364,A bioinformatics workflow for detecting signatures of selection in genomic data.,"The detection of ""signatures of selection"" is now possible on a genome-wide scale in many plant and animal species, and can be performed in a population-specific manner due to the wealth of per-population genome-wide genotype data that is available. With genomic regions that exhibit evidence of having been under selection shown to also be enriched for genes associated with biologically important traits, detection of evidence of selective pressure is emerging as an additional approach for identifying novel gene-trait associations. While high-density genotype data is now relatively easy to obtain, for many researchers it is not immediately obvious how to go about identifying signatures of selection in these data sets. Here we describe a basic workflow, constructed from open source tools, for detecting and examining evidence of selection in genomic data. Code to install and implement the pipeline components, and instructions to run a basic analysis using the workflow described here, can be downloaded from our public GitHub repository: http://www.github.com/smilefreak/selectionTools/",2014-08-26 +27453819,"Transcriptomic and functional resources for the small hive beetle Aethina tumida, a worldwide parasite of honey bees.","The small hive beetle (SHB), Aethina tumida, is a major pest of managed honey bee (Apis mellifera) colonies in the United States and Australia, and an emergent threat in Europe. 
While strong honey bee colonies generally keep SHB populations in check, weak or stressed colonies can succumb to infestations. This parasite has spread from a sub-Saharan Africa to three continents, leading to immense management and regulatory costs. We performed a transcriptomic analysis involving deep sequencing of multiple life stages and both sexes of this species. The assembled transcriptome appears to be nearly complete, as judged by conserved insect orthologs and the ability to find plausible homologs for 11,952 proteins described from the genome of the red flour beetle. Expressed genes include each of the major metabolic, developmental and sensory groups, along with genes for proteins involved with immune defenses and insecticide resistance. We also present a total of 23,085 high-quality SNP's for the assembled contigs. We highlight potential differences between this beetle and its honey bee hosts, and suggest mechanisms of future research into the biology and control of this species. SNP resources will allow functional genetic analyses and analyses of dispersal for this invasive pest. All resources are posted as Supplemental Tables at https://data.nal.usda.gov/dataset/data-transcriptomic-and-functional-resources-small-hive-beetle-aethina-tumida-worldwide, and at NCBI under Bioproject PRJNA256171.",2016-07-02 +30722391,First Report of Powdery Mildew Caused by Erysiphe heraclei on Curled Dock (Rumex crispus) in South Korea.,"Curled dock (Rumex crispus L.) is a perennial flowering plant in family Polygonaceae, native to Europe and western Asia. Curled dock is a widespread naturalized species throughout the temperate world that has become a serious invasive species as a weed in many areas. In contrast, the plant has been widely used as a folk medicine for treatment of indigestion and dermatoses in Asia countries. The plant roots are known to have an antifungal effect against barley powdery mildew pathogens. 
In late October 2010 to 2011, plants showing typical symptoms of powdery mildew disease were observed in a river bank area located in Gwangju, South Korea. Symptoms included generally white, superficial mycelia and abundant necrotic black spots showing superficial chasmothecia. Mycelia were ectophytic with lobed appressoria. Conidiophores were cylindrical, straight, or slightly flexuous in foot cells and bore single conidia. The foot cell of the fungus had a greater range of size than Erysiphe polygoni. Conidia and conidiophores were 25.4 to 45.4 (36.5) μm long × 10.5 to 18.6 (15.0) μm wide and 34.7 to 126.0 (91.4) μm long × 8 to 10 (8.7) μm wide, respectively. The teleomorph included spherical to subspherical ascocarps that were (blackish) brown to yellow and formed hyphoid appendages. Appendages were slightly flexuous and 62.0 to 128.1 (71.6) μm wide. Mature chasmothecia were 75.1 to 140.9 (105) μm. The ascocarps contained multiple asci that were saccate, ellipsoidal and papillate in apices, bore 3 to 5 ascospores, and were 59.4 to 66.1 (60.9) μm long × 32.6 to 43.9 (38.3) μm wide. Ascospores were subhyaline, oval to ellipsoid, and 17.9 to 24.8 (21.1) μm long × 10.9 to 15.2 (13.3) μm wide. From extracted genomic DNA, the internal transcribed spacer (ITS) region inclusive of 5.8S and 28S rDNA were amplified with ITS1F (5'-TCCGTAGGTGAACCTGCGG-3') and LR5F (5'-GCTATCCTGAGGGAAAC-3'), and LROR (5'-ACCCGCTGAACTTAAGC-3') and LR5F primer sets, respectively. rDNA ITS (JX499184) and 28S (JX888470) homologies of the fungal strain (EML-RCPW1) via NCBI BLASTn search represented 99.7% (618/620) and 100% (667/667) identity values with E. heraclei AB104510 and AB103366, respectively. The identification of the fungus as E. heraclei was based on morphological data combined with the results of sequence analysis. Although there were no 28S sequence data from E. polygoni in GenBank, the phylogenetic tree based on ITS sequence data showed that our strain was differentiated from E. 
polygoni, forming a separate clade consisting of E. heraclei. So far, 26 records with respect to powdery mildews on curled dock represent those caused by only E. polygoni worldwide (1). E. heraclei has been reported to occur on various herbaceous plants including Angelica spp., Daucus spp., and Torilis japonica, and a woody plant such as Quercus myrsinaefolia in China, Japan, and Korea. To our knowledge, this is the first report of leaf powdery mildew caused by E. heraclei on curled dock in Korea or elsewhere in the world, although the fungus causes powdery mildew on various species of families Polygonaceae and Apiaceae with wide host range (2,3,4). References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Syst. Mycol. Microbiol. Lab., ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , June 22, 2012. (2) D. A. Glawe et al. Online. Plant Health Progress. PHP-2005-0114-01-HN, 2005. (3) M. J. Park et al. New Dis. Rep. 21:14, 2010. (4) G. Rodríguez-Alvarado et al. Plant Dis. 94:483, 2010.",2013-03-01 +26476447,FLOR-ID: an interactive database of flowering-time gene networks in Arabidopsis thaliana.,"Flowering is a hot topic in Plant Biology and important progress has been made in Arabidopsis thaliana toward unraveling the genetic networks involved. The increasing complexity and the explosion of literature however require development of new tools for information management and update. We therefore created an evolutive and interactive database of flowering time genes, named FLOR-ID (Flowering-Interactive Database), which is freely accessible at http://www.flor-id.org. The hand-curated database contains information on 306 genes and links to 1595 publications gathering the work of >4500 authors. 
Gene/protein functions and interactions within the flowering pathways were inferred from the analysis of related publications, included in the database and translated into interactive manually drawn snapshots.",2015-10-17 +23259833,Protein interface classification by evolutionary analysis.,"

Background

Distinguishing biologically relevant interfaces from lattice contacts in protein crystals is a fundamental problem in structural biology. Despite efforts towards the computational prediction of interface character, many issues are still unresolved.

Results

We present here a protein-protein interface classifier that relies on evolutionary data to detect the biological character of interfaces. The classifier uses a simple geometric measure, number of core residues, and two evolutionary indicators based on the sequence entropy of homolog sequences. Both aim at detecting differential selection pressure between interface core and rim or rest of surface. The core residues, defined as fully buried residues (>95% burial), appear to be fundamental determinants of biological interfaces: their number is in itself a powerful discriminator of interface character and together with the evolutionary measures it is able to clearly distinguish evolved biological contacts from crystal ones. We demonstrate that this definition of core residues leads to distinctively better results than earlier definitions from the literature. The stringent selection and quality filtering of structural and sequence data was key to the success of the method. Most importantly we demonstrate that a more conservative selection of homolog sequences - with relatively high sequence identities to the query - is able to produce a clearer signal than previous attempts.

Conclusions

An evolutionary approach like the one presented here is key to the advancement of the field, which so far was missing an effective method exploiting the evolutionary character of protein interfaces. Its coverage and performance will only improve over time thanks to the incessant growth of sequence databases. Currently our method reaches an accuracy of 89% in classifying interfaces of the Ponstingl 2003 datasets and it lends itself to a variety of useful applications in structural biology and bioinformatics. We made the corresponding software implementation available to the community as an easy-to-use graphical web interface at http://www.eppic-web.org.",2012-12-22 +26150785,The PREP pipeline: standardized preprocessing for large-scale EEG analysis.,"The technology to collect brain imaging and physiological measures has become portable and ubiquitous, opening the possibility of large-scale analysis of real-world human imaging. By its nature, such data is large and complex, making automated processing essential. This paper shows how lack of attention to the very early stages of an EEG preprocessing pipeline can reduce the signal-to-noise ratio and introduce unwanted artifacts into the data, particularly for computations done in single precision. We demonstrate that ordinary average referencing improves the signal-to-noise ratio, but that noisy channels can contaminate the results. We also show that identification of noisy channels depends on the reference and examine the complex interaction of filtering, noisy channel identification, and referencing. We introduce a multi-stage robust referencing scheme to deal with the noisy channel-reference interaction. We propose a standardized early-stage EEG processing pipeline (PREP) and discuss the application of the pipeline to more than 600 EEG datasets. The pipeline includes an automatically generated report for each dataset processed. 
Users can download the PREP pipeline as a freely available MATLAB library from http://eegstudy.org/prepcode.",2015-06-18 +26076010,MultiElec: A MATLAB Based Application for MEA Data Analysis.,"We present MultiElec, an open source MATLAB based application for data analysis of microelectrode array (MEA) recordings. MultiElec displays an extremely user-friendly graphic user interface (GUI) that allows the simultaneous display and analysis of voltage traces for 60 electrodes and includes functions for activation-time determination, the production of activation-time heat maps with activation time and isoline display. Furthermore, local conduction velocities are semi-automatically calculated along with their corresponding vector plots. MultiElec allows ad hoc signal suppression, enabling the user to easily and efficiently handle signal artefacts and for incomplete data sets to be analysed. Voltage traces and heat maps can be simply exported for figure production and presentation. In addition, our platform is able to produce 3D videos of signal progression over all 60 electrodes. Functions are controlled entirely by a single GUI with no need for command line input or any understanding of MATLAB code. MultiElec is open source under the terms of the GNU General Public License as published by the Free Software Foundation, version 3. Both the program and source code are available to download from http://www.cancer.manchester.ac.uk/MultiElec/.",2015-06-15 +22992673,Quantitative phosphoproteomics in nuclei of vasopressin-sensitive renal collecting duct cells.,"Vasopressin regulates transport across the collecting duct epithelium in part via effects on gene transcription. Transcriptional regulation occurs partially via changes in phosphorylation of transcription factors, transcriptional coactivators, and protein kinases in the nucleus. 
To test whether vasopressin alters the nuclear phosphoproteome of vasopressin-sensitive cultured mouse mpkCCD cells, we used stable isotope labeling and mass spectrometry to quantify thousands of phosphorylation sites in nuclear extracts and nuclear pellet fractions. Measurements were made in the presence and absence of the vasopressin analog dDAVP. Of the 1,251 sites quantified, 39 changed significantly in response to dDAVP. Network analysis of the regulated proteins revealed two major clusters (""cell-cell adhesion"" and ""transcriptional regulation"") that were connected to known elements of the vasopressin signaling pathway. The hub proteins for these two clusters were the transcriptional coactivator β-catenin and the transcription factor c-Jun. Phosphorylation of β-catenin at Ser552 was increased by dDAVP [log(2)(dDAVP/vehicle) = 1.79], and phosphorylation of c-Jun at Ser73 was decreased [log(2)(dDAVP/vehicle) = -0.53]. The β-catenin site is known to be targeted by either protein kinase A or Akt, both of which are activated in response to vasopressin. The c-Jun site is a canonical target for the MAP kinase Jnk2, which is downregulated in response to vasopressin in the collecting duct. The data support the idea that vasopressin-mediated control of transcription in collecting duct cells involves selective changes in the nuclear phosphoproteome. All data are available to users at http://helixweb.nih.gov/ESBL/Database/mNPPD/.",2012-09-19 +27081675,The dataset from administration of single or combined immunomodulation agents to modulate anti-FVIII antibody responses in FVIII plasmid or protein primed hemophilia A mice.,"Hemophilia A mice with pre-existing inhibitory antibodies against factor VIII (FVIII) were treated with single agents, AMD3100 and GCS-F, respectively. Inhibitor titers in treated mice and control HemA inhibitors mice were followed over time. Total B cells and plasma cells (PCs) were characterized by flow cytometry. 
HemA inhibitor mice were then treated with a combination regimen of IL-2/IL-2mAb complexes plus rapamycin and AMD3100. Finally, HemA inhibitor mice were treated with a new combination therapy using include IL-2/IL-2mAb complexes + Anti-CD20+AMD3100+G-CSF. The timeline of combination therapy was illustrated. Inhibitor titers following treatment in FVIII plasmid or protein induced inhibitor mice were evaluated overtime. A representative figure and gating strategies to characterize the subsets of Treg cells and B cells are presented. Please see http://dx.doi.org/10.1016/j.cellimm.2016.01.005 [1] for interpretation and discussion of these data and results.",2016-03-17 +21447597,UniProt Knowledgebase: a hub of integrated protein data.,"The UniProt Knowledgebase (UniProtKB) acts as a central hub of protein knowledge by providing a unified view of protein sequence and functional information. Manual and automatic annotation procedures are used to add data directly to the database while extensive cross-referencing to more than 120 external databases provides access to additional relevant information in more specialized data collections. UniProtKB also integrates a range of data from other resources. All information is attributed to its original source, allowing users to trace the provenance of all data. The UniProt Consortium is committed to using and promoting common data exchange formats and technologies, and UniProtKB data is made freely available in a range of formats to facilitate integration with other databases. Database URL: http://www.uniprot.org/",2011-03-29 +26635596,Golgi: Interactive Online Brain Mapping.,"Golgi (http://www.usegolgi.com) is a prototype interactive brain map of the rat brain that helps researchers intuitively interact with neuroanatomy, connectomics, and cellular and chemical architecture. The flood of ""-omic"" data urges new ways to help researchers connect discrete findings to the larger context of the nervous system. 
Here we explore Golgi's underlying reasoning and techniques and how our design decisions balance the constraints of building both a scientifically useful and usable tool. We demonstrate how Golgi can enhance connectomic literature searches with a case study investigating a thalamocortical circuit involving the Nucleus Accumbens and we explore Golgi's potential and future directions for growth in systems neuroscience and connectomics.",2015-11-17 +27170729,"Cloning, Expression, Purification, and Insecticidal Activity of a Novel Cry1Na3 Toxin From Bacillus thuringiensis BRC-ZYR2.","Bacillus thuringiensis produces a variety of insecticidal crystal proteins (ICPs). Genome sequencing is a promising strategy for detecting and identifying B. thuringiensis ICPs, which are of great interest to the biocontrol field. In this study, a novel ICP gene was cloned from B. thuringiensis BRC-ZYR2 based on genomic data from 454 GS-FLX Titanium sequencing and an analysis of the results using the B. thuringiensis Toxin_Scanner ( http://bcam.hzaubmb.net/BtToxin_scanner/index.php ). cry1Na3 designated by the B. thuringiensis Toxin Nomenclature Committee, encoded a 601-amino acid, 68.0-kDa protein that exhibited 95% identity with Cry1Na1 and 99% identity with Cry1Na2. Cry1Na3 contained three conserved domains commonly found in three-domain ICPs. Cry1Na3 was toxic to Plutella xylostella (L.) and Ostrinia furnacalis (Guenée), with LC 50 values of 3.69 μg/ml and 31.30 μg/ml, respectively. However, Laodelphax striatellus (Fallén) nymphs were unaffected when fed purified Cry1Na3 (250 μg/ml) in their diet. Spodoptera exigua (Hübner) and Colaphellus bowringi (Baly) larvae survived even when the concentration of Cry1Na3 protein reached 500 μg/ml. 
Cry1Na3 is a promising agent for the control of lepidopteran insect pests.",2016-05-23 +25601689,GridMass: a fast two-dimensional feature detection method for LC/MS.,"One of the initial and critical procedures for the analysis of metabolomics data using liquid chromatography and mass spectrometry is feature detection. Feature detection is the process to detect boundaries of the mass surface from raw data. It consists of detected abundances arranged in a two-dimensional (2D) matrix of mass/charge and elution time. MZmine 2 is one of the leading software environments that provide a full analysis pipeline for these data. However, the feature detection algorithms provided in MZmine 2 are based mainly on the analysis of one-dimension at a time. We propose GridMass, an efficient algorithm for 2D feature detection. The algorithm is based on landing probes across the chromatographic space that are moved to find local maxima providing accurate boundary estimations. We tested GridMass on a controlled marker experiment, on plasma samples, on plant fruits, and in a proteome sample. Compared with other algorithms, GridMass is faster and may achieve comparable or better sensitivity and specificity. As a proof of concept, GridMass has been implemented in Java under the MZmine 2 environment and is available at http://www.bioinformatica.mty.itesm.mx/GridMass and MASSyPup. It has also been submitted to the MZmine 2 developing community.",2015-01-01 +25512690,SNPAAMapperT2K: A genome-wide SNP downstream analysis and annotation pipeline for species annotated with NCBI.tbl data files.,"

Unlabelled

SNPAAMapper, a genome-wide SNP downstream analysis and annotation pipeline, was designed to classify detected variants according to genomic regions and report the mutation class by processing whole-genome and/or whole-exome sequencing data. A widely used sequence and data annotation table format ""knownGene.txt"" has not yet been created for many popular model organisms (e.g. Arabidopsis). Instead, NCBI .tbl annotation format files are provided for these species. Therefore, it is of interest to describe SNPAAMapperT2K, a genome-wide SNP downstream analysis and annotation pipeline for species annotated with NCBI .tbl data files (e.g. Arabidopsis). The pipeline is tested with a deeply sequenced Arabidopsis thaliana strain (Seattle-0). The SNPAAMapperT2K can also annotate and report SNP classes for other species, whose chromosome files are annotated as NCBI .tbl format, but do not have their annotated knownGene.txt files available.

Availability

Perl scripts and required input files are available on the web at http://isu.indstate.edu/ybai2/SNPAAMapperT2K.",2014-11-27 +25952609,Heterozygous genome assembly via binary classification of homologous sequence.,"

Background

Genome assemblers to date have predominantly targeted haploid reference reconstruction from homozygous data. When applied to diploid genome assembly, these assemblers perform poorly, owing to the violation of assumptions during both the contigging and scaffolding phases. Effective tools to overcome these problems are in growing demand. Increasing parameter stringency during contigging is an effective solution to obtaining haplotype-specific contigs; however, effective algorithms for scaffolding such contigs are lacking.

Methods

We present a stand-alone scaffolding algorithm, ScaffoldScaffolder, designed specifically for scaffolding diploid genomes. The algorithm identifies homologous sequences as found in ""bubble"" structures in scaffold graphs. Machine learning classification is used to then classify sequences in partial bubbles as homologous or non-homologous sequences prior to reconstructing haplotype-specific scaffolds. We define four new metrics for assessing diploid scaffolding accuracy: contig sequencing depth, contig homogeneity, phase group homogeneity, and heterogeneity between phase groups.

Results

We demonstrate the viability of using bubbles to identify heterozygous homologous contigs, which we term homolotigs. We show that machine learning classification trained on these homolotig pairs can be used effectively for identifying homologous sequences elsewhere in the data with high precision (assuming error-free reads).

Conclusion

More work is required to comparatively analyze this approach on real data with various parameters and classifiers against other diploid genome assembly methods. However, the initial results of ScaffoldScaffolder supply validity to the idea of employing machine learning in the difficult task of diploid genome assembly. Software is available at http://bioresearch.byu.edu/scaffoldscaffolder.",2015-04-23 +27864175,The Acceptor Side of Photosystem II Is the Initial Target of Nitrite Stress in Synechocystis sp. Strain PCC 6803. ,"Nitrite, a common form of inorganic nitrogen (N), can be used as a nitrogen source through N assimilation. However, high levels of nitrite depress photosynthesis in various organisms. In this study, we investigated which components of the photosynthetic electron transfer chain are targeted by nitrite stress in Synechocystis sp. strain PCC 6803 cells. Measurements of whole-chain and photosystem II (PSII)-mediated electron transport activities revealed that high levels of nitrite primarily impair electron flow in PSII. Changes in PSII activity in response to nitrite stress occurred in two distinct phases. During the first phase, which occurred in the first 3 h of nitrite treatment, electron transfer from the primary quinone acceptor (QA) to the secondary quinone acceptor (QB) was retarded, as indicated by chlorophyll (Chl) a fluorescence induction, S-state distribution, and QA- reoxidation tests. In the second phase, which occurred after 6 h of nitrite exposure, the reaction center was inactivated and the donor side of photosystem II was inhibited, as revealed by changes in Chl fluorescence parameters and thermoluminescence and by immunoblot analysis. Our data suggest that nitrite stress is highly damaging to PSII and disrupts PSII activity by a stepwise mechanism in which the acceptor side is the initial target. 
IMPORTANCE In our previous studies, an alga-based technology was proposed to fix the large amounts of nitrite that are released from NOX-rich flue gases and proved to be a promising industrial strategy for flue gas NOX bioremediation (W. Chen et al., Environ Sci Technol 50:1620-1627, 2016, https://doi.org/10.1021/acs.est.5b04696; X. Zhang et al., Environ Sci Technol 48:10497-10504, 2014, https://doi.org/10.1021/es5013824). However, the toxic effects of high concentrations of nitrite on algal cells remain obscure. The analysis of growth rates, photochemistry, and protein profiles in our study provides important evidence that the inhibition by nitrite occurs in two phases: in the first phase, electron transfer between QA- and QB is retarded, whereas in the second, the donor side of PSII is affected. This is an excellent example of investigating the ""early"" inhibitory effects (i.e., within the first 6 h) on the PSII electron transfer chain in vivo This paper provides novel insights into the mechanisms of nitrite inhibition of photosynthesis in an oxygenic phototrophic cyanobacterium.",2017-01-17 +26627001,Corrigendum.,"Pages H1237–H1250: Leone M, Magadum A, Engel FB. Cardiomyocyte proliferation in cardiac development and regeneration: a guide to methodologies and interpretations. Am J Physiol Heart Circ Physiol 309: H1237–H1250, 2015. First published September 4, 2015; doi:10.1152/ajpheart.00559.2015 (http://ajpheart.physiology.org/content/309/8/H1237.long).—Under Newborn mouse heart regeneration, the value for the increase in cell number upon Meis1 deletion is corrected as shown in boldface as follows: In fact, the cell number data indicate that Meis1 deletion induced the production of 0.2 million cardiomyocytes in less than 5 wk by an H3P as well as Aurora B fold increase of less than fourfold.",2015-12-01 +27288498,Prediction of conserved long-range RNA-RNA interactions in full viral genomes.,"

Motivation

Long-range RNA-RNA interactions (LRIs) play an important role in viral replication, however, only a few of these interactions are known and only for a small number of viral species. Up to now, it has been impossible to screen a full viral genome for LRIs experimentally or in silico Most known LRIs are cross-reacting structures (pseudoknots) undetectable by most bioinformatical tools.

Results

We present LRIscan, a tool for the LRI prediction in full viral genomes based on a multiple genome alignment. We confirmed 14 out of 16 experimentally known and evolutionary conserved LRIs in genome alignments of HCV, Tombusviruses, Flaviviruses and HIV-1. We provide several promising new interactions, which include compensatory mutations and are highly conserved in all considered viral sequences. Furthermore, we provide reactivity plots highlighting the hot spots of predicted LRIs.

Availability and implementation

Source code and binaries of LRIscan freely available for download at http://www.rna.uni-jena.de/en/supplements/lriscan/, implemented in Ruby/C ++ and supported on Linux and Windows.

Contact

manja@uni-jena.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-10 +26542718,Characterizing Phenotypes and Signaling Networks of Single Human Cells by Mass Cytometry.,"Single cell mass cytometry is revolutionizing our ability to quantitatively characterize cellular biomarkers and signaling networks. Mass cytometry experiments routinely measure 25-35 features of each cell in primary human tissue samples. The relative ease with which a novice user can generate a large amount of high quality data and the novelty of the approach have created a need for example protocols, analysis strategies, and datasets. In this chapter, we present detailed protocols for two mass cytometry experiments designed as training tools. The first protocol describes detection of 26 features on the surface of human peripheral blood mononuclear cells. In the second protocol, a mass cytometry signaling network profile measures 25 node states comprised of five key signaling effectors (AKT, ERK1/2, STAT1, STAT5, and p38) quantified under five conditions (Basal, FLT3L, SCF, IL-3, and IFNγ). This chapter compares manual and unsupervised data analysis approaches, including bivariate plots, heatmaps, histogram overlays, SPADE, and viSNE. Data files in this chapter have been shared online using Cytobank ( http://www.cytobank.org/irishlab/ ).",2015-01-01 +21718534,"CANGS DB: a stand-alone web-based database tool for processing, managing and analyzing 454 data in biodiversity studies.","

Background

Next generation sequencing (NGS) is widely used in metagenomic and transcriptomic analyses in biodiversity. The ease of data generation provided by NGS platforms has allowed researchers to perform these analyses on their particular study systems. In particular the 454 platform has become the preferred choice for PCR amplicon based biodiversity surveys because it generates the longest sequence reads. Nevertheless, the handling and organization of massive amounts of sequencing data poses a major problem for the research community, particularly when multiple researchers are involved in data acquisition and analysis. An integrated and user-friendly tool, which performs quality control, read trimming, PCR primer removal, and data organization is desperately needed, therefore, to make data interpretation fast and manageable.

Findings

We developed CANGS DB (Cleaning and Analyzing Next Generation Sequences DataBase) a flexible, stand alone and user-friendly integrated database tool. CANGS DB is specifically designed to organize and manage the massive amount of sequencing data arising from various NGS projects. CANGS DB also provides an intuitive user interface for sequence trimming and quality control, taxonomy analysis and rarefaction analysis. Our database tool can be easily adapted to handle multiple sequencing projects in parallel with different sample information, amplicon sizes, primer sequences, and quality thresholds, which makes this software especially useful for non-bioinformaticians. Furthermore, CANGS DB is especially suited for projects where multiple users need to access the data. CANGS DB is available at http://code.google.com/p/cangsdb/.

Conclusion

CANGS DB provides a simple and user-friendly solution to process, store and analyze 454 sequencing data. Being a local database that is accessible through a user-friendly interface, CANGS DB provides the perfect tool for collaborative amplicon based biodiversity surveys without requiring prior bioinformatics skills.",2011-06-30 +25485614,A Bayesian mixture model for chromatin interaction data.,"Chromatin interactions mediated by a particular protein are of interest for studying gene regulation, especially the regulation of genes that are associated with, or known to be causative of, a disease. A recent molecular technique, Chromatin interaction analysis by paired-end tag sequencing (ChIA-PET), that uses chromatin immunoprecipitation (ChIP) and high throughput paired-end sequencing, is able to detect such chromatin interactions genomewide. However, ChIA-PET may generate noise (i.e., pairings of DNA fragments by random chance) in addition to true signal (i.e., pairings of DNA fragments by interactions). In this paper, we propose MC_DIST based on a mixture modeling framework to identify true chromatin interactions from ChIA-PET count data (counts of DNA fragment pairs). The model is cast into a Bayesian framework to take into account the dependency among the data and the available information on protein binding sites and gene promoters to reduce false positives. A simulation study showed that MC_DIST outperforms the previously proposed hypergeometric model in terms of both power and type I error rate. A real data study showed that MC_DIST may identify potential chromatin interactions between protein binding sites and gene promoters that may be missed by the hypergeometric model. An R package implementing the MC_DIST model is available at http://www.stat.osu.edu/~statgen/SOFTWARE/MDM.",2015-02-01 +27058171,High-throughput mathematical analysis identifies Turing networks for patterning with equally diffusing signals. 
,"The Turing reaction-diffusion model explains how identical cells can self-organize to form spatial patterns. It has been suggested that extracellular signaling molecules with different diffusion coefficients underlie this model, but the contribution of cell-autonomous signaling components is largely unknown. We developed an automated mathematical analysis to derive a catalog of realistic Turing networks. This analysis reveals that in the presence of cell-autonomous factors, networks can form a pattern with equally diffusing signals and even for any combination of diffusion coefficients. We provide a software (available at http://www.RDNets.com) to explore these networks and to constrain topologies with qualitative and quantitative experimental data. We use the software to examine the self-organizing networks that control embryonic axis specification and digit patterning. Finally, we demonstrate how existing synthetic circuits can be extended with additional feedbacks to form Turing reaction-diffusion systems. Our study offers a new theoretical framework to understand multicellular pattern formation and enables the wide-spread use of mathematical biology to engineer synthetic patterning systems.",2016-04-08 +25708473,Pinpointing disease genes through phenomic and genomic data fusion.,"

Background

Pinpointing genes involved in inherited human diseases remains a great challenge in the post-genomics era. Although approaches have been proposed either based on the guilt-by-association principle or making use of disease phenotype similarities, the low coverage of both diseases and genes in existing methods has been preventing the scan of causative genes for a significant proportion of diseases at the whole-genome level.

Results

To overcome this limitation, we proposed a rigorous statistical method called pgFusion to prioritize candidate genes by integrating one type of disease phenotype similarity derived from the Unified Medical Language System (UMLS) and seven types of gene functional similarities calculated from gene expression, gene ontology, pathway membership, protein sequence, protein domain, protein-protein interaction and regulation pattern, respectively. Our method covered a total of 7,719 diseases and 20,327 genes, achieving the highest coverage thus far for both diseases and genes. We performed leave-one-out cross-validation experiments to demonstrate the superior performance of our method and applied it to a real exome sequencing dataset of epileptic encephalopathies, showing the capability of this approach in finding causative genes for complex diseases. We further provided the standalone software and online services of pgFusion at http://bioinfo.au.tsinghua.edu.cn/jianglab/pgfusion.

Conclusions

pgFusion not only provided an effective way for prioritizing candidate genes, but also demonstrated feasible solutions to two fundamental questions in the analysis of big genomic data: the comparability of heterogeneous data and the integration of multiple types of data. Applications of this method in exome or whole genome sequencing studies would accelerate the finding of causative genes for human diseases. Other research fields in genomics could also benefit from the incorporation of our data fusion methodology.",2015-01-21 +23126337,Long-term survival of calcium phosphate-coated dental implants: a meta-analytical approach to the clinical literature.,"

Background

Calcium phosphate ceramic coatings have the potential to compensate for challenging bone conditions such as delayed or impaired bone healing and low bone quantity or density. Thus, the increasing universal prevalence of subjects with such challenging bone conditions might be paralleled by an enhanced global use of calcium phosphate ceramic-coated dental implants. However, it is speculated that the long-term clinical survival of calcium phosphate-coated dental implants might be adversely affected by coating delamination.

Objective

The aims of the current review were (1) to systematically appraise and (2) to meta-analyse long-term survival data of calcium phosphate-coated dental implants in clinical trials.

Materials and methods

An extensive search in the electronic databases of the National Library of Medicine (http://www.ncbi.nlm.nih.gov), The Cochrane Central Register of Controlled Trials and the ISI Web of Knowledge, was carried out for articles published between January 2000 and November 2011 to identify randomized controlled clinical trials, prospective clinical trials as well as retrospective analysis of cases (RA) presenting survival data on the topic of calcium phosphate-coated dental implants. Only publications in English were considered, and the search was narrowed to studies in humans with a follow-up of at least 5 years only. Furthermore, the reference lists of related review articles and publications selected for inclusion in this review were systematically screened. The primary outcome variable was percentage annual failure rate (AFR), and the secondary outcome variable was percentage cumulative survival rate (CSR).

Results

The electronic search in the database of the National Library of Medicine, The Cochrane Central Register of Controlled Trials and the ISI Web of Knowledge, resulted in the identification of 385 titles. These titles were initially screened by the two independent reviewers for possible inclusion, resulting in 29 publications suitable for further consideration. Screening the abstracts led to 20 full-text articles. From these articles, 15 reports were excluded. Finally, five of these original research reports could be selected for evaluation. No additional publications were identified by manual search. Thus, a total of five articles were included for analysis. Meta-analysis revealed that neither AFRs of calcium phosphate-coated dental implants increased progressively nor that long-term CSRs for calcium phosphate-coated dental implants were inferior to survival rates of noncoated implants.

Conclusion

We conclude that (1) published long-term survival data for calcium phosphate-coated dental implants are very limited, (2) AFRs of calcium phosphate-coated dental implants do not increase progressively, and (3) long-term CSRs for calcium phosphate-coated dental implants are comparable to survival rates of noncoated implants.",2012-11-05 +26473382,A Uniform System for the Annotation of Vertebrate microRNA Genes and the Evolution of the Human microRNAome.,"Although microRNAs (miRNAs) are among the most intensively studied molecules of the past 20 years, determining what is and what is not a miRNA has not been straightforward. Here, we present a uniform system for the annotation and nomenclature of miRNA genes. We show that less than a third of the 1,881 human miRBase entries, and only approximately 16% of the 7,095 metazoan miRBase entries, are robustly supported as miRNA genes. Furthermore, we show that the human repertoire of miRNAs has been shaped by periods of intense miRNA innovation and that mature gene products show a very different tempo and mode of sequence evolution than star products. We establish a new open access database--MirGeneDB ( http://mirgenedb.org )--to catalog this set of miRNAs, which complements the efforts of miRBase but differs from it by annotating the mature versus star products and by imposing an evolutionary hierarchy upon this curated and consistently named repertoire.",2015-10-14 +25649622,Reference-based compression of short-read sequences using path encoding.,"

Motivation

Storing, transmitting and archiving data produced by next-generation sequencing is a significant computational burden. New compression techniques tailored to short-read sequence data are needed.

Results

We present here an approach to compression that reduces the difficulty of managing large-scale sequencing data. Our novel approach sits between pure reference-based compression and reference-free compression and combines much of the benefit of reference-based approaches with the flexibility of de novo encoding. Our method, called path encoding, draws a connection between storing paths in de Bruijn graphs and context-dependent arithmetic coding. Supporting this method is a system to compactly store sets of kmers that is of independent interest. We are able to encode RNA-seq reads using 3-11% of the space of the sequence in raw FASTA files, which is on average more than 34% smaller than competing approaches. We also show that even if the reference is very poorly matched to the reads that are being encoded, good compression can still be achieved.

Availability and implementation

Source code and binaries freely available for download at http://www.cs.cmu.edu/∼ckingsf/software/pathenc/, implemented in Go and supported on Linux and Mac OS X.",2015-02-02 +24106335,Efficacy of anti-inflammatory agents to improve symptoms in patients with schizophrenia: an update.,"

Background

The inflammatory hypothesis of schizophrenia is not new, but recently it has regained interest because more data suggest a role of the immune system in the pathogenesis of schizophrenia. If increased inflammation of the brain contributes to the symptoms of schizophrenia, reduction of the inflammatory status could improve the clinical picture. Lately, several trials have been conducted investigating the potential of anti-inflammatory agents to improve symptoms of schizophrenia. This study provides an update regarding the efficacy of anti-inflammatory agents on schizophrenic symptoms in clinical studies performed so far.

Methods

An electronic search was performed using PubMed, Embase, the National Institutes of Health web site http://www.clinicaltrials.gov, Cochrane Schizophrenia Group entries in PsiTri, and the Cochrane Database of Systematic Reviews. Only randomized, double-blind, placebo-controlled studies that investigated clinical outcome were included.

Results

Our search yielded 26 double-blind randomized controlled trials that provided information on the efficacy on symptom severity of the following components: aspirin, celecoxib, davunetide, fatty acids such as eicosapentaenoic acids and docosahexaenoic acids, estrogens, minocycline, and N-acetylcysteine (NAC). Of these components, aspirin (mean weighted effect size [ES]: 0.3, n = 270, 95% CI: 0.06-0.537, I(2) = 0), estrogens (ES: 0.51, n = 262, 95% CI: 0.043-0.972, I(2) = 69%), and NAC (ES: 0.45, n = 140, 95% CI: 0.112-0.779) showed significant effects. Celecoxib, minocycline, davunetide, and fatty acids showed no significant effect.

Conclusion

The results of aspirin addition to antipsychotic treatment seem promising, as does the addition of NAC and estrogens. These 3 agents are all very broadly active substances, and it has to be investigated if the beneficial effects on symptom severity are indeed mediated by their anti-inflammatory aspects.",2013-10-08 +22130416,Developing an open-access antimicrobial resistance learning site for veterinary medical students.,"Recognizing the crucial role of veterinarians in mitigating antimicrobial resistance (AMR), the Centers for Disease Control and Prevention (CDC) has funded the development of a suite of educational materials to promote the responsible veterinary medical use of antimicrobials. An open-access, Web-based multimedia curriculum regarding antimicrobial resistance in veterinary practice was thus created. The antimicrobial-resistance learning site (AMRLS) for veterinary medical students was completed and made available for use in January 2011 (http://amrls.cvm.msu.edu/). Designed for integration into existing veterinary medical courses, the AMRLS is also a resource for continuing education for practicing veterinarians, animal scientists, and food-animal industry specialists. This Web site emphasizes the mechanisms by which AMR emerges and spreads, the significant role of veterinarians in mitigating AMR, and the need to preserve the efficacy of antibiotics for future generations.",2011-01-01 +23519954,Relationships between body roundness with body fat and visceral adipose tissue emerging from a new geometrical model.,"

Objective

To develop a new geometrical index that combines height, waist circumference (WC), and hip circumference (HC) and relate this index to total and visceral body fat.

Design and methods

Subject data were pooled from three databases that contained demographic, anthropometric, dual energy X-ray absorptiometry (DXA) measured fat mass, and magnetic resonance imaging measured visceral adipose tissue (VAT) volume. Two elliptical models of the human body were developed. Body roundness was calculated from the model using a well-established constant arising from the theory. Regression models based on eccentricity and other variables were used to predict %body fat and %VAT.

Results

A body roundness index (BRI) was derived to quantify the individual body shape in a height-independent manner. Body roundness slightly improved predictions of %body fat and %VAT compared to the traditional metrics of body mass index (BMI), WC, or HC. On this basis, healthy body roundness ranges were established. An automated graphical program simulating study results was placed at http://www.pbrc.edu/bodyroundness.

Conclusion

BRI, a new shape measure, is a predictor of %body fat and %VAT and can be applied as a visual tool for health status evaluations.",2013-06-11 +26467479,ChEBI in 2016: Improved services and an expanding collection of metabolites.,"ChEBI is a database and ontology containing information about chemical entities of biological interest. It currently includes over 46,000 entries, each of which is classified within the ontology and assigned multiple annotations including (where relevant) a chemical structure, database cross-references, synonyms and literature citations. All content is freely available and can be accessed online at http://www.ebi.ac.uk/chebi. In this update paper, we describe recent improvements and additions to the ChEBI offering. We have substantially extended our collection of endogenous metabolites for several organisms including human, mouse, Escherichia coli and yeast. Our front-end has also been reworked and updated, improving the user experience, removing our dependency on Java applets in favour of embedded JavaScript components and moving from a monthly release update to a 'live' website. Programmatic access has been improved by the introduction of a library, libChEBI, in Java, Python and Matlab. Furthermore, we have added two new tools, namely an analysis tool, BiNChE, and a query tool for the ontology, OntoQuery.",2015-10-13 +23761449,PosMed: Ranking genes and bioresources based on Semantic Web Association Study.,"Positional MEDLINE (PosMed; http://biolod.org/PosMed) is a powerful Semantic Web Association Study engine that ranks biomedical resources such as genes, metabolites, diseases and drugs, based on the statistical significance of associations between user-specified phenotypic keywords and resources connected directly or inferentially through a Semantic Web of biological databases such as MEDLINE, OMIM, pathways, co-expressions, molecular interactions and ontology terms. 
Since 2005, PosMed has long been used for in silico positional cloning studies to infer candidate disease-responsible genes existing within chromosomal intervals. PosMed is redesigned as a workbench to discover possible functional interpretations for numerous genetic variants found from exome sequencing of human disease samples. We also show that the association search engine enhances the value of mouse bioresources because most knockout mouse resources have no phenotypic annotation, but can be associated inferentially to phenotypes via genes and biomedical documents. For this purpose, we established text-mining rules to the biomedical documents by careful human curation work, and created a huge amount of correct linking between genes and documents. PosMed associates any phenotypic keyword to mouse resources with 20 public databases and four original data sets as of May 2013.",2013-06-12 +21854988,The human mitochondrial transcriptome.,"The human mitochondrial genome comprises a distinct genetic system transcribed as precursor polycistronic transcripts that are subsequently cleaved to generate individual mRNAs, tRNAs, and rRNAs. Here, we provide a comprehensive analysis of the human mitochondrial transcriptome across multiple cell lines and tissues. Using directional deep sequencing and parallel analysis of RNA ends, we demonstrate wide variation in mitochondrial transcript abundance and precisely resolve transcript processing and maturation events. We identify previously undescribed transcripts, including small RNAs, and observe the enrichment of several nuclear RNAs in mitochondria. Using high-throughput in vivo DNaseI footprinting, we establish the global profile of DNA-binding protein occupancy across the mitochondrial genome at single-nucleotide resolution, revealing regulatory features at mitochondrial transcription initiation sites and functional insights into disease-associated variants. 
This integrated analysis of the mitochondrial transcriptome reveals unexpected complexity in the regulation, expression, and processing of mitochondrial RNA and provides a resource for future studies of mitochondrial function (accessed at http://mitochondria.matticklab.com).",2011-08-01 +28087519,Sympathetic modulation of electrical activation in normal and infarcted myocardium: implications for arrhythmogenesis.,"The influence of cardiac sympathetic innervation on electrical activation in normal and chronically infarcted ventricular myocardium is not understood. Yorkshire pigs with normal hearts (NL, n = 12) or anterior myocardial infarction (MI, n = 9) underwent high-resolution mapping of the anteroapical left ventricle at baseline and during left and right stellate ganglion stimulation (LSGS and RSGS, respectively). Conduction velocity (CV), activation times (ATs), and directionality of propagation were measured. Myocardial fiber orientation was determined using diffusion tensor imaging and histology. Longitudinal CV (CVL) was increased by RSGS (0.98 ± 0.11 vs. 1.2 ± 0.14m/s, P < 0.001) but not transverse CV (CVT). This increase was abrogated by β-adrenergic receptor and gap junction (GJ) blockade. Neither CVL nor CVT was increased by LSGS. In the peri-infarct region, both RSGS and LSGS shortened ARIs in sinus rhythm (423 ± 37 vs. 322 ± 30 ms, P < 0.001, and 423 ± 36 vs. 398 ± 36 ms, P = 0.035, respectively) and altered activation patterns in all animals. CV, as estimated by mean ATs, increased in a directionally dependent manner by RSGS (14.6 ± 1.2 vs. 17.3 ± 1.6 ms, P = 0.015), associated with GJ lateralization. RSGS and LSGS inhomogeneously modulated AT and induced relative or absolute functional activation delay in parts of the mapped regions in 75 and 67%, respectively, in MI animals, and in 0 and 15%, respectively, in control animals (P < 0.001 for both). 
In conclusion, sympathoexcitation increases CV in normal myocardium and modulates activation propagation in peri-infarcted ventricular myocardium. These data demonstrate functional control of arrhythmogenic peri-infarct substrates by sympathetic nerves and in part explain the temporal nature of arrhythmogenesis.NEW & NOTEWORTHY This study demonstrates regional control of conduction velocity in normal hearts by sympathetic nerves. In infarcted hearts, however, not only is modulation of propagation heterogeneous, some regions showed paradoxical conduction slowing. Sympathoexcitation altered propagation in all infarcted hearts studied, and we describe the temporal arrhythmogenic potential of these findings.Listen to this article's corresponding podcast at http://ajpheart.podbean.com/e/sympathetic-nerves-and-cardiac-propagation/.",2017-01-13 +24353242,Recall intervals for oral health in primary care patients.,"

Background

The frequency with which patients should attend for a dental check-up and the potential effects on oral health of altering recall intervals between check-ups have been the subject of ongoing international debate in recent decades. Although recommendations regarding optimal recall intervals vary between countries and dental healthcare systems, six-monthly dental check-ups have traditionally been advocated by general dental practitioners in many developed countries.This is an update of a Cochrane review first published in 2005, and previously updated in 2007.

Objectives

To determine the beneficial and harmful effects of different fixed recall intervals (for example six months versus 12 months) for the following different types of dental check-up: a) clinical examination only; b) clinical examination plus scale and polish; c) clinical examination plus preventive advice; d) clinical examination plus preventive advice plus scale and polish.To determine the relative beneficial and harmful effects between any of these different types of dental check-up at the same fixed recall interval.To compare the beneficial and harmful effects of recall intervals based on clinicians' assessment of patients' disease risk with fixed recall intervals.To compare the beneficial and harmful effects of no recall interval/patient driven attendance (which may be symptomatic) with fixed recall intervals.

Search methods

The following electronic databases were searched: the Cochrane Oral Health Group's Trials Register (to 27 September 2013), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library 2013, Issue 9), MEDLINE via OVID (1946 to 27 September 2013) and EMBASE via OVID (1980 to 27 September 2013). We searched the US National Institutes of Health Trials Register (http://clinicaltrials.gov) and the WHO International Clinical Trials Registry Platform (http://www.who.int/ictrp/en/) for ongoing trials. Reference lists from relevant articles were scanned and the authors of some papers were contacted to identify further trials and obtain additional information. We did not apply any restrictions regarding language or date of publication when searching the electronic databases.

Selection criteria

We included randomised controlled trials (RCTs) assessing the effects of different dental recall intervals.

Data collection and analysis

Two review authors independently assessed the search results against the inclusion criteria of the review, extracted data and carried out risk of bias assessment. We contacted study authors for clarification or further information where necessary and feasible. If we had found more than one study with similar comparisons reporting the same outcomes, we would have combined the studies in a meta-analysis using a random-effects model if there were at least four studies, or a fixed-effect model if there were less than four studies. We expressed the estimate of effect as mean difference with 95% confidence intervals (CIs) for continuous outcomes. We would have used risk ratios with 95% CI for any dichotomous outcomes.

Main results

We included one study that analysed 185 participants. The study compared the effects of a clinical examination every 12 months with a clinical examination every 24 months on the outcomes of caries (decayed, missing, filled surfaces (dmfs/DMFS) increment) and economic cost outcomes (total time used per person). As the study was at high risk of bias, had a small sample size and only included low-risk participants, we rated the quality of the body of evidence for these outcomes as very low. For three to five-year olds with primary teeth, the mean difference (MD) in dmfs increment was -0.90 (95% CI -1.96 to 0.16) in favour of 12-month recall. For 16 to 20-year olds with permanent teeth, the MD in DMFS increment was -0.86 (95% CI -1.75 to 0.03) also in favour of 12-month recall. There is insufficient evidence to determine whether 12 or 24-month recall with clinical examination results in better caries outcomes. For three to five-year olds with primary teeth, the MD in time used by each participant was 10 minutes (95% CI -6.7 to 26.7) in favour of 24-month recall. For 16 to 20-year olds with permanent teeth, the MD was 23.7 minutes (95% CI 4.12 to 43.28) also in favour of 24-month recall. This single study at high risk of bias represents insufficient evidence to determine whether 12 or 24-month recall with clinical examination results in better time/cost outcomes.

Authors' conclusions

There is a very low quality body of evidence from one RCT which is insufficient to draw any conclusions regarding the potential beneficial and harmful effects of altering the recall interval between dental check-ups. There is no evidence to support or refute the practice of encouraging patients to attend for dental check-ups at six-monthly intervals. It is important that high quality RCTs are conducted for the outcomes listed in this review in order to address the objectives of this review.",2013-12-19 +26568623,MIEC-SVM: automated pipeline for protein peptide/ligand interaction prediction.,"

Motivation

MIEC-SVM is a structure-based method for predicting protein recognition specificity. Here, we present an automated MIEC-SVM pipeline providing an integrated and user-friendly workflow for construction and application of the MIEC-SVM models. This pipeline can handle standard amino acids and those with post-translational modifications (PTMs) or small molecules. Moreover, multi-threading and support to Sun Grid Engine (SGE) are implemented to significantly boost the computational efficiency.

Availability and implementation

The program is available at http://wanglab.ucsd.edu/MIEC-SVM CONTACT: wei-wang@ucsd.edu

Supplementary information

Supplementary data available at Bioinformatics online.",2015-11-14 +26803160,Alloscore: a method for predicting allosteric ligand-protein interactions.,"

Unlabelled

Allosteric ligands have increasingly gained attention as potential therapeutic agents due to their higher target selectivity and lower toxicity compared with classic orthosteric ligands. Despite the great interest in the development of allosteric drugs as a new tactic in drug discovery, the understanding of the ligand-protein interactions underlying allosteric binding represents a key challenge. Herein, we introduce Alloscore, a web server that predicts the binding affinities of allosteric ligand-protein interactions. This method exhibits prominent performance in describing allosteric binding and could be useful in allosteric virtual screening and the structural optimization of allosteric agonists/antagonists.

Availability and implementation

The Alloscore server and tutorials are freely available at http://mdl.shsmu.edu.cn/alloscore

Contact

jian.zhang@sjtu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-22 +26757703,Metatranscriptomic analysis of diverse microbial communities reveals core metabolic pathways and microbiome-specific functionality.,"

Background

Metatranscriptomics is emerging as a powerful technology for the functional characterization of complex microbial communities (microbiomes). Use of unbiased RNA-sequencing can reveal both the taxonomic composition and active biochemical functions of a complex microbial community. However, the lack of established reference genomes, computational tools and pipelines make analysis and interpretation of these datasets challenging. Systematic studies that compare data across microbiomes are needed to demonstrate the ability of such pipelines to deliver biologically meaningful insights on microbiome function.

Results

Here, we apply a standardized analytical pipeline to perform a comparative analysis of metatranscriptomic data from diverse microbial communities derived from mouse large intestine, cow rumen, kimchi culture, deep-sea thermal vent and permafrost. Sequence similarity searches allowed annotation of 19 to 76% of putative messenger RNA (mRNA) reads, with the highest frequency in the kimchi dataset due to its relatively low complexity and availability of closely related reference genomes. Metatranscriptomic datasets exhibited distinct taxonomic and functional signatures. From a metabolic perspective, we identified a common core of enzymes involved in amino acid, energy and nucleotide metabolism and also identified microbiome-specific pathways such as phosphonate metabolism (deep sea) and glycan degradation pathways (cow rumen). Integrating taxonomic and functional annotations within a novel visualization framework revealed the contribution of different taxa to metabolic pathways, allowing the identification of taxa that contribute unique functions.

Conclusions

The application of a single, standard pipeline confirms that the rich taxonomic and functional diversity observed across microbiomes is not simply an artefact of different analysis pipelines but instead reflects distinct environmental influences. At the same time, our findings show how microbiome complexity and availability of reference genomes can impact comprehensive annotation of metatranscriptomes. Consequently, beyond the application of standardized pipelines, additional caution must be taken when interpreting their output and performing downstream, microbiome-specific, analyses. The pipeline used in these analyses along with a tutorial has been made freely available for download from our project website: http://www.compsysbio.org/microbiome .",2016-01-12 +26069265,Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data.,"

Unlabelled

Advances in high-throughput sequencing technologies now allow for large-scale characterization of B cell immunoglobulin (Ig) repertoires. The high germline and somatic diversity of the Ig repertoire presents challenges for biologically meaningful analysis, which requires specialized computational methods. We have developed a suite of utilities, Change-O, which provides tools for advanced analyses of large-scale Ig repertoire sequencing data. Change-O includes tools for determining the complete set of Ig variable region gene segment alleles carried by an individual (including novel alleles), partitioning of Ig sequences into clonal populations, creating lineage trees, inferring somatic hypermutation targeting models, measuring repertoire diversity, quantifying selection pressure, and calculating sequence chemical properties. All Change-O tools utilize a common data format, which enables the seamless integration of multiple analyses into a single workflow.

Availability and implementation

Change-O is freely available for non-commercial use and may be downloaded from http://clip.med.yale.edu/changeo.

Contact

steven.kleinstein@yale.edu.",2015-06-10 +22080546,The International Nucleotide Sequence Database Collaboration.,"The members of the International Nucleotide Sequence Database Collaboration (INSDC; http://www.insdc.org) set out to capture, preserve and present globally comprehensive public domain nucleotide sequence information. The work of the long-standing collaboration includes the provision of data formats, annotation conventions and routine global data exchange. Among the many developments to INSDC resources in 2011 are the newly launched BioProject database and improved handling of assembly information. In this article, we outline INSDC services and update the reader on developments in 2011.",2011-11-12 +22403116,Informatics in radiology: use of CouchDB for document-based storage of DICOM objects.,"Picture archiving and communication systems traditionally have depended on schema-based Structured Query Language (SQL) databases for imaging data management. To optimize database size and performance, many such systems store a reduced set of Digital Imaging and Communications in Medicine (DICOM) metadata, discarding informational content that might be needed in the future. As an alternative to traditional database systems, document-based key-value stores recently have gained popularity. These systems store documents containing key-value pairs that facilitate data searches without predefined schemas. Document-based key-value stores are especially suited to archive DICOM objects because DICOM metadata are highly heterogeneous collections of tag-value pairs conveying specific information about imaging modalities, acquisition protocols, and vendor-supported postprocessing options. 
The authors used an open-source document-based database management system (Apache CouchDB) to create and test two such databases; CouchDB was selected for its overall ease of use, capability for managing attachments, and reliance on HTTP and Representational State Transfer standards for accessing and retrieving data. A large database was created first in which the DICOM metadata from 5880 anonymized magnetic resonance imaging studies (1,949,753 images) were loaded by using a Ruby script. To provide the usual DICOM query functionality, several predefined ""views"" (standard queries) were created by using JavaScript. For performance comparison, the same queries were executed in both the CouchDB database and a SQL-based DICOM archive. The capabilities of CouchDB for attachment management and database replication were separately assessed in tests of a similar, smaller database. Results showed that CouchDB allowed efficient storage and interrogation of all DICOM objects; with the use of information retrieval algorithms such as map-reduce, all the DICOM metadata stored in the large database were searchable with only a minimal increase in retrieval time over that with the traditional database management system. Results also indicated possible uses for document-based databases in data mining applications such as dose monitoring, quality assurance, and protocol optimization.",2012-03-08 +27695050,Graphlet Based Metrics for the Comparison of Gene Regulatory Networks.,"Understanding the control of gene expression remains one of the main challenges in the post-genomic era. Accordingly, a plethora of methods exists to identify variations in gene expression levels. These variations underlay almost all relevant biological phenomena, including disease and adaptation to environmental conditions. However, computational tools to identify how regulation changes are scarce. Regulation of gene expression is usually depicted in the form of a gene regulatory network (GRN). 
Structural changes in a GRN over time and conditions represent variations in the regulation of gene expression. Like other biological networks, GRNs are composed of basic building blocks called graphlets. As a consequence, two new metrics based on graphlets are proposed in this work: REConstruction Rate (REC) and REC Graphlet Degree (RGD). REC determines the rate of graphlet similarity between different states of a network and RGD identifies the subset of nodes with the highest topological variation. In other words, RGD discerns how the GRN was rewired. REC and RGD were used to compare the local structure of nodes in condition-specific GRNs obtained from gene expression data of Escherichia coli, forming biofilms and cultured in suspension. According to our results, most of the network local structure remains unaltered in the two compared conditions. Nevertheless, changes reported by RGD necessarily imply that a different cohort of regulators (i.e. transcription factors (TFs)) appear on the scene, shedding light on how the regulation of gene expression occurs when E. coli transits from suspension to biofilm. Consequently, we propose that both metrics REC and RGD should be adopted as a quantitative approach to conduct differential analyses of GRNs. A tool that implements both metrics is available as an on-line web server (http://dlab.cl/loto).",2016-10-03 +25134827,KeyPathwayMiner 4.0: condition-specific pathway analysis by combining multiple omics studies and networks with Cytoscape.,"

Background

Over the last decade network enrichment analysis has become popular in computational systems biology to elucidate aberrant network modules. Traditionally, these approaches focus on combining gene expression data with protein-protein interaction (PPI) networks. Nowadays, the so-called omics technologies allow for inclusion of many more data sets, e.g. protein phosphorylation or epigenetic modifications. This creates a need for analysis methods that can combine these various sources of data to obtain a systems-level view on aberrant biological networks.

Results

We present a new release of KeyPathwayMiner (version 4.0) that is not limited to analyses of single omics data sets, e.g. gene expression, but is able to directly combine several different omics data types. Version 4.0 can further integrate existing knowledge by adding a search bias towards sub-networks that contain (avoid) genes provided in a positive (negative) list. Finally the new release now also provides a set of novel visualization features and has been implemented as an app for the standard bioinformatics network analysis tool: Cytoscape.

Conclusion

With KeyPathwayMiner 4.0, we publish a Cytoscape app for multi-omics based sub-network extraction. It is available in Cytoscape's app store http://apps.cytoscape.org/apps/keypathwayminer or via http://keypathwayminer.mpi-inf.mpg.de.",2014-08-19 +21875157,Gene re-annotation in genome of the extremophile Pyrobaculum aerophilum by using bioinformatics methods.,"In this paper, we re-annotated the genome of Pyrobaculum aerophilum str. IM2, particularly for hypothetical ORFs. The annotation process includes three parts. Firstly and most importantly, 23 new genes, which were missed in the original annotation, are found by combining similarity search and the ab initio gene finding approaches. Among these new genes, five have significant similarities with function-known genes and the rest have significant similarities with hypothetical ORFs contained in other genomes. Secondly, the coding potentials of the 1645 hypothetical ORFs are re-predicted by using 33 Z curve variables combined with Fisher linear discrimination method. With the accuracy being 99.68%, 25 originally annotated hypothetical ORFs are recognized as non-coding by our method. Thirdly, 80 hypothetical ORFs are assigned with potential functions by using similarity search with BLAST program. Re-annotation of the genome will benefit related researches on this hyperthermophilic crenarchaeon. Also, the re-annotation procedure could be taken as a reference for other archaeal genomes. Details of the revised annotation are freely available at http://cobi.uestc.edu.cn/resource/paero/",2011-10-01 +27354702,AutoSite: an automated approach for pseudo-ligands prediction-from ligand-binding sites identification to predicting key ligand atoms.,"

Motivation

The identification of ligand-binding sites from a protein structure facilitates computational drug design and optimization, and protein function assignment. We introduce AutoSite: an efficient software tool for identifying ligand-binding sites and predicting pseudo ligand corresponding to each binding site identified. Binding sites are reported as clusters of 3D points called fills in which every point is labelled as hydrophobic or as hydrogen bond donor or acceptor. From these fills AutoSite derives feature points: a set of putative positions of hydrophobic-, and hydrogen-bond forming ligand atoms.

Results

We show that AutoSite identifies ligand-binding sites with higher accuracy than other leading methods, and produces fills that better match the ligand shape and properties, than the fills obtained with a software program with similar capabilities, AutoLigand. In addition, we demonstrate that for the Astex Diverse Set, the feature points identify 79% of hydrophobic ligand atoms, and 81% and 62% of the hydrogen acceptor and donor hydrogen ligand atoms interacting with the receptor, and predict 81.2% of water molecules mediating interactions between ligand and receptor. Finally, we illustrate potential uses of the predicted feature points in the context of lead optimization in drug discovery projects.

Availability and implementation

http://adfr.scripps.edu/AutoDockFR/autosite.html CONTACT: sanner@scripps.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-06-26 +27341078,A data-driven network model of primary myelofibrosis: transcriptional and post-transcriptional alterations in CD34+ cells.,"microRNAs (miRNAs) are relevant in the pathogenesis of primary myelofibrosis (PMF) but our understanding is limited to specific target genes and the overall systemic scenario is lacking. By both knowledge-based and ab initio approaches for comparative analysis of CD34+ cells of PMF patients and healthy controls, we identified the deregulated pathways involving miRNAs and genes and new transcriptional and post-transcriptional regulatory circuits in PMF cells. These converge in a unique and integrated cellular process, in which the role of specific miRNAs is to wire, co-regulate and allow a fine crosstalk between the involved processes. The PMF pathway includes Akt signaling, linked to Rho GTPases, CDC42, PLD2, PTEN crosstalk with the hypoxia response and Calcium-linked cellular processes connected to cyclic AMP signaling. Nested on the depicted transcriptional scenario, predicted circuits are reported, opening new hypotheses. Links between miRNAs (miR-106a-5p, miR-20b-5p, miR-20a-5p, miR-17-5p, miR-19b-3p and let-7d-5p) and key transcription factors (MYCN, ATF, CEBPA, REL, IRF and FOXJ2) and their common target genes tantalizingly suggest new path to approach the disease. The study provides a global overview of transcriptional and post-transcriptional deregulations in PMF, and, unifying consolidated and predicted data, could be helpful to identify new combinatorial therapeutic strategy. Interactive PMF network model: http://compgen.bio.unipd.it/pmf-net/.",2016-06-24 +25627334,TagDust2: a generic method to extract reads from sequencing data.,"

Background

Arguably the most basic step in the analysis of next generation sequencing data (NGS) involves the extraction of mappable reads from the raw reads produced by sequencing instruments. The presence of barcodes, adaptors and artifacts subject to sequencing errors makes this step non-trivial.

Results

Here I present TagDust2, a generic approach utilizing a library of hidden Markov models (HMM) to accurately extract reads from a wide array of possible read architectures. TagDust2 extracts more reads of higher quality compared to other approaches. Processing of multiplexed single, paired end and libraries containing unique molecular identifiers is fully supported. Two additional post processing steps are included to exclude known contaminants and filter out low complexity sequences. Finally, TagDust2 can automatically detect the library type of sequenced data from a predefined selection.

Conclusion

Taken together TagDust2 is a feature rich, flexible and adaptive solution to go from raw to mappable NGS reads in a single step. The ability to recognize and record the contents of raw reads will help to automate and demystify the initial, and often poorly documented, steps in NGS data analysis pipelines. TagDust2 is freely available at: http://tagdust.sourceforge.net .",2015-01-28 +28097046,Molecular classification of tissue from a transformed non-Hogkin's lymphoma case with unexpected long-time remission.,"

Background

The concept of precision medicine in cancer includes individual molecular studies to predict clinical outcomes. In the present N = 1 case we retrospectively have analysed lymphoma tissue by exome sequencing and global gene expression in a patient with unexpected long-term remission following relapse. The goals were to phenotype the diagnostic and relapsed lymphoma tissue and evaluate its pattern. Furthermore, to identify mutations available for targeted therapy and expression of genes to predict specific drug effects by resistance gene signatures (REGS) for R-CHOP as described at http://www.hemaclass.org. We expected that such a study could generate therapeutic information and a frame for future individual evaluation of molecular resistance detected at clinical relapse.

Case presentation

The patient was diagnosed with a transformed high-grade non-Hodgkin lymphoma stage III and treated with conventional R-CHOP [rituximab (R), cyclophosphamide (C), doxorubicin (H), vincristine (O) and prednisone (P)]. Unfortunately, she suffered from severe toxicity but recovered during the following 6 months' remission until biopsy-verified relapse. The patient refused second-line combination chemotherapy, but accepted 3 months' palliation with R and chlorambucil. Unexpectedly, she obtained continuous complete remission and is at present >9 years after primary diagnosis. Molecular studies and data evaluation by principal component analysis, mutation screening and copy number variations of the primary and relapsed tumor, identified a pattern of branched lymphoma evolution, most likely diverging from an in situ follicular lymphoma. Accordingly, the primary diagnosed transformed lymphoma was classified as a diffuse large B cell lymphoma (DLBCL) of the GCB/centrocytic subtype by ""cell of origin BAGS"" assignment and R sensitive and C, H, O and P resistant by ""drug specific REGS"" assignment. The relapsed DLBCL was classified as NC/memory subtype and R, C, H sensitive but O and P resistant.

Conclusions

Thorough analysis of the tumor DNA and RNA documented a branched evolution of the two clinical diagnosed tFL, most likely transformed from an unknown in situ lymphoma. Classification of the malignant tissue for drug-specific resistance did not explain the unexpected long-term remission and potential cure. However, it is tempting to consider the anti-CD20 immunotherapy as the curative intervention in the two independent tumors of this case.",2017-01-11 +25682068,A novel statistical method for quantitative comparison of multiple ChIP-seq datasets.,"

Motivation

ChIP-seq is a powerful technology to measure the protein binding or histone modification strength in the whole genome scale. Although there are a number of methods available for single ChIP-seq data analysis (e.g. 'peak detection'), rigorous statistical method for quantitative comparison of multiple ChIP-seq datasets with the considerations of data from control experiment, signal to noise ratios, biological variations and multiple-factor experimental designs is under-developed.

Results

In this work, we develop a statistical method to perform quantitative comparison of multiple ChIP-seq datasets and detect genomic regions showing differential protein binding or histone modification. We first detect peaks from all datasets and then union them to form a single set of candidate regions. The read counts from IP experiment at the candidate regions are assumed to follow Poisson distribution. The underlying Poisson rates are modeled as an experiment-specific function of artifacts and biological signals. We then obtain the estimated biological signals and compare them through the hypothesis testing procedure in a linear model framework. Simulations and real data analyses demonstrate that the proposed method provides more accurate and robust results compared with existing ones.

Availability and implementation

An R software package ChIPComp is freely available at http://web1.sph.emory.edu/users/hwu30/software/ChIPComp.html.",2015-02-13 +25971742,GEO2Enrichr: browser extension and server app to extract gene sets from GEO and analyze them for biological functions.,"

Motivation

Identification of differentially expressed genes is an important step in extracting knowledge from gene expression profiling studies. The raw expression data from microarray and other high-throughput technologies is deposited into the Gene Expression Omnibus (GEO) and served as Simple Omnibus Format in Text (SOFT) files. However, to extract and analyze differentially expressed genes from GEO requires significant computational skills.

Results

Here we introduce GEO2Enrichr, a browser extension for extracting differentially expressed gene sets from GEO and analyzing those sets with Enrichr, an independent gene set enrichment analysis tool containing over 70 000 annotated gene sets organized into 75 gene-set libraries. GEO2Enrichr adds JavaScript code to GEO web-pages; this code scrapes user selected accession numbers and metadata, and then, with one click, users can submit this information to a web-server application that downloads the SOFT files, parses, cleans and normalizes the data, identifies the differentially expressed genes, and then pipes the resulting gene lists to Enrichr for downstream functional analysis. GEO2Enrichr opens a new avenue for adding functionality to major bioinformatics resources such as GEO by integrating tools and resources without the need for a plug-in architecture. Importantly, GEO2Enrichr helps researchers to quickly explore hypotheses with little technical overhead, lowering the barrier of entry for biologists by automating data processing steps needed for knowledge extraction from the major repository GEO.

Availability and implementation

GEO2Enrichr is an open source tool, freely available for installation as browser extensions at the Chrome Web Store and FireFox Add-ons. Documentation and a browser independent web application can be found at http://amp.pharm.mssm.edu/g2e/.

Contact

avi.maayan@mssm.edu.",2015-05-13 +26454013,Deciphering Genome Content and Evolutionary Relationships of Isolates from the Fungus Magnaporthe oryzae Attacking Different Host Plants.,"Deciphering the genetic bases of pathogen adaptation to its host is a key question in ecology and evolution. To understand how the fungus Magnaporthe oryzae adapts to different plants, we sequenced eight M. oryzae isolates differing in host specificity (rice, foxtail millet, wheat, and goosegrass), and one Magnaporthe grisea isolate specific of crabgrass. Analysis of Magnaporthe genomes revealed small variation in genome sizes (39-43 Mb) and gene content (12,283-14,781 genes) between isolates. The whole set of Magnaporthe genes comprised 14,966 shared families, 63% of which included genes present in all the nine M. oryzae genomes. The evolutionary relationships among Magnaporthe isolates were inferred using 6,878 single-copy orthologs. The resulting genealogy was mostly bifurcating among the different host-specific lineages, but was reticulate inside the rice lineage. We detected traces of introgression from a nonrice genome in the rice reference 70-15 genome. Among M. oryzae isolates and host-specific lineages, the genome composition in terms of frequencies of genes putatively involved in pathogenicity (effectors, secondary metabolism, cazome) was conserved. However, 529 shared families were found only in nonrice lineages, whereas the rice lineage possessed 86 specific families absent from the nonrice genomes. Our results confirmed that the host specificity of M. oryzae isolates was associated with a divergence between lineages without major gene flow and that, despite the strong conservation of gene families between lineages, adaptation to different hosts, especially to rice, was associated with the presence of a small number of specific gene families. 
All information was gathered in a public database (http://genome.jouy.inra.fr/gemo).",2015-10-09 +26449202,Profiling RNA editing in human tissues: towards the inosinome Atlas.,"Adenine to Inosine RNA editing is a widespread co- and post-transcriptional mechanism mediated by ADAR enzymes acting on double stranded RNA. It has a plethora of biological effects, appears to be particularly pervasive in humans with respect to other mammals, and is implicated in a number of diverse human pathologies. Here we present the first human inosinome atlas comprising 3,041,422 A-to-I events identified in six tissues from three healthy individuals. Matched directional total-RNA-Seq and whole genome sequence datasets were generated and analysed within a dedicated computational framework, also capable of detecting hyper-edited reads. Inosinome profiles are tissue specific and edited gene sets consistently show enrichment of genes involved in neurological disorders and cancer. Overall frequency of editing also varies, but is strongly correlated with ADAR expression levels. The inosinome database is available at: http://srv00.ibbe.cnr.it/editing/.",2015-10-09 +22492647,Bayesian integration of networks without gold standards.,"

Motivation

Biological experiments give insight into networks of processes inside a cell, but are subject to error and uncertainty. However, due to the overlap between the large number of experiments reported in public databases it is possible to assess the chances of individual observations being correct. In order to do so, existing methods rely on high-quality 'gold standard' reference networks, but such reference networks are not always available.

Results

We present a novel algorithm for computing the probability of network interactions that operates without gold standard reference data. We show that our algorithm outperforms existing gold standard-based methods. Finally, we apply the new algorithm to a large collection of genetic interaction and protein-protein interaction experiments.

Availability

The integrated dataset and a reference implementation of the algorithm as a plug-in for the Ondex data integration framework are available for download at http://bio-nexus.ncl.ac.uk/projects/nogold/",2012-04-06 +23071489,SNP-SNP interactions discovered by logic regression explain Crohn's disease genetics.,"In genome-wide association studies (GWAS), the association between each single nucleotide polymorphism (SNP) and a phenotype is assessed statistically. To further explore genetic associations in GWAS, we considered two specific forms of biologically plausible SNP-SNP interactions, 'SNP intersection' and 'SNP union,' and analyzed the Crohn's Disease (CD) GWAS data of the Wellcome Trust Case Control Consortium for these interactions using a limited form of logic regression. We found strong evidence of CD-association for 195 genes, identifying novel susceptibility genes (e.g., ISX, SLCO6A1, TMEM183A) as well as confirming many previously identified susceptibility genes in CD GWAS (e.g., IL23R, NOD2, CYLD, NKX2-3, IL12RB2, ATG16L1). Notably, 37 of the 59 chromosomal locations indicated for CD-association by a meta-analysis of CD GWAS, involving over 22,000 cases and 29,000 controls, were represented in the 195 genes, as well as some chromosomal locations previously indicated only in linkage studies, but not in GWAS. We repeated the analysis with two smaller GWASs from the Database of Genotype and Phenotype (dbGaP): in spite of differences of populations and study power across the three datasets, we observed some consistencies across the three datasets. Notable examples included TMEM183A and SLCO6A1 which exhibited strong evidence consistently in our WTCCC and both of the dbGaP SNP-SNP interaction analyses. Examining these specific forms of SNP interactions could identify additional genetic associations from GWAS. 
R codes, data examples, and a ReadMe file are available for download from our website: http://www.ualberta.ca/~yyasui/homepage.html.",2012-10-12 +24088197,A comprehensive map of the influenza A virus replication cycle.,"

Background

Influenza is a common infectious disease caused by influenza viruses. Annual epidemics cause severe illnesses, deaths, and economic loss around the world. To better defend against influenza viral infection, it is essential to understand its mechanisms and associated host responses. Many studies have been conducted to elucidate these mechanisms; however, the overall picture remains incompletely understood. A systematic understanding of influenza viral infection in host cells is needed to facilitate the identification of influential host response mechanisms and potential drug targets.

Description

We constructed a comprehensive map of the influenza A virus ('IAV') life cycle ('FluMap') by undertaking a literature-based, manual curation approach. Based on information obtained from publicly available pathway databases, updated with literature-based information and input from expert virologists and immunologists, FluMap is currently composed of 960 factors (i.e., proteins, mRNAs etc.) and 456 reactions, and is annotated with ~500 papers and curation comments. In addition to detailing the type of molecular interactions, isolate/strain specific data are also available. The FluMap was built with the pathway editor CellDesigner in standard SBML (Systems Biology Markup Language) format and visualized as an SBGN (Systems Biology Graphical Notation) diagram. It is also available as a web service (online map) based on the iPathways+ system to enable community discussion by influenza researchers. We also demonstrate computational network analyses to identify targets using the FluMap.

Conclusion

The FluMap is a comprehensive pathway map that can serve as a graphically presented knowledge-base and as a platform to analyze functional interactions between IAV and host factors. Publicly available webtools will allow continuous updating to ensure the most reliable representation of the host-virus interaction network. The FluMap is available at http://www.influenza-x.org/flumap/.",2013-10-02 +22110026,Phytozome: a comparative platform for green plant genomics.,"The number of sequenced plant genomes and associated genomic resources is growing rapidly with the advent of both an increased focus on plant genomics from funding agencies, and the application of inexpensive next generation sequencing. To interact with this increasing body of data, we have developed Phytozome (http://www.phytozome.net), a comparative hub for plant genome and gene family data and analysis. Phytozome provides a view of the evolutionary history of every plant gene at the level of sequence, gene structure, gene family and genome organization, while at the same time providing access to the sequences and functional annotations of a growing number (currently 25) of complete plant genomes, including all the land plants and selected algae sequenced at the Joint Genome Institute, as well as selected species sequenced elsewhere. Through a comprehensive plant genome database and web portal, these data and analyses are available to the broader plant science research community, providing powerful comparative genomics tools that help to link model systems with other plants of economic and ecological importance.",2011-11-22 +27587694,L1 regularization facilitates detection of cell type-specific parameters in dynamical systems.,"

Motivation

A major goal of drug development is to selectively target certain cell types. Cellular decisions influenced by drugs are often dependent on the dynamic processing of information. Selective responses can be achieved by differences between the involved cell types at levels of receptor, signaling, gene regulation or further downstream. Therefore, a systematic approach to detect and quantify cell type-specific parameters in dynamical systems becomes necessary.

Results

Here, we demonstrate that a combination of nonlinear modeling with L1 regularization is capable of detecting cell type-specific parameters. To adapt the least-squares numerical optimization routine to L1 regularization, sub-gradient strategies as well as truncation of proposed optimization steps were implemented. Likelihood-ratio tests were used to determine the optimal regularization strength resulting in a sparse solution in terms of a minimal number of cell type-specific parameters that is in agreement with the data. By applying our implementation to a realistic dynamical benchmark model of the DREAM6 challenge we were able to recover parameter differences with an accuracy of 78%. Within the subset of detected differences, 91% were in agreement with their true value. Furthermore, we found that the results could be improved using the profile likelihood. In conclusion, the approach constitutes a general method to infer an overarching model with a minimum number of individual parameters for the particular models.

Availability and implementation

A MATLAB implementation is provided within the freely available, open-source modeling environment Data2Dynamics. Source code for all examples is provided online at http://www.data2dynamics.org/

Contact

bernhard.steiert@fdm.uni-freiburg.de.",2016-09-01 +27635956,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Management of Patients With Nonfunctioning Pituitary Adenomas: Executive Summary.,"

Background

Nonfunctioning pituitary adenomas (NFPAs) are the most frequent pituitary tumors.

Objective

To create evidence-based guidelines for the initial management of NFPAs.

Methods

A multidisciplinary task force composed of physician volunteers and evidence-based medicine-trained methodologists conducted a systematic review of the literature relevant to the management of NFPAs. To ascertain the class of evidence for the posttreatment follow-ups, the task force used the Clinical Assessment evidence-based classification.

Results

Seven topics of importance were chosen for detailed evaluation. The topics addressed include preoperative evaluation, primary treatment, treatment options for residual tumors after surgery, and postoperative patient management. For preoperative patient evaluation, the guideline task force focused on preoperative imaging, preoperative laboratory evaluation, and preoperative ophthalmologic evaluation. For primary treatment, this guideline addresses surgical resection, medical therapy, radiation therapy, the natural history of untreated tumors, surgical methodologies, such as endoscopy, microscopy, or craniotomy, and intraoperative adjuncts like neuronavigation, cerebrospinal fluid diversion, or intraoperative imaging. For residual tumor treatment, the guideline task force evaluated radiation vs observation. Additional topics addressed in this guideline regarding postoperative patient management include the frequency of postoperative imaging, postoperative endocrine evaluation, and postoperative ophthalmologic evaluation.

Conclusion

Although there is clearly a need for more randomized trials generating higher levels of evidence to help guide physicians managing NFPAs, the existing evidence provided valuable data upon which the guidelines described in the 7 articles generated from this effort are based. The full guidelines document can be located at https://www.cns.org/guidelines/guidelines-management-patients-non-functioning-pituitary-adenomas.

Abbreviation

NFPA, nonfunctioning pituitary adenoma.",2016-10-01 +27656855,Impact of regulatory assessment on clinical studies in Brazil.,"

Introduction

Despite the recent expansion of clinical studies allocated to Brazil, the delay of local regulatory deadlines directly impacts their completion.

Objective

This article examines the allocation process of clinical studies to Brazil in comparison with other countries, as well as the financial impact of studies not completed due to interruption caused by the delay in the regulatory process.

Method

The allocation processes of studies were compared in nine countries with similar stages of economic development and countries in Latin America using the websites http://data.worldbank.org/data-catalog/GDP-rankings-table and http://worldpopulationreview.com and clinicaltrials.gov, comprising 185 countries. The 46 studies sponsored by the pharmaceutical industry underwent an analysis of the regulatory review process.

Results

46 studies sponsored by the industry and submitted in the country between June 2007 and June 2013 were analyzed; 18 (39%) were discontinued due to the delay in obtaining the necessary approvals. For the approved studies, patient recruitment began an average of 11 months after the other countries. It is estimated that 530 Brazilian patients did not have the opportunity to participate in these studies. Financial losses were on the order of 14.6 million dollars for the country, including patient, medication and supplies costs, and expenses.

Conclusion

Brazil has enormous potential for the realization of clinical studies. Researchers, associations of disabled people and patients with chronic diseases, sponsors and the authorities must work together to develop an approval process that is efficient, predictable and, most of all, transparent. The current regulatory environment must and can be improved and optimized in order to result in tangible benefits for patients, society and the country's scientific development.",2016-09-01 +26049161,GeIST: a pipeline for mapping integrated DNA elements.,"

Unlabelled

There are several experimental contexts in which it is important to identify DNA integration sites, such as insertional mutagenesis screens, gene and enhancer trap applications, and gene therapy. We previously developed an assay to identify millions of integrations in multiplexed barcoded samples at base-pair resolution. The sheer amount of data produced by this approach makes the mapping of individual sites non-trivial without bioinformatics support. This article presents the Genomic Integration Site Tracker (GeIST), a command-line pipeline designed to map the integration sites produced by this assay and identify the samples from which they came. GeIST version 2.1.0, a more adaptable version of our original pipeline, can identify integrations of murine leukemia virus, adeno-associated virus, Tol2 transposons or Ac/Ds transposons, and can be adapted for other inserted elements. It has been tested on experimental data for each of these delivery vectors and fine-tuned to account for sequencing and cloning artifacts.

Availability and implementation

GeIST uses a combination of Bash shell scripting and Perl. GeIST is available at http://research.nhgri.nih.gov/software/GeIST/.

Contact

burgess@mail.nih.gov.",2015-06-06 +29854569,The National Library of Medicine Pill Image Recognition Challenge: An Initial Report. ,"In January 2016 the U.S. National Library of Medicine announced a challenge competition calling for the development and discovery of high-quality algorithms and software that rank how well consumer images of prescription pills match reference images of pills in its authoritative RxIMAGE collection. This challenge was motivated by the need to easily identify unknown prescription pills both by healthcare personnel and the general public. Potential benefits of this capability include confirmation of the pill in settings where the documentation and medication have been separated, such as in a disaster or emergency; and confirmation of a pill when the prescribed medication changes from brand to generic, or for any other reason the shape and color of the pill change. The data for the competition consisted of two types of images, high quality macro photographs, reference images, and consumer quality photographs of the quality we expect users of a proposed application to acquire. A training dataset consisting of 2000 reference images and 5000 corresponding consumer quality images acquired from 1000 pills was provided to challenge participants. A second dataset acquired from 1000 pills with similar distributions of shape and color was reserved as a segregated testing set. Challenge submissions were required to produce a ranking of the reference images, given a consumer quality image as input. Determination of the winning teams was done using the mean average precision quality metric, with the three winners obtaining mean average precision scores of 0.27, 0.09, and 0.08. In the retrieval results, the correct image was amongst the top five ranked images 43%, 12%, and 11% of the time, out of 5000 query/consumer images. 
This is an initial promising step towards development of an NLM software system and application-programming interface facilitating pill identification. The training dataset will continue to be freely available online at: http://pir.nlm.nih.gov/challenge/submission.html.",2016-10-01 +25202135,Performance evaluation of DNA copy number segmentation methods.,"A number of bioinformatic or biostatistical methods are available for analyzing DNA copy number profiles measured from microarray or sequencing technologies. In the absence of rich enough gold standard data sets, the performance of these methods is generally assessed using unrealistic simulation studies, or based on small real data analyses. To make an objective and reproducible performance assessment, we have designed and implemented a framework to generate realistic DNA copy number profiles of cancer samples with known truth. These profiles are generated by resampling publicly available SNP microarray data from genomic regions with known copy-number state. The original data have been extracted from dilutions series of tumor cell lines with matched blood samples at several concentrations. Therefore, the signal-to-noise ratio of the generated profiles can be controlled through the (known) percentage of tumor cells in the sample. This article describes this framework and its application to a comparison study between methods for segmenting DNA copy number profiles from SNP microarrays. This study indicates that no single method is uniformly better than all others. It also helps identifying pros and cons of the compared methods as a function of biologically informative parameters, such as the fraction of tumor cells in the sample and the proportion of heterozygous markers. 
This comparison study may be reproduced using the open source and cross-platform R package jointseg, which implements the proposed data generation and evaluation framework: http://r-forge.r-project.org/R/?group_id=1562.",2014-09-08 +25161252,Integration of molecular network data reconstructs Gene Ontology.,"

Motivation

Recently, a shift was made from using Gene Ontology (GO) to evaluate molecular network data to using these data to construct and evaluate GO. Dutkowski et al. provide the first evidence that a large part of GO can be reconstructed solely from topologies of molecular networks. Motivated by this work, we develop a novel data integration framework that integrates multiple types of molecular network data to reconstruct and update GO. We ask how much of GO can be recovered by integrating various molecular interaction data.

Results

We introduce a computational framework for integration of various biological networks using penalized non-negative matrix tri-factorization (PNMTF). It takes all network data in a matrix form and performs simultaneous clustering of genes and GO terms, inducing new relations between genes and GO terms (annotations) and between GO terms themselves. To improve the accuracy of our predicted relations, we extend the integration methodology to include additional topological information represented as the similarity in wiring around non-interacting genes. Surprisingly, by integrating topologies of bakers' yeasts protein-protein interaction, genetic interaction (GI) and co-expression networks, our method reports as related 96% of GO terms that are directly related in GO. The inclusion of the wiring similarity of non-interacting genes contributes 6% to this large GO term association capture. Furthermore, we use our method to infer new relationships between GO terms solely from the topologies of these networks and validate 44% of our predictions in the literature. In addition, our integration method reproduces 48% of cellular component, 41% of molecular function and 41% of biological process GO terms, outperforming the previous method in the former two domains of GO. Finally, we predict new GO annotations of yeast genes and validate our predictions through GIs profiling.

Availability and implementation

Supplementary Tables of new GO term associations and predicted gene annotations are available at http://bio-nets.doc.ic.ac.uk/GO-Reconstruction/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +26370657,Ambient Particulate Matter Air Pollution Exposure and Mortality in the NIH-AARP Diet and Health Cohort.,"

Background

Outdoor fine particulate matter (≤ 2.5 μm; PM2.5) has been identified as a global health threat, but the number of large U.S. prospective cohort studies with individual participant data remains limited, especially at lower recent exposures.

Objectives

We aimed to test the relationship between long-term exposure to PM2.5 and death risk from all nonaccidental causes, cardiovascular (CVD), and respiratory diseases in 517,041 men and women enrolled in the National Institutes of Health-AARP cohort.

Methods

Individual participant data were linked with residence PM2.5 exposure estimates across the continental United States for a 2000-2009 follow-up period when matching census tract-level PM2.5 exposure data were available. Participants enrolled ranged from 50 to 71 years of age, residing in six U.S. states and two cities. Cox proportional hazard models yielded hazard ratio (HR) estimates per 10 μg/m3 of PM2.5 exposure.

Results

PM2.5 exposure was significantly associated with total mortality (HR = 1.03; 95% CI: 1.00, 1.05) and CVD mortality (HR = 1.10; 95% CI: 1.05, 1.15), but the association with respiratory mortality was not statistically significant (HR = 1.05; 95% CI: 0.98, 1.13). A significant association was found with respiratory mortality only among never smokers (HR = 1.27; 95% CI: 1.03, 1.56). Associations with 10-μg/m3 PM2.5 exposures in yearly participant residential annual mean, or in metropolitan area-wide mean, were consistent with baseline exposure model results. Associations with PM2.5 were similar when adjusted for ozone exposures. Analyses of California residents alone also yielded statistically significant PM2.5 mortality HRs for total and CVD mortality.

Conclusions

Long-term exposure to PM2.5 air pollution was associated with an increased risk of total and CVD mortality, providing an independent test of the PM2.5-mortality relationship in a new large U.S. prospective cohort experiencing lower post-2000 PM2.5 exposure levels.

Citation

Thurston GD, Ahn J, Cromar KR, Shao Y, Reynolds HR, Jerrett M, Lim CC, Shanley R, Park Y, Hayes RB. 2016. Ambient particulate matter air pollution exposure and mortality in the NIH-AARP Diet and Health cohort. Environ Health Perspect 124:484-490; http://dx.doi.org/10.1289/ehp.1509676.",2015-09-15 +27587696,A weighted exact test for mutually exclusive mutations in cancer.,"

Motivation

The somatic mutations in the pathways that drive cancer development tend to be mutually exclusive across tumors, providing a signal for distinguishing driver mutations from a larger number of random passenger mutations. This mutual exclusivity signal can be confounded by high and highly variable mutation rates across a cohort of samples. Current statistical tests for exclusivity that incorporate both per-gene and per-sample mutational frequencies are computationally expensive and have limited precision.

Results

We formulate a weighted exact test for assessing the significance of mutual exclusivity in an arbitrary number of mutational events. Our test conditions on the number of samples with a mutation as well as per-event, per-sample mutation probabilities. We provide a recursive formula to compute P-values for the weighted test exactly as well as a highly accurate and efficient saddlepoint approximation of the test. We use our test to approximate a commonly used permutation test for exclusivity that conditions on per-event, per-sample mutation frequencies. However, our test is more efficient and it recovers more significant results than the permutation test. We use our Weighted Exclusivity Test (WExT) software to analyze hundreds of colorectal and endometrial samples from The Cancer Genome Atlas, which are two cancer types that often have extremely high mutation rates. On both cancer types, the weighted test identifies sets of mutually exclusive mutations in cancer genes with fewer false positives than earlier approaches.

Availability and implementation

See http://compbio.cs.brown.edu/projects/wext for software.

Contact

braphael@cs.brown.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +28376262,The effects of iterative reconstruction and kernel selection on quantitative computed tomography measures of lung density.,"

Purpose

To determine the effects of iterative reconstruction (IR) and high-frequency kernels on quantitative computed tomography (qCT) density measures at reduced X-ray dose.

Materials and methods

The COPDGene 2 Phantom (CTP 698, The Phantom Laboratory, Salem, NY) with four embedded lung mimicking foam densities (12lb, 20lb, and 4lb), as well as water, air, and acrylic reference inserts, was imaged using a GE 64 slice CT750 HD scanner in helical mode with four current-time products ranging from 12 to 100 mAs. The raw acquired data were reconstructed using standard (STD - low frequency) and Bone (high frequency) kernels with filtered back projection (FBP), 100% ASiR, and Veo reconstruction algorithms. The reference density inserts were manually segmented using Slicer3D (www.slicer.org), and the mean, standard deviation, and histograms of the segmented regions were generated using Fiji (http://fiji.sc/Fiji) for each reconstruction. Measurements of threshold values placed on the cumulative frequency distribution of voxels determined by these measured histograms at 5%, PD5phant , and 15%, PD15phant , (analogous to the relative area below -950 HU (RA-950) and percent density 15 (PD15) in human lung emphysema quantification, respectively), were also performed.

Results

The use of high-resolution kernels in conjunction with ASiR and Veo did not significantly affect the mean Hounsfield units (HU) of each of the density standards (< 4 HU deviation) and current-time products within the phantom when compared with the STD+FBP reconstruction conventionally used in clinical applications. A truncation of the scanner reported HU values at -1024 that shifts the mean toward more positive values was found to cause a systematic error in lower attenuating regions. Use of IR drove convergence toward the mean of measured histograms (~100-137% increase in the number of measured voxels at the mean of the histogram), while the combination of Bone+ASiR preserved the standard deviation of HU values about the mean compared to STD+FBP, with the added effect of improved spatial resolution and accuracy in airway measures. PD5phant and PD15phant were most similar between the Bone+ASiR and STD+FBP in all regions except those affected by the -1024 truncation artifact.

Conclusions

Extension of the scanner reportable HU values below the present limit of -1024 will mitigate discrepancies found in qCT lung densitometry in low-density regions. The density histogram became more sharply peaked, and standard deviation was reduced for IR, directly affecting density thresholds, PD5phant and PD15phant, placed on the cumulative frequency distribution of each region in the phantom, which serve as analogs to RA-950 and PD15 typically used in lung density quantitation. The combination of high-frequency kernels (Bone) with ASiR mitigates this effect and preserves density measures derived from the image histogram. Moreover, previous studies have shown improved accuracy of qCT airway measures of wall thickness (WT) and wall area percentage (WA%) when using high-frequency kernels in combination with ASiR to better represent airway walls. The results therefore suggest an IR approach for accurate assessment of airway and parenchymal density measures in the lungs.",2017-05-12 +26450965,CircNet: a database of circular RNAs derived from transcriptome sequencing data.,"Circular RNAs (circRNAs) represent a new type of regulatory noncoding RNA that only recently has been identified and cataloged. Emerging evidence indicates that circRNAs exert a new layer of post-transcriptional regulation of gene expression. In this study, we utilized transcriptome sequencing datasets to systematically identify the expression of circRNAs (including known and newly identified ones by our pipeline) in 464 RNA-seq samples, and then constructed the CircNet database (http://circnet.mbc.nctu.edu.tw/) that provides the following resources: (i) novel circRNAs, (ii) integrated miRNA-target networks, (iii) expression profiles of circRNA isoforms, (iv) genomic annotations of circRNA isoforms (e.g. 282 948 exon positions), and (v) sequences of circRNA isoforms. 
The CircNet database is to our knowledge the first public database that provides tissue-specific circRNA expression profiles and circRNA-miRNA-gene regulatory networks. It not only extends the most up to date catalog of circRNAs but also provides a thorough expression analysis of both previously reported and novel circRNAs. Furthermore, it generates an integrated regulatory network that illustrates the regulation between circRNAs, miRNAs and genes.",2015-10-07 +26450966,Intrastrand triplex DNA repeats in bacteria: a source of genomic instability.,"Repetitive nucleic acid sequences are often prone to form secondary structures distinct from B-DNA. Prominent examples of such structures are DNA triplexes. We observed that certain intrastrand triplex motifs are highly conserved and abundant in prokaryotic genomes. A systematic search of 5246 different prokaryotic plasmids and genomes for intrastrand triplex motifs was conducted and the results summarized in the ITxF database available online at http://bioinformatics.uni-konstanz.de/utils/ITxF/. Next we investigated biophysical and biochemical properties of a particular G/C-rich triplex motif (TM) that occurs in many copies in more than 260 bacterial genomes by CD and nuclear magnetic resonance spectroscopy as well as in vivo footprinting techniques. A characterization of putative properties and functions of these unusually frequent nucleic acid motifs demonstrated that the occurrence of the TM is associated with a high degree of genomic instability. TM-containing genomic loci are significantly more rearranged among closely related Escherichia coli strains compared to control sites. In addition, we found very high frequencies of TM motifs in certain Enterobacteria and Cyanobacteria that were previously described as genetically highly diverse. In conclusion we link intrastrand triplex motifs with the induction of genomic instability. 
We speculate that the observed instability might be an adaptive feature of these genomes that creates variation for natural selection to act upon.",2015-10-07 +27446133,Mining Functional Modules in Heterogeneous Biological Networks Using Multiplex PageRank Approach.,"Identification of functional modules/sub-networks in large-scale biological networks is one of the important research challenges in current bioinformatics and systems biology. Approaches have been developed to identify functional modules in single-class biological networks; however, methods for systematically and interactively mining multiple classes of heterogeneous biological networks are lacking. In this paper, we present a novel algorithm (called mPageRank) that utilizes the Multiplex PageRank approach to mine functional modules from two classes of biological networks. We demonstrate the capabilities of our approach by successfully mining functional biological modules through integrating expression-based gene-gene association networks and protein-protein interaction networks. We first compared the performance of our method with that of other methods using simulated data. We then applied our method to identify the cell division cycle related functional module and plant signaling defense-related functional module in the model plant Arabidopsis thaliana. Our results demonstrated that the mPageRank method is effective for mining sub-networks in both expression-based gene-gene association networks and protein-protein interaction networks, and has the potential to be adapted for the discovery of functional modules/sub-networks in other heterogeneous biological networks. 
The mPageRank executable program, source code, the datasets and results of the presented two case studies are publicly and freely available at http://plantgrn.noble.org/MPageRank/.",2016-06-22 +27527821,State of the Art of Chromosome 18-Centric HPP in 2016: Transcriptome and Proteome Profiling of Liver Tissue and HepG2 Cells.,"A gene-centric approach was applied for a large-scale study of expression products of a single chromosome. Transcriptome profiling of liver tissue and HepG2 cell line was independently performed using two RNA-Seq platforms (SOLiD and Illumina) and also by Droplet Digital PCR (ddPCR) and quantitative RT-PCR. Proteome profiling was performed using shotgun LC-MS/MS as well as selected reaction monitoring with stable isotope-labeled standards (SRM/SIS) for liver tissue and HepG2 cells. On the basis of SRM/SIS measurements, protein copy numbers were estimated for the Chromosome 18 (Chr 18) encoded proteins in the selected types of biological material. These values were compared with expression levels of corresponding mRNA. As a result, we obtained information about 158 and 142 transcripts for HepG2 cell line and liver tissue, respectively. SRM/SIS measurements and shotgun LC-MS/MS allowed us to detect 91 Chr 18-encoded proteins in total, while an intersection between the HepG2 cell line and liver tissue proteomes was ∼66%. In total, there were 16 proteins specifically observed in HepG2 cell line, while 15 proteins were found solely in the liver tissue. Comparison between proteome and transcriptome revealed a poor correlation (R2 ≈ 0.1) between corresponding mRNA and protein expression levels. The SRM and shotgun data sets (obtained during 2015-2016) are available in PASSEL (PASS00697) and ProteomeExchange/PRIDE (PXD004407). 
All measurements were also uploaded into the in-house Chr 18 Knowledgebase at http://kb18.ru/protein/matrix/416126 .",2016-08-29 +27140084,Optical constants of e-beam-deposited zirconium dioxide measured in the 55-150 Å wavelength region using the reflectivity technique.,"In the present study, optical constants of e-beam-deposited zirconium dioxide (ZrO2) thin film are determined in the 55-150 Å soft x-ray wavelength region using the angle-dependent reflectivity technique. Soft x-ray reflectivity measurements are carried out using the reflectivity beamline at the Indus-1 synchrotron radiation source. Derived optical constants (δ and β) are compared with the tabulated values of Henke et al. [http://henke.lbl.gov/optical_constants/asf.html]. It is found that the measured δ values are consistently lower than the tabulated bulk values in the 70-150 Å wavelength region. In this region, the delta values are lower by 19%-24% from the tabulated data. Below the Zr M4 edge (66.3 Å), a deviation in delta values is found as ∼2%-21%. These changes are attributed to growth-related defects (oxygen and voids) and variation in film stoichiometry. To the best of our knowledge, the present study gives the first reported experimental values of optical constants for ZrO2 in the 55-150 Å wavelength region.",2016-04-01 +27174931,rMAPS: RNA map analysis and plotting server for alternative exon regulation.,"RNA-binding proteins (RBPs) play a critical role in the regulation of alternative splicing (AS), a prevalent mechanism for generating transcriptomic and proteomic diversity in eukaryotic cells. Studies have shown that AS can be regulated by RBPs in a binding-site-position dependent manner. Depending on where RBPs bind, splicing of an alternative exon can be enhanced or suppressed. Therefore, spatial analyses of RBP motifs and binding sites around alternative exons will help elucidate splicing regulation by RBPs. 
The development of high-throughput sequencing technologies has allowed transcriptome-wide analyses of AS and RBP-RNA interactions. Given a set of differentially regulated alternative exons obtained from RNA sequencing (RNA-seq) experiments, the rMAPS web server (http://rmaps.cecsresearch.org) performs motif analyses of RBPs in the vicinity of alternatively spliced exons and creates RNA maps that depict the spatial patterns of RBP motifs. Similarly, rMAPS can also perform spatial analyses of RBP-RNA binding sites identified by cross-linking immunoprecipitation sequencing (CLIP-seq) experiments. We anticipate rMAPS will be a useful tool for elucidating RBP regulation of alternative exon splicing using high-throughput sequencing data.",2016-05-12 +25078398,Basic4Cseq: an R/Bioconductor package for analyzing 4C-seq data.,"

Summary

Basic4Cseq is an R/Bioconductor package for basic filtering, analysis and subsequent near-cis visualization of 4C-seq data. The package processes aligned 4C-seq raw data stored in binary alignment/map (BAM) format and maps the short reads to a corresponding virtual fragment library. Functions are included to create virtual fragment libraries providing chromosome position and further information on 4C-seq fragments (length and uniqueness of the fragment ends, and blindness of a fragment) for any BSGenome package. An optional filter is included for BAM files to remove invalid 4C-seq reads, and further filter functions are offered for 4C-seq fragments. Additionally, basic quality controls based on the read distribution are included. Fragment data in the vicinity of the experiment's viewpoint are visualized as coverage plot based on a running median approach and a multi-scale contact profile. Wig files or csv files of the fragment data can be exported for further analyses and visualizations of interactions with other programs.

Availability and implementation

Basic4Cseq is implemented in R and available at http://www.bioconductor.org/. A vignette with detailed descriptions of the functions is included in the package.

Contact

Carolin.Walter@uni-muenster.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-30 +27153690,ORFanFinder: automated identification of taxonomically restricted orphan genes.,"

Motivation

Orphan genes, also known as ORFans, are newly evolved genes in a genome that enable the organism to adapt to specific living environment. The gene content of every sequenced genome can be classified into different age groups, based on how widely/narrowly a gene's homologs are distributed in the context of species taxonomy. Those having homologs restricted to organisms of particular taxonomic ranks are classified as taxonomically restricted ORFans.

Results

Implementing this idea, we have developed an open source program named ORFanFinder and a free web server to allow automated classification of a genome's gene content and identification of ORFans at different taxonomic ranks. ORFanFinder and its web server will contribute to the comparative genomics field by facilitating the study of the origin of new genes and the emergence of lineage-specific traits in both prokaryotes and eukaryotes.

Availability and implementation

http://cys.bios.niu.edu/orfanfinder

Contact

yyin@niu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-07 +27486818,DNA-damage related genes and clinical outcome in hormone receptor positive breast cancer.,"Control of DNA damage is frequently deregulated in solid tumors. Upregulation of genes within this process can be indicative of a more aggressive phenotype and linked with worse outcome. In the present article we identify DNA damage related genes associated with worse outcome in breast cancer.2286 genes were differentially expressed between normal breast tissue and basal-like tumors, and 62 included in the DNA metabolic process function. Expression of RAD51, GINS1, TRIP13 and MCM2 were associated with detrimental relapse free survival (RFS) and overall survival (OS) in luminal tumors. The combined analyses of TRIP13+RAD51+MCM2 showed the worse association for RFS (HR 2.25 (1.51-3.35) log rank p= 4.1e-05) and TRIP13+RAD51 for OS (HR 5.13 (0.6-44.17) log rank p=0.098) in ER+/HER2- tumors. TRIP13 is amplified in 3.1% of breast cancers.Transcriptomic analyses using public datasets evaluating expression values between normal breast tissue and TNBC identified upregulated genes. Genes included in the DNA metabolic process were selected and confirmed using data contained at oncomine (www.oncomine.org). Evaluation of the selected genes with RFS and OS was performed using the KM Plotter Online Tool (http://www.kmplot.com). Evaluation of molecular alterations was performed using cBioportal (www.cbioportal.org).Expression of DNA metabolic related genes RAD51, GINS1, TRIP13 and MCM2 are associated with poor outcome. Combinations of some of these genes are linked to poor RFS or OS in luminal A, B and ER+HER2- tumors. Evaluation of its predictive capacity in prospective studies is required.",2016-07-28 +26437953,Prognostic factors and genes associated with endometrial cancer based on gene expression profiling by bioinformatics analysis.,"

Background

Endometrial cancer (EC) is the most prevalent malignancy worldwide. Although several efforts had been made to explore the molecular mechanism responsible for EC progression, it is still not fully understood.

Aim of the study

To evaluate the clinical characteristics and prognostic factors of patients with EC, and further to search for novel genes associated with EC progression.

Methods

We recruited 328 patients with EC and analyzed prognostic factors using Cox proportional hazard regression model. Further, a gene expression profile of EC was used to identify the differentially expressed genes (DEGs) between normal samples and tumor samples. Subsequently, Kyoto Encyclopedia of Genes and Genomes pathway enrichment analysis ( http://www.genome.jp/kegg/ ) for DEGs were performed, and then protein-protein interaction (PPI) network of DEGs as well as the subnetwork of PPI were constructed with plug-in, MCODE by mapping DEGs into the Search Tool for the Retrieval of Interacting Genes database.

Results

Our results showed that body mass index (BMI), hypertension, myometrial invasion, pathological type, and Glut4 positive expression were prognostic factors in EC (P < 0.05). Bioinformatics analysis showed that upregulated DEGs were associated with cell cycle, and downregulated DEGs were related to MAPK pathway. Meanwhile, PPI network analysis revealed that upregulated CDK1 and CCNA2 as well as downregulated JUN and FOS were listed in top two nodes with high degrees.

Conclusions

Patients with EC should be given more focused attentions in respect of pathological type, BMI, hypertension, and Glut4-positive expression. In addition, CDK1, CCNA2, JUN, and FOS might play important roles in EC development.",2015-10-05 +27408922,Data on expression of lipoxygenases-5 and -12 in the normal and acetaminophen-damaged liver.,"Here we present additional data on the expression of lipoxygenases -5 and -12 in the normal and acetaminophen-damaged liver, which are associated with our manuscript recently published in Chemico-Biological Interactions on lipid metabolism and eicosanoid signaling pathways involved in acetaminophen-induced liver damage in a mouse model (http://dx.doi.org/10.1016/j.cbi.2015.10.019 [1]). It has been demonstrated that the expression of lipoxygenase-5 and leukotriene formation are increased in the livers of rats with carbon tetrachloride (CCl4)-induced cirrhosis (http://dx.doi.org/10.1053/gast.2000.17831 [2]). In addition, the lipoxygenase-12 is known to be expressed in the resident macrophage population of the liver (http://dx.doi.org/10.1016/S0014-5793(99)00396-8 [3]). Mice were injected with acetaminophen, and at 48 h their livers were processed for immunohistochemistry with anti-mouse lipoxygenase-5 and -12 antibodies. At the same time point, the RNA was also extracted from the liver to assess the expression of lipoxygenase-5 and -12 genes via qPCR analysis. Our results show that lipoxygenase-5 expression, but not that of lipoxygenase-12, changes significantly in the acetominophen-damaged liver.",2016-03-31 +26227146,ClicO FS: an interactive web-based service of Circos.,"

Unlabelled

We present ClicO Free Service, an online web-service based on Circos, which provides a user-friendly, interactive web-based interface with configurable features to generate Circos circular plots.

Availability and implementation

Online web-service is freely available at http://clicofs.codoncloud.com

Contact

soonjoo.yap@codongenomics.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-29 +23325621,CRAVAT: cancer-related analysis of variants toolkit.,"

Summary

Advances in sequencing technology have greatly reduced the costs incurred in collecting raw sequencing data. Academic laboratories and researchers therefore now have access to very large datasets of genomic alterations but limited time and computational resources to analyse their potential biological importance. Here, we provide a web-based application, Cancer-Related Analysis of Variants Toolkit, designed with an easy-to-use interface to facilitate the high-throughput assessment and prioritization of genes and missense alterations important for cancer tumorigenesis. Cancer-Related Analysis of Variants Toolkit provides predictive scores for germline variants, somatic mutations and relative gene importance, as well as annotations from published literature and databases. Results are emailed to users as MS Excel spreadsheets and/or tab-separated text files.

Availability

http://www.cravat.us/",2013-01-16 +22290800,"The HUPO initiative on Model Organism Proteomes, iMOP.","The community working on model organisms is growing steadily and the number of model organisms for which proteome data are being generated is continuously increasing. To standardize efforts and to make optimal use of proteomics data acquired from model organisms, a new Human Proteome Organisation (HUPO) initiative on model organism proteomes (iMOP) was approved at the HUPO Ninth Annual World Congress in Sydney, 2010. iMOP will seek to stimulate scientific exchange and disseminate HUPO best practices. The needs of model organism researchers for central databases will be better represented, catalyzing the integration of proteomics and organism-specific databases. Full details of iMOP activities, members, tools and resources can be found at our website http://www.imop.uzh.ch/ and new members are invited to join us.",2012-02-01 +24341535,VTCdb: a gene co-expression database for the crop species Vitis vinifera (grapevine).,"

Background

Gene expression datasets in model plants such as Arabidopsis have contributed to our understanding of gene function and how a single underlying biological process can be governed by a diverse network of genes. The accumulation of publicly available microarray data encompassing a wide range of biological and environmental conditions has enabled the development of additional capabilities including gene co-expression analysis (GCA). GCA is based on the understanding that genes encoding proteins involved in similar and/or related biological processes may exhibit comparable expression patterns over a range of experimental conditions, developmental stages and tissues. We present an open access database for the investigation of gene co-expression networks within the cultivated grapevine, Vitis vinifera.

Description

The new gene co-expression database, VTCdb (http://vtcdb.adelaide.edu.au/Home.aspx), offers an online platform for transcriptional regulatory inference in the cultivated grapevine. Using condition-independent and condition-dependent approaches, grapevine co-expression networks were constructed using the latest publicly available microarray datasets from diverse experimental series, utilising the Affymetrix Vitis vinifera GeneChip (16 K) and the NimbleGen Grape Whole-genome microarray chip (29 K), thus making it possible to profile approximately 29,000 genes (95% of the predicted grapevine transcriptome). Applications available with the online platform include the use of gene names, probesets, modules or biological processes to query the co-expression networks, with the option to choose between Affymetrix or Nimblegen datasets and between multiple co-expression measures. Alternatively, the user can browse existing network modules using interactive network visualisation and analysis via CytoscapeWeb. To demonstrate the utility of the database, we present examples from three fundamental biological processes (berry development, photosynthesis and flavonoid biosynthesis) whereby the recovered sub-networks reconfirm established plant gene functions and also identify novel associations.

Conclusions

Together, we present valuable insights into grapevine transcriptional regulation by developing network models applicable to researchers in their prioritisation of gene candidates, for on-going study of biological processes related to grapevine development, metabolism and stress responses.",2013-12-16 +24945300,Methy-Pipe: an integrated bioinformatics pipeline for whole genome bisulfite sequencing data analysis.,"DNA methylation, one of the most important epigenetic modifications, plays a crucial role in various biological processes. The level of DNA methylation can be measured using whole-genome bisulfite sequencing at single base resolution. However, until now, there is a paucity of publicly available software for carrying out integrated methylation data analysis. In this study, we implemented Methy-Pipe, which not only fulfills the core data analysis requirements (e.g. sequence alignment, differential methylation analysis, etc.) but also provides useful tools for methylation data annotation and visualization. Specifically, it uses Burrow-Wheeler Transform (BWT) algorithm to directly align bisulfite sequencing reads to a reference genome and implements a novel sliding window based approach with statistical methods for the identification of differentially methylated regions (DMRs). The capability of processing data parallelly allows it to outperform a number of other bisulfite alignment software packages. To demonstrate its utility and performance, we applied it to both real and simulated bisulfite sequencing datasets. The results indicate that Methy-Pipe can accurately estimate methylation densities, identify DMRs and provide a variety of utility programs for downstream methylation data analysis. In summary, Methy-Pipe is a useful pipeline that can process whole genome bisulfite sequencing data in an efficient, accurate, and user-friendly manner. 
Software and test dataset are available at http://sunlab.lihs.cuhk.edu.hk/methy-pipe/.",2014-06-19 +26484233,Gene expression profiling by high throughput sequencing to determine signatures for the bovine receptive uterus at early gestation.,"The uterus plays a central role among the reproductive tissues in the context of early embryo-maternal communication and a successful pregnancy depends on a complex series of endometrial molecular and cellular events. The factors responsible for the initial interaction between maternal and embryonic tissues, leading to the establishment of pregnancy, remain poorly understood. In this context, Illumina's next-generation sequencing technology has been used to discover the uterine transcriptome signature that is favourable for ongoing pregnancy. More specifically, the present report documents on a retrospective in vivo study in which data on pregnancy outcome were linked to uterine gene expression signatures on day 6 (bovine model). Using the RNA-Seq method, 14.654 reference genes were effectively analysed for differential expression between pregnant and non-pregnant uterine tissue. Transcriptome data revealed that 216 genes were differently expressed when comparing uterine tissue from pregnant and non-pregnant cows. All read sequences were deposited in the Sequence Read Archive (SRA) of the NCBI (http://www.ncbi.nlm.nih.gov/sra). An overview of the gene expression data has been deposited in NCBI's Gene Expression Omnibus (GEO) and is accessible through GEO Series accession number GSE65117. This allows the research community to enhance reproducibility and allows for new discoveries by comparing datasets of signatures linked to receptivity and/or pregnancy success. The resulting information can serve as tool to identify valuable and urgently needed biomarkers for scoring maternal receptivity and even for accurate detection of early pregnancy, which is a matter of cross-species interest. 
Beyond gene expression analysis as a marker tool, the RNA-Seq information on pregnant uterine tissue can be used to gain novel mechanistic insights, such as by identifying alternative splicing events, allele-specific expression, and rare and novel transcripts that might be involved in the onset of maternal receptivity. This concept is unique and provides a new approach towards strategies that are highly needed to improve efficiency of fertility treatments.",2015-06-03 +24021981,PPR proteins of green algae.,"Using the repeat finding algorithm FT-Rep, we have identified 154 pentatricopeptide repeat (PPR) proteins in nine fully sequenced genomes from green algae (with a total of 1201 repeats) and grouped them in 47 orthologous groups. All data are available in a database, PPRdb, accessible online at http://giavap-genomes.ibpc.fr/ppr. Based on phylogenetic trees generated from the repeats, we propose evolutionary scenarios for PPR proteins. Two PPRs are clearly conserved in the entire green lineage: MRL1 is a stabilization factor for the rbcL mRNA, while HCF152 binds in plants to the psbH-petB intergenic region. MCA1 (the stabilization factor for petA) and PPR7 (a short PPR also acting on chloroplast mRNAs) are conserved across the entire Chlorophyta. The other PPRs are clade-specific, with evidence for gene losses, duplications, and horizontal transfer. In some PPR proteins, an additional domain found at the C terminus provides clues as to possible functions. PPR19 and PPR26 possess a methyltransferase_4 domain suggesting involvement in RNA guanosine methylation. PPR18 contains a C-terminal CBS domain, similar to the CBSPPR1 protein found in nucleoids. PPR16, PPR29, PPR37, and PPR38 harbor a SmR (MutS-related) domain similar to that found in land plants pTAC2, GUN1, and SVR7. The PPR-cyclins PPR3, PPR4, and PPR6, in addition, contain a cyclin domain C-terminal to their SmR domain. 
PPR31 is an unusual PPR-cyclin containing at its N terminus an OctotricoPeptide Repeat (OPR) and a RAP domain. We consider the possibility that PPR proteins with a SmR domain can introduce single-stranded nicks in the plastid chromosome.",2013-08-28 +27466623,NCMine: Core-peripheral based functional module detection using near-clique mining.,"

Motivation

The identification of functional modules from protein-protein interaction (PPI) networks is an important step toward understanding the biological features of PPI networks. The detection of functional modules in PPI networks is often performed by identifying internally densely connected subnetworks, and often produces modules with ""core"" and ""peripheral"" proteins. The core proteins are the ones having dense connections to each other in a module. The difference between core and peripheral proteins is important to understand the functional roles of proteins in modules, but there are few methods to explicitly elucidate the internal structure of functional modules at gene level.

Results

We propose NCMine, which is a novel network clustering method and visualization tool for the core-peripheral structure of functional modules. It extracts near-complete subgraphs from networks based on a node-weighting scheme using degree centrality, and reports subgroups as functional modules. We implemented this method as a plugin of Cytoscape, which is widely used to visualize and analyze biological networks. The plugin allows users to extract functional modules from PPI networks and interactively filter modules of interest. We applied the method to human PPI networks, and found several examples with the core-peripheral structure of modules that may be related to cancer development.

Availability and implementation

The Cytoscape plugin and tutorial are available at Cytoscape AppStore. (http://apps.cytoscape.org/apps/ncmine).

Contact

kengo@ecei.tohoku.ac.jp Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-27 +26438539,CRISPRz: a database of zebrafish validated sgRNAs.,"CRISPRz (http://research.nhgri.nih.gov/CRISPRz/) is a database of CRISPR/Cas9 target sequences that have been experimentally validated in zebrafish. Programmable RNA-guided CRISPR/Cas9 has recently emerged as a simple and efficient genome editing method in various cell types and organisms, including zebrafish. Because the technique is so easy and efficient in zebrafish, the most valuable asset is no longer a mutated fish (which has distribution challenges), but rather a CRISPR/Cas9 target sequence to the gene confirmed to have high mutagenic efficiency. With a highly active CRISPR target, a mutant fish can be quickly replicated in any genetic background anywhere in the world. However, sgRNAs vary widely in their activity and models for predicting target activity are imperfect. Thus, it is very useful to collect in one place validated CRISPR target sequences with their relative mutagenic activities. A researcher could then select a target of interest in the database with an expected activity. Here, we report the development of CRISPRz, a database of validated zebrafish CRISPR target sites collected from published sources, as well as from our own in-house large-scale mutagenesis project. CRISPRz can be searched using multiple inputs such as ZFIN IDs, accession number, UniGene ID, or gene symbols from zebrafish, human and mouse.",2015-10-04 +21545712,"SNiPlay: a web-based tool for detection, management and analysis of SNPs. Application to grapevine diversity projects.","

Background

High-throughput re-sequencing, new genotyping technologies and the availability of reference genomes allow the extensive characterization of Single Nucleotide Polymorphisms (SNPs) and insertion/deletion events (indels) in many plant species. The rapidly increasing amount of re-sequencing and genotyping data generated by large-scale genetic diversity projects requires the development of integrated bioinformatics tools able to efficiently manage, analyze, and combine these genetic data with genome structure and external data.

Results

In this context, we developed SNiPlay, a flexible, user-friendly and integrative web-based tool dedicated to polymorphism discovery and analysis. It integrates: 1) a pipeline, freely accessible through the internet, combining existing softwares with new tools to detect SNPs and to compute different types of statistical indices and graphical layouts for SNP data. From standard sequence alignments, genotyping data or Sanger sequencing traces given as input, SNiPlay detects SNPs and indels events and outputs submission files for the design of Illumina's SNP chips. Subsequently, it sends sequences and genotyping data into a series of modules in charge of various processes: physical mapping to a reference genome, annotation (genomic position, intron/exon location, synonymous/non-synonymous substitutions), SNP frequency determination in user-defined groups, haplotype reconstruction and network, linkage disequilibrium evaluation, and diversity analysis (Pi, Watterson's Theta, Tajima's D). Furthermore, the pipeline allows the use of external data (such as phenotype, geographic origin, taxa, stratification) to define groups and compare statistical indices. 2) a database storing polymorphisms, genotyping data and grapevine sequences released by public and private projects. It allows the user to retrieve SNPs using various filters (such as genomic position, missing data, polymorphism type, allele frequency), to compare SNP patterns between populations, and to export genotyping data or sequences in various formats.

Conclusions

Our experiments on grapevine genetic projects showed that SNiPlay allows geneticists to rapidly obtain advanced results in several key research areas of plant genetic diversity. Both the management and treatment of large amounts of SNP data are rendered considerably easier for end-users through automation and integration. Current developments are taking into account new advances in high-throughput technologies. SNiPlay is available at: http://sniplay.cirad.fr/.",2011-05-05 +22080556,"CoryneRegNet 6.0--Updated database content, new analysis methods and novel features focusing on community demands.","Post-genomic analysis techniques such as next-generation sequencing have produced vast amounts of data about micro organisms including genetic sequences, their functional annotations and gene regulatory interactions. The latter are genetic mechanisms that control a cell's characteristics, for instance, pathogenicity as well as survival and reproduction strategies. CoryneRegNet is the reference database and analysis platform for corynebacterial gene regulatory networks. In this article we introduce the updated version 6.0 of CoryneRegNet and describe the updated database content which includes, 6352 corynebacterial regulatory interactions compared with 4928 interactions in release 5.0 and 3235 regulations in release 4.0, respectively. We also demonstrate how we support the community by integrating analysis and visualization features for transiently imported custom data, such as gene regulatory interactions. Furthermore, with release 6.0, we provide easy-to-use functions that allow the user to submit data for persistent storage with the CoryneRegNet database. Thus, it offers important options to its users in terms of community demands. 
CoryneRegNet is publicly available at http://www.coryneregnet.de.",2011-11-12 +22843230,CYP-nsSNP: a specialized database focused on effect of non-synonymous SNPs on function of CYPs.,"The cytochrome P450 (CYP) enzymes play the central role in synthesis of endogenous substances and metabolism of xenobiotics. The substitution of single amino acid caused by non-synonymous single nucleotide polymorphism (nsSNP) will lead to the change in enzymatic activity of CYP isozymes, especially the drugmetabolizing ability. CYP-nsSNP is a specialized database focused on the effect of nsSNPs on enzymatic activity of CYPs. Its unique feature lies in providing the qualitative and quantitative description of the CYP variants in terms of enzymatic activity. In addition, the database also offers the general information about nsSNP and compounds that are involved in corresponding enzymatic reaction. The current CYP-nsSNP can be accessible at http://cypdatabase.sjtu.edu.cn/ and includes more than 300 genetic variants of 12 CYP isozymes together with about 100 compounds. In order to keep the accuracy of information within database, all experimental data were collected from the scientific literatures, and the users who conducted research to identify the novel CYP variants are encouraged to contribute their data. Therefore, CYP-nsSNP can be considered as a valuable source for experimental and computational studies of impact of genetic polymorphism on the function of CYPs.",2012-06-01 +23883165,Automatic extraction of biomolecular interactions: an empirical approach.,"

Background

We describe a method for extracting data about how biomolecule pairs interact from texts. This method relies on empirically determined characteristics of sentences. The characteristics are efficient to compute, making this approach to extraction of biomolecular interactions scalable. The results of such interaction mining can support interaction network annotation, question answering, database construction, and other applications.

Results

We constructed a software system to search MEDLINE for sentences likely to describe interactions between given biomolecules. The system extracts a list of the interaction-indicating terms appearing in those sentences, then ranks those terms based on their likelihood of correctly characterizing how the biomolecules interact. The ranking process uses a tf-idf (term frequency-inverse document frequency) based technique using empirically derived knowledge about sentences, and was applied to the MEDLINE literature collection. Software was developed as part of the MetNet toolkit (http://www.metnetdb.org).

Conclusions

Specific, efficiently computable characteristics of sentences about biomolecular interactions were analyzed to better understand how to use these characteristics to extract how biomolecules interact. The text empirics method that was investigated, though arising from a classical tradition, has yet to be fully explored for the task of extracting biomolecular interactions from the literature. The conclusions we reach about the sentence characteristics investigated in this work, as well as the technique itself, could be used by other systems to provide evidence about putative interactions, thus supporting efforts to maximize the ability of hybrid systems to support such tasks as annotating and constructing interaction networks.",2013-07-24 +23792391,Oral diseases affect some 3.9 billion people.,"

Data sources

Medline, Embase, Lilacs.

Study selection

Published and unpublished observational population-based studies presenting information on the prevalence, incidence, case fatality and cause-specific mortality related to untreated caries, severe periodontitis and severe tooth loss between January 1980 and December 2010. There were no language restrictions. Study quality was assessed using the STROBE checklist (http://www.strobe-statement.org/).

Data extraction and synthesis

Prevalence estimates were calculated on the database for all age-gender-country-year groups using a specifically developed Bayesian meta-regression tool. Disability-adjusted life-years (DALYs) and years lived with disability (YLDs) metrics were used to quantify the disease burden. Disability weights were calculated based on population-based surveys in five countries (USA, Peru, Tanzania, Bangladesh and Indonesia) and an open Internet survey. Uncertainties in estimates were examined using Monte Carlo simulation techniques with uncertainty levels presented as the 2.5th and 97.5th centiles, which can be interpreted as a 95% UI.

Results

Oral diseases remain highly prevalent in 2010 affecting 3.9 billion people. Untreated caries in permanent teeth was the most prevalent condition evaluated for the entire GBD (Global Burden of Disease) 2010 Study with a global prevalence of 35% for all ages combined. Severe periodontitis and untreated caries in deciduous teeth were the 6th and 10th most prevalent conditions, affecting, respectively, 11% and 9% of the global population. Oral conditions combined accounted for 15 million DALYs globally (1.9% of all YLDs and 0.6% of all DALYs), implying an average health loss of 224 years per 100,000 people. DALYs due to oral conditions increased 20.8% between 1990 and 2010, mainly due to population growth and aging. While DALYs due to severe periodontitis and untreated caries increased, those due to severe tooth loss decreased.

Conclusions

The findings highlight the challenge in responding to the diversity of urgent oral health needs world-wide, particularly in developing communities.",2013-01-01 +26434508,ExoCarta: A Web-Based Compendium of Exosomal Cargo.,"Exosomes are membranous vesicles that are released by a variety of cells into the extracellular microenvironment and are implicated in intercellular communication. As exosomes contain RNA, proteins and lipids, there is a significant interest in characterizing the molecular cargo of exosomes. Here, we describe ExoCarta (http://www.exocarta.org), a manually curated Web-based compendium of exosomal proteins, RNAs and lipids. Since its inception, the database has been highly accessed (>54,000 visitors from 135 countries). The current version of ExoCarta hosts 41,860 proteins, >7540 RNA and 1116 lipid molecules from more than 286 exosomal studies annotated with International Society for Extracellular Vesicles minimal experimental requirements for definition of extracellular vesicles. Besides, ExoCarta features dynamic protein-protein interaction networks and biological pathways of exosomal proteins. Users can download most often identified exosomal proteins based on the number of studies. The downloaded files can further be imported directly into FunRich (http://www.funrich.org) tool for additional functional enrichment and interaction network analysis.",2015-10-03 +25505090,3Dmol.js: molecular visualization with WebGL.,"

Unlabelled

3Dmol.js is a modern, object-oriented JavaScript library that uses the latest web technologies to provide interactive, hardware-accelerated three-dimensional representations of molecular data without the need to install browser plugins or Java. 3Dmol.js provides a full featured API for developers as well as a straightforward declarative interface that lets users easily share and embed molecular data in websites.

Availability and implementation

3Dmol.js is distributed under the permissive BSD open source license. Source code and documentation can be found at http://3Dmol.csb.pitt.edu

Contact

dkoes@pitt.edu.",2014-12-12 +25075116,Trowel: a fast and accurate error correction module for Illumina sequencing reads.,"

Motivation

The ability to accurately read the order of nucleotides in DNA and RNA is fundamental for modern biology. Errors in next-generation sequencing can lead to many artifacts, from erroneous genome assemblies to mistaken inferences about RNA editing. Uneven coverage in datasets also contributes to false corrections.

Result

We introduce Trowel, a massively parallelized and highly efficient error correction module for Illumina read data. Trowel both corrects erroneous base calls and boosts base qualities based on the k-mer spectrum. With high-quality k-mers and relevant base information, Trowel achieves high accuracy for different short read sequencing applications. The latency in the data path has been significantly reduced because of efficient data access and data structures. In performance evaluations, Trowel was highly competitive with other tools regardless of coverage, genome size, read length and fragment size.

Availability and implementation

Trowel is written in C++ and is provided under the General Public License v3.0 (GPLv3). It is available at http://trowel-ec.sourceforge.net.

Contact

euncheon.lim@tue.mpg.de or weigel@tue.mpg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-29 +25524895,VarSim: a high-fidelity simulation and validation framework for high-throughput genome sequencing with cancer applications.,"

Summary

VarSim is a framework for assessing alignment and variant calling accuracy in high-throughput genome sequencing through simulation or real data. In contrast to simulating a random mutation spectrum, it synthesizes diploid genomes with germline and somatic mutations based on a realistic model. This model leverages information such as previously reported mutations to make the synthetic genomes biologically relevant. VarSim simulates and validates a wide range of variants, including single nucleotide variants, small indels and large structural variants. It is an automated, comprehensive compute framework supporting parallel computation and multiple read simulators. Furthermore, we developed a novel map data structure to validate read alignments, a strategy to compare variants binned in size ranges and a lightweight, interactive, graphical report to visualize validation results with detailed statistics. Thus far, it is the most comprehensive validation tool for secondary analysis in next generation sequencing.

Availability and implementation

Code in Java and Python along with instructions to download the reads and variants is at http://bioinform.github.io/varsim.

Contact

rd@bina.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-17 +26727148,"Variation in Operating Characteristics of Adult Day Services Centers, by Center Ownership: United States, 2014.","More than one-quarter of a million participants were enrolled in 4,800 adult day services centers in the United States in 2014. Unlike other long-term care providers, such as nursing homes, home health agencies, hospices, and residential care communities, the majority of adult day services centers are nonprofit. However, for-profit ownership of adult day services centers has increased, from 27% in 2010 to 40% in 2012, and more recently to 44% in 2014. This report presents the most current national estimates of selected adult day services center operating characteristics, and compares these characteristics by center ownership. State estimates for the characteristics presented in this data brief are available online at: http://www.cdc.gov/nchs/nsltcp/nsltcp_products.htm.",2015-12-01 +23074185,Darned in 2013: inclusion of model organisms and linking with Wikipedia.,"DARNED (DAtabase of RNa EDiting, available at http://darned.ucc.ie) is a centralized repository of reference genome coordinates corresponding to RNA nucleotides having altered templated identities in the process of RNA editing. The data in DARNED are derived from published datasets of RNA editing events. RNA editing instances have been identified with various methods, such as bioinformatics screenings, deep sequencing and/or biochemical techniques. Here we report our current progress in the development and expansion of the DARNED. 
In addition to novel database features the DARNED update describes inclusion of Drosophila melanogaster and Mus musculus RNA editing events and the launch of a community-based annotation in the RNA WikiProject.",2012-10-15 +26319942,Predicting the spatial organization of chromosomes using epigenetic data.,"Chromosome folding can reinforce the demarcation between euchromatin and heterochromatin. Two new studies show how epigenetic data, including DNA methylation, can accurately predict chromosome folding in three dimensions. Such computational approaches reinforce the idea of a linkage between epigenetically marked chromatin domains and their segregation into distinct compartments at the megabase scale or topological domains at a higher resolution. Please see related articles: http://dx.doi.org/10.1186/s13059-015-0741-y and http://dx.doi.org/10.1186/s13059-015-0740-z.",2015-08-29 +26469904,"STK39, But Not BST1, HLA-DQB1, and SPPL2B Polymorphism, Is Associated With Han-Chinese Parkinson's Disease in Taiwan.","Neuroinflammation is emerging as an important pathway involved in Parkinson's disease (PD) pathogenesis. Herein, we investigated the effect of 4 top PD-associated genetic variants in Caucasians listed on the top risk loci identified by meta-analysis of genome wide-association studies in PDGene database (http://www.pdgene.org/top_results), including serine threonine kinase 39 (STK39) rs1955337, bone marrow stromal cell antigen 1 (BST1) rs11724635, major histocompatibility complex, class II, DQ beta 1 (HLA-DQB1) rs9275326, and signal peptide peptidase-like 2B (SPPL2B) rs62120679, by genotyping 596 Han-Chinese patients with PD and 597 age-matched control subjects. Compared with subjects with STK39 rs1955337 GG genotype, those with TT genotype had a 1.64-fold increased risk of PD (95% confidence interval: 1.13-2.39, P = 0.010). 
The recessive model also demonstrated an increased PD risk in TT genotype (odds ratio: 1.59, 95% confidence interval: 1.12-2.27) compared with the other genotypes (GT + GG). PD patients demonstrate a similar genotypic and allelic frequency in BST1 rs11724635, HLA-DQB1 rs9275326, and SPPL2B rs62120679 compared with controls. These findings suggested that the STK39 rs1955337 TT genotype is a risk factor for Han-Chinese patients with PD in Taiwan. The ethnic discrepancies of the other 3 genetic variants may indicate a distinct genetic background of neuroinflammation between PD patients in Han-Chinese and Caucasians.",2015-10-01 +26458460,[Basics and clinical application of human mesenchymal stromal/stem cells].,"Human mesenchymal stromal/stem cells (MSCs) show a variety of biological characteristics. The clinical trials database provided by the National Institutes of Health, USA, contains about 400 clinical trials of MSCs for a wide range of therapeutic applications internationally (http://www.clinicaltrials.gov, key words ""mesenchymal stem cells"", as of April, 2015). Encouraging results from these clinical trials include evidence of efficacy against graft versus host disease (GVHD) in hematopoietic stem cell transplantation. Treatment for and/or prevention of engraftment failure and insufficient hematopoietic recovery have also been explored. Herein, we will address the basic principles of MSCs and the current status of clinical studies using MSCs. Future prospects for MSC-based therapy will also be discussed.",2015-10-01 +26428293,JEPEGMIX: gene-level joint analysis of functional SNPs in cosmopolitan cohorts.,"

Motivation

To increase detection power, gene level analysis methods are used to aggregate weak signals. To greatly increase computational efficiency, most methods use as input summary statistics from genome-wide association studies (GWAS). Subsequently, gene statistics are constructed using linkage disequilibrium (LD) patterns from a relevant reference panel. However, all methods, including our own Joint Effect on Phenotype of eQTL/functional single nucleotide polymorphisms (SNPs) associated with a Gene (JEPEG), assume homogeneous panels, e.g. European. However, this renders these tools unsuitable for the analysis of large cosmopolitan cohorts.

Results

We propose a JEPEG extension, JEPEGMIX, which, similar to one of our software tools, Direct Imputation of summary STatistics of unmeasured SNPs from MIXed ethnicity cohorts, is capable of estimating accurate LD patterns for cosmopolitan cohorts. JEPEGMIX uses these accurate LD estimates to (i) impute the summary statistics at unmeasured functional variants and (ii) test for the joint effect of all measured and imputed functional variants which are associated with a gene. We illustrate the performance of our tool by analyzing the GWAS meta-analysis summary statistics from the multi-ethnic Psychiatric Genomics Consortium Schizophrenia stage 2 cohort. This practical application supports the immune system being one of the main drivers of the process leading to schizophrenia.

Availability and implementation

Software, annotation database and examples are available at http://dleelab.github.io/jepegmix/.

Contact

donghyung.lee@vcuhealth.org

Supplementary information

Supplementary material is available at Bioinformatics online.",2015-10-01 +26428294,Cell line name recognition in support of the identification of synthetic lethality in cancer from text.,"

Motivation

The recognition and normalization of cell line names in text is an important task in biomedical text mining research, facilitating for instance the identification of synthetically lethal genes from the literature. While several tools have previously been developed to address cell line recognition, it is unclear whether available systems can perform sufficiently well in realistic and broad-coverage applications such as extracting synthetically lethal genes from the cancer literature. In this study, we revisit the cell line name recognition task, evaluating both available systems and newly introduced methods on various resources to obtain a reliable tagger not tied to any specific subdomain. In support of this task, we introduce two text collections manually annotated for cell line names: the broad-coverage corpus Gellus and CLL, a focused target domain corpus.

Results

We find that the best performance is achieved using NERsuite, a machine learning system based on Conditional Random Fields, trained on the Gellus corpus and supported with a dictionary of cell line names. The system achieves an F-score of 88.46% on the test set of Gellus and 85.98% on the independently annotated CLL corpus. It was further applied at large scale to 24 302 102 unannotated articles, resulting in the identification of 5 181 342 cell line mentions, normalized to 11 755 unique cell line database identifiers.

Availability and implementation

The manually annotated datasets, the cell line dictionary, derived corpora, NERsuite models and the results of the large-scale run on unannotated texts are available under open licenses at http://turkunlp.github.io/Cell-line-recognition/.

Contact

sukaew@utu.fi.",2015-10-01 +26753561,iMiRNA-SSF: Improving the Identification of MicroRNA Precursors by Combining Negative Sets with Different Distributions.,"The identification of microRNA precursors (pre-miRNAs) helps in understanding regulator in biological processes. The performance of computational predictors depends on their training sets, in which the negative sets play an important role. In this regard, we investigated the influence of benchmark datasets on the predictive performance of computational predictors in the field of miRNA identification, and found that the negative samples have significant impact on the predictive results of various methods. We constructed a new benchmark set with different data distributions of negative samples. Trained with this high quality benchmark dataset, a new computational predictor called iMiRNA-SSF was proposed, which employed various features extracted from RNA sequences. Experimental results showed that iMiRNA-SSF outperforms three state-of-the-art computational methods. For practical applications, a web-server of iMiRNA-SSF was established at the website http://bioinformatics.hitsz.edu.cn/iMiRNA-SSF/.",2016-01-12 +22686585,From steady-state to synchronized yeast glycolytic oscillations II: model validation.,"

Unlabelled

In an accompanying paper [du Preez et al., (2012) FEBS J279, 2810-2822], we adapt an existing kinetic model for steady-state yeast glycolysis to simulate limit-cycle oscillations. Here we validate the model by testing its capacity to simulate a wide range of experiments on dynamics of yeast glycolysis. In addition to its description of the oscillations of glycolytic intermediates in intact cells and the rapid synchronization observed when mixing out-of-phase oscillatory cell populations (see accompanying paper), the model was able to predict the Hopf bifurcation diagram with glucose as the bifurcation parameter (and one of the bifurcation points with cyanide as the bifurcation parameter), the glucose- and acetaldehyde-driven forced oscillations, glucose and acetaldehyde quenching, and cell-free extract oscillations (including complex oscillations and mixed-mode oscillations). Thus, the model was compliant, at least qualitatively, with the majority of available experimental data for glycolytic oscillations in yeast. To our knowledge, this is the first time that a model for yeast glycolysis has been tested against such a wide variety of independent data sets.

Database

The mathematical models described here have been submitted to the JWS Online Cellular Systems Modelling Database and can be accessed at http://jjj.biochem.sun.ac.za/database/dupreez/index.html.",2012-07-05 +27508060,"search.bioPreprint: a discovery tool for cutting edge, preprint biomedical research articles.","The time it takes for a completed manuscript to be published traditionally can be extremely lengthy. Article publication delay, which occurs in part due to constraints associated with peer review, can prevent the timely dissemination of critical and actionable data associated with new information on rare diseases or developing health concerns such as Zika virus. Preprint servers are open access online repositories housing preprint research articles that enable authors (1) to make their research immediately and freely available and (2) to receive commentary and peer review prior to journal submission. There is a growing movement of preprint advocates aiming to change the current journal publication and peer review system, proposing that preprints catalyze biomedical discovery, support career advancement, and improve scientific communication. While the number of articles submitted to and hosted by preprint servers are gradually increasing, there has been no simple way to identify biomedical research published in a preprint format, as they are not typically indexed and are only discoverable by directly searching the specific preprint server websites. To address this issue, we created a search engine that quickly compiles preprints from disparate host repositories and provides a one-stop search solution. Additionally, we developed a web application that bolsters the discovery of preprints by enabling each and every word or phrase appearing on any web site to be integrated with articles from preprint servers. 
This tool, search.bioPreprint, is publicly available at http://www.hsls.pitt.edu/resources/preprint.",2016-06-16 +26424080,JRC GMO-Amplicons: a collection of nucleic acid sequences related to genetically modified organisms. ,"The DNA target sequence is the key element in designing detection methods for genetically modified organisms (GMOs). Unfortunately this information is frequently lacking, especially for unauthorized GMOs. In addition, patent sequences are generally poorly annotated, buried in complex and extensive documentation and hard to link to the corresponding GM event. Here, we present the JRC GMO-Amplicons, a database of amplicons collected by screening public nucleotide sequence databanks by in silico determination of PCR amplification with reference methods for GMO analysis. The European Union Reference Laboratory for Genetically Modified Food and Feed (EU-RL GMFF) provides these methods in the GMOMETHODS database to support enforcement of EU legislation and GM food/feed control. The JRC GMO-Amplicons database is composed of more than 240 000 amplicons, which can be easily accessed and screened through a web interface. To our knowledge, this is the first attempt at pooling and collecting publicly available sequences related to GMOs in food and feed. The JRC GMO-Amplicons supports control laboratories in the design and assessment of GMO methods, providing inter-alia in silico prediction of primers specificity and GM targets coverage. The new tool can assist the laboratories in the analysis of complex issues, such as the detection and identification of unauthorized GMOs. Notably, the JRC GMO-Amplicons database allows the retrieval and characterization of GMO-related sequences included in patents documentation. Finally, it can help annotating poorly described GM sequences and identifying new relevant GMO-related sequences in public databases. The JRC GMO-Amplicons is freely accessible through a web-based portal that is hosted on the EU-RL GMFF website. 
Database URL: http://gmo-crl.jrc.ec.europa.eu/jrcgmoamplicons/.",2015-09-30 +26422234,PATBox: A Toolbox for Classification and Analysis of P-Type ATPases.,"P-Type ATPases are part of the regulatory system of the cell where they are responsible for transporting ions and lipids through the cell membrane. These pumps are found in all eukaryotes and their malfunction has been found to cause several severe diseases. Knowing which substrate is pumped by a certain P-Type ATPase is therefore vital. The P-Type ATPases can be divided into 11 subtypes based on their specificity, that is, the substrate that they pump. Determining the subtype experimentally is time-consuming. Thus it is of great interest to be able to accurately predict the subtype based on the amino acid sequence only. We present an approach to P-Type ATPase sequence classification based on the k-nearest neighbors, similar to a homology search, and show that this method provides performs very well and, to the best of our knowledge, better than any existing method despite its simplicity. The classifier is made available as a web service at http://services.birc.au.dk/patbox/ which also provides access to a database of potential P-Type ATPases and their predicted subtypes.",2015-09-30 +26527854,Molecular marker database for efficient use in agricultural breeding programs.,"

Unlabelled

The National Agricultural Biotechnology Information Center (NABIC) constructed a web-based molecular marker database to provide information about 7,847 sequence-tagged site (STS) markers identified in 11 species using next generation sequencing (NGS) technologies. The database consists of three major functional categories: keyword search, detailed viewer and download function. The molecular marker annotation table provides detailed information such as ownership information, basic information, and STS-related characterization information.

Availability

The database is available for free at http://nabic.rda.go.kr/Molecularmarker.",2015-09-30 +21603090,GlycomicsDB - A Data Integration Platform for Glycans and their Strucutres.,"Glycomics is a discipline of biology that deals with the structure and function of glycans (or carbohydrates). Analytical techniques such as mass spectrometry (MS) and nuclear magnetic resonance (NMR) are having a significant impact on the field of glycomics. However, effective progress in glycomics research requires collaboration between laboratories to share experimental data, structural information of glycans, and simulation results. Herein we report the development of a web-based data management system that can incorporate large volumes of data from disparate sources and organize them into a uniform format for users to store and access. This system enables participating laboratories to set up a shared data repository which members of interdisciplinary teams can access. The system is able to manage and share raw MS data and structural information of glycans.The database is available at http://www.glycomics.bcf.ku.edu.",2011-04-19 +22058051,The COL7A1 mutation database.,"Dystrophic Epidermolysis Bullosa (DEB) is a genetic disease caused by mutations in the COL7A1 gene that is inherited in the autosomal dominant or recessive mode. We have developed a curated, freely accessible COL7A1 specific database (http://www.col7.info), which contains more than 730 reported and unpublished sequence variants of the gene. Molecular defects are reported according to HGVS recommendation. The clinical description module is provided with an advanced search tool together with a CSV (comm. separated values) data format download option. 
This compilation of COL7A1 data and nomenclature is aimed at assisting molecular and clinical geneticists to enhance the collaboration between researchers worldwide.",2011-12-20 +27330424,Temporal bone carcinoma: Classical prognostic variables revisited and modern clinico-pathological evidence.,"

Aim

Prognostic factors, rational management, and the ongoing investigations regarding temporal bone squamous cell carcinoma (TBSCC) have been critically reviewed.

Background

TBSCC is an uncommon, aggressive malignancy. Although some progress has been made in treating this aggressive tumor, the prognosis in advanced cases remains poor.

Materials and methods

A systematic search of the literature for articles published between 2009 and October 2014 was performed using the PubMed (http://www.pubmed.gov) electronic database.

Results

Given the particular anatomical site of TBSCC, its prognosis is significantly influenced by any direct involvement of nearby structures. The extent of the primary tumor is generally considered one of the most important prognostic factors and it is frequently related to prognosis even more strongly than N stage. For TBSCC, biomarker investigations in surgical specimens are only just beginning to appear in the oncological literature.

Conclusion

Given the particular features of TBSCC, the sub-specialty of otologic oncology seems to be emerging as a defined area of practice involving multidisciplinary team comprising oto-neurosurgeons, head and neck surgeons, plastic surgeons, oncologists, radiotherapists, dedicated radiologists, and pathologists.",2015-09-29 +23766289,"Depth: a web server to compute depth, cavity sizes, detect potential small-molecule ligand-binding cavities and predict the pKa of ionizable residues in proteins.","Residue depth accurately measures burial and parameterizes local protein environment. Depth is the distance of any atom/residue to the closest bulk water. We consider the non-bulk waters to occupy cavities, whose volumes are determined using a Voronoi procedure. Our estimation of cavity sizes is statistically superior to estimates made by CASTp and VOIDOO, and on par with McVol over a data set of 40 cavities. Our calculated cavity volumes correlated best with the experimentally determined destabilization of 34 mutants from five proteins. Some of the cavities identified are capable of binding small molecule ligands. In this study, we have enhanced our depth-based predictions of binding sites by including evolutionary information. We have demonstrated that on a database (LigASite) of ∼200 proteins, we perform on par with ConCavity and better than MetaPocket 2.0. Our predictions, while less sensitive, are more specific and precise. Finally, we use depth (and other features) to predict pKas of GLU, ASP, LYS and HIS residues. Our results produce an average error of just <1 pH unit over 60 predictions. Our simple empirical method is statistically on par with two and superior to three other methods while inferior to only one. The DEPTH server (http://mspc.bii.a-star.edu.sg/depth/) is an ideal tool for rapid yet accurate structural analyses of protein structures.",2013-06-12 +25940222,Isavuconazole: A New Option for the Management of Invasive Fungal Infections.,"

Objective

To review the pharmacology, chemistry, in vitro susceptibility, pharmacokinetics, clinical efficacy, safety, tolerability, dosage, and administration of isavuconazole, a triazole antifungal agent.

Data sources

Studies and reviews were identified through an English language MEDLINE search (1978 to March 2015) and from http://www.clinicaltrials.gov, Food and Drug Administration (FDA) briefing documents, program abstracts from international symposia, and the manufacturer's Web site.

Study selection and data extraction

All published and unpublished trials, abstracts, in vitro and preclinical studies, and FDA briefing documents were reviewed.

Data synthesis

Isavuconazole has activity against a number of clinically important yeasts and molds, including Candida spp, Aspergillus spp, Cryptococcus neoformans, and Trichosporon spp and variable activity against the Mucorales. Isavuconazole, available for both oral and intravenous administration, is characterized by slow elimination allowing once-daily dosing, extensive tissue distribution, and high (>99%) protein binding. The most commonly reported adverse events, which are mild and limited in nature, include nausea, diarrhea, and elevated liver function tests. Its drug interaction potential appears to be similar to other azole antifungals but less than those observed with voriconazole. Comparative trials are under way or have been recently completed for the treatment of candidemia, invasive candidiasis and aspergillosis, and rare mold infections.

Conclusions

Isavuconazole has a broad spectrum of activity and favorable pharmacokinetic properties, providing an advantage over other currently available broad-spectrum azole antifungals and a clinically useful alternative to voriconazole for the treatment of invasive aspergillosis. It may also prove useful for the treatment of candidemia and invasive mold infections; however, these indications await the results of clinical trials.",2015-05-04 +26633827,"Variation in Residential Care Community Resident Characteristics, by Size of Community: United States, 2014.","

Key findings

Residents of residential care communities are persons who cannot live independently but generally do not require the skilled care provided by nursing homes. There were 835,200 current residents in residential care communities in 2014 (1,2). ""Current residents"" refers to those who were living in the community on the day of data collection (as opposed to the total number of residents who lived in the community at some time during the calendar year). This report presents national estimates of selected characteristics of current residents in 2014 and compares these characteristics by community bed size. State-level estimates for these characteristics are available online at: http://www.cdc.gov/nchs/nsltcp/nsltcp_products.htm.",2015-11-01 +22125386,BFluenza: A Proteomic Database on Bird Flu.,"

Unlabelled

Influenza A virus subtype H5N1, also known as ""bird flu"" has been documented to cause an outbreak of respiratory diseases in humans. The unprecedented spread of highly pathogenic avian influenza type A is a threat to veterinary and human health. The BFluenza is a relational database which is solely devoted to proteomic information of H5N1 subtype. Bfluenza has novel features including computed physico-chemical properties data of H5N1 viral proteins, modeled structures of viral proteins, data of protein coordinates, experimental details, molecular description and bibliographic reference. The database also contains nucleotide and their decoded protein sequences data. The database can be searched in various modes by setting search options. The structure of viral protein could be visualized by JMol viewer or by Discovery Studio.

Availability

The database is available for free at http://www.bfluenza.info.",2011-09-28 +23973272,An innovative portal for rare genetic diseases research: the semantic Diseasecard.,"Advances in ""omics"" hardware and software technologies are bringing rare diseases research back from the sidelines. Whereas in the past these disorders were seldom considered relevant, in the era of whole genome sequencing the direct connections between rare phenotypes and a reduced set of genes are of vital relevance. This increased interest in rare genetic diseases research is pushing forward investment and effort towards the creation of software in the field, and leveraging the wealth of available life sciences data. Alas, most of these tools target one or more rare diseases, are focused solely on a single type of user, or are limited to the most relevant scientific breakthroughs for a specific niche. Furthermore, despite some high quality efforts, the ever-growing number of resources, databases, services and applications is still a burden to this area. Hence, there is a clear interest in new strategies to deliver a holistic perspective over the entire rare genetic diseases research domain. This is Diseasecard's reasoning, to build a true lightweight knowledge base covering rare genetic diseases. Developed with the latest semantic web technologies, this portal delivers unified access to a comprehensive network for researchers, clinicians, patients and bioinformatics developers. With in-context access covering over 20 distinct heterogeneous resources, Diseasecard's workspace provides access to the most relevant scientific knowledge regarding a given disorder, whether through direct common identifiers or through full-text search over all connected resources. In addition to its user-oriented features, Diseasecard's semantic knowledge base is also available for direct querying, enabling everyone to include rare genetic diseases knowledge in new or existing information systems. 
Diseasecard is publicly available at http://bioinformatics.ua.pt/diseasecard/.",2013-08-21 +27559155,Fast-SNP: a fast matrix pre-processing algorithm for efficient loopless flux optimization of metabolic models.,"

Motivation

Computation of steady-state flux solutions in large metabolic models is routinely performed using flux balance analysis based on a simple LP (Linear Programming) formulation. A minimal requirement for thermodynamic feasibility of the flux solution is the absence of internal loops, which are enforced using 'loopless constraints'. The resulting loopless flux problem is a substantially harder MILP (Mixed Integer Linear Programming) problem, which is computationally expensive for large metabolic models.

Results

We developed a pre-processing algorithm that significantly reduces the size of the original loopless problem into an easier and equivalent MILP problem. The pre-processing step employs a fast matrix sparsification algorithm-Fast- sparse null-space pursuit (SNP)-inspired by recent results on SNP. By finding a reduced feasible 'loop-law' matrix subject to known directionalities, Fast-SNP considerably improves the computational efficiency in several metabolic models running different loopless optimization problems. Furthermore, analysis of the topology encoded in the reduced loop matrix enabled identification of key directional constraints for the potential permanent elimination of infeasible loops in the underlying model. Overall, Fast-SNP is an effective and simple algorithm for efficient formulation of loop-law constraints, making loopless flux optimization feasible and numerically tractable at large scale.

Availability and implementation

Source code for MATLAB including examples is freely available for download at http://www.aibn.uq.edu.au/cssb-resources under Software. Optimization uses Gurobi, CPLEX or GLPK (the latter is included with the algorithm).

Contact

lars.nielsen@uq.edu.au Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-24 +23021814,The Moli-sani project: computerized ECG database in a population-based cohort study.,"Computerized electrocardiogram (ECG) acquisition and interpretation may be extremely useful in handling analysis of data from large cohort studies and exploit research on the use of ECG data as prognostic markers for cardiovascular disease. The Moli-sani project (http://www.moli-sani.org) is a population-based cohort study aiming at evaluating the risk factors linked to chronic-degenerative disease with particular regard to cardiovascular disease and cancer and intermediate metabolic phenotypes such as hypertension, diabetes, dyslipidemia, obesity, and metabolic syndrome. Between March 2005 and April 2010, 24 325 people aged 35 years or older, living in the Molise region (Italy), were randomly recruited. A follow-up based on linkage with hospital discharge records and mortality regional registry and reexamination of the cohort is ongoing and will be repeated at prefixed times. Each subject was administered questionnaires on personal and medical history, food consumption, quality of life (FS36), and psychometry. Plasma serum, cellular pellet, and urinary spots were stored in liquid nitrogen. Subjects were measured blood pressure, weight, height, and waist and hip circumferences, and underwent spirometry to evaluate pulmonary diffusion capacity, gas diffusion, and pulmonary volumes. Standard 12-lead resting ECG was performed by a Cardiette ar2100-view electrocardiograph and tracings stored in digital standard communication protocol format for subsequent analysis. The digital ECG database of the Moli-sani project is currently being used to assess the association between physiologic variables and pathophysiologic conditions and parameters derived from the ECG signal. 
This computerized ECG database represents a unique opportunity to identify and assess prognostic factors associated with cardiovascular and metabolic diseases.",2012-09-27 +27150811,StructMAn: annotation of single-nucleotide polymorphisms in the structural context.,"The next generation sequencing technologies produce unprecedented amounts of data on the genetic sequence of individual organisms. These sequences carry a substantial amount of variation that may or may not be related to a phenotype. Phenotypically important part of this variation often comes in form of protein-sequence altering (non-synonymous) single nucleotide variants (nsSNVs). Here we present StructMAn, a Web-based tool for annotation of human and non-human nsSNVs in the structural context. StructMAn analyzes the spatial location of the amino acid residue corresponding to nsSNVs in the three-dimensional (3D) protein structure relative to other proteins, nucleic acids and low molecular-weight ligands. We make use of all experimentally available 3D structures of query proteins, and also, unlike other tools in the field, of structures of proteins with detectable sequence identity to them. This allows us to provide a structural context for around 20% of all nsSNVs in a typical human sequencing sample, for up to 60% of nsSNVs in genes related to human diseases and for around 35% of nsSNVs in a typical bacterial sample. Each nsSNV can be visualized and inspected by the user in the corresponding 3D structure of a protein or protein complex. The StructMAn server is available at http://structman.mpi-inf.mpg.de.",2016-05-05 +23704099,Mitochondrial genetics.,"

Introduction

In the last 10 years the field of mitochondrial genetics has widened, shifting the focus from rare sporadic, metabolic disease to the effects of mitochondrial DNA (mtDNA) variation in a growing spectrum of human disease. The aim of this review is to guide the reader through some key concepts regarding mitochondria before introducing both classic and emerging mitochondrial disorders.

Sources of data

In this article, a review of the current mitochondrial genetics literature was conducted using PubMed (http://www.ncbi.nlm.nih.gov/pubmed/). In addition, this review makes use of a growing number of publicly available databases including MITOMAP, a human mitochondrial genome database (www.mitomap.org), the Human DNA polymerase Gamma Mutation Database (http://tools.niehs.nih.gov/polg/) and PhyloTree.org (www.phylotree.org), a repository of global mtDNA variation.

Areas of agreement

The disruption in cellular energy, resulting from defects in mtDNA or defects in the nuclear-encoded genes responsible for mitochondrial maintenance, manifests in a growing number of human diseases.

Areas of controversy

The exact mechanisms which govern the inheritance of mtDNA are hotly debated.

Growing points

Although still in the early stages, the development of in vitro genetic manipulation could see an end to the inheritance of the most severe mtDNA disease.",2013-05-22 +28321273,"Characterization of a new monoclonal anti-glypican-3 antibody specific to the hepatocellular carcinoma cell line, HepG2.","

Aim

To characterize the antigen on HepG2 cell that is specifically recognized by a new monoclonal antibody raised against human liver heparan sulfate proteoglycan (HSPG), clone 1E4-1D9.

Methods

The antigen recognized by mAb 1E4-1D9 was immunoprecipitated and its amino acid sequence was analyzed by LC/MS. The transmembrane domain, number of cysteine residues, and glycosylation sites were predicted from these entire sequences. Data from amino acid analysis was aligned with glypican-3 (https://www.ebi.ac.uk/Tools/msa/clustalo/). The competitive reaction of mAb 1E4-1D9 and anti-glypican-3 on HepG2 cells was demonstrated by indirect immunofluorescence and analyzed by flow cytometry. Moreover, co-immunoprecipitation of mAb 1E4-1D9 and anti-glypican-3 was performed in HepG2 cells by Western immunoblotting. The recognition by mAb 1E4-1D9 of a specific epitope on solid tumor and hematopoietic cell lines was studied using indirect immunofluorescence and analyzed by flow cytometry.

Results

Monoclonal antibody 1E4-1D9 reacted with an HSPG isolated from human liver and a band of 67 kD was detected under both reducing and non-reducing conditions. The specific antigen pulled down by mAb 1E4-1D9, having a MW of 135 kD, was analyzed. The results showed two sequences of interest, gi30722350 (1478 amino acid) and gi60219551 (1378 amino acid). In both sequences no transmembrane regions were observed. Sequence number gi30722350 showed a 99.7% match to FYCO1, a molecule involved in induction of autophagy. Sequence number gi60219551 contained 15 cysteines and 11 putative glycosylation sites with 6 predicted N-glycosylation sites. It was also matched with all PDZ domain proteins. Moreover, it showed an 85.7% match to glypican-3. Glypican-3 on HepG2 cells competitively reacted with both phycoerythrin-conjugated anti-glypican-3 and mAb 1E4-1C2 and resulted in an increase of double-stained cell population when higher concentration of mAb 1E4-1D9 was used. Moreover, antigens precipitated from HepG2 cell by anti-glypican-3 could be detected by mAb 1E4-1D9 and vice versa. The recognition of antigens, on other solid tumor cell lines, by mAb 1E4-1D9 was studied. The results demonstrated that mAb 1E4-1D9 reacted with Huh7, HepG2, HT29, MCF7, SW620, Caco2, B16F1, U937, K562 and Molt4 cells. It was also found to be weakly positive to SW1353 and HL60 and negative to H460 and Hela cell lines.

Conclusion

All findings show that mAb 1E4-1D9 specifically recognizes glypican-3. Moreover, a new partner molecule of glypican-3, FYCO1 is proposed based on the results from co-precipitation studies.",2017-03-01 +23703215,WEB-based GEne SeT AnaLysis Toolkit (WebGestalt): update 2013.,"Functional enrichment analysis is an essential task for the interpretation of gene lists derived from large-scale genetic, transcriptomic and proteomic studies. WebGestalt (WEB-based GEne SeT AnaLysis Toolkit) has become one of the popular software tools in this field since its publication in 2005. For the last 7 years, WebGestalt data holdings have grown substantially to satisfy the requirements of users from different research areas. The current version of WebGestalt supports 8 organisms and 201 gene identifiers from various databases and different technology platforms, making it directly available to the fast growing omics community. Meanwhile, by integrating functional categories derived from centrally and publicly curated databases as well as computational analyses, WebGestalt has significantly increased the coverage of functional categories in various biological contexts including Gene Ontology, pathway, network module, gene-phenotype association, gene-disease association, gene-drug association and chromosomal location, leading to a total of 78 612 functional categories. Finally, new interactive features, such as pathway map, hierarchical network visualization and phenotype ontology visualization have been added to WebGestalt to help users better understand the enrichment results. WebGestalt can be freely accessed through http://www.webgestalt.org or http://bioinfo.vanderbilt.edu/webgestalt/.",2013-05-23 +25934531,[Review and analysis of the evidence on the role and the impact of pharmacists' activities: Development of an online tool].,"

Background

Considering the increase in healthcare expenses, stakeholders need to make choices, including healthcare program funding, and professional activities to prioritise.

Purpose

The main objective was to list the evidence on the role and impact of pharmacists.

Methods

Themes were chosen according to three dimensions of the pharmacist profession: (1) activities, (2) healthcare programs and (3) disorders. A literature search was conducted for each theme. A bibliographic data sheet was completed for each article. An analytic data sheet, consisting of descriptive and impact outcomes, was also completed for the most relevant articles. For each theme, a synthesis was elaborated. The website Impact Pharmacie (http://impactpharmacie.org) was developed.

Results

A total of 70 syntheses were written. A total of 1442 articles were included with a bibliographic data sheet, and 914 with an analytic data sheet. Six hundred and fifty articles had positive outcomes on the role of the pharmacist, representing 803 different positive outcome markers. Pharmacists had positive outcomes on morbidity (n=212), adherence (n=92), costs (n=36), adverse effects (n=26), drug errors (n=31) and mortality (n=13).

Conclusion

This descriptive study presents the review of the evidence on the role and the impact of pharmacists activities, which led to the Impact Pharmacie website. This francophone website can contribute to support clinical pharmacy development, and to a better use of pharmacists in healthcare.",2014-11-11 +23019048,Systematic meta-analyses and field synopsis of genetic association studies in colorectal cancer.,"

Background

Colorectal cancer is a major global public health problem, with approximately 950,000 patients newly diagnosed each year. We report the first comprehensive field synopsis and creation of a parallel publicly available and regularly updated database (CRCgene) that catalogs all genetic association studies on colorectal cancer (http://www.chs.med.ed.ac.uk/CRCgene/).

Methods

We performed two independent systematic reviews, reviewing 10 145 titles, then collated and extracted data from 635 publications reporting on 445 polymorphisms in 110 different genes. We carried out meta-analyses to derive summary effect estimates for 92 polymorphisms in 64 different genes. For assessing the credibility of associations, we applied the Venice criteria and the Bayesian False Discovery Probability (BFDP) test.

Results

We consider 16 independent variants at 13 loci (MUTYH, MTHFR, SMAD7, and common variants tagging the loci 8q24, 8q23.3, 11q23.1, 14q22.2, 1q41, 20p12.3, 20q13.33, 3q26.2, 16q22.1, and 19q13.1) to have the most highly credible associations with colorectal cancer, with all variants except those in MUTYH and 19q13.1 reaching genome-wide statistical significance in at least one meta-analysis model. We identified less-credible (higher heterogeneity, lower statistical power, BFDP >0.2) associations with 23 more variants at 22 loci. The meta-analyses of a further 20 variants for which associations have previously been reported found no evidence to support these as true associations.

Conclusion

The CRCgene database provides the context for genetic association data to be interpreted appropriately and helps inform future research direction.",2012-09-26 +26209433,GeneVetter: a web tool for quantitative monogenic assessment of rare diseases.,"

Unlabelled

When performing DNA sequencing to diagnose affected individuals with monogenic forms of rare diseases, accurate attribution of causality to detected variants is imperative but imperfect. Even if a gene has variants already known to cause a disease, rare disruptive variants predicted to be causal are not always so, mainly due to imperfect ability to predict the pathogenicity of variants. Existing population-scale sequence resources such as 1000 Genomes are useful to quantify the 'background prevalence' of an unaffected individual being falsely predicted to carry causal variants. We developed GeneVetter to allow users to quantify the 'background prevalence' of subjects with predicted causal variants within specific genes under user-specified filtering parameters. GeneVetter helps quantify uncertainty in monogenic diagnosis and design genetic studies with support for power and sample size calculations for specific genes with specific filtering criteria. GeneVetter also allows users to analyze their own sequence data without sending genotype information over the Internet. Overall, GeneVetter is an interactive web tool that facilitates quantifying and accounting for the background prevalence of predicted pathogenic variants in a population.

Availability and implementation

GeneVetter is available at http://genevetter.org/

Contact

mgsamps@med.umich.edu or hmkang@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-23 +24949246,"dDocent: a RADseq, variant-calling pipeline designed for population genomics of non-model organisms.","Restriction-site associated DNA sequencing (RADseq) has become a powerful and useful approach for population genomics. Currently, no software exists that utilizes both paired-end reads from RADseq data to efficiently produce population-informative variant calls, especially for non-model organisms with large effective population sizes and high levels of genetic polymorphism. dDocent is an analysis pipeline with a user-friendly, command-line interface designed to process individually barcoded RADseq data (with double cut sites) into informative SNPs/Indels for population-level analyses. The pipeline, written in BASH, uses data reduction techniques and other stand-alone software packages to perform quality trimming and adapter removal, de novo assembly of RAD loci, read mapping, SNP and Indel calling, and baseline data filtering. Double-digest RAD data from population pairings of three different marine fishes were used to compare dDocent with Stacks, the first generally available, widely used pipeline for analysis of RADseq data. dDocent consistently identified more SNPs shared across greater numbers of individuals and with higher levels of coverage. This is due to the fact that dDocent quality trims instead of filtering, incorporates both forward and reverse reads (including reads with INDEL polymorphisms) in assembly, mapping, and SNP calling. The pipeline and a comprehensive user guide can be found at http://dDocent.wordpress.com.",2014-06-10 +23673292,A review of genomic data warehousing systems.,"To facilitate the integration and querying of genomics data, a number of generic data warehousing frameworks have been developed. They differ in their design and capabilities, as well as their intended audience. 
We provide a comprehensive and quantitative review of those genomic data warehousing frameworks in the context of large-scale systems biology. We reviewed in detail four genomic data warehouses (BioMart, BioXRT, InterMine and PathwayTools) freely available to the academic community. We quantified 20 aspects of the warehouses, covering the accuracy of their responses, their computational requirements and development efforts. Performance of the warehouses was evaluated under various hardware configurations to help laboratories optimize hardware expenses. Each aspect of the benchmark may be dynamically weighted by scientists using our online tool BenchDW (http://warehousebenchmark.fungalgenomics.ca/benchmark/) to build custom warehouse profiles and tailor our results to their specific needs.",2014-07-01 +21803806,"Gee Fu: a sequence version and web-services database tool for genomic assembly, genome feature and NGS data.","

Summary

Scientists now use high-throughput sequencing technologies and short-read assembly methods to create draft genome assemblies in just days. Tools and pipelines like the assembler, and the workflow management environments make it easy for a non-specialist to implement complicated pipelines to produce genome assemblies and annotations very quickly. Such accessibility results in a proliferation of assemblies and associated files, often for many organisms. These assemblies get used as a working reference by lots of different workers, from a bioinformatician doing gene prediction or a bench scientist designing primers for PCR. Here we describe Gee Fu, a database tool for genomic assembly and feature data, including next-generation sequence alignments. Gee Fu is an instance of a Ruby-On-Rails web application on a feature database that provides web and console interfaces for input, visualization of feature data via AnnoJ, access to data through a web-service interface, an API for direct data access by Ruby scripts and access to feature data stored in BAM files. Gee Fu provides a platform for storing and sharing different versions of an assembly and associated features that can be accessed and updated by bench biologists and bioinformaticians in ways that are easy and useful for each.

Availability

http://tinyurl.com/geefu

Contact

dan.maclean@tsl.ac.uk.",2011-07-29 +25391400,"Hi-Corrector: a fast, scalable and memory-efficient package for normalizing large-scale Hi-C data.","

Unlabelled

Genome-wide proximity ligation assays, e.g. Hi-C and its variant TCC, have recently become important tools to study spatial genome organization. Removing biases from chromatin contact matrices generated by such techniques is a critical preprocessing step of subsequent analyses. The continuing decline of sequencing costs has led to an ever-improving resolution of the Hi-C data, resulting in very large matrices of chromatin contacts. Such large-size matrices, however, pose a great challenge on the memory usage and speed of its normalization. Therefore, there is an urgent need for fast and memory-efficient methods for normalization of Hi-C data. We developed Hi-Corrector, an easy-to-use, open source implementation of the Hi-C data normalization algorithm. Its salient features are (i) scalability-the software is capable of normalizing Hi-C data of any size in reasonable times; (ii) memory efficiency-the sequential version can run on any single computer with very limited memory, no matter how little; (iii) fast speed-the parallel version can run very fast on multiple computing nodes with limited local memory.

Availability and implementation

The sequential version is implemented in ANSI C and can be easily compiled on any system; the parallel version is implemented in ANSI C with the MPI library (a standardized and portable parallel environment designed for solving large-scale scientific problems). The package is freely available at http://zhoulab.usc.edu/Hi-Corrector/.",2014-11-12 +27086844,Potential environmental hazards of photovoltaic panel disposal: Discussion of Tammaro et al. (2015).,"In their recent publication in Journal of Hazardous Materials (http://dx.doi.org/10.1016/j.jhazmat.2015.12.018), Tammaro et al. evaluate the potential environmental impacts of an illegal disposal scenario of photovoltaic panels in the European Union. Critical assumptions that underlie the study's conclusions would benefit from clarification. A scenario of photovoltaic panels finely crushed and abandoned in nature is not supported with field breakage data, in which photovoltaic panels remain largely intact with a number of glass fractures or cracks, as opposed to breakage into cm-scale pieces. Fate and transport analysis is necessary to evaluate how leachate transforms and disperses in moving from the point of emissions to the point of exposure, prior to making comparisons with drinking water limits. Some hazardous metal content has declined in both crystalline silicon and thin film panels, including a 50% decline in semiconductor material intensity in CdTe thin film panels (g CdTe/W) from 2009 to 2015. Waste laws, recycling requirements and minimum treatment standards under the EU WEEE Directive, and illegal disposal rates affect the accuracy of forecasts of releasable metal amounts from PV panels in Europe through 2050.",2016-04-11 +25306238,VAS: a convenient web portal for efficient integration of genomic features with millions of genetic variants.,"

Background

High-throughput experimental methods have fostered the systematic detection of millions of genetic variants from any human genome. To help explore the potential biological implications of these genetic variants, software tools have been previously developed for integrating various types of information about these genomic regions from multiple data sources. Most of these tools were designed either for studying a small number of variants at a time, or for local execution on powerful machines.

Results

To make exploration of whole lists of genetic variants simple and accessible, we have developed a new Web-based system called VAS (Variant Annotation System, available at https://yiplab.cse.cuhk.edu.hk/vas/). It provides a large variety of information useful for studying both coding and non-coding variants, including whole-genome transcription factor binding, open chromatin and transcription data from the ENCODE consortium. By means of data compression, millions of variants can be uploaded from a client machine to the server in less than 50 megabytes of data. On the server side, our customized data integration algorithms can efficiently link millions of variants with tens of whole-genome datasets. These two enabling technologies make VAS a practical tool for annotating genetic variants from large genomic studies. We demonstrate the use of VAS in annotating genetic variants obtained from a migraine meta-analysis study and multiple data sets from the Personal Genomes Project. We also compare the running time of annotating 6.4 million SNPs of the CEU trio by VAS and another tool, showing that VAS is efficient in handling new variant lists without requiring any pre-computations.

Conclusions

VAS is specially designed to handle annotation tasks with long lists of genetic variants and large numbers of annotating features efficiently. It is complementary to other existing tools with more specific aims such as evaluating the potential impacts of genetic variants in terms of disease risk. We recommend using VAS for a quick first-pass identification of potentially interesting genetic variants, to minimize the time required for other more in-depth downstream analyses.",2014-10-11 +26407127,miRTex: A Text Mining System for miRNA-Gene Relation Extraction.,"MicroRNAs (miRNAs) regulate a wide range of cellular and developmental processes through gene expression suppression or mRNA degradation. Experimentally validated miRNA gene targets are often reported in the literature. In this paper, we describe miRTex, a text mining system that extracts miRNA-target relations, as well as miRNA-gene and gene-miRNA regulation relations. The system achieves good precision and recall when evaluated on a literature corpus of 150 abstracts with F-scores close to 0.90 on the three different types of relations. We conducted full-scale text mining using miRTex to process all the Medline abstracts and all the full-length articles in the PubMed Central Open Access Subset. The results for all the Medline abstracts are stored in a database for interactive query and file download via the website at http://proteininformationresource.org/mirtex. Using miRTex, we identified genes potentially regulated by miRNAs in Triple Negative Breast Cancer, as well as miRNA-gene relations that, in conjunction with kinase-substrate relations, regulate the response to abiotic stress in Arabidopsis thaliana. 
These two use cases demonstrate the usefulness of miRTex text mining in the analysis of miRNA-regulated biological processes.",2015-09-25 +22999295,Next-generation personalized drug discovery: the tripeptide GHK hits center stage in chronic obstructive pulmonary disease.,"Chronic lung diseases (CLDs), including chronic obstructive pulmonary disease (COPD), are the second leading cause of death worldwide. The first report of database-driven drug discovery in carefully phenotyped COPD specimens has now been published in Genome Medicine, combining gene expression data in defined emphysematous areas with connectivity-map-based compound discovery. This joint effort may lead the way to novel and potentially more efficient concepts of personalized drug discovery for COPD in particular, and CLD in general. See research article http://genomemedicine.com/content/4/8/67/abstract.",2012-09-21 +26406914,"De Novo Assembly of the Whole Transcriptome of the Wild Embryo, Preleptocephalus, Leptocephalus, and Glass Eel of Anguilla japonica and Deciphering the Digestive and Absorptive Capacities during Early Development.","Natural stocks of Japanese eel (Anguilla japonica) have decreased drastically because of overfishing, habitat destruction, and changes in the ocean environment over the past few decades. However, to date, artificial mass production of glass eels is far from reality because of the lack of appropriate feed for the eel larvae. In this study, wild glass eel, leptocephali, preleptocephali, and embryos were collected to conduct RNA-seq. Approximately 279 million reads were generated and assembled into 224,043 transcripts. The transcript levels of genes coding for digestive enzymes and nutrient transporters were investigated to estimate the capacities for nutrient digestion and absorption during early development. 
The results showed that the transcript levels of protein digestion enzymes were higher than those of carbohydrate and lipid digestion enzymes in the preleptocephali and leptocephali, and the transcript levels of amino acid transporters were also higher than those of glucose and fructose transporters and the cholesterol transporter. In addition, the transcript levels of glucose and fructose transporters were significantly increasing in the leptocephali. Moreover, the transcript levels of protein, carbohydrate, and lipid digestion enzymes were balanced in the glass eel, but the transcript levels of amino acid transporters were higher than those of glucose and cholesterol transporters. These findings implied that preleptocephali and leptocephali prefer high-protein food, and the nutritional requirements of monosaccharides and lipids for the eel larvae vary with growth. An online database (http://molas.iis.sinica.edu.tw/jpeel/) that will provide the sequences and the annotated results of assembled transcripts was established for the eel research community.",2015-09-25 +25649271,Evaluation of commercially available RNA amplification kits for RNA sequencing using very low input amounts of total RNA.,"This article includes supplemental data. Please visit http://www.fasebj.org to obtain this information.Multiple recent publications on RNA sequencing (RNA-seq) have demonstrated the power of next-generation sequencing technologies in whole-transcriptome analysis. Vendor-specific protocols used for RNA library construction often require at least 100 ng total RNA. However, under certain conditions, much less RNA is available for library construction. In these cases, effective transcriptome profiling requires amplification of subnanogram amounts of RNA. 
Several commercial RNA amplification kits are available for amplification prior to library construction for next-generation sequencing, but these kits have not been comprehensively field evaluated for accuracy and performance of RNA-seq for picogram amounts of RNA. To address this, 4 types of amplification kits were tested with 3 different concentrations, from 5 ng to 50 pg, of a commercially available RNA. Kits were tested at multiple sites to assess reproducibility and ease of use. The human total reference RNA used was spiked with a control pool of RNA molecules in order to further evaluate quantitative recovery of input material. Additional control data sets were generated from libraries constructed following polyA selection or ribosomal depletion using established kits and protocols. cDNA was collected from the different sites, and libraries were synthesized at a single site using established protocols. Sequencing runs were carried out on the Illumina platform. Numerous metrics were compared among the kits and dilutions used. Overall, no single kit appeared to meet all the challenges of small input material. However, it is encouraging that excellent data can be recovered with even the 50 pg input total RNA.",2015-04-01 +27141091,Chemical entity recognition in patents by combining dictionary-based and statistical approaches. ,"We describe the development of a chemical entity recognition system and its application in the CHEMDNER-patent track of BioCreative 2015. This community challenge includes a Chemical Entity Mention in Patents (CEMP) recognition task and a Chemical Passage Detection (CPD) classification task. We addressed both tasks by an ensemble system that combines a dictionary-based approach with a statistical one. For this purpose the performance of several lexical resources was assessed using Peregrine, our open-source indexing engine. 
We combined our dictionary-based results on the patent corpus with the results of tmChem, a chemical recognizer using a conditional random field classifier. To improve the performance of tmChem, we utilized three additional features, viz. part-of-speech tags, lemmas and word-vector clusters. When evaluated on the training data, our final system obtained an F-score of 85.21% for the CEMP task, and an accuracy of 91.53% for the CPD task. On the test set, the best system ranked sixth among 21 teams for CEMP with an F-score of 86.82%, and second among nine teams for CPD with an accuracy of 94.23%. The differences in performance between the best ensemble system and the statistical system separately were small. Database URL: http://biosemantics.org/chemdner-patents.",2016-05-02 +24753486,aLFQ: an R-package for estimating absolute protein quantities from label-free LC-MS/MS proteomics data.,"

Motivation

The determination of absolute quantities of proteins in biological samples is necessary for multiple types of scientific inquiry. While relative quantification has been commonly used in proteomics, few proteomic datasets measuring absolute protein quantities have been reported to date. Various technologies have been applied using different types of input data, e.g. ion intensities or spectral counts, as well as different absolute normalization strategies. To date, a user-friendly and transparent software supporting large-scale absolute protein quantification has been lacking.

Results

We present a bioinformatics tool, termed aLFQ, which supports the commonly used absolute label-free protein abundance estimation methods (TopN, iBAQ, APEX, NSAF and SCAMPI) for LC-MS/MS proteomics data, together with validation algorithms enabling automated data analysis and error estimation.

Availability and implementation

aLFQ is written in R and freely available under the GPLv3 from CRAN (http://www.cran.r-project.org). Instructions and example data are provided in the R-package. The raw data can be obtained from the PeptideAtlas raw data repository (PASS00321).

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-20 +25550326,PRSice: Polygenic Risk Score software.,"

Summary

A polygenic risk score (PRS) is a sum of trait-associated alleles across many genetic loci, typically weighted by effect sizes estimated from a genome-wide association study. The application of PRS has grown in recent years as their utility for detecting shared genetic aetiology among traits has become appreciated; PRS can also be used to establish the presence of a genetic signal in underpowered studies, to infer the genetic architecture of a trait, for screening in clinical trials, and can act as a biomarker for a phenotype. Here we present the first dedicated PRS software, PRSice ('precise'), for calculating, applying, evaluating and plotting the results of PRS. PRSice can calculate PRS at a large number of thresholds (""high resolution"") to provide the best-fit PRS, as well as provide results calculated at broad P-value thresholds, can thin Single Nucleotide Polymorphisms (SNPs) according to linkage disequilibrium and P-value or use all SNPs, handles genotyped and imputed data, can calculate and incorporate ancestry-informative variables, and can apply PRS across multiple traits in a single run. We exemplify the use of PRSice via application to data on schizophrenia, major depressive disorder and smoking, illustrate the importance of identifying the best-fit PRS and estimate a P-value significance threshold for high-resolution PRS studies.

Availability and implementation

PRSice is written in R, including wrappers for bash data management scripts and PLINK-1.9 to minimize computational time. PRSice runs as a command-line program with a variety of user-options, and is freely available for download from http://PRSice.info

Contact

jack.euesden@kcl.ac.uk or paul.oreilly@kcl.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-29 +25182276,Crux: rapid open source protein tandem mass spectrometry analysis.,"Efficiently and accurately analyzing big protein tandem mass spectrometry data sets requires robust software that incorporates state-of-the-art computational, machine learning, and statistical methods. The Crux mass spectrometry analysis software toolkit ( http://cruxtoolkit.sourceforge.net ) is an open source project that aims to provide users with a cross-platform suite of analysis tools for interpreting protein mass spectrometry data.",2014-09-09 +21816093,A regression system for estimation of errors introduced by confocal imaging into gene expression data in situ.,"

Background

Accuracy of the data extracted from two-dimensional confocal images is limited due to experimental errors that arise in course of confocal scanning. The common way to reduce the noise in images is sequential scanning of the same specimen several times with the subsequent averaging of multiple frames. Attempts to increase the dynamical range of an image by setting too high values of microscope PMT parameters may cause clipping of single frames and introduce errors into the data extracted from the averaged images. For the estimation and correction of this kind of errors a method based on censoring technique (Myasnikova et al., 2009) is used. However, the method requires the availability of all the confocal scans along with the averaged image, which is normally not provided by the standard scanning procedure.

Results

To predict error size in the data extracted from the averaged image we developed a regression system. The system is trained on the learning sample composed of images obtained from three different microscopes at different combinations of PMT parameters, and for each image all the scans are saved. The system demonstrates high prediction accuracy and was applied for correction of errors in the data on segmentation gene expression in Drosophila blastoderm stored in the FlyEx database (http://urchin.spbcas.ru/flyex/, http://flyex.uchicago.edu/flyex/). The prediction method is realized as a software tool CorrectPattern freely available at http://urchin.spbcas.ru/asp/2011/emm/.

Conclusions

We created a regression system and software to predict the magnitude of errors in the data obtained from a confocal image based on information about microscope parameters used for the image acquisition. An important advantage of the developed prediction system is the possibility to accurately correct the errors in data obtained from strongly clipped images, thereby allowing to obtain images of the higher dynamical range and thus to extract more detailed quantitative information from them.",2011-08-04 +26612489,DASACT: A decision aiding software for axiomatic consensus theory.,"There have been various attempts, solutions, and approaches towards constructing an appropriate consensus tree based on a given set of phylogenetic trees. However, for practitioners, it is not always clear, for a given data set, which of these would create the most relevant consensus tree. In this paper, we introduce an open-source software called DASACT (Decision Aiding Software for Axiomatic Consensus Theory) created to assist practitioners on choosing the most appropriate consensus function. It is based on an exhaustive evaluation of axiomatic properties and consensus functions, which define the knowledge space as a concept lattice. Using a selection of axiomatic properties provided by the user, it is able to aid the user in choosing the most suitable function. DASACT is freely available at http://www.cs.unic.ac.cy/florent/software.htm.",2015-11-24 +23685612,The PhyloFacts FAT-CAT web server: ortholog identification and function prediction using fast approximate tree classification.,"The PhyloFacts 'Fast Approximate Tree Classification' (FAT-CAT) web server provides a novel approach to ortholog identification using subtree hidden Markov model-based placement of protein sequences to phylogenomic orthology groups in the PhyloFacts database. 
Results on a data set of microbial, plant and animal proteins demonstrate FAT-CAT's high precision at separating orthologs and paralogs and robustness to promiscuous domains. We also present results documenting the precision of ortholog identification based on subtree hidden Markov model scoring. The FAT-CAT phylogenetic placement is used to derive a functional annotation for the query, including confidence scores and drill-down capabilities. PhyloFacts' broad taxonomic and functional coverage, with >7.3 M proteins from across the Tree of Life, enables FAT-CAT to predict orthologs and assign function for most sequence inputs. Four pipeline parameter presets are provided to handle different sequence types, including partial sequences and proteins containing promiscuous domains; users can also modify individual parameters. PhyloFacts trees matching the query can be viewed interactively online using the PhyloScope Javascript tree viewer and are hyperlinked to various external databases. The FAT-CAT web server is available at http://phylogenomics.berkeley.edu/phylofacts/fatcat/.",2013-05-18 +26356253,OpenGrowth: An Automated and Rational Algorithm for Finding New Protein Ligands.,"We present a new open-source software, called OpenGrowth, which aims to create de novo ligands by connecting small organic fragments in the active site of proteins. Molecule growth is biased to produce structures that statistically resemble drugs in an input training database. Consequently, the produced molecules have superior synthetic accessibility and pharmacokinetic properties compared with randomly grown molecules. The growth process can take into account the flexibility of the target protein and can be started from a seed to mimic R-group strategy or fragment-based drug discovery. Primary applications of the software on the HIV-1 protease allowed us to quickly identify new inhibitors with a predicted Kd as low as 18 nM. 
We also present a graphical user interface that allows a user to select easily the fragments to include in the growth process. OpenGrowth is released under the GNU GPL license and is available free of charge on the authors' website and at http://opengrowth.sourceforge.net/ .",2015-09-23 +26411870,JBASE: Joint Bayesian Analysis of Subphenotypes and Epistasis.,"

Motivation

Rapid advances in genotyping and genome-wide association studies have enabled the discovery of many new genotype-phenotype associations at the resolution of individual markers. However, these associations explain only a small proportion of theoretically estimated heritability of most diseases. In this work, we propose an integrative mixture model called JBASE: joint Bayesian analysis of subphenotypes and epistasis. JBASE explores two major reasons of missing heritability: interactions between genetic variants, a phenomenon known as epistasis and phenotypic heterogeneity, addressed via subphenotyping.

Results

Our extensive simulations in a wide range of scenarios repeatedly demonstrate that JBASE can identify true underlying subphenotypes, including their associated variants and their interactions, with high precision. In the presence of phenotypic heterogeneity, JBASE has higher Power and lower Type 1 Error than five state-of-the-art approaches. We applied our method to a sample of individuals from Mexico with Type 2 diabetes and discovered two novel epistatic modules, including two loci each, that define two subphenotypes characterized by differences in body mass index and waist-to-hip ratio. We successfully replicated these subphenotypes and epistatic modules in an independent dataset from Mexico genotyped with a different platform.

Availability and implementation

JBASE is implemented in C++, supported on Linux and is available at http://www.cs.toronto.edu/~goldenberg/JBASE/jbase.tar.gz. The genotype data underlying this study are available upon approval by the ethics review board of the Medical Centre Siglo XXI. Please contact Dr Miguel Cruz at mcruzl@yahoo.com for assistance with the application.

Contact

anna.goldenberg@utoronto.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-26 +25480376,SMARTS: reconstructing disease response networks from multiple individuals using time series gene expression data.,"

Motivation

Current methods for reconstructing dynamic regulatory networks are focused on modeling a single response network using model organisms or cell lines. Unlike these models or cell lines, humans differ in their background expression profiles due to age, genetics and life factors. In addition, there are often differences in start and end times for time series human data and in the rate of progress based on the specific individual. Thus, new methods are required to integrate time series data from multiple individuals when modeling and constructing disease response networks.

Results

We developed Scalable Models for the Analysis of Regulation from Time Series (SMARTS), a method integrating static and time series data from multiple individuals to reconstruct condition-specific response networks in an unsupervised way. Using probabilistic graphical models, SMARTS iterates between reconstructing different regulatory networks and assigning individuals to these networks, taking into account varying individual start times and response rates. These models can be used to group different sets of patients and to identify transcription factors that differentiate the observed responses between these groups. We applied SMARTS to analyze human response to influenza and mouse brain development. In both cases, it was able to greatly improve baseline groupings while identifying key relevant TFs that differ between the groups. Several of these groupings and TFs are known to regulate the relevant processes while others represent novel hypotheses regarding immune response and development.

Availability and implementation

Software and Supplementary information are available at http://sb.cs.cmu.edu/smarts/.

Contact

zivbj@cs.cmu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-04 +25078381,A guide for the utilization of Health Insurance Review and Assessment Service National Patient Samples.,"The claims data of the Health Insurance Review and Assessment Service (HIRA) is an important source of information for healthcare service research. The claims data of HIRA is collected when healthcare service providers submit a claim to HIRA to be reimbursed for a service that they provided to patients. To improve the accessibility of healthcare service researchers to claims data of HIRA, HIRA has developed the Patient Samples which are extracted using a stratified randomized sampling method. The Patient Samples of HIRA consist of five tables: a table for general information (Table 20) containing socio-demographic information such as gender, age and medical aid, indicators for inpatient and outpatient services; a table for specific information on healthcare services provided (Table 30); a table for diagnostic information (Table 40); a table for outpatient prescriptions (Table 53) and a table for information on healthcare service providers (Table of providers). Researchers who are interested in using the Patient Sample data for research can apply via HIRA's website (https://www.hira.or.kr).",2014-07-30 +26333163,Classification of Amino Acid Substitutions in Mismatch Repair Proteins Using PON-MMR2.,"Variations in mismatch repair (MMR) system genes are causative of Lynch syndrome and other cancers. Thousands of variants have been identified in MMR genes, but the clinical relevance is known for only a small proportion. Recently, the InSiGHT group classified 2,360 MMR variants into five classes. One-third of variants, majority of which is nonsynonymous variants, remain to be of uncertain clinical relevance. Computational tools can be used to prioritize variants for disease relevance investigations. 
Previously, we classified 248 MMR variants as likely pathogenic and likely benign using PON-MMR. We have developed a novel tool, PON-MMR2, which is trained on a larger and more reliable dataset. In performance comparison, PON-MMR2 outperforms both generic tolerance prediction methods as well as methods optimized for MMR variants. It achieves accuracy and MCC of 0.89 and 0.78, respectively, in cross-validation and 0.86 and 0.69, respectively, on an independent test dataset. We classified 354 class 3 variants in InSiGHT database as well as all possible amino acid substitutions in four MMR proteins. Likely harmful variants mainly appear in the protein core, whereas likely benign variants are on the surface. PON-MMR2 is a highly reliable tool to prioritize variants for functional analysis. It is freely available at http://structure.bmc.lu.se/PON-MMR2/.",2015-09-22 +24981074,DegPack: a web package using a non-parametric and information theoretic algorithm to identify differentially expressed genes in multiclass RNA-seq samples.,"Gene expression in the whole cell can be routinely measured by microarray technologies or recently by using sequencing technologies. Using these technologies, identifying differentially expressed genes (DEGs) among multiple phenotypes is the very first step to understand difference between phenotypes. Thus many methods for detecting DEGs between two groups have been developed. For example, T-test and relative entropy are used for detecting difference between two probability distributions. When more than two phenotypes are considered, these methods are not applicable and other methods such as ANOVA F-test and Kruskal-Wallis are used for finding DEGs in the multiclass data. However, ANOVA F-test assumes a normal distribution and it is not designed to identify DEGs where genes are expressed distinctively in each of phenotypes. Kruskal-Wallis method, a non-parametric method, is more robust but sensitive to outliers. 
In this paper, we propose a non-parametric and information theoretical approach for identifying DEGs. Our method identified DEGs effectively and it is shown less sensitive to outliers in two data sets: a three-class drought resistant rice data set and a three-class breast cancer data set. In extensive experiments with simulated and real data, our method was shown to outperform existing tools in terms of accuracy of characterizing phenotypes using DEGs. A web service is implemented at http://biohealth.snu.ac.kr/software/degpack for the analysis of multi-class data and it includes SAMseq and PoissonSeq methods in addition to the method described in this paper.",2014-06-26 +26400163,PlantDHS: a database for DNase I hypersensitive sites in plants.,"Gene expression is regulated by orchestrated binding of regulatory proteins to promoters and other cis-regulatory DNA elements (CREs). Several plant databases have been developed for mapping promoters or DNA motifs associated with promoters. However, there is a lack of databases that allow investigation for all CREs. Here we present PlantDHS (http://plantdhs.org), a plant DNase I hypersensitive site (DHS) database that integrates histone modification, RNA sequencing, nucleosome positioning/occupancy, transcription factor binding sites, and genomic sequence within an easily navigated user interface. DHSs are indicative of all CREs, including promoters, enhancers, silencers, insulators and transcription factor binding sites; all of which play immense roles in global gene expression regulation. PlantDHS provides a platform to predict all CREs associated with individual genes from three model plant species, including Arabidopsis thaliana, Brachypodium distachyon and rice (Oryza sativa). 
PlantDHS is especially valuable in the detection of distant CREs that are located away from promoters.",2015-09-22 +25684545,Clumpak: a program for identifying clustering modes and packaging population structure inferences across K.,"The identification of the genetic structure of populations from multilocus genotype data has become a central component of modern population-genetic data analysis. Application of model-based clustering programs often entails a number of steps, in which the user considers different modelling assumptions, compares results across different predetermined values of the number of assumed clusters (a parameter typically denoted K), examines multiple independent runs for each fixed value of K, and distinguishes among runs belonging to substantially distinct clustering solutions. Here, we present Clumpak (Cluster Markov Packager Across K), a method that automates the postprocessing of results of model-based population structure analyses. For analysing multiple independent runs at a single K value, Clumpak identifies sets of highly similar runs, separating distinct groups of runs that represent distinct modes in the space of possible solutions. This procedure, which generates a consensus solution for each distinct mode, is performed by the use of a Markov clustering algorithm that relies on a similarity matrix between replicate runs, as computed by the software Clumpp. Next, Clumpak identifies an optimal alignment of inferred clusters across different values of K, extending a similar approach implemented for a fixed K in Clumpp and simplifying the comparison of clustering results across different K values. Clumpak incorporates additional features, such as implementations of methods for choosing K and comparing solutions obtained by different programs, models, or data subsets. 
Clumpak, available at http://clumpak.tau.ac.il, simplifies the use of model-based analyses of population structure in population genetics and molecular ecology.",2015-02-27 +23156680,[Protein families specific for plastoms in small taxonomy groups of algae and protozoa].,Protein clustering is useful for refinement of protein annotation as well as cluster finding by its phylogenetic profile. We performed clustering of plastid encoded proteins from Rhodophyta as well as other plastid containing species related to Rhodophyta branch on species tree. Data base for cluster finding by its phylogenetic profile is available on http://lab6.iitp.ru/ppc/redline. By means of the database distinctive proteins for plastoms from small taxonomy groups of algae and protozoa were found. We performed finding and analysis of RNA polymerases encoded in Apicomplexa nuclei.,2012-09-01 +25411634,Annotation of phenotypic diversity: decoupling data curation and ontology curation using Phenex.,"

Background

Phenex (http://phenex.phenoscape.org/) is a desktop application for semantically annotating the phenotypic character matrix datasets common in evolutionary biology. Since its initial publication, we have added new features that address several major bottlenecks in the efficiency of the phenotype curation process: allowing curators during the data curation phase to provisionally request terms that are not yet available from a relevant ontology; supporting quality control against annotation guidelines to reduce later manual review and revision; and enabling the sharing of files for collaboration among curators.

Results

We decoupled data annotation from ontology development by creating an Ontology Request Broker (ORB) within Phenex. Curators can use the ORB to request a provisional term for use in data annotation; the provisional term can be automatically replaced with a permanent identifier once the term is added to an ontology. We added a set of annotation consistency checks to prevent common curation errors, reducing the need for later correction. We facilitated collaborative editing by improving the reliability of Phenex when used with online folder sharing services, via file change monitoring and continual autosave.

Conclusions

With the addition of these new features, and in particular the Ontology Request Broker, Phenex users have been able to focus more effectively on data annotation. Phenoscape curators using Phenex have reported a smoother annotation workflow, with much reduced interruptions from ontology maintenance and file management issues.",2014-11-05 +26456067,"Forensic Loci Allele Database (FLAD): Automatically generated, permanent identifiers for sequenced forensic alleles.","It is difficult to predict if and when massively parallel sequencing of forensic STR loci will replace capillary electrophoresis as the new standard technology in forensic genetics. The main benefits of sequencing are increased multiplexing scales and SNP detection. There is not yet a consensus on how sequenced profiles should be reported. We present the Forensic Loci Allele Database (FLAD) service, made freely available on http://forensic.ugent.be/FLAD/. It offers permanent identifiers for sequenced forensic alleles (STR or SNP) and their microvariants for use in forensic allele nomenclature. Analogous to Genbank, its aim is to provide permanent identifiers for forensically relevant allele sequences. Researchers that are developing forensic sequencing kits or are performing population studies, can register on http://forensic.ugent.be/FLAD/ and add loci and allele sequences with a short and simple application interface (API).",2015-09-21 +22782549,dbSNO: a database of cysteine S-nitrosylation.,"

Unlabelled

S-nitrosylation (SNO), a selective and reversible protein post-translational modification that involves the covalent attachment of nitric oxide (NO) to the sulfur atom of cysteine, critically regulates protein activity, localization and stability. Due to its importance in regulating protein functions and cell signaling, a mass spectrometry-based proteomics method rapidly evolved to increase the dataset of experimentally determined SNO sites. However, there is currently no database dedicated to the integration of all experimentally verified S-nitrosylation sites with their structural or functional information. Thus, the dbSNO database is created to integrate all available datasets and to provide their structural analysis. Up to April 15, 2012, the dbSNO has manually accumulated >3000 experimentally verified S-nitrosylated peptides from 219 research articles using a text mining approach. To solve the heterogeneity among the data collected from different sources, the sequence identity of these reported S-nitrosylated peptides are mapped to the UniProtKB protein entries. To delineate the structural correlation and consensus motif of these SNO sites, the dbSNO database also provides structural and functional analyses, including the motifs of substrate sites, solvent accessibility, protein secondary and tertiary structures, protein domains and gene ontology.

Availability

The dbSNO is now freely accessible via http://dbSNO.mbc.nctu.edu.tw. The database content is regularly updated upon collecting new data obtained from continuously surveying research articles.",2012-07-10 +27131377,Dali server update.,"The Dali server (http://ekhidna2.biocenter.helsinki.fi/dali) is a network service for comparing protein structures in 3D. In favourable cases, comparing 3D structures may reveal biologically interesting similarities that are not detectable by comparing sequences. The Dali server has been running in various places for over 20 years and is used routinely by crystallographers on newly solved structures. The latest update of the server provides enhanced analytics for the study of sequence and structure conservation. The server performs three types of structure comparisons: (i) Protein Data Bank (PDB) search compares one query structure against those in the PDB and returns a list of similar structures; (ii) pairwise comparison compares one query structure against a list of structures specified by the user; and (iii) all against all structure comparison returns a structural similarity matrix, a dendrogram and a multidimensional scaling projection of a set of structures specified by the user. Structural superimpositions are visualized using the Java-free WebGL viewer PV. The structural alignment view is enhanced by sequence similarity searches against Uniprot. The combined structure-sequence alignment information is compressed to a stack of aligned sequence logos. In the stack, each structure is structurally aligned to the query protein and represented by a sequence logo.",2016-04-29 +21893519,Integrated pathway-level analysis of transcriptomics and metabolomics data with IMPaLA.,"

Summary

Pathway-level analysis is a powerful approach enabling interpretation of post-genomic data at a higher level than that of individual biomolecules. Yet, it is currently hard to integrate more than one type of omics data in such an approach. Here, we present a web tool 'IMPaLA' for the joint pathway analysis of transcriptomics or proteomics and metabolomics data. It performs over-representation or enrichment analysis with user-specified lists of metabolites and genes using over 3000 pre-annotated pathways from 11 databases. As a result, pathways can be identified that may be disregulated on the transcriptional level, the metabolic level or both. Evidence of pathway disregulation is combined, allowing for the identification of additional pathways with changed activity that would not be highlighted when analysis is applied to any of the functional levels alone. The tool has been implemented both as an interactive website and as a web service to allow a programming interface.

Availability

The web interface of IMPaLA is available at http://impala.molgen.mpg.de. A web services programming interface is provided at http://impala.molgen.mpg.de/wsdoc.

Contact

kamburov@molgen.mpg.de; r.cavill@imperial.ac.uk; h.keun@imperial.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-04 +22218860,The integration and annotation of the human interactome in the UniHI Database.,"In recent years, remarkable progress has been made toward the systematic charting of human protein interactions. The utilization of the generated interaction data remained however challenging for biomedical researchers due to lack of integration of currently available resources. To facilitate the direct access and analysis of the human interactome, we have developed the Unified Human Interactome (UniHI) database. It provides researchers with a user-friendly Web-interface and integrates interaction data from 12 major resources in its latest version, establishing one of the largest catalogs for human PPIs worldwide. At present, UniHI houses over 250,000 distinct interactions between 22,300 unique proteins and is publically available at http://www.unihi.org.",2012-01-01 +23692364,Disease co-morbidity and the human Wnt signaling pathway: a network-wise study.,"The human Wnt signaling pathway contains 57 genes communicating among themselves by 70 experimentally established associations, as given in the KEGG/PATHWAY database. It is responsible for a variety of crucial biological functions such as regulation of cell fate determination, proliferation, differentiation, migration, and apoptosis. Abnormal behavior of its members causes numerous types of human cancers, dramatic changes in bone mass density that lead to diseases such as osteoporosis-pseudo-glioma syndrome, Van-Buchem disease, skeletal malformation, autosomal dominant sclerosteosis, and osteoporosis type I syndromes. So far, single genes have been investigated for their disease-causing properties, and single diseases have been traced backwards to discover foul-play of the system pathways. Differential expression of the whole genome has been mapped by microarray. 
But how all the genes involved in a pathway affect each other in single/multiple disease state(s) and whether the presence of one disease state makes a person prone to another kind of disease(s) (i.e., co-morbidity among diseases associated with a certain important biological pathway) is still unknown. We have developed a human Wnt signaling pathway diseasome and analyzed it for finding answers to such questions. Data used in constructing the diseasome can be downloaded from the publicly accessible webserver http://www.isical.ac.in/~rajat/diseasome/index.php.",2013-06-01 +22984983,A pipeline for automated annotation of yeast genome sequences by a conserved-synteny approach.,"

Background

Yeasts are a model system for exploring eukaryotic genome evolution. Next-generation sequencing technologies are poised to vastly increase the number of yeast genome sequences, both from resequencing projects (population studies) and from de novo sequencing projects (new species). However, the annotation of genomes presents a major bottleneck for de novo projects, because it still relies on a process that is largely manual.

Results

Here we present the Yeast Genome Annotation Pipeline (YGAP), an automated system designed specifically for new yeast genome sequences lacking transcriptome data. YGAP does automatic de novo annotation, exploiting homology and synteny information from other yeast species stored in the Yeast Gene Order Browser (YGOB) database. The basic premises underlying YGAP's approach are that data from other species already tells us what genes we should expect to find in any particular genomic region and that we should also expect that orthologous genes are likely to have similar intron/exon structures. Additionally, it is able to detect probable frameshift sequencing errors and can propose corrections for them. YGAP searches intelligently for introns, and detects tRNA genes and Ty-like elements.

Conclusions

In tests on Saccharomyces cerevisiae and on the genomes of Naumovozyma castellii and Tetrapisispora blattae newly sequenced with Roche-454 technology, YGAP outperformed another popular annotation program (AUGUSTUS). For S. cerevisiae and N. castellii, 91-93% of YGAP's predicted gene structures were identical to those in previous manually curated gene sets. YGAP has been implemented as a webserver with a user-friendly interface at http://wolfe.gen.tcd.ie/annotation.",2012-09-17 +26928531,conSSert: Consensus SVM Model for Accurate Prediction of Ordered Secondary Structure.,"Accurate prediction of protein secondary structure remains a crucial step in most approaches to the protein-folding problem, yet the prediction of ordered secondary structure, specifically beta-strands, remains a challenge. We developed a consensus secondary structure prediction method, conSSert, which is based on support vector machines (SVM) and provides exceptional accuracy for the prediction of beta-strands with QE accuracy of over 0.82 and a Q2-EH of 0.86. conSSert uses as input probabilities for the three types of secondary structure (helix, strand, and coil) that are predicted by four top performing methods: PSSpred, PSIPRED, SPINE-X, and RAPTOR. conSSert was trained/tested using 4261 protein chains from PDBSelect25, and 8632 chains from PISCES. Further validation was performed using targets from CASP9, CASP10, and CASP11. Our data suggest that poor performance in strand prediction is likely a result of training bias and not solely due to the nonlocal nature of beta-sheet contacts. conSSert is freely available for noncommercial use as a webservice: http://ares.tamu.edu/conSSert/.",2016-03-15 +26722110,Clinafloxacin for Treatment of Burkholderia cenocepacia Infection in a Cystic Fibrosis Patient.,"Respiratory infection with Burkholderia cenocepacia is associated with accelerated decline in lung function and increased mortality in cystic fibrosis (CF) patients (A. M. Jones, M. E. Dodd, J. R. 
W. Govan, V. Barcus, C. J. Doherty, J. Morris, and A. K. Webb, Thorax 59:948-951, 2004, http://dx.doi.org/10.1136/thx.2003.017210). B. cenocepacia often possesses innate resistance to multiple antimicrobial classes, making eradication uncommon in established infection (P. B. Davis, Am J Respir Crit Care Med 173:475-482, 2006, http://dx.doi.org/10.1164/rccm.200505-840OE). We report the use of clinafloxacin in a CF patient with advanced B. cenocepacia infection, present pharmacokinetic (PK) data, and discuss the potential therapeutic role of clinafloxacin in patients with this condition.",2015-12-31 +26356868,RNA-Seq Analysis Pipeline Based on Oshell Environment.,"Advances in transcriptome sequencing (RNA-Seq) have revolutionized the way to characterize and quantify transcripts. The breakthroughs in RNA-Seq technologies give rise to the ever-increasing volumes of data, making data processing the bottleneck of transcriptome research. It becomes crucial to develop an efficient analysis pipeline to automate RNA-Seq data analysis. Based on Oshell environment, we present here an ultra-fast and powerful RNA-Seq analysis pipeline for quality control, sequence alignment, variation detection, expression quantification and junction discovery. The pipeline runs on both Linux and Windows operating systems, with either stand-alone or cluster computing environment. Parallel computing is also supported for improved processing speed. Oshell is free for non-commercial use at http://omicsoft.com/oshell.",2014-09-01 +23193275,"The NIH genetic testing registry: a new, centralized database of genetic tests to enable access to comprehensive information and improve transparency.","The National Institutes of Health Genetic Testing Registry (GTR; available online at http://www.ncbi.nlm.nih.gov/gtr/) maintains comprehensive information about testing offered worldwide for disorders with a genetic basis. Information is voluntarily submitted by test providers. 
The database provides details of each test (e.g. its purpose, target populations, methods, what it measures, analytical validity, clinical validity, clinical utility, ordering information) and laboratory (e.g. location, contact information, certifications and licenses). Each test is assigned a stable identifier of the format GTR000000000, which is versioned when the submitter updates information. Data submitted by test providers are integrated with basic information maintained in National Center for Biotechnology Information's databases and presented on the web and through FTP (ftp.ncbi.nih.gov/pub/GTR/_README.html).",2012-11-27 +22661649,MobiDB: a comprehensive database of intrinsic protein disorder annotations.,"

Motivation

Disordered protein regions are key to the function of numerous processes within an organism and to the determination of a protein's biological role. The most common source for protein disorder annotations, DisProt, covers only a fraction of the available sequences. Alternatively, the Protein Data Bank (PDB) has been mined for missing residues in X-ray crystallographic structures. Herein, we provide a centralized source for data on different flavours of disorder in protein structures, MobiDB, building on and expanding the content provided by already existing sources. In addition to the DisProt and PDB X-ray structures, we have added experimental information from NMR structures and five different flavours of two disorder predictors (ESpritz and IUpred). These are combined into a weighted consensus disorder used to classify disordered regions into flexible and constrained disorder. Users are encouraged to submit manual annotations through a submission form. MobiDB features experimental annotations for 17 285 proteins, covering the entire PDB and predictions for the SwissProt database, with 565 200 annotated sequences. Depending on the disorder flavour, 6-20% of the residues are predicted as disordered.

Availability

The database is freely available at http://mobidb.bio.unipd.it/.

Contact

silvio.tosatto@unipd.it.",2012-06-01 +22120205,Using text mining to link journal articles to neuroanatomical databases.,"The electronic linking of neuroscience information, including data embedded in the primary literature, would permit powerful queries and analyses driven by structured databases. This task would be facilitated by automated procedures that can identify biological concepts in journals. Here we apply an approach for automatically mapping formal identifiers of neuroanatomical regions to text found in journal abstracts, applying it to a large body of abstracts from the Journal of Comparative Neurology (JCN). The analyses yield over 100,000 brain region mentions, which we map to 8,225 brain region concepts in multiple organisms. Based on the analysis of a manually annotated corpus, we estimate mentions are mapped at 95% precision and 63% recall. Our results provide insights into the patterns of publication on brain regions and species of study in JCN but also point to important challenges in the standardization of neuroanatomical nomenclatures. We find that many terms in the formal terminologies never appear in a JCN abstract, and, conversely, many terms that authors use are not reflected in the terminologies. To improve the terminologies, we deposited 136 unrecognized brain regions into the Neuroscience Lexicon (NeuroLex). The training data, terminologies, normalizations, evaluations, and annotated journal abstracts are freely available at http://www.chibi.ubc.ca/WhiteText/.",2012-06-01 +23826941,The systems biology simulation core algorithm.,"

Background

With the increasing availability of high dimensional time course data for metabolites, genes, and fluxes, the mathematical description of dynamical systems has become an essential aspect of research in systems biology. Models are often encoded in formats such as SBML, whose structure is very complex and difficult to evaluate due to many special cases.

Results

This article describes an efficient algorithm to solve SBML models that are interpreted in terms of ordinary differential equations. We begin our consideration with a formal representation of the mathematical form of the models and explain all parts of the algorithm in detail, including several preprocessing steps. We provide a flexible reference implementation as part of the Systems Biology Simulation Core Library, a community-driven project providing a large collection of numerical solvers and a sophisticated interface hierarchy for the definition of custom differential equation systems. To demonstrate the capabilities of the new algorithm, it has been tested with the entire SBML Test Suite and all models of BioModels Database.

Conclusions

The formal description of the mathematics behind the SBML format facilitates the implementation of the algorithm within specifically tailored programs. The reference implementation can be used as a simulation backend for Java™-based programs. Source code, binaries, and documentation can be freely obtained under the terms of the LGPL version 3 from http://simulation-core.sourceforge.net. Feature requests, bug reports, contributions, or any further discussion can be directed to the mailing list simulation-core-development@lists.sourceforge.net.",2013-07-05 +26913188,GOsummaries: an R Package for Visual Functional Annotation of Experimental Data.,"Functional characterisation of gene lists using Gene Ontology (GO) enrichment analysis is a common approach in computational biology, since many analysis methods end up with a list of genes as a result. Often there can be hundreds of functional terms that are significantly associated with a single list of genes and proper interpretation of such results can be a challenging endeavour. There are methods to visualise and aid the interpretation of these results, but most of them are limited to the results associated with one list of genes. However, in practice the number of gene lists can be considerably higher and common tools are not effective in such situations. We introduce a novel R package, 'GOsummaries' that visualises the GO enrichment results as concise word clouds that can be combined together if the number of gene lists is larger. By also adding the graphs of corresponding raw experimental data, GOsummaries can create informative summary plots for various analyses such as differential expression or clustering. The case studies show that the GOsummaries plots allow rapid functional characterisation of complex sets of gene lists. The GOsummaries approach is particularly effective for Principal Component Analysis (PCA). 
By adding functional annotation to the principal components, GOsummaries improves significantly the interpretability of PCA results. The GOsummaries layout for PCA can be effective even in situations where we cannot directly apply the GO analysis. For example, in case of metabolomics or metagenomics data it is possible to show the features with significant associations to the components instead of GO terms. The GOsummaries package is available under GPL-2 licence at Bioconductor (http://www.bioconductor.org/packages/release/bioc/html/GOsummaries.html).",2015-08-18 +23390356,BIRS - Bioterrorism Information Retrieval System.,"

Unlabelled

Bioterrorism is the intended use of pathogenic strains of microbes to widen terror in a population. There is a definite need to promote research for development of vaccines, therapeutics and diagnostic methods as a part of preparedness to any bioterror attack in the future. BIRS is an open-access database of collective information on the organisms related to bioterrorism. The architecture of database utilizes the current open-source technology viz PHP ver 5.3.19, MySQL and IIS server under windows platform for database designing. Database stores information on literature, generic- information and unique pathways of about 10 microorganisms involved in bioterrorism. This may serve as a collective repository to accelerate the drug discovery and vaccines designing process against such bioterrorist agents (microbes). The available data has been validated from various online resources and literature mining in order to provide the user with a comprehensive information system.

Availability

The database is freely available at http://www.bioterrorism.biowaves.org.",2013-01-18 +22041966,In the clinic. Atopic dermatitis (eczema).,"This issue provides a clinical overview of atopic dermatitis (exzema) focusing on prevention, diagnosis, treatment, practice improvement, and patient information. Readers can complete the accompanying CME quiz for 1.5 credits. Only ACP members and individual subscribers can access the electronic features of In the Clinic. Non-subscribers who wish to access this issue of In the Clinic can elect ""Pay for View."" Subscribers can receive 1.5 category 1 CME credits by completing the CME quiz that accompanies this issue of In the Clinic. The content of In the Clinic is drawn from the clinical information and education resources of the American College of Physicians (ACP), including PIER (Physicians' Information and Education Resource) and MKSAP (Medical Knowledge and Self Assessment Program). Annals of Internal Medicine editors develop In the Clinic from these primary sources in collaboration with the ACP's Medical Education and Publishing division and with assistance of science writers and physician writers. Editorial consultants from PIER and MKSAP provide expert review of the content. Readers who are interested in these primary resources for more detail can consult www.acponline.org, http://pier.acponline.org, and other resources referenced within each issue of In the Clinic.",2011-11-01 +26384373,FARE-CAFE: a database of functional and regulatory elements of cancer-associated fusion events. ,"Chromosomal translocation (CT) is of enormous clinical interest because this disorder is associated with various major solid tumors and leukemia. A tumor-specific fusion gene event may occur when a translocation joins two separate genes. Currently, various CT databases provide information about fusion genes and their genomic elements. 
However, no database of the roles of fusion genes, in terms of essential functional and regulatory elements in oncogenesis, is available. FARE-CAFE is a unique combination of CTs, fusion proteins, protein domains, domain-domain interactions, protein-protein interactions, transcription factors and microRNAs, with subsequent experimental information, which cannot be found in any other CT database. Genomic DNA information including, for example, manually collected exact locations of the first and second break points, sequences and karyotypes of fusion genes are included. FARE-CAFE will substantially facilitate the cancer biologist's mission of elucidating the pathogenesis of various types of cancer. This database will ultimately help to develop 'novel' therapeutic approaches. Database URL: http://ppi.bioinfo.asia.edu.tw/FARE-CAFE.",2015-09-16 +22180793,"Toward an open-access global database for mapping, control, and surveillance of neglected tropical diseases.","

Background

After many years of general neglect, interest has grown and efforts came under way for the mapping, control, surveillance, and eventual elimination of neglected tropical diseases (NTDs). Disease risk estimates are a key feature to target control interventions, and serve as a benchmark for monitoring and evaluation. What is currently missing is a georeferenced global database for NTDs providing open-access to the available survey data that is constantly updated and can be utilized by researchers and disease control managers to support other relevant stakeholders. We describe the steps taken toward the development of such a database that can be employed for spatial disease risk modeling and control of NTDs.

Methodology

With an emphasis on schistosomiasis in Africa, we systematically searched the literature (peer-reviewed journals and 'grey literature'), contacted Ministries of Health and research institutions in schistosomiasis-endemic countries for location-specific prevalence data and survey details (e.g., study population, year of survey and diagnostic techniques). The data were extracted, georeferenced, and stored in a MySQL database with a web interface allowing free database access and data management.

Principal findings

At the beginning of 2011, our database contained more than 12,000 georeferenced schistosomiasis survey locations from 35 African countries available under http://www.gntd.org. Currently, the database is expanded to a global repository, including a host of other NTDs, e.g. soil-transmitted helminthiasis and leishmaniasis.

Conclusions

An open-access, spatially explicit NTD database offers unique opportunities for disease risk modeling, targeting control interventions, disease monitoring, and surveillance. Moreover, it allows for detailed geostatistical analyses of disease distribution in space and time. With an initial focus on schistosomiasis in Africa, we demonstrate the proof-of-concept that the establishment and running of a global NTD database is feasible and should be expanded without delay.",2011-12-13 +22359445,"Geno viewer, a SAM/BAM viewer tool.","

Unlabelled

The ever evolving Next Generation Sequencing technology is calling for new and innovative ways of data processing and visualization. Following a detailed survey of the current needs of researchers and service providers, the authors have developed GenoViewer: a highly user-friendly, easy-to-operate SAM/BAM viewer and aligner tool. GenoViewer enables fast and efficient NGS assembly browsing, analysis and read mapping. It is highly customized, making it suitable for a wide range of NGS related tasks. Due to its relatively simple architecture, it is easy to add specialised visualization functionalities, facilitating further customised data analysis. The software's source code is freely available; it is open for project and task-specific modifications.

Availability

The database is available for free at http://www.genoviewer.com/",2012-01-20 +21542900,CompaGB: An open framework for genome browsers comparison.,"

Background

Tools to visualize and explore genomes hold a central place in genomics and the diversity of genome browsers has increased dramatically over the last few years. It often turns out to be a daunting task to compare and choose a well-adapted genome browser, as multidisciplinary knowledge is required to carry out this task and the number of tools, functionalities and features are overwhelming.

Findings

To assist in this task, we propose a community-based framework based on two cornerstones: (i) the implementation of industry promoted software qualification method (QSOS) adapted for genome browser evaluations, and (ii) a web resource providing numerous facilities either for visualizing comparisons or performing new evaluations. We formulated 60 criteria specifically for genome browsers, and incorporated another 65 directly from QSOS's generic section. Those criteria aim to answer versatile needs, ranging from a biologist whose interest primarily lies into user-friendly and informative functionalities, a bioinformatician who wants to integrate the genome browser into a wider framework, or a computer scientist who might choose a software according to more technical features. We developed a dedicated web application to enrich the existing QSOS functionalities (weighting of criteria, user profile) with features of interest to a community-based framework: easy management of evolving data, user comments...

Conclusions

The framework is available at http://genome.jouy.inra.fr/CompaGB. It is open to anyone who wishes to participate in the evaluations. It helps the scientific community to (1) choose a genome browser that would better fit their particular project, (2) visualize features comparatively with easily accessible formats, such as tables or radar plots and (3) perform their own evaluation against the defined criteria. To illustrate the CompaGB functionalities, we have evaluated seven genome browsers according to the implemented methodology. A summary of the features of the compared genome browsers is presented and discussed.",2011-05-04 +24793019,"A reliable, low-cost picture archiving and communications system for small and medium veterinary practices built using open-source technology.","Picture Archiving and Communications Systems (PACS) are the most needed system in a modern hospital. As an integral part of the Digital Imaging and Communications in Medicine (DICOM) standard, they are charged with the responsibility for secure storage and accessibility of the diagnostic imaging data. These machines need to offer high performance, stability, and security while proving reliable and ergonomic in the day-to-day and long-term storage and retrieval of the data they safeguard. This paper reports the experience of the authors in developing and installing a compact and low-cost solution based on open-source technologies in the Veterinary Teaching Hospital for the University of Torino, Italy, during the course of the summer of 2012. The PACS server was built on low-cost x86-based hardware and uses an open source operating system derived from Oracle OpenSolaris (Oracle Corporation, Redwood City, CA, USA) to host the DCM4CHEE PACS DICOM server (DCM4CHEE, http://www.dcm4che.org ). This solution features very high data security and an ergonomic interface to provide easy access to a large amount of imaging data. 
The system has been in active use for almost 2 years now and has proven to be a scalable, cost-effective solution for practices ranging from small to very large, where the use of different hardware combinations allows scaling to the different deployments, while the use of paravirtualization allows increased security and easy migrations and upgrades.",2014-10-01 +23667459,The PARIGA server for real time filtering and analysis of reciprocal BLAST results.,"BLAST-based similarity searches are commonly used in several applications involving both nucleotide and protein sequences. These applications span from simple tasks such as mapping sequences over a database to more complex procedures as clustering or annotation processes. When the amount of analysed data increases, manual inspection of BLAST results become a tedious procedure. Tools for parsing or filtering BLAST results for different purposes are then required. We describe here PARIGA (http://resources.bioinformatica.crs4.it/pariga/), a server that enables users to perform all-against-all BLAST searches on two sets of sequences selected by the user. Moreover, since it stores the two BLAST output in a python-serialized-objects database, results can be filtered according to several parameters in real-time fashion, without re-running the process and avoiding additional programming efforts. Results can be interrogated by the user using logical operations, for example to retrieve cases where two queries match same targets, or when sequences from the two datasets are reciprocal best hits, or when a query matches a target in multiple regions. The Pariga web server is designed to be a helpful tool for managing the results of sequence similarity searches. The design and implementation of the server renders all operations very fast and easy to use.",2013-05-07 +25979475,Likelihood-based complex trait association testing for arbitrary depth sequencing data.,"

Unlabelled

In next generation sequencing (NGS)-based genetic studies, researchers typically perform genotype calling first and then apply standard genotype-based methods for association testing. However, such a two-step approach ignores genotype calling uncertainty in the association testing step and may incur power loss and/or inflated type-I error. In the recent literature, a few robust and efficient likelihood based methods including both likelihood ratio test (LRT) and score test have been proposed to carry out association testing without intermediate genotype calling. These methods take genotype calling uncertainty into account by directly incorporating genotype likelihood function (GLF) of NGS data into association analysis. However, existing LRT methods are computationally demanding or do not allow covariate adjustment; while existing score tests are not applicable to markers with low minor allele frequency (MAF). We provide an LRT allowing flexible covariate adjustment, develop a statistically more powerful score test and propose a combination strategy (UNC combo) to leverage the advantages of both tests. We have carried out extensive simulations to evaluate the performance of our proposed LRT and score test. Simulations and real data analysis demonstrate the advantages of our proposed combination strategy: it offers a satisfactory trade-off in terms of computational efficiency, applicability (accommodating both common variants and variants with low MAF) and statistical power, particularly for the analysis of quantitative trait where the power gain can be up to ∼60% when the causal variant is of low frequency (MAF < 0.01).

Availability and implementation

UNC combo and the associated R files, including documentation, examples, are available at http://www.unc.edu/~yunmli/UNCcombo/

Contact

yunli@med.unc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-14 +27307640,Novel applications of multitask learning and multiple output regression to multiple genetic trait prediction.,"

Unlabelled

Given a set of biallelic molecular markers, such as SNPs, with genotype values encoded numerically on a collection of plant, animal or human samples, the goal of genetic trait prediction is to predict the quantitative trait values by simultaneously modeling all marker effects. Genetic trait prediction is usually represented as linear regression models. In many cases, for the same set of samples and markers, multiple traits are observed. Some of these traits might be correlated with each other. Therefore, modeling all the multiple traits together may improve the prediction accuracy. In this work, we view the multitrait prediction problem from a machine learning angle: as either a multitask learning problem or a multiple output regression problem, depending on whether different traits share the same genotype matrix or not. We then adapted multitask learning algorithms and multiple output regression algorithms to solve the multitrait prediction problem. We proposed a few strategies to improve the least square error of the prediction from these algorithms. Our experiments show that modeling multiple traits together could improve the prediction accuracy for correlated traits.

Availability and implementation

The programs we used are either public or directly from the referred authors, such as MALSAR (http://www.public.asu.edu/~jye02/Software/MALSAR/) package. The Avocado data set has not been published yet and is available upon request.

Contact

dhe@us.ibm.com.",2016-06-01 +25514926,"PhosphoSitePlus, 2014: mutations, PTMs and recalibrations.","PhosphoSitePlus(®) (PSP, http://www.phosphosite.org/), a knowledgebase dedicated to mammalian post-translational modifications (PTMs), contains over 330,000 non-redundant PTMs, including phospho, acetyl, ubiquityl and methyl groups. Over 95% of the sites are from mass spectrometry (MS) experiments. In order to improve data reliability, early MS data have been reanalyzed, applying a common standard of analysis across over 1,000,000 spectra. Site assignments with P > 0.05 were filtered out. Two new downloads are available from PSP. The 'Regulatory sites' dataset includes curated information about modification sites that regulate downstream cellular processes, molecular functions and protein-protein interactions. The 'PTMVar' dataset, an intersect of missense mutations and PTMs from PSP, identifies over 25,000 PTMVars (PTMs Impacted by Variants) that can rewire signaling pathways. The PTMVar data include missense mutations from UniPROTKB, TCGA and other sources that cause over 2000 diseases or syndromes (MIM) and polymorphisms, or are associated with hundreds of cancers. PTMVars include 18 548 phosphorlyation sites, 3412 ubiquitylation sites, 2316 acetylation sites, 685 methylation sites and 245 succinylation sites.",2014-12-16 +22012987,The Reactome BioMart.,"Reactome is an open source, expert-authored, manually curated and peer-reviewed database of reactions, pathways and biological processes. We provide an intuitive web-based user interface to pathway knowledge and a suite of data analysis tools. The Reactome BioMart provides biologists and bioinformaticians with a single web interface for performing simple or elaborate queries of the Reactome database, aggregating data from different sources and providing an opportunity to integrate experimental and computational results with information relating to biological pathways. 
Database URL: http://www.reactome.org.",2011-10-19 +25596188,Emerging themes for sensitivity training modules of African healthcare workers attending to men who have sex with men: a systematic review.,"Sensitivity training of front-line African health care workers (HCWs) attending to men who have sex with men (MSM) is actively promoted through national HIV prevention programming in Kenya. Over 970 Kenyan-based HCWs have completed an eight-modular online training free of charge (http://www.marps-africa.org) since its creation in 2011. Before updating these modules, we performed a systematic review of published literature of MSM studies conducted in sub-Saharan Africa (sSA) in the period 2011-2014, to investigate if recent studies provided: important new knowledge currently not addressed in existing online modules; contested information of existing module topics; or added depth to topics covered already. We used learning objectives of the eight existing modules to categorise data from the literature. If data could not be categorised, new modules were suggested. Our review identified 142 MSM studies with data from sSA, including 34 studies requiring module updates, one study contesting current content, and 107 studies reinforcing existing module content. ART adherence and community engagement were identified as new modules. Recent MSM studies conducted in sSA provided new knowledge, contested existing information, and identified new areas of MSM service needs currently unaddressed in the online training.",2015-01-16 +26482796,Genome-Wide Scan for Adaptive Divergence and Association with Population-Specific Covariates.,"In population genomics studies, accounting for the neutral covariance structure across population allele frequencies is critical to improve the robustness of genome-wide scan approaches. 
Elaborating on the BayEnv model, this study investigates several modeling extensions (i) to improve the estimation accuracy of the population covariance matrix and all the related measures, (ii) to identify significantly overly differentiated SNPs based on a calibration procedure of the XtX statistics, and (iii) to consider alternative covariate models for analyses of association with population-specific covariables. In particular, the auxiliary variable model allows one to deal with multiple testing issues and, providing the relative marker positions are available, to capture some linkage disequilibrium information. A comprehensive simulation study was carried out to evaluate the performances of these different models. Also, when compared in terms of power, robustness, and computational efficiency to five other state-of-the-art genome-scan methods (BayEnv2, BayScEnv, BayScan, flk, and lfmm), the proposed approaches proved highly effective. For illustration purposes, genotyping data on 18 French cattle breeds were analyzed, leading to the identification of 13 strong signatures of selection. Among these, four (surrounding the KITLG, KIT, EDN3, and ALB genes) contained SNPs strongly associated with the piebald coloration pattern while a fifth (surrounding PLAG1) could be associated to morphological differences across the populations. Finally, analysis of Pool-Seq data from 12 populations of Littorina saxatilis living in two different ecotypes illustrates how the proposed framework might help in addressing relevant ecological issues in nonmodel species. Overall, the proposed methods define a robust Bayesian framework to characterize adaptive genetic differentiation across populations. 
The BayPass program implementing the different models is available at http://www1.montpellier.inra.fr/CBGP/software/baypass/.",2015-10-19 +27532062,omniClassifier: a Desktop Grid Computing System for Big Data Prediction Modeling.,"Robust prediction models are important for numerous science, engineering, and biomedical applications. However, best-practice procedures for optimizing prediction models can be computationally complex, especially when choosing models from among hundreds or thousands of parameter choices. Computational complexity has further increased with the growth of data in these fields, concurrent with the era of ""Big Data"". Grid computing is a potential solution to the computational challenges of Big Data. Desktop grid computing, which uses idle CPU cycles of commodity desktop machines, coupled with commercial cloud computing resources can enable research labs to gain easier and more cost effective access to vast computing resources. We have developed omniClassifier, a multi-purpose prediction modeling application that provides researchers with a tool for conducting machine learning research within the guidelines of recommended best-practices. omniClassifier is implemented as a desktop grid computing system using the Berkeley Open Infrastructure for Network Computing (BOINC) middleware. In addition to describing implementation details, we use various gene expression datasets to demonstrate the potential scalability of omniClassifier for efficient and robust Big Data prediction modeling. A prototype of omniClassifier can be accessed at http://omniclassifier.bme.gatech.edu/.",2014-09-01 +27384611,Rationale and design of the PREFERS (Preserved and Reduced Ejection Fraction Epidemiological Regional Study) Stockholm heart failure study: an epidemiological regional study in Stockholm county of 2.1 million inhabitants.,"

Aims

Heart failure (HF) with preserved (HFpEF) or reduced (HFrEF) ejection fraction is associated with poor prognosis and quality of life. While the incidence of HFrEF is declining and HF treatment is effective, HFpEF is increasing, with no established therapy. PREFERS Stockholm is an epidemiological study with the aim of improving clinical care and research in HF and to find new targets for drug treatment in HFpEF (https://internwebben.ki.se/sites/default/files/20150605_4d_research_appendix_final.pdf).

Methods

Patients with new-onset HF (n = 2000) will be characterized at baseline and after 1-year follow-up by standardized protocols for clinical evaluation, echocardiography, and ECG. In one subset undergoing elective coronary bypass surgery (n = 100) and classified according to LV function, myocardial biopsies will be collected during surgery, and cardiac magnetic resonance (CMR) imaging will be performed at baseline and after 1 year. Blood and tissue samples will be stored in a biobank. We will characterize and compare new-onset HFpEF and HFrEF patients regarding clinical findings and cardiac imaging, genomics, proteomics, and transcriptomics from blood and cardiac biopsies, and by established biomarkers of fibrosis, inflammation, haemodynamics, haemostasis, and thrombosis. The data will be explored by state-of-the-art bioinformatics methods to investigate gene expression patterns, sequence variation, DNA methylation, and post-translational modifications, and using systems biology approaches including pathway and network analysis.

Conclusions

In this epidemiological HF study with biopsy studies in a subset of patients, we aim to identify new biomarkers of disease progression and to find pathophysiological mechanisms to support explorations of new treatment regimens for HFpEF.",2016-07-07 +23175606,WholeCellKB: model organism databases for comprehensive whole-cell models.,"Whole-cell models promise to greatly facilitate the analysis of complex biological behaviors. Whole-cell model development requires comprehensive model organism databases. WholeCellKB (http://wholecellkb.stanford.edu) is an open-source web-based software program for constructing model organism databases. WholeCellKB provides an extensive and fully customizable data model that fully describes individual species including the structure and function of each gene, protein, reaction and pathway. We used WholeCellKB to create WholeCellKB-MG, a comprehensive database of the Gram-positive bacterium Mycoplasma genitalium using over 900 sources. WholeCellKB-MG is extensively cross-referenced to existing resources including BioCyc, KEGG and UniProt. WholeCellKB-MG is freely accessible through a web-based user interface as well as through a RESTful web service.",2012-11-21 +22874333,Teaching medicine with a terminology/ontology portal.,"

Unlabelled

The Health Terminology/Ontology Portal (HeTOP) was developed to provide easy access to health terminologies and ontologies. The repository is not only dedicated to professionals but is also a valuable teaching tool. Currently, it provides access to thirty two health terminologies and ontologies available mainly in French or in English, but also in German, Italian, Chinese, etc. HeTOP can be used by both humans and computers via Web services. To integrate new resources into HeTOP, three steps are necessary: (1) designing a meta-model into which each terminology (or ontology) can be integrated, (2) developing a process to include terminologies into HeTOP, (3) building and integrating existing and new inter & intra-terminology semantic harmonization into HeTOP. Currently, 600 unique machines use the MeSH version of HeTOP every day and restricted terminologies/ontologies are used for teaching purposes in several medical schools in France. The multilingual version of HeTOP is available (URL: http://hetop.eu/) and provides free access to ICD10 and FMA in ten languages.

Conclusion

HeTOP is a rich tool, useful for a wide range of applications and users, especially in education and resource indexing but also in information retrieval or performing audits in terminology management.",2012-01-01 +25584184,AISO: Annotation of Image Segments with Ontologies.,"

Background

Large quantities of digital images are now generated for biological collections, including those developed in projects premised on the high-throughput screening of genome-phenome experiments. These images often carry annotations on taxonomy and observable features, such as anatomical structures and phenotype variations often recorded in response to the environmental factors under which the organisms were sampled. At present, most of these annotations are described in free text, may involve limited use of non-standard vocabularies, and rarely specify precise coordinates of features on the image plane such that a computer vision algorithm could identify, extract and annotate them. Therefore, researchers and curators need a tool that can identify and demarcate features in an image plane and allow their annotation with semantically contextual ontology terms. Such a tool would generate data useful for inter and intra-specific comparison and encourage the integration of curation standards. In the future, quality annotated image segments may provide training data sets for developing machine learning applications for automated image annotation.

Results

We developed a novel image segmentation and annotation software application, ""Annotation of Image Segments with Ontologies"" (AISO). The tool enables researchers and curators to delineate portions of an image into multiple highlighted segments and annotate them with an ontology-based controlled vocabulary. AISO is a freely available Java-based desktop application and runs on multiple platforms. It can be downloaded at http://www.plantontology.org/software/AISO.

Conclusions

AISO enables curators and researchers to annotate digital images with ontology terms in a manner which ensures the future computational value of the annotated images. We foresee uses for such data-encoded image annotations in biological data mining, machine learning, predictive annotation, semantic inference, and comparative analyses.",2014-12-17 +25086505,Epiviz: interactive visual analytics for functional genomics data.,"Visualization is an integral aspect of genomics data analysis. Algorithmic-statistical analysis and interactive visualization are most effective when used iteratively. Epiviz (http://epiviz.cbcb.umd.edu/), a web-based genome browser, and the Epivizr Bioconductor package allow interactive, extensible and reproducible visualization within a state-of-the-art data-analysis platform.",2014-08-03 +27506226,DTMiner: identification of potential disease targets through biomedical literature mining.,"

Motivation

Biomedical researchers often search through massive catalogues of literature to look for potential relationships between genes and diseases. Given the rapid growth of biomedical literature, automatic relation extraction, a crucial technology in biomedical literature mining, has shown great potential to support research of gene-related diseases. Existing work in this field has produced datasets that are limited both in scale and accuracy.

Results

In this study, we propose a reliable and efficient framework that takes large biomedical literature repositories as inputs, identifies credible relationships between diseases and genes, and presents possible genes related to a given disease and possible diseases related to a given gene. The framework incorporates name entity recognition (NER), which identifies occurrences of genes and diseases in texts, association detection whereby we extract and evaluate features from gene-disease pairs, and ranking algorithms that estimate how closely the pairs are related. The F1-score of the NER phase is 0.87, which is higher than existing studies. The association detection phase takes drastically less time than previous work while maintaining a comparable F1-score of 0.86. The end-to-end result achieves a 0.259 F1-score for the top 50 genes associated with a disease, which performs better than previous work. In addition, we released a web service for public use of the dataset.

Availability and implementation

The implementation of the proposed algorithms is publicly available at http://gdr-web.rwebox.com/public_html/index.php?page=download.php The web service is available at http://gdr-web.rwebox.com/public_html/index.php CONTACT: jenny.wei@astrazeneca.com or kzhu@cs.sjtu.edu.cn Supplementary information: Supplementary data are available at Bioinformatics online.",2016-08-09 +27547217,A pedagogical walkthrough of computational modeling and simulation of Wnt signaling pathway using static causal models in MATLAB.,"Simulation study in systems biology involving computational experiments dealing with Wnt signaling pathways abound in literature but often lack a pedagogical perspective that might ease the understanding of beginner students and researchers in transition, who intend to work on the modeling of the pathway. This paucity might happen due to restrictive business policies which enforce an unwanted embargo on the sharing of important scientific knowledge. A tutorial introduction to computational modeling of Wnt signaling pathway in a human colorectal cancer dataset using static Bayesian network models is provided. The walkthrough might aid biologists/informaticians in understanding the design of computational experiments that is interleaved with exposition of the Matlab code and causal models from Bayesian network toolbox. The manuscript elucidates the coding contents of the advance article by Sinha (Integr. Biol. 6:1034-1048, 2014) and takes the reader in a step-by-step process of how (a) the collection and the transformation of the available biological information from literature is done, (b) the integration of the heterogeneous data and prior biological knowledge in the network is achieved, (c) the simulation study is designed, (d) the hypothesis regarding a biological phenomena is transformed into computational framework, and (e) results and inferences drawn using d-connectivity/separability are reported. 
The manuscript finally ends with a programming assignment to help the readers get hands-on experience of a perturbation project. Description of Matlab files is made available under GNU GPL v3 license at the Google code project on https://code.google.com/p/static-bn-for-wnt-signaling-pathway and https://sites.google.com/site/shriprakashsinha/shriprakashsinha/projects/static-bn-for-wnt-signaling-pathway. Latest updates can be found in the latter website.",2016-08-08 +26272982,OVA: integrating molecular and physical phenotype data from multiple biomedical domain ontologies with variant filtering for enhanced variant prioritization.,"

Motivation

Exome sequencing has become a de facto standard method for Mendelian disease gene discovery in recent years, yet identifying disease-causing mutations among thousands of candidate variants remains a non-trivial task.

Results

Here we describe a new variant prioritization tool, OVA (ontology variant analysis), in which user-provided phenotypic information is exploited to infer deeper biological context. OVA combines a knowledge-based approach with a variant-filtering framework. It reduces the number of candidate variants by considering genotype and predicted effect on protein sequence, and scores the remainder on biological relevance to the query phenotype. We take advantage of several ontologies in order to bridge knowledge across multiple biomedical domains and facilitate computational analysis of annotations pertaining to genes, diseases, phenotypes, tissues and pathways. In this way, OVA combines information regarding molecular and physical phenotypes and integrates both human and model organism data to effectively prioritize variants. By assessing performance on both known and novel disease mutations, we show that OVA performs biologically meaningful candidate variant prioritization and can be more accurate than another recently published candidate variant prioritization tool.

Availability and implementation

OVA is freely accessible at http://dna2.leeds.ac.uk:8080/OVA/index.jsp.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

umaan@leeds.ac.uk.",2015-08-12 +27166369,GPCR-ModSim: A comprehensive web based solution for modeling G-protein coupled receptors.,"GPCR-ModSim (http://open.gpcr-modsim.org) is a centralized and easy to use service dedicated to the structural modeling of G-protein Coupled Receptors (GPCRs). 3D molecular models can be generated from amino acid sequence by homology-modeling techniques, considering different receptor conformations. GPCR-ModSim includes a membrane insertion and molecular dynamics (MD) equilibration protocol, which can be used to refine the generated model or any GPCR structure uploaded to the server, including if desired non-protein elements such as orthosteric or allosteric ligands, structural waters or ions. We herein revise the main characteristics of GPCR-ModSim and present new functionalities. The templates used for homology modeling have been updated considering the latest structural data, with separate profile structural alignments built for inactive, partially-active and active groups of templates. We have also added the possibility to perform multiple-template homology modeling in a unique and flexible way. Finally, our new MD protocol considers a series of distance restraints derived from a recently identified conserved network of helical contacts, allowing for a smoother refinement of the generated models which is particularly advised when there is low homology to the available templates. GPCR- ModSim has been tested on the GPCR Dock 2013 competition with satisfactory results.",2016-05-10 +22318478,A weighted power framework for integrating multisource information: gene function prediction in yeast.,"Predicting the functions of unannotated genes is one of the major challenges of biological investigation. 
In this study, we propose a weighted power scoring framework, called weighted power biological score (WPBS), for combining different biological data sources and predicting the function of some of the unclassified yeast Saccharomyces cerevisiae genes. The relative power and weight coefficients of different data sources, in the proposed score, are estimated systematically by utilizing functional annotations [yeast Gene Ontology (GO)-Slim: Process] of classified genes, available from Saccharomyces Genome Database. Genes are then clustered by applying k-medoids algorithm on WPBS, and functional categories of 334 unclassified genes are predicted using a P-value cutoff 1 ×10(-5). The WPBS is available online at http://www.isical.ac.in/~ shubhra/WPBS/WPBS.html, where one can download WPBS, related files, and a MATLAB code to predict functions of unclassified genes.",2012-02-03 +23606422,Patterns and mutational signatures of tandem base substitutions causing human inherited disease.,"Tandem base substitutions (TBSs) are multiple mutations that comprise two or more contiguous nucleotide substitutions without any net gain or loss of bases. They have recently become recognized as a distinct category of human genomic variant. However, their role in causing human inherited disease so far has not been studied methodically. Here, using data from the Human Gene Mutation Database (http://www.hgmd.org), we identified 477 events to be TBSs (doublets, 448; triplets, 16; and quadruplets to octuplets, 13). A comprehensive sequence pattern and context analysis implied the likely fundamental importance of translesion synthesis (TLS) DNA polymerases in generating these diverse TBSs but revealed that TLS polymerases may operate differently in generating TBSs of ≤ 3 bases (bypass of endogenous DNA lesions) than those of ≥ 4 bases (serial replication slippage). Moreover, GC was found to be the most frequently affected dinucleotide with GC/GC>AA/TT being the most frequent double TBS. 
Comparison with cancer genome mutational spectra allowed us to conclude that human germline TBSs arise predominantly through the action of endogenous mechanisms of mutagenesis rather than through exposure to exogenous mutagens. Finally, the rates of double and triple TBSs were estimated to be 0.2-1.2 × 10(-10) and 0.8-4.8 × 10(-12) per base per generation, respectively.",2013-05-20 +25034693,SpliceNet: recovering splicing isoform-specific differential gene networks from RNA-Seq data of normal and diseased samples.,"Conventionally, overall gene expressions from microarrays are used to infer gene networks, but it is challenging to account splicing isoforms. High-throughput RNA Sequencing has made splice variant profiling practical. However, its true merit in quantifying splicing isoforms and isoform-specific exon expressions is not well explored in inferring gene networks. This study demonstrates SpliceNet, a method to infer isoform-specific co-expression networks from exon-level RNA-Seq data, using large dimensional trace. It goes beyond differentially expressed genes and infers splicing isoform network changes between normal and diseased samples. It eases the sample size bottleneck; evaluations on simulated data and lung cancer-specific ERBB2 and MAPK signaling pathways, with varying number of samples, evince the merit in handling high exon to sample size ratio datasets. Inferred network rewiring of well established Bcl-x and EGFR centered networks from lung adenocarcinoma expression data is in good agreement with literature. Gene level evaluations demonstrate a substantial performance of SpliceNet over canonical correlation analysis, a method that is currently applied to exon level RNA-Seq data. SpliceNet can also be applied to exon array data. 
SpliceNet is distributed as an R package available at http://www.jjwanglab.org/SpliceNet.",2014-07-17 +23681723,The comprehensive peptaibiotics database.,"Peptaibiotics are nonribosomally biosynthesized peptides, which - according to definition - contain the marker amino acid α-aminoisobutyric acid (Aib) and possess antibiotic properties. Being known since 1958, a constantly increasing number of peptaibiotics have been described and investigated with a particular emphasis on hypocrealean fungi. Starting from the existing online 'Peptaibol Database', first published in 1997, an exhaustive literature survey of all known peptaibiotics was carried out and resulted in a list of 1043 peptaibiotics. The gathered information was compiled and used to create the new 'The Comprehensive Peptaibiotics Database', which is presented here. The database was devised as a software tool based on Microsoft (MS) Access. It is freely available from the internet at http://peptaibiotics-database.boku.ac.at and can easily be installed and operated on any computer offering a Windows XP/7 environment. It provides useful information on characteristic properties of the peptaibiotics included such as peptide category, group name of the microheterogeneous mixture to which the peptide belongs, amino acid sequence, sequence length, producing fungus, peptide subfamily, molecular formula, and monoisotopic mass. All these characteristics can be used and combined for automated search within the database, which makes The Comprehensive Peptaibiotics Database a versatile tool for the retrieval of valuable information about peptaibiotics. Sequence data have been considered as to December 14, 2012.",2013-05-01 +22034521,Automatic rebuilding and optimization of crystallographic structures in the Protein Data Bank.,"

Motivation

Macromolecular crystal structures in the Protein Data Bank (PDB) are a key source of structural insight into biological processes. These structures, some >30 years old, were constructed with methods of their era. With PDB_REDO, we aim to automatically optimize these structures to better fit their corresponding experimental data, passing the benefits of new methods in crystallography on to a wide base of non-crystallographer structure users.

Results

We developed new algorithms to allow automatic rebuilding and remodeling of main chain peptide bonds and side chains in crystallographic electron density maps, and incorporated these and further enhancements in the PDB_REDO procedure. Applying the updated PDB_REDO to the oldest, but also to some of the newest models in the PDB, corrects existing modeling errors and brings these models to a higher quality, as judged by standard validation methods.

Availability and implementation

The PDB_REDO database and links to all software are available at http://www.cmbi.ru.nl/pdb_redo.

Contact

r.joosten@nki.nl; a.perrakis@nki.nl

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-27 +24700103,fastSTRUCTURE: variational inference of population structure in large SNP data sets.,"Tools for estimating population structure from genetic data are now used in a wide variety of applications in population genetics. However, inferring population structure in large modern data sets imposes severe computational challenges. Here, we develop efficient algorithms for approximate inference of the model underlying the STRUCTURE program using a variational Bayesian framework. Variational methods pose the problem of computing relevant posterior distributions as an optimization problem, allowing us to build on recent advances in optimization theory to develop fast inference tools. In addition, we propose useful heuristic scores to identify the number of populations represented in a data set and a new hierarchical prior to detect weak population structure in the data. We test the variational algorithms on simulated data and illustrate using genotype data from the CEPH-Human Genome Diversity Panel. The variational algorithms are almost two orders of magnitude faster than STRUCTURE and achieve accuracies comparable to those of ADMIXTURE. Furthermore, our results show that the heuristic scores for choosing model complexity provide a reasonable range of values for the number of populations represented in the data, with minimal bias toward detecting structure when it is very weak. Our algorithm, fastSTRUCTURE, is freely available online at http://pritchardlab.stanford.edu/structure.html.",2014-04-02 +27378291,SwiSpot: modeling riboswitches by spotting out switching sequences.,"

Motivation

Riboswitches are cis-regulatory elements in mRNA, mostly found in Bacteria, which exhibit two main secondary structure conformations. Although one of them prevents the gene from being expressed, the other conformation allows its expression, and this switching process is typically driven by the presence of a specific ligand. Although there are a handful of known riboswitches, our knowledge in this field has been greatly limited due to our inability to identify their alternate structures from their sequences. Indeed, current methods are not able to predict the presence of the two functionally distinct conformations just from the knowledge of the plain RNA nucleotide sequence. Whether this would be possible, for which cases, and what prediction accuracy can be achieved, are currently open questions.

Results

Here we show that the two alternate secondary structures of riboswitches can be accurately predicted once the 'switching sequence' of the riboswitch has been properly identified. The proposed SwiSpot approach is capable of identifying the switching sequence inside a putative, complete riboswitch sequence, on the basis of pairing behaviors, which are evaluated on proper sets of configurations. Moreover, it is able to model the switching behavior of riboswitches whose generated ensemble covers both alternate configurations. Beyond structural predictions, the approach can also be paired to homology-based riboswitch searches.

Availability and implementation

SwiSpot software, along with the reference dataset files, is available at: http://www.iet.unipi.it/a.bechini/swispot/ Supplementary information: Supplementary data are available at Bioinformatics online.

Contact

a.bechini@ing.unipi.it.",2016-07-04 +23132505,D-peaks: a visual tool to display ChIP-seq peaks along the genome.,"ChIP-sequencing is a method of choice to localize the positions of protein binding sites on DNA on a whole genomic scale. The deciphering of the sequencing data produced by this novel technique is challenging and it is achieved by their rigorous interpretation using dedicated tools and adapted visualization programs. Here, we present a bioinformatics tool (D-peaks) that adds several possibilities (including, user-friendliness, high-quality, relative position with respect to the genomic features) to the well-known visualization browsers or databases already existing. D-peaks is directly available through its web interface http://rsat.ulb.ac.be/dpeaks/ as well as a command line tool.",2012-09-01 +22135294,NONCODE v3.0: integrative annotation of long noncoding RNAs.,"Facilitated by the rapid progress of high-throughput sequencing technology, a large number of long noncoding RNAs (lncRNAs) have been identified in mammalian transcriptomes over the past few years. LncRNAs have been shown to play key roles in various biological processes such as imprinting control, circuitry controlling pluripotency and differentiation, immune responses and chromosome dynamics. Notably, a growing number of lncRNAs have been implicated in disease etiology. With the increasing number of published lncRNA studies, the experimental data on lncRNAs (e.g. expression profiles, molecular features and biological functions) have accumulated rapidly. In order to enable a systematic compilation and integration of this information, we have updated the NONCODE database (http://www.noncode.org) to version 3.0 to include the first integrated collection of expression and functional lncRNA data obtained from re-annotated microarray studies in a single database. 
NONCODE has a user-friendly interface with a variety of search or browse options, a local Genome Browser for visualization and a BLAST server for sequence-alignment search. In addition, NONCODE provides a platform for the ongoing collation of ncRNAs reported in the literature. All data in NONCODE are open to users, and can be downloaded through the website or obtained through the SOAP API and DAS services.",2011-12-01 +22563474,Regulatory Snapshots: integrative mining of regulatory modules from expression time series and regulatory networks.,"Explaining regulatory mechanisms is crucial to understand complex cellular responses leading to system perturbations. Some strategies reverse engineer regulatory interactions from experimental data, while others identify functional regulatory units (modules) under the assumption that biological systems yield a modular organization. Most modular studies focus on network structure and static properties, ignoring that gene regulation is largely driven by stimulus-response behavior. Expression time series are key to gain insight into dynamics, but have been insufficiently explored by current methods, which often (1) apply generic algorithms unsuited for expression analysis over time, due to inability to maintain the chronology of events or incorporate time dependency; (2) ignore local patterns, abundant in most interesting cases of transcriptional activity; (3) neglect physical binding or lack automatic association of regulators, focusing mainly on expression patterns; or (4) limit the discovery to a predefined number of modules. We propose Regulatory Snapshots, an integrative mining approach to identify regulatory modules over time by combining transcriptional control with response, while overcoming the above challenges. Temporal biclustering is first used to reveal transcriptional modules composed of genes showing coherent expression profiles over time. 
Personalized ranking is then applied to prioritize prominent regulators targeting the modules at each time point using a network of documented regulatory associations and the expression data. Custom graphics are finally depicted to expose the regulatory activity in a module at consecutive time points (snapshots). Regulatory Snapshots successfully unraveled modules underlying yeast response to heat shock and human epithelial-to-mesenchymal transition, based on regulations documented in the YEASTRACT and JASPAR databases, respectively, and available expression data. Regulatory players involved in functionally enriched processes related to these biological events were identified. Ranking scores further suggested ability to discern the primary role of a gene (target or regulator). Prototype is available at: http://kdbio.inesc-id.pt/software/regulatorysnapshots.",2012-05-01 +26301843,Predicting effects of noncoding variants with deep learning-based sequence model.,"Identifying functional effects of noncoding variants is a major challenge in human genetics. To predict the noncoding-variant effects de novo from sequence, we developed a deep learning-based algorithmic framework, DeepSEA (http://deepsea.princeton.edu/), that directly learns a regulatory sequence code from large-scale chromatin-profiling data, enabling prediction of chromatin effects of sequence alterations with single-nucleotide sensitivity. We further used this capability to improve prioritization of functional variants including expression quantitative trait loci (eQTLs) and disease-associated variants.",2015-08-24 +27235414,An atlas of gene expression and gene co-regulation in the human retina.,"The human retina is a specialized tissue involved in light stimulus transduction. Despite its unique biology, an accurate reference transcriptome is still missing. Here, we performed gene expression analysis (RNA-seq) of 50 retinal samples from non-visually impaired post-mortem donors. 
We identified novel transcripts with high confidence (Observed Transcriptome (ObsT)) and quantified the expression level of known transcripts (Reference Transcriptome (RefT)). The ObsT included 77 623 transcripts (23 960 genes) covering 137 Mb (35 Mb new transcribed genome). Most of the transcripts (92%) were multi-exonic: 81% with known isoforms, 16% with new isoforms and 3% belonging to new genes. The RefT included 13 792 genes across 94 521 known transcripts. Mitochondrial genes were among the most highly expressed, accounting for about 10% of the reads. Of all the protein-coding genes in Gencode, 65% are expressed in the retina. We exploited inter-individual variability in gene expression to infer a gene co-expression network and to identify genes specifically expressed in photoreceptor cells. We experimentally validated the photoreceptors localization of three genes in human retina that had not been previously reported. RNA-seq data and the gene co-expression network are available online (http://retina.tigem.it).",2016-05-27 +26623893,An updated list of the plants associated with plant-parasitic Aphelenchoides (Nematoda: Aphelenchoididae) and its implications for plant-parasitism within this genus.,"Few Aphelenchoides spp. are facultative plant-parasites (foliar and bulb nematodes); three of them are well known in agricultural systems, namely Aphelenchoides besseyi, A. fragariae and A. ritzemabosi. Ten other plant-parasitic species, A. arachidis, A. bicaudatus, A. blastophthorus, A. dalianensis, A. ensete, A. nechaleos, A. paranechaleos, A. saprophilus, A. sphaerocephalus and A. subtenuis, have been reported from a limited number of plant species. We compiled a new database of the associated plants for these thirteen species, a comprehensive list that includes 1104 reports from 126 botanical families. A. besseyi, A. fragariae and A. 
ritzemabosi represent 94% of the reports, circa 83% and 16% of the total reports correspond to flowering plants and ferns, respectively, with three records on conifers and two from other botanical groups also listed. Most plant-parasitic Aphelenchoides show a remarkably broad diversity of associated plants. Most species appear to have no specific plant hosts (i.e. are generalists). The broad host ranges of these species and absence of more intimate interactions with the associated plants highlights the primitive mode of parasitism in Aphelenchoides species, making them potentially interesting in the study of the evolution of plant parasitism. Even though the compiled list of associated plants is long, it probably only represents a fraction of the potential range. The complete compilation has been uploaded to http://nematodes.myspecies.info/.",2015-09-08 +25859761,Identification and Prioritization of Relationships between Environmental Stressors and Adverse Human Health Impacts.,"

Background

There are > 80,000 chemicals in commerce with few data available describing their impacts on human health. Biomonitoring surveys, such as the NHANES (National Health and Nutrition Examination Survey), offer one route to identifying possible relationships between environmental chemicals and health impacts, but sparse data and the complexity of traditional models make it difficult to leverage effectively.

Objective

We describe a workflow to efficiently and comprehensively evaluate and prioritize chemical-health impact relationships from the NHANES biomonitoring survey studies.

Methods

Using a frequent itemset mining (FIM) approach, we identified relationships between chemicals and health biomarkers and diseases.

Results

The FIM method identified 7,848 relationships between 219 chemicals and 93 health outcomes/biomarkers. Two case studies used to evaluate the FIM rankings demonstrate that the FIM approach is able to identify published relationships. Because the relationships are derived from the vast majority of the chemicals monitored by NHANES, the resulting list of associations is appropriate for evaluating results from targeted data mining or identifying novel candidate relationships for more detailed investigation.

Conclusions

Because of the computational efficiency of the FIM method, all chemicals and health effects can be considered in a single analysis. The resulting list provides a comprehensive summary of the chemical/health co-occurrences from NHANES that are higher than expected by chance. This information enables ranking and prioritization on chemicals or health effects of interest for evaluation of published results and design of future studies.

Citation

Bell SM, Edwards SW. 2015. Identification and prioritization of relationships between environmental stressors and adverse human health impacts. Environ Health Perspect 123:1193-1199; http://dx.doi.org/10.1289/ehp.1409138.",2015-04-10 +26139634,Functional classification of CATH superfamilies: a domain-based approach for protein function annotation.,"

Motivation

Computational approaches that can predict protein functions are essential to bridge the widening function annotation gap especially since <1.0% of all proteins in UniProtKB have been experimentally characterized. We present a domain-based method for protein function classification and prediction of functional sites that exploits functional sub-classification of CATH superfamilies. The superfamilies are sub-classified into functional families (FunFams) using a hierarchical clustering algorithm supervised by a new classification method, FunFHMMer.

Results

FunFHMMer generates more functionally coherent groupings of protein sequences than other domain-based protein classifications. This has been validated using known functional information. The conserved positions predicted by the FunFams are also found to be enriched in known functional residues. Moreover, the functional annotations provided by the FunFams are found to be more precise than other domain-based resources. FunFHMMer currently identifies 110,439 FunFams in 2735 superfamilies which can be used to functionally annotate >16 million domain sequences.

Availability and implementation

All FunFam annotation data are made available through the CATH webpages (http://www.cathdb.info). The FunFHMMer webserver (http://www.cathdb.info/search/by_funfhmmer) allows users to submit query sequences for assignment to a CATH FunFam.

Contact

sayoni.das.12@ucl.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-02 +22942020,Assignment of protein sequences to existing domain and family classification systems: Pfam and the PDB.,"

Motivation

Automating the assignment of existing domain and protein family classifications to new sets of sequences is an important task. Current methods often miss assignments because remote relationships fail to achieve statistical significance. Some assignments are not as long as the actual domain definitions because local alignment methods often cut alignments short. Long insertions in query sequences often erroneously result in two copies of the domain assigned to the query. Divergent repeat sequences in proteins are often missed.

Results

We have developed a multilevel procedure to produce nearly complete assignments of protein families of an existing classification system to a large set of sequences. We apply this to the task of assigning Pfam domains to sequences and structures in the Protein Data Bank (PDB). We found that HHsearch alignments frequently scored more remotely related Pfams in Pfam clans higher than closely related Pfams, thus, leading to erroneous assignment at the Pfam family level. A greedy algorithm allowing for partial overlaps was, thus, applied first to sequence/HMM alignments, then HMM-HMM alignments and then structure alignments, taking care to join partial alignments split by large insertions into single-domain assignments. Additional assignment of repeat Pfams with weaker E-values was allowed after stronger assignments of the repeat HMM. Our database of assignments, presented in a database called PDBfam, contains Pfams for 99.4% of chains >50 residues.

Availability

The Pfam assignment data in PDBfam are available at http://dunbrack2.fccc.edu/ProtCid/PDBfam, which can be searched by PDB codes and Pfam identifiers. They will be updated regularly.",2012-08-31 +23986771,Construction of a rice glycoside hydrolase phylogenomic database and identification of targets for biofuel research.,"Glycoside hydrolases (GH) catalyze the hydrolysis of glycosidic bonds in cell wall polymers and can have major effects on cell wall architecture. Taking advantage of the massive datasets available in public databases, we have constructed a rice phylogenomic database of GHs (http://ricephylogenomics.ucdavis.edu/cellwalls/gh/). This database integrates multiple data types including the structural features, orthologous relationships, mutant availability, and gene expression patterns for each GH family in a phylogenomic context. The rice genome encodes 437 GH genes classified into 34 families. Based on pairwise comparison with eight dicot and four monocot genomes, we identified 138 GH genes that are highly diverged between monocots and dicots, 57 of which have diverged further in rice as compared with four monocot genomes scanned in this study. Chromosomal localization and expression analysis suggest a role for both whole-genome and localized gene duplications in expansion and diversification of GH families in rice. We examined the meta-profiles of expression patterns of GH genes in twenty different anatomical tissues of rice. Transcripts of 51 genes exhibit tissue or developmental stage-preferential expression, whereas, seventeen other genes preferentially accumulate in actively growing tissues. When queried in RiceNet, a probabilistic functional gene network that facilitates functional gene predictions, nine out of seventeen genes form a regulatory network with the well-characterized genes involved in biosynthesis of cell wall polymers including cellulose synthase and cellulose synthase-like genes of rice. 
Two-thirds of the GH genes in rice are up regulated in response to biotic and abiotic stress treatments indicating a role in stress adaptation. Our analyses identify potential GH targets for cell wall modification.",2013-08-26 +24872914,Development and validation of web-based nomograms to predict postoperative invasive component in ductal carcinoma in situ at needle breast biopsy.,"

Objectives

Although sonography-guided core needle biopsy is a highly targeted method, there is a possibility of an invasive component after surgical excision of ductal carcinoma in situ (DCIS) of the breast. This study was performed to develop and validate nomograms to predict the postoperative invasive component in DCIS at core needle biopsy.

Methods

Two nomograms were developed using the data of previous meta-analysis and multivariate analysis. Nomograms were validated externally using the data of the authors' affiliation. The accuracy was validated by the expected-to-observed ratio and the Hosmer-Lemeshow goodness-of-fit test. Discrimination was validated by the area under the curve (AUC) of receiver operating characteristic (ROC) curve analysis.

Results

The nomogram using the meta-analysis study data was developed at http://dcis-m.surgery.kr.pe/, and the nomogram using the multivariate analysis study data was developed at http://dcis-k.surgery.kr.pe/. The Hosmer-Lemeshow goodness-of-fit test showed that the nomogram using multivariate analysis data (p = 0.131) was better calibrated than that using meta-analysis data (p < 0.001). ROC curve analysis showed statistically significant power of discrimination in both nomograms (AUC = 0.776, 0.751).

Conclusions

Both nomograms showed statistically significant discriminatory power, but the nomogram using the data of multivariate analysis was simpler and more reliable. These would be useful for the prediction of invasive cancer and the need for sentinel node biopsy in DCIS at core needle biopsy.",2014-04-30 +21370079,Omics data management and annotation.,"Technological Omics breakthroughs, including next generation sequencing, bring avalanches of data which need to undergo effective data management to ensure integrity, security, and maximal knowledge-gleaning. Data management system requirements include flexible input formats, diverse data entry mechanisms and views, user friendliness, attention to standards, hardware and software platform definition, as well as robustness. Relevant solutions elaborated by the scientific community include Laboratory Information Management Systems (LIMS) and standardization protocols facilitating data sharing and managing. In project planning, special consideration has to be made when choosing relevant Omics annotation sources, since many of them overlap and require sophisticated integration heuristics. The data modeling step defines and categorizes the data into objects (e.g., genes, articles, disorders) and creates an application flow. A data storage/warehouse mechanism must be selected, such as file-based systems and relational databases, the latter typically used for larger projects. Omics project life cycle considerations must include the definition and deployment of new versions, incorporating either full or partial updates. Finally, quality assurance (QA) procedures must validate data and feature integrity, as well as system performance expectations. We illustrate these data management principles with examples from the life cycle of the GeneCards Omics project (http://www.genecards.org), a comprehensive, widely used compendium of annotative information about human genes. 
For example, the GeneCards infrastructure has recently been changed from text files to a relational database, enabling better organization and views of the growing data. Omics data handling benefits from the wealth of Web-based information, the vast amount of public domain software, increasingly affordable hardware, and effective use of data management and annotation principles as outlined in this chapter.",2011-01-01 +25762456,Methods for transition toward computer assisted cognitive examination.,"

Introduction

We present a software framework which enables the extension of current methods for the assessment of cognitive fitness using recent technological advances.

Background

Screening for cognitive impairment is becoming more important as the world's population grows older. Current methods could be enhanced by use of computers. Introduction of new methods to clinics requires basic tools for collection and communication of collected data.

Objectives

To develop tools that, with minimal interference, offer new opportunities for the enhancement of the current interview based cognitive examinations.

Methods

We suggest methods and discuss process by which established cognitive tests can be adapted for data collection through digitization by pen enabled tablets. We discuss a number of methods for evaluation of collected data, which promise to increase the resolution and objectivity of the common scoring strategy based on visual inspection. By involving computers in the roles of both instructing and scoring, we aim to increase the precision and reproducibility of cognitive examination.

Results

The tools provided in Python framework CogExTools available at http://bsp.brain.riken.jp/cogextools/ enable the design, application and evaluation of screening tests for assessment of cognitive impairment. The toolbox is a research platform; it represents a foundation for further collaborative development by the wider research community and enthusiasts. It is free to download and use, and open-source.

Conclusion

We introduce a set of open-source tools that facilitate the design and development of new cognitive tests for modern technology. We provide these tools in order to enable the adaptation of technology for cognitive examination in clinical settings. The tools provide the first step in a possible transition toward standardized mental state examination using computers.",2015-03-12 +22139911,neXtProt: a knowledge platform for human proteins.,"neXtProt (http://www.nextprot.org/) is a new human protein-centric knowledge platform. Developed at the Swiss Institute of Bioinformatics (SIB), it aims to help researchers answer questions relevant to human proteins. To achieve this goal, neXtProt is built on a corpus containing both curated knowledge originating from the UniProtKB/Swiss-Prot knowledgebase and carefully selected and filtered high-throughput data pertinent to human proteins. This article presents an overview of the database and the data integration process. We also lay out the key future directions of neXtProt that we consider the necessary steps to make neXtProt the one-stop-shop for all research projects focusing on human proteins.",2011-12-01 +23935468,Co-expression profiling of autism genes in the mouse brain.,"Autism spectrum disorder (ASD) is one of the most prevalent and highly heritable neurodevelopmental disorders in humans. There is significant evidence that the onset and severity of ASD is governed in part by complex genetic mechanisms affecting the normal development of the brain. To date, a number of genes have been associated with ASD. However, the temporal and spatial co-expression of these genes in the brain remain unclear. To address this issue, we examined the co-expression network of 26 autism genes from AutDB (http://mindspec.org/autdb.html), in the framework of 3,041 genes whose expression energies have the highest correlation between the coronal and sagittal images from the Allen Mouse Brain Atlas database (http://mouse.brain-map.org). 
These data were derived from in situ hybridization experiments conducted on male, 56-day old C57BL/6J mice co-registered to the Allen Reference Atlas, and were used to generate a normalized co-expression matrix indicating the cosine similarity between expression vectors of genes in this database. The network formed by the autism-associated genes showed a higher degree of co-expression connectivity than seen for the other genes in this dataset (Kolmogorov-Smirnov P = 5×10⁻²⁸). Using Monte Carlo simulations, we identified two cliques of co-expressed genes that were significantly enriched with autism genes (A Bonferroni corrected P<0.05). Genes in both these cliques were significantly over-expressed in the cerebellar cortex (P = 1×10⁻⁵) suggesting possible implication of this brain region in autism. In conclusion, our study provides a detailed profiling of co-expression patterns of autism genes in the mouse brain, and suggests specific brain regions and new candidate genes that could be involved in autism etiology.",2013-07-25 +26981370,Microarray analysis of microRNA expression in bone marrow-derived progenitor cells from mice with type 2 diabetes.,"Bone-marrow derived vascular precursors are an important endogenous repair reservoir for vascular repair and neovascularization [1]. Therapies of stem/progenitor cells targeting on angiogenesis are considered hopeful solutions for tissue repair and regeneration. However, the dysfunction of patient-derived progenitor cells has been implicated in diabetes [2], which limited the efficacy of autologous cell therapies in the clinic [3,4]. MicroRNAs are important gene regulators whose functions remain largely unknown. In this project we reported the different microRNA expression profiles in bone marrow-derived progenitor cells from type 2 diabetic mice and their normal controls using microRNA array analysis. 
All microarray data are available at the Gene Expression Omnibus (GEO) at NCBI (http://www.ncbi.nlm.nih.gov/geo), under accession number GSE72616.",2015-11-23 +21689434,PhyloMap: an algorithm for visualizing relationships of large sequence data sets and its application to the influenza A virus genome.,"

Background

Results of phylogenetic analysis are often visualized as phylogenetic trees. Such a tree can typically only include up to a few hundred sequences. When more than a few thousand sequences are to be included, analyzing the phylogenetic relationships among them becomes a challenging task. The recent frequent outbreaks of influenza A viruses have resulted in the rapid accumulation of corresponding genome sequences. Currently, there are more than 7500 influenza A virus genomes in the database. There are no efficient ways of representing this huge data set as a whole, thus preventing a further understanding of the diversity of the influenza A virus genome.

Results

Here we present a new algorithm, ""PhyloMap"", which combines ordination, vector quantization, and phylogenetic tree construction to give an elegant representation of a large sequence data set. The use of PhyloMap on influenza A virus genome sequences reveals the phylogenetic relationships of the internal genes that cannot be seen when only a subset of sequences are analyzed.

Conclusions

The application of PhyloMap to influenza A virus genome data shows that it is a robust algorithm for analyzing large sequence data sets. It utilizes the entire data set, minimizes bias, and provides intuitive visualization. PhyloMap is implemented in JAVA, and the source code is freely available at http://www.biochem.uni-luebeck.de/public/software/phylomap.html.",2011-06-20 +22057158,AURA: Atlas of UTR Regulatory Activity.,"

Summary

The Atlas of UTR Regulatory Activity (AURA) is a manually curated and comprehensive catalog of human mRNA untranslated regions (UTRs) and UTR regulatory annotations. Through its intuitive web interface, it provides full access to a wealth of information on UTRs that integrates phylogenetic conservation, RNA sequence and structure data, single nucleotide variation, gene expression and gene functional descriptions from literature and specialized databases.

Availability

http://aura.science.unitn.it

Contact

aura@science.unitn.it; dassi@science.unitn.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-04 +22717648,wANNOVAR: annotating genetic variants for personal genomes via the web.,"

Background

High-throughput DNA sequencing platforms have become widely available. As a result, personal genomes are increasingly being sequenced in research and clinical settings. However, the resulting massive amounts of variants data pose significant challenges to the average biologists and clinicians without bioinformatics skills.

Methods and results

We developed a web server called wANNOVAR to address the critical needs for functional annotation of genetic variants from personal genomes. The server provides simple and intuitive interface to help users determine the functional significance of variants. These include annotating single nucleotide variants and insertions/deletions for their effects on genes, reporting their conservation levels (such as PhyloP and GERP++ scores), calculating their predicted functional importance scores (such as SIFT and PolyPhen scores), retrieving allele frequencies in public databases (such as the 1000 Genomes Project and NHLBI-ESP 5400 exomes), and implementing a 'variants reduction' protocol to identify a subset of potentially deleterious variants/genes. We illustrated how wANNOVAR can help draw biological insights from sequencing data, by analysing genetic variants generated on two Mendelian diseases.

Conclusions

We conclude that wANNOVAR will help biologists and clinicians take advantage of the personal genome information to expedite scientific discoveries. The wANNOVAR server is available at http://wannovar.usc.edu, and will be continuously updated to reflect the latest annotation information.",2012-06-20 +27964762,New tools to analyze overlapping coding regions.,"

Background

Retroviruses transcribe messenger RNA for the overlapping Gag and Gag-Pol polyproteins, by using a programmed -1 ribosomal frameshift which requires a slippery sequence and an immediate downstream stem-loop secondary structure, together called frameshift stimulating signal (FSS). It follows that the molecular evolution of this genomic region of HIV-1 is highly constrained, since the retroviral genome must contain a slippery sequence (sequence constraint), code appropriate peptides in reading frames 0 and 1 (coding requirements), and form a thermodynamically stable stem-loop secondary structure (structure requirement).

Results

We describe a unique computational tool, RNAsampleCDS, designed to compute the number of RNA sequences that code two (or more) peptides p,q in overlapping reading frames, that are identical (or have BLOSUM/PAM similarity that exceeds a user-specified value) to the input peptides p,q. RNAsampleCDS then samples a user-specified number of messenger RNAs that code such peptides; alternatively, RNAsampleCDS can exactly compute the position-specific scoring matrix and codon usage bias for all such RNA sequences. Our software allows the user to stipulate overlapping coding requirements for all 6 possible reading frames simultaneously, even allowing IUPAC constraints on RNA sequences and fixing GC-content. We generalize the notion of codon preference index (CPI) to overlapping reading frames, and use RNAsampleCDS to generate control sequences required in the computation of CPI. Moreover, by applying RNAsampleCDS, we are able to quantify the extent to which the overlapping coding requirement in HIV-1 [resp. HCV] contribute to the formation of the stem-loop [resp. double stem-loop] secondary structure known as the frameshift stimulating signal. Using our software, we confirm that certain experimentally determined deleterious HCV mutations occur in positions for which our software RNAsampleCDS and RNAiFold both indicate a single possible nucleotide. We generalize the notion of codon preference index (CPI) to overlapping coding regions, and use RNAsampleCDS to generate control sequences required in the computation of CPI for the Gag-Pol overlapping coding region of HIV-1. These applications show that RNAsampleCDS constitutes a unique tool in the software arsenal now available to evolutionary biologists.

Conclusion

Source code for the programs and additional data are available at http://bioinformatics.bc.edu/clotelab/RNAsampleCDS/ .",2016-12-13 +27627842,Salmonella Typhimurium and Staphylococcus aureus dynamics in/on variable (micro)structures of fish-based model systems at suboptimal temperatures.,"The limited knowledge concerning the influence of food (micro)structure on microbial dynamics decreases the accuracy of the developed predictive models, as most studies have mainly been based on experimental data obtained in liquid microbiological media or in/on real foods. The use of model systems has a great potential when studying this complex factor. Apart from the variability in (micro)structural properties, model systems vary in compositional aspects, as a consequence of their (micro)structural variation. In this study, different experimental food model systems, with compositional and physicochemical properties similar to fish patés, are developed to study the influence of food (micro)structure on microbial dynamics. The microbiological safety of fish products is of major importance given the numerous cases of salmonellosis and infections attributed to staphylococcus toxins. The model systems understudy represent food (micro)structures of liquids, aqueous gels, emulsions and gelled emulsions. The growth/inactivation dynamics and a modelling approach of combined growth and inactivation of Salmonella Typhimurium and Staphylococcus aureus, related to fish products, are investigated in/on these model systems at temperatures relevant to fish products' common storage (4°C) and to abuse storage temperatures (8 and 12°C). ComBase (http://www.combase.cc/) predictions compared with the maximum specific growth rate (μmax) values estimated by the Baranyi and Roberts model in the current study indicated that the (micro)structure influences the microbial dynamics. Overall, ComBase overestimated microbial growth at the same pH, aw and storage temperature. 
Finally, the storage temperature had also an influence on how much each model system affected the microbial dynamics.",2016-08-03 +22833525,RedoxDB--a curated database for experimentally verified protein oxidative modification.,"

Summary

Redox regulation and signaling, which are involved in various cellular processes, have become one of the research focuses in the past decade. Cysteine thiol groups are particularly susceptible to post-translational modification, and their reversible oxidation is of critical role in redox regulation and signaling. With the tremendous improvement of techniques, hundreds of redox proteins along with their redox-sensitive cysteines have been reported, and the number is still fast growing. However, until now there is no database to accommodate the rapid accumulation of information on protein oxidative modification. Here we present RedoxDB-a manually curated database for experimentally validated redox proteins. RedoxDB (version 1.0) consists of two datasets (A and B, for proteins with or without verified modified cysteines, respectively) and includes 2157 redox proteins containing 2203 cysteine residues with oxidative modification. For each modified cysteine, the exact position, modification type and flanking sequence are provided. Additional information, including gene name, organism, sequence, literature references and links to UniProt and PDB, is also supplied. The database supports several functions including data search, blast and browsing. Bulk download of the entire dataset is also available. We expect that RedoxDB will be useful for both experimental studies and computational analyses of protein oxidative modification.

Availability

The database is freely available at: http://biocomputer.bio.cuhk.edu.hk/RedoxDB.

Contact

djguo@cuhk.edu.hk

Supplementary information

Supplementary data are available at Bioinformatics Online.",2012-07-25 +26931183,HGVS Recommendations for the Description of Sequence Variants: 2016 Update.,"The consistent and unambiguous description of sequence variants is essential to report and exchange information on the analysis of a genome. In particular, DNA diagnostics critically depends on accurate and standardized description and sharing of the variants detected. The sequence variant nomenclature system proposed in 2000 by the Human Genome Variation Society has been widely adopted and has developed into an internationally accepted standard. The recommendations are currently commissioned through a Sequence Variant Description Working Group (SVD-WG) operating under the auspices of three international organizations: the Human Genome Variation Society (HGVS), the Human Variome Project (HVP), and the Human Genome Organization (HUGO). Requests for modifications and extensions go through the SVD-WG following a standard procedure including a community consultation step. Version numbers are assigned to the nomenclature system to allow users to specify the version used in their variant descriptions. Here, we present the current recommendations, HGVS version 15.11, and briefly summarize the changes that were made since the 2000 publication. Most focus has been on removing inconsistencies and tightening definitions allowing automatic data processing. An extensive version of the recommendations is available online, at http://www.HGVS.org/varnomen.",2016-03-25 +22188658,New concepts for building vocabulary for cell image ontologies.,"

Background

There are significant challenges associated with the building of ontologies for cell biology experiments including the large numbers of terms and their synonyms. These challenges make it difficult to simultaneously query data from multiple experiments or ontologies. If vocabulary terms were consistently used and reused across and within ontologies, queries would be possible through shared terms. One approach to achieving this is to strictly control the terms used in ontologies in the form of a pre-defined schema, but this approach limits the individual researcher's ability to create new terms when needed to describe new experiments.

Results

Here, we propose the use of a limited number of highly reusable common root terms, and rules for an experimentalist to locally expand terms by adding more specific terms under more general root terms to form specific new vocabulary hierarchies that can be used to build ontologies. We illustrate the application of the method to build vocabularies and a prototype database for cell images that uses a visual data-tree of terms to facilitate sophisticated queries based on experimental parameters. We demonstrate how the terminology might be extended by adding new vocabulary terms into the hierarchy of terms in an evolving process. In this approach, image data and metadata are handled separately, so we also describe a robust file-naming scheme to unambiguously identify image and other files associated with each metadata value. The prototype database http://sbd.nist.gov/ consists of more than 2000 images of cells and benchmark materials, and 163 metadata terms that describe experimental details, including many details about cell culture and handling. Image files of interest can be retrieved, and their data can be compared, by choosing one or more relevant metadata values as search terms. Metadata values for any dataset can be compared with corresponding values of another dataset through logical operations.

Conclusions

Organizing metadata for cell imaging experiments under a framework of rules that include highly reused root terms will facilitate the addition of new terms into a vocabulary hierarchy and encourage the reuse of terms. These vocabulary hierarchies can be converted into XML schema or RDF graphs for displaying and querying, but this is not necessary for using it to annotate cell images. Vocabulary data trees from multiple experiments or laboratories can be aligned at the root terms to facilitate query development. This approach of developing vocabularies is compatible with the major advances in database technology and could be used for building the Semantic Web.",2011-12-21 +26342387,DENdb: database of integrated human enhancers. ,"Enhancers are cis-acting DNA regulatory regions that play a key role in distal control of transcriptional activities. Identification of enhancers, coupled with a comprehensive functional analysis of their properties, could improve our understanding of complex gene transcription mechanisms and gene regulation processes in general. We developed DENdb, a centralized on-line repository of predicted enhancers derived from multiple human cell-lines. DENdb integrates enhancers predicted by five different methods generating an enriched catalogue of putative enhancers for each of the analysed cell-lines. DENdb provides information about the overlap of enhancers with DNase I hypersensitive regions, ChIP-seq regions of a number of transcription factors and transcription factor binding motifs, means to explore enhancer interactions with DNA using several chromatin interaction assays and enhancer neighbouring genes. DENdb is designed as a relational database that facilitates fast and efficient searching, browsing and visualization of information. 
Database URL: http://www.cbrc.kaust.edu.sa/dendb/.",2015-09-05 +27113629,Enrichment of SNPs in Functional Categories Reveals Genes Affecting Complex Traits.,"Genome-wide association studies (GWAS) have indicated potential to identify heritability of common complex phenotypes, but traditional approaches have limited ability to detect hiding signals because single SNP has weak effect size accounting for only a small fraction of overall phenotypic variations. To improve the power of GWAS, methods have been developed to identify truly associated genes by jointly testing effects of all SNPs. However, equally considering all SNPs within a gene might dilute strong signals of SNPs in real functional categories. Here, we observed a consistent pattern on enrichment of significant SNPs in eight functional categories across six phenotypes, with the highest enrichment in coding and both UTR regions while the lowest enrichment in the intron. Based on the pattern of SNP enrichment in functional categories, we developed a new approach for detecting gene associations on traits (DGAT) by selecting the most significant functional category and then using SNPs within it to assess gene associations. The method was found to be robust in type I error rate on simulated data, and to have mostly higher power in detecting associated genes for three different diseases than other methods. Further analysis indicated ability of the DGAT to detect novel genes. The DGAT is available by http://sparks-lab.org/server/DGAT.",2016-05-24 +22086948,MetaCrop 2.0: managing and exploring information about crop plant metabolism.,"MetaCrop is a manually curated repository of high-quality data about plant metabolism, providing different levels of detail from overview maps of primary metabolism to kinetic data of enzymes. It contains information about seven major crop plants with high agronomical importance and two model plants. 
MetaCrop is intended to support research aimed at the improvement of crops for both nutrition and industrial use. It can be accessed via web, web services and an add-on to the Vanted software. Here, we present several novel developments of the MetaCrop system and the extended database content. MetaCrop is now available in version 2.0 at http://metacrop.ipk-gatersleben.de.",2011-11-15 +22529179,"Towards semi-automated curation: using text mining to recreate the HIV-1, human protein interaction database.","Manual curation has long been used for extracting key information found within the primary literature for input into biological databases. The human immunodeficiency virus type 1 (HIV-1), human protein interaction database (HHPID), for example, contains 2589 manually extracted interactions, linked to 14,312 mentions in 3090 articles. The advancement of text-mining (TM) techniques has offered a possibility to rapidly retrieve such data from large volumes of text to a high degree of accuracy. Here, we present a recreation of the HHPID using the current state of the art in TM. To retrieve interactions, we performed gene/protein named entity recognition (NER) and applied two molecular event extraction tools on all abstracts and titles cited in the HHPID. Our best NER scores for precision, recall and F-score were 87.5%, 90.0% and 88.6%, respectively, while event extraction achieved 76.4%, 84.2% and 80.1%, respectively. We demonstrate that over 50% of the HHPID interactions can be recreated from abstracts and titles. Furthermore, from 49 available open-access full-text articles, we extracted a total of 237 unique HIV-1-human interactions, as opposed to 187 interactions recorded in the HHPID from the same articles. On average, we extracted 23 times more mentions of interactions and events from a full-text article than from an abstract and title, with a 6-fold increase in the number of unique interactions. 
We further demonstrated that more frequently occurring interactions extracted by TM are more likely to be true positives. Overall, the results demonstrate that TM was able to recover a large proportion of interactions, many of which were found within the HHPID, making TM a useful assistant in the manual curation process. Finally, we also retrieved other types of interactions in the context of HIV-1 that are not currently present in the HHPID, thus, expanding the scope of this data set. All data is available at http://gnode1.mib.man.ac.uk/HIV1-text-mining.",2012-04-23 +25342065,"piPipes: a set of pipelines for piRNA and transposon analysis via small RNA-seq, RNA-seq, degradome- and CAGE-seq, ChIP-seq and genomic DNA sequencing.","

Motivation

PIWI-interacting RNAs (piRNAs), 23-36 nt small silencing RNAs, repress transposon expression in the metazoan germ line, thereby protecting the genome. Although high-throughput sequencing has made it possible to examine the genome and transcriptome at unprecedented resolution, extracting useful information from gigabytes of sequencing data still requires substantial computational skills. Additionally, researchers may analyze and interpret the same data differently, generating results that are difficult to reconcile. To address these issues, we developed a coordinated set of pipelines, 'piPipes', to analyze piRNA and transposon-derived RNAs from a variety of high-throughput sequencing libraries, including small RNA, RNA, degradome or 7-methyl guanosine cap analysis of gene expression (CAGE), chromatin immunoprecipitation (ChIP) and genomic DNA-seq. piPipes can also produce figures and tables suitable for publication. By facilitating data analysis, piPipes provides an opportunity to standardize computational methods in the piRNA field.

Supplementary information

Supplementary information, including flowcharts and example figures for each pipeline, are available at Bioinformatics online.

Availability and implementation

piPipes is implemented in Bash, C++, Python, Perl and R. piPipes is free, open-source software distributed under the GPLv3 license and is available at http://bowhan.github.io/piPipes/.

Contact

Phillip.Zamore@umassmed.edu or Zhiping.Weng@umassmed.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-17 +22285827,Chado controller: advanced annotation management with a community annotation system.,"

Summary

We developed a controller that is compliant with the Chado database schema, GBrowse and genome annotation-editing tools such as Artemis and Apollo. It enables the management of public and private data, monitors manual annotation (with controlled vocabularies, structural and functional annotation controls) and stores versions of annotation for all modified features. The Chado controller uses PostgreSQL and Perl.

Availability

The Chado Controller package is available for download at http://www.gnpannot.org/content/chado-controller and runs on any Unix-like operating system, and documentation is available at http://www.gnpannot.org/content/chado-controller-doc The system can be tested using the GNPAnnot Sandbox at http://www.gnpannot.org/content/gnpannot-sandbox-form

Contact

valentin.guignon@cirad.fr; stephanie.sidibe-bocs@cirad.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-28 +25297886,KvarQ: targeted and direct variant calling from fastq reads of bacterial genomes.,"

Background

High-throughput DNA sequencing produces vast amounts of data, with millions of short reads that usually have to be mapped to a reference genome or newly assembled. Both reference-based mapping and de novo assembly are computationally intensive, generating large intermediary data files, and thus require bioinformatics skills that are often lacking in the laboratories producing the data. Moreover, many research and practical applications in microbiology require only a small fraction of the whole genome data.

Results

We developed KvarQ, a new tool that directly scans fastq files of bacterial genome sequences for known variants, such as single nucleotide polymorphisms (SNP), bypassing the need of mapping all sequencing reads to a reference genome and de novo assembly. Instead, KvarQ loads ""testsuites"" that define specific SNPs or short regions of interest in a reference genome, and directly synthesizes the relevant results based on the occurrence of these markers in the fastq files. KvarQ has a versatile command line interface and a graphical user interface. KvarQ currently ships with two ""testsuites"" for Mycobacterium tuberculosis, but new ""testsuites"" for other organisms can easily be created and distributed. In this article, we demonstrate how KvarQ can be used to successfully detect all main drug resistance mutations and phylogenetic markers in 880 bacterial whole genome sequences. The average scanning time per genome sequence was two minutes. The variant calls of a subset of these genomes were validated with a standard bioinformatics pipeline and revealed >99% congruency.

Conclusion

KvarQ is a user-friendly tool that directly extracts relevant information from fastq files. This enables researchers and laboratory technicians with limited bioinformatics expertise to scan and analyze raw sequencing data in a matter of minutes. KvarQ is open-source, and pre-compiled packages with a graphical user interface are available at http://www.swisstph.ch/kvarq.",2014-10-09 +25068035,"The Software Ontology (SWO): a resource for reproducibility in biomedical data analysis, curation and digital preservation.","

Motivation

Biomedical ontologists to date have concentrated on ontological descriptions of biomedical entities such as gene products and their attributes, phenotypes and so on. Recently, effort has diversified to descriptions of the laboratory investigations by which these entities were produced. However, much biological insight is gained from the analysis of the data produced from these investigations, and there is a lack of adequate descriptions of the wide range of software that are central to bioinformatics. We need to describe how data are analyzed for discovery, audit trails, provenance and reproducibility.

Results

The Software Ontology (SWO) is a description of software used to store, manage and analyze data. Input to the SWO has come from beyond the life sciences, but its main focus is the life sciences. We used agile techniques to gather input for the SWO and keep engagement with our users. The result is an ontology that meets the needs of a broad range of users by describing software, its information processing tasks, data inputs and outputs, data formats versions and so on. Recently, the SWO has incorporated EDAM, a vocabulary for describing data and related concepts in bioinformatics. The SWO is currently being used to describe software used in multiple biomedical applications.

Conclusion

The SWO is another element of the biomedical ontology landscape that is necessary for the description of biomedical entities and how they were discovered. An ontology of software used to analyze data produced by investigations in the life sciences can be made in such a way that it covers the important features requested and prioritized by its users. The SWO thus fits into the landscape of biomedical ontologies and is produced using techniques designed to keep it in line with user's needs.

Availability

The Software Ontology is available under an Apache 2.0 license at http://theswo.sourceforge.net/; the Software Ontology blog can be read at http://softwareontology.wordpress.com.",2014-06-02 +26685305,FluxModeCalculator: an efficient tool for large-scale flux mode computation.,"

Unlabelled

Elementary flux mode (EFM) analysis is a powerful technique for determining the metabolic capacities and robustness of stoichiometric networks. Recently, several improvements have been made to the algorithm for enumerating the EFMs, making the study of large models possible. However, currently these tools require high performance workstations to perform large-scale EFM computations, thus limiting their applicability. We developed a more time and memory efficient implementation of the algorithm for EFM enumeration in MATLAB, called FluxModeCalculator, which enables large-scale EFM computation on ordinary desktop computers.

Availability and implementation

FluxModeCalculator is open source and freely available under the terms of the GNU General Public License v3.0 at http://www.lumc.nl/jan-bert-van-klinken

Contact

j.b.van_klinken@lumc.nl

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-18 +25005748,Improving peak detection in high-resolution LC/MS metabolomics data using preexisting knowledge and machine learning approach.,"

Motivation

Peak detection is a key step in the preprocessing of untargeted metabolomics data generated from high-resolution liquid chromatography-mass spectrometry (LC/MS). The common practice is to use filters with predetermined parameters to select peaks in the LC/MS profile. This rigid approach can cause suboptimal performance when the choice of peak model and parameters do not suit the data characteristics.

Results

Here we present a method that learns directly from various data features of the extracted ion chromatograms (EICs) to differentiate between true peak regions from noise regions in the LC/MS profile. It utilizes the knowledge of known metabolites, as well as robust machine learning approaches. Unlike currently available methods, this new approach does not assume a parametric peak shape model and allows maximum flexibility. We demonstrate the superiority of the new approach using real data. Because matching to known metabolites entails uncertainties and cannot be considered a gold standard, we also developed a probabilistic receiver-operating characteristic (pROC) approach that can incorporate uncertainties.

Availability and implementation

The new peak detection approach is implemented as part of the apLCMS package available at http://web1.sph.emory.edu/apLCMS/ CONTACT: tyu8@emory.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-07 +26335387,SFESA: a web server for pairwise alignment refinement by secondary structure shifts.,"

Background

Protein sequence alignment is essential for a variety of tasks such as homology modeling and active site prediction. Alignment errors remain the main cause of low-quality structure models. A bioinformatics tool to refine alignments is needed to make protein alignments more accurate.

Results

We developed the SFESA web server to refine pairwise protein sequence alignments. Compared to the previous version of SFESA, which required a set of 3D coordinates for a protein, the new server will search a sequence database for the closest homolog with an available 3D structure to be used as a template. For each alignment block defined by secondary structure elements in the template, SFESA evaluates alignment variants generated by local shifts and selects the best-scoring alignment variant. A scoring function that combines the sequence score of profile-profile comparison and the structure score of template-derived contact energy is used for evaluation of alignments. PROMALS pairwise alignments refined by SFESA are more accurate than those produced by current advanced alignment methods such as HHpred and CNFpred. In addition, SFESA also improves alignments generated by other software.

Conclusions

SFESA is a web-based tool for alignment refinement, designed for researchers to compute, refine, and evaluate pairwise alignments with a combined sequence and structure scoring of alignment blocks. To our knowledge, the SFESA web server is the only tool that refines alignments by evaluating local shifts of secondary structure elements. The SFESA web server is available at http://prodata.swmed.edu/sfesa.",2015-09-03 +25980368,A novel mixed integer programming for multi-biomarker panel identification by distinguishing malignant from benign colorectal tumors.,"Multi-biomarker panels can capture the nonlinear synergy among biomarkers and they are important to aid in the early diagnosis and ultimately battle complex diseases. However, identification of these multi-biomarker panels from case and control data is challenging. For example, the exhaustive search method is computationally infeasible when the data dimension is high. Here, we propose a novel method, MILP_k, to identify serum-based multi-biomarker panel to distinguish colorectal cancers (CRC) from benign colorectal tumors. Specifically, the multi-biomarker panel detection problem is modeled by a mixed integer programming to maximize the classification accuracy. Then we measured the serum profiling data for 101 CRC patients and 95 benign patients. The 61 biomarkers were analyzed individually and further their combinations by our method. We discovered 4 biomarkers as the optimal small multi-biomarker panel, including known CRC biomarkers CEA and IL-10 as well as novel biomarkers IMA and NSE. This multi-biomarker panel obtains leave-one-out cross-validation (LOOCV) accuracy to 0.7857 by nearest centroid classifier. An independent test of this panel by support vector machine (SVM) with threefold cross validation gets an AUC 0.8438. This greatly improves the predictive accuracy by 20% over the single best biomarker. 
Further extension of this 4-biomarker panel to a larger 13-biomarker panel improves the LOOCV to 0.8673 with independent AUC 0.8437. Comparison with the exhaustive search method shows that our method dramatically reduces the searching time by 1000-fold. Experiments on the early cancer stage samples reveal two panel of biomarkers and show promising accuracy. The proposed method allows us to select the subset of biomarkers with best accuracy to distinguish case and control samples given the number of selected biomarkers. Both receiver operating characteristic curve and precision-recall curve show our method's consistent performance gain in accuracy. Our method also shows its advantage in capturing synergy among selected biomarkers. The multi-biomarker panel far outperforms the simple combination of best single features. Close investigation of the multi-biomarker panel illustrates that our method possesses the ability to remove redundancy and reveals complementary biomarker combinations. In addition, our method is efficient and can select multi-biomarker panel with more than 5 biomarkers, for which the exhaustive methods fail. In conclusion, we propose a promising model to improve the clinical data interpretability and to serve as a useful tool for other complex disease studies. Our small multi-biomarker panel, CEA, IL-10, IMA, and NSE, may provide insights on the disease status of colorectal diseases. The implementation of our method in MATLAB is available via the website: http://doc.aporc.org/wiki/MILP_k.",2015-05-15 +26677962,CG2AA: backmapping protein coarse-grained structures.,"

Unlabelled

Coarse grain (CG) models allow long-scale simulations with a much lower computational cost than that of all-atom simulations. However, the absence of atomistic detail impedes the analysis of specific atomic interactions that are determinant in most interesting biomolecular processes. In order to study these phenomena, it is necessary to reconstruct the atomistic structure from the CG representation. This structure can be analyzed by itself or be used as an onset for atomistic molecular dynamics simulations. In this work, we present a computer program that accurately reconstructs the atomistic structure from a CG model for proteins, using a simple geometrical algorithm.

Availability and implementation

The software is free and available online at http://www.ic.fcen.uba.ar/cg2aa/cg2aa.py

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

lula@qi.fcen.uba.ar.",2015-12-16 +21980353,pubmed2ensembl: a resource for mining the biological literature on genes.,"

Background

The last two decades have witnessed a dramatic acceleration in the production of genomic sequence information and publication of biomedical articles. Despite the fact that genome sequence data and publications are two of the most heavily relied-upon sources of information for many biologists, very little effort has been made to systematically integrate data from genomic sequences directly with the biological literature. For a limited number of model organisms dedicated teams manually curate publications about genes; however for species with no such dedicated staff many thousands of articles are never mapped to genes or genomic regions.

Methodology/principal findings

To overcome the lack of integration between genomic data and biological literature, we have developed pubmed2ensembl (http://www.pubmed2ensembl.org), an extension to the BioMart system that links over 2,000,000 articles in PubMed to nearly 150,000 genes in Ensembl from 50 species. We use several sources of curated (e.g., Entrez Gene) and automatically generated (e.g., gene names extracted through text-mining on MEDLINE records) sources of gene-publication links, allowing users to filter and combine different data sources to suit their individual needs for information extraction and biological discovery. In addition to extending the Ensembl BioMart database to include published information on genes, we also implemented a scripting language for automated BioMart construction and a novel BioMart interface that allows text-based queries to be performed against PubMed and PubMed Central documents in conjunction with constraints on genomic features. Finally, we illustrate the potential of pubmed2ensembl through typical use cases that involve integrated queries across the biomedical literature and genomic data.

Conclusion/significance

By allowing biologists to find the relevant literature on specific genomic regions or sets of functionally related genes more easily, pubmed2ensembl offers a much-needed genome informatics inspired solution to accessing the ever-increasing biomedical literature.",2011-09-29 +27141960,SimpleSynteny: a web-based tool for visualization of microsynteny across multiple species.,"Defining syntenic relationships among orthologous gene clusters is a frequent undertaking of biologists studying organismal evolution through comparative genomic approaches. With the increasing availability of genome data made possible through next-generation sequencing technology, there is a growing need for user-friendly tools capable of assessing synteny. Here we present SimpleSynteny, a new web-based platform capable of directly interrogating collinearity of local genomic neighbors across multiple species in a targeted manner. SimpleSynteny provides a pipeline for evaluating the synteny of a preselected set of gene targets across multiple organismal genomes. An emphasis has been placed on ease-of-use, and users are only required to submit FASTA files for their genomes and genes of interest. SimpleSynteny then guides the user through an iterative process of exploring and customizing genomes individually before combining them into a final high-resolution figure. Because the process is iterative, it allows the user to customize the organization of multiple contigs and incorporate knowledge from additional sources, rather than forcing complete dependence on the computational predictions. Additional tools are provided to help the user identify which contigs in a genome assembly contain gene targets and to optimize analyses of circular genomes. SimpleSynteny is freely available at: http://www.SimpleSynteny.com.",2016-05-03 +27128514,Modifiable Risk Factors in Total Joint Arthroplasty: A Pilot Study.,"

Unlabelled

Strong evidence exists to suggest that morbid obesity, smoking, and poorly controlled diabetes mellitus are associated with poorer outcomes after total joint arthroplasty. To our knowledge, no study has reported the effect of the implementation of a risk reduction strategy. Risk factors, based on published data, were defined as Body Mass Index (BMI)>40, Hemoglobin A1c (HbA1c) >8.0, and use of any tobacco product. A retrospective pilot review was done of a 3-month period using this protocol in the practice of a single fellowship-trained academic arthroplasty surgeon (DRJ). Outcomes were evaluated in the subsequent 3-month period. Overall 19/29 (65.5%) patients identified to be ""at risk"" and offered support for modification followed up under the care of their index surgeon. 11/19 (57.9%) improved their risk factors and 8/19 (42.1%) ultimately met the specific goals set for surgery with 4 (21%) ultimately undergoing their replacement procedure during the 6-month study period. These initial results suggest that a significant proportion of our patients were willing and able to modify their risk before surgery.

Level of evidence

Level III retrospective study. [Full article available at http://rimed.org/rimedicaljournal-2016-05.asp, free with no login].",2016-05-02 +25055984,Knowledge-fused differential dependency network models for detecting significant rewiring in biological networks.,"

Background

Modeling biological networks serves as both a major goal and an effective tool of systems biology in studying mechanisms that orchestrate the activities of gene products in cells. Biological networks are context-specific and dynamic in nature. To systematically characterize the selectively activated regulatory components and mechanisms, modeling tools must be able to effectively distinguish significant rewiring from random background fluctuations. While differential networks cannot be constructed by existing knowledge alone, novel incorporation of prior knowledge into data-driven approaches can improve the robustness and biological relevance of network inference. However, the major unresolved roadblocks include: big solution space but a small sample size; highly complex networks; imperfect prior knowledge; missing significance assessment; and heuristic structural parameter learning.

Results

To address these challenges, we formulated the inference of differential dependency networks that incorporate both conditional data and prior knowledge as a convex optimization problem, and developed an efficient learning algorithm to jointly infer the conserved biological network and the significant rewiring across different conditions. We used a novel sampling scheme to estimate the expected error rate due to ""random"" knowledge. Based on that scheme, we developed a strategy that fully exploits the benefit of this data-knowledge integrated approach. We demonstrated and validated the principle and performance of our method using synthetic datasets. We then applied our method to yeast cell line and breast cancer microarray data and obtained biologically plausible results. The open-source R software package and the experimental data are freely available at http://www.cbil.ece.vt.edu/software.htm.

Conclusions

Experiments on both synthetic and real data demonstrate the effectiveness of the knowledge-fused differential dependency network in revealing the statistically significant rewiring in biological networks. The method efficiently leverages data-driven evidence and existing biological knowledge while remaining robust to the false positive edges in the prior knowledge. The identified network rewiring events are supported by previous studies in the literature and also provide new mechanistic insight into the biological systems. We expect the knowledge-fused differential dependency network analysis, together with the open-source R package, to be an important and useful bioinformatics tool in biological network analyses.",2014-07-24 +27076997,FOVEA: a new program to standardize the measurement of foveal pit morphology.,"The fovea is one of the most studied retinal specializations in vertebrates, which consists of an invagination of the retinal tissue with high packing of cone photoreceptors, leading to high visual resolution. Between species, foveae differ morphologically in the depth and width of the foveal pit and the steepness of the foveal walls, which could influence visual perception. However, there is no standardized methodology to measure the contour of the foveal pit across species. We present here FOVEA, a program for the quantification of foveal parameters (width, depth, slope of foveal pit) using images from histological cross-sections or optical coherence tomography (OCT). FOVEA is based on a new algorithm to detect the inner retina contour based on the color variation of the image. We evaluated FOVEA by comparing the fovea morphology of two Passerine birds based on histological cross-sections and its performance with data from previously published OCT images. FOVEA detected differences between species and its output was not significantly different from previous estimates using OCT software. 
FOVEA can be used for comparative studies to better understand the evolution of the fovea morphology in vertebrates as well as for diagnostic purposes in veterinary pathology. FOVEA is freely available for academic use and can be downloaded at: http://estebanfj.bio.purdue.edu/fovea.",2016-04-11 +26338767,DIGGIT: a Bioconductor package to infer genetic variants driving cellular phenotypes.,"

Unlabelled

Identification of driver mutations in human diseases is often limited by cohort size and availability of appropriate statistical models. We propose a method for the systematic discovery of genetic alterations that are causal determinants of disease, by prioritizing genes upstream of functional disease drivers, within regulatory networks inferred de novo from experimental data. Here we present the implementation of Driver-gene Inference by Genetical-Genomic Information Theory as an R-system package.

Availability and implementation

The diggit package is freely available under the GPL-2 license from Bioconductor (http://www.bioconductor.org).",2015-09-02 +25730499,Constructing query-driven dynamic machine learning model with application to protein-ligand binding sites prediction.,"We are facing an era with annotated biological data rapidly and continuously generated. How to effectively incorporate new annotated data into the learning step is crucial for enhancing the performance of a bioinformatics prediction model. Although machine-learning-based methods have been extensively used for dealing with various biological problems, existing approaches usually train static prediction models based on fixed training datasets. The static approaches are found having several disadvantages such as low scalability and impractical when training dataset is huge. In view of this, we propose a dynamic learning framework for constructing query-driven prediction models. The key difference between the proposed framework and the existing approaches is that the training set for the machine learning algorithm of the proposed framework is dynamically generated according to the query input, as opposed to training a general model regardless of queries in traditional static methods. Accordingly, a query-driven predictor based on the smaller set of data specifically selected from the entire annotated base dataset will be applied on the query. The new way for constructing the dynamic model enables us capable of updating the annotated base dataset flexibly and using the most relevant core subset as the training set makes the constructed model having better generalization ability on the query, showing ""part could be better than all"" phenomenon. According to the new framework, we have implemented a dynamic protein-ligand binding sites predictor called OSML (On-site model for ligand binding sites prediction). 
Computer experiments on 10 different ligand types of three hierarchically organized levels show that OSML outperforms most existing predictors. The results indicate that the current dynamic framework is a promising future direction for bridging the gap between the rapidly accumulated annotated biological data and the effective machine-learning-based predictors. OSML web server and datasets are freely available at: http://www.csbio.sjtu.edu.cn/bioinf/OSML/ for academic use.",2015-01-01 +25262207,Comprehensive analysis of DNA methylation data with RnBeads.,"RnBeads is a software tool for large-scale analysis and interpretation of DNA methylation data, providing a user-friendly analysis workflow that yields detailed hypertext reports (http://rnbeads.mpi-inf.mpg.de/). Supported assays include whole-genome bisulfite sequencing, reduced representation bisulfite sequencing, Infinium microarrays and any other protocol that produces high-resolution DNA methylation data. Notable applications of RnBeads include the analysis of epigenome-wide association studies and epigenetic biomarker discovery in cancer cohorts.",2014-09-28 +25610492,Mapping mutations in plant genomes with the user-friendly web application CandiSNP.,"

Background

Analysis of mutants isolated from forward-genetic screens has revealed key components of several plant signalling pathways. Mapping mutations by position, either using classical methods or whole genome high-throughput sequencing (HTS), largely relies on the analysis of genome-wide polymorphisms in F2 recombinant populations. Combining bulk segregant analysis with HTS has accelerated the identification of causative mutations and has been widely adopted in many research programmes. A major advantage of HTS is the ability to perform bulk segregant analysis after back-crossing to the parental line rather than out-crossing to a polymorphic ecotype, which reduces genetic complexity and avoids issues with phenotype penetrance in different ecotypes. Plotting the positions of homozygous polymorphisms in a mutant genome identifies areas of low recombination and is an effective way to detect molecular linkage to a phenotype of interest.

Results

We describe the use of single nucleotide polymorphism (SNP) density plots as a mapping strategy to identify and refine chromosomal positions of causative mutations from screened plant populations. We developed a web application called CandiSNP that generates density plots from user-provided SNP data obtained from HTS. Candidate causative mutations, defined as SNPs causing non-synonymous changes in annotated coding regions are highlighted on the plots and listed in a table. We use data generated from a recent mutant screen in the model plant Arabidopsis thaliana as proof-of-concept for the validity of our tool.

Conclusions

CandiSNP is a user-friendly application that will aid in novel discoveries from forward-genetic mutant screens. It is particularly useful for analysing HTS data from bulked back-crossed mutants, which contain fewer polymorphisms than data generated from out-crosses. The web-application is freely available online at http://candisnp.tsl.ac.uk.",2014-12-30 +27153713,ecceTERA: comprehensive gene tree-species tree reconciliation using parsimony.,"

Unlabelled

A gene tree-species tree reconciliation explains the evolution of a gene tree within the species tree given a model of gene-family evolution. We describe ecceTERA, a program that implements a generic parsimony reconciliation algorithm, which accounts for gene duplication, loss and transfer (DTL) as well as speciation, involving sampled and unsampled lineages, within undated, fully dated or partially dated species trees. The ecceTERA reconciliation model and algorithm generalize or improve upon most published DTL parsimony algorithms for binary species trees and binary gene trees. Moreover, ecceTERA can estimate accurate species-tree aware gene trees using amalgamation.

Availability and implementation

ecceTERA is freely available under http://mbb.univ-montp2.fr/MBB/download_sources/16__ecceTERA and can be run online at http://mbb.univ-montp2.fr/MBB/subsection/softExec.php?soft=eccetera

Contact

celine.scornavacca@umontpellier.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-26 +28968952,DNA-damage related genes and clinical outcome in hormone receptor positive breast cancer.,"

Background

Control of DNA damage is frequently deregulated in solid tumors. Upregulation of genes within this process can be indicative of a more aggressive phenotype and linked with worse outcome. In the present article we identify DNA damage related genes associated with worse outcome in breast cancer.

Results

2286 genes were differentially expressed between normal breast tissue and basal-like tumors, and 62 included in the DNA metabolic process function. Expression of RAD51, GINS1, TRIP13 and MCM2 were associated with detrimental relapse free survival (RFS) and overall survival (OS) in luminal tumors. The combined analyses of TRIP13+RAD51+MCM2 showed the worse association for RFS (HR 2.25 (1.51-3.35) log rank p= 4.1e-05) and TRIP13+RAD51 for OS (HR 5.13 (0.6-44.17) log rank p=0.098) in ER+/HER2- tumors. TRIP13 is amplified in 3.1% of breast cancers.

Methods

Transcriptomic analyses using public datasets evaluating expression values between normal breast tissue and TNBC identified upregulated genes. Genes included in the DNA metabolic process were selected and confirmed using data contained at oncomine (www.oncomine.org). Evaluation of the selected genes with RFS and OS was performed using the KM Plotter Online Tool (http://www.kmplot.com). Evaluation of molecular alterations was performed using cBioportal (www.cbioportal.org).

Conclusions

Expression of DNA metabolic related genes RAD51, GINS1, TRIP13 and MCM2 are associated with poor outcome. Combinations of some of these genes are linked to poor RFS or OS in luminal A, B and ER+HER2- tumors. Evaluation of its predictive capacity in prospective studies is required.",2016-07-28 +23539303,An accessible database for mouse and human whole transcriptome qPCR primers.,"

Motivation

Real time quantitative polymerase chain reaction (qPCR) is an important tool in quantitative studies of DNA and RNA molecules; especially in transcriptome studies, where different primer combinations allow identification of specific transcripts such as splice variants or precursor messenger RNA. Several softwares that implement various rules for optimal primer design are available. Nevertheless, as designing qPCR primers needs to be done manually, the repeated task is tedious, time consuming and prone to errors.

Results

We used a set of rules to automatically design all possible exon-exon and intron-exon junctions in the human and mouse transcriptomes. The resulting database is included as a track in the UCSC genome browser, making it widely accessible and easy to use.

Availability

The database is available from the UCSC genome browser (http://genome.ucsc.edu/), track name 'Whole Transcriptome qPCR Primers' for the hg19 (Human) and mm10 (Mouse) genome versions. Batch query is available in the following: http://www.weizmann.ac.il/complex/compphys/software/Amit/primers/batch_query_qpcr_primers.htm

Contact

amit.zeisel@weizmann.ac.il or eytan.domany@weizmann.ac.il

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-03-28 +22332237,Phylogenomic analysis of bacterial and archaeal sequences with AMPHORA2.,"

Summary

With the explosive growth of bacterial and archaeal sequence data, large-scale phylogenetic analyses present both opportunities and challenges. Here we describe AMPHORA2, an automated phylogenomic inference tool that can be used for high-throughput, high-quality genome tree reconstruction and metagenomic phylotyping. Compared with its predecessor, AMPHORA2 has several major enhancements and new functions: it has a greatly expanded phylogenetic marker database and can analyze both bacterial and archaeal sequences; it incorporates probability-based sequence alignment masks that improve the phylogenetic accuracy; it can analyze DNA as well as protein sequences and is more sensitive in marker identification; finally, it is over 100× faster in metagenomic phylotyping.

Availability

http://wolbachia.biology.virginia.edu/WuLab/Software.html.

Contact

mw4yv@virginia.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-02-12 +22102591,"IBIS (Inferred Biomolecular Interaction Server) reports, predicts and integrates multiple types of conserved interactions for proteins.","We have recently developed the Inferred Biomolecular Interaction Server (IBIS) and database, which reports, predicts and integrates different types of interaction partners and locations of binding sites in proteins based on the analysis of homologous structural complexes. Here, we highlight several new IBIS features and options. The server's webpage is now redesigned to allow users easier access to data for different interaction types. An entry page is added to give a quick summary of available results and to now accept protein sequence accessions. To elucidate the formation of protein complexes, not just binary interactions, IBIS currently presents an expandable interaction network. Previously, IBIS provided annotations for four different types of binding partners: proteins, small molecules, nucleic acids and peptides; in the current version a new protein-ion interaction type has been added. Several options provide easy downloads of IBIS data for all Protein Data Bank (PDB) protein chains and the results for each query. In this study, we show that about one-third of all RefSeq sequences can be annotated with IBIS interaction partners and binding sites. The IBIS server is available at http://www.ncbi.nlm.nih.gov/Structure/ibis/ibis.cgi and updated biweekly.",2011-11-18 +26519506,Riboswitch Scanner: an efficient pHMM-based web-server to detect riboswitches in genomic sequences.,"

Unlabelled

Riboswitches are non-coding RNA located in the 5' untranslated regions where they bind a target metabolite used to specify the riboswitch class and control the expression of associated genes. Accurate identification of riboswitches is the first step towards understanding their regulatory and functional roles in the cell. In this article, we describe a new web application named Riboswitch Scanner which provides an automated pipeline for pHMM-based detection of riboswitches in partial as well as complete genomic sequences rapidly, with high sensitivity and specificity.

Availability and implementation

Riboswitch Scanner can be freely accessed on the web at http://service.iiserkol.ac.in/~riboscan/

Contact

mukherjee.sumit89@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-30 +24885275,Predicting the fungal CUG codon translation with Bagheera.,"

Background

Many eukaryotes have been shown to use alternative schemes to the universal genetic code. While most Saccharomycetes, including Saccharomyces cerevisiae, use the standard genetic code translating the CUG codon as leucine, some yeasts, including many but not all of the ""Candida"", translate the same codon as serine. It has been proposed that the change in codon identity was accomplished by an almost complete loss of the original CUG codons, making the CUG positions within the extant species highly discriminative for the one or other translation scheme.

Results

In order to improve the prediction of genes in yeast species by providing the correct CUG decoding scheme we implemented a web server, called Bagheera, that allows determining the most probable CUG codon translation for a given transcriptome or genome assembly based on extensive reference data. As reference data we use 2071 manually assembled and annotated sequences from 38 cytoskeletal and motor proteins belonging to 79 yeast species. The web service includes a pipeline, which starts with predicting and aligning homologous genes to the reference data. CUG codon positions within the predicted genes are analysed with respect to amino acid similarity and CUG codon conservation in related species. In addition, the tRNACAG gene is predicted in genomic data and compared to known leu-tRNACAG and ser-tRNACAG genes. Bagheera can also be used to evaluate any mRNA and protein sequence data with the codon usage of the respective species. The usage of the system has been demonstrated by analysing six genomes not included in the reference data.

Conclusions

Gene prediction and consecutive comparison with reference data from other Saccharomycetes are sufficient to predict the most probable decoding scheme for CUG codons. This approach has been implemented into Bagheera (http://www.motorprotein.de/bagheera).",2014-05-29 +26761736,Isotonic Modeling with Non-Differentiable Loss Functions with Application to Lasso Regularization.,"In this paper we present an algorithmic approach for fitting isotonic models under convex, yet non-differentiable, loss functions. It is a generalization of the greedy non-regret approach proposed by Luss and Rosset (2014) for differentiable loss functions, taking into account the sub-gradiental extensions required. We prove that our suggested algorithm solves the isotonic modeling problem while maintaining favorable computational and statistical properties. As our suggested algorithm may be used for any non-differentiable loss function, we focus our interest on isotonic modeling for either regression or two-class classification with appropriate log-likelihood loss and lasso penalty on the fitted values. This combination allows us to maintain the non-parametric nature of isotonic modeling, while controlling model complexity through regularization. We demonstrate the efficiency and usefulness of this approach on both synthetic and real world data. An implementation of our suggested solution is publicly available from the first author's website (https://sites.google.com/site/amichaipainsky/software).",2016-02-01 +27153597,gEVAL - a web-based browser for evaluating genome assemblies.,"

Motivation

For most research approaches, genome analyses are dependent on the existence of a high quality genome reference assembly. However, the local accuracy of an assembly remains difficult to assess and improve. The gEVAL browser allows the user to interrogate an assembly in any region of the genome by comparing it to different datasets and evaluating the concordance. These analyses include: a wide variety of sequence alignments, comparative analyses of multiple genome assemblies, and consistency with optical and other physical maps. gEVAL highlights allelic variations, regions of low complexity, abnormal coverage, and potential sequence and assembly errors, and offers strategies for improvement. Although gEVAL focuses primarily on sequence integrity, it can also display arbitrary annotation including from Ensembl or TrackHub sources. We provide gEVAL web sites for many human, mouse, zebrafish and chicken assemblies to support the Genome Reference Consortium, and gEVAL is also downloadable to enable its use for any organism and assembly.

Availability and implementation

Web Browser: http://geval.sanger.ac.uk, Plugin: http://wchow.github.io/wtsi-geval-plugin

Contact

kj2@sanger.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-07 +24883265,Hypoxia alters MicroRNA expression in rat cortical pericytes.,"Microvascular adaptation to metabolic stress is important in the maintenance of tissue homeostasis. Nowhere is this more important than in the central nervous system (CNS) where the cellular constituents of the neurovascularture including endothelial cells, pericytes and some astroglia must make fine-tuned autoregulatory modulations that maintain the delicate balance between oxygen availability and metabolic demand. miRNAs have been reported to play an important regulatory role in many cellular functions including cell differentiation, growth and proliferation, lineage determination, and metabolism. In this study, we investigated the possible role of miRNAs in the CNS capillary pericyte response to hypoxic stress. Micro-array analysis was used to examine the expression of 388 rat miRNAs in primary rat cortical pericytes with and without exposure to low oxygen (1%) after 24 or 48 hr. Pericytes subjected to hypoxia showed 27 miRNAs that were higher than control and 31 that were lower. Validation and quantification was performed by Real Time RT-PCR on pericytes subjected to 2 hr, 24 hr, or 48 hr of hypoxia. Hypoxia induced changes included physiological pathways governing the stress response, angiogenesis, migration and cell cycle regulation. miRNAs associated with HIF-1α (miR-322[1], miR-199a [2]), TGF-β1 (miR-140[3], miR-145[4], miR-376b-3p[5]) and VEGF (miR-126a[6], miR-297[7], miR-16[8], miR-17-5p[9]) were differentially regulated. 
Systematic and integrative analysis of possible gene targets analyzed by DAVID bioinformatics resource (http://david.abcc.ncifcrf.gov) and MetaSearch 2.0 (GeneGo) for some of these miRNAs was conducted to determine possible gene targets and pathways that may be affected by the post-transcriptional changes after hypoxic insult.",2013-01-01 +26794669,The Deepwater Horizon Oil Spill and Physical Health among Adult Women in Southern Louisiana: The Women and Their Children's Health (WaTCH) Study.,"

Background

The Deepwater Horizon Oil Spill (DHOS) is the largest oil spill in U.S. history, negatively impacting Gulf Coast residents and the surrounding ecosystem. To date, no studies have been published concerning physical health outcomes associated with the DHOS in the general community.

Objectives

We characterized individual DHOS exposure using survey data and examined the association between DHOS exposure and physical health.

Methods

Baseline data from 2,126 adult women residing in southern Louisiana and enrolled in the Women and Their Children's Health study were analyzed. Exploratory factor analysis was used to characterize DHOS exposure. Odds ratios and 95% confidence intervals for the associations between DHOS exposure and physical health symptoms were estimated using multivariate logistic regression.

Results

A two-factor solution was identified as the best fit for DHOS exposure: physical-environmental exposure and economic exposure. High physical-environmental exposure was significantly associated with all of the physical health symptoms, with the strongest associations for burning in nose, throat, or lungs (OR = 4.73; 95% CI: 3.10, 7.22), sore throat (OR = 4.66; 95% CI: 2.89, 7.51), dizziness (OR = 4.21; 95% CI: 2.69, 6.58), and wheezing (OR = 4.20; 95% CI: 2.86, 6.17). Women who had high-economic exposure were significantly more likely to report wheezing (OR = 1.92; 95% CI: 1.32, 2.79); headaches (OR = 1.81; 95% CI: 1.41, 2.58); watery, burning, itchy eyes (OR = 1.61; 95% CI: 1.20, 2.16); and stuffy, itchy, runny nose (OR = 1.56; 95% CI: 1.16, 2.08).

Conclusions

Among southern Louisiana women, both physical-environmental and economic exposure to the DHOS were associated with an increase in self-reported physical health outcomes. Additional longitudinal studies of this unique cohort are needed to elucidate the impact of the DHOS on short- and long-term human health.

Citation

Peres LC, Trapido E, Rung AL, Harrington DJ, Oral E, Fang Z, Fontham E, Peters ES. 2016. The Deepwater Horizon Oil Spill and physical health among adult women in southern Louisiana: the Women and Their Children's Health (WaTCH) study. Environ Health Perspect 124:1208-1213; http://dx.doi.org/10.1289/ehp.1510348.",2016-01-22 +25228922,"ExpressionData - A public resource of high quality curated datasets representing gene expression across anatomy, development and experimental conditions.","Reference datasets are often used to compare, interpret or validate experimental data and analytical methods. In the field of gene expression, several reference datasets have been published. Typically, they consist of individual baseline or spike-in experiments carried out in a single laboratory and representing a particular set of conditions. Here, we describe a new type of standardized datasets representative for the spatial and temporal dimensions of gene expression. They result from integrating expression data from a large number of globally normalized and quality controlled public experiments. Expression data is aggregated by anatomical part or stage of development to yield a representative transcriptome for each category. For example, we created a genome-wide expression dataset representing the FDA tissue panel across 35 tissue types. The proposed datasets were created for human and several model organisms and are publicly available at http://www.expressiondata.org.",2014-08-31 +26081755,Whole-genome cartography of p53 response elements ranked on transactivation potential.,"

Background

Many recent studies using ChIP-seq approaches cross-referenced to transcriptome data and also to potentially unbiased in vitro DNA binding selection experiments are detailing with increasing precision the p53-directed gene regulatory network that, nevertheless, is still expanding. However, most experiments have been conducted in established cell lines subjected to specific p53-inducing stimuli, both factors potentially biasing the results.

Results

We developed p53retriever, a pattern search algorithm that maps p53 response elements (REs) and ranks them according to predicted transactivation potentials in five classes. Besides canonical, full site REs, we developed specific pattern searches for non-canonical half sites and 3/4 sites and show that they can mediate p53-dependent responsiveness of associated coding sequences. Using ENCODE data, we also mapped p53 REs in about 44,000 distant enhancers and identified a 16-fold enrichment for high activity REs within those sites in the comparison with genomic regions near transcriptional start sites (TSS). Predictions from our pattern search were cross-referenced to ChIP-seq, ChIP-exo, expression, and various literature data sources. Based on the mapping of predicted functional REs near TSS, we examined expression changes of thirteen genes as a function of different p53-inducing conditions, providing further evidence for PDE2A, GAS6, E2F7, APOBEC3H, KCTD1, TRIM32, DICER, HRAS, KITLG and TGFA p53-dependent regulation, while MAP2K3, DNAJA1 and potentially YAP1 were identified as new direct p53 target genes.

Conclusions

We provide a comprehensive annotation of canonical and non-canonical p53 REs in the human genome, ranked on predicted transactivation potential. We also establish or corroborate direct p53 transcriptional control of thirteen genes. The entire list of identified and functionally classified p53 REs near all UCSC-annotated genes and within ENCODE mapped enhancer elements is provided. Our approach is distinct from, and complementary to, existing methods designed to identify p53 response elements. p53retriever is available as an R package at: http://tomateba.github.io/p53retriever .",2015-06-17 +25252852,BADGE: a novel Bayesian model for accurate abundance quantification and differential analysis of RNA-Seq data.,"

Background

Recent advances in RNA sequencing (RNA-Seq) technology have offered unprecedented scope and resolution for transcriptome analysis. However, precise quantification of mRNA abundance and identification of differentially expressed genes are complicated due to biological and technical variations in RNA-Seq data.

Results

We systematically study the variation in count data and dissect the sources of variation into between-sample variation and within-sample variation. A novel Bayesian framework is developed for joint estimate of gene level mRNA abundance and differential state, which models the intrinsic variability in RNA-Seq to improve the estimation. Specifically, a Poisson-Lognormal model is incorporated into the Bayesian framework to model within-sample variation; a Gamma-Gamma model is then used to model between-sample variation, which accounts for over-dispersion of read counts among multiple samples. Simulation studies, where sequencing counts are synthesized based on parameters learned from real datasets, have demonstrated the advantage of the proposed method in both quantification of mRNA abundance and identification of differentially expressed genes. Moreover, performance comparison on data from the Sequencing Quality Control (SEQC) Project with ERCC spike-in controls has shown that the proposed method outperforms existing RNA-Seq methods in differential analysis. Application on breast cancer dataset has further illustrated that the proposed Bayesian model can 'blindly' estimate sources of variation caused by sequencing biases.

Conclusions

We have developed a novel Bayesian hierarchical approach to investigate within-sample and between-sample variations in RNA-Seq data. Simulation and real data applications have validated desirable performance of the proposed method. The software package is available at http://www.cbil.ece.vt.edu/software.htm.",2014-09-10 +26295373,An Aggregation Advisor for Ligand Discovery.,"Colloidal aggregation of organic molecules is the dominant mechanism for artifactual inhibition of proteins, and controls against it are widely deployed. Notwithstanding an increasingly detailed understanding of this phenomenon, a method to reliably predict aggregation has remained elusive. Correspondingly, active molecules that act via aggregation continue to be found in early discovery campaigns and remain common in the literature. Over the past decade, over 12 thousand aggregating organic molecules have been identified, potentially enabling a precedent-based approach to match known aggregators with new molecules that may be expected to aggregate and lead to artifacts. We investigate an approach that uses lipophilicity, affinity, and similarity to known aggregators to advise on the likelihood that a candidate compound is an aggregator. In prospective experimental testing, five of seven new molecules with Tanimoto coefficients (Tc's) between 0.95 and 0.99 to known aggregators aggregated at relevant concentrations. Ten of 19 with Tc's between 0.94 and 0.90 and three of seven with Tc's between 0.89 and 0.85 also aggregated. Another three of the predicted compounds aggregated at higher concentrations. This method finds that 61 827 or 5.1% of the ligands acting in the 0.1 to 10 μM range in the medicinal chemistry literature are at least 85% similar to a known aggregator with these physical properties and may aggregate at relevant concentrations. 
Intriguingly, only 0.73% of all drug-like commercially available compounds resemble the known aggregators, suggesting that colloidal aggregators are enriched in the literature. As a percentage of the literature, aggregator-like compounds have increased 9-fold since 1995, partly reflecting the advent of high-throughput and virtual screens against molecular targets. Emerging from this study is an aggregator advisor database and tool ( http://advisor.bkslab.org ), free to the community, that may help distinguish between fruitful and artifactual screening hits acting by this mechanism.",2015-08-28 +26336143,Mem-mEN: Predicting Multi-Functional Types of Membrane Proteins by Interpretable Elastic Nets.,"Membrane proteins play important roles in various biological processes within organisms. Predicting the functional types of membrane proteins is indispensable to the characterization of membrane proteins. Recent studies have extended to predicting single- and multi-type membrane proteins. However, existing predictors perform poorly and more importantly, they are often lack of interpretability. To address these problems, this paper proposes an efficient predictor, namely Mem-mEN, which can produce sparse and interpretable solutions for predicting membrane proteins with single- and multi-label functional types. Given a query membrane protein, its associated gene ontology (GO) information is retrieved by searching a compact GO-term database with its homologous accession number, which is subsequently classified by a multi-label elastic net (EN) classifier. Experimental results show that Mem-mEN significantly outperforms existing state-of-the-art membrane-protein predictors. Moreover, by using Mem-mEN, 338 out of more than 7,900 GO terms are found to play more essential roles in determining the functional types. Based on these 338 essential GO terms, Mem-mEN can not only predict the functional type of a membrane protein, but also explain why it belongs to that type. 
For the reader's convenience, the Mem-mEN server is available online at http://bioinfo.eie.polyu.edu.hk/MemmENServer/.",2015-08-28 +26314736,GlycoMob: an ion mobility-mass spectrometry collision cross section database for glycomics.,"Ion mobility mass spectrometry (IM-MS) is a promising analytical technique for glycomics that separates glycan ions based on their collision cross section (CCS) and provides glycan precursor and fragment masses. It has been shown that isomeric oligosaccharide species can be separated by IM and identified on basis of their CCS and fragmentation. These results indicate that adding CCSs information for glycans and glycan fragments to searchable databases and analysis pipelines will increase identification confidence and accuracy. We have developed a freely accessible database, GlycoMob ( http://www.glycomob.org ), containing over 900 CCSs values of glycans, oligosaccharide standards and their fragments that will be continually updated. We have measured the absolute CCSs of calibration standards, biologically derived and synthetic N-glycans ionized with various adducts in positive and negative mode or as protonated (positive ion) and deprotonated (negative ion) ions.",2015-08-28 +22140105,ScerTF: a comprehensive database of benchmarked position weight matrices for Saccharomyces species.,"Saccharomyces cerevisiae is a primary model for studies of transcriptional control, and the specificities of most yeast transcription factors (TFs) have been determined by multiple methods. However, it is unclear which position weight matrices (PWMs) are most useful; for the roughly 200 TFs in yeast, there are over 1200 PWMs in the literature. To address this issue, we created ScerTF, a comprehensive database of 1226 motifs from 11 different sources. We identified a single matrix for each TF that best predicts in vivo data by benchmarking matrices against chromatin immunoprecipitation and TF deletion experiments. 
We also used in vivo data to optimize thresholds for identifying regulatory sites with each matrix. To correct for biases from different methods, we developed a strategy to combine matrices. These aligned matrices outperform the best available matrix for several TFs. We used the matrices to predict co-occurring regulatory elements in the genome and identified many known TF combinations. In addition, we predict new combinations and provide evidence of combinatorial regulation from gene expression data. The database is available through a web interface at http://ural.wustl.edu/ScerTF. The site allows users to search the database with a regulatory site or matrix to identify the TFs most likely to bind the input sequence.",2011-12-02 +27923790,Dynamin-related protein 1 mediates low glucose-induced endothelial dysfunction in human arterioles.,"Intensive glycemic regulation has resulted in an increased incidence of hypoglycemia. Hypoglycemic burden correlates with adverse cardiovascular complications and contributes acutely and chronically to endothelial dysfunction. Prior data indicate that mitochondrial dysfunction contributes to hypoglycemia-induced endothelial dysfunction, but the mechanisms behind this linkage remain unknown. We attempt to determine whether clinically relevant low-glucose (LG) exposures acutely induce endothelial dysfunction through activation of the mitochondrial fission process. Characterization of mitochondrial morphology was carried out in cultured endothelial cells by using confocal microscopy. Isolated human arterioles were used to explore the effect LG-induced mitochondrial fission has on the formation of detrimental reactive oxygen species (ROS), bioavailability of nitric oxide (NO), and endothelial-dependent vascular relaxation. Fluorescence microscopy was employed to visualize changes in mitochondrial ROS and NO levels and videomicroscopy applied to measure vasodilation response. 
Pharmacological disruption of the profission protein Drp1 with Mdivi-1 during LG exposure reduced mitochondrial fragmentation among vascular endothelial cells (LG: 0.469; LG+Mdivi-1: 0.276; P = 0.003), prevented formation of vascular ROS (LG: 2.036; LG+Mdivi-1: 1.774; P = 0.005), increased the presence of NO (LG: 1.352; LG+Mdivi-1: 1.502; P = 0.048), and improved vascular dilation response to acetylcholine (LG: 31.6%; LG+Mdivi-1: 78.5% at maximum dose; P < 0.001). Additionally, decreased expression of Drp1 via siRNA knockdown during LG conditions also improved vascular relaxation. Exposure to LG imparts endothelial dysfunction coupled with altered mitochondrial phenotypes among isolated human arterioles. Disruption of Drp1 and subsequent mitochondrial fragmentation events prevents impaired vascular dilation, restores mitochondrial phenotype, and implicates mitochondrial fission as a primary mediator of LG-induced endothelial dysfunction. NEW & NOTEWORTHY Acute low-glucose exposure induces mitochondrial fragmentation in endothelial cells via Drp1 and is associated with impaired endothelial function in human arterioles. Targeting of Drp1 prevents fragmentation, improves vasofunction, and may provide a therapeutic target for improving cardiovascular complications among diabetics. Listen to this article's corresponding podcast @ http://ajpheart.podbean.com/e/mitochondrial-dynamics-impact-endothelial-function/.",2016-12-06 +25301849,NOVA: a software to analyze complexome profiling data.,"

Summary

We introduce nova, a software for the analysis of complexome profiling data. nova supports the investigation of the composition of complexes, cluster analysis of the experimental data, visual inspection and comparison of experiments and many other features.

Availability and implementation

nova is licensed under the Artistic License 2.0. It is freely available at http://www.bioinformatik.uni-frankfurt.de. nova requires at least Java 7 and runs under Linux, Microsoft Windows and Mac OS.

Contact

ina.koch@bioinformatik.uni-frankfurt.de.",2014-10-09 +27288623,Neuroscience and awareness in the dying human brain: Implications for organ donation practices.,"Consciousness has 2 components: wakefulness (arousal) and awareness (perception of the self and the external environment). Functional neuroimaging has identified 2 distinctive functional networks that mediate external awareness of the surrounding environment and internal awareness of the self. Recent studies suggest that awareness is not always associated with wakefulness. There is little clinical research that has specifically focused on determining awareness in the dying phase, after the cessation of systemic circulation. Pana et al (J Crit Care, http://dx.doi.org/10.1016/j.jcrc.2016.04.001) concluded from a retrospective analysis of published human and animal studies that the cessation of clinical brain function and spontaneous electroencephalography activity occurred within 30 seconds of circulatory arrest. They inferred from this that a 5-minute period of cessation of circulation constitutes a valid indicator that awareness has ceased. This aligns with the 5-minute no-touch time after the loss of arterial pulse, the current circulatory standard of death determination in non-heart-beating organ donation. We argue that the capacity for awareness may not be irreversibly lost after a relatively brief period of cessation of systemic circulation, and outline empirical data in support of the claim that awareness without wakefulness may be present. Obviously, if correct, this will have practical and ethical implications on organ donation practices.",2016-04-26 +25433763,Séance: reference-based phylogenetic analysis for 18S rRNA studies.,"

Background

Marker gene studies often use short amplicons spanning one or more hypervariable regions from an rRNA gene to interrogate the community structure of uncultured environmental samples. Target regions are chosen for their discriminatory power, but the limited phylogenetic signal of short high-throughput sequencing reads precludes accurate phylogenetic analysis. This is particularly unfortunate in the study of microscopic eukaryotes where horizontal gene flow is limited and the rRNA gene is expected to accurately reflect the species phylogeny. A promising alternative to full phylogenetic analysis is phylogenetic placement, where a reference phylogeny is inferred using the complete marker gene and iteratively extended with the short sequences from a metagenetic sample under study.

Results

Based on the phylogenetic placement approach we built Séance, a community analysis pipeline focused on the analysis of 18S marker gene data. Séance combines the alignment extension and phylogenetic placement capabilities of the Pagan multiple sequence alignment program with a suite of tools to preprocess, cluster and visualise datasets composed of many samples. We showcase Séance by analysing 454 data from a longitudinal study of intestinal parasite communities in wild rufous mouse lemurs (Microcebus rufus) as well as in simulation. We demonstrate both improved OTU picking at higher levels of sequence similarity for 454 data and show the accuracy of phylogenetic placement to be comparable to maximum likelihood methods for lower numbers of taxa.

Conclusions

Séance is an open source community analysis pipeline that provides reference-based phylogenetic analysis for rRNA marker gene studies. Whilst in this article we focus on studying nematodes using the 18S marker gene, the concepts are generic and reference data for alternative marker genes can be easily created. Séance can be downloaded from http://wasabiapp.org/software/seance/ .",2014-11-30 +,Selecting a Hiding Place: Anuran Diversity and the use of Bromeliads in a Threatened Coastal Sand Dune Habitat in Brazil,"Among vertebrates, anuran amphibians represent the highest number of species associated with bromeliads and possess a range of ecological, behavioral, and morphological specializations to life in these plants. Despite the importance of bromeliads as biodiversity amplifiers, and their diversity in some habitats, studies of the relationship between anurans and these plants are scarce in Brazil. Here, we investigated the way anurans select and use bromeliads in a threatened coastal habitat. We analyzed data from 23 standardized samples of the anurans associated with the bromeliad Neoregelia cruenta in the Restinga de Maricá, State of Rio de Janeiro, Southeastern Brazil. We found nine anuran species using these bromeliads, representing the highest richness reported for a Brazilian restinga. We identified a general pattern of bromeliad usage, where plants located at the edges of scrub patches (exposed to the sun) were more frequently occupied by anurans than those located more to the center (in the shade). There is strong evidence of an active selective process based on the quality of the water stored in the rosette, which differs between plants depending on their position in the scrub patch. Although the number of individuals varied during the period of study, the frequency of bromeliads used was constant, indicating that plant occupation follows a regular pattern throughout the year. 
Furthermore, the high frequency of bromeliads used by anurans during the whole year highlights the importance of considering these plants in developing conservation programs concerning the protection of anurans. Abstract in Portuguese is available at http://www.blackwell-synergy.com/loi/btp.",2011-03-01 +27323709,Multiple Threats to Child Health from Fossil Fuel Combustion: Impacts of Air Pollution and Climate Change.,"

Background

Approaches to estimating and addressing the risk to children from fossil fuel combustion have been fragmented, tending to focus either on the toxic air emissions or on climate change. Yet developing children, and especially poor children, now bear a disproportionate burden of disease from both environmental pollution and climate change due to fossil fuel combustion.

Objective

This commentary summarizes the robust scientific evidence regarding the multiple current and projected health impacts of fossil fuel combustion on the young to make the case for a holistic, child-centered energy and climate policy that addresses the full array of physical and psychosocial stressors resulting from fossil fuel pollution.

Discussion

The data summarized here show that by sharply reducing our dependence on fossil fuels we would achieve highly significant health and economic benefits for our children and their future. These benefits would occur immediately and also play out over the life course and potentially across generations.

Conclusion

Going beyond the powerful scientific and economic arguments for urgent action to reduce the burning of fossil fuels is the strong moral imperative to protect our most vulnerable populations. Citation: Perera FP. 2017. Multiple threats to child health from fossil fuel combustion: impacts of air pollution and climate change. Environ Health Perspect 125:141-148; http://dx.doi.org/10.1289/EHP299.",2016-06-21 +23749465,"Construction of protein phosphorylation networks by data mining, text mining and ontology integration: analysis of the spindle checkpoint.","Knowledge representation of the role of phosphorylation is essential for the meaningful understanding of many biological processes. However, such a representation is challenging because proteins can exist in numerous phosphorylated forms with each one having its own characteristic protein-protein interactions (PPIs), functions and subcellular localization. In this article, we evaluate the current state of phosphorylation event curation and then present a bioinformatics framework for the annotation and representation of phosphorylated proteins and construction of phosphorylation networks that addresses some of the gaps in current curation efforts. The integrated approach involves (i) text mining guided by RLIMS-P, a tool that identifies phosphorylation-related information in scientific literature; (ii) data mining from curated PPI databases; (iii) protein form and complex representation using the Protein Ontology (PRO); (iv) functional annotation using the Gene Ontology (GO); and (v) network visualization and analysis with Cytoscape. We use this framework to study the spindle checkpoint, the process that monitors the assembly of the mitotic spindle and blocks cell cycle progression at metaphase until all chromosomes have made bipolar spindle attachments. 
The phosphorylation networks we construct, centered on the human checkpoint kinase BUB1B (BubR1) and its yeast counterpart MAD3, offer a unique view of the spindle checkpoint that emphasizes biologically relevant phosphorylated forms, phosphorylation-state-specific PPIs and kinase-substrate relationships. Our approach for constructing protein phosphorylation networks can be applied to any biological process that is affected by phosphorylation. Database URL: http://www.yeastgenome.org/",2013-06-07 +26318525,[MEG]PLS: A pipeline for MEG data analysis and partial least squares statistics.,"The emphasis of modern neurobiological theories has recently shifted from the independent function of brain areas to their interactions in the context of whole-brain networks. As a result, neuroimaging methods and analyses have also increasingly focused on network discovery. Magnetoencephalography (MEG) is a neuroimaging modality that captures neural activity with a high degree of temporal specificity, providing detailed, time varying maps of neural activity. Partial least squares (PLS) analysis is a multivariate framework that can be used to isolate distributed spatiotemporal patterns of neural activity that differentiate groups or cognitive tasks, to relate neural activity to behavior, and to capture large-scale network interactions. Here we introduce [MEG]PLS, a MATLAB-based platform that streamlines MEG data preprocessing, source reconstruction and PLS analysis in a single unified framework. [MEG]PLS facilitates MRI preprocessing, including segmentation and coregistration, MEG preprocessing, including filtering, epoching, and artifact correction, MEG sensor analysis, in both time and frequency domains, MEG source analysis, including multiple head models and beamforming algorithms, and combines these with a suite of PLS analyses. 
The pipeline is open-source and modular, utilizing functions from FieldTrip (Donders, NL), AFNI (NIMH, USA), SPM8 (UCL, UK) and PLScmd (Baycrest, CAN), which are extensively supported and continually developed by their respective communities. [MEG]PLS is flexible, providing both a graphical user interface and command-line options, depending on the needs of the user. A visualization suite allows multiple types of data and analyses to be displayed and includes 4-D montage functionality. [MEG]PLS is freely available under the GNU public license (http://meg-pls.weebly.com).",2015-08-28 +22727241,HPV-beyond cervical cancer (online resource center).,"The human papillomavirus (HPV) causes more than 99% of all cervical cancers (see Am J Med Resource Center: http://supplements.amjmed.com/2011/HPV/). Exposure to HPV infections occurs in a high proportion of the overall population; however, 2 safe and effective vaccines, HPV2 and HPV4, are approved for the prevention of HPV-16 and HPV-18 infection, the most common causes of cervical cancer. Additionally, HPV4 prevents HPV-6 and HPV-11-related genital warts. While prevention of cervical cancer in women has been the initial aim of vaccination programs, it has now become apparent that HPV causes other types of cancer as well, including vulvar and vaginal cancers in women, penile cancer in men, and anal cancer in both sexes. Furthermore, these viruses have been implicated in head and neck cancers in both men and women as well. It is estimated that HPV-related cancers occur in 10,000 American males annually, suggesting that limiting vaccination programs to females may be underserving a significant proportion of the population. The efficacy of the 2 available vaccines against oncogenic HPV is more than 90% for both cervical and anal intraepithelial neoplasia. For those receiving the HPV4 vaccine, efficacy against genital warts is nearly 90%. 
Adverse effects are few and include episodes of syncope in the period immediately following vaccination. Benefits of vaccinating males include reduction in disease burden in men and enhanced herd immunity to reduce disease burden in women.",2012-07-01 +25311132,"Analysis of nuclear organization with TANGO, software for high-throughput quantitative analysis of 3D fluorescence microscopy images.","The cell nucleus is a highly organized cellular organelle that contains the genome. An important step to understand the relationships between genome positioning and genome functions is to extract quantitative data from three-dimensional (3D) fluorescence imaging. However, such approaches are limited by the requirement for processing and analyzing large sets of images. Here we present a practical approach using TANGO (Tools for Analysis of Nuclear Genome Organization), an image analysis tool dedicated to the study of nuclear architecture. TANGO is a generic tool able to process large sets of images, allowing quantitative study of nuclear organization. In this chapter a practical description of the software is drawn in order to give an overview of its different concepts and functionalities. This description is illustrated with a precise example that can be performed step-by-step on experimental data provided on the website http://biophysique.mnhn.fr/tango/HomePage.",2015-01-01 +26289378,PredRet: prediction of retention time by direct mapping between multiple chromatographic systems.,"Demands in research investigating small molecules by applying untargeted approaches have been a key motivator for the development of repositories for mass spectrometry spectra and automated tools to aid compound identification. Comparatively little attention has been afforded to using retention times (RTs) to distinguish compounds and for liquid chromatography there are currently no coordinated efforts to share and exploit RT information. 
We therefore present PredRet; the first tool that makes community sharing of RT information possible across laboratories and chromatographic systems (CSs). At http://predret.org , a database of RTs from different CSs is available and users can upload their own experimental RTs and download predicted RTs for compounds which they have not experimentally determined in their own experiments. For each possible pair of CSs in the database, the RTs are used to construct a projection model between the RTs in the two CSs. The number of compounds for which RTs can be predicted and the accuracy of the predictions are dependent upon the compound coverage overlap between the CSs used for construction of projection models. At the moment, it is possible to predict up to 400 RTs with a median error between 0.01 and 0.28 min depending on the CS and the median width of the prediction interval ranging from 0.08 to 1.86 min. By comparing experimental and predicted RTs, the user can thus prioritize which isomers to target for further characterization and potentially exclude some structures completely. As the database grows, the number and accuracy of predictions will increase.",2015-08-25 +21968266,Institute of social justice and medicine: developing a think tank to promote policy formation.,"The World Health Organization (WHO) defines health as a ""resource for everyday living, not the objective of living""; however, worldwide, there remains an unmistakable inequity in level of health and access to healthcare. The WHO has published documents on financing health systems towards universal health coverage [1], promoting healthy life [2], improving performance of health systems [3], and enriching humanity [4], highlighting our shared responsibility towards improving both national and global health and access to healthcare. These documents also recognize that, despite our local and regional priorities, there is a global desire to develop international strategies to improve healthcare. 
[1] WHO Report. Health systems financing and the path to universal coverage. 2010. http://www.who.int/bulletin/health_financing/en/index.html [2] WHO Report. Reducing risks, promoting healthy life. 2002. http://www.who.int/whr/2002/en/index.html [3] WHO Bulletin. Health systems: improving performance. 2000. http://www.who.int/whr/2000/en/index.html [4] WHO Bulletin. Conquering suffering, enriching humanity 1997. http://www.who.int/whr/1997/en/index.html.",2011-10-01 +26305368,"Cyanobacterial KnowledgeBase (CKB), a Compendium of Cyanobacterial Genomes and Proteomes.","Cyanobacterial KnowledgeBase (CKB) is a free access database that contains the genomic and proteomic information of 74 fully sequenced cyanobacterial genomes belonging to seven orders. The database also contains tools for sequence analysis. The Species report and the gene report provide details about each species and gene (including sequence features and gene ontology annotations) respectively. The database also includes cyanoBLAST, an advanced tool that facilitates comparative analysis, among cyanobacterial genomes and genomes of E. coli (prokaryote) and Arabidopsis (eukaryote). The database is developed and maintained by the Sub-Distributed Informatics Centre (sponsored by the Department of Biotechnology, Govt. of India) of the National Facility for Marine Cyanobacteria, a facility dedicated to marine cyanobacterial research. 
CKB is freely available at http://nfmc.res.in/ckb/index.html.",2015-08-25 +22365971,SITVITWEB--a publicly available international multimarker database for studying Mycobacterium tuberculosis genetic diversity and molecular epidemiology.,"Among various genotyping methods to study Mycobacterium tuberculosis complex (MTC) genotypic polymorphism, spoligotyping and mycobacterial interspersed repetitive units-variable number of DNA tandem repeats (MIRU-VNTRs) have recently gained international approval as robust, fast, and reproducible typing methods generating data in a portable format. Spoligotyping constituted the backbone of a publicly available database SpolDB4 released in 2006; nonetheless this method possesses a low discriminatory power when used alone and should be ideally used in conjunction with a second typing method such as MIRU-VNTRs for high-resolution epidemiological studies. We hereby describe a publicly available international database named SITVITWEB which incorporates such multimarker data allowing to have a global vision of MTC genetic diversity worldwide based on 62,582 clinical isolates corresponding to 153 countries of patient origin (105 countries of isolation). We report a total of 7105 spoligotype patterns (corresponding to 58,180 clinical isolates) - grouped into 2740 shared-types or spoligotype international types (SIT) containing 53,816 clinical isolates and 4364 orphan patterns. Interestingly, only 7% of the MTC isolates worldwide were orphans whereas more than half of SITed isolates (n=27,059) were restricted to only 24 most prevalent SITs. The database also contains a total of 2379 MIRU patterns (from 8161 clinical isolates) from 87 countries of patient origin (35 countries of isolation); these were grouped in 847 shared-types or MIRU international types (MIT) containing 6626 isolates and 1533 orphan patterns. 
Lastly, data on 5-locus exact tandem repeats (ETRs) were available on 4626 isolates from 59 countries of patient origin (22 countries of isolation); a total of 458 different VNTR patterns were observed - split into 245 shared-types or VNTR International Types (VIT) containing 4413 isolates) and 213 orphan patterns. Datamining of SITVITWEB further allowed to update rules defining MTC genotypic lineages as well to have a new insight into MTC population structure and worldwide distribution at country, sub-regional and continental levels. At evolutionary level, the data compiled may be useful to distinguish the occasional convergent evolution of genotypes versus specific evolution of sublineages essentially influenced by adaptation to the host. This database is publicly available at: http://www.pasteur-guadeloupe.fr:8081/SITVIT_ONLINE.",2012-02-17 +21969353,In the clinic. Management of newly diagnosed HIV infection.,"This issue provides a clinical overview of management of newly diagnosed HIV infection focusing on prevention, diagnosis, treatment, practice improvement, and patient information. Readers can complete the accompanying CME quiz for 1.5 credits. Only ACP members and individual subscribers can access the electronic features of In the Clinic. Non-subscribers who wish to access this issue of In the Clinic can elect ""Pay for View."" Subscribers can receive 1.5 category 1 CME credits by completing the CME quiz that accompanies this issue of In the Clinic. The content of In the Clinic is drawn from the clinical information and education resources of the American College of Physicians (ACP), including PIER (Physicians' Information and Education Resource) and MKSAP (Medical Knowledge and Self Assessment Program). Annals of Internal Medicine editors develop In the Clinic from these primary sources in collaboration with the ACP's Medical Education and Publishing division and with assistance of science writers and physician writers. 
Editorial consultants from PIER and MKSAP provide expert review of the content. Readers who are interested in these primary resources for more detail can consult www.acponline.org, http://pier.acponline.org, and other resources referenced within each issue of In the Clinic.",2011-10-01 +22661170,Hospital-related outbreaks due to rare fungal pathogens: a review of the literature from 1990 to June 2011.,"Fungi can cause severe infections. Two or more nosocomial unusual fungal infections diagnosed in a short period should be assumed as an outbreak. The review's aim was to collect data to improve their management. The free online worldwide database for nosocomial outbreaks ( http://www.outbreak-database.com ) and the PubMed/MEDLINE database were used to collect the English literature published from 1990 to June 2011. The more common Candida spp. and Aspergillus spp. infections were excluded. For each outbreak, the following data were reviewed: species, duration, source and site of infection, ward, risk factors, number of patients infected, treatment, related mortality, type of epidemiological study and time elapsed between index cases and second cases. Thirty-six reports were considered: yeasts caused the majority of the outbreaks (16 out of 36). The median values for the overall duration, number of infected people per outbreak and infection-related mortality were 5 months, 4 and 20 %, respectively. Eighteen cases were caused by contaminated substances and 13 cases were hypothesised as human-transmitted. Nosocomial outbreaks due to rare fungal pathogens involve few patients but have high related mortality. These results could be explained by the diagnostic delay, the inability of recognising the source of the infections and the challenges of the treatment. 
More efforts should be concentrated to implement the application of proper hygiene practices to avoid human-human transmission.",2012-06-03 +27187202,Calculating and scoring high quality multiple flexible protein structure alignments.,"

Motivation

Calculating multiple protein structure alignments (MSAs) is important for understanding functional and evolutionary relationships between protein families, and for modeling protein structures by homology. While incorporating backbone flexibility promises to circumvent many of the limitations of rigid MSA algorithms, very few flexible MSA algorithms exist today. This article describes several novel improvements to the Kpax algorithm which allow high quality flexible MSAs to be calculated. This article also introduces a new Gaussian-based MSA quality measure called 'M-score', which circumvents the pitfalls of RMSD-based quality measures.

Results

As well as calculating flexible MSAs, the new version of Kpax can also score MSAs from other aligners and from previously aligned reference datasets. Results are presented for a large-scale evaluation of the Homstrad, SABmark and SISY benchmark sets using Kpax and Matt as examples of state-of-the-art flexible aligners and 3DCOMB as an example of a state-of-the-art rigid aligner. These results demonstrate the utility of the M-score as a measure of MSA quality and show that high quality MSAs may be achieved when structural flexibility is properly taken into account.

Availability and implementation

Kpax 5.0 may be downloaded for academic use at http://kpax.loria.fr/

Contact

dave.ritchie@inria.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-05-13 +26600562,Key Characteristics of Carcinogens as a Basis for Organizing Data on Mechanisms of Carcinogenesis.,"

Background

A recent review by the International Agency for Research on Cancer (IARC) updated the assessments of the > 100 agents classified as Group 1, carcinogenic to humans (IARC Monographs Volume 100, parts A-F). This exercise was complicated by the absence of a broadly accepted, systematic method for evaluating mechanistic data to support conclusions regarding human hazard from exposure to carcinogens.

Objectives and methods

IARC therefore convened two workshops in which an international Working Group of experts identified 10 key characteristics, one or more of which are commonly exhibited by established human carcinogens.

Discussion

These characteristics provide the basis for an objective approach to identifying and organizing results from pertinent mechanistic studies. The 10 characteristics are the abilities of an agent to 1) act as an electrophile either directly or after metabolic activation; 2) be genotoxic; 3) alter DNA repair or cause genomic instability; 4) induce epigenetic alterations; 5) induce oxidative stress; 6) induce chronic inflammation; 7) be immunosuppressive; 8) modulate receptor-mediated effects; 9) cause immortalization; and 10) alter cell proliferation, cell death, or nutrient supply.

Conclusion

We describe the use of the 10 key characteristics to conduct a systematic literature search focused on relevant end points and construct a graphical representation of the identified mechanistic information. Next, we use benzene and polychlorinated biphenyls as examples to illustrate how this approach may work in practice. The approach described is similar in many respects to those currently being implemented by the U.S. EPA's Integrated Risk Information System Program and the U.S. National Toxicology Program.

Citation

Smith MT, Guyton KZ, Gibbons CF, Fritz JM, Portier CJ, Rusyn I, DeMarini DM, Caldwell JC, Kavlock RJ, Lambert P, Hecht SS, Bucher JR, Stewart BW, Baan R, Cogliano VJ, Straif K. 2016. Key characteristics of carcinogens as a basis for organizing data on mechanisms of carcinogenesis. Environ Health Perspect 124:713-721; http://dx.doi.org/10.1289/ehp.1509912.",2015-11-24 +21507258,"PheMaDB: a solution for storage, retrieval, and analysis of high throughput phenotype data.","

Background

OmniLog™ phenotype microarrays (PMs) have the capability to measure and compare the growth responses of biological samples upon exposure to hundreds of growth conditions such as different metabolites and antibiotics over a time course of hours to days. In order to manage the large amount of data produced from the OmniLog™ instrument, PheMaDB (Phenotype Microarray DataBase), a web-based relational database, was designed. PheMaDB enables efficient storage, retrieval and rapid analysis of the OmniLog™ PM data.

Description

PheMaDB allows the user to quickly identify records of interest for data analysis by filtering with a hierarchical ordering of Project, Strain, Phenotype, Replicate, and Temperature. PheMaDB then provides various statistical analysis options to identify specific growth pattern characteristics of the experimental strains, such as: outlier analysis, negative controls analysis (signal/background calibration), bar plots, pearson's correlation matrix, growth curve profile search, k-means clustering, and a heat map plot. This web-based database management system allows for both easy data sharing among multiple users and robust tools to phenotype organisms of interest.

Conclusions

PheMaDB is an open source system standardized for OmniLog™ PM data. PheMaDB could facilitate the banking and sharing of phenotype data. The source code is available for download at http://phemadb.sourceforge.net.",2011-04-20 +24499292,VAMPS: a website for visualization and analysis of microbial population structures.,"

Background

The advent of next-generation DNA sequencing platforms has revolutionized molecular microbial ecology by making the detailed analysis of complex communities over time and space a tractable research pursuit for small research groups. However, the ability to generate 10⁵-10⁸ reads with relative ease brings with it many downstream complications. Beyond the computational resources and skills needed to process and analyze data, it is difficult to compare datasets in an intuitive and interactive manner that leads to hypothesis generation and testing.

Results

We developed the free web service VAMPS (Visualization and Analysis of Microbial Population Structures, http://vamps.mbl.edu) to address these challenges and to facilitate research by individuals or collaborating groups working on projects with large-scale sequencing data. Users can upload marker gene sequences and associated metadata; reads are quality filtered and assigned to both taxonomic structures and to taxonomy-independent clusters. A simple point-and-click interface allows users to select for analysis any combination of their own or their collaborators' private data and data from public projects, filter these by their choice of taxonomic and/or abundance criteria, and then explore these data using a wide range of analytic methods and visualizations. Each result is extensively hyperlinked to other analysis and visualization options, promoting data exploration and leading to a greater understanding of data relationships.

Conclusions

VAMPS allows researchers using marker gene sequence data to analyze the diversity of microbial communities and the relationships between communities, to explore these analyses in an intuitive visual context, and to download data, results, and images for publication. VAMPS obviates the need for individual research groups to make the considerable investment in computational infrastructure and bioinformatic support otherwise necessary to process, analyze, and interpret massive amounts of next-generation sequence data. Any web-capable device can be used to upload, process, explore, and extract data and results from VAMPS. VAMPS encourages researchers to share sequence and metadata, and fosters collaboration between researchers of disparate biomes who recognize common patterns in shared data.",2014-02-05 +26867747,Using the CPTAC Assay Portal to Identify and Implement Highly Characterized Targeted Proteomics Assays.,"The Clinical Proteomic Tumor Analysis Consortium (CPTAC) of the National Cancer Institute (NCI) has launched an Assay Portal (http://assays.cancer.gov) to serve as an open-source repository of well-characterized targeted proteomic assays. The portal is designed to curate and disseminate highly characterized, targeted mass spectrometry (MS)-based assays by providing detailed assay performance characterization data, standard operating procedures, and access to reagents. Assay content is accessed via the portal through queries to find assays targeting proteins associated with specific cellular pathways, protein complexes, or specific chromosomal regions. The position of the peptide analytes for which there are available assays are mapped relative to other features of interest in the protein, such as sequence domains, isoforms, single nucleotide polymorphisms, and posttranslational modifications. 
The overarching goals are to enable robust quantification of all human proteins and to standardize the quantification of targeted MS-based assays to ultimately enable harmonization of results over time and across laboratories.",2016-01-01 +25754992,Using REDItools to Detect RNA Editing Events in NGS Datasets.,"RNA editing is a post-transcriptional/co-transcriptional molecular phenomenon whereby a genetic message is modified from the corresponding DNA template by means of substitutions, insertions, and/or deletions. It occurs in a variety of organisms and different cellular locations through evolutionally and biochemically unrelated proteins. RNA editing has a plethora of biological effects including the modulation of alternative splicing and fine-tuning of gene expression. RNA editing events by base substitutions can be detected on a genomic scale by NGS technologies through the REDItools package, an ad hoc suite of Python scripts to study RNA editing using RNA-Seq and DNA-Seq data or RNA-Seq data alone. REDItools implement effective filters to minimize biases due to sequencing errors, mapping errors, and SNPs. The package is freely available at Google Code repository (http://code.google.com/p/reditools/) and released under the MIT license. In the present unit we show three basic protocols corresponding to three main REDItools scripts.",2015-03-09 +27231842,"Improving Walking with an Implanted Neuroprosthesis for Hip, Knee, and Ankle Control After Stroke.","

Objective

The objective of this work was to quantify the effects of a fully implanted pulse generator to activate or augment actions of hip, knee, and ankle muscles after stroke.

Design

The subject was a 64-year-old man with left hemiparesis resulting from hemorrhagic stroke 21 months before participation. He received an 8-channel implanted pulse generator and intramuscular stimulating electrodes targeting unilateral hip, knee, and ankle muscles on the paretic side. After implantation, a stimulation pattern was customized to assist with hip, knee, and ankle movement during gait. The subject served as his own concurrent and longitudinal control with and without stimulation. Outcome measures included 10-m walk and 6-minute timed walk to assess gait speed, maximum walk time, and distance to measure endurance, and quantitative motion analysis to evaluate spatial-temporal characteristics. Assessments were repeated under 3 conditions: (1) volitional walking at baseline, (2) volitional walking after training, and (3) walking with stimulation after training.

Results

Volitional gait speed improved with training from 0.29 m/s to 0.35 m/s and further increased to 0.72 m/s with stimulation. Most spatial-temporal characteristics improved and represented more symmetrical and dynamic gait.

Conclusions

These data suggest that a multijoint approach to implanted neuroprostheses can provide clinically relevant improvements in gait after stroke.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES:: Upon completion of this article, the reader should be able to do the following: (1) Describe the rationale for evaluating a multijoint implanted neuroprosthesis to improve walking after stroke; (2) Understand the study design and conclusions that can be inferred as a result of the design; and (3) Discuss the statistical significance and clinical relevance of changes between (a) volitional walking at baseline, (b) volitional walking after training, and (c) walking with stimulation after training.

Level

Advanced ACCREDITATION:: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians. The Association of Academic Physiatrists designates this activity for a maximum of 1.5 AMA PRA Category 1 Credit(s)™. Physicians should only claim credit commensurate with the extent of their participation in the activity.",2016-12-01 +27502039,Fascin Is Critical for the Maintenance of Breast Cancer Stem Cell Pool Predominantly via the Activation of the Notch Self-Renewal Pathway.,"An emerging dogma shows that tumors are initiated and maintained by a subpopulation of cancer cells that hijack some stem cell features and thus referred to as ""cancer stem cells"" (CSCs). The exact mechanism that regulates the maintenance of CSC pool remains largely unknown. Fascin is an actin-bundling protein that we have previously demonstrated to be a major regulator of breast cancer chemoresistance and metastasis, two cardinal features of CSCs. Here, we manipulated fascin expression in breast cancer cell lines and used several in vitro and in vivo approaches to examine the relationship between fascin expression and breast CSCs. Fascin knockdown significantly reduced stem cell-like phenotype (CD44hi /CD24lo and ALDH+ ) and reversal of epithelial to mesenchymal transition. Interestingly, expression of the embryonic stem cell transcriptional factors (Oct4, Nanog, Sox2, and Klf4) was significantly reduced when fascin expression was down-regulated. Functionally, fascin-knockdown cells were less competent in forming colonies and tumorspheres, consistent with lower basal self-renewal activity and higher susceptibility to chemotherapy. Fascin effect on CSC chemoresistance and self-renewability was associated with Notch signaling. Activation of Notch induced the relevant downstream targets predominantly in the fascin-positive cells. 
Limiting-dilution xenotransplantation assay showed higher frequency of tumor-initiating cells in the fascin-positive group. Collectively, our data demonstrated fascin as a critical regulator of breast CSC pool at least partially via activation of the Notch self-renewal signaling pathway and modification of the expression embryonic transcriptional factors. Targeting fascin may halt CSCs and thus presents a novel therapeutic approach for effective treatment of breast cancer. Stem Cells 2016;34:2799-2813 Video Highlight: https://youtu.be/GxS4fJ_Ow-o.",2016-08-21 +26764970,Con-Current versus Counter-Current Dialysate Flow during CVVHD. A Comparative Study for Creatinine and Urea Removal.,"

Background

Dialysate fluid connection to the membrane in continuous dialysis may affect solute clearance. Although circuit connections are routinely made counter-current to blood flow in intermittent dialysis, no study has assessed the effect of this dialysate fluid flow direction on removal of small solutes creatinine and urea during treatment using continuous veno-venous haemodialysis (CVVHD).

Aims

To assess if dialysate flow direction during CVVHD affects small solute removal.

Methods

This ethics-approved study recruited a convenience sample of 26 adult ICU patients requiring continuous dialysis to assess urea and creatinine removal for con-current vs. counter-current dialysate flow direction. The circuit was adjusted from continuous veno-venous haemodiafiltration to CVVHD 20 min prior to sampling with no fluid removal. Blood (b) and spent dialysate fluid (f) were taken in both concurrent and counter-current fluid flow at 1 (T1) and 4 (T4) hours with a new treatment. Blood flow was 200 ml/min. Dialysate flow 33 ml/min. Removal of urea and creatinine was expressed as the diafiltrate/plasma concentration ratio: Uf/b and Cf/b respectively. Data lacking normal distribution are presented as median with 25th and 75th interquartile ranges (IQR), otherwise as mean with SD and assessed with the independent t test for paired data. p < 0.5 was considered significant.

Results

Fifteen male patients were included with a median (IQR) age of 67 years (52-75), and APACHE II score 17 (14-19) with all patients meeting RIFLE criteria 'F'. At both times, the counter-current dialysate flow was associated with higher mean (SD) diafiltrate/plasma concentration ratios: T1 0.87 (0.16) vs. 0.77 (0.10), p = 0.006; T2 0.96 (0.16) vs. 0.76 (0.09), p < 0.001 for creatinine and T1 0.98 (0.09) vs. 0.81 (0.09), p < 0.001; T2 0.99 (0.07) vs. 0.82 (0.08), p < 0.001 for urea.

Conclusion

Counter-current dialysate flow during CVVHD for ICU patients is associated with an approximately 20% increase in removal of small solutes creatinine and urea. Video Journal Club 'Cappuccino with Claudio Ronco' at http://www.karger.com/?doi=441270.",2016-01-15 +24825612,PyCorrFit-generic data evaluation for fluorescence correlation spectroscopy.,"

Unlabelled

We present a graphical user interface (PyCorrFit) for the fitting of theoretical model functions to experimental data obtained by fluorescence correlation spectroscopy (FCS). The program supports many data file formats and features a set of tools specialized in FCS data evaluation.

Availability and implementation

The Python source code is freely available for download from the PyCorrFit web page at http://pycorrfit.craban.de. We offer binaries for Ubuntu Linux, Mac OS X and Microsoft Windows.",2014-05-13 +22877077,Genome-scale identification of cell-wall related genes in Arabidopsis based on co-expression network analysis.,"

Background

Identification of the novel genes relevant to plant cell-wall (PCW) synthesis represents a highly important and challenging problem. Although substantial efforts have been invested into studying this problem, the vast majority of the PCW related genes remain unknown.

Results

Here we present a computational study focused on identification of the novel PCW genes in Arabidopsis based on the co-expression analyses of transcriptomic data collected under 351 conditions, using a bi-clustering technique. Our analysis identified 217 highly co-expressed gene clusters (modules) under some experimental conditions, each containing at least one gene annotated as PCW related according to the Purdue Cell Wall Gene Families database. These co-expression modules cover 349 known/annotated PCW genes and 2,438 new candidates. For each candidate gene, we annotated the specific PCW synthesis stages in which it is involved and predicted the detailed function. In addition, for the co-expressed genes in each module, we predicted and analyzed their cis regulatory motifs in the promoters using our motif discovery pipeline, providing strong evidence that the genes in each co-expression module are transcriptionally co-regulated. From the all co-expression modules, we infer that 108 modules are related to four major PCW synthesis components, using three complementary methods.

Conclusions

We believe our approach and data presented here will be useful for further identification and characterization of PCW genes. All the predicted PCW genes, co-expression modules, motifs and their annotations are available at a web-based database: http://csbl.bmb.uga.edu/publications/materials/shanwang/CWRPdb/index.html.",2012-08-09 +22970114,A public HTLV-1 molecular epidemiology database for sequence management and data mining.,"

Background

It is estimated that 15 to 20 million people are infected with the human T-cell lymphotropic virus type 1 (HTLV-1). At present, there are more than 2,000 unique HTLV-1 isolate sequences published. A central database to aggregate sequence information from a range of epidemiological aspects including HTLV-1 infections, pathogenesis, origins, and evolutionary dynamics would be useful to scientists and physicians worldwide. Described here, we have developed a database that collects and annotates sequence data and can be accessed through a user-friendly search interface. The HTLV-1 Molecular Epidemiology Database website is available at http://htlv1db.bahia.fiocruz.br/.

Methodology/principal findings

All data was obtained from publications available at GenBank or through contact with the authors. The database was developed using Apache Webserver 2.1.6 and SGBD MySQL. The webpage interfaces were developed in HTML and server-side scripting written in PHP. The HTLV-1 Molecular Epidemiology Database is hosted on the Gonçalo Moniz/FIOCRUZ Research Center server. There are currently 2,457 registered sequences with 2,024 (82.37%) of those sequences representing unique isolates. Of these sequences, 803 (39.67%) contain information about clinical status (TSP/HAM, 17.19%; ATL, 7.41%; asymptomatic, 12.89%; other diseases, 2.17%; and no information, 60.32%). Further, 7.26% of sequences contain information on patient gender while 5.23% of sequences provide the age of the patient.

Conclusions/significance

The HTLV-1 Molecular Epidemiology Database retrieves and stores annotated HTLV-1 proviral sequences from clinical, epidemiological, and geographical studies. The collected sequences and related information are now accessible on a publically available and user-friendly website. This open-access database will support clinical research and vaccine development related to viral genotype.",2012-09-10 +25956653,xVis: a web server for the schematic visualization and interpretation of crosslink-derived spatial restraints.,"The identification of crosslinks by mass spectrometry has recently been established as an integral part of the hybrid structural analysis of protein complexes and networks. The crosslinking analysis determines distance restraints between two covalently linked amino acids which are typically summarized in a table format that precludes the immediate and comprehensive interpretation of the topological data. xVis displays crosslinks in clear schematic representations in form of a circular, bar or network diagram. The interactive graphs indicate the linkage sites and identification scores, depict the spatial proximity of structurally and functionally annotated protein regions and the evolutionary conservation of amino acids and facilitate clustering of proteins into subcomplexes according to the crosslink density. Furthermore, xVis offers two options for the qualitative assessment of the crosslink identifications by filtering crosslinks according to identification scores or false discovery rates and by displaying the corresponding fragment ion spectrum of each crosslink for the manual validation of the mass spectrometric data. Our web server provides an easy-to-use tool for the fast topological and functional interpretation of distance information on protein complex architectures and for the evaluation of crosslink fragment ion spectra. 
xVis is available under a Creative Commons Attribution-ShareAlike 4.0 International license at http://xvis.genzentrum.lmu.de/.",2015-05-08 +27045519,Anemia Management in the China Dialysis Outcomes and Practice Patterns Study.,"

Background

As the utilization of hemodialysis increases in China, it is critical to examine anemia management.

Methods

Using data from the China Dialysis Outcomes and Practice Patterns Study (DOPPS), we describe hemoglobin (Hgb) distribution and anemia-related therapies.

Results

Twenty one percent of China's DOPPS patients had Hgb <9 g/dl, compared with ≤10% in Japan and North America. A majority of medical directors targeted Hgb ≥11. Patients who were female, younger, or recently hospitalized had higher odds of Hgb <9; those with insurance coverage or on twice weekly dialysis had lower odds of Hgb <9. Iron use and erythropoietin-stimulating agents (ESAs) dose were modestly higher for patients with Hgb <9 compared with Hgb in the range 10-12.

Conclusion

A large proportion of hemodialysis patients in China's DOPPS do not meet the expressed Hgb targets. Less frequent hemodialysis, patient financial contribution, and lack of a substantial increase in ESA dose at lower Hgb concentrations may partially explain this gap. Video Journal Club 'Cappuccino with Claudio Ronco' at http://www.karger.com/?doi=442741.",2016-03-31 +,Above- and Belowground Carbon Stocks in a Miombo Woodland Landscape of Mozambique,"Quantifying ecosystem carbon stocks is vital for understanding the relationship between changes in land use and carbon dioxide emissions. Here, we estimate carbon stocks in an area of miombo woodland in Mozambique, by identifying the major carbon stocks and their variability. Data on the biomass of tree stems and roots, saplings, and soil carbon stocks are reported and compared with other savannas systems around the globe. A new allometric relationship between stem diameter and tree stem and root biomass is presented, based on the destructive harvest of 29 trees. These allometrics are combined with an inventory of 12,733 trees on 58 plots over an area of 27 ha. Ecosystem carbon stocks totaled 110 tC/ha, with 76 tC/ha in the soil carbon pool (to 50 cm depth), 21.2 tC/ha in tree stem biomass, 8.5 tC/ha in tree coarse root biomass, and 3.6 tC/ha in total sapling biomass. Plot-level tree root:stem (R:S) ratio varied from 0.27 to 0.58, with a mean of 0.42, slightly higher than the mean reported for 18 other savanna sites with comparable aboveground biomass (R:S=0.35). Tree biomass (stem+root) ranged from 3.1 to 86.5 tC/ha, but the mean (32.1 tC/ha) was well constrained (95% CI 28-36.6). In contrast, soil carbon stocks were almost uniformly distributed and varied from 32 to 133 tC/ha. Soil carbon stocks are thus the major uncertainty in the carbon storage of these woodlands. Soil texture explained 53 percent of the variation in soil carbon content, but only 13 percent of the variation in woody carbon stocks. 
The history of disturbance (fire, elephants, logging/charcoal production, and shifting cultivation) is likely to decouple changes in woody carbon stocks from soil carbon stocks, mediated by tree-grass interactions. Abstract in Portuguese is available at http://www.blackwell-synergy.com/loi/btp.",2011-07-01 +22496762,Functional annotation of hierarchical modularity.,"In biological networks of molecular interactions in a cell, network motifs that are biologically relevant are also functionally coherent, or form functional modules. These functionally coherent modules combine in a hierarchical manner into larger, less cohesive subsystems, thus revealing one of the essential design principles of system-level cellular organization and function-hierarchical modularity. Arguably, hierarchical modularity has not been explicitly taken into consideration by most, if not all, functional annotation systems. As a result, the existing methods would often fail to assign a statistically significant functional coherence score to biologically relevant molecular machines. We developed a methodology for hierarchical functional annotation. Given the hierarchical taxonomy of functional concepts (e.g., Gene Ontology) and the association of individual genes or proteins with these concepts (e.g., GO terms), our method will assign a Hierarchical Modularity Score (HMS) to each node in the hierarchy of functional modules; the HMS score and its p-value measure functional coherence of each module in the hierarchy. While existing methods annotate each module with a set of ""enriched"" functional terms in a bag of genes, our complementary method provides the hierarchical functional annotation of the modules and their hierarchically organized components. A hierarchical organization of functional modules often comes as a bi-product of cluster analysis of gene expression data or protein interaction data. 
Otherwise, our method will automatically build such a hierarchy by directly incorporating the functional taxonomy information into the hierarchy search process and by allowing multi-functional genes to be part of more than one component in the hierarchy. In addition, its underlying HMS scoring metric ensures that functional specificity of the terms across different levels of the hierarchical taxonomy is properly treated. We have evaluated our method using Saccharomyces cerevisiae data from KEGG and MIPS databases and several other computationally derived and curated datasets. The code and additional supplemental files can be obtained from http://code.google.com/p/functional-annotation-of-hierarchical-modularity/ (Accessed 2012 March 13).",2012-04-04 +25677125,DOSE: an R/Bioconductor package for disease ontology semantic and enrichment analysis.,

Summary

Disease ontology (DO) annotates human genes in the context of disease. DO is important annotation in translating molecular findings from high-throughput data to clinical relevance. DOSE is an R package providing semantic similarity computations among DO terms and genes which allows biologists to explore the similarities of diseases and of gene functions in disease perspective. Enrichment analyses including hypergeometric model and gene set enrichment analysis are also implemented to support discovering disease associations of high-throughput biological data. This allows biologists to verify disease relevance in a biological experiment and identify unexpected disease associations. Comparison among gene clusters is also supported.

Availability and implementation

DOSE is released under Artistic-2.0 License. The source code and documents are freely available through Bioconductor (http://www.bioconductor.org/packages/release/bioc/html/DOSE.html).

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

gcyu@connect.hku.hk or tqyhe@jnu.edu.cn.,2014-10-17 +26293226,CoMOGrad and PHOG: From Computer Vision to Fast and Accurate Protein Tertiary Structure Retrieval.,"The number of entries in a structural database of proteins is increasing day by day. Methods for retrieving protein tertiary structures from such a large database have turn out to be the key to comparative analysis of structures that plays an important role to understand proteins and their functions. In this paper, we present fast and accurate methods for the retrieval of proteins having tertiary structures similar to a query protein from a large database. Our proposed methods borrow ideas from the field of computer vision. The speed and accuracy of our methods come from the two newly introduced features- the co-occurrence matrix of the oriented gradient and pyramid histogram of oriented gradient- and the use of Euclidean distance as the distance measure. Experimental results clearly indicate the superiority of our approach in both running time and accuracy. Our method is readily available for use from this website: http://research.buet.ac.bd:8080/Comograd/.",2015-08-21 +26492038,"Summary of Notifiable Infectious Diseases and Conditions - United States, 2013.","The Summary of Notifiable Infectious Diseases and Condition-United States, 2013 (hereafter referred to as the summary) contains the official statistics, in tabular and graphic form, for the reported occurrence of nationally notifiable infectious diseases and conditions in the United States for 2013. Unless otherwise noted, data are final totals for 2013 reported as of June 30, 2014. These statistics are collected and compiled from reports sent by U.S. state and territory, New York City, and District of Columbia health departments to the National Notifiable Diseases Surveillance System (NNDSS), which is operated by CDC in collaboration with the Council of State and Territorial Epidemiologists (CSTE). 
This summary is available at http://www.cdc.gov/mmwr/mmwr_nd/index.html. This site also includes summary publications from previous years.",2015-10-23 +23493324,ChemoPy: freely available python package for computational biology and chemoinformatics.,"

Motivation

Molecular representation for small molecules has been routinely used in QSAR/SAR, virtual screening, database search, ranking, drug ADME/T prediction and other drug discovery processes. To facilitate extensive studies of drug molecules, we developed a freely available, open-source python package called chemoinformatics in python (ChemoPy) for calculating the commonly used structural and physicochemical features. It computes 16 drug feature groups composed of 19 descriptors that include 1135 descriptor values. In addition, it provides seven types of molecular fingerprint systems for drug molecules, including topological fingerprints, electro-topological state (E-state) fingerprints, MACCS keys, FP4 keys, atom pairs fingerprints, topological torsion fingerprints and Morgan/circular fingerprints. By applying a semi-empirical quantum chemistry program MOPAC, ChemoPy can also compute a large number of 3D molecular descriptors conveniently.

Availability

The python package, ChemoPy, is freely available via http://code.google.com/p/pychem/downloads/list, and it runs on Linux and MS-Windows.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-03-14 +24766612,Normalyzer: a tool for rapid evaluation of normalization methods for omics data sets.,"High-throughput omics data often contain systematic biases introduced during various steps of sample processing and data generation. As the source of these biases is usually unknown, it is difficult to select an optimal normalization method for a given data set. To facilitate this process, we introduce the open-source tool ""Normalyzer"". It normalizes the data with 12 different normalization methods and generates a report with several quantitative and qualitative plots for comparative evaluation of different methods. The usefulness of Normalyzer is demonstrated with three different case studies from quantitative proteomics and transcriptomics. The results from these case studies show that the choice of normalization method strongly influences the outcome of downstream quantitative comparisons. Normalyzer is an R package and can be used locally or through the online implementation at http://quantitativeproteomics.org/normalyzer .",2014-05-02 +27853512,Contextual Hub Analysis Tool (CHAT): A Cytoscape app for identifying contextually relevant hubs in biological networks.,"Highly connected nodes (hubs) in biological networks are topologically important to the structure of the network and have also been shown to be preferentially associated with a range of phenotypes of interest. The relative importance of a hub node, however, can change depending on the biological context. Here, we report a Cytoscape app, the Contextual Hub Analysis Tool (CHAT), which enables users to easily construct and visualize a network of interactions from a gene or protein list of interest, integrate contextual information, such as gene expression or mass spectrometry data, and identify hub nodes that are more highly connected to contextual nodes (e.g. genes or proteins that are differentially expressed) than expected by chance. 
In a case study, we use CHAT to construct a network of genes that are differentially expressed in Dengue fever, a viral infection. CHAT was used to identify and compare contextual and degree-based hubs in this network. The top 20 degree-based hubs were enriched in pathways related to the cell cycle and cancer, which is likely due to the fact that proteins involved in these processes tend to be highly connected in general. In comparison, the top 20 contextual hubs were enriched in pathways commonly observed in a viral infection including pathways related to the immune response to viral infection. This analysis shows that such contextual hubs are considerably more biologically relevant than degree-based hubs and that analyses which rely on the identification of hubs solely based on their connectivity may be biased towards nodes that are highly connected in general rather than in the specific context of interest.

Availability

CHAT is available for Cytoscape 3.0+ and can be installed via the Cytoscape App Store ( http://apps.cytoscape.org/apps/chat).",2016-07-19 +25681631,Alignment of direct detection device micrographs using a robust Optical Flow approach.,"The introduction of direct detection devices in cryo-EM has shown that specimens present beam-induced motion (BIM). Consequently, in this work, we develop a BIM correction method at the image level, resulting in an integrated image in which the in-plane BIM blurring is compensated prior to particle picking. The methodology is based on a robust Optical Flow (OF) approach that can efficiently correct for local movements in a rapid manner. The OF works particularly well if the BIM pattern presents a substantial degree of local movements, which occurs in our data sets for Falcon II data. However, for those cases in which the BIM pattern corresponds to global movements, we have found it advantageous to first run a global motion correction approach and to subsequently apply OF. Additionally, spatial analysis of the Optical Flow allows for quantitative analysis of the BIM pattern. The software that incorporates the new approach is available in XMIPP (http://xmipp.cnb.csic.es).",2015-02-12 +26272709,GenomewidePDB 2.0: A Newly Upgraded Versatile Proteogenomic Database for the Chromosome-Centric Human Proteome Project.,"Since the launch of the Chromosome-centric Human Proteome Project (C-HPP) in 2012, the number of ""missing"" proteins has fallen to 2932, down from ∼5932 since the number was first counted in 2011. We compared the characteristics of missing proteins with those of already annotated proteins with respect to transcriptional expression pattern and the time periods in which newly identified proteins were annotated. We learned that missing proteins commonly exhibit lower levels of transcriptional expression and less tissue-specific expression compared with already annotated proteins. 
This makes it more difficult to identify missing proteins as time goes on. One of the C-HPP goals is to identify alternative spliced product of proteins (ASPs), which are usually difficult to find by shot-gun proteomic methods due to their sequence similarities with the representative proteins. To resolve this problem, it may be necessary to use a targeted proteomics approach (e.g., selected and multiple reaction monitoring [S/MRM] assays) and an innovative bioinformatics platform that enables the selection of target peptides for rarely expressed missing proteins or ASPs. Given that the success of efforts to identify missing proteins may rely on more informative public databases, it was necessary to upgrade the available integrative databases. To this end, we attempted to improve the features and utility of GenomewidePDB by integrating transcriptomic information (e.g., alternatively spliced transcripts), annotated peptide information, and an advanced search interface that can find proteins of interest when applying a targeted proteomics strategy. This upgraded version of the database, GenomewidePDB 2.0, may not only expedite identification of the remaining missing proteins but also enhance the exchange of information among the proteome community. GenomewidePDB 2.0 is available publicly at http://genomewidepdb.proteomix.org/.",2015-08-19 +26289427,CicArVarDB: SNP and InDel database for advancing genetics research and breeding applications in chickpea. ,"Molecular markers are valuable tools for breeders to help accelerate crop improvement. High throughput sequencing technologies facilitate the discovery of large-scale variations such as single nucleotide polymorphisms (SNPs) and simple sequence repeats (SSRs). Sequencing of chickpea genome along with re-sequencing of several chickpea lines has enabled the discovery of 4.4 million variations including SNPs and InDels. 
Here we report a repository of 1.9 million variations (SNPs and InDels) anchored on eight pseudomolecules in a custom database, referred as CicArVarDB that can be accessed at http://cicarvardb.icrisat.org/. It includes an easy interface for users to select variations around specific regions associated with quantitative trait loci, with embedded webBLAST search and JBrowse visualisation. We hope that this database will be immensely useful for the chickpea research community for both advancing genetics research as well as breeding applications for crop improvement. Database URL: http://cicarvardb.icrisat.org.",2015-08-19 +25820431,IntFOLD: an integrated server for modelling protein structures and functions from amino acid sequences.,"IntFOLD is an independent web server that integrates our leading methods for structure and function prediction. The server provides a simple unified interface that aims to make complex protein modelling data more accessible to life scientists. The server web interface is designed to be intuitive and integrates a complex set of quantitative data, so that 3D modelling results can be viewed on a single page and interpreted by non-expert modellers at a glance. The only required input to the server is an amino acid sequence for the target protein. Here we describe major performance and user interface updates to the server, which comprises an integrated pipeline of methods for: tertiary structure prediction, global and local 3D model quality assessment, disorder prediction, structural domain prediction, function prediction and modelling of protein-ligand interactions. The server has been independently validated during numerous CASP (Critical Assessment of Techniques for Protein Structure Prediction) experiments, as well as being continuously evaluated by the CAMEO (Continuous Automated Model Evaluation) project. 
The IntFOLD server is available at: http://www.reading.ac.uk/bioinf/IntFOLD/.",2015-03-27 +24222012,Sexual dysfunction related to psychotropic drugs: a critical review. Part III: mood stabilizers and anxiolytic drugs.,"

Introduction

Sexual dysfunction is a potential side effect of mood stabilizers and anxiolytic drugs: this article presents a critical review of the current literature. Although many studies have been published on sexual side effects of psychopharmacological treatment, only a minority relate to mood stabilizers and anxiolytic drugs. Most of these studies are not methodologically robust, few are RCTs and most did not use a validated rating scale to evaluate sexual functioning. In addition, many of the studies on sexual dysfunction associated with mood stabilizers and anxiolytic drugs are limited by other methodological flaws. While there is evidence to suggest that mood stabilizers, with some exceptions, negatively affect sexual functioning, there is still insufficient evidence to draw any clear conclusions about the effects of anxiolytic drugs on sexual function. There is some weak evidence to indicate that switching from enzyme-inducing to non-enzyme-inducing anticonvulsant drugs, could be clinically useful. Some researchers recommend that sexual dysfunction in patients taking antiepileptic drugs should in general be treated according to standard guidelines for the management of sexual dysfunction, since reliable data on special populations is not available. However, specific approaches may be useful, but cannot yet be recommended until further validating research has been conducted. We did not find evidence supporting the use of any specific treatment strategy for sexual dysfunction associated with anxiolytic treatment.

Methods

This study was conducted in 2013 using the paper and electronic resources of the library of the Azienda Provinciale per i Servizi Sanitari (APSS) in Trento, Italy (http://atoz.ebsco.com/Titles/2793). The library has access to a wide range of databases including DYNAMED, MEDLINE Full Text, CINAHL Plus Full Text, The Cochrane Library, Micromedex healthcare series, BMJ Clinical Evidence. The full list of available journals can be viewed at http://atoz.ebsco.com/Titles/2793, or at the APSS web site (http://www.apss.tn.it). In completing this review, a literature search was conducted using the key words ""anxiolytic drugs"", ""mood stabilizers"", ""benzodiazepines"", ""psychotrophic drugs"", ""sexual dysfunction"", ""sexual side effects"", ""treatment-emergent sexual dysfunction"". All resulting listed articles were reviewed.

Discussion

This review includes studies that investigated the relationship between mood stabilizer and anxiolytic drug treatment and sexual dysfunction. The purpose was to identify possible intervention strategies for sexual dysfunction related to these drugs.",2013-11-12 +26794316,Inclusion of dyad-repeat pattern improves topology prediction of transmembrane β-barrel proteins.,"

Unlabelled

Accurate topology prediction of transmembrane β-barrels is still an open question. Here, we present BOCTOPUS2, an improved topology prediction method for transmembrane β-barrels that can also identify the barrel domain, predict the topology and identify the orientation of residues in transmembrane β-strands. The major novelty of BOCTOPUS2 is the use of the dyad-repeat pattern of lipid and pore facing residues observed in transmembrane β-barrels. In a cross-validation test on a benchmark set of 42 proteins, BOCTOPUS2 predicts the correct topology in 69% of the proteins, an improvement of more than 10% over the best earlier method (BOCTOPUS) and in addition, it produces significantly fewer erroneous predictions on non-transmembrane β-barrel proteins.

Availability and implementation

BOCTOPUS2 webserver along with full dataset and source code is available at http://boctopus.bioinfo.se/

Contact

arne@bioinfo.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-21 +26803158,FamAgg: an R package to evaluate familial aggregation of traits in large pedigrees.,"

Unlabelled

Familial aggregation analysis is the first fundamental step to perform when assessing the extent of genetic background of a disease. However, there is a lack of software to analyze the familial clustering of complex phenotypes in very large pedigrees. Such pedigrees can be utilized to calculate measures that express trait aggregation on both the family and individual level, providing valuable directions in choosing families for detailed follow-up studies. We developed FamAgg, an open source R package that contains both established and novel methods to investigate familial aggregation of traits in large pedigrees. We demonstrate its use and interpretation by analyzing a publicly available cancer dataset with more than 20 000 participants distributed across approximately 400 families.

Availability and implementation

The FamAgg package is freely available at the Bioconductor repository, http://www.bioconductor.org/packages/FamAgg

Contact

Christian.Weichenberger@eurac.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-22 +22102771,EuDBase: An online resource for automated EST analysis pipeline (ESTFrontier) and database for red seaweed Eucheuma denticulatum.,"Functional genomics has proven to be an efficient tool in identifying genes involved in various biological functions. However the availability of commercially important seaweed Eucheuma denticulatum functional resources is still limited. EuDBase is the first seaweed online repository that provides integrated access to ESTs of Eucheuma denticulatum generated from samples collected from Kudat and Semporna in Sabah, Malaysia. The database stored 10,031 ESTs that are clustered and assembled into 2,275 unique transcripts (UT) and 955 singletons. Raw data were automatically processed using ESTFrontier, an in-house automated EST analysis pipeline. Data was collected in MySQL database. Web interface is implemented using PHP and it allows browsing and querying EuDBase through search engine. Data is searchable via BLAST hit, domain search, Gene Ontology or KEGG Pathway. A user-friendly interface allows the identification of sequences either using a simple text query or similarity search. The development of EuDBase is initiated to store, manage and analyze the E. denticulatum ESTs and to provide accumulative digital resources for the use of global scientific community. EuDBase is freely available from http://www.inbiosis.ukm.my/eudbase/.",2011-10-14 +27296980,Unbiased probabilistic taxonomic classification for DNA barcoding.,"

Motivation

When targeted to a barcoding region, high-throughput sequencing can be used to identify species or operational taxonomical units from environmental samples, and thus to study the diversity and structure of species communities. Although there are many methods which provide confidence scores for assigning taxonomic affiliations, it is not straightforward to translate these values to unbiased probabilities. We present a probabilistic method for taxonomical classification (PROTAX) of DNA sequences. Given a pre-defined taxonomical tree structure that is partially populated by reference sequences, PROTAX decomposes the probability of one to the set of all possible outcomes. PROTAX accounts for species that are present in the taxonomy but that do not have reference sequences, the possibility of unknown taxonomical units, as well as mislabeled reference sequences. PROTAX is based on a statistical multinomial regression model, and it can utilize any kind of sequence similarity measures or the outputs of other classifiers as predictors.

Results

We demonstrate the performance of PROTAX by using as predictors the output from BLAST, the phylogenetic classification software TIPP, and the RDP classifier. We show that PROTAX improves the predictions of the baseline implementations of TIPP and RDP classifiers, and that it is able to combine complementary information provided by BLAST and TIPP, resulting in accurate and unbiased classifications even with very challenging cases such as 50% mislabeling of reference sequences.

Availability and implementation

Perl/R implementation of PROTAX is available at http://www.helsinki.fi/science/metapop/Software.htm

Contact

panu.somervuo@helsinki.fi

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-13 +27367037,LotuS: an efficient and user-friendly OTU processing pipeline.,"

Background

16S ribosomal DNA (rDNA) amplicon sequencing is frequently used to analyse the structure of bacterial communities from oceans to the human microbiota. However, computational power is still a major bottleneck in the analysis of continuously enlarging metagenomic data sets. Analysis is further complicated by the technical complexity of current bioinformatics tools.

Results

Here we present the less operational taxonomic units scripts (LotuS), a fast and user-friendly open-source tool to calculate denoised, chimera-checked, operational taxonomic units (OTUs). These are the basis to generate taxonomic abundance tables and phylogenetic trees from multiplexed, next-generation sequencing data (454, Illumina MiSeq and HiSeq). LotuS is outstanding in its execution speed, as it can process 16S rDNA data up to two orders of magnitude faster than other existing pipelines. This is partly due to an included stand-alone fast simultaneous demultiplexer and quality filter C++ program, simple demultiplexer (sdm), which comes packaged with LotuS. Additionally, we sequenced two MiSeq runs with the intent to validate future pipelines by sequencing 40 technical replicates; these are made available in this work.

Conclusion

We show that LotuS analyses microbial 16S data with comparable or even better results than existing pipelines, requiring a fraction of the execution time and providing state-of-the-art denoising and phylogenetic reconstruction. LotuS is available through the following URL: http://psbweb05.psb.ugent.be/lotus .",2014-09-30 +27154786,The German Thorotrast Cohort Study: a review and how to get access to the data.,"It is well known that exposures like those from (226)Ra, (224)Ra and Thorotrast(®) injections increase the risk of neoplasia in bone marrow and liver. The thorium-based radioactive contrast agent Thorotrast(®) was introduced in 1929 and applied worldwide until the 1950s, especially in angiography and arteriography. Due to the extremely long half-life of several hundred years and the life-long retention of the thorium dioxide particles in the human body, patients suffer lifetime internal exposure. The health effects from the incorporated thorium were investigated in a few cohort studies with a German study being the largest among them. This retrospective cohort study was set up in 1968 with a follow-up until 2004. The study comprises 2326 Thorotrast patients and 1890 patients of a matched control group. For those being alive at the start of the study in 1968 follow-up was done by clinical examinations on a biannual basis. For the others, causes of death were collected in various ways. Additionally, clinical, radiological and biophysical studies of patients were conducted and large efforts were made to best estimate the radiation doses associated with incorporation of the Thorotrast. The aim of this paper is to describe the cohort, important results and some open questions. The data from the German Thorotrast Study are available to other interested researchers. 
Information can be found at http://storedb.org .",2016-05-06 +22139918,AutismKB: an evidence-based knowledgebase of autism genetics.,"Autism spectrum disorder (ASD) is a heterogeneous neurodevelopmental disorder with a prevalence of 0.9-2.6%. Twin studies showed a heritability of 38-90%, indicating strong genetic contributions. Yet it is unclear how many genes have been associated with ASD and how strong the evidence is. A comprehensive review and analysis of literature and data may bring a clearer big picture of autism genetics. We show that as many as 2193 genes, 2806 SNPs/VNTRs, 4544 copy number variations (CNVs) and 158 linkage regions have been associated with ASD by GWAS, genome-wide CNV studies, linkage analyses, low-scale genetic association studies, expression profiling and other low-scale experimental studies. To evaluate the evidence, we collected metadata about each study including clinical and demographic features, experimental design and statistical significance, and used a scoring and ranking approach to select a core data set of 434 high-confidence genes. The genes mapped to pathways including neuroactive ligand-receptor interaction, synapse transmission and axon guidance. To better understand the genes we parsed over 30 databases to retrieve extensive data about expression patterns, protein interactions, animal models and pharmacogenetics. We constructed a MySQL-based online database and share it with the broader autism research community at http://autismkb.cbi.pku.edu.cn, supporting sophisticated browsing and searching functionalities.",2011-12-01 +23717182,"Molluscan fauna of Gueishan Island, Taiwan.","This dataset records the occurrence and inventory of molluscan fauna on Gueishan Island, the only active volcanic island in Taiwan, based on the literature survey and field investigation conducted between 2011 and 2012. 
The literature review involved seven studies published from 1934 to 2003, which collectively reported 112 species from 61 genera and 37 families of Mollusca on Gueishan Island. Through our field investigation, we identified 34 species from 28 genera and 23 families. Fourteen of these species were new records on Gueishan Island: Liolophura japonica, Lottia luchuana, Nerita costata, Nerita rumphii, Diplommatina suganikeiensis, Littoraria undulata, Solenomphala taiwanensis, Assiminea sp., Siphonaria laciniosa, Laevapex nipponica, Carychium hachijoensis, Succinea erythrophana, Zaptyx crassilamellata, and Allopeas pyrgula. In Total, there are 126 species from 71 genera and 45 families of Mollusca on Gueishan Island. These data have been published through GBIF [http://taibif.org.tw/ipt/resource.do?r=gueishan_island] and integrated into the Taiwan Malacofauna Database (http://shell.sinica.edu.tw/).",2013-01-24 +25256572,A two-stage statistical procedure for feature selection and comparison in functional analysis of metagenomes.,"

Motivation

With the advance of new sequencing technologies producing massive short reads data, metagenomics is rapidly growing, especially in the fields of environmental biology and medical science. The metagenomic data are not only high dimensional with large number of features and limited number of samples but also complex with a large number of zeros and skewed distribution. Efficient computational and statistical tools are needed to deal with these unique characteristics of metagenomic sequencing data. In metagenomic studies, one main objective is to assess whether and how multiple microbial communities differ under various environmental conditions.

Results

We propose a two-stage statistical procedure for selecting informative features and identifying differentially abundant features between two or more groups of microbial communities. In the functional analysis of metagenomes, the features may refer to the pathways, subsystems, functional roles and so on. In the first stage of the proposed procedure, the informative features are selected using the elastic net, thereby reducing the dimension of the metagenomic data. In the second stage, the differentially abundant features are detected using generalized linear models with a negative binomial distribution. Compared with other available methods, the proposed approach demonstrates better performance for most of the comprehensive simulation studies. The new method is also applied to two real metagenomic datasets related to human health. Our findings are consistent with those in previous reports.

Availability

R code and two example datasets are available at http://cals.arizona.edu/~anling/software.htm.

Supplementary information

Supplementary file is available at Bioinformatics online.",2014-09-24 +25519348,Fast genome-wide pedigree quantitative trait loci analysis using MENDEL.,"The linkage era left a rich legacy of pedigree samples that can be used for modern genome-wide association sequencing (GWAS) or next-generation sequencing (NGS) studies. Family designs are naturally equipped to detect rare variants, control for population stratification, and facilitate the study of parent-of-origin effects. Unfortunately, pedigree likelihoods are notoriously hard to compute, and current software for association mapping in pedigrees is prohibitively slow in processing dense marker maps. In a recent release of the comprehensive genetic analysis software MENDEL, we implemented an ultra-fast score test for association mapping with pedigree-based GWAS or NGS study data. Our implementation (a) works for random sample data, pedigree data, or a mix of both;(b) allows for covariate adjustment, including correction for population stratification;(c) accommodates both univariate and multivariate quantitative traits; and (d) allows missing values in multivariate traits. In this paper, we assess the capabilities of MENDEL on the Genetic Analysis Workshop 18 sequencing data. For instance, when jointly testing the 4 longitudinally measured diastolic blood pressure traits, it takes MENDEL less than 51 minutes on a standard laptop computer to read, quality check, and analyze a data set with 959 individuals and 8.3 million single-nucleotide polymorphisms (SNPs). Our analysis reveals association of one SNP in the q32.2 region of chromosome 1. 
MENDEL is freely available on http://www.genetics.ucla.edu/software.",2014-06-17 +24843271,"The relevance, biases, and importance of digitising opportunistic non-standardised collections: A case study in Iberian harvestmen fauna with BOS Arthropod Collection datasets (Arachnida, Opiliones).","In this study, we analyse the relevance of harvestmen distribution data derived from opportunistic, unplanned, and non-standardised collection events in an area in the north of the Iberian Peninsula. Using specimens deposited in the BOS Arthropod Collection at the University of Oviedo, we compared these data with data from planned, standardised, and periodic collections with pitfall traps in several locations in the same area. The Arthropod Collection, begun in 1977, includes specimens derived from both sampling types, and its recent digitisation allows for this type of comparative analysis. Therefore, this is the first data-paper employing a hybrid approach, wherein subset metadata are described alongside a comparative analysis. The full dataset can be accessed through Spanish GBIF IPT at http://www.gbif.es:8080/ipt/archive.do?r=Bos-Opi, and the metadata of the unplanned collection events at http://www.gbif.es:8080/ipt/resource.do?r=bos-opi_unplanned_collection_events. We have mapped the data on the 18 harvestmen species included in the unplanned collections and provided records for some species in six provinces for the first time. We have also provided the locations of Phalangium opilio in eight provinces without published records. 
These results highlight the importance of digitising data from unplanned biodiversity collections, as well as those derived from planned collections, especially in scarcely studied groups and areas.",2014-04-24 +21177657,mESAdb: microRNA expression and sequence analysis database.,"microRNA expression and sequence analysis database (http://konulab.fen.bilkent.edu.tr/mirna/) (mESAdb) is a regularly updated database for the multivariate analysis of sequences and expression of microRNAs from multiple taxa. mESAdb is modular and has a user interface implemented in PHP and JavaScript and coupled with statistical analysis and visualization packages written for the R language. The database primarily comprises mature microRNA sequences and their target data, along with selected human, mouse and zebrafish expression data sets. mESAdb analysis modules allow (i) mining of microRNA expression data sets for subsets of microRNAs selected manually or by motif; (ii) pair-wise multivariate analysis of expression data sets within and between taxa; and (iii) association of microRNA subsets with annotation databases, HUGE Navigator, KEGG and GO. The use of existing and customized R packages facilitates future addition of data sets and analysis tools. Furthermore, the ability to upload and analyze user-specified data sets makes mESAdb an interactive and expandable analysis tool for microRNA sequence and expression data.",2011-01-01 +32313365,"Analysis and visualization of H7 influenza using genomic, evolutionary and geographic information in a modular web service.","We have reported previously on use of a web-based application, Supramap (http://supramap.org) for the study of biogeographic, genotypic, and phenotypic evolution. Using Supramap we have developed maps of the spread of drug-resistant influenza and host shifts in H1N1 and H5N1 influenza and coronaviruses such as SARS. 
Here we report on another zoonotic pathogen, H7 influenza, and provide an update on the implementation of Supramap as a web service. We find that the emergence of pathogenic strains of H7 is labile with many transitions from high to low pathogenicity, and from low to high pathogenicity. We use Supramap to put these events in a temporal and geospatial context. We identify several lineages of H7 influenza with biomarkers of high pathogenicity in regions that have not been reported in the scientific literature. The original implementation of Supramap was built with tightly coupled client and server software. Now we have decoupled the components to provide a modular web service for POY (http://poyws.org) that can be consumed by a data provider to create a novel application. To demonstrate the web service, we have produced an application, Geogenes (http://geogenes.org). Unlike in Supramap, in which the user is required to create and upload data files, in Geogenes the user works from a graphical interface to query an underlying dataset. Geogenes demonstrates how the web service can provide underlying processing for any sequence and metadata database. © The Willi Hennig Society 2012.",2012-05-21 +26398339,Small Supernumerary Marker Chromosomes in Human Infertility.,"Small supernumerary marker chromosomes (sSMC) are structurally abnormal chromosomes that cannot be unambiguously identified by banding cytogenetics. The objective of this study was to provide an overview of sSMC frequency and characterization in a context of infertility and to review the literature describing sSMC in relation with male and female infertility. Therefore, a systematic literature review on sSMC associated with infertility was conducted by means of a PubMed literature and a sSMC database (http://ssmc-tl.com/sSMC.html) search. A total of 234 patients with infertility were identified as carriers of sSMC. 
All chromosomes, except chromosomes 10, 19 and the X, were involved in sSMC, and in 72% the sSMC originated from acrocentric chromosomes. Euchromatic imbalances were caused by the presence of sSMC in 30% of the cases. Putative genes have been identified in only 1.2% of sSMC associated with infertility. The implication of sSMC in infertility could be due to a partial trisomy of some genes but also to mechanical effects perturbing meiosis. Further precise molecular and interphase-architecture studies on sSMC are needed in the future to characterize the relationship between this chromosomal anomaly and human infertility.",2015-08-14 +25174004,Genetic variability in the regulation of gene expression in ten regions of the human brain.,"Germ-line genetic control of gene expression occurs via expression quantitative trait loci (eQTLs). We present a large, exon-specific eQTL data set covering ten human brain regions. We found that cis-eQTL signals (within 1 Mb of their target gene) were numerous, and many acted heterogeneously among regions and exons. Co-regulation analysis of shared eQTL signals produced well-defined modules of region-specific co-regulated genes, in contrast to standard coexpression analysis of the same samples. We report cis-eQTL signals for 23.1% of catalogued genome-wide association study hits for adult-onset neurological disorders. The data set is publicly available via public data repositories and via http://www.braineac.org/. Our study increases our understanding of the regulation of gene expression in the human brain and will be of value to others pursuing functional follow-up of disease-associated variants.",2014-08-31 +26516186,NCG 5.0: updates of a manually curated repository of cancer genes and associated properties from cancer mutational screenings.,"The Network of Cancer Genes (NCG, http://ncg.kcl.ac.uk/) is a manually curated repository of cancer genes derived from the scientific literature. 
Due to the increasing amount of cancer genomic data, we have introduced a more robust procedure to extract cancer genes from published cancer mutational screenings and two curators independently reviewed each publication. NCG release 5.0 (August 2015) collects 1571 cancer genes from 175 published studies that describe 188 mutational screenings of 13 315 cancer samples from 49 cancer types and 24 primary sites. In addition to collecting cancer genes, NCG also provides information on the experimental validation that supports the role of these genes in cancer and annotates their properties (duplicability, evolutionary origin, expression profile, function and interactions with proteins and miRNAs).",2015-10-29 +21082439,Submitting proteomics data to PRIDE using PRIDE Converter.,"With the continuously growing amount of proteomics data being produced, it has become increasingly important to make these data publicly available so that they can be audited, reanalyzed, and reused. More and more journals are also starting to request the deposition of MS data in publicly available repositories for submitted proteomics manuscripts. In this chapter we focus on one of the most commonly used proteomics data repositories, PRIDE (the PRoteomics IDEntifications database, http://www.ebi.ac.uk/pride), and demonstrate how a new graphical user interface tool called PRIDE Converter (http://pride-converter.googlecode.com) greatly simplifies the submission of data to PRIDE.",2011-01-01 +25871613,Improved Prediction of CYP-Mediated Metabolism with Chemical Fingerprints.,"Molecule and atom fingerprints, similar to path-based Daylight fingerprints, can substantially improve the accuracy of P450 site-of-metabolism prediction models. Only two chemical fingerprints have been used in metabolism prediction, so little is known about the importance of fingerprint parameters on site of metabolism predictions. It is possible that different fingerprints might yield more accurate models. 
Here, we study if tuning fingerprints to specific site of metabolism data sets can lead to improved models. We measure the impact of 484 specific chemical fingerprints on the accuracy of P450 site-of-metabolism prediction models on nine P450 isoform site of metabolism data sets. Using a range of search depths, we study path, circular, and subgraph fingerprints. Two different labelings, also, are considered, both standard SMILES labels and also a labeling that marks ring bonds differently than nonring bonds, enabling ortho, para, and meta positioning of substituents to be more clearly encoded. Optimal fingerprint models chosen by cross-validation performance on the full training data are, on average, 3.8% (Top-2; percent of molecules with a site of metabolism in the top two predictions) and 1.4% (AUC; area under the ROC curve) more accurate than base fingerprint models. These gains represent, respectively, a 25.6% and 16.7% reduction in error. A more rigorous assessment selects fingerprints within each cross-validation fold, sometimes selecting different fingerprints for different folds, but yielding a more reliable estimate of generalization error. In this assessment, averaging the scores from the top few fingerprints yields performances improvements of, on average, 3.0% (Top-2) and 0.7% (AUC). These gains are statistically significant and represent, respectively, a 20.1% and 8.8% reduction in error. Between different isoforms, not many consistencies were observed among the top performing fingerprints, with different fingerprints working best for different isoforms. These results suggest that there are important gains achievable in site of metabolism modeling by including and optimizing atom and molecule fingerprints. 
The optimal site of metabolism models determined by this approach are available for use at http://swami.wustl.edu/.",2015-05-08 +25761929,Comparison of human expert and computer-automated systems using magnitude-squared coherence (MSC) and bootstrap distribution statistics for the interpretation of pattern electroretinograms (PERGs) in infants with optic nerve hypoplasia (ONH).,"

Purpose

Pattern electroretinograms (PERGs) have inherently low signal-to-noise ratios and can be difficult to detect when degraded by pathology or noise. We compare an objective system for automated PERG analysis with expert human interpretation in children with optic nerve hypoplasia (ONH) with PERGs ranging from clear to undetectable.

Methods

PERGs were recorded uniocularly with chloral hydrate sedation in children with ONH (aged 3.5-35 months). Stimuli were reversing checks of four sizes focused using an optical system incorporating the cycloplegic refraction. Forty PERG records were analysed; 20 selected at random and 20 from eyes with good vision (fellow eyes or eyes with mild ONH) from over 300 records. Two experts identified P50 and N95 of the PERGs after manually deleting trials with movement artefact, slow-wave EEG (4-8 Hz) or other noise from raw data for 150 check reversals. The automated system first identified present/not-present responses using a magnitude-squared coherence criterion and then, for responses confirmed as present, estimated the P50 and N95 cardinal positions as the turning points in local third-order polynomials fitted in the -3 dB bandwidth [0.25 … 45] Hz. Confidence limits were estimated from bootstrap re-sampling with replacement. The automated system uses an interactive Internet-available webpage tool (see http://clinengnhs.liv.ac.uk/esp_perg_1.htm).

Results

The automated system detected 28 PERG signals above the noise level (p ≤ 0.05 for H0). Good subjective quality ratings were indicative of significant PERGs; however, poor subjective quality did not necessarily predict non-significant signals. P50 and N95 implicit times showed good agreement between the two experts and between experts and the automated system. For the N95 amplitude measured to P50, the experts differed by an average of 13% consistent with differing interpretations of peaks within noise, while the automated amplitude measure was highly correlated with the expert measures but was proportionally larger. Trial-by-trial review of these data required approximately 6.5 h for each human expert, while automated data processing required <4 min, excluding overheads relating to data transfer.

Conclusions

An automated computer system for PERG analysis, using a panel of signal processing and statistical techniques, provides objective present/not-present detection and cursor positioning with explicit confidence intervals. The system achieves, within an efficient and robust statistical framework, estimates of P50 and N95 amplitudes and implicit times similar to those of clinical experts.",2015-03-12 +30708559,First Report of Leaf Blight of Japanese Yew Caused by Pestalotiopsis microspora in Korea.,"Worldwide, Japanese yew (Taxus cuspidata Sieb. & Zucc.) is a popular garden tree, with large trees also being used for timber. In July 2012, leaf blight was observed on 10% of Japanese yew seedling leaves planted in a 500-m2 field in Andong, Gyeongsangbuk-do Province, South Korea. Typical symptoms included small, brown lesions that were first visible on the leaf margin, which enlarged and coalesced into the leaf becoming brown and blighted. To isolate potential pathogens from infected leaves, small sections of leaf tissue (5 to 10 mm2) were excised from lesion margins. Eight fungi were isolated from eight symptomatic trees, respectively. These fungi were hyphal tipped twice and transferred to potato dextrose agar (PDA) plates for incubation at 25°C. After 7 days, the fungi produced circular mats of white aerial mycelia. After 12 days, black acervuli containing slimy spore masses formed over the mycelial mats. Two representative isolates were further characterized. Their conidia were straight or slightly curved, fusiform to clavate, five-celled with constrictions at the septa, and 17.4 to 28.5 × 5.8 to 7.1 μm. Two to four 19.8- to 30.7-μm-long hyaline filamentous appendages (mostly three appendages) were attached to each apical cell, whereas one 3.7- to 7.1-μm-long hyaline appendage was attached to each basal cell, matching the description for Pestalotiopsis microspora (2). The pathogenicity of the two isolates was tested using 2-year-old plants (T. cuspidata var. 
nana Rehder; three plants per isolate) in 30-cm-diameter pots filled with soil under greenhouse conditions. The plants were inoculated by spraying the leaves with an atomizer with a conidial suspension (10^5 conidia/ml; ~50 ml on each plant) cultured for 10 days on PDA. As a control, three plants were inoculated with sterilized water. The plants were covered with plastic bags for 72 h to maintain high relative humidity (24 to 28°C). At 20 days after inoculation, small dark lesions enlarged into brown blight similar to that observed on naturally infected leaves. P. microspora was isolated from all inoculated plants, but not the controls. The fungus was confirmed by molecular analysis of the 5.8S subunit and flanking internal transcribed spacers (ITS1 and ITS2) of rDNA amplified from DNA extracted from single-spore cultures, and amplified with the ITS1/ITS4 primers and sequenced as previously described (4). Sequences were compared with other DNA sequences in GenBank using a BLASTN search. The P. microspora isolates were 99% homologous to other P. microspora (DQ456865, EU279435, FJ459951, and FJ459950). The morphological characteristics, pathogenicity, and molecular data assimilated in this study corresponded with the fungus P. microspora (2). This fungus has been previously reported as the causal agent of scab disease of Psidium guajava in Hawaii, the decline of Torreya taxifolia in Florida, and the leaf blight of Reineckea carnea in China (1,3). Therefore, this study presents the first report of P. microspora as a pathogen on T. cuspidata in Korea. The degree of pathogenicity of P. microspora to the Korean garden evergreen T. cuspidata requires quantification to determine its potential economic damage and to establish effective management practices. References: (1) D. F. Farr and A. Y. Rossman, Fungal Databases, Syst. Mycol. Microbiol. Lab. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ (2) L. M. Keith et al. Plant Dis. 90:16, 2006. (3) S. S. N. 
Maharachchikumbura. Fungal Diversity 50:167, 2011. (4) T. J. White et al. PCR Protocols. Academic Press, San Diego, CA, 1990.",2014-05-01 +26607834,Inferring the hosts of coronavirus using dual statistical models based on nucleotide composition.,"Many coronaviruses are capable of interspecies transmission. Some of them have caused worldwide panic as emerging human pathogens in recent years, e.g., severe acute respiratory syndrome coronavirus (SARS-CoV) and Middle East respiratory syndrome coronavirus (MERS-CoV). In order to assess their threat to humans, we explored to infer the potential hosts of coronaviruses using a dual-model approach based on nineteen parameters computed from spike genes of coronaviruses. Both the support vector machine (SVM) model and the Mahalanobis distance (MD) discriminant model achieved high accuracies in leave-one-out cross-validation of training data consisting of 730 representative coronaviruses (99.86% and 98.08% respectively). Predictions on 47 additional coronaviruses precisely conformed to conclusions or speculations by other researchers. Our approach is implemented as a web server that can be accessed at http://bioinfo.ihb.ac.cn/seq2hosts.",2015-11-26 +22067098,"ApoptoProteomics, an integrated database for analysis of proteomics data obtained from apoptotic cells.","Apoptosis is the most commonly described form of programmed cell death, and dysfunction is implicated in a large number of human diseases. Many quantitative proteome analyses of apoptosis have been performed to gain insight in proteins involved in the process. This resulted in large and complex data sets that are difficult to evaluate. Therefore, we developed the ApoptoProteomics database for storage, browsing, and analysis of the outcome of large scale proteome analyses of apoptosis derived from human, mouse, and rat. 
The proteomics data of 52 publications were integrated and unified with protein annotations from UniProt-KB, the caspase substrate database homepage (CASBAH), and gene ontology. Currently, more than 2300 records of more than 1500 unique proteins were included, covering a large proportion of the core signaling pathways of apoptosis. Analysis of the data set revealed a high level of agreement between the reported changes in directionality reported in proteomics studies and expected apoptosis-related function and may disclose proteins without a current recognized involvement in apoptosis based on gene ontology. Comparison between induction of apoptosis by the intrinsic and the extrinsic apoptotic signaling pathway revealed slight differences. Furthermore, proteomics has significantly contributed to the field of apoptosis in identifying hundreds of caspase substrates. The database is available at http://apoptoproteomics.uio.no.",2011-11-08 +25430569,Idiopathic scoliosis and the vestibular system.,"

Purpose

Despite its high prevalence, the etiology underlying idiopathic scoliosis remains unclear. Although initial scrutiny has focused on genetic, biochemical, biomechanical, nutritional and congenital causes, there is growing evidence that aberrations in the vestibular system may play a role in the etiology of scoliosis. In this article, we discuss putative mechanisms for adolescent idiopathic scoliosis and review the current evidence supporting a role for the vestibular system in adolescent idiopathic scoliosis.

Methods

A comprehensive search of the English literature was performed using PubMed ( http://www.ncbi.nlm.nih.gov/pubmed ). Research articles studying interactions between adolescent idiopathic scoliosis and the vestibular system were selected and evaluated for inclusion in a literature review.

Results

Eighteen manuscripts of level 3-4 clinical evidence to support an association between adolescent idiopathic scoliosis (AIS) and dysfunction of the vestibular system were identified. These studies include data from physiologic and morphologic studies in humans. Clinical data are supported by animal model studies to suggest a causative link between the vestibular system and AIS.

Conclusions

Clinical data and a limited number of animal model studies suggest a causative role of the vestibular system in AIS, although this association has not been reproduced in all studies.",2014-11-28 +26272981,cyNeo4j: connecting Neo4j and Cytoscape.,"

Unlabelled

We developed cyNeo4j, a Cytoscape App to link Cytoscape and Neo4j databases to utilize the performance and storage capacities Neo4j offers. We implemented a Neo4j NetworkAnalyzer, ForceAtlas2 layout and Cypher component to demonstrate the possibilities a distributed setup of Cytoscape and Neo4j have.

Availability and implementation

The app is available from the Cytoscape App Store at http://apps.cytoscape.org/apps/cyneo4j, the Neo4j plugins at www.github.com/gsummer/cyneo4j-parent and the community and commercial editions of Neo4j can be found at http://www.neo4j.com.

Contact

georg.summer@gmail.com.",2015-08-12 +22462644,Speeding up chemical searches using the inverted index: the convergence of chemoinformatics and text search methods.,"In ligand-based screening, retrosynthesis, and other chemoinformatics applications, one often seeks to search large databases of molecules in order to retrieve molecules that are similar to a given query. With the expanding size of molecular databases, the efficiency and scalability of data structures and algorithms for chemical searches are becoming increasingly important. Remarkably, both the chemoinformatics and information retrieval communities have converged on similar solutions whereby molecules or documents are represented by binary vectors, or fingerprints, indexing their substructures such as labeled paths for molecules and n-grams for text, with the same Jaccard-Tanimoto similarity measure. As a result, similarity search methods from one field can be adapted to the other. Here we adapt recent, state-of-the-art, inverted index methods from information retrieval to speed up similarity searches in chemoinformatics. Our results show a several-fold speed-up improvement over previous methods for both threshold searches and top-K searches. We also provide a mathematical analysis that allows one to predict the level of pruning achieved by the inverted index approach and validate the quality of these predictions through simulation experiments. All results can be replicated using data freely downloadable from http://cdb.ics.uci.edu/ .",2012-04-10 +27191382,Deep biomarkers of human aging: Application of deep neural networks to biomarker development.,"One of the major impediments in human aging research is the absence of a comprehensive and actionable set of biomarkers that may be targeted and measured to track the effectiveness of therapeutic interventions. 
In this study, we designed a modular ensemble of 21 deep neural networks (DNNs) of varying depth, structure and optimization to predict human chronological age using a basic blood test. To train the DNNs, we used over 60,000 samples from common blood biochemistry and cell count tests from routine health exams performed by a single laboratory and linked to chronological age and sex. The best performing DNN in the ensemble demonstrated 81.5 % epsilon-accuracy r = 0.90 with R(2) = 0.80 and MAE = 6.07 years in predicting chronological age within a 10 year frame, while the entire ensemble achieved 83.5% epsilon-accuracy r = 0.91 with R(2) = 0.82 and MAE = 5.55 years. The ensemble also identified the 5 most important markers for predicting human chronological age: albumin, glucose, alkaline phosphatase, urea and erythrocytes. To allow for public testing and evaluate real-life performance of the predictor, we developed an online system available at http://www.aging.ai. The ensemble approach may facilitate integration of multi-modal data linked to chronological age and sex that may lead to simple, minimally invasive, and affordable methods of tracking integrated biomarkers of aging in humans and performing cross-species feature importance analysis.",2016-05-01 +27766961,VDJML: a file format with tools for capturing the results of inferring immune receptor rearrangements.,"

Background

The genes that produce antibodies and the immune receptors expressed on lymphocytes are not germline encoded; rather, they are somatically generated in each developing lymphocyte by a process called V(D)J recombination, which assembles specific, independent gene segments into mature composite genes. The full set of composite genes in an individual at a single point in time is referred to as the immune repertoire. V(D)J recombination is the distinguishing feature of adaptive immunity and enables effective immune responses against an essentially infinite array of antigens. Characterization of immune repertoires is critical in both basic research and clinical contexts. Recent technological advances in repertoire profiling via high-throughput sequencing have resulted in an explosion of research activity in the field. This has been accompanied by a proliferation of software tools for analysis of repertoire sequencing data. Despite the widespread use of immune repertoire profiling and analysis software, there is currently no standardized format for output files from V(D)J analysis. Researchers utilize software such as IgBLAST and IMGT/High V-QUEST to perform V(D)J analysis and infer the structure of germline rearrangements. However, each of these software tools produces results in a different file format, and can annotate the same result using different labels. These differences make it challenging for users to perform additional downstream analyses.

Results

To help address this problem, we propose a standardized file format for representing V(D)J analysis results. The proposed format, VDJML, provides a common standardized format for different V(D)J analysis applications to facilitate downstream processing of the results in an application-agnostic manner. The VDJML file format specification is accompanied by a support library, written in C++ and Python, for reading and writing the VDJML file format.

Conclusions

The VDJML suite will allow users to streamline their V(D)J analysis and facilitate the sharing of scientific knowledge within the community. The VDJML suite and documentation are available from https://vdjserver.org/vdjml/ . We welcome participation from the community in developing the file format standard, as well as code contributions.",2016-10-06 +27716031,Boolean regulatory network reconstruction using literature based knowledge with a genetic algorithm optimization method.,"

Background

Prior knowledge networks (PKNs) provide a framework for the development of computational biological models, including Boolean models of regulatory networks which are the focus of this work. PKNs are created by a painstaking process of literature curation, and generally describe all relevant regulatory interactions identified using a variety of experimental conditions and systems, such as specific cell types or tissues. Certain of these regulatory interactions may not occur in all biological contexts of interest, and their presence may dramatically change the dynamical behaviour of the resulting computational model, hindering the elucidation of the underlying mechanisms and reducing the usefulness of model predictions. Methods are therefore required to generate optimized contextual network models from generic PKNs.

Results

We developed a new approach to generate and optimize Boolean networks, based on a given PKN. Using a genetic algorithm, a model network is built as a sub-network of the PKN and trained against experimental data to reproduce the experimentally observed behaviour in terms of attractors and the transitions that occur between them under specific perturbations. The resulting model network is therefore contextualized to the experimental conditions and constitutes a dynamical Boolean model closer to the observed biological process used to train the model than the original PKN. Such a model can then be interrogated to simulate response under perturbation, to detect stable states and their properties, to get insights into the underlying mechanisms and to generate new testable hypotheses.

Conclusions

Generic PKNs attempt to synthesize knowledge of all interactions occurring in a biological process of interest, irrespective of the specific biological context. This limits their usefulness as a basis for the development of context-specific, predictive dynamical Boolean models. The optimization method presented in this article produces specific, contextualized models from generic PKNs. These contextualized models have improved utility for hypothesis generation and experimental design. The general applicability of this methodological approach makes it suitable for a variety of biological systems and of general interest for biological and medical research. Our method was implemented in the software optimusqual, available online at http://www.vital-it.ch/software/optimusqual/ .",2016-10-06 +26122437,Correction.,"ENCORE1 Study Group. Efficacy and safety of efavirenz 400 mg daily versus 600 mg daily: 96-week data from the randomised, double-blind, placebo-controlled, non-inferiority ENCORE1 study. Lancet Infect Dis 2015; published online April 13. http://dx.doi.org/10.1016/S1473-3099(15)70060-5—In this Article, the numbers of patients in the 96-week adverse events analysis were incorrect.",2015-06-03 +22257667,FX: an RNA-Seq analysis tool on the cloud.,"

Unlabelled

FX is an RNA-Seq analysis tool, which runs in parallel on cloud computing infrastructure, for the estimation of gene expression levels and genomic variant calling. In the mapping of short RNA-Seq reads, FX uses a transcriptome-based reference primarily, generated from ~160 000 mRNA sequences from RefSeq, UCSC and Ensembl databases. This approach reduces the misalignment of reads originating from splicing junctions. Unmapped reads not aligned on known transcripts are then mapped on the human genome reference. FX allows analysis of RNA-Seq data on cloud computing infrastructures, supporting access through a user-friendly web interface.

Availability

FX is freely available on the web at (http://fx.gmi.ac.kr), and can be installed on local Hadoop clusters. Guidance for the installation and operation of FX can be found under the 'Documentation' menu on the website.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-17 +22730431,Predicting drug-target interactions from chemical and genomic kernels using Bayesian matrix factorization.,"

Motivation

Identifying interactions between drug compounds and target proteins has a great practical importance in the drug discovery process for known diseases. Existing databases contain very few experimentally validated drug-target interactions and formulating successful computational methods for predicting interactions remains challenging.

Results

In this study, we consider four different drug-target interaction networks from humans involving enzymes, ion channels, G-protein-coupled receptors and nuclear receptors. We then propose a novel Bayesian formulation that combines dimensionality reduction, matrix factorization and binary classification for predicting drug-target interaction networks using only chemical similarity between drug compounds and genomic similarity between target proteins. The novelty of our approach comes from the joint Bayesian formulation of projecting drug compounds and target proteins into a unified subspace using the similarities and estimating the interaction network in that subspace. We propose using a variational approximation in order to obtain an efficient inference scheme and give its detailed derivations. Finally, we demonstrate the performance of our proposed method in three different scenarios: (i) exploratory data analysis using low-dimensional projections, (ii) predicting interactions for the out-of-sample drug compounds and (iii) predicting unknown interactions of the given network.

Availability

Software and Supplementary Material are available at http://users.ics.aalto.fi/gonen/kbmf2k.

Contact

mehmet.gonen@aalto.fi

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-06-23 +26259927,Impact of adherence to WHO infant feeding recommendations on later risk of obesity and non-communicable diseases: systematic review.,"Adherence to WHO infant feeding recommendations has short-term benefits and may also help in the prevention of non-communicable diseases (NCDs). This study reviewed the evidence on whether adherence to all elements of the WHO infant feeding recommendations (comparison group those exclusively breastfed to 6 months, introduced to appropriate complementary feeding from 6 months, with continued breastfeeding to at least 24 months; exposure group characterised by non-adherence to any of the three recommendations) is associated with reduced risk of later obesity or cardiometabolic disease. The population of interest was children not classified as very low weight (weight-for-age z-score >-3.0). MEDLINE, EMBASE, Global Health, CINAHL plus, ProQuest Dissertations and Thesis were systematically searched from 2001 to July 2014, manual reference searching of a birth cohort register (http://www.birthcohorts.net/) as well as papers identified in the search and selected journals was carried out. The database search yielded 9050 records, 275 English-language full-text articles were screened, but no studies were eligible, failing to meet the following criteria: comparison (213); exposure (14); population (3); relevant outcome (5); outcome before 24 months (9); insufficient information provided (30); plus one study was qualitative. Eight studies met the inclusion criterion of exclusive breastfeeding to 6 months, but did not meet the other inclusion criteria. 
The present study has revealed an important gap in the evidence on NCD prevention, and suggestions for addressing this evidence gap are provided.",2015-08-11 +23829323,Exploring site-specific N-glycosylation microheterogeneity of haptoglobin using glycopeptide CID tandem mass spectra and glycan database search.,"Glycosylation is a common protein modification with a significant role in many vital cellular processes and human diseases, making the characterization of protein-attached glycan structures important for understanding cell biology and disease processes. Direct analysis of protein N-glycosylation by tandem mass spectrometry of glycopeptides promises site-specific elucidation of N-glycan microheterogeneity, something that detached N-glycan and deglycosylated peptide analyses cannot provide. However, successful implementation of direct N-glycopeptide analysis by tandem mass spectrometry remains a challenge. In this work, we consider algorithmic techniques for the analysis of LC-MS/MS data acquired from glycopeptide-enriched fractions of enzymatic digests of purified proteins. We implement a computational strategy that takes advantage of the properties of CID fragmentation spectra of N-glycopeptides, matching the MS/MS spectra to peptide-glycan pairs from protein sequences and glycan structure databases. Significantly, we also propose a novel false discovery rate estimation technique to estimate and manage the number of false identifications. We use a human glycoprotein standard, haptoglobin, digested with trypsin and GluC, enriched for glycopeptides using HILIC chromatography, and analyzed by LC-MS/MS to demonstrate our algorithmic strategy and evaluate its performance. Our software, GlycoPeptideSearch (GPS), assigned glycopeptide identifications to 246 of the spectra at a false discovery rate of 5.58%, identifying 42 distinct haptoglobin peptide-glycan pairs at each of the four haptoglobin N-linked glycosylation sites. 
We further demonstrate the effectiveness of this approach by analyzing plasma-derived haptoglobin, identifying 136 N-linked glycopeptide spectra at a false discovery rate of 0.4%, representing 15 distinct glycopeptides on at least three of the four N-linked glycosylation sites. The software, GlycoPeptideSearch, is available for download from http://edwardslab.bmcb.georgetown.edu/GPS .",2013-07-22 +24675236,Integrated visualization of a multi-omics study of starvation in mouse intestine.,"Our understanding of complex biological processes can be enhanced by combining different kinds of high-throughput experimental data, but the use of incompatible identifiers makes data integration a challenge. We aimed to improve methods for integrating and visualizing different types of omics data. To validate these methods, we applied them to two previous studies on starvation in mice, one using proteomics and the other using transcriptomics technology. We extended the PathVisio software with new plugins to link proteins, transcripts and pathways. A low overall correlation between proteome and transcriptome data was detected (Spearman rank correlation: 0.21). At the level of individual genes, correlation was highly variable. Many mRNA/protein pairs, such as fructose biphosphate aldolase B and ATP Synthase, show good correlation. For other pairs, such as ferritin and elongation factor 2, an interesting effect is observed, where mRNA and protein levels change in opposite directions, suggesting they are not primarily regulated at the transcriptional level. We used pathway diagrams to visualize the integrated datasets and found it encouraging that transcriptomics and proteomics data supported each other at the pathway level. Visualization of the integrated dataset on pathways led to new observations on gene-regulation in the response of the gut to starvation. Our methods are generic and can be applied to any multi-omics study. 
The PathVisio software can be obtained at http://www.pathvisio.org. Supplemental data are available at http://www.bigcat.unimaas.nl/data/jib-supplemental/ , including instructions on reproducing the pathway visualizations of this manuscript.",2014-03-28 +27507231,Refining Ovarian Cancer Test accuracy Scores (ROCkeTS): protocol for a prospective longitudinal test accuracy study to validate new risk scores in women with symptoms of suspected ovarian cancer.,"

Introduction

Ovarian cancer (OC) is associated with non-specific symptoms such as bloating, making accurate diagnosis challenging: only 1 in 3 women with OC presents through primary care referral. National Institute for Health and Care Excellence guidelines recommends sequential testing with CA125 and routine ultrasound in primary care. However, these diagnostic tests have limited sensitivity or specificity. Improving accurate triage in women with vague symptoms is likely to improve mortality by streamlining referral and care pathways. The Refining Ovarian Cancer Test Accuracy Scores (ROCkeTS; HTA 13/13/01) project will derive and validate new tests/risk prediction models that estimate the probability of having OC in women with symptoms. This protocol refers to the prospective study only (phase III).

Methods and analysis

ROCkeTS comprises four parallel phases. The full ROCkeTS protocol can be found at http://www.birmingham.ac.uk/ROCKETS. Phase III is a prospective test accuracy study. The study will recruit 2450 patients from 15 UK sites. Recruited patients complete symptom and anxiety questionnaires, donate a serum sample and undergo ultrasound scored as per International Ovarian Tumour Analysis (IOTA) criteria. Recruitment is at rapid access clinics, emergency departments and elective clinics. Models to be evaluated include those based on ultrasound derived by the IOTA group and novel models derived from analysis of existing data sets. Estimates of sensitivity, specificity, c-statistic (area under receiver operating curve), positive predictive value and negative predictive value of diagnostic tests are evaluated and a calibration plot for models will be presented. ROCkeTS has received ethical approval from the NHS West Midlands REC (14/WM/1241) and is registered on the controlled trials website (ISRCTN17160843) and the National Institute of Health Research Cancer and Reproductive Health portfolios.",2016-08-09 +22923291,EFICAz2.5: application of a high-precision enzyme function predictor to 396 proteomes.,"

Unlabelled

High-quality enzyme function annotation is essential for understanding the biochemistry, metabolism and disease processes of organisms. Previously, we developed a multi-component high-precision enzyme function predictor, EFICAz(2) (enzyme function inference by a combined approach). Here, we present an updated improved version, EFICAz(2.5), that is trained on a significantly larger data set of enzyme sequences and PROSITE patterns. We also present the results of the application of EFICAz(2.5) to the enzyme reannotation of 396 genomes cataloged in the ENSEMBL database.

Availability

The EFICAz(2.5) server and database are freely available with a user-friendly interface at http://cssb.biology.gatech.edu/EFICAz2.5.",2012-08-24 +26817603,Long- versus short-interval follow-up of cytologically benign thyroid nodules: a prospective cohort study.,"

Background

Thyroid nodules are common, and most are benign. Given the risk of false-negative cytology (i.e. malignancy), follow-up is recommended after 1-2 years, though this recommendation is based solely on expert opinion. Sonographic appearance may assist with planning, but is limited by large inter-observer variability. We therefore compared the safety and efficacy of long- versus short-interval follow-up after a benign initial aspiration, regardless of sonographic appearance.

Methods

This study evaluated all patients referred to the Brigham and Women's Hospital Thyroid Nodule Clinic, between 1999 and 2010, with a cytologically benign nodule >1 cm and who had returned for follow-up sonographic evaluation. Despite standard clinical recommendations, variation in patient compliance resulted in variable follow-up intervals from time of initial aspiration to the first repeat evaluation. Main outcome measures included nodule growth, repeat fine needle aspiration (FNA), thyroidectomy, malignancy, and disease-specific mortality.

Results

We evaluated 1,254 patients with 1,819 cytologically benign nodules, with a median time to first follow-up of 1.4 years (range, 0.5-14.1 years). The longer the follow-up interval, the more nodules grew and the more repeat FNAs were performed (P <0.001). The most clinically meaningful endpoints of malignancy or mortality, however, did not differ between the various follow-up intervals. The risk of a thyroidectomy (usually because of compressive symptoms) increased when time to first follow-up exceeded 3 years (4.9% vs. 1.2%, P = 0.0001), though no difference in malignancy risk was identified (0.2-0.8%, P = 0.77). No (0%) thyroid cancer-specific deaths were identified in either cohort.

Conclusions

While expert opinion currently recommends repeat evaluation of a cytologically benign nodule at 1-2 years, these are the first data to demonstrate that this interval can be safely extended to 3 years without increased mortality or patient harm. Nodule growth can be expected, though detection of malignancies is unchanged. While replication of these data in large prospective multicenter studies is needed, this extension in follow-up interval would reduce unnecessary visits and medical interventions for millions of affected patients worldwide, leading to healthcare savings. Please see related commentary article: http://dx.doi.org/10.1186/s12916-016-0559-9 and research article: http://dx.doi.org/10.1186/s12916-015-0419-z .",2016-01-27 +27153629,INPS-MD: a web server to predict stability of protein variants from sequence and structure.,"

Motivation

Protein function depends on its structural stability. The effects of single point variations on protein stability can elucidate the molecular mechanisms of human diseases and help in developing new drugs. Recently, we introduced INPS, a method suited to predict the effect of variations on protein stability from protein sequence and whose performance is competitive with the available state-of-the-art tools.

Results

In this article, we describe INPS-MD (Impact of Non synonymous variations on Protein Stability-Multi-Dimension), a web server for the prediction of protein stability changes upon single point variation from protein sequence and/or structure. Here, we complement INPS with a new predictor (INPS3D) that exploits features derived from protein 3D structure. INPS3D scores with Pearson's correlation to experimental ΔΔG values of 0.58 in cross validation and of 0.72 on a blind test set. The sequence-based INPS scores slightly lower than the structure-based INPS3D, and both compare well with the state-of-the-art methods on the same blind test sets.

Availability and implementation

INPS and INPS3D are available at the same web server: http://inpsmd.biocomp.unibo.it

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

gigi@biocomp.unibo.it.",2016-04-10 +25465054,VTBuilder: a tool for the assembly of multi isoform transcriptomes.,"

Background

Within many research areas, such as transcriptomics, the millions of short DNA fragments (reads) produced by current sequencing platforms need to be assembled into transcript sequences before they can be utilized. Despite recent advances in assembly software, creating such transcripts from read data harboring isoform variation remains challenging. This is because current approaches fail to identify all variants present or they create chimeric transcripts within which relationships between co-evolving sites and other evolutionary factors are disrupted. We present VTBuilder, a tool for constructing non-chimeric transcripts from read data that has been sequenced from sources containing isoform complexity.

Results

We validated VTBuilder using reads simulated from 54 Sanger sequenced transcripts (SSTs) expressed in the venom gland of the saw scaled viper, Echis ocellatus. The SSTs were selected to represent genes from major co-expressed toxin groups known to harbor isoform variants. From the simulated reads, VTBuilder constructed 55 transcripts, 50 of which had a greater than 99% sequence similarity to 48 of the SSTs. In contrast, using the popular assembler tool Trinity (r2013-02-25), only 14 transcripts were constructed with a similar level of sequence identity to just 11 SSTs. Furthermore VTBuilder produced transcripts with a similar length distribution to the SSTs while those produced by Trinity were considerably shorter. To demonstrate that our approach can be scaled to real world data we assembled the venom gland transcriptome of the African puff adder Bitis arietans using paired-end reads sequenced on Illumina's MiSeq platform. VTBuilder constructed 1481 transcripts from 5 million reads and, following annotation, all major toxin genes were recovered demonstrating reconstruction of complex underlying sequence and isoform diversity.

Conclusion

Unlike other approaches, VTBuilder strives to maintain the relationships between co-evolving sites within the constructed transcripts, and thus increases transcript utility for a wide range of research areas ranging from transcriptomics to phylogenetics and including the monitoring of drug resistant parasite populations. Additionally, improving the quality of transcripts assembled from read data will have an impact on future studies that query these data. VTBuilder has been implemented in java and is available, under the GPL GPU V0.3 license, from http:// http://www.lstmed.ac.uk/vtbuilder .",2014-12-03 +25654717,Real-time noise removal for line-scanning hyperspectral devices using a minimum noise fraction-based approach.,"Processing line-by-line and in real-time can be convenient for some applications of line-scanning hyperspectral imaging technology. Some types of processing, like inverse modeling and spectral analysis, can be sensitive to noise. The MNF (minimum noise fraction) transform provides suitable denoising performance, but requires full image availability for the estimation of image and noise statistics. In this work, a modified algorithm is proposed. Incrementally-updated statistics enables the algorithm to denoise the image line-by-line. The denoising performance has been compared to conventional MNF and found to be equal. With a satisfying denoising performance and real-time implementation, the developed algorithm can denoise line-scanned hyperspectral images in real-time. The elimination of waiting time before denoised data are available is an important step towards real-time visualization of processed hyperspectral data. The source code can be found at http://www.github.com/ntnu-bioopt/mnf. 
This includes an implementation of conventional MNF denoising.",2015-02-03 +25702779,Modeling the ComD/ComE/comcde interaction network using small angle X-ray scattering.,"The ComD-ComE two-component system controls the competence state of Streptococcus pneumoniae via the phospho-regulation of ComE, which fluctuates between monomeric and dimeric states. We previously showed that the non-phosphorylatable ComE(D) (58A) mutant is monomeric in solution, whereas the ComE(D) (58E) active mimic mutant dimerizes via its REC domains. The crystal structure of ComE(D) (58A) revealed an asymmetric dimer that may represent the activated form of ComE. Here, we investigated the binding between the catalytic domain of ComD, ComE and the promoter region comcde, using small angle X-ray scattering. ComD(catdom) is a dimer that adapts two monomers of ComE, one on each side, placing (Com) (E) D58 residue in front of (Com) (D) H248, a location that is convenient for the intermolecular transfer reaction of the phosphoryl group. The LytTR, ComE(D) (58A) and ComE(D) (58E) complexed with comcde are composed of two protein molecules per DNA duplex. Modeling the complexes against small angle X-ray scattering data indicated that ComE(D) (58E) bound to comcde forms a compact dimer similar to the crystal structure, whereas ComE(D) (58A) -comcde adopts more than one conformation with or without dimer contacts. 
The various oligomeric states of ComE induce different bending angles of the promoter, which provides a mechanistic scenario for the activation of ComE: the phosphorylation of ComE forces additional bending of comcde, and the release of this bending strain on DNA via the disruption of the ComE dimer may signal the shut-off of the competence state. The molecular models and experimental SAXS data have been deposited on SASBDB (Small Angle Scattering Biological Data Bank) (see http://www.sasbdb.org/aboutSASBDB/) under the SAS codes SASDAA7, SASDAB7 and SASDAC7.",2015-03-11 +26504146,DARA: a web server for rapid search of structural neighbours using solution small angle X-ray scattering data.,"

Motivation

Small angle X-ray scattering (SAXS) is an established method for studying biological macromolecules in solution, whereby the experimental scattering patterns relate to the quaternary and tertiary structure of the macromolecule. Here we present DARA, a web-server, that queries over 150 000 scattering profiles pre-computed from the high resolution models of macromolecules and biological assemblies in the Protein Data Bank, to rapidly find nearest neighbours of a given experimental or theoretical SAXS pattern. Identification of the best scattering equivalents provides a straightforward and automated way of structural assessment of macromolecules based on a SAXS profile. DARA results are useful e.g. for fold recognition and finding of biologically active oligomers.

Availability and implementation

http://dara.embl-hamburg.de/.",2015-10-25 +22685225,Protocol for a population-based study of rheumatic heart disease prevalence and cardiovascular outcomes among schoolchildren in Nepal. ,"Rheumatic heart disease (RHD) remains a major contributor to morbidity and mortality in developing countries. The reported prevalence rates of RHD are highly variable and mainly attributable to differences in the sensitivity of either clinical screening to detect advanced heart disease or echocardiographic evaluation where disease is diagnosed earlier across a continuous spectrum. The clinical significance of diagnosis of subclinical RHD by echocardiographic screening and early implementation of secondary prevention has not been clearly established. The authors designed a cross-sectional survey to determine the prevalence of RHD in children from private and public schools between the age of 5 and 15 years in urban and rural areas of Eastern Nepal using both cardiac auscultation and echocardiographic evaluation. Children with RHD will be treated with secondary prevention and enrolled in a prospective cohort study. The authors will compare the prevalence rates by cardiac auscultation and echocardiography, determine risk factors associated with diagnosis and progression of RHD, investigate social and economic barriers for receiving adequate cardiac care and assess clinical outcomes with regular medical surveillance as a function of stage of disease at the time of diagnosis. Prospective clinical studies investigating the impact of secondary prevention for subclinical RHD on long-term clinical outcome will be of central relevance for future health resource utilisation in developing countries. The study was considered ethically uncritical and was given an exempt status by the ethics committee at University of Bern, Switzerland. The study has been submitted to the National Nepal Health Research Council and was registered with http://www.ClinicalTrials.gov (NCT01550068). 
The study findings will be reported in peer-reviewed publications. CLINICALTRIALS.GOV IDENTIFIER: NCT01550068.",2012-06-08 +21984760,"MyBioNet: interactively visualize, edit and merge biological networks on the web.","

Summary

MyBioNet is a web-based application for biological network analysis, which provides user-friendly web interfaces to visualize, edit and merge biological networks. In addition, MyBioNet integrated KEGG metabolic network data from 1366 organisms and allows users to search and navigate interesting networks.

Availability and implementation

All KEGG metabolic network data are organized and stored in the MySQL database. MyBioNet is implemented in Flex/Actionscript and PHP languages and deployed on an Apache web server. MyBioNet is accessible through all the Flash-embedded browsers at http://bis.zju.edu.cn/mybionet/.

Contact

mchen@zju.edu.cn.",2011-10-07 +21813477,The GNAT library for local and remote gene mention normalization.,"

Summary

Identifying mentions of named entities, such as genes or diseases, and normalizing them to database identifiers have become an important step in many text and data mining pipelines. Despite this need, very few entity normalization systems are publicly available as source code or web services for biomedical text mining. Here we present the Gnat Java library for text retrieval, named entity recognition, and normalization of gene and protein mentions in biomedical text. The library can be used as a component to be integrated with other text-mining systems, as a framework to add user-specific extensions, and as an efficient stand-alone application for the identification of gene and protein names for data analysis. On the BioCreative III test data, the current version of Gnat achieves a Tap-20 score of 0.1987.

Availability

The library and web services are implemented in Java and the sources are available from http://gnat.sourceforge.net.

Contact

jorg.hakenberg@roche.com.",2011-08-03 +26851352,Accurate prediction of helix interactions and residue contacts in membrane proteins.,"Accurate prediction of intra-molecular interactions from amino acid sequence is an important pre-requisite for obtaining high-quality protein models. Over the recent years, remarkable progress in this area has been achieved through the application of novel co-variation algorithms, which eliminate transitive evolutionary connections between residues. In this work we present a new contact prediction method for α-helical transmembrane proteins, MemConP, in which evolutionary couplings are combined with a machine learning approach. MemConP achieves a substantially improved accuracy (precision: 56.0%, recall: 17.5%, MCC: 0.288) compared to the use of either machine learning or co-evolution methods alone. The method also achieves 91.4% precision, 42.1% recall and a MCC of 0.490 in predicting helix-helix interactions based on predicted contacts. The approach was trained and rigorously benchmarked by cross-validation and independent testing on up-to-date non-redundant datasets of 90 and 30 experimental three dimensional structures, respectively. MemConP is a standalone tool that can be downloaded together with the associated training data from http://webclu.bio.wzw.tum.de/MemConP.",2016-02-03 +26072492,Cypiripi: exact genotyping of CYP2D6 using high-throughput sequencing data.,"

Motivation

CYP2D6 is a highly polymorphic gene which encodes the (CYP2D6) enzyme, involved in the metabolism of 20-25% of all clinically prescribed drugs and other xenobiotics in the human body. CYP2D6 genotyping is recommended prior to treatment decisions involving one or more of the numerous drugs sensitive to CYP2D6 allelic composition. In this context, high-throughput sequencing (HTS) technologies provide a promising time-efficient and cost-effective alternative to currently used genotyping techniques. To achieve accurate interpretation of HTS data, however, one needs to overcome several obstacles such as high sequence similarity and genetic recombinations between CYP2D6 and evolutionarily related pseudogenes CYP2D7 and CYP2D8, high copy number variation among individuals and short read lengths generated by HTS technologies.

Results

In this work, we present the first algorithm to computationally infer CYP2D6 genotype at basepair resolution from HTS data. Our algorithm is able to resolve complex genotypes, including alleles that are the products of duplication, deletion and fusion events involving CYP2D6 and its evolutionarily related cousin CYP2D7. Through extensive experiments using simulated and real datasets, we show that our algorithm accurately solves this important problem with potential clinical implications.

Availability and implementation

Cypiripi is available at http://sfu-compbio.github.io/cypiripi.",2015-06-01 +21478484,The Rat Genome Database pathway portal.,"The set of interacting molecules collectively referred to as a pathway or network represents a fundamental structural unit, the building block of the larger, highly integrated networks of biological systems. The scientific community's interest in understanding the fine details of how pathways work, communicate with each other and synergize, and how alterations in one or several pathways may converge into a disease phenotype, places heightened demands on pathway data and information providers. To meet such demands, the Rat Genome Database [(RGD) http://rgd.mcw.edu] has adopted a multitiered approach to pathway data acquisition and presentation. Resources and tools are continuously added or expanded to offer more comprehensive pathway data sets as well as enhanced pathway data manipulation, exploration and visualization capabilities. At RGD, users can easily identify genes in pathways, see how pathways relate to each other and visualize pathways in a dynamic and integrated manner. They can access these and other components from several entry points and effortlessly navigate between them and they can download the data of interest. The Pathway Portal resources at RGD are presented, and future directions are discussed. Database URL: http://rgd.mcw.edu.",2011-04-08 +25627673,Aber-OWL: a framework for ontology-based data access in biology.,"

Background

Many ontologies have been developed in biology and these ontologies increasingly contain large volumes of formalized knowledge commonly expressed in the Web Ontology Language (OWL). Computational access to the knowledge contained within these ontologies relies on the use of automated reasoning.

Results

We have developed the Aber-OWL infrastructure that provides reasoning services for bio-ontologies. Aber-OWL consists of an ontology repository, a set of web services and web interfaces that enable ontology-based semantic access to biological data and literature. Aber-OWL is freely available at http://aber-owl.net .

Conclusions

Aber-OWL provides a framework for automatically accessing information that is annotated with ontologies or contains terms used to label classes in ontologies. When using Aber-OWL, access to ontologies and data annotated with them is not merely based on class names or identifiers but rather on the knowledge the ontologies contain and the inferences that can be drawn from it.",2015-01-28 +27900317,Does Early Career Achievement Lead to Earlier Death? Assessment of the Precocity-Longevity Effect in Professional Basketball Players.,"

Objectives

To examine the precocity-longevity (P-L) effect in North American professional basketball players who debuted between 1946 and 1979, and to determine whether playing position and decade of play influenced the relationship between age of career achievements and life span.

Methods

A total of 1852 players were evaluated from a recognized sports archive (i.e., http://sports-reference.com), which provided information on date of birth, death, and career debut, playing position, and indicators of achievement (i.e., All-Star team and/or All-League team selection). Athletes were categorized as above or below the median age of professional debut and median age of selection to first All-Star team and/or All-League team. Analyses of deceased players (n = 598) were comprised of bivariate correlations between age of achievement (age of debut, age of first All-Star game, and age of first All-League team selection) and age of death, and t-tests to compare the average age of death of early and late achievers (p < 0.05). Survival analyses, using the entire sample (living and deceased players), compared the life spans between those who debuted above and below the median age of achievement for each indicator of achievement.

Results

Only the correlation between age of professional debut and age of death (r = 0.33, p < 0.001), age of first All-Star game and age of death (r = 0.29, p < 0.05), and the t-test comparing the average death age of early (66.4 years) and later (69.3 years) debut age groups (p = 0.01) reached statistical significance. However, survival analyses demonstrated a trend for lower risk of death for early achievers, with one exception (i.e., age of debut); this trend was not statistically significant.

Conclusion

Results did not support the P-L hypothesis, suggesting that sample characteristics (i.e., physical fitness of high performance athletes), and measurement methodologies, may influence support for the proposed hypothesis in sport. However, future research would benefit from larger sample sizes and cause of death data.",2016-11-16 +25505095,Epock: rapid analysis of protein pocket dynamics.,"

Summary

The volume of an internal protein pocket is fundamental to ligand accessibility. Few programs that compute such volumes manage dynamic data from molecular dynamics (MD) simulations. Limited performance often prohibits analysis of large datasets. We present Epock, an efficient command-line tool that calculates pocket volumes from MD trajectories. A plugin for the VMD program provides a graphical user interface to facilitate input creation, run Epock and analyse the results.

Availability and implementation

Epock C++ source code, Python analysis scripts, VMD Tcl plugin, documentation and installation instructions are freely available at http://epock.bitbucket.org.

Contact

benoist.laurent@gmail.com or baaden@smplinux.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-12 +25900069,NMRFAM-SDF: a protein structure determination framework.,"The computationally demanding nature of automated NMR structure determination necessitates a delicate balancing of factors that include the time complexity of data collection, the computational complexity of chemical shift assignments, and selection of proper optimization steps. During the past two decades the computational and algorithmic aspects of several discrete steps of the process have been addressed. Although no single comprehensive solution has emerged, the incorporation of a validation protocol has gained recognition as a necessary step for a robust automated approach. The need for validation becomes even more pronounced in cases of proteins with higher structural complexity, where potentially larger errors generated at each step can propagate and accumulate in the process of structure calculation, thereby significantly degrading the efficacy of any software framework. This paper introduces a complete framework for protein structure determination with NMR--from data acquisition to the structure determination. The aim is twofold: to simplify the structure determination process for non-NMR experts whenever feasible, while maintaining flexibility by providing a set of modules that validate each step, and to enable the assessment of error propagations. This framework, called NMRFAM-SDF (NMRFAM-Structure Determination Framework), and its various components are available for download from the NMRFAM website (http://nmrfam.wisc.edu/software.htm).",2015-04-22 +27583132,An open RNA-Seq data analysis pipeline tutorial with an example of reprocessing data from a recent Zika virus study.,"RNA-seq analysis is becoming a standard method for global gene expression profiling. 
However, open and standard pipelines to perform RNA-seq analysis by non-experts remain challenging due to the large size of the raw data files and the hardware requirements for running the alignment step. Here we introduce a reproducible open source RNA-seq pipeline delivered as an IPython notebook and a Docker image. The pipeline uses state-of-the-art tools and can run on various platforms with minimal configuration overhead. The pipeline enables the extraction of knowledge from typical RNA-seq studies by generating interactive principal component analysis (PCA) and hierarchical clustering (HC) plots, performing enrichment analyses against over 90 gene set libraries, and obtaining lists of small molecules that are predicted to either mimic or reverse the observed changes in mRNA expression. We apply the pipeline to a recently published RNA-seq dataset collected from human neuronal progenitors infected with the Zika virus (ZIKV). In addition to confirming the presence of cell cycle genes among the genes that are downregulated by ZIKV, our analysis uncovers significant overlap with upregulated genes that when knocked out in mice induce defects in brain morphology. This result potentially points to the molecular processes associated with the microcephaly phenotype observed in newborns from pregnant mothers infected with the virus. In addition, our analysis predicts small molecules that can either mimic or reverse the expression changes induced by ZIKV. The IPython notebook and Docker image are freely available at:  http://nbviewer.jupyter.org/github/maayanlab/Zika-RNAseq-Pipeline/blob/master/Zika.ipynb and  https://hub.docker.com/r/maayanlab/zika/.",2016-07-05 +26253334,"Two unusual hepatitis C virus subtypes, 2j and 2q, in Spain: Identification by nested-PCR and sequencing of a NS5B region.","Many studies have reported the use of the NS5B gene to subtype hepatitis C virus (HCV). Other HCV genes, such as HCV-5' UTR, Core (C) and E1, have also been used. 
In some studies, NS5B have been used together with 5'-UTR or C genes to improve genotyping results obtained using commercial procedures. Only two studies in Spain have compared molecular techniques versus commercial procedures regarding the efficacy of HCV subtyping. The aim of this study was to determine whether nested PCR and sequencing of a NS5B region was more reliable than commercial procedures to subtype HCV. We analyzed the results of HCV genotyping in [726] serum specimens collected from 2001 to 2013. From 2001 to 2011, we used PCR and INNO-LiPA hybridization or its new version Versant HCV Genotype 2.0 assay (471 samples). From 2012 to 2013, we used nested PCR and sequencing of a NS5B region (255 cases). This method used two pairs of primers to amplify the RNA of the sample converted to DNA by retrotranscription. The amplification product of 270 base pairs was further sequenced. To identify the subtype, the sequences obtained were compared to those in the international database: http://hcv.lanl.gov./content/sequence/, HCV/ToolsOutline.html and Geno2pheno[hcv] http://hcv.bioinf.mpi-inf.mpg.de/index.php. Nested PCR of a NS5B region and sequencing identified all but one subtype (0.4%, 1/255), differentiated all 1a subtypes from 1b subtypes, and characterized all HCV 2-4 subtypes. This approach also distinguished two subtypes, 2j and 2q, that had rarely been detected previously in Spain. However, commercial procedures failed to subtype 12.7% (60/471) of samples and to genotype 0.6% of specimens (3/471). Nested PCR and sequencing of a NS5B region improved the subtyping of HCV in comparison with classical procedures and identified two rare subtypes in Spain: 2j and 2q. However, full length genome sequencing is recommended to confirm HCV 2j and 2q subtypes.",2015-08-05 +27307645,BioASF: a framework for automatically generating executable pathway models specified in BioPAX.,"

Motivation

Biological pathways play a key role in most cellular functions. To better understand these functions, diverse computational and cell biology researchers use biological pathway data for various analysis and modeling purposes. For specifying these biological pathways, a community of researchers has defined BioPAX and provided various tools for creating, validating and visualizing BioPAX models. However, a generic software framework for simulating BioPAX models is missing. Here, we attempt to fill this gap by introducing a generic simulation framework for BioPAX. The framework explicitly separates the execution model from the model structure as provided by BioPAX, with the advantage that the modelling process becomes more reproducible and intrinsically more modular; this ensures natural biological constraints are satisfied upon execution. The framework is based on the principles of discrete event systems and multi-agent systems, and is capable of automatically generating a hierarchical multi-agent system for a given BioPAX model.

Results

To demonstrate the applicability of the framework, we simulated two types of biological network models: a gene regulatory network modeling the haematopoietic stem cell regulators and a signal transduction network modeling the Wnt/β-catenin signaling pathway. We observed that the results of the simulations performed using our framework were entirely consistent with the simulation results reported by the researchers who developed the original models in a proprietary language.

Availability and implementation

The framework, implemented in Java, is open source and its source code, documentation and tutorial are available at http://www.ibi.vu.nl/programs/BioASF CONTACT: j.heringa@vu.nl.",2016-06-01 +27282848,Effect of Gender on Outcomes After Cardiac Resynchronization Therapy in Patients With a Narrow QRS Complex: A Subgroup Analysis of the EchoCRT Trial. ,"In EchoCRT, a randomized controlled trial evaluating the effect of cardiac resynchronization therapy (CRT) in patients with a QRS duration of <130 ms and echocardiographic evidence of left ventricular dyssynchrony, the primary outcome (death from any cause or first hospitalization for worsening heart failure) occurred more frequently in the CRT-ON when compared with the control group. In this prespecified subgroup analysis, we evaluated the effect of sex on clinical outcome in EchoCRT. In EchoCRT, 585 (72%) of included patients were men. At baseline, male patients had a higher incidence of ischemic cardiomyopathy and longer QRS duration. On uni- and multivariable analysis, no significant interaction was observed regarding sex for the primary or any of the secondary end points. Numerically, a higher all-cause mortality was observed in male patients randomized to CRT-ON versus CRT-OFF on univariable analysis (hazard ratio, 1.83; 95% confidence interval, 1.08-3.12); however, no statistically significant interaction compared with females randomized to CRT-ON versus CRT-OFF was noted (hazard ratio, 0.99; P interaction, 0.56). There was no difference in the primary safety end point of system-related complications, including CRT system- and implantation-related events. The largest hazard for all-cause mortality in EchoCRT was observed in men randomized to CRT-ON; the comparison with women did not reach statistical significance, which may be because of the premature termination of the trial and the limited data. 
These results suggest that male sex may be a risk factor for harm by CRT in patients with narrow QRS width, an observation which deserves further investigation. URL: https://clinicaltrials.gov. Unique identifier: NCT00683696.",2016-06-01 +27378296,Application of the MAFFT sequence alignment program to large data-reexamination of the usefulness of chained guide trees.,"

Motivation

Large multiple sequence alignments (MSAs), consisting of thousands of sequences, are becoming more and more common, due to advances in sequencing technologies. The MAFFT MSA program has several options for building large MSAs, but their performances have not been sufficiently assessed yet, because realistic benchmarking of large MSAs has been difficult. Recently, such assessments have been made possible through the HomFam and ContTest benchmark protein datasets. Along with the development of these datasets, an interesting theory was proposed: chained guide trees increase the accuracy of MSAs of structurally conserved regions. This theory challenges the basis of progressive alignment methods and needs to be examined by being compared with other known methods including computationally intensive ones.

Results

We used HomFam, ContTest and OXFam (an extended version of OXBench) to evaluate several methods enabled in MAFFT: (1) a progressive method with approximate guide trees, (2) a progressive method with chained guide trees, (3) a combination of an iterative refinement method and a progressive method and (4) a less approximate progressive method that uses a rigorous guide tree and consistency score. Other programs, Clustal Omega and UPP, available for large MSAs, were also included into the comparison. The effect of method 2 (chained guide trees) was positive in ContTest but negative in HomFam and OXFam. Methods 3 and 4 increased the benchmark scores more consistently than method 2 for the three datasets, suggesting that they are safer to use.

Availability and implementation

http://mafft.cbrc.jp/alignment/software/ CONTACT: katoh@ifrec.osaka-u.ac.jp Supplementary information: Supplementary data are available at Bioinformatics online.",2016-07-04 +21880229,Creation of the Web-based University of Chicago Monogenic Diabetes Registry: using technology to facilitate longitudinal study of rare subtypes of diabetes.,"

Background

Monogenic diabetes is a group of disorders caused by mutations in any one of a number of genes. Although a monogenic diagnosis--estimated to represent as much as 2% of all diabetes patients--can have a transformational impact on treatment, the majority of monogenic cases remain unidentified and little is known about their natural history. We thus created the first United States Monogenic Diabetes Registry (http://www.kovlerdiabetescenter.org/registry/) for individuals with either neonatal diabetes diagnosed before 1 year of age or with a phenotype suggestive of maturity-onset diabetes of the young.

Methods

Inclusion criteria and consent documents are viewable on our Web site, which allows secure collection of contact information to facilitate telephone consent and enrollment. Relevant medical, family, and historical data are collected longitudinally from a variety of sources and stored in our Web-accessible secure database.

Results

We have enrolled well over 700 subjects in the registry so far, with steady recruitment of those diagnosed under 1 year of age and increasing enrollment of those diagnosed later in life. Initially, participants were mostly self-referred but are increasingly being referred by their physicians. Comprehensive survey and medical records data are collected at enrollment, with ongoing collection of longitudinal data. Associated private Facebook and email discussion groups that we established have already fostered active participation.

Conclusions

Our early success with the Monogenic Diabetes Registry demonstrates the effectiveness of low-cost Web-based tools, including surveys, the Research Electronic Data Capture database program, and discussion groups, for efficient enrollment and support of rare patients, and collection and maintenance of their data.",2011-07-01 +21609792,SPIRE: Systematic protein investigative research environment.,"The SPIRE (Systematic Protein Investigative Research Environment) provides web-based experiment-specific mass spectrometry (MS) proteomics analysis (https://www.proteinspire.org). Its emphasis is on usability and integration of the best analytic tools. SPIRE provides an easy to use web-interface and generates results in both interactive and simple data formats. In contrast to run-based approaches, SPIRE conducts the analysis based on the experimental design. It employs novel methods to generate false discovery rates and local false discovery rates (FDR, LFDR) and integrates the best and complementary open-source search and data analysis methods. The SPIRE approach of integrating X!Tandem, OMSSA and SpectraST can produce an increase in protein IDs (52-88%) over current combinations of scoring and single search engines while also providing accurate multi-faceted error estimation. One of SPIRE's primary assets is combining the results with data on protein function, pathways and protein expression from model organisms. We demonstrate some of SPIRE's capabilities by analyzing mitochondrial proteins from the wild type and 3 mutants of C. elegans. SPIRE also connects results to publically available proteomics data through its Model Organism Protein Expression Database (MOPED). SPIRE can also provide analysis and annotation for user supplied protein ID and expression data.",2011-05-13 +21896510,Robust relative compression of genomes with random access.,"

Motivation

Storing, transferring and maintaining genomic databases becomes a major challenge because of the rapid technology progress in DNA sequencing and correspondingly growing pace at which the sequencing data are being produced. Efficient compression, with support for extraction of arbitrary snippets of any sequence, is the key to maintaining those huge amounts of data.

Results

We present an LZ77-style compression scheme for relative compression of multiple genomes of the same species. While the solution bears similarity to known algorithms, it offers significantly higher compression ratios at compression speed over an order of magnitude greater. In particular, 69 differentially encoded human genomes are compressed over 400 times at fast compression, or even 1000 times at slower compression (the reference genome itself needs much more space). Adding fast random access to text snippets decreases the ratio to ~300.

Availability

GDC is available at http://sun.aei.polsl.pl/gdc.

Contact

sebastian.deorowicz@polsl.pl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-05 +26247924,"A Database of Force-Field Parameters, Dynamics, and Properties of Antimicrobial Compounds.","We present an on-line database of all-atom force-field parameters and molecular properties of compounds with antimicrobial activity (mostly antibiotics and some beta-lactamase inhibitors). For each compound, we provide the General Amber Force Field parameters for the major species at physiological pH, together with an analysis of properties of interest as extracted from µs-long molecular dynamics simulations in explicit water solution. The properties include number and population of structural clusters, molecular flexibility, hydrophobic and hydrophilic molecular surfaces, the statistics of intraand inter-molecular H-bonds, as well as structural and dynamical properties of solvent molecules within first and second solvation shells. In addition, the database contains several key molecular parameters, such as energy of the frontier molecular orbitals, vibrational properties, rotational constants, atomic partial charges and electric dipole moment, computed by Density Functional Theory. The present database (to our knowledge the first extensive one including dynamical properties) is part of a wider project aiming to build-up a database containing structural, physico-chemical and dynamical properties of medicinal compounds using different force-field parameters with increasing level of complexity and reliability. 
The database is freely accessible at http://www.dsf.unica.it/translocation/db/.",2015-08-03 +24979058,A protocol for RNA methylation differential analysis with MeRIP-Seq data and exomePeak R/Bioconductor package.,"Despite the prevalent studies of DNA/Chromatin related epigenetics, such as, histone modifications and DNA methylation, RNA epigenetics has not drawn deserved attention until a new affinity-based sequencing approach MeRIP-Seq was developed and applied to survey the global mRNA N6-methyladenosine (m(6)A) in mammalian cells. As a marriage of ChIP-Seq and RNA-Seq, MeRIP-Seq has the potential to study the transcriptome-wide distribution of various post-transcriptional RNA modifications. We have previously developed an R/Bioconductor package 'exomePeak' for detecting RNA methylation sites under a specific experimental condition or the identifying the differential RNA methylation sites in a case control study from MeRIP-Seq data. Compared with other relatively well studied data types such as ChIP-Seq and RNA-Seq, the study of MeRIP-Seq data is still at very early stage, and existing protocols are not optimized for dealing with the intrinsic characteristic of MeRIP-Seq data. We therein provide here a detailed and easy-to-use protocol of using exomePeak R/Bioconductor package along with other software programs for analysis of MeRIP-Seq data, which covers raw reads alignment, RNA methylation site detection, motif discovery, differential RNA methylation analysis, and functional analysis. Particularly, the rationales behind each processing step as well as the specific method used, the best practice, and possible alternative strategies are briefly discussed. 
The exomePeak R/Bioconductor package is freely available from Bioconductor: http://www.bioconductor.org/packages/release/bioc/html/exomePeak.html.",2014-06-27 +22084198,PLEXdb: gene expression resources for plants and plant pathogens.,"PLEXdb (http://www.plexdb.org), in partnership with community databases, supports comparisons of gene expression across multiple plant and pathogen species, promoting individuals and/or consortia to upload genome-scale data sets to contrast them to previously archived data. These analyses facilitate the interpretation of structure, function and regulation of genes in economically important plants. A list of Gene Atlas experiments highlights data sets that give responses across different developmental stages, conditions and tissues. Tools at PLEXdb allow users to perform complex analyses quickly and easily. The Model Genome Interrogator (MGI) tool supports mapping gene lists onto corresponding genes from model plant organisms, including rice and Arabidopsis. MGI predicts homologies, displays gene structures and supporting information for annotated genes and full-length cDNAs. The gene list-processing wizard guides users through PLEXdb functions for creating, analyzing, annotating and managing gene lists. Users can upload their own lists or create them from the output of PLEXdb tools, and then apply diverse higher level analyses, such as ANOVA and clustering. PLEXdb also provides methods for users to track how gene expression changes across many different experiments using the Gene OscilloScope. 
This tool can identify interesting expression patterns, such as up-regulation under diverse conditions or checking any gene's suitability as a steady-state control.",2011-11-13 +22803829,Pocketcheck: updating the HLA class I peptide specificity roadmap.,"The structural determination of peptide:HLA (human leucocyte antigen) class I complexes by X-ray crystallography has provided valuable information for understanding how peptides bind to individual HLA class I molecules and how this may influence the immune response. We compared 101 crystal structures of 9-mer peptide:HLA class I complexes available in the protein data bank (PDB) by performing a contact analysis using the Contact Map Analysis webserver http://ligin.weizmann.ac.il/cma. An InterSystems Caché 'post-relational' database containing residue position, amino acid (AA) and buried surface that contact a particular peptide position was then created allowing data comparison for all the structures (Pocketcheck). The analysis illustrates that the HLA class I residues 24, 45, 63 and 67 show high contact frequencies to both the p1 and/or p2 position of bound peptides, indicating that they might influence the nature of a peptide anchor. To determine the influence of these residues we utilized soluble HLA technology and mass spectrometry to analyze peptides derived from HLA-B*44:06 since it differs from the previously described allele B*44:02 by seven AA exchanges located in the alpha 1 domain (residues 24, 32, 41, 45, 63, 67 and 80). HLA-B*44:06 features an anchor motif of P or A at p2 and Y or W at the C-terminal. Additionally B*44:06-derived peptides feature an auxiliary anchor motif at p1, comprising D or E. Our results illustrate that structural analysis can provide valuable information to understand allogenicity and provides a further step towards intelligent HLA mismatching.",2012-07-14 +27833732,"The 1st Baltic Osseointegration Academy and Lithuanian University of Health Sciences Consensus Conference 2016. 
Summary and Consensus Statements: Group I - Peri-Implantitis Aetiology, Risk Factors and Pathogenesis.","

Introduction

The task of Group 1 was to review and update the existing data concerning aetiology, risk factors and pathogenesis of peri-implantitis. Previous history of periodontitis, poor oral hygiene, smoking and presence of general diseases have been considered among the aetiological risk factors for the onset of peri-implant pathologies, while late dental implant failures are commonly associated with peri-implantitis and/or with the application of incorrect biomechanical forces. Special interest was paid to the bone cells dynamics as part of the pathogenesis of peri-implantitis.

Material and methods

The main areas indagated by this group were as follows: influence of smoking, history of periodontitis and general diseases on peri-implantitis development, bio-mechanics of implant loading and its influence on peri-implant bone and cellular dynamics related to the pathogenesis of peri-implantitis. The systematic reviews and/or meta-analyses were registered in PROSPERO, an international prospective register of systematic reviews: http://www.crd.york.ac.uk/PROSPERO/. The literature in the corresponding areas of interest was screened and reported following the PRISMA (Preferred Reporting Item for Systematic Review and Meta-Analysis) Statement: http://www.prisma-statement.org/. Method of preparation of the systematic reviews, based on comprehensive search strategies, was discussed and standardized. The summary of the materials and methods employed by the authors in preparing the systematic reviews and/or meta-analyses is presented in Preface chapter.

Results

The results and conclusions of the review process are presented in the respective papers. One systematic review with meta-analysis, three systematic reviews and one theoretical analysis were performed. The group's general commentaries, consensus statements, clinical recommendations and implications for research are presented in this article.",2016-07-01 +27413037,"Heart Failure With Improved Ejection Fraction: Clinical Characteristics, Correlates of Recovery, and Survival: Results From the Valsartan Heart Failure Trial. ","Heart failure with recovered or improved ejection fraction (HFiEF) has been proposed as a new category of HF. Whether HFiEF is clinically distinct from HF with persistently reduced ejection fraction remains to be validated. Of the 5010 subjects enrolled in the Valsartan Heart Failure Trial (Val-HeFT), 3519 had a baseline left ventricular EF of <35% and a follow-up echocardiographic assessment of EF at 12 months. Of these, 321 (9.1%) patients who had a 12-month EF of >40% constituted the subgroup with HFiEF. EF improved from 28.7±5.6% to 46.5±5.6% in the subgroup with HFiEF and remained reduced (25.2±6.2% and 27.5±7.1%) in the subgroup with HF with reduced ejection fraction. The group with HFiEF had a less severe hemodynamic, biomarker, and neurohormonal profile, and it was treated with a more intense HF medication regimen. Subjects who had higher blood pressure and those treated with a β-blocker or randomized to valsartan had greater odds of being in the HFiEF group, whereas those with an ischemic pathogenesis, a more dilated left ventricle, and a detectable hs-troponin had lower odds of an improvement in EF. Recovery of the EF to >40% was associated with a better survival compared with persistently reduced EF. 
Our data support HFiEF as a stratum of HF with reduced ejection fraction with a more favorable outcome, which occurs in a minority of patients with HF with reduced ejection fraction who have a lower prevalence of ischemic heart disease, a less severe hemodynamic, biomarker, and neurohormonal profile, and who are treated with a more intense HF medication regimen. URL: http://www.clinicaltrials.gov. Unique identifier: NCT00336336.",2016-07-01 +26357078,Reachability Analysis in Probabilistic Biological Networks.,"Extra-cellular molecules trigger a response inside the cell by initiating a signal at special membrane receptors (i.e., sources), which is then transmitted to reporters (i.e., targets) through various chains of interactions among proteins. Understanding whether such a signal can reach from membrane receptors to reporters is essential in studying the cell response to extra-cellular events. This problem is drastically complicated due to the unreliability of the interaction data. In this paper, we develop a novel method, called PReach (Probabilistic Reachability), that precisely computes the probability that a signal can reach from a given collection of receptors to a given collection of reporters when the underlying signaling network is uncertain. This is a very difficult computational problem with no known polynomial-time solution. PReach represents each uncertain interaction as a bi-variate polynomial. It transforms the reachability problem to a polynomial multiplication problem. We introduce novel polynomial collapsing operators that associate polynomial terms with possible paths between sources and targets as well as the cuts that separate sources from targets. These operators significantly shrink the number of polynomial terms and thus the running time. PReach has much better time complexity than the recent solutions for this problem. 
Our experimental results on real data sets demonstrate that this improvement leads to orders of magnitude of reduction in the running time over the most recent methods. Availability: All the data sets used, the software implemented and the alignments found in this paper are available at http://bioinformatics.cise.ufl.edu/PReach/.",2015-01-01 +26108102,Applying random forest and subtractive fuzzy c-means clustering techniques for the development of a novel G protein-coupled receptor discrimination method using pseudo amino acid compositions.,"G protein-coupled receptors (GPCRs) constitute the largest superfamily of integral membrane proteins (IMPs) and they tremendously contribute in the flow of information into cells. In this study, the random forest (RF) and the subtractive fuzzy c-means clustering (SBC) methods have been used to determine the importance of input variables and discriminate GPCRs from non-GPCRs using twenty amino acid and fifty pseudo amino acid compositions derived from GPCR sequences. The studied dataset was retrieved from the UniProt/SWISSPROT database and consists of 1000 GPCR and 1000 non-GPCR reviewed sequences. The top ranked RF-SBC-based model discriminates GPCRs and non-GPCRs successfully with the accuracy, sensitivity, specificity and Matthew's coefficient correlation (MCC) rates of 99.15%, 99.60%, 98.70% and 0.983%, respectively. These rates were obtained from averaged values of 5-fold cross validation using only twenty four out of fifty pseudo amino acid composition features. The results show that the proposed RF-SBC-based model outperforms other existing algorithms in terms of the evaluated performance criteria. 
The webserver for the proposed algorithm is available at http://brcinfo.shinyapps.io/GPCRIden.",2015-08-01 +23247048,MolShaCS: a free and open source tool for ligand similarity identification based on Gaussian descriptors.,"Molecular similarity evaluation is an important step in most drug development strategies, since molecular similarity is usually related to functional similarity. Here, we developed a method based on the Gaussian description of molecular shape and charge distribution for molecular similarity identification. The method was evaluated using the Directory of Useful Decoys (DUD) and a retrospective test. Enrichment factors computed for DUD targets showed that the proposed method performs very well in recognizing molecules with similar physicochemical properties and dissimilar topologies, reaching an average AUC of 0.63 and enrichment factor of 10 at 0.5% of decoys. A retrospective test also showed that nine mineralocorticoid ligands were ranked among the top ten molecules in a search of a database of approved drugs for molecules similar to aldosterone. Altogether, these data show that the Gaussian-based description of molecular shape and charge distribution implemented in the program MolShaCS is an efficient method for molecular similarity identification. The program is publicly available at the address http://www.ifsc.usp.br/biotechmol.",2012-11-17 +27460614,Classifying Schizophrenia Using Multimodal Multivariate Pattern Recognition Analysis: Evaluating the Impact of Individual Clinical Profiles on the Neurodiagnostic Performance.,"Previous studies have shown that structural brain changes are among the best-studied candidate markers for schizophrenia (SZ) along with functional connectivity (FC) alterations of resting-state (RS) patterns. 
This study aimed to investigate effects of clinical and sociodemographic variables on the classification by applying multivariate pattern analysis (MVPA) to both gray matter (GM) volume and FC measures in patients with SZ and healthy controls (HC). RS and structural magnetic resonance imaging data (sMRI) from 74 HC and 71 SZ patients were obtained from a Mind Research Network COBRE dataset available via COINS (http://coins.mrn.org/dx). We used a MVPA framework using support-vector machines embedded in a repeated, nested cross-validation to generate a multi-modal diagnostic system and evaluate its generalizability. The dependence of neurodiagnostic performance on clinical and sociodemographic variables was evaluated. The RS classifier showed a slightly higher accuracy (70.5%) compared to the structural classifier (69.7%). The combination of sMRI and RS outperformed single MRI modalities classification by reaching 75% accuracy. The RS based moderator analysis revealed that the neurodiagnostic performance was driven by older SZ patients with an earlier illness onset and more pronounced negative symptoms. In contrast, there was no linear relationship between the clinical variables and neuroanatomically derived group membership measures. This study achieved higher accuracy distinguishing HC from SZ patients by fusing 2 imaging modalities. In addition the results of RS based moderator analysis showed that age of patients, as well as their age at the illness onset were the most important clinical features.",2016-07-01 +24764462,On non-detects in qPCR data.,"

Motivation

Quantitative real-time PCR (qPCR) is one of the most widely used methods to measure gene expression. Despite extensive research in qPCR laboratory protocols, normalization and statistical analysis, little attention has been given to qPCR non-detects-those reactions failing to produce a minimum amount of signal.

Results

We show that the common methods of handling qPCR non-detects lead to biased inference. Furthermore, we show that non-detects do not represent data missing completely at random and likely represent missing data occurring not at random. We propose a model of the missing data mechanism and develop a method to directly model non-detects as missing data. Finally, we show that our approach results in a sizeable reduction in bias when estimating both absolute and differential gene expression.

Availability and implementation

The proposed algorithm is implemented in the R package, nondetects. This package also contains the raw data for the three example datasets used in this manuscript. The package is freely available at http://mnmccall.com/software and as part of the Bioconductor project.",2014-04-23 +25928589,ContextMap 2: fast and accurate context-based RNA-seq mapping.,"

Background

Mapping of short sequencing reads is a crucial step in the analysis of RNA sequencing (RNA-seq) data. ContextMap is an RNA-seq mapping algorithm that uses a context-based approach to identify the best alignment for each read and allows parallel mapping against several reference genomes.

Results

In this article, we present ContextMap 2, a new and improved version of ContextMap. Its key novel features are: (i) a plug-in structure that allows easily integrating novel short read alignment programs with improved accuracy and runtime; (ii) context-based identification of insertions and deletions (indels); (iii) mapping of reads spanning an arbitrary number of exons and indels. ContextMap 2 using Bowtie, Bowtie 2 or BWA was evaluated on both simulated and real-life data from the recently published RGASP study.

Conclusions

We show that ContextMap 2 generally combines similar or higher recall compared to other state-of-the-art approaches with significantly higher precision in read placement and junction and indel prediction. Furthermore, runtime was significantly lower than for the best competing approaches. ContextMap 2 is freely available at http://www.bio.ifi.lmu.de/ContextMap .",2015-04-17 +26229585,Improving the Sequence Ontology terminology for genomic variant annotation.,"

Background

The Genome Variant Format (GVF) uses the Sequence Ontology (SO) to enable detailed annotation of sequence variation. The annotation includes SO terms for the type of sequence alteration, the genomic features that are changed and the effect of the alteration. The SO maintains and updates the specification and provides the underlying ontologicial structure.

Methods

A requirements analysis was undertaken to gather terms missing in the SO release at the time, but needed to adequately describe the effects of sequence alteration on a set of variant genomic annotations. We have extended and remodeled the SO to include and define all terms that describe the effect of variation upon reference genomic features in the Ensembl variation databases.

Results

The new terminology was used to annotate the human reference genome with a set of variants from both COSMIC and dbSNP. A GVF file containing 170,853 sequence alterations was generated using the SO terminology to annotate the kinds of alteration, the effect of the alteration and the reference feature changed. There are four kinds of alteration and 24 kinds of effect seen in this dataset. (Ensembl Variation annotates 34 different SO consequence terms: http://www.ensembl.org/info/docs/variation/predicted_data.html).

Conclusions

We explain the updates to the Sequence Ontology to describe the effect of variation on existing reference features. We have provided a set of annotations using this terminology, and the well defined GVF specification. We have also provided a provisional exploration of this large annotation dataset.",2015-07-31 +21777475,GPS-Prot: a web-based visualization platform for integrating host-pathogen interaction data.,"

Background

The increasing availability of HIV-host interaction datasets, including both physical and genetic interactions, has created a need for software tools to integrate and visualize the data. Because these host-pathogen interactions are extensive and interactions between human proteins are found within many different databases, it is difficult to generate integrated HIV-human interaction networks.

Results

We have developed a web-based platform, termed GPS-Prot http://www.gpsprot.org, that allows for facile integration of different HIV interaction data types as well as inclusion of interactions between human proteins derived from publicly-available databases, including MINT, BioGRID and HPRD. The software has the ability to group proteins into functional modules or protein complexes, generating more intuitive network representations and also allows for the uploading of user-generated data.

Conclusions

GPS-Prot is a software tool that allows users to easily create comprehensive and integrated HIV-host networks. A major advantage of this platform compared to other visualization tools is its web-based format, which requires no software installation or data downloads. GPS-Prot allows novice users to quickly generate networks that combine both genetic and protein-protein interactions between HIV and its human host into a single representation. Ultimately, the platform is extendable to other host-pathogen systems.",2011-07-22 +26228684,De-DUFing the DUFs: Deciphering distant evolutionary relationships of Domains of Unknown Function using sensitive homology detection methods.,"

Background

In the post-genomic era where sequences are being determined at a rapid rate, we are highly reliant on computational methods for their tentative biochemical characterization. The Pfam database currently contains 3,786 families corresponding to ""Domains of Unknown Function"" (DUF) or ""Uncharacterized Protein Family"" (UPF), of which 3,087 families have no reported three-dimensional structure, constituting almost one-fourth of the known protein families in search for both structure and function.

Results

We applied a 'computational structural genomics' approach using five state-of-the-art remote similarity detection methods to detect the relationship between uncharacterized DUFs and domain families of known structures. The association with a structural domain family could serve as a start point in elucidating the function of a DUF. Amongst these five methods, searches in SCOP-NrichD database have been applied for the first time. Predictions were classified into high, medium and low- confidence based on the consensus of results from various approaches and also annotated with enzyme and Gene ontology terms. 614 uncharacterized DUFs could be associated with a known structural domain, of which high confidence predictions, involving at least four methods, were made for 54 families. These structure-function relationships for the 614 DUF families can be accessed on-line at http://proline.biochem.iisc.ernet.in/RHD_DUFS/ . For potential enzymes in this set, we assessed their compatibility with the associated fold and performed detailed structural and functional annotation by examining alignments and extent of conservation of functional residues. Detailed discussion is provided for interesting assignments for DUF3050, DUF1636, DUF1572, DUF2092 and DUF659.

Conclusions

This study provides insights into the structure and potential function for nearly 20 % of the DUFs. Use of different computational approaches enables us to reliably recognize distant relationships, especially when they converge to a common assignment because the methods are often complementary. We observe that while pointers to the structural domain can offer the right clues to the function of a protein, recognition of its precise functional role is still 'non-trivial' with many DUF domains conserving only some of the critical residues. It is not clear whether these are functional vestiges or instances involving alternate substrates and interacting partners.",2015-07-31 +22161332,A modular model of the apoptosis machinery.,"

Unlabelled

Using a modular principle of computer hardware as a metaphor, we defined and implemented in the BioUML platform a module concept for biological pathways. BioUML provides a user interface to create modular models and convert them automatically into plain models for further simulations. Using this approach, we created the apoptosis model including 13 modules: death stimuli (TRAIL, CD95L, and TNF-α)-induced activation of caspase-8; survival stimuli (p53, EGF, and NF-κB) regulation; the mitochondria level; cytochrome C- and Smac-induced activation of caspase-3; direct activation of effector caspases by caspase-8 and - 12; PARP and apoptosis execution phase modules. Each module is based on earlier published models and extended by data from the Reactome and TRANSPATH databases. The model ability to simulate the apoptosis-related processes was checked; the modules were validated using experimental data.

Availability

http://www.biouml.org/apoptosis.shtml .",2012-01-01 +25887481,Family-based association analysis: a fast and efficient method of multivariate association analysis with multiple variants.,"

Background

Many disease phenotypes are outcomes of the complicated interplay between multiple genes, and multiple phenotypes are affected by a single or multiple genotypes. Therefore, joint analysis of multiple phenotypes and multiple markers has been considered as an efficient strategy for genome-wide association analysis, and in this work we propose an omnibus family-based association test for the joint analysis of multiple genotypes and multiple phenotypes.

Results

The proposed test can be applied for both quantitative and dichotomous phenotypes, and it is robust under the presence of population substructure, as long as large-scale genomic data is available. Using simulated data, we showed that our method is statistically more efficient than the existing methods, and the practical relevance is illustrated by application of the approach to obesity-related phenotypes.

Conclusions

The proposed method may be more statistically efficient than the existing methods. The application was developed in C++ and is available at the following URL: http://healthstat.snu.ac.kr/software/mfqls/ .",2015-02-15 +21893620,In the clinic. Sickle cell disease.,"This issue provides a clinical overview of sickle cell disease focusing on prevention, diagnosis, treatment, practice improvement, and patient information. Readers can complete the accompanying CME quiz for 1.5 credits. Only ACP members and individual subscribers can access the electronic features of In the Clinic. Non-subscribers who wish to access this issue of In the Clinic can elect ""Pay for View."" Subscribers can receive 1.5 category 1 CME credits by completing the CME quiz that accompanies this issue of In the Clinic. The content of In the Clinic is drawn from the clinical information and education resources of the American College of Physicians (ACP), including PIER (Physicians' Information and Education Resource) and MKSAP (Medical Knowledge and Self Assessment Program). Annals of Internal Medicine editors develop In the Clinic from these primary sources in collaboration with the ACP's Medical Education and Publishing division and with assistance of science writers and physician writers. Editorial consultants from PIER and MKSAP provide expert review of the content. Readers who are interested in these primary resources for more detail can consult www.acponline.org, http://pier.acponline.org, and other resources referenced within each issue of In the Clinic.",2011-09-01 +25432701,Different approaches for interpretation and reporting of immunohistochemistry analysis results in the bone tissue - a review.,"

Background

Immunohistochemistry (IHC) is a well-established, widely accepted method in both clinical and experimental parts of medical science. It allows receiving valuable information about any process in any tissue, and especially in bone. Each year the amount of data, received by IHC, grows in geometric progression. But the lack of standardization, especially on the post-analytical stage (interpreting and reporting of results), makes the comparison of the results of different studies impossible.

Methods

Comprehensive PubMED literature search with a combination of search words ""immunohistochemistry"" and ""scoring system"" was performed and 773 articles describing IHC results were identified. After further manual analysis 120 articles were selected for detailed evaluation of used approaches.

Results

Six major approaches to the interpretation and presentation of IHC analysis results were identified, analyzed and described.

Conclusions

The overview of the existing approaches in evaluation and interpretation of IHC data, which are provided in the article, can be used in bone tissue research and for either better understanding of existing scoring systems or developing a new one. Standard multiparametric, semiquantitative IHC scoring systems should simplify and clarify the process of interpretation and reporting of received data.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_221.",2014-11-29 +21703007,PlantPhos: using maximal dependence decomposition to identify plant phosphorylation sites with substrate site specificity.,"

Background

Protein phosphorylation catalyzed by kinases plays crucial regulatory roles in intracellular signal transduction. Due to the difficulty in performing high-throughput mass spectrometry-based experiment, there is a desire to predict phosphorylation sites using computational methods. However, previous studies regarding in silico prediction of plant phosphorylation sites lack the consideration of kinase-specific phosphorylation data. Thus, we are motivated to propose a new method that investigates different substrate specificities in plant phosphorylation sites.

Results

Experimentally verified phosphorylation data were extracted from TAIR9, a protein database containing 3006 phosphorylation data from the plant species Arabidopsis thaliana. In an attempt to investigate the various substrate motifs in plant phosphorylation, maximal dependence decomposition (MDD) is employed to cluster a large set of phosphorylation data into subgroups containing significantly conserved motifs. Profile hidden Markov model (HMM) is then applied to learn a predictive model for each subgroup. Cross-validation evaluation on the MDD-clustered HMMs yields an average accuracy of 82.4% for serine, 78.6% for threonine, and 89.0% for tyrosine models. Moreover, independent test results using Arabidopsis thaliana phosphorylation data from UniProtKB/Swiss-Prot show that the proposed models are able to correctly predict 81.4% phosphoserine, 77.1% phosphothreonine, and 83.7% phosphotyrosine sites. Interestingly, several MDD-clustered subgroups are observed to have similar amino acid conservation with the substrate motifs of well-known kinases from Phospho.ELM, a database containing kinase-specific phosphorylation data from multiple organisms.

Conclusions

This work presents a novel method for identifying plant phosphorylation sites with various substrate motifs. Based on cross-validation and independent testing, results show that the MDD-clustered models outperform models trained without using MDD. The proposed method has been implemented as a web-based plant phosphorylation prediction tool, PlantPhos http://csb.cse.yzu.edu.tw/PlantPhos/. Additionally, two case studies have been demonstrated to further evaluate the effectiveness of PlantPhos.",2011-06-26 +26841357,mutation3D: Cancer Gene Prediction Through Atomic Clustering of Coding Variants in the Structural Proteome.,"A new algorithm and Web server, mutation3D (http://mutation3d.org), proposes driver genes in cancer by identifying clusters of amino acid substitutions within tertiary protein structures. We demonstrate the feasibility of using a 3D clustering approach to implicate proteins in cancer based on explorations of single proteins using the mutation3D Web interface. On a large scale, we show that clustering with mutation3D is able to separate functional from nonfunctional mutations by analyzing a combination of 8,869 known inherited disease mutations and 2,004 SNPs overlaid together upon the same sets of crystal structures and homology models. Further, we present a systematic analysis of whole-genome and whole-exome cancer datasets to demonstrate that mutation3D identifies many known cancer genes as well as previously underexplored target genes. The mutation3D Web interface allows users to analyze their own mutation data in a variety of popular formats and provides seamless access to explore mutation clusters derived from over 975,000 somatic mutations reported by 6,811 cancer sequencing studies. 
The mutation3D Web interface is freely available with all major browsers supported.",2016-02-18 +25882789,ReMo-SNPs: a new software tool for identification of polymorphisms in regions and motifs genome-wide.,"Studies of complex genetic diseases have revealed many risk factors of small effect, but the combined amount of heritability explained is still low. Genome-wide association studies are often underpowered to identify true effects because of the very large number of parallel tests. There is, therefore, a great need to generate data sets that are enriched for those markers that have an increased a priori chance of being functional, such as markers in genomic regions involved in gene regulation. ReMo-SNPs is a computational program developed to aid researchers in the process of selecting functional SNPs for association analyses in user-specified regions and/or motifs genome-wide. The useful feature of automatic selection of genotyped markers in the user-provided material makes the output data ready to be used in a following association study. In this article we describe the program and its functions. We also validate the program by including an example study on three different transcription factors and results from an association study on two psychiatric phenotypes. The flexibility of the ReMo-SNPs program enables the user to study any region or sequence of interest, without limitation to transcription factor binding regions and motifs. The program is freely available at: http://www.neuro.ki.se/ReMo-SNPs/.",2015-04-17 +27099113,Unilateral cochlear implantation in children with a potentially useable contralateral ear.,"Increasingly, children are considered for a unilateral CI, even if the contralateral ear falls outside current audiological guidelines, especially if they are not considered to be reaching their educational potential. 
The primary aim was to investigate the benefit of unilateral CI in children currently outside UK [National Institute for Health and Care Excellence Technology Appraisal Guidance. 2009. Cochlear implants for children and adults with severe to profound deafness. NICE technology appraisal guidance [TAG166]. Available January 29, 2016 from http://www.nice.org.uk/ta166 ] audiological guidelines in the contralateral ear. The secondary aim was to measure compliance. A retrospective case review with standard demographic data was performed. Forty-seven children were identified as having received a unilateral CI with the contralateral ear falling outside of current UK audiological criteria. These children were allocated to two groups; with hearing between 50 and 70 dB, and 70 and 90 dB at 2 and 4 kHz in the contralateral ear, respectively. Categories of auditory performance (CAP) were assessed. Pre- and post-operative CAP scores demonstrated a statistically significant improvement in auditory perception. We would suggest that assessing candidacy in individual ears and subsequent unilateral CI has given these children a benefit they may not otherwise have acquired if they only had bilateral hearing aids.",2016-04-01 +27076336,A Cohesive and Integrated Platform for Immunogenicity Prediction.,"In silico methods for immunogenicity prediction mine the enormous quantity of data arising from deciphered genomes and proteomes to identify immunogenic proteins. While high and productive immunogenicity is essential for vaccines, therapeutic proteins and monoclonal antibodies should be minimally immunogenic. Here, we present a cohesive platform for immunogenicity and MHC class I and/or II binding affinity prediction. The platform integrates three quasi-independent modular servers: VaxiJen, EpiJen, and EpiTOP. 
VaxiJen (http://www.ddg-pharmfac.net/vaxijen) predicts immunogenicity of proteins of different origin; EpiJen (http://www.ddg-pharmfac.net/epijen) predicts peptide binding to MHC class I proteins; and EpiTOP (http://www.ddg-pharmfac.net/epitop) predicts peptide binding to MHC class II proteins. The platform is freely accessible and user-friendly. The protocol for immunogenicity prediction is demonstrated by selecting immunogenic proteins from Mycobacterium tuberculosis and predicting how the peptide epitopes within them bind to MHC class I and class II proteins.",2016-01-01 +24965016,Mass++: A Visualization and Analysis Tool for Mass Spectrometry.,"We have developed Mass++, a plug-in style visualization and analysis tool for mass spectrometry. Its plug-in style enables users to customize it and to develop original functions. Mass++ has several kinds of plug-ins, including rich viewers and analysis methods for proteomics and metabolomics. Plug-ins for supporting vendors' raw data are currently available; hence, Mass++ can read several data formats. Mass++ is both a desktop tool and a software development platform. Original functions can be developed without editing the Mass++ source code. Here, we present this tool's capability to rapidly analyze MS data and develop functions by providing examples of label-free quantitation and implementing plug-ins or scripts. Mass++ is freely available at http://www.first-ms3d.jp/english/ .",2014-07-07 +22876798,ToxAlerts: a Web server of structural alerts for toxic chemicals and compounds with potential adverse reactions.,"The article presents a Web-based platform for collecting and storing toxicological structural alerts from literature and for virtual screening of chemical libraries to flag potentially toxic chemicals and compounds that can cause adverse side effects. An alert is uniquely identified by a SMARTS template, a toxicological endpoint, and a publication where the alert was described. 
Additionally, the system allows storing complementary information such as name, comments, and mechanism of action, as well as other data. Most importantly, the platform can be easily used for fast virtual screening of large chemical datasets, focused libraries, or newly designed compounds against the toxicological alerts, providing a detailed profile of the chemicals grouped by structural alerts and endpoints. Such a facility can be used for decision making regarding whether a compound should be tested experimentally, validated with available QSAR models, or eliminated from consideration altogether. The alert-based screening can also be helpful for an easier interpretation of more complex QSAR models. The system is publicly accessible and tightly integrated with the Online Chemical Modeling Environment (OCHEM, http://ochem.eu). The system is open and expandable: any registered OCHEM user can introduce new alerts, browse, edit alerts introduced by other users, and virtually screen his/her data sets against all or selected alerts. The user sets being passed through the structural alerts can be used at OCHEM for other typical tasks: exporting in a wide variety of formats, development of QSAR models, additional filtering by other criteria, etc. The database already contains almost 600 structural alerts for such endpoints as mutagenicity, carcinogenicity, skin sensitization, compounds that undergo metabolic activation, and compounds that form reactive metabolites and, thus, can cause adverse reactions. 
The ToxAlerts platform is accessible on the Web at http://ochem.eu/alerts, and it is constantly growing.",2012-08-10 +26284240,PhenoMeter: A Metabolome Database Search Tool Using Statistical Similarity Matching of Metabolic Phenotypes for High-Confidence Detection of Functional Links.,"This article describes PhenoMeter (PM), a new type of metabolomics database search that accepts metabolite response patterns as queries and searches the MetaPhen database of reference patterns for responses that are statistically significantly similar or inverse for the purposes of detecting functional links. To identify a similarity measure that would detect functional links as reliably as possible, we compared the performance of four statistics in correctly top-matching metabolic phenotypes of Arabidopsis thaliana metabolism mutants affected in different steps of the photorespiration metabolic pathway to reference phenotypes of mutants affected in the same enzymes by independent mutations. The best performing statistic, the PM score, was a function of both Pearson correlation and Fisher's Exact Test of directional overlap. This statistic outperformed Pearson correlation, biweight midcorrelation and Fisher's Exact Test used alone. To demonstrate general applicability, we show that the PM reliably retrieved the most closely functionally linked response in the database when queried with responses to a wide variety of environmental and genetic perturbations. Attempts to match metabolic phenotypes between independent studies were met with varying success and possible reasons for this are discussed. Overall, our results suggest that integration of pattern-based search tools into metabolomics databases will aid functional annotation of newly recorded metabolic phenotypes analogously to the way sequence similarity search algorithms have aided the functional annotation of genes and proteins. 
PM is freely available at MetabolomeExpress (https://www.metabolome-express.org/phenometer.php).",2015-07-29 +21321022,"The Rat Genome Database curation tool suite: a set of optimized software tools enabling efficient acquisition, organization, and presentation of biological data.","The Rat Genome Database (RGD) is the premier repository of rat genomic and genetic data and currently houses over 40,000 rat gene records as well as human and mouse orthologs, 1771 rat and 1911 human quantitative trait loci (QTLs) and 2209 rat strains. Biological information curated for these data objects includes disease associations, phenotypes, pathways, molecular functions, biological processes and cellular components. A suite of tools has been developed to aid curators in acquiring and validating data objects, assigning nomenclature, attaching biological information to objects and making connections among data types. The software used to assign nomenclature, to create and edit objects and to make annotations to the data objects has been specifically designed to make the curation process as fast and efficient as possible. The user interfaces have been adapted to the work routines of the curators, creating a suite of tools that is intuitive and powerful. Database URL: http://rgd.mcw.edu.",2011-02-14 +27098848,A novel method for identifying polymorphic transposable elements via scanning of high-throughput short reads.,"Identification of polymorphic transposable elements (TEs) is important because TE polymorphism creates genetic diversity and influences the function of genes in the host genome. However, de novo scanning of polymorphic TEs remains a challenge. Here, we report a novel computational method, called PTEMD (polymorphic TEs and their movement detection), for de novo discovery of genome-wide polymorphic TEs. PTEMD searches highly identical sequences using reads supported breakpoint evidences. 
Using PTEMD, we identified 14 polymorphic TE families (905 sequences) in rice blast fungus Magnaporthe oryzae, and 68 (10,618 sequences) in maize. We validated one polymorphic TE family experimentally, MoTE-1; all MoTE-1 family members are located in different genomic loci in the three tested isolates. We found that 57.1% (8 of 14) of the PTEMD-detected polymorphic TE families in M. oryzae are active. Furthermore, our data indicate that there are more polymorphic DNA transposons in maize than their counterparts of retrotransposons despite the fact that retrotransposons occupy the largest fraction of genomic mass. We demonstrated that PTEMD is an effective tool for identifying polymorphic TEs in M. oryzae and maize genomes. PTEMD and the genome-wide polymorphic TEs in M. oryzae and maize are publicly available at http://www.kanglab.cn/blast/PTEMD_V1.02.htm.",2016-04-20 +25680210,A generalized dSpliceType framework to detect differential splicing and differential expression events using RNA-Seq.,"Transcriptomes are routinely compared in terms of a list of differentially expressed genes followed by functional enrichment analysis. Due to the technology limitations of microarray, the molecular mechanisms of differential expression are poorly understood. Using RNA-seq data, we propose a generalized dSpliceType framework to systematically investigate the synergistic and antagonistic effects of differential splicing and differential expression. We applied the method to two public RNA-seq data sets and compared the transcriptomes between treatment and control conditions. The generalized dSpliceType detects and prioritizes a list of genes that are differentially expressed and/or spliced. In particular, the multivariate dSpliceType is among the first to utilize sequential dependency of normalized base-wise read coverage signals and capture biological variability among replicates using a multivariate statistical model. 
We compared dSpliceType with two other methods in terms of five most common types of differential splicing events between two conditions using RNA-Seq. dSpliceType is free, available from http://dsplicetype.sourceforge.net/.",2015-02-11 +24131510,Harvest: an open platform for developing web-based biomedical data discovery and reporting applications.,"Biomedical researchers share a common challenge of making complex data understandable and accessible as they seek inherent relationships between attributes in disparate data types. Data discovery in this context is limited by a lack of query systems that efficiently show relationships between individual variables, but without the need to navigate underlying data models. We have addressed this need by developing Harvest, an open-source framework of modular components, and using it for the rapid development and deployment of custom data discovery software applications. Harvest incorporates visualizations of highly dimensional data in a web-based interface that promotes rapid exploration and export of any type of biomedical information, without exposing researchers to underlying data models. We evaluated Harvest with two cases: clinical data from pediatric cardiology and demonstration data from the OpenMRS project. Harvest's architecture and public open-source code offer a set of rapid application development tools to build data discovery applications for domain-specific biomedical data repositories. All resources, including the OpenMRS demonstration, can be found at http://harvest.research.chop.edu.",2013-10-16 +24618186,"Cohort profile: the lidA Cohort Study-a German Cohort Study on Work, Age, Health and Work Participation.","The lidA Cohort Study (German Cohort Study on Work, Age, Health and Work Participation) was set up to investigate and follow the effects of work and work context on the physical and psychological health of the ageing workforce in Germany and subsequently on work participation. 
Cohort participants are initially employed people subject to social security contributions and born in either 1959 (n = 2909) or 1965 (n = 3676). They were personally interviewed in their homes in 2011 and will be visited every 3 years. Data collection comprises socio-demographic data, work and private exposures, work ability, work and work participation attitudes, health, health-related behaviour, personality and attitudinal indicators. Employment biographies are assessed using register data. Subjective health reports and physical strength measures are complemented by health insurance claims data, where permission was given. A conceptual framework has been developed for the lidA Cohort Study within which three confirmatory sub-models assess the interdependencies of work and health considering age, gender and socioeconomic status. The first set of the data will be available to the scientific community by 2015. Access will be given by the Research Data Centre of the German Federal Employment Agency at the Institute for Employment Research (http://fdz.iab.de/en.aspx).",2014-03-11 +22489867,EnzyBase: a novel database for enzybiotic studies.,"

Background

Enzybiotics are becoming increasingly recognized as potential alternative therapies for drug-resistant bacteria. Although only a few enzybiotics are currently well characterized, much information is still missing or is unavailable for researchers. The construction of an enzybiotics database would therefore increase efficiency and convenience in investigating these bioactive proteins and thus help reduce or delay the recent increase in antibiotic resistance.

Description

In the present manuscript, we describe the development of a novel and original database called EnzyBase, which contains 1144 enzybiotics from 216 natural sources. To ensure data quality, we limited the source of information to authoritative public databases and published scientific literature. The interface of EnzyBase is easy to use and allows users to rapidly retrieve data according to their desired search criteria and blast the database for homologous sequences. We also describe examples of database-aided enzybiotics discovery and design.

Conclusion

EnzyBase serves as a unique tool for enzybiotic studies. It has several potential applications, e.g. in silico enzybiotic combination as cocktails, and novel enzybiotic design, in response to continuously emerging drug-resistant pathogens. This database is a valuable platform for researchers who are interested in enzybiotic studies. EnzyBase is available online at http://biotechlab.fudan.edu.cn/database/EnzyBase/home.php.",2012-04-11 +23080122,The IMGT/HLA database.,"It is 14 years since the IMGT/HLA database was first released, providing the HLA community with a searchable repository of highly curated HLA sequences. The HLA complex is located within the 6p21.3 region of human chromosome 6 and contains more than 220 genes of diverse function. Of these, 21 genes encode proteins of the immune system that are highly polymorphic. The naming of these HLA genes and alleles and their quality control is the responsibility of the World Health Organization Nomenclature Committee for Factors of the HLA System. Through the work of the HLA Informatics Group and in collaboration with the European Bioinformatics Institute, we are able to provide public access to these data through the website http://www.ebi.ac.uk/imgt/hla/. Regular updates to the website ensure that new and confirmatory sequences are dispersed to the HLA community and the wider research and clinical communities. This article describes the latest updates and additional tools added to the IMGT/HLA project.",2012-10-17 +21749710,Algal Functional Annotation Tool: a web-based analysis suite to functionally interpret large gene lists using integrated annotation and expression data.,"

Background

Progress in genome sequencing is proceeding at an exponential pace, and several new algal genomes are becoming available every year. One of the challenges facing the community is the association of protein sequences encoded in the genomes with biological function. While most genome assembly projects generate annotations for predicted protein sequences, they are usually limited and integrate functional terms from a limited number of databases. Another challenge is the use of annotations to interpret large lists of 'interesting' genes generated by genome-scale datasets. Previously, these gene lists had to be analyzed across several independent biological databases, often on a gene-by-gene basis. In contrast, several annotation databases, such as DAVID, integrate data from multiple functional databases and reveal underlying biological themes of large gene lists. While several such databases have been constructed for animals, none is currently available for the study of algae. Due to renewed interest in algae as potential sources of biofuels and the emergence of multiple algal genome sequences, a significant need has arisen for such a database to process the growing compendiums of algal genomic data.

Description

The Algal Functional Annotation Tool is a web-based comprehensive analysis suite integrating annotation data from several pathway, ontology, and protein family databases. The current version provides annotation for the model alga Chlamydomonas reinhardtii, and in the future will include additional genomes. The site allows users to interpret large gene lists by identifying associated functional terms, and their enrichment. Additionally, expression data for several experimental conditions were compiled and analyzed to provide an expression-based enrichment search. A tool to search for functionally-related genes based on gene expression across these conditions is also provided. Other features include dynamic visualization of genes on KEGG pathway maps and batch gene identifier conversion.

Conclusions

The Algal Functional Annotation Tool aims to provide an integrated data-mining environment for algal genomics by combining data from multiple annotation databases into a centralized tool. This site is designed to expedite the process of functional annotation and the interpretation of gene lists, such as those derived from high-throughput RNA-seq experiments. The tool is publicly available at http://pathways.mcdb.ucla.edu.",2011-07-12 +25463038,Drug discovery FAQs: workflows for answering multidomain drug discovery questions.,"Modern data-driven drug discovery requires integrated resources to support decision-making and enable new discoveries. The Open PHACTS Discovery Platform (http://dev.openphacts.org) was built to address this requirement by focusing on drug discovery questions that are of high priority to the pharmaceutical industry. Although complex, most of these frequently asked questions (FAQs) revolve around the combination of data concerning compounds, targets, pathways and diseases. Computational drug discovery using workflow tools and the integrated resources of Open PHACTS can deliver answers to most of these questions. Here, we report on a selection of workflows used for solving these use cases and discuss some of the research challenges. The workflows are accessible online from myExperiment (http://www.myexperiment.org) and are available for reuse by the scientific community.",2014-11-20 +26716705,Correlated confocal and super-resolution imaging by VividSTORM.,"Single-molecule localization microscopy (SMLM) is rapidly gaining popularity in the life sciences as an efficient approach to visualize molecular distribution with nanoscale precision. However, it has been challenging to obtain and analyze such data within a cellular context in tissue preparations. Here we describe a 5-d tissue processing and immunostaining procedure that is optimized for SMLM, and we provide example applications to fixed mouse brain, heart and kidney tissues. 
We then describe how to perform correlated confocal and 3D-superresolution imaging on these sections, which allows the visualization of nanoscale protein localization within labeled subcellular compartments of identified target cells in a few minutes. Finally, we describe the use of VividSTORM (http://katonalab.hu/index.php/vividstorm), an open-source software for correlated confocal and SMLM image analysis, which facilitates the measurement of molecular abundance, clustering, internalization, surface density and intermolecular distances in a cell-specific and subcellular compartment-restricted manner. The protocol requires only basic skills in tissue staining and microscopy.",2015-12-30 +24924268,Quality assessment of data discrimination using self-organizing maps.,"

Motivation

One of the important aspects of the data classification problem lies in making the most appropriate selection of features. The set of variables should be small and, at the same time, should provide reliable discrimination of the classes. The method for the discriminating power evaluation that enables a comparison between different sets of variables will be useful in the search for the set of variables.

Results

A new approach to feature selection is presented. Two methods of evaluation of the data discriminating power of a feature set are suggested. Both of the methods implement self-organizing maps (SOMs) and the newly introduced exponents of the degree of data clusterization on the SOM. The first method is based on the comparison of intraclass and interclass distances on the map. Another method concerns the evaluation of the relative number of best matching unit's (BMUs) nearest neighbors of the same class. Both methods make it possible to evaluate the discriminating power of a feature set in cases when this set provides nonlinear discrimination of the classes.

Availability

Current algorithms in program code can be downloaded for free at http://mekler.narod.ru/Science/Articles_support.html, as well as the supporting data files.",2014-06-09 +27514893,A systematic review on the global occurrence of Taenia hydatigena in pigs and cattle.,"Taenia hydatigena, a non-zoonotic tapeworm species shares the same intermediate hosts with other Taenia zoonotic species, such as Taenia solium in pigs and Taenia saginata in cattle. The occurrence of T. hydatigena in pigs and cattle may cause cross-reactions in immunodiagnostic tests and therefore, complicate the diagnosis of the zoonotic species. This study was conducted to systematically review the data on the prevalence of T. hydatigena in pigs and cattle, with the aim to assess the potential interference in serological diagnosis of zoonotic Taenia spp. due to T. hydatigena infection. We searched PubMed, Web of Science, Africa Journal Online, website http://www.google.com and article reference lists in English, French and Vietnamese with no restriction on research time and publication status. Eligible studies included observational studies that showed the occurrence of T. hydatigena. Twenty-six studies, divided into two animal groups, i.e. pigs and cattle, met the eligibility criteria for qualitative synthesis and 17 studies were included for the meta-analysis in three continents. T. hydatigena was found by necropsy in all included studies, which mostly were abattoir surveys. Overall, results showed the worldwide occurrence of T. hydatigena cysticercosis in pigs and cattle. In pigs, there was a marked higher prevalence in Asia and South America that was 17.2% (95% CI: 10.6-26.8%) and 27.5% (CI: 20.8-35.3%), respectively, compared to a low prevalence of 3.9% (95% CI: 1.9-7.9%) in Africa. Overall, the prevalence of T. hydatigena in cattle was low with a mean of 1.1% (95% CI: 0.2-5.2%). 
These results show that interpretation of results of sero-diagnostic tests for zoonotic Taenia species in pigs and cattle has to take into account the prevalence of T. hydatigena infections in different settings.",2016-06-25 +22689752,Identifying disease sensitive and quantitative trait-relevant biomarkers from multidimensional heterogeneous imaging genetics data via sparse multimodal multitask learning.,"

Motivation

Recent advances in brain imaging and high-throughput genotyping techniques enable new approaches to study the influence of genetic and anatomical variations on brain functions and disorders. Traditional association studies typically perform independent and pairwise analysis among neuroimaging measures, cognitive scores and disease status, and ignore the important underlying interacting relationships between these units.

Results

To overcome this limitation, in this article, we propose a new sparse multimodal multitask learning method to reveal complex relationships from gene to brain to symptom. Our main contributions are three-fold: (i) introducing combined structured sparsity regularizations into multimodal multitask learning to integrate multidimensional heterogeneous imaging genetics data and identify multimodal biomarkers; (ii) utilizing a joint classification and regression learning model to identify disease-sensitive and cognition-relevant biomarkers; (iii) deriving a new efficient optimization algorithm to solve our non-smooth objective function and providing rigorous theoretical analysis on the global optimum convergence. Using the imaging genetics data from the Alzheimer's Disease Neuroimaging Initiative database, the effectiveness of the proposed method is demonstrated by clearly improved performance on predicting both cognitive scores and disease status. The identified multimodal biomarkers could predict not only disease status but also cognitive function to help elucidate the biological pathway from gene to brain structure and function, and to cognition and disease.

Availability

Software is publicly available at: http://ranger.uta.edu/%7eheng/multimodal/.",2012-06-01 +26412344,A tool to make reporting checklists work.,"Although the use of reporting guidelines has been demonstrated to increase the completeness and transparency of health research published in journals, there is still a long way to translate their use to the authors at the time where they are needed - during the actual research process and manuscript writing. An online tool for writing methodology section of a randomized controlled trial has been successfully tested in an experimental setting and provides a direction for the development of writing tools for health research. Writing tools should not replace original thinking and the excitement of communicating original discoveries, but make sure that all relevant data are in the manuscript so that research results can be understood, critically evaluated and used in practice. Please see related article: http://www.biomedcentral.com/1741-7015/13/221.",2015-09-28 +23732273,SPANNER: taxonomic assignment of sequences using pyramid matching of similarity profiles.,"

Background

Homology-based taxonomic assignment is impeded by differences between the unassigned read and reference database, forcing a rank-specific classification to the closest (and possibly incorrect) reference lineage. This assignment may be correct only to a general rank (e.g. order) and incorrect below that rank (e.g. family and genus). Algorithms like LCA avoid this by varying the predicted taxonomic rank based on matches to a set of taxonomic references. LCA and related approaches can be conservative, especially if best matches are taxonomically widespread because of events such as lateral gene transfer (LGT).

Results

Our extension to LCA called SPANNER (similarity profile annotater) uses the set of best homology matches (the LCA Profile) for a given sequence and compares this profile with a set of profiles inferred from taxonomic reference organisms. SPANNER provides an assignment that is less sensitive to LGT and other confounding phenomena. In a series of trials on real and artificial datasets, SPANNER outperformed LCA-style algorithms in terms of taxonomic precision and outperformed best BLAST at certain levels of taxonomic novelty in the dataset. We identify examples where LCA made an overly conservative prediction, but SPANNER produced a more precise and correct prediction.

Conclusions

By using profiles of homology matches to represent patterns of genomic similarity that arise because of vertical and lateral inheritance, SPANNER offers an effective compromise between taxonomic assignment based on best BLAST scores, and the conservative approach of LCA and similar approaches.

Availability

C++ source code and binaries are freely available at http://kiwi.cs.dal.ca/Software/SPANNER.

Contact

beiko@cs.dal.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-06-03 +22371334,"Long-term trends in food availability, food prices, and obesity in Samoa.","

Objectives

To describe long-term food availability and prices from 1961 to 2007 and body mass index (BMI) trends from 1980 to 2010 in Samoa, and to contextualize these trends within political, economic, cultural, behavioral, and climatic influences.

Methods

National level data on food availability and pricing were obtained from the open access database FAO (http://faostat.fao.org). Data for Samoa were collected from annual food balance sheets available for the period 1961-2007. Mean BMI for Samoan men and women aged 35-44 years of age is reported from four different time periods, 1979-1982, 1991, 2003, and 2010.

Results

Total energy availability increased substantially, by 47%, with more than 900 extra calories available per capita per day in 2007 than in 1961. Many of these extra calories are supplied by dietary fat, the availability of which rose by a proportionally greater amount, 73%. Availability of both meat and vegetable oils rose substantially. Poultry meat increased the most proportionally, from 10 to 117 kcal per capita per day. Coconut products, fruits, and starchy root crops-all locally grown-showed little to no increase over this time. As import prices for poultry and mutton increased their availability decreased, but the availability of vegetable oils rose despite a rise in their price. Mean BMI for men and women aged 35-44 years rose 18% from 1980 to 2010.

Conclusions

These long-term trends in food availability and prices, and the temporal pattern of BMI provide national level data for understanding the process of the nutritional transition in Samoa. Further work on consumer food prices, diet, food security, and health is needed to further contextualize the transformation of the local food system in Samoa.",2012-02-27 +27354692,Accurate in silico prediction of species-specific methylation sites based on information gain feature optimization.,"As one of the most important reversible types of post-translational modification, protein methylation catalyzed by methyltransferases carries many pivotal biological functions as well as many essential biological processes. Identification of methylation sites is prerequisite for decoding methylation regulatory networks in living cells and understanding their physiological roles. Experimental methods are limitations of labor-intensive and time-consuming. While in silicon approaches are cost-effective and high-throughput manner to predict potential methylation sites, but those previous predictors only have a mixed model and their prediction performances are not fully satisfactory now. Recently, with increasing availability of quantitative methylation datasets in diverse species (especially in eukaryotes), there is a growing need to develop a species-specific predictor. Here, we designed a tool named PSSMe based on information gain (IG) feature optimization method for species-specific methylation site prediction. The IG method was adopted to analyze the importance and contribution of each feature, then select the valuable dimension feature vectors to reconstitute a new orderly feature, which was applied to build the finally prediction model. Finally, our method improves prediction performance of accuracy about 15% comparing with single features. Furthermore, our species-specific model significantly improves the predictive performance compare with other general methylation prediction tools. 
Hence, our prediction results serve as useful resources to elucidate the mechanism of arginine or lysine methylation and facilitate hypothesis-driven experimental design and validation.

Availability and implementation

The tool online service is implemented by C# language and freely available at http://bioinfo.ncu.edu.cn/PSSMe.aspx CONTACT: jdqiu@ncu.edu.cn Supplementary information: Supplementary data are available at Bioinformatics online.",2016-06-26 +22945945,GenScalpel: an application for sequence retrieval and extraction from the GenBank flatfile.,"GenScalpel is a program designed for the retrieval and extraction of specified sequences from large-scale sequence sets in NCBI GenBank flatfile format. This routine task in bioinformatics analysis is a pressing need for laboratory biologists. Another objective of application development is to respond to the new form of the NCBI Nucleotide Sequence Database, which was updated in November 2011. In addition to a powerful sequence refinement application, GenScalpel provides convenient functions for web-based sequence downloading or multiple files batch processing. This note discusses major applications of the program and includes example data sets to demonstrate its performance. The program is written in PERL. GenScalpel, including installation packages for Windows and Linux systems as well as the accompanying documentation, are available free of charge at http://genscalpel.biosv.com/.",2012-09-02 +24676893,Hexicon 2: automated processing of hydrogen-deuterium exchange mass spectrometry data with improved deuteration distribution estimation.,"Hydrogen-deuterium exchange (HDX) experiments analyzed by mass spectrometry (MS) provide information about the dynamics and the solvent accessibility of protein backbone amide hydrogen atoms. Continuous improvement of MS instrumentation has contributed to the increasing popularity of this method; however, comprehensive automated data analysis is only beginning to mature. We present Hexicon 2, an automated pipeline for data analysis and visualization based on the previously published program Hexicon (Lou et al. 2010). 
Hexicon 2 employs the sensitive NITPICK peak detection algorithm of its predecessor in a divide-and-conquer strategy and adds new features, such as chromatogram alignment and improved peptide sequence assignment. The unique feature of deuteration distribution estimation was retained in Hexicon 2 and improved using an iterative deconvolution algorithm that is robust even to noisy data. In addition, Hexicon 2 provides a data browser that facilitates quality control and provides convenient access to common data visualization tasks. Analysis of a benchmark dataset demonstrates superior performance of Hexicon 2 compared with its predecessor in terms of deuteration centroid recovery and deuteration distribution estimation. Hexicon 2 greatly reduces data analysis time compared with manual analysis, whereas the increased number of peptides provides redundant coverage of the entire protein sequence. Hexicon 2 is a standalone application available free of charge under http://hx2.mpimf-heidelberg.mpg.de.",2014-03-28 +22039206,RAPSearch2: a fast and memory-efficient protein similarity search tool for next-generation sequencing data.,"

Summary

With the wide application of next-generation sequencing (NGS) techniques, fast tools for protein similarity search that scale well to large query datasets and large databases are highly desirable. In a previous work, we developed RAPSearch, an algorithm that achieved a ~20-90-fold speedup relative to BLAST while still achieving similar levels of sensitivity for short protein fragments derived from NGS data. RAPSearch, however, requires a substantial memory footprint to identify alignment seeds, due to its use of a suffix array data structure. Here we present RAPSearch2, a new memory-efficient implementation of the RAPSearch algorithm that uses a collision-free hash table to index a similarity search database. The utilization of an optimized data structure further speeds up the similarity search-another 2-3 times. We also implemented multi-threading in RAPSearch2, and the multi-thread modes achieve significant acceleration (e.g. 3.5X for 4-thread mode). RAPSearch2 requires up to 2G memory when running in single thread mode, or up to 3.5G memory when running in 4-thread mode.

Availability and implementation

Implemented in C++, the source code is freely available for download at the RAPSearch2 website: http://omics.informatics.indiana.edu/mg/RAPSearch2/.

Contact

yye@indiana.edu

Supplementary information

Available at the RAPSearch2 website.",2011-10-28 +26208136,Precore/Core Region Mutations in Hepatitis B Virus DNA Predict Postoperative Survival in Hepatocellular Carcinoma.,"Hepatitis B virus (HBV) DNA is prone to mutations because of the proofreading deficiencies of HBV polymerase. We have identified hepatocellular carcinoma (HCC) survival-associated HBV mutations in the X protein region of HBV DNA. In the present study, we extend our research to assess HCC survival-associated HBV mutations in the HBV precore/core (PreC/C) region. The PreC/C region was amplified and sequenced and the HBV mutations were identified according to the NCBI database (http://www.ncbi.nlm.nih.gov/genome/5536). The relationships between the mutations in the PreC/C region and HCC survival were analyzed. Survival curves were generated using the Kaplan-Meier method, and comparisons between the curves were made using the log-rank test. Multivariate survival analysis was performed using a Cox proportional hazards model. After adjusting for clinical characteristics, the 1915, 2134, 2221, 2245 and 2288 mutational sites were identified as statistically significant independent predictors of HCC survival by multivariate survival analysis using a Cox proportional hazards model. In addition, the mutational site of 1896 was identified for its association with survival at a borderline significance level. A total of five mutations in the precore/core region were identified as independent predictors of postoperative survival in HCC patients. The analysis of HBV DNA mutations may help identify patient subgroups with poor prognosis and may help refine therapeutic decisions regarding HCC patients.",2015-07-24 +26411474,Nucleosome positioning: resources and tools online.,"Nucleosome positioning is an important process required for proper genome packing and its accessibility to execute the genetic program in a cell-specific, timely manner. 
In the recent years hundreds of papers have been devoted to the bioinformatics, physics and biology of nucleosome positioning. The purpose of this review is to cover a practical aspect of this field, namely, to provide a guide to the multitude of nucleosome positioning resources available online. These include almost 300 experimental datasets of genome-wide nucleosome occupancy profiles determined in different cell types and more than 40 computational tools for the analysis of experimental nucleosome positioning data and prediction of intrinsic nucleosome formation probabilities from the DNA sequence. A manually curated, up to date list of these resources will be maintained at http://generegulation.info.",2015-09-26 +27366783,Data set for diet specific differential gene expression analysis in three Spodoptera moths.,"Examination of closely related species pairs is suggested for evolutionary comparisons of different degrees of polyphagy, which we did here with three taxa of lepidopteran herbivores, Spodoptera spp (S. littoralis, S. frugiperda maize (C) and rice (R) strains) for a RNAseq analysis of the midguts from the 3rd instar insect larvae for differential metabolic responses after feeding on pinto bean based artificial diet vs maize leaves. Paired-end (2×100 bp) Illumina HiSeq2500 sequencing resulted in a total of 24, 23, 24, and 21 million reads for the SF-C-Maize, SF-C-Pinto, SF-R-Maize, SF-R Pinto, and a total of 35 and 36 million reads for the SL-Maize and SL-Pinto samples, respectively. After quality control measures, a total of 62.2 million reads from SL and 71.7 million reads from SF were used for transcriptome assembly (TA). The resulting final de novo reference TA (backbone) for the SF taxa contained 37,985 contigs with a N50 contig size of 1030 bp and a maximum contig length of 17,093 bp, while for SL, 28,329 contigs were generated with a N50 contig size of 1980 bp and a maximum contig length of 18,267 bp. 
The data presented herein contains supporting information related to our research article Roy et al. (2016) http://dx.doi.org/10.1016/j.ibmb.2016.02.006[1].",2016-05-20 +23812986,A context-sensitive framework for the analysis of human signalling pathways in molecular interaction networks.,"

Motivation

A major challenge in systems biology is to reveal the cellular pathways that give rise to specific phenotypes and behaviours. Current techniques often rely on a network representation of molecular interactions, where each node represents a protein or a gene and each interaction is assigned a single static score. However, the use of single interaction scores fails to capture the tendency of proteins to favour different partners under distinct cellular conditions.

Results

Here, we propose a novel context-sensitive network model, in which genes and protein nodes are assigned multiple contexts based on their gene ontology annotations, and their interactions are associated with multiple context-sensitive scores. Using this model, we developed a new approach and a corresponding tool, ContextNet, based on a dynamic programming algorithm for identifying signalling paths linking proteins to their downstream target genes. ContextNet finds high-ranking context-sensitive paths in the interactome, thereby revealing the intermediate proteins in the path and their path-specific contexts. We validated the model using 18 348 manually curated cellular paths derived from the SPIKE database. We next applied our framework to elucidate the responses of human primary lung cells to influenza infection. Top-ranking paths were much more likely to contain infection-related proteins, and this likelihood was highly correlated with path score. Moreover, the contexts assigned by the algorithm pointed to putative, as well as previously known responses to viral infection. Thus, context sensitivity is an important extension to current network biology models and can be efficiently used to elucidate cellular response mechanisms.

Availability

ContextNet is publicly available at http://netbio.bgu.ac.il/ContextNet.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +25725497,IVA: accurate de novo assembly of RNA virus genomes.,"

Motivation

An accurate genome assembly from short read sequencing data is critical for downstream analysis, for example allowing investigation of variants within a sequenced population. However, assembling sequencing data from virus samples, especially RNA viruses, into a genome sequence is challenging due to the combination of viral population diversity and extremely uneven read depth caused by amplification bias in the inevitable reverse transcription and polymerase chain reaction amplification process of current methods.

Results

We developed a new de novo assembler called IVA (Iterative Virus Assembler) designed specifically for read pairs sequenced at highly variable depth from RNA virus samples. We tested IVA on datasets from 140 sequenced samples from human immunodeficiency virus-1 or influenza-virus-infected people and demonstrated that IVA outperforms all other virus de novo assemblers.

Availability and implementation

The software runs under Linux, has the GPLv3 licence and is freely available from http://sanger-pathogens.github.io/iva",2015-02-28 +22224407,Protein secondary structure prediction with SPARROW.,"A first step toward predicting the structure of a protein is to determine its secondary structure. The secondary structure information is generally used as starting point to solve protein crystal structures. In the present study, a machine learning approach based on a complete set of two-class scoring functions was used. Such functions discriminate between two specific structural classes or between a single specific class and the rest. The approach uses a hierarchical scheme of scoring functions and a neural network. The parameters are determined by optimizing the recall of learning data. Quality control is performed by predicting separate independent test data. A first set of scoring functions is trained to correlate the secondary structures of residues with profiles of sequence windows of width 15, centered at these residues. The sequence profiles are obtained by multiple sequence alignment with PSI-BLAST. A second set of scoring functions is trained to correlate the secondary structures of the center residues with the secondary structures of all other residues in the sequence windows used in the first step. Finally, a neural network is trained using the results from the second set of scoring functions as input to make a decision on the secondary structure class of the residue in the center of the sequence window. Here, we consider the three-class problem of helix, strand, and other secondary structures. The corresponding prediction scheme ""SPARROW"" was trained with the ASTRAL40 database, which contains protein domain structures with less than 40% sequence identity. The secondary structures were determined with DSSP. 
In a loose assignment, the helix class contains all DSSP helix types (α, 3-10, π), the strand class contains β-strand and β-bridge, and the third class contains the other structures. In a tight assignment, the helix and strand classes contain only α-helix and β-strand classes, respectively. A 10-fold cross validation showed less than 0.8% deviation in the fraction of correct structure assignments between true prediction and recall of data used for training. Using sequences of 140,000 residues as a test data set, 80.46% ± 0.35% of secondary structures are predicted correctly in the loose assignment, a prediction performance, which is very close to the best results in the field. Most applications are done with the loose assignment. However, the tight assignment yields 2.25% better prediction performance. With each individual prediction, we also provide a confidence measure providing the probability that the prediction is correct. The SPARROW software can be used and downloaded on the Web page http://agknapp.chemie.fu-berlin.de/sparrow/ .",2012-01-23 +22208852,MotifMap: integrative genome-wide maps of regulatory motif sites for model species.,"

Background

A central challenge of biology is to map and understand gene regulation on a genome-wide scale. For any given genome, only a small fraction of the regulatory elements embedded in the DNA sequence have been characterized, and there is great interest in developing computational methods to systematically map all these elements and understand their relationships. Such computational efforts, however, are significantly hindered by the overwhelming size of non-coding regions and the statistical variability and complex spatial organizations of regulatory elements and interactions. Genome-wide catalogs of regulatory elements for all model species simply do not yet exist.

Results

The MotifMap system uses databases of transcription factor binding motifs, refined genome alignments, and a comparative genomic statistical approach to provide comprehensive maps of candidate regulatory elements encoded in the genomes of model species. The system is used to derive new genome-wide maps for yeast, fly, worm, mouse, and human. The human map contains 519,108 sites for 570 matrices with a False Discovery Rate of 0.1 or less. The new maps are assessed in several ways, for instance using high-throughput experimental ChIP-seq data and AUC statistics, providing strong evidence for their accuracy and coverage. The maps can be usefully integrated with many other kinds of omic data and are available at http://motifmap.igb.uci.edu/.

Conclusions

MotifMap and its integration with other data provide a foundation for analyzing gene regulation on a genome-wide scale, and for automatically generating regulatory pathways and hypotheses. The power of this approach is demonstrated and discussed using the P53 apoptotic pathway and the Gli hedgehog pathways as examples.",2011-12-30 +23725941,The effect of model fidelity on colonoscopic skills acquisition. A randomized controlled study.,"

Introduction

Colonoscopic simulators offer the opportunity for skill acquisition in the preclinical setting. Currently available simulators vary widely with respect to level of fidelity and technological sophistication. Despite the belief that more realistic is better, there is a paucity of evidence regarding the relative effectiveness of simulator fidelity (high vs low) on the acquisition of basic colonoscopic skills. We hypothesized that novice learners can acquire basic colonoscopic skills using simulators, however fidelity of the simulator does not make a difference.

Methods

We randomly assigned novice third-year and fourth-year medical students to practice on either a low-fidelity or high-fidelity colonoscopy model. The low-fidelity model used is described in the module 16 of the American College of Surgeons/Association of Program Directors in Surgery surgical skills curriculum for residents, Phase 1: basic or core skills and tasks < http://elearning.facs.org/mod/resource/view.php?1d=450 >. The high-fidelity model was the AccuTouch colonoscopy simulator, Immersion Medical (AccuTouch CS) that has 6 different simulated scenarios for diagnostic colonoscopy (level 1-6). Both groups had 16 students and were given standard instruction by an expert with respect to the procedure and instrument handling on both models. Both groups were pretested and posttested on level 1 of the AccuTouch CS. The high-fidelity group practiced on level 2 and 4 of the AccuTouch CS, whereas the low-fidelity group practiced on the low-fidelity model for 2 sessions of 1 hour each. The computer-based evaluation parameters available on the AccuTouch CS were used to compare performances.

Results

Both groups had similar demographics. There were no significant differences in the baseline performances of either group. Each group demonstrated significant improvement for insertion time and percentage of mucosa visualized. However, there were no significant differences between the groups on posttesting on any of the measured parameters.

Conclusions

Colonoscopic skill training on a low-fidelity model appears to be as effective as high-fidelity model training for basic endoscopic skill acquisition for novice learners.",2013-04-16 +26202118,Development of a Multilocus Sequence Typing Scheme for Molecular Typing of Mycoplasma pneumoniae.,"Mycoplasma pneumoniae is a major human respiratory pathogen causing both upper and lower respiratory disease in humans of all ages, and it can also result in other serious extrapulmonary sequelae. A multilocus sequence typing (MLST) scheme for M. pneumoniae was developed based on the sequences of eight housekeeping genes (ppa, pgm, gyrB, gmk, glyA, atpA, arcC, and adk) and applied to 55 M. pneumoniae clinical isolates and the two type strains M129 and FH. A total of 12 sequence types (STs) resulted for 57 M. pneumoniae isolates tested, with a discriminatory index of 0.21 STs per isolate. The MLST loci used in this scheme were shown to be stable in 10 strains following 10 sequential subculture passages. Phylogenetic analysis of concatenated sequences of the eight loci indicated two distinct genetic clusters that were directly linked to multilocus variable-number tandem repeat analysis (MLVA) type. Genetic MLST clustering was confirmed by genomic sequence analysis, indicating that the MLST scheme developed in this study is representative of the genome. Furthermore, this MLST scheme was shown to be more discriminatory than both MLVA and P1 typing for the M. pneumoniae isolates examined, providing a method for further and more detailed analysis of observed epidemic peaks of M. pneumoniae infection. 
This scheme is supported by a public Web-based database (http://pubmlst.org/mpneumoniae).",2015-07-22 +24489733,Accurate data processing improves the reliability of Affymetrix gene expression profiles from FFPE samples.,"Formalin fixed paraffin-embedded (FFPE) tumor specimens are the conventionally archived material in clinical practice, representing an invaluable tissue source for biomarkers development, validation and routine implementation. For many prospective clinical trials, this material has been collected allowing for a prospective-retrospective study design which represents a successful strategy to define clinical utility for candidate markers. Gene expression data can be obtained even from FFPE specimens with the broadly used Affymetrix HG-U133 Plus 2.0 microarray platform. Nevertheless, important major discrepancies remain in expression data obtained from FFPE compared to fresh-frozen samples, prompting the need for appropriate data processing which could help to obtain more consistent results in downstream analyses. In a publicly available dataset of matched frozen and FFPE expression data, the performances of different normalization methods and specifically designed Chip Description Files (CDFs) were compared. The use of an alternative CDFs together with fRMA normalization significantly improved frozen-FFPE sample correlations, frozen-FFPE probeset correlations and agreement of differential analysis between different tumor subtypes. The relevance of our optimized data processing was assessed and validated using two independent datasets. In this study we demonstrated that an appropriate data processing can significantly improve the reliability of gene expression data derived from FFPE tissues using the standard Affymetrix platform. 
Tools for the implementation of our data processing algorithm are made publicly available at http://www.biocut.unito.it/cdf-ffpe/.",2014-01-29 +25535243,A Bayesian framework for de novo mutation calling in parents-offspring trios.,"

Motivation

Spontaneous (de novo) mutations play an important role in the disease etiology of a range of complex diseases. Identifying de novo mutations (DNMs) in sporadic cases provides an effective strategy to find genes or genomic regions implicated in the genetics of disease. High-throughput next-generation sequencing enables genome- or exome-wide detection of DNMs by sequencing parents-proband trios. It is challenging to sift true mutations through massive amount of noise due to sequencing error and alignment artifacts. One of the critical limitations of existing methods is that for all genomic regions the same pre-specified mutation rate is assumed, which has a significant impact on the DNM calling accuracy.

Results

In this study, we developed and implemented a novel Bayesian framework for DNM calling in trios (TrioDeNovo), which overcomes these limitations by disentangling prior mutation rates from evaluation of the likelihood of the data so that flexible priors can be adjusted post-hoc at different genomic sites. Through extensive simulations and application to real data we showed that this new method has improved sensitivity and specificity over existing methods, and provides a flexible framework to further improve the efficiency by incorporating proper priors. The accuracy is further improved using effective filtering based on sequence alignment characteristics.

Availability and implementation

The C++ source code implementing TrioDeNovo is freely available at https://medschool.vanderbilt.edu/cgg.

Contact

bingshan.li@vanderbilt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-21 +26704597,Foldalign 2.5: multithreaded implementation for pairwise structural RNA alignment.,"

Motivation

Structured RNAs can be hard to search for as they often are not well conserved in their primary structure and are local in their genomic or transcriptomic context. Thus, the need for tools which in particular can make local structural alignments of RNAs is only increasing.

Results

To meet the demand for both large-scale screens and hands-on analysis through web servers, we present a new multithreaded version of Foldalign. We substantially improve execution time while maintaining all previous functionalities, including carrying out local structural alignments of sequences with low similarity. Furthermore, the improvements allow for comparing longer RNAs and increasing the sequence length. For example, lengths in the range 2000-6000 nucleotides improve execution up to a factor of five.

Availability and implementation

The Foldalign software and the web server are available at http://rth.dk/resources/foldalign

Contact

gorodkin@rth.dk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-24 +23423175,SoyProDB: A database for the identification of soybean seed proteins.,"

Unlabelled

Soybean continues to serve as a rich and inexpensive source of protein for humans and animals. A substantial amount of information has been reported on the genotypic variation and beneficial genetic manipulation of soybeans. For better understanding of the consequences of genetic manipulation, elucidation of soybean protein composition is necessary, because of its direct relationship to phenotype. We have conducted studies to determine the composition of storage, allergen and anti-nutritional proteins in cultivated soybean using a combined proteomics approach. Two-dimensional polyacrylamide gel electrophoresis (2DPAGE) was implemented for the separation of proteins along with matrix-assisted laser desorption/ionization time of flight mass spectrometry (MALDI-TOF-MS) and liquid chromatography mass spectrometry (LC-MS/MS) for the identification of proteins. Our analysis resulted in the identification of several proteins, and a web based database named soybean protein database (SoyProDB) was subsequently built to house and allow scientists to search the data. This database will be useful to scientists who wish to genetically alter soybean with higher quality storage proteins, and also helpful for consumers to get a greater understanding about proteins that compose soy products available in the market. The database is freely accessible.

Availability

http://bioinformatics.towson.edu/Soybean_Seed_Proteins_2D_Gel_DB/Home.aspx.",2013-02-06 +23334977,Identification and classification of seafood-borne pathogenic and spoilage bacteria: 16S rRNA sequencing versus MALDI-TOF MS fingerprinting.,"The present study aims to compare two molecular technologies, 16S rRNA sequencing and MALDI-TOF MS, for bacterial species identification in seafood. With this aim, 70 reference strains from culture collections, including important seafood-borne pathogenic and spoilage bacterial species, and 50 strains isolated from commercial seafood products, were analysed by both techniques. Genomic analysis only identified the species of 50% of the isolated strains, proving to be particularly poor at identifying members of the Pseudomonas and Bacillus genera. In contrast, MALDI-TOF MS fingerprinting identified 76% of the strains at the species level. The mass spectral data were submitted to the SpectraBank database (http://www.spectrabank.org), making this information available to other researchers. Furthermore, cluster analysis of the peak mass lists was carried out with the web application SPECLUST and the calculated groupings were consistent with results determined by a phylogenetic approach that is based on the 16S rRNA sequences. However, the MALDI-TOF MS analysis demonstrated more discriminating potential that allowed for better classification, especially for the Pseudomonas and Bacillus genera. This is of importance with respect to the varying pathogenic and spoilage character at the intragenus and intraspecies level. 
In this sense, MALDI-TOF MS demonstrated to be a competent bacterial typing tool that extends phenotypic and genotypic approaches, allowing a more ample classification of bacterial strains.",2013-02-25 +21292827,Research resource: Update and extension of a glycoprotein hormone receptors web application.,"The SSFA-GPHR (Sequence-Structure-Function-Analysis of Glycoprotein Hormone Receptors) database provides a comprehensive set of mutation data for the glycoprotein hormone receptors (covering the lutropin, the FSH, and the TSH receptors). Moreover, it provides a platform for comparison and investigation of these homologous receptors and helps in understanding protein malfunctions associated with several diseases. Besides extending the data set (> 1100 mutations), the database has been completely redesigned and several novel features and analysis tools have been added to the web site. These tools allow the focused extraction of semiquantitative mutant data from the GPHR subtypes and different experimental approaches. Functional and structural data of the GPHRs are now linked interactively at the web interface, and new tools for data visualization (on three-dimensional protein structures) are provided. The interpretation of functional findings is supported by receptor morphings simulating intramolecular changes during the activation process, which thus help to trace the potential function of each amino acid and provide clues to the local structural environment, including potentially relocated spatial counterpart residues. Furthermore, double and triple mutations are newly included to allow the analysis of their functional effects related to their spatial interrelationship in structures or homology models. A new important feature is the search option and data visualization by interactive and user-defined snake-plots. 
These new tools allow fast and easy searches for specific functional data and thereby give deeper insights into the mechanisms of hormone binding, signal transduction, and signaling regulation. The web application ""Sequence-Structure-Function-Analysis of GPHRs"" is accessible on the internet at http://www.ssfa-gphr.de/.",2011-02-03 +25136349,The CNVrd2 package: measurement of copy number at complex loci using high-throughput sequencing data.,"Recent advances in high-throughput sequencing technologies have made it possible to accurately assign copy number (CN) at CN variable loci. However, current analytic methods often perform poorly in regions in which complex CN variation is observed. Here we report the development of a read depth-based approach, CNVrd2, for investigation of CN variation using high-throughput sequencing data. This methodology was developed using data from the 1000 Genomes Project from the CCL3L1 locus, and tested using data from the DEFB103A locus. In both cases, samples were selected for which paralog ratio test data were also available for comparison. The CNVrd2 method first uses observed read-count ratios to refine segmentation results in one population. Then a linear regression model is applied to adjust the results across multiple populations, in combination with a Bayesian normal mixture model to cluster segmentation scores into groups for individual CN counts. The performance of CNVrd2 was compared to that of two other read depth-based methods (CNVnator, cn.mops) at the CCL3L1 and DEFB103A loci. The highest concordance with the paralog ratio test method was observed for CNVrd2 (77.8/90.4% for CNVrd2, 36.7/4.8% for cn.mops and 7.2/1% for CNVnator at CCL3L1 and DEFB103A). 
CNVrd2 is available as an R package as part of the Bioconductor project: http://www.bioconductor.org/packages/release/bioc/html/CNVrd2.html.",2014-08-01 +27920478,Diabetes mellitus may affect the long-term survival of hepatitis B virus-related hepatocellular carcinoma patients after liver transplantation.,"

Aim

To determine whether diabetes mellitus (DM) affects prognosis/recurrence after liver transplantation (LT) for hepatitis B virus (HBV)-related hepatocellular carcinoma (HCC).

Methods

A retrospective study was conducted between January 2000 and August 2013 on 1631 patients with HBV-related HCC who underwent LT with antiviral prophylaxis. Patient data were obtained from the China Liver Transplant Registry (https://www.cltr.org/). To compare the outcomes and tumor recurrence in the HBV-related HCC patients with or without DM, statistical analyses were conducted using χ2 tests, Mann-Whitney tests, the Kaplan-Meier method, log-rank tests and multivariate step-wise Cox regression analysis.

Results

Univariate analysis of 1631 patients who underwent LT found overall 1-, 3- and 5-year survival rates of 79%, 73% and 71% respectively in the DM patients, and 84%, 78% and 76% in the non-DM patients respectively. Overall survival rate differences after LT between the two groups were significant (P = 0.041), but recurrence-free survival rates were not (P = 0.096). By stratified analysis, the overall survival rates in DM patients for age > 50 years (P = 0.002), the presence of vascular invasion (P = 0.096), tumors ≤ 3 cm (P = 0.047), two to three tumor nodules (P = 0.007), Child-Pugh grade B (P = 0.018), and pre-LT alanine aminotransferase levels between 40 and 80 IU/L (P = 0.017) were significantly lower than in non-DM patients. Additionally, serum α-fetoprotein level > 2000 ng/mL (P = 0.052) was associated with a significant survival difference trend between DM and non-DM patients. Multivariate analysis showed that the presence of DM (P < 0.001, HR = 1.591; 95%CI: 1.239-2.041) was an independent predictor associated with poor survival after LT.

Conclusion

HBV-related HCC patients with DM have decreased long-term overall survival and poor LT outcomes. Prevention strategies for HCC patients with DM are recommended.",2016-11-01 +21235794,BiologicalNetworks--tools enabling the integration of multi-scale data for the host-pathogen studies.,"

Background

Understanding of immune response mechanisms of pathogen-infected host requires multi-scale analysis of genome-wide data. Data integration methods have proved useful to the study of biological processes in model organisms, but their systematic application to the study of host immune system response to a pathogen and human disease is still in the initial stage.

Results

To study host-pathogen interaction on the systems biology level, an extension to the previously described BiologicalNetworks system is proposed. The developed methods and data integration and querying tools allow simplifying and streamlining the process of integration of diverse experimental data types, including molecular interactions and phylogenetic classifications, genomic sequences and protein structure information, gene expression and virulence data for pathogen-related studies. The data can be integrated from the databases and user's files for both public and private use.

Conclusions

The developed system can be used for the systems-level analysis of host-pathogen interactions, including host molecular pathways that are induced/repressed during the infections, co-expressed genes, and conserved transcription factor binding sites. Genes not previously known to be associated with influenza infection were identified and suggested for further investigation as potential drug targets. Developed methods and data are available through the Java application (from BiologicalNetworks program at http://www.biologicalnetworks.org) and web interface (at http://flu.sdsc.edu).",2011-01-14 +22554190,Pattern analysis approach reveals restriction enzyme cutting abnormalities and other cDNA library construction artifacts using raw EST data.,"

Background

Expressed Sequence Tag (EST) sequences are widely used in applications such as genome annotation, gene discovery and gene expression studies. However, some of GenBank dbEST sequences have proven to be ""unclean"". Identification of cDNA termini/ends and their structures in raw ESTs not only facilitates data quality control and accurate delineation of transcription ends, but also furthers our understanding of the potential sources of data abnormalities/errors present in the wet-lab procedures for cDNA library construction.

Results

After analyzing a total of 309,976 raw Pinus taeda ESTs, we uncovered many distinct variations of cDNA termini, some of which prove to be good indicators of wet-lab artifacts, and characterized each raw EST by its cDNA terminus structure patterns. In contrast to the expected patterns, many ESTs displayed complex and/or abnormal patterns that represent potential wet-lab errors such as: a failure of one or both of the restriction enzymes to cut the plasmid vector; a failure of the restriction enzymes to cut the vector at the correct positions; the insertion of two cDNA inserts into a single vector; the insertion of multiple and/or concatenated adapters/linkers; the presence of 3'-end terminal structures in designated 5'-end sequences or vice versa; and so on. With a close examination of these artifacts, many problematic ESTs that have been deposited into public databases by conventional bioinformatics pipelines or tools could be cleaned or filtered by our methodology. We developed a software tool for Abnormality Filtering and Sequence Trimming for ESTs (AFST, http://code.google.com/p/afst/) using a pattern analysis approach. To compare AFST with other pipelines that submitted ESTs into dbEST, we reprocessed 230,783 Pinus taeda and 38,709 Arachis hypogaea GenBank ESTs. We found 7.4% of Pinus taeda and 29.2% of Arachis hypogaea GenBank ESTs are ""unclean"" or abnormal, all of which could be cleaned or filtered by AFST.

Conclusions

cDNA terminal pattern analysis, as implemented in the AFST software tool, can be utilized to reveal wet-lab errors such as restriction enzyme cutting abnormalities and chimeric EST sequences, detect various data abnormalities embedded in existing Sanger EST datasets, improve the accuracy of identifying and extracting bona fide cDNA inserts from raw ESTs, and therefore greatly benefit downstream EST-based applications.",2012-05-03 +26520853,catRAPID signature: identification of ribonucleoproteins and RNA-binding regions.,"

Motivation

Recent technological advances revealed that an unexpected large number of proteins interact with transcripts even if the RNA-binding domains are not annotated. We introduce catRAPID signature to identify ribonucleoproteins based on physico-chemical features instead of sequence similarity searches. The algorithm, trained on human proteins and tested on model organisms, calculates the overall RNA-binding propensity followed by the prediction of RNA-binding regions. catRAPID signature outperforms other algorithms in the identification of RNA-binding proteins and detection of non-classical RNA-binding regions. Results are visualized on a webpage and can be downloaded or forwarded to catRAPID omics for predictions of RNA targets.

Availability and implementation

catRAPID signature can be accessed at http://s.tartaglialab.com/new_submission/signature

Contact

gian.tartaglia@crg.es or gian@tartaglialab.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-31 +23275735,ExonVisualiser - application for visualization exon units in 2D and 3D protein structures.,"

Unlabelled

The web application oriented on identification and visualization of protein regions encoded by exons is presented. The Exon Visualiser can be used for visualisation on different levels of protein structure: at the primary (sequence) level and secondary structures level, as well as at the level of tertiary protein structure. The programme is suitable for processing data for all genes which have protein expressions deposited in the PDB database. The procedure steps implemented in the application: I) loading exon sequences and their coordinates from GenBank file as well as protein sequences: CDS from GenBank and amino acid sequence from PDB II) consensus sequence creation (comparing amino acid sequences from PDB file with the CDS sequence from GenBank file) III) matching exon coordinates IV) visualisation in 2D and 3D protein structures. Presented web-tool among others provides the color-coded graphical display of protein sequences and chains in three dimensional protein structures which are correlated with the corresponding exons.

Availability

http://149.156.12.53/ExonVisualiser/",2012-12-19 +24551370,"Cloudwave: distributed processing of ""big data"" from electrophysiological recordings for epilepsy clinical research using Hadoop.","Epilepsy is the most common serious neurological disorder affecting 50-60 million persons worldwide. Multi-modal electrophysiological data, such as electroencephalography (EEG) and electrocardiography (EKG), are central to effective patient care and clinical research in epilepsy. Electrophysiological data is an example of clinical ""big data"" consisting of more than 100 multi-channel signals with recordings from each patient generating 5-10GB of data. Current approaches to store and analyze signal data using standalone tools, such as Nihon Kohden neurology software, are inadequate to meet the growing volume of data and the need for supporting multi-center collaborative studies with real time and interactive access. We introduce the Cloudwave platform in this paper that features a Web-based intuitive signal analysis interface integrated with a Hadoop-based data processing module implemented on clinical data stored in a ""private cloud"". Cloudwave has been developed as part of the National Institute of Neurological Disorders and Strokes (NINDS) funded multi-center Prevention and Risk Identification of SUDEP Mortality (PRISM) project. The Cloudwave visualization interface provides real-time rendering of multi-modal signals with ""montages"" for EEG feature characterization over 2TB of patient data generated at the Case University Hospital Epilepsy Monitoring Unit. Results from performance evaluation of the Cloudwave Hadoop data processing module demonstrate one order of magnitude improvement in performance over 77GB of patient data. 
(Cloudwave project: http://prism.case.edu/prism/index.php/Cloudwave).",2013-11-16 +25510495,Update on RefSeq microbial genomes resources.,"NCBI RefSeq genome collection http://www.ncbi.nlm.nih.gov/genome represents all three major domains of life: Eukarya, Bacteria and Archaea as well as Viruses. Prokaryotic genome sequences are the most rapidly growing part of the collection. During the year of 2014 more than 10,000 microbial genome assemblies have been publicly released bringing the total number of prokaryotic genomes close to 30,000. We continue to improve the quality and usability of the microbial genome resources by providing easy access to the data and the results of the pre-computed analysis, and improving analysis and visualization tools. A number of improvements have been incorporated into the Prokaryotic Genome Annotation Pipeline. Several new features have been added to RefSeq prokaryotic genomes data processing pipeline including the calculation of genome groups (clades) and the optimization of protein clusters generation using pan-genome approach.",2014-12-15 +27153623,iDHS-EL: identifying DNase I hypersensitive sites by fusing three different modes of pseudo nucleotide composition into an ensemble learning framework.,"

Motivation

Regulatory DNA elements are associated with DNase I hypersensitive sites (DHSs). Accordingly, identification of DHSs will provide useful insights for in-depth investigation into the function of noncoding genomic regions.

Results

In this study, using the strategy of ensemble learning framework, we proposed a new predictor called iDHS-EL for identifying the location of DHS in human genome. It was formed by fusing three individual Random Forest (RF) classifiers into an ensemble predictor. The three RF operators were respectively based on the three special modes of the general pseudo nucleotide composition (PseKNC): (i) kmer, (ii) reverse complement kmer and (iii) pseudo dinucleotide composition. It has been demonstrated that the new predictor remarkably outperforms the relevant state-of-the-art methods in both accuracy and stability.

Availability and implementation

For the convenience of most experimental scientists, a web server for iDHS-EL is established at http://bioinformatics.hitsz.edu.cn/iDHS-EL, which is the first web-server predictor ever established for identifying DHSs, and by which users can easily get their desired results without the need to go through the mathematical details. We anticipate that iDHS-EL will become a very useful high throughput tool for genome analysis.

Contact

bliu@gordonlifescience.org or bliu@insun.hit.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-08 +25676918,wKGGSeq: A Comprehensive Strategy-Based and Disease-Targeted Online Framework to Facilitate Exome Sequencing Studies of Inherited Disorders.,"With the rapid advances in high-throughput sequencing technologies, exome sequencing and targeted region sequencing have become routine approaches for identifying mutations of inherited disorders in both genetics research and molecular diagnosis. There is an imminent need for comprehensive and easy-to-use downstream analysis tools to isolate causal mutations in exome sequencing studies. We have developed a user-friendly online framework, wKGGSeq, to provide systematic annotation, filtration, prioritization, and visualization functions for characterizing causal mutation(s) in exome sequencing studies of inherited disorders. wKGGSeq provides: (1) a novel strategy-based procedure for downstream analysis of a large amount of exome sequencing data and (2) a disease-targeted analysis procedure to facilitate clinical diagnosis of well-studied genetic diseases. In addition, it is also equipped with abundant online annotation functions for sequence variants. We demonstrate that wKGGSeq either outperforms or is comparable to two popular tools in several real exome sequencing samples. This tool will greatly facilitate the downstream analysis of exome sequencing data and can play a useful role for researchers and clinicians in identifying causal mutations of inherited disorders. 
The wKGGSeq is freely available at http://statgenpro.psychiatry.hku.hk/wkggseq or http://jjwanglab.org/wkggseq, and will be updated frequently.",2015-04-04 +25585107,A proxy design to leverage the interconnection of CoAP Wireless Sensor Networks with Web applications.,"In this paper, we present the design of a Constrained Application Protocol (CoAP) proxy able to interconnect Web applications based on Hypertext Transfer Protocol (HTTP) and WebSocket with CoAP based Wireless Sensor Networks. Sensor networks are commonly used to monitor and control physical objects or environments. Smart Cities represent applications of such a nature. Wireless Sensor Networks gather data from their surroundings and send them to a remote application. This data flow may be short or long lived. The traditional HTTP long-polling used by Web applications may not be adequate in long-term communications. To overcome this problem, we include the WebSocket protocol in the design of the CoAP proxy. We evaluate the performance of the CoAP proxy in terms of latency and memory consumption. The tests consider long and short-lived communications. In both cases, we evaluate the performance obtained by the CoAP proxy according to the use of WebSocket and HTTP long-polling.",2015-01-09 +26387008,WONKA: objective novel complex analysis for ensembles of protein-ligand structures.,"WONKA is a tool for the systematic analysis of an ensemble of protein-ligand structures. It makes the identification of conserved and unusual features within such an ensemble straightforward. WONKA uses an intuitive workflow to process structural co-ordinates. Ligand and protein features are summarised and then presented within an interactive web application. WONKA's power in consolidating and summarising large amounts of data is described through the analysis of three bromodomain datasets. 
Furthermore, and in contrast to many current methods, WONKA relates analysis to individual ligands, from which we find unusual and erroneous binding modes. Finally the use of WONKA as an annotation tool to share observations about structures is demonstrated. WONKA is freely available to download and install locally or can be used online at http://wonka.sgc.ox.ac.uk.",2015-09-19 +27153712,Computational clustering for viral reference proteomes.,"

Motivation

The enormous number of redundant sequenced genomes has hindered efforts to analyze and functionally annotate proteins. As the taxonomy of viruses is not uniformly defined, viral proteomes pose special challenges in this regard. Grouping viruses based on the similarity of their proteins at proteome scale can normalize against potential taxonomic nomenclature anomalies.

Results

We present Viral Reference Proteomes (Viral RPs), which are computed from complete virus proteomes within UniProtKB. Viral RPs based on 95, 75, 55, 35 and 15% co-membership in proteome similarity based clusters are provided. Comparison of our computational Viral RPs with UniProt's curator-selected Reference Proteomes indicates that the two sets are consistent and complementary. Furthermore, each Viral RP represents a cluster of virus proteomes that was consistent with virus or host taxonomy. We provide BLASTP search and FTP download of Viral RP protein sequences, and a browser to facilitate the visualization of Viral RPs.

Availability and implementation

http://proteininformationresource.org/rps/viruses/

Contact

chenc@udel.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-26 +22009677,PhenoM: a database of morphological phenotypes caused by mutation of essential genes in Saccharomyces cerevisiae.,"About one-fifth of the genes in the budding yeast are essential for haploid viability and cannot be functionally assessed using standard genetic approaches such as gene deletion. To facilitate genetic analysis of essential genes, we and others have assembled collections of yeast strains expressing temperature-sensitive (ts) alleles of essential genes. To explore the phenotypes caused by essential gene mutation we used a panel of genetically engineered fluorescent markers to explore the morphology of cells in the ts strain collection using high-throughput microscopy. Here, we describe the design and implementation of an online database, PhenoM (Phenomics of yeast Mutants), for storing, retrieving, visualizing and data mining the quantitative single-cell measurements extracted from micrographs of the ts mutant cells. PhenoM allows users to rapidly search and retrieve raw images and their quantified morphological data for genes of interest. The database also provides several data-mining tools, including a PhenoBlast module for phenotypic comparison between mutant strains and a Gene Ontology module for functional enrichment analysis of gene sets showing similar morphological alterations. The current PhenoM version 1.0 contains 78,194 morphological images and 1,909,914 cells covering six subcellular compartments or structures for 775 ts alleles spanning 491 essential genes. PhenoM is freely available at http://phenom.ccbr.utoronto.ca/.",2011-10-18 +26614127,MetaQUAST: evaluation of metagenome assemblies.,"

Unlabelled

During the past years we have witnessed the rapid development of new metagenome assembly methods. Although there are many benchmark utilities designed for single-genome assemblies, there is no well-recognized evaluation and comparison tool for metagenomic-specific analogues. In this article, we present MetaQUAST, a modification of QUAST, the state-of-the-art tool for genome assembly evaluation based on alignment of contigs to a reference. MetaQUAST addresses such metagenome dataset features as (i) unknown species content by detecting and downloading reference sequences, (ii) huge diversity by giving comprehensive reports for multiple genomes and (iii) presence of closely related species by detecting chimeric contigs. We demonstrate MetaQUAST performance by comparing several leading assemblers on one simulated and two real datasets.

Availability and implementation

http://bioinf.spbau.ru/metaquast

Contact

aleksey.gurevich@spbu.ru

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-26 +23426256,propy: a tool to generate various modes of Chou's PseAAC.,"

Summary

Sequence-derived structural and physiochemical features have been frequently used for analysing and predicting structural, functional, expression and interaction profiles of proteins and peptides. To facilitate extensive studies of proteins and peptides, we developed a freely available, open source python package called protein in python (propy) for calculating the widely used structural and physicochemical features of proteins and peptides from amino acid sequence. It computes five feature groups composed of 13 features, including amino acid composition, dipeptide composition, tripeptide composition, normalized Moreau-Broto autocorrelation, Moran autocorrelation, Geary autocorrelation, sequence-order-coupling number, quasi-sequence-order descriptors, composition, transition and distribution of various structural and physicochemical properties and two types of pseudo amino acid composition (PseAAC) descriptors. These features could be generally regarded as different Chou's PseAAC modes. In addition, it can also easily compute the previous descriptors based on user-defined properties, which are automatically available from the AAindex database.

Availability

The python package, propy, is freely available via http://code.google.com/p/protpy/downloads/list, and it runs on Linux and MS-Windows.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-19 +27318208,Statistical modeling of isoform splicing dynamics from RNA-seq time series data.,"

Motivation

Isoform quantification is an important goal of RNA-seq experiments, yet it remains problematic for genes with low expression or several isoforms. These difficulties may in principle be ameliorated by exploiting correlated experimental designs, such as time series or dosage response experiments. Time series RNA-seq experiments, in particular, are becoming increasingly popular, yet there are no methods that explicitly leverage the experimental design to improve isoform quantification.

Results

Here, we present DICEseq, the first isoform quantification method tailored to correlated RNA-seq experiments. DICEseq explicitly models the correlations between different RNA-seq experiments to aid the quantification of isoforms across experiments. Numerical experiments on simulated datasets show that DICEseq yields more accurate results than state-of-the-art methods, an advantage that can become considerable at low coverage levels. On real datasets, our results show that DICEseq provides substantially more reproducible and robust quantifications, increasing the correlation of estimates from replicate datasets by up to 10% on genes with low or moderate expression levels (bottom third of all genes). Furthermore, DICEseq permits to quantify the trade-off between temporal sampling of RNA and depth of sequencing, frequently an important choice when planning experiments. Our results have strong implications for the design of RNA-seq experiments, and offer a novel tool for improved analysis of such datasets.

Availability and implementation

Python code is freely available at http://diceseq.sf.net

Contact

G.Sanguinetti@ed.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-17 +26186784,Referenceless Prediction of Perceptual Fog Density and Perceptual Image Defogging.,"We propose a referenceless perceptual fog density prediction model based on natural scene statistics (NSS) and fog aware statistical features. The proposed model, called Fog Aware Density Evaluator (FADE), predicts the visibility of a foggy scene from a single image without reference to a corresponding fog-free image, without dependence on salient objects in a scene, without side geographical camera information, without estimating a depth-dependent transmission map, and without training on human-rated judgments. FADE only makes use of measurable deviations from statistical regularities observed in natural foggy and fog-free images. Fog aware statistical features that define the perceptual fog density index derive from a space domain NSS model and the observed characteristics of foggy images. FADE not only predicts perceptual fog density for the entire image, but also provides a local fog density index for each patch. The predicted fog density using FADE correlates well with human judgments of fog density taken in a subjective study on a large foggy image database. As applications, FADE not only accurately assesses the performance of defogging algorithms designed to enhance the visibility of foggy images, but also is well suited for image defogging. A new FADE-based referenceless perceptual image defogging, dubbed DEnsity of Fog Assessment-based DEfogger (DEFADE) achieves better results for darker, denser foggy images as well as on standard foggy images than the state of the art defogging methods. 
A software release of FADE and DEFADE is available online for public use: http://live.ece.utexas.edu/research/fog/index.html.",2015-07-15 +26177200,Novel Phenotypic Outcomes Identified for a Public Collection of Approved Drugs from a Publicly Accessible Panel of Assays.,"Phenotypic assays have a proven track record for generating leads that become first-in-class therapies. Whole cell assays that inform on a phenotype or mechanism also possess great potential in drug repositioning studies by illuminating new activities for the existing pharmacopeia. The National Center for Advancing Translational Sciences (NCATS) pharmaceutical collection (NPC) is the largest reported collection of approved small molecule therapeutics that is available for screening in a high-throughput setting. Via a wide-ranging collaborative effort, this library was analyzed in the Open Innovation Drug Discovery (OIDD) phenotypic assay modules publicly offered by Lilly. The results of these tests are publically available online at www.ncats.nih.gov/expertise/preclinical/pd2 and via the PubChem Database (https://pubchem.ncbi.nlm.nih.gov/) (AID 1117321). Phenotypic outcomes for numerous drugs were confirmed, including sulfonylureas as insulin secretagogues and the anti-angiogenesis actions of multikinase inhibitors sorafenib, axitinib and pazopanib. Several novel outcomes were also noted including the Wnt potentiating activities of rotenone and the antifolate class of drugs, and the anti-angiogenic activity of cetaben.",2015-07-15 +28488814,iPhos-PseEvo: Identifying Human Phosphorylated Proteins by Incorporating Evolutionary Information into General PseAAC via Grey System Theory. ,"Protein phosphorylation plays a critical role in human body by altering the structural conformation of a protein, causing it to become activated/deactivated, or functional modification. Given an uncharacterized protein sequence, can we predict whether it may be phosphorylated or may not? 
This is no doubt a very meaningful problem for both basic research and drug development. Unfortunately, to our best knowledge, so far no high throughput bioinformatics tool whatsoever has been developed to address such a very basic but important problem due to its extreme complexity and the lack of sufficient training data. Here we proposed a predictor called iPhos-PseEvo by (1) incorporating the protein sequence evolutionary information into the general pseudo amino acid composition (PseAAC) via the grey system theory, (2) balancing out the skewed training datasets by the asymmetric bootstrap approach, and (3) constructing an ensemble predictor by fusing an array of individual random forest classifiers through a voting system. Rigorous jackknife tests have indicated that very promising success rates have been achieved by iPhos-PseEvo even for such a difficult problem. A user-friendly web-server for iPhos-PseEvo has been established at http://www.jci-bioinfo.cn/iPhos-PseEvo, by which users can easily obtain their desired results without the need to go through the complicated mathematical equations involved. It has not escaped our notice that the formulation and approach presented here can be used to analyze many other problems in protein science as well.",2016-05-12 +25897115,pyDockSAXS: protein-protein complex structure by SAXS and computational docking.,"Structural characterization of protein-protein interactions at molecular level is essential to understand biological processes and identify new therapeutic opportunities. However, atomic resolution structural techniques cannot keep pace with current advances in interactomics. Low-resolution structural techniques, such as small-angle X-ray scattering (SAXS), can be applied at larger scale, but they miss atomic details. For efficient application to protein-protein complexes, low-resolution information can be combined with theoretical methods that provide energetic description and atomic details of the interactions. 
Here we present the pyDockSAXS web server (http://life.bsc.es/pid/pydocksaxs) that provides an automatic pipeline for modeling the structure of a protein-protein complex from SAXS data. The method uses FTDOCK to generate rigid-body docking models that are subsequently evaluated by a combination of pyDock energy-based scoring function and their capacity to describe SAXS data. The only required input files are structural models for the interacting partners and a SAXS curve. The server automatically provides a series of structural models for the complex, sorted by the pyDockSAXS scoring function. The user can also upload a previously computed set of docking poses, which opens the possibility to filter the docking solutions by potential interface residues or symmetry restraints. The server is freely available to all users without restriction.",2015-04-20 +25260021,Methylation plotter: a web tool for dynamic visualization of DNA methylation data.,"Methylation plotter is a Web tool that allows the visualization of methylation data in a user-friendly manner and with publication-ready quality. The user is asked to introduce a file containing the methylation status of a genomic region. This file can contain up to 100 samples and 100 CpGs. Optionally, the user can assign a group for each sample (i.e. whether a sample is a tumoral or normal tissue). After the data upload, the tool produces different graphical representations of the results following the most commonly used styles to display this type of data. They include an interactive plot that summarizes the status of every CpG site and for every sample in lollipop or grid styles. Methylation values ranging from 0 (unmethylated) to 1 (fully methylated) are represented using a gray color gradient. 
A practical feature of the tool allows the user to choose from different types of arrangement of the samples in the display: for instance, sorting by overall methylation level, by group, by unsupervised clustering or just following the order in which data were entered. In addition to the detailed plot, Methylation plotter produces a methylation profile plot that summarizes the status of the scrutinized region, a boxplot that sums up the differences between groups (if any) and a dendrogram that classifies the data by unsupervised clustering. Coupled with this analysis, descriptive statistics and testing for differences at both CpG and group levels are provided. The implementation is based in R/shiny, providing a highly dynamic user interface that generates quality graphics without the need of writing R code. Methylation plotter is freely available at http://gattaca.imppc.org:3838/methylation_plotter/.",2014-06-07 +26607491,Synchronized navigation and comparative analyses across Ensembl complete bacterial genomes with INSYGHT.,"

Motivation

High-throughput sequencing technologies provide access to an increasing number of bacterial genomes. Today, many analyses involve the comparison of biological properties among many strains of a given species, or among species of a particular genus. Tools that can help the microbiologist with these tasks become increasingly important.

Results

Insyght is a comparative visualization tool whose core features combine a synchronized navigation across genomic data of multiple organisms with a versatile interoperability between complementary views. In this work, we have greatly increased the scope of the Insyght public dataset by including 2688 complete bacterial genomes available in Ensembl thus vastly improving its phylogenetic coverage. We also report the development of a virtual machine that allows users to easily set up and customize their own local Insyght server.

Availability and implementation

http://genome.jouy.inra.fr/Insyght

Contact

Thomas.Lacroix@jouy.inra.fr.",2015-11-24 +26183225,Near East University Genetic Mutation Database (NEU-GD): The first mutation database of Northern Cyprus.,"The health care system is negatively affected by the genetic disorders that lead to an increasing rate of morbidity and neonatal deaths and affect adults as well. These create a substantial government's psychosocial and economic burden on clinicians, patients and their families with the advancement in the field of genetics. There has been a tremendous increase in the rate in which diseases associated with variant DNA sequences are being sought and identified. The goal behind the creation of Near East University Genetic Mutation Database (NEU-GD) is to map and apprehend the patterns of common genetic diversity in the human genetic makeup in order to accelerate the search for the genetic causes of human disease. NEU-GD will allow scientists to generate extraordinarily useful information such as allelic variations among population, and description of the genetic blueprint of mutations occurring in human beings. In this communication we report the construction of the first genetic mutation database for the people belonging to different ethnic groups living in North Cyprus (http://genetics-db.neu.edu.tr/). Therefore NEU-GD can serve as an important tool available online for molecular genetic testing of inherited disorder and persuade for further investigation of novel genetic disorders in North Cyprus population.",2015-07-14 +24508279,Meta-QC-Chain: comprehensive and fast quality control method for metagenomic data.,"Next-generation sequencing (NGS) technology has revolutionized and significantly impacted metagenomic research. However, the NGS data usually contains sequencing artifacts such as low-quality reads and contaminating reads, which will significantly compromise downstream analysis. 
Many quality control (QC) tools have been proposed, however, few of them have been verified to be suitable or efficient for metagenomic data, which are composed of multiple genomes and are more complex than other kinds of NGS data. Here we present a metagenomic data QC method named Meta-QC-Chain. Meta-QC-Chain combines multiple QC functions: technical tests describe input data status and identify potential errors, quality trimming filters poor sequencing-quality bases and reads, and contamination screening identifies higher eukaryotic species, which are considered as contamination for metagenomic data. Most computing processes are optimized based on parallel programming. Testing on an 8-GB real dataset showed that Meta-QC-Chain trimmed low sequencing-quality reads and contaminating reads, and the whole quality control procedure was completed within 20 min. Therefore, Meta-QC-Chain provides a comprehensive, useful and high-performance QC tool for metagenomic data. Meta-QC-Chain is publicly available for free at: http://computationalbioenergy.org/meta-qc-chain.html.",2014-02-04 +23353585,ViewMotions Rainbow: a new method to illustrate molecular motions in proteins.,"The biological functions of many enzymes are often coupled with significant conformational changes. The end states of these conformational changes can often be determined by X-ray crystallography. These X-ray structures are snapshots of the two extreme conformations in which the macromolecule exists, but the dynamic movements between the states are not easily visualized in a two-dimensional illustration. Here we have developed a new method to visualize macromolecular motions called a ViewMotions Rainbow diagram. These diagrams show the initial and final states overlaid along with approximately 30 intermediate structures calculated by linear interpolation of the backbone coordinates of the initial and final states. 
This group of structures is then spectrally colored from the initial structure in blue to the final structure in red. ViewMotions Rainbow diagrams provide the reader with a much easier way to understand the macromolecular motions using a single two-dimensional illustration. Since producing these diagrams requires a number of different software packages, we have setup the ViewMotions Web Server (http://viewmotions.bc.edu) to automatically generate these diagrams from two Protein Data Bank files or from the Database of Macromolecular Movements (http://molmovdb.org).",2013-01-05 +25558376,The Midlife in the United States (MIDUS) Series: A National Longitudinal Study of Health and Well-being. ,"Midlife in the United States (MIDUS) is a national longitudinal study of health and well-being (http://midus.wisc.edu/). It was conceived by a multidisciplinary team of scholars interested in understanding aging as an integrated bio-psycho-social process, and as such it includes data collected in a wide array of research protocols using a variety of survey and non-survey instruments. The data captured by these different protocols (comprising around 20,000 variables) represent survey measures, cognitive assessments, daily stress diaries, clinical, biomarker and neuroscience data which are contained in separate flat or stacked data files with a common ID system that allows easy data merges among them. All MIDUS datasets and documentation are archived at the ICPSR (http://www.icpsr.umich.edu/) repository at the University of Michigan and are publicly available in a variety of formats and statistical packages. Special attention is given to providing clear user-friendly documentation; the study has embraced the Data Documentation Initiative (DDI) metadata standard and produces DDI-Lifecycle compliant codebooks. Potential for secondary use of MIDUS is high and actively encouraged. 
The study has become very popular with the research public as measured by data downloads and citation counts (see Reuse Potential below).",2014-01-01 +25435486,"Commentary on ""spectral characterization of the binding and conformational changes of serum albumins upon interaction with an anticancer drug, anastrozole"".","The manuscript by R. Punith and J. Seetharamappa (http://dx.doi.org/10.1016/j.saa.201202.038) presents the interaction between serum albumin from human (HAS) and from bovine (BSA) with a drug called Anastrozole (AZ). The drug is on the market for treating patients with breast cancer after surgery and for metastasis in women. The study utilizes various spectroscopic techniques such as; fluorescence, synchronous fluorescence, 3D fluorescence measurements, FTIR, CD and UV. Although there are some relatively minor comments on the paper, the main point that needs to be reviewed by the authors is the result of FTIR measurements. Based on the data provided in the text (there is no figure), the protein sample is not in its native state, which makes the data inconvenient to be used in drawing conclusions. Authors are kindly requested to take another look at the FTIR experiments.",2014-11-18 +25890305,ScaffMatch: scaffolding algorithm based on maximum weight matching.,"

Motivation

Next-generation high-throughput sequencing has become a state-of-the-art technique in genome assembly. Scaffolding is one of the main stages of the assembly pipeline. During this stage, contigs assembled from the paired-end reads are merged into bigger chains called scaffolds. Because of a high level of statistical noise, chimeric reads, and genome repeats the problem of scaffolding is a challenging task. Current scaffolding software packages widely vary in their quality and are highly dependent on the read data quality and genome complexity. There are no clear winners and multiple opportunities for further improvements of the tools still exist.

Results

This article presents an efficient scaffolding algorithm ScaffMatch that is able to handle reads with both short (<600 bp) and long (>35 000 bp) insert sizes producing high-quality scaffolds. We evaluate our scaffolding tool with the F score and other metrics (N50, corrected N50) on eight datasets comparing it with the most available packages. Our experiments show that ScaffMatch is the tool of preference for the most datasets.

Availability and implementation

The source code is available at http://alan.cs.gsu.edu/NGS/?q=content/scaffmatch.

Contact

mandric@cs.gsu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-17 +27423432,"No effects of quercetin from onion skin extract on serum leptin and adiponectin concentrations in overweight-to-obese patients with (pre-)hypertension: a randomized double-blinded, placebo-controlled crossover trial.","

Purpose

Chronic low-level systemic and adipose tissue inflammation has been identified as a major etiologic factor in many chronic diseases, including hypertension and cardiovascular diseases. Evidence from experimental studies suggests anti-inflammatory effects of dietary flavonols such as quercetin.

Methods

We investigated the effects of regular intake of quercetin on leptin, adiponectin, biomarkers of inflammation, glucose and insulin in overweight-to-obese patients with pre- and stage 1 hypertension. Another objective was to assess the safety of daily quercetin supplementation measured by parameters of liver and kidney function and of hematology. Subjects (n = 70) were randomized to receive a supra-nutritional dose of 162 mg/d quercetin or placebo in a double-blinded, placebo-controlled crossover trial with 6-week treatment periods separated by a 6-week washout period. Two subjects dropped out for personal reasons. Only data from the remaining 68 subjects were included in the analysis.

Results

Compared to placebo, quercetin did not significantly affect serum concentrations of leptin and adiponectin, HOMA-AD or the ratios of leptin/adiponectin and adiponectin/leptin. Neither quercetin nor placebo significantly changed serum C-reactive protein and plasma tumor necrosis factor alpha. Compared to placebo, quercetin did not significantly affect glucose, insulin, HOMA-IR, blood biomarkers of liver and renal function, hematology and serum electrolytes.

Conclusion

A supra-nutritional dose of 162 mg/d quercetin from onion skin extract for 6 weeks is safe but without significant effects on parameters of systemic and adipose tissue inflammation as well as glucose and insulin in overweight-to-obese subjects with (pre-)hypertension. This trial was registered at www.germanctr.de/ and http://apps.who.int/trialsearch/ as DRKS00000555.",2016-07-16 +27076459,Inferring microRNA-disease associations by random walk on a heterogeneous network with multiple data sources.,"Since the discovery of the regulatory function of microRNA (miRNA), increased attention has focused on identifying the relationship between miRNA and disease. It has been suggested that computational method are an efficient way to identify potential disease-related miRNAs for further confirmation using biological experiments. In this paper, we first highlighted three limitations commonly associated with previous computational methods. To resolve these limitations, we established disease similarity subnetwork and miRNA similarity subnetwork by integrating multiple data sources, where the disease similarity is composed of disease semantic similarity and disease functional similarity, and the miRNA similarity is calculated using the miRNA-target gene and miRNA-lncRNA (long non-coding RNA) associations. Then, a heterogeneous network was constructed by connecting the disease similarity subnetwork and the miRNA similarity subnetwork using the known miRNA-disease associations. We extended random walk with restart to predict miRNA-disease associations in the heterogeneous network. The leave-one-out cross-validation achieved an average area under the curve (AUC) of 0:8049 across 341 diseases and 476 miRNAs. For five-fold cross-validation, our method achieved an AUC from 0:7970 to 0:9249 for 15 human diseases. Case studies further demonstrated the feasibility of our method to discover potential miRNA-disease associations. 
An online service for prediction is freely available at http://ifmda.aliapp.com.",2016-04-05 +27044684,Identification of Small-Molecule Frequent Hitters of Glutathione S-Transferase-Glutathione Interaction.,"In high-throughput screening (HTS) campaigns, the binding of glutathione S-transferase (GST) to glutathione (GSH) is used for detection of GST-tagged proteins in protein-protein interactions or enzyme assays. However, many false-positives, so-called frequent hitters (FH), arise that either prevent GST/GSH interaction or interfere with assay signal generation or detection. To identify GST-FH compounds, we analyzed the data of five independent AlphaScreen-based screening campaigns to classify compounds that inhibit the GST/GSH interaction. We identified 53 compounds affecting GST/GSH binding but not influencing His-tag/Ni(2+)-NTA interaction and general AlphaScreen signals. The structures of these 53 experimentally identified GST-FHs were analyzed in chemoinformatic studies to categorize substructural features that promote interference with GST/GSH binding. Here, we confirmed several existing chemoinformatic filters and more importantly extended them as well as added novel filters that specify compounds with anti-GST/GSH activity. Selected compounds were also tested using different antibody-based GST detection technologies and exhibited no interference clearly demonstrating specificity toward their GST/GSH interaction. Thus, these newly described GST-FH will further contribute to the identification of FH compounds containing promiscuous substructures. The developed filters were uploaded to the OCHEM website (http://ochem.eu) and are publicly accessible for analysis of future HTS results.",2016-04-04 +25886981,PDIviz: analysis and visualization of protein-DNA binding interfaces.,"

Unlabelled

Specific recognition of DNA by proteins is a crucial step of many biological processes. PDIviz is a plugin for the PyMOL molecular visualization system that analyzes protein-DNA binding interfaces by comparing the solvent accessible surface area of the complex against the free protein and free DNA. The plugin provides three distinct three-dimensional visualization modes to highlight interactions with DNA bases and backbone, major and minor groove, and with atoms of different pharmacophoric type (hydrogen bond donors/acceptors, hydrophobic and thymine methyl). Each mode comes in three styles to focus the visual analysis on the protein or DNA side of the interface, or on the nucleotide sequence. PDIviz allows for the generation of publication quality images, all calculated data can be written to disk, and a command line interface is provided for automating tasks. The plugin may be helpful for the detailed identification of regions involved in DNA base and shape readout, and can be particularly useful in rapidly pinpointing the overall mode of interaction.

Availability and implementation

Freely available at http://melolab.org/pdiviz/ as a PyMOL plugin. Tested with incentive, educational, and open source versions of PyMOL on Windows, Mac and Linux systems.

Contact

aschueller@bio.puc.cl

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-16 +27517530,The Prevalence and Use of Walking Loops in Neighborhood Parks: A National Study.,"

Background

Previous studies indicate that the design of streets and sidewalks can influence physical activity among residents. Park features also influence park use and park-based physical activity. Although individuals can walk on streets and sidewalks, walking loops in parks offer a setting to walk in nature and to avoid interruptions from traffic.

Objectives

Here we describe the use of walking loops in parks and compare the number of park users and their physical activity in urban neighborhood parks with and without walking loops.

Methods

We analyzed data from the National Study of Neighborhood Parks in which a representative sample of neighborhood parks (n = 174) from 25 U.S. cities with > 100,000 population were observed systematically to document facilities and park users by age group and sex. We compared the number of people and their physical activity in parks with and without walking loops, controlling for multiple factors, including park size, facilities, and population density.

Results

Overall, compared with parks without walking loops, on average during an hourly observation, parks with walking loops had 80% more users (95% CI: 42, 139%), and levels of moderate-to-vigorous physical activity were 90% higher (95% CI: 49, 145%). The additional park use and park-based physical activity occurred not only on the walking loops but throughout the park.

Conclusions

Walking loops may be a promising means of increasing population level physical activity. Further studies are needed to confirm a causal relationship. Citation: Cohen DA, Han B, Evenson KR, Nagel C, McKenzie TL, Marsh T, Williamson S, Harnik P. 2017. The prevalence and use of walking loops in neighborhood parks: a national study. Environ Health Perspect 125:170-174; http://dx.doi.org/10.1289/EHP293.",2016-08-12 +23584835,Identification and prioritization of novel uncharacterized peptidases for biochemical characterization.,"Genome sequencing projects are generating enormous amounts of biological data that require analysis, which in turn identifies genes and proteins that require characterization. Enzymes that act on proteins are especially difficult to characterize because of the time required to distinguish one from another. This is particularly true of peptidases, the enzymes that activate, inactivate and degrade proteins. This article aims to identify clusters of sequences each of which represents the species variants of a single putative peptidase that is widely distributed and is thus merits biochemical characterization. The MEROPS database maintains large collections of sequences, references, substrate cleavage positions and inhibitor interactions of peptidases and their homologues. MEROPS also maintains a hierarchical classification of peptidase homologues, in which sequences are clustered as species variants of a single peptidase; homologous sequences are assembled into a family; and families are clustered into a clan. For each family, an alignment and a phylogenetic tree are generated. By assigning an identifier to a peptidase that has been biochemically characterized from a particular species (called a holotype), the identifier can be automatically extended to sequences from other species that cluster with the holotype. This permits transference of annotation from the holotype to other members of the cluster. 
By extending this concept to all peptidase homologues (including those of unknown function that have not been characterized) from model organisms representing all the major divisions of cellular life, clusters of sequences representing putative peptidases can also be identified. The 42 most widely distributed of these putative peptidases have been identified and discussed here and are prioritized as ideal candidates for biochemical characterization. Database URL: http://merops.sanger.ac.uk.",2013-04-12 +22606288,"MGEx-Udb: a mammalian uterus database for expression-based cataloguing of genes across conditions, including endometriosis and cervical cancer.","

Background

Gene expression profiling of uterus tissue has been performed in various contexts, but a significant amount of the data remains underutilized as it is not covered by the existing general resources.

Methodology/principal findings

We curated 2254 datasets from 325 uterus related mass scale gene expression studies on human, mouse, rat, cow and pig species. We then computationally derived a 'reliability score' for each gene's expression status (transcribed/dormant), for each possible combination of conditions and locations, based on the extent of agreement or disagreement across datasets. The data and derived information has been compiled into the Mammalian Gene Expression Uterus database (MGEx-Udb, http://resource.ibab.ac.in/MGEx-Udb/). The database can be queried with gene names/IDs, sub-tissue locations, as well as various conditions such as the cervical cancer, endometrial cycles and disorders, and experimental treatments. Accordingly, the output would be a) transcribed and dormant genes listed for the queried condition/location, or b) expression profile of the gene of interest in various uterine conditions. The results also include the reliability score for the expression status of each gene. MGEx-Udb also provides information related to Gene Ontology annotations, protein-protein interactions, transcripts, promoters, and expression status by other sequencing techniques, and facilitates various other types of analysis of the individual genes or co-expressed gene clusters.

Conclusions/significance

In brief, MGEx-Udb enables easy cataloguing of co-expressed genes and also facilitates bio-marker discovery for various uterine conditions.",2012-05-11 +26162598,Exploration of SNP variants affecting hair colour prediction in Europeans.,"DNA profiling is a key tool for forensic analysis; however, current methods identify a suspect either by direct comparison or from DNA database searches. In cases with unidentified suspects, prediction of visible physical traits e.g. pigmentation or hair distribution of the DNA donors can provide important probative information. This study aimed to explore single nucleotide polymorphism (SNP) variants for their effect on hair colour prediction. A discovery panel of 63 SNPs consisting of already established hair colour markers from the HIrisPlex hair colour phenotyping assay as well as additional markers for which associations to human pigmentation traits were previously identified was used to develop multiplex assays based on SNaPshot single-base extension technology. A genotyping study was performed on a range of European populations (n = 605). Hair colour phenotyping was accomplished by matching donor's hair to a graded colour category system of reference shades and photography. Since multiple SNPs in combination contribute in varying degrees to hair colour predictability in Europeans, we aimed to compile a compact marker set that could provide a reliable hair colour inference from the fewest SNPs. The predictive approach developed uses a naïve Bayes classifier to provide hair colour assignment probabilities for the SNP profiles of the key SNPs and was embedded into the Snipper online SNP classifier ( http://mathgene.usc.es/snipper/ ). Results indicate that red, blond, brown and black hair colours are predictable with informative probabilities in a high proportion of cases. 
Our study resulted in the identification of 12 most strongly associated SNPs to hair pigmentation variation in six genes.",2015-07-11 +23715991,Uniform curation protocol of metazoan signaling pathways to predict novel signaling components.,"A relatively large number of signaling databases available today have strongly contributed to our understanding of signaling pathway properties. However, pathway comparisons both within and across databases are currently severely hampered by the large variety of data sources and the different levels of detail of their information content (on proteins and interactions). In this chapter, we present a protocol for a uniform curation method of signaling pathways, which intends to overcome this insufficiency. This uniformly curated database called SignaLink ( http://signalink.org ) allows us to systematically transfer pathway annotations between different species, based on orthology, and thereby to predict novel signaling pathway components. Thus, this method enables the compilation of a comprehensive signaling map of a given species and identification of new potential drug targets in humans. We strongly believe that the strict curation protocol we have established to compile a signaling pathway database can also be applied for the compilation of other (e.g., metabolic) databases. Similarly, the detailed guide to the orthology-based prediction of novel signaling components across species may also be utilized for predicting components of other biological processes.",2013-01-01 +23284086,TreeTFDB: an integrative database of the transcription factors from six economically important tree crops for functional predictions and comparative and functional genomics.,"Crop plants, whose productivity is affected by a wide range of growing and environmental conditions, are grown for economic purposes. 
Transcription factors (TFs) play central role in regulation of many biological processes, including plant development and responses to environmental stimuli, by activating or repressing spatiotemporal gene expression. Here, we describe the TreeTFDB (http://treetfdb.bmep.riken.jp/index.pl) that houses the TF repertoires of six economically important tree crop species: Jatropha curcas, papaya, cassava, poplar, castor bean and grapevine. Among these, the TF repertoire of J. curcas has not been reported by any other TF databases. In addition to their basic information, such as sequence and domain features, domain alignments, gene ontology assignment and sequence comparison, information on available full-length cDNAs, identity and positions of all types of known cis-motifs found in the promoter regions, gene expression data are provided. With its newly designed and friendly interface and its unique features, TreeTFDB will enable research community to predict the functions and provide access to available genetic resources for performing comparative and functional genomics of the crop TFs, either individually or at whole family level, in a comprehensive and convenient manner.",2013-01-02 +26169944,miFRame: analysis and visualization of miRNA sequencing data in neurological disorders.,"

Background

While in the past decades nucleic acid analysis has been predominantly carried out using quantitative low- and high-throughput approaches such as qRT-PCR and microarray technology, next-generation sequencing (NGS) with its single base resolution is now frequently applied in DNA and RNA testing. Especially for small non-coding RNAs such as microRNAs there is a need for analysis and visualization tools that facilitate interpretation of the results also for clinicians.

Methods

We developed miFRame, which supports the analysis of human small RNA NGS data. Our tool carries out different data analyses for known as well as predicted novel mature microRNAs from known precursors and presents the results in a well interpretable manner. Analyses include among others expression analysis of precursors and mature miRNAs, detection of novel precursors and detection of potential iso-microRNAs. Aggregation of results from different users moreover allows for evaluation whether remarkable results, such as novel mature miRNAs, are indeed specific for the respective experimental set-up or are frequently detected across a broad range of experiments.

Results

We demonstrate the capabilities of miFRame, which is freely available at http://www.ccb.uni-saarland.de/miframe on two studies, circulating biomarker screening for Multiple Sclerosis (cohort includes clinically isolated syndrome, relapse remitting MS, matched controls) as well as Alzheimer Disease (cohort includes Alzheimer Disease, Mild Cognitive Impairment, matched controls). Here, our tool allowed for an improved biomarker discovery by identifying likely false positive marker candidates.",2015-07-14 +23677940,Identification of hidden relationships from the coupling of hydrophobic cluster analysis and domain architecture information.,"

Motivation

Describing domain architecture is a critical step in the functional characterization of proteins. However, some orphan domains do not match any profile stored in dedicated domain databases and are thereby difficult to analyze.

Results

We present here an original novel approach, called TREMOLO-HCA, for the analysis of orphan domain sequences and inspired from our experience in the use of Hydrophobic Cluster Analysis (HCA). Hidden relationships between protein sequences can be more easily identified from the PSI-BLAST results, using information on domain architecture, HCA plots and the conservation degree of amino acids that may participate in the protein core. This can lead to reveal remote relationships with known families of domains, as illustrated here with the identification of a hidden Tudor tandem in the human BAHCC1 protein and a hidden ET domain in the Saccharomyces cerevisiae Taf14p and human AF9 proteins. The results obtained in such a way are consistent with those provided by HHPRED, based on pairwise comparisons of HHMs. Our approach can, however, be applied even in absence of domain profiles or known 3D structures for the identification of novel families of domains. It can also be used in a reverse way for refining domain profiles, by starting from known protein domain families and identifying highly divergent members, hitherto considered as orphan.

Availability

We provide a possible integration of this approach in an open TREMOLO-HCA package, which is fully implemented in python v2.7 and is available on request. Instructions are available at http://www.impmc.upmc.fr/∼callebau/tremolohca.html.

Contact

isabelle.callebaut@impmc.upmc.fr

Supplementary information

Supplementary Data are available at Bioinformatics online.",2013-05-14 +26484198,"Technical data of the transcriptomic analysis performed on tsetse fly symbionts, Sodalis glossinidius and Wigglesworthia glossinidia, harbored, respectively by non-infected, Trypanosoma brucei gambiense infected and self-cured Glossina palpalis gambiensis tsetse flies.","Microarray is a powerful and cheap method to identify and quantify gene expression in particular in a mix of total RNA extracted from biological samples such as the tsetse fly gut, including several organisms (here, the fly tissue and the intestinal microorganisms). Besides, biostatistics and bioinformatics allow comparing the transcriptomes from samples collected from differently treated flies, and thus to identify and quantify differential expressed genes. Here, we describe in details a whole microarray transcriptome dataset produced from tsetse flies symbionts, Sodalis glossinidius and Wigglesworthia glossinidia. The tsetse fly midguts were sampled at key steps of tsetse fly infection by trypanosomes, 3-day and 10-day sampling times to target differentially expressed genes involved, respectively, in early events associated with trypanosome entry into the midgut and with the establishment of infection; 20 days to target the genes involved in events occurring later in the infection process. We describe in detail the methodology applied for analyzing the microarray data including differential expression as well as functional annotation of the identified symbiont genes. Both the microarray data and design are available at http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE48360;http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE48361;http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE55931.",2015-04-14 +26589273,CellTracker (not only) for dummies.,"

Motivation

Time-lapse experiments play a key role in studying the dynamic behavior of cells. Single-cell tracking is one of the fundamental tools for such analyses. The vast majority of the recently introduced cell tracking methods are limited to fluorescently labeled cells. An equally important limitation is that most software cannot be effectively used by biologists without reasonable expertise in image processing. Here we present CellTracker, a user-friendly open-source software tool for tracking cells imaged with various imaging modalities, including fluorescent, phase contrast and differential interference contrast (DIC) techniques.

Availability and implementation

CellTracker is written in MATLAB (The MathWorks, Inc., USA). It works with Windows, Macintosh and UNIX-based systems. Source code and graphical user interface (GUI) are freely available at: http://celltracker.website/

Contact

horvath.peter@brc.mta.hu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-20 +25813048,RiceNet v2: an improved network prioritization server for rice genes.,"Rice is the most important staple food crop and a model grass for studies of bioenergy crops. We previously published a genome-scale functional network server called RiceNet, constructed by integrating diverse genomics data and demonstrated the use of the network in genetic dissection of rice biotic stress responses and its usefulness for other grass species. Since the initial construction of the network, there has been a significant increase in the amount of publicly available rice genomics data. Here, we present an updated network prioritization server for Oryza sativa ssp. japonica, RiceNet v2 (http://www.inetbio.org/ricenet), which provides a network of 25 765 genes (70.1% of the coding genome) and 1 775 000 co-functional links. Ricenet v2 also provides two complementary methods for network prioritization based on: (i) network direct neighborhood and (ii) context-associated hubs. RiceNet v2 can use genes of the related subspecies O. sativa ssp. indica and the reference plant Arabidopsis for versatility in generating hypotheses. We demonstrate that RiceNet v2 effectively identifies candidate genes involved in rice root/shoot development and defense responses, demonstrating its usefulness for the grass research community.",2015-03-26 +26411867,XLmap: an R package to visualize and score protein structure models based on sites of protein cross-linking.,

Motivation

Chemical cross-linking with mass spectrometry (XL-MS) provides structural information for proteins and protein complexes in the form of crosslinked residue proximity and distance constraints between reactive residues. Utilizing spatial information derived from cross-linked residues can therefore assist with structural modeling of proteins. Selection of computationally derived model structures of proteins remains a major challenge in structural biology. The comparison of site interactions resulting from XL-MS with protein structure contact maps can assist the selection of structural models.

Availability and implementation

XLmap was implemented in R and is freely available at: http://brucelab.gs.washington.edu/software.php.

Contact

jimbruce@uw.edu

Supplementary information

Supplementary data are available at Bioinformatics online.,2015-09-26 +24309102,supraHex: an R/Bioconductor package for tabular omics data analysis using a supra-hexagonal map.,"Biologists are increasingly confronted with the challenge of quickly understanding genome-wide biological data, which usually involve a large number of genomic coordinates (e.g. genes) but a much smaller number of samples. To meet the need for data of this shape, we present an open-source package called 'supraHex' for training, analysing and visualising omics data. This package devises a supra-hexagonal map to self-organise the input data, offers scalable functionalities for post-analysing the map, and more importantly, allows for overlaying additional data for multilayer omics data comparisons. Via applying to DNA replication timing data of mouse embryogenesis, we demonstrate that supraHex is capable of simultaneously carrying out gene clustering and sample correlation, providing intuitive visualisation at each step of the analysis. By overlaying CpG and expression data onto the trained replication-timing map, we also show that supraHex is able to intuitively capture an inherent relationship between late replication, low CpG density promoters and low expression levels. As part of the Bioconductor project, supraHex makes accessible to a wide community in a simple way, what would otherwise be a complex framework for the ultrafast understanding of any tabular omics data, both scientifically and artistically. This package can run on Windows, Mac and Linux, and is freely available together with many tutorials on featuring real examples at http://supfam.org/supraHex.",2013-12-02 +27597435,LMAP: Lightweight Multigene Analyses in PAML.,"

Background

Uncovering how phenotypic diversity arises and is maintained in nature has long been a major interest of evolutionary biologists. Recent advances in genome sequencing technologies have remarkably increased the efficiency to pinpoint genes involved in the adaptive evolution of phenotypes. Reliability of such findings is most often examined with statistical and computational methods using Maximum Likelihood codon-based models (i.e., site, branch, branch-site and clade models), such as those available in codeml from the Phylogenetic Analysis by Maximum Likelihood (PAML) package. While these models represent a well-defined workflow for documenting adaptive evolution, in practice they can be challenging for researchers having a vast amount of data, as multiple types of relevant codon-based datasets are generated, making the overall process hard and tedious to handle, error-prone and time-consuming.

Results

We introduce LMAP (Lightweight Multigene Analyses in PAML), a user-friendly command-line and interactive package, designed to handle the codeml workflow, namely: directory organization, execution, results gathering and organization for Likelihood Ratio Test estimations with minimal manual user intervention. LMAP was developed for the workstation multi-core environment and provides a unique advantage for processing one, or more, if not all codeml codon-based models for multiple datasets at a time. Our software, proved efficiency throughout the codeml workflow, including, but not limited, to simultaneously handling more than 20 datasets.

Conclusions

We have developed a simple and versatile LMAP package, with outstanding performance, enabling researchers to analyze multiple different codon-based datasets in a high-throughput fashion. At minimum, two file types are required within a single input directory: one for the multiple sequence alignment and another for the phylogenetic tree. To our knowledge, no other software combines all codeml codon substitution models of adaptive evolution. LMAP has been developed as an open-source package, allowing its integration into more complex open-source bioinformatics pipelines. LMAP package is released under GPLv3 license and is freely available at http://lmapaml.sourceforge.net/ .",2016-09-06 +23044036,HIV-1 drug resistance genotyping from antiretroviral therapy (ART) naïve and first-line treatment failures in Djiboutian patients.,"In this study we report the prevalence of antiretroviral drug resistant HIV-1 genotypes of virus isolated from Djiboutian patients who failed first-line antiretroviral therapy (ART) and from ART naïve patients.

Patients and methods

A total of 35 blood samples from 16 patients who showed first-line ART failure (>1000 viral genome copies/ml) and 19 ART-naïve patients were collected in Djibouti from October 2009 to December 2009. Both the protease (PR) and reverse transcriptase (RT) genes were amplified and sequenced using National Agency for AIDS Research (ANRS) protocols. The Stanford HIV database algorithm was used for interpretation of resistance data and genotyping.

Results

Among the 16 patients with first-line ART failure, nine (56.2%) showed reverse transcriptase inhibitor-resistant HIV-1 strains: two (12.5%) were resistant to nucleoside (NRTI), one (6.25%) to non-nucleoside (NNRTI) reverse transcriptase inhibitors, and six (37.5%) to both. Analysis of the DNA sequencing data indicated that the most common mutations conferring drug resistance were M184V (38%) for NRTI and K103N (25%) for NNRTI. Only NRTI primary mutations K101Q, K103N and the PI minor mutation L10V were found in ART naïve individuals. No protease inhibitor resistant strains were detected. In our study, we found no detectable resistance in ∼ 44% of all patients who experienced therapeutic failure which was explained by low compliance, co-infection with tuberculosis and malnutrition. Genotyping revealed that 65.7% of samples were infected with subtype C, 20% with CRF02_AG, 8.5% with B, 2.9% with CRF02_AG/C and 2.9% with K/C.

Conclusion

The results of this first study about drug resistance mutations in first-line ART failures show the importance of performing drug resistance mutation test which guides the choice of a second-line regimen. This will improve the management of HIV-infected Djiboutian patients.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2051206212753973.",2012-10-08 +27153678,"2dSpAn: semiautomated 2-d segmentation, classification and analysis of hippocampal dendritic spine plasticity.","

Motivation

Accurate and effective dendritic spine segmentation from the dendrites remains as a challenge for current neuroimaging research community. In this article, we present a new method (2dSpAn) for 2-d segmentation, classification and analysis of structural/plastic changes of hippocampal dendritic spines. A user interactive segmentation method with convolution kernels is designed to segment the spines from the dendrites. Formal morphological definitions are presented to describe key attributes related to the shape of segmented spines. Spines are automatically classified into one of four classes: Stubby, Filopodia, Mushroom and Spine-head Protrusions.

Results

The developed method is validated using confocal light microscopy images of dendritic spines from dissociated hippocampal cultures for: (i) quantitative analysis of spine morphological changes, (ii) reproducibility analysis for assessment of user-independence of the developed software and (iii) accuracy analysis with respect to the manually labeled ground truth images, and also with respect to the available state of the art. The developed method is monitored and used to precisely describe the morphology of individual spines in real-time experiments, i.e. consequent images of the same dendritic fragment.

Availability and implementation

The software and the source code are available at https://sites.google.com/site/2dspan/ under open-source license for non-commercial use.

Contact

subhadip@cse.jdvu.ac.in or j.wlodarczyk@nencki.gov.pl

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-01 +26264428,ImmunExplorer (IMEX): a software framework for diversity and clonality analyses of immunoglobulins and T cell receptors on the basis of IMGT/HighV-QUEST preprocessed NGS data.,"

Background

Today's modern research of B and T cell antigen receptors (the immunoglobulins (IG) or antibodies and T cell receptors (TR)) forms the basis for detailed analyses of the human adaptive immune system. For instance, insights in the state of the adaptive immune system provide information that is essentially important in monitoring transplantation processes and the regulation of immune suppressiva. In this context, algorithms and tools are necessary for analyzing the IG and TR diversity on nucleotide as well as on amino acid sequence level, identifying highly proliferated clonotypes, determining the diversity of the cell repertoire found in a sample, comparing different states of the human immune system, and visualizing all relevant information.

Results

We here present IMEX, a software framework for the detailed characterization and visualization of the state of human IG and TR repertoires. IMEX offers a broad range of algorithms for statistical analysis of IG and TR data, CDR and V-(D)-J analysis, diversity analysis by calculating the distribution of IG and TR, calculating primer efficiency, and comparing multiple data sets. We use a mathematical model that is able to describe the number of unique clonotypes in a sample taking into account the true number of unique sequences and read errors; we heuristically optimize the parameters of this model. IMEX uses IMGT/HighV-QUEST analysis outputs and includes methods for splitting and merging to enable the submission to this portal and to combine the outputs results, respectively. All calculation results can be visualized and exported.

Conclusion

IMEX is an user-friendly and flexible framework for performing clonality experiments based on CDR and V-(D)-J rearranged regions, diversity analysis, primer efficiency, and various different visualization experiments. Using IMEX, various immunological reactions and alterations can be investigated in detail. IMEX is freely available for Windows and Unix platforms at http://bioinformatics.fh-hagenberg.at/immunexplorer/.",2015-08-12 +22139938,"Gene3D: a domain-based resource for comparative genomics, functional annotation and protein network analysis.","Gene3D http://gene3d.biochem.ucl.ac.uk is a comprehensive database of protein domain assignments for sequences from the major sequence databases. Domains are directly mapped from structures in the CATH database or predicted using a library of representative profile HMMs derived from CATH superfamilies. As previously described, Gene3D integrates many other protein family and function databases. These facilitate complex associations of molecular function, structure and evolution. Gene3D now includes a domain functional family (FunFam) level below the homologous superfamily level assignments. Additions have also been made to the interaction data. More significantly, to help with the visualization and interpretation of multi-genome scale data sets, we have developed a new, revamped website. Searching has been simplified with more sophisticated filtering of results, along with new tools based on Cytoscape Web, for visualizing protein-protein interaction networks, differences in domain composition between genomes and the taxonomic distribution of individual superfamilies.",2011-12-01 +24267918,An efficient method for mining cross-timepoint gene regulation sequential patterns from time course gene expression datasets.,"

Background

Observation of gene expression changes implying gene regulations using a repetitive experiment in time course has become more and more important. However, there is no effective method which can handle such kind of data. For instance, in a clinical/biological progression like inflammatory response or cancer formation, a great number of differentially expressed genes at different time points could be identified through a large-scale microarray approach. For each repetitive experiment with different samples, converting the microarray datasets into transactional databases with significant singleton genes at each time point would allow sequential patterns implying gene regulations to be identified. Although traditional sequential pattern mining methods have been successfully proposed and widely used in different interesting topics, like mining customer purchasing sequences from a transactional database, to our knowledge, the methods are not suitable for such biological dataset because every transaction in the converted database may contain too many items/genes.

Results

In this paper, we propose a new algorithm called CTGR-Span (Cross-Timepoint Gene Regulation Sequential pattern) to efficiently mine CTGR-SPs (Cross-Timepoint Gene Regulation Sequential Patterns) even on larger datasets where traditional algorithms are infeasible. The CTGR-Span includes several biologically designed parameters based on the characteristics of gene regulation. We perform an optimal parameter tuning process using a GO enrichment analysis to yield CTGR-SPs more meaningful biologically. The proposed method was evaluated with two publicly available human time course microarray datasets and it was shown that it outperformed the traditional methods in terms of execution efficiency. After evaluating with previous literature, the resulting patterns also strongly correlated with the experimental backgrounds of the datasets used in this study.

Conclusions

We propose an efficient CTGR-Span to mine several biologically meaningful CTGR-SPs. We postulate that the biologist can benefit from our new algorithm since the patterns implying gene regulations could provide further insights into the mechanisms of novel gene regulations during a biological or clinical progression. The Java source code, program tutorial and other related materials used in this program are available at http://websystem.csie.ncku.edu.tw/CTGR-Span.rar.",2013-09-24 +23521697,Adverse drug events: database construction and in silico prediction.,"Adverse drug events (ADEs) are the harms associated with uses of given medications at normal dosages, which are crucial for a drug to be approved in clinical use or continue to stay on the market. Many ADEs are not identified in trials until the drug is approved for clinical use, which results in adverse morbidity and mortality. To date, millions of ADEs have been reported around the world. Methods to avoid or reduce ADEs are an important issue for drug discovery and development. Here, we reported a comprehensive database of adverse drug events (namely MetaADEDB), which included more than 520,000 drug-ADE associations among 3059 unique compounds (including 1330 drugs) and 13,200 ADE items by data integration and text mining. All compounds and ADEs were annotated with the most commonly used concepts defined in Medical Subject Headings (MeSH). Meanwhile, a computational method, namely the phenotypic network inference model (PNIM), was developed for prediction of potential ADEs based on the database. The area under the receive operating characteristic curve (AUC) is more than 0.9 by 10-fold cross validation, while the AUC value was 0.912 for an external validation set extracted from the US-FDA Adverse Events Reporting System, which indicated that the prediction capability of the method was reliable. MetaADEDB is accessible free of charge at http://www.lmmd.org/online_services/metaadedb/. 
The database and the method provide us a useful tool to search for known side effects or predict potential side effects for a given drug or compound.",2013-04-08 +22064864,Gene Expression Atlas update--a value-added database of microarray and sequencing-based functional genomics experiments.,"Gene Expression Atlas (http://www.ebi.ac.uk/gxa) is an added-value database providing information about gene expression in different cell types, organism parts, developmental stages, disease states, sample treatments and other biological/experimental conditions. The content of this database derives from curation, re-annotation and statistical analysis of selected data from the ArrayExpress Archive and the European Nucleotide Archive. A simple interface allows the user to query for differential gene expression either by gene names or attributes or by biological conditions, e.g. diseases, organism parts or cell types. Since our previous report we made 20 monthly releases and, as of Release 11.08 (August 2011), the database supports 19 species, which contains expression data measured for 19,014 biological conditions in 136,551 assays from 5598 independent studies.",2011-11-07 +25063568,Online Diagnosis System: a webserver for analysis of Sanger sequencing-based genetic testing data.,"Sanger sequencing is a well-established molecular technique for diagnosis of genetic diseases. In these tests, DNA sequencers produce vast amounts of data that need to be examined and annotated within a short period of time. To achieve this goal, an online bioinformatics platform that can automate the process is essential. However, to date, there is no such integrated bioinformatics platform available. To fulfill this gap, we developed the Online Diagnosis System (ODS), which is a freely available webserver and supports the commonly used file format of Sanger sequencing data. 
ODS seamlessly integrates base calling, single nucleotide variation (SNV) identification, and SNV annotation into one single platform. It also allows laboratorians to manually inspect the quality of the identified SNVs in the final report. ODS can significantly reduce the data analysis time therefore allows Sanger sequencing-based genetic testing to be finished in a timely manner. ODS is freely available at http://sunlab.lihs.cuhk.edu.hk/ODS/.",2014-07-22 +26955500,"How can we improve Science, Technology, Engineering, and Math education to encourage careers in Biomedical and Pathology Informatics?","The Computer Science, Biology, and Biomedical Informatics (CoSBBI) program was initiated in 2011 to expose the critical role of informatics in biomedicine to talented high school students.[1] By involving them in Science, Technology, Engineering, and Math (STEM) training at the high school level and providing mentorship and research opportunities throughout the formative years of their education, CoSBBI creates a research infrastructure designed to develop young informaticians. Our central premise is that the trajectory necessary to be an expert in the emerging fields of biomedical informatics and pathology informatics requires accelerated learning at an early age.In our 4(th) year of CoSBBI as a part of the University of Pittsburgh Cancer Institute (UPCI) Academy (http://www.upci.upmc.edu/summeracademy/), and our 2nd year of CoSBBI as an independent informatics-based academy, we enhanced our classroom curriculum, added hands-on computer science instruction, and expanded research projects to include clinical informatics. 
We also conducted a qualitative evaluation of the program to identify areas that need improvement in order to achieve our goal of creating a pipeline of exceptionally well-trained applicants for both the disciplines of pathology informatics and biomedical informatics in the era of big data and personalized medicine.",2016-01-29 +21647451,Discovery of protein phosphorylation motifs through exploratory data analysis.,"

Background

The need for efficient algorithms to uncover biologically relevant phosphorylation motifs has become very important with rapid expansion of the proteomic sequence database along with a plethora of new information on phosphorylation sites. Here we present a novel unsupervised method, called Motif Finder (in short, F-Motif) for identification of phosphorylation motifs. F-Motif uses clustering of sequence information represented by numerical features that exploit the statistical information hidden in some foreground data. Furthermore, these identified motifs are then filtered to find ""actual"" motifs with statistically significant motif scores.

Results and discussion

We have applied F-Motif to several new and existing data sets and compared its performance with two well known state-of-the-art methods. In almost all cases F-Motif could identify all statistically significant motifs extracted by the state-of-the-art methods. More importantly, in addition to this, F-Motif uncovers several novel motifs. We have demonstrated using clues from the literature that most of these new motifs discovered by F-Motif are indeed novel. We have also found some interesting phenomena. For example, for CK2 kinase, the conserved sites appear only on the right side of S. However, for CDK kinase, the adjacent site on the right of S is conserved with residue P. In addition, three different encoding methods, including a novel position contrast matrix (PCM) and the simplest binary coding, are used and the ability of F-motif to discover motifs remains quite robust with respect to encoding schemes.

Conclusions

An iterative algorithm proposed here uses exploratory data analysis to discover motifs from phosphorylated data. The effectiveness of F-Motif has been demonstrated using several real data sets as well as using a synthetic data set. The method is quite general in nature and can be used to find other types of motifs also. We have also provided a server for F-Motif at http://f-motif.classcloud.org/, http://bio.classcloud.org/f-motif/ or http://ymu.classcloud.org/f-motif/.",2011-05-25 +24432028,Multi-omic network signatures of disease.,"To better understand dynamic disease processes, integrated multi-omic methods are needed, yet comparing different types of omic data remains difficult. Integrative solutions benefit experimenters by eliminating potential biases that come with single omic analysis. We have developed the methods needed to explore whether a relationship exists between co-expression network models built from transcriptomic and proteomic data types, and whether this relationship can be used to improve the disease signature discovery process. A naïve, correlation based method is utilized for comparison. Using publicly available infectious disease time series data, we analyzed the related co-expression structure of the transcriptome and proteome in response to SARS-CoV infection in mice. Transcript and peptide expression data was filtered using quality scores and subset by taking the intersection on mapped Entrez IDs. Using this data set, independent co-expression networks were built. The networks were integrated by constructing a bipartite module graph based on module member overlap, module summary correlation, and correlation to phenotypes of interest. Compared to the module level results, the naïve approach is hindered by a lack of correlation across data types, less significant enrichment results, and little functional overlap across data types. 
Our module graph approach avoids these problems, resulting in an integrated omic signature of disease progression, which allows prioritization across data types for down-stream experiment planning. Integrated modules exhibited related functional enrichments and could suggest novel interactions in response to infection. These disease and platform-independent methods can be used to realize the full potential of multi-omic network signatures. The data (experiment SM001) are publically available through the NIAID Systems Virology (https://www.systemsvirology.org) and PNNL (http://omics.pnl.gov) web portals. Phenotype data is found in the supplementary information. The ProCoNA package is available as part of Bioconductor 2.13.",2014-01-07 +25910279,Satellite-Based Estimates of Long-Term Exposure to Fine Particles and Association with Mortality in Elderly Hong Kong Residents.,"

Background

A limited number of studies on long-term effects of particulate matter with aerodynamic diameter < 2.5 μm (PM2.5) on health suggest it can be an important cause of morbidity and mortality. In Asia where air quality is poor and deteriorating, local data on long-term effects of PM2.5 to support policy on air quality management are scarce.

Objectives

We assessed long-term effects of PM2.5 on the mortality in a single Asian city.

Methods

For 10-13 years, we followed up a cohort of 66,820 participants ≥ 65 years of age who were enrolled and interviewed in all 18 Elderly Health Centres of the Department of Health, Hong Kong, in 1998-2001. Their residential addresses were geocoded into x- and y-coordinates, and their proxy exposures to PM2.5 at their addresses in 1 × 1 km grids were estimated from the U.S. National Aeronautics and Space Administration (NASA) satellite data. We used Cox regression models to calculate hazard ratios (HRs) of mortality associated with PM2.5.

Results

Mortality HRs per 10-μg/m3 increase in PM2.5 were 1.14 (95% CI: 1.07, 1.22) for all natural causes, 1.22 (95% CI: 1.08, 1.39) for cardiovascular causes, 1.42 (95% CI: 1.16, 1.73) for ischemic heart disease, 1.24 (95% CI: 1.00, 1.53) for cerebrovascular disease, and 1.05 (95% CI: 0.90, 1.22) for respiratory causes.

Conclusions

Our methods in using NASA satellite data provide a readily accessible and affordable approach to estimation of a sufficient range of individual PM2.5 exposures in a single city. This approach can expand the capacity to conduct environmental accountability studies in areas with few measurements of fine particles.

Citation

Wong CM, Lai HK, Tsang H, Thach TQ, Thomas GN, Lam KB, Chan KP, Yang L, Lau AK, Ayres JG, Lee SY, Chan WM, Hedley AJ, Lam TH. 2015. Satellite-based estimates of long-term exposure to fine particles and association with mortality in elderly Hong Kong residents. Environ Health Perspect 123:1167-1172; http://dx.doi.org/10.1289/ehp.1408264.",2015-04-24 +27402904,Towards the knowledge-based design of universal influenza epitope ensemble vaccines.,"

Motivation

Influenza A viral heterogeneity remains a significant threat due to unpredictable antigenic drift in seasonal influenza and antigenic shifts caused by the emergence of novel subtypes. Annual review of multivalent influenza vaccines targets strains of influenza A and B likely to be predominant in future influenza seasons. This does not induce broad, cross protective immunity against emergent subtypes. Better strategies are needed to prevent future pandemics. Cross-protection can be achieved by activating CD8+ and CD4+ T cells against highly conserved regions of the influenza genome. We combine available experimental data with informatics-based immunological predictions to help design vaccines potentially able to induce cross-protective T-cells against multiple influenza subtypes.

Results

To exemplify our approach we designed two epitope ensemble vaccines comprising highly conserved and experimentally verified immunogenic influenza A epitopes as putative non-seasonal influenza vaccines; one specifically targets the US population and the other is a universal vaccine. The USA-specific vaccine comprised 6 CD8+ T cell epitopes (GILGFVFTL, FMYSDFHFI, GMDPRMCSL, SVKEKDMTK, FYIQMCTEL, DTVNRTHQY) and 3 CD4+ epitopes (KGILGFVFTLTVPSE, EYIMKGVYINTALLN, ILGFVFTLTVPSERG). The universal vaccine comprised 8 CD8+ epitopes: (FMYSDFHFI, GILGFVFTL, ILRGSVAHK, FYIQMCTEL, ILKGKFQTA, YYLEKANKI, VSDGGPNLY, YSHGTGTGY) and the same 3 CD4+ epitopes. Our USA-specific vaccine has a population protection coverage (portion of the population potentially responsive to one or more component epitopes of the vaccine, PPC) of over 96 and 95% coverage of observed influenza subtypes. The universal vaccine has a PPC value of over 97 and 88% coverage of observed subtypes.

Availability and implementation

http://imed.med.ucm.es/Tools/episopt.html CONTACT: d.r.flower@aston.ac.uk.",2016-07-10 +25799103,VDJSeq-Solver: in silico V(D)J recombination detection tool.,"In this paper we present VDJSeq-Solver, a methodology and tool to identify clonal lymphocyte populations from paired-end RNA Sequencing reads derived from the sequencing of mRNA neoplastic cells. The tool detects the main clone that characterises the tissue of interest by recognizing the most abundant V(D)J rearrangement among the existing ones in the sample under study. The exact sequence of the clone identified is capable of accounting for the modifications introduced by the enzymatic processes. The proposed tool overcomes limitations of currently available lymphocyte rearrangements recognition methods, working on a single sequence at a time, that are not applicable to high-throughput sequencing data. In this work, VDJSeq-Solver has been applied to correctly detect the main clone and identify its sequence on five Mantle Cell Lymphoma samples; then the tool has been tested on twelve Diffuse Large B-Cell Lymphoma samples. In order to comply with the privacy, ethics and intellectual property policies of the University Hospital and the University of Verona, data is available upon request to supporto.utenti@ateneo.univr.it after signing a mandatory Materials Transfer Agreement. VDJSeq-Solver JAVA/Perl/Bash software implementation is free and available at http://eda.polito.it/VDJSeq-Solver/.",2015-03-23 +26170513,MAD Bayes for Tumor Heterogeneity - Feature Allocation with Exponential Family Sampling.,"We propose small-variance asymptotic approximations for inference on tumor heterogeneity (TH) using next-generation sequencing data. Understanding TH is an important and open research problem in biology. The lack of appropriate statistical inference is a critical gap in existing methods that the proposed approach aims to fill. 
We build on a hierarchical model with an exponential family likelihood and a feature allocation prior. The proposed implementation of posterior inference generalizes similar small-variance approximations proposed by Kulis and Jordan (2012) and Broderick et al. (2012b) for inference with Dirichlet process mixture and Indian buffet process prior models under normal sampling. We show that the new algorithm can successfully recover latent structures of different haplotypes and subclones and is magnitudes faster than available Markov chain Monte Carlo samplers. The latter are practically infeasible for high-dimensional genomics data. The proposed approach is scalable, easy to implement and benefits from the flexibility of Bayesian nonparametric models. More importantly, it provides a useful tool for applied scientists to estimate cell subtypes in tumor samples. R code is available on http://www.ma.utexas.edu/users/yxu/.",2015-03-01 +23842461,Managing the data deluge: data-driven GO category assignment improves while complexity of functional annotation increases.,"The available curated data lag behind current biological knowledge contained in the literature. Text mining can assist biologists and curators to locate and access this knowledge, for instance by characterizing the functional profile of publications. Gene Ontology (GO) category assignment in free text already supports various applications, such as powering ontology-based search engines, finding curation-relevant articles (triage) or helping the curator to identify and encode functions. Popular text mining tools for GO classification are based on so called thesaurus-based--or dictionary-based--approaches, which exploit similarities between the input text and GO terms themselves. But their effectiveness remains limited owing to the complex nature of GO terms, which rarely occur in text. 
In contrast, machine learning approaches exploit similarities between the input text and already curated instances contained in a knowledge base to infer a functional profile. GO Annotations (GOA) and MEDLINE make possible to exploit a growing amount of curated abstracts (97 000 in November 2012) for populating this knowledge base. Our study compares a state-of-the-art thesaurus-based system with a machine learning system (based on a k-Nearest Neighbours algorithm) for the task of proposing a functional profile for unseen MEDLINE abstracts, and shows how resources and performances have evolved. Systems are evaluated on their ability to propose for a given abstract the GO terms (2.8 on average) used for curation in GOA. We show that since 2006, although a massive effort was put into adding synonyms in GO (+300%), our thesaurus-based system effectiveness is rather constant, reaching from 0.28 to 0.31 for Recall at 20 (R20). In contrast, thanks to its knowledge base growth, our machine learning system has steadily improved, reaching from 0.38 in 2006 to 0.56 for R20 in 2012. Integrated in semi-automatic workflows or in fully automatic pipelines, such systems are more and more efficient to provide assistance to biologists. DATABASE URL: http://eagl.unige.ch/GOCat/",2013-07-09 +27679839,Social Networking Services-Based Communicative Care for Patients with Diabetes Mellitus in Korea.,"

Objectives

Social networking services (SNS)-based online communities are good examples of improving quality of care by incorporating information technology into medicine. Therefore, we created an SNS-based community care webpage for communication among patients with diabetes mellitus (DM). We aimed to identify what diabetic patients wanted to know and were interested in by analyzing their posts and classified content in which users were interested.

Methods

As opposed to the existing physician-focused health information websites, we built a patient-focused experience exchange website, ""I love insulin (http://www.iloveinsulin.co.kr)."" The DM communication webpage was divided into ""My Web-Chart,"" ""My community-free board,"" and ""Life & Health."" The contents analysis targeted users' postings, and replies were classified by theme from May 2012 to June 2013. The data included number of questions asked, answers, and question-to-answer (QA) ratio in each category.

Results

A total of 264 patients registered on the ""I Love Insulin"" website. The most frequent topic of posts classified as questions were about diabetes itself (23%), diet (22%), and glucose levels (19%). Conversely, most answers and information provided by users were about daily life with no relationship to diabetes mellitus (54%). While there were many questions about diet, there were very few answers (2%). Whereas there was much provision of knowledge about general DM, sharing diet information was rare. The ratios of answers to questions on diet (ratio=0.059, 1/17), glucose level (ratio=0.067, 1/15), insulin regulation (ratio=0.222, 2/9) and webpage (ratio=0.167, 1/6) were significantly low compared to DM itself (all p < 0.001).

Discussion

Patients in Korea with DM tend to have insufficient knowledge about diet and insulin regulation; continuously providing diet and insulin regulation information are desirable. It is hoped that the patients would be motivated to participate actively by ""knowledge sharing."" Through this process, patients learn about their diseases not from the physicians but from among themselves.",2016-09-28 +23736950,Increased consumption of fruit and vegetables for the primary prevention of cardiovascular diseases.,"

Editorial note

This Cochrane Review has been superseded by a review entitled Vegan dietary pattern for the primary and secondary prevention of cardiovascular diseases (https://www.cochranelibrary.com/cdsr/doi/10.1002/14651858.CD013501.pub2/full) https://pubmed.ncbi.nlm.nih.gov/33629376/

Background

There is increasing evidence that high consumption of fruit and vegetables is beneficial for cardiovascular disease (CVD) prevention.

Objectives

The primary objective is to determine the effectiveness of i) advice to increase fruit and vegetable consumption ii) the provision of fruit and vegetables to increase consumption, for the primary prevention of CVD.

Search methods

We searched the following electronic databases: The Cochrane Library (2012, issue 9-CENTRAL, HTA, DARE, NEED), MEDLINE (1946 to week 3 September 2012); EMBASE (1980 to 2012 week 39) and the Conference Proceedings Citation Index - Science on ISI Web of Science (5 October 2012). We searched trial registers, screened reference lists and contacted authors for additional information where necessary. No language restrictions were applied.

Selection criteria

Randomised controlled trials with at least three months follow-up (follow-up was considered to be the time elapsed since the start of the intervention) involving healthy adults or those at high risk of CVD. Trials investigated either advice to increase fruit and vegetable intake (via any source or modality) or the provision of fruit and vegetables to increase intake. The comparison group was no intervention or minimal intervention. Outcomes of interest were CVD clinical events (mortality (CVD and all-cause), myocardial infarction (MI), coronary artery bypass grafting (CABG) or percutaneous transluminal coronary angioplasty (PTCA), angiographically-defined angina pectoris, stroke, carotid endarterectomy, peripheral arterial disease (PAD)) and major CVD risk factors (blood pressure, blood lipids, type 2 diabetes). Trials involving multifactorial lifestyle interventions (including different dietary patterns, exercise) or where the focus was weight loss were excluded to avoid confounding.

Data collection and analysis

Two review authors independently selected trials for inclusion, extracted data and assessed the risk of bias. Trials of provision of fruit and vegetables were analysed separately from trials of dietary advice.

Main results

We identified 10 trials with a total of 1730 participants randomised, and one ongoing trial. Six trials investigated the provision of fruit and vegetables, and four trials examined advice to increase fruit and vegetable consumption. The ongoing trial is examining the provision of an avocado-rich diet. The number and type of intervention components for provision, and the dietary advice provided differed between trials. None of the trials reported clinical events as they were all relatively short term. There was no strong evidence for effects of individual trials of provision of fruit and vegetables on cardiovascular risk factors, but trials were heterogeneous and short term. Furthermore, five of the six trials only provided one fruit or vegetable. Dietary advice showed some favourable effects on blood pressure (systolic blood pressure (SBP): mean difference (MD) -3.0 mmHg (95% confidence interval (CI) -4.92 to -1.09), diastolic blood pressure (DBP): MD -0.90 mmHg (95% CI -2.03 to 0.24)) and low-density lipoprotein (LDL) cholesterol but analyses were based on only two trials. Three of the 10 included trials examined adverse effects, which included increased bowel movements, bad breath and body odour.

Authors' conclusions

There are very few studies to date examining provision of, or advice to increase the consumption of, fruit and vegetables in the absence of additional dietary interventions or other lifestyle interventions for the primary prevention of CVD. The limited evidence suggests advice to increase fruit and vegetables as a single intervention has favourable effects on CVD risk factors but more trials are needed to confirm this.",2013-06-04 +26147120,Comparison of Gene Coexpression Profiles and Construction of Conserved Gene Networks to Find Functional Modules.,"

Background

Computational approaches toward gene annotation are a formidable challenge, now that many genome sequences have been determined. Each gene has its own function, but complicated cellular functions are achieved by sets of genes. Therefore, sets of genes with strong functional relationships must be identified. For this purpose, the similarities of gene expression patterns and gene sequences have been separately utilized, although the combined information will provide a better solution.

Result & discussion

We propose a new method to find functional modules, by comparing gene coexpression profiles among species. A coexpression pattern is represented as a list of coexpressed genes with each guide gene. We compared two coexpression lists, one from a human guide gene and the other from a homologous mouse gene, and defined a measure to evaluate the similarity between the lists. Based on this coexpression similarity, we detected the highly conserved genes, and constructed human gene networks with conserved coexpression between human and mouse. Some of the tightly coupled genes (modules) showed clear functional enrichment, such as immune system and cell cycle, indicating that our method could identify functionally related genes without any prior knowledge. We also found a few functional modules without any annotations, which may be good candidates for novel functional modules. All of the comparisons are available at the http://v1.coxsimdb.info web database.",2015-07-06 +26968364,Analysis of the interface variability in NMR structure ensembles of protein-protein complexes.,"NMR structures consist in ensembles of conformers, all satisfying the experimental restraints, which exhibit a certain degree of structural variability. We analyzed here the interface in NMR ensembles of protein-protein heterodimeric complexes and found it to span a wide range of different conservations. The different exhibited conservations do not simply correlate with the size of the systems/interfaces, and are most probably the result of an interplay between different factors, including the quality of experimental data and the intrinsic complex flexibility. In any case, this information is not to be missed when NMR structures of protein-protein complexes are analyzed; especially considering that, as we also show here, the first NMR conformer is usually not the one which best reflects the overall interface. 
To quantify the interface conservation and to analyze it, we used an approach originally conceived for the analysis and ranking of ensembles of docking models, which has now been extended to directly deal with NMR ensembles. We propose this approach, based on the conservation of the inter-residue contacts at the interface, both for the analysis of the interface in whole ensembles of NMR complexes and for the possible selection of a single conformer as the best representative of the overall interface. In order to make the analyses automatic and fast, we made the protocol available as a web tool at: https://www.molnac.unisa.it/BioTools/consrank/consrank-nmr.html.",2016-03-09 +25213199,GSAASeqSP: a toolset for gene set association analysis of RNA-Seq data.,"RNA-Seq is quickly becoming the preferred method for comprehensively characterizing whole transcriptome activity, and the analysis of count data from RNA-Seq requires new computational tools. We developed GSAASeqSP, a novel toolset for genome-wide gene set association analysis of sequence count data. This toolset offers a variety of statistical procedures via combinations of multiple gene-level and gene set-level statistics, each having their own strengths under different sample and experimental conditions. These methods can be employed independently, or results generated from multiple or all methods can be integrated to determine more robust profiles of significantly altered biological pathways. Using simulations, we demonstrate the ability of these methods to identify association signals and to measure the strength of the association. We show that GSAASeqSP analyses of RNA-Seq data from diverse tissue samples provide meaningful insights into the biological mechanisms that differentiate these samples. GSAASeqSP is a powerful platform for investigating molecular underpinnings of complex traits and diseases arising from differential activity within the biological pathways. 
GSAASeqSP is available at http://gsaa.unc.edu.",2014-09-12 +25861968,MetaSV: an accurate and integrative structural-variant caller for next generation sequencing.,"

Unlabelled

Structural variations (SVs) are large genomic rearrangements that vary significantly in size, making them challenging to detect with the relatively short reads from next-generation sequencing (NGS). Different SV detection methods have been developed; however, each is limited to specific kinds of SVs with varying accuracy and resolution. Previous works have attempted to combine different methods, but they still suffer from poor accuracy particularly for insertions. We propose MetaSV, an integrated SV caller which leverages multiple orthogonal SV signals for high accuracy and resolution. MetaSV proceeds by merging SVs from multiple tools for all types of SVs. It also analyzes soft-clipped reads from alignment to detect insertions accurately since existing tools underestimate insertion SVs. Local assembly in combination with dynamic programming is used to improve breakpoint resolution. Paired-end and coverage information is used to predict SV genotypes. Using simulation and experimental data, we demonstrate the effectiveness of MetaSV across various SV types and sizes.

Availability and implementation

Code in Python is at http://bioinform.github.io/metasv/.

Contact

rd@bina.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-10 +26208256,2D map projections for visualization and quantitative analysis of 3D fluorescence micrographs.,"We introduce Map3-2D, a freely available software to accurately project up to five-dimensional (5D) fluorescence microscopy image data onto full-content 2D maps. Similar to the Earth's projection onto cartographic maps, Map3-2D unfolds surface information from a stack of images onto a single, structurally connected map. We demonstrate its applicability for visualization and quantitative analyses of spherical and uneven surfaces in fixed and dynamic live samples by using mammalian and yeast cells, and giant unilamellar vesicles. Map3-2D software is available at http://www.zmbh.uni-heidelberg.de//Central_Services/Imaging_Facility/Map3-2D.html.",2015-07-24 +26968363,"Structure of γ-tubulin small complex based on a cryo-EM map, chemical cross-links, and a remotely related structure.","Modeling protein complex structures based on distantly related homologues can be challenging due to poor sequence and structure conservation. Therefore, utilizing even low-resolution experimental data can significantly increase model precision and accuracy. Here, we present models of the two key functional states of the yeast γ-tubulin small complex (γTuSC): one for the low-activity ""open"" state and another for the higher-activity ""closed"" state. Both models were computed based on remotely related template structures and cryo-EM density maps at 6.9Å and 8.0Å resolution, respectively. For each state, extensive sampling of alignments and conformations was guided by the fit to the corresponding cryo-EM density map. The resulting good-scoring models formed a tightly clustered ensemble of conformations in most regions. We found significant structural differences between the two states, primarily in the γ-tubulin subunit regions where the microtubule binds. 
We also report a set of chemical cross-links that were found to be consistent with equilibrium between the open and closed states. The protocols developed here have been incorporated into our open-source Integrative Modeling Platform (IMP) software package (http://integrativemodeling.org), and can therefore be applied to many other systems.",2016-03-08 +22058127,MACiE: exploring the diversity of biochemical reactions.,"MACiE (which stands for Mechanism, Annotation and Classification in Enzymes) is a database of enzyme reaction mechanisms, and can be accessed from http://www.ebi.ac.uk/thornton-srv/databases/MACiE/. This article presents the release of Version 3 of MACiE, which not only extends the dataset to 335 entries, covering 182 of the EC sub-subclasses with a crystal structure available (~90%), but also incorporates greater chemical and structural detail. This version of MACiE represents a shift in emphasis for new entries, from non-homologous representatives covering EC reaction space to enzymes with mechanisms of interest to our users and collaborators with a view to exploring the chemical diversity of life. We present new tools for exploring the data in MACiE and comparing entries as well as new analyses of the data and new searches, many of which can now be accessed via dedicated Perl scripts.",2011-11-03 +24512253,COV2HTML: a visualization and analysis tool of bacterial next generation sequencing (NGS) data for postgenomics life scientists.,"COV2HTML is an interactive web interface, which is addressed to biologists, and allows performing both coverage visualization and analysis of NGS alignments performed on prokaryotic organisms (bacteria and phages). It combines two processes: a tool that converts the huge NGS mapping or coverage files into light specific coverage files containing information on genetic elements; and a visualization interface allowing a real-time analysis of data with optional integration of statistical results. 
To demonstrate the scope of COV2HTML, the program was tested with data from two published studies. The first data were from RNA-seq analysis of Campylobacter jejuni, based on comparison of two conditions with two replicates. We were able to recover 26 out of 27 genes highlighted in the publication using COV2HTML. The second data comprised of stranded TSS and RNA-seq data sets on the Archaea Sulfolobus solfataricus. COV2HTML was able to highlight most of the TSSs from the article and allows biologists to visualize both TSS and RNA-seq on the same screen. The strength of the COV2HTML interface is making possible NGS data analysis without software installation, login, or a long training period. A web version is accessible at https://mmonot.eu/COV2HTML/ . This website is free and open to users without any login requirement.",2014-02-10 +26107650,Construction of an Ultrahigh Pressure Liquid Chromatography-Tandem Mass Spectral Library of Plant Natural Products and Comparative Spectral Analyses.,"A plant natural product tandem mass spectral library has been constructed using authentic standards and purified compounds. Currently, the library contains 1734 tandem mass spectra for 289 compounds, with the majority (76%) of the compounds being plant phenolics such as flavonoids, isoflavonoids, and phenylpropanoids. Tandem mass spectra and chromatographic retention data were acquired on a triple quadrupole mass spectrometer coupled to an ultrahigh pressure liquid chromatograph using six different collision energies (CEs) (10-60 eV). Comparative analyses of the tandem mass spectral data revealed that the loss of ring substituents preceded the C-ring opening during the fragmentation of flavonoids and isoflavonoids. At lower CE (i.e., 10 and 20 eV), the flavonoids and isoflavonoid central ring structures typically remained intact, and fragmentation was characterized by the loss of the substituents (i.e., methyl and glycosyl groups). 
At higher CE, the flavonoid and isoflavonoid core ring systems underwent C-ring cleavage and/or rearrangement depending on the structure, particularly hydroxylation patterns. In-source electrochemical oxidation was observed for phenolics that had ortho-diphenol moieties (i.e., vicinal hydroxyl groups on the aromatic rings). The ortho-diphenols were oxidized to ortho-quinones, yielding an intensive and, in most cases, a base ion peak corresponding to a [(M - 2H) - H](-) ion in their mass spectra. The library also contains reverse-phase retention times, allowing for the construction, validation, and testing of an artificial neural network retention prediction of other flavonoids and isoflavonoids not contained within the library. The library is freely available for nonprofit, academic use and it can be downloaded at http://www.noble.org/apps/Scientific/WebDownloadManager/DownloadArea.aspx.",2015-07-08 +27586368,Early-Life Exposure to Perfluoroalkyl Substances and Childhood Metabolic Function.,"

Background

Perfluoroalkyl substances (PFASs) are synthetic chemicals that may persist in the environment and in humans. There is a possible association between early-life PFAS exposure and metabolic dysfunction in later life, but data are limited.

Methods

We studied 665 mother-child pairs in Project Viva, a Boston, Massachusetts-area cohort recruited 1999-2002. We quantified concentrations of PFASs [perfluorooctanoate (PFOA), perfluorooctane sulfonate (PFOS), perfluorononanoate (PFNA), perfluorohexane sulfonate (PFHxS), and perfluorodecanoate (PFDeA)] in maternal plasma collected at the first prenatal visit (median, 9.6 weeks gestation) and in child plasma from the mid-childhood research visit (median, 7.7 years). We assessed leptin, adiponectin, and homeostatic model assessment of insulin resistance (HOMA-IR) in mid-childhood. We fit covariate-adjusted linear regression models and conducted stratified analyses by child sex.

Results

Children with higher PFAS concentrations had lower HOMA-IR [e.g., -10.1% (95% CI: -17.3, -2.3) per interquartile range increment in PFOA]. This inverse association between child PFAS and HOMA-IR was more pronounced in females [e.g., PFOA: -15.6% (95% CI: -25.4, -4.6) vs. -6.1% (95% CI: -16.2, 5.2) for males]. Child PFAS plasma concentrations were not associated with leptin or adiponectin. Prenatal PFAS plasma concentrations were not associated with leptin, adiponectin, or HOMA-IR in offspring.

Conclusions

We found no evidence for an adverse effect of early-life PFAS exposure on metabolic function in mid-childhood. In fact, children with higher PFAS concentrations had lower insulin resistance. Citation: Fleisch AF, Rifas-Shiman SL, Mora AM, Calafat AM, Ye X, Luttmann-Gibson H, Gillman MW, Oken E, Sagiv SK. 2017. Early-life exposure to perfluoroalkyl substances and childhood metabolic function. Environ Health Perspect 125:481-487; http://dx.doi.org/10.1289/EHP303.",2016-09-02 +26687838,Purification and Characterization of Progenitor and Mature Human Astrocytes Reveals Transcriptional and Functional Differences with Mouse.,"The functional and molecular similarities and distinctions between human and murine astrocytes are poorly understood. Here, we report the development of an immunopanning method to acutely purify astrocytes from fetal, juvenile, and adult human brains and to maintain these cells in serum-free cultures. We found that human astrocytes have abilities similar to those of murine astrocytes in promoting neuronal survival, inducing functional synapse formation, and engulfing synaptosomes. In contrast to existing observations in mice, we found that mature human astrocytes respond robustly to glutamate. Next, we performed RNA sequencing of healthy human astrocytes along with astrocytes from epileptic and tumor foci and compared these to human neurons, oligodendrocytes, microglia, and endothelial cells (available at http://www.brainrnaseq.org). With these profiles, we identified novel human-specific astrocyte genes and discovered a transcriptome-wide transformation between astrocyte precursor cells and mature post-mitotic astrocytes. These data represent some of the first cell-type-specific molecular profiles of the healthy and diseased human brain.",2015-12-10 +24990603,GATB: Genome Assembly & Analysis Tool Box.,"

Motivation

Efficient and fast next-generation sequencing (NGS) algorithms are essential to analyze the terabytes of data generated by the NGS machines. A serious bottleneck can be the design of such algorithms, as they require sophisticated data structures and advanced hardware implementation.

Results

We propose an open-source library dedicated to genome assembly and analysis to fasten the process of developing efficient software. The library is based on a recent optimized de-Bruijn graph implementation allowing complex genomes to be processed on desktop computers using fast algorithms with low memory footprints.

Availability and implementation

The GATB library is written in C++ and is available at the following Web site http://gatb.inria.fr under the A-GPL license.

Contact

lavenier@irisa.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-01 +27587680,A unified model based multifactor dimensionality reduction framework for detecting gene-gene interactions.,"

Motivation

Gene-gene interaction (GGI) is one of the most popular approaches for finding and explaining the missing heritability of common complex traits in genome-wide association studies. The multifactor dimensionality reduction (MDR) method has been widely studied for detecting GGI effects. However, there are several disadvantages of the existing MDR-based approaches, such as the lack of an efficient way of evaluating the significance of multi-locus models and the high computational burden due to intensive permutation. Furthermore, the MDR method does not distinguish marginal effects from pure interaction effects.

Methods

We propose a two-step unified model based MDR approach (UM-MDR), in which, the significance of a multi-locus model, even a high-order model, can be easily obtained through a regression framework with a semi-parametric correction procedure for controlling Type I error rates. In comparison to the conventional permutation approach, the proposed semi-parametric correction procedure avoids heavy computation in order to achieve the significance of a multi-locus model. The proposed UM-MDR approach is flexible in the sense that it is able to incorporate different types of traits and evaluate significances of the existing MDR extensions.

Results

The simulation studies and the analysis of a real example are provided to demonstrate the utility of the proposed method. UM-MDR can achieve at least the same power as MDR for most scenarios, and it outperforms MDR especially when there are some single nucleotide polymorphisms that only have marginal effects, which masks the detection of causal epistasis for the existing MDR approaches.

Conclusions

UM-MDR provides a very good supplement of existing MDR method due to its efficiency in achieving significance for every multi-locus model, its power and its flexibility of handling different types of traits.

Availability and implementation

A R package ""umMDR"" and other source codes are freely available at http://statgen.snu.ac.kr/software/umMDR/ CONTACT: tspark@stats.snu.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-09-01 +27539611,Diastolic Backward-Traveling Decompression (Suction) Wave Correlates With Simultaneously Acquired Indices of Diastolic Function and Is Reduced in Left Ventricular Stunning. ,"Wave intensity analysis can distinguish proximal (propulsion) and distal (suction) influences on coronary blood flow and is purported to reflect myocardial performance and microvascular function. Quantifying the amplitude of the peak, backwards expansion wave (BEW) may have clinical utility. However, simultaneously acquired wave intensity analysis and left ventricular (LV) pressure-volume loop data, confirming the origin and effect of myocardial function on the BEW in humans, have not been previously reported. Patients with single-vessel left anterior descending coronary disease and normal ventricular function (n=13) were recruited prospectively. We simultaneously measured LV function with a conductance catheter and derived wave intensity analysis using a pressure-low velocity guidewire at baseline and again 30 minutes after a 1-minute coronary balloon occlusion. The peak BEW correlated with the indices of diastolic LV function: LV dP/dtmin (rs=-0.59; P=0.002) and τ (rs=-0.59; P=0.002), but not with systolic function. In 12 patients with paired measurements 30 minutes post balloon occlusion, LV dP/dtmax decreased from 1437.1±163.9 to 1299.4±152.9 mm Hg/s (median difference, -110.4 [-183.3 to -70.4]; P=0.015) and τ increased from 48.3±7.4 to 52.4±7.9 ms (difference, 4.1 [1.3-6.9]; P=0.01), but basal average peak coronary flow velocity was unchanged, indicating LV stunning post balloon occlusion. However, the peak BEW amplitude decreased from -9.95±5.45 W·m(-2)/s(2)×10(5) to -7.52±5.00 W·m(-2)/s(2)×10(5) (difference 2.43×10(5) [0.20×10(5) to 4.67×10(5); P=0.04]). 
Peak BEW assessed by coronary wave intensity analysis correlates with invasive indices of LV diastolic function and mirrors changes in LV diastolic function confirming the origin of the suction wave. This may have implications for physiological lesion assessment after percutaneous coronary intervention. URL: http://www.isrctn.org. Unique identifier: ISRCTN42864201.",2016-09-01 +26140380,Dexamethasone affects mouse olfactory mucosa gene expression and attenuates genes related to neurite outgrowth.,"

Background

Olfaction is one of the important senses for humans. Systemic glucocorticoids are the most commonly used medications for olfactory loss because of their strong anti-inflammatory effects. However, their effect on olfactory function is still controversial and the precise mechanism is not clear. To gain a global view of the effect of systematic glucocorticoid treatment on gene expression in the olfactory mucosa (OM), we profiled these changes in a murine model of olfaction in order to identify underlying molecular mechanisms.

Methods

C57BL/6 mice were injected daily for 2 weeks (WK2) with dexamethasone (DEX, intraperitoneally, 1 mg/kg body weight) vs 1 day of DEX (D1) vs controls, which received saline (Ctrl) (n = 9/group). Total RNA from the OM was used to analyze global gene expression. Genes showing changes in expression were compared using the Database for Annotation, Visualization and Integrated Discovery (DAVID, v6.7) and the General Olfactory Sensitivity Database (GOSdb; http://genome.weizmann.ac.il/GOSdb).

Results

Between the WK2 and Ctrl groups, 3351 genes were differentially expressed, of which 236 genes were related to olfactory function. Genes involved in axon guidance, cell projection, and inflammation were enriched and overlapped significantly with those in the GOSdb.

Conclusion

Systemic glucocorticoids exert effects on transcription of a notable number of genes in the OM and appear to orchestrate changes related to axon guidance, cell projection, and inflammation. Further examination may allow targeted therapies that lack the side effects of this category of medication.",2015-07-03 +27384154,Early Postnatal Manganese Exposure Causes Lasting Impairment of Selective and Focused Attention and Arousal Regulation in Adult Rats.,"

Background

Studies in children and adolescents have associated early developmental manganese (Mn) exposure with inattention, impulsivity, hyperactivity, and oppositional behaviors, but causal inferences are precluded by the correlational nature of the data and generally limited control for potential confounders.

Objectives

To determine whether early postnatal oral Mn exposure causes lasting attentional and impulse control deficits in adulthood, and whether continued lifelong Mn exposure exacerbates these effects, using a rat model of environmental Mn exposure.

Methods

Neonates were exposed orally to 0, 25 or 50 mg Mn/kg/day during early postnatal life (PND 1-21) or throughout life from PND 1 until the end of the study. In adulthood, the animals were tested on a series of learning and attention tasks using the five-choice serial reaction time task.

Results

Early postnatal Mn exposure caused lasting attentional dysfunction due to impairments in attentional preparedness, selective attention, and arousal regulation, whereas associative ability (learning) and impulse control were spared. The presence and severity of these deficits varied with the dose and duration of Mn exposure.

Conclusions

This study is the first to show that developmental Mn exposure can cause lasting impairments in focused and selective attention and arousal regulation, and to identify the specific nature of the impairments. Given the importance of attention and arousal regulation in cognitive functioning, these findings substantiate concerns about the adverse effects of developmental Mn exposure in humans. Citation: Beaudin SA, Strupp BJ, Strawderman M, Smith DR. 2017. Early postnatal manganese exposure causes lasting impairment of selective and focused attention and arousal regulation in adult rats. Environ Health Perspect 125:230-237; http://dx.doi.org/10.1289/EHP258.",2016-07-06 +26656583,Region-Based Convolutional Networks for Accurate Object Detection and Segmentation.,"Object detection performance, as measured on the canonical PASCAL VOC Challenge datasets, plateaued in the final years of the competition. The best-performing methods were complex ensemble systems that typically combined multiple low-level image features with high-level context. In this paper, we propose a simple and scalable detection algorithm that improves mean average precision (mAP) by more than 50 percent relative to the previous best result on VOC 2012-achieving a mAP of 62.4 percent. Our approach combines two ideas: (1) one can apply high-capacity convolutional networks (CNNs) to bottom-up region proposals in order to localize and segment objects and (2) when labeled training data are scarce, supervised pre-training for an auxiliary task, followed by domain-specific fine-tuning, boosts performance significantly. Since we combine region proposals with CNNs, we call the resulting model an R-CNN or Region-based Convolutional Network. 
Source code for the complete system is available at http://www.cs.berkeley.edu/~rbg/rcnn.",2016-01-01 +25767696,Enhancement of COPD biological networks using a web-based collaboration interface.,"The construction and application of biological network models is an approach that offers a holistic way to understand biological processes involved in disease. Chronic obstructive pulmonary disease (COPD) is a progressive inflammatory disease of the airways for which therapeutic options currently are limited after diagnosis, even in its earliest stage. COPD network models are important tools to better understand the biological components and processes underlying initial disease development. With the increasing amounts of literature that are now available, crowdsourcing approaches offer new forms of collaboration for researchers to review biological findings, which can be applied to the construction and verification of complex biological networks. We report the construction of 50 biological network models relevant to lung biology and early COPD using an integrative systems biology and collaborative crowd-verification approach. By combining traditional literature curation with a data-driven approach that predicts molecular activities from transcriptomics data, we constructed an initial COPD network model set based on a previously published non-diseased lung-relevant model set. The crowd was given the opportunity to enhance and refine the networks on a website ( https://bionet.sbvimprover.com/) and to add mechanistic detail, as well as critically review existing evidence and evidence added by other users, so as to enhance the accuracy of the biological representation of the processes captured in the networks. Finally, scientists and experts in the field discussed and refined the networks during an in-person jamboree meeting. Here, we describe examples of the changes made to three of these networks: Neutrophil Signaling, Macrophage Signaling, and Th1-Th2 Signaling. 
We describe an innovative approach to biological network construction that combines literature and data mining and a crowdsourcing approach to generate a comprehensive set of COPD-relevant models that can be used to help understand the mechanisms related to lung pathobiology. Registered users of the website can freely browse and download the networks.",2015-01-29 +26146076,Regional Cell-Specific Transcriptome Mapping Reveals Regulatory Complexity in the Adult Drosophila Midgut.,"Deciphering contributions of specific cell types to organ function is experimentally challenging. The Drosophila midgut is a dynamic organ with five morphologically and functionally distinct regions (R1-R5), each composed of multipotent intestinal stem cells (ISCs), progenitor enteroblasts (EBs), enteroendocrine cells (EEs), enterocytes (ECs), and visceral muscle (VM). To characterize cellular specialization and regional function in this organ, we generated RNA-sequencing transcriptomes of all five cell types isolated by FACS from each of the five regions, R1-R5. In doing so, we identify transcriptional diversities among cell types and document regional differences within each cell type that define further specialization. We validate cell-specific and regional Gal4 drivers; demonstrate roles for transporter Smvt and transcription factors GATAe, Sna, and Ptx1 in global and regional ISC regulation, and study the transcriptional response of midgut cells upon infection. The resulting transcriptome database (http://flygutseq.buchonlab.com) will foster studies of regionalization, homeostasis, immunity, and cell-cell interactions.",2015-07-02 +26133946,"The direct effect of exposure to disease in early life on the height of young adult men in southern Sweden, 1814-1948.","This paper considers whether short-term variation in exposure to disease early in life, as measured by a variety of mortality rates, has an effect on the height of young adults. 
Height information for men born in southern Sweden, 1814-1948, and included in the Scanian Economic Demographic Database (SEDD), was obtained from records of medical inspections carried out as part of Sweden's system of universal conscription. Community-level infant mortality rates were calculated not only by year of birth but also for time in utero and in the first year of life. Comparison between brothers was used to remove the influence of confounding factors. The results suggest that any effect that exposure to disease in early life, as measured by mortality rates, may have had on height, either through selection or scarring, is likely to have been very weak. Supplementary material for this article is available at: http://dx.doi.org/10.1080/00324728.2015.1045545.",2015-07-02 +26098815,From Ramachandran Maps to Tertiary Structures of Proteins.,"Sequence to structure of proteins is an unsolved problem. A possible coarse grained resolution to this entails specification of all the torsional (Φ, Ψ) angles along the backbone of the polypeptide chain. The Ramachandran map quite elegantly depicts the allowed conformational (Φ, Ψ) space of proteins which is still very large for the purposes of accurate structure generation. We have divided the allowed (Φ, Ψ) space in Ramachandran maps into 27 distinct conformations sufficient to regenerate a structure to within 5 Å from the native, at least for small proteins, thus reducing the structure prediction problem to a specification of an alphanumeric string, i.e., the amino acid sequence together with one of the 27 conformations preferred by each amino acid residue. This still theoretically results in 27(n) conformations for a protein comprising ""n"" amino acids. 
We then investigated the spatial correlations at the two-residue (dipeptide) and three-residue (tripeptide) levels in what may be described as higher order Ramachandran maps, with the premise that the allowed conformational space starts to shrink as we introduce neighborhood effects. We found, for instance, for a tripeptide which potentially can exist in any of the 27(3) ""allowed"" conformations, three-fourths of these conformations are redundant to the 95% confidence level, suggesting sequence context dependent preferred conformations. We then created a look-up table of preferred conformations at the tripeptide level and correlated them with energetically favorable conformations. We found in particular that Boltzmann probabilities calculated from van der Waals energies for each conformation of tripeptides correlate well with the observed populations in the structural database (the average correlation coefficient is ∼0.8). An alpha-numeric string and hence the tertiary structure can be generated for any sequence from the look-up table within minutes on a single processor and to a higher level of accuracy if secondary structure can be specified. We tested the methodology on 100 small proteins, and in 90% of the cases, a structure within 5 Å is recovered. We thus believe that the method presented here provides the missing link between Ramachandran maps and tertiary structures of proteins. A Web server to convert a tertiary structure to an alphanumeric string and to predict the tertiary structure from the sequence of a protein using the above methodology is created and made freely accessible at http://www.scfbio-iitd.res.in/software/proteomics/rm2ts.jsp.",2015-07-02 +27227915,An Online Atlas for Exploring Spatio-Temporal Patterns of Cancer Mortality (1972-2011) and Incidence (1995-2008) in Taiwan.,"Public health mapping and Geographical Information Systems (GIS) are already being used to locate the geographical spread of diseases. 
This study describes the construction of an easy-to-use online atlas of cancer mortality (1972-2011) and incidence (1995-2008) in Taiwan.Two sets of color maps were made based on ""age-adjusted mortality by rate"" and ""age-adjusted mortality by rank."" AJAX (Asynchronous JavaScript and XML), JSON (JavaScript Object Notation), and SVG (Scaling Vector Graphic) were used to create the online atlas. Spatio-temporal patterns of cancer mortality and incidence in Taiwan over the period from 1972 to 2011 and from 1995 to 2008.The constructed online atlas contains information on cancer mortality and incidence (http://taiwancancermap.csmu-liawyp.tw/). The common GIS functions include zoom and pan and identity tools. Users can easily customize the maps to explore the spatio-temporal trends of cancer mortality and incidence using different devices (such as personal computers, mobile phone, or pad). This study suggests an easy- to-use, low-cost, and independent platform for exploring cancer incidence and mortality. It is expected to serve as a reference tool for cancer prevention and risk assessment.This online atlas is a cheap and fast tool that integrates various cancer maps. Therefore, it can serve as a powerful tool that allows users to examine and compare spatio-temporal patterns of various maps. Furthermore, it is an-easy-to use tool for updating data and assessing risk factors of cancer in Taiwan.",2016-05-01 +21672955,PHAST: a fast phage search tool.,"PHAge Search Tool (PHAST) is a web server designed to rapidly and accurately identify, annotate and graphically display prophage sequences within bacterial genomes or plasmids. It accepts either raw DNA sequence data or partially annotated GenBank formatted data and rapidly performs a number of database comparisons as well as phage 'cornerstone' feature identification steps to locate, annotate and display prophage sequences and prophage features. 
Relative to other prophage identification tools, PHAST is up to 40 times faster and up to 15% more sensitive. It is also able to process and annotate both raw DNA sequence data and Genbank files, provide richly annotated tables on prophage features and prophage 'quality' and distinguish between intact and incomplete prophage. PHAST also generates downloadable, high quality, interactive graphics that display all identified prophage components in both circular and linear genomic views. PHAST is available at (http://phast.wishartlab.com).",2011-06-14 +23262288,"LepChorionDB, a database of Lepidopteran chorion proteins and a set of tools useful for the identification of chorion proteins in Lepidopteran proteomes.","Chorion proteins of Lepidoptera have a tripartite structure, which consists of a central domain and two, more variable, flanking arms. The central domain is highly conserved and it is used for the classification of chorion proteins into two major classes, A and B. Annotated and unreviewed Lepidopteran chorion protein sequences are available in various databases. A database, named LepChorionDB, was constructed by searching 5 different protein databases using class A and B central domain-specific profile Hidden Markov Models (pHMMs), developed in this work. A total of 413 Lepidopteran chorion proteins from 9 moths and 1 butterfly species were retrieved. These data were enriched and organised in order to populate LepChorionDB, the first relational database, available on the web, containing Lepidopteran chorion proteins grouped in A and B classes. LepChorionDB may provide insights in future functional and evolutionary studies of Lepidopteran chorion proteins and thus, it will be a useful tool for the Lepidopteran scientific community and Lepidopteran genome annotators, since it also provides access to the two pHMMs developed in this work, which may be used to discriminate A and B class chorion proteins. 
LepChorionDB is freely available at http://bioinformatics.biol.uoa.gr/LepChorionDB.",2012-12-20 +26564687,[Management of impacted cuspid--July 2015].,"The French Society of Stomatology, Oral and Maxillofacial Surgery (SFSCMFCO) together with the Medical Society of Dento-Maxillofacial Orthopedics has drawn up in 2015 a new practice guideline concerning the management of one or several impacted cuspids. As the previous ones, this guideline is based on a rigorous French Heath Regulation Authorities type methodology. It is thus intended to become a major reference in its field. We report hereafter the short version of the text in the same way it has been presented during the 2015 French National Congress of the SFSCMFCO in Lyon - France. Each of these recommendations is marked A, B or C according to a decreasing evidence based rating scale. Lacking any evidence-based data, the recommendation is considered as an expert opinion (AE). The full text of this guideline is available on the website of the SFSCMFCO at the following address: http://www.sfscmfco.fr/; ""Recommandations de bonnes pratiques"" section. A patient information sheet is also proposed by the working group. Happy reading.",2015-11-10 +25970778,"CEMTDD: The database for elucidating the relationships among herbs, compounds, targets and related diseases for Chinese ethnic minority traditional drugs.","China has different ethnic minorities that establish their own medical systems and practice experience for thousand years, thereafter named Chinese Ethnic Minority Traditional Drugs (CEMTDs) (http://www.cemtdd.com/index.html). Since many compounds from CEMTDs have been reported to perturb human's dysfunction network and restore human normal physiological conditions, the relationships amongst a series of compounds from specific herbs, their targets and relevant diseases have become our main focus in CEMTD modernization. 
Herein, we have constructed the first Chinese Ethnic Minority Traditional Drug Database (CEMTDD) mainly from Xinjiang Uygur Autonomous Region (XUAR), retrieving CEMTD-related information from different resources. CEMTDD contains about 621 herbs, 4, 060 compounds, 2, 163 targets and 210 diseases, among which most of herbs can be applied into gerontology therapy including inflammation, cardiovascular disease and neurodegenerative disease. Gerontology is highly occurred in XUAR, and has abundant experience in treating such diseases, which may benefit for developing a new gerontology therapeutic strategy. CEMTDD displays networks for intricate relationships between CEMTDs and treated diseases, as well as the interrelations between active compounds and action targets, which may shed new light on the combination therapy of CEMTDs and further understanding of their herb molecular mechanisms for better modernized utilizations of CEMTDs, especially in gerontology.",2015-07-01 +27547378,Biomedical Mutation Analysis (BMA): A software tool for analyzing mutations associated with antiviral resistance.,"

Introduction

Hepatitis C virus (HCV) is considered a major public health problem, with 200 million people infected worldwide. The treatment for HCV chronic infection with pegylated interferon alpha plus ribavirin inhibitors is unspecific; consequently, the treatment is effective in only 50% of patients infected. This has prompted the development of direct-acting antivirals (DAA) that target virus proteins. These DAA have demonstrated a potent effect in vitro and in vivo; however, virus mutations associated with the development of resistance have been described.

Objective

To design and develop an online information system for detecting mutations in amino acids known to be implicated in resistance to DAA.

Materials and methods

We have used computer applications, technological tools, standard languages, infrastructure systems and algorithms to analyze positions associated with resistance to DAA for the NS3, NS5A, and NS5B genes of HCV.

Results

We have designed and developed an online information system named Biomedical Mutation Analysis (BMA), which allows users to calculate changes in nucleotide and amino acid sequences for each selected sequence from conventional Sanger and cloning sequencing using a graphical interface.

Conclusion

BMA quickly, easily and effectively analyzes mutations, including complete documentation and examples. Furthermore, the development of different visualization techniques allows proper interpretation and understanding of the results. The data obtained using BMA will be useful for the assessment and surveillance of HCV resistance to new antivirals, and for the treatment regimens by selecting those DAA to which the virus is not resistant, avoiding unnecessary treatment failures. The software is available at: http://bma.itiud.org.",2016-06-03 +26710610,Comparison of different PCR primers on detecting arbuscular mycorrhizal communities inside plant roots.,"

Objective

Communities of arbuscular mycorrhizal fungi (AMF) colonizing roots have been increasingly investigated by molecular approaches with AMF-specific PCR primers. However, it is difficult to compare the species diversity and species compositions of AMF communities across various studies due to the PCR primers used differently, and also little is known if significant difference of community compositions is characterized by different primers. We aim to compare the difference of efficiency of four primers for AMF.

Methods

We chose four commonly used AMF-specific primer combinations (NS31-AM1, AML1-AML2, NS31-AML2 and SSUmCf-LSUmBr), and used 18S rDNA clone libraries to describe the AMF diversity and community.

Results

Our results showed that the specificity and coverage varied among the tested primers, different primer combinations would yield distinct patterns of species diversity and composition of AMF community. SSUmCf-LSUmBr had the best specificity and coverage in amplifying AMF sequences, followed by NS31-AML2 and NS31-AM1, and AML1-AML2 showed the lowest specificity towards AMF sequences.

Conclusion

SSUmCf-LSUmBr is not the optimal primer pair for AMF community study in current stage due to limited reference sequences and large DNA size. As an alternative, NS31-AML2 is more suitable in AMF community study, because its target rDNA region could well match the increasingly used virtual taxonomy database (http://maarjam. botany.ut.ee) and also its suitable DNA size could be efficiently used in high-throughput sequencing.",2015-07-01 +26357317,Curatable Named-Entity Recognition Using Semantic Relations.,"Named-entity recognition (NER) plays an important role in the development of biomedical databases. However, the existing NER tools produce multifarious named-entities which may result in both curatable and non-curatable markers. To facilitate biocuration with a straightforward approach, classifying curatable named-entities is helpful with regard to accelerating the biocuration workflow. Co-occurrence Interaction Nexus with Named-entity Recognition (CoINNER) is a web-based tool that allows users to identify genes, chemicals, diseases, and action term mentions in the Comparative Toxicogenomic Database (CTD). To further discover interactions, CoINNER uses multiple advanced algorithms to recognize the mentions in the BioCreative IV CTD Track. CoINNER is developed based on a prototype system that annotated gene, chemical, and disease mentions in PubMed abstracts at BioCreative 2012 Track I (literature triage). We extended our previous system in developing CoINNER. The pre-tagging results of CoINNER were developed based on the state-of-the-art named entity recognition tools in BioCreative III. Next, a method based on conditional random fields (CRFs) is proposed to predict chemical and disease mentions in the articles. Finally, action term mentions were collected by latent Dirichlet allocation (LDA). At the BioCreative IV CTD Track, the best F-measures reached for gene/protein, chemical/drug and disease NER were 54 percent while CoINNER achieved a 61.5 percent F-measure. 
System URL: http://ikmbio.csie.ncku.edu.tw/coinner/ introduction.htm.",2015-07-01 +26133389,Quest for Orthologs Entails Quest for Tree of Life: In Search of the Gene Stream.,"Quest for Orthologs (QfO) is a community effort with the goal to improve and benchmark orthology predictions. As quality assessment assumes prior knowledge on species phylogenies, we investigated the congruency between existing species trees by comparing the relationships of 147 QfO reference organisms from six Tree of Life (ToL)/species tree projects: The National Center for Biotechnology Information (NCBI) taxonomy, Opentree of Life, the sequenced species/species ToL, the 16S ribosomal RNA (rRNA) database, and trees published by Ciccarelli et al. (Ciccarelli FD, et al. 2006. Toward automatic reconstruction of a highly resolved tree of life. Science 311:1283-1287) and by Huerta-Cepas et al. (Huerta-Cepas J, Marcet-Houben M, Gabaldon T. 2014. A nested phylogenetic reconstruction approach provides scalable resolution in the eukaryotic Tree Of Life. PeerJ PrePrints 2:223) Our study reveals that each species tree suggests a different phylogeny: 87 of the 146 (60%) possible splits of a dichotomous and rooted tree are congruent, while all other splits are incongruent in at least one of the species trees. Topological differences are observed not only at deep speciation events, but also within younger clades, such as Hominidae, Rodentia, Laurasiatheria, or rosids. The evolutionary relationships of 27 archaea and bacteria are highly inconsistent. By assessing 458,108 gene trees from 65 genomes, we show that consistent species topologies are more often supported by gene phylogenies than contradicting ones. The largest concordant species tree includes 77 of the QfO reference organisms at the most. 
Results are summarized in the form of a consensus ToL (http://swisstree.vital-it.ch/species_tree) that can serve different benchmarking purposes.",2015-07-01 +26133894,Preparing a collection of radiology examinations for distribution and retrieval.,"

Objective

Clinical documents made available for secondary use play an increasingly important role in discovery of clinical knowledge, development of research methods, and education. An important step in facilitating secondary use of clinical document collections is easy access to descriptions and samples that represent the content of the collections. This paper presents an approach to developing a collection of radiology examinations, including both the images and radiologist narrative reports, and making them publicly available in a searchable database.

Materials and methods

The authors collected 3996 radiology reports from the Indiana Network for Patient Care and 8121 associated images from the hospitals' picture archiving systems. The images and reports were de-identified automatically and then the automatic de-identification was manually verified. The authors coded the key findings of the reports and empirically assessed the benefits of manual coding on retrieval.

Results

The automatic de-identification of the narrative was aggressive and achieved 100% precision at the cost of rendering a few findings uninterpretable. Automatic de-identification of images was not quite as perfect. Images for two of 3996 patients (0.05%) showed protected health information. Manual encoding of findings improved retrieval precision.

Conclusion

Stringent de-identification methods can remove all identifiers from text radiology reports. DICOM de-identification of images does not remove all identifying information and needs special attention to images scanned from film. Adding manual coding to the radiologist narrative reports significantly improved relevancy of the retrieved clinical documents. The de-identified Indiana chest X-ray collection is available for searching and downloading from the National Library of Medicine (http://openi.nlm.nih.gov/).",2015-07-01 +24087878,OpenMSI: a high-performance web-based platform for mass spectrometry imaging.,"Mass spectrometry imaging (MSI) enables researchers to directly probe endogenous molecules directly within the architecture of the biological matrix. Unfortunately, efficient access, management, and analysis of the data generated by MSI approaches remain major challenges to this rapidly developing field. Despite the availability of numerous dedicated file formats and software packages, it is a widely held viewpoint that the biggest challenge is simply opening, sharing, and analyzing a file without loss of information. Here we present OpenMSI, a software framework and platform that addresses these challenges via an advanced, high-performance, extensible file format and Web API for remote data access (http://openmsi.nersc.gov). The OpenMSI file format supports storage of raw MSI data, metadata, and derived analyses in a single, self-describing format based on HDF5 and is supported by a large range of analysis software (e.g., Matlab and R) and programming languages (e.g., C++, Fortran, and Python). Careful optimization of the storage layout of MSI data sets using chunking, compression, and data replication accelerates common, selective data access operations while minimizing data storage requirements and are critical enablers of rapid data I/O. 
The OpenMSI file format has shown to provide >2000-fold improvement for image access operations, enabling spectrum and image retrieval in less than 0.3 s across the Internet even for 50 GB MSI data sets. To make remote high-performance compute resources accessible for analysis and to facilitate data sharing and collaboration, we describe an easy-to-use yet powerful Web API, enabling fast and convenient access to MSI data, metadata, and derived analysis results stored remotely to facilitate high-performance data analysis and enable implementation of Web based data sharing, visualization, and analysis.",2013-10-25 +23185041,ChemProt-2.0: visual navigation in a disease chemical biology database.,"ChemProt-2.0 (http://www.cbs.dtu.dk/services/ChemProt-2.0) is a public available compilation of multiple chemical-protein annotation resources integrated with diseases and clinical outcomes information. The database has been updated to >1.15 million compounds with 5.32 millions bioactivity measurements for 15 290 proteins. Each protein is linked to quality-scored human protein-protein interactions data based on more than half a million interactions, for studying diseases and biological outcomes (diseases, pathways and GO terms) through protein complexes. In ChemProt-2.0, therapeutic effects as well as adverse drug reactions have been integrated allowing for suggesting proteins associated to clinical outcomes. New chemical structure fingerprints were computed based on the similarity ensemble approach. Protein sequence similarity search was also integrated to evaluate the promiscuity of proteins, which can help in the prediction of off-target effects. Finally, the database was integrated into a visual interface that enables navigation of the pharmacological space for small molecules. 
Filtering options were included in order to facilitate and to guide dynamic search of specific queries.",2012-11-26 +25461337,Right ventricle segmentation from cardiac MRI: a collation study.,"Magnetic Resonance Imaging (MRI), a reference examination for cardiac morphology and function in humans, allows to image the cardiac right ventricle (RV) with high spatial resolution. The segmentation of the RV is a difficult task due to the variable shape of the RV and its ill-defined borders in these images. The aim of this paper is to evaluate several RV segmentation algorithms on common data. More precisely, we report here the results of the Right Ventricle Segmentation Challenge (RVSC), concretized during the MICCAI'12 Conference with an on-site competition. Seven automated and semi-automated methods have been considered, along them three atlas-based methods, two prior based methods, and two prior-free, image-driven methods that make use of cardiac motion. The obtained contours were compared against a manual tracing by an expert cardiac radiologist, taken as a reference, using Dice metric and Hausdorff distance. We herein describe the cardiac data composed of 48 patients, the evaluation protocol and the results. Best results show that an average 80% Dice accuracy and a 1cm Hausdorff distance can be expected from semi-automated algorithms for this challenging task on the datasets, and that an automated algorithm can reach similar performance, at the expense of a high computational burden. Data are now publicly available and the website remains open for new submissions (http://www.litislab.eu/rvsc/).",2014-10-28 +22800569,Modeling of folds and folding pathways for some protein families of (α + β)- and (α/β)-classes.,"In this paper, updated structural trees for α/β-proteins containing five- and seven-segment (α/β)-motifs are represented. Novel structural motifs occurring in some families of (α + β)- and (α/β)-proteins are also characterized. 
Databases of these proteins have been compiled from the Protein Data Bank (PDB) and Structural Classification of Proteins (SCOP) and the corresponding structural trees have been constructed. The classification of these proteins has been developed and organized as an extension of the PCBOST database, which is available at http://strees.protres.ru . In total, the updated Protein Classification Based on Structural Trees database contains 11 structural trees, 106 levels, 635 folds, 4911 proteins and domains, and 14,202 PDB entries.",2012-07-16 +22847935,GlycoCD: a repository for carbohydrate-related CD antigens.,

Summary

The open access comprehensive GlycoCD database application is for representation and retrieval of carbohydrate-related clusters of differentiation (CDs). The main objective of this database platform is to provide information about interactions of carbohydrate moieties with proteins that are important for identification of specific cell surface molecule with a focus on the integration of data from carbohydrate microarray databases. GlycoCD database comprises two sections: the carbohydrate recognition CD and glycan CD. It allows easy access through a user-friendly web interface to all carbohydrate-defined CDs and those that interact with carbohydrates along with other relevant information.

Availability

The database is freely available at http://glycosciences.de/glycocd/index.php

Contact

r.s-albiez@dkfz.de.,2012-07-30 +21875969,An ontology of fungal subcellular traits.,"

Unlabelled

Premise of the study

The Fungal Subcellular Ontology used in the Assembling the Fungal Tree of Life project is a taxon-wide ontology (controlled vocabulary for attributes) designed to clarify and integrate the broad range of subcellular characters and character states used in higher-level fungal systematics. As in the algae, cellular characters are important phylogenetic markers in kingdom Fungi. The Fungal Subcellular Ontology has been developed primarily to help researchers, especially systematists, in their search for information on subcellular characters across the Fungi, and it complements existing biological ontologies, including the Gene Ontology. •

Methods

The character and character state data set used in the Assembling the Fungal Tree of Life Structural and Biochemical Database (http://aftol.umn.edu) is the source of terms for generating the ontology. After the terms were accessioned and defined, they were combined in OBO-Edit file format, and the ontology was edited using OBO-Edit, an open source Java tool supported by the Gene Ontology project. •

Key results

The Fungal Subcellular Ontology covers both model and nonmodel fungi in great detail and is downloadable in OBO-Edit format at website http://aftol.umn.edu/ontology/fungal_subcellular.obo. •

Conclusions

The ontology provides a controlled vocabulary of fungal subcellular terms and functions as an operating framework for the Assembling the Fungal Tree of Life Structural and Biochemical Database. An ontology-based design enhances reuse of data deposited in the Structural and Biochemical Database from other independent biological and genetic databases. Data integration approaches that advance access to data from the diversity of biological databases are imperative as interdisciplinary research gains importance. In this sense, the Fungal Subcellular Ontology becomes highly relevant to mycologists as well as nonmycologists because fungi interact actively as symbionts and parasites or passively with many other life forms.",2011-08-29 +21584191,MTB-PCDB: Mycobacterium tuberculosis proteome comparison database.,"

Unlabelled

The Mycobacterium tuberculosis Proteome Comparison Database (MTB-PCDB) is an online database providing integrated access to proteome sequence comparison data for five strains of Mycobacterium tuberculosis (H37Rv, H37Ra, CDC 1551, F11 and KZN 1435) sequenced completely so far. MTB-PCDB currently hosts 40252 protein sequence comparison data obtained through inter-strain proteome comparison of five different strains of MTB. 2373 proteins were found to be identical in all 5 strains using MTB H(37)Rv as reference strain. To enable wide use of this data, MTB-PCDB provides a set of tools for searching, browsing, analyzing and downloading the data. By bringing together, M. tuberculosis proteome comparison among virulent & avirulent strains and also drug susceptible & drug resistance strains MTB-PCDB provides a unique discovery platform for comparative proteomics among these strains which may give insights into the discovery & development of TB drugs, vaccines and biomarkers.

Availability

The database is available for free at http://www.bicjbtdrc-mgims.in/MTB-PCDB/",2011-04-22 +23650520,CLUSTOM: a novel method for clustering 16S rRNA next generation sequences by overlap minimization.,"The recent nucleic acid sequencing revolution driven by shotgun and high-throughput technologies has led to a rapid increase in the number of sequences for microbial communities. The availability of 16S ribosomal RNA (rRNA) gene sequences from a multitude of natural environments now offers a unique opportunity to study microbial diversity and community structure. The large volume of sequencing data however makes it time consuming to assign individual sequences to phylotypes by searching them against public databases. Since ribosomal sequences have diverged across prokaryotic species, they can be grouped into clusters that represent operational taxonomic units. However, available clustering programs suffer from overlap of sequence spaces in adjacent clusters. In natural environments, gene sequences are homogenous within species but divergent between species. This evolutionary constraint results in an uneven distribution of genetic distances of genes in sequence space. To cluster 16S rRNA sequences more accurately, it is therefore essential to select core sequences that are located at the centers of the distributions represented by the genetic distance of sequences in taxonomic units. Based on this idea, we here describe a novel sequence clustering algorithm named CLUSTOM that minimizes the overlaps between adjacent clusters. The performance of this algorithm was evaluated in a comparative exercise with existing programs, using the reference sequences of the SILVA database as well as published pyrosequencing datasets. The test revealed that our algorithm achieves higher accuracy than ESPRIT-Tree and mothur, few of the best clustering algorithms. 
Results indicate that the concept of an uneven distribution of sequence distances can effectively and successfully cluster 16S rRNA gene sequences. The algorithm of CLUSTOM has been implemented both as a web and as a standalone command line application, which are available at http://clustom.kribb.re.kr.",2013-05-01 +26369336,Chi8: a GPU program for detecting significant interacting SNPs with the Chi-square 8-df test.,"

Background

Determining interacting SNPs in genome-wide association studies is computationally expensive yet of considerable interest in genomics.

Findings

We present a program Chi8 that calculates the Chi-square 8 degree of freedom test between all pairs of SNPs in a brute force manner on a Graphics Processing Unit. We analyze each of the seven WTCCC genome-wide association studies that have about 5000 total case and controls and 400,000 SNPs in an average of 9.6 h on a single GPU. We also study the power, false positives, and area under curve of our program on simulated data and provide a comparison to the GBOOST program. Our program source code is freely available from http://www.cs.njit.edu/usman/Chi8.",2015-09-14 +27491653,ADOMA: A Command Line Tool to Modify ClustalW Multiple Alignment Output.,"We present ADOMA, a command line tool that produces alternative outputs from ClustalW multiple alignments of nucleotide or protein sequences. ADOMA can simplify the output of alignments by showing only the different residues between sequences, which is often desirable when only small differences such as single nucleotide polymorphisms are present (e.g., between different alleles). Another feature of ADOMA is that it can enhance the ClustalW output by coloring the residues in the alignment. This tool is easily integrated into automated Linux pipelines for next-generation sequencing data analysis, and may be useful for researchers in a broad range of scientific disciplines including evolutionary biology and biomedical sciences. The source code is freely available at https://sourceforge.net/projects/adoma/.",2015-08-31 +27489955,SNVSniffer: an integrated caller for germline and somatic single-nucleotide and indel mutations.,"

Background

Various approaches to calling single-nucleotide variants (SNVs) or insertion-or-deletion (indel) mutations have been developed based on next-generation sequencing (NGS). However, most of them are dedicated to a particular type of mutation, e.g. germline SNVs in normal cells, somatic SNVs in cancer/tumor cells, or indels only. In the literature, efficient and integrated callers for both germline and somatic SNVs/indels have not yet been extensively investigated.

Results

We present SNVSniffer, an efficient and integrated caller identifying both germline and somatic SNVs/indels from NGS data. In this algorithm, we propose the use of Bayesian probabilistic models to identify SNVs and investigate a multiple ungapped alignment approach to call indels. For germline variant calling, we model allele counts per site to follow a multinomial conditional distribution. For somatic variant calling, we rely on paired tumor-normal pairs from identical individuals and introduce a hybrid subtraction and joint sample analysis approach by modeling tumor-normal allele counts per site to follow a joint multinomial conditional distribution. A comprehensive performance evaluation has been conducted using a diversity of variant calling benchmarks. For germline variant calling, SNVSniffer demonstrates highly competitive accuracy with superior speed in comparison with the state-of-the-art FaSD, GATK and SAMtools. For somatic variant calling, our algorithm achieves comparable or even better accuracy, at fast speed, than the leading VarScan2, SomaticSniper, JointSNVMix2 and MuTect.

Conclusions

SNVSniffer demonstrates the feasibility to develop integrated solutions to fast and efficient identification of germline and somatic variants. Nonetheless, accurate discovery of genetic variations is critical yet challenging, and still requires substantially more research efforts being devoted. SNVSniffer and synthetic samples are publicly available at http://snvsniffer.sourceforge.net .",2016-08-01 +26510841,"VariantMetaCaller: automated fusion of variant calling pipelines for quantitative, precision-based filtering.","

Background

The low concordance between different variant calling methods still poses a challenge for the wide-spread application of next-generation sequencing in research and clinical practice. A wide range of variant annotations can be used for filtering call sets in order to improve the precision of the variant calls, but the choice of the appropriate filtering thresholds is not straightforward. Variant quality score recalibration provides an alternative solution to hard filtering, but it requires large-scale, genomic data.

Results

We evaluated germline variant calling pipelines based on BWA and Bowtie 2 aligners in combination with GATK UnifiedGenotyper, GATK HaplotypeCaller, FreeBayes and SAMtools variant callers, using simulated and real benchmark sequencing data (NA12878 with Illumina Platinum Genomes). We argue that these pipelines are not merely discordant, but they extract complementary useful information. We introduce VariantMetaCaller to test the hypothesis that the automated fusion of measurement related information allows better performance than the recommended hard-filtering settings or recalibration and the fusion of the individual call sets without using annotations. VariantMetaCaller uses Support Vector Machines to combine multiple information sources generated by variant calling pipelines and estimates probabilities of variants. This novel method had significantly higher sensitivity and precision than the individual variant callers in all target region sizes, ranging from a few hundred kilobases to whole exomes. We also demonstrated that VariantMetaCaller supports a quantitative, precision based filtering of variants under wider conditions. Specifically, the computed probabilities of the variants can be used to order the variants, and for a given threshold, probabilities can be used to estimate precision. Precision then can be directly translated to the number of true called variants, or equivalently, to the number of false calls, which allows finding problem-specific balance between sensitivity and precision.

Conclusions

VariantMetaCaller can be applied to small target regions and whole exomes as well, and it can be used in cases of organisms for which highly accurate variant call sets are not yet available, therefore it can be a viable alternative to hard filtering in cases where variant quality score recalibration cannot be used. VariantMetaCaller is freely available at http://bioinformatics.mit.bme.hu/VariantMetaCaller .",2015-10-28 +25183434,Locus minimization in breed prediction using artificial neural network approach.,"Molecular markers, viz. microsatellites and single nucleotide polymorphisms, have revolutionized breed identification through the use of small samples of biological tissue or germplasm, such as blood, carcass samples, embryos, ova and semen, that show no evident phenotype. Classical tools of molecular data analysis for breed identification have limitations, such as the unavailability of referral breed data, causing increased cost of collection each time, compromised computational accuracy and complexity of the methodology used. We report here the successful use of an artificial neural network (ANN) in background to decrease the cost of genotyping by locus minimization. The webserver is freely accessible (http://nabg.iasri.res.in/bisgoat) to the research community. We demonstrate that the machine learning (ANN) approach for breed identification is capable of multifold advantages such as locus minimization, leading to a drastic reduction in cost, and web availability of reference breed data, alleviating the need for repeated genotyping each time one investigates the identity of an unknown breed. To develop this model web implementation based on ANN, we used 51,850 samples of allelic data of microsatellite-marker-based DNA fingerprinting on 25 loci covering 22 registered goat breeds of India for training. Minimizing loci to up to nine loci through the use of a multilayer perceptron model, we achieved 96.63% training accuracy. 
This server can be an indispensable tool for identification of existing breeds and new synthetic commercial breeds, leading to protection of intellectual property in case of sovereignty and bio-piracy disputes. This server can be widely used as a model for cost reduction by locus minimization for various other flora and fauna in terms of variety, breed and/or line identification, especially in conservation and improvement programs.",2014-09-03 +23152263,Interventions for drooling in children with cerebral palsy.,"

Background

Drooling is a common problem for children with cerebral palsy (CP). This can be distressing for these children as well as for their parents and caregivers. The consequences of drooling include risk of social rejection, damp and soiled clothing, unpleasant odour, irritated chapped skin, mouth infections, dehydration, interference with speech, damage to books, communication aids, computers, and the risk of social isolation (Blasco 1992; Van der Burg 2006). A range of interventions exist that aim to reduce or eliminate drooling. There is a lack of consensus regarding which interventions are most effective for children with CP.

Objectives

(1) To evaluate the effectiveness and safety of interventions aimed at reducing or eliminating drooling in children with cerebral palsy. (2) To provide the best available evidence to inform clinical practice. (3) To assist with future research planning.

Search methods

We searched the following databases from inception to December 2010: Cochrane Central Register of Controlled Trials (CENTRAL); Medline via Ovid; EMBASE; CINAHL; ERIC; PsycINFO; Web of Science; Web of Knowledge; AMED; SCOPUS; Dissertation Abstracts. We searched for ongoing clinical trials in the Clinical Trials web site (http://clinicaltrials.gov) and in the Current Controlled Trials web site (http://www.controlled-trials.com/). We hand searched a range of relevant journals and conference proceeding abstracts.

Selection criteria

Only randomised controlled trials (RCTs) and controlled clinical trials (CCTs) were included.

Data collection and analysis

Data were extracted independently by MW, MS and LP and differences resolved through discussion.

Main results

Six studies were eligible for inclusion in the review. Four of these studies were trials using botulinum toxin-A (BoNT-A) and two were trials on the pharmacological interventions, benztropine and glycopyrrolate. No RCTs or CCTs were retrieved on surgery, physical, oro-motor and oro-sensory therapies, behavioural interventions, intra-oral appliances or acupuncture. In the studies eligible for review, there was considerable heterogeneity within and across interventions and a meta-analysis was not possible. A descriptive summary of each study is provided. All studies showed some statistically significant change for treatment groups up to 1 month post intervention. However, there were methodological flaws associated with all six studies.

Authors' conclusions

It was not possible to reach a conclusion on the effectiveness and safety of either BoNT-A or the pharmaceutical interventions, benztropine and glycopyrrolate. There is insufficient evidence to inform clinical practice on interventions for drooling in children with CP. Directions for future research are provided.",2012-11-14 +23110448,YeastIP: a database for identification and phylogeny of Saccharomycotina yeasts.,"With the advances in sequencing techniques, identification of ascomycetous yeasts to the species level and phylogeny reconstruction increasingly require curated and updated taxonomic information. A specific database with nucleotide sequences of the most common markers used for yeast taxonomy and phylogeny and a user-friendly interface allowing identification, taxonomy and phylogeny of yeasts species was developed. By 1 September 2012, the YeastIP database contained all the described Saccharomycotina species for which sequences used for taxonomy and phylogeny, such as D1/D2 rDNA and ITS, are available. The database interface was developed to provide a maximum of relevant information and data mining tools, including the following features: (1) the blast n program for the sequences of the YeastIP database; (2) easy retrieval of selected sequences; (3) display of the available markers for each selected group of species; and (4) a tool to concatenate marker sequences, including those provided by the user. The concatenation tool allows phylogeny reconstruction through a direct link to the Phylogeny.fr platform. YeastIP is thus a unique database in that it provides taxonomic information and guides users in their taxonomic analyses. YeastIP facilitates multigenic analysis to encourage good practice in ascomycetous yeast phylogeny (URL: http://genome.jouy.inra.fr/yeastip.).",2012-12-17 +24659032,TroX: a new method to learn about the genesis of aneuploidy from trisomic products of conception.,"

Motivation

An estimated 10-30% of clinically recognized conceptions are aneuploid, leading to spontaneous miscarriages, in vitro fertilization failures and, when viable, severe developmental disabilities. With the ongoing reduction in the cost of genotyping and DNA sequencing, the use of high-density single nucleotide polymorphism (SNP) markers for clinical diagnosis of aneuploidy and biomedical research into its causes is becoming common practice. A reliable, flexible and computationally feasible method for inferring the sources of aneuploidy is thus crucial.

Results

We propose a new method, TroX, for analyzing human trisomy data using high density SNP markers from a trisomic individual or product of conception and one parent. Using a hidden Markov model, we infer the stage of the meiotic error (I or II) and the individual in which non-disjunction event occurred, as well as the crossover locations on the trisomic chromosome. A novel and important feature of the method is its reliance on data from the proband and only one parent, reducing the experimental cost by a third and enabling a larger set of data to be used. We evaluate our method by applying it to simulated trio data as well as to genotype data for 282 trios that include a child trisomic for chromosome 21. The analyses show the method to be highly reliable even when data from only one parent are available. With the increasing availability of DNA samples from mother and fetus, application of approaches such as ours should yield unprecedented insights into the genetic risk factors for aneuploidy.

Availability and implementation

An R package implementing TroX is available for download at http://przeworski.uchicago.edu/.",2014-03-21 +25805426,Real-Time Motion Capture Toolbox (RTMocap): an open-source code for recording 3-D motion kinematics to study action-effect anticipations during motor and social interactions.,"We present here a toolbox for the real-time motion capture of biological movements that runs in the cross-platform MATLAB environment (The MathWorks, Inc., Natick, MA). It provides instantaneous processing of the 3-D movement coordinates of up to 20 markers at a single instant. Available functions include (1) the setting of reference positions, areas, and trajectories of interest; (2) recording of the 3-D coordinates for each marker over the trial duration; and (3) the detection of events to use as triggers for external reinforcers (e.g., lights, sounds, or odors). Through fast online communication between the hardware controller and RTMocap, automatic trial selection is possible by means of either a preset or an adaptive criterion. Rapid preprocessing of signals is also provided, which includes artifact rejection, filtering, spline interpolation, and averaging. A key example is detailed, and three typical variations are developed (1) to provide a clear understanding of the importance of real-time control for 3-D motion in cognitive sciences and (2) to present users with simple lines of code that can be used as starting points for customizing experiments using the simple MATLAB syntax. 
RTMocap is freely available (http://sites.google.com/site/RTMocap/) under the GNU public license for noncommercial use and open-source development, together with sample data and extensive documentation.",2016-03-01 +26257915,"MUS-2, a novel variant of the chromosome-encoded β-lactamase MUS-1, from Myroides odoratimimus.","The aim of the present study was to investigate the molecular mechanism of carbapenem resistance of three imipenem-resistant isolates of Myroides odoratimimus recovered from two livestock farms of cows and pigeons by rectal swab in Lebanon in January 2014. Investigation of imipenem resistance of these isolates using the modified Hodge test, the EDTA test, the modified CarbaNP test and the matrix-assisted laser desorption/ionization time-of-flight mass spectrometry Ultraflex assay showed a carbapenemase activity due to the presence of a chromosome-encoded β-lactamase MUS, verified by PCR. However amplification and sequencing of this chromosomal gene showed a novel variant of it designated MUS-2 by the curators of the Lahey database of β-lactamases (http://www.lahey.org/Studies/webt.asp). Cloning of the bla MUS-2 was performed, followed by protein expression in Escherichia coli TOP 10. Pulsed-field gel electrophoresis clearly showed that the three isolates belonged to the same clone. This study reports a novel variant of the chromosome-encoded bla MUS-1 associated with carbapenem resistance in Myroides odoratimimus and shows that animals may represent a reservoir of bacteria harbouring several variants of resistance genes.",2015-06-27 +26454273,Jflow: a workflow management system for web applications.,"

Summary

Biologists produce large data sets and are in demand of rich and simple web portals in which they can upload and analyze their files. Providing such tools requires to mask the complexity induced by the needed High Performance Computing (HPC) environment. The connection between interface and computing infrastructure is usually specific to each portal. With Jflow, we introduce a Workflow Management System (WMS), composed of jQuery plug-ins which can easily be embedded in any web application and a Python library providing all requested features to setup, run and monitor workflows.

Availability and implementation

Jflow is available under the GNU General Public License (GPL) at http://bioinfo.genotoul.fr/jflow. The package is coming with full documentation, quick start and a running test portal.

Contact

Jerome.Mariette@toulouse.inra.fr.",2015-10-10 +26115965,A deeper look into Comet--implementation and features.,"The Comet database search software was initially released as an open source project in late 2012. Prior to that, Comet existed as the University of Washington's academic version of the SEQUEST database search tool. Despite its availability and widespread use over the years, some details about its implementation have not been previously disseminated or are not well understood. We address a few of these details in depth and highlight new features available in the latest release. Comet is freely available for download at http://comet-ms.sourceforge.net or it can be accessed as a component of a number of larger software projects into which it has been incorporated. Graphical Abstract ᅟ.",2015-06-27 +27472895,"Improvement in Protein Domain Identification Is Reached by Breaking Consensus, with the Agreement of Many Profiles and Domain Co-occurrence.","Traditional protein annotation methods describe known domains with probabilistic models representing consensus among homologous domain sequences. However, when relevant signals become too weak to be identified by a global consensus, attempts for annotation fail. Here we address the fundamental question of domain identification for highly divergent proteins. By using high performance computing, we demonstrate that the limits of state-of-the-art annotation methods can be bypassed. We design a new strategy based on the observation that many structural and functional protein constraints are not globally conserved through all species but might be locally conserved in separate clades. We propose a novel exploitation of the large amount of data available: 1. for each known protein domain, several probabilistic clade-centered models are constructed from a large and differentiated panel of homologous sequences, 2. a decision-making protocol combines outcomes obtained from multiple models, 3. 
a multi-criteria optimization algorithm finds the most likely protein architecture. The method is evaluated for domain and architecture prediction over several datasets and statistical testing hypotheses. Its performance is compared against HMMScan and HHblits, two widely used search methods based on sequence-profile and profile-profile comparison. Due to their closeness to actual protein sequences, clade-centered models are shown to be more specific and functionally predictive than the broadly used consensus models. Based on them, we improved annotation of Plasmodium falciparum protein sequences on a scale not previously possible. We successfully predict at least one domain for 72% of P. falciparum proteins against 63% achieved previously, corresponding to 30% of improvement over the total number of Pfam domain predictions on the whole genome. The method is applicable to any genome and opens new avenues to tackle evolutionary questions such as the reconstruction of ancient domain duplications, the reconstruction of the history of protein architectures, and the estimation of protein domain age. Website and software: http://www.lcqb.upmc.fr/CLADE.",2016-07-29 +24518221,"Time-series RNA-seq analysis package (TRAP) and its application to the analysis of rice, Oryza sativa L. ssp. Japonica, upon drought stress.","Measuring expression levels of genes at the whole genome level can be useful for many purposes, especially for revealing biological pathways underlying specific phenotype conditions. When gene expression is measured over a time period, we have opportunities to understand how organisms react to stress conditions over time. Thus many biologists routinely measure whole genome level gene expressions at multiple time points. However, there are several technical difficulties for analyzing such whole genome expression data. 
In addition, these days gene expression data is often measured by using RNA-sequencing rather than microarray technologies and then analysis of expression data is much more complicated since the analysis process should start with mapping short reads and produce differentially activated pathways and also possibly interactions among pathways. In addition, many useful tools for analyzing microarray gene expression data are not applicable for the RNA-seq data. Thus a comprehensive package for analyzing time series transcriptome data is much needed. In this article, we present a comprehensive package, Time-series RNA-seq Analysis Package (TRAP), integrating all necessary tasks such as mapping short reads, measuring gene expression levels, finding differentially expressed genes (DEGs), clustering and pathway analysis for time-series data in a single environment. In addition to implementing useful algorithms that are not available for RNA-seq data, we extended existing pathway analysis methods, ORA and SPIA, for time series analysis and estimates statistical values for combined dataset by an advanced metric. TRAP also produces visual summary of pathway interactions. Gene expression change labeling, a practical clustering method used in TRAP, enables more accurate interpretation of the data when combined with pathway analysis. We applied our methods on a real dataset for the analysis of rice (Oryza sativa L. Japonica nipponbare) upon drought stress. The result showed that TRAP was able to detect pathways more accurately than several existing methods. TRAP is available at http://biohealth.snu.ac.kr/software/TRAP/.",2014-02-08 +21385461,"LabKey Server: an open source platform for scientific data integration, analysis and collaboration.","

Background

Broad-based collaborations are becoming increasingly common among disease researchers. For example, the Global HIV Enterprise has united cross-disciplinary consortia to speed progress towards HIV vaccines through coordinated research across the boundaries of institutions, continents and specialties. New, end-to-end software tools for data and specimen management are necessary to achieve the ambitious goals of such alliances. These tools must enable researchers to organize and integrate heterogeneous data early in the discovery process, standardize processes, gain new insights into pooled data and collaborate securely.

Results

To meet these needs, we enhanced the LabKey Server platform, formerly known as CPAS. This freely available, open source software is maintained by professional engineers who use commercially proven practices for software development and maintenance. Recent enhancements support: (i) Submitting specimens requests across collaborating organizations (ii) Graphically defining new experimental data types, metadata and wizards for data collection (iii) Transitioning experimental results from a multiplicity of spreadsheets to custom tables in a shared database (iv) Securely organizing, integrating, analyzing, visualizing and sharing diverse data types, from clinical records to specimens to complex assays (v) Interacting dynamically with external data sources (vi) Tracking study participants and cohorts over time (vii) Developing custom interfaces using client libraries (viii) Authoring custom visualizations in a built-in R scripting environment. Diverse research organizations have adopted and adapted LabKey Server, including consortia within the Global HIV Enterprise. Atlas is an installation of LabKey Server that has been tailored to serve these consortia. It is in production use and demonstrates the core capabilities of LabKey Server. Atlas now has over 2,800 active user accounts originating from approximately 36 countries and 350 organizations. It tracks roughly 27,000 assay runs, 860,000 specimen vials and 1,300,000 vial transfers.

Conclusions

Sharing data, analysis tools and infrastructure can speed the efforts of large research consortia by enhancing efficiency and enabling new insights. The Atlas installation of LabKey Server demonstrates the utility of the LabKey platform for collaborative research. Stable, supported builds of LabKey Server are freely available for download at http://www.labkey.org. Documentation and source code are available under the Apache License 2.0.",2011-03-09 +27370413,Editor's Highlight: Sequence Alignment to Predict Across Species Susceptibility (SeqAPASS): A Web-Based Tool for Addressing the Challenges of Cross-Species Extrapolation of Chemical Toxicity.,"Conservation of a molecular target across species can be used as a line-of-evidence to predict the likelihood of chemical susceptibility. The web-based Sequence Alignment to Predict Across Species Susceptibility (SeqAPASS; https://seqapass.epa.gov/seqapass/) application was developed to simplify, streamline, and quantitatively assess protein sequence/structural similarity across taxonomic groups as a means to predict relative intrinsic susceptibility. The intent of the tool is to allow for evaluation of any potential protein target while remaining amenable to variable degrees of protein characterization, in the context of available information about the chemical/protein interaction and the molecular target itself. To accommodate this flexibility in the analysis, 3 levels of evaluation were developed. The first level of the SeqAPASS analysis compares primary amino acid sequences to a query sequence, calculating a metric for sequence similarity (including detection of orthologs); the second level evaluates sequence similarity within selected functional domains (eg, ligand-binding domain); and the third level of analysis compares individual amino acid residue positions of importance for protein conformation and/or interaction with the chemical upon binding. 
Each level of the SeqAPASS analysis provides additional evidence to apply toward rapid, screening-level assessments of probable cross species susceptibility. Such analyses can support prioritization of chemicals for further evaluation, selection of appropriate species for testing, extrapolation of empirical toxicity data, and/or assessment of the cross-species relevance of adverse outcome pathways. Three case studies are described herein to demonstrate application of the SeqAPASS tool: the first 2 focused on predictions of pollinator susceptibility to molt-accelerating compounds and neonicotinoid insecticides, and the third on evaluation of cross-species susceptibility to strobilurin fungicides. These analyses illustrate challenges in species extrapolation and demonstrate the broad utility of SeqAPASS for risk-based decision making and research.",2016-06-30 +25167919,Inferring copy number and genotype in tumour exome data.,"

Background

Using whole exome sequencing to predict aberrations in tumours is a cost effective alternative to whole genome sequencing, however is predominantly used for variant detection and infrequently utilised for detection of somatic copy number variation.

Results

We propose a new method to infer copy number and genotypes using whole exome data from paired tumour/normal samples. Our algorithm uses two Hidden Markov Models to predict copy number and genotypes and computationally resolves polyploidy/aneuploidy, normal cell contamination and signal baseline shift. Our method makes explicit detection on chromosome arm level events, which are commonly found in tumour samples. The methods are combined into a package named ADTEx (Aberration Detection in Tumour Exome). We applied our algorithm to a cohort of 17 in-house generated and 18 TCGA paired ovarian cancer/normal exomes and evaluated the performance by comparing against the copy number variations and genotypes predicted using Affymetrix SNP 6.0 data of the same samples. Further, we carried out a comparison study to show that ADTEx outperformed its competitors in terms of precision and F-measure.

Conclusions

Our proposed method, ADTEx, uses both depth of coverage ratios and B allele frequencies calculated from whole exome sequencing data, to predict copy number variations along with their genotypes. ADTEx is implemented as a user friendly software package using Python and R statistical language. Source code and sample data are freely available under GNU license (GPLv3) at http://adtex.sourceforge.net/.",2014-08-28 +26958241,Approaches to Supporting the Analysis of Historical Medication Datasets with RxNorm.,"

Objective

To investigate approaches to supporting the analysis of historical medication datasets with RxNorm.

Methods

We created two sets of National Drug Codes (NDCs). One is based on historical NDCs harvested from versions of RxNorm from 2007 to present. The other comprises all sources of NDCs in the current release of RxNorm, including proprietary sources. We evaluated these two resources against four sets of NDCs obtained from various sources.

Results

In two historical medication datasets, 14-19% of the NDCs were obsolete, but 91-96% of these obsolete NDCs could be recovered and mapped to active drug concepts.

Conclusion

Adding historical data significantly increases NDC mapping to active RxNorm drugs. A service for mapping historical NDC datasets leveraging RxNorm was added to the RxNorm API and is available at https://rxnav.nlm.nih.gov/.",2015-11-05 +23819482,WS-SNPs&GO: a web server for predicting the deleterious effect of human protein variants using functional annotation.,"

Background

SNPs&GO is a method for the prediction of deleterious Single Amino acid Polymorphisms (SAPs) using protein functional annotation. In this work, we present the web server implementation of SNPs&GO (WS-SNPs&GO). The server is based on Support Vector Machines (SVM) and for a given protein, its input comprises: the sequence and/or its three-dimensional structure (when available), a set of target variations and its functional Gene Ontology (GO) terms. The output of the server provides, for each protein variation, the probabilities to be associated to human diseases.

Results

The server consists of two main components, including updated versions of the sequence-based SNPs&GO (recently scored as one of the best algorithms for predicting deleterious SAPs) and of the structure-based SNPs&GO(3d) programs. Sequence and structure based algorithms are extensively tested on a large set of annotated variations extracted from the SwissVar database. Selecting a balanced dataset with more than 38,000 SAPs, the sequence-based approach achieves 81% overall accuracy, 0.61 correlation coefficient and an Area Under the Curve (AUC) of the Receiver Operating Characteristic (ROC) curve of 0.88. For the subset of ~6,600 variations mapped on protein structures available at the Protein Data Bank (PDB), the structure-based method scores with 84% overall accuracy, 0.68 correlation coefficient, and 0.91 AUC. When tested on a new blind set of variations, the results of the server are 79% and 83% overall accuracy for the sequence-based and structure-based inputs, respectively.

Conclusions

WS-SNPs&GO is a valuable tool that includes in a unique framework information derived from protein sequence, structure, evolutionary profile, and protein function. WS-SNPs&GO is freely available at http://snps.biofold.org/snps-and-go.",2013-05-28 +26852902,An exploration of integrated data on the social dynamics of suicide among women.,"The gender-based nature of suicide-related behaviour is largely accepted. However, studies that report exclusively on female suicides are rare. Here we demonstrate how female suicide has effectively been 'othered' and appears incidental in studies which compare female and male behaviour. We highlight how recent studies of suicide have tended to be dominated by male-only approaches, which increasingly link issues of masculinity with male death by suicide. Drawing on data collected from the general practitioner and coroner's office, we then apply the sociological autopsy approach to a cohort of 78 deaths recorded as suicides in the UK between 2007 and 2009. By focusing on females in isolation from males, we demonstrate that, as in male-only suicide studies, it is similarly possible to draw out issues associated with the feminine identity, which can be linked to death by suicide. We find that bereavement, sexual violence and motherhood could all be linked to the lives and help-seeking of the females who died. In closing, we suggest that a reorientation towards sociological analytic approaches of female suicide may help to produce further reductions in the rate of female death by suicide. A Virtual Abstract of this paper can be found at: https://www.youtube.com/watch?v=a0w9KKMFdIQ.",2016-02-08 +22954629,MaConDa: a publicly accessible mass spectrometry contaminants database.,"

Unlabelled

Mass spectrometry is widely used in bioanalysis, including the fields of metabolomics and proteomics, to simultaneously measure large numbers of molecules in complex biological samples. Contaminants routinely occur within these samples, for example, originating from the solvents or plasticware. Identification of these contaminants is crucial to enable their removal before data analysis, in particular to maintain the validity of conclusions drawn from uni- and multivariate statistical analyses. Although efforts have been made to report contaminants within mass spectra, this information is fragmented and its accessibility is relatively limited. In response to the needs of the bioanalytical community, here we report the creation of an extensive manually well-annotated database of currently known small molecule contaminants.

Availability

The Mass spectrometry Contaminants Database (MaConDa) is freely available and accessible through all major browsers or by using the MaConDa web service http://www.maconda.bham.ac.uk.",2012-09-06 +27357169,Local versus global biological network alignment.,"

Motivation

Network alignment (NA) aims to find regions of similarities between species' molecular networks. There exist two NA categories: local (LNA) and global (GNA). LNA finds small highly conserved network regions and produces a many-to-many node mapping. GNA finds large conserved regions and produces a one-to-one node mapping. Given the different outputs of LNA and GNA, when a new NA method is proposed, it is compared against existing methods from the same category. However, both NA categories have the same goal: to allow for transferring functional knowledge from well- to poorly-studied species between conserved network regions. So, which one to choose, LNA or GNA? To answer this, we introduce the first systematic evaluation of the two NA categories.

Results

We introduce new measures of alignment quality that allow for fair comparison of the different LNA and GNA outputs, as such measures do not exist. We provide user-friendly software for efficient alignment evaluation that implements the new and existing measures. We evaluate prominent LNA and GNA methods on synthetic and real-world biological networks. We study the effect on alignment quality of using different interaction types and confidence levels. We find that the superiority of one NA category over the other is context-dependent. Further, when we contrast LNA and GNA in the application of learning novel protein functional knowledge, the two produce very different predictions, indicating their complementarity. Our results and software provide guidelines for future NA method development and evaluation.

Availability and implementation

Software: http://www.nd.edu/~cone/LNA_GNA CONTACT: tmilenko@nd.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2016-06-29 +25416749,NucleusJ: an ImageJ plugin for quantifying 3D images of interphase nuclei.,"

Unlabelled

NucleusJ is a simple and user-friendly ImageJ plugin dedicated to the characterization of nuclear morphology and chromatin organization in 3D. Starting from image stacks, the nuclear boundary is delimited by combining the Otsu segmentation method with optimization of nuclear sphericity. Chromatin domains are segmented by partitioning the nucleus using a 3D watershed algorithm and by thresholding a contrast measure over the resulting regions. As output, NucleusJ quantifies 15 parameters including shape and size of nuclei as well as intra-nuclear objects and their position within the nucleus. A step-by-step documentation is available for self-training, together with data sets of nuclei with different nuclear organization.

Availability and implementation

Dataset of nuclei is available at https://www.gred-clermont.fr/media/WorkDirectory.zip. NucleusJ is available at http://imagejdocu.tudor.lu/doku.php?id=plugin:stacks:nuclear_analysis_plugin:start.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-20 +22759420,Comparative evaluation of set-level techniques in predictive classification of gene expression samples.,"

Background

Analysis of gene expression data in terms of a priori-defined gene sets has recently received significant attention as this approach typically yields more compact and interpretable results than those produced by traditional methods that rely on individual genes. The set-level strategy can also be adopted with similar benefits in predictive classification tasks accomplished with machine learning algorithms. Initial studies into the predictive performance of set-level classifiers have yielded rather controversial results. The goal of this study is to provide a more conclusive evaluation by testing various components of the set-level framework within a large collection of machine learning experiments.

Results

Genuine curated gene sets constitute better features for classification than sets assembled without biological relevance. For identifying the best gene sets for classification, the Global test outperforms the gene-set methods GSEA and SAM-GS as well as two generic feature selection methods. To aggregate expressions of genes into a feature value, the singular value decomposition (SVD) method as well as the SetSig technique improve on simple arithmetic averaging. Set-level classifiers learned with 10 features constituted by the Global test slightly outperform baseline gene-level classifiers learned with all original data features although they are slightly less accurate than gene-level classifiers learned with a prior feature-selection step.

Conclusion

Set-level classifiers do not boost predictive accuracy, however, they do achieve competitive accuracy if learned with the right combination of ingredients.

Availability

Open-source, publicly available software was used for classifier learning and testing. The gene expression datasets and the gene set database used are also publicly available. The full tabulation of experimental results is available at http://ida.felk.cvut.cz/CESLT.",2012-06-25 +24371156,ALEA: a toolbox for allele-specific epigenomics analysis.,"The assessment of expression and epigenomic status using sequencing based methods provides an unprecedented opportunity to identify and correlate allelic differences with epigenomic status. We present ALEA, a computational toolbox for allele-specific epigenomics analysis, which incorporates allelic variation data within existing resources, allowing for the identification of significant associations between epigenetic modifications and specific allelic variants in human and mouse cells. ALEA provides a customizable pipeline of command line tools for allele-specific analysis of next-generation sequencing data (ChIP-seq, RNA-seq, etc.) that takes the raw sequencing data and produces separate allelic tracks ready to be viewed on genome browsers. The pipeline has been validated using human and hybrid mouse ChIP-seq and RNA-seq data.

Availability

The package, test data and usage instructions are available online at http://www.bcgsc.ca/platform/bioinfo/software/alea CONTACT: mkarimi1@interchange.ubc.ca or sjones@bcgsc.ca Supplementary information: Supplementary data are available at Bioinformatics online.",2013-12-26 +26699810,Distinctive Behaviors of Druggable Proteins in Cellular Networks.,"The interaction environment of a protein in a cellular network is important in defining the role that the protein plays in the system as a whole, and thus its potential suitability as a drug target. Despite the importance of the network environment, it is neglected during target selection for drug discovery. Here, we present the first systematic, comprehensive computational analysis of topological, community and graphical network parameters of the human interactome and identify discriminatory network patterns that strongly distinguish drug targets from the interactome as a whole. Importantly, we identify striking differences in the network behavior of targets of cancer drugs versus targets from other therapeutic areas and explore how they may relate to successful drug combinations to overcome acquired resistance to cancer drugs. We develop, computationally validate and provide the first public domain predictive algorithm for identifying druggable neighborhoods based on network parameters. We also make available full predictions for 13,345 proteins to aid target selection for drug discovery. All target predictions are available through canSAR.icr.ac.uk. Underlying data and tools are available at https://cansar.icr.ac.uk/cansar/publications/druggable_network_neighbourhoods/.",2015-12-23 +25630378,MetAmp: combining amplicon data from multiple markers for OTU analysis.,"

Motivation

We present a novel method and corresponding application, MetAmp, to combine amplicon data from multiple genomic markers into Operational Taxonomic Units (OTUs) for microbial community analysis, calibrating the markers using data from known microbial genomes. When amplicons for multiple markers such as the 16S rRNA gene hypervariable regions are available, MetAmp improves the accuracy of OTU-based methods for characterizing bacterial composition and community structure. MetAmp works best with at least three markers, and is applicable to non-bacterial analyses and to non 16S markers. Our application and testing have been limited to 16S analysis of microbial communities.

Results

We clustered standard test sequences derived from the Human Microbiome Mock Community test sets and compared MetAmp and other tools with respect to their ability to recover OTUs for these benchmark bacterial communities. MetAmp compared favorably to QIIME, UPARSE and Mothur using amplicons from one, two, and three markers.

Availability and implementation

MetAmp is available at http://izhbannikov.github.io/MetAmp/.",2015-01-27 +25246651,Power analysis and sample size estimation for RNA-Seq differential expression.,"It is crucial for researchers to optimize RNA-seq experimental designs for differential expression detection. Currently, the field lacks general methods to estimate power and sample size for RNA-Seq in complex experimental designs, under the assumption of the negative binomial distribution. We simulate RNA-Seq count data based on parameters estimated from six widely different public data sets (including cell line comparison, tissue comparison, and cancer data sets) and calculate the statistical power in paired and unpaired sample experiments. We comprehensively compare five differential expression analysis packages (DESeq, edgeR, DESeq2, sSeq, and EBSeq) and evaluate their performance by power, receiver operator characteristic (ROC) curves, and other metrics including areas under the curve (AUC), Matthews correlation coefficient (MCC), and F-measures. DESeq2 and edgeR tend to give the best performance in general. Increasing sample size or sequencing depth increases power; however, increasing sample size is more potent than sequencing depth to increase power, especially when the sequencing depth reaches 20 million reads. Long intergenic noncoding RNAs (lincRNA) yields lower power relative to the protein coding mRNAs, given their lower expression level in the same RNA-Seq experiment. On the other hand, paired-sample RNA-Seq significantly enhances the statistical power, confirming the importance of considering the multifactor experimental design. Finally, a local optimal power is achievable for a given budget constraint, and the dominant contributing factor is sample size rather than the sequencing depth. 
In conclusion, we provide a power analysis tool (http://www2.hawaii.edu/~lgarmire/RNASeqPowerCalculator.htm) that captures the dispersion in the data and can serve as a practical reference under the budget constraint of RNA-Seq experiments.",2014-09-22 +24687303,Open source software implementation of an integrated testing strategy for skin sensitization potency based on a Bayesian network.,"An open-source implementation of a previously published integrated testing strategy (ITS) for skin sensitization using a Bayesian network has been developed using R, a free and open-source statistical computing language. The ITS model provides probabilistic predictions of skin sensitization potency based on in silico and in vitro information as well as skin penetration characteristics from a published bioavailability model (Kasting et al., 2008). The structure of the Bayesian network was designed to be consistent with the adverse outcome pathway published by the OECD (Jaworska et al., 2011, 2013). In this paper, the previously published data set (Jaworska et al., 2013) is improved by two data corrections and a modified application of the Kasting model. The new data set implemented in the original commercial software package and the new R version produced consistent results. The data and a fully documented version of the code are publicly available (http://ntp.niehs.nih.gov/go/its).",2014-03-31 +24771516,ThunderSTORM: a comprehensive ImageJ plug-in for PALM and STORM data analysis and super-resolution imaging.,"

Unlabelled

ThunderSTORM is an open-source, interactive and modular plug-in for ImageJ designed for automated processing, analysis and visualization of data acquired by single-molecule localization microscopy methods such as photo-activated localization microscopy and stochastic optical reconstruction microscopy. ThunderSTORM offers an extensive collection of processing and post-processing methods so that users can easily adapt the process of analysis to their data. ThunderSTORM also offers a set of tools for creation of simulated data and quantitative performance evaluation of localization algorithms using Monte Carlo simulations.

Availability and implementation

ThunderSTORM and the online documentation are both freely accessible at https://code.google.com/p/thunder-storm/.",2014-04-25 +24564238,Signal extraction from movies of honeybee brain activity: the ImageBee plugin for KNIME.,"

Background

In the antennal lobe, a dedicated olfactory center of the honeybee brain, odours are encoded as activity patterns of coding units, the so-called glomeruli. Optical imaging with calcium-sensitive dyes allows us to record these activity patterns and to gain insight into olfactory information processing in the brain.

Method

We introduce ImageBee, a plugin for the data analysis platform KNIME. ImageBee provides a variety of tools for processing optical imaging data. The main algorithm behind ImageBee is a matrix factorisation approach. Motivated by a data-specific, non-negative mixture model, the algorithm aims to select the generating extreme vectors of a convex cone that contains the data. It approximates the movie matrix by non-negative combinations of the extreme vectors. These correspond to pure glomerular signals that are not mixed with neighbour signals.

Results

Evaluation shows that the proposed algorithm can identify the relevant biological signals on imaging data from the honeybee AL and can also recover implanted source signals from artificial data.

Conclusions

ImageBee enables automated data processing and visualisation for optical imaging data from the insect AL. The modular implementation for KNIME offers a flexible platform for data analysis projects, where modules can be rearranged or added depending on the particular application.

Availability

ImageBee can be installed via the KNIME update service. Installation instructions are available at http://tech.knime.org/imagebee-analysing-imaging-data-from-the-honeybee-brain.",2013-11-05 +27928825,Patient involvement in own rehabilitation after early discharge.,"

Background

A European Union Commission report in 2015 concluded that the concept of patient involvement refers specially to the right of patients to have a central position in the healthcare process (EU Commission 2012, http://ec.europa.eu/public_opinion/archives/quali/ql_5937_patient_en.pdf). Recent research suggests that patient involvement after hip replacement is a very effective strategy in older adults with regard to improving walking ability and reducing pain and thereby lessening loss of physical, mental and social aspects of the quality of life. The growing number of older adults all over the world will increase the need for hip surgery.

Hypothesis

Older adults' involvement in own rehabilitation improves their health and quality of life after hip replacement.

Aim

To involve and coach older adults with a hip replacement to self-care after early discharge in transition between hospital and home.

Population

Empirical data were collected by carrying out three randomised clinical trials (RCT) focusing on self-rated health and involvement of patients undergoing total hip replacement in three Danish orthopaedic clinics. Based on power calculation, 260 patients (mean age 67.5 years) were invited to participate. In this third study in 2010-2013, patients were randomised to either an intervention group or a control group.

Methodology

Randomised clinical trials (RCT). Questionnaire SF-36 a generic tool measuring patients' self-rated health status and quality of life. All patients filled out SF-36 before surgery and again 3, 6, 9 and 12 months after surgery. Patients in the intervention group had an additional follow-up 1, 3 and 7 weeks after discharge by nurses from orthopaedic clinic in hospital. The nurses used a semi-structured interview guide as intervention model to coach, counsel and involve patients to improve their self-care and planned rehabilitation after hip replacement.

Results

Older adults benefit through involvement based on an intervention model of expectations, measurement of self-rated health and quality of life. The results of the randomised trials one and two documented that patients' self-rated health status in the intervention group reached their habitual level 3 months after hip replacement vs. 9 months in the control group. Since our working hypothesis 'patient involvement in own rehabilitation improves health and quality of life after hip replacement' was found valid, our ambition is now based on results from this third study to contribute to further research and development within patient involvement.

Relevance to clinical practice

To follow new evidence-based research, results concluded that walking ability is very necessary in order to increase physical activity to benefit the health of older adults and prevent disease.",2016-12-08 +25169943,Notch3 overexpression promotes anoikis resistance in epithelial ovarian cancer via upregulation of COL4A2.,"

Unlabelled

Ovarian cancer is a lethal disease with the majority of diagnosed women having distant metastases. Interestingly, although Notch3 overexpression has been correlated with poor survival in epithelial ovarian cancer (EOC), little is known about its mechanism of action. Data show that Notch3 specifically promotes anoikis resistance. In addition, data indicate a positive role for focal adhesion kinase (FAK) as well as downstream signaling kinases such as Akt and Erk 1/2 in promoting anchorage-independent growth. Mechanistically, both mRNA transcript and protein levels of type IV collagen (COL4A2) are reduced when Notch3 levels are decreased and exogenous collagen IV supplementation reverses the anoikis sensitivity. Reduction of COL4A2 expression by RNAi-mediated knockdown induces cell death. Finally, elevated Notch3 expression levels correlate with higher COL4A2 expression in human ovarian tumor specimens.

Implications

These data highlight type IV collagen as a novel therapeutic target for metastatic EOC. Visual Overview: http://mcr.aacrjournals.org/content/early/2014/11/25/1541-7786.MCR-14-0334/F1.large.jpg",2014-08-28 +22267905,EvoluCode: Evolutionary Barcodes as a Unifying Framework for Multilevel Evolutionary Data.,"Evolutionary systems biology aims to uncover the general trends and principles governing the evolution of biological networks. An essential part of this process is the reconstruction and analysis of the evolutionary histories of these complex, dynamic networks. Unfortunately, the methodologies for representing and exploiting such complex evolutionary histories in large scale studies are currently limited. Here, we propose a new formalism, called EvoluCode (Evolutionary barCode), which allows the integration of different evolutionary parameters (eg, sequence conservation, orthology, synteny …) in a unifying format and facilitates the multilevel analysis and visualization of complex evolutionary histories at the genome scale. The advantages of the approach are demonstrated by constructing barcodes representing the evolution of the complete human proteome. Two large-scale studies are then described: (i) the mapping and visualization of the barcodes on the human chromosomes and (ii) automatic clustering of the barcodes to highlight protein subsets sharing similar evolutionary histories and their functional analysis. The methodologies developed here open the way to the efficient application of other data mining and knowledge extraction techniques in evolutionary systems biology studies. 
A database containing all EvoluCode data is available at: http://lbgi.igbmc.fr/barcodes.",2011-12-21 +26975659,NetDecoder: a network biology platform that decodes context-specific biological networks and gene activities.,"The sequential chain of interactions altering the binary state of a biomolecule represents the 'information flow' within a cellular network that determines phenotypic properties. Given the lack of computational tools to dissect context-dependent networks and gene activities, we developed NetDecoder, a network biology platform that models context-dependent information flows using pairwise phenotypic comparative analyses of protein-protein interactions. Using breast cancer, dyslipidemia and Alzheimer's disease as case studies, we demonstrate NetDecoder dissects subnetworks to identify key players significantly impacting cell behaviour specific to a given disease context. We further show genes residing in disease-specific subnetworks are enriched in disease-related signalling pathways and information flow profiles, which drive the resulting disease phenotypes. We also devise a novel scoring scheme to quantify key genes-network routers, which influence many genes, key targets, which are influenced by many genes, and high impact genes, which experience a significant change in regulation. We show the robustness of our results against parameter changes. Our network biology platform includes freely available source code (http://www.NetDecoder.org) for researchers to explore genome-wide context-dependent information flow profiles and key genes, given a set of genes of particular interest and transcriptome data. 
More importantly, NetDecoder will enable researchers to uncover context-dependent drug targets.",2016-03-14 +24731942,Interventional heart wall motion analysis with cardiac C-arm CT systems.,"Today, quantitative analysis of three-dimensional (3D) dynamics of the left ventricle (LV) cannot be performed directly in the catheter lab using a current angiographic C-arm system, which is the workhorse imaging modality for cardiac interventions. Therefore, myocardial wall analysis is completely based on the 2D angiographic images or pre-interventional 3D/4D imaging. In this paper, we present a complete framework to study the ventricular wall motion in 4D (3D+t) directly in the catheter lab. From the acquired 2D projection images, a dynamic 3D surface model of the LV is generated, which is then used to detect ventricular dyssynchrony. Different quantitative features to evaluate LV dynamics known from other modalities (ultrasound, magnetic resonance imaging) are transferred to the C-arm CT data. We use the ejection fraction, the systolic dyssynchrony index a 3D fractional shortening and the phase to maximal contraction (ϕi, max) to determine an indicator of LV dyssynchrony and to discriminate regionally pathological from normal myocardium. The proposed analysis tool was evaluated on simulated phantom LV data with and without pathological wall dysfunctions. The LV data used is publicly available online at https://conrad.stanford.edu/data/heart. In addition, the presented framework was tested on eight clinical patient data sets. The first clinical results demonstrate promising performance of the proposed analysis tool and encourage the application of the presented framework to a larger study in clinical practice.",2014-04-15 +26093644,Genomic-associated Markers and comparative Genome Maps of Xanthomonas oryzae pv. oryzae and X. oryzae pv. oryzicola.,"Xanthomonas oryzae pv. oryzae (Xoo) and X. oryzae pv. 
oryzicola (Xoc) cause two major seed quarantine diseases in rice, bacterial blight and bacterial leaf streak, respectively. Xoo and Xoc share high similarity in genomic sequence, which results in hard differentiation of the two pathogens. Genomic-associated Markers and comparative Genome Maps database (GMGM) is an integrated database providing comprehensive information including compared genome maps and full genomic-coverage molecular makers of Xoo and Xoc. This database was established based on bioinformatic analysis of complete sequenced genomes of several X. oryzae pathovars of which the similarity of the genomes was up to 91.39 %. The program was designed with a series of specific PCR primers, including 286 pairs of Xoo dominant markers, 288 pairs of Xoc dominant markers, and 288 pairs of Xoo and Xoc co-dominant markers, which were predicted to distinguish two pathovars. Test on a total of 40 donor pathogen strains using randomly selected 120 pairs of primers demonstrated that over 52.5 % of the primers were efficacious. The GMGM web portal ( http://biodb.sdau.edu.cn/gmgm/ ) will be a powerful tool that can present highly specific diagnostic markers, and it also provides information about comparative genome maps of the two pathogens for future evolution study.",2015-06-21 +24618473,"Orione, a web-based framework for NGS analysis in microbiology.","

Unlabelled

End-to-end next-generation sequencing microbiology data analysis requires a diversity of tools covering bacterial resequencing, de novo assembly, scaffolding, bacterial RNA-Seq, gene annotation and metagenomics. However, the construction of computational pipelines that use different software packages is difficult owing to a lack of interoperability, reproducibility and transparency. To overcome these limitations we present Orione, a Galaxy-based framework consisting of publicly available research software and specifically designed pipelines to build complex, reproducible workflows for next-generation sequencing microbiology data analysis. Enabling microbiology researchers to conduct their own custom analysis and data manipulation without software installation or programming, Orione provides new opportunities for data-intensive computational analyses in microbiology and metagenomics.

Availability and implementation

Orione is available online at http://orione.crs4.it.",2014-03-10 +24194600,"The International Mouse Phenotyping Consortium Web Portal, a unified point of access for knockout mice and related phenotyping data.","The International Mouse Phenotyping Consortium (IMPC) web portal (http://www.mousephenotype.org) provides the biomedical community with a unified point of access to mutant mice and rich collection of related emerging and existing mouse phenotype data. IMPC mouse clinics worldwide follow rigorous highly structured and standardized protocols for the experimentation, collection and dissemination of data. Dedicated 'data wranglers' work with each phenotyping center to collate data and perform quality control of data. An automated statistical analysis pipeline has been developed to identify knockout strains with a significant change in the phenotype parameters. Annotation with biomedical ontologies allows biologists and clinicians to easily find mouse strains with phenotypic traits relevant to their research. Data integration with other resources will provide insights into mammalian gene function and human disease. As phenotype data become available for every gene in the mouse, the IMPC web portal will become an invaluable tool for researchers studying the genetic contributions of genes to human diseases.",2013-11-04 +26116993,Approaches for identifying multiple-SNP haplotype blocks for use in human identification.,"Single nucleotide polymorphism (SNP) discrimination effectiveness is low due to the bi-allelic nature of SNPs, and large numbers of loci must be analyzed for human identification in forensic casework. To resolve these issues, the authors support the use of multiple SNP haplotypes that will generate many haplotypes based on the combination of SNP alleles. 
First, 27 regions were selected from the JSNP database (http://snp.ims.u-tokyo.ac.jp) according to the following criteria: (1) 3 or more SNP loci within 100bp; (2) on-intron or out-of-gene location; and (3) frequency of more than 40% for each SNP allele. PCR amplification and high-resolution melting curve (HRM) analysis were then carried out for all selected regions to determine variation in the haplotypes of each. HRM analysis indicated that 7 regions (1q25, 1q42.2, 3p24, 10p13, 11p15.1, 14q12-q13, and 20q12) containing 3 SNP loci had more than 2 haplotypes. The frequencies of the haplotypes for each region were observed via direct sequencing of more than 100 individuals. Not only haplotyping increases the effectiveness of individual identification but also the analysis region is shorter than in common short tandem repeat analysis, representing a further advantage for fragmented DNA samples in SNP typing.",2015-06-20 +23773438,Transcriptome sequencing and de novo annotation of the critically endangered Adriatic sturgeon.,"

Background

Sturgeons are a group of Chondrostean fish with very high evolutionary, economical and conservation interest. The eggs of these living fossils represent one of the most highly prized foods of animal origin. The intense fishing pressure on wild stocks to harvest caviar has caused in the last decades a dramatic decline of their distribution and abundance leading the International Union for Conservation of Nature to list them as the most endangered group of species. As a direct consequence, world-wide efforts have been made to develop sturgeon aquaculture programmes for caviar production. In this context, the characterization of the genes involved in sex determination could provide relevant information for the selective farming of the more profitable females.

Results

The 454 sequencing of two cDNA libraries from the gonads and brain of one male and one female full-sib A. naccarii, yielded 182,066 and 167,776 reads respectively, which, after strict quality control, were iteratively assembled into more than 55,000 high quality ESTs. The average per-base coverage reached by assembling the two libraries was 4X. The multi-step annotation process resulted in 16% successfully annotated sequences with GO terms. We screened the transcriptome for 32 sex-related genes and highlighted 7 genes that are potentially specifically expressed, 5 in male and 2 in females, at the first life stage at which sex is histologically identifiable. In addition we identified 21,791 putative EST-linked SNPs and 5,295 SSRs.

Conclusions

This study represents the first large massive release of sturgeon transcriptome information that we organized into the public database AnaccariiBase, which is freely available at http://compgen.bio.unipd.it/anaccariibase/. This transcriptomic data represents an important source of information for further studies on sturgeon species. The hundreds of putative EST-linked molecular makers discovered in this study will be invaluable for sturgeon reintroduction and breeding programs.",2013-06-18 +26990570,Gene Expression in Biopsies of Acute Rejection and Interstitial Fibrosis/Tubular Atrophy Reveals Highly Shared Mechanisms That Correlate With Worse Long-Term Outcomes.,"Interstitial fibrosis and tubular atrophy (IFTA) is found in approximately 25% of 1-year biopsies posttransplant. It is known that IFTA correlates with decreased graft survival when histological evidence of inflammation is present. Identifying the mechanistic etiology of IFTA is important to understanding why long-term graft survival has not changed as expected despite improved immunosuppression and dramatically reduced rates of clinical acute rejection (AR) (Services UDoHaH. http://www.ustransplant.org/annual_reports/current/509a_ki.htm). Gene expression profiles of 234 graft biopsy samples were obtained with matching clinical and outcome data. Eighty-one IFTA biopsies were divided into subphenotypes by degree of histological inflammation: IFTA with AR, IFTA with inflammation, and IFTA without inflammation. Samples with AR (n = 54) and normally functioning transplants (TX; n = 99) were used in comparisons. A novel analysis using gene coexpression networks revealed that all IFTA phenotypes were strongly enriched for dysregulated gene pathways and these were shared with the biopsy profiles of AR, including IFTA samples without histological evidence of inflammation. 
Thus, by molecular profiling we demonstrate that most IFTA samples have ongoing immune-mediated injury or chronic rejection that is more sensitively detected by gene expression profiling. These molecular biopsy profiles correlated with future graft loss in IFTA samples without inflammation.",2016-03-15 +26091106,"Asian Citrus Psyllid Expression Profiles Suggest Candidatus Liberibacter Asiaticus-Mediated Alteration of Adult Nutrition and Metabolism, and of Nymphal Development and Immunity.","The Asian citrus psyllid (ACP) Diaphorina citri Kuwayama (Hemiptera: Psyllidae) is the insect vector of the fastidious bacterium Candidatus Liberibacter asiaticus (CLas), the causal agent of citrus greening disease, or Huanglongbing (HLB). The widespread invasiveness of the psyllid vector and HLB in citrus trees worldwide has underscored the need for non-traditional approaches to manage the disease. One tenable solution is through the deployment of RNA interference technology to silence protein-protein interactions essential for ACP-mediated CLas invasion and transmission. To identify psyllid interactor-bacterial effector combinations associated with psyllid-CLas interactions, cDNA libraries were constructed from CLas-infected and CLas-free ACP adults and nymphs, and analyzed for differential expression. Library assemblies comprised 24,039,255 reads and yielded 45,976 consensus contigs. They were annotated (UniProt), classified using Gene Ontology, and subjected to in silico expression analyses using the Transcriptome Computational Workbench (TCW) (http://www.sohomoptera.org/ACPPoP/). Functional-biological pathway interpretations were carried out using the Kyoto Encyclopedia of Genes and Genomes databases. Differentially expressed contigs in adults and/or nymphs represented genes and/or metabolic/pathogenesis pathways involved in adhesion, biofilm formation, development-related, immunity, nutrition, stress, and virulence. 
Notably, contigs involved in gene silencing and transposon-related responses were documented in a psyllid for the first time. This is the first comparative transcriptomic analysis of ACP adults and nymphs infected and uninfected with CLas. The results provide key initial insights into host-parasite interactions involving CLas effectors that contribute to invasion-virulence, and to host nutritional exploitation and immune-related responses that appear to be essential for successful ACP-mediated circulative, propagative CLas transmission.",2015-06-19 +25596308,mirTrios: an integrated pipeline for detection of de novo and rare inherited mutations from trios-based next-generation sequencing.,"

Objectives

Recently, several studies documented that de novo mutations (DNMs) play important roles in the aetiology of sporadic diseases. Next-generation sequencing (NGS) enables variant calling at single-base resolution on a genome-wide scale. However, accurate identification of DNMs from NGS data still remains a major challenge. We developed mirTrios, a web server, to accurately detect DNMs and rare inherited mutations from NGS data in sporadic diseases.

Methods

The expectation-maximisation (EM) model was adopted to accurately identify DNMs from variant call files of a trio generated by GATK (Genome Analysis Toolkit). The GATK results, which contain certain basic properties (such as PL, PRT and PART), are iteratively integrated into the EM model to strike a threshold for DNMs detection. Training sets of true and false positive DNMs in the EM model were built from whole genome sequencing data of 64 trios.

Results

With our in-house whole exome sequencing datasets from 20 trios, mirTrios totally identified 27 DNMs in the coding region, 25 of which (92.6%) are validated as true positives. In addition, to facilitate the interpretation of diverse mutations, mirTrios can also be employed in the identification of rare inherited mutations. Embedded with abundant annotation of DNMs and rare inherited mutations, mirTrios also supports known diagnostic variants and causative gene identification, as well as the prioritisation of novel and promising candidate genes.

Conclusions

mirTrios provides an intuitive interface for the general geneticist and clinician, and can be widely used for detection of DNMs and rare inherited mutations, and annotation in sporadic diseases. mirTrios is freely available at http://centre.bioinformatics.zj.cn/mirTrios/.",2015-01-16 +21414990,AStream: an R package for annotating LC/MS metabolomic data.,"

Unlabelled

AStream, an R-statistical software package for the curation and identification of feature peaks extracted from liquid chromatography mass spectrometry (LC/MS) metabolomics data, is described. AStream detects isotopic, fragment and adduct patterns by identifying feature pairs that fulfill expected relational patterns. Data reduction by AStream allows compounds to be identified reliably and subsequently linked to metabolite databases. AStream provides researchers with a fast, reliable tool for summarizing metabolomic data, notably reducing curation time and increasing consistency of results.

Availability

The AStream R package and a study example can be freely accessed at http://www.urr.cat/AStream/AStream.html.",2011-03-16 +27793852,Acute exposure to progesterone attenuates cardiac contraction by modifying myofilament calcium sensitivity in the female mouse heart.,"Acute application of progesterone attenuates cardiac contraction, although the underlying mechanisms are unclear. We investigated whether progesterone modified contraction in isolated ventricular myocytes and identified the Ca2+ handling mechanisms involved in female C57BL/6 mice (6-9 mo; sodium pentobarbital anesthesia). Cells were field-stimulated (4 Hz; 37°C) and exposed to progesterone (0.001-10.0 μM) or vehicle (35 min). Ca2+ transients (fura-2) and cell shortening were recorded simultaneously. Maximal concentrations of progesterone inhibited peak contraction by 71.4% (IC50 = 160 ± 50 nM; n = 12) and slowed relaxation by 75.4%. By contrast, progesterone had no effect on amplitudes or time courses of underlying Ca2+ transients. Progesterone (1 µM) also abbreviated action potential duration. When the duration of depolarization was controlled by voltage-clamp, progesterone attenuated contraction and slowed relaxation but did not affect Ca2+ currents, Ca2+ transients, sarcoplasmic reticulum (SR) content, or fractional release of SR Ca2+ Actomyosin MgATPase activity was assayed in myofilaments from hearts perfused with progesterone (1 μM) or vehicle (35 min). While maximal responses to Ca2+ were not affected by progesterone, myofilament Ca2+ sensitivity was reduced (EC50 = 0.94 ± 0.01 µM for control, n = 7 vs. 1.13 ± 0.05 μM for progesterone, n = 6; P < 0.05) and progesterone increased phosphorylation of myosin binding protein C. The effects on contraction were inhibited by lonaprisan (progesterone receptor antagonist) and levosimendan (Ca2+ sensitizer). Unlike results in females, progesterone had no effect on contraction or myofilament Ca2+ sensitivity in age-matched male mice. 
These data indicate that progesterone reduces myofilament Ca2+ sensitivity in female hearts, which may exacerbate manifestations of cardiovascular disease late in pregnancy when progesterone levels are high.

New & noteworthy

We investigated myocardial effects of acute application of progesterone. In females, but not males, progesterone attenuates and slows cardiomyocyte contraction with no effect on calcium transients. Progesterone also reduces myofilament calcium sensitivity in female hearts. This may adversely affect heart function, especially when serum progesterone levels are high in pregnancy.Listen to this article's corresponding podcast at https://ajpheart.podbean.com/e/acute-progesterone-modifies-cardiac-contraction/.",2016-10-28 +21993301,"Polbase: a repository of biochemical, genetic and structural information about DNA polymerases.","Polbase (http://polbase.neb.com) is a freely accessible database of DNA polymerases and related references. It has been developed in a collaborative model with experts whose contributions reflect their varied backgrounds in genetics, structural biology and biochemistry. Polbase is designed to compile detailed results of polymerase experimentation, presenting them in a dynamic view to inform further research. After validation, results from references are displayed in context with relevant experimental details and are always traceable to their source publication. Polbase is connected to other resources, including PubMed, UniProt and the RCSB Protein Data Bank, to provide multi-faceted views of polymerase knowledge. In addition to a simple web interface, Polbase data is exposed for custom analysis by external software. With the contributions of many polymerase investigators, Polbase has become a powerful research tool covering most important aspects of polymerases, from sequence and structure to biochemistry.",2011-10-12 +21394829,WAVe: web analysis of the variome.,"DNA sequence variation is the underlying basis of common human traits and rarer single-gene disorders. Understanding the variome, the variants in an individual's genome, is essential to enable the ultimate goals of personalized medicine. 
This critical research field has grown dramatically in recent years, mostly due to the spread and development of genotyping technologies. Despite these activities being promoted by the Human Genome Variation Society and projects such as the Human Variome Project or the European GEN2PHEN Project, variome data-integration systems are far from being widely used in the research community workflow. Most of ongoing research is focused on improving locus-specific databases. Although the quality and manual curation of LSDBs adds true value to this domain, they are often narrow, heterogeneous, and independent systems. This hampers data harmonization and interoperability between systems, stifling the aggregation of data from LSDBs and related data sources. A new platform entitled Web Analysis of the Variome, WAVe, is introduced. It offers direct and programmatic access to multiple locus-specific databases, with the integration of genetic variation datasets and enrichment with relevant information. WAVe's agile and innovative Web interface is accessible at http://bioinformatics.ua.pt/WAVe.",2011-04-07 +26602690,Aging Chart: a community resource for rapid exploratory pathway analysis of age-related processes.,"Aging research is a multi-disciplinary field encompassing knowledge from many areas of basic, applied and clinical research. Age-related processes occur on molecular, cellular, tissue, organ, system, organismal and even psychological levels, trigger the onset of multiple debilitating diseases and lead to a loss of function, and there is a need for a unified knowledge repository designed to track, analyze and visualize the cause and effect relationships and interactions between the many elements and processes on all levels. Aging Chart (http://agingchart.org/) is a new, community-curated collection of aging pathways and knowledge that provides a platform for rapid exploratory analysis. 
Building on an initial content base constructed by a team of experts from peer-reviewed literature, users can integrate new data into biological pathway diagrams for a visible, intuitive, top-down framework of aging processes that fosters knowledge-building and collaboration. As the body of knowledge in aging research is rapidly increasing, an open visual encyclopedia of aging processes will be useful to both the new entrants and experts in the field.",2015-11-23 +27398867,Predicting the Presence of Uncommon Elements in Unknown Biomolecules from Isotope Patterns.,"The determination of the molecular formula is one of the earliest and most important steps when investigating the chemical nature of an unknown compound. Common approaches use the isotopic pattern of a compound measured using mass spectrometry. Computational methods to determine the molecular formula from this isotopic pattern require a fixed set of elements. Considering all possible elements severely increases running times and more importantly the chance for false positive identifications as the number of candidate formulas for a given target mass rises significantly if the constituting elements are not prefiltered. This negative effect grows stronger for compounds of higher molecular mass as the effect of a single atom on the overall isotopic pattern grows smaller. On the other hand, hand-selected restrictions on this set of elements may prevent the identification of the correct molecular formula. Thus, it is a crucial step to determine the set of elements most likely comprising the compound prior to the assignment of an elemental formula to an exact mass. In this paper, we present a method to determine the presence of certain elements (sulfur, chlorine, bromine, boron, and selenium) in the compound from its (high mass accuracy) isotopic pattern. We limit ourselves to biomolecules, in the sense of products from nature or synthetic products with potential bioactivity. 
The classifiers developed here predict the presence of an element with a very high sensitivity and high specificity. We evaluate classifiers on three real-world data sets with 663 isotope patterns in total: 184 isotope patterns containing sulfur, 187 containing chlorine, 14 containing bromine, one containing boron, one containing selenium. In no case do we make a false negative prediction; for chlorine, bromine, boron, and selenium, we make ten false positive predictions in total. We also demonstrate the impact of our method on the identification of molecular formulas, in particular on the number of considered candidates and running time. The element prediction will be part of the next SIRIUS release, available from https://bio.informatik.uni-jena.de/software/sirius/ .",2016-07-22 +26217721,γ-tubulin is differentially expressed in mitotic and non-mitotic cardiomyocytes in the regenerating zebrafish heart.,"This data article contains complementary figures related to the research article entitled, "" A dual epimorphic and compensatory mode of heart regeneration"" ([10], http://dx.doi.org/10.1016/j.ydbio.2014.12.002), which presents a spatial and temporal characterization of cardiomyocyte proliferation and dedifferentiation after cryoinjury-induced myocardial infarction. This study demonstrated that mitotic divisions occur in cardiac cells at distinct differentiation status, namely in dedifferentiated cells at the injury border as well as in mature cardiac cells within the remaining intact myocardium. One of the important aspects supporting our conclusions is a characterization of proteins that are upregulated during mitosis in the regenerating hearts. The data presented here reveal a dynamic change in the expression level and in the subcellular distribution of γ-tubulin between mitotic and non-mitotic cardiac cells. We report that in the non-mitotic cells, γ-tubulin expression is restricted to the centrosome. 
By contrast, during the mitosis, γ-tubulin strongly expands its localization within the spindle apparatus that interacts with the condensed chromosomes. We demonstrated that the differential distribution of γ-tubulin in non-mitotic and mitotic cells requires adjusted image processing for the appropriate visualization of both expression patterns in the same histological specimens.",2015-02-10 +23082188,MOCAT: a metagenomics assembly and gene prediction toolkit.,"MOCAT is a highly configurable, modular pipeline for fast, standardized processing of single or paired-end sequencing data generated by the Illumina platform. The pipeline uses state-of-the-art programs to quality control, map, and assemble reads from metagenomic samples sequenced at a depth of several billion base pairs, and predict protein-coding genes on assembled metagenomes. Mapping against reference databases allows for read extraction or removal, as well as abundance calculations. Relevant statistics for each processing step can be summarized into multi-sheet Excel documents and queryable SQL databases. MOCAT runs on UNIX machines and integrates seamlessly with the SGE and PBS queuing systems, commonly used to process large datasets. The open source code and modular architecture allow users to modify or exchange the programs that are utilized in the various processing steps. Individual processing steps and parameters were benchmarked and tested on artificial, real, and simulated metagenomes resulting in an improvement of selected quality metrics. MOCAT can be freely downloaded at http://www.bork.embl.de/mocat/.",2012-10-17 +26428291,ALP & FALP: C++ libraries for pairwise local alignment E-values.,"

Motivation

Pairwise local alignment is an indispensable tool for molecular biologists. In real time (i.e. in about 1 s), ALP (Ascending Ladder Program) calculates the E-values for protein-protein or DNA-DNA local alignments of random sequences, for arbitrary substitution score matrix, gap costs and letter abundances; and FALP (Frameshift Ascending Ladder Program) performs a similar task, although more slowly, for frameshifting DNA-protein alignments.

Availability and implementation

To permit other C++ programmers to implement the computational efficiencies in ALP and FALP directly within their own programs, C++ source codes are available in the public domain at http://go.usa.gov/3GTSW under 'ALP' and 'FALP', along with the standalone programs ALP and FALP.

Contact

spouge@nih.gov

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-01 +26088800,AmyLoad: website dedicated to amyloidogenic protein fragments.,"

Unlabelled

Analyses of amyloidogenic sequence fragments are essential in studies of neurodegenerative diseases. However, there is no one internet dataset that collects all the sequences that have been investigated for their amyloidogenicity. Therefore, we have created the AmyLoad website which collects the amyloidogenic sequences from all major sources. The website allows for filtration of the fragments and provides detailed information about each of them. Registered users can both personalize their work with the website and submit their own sequences into the database. To maintain database reliability, submitted sequences are reviewed before making them available to the public. Finally, we re-implemented several amyloidogenic sequence predictors, thus the AmyLoad website can be used as a sequence analysis tool. We encourage researchers working on amyloid proteins to contribute to our service.

Availability and implementation

The AmyLoad website is freely available at http://comprec-lin.iiar.pwr.edu.pl/amyload/.

Contact

malgorzata.kotulska@pwr.edu.pl.",2015-06-17 +25398896,FlyBase: introduction of the Drosophila melanogaster Release 6 reference genome assembly and large-scale migration of genome annotations.,"Release 6, the latest reference genome assembly of the fruit fly Drosophila melanogaster, was released by the Berkeley Drosophila Genome Project in 2014; it replaces their previous Release 5 genome assembly, which had been the reference genome assembly for over 7 years. With the enormous amount of information now attached to the D. melanogaster genome in public repositories and individual laboratories, the replacement of the previous assembly by the new one is a major event requiring careful migration of annotations and genome-anchored data to the new, improved assembly. In this report, we describe the attributes of the new Release 6 reference genome assembly, the migration of FlyBase genome annotations to this new assembly, how genome features on this new assembly can be viewed in FlyBase (http://flybase.org) and how users can convert coordinates for their own data to the corresponding Release 6 coordinates.",2014-11-14 +26969411,HiView: an integrative genome browser to leverage Hi-C results for the interpretation of GWAS variants.,"

Background

Genome-wide association studies (GWAS) have identified thousands of genetic variants associated with complex traits and diseases. However, most of them are located in the non-protein coding regions, and therefore it is challenging to hypothesize the functions of these non-coding GWAS variants. Recent large efforts such as the ENCODE and Roadmap Epigenomics projects have predicted a large number of regulatory elements. However, the target genes of these regulatory elements remain largely unknown. Chromatin conformation capture based technologies such as Hi-C can directly measure the chromatin interactions and have generated an increasingly comprehensive catalog of the interactome between the distal regulatory elements and their potential target genes. Leveraging such information revealed by Hi-C holds the promise of elucidating the functions of genetic variants in human diseases.

Results

In this work, we present HiView, the first integrative genome browser to leverage Hi-C results for the interpretation of GWAS variants. HiView is able to display Hi-C data and statistical evidence for chromatin interactions in genomic regions surrounding any given GWAS variant, enabling straightforward visualization and interpretation.

Conclusions

We believe that as the first GWAS variants-centered Hi-C genome browser, HiView is a useful tool guiding post-GWAS functional genomics studies. HiView is freely accessible at: http://www.unc.edu/~yunmli/HiView .",2016-03-11 +24564278,Explorative search of distributed bio-data to answer complex biomedical questions.,"

Background

The huge amount of biomedical-molecular data increasingly produced is providing scientists with potentially valuable information. Yet, such data quantity makes it difficult to find and extract those data that are most reliable and most related to the biomedical questions to be answered, which are increasingly complex and often involve many different biomedical-molecular aspects. Such questions can be addressed only by comprehensively searching and exploring different types of data, which frequently are ordered and provided by different data sources. Search Computing has been proposed for the management and integration of ranked results from heterogeneous search services. Here, we present its novel application to the explorative search of distributed biomedical-molecular data and the integration of the search results to answer complex biomedical questions.

Results

A set of available bioinformatics search services has been modelled and registered in the Search Computing framework, and a Bioinformatics Search Computing application (Bio-SeCo) using such services has been created and made publicly available at http://www.bioinformatics.deib.polimi.it/bio-seco/seco/. It offers an integrated environment which eases search, exploration and ranking-aware combination of heterogeneous data provided by the available registered services, and supplies global results that can support answering complex multi-topic biomedical questions.

Conclusions

By using Bio-SeCo, scientists can explore the very large and very heterogeneous biomedical-molecular data available. They can easily make different explorative search attempts, inspect obtained results, select the most appropriate, expand or refine them and move forward and backward in the construction of a global complex biomedical query on multiple distributed sources that could eventually find the most relevant results. Thus, it provides an extremely useful automated support for exploratory integrated bio search, which is fundamental for Life Science data driven knowledge discovery.",2014-01-10 +26896088,Integrated decoys and effector traps: how to catch a plant pathogen.,"Plant immune receptors involved in disease resistance and crop protection are related to the animal Nod-like receptor (NLR) class, and recognise the virulence effectors of plant pathogens, whereby they arm the plant's defensive response. Although plant NLRs mainly contain three protein domains, about 10% of these receptors identified by extensive cross-plant species data base searches have now been shown to include novel and highly variable integrated domains, some of which have been shown to detect pathogen effectors by direct interaction. Sarris et al. have identified a large number of integrated domains that can be used to detect effector targets in host plant proteomes and identify unknown pathogen effectors.Please see related Research article: Comparative analysis of plant immune receptor architectures uncovers host proteins likely targeted by pathogens, http://dx.doi.org/10.1186/s12915-016-0228-7 Since the time of writing, a closely related paper has been released: Kroj T, Chanclud E, Michel-Romiti C, Grand X, Morel J-B. Integration of decoy domains derived from protein targets of pathogen effectors into plant immune receptors is widespread. New Phytol. 
2016 (ahead of print).",2016-02-19 +27153696,ll-ACHRB: a scalable algorithm for sampling the feasible solution space of metabolic networks.,"

Motivation

Random sampling of the solution space has emerged as a popular tool to explore and infer properties of large metabolic networks. However, conventional sampling approaches commonly used do not eliminate thermodynamically unfeasible loops.

Results

In order to overcome this limitation, we developed an efficient sampling algorithm called loopless Artificially Centered Hit-and-Run on a Box (ll-ACHRB). This algorithm is inspired by the Hit-and-Run on a Box algorithm for uniform sampling from general regions, but employs the directions of choice approach of Artificially Centered Hit-and-Run. A novel strategy for generating feasible warmup points improved both sampling efficiency and mixing. ll-ACHRB shows overall better performance than current strategies to generate feasible flux samples across several models. Furthermore, we demonstrate that a failure to eliminate unfeasible loops greatly affects sample statistics, in particular the correlation structure. Finally, we discuss recommendations for the interpretation of sampling results and possible algorithmic improvements.

Availability and implementation

Source code for MATLAB and OCTAVE including examples are freely available for download at http://www.aibn.uq.edu.au/cssb-resources under Software. Optimization runs can use Gurobi Optimizer (by default if available) or GLPK (included with the algorithm).

Contact

lars.nielsen@uq.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-11 +27325568,Perinatal DDT Exposure Induces Hypertension and Cardiac Hypertrophy in Adult Mice.,"

Background

Dichlorodiphenyltrichloroethane (DDT) was used extensively to control malaria, typhus, body lice, and bubonic plague worldwide, until countries began restricting its use in the 1970s. However, the use of DDT to control vector-borne diseases continues in developing countries. Prenatal DDT exposure is associated with elevated blood pressure in humans.

Objective

We hypothesized that perinatal DDT exposure causes hypertension in adult mice.

Methods

DDT was administered to C57BL/6J dams from gestational day 11.5 to postnatal day 5. Blood pressure (BP) and myocardial wall thickness were measured in male and female adult offspring. Adult mice were treated with an angiotensin converting enzyme (ACE) inhibitor, captopril, to evaluate sensitivity to amelioration of DDT-associated hypertension by ACE inhibition. We further assessed the influence of DDT exposure on the expression of mRNAs that regulate BP through renal ion transport.

Results

Adult mice perinatally exposed to DDT exhibited chronically increased systolic BP, increased myocardial wall thickness, and elevated expression of mRNAs of several renal ion transporters. Captopril completely reversed hypertension in mice perinatally exposed to DDT.

Conclusions

These data demonstrate that perinatal exposure to DDT causes hypertension and cardiac hypertrophy in adult offspring. A key mechanism underpinning this hypertension is an overactivated renin angiotensin system because ACE inhibition reverses the hypertension induced by perinatal DDT exposure. Citation: La Merrill M, Sethi S, Benard L, Moshier E, Haraldsson B, Buettner C. 2016. Perinatal DDT exposure induces hypertension and cardiac hypertrophy in adult mice. Environ Health Perspect 124:1722-1727; http://dx.doi.org/10.1289/EHP164.",2016-06-21 +25837579,Maximum-Likelihood Phylogenetic Inference with Selection on Protein Folding Stability.,"Despite intense work, incorporating constraints on protein native structures into the mathematical models of molecular evolution remains difficult, because most models and programs assume that protein sites evolve independently, whereas protein stability is maintained by interactions between sites. Here, we address this problem by developing a new mean-field substitution model that generates independent site-specific amino acid distributions with constraints on the stability of the native state against both unfolding and misfolding. The model depends on a background distribution of amino acids and one selection parameter that we fix maximizing the likelihood of the observed protein sequence. The analytic solution of the model shows that the main determinant of the site-specific distributions is the number of native contacts of the site and that the most variable sites are those with an intermediate number of native contacts. The mean-field models obtained, taking into account misfolded conformations, yield larger likelihood than models that only consider the native state, because their average hydrophobicity is more realistic, and they produce on the average stable sequences for most proteins. We evaluated the mean-field model with respect to empirical substitution models on 12 test data sets of different protein families. 
In all cases, the observed site-specific sequence profiles presented smaller Kullback-Leibler divergence from the mean-field distributions than from the empirical substitution model. Next, we obtained substitution rates combining the mean-field frequencies with an empirical substitution model. The resulting mean-field substitution model assigns larger likelihood than the empirical model to all studied families when we consider sequences with identity larger than 0.35, plausibly a condition that enforces conservation of the native structure across the family. We found that the mean-field model performs better than other structurally constrained models with similar or higher complexity. With respect to the much more complex model recently developed by Bordner and Mittelmann, which takes into account pairwise terms in the amino acid distributions and also optimizes the exchangeability matrix, our model performed worse for data with small sequence divergence but better for data with larger sequence divergence. The mean-field model has been implemented into the computer program Prot_Evol that is freely available at http://ub.cbm.uam.es/software/Prot_Evol.php.",2015-04-02 +25979265,CSI 3.0: a web server for identifying secondary and super-secondary structure in proteins using NMR chemical shifts.,"The Chemical Shift Index or CSI 3.0 (http://csi3.wishartlab.com) is a web server designed to accurately identify the location of secondary and super-secondary structures in protein chains using only nuclear magnetic resonance (NMR) backbone chemical shifts and their corresponding protein sequence data. Unlike earlier versions of CSI, which only identified three types of secondary structure (helix, β-strand and coil), CSI 3.0 now identifies total of 11 types of secondary and super-secondary structures, including helices, β-strands, coil regions, five common β-turns (type I, II, I', II' and VIII), β hairpins as well as interior and edge β-strands. 
CSI 3.0 accepts experimental NMR chemical shift data in multiple formats (NMR Star 2.1, NMR Star 3.1 and SHIFTY) and generates colorful CSI plots (bar graphs) and secondary/super-secondary structure assignments. The output can be readily used as constraints for structure determination and refinement or the images may be used for presentations and publications. CSI 3.0 uses a pipeline of several well-tested, previously published programs to identify the secondary and super-secondary structures in protein chains. Comparisons with secondary and super-secondary structure assignments made via standard coordinate analysis programs such as DSSP, STRIDE and VADAR on high-resolution protein structures solved by X-ray and NMR show >90% agreement between those made with CSI 3.0.",2015-05-15 +24446261,MASSyPup--an 'out of the box' solution for the analysis of mass spectrometry data.,"Mass spectrometry has evolved to a key technology in the areas of metabolomics and proteomics. Centralized facilities generate vast amount of data, which frequently need to be processed off-site. Therefore, the distribution of data and software, as well as the training of personnel in the analysis of mass spectrometry data, becomes increasingly important. Thus, we created a comprehensive collection of mass spectrometry software which can be run directly from different media such as DVD or USB without local installation. MASSyPup is based on a Linux Live distribution and was complemented with programs for conversion, visualization and analysis of mass spectrometry (MS) data. A special emphasis was put on protein analysis and proteomics, encompassing the measurement of complete proteins, the identification of proteins based on Peptide Mass Fingerprints (PMF) or LC-MS/MS data, and de novo sequencing. Another focus was directed to the study of metabolites and metabolomics, covering the detection, identification and quantification of compounds, as well as subsequent statistical analyses. 
Additionally, we added software for Mass Spectrometry Imaging (MSI), including hardware support for self-made MSI devices. MASSyPup represents a 'ready to work' system for teaching or MS data analysis, but also represents an ideal platform for the distribution of MS data and the development of related software. The current Live DVD version can be downloaded free of charge from http://www.bioprocess.org/massypup.",2014-01-01 +25857669,GS-align for glycan structure alignment and similarity measurement.,"

Motivation

Glycans play critical roles in many biological processes, and their structural diversity is key for specific protein-glycan recognition. Comparative structural studies of biological molecules provide useful insight into their biological relationships. However, most computational tools are designed for protein structure, and despite their importance, there is no currently available tool for comparing glycan structures in a sequence order- and size-independent manner.

Results

A novel method, GS-align, is developed for glycan structure alignment and similarity measurement. GS-align generates possible alignments between two glycan structures through iterative maximum clique search and fragment superposition. The optimal alignment is then determined by the maximum structural similarity score, GS-score, which is size-independent. Benchmark tests against the Protein Data Bank (PDB) N-linked glycan library and PDB homologous/non-homologous N-glycoprotein sets indicate that GS-align is a robust computational tool to align glycan structures and quantify their structural similarity. GS-align is also applied to template-based glycan structure prediction and monosaccharide substitution matrix generation to illustrate its utility.

Availability and implementation

http://www.glycanstructure.org/gsalign.

Contact

wonpil@ku.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-08 +24604165,Selection of relevant features from amino acids enables development of robust classifiers.,"Machine learning (ML) has been extensively applied to develop models and to understand high-throughput data of biological processes. However, new ML models, trained with novel experimental results, are required to build regularly for more precise predictions. ML methods can build models from numeric data, whereas biological data are generally textual (DNA, protein sequences) or images and needs feature calculation algorithms to generate quantitative features. Programming skills along with domain knowledge are required to develop these algorithms. Therefore, the process of knowledge discovery through ML is decelerated due to lack of generic tools to construct features and to build models directly from the data. Hence, we developed a schema that calculates about 5,000 features, selects relevant features and develops protein classifiers from the training data. To demonstrate the general applicability and robustness of our method, fungal adhesins and nuclear receptor proteins were used for building classifiers which outperformed existing classifiers when tested on independent data. Next, we built a classifier for mitochondrial proteins of Plasmodium falciparum which causes human malaria because the latest corresponding classifiers are not publically accessible. Our classifier attained 98.18 % accuracy and 0.95 Matthews correlation coefficient by fivefold cross-validation and outperformed existing classifiers on independent test set. 
We implemented this schema as user-friendly and open source application Pro-Gyan ( http://code.google.com/p/pro-gyan/ ), to build and share executable classifiers without programming knowledge.",2014-03-07 +26078289,"Breast Cancer and Occupation: The Need for Action: APHA Policy Statement Number 20146, Issued November 18, 2014.","Breast cancer is the most prevalent cancer among women in the United States and other countries, making it a major public health concern. Despite significant scientific evidence about its known or suspected causes, research and prevention measures to identify and eliminate occupational and other environmental hazards and risk factors for breast cancer remain largely overlooked. As a result, hazards continue unabated for women generally, especially those who work outside the home. The science linking breast cancer and occupation in particular is growing. Researchers have identified commonly used chemicals that induce breast tumors in test animals. Animal studies link chemicals that mimic reproductive hormones to elevated breast cancer rates. Other animal and human studies link chemical exposures to increased breast cancer rates, including two recent investigations focused on occupational hazards. But the latter are the exception. Studies that attempt to identify and characterize workplace agents linked to breast cancer, as well as intervention studies focusing on the use of less toxic processes and substances, are limited. In what might be construed as a case of gender and social class bias, many research and funding agencies have ignored or downplayed the role of occupational studies despite their relevance to prevention efforts. Action required starts with making a national priority of promoting and supporting research on occupational and other environmental causes of breast cancer. 
Other public health actions include hazard surveillance and primary prevention activities such as reductions in the use of toxic materials, informed substitution, and green chemistry efforts. The original document is accessible at the APHA website, http://www.apha.org/policies-and-advocacy/public-health-policy-statements/policy-database/2015/01/07/14/55/breast-cancer-and-occupation.",2015-06-15 +25392423,Profiling small RNA reveals multimodal substructural signals in a Boltzmann ensemble.,"As the biomedical impact of small RNAs grows, so does the need to understand competing structural alternatives for regions of functional interest. Suboptimal structure analysis provides significantly more RNA base pairing information than a single minimum free energy prediction. Yet computational enhancements like Boltzmann sampling have not been fully adopted by experimentalists since identifying meaningful patterns in this data can be challenging. Profiling is a novel approach to mining RNA suboptimal structure data which makes the power of ensemble-based analysis accessible in a stable and reliable way. Balancing abstraction and specificity, profiling identifies significant combinations of base pairs which dominate low-energy RNA secondary structures. By design, critical similarities and differences are highlighted, yielding crucial information for molecular biologists. The code is freely available via http://gtfold.sourceforge.net/profiling.html.",2014-11-11 +24656595,piClust: a density based piRNA clustering algorithm.,"Piwi-interacting RNAs (piRNAs) are recently discovered, endogenous small non-coding RNAs. piRNAs protect the genome from invasive transposable elements (TE) and sustain integrity of the genome in germ cell lineages. Small RNA-sequencing data can be used to detect piRNA activations in a cell under a specific condition. However, identification of cell specific piRNA activations requires sophisticated computational methods. 
As of now, there is only one computational method, proTRAC, to locate activated piRNAs from the sequencing data. proTRAC detects piRNA clusters based on a probabilistic analysis with assumption of a uniform distribution. Unfortunately, we were not able to locate activated piRNAs from our proprietary sequencing data in chicken germ cells using proTRAC. With a careful investigation on data sets, we found that a uniform or any statistical distribution for detecting piRNA clusters may not be assumed. Furthermore, small RNA-seq data contains many different types of RNAs which was not carefully taken into account in previous studies. To improve piRNA cluster identification, we developed piClust that uses a density based clustering approach without assumption of any parametric distribution. In previous studies, it is known that piRNAs exhibit a strong tendency of forming piRNA clusters in syntenic regions of the genome. Thus, the density based clustering approach is effective and robust to the existence of non-piRNAs or noise in the data. In experiments with piRNA data from human, mouse, rat and chicken, piClust was able to detect piRNA clusters from total small RNA-seq data from germ cell lines, while proTRAC was not successful. piClust outperformed proTRAC in terms of sensitivity and running time (up to 200 folds). piClust is currently available as a web service at http://epigenomics.snu.ac.kr/piclustweb.",2014-01-23 +24565035,BiCluE - Exact and heuristic algorithms for weighted bi-cluster editing of biomedical data.,"

Background

The explosion of biological data has dramatically reformed today's biology research. The biggest challenge to biologists and bioinformaticians is the integration and analysis of large quantity of data to provide meaningful insights. One major problem is the combined analysis of data from different types. Bi-cluster editing, as a special case of clustering, which partitions two different types of data simultaneously, might be used for several biomedical scenarios. However, the underlying algorithmic problem is NP-hard.

Results

Here we contribute with BiCluE, a software package designed to solve the weighted bi-cluster editing problem. It implements (1) an exact algorithm based on fixed-parameter tractability and (2) a polynomial-time greedy heuristics based on solving the hardest part, edge deletions, first. We evaluated its performance on artificial graphs. Afterwards we exemplarily applied our implementation on real world biomedical data, GWAS data in this case. BiCluE generally works on any kind of data types that can be modeled as (weighted or unweighted) bipartite graphs.

Conclusions

To our knowledge, this is the first software package solving the weighted bi-cluster editing problem. BiCluE as well as the supplementary results are available online at http://biclue.mpi-inf.mpg.de.",2013-12-20 +23203878,D²P²: database of disordered protein predictions.,"We present the Database of Disordered Protein Prediction (D(2)P(2)), available at http://d2p2.pro (including website source code). A battery of disorder predictors and their variants, VL-XT, VSL2b, PrDOS, PV2, Espritz and IUPred, were run on all protein sequences from 1765 complete proteomes (to be updated as more genomes are completed). Integrated with these results are all of the predicted (mostly structured) SCOP domains using the SUPERFAMILY predictor. These disorder/structure annotations together enable comparison of the disorder predictors with each other and examination of the overlap between disordered predictions and SCOP domains on a large scale. D(2)P(2) will increase our understanding of the interplay between disorder and structure, the genomic distribution of disorder, and its evolutionary history. The parsed data are made available in a unified format for download as flat files or SQL tables either by genome, by predictor, or for the complete set. An interactive website provides a graphical view of each protein annotated with the SCOP domains and disordered regions from all predictors overlaid (or shown as a consensus). There are statistics and tools for browsing and comparing genomes and their disorder within the context of their position on the tree of life.",2012-11-29 +21668943,"MeRy-B: a web knowledgebase for the storage, visualization, analysis and annotation of plant NMR metabolomic profiles.","

Background

Improvements in the techniques for metabolomics analyses and growing interest in metabolomic approaches are resulting in the generation of increasing numbers of metabolomic profiles. Platforms are required for profile management, as a function of experimental design, and for metabolite identification, to facilitate the mining of the corresponding data. Various databases have been created, including organism-specific knowledgebases and analytical technique-specific spectral databases. However, there is currently no platform meeting the requirements for both profile management and metabolite identification for nuclear magnetic resonance (NMR) experiments.

Description

MeRy-B, the first platform for plant (1)H-NMR metabolomic profiles, is designed (i) to provide a knowledgebase of curated plant profiles and metabolites obtained by NMR, together with the corresponding experimental and analytical metadata, (ii) for queries and visualization of the data, (iii) to discriminate between profiles with spectrum visualization tools and statistical analysis, (iv) to facilitate compound identification. It contains lists of plant metabolites and unknown compounds, with information about experimental conditions, the factors studied and metabolite concentrations for several plant species, compiled from more than one thousand annotated NMR profiles for various organs or tissues.

Conclusion

MeRy-B manages all the data generated by NMR-based plant metabolomics experiments, from description of the biological source to identification of the metabolites and determinations of their concentrations. It is the first database allowing the display and overlay of NMR metabolomic profiles selected through queries on data or metadata. MeRy-B is available from http://www.cbib.u-bordeaux2.fr/MERYB/index.php.",2011-06-13 +27456943,Current Trends of Lung Cancer Surgery and Demographic and Social Factors Related to Changes in the Trends of Lung Cancer Surgery: An Analysis of the National Database from 2010 to 2014.,"

Purpose

We investigated current trends in lung cancer surgery and identified demographic and social factors related to changes in these trends.

Materials and methods

We estimated the incidence of lung cancer surgery using a procedure code-based approach provided by the Health Insurance Review and Assessment Service (http://opendata.hira.or.kr). The population data were obtained every year from 2010 to 2014 from the Korean Statistical Information Service (http://kosis.kr/). The annual percent change (APC) and statistical significance were calculated using the Joinpoint software.

Results

From January 2010 to December 2014, 25,687 patients underwent 25,921 lung cancer surgeries, which increased by 45.1% from 2010 to 2014. The crude incidence rate of lung cancer surgery in each year increased significantly (APC, 9.5; p < 0.05). The male-to-female ratio decreased from 2.1 to 1.6 (APC, -6.3; p < 0.05). The incidence increased in the age group of ≥ 70 years for both sexes (male: APC, 3.7; p < 0.05; female: APC, 5.96; p < 0.05). Furthermore, the proportion of female patients aged ≥ 65 years increased (APC, 7.2; p < 0.05), while that of male patients aged < 65 years decreased (APC, -3.9; p < 0.05). The proportions of segmentectomies (APC, 17.8; p < 0.05) and lobectomies (APC, 7.5; p < 0.05) increased, while the proportion of pneumonectomies decreased (APC, -6.3; p < 0.05). Finally, the proportion of patients undergoing surgery in Seoul increased (APC, 1.1; p < 0.05), while the proportion in other areas decreased (APC, -1.5; p < 0.05).

Conclusion

An increase in the use of lung cancer surgery in elderly patients and female patients, and a decrease in the proportion of patients requiring extensive pulmonary resection were identified. Furthermore, centralization of lung cancer surgery was noted.",2016-07-18 +21624162,Developing a kidney and urinary pathway knowledge base.,"

Background

Chronic renal disease is a global health problem. The identification of suitable biomarkers could facilitate early detection and diagnosis and allow better understanding of the underlying pathology. One of the challenges in meeting this goal is the necessary integration of experimental results from multiple biological levels for further analysis by data mining. Data integration in the life science is still a struggle, and many groups are looking to the benefits promised by the Semantic Web for data integration.

Results

We present a Semantic Web approach to developing a knowledge base that integrates data from high-throughput experiments on kidney and urine. A specialised KUP ontology is used to tie the various layers together, whilst background knowledge from external databases is incorporated by conversion into RDF. Using SPARQL as a query mechanism, we are able to query for proteins expressed in urine and place these back into the context of genes expressed in regions of the kidney.

Conclusions

The KUPKB gives KUP biologists the means to ask queries across many resources in order to aggregate knowledge that is necessary for answering biological questions. The Semantic Web technologies we use, together with the background knowledge from the domain's ontologies, allows both rapid conversion and integration of this knowledge base. The KUPKB is still relatively small, but questions remain about scalability, maintenance and availability of the knowledge itself.

Availability

The KUPKB may be accessed via http://www.e-lico.eu/kupkb.",2011-05-17 +25196084,Evaluation of matched control algorithms in EHR-based phenotyping studies: a case study of inflammatory bowel disease comorbidities.,"The success of many population studies is determined by proper matching of cases to controls. Some of the confounding and bias that afflict electronic health record (EHR)-based observational studies may be reduced by creating effective methods for finding adequate controls. We implemented a method to match case and control populations to compensate for sparse and unequal data collection practices common in EHR data. We did this by matching the healthcare utilization of patients after observing that more complete data was collected on high healthcare utilization patients vs. low healthcare utilization patients. In our results, we show that many of the anomalous differences in population comparisons are mitigated using this matching method compared to other traditional age and gender-based matching. As an example, the comparison of the disease associations of ulcerative colitis and Crohn's disease show differences that are not present when the controls are chosen in a random or even a matched age/gender/race algorithm. In conclusion, the use of healthcare utilization-based matching algorithms to find adequate controls greatly enhanced the accuracy of results in EHR studies. Full source code and documentation of the control matching methods is available at https://community.i2b2.org/wiki/display/conmat/.",2014-09-06 +27457716,Identification and Regulation of Genes for Cobalamin Transport in the Cyanobacterium Synechococcus sp. Strain PCC 7002.,"

Unlabelled

The cyanobacterium Synechococcus sp. strain PCC 7002 is a cobalamin auxotroph and utilizes this coenzyme solely for the synthesis of l-methionine by methionine synthase (MetH). Synechococcus sp. strain PCC 7002 is unable to synthesize cobalamin de novo, and because of the large size of this tetrapyrrole, an active-transport system must exist for cobalamin uptake. Surprisingly, no cobalamin transport system was identified in the initial annotation of the genome of this organism. With more sophisticated in silico prediction tools, a btuB-cpdA-btuC-btuF operon encoding components putatively required for a B12 uptake (btu) system was identified. The expression of these genes was predicted to be controlled by a cobalamin riboswitch. Global transcriptional profiling by high-throughput RNA sequencing of a cobalamin-independent form of Synechococcus sp. strain PCC 7002 grown in the absence or presence of cobalamin confirmed regulation of the btu operon by cobalamin. Pérez et al. (A. A. Pérez, Z. Liu, D. A. Rodionov, Z. Li, and D. A. Bryant, J Bacteriol 198:2743-2752, 2016, http://dx.doi.org/10.1128/JB.00475-16) developed a cobalamin-dependent yellow fluorescent protein reporter system in a Synechococcus sp. strain PCC 7002 variant that had been genetically modified to allow cobalamin-independent growth. This reporter system was exploited to validate components of the btu uptake system by assessing the ability of targeted mutants to transport cobalamin. The btuB promoter and a variant counterpart mutated in an essential element of the predicted cobalamin riboswitch were fused to a yfp reporter. The combined data indicate that the btuB-cpdA-btuF-btuC operon in this cyanobacterium is transcriptionally regulated by a cobalamin riboswitch.

Importance

With a cobalamin-regulated reporter system for expression of yellow fluorescent protein, genes previously misidentified as encoding subunits of a siderophore transporter were shown to encode components of cobalamin uptake in the cyanobacterium Synechococcus sp. strain PCC 7002. This study demonstrates the importance of experimental validation of in silico predictions and provides a general scheme for in vivo verification of similar cobalamin transport systems. A putative cobalamin riboswitch was identified in Synechococcus sp. strain PCC 7002. This riboswitch acts as a potential transcriptional attenuator of the btu operon that encodes the components of the cobalamin active-transport system.",2016-09-09 +26078090,Accessible surface area from NMR chemical shifts.,"Accessible surface area (ASA) is the surface area of an atom, amino acid or biomolecule that is exposed to solvent. The calculation of a molecule's ASA requires three-dimensional coordinate data and the use of a ""rolling ball"" algorithm to both define and calculate the ASA. For polymers such as proteins, the ASA for individual amino acids is closely related to the hydrophobicity of the amino acid as well as its local secondary and tertiary structure. For proteins, ASA is a structural descriptor that can often be as informative as secondary structure. Consequently there has been considerable effort over the past two decades to try to predict ASA from protein sequence data and to use ASA information (derived from chemical modification studies) as a structure constraint. Recently it has become evident that protein chemical shifts are also sensitive to ASA. Given the potential utility of ASA estimates as structural constraints for NMR we decided to explore this relationship further. 
Using machine learning techniques (specifically a boosted tree regression model) we developed an algorithm called ""ShiftASA"" that combines chemical-shift and sequence derived features to accurately estimate per-residue fractional ASA values of water-soluble proteins. This method showed a correlation coefficient between predicted and experimental values of 0.79 when evaluated on a set of 65 independent test proteins, which was an 8.2 % improvement over the next best performing (sequence-only) method. On a separate test set of 92 proteins, ShiftASA reported a mean correlation coefficient of 0.82, which was 12.3 % better than the next best performing method. ShiftASA is available as a web server ( http://shiftasa.wishartlab.com ) for submitting input queries for fractional ASA calculation.",2015-06-16 +24590442,BicOverlapper 2.0: visual analysis for gene expression.,"

Motivation

Systems biology demands the use of several point of views to get a more comprehensive understanding of biological problems. This usually leads to take into account different data regarding the problem at hand, but it also has to do with using different perspectives of the same data. This multifaceted aspect of systems biology often requires the use of several tools, and it is often hard to get a seamless integration of all of them, which would help the analyst to have an interactive discourse with the data.

Results

Focusing on expression profiling, BicOverlapper 2.0 visualizes the most relevant aspects of the analysis, including expression data, profiling analysis results and functional annotation. It also integrates several state-of-the-art numerical methods, such as differential expression analysis, gene set enrichment or biclustering.

Availability and implementation

BicOverlapper 2.0 is available at: http://vis.usal.es/bicoverlapper2",2014-03-03 +26820531,Meta-analysis method for discovering reliable biomarkers by integrating statistical and biological approaches: An application to liver toxicity.,"Biomarkers that are identified from a single study often appear to be biologically irrelevant or false positives. Meta-analysis techniques allow integrating data from multiple studies that are related but independent in order to identify biomarkers across multiple conditions. However, existing biomarker meta-analysis methods tend to be sensitive to the dataset being analyzed. Here, we propose a meta-analysis method, iMeta, which integrates t-statistic and fold change ratio for improved robustness. For evaluation of predictive performance of the biomarkers identified by iMeta, we compare our method with other meta-analysis methods. As a result, iMeta outperforms the other methods in terms of sensitivity and specificity, and especially shows robustness to study variance increase; it consistently shows higher classification accuracy on diverse datasets, while the performance of the others is highly affected by the dataset being analyzed. Application of iMeta to 59 drug-induced liver injury studies identified three key biomarker genes: Zwint, Abcc3, and Ppp1r3b. Experimental evaluation using RT-PCR and qRT-PCR shows that their expressional changes in response to drug toxicity are concordant with the result of our method. iMeta is available at http://imeta.kaist.ac.kr/index.html.",2016-01-25 +23971971,"Association between the hOGG1 Ser326Cys polymorphism and lung cancer susceptibility: a meta-analysis based on 22,475 subjects.","

Objectives

The Ser326Cys polymorphism in the human 8-oxoguanine glycosylase (hOGG1) gene with lung cancer susceptibility had been investigated, but results were inconsistent and underpowered. The aim of this study was to conduct a meta-analysis assessing the association of hOGG1 Ser326Cys polymorphism with risk of lung cancer.

Materials and methods

Relevant studies were identified through a search of MEDLINE, PubMed, Web of Science, EMBASE, and Chinese Biomedical Literature database (CBM) using terms ""lung cancer"", ""hOGG1"" or ""OGG1"", ""polymorphism"" or ""variation"" and the last search updated on May 1, 2013. In this meta-analysis, we assessed 30 published studies involving 22,475 subjects that investigated the association between the hOGG1 Ser326Cys polymorphism and lung cancer susceptibility.

Results

Overall, the hOGG1 Ser326Cys polymorphism was not associated with lung cancer susceptibility in different genetic models (dominant model comparison: OR = 0.133; 95% CI = 0.111-0.161; P(heterogeneity) = 0.000), and recessive model: OR = 0.543; 95% CI = 0.399-0.739; P(heterogeneity) = 0.000). Similarly, in the stratified analyses by ethnicity, significantly increased risks were found among Asians for homozygote comparison (OR = 0.850; 95% CI = 0.732-0.986; P(heterogeneity) = 0.064), and dominant model (OR = 0.160; 95% CI = 0.137-0.187; P(heterogeneity) = 0.001), and Caucasians for dominant model (OR = 1.35; 95% CI = 1.03-1.77; P(heterogeneity) = 0.015), and recessive model (OR = 1.35; 95% CI = 1.03-1.77; P(heterogeneity) = 0.015). In population-based populations, marginally significant increased risks were found in dominant model (OR = 0.143; 95% CI = 0.111-0.184; P(heterogeneity) = 0.000) and recessive model (OR = 0.429; 95% CI = 0.261-0.705; P(heterogeneity) = 0.000). We also found a significant difference between hOGG1 Ser326Cys genotype and lung cancer susceptibility in studies with hospital-based controls for homozygote model (OR = 0.798; 95% CI = 0.649-0.982; P(heterogeneity) = 0.007), dominant model (OR = 0.122; 95% CI = 0.091-0.163; P(heterogeneity) = 0.000).

Conclusion

Our data showed that the hOGG1 Ser326Cys polymorphism contributed to the risk of lung cancer.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/3842531131031605.",2013-08-23 +27318205,NegGOA: negative GO annotations selection using ontology structure.,"

Motivation

Predicting the biological functions of proteins is one of the key challenges in the post-genomic era. Computational models have demonstrated the utility of applying machine learning methods to predict protein function. Most prediction methods explicitly require a set of negative examples-proteins that are known not carrying out a particular function. However, Gene Ontology (GO) almost always only provides the knowledge that proteins carry out a particular function, and functional annotations of proteins are incomplete. GO structurally organizes more than tens of thousands GO terms and a protein is annotated with several (or dozens) of these terms. For these reasons, the negative examples of a protein can greatly help distinguishing true positive examples of the protein from such a large candidate GO space.

Results

In this paper, we present a novel approach (called NegGOA) to select negative examples. Specifically, NegGOA takes advantage of the ontology structure, available annotations and potentiality of additional annotations of a protein to choose negative examples of the protein. We compare NegGOA with other negative examples selection algorithms and find that NegGOA produces much fewer false negatives than them. We incorporate the selected negative examples into an efficient function prediction model to predict the functions of proteins in Yeast, Human, Mouse and Fly. NegGOA also demonstrates improved accuracy than these comparing algorithms across various evaluation metrics. In addition, NegGOA is less suffered from incomplete annotations of proteins than these comparing methods.

Availability and implementation

The Matlab and R codes are available at https://sites.google.com/site/guoxian85/neggoa

Contact

gxyu@swu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-17 +25297067,Visual workflows for 13C-metabolic flux analysis.,"

Motivation

The precise quantification of intracellular metabolic flow rates is of fundamental importance in bio(techno)logy and medical research. The gold standard in the field is metabolic flux analysis (MFA) with 13C-labeling experiments. 13C-MFA workflows orchestrate several, mainly human-in-the-loop, software applications, integrating them with plenty of heterogeneous information. In practice, this had posed a major practical barrier for evaluating, interpreting and understanding isotopic data from carbon labeling experiments.

Results

Graphical modeling, interactive model exploration and visual data analysis are the key to overcome this limitation. We have developed a first-of-its-kind graphical tool suite providing scientists with an integrated software framework for all aspects of 13C-MFA. Almost 30 modules (plug-ins) have been implemented for the Omix visualization software. Several advanced graphical workflows and ergonomic user interfaces support major domain-specific modeling and proofreading tasks. With that, the graphical suite is a productivity enhancing tool and an original educational training instrument supporting the adoption of 13C-MFA applications in all life science fields.

Availability

The Omix Light Edition is freely available at http://www.omix-visualization.com

Contact

k.noeh@fz-juelich.de, p.droste@omix-visualization.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-07 +22594998,Genovar: a detection and visualization tool for genomic variants.,"

Background

Along with single nucleotide polymorphisms (SNPs), copy number variation (CNV) is considered an important source of genetic variation associated with disease susceptibility. Despite the importance of CNV, the tools currently available for its analysis often produce false positive results due to limitations such as low resolution of array platforms, platform specificity, and the type of CNV. To resolve this problem, spurious signals must be separated from true signals by visual inspection. None of the previously reported CNV analysis tools support this function and the simultaneous visualization of comparative genomic hybridization arrays (aCGH) and sequence alignment. The purpose of the present study was to develop a useful program for the efficient detection and visualization of CNV regions that enables the manual exclusion of erroneous signals.

Results

A JAVA-based stand-alone program called Genovar was developed. To ascertain whether a detected CNV region is a novel variant, Genovar compares the detected CNV regions with previously reported CNV regions using the Database of Genomic Variants (DGV, http://projects.tcag.ca/variation) and the Single Nucleotide Polymorphism Database (dbSNP). The current version of Genovar is capable of visualizing genomic data from sources such as the aCGH data file and sequence alignment format files.

Conclusions

Genovar is freely accessible and provides a user-friendly graphic user interface (GUI) to facilitate the detection of CNV regions. The program also provides comprehensive information to help in the elimination of spurious signals by visual inspection, making Genovar a valuable tool for reducing false positive CNV results.

Availability

http://genovar.sourceforge.net/.",2012-05-08 +25505007,MOROKOSHI: transcriptome database in Sorghum bicolor.,"In transcriptome analysis, accurate annotation of each transcriptional unit and its expression profile is essential. A full-length cDNA (FL-cDNA) collection facilitates the refinement of transcriptional annotation, and accurate transcription start sites help to unravel transcriptional regulation. We constructed a normalized FL-cDNA library from eight growth stages of aerial tissues in Sorghum bicolor and isolated 37,607 clones. These clones were Sanger sequenced from the 5' and/or 3' ends and in total 38,981 high-quality expressed sequence tags (ESTs) were obtained. About one-third of the transcripts of known genes were captured as FL-cDNA clone resources. In addition to these, we also annotated 272 novel genes, 323 antisense transcripts and 1,672 candidate isoforms. These clones are available from the RIKEN Bioresource Center. After obtaining accurate annotation of transcriptional units, we performed expression profile analysis. We carried out spikelet-, seed- and stem-specific RNA sequencing (RNA-Seq) analysis and confirmed the expression of 70.6% of the newly identified genes. We also downloaded 23 sorghum RNA-Seq samples that are publicly available and these are shown on a genome browser together with our original FL-cDNA and RNA-Seq data. Using our original and publicly available data, we made an expression profile of each gene and identified the top 20 genes with the most similar expression. In addition, we visualized their relationships in gene co-expression networks. 
Users can access and compare various transcriptome data from S. bicolor at http://sorghum.riken.jp.",2014-12-09 +26724062,"Development of intelligent instruments with embedded HTTP servers for control and data acquisition in a cryogenic setup--The hardware, firmware, and software implementation.","The power of Ethernet for control and automation technology is being largely understood by the automation industry in recent times. Ethernet with HTTP (Hypertext Transfer Protocol) is one of the most widely accepted communication standards today. Ethernet is best known for being able to control through internet from anywhere in the globe. The Ethernet interface with built-in on-chip embedded servers ensures global connections for crate-less model of control and data acquisition systems which have several advantages over traditional crate-based control architectures for slow applications. This architecture will completely eliminate the use of any extra PLC (Programmable Logic Controller) or similar control hardware in any automation network as the control functions are firmware coded inside intelligent meters itself. Here, we describe the indigenously built project of a cryogenic control system built for linear accelerator at Inter University Accelerator Centre, known as ""CADS,"" which stands for ""Complete Automation of Distribution System."" CADS deals with complete hardware, firmware, and software implementation of the automated linac cryogenic distribution system using many Ethernet based embedded cryogenic instruments developed in-house. Each instrument works as an intelligent meter called device-server which has the control functions and control loops built inside the firmware itself. 
Dedicated meters with built-in servers were designed out of ARM (Acorn RISC (Reduced Instruction Set Computer) Machine) and ATMEL processors and COTS (Commercially Off-the-Shelf) SMD (Surface Mount Devices) components, with analog sensor front-end and a digital back-end web server implementing remote procedure call over HTTP for digital control and readout functions. At present, 24 instruments which run 58 embedded servers inside, each specific to a particular type of sensor-actuator combination for closed loop operations, are now deployed and distributed across control LAN (Local Area Network). A group of six categories of such instruments have been identified for all cryogenic applications required for linac operation which were designed to build this medium-scale cryogenic automation setup. These devices have special features like remote rebooters, daughter boards for PIDs (Proportional Integral Derivative), etc., to operate them remotely in radiation areas and also have emergency switches by which each device can be taken to emergency mode temporarily. Finally, all the data are monitored, logged, controlled, and analyzed online at a central control room which has a user-friendly control interface developed using LabVIEW(®). This paper discusses the overall hardware, firmware, software design, and implementation for the cryogenics setup.",2015-12-01 +26096292,"Genetic variants of Kudoa septempunctata (Myxozoa: Multivalvulida), a flounder parasite causing foodborne disease.","Foodborne disease outbreaks caused by raw olive flounders (Paralichthys olivaceus) parasitized with Kudoa septempunctata have been reported in Japan. Origins of olive flounders consumed in Japan vary, being either domestic or imported, and aquaculture-raised or natural. Although it is unknown whether different sources are associated with different outcomes, it is desirable to identify whether this is the case by determining whether unique K. 
septempunctata strains occur and if so, whether some are associated with foodborne illness. We here developed an intraspecific genotyping method, using the sequence variation of mitochondrial genes. We collected olive flounder samples from foodborne disease outbreaks, domestic fish farms or quarantine offices and investigated whether K. septempunctata genotype is associated with pathogenicity or geographic origin. The 104 samples were classified into three genotypes, ST1, ST2 and ST3. Frequency of symptomatic cases differed by genotypes, but the association was not statistically significant. Whereas K. septempunctata detected from aquaculture-raised and natural fish from Japan were either ST1 or ST2, those from fish inspected at quarantine from Korea to Japan were ST3. Our method can be applied to phylogeographic analysis of K. septempunctata and contribute to containing the foodborne disease. The genotype database is hosted in the PubMLST website (http://pubmlst.org/kseptempunctata/).",2015-06-11 +26062809,CMRegNet-An interspecies reference database for corynebacterial and mycobacterial regulatory networks.,"

Background

Organisms utilize a multitude of mechanisms for responding to changing environmental conditions, maintaining their functional homeostasis and to overcome stress situations. One of the most important mechanisms is transcriptional gene regulation. In-depth study of the transcriptional gene regulatory network can lead to various practical applications, creating a greater understanding of how organisms control their cellular behavior.

Description

In this work, we present a new database, CMRegNet for the gene regulatory networks of Corynebacterium glutamicum ATCC 13032 and Mycobacterium tuberculosis H37Rv. We furthermore transferred the known networks of these model organisms to 18 other non-model but phylogenetically close species (target organisms) of the CMNR group. In comparison to other network transfers, for the first time we utilized two model organisms resulting into a more diverse and complete network of the target organisms.

Conclusion

CMRegNet provides easy access to a total of 3,103 known regulations in C. glutamicum ATCC 13032 and M. tuberculosis H37Rv and to 38,940 evolutionary conserved interactions for 18 non-model species of the CMNR group. This makes CMRegNet to date the most comprehensive database of regulatory interactions of CMNR bacteria. The content of CMRegNet is publicly available online via a web interface found at http://lgcm.icb.ufmg.br/cmregnet .",2015-06-11 +25115705,Modeling dynamic functional relationship networks and application to ex vivo human erythroid differentiation.,"

Motivation

Functional relationship networks, which summarize the probability of co-functionality between any two genes in the genome, could complement the reductionist focus of modern biology for understanding diverse biological processes in an organism. One major limitation of the current networks is that they are static, while one might expect functional relationships to consistently reprogram during the differentiation of a cell lineage. To address this potential limitation, we developed a novel algorithm that leverages both differentiation stage-specific expression data and large-scale heterogeneous functional genomic data to model such dynamic changes. We then applied this algorithm to the time-course RNA-Seq data we collected for ex vivo human erythroid cell differentiation.

Results

Through computational cross-validation and literature validation, we show that the resulting networks correctly predict the (de)-activated functional connections between genes during erythropoiesis. We identified known critical genes, such as HBD and GATA1, and functional connections during erythropoiesis using these dynamic networks, while the traditional static network was not able to provide such information. Furthermore, by comparing the static and the dynamic networks, we identified novel genes (such as OSBP2 and PDZK1IP1) that are potential drivers of erythroid cell differentiation. This novel method of modeling dynamic networks is applicable to other differentiation processes where time-course genome-scale expression data are available, and should assist in generating greater understanding of the functional dynamics at play across the genome during development.

Availability and implementation

The network described in this article is available at http://guanlab.ccmb.med.umich.edu/stageSpecificNetwork.",2014-08-12 +23762306,"Genome-wide and species-wide in silico screening for intragenic MicroRNAs in human, mouse and chicken.","MicroRNAs (miRNAs) are non-coding RNAs (ncRNAs) involved in regulation of gene expression. Intragenic miRNAs, especially those exhibiting a high degree of evolutionary conservation, have been shown to be coordinately regulated and/or expressed with their host genes, either with synergistic or antagonistic correlation patterns. However, the degree of cross-species conservation of miRNA/host gene co-location is not known and co-expression information is incomplete and fragmented among several studies. Using the genomic resources (miRBase and Ensembl) we performed a genome-wide in silico screening (GWISS) for miRNA/host gene pairs in three well-annotated vertebrate species: human, mouse, and chicken. Approximately half of currently annotated miRNA genes resided within host genes: 53.0% (849/1,600) in human, 48.8% (418/855) in mouse, and 42.0% (210/499) in chicken, which we present in a central publicly available Catalog of intragenic miRNAs (http://www.integratomics-time.com/miR-host/catalog). The miRNA genes resided within either protein-coding or ncRNA genes, which include long intergenic ncRNAs (lincRNAs) and small nucleolar RNAs (snoRNAs). Twenty-seven miRNA genes were found to be located within the same host genes in all three species and the data integration from literature and databases showed that most (26/27) have been found to be co-expressed. Particularly interesting are miRNA genes located within genes encoding for miRNA silencing machinery (DGCR8, DICER1, and SND1 in human and Cnot3, Gdcr8, Eif4e, Tnrc6b, and Xpo5 in mouse). We furthermore discuss a potential for phenotype misattribution of miRNA host gene polymorphism or gene modification studies due to possible collateral effects on miRNAs hosted within them. 
In conclusion, the catalog of intragenic miRNAs and identified 27 miRNA/host gene pairs with cross-species conserved co-location, co-expression, and potential co-regulation, provide excellent candidates for further functional annotation of intragenic miRNAs in health and disease.",2013-06-06 +24264591,A graphical user interface for infant ERP analysis.,"Recording of event-related potentials (ERPs) is one of the best-suited technologies for examining brain function in human infants. Yet the existing software packages are not optimized for the unique requirements of analyzing artifact-prone ERP data from infants. We developed a new graphical user interface that enables an efficient implementation of a two-stage approach to the analysis of infant ERPs. In the first stage, video records of infant behavior are synchronized with ERPs at the level of individual trials to reject epochs with noncompliant behavior and other artifacts. In the second stage, the interface calls MATLAB and EEGLAB (Delorme & Makeig, Journal of Neuroscience Methods 134(1):9-21, 2004) functions for further preprocessing of the ERP signal itself (i.e., filtering, artifact removal, interpolation, and rereferencing). Finally, methods are included for data visualization and analysis by using bootstrapped group averages. Analyses of simulated and real EEG data demonstrated that the proposed approach can be effectively used to establish task compliance, remove various types of artifacts, and perform representative visualizations and statistical comparisons of ERPs. The interface is available for download from http://www.uta.fi/med/icl/methods/eeg.html in a format that is widely applicable to ERP studies with special populations and open for further editing by users.",2014-09-01 +27301718,Neurotoxicity of the Parkinson Disease-Associated Pesticide Ziram Is Synuclein-Dependent in Zebrafish Embryos.,"

Background

Exposure to the commonly used dithiocarbamate (DTC) pesticides is associated with an increased risk of developing Parkinson disease (PD), although the mechanisms by which they exert their toxicity are not completely understood.

Objective

We studied the mechanisms of ziram's (a DTC fungicide) neurotoxicity in vivo.

Methods

Zebrafish (ZF) embryos were utilized to determine ziram's effects on behavior, neuronal toxicity, and the role of synuclein in its toxicity.

Results

Nanomolar-range concentrations of ziram caused selective loss of dopaminergic (DA) neurons and impaired swimming behavior. Because ziram increases α-synuclein (α-syn) concentrations in rat primary neuronal cultures, we investigated the effect of ziram on ZF γ-synuclein 1 (γ1). ZF express 3 synuclein isoforms, and ZF γ1 appears to be the closest functional homologue to α-syn. We found that recombinant ZF γ1 formed fibrils in vitro, and overexpression of ZF γ1 in ZF embryos led to the formation of neuronal aggregates and neurotoxicity in a manner similar to that of α-syn. Importantly, knockdown of ZF γ1 with morpholinos and disruption of oligomers with the molecular tweezer CLR01 prevented ziram's DA toxicity.

Conclusions

These data show that ziram is selectively toxic to DA neurons in vivo, and this toxicity is synuclein-dependent. These findings have important implications for understanding the mechanisms by which pesticides may cause PD. Citation: Lulla A, Barnhill L, Bitan G, Ivanova MI, Nguyen B, O'Donnell K, Stahl MC, Yamashiro C, Klärner FG, Schrader T, Sagasti A, Bronstein JM. 2016. Neurotoxicity of the Parkinson disease-associated pesticide ziram is synuclein-dependent in zebrafish embryos. Environ Health Perspect 124:1766-1775; http://dx.doi.org/10.1289/EHP141.",2016-06-15 +27162970,Modeling Epoxidation of Drug-like Molecules with a Deep Machine Learning Network.,"Drug toxicity is frequently caused by electrophilic reactive metabolites that covalently bind to proteins. Epoxides comprise a large class of three-membered cyclic ethers. These molecules are electrophilic and typically highly reactive due to ring tension and polarized carbon-oxygen bonds. Epoxides are metabolites often formed by cytochromes P450 acting on aromatic or double bonds. The specific location on a molecule that undergoes epoxidation is its site of epoxidation (SOE). Identifying a molecule's SOE can aid in interpreting adverse events related to reactive metabolites and direct modification to prevent epoxidation for safer drugs. This study utilized a database of 702 epoxidation reactions to build a model that accurately predicted sites of epoxidation. The foundation for this model was an algorithm originally designed to model sites of cytochromes P450 metabolism (called XenoSite) that was recently applied to model the intrinsic reactivity of diverse molecules with glutathione. This modeling algorithm systematically and quantitatively summarizes the knowledge from hundreds of epoxidation reactions with a deep convolution network. This network makes predictions at both an atom and molecule level. 
The final epoxidation model constructed with this approach identified SOEs with 94.9% area under the curve (AUC) performance and separated epoxidized and non-epoxidized molecules with 79.3% AUC. Moreover, within epoxidized molecules, the model separated aromatic or double bond SOEs from all other aromatic or double bonds with AUCs of 92.5% and 95.1%, respectively. Finally, the model separated SOEs from sites of sp(2) hydroxylation with 83.2% AUC. Our model is the first of its kind and may be useful for the development of safer drugs. The epoxidation model is available at http://swami.wustl.edu/xenosite.",2015-06-09 +24895436,DiseaseConnect: a comprehensive web server for mechanism-based disease-disease connections.,"The DiseaseConnect (http://disease-connect.org) is a web server for analysis and visualization of a comprehensive knowledge on mechanism-based disease connectivity. The traditional disease classification system groups diseases with similar clinical symptoms and phenotypic traits. Thus, diseases with entirely different pathologies could be grouped together, leading to a similar treatment design. Such problems could be avoided if diseases were classified based on their molecular mechanisms. Connecting diseases with similar pathological mechanisms could inspire novel strategies on the effective repositioning of existing drugs and therapies. Although there have been several studies attempting to generate disease connectivity networks, they have not yet utilized the enormous and rapidly growing public repositories of disease-related omics data and literature, two primary resources capable of providing insights into disease connections at an unprecedented level of detail. Our DiseaseConnect, the first public web server, integrates comprehensive omics and literature data, including a large amount of gene expression data, Genome-Wide Association Studies catalog, and text-mined knowledge, to discover disease-disease connectivity via common molecular mechanisms. 
Moreover, the clinical comorbidity data and a comprehensive compilation of known drug-disease relationships are additionally utilized for advancing the understanding of the disease landscape and for facilitating the mechanism-based development of new drug treatments.",2014-06-03 +23118485,MBGD update 2013: the microbial genome database for exploring the diversity of microbial world.,"The microbial genome database for comparative analysis (MBGD, available at http://mbgd.genome.ad.jp/) is a platform for microbial genome comparison based on orthology analysis. As its unique feature, MBGD allows users to conduct orthology analysis among any specified set of organisms; this flexibility allows MBGD to adapt to a variety of microbial genomic study. Reflecting the huge diversity of microbial world, the number of microbial genome projects now becomes several thousands. To efficiently explore the diversity of the entire microbial genomic data, MBGD now provides summary pages for pre-calculated ortholog tables among various taxonomic groups. For some closely related taxa, MBGD also provides the conserved synteny information (core genome alignment) pre-calculated using the CoreAligner program. In addition, efficient incremental updating procedure can create extended ortholog table by adding additional genomes to the default ortholog table generated from the representative set of genomes. Combining with the functionalities of the dynamic orthology calculation of any specified set of organisms, MBGD is an efficient and flexible tool for exploring the microbial genome diversity.",2012-10-30 +24330805,The Older Persons' Transitions in Care (OPTIC) study: pilot testing of the transition tracking tool.,"

Background

OPTIC is a mixed method Partnership for Health System Improvement (http://www.cihr-irsc.gc.ca/e/34348.html) study focused on improving care for nursing home (NH) residents who are transferred to and from emergency departments (EDs) via emergency medical services (EMS). In the pilot study we tested feasibility of concurrently collecting individual resident data during transitions across settings using the Transition Tracking Tool (T3).

Methods

The pilot study tracked 54 residents transferred from NHs to one of two EDs in two western Canadian provinces over a three month period. The T3 is an electronic data collection tool developed for this study to record data relevant to describing and determining success of transitions in care. It comprises 800+ data elements including resident characteristics, reasons and precipitating factors for transfer, advance directives, family involvement, healthcare services provided, disposition decisions, and dates/times and timing.

Results

Residents were elderly (mean age = 87.1 years) and the majority were female (61.8%). Feasibility of collecting data from multiple sources across two research sites was established. We identified resources and requirements to access and retrieve specific data elements in various settings to manage data collection processes and allocate research staff resources. We present preliminary data from NH, EMS, and ED settings.

Conclusions

While most research in this area has focused on a unidirectional process of patient progression from one care setting to another, this study established feasibility of collecting detailed data from beginning to end of a transition across multiple settings and in multiple directions.",2013-12-14 +24281365,Retrieving Y chromosomal haplogroup trees using GWAS data.,"Phylogenetically informative Y chromosomal single-nucleotide polymorphisms (Y-SNPs) integrated in DNA chips have not been sufficiently explored in most genome-wide association studies (GWAS). Herein, we introduce a pipeline to retrieve Y-SNP data. We introduce the software YTool (http://mitotool.org/ytool/) to handle conversion, filtering, and annotation of the data. Genome-wide SNP data from populations in Myanmar are used to construct a haplogroup tree for 117 Y chromosomes based on 369 high-confidence Y-SNPs. Parallel genotyping and published resequencing data of Y chromosomes confirm the validity of our pipeline. We apply this strategy to the CEU HapMap data set and construct a haplogroup tree with 107 Y-SNPs from 39 individuals. The retrieved Y-SNPs can discern the parental genetic structure of populations. Given the massive quantity of data from GWAS, this method facilitates future investigations of Y chromosome diversity.",2013-11-27 +26871716,Time to manage: patient strategies for coping with an absence of care coordination and continuity.,"This paper examines how people with chronic illnesses respond to absences of continuity and coordination of care. Little work has been done on how the ill person might mitigate flaws in a less than optimal system. Our qualitative research, carried out among 91 participants in Australia, reveals that people with chronic illnesses create strategies to facilitate the management of their care. 
These strategies included efforts to improve communication between themselves and their health care practitioners; keeping personal up-to-date medication lists; and generating their own specific management plans. While we do not submit that it is patients' responsibility to attend to gaps in the health system, our data suggests that chronically ill people can, in and through such strategies, exert a measure of agency over their own care; making it effectively more continuous and coordinated. Participants crafted strategies according to the particular social and bodily rhythms that their ongoing illnesses had lent to their lives. Our analysis advances the view that the ill body itself is capable of enfolding the health system into the rhythms of illness - rather than the ill body always fitting into the overarching structural tempo. This entails an agent-centric view of time in illness experience. A Virtual Abstract of this paper can be found at: https://youtu.be/UwbxlEJOTx8.",2016-02-12 +25803548,mQTL.NMR: an integrated suite for genetic mapping of quantitative variations of (1)H NMR-based metabolic profiles.,"High-throughput (1)H nuclear magnetic resonance (NMR) is an increasingly popular robust approach for qualitative and quantitative metabolic profiling, which can be used in conjunction with genomic techniques to discover novel genetic associations through metabotype quantitative trait locus (mQTL) mapping. There is therefore a crucial necessity to develop specialized tools for an accurate detection and unbiased interpretability of the genetically determined metabolic signals. Here we introduce and implement a combined chemoinformatic approach for objective and systematic analysis of untargeted (1)H NMR-based metabolic profiles in quantitative genetic contexts. 
The R/Bioconductor mQTL.NMR package was designed to (i) perform a series of preprocessing steps restoring spectral dependency in collinear NMR data sets to reduce the multiple testing burden, (ii) carry out robust and accurate mQTL mapping in human cohorts as well as in rodent models, (iii) statistically enhance structural assignment of genetically determined metabolites, and (iv) illustrate results with a series of visualization tools. Built-in flexibility and implementation in the powerful R/Bioconductor framework allow key preprocessing steps such as peak alignment, normalization, or dimensionality reduction to be tailored to specific problems. The mQTL.NMR package is freely available with its source code through the Comprehensive R/Bioconductor repository and its own website ( http://www.ican-institute.org/tools/ ). It represents a significant advance to facilitate untargeted metabolomic data processing and quantitative analysis and their genetic mapping.",2015-04-02 +23459781,The FoodCast research image database (FRIDa).,"In recent years we have witnessed an increasing interest in food processing and eating behaviors. This is probably due to several reasons. The biological relevance of food choices, the complexity of the food-rich environment in which we presently live (making food-intake regulation difficult), and the increasing health care cost due to illness associated with food (food hazards, food contamination, and aberrant food-intake). Despite the importance of the issues and the relevance of this research, comprehensive and validated databases of stimuli are rather limited, outdated, or not available for non-commercial purposes to independent researchers who aim at developing their own research program. 
The FoodCast Research Image Database (FRIDa) we present here includes 877 images belonging to eight different categories: natural-food (e.g., strawberry), transformed-food (e.g., french fries), rotten-food (e.g., moldy banana), natural-non-food items (e.g., pinecone), artificial food-related objects (e.g., teacup), artificial objects (e.g., guitar), animals (e.g., camel), and scenes (e.g., airport). FRIDa has been validated on a sample of healthy participants (N = 73) on standard variables (e.g., valence, familiarity, etc.) as well as on other variables specifically related to food items (e.g., perceived calorie content); it also includes data on the visual features of the stimuli (e.g., brightness, high frequency power, etc.). FRIDa is a well-controlled, flexible, validated, and freely available (http://foodcast.sissa.it/neuroscience/) tool for researchers in a wide range of academic fields and industry.",2013-03-01 +25297069,ExomeAI: detection of recurrent allelic imbalance in tumors using whole-exome sequencing data.,"

Summary

Whole-exome sequencing (WES) has extensively been used in cancer genome studies; however, the use of WES data in the study of loss of heterozygosity or more generally allelic imbalance (AI) has so far been very limited, which highlights the need for user-friendly and flexible software that can handle low-quality datasets. We have developed a statistical approach, ExomeAI, for the detection of recurrent AI events using WES datasets, specifically where matched normal samples are not available.

Availability

ExomeAI is a web-based application, publicly available at: http://genomequebec.mcgill.ca/exomeai.

Contact

JavadNadaf@gmail.com or somayyeh.fahiminiya@mcgill.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-08 +23918247,"CellMissy: a tool for management, storage and analysis of cell migration data produced in wound healing-like assays.","

Summary

Automated image processing has allowed cell migration research to evolve to a high-throughput research field. As a consequence, there is now an unmet need for data management in this domain. The absence of a generic management system for the quantitative data generated in cell migration assays results in each dataset being treated in isolation, making data comparison across experiments difficult. Moreover, by integrating quality control and analysis capabilities into such a data management system, the common practice of having to manually transfer data across different downstream analysis tools will be markedly sped up and made more robust. In addition, access to a data management solution creates gateways for data standardization, meta-analysis and structured public data dissemination. We here present CellMissy, a cross-platform data management system for cell migration data with a focus on wound healing data. CellMissy simplifies and automates data management, storage and analysis from the initial experimental set-up to data exploration.

Availability and implementation

CellMissy is a cross-platform open-source software developed in Java. Source code and cross-platform binaries are freely available under the Apache2 open source license at http://cellmissy.googlecode.com.",2013-08-05 +24928210,Detecting differential protein expression in large-scale population proteomics.,"

Motivation

Mass spectrometry (MS)-based high-throughput quantitative proteomics shows great potential in large-scale clinical biomarker studies, identifying and quantifying thousands of proteins in biological samples. However, there are unique challenges in analyzing the quantitative proteomics data. One issue is that the quantification of a given peptide is often missing in a subset of the experiments, especially for less abundant peptides. Another issue is that different MS experiments of the same study have significantly varying numbers of peptides quantified, which can result in more missing peptide abundances in an experiment that has a smaller total number of quantified peptides. To detect as many biomarker proteins as possible, it is necessary to develop bioinformatics methods that appropriately handle these challenges.

Results

We propose a Significance Analysis for Large-scale Proteomics Studies (SALPS) that handles missing peptide intensity values caused by the two mechanisms mentioned above. Our model has a robust performance in both simulated data and proteomics data from a large clinical study. Because varying patients' sample qualities and deviating instrument performances are not avoidable for clinical studies performed over the course of several years, we believe that our approach will be useful to analyze large-scale clinical proteomics data.

Availability and implementation

R codes for SALPS are available at http://www.stanford.edu/%7eclairesr/software.html.",2014-06-12 +21435154,Gene expression analysis in clear cell renal cell carcinoma using gene set enrichment analysis for biostatistical management.,"

Objective

To improve the workflow for standardizing the statistical interpretation provides an opportunity for the analysis of gene expression in clear cell renal cell carcinoma (ccRCC). RCC as a solid tumour entity represents a very suitable tumour model for such investigations. Although it is possible to investigate expression profiles by microarray technologies, the main problem is how to adequately interpret the accumulated mass of data derived from microarray technologies. There is a clear lack of a defined, consistent and comparable biostatistical analysis system, with no specific biostatistical standard methodology being available to compare the results of microarray analyses. We used the gene set enrichment analysis (GSEA) method to analyze microarray data from RCC tissue. The present study aimed to analyze differential expression profiles and establish biomarkers suitable for prognostication at the time of renal surgery by comparing RCC patients with long-term survival data against RCC samples of patients with poorly differentiated (grade 3) RCC, concomitant metastatic disease and short survival.

Patients and methods

In the present study, a total of 29 ccRCC fresh-frozen tissue samples were used; 14 samples from grade 1 (G1) RCC patients without metastatic disease and 15 from grade 3 (G3) RCC patients with synchronous metastatic disease. Expression profiling was performed with the Human Genome U133 Plus 2.0 Array (Affymetrix Corp., Santa Clara, CA, USA). Clinical data and long-term follow-up were obtained for all patients. The primary probe level analysis was performed using the Affymetrix MAS 5 algorithm. Further statistical processing was carried out by GSEA, using the Molecular Signatures Database, MSigDB (http://www.broad.mit.edu/gsea/msigdb/index.jsp). After selecting gene sets with the highest leading edge subsets, a cluster and a further analyses based on MSigDB data bank analysis was performed.

Results

In total, 15 poorly G3 ccRCC, 14 well differentiated G1 ccRCC and 14 normal renal tissue samples were analyzed for comparative gene expression profiling. There were 12 of 15 G3 ccRCC patients who had synchronous metastatic disease at the time of surgery (pN+ and/or distant metastases: pN+ only = 4, M+ only = 11 and pN+M+ = 3). The GSEA identified 700 gene sets. Out of these, 120 sets with the highest leading edge subset were selected monitored by hierarchical clustering G1 vs G3. Comparative analysis using the MSigDB data bank for pathway network identified 16 gene sets that were differentially strongly over- or underexpressed in G3 vs G1 tumours and are involved in various aspects of tumour physiology, such as metastases and cell motility, signalling and cell proliferation, as well as gene products that are involved in the building of the extracellular matrix and as cell surface markers.

Conclusions

We analyzed microarray data of gene expression in ccRCC comparing poorly differentiated and well differentiated tumour tissue samples. Using GSEA, we found a number of genes set candidates relevant to biological network processes with high complexity; conspicuously, these comprised members of the interleukin- and chemokine-family, cyclin-dependent kinases, angiogenic growth factors and transcriptional factors. This suggests that, in poorly differentiated aggressive ccRCC, there may be a limited number of gene sets that are responsible for the very aggressive biological behaviour. This comparison performed at a gene set level enables the identification of such congruency between different gene sets and whole data sets with respect to a specific biological question. GSEA embedded in the statistical workflow procedure for the suitable preparation of expression data may improve the analysis and avoid missing changes at the molecular level. A systematic approach such as GSEA is clearly needed to analyze raw data from microarray analyses, although these data can only be descriptive and the mass of raw data is derived from a relatively small number of tissue samples. However, consistent alterations of gene expression found in specific tumour entities may allow a better understanding of certain aspects of specific tumour biology. Therefore, the molecular characterization of individual tumours may potentially be useful for the better individual assessment of prognosis and, finally, the identification of biomarkers and targets of specific treatments may eventually help to improve treatment.",2011-03-16 +23368702,FitSearch: a robust way to interpret a yeast fitness profile in terms of drug's mode-of-action.,"

Background

Yeast deletion-mutant collections have been successfully used to infer the mode-of-action of drugs especially by profiling chemical-genetic and genetic-genetic interactions on a genome-wide scale. Although tens of thousands of those profiles are publicly available, a lack of an accurate method for mining such data has been a major bottleneck for more widespread use of these useful resources.

Results

For general usage of those public resources, we designed FitRankDB as a general repository of fitness profiles, and developed a new search algorithm, FitSearch, for identifying the profiles that have a high similarity score with statistical significance for a given fitness profile. We demonstrated that our new repository and algorithm are highly beneficial to researchers who are attempting to make hypotheses based on unknown modes-of-action of bioactive compounds, regardless of the types of experiments that have been performed using yeast deletion-mutant collection in various types of different measurement platforms, especially non-chip-based platforms.

Conclusions

We showed that our new database and algorithm are useful when attempting to construct a hypothesis regarding the unknown function of a bioactive compound through small-scale experiments with a yeast deletion collection in a platform independent manner. The FitRankDB and FitSearch enhance the ease of searching public yeast fitness profiles and obtaining insights into unknown mechanisms of action of drugs. FitSearch is freely available at http://fitsearch.kaist.ac.kr.",2013-01-21 +25712276,"Midwife-attended births in the United States, 1990-2012: results from revised birth certificate data.","Data on attendance at birth by midwives in the United States have been available on the national level since 1989, allowing for the documentation of long-term trends. New items on payer source and prepregnancy body mass index (BMI) from a 2003 revision of the birth certificate provide an opportunity to examine additional aspects of US midwifery practice.The data in this report are based on records on birth attendant gathered as part of the US National Standard Certificate of Live Birth from a public use Web site, Vital Stats (http://www.cdc.gov/nchs/VitalStats.htm), which allows users to create and download specialized tables. Analysis of new items on prepregnancy BMI and birth payer source are limited to the 38 states (86% of US births) that adopted the revised birth certificate by 2012.Between 1989 and 2012, the proportion of all births attended by certified nurse-midwives (CNMs) increased from 3.3% to 7.9%. The proportion of vaginal births attended by CNMs reached an all-time high of 11.9%. Births attended by ""other midwives"" (typically certified professional midwives) rose to a peak of 28,343, or 0.7% of all US births. The distribution of payer source for CNM-attended births (44% Medicaid; 44% private insurance; 6% self-pay) is very similar to the national distribution, whereas the majority (53%) of births attended by other midwives are self-pay. 
Women whose births are attended by other midwives are less likely (13%) to have a prepregnancy BMI in the obese range than women attended by CNMs (19%) or overall (24%).The total number of births attended by CNMs and other midwives has remained steady or grown at a time when total US births have declined, resulting in the largest proportions of midwife-attended births in the quarter century that such data have been collected.",2015-01-01 +22293322,"Glucose-6-phosphate dehydrogenase (G6PD) mutations database: review of the ""old"" and update of the new mutations.","In the present paper we have updated the G6PD mutations database, including all the last discovered G6PD genetic variants. We underline that the last database has been published by Vulliamy et al. [1] who analytically reported 140 G6PD mutations: along with Vulliamy's database, there are two main sites, such as http://202.120.189.88/mutdb/ and www.LOVD.nl/MR, where almost all G6PD mutations can be found. Compared to the previous mutation reports, in our paper we have included for each mutation some additional information, such as: the secondary structure and the enzyme 3D position involving by mutation, the creation or abolition of a restriction site (with the enzyme involved) and the conservation score associated with each amino acid position. The mutations reported in the present tab have been divided according to the gene's region involved (coding and non-coding) and mutations affecting the coding region in: single, multiple (at least with two bases involved) and deletion. We underline that for the listed mutations, reported in italic, literature doesn't provide all the biochemical or bio-molecular information or the research data. 
Finally, for the ""old"" mutations, we tried to verify features previously reported and, when subsequently modified, we updated the specific information using the latest literature data.",2012-01-30 +23275726,MycoProtease-DB: Useful resource for Mycobacterium tuberculosis complex and nontuberculous mycobacterial proteases.,"

Unlabelled

MycoProtease-DB is an online MS SQL and CGI-PERL driven relational database that domiciles protease information of Mycobacterium tuberculosis (MTB) complex and Nontuberculous Mycobacteria (NTM), whose complete genome sequence is available. Our effort is to provide comprehensive information on proteases of 5 strains of Mycobacterium tuberculosis (H(37)Rv, H(37)Ra, CDC1551, F11 and KZN 1435), 3 strains of Mycobacterium bovis (AF2122/97, BCG Pasteur 1173P2 and BCG Tokyo 172) and 4 strains of NTM (Mycobacterium avium 104, Mycobacterium smegmatis MC2 155, Mycobacterium avium paratuberculosis K-10 and Nocardia farcinica IFM 10152) at gene, protein and structural level. MycoProtease-DB currently hosts 1324 proteases, which include 906 proteases from MTB complex with 237 distinct proteases & 418 from NTM with 404 distinct proteases. Flexible database design and easy expandability & retrieval of information are the main features of MycoProtease-DB. All the data were validated with various online resources and published literatures for reliable serving as comprehensive resources of various Mycobacterial proteases.

Availability

The Database is publicly available at http://www.bicjbtdrc-mgims.in/MycoProtease-DB/",2012-12-08 +26708335,Fast and efficient QTL mapper for thousands of molecular phenotypes.,"

Motivation

In order to discover quantitative trait loci, multi-dimensional genomic datasets combining DNA-seq and ChiP-/RNA-seq require methods that rapidly correlate tens of thousands of molecular phenotypes with millions of genetic variants while appropriately controlling for multiple testing.

Results

We have developed FastQTL, a method that implements a popular cis-QTL mapping strategy in a user- and cluster-friendly tool. FastQTL also proposes an efficient permutation procedure to control for multiple testing. The outcome of permutations is modeled using beta distributions trained from a few permutations and from which adjusted P-values can be estimated at any level of significance with little computational cost. The Geuvadis & GTEx pilot datasets can be now easily analyzed an order of magnitude faster than previous approaches.

Availability and implementation

Source code, binaries and comprehensive documentation of FastQTL are freely available to download at http://fastqtl.sourceforge.net/

Contact

emmanouil.dermitzakis@unige.ch or olivier.delaneau@unige.ch

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-26 +21410990,AnyExpress: integrated toolkit for analysis of cross-platform gene expression data using a fast interval matching algorithm.,"

Background

Cross-platform analysis of gene express data requires multiple, intricate processes at different layers with various platforms. However, existing tools handle only a single platform and are not flexible enough to support custom changes, which arise from the new statistical methods, updated versions of reference data, and better platforms released every month or year. Current tools are so tightly coupled with reference information, such as reference genome, transcriptome database, and SNP, which are often erroneous or outdated, that the output results are incorrect and misleading.

Results

We developed AnyExpress, a software package that combines cross-platform gene expression data using a fast interval-matching algorithm. Supported platforms include next-generation-sequencing technology, microarray, SAGE, MPSS, and more. Users can define custom target transcriptome database references for probe/read mapping in any species, as well as criteria to remove undesirable probes/reads. AnyExpress offers scalable processing features such as binding, normalization, and summarization that are not present in existing software tools. As a case study, we applied AnyExpress to published Affymetrix microarray and Illumina NGS RNA-Seq data from human kidney and liver. The mean of within-platform correlation coefficient was 0.98 for within-platform samples in kidney and liver, respectively. The mean of cross-platform correlation coefficients was 0.73. These results confirmed those of the original and secondary studies. Applying filtering produced higher agreement between microarray and NGS, according to an agreement index calculated from differentially expressed genes.

Conclusion

AnyExpress can combine cross-platform gene expression data, process data from both open- and closed-platforms, select a custom target reference, filter out undesirable probes or reads based on custom-defined biological features, and perform quantile-normalization with a large number of microarray samples. AnyExpress is fast, comprehensive, flexible, and freely available at http://anyexpress.sourceforge.net.",2011-03-17 +21646343,g:Profiler--a web server for functional interpretation of gene lists (2011 update).,"Functional interpretation of candidate gene lists is an essential task in modern biomedical research. Here, we present the 2011 update of g:Profiler (http://biit.cs.ut.ee/gprofiler/), a popular collection of web tools for functional analysis. g:GOSt and g:Cocoa combine comprehensive methods for interpreting gene lists, ordered lists and list collections in the context of biomedical ontologies, pathways, transcription factor and microRNA regulatory motifs and protein-protein interactions. Additional tools, namely the biomolecule ID mapping service (g:Convert), gene expression similarity searcher (g:Sorter) and gene homology searcher (g:Orth) provide numerous ways for further analysis and interpretation. In this update, we have implemented several features of interest to the community: (i) functional analysis of single nucleotide polymorphisms and other DNA polymorphisms is supported by chromosomal queries; (ii) network analysis identifies enriched protein-protein interaction modules in gene lists; (iii) functional analysis covers human disease genes; and (iv) improved statistics and filtering provide more concise results. g:Profiler is a regularly updated resource that is available for a wide range of species, including mammals, plants, fungi and insects.",2011-06-06 +26089836,PrOnto database : GO term functional dissimilarity inferred from biological data.,"Moonlighting proteins are defined by their involvement in multiple, unrelated functions. 
The computational prediction of such proteins requires a formal method of assessing the similarity of cellular processes, for example, by identifying dissimilar Gene Ontology terms. While many measures of Gene Ontology term similarity exist, most depend on abstract mathematical analyses of the structure of the GO tree and do not necessarily represent the underlying biology. Here, we propose two metrics of GO term functional dissimilarity derived from biological information, one based on the protein annotations and the other on the interactions between proteins. They have been collected in the PrOnto database, a novel tool which can be of particular use for the identification of moonlighting proteins. The database can be queried via an web-based interface which is freely available at http://tagc.univ-mrs.fr/pronto.",2015-06-03 +26268369,Prediction of protein-protein interaction sites from weakly homologous template structures using meta-threading and machine learning.,"The identification of protein-protein interactions is vital for understanding protein function, elucidating interaction mechanisms, and for practical applications in drug discovery. With the exponentially growing protein sequence data, fully automated computational methods that predict interactions between proteins are becoming essential components of system-level function inference. A thorough analysis of protein complex structures demonstrated that binding site locations as well as the interfacial geometry are highly conserved across evolutionarily related proteins. Because the conformational space of protein-protein interactions is highly covered by experimental structures, sensitive protein threading techniques can be used to identify suitable templates for the accurate prediction of interfacial residues. 
Toward this goal, we developed eFindSite(PPI) , an algorithm that uses the three-dimensional structure of a target protein, evolutionarily remotely related templates and machine learning techniques to predict binding residues. Using crystal structures, the average sensitivity (specificity) of eFindSite(PPI) in interfacial residue prediction is 0.46 (0.92). For weakly homologous protein models, these values only slightly decrease to 0.40-0.43 (0.91-0.92) demonstrating that eFindSite(PPI) performs well not only using experimental data but also tolerates structural imperfections in computer-generated structures. In addition, eFindSite(PPI) detects specific molecular interactions at the interface; for instance, it correctly predicts approximately one half of hydrogen bonds and aromatic interactions, as well as one third of salt bridges and hydrophobic contacts. Comparative benchmarks against several dimer datasets show that eFindSite(PPI) outperforms other methods for protein-binding residue prediction. It also features a carefully tuned confidence estimation system, which is particularly useful in large-scale applications using raw genomic data. eFindSite(PPI) is freely available to the academic community at http://www.brylinski.org/efindsiteppi.",2015-01-01 +25273109,KDDN: an open-source Cytoscape app for constructing differential dependency networks with significant rewiring.,"

Unlabelled

We have developed an integrated molecular network learning method, within a well-grounded mathematical framework, to construct differential dependency networks with significant rewiring. This knowledge-fused differential dependency networks (KDDN) method, implemented as a Java Cytoscape app, can be used to optimally integrate prior biological knowledge with measured data to simultaneously construct both common and differential networks, to quantitatively assign model parameters and significant rewiring p-values and to provide user-friendly graphical results. The KDDN algorithm is computationally efficient and provides users with parallel computing capability using ubiquitous multi-core machines. We demonstrate the performance of KDDN on various simulations and real gene expression datasets, and further compare the results with those obtained by the most relevant peer methods. The acquired biologically plausible results provide new insights into network rewiring as a mechanistic principle and illustrate KDDN's ability to detect them efficiently and correctly. Although the principal application here involves microarray gene expressions, our methodology can be readily applied to other types of quantitative molecular profiling data.

Availability

Source code and compiled package are freely available for download at http://apps.cytoscape.org/apps/kddn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-30 +23427990,"Motif discovery with data mining in 3D protein structure databases: discovery, validation and prediction of the U-shape zinc binding (""Huf-Zinc"") motif.","Data mining in protein databases, derivatives from more fundamental protein 3D structure and sequence databases, has considerable unearthed potential for the discovery of sequence motif--structural motif--function relationships as the finding of the U-shape (Huf-Zinc) motif, originally a small student's project, exemplifies. The metal ion zinc is critically involved in universal biological processes, ranging from protein-DNA complexes and transcription regulation to enzymatic catalysis and metabolic pathways. Proteins have evolved a series of motifs to specifically recognize and bind zinc ions. Many of these, so called zinc fingers, are structurally independent globular domains with discontinuous binding motifs made up of residues mostly far apart in sequence. Through a systematic approach starting from the BRIX structure fragment database, we discovered that there exists another predictable subset of zinc-binding motifs that not only have a conserved continuous sequence pattern but also share a characteristic local conformation, despite being included in totally different overall folds. While this does not allow general prediction of all Zn binding motifs, a HMM-based web server, Huf-Zinc, is available for prediction of these novel, as well as conventional, zinc finger motifs in protein sequences. 
The Huf-Zinc webserver can be freely accessed through this URL (http://mendel.bii.a-star.edu.sg/METHODS/hufzinc/).",2013-01-16 +25880215,Infertility etiologies are genetically and clinically linked with other diseases in single meta-diseases.,"The present review aims to ascertain whether different infertility etiologies share particular genes and/or molecular pathways with other pathologies and are associated with distinct and particular risks of later-life morbidity and mortality. In order to reach this aim, we use two different sources of information: (1) a public web server named DiseaseConnect ( http://disease-connect.org ) focused on the analysis of common genes and molecular mechanisms shared by diseases by integrating comprehensive omics and literature data; and (2) a literature search directed to find clinical comorbid relationships of infertility etiologies with only those diseases appearing after infertility is manifested. This literature search is performed because DiseaseConnect web server does not discriminate between pathologies emerging before, concomitantly or after infertility is manifested. Data show that different infertility etiologies not only share particular genes and/or molecular pathways with other pathologies but they have distinct clinical relationships with other diseases appearing after infertility is manifested. 
In particular, (1) testicular and high-grade prostate cancer in male infertility; (2) non-fatal stroke and endometrial cancer, and likely non-fatal coronary heart disease and ovarian cancer in polycystic ovary syndrome; (3) osteoporosis, psychosexual dysfunction, mood disorders and dementia in premature ovarian failure; (4) breast and ovarian cancer in carriers of BRCA1/2 mutations in diminished ovarian reserve; (5) clear cell and endometrioid histologic subtypes of invasive ovarian cancer, and likely low-grade serous invasive ovarian cancer, melanoma and non-Hodgkin lymphoma in endometriosis; and (6) endometrial and ovarian cancer in idiopathic infertility. The present data endorse the principle that the occurrence of a disease (in our case infertility) is non-random in the population and suggest that different infertility etiologies are genetically and clinically linked with other diseases in single meta-diseases. This finding opens new insights for clinicians and reproductive biologists to treat infertility problems using a phenomic approach instead of considering infertility as an isolated and exclusive disease of the reproductive system/hypothalamic-pituitary-gonadal axis. In agreement with a previous validation analysis of the utility of DiseaseConnect web server, the present study does not show a univocal correspondence between common gene expression and clinical comorbid relationship. Further work is needed to untangle the potential genetic, epigenetic and phenotypic relationships that may be present among different infertility etiologies, morbid conditions and physical/cognitive traits.",2015-04-15 +27395473,Glove Contamination during Endodontic Treatment Is One of the Sources of Nosocomial Endodontic Propionibacterium acnes Infections.,"

Introduction

The opportunistic Propionibacterium acnes recovered frequently from failed endodontic treatments might be the result of nosocomial endodontic infections. The study was aimed to determine if gloves worn by dentists could be one of the sources of these nosocomial infections and to investigate the P. acnes phylotypes involved.

Methods

The cultivable microbiota of gloves (n = 8) at 4 time points (T1, immediately after wearing gloves; T2, after access cavity preparation; T3, after taking a working length/master cone radiograph; and T4, before sealing the cavity) were identified using 16S ribosomal RNA gene sequencing. recA gene sequencing of P. acnes isolates was done. The phylogenetic relationship was determined using MEGA 6 (http://www.megasoftware.net/fixedbugs.html; Megasoftware, Tempe, AZ). Data distributions were compared using the Fisher exact test; means were compared using the Mann-Whitney U test in SPSSPC (version 21; IBM, Armonk, NY).

Results

The quantitative viable counts at T4 (aerobically [2.93 ± 0.57], anaerobically [3.35 ± 0.43]) were greater (P < .001) than at T1 (aerobically [0.48 ± 0.73], anaerobically [0.66 ± 0.86]) and T2 (aerobically [1.80 ± 0.54], anaerobically [2.41 ± 0.71]). Eighty cultivable bacterial taxa (5 phyla) were identified. The most prevalent ones were P. acnes and Staphylococcus epidermidis (100%). recA gene sequencing (n = 88) revealed 2 phylogenetic lineages with type I split into type IA and type IB. Type II was prevalent on gloves.

Conclusions

Contamination of the gloves was detected at the final stages of the treatment. P. acnes and S. epidermidis are the prevalent taxa on gloves and are opportunistic endodontic pathogens. Changing gloves frequently, after gaining access into the pulp space and also after taking the working length/master gutta-percha point radiographs, is likely to reduce the risk of root canal reinfection.",2016-07-07 +21646553,In the clinic. Delirium.,"This issue provides a clinical overview of delirium focusing on prevention, diagnosis, treatment, practice improvement, and patient information. Readers can complete the accompanying CME quiz for 1.5 credits. Only ACP members and individual subscribers can access the electronic features of In the Clinic. Non-subscribers who wish to access this issue of In the Clinic can elect ""Pay for View."" Subscribers can receive 1.5 category 1 CME credits by completing the CME quiz that accompanies this issue of In the Clinic. The content of In the Clinic is drawn from the clinical information and education resources of the American College of Physicians (ACP), including PIER (Physicians' Information and Education Resource) and MKSAP (Medical Knowledge and Self Assessment Program). Annals of Internal Medicine editors develop In the Clinic from these primary sources in collaboration with the ACP's Medical Education and Publishing division and with assistance of science writers and physician writers. Editorial consultants from PIER and MKSAP provide expert review of the content. Readers who are interested in these primary resources for more detail can consult www.acponline.org, http://pier.acponline.org, and other resources referenced within each issue of In the Clinic.",2011-06-01 +25649125,Bayesian models for syndrome- and gene-specific probabilities of novel variant pathogenicity.,"

Background

With the advent of affordable and comprehensive sequencing technologies, access to molecular genetics for clinical diagnostics and research applications is increasing. However, variant interpretation remains challenging, and tools that close the gap between data generation and data interpretation are urgently required. Here we present a transferable approach to help address the limitations in variant annotation.

Methods

We develop a network of Bayesian logistic regression models that integrate multiple lines of evidence to evaluate the probability that a rare variant is the cause of an individual's disease. We present models for genes causing inherited cardiac conditions, though the framework is transferable to other genes and syndromes.

Results

Our models report a probability of pathogenicity, rather than a categorisation into pathogenic or benign, which captures the inherent uncertainty of the prediction. We find that gene- and syndrome-specific models outperform genome-wide approaches, and that the integration of multiple lines of evidence performs better than individual predictors. The models are adaptable to incorporate new lines of evidence, and results can be combined with familial segregation data in a transparent and quantitative manner to further enhance predictions. Though the probability scale is continuous, and innately interpretable, performance summaries based on thresholds are useful for comparisons. Using a threshold probability of pathogenicity of 0.9, we obtain a positive predictive value of 0.999 and sensitivity of 0.76 for the classification of variants known to cause long QT syndrome over the three most important genes, which represents sufficient accuracy to inform clinical decision-making. A web tool APPRAISE [http://www.cardiodb.org/APPRAISE] provides access to these models and predictions.

Conclusions

Our Bayesian framework provides a transparent, flexible and robust framework for the analysis and interpretation of rare genetic variants. Models tailored to specific genes outperform genome-wide approaches, and can be sufficiently accurate to inform clinical decision-making.",2015-01-28 +25422674,SRST2: Rapid genomic surveillance for public health and hospital microbiology labs.,"Rapid molecular typing of bacterial pathogens is critical for public health epidemiology, surveillance and infection control, yet routine use of whole genome sequencing (WGS) for these purposes poses significant challenges. Here we present SRST2, a read mapping-based tool for fast and accurate detection of genes, alleles and multi-locus sequence types (MLST) from WGS data. Using >900 genomes from common pathogens, we show SRST2 is highly accurate and outperforms assembly-based methods in terms of both gene detection and allele assignment. We include validation of SRST2 within a public health laboratory, and demonstrate its use for microbial genome surveillance in the hospital setting. In the face of rising threats of antimicrobial resistance and emerging virulence among bacterial pathogens, SRST2 represents a powerful tool for rapidly extracting clinically useful information from raw WGS data. Source code is available from http://katholt.github.io/srst2/.",2014-11-20 +21702939,BioAssay Ontology (BAO): a semantic description of bioassays and high-throughput screening results.,"

Background

High-throughput screening (HTS) is one of the main strategies to identify novel entry points for the development of small molecule chemical probes and drugs and is now commonly accessible to public sector research. Large amounts of data generated in HTS campaigns are submitted to public repositories such as PubChem, which is growing at an exponential rate. The diversity and quantity of available HTS assays and screening results pose enormous challenges to organizing, standardizing, integrating, and analyzing the datasets and thus to maximize the scientific and ultimately the public health impact of the huge investments made to implement public sector HTS capabilities. Novel approaches to organize, standardize and access HTS data are required to address these challenges.

Results

We developed the first ontology to describe HTS experiments and screening results using expressive description logic. The BioAssay Ontology (BAO) serves as a foundation for the standardization of HTS assays and data and as a semantic knowledge model. In this paper we show important examples of formalizing HTS domain knowledge and we point out the advantages of this approach. The ontology is available online at the NCBO bioportal http://bioportal.bioontology.org/ontologies/44531.

Conclusions

After a large manual curation effort, we loaded BAO-mapped data triples into a RDF database store and used a reasoner in several case studies to demonstrate the benefits of formalized domain knowledge representation in BAO. The examples illustrate semantic querying capabilities where BAO enables the retrieval of inferred search results that are relevant to a given query, but are not explicitly defined. BAO thus opens new functionality for annotating, querying, and analyzing HTS datasets and the potential for discovering new knowledge by means of inference.",2011-06-24 +26085609,Whole-Genome Sequencing Data for Serotyping Escherichia coli-It's Time for a Change!,"The accessibility of whole-genome sequencing (WGS) presents the opportunity for national reference laboratories to provide a state-of-the-art public health surveillance service. The replacement of traditional serology-based typing of Escherichia coli by WGS is supported by user-friendly, freely available data analysis Web tools. An article in this issue of the Journal of Clinical Microbiology (K. G. Joensen, A. M. M. Tetzschner, A. Iguchi, F. M. Aarestrup, and F. Scheutz, J Clin Microbiol, 53:2410-2426, 2015, http://dx.doi.org/10.1128/JCM.00008-15) describes SerotypeFinder, an essential guide to serotyping E. coli in the 21st century.",2015-06-17 +27281533,TBC1D24 genotype-phenotype correlation: Epilepsies and other neurologic features.,"

Objective

To evaluate the phenotypic spectrum associated with mutations in TBC1D24.

Methods

We acquired new clinical, EEG, and neuroimaging data of 11 previously unreported and 37 published patients. TBC1D24 mutations, identified through various sequencing methods, can be found online (http://lovd.nl/TBC1D24).

Results

Forty-eight patients were included (28 men, 20 women, average age 21 years) from 30 independent families. Eighteen patients (38%) had myoclonic epilepsies. The other patients carried diagnoses of focal (25%), multifocal (2%), generalized (4%), and unclassified epilepsy (6%), and early-onset epileptic encephalopathy (25%). Most patients had drug-resistant epilepsy. We detail EEG, neuroimaging, developmental, and cognitive features, treatment responsiveness, and physical examination. In silico evaluation revealed 7 different highly conserved motifs, with the most common pathogenic mutation located in the first. Neuronal outgrowth assays showed that some TBC1D24 mutations, associated with the most severe TBC1D24-associated disorders, are not necessarily the most disruptive to this gene function.

Conclusions

TBC1D24-related epilepsy syndromes show marked phenotypic pleiotropy, with multisystem involvement and severity spectrum ranging from isolated deafness (not studied here), benign myoclonic epilepsy restricted to childhood with complete seizure control and normal intellect, to early-onset epileptic encephalopathy with severe developmental delay and early death. There is no distinct correlation with mutation type or location yet, but patterns are emerging. Given the phenotypic breadth observed, TBC1D24 mutation screening is indicated in a wide variety of epilepsies. A TBC1D24 consortium was formed to develop further research on this gene and its associated phenotypes.",2016-06-08 +25914306,DOCK 6: Impact of new features and current docking performance.,"This manuscript presents the latest algorithmic and methodological developments to the structure-based design program DOCK 6.7 focused on an updated internal energy function, new anchor selection control, enhanced minimization options, a footprint similarity scoring function, a symmetry-corrected root-mean-square deviation algorithm, a database filter, and docking forensic tools. An important strategy during development involved use of three orthogonal metrics for assessment and validation: pose reproduction over a large database of 1043 protein-ligand complexes (SB2012 test set), cross-docking to 24 drug-target protein families, and database enrichment using large active and decoy datasets (Directory of Useful Decoys [DUD]-E test set) for five important proteins including HIV protease and IGF-1R. Relative to earlier versions, a key outcome of the work is a significant increase in pose reproduction success in going from DOCK 4.0.2 (51.4%) → 5.4 (65.2%) → 6.7 (73.3%) as a result of significant decreases in failure arising from both sampling 24.1% → 13.6% → 9.1% and scoring 24.4% → 21.1% → 17.5%. 
Companion cross-docking and enrichment studies with the new version highlight other strengths and remaining areas for improvement, especially for systems containing metal ions. The source code for DOCK 6.7 is available for download and free for academic users at http://dock.compbio.ucsf.edu/.",2015-06-01 +26357352,From Pixels to Response Maps: Discriminative Image Filtering for Face Alignment in the Wild.,"We propose a face alignment framework that relies on the texture model generated by the responses of discriminatively trained part-based filters. Unlike standard texture models built from pixel intensities or responses generated by generic filters (e.g. Gabor), our framework has two important advantages. First, by virtue of discriminative training, invariance to external variations (like identity, pose, illumination and expression) is achieved. Second, we show that the responses generated by discriminatively trained filters (or patch-experts) are sparse and can be modeled using a very small number of parameters. As a result, the optimization methods based on the proposed texture model can better cope with unseen variations. We illustrate this point by formulating both part-based and holistic approaches for generic face alignment and show that our framework outperforms the state-of-the-art on multiple ""wild"" databases. The code and dataset annotations are available for research purposes from http://ibug.doc.ic.ac.uk/resources.",2015-06-01 +26030752,Dietary MicroRNA Database (DMD): An Archive Database and Analytic Tool for Food-Borne microRNAs.,"With the advent of high throughput technology, a huge amount of microRNA information has been added to the growing body of knowledge for non-coding RNAs. Here we present the Dietary MicroRNA Databases (DMD), the first repository for archiving and analyzing the published and novel microRNAs discovered in dietary resources. 
Currently there are fifteen types of dietary species, such as apple, grape, cow milk, and cow fat, included in the database originating from 9 plant and 5 animal species. Annotation for each entry, a mature microRNA indexed as DM0000*, covers information of the mature sequences, genome locations, hairpin structures of parental pre-microRNAs, cross-species sequence comparison, disease relevance, and the experimentally validated gene targets. Furthermore, a few functional analyses including target prediction, pathway enrichment and gene network construction have been integrated into the system, which enable users to generate functional insights through viewing the functional pathways and building protein-protein interaction networks associated with each microRNA. Another unique feature of DMD is that it provides a feature generator where a total of 411 descriptive attributes can be calculated for any given microRNAs based on their sequences and structures. DMD would be particularly useful for research groups studying microRNA regulation from a nutrition point of view. The database can be accessed at http://sbbi.unl.edu/dmd/.",2015-06-01 +26074707,Hepatitis B virus genotypes and genome characteristics in China.,"

Aim

To analyze the characteristics of hepatitis B virus (HBV) in China, as well as the correlation between several HBV mutations and hepatitis symptoms.

Methods

A total of 1148 HBV genome sequences from patients throughout China were collected via the National Center For Biotechnology Information database (information including: genotype, territory and clinical status). HBV genotypes were classified by a direct reference from the Genbank sequence annotation, phylogenetic tree and online software analysis (http://www.ncbi.nlm.nih.gov/projects/genotyping/formpage.cgi). The phylogenetic tree was constructed based on the neighbor-joining method by MEGA5.0 software. HBV sequences were grouped based on phylogenetic tree and the distance between the groups was calculated by using the computer between group mean distance methods. Seven hundred and twelve HBV sequences with clear annotation of clinical symptoms were selected to analyze the correlation of mutation and clinical symptoms. Characteristics of sequences were analyzed by using DNAStar and BioEdit software packages. The codon usage bias and RNA secondary structures analysis were performed by RNAdraw software. Recombination analysis was performed by using Simplot software.

Results

In China, HBV genotype C was the predominant in Northeastern, genotype B was predominant in Central Southern areas, genotype B and C were both dominant in Southwestern areas, and the recombinant genotype C/D was predominant in Northwestern areas. C2 and B2 were identified as the two major sub-genotypes, FJ386674 might be a putative sub-genotype as B10. The basal core promoter double mutation and pre-C mutation showed various significant differences between hepatitis symptoms. In addition to ATG, many other HBV initiation codons also exist. HBV has codon usage bias; the termination codon of X, C and P open reading frames (ORF) were TAA, TAG, and TGA, respectively. The major stop codons of S-ORF were TAA (96.45%) and TGA (83.60%) in B2 and C2 subtype, respectively.

Conclusion

This study recapitulated the epidemiology of HBV in China, and the information might be meaningful critical for the future prevention and therapy of HBV infections.",2015-06-01 +24942246,Subtlex-pl: subtitle-based word frequency estimates for Polish.,"We present SUBTLEX-PL, Polish word frequencies based on movie subtitles. In two lexical decision experiments, we compare the new measures with frequency estimates derived from another Polish text corpus that includes predominantly written materials. We show that the frequencies derived from the two corpora perform best in predicting human performance in a lexical decision task if used in a complementary way. Our results suggest that the two corpora may have unequal potential for explaining human performance for words in different frequency ranges and that corpora based on written materials severely overestimate frequencies for formal words. We discuss some of the implications of these findings for future studies comparing different frequency estimates. In addition to frequencies for word forms, SUBTLEX-PL includes measures of contextual diversity, part-of-speech-specific word frequencies, frequencies of associated lemmas, and word bigrams, providing researchers with necessary tools for conducting psycholinguistic research in Polish. The database is freely available for research purposes and may be downloaded from the authors' university Web site at http://crr.ugent.be/subtlex-pl .",2015-06-01 +23325629,bc-GenExMiner 3.0: new mining module computes breast cancer gene expression correlation analyses.,"We recently developed a user-friendly web-based application called bc-GenExMiner (http://bcgenex.centregauducheau.fr), which offered the possibility to evaluate prognostic informativity of genes in breast cancer by means of a 'prognostic module'. In this study, we develop a new module called 'correlation module', which includes three kinds of gene expression correlation analyses. 
The first one computes correlation coefficient between 2 or more (up to 10) chosen genes. The second one produces two lists of genes that are most correlated (positively and negatively) to a 'tested' gene. A gene ontology (GO) mining function is also proposed to explore GO 'biological process', 'molecular function' and 'cellular component' terms enrichment for the output lists of most correlated genes. The third one explores gene expression correlation between the 15 telomeric and 15 centromeric genes surrounding a 'tested' gene. These correlation analyses can be performed in different groups of patients: all patients (without any subtyping), in molecular subtypes (basal-like, HER2+, luminal A and luminal B) and according to oestrogen receptor status. Validation tests based on published data showed that these automatized analyses lead to results consistent with studies' conclusions. In brief, this new module has been developed to help basic researchers explore molecular mechanisms of breast cancer. DATABASE URL: http://bcgenex.centregauducheau.fr",2013-01-15 +26111206,Differential Evolution approach to detect recent admixture.,"The genetic structure of human populations is extraordinarily complex and of fundamental importance to studies of anthropology, evolution, and medicine. As increasingly many individuals are of mixed origin, there is an unmet need for tools that can infer multiple origins. Misclassification of such individuals can lead to incorrect and costly misinterpretations of genomic data, primarily in disease studies and drug trials. We present an advanced tool to infer ancestry that can identify the biogeographic origins of highly mixed individuals. reAdmix can incorporate individual's knowledge of ancestors (e.g. having some ancestors from Turkey or a Scottish grandmother). 
reAdmix is an online tool available at http://chcb.saban-chla.usc.edu/reAdmix/.",2015-06-18 +26909688,Spatiospectral Decomposition of Multi-subject EEG: Evaluating Blind Source Separation Algorithms on Real and Realistic Simulated Data.,"Electroencephalographic (EEG) oscillations predominantly appear with periods between 1 s (1 Hz) and 20 ms (50 Hz), and are subdivided into distinct frequency bands which appear to correspond to distinct cognitive processes. A variety of blind source separation (BSS) approaches have been developed and implemented within the past few decades, providing an improved isolation of these distinct processes. Within the present study, we demonstrate the feasibility of multi-subject BSS for deriving distinct EEG spatiospectral maps. Multi-subject spatiospectral EEG decompositions were implemented using the EEGIFT toolbox ( http://mialab.mrn.org/software/eegift/ ) with real and realistic simulated datasets (the simulation code is available at http://mialab.mrn.org/software/simeeg ). Twelve different decomposition algorithms were evaluated. Within the simulated data, WASOBI and COMBI appeared to be the best performing algorithms, as they decomposed the four sources across a range of component numbers and noise levels. RADICAL ICA, ERBM, INFOMAX ICA, ICA EBM, FAST ICA, and JADE OPAC decomposed a subset of sources within a smaller range of component numbers and noise levels. INFOMAX ICA, FAST ICA, WASOBI, and COMBI generated the largest number of stable sources within the real dataset and provided partially distinct views of underlying spatiospectral maps. 
We recommend the multi-subject BSS approach and the selected algorithms for further studies examining distinct spatiospectral networks within healthy and clinical populations.",2016-02-24 +22434842,Tracking and coordinating an international curation effort for the CCDS Project.,"The Consensus Coding Sequence (CCDS) collaboration involves curators at multiple centers with a goal of producing a conservative set of high quality, protein-coding region annotations for the human and mouse reference genome assemblies. The CCDS data set reflects a 'gold standard' definition of best supported protein annotations, and corresponding genes, which pass a standard series of quality assurance checks and are supported by manual curation. This data set supports use of genome annotation information by human and mouse researchers for effective experimental design, analysis and interpretation. The CCDS project consists of analysis of automated whole-genome annotation builds to identify identical CDS annotations, quality assurance testing and manual curation support. Identical CDS annotations are tracked with a CCDS identifier (ID) and any future change to the annotated CDS structure must be agreed upon by the collaborating members. CCDS curation guidelines were developed to address some aspects of curation in order to improve initial annotation consistency and to reduce time spent in discussing proposed annotation updates. Here, we present the current status of the CCDS database and details on our procedures to track and coordinate our efforts. We also present the relevant background and reasoning behind the curation standards that we have developed for CCDS database treatment of transcripts that are nonsense-mediated decay (NMD) candidates, for transcripts containing upstream open reading frames, for identifying the most likely translation start codons and for the annotation of readthrough transcripts. Examples are provided to illustrate the application of these guidelines. 
DATABASE URL: http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi.",2012-03-20 +25075307,Porting and using PanGIA for Cytoscape 3: challenges and solutions.,"Much of the biologically significant functionality in Cytoscape is contained within third-party add-ons, called plugins in Cytoscape 2 and apps in Cytoscape 3. In the transition from Cytoscape 2 to Cytoscape 3, some of the underlying assumptions upon which plugins relied changed, requiring a significant porting effort for plugins to work as Cytoscape 3 apps. PanGIA is a Cytoscape add-on (http://apps.cytoscape.org/apps/pangia) designed to analyze and visualize genetic interaction data in light of physical interaction data. In order to convert the PanGIA plugin to an app, various challenges, including those related to a transformed data model, concurrency, and randomization had to be overcome. In the process, the ability to control randomization was added to the GUI, a feature which was not only integral to the porting process, but which also ensures more easily reproducible scientific analysis for PanGIA users. Most authors of Cytoscape 2 plugins will face similar challenges porting their software to work with Cytoscape 3, and this paper gives details of how the PanGIA port addressed them.",2014-07-01 +25788620,SiPAN: simultaneous prediction and alignment of protein-protein interaction networks.,"

Motivation

Network prediction as applied to protein-protein interaction (PPI) networks has received considerable attention within the last decade. Because of the limitations of experimental techniques for interaction detection and network construction, several computational methods for PPI network reconstruction and growth have been suggested. Such methods usually limit the scope of study to a single network, employing data based on genomic context, structure, domain, sequence information or existing network topology. Incorporating multiple species network data for network reconstruction and growth entails the design of novel models encompassing both network reconstruction and network alignment, since the goal of network alignment is to provide functionally orthologous proteins from multiple networks and such orthology information can be used in guiding interolog transfers. However, such an approach raises the classical chicken or egg problem; alignment methods assume error-free networks, whereas network prediction via orthology works effectively if the functionally orthologous proteins are determined with high precision. Thus to resolve this intertwinement, we propose a framework to handle both problems simultaneously, that of SImultaneous Prediction and Alignment of Networks (SiPAN).

Results

We present an algorithm that solves the SiPAN problem in accordance with its simultaneous nature. Bearing the same name as the defined problem itself, the SiPAN algorithm employs state-of-the-art alignment and topology-based interaction confidence construction algorithms, which are used as benchmark methods for comparison purposes as well. To demonstrate the effectiveness of the proposed network reconstruction via SiPAN, we consider two scenarios; one that preserves the network sizes and the other where the network sizes are increased. Through extensive tests on real-world biological data, we show that the network qualities of SiPAN reconstructions are as good as those of original networks and in some cases SiPAN networks are even better, especially for the former scenario. An alternative state-of-the-art network reconstruction algorithm random walk with resistance produces networks considerably worse than the original networks and those reproduced via SiPAN in both cases.

Availability and implementation

Freely available at http://webprs.khas.edu.tr/∼cesim/SiPAN.tar.gz.",2015-03-18 +25918639,VirAmp: a galaxy-based viral genome assembly pipeline.,"

Background

Advances in next generation sequencing make it possible to obtain high-coverage sequence data for large numbers of viral strains in a short time. However, since most bioinformatics tools are developed for command line use, the selection and accessibility of computational tools for genome assembly and variation analysis limits the ability of individual labs to perform further bioinformatics analysis.

Findings

We have developed a multi-step viral genome assembly pipeline named VirAmp, which combines existing tools and techniques and presents them to end users via a web-enabled Galaxy interface. Our pipeline allows users to assemble, analyze, and interpret high coverage viral sequencing data with an ease and efficiency that was not possible previously. Our software makes a large number of genome assembly and related tools available to life scientists and automates the currently recommended best practices into a single, easy to use interface. We tested our pipeline with three different datasets from human herpes simplex virus (HSV).

Conclusions

VirAmp provides a user-friendly interface and a complete pipeline for viral genome analysis. We make our software available via an Amazon Elastic Cloud disk image that can be easily launched by anyone with an Amazon web service account. A fully functional demonstration instance of our system can be found at http://viramp.com/. We also maintain detailed documentation on each tool and methodology at http://docs.viramp.com.",2015-04-28 +25924884,UrQt: an efficient software for the Unsupervised Quality trimming of NGS data.,"

Background

Quality control is a necessary step of any Next Generation Sequencing analysis. Although customary, this step still requires manual interventions to empirically choose tuning parameters according to various quality statistics. Moreover, current quality control procedures that provide a ""good quality"" data set, are not optimal and discard many informative nucleotides. To address these drawbacks, we present a new quality control method, implemented in UrQt software, for Unsupervised Quality trimming of Next Generation Sequencing reads.

Results

Our trimming procedure relies on a well-defined probabilistic framework to detect the best segmentation between two segments of unreliable nucleotides, framing a segment of informative nucleotides. Our software only requires one user-friendly parameter to define the minimal quality threshold (phred score) to consider a nucleotide to be informative, which is independent of both the experiment and the quality of the data. This procedure is implemented in C++ in an efficient and parallelized software with a low memory footprint. We tested the performances of UrQt compared to the best-known trimming programs, on seven RNA and DNA sequencing experiments and demonstrated its optimality in the resulting tradeoff between the number of trimmed nucleotides and the quality objective.

Conclusions

By finding the best segmentation to delimit a segment of good quality nucleotides, UrQt greatly increases the number of reads and of nucleotides that can be retained for a given quality objective. UrQt source files, binary executables for different operating systems and documentation are freely available (under the GPLv3) at the following address: https://lbbe.univ-lyon1.fr/-UrQt-.html .",2015-04-29 +26072480,Exploring the structure and function of temporal networks with dynamic graphlets.,"

Motivation

With the increasing availability of temporal real-world networks, how can these data be studied efficiently? One can model a temporal network as a single aggregate static network, or as a series of time-specific snapshots, each being an aggregate static network over the corresponding time window. Then, one can use established methods for static analysis on the resulting aggregate network(s), but losing in the process valuable temporal information either completely, or at the interface between different snapshots, respectively. Here, we develop a novel approach for studying a temporal network more explicitly, by capturing inter-snapshot relationships.

Results

We base our methodology on well-established graphlets (subgraphs), which have been proven in numerous contexts in static network research. We develop new theory to allow for graphlet-based analyses of temporal networks. Our new notion of dynamic graphlets is different from existing dynamic network approaches that are based on temporal motifs (statistically significant subgraphs). The latter have limitations: their results depend on the choice of a null network model that is required to evaluate the significance of a subgraph, and choosing a good null model is non-trivial. Our dynamic graphlets overcome the limitations of the temporal motifs. Also, when we aim to characterize the structure and function of an entire temporal network or of individual nodes, our dynamic graphlets outperform the static graphlets. Clearly, accounting for temporal information helps. We apply dynamic graphlets to temporal age-specific molecular network data to deepen our limited knowledge about human aging.

Availability and implementation

http://www.nd.edu/~cone/DG.",2015-06-01 +22217156,BuildSummary: using a group-based approach to improve the sensitivity of peptide/protein identification in shotgun proteomics.,"The target-decoy database search strategy is widely accepted as a standard method for estimating the false discovery rate (FDR) of peptide identification, based on which peptide-spectrum matches (PSMs) from the target database are filtered. To improve the sensitivity of protein identification given a fixed accuracy (frequently defined by a protein FDR threshold), a postprocessing procedure is often used that integrates results from different peptide search engines that had assayed the same data set. In this work, we show that PSMs that are grouped by the precursor charge, the number of missed internal cleavage sites, the modification state, and the numbers of protease termini and that the proteins grouped by their unique peptide count should be filtered separately according to the given FDR. We also develop an iterative procedure to filter the PSMs and proteins simultaneously, according to the given FDR. Finally, we present a general framework to integrate the results from different peptide search engines using the same FDR threshold. Our method was tested with several shotgun proteomics data sets that were acquired by multiple LC/MS instruments from two different biological samples. The results showed a satisfactory performance. We implemented the method in a user-friendly software package called BuildSummary, which can be downloaded for free from http://www.proteomics.ac.cn/software/proteomicstools/index.htm as part of the software suite ProteomicsTools.",2012-02-08 +26501925,DrugTargetInspector: An assistance tool for patient treatment stratification.,"Cancer is a large class of diseases that are characterized by a common set of features, known as the Hallmarks of cancer. One of these hallmarks is the acquisition of genome instability and mutations. 
This, combined with high proliferation rates and failure of repair mechanisms, leads to clonal evolution as well as a high genotypic and phenotypic diversity within the tumor. As a consequence, treatment and therapy of malignant tumors is still a grand challenge. Moreover, under selective pressure, e.g., caused by chemotherapy, resistant subpopulations can emerge that then may lead to relapse. In order to minimize the risk of developing multidrug-resistant tumor cell populations, optimal (combination) therapies have to be determined on the basis of an in-depth characterization of the tumor's genetic and phenotypic makeup, a process that is an important aspect of stratified medicine and precision medicine. We present DrugTargetInspector (DTI), an interactive assistance tool for treatment stratification. DTI analyzes genomic, transcriptomic, and proteomic datasets and provides information on deregulated drug targets, enriched biological pathways, and deregulated subnetworks, as well as mutations and their potential effects on putative drug targets and genes of interest. To demonstrate DTI's broad scope of applicability, we present case studies on several cancer types and different types of input -omics data. DTI's integrative approach allows users to characterize the tumor under investigation based on various -omics datasets and to elucidate putative treatment options based on clinical decision guidelines, but also proposing additional points of intervention that might be neglected otherwise. DTI can be freely accessed at http://dti.bioinf.uni-sb.de.",2016-04-01 +22844193,Phylogenomic and domain analysis of iterative polyketide synthases in Aspergillus species.,"Aspergillus species are industrially and agriculturally important as fermentors and as producers of various secondary metabolites. Among them, fungal polyketides such as lovastatin and melanin are considered a gold mine for bioactive compounds. 
We used a phylogenomic approach to investigate the distribution of iterative polyketide synthases (PKS) in eight sequenced Aspergilli and classified over 250 fungal genes. Their genealogy by the conserved ketosynthase (KS) domain revealed three large groups of nonreducing PKS, one group inside bacterial PKS, and more than 9 small groups of reducing PKS. Polyphyly of nonribosomal peptide synthase (NRPS)-PKS genes raised questions regarding the recruitment of the elegant conjugation machinery. High rates of gene duplication and divergence were frequent. All data are accessible through our web database at http://metabolomics.jp/wiki/Category:PK.",2012-07-04 +24650446,Beryllium10: a free and simple tool for creating and managing group safety data sheets.,"

Background

Countless chemicals and mixtures are used in laboratories today, which all possess their own properties and dangers. Therefore, it is important to brief oneself about possible risks and hazards before doing any experiments. However, this task is laborious and time consuming.

Summary

Beryllium10 is a program, which supports users by carrying out a large part of the work such as collecting/importing data sets from different providers and compiling most of the information into a single group safety data sheet, which is suitable for having all necessary information at hand while an experiment is in progress. We present here the features of Beryllium10, their implementation, and their design and development criteria and ideas.

Conclusion

A program for creating and managing of group safety data sheets was developed and released as open source under GPL. The program provides a fast and clear user-interface, and well-conceived design for collecting and managing safety data. It is available for download from the web page http://beryllium.keksecks.de.",2014-03-20 +26455800,ConTemplate Suggests Possible Alternative Conformations for a Query Protein of Known Structure.,"Protein function involves conformational changes, but often, for a given protein, only some of these conformations are known. The missing conformations could be predicted using the wealth of data in the PDB. Most PDB proteins have multiple structures, and proteins sharing one similar conformation often share others as well. The ConTemplate web server (http://bental.tau.ac.il/contemplate) exploits these observations to suggest conformations for a query protein with at least one known conformation (or model thereof). We demonstrate ConTemplate on a ribose-binding protein that undergoes significant conformational changes upon substrate binding. Querying ConTemplate with the ligand-free (or bound) structure of the protein produces the ligand-bound (or free) conformation with a root-mean-square deviation of 1.7 Å (or 2.2 Å); the models are derived from conformations of other sugar-binding proteins, sharing approximately 30% sequence identity with the query. The calculation also suggests intermediate conformations and a pathway between the bound and free conformations.",2015-10-09 +26043858,RCARE: RNA Sequence Comparison and Annotation for RNA Editing.,"The post-transcriptional sequence modification of transcripts through RNA editing is an important mechanism for regulating protein function and is associated with human disease phenotypes. The identification of RNA editing or RNA-DNA difference (RDD) sites is a fundamental step in the study of RNA editing. However, a substantial number of false-positive RDD sites have been identified recently. 
A major challenge in identifying RDD sites is to distinguish between the true RNA editing sites and the false positives. Furthermore, determining the location of condition-specific RDD sites and elucidating their functional roles will help toward understanding various biological phenomena that are mediated by RNA editing. The present study developed RNA-sequence comparison and annotation for RNA editing (RCARE) for searching, annotating, and visualizing RDD sites using thousands of previously known editing sites, which can be used for comparative analyses between multiple samples. RCARE also provides evidence for improving the reliability of identified RDD sites. RCARE is a web-based comparison, annotation, and visualization tool, which provides rich biological annotations and useful summary plots. The developers of previous tools that identify or annotate RNA-editing sites seldom mention the reliability of their respective tools. In order to address the issue, RCARE utilizes a number of scientific publications and databases to find specific documentations respective to a particular RNA-editing site, which generates evidence levels to convey the reliability of RCARE. Sequence-based alignment files can be converted into VCF files using a Python script and uploaded to the RCARE server for further analysis. RCARE is available for free at http://www.snubi.org/software/rcare/.",2015-05-29 +25024289,Summary of the BioLINK SIG 2013 meeting at ISMB/ECCB 2013.,"

Unlabelled

The ISMB Special Interest Group on Linking Literature, Information and Knowledge for Biology (BioLINK) organized a one-day workshop at ISMB/ECCB 2013 in Berlin, Germany. The theme of the workshop was 'Roles for text mining in biomedical knowledge discovery and translational medicine'. This summary reviews the outcomes of the workshop. Meeting themes included concept annotation methods and applications, extraction of biological relationships and the use of text-mined data for biological data analysis.

Availability and implementation

All articles are available at http://biolinksig.org/proceedings-online/.",2014-07-14 +21779320,COLOMBOS: access port for cross-platform bacterial expression compendia.,"

Background

Microarrays are the main technology for large-scale transcriptional gene expression profiling, but the large bodies of data available in public databases are not useful due to the large heterogeneity. There are several initiatives that attempt to bundle these data into expression compendia, but such resources for bacterial organisms are scarce and limited to integration of experiments from the same platform or to indirect integration of per experiment analysis results.

Methodology/principal findings

We have constructed comprehensive organism-specific cross-platform expression compendia for three bacterial model organisms (Escherichia coli, Bacillus subtilis, and Salmonella enterica serovar Typhimurium) together with an access portal, dubbed COLOMBOS, that not only provides easy access to the compendia, but also includes a suite of tools for exploring, analyzing, and visualizing the data within these compendia. It is freely available at http://bioi.biw.kuleuven.be/colombos. The compendia are unique in directly combining expression information from different microarray platforms and experiments, and we illustrate the potential benefits of this direct integration with a case study: extending the known regulon of the Fur transcription factor of E. coli. The compendia also incorporate extensive annotations for both genes and experimental conditions; these heterogeneous data are functionally integrated in the COLOMBOS analysis tools to interactively browse and query the compendia not only for specific genes or experiments, but also metabolic pathways, transcriptional regulation mechanisms, experimental conditions, biological processes, etc.

Conclusions/significance

We have created cross-platform expression compendia for several bacterial organisms and developed a complementary access port COLOMBOS, that also serves as a convenient expression analysis tool to extract useful biological information. This work is relevant to a large community of microbiologists by facilitating the use of publicly available microarray experiments to support their research.",2011-07-14 +26044949,Inferring drug-disease associations based on known protein complexes.,"Inferring drug-disease associations is critical in unveiling disease mechanisms, as well as discovering novel functions of available drugs, or drug repositioning. Previous work is primarily based on drug-gene-disease relationships, which discards much important information, since genes execute their functions by interacting with others. To overcome this issue, we propose a novel methodology that discovers the drug-disease association based on protein complexes. Firstly, an integrated heterogeneous network consisting of drugs, protein complexes, and diseases is constructed, where we assign weights to the drug-disease association by using probability. Then, from the tripartite network, we get the indirect weighted relationships between drugs and diseases. The larger the weight, the higher the reliability of the correlation. We apply our method to mental disorders and hypertension, and validate the result by using the Comparative Toxicogenomics Database. Our ranked results can be directly reinforced by existing biomedical literature, suggesting that our proposed method obtains higher specificity and sensitivity. The proposed method offers new insight into drug-disease discovery. 
Our method is publicly available at http://1.complexdrug.sinaapp.com/Drug_Complex_Disease/Data_Download.html.",2015-05-29 +26043787,Detection and analysis of disease-associated single nucleotide polymorphism influencing post-translational modification.,"Post-translational modification (PTM) plays a crucial role in biological functions and corresponding disease developments. Discovering disease-associated non-synonymous SNPs (nsSNPs) altering PTM sites can help to estimate the various PTM candidates involved in diseases, therefore, an integrated analysis between SNPs, PTMs and diseases is necessary. However, only a few types of PTMs affected by nsSNPs have been studied without considering disease-association until now. In this study, we developed a new database called PTM-SNP which contains a comprehensive collection of human nsSNPs that affect PTM sites, together with disease information. Total 179,325 PTM-SNPs were collected by aligning missense SNPs and stop-gain SNPs on PTM sites (position 0) or their flanking region (position -7 to 7). Disease-associated SNPs from GWAS catalogs were also matched with detected PTM-SNP to find disease associated PTM-SNPs. Our result shows PTM-SNPs are highly associated with diseases, compared with other nsSNP sites and functional classes including near gene, intron and so on. PTM-SNP can provide an insight about discovering important PTMs involved in the diseases easily through the web site. PTM-SNP is freely available at http://gcode.kaist.ac.kr/ptmsnp.",2015-05-29 +25707673,IAOseq: inferring abundance of overlapping genes using RNA-seq data.,"

Background

Overlapping transcription constitutes a common mechanism for regulating gene expression. A major limitation of the overlapping transcription assays is the lack of high throughput expression data.

Results

We developed a new tool (IAOseq) that is based on reads distributions along the transcribed regions to identify the expression levels of overlapping genes from standard RNA-seq data. Compared with five commonly used quantification methods, IAOseq showed better performance in the estimation accuracy of overlapping transcription levels. For the same strand overlapping transcription, currently existing high-throughput methods are rarely available to distinguish which strand was present in the original mRNA template. The IAOseq results showed that the commonly used methods gave an average of 1.6 fold overestimation of the expression levels of same strand overlapping genes.

Conclusions

This work provides a useful tool for mining overlapping transcription levels from standard RNA-seq libraries. IAOseq could be used to help us understand the complex regulatory mechanism mediated by overlapping transcripts. IAOseq is freely available at http://lifecenter.sgst.cn/main/en/IAO_seq.jsp.",2015-01-21 +24489367,Bayesian joint analysis of heterogeneous genomics data.,"

Summary

A non-parametric Bayesian factor model is proposed for joint analysis of multi-platform genomics data. The approach is based on factorizing the latent space (feature space) into a shared component and a data-specific component with the dimensionality of these components (spaces) inferred via a beta-Bernoulli process. The proposed approach is demonstrated by jointly analyzing gene expression/copy number variations and gene expression/methylation data for ovarian cancer patients, showing that the proposed model can potentially uncover key drivers related to cancer.

Availability and implementation

The source code for this model is written in MATLAB and has been made publicly available at https://sites.google.com/site/jointgenomics/.

Contact

catherine.ll.zheng@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-01-30 +22185559,Systematic review: comparative effectiveness of adjunctive devices in patients with ST-segment elevation myocardial infarction undergoing percutaneous coronary intervention of native vessels.,"

Background

During percutaneous coronary intervention (PCI), dislodgement of atherothrombotic material from coronary lesions can result in distal embolization, and may lead to increased major adverse cardiovascular events (MACE) and mortality. We sought to systematically review the comparative effectiveness of adjunctive devices to remove thrombi or protect against distal embolization in patients with ST-segment elevation myocardial infarction (STEMI) undergoing PCI of native vessels.

Methods

We conducted a systematic literature search of Medline, the Cochrane Database, and Web of Science (January 1996-March 2011), http://www.clinicaltrials.gov, abstracts from major cardiology meetings, TCTMD, and CardioSource Plus. Two investigators independently screened citations and extracted data from randomized controlled trials (RCTs) that compared the use of adjunctive devices plus PCI to PCI alone, evaluated patients with STEMI, enrolled a population with 95% of target lesion(s) in native vessels, and reported data on at least one pre-specified outcome. Quality was graded as good, fair or poor and the strength of evidence was rated as high, moderate, low or insufficient. Disagreement was resolved through consensus.

Results

37 trials met inclusion criteria. At the maximal duration of follow-up, catheter aspiration devices plus PCI significantly decreased the risk of MACE by 27% compared to PCI alone. Catheter aspiration devices also significantly increased the achievement of ST-segment resolution by 49%, myocardial blush grade of 3 (MBG-3) by 39%, and thrombolysis in myocardial infarction (TIMI) 3 flow by 8%, while reducing the risk of distal embolization by 44%, no reflow by 48% and coronary dissection by 70% versus standard PCI alone. In a majority of trials, the use of catheter aspiration devices increased procedural time upon qualitative assessment. Distal filter embolic protection devices significantly increased the risk of target revascularization by 39% although the use of mechanical thrombectomy or embolic protection devices did not significantly impact other final health outcomes. Distal balloon or any embolic protection device increased the achievement of MBG-3 by 61% and 20% and TIMI3 flow by 11% and 6% but did not significantly impact other intermediate outcomes versus control. Upon qualitative analysis, all device categories, with exception of catheter aspiration devices, appear to significantly prolong procedure time compared to PCI alone while none appear to significantly impact ejection fraction. Many of the final health outcome and adverse event evaluations were underpowered and the safety of devices overall is unclear due to insufficient amounts of data.

Conclusions

In patients with STEMI, for most devices, few RCTs evaluated final health outcomes over a long period of follow-up. Due to insufficient data, the safety of these devices is unclear.",2011-12-20 +25246432,Comprehensive large-scale assessment of intrinsic protein disorder.,"

Motivation

Intrinsically disordered regions are key for the function of numerous proteins. Due to the difficulties in experimental disorder characterization, many computational predictors have been developed with various disorder flavors. Their performance is generally measured on small sets mainly from experimentally solved structures, e.g. Protein Data Bank (PDB) chains. MobiDB has only recently started to collect disorder annotations from multiple experimental structures.

Results

MobiDB annotates disorder for UniProt sequences, allowing us to conduct the first large-scale assessment of fast disorder predictors on 25 833 different sequences with X-ray crystallographic structures. In addition to a comprehensive ranking of predictors, this analysis produced the following interesting observations. (i) The predictors cluster according to their disorder definition, with a consensus giving more confidence. (ii) Previous assessments appear over-reliant on data annotated at the PDB chain level and performance is lower on entire UniProt sequences. (iii) Long disordered regions are harder to predict. (iv) Depending on the structural and functional types of the proteins, differences in prediction performance of up to 10% are observed.

Availability

The datasets are available from the web site at http://mobidb.bio.unipd.it/lsd.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-21 +21265623,The European radiobiological archives: online access to data from radiobiological experiments.,"For financial and ethical reasons, the large-scale radiobiological animal studies conducted over the past 50 years are, to a large extent, unrepeatable experiments. It is therefore important to retain the primary data from these experiments to allow reanalysis, reinterpretation and re-evaluation of results from, for example, carcinogenicity studies, in the light of new knowledge in radiation biology. Consequently, there is an imperative need to keep these data available for the research community. The European Radiobiological Archives (ERA) were developed to fulfill this task. ERA has become a unique archive, including information from almost all European long-term studies carried out between the 1960s and the 1990s. The legacy database was originally developed in a manner that precluded online use. Therefore, strong efforts were made to transform it into a version that is available online through the web. This went together with quality assurance measures, including first the estimation of the rate of non-systematic errors in data entry, which at 2% proved to be very low. Second, every data set was compared against two external sources of information. Standardization of terminology and histopathology is a prerequisite for meaningful comparison of data across studies and analysis of potential carcinogenic effects. Standardization is particularly critical for the construction of a database that includes data from different studies evaluated by pathologists in different laboratories. A harmonized pathology nomenclature with modern standard pathology terms was introduced. As far as possible, references for the various studies were directly linked to the studies themselves. Further, a direct link to the JANUS database was established. 
ERA is now in a position where it has the potential to become a worldwide radiobiological research tool. ERA can be accessed at no cost at https://era.bfs.de. An ID and password can be obtained from the curators at era@bfs.de .",2011-01-25 +23203879,BSRD: a repository for bacterial small regulatory RNA.,"In bacteria, small regulatory non-coding RNAs (sRNAs) are the most abundant class of post-transcriptional regulators. They are involved in diverse processes including quorum sensing, stress response, virulence and carbon metabolism. Recent developments in high-throughput techniques, such as genomic tiling arrays and RNA-Seq, have allowed efficient detection and characterization of bacterial sRNAs. However, a comprehensive repository to host sRNAs and their annotations is not available. Existing databases suffer from a limited number of bacterial species or sRNAs included. In addition, these databases do not have tools to integrate or analyse high-throughput sequencing data. Here, we have developed BSRD (http://kwanlab.bio.cuhk.edu.hk/BSRD), a comprehensive bacterial sRNAs database, as a repository for published bacterial sRNA sequences with annotations and expression profiles. BSRD contains over nine times more experimentally validated sRNAs than any other available databases. BSRD also provides combinatorial regulatory networks of transcription factors and sRNAs with their common targets. We have built and implemented in BSRD a novel RNA-Seq analysis platform, sRNADeep, to characterize sRNAs in large-scale transcriptome sequencing projects. We will update BSRD regularly.",2012-11-29 +21835078,Drug resistance maps to guide intermittent preventive treatment of malaria in African infants.,"Intermittent preventive treatment of infants (IPTi) with sulphadoxine pyrimethamine (SP) is recommended as an additional malaria control intervention in high transmission areas of sub-Saharan Africa, provided its protective efficacy is not compromised by SP resistance. 
A significant obstacle in implementing SP-IPTi, is in establishing the degree of resistance in an area. Since SP monotherapy is discontinued, no contemporary measures of in vivo efficacy can be made, so the World Health Organisation has recommended a cut-off based upon molecular markers, stating that SP-IPTi should not be implemented when the prevalence of the dhps 540E mutation among infections exceeds 50%. We created a geo-referenced database of SP resistance markers in Africa from published literature. By selecting surveys of malaria infected blood samples conducted since 2004 we have mapped the contemporary prevalence of dhps 540E. Additional maps are freely available in interactive form at http://www.drugresistancemaps.org/ipti/. Eight countries in East Africa are classified as unsuitable for SP-IPTi when data are considered at a national level. Fourteen countries in Central and West Africa were classified as suitable while seven countries had no available contemporary data to guide policy. There are clear deficiencies in molecular surveillance data coverage. We discuss requirements for ongoing surveillance of SP resistance markers in support of the use of SP-IPTi.",2011-08-11 +26017195,Ambivalent covariance models.,"

Background

Evolutionary variations let us define a set of similar nucleic acid sequences as a family if these different molecules execute a common function. Capturing their sequence variation by using e. g. position specific scoring matrices significantly improves sensitivity of detection tools. Members of a functional (non-coding) RNA family are affected by these variations not only on the sequence, but also on the structural level. For example, some transfer-RNAs exhibit a fifth helix in addition to the typical cloverleaf structure. Current covariance models - the unrivaled homology search approach for structured RNA - do not benefit from structural variation within a family, but rather penalize it. This leads to artificial subdivision of families and loss of information in the RFAM database.

Results

We propose an extension to the fundamental architecture of covariance models to allow for several, compatible consensus structures. The resulting models are called ambivalent covariance models. Evaluation on several RFAM families shows that coalescence of structural variation within a family by using ambivalent consensus models is superior to subdividing the family into multiple classical covariance models.

Conclusion

A prototype and source code is available at http://bibiserv.cebitec.uni-bielefeld.de/acms.",2015-05-28 +25734917,FASTAptamer: A Bioinformatic Toolkit for High-throughput Sequence Analysis of Combinatorial Selections.,"High-throughput sequence (HTS) analysis of combinatorial selection populations accelerates lead discovery and optimization and offers dynamic insight into selection processes. An underlying principle is that selection enriches high-fitness sequences as a fraction of the population, whereas low-fitness sequences are depleted. HTS analysis readily provides the requisite numerical information by tracking the evolutionary trajectory of individual sequences in response to selection pressures. Unlike genomic data, for which a number of software solutions exist, user-friendly tools are not readily available for the combinatorial selections field, leading many users to create custom software. FASTAptamer was designed to address the sequence-level analysis needs of the field. The open source FASTAptamer toolkit counts, normalizes and ranks read counts in a FASTQ file, compares populations for sequence distribution, generates clusters of sequence families, calculates fold-enrichment of sequences throughout the course of a selection and searches for degenerate sequence motifs. While originally designed for aptamer selections, FASTAptamer can be applied to any selection strategy that can utilize next-generation DNA sequencing, such as ribozyme or deoxyribozyme selections, in vivo mutagenesis and various surface display technologies (peptide, antibody fragment, mRNA, etc.). FASTAptamer software, sample data and a user's guide are available for download at http://burkelab.missouri.edu/fastaptamer.html.",2015-03-03 +24322294,SIOMICS: a novel approach for systematic identification of motifs in ChIP-seq data.,"The identification of transcription factor binding motifs is important for the study of gene transcriptional regulation. 
The chromatin immunoprecipitation (ChIP), followed by massive parallel sequencing (ChIP-seq) experiments, provides an unprecedented opportunity to discover binding motifs. Computational methods have been developed to identify motifs from ChIP-seq data, while at the same time encountering several problems. For example, existing methods are often not scalable to the large number of sequences obtained from ChIP-seq peak regions. Some methods heavily rely on well-annotated motifs even though the number of known motifs is limited. To simplify the problem, de novo motif discovery methods often neglect underrepresented motifs in ChIP-seq peak regions. To address these issues, we developed a novel approach called SIOMICS to de novo discover motifs from ChIP-seq data. Tested on 13 ChIP-seq data sets, SIOMICS identified motifs of many known and new cofactors. Tested on 13 simulated random data sets, SIOMICS discovered no motif in any data set. Compared with two recently developed methods for motif discovery, SIOMICS shows advantages in terms of speed, the number of known cofactor motifs predicted in experimental data sets and the number of false motifs predicted in random data sets. The SIOMICS software is freely available at http://eecs.ucf.edu/~xiaoman/SIOMICS/SIOMICS.html.",2013-12-09 +26217778,A 2-D guinea pig lung proteome map.,"Guinea pigs represent an important model for a number of infectious and non-infectious pulmonary diseases. The guinea pig genome has recently been sequenced to full coverage, opening up new research avenues using genomics, transcriptomics and proteomics techniques in this species. In order to further annotate the guinea pig genome and to facilitate future pulmonary proteomics in this species we constructed a 2-D guinea pig proteome map including 486 protein identifications and post translational modifications (PTMs). The map has been up-loaded to the UCD 2D-PAGE open access database (http://proteomics-portal.ucd.ie/). 
Transit peptides, N-terminal acetylations and other PTMs are available via Peptideatlas (ftp://PASS00619:NM455hi@ftp.peptideatlas.org/). This dataset is associated with a research article published in the Journal of Proteomics [1].",2015-05-27 +23193284,PTMcode: a database of known and predicted functional associations between post-translational modifications in proteins.,"Post-translational modifications (PTMs) are involved in the regulation and structural stabilization of eukaryotic proteins. The combination of individual PTM states is a key to modulate cellular functions as became evident in a few well-studied proteins. This combinatorial setting, dubbed the PTM code, has been proposed to be extended to whole proteomes in eukaryotes. Although we are still far from deciphering such a complex language, thousands of protein PTM sites are being mapped by high-throughput technologies, thus providing sufficient data for comparative analysis. PTMcode (http://ptmcode.embl.de) aims to compile known and predicted PTM associations to provide a framework that would enable hypothesis-driven experimental or computational analysis of various scales. In its first release, PTMcode provides PTM functional associations of 13 different PTM types within proteins in 8 eukaryotes. They are based on five evidence channels: a literature survey, residue co-evolution, structural proximity, PTMs at the same residue and location within PTM highly enriched protein regions (hotspots). PTMcode is presented as a protein-based searchable database with an interactive web interface providing the context of the co-regulation of nearly 75 000 residues in >10 000 proteins.",2012-11-28 +22096232,The BioSample Database (BioSD) at the European Bioinformatics Institute.,"The BioSample Database (http://www.ebi.ac.uk/biosamples) is a new database at EBI that stores information about biological samples used in molecular experiments, such as sequencing, gene expression or proteomics. 
The goals of the BioSample Database include: (i) recording and linking of sample information consistently within EBI databases such as ENA, ArrayExpress and PRIDE; (ii) minimizing data entry efforts for EBI database submitters by enabling submitting sample descriptions once and referencing them later in data submissions to assay databases and (iii) supporting cross database queries by sample characteristics. Each sample in the database is assigned an accession number. The database includes a growing set of reference samples, such as cell lines, which are repeatedly used in experiments and can be easily referenced from any database by their accession numbers. Accession numbers for the reference samples will be exchanged with a similar database at NCBI. The samples in the database can be queried by their attributes, such as sample types, disease names or sample providers. A simple tab-delimited format facilitates submissions of sample information to the database, initially via email to biosamples@ebi.ac.uk.",2011-11-16 +24098076,NEIMiner: nanomaterial environmental impact data miner.,"As more engineered nanomaterials (eNM) are developed for a wide range of applications, it is crucial to minimize any unintended environmental impacts resulting from the application of eNM. To realize this vision, industry and policymakers must base risk management decisions on sound scientific information about the environmental fate of eNM, their availability to receptor organisms (eg, uptake), and any resultant biological effects (eg, toxicity). To address this critical need, we developed a model-driven, data mining system called NEIMiner, to study nanomaterial environmental impact (NEI). NEIMiner consists of four components: NEI modeling framework, data integration, data management and access, and model building. The NEI modeling framework defines the scope of NEI modeling and the strategy of integrating NEI models to form a layered, comprehensive predictability. 
The data integration layer brings together heterogeneous data sources related to NEI via automatic web services and web scraping technologies. The data management and access layer reuses and extends a popular content management system (CMS), Drupal, and consists of modules that model the complex data structure for NEI-related bibliography and characterization data. The model building layer provides an advanced analysis capability for NEI data. Together, these components provide significant value to the process of aggregating and analyzing large-scale distributed NEI data. A prototype of the NEIMiner system is available at http://neiminer.i-a-i.com/.",2013-09-18 +27381058,"Drug use and its associated factors among money boys in Hunan Province, China.","

Objectives

To describe drug use, types of drugs and related factors among money boys in Hunan Province, China.

Study design

A cross-sectional study was conducted between July 2012 and January 2013.

Methods

Based on respondent-driven sampling, researchers located seven 'seeds' via a gay-dating website: http://www.ixxqy.org. After three waves of recruitment, 234 money boys were enrolled. They were asked to complete a 23-item questionnaire regarding demographic characteristics, drug use, a history of human immunodeficiency virus infection and family environment. Descriptive statistics and logistic regression analysis were conducted using Statistical Package for the Social Sciences Version 20.0.

Results

In total, 205 valid questionnaires were collected. Based on the data collected, 80 (39.0%) money boys had used drugs within the last 3 months. Rush popper (36.6%) and methamphetamine (12.7%) were used most commonly, and other drugs used were ecstasy (7.8%), ketamine (5.9%), marijuana (2.4%), morphine (1.5%), heroin (1.0%) and cocaine (0.5%). Factors included in the logistic regression were length of service (odds ratio [OR] 0.395, 95% confidence interval [CI] 0.175-0.896), being an only child (OR 2.272, 95% CI 1.108-4.659), relationship between parents (OR 0.428, 95% CI 0.213-0.858) and social network (OR 2.387, 95% CI 1.144-4.970). A shorter length of service and a good relationship between parents were protective factors against drug use, while being an only child and having a wide social network were risk factors.

Conclusion

Drug use is common among money boys. This study found that length of service, being an only child, relationship between parents and social network are associated with drug use.",2016-07-02 +25481008,flowCL: ontology-based cell population labelling in flow cytometry.,"

Motivation

Finding one or more cell populations of interest, such as those correlating to a specific disease, is critical when analysing flow cytometry data. However, labelling of cell populations is not well defined, making it difficult to integrate the output of algorithms to external knowledge sources.

Results

We developed flowCL, a software package that performs semantic labelling of cell populations based on their surface markers and applied it to labelling of the Federation of Clinical Immunology Societies Human Immunology Project Consortium lyoplate populations as a use case.

Conclusion

By providing automated labelling of cell populations based on their immunophenotype, flowCL allows for unambiguous and reproducible identification of standardized cell types.

Availability and implementation

Code, R script and documentation are available under the Artistic 2.0 license through Bioconductor (http://www.bioconductor.org/packages/devel/bioc/html/flowCL.html).

Contact

rbrinkman@bccrc.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-06 +27307636,DFLpred: High-throughput prediction of disordered flexible linker regions in protein sequences.,"

Motivation

Disordered flexible linkers (DFLs) are disordered regions that serve as flexible linkers/spacers in multi-domain proteins or between structured constituents in domains. They are different from flexible linkers/residues because they are disordered and longer. Availability of experimentally annotated DFLs provides an opportunity to build high-throughput computational predictors of these regions from protein sequences. To date, there are no computational methods that directly predict DFLs and they can be found only indirectly by filtering predicted flexible residues with predictions of disorder.

Results

We conceptualized, developed and empirically assessed a first-of-its-kind sequence-based predictor of DFLs, DFLpred. This method outputs propensity to form DFLs for each residue in the input sequence. DFLpred uses a small set of empirically selected features that quantify propensities to form certain secondary structures, disordered regions and structured regions, which are processed by a fast linear model. Our high-throughput predictor can be used on the whole-proteome scale; it needs <1 h to predict entire proteome on a single CPU. When assessed on an independent test dataset with low sequence-identity proteins, it secures area under the receiver operating characteristic curve equal 0.715 and outperforms existing alternatives that include methods for the prediction of flexible linkers, flexible residues, intrinsically disordered residues and various combinations of these methods. Prediction on the complete human proteome reveals that about 10% of proteins have a large content of over 30% DFL residues. We also estimate that about 6000 DFL regions are long with ≥30 consecutive residues.

Availability and implementation

http://biomine.ece.ualberta.ca/DFLpred/

Contact

lkurgan@vcu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +22144684,Description and analysis of genetic variants in French hereditary breast and ovarian cancer families recorded in the UMD-BRCA1/BRCA2 databases.,"BRCA1 and BRCA2 are the two main genes responsible for predisposition to breast and ovarian cancers, as a result of protein-inactivating monoallelic mutations. It remains to be established whether many of the variants identified in these two genes, so-called unclassified/unknown variants (UVs), contribute to the disease phenotype or are simply neutral variants (or polymorphisms). Given the clinical importance of establishing their status, a nationwide effort to annotate these UVs was launched by laboratories belonging to the French GGC consortium (Groupe Génétique et Cancer), leading to the creation of the UMD-BRCA1/BRCA2 databases (http://www.umd.be/BRCA1/ and http://www.umd.be/BRCA2/). These databases have been endorsed by the French National Cancer Institute (INCa) and are designed to collect all variants detected in France, whether causal, neutral or UV. They differ from other BRCA databases in that they contain co-occurrence data for all variants. Using these data, the GGC French consortium has been able to classify certain UVs also contained in other databases. In this article, we report some novel UVs not contained in the BIC database and explore their impact in cancer predisposition based on a structural approach.",2011-12-05 +25911996,Comprehensive transcriptomic analysis of molecularly targeted drugs in cancer for target pathway evaluation.,"Targeted therapy is a rational and promising strategy for the treatment of advanced cancer. For the development of clinical agents targeting oncogenic signaling pathways, it is important to define the specificity of compounds to the target molecular pathway. 
Genome-wide transcriptomic analysis is an unbiased approach to evaluate the compound mode of action, but it is still unknown whether the analysis could be widely applicable to classify molecularly targeted anticancer agents. We comprehensively obtained and analyzed 129 transcriptomic datasets of cancer cells treated with 83 anticancer drugs or related agents, covering most clinically used, molecularly targeted drugs alongside promising inhibitors of molecular cancer targets. Hierarchical clustering and principal component analysis revealed that compounds targeting similar target molecules or pathways were clustered together. These results confirmed that the gene signatures of these drugs reflected their modes of action. Of note, inhibitors of oncogenic kinase pathways formed a large unique cluster, showing that these agents affect a shared molecular pathway distinct from classical antitumor agents and other classes of agents. The gene signature analysis further classified kinome-targeting agents depending on their target signaling pathways, and we identified target pathway-selective signature gene sets. The gene expression analysis was also valuable in uncovering unexpected target pathways of some anticancer agents. These results indicate that comprehensive transcriptomic analysis with our database (http://scads.jfcr.or.jp/db/cs/) is a powerful strategy to validate and re-evaluate the target pathways of anticancer compounds.",2015-05-25 +28048842,SU-F-R-18: Updates to the Computational Environment for Radiological Research for Image Analysis.,"To present new tools in CERR for Texture Analysis and Visualization.(1) Quantitative Image Analysis: We added the ability to compute Haralick texture features based on local neighbourhood. The Texture features depend on many parameters used in their derivation. 
For example: (a) directionality, (b) quantization of image, (c) patch-size for the neighborhood, (d) handling of the edge voxels within the region of interest, (e) Averaging co-occurrence matrix vs texture features for different directions etc. A graphical user interface was built to set these parameters and then visualize their impact on the resulting texture maps. The entire functionality was written in Matlab. Array indexing was used to speed up the texture calculation. The computation speed is very competitive with the ITK library. Moreover, our implementation works with multiple CPUs and the computation time can be further reduced by using multiple processor threads. In order to reduce the Haralick texture maps into scalar features, we propose the use of Texture Volume Histograms. This lets users make use of the entire distribution of texture values within the region of interest rather than using just the mean and the standard deviations. (2) Qualitative/Visualization tools: The derived texture maps are stored as a new scan (derived) within CERR's planC data structure. A display that compares various scans was built to show the raw image and the derived texture maps side-by-side. These images are positionally linked and can be navigated together. CERR's graphics handling was updated and sped-up to be compatible with the newer Matlab versions. 
As a result, the users can use (a) different window levels and colormaps for different viewports, (b) click-and-drag or use mouse scroll-wheel to navigate slices.The new features and updates are available via https://www.github.com/adityaapte/cerr.Features added to CERR increase its utility in Radiomics and Outcomes modeling.",2016-06-01 +25954459,Scalable and High-Throughput Execution of Clinical Quality Measures from Electronic Health Records using MapReduce and the JBoss® Drools Engine.,"Automated execution of electronic Clinical Quality Measures (eCQMs) from electronic health records (EHRs) on large patient populations remains a significant challenge, and the testability, interoperability, and scalability of measure execution are critical. The High Throughput Phenotyping (HTP; http://phenotypeportal.org) project aligns with these goals by using the standards-based HL7 Health Quality Measures Format (HQMF) and Quality Data Model (QDM) for measure specification, as well as Common Terminology Services 2 (CTS2) for semantic interpretation. The HQMF/QDM representation is automatically transformed into a JBoss(®) Drools workflow, enabling horizontal scalability via clustering and MapReduce algorithms. Using Project Cypress, automated verification metrics can then be produced. Our results show linear scalability for nine executed 2014 Center for Medicare and Medicaid Services (CMS) eCQMs for eligible professionals and hospitals for >1,000,000 patients, and verified execution correctness of 96.4% based on Project Cypress test data of 58 eCQMs.",2014-11-14 +25150249,Inter-species pathway perturbation prediction via data-driven detection of functional homology.,"

Motivation

Experiments in animal models are often conducted to infer how humans will respond to stimuli by assuming that the same biological pathways will be affected in both organisms. The limitations of this assumption were tested in the IMPROVER Species Translation Challenge, where 52 stimuli were applied to both human and rat cells and perturbed pathways were identified. In the Inter-species Pathway Perturbation Prediction sub-challenge, multiple teams proposed methods to use rat transcription data from 26 stimuli to predict human gene set and pathway activity under the same perturbations. Submissions were evaluated using three performance metrics on data from the remaining 26 stimuli.

Results

We present two approaches, ranked second in this challenge, that do not rely on sequence-based orthology between rat and human genes to translate pathway perturbation state but instead identify transcriptional response orthologs across a set of training conditions. The translation from rat to human accomplished by these so-called direct methods is not dependent on the particular analysis method used to identify perturbed gene sets. In contrast, machine learning-based methods require performing a pathway analysis initially and then mapping the pathway activity between organisms. Unlike most machine learning approaches, direct methods can be used to predict the activation of a human pathway for a new (test) stimuli, even when that pathway was never activated by a training stimuli.

Availability

Gene expression data are available from ArrayExpress (accession E-MTAB-2091), while software implementations are available from http://bioinformaticsprb.med.wayne.edu?p=50 and http://goo.gl/hJny3h.

Contact

christoph.hafemeister@nyu.edu or atarca@med.wayne.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-08-22 +27307635,CMsearch: simultaneous exploration of protein sequence space and structure space improves not only protein homology detection but also protein structure prediction.,"

Motivation

Protein homology detection, a fundamental problem in computational biology, is an indispensable step toward predicting protein structures and understanding protein functions. Despite the advances in recent decades on sequence alignment, threading and alignment-free methods, protein homology detection remains a challenging open problem. Recently, network methods that try to find transitive paths in the protein structure space demonstrate the importance of incorporating network information of the structure space. Yet, current methods merge the sequence space and the structure space into a single space, and thus introduce inconsistency in combining different sources of information.

Method

We present a novel network-based protein homology detection method, CMsearch, based on cross-modal learning. Instead of exploring a single network built from the mixture of sequence and structure space information, CMsearch builds two separate networks to represent the sequence space and the structure space. It then learns sequence-structure correlation by simultaneously taking sequence information, structure information, sequence space information and structure space information into consideration.

Results

We tested CMsearch on two challenging tasks, protein homology detection and protein structure prediction, by querying all 8332 PDB40 proteins. Our results demonstrate that CMsearch is insensitive to the similarity metrics used to define the sequence and the structure spaces. By using HMM-HMM alignment as the sequence similarity metric, CMsearch clearly outperforms state-of-the-art homology detection methods and the CASP-winning template-based protein structure prediction methods.

Availability and implementation

Our program is freely available for download from http://sfb.kaust.edu.sa/Pages/Software.aspx

Contact

xin.gao@kaust.edu.sa

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +23837716,"KONAGAbase: a genomic and transcriptomic database for the diamondback moth, Plutella xylostella.","

Background

The diamondback moth (DBM), Plutella xylostella, is one of the most harmful insect pests for crucifer crops worldwide. DBM has rapidly evolved high resistance to most conventional insecticides such as pyrethroids, organophosphates, fipronil, spinosad, Bacillus thuringiensis, and diamides. Therefore, it is important to develop genomic and transcriptomic DBM resources for analysis of genes related to insecticide resistance, both to clarify the mechanism of resistance of DBM and to facilitate the development of insecticides with a novel mode of action for more effective and environmentally less harmful insecticide rotation. To contribute to this goal, we developed KONAGAbase, a genomic and transcriptomic database for DBM (KONAGA is the Japanese word for DBM).

Description

KONAGAbase provides (1) transcriptomic sequences of 37,340 ESTs/mRNAs and 147,370 RNA-seq contigs which were clustered and assembled into 84,570 unigenes (30,695 contigs, 50,548 pseudo singletons, and 3,327 singletons); and (2) genomic sequences of 88,530 WGS contigs with 246,244 degenerate contigs and 106,455 singletons from which 6,310 de novo identified repeat sequences and 34,890 predicted gene-coding sequences were extracted. The unigenes and predicted gene-coding sequences were clustered and 32,800 representative sequences were extracted as a comprehensive putative gene set. These sequences were annotated with BLAST descriptions, Gene Ontology (GO) terms, and Pfam descriptions, respectively. KONAGAbase contains rich graphical user interface (GUI)-based web interfaces for easy and efficient searching, browsing, and downloading sequences and annotation data. Five useful search interfaces consisting of BLAST search, keyword search, BLAST result-based search, GO tree-based search, and genome browser are provided. KONAGAbase is publicly available from our website (http://dbm.dna.affrc.go.jp/px/) through standard web browsers.

Conclusions

KONAGAbase provides DBM comprehensive transcriptomic and draft genomic sequences with useful annotation information with easy-to-use web interfaces, which helps researchers to efficiently search for target sequences such as insect resistance-related genes. KONAGAbase will be continuously updated and additional genomic/transcriptomic resources and analysis tools will be provided for further efficient analysis of the mechanism of insecticide resistance and the development of effective insecticides with a novel mode of action for DBM.",2013-07-09 +26697359,Transcriptome analysis of Streptococcus pneumoniae D39 in the presence of cobalt.,"Cobalt (Co(2 +)) is an important transition metal ion that plays a vital role in cellular physiology of bacteria. The role of Co(2 +) in the regulation of several genes/operons in Streptococcus pneumoniae has recently been reported [1]. The data described in this article relate to the genome-wide transcriptional profiling of Streptococcus pneumoniae D39, either in the presence or absence of 0.5 mM Co(2 +) in chemically defined medium (CDM) using DNA microarray analysis. Genes belonging to a broad range of cellular processes such as virulence, transport and efflux systems, stress response and surface attachment were differentially expressed in the presence of Co(2 +). We used transcriptional lacZ assays and electrophoretic mobility shift assays (EMSAs) to confirm our results [1]. The dataset is publicly available at the Gene Expression Omnibus (GEO) repository (http://www.ncbi.nlm.nih.gov/geo/) with accession number GSE57696.",2015-09-08 +23406793,"T-HOD: a literature-based candidate gene database for hypertension, obesity and diabetes.","Researchers are finding it more and more difficult to follow the changing status of disease candidate genes due to the exponential increase in gene mapping studies. 
The Text-mined Hypertension, Obesity and Diabetes candidate gene database (T-HOD) is developed to help trace existing research on three kinds of cardiovascular diseases: hypertension, obesity and diabetes, with the last disease categorized into Type 1 and Type 2, by regularly and semiautomatically extracting HOD-related genes from newly published literature. Currently, there are 837, 835 and 821 candidate genes recorded in T-HOD for hypertension, obesity and diabetes, respectively. T-HOD employed the state-of-art text-mining technologies, including a gene/disease identification system and a disease-gene relation extraction system, which can be used to affirm the association of genes with three diseases and provide more evidence for further studies. The primary inputs of T-HOD are the three kinds of diseases, and the output is a list of disease-related genes that can be ranked based on their number of appearance, protein-protein interactions and single-nucleotide polymorphisms. Unlike manually constructed disease gene databases, the content of T-HOD is regularly updated by our text-mining system and verified by domain experts. The interface of T-HOD facilitates easy browsing for users and allows T-HOD curators to verify data efficiently. We believe that T-HOD can help life scientists in search for more disease candidate genes in a less time- and effort-consuming manner. Database URL: http://bws.iis.sinica.edu.tw/THOD.",2013-02-12 +23180789,The ChEBI reference database and ontology for biologically relevant chemistry: enhancements for 2013.,"ChEBI (http://www.ebi.ac.uk/chebi) is a database and ontology of chemical entities of biological interest. Over the past few years, ChEBI has continued to grow steadily in content, and has added several new features. In addition to incorporating all user-requested compounds, our annotation efforts have emphasized immunology, natural products and metabolites in many species. 
All database entries are now 'is_a' classified within the ontology, meaning that all of the chemicals are available to semantic reasoning tools that harness the classification hierarchy. We have completely aligned the ontology with the Open Biomedical Ontologies (OBO) Foundry-recommended upper level Basic Formal Ontology. Furthermore, we have aligned our chemical classification with the classification of chemical-involving processes in the Gene Ontology (GO), and as a result of this effort, the majority of chemical-involving processes in GO are now defined in terms of the ChEBI entities that participate in them. This effort necessitated incorporating many additional biologically relevant compounds. We have incorporated additional data types including reference citations, and the species and component for metabolites. Finally, our website and web services have had several enhancements, most notably the provision of a dynamic new interactive graph-based ontology visualization.",2012-11-24 +28698795,Semi-automated Modular Program Constructor for physiological modeling: Building cell and organ models.,"The Modular Program Constructor (MPC) is an open-source Java based modeling utility, built upon JSim's Mathematical Modeling Language (MML) ( http://www.physiome.org/jsim/) that uses directives embedded in model code to construct larger, more complicated models quickly and with less error than manually combining models. A major obstacle in writing complex models for physiological processes is the large amount of time it takes to model the myriad processes taking place simultaneously in cells, tissues, and organs. MPC replaces this task with code-generating algorithms that take model code from several different existing models and produce model code for a new JSim model. This is particularly useful during multi-scale model development where many variants are to be configured and tested against data. 
MPC encodes and preserves information about how a model is built from its simpler model modules, allowing the researcher to quickly substitute or update modules for hypothesis testing. MPC is implemented in Java and requires JSim to use its output. MPC source code and documentation are available at http://www.physiome.org/software/MPC/.",2015-12-16 +26674530,Prediction of Spontaneous Protein Deamidation from Sequence-Derived Secondary Structure and Intrinsic Disorder.,"Asparagine residues in proteins undergo spontaneous deamidation, a post-translational modification that may act as a molecular clock for the regulation of protein function and turnover. Asparagine deamidation is modulated by protein local sequence, secondary structure and hydrogen bonding. We present NGOME, an algorithm able to predict non-enzymatic deamidation of internal asparagine residues in proteins in the absence of structural data, using sequence-based predictions of secondary structure and intrinsic disorder. Compared to previous algorithms, NGOME does not require three-dimensional structures yet yields better predictions than available sequence-only methods. Four case studies of specific proteins show how NGOME may help the user identify deamidation-prone asparagine residues, often related to protein gain of function, protein degradation or protein misfolding in pathological processes. A fifth case study applies NGOME at a proteomic scale and unveils a correlation between asparagine deamidation and protein degradation in yeast. NGOME is freely available as a webserver at the National EMBnet node Argentina, URL: http://www.embnet.qb.fcen.uba.ar/ in the subpage ""Protein and nucleic acid structure and sequence analysis"".",2015-12-16 +26827622,A knowledgebase of the human Alu repetitive elements.,"Alu elements are the most abundant retrotransposons in the human genome with more than one million copies. 
Alu repeats have been reported to participate in multiple processes related with genome regulation and compartmentalization. Moreover, they have been involved in the facilitation of pathological mutations in many diseases, including cancer. The contribution of Alus and other repeats in genomic regulation is often overlooked because their study poses technical and analytical challenges hardly attainable with conventional strategies. Here we propose the integration of ontology-based semantic methods to query a knowledgebase for the human Alus. The knowledgebase for the human Alus leverages Sequence (SO) and Gene Ontologies (GO) and is devoted to address functional and genetic information in the genomic context of the Alus. For each Alu element, the closest gene and transcript are stored, as well their functional annotation according to GO, the state of the chromatin and the transcription factors binding sites inside the Alu. The model uses Web Ontology Language (OWL) and Semantic Web Rule Language (SWRL). As a case of use and to illustrate the utility of the tool, we have evaluated the epigenetic states of Alu repeats associated with gene promoters according to their transcriptional activity. The ontology is easily extendable, offering a scaffold for the inclusion of new experimental data. The RDF/XML formalization is freely available at http://aluontology.sourceforge.net/.",2016-01-28 +24215023,RNAseqViewer: visualization tool for RNA-Seq data.,"

Summary

With the advances of RNA sequencing technologies, scientists need new tools to analyze transcriptome data. We introduce RNAseqViewer, a new visualization tool dedicated to RNA-Seq data. The program offers innovative ways to represent transcriptome data for single or multiple samples. It is a handy tool for scientists who use RNA-Seq data to compare multiple transcriptomes, for example, to compare gene expression and alternative splicing of cancer samples or of different development stages.

Availability and implementation

RNAseqViewer is freely available for academic use at http://bioinfo.au.tsinghua.edu.cn/software/RNAseqViewer/

Contact

zhangxg@tsinghua.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-11-08 +23590708,"BMP4, a strong better prognosis predictor, has a subtype preference and cell development association in gliomas.","

Background

The bone morphogenetic family proteins (BMP) are phylogenetically conserved proteins, which are essential for embryonic development. The key regulatory subunit, the bone morphogenetic protein 4 (BMP4), is overexpressed and associated with tumor metastasis in a variety of cancers. However, the prognostic and molecular features of gliomas with BMP4 expression are still unclear.

Methods

We obtained whole genome mRNA expression microarray data of 220 glioma samples of all grades from Chinese Glioma Genome Atlas (CGGA) database (http://www.cgga.org.cn) as discovery set. Of the 123 high-grade gliomas in this set, 33 Grade III tumors and 88 GBMs were analyzed by Kaplan-Meier method. Immunohistochemistry was used for validating the expression of BMP4 in another 77 glioma samples. Three additional datasets were obtained as validation sets. Gene ontology (GO) analysis and gene set variation analysis (GSVA) were used for functional annotation of BMP4.

Results

In the discovery set, BMP4 overexpression was significantly associated with low grade as well as the lower mortality of high-grade gliomas in survival analysis (log-rank, p<0.05 in GBM patients and p<0.01 in anaplastic gliomas, respectively). BMP4 also showed a Proneural subtype, G1 subtype and Isocitrate Dehydrogenase 1 (IDH1) mutation preference and cell development association. The results of validation 4 datasets showed similar findings. The overexpression of BMP4 was also detected in low grade gliomas compared to the high grade ones by immunohistochemistry (p<0.05, chi-square test).

Conclusion

BMP4 expression was independently associated with grade and good prognosis in grade III and grade IV gliomas, suggesting BMP4 as a novel biomarker with potential important therapeutic implications.",2013-04-16 +22075991,The Gene Wiki in 2011: community intelligence applied to human gene annotation.,"The Gene Wiki is an open-access and openly editable collection of Wikipedia articles about human genes. Initiated in 2008, it has grown to include articles about more than 10,000 genes that, collectively, contain more than 1.4 million words of gene-centric text with extensive citations back to the primary scientific literature. This growing body of useful, gene-centric content is the result of the work of thousands of individuals throughout the scientific community. Here, we describe recent improvements to the automated system that keeps the structured data presented on Gene Wiki articles in sync with the data from trusted primary databases. We also describe the expanding contents, editors and users of the Gene Wiki. Finally, we introduce a new automated system, called WikiTrust, which can effectively compute the quality of Wikipedia articles, including Gene Wiki articles, at the word level. All articles in the Gene Wiki can be freely accessed and edited at Wikipedia, and additional links and information can be found at the project's Wikipedia portal page: http://en.wikipedia.org/wiki/Portal:Gene_Wiki.",2011-11-10 +23497320,Deep sequencing for de novo construction of a marine fish (Sparus aurata) transcriptome database with a large coverage of protein-coding transcripts.,"

Background

The gilthead sea bream (Sparus aurata) is the main fish species cultured in the Mediterranean area and constitutes an interesting model of research. Nevertheless, transcriptomic and genomic data are still scarce for this highly valuable species. A transcriptome database was constructed by de novo assembly of gilthead sea bream sequences derived from public repositories of mRNA and collections of expressed sequence tags together with new high-quality reads from five cDNA 454 normalized libraries of skeletal muscle (1), intestine (1), head kidney (2) and blood (1).

Results

Sequencing of the new 454 normalized libraries produced 2,945,914 high-quality reads and the de novo global assembly yielded 125,263 unique sequences with an average length of 727 nt. Blast analysis directed to protein and nucleotide databases annotated 63,880 sequences encoding for 21,384 gene descriptions, that were curated for redundancies and frameshifting at the homopolymer regions of open reading frames, and hosted at http://www.nutrigroup-iats.org/seabreamdb. Among the annotated gene descriptions, 16,177 were mapped in the Ingenuity Pathway Analysis (IPA) database, and 10,899 were eligible for functional analysis with a representation in 341 out of 372 IPA canonical pathways. The high representation of randomly selected stickleback transcripts by Blast search in the nucleotide gilthead sea bream database evidenced its high coverage of protein-coding transcripts.

Conclusions

The newly assembled gilthead sea bream transcriptome represents a progress in genomic resources for this species, as it probably contains more than 75% of actively transcribed genes, constituting a valuable tool to assist studies on functional genomics and future genome projects.",2013-03-15 +23044546,miR-EdiTar: a database of predicted A-to-I edited miRNA target sites.,"

Motivation

A-to-I RNA editing is an important mechanism that consists of the conversion of specific adenosines into inosines in RNA molecules. Its dysregulation has been associated to several human diseases including cancer. Recent work has demonstrated a role for A-to-I editing in microRNA (miRNA)-mediated gene expression regulation. In fact, edited forms of mature miRNAs can target sets of genes that differ from the targets of their unedited forms. The specific deamination of mRNAs can generate novel binding sites in addition to potentially altering existing ones.

Results

This work presents miR-EdiTar, a database of predicted A-to-I edited miRNA binding sites. The database contains predicted miRNA binding sites that could be affected by A-to-I editing and sites that could become miRNA binding sites as a result of A-to-I editing.

Availability

miR-EdiTar is freely available online at http://microrna.osumc.edu/mireditar.

Contact

alessandro.lagana@osumc.edu or carlo.croce@osumc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-07 +23586463,Enrichr: interactive and collaborative HTML5 gene list enrichment analysis tool.,"

Background

System-wide profiling of genes and proteins in mammalian cells produce lists of differentially expressed genes/proteins that need to be further analyzed for their collective functions in order to extract new knowledge. Once unbiased lists of genes or proteins are generated from such experiments, these lists are used as input for computing enrichment with existing lists created from prior knowledge organized into gene-set libraries. While many enrichment analysis tools and gene-set libraries databases have been developed, there is still room for improvement.

Results

Here, we present Enrichr, an integrative web-based and mobile software application that includes new gene-set libraries, an alternative approach to rank enriched terms, and various interactive visualization approaches to display enrichment results using the JavaScript library, Data Driven Documents (D3). The software can also be embedded into any tool that performs gene list analysis. We applied Enrichr to analyze nine cancer cell lines by comparing their enrichment signatures to the enrichment signatures of matched normal tissues. We observed a common pattern of up regulation of the polycomb group PRC2 and enrichment for the histone mark H3K27me3 in many cancer cell lines, as well as alterations in Toll-like receptor and interleukin signaling in K562 cells when compared with normal myeloid CD33+ cells. Such analyses provide global visualization of critical differences between normal tissues and cancer cell lines but can be applied to many other scenarios.

Conclusions

Enrichr is an easy to use intuitive enrichment analysis web-based tool providing various types of visualization summaries of collective functions of gene lists. Enrichr is open source and freely available online at: http://amp.pharm.mssm.edu/Enrichr.",2013-04-15 +27023833,"Travel-Associated Zika Virus Disease Cases Among U.S. Residents--United States, January 2015-February 2016.","Zika virus is an emerging mosquito-borne flavivirus. Recent outbreaks of Zika virus disease in the Pacific Islands and the Region of the Americas have identified new modes of transmission and clinical manifestations, including adverse pregnancy outcomes. However, data on the epidemiology and clinical findings of laboratory-confirmed Zika virus disease remain limited. During January 1, 2015-February 26, 2016, a total of 116 residents of 33 U.S. states and the District of Columbia had laboratory evidence of recent Zika virus infection based on testing performed at CDC. Cases include one congenital infection and 115 persons who reported recent travel to areas with active Zika virus transmission (n = 110) or sexual contact with such a traveler (n = 5). All 115 patients had clinical illness, with the most common signs and symptoms being rash (98%; n = 113), fever (82%; 94), and arthralgia (66%; 76). Health care providers should educate patients, particularly pregnant women, about the risks for, and measures to prevent, infection with Zika virus and other mosquito-borne viruses. 
Zika virus disease should be considered in patients with acute onset of fever, rash, arthralgia, or conjunctivitis, who traveled to areas with ongoing Zika virus transmission (http://www.cdc.gov/zika/geo/index.html) or who had unprotected sex with a person who traveled to one of those areas and developed compatible symptoms within 2 weeks of returning.",2016-03-25 +25765651,Analysis of strand-specific RNA-seq data using machine learning reveals the structures of transcription units in Clostridium thermocellum.,"Identification of transcription units (TUs) encoded in a bacterial genome is essential to elucidation of transcriptional regulation of the organism. To gain a detailed understanding of the dynamically composed TU structures, we have used four strand-specific RNA-seq (ssRNA-seq) datasets collected under two experimental conditions to derive the genomic TU organization of Clostridium thermocellum using a machine-learning approach. Our method accurately predicted the genomic boundaries of individual TUs based on two sets of parameters measuring the RNA-seq expression patterns across the genome: expression-level continuity and variance. A total of 2590 distinct TUs are predicted based on the four RNA-seq datasets. Among the predicted TUs, 44% have multiple genes. We assessed our prediction method on an independent set of RNA-seq data with longer reads. The evaluation confirmed the high quality of the predicted TUs. Functional enrichment analyses on a selected subset of the predicted TUs revealed interesting biology. To demonstrate the generality of the prediction method, we have also applied the method to RNA-seq data collected on Escherichia coli and achieved high prediction accuracies. The TU prediction program named SeqTU is publicly available at https://code.google.com/p/seqtu/. We expect that the predicted TUs can serve as the baseline information for studying transcriptional and post-transcriptional regulation in C. 
thermocellum and other bacteria.",2015-03-12 +23676619,Global detection and identification of developmental stage specific transcripts in mouse brain using subtractive cross-screening algorithm.,"

Background

Pre-mRNA splicing is a crucial step for genetic regulation and accounts largely for downstream translational diversity. The current time of biological research is characterized by advances in functional genomics study and the understanding of the pre-mRNA splicing process has thus become a major portal for biologists to gain insights into the complex gene regulatory mechanism. The intranuclear alternative splicing process can form a variety of genomic transcripts that modulate the growth and development of an organism, particularly in the immune and neural systems.

Methods

In the current study, we investigated and identified alternative splicing transcripts at different stages of embryonic mouse brain morphogenesis using subtractive cross-screening algorithm.

Results

A total of 195 candidate transcripts were found during organogenesis; 1629 identified at fetus stage, 116 in juvenile and 148 transcripts from adulthood. To document our findings, we developed a database named DMBAS, which can be accessed through the link: http://173.234.48.5/DMBAS. We further investigated the alternative splicing products obtained in our experiment and noted the existence of chromosome preference between prenatal and postnatal transcripts. Additionally, the distribution of splicing sites and the splicing types were found to have distinct genomic features at varying stages of brain development. The majority of identified alternative splices (72.3%) at fetus stage were confirmed later using separate RNA-seq data sets.

Conclusion

This study is a comprehensive profiling of alternative splicing transcripts of mouse brain morphogenesis using advanced computational algorithm. A series of developmental stage specific transcripts, as well as their splicing sites and chromosome preferences were revealed in the current study. Our findings and the related online database would form a solid foundation for studies of broader biological significance and paved the way for future investigations in relevant human brain diseases.",2013-05-12 +26111046,Region-Based Association Test for Familial Data under Functional Linear Models.,"Region-based association analysis is a more powerful tool for gene mapping than testing of individual genetic variants, particularly for rare genetic variants. The most powerful methods for regional mapping are based on the functional data analysis approach, which assumes that the regional genome of an individual may be considered as a continuous stochastic function that contains information about both linkage and linkage disequilibrium. Here, we extend this powerful approach, earlier applied only to independent samples, to the samples of related individuals. To this end, we additionally include a random polygene effects in functional linear model used for testing association between quantitative traits and multiple genetic variants in the region. We compare the statistical power of different methods using Genetic Analysis Workshop 17 mini-exome family data and a wide range of simulation scenarios. Our method increases the power of regional association analysis of quantitative traits compared with burden-based and kernel-based methods for the majority of the scenarios. In addition, we estimate the statistical power of our method using regions with small number of genetic variants, and show that our method retains its advantage over burden-based and kernel-based methods in this case as well. 
The new method is implemented as the R-function 'famFLM' using two types of basis functions: the B-spline and Fourier bases. We compare the properties of the new method using models that differ from each other in the type of their function basis. The models based on the Fourier basis functions have an advantage in terms of speed and power over the models that use the B-spline basis functions and those that combine B-spline and Fourier basis functions. The 'famFLM' function is distributed under GPLv3 license and is freely available at http://mga.bionet.nsc.ru/soft/famFLM/.",2015-06-25 +25337457,"A randomized trial in a massive online open course shows people don't know what a statistically significant relationship looks like, but they can learn.","Scatterplots are the most common way for statisticians, scientists, and the public to visually detect relationships between measured variables. At the same time, and despite widely publicized controversy, P-values remain the most commonly used measure to statistically justify relationships identified between variables. Here we measure the ability to detect statistically significant relationships from scatterplots in a randomized trial of 2,039 students in a statistics massive open online course (MOOC). Each subject was shown a random set of scatterplots and asked to visually determine if the underlying relationships were statistically significant at the P < 0.05 level. Subjects correctly classified only 47.4% (95% CI [45.1%-49.7%]) of statistically significant relationships, and 74.6% (95% CI [72.5%-76.6%]) of non-significant relationships. Adding visual aids such as a best fit line or scatterplot smooth increased the probability a relationship was called significant, regardless of whether the relationship was actually significant. Classification of statistically significant relationships improved on repeat attempts of the survey, although classification of non-significant relationships did not. 
Our results suggest: (1) that evidence-based data analysis can be used to identify weaknesses in theoretical procedures in the hands of average users, (2) data analysts can be trained to improve detection of statistically significant results with practice, but (3) data analysts have incorrect intuition about what statistically significant relationships look like, particularly for small effects. We have built a web tool for people to compare scatterplots with their corresponding p-values which is available here: http://glimmer.rstudio.com/afisher/EDA/.",2014-10-16 +26656933,"BioFVM: an efficient, parallelized diffusive transport solver for 3-D biological simulations.","

Motivation

Computational models of multicellular systems require solving systems of PDEs for release, uptake, decay and diffusion of multiple substrates in 3D, particularly when incorporating the impact of drugs, growth substrates and signaling factors on cell receptors and subcellular systems biology.

Results

We introduce BioFVM, a diffusive transport solver tailored to biological problems. BioFVM can simulate release and uptake of many substrates by cell and bulk sources, diffusion and decay in large 3D domains. It has been parallelized with OpenMP, allowing efficient simulations on desktop workstations or single supercomputer nodes. The code is stable even for large time steps, with linear computational cost scalings. Solutions are first-order accurate in time and second-order accurate in space. The code can be run by itself or as part of a larger simulator.

Availability and implementation

BioFVM is written in C++ with parallelization in OpenMP. It is maintained and available for download at http://BioFVM.MathCancer.org and http://BioFVM.sf.net under the Apache License (v2.0).

Contact

paul.macklin@usc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-12 +23584832,MalaCards: an integrated compendium for diseases and their annotation.,"Comprehensive disease classification, integration and annotation are crucial for biomedical discovery. At present, disease compilation is incomplete, heterogeneous and often lacking systematic inquiry mechanisms. We introduce MalaCards, an integrated database of human maladies and their annotations, modeled on the architecture and strategy of the GeneCards database of human genes. MalaCards mines and merges 44 data sources to generate a computerized card for each of 16 919 human diseases. Each MalaCard contains disease-specific prioritized annotations, as well as inter-disease connections, empowered by the GeneCards relational database, its searches and GeneDecks set analyses. First, we generate a disease list from 15 ranked sources, using disease-name unification heuristics. Next, we use four schemes to populate MalaCards sections: (i) directly interrogating disease resources, to establish integrated disease names, synonyms, summaries, drugs/therapeutics, clinical features, genetic tests and anatomical context; (ii) searching GeneCards for related publications, and for associated genes with corresponding relevance scores; (iii) analyzing disease-associated gene sets in GeneDecks to yield affiliated pathways, phenotypes, compounds and GO terms, sorted by a composite relevance score and presented with GeneCards links; and (iv) searching within MalaCards itself, e.g. for additional related diseases and anatomical context. The latter forms the basis for the construction of a disease network, based on shared MalaCards annotations, embodying associations based on etiology, clinical features and clinical conditions. This broadly disposed network has a power-law degree distribution, suggesting that this might be an inherent property of such networks. 
Work in progress includes hierarchical malady classification, ontological mapping and disease set analyses, striving to make MalaCards an even more effective tool for biomedical research. Database URL: http://www.malacards.org/",2013-04-12 +25701575,Deep sequencing analysis of viral infection and evolution allows rapid and detailed characterization of viral mutant spectrum.,"

Motivation

The study of RNA virus populations is a challenging task. Each population of RNA virus is composed of a collection of different, yet related genomes often referred to as mutant spectra or quasispecies. Virologists using deep sequencing technologies face major obstacles when studying virus population dynamics, both experimentally and in natural settings due to the relatively high error rates of these technologies and the lack of high performance pipelines. In order to overcome these hurdles we developed a computational pipeline, termed ViVan (Viral Variance Analysis). ViVan is a complete pipeline facilitating the identification, characterization and comparison of sequence variance in deep sequenced virus populations.

Results

Applying ViVan on deep sequenced data obtained from samples that were previously characterized by more classical approaches, we uncovered novel and potentially crucial aspects of virus populations. With our experimental work, we illustrate how ViVan can be used for studies ranging from the more practical, detection of resistant mutations and effects of antiviral treatments, to the more theoretical temporal characterization of the population in evolutionary studies.

Availability and implementation

Freely available on the web at http://www.vivanbioinfo.org

Contact

nshomron@post.tau.ac.il

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-19 +27448388,An Integrated Experimental Design for the Assessment of Multiple Toxicological End Points in Rat Bioassays.,"

Background

For nearly five decades long-term studies in rodents have been the accepted benchmark for assessing chronic long-term toxic effects, particularly carcinogenicity, of chemicals. The European Food Safety Authority (EFSA) and the World Health Organization (WHO) have pointed out that the current set of internationally utilized test methods capture only some of the potential adverse effects associated with exposures to these agents over the lifetime.

Objectives

In this paper, we propose the adaption of the carcinogenicity bioassay to integrate additional protocols for comprehensive long-term toxicity assessment that includes developmental exposures and long-term outcomes, capable of generating information on a broad spectrum of different end points.

Discussion

An integrated study design based on a stepwise process is described that includes the priority end points of the Economic Co-operation and Development and the National Toxicology Program guidelines on carcinogenicity and chronic toxicity and developmental and reproductive toxicity. Integrating a comprehensive set of relevant toxicological end points in a single protocol represents an opportunity to optimize animal use in accordance with the 3Rs (replacement, reduction and refinement). This strategy has the potential to provide sufficient data on multiple windows of susceptibility of specific interest for risk assessments and public health decision-making by including prenatal, lactational, neonatal exposures and evaluating outcomes over the lifespan.

Conclusion

This integrated study design is efficient in that the same generational cohort of rats used for evaluating long-term outcomes can be monitored in satellite parallel experiments to measure biomarkers and other parameters related to system-specific responses including metabolic alterations and endocrine disturbances. Citation: Manservisi F, Babot Marquillas C, Buscaroli A, Huff J, Lauriola M, Mandrioli D, Manservigi M, Panzacchi S, Silbergeld EK, Belpoggi F. 2017. An integrated experimental design for the assessment of multiple toxicological end points in rat bioassays. Environ Health Perspect 125:289-295; http://dx.doi.org/10.1289/EHP419.",2016-07-22 +25496383,CompPhy: a web-based collaborative platform for comparing phylogenies.,"

Background

Collaborative tools are of great help in conducting projects involving distant workers. Recent web technologies have helped to build such tools for jointly editing office documents and scientific data, yet none are available for handling phylogenies. Though a large number of studies and projects in evolutionary biology and systematics involve collaborations between scientists of different institutes, current tree comparison visualization software and websites are directed toward single-user access. Moreover, tree comparison functionalities are dispersed between different software that mainly focus on high level single tree visualization but to the detriment of basic tree comparison features.

Results

The web platform presented here, named CompPhy, intends to fill this gap by allowing collaborative work on phylogenies and by gathering simple advanced tools dedicated to tree comparison. It offers functionalities for tree edition, tree comparison, supertree inference and data management in a collaborative environment. The latter aspect is a specific feature of the platform, allowing people located in different places to work together at the same time on a common project. CompPhy thus proposes shared tree visualization, both synchronous and asynchronous tree manipulation, data exchange/storage, as well as facilities to keep track of the progress of analyses in working sessions. Specific advanced comparison tools are also available, such as consensus and supertree inference, or automated branch swaps of compared trees. As projects can be readily created and shared, CompPhy is also a tool that can be used easily to interact with students in an educational setting, either in the classroom or for assignments.

Conclusions

CompPhy is the first web platform devoted to the comparison of phylogenetic trees allowing real-time distant collaboration on a phylogenetic/phylogenomic project. This application can be accessed freely with a recent browser at the following page of the ATGC bioinformatics platform: http://www.atgc-montpellier.fr/compphy/ .",2014-12-14 +25704113,DrugNet: network-based drug-disease prioritization by integrating heterogeneous data.,"

Objective

Computational drug repositioning can lead to a considerable reduction in cost and time in any drug development process. Recent approaches have addressed the network-based nature of biological information for performing complex prioritization tasks. In this work, we propose a new methodology based on heterogeneous network prioritization that can aid researchers in the drug repositioning process.

Methods

We have developed DrugNet, a new methodology for drug-disease and disease-drug prioritization. Our approach is based on a network-based prioritization method called ProphNet which has recently been developed by the authors. ProphNet is able to integrate data from complex networks involving a wide range of types of elements and interactions. In this work, we built a network of interconnected drugs, proteins and diseases and applied DrugNet to different types of tests for drug repositioning.

Results

We tested the performance of our approach on different validation tests, including cross validation and tests based on real clinical trials. DrugNet achieved a mean AUC value of 0.9552±0.0015 in 5-fold cross validation tests, and a mean AUC value of 0.8364 for tests based on recent clinical trials (phases 0-4) not present in our data. These results suggest that DrugNet could be very useful for discovering new drug uses. We also studied specific cases of particular interest, proving the benefits of heterogeneous data integration in this problem.

Conclusions

Our methodology suggests that new drugs can be repositioned by generating ranked lists of drugs based on a given disease query or vice versa. Our study shows that the simultaneous integration of information about diseases, drugs and targets can lead to a significant improvement in drug repositioning tasks. DrugNet is available as a web tool from http://genome2.ugr.es/drugnet/ (accessed 23.09.14). Matlab source code is also available on the website.",2015-01-13 +21420460,AllerML: markup language for allergens.,"Many concerns have been raised about the potential allergenicity of novel, recombinant proteins into food crops. Guidelines, proposed by WHO/FAO and EFSA, include the use of bioinformatics screening to assess the risk of potential allergenicity or cross-reactivities of all proteins introduced, for example, to improve nutritional value or promote crop resistance. However, there are no universally accepted standards that can be used to encode data on the biology of allergens to facilitate using data from multiple databases in this screening. Therefore, we developed AllerML a markup language for allergens to assist in the automated exchange of information between databases and in the integration of the bioinformatics tools that are used to investigate allergenicity and cross-reactivity. As proof of concept, AllerML was implemented using the Structural Database of Allergenic Proteins (SDAP; http://fermi.utmb.edu/SDAP/) database. General implementation of AllerML will promote automatic flow of validated data that will aid in allergy research and regulatory analysis.",2011-03-21 +25990723,MS2PIP prediction server: compute and visualize MS2 peak intensity predictions for CID and HCD fragmentation.,"We present an MS(2) peak intensity prediction server that computes MS(2) charge 2+ and 3+ spectra from peptide sequences for the most common fragment ions. The server integrates the Unimod public domain post-translational modification database for modified peptides. 
The prediction model is an improvement of the previously published MS(2)PIP model for Orbitrap-LTQ CID spectra. Predicted MS(2) spectra can be downloaded as a spectrum file and can be visualized in the browser for comparisons with observations. In addition, we added prediction models for HCD fragmentation (Q-Exactive Orbitrap) and show that these models compute accurate intensity predictions on par with CID performance. We also show that training prediction models for CID and HCD separately improves the accuracy for each fragmentation method. The MS(2)PIP prediction server is accessible from http://iomics.ugent.be/ms2pip.",2015-05-18 +24140881,SPINVERT: a program for refinement of paramagnetic diffuse scattering data.,"We present a program (spinvert; http://spinvert.chem.ox.ac.uk) for refinement of magnetic diffuse scattering data for frustrated magnets, spin liquids, spin glasses, and other magnetically disordered materials. The approach uses reverse Monte Carlo refinement to fit a large configuration of spins to experimental powder neutron diffraction data. Despite fitting to spherically averaged data, this approach allows the recovery of the three-dimensional magnetic diffuse scattering pattern and the spin-pair correlation function. We illustrate the use of the spinvert program with two case studies. First, we use simulated powder data for the canonical antiferromagnetic Heisenberg model on the kagome lattice to discuss the sensitivity of spinvert refinement to both pairwise and higher-order spin correlations. The effect of limited experimental data on the results is also considered. Second, we re-analyse published experimental data on the frustrated system Y0.5Ca0.5BaCo4O7. 
The results from spinvert refinement indicate similarities between Y0.5Ca0.5BaCo4O7 and its parent compound YBaCo4O7, which were overlooked in previous analyses using powder data.",2013-10-18 +23219992,MENT: methylation and expression database of normal and tumor tissues.,"Integrated analysis of DNA methylation and gene expression can reveal specific epigenetic patterns that are important during carcinogenesis. We built an integrated database of DNA methylation and gene expression termed MENT (Methylation and Expression database of Normal and Tumor tissues) to provide researchers information on both DNA methylation and gene expression in diverse cancers. It contains integrated data of DNA methylation, gene expression, correlation of DNA methylation and gene expression in paired samples, and clinicopathological conditions gathered from the GEO (Gene Expression Omnibus) and TCGA (The Cancer Genome Atlas). A user-friendly interface allows users to search for differential DNA methylation by either 'gene search' or 'dataset search'. The 'gene search' returns which conditions are differentially methylated in a gene of interest, while 'dataset search' returns which genes are differentially methylated in a condition of interest based on filtering options such as direction, DM (differential methylation value), and p-value. MENT is the first database which provides both DNA methylation and gene expression information in diverse normal and tumor tissues. Its user-friendly interface allows users to easily search and view both DNA methylation and gene expression patterns. MENT is freely available at http://mgrc.kribb.re.kr:8080/MENT/.",2012-12-07 +23104379,Glycan fragment database: a database of PDB-based glycan 3D structures.,"The glycan fragment database (GFDB), freely available at http://www.glycanstructure.org, is a database of the glycosidic torsion angles derived from the glycan structures in the Protein Data Bank (PDB). 
Analogous to protein structure, the structure of an oligosaccharide chain in a glycoprotein, referred to as a glycan, can be characterized by the torsion angles of glycosidic linkages between relatively rigid carbohydrate monomeric units. Knowledge of accessible conformations of biologically relevant glycans is essential in understanding their biological roles. The GFDB provides an intuitive glycan sequence search tool that allows the user to search complex glycan structures. After a glycan search is complete, each glycosidic torsion angle distribution is displayed in terms of the exact match and the fragment match. The exact match results are from the PDB entries that contain the glycan sequence identical to the query sequence. The fragment match results are from the entries with the glycan sequence whose substructure (fragment) or entire sequence is matched to the query sequence, such that the fragment results implicitly include the influences from the nearby carbohydrate residues. In addition, clustering analysis based on the torsion angle distribution can be performed to obtain the representative structures among the searched glycan structures.",2012-10-26 +24960286,"De novo reconstruction of gene regulatory networks from time series data, an approach based on formal methods.","Reverse engineering of gene regulatory relationships from genomics data is a crucial task to dissect the complex underlying regulatory mechanism occurring in a cell. From a computational point of view the reconstruction of gene regulatory networks is an undetermined problem as the large number of possible solutions is typically high in contrast to the number of available independent data points. Many possible solutions can fit the available data, explaining the data equally well, but only one of them can be the biologically true solution. Several strategies have been proposed in literature to reduce the search space and/or extend the amount of independent information. 
In this paper we propose a novel algorithm based on formal methods, mathematically rigorous techniques widely adopted in engineering to specify and verify complex software and hardware systems. Starting with a formal specification of gene regulatory hypotheses we are able to mathematically prove whether a time course experiment belongs or not to the formal specification, determining in fact whether a gene regulation exists or not. The method is able to detect both direction and sign (inhibition/activation) of regulations whereas most of literature methods are limited to undirected and/or unsigned relationships. We empirically evaluated the approach on experimental and synthetic datasets in terms of precision and recall. In most cases we observed high levels of accuracy outperforming the current state of art, despite the computational cost increases exponentially with the size of the network. We made available the tool implementing the algorithm at the following url: http://www.bioinformatics.unisannio.it.",2014-06-21 +21247929,A genotypic and phenotypic information source for marker-assisted selection of cereals: the CEREALAB database.,"The CEREALAB database aims to store genotypic and phenotypic data obtained by the CEREALAB project and to integrate them with already existing data sources in order to create a tool for plant breeders and geneticists. The database can help them in unravelling the genetics of economically important phenotypic traits; in identifying and choosing molecular markers associated to key traits; and in choosing the desired parentals for breeding programs. The database is divided into three sub-schemas corresponding to the species of interest: wheat, barley and rice; each sub-schema is then divided into two sub-ontologies, regarding genotypic and phenotypic data, respectively. Database URL: http://www.cerealab.unimore.it/jws/cerealab.jnlp.",2011-01-18 +23113130,Cigarette smoking in iran.,"

Background

Cigarette smoking is the largest preventable cause of death worldwide. No systematic review is available on the situation of the smoking in Iran, so we decided to provide an overview of the studies in the field of smoking in Iranian populations.

Methods

Published Persian-language papers of all types until 2009 indexed in the IranMedex (http://www.iranmedex.com) and Magiran (http://www.magiran.com). Reports of World Health Organization were also searched and optionally employed. The studies concerning passive smoking or presenting the statistically insignificant side effects were excluded. Databases were searched using various combinations of the following terms: cigarette, smoking, smoking cessation, prevalence, history, side effects, and lung cancer by independent reviewers. All the 83 articles concerning the prevalence or side effects of the smoking habit in any Iranian population were selected. The prevalence rate of daily cigarette smoking and the 95% confidence interval as well as smoking health risk associated odds ratio (OR) were retrieved from the articles or calculated.

Results

The reported prevalence rates of the included studies, the summary of smoking-related side effects and the ORs (95%CI) of smoking associated risks and the available data on smoking cessation in Iran have been shown in the article.

Conclusion

Because of lack of certain data, special studies on local pattern of tobacco use in different districts, about the relationship between tobacco use and other diseases, especially non communicable diseases, and besides extension of smoking cessation strategies, studies on efficacy of these methods seems to be essential in this field.",2012-02-29 +21700674,UCHIME improves sensitivity and speed of chimera detection.,"

Motivation

Chimeric DNA sequences often form during polymerase chain reaction amplification, especially when sequencing single regions (e.g. 16S rRNA or fungal Internal Transcribed Spacer) to assess diversity or compare populations. Undetected chimeras may be misinterpreted as novel species, causing inflated estimates of diversity and spurious inferences of differences between populations. Detection and removal of chimeras is therefore of critical importance in such experiments.

Results

We describe UCHIME, a new program that detects chimeric sequences with two or more segments. UCHIME either uses a database of chimera-free sequences or detects chimeras de novo by exploiting abundance data. UCHIME has better sensitivity than ChimeraSlayer (previously the most sensitive database method), especially with short, noisy sequences. In testing on artificial bacterial communities with known composition, UCHIME de novo sensitivity is shown to be comparable to Perseus. UCHIME is >100× faster than Perseus and >1000× faster than ChimeraSlayer.

Contact

robert@drive5.com

Availability

Source, binaries and data: http://drive5.com/uchime.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-23 +26873927,An efficient gene-gene interaction test for genome-wide association studies in trio families.,"

Motivation

Several efficient gene-gene interaction tests have been developed for unrelated case-control samples in genome-wide association studies (GWAS), making it possible to test tens of billions of interaction pairs of single-nucleotide polymorphisms (SNPs) in a reasonable timeframe. However, current family-based gene-gene interaction tests are computationally expensive and are not applicable to genome-wide interaction analysis.

Results

We developed an efficient family-based gene-gene interaction test, GCORE, for trios (i.e. two parents and one affected sib). The GCORE compares interlocus correlations at two SNPs between the transmitted and non-transmitted alleles. We used simulation studies to compare the statistical properties such as type I error rates and power for the GCORE with several other family-based interaction tests under various scenarios. We applied the GCORE to a family-based GWAS for autism consisting of approximately 2000 trios. Testing a total of 22 471 383 013 interaction pairs in the GWAS can be finished in 36 h by the GCORE without large-scale computing resources, demonstrating that the test is practical for genome-wide gene-gene interaction analysis in trios.

Availability and implementation

GCORE is implemented with C++ and is available at http://gscore.sourceforge.net

Contact

rchung@nhri.org.tw

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-11 +21331789,Development of a data entry auditing protocol and quality assurance for a tissue bank database.,"Human transcription error is an acknowledged risk when extracting information from paper records for entry into a database. For a tissue bank, it is critical that accurate data are provided to researchers with approved access to tissue bank material. The challenges of tissue bank data collection include manual extraction of data from complex medical reports that are accessed from a number of sources and that differ in style and layout. As a quality assurance measure, the Breast Cancer Tissue Bank (http:\\www.abctb.org.au) has implemented an auditing protocol and in order to efficiently execute the process, has developed an open source database plug-in tool (eAuditor) to assist in auditing of data held in our tissue bank database. Using eAuditor, we have identified that human entry errors range from 0.01% when entering donor's clinical follow-up details, to 0.53% when entering pathological details, highlighting the importance of an audit protocol tool such as eAuditor in a tissue bank database. eAuditor was developed and tested on the Caisis open source clinical-research database; however, it can be integrated in other databases where similar functionality is required.",2011-02-18 +28332976,Mental Health and Latent Toxoplasmosis: Comparison of Individuals with and without Anti-Toxoplasma Antibodies.,"

Aim

There is evidence to suggest that the protozoan Toxoplasma gondii affects the mental health of people who are infected with it. The aim of the present study was to examine the relationship between T. gondii and mental health.

Methods

A total of 200 students (87 men and 113 women) of Jundishapur University of Medical Sciences (Ahvaz, Iran) were tested for the presence of anti-Toxoplasma antibodies and completed the General Health Questionnaire (see Appendix 1, available at: http://www.longwoods.com/content/24938) and a demographic form. Data were analyzed using independent samples t-test, chi-square test and Fisher's exact test.

Results

Infected women had significantly lower scores in somatic symptoms (p = 0.04), anxiety/insomnia (p = 0.006) and depression (p = 0.04) compared with non-infected women. Difference in social dysfunction was not significant (p > 0.05). There were no significant differences in somatic symptoms, anxiety/insomnia, depression and social dysfunction between infected and non-infected men (all p > 0.05).

Conclusion

Our findings indicate that latent toxoplasmosis can affect some components of mental health just in women.",2016-01-01 +24489365,bwtool: a tool for bigWig files.,"

Unlabelled

BigWig files are a compressed, indexed, binary format for genome-wide signal data for calculations (e.g. GC percent) or experiments (e.g. ChIP-seq/RNA-seq read depth). bwtool is a tool designed to read bigWig files rapidly and efficiently, providing functionality for extracting data and summarizing it in several ways, globally or at specific regions. Additionally, the tool enables the conversion of the positions of signal data from one genome assembly to another, also known as 'lifting'. We believe bwtool can be useful for the analyst frequently working with bigWig data, which is becoming a standard format to represent functional signals along genomes. The article includes supplementary examples of running the software.

Availability and implementation

The C source code is freely available under the GNU public license v3 at http://cromatina.crg.eu/bwtool.",2014-01-30 +22829725,Bacterial genome mapper: A comparative bacterial genome mapping tool.,"

Unlabelled

Recently, next generation sequencing (NGS) technologies have led to a revolutionary increase in sequencing speed and cost-efficacy. Consequently, a vast number of contigs from many recently sequenced bacterial genomes remain to be accurately mapped and annotated, requiring the development of more convenient bioinformatics programs. In this paper, we present a newly developed web-based bioinformatics program, Bacterial Genome Mapper, which is suitable for mapping and annotating contigs that have been assembled from bacterial genome sequence raw data. By constructing a multiple alignment map between target contig sequences and two reference bacterial genome sequences, this program also provides very useful comparative genomics analysis of draft bacterial genomes.

Availability

The database is available for free at http://mbgm.kribb.re.kr.",2012-06-16 +27224861,PEPIS: A Pipeline for Estimating Epistatic Effects in Quantitative Trait Locus Mapping and Genome-Wide Association Studies.,"The term epistasis refers to interactions between multiple genetic loci. Genetic epistasis is important in regulating biological function and is considered to explain part of the 'missing heritability,' which involves marginal genetic effects that cannot be accounted for in genome-wide association studies. Thus, the study of epistasis is of great interest to geneticists. However, estimating epistatic effects for quantitative traits is challenging due to the large number of interaction effects that must be estimated, thus significantly increasing computing demands. Here, we present a new web server-based tool, the Pipeline for estimating EPIStatic genetic effects (PEPIS), for analyzing polygenic epistatic effects. The PEPIS software package is based on a new linear mixed model that has been used to predict the performance of hybrid rice. The PEPIS includes two main sub-pipelines: the first for kinship matrix calculation, and the second for polygenic component analyses and genome scanning for main and epistatic effects. To accommodate the demand for high-performance computation, the PEPIS utilizes C/C++ for mathematical matrix computing. In addition, the modules for kinship matrix calculations and main and epistatic-effect genome scanning employ parallel computing technology that effectively utilizes multiple computer nodes across our networked cluster, thus significantly improving the computational speed. For example, when analyzing the same immortalized F2 rice population genotypic data examined in a previous study, the PEPIS returned identical results at each analysis step with the original prototype R code, but the computational time was reduced from more than one month to about five minutes. 
These advances will help overcome the bottleneck frequently encountered in genome wide epistatic genetic effect analysis and enable accommodation of the high computational demand. The PEPIS is publically available at http://bioinfo.noble.org/PolyGenic_QTL/.",2016-05-25 +27334473,iPTM-mLys: identifying multiple lysine PTM sites and their different types.,"

Motivation

Post-translational modification, abbreviated as PTM, refers to the change of the amino acid side chains of a protein after its biosynthesis. Owing to its significance for in-depth understanding various biological processes and developing effective drugs, prediction of PTM sites in proteins have currently become a hot topic in bioinformatics. Although many computational methods were established to identify various single-label PTM types and their occurrence sites in proteins, no method has ever been developed for multi-label PTM types. As one of the most frequently observed PTMs, the K-PTM, namely, the modification occurring at lysine (K), can be usually accommodated with many different types, such as 'acetylation', 'crotonylation', 'methylation' and 'succinylation'. Now we are facing an interesting challenge: given an uncharacterized protein sequence containing many K residues, which ones can accommodate two or more types of PTM, which ones only one, and which ones none?

Results

To address this problem, a multi-label predictor called iPTM-mLys has been developed. It represents the first multi-label PTM predictor ever established. The novel predictor is featured by incorporating the sequence-coupled effects into the general PseAAC, and by fusing an array of basic random forest classifiers into an ensemble system. Rigorous cross-validations via a set of multi-label metrics indicate that the first multi-label PTM predictor is very promising and encouraging.

Availability and implementation

For the convenience of most experimental scientists, a user-friendly web-server for iPTM-mLys has been established at http://www.jci-bioinfo.cn/iPTM-mLys, by which users can easily obtain their desired results without the need to go through the complicated mathematical equations involved.

Contact

wqiu@gordonlifescience.org, xxiao@gordonlifescience.org, kcchou@gordonlifescience.org. Supplementary information: Supplementary data are available at Bioinformatics online.",2016-06-22 +24402049,A data integration and visualization resource for the metabolic network of Synechocystis sp. PCC 6803.,"Data integration is a central activity in systems biology. The integration of genomic, transcript, protein, metabolite, flux, and computational data yields unprecedented information about the system level functioning of organisms. Often, data integration is done purely computationally, leaving the user with little insight in addition to statistical information. In this article, we present a visualization tool for the metabolic network of Synechocystis sp. PCC 6803, an important model cyanobacterium for sustainable biofuel production. We illustrate how this metabolic map can be used to integrate experimental and computational data for Synechocystis sp. PCC 6803 systems biology and metabolic engineering studies. Additionally, we discuss how this map, and the software infrastructure that we supply with it, can be used in the development of other organism-specific metabolic network visualizations. In addition to the Python console package VoNDA (http://vonda.sf.net), we provide a working demonstration of the interactive metabolic map and the associated Synechocystis sp. PCC 6803 genome-scale stoichiometric model, as well as various ready-to-visualize microarray data sets, at http://f-a-m-e.org/synechocytis.",2014-01-08 +23442919,Computational assembly of polymorphic amyloid fibrils reveals stable aggregates.,"Amyloid proteins aggregate into polymorphic fibrils that damage tissues of the brain, nerves, and heart. Experimental and computational studies have examined the structural basis and the nucleation of short fibrils, but the ability to predict and precisely quantify the stability of larger aggregates has remained elusive. 
We established a complete classification of fibril shapes and developed a tool called CreateFibril to build such complex, polymorphic, modular structures automatically. We applied stability landscapes, a technique we developed to reveal reliable fibril structural parameters, to assess fibril stability. CreateFibril constructed HET-s, Aβ, and amylin fibrils up to 17 nm in length, and utilized a novel dipolar solvent model that captured the effect of dipole-dipole interactions between water and very large molecular systems to assess their aqueous stability. Our results validate experimental data for HET-s and Aβ, and suggest novel (to our knowledge) findings for amylin. In particular, we predicted the correct structural parameters (rotation angles, packing distances, hydrogen bond lengths, and helical pitches) for the one and three predominant HET-s protofilaments. We reveal and structurally characterize all known Aβ polymorphic fibrils, including structures recently classified as wrapped fibrils. Finally, we elucidate the predominant amylin fibrils and assert that native amylin is more stable than its amyloid form. CreateFibril and a database of all stable polymorphic fibril models we tested, along with their structural energy landscapes, are available at http://amyloid.cs.mcgill.ca.",2013-02-01 +25978092,DBDiaSNP: An Open-Source Knowledgebase of Genetic Polymorphisms and Resistance Genes Related to Diarrheal Pathogens.,"Diarrhea is a highly common infection among children, responsible for significant morbidity and mortality rate worldwide. After pneumonia, diarrhea remains the second leading cause of neonatal deaths. Numerous viral, bacterial, and parasitic enteric pathogens are associated with diarrhea. With increasing antibiotic resistance among enteric pathogens, there is an urgent need for global surveillance of the mutations and resistance genes primarily responsible for resistance to antibiotic treatment. 
Single Nucleotide Polymorphisms are important in this regard as they have a vast potential to be utilized as molecular diagnostics for gene-disease or pharmacogenomics association studies linking genotype to phenotype. DBDiaSNP is a comprehensive repository of mutations and resistance genes among various diarrheal pathogens and hosts to advance breakthroughs that will find applications from development of sequence-based diagnostic tools to drug discovery. It contains information about 946 mutations and 326 resistance genes compiled from literature and various web resources. As of March 2015, it houses various pathogen genes and the mutations responsible for antibiotic resistance. The pathogens include, for example, DEC (Diarrheagenic E.coli), Salmonella spp., Campylobacter spp., Shigella spp., Clostridium difficile, Aeromonas spp., Helicobacter pylori, Entamoeba histolytica, Vibrio cholera, and viruses. It also includes mutations from hosts (e.g., humans, pigs, others) that render them either susceptible or resistant to a certain type of diarrhea. DBDiaSNP is therefore intended as an integrated open access database for researchers and clinicians working on diarrheal diseases. Additionally, we note that the DBDiaSNP is one of the first antibiotic resistance databases for the diarrheal pathogens covering mutations and resistance genes that have clinical relevance from a broad range of pathogens and hosts. For future translational research involving integrative biology and global health, the database offers veritable potentials, particularly for developing countries and worldwide monitoring and personalized effective treatment of pathogens associated with diarrhea. The database is accessible on the public domain at http://www.juit.ac.in/attachments/dbdiasnp/ .",2015-05-15 +25253206,ProbeAlign: incorporating high-throughput sequencing-based structure probing information into ncRNA homology search.,"

Background

Recent advances in RNA structure probing technologies, including the ones based on high-throughput sequencing, have improved the accuracy of thermodynamic folding with quantitative nucleotide-resolution structural information.

Results

In this paper, we present a novel approach, ProbeAlign, to incorporate the reactivities from high-throughput RNA structure probing into ncRNA homology search for functional annotation. To reduce the overhead of structure alignment on large-scale data, the specific pairing patterns in the query sequences are ignored. On the other hand, the partial structural information of the target sequences embedded in probing data is retrieved to guide the alignment. Thus the structure alignment problem is transformed into a sequence alignment problem with additional reactivity information. The benchmark results show that the prediction accuracy of ProbeAlign outperforms filter-based CMsearch with high computational efficiency. The application of ProbeAlign to the FragSeq data, which is based on genome-wide structure probing, has demonstrated its capability to search ncRNAs in a large-scale dataset from high-throughput sequencing.

Conclusions

By incorporating high-throughput sequencing-based structure probing information, ProbeAlign can improve the accuracy and efficiency of ncRNA homology search. It is a promising tool for ncRNA functional annotation on genome-wide datasets.

Availability

The source code of ProbeAlign is available at http://genome.ucf.edu/ProbeAlign.",2014-09-10 +24927962,rTRM-web: a web tool for predicting transcriptional regulatory modules for ChIP-seq-ed transcription factors.,"Transcription factors (TFs) bind to specific DNA regions, although their binding specificities cannot account for their cell type-specific functions. It has been shown in well-studied systems that TFs combine with co-factors into transcriptional regulatory modules (TRMs), which endow them with cell type-specific functions and additional modes of regulation. Therefore, the prediction of TRMs can provide fundamental mechanistic insights, especially when experimental data are limiting or when no regulatory proteins have been identified. Our method rTRM predicts TRMs by integrating genomic information from TF ChIP-seq data, cell type-specific gene expression and protein-protein interaction data. Here we present a freely available web interface to rTRM (http://www.rTRM.org/) supporting all the options originally described for rTRM while featuring flexible display and network calculation parameters, publication-quality figures as well as annotated information on the list of genes constituting the TRM.",2014-06-11 +21926122,MapReduce implementation of a hybrid spectral library-database search method for large-scale peptide identification.,"

Summary

A MapReduce-based implementation called MR-MSPolygraph for parallelizing peptide identification from mass spectrometry data is presented. The underlying serial method, MSPolygraph, uses a novel hybrid approach to match an experimental spectrum against a combination of a protein sequence database and a spectral library. Our MapReduce implementation can run on any Hadoop cluster environment. Experimental results demonstrate that, relative to the serial version, MR-MSPolygraph reduces the time to solution from weeks to hours, for processing tens of thousands of experimental spectra. Speedup and other related performance studies are also reported on a 400-core Hadoop cluster using spectral datasets from environmental microbial communities as inputs.

Availability

The source code along with user documentation are available on http://compbio.eecs.wsu.edu/MR-MSPolygraph.

Contact

ananth@eecs.wsu.edu; william.cannon@pnnl.gov.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-16 +21943367,iCTNet: a Cytoscape plugin to produce and analyze integrative complex traits networks.,"

Background

The speed at which biological datasets are being accumulated stands in contrast to our ability to integrate them meaningfully. Large-scale biological databases containing datasets of genes, proteins, cells, organs, and diseases are being created but they are not connected. Integration of these vast but heterogeneous sources of information will allow the systematic and comprehensive analysis of molecular and clinical datasets, spanning hundreds of dimensions and thousands of individuals. This integration is essential to capitalize on the value of current and future molecular- and cellular-level data on humans to gain novel insights about health and disease.

Results

We describe a new open-source Cytoscape plugin named iCTNet (integrated Complex Traits Networks). iCTNet integrates several data sources to allow automated and systematic creation of networks with up to five layers of omics information: phenotype-SNP association, protein-protein interaction, disease-tissue, tissue-gene, and drug-gene relationships. It facilitates the generation of general or specific network views with diverse options for more than 200 diseases. Built-in tools are provided to prioritize candidate genes and create modules of specific phenotypes.

Conclusions

iCTNet provides a user-friendly interface to search, integrate, visualize, and analyze genome-scale biological networks for human complex traits. We argue this tool is a key instrument that facilitates systematic integration of disparate large-scale data through network visualization, ultimately allowing the identification of disease similarities and the design of novel therapeutic approaches.The online database and Cytoscape plugin are freely available for academic use at: http://www.cs.queensu.ca/ictnet.",2011-09-26 +26860903,Mortality Risk for Acute Cholangitis (MAC): a risk prediction model for in-hospital mortality in patients with acute cholangitis.,"

Background

Acute cholangitis is a life-threatening bacterial infection of the biliary tract. Main focus of this study was to create a useful risk prediction model that helps physicians to assign patients with acute cholangitis into different management groups.

Methods

981 cholangitis episodes from 810 patients were analysed retrospectively at a German tertiary center.

Results

Out of eleven investigated statistical models fit to 22 predictors, the Random Forest model achieved the best (cross-)validated performance to predict mortality. The receiver operating characteristics (ROC) curve revealed a mean area under the curve (AUC) of 91.5 %. Dependent on the calculated mortality risk, we propose to stratify patients with acute cholangitis into a high and low risk group. The mean sensitivity, specificity, positive and negative predictive value of the corresponding optimal cutpoint were 82.9 %, 85.1 %, 19.0 % and 99.3 %, respectively. All of these results emerge from nested (cross-)validation and are supposed to reflect the model's performance expected for external data. An implementation of our risk prediction model including the specific treatment recommendations adopted from the Tokyo guidelines is available on http://www2.imse.med.tum.de:3838/ .

Conclusion

Our risk prediction model for mortality appears promising to stratify patients with acute cholangitis into different management groups. Additional validation of its performance should be provided by further prospective trails.",2016-02-09 +25974656,Multilocus Sequence Typing of Cronobacter Strains Isolated from Retail Foods and Environmental Samples.,"Cronobacter spp. are bacterial pathogens that affect children and immunocompromised adults. In this study, we used multilocus sequence typing (MLST) to determine sequence types (STs) in 11 Cronobacter spp. strains isolated from retail foods, 29 strains from dust samples obtained from vacuum cleaners, and 4 clinical isolates. Using biochemical tests, species-specific polymerase chain reaction, and MLST analysis, 36 strains were identified as Cronobacter sakazakii, and 6 were identified as Cronobacter malonaticus. In addition, one strain that originated from retail food and one from a dust sample from a vacuum cleaner were identified on the basis of MLST analysis as Cronobacter dublinensis and Cronobacter turicensis, respectively. Cronobacter spp. strains isolated from the retail foods were assigned to eight different MLST sequence types, seven of which were newly identified. The strains isolated from the dust samples were assigned to 7 known STs and 14 unknown STs. Three clinical isolates and one household dust isolate were assigned to ST4, which is the predominant ST associated with neonatal meningitis. One clinical isolate was classified based on MLST analysis as Cronobacter malonaticus and belonged to an as-yet-unknown ST. Three strains isolated from the household dust samples were assigned to ST1, which is another clinically significant ST. It can be concluded that Cronobacter spp. strains of different origin are genetically quite variable. The recovery of C. sakazakii strains belonging to ST1 and ST4 from the dust samples suggests the possibility that contamination could occur during food preparation. 
All of the novel STs and alleles for C. sakazakii, C. malonaticus, C. dublinensis, and C. turicensis determined in this study were deposited in the Cronobacter MLST database available online ( http://pubmlst.org/cronobacter/).",2015-05-14 +21385384,Comparative analysis of copy number variation detection methods and database construction.,"

Background

Array-based detection of copy number variations (CNVs) is widely used for identifying disease-specific genetic variations. However, the accuracy of CNV detection is not sufficient and results differ depending on the detection programs used and their parameters. In this study, we evaluated five widely used CNV detection programs, Birdsuite (mainly consisting of the Birdseye and Canary modules), Birdseye (part of Birdsuite), PennCNV, CGHseg, and DNAcopy from the viewpoint of performance on the Affymetrix platform using HapMap data and other experimental data. Furthermore, we identified CNVs of 180 healthy Japanese individuals using parameters that showed the best performance in the HapMap data and investigated their characteristics.

Results

The results indicate that Hidden Markov model-based programs PennCNV and Birdseye (part of Birdsuite), or Birdsuite show better detection performance than other programs when the high reproducibility rates of the same individuals and the low Mendelian inconsistencies are considered. Furthermore, when rates of overlap with other experimental results were taken into account, Birdsuite showed the best performance from the view point of sensitivity but was expected to include many false negatives and some false positives. The results of 180 healthy Japanese demonstrate that the ratio containing repeat sequences, not only segmental repeats but also long interspersed nuclear element (LINE) sequences both in the start and end regions of the CNVs, is higher in CNVs that are commonly detected among multiple individuals than that in randomly selected regions, and the conservation score based on primates is lower in these regions than in randomly selected regions. Similar tendencies were observed in HapMap data and other experimental data.

Conclusions

Our results suggest that not only segmental repeats but also interspersed repeats, especially LINE sequences, are deeply involved in CNVs, particularly in common CNV formations.The detected CNVs are stored in the CNV repository database newly constructed by the ""Japanese integrated database project"" for sharing data among researchers. http://gwas.lifesciencedb.jp/cgi-bin/cnvdb/cnv_top.cgi.",2011-03-07 +27517362,Biomarker Levels of Toxic Metals among Asian Populations in the United States: NHANES 2011-2012.,"

Introduction

The Centers for Disease Control and Prevention (CDC) recently found that Asians have considerably higher biomarker levels of cadmium, lead, mercury, and arsenic than whites, blacks, Mexican Americans, and other Hispanics in the United States.

Objective

Our goal was to further evaluate the higher metal biomarker levels among Asians.

Methods

Biomarker data (blood cadmium, blood lead, blood mercury, urinary total arsenic, and urinary dimethylarsinic acic) from individuals ≥ 6 years of age were obtained from the 2011-2012 National Health and Nutrition Examination Survey (NHANES). We compared geometric mean levels of these five metal biomarkers in Asians with those of four other NHANES race/ethnic groups (white, black, Mexican American, and other Hispanic), and across three Asian subgroups (Chinese, Asian Indian, and other Asian). We also evaluated associations between biomarker levels and sociodemographic, physical, dietary, and behavioral covariates across the Asian subgroups.

Results

Asians had significantly higher levels of all five metal biomarkers than other race/ethnic groups (p < 0.05), regardless of sociodemographic, physical, dietary, behavioral, or geographic characteristics. We also found variations in biomarker levels across the Asian subgroups. In general, Asian Indians had lower levels than the other two Asian subgroups, except for blood lead. The following characteristics were found to be significant predictors of several biomarker levels: sex, age, education, birthplace, smoking, and fish consumption.

Conclusions

Overall, the Asian group had the highest geometric mean biomarker levels for all of the five metal variables. Furthermore, we provided evidence that significant variations in the biomarker levels are present across the Asian subgroups in the United States. Citation: Awata H, Linder S, Mitchell LE, Delclos GL. 2017. Biomarker levels of toxic metals among Asian populations in the United States: NHANES 2011-2012. Environ Health Perspect 125:306-313; http://dx.doi.org/10.1289/EHP27.",2016-08-12 +27518881,Disinfection By-Product Exposures and the Risk of Specific Cardiac Birth Defects.,"

Background

Epidemiological studies suggest that women exposed to disinfection by-products (DBPs) have an increased risk of delivering babies with cardiovascular defects (CVDs).

Objective

We examined nine CVDs in relation to categorical DBP exposures including bromoform, chloroform, dibromochloromethane (DBCM), bromodichloromethane (BDCM), monobromoacetic acid (MBAA), dichloroacetic acid (DCAA), trichloroacetic acid (TCAA), and summary DBP measures (HAA5, THMBr, THM4, and DBP9).

Methods

We calculated adjusted odds ratios (aORs) in a case-control study of birth defects in Massachusetts with complete quarterly 1999-2004 trihalomethane (THM) and haloacetic acid (HAA) data. We randomly matched 10 controls each to 904 CVD cases based on week of conception. Weight-averaged aggregate first-trimester DBP exposures were assigned to individuals based on residence at birth.

Results

We detected associations for tetralogy of Fallot and the upper exposure categories for TCAA, DCAA, and HAA5 (aOR range, 3.34-6.51) including positive exposure-response relationships for DCAA and HAA5. aORs consistent in magnitude were detected between atrial septal defects and bromoform (aOR = 1.56; 95% CI: 1.01, 2.43), as well as DBCM, chloroform, and THM4 (aOR range, 1.26-1.67). Ventricular septal defects (VSDs) were associated with the highest bromoform (aOR = 1.85; 95% CI: 1.20, 2.83), MBAA (aOR = 1.81; 95% CI: 0.85, 3.84), and DBCM (aOR = 1.54; 95% CI: 1.00, 2.37) exposure categories.

Conclusions

To our knowledge, this is the first birth defect study to develop multi-DBP adjusted regression models as well as the first CVD study to evaluate HAA exposures and the second to evaluate bromoform exposures. Our findings, therefore, inform exposure specificity for the consistent associations previously reported between THM4 and CVDs including VSDs. Citation: Wright JM, Evans A, Kaufman JA, Rivera-Núñez Z, Narotsky MG. 2017. Disinfection by-product exposures and the risk of specific cardiac birth defects. Environ Health Perspect 125:269-277; http://dx.doi.org/10.1289/EHP103.",2016-08-12 +24261964,"CAPER 2.0: an interactive, configurable, and extensible workflow-based platform to analyze data sets from the Chromosome-centric Human Proteome Project.","The Chromosome-centric Human Proteome Project (C-HPP) aims to map and annotate the entire human proteome by the ""chromosome-by-chromosome"" strategy. As the C-HPP proceeds, the increasing volume of proteomic data sets presents a challenge for customized and reproducible bioinformatics data analyses for mining biological knowledge. To address this challenge, we updated the previous static proteome browser CAPER into a higher version, CAPER 2.0 - an interactive, configurable and extensible workflow-based platform for C-HPP data analyses. In addition to the previous visualization functions of track-view and heatmap-view, CAPER 2.0 presents a powerful toolbox for C-HPP data analyses and also integrates a configurable workflow system that supports the view, construction, edit, run, and share of workflows. These features allow users to easily conduct their own C-HPP proteomic data analyses and visualization by CAPER 2.0. We illustrate the usage of CAPER 2.0 with four specific workflows for finding missing proteins, mapping peptides to chromosomes for genome annotation, integrating peptides with transcription factor binding sites from ENCODE data sets, and functionally annotating proteins. 
The updated CAPER is available at http://www.bprc.ac.cn/CAPE.",2013-12-03 +24121332,Experiences with archived raw diffraction images data: capturing cisplatin after chemical conversion of carboplatin in high salt conditions for a protein crystal.,"The archiving of raw diffraction images data is the focus of an IUCr Diffraction Data Deposition Working Group (see http://forums.iucr.org/). Experience in archiving and sharing of raw diffraction images data in collaboration between Manchester and Utrecht Universities, studying the binding of the important anti-cancer agents, cisplatin and carboplatin to histidine in a protein, has recently been published. Subsequently, these studies have been expanded due to further analyses of each data set of raw diffraction images using the diffraction data processing program XDS. The raw diffraction images, measured at Manchester University, are available for download at Utrecht University and now also mirrored at the Tardis Raw Diffraction Data Archive in Australia. Thus a direct comparison of processed diffraction and derived protein model data from XDS with the published results has been made. The issue of conversion of carboplatin to cisplatin under a high chloride salt concentration has been taken up and a detailed crystallographic assessment is provided. Overall, these new structural chemistry research results are presented followed by a short summary of developing raw data archiving policy and practicalities as well as documenting the challenge of making appropriate and detailed recording of the metadata for crystallography.",2013-10-01 +26180989,The ElderSmile TimeMap: Benefits of Connecting Statistics With Time and Place.,"Community-based programs are critical for locally targeted public health education and accessible service delivery. Deriving useful information from such programs is important for their own evaluation and improvement and may facilitate research collaboration with partners and experts. 
Here we present an interactive Web-based application designed for a community-based oral health outreach program called ElderSmile to demonstrate how data can be summarized, filtered, compared, and visualized by time and place to inform program planning, evaluation, and research. The ElderSmile TimeMap ( http://www.acsu.buffalo.edu/∼smetcalf/resources/timemap.html ) is an emergent product of a US National Institutes of Health-funded collaboration of knowledge sharing among multidisciplinary team members at the University at Buffalo, Columbia University, and New York University.",2015-07-16 +25133496,GPCRsort-responding to the next generation sequencing data challenge: prediction of G protein-coupled receptor classes using only structural region lengths.,"Next generation sequencing (NGS) and the attendant data deluge are increasingly impacting molecular life sciences research. Chief among the challenges and opportunities is to enhance our ability to classify molecular target data into meaningful and cohesive systematic nomenclature. In this vein, the G protein-coupled receptors (GPCRs) are the largest and most divergent receptor family that plays a crucial role in a host of pathophysiological pathways. For the pharmaceutical industry, GPCRs are a major drug target and it is estimated that 60%-70% of all medicines in development today target GPCRs. Hence, they require an efficient and rapid classification to group the members according to their functions. In addition to NGS and the Big Data challenge we currently face, an emerging number of orphan GPCRs further demand for novel, rapid, and accurate classification of the receptors since the current classification tools are inadequate and slow. This study presents the development of a new classification tool for GPCRs using the structural features derived from their primary sequences: GPCRsort. 
Comparison experiments with the current known GPCR classification techniques showed that GPCRsort is able to rapidly (in the order of minutes) classify uncharacterized GPCRs with 97.3% accuracy, whereas the best available technique's accuracy is 90.7%. GPCRsort is available in the public domain for postgenomics life scientists engaged in GPCR research with NGS: http://bioserver.ceng.metu.edu.tr/GPCRSort .",2014-08-18 +22992047,The substrate/product-binding modes of a novel GH120 β-xylosidase (XylC) from Thermoanaerobacterium saccharolyticum JW/SL-YS485.,"Xylan-1,4-β-xylosidase (β-xylosidase) hydrolyses xylo-oligomers at their non-reducing ends into individual xylose units. Recently, XylC, a β-xylosidase from Thermoanaerobacterium saccharolyticum JW/SL-YS485, was found to be structurally different from corresponding glycosyl hydrolases in the CAZy database (http://www.cazy.org/), and was subsequently classified as the first member of a novel family of glycoside hydrolases (GH120). In the present paper, we report three crystal structures of XylC in complex with Tris, xylobiose and xylose at 1.48-2.05 Å (1 Å=0.1 nm) resolution. XylC assembles into a tetramer, and each monomer comprises two distinct domains. The core domain is a right-handed parallel β-helix (residues 1-75 and 201-638) and the flanking region (residues 76-200) folds into a β-sandwich domain. The enzyme contains an open carbohydrate-binding cleft, allowing accommodation of longer xylo-oligosaccharides. On the basis of the crystal structures and in agreement with previous kinetic data, we propose that XylC cleaves the glycosidic bond by the retaining mechanism using two acidic residues Asp382 (nucleophile) and Glu405 (general acid/base). 
In addition to the active site, nine other xylose-binding sites were consistently observed in each of the four monomers, providing a possible reason for the high tolerance of product inhibition.",2012-12-01 +25969446,The TOPCONS web server for consensus prediction of membrane protein topology and signal peptides.,"TOPCONS (http://topcons.net/) is a widely used web server for consensus prediction of membrane protein topology. We hereby present a major update to the server, with some substantial improvements, including the following: (i) TOPCONS can now efficiently separate signal peptides from transmembrane regions. (ii) The server can now differentiate more successfully between globular and membrane proteins. (iii) The server now is even slightly faster, although a much larger database is used to generate the multiple sequence alignments. For most proteins, the final prediction is produced in a matter of seconds. (iv) The user-friendly interface is retained, with the additional feature of submitting batch files and accessing the server programmatically using standard interfaces, making it thus ideal for proteome-wide analyses. Indicatively, the user can now scan the entire human proteome in a few days. (v) For proteins with homology to a known 3D structure, the homology-inferred topology is also displayed. (vi) Finally, the combination of methods currently implemented achieves an overall increase in performance by 4% as compared to the currently available best-scoring methods and TOPCONS is the only method that can identify signal peptides and still maintain a state-of-the-art performance in topology predictions.",2015-05-12 +26311864,Fluoroquinolone Resistance in Salmonella and the Utility of Pefloxacin Disk Diffusion [corrected].,"Fluoroquinolone resistance is a serious and increasingly common problem in Salmonella. Two companion studies in this issue of the Journal of Clinical Microbiology (E. Deak, R. Skov, J. A. Hindler, and R. M. 
Humphries, J Clin Microbiol 53:3405-3410, 2015, http://dx.doi.org/10.1128/JCM.01393-15; R. Skov, E. Matuschek, M. Sjölund-Karlsson, J. Åhman, A. Petersen, M. Stegger, M. Torpdahl, and G. Kahlmeter, J Clin Microbiol 53:3411-3417, 2015, http://dx.doi.org/10.1128/JCM.01287-15) provide data to support the use of pefloxacin disk diffusion as a convenient and inexpensive surrogate laboratory method to detect fluoroquinolone resistance in Salmonella when the direct measurement of fluoroquinolone MICs is not feasible [corrected]. Recently updated CLSI and EUCAST susceptibility breakpoints will help to optimize clinical outcomes and reduce the likelihood of emergent resistance.",2015-08-26 +21926893,Hyperbilirubinemia: current guidelines and emerging therapies.,"It is estimated that about two thirds of newborns will appear clinically jaundiced during their first weeks of life. As newborns and their mothers spend fewer days in the hospital after birth, the number of infants readmitted yearly in the United States for neonatal jaundice over the last 10 years has increased by 160%. A portion of these infants present to the emergency department, requiring a careful history and physical examination assessing them for the risk factors associated with pathologic bilirubin levels. Although the spectrum of illness may be great, the overwhelming etiology of neonatal jaundice presenting to an emergency department is physiologic and not due to infection or isoimmunization. Therefore, a little more than a good history, physical examination, and indirect/direct bilirubin levels are needed to evaluate an otherwise well-appearing jaundiced newborn. The American Academy of Pediatrics' 2004 clinical practice guidelines for ""Management of Hyperbilirubinemia in the Newborn Infant 35 or More Weeks of Gestation"" are a helpful and easily accessible resource when evaluating jaundiced newborns (available at http://aappolicy.aappublications.org/cgi/content/full/pediatrics;114/1/297). 
There are several exciting developments on the horizon for the diagnosis and management of hyperbilirubinemia including increasing use of transcutaneous bilirubin measuring devices and medications such as tin mesoporphyrin and intravenous immunoglobulin that may decrease the need for exchange transfusions.",2011-09-01 +25961669,Evaluation and integration of cancer gene classifiers: identification and ranking of plausible drivers.,"The number of mutated genes in cancer cells is far larger than the number of mutations that drive cancer. The difficulty this creates for identifying relevant alterations has stimulated the development of various computational approaches to distinguishing drivers from bystanders. We develop and apply an ensemble classifier (EC) machine learning method, which integrates 10 classifiers that are publically available, and apply it to breast and ovarian cancer. In particular we find the following: (1) Using both standard and non-standard metrics, EC almost always outperforms single method classifiers, often by wide margins. (2) Of the 50 highest ranked genes for breast (ovarian) cancer, 34 (30) are associated with other cancers in either the OMIM, CGC or NCG database (P < 10(-22)). (3) Another 10, for both breast and ovarian cancer, have been identified by GWAS studies. (4) Several of the remaining genes--including a protein kinase that regulates the Fra-1 transcription factor which is overexpressed in ER negative breast cancer cells; and Fyn, which is overexpressed in pancreatic and prostate cancer, among others--are biologically plausible. Biological implications are briefly discussed. Source codes and detailed results are available at http://www.visantnet.org/misi/driver_integration.zip.",2015-05-11 +21536935,In the clinic. Celiac disease.,"This issue provides a clinical overview of celiac disease focusing on prevention, diagnosis, treatment, practice improvement, and patient information. 
Readers can complete the accompanying CME quiz for 1.5 credits. Only ACP members and individual subscribers can access the electronic features of In the Clinic. Non-subscribers who wish to access this issue of In the Clinic can elect ""Pay for View."" Subscribers can receive 1.5 category 1 CME credits by completing the CME quiz that accompanies this issue of In the Clinic. The content of In the Clinic is drawn from the clinical information and education resources of the American College of Physicians (ACP), including PIER (Physicians' Information and Education Resource) and MKSAP (Medical Knowledge and Self Assessment Program). Annals of Internal Medicine editors develop In the Clinic from these primary sources in collaboration with the ACP's Medical Education and Publishing division and with assistance of science writers and physician writers. Editorial consultants from PIER and MKSAP provide expert review of the content. Readers who are interested in these primary resources for more detail can consult www.acponline.org, http://pier.acponline.org, and other resources referenced within each issue of In the Clinic.",2011-05-01 +23203880,YM500: a small RNA sequencing (smRNA-seq) database for microRNA research.,"MicroRNAs (miRNAs) are small RNAs ∼22 nt in length that are involved in the regulation of a variety of physiological and pathological processes. Advances in high-throughput small RNA sequencing (smRNA-seq), one of the next-generation sequencing applications, have reshaped the miRNA research landscape. In this study, we established an integrative database, the YM500 (http://ngs.ym.edu.tw/ym500/), containing analysis pipelines and analysis results for 609 human and mice smRNA-seq results, including public data from the Gene Expression Omnibus (GEO) and some private sources. YM500 collects analysis results for miRNA quantification, for isomiR identification (incl. RNA editing), for arm switching discovery, and, more importantly, for novel miRNA predictions. 
Wetlab validation on >100 miRNAs confirmed high correlation between miRNA profiling and RT-qPCR results (R = 0.84). This database allows researchers to search these four different types of analysis results via our interactive web interface. YM500 allows researchers to define the criteria of isomiRs, and also integrates the information of dbSNP to help researchers distinguish isomiRs from SNPs. A user-friendly interface is provided to integrate miRNA-related information and existing evidence from hundreds of sequencing datasets. The identified novel miRNAs and isomiRs hold the potential for both basic research and biotech applications.",2012-11-29 +24494671,QC metrics from CPTAC raw LC-MS/MS data interpreted through multivariate statistics.,"Shotgun proteomics experiments integrate a complex sequence of processes, any of which can introduce variability. Quality metrics computed from LC-MS/MS data have relied upon identifying MS/MS scans, but a new mode for the QuaMeter software produces metrics that are independent of identifications. Rather than evaluating each metric independently, we have created a robust multivariate statistical toolkit that accommodates the correlation structure of these metrics and allows for hierarchical relationships among data sets. The framework enables visualization and structural assessment of variability. Study 1 for the Clinical Proteomics Technology Assessment for Cancer (CPTAC), which analyzed three replicates of two common samples at each of two time points among 23 mass spectrometers in nine laboratories, provided the data to demonstrate this framework, and CPTAC Study 5 provided data from complex lysates under Standard Operating Procedures (SOPs) to complement these findings. Identification-independent quality metrics enabled the differentiation of sites and run-times through robust principal components analysis and subsequent factor analysis. 
Dissimilarity metrics revealed outliers in performance, and a nested ANOVA model revealed the extent to which all metrics or individual metrics were impacted by mass spectrometer and run time. Study 5 data revealed that even when SOPs have been applied, instrument-dependent variability remains prominent, although it may be reduced, while within-site variability is reduced significantly. Finally, identification-independent quality metrics were shown to be predictive of identification sensitivity in these data sets. QuaMeter and the associated multivariate framework are available from http://fenchurch.mc.vanderbilt.edu and http://homepages.uc.edu/~wang2x7/ , respectively.",2014-02-17 +25861967,GeneTIER: prioritization of candidate disease genes using tissue-specific gene expression profiles.,"

Motivation

In attempts to determine the genetic causes of human disease, researchers are often faced with a large number of candidate genes. Linkage studies can point to a genomic region containing hundreds of genes, while the high-throughput sequencing approach will often identify a great number of non-synonymous genetic variants. Since systematic experimental verification of each such candidate gene is not feasible, a method is needed to decide which genes are worth investigating further. Computational gene prioritization presents itself as a solution to this problem, systematically analyzing and sorting each gene from the most to least likely to be the disease-causing gene, in a fraction of the time it would take a researcher to perform such queries manually.

Results

Here, we present Gene TIssue Expression Ranker (GeneTIER), a new web-based application for candidate gene prioritization. GeneTIER replaces knowledge-based inference traditionally used in candidate disease gene prioritization applications with experimental data from tissue-specific gene expression datasets and thus largely overcomes the bias toward the better characterized genes/diseases that commonly afflict other methods. We show that our approach is capable of accurate candidate gene prioritization and illustrate its strengths and weaknesses using case study examples.

Availability and implementation

Freely available on the web at http://dna.leeds.ac.uk/GeneTIER/.

Contact

umaan@leeds.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-09 +24909410,"MS Amanda, a universal identification algorithm optimized for high accuracy tandem mass spectra.","Today's highly accurate spectra provided by modern tandem mass spectrometers offer considerable advantages for the analysis of proteomic samples of increased complexity. Among other factors, the quantity of reliably identified peptides is considerably influenced by the peptide identification algorithm. While most widely used search engines were developed when high-resolution mass spectrometry data were not readily available for fragment ion masses, we have designed a scoring algorithm particularly suitable for high mass accuracy. Our algorithm, MS Amanda, is generally applicable to HCD, ETD, and CID fragmentation type data. The algorithm confidently explains more spectra at the same false discovery rate than Mascot or SEQUEST on examined high mass accuracy data sets, with excellent overlap and identical peptide sequence identification for most spectra also explained by Mascot or SEQUEST. MS Amanda, available at http://ms.imp.ac.at/?goto=msamanda , is provided free of charge both as standalone version for integration into custom workflows and as a plugin for the Proteome Discoverer platform.",2014-06-26 +25348212,Navigating protected genomics data with UCSC Genome Browser in a Box.,"

Unlabelled

Genome Browser in a Box (GBiB) is a small virtual machine version of the popular University of California Santa Cruz (UCSC) Genome Browser that can be run on a researcher's own computer. Once GBiB is installed, a standard web browser is used to access the virtual server and add personal data files from the local hard disk. Annotation data are loaded on demand through the Internet from UCSC or can be downloaded to the local computer for faster access.

Availability and implementation

Software downloads and installation instructions are freely available for non-commercial use at https://genome-store.ucsc.edu/. GBiB requires the installation of open-source software VirtualBox, available for all major operating systems, and the UCSC Genome Browser, which is open source and free for non-commercial use. Commercial use of GBiB and the Genome Browser requires a license (http://genome.ucsc.edu/license/).",2014-10-27 +24297542,"Drug-target interaction prediction by integrating chemical, genomic, functional and pharmacological data.","In silico prediction of unknown drug-target interactions (DTIs) has become a popular tool for drug repositioning and drug development. A key challenge in DTI prediction lies in integrating multiple types of data for accurate DTI prediction. Although recent studies have demonstrated that genomic, chemical and pharmacological data can provide reliable information for DTI prediction, it remains unclear whether functional information on proteins can also contribute to this task. Little work has been developed to combine such information with other data to identify new interactions between drugs and targets. In this paper, we introduce functional data into DTI prediction and construct biological space for targets using the functional similarity measure. We present a probabilistic graphical model, called conditional random field (CRF), to systematically integrate genomic, chemical, functional and pharmacological data plus the topology of DTI networks into a unified framework to predict missing DTIs. Tests on two benchmark datasets show that our method can achieve excellent prediction performance with the area under the precision-recall curve (AUPR) up to 94.9. These results demonstrate that our CRF model can successfully exploit heterogeneous data to capture the latent correlations of DTIs, and thus will be practically useful for drug repositioning. 
Supplementary Material is available at http://iiis.tsinghua.edu.cn/~compbio/papers/psb2014/psb2014_sm.pdf.",2014-01-01 +25161243,RidgeRace: ridge regression for continuous ancestral character estimation on phylogenetic trees.,"

Motivation

Ancestral character state reconstruction describes a set of techniques for estimating phenotypic or genetic features of species or related individuals that are the predecessors of those present today. Such reconstructions can reach into the distant past and can provide insights into the history of a population or a set of species when fossil data are not available, or they can be used to test evolutionary hypotheses, e.g. on the co-evolution of traits. Typical methods for ancestral character state reconstruction of continuous characters consider the phylogeny of the underlying data and estimate the ancestral process along the branches of the tree. They usually assume a Brownian motion model of character evolution or extensions thereof, requiring specific assumptions on the rate of phenotypic evolution.

Results

We suggest using ridge regression to infer rates for each branch of the tree and the ancestral values at each inner node. We performed extensive simulations to evaluate the performance of this method and have shown that the accuracy of its reconstructed ancestral values is competitive to reconstructions using other state-of-the-art software. Using a hierarchical clustering of gene mutation profiles from an ovarian cancer dataset, we demonstrate the use of the method as a feature selection tool.

Availability and implementation

The algorithm described here is implemented in C++ as a stand-alone program, and the source code is freely available at http://algbio.cs.uni-duesseldorf.de/software/RidgeRace.tar.gz.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +22135297,TarBase 6.0: capturing the exponential growth of miRNA targets with experimental support.,"As the relevant literature and the number of experiments increase at a super linear rate, databases that curate and collect experimentally verified microRNA (miRNA) targets have gradually emerged. These databases attempt to provide efficient access to this wealth of experimental data, which is scattered in thousands of manuscripts. Aim of TarBase 6.0 (http://www.microrna.gr/tarbase) is to face this challenge by providing a significant increase of available miRNA targets derived from all contemporary experimental techniques (gene specific and high-throughput), while incorporating a powerful set of tools in a user-friendly interface. TarBase 6.0 hosts detailed information for each miRNA-gene interaction, ranging from miRNA- and gene-related facts to information specific to their interaction, the experimental validation methodologies and their outcomes. All database entries are enriched with function-related data, as well as general information derived from external databases such as UniProt, Ensembl and RefSeq. DIANA microT miRNA target prediction scores and the relevant prediction details are available for each interaction. TarBase 6.0 hosts the largest collection of manually curated experimentally validated miRNA-gene interactions (more than 65,000 targets), presenting a 16.5-175-fold increase over other available manually curated databases.",2011-12-01 +26697344,Identification of differentially expressed genes between developing seeds of different soybean cultivars.,"Soybean is a major source of protein and oil and a primary feedstock for biodiesel production. Research on soybean seed composition and yield has revealed that protein, oil and yield are controlled quantitatively and quantitative trait loci (QTL) have been identified for each of these traits. 
However, very limited information is available regarding the genetic mechanisms controlling seed composition and yield. To help address this deficiency, we used Affymetrix Soybean GeneChips® to identify genes that are differentially expressed between developing seeds of the Minsoy and Archer soybean cultivars, which differ in seed weight, yield, protein content and oil content. A total of 700 probe sets were found to be expressed at significantly different (defined as having an adjusted p-value below or equal to 0.05 and an at least 2-fold difference) levels between the two cultivars at one or more of the three developmental stages and in at least one of the two years assayed. Comparison of data from soybeans collected in two different years revealed that 97 probe sets were expressed at significantly different levels in both years. Functional annotations were assigned to 78% of these 97 probe sets based on the SoyBase Affymetrix™ GeneChip® Soybean Genome Array Annotation. Genes involved in receptor binding/activity and protein binding are overrepresented among the group of 97 probe sets that were differentially expressed in both years assayed. Probe sets involved in growth/development, signal transduction, transcription, defense/stress response and protein and lipid metabolism were also identified among the 97 probe sets and their possible implications in the regulation of agronomic traits are discussed. As the Minsoy and Archer soybean cultivars differ with respect to seed size, yield, protein content and lipid content, some of the differentially expressed probe sets identified in this study may thus play important roles in controlling these traits. Others of these probe sets may be involved in regulation of general seed development or metabolism. 
All microarray data and expression values after GCRMA are available at the Gene Expression Omnibus (GEO) at NCBI (http://www.ncbi.nlm.nih.gov/geo), under accession number GSE21598.",2015-08-13 +26005412,Spindles in Svarog: framework and software for parametrization of EEG transients.,"We present a complete framework for time-frequency parametrization of EEG transients, based upon matching pursuit (MP) decomposition, applied to the detection of sleep spindles. Ranges of spindles duration (>0.5 s) and frequency (11-16 Hz) are taken directly from their standard definitions. Minimal amplitude is computed from the distribution of the root mean square (RMS) amplitude of the signal within the frequency band of sleep spindles. Detection algorithm depends on the choice of just one free parameter, which is a percentile of this distribution. Performance of detection is assessed on the first cohort/second subset of the Montreal Archive of Sleep Studies (MASS-C1/SS2). Cross-validation performed on the 19 available overnight recordings returned the optimal percentile of the RMS distribution close to 97 in most cases, and the following overall performance measures: sensitivity 0.63 ± 0.06, positive predictive value 0.47 ± 0.08, and Matthews coefficient of correlation 0.51 ± 0.04. These concordances are similar to the results achieved on this database by other automatic methods. Proposed detailed parametrization of sleep spindles within a universal framework, encompassing also other EEG transients, opens new possibilities of high resolution investigation of their relations and detailed characteristics. MP decomposition, selection of relevant structures, and simple creation of EEG profiles used previously for assessment of brain activity of patients in disorders of consciousness are implemented in a freely available software package Svarog (Signal Viewer, Analyzer and Recorder On GPL) with user-friendly, mouse-driven interface for review and analysis of EEG. 
Svarog can be downloaded from http://braintech.pl/svarog.",2015-05-08 +26843426,PON-mt-tRNA: a multifactorial probability-based method for classification of mitochondrial tRNA variations.,"Transfer RNAs (tRNAs) are essential for encoding the transcribed genetic information from DNA into proteins. Variations in the human tRNAs are involved in diverse clinical phenotypes. Interestingly, all pathogenic variations in tRNAs are located in mitochondrial tRNAs (mt-tRNAs). Therefore, it is crucial to identify pathogenic variations in mt-tRNAs for disease diagnosis and proper treatment. We collected mt-tRNA variations using a classification based on evidence from several sources and used the data to develop a multifactorial probability-based prediction method, PON-mt-tRNA, for classification of mt-tRNA single nucleotide substitutions. We integrated a machine learning-based predictor and an evidence-based likelihood ratio for pathogenicity using evidence of segregation, biochemistry and histochemistry to predict the posterior probability of pathogenicity of variants. The accuracy and Matthews correlation coefficient (MCC) of PON-mt-tRNA are 1.00 and 0.99, respectively. In the absence of evidence from segregation, biochemistry and histochemistry, PON-mt-tRNA classifies variations based on the machine learning method with an accuracy and MCC of 0.69 and 0.39, respectively. We classified all possible single nucleotide substitutions in all human mt-tRNAs using PON-mt-tRNA. The variations in the loops are more often tolerated compared to the variations in stems. The anticodon loop contains comparatively more predicted pathogenic variations than the other loops. PON-mt-tRNA is available at http://structure.bmc.lu.se/PON-mt-tRNA/.",2016-02-03 +25093069,CSEO - the Cigarette Smoke Exposure Ontology.,"

Background

In the past years, significant progress has been made to develop and use experimental settings for extensive data collection on tobacco smoke exposure and tobacco smoke exposure-associated diseases. Due to the growing number of such data, there is a need for domain-specific standard ontologies to facilitate the integration of tobacco exposure data.

Results

The CSEO (version 1.0) is composed of 20091 concepts. The ontology in its current form is able to capture a wide range of cigarette smoke exposure concepts within the knowledge domain of exposure science with a reasonable sensitivity and specificity. Moreover, it showed a promising performance when used to answer domain expert questions. The CSEO complies with standard upper-level ontologies and is freely accessible to the scientific community through a dedicated wiki at https://publicwiki-01.fraunhofer.de/CSEO-Wiki/index.php/Main_Page.

Conclusions

The CSEO has potential to become a widely used standard within the academic and industrial community. Mainly because of the emerging need of systems toxicology to controlled vocabularies and also the lack of suitable ontologies for this domain, the CSEO prepares the ground for integrative systems-based research in the exposure science.",2014-07-10 +23180784,RiceFREND: a platform for retrieving coexpressed gene networks in rice.,"Similarity of gene expression across a wide range of biological conditions can be efficiently used in characterization of gene function. We have constructed a rice gene coexpression database, RiceFREND (http://ricefrend.dna.affrc.go.jp/), to identify gene modules with similar expression profiles and provide a platform for more accurate prediction of gene functions. Coexpression analysis of 27 201 genes was performed against 815 microarray data derived from expression profiling of various organs and tissues at different developmental stages, mature organs throughout the growth from transplanting until harvesting in the field and plant hormone treatment conditions, using a single microarray platform. The database is provided with two search options, namely, 'single guide gene search' and 'multiple guide gene search' to efficiently retrieve information on coexpressed genes. A user-friendly web interface facilitates visualization and interpretation of gene coexpression networks in HyperTree, Cytoscape Web and Graphviz formats. In addition, analysis tools for identification of enriched Gene Ontology terms and cis-elements provide clue for better prediction of biological functions associated with the coexpressed genes. 
These features allow users to clarify gene functions and gene regulatory networks that could lead to a more thorough understanding of many complex agronomic traits.",2012-11-24 +22139933,IGDB.NSCLC: integrated genomic database of non-small cell lung cancer.,"Lung cancer is the most common cause of cancer-related mortality with more than 1.4 million deaths per year worldwide. To search for significant somatic alterations in lung cancer, we analyzed, integrated and manually curated various data sets and literatures to present an integrated genomic database of non-small cell lung cancer (IGDB.NSCLC, http://igdb.nsclc.ibms.sinica.edu.tw). We collected data sets derived from hundreds of human NSCLC (lung adenocarcinomas and/or squamous cell carcinomas) to illustrate genomic alterations [chromosomal regions with copy number alterations (CNAs), gain/loss and loss of heterozygosity], aberrant expressed genes and microRNAs, somatic mutations and experimental evidence and clinical information of alterations retrieved from literatures. IGDB.NSCLC provides user friendly interfaces and searching functions to display multiple layers of evidence especially emphasizing on concordant alterations of CNAs with co-localized altered gene expression, aberrant microRNAs expression, somatic mutations or genes with associated clinicopathological features. These significant concordant alterations in NSCLC are graphically or tabularly presented to facilitate and prioritize as the putative cancer targets for pathological and mechanistic studies of lung tumorigenesis and for developing new strategies in clinical interventions.",2011-12-01 +26819345,Role of Increased n-acetylaspartate Levels in Cancer.,"

Background

The clinical and biological effects of metabolic alterations in cancer are not fully understood.

Methods

In high-grade serous ovarian cancer (HGSOC) samples (n = 101), over 170 metabolites were profiled and compared with normal ovarian tissues (n = 15). To determine NAT8L gene expression across different cancer types, we analyzed the RNA expression of cancer types using RNASeqV2 data available from the open access The Cancer Genome Atlas (TCGA) website (http://www.cbioportal.org/public-portal/). Using NAT8L siRNA, molecular techniques and histological analysis, we determined cancer cell viability, proliferation, apoptosis, and tumor growth in in vitro and in vivo (n = 6-10 mice/group) settings. Data were analyzed with the Student's t test and Kaplan-Meier analysis. Statistical tests were two-sided.

Results

Patients with high levels of tumoral NAA and its biosynthetic enzyme, aspartate N-acetyltransferase (NAT8L), had worse overall survival than patients with low levels of NAA and NAT8L. The overall survival duration of patients with higher-than-median NAA levels (3.6 years) was lower than that of patients with lower-than-median NAA levels (5.1 years, P = .03). High NAT8L gene expression in other cancers (melanoma, renal cell, breast, colon, and uterine cancers) was associated with worse overall survival. NAT8L silencing reduced cancer cell viability (HEYA8: control siRNA 90.61% ± 2.53, NAT8L siRNA 39.43% ± 3.00, P < .001; A2780: control siRNA 90.59% ± 2.53, NAT8L siRNA 7.44% ± 1.71, P < .001) and proliferation (HEYA8: control siRNA 74.83% ± 0.92, NAT8L siRNA 55.70% ± 1.54, P < .001; A2780: control siRNA 50.17% ± 4.13, NAT8L siRNA 26.52% ± 3.70, P < .001), which was rescued by addition of NAA. In orthotopic mouse models (ovarian cancer and melanoma), NAT8L silencing reduced tumor growth statistically significantly (A2780: control siRNA 0.52 g ± 0.15, NAT8L siRNA 0.08 g ± 0.17, P < .001; HEYA8: control siRNA 0.79 g ± 0.42, NAT8L siRNA 0.24 g ± 0.18, P = .008, A375-SM: control siRNA 0.55 g ± 0.22, NAT8L siRNA 0.21 g ± 0.17 g, P = .001). NAT8L silencing downregulated the anti-apoptotic pathway, which was mediated through FOXM1.

Conclusion

These findings indicate that the NAA pathway has a prominent role in promoting tumor growth and represents a valuable target for anticancer therapy. Altered energy metabolism is a hallmark of cancer (1). Proliferating cancer cells have much greater metabolic requirements than nonproliferating differentiated cells (2,3). Moreover, altered cancer metabolism elevates unique metabolic intermediates, which can promote cancer survival and progression (4,5). Furthermore, emerging evidence suggests that proliferating cancer cells exploit alternative metabolic pathways to meet their high demand for energy and to accumulate biomass (6-8).",2016-01-26 +25953080,CerebralWeb: a Cytoscape.js plug-in to visualize networks stratified by subcellular localization.,"CerebralWeb is a light-weight JavaScript plug-in that extends Cytoscape.js to enable fast and interactive visualization of molecular interaction networks stratified based on subcellular localization or other user-supplied annotation. The application is designed to be easily integrated into any website and is configurable to support customized network visualization. CerebralWeb also supports the automatic retrieval of Cerebral-compatible localizations for human, mouse and bovine genes via a web service and enables the automated parsing of Cytoscape compatible XGMML network files. CerebralWeb currently supports embedded network visualization on the InnateDB (www.innatedb.com) and Allergy and Asthma Portal (allergen.innatedb.com) database and analysis resources. Database tool URL: http://www.innatedb.com/CerebralWeb",2015-05-07 +25161225,Broad-Enrich: functional interpretation of large sets of broad genomic regions.,"

Motivation

Functional enrichment testing facilitates the interpretation of Chromatin immunoprecipitation followed by high-throughput sequencing (ChIP-seq) data in terms of pathways and other biological contexts. Previous methods developed and used to test for key gene sets affected in ChIP-seq experiments treat peaks as points, and are based on the number of peaks associated with a gene or a binary score for each gene. These approaches work well for transcription factors, but histone modifications often occur over broad domains, and across multiple genes.

Results

To incorporate the unique properties of broad domains into functional enrichment testing, we developed Broad-Enrich, a method that uses the proportion of each gene's locus covered by a peak. We show that our method has a well-calibrated false-positive rate, performing well with ChIP-seq data having broad domains compared with alternative approaches. We illustrate Broad-Enrich with 55 ENCODE ChIP-seq datasets using different methods to define gene loci. Broad-Enrich can also be applied to other datasets consisting of broad genomic domains such as copy number variations.

Availability and implementation

http://broad-enrich.med.umich.edu for Web version and R package.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +23180787,SUBA3: a database for integrating experimentation and prediction to define the SUBcellular location of proteins in Arabidopsis.,"The subcellular location database for Arabidopsis proteins (SUBA3, http://suba.plantenergy.uwa.edu.au) combines manual literature curation of large-scale subcellular proteomics, fluorescent protein visualization and protein-protein interaction (PPI) datasets with subcellular targeting calls from 22 prediction programs. More than 14 500 new experimental locations have been added since its first release in 2007. Overall, nearly 650 000 new calls of subcellular location for 35 388 non-redundant Arabidopsis proteins are included (almost six times the information in the previous SUBA version). A re-designed interface makes the SUBA3 site more intuitive and easier to use than earlier versions and provides powerful options to search for PPIs within the context of cell compartmentation. SUBA3 also includes detailed localization information for reference organelle datasets and incorporates green fluorescent protein (GFP) images for many proteins. To determine as objectively as possible where a particular protein is located, we have developed SUBAcon, a Bayesian approach that incorporates experimental localization and targeting prediction data to best estimate a protein's location in the cell. 
The probabilities of subcellular location for each protein are provided and displayed as a pictographic heat map of a plant cell in SUBA3.",2012-11-24 +25948580,PACCMIT/PACCMIT-CDS: identifying microRNA targets in 3' UTRs and coding sequences.,"The purpose of the proposed web server, publicly available at http://paccmit.epfl.ch, is to provide a user-friendly interface to two algorithms for predicting messenger RNA (mRNA) molecules regulated by microRNAs: (i) PACCMIT (Prediction of ACcessible and/or Conserved MIcroRNA Targets), which identifies primarily mRNA transcripts targeted in their 3' untranslated regions (3' UTRs), and (ii) PACCMIT-CDS, designed to find mRNAs targeted within their coding sequences (CDSs). While PACCMIT belongs among the accurate algorithms for predicting conserved microRNA targets in the 3' UTRs, the main contribution of the web server is 2-fold: PACCMIT provides an accurate tool for predicting targets also of weakly conserved or non-conserved microRNAs, whereas PACCMIT-CDS addresses the lack of similar portals adapted specifically for targets in CDS. The web server asks the user for microRNAs and mRNAs to be analyzed, accesses the precomputed P-values for all microRNA-mRNA pairs from a database for all mRNAs and microRNAs in a given species, ranks the predicted microRNA-mRNA pairs, evaluates their significance according to the false discovery rate and finally displays the predictions in a tabular form. The results are also available for download in several standard formats.",2015-05-06 +22892045,Web-based GIS: the vector-borne disease airline importation risk (VBD-AIR) tool.,"

Background

Over the past century, the size and complexity of the air travel network has increased dramatically. Nowadays, there are 29.6 million scheduled flights per year and around 2.7 billion passengers are transported annually. The rapid expansion of the network increasingly connects regions of endemic vector-borne disease with the rest of the world, resulting in challenges to health systems worldwide in terms of vector-borne pathogen importation and disease vector invasion events. Here we describe the development of a user-friendly Web-based GIS tool: the Vector-Borne Disease Airline Importation Risk Tool (VBD-AIR), to help better define the roles of airports and airlines in the transmission and spread of vector-borne diseases.

Methods

Spatial datasets on modeled global disease and vector distributions, as well as climatic and air network traffic data were assembled. These were combined to derive relative risk metrics via air travel for imported infections, imported vectors and onward transmission, and incorporated into a three-tier server architecture in a Model-View-Controller framework with distributed GIS components. A user-friendly web-portal was built that enables dynamic querying of the spatial databases to provide relevant information.

Results

The VBD-AIR tool constructed enables the user to explore the interrelationships among modeled global distributions of vector-borne infectious diseases (malaria, dengue, yellow fever and chikungunya) and international air service routes to quantify seasonally changing risks of vector and vector-borne disease importation and spread by air travel, forming an evidence base to help plan mitigation strategies. The VBD-AIR tool is available at http://www.vbd-air.com.

Conclusions

VBD-AIR supports a data flow that generates analytical results from disparate but complementary datasets into an organized cartographical presentation on a web map for the assessment of vector-borne disease movements on the air travel network. The framework built provides a flexible and robust informatics infrastructure by separating the modules of functionality through an ontological model for vector-borne disease. The VBD‒AIR tool is designed as an evidence base for visualizing the risks of vector-borne disease by air travel for a wide range of users, including planners and decisions makers based in state and local government, and in particular, those at international and domestic airports tasked with planning for health risks and allocating limited resources.",2012-08-14 +22303335,MIRNA-DISTILLER: A Stand-Alone Application to Compile microRNA Data from Databases.,"MicroRNAs (miRNA) are small non-coding RNA molecules of ∼22 nucleotides which regulate large numbers of genes by binding to seed sequences at the 3'-untranslated region of target gene transcripts. The target mRNA is then usually degraded or translation is inhibited, although thus resulting in posttranscriptional down regulation of gene expression at the mRNA and/or protein level. Due to the bioinformatic difficulties in predicting functional miRNA binding sites, several publically available databases have been developed that predict miRNA binding sites based on different algorithms. The parallel use of different databases is currently indispensable, but highly uncomfortable and time consuming, especially when working with numerous genes of interest. We have therefore developed a new stand-alone program, termed MIRNA-DISTILLER, which allows to compile miRNA data for given target genes from public databases. Currently implemented are TargetScan, microCosm, and miRDB, which may be queried independently, pairwise, or together to calculate the respective intersections. 
Data are stored locally for application of further analysis tools including freely definable biological parameter filters, customized output-lists for both miRNAs and target genes, and various graphical facilities. The software, a data example file and a tutorial are freely available at http://www.ikp-stuttgart.de/content/language1/html/10415.asp.",2011-07-08 +25724382,Efficient multiple-trait association and estimation of genetic correlation using the matrix-variate linear mixed model.,"Multiple-trait association mapping, in which multiple traits are used simultaneously in the identification of genetic variants affecting those traits, has recently attracted interest. One class of approaches for this problem builds on classical variance component methodology, utilizing a multitrait version of a linear mixed model. These approaches both increase power and provide insights into the genetic architecture of multiple traits. In particular, it is possible to estimate the genetic correlation, which is a measure of the portion of the total correlation between traits that is due to additive genetic effects. Unfortunately, the practical utility of these methods is limited since they are computationally intractable for large sample sizes. In this article, we introduce a reformulation of the multiple-trait association mapping approach by defining the matrix-variate linear mixed model. Our approach reduces the computational time necessary to perform maximum-likelihood inference in a multiple-trait model by utilizing a data transformation. By utilizing a well-studied human cohort, we show that our approach provides more than a 10-fold speedup, making multiple-trait association feasible in a large population cohort on the genome-wide scale. We take advantage of the efficiency of our approach to analyze gene expression data. 
By decomposing gene coexpression into a genetic and environmental component, we show that our method provides fundamental insights into the nature of coexpressed genes. An implementation of this method is available at http://genetics.cs.ucla.edu/mvLMM.",2015-02-27 +25943549,CCTOP: a Consensus Constrained TOPology prediction web server.,"The Consensus Constrained TOPology prediction (CCTOP; http://cctop.enzim.ttk.mta.hu) server is a web-based application providing transmembrane topology prediction. In addition to utilizing 10 different state-of-the-art topology prediction methods, the CCTOP server incorporates topology information from existing experimental and computational sources available in the PDBTM, TOPDB and TOPDOM databases using the probabilistic framework of hidden Markov model. The server provides the option to precede the topology prediction with signal peptide prediction and transmembrane-globular protein discrimination. The initial result can be recalculated by (de)selecting any of the prediction methods or mapped experiments or by adding user specified constraints. CCTOP showed superior performance to existing approaches. The reliability of each prediction is also calculated, which correlates with the accuracy of the per protein topology prediction. The prediction results and the collected experimental information are visualized on the CCTOP home page and can be downloaded in XML format. Programmable access of the CCTOP server is also available, and an example of client-side script is provided.",2015-05-05 +25943547,HMMER web server: 2015 update.,"The HMMER website, available at http://www.ebi.ac.uk/Tools/hmmer/, provides access to the protein homology search algorithms found in the HMMER software suite. Since the first release of the website in 2011, the search repertoire has been expanded to include the iterative search algorithm, jackhmmer. 
The continued growth of the target sequence databases means that traditional tabular representations of significant sequence hits can be overwhelming to the user. Consequently, additional ways of presenting homology search results have been developed, allowing them to be summarised according to taxonomic distribution or domain architecture. The taxonomy and domain architecture representations can be used in combination to filter the results according to the needs of a user. Searches can also be restricted prior to submission using a new taxonomic filter, which not only ensures that the results are specific to the requested taxonomic group, but also improves search performance. The repertoire of profile hidden Markov model libraries, which are used for annotation of query sequences with protein families and domains, has been expanded to include the libraries from CATH-Gene3D, PIRSF, Superfamily and TIGRFAMs. Finally, we discuss the relocation of the HMMER webserver to the European Bioinformatics Institute and the potential impact that this will have.",2015-05-05 +25433697,Exploiting hidden information interleaved in the redundancy of the genetic code without prior knowledge.,"

Motivation

Dozens of studies in recent years have demonstrated that codon usage encodes various aspects related to all stages of gene expression regulation. When relevant high-quality large-scale gene expression data are available, it is possible to statistically infer and model these signals, enabling analysing and engineering gene expression. However, when these data are not available, it is impossible to infer and validate such models.

Results

In this current study, we suggest Chimera-an unsupervised computationally efficient approach for exploiting hidden high-dimensional information related to the way gene expression is encoded in the open reading frame (ORF), based solely on the genome of the analysed organism. One version of the approach, named Chimera Average Repetitive Substring (ChimeraARS), estimates the adaptability of an ORF to the intracellular gene expression machinery of a genome (host), by computing its tendency to include long substrings that appear in its coding sequences; the second version, named ChimeraMap, engineers the codons of a protein such that it will include long substrings of codons that appear in the host coding sequences, improving its adaptation to a new host's gene expression machinery. We demonstrate the applicability of the new approach for analysing and engineering heterologous genes and for analysing endogenous genes. Specifically, focusing on Escherichia coli, we show that it can exploit information that cannot be detected by conventional approaches (e.g. the CAI-Codon Adaptation Index), which only consider single codon distributions; for example, we report correlations of up to 0.67 for the ChimeraARS measure with heterologous gene expression, when the CAI yielded no correlation.

Availability and implementation

For non-commercial purposes, the code of the Chimera approach can be downloaded from http://www.cs.tau.ac.il/~tamirtul/Chimera/download.htm.

Contact

tamirtul@post.tau.ac.il

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-29 +27494442,Perinatal Exposure to Traffic-Related Air Pollution and Autism Spectrum Disorders.,"

Background

Studies from the United States indicate that exposure to air pollution in early life is associated with autism spectrum disorders (ASD) in children, but the evidence is not consistent with European data.

Objective

We aimed to investigate the association between exposure to air pollution from road traffic and the risk of ASD in children, with careful adjustment for socioeconomic and other confounders.

Method

Children born and residing in Stockholm, Sweden, during 1993-2007 with an ASD diagnosis were identified through multiple health registers and classified as cases (n = 5,136). A randomly selected sample of 18,237 children from the same study base constituted controls. Levels of nitrogen oxides (NOx) and particulate matter with diameter ≤ 10 μm (PM10) from road traffic were estimated at residential addresses during mother's pregnancy and the child's first year of life by dispersion models. Odds ratios (OR) and 95% confidence intervals (CI) for ASD with or without intellectual disability (ID) were estimated using logistic regression models after conditioning on municipality and calendar year of birth as well as adjustment for potential confounders.

Result

Air pollution exposure during the prenatal period was not associated with ASD overall (OR = 1.00; 95% CI: 0.86, 1.15 per 10-μg/m3 increase in PM10 and OR = 1.02; 95% CI: 0.94, 1.10 per 20-μg/m3 increase in NOx during mother's pregnancy). Similar results were seen for exposure during the first year of life, and for ASD in combination with ID. An inverse association between air pollution exposure and ASD risk was observed among children of mothers who moved to a new residence during pregnancy.

Conclusion

Early-life exposure to low levels of NOx and PM10 from road traffic does not appear to increase the risk of ASD. Citation: Gong T, Dalman C, Wicks S, Dal H, Magnusson C, Lundholm C, Almqvist C, Pershagen G. 2017. Perinatal exposure to traffic-related air pollution and autism spectrum disorders. Environ Health Perspect 125:119-126; http://dx.doi.org/10.1289/EHP118.",2016-08-05 +25598764,Molecular phenotyping of a UK population: defining the human serum metabolome.,"Phenotyping of 1,200 'healthy' adults from the UK has been performed through the investigation of diverse classes of hydrophilic and lipophilic metabolites present in serum by applying a series of chromatography-mass spectrometry platforms. These data were made robust to instrumental drift by numerical correction; this was prerequisite to allow detection of subtle metabolic differences. The variation in observed metabolite relative concentrations between the 1,200 subjects ranged from less than 5 % to more than 200 %. Variations in metabolites could be related to differences in gender, age, BMI, blood pressure, and smoking. Investigations suggest that a sample size of 600 subjects is both necessary and sufficient for robust analysis of these data. Overall, this is a large scale and non-targeted chromatographic MS-based metabolomics study, using samples from over 1,000 individuals, to provide a comprehensive measurement of their serum metabolomes. This work provides an important baseline or reference dataset for understanding the 'normal' relative concentrations and variation in the human serum metabolome. These may be related to our increasing knowledge of the human metabolic network map. Information on the Husermet study is available at http://www.husermet.org/. Importantly, all of the data are made freely available at MetaboLights (http://www.ebi.ac.uk/metabolights/).",2014-07-25 +27297238,Do Socioeconomic Factors and Race Determine the Likelihood of Breast-Conserving Surgery?,"

Background

Racial disparities in the use of breast-conserving surgery (BCS) have been reported and may be due to advanced stage at diagnosis. Our hypothesis was that low-income and ethnic minority patients have an increased tumor size at diagnosis and decreased likelihood of BCS.

Patients and methods

A retrospective review was conducted of early stage breast cancer patients from 10 hospitals in Harris County, Texas, between 2004 and 2011. Clinical stage was calculated on the basis of data from the institutional tumor registries and electronic medical records. Zip code-based socioeconomic factors were downloaded from the US Census Bureau (http://www.census.gov/). Linear regression was used to identify predictors of tumor size, and logistic regression was used to identify predictors of BCS.

Results

The cohort included 3937 patients, comprising 2546 (65%) whites, 535 (14%) African Americans, 482 (11%) Hispanics, and 374 (10%) Asian/others. Multivariate linear regression demonstrated socioeconomic status (SES), younger age, African American, Hispanic race, and hormone receptor-negative tumors to be associated with increased tumor size at diagnosis (P < .05). Hispanic and Asian/other race, larger tumor size, combined estrogen receptor-negative/progesterone receptor-negative tumors were associated with not receiving BCS.

Conclusion

Race and SES were both associated with larger tumor size at diagnosis. Larger tumor size, negative hormone receptor status, and Hispanic and Asian race were associated with lack of receipt of BCS. Breast cancer screening programs should target both minority and low SES groups. Rates of BCS should be interpreted cautiously when used as a quality metric because of the multiple factors, including tumor size and biology, contributing to its use.",2016-05-14 +25064564,Compression and fast retrieval of SNP data.,"

Motivation

The increasing interest in rare genetic variants and epistatic genetic effects on complex phenotypic traits is currently pushing genome-wide association study design towards datasets of increasing size, both in the number of studied subjects and in the number of genotyped single nucleotide polymorphisms (SNPs). This, in turn, is leading to a compelling need for new methods for compression and fast retrieval of SNP data.

Results

We present a novel algorithm and file format for compressing and retrieving SNP data, specifically designed for large-scale association studies. Our algorithm is based on two main ideas: (i) compress linkage disequilibrium blocks in terms of differences with a reference SNP and (ii) compress reference SNPs exploiting information on their call rate and minor allele frequency. Tested on two SNP datasets and compared with several state-of-the-art software tools, our compression algorithm is shown to be competitive in terms of compression rate and to outperform all tools in terms of time to load compressed data.

Availability and implementation

Our compression and decompression algorithms are implemented in a C++ library, are released under the GNU General Public License and are freely downloadable from http://www.dei.unipd.it/~sambofra/snpack.html.",2014-07-26 +26545824,Automatic generation of bioinformatics tools for predicting protein-ligand binding sites.,"

Motivation

Predictive tools that model protein-ligand binding on demand are needed to promote ligand research in an innovative drug-design environment. However, it takes considerable time and effort to develop predictive tools that can be applied to individual ligands. An automated production pipeline that can rapidly and efficiently develop user-friendly protein-ligand binding predictive tools would be useful.

Results

We developed a system for automatically generating protein-ligand binding predictions. Implementation of this system in a pipeline of Semantic Web technique-based web tools will allow users to specify a ligand and receive the tool within 0.5-1 day. We demonstrated high prediction accuracy for three machine learning algorithms and eight ligands.

Availability and implementation

The source code and web application are freely available for download at http://utprot.net. They are implemented in Python and supported on Linux.

Contact

shimizu@bi.a.u-tokyo.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-05 +25940623,MapMyFlu: visualizing spatio-temporal relationships between related influenza sequences.,"Understanding the molecular dynamics of viral spreading is crucial for anticipating the epidemiological implications of disease outbreaks. In the case of influenza, reassortments or point mutations affect the adaption to new hosts or resistance to anti-viral drugs and can determine whether a new strain will result in a pandemic infection or a less severe progression. To this end, tools integrating molecular information with epidemiological parameters are important to understand how molecular characteristics reflect in the infection dynamics. We present a new web tool, MapMyFlu, which allows to spatially and temporally display influenza viruses related to a query sequence on a Google Map based on BLAST results against the NCBI Influenza Database. Temporal and geographical trends appear clearly and may help in reconstructing the evolutionary history of a particular sequence. The tool is accessible through a web server, hence without the need for local installation. The website has an intuitive design and provides an easy-to-use service, and is available at http://mapmyflu.ipmb.uni-heidelberg.de.",2015-05-04 +25940562,HTT-DB: horizontally transferred transposable elements database.,"

Motivation

Horizontal transfer of transposable (HTT) elements among eukaryotes was discovered in the mid-1980s. Since then, >300 new cases have been described. New findings about HTT are revealing the evolutionary impact of this phenomenon on host genomes. In order to provide an up-to-date, interactive and expandable database for such events, we developed the HTT-DB database.

Results

HTT-DB allows easy access to most of the HTT cases reported along with rich information about each case. Moreover, it allows the user to generate tables and graphs based on searches using transposable elements and/or host species classification and export them in several formats.

Availability and implementation

This database is freely available on the web at http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase. HTT-DB was developed based on Java and MySQL with all major browsers supported. Tools and software packages used are free for personal or non-profit projects.

Contact

bdotto82@gmail.com or gabriel.wallau@gmail.com.",2015-05-04 +24860646,edgeR: a versatile tool for the analysis of shRNA-seq and CRISPR-Cas9 genetic screens.,"Pooled library sequencing screens that perturb gene function in a high-throughput manner are becoming increasingly popular in functional genomics research. Irrespective of the mechanism by which loss of function is achieved, via either RNA interference using short hairpin RNAs (shRNAs) or genetic mutation using single guide RNAs (sgRNAs) with the CRISPR-Cas9 system, there is a need to establish optimal analysis tools to handle such data. Our open-source processing pipeline in edgeR provides a complete analysis solution for screen data, that begins with the raw sequence reads and ends with a ranked list of candidate genes for downstream biological validation. We first summarize the raw data contained in a fastq file into a matrix of counts (samples in the columns, genes in the rows) with options for allowing mismatches and small shifts in sequence position. Diagnostic plots, normalization and differential representation analysis can then be performed using established methods to prioritize results in a statistically rigorous way, with the choice of either the classic exact testing methodology or generalized linear modeling that can handle complex experimental designs. A detailed users' guide that demonstrates how to analyze screen data in edgeR along with a point-and-click implementation of this workflow in Galaxy are also provided. The edgeR package is freely available from http://www.bioconductor.org.",2014-04-24 +22884117,Graft for prevention of Frey syndrome after parotidectomy: a systematic review and meta-analysis of randomized controlled trials.,"

Purpose

To detect the effect and safety of different types of grafts for the prevention of Frey syndrome after parotidectomy.

Materials and methods

The following data bases were searched electronically: MEDLINE (using OVID, from 1948 to July 2011), Cochrane Central Register of Controlled Trials (CENTRAL, issue 2, 2011), EMBASE (available from: http://embase.com, 1984 to July 2011), World Health Organization International Clinical Trials Registry Platform (July 2011), Chinese BioMedical Literature Database (1978 to July 2011), and the China National Knowledge Infrastructure (1994 to July 2011). The relevant journals and reference lists of the included studies were manually searched for randomized controlled trials studying the effect and safety of different types of grafts for preventing Frey syndrome after parotidectomy. The risk of bias assessment using Cochrane Collaboration's tool and data extraction was independently performed by 2 reviewers. The meta-analysis was performed using Review Manager, version 5.1.

Results

A total of 14 randomized clinical trials and 1,098 participants were included. All had an unclear risk of bias. The meta-analysis results showed that the use of an acellular dermis matrix can reduce by 82% the risk of Frey syndrome compared with the no-graft group using an objective assessment (relative risk [RR] 0.18, 95% confidence interval [CI] 0.12 to 0.26; P < .00001; Grading of Recommendations, Assessment, Development, and Evaluation [GRADE] quality of evidence: high). The acellular dermis matrix can also reduce by 90% the risk of Frey syndrome compared with the no-graft group using a subjective assessment (RR 0.10, 95% CI 0.05 to 0.22; P < .00001; GRADE quality of evidence: high). The muscle flaps can reduce by 81% the risk of Frey syndrome compared with the no-graft group (RR 0.19, 95% CI 0.13 to 0.27; P < .00001; GRADE quality of evidence: high). No statistically significant difference was found between the acellular dermal matrix and muscle flap groups (RR 0.73, 95% CI 0.15 to 3.53, P = .70; GRADE quality of evidence: low). No serious adverse events were reported.

Conclusions

The present clinical evidence suggests that grafts are effective in preventing Frey syndrome after parotidectomy. More randomized clinical trials are needed to confirm our conclusions and prove the safety of the grafts.",2012-08-11 +25054200,Mining TCGA data using Boolean implications.,"Boolean implications (if-then rules) provide a conceptually simple, uniform and highly scalable way to find associations between pairs of random variables. In this paper, we propose to use Boolean implications to find relationships between variables of different data types (mutation, copy number alteration, DNA methylation and gene expression) from the glioblastoma (GBM) and ovarian serous cystadenoma (OV) data sets from The Cancer Genome Atlas (TCGA). We find hundreds of thousands of Boolean implications from these data sets. A direct comparison of the relationships found by Boolean implications and those found by commonly used methods for mining associations show that existing methods would miss relationships found by Boolean implications. Furthermore, many relationships exposed by Boolean implications reflect important aspects of cancer biology. Examples of our findings include cis relationships between copy number alteration, DNA methylation and expression of genes, a new hierarchy of mutations and recurrent copy number alterations, loss-of-heterozygosity of well-known tumor suppressors, and the hypermethylation phenotype associated with IDH1 mutations in GBM. The Boolean implication results used in the paper can be accessed at http://crookneck.stanford.edu/microarray/TCGANetworks/.",2014-07-23 +22608002,RCDB: Renal Cancer Gene Database.,"

Background

Renal cell carcinoma or RCC is one of the common and most lethal urological cancers, with 40% of the patients succumbing to death because of metastatic progression of the disease. Treatment of metastatic RCC remains highly challenging because of its resistance to chemotherapy as well as radiotherapy, besides surgical resection. Whereas RCC comprises tumors with differing histological types, clear cell RCC remains the most common. A major problem in the clinical management of patients presenting with localized ccRCC is the inability to determine tumor aggressiveness and accurately predict the risk of metastasis following surgery. As a measure to improve the diagnosis and prognosis of RCC, researchers have identified several molecular markers through a number of techniques. However the wealth of information available is scattered in literature and not easily amenable to data-mining. To reduce this gap, this work describes a comprehensive repository called Renal Cancer Gene Database, as an integrated gateway to study renal cancer related data.

Findings

Renal Cancer Gene Database is a manually curated compendium of 240 protein-coding and 269 miRNA genes contributing to the etiology and pathogenesis of various forms of renal cell carcinomas. The protein coding genes have been classified according to the kind of gene alteration observed in RCC. RCDB also includes the miRNAs dysregulated in RCC, along with the corresponding information regarding the type of RCC and/or metastatic or prognostic significance. While some of the miRNA genes showed an association with other types of cancers, few were unique to RCC. Users can query the database using keywords, category and chromosomal location of the genes. The knowledgebase can be freely accessed via a user-friendly web interface at http://www.juit.ac.in/attachments/jsr/rcdb/homenew.html.

Conclusions

It is hoped that this database would serve as a useful complement to the existing public resources and as a good starting point for researchers and physicians interested in RCC genetics.",2012-05-18 +24376038,Computational identification of protein binding sites on RNAs using high-throughput RNA structure-probing data.,"

Motivation

High-throughput sequencing has been used to probe RNA structures, by treating RNAs with reagents that preferentially cleave or mark certain nucleotides according to their local structures, followed by sequencing of the resulting fragments. The data produced contain valuable information for studying various RNA properties.

Results

We developed methods for statistically modeling these structure-probing data and extracting structural features from them. We show that the extracted features can be used to predict RNA 'zipcodes' in yeast, regions bound by the She complex in asymmetric localization. The prediction accuracy was better than using raw RNA probing data or sequence features. We further demonstrate the use of the extracted features in identifying binding sites of RNA binding proteins from whole-transcriptome global photoactivatable-ribonucleoside-enhanced cross-linking and immunopurification (gPAR-CLIP) data.

Availability

The source code of our implemented methods is available at http://yiplab.cse.cuhk.edu.hk/probrna/ CONTACT: kevinyip@cse.cuhk.edu.hk Supplementary information: Supplementary data are available at Bioinformatics online.",2013-12-27 +24875479,BiQ Analyzer HiMod: an interactive software tool for high-throughput locus-specific analysis of 5-methylcytosine and its oxidized derivatives.,"Recent data suggest important biological roles for oxidative modifications of methylated cytosines, specifically hydroxymethylation, formylation and carboxylation. Several assays are now available for profiling these DNA modifications genome-wide as well as in targeted, locus-specific settings. Here we present BiQ Analyzer HiMod, a user-friendly software tool for sequence alignment, quality control and initial analysis of locus-specific DNA modification data. The software supports four different assay types, and it leads the user from raw sequence reads to DNA modification statistics and publication-quality plots. BiQ Analyzer HiMod combines well-established graphical user interface of its predecessor tool, BiQ Analyzer HT, with new and extended analysis modes. BiQ Analyzer HiMod also includes updates of the analysis workspace, an intuitive interface, a custom vector graphics engine and support of additional input and output data formats. The tool is freely available as a stand-alone installation package from http://biq-analyzer-himod.bioinf.mpi-inf.mpg.de/.",2014-05-29 +23324169,Orchidstra: an integrated orchid functional genomics database.,"A specialized orchid database, named Orchidstra (URL: http://orchidstra.abrc.sinica.edu.tw), has been constructed to collect, annotate and share genomic information for orchid functional genomics studies. The Orchidaceae is a large family of Angiosperms that exhibits extraordinary biodiversity in terms of both the number of species and their distribution worldwide. 
Orchids exhibit many unique biological features; however, investigation of these traits is currently constrained due to the limited availability of genomic information. Transcriptome information for five orchid species and one commercial hybrid has been included in the Orchidstra database. Altogether, these comprise >380,000 non-redundant orchid transcript sequences, of which >110,000 are protein-coding genes. Sequences from the transcriptome shotgun assembly (TSA) were obtained either from output reads from next-generation sequencing technologies assembled into contigs, or from conventional cDNA library approaches. An annotation pipeline using Gene Ontology, KEGG and Pfam was built to assign gene descriptions and functional annotation to protein-coding genes. Deep sequencing of small RNA was also performed for Phalaenopsis aphrodite to search for microRNAs (miRNAs), extending the information archived for this species to miRNA annotation, precursors and putative target genes. The P. aphrodite transcriptome information was further used to design probes for an oligonucleotide microarray, and expression profiling analysis was carried out. The intensities of hybridized probes derived from microarray assays of various tissues were incorporated into the database as part of the functional evidence. In the future, the content of the Orchidstra database will be expanded with transcriptome data and genomic information from more orchid species.",2013-01-16 +24951946,RPPApipe: a pipeline for the analysis of reverse-phase protein array data.,"

Background and scope

Today, web-based data analysis pipelines exist for a wide variety of microarray platforms, such as ordinary gene-centered arrays, exon arrays and SNP arrays. However, most of the available software tools provide only limited support for reverse-phase protein arrays (RPPA), as relevant inherent properties of the corresponding datasets are not taken into account. Thus, we developed the web-based data analysis pipeline RPPApipe, which was specifically tailored to suit the characteristics of the RPPA platform and encompasses various tools for data preprocessing, statistical analysis, clustering and pathway analysis.

Implementation and performance

All tools which are part of the RPPApipe software were implemented using R/Bioconductor. The software was embedded into our web-based ZBIT Bioinformatics Toolbox which is a customized instance of the Galaxy platform.

Availability

RPPApipe is freely available under GNU Public License from http://webservices.cs.uni-tuebingen.de. A full documentation of the tool can be found on the corresponding website http://www.cogsys.cs.uni-tuebingen.de/software/RPPApipe.",2014-06-18 +25359894,Fast and accurate site frequency spectrum estimation from low coverage sequence data.,"

Motivation

The distribution of allele frequencies across polymorphic sites, also known as the site frequency spectrum (SFS), is of primary interest in population genetics. It is a complete summary of sequence variation at unlinked sites and more generally, its shape reflects underlying population genetic processes. One practical challenge is that inferring the SFS from low coverage sequencing data in a straightforward manner by using genotype calls can lead to significant bias. To reduce bias, previous studies have used a statistical method that directly estimates the SFS from sequencing data by first computing site allele frequency (SAF) likelihood for each site (i.e. the likelihood a site has each possible allele frequency conditional on observed sequence reads) using a dynamic programming (DP) algorithm. Although this method produces an accurate SFS, computing the SAF likelihood is quadratic in the number of samples sequenced.

Results

To overcome this computational challenge, we propose an algorithm, 'score-limited DP' algorithm, which is linear in the number of genomes to compute the SAF likelihood. This algorithm works because in a lower triangular matrix that arises in the DP algorithm, all non-negligible values of the SAF likelihood are concentrated on a few cells around the best-guess allele counts. We show that our score-limited DP algorithm has comparable accuracy but is faster than the original DP algorithm. This speed improvement makes SFS estimation practical when using low coverage NGS data from a large number of individuals.

Availability and implementation

The program will be available via a link from the Novembre lab website (http://jnpopgen.org/).",2014-10-30 +26759820,Expression of hepatic miRNAs targeting porcine glucocorticoid receptor (GR) 3'UTR in the neonatal piglets under a maternal gestational betaine supplementation.,"Glucocorticoid receptor (GR) has been previously demonstrated an important transcriptional factor of hepatic metabolic genes in the neonates under a maternal gestational betaine supplementation (""Gestational dietary betaine supplementation suppresses hepatic expression of lipogenic genes in neonatal piglets through epigenetic and glucocorticoid receptor-dependent mechanisms"" Cai et al., 2015 [1]). Here we provide accompanying data about the expression of hepatic miRNAs targeting porcine GR 3'UTR in the neonatal piglets. Liver samples were obtained and RNA was isolated. RNA was polyadenylated by poly (A) polymerase and then dissolved and reverse transcribed using poly (T) adapter. The diluted cDNA were used in each real-time PCR assay. The sequences of all the porcine miRNAs were acquired from miRBase (http://www.mirbase.org/). miRNAs targeting GR were predicted using the PITA algorithm. Among all the predicted miRNAs, 4 miRNAs targeting GR were quantitated by real-time PCR and miRNA-124a, which has been identified to target GR 3'UTR [2], [3], was more highly expressed in betaine-exposed neonatal livers.",2015-11-26 +25717191,GeNOSA: inferring and experimentally supporting quantitative gene regulatory networks in prokaryotes.,"

Motivation

The establishment of quantitative gene regulatory networks (qGRNs) through existing network component analysis (NCA) approaches suffers from shortcomings such as usage limitations of problem constraints and the instability of inferred qGRNs. The proposed GeNOSA framework uses a global optimization algorithm (OptNCA) to cope with the stringent limitations of NCA approaches in large-scale qGRNs.

Results

OptNCA performs well against existing NCA-derived algorithms in terms of utilization of connectivity information and reconstruction accuracy of inferred GRNs using synthetic and real Escherichia coli datasets. For comparisons with other non-NCA-derived algorithms, OptNCA without using known qualitative regulations is also evaluated in terms of qualitative assessments using a synthetic Saccharomyces cerevisiae dataset of the DREAM3 challenges. We successfully demonstrate GeNOSA in several applications including deducing condition-dependent regulations, establishing high-consensus qGRNs and validating a sub-network experimentally for dose-response and time-course microarray data, and discovering and experimentally confirming a novel regulation of CRP on AscG.

Availability and implementation

All datasets and the GeNOSA framework are freely available from http://e045.life.nctu.edu.tw/GeNOSA.

Contact

syho@mail.nctu.edu.tw

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-24 +21856736,PTPan--overcoming memory limitations in oligonucleotide string matching for primer/probe design.,"

Motivation

Nucleic acid diagnostics has high demands for non-heuristic exact and approximate oligonucleotide string matching concerning in silico primer/probe design in huge nucleic acid sequence collections. Unfortunately, public sequence repositories grow much faster than computer hardware performance and main memory capacity do. This growth imposes severe problems on existing oligonucleotide primer/probe design applications necessitating new approaches based on space-efficient indexing structures.

Results

We developed PTPan (spoken Peter Pan, 'PT' is for Position Tree, the earlier name of suffix trees), a space-efficient indexing structure for approximate oligonucleotide string matching in nucleic acid sequence data. Based on suffix trees, it combines partitioning, truncation and a new suffix tree stream compression to deal with large amounts of aligned and unaligned data. PTPan operates efficiently in main memory and on secondary storage, balancing between memory consumption and runtime during construction and application. Based on PTPan, applications supporting similarity search and primer/probe design have been implemented, namely FindFamily, ProbeMatch and ProbeDesign. All three use a weighted Levenshtein distance metric for approximative queries to find and rate matches with indels as well as substitutions. We integrated PTPan in the worldwide used software package ARB to demonstrate usability and performance. Comparing PTPan and the original ARB index for the very large ssu-rRNA database SILVA, we recognized a shorter construction time, extended functionality and dramatically reduced memory requirements at the price of expanded, but very reasonable query times. PTPan enables indexing of huge nucleic acid sequence collections at reasonable application response times. Not being limited by main memory, PTPan constitutes a major advancement regarding rapid oligonucleotide string matching in primer/probe design now and in the future facing the enormous growth of molecular sequence data.

Availability

Supplementary Material, PTPan stand-alone library and ARB-PTPan binary on http://ptpan.lrr.in.tum.de/.

Contact

meierh@in.tum.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-19 +27153701,Tally: a scoring tool for boundary determination between repetitive and non-repetitive protein sequences.,"

Motivation

Tandem Repeats (TRs) are abundant in proteins, having a variety of fundamental functions. In many cases, evolution has blurred their repetitive patterns. This leads to the problem of distinguishing between sequences that contain highly imperfect TRs, and the sequences without TRs. The 3D structure of proteins can be used as a benchmarking criterion for TR detection in sequences, because the vast majority of proteins having TRs in sequences are built of repetitive 3D structural blocks. According to our benchmark, none of the existing scoring methods are able to clearly distinguish, based on the sequence analysis, between structures with and without 3D TRs.

Results

We developed a scoring tool called Tally, which is based on a machine learning approach. Tally is able to achieve a better separation between sequences with structural TRs and sequences of aperiodic structures, than existing scoring procedures. It performs at a level of 81% sensitivity, while achieving a high specificity of 74% and an Area Under the Receiver Operating Characteristic Curve of 86%. Tally can be used to select a set of structurally and functionally meaningful TRs from all TRs detected in proteomes. The generated dataset is available for benchmarking purposes.

Availability and implementation

Source code is available upon request. Tool and dataset can be accessed through our website: http://bioinfo.montp.cnrs.fr/?r=Tally

Contact

andrey.kajava@crbm.cnrs.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-07 +22053089,InterEvol database: exploring the structure and evolution of protein complex interfaces.,"Capturing how the structures of interacting partners evolved at their binding interfaces is a fundamental issue for understanding interactomes evolution. In that scope, the InterEvol database was designed for exploring 3D structures of homologous interfaces of protein complexes. For every chain forming a complex in the protein data bank (PDB), close and remote structural interologs were identified providing essential snapshots for studying interfaces evolution. The database provides tools to retrieve and visualize these structures. In addition, pre-computed multiple sequence alignments of most likely interologs retrieved from a wide range of species can be downloaded to enrich the analysis. The database can be queried either directly by pdb code or keyword but also from the sequence of one or two partners. Interologs multiple sequence alignments can also be recomputed online with tailored parameters using the InterEvolAlign facility. Last, an InterEvol PyMol plugin was developed to improve interactive exploration of structures versus sequence alignments at the interfaces of complexes. Based on a series of automatic methods to extract structural and sequence data, the database will be monthly updated. Structures coordinates and sequence alignments can be queried and downloaded from the InterEvol web interface at http://biodev.cea.fr/interevol/.",2011-11-03 +26003108,Rare chromosome structural aberration characterizing oncology malignancy.,"

Unlabelled

Ring chromosome aberrations are rare abnormalities that can potentially involve any chromosome in oncology patients. The present review and case study focuses on ring chromosomes associated with oncology malignancies.

Material and methods

A systematic electronic search of peer-reviewed articles was performed to obtain relevant literature from the CINAHL, Google Scholar, and PubMed databases. The keywords included marker, abnormalities, structural, and ring chromosome. The inclusion criteria for the review were that the documents were original quantitative research and published in English. The search was also carried out using Medline, the Mitelman database (http://cgap.nci.nih.gov/Chromosomes/Mitelman), the Danish cytogenetic register and other pertinent web references on ring chromosomes in oncology malignancies. Articles that were not directly relevant to the present objective were excluded. In addition, the unstimulated bone marrow specimen of the present case was subjected to methotrexate cell culture synchronization and was finally treated by the GTG-banding technique.

Results

A ring chromosome was observed in 10% of the total cells. Cytogenetic analysis demonstrated an apparent ring chromosome 15, with a 46,XY,r(15) karyotype. The clinical findings revealed a history of nausea, loss of appetite, diarrhea, night sweats, weight loss and anemia, and the case was diagnosed as accelerated CML.

Conclusion

Our finding adds to the spectrum of both morphology and genetic rearrangements in oncology malignancies. Additional future analyses in similar subject will be necessary to draw firm conclusions.",2015-05-01 +25933103,Metabolomic Profiling of the Nectars of Aquilegia pubescens and A. Canadensis.,"To date, variation in nectar chemistry of flowering plants has not been studied in detail. Such variation exerts considerable influence on pollinator-plant interactions, as well as on flower traits that play important roles in the selection of a plant for visitation by specific pollinators. Over the past 60 years the Aquilegia genus has been used as a key model for speciation studies. In this study, we defined the metabolomic profiles of flower samples of two Aquilegia species, A. Canadensis and A. pubescens. We identified a total of 75 metabolites that were classified into six main categories: organic acids, fatty acids, amino acids, esters, sugars, and unknowns. The mean abundances of 25 of these metabolites were significantly different between the two species, providing insights into interspecies variation in floral chemistry. Using the PlantSEED biochemistry database, we found that the majority of these metabolites are involved in biosynthetic pathways. Finally, we explored the annotated genome of A. coerulea, using the PlantSEED pipeline and reconstructed the metabolic network of Aquilegia. This network, which contains the metabolic pathways involved in generating the observed chemical variation, is now publicly available from the DOE Systems Biology Knowledge Base (KBase; http://kbase.us).",2015-05-01 +26357264,Improving Retrieval Efficacy of Homology Searches Using the False Discovery Rate.,"Over the past few decades, discovery based on sequence homology has become a widely accepted practice. Consequently, comparative accuracy of retrieval algorithms (e.g., BLAST) has been rigorously studied for improvement. 
Unlike most components of retrieval algorithms, the E-value threshold criterion has yet to be thoroughly investigated. An investigation of the threshold is important as it exclusively dictates which sequences are declared relevant and irrelevant. In this paper, we introduce the false discovery rate (FDR) statistic as a replacement for the uniform threshold criterion in order to improve efficacy in retrieval systems. Using NCBI's BLAST and PSI-BLAST software packages, we demonstrate the applicability of such a replacement in both non-iterative (BLASTFDR) and iterative (PSI-BLAST(FDR)) homology searches. For each application, we performed an evaluation of retrieval efficacy with five different multiple testing methods on a large training database. For each algorithm, we choose the best performing method, Benjamini-Hochberg, as the default statistic. As measured by the threshold average precision, BLAST(FDR) yielded 14.1 percent better retrieval performance than BLAST on a large (5,161 queries) test database and PSI-BLAST(FDR) attained 11.8 percent better retrieval performance than PSI-BLAST. The C++ source code specific to BLAST(FDR) and PSI-BLAST(FDR) and instructions are available at http://www.cs.mtsu.edu/~hcarroll/blast_fdr/.",2015-05-01 +26090295,Individualized Risk of Surgical Complications: An Application of the Breast Reconstruction Risk Assessment Score.,"

Background

Risk discussion is a central tenet of the dialogue between surgeon and patient. Risk calculators have recently offered a new way to integrate evidence-based practice into the discussion of individualized patient risk and expectation management. Focusing on the comprehensive Tracking Operations and Outcomes for Plastic Surgeons (TOPS) database, we endeavored to add plastic surgical outcomes to the previously developed Breast Reconstruction Risk Assessment (BRA) score.

Methods

The TOPS database from 2008 to 2011 was queried for patients undergoing breast reconstruction. Regression models were constructed for the following complications: seroma, dehiscence, surgical site infection (SSI), explantation, flap failure, reoperation, and overall complications.

Results

Of 11,992 cases, 4439 met inclusion criteria. Overall complication rate was 15.9%, with rates of 3.4% for seroma, 4.0% for SSI, 6.1% for dehiscence, 3.7% for explantation, 7.0% for flap loss, and 6.4% for reoperation. Individualized risk models were developed with acceptable goodness of fit, accuracy, and internal validity. Distribution of overall complication risk was broad and asymmetric, meaning that the average risk was often a poor estimate of the risk for any given patient. These models were added to the previously developed open-access version of the risk calculator, available at http://www.BRAscore.org.

Conclusions

Population-based measures of risk may not accurately reflect risk for many individual patients. In this era of increasing emphasis on evidence-based medicine, we have developed a breast reconstruction risk assessment calculator from the robust TOPS database. The BRA Score tool can aid in individualizing-and quantifying-risk to better inform surgical decision making and better manage patient expectations.",2015-05-01 +27346987,IntraFace. ,"Within the last 20 years, there has been an increasing interest in the computer vision community in automated facial image analysis algorithms. This has been driven by applications in animation, market research, autonomous-driving, surveillance, and facial editing among others. To date, there exist several commercial packages for specific facial image analysis tasks such as facial expression recognition, facial attribute analysis or face tracking. However, free and easy-to-use software that incorporates all these functionalities is unavailable. This paper presents IntraFace (IF), a publicly-available software package for automated facial feature tracking, head pose estimation, facial attribute recognition, and facial expression analysis from video. In addition, IF includes a newly developed technique for unsupervised synchrony detection to discover correlated facial behavior between two or more persons, a relatively unexplored problem in facial image analysis. In tests, IF achieved state-of-the-art results for emotion expression and action unit detection in three databases, FERA, CK+ and RU-FACS; measured audience reaction to a talk given by one of the authors; and discovered synchrony for smiling in videos of parent-infant interaction. IF is free of charge for academic use at http://www.humansensing.cs.cmu.edu/intraface/.",2015-05-01 +25927199,Optimally choosing PWM motif databases and sequence scanning approaches based on ChIP-seq data.,"

Background

For many years now, binding preferences of Transcription Factors have been described by so called motifs, usually mathematically defined by position weight matrices or similar models, for the purpose of predicting potential binding sites. However, despite the availability of thousands of motif models in public and commercial databases, a researcher who wants to use them is left with many competing methods of identifying potential binding sites in a genome of interest and there is little published information regarding the optimality of different choices. Thanks to the availability of large number of different motif models as well as a number of experimental datasets describing actual binding of TFs in hundreds of TF-ChIP-seq pairs, we set out to perform a comprehensive analysis of this matter.

Results

We focus on the task of identifying potential transcription factor binding sites in the human genome. Firstly, we provide a comprehensive comparison of the coverage and quality of models available in different databases, showing that the public databases have comparable TFs coverage and better motif performance than commercial databases. Secondly, we compare different motif scanners showing that, regardless of the database used, the tools developed by the scientific community outperform the commercial tools. Thirdly, we calculate for each motif a detection threshold optimizing the accuracy of prediction. Finally, we provide an in-depth comparison of different methods of choosing thresholds for all motifs a priori. Surprisingly, we show that selecting a common false-positive rate gives results that are the least biased by the information content of the motif and therefore most uniformly accurate.

Conclusion

We provide a guide for researchers working with transcription factor motifs. It is supplemented with detailed results of the analysis and the benchmark datasets at http://bioputer.mimuw.edu.pl/papers/motifs/ .",2015-05-01 +24167589,Experimental design-based functional mining and characterization of high-throughput sequencing data in the sequence read archive.,"High-throughput sequencing technology, also called next-generation sequencing (NGS), has the potential to revolutionize the whole process of genome sequencing, transcriptomics, and epigenetics. Sequencing data is captured in a public primary data archive, the Sequence Read Archive (SRA). As of January 2013, data from more than 14,000 projects have been submitted to SRA, which is double that of the previous year. Researchers can download raw sequence data from SRA website to perform further analyses and to compare with their own data. However, it is extremely difficult to search entries and download raw sequences of interests with SRA because the data structure is complicated, and experimental conditions along with raw sequences are partly described in natural language. Additionally, some sequences are of inconsistent quality because anyone can submit sequencing data to SRA with no quality check. Therefore, as a criterion of data quality, we focused on SRA entries that were cited in journal articles. We extracted SRA IDs and PubMed IDs (PMIDs) from SRA and full-text versions of journal articles and retrieved 2748 SRA ID-PMID pairs. We constructed a publication list referring to SRA entries. Since, one of the main themes of -omics analyses is clarification of disease mechanisms, we also characterized SRA entries by disease keywords, according to the Medical Subject Headings (MeSH) extracted from articles assigned to each SRA entry. We obtained 989 SRA ID-MeSH disease term pairs, and constructed a disease list referring to SRA data. 
We previously developed feature profiles of diseases in a system called ""Gendoo"". We generated hyperlinks between diseases extracted from SRA and the feature profiles of it. The developed project, publication and disease lists resulting from this study are available at our web service, called ""DBCLS SRA"" (http://sra.dbcls.jp/). This service will improve accessibility to high-quality data from SRA.",2013-10-22 +26048599,Sparse multi-view matrix factorization: a multivariate approach to multiple tissue comparisons.,"

Motivation

Within any given tissue, gene expression levels can vary extensively among individuals. Such heterogeneity can be caused by genetic and epigenetic variability and may contribute to disease. The abundance of experimental data now enables the identification of features of gene expression profiles that are shared across tissues and those that are tissue-specific. While most current research is concerned with characterizing differential expression by comparing mean expression profiles across tissues, it is believed that a significant difference in a gene expression's variance across tissues may also be associated with molecular mechanisms that are important for tissue development and function.

Results

We propose a sparse multi-view matrix factorization (sMVMF) algorithm to jointly analyse gene expression measurements in multiple tissues, where each tissue provides a different 'view' of the underlying organism. The proposed methodology can be interpreted as an extension of principal component analysis in that it provides the means to decompose the total sample variance in each tissue into the sum of two components: one capturing the variance that is shared across tissues and one isolating the tissue-specific variances. sMVMF has been used to jointly model mRNA expression profiles in three tissues obtained from a large and well-phenotyped twins cohort, TwinsUK. Using sMVMF, we are able to prioritize genes based on whether their variation patterns are specific to each tissue. Furthermore, using DNA methylation profiles available, we provide supporting evidence that adipose-specific gene expression patterns may be driven by epigenetic effects.

Availability and implementation

Python code is available at http://wwwf.imperial.ac.uk/~gmontana/.

Contact

giovanni.montana@kcl.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-04 +23160416,Biocuration workflows and text mining: overview of the BioCreative 2012 Workshop Track II.,"Manual curation of data from the biomedical literature is a rate-limiting factor for many expert curated databases. Despite the continuing advances in biomedical text mining and the pressing needs of biocurators for better tools, few existing text-mining tools have been successfully integrated into production literature curation systems such as those used by the expert curated databases. To close this gap and better understand all aspects of literature curation, we invited submissions of written descriptions of curation workflows from expert curated databases for the BioCreative 2012 Workshop Track II. We received seven qualified contributions, primarily from model organism databases. Based on these descriptions, we identified commonalities and differences across the workflows, the common ontologies and controlled vocabularies used and the current and desired uses of text mining for biocuration. Compared to a survey done in 2009, our 2012 results show that many more databases are now using text mining in parts of their curation workflows. In addition, the workshop participants identified text-mining aids for finding gene names and symbols (gene indexing), prioritization of documents for curation (document triage) and ontology concept assignment as those most desired by the biocurators. DATABASE URL: http://www.biocreative.org/tasks/bc-workshop-2012/workflow/.",2012-11-17 +26671795,Cophylogenetic Reconciliation with ILP.,"In this paper, we present an integer linear programming (ILP) approach, called CoRe-ILP, for finding an optimal time consistent cophylogenetic host-parasite reconciliation under the cophylogenetic event model with the events cospeciation, duplication, sorting, host switch, and failure to diverge. 
Instead of assuming event costs, a simplified model is used, maximizing primarily for cospeciations and secondarily minimizing host switching events. Duplications, sortings, and failure to diverge events are not explicitly scored. Different from existing event based reconciliation methods, CoRe-ILP can use (approximate) phylogenetic branch lengths for filtering possible ancestral host-parasite interactions. Experimentally, it is shown that CoRe-ILP can successfully use branch length information and performs well for biological and simulated data sets. The results of CoRe-ILP are compared with the results of the reconciliation tools Jane 4, Treemap 3b, NOTUNG 2.8 Beta, and Ranger-DTL. Algorithm CoRe-ILP is implemented using IBM ILOG CPLEX Optimizer 12.6 and is freely available from http://pacosy.informatik.uni-leipzig.de/core-ilp.",2015-11-01 +22110027,DNAtraffic--a new database for systems biology of DNA dynamics during the cell life.,"DNAtraffic (http://dnatraffic.ibb.waw.pl/) is dedicated to be a unique comprehensive and richly annotated database of genome dynamics during the cell life. It contains extensive data on the nomenclature, ontology, structure and function of proteins related to the DNA integrity mechanisms such as chromatin remodeling, histone modifications, DNA repair and damage response from eight organisms: Homo sapiens, Mus musculus, Drosophila melanogaster, Caenorhabditis elegans, Saccharomyces cerevisiae, Schizosaccharomyces pombe, Escherichia coli and Arabidopsis thaliana. DNAtraffic contains comprehensive information on the diseases related to the assembled human proteins. DNAtraffic is richly annotated in the systemic information on the nomenclature, chemistry and structure of DNA damage and their sources, including environmental agents or commonly used drugs targeting nucleic acids and/or proteins involved in the maintenance of genome stability. 
One of the aims of the DNAtraffic database is to create the first platform of the combinatorial complexity of DNA network analysis. The database includes illustrations of pathways, damage, proteins and drugs. Since DNAtraffic is designed to cover a broad spectrum of scientific disciplines, it has to be extensively linked to numerous external data sources. Our database represents the result of the manual annotation work aimed at making DNAtraffic much more useful for a wide range of systems biology applications.",2011-11-22 +26781368,DNA barcoding of fungi causing infections in humans and animals.,"Correct species identification is becoming increasingly important in clinical diagnostics. Till now, many mycological laboratories rely on conventional phenotypic identification. But this is slow and strongly operator-dependent. Therefore, to improve the quality of pathogen identification, rapid, reliable, and objective identification methods are essential. One of the most encouraging approaches is molecular barcoding using the internal transcribed spacer (ITS) of the rDNA, which is rapid, easily achievable, accurate, and applicable directly from clinical specimens. It relies on the comparison of a single ITS sequence with a curated reference database. The International Society for Human and Animal Mycology (ISHAM) working group for DNA barcoding has recently established such a database, focusing on the majority of human and animal pathogenic fungi (ISHAM-ITS, freely accessible at http://www.isham.org/ or directly from http://its.mycologylab.org). For some fungi the use of secondary barcodes may be necessary.",2015-04-30 +27557093,Climate Change and Future Pollen Allergy in Europe.,"

Background

Globally, pollen allergy is a major public health problem, but a fundamental unknown is the likely impact of climate change. To our knowledge, this is the first study to quantify the consequences of climate change upon pollen allergy in humans.

Objectives

We produced quantitative estimates of the potential impact of climate change upon pollen allergy in humans, focusing upon common ragweed (Ambrosia artemisiifolia) in Europe.

Methods

A process-based model estimated the change in ragweed's range under climate change. A second model simulated current and future ragweed pollen levels. These findings were translated into health burdens using a dose-response curve generated from a systematic review and from current and future population data. Models considered two different suites of regional climate/pollen models, two greenhouse gas emissions scenarios [Representative Concentration Pathways (RCPs) 4.5 and 8.5], and three different plant invasion scenarios.

Results

Our primary estimates indicated that sensitization to ragweed will more than double in Europe, from 33 to 77 million people, by 2041-2060. According to our projections, sensitization will increase in countries with an existing ragweed problem (e.g., Hungary, the Balkans), but the greatest proportional increases will occur where sensitization is uncommon (e.g., Germany, Poland, France). Higher pollen concentrations and a longer pollen season may also increase the severity of symptoms. Our model projections were driven predominantly by changes in climate (66%) but were also influenced by current trends in the spread of this invasive plant species. Assumptions about the rate at which ragweed spreads throughout Europe had a large influence upon the results.

Conclusions

Our quantitative estimates indicate that ragweed pollen allergy will become a common health problem across Europe, expanding into areas where it is currently uncommon. Control of ragweed spread may be an important adaptation strategy in response to climate change. Citation: Lake IR, Jones NR, Agnew M, Goodess CM, Giorgi F, Hamaoui-Laguel L, Semenov MA, Solomon F, Storkey J, Vautard R, Epstein MM. 2017. Climate change and future pollen allergy in Europe. Environ Health Perspect 125:385-391; http://dx.doi.org/10.1289/EHP173.",2016-08-24 +25258492,Tbl2KnownGene: A command-line program to convert NCBI.tbl to UCSC knownGene.txt data file.,"

Unlabelled

The schema for UCSC Known Genes (knownGene.txt) has been widely adopted for use in both standard and custom downstream analysis tools/scripts. For many popular model organisms (e.g. Arabidopsis), sequence and annotation data tables (including ""knownGene.txt"") have not yet been made available to the public. Therefore, it is of interest to describe Tbl2KnownGene, a .tbl file parser that can process the contents of a NCBI .tbl file and produce a UCSC Known Genes annotation feature table. The algorithm is tested with chromosome datasets from Arabidopsis genome (TAIR10). The Tbl2KnownGene parser finds utility for data with other organisms having similar .tbl annotations.

Availability

Perl scripts and required input files are available on the web at http://thoth.indstate.edu/~ybai2/Tbl2KnownGene/ index.html.",2014-08-30 +24480173,SITDEM: a simulation tool for disease/endpoint models of association studies based on single nucleotide polymorphism genotypes.,"The association analysis between single nucleotide polymorphisms (SNPs) and disease or endpoint in genome-wide association studies (GWAS) has been considered as a powerful strategy for investigating genetic susceptibility and for identifying significant biomarkers. The statistical analysis approaches with simulated data have been widely used to review experimental designs and performance measurements. In recent years, a number of authors have proposed methods for the simulation of biological data in the genomic field. However, these methods use large-scale genomic data as a reference to simulate experiments, which may limit the use of the methods in the case where the data in specific studies are not available. Few methods use experimental results or observed parameters for simulation. The goal of this study is to develop a Web application called SITDEM to simulate disease/endpoint models in three different approaches based on only parameters observed in GWAS. In our simulation, a key task is to compute the probability of genotypes. Based on that, we randomly sample simulation data. Simulation results are shown as a function of p-value against odds ratio or relative risk of a SNP in dominant and recessive models. Our simulation results show the potential of SITDEM for simulating genotype data. SITDEM could be particularly useful for investigating the relationship among observed parameters for target SNPs and for estimating the number of variables (SNPs) required to result in significant p-values in multiple comparisons. 
The proposed simulation tool is freely available at http://www.snpmodel.com.",2013-12-19 +25925572,"PolySearch2: a significantly improved text-mining system for discovering associations between human diseases, genes, drugs, metabolites, toxins and more.","PolySearch2 (http://polysearch.ca) is an online text-mining system for identifying relationships between biomedical entities such as human diseases, genes, SNPs, proteins, drugs, metabolites, toxins, metabolic pathways, organs, tissues, subcellular organelles, positive health effects, negative health effects, drug actions, Gene Ontology terms, MeSH terms, ICD-10 medical codes, biological taxonomies and chemical taxonomies. PolySearch2 supports a generalized 'Given X, find all associated Ys' query, where X and Y can be selected from the aforementioned biomedical entities. An example query might be: 'Find all diseases associated with Bisphenol A'. To find its answers, PolySearch2 searches for associations against comprehensive collections of free-text collections, including local versions of MEDLINE abstracts, PubMed Central full-text articles, Wikipedia full-text articles and US Patent application abstracts. PolySearch2 also searches 14 widely used, text-rich biological databases such as UniProt, DrugBank and Human Metabolome Database to improve its accuracy and coverage. PolySearch2 maintains an extensive thesaurus of biological terms and exploits the latest search engine technology to rapidly retrieve relevant articles and databases records. 
PolySearch2 also generates, ranks and annotates associative candidates and present results with relevancy statistics and highlighted key sentences to facilitate user interpretation.",2015-04-29 +25926497,Global Multilocus Sequence Type Analysis of Chlamydia trachomatis Strains from 16 Countries.,"The Uppsala University Chlamydia trachomatis multilocus sequence type (MLST) database (http://mlstdb.bmc.uu.se) is based on five target regions (non-housekeeping genes) and the ompA gene. Each target has various numbers of alleles-hctB, 89; CT058, 51; CT144, 30; CT172, 38; and pbpB, 35-derived from 13 studies. Our aims were to perform an overall analysis of all C. trachomatis MLST sequence types (STs) in the database, examine STs with global spread, and evaluate the phylogenetic capability by using the five targets. A total of 415 STs were recognized from 2,089 specimens. The addition of 49 ompA gene variants created 459 profiles. ST variation and their geographical distribution were characterized using eBURST and minimum spanning tree analyses. There were 609 samples from men having sex with men (MSM), with 4 predominating STs detected in this group, comprising 63% of MSM cases. Four other STs predominated among 1,383 heterosexual cases comprising, 31% of this group. The diversity index in ocular trachoma cases was significantly lower than in sexually transmitted chlamydia infections. Predominating STs were identified in 12 available C. trachomatis whole genomes which were compared to 22 C. trachomatis full genomes without predominating STs. No specific gene in the 12 genomes with predominating STs could be linked to successful spread of certain STs. Phylogenetic analysis showed that MLST targets provide a tree similar to trees based on whole-genome analysis. The presented MLST scheme identified C. trachomatis strains with global spread. 
It provides a tool for epidemiological investigations and is useful for phylogenetic analyses.",2015-04-29 +27472835,IL-33 Drives Augmented Responses to Ozone in Obese Mice.,"

Background

Ozone increases IL-33 in the lungs, and obesity augments the pulmonary effects of acute ozone exposure.

Objectives

We assessed the role of IL-33 in the augmented effects of ozone observed in obese mice.

Methods

Lean wildtype and obese db/db mice were pretreated with antibodies blocking the IL-33 receptor, ST2, and then exposed to ozone (2 ppm for 3 hr). Airway responsiveness was assessed, bronchoalveolar lavage (BAL) was performed, and lung cells harvested for flow cytometry 24 hr later. Effects of ozone were also assessed in obese and lean mice deficient in γδ T cells and their wildtype controls.

Results and discussion

Ozone caused greater increases in BAL IL-33, neutrophils, and airway responsiveness in obese than lean mice. Anti-ST2 reduced ozone-induced airway hyperresponsiveness and inflammation in obese mice but had no effect in lean mice. Obesity also augmented ozone-induced increases in BAL CXCL1 and IL-6, and in BAL type 2 cytokines, whereas anti-ST2 treatment reduced these cytokines. In obese mice, ozone increased lung IL-13+ innate lymphoid cells type 2 (ILC2) and IL-13+ γδ T cells. Ozone increased ST2+ γδ T cells, indicating that these cells can be targets of IL-33, and γδ T cell deficiency reduced obesity-related increases in the response to ozone, including increases in type 2 cytokines.

Conclusions

Our data indicate that IL-33 contributes to augmented responses to ozone in obese mice. Obesity and ozone also interacted to promote type 2 cytokine production in γδ T cells and ILC2 in the lungs, which may contribute to the observed effects of IL-33. Citation: Mathews JA, Krishnamoorthy N, Kasahara DI, Cho Y, Wurmbrand AP, Ribeiro L, Smith D, Umetsu D, Levy BD, Shore SA. 2017. IL-33 drives augmented responses to ozone in obese mice. Environ Health Perspect 125:246-253; http://dx.doi.org/10.1289/EHP272.",2016-07-29 +26484273,"Draft genome sequence of Paenibacillus algorifonticola sp. nov., an antimicrobial-producing strain.","Paenibacillus algorifonticola sp. nov. is isolated from a cold spring sample from Xinjiang Uyghur Autonomous Region (China), a novel strain that can produce antimicrobial substance against human pathogenic bacteria and fungi, including Staphylococcus aureus and Candida albicans. Here we report a 7.60-Mb assembly of its genome sequence and other useful information, including the coding sequences (CDSs) responsible for the biosynthesis of antibacterial factors, anaerobic respiration and several immune-associated reactions. Also, prospective studies on P. algorifonticola sp. nov. in the cold spring might offer a potential source for the discovery of bioactive compounds with medical value. The data repository is deposited on the website http://www.ncbi.nlm.nih.gov/nuccore/LAQO00000000 and the accession number is LAQO00000000.",2015-07-02 +25536966,Disk-based compression of data from genome sequencing.,"

Motivation

High-coverage sequencing data have significant, yet hard to exploit, redundancy. Most FASTQ compressors cannot efficiently compress the DNA stream of large datasets, since the redundancy between overlapping reads cannot be easily captured in the (relatively small) main memory. More interesting solutions for this problem are disk based, where the better of these two, from Cox et al. (2012), is based on the Burrows-Wheeler transform (BWT) and achieves 0.518 bits per base for a 134.0 Gbp human genome sequencing collection with almost 45-fold coverage.

Results

We propose overlapping reads compression with minimizers, a compression algorithm dedicated to sequencing reads (DNA only). Our method makes use of a conceptually simple and easily parallelizable idea of minimizers, to obtain 0.317 bits per base as the compression ratio, allowing the 134.0 Gbp dataset to fit into only 5.31 GB of space.

Availability and implementation

http://sun.aei.polsl.pl/orcom under a free license.

Contact

sebastian.deorowicz@polsl.pl

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-22 +24650281,The characteristic direction: a geometrical approach to identify differentially expressed genes.,"

Background

Identifying differentially expressed genes (DEG) is a fundamental step in studies that perform genome wide expression profiling. Typically, DEG are identified by univariate approaches such as Significance Analysis of Microarrays (SAM) or Linear Models for Microarray Data (LIMMA) for processing cDNA microarrays, and differential gene expression analysis based on the negative binomial distribution (DESeq) or Empirical analysis of Digital Gene Expression data in R (edgeR) for RNA-seq profiling.

Results

Here we present a new geometrical multivariate approach to identify DEG called the Characteristic Direction. We demonstrate that the Characteristic Direction method is significantly more sensitive than existing methods for identifying DEG in the context of transcription factor (TF) and drug perturbation responses over a large number of microarray experiments. We also benchmarked the Characteristic Direction method using synthetic data, as well as RNA-Seq data. A large collection of microarray expression data from TF perturbations (73 experiments) and drug perturbations (130 experiments) extracted from the Gene Expression Omnibus (GEO), as well as an RNA-Seq study that profiled genome-wide gene expression and STAT3 DNA binding in two subtypes of diffuse large B-cell Lymphoma, were used for benchmarking the method using real data. ChIP-Seq data identifying DNA binding sites of the perturbed TFs, as well as known drug targets of the perturbing drugs, were used as prior knowledge silver-standard for validation. In all cases the Characteristic Direction DEG calling method outperformed other methods. We find that when drugs are applied to cells in various contexts, the proteins that interact with the drug-targets are differentially expressed and more of the corresponding genes are discovered by the Characteristic Direction method. In addition, we show that the Characteristic Direction conceptualization can be used to perform improved gene set enrichment analyses when compared with the gene-set enrichment analysis (GSEA) and the hypergeometric test.

Conclusions

The application of the Characteristic Direction method may shed new light on relevant biological mechanisms that would have remained undiscovered by the current state-of-the-art DEG methods. The method is freely accessible via various open source code implementations using four popular programming languages: R, Python, MATLAB and Mathematica, all available at: http://www.maayanlab.net/CD.",2014-03-21 +25399028,SYSBIONS: nested sampling for systems biology.,"

Motivation

Model selection is a fundamental part of the scientific process in systems biology. Given a set of competing hypotheses, we routinely wish to choose the one that best explains the observed data. In the Bayesian framework, models are compared via Bayes factors (the ratio of evidences), where a model's evidence is the support given to the model by the data. A parallel interest is inferring the distribution of the parameters that define a model. Nested sampling is a method for the computation of a model's evidence and the generation of samples from the posterior parameter distribution.

Results

We present a C-based, GPU-accelerated implementation of nested sampling that is designed for biological applications. The algorithm follows a standard routine with optional extensions and additional features. We provide a number of methods for sampling from the prior subject to a likelihood constraint.

Availability and implementation

The software SYSBIONS is available from http://www.theosysbio.bio.ic.ac.uk/resources/sysbions/

Contact

m.stumpf@imperial.ac.uk, robert.johnson11@imperial.ac.uk.",2014-10-16 +22319563,ECOMICS: a web-based toolkit for investigating the biomolecular web in ecosystems using a trans-omics approach.,"Ecosystems can be conceptually thought of as interconnected environmental and metabolic systems, in which small molecules to macro-molecules interact through diverse networks. State-of-the-art technologies in post-genomic science offer ways to inspect and analyze this biomolecular web using omics-based approaches. Exploring useful genes and enzymes, as well as biomass resources responsible for anabolism and catabolism within ecosystems will contribute to a better understanding of environmental functions and their application to biotechnology. Here we present ECOMICS, a suite of web-based tools for ECosystem trans-OMICS investigation that target metagenomic, metatranscriptomic, and meta-metabolomic systems, including biomacromolecular mixtures derived from biomass. ECOMICS is made of four integrated webtools. E-class allows for the sequence-based taxonomic classification of eukaryotic and prokaryotic ribosomal data and the functional classification of selected enzymes. FT2B allows for the digital processing of NMR spectra for downstream metabolic or chemical phenotyping. Bm-Char allows for statistical assignment of specific compounds found in lignocellulose-based biomass, and HetMap is a data matrix generator and correlation calculator that can be applied to trans-omics datasets as analyzed by these and other web tools. This web suite is unique in that it allows for the monitoring of biomass metabolism in a particular environment, i.e., from macromolecular complexes (FT2DB and Bm-Char) to microbial composition and degradation (E-class), and makes possible the understanding of relationships between molecular and microbial elements (HetMap). 
This website is available to the public domain at: https://database.riken.jp/ecomics/.",2012-02-01 +23203985,Dfam: a database of repetitive DNA based on profile hidden Markov models.,"We present a database of repetitive DNA elements, called Dfam (http://dfam.janelia.org). Many genomes contain a large fraction of repetitive DNA, much of which is made up of remnants of transposable elements (TEs). Accurate annotation of TEs enables research into their biology and can shed light on the evolutionary processes that shape genomes. Identification and masking of TEs can also greatly simplify many downstream genome annotation and sequence analysis tasks. The commonly used TE annotation tools RepeatMasker and Censor depend on sequence homology search tools such as cross_match and BLAST variants, as well as Repbase, a collection of known TE families each represented by a single consensus sequence. Dfam contains entries corresponding to all Repbase TE entries for which instances have been found in the human genome. Each Dfam entry is represented by a profile hidden Markov model, built from alignments generated using RepeatMasker and Repbase. When used in conjunction with the hidden Markov model search tool nhmmer, Dfam produces a 2.9% increase in coverage over consensus sequence search methods on a large human benchmark, while maintaining low false discovery rates, and coverage of the full human genome is 54.5%. The website provides a collection of tools and data views to support improved TE curation and annotation efforts. Dfam is also available for download in flat file format or in the form of MySQL table dumps.",2012-11-30 +23203891,2P2Idb: a structural database dedicated to orthosteric modulation of protein-protein interactions.,"Protein-protein interactions are considered as one of the next generation of therapeutic targets. Specific tools thus need to be developed to tackle this challenging chemical space. 
In an effort to derive some common principles from recent successes, we have built 2P2Idb (freely accessible at http://2p2idb.cnrs-mrs.fr), a hand-curated structural database dedicated to protein-protein interactions with known orthosteric modulators. It includes all interactions for which both the protein-protein and protein-ligand complexes have been structurally characterized. A web server provides links to related sites of interest, binding affinity data, pre-calculated structural information about protein-protein interfaces and 3D interactive views through java applets. Comparison of interfaces in 2P2Idb to those of representative datasets of heterodimeric complexes has led to the identification of geometrical parameters and residue properties to assess the druggability of protein-protein complexes. A tool is proposed to calculate a series of biophysical and geometrical parameters that characterize protein-protein interfaces. A large range of descriptors are computed including, buried accessible surface area, gap volume, non-bonded contacts, hydrogen-bonds, atom and residue composition, number of segments and secondary structure contribution. All together the 2P2I database represents a structural source of information for scientists from academic institutions or pharmaceutical industries.",2012-11-30 +23232527,[Construction of 3D model of CD28 chimeric antibody with its antigen docked].,"

Aim

To construct a 3D model of the chimeric antibodies (AntiCD28: ch-2F5) with corresponding antigen molecule docked to theoretically verify the rationality of the binding of antibody with its antigen and to provide a method of 3D identification between antigen and antibody and spatial structure analysis.

Methods

We analyzed the sequence by submitting it to http://www.ncbi.nlm.nih.gov/ and made an integrated comparison using the 3 databases of GenBank, Protein data bank and GENO-3D. The 3D model was constructed by Swiss-model homology modeling server and molecular docking online was performed by GRAMM-X Protein Docking Web Server. Chimeric heavy chain, light chain, heavy-light chain complex, heavy-light chain and antigen complex were displayed and photographed by the Chimera Software. Meanwhile, the spatial structures of heavy, light chains, variable region, constant region, CDR and frame area were marked by different colours respectively to exhibit the 3D structure on every side.

Results

The 3D structure of the heavy-light chain and antigen complex we constructed agreed well with the theory of antigen binding to antibody molecules.

Conclusion

The structure of the chimeric antibody we constructed with the bioinformatic method was in accordance with the general structure of antibody, and its antigen binding site was also consistent with the molecular theory. Thus, the model helps to analyze the 3D structure of antibody and antigen-antibody interaction.",2012-12-01 +22086956,The SEQanswers wiki: a wiki database of tools for high-throughput sequencing analysis.,"Recent advances in sequencing technology have created unprecedented opportunities for biological research. However, the increasing throughput of these technologies has created many challenges for data management and analysis. As the demand for sophisticated analyses increases, the development time of software and algorithms is outpacing the speed of traditional publication. As technologies continue to be developed, methods change rapidly, making publications less relevant for users. The SEQanswers wiki (SEQwiki) is a wiki database that is actively edited and updated by the members of the SEQanswers community (http://SEQanswers.com/). The wiki provides an extensive catalogue of tools, technologies and tutorials for high-throughput sequencing (HTS), including information about HTS service providers. It has been implemented in MediaWiki with the Semantic MediaWiki and Semantic Forms extensions to collect structured data, providing powerful navigation and reporting features. Within 2 years, the community has created pages for over 500 tools, with approximately 400 literature references and 600 web links. This collaborative effort has made SEQwiki the most comprehensive database of HTS tools anywhere on the web. The wiki includes task-focused mini-reviews of commonly used tools, and a growing collection of more than 100 HTS service providers. 
SEQwiki is available at: http://wiki.SEQanswers.com/.",2011-11-15 +22086951,The UCSC Genome Browser database: extensions and updates 2011.,"The University of California Santa Cruz Genome Browser (http://genome.ucsc.edu) offers online public access to a growing database of genomic sequence and annotations for a wide variety of organisms. The Browser is an integrated tool set for visualizing, comparing, analyzing and sharing both publicly available and user-generated genomic data sets. In the past year, the local database has been updated with four new species assemblies, and we anticipate another four will be released by the end of 2011. Further, a large number of annotation tracks have been either added, updated by contributors, or remapped to the latest human reference genome. Among these are new phenotype and disease annotations, UCSC genes, and a major dbSNP update, which required new visualization methods. Growing beyond the local database, this year we have introduced 'track data hubs', which allow the Genome Browser to provide access to remotely located sets of annotations. This feature is designed to significantly extend the number and variety of annotation tracks that are publicly available for visualization and analysis from within our site. We have also introduced several usability features including track search and a context-sensitive menu of options available with a right-click anywhere on the Browser's image.",2011-11-15 +26958596,Analytical purification of a 60-kDa target protein of artemisinin detected in Trypanosoma brucei brucei.,"Here we describe the isolation and purity determination of Trypanosoma brucei (T. b.) brucei candidate target proteins of artemisinin. The candidate target proteins were detected and purified from their biological source (T. b. 
brucei lysate) using the diazirine-free biotinylated probe 5 for an affinity binding to a streptavidin-tagged resin and, subsequently, the labeled target proteins were purified by sodium dodecyl sulfate-polyacrylamide gel electrophoresis (SDS-PAGE). We herein showed the electrophoresis gel and the immunoblotting film containing the 60-kDa trypanosomal candidate target protein of artemisinin as a single band, which was visualized on-gel by the reverse-staining method and on a Western blotting film by enhanced chemiluminescence. The data provided in this article are related to the original research article ""Biotinylated probes of artemisinin with labeling affinity toward Trypanosoma brucei brucei target proteins"", by Konziase (Anal. Biochem., vol. 482, 2015, pp. 25-31. http://dx.doi.org/10.1016/j.ab.2015.04.020).",2015-10-03 +24813215,compcodeR--an R package for benchmarking differential expression methods for RNA-seq data.,"

Unlabelled

compcodeR is an R package for benchmarking of differential expression analysis methods, in particular, methods developed for analyzing RNA-seq data. The package provides functionality for simulating realistic RNA-seq count datasets, an interface to several of the most commonly used differential expression analysis methods and extensive functionality for evaluating and comparing different approaches on real and simulated data.

Availability and implementation

compcodeR is available from http://www.bioconductor.org/packages/release/bioc/html/compcodeR.html.",2014-05-09 +21492431,A quantitative literature-curated gold standard for kinase-substrate pairs.,"We describe the Yeast Kinase Interaction Database (KID, http://www.moseslab.csb.utoronto.ca/KID/), which contains high- and low-throughput data relevant to phosphorylation events. KID includes 6,225 low-throughput and 21,990 high-throughput interactions, from greater than 35,000 experiments. By quantitatively integrating these data, we identified 517 high-confidence kinase-substrate pairs that we consider a gold standard. We show that this gold standard can be used to assess published high-throughput datasets, suggesting that it will enable similar rigorous assessments in the future.",2011-04-14 +24497972,WBSA: web service for bisulfite sequencing data analysis.,"Whole-Genome Bisulfite Sequencing (WGBS) and genome-wide Reduced Representation Bisulfite Sequencing (RRBS) are widely used to study DNA methylation. However, data analysis is complicated, lengthy, and hampered by a lack of seamless analytical pipelines. To address these issues, we developed a convenient, stable, and efficient web service called Web Service for Bisulfite Sequencing Data Analysis (WBSA) to analyze bisulfate sequencing data. WBSA focuses on not only CpG methylation, which is the most common biochemical modification in eukaryotic DNA, but also non-CG methylation, which have been observed in plants, iPS cells, oocytes, neurons and stem cells of human. WBSA comprises three main modules as follows: WGBS data analysis, RRBS data analysis, and differentially methylated region (DMR) identification. The WGBS and RRBS modules execute read mapping, methylation site identification, annotation, and advanced analysis, whereas the DMR module identifies actual DMRs and annotates their correlations to genes. WBSA can be accessed and used without charge either online or local version. 
WBSA also includes the executables of the Portable Batch System (PBS) and standalone versions that can be downloaded from the website together with the installation instructions. WBSA is available at no charge for academic users at http://wbsa.big.ac.cn.",2014-01-30 +25907256,Exploring the Molecular Mechanism and Biomakers of Liver Cancer Based on Gene Expression Microarray.,"Liver cancer is one of the most common cancers worldwide with high morbidity and mortality. Its molecular mechanism hasn't been fully understood though many studies have been conducted and thus further researches are still needed to improve the prognosis of liver cancer. Firstly, differentially expressed genes (DEGs) between six Mdr2-knockout (Mdr2-KO) mutant mice samples (3-month-old and 12-month-old) and six control mice samples were identified. Then, the enriched GO terms and KEGG pathways of those DEGs were obtained using the Database for Annotation, Visualization and Integrated Discovery (DAVID, http://david.abcc.ncifcrf.gov/). Finally, protein-protein interactions (PPI) network of those DEGs were constructed using STRING database ( http://www.string-db.org/) and visualized by Cytoscape software, at the same time, genes with high degree were selected out. Several novel biomarkers that might play important roles in liver cancer were identified through the analysis of gene microarray in GEO. Also, some genes such as Tyrobp, Ctss and pathways such as Pathways in cancer, ECM-receptor interaction that had been researched previously were further confirmed in this study. Through the bioinformatics analysis of the gene microarray in GEO, we found some novel biomarkers of liver cancer and further confirmed some known biomarkers.",2015-04-25 +25477242,The Unipept metaproteomics analysis pipeline.,"Unipept (http://unipept.ugent.be) is a web application that offers a user-friendly way to explore the biodiversity of complex metaproteome samples by providing interactive visualizations. 
In this article, the updates and changes to Unipept since its initial release are presented. This includes the addition of interactive sunburst and treeview visualizations to the multipeptide analysis, the foundations of an application programming interface (API) and a command line interface, updated data sources, and the open-sourcing of the entire application under the MIT license.",2015-02-11 +22491036,Modeling protein evolution with several amino acid replacement matrices depending on site rates.,"Most protein substitution models use a single amino acid replacement matrix summarizing the biochemical properties of amino acids. However, site evolution is highly heterogeneous and depends on many factors that influence the substitution patterns. In this paper, we investigate the use of different substitution matrices for different site evolutionary rates. Indeed, the variability of evolutionary rates corresponds to one of the most apparent heterogeneity factors among sites, and there is no reason to assume that the substitution patterns remain identical regardless of the evolutionary rate. We first introduce LG4M, which is composed of four matrices, each corresponding to one discrete gamma rate category (of four). These matrices differ in their amino acid equilibrium distributions and in their exchangeabilities, contrary to the standard gamma model where only the global rate differs from one category to another. Next, we present LG4X, which also uses four different matrices, but leaves aside the gamma distribution and follows a distribution-free scheme for the site rates. All these matrices are estimated from a very large alignment database, and our two models are tested using a large sample of independent alignments. Detailed analysis of resulting matrices and models shows the complexity of amino acid substitutions and the advantage of flexible models such as LG4M and LG4X. 
Both significantly outperform single-matrix models, providing gains of dozens to hundreds of log-likelihood units for most data sets. LG4X obtains substantial gains compared with LG4M, thanks to its distribution-free scheme for site rates. Since LG4M and LG4X display such advantages but require the same memory space and have comparable running times to standard models, we believe that LG4M and LG4X are relevant alternatives to single replacement matrices. Our models, data, and software are available from http://www.atgc-montpellier.fr/models/lg4x.",2012-04-06 +25900537,Aerobic exercise to improve cognitive function in older people without known cognitive impairment.,"

Background

There is increasing evidence that physical activity supports healthy ageing. Exercise is helpful for cardiovascular, respiratory and musculoskeletal systems, among others. Aerobic activity, in particular, improves cardiovascular fitness and, based on recently reported findings, may also have beneficial effects on cognition among older people.

Objectives

To assess the effect of aerobic physical activity, aimed at improving cardiorespiratory fitness, on cognitive function in older people without known cognitive impairment.

Search methods

We searched ALOIS - the Cochrane Dementia and Cognitive Improvement Group's Specialized Register, the Cochrane Controlled Trials Register (CENTRAL) (all years to Issue 2 of 4, 2013), MEDLINE (Ovid SP 1946 to August 2013), EMBASE (Ovid SP 1974 to August 2013), PEDro, SPORTDiscus, Web of Science, PsycINFO (Ovid SP 1806 to August 2013), CINAHL (all dates to August 2013), LILACS (all dates to August 2013), World Health Organization (WHO) International Clinical Trials Registry Platform (ICTRP) (http://apps.who.int/trialsearch), ClinicalTrials.gov (https://clinicaltrials.gov) and Dissertation Abstracts International (DAI) up to 24 August 2013, with no language restrictions.

Selection criteria

We included all published randomised controlled trials (RCTs) comparing the effect on cognitive function of aerobic physical activity programmes with any other active intervention, or no intervention, in cognitively healthy participants aged over 55 years.

Data collection and analysis

Two review authors independently extracted the data from included trials. We grouped cognitive outcome measures into eleven categories covering attention, memory, perception, executive functions, cognitive inhibition, cognitive speed and motor function. We used the mean difference (or standardised mean difference) between groups as the measure of the treatment effect and synthesised data using a random-effects model. We conducted separate analyses to compare aerobic exercise interventions with no intervention and with other exercise, social or cognitive interventions. Also, we performed analyses including only trials in which an increase in the cardiovascular fitness of participants had been demonstrated.

Main results

Twelve trials including 754 participants met our inclusion criteria. Trials were from eight to 26 weeks in duration. We judged all trials to be at moderate or high risk of bias in at least some domains. Reporting of some risk of bias domains was poor. Our analyses comparing aerobic exercise to any active intervention showed no evidence of benefit from aerobic exercise in any cognitive domain. This was also true of our analyses comparing aerobic exercise to no intervention. Analysing only the subgroup of trials in which cardiorespiratory fitness improved in the aerobic exercise group showed that this improvement did not coincide with improvements in any cognitive domains assessed. Our subgroup analyses of aerobic exercise versus flexibility or balance interventions also showed no benefit of aerobic exercise in any cognitive domain. Dropout rates did not differ between aerobic exercise and control groups. No trial reported on adverse effects. Overall none of our analyses showed a cognitive benefit from aerobic exercise even when the intervention was shown to lead to improved cardiorespiratory fitness.

Authors' conclusions

We found no evidence in the available data from RCTs that aerobic physical activities, including those which successfully improve cardiorespiratory fitness, have any cognitive benefit in cognitively healthy older adults. Larger studies examining possible moderators are needed to confirm whether or not aerobic training improves cognition.",2015-04-22 +21875867,Predicting the outcome of renal transplantation.,"

Objective

Renal transplantation has dramatically improved the survival rate of hemodialysis patients. However, with a growing proportion of marginal organs and improved immunosuppression, it is necessary to verify that the established allocation system, mostly based on human leukocyte antigen matching, still meets today's needs. The authors turn to machine-learning techniques to predict, from donor-recipient data, the estimated glomerular filtration rate (eGFR) of the recipient 1 year after transplantation.

Design

The patient's eGFR was predicted using donor-recipient characteristics available at the time of transplantation. Donors' data were obtained from Eurotransplant's database, while recipients' details were retrieved from Charité Campus Virchow-Klinikum's database. A total of 707 renal transplantations from cadaveric donors were included.

Measurements

Two separate datasets were created, taking features with <10% missing values for one and <50% missing values for the other. Four established regressors were run on both datasets, with and without feature selection.

Results

The authors obtained a Pearson correlation coefficient between predicted and real eGFR (COR) of 0.48. The best model for the dataset was a Gaussian support vector machine with recursive feature elimination on the more inclusive dataset. All results are available at http://transplant.molgen.mpg.de/.

Limitations

For now, missing values in the data must be predicted and filled in. The performance is not as high as hoped, but the dataset seems to be the main cause.

Conclusions

Predicting the outcome is possible with the dataset at hand (COR=0.48). Valuable features include age and creatinine levels of the donor, as well as sex and weight of the recipient.",2011-08-28 +26807151,Prediction of donor splice sites using random forest with a new sequence encoding approach.,"

Background

Detection of splice sites plays a key role for predicting the gene structure and thus development of efficient analytical methods for splice site prediction is vital. This paper presents a novel sequence encoding approach based on the adjacent di-nucleotide dependencies in which the donor splice site motifs are encoded into numeric vectors. The encoded vectors are then used as input in Random Forest (RF), Support Vector Machines (SVM) and Artificial Neural Network (ANN), Bagging, Boosting, Logistic regression, kNN and Naïve Bayes classifiers for prediction of donor splice sites.

Results

The performance of the proposed approach is evaluated on the donor splice site sequence data of Homo sapiens, collected from Homo Sapiens Splice Sites Dataset (HS3D). The results showed that RF outperformed all the considered classifiers. Besides, RF achieved higher prediction accuracy than the existing methods viz., MEM, MDD, WMM, MM1, NNSplice and SpliceView, when compared using an independent test dataset.

Conclusion

Based on the proposed approach, we have developed an online prediction server (MaLDoSS) to help the biological community in predicting the donor splice sites. The server is made freely available at http://cabgrid.res.in:8080/maldoss. Due to computational feasibility and high prediction accuracy, the proposed approach is believed to help in predicting the eukaryotic gene structure.",2016-01-22 +26261224,SuccFind: a novel succinylation sites online prediction tool via enhanced characteristic strategy.,"

Unlabelled

Lysine succinylation orchestrates a variety of biological processes. Annotation of succinylation in proteomes is the first-crucial step to decipher physiological roles of succinylation implicated in the pathological processes. In this work, we developed a novel succinylation site online prediction tool, called SuccFind, which is constructed to predict the lysine succinylation sites based on two major categories of characteristics: sequence-derived features and evolutionary-derived information of sequence and via an enhanced feature strategy for further optimizations. The assessment results obtained from cross-validation suggest that SuccFind can provide more instructive guidance for further experimental investigation of protein succinylation.

Availability and implementation

A user-friendly server is freely available on the web at: http://bioinfo.ncu.edu.cn/SuccFind.aspx.

Contact

jdqiu@ncu.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-10 +23292636,TargetHunter: an in silico target identification tool for predicting therapeutic potential of small organic molecules based on chemogenomic database.,"Target identification of the known bioactive compounds and novel synthetic analogs is a very important research field in medicinal chemistry, biochemistry, and pharmacology. It is also a challenging and costly step towards chemical biology and phenotypic screening. In silico identification of potential biological targets for chemical compounds offers an alternative avenue for the exploration of ligand-target interactions and biochemical mechanisms, as well as for investigation of drug repurposing. Computational target fishing mines biologically annotated chemical databases and then maps compound structures into chemogenomical space in order to predict the biological targets. We summarize the recent advances and applications in computational target fishing, such as chemical similarity searching, data mining/machine learning, panel docking, and the bioactivity spectral analysis for target identification. We then described in detail a new web-based target prediction tool, TargetHunter (http://www.cbligand.org/TargetHunter). This web portal implements a novel in silico target prediction algorithm, the Targets Associated with its MOst SImilar Counterparts, by exploring the largest chemogenomical databases, ChEMBL. Prediction accuracy reached 91.1% from the top 3 guesses on a subset of high-potency compounds from the ChEMBL database, which outperformed a published algorithm, multiple-category models. TargetHunter also features an embedded geography tool, BioassayGeoMap, developed to allow the user easily to search for potential collaborators that can experimentally validate the predicted biological target(s) or off target(s). 
TargetHunter therefore provides a promising alternative to bridge the knowledge gap between biology and chemistry, and significantly boost the productivity of chemogenomics researchers for in silico drug design and discovery.",2013-01-05 +25907774,"Gene expression of OCT4, SOX2, KLF4 and MYC (OSKM) induced pluripotent stem cells: identification for potential mechanisms.","

Background

Somatic cells could be reprogrammed to induced pluripotent stem cells (iPS) by ectopic expression of OCT4, SOX2, KLF4 and MYC (OSKM). We aimed to gain insights into the early mechanisms underlying the induction of pluripotency.

Methods

GSE28688 containing 14 gene expression profiles were downloaded from GEO, including untreated human neonatal foreskin fibroblasts (HFF1) as control, OSKM-induced HFF1 (at 24, 48, 72 h post-transduction of OSKM encoding viruses), two iPS cell lines, and two embryonic stem (ES) cell lines. Differentially expressed genes (DEGs) were screened between different cell lines and the control by Limma package in Bioconductor. KEGG pathway enrichment analysis was performed by DAVID. The STRING database was used to construct protein-protein interaction (PPI) network. Activities and regulatory networks of transcription factors (TFs) were calculated and constructed by Fast Network Component Analysis (FastNCA).

Results

Compared with untreated HFF1, 117, 347, 557, 2263 and 2307 DEGs were obtained from three point post-transduction HFF1, iPS and ES cells. Meanwhile, up-regulated DEGs in first two days of HFF1 were mainly enriched in RIG-I-like receptor (RLR) and Toll-like receptor (TLR) signaling pathways. Down-regulated DEGs at 72 h were significantly enriched in focal adhesion pathway which was similar to iPS cells. Moreover, ISG15, IRF7, STAT1 and DDX58 were with higher degree in PPI networks during time series. Furthermore, the targets of six selected TFs were mainly enriched in screened DEGs.

Conclusion

In this study, screened DEGs including ISG15, IRF7 and CCL5 participated in OSKM-induced pluripotency might attenuate immune response post-transduction through RLR and TLR signaling pathways.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2503890341543007 .",2015-04-24 +27665594,STarMir Tools for Prediction of microRNA Binding Sites.,"MicroRNAs (miRNAs) are a class of endogenous short noncoding RNAs that regulate gene expression by targeting messenger RNAs (mRNAs), which results in translational repression and/or mRNA degradation. As regulatory molecules, miRNAs are involved in many mammalian biological processes and also in the manifestation of certain human diseases. As miRNAs play central role in the regulation of gene expression, understanding miRNA-binding patterns is essential to gain an insight of miRNA mediated gene regulation and also holds promise for therapeutic applications. Computational prediction of miRNA binding sites on target mRNAs facilitates experimental investigation of miRNA functions. This chapter provides protocols for using the STarMir web server for improved predictions of miRNA binding sites on a target mRNA. As an application module of the Sfold RNA package, the current version of STarMir is an implementation of logistic prediction models developed with high-throughput miRNA binding data from cross-linking immunoprecipitation (CLIP) studies. The models incorporated comprehensive thermodynamic, structural, and sequence features, and were found to make improved predictions of both seed and seedless sites, in comparison to the established algorithms (Liu et al., Nucleic Acids Res 41:e138, 2013). Their broad applicability was indicated by their good performance in cross-species validation. STarMir is freely available at http://sfold.wadsworth.org/starmir.html .",2016-01-01 +24463182,ECplot: an online tool for making standardized plots from large datasets for bioinformatics publications.,"

Motivation and results

We have implemented ECplot, an online tool for plotting charts from large datasets. This tool supports a variety of chart types commonly used in bioinformatics publications. In our benchmarking, it was able to create a Box-and-Whisker plot with about 67 000 data points and 8 MB total file size within several seconds. The design of the tool makes common formatting operations easy to perform. It also allows more complex operations to be achieved by advanced XML (Extensible Markup Language) and programming options. Data and formatting styles are stored in separate files, such that style templates can be made and applied to new datasets. The text-based file formats based on XML facilitate efficient manipulation of formatting styles for a large number of data series. These file formats also provide a means to reproduce published figures from raw data, which complement parallel efforts in making the data and software involved in published analysis results accessible. We demonstrate this idea by using ECplot to replicate some complex figures from a previous publication.

Availability and implementation

ECplot and its source code (under MIT license) are available at https://yiplab.cse.cuhk.edu.hk/ecplot/.

Contact

kevinyip@cse.cuhk.edu.hk.",2014-01-24 +25804569,Impact of heart rate dynamics on mortality in the early phase after ischemic stroke: a prospective observational trial.,"

Background

Growing evidence suggests that the heart rate (HR) at rest is an independent predictor of cardiovascular mortality. In ischemic stroke, continuous monitoring of HR is the standard of care, but systematic data on its dynamics and prognostic value during the acute phase are limited.

Methods

In this prospective observational study, HR was measured by continuous electrocardiographic monitoring on admission and during the first 72 hours of care among patients who were awake with ischemic stroke and survived until discharge. Functional outcome was assessed after 90 days.

Results

Data from 702 consecutive patients were analyzed (median age, 73 years, 54% men). The time course of HR was initially characterized by a rapid decline during the first 12 hours after admission. Among patients who survived until day 90, this was followed by a continuous downward trend in HR, whereas death after discharge was associated with a secondary increase and a reversal point 12 hours after admission. After adjustment for established risk factors, this secondary increase during the acute period was an independent predictor of death (hazard ratio, 3.73; 95% confidence interval, 1.47-9.43; P = .005).

Conclusions

A secondary rise of HR during care for acute ischemic stroke is an early sign of fatality and may represent a surrogate for an unfavorable sympathetic disinhibition. Further research is warranted to clarify the role of targeted HR reduction after ischemic stroke (http://clinicaltrials.gov/, unique identifier NCT01858779).",2015-03-21 +26722117,SPMM: estimating infection duration of multivariant HIV-1 infections.,"

Motivation

Illustrating how HIV-1 is transmitted and how it evolves in the following weeks is an important step for developing effective vaccination and prevention strategies. It is currently possible through DNA sequencing to account for the diverse array of viral strains within an infected individual. This provides an unprecedented opportunity to pinpoint when each patient was infected and which viruses were transmitted.

Results

Here we develop a mathematical tool for early HIV-1 evolution within a subject whose infection originates either from a single or multiple viral variants. The shifted Poisson mixture model (SPMM) provides a quantitative guideline for segregating viral lineages, which in turn enables us to assess when a subject was infected. The infection duration estimated by SPMM showed a statistically significant linear relationship with that by Fiebig laboratory staging (P = 0.00059) among 37 acutely infected subjects. Our tool provides a functional approach to understanding early genetic diversity, one of the most important parameters for deciphering HIV-1 transmission and predicting the rate of disease progression.

Availability and implementation

SPMM, webserver, is available at http://www.hayounlee.org/web-tools.html.

Contact

hayoun@usc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-31 +25911152,"PEIMAN 1.0: Post-translational modification Enrichment, Integration and Matching ANalysis.","Conventional proteomics has discovered a wide gap between protein sequences and biological functions. The third generation of proteomics was provoked to bridge this gap. Targeted and untargeted post-translational modification (PTM) studies are the most important parts of today's proteomics. Considering the expensive and time-consuming nature of experimental methods, computational methods are developed to study, analyze, predict, count and compute the PTM annotations on proteins. The enrichment analysis softwares are among the common computational biology and bioinformatic software packages. The focus of such softwares is to find the probability of occurrence of the desired biological features in any arbitrary list of genes/proteins. We introduce Post-translational modification Enrichment Integration and Matching Analysis (PEIMAN) software to explore more probable and enriched PTMs on proteins. Here, we also represent the statistics of detected PTM terms used in enrichment analysis in PEIMAN software based on the latest released version of UniProtKB/Swiss-Prot. These results, in addition to giving insight to any given list of proteins, could be useful to design targeted PTM studies for identification and characterization of special chemical groups. Database URL: http://bs.ipm.ir/softwares/PEIMAN/",2015-04-23 +25075118,FARVAT: a family-based rare variant association test.,"

Motivation

Individuals in each family are genetically more homogeneous than unrelated individuals, and family-based designs are often recommended for the analysis of rare variants. However, despite the importance of family-based samples analysis, few statistical methods for rare variant association analysis are available.

Results

In this report, we propose a FAmily-based Rare Variant Association Test (FARVAT). FARVAT is based on the quasi-likelihood of whole families, and is statistically and computationally efficient for the extended families. FARVAT assumed that families were ascertained with the disease status of family members, and incorporation of the estimated genetic relationship matrix to the proposed method provided robustness under the presence of the population substructure. Depending on the choice of working matrix, our method could be a burden test or a variance component test, and could be extended to the SKAT-O-type statistic. FARVAT was implemented in C++, and application of the proposed method to schizophrenia data and simulated data for GAW17 illustrated its practical importance.

Availability

The software calculates various statistics for the analysis of related samples, and it is freely downloadable from http://healthstats.snu.ac.kr/software/farvat.

Contact

won1@snu.ac.kr or tspark@stats.snu.ac.kr

Supplementary information

supplementary data are available at Bioinformatics online.",2014-07-29 +24417759,"EUCAST technical note on Candida and micafungin, anidulafungin and fluconazole.","The European Committee on Antimicrobial Susceptibility Testing Subcommittee on Antifungal Susceptibility Testing has determined breakpoints for micafungin and revised breakpoints for anidulafungin and fluconazole for Candida spp. This Technical Note is based on the corresponding rationale documents (http://www.eucast.org). The micafungin breakpoints are based on PK data, animal PK/PD data, microbiological data and clinical experience. The anidulafungin breakpoints for C. parapsilosis and fluconazole breakpoints for C. glabrata have been modified to species-specific values that categorise the wild-type as intermediate to accommodate use of these compounds in some clinical situations.",2014-01-13 +25648087,Analysis of pattern overlaps and exact computation of P-values of pattern occurrences numbers: case of Hidden Markov Models.,"

Background

Finding new functional fragments in biological sequences is a challenging problem. Methods addressing this problem commonly search for clusters of pattern occurrences that are statistically significant. A measure of statistical significance is the P-value of a number of pattern occurrences, i.e. the probability to find at least S occurrences of words from a pattern in a random text of length N generated according to a given probability model. All words of the pattern are supposed to be of same length.

Results

We present a novel algorithm SufPref that computes an exact P-value for Hidden Markov models (HMM). The algorithm is based on recursive equations on text sets related to pattern occurrences; the equations can be used for any probability model. The algorithm inductively traverses a specific data structure, an overlap graph. The nodes of the graph are associated with the overlaps of words from the pattern. The edges are associated to the prefix and suffix relations between overlaps. An originality of our data structure is that pattern need not be explicitly represented in nodes or leaves. The algorithm relies on the Cartesian product of the overlap graph and the graph of HMM states; this approach is analogous to the automaton approach from JBCB 4: 553-569. The gain in size of SufPref data structure leads to significant improvements in space and time complexity compared to existent algorithms. The algorithm SufPref was implemented as a C++ program; the program can be used both as Web-server and a stand alone program for Linux and Windows. The program interface admits special formats to describe probability models of various types (HMM, Bernoulli, Markov); a pattern can be described with a list of words, a PSSM, a degenerate pattern or a word and a number of mismatches. It is available at http://server2.lpm.org.ru/bio/online/sf/. The program was applied to compare sensitivity and specificity of methods for TFBS prediction based on P-values computed for Bernoulli models, Markov models of orders one and two and HMMs. The experiments show that the methods have approximately the same qualities.",2014-12-16 +26947033,An investigation of jogging biomechanics using the full-body lumbar spine model: Model development and validation.,"The ability of a biomechanical simulation to produce results that can translate to real-life situations is largely dependent on the physiological accuracy of the musculoskeletal model. 
There are a limited number of freely-available, full-body models that exist in OpenSim, and those that do exist are very limited in terms of trunk musculature and degrees of freedom in the spine. Properly modeling the motion and musculature of the trunk is necessary to most accurately estimate lower extremity and spinal loading. The objective of this study was to develop and validate a more physiologically accurate OpenSim full-body model. By building upon three previously developed OpenSim models, the full-body lumbar spine (FBLS) model, comprised of 21 segments, 30 degrees-of-freedom, and 324 musculotendon actuators, was developed. The five lumbar vertebrae were modeled as individual bodies, and coupled constraints were implemented to describe the net motion of the spine. The eight major muscle groups of the lumbar spine were modeled (rectus abdominis, external and internal obliques, erector spinae, multifidus, quadratus lumborum, psoas major, and latissimus dorsi), and many of these muscle groups were modeled as multiple fascicles allowing the large muscles to act in multiple directions. The resulting FBLS model's trunk muscle geometry, maximal isometric joint moments, and simulated muscle activations compare well to experimental data. The FBLS model will be made freely available (https://simtk.org/home/fullbodylumbar) for others to perform additional analyses and develop simulations investigating full-body dynamics and contributions of the trunk muscles to dynamic tasks.",2016-02-27 +23193273,"EPD and EPDnew, high-quality promoter resources in the next-generation sequencing era.","The Eukaryotic Promoter Database (EPD), available online at http://epd.vital-it.ch, is a collection of experimentally defined eukaryotic POL II promoters which has been maintained for more than 25 years. A promoter is represented by a single position in the genome, typically the major transcription start site (TSS). 
EPD primarily serves biologists interested in analysing the motif content, chromatin structure or DNA methylation status of co-regulated promoter subsets. Initially, promoter evidence came from TSS mapping experiments targeted at single genes and published in journal articles. Today, the TSS positions provided by EPD are inferred from next-generation sequencing data distributed in electronic form. Traditionally, EPD has been a high-quality database with low coverage. The focus of recent efforts has been to reach complete gene coverage for important model organisms. To this end, we introduced a new section called EPDnew, which is automatically assembled from multiple, carefully selected input datasets. As another novelty, we started to use chromatin signatures in addition to mRNA 5'tags to locate promoters of weakly expressed genes. Regarding user interfaces, we introduced a new promoter viewer which enables users to explore promoter-defining experimental evidence in a UCSC genome browser window.",2012-11-27 +26794319,ConceptMetab: exploring relationships among metabolite sets to identify links among biomedical concepts.,"

Motivation

Capabilities in the field of metabolomics have grown tremendously in recent years. Many existing resources contain the chemical properties and classifications of commonly identified metabolites. However, the annotation of small molecules (both endogenous and synthetic) to meaningful biological pathways and concepts still lags behind the analytical capabilities and the chemistry-based annotations. Furthermore, no tools are available to visually explore relationships and networks among functionally related groups of metabolites (biomedical concepts). Such a tool would provide the ability to establish testable hypotheses regarding links among metabolic pathways, cellular processes, phenotypes and diseases.

Results

Here we present ConceptMetab, an interactive web-based tool for mapping and exploring the relationships among 16 069 biologically defined metabolite sets developed from Gene Ontology, KEGG and Medical Subject Headings, using both KEGG and PubChem compound identifiers, and based on statistical tests for association. We demonstrate the utility of ConceptMetab with multiple scenarios, showing it can be used to identify known and potentially novel relationships among metabolic pathways, cellular processes, phenotypes and diseases, and provides an intuitive interface for linking compounds to their molecular functions and higher level biological effects.

Availability and implementation

http://conceptmetab.med.umich.edu

Contacts

akarnovsky@umich.edu or sartorma@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-01-21 +26953177,Hyperspectral chemical plume detection algorithms based on multidimensional iterative filtering decomposition.,"Chemicals released in the air can be extremely dangerous for human beings and the environment. Hyperspectral images can be used to identify chemical plumes, however the task can be extremely challenging. Assuming we know a priori that some chemical plume, with a known frequency spectrum, has been photographed using a hyperspectral sensor, we can use standard techniques such as the so-called matched filter or adaptive cosine estimator, plus a properly chosen threshold value, to identify the position of the chemical plume. However, due to noise and inadequate sensing, the accurate identification of chemical pixels is not easy even in this apparently simple situation. In this paper, we present a post-processing tool that, in a completely adaptive and data-driven fashion, allows us to improve the performance of any classification methods in identifying the boundaries of a plume. This is done using the multidimensional iterative filtering (MIF) algorithm (Cicone et al. 2014 (http://arxiv.org/abs/1411.6051); Cicone & Zhou 2015 (http://arxiv.org/abs/1507.07173)), which is a non-stationary signal decomposition method like the pioneering empirical mode decomposition method (Huang et al. 1998 Proc. R. Soc. Lond. A 454, 903. (doi:10.1098/rspa.1998.0193)). Moreover, based on the MIF technique, we propose also a pre-processing method that allows us to decorrelate and mean-centre a hyperspectral dataset. The cosine similarity measure, which often fails in practice, appears to become a successful and outperforming classifier when equipped with such a pre-processing method. 
We show some examples of the proposed methods when applied to real-life problems.",2016-04-01 +27307621,An algorithm for computing the gene tree probability under the multispecies coalescent and its application in the inference of population tree.,"

Motivation

Gene tree represents the evolutionary history of gene lineages that originate from multiple related populations. Under the multispecies coalescent model, lineages may coalesce outside the species (population) boundary. Given a species tree (with branch lengths), the gene tree probability is the probability of observing a specific gene tree topology under the multispecies coalescent model. There are two existing algorithms for computing the exact gene tree probability. The first algorithm is due to Degnan and Salter, where they enumerate all the so-called coalescent histories for the given species tree and the gene tree topology. Their algorithm runs in exponential time in the number of gene lineages in general. The second algorithm is the STELLS algorithm (2012), which is usually faster but also runs in exponential time in almost all the cases.

Results

In this article, we present a new algorithm, called CompactCH, for computing the exact gene tree probability. This new algorithm is based on the notion of compact coalescent histories: multiple coalescent histories are represented by a single compact coalescent history. The key advantage of our new algorithm is that it runs in polynomial time in the number of gene lineages if the number of populations is fixed to be a constant. The new algorithm is more efficient than the STELLS algorithm both in theory and in practice when the number of populations is small and there are multiple gene lineages from each population. As an application, we show that CompactCH can be applied in the inference of population tree (i.e. the population divergence history) from population haplotypes. Simulation results show that the CompactCH algorithm enables efficient and accurate inference of population trees with much more haplotypes than a previous approach.

Availability

The CompactCH algorithm is implemented in the STELLS software package, which is available for download at http://www.engr.uconn.edu/ywu/STELLS.html

Contact

ywu@engr.uconn.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +24444368,SeqBench: integrated solution for the management and analysis of exome sequencing data.,"

Background

The rapid development of next generation sequencing technologies, including the recently introduced benchtop sequencers, made sequencing affordable for smaller research institutions. A widely applied method to identify causing mutations of diseases is exome sequencing, which proved to be cost-effective and time-saving.

Findings

SeqBench, a web-based application, combines management and analysis of exome sequencing data into one solution. It provides a user friendly data acquisition module to facilitate comprehensive and intuitive data handling. SeqBench provides direct access to the analysis pipeline SIMPLEX, which can be configured to run locally, on a cluster, or in the cloud. Identified genomic variants are presented along with several functional annotations and can be interpreted in a family context.

Conclusions

The web-based application SeqBench supports the management and analysis of exome sequencing data, is open-source and available at http://www.icbi.at/SeqBench.",2014-01-20 +22493537,BacterialLectinDb: An integrated bacterial lectin database.,"

Unlabelled

Studies of various diversified bacterial lectins/ lectin data may serve as a tool with enormous promise to help biotechnologists/ geneticists in their innovative technology to explore a deeper understanding in proteomics/ genomics research for finding the molecular basis of infectious diseases and also to new approaches for their prevention and in development of new bacterial vaccines. Hence we developed a bacterial lectin database named 'BacterialLectinDb'. An organized database schema for BacterialLectinDb was designed to collate all the available information about all bacterial lectins as a central repository. The database was designed using HTML, XML.

Availability

The database is available for free at http://www.research-bioinformatics.in.",2012-03-31 +23475683,Update on allele nomenclature for human cytochromes P450 and the Human Cytochrome P450 Allele (CYP-allele) Nomenclature Database.,"Interindividual variability in xenobiotic metabolism and drug response is extensive and genetic factors play an important role in this variation. A majority of clinically used drugs are substrates for the cytochrome P450 (CYP) enzyme system and interindividual variability in expression and function of these enzymes is a major factor for explaining individual susceptibility for adverse drug reactions and drug response. Because of the existence of many polymorphic CYP genes, for many of which the number of allelic variants is continually increasing, a universal and official nomenclature system is important. Since 1999, all functionally relevant polymorphic CYP alleles are named and published on the Human Cytochrome P450 Allele (CYP-allele) Nomenclature Web site (http://www.cypalleles.ki.se). Currently, the database covers nomenclature of more than 660 alleles in a total of 30 genes that includes 29 CYPs as well as the cytochrome P450 oxidoreductase (POR) gene. On the CYP-allele Web site, each gene has its own Webpage, which lists the alleles with their nucleotide changes, their functional consequences, and links to publications identifying or characterizing the alleles. CYP2D6, CYP2C9, CYP2C19, and CYP3A4 are the most important CYPs in terms of drug metabolism, which is also reflected in their corresponding highest number of Webpage hits at the CYP-allele Web site. The main advantage of the CYP-allele database is that it offers a rapid online publication of CYP-alleles and their effects and provides an overview of peer-reviewed data to the scientific community. 
Here, we provide an update of the CYP-allele database and the associated nomenclature.",2013-01-01 +26657631,"HaploReg v4: systematic mining of putative causal variants, cell types, regulators and target genes for human complex traits and disease.","More than 90% of common variants associated with complex traits do not affect proteins directly, but instead the circuits that control gene expression. This has increased the urgency of understanding the regulatory genome as a key component for translating genetic results into mechanistic insights and ultimately therapeutics. To address this challenge, we developed HaploReg (http://compbio.mit.edu/HaploReg) to aid the functional dissection of genome-wide association study (GWAS) results, the prediction of putative causal variants in haplotype blocks, the prediction of likely cell types of action, and the prediction of candidate target genes by systematic mining of comparative, epigenomic and regulatory annotations. Since first launching the website in 2011, we have greatly expanded HaploReg, increasing the number of chromatin state maps to 127 reference epigenomes from ENCODE 2012 and Roadmap Epigenomics, incorporating regulator binding data, expanding regulatory motif disruption annotations, and integrating expression quantitative trait locus (eQTL) variants and their tissue-specific target genes from GTEx, Geuvadis, and other recent studies. We present these updates as HaploReg v4, and illustrate a use case of HaploReg for attention deficit hyperactivity disorder (ADHD)-associated SNPs with putative brain regulatory mechanisms.",2015-12-10 +27050040,Automated Microscopy: Macro Language Controlling a Confocal Microscope and its External Illumination: Adaptation for Photosynthetic Organisms.,"Photosynthesis research employs several biophysical methods, including the detection of fluorescence. 
Even though fluorescence is a key method to detect photosynthetic efficiency, it has not been applied/adapted to single-cell confocal microscopy measurements to examine photosynthetic microorganisms. Experiments with photosynthetic cells may require automation to perform a large number of measurements with different parameters, especially concerning light conditions. However, commercial microscopes support custom protocols (through Time Controller offered by Olympus or Experiment Designer offered by Zeiss) that are often unable to provide special set-ups and connection to external devices (e.g., for irradiation). Our new system combining an Arduino microcontroller with the Cell⊕Finder software was developed for controlling Olympus FV1000 and FV1200 confocal microscopes and the attached hardware modules. Our software/hardware solution offers (1) a text file-based macro language to control the imaging functions of the microscope; (2) programmable control of several external hardware devices (light sources, thermal controllers, actuators) during imaging via the Arduino microcontroller; (3) the Cell⊕Finder software with ergonomic user environment, a fast selection method for the biologically important cells and precise positioning feature that reduces unwanted bleaching of the cells by the scanning laser. Cell⊕Finder can be downloaded from http://www.alga.cz/cellfinder. The system was applied to study changes in fluorescence intensity in Synechocystis sp. PCC6803 cells under long-term illumination. Thus, we were able to describe the kinetics of phycobilisome decoupling. Microscopy data showed that phycobilisome decoupling appears slowly after long-term (>1 h) exposure to high light.",2016-04-01 +25681405,Walking on multiple disease-gene networks to prioritize candidate genes.,"Uncovering causal genes for human inherited diseases, as the primary step toward understanding the pathogenesis of these diseases, requires a combined analysis of genetic and genomic data. 
Although bioinformatics methods have been designed to prioritize candidate genes resulting from genetic linkage analysis or association studies, the coverage of both diseases and genes in existing methods is quite limited, thereby preventing the scan of causal genes for a significant proportion of diseases at the whole-genome level. To overcome this limitation, we propose a method named pgWalk to prioritize candidate genes by integrating multiple phenomic and genomic data. We derive three types of phenotype similarities among 7719 diseases and nine types of functional similarities among 20327 genes. Based on a pair of phenotype and gene similarities, we construct a disease-gene network and then simulate the process that a random walker wanders on such a heterogeneous network to quantify the strength of association between a candidate gene and a query disease. A weighted version of the Fisher's method with dependent correction is adopted to integrate 27 scores obtained in this way, and a final q-value is calibrated for prioritizing candidate genes. A series of validation experiments are conducted to demonstrate the superior performance of this approach. We further show the effectiveness of this method in exome sequencing studies of autism and epileptic encephalopathies. An online service and the standalone software of pgWalk can be found at http://bioinfo.au.tsinghua.edu.cn/jianglab/pgwalk.",2015-02-13 +26023104,xHeinz: an algorithm for mining cross-species network modules under a flexible conservation model.,"

Motivation

Integrative network analysis methods provide robust interpretations of differential high-throughput molecular profile measurements. They are often used in a biomedical context-to generate novel hypotheses about the underlying cellular processes or to derive biomarkers for classification and subtyping. The underlying molecular profiles are frequently measured and validated on animal or cellular models. Therefore the results are not immediately transferable to human. In particular, this is also the case in a study of the recently discovered interleukin-17 producing helper T cells (Th17), which are fundamental for anti-microbial immunity but also known to contribute to autoimmune diseases.

Results

We propose a mathematical model for finding active subnetwork modules that are conserved between two species. These are sets of genes, one for each species, which (i) induce a connected subnetwork in a species-specific interaction network, (ii) show overall differential behavior and (iii) contain a large number of orthologous genes. We propose a flexible notion of conservation, which turns out to be crucial for the quality of the resulting modules in terms of biological interpretability. We propose an algorithm that finds provably optimal or near-optimal conserved active modules in our model. We apply our algorithm to understand the mechanisms underlying Th17 T cell differentiation in both mouse and human. As a main biological result, we find that the key regulation of Th17 differentiation is conserved between human and mouse.

Availability and implementation

xHeinz, an implementation of our algorithm, as well as all input data and results, are available at http://software.cwi.nl/xheinz and as a Galaxy service at http://services.cbib.u-bordeaux2.fr/galaxy in CBiB Tools.

Contact

gunnar.klau@cwi.nl

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-27 +27307648,Analysis of aggregated cell-cell statistical distances within pathways unveils therapeutic-resistance mechanisms in circulating tumor cells.,"

Motivation

As 'omics' biotechnologies accelerate the capability to contrast a myriad of molecular measurements from a single cell, they also exacerbate current analytical limitations for detecting meaningful single-cell dysregulations. Moreover, mRNA expression alone lacks functional interpretation, limiting opportunities for translation of single-cell transcriptomic insights to precision medicine. Lastly, most single-cell RNA-sequencing analytic approaches are not designed to investigate small populations of cells such as circulating tumor cells shed from solid tumors and isolated from patient blood samples.

Results

In response to these characteristics and limitations in current single-cell RNA-sequencing methodology, we introduce an analytic framework that models transcriptome dynamics through the analysis of aggregated cell-cell statistical distances within biomolecular pathways. Cell-cell statistical distances are calculated from pathway mRNA fold changes between two cells. Within an elaborate case study of circulating tumor cells derived from prostate cancer patients, we develop analytic methods of aggregated distances to identify five differentially expressed pathways associated to therapeutic resistance. Our aggregation analyses perform comparably with Gene Set Enrichment Analysis and better than differentially expressed genes followed by gene set enrichment. However, these methods were not designed to inform on differential pathway expression for a single cell. As such, our framework culminates with the novel aggregation method, cell-centric statistics (CCS). CCS quantifies the effect size and significance of differentially expressed pathways for a single cell of interest. Improved rose plots of differentially expressed pathways in each cell highlight the utility of CCS for therapeutic decision-making.

Availability and implementation

http://www.lussierlab.org/publications/CCS/ CONTACT: yves@email.arizona.edu or piegorsch@math.arizona.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-06-01 +26457579,Estimating Metabolic Fluxes Using a Maximum Network Flexibility Paradigm.,"

Motivation

Genome-scale metabolic networks can be modeled in a constraint-based fashion. Reaction stoichiometry combined with flux capacity constraints determine the space of allowable reaction rates. This space is often large and a central challenge in metabolic modeling is finding the biologically most relevant flux distributions. A widely used method is flux balance analysis (FBA), which optimizes a biologically relevant objective such as growth or ATP production. Although FBA has proven to be highly useful for predicting growth and byproduct secretion, it cannot predict the intracellular fluxes under all environmental conditions. Therefore, alternative strategies have been developed to select flux distributions that are in agreement with experimental ""omics"" data, or by incorporating experimental flux measurements. The latter, unfortunately can only be applied to a limited set of reactions and is currently not feasible at the genome-scale. On the other hand, it has been observed that micro-organisms favor a suboptimal growth rate, possibly in exchange for a more ""flexible"" metabolic network. Instead of dedicating the internal network state to an optimal growth rate in one condition, a suboptimal growth rate is used, that allows for an easier switch to other nutrient sources. A small decrease in growth rate is exchanged for a relatively large gain in metabolic capability to adapt to changing environmental conditions.

Results

Here, we propose Maximum Metabolic Flexibility (MMF) a computational method that utilizes this observation to find the most probable intracellular flux distributions. By mapping measured flux data from central metabolism to the genome-scale models of Escherichia coli and Saccharomyces cerevisiae we show that i) indeed, most of the measured fluxes agree with a high adaptability of the network, ii) this result can be used to further reduce the space of feasible solutions iii) this reduced space improves the quantitative predictions made by FBA and contains a significantly larger fraction of the measured fluxes compared to the flux space that was reduced by a uniform sampling approach and iv) MMF can be used to select reactions in the network that contribute most to the steady-state flux space. Constraining the selected reactions improves the quantitative predictions of FBA considerably more than adding an equal amount of flux constraints, selected using a more naïve approach. Our method can be applied to any cell type without requiring prior information.

Availability

MMF is freely available as a MATLAB plugin at: http://cs.ru.nl/~wmegchel/mmf.",2015-10-12 +25762601,[pIPredict: a computer tool for predicting isoelectric points of peptides and proteins].,"The data on approximate values of isoelectric point (pI) of peptides obtained during their fractionation by isoelectric focusing can be successfully used for the calculation of the pKa's scale for amino acid residues. This scale can be used for pI prediction. The data of peptide fractionation also provides information about various posttranslational modifications (PTM), so that the prediction of pI may be performed for a wide range of protein forms. In this study, pKa values were calculated using a set of 13448 peptides (including 300 peptides with PTMs significant for pI calculation). The pKa constants were calculated for N-terminal, internal and C-terminal amino acid residues separately. The comparative analysis has shown that our scale increases the accuracy of pI prediction for peptides and proteins and successfully competes with traditional scales and such methods as support vector machines and artificial neural networks. The prediction performed by this scale, can be made in our program pIPredict with GUI written in JAVA as executable jar-archive. The program is freely available for academic users at http://www.ibmc.msk.ru/LPCIT/pIPredict. The software has also the possibility of pI predicting by some other scales; it recognizes some PTM and has the ability to use a custom scale.",2015-01-01 +24849578,HTML5 PivotViewer: high-throughput visualization and querying of image data on the web.,"

Motivation

Visualization and analysis of large numbers of biological images has generated a bottle neck in research. We present HTML5 PivotViewer, a novel, open source, platform-independent viewer making use of the latest web technologies that allows seamless access to images and associated metadata for each image. This provides a powerful method to allow end users to mine their data.

Availability and implementation

Documentation, examples and links to the software are available from http://www.cbrg.ox.ac.uk/data/pivotviewer/. The software is licensed under GPLv2.",2014-05-21 +26059717,BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs.,"

Motivation

Genomics has revolutionized biological research, but quality assessment of the resulting assembled sequences is complicated and remains mostly limited to technical measures like N50.

Results

We propose a measure for quantitative assessment of genome assembly and annotation completeness based on evolutionarily informed expectations of gene content. We implemented the assessment procedure in open-source software, with sets of Benchmarking Universal Single-Copy Orthologs, named BUSCO.

Availability and implementation

Software implemented in Python and datasets available for download from http://busco.ezlab.org.

Contact

evgeny.zdobnov@unige.ch

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-09 +27490034,Improving AutoDock Vina Using Random Forest: The Growing Accuracy of Binding Affinity Prediction by the Effective Exploitation of Larger Data Sets.,"There is a growing body of evidence showing that machine learning regression results in more accurate structure-based prediction of protein-ligand binding affinity. Docking methods that aim at optimizing the affinity of ligands for a target rely on how accurate their predicted ranking is. However, despite their proven advantages, machine-learning scoring functions are still not widely applied. This seems to be due to insufficient understanding of their properties and the lack of user-friendly software implementing them. Here we present a study where the accuracy of AutoDock Vina, arguably the most commonly-used docking software, is strongly improved by following a machine learning approach. We also analyse the factors that are responsible for this improvement and their generality. Most importantly, with the help of a proposed benchmark, we demonstrate that this improvement will be larger as more data becomes available for training Random Forest models, as regression models implying additive functional forms do not improve with more training data. We discuss how the latter opens the door to new opportunities in scoring function development. In order to facilitate the translation of this advance to enhance structure-based molecular design, we provide software to directly re-score Vina-generated poses and thus strongly improve their predicted binding affinity. The software is available at http://istar.cse.cuhk.edu.hk/rf-score-3.tgz and http://crcm. marseille.inserm.fr/fileadmin/rf-score-3.tgz.",2015-02-12 +27296201,Early Recovery of Left Ventricular Systolic Function After CoreValve Transcatheter Aortic Valve Replacement. 
,"Approximately one third of patients with symptomatic aortic stenosis have reduced left ventricular ejection fraction (LVEF) before transcatheter aortic valve replacement. The incidence, predictors, and significance of early LVEF recovery after CoreValve transcatheter aortic valve replacement have not been described. We studied 156 patients from the CoreValve Extreme and High-Risk trials with LVEF ≤40% at baseline who had 30-day LVEF data. All patients underwent core laboratory echocardiographic assessment of LVEF at baseline, post procedure, discharge, 30 days, 6 months, and 1 year. Early LVEF recovery was defined as an absolute increase of ≥10% in EF at 30 days. One-year outcomes were compared between patients with and without early recovery. Multivariable analysis was performed to determine independent predictors of early recovery. Early LVEF recovery occurred in 62% of patients, generally before discharge. By 30 days LVEF increased >17% compared with baseline in the early recovery group with minimal increase in the no-early recovery group (48.9±8.8% versus 31.5±6.9%; P<0.001). One-year all-cause mortality was numerically (but not statistically) higher in the no-early recovery group (24% versus 12%; P=0.07). Absence of previous myocardial infarction (odds ratio, 0.44; 95% confidence interval, 0.19-1.03) and baseline mean gradient ≥40 mm Hg (odds ratio, 4.59; 95% confidence interval, 1.76-11.96) were identified as predictors of early LVEF recovery. Nearly two thirds of patients with reduced LVEF will have a marked early improvement after transcatheter aortic valve replacement. Early LVEF recovery is associated with improved clinical outcomes and is most likely among patients with higher baseline aortic valve gradients and no previous myocardial infarction. URL: http://www.clinicaltrials.gov. Unique identifier: NCT01240902.",2016-06-01 +27266853,Combining Diuretic Response and Hemoconcentration to Predict Rehospitalization After Admission for Acute Heart Failure. 
,"Both diuretic response and hemoconcentration are indicators of decongestion and have individually been found to predict rehospitalization after admission for acute heart failure (HF). This study examines the value of combining diuretic response and hemoconcentration to better predict patients at low risk for rehospitalization after admission for acute HF. Diuretic response (defined as weight change per 40 mg of furosemide on day 4 after admission) and hemoconcentration (change in hemoglobin at discharge or day 7) were tested both individually and combined to predict the risk of HF and cardiovascular rehospitalization 60 days after hospitalization for acute HF. Analyses were performed in 1180 patients enrolled in the Placebo-Controlled Randomized Study of the Selective Adenosine Receptor Antagonist Rolofylline for Patients Hospitalized With Acute Decompensated Heart Failure and Volume Overload to Assess Treatment Effect on Congestion and Renal Function (PROTECT) trial and validated in 1776 patients enrolled in the Efficacy of Vasopressin Antagonism in Heart Failure Outcome Study With Tolvaptan (EVEREST) trial. Poor diuretic response was associated with low systolic blood pressure, high blood urea nitrogen, and history of coronary revascularization in both data sets (all P<0.05). Hemoconcentration was mainly associated with better renal function (P<0.05). Patients who displayed both favorable diuretic response and hemoconcentration had a markedly lower risk of rehospitalization for HF in PROTECT (multivariable HR, 0.41; 95% CI, 0.24 to 0.70; P<0.001) compared with all other patients. This finding was confirmed in EVEREST (multivariable HR, 0.52; 95% CI, 0.33 to 0.82; P=0.004) for patients with favorable diuretic response and hemoconcentration compared with all other patients. 
Combining 2 indicators of decongestion, hemoconcentration and diuretic response improves risk prediction for early rehospitalization after an admission for acute HF and may provide clinicians with an easily accessible tool to identify low-risk patients. URL: http://www.clinicaltrials.gov. Unique identifiers: NCT00354458 and NCT00071331.",2016-06-01 +26108437,Multiobjective triclustering of time-series transcriptome data reveals key genes of biological processes.,"

Background

Exploratory analysis of multi-dimensional high-throughput datasets, such as microarray gene expression time series, may be instrumental in understanding the genetic programs underlying numerous biological processes. In such datasets, variations in the gene expression profiles are usually observed across replicates and time points. Thus mining the temporal expression patterns in such multi-dimensional datasets may not only provide insights into the key biological processes governing organs to grow and develop but also facilitate the understanding of the underlying complex gene regulatory circuits.

Results

In this work we have developed an evolutionary multi-objective optimization for our previously introduced triclustering algorithm δ-TRIMAX. Its aim is to make optimal use of δ-TRIMAX in extracting groups of co-expressed genes from time series gene expression data, or from any 3D gene expression dataset, by adding the powerful capabilities of an evolutionary algorithm to retrieve overlapping triclusters. We have compared the performance of our newly developed algorithm, EMOA- δ-TRIMAX, with that of other existing triclustering approaches using four artificial dataset and three real-life datasets. Moreover, we have analyzed the results of our algorithm on one of these real-life datasets monitoring the differentiation of human induced pluripotent stem cells (hiPSC) into mature cardiomyocytes. For each group of co-expressed genes belonging to one tricluster, we identified key genes by computing their membership values within the tricluster. It turned out that to a very high percentage, these key genes were significantly enriched in Gene Ontology categories or KEGG pathways that fitted very well to the biological context of cardiomyocytes differentiation.

Conclusions

EMOA- δ-TRIMAX has proven instrumental in identifying groups of genes in transcriptomic data sets that represent the functional categories constituting the biological process under study. The executable file can be found at http://www.bioinf.med.uni-goettingen.de/fileadmin/download/EMOA-delta-TRIMAX.tar.gz .",2015-06-26 +24280345,"A genome scale metabolic network for rice and accompanying analysis of tryptophan, auxin and serotonin biosynthesis regulation under biotic stress.","

Background

Functional annotations of large plant genome projects mostly provide information on gene function and gene families based on the presence of protein domains and gene homology, but not necessarily in association with gene expression or metabolic and regulatory networks. These additional annotations are necessary to understand the physiology, development and adaptation of a plant and its interaction with the environment.

Results

RiceCyc is a metabolic pathway networks database for rice. It is a snapshot of the substrates, metabolites, enzymes, reactions and pathways of primary and intermediary metabolism in rice. RiceCyc version 3.3 features 316 pathways and 6,643 peptide-coding genes mapped to 2,103 enzyme-catalyzed and 87 protein-mediated transport reactions. The initial functional annotations of rice genes with InterPro, Gene Ontology, MetaCyc, and Enzyme Commission (EC) numbers were enriched with annotations provided by KEGG and Gramene databases. The pathway inferences and the network diagrams were first predicted based on MetaCyc reference networks and plant pathways from the Plant Metabolic Network, using the Pathologic module of Pathway Tools. This was enriched by manually adding metabolic pathways and gene functions specifically reported for rice. The RiceCyc database is hierarchically browsable from pathway diagrams to the associated genes, metabolites and chemical structures. Through the integrated tool OMICs Viewer, users can upload transcriptomic, proteomic and metabolomic data to visualize expression patterns in a virtual cell. RiceCyc, along with additional species-specific pathway databases hosted in the Gramene project, facilitates comparative pathway analysis.

Conclusions

Here we describe the RiceCyc network development and discuss its contribution to rice genome annotations. As a case study to demonstrate the use of RiceCyc network as a discovery environment we carried out an integrated bioinformatic analysis of rice metabolic genes that are differentially regulated under diurnal photoperiod and biotic stress treatments. The analysis of publicly available rice transcriptome datasets led to the hypothesis that the complete tryptophan biosynthesis and its dependent metabolic pathways including serotonin biosynthesis are induced by taxonomically diverse pathogens while also being under diurnal regulation. The RiceCyc database is available online for free access at http://www.gramene.org/pathway/.",2013-05-29 +25322839,FuncPatch: a web server for the fast Bayesian inference of conserved functional patches in protein 3D structures.,"

Motivation

A number of statistical phylogenetic methods have been developed to infer conserved functional sites or regions in proteins. Many methods, e.g. Rate4Site, apply the standard phylogenetic models to infer site-specific substitution rates and totally ignore the spatial correlation of substitution rates in protein tertiary structures, which may reduce their power to identify conserved functional patches in protein tertiary structures when the sequences used in the analysis are highly similar. The 3D sliding window method has been proposed to infer conserved functional patches in protein tertiary structures, but the window size, which reflects the strength of the spatial correlation, must be predefined and is not inferred from data. We recently developed GP4Rate to solve these problems under the Bayesian framework. Unfortunately, GP4Rate is computationally slow. Here, we present an intuitive web server, FuncPatch, to perform a fast approximate Bayesian inference of conserved functional patches in protein tertiary structures.

Results

Both simulations and four case studies based on empirical data suggest that FuncPatch is a good approximation to GP4Rate. However, FuncPatch is orders of magnitudes faster than GP4Rate. In addition, simulations suggest that FuncPatch is potentially a useful tool complementary to Rate4Site, but the 3D sliding window method is less powerful than FuncPatch and Rate4Site. The functional patches predicted by FuncPatch in the four case studies are supported by experimental evidence, which corroborates the usefulness of FuncPatch.

Availability and implementation

The software FuncPatch is freely available at the web site, http://info.mcmaster.ca/yifei/FuncPatch

Contact

golding@mcmaster.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-15 +25592581,Variable selection method for the identification of epistatic models.,"Standard analysis methods for genome wide association studies (GWAS) are not robust to complex disease models, such as interactions between variables with small main effects. These types of effects likely contribute to the heritability of complex human traits. Machine learning methods that are capable of identifying interactions, such as Random Forests (RF), are an alternative analysis approach. One caveat to RF is that there is no standardized method of selecting variables so that false positives are reduced while retaining adequate power. To this end, we have developed a novel variable selection method called relative recurrency variable importance metric (r2VIM). This method incorporates recurrency and variance estimation to assist in optimal threshold selection. For this study, we specifically address how this method performs in data with almost completely epistatic effects (i.e. no marginal effects). Our results show that with appropriate parameter settings, r2VIM can identify interaction effects when the marginal effects are virtually nonexistent. It also outperforms logistic regression, which has essentially no power under this type of model when the number of potential features (genetic variants) is large. 
(All Supplementary Data can be found here: http://research.nhgri.nih.gov/manuscripts/Bailey-Wilson/r2VIM_epi/).",2015-01-01 +24603277,POPTREEW: web version of POPTREE for constructing population trees from allele frequency data and computing some other quantities.,"POPTREE software, including the command line (POPTREE) and the Windows (POPTREE2) versions, is available to perform evolutionary analyses of allele frequency data, computing distance measures for constructing population trees and average heterozygosity (H) (measure of genetic diversity within populations) and G(ST) (measure of genetic differentiation among subdivided populations). We have now developed a web version POPTREEW (http://www.med.kagawa-u.ac.jp/∼genomelb/takezaki/poptreew/) to provide cross-platform access to all POPTREE functions including interactive tree editing. Furthermore, new POPTREE software (POPTREE, POPTREE2, and POPTREEW) computes standardized G(ST) and Jost's D, which may be appropriate for data with high variability, and accepts genotype data in GENEPOP format as an input.",2014-03-06 +26441624,A predictive model of muscle excitations based on muscle modularity for a large repertoire of human locomotion conditions.,"Humans can efficiently walk across a large variety of terrains and locomotion conditions with little or no mental effort. It has been hypothesized that the nervous system simplifies neuromuscular control by using muscle synergies, thus organizing multi-muscle activity into a small number of coordinative co-activation modules. In the present study we investigated how muscle modularity is structured across a large repertoire of locomotion conditions including five different speeds and five different ground elevations. For this we have used the non-negative matrix factorization technique in order to explain EMG experimental data with a low-dimensional set of four motor components. 
In this context each motor components is composed of a non-negative factor and the associated muscle weightings. Furthermore, we have investigated if the proposed descriptive analysis of muscle modularity could be translated into a predictive model that could: (1) Estimate how motor components modulate across locomotion speeds and ground elevations. This implies not only estimating the non-negative factors temporal characteristics, but also the associated muscle weighting variations. (2) Estimate how the resulting muscle excitations modulate across novel locomotion conditions and subjects. The results showed three major distinctive features of muscle modularity: (1) the number of motor components was preserved across all locomotion conditions, (2) the non-negative factors were consistent in shape and timing across all locomotion conditions, and (3) the muscle weightings were modulated as distinctive functions of locomotion speed and ground elevation. Results also showed that the developed predictive model was able to reproduce well the muscle modularity of un-modeled data, i.e., novel subjects and conditions. Muscle weightings were reconstructed with a cross-correlation factor greater than 70% and a root mean square error less than 0.10. Furthermore, the generated muscle excitations matched well the experimental excitation with a cross-correlation factor greater than 85% and a root mean square error less than 0.09. The ability of synthetizing the neuromuscular mechanisms underlying human locomotion across a variety of locomotion conditions will enable solutions in the field of neurorehabilitation technologies and control of bipedal artificial systems. Open-access of the model implementation is provided for further analysis at https://simtk.org/home/p-mep/.",2015-09-17 +23819846,Collective judgment predicts disease-associated single nucleotide variants.,"

Background

In recent years the number of human genetic variants deposited into the publicly available databases has been increasing exponentially. The latest version of dbSNP, for example, contains ~50 million validated Single Nucleotide Variants (SNVs). SNVs make up most of human variation and are often the primary causes of disease. The non-synonymous SNVs (nsSNVs) result in single amino acid substitutions and may affect protein function, often causing disease. Although several methods for the detection of nsSNV effects have already been developed, the consistent increase in annotated data is offering the opportunity to improve prediction accuracy.

Results

Here we present a new approach for the detection of disease-associated nsSNVs (Meta-SNP) that integrates four existing methods: PANTHER, PhD-SNP, SIFT and SNAP. We first tested the accuracy of each method using a dataset of 35,766 disease-annotated mutations from 8,667 proteins extracted from the SwissVar database. The four methods reached overall accuracies of 64%-76% with a Matthew's correlation coefficient (MCC) of 0.38-0.53. We then used the outputs of these methods to develop a machine learning based approach that discriminates between disease-associated and polymorphic variants (Meta-SNP). In testing, the combined method reached 79% overall accuracy and 0.59 MCC, ~3% higher accuracy and ~0.05 higher correlation with respect to the best-performing method. Moreover, for the hardest-to-define subset of nsSNVs, i.e. variants for which half of the predictors disagreed with the other half, Meta-SNP attained 8% higher accuracy than the best predictor.

Conclusions

Here we find that the Meta-SNP algorithm achieves better performance than the best single predictor. This result suggests that the methods used for the prediction of variant-disease associations are orthogonal, encoding different biologically relevant relationships. Careful combination of predictions from various resources is therefore a good strategy for the selection of high reliability predictions. Indeed, for the subset of nsSNVs where all predictors were in agreement (46% of all nsSNVs in the set), our method reached 87% overall accuracy and 0.73 MCC. Meta-SNP server is freely accessible at http://snps.biofold.org/meta-snp.",2013-05-28 +26921406,Distribution of miRNA expression across human tissues.,"We present a human miRNA tissue atlas by determining the abundance of 1997 miRNAs in 61 tissue biopsies of different organs from two individuals collected post-mortem. One thousand three hundred sixty-four miRNAs were discovered in at least one tissue, 143 were present in each tissue. To define the distribution of miRNAs, we utilized a tissue specificity index (TSI). The majority of miRNAs (82.9%) fell in a middle TSI range i.e. were neither specific for single tissues (TSI > 0.85) nor housekeeping miRNAs (TSI < 0.5). Nonetheless, we observed many different miRNAs and miRNA families that were predominantly expressed in certain tissues. Clustering of miRNA abundances revealed that tissues like several areas of the brain clustered together. Considering -3p and -5p mature forms we observed miR-150 with different tissue specificity. Analysis of additional lung and prostate biopsies indicated that inter-organism variability was significantly lower than inter-organ variability. Tissue-specific differences between the miRNA patterns appeared not to be significantly altered by storage as shown for heart and lung tissue. 
MiRNAs TSI values of human tissues were significantly (P = 10(-8)) correlated with those of rats; miRNAs that were highly abundant in certain human tissues were likewise abundant in according rat tissues. We implemented a web-based repository enabling scientists to access and browse the data (https://ccb-web.cs.uni-saarland.de/tissueatlas).",2016-02-25 +23819870,Identifying Mendelian disease genes with the variant effect scoring tool.,"

Background

Whole exome sequencing studies identify hundreds to thousands of rare protein coding variants of ambiguous significance for human health. Computational tools are needed to accelerate the identification of specific variants and genes that contribute to human disease.

Results

We have developed the Variant Effect Scoring Tool (VEST), a supervised machine learning-based classifier, to prioritize rare missense variants with likely involvement in human disease. The VEST classifier training set comprised ~ 45,000 disease mutations from the latest Human Gene Mutation Database release and another ~45,000 high frequency (allele frequency >1%) putatively neutral missense variants from the Exome Sequencing Project. VEST outperforms some of the most popular methods for prioritizing missense variants in carefully designed holdout benchmarking experiments (VEST ROC AUC = 0.91, PolyPhen2 ROC AUC = 0.86, SIFT4.0 ROC AUC = 0.84). VEST estimates variant score p-values against a null distribution of VEST scores for neutral variants not included in the VEST training set. These p-values can be aggregated at the gene level across multiple disease exomes to rank genes for probable disease involvement. We tested the ability of an aggregate VEST gene score to identify candidate Mendelian disease genes, based on whole-exome sequencing of a small number of disease cases. We used whole-exome data for two Mendelian disorders for which the causal gene is known. Considering only genes that contained variants in all cases, the VEST gene score ranked dihydroorotate dehydrogenase (DHODH) number 2 of 2253 genes in four cases of Miller syndrome, and myosin-3 (MYH3) number 2 of 2313 genes in three cases of Freeman Sheldon syndrome.

Conclusions

Our results demonstrate the potential power gain of aggregating bioinformatics variant scores into gene-level scores and the general utility of bioinformatics in assisting the search for disease genes in large-scale exome sequencing studies. VEST is available as a stand-alone software package at http://wiki.chasmsoftware.org and is hosted by the CRAVAT web server at http://www.cravat.us.",2013-05-28 +22255115,PhenOMIM: an OMIM-based secondary database purported for phenotypic comparison.,"Phenotypic comparison may provide crucial information for obtaining insights into molecular interactions underlying various diseases. However, few attempts have been made to systematically analyze the phenotypes of hereditary disorders, mainly owing to the poor quality of text descriptions and lack of a unified system of descriptors. Here we present a secondary database, PHENOMIM, for translating the phenotypic data obtained from the Online Mendelian Inheritance in Man (OMIM) database into a structured form. Moreover, a web interface has also been developed for visualizing the data and related information from the OMIM and PhenOMIM databases. The data is freely available online for reviewing and commenting purposes and can be found at http://faculty.neu.edu.cn/bmie/han/PhenOMIM/.",2011-01-01 +25592605,Bayclone: Bayesian nonparametric inference of tumor subclones using NGS data.,"In this paper, we present a novel feature allocation model to describe tumor heterogeneity (TH) using next-generation sequencing (NGS) data. Taking a Bayesian approach, we extend the Indian buffet process (IBP) to define a class of nonparametric models, the categorical IBP (cIBP). A cIBP takes categorical values to denote homozygous or heterozygous genotypes at each SNV. We define a subclone as a vector of these categorical values, each corresponding to an SNV. 
Instead of partitioning somatic mutations into non-overlapping clusters with similar cellular prevalences, we took a different approach using feature allocation. Importantly, we do not assume somatic mutations with similar cellular prevalence must be from the same subclone and allow overlapping mutations shared across subclones. We argue that this is closer to the underlying theory of phylogenetic clonal expansion, as somatic mutations occurred in parent subclones should be shared across the parent and child subclones. Bayesian inference yields posterior probabilities of the number, genotypes, and proportions of subclones in a tumor sample, thereby providing point estimates as well as variabilities of the estimates for each subclone. We report results on both simulated and real data. BayClone is available at http://health.bsd.uchicago.edu/yji/soft.html.",2015-01-01 +25535244,PBOOST: a GPU-based tool for parallel permutation tests in genome-wide association studies.,"

Motivation

The importance of testing associations allowing for interactions has been demonstrated by Marchini et al. (2005). A fast method detecting associations allowing for interactions has been proposed by Wan et al. (2010a). The method is based on likelihood ratio test with the assumption that the statistic follows the χ(2) distribution. Many single nucleotide polymorphism (SNP) pairs with significant associations allowing for interactions have been detected using their method. However, the assumption of χ(2) test requires the expected values in each cell of the contingency table to be at least five. This assumption is violated in some identified SNP pairs. In this case, likelihood ratio test may not be applicable any more. Permutation test is an ideal approach to checking the P-values calculated in likelihood ratio test because of its non-parametric nature. The P-values of SNP pairs having significant associations with disease are always extremely small. Thus, we need a huge number of permutations to achieve correspondingly high resolution for the P-values. In order to investigate whether the P-values from likelihood ratio tests are reliable, a fast permutation tool to accomplish large number of permutations is desirable.

Results

We developed a permutation tool named PBOOST. It is based on GPU with highly reliable P-value estimation. By using simulation data, we found that the P-values from likelihood ratio tests will have relative error of >100% when 50% cells in the contingency table have expected count less than five or when there is zero expected count in any of the contingency table cells. In terms of speed, PBOOST completed 10(7) permutations for a single SNP pair from the Wellcome Trust Case Control Consortium (WTCCC) genome data (Wellcome Trust Case Control Consortium, 2007) within 1 min on a single Nvidia Tesla M2090 device, while it took 60 min in a single CPU Intel Xeon E5-2650 to finish the same task. More importantly, when simultaneously testing 256 SNP pairs for 10(7) permutations, our tool took only 5 min, while the CPU program took 10 h. By permuting on a GPU cluster consisting of 40 nodes, we completed 10(12) permutations for all 280 SNP pairs reported with P-values smaller than 1.6 × 10⁻¹² in the WTCCC datasets in 1 week.

Availability and implementation

The source code and sample data are available at http://bioinformatics.ust.hk/PBOOST.zip.

Contact

gyang@ust.hk; eeyu@ust.hk

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-21 +24954124,[Intravenous lipid emulsion and local anesthetic-induced systemic toxicity: mechanisms and limits].,"

Objectives

Intravenous lipid emulsions (ILE) are recommended today in cases of local anesthetic-induced systemic toxicity (LAST). The objectives of this review consist in describing the mechanisms involved in the interaction between ILE and local anesthetic (LA), the factors influencing this interaction, and the limits associated with the use of ILE.

Data sources

References were obtained from Pubmed data bank (http://www.ncbi.nlm.nih.gov/pubmed) using the following keywords: Intralipid(®), local anesthetic, toxicity, intravenous lipid emulsion.

Data synthesis

Effects of the association between ILE-LA are based on droplet formations as well as changes in cell metabolism involving survival cell pathway, on functional properties and on direct hemodynamic parameters. Hypoxia, acidosis and high doses of epinephrine modified the effects of ILE-LA association.

Conclusion

Prescription of ILE is recommended by published guidelines on LAST resuscitation. ILE cannot substitute to the standard resuscitation protocol. It should be added to that protocol. Experimental studies as well as a case report registry will allow understanding further the effects induced by the ILE-LA association.",2014-06-20 +25675148,An evaluation of the prognostic model PREDICT using the POSH cohort of women aged ⩽40 years at breast cancer diagnosis.,"

Background

Breast cancer is the most common cancer in younger women (aged ⩽40 years) in the United Kingdom. PREDICT (http://www.predict.nhs.uk) is an online prognostic tool developed to help determine the best available treatment and outcome for early breast cancer. This study was conducted to establish how well PREDICT performs in estimating survival in a large cohort of younger women recruited to the UK POSH study.

Methods

The POSH cohort includes data from 3000 women aged ⩽40 years at breast cancer diagnosis. Study end points were overall and breast cancer-specific survival at 5, 8, and 10 years. Evaluation of PREDICT included model discrimination and comparison of the number of predicted versus observed events.

Results

PREDICT provided accurate long-term (8- and 10-year) survival estimates for younger women. Five-year estimates were less accurate, with the tool overestimating survival by 25% overall, and by 56% for patients with oestrogen receptor (ER)-positive tumours. PREDICT underestimated survival at 5 years among patients with ER-negative tumours.

Conclusions

PREDICT is a useful tool for providing reliable long-term (10-year) survival estimates for younger patients. However, for more accurate short-term estimates, the model requires further calibration using more data from young onset cases. Short-term prediction may be most relevant for the increasing number of women considering risk-reducing bilateral mastectomy.",2015-03-17 +26245130,[Analysis on Research Projects Supported by the National Natural Science Foundation of China at the National Institute of Parasitic Diseases during 2003-2013].,"The data of the National Natural Science Foundation (NSFC) projects obtained by the National Institute of Parasitic Diseases (NIPD), Chinese Center for Disease Control and Prevention (China CDC) during 2003-2013 were collected from internet-based science information system of NSFC, and NSFC search tool of Dingxiang Garden (http://nsfc.biomart.cn/). The number of funded projects, their subject classification and approved amount were analyzed, and compared with the other institutes of China CDC. Furthermore, the rationalization proposals were given in order to enhance the level of foundation management in the future.",2015-04-01 +22581809,Database of the radioactivity of norm used as industrial raw materials.,"Most ores used as industrial raw materials are imported mainly because Japan has poor natural resources. The activity concentrations in these materials should be investigated to evaluate the radiation exposure of workers. In this study, imported industrial raw materials were collected, and the activity concentrations in these resources were measured by using inductively coupled plasma mass spectrometry and gamma ray spectrometry. Furthermore, a database of activity concentrations of NORMs was developed by referring to the measured results as well as referring to the literature, and a database on the web was published. 
The purpose of the database is to relieve anxieties among the general public and to provide extensive data regarding NORM for researchers and regulators. The database provides more than 900 activity concentrations in worldwide NORMs at no fee. (NORM database; http://www.nirs.go.jp/db/anzendb/NORMDB/ENG/index.php).",2012-05-11 +22536969,Improving biomarker list stability by integration of biological knowledge in the learning process.,"

Background

The identification of robust lists of molecular biomarkers related to a disease is a fundamental step for early diagnosis and treatment. However, methodologies for biomarker discovery using microarray data often provide results with limited overlap. It has been suggested that one reason for these inconsistencies may be that in complex diseases, such as cancer, multiple genes belonging to one or more physiological pathways are associated with the outcomes. Thus, a possible approach to improve list stability is to integrate biological information from genomic databases in the learning process; however, a comprehensive assessment based on different types of biological information is still lacking in the literature. In this work we have compared the effect of using different biological information in the learning process like functional annotations, protein-protein interactions and expression correlation among genes.

Results

Biological knowledge has been codified by means of gene similarity matrices and expression data linearly transformed in such a way that the more similar two features are, the more closely they are mapped. Two semantic similarity matrices, based on Biological Process and Molecular Function Gene Ontology annotation, and geodesic distance applied on protein-protein interaction networks, are the best performers in improving list stability maintaining almost equal prediction accuracy.

Conclusions

The performed analysis supports the idea that when some features are strongly correlated to each other, for example because are close in the protein-protein interaction network, then they might have similar importance and are equally relevant for the task at hand. Obtained results can be a starting point for additional experiments on combining similarity matrices in order to obtain even more stable lists of biomarkers. The implementation of the classification algorithm is available at the link: http://www.math.unipd.it/~dasan/biomarkers.html.",2012-03-28 +26484169,Data for chromosome contacts and matched transcription profiles at three cell cycle phases in the fission yeast.,"The data described in this article pertains to Grand et al. (2014), ""Chromosome conformation maps in fission yeast reveal cell cycle dependent sub nuclear structure"" [1]. Temperature sensitive Schizosaccharomyces pombe cell division cycle (cdc) mutants, which are induced by a shift in temperature to 36 °C, were chosen for the analysis of genome structure in the G1 phase, G2 phase and mitotic anaphase of the cell cycle. Chromatin and total RNA were isolated from the same cell culture following synchronization. Two biological replicates were analyzed for each condition. The global, three-dimensional organization of the chromosomes was captured at high resolution using Genome Conformation Capture (GCC). GCC libraries and RNA samples were sequenced using an Illumina Hi-Seq 2000 platform (Beijing Genomics Institute (China)). DNA sequences were processed using the Topography suite v1.19 [2] to obtain chromosome contact frequency matrices. RNA sequences were processed using the Cufflinks pipeline [3] to measure gene transcript levels and how these varied between the conditions. 
All sequence data, processed GCC and transcriptome files are available under the Gene Expression Omnibus (GEO) accession number GSE52287 (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE52287).",2015-01-20 +25900361,Enzyme Function Initiative-Enzyme Similarity Tool (EFI-EST): A web tool for generating protein sequence similarity networks.,"The Enzyme Function Initiative, an NIH/NIGMS-supported Large-Scale Collaborative Project (EFI; U54GM093342; http://enzymefunction.org/), is focused on devising and disseminating bioinformatics and computational tools as well as experimental strategies for the prediction and assignment of functions (in vitro activities and in vivo physiological/metabolic roles) to uncharacterized enzymes discovered in genome projects. Protein sequence similarity networks (SSNs) are visually powerful tools for analyzing sequence relationships in protein families (H.J. Atkinson, J.H. Morris, T.E. Ferrin, and P.C. Babbitt, PLoS One 2009, 4, e4345). However, the members of the biological/biomedical community have not had access to the capability to generate SSNs for their ""favorite"" protein families. In this article we announce the EFI-EST (Enzyme Function Initiative-Enzyme Similarity Tool) web tool (http://efi.igb.illinois.edu/efi-est/) that is available without cost for the automated generation of SSNs by the community. The tool can create SSNs for the ""closest neighbors"" of a user-supplied protein sequence from the UniProt database (Option A) or of members of any user-supplied Pfam and/or InterPro family (Option B). We provide an introduction to SSNs, a description of EFI-EST, and a demonstration of the use of EFI-EST to explore sequence-function space in the OMP decarboxylase superfamily (PF00215). 
This article is designed as a tutorial that will allow members of the community to use the EFI-EST web tool for exploring sequence/function space in protein families.",2015-04-18 +27139126,"Prognostic value of blood-biomarkers related to hypoxia, inflammation, immune response and tumour load in non-small cell lung cancer - A survival model with external validation.","

Aim

Improve the prognostic prediction of clinical variables for non-small cell lung cancer (NSCLC), by selecting from blood-biomarkers, non-invasively describing hypoxia, inflammation and tumour load.

Methods

Model development and validation included 182 and 181 inoperable stage I-IIIB NSCLC patients treated radically with radiotherapy (55.2%) or chemo-radiotherapy (44.8%). Least absolute shrinkage and selection operator (LASSO), selected from blood-biomarkers related to hypoxia [osteopontin (OPN) and carbonic anhydrase IX (CA-IX)], inflammation [interleukin-6 (IL-6), IL-8, and C-reactive protein (CRP)], and tumour load [carcinoembryonic antigen (CEA), and cytokeratin fragment 21-1 (Cyfra 21-1)]. Sequent model extension selected from alpha-2-macroglobulin (α2M), serum interleukin-2 receptor (sIL2r), toll-like receptor 4 (TLR4), and vascular endothelial growth factor (VEGF). Discrimination was reported by concordance-index.

Results

OPN and Cyfra 21-1 (hazard ratios of 3.3 and 1.7) significantly improved a clinical model comprising gender, World Health Organization performance-status, forced expiratory volume in 1s, number of positive lymph node stations, and gross tumour volume, from a concordance-index of 0.66 to 0.70 (validation=0.62 and 0.66). Extension of the validated model yielded a concordance-index of 0.67, including α2M, sIL2r and VEGF (hazard ratios of 4.6, 3.1, and 1.4).

Conclusion

Improvement of a clinical model including hypoxia and tumour load blood-biomarkers was validated. New immunological markers were associated with overall survival. Data and models can be found at www.cancerdata.org (http://dx.doi.org/10.17195/candat.2016.04.1) and www.predictcancer.org.",2016-04-29 +26606640,Residential Road Traffic Noise and High Depressive Symptoms after Five Years of Follow-up: Results from the Heinz Nixdorf Recall Study.,"

Background

Traffic noise affects a large number of people, particularly in urbanized areas. Noise causes stress and annoyance, but less is known about the relationship between noise and depression.

Objective

We investigated the association of residential road traffic noise with depressive symptoms using 5-year follow-up data from a German population-based study.

Methods

We analyzed data from 3,300 participants in the Heinz Nixdorf Recall study who were between 45 and 75 years old and were without depressive symptoms at baseline (2000-2003). Depressive symptoms were defined based on the Center for Epidemiologic Studies Depression scale (CES-D) 15-item questionnaire (total score ≥ 17) and antidepressant medication intake. Road traffic noise was modeled according to European Parliament/Council Directive 2002/49/EC. High noise exposure was defined as annual mean 24-hr noise levels > 55 A-weighted decibels [dB(A)]. Poisson regression with robust variance was used to estimate relative risks (RRs) a) adjusting for the potential confounders age, sex, socioeconomic status (SES), neighborhood-level SES, and traffic proximity; b) additionally adjusting for body mass index and smoking; and c) additionally adjusting for the potential confounders/intermediates comorbidities and insomnia.

Results

Overall, 35.7% of the participants were exposed to high residential road traffic noise levels. At follow-up (mean = 5.1 years after baseline), 302 participants were classified as having high depressive symptoms, corresponding to an adjusted RR of 1.29 (95% CI: 1.03, 1.62; Model 1) for exposure to > 55 versus ≤ 55 dB(A). Adjustment for potential confounders/intermediates did not substantially alter the results. Associations were stronger among those who reported insomnia at baseline (RR = 1.62; 95% CI: 1.10, 2.59 vs. RR = 1.21; 95% CI: 0.94, 1.57) and appeared to be limited to those with ≤ 13 years of education (RR = 1.43; 95% CI: 1.10, 1.85 vs. 0.92; 95% CI: 0.56, 1.53 for > 13 years).

Conclusion

Our results suggest that exposure to residential road traffic noise increases the risk of depressive symptoms.

Citation

Orban E, McDonald K, Sutcliffe R, Hoffmann B, Fuks KB, Dragano N, Viehmann A, Erbel R, Jöckel KH, Pundt N, Moebus S. 2016. Residential road traffic noise and high depressive symptoms after five years of follow-up: results from the Heinz Nixdorf Recall Study. Environ Health Perspect 124:578-585; http://dx.doi.org/10.1289/ehp.1409400.",2015-11-25 +24669835,An algebra-based method for inferring gene regulatory networks.,"

Background

The inference of gene regulatory networks (GRNs) from experimental observations is at the heart of systems biology. This includes the inference of both the network topology and its dynamics. While there are many algorithms available to infer the network topology from experimental data, less emphasis has been placed on methods that infer network dynamics. Furthermore, since the network inference problem is typically underdetermined, it is essential to have the option of incorporating into the inference process, prior knowledge about the network, along with an effective description of the search space of dynamic models. Finally, it is also important to have an understanding of how a given inference method is affected by experimental and other noise in the data used.

Results

This paper contains a novel inference algorithm using the algebraic framework of Boolean polynomial dynamical systems (BPDS), meeting all these requirements. The algorithm takes as input time series data, including those from network perturbations, such as knock-out mutant strains and RNAi experiments. It allows for the incorporation of prior biological knowledge while being robust to significant levels of noise in the data used for inference. It uses an evolutionary algorithm for local optimization with an encoding of the mathematical models as BPDS. The BPDS framework allows an effective representation of the search space for algebraic dynamic models that improves computational performance. The algorithm is validated with both simulated and experimental microarray expression profile data. Robustness to noise is tested using a published mathematical model of the segment polarity gene network in Drosophila melanogaster. Benchmarking of the algorithm is done by comparison with a spectrum of state-of-the-art network inference methods on data from the synthetic IRMA network to demonstrate that our method has good precision and recall for the network reconstruction task, while also predicting several of the dynamic patterns present in the network.

Conclusions

Boolean polynomial dynamical systems provide a powerful modeling framework for the reverse engineering of gene regulatory networks, that enables a rich mathematical structure on the model search space. A C++ implementation of the method, distributed under LGPL license, is available, together with the source code, at http://www.paola-vera-licona.net/Software/EARevEng/REACT.html.",2014-03-26 +21333005,TRAM (Transcriptome Mapper): database-driven creation and analysis of transcriptome maps from multiple sources.,"

Background

Several tools have been developed to perform global gene expression profile data analysis, to search for specific chromosomal regions whose features meet defined criteria as well as to study neighbouring gene expression. However, most of these tools are tailored for a specific use in a particular context (e.g. they are species-specific, or limited to a particular data format) and they typically accept only gene lists as input.

Results

TRAM (Transcriptome Mapper) is a new general tool that allows the simple generation and analysis of quantitative transcriptome maps, starting from any source listing gene expression values for a given gene set (e.g. expression microarrays), implemented as a relational database. It includes a parser able to assign univocal and updated gene symbols to gene identifiers from different data sources. Moreover, TRAM is able to perform intra-sample and inter-sample data normalization, including an original variant of quantile normalization (scaled quantile), useful to normalize data from platforms with highly different numbers of investigated genes. When in 'Map' mode, the software generates a quantitative representation of the transcriptome of a sample (or of a pool of samples) and identifies if segments of defined lengths are over/under-expressed compared to the desired threshold. When in 'Cluster' mode, the software searches for a set of over/under-expressed consecutive genes. Statistical significance for all results is calculated with respect to genes localized on the same chromosome or to all genome genes. Transcriptome maps, showing differential expression between two sample groups, relative to two different biological conditions, may be easily generated. We present the results of a biological model test, based on a meta-analysis comparison between a sample pool of human CD34+ hematopoietic progenitor cells and a sample pool of megakaryocytic cells. Biologically relevant chromosomal segments and gene clusters with differential expression during the differentiation toward megakaryocyte were identified.

Conclusions

TRAM is designed to create, and statistically analyze, quantitative transcriptome maps, based on gene expression data from multiple sources. The release includes FileMaker Pro database management runtime application and it is freely available at http://apollo11.isto.unibo.it/software/, along with preconfigured implementations for mapping of human, mouse and zebrafish transcriptomes.",2011-02-18 +24674136,TSSAR: TSS annotation regime for dRNA-seq data.,"

Background

Differential RNA sequencing (dRNA-seq) is a high-throughput screening technique designed to examine the architecture of bacterial operons in general and the precise position of transcription start sites (TSS) in particular. Hitherto, dRNA-seq data were analyzed by visualizing the sequencing reads mapped to the reference genome and manually annotating reliable positions. This is very labor intensive and, due to the subjectivity, biased.

Results

Here, we present TSSAR, a tool for automated de novo TSS annotation from dRNA-seq data that respects the statistics of dRNA-seq libraries. TSSAR uses the premise that the number of sequencing reads starting at a certain genomic position within a transcriptional active region follows a Poisson distribution with a parameter that depends on the local strength of expression. The differences of two dRNA-seq library counts thus follow a Skellam distribution. This provides a statistical basis to identify significantly enriched primary transcripts. We assessed the performance by analyzing a publicly available dRNA-seq data set using TSSAR and two simple approaches that utilize user-defined score cutoffs. We evaluated the power of reproducing the manual TSS annotation. Furthermore, the same data set was used to reproduce 74 experimentally validated TSS in H. pylori from reliable techniques such as RACE or primer extension. Both analyses showed that TSSAR outperforms the static cutoff-dependent approaches.

Conclusions

Having an automated and efficient tool for analyzing dRNA-seq data facilitates the use of the dRNA-seq technique and promotes its application to more sophisticated analysis. For instance, monitoring the plasticity and dynamics of the transcriptomal architecture triggered by different stimuli and growth conditions becomes possible. The main asset of a novel tool for dRNA-seq analysis that reaches out to a broad user community is usability. As such, we provide TSSAR both as intuitive RESTful Web service (http://rna.tbi.univie.ac.at/TSSAR) together with a set of post-processing and analysis tools, as well as a stand-alone version for use in high-throughput dRNA-seq data analysis pipelines.",2014-03-27 +21296746,RGD: a comparative genomics platform.,"The Rat Genome Database (RGD) (http://rgd.mcw.edu) provides a comprehensive platform for comparative genomics and genetics research. RGD houses gene, QTL and polymorphic marker data for rat, mouse and human and provides easy access to data through sophisticated searches, disease portals, interactive pathway diagrams and rat and human genome browsers.",2011-01-01 +22053087,MimoDB 2.0: a mimotope database and beyond.,"Mimotopes are peptides with affinities to given targets. They are readily obtained through biopanning against combinatorial peptide libraries constructed by phage display and other display technologies such as mRNA display, ribosome display, bacterial display and yeast display. Mimotopes have been used to infer the protein interaction sites and networks; they are also ideal candidates for developing new diagnostics, therapeutics and vaccines. However, such valuable peptides are not collected in the central data resources such as UniProt and NCBI GenPept due to their 'unnatural' short sequences. The MimoDB database is an information portal to biopanning results of random libraries. In version 2.0, it has 15,633 peptides collected from 849 papers and grouped into 1818 sets. 
Besides the core data on panning experiments and their results, broad background information on target, template, library and structure is included. An accompanied benchmark has also been compiled for bioinformaticians to develop and evaluate their new models, algorithms and programs. In addition, the MimoDB database provides tools for simple and advanced searches, structure visualization, BLAST and alignment view on the fly. The experimental biologists can easily use the database as a virtual control to exclude possible target-unrelated peptides. The MimoDB database is freely available at http://immunet.cn/mimodb.",2011-11-03 +24747190,PrimerSeq: Design and visualization of RT-PCR primers for alternative splicing using RNA-seq data.,"The vast majority of multi-exon genes in higher eukaryotes are alternatively spliced and changes in alternative splicing (AS) can impact gene function or cause disease. High-throughput RNA sequencing (RNA-seq) has become a powerful technology for transcriptome-wide analysis of AS, but RT-PCR still remains the gold-standard approach for quantifying and validating exon splicing levels. We have developed PrimerSeq, a user-friendly software for systematic design and visualization of RT-PCR primers using RNA-seq data. PrimerSeq incorporates user-provided transcriptome profiles (i.e., RNA-seq data) in the design process, and is particularly useful for large-scale quantitative analysis of AS events discovered from RNA-seq experiments. PrimerSeq features a graphical user interface (GUI) that displays the RNA-seq data juxtaposed with the expected RT-PCR results. To enable primer design and visualization on user-provided RNA-seq data and transcript annotations, we have developed PrimerSeq as a stand-alone software that runs on local computers. PrimerSeq is freely available for Windows and Mac OS X along with source code at http://primerseq.sourceforge.net/. 
With the growing popularity of RNA-seq for transcriptome studies, we expect PrimerSeq to help bridge the gap between high-throughput RNA-seq discovery of AS events and molecular analysis of candidate events by RT-PCR.",2014-04-18 +22772948,A sequence comparison and gene expression data integration add-on for the Pathway Tools software.,"

Unlabelled

We present a plug-in for Pathway Tools, an integrated systems biology software to create, maintain and query Pathway/Genome Databases. Fully integrated into the graphical user interface and menu, this plug-in extends the application's functionality by the ability to create multiple sequence alignments, systematically annotate insertion sequence (IS) elements and analyse their activity by cross-species comparison tools. Microarray probes can be automatically mapped to target genes, and expression data obtained with these arrays can be transformed into input formats needed to visualize them in the various omics viewers of Pathway Tools. The plug-in API itself allows developers to integrate their own functions into the Pathway Tools menu.

Availability

Binaries are freely available for non-commercial users at http://genome.tugraz.at/PGDBToolbox/ and can be used on all platforms supported by Pathway Tools. A user guide is freely available at: http://genome.tugraz.at/PGDBToolbox/documentation.shtml.",2012-07-05 +25885405,Thrombosis in essential thrombocytemia and early/prefibrotic primary myelofibrosis: the role of the WHO histological diagnosis.,"

Background

Vascular events represent the most frequent complications of thrombocytemias. We aimed to evaluate their risk in the WHO histologic categories of Essential Thrombocytemia (ET) and early Primary Myelofibrosis (PMF).

Methods

From our clinical database of 283 thrombocytemic patients, we selected those with available bone marrow histology performed before any treatment, at or within 1 year from diagnosis, and reclassified the 131 cases as true ET or early PMF, with or without fibrosis, according to the WHO histological criteria. Vaso-occlusive events at diagnosis and in the follow-up were compared in the WHO-groups.

Results

Histologic review reclassified 61 cases as ET and 72 cases as early PMF (26 prefibrotic and 42 with grade 1 or 2 fibrosis). Compared to ET, early PMF showed a significant higher rate of thrombosis both in the past history (22% vs 8%) and at diagnosis (15.2% vs 1.6%), and an increased leukocyte count (8389 vs 7500/mmc). Venous thromboses (mainly atypical) were relatively more common in PMF than in ET. Patients with prefibrotic PMF, although younger, showed a significant higher 15-year risk of developing thrombosis (48% vs 16% in fibrotic PMF and 17% in ET). At multivariate analysis, age and WHO histology were both independent risk-factors for thrombosis during follow-up; patients >60 yr-old or with prefibrotic PMF showed a significantly higher risk at 20 years than patients <60 yr-old with ET or fibrotic PMF (47% vs 4%, p = 0.005).

Conclusions

Our study supports the importance of WHO histologic categories in the thrombotic risk stratification of patients with thrombocytemias.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2020211863144412 .",2015-04-16 +25282641,CompMap: a reference-based compression program to speed up read mapping to related reference sequences.,"

Summary

Exhaustive mapping of next-generation sequencing data to a set of relevant reference sequences becomes an important task in pathogen discovery and metagenomic classification. However, the runtime and memory usage increase as the number of reference sequences and the repeat content among these sequences increase. In many applications, read mapping time dominates the entire application. We developed CompMap, a reference-based compression program, to speed up this process. CompMap enables the generation of a non-redundant representative sequence for the input sequences. We have demonstrated that reads can be mapped to this representative sequence with a much reduced time and memory usage, and the mapping to the original reference sequences can be recovered with high accuracy.

Availability and implementation

CompMap is implemented in C and freely available at http://csse.szu.edu.cn/staff/zhuzx/CompMap/.

Contact

xiaoyang@broadinstitute.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-04 +27153604,Genome-scale prediction of moonlighting proteins using diverse protein association information.,"

Motivation

Moonlighting proteins (MPs) show multiple cellular functions within a single polypeptide chain. To understand the overall landscape of their functional diversity, it is important to establish a computational method that can identify MPs on a genome scale. Previously, we have systematically characterized MPs using functional and omics-scale information. In this work, we develop a computational prediction model for automatic identification of MPs using a diverse range of protein association information.

Results

We incorporated a diverse range of protein association information to extract characteristic features of MPs, which range from gene ontology (GO), protein-protein interactions, gene expression, phylogenetic profiles, genetic interactions and network-based graph properties to protein structural properties, i.e. intrinsically disordered regions in the protein chain. Then, we used machine learning classifiers using the broad feature space for predicting MPs. Because many known MPs lack some proteomic features, we developed an imputation technique to fill such missing features. Results on the control dataset show that MPs can be predicted with over 98% accuracy when GO terms are available. Furthermore, using only the omics-based features the method can still identify MPs with over 75% accuracy. Last, we applied the method on three genomes: Saccharomyces cerevisiae, Caenorhabditis elegans and Homo sapiens, and found that about 2-10% of proteins in the genomes are potential MPs.

Availability and implementation

Code available at http://kiharalab.org/MPprediction

Contact

dkihara@purdue.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-03-26 +24835279,Statistical models for detecting differential chromatin interactions mediated by a protein.,"Chromatin interactions mediated by a protein of interest are of great scientific interest. Recent studies show that protein-mediated chromatin interactions can have different intensities in different types of cells or in different developmental stages of a cell. Such differences can be associated with a disease or with the development of a cell. Thus, it is of great importance to detect protein-mediated chromatin interactions with different intensities in different cells. A recent molecular technique, Chromatin Interaction Analysis by Paired-End Tag Sequencing (ChIA-PET), which uses formaldehyde cross-linking and paired-end sequencing, is able to detect genome-wide chromatin interactions mediated by a protein of interest. Here we proposed two models (One-Step Model and Two-Step Model) for two sample ChIA-PET count data (one biological replicate in each sample) to identify differential chromatin interactions mediated by a protein of interest. Both models incorporate the data dependency and the extent to which a fragment pair is related to a pair of DNA loci of interest to make accurate identifications. The One-Step Model makes use of the data more efficiently but is more computationally intensive. An extensive simulation study showed that the models can detect those differentially interacted chromatins and there is a good agreement between each classification result and the truth. Application of the method to a two-sample ChIA-PET data set illustrates its utility. The two models are implemented as an R package MDM (available at http://www.stat.osu.edu/~statgen/SOFTWARE/MDM).",2014-05-16 +24093424,Guide: a desktop application for analysing gene expression data.,"

Background

Multiple competing bioinformatics tools exist for next-generation sequencing data analysis. Many of these tools are available as R/Bioconductor modules, and it can be challenging for the bench biologist without any programming background to quickly analyse genomics data. Here, we present an application that is designed to be simple to use, while leveraging the power of R as the analysis engine behind the scenes.

Results

Genome Informatics Data Explorer (Guide) is a desktop application designed for the bench biologist to analyse RNA-seq and microarray gene expression data. It requires a text file of summarised read counts or expression values as input data, and performs differential expression analyses at both the gene and pathway level. It uses well-established R/Bioconductor packages such as limma for its analyses, without requiring the user to have specific knowledge of the underlying R functions. Results are presented in figures or interactive tables which integrate useful data from multiple sources such as gene annotation and orthologue data. Advanced options include the ability to edit R commands to customise the analysis pipeline.

Conclusions

Guide is a desktop application designed to query gene expression data in a user-friendly way while automatically communicating with R. Its customisation options make it possible to use different bioinformatics tools available through R/Bioconductor for its analyses, while keeping the core usage simple. Guide is written in the cross-platform framework of Qt, and is freely available for use from http://guide.wehi.edu.au.",2013-10-07 +23161684,"DcGO: database of domain-centric ontologies on functions, phenotypes, diseases and more.","We present 'dcGO' (http://supfam.org/SUPERFAMILY/dcGO), a comprehensive ontology database for protein domains. Domains are often the functional units of proteins, thus instead of associating ontological terms only with full-length proteins, it sometimes makes more sense to associate terms with individual domains. Domain-centric GO, 'dcGO', provides associations between ontological terms and protein domains at the superfamily and family levels. Some functional units consist of more than one domain acting together or acting at an interface between domains; therefore, ontological terms associated with pairs of domains, triplets and longer supra-domains are also provided. At the time of writing the ontologies in dcGO include the Gene Ontology (GO); Enzyme Commission (EC) numbers; pathways from UniPathway; human phenotype ontology and phenotype ontologies from five model organisms, including plants; anatomy ontologies from three organisms; human disease ontology and drugs from DrugBank. All ontological terms have probabilistic scores for their associations. In addition to associations to domains and supra-domains, the ontological terms have been transferred to proteins, through homology, providing annotations of >80 million sequences covering 2414 complete genomes, hundreds of meta-genomes, thousands of viruses and so forth. 
The dcGO database is updated fortnightly, and its website provides downloads, search, browse, phylogenetic context and other data-mining facilities.",2012-11-17 +25936478,Comparison of different techniques for in microgravity-a simple mathematic estimation of cardiopulmonary resuscitation quality for space environment.,"

Background

Since astronauts are selected carefully, are usually young, and are intensively observed before and during training, relevant medical problems are rare. Nevertheless, there is a certain risk for a cardiac arrest in space requiring cardiopulmonary resuscitation (CPR). Up to now, there are 5 known techniques to perform CPR in microgravity. The aim of the present study was to analyze different techniques for CPR during microgravity about quality of CPR.

Material and methods

To identify relevant publications on CPR quality in microgravity, a systematic analysis with defined searching criteria was performed in the PubMed database (http://www.pubmed.com). For analysis, the keywords (""reanimation"" or ""CPR"" or ""resuscitation"") and (""space"" or ""microgravity"" or ""weightlessness"") and the specific names of the techniques (""Standard-technique"" or ""Straddling-manoeuvre"" or ""Reverse-bear-hug-technique"" or ""Evetts-Russomano-technique"" or ""Hand-stand-technique"") were used. To compare quality and effectiveness of different techniques, we used the compression product (CP), a mathematical estimation for cardiac output.

Results

Using the predefined keywords for literature search, 4 different publications were identified (parabolic flight or under simulated conditions on earth) dealing with CPR efforts in microgravity and giving specific numbers. No study was performed under real-space conditions. Regarding compression depth, the handstand (HS) technique as well as the reverse bear hug (RBH) technique met parameters of the guidelines for CPR in 1G environments best (HS ratio, 0.91 ± 0.07; RBH ratio, 0.82 ± 0.13). Concerning compression rate, 4 of 5 techniques reached the required compression rate (ratio: HS, 1.08 ± 0.11; Evetts-Russomano [ER], 1.01 ± 0.06; standard side straddle, 1.00 ± 0.03; and straddling maneuver, 1.03 ± 0.12). The RBH method did not meet the required criteria (0.89 ± 0.09). The HS method showed the highest cardiac output (69.3% above the required CP), followed by the ER technique (33.0% above the required CP).

Conclusions

Concerning CPR quality, the HS seems to be most effective to treat a cardiac arrest. In some environmental conditions where this technique cannot be used, the ER technique is a good alternative because CPR quality is only slightly lower.",2015-04-15 +25878034,NGS-eval: NGS Error analysis and novel sequence VAriant detection tooL.,"Massively parallel sequencing of microbial genetic markers (MGMs) is used to uncover the species composition in a multitude of ecological niches. These sequencing runs often contain a sample with known composition that can be used to evaluate the sequencing quality or to detect novel sequence variants. With NGS-eval, the reads from such (mock) samples can be used to (i) explore the differences between the reads and their references and to (ii) estimate the sequencing error rate. This tool maps these reads to references and calculates as well as visualizes the different types of sequencing errors. Clearly, sequencing errors can only be accurately calculated if the reference sequences are correct. However, even with known strains, it is not straightforward to select the correct references from databases. We previously analysed a pyrosequencing dataset from a mock sample to estimate sequencing error rates and detected sequence variants in our mock community, allowing us to obtain an accurate error estimation. Here, we demonstrate the variant detection and error analysis capability of NGS-eval with Illumina MiSeq reads from the same mock community. While tailored towards the field of metagenomics, this server can be used for any type of MGM-based reads. NGS-eval is available at http://www.ibi.vu.nl/programs/ngsevalwww/.",2015-04-15 +24532724,ADaCGH2: parallelized analysis of (big) CNA data.,"

Motivation

Studies of genomic DNA copy number alteration can deal with datasets with several million probes and thousands of subjects. Analyzing these data with currently available software (e.g. as available from BioConductor) can be extremely slow and may not be feasible because of memory requirements.

Results

We have developed a BioConductor package, ADaCGH2, that parallelizes the main segmentation algorithms (using forking on multicore computers or parallelization via message passing interface, etc., in clusters of computers) and uses ff objects for reading and data storage. We show examples of data with 6 million probes per array; we can analyze data that would otherwise not fit in memory, and compared with the non-parallelized versions we can achieve speedups of 25-40 times on a 64-cores machine.

Availability and implementation

ADaCGH2 is an R package available from BioConductor. Version 2.3.11 or higher is available from the development branch: http://www.bioconductor.org/packages/devel/bioc/html/ADaCGH2.html.",2014-02-14 +22106336,Integrating human and murine anatomical gene expression data for improved comparisons.,"

Motivation

Information concerning the gene expression pattern in four dimensions (species, genes, anatomy and developmental stage) is crucial for unraveling the roles of genes through time. There are a variety of anatomical gene expression databases, but extracting information from them can be hampered by their diversity and heterogeneity.

Results

aGEM 3.1 (anatomic Gene Expression Mapping) addresses the issues of diversity and heterogeneity of anatomical gene expression databases by integrating six mouse gene expression resources (EMAGE, GXD, GENSAT, Allen Brain Atlas data base, EUREXPRESS and BioGPS) and three human gene expression databases (HUDSEN, Human Protein Atlas and BioGPS). Furthermore, aGEM 3.1 provides new cross analysis tools to bridge these resources.

Availability and implementation

aGEM 3.1 can be queried using gene and anatomical structure. Output information is presented in a friendly format, allowing the user to display expression maps and correlation matrices for a gene or structure during development. An in-depth study of a specific developmental stage is also possible using heatmaps that relate gene expression with anatomical components. http://agem.cnb.csic.es

Contact

natalia@cnb.csic.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-20 +23267173,In silico identification of oncogenic potential of fyn-related kinase in hepatocellular carcinoma.,"

Motivation

Cancer development is a complex and heterogeneous process. It is estimated that 5-10% of human genes probably contribute to oncogenesis, whereas current experimentally validated cancer genes only cover 1% of the human genome. Thus hundreds of cancer genes may still remain to be identified. To search for new genes that play roles in carcinogenesis and facilitate cancer research, we developed a systematic workflow to use information saved in a previously established tumor-associated gene (TAG) database.

Results

By exploiting the information of conserved protein domains from the TAG, we identified 183 potential new TAGs. As a proof-of-concept, one predicted oncogene, fyn-related kinase (FRK), which shows an aberrant digital expression pattern in liver cancer cells, was selected for further investigation. Using 68 paired hepatocellular carcinoma samples, we found that FRK was up-regulated in 52% of cases (P < 0.001). Tumorigenic assays performed in Hep3B and HepG2 cell lines revealed a significant correlation between the level of FRK expression and invasiveness, suggesting that FRK is a positive regulator of invasiveness in liver cancer cells.

Conclusion

These findings implied that FRK is a multitalented signal transduction molecule that produces diverse biological responses in different cell types in various microenvironments. In addition, our data demonstrated the accuracy of computational prediction and suggested that other predicted TAGs can be potential targets for future cancer research.

Availability

The TAG database is available online at the Bioinformatics Center website: http://www.binfo.ncku.edu.tw/TAG/.",2012-12-24 +26089389,By the company they keep: interaction networks define the binding ability of transcription factors.,"Access to genome-wide data provides the opportunity to address questions concerning the ability of transcription factors (TFs) to assemble in distinct macromolecular complexes. Here, we introduce the PAnDA (Protein And DNA Associations) approach to characterize DNA associations with human TFs using expression profiles, protein-protein interactions and recognition motifs. Our method predicts TF binding events with >0.80 accuracy revealing cell-specific regulatory patterns that can be exploited for future investigations. Even when the precise DNA-binding motifs of a specific TF are not available, the information derived from protein-protein networks is sufficient to perform high-confidence predictions (area under the ROC curve of 0.89). PAnDA is freely available at http://service.tartaglialab.com/new_submission/panda.",2015-06-18 +26317128,RASMOL AB - new functionalities in the program for structure analysis.,"For many years RasMol was one of the most used programs for molecular visualization. It was an excellent tool due to its simplicity and its low demand of computer power. Today it is replaced by OpenGL programs, which have excellent graphics that new computers can additionally handle. Molecular graphics is one of the best tools for the analysis of biomolecular data. With high efficiency and a low demand of computer power, RasMol can still be used as a quick and handy tool used for the analysis of biomolecular structures with good results. In this paper, we describe modifications to the RasMol program, as implemented on the base of RasMol AB 2. 
We introduced several new functions, namely: the identification of histidine isomers, and advanced structural selection and macro capabilities (as implemented in the point-click menu), which result in an increase in the speed and accuracy of structural analyses. The program can be downloaded from the project page: http://etoh.chem.univ.gda.pl/rasmol/.",2015-08-28 +25877689,Age at the time of sulfonylurea initiation influences treatment outcomes in KCNJ11-related neonatal diabetes.,"

Aims/hypothesis

Individuals with heterozygous activating mutations of the KCNJ11 gene encoding a subunit of the ATP-sensitive potassium channel (KATP) can usually be treated with oral sulfonylurea (SU) pills in lieu of insulin injections. The aim of this study was to test our hypothesis that younger age at the time of initiation of SU therapy is correlated with lower required doses of SU therapy, shorter transition time and decreased likelihood of requiring additional diabetes medications.

Methods

We performed a retrospective cohort study using data on 58 individuals with neonatal diabetes due to KCNJ11 mutations identified through the University of Chicago Monogenic Diabetes Registry ( http://monogenicdiabetes.uchicago.edu/registry ). We assessed the influence of age at initiation of SU therapy on treatment outcomes.

Results

HbA1c fell from an average of 8.5% (69 mmol/mol) before transition to 6.2% (44 mmol/mol) after SU therapy (p < 0.001). Age of initiation of SU correlated with the dose (mg kg(-1) day(-1)) of SU required at follow-up (r = 0.80, p < 0.001). Similar associations were observed across mutation subtypes. Ten participants required additional glucose-lowering medications and all had initiated SU at age 13 years or older. No serious adverse events were reported.

Conclusions/interpretation

Earlier age at initiation of SU treatment is associated with improved response to SU therapy. Declining sensitivity to SU may be due to loss of beta cell mass over time in those treated with insulin. Our data support the need for early genetic diagnosis and appropriate personalised treatment in all cases of neonatal diabetes.",2015-04-17 +24990606,Intensity drift removal in LC/MS metabolomics by common variance compensation.,"

Unlabelled

Liquid chromatography coupled to mass spectrometry (LC/MS) has become widely used in Metabolomics. Several artefacts have been identified during the acquisition step in large LC/MS metabolomics experiments, including ion suppression, carryover or changes in the sensitivity and intensity. Several sources have been pointed out as responsible for these effects. In this context, the drift effects of the peak intensity is one of the most frequent and may even constitute the main source of variance in the data, resulting in misleading statistical results when the samples are analysed. In this article, we propose the introduction of a methodology based on a common variance analysis before the data normalization to address this issue. This methodology was tested and compared with four other methods by calculating the Dunn and Silhouette indices of the quality control classes. The results showed that our proposed methodology performed better than any of the other four methods. As far as we know, this is the first time that this kind of approach has been applied in the metabolomics context.

Availability and implementation

The source code of the methods is available as the R package intCor at http://b2slab.upc.edu/software-and-downloads/intensity-drift-correction/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-02 +22434843,Community gene annotation in practice.,"Manual annotation of genomic data is extremely valuable to produce an accurate reference gene set but is expensive compared with automatic methods and so has been limited to model organisms. Annotation tools that have been developed at the Wellcome Trust Sanger Institute (WTSI, http://www.sanger.ac.uk/.) are being used to fill that gap, as they can be used remotely and so open up viable community annotation collaborations. We introduce the 'Blessed' annotator and 'Gatekeeper' approach to Community Annotation using the Otterlace/ZMap genome annotation tool. We also describe the strategies adopted for annotation consistency, quality control and viewing of the annotation. DATABASE URL: http://vega.sanger.ac.uk/index.html.",2012-03-20 +25867189,"T346Hunter: a novel web-based tool for the prediction of type III, type IV and type VI secretion systems in bacterial genomes.","T346Hunter (Type Three, Four and Six secretion system Hunter) is a web-based tool for the identification and localisation of type III, type IV and type VI secretion systems (T3SS, T4SS and T6SS, respectively) clusters in bacterial genomes. Non-flagellar T3SS (NF-T3SS) and T6SS are complex molecular machines that deliver effector proteins from bacterial cells into the environment or into other eukaryotic or prokaryotic cells, with significant implications for pathogenesis of the strains encoding them. Meanwhile, T4SS is a more functionally diverse system, which is involved in not only effector translocation but also conjugation and DNA uptake/release. Development of control strategies against bacterial-mediated diseases requires genomic identification of the virulence arsenal of pathogenic bacteria, with T3SS, T4SS and T6SS being major determinants in this regard. Therefore, computational methods for systematic identification of these specialised machines are of particular interest. 
With the aim of facilitating this task, T346Hunter provides a user-friendly web-based tool for the prediction of T3SS, T4SS and T6SS clusters in newly sequenced bacterial genomes. After inspection of the available scientific literature, we constructed a database of hidden Markov model (HMM) protein profiles and sequences representing the various components of T3SS, T4SS and T6SS. T346Hunter performs searches of such a database against user-supplied bacterial sequences and localises enriched regions in any of these three types of secretion systems. Moreover, through the T346Hunter server, users can visualise the predicted clusters obtained for approximately 1700 bacterial chromosomes and plasmids. T346Hunter offers great help to researchers in advancing their understanding of the biological mechanisms in which these sophisticated molecular machines are involved. T346Hunter is freely available at http://bacterial-virulence-factors.cbgp.upm.es/T346Hunter.",2015-04-13 +23148484,Oculus: faster sequence alignment by streaming read compression.,"

Background

Despite significant advancement in alignment algorithms, the exponential growth of nucleotide sequencing throughput threatens to outpace bioinformatic analysis. Computation may become the bottleneck of genome analysis if growing alignment costs are not mitigated by further improvement in algorithms. Much gain has been gleaned from indexing and compressing alignment databases, but many widely used alignment tools process input reads sequentially and are oblivious to any underlying redundancy in the reads themselves.

Results

Here we present Oculus, a software package that attaches to standard aligners and exploits read redundancy by performing streaming compression, alignment, and decompression of input sequences. This nearly lossless process (> 99.9%) led to alignment speedups of up to 270% across a variety of data sets, while requiring a modest amount of memory. We expect that streaming read compressors such as Oculus could become a standard addition to existing RNA-Seq and ChIP-Seq alignment pipelines, and potentially other applications in the future as throughput increases.

Conclusions

Oculus efficiently condenses redundant input reads and wraps existing aligners to provide nearly identical SAM output in a fraction of the aligner runtime. It includes a number of useful features, such as tunable performance and fidelity options, compatibility with FASTA or FASTQ files, and adherence to the SAM format. The platform-independent C++ source code is freely available online, at http://code.google.com/p/oculus-bio.",2012-11-13 +24257187,Effect of separate sampling on classification accuracy.,"

Motivation

Measurements are commonly taken from two phenotypes to build a classifier, where the number of data points from each class is predetermined, not random. In this 'separate sampling' scenario, the data cannot be used to estimate the class prior probabilities. Moreover, predetermined class sizes can severely degrade classifier performance, even for large samples.

Results

We employ simulations using both synthetic and real data to show the detrimental effect of separate sampling on a variety of classification rules. We establish propositions related to the effect on the expected classifier error owing to a sampling ratio different from the population class ratio. From these we derive a sample-based minimax sampling ratio and provide an algorithm for approximating it from the data. We also extend to arbitrary distributions the classical population-based Anderson linear discriminant analysis minimax sampling ratio derived from the discriminant form of the Bayes classifier.

Availability

All the codes for synthetic data and real data examples are written in MATLAB. A function called mmratio, whose output is an approximation of the minimax sampling ratio of a given dataset, is also written in MATLAB. All the codes are available at: http://gsp.tamu.edu/Publications/supplementary/shahrokh13b.",2013-11-20 +26356339,A Simple but Powerful Heuristic Method for Accelerating k-Means Clustering of Large-Scale Data in Life Science.,"K-means clustering has been widely used to gain insight into biological systems from large-scale life science data. To quantify the similarities among biological data sets, Pearson correlation distance and standardized Euclidean distance are used most frequently; however, optimization methods have been largely unexplored. These two distance measurements are equivalent in the sense that they yield the same k-means clustering result for identical sets of k initial centroids. Thus, an efficient algorithm used for one is applicable to the other. Several optimization methods are available for the Euclidean distance and can be used for processing the standardized Euclidean distance; however, they are not customized for this context. We instead approached the problem by studying the properties of the Pearson correlation distance, and we invented a simple but powerful heuristic method for markedly pruning unnecessary computation while retaining the final solution. Tests using real biological data sets with 50-60K vectors of dimensions 10-2001 (~400 MB in size) demonstrated marked reduction in computation time for k = 10-500 in comparison with other state-of-the-art pruning methods such as Elkan's and Hamerly's algorithms. The BoostKCP software is available at http://mlab.cb.k.u-tokyo.ac.jp/~ichikawa/boostKCP/.",2014-07-01 +23990412,Bayesian consensus clustering.,"

Motivation

In biomedical research a growing number of platforms and technologies are used to measure diverse but related information, and the task of clustering a set of objects based on multiple sources of data arises in several applications. Most current approaches to multisource clustering either independently determine a separate clustering for each data source or determine a single 'joint' clustering for all data sources. There is a need for more flexible approaches that simultaneously model the dependence and the heterogeneity of the data sources.

Results

We propose an integrative statistical model that permits a separate clustering of the objects for each data source. These separate clusterings adhere loosely to an overall consensus clustering, and hence they are not independent. We describe a computationally scalable Bayesian framework for simultaneous estimation of both the consensus clustering and the source-specific clusterings. We demonstrate that this flexible approach is more robust than joint clustering of all data sources, and is more powerful than clustering each data source independently. We present an application to subtype identification of breast cancer tumor samples using publicly available data from The Cancer Genome Atlas.

Availability

R code with instructions and examples is available at http://people.duke.edu/%7Eel113/software.html.",2013-08-28 +26893301,Clustering Genes of Common Evolutionary History.,"Phylogenetic inference can potentially result in a more accurate tree using data from multiple loci. However, if the loci are incongruent-due to events such as incomplete lineage sorting or horizontal gene transfer-it can be misleading to infer a single tree. To address this, many previous contributions have taken a mechanistic approach, by modeling specific processes. Alternatively, one can cluster loci without assuming how these incongruencies might arise. Such ""process-agnostic"" approaches typically infer a tree for each locus and cluster these. There are, however, many possible combinations of tree distance and clustering methods; their comparative performance in the context of tree incongruence is largely unknown. Furthermore, because standard model selection criteria such as AIC cannot be applied to problems with a variable number of topologies, the issue of inferring the optimal number of clusters is poorly understood. Here, we perform a large-scale simulation study of phylogenetic distances and clustering methods to infer loci of common evolutionary history. We observe that the best-performing combinations are distances accounting for branch lengths followed by spectral clustering or Ward's method. We also introduce two statistical tests to infer the optimal number of clusters and show that they strongly outperform the silhouette criterion, a general-purpose heuristic. 
We illustrate the usefulness of the approach by 1) identifying errors in a previous phylogenetic analysis of yeast species and 2) identifying topological incongruence among newly sequenced loci of the globeflower fly genus Chiastocheta. We release treeCl, a new program to cluster genes of common evolutionary history (http://git.io/treeCl).",2016-02-17 +25649619,mFASD: a structure-based algorithm for discriminating different types of metal-binding sites.,"

Motivation

A large number of proteins contain metal ions that are essential for their stability and biological activity. Identifying and characterizing metal-binding sites through computational methods is necessary when experimental clues are lacking. Almost all published computational methods are designed to distinguish metal-binding sites from non-metal-binding sites. However, discrimination between different types of metal-binding sites is also needed to make more accurate predictions.

Results

In this work, we proposed a novel algorithm called mFASD, which could discriminate different types of metal-binding sites effectively based on 3D structure data and is useful for accurate metal-binding site prediction. mFASD captures the characteristics of a metal-binding site by investigating the local chemical environment of a set of functional atoms that are considered to be in contact with the bound metal. Then a distance measure defined on functional atom sets enables the comparison between different metal-binding sites. The algorithm could discriminate most types of metal-binding sites from each other with high sensitivity and accuracy. We showed that cascading our method with existing ones could achieve a substantial improvement of the accuracy for metal-binding site prediction.

Availability and implementation

Source code and data used are freely available from http://staff.ustc.edu.cn/~liangzhi/mfasd/",2015-02-02 +24913605,QMachine: commodity supercomputing in web browsers.,"

Background

Ongoing advancements in cloud computing provide novel opportunities in scientific computing, especially for distributed workflows. Modern web browsers can now be used as high-performance workstations for querying, processing, and visualizing genomics' ""Big Data"" from sources like The Cancer Genome Atlas (TCGA) and the International Cancer Genome Consortium (ICGC) without local software installation or configuration. The design of QMachine (QM) was driven by the opportunity to use this pervasive computing model in the context of the Web of Linked Data in Biomedicine.

Results

QM is an open-sourced, publicly available web service that acts as a messaging system for posting tasks and retrieving results over HTTP. The illustrative application described here distributes the analyses of 20 Streptococcus pneumoniae genomes for shared suffixes. Because all analytical and data retrieval tasks are executed by volunteer machines, few server resources are required. Any modern web browser can submit those tasks and/or volunteer to execute them without installing any extra plugins or programs. A client library provides high-level distribution templates including MapReduce. This stark departure from the current reliance on expensive server hardware running ""download and install"" software has already gathered substantial community interest, as QM received more than 2.2 million API calls from 87 countries in 12 months.

Conclusions

QM was found adequate to deliver the sort of scalable bioinformatics solutions that computation- and data-intensive workflows require. Paradoxically, the sandboxed execution of code by web browsers was also found to enable them, as compute nodes, to address critical privacy concerns that characterize biomedical environments.",2014-06-09 +23144556,RKN Lethal DB: A database for the identification of Root Knot Nematode (Meloidogyne spp.) candidate lethal genes.,"

Unlabelled

Root Knot nematode (RKN; Meloidogyne spp.) is one of the most devastating parasites that infect the roots of hundreds of plant species. RKN cannot live independently from their hosts and are the biggest contributors to the loss of the world's primary foods. RNAi gene silencing studies have demonstrated that there are fewer galls and galls are smaller when RNAi constructs targeted to silence certain RKN genes are expressed in plant roots. We conducted a comparative genomics analysis, comparing RKN genes of six species: Meloidogyne arenaria, Meloidogyne chitwoodi, Meloidogyne hapla, Meloidogyne incognita, Meloidogyne javanica, and Meloidogyne paranaensis to those of the free-living nematode Caenorhabditis elegans, to identify candidate genes that will be lethal to RKN when silenced or mutated. Our analysis yielded a number of such candidate lethal genes in RKN, some of which have been tested and proven to be effective in soybean roots. A web based database was built to house and allow scientists to search the data. This database will be useful to scientists seeking to identify candidate genes as targets for gene silencing to confer resistance in plants to RKN.

Availability

The database can be accessed from http://bioinformatics.towson.edu/RKN/",2012-10-01 +22589183,PRDB: Protein Repeat DataBase.,"Rapidly increasing genomic data present new challenges for scientists: making sense of millions of amino acid sequences requires a systematic approach and information about their 3D structure, function, and evolution. Over the last decade, numerous studies demonstrated the fundamental importance of protein tandem repeats and their involvement in human diseases. Bioinformatics analysis of these regions requires special computer programs and databases, since the conventional approaches predominantly developed for globular domains have limited success. To perform a global comparative analysis of protein tandem repeats, we developed the Protein Tandem Repeat DataBase (PRDB). PRDB is a curated database that includes the protein tandem repeats found in sequence databanks by the T-REKS program. The database is available at http://bioinfo.montp.cnrs.fr/?r=repeatDB.",2012-05-01 +25869675,Sensible method for updating motif instances in an increased biological network.,"A network motif is defined as an over-represented subgraph pattern in a network. Network motif based techniques have been widely applied in analyses of biological networks such as transcription regulation networks (TRNs), protein-protein interaction networks (PPIs), and metabolic networks. The detection of network motifs involves the computationally expensive enumeration of subgraphs, NP-complete graph isomorphism testing, and significance testing through the generation of many random graphs to determine the statistical uniqueness of a given subgraph. These computational obstacles make network motif analysis unfeasible for many real-world applications. We observe that the fast growth of biotechnology has led to the rapid accretion of molecules (vertices) and interactions (edges) to existing biological network databases. 
Even with a small percentage of additions, revised networks can have a large number of differing motif instances. Currently, no existing algorithms recalculate motif instances in 'updated' networks in a practical manner. In this paper, we introduce a sensible method for efficiently recalculating motif instances by performing motif enumeration from only updated vertices and edges. Preliminary experimental results indicate that our method greatly reduces computational time by eliminating the repeated enumeration of overlapped subgraph instances detected in earlier versions of the network. The software program implementing this algorithm, defined as SUNMI (Sensible Update of Network Motif Instances), is currently a stand-alone java program and we plan to upgrade it as a web-interactive program that will be available through http://faculty.washington.edu/kimw6/research.htm in near future. Meanwhile it is recommended to contact authors to obtain the stand-alone SUNMI program.",2015-04-11 +21586520,Genome-wide association studies pipeline (GWASpi): a desktop application for genome-wide SNP analysis and management.,"

Motivation

Genome-wide association studies (GWAS) based on single nucleotide polymorphism (SNP) arrays are the most widely used approach to detect loci associated with human traits. Due to the complexity of the methods and software packages available, each with its particular format requiring intricate management workflows, the analysis of GWAS usually confronts scientists with steep learning curves. Indeed, the wide variety of tools makes the parsing and manipulation of data the most time-consuming and error-prone part of a study. To help resolve these issues, we present GWASpi, a user-friendly, multiplatform, desktop-able application for the management and analysis of GWAS data, with a novel approach on database technologies to leverage the most out of commonly available desktop hardware. GWASpi aims to be a start-to-finish GWAS management application, from raw data to results, containing the most common analysis tools. As a result, GWASpi is easy to use and reduces by up to two orders of magnitude the time needed to perform the fundamental steps of a GWAS.

Availability

Freely available on the web at http://www.gwaspi.org. Implemented in Java, Apache-Derby and NetCDF-3, with all major operating systems supported.

Contact

gwaspi@upf.edu; arcadi.navarro@upf.edu.",2011-05-17 +26925206,KENeV: A web-application for the automated reconstruction and visualization of the enriched metabolic and signaling super-pathways deriving from genomic experiments.,"Gene expression analysis, using high throughput genomic technologies,has become an indispensable step for the meaningful interpretation of the underlying molecular complexity, which shapes the phenotypic manifestation of the investigated biological mechanism. The modularity of the cellular response to different experimental conditions can be comprehended through the exploitation of molecular pathway databases, which offer a controlled, curated background for statistical enrichment analysis. Existing tools enable pathway analysis, visualization, or pathway merging but none integrates a fully automated workflow, combining all above-mentioned modules and destined to non-programmer users. We introduce an online web application, named KEGG Enriched Network Visualizer (KENeV), which enables a fully automated workflow starting from a list of differentially expressed genes and deriving the enriched KEGG metabolic and signaling pathways, merged into two respective, non-redundant super-networks. The final networks can be downloaded as SBML files, for further analysis, or instantly visualized through an interactive visualization module. In conclusion, KENeV (available online at http://www.grissom.gr/kenev) provides an integrative tool, suitable for users with no programming experience, for the functional interpretation, at both the metabolic and signaling level, of differentially expressed gene subsets deriving from genomic experiments.",2015-04-09 +23282099,Automatically clustering large-scale miRNA sequences: methods and experiments.,"

Background

Since the initial annotation of microRNAs (miRNAs) in 2001, many studies have sought to identify additional miRNAs experimentally or computationally in various species. MiRNAs act with the Argonaut family of proteins to regulate target messenger RNAs (mRNAs) post-transcriptionally. Currently, researches mainly focus on single miRNA function study. Considering that members in the same miRNA family might participate in the same pathway or regulate the same target(s) and thus share similar biological functions, people can explore useful knowledge from high quality miRNA family architecture.

Results

In this article, we developed an unsupervised clustering-based method miRCluster to automatically group miRNAs. In order to evaluate this method, several data sets were constructed from the online database miRBase. Results showed that miRCluster can efficiently arrange miRNAs (e.g., identify 354 families in miRBase16 with an accuracy of 92.08%, and can recognize 9 of all 10 newly-added families in miRBase 17). So far, ~30% of mature miRNAs registered in miRBase are unclassified. With miRCluster, over 85% of unclassified miRNAs can be assigned to certain families, while ~44% of these miRNAs are distributed in ~300 novel families.

Conclusions

In short, miRCluster is an automatic and efficient miRNA family identification method, which does not require any prior knowledge. It can be helpful in real use, especially when exploring functions of novel miRNAs. All relevant materials could be freely accessed online (http://admis.fudan.edu.cn/projects/miRCluster).",2012-12-17 +24618467,An adaptive workflow coupled with Random Forest algorithm to identify intact N-glycopeptides detected from mass spectrometry.,"

Motivation

Despite many attempts for algorithm development in recent years, automated identification of intact glycopeptides from LC-MS(2) spectral data is still a challenge in both sensitivity and precision.

Results

We implemented a supervised machine learning algorithm, Random Forest, in an automated workflow to identify N-glycopeptides using spectral features derived from ion trap-based LC-MS(2) data. The workflow streamlined high-confident N-glycopeptide spectral data and enabled adaptive model optimization with respect to different sampling strategies, training sample size and feature set. A critical evaluation of the features important for glycopeptide identification further facilitated effective feature selection for model improvement. Using split sample testing method from 577 high-confident N-glycopeptide spectral data, we demonstrated that an optimal true-positive rate, precision and false-positive rate of 73, 88 and 10%, respectively, can be attained for overall N-glycopeptide identification. Availability and implementation: The workflow developed in this work and the application suite, Sweet-Heart, that the workflow supports for N-glycopeptide identification are available for download at http://sweet-heart.glycoproteomics.proteome.bc.sinica.edu.tw/.",2014-03-10 +25979473,SpeeDB: fast structural protein searches.,"

Motivation

Interactions between amino acids are important determinants of the structure, stability and function of proteins. Several tools have been developed for the identification and analysis of such interactions in proteins based on the extensive studies carried out on high-resolution structures from Protein Data Bank (PDB). Although these tools allow users to identify and analyze interactions, analysis can only be performed on one structure at a time. This makes it difficult and time consuming to study the significance of these interactions on a large scale.

Results

SpeeDB is a web-based tool for the identification of protein structures based on structural properties. SpeeDB queries are executed on all structures in the PDB at once, quickly enough for interactive use. SpeeDB includes standard queries based on published criteria for identifying various structures: disulphide bonds, catalytic triads and aromatic-aromatic, sulphur-aromatic, cation-π and ionic interactions. Users can also construct custom queries in the user interface without any programming. Results can be downloaded in a Comma Separated Value (CSV) format for further analysis with other tools. Case studies presented in this article demonstrate how SpeeDB can be used to answer various biological questions. Analysis of human proteases revealed that disulphide bonds are the predominant type of interaction and are located close to the active site, where they promote substrate specificity. When comparing the two homologous G protein-coupled receptors and the two protein kinase paralogs analyzed, the differences in the types of interactions responsible for stability accounts for the differences in specificity and functionality of the structures.

Availability and implementation

SpeeDB is available at http://www.parallelcomputing.ca as a web service.

Contact

d@drobilla.net

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-15 +27209209,"A systematic, large-scale comparison of transcription factor binding site models.","

Background

The modelling of gene regulation is a major challenge in biomedical research. This process is dominated by transcription factors (TFs) and mutations in their binding sites (TFBSs) may cause the misregulation of genes, eventually leading to disease. The consequences of DNA variants on TF binding are modelled in silico using binding matrices, but it remains unclear whether these are capable of accurately representing in vivo binding. In this study, we present a systematic comparison of binding models for 82 human TFs from three freely available sources: JASPAR matrices, HT-SELEX-generated models and matrices derived from protein binding microarrays (PBMs). We determined their ability to detect experimentally verified ""real"" in vivo TFBSs derived from ENCODE ChIP-seq data. As negative controls we chose random downstream exonic sequences, which are unlikely to harbour TFBS. All models were assessed by receiver operating characteristics (ROC) analysis.

Results

While the area-under-curve was low for most of the tested models with only 47 % reaching a score of 0.7 or higher, we noticed strong differences between the various position-specific scoring matrices with JASPAR and HT-SELEX models showing higher success rates than PBM-derived models. In addition, we found that while TFBS sequences showed a higher degree of conservation than randomly chosen sequences, there was a high variability between individual TFBSs.

Conclusions

Our results show that only a few of the matrix-based models used to predict potential TFBS are able to reliably detect experimentally confirmed TFBS. We compiled our findings in a freely accessible web application called ePOSSUM ( http://mutationtaster.charite.de/ePOSSUM/ ) which uses a Bayes classifier to assess the impact of genetic alterations on TF binding in user-defined sequences. Additionally, ePOSSUM provides information on the reliability of the prediction using our test set of experimentally confirmed binding sites.",2016-05-21 +27203433,"A Difference-in-Differences Approach to Assess the Effect of a Heat Action Plan on Heat-Related Mortality, and Differences in Effectiveness According to Sex, Age, and Socioeconomic Status (Montreal, Quebec).","

Background

The impact of heat waves on mortality and health inequalities is well documented. Very few studies have assessed the effectiveness of heat action plans (HAPs) on health, and none has used quasi-experimental methods to estimate causal effects of such programs.

Objectives

We developed a quasi-experimental method to estimate the causal effects associated with HAPs that allows the identification of heterogeneity across subpopulations, and to apply this method specifically to the case of the Montreal (Quebec, Canada) HAP.

Methods

A difference-in-differences approach was undertaken using Montreal death registry data for the summers of 2000-2007 to assess the effectiveness of the Montreal HAP, implemented in 2004, on mortality. To study equity in the effect of HAP implementation, we assessed whether the program effects were heterogeneous across sex (male vs. female), age (≥ 65 years vs. < 65 years), and neighborhood education levels (first vs. third tertile). We conducted sensitivity analyses to assess the validity of the estimated causal effect of the HAP program.

Results

We found evidence that the HAP contributed to reducing mortality on hot days, and that the mortality reduction attributable to the program was greater for elderly people and people living in low-education neighborhoods.

Conclusion

These findings show promise for programs aimed at reducing the impact of extreme temperatures and health inequities. We propose a new quasi-experimental approach that can be easily applied to evaluate the impact of any program or intervention triggered when daily thresholds are reached. Citation: Benmarhnia T, Bailey Z, Kaiser D, Auger N, King N, Kaufman J. 2016. A difference-in-differences approach to assess the effect of a heat action plan on heat-related mortality, and differences in effectiveness according to sex, age, and socioeconomic status (Montreal, Quebec). Environ Health Perspect 124:1694-1699; http://dx.doi.org/10.1289/EHP203.",2016-05-20 +26680733,EffectorP: predicting fungal effector proteins from secretomes using machine learning.,"Eukaryotic filamentous plant pathogens secrete effector proteins that modulate the host cell to facilitate infection. Computational effector candidate identification and subsequent functional characterization delivers valuable insights into plant-pathogen interactions. However, effector prediction in fungi has been challenging due to a lack of unifying sequence features such as conserved N-terminal sequence motifs. Fungal effectors are commonly predicted from secretomes based on criteria such as small size and cysteine-rich, which suffers from poor accuracy. We present EffectorP which pioneers the application of machine learning to fungal effector prediction. EffectorP improves fungal effector prediction from secretomes based on a robust signal of sequence-derived properties, achieving sensitivity and specificity of over 80%. Features that discriminate fungal effectors from secreted noneffectors are predominantly sequence length, molecular weight and protein net charge, as well as cysteine, serine and tryptophan content. We demonstrate that EffectorP is powerful when combined with in planta expression data for predicting high-priority effector candidates. 
EffectorP is the first prediction program for fungal effectors based on machine learning. Our findings will facilitate functional fungal effector studies and improve our understanding of effectors in plant-pathogen interactions. EffectorP is available at http://effectorp.csiro.au.",2015-12-17 +25161233,TEMPI: probabilistic modeling time-evolving differential PPI networks with multiPle information.,"

Motivation

Time-evolving differential protein-protein interaction (PPI) networks are essential to understand serial activation of differentially regulated (up- or downregulated) cellular processes (DRPs) and their interplays over time. Despite developments in the network inference, current methods are still limited in identifying temporal transition of structures of PPI networks, DRPs associated with the structural transition and the interplays among the DRPs over time.

Results

Here, we present a probabilistic model for estimating Time-Evolving differential PPI networks with MultiPle Information (TEMPI). This model describes probabilistic relationships among network structures, time-course gene expression data and Gene Ontology biological processes (GOBPs). By maximizing the likelihood of the probabilistic model, TEMPI estimates jointly the time-evolving differential PPI networks (TDNs) describing temporal transition of PPI network structures together with serial activation of DRPs associated with transiting networks. This joint estimation enables us to interpret the TDNs in terms of temporal transition of the DRPs. To demonstrate the utility of TEMPI, we applied it to two time-course datasets. TEMPI identified the TDNs that correctly delineated temporal transition of DRPs and time-dependent associations between the DRPs. These TDNs provide hypotheses for mechanisms underlying serial activation of key DRPs and their temporal associations.

Availability and implementation

Source code and sample data files are available at http://sbm.postech.ac.kr/tempi/sources.zip.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +26987840,On the comparison of regulatory sequences with multiple resolution Entropic Profiles.,"

Background

Enhancers are stretches of DNA (100-1000 bp) that play a major role in development gene expression, evolution and disease. It has been recently shown that in high-level eukaryotes enhancers rarely work alone, instead they collaborate by forming clusters of cis-regulatory modules (CRMs). Although the binding of transcription factors is sequence-specific, the identification of functionally similar enhancers is very difficult and it cannot be carried out with traditional alignment-based techniques.

Results

The use of fast similarity measures, like alignment-free measures, to detect related regulatory sequences is crucial to understand functional correlation between two enhancers. In this paper we study the use of alignment-free measures for the classification of CRMs. However, alignment-free measures are generally tied to a fixed resolution k. Here we propose an alignment-free statistic, called [Formula: see text], that is based on multiple resolution patterns derived from the Entropic Profiles (EPs). The Entropic Profile is a function of the genomic location that captures the importance of that region with respect to the whole genome. As a byproduct we provide a formula to compute the exact variance of variable length word counts, a result that can be of general interest also in other applications.

Conclusions

We evaluate several alignment-free statistics on simulated data and real mouse ChIP-seq sequences. The new statistic, [Formula: see text], is highly successful in discriminating functionally related enhancers and, in almost all experiments, it outperforms fixed-resolution methods. We implemented the new alignment-free measures, as well as traditional ones, in a software called EP-sim that is freely available: http://www.dei.unipd.it/~ciompin/main/EP-sim.html .",2016-03-18 +23087378,BioLiP: a semi-manually curated database for biologically relevant ligand-protein interactions.,"BioLiP (http://zhanglab.ccmb.med.umich.edu/BioLiP/) is a semi-manually curated database for biologically relevant ligand-protein interactions. Establishing interactions between protein and biologically relevant ligands is an important step toward understanding the protein functions. Most ligand-binding sites prediction methods use the protein structures from the Protein Data Bank (PDB) as templates. However, not all ligands present in the PDB are biologically relevant, as small molecules are often used as additives for solving the protein structures. To facilitate template-based ligand-protein docking, virtual ligand screening and protein function annotations, we develop a hierarchical procedure for assessing the biological relevance of ligands present in the PDB structures, which involves a four-step biological feature filtering followed by careful manual verifications. This procedure is used for BioLiP construction. Each entry in BioLiP contains annotations on: ligand-binding residues, ligand-binding affinity, catalytic sites, Enzyme Commission numbers, Gene Ontology terms and cross-links to the other databases. In addition, to facilitate the use of BioLiP for function annotation of uncharacterized proteins, a new consensus-based algorithm COACH is developed to predict ligand-binding sites from protein sequence or using 3D structure. 
The BioLiP database is updated weekly and the current release contains 204 223 entries.",2012-10-18 +22135417,SNPxGE(2): a database for human SNP-coexpression associations.,"

Motivation

Recently, gene-coexpression relationships have been found to be often conditional and dynamic. Many studies have suggested that single nucleotide polymorphisms (SNPs) have impacts on gene expression variations in human populations.

Results

The SNPxGE(2) database contains the computationally predicted human SNP-coexpression associations, i.e. the differential coexpression between two genes is associated with the genotypes of an SNP. These data were generated from a large-scale association study that was based on the HapMap phase I data, which covered 269 individuals from 4 human populations, 556 873 SNPs and 15 000 gene expression profiles. In order to reduce the computational cost, the SNP-coexpression associations were assessed using gap/substitution models, proven to have a comparable power to logistic regression models. The results, at a false discovery rate (FDR) cutoff of 0.1, consisted of 44 769 and 50 792 SNP-coexpression associations based on single and pooled populations, respectively, and can be queried in the SNPxGE(2) database via either gene symbol or reference SNP ID. For each reported association, a detailed information page is provided.

Availability

http://lambchop.ads.uga.edu/snpxge2/index.php

Contact

wyp1125@uga.edu, rrekaya@uga.edu.",2011-11-30 +26677963,OMPPM: online multiple palindrome pattern matching.,"

Motivation

A palindrome is a string that reads the same forward and backward. Finding palindromic substructures is important in DNA, RNA or protein sequence analysis. We say that two strings of the same length are pal-equivalent if, for each possible centre, they have the same length of the maximal palindrome. Given a text T of length n and a pattern P of length m, we study the palindrome pattern matching problem that finds all indices i such that P and [Formula: see text] are pal-equivalent.

Results

We first solve the online palindrome pattern matching problem in O(m(2)) preprocessing time and O(mn) query time using O(m(2)) space. We then extend the problem for multiple patterns and solve the online multiple palindrome pattern matching problem in [Formula: see text] preprocessing time and [Formula: see text] query time using [Formula: see text] space, where M is the sum of all pattern lengths, mk is the longest pattern length and c is the number of pattern occurrences.

Availability and implementation

The source code for all algorithms is freely available at http://toc.yonsei.ac.kr/OMPPM CONTACT: kimhwee@cs.yonsei.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-16 +21904425,SSPred: A prediction server based on SVM for the identification and classification of proteins involved in bacterial secretion systems.,"Protein secretion systems used by almost all bacteria are highly significant for the normal existence and interaction of bacteria with their host. The accumulation of genome sequence data in past few years has provided great insights into the distribution and function of these secretion systems. In this study, a support vector machine (SVM)- based method, SSPred was developed for the automated functional annotation of proteins involved in secretion systems further classifying them into five major sub-types (Type-I, Type-II, Type-III, Type-IV and Sec systems). The dataset used in this study for training and testing was obtained from KEGG and SwissProt database and was curated in order to avoid redundancy. To overcome the problem of imbalance in positive and negative dataset, an ensemble of SVM modules, each trained on a balanced subset of the training data were used. Firstly, protein sequence features like amino-acid composition (AAC), dipeptide composition (DPC) and physico-chemical composition (PCC) were used to develop the SVM-based modules that achieved an average accuracy of 84%, 85.17% and 82.59%, respectively. Secondly, a hybrid module (hybrid-I) integrating all the previously used features was developed that achieved an average accuracy of 86.12%. Another hybrid module (hybrid-II) developed using evolutionary information of a protein sequence extracted from position-specific scoring matrix and amino-acid composition achieved a maximum average accuracy of 89.73%. On unbiased evaluation using an independent data set, SSPred showed good prediction performance in identification and classification of secretion systems. 
SSPred is a freely available World Wide Web server at http://www.bioinformatics.org/sspred.",2011-08-02 +24990607,Fast and accurate imputation of summary statistics enhances evidence of functional enrichment.,"

Motivation

Imputation using external reference panels (e.g. 1000 Genomes) is a widely used approach for increasing power in genome-wide association studies and meta-analysis. Existing hidden Markov models (HMM)-based imputation approaches require individual-level genotypes. Here, we develop a new method for Gaussian imputation from summary association statistics, a type of data that is becoming widely available.

Results

In simulations using 1000 Genomes (1000G) data, this method recovers 84% (54%) of the effective sample size for common (>5%) and low-frequency (1-5%) variants [increasing to 87% (60%) when summary linkage disequilibrium information is available from target samples] versus the gold standard of 89% (67%) for HMM-based imputation, which cannot be applied to summary statistics. Our approach accounts for the limited sample size of the reference panel, a crucial step to eliminate false-positive associations, and it is computationally very fast. As an empirical demonstration, we apply our method to seven case-control phenotypes from the Wellcome Trust Case Control Consortium (WTCCC) data and a study of height in the British 1958 birth cohort (1958BC). Gaussian imputation from summary statistics recovers 95% (105%) of the effective sample size (as quantified by the ratio of [Formula: see text] association statistics) compared with HMM-based imputation from individual-level genotypes at the 227 (176) published single nucleotide polymorphisms (SNPs) in the WTCCC (1958BC height) data. In addition, for publicly available summary statistics from large meta-analyses of four lipid traits, we publicly release imputed summary statistics at 1000G SNPs, which could not have been obtained using previously published methods, and demonstrate their accuracy by masking subsets of the data. We show that 1000G imputation using our approach increases the magnitude and statistical evidence of enrichment at genic versus non-genic loci for these traits, as compared with an analysis without 1000G imputation. Thus, imputation of summary statistics will be a valuable tool in future functional enrichment analyses.

Availability and implementation

Publicly available software package available at http://bogdan.bioinformatics.ucla.edu/software/.

Contact

bpasaniuc@mednet.ucla.edu or aprice@hsph.harvard.edu

Supplementary information

Supplementary materials are available at Bioinformatics online.",2014-07-01 +24389659,"DIYABC v2.0: a software to make approximate Bayesian computation inferences about population history using single nucleotide polymorphism, DNA sequence and microsatellite data.","

Motivation

DIYABC is a software package for a comprehensive analysis of population history using approximate Bayesian computation on DNA polymorphism data. Version 2.0 implements a number of new features and analytical methods. It allows (i) the analysis of single nucleotide polymorphism data at large number of loci, apart from microsatellite and DNA sequence data, (ii) efficient Bayesian model choice using linear discriminant analysis on summary statistics and (iii) the serial launching of multiple post-processing analyses. DIYABC v2.0 also includes a user-friendly graphical interface with various new options. It can be run on three operating systems: GNU/Linux, Microsoft Windows and Apple Os X.

Availability

Freely available with a detailed notice document and example projects to academic users at http://www1.montpellier.inra.fr/CBGP/diyabc CONTACT: estoup@supagro.inra.fr Supplementary information: Supplementary data are available at Bioinformatics online.",2014-01-02 +26454275,miTRATA: a web-based tool for microRNA Truncation and Tailing Analysis.,"

Summary

We describe miTRATA, the first web-based tool for microRNA Truncation and Tailing Analysis--the analysis of 3' modifications of microRNAs including the loss or gain of nucleotides relative to the canonical sequence. miTRATA is implemented in Python (version 3) and employs parallel processing modules to enhance its scalability when analyzing multiple small RNA (sRNA) sequencing datasets. It utilizes miRBase, currently version 21, as a source of known microRNAs for analysis. miTRATA notifies user(s) via email to download as well as visualize the results online. miTRATA's strengths lie in (i) its biologist-focused web interface, (ii) improved scalability via parallel processing and (iii) its uniqueness as a webtool to perform microRNA truncation and tailing analysis.

Availability and implementation

miTRATA is developed in Python and PHP. It is available as a web-based application from https://wasabi.dbi.udel.edu/~apps/ta/.

Contact

meyers@dbi.udel.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-10 +24728854,methylC Track: visual integration of single-base resolution DNA methylation data on the WashU EpiGenome Browser.,"

Summary

We present methylC track, an efficient mechanism for visualizing single-base resolution DNA methylation data on a genome browser. The methylC track dynamically integrates the level of methylation, the position and context of the methylated cytosine (i.e. CG, CHG and CHH), strand and confidence level (e.g. read coverage depth in the case of whole-genome bisulfite sequencing data). Investigators can access and integrate these information visually at specific locus or at the genome-wide level on the WashU EpiGenome Browser in the context of other rich epigenomic datasets.

Availability and implementation

The methylC track is part of the WashU EpiGenome Browser, which is open source and freely available at http://epigenomegateway.wustl.edu/browser/. The most up-to-date instructions and tools for preparing methylC track are available at http://epigenomegateway.wustl.edu/+/cmtk.

Contact

twang@genetics.wustl.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-10 +27606391,"Evidence Brief: The Effectiveness Of Mandatory Computer-Based Trainings On Government Ethics, Workplace Harassment, Or Privacy And Information Security-Related Topics","

Unlabelled

In large organizations such as the VA, mandatory training has become an integral part of workforce learning. Some common reasons for adopting mandatory training for all employees include showing employees management's commitment to the topic area, promoting positive change, promoting overall staff safety, and legal or compliance considerations. In some cases, such as for the topic of diversity, mandatory training efforts are directly tied to federal requirements instituted in response to employee wrongdoings that resulted in corporate lawsuits. Numerous courts have held that to avoid punitive damages, employers must provide training to their employees on harassment and discrimination prevention. (187 F.3d 1241 (10th Cir. 1999); 270 F.3d 794 (9th Cir. 2001); 281 F.3d 452 (4th Cir. 2002)) The Tenth Circuit Court of Appeals stated that “the extent to which an employer has adopted antidiscrimination policies and educated its employees about the requirement of [the discrimination laws] is important in deciding whether it is insulated from vicarious punitive liability.” (187 F.3d 1241 (10th Cir. 1999)) The costs associated with mandatory training program management can be quite high, resulting in annual expenditures in the hundreds of millions of dollars across U.S. organizations. The laws that motivate mandatory compliance training are often broad in nature and generally do not set standards on training content or evaluation. This leaves organizations with the ability to implement training in a compulsory manner to serve a symbolic purpose, with little attention to whether their training methods are actually effective. Unfortunately, training implemented merely to serve a symbolic purpose may be creating a false sense of organizational security. Research has found that the mere existence of an anti-harassment policy is not always sufficient to protect the employer from liability (239 F.3d 848 (7th Cir. 2001)). 
Likewise, in an article on the effectiveness of diversity training, a Harvard sociologist indicated that there is no history of any court giving an employer credit for the mere existence of diversity training. Mandatory training is traditionally unpopular, and there is a perception that it is ineffective and decreases motivation to learn. Some education theory-related barriers to learning that may reduce the effectiveness of mandatory training include employee resentment about their lack of control, lack of interest, perception of irrelevancy to their specific workplace context, and workplace time pressures. Considering the high cost associated with mandatory training and doubts about its effectiveness, organizations would be well served to more closely consider the benefits of their programs. An extensive literature on general organizational training research is available to inform decisions about how to design, implement, and evaluate training in a variety of settings. The design and evaluation of training is based on a wide variety of theoretical frameworks. There is a good deal of consensus about the best practices that organizations should engage in before, during, and after training in order to maximize effectiveness. Pre-training factors associated with training effectiveness include individual characteristics such as cognitive ability, self-efficacy, and motivation, and needs assessments. Experts recommend that one of the most important steps in developing training is to conduct a pre-training needs analysis to identify the competencies needed, training priorities, and who needs the training. Factors that matter during training include individual characteristics and instructional strategies and principles. In recent years, group training, distance learning, and computer-based training have become common training delivery systems in many work organizations. 
Post-training factors associated with effectiveness include the ability to use skills and knowledge gained from training, delay between training and use of skills and knowledge, social, peer, subordinate, and supervisor support and training evaluation efforts. The Kirkpatrick Model of Training Evaluation is commonly used as a framework for evaluating training programs. Organizations have struggled to conduct training evaluation due to the labor and costs involved, and difficulty with credible field evaluation. Since most empirical research is still relying on surveys to measure learning outcomes, there is still a need for more research using formal experimental designs to evaluate training effectiveness. Although there has been an increase in general training-related research, it is unclear whether the best practices identified in the general training literature have their desired effect on outcomes in the mandatory training domain. The VA currently requires all employees to undergo mandatory training on the topics of Government Ethics, Prevention of Workplace Harassment/No Fear Act, and Privacy and Information Security Awareness and Rules of Behavior (http://www.valu.va.gov/Home/MandatoryTraining). Table 1 summarizes content and timing details for each of these training topics. The requirements for the three trainings that are mandatory for all VA employees originate from Executive Orders, Congressional mandates, the Office of Personnel Management, regulatory bodies, and VA department-level requirements. These regulatory directives do not specify requirements about the format, content, or method of delivery of training material. Originally, the VA provided local facility leadership with the flexibility to locally manage their mandatory training processes, including tracking and recording employee attendance. 
This allowed local facilities to customize their approach to meeting the training mandates based on the local culture, which included a variety of training formats including face-to-face sessions, videos, handouts, or multiple modalities. Eventually, national tracking became more of a priority for the VA, and this led to centralization and standardization of mandatory training. Currently, all VA mandatory training is computer-based, and it is delivered and tracked via the VA Learning University Training Management System (VALU-TMS). In order to evaluate the use and outcomes of mandatory training in the VHA, the National Leadership Council's Human Resource Committee chartered the Mandatory Training Workgroup in November 2008. The workgroup was designated as a standing subcommittee in October 2010. The goal of the Subcommittee is to “envision a strategic evidence-based approach to Mandatory Training that linked employee learning to organizational outcomes.” The workgroup envisions “that mandatory training, used sparingly, would become meaningful, focused, effective, flexible, and satisfying to all employees.” The Subcommittee has suggested various revisions to the VA's current mandatory training approach that include rescinding the requirements entirely, changing the requirements to “highly recommended” instead of mandatory, combining topics, reducing course length and/or frequency, limiting target audience, substituting a competency-based or stepped training approach, and adding additional delivery formats to allow learners to select resources that best fit their individual learning styles. The theoretical advantages of computer-based training include convenience, flexibility in scheduling, consistency of material presented, and tracking and documentation capabilities. But what is not taken into consideration by computer-based methods is that people learn in different ways. 
In their 2009 report on the burden of VA mandatory training, the Mandatory Training Subcommittee raised questions about the value of the VA's mandatory training program. The Subcommittee estimated that VHA spends $40 million a year just for the three core trainings mandated for VA employees (Table 1). Despite these high estimated costs, the Mandatory Training Subcommittee did not identify any studies in VA or otherwise of the effectiveness of any VA mandatory training programs. Additionally, the Subcommittee's qualitative evaluation of employee perceptions found universal unhappiness about the mandatory training requirements. Common themes include criticism that the mandatory trainings take up too much time, are not optimally accessible (including the varying locations and usability of courses), vary in quality, lack alternatives to online courses, and do not adapt to an individual's role and his or her existing knowledge on the subject. The Subcommittee report concluded that, given the enormous burdens of cost and negative employee perceptions, the VA would be well served to more closely consider the benefits of their mandatory training programs. In January 2009, the Mandatory Training Workgroup asked the VA Technology Assessment Program (TAP) to conduct a Brief Overview of evidence on the organizational effectiveness of mandatory learning strategies. The VA TAP Brief identified very little evidence on the subject and their main findings were that volition may be an important determinant of organizational learning and that training effectiveness may vary as a function of evaluation criteria, training delivery method, the subject being taught, and the criterion used to operationalize effectiveness. In February, 2014, to maintain the currency of knowledge about evidence on mandatory learning strategies, the Mandatory Training Workgroup requested that the VA Evidence-based Synthesis Program Coordinating Center (ESP CC) conduct an updated Evidence Brief on this topic. 
An evidence brief differs from a full systematic review in that the scope is narrowly defined and the traditional review methods are streamlined in order to synthesize evidence within a shortened timeframe. An evidence brief does not outline the full context in which the information is to be used and does not present a comprehensive assessment of knowledge on the topic. Brief or rapid review methodology is still developing and there is not yet consensus on what represents best practice.

Scope

The objective of this Evidence Brief is to synthesize the literature on the effectiveness of mandatory online employee compliance training. The ESP Coordinating Center investigators and representatives of the VHA Mandatory Training Subcommittee worked together to identify the population, comparator, outcome, timing, setting, and study design characteristics of interest. The VHA Mandatory Training Subcommittee approved the following key questions and eligibility criteria to guide this review: KEY QUESTIONS: Key Question 1: What is the effectiveness and comparative effectiveness of mandatory computer-based trainings on government ethics, workplace harassment, or privacy and information security-related topics? ○. Key Question 1a: Does the effectiveness of these mandatory computer-based trainings vary by format (eg, just-in-time training, competency-based assessment, stepped training delivery) or repetition of training? ○. Key Question 1b: Does the effectiveness of these mandatory computer-based trainings vary by the method of training delivery (eg, length, audiovisual components)? Key Question 2: What are the harms (eg, turnover, morale, grievances, institutional and opportunity costs) of these mandatory computer-based trainings?

Inclusion criteria

The ESP included studies that met the following criteria: Population: adults in the workforce. Intervention: mandatory online training targeted to a broad base of employees to address an organization-wide need (eg, ethics, prevention of workplace harassment, information security). Comparator: no training, other training methods, or other activities. Outcomes: trainee learning (eg, changes in knowledge or skills), trainee behavior, or organizational change (eg, changes in productivity, turnover, morale, grievances, or patient outcomes). Timing: longitudinal studies. Setting: workplace. Study design: randomized controlled trials and observational studies. This Evidence Brief will not include the following: Population: students of any age. Intervention: continuing medical education. Outcomes: trainee reaction (eg, attitudes towards or satisfaction with the training program). Study design: qualitative studies.",2016-09-09 +26340797,"Birth Weight, Ethnicity, and Exposure to Trihalomethanes and Haloacetic Acids in Drinking Water during Pregnancy in the Born in Bradford Cohort.","

Background

Evidence for a relationship between trihalomethane (THM) or haloacetic acid (HAA) exposure and adverse fetal growth is inconsistent. Disinfection by-products exist as complex mixtures in water supplies, but THMs and HAAs have typically been examined separately.

Objectives

We investigated joint exposure at the individual level to THMs and HAAs in relation to birth weight in the multi-ethnic Born in Bradford birth cohort.

Methods

Pregnant women reported their water consumption and activities via questionnaire. These data were combined with area-level THM and HAA concentrations to estimate integrated uptake of THMs into blood and HAA ingestion, accounting for boiling/filtering. We examined the relationship between THM and HAA exposures and birth weight of up to 7,438 singleton term babies using multiple linear regression, stratified by ethnicity.

Results

Among Pakistani-origin infants, mean birth weight was significantly lower in association with the highest versus lowest tertiles of integrated THM uptake (e.g., -53.7 g; 95% CI: -89.9, -17.5 for ≥ 1.82 vs. < 1.05 μg/day of total THM) and there were significant trends (p < 0.01) across increasing tertiles, but there were no associations among white British infants. Neither ingestion of HAAs alone or jointly with THMs was associated with birth weight. Estimated THM uptake via showering, bathing, and swimming was significantly associated with lower birth weight in Pakistani-origin infants, when adjusting for THM and HAA ingestion via water consumption.

Conclusions

To our knowledge, this is the largest DBP and fetal growth study to date with individual water use data, and the first to examine individual-level estimates of joint THM-HAA exposure. Our findings demonstrate associations between THM, but not HAA, exposure during pregnancy and reduced birth weight, but suggest this differs by ethnicity. This study suggests that THMs are not acting as a proxy for HAAs, or vice-versa.

Citation

Smith RB, Edwards SC, Best N, Wright J, Nieuwenhuijsen MJ, Toledano MB. 2016. Birth weight, ethnicity, and exposure to trihalomethanes and haloacetic acids in drinking water during pregnancy in the Born in Bradford cohort. Environ Health Perspect 124:681-689; http://dx.doi.org/10.1289/ehp.1409480.",2015-09-04 +25845596,The EMBL-EBI bioinformatics web and programmatic tools framework.,"Since 2009 the EMBL-EBI Job Dispatcher framework has provided free access to a range of mainstream sequence analysis applications. These include sequence similarity search services (https://www.ebi.ac.uk/Tools/sss/) such as BLAST, FASTA and PSI-Search, multiple sequence alignment tools (https://www.ebi.ac.uk/Tools/msa/) such as Clustal Omega, MAFFT and T-Coffee, and other sequence analysis tools (https://www.ebi.ac.uk/Tools/pfa/) such as InterProScan. Through these services users can search mainstream sequence databases such as ENA, UniProt and Ensembl Genomes, utilising a uniform web interface or systematically through Web Services interfaces (https://www.ebi.ac.uk/Tools/webservices/) using common programming languages, and obtain enriched results with novel visualisations. Integration with EBI Search (https://www.ebi.ac.uk/ebisearch/) and the dbfetch retrieval service (https://www.ebi.ac.uk/Tools/dbfetch/) further expands the usefulness of the framework. 
New tools and updates such as NCBI BLAST+, InterProScan 5 and PfamScan, new categories such as RNA analysis tools (https://www.ebi.ac.uk/Tools/rna/), new databases such as ENA non-coding, WormBase ParaSite, Pfam and Rfam, and new workflow methods, together with the retirement of depreciated services, ensure that the framework remains relevant to today's biological community.",2015-04-06 +21715384,RuleGO: a logical rules-based tool for description of gene groups by means of Gene Ontology.,"Genome-wide expression profiles obtained with the use of DNA microarray technology provide abundance of experimental data on biological and molecular processes. Such amount of data need to be further analyzed and interpreted in order to obtain biological conclusions on the basis of experimental results. The analysis requires a lot of experience and is usually time-consuming process. Thus, frequently various annotation databases are used to improve the whole process of analysis. Here, we present RuleGO--the web-based application that allows the user to describe gene groups on the basis of logical rules that include Gene Ontology (GO) terms in their premises. Presented application allows obtaining rules that reflect coappearance of GO-terms describing genes supported by the rules. The ontology level and number of coappearing GO-terms is adjusted in automatic manner. The user limits the space of possible solutions only. The RuleGO application is freely available at http://rulego.polsl.pl/.",2011-07-01 +23322167,Corpus callosum shape changes in early Alzheimer's disease: an MRI study using the OASIS brain database.,"The corpus callosum (CC) is the largest fiber bundle connecting the left and right cerebral hemispheres. It has been a region examined extensively for indications of various pathologies, including Alzheimer's disease (AD). Almost all previous studies of the CC in AD have been concerned with its size, particularly its mid-sagittal cross-sectional area (CCA). 
In this study, we show that the CC shape, characterized by its circularity (CIR), may be affected more profoundly than its size in early AD. MRI scans (n = 196) were obtained from the publicly available Open Access Series of Imaging Studies database. The CC cross-sectional region on the mid-sagittal section of the brain was automatically segmented using a novel algorithm. The CCA and CIR were compared in 98 normal controls (NC) subjects, 70 patients with very mild AD (AD-VM), and 28 patients with mild AD (AD-M). Statistical analysis of covariance controlling for age and intracranial capacity showed that both the CIR and the CCA were significantly reduced in the AD-VM group relative to the NC group (CIR: p = 0.004; CCA: p = 0.005). However, only the CIR was significantly different between the AD-M and AD-VM groups (p = 0.006) being smaller in the former. The CCA was not significantly different between the AD-M and AD-VM groups. The results suggest that CC shape may be a more sensitive marker than its size for monitoring the progression of AD. In order to facilitate independent analyses, the CC segmentations and the CCA and CIR data used in this study have been made publicly available (http://www.nitrc.org/projects/art).",2013-01-16 +23323735,SyntTax: a web server linking synteny to prokaryotic taxonomy.,"

Background

The study of the conservation of gene order or synteny constitutes a powerful methodology to assess the orthology of genomic regions and to predict functional relationships between genes. The exponential growth of microbial genomic databases is expected to improve synteny predictions significantly. Paradoxically, this genomic data plethora, without information on organisms relatedness, could impair the performance of synteny analysis programs.

Results

In this work, I present SyntTax, a synteny web service designed to take full advantage of the large amount of archaeal and bacterial genomes by linking them through taxonomic relationships. SyntTax incorporates a full hierarchical taxonomic tree allowing intuitive access to all completely sequenced prokaryotes. Single or multiple organisms can be chosen on the basis of their lineage by selecting the corresponding rank nodes in the tree. The synteny methodology is built upon our previously described Absynte algorithm with several additional improvements.

Conclusions

SyntTax aims to produce robust syntenies by providing prompt access to the taxonomic relationships connecting all completely sequenced microbial genomes. The reduction in redundancy offered by lineage selection presents the benefit of increasing accuracy while reducing computation time. This web tool was used to resolve successfully several conserved complex gene clusters described in the literature. In addition, particular features of SyntTax permit the confirmation of the involvement of the four components constituting the E. coli YgjD multiprotein complex responsible for tRNA modification. By analyzing the clustering evolution of alternative gene fusions, new proteins potentially interacting with this complex could be proposed. The web service is available at http://archaea.u-psud.fr/SyntTax.",2013-01-16 +22962464,SANS: high-throughput retrieval of protein sequences allowing 50% mismatches.,"

Motivation

The genomic era in molecular biology has brought on a rapidly widening gap between the amount of sequence data and first-hand experimental characterization of proteins. Fortunately, the theory of evolution provides a simple solution: functional and structural information can be transferred between homologous proteins. Sequence similarity searching followed by k-nearest neighbor classification is the most widely used tool to predict the function or structure of anonymous gene products that come out of genome sequencing projects.

Results

We present a novel word filter, suffix array neighborhood search (SANS), to identify protein sequence similarities in the range of 50-100% identity with sensitivity comparable to BLAST and 10 times the speed of USEARCH. In contrast to these previous approaches, the complexity of the search is proportional only to the length of the query sequence and independent of database size, enabling fast searching and functional annotation into the future despite rapidly expanding databases.

Availability and implementation

The software is freely available to non-commercial users from our website http://ekhidna.biocenter.helsinki.fi/downloads/sans.

Contact

liisa.holm@helsinki.fi.",2012-09-01 +21300700,"BioGRID REST Service, BiogridPlugin2 and BioGRID WebGraph: new tools for access to interaction data at BioGRID.","

Unlabelled

The Biological General Repository for Interaction Datasets (BioGRID) representational state transfer (REST) service allows full URL-based access to curated protein and genetic interaction data at the BioGRID database. Appending URL parameters allows filtering of data by various attributes including gene names and identifiers, PubMed ID and evidence type. We also describe two visualization tools that interface with the REST service, the BiogridPlugin2 for Cytoscape and the BioGRID WebGraph.

Availability and implementation

BioGRID data and applications are completely free for commercial and non-commercial use. http://webservice.thebiogrid.org/resources/interactions (REST Service), http://wiki.thebiogrid.org/doku.php/biogridrest (REST Service parameter list and help), http://webservice.thebiogrid.org/resources/application.wadl (REST Service WADL), http://thebiogrid.org/download.php (BiogridPlugin2, v2.1 download), http://wiki.thebiogrid.org/doku.php/biogridplugin2 (BiogridPlugin2 help) and http://tyerslab.bio.ed.ac.uk/tools/BioGRID_webgraph.php (BioGRID WebGraph).",2011-02-07 +25851949,PROVEAN web server: a tool to predict the functional effect of amino acid substitutions and indels.,"

Unlabelled

We present a web server to predict the functional effect of single or multiple amino acid substitutions, insertions and deletions using the prediction tool PROVEAN. The server provides rapid analysis of protein variants from any organisms, and also supports high-throughput analysis for human and mouse variants at both the genomic and protein levels.

Availability and implementation

The web server is freely available and open to all users with no login requirements at http://provean.jcvi.org.

Contact

achan@jcvi.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-06 +25847005,MetaMapR: pathway independent metabolomic network analysis incorporating unknowns.,"

Unlabelled

Metabolic network mapping is a widely used approach for integration of metabolomic experimental results with biological domain knowledge. However, current approaches can be limited by biochemical domain or pathway knowledge which results in sparse disconnected graphs for real world metabolomic experiments. MetaMapR integrates enzymatic transformations with metabolite structural similarity, mass spectral similarity and empirical associations to generate richly connected metabolic networks. This open source, web-based or desktop software, written in the R programming language, leverages KEGG and PubChem databases to derive associations between metabolites even in cases where biochemical domain or molecular annotations are unknown. Network calculation is enhanced through an interface to the Chemical Translation System, which allows metabolite identifier translation between >200 common biochemical databases. Analysis results are presented as interactive visualizations or can be exported as high-quality graphics and numerical tables which can be imported into common network analysis and visualization tools.

Availability and implementation

Freely available at http://dgrapov.github.io/MetaMapR/. Requires R and a modern web browser. Installation instructions, tutorials and application examples are available at http://dgrapov.github.io/MetaMapR/.

Contact

ofiehn@ucdavis.edu.",2015-04-05 +21819938,The Génolevures database.,"The Génolevures online database (URL: http://www.genolevures.org) stores and provides the data and results obtained by the Génolevures Consortium through several campaigns of genome annotation of the yeasts in the Saccharomycotina subphylum (hemiascomycetes). This database is dedicated to large-scale comparison of these genomes, storing not only the different chromosomal elements detected in the sequences, but also the logical relations between them. The database is divided into a public part, accessible to anyone through Internet, and a private part where the Consortium members make genome annotations with our Magus annotation system; this system is used to annotate several related genomes in parallel. The public database is widely consulted and offers structured data, organized using a REST web site architecture that allows for automated requests. The implementation of the database, as well as its associated tools and methods, is evolving to cope with the influx of genome sequences produced by Next Generation Sequencing (NGS).",2011-06-30 +24182195,Identification of direction in gene networks from expression and methylation.,"

Background

Reverse-engineering gene regulatory networks from expression data is difficult, especially without temporal measurements or interventional experiments. In particular, the causal direction of an edge is generally not statistically identifiable, i.e., cannot be inferred as a statistical parameter, even from an unlimited amount of non-time series observational mRNA expression data. Some additional evidence is required and high-throughput methylation data can be viewed as a natural multifactorial gene perturbation experiment.

Results

We introduce IDEM (Identifying Direction from Expression and Methylation), a method for identifying the causal direction of edges by combining DNA methylation and mRNA transcription data. We describe the circumstances under which edge directions become identifiable and experiments with both real and synthetic data demonstrate that the accuracy of IDEM for inferring both edge placement and edge direction in gene regulatory networks is significantly improved relative to other methods.

Conclusion

Reverse-engineering directed gene regulatory networks from static observational data becomes feasible by exploiting the context provided by high-throughput DNA methylation data. An implementation of the algorithm described is available at http://code.google.com/p/idem/.",2013-11-01 +27187178,A Novel Method to Verify Multilevel Computational Models of Biological Systems Using Multiscale Spatio-Temporal Meta Model Checking.,"Insights gained from multilevel computational models of biological systems can be translated into real-life applications only if the model correctness has been verified first. One of the most frequently employed in silico techniques for computational model verification is model checking. Traditional model checking approaches only consider the evolution of numeric values, such as concentrations, over time and are appropriate for computational models of small scale systems (e.g. intracellular networks). However for gaining a systems level understanding of how biological organisms function it is essential to consider more complex large scale biological systems (e.g. organs). Verifying computational models of such systems requires capturing both how numeric values and properties of (emergent) spatial structures (e.g. area of multicellular population) change over time and across multiple levels of organization, which are not considered by existing model checking approaches. To address this limitation we have developed a novel approximate probabilistic multiscale spatio-temporal meta model checking methodology for verifying multilevel computational models relative to specifications describing the desired/expected system behaviour. The methodology is generic and supports computational models encoded using various high-level modelling formalisms because it is defined relative to time series data and not the models used to generate it. 
In addition, the methodology can be automatically adapted to case study specific types of spatial structures and properties using the spatio-temporal meta model checking concept. To automate the computational model verification process we have implemented the model checking approach in the software tool Mule (http://mule.modelchecking.org). Its applicability is illustrated against four systems biology computational models previously published in the literature encoding the rat cardiovascular system dynamics, the uterine contractions of labour, the Xenopus laevis cell cycle and the acute inflammation of the gut and lung. Our methodology and software will enable computational biologists to efficiently develop reliable multilevel computational models of biological systems.",2016-05-17 +22972125,Chlorambucil for patients with primary biliary cirrhosis.,"

Background

Chlorambucil has been used for patients with primary biliary cirrhosis as it possesses immunosuppressive properties. But it is unknown whether it benefits or harms these patients.

Objectives

To evaluate the beneficial and any harmful effects of chlorambucil for primary biliary cirrhosis patients.

Search methods

Eligible trials were identified by searching the Cochrane Hepato-Biliary Group Controlled Trials Register (March 2012), the Cochrane Central Register of Controlled Trials (CENTRAL) in The Cochrane Library (2012, Issue 2), MEDLINE (1946 to March 2012), EMBASE (1974 to March 2012), Science Citation Index EXPANDED (1900 to March 2012), The Chinese Biomedical Database (1976 to March 2012), The Chinese Medical Current Contents (1994 to March 2012), The China Hospital Knowledge Database (1994 to March 2012), and a database of ongoing trials (http://www.controlled-trials.com/mrct/) (accessed 6 March 2012). The reference lists of the retrieved publications and review articles were also read through, and pharmaceutical companies known to produce chlorambucil were contacted.

Selection criteria

Randomised clinical trials, irrespective of language, year of publication, and publication status, comparing chlorambucil at any dose versus placebo, no intervention, another active drug, or one dose of chlorambucil with another dose.

Data collection and analysis

We planned to assess continuous data with mean differences (MD), and dichotomous outcomes with relative risk (RR), both with 95% confidence intervals (CI). As we only identified one trial, Fisher's exact tests were employed.

Main results

Only one randomised trial was identified and included in the review. The bias risk in the trial was high. The trial compared chlorambucil versus no intervention in 24 patients with primary biliary cirrhosis. Fisher's exact test did not show a significant reduction of mortality when comparing chlorambucil with no treatment (0/13 (0%) versus (2/11 (18.2%); P = 0.20). There was no significant difference regarding adverse events for chlorambucil compared with no treatment, but all patients receiving chlorambucil experienced adverse events (13/13 (100%) versus (3/11 (27%); P = 0.1). According to the authors of the trial, chlorambucil led to a significant improvement in mean serum levels of bilirubin (P < 0.05), albumin (P < 0.05), immunoglobulin M (P < 0.01), serum aspartate aminotransferase activity (P < 0.01), and hepatic inflammatory infiltrates (P < 0.01).

Authors' conclusions

There is not sufficient evidence to support or reject the use of chlorambucil for patients with primary biliary cirrhosis. Chlorambucil may show benefit in some unvalidated surrogate outcome measures (for example, serum bilirubin and immunoglobulin M levels). Chlorambucil is, however, connected with a number of adverse events. Bone marrow suppression should be noted in particular. Further randomised clinical trials are necessary to assess the benefits and harms of chlorambucil in this indication.",2012-09-12 +23404398,A genetic resource for rapid and comprehensive phenotype screening of nonessential Staphylococcus aureus genes.,"

Unlabelled

To enhance the research capabilities of investigators interested in Staphylococcus aureus, the Nebraska Center for Staphylococcal Research (CSR) has generated a sequence-defined transposon mutant library consisting of 1,952 strains, each containing a single mutation within a nonessential gene of the epidemic community-associated methicillin-resistant S. aureus (CA-MRSA) isolate USA300. To demonstrate the utility of this library for large-scale screening of phenotypic alterations, we spotted the library on indicator plates to assess hemolytic potential, protease production, pigmentation, and mannitol utilization. As expected, we identified many genes known to function in these processes, thus validating the utility of this approach. Importantly, we also identified genes not previously associated with these phenotypes. In total, 71 mutants displayed differential hemolysis activities, the majority of which were not previously known to influence hemolysin production. Furthermore, 62 mutants were defective in protease activity, with only 14 previously demonstrated to be involved in the production of extracellular proteases. In addition, 38 mutations affected pigment formation, while only 7 influenced mannitol fermentation, underscoring the sensitivity of this approach to identify rare phenotypes. Finally, 579 open reading frames were not interrupted by a transposon, thus providing potentially new essential gene targets for subsequent antibacterial discovery. Overall, the Nebraska Transposon Mutant Library represents a valuable new resource for the research community that should greatly enhance investigations of this important human pathogen.

Importance

Infections caused by Staphylococcus aureus cause significant morbidity and mortality in both community and hospital environments. Specific-allelic-replacement mutants are required to study the biology of this organism; however, this process is costly and time-consuming. We describe the construction and validation of a sequence-defined transposon mutant library available for use by the scientific community through the Network on Antimicrobial Resistance in Staphylococcus aureus (NARSA) strain repository. In addition, complementary resources, including a website (http://app1.unmc.edu/fgx/) and genetic tools that expedite the allelic replacement of the transposon in the mutants with useful selectable markers and fluorescent reporter fusions, have been generated. Overall, this library and associated tools will have a significant impact on studies investigating S. aureus pathogenesis and biology and serve as a useful paradigm for the study of other bacterial systems.",2013-02-12 +27387395,Implementing an Ebola Vaccine Study - Sierra Leone.,"In October 2014, the College of Medicine and Allied Health Sciences of the University of Sierra Leone, the Sierra Leone Ministry of Health and Sanitation, and CDC joined the global effort to accelerate assessment and availability of candidate Ebola vaccines and began planning for the Sierra Leone Trial to Introduce a Vaccine against Ebola (STRIVE). STRIVE was an individually randomized controlled phase II/III trial to evaluate efficacy, immunogenicity, and safety of the recombinant vesicular stomatitis virus Ebola vaccine (rVSV-ZEBOV). The study population was health care and frontline workers in select chiefdoms of the five most affected districts in Sierra Leone. Participants were randomized to receive a single intramuscular dose of rVSV-ZEBOV at enrollment or to receive a single intramuscular dose 18-24 weeks after enrollment. All participants were followed up monthly until 6 months after vaccination. 
Two substudies separately assessed detailed reactogenicity over 1 month and immunogenicity over 12 months. During the 5 months before the trial, STRIVE and partners built a research platform in Sierra Leone comprising participant follow-up sites, cold chain, reliable power supply, and vaccination clinics and hired and trained at least 350 national staff. Wide-ranging community outreach, informational sessions, and messaging were conducted before and during the trial to ensure full communication to the population of the study area regarding procedures and current knowledge about the trial vaccine. During April 9-August 15, 2015, STRIVE enrolled 8,673 participants, of whom 453 and 539 were also enrolled in the safety and immunogenicity substudies, respectively. As of April 28, 2016, no Ebola cases and no vaccine-related serious adverse events, which by regulatory definition include death, life-threatening illness, hospitalization or prolongation of hospitalization, or permanent disability, were reported in the study population. Although STRIVE will not produce an estimate of vaccine efficacy because of low case frequency as the epidemic was controlled, data on safety and immunogenicity will support decisions on licensure of rVSV-ZEBOV. The activities summarized in this report would not have been possible without collaboration with many U.S. and international partners (http://www.cdc.gov/vhf/ebola/outbreaks/2014-west-africa/partners.html).",2016-07-08 +23066098,"PlantRNA, a database for tRNAs of photosynthetic eukaryotes.","PlantRNA database (http://plantrna.ibmp.cnrs.fr/) compiles transfer RNA (tRNA) gene sequences retrieved from fully annotated plant nuclear, plastidial and mitochondrial genomes. The set of annotated tRNA gene sequences has been manually curated for maximum quality and confidence. The novelty of this database resides in the inclusion of biological information relevant to the function of all the tRNAs entered in the library. 
This includes 5'- and 3'-flanking sequences, A and B box sequences, region of transcription initiation and poly(T) transcription termination stretches, tRNA intron sequences, aminoacyl-tRNA synthetases and enzymes responsible for tRNA maturation and modification. Finally, data on mitochondrial import of nuclear-encoded tRNAs as well as the bibliome for the respective tRNAs and tRNA-binding proteins are also included. The current annotation concerns complete genomes from 11 organisms: five flowering plants (Arabidopsis thaliana, Oryza sativa, Populus trichocarpa, Medicago truncatula and Brachypodium distachyon), a moss (Physcomitrella patens), two green algae (Chlamydomonas reinhardtii and Ostreococcus tauri), one glaucophyte (Cyanophora paradoxa), one brown alga (Ectocarpus siliculosus) and a pennate diatom (Phaeodactylum tricornutum). The database will be regularly updated and implemented with new plant genome annotations so as to provide extensive information on tRNA biology to the research community.",2012-10-12 +25605595,An approach to creating a more realistic working model from a protein data bank entry.,"An accurate model of three-dimensional protein structure is important in a variety of fields such as structure-based drug design and mechanistic studies of enzymatic reactions. While the entries in the Protein Data Bank ( http://www.pdb.org ) provide valuable information about protein structures, a small fraction of the PDB structures were found to contain anomalies not reported in the PDB file. The semiempirical PM7 method in MOPAC2012 was used for identifying anomalously short hydrogen bonds, C-H⋯O/C-H⋯N interactions, non-bonding close contacts, and unrealistic covalent bond lengths in recently published Protein Data Bank files. It was also used to generate new structures with these faults removed. 
When the semiempirical models were compared to those of PDB_REDO (http://www.cmbi.ru.nl/pdb_redo/), the clashscores, as defined by MolProbity ( http://molprobity.biochem.duke.edu/), were better in about 50% of the structures. The semiempirical models also had a lower root-mean-square-deviation value in nearly all cases than those from PDB_REDO, indicative of a better conservation of the tertiary structure. Finally, the semiempirical models were found to have lower clashscores than the initial PDB file in all but one case. Because this approach maintains as much of the original tertiary structure as possible while improving anomalous interactions, it should be useful to theoreticians, experimentalists, and crystallographers investigating the structure and function of proteins.",2015-01-22 +24974204,Learning protein-DNA interaction landscapes by integrating experimental data through computational models.,"

Motivation

Transcriptional regulation is directly enacted by the interactions between DNA and many proteins, including transcription factors (TFs), nucleosomes and polymerases. A critical step in deciphering transcriptional regulation is to infer, and eventually predict, the precise locations of these interactions, along with their strength and frequency. While recent datasets yield great insight into these interactions, individual data sources often provide only partial information regarding one aspect of the complete interaction landscape. For example, chromatin immunoprecipitation (ChIP) reveals the binding positions of a protein, but only for one protein at a time. In contrast, nucleases like MNase and DNase can be used to reveal binding positions for many different proteins at once, but cannot easily determine the identities of those proteins. Currently, few statistical frameworks jointly model these different data sources to reveal an accurate, holistic view of the in vivo protein-DNA interaction landscape.

Results

Here, we develop a novel statistical framework that integrates different sources of experimental information within a thermodynamic model of competitive binding to jointly learn a holistic view of the in vivo protein-DNA interaction landscape. We show that our framework learns an interaction landscape with increased accuracy, explaining multiple sets of data in accordance with thermodynamic principles of competitive DNA binding. The resulting model of genomic occupancy provides a precise mechanistic vantage point from which to explore the role of protein-DNA interactions in transcriptional regulation.

Availability and implementation

The C source code for compete and Python source code for MCMC-based inference are available at http://www.cs.duke.edu/∼amink.

Contact

amink@cs.duke.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-06-27 +21873637,Spatial clustering of protein binding sites for template based protein docking.,"

Motivation

In recent years, much structural information on protein domains and their pair-wise interactions has been made available in public databases. However, it is not yet clear how best to use this information to discover general rules or interaction patterns about structural protein-protein interactions. Improving our ability to detect and exploit structural interaction patterns will help to provide a better 3D picture of the known protein interactome, and will help to guide docking-based predictions of the 3D structures of unsolved protein complexes.

Results

This article presents KBDOCK, a 3D database approach for spatially clustering protein binding sites and for performing template-based (knowledge-based) protein docking. KBDOCK combines residue contact information from the 3DID database with the Pfam protein domain family classification together with coordinate data from the Protein Data Bank. This allows the 3D configurations of all known hetero domain-domain interactions to be superposed and clustered for each Pfam family. We find that most Pfam domain families have up to four hetero binding sites, and over 60% of all domain families have just one hetero binding site. The utility of this approach for template-based docking is demonstrated using 73 complexes from the Protein Docking Benchmark. Overall, up to 45 out of 73 complexes may be modelled by direct homology to existing domain interfaces, and key binding site information is found for 24 of the 28 remaining complexes. These results show that KBDOCK can often provide useful information for predicting the structures of unknown protein complexes.

Availability

http://kbdock.loria.fr/

Contact

Dave.Ritchie@inria.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-27 +24423865,Bisulfighter: accurate detection of methylated cytosines and differentially methylated regions.,"Analysis of bisulfite sequencing data usually requires two tasks: to call methylated cytosines (mCs) in a sample, and to detect differentially methylated regions (DMRs) between paired samples. Although numerous tools have been proposed for mC calling, methods for DMR detection have been largely limited. Here, we present Bisulfighter, a new software package for detecting mCs and DMRs from bisulfite sequencing data. Bisulfighter combines the LAST alignment tool for mC calling, and a novel framework for DMR detection based on hidden Markov models (HMMs). Unlike previous attempts that depend on empirical parameters, Bisulfighter can use the expectation-maximization algorithm for HMMs to adjust parameters for each data set. We conduct extensive experiments in which accuracy of mC calling and DMR detection is evaluated on simulated data with various mC contexts, read qualities, sequencing depths and DMR lengths, as well as on real data from a wide range of biological processes. We demonstrate that Bisulfighter consistently achieves better accuracy than other published tools, providing greater sensitivity for mCs with fewer false positives, more precise estimates of mC levels, more exact locations of DMRs and better agreement of DMRs with gene expression and DNase I hypersensitivity. The source code is available at http://epigenome.cbrc.jp/bisulfighter.",2014-01-13 +26862568,Top-down characterization data on the speciation of the Candida albicans immunome in candidemia.,"The characterization of pathogen-specific antigenic proteins at the protein species level is crucial in the development and molecular optimization of novel immunodiagnostics, vaccines or immunotherapeutics for infectious diseases. 
The major requirements to achieve this molecular level are to obtain 100% sequence coverage and identify all post-translational modifications of each antigenic protein species. In this article, we show nearly complete sequence information for five discrete antigenic species of Candida albicans Tdh3 (glyceraldehyde-3-phosphate dehydrogenase), which have been reported to be differentially recognized both among candidemia patients and between candidemia and control patients. A comprehensive description of the top-down immunoproteomic strategy used for seroprofiling at the C. albicans protein species level in candidemia as well as for the chemical characterization of this immunogenic protein (based on high-resolution 2-DE, Western blotting, peptide mass fingerprinting, tandem mass spectrometry and de novo peptide sequencing) is also provided. The top-down characterization data on the speciation of the C. albicans immunome in candidemia presented here are related to our research article entitled ""Seroprofiling at the Candida albicans protein species level unveils an accurate molecular discriminator for candidemia"" (Pitarch et al., J. Proteomics, 2015, http://dx.doi.org/10.1016/j.jprot.2015.10.022).",2015-12-11 +25984558,Genomic Analysis of the Evolution and Global Spread of Hyper-invasive Meningococcal Lineage 5.,"

Background

The predominant model for bacterial pandemics is the emergence of a virulent variant that diversifies as it spreads in human populations. We investigated a 40-year meningococcal disease pandemic caused by the hyper-invasive ET-5/ST-32 complex.

Methods

A global collection of Neisseria meningitidis isolates dating from 1969 to 2008 was whole genome sequenced (WGS) and analysed using a gene-by-gene approach at http://pubmlst.org/neisseria.

Findings

Analysis of WGS data identified a 'Lineage 5 pan genome' of 1940 genes, 1752 (92%) of which were present in all isolates (Lineage 5 'core genome'). Genetic diversity, which was mostly generated by horizontal gene transfer, was unevenly distributed in the genome; however, genealogical analysis of diverse and conserved core genes, accessory genes, and antigen encoding genes, robustly identified a star phylogeny with a number of sub-lineages. Most European and American isolates belonged to one of two closely related sub-lineages, which had diversified before the identification of the pandemic in the 1970s. A third, genetically more diverse sub-lineage, was associated with Asian isolates. Several isolates had acquired DNA from the related gonococcus.

Interpretation

These data were inconsistent with a single point of origin followed by pandemic spread, rather suggesting that the sub-lineages had diversified and spread by asymptomatic transmission, with multiple distinct strains causing localised hyperendemic outbreaks.",2015-03-01 +27385358,"Outdoor PM2.5, Ambient Air Temperature, and Asthma Symptoms in the Past 14 Days among Adults with Active Asthma.","

Background

Relationships between air quality and health are well-described, but little information is available about the joint associations between particulate air pollution, ambient temperature, and respiratory morbidity.

Objectives

We evaluated associations between concentrations of particulate matter ≤ 2.5 μm in diameter (PM2.5) and exacerbation of existing asthma and modification of the associations by ambient air temperature.

Methods

Data from 50,356 adult respondents to the Asthma Call-back Survey from 2006-2010 were linked by interview date and county of residence to estimates of daily averages of PM2.5 and maximum air temperature. Associations between 14-day average PM2.5 and the presence of any asthma symptoms during the 14 days leading up to and including the interview date were evaluated using binomial regression. We explored variation by air temperature using similar models, stratified into quintiles of the 14-day average maximum temperature.

Results

Among adults with active asthma, 57.1% reported asthma symptoms within the past 14 days, and 14-day average PM2.5 ≥ 7.07 μg/m3 was associated with an estimated 4-5% higher asthma symptom prevalence. In the range of 4.00-7.06 μg/m3 of PM2.5, each 1-μg/m3 increase was associated with a 3.4% [95% confidence interval (CI): 1.1, 5.7] increase in symptom prevalence; across categories of temperature from 1.1 to 80.5°F, each 1-μg/m3 increase was associated with increased symptom prevalence (1.1-44.4°F: 7.9%; 44.5-58.6°F: 6.9%; 58.7-70.1°F: 2.9%; 70.2-80.5°F: 7.3%).

Conclusions

These results suggest that each unit increase in PM2.5 may be associated with an increase in the prevalence of asthma symptoms, even at levels as low as 4.00-7.06 μg/m3. Citation: Mirabelli MC, Vaidyanathan A, Flanders WD, Qin X, Garbe P. 2016. Outdoor PM2.5, ambient air temperature, and asthma symptoms in the past 14 days among adults with active asthma. Environ Health Perspect 124:1882-1890; http://dx.doi.org/10.1289/EHP92.",2016-07-06 +23116482,CooVar: co-occurring variant analyzer.,"

Background

Evaluating the impact of genomic variations (GV) on protein-coding transcripts is an important step in identifying variants of functional significance. Currently available programs for variant annotation depend on external databases or annotate multiple variants affecting the same transcript independently, which limits program use to organisms available in these databases or results in potentially incorrect or incomplete annotations.

Findings

We have developed CooVar (Co-occurring Variant Analyzer), a database-independent program for assessing the impact of GVs on protein-coding transcripts. CooVar takes GVs, reference genome sequence, and protein-coding exons as input and provides annotated GVs and transcripts as output. Unlike similar programs, CooVar considers the combined impact of all GVs affecting the same transcript, generating biologically more accurate annotations. CooVar is operated from the command-line and supports standard file formats VCF, GFF/GTF, and GVF, which makes it easy to integrate into existing computational pipelines. We have extensively tested CooVar on worm and human data sets and demonstrate that it generates correct annotations in only a short amount of time.

Conclusions

CooVar is an easy-to-use and lightweight variant annotation tool that considers the combined impact of GVs on protein-coding transcripts. CooVar is freely available at http://genome.sfu.ca/projects/coovar/.",2012-11-01 +26589280,hybridSPAdes: an algorithm for hybrid assembly of short and long reads.,"

Motivation

Recent advances in single molecule real-time (SMRT) and nanopore sequencing technologies have enabled high-quality assemblies from long and inaccurate reads. However, these approaches require high coverage by long reads and remain expensive. On the other hand, the inexpensive short reads technologies produce accurate but fragmented assemblies. Thus, a hybrid approach that assembles long reads (with low coverage) and short reads has a potential to generate high-quality assemblies at reduced cost.

Results

We describe hybridSPAdes algorithm for assembling short and long reads and benchmark it on a variety of bacterial assembly projects. Our results demonstrate that hybridSPAdes generates accurate assemblies (even in projects with relatively low coverage by long reads) thus reducing the overall cost of genome sequencing. We further present the first complete assembly of a genome from single cells using SMRT reads.

Availability and implementation

hybridSPAdes is implemented in C++ as a part of SPAdes genome assembler and is publicly available at http://bioinf.spbau.ru/en/spades

Contact

d.antipov@spbu.ru

Supplementary information

supplementary data are available at Bioinformatics online.",2015-11-20 +24897343,CloudDOE: a user-friendly tool for deploying Hadoop clouds and analyzing high-throughput sequencing data with MapReduce.,"

Background

Explosive growth of next-generation sequencing data has resulted in ultra-large-scale data sets and ensuing computational problems. Cloud computing provides an on-demand and scalable environment for large-scale data analysis. Using a MapReduce framework, data and workload can be distributed via a network to computers in the cloud to substantially reduce computational latency. Hadoop/MapReduce has been successfully adopted in bioinformatics for genome assembly, mapping reads to genomes, and finding single nucleotide polymorphisms. Major cloud providers offer Hadoop cloud services to their users. However, it remains technically challenging to deploy a Hadoop cloud for those who prefer to run MapReduce programs in a cluster without built-in Hadoop/MapReduce.

Results

We present CloudDOE, a platform-independent software package implemented in Java. CloudDOE encapsulates technical details behind a user-friendly graphical interface, thus liberating scientists from having to perform complicated operational procedures. Users are guided through the user interface to deploy a Hadoop cloud within in-house computing environments and to run applications specifically targeted for bioinformatics, including CloudBurst, CloudBrush, and CloudRS. One may also use CloudDOE on top of a public cloud. CloudDOE consists of three wizards, i.e., Deploy, Operate, and Extend wizards. Deploy wizard is designed to aid the system administrator to deploy a Hadoop cloud. It installs Java runtime environment version 1.6 and Hadoop version 0.20.203, and initiates the service automatically. Operate wizard allows the user to run a MapReduce application on the dashboard list. To extend the dashboard list, the administrator may install a new MapReduce application using Extend wizard.

Conclusions

CloudDOE is a user-friendly tool for deploying a Hadoop cloud. Its smart wizards substantially reduce the complexity and costs of deployment, execution, enhancement, and management. Interested users may collaborate to improve the source code of CloudDOE to further incorporate more MapReduce bioinformatics tools into CloudDOE and support next-generation big data open source tools, e.g., Hadoop BigTop and Spark.

Availability

CloudDOE is distributed under Apache License 2.0 and is freely available at http://clouddoe.iis.sinica.edu.tw/.",2014-06-04 +24947751,Correcting for link loss in causal network inference caused by regulator interference.,"

Motivation

There are a number of algorithms to infer causal regulatory networks from time series (gene expression) data. Here we analyse the phenomena of regulator interference, where regulators with similar dynamics mutually suppress both the probability of regulating a target and the associated link strength; for instance, interference between two identical strong regulators reduces link probabilities by ∼50%.

Results

We construct a robust method to define an interference-corrected causal network based on an analysis of the conditional link probabilities that recovers links lost through interference. On a large real network (Streptomyces coelicolor, phosphate depletion), we demonstrate that significant interference can occur between regulators with a correlation as low as 0.865, losing an estimated 34% of links by interference. However, levels of interference cannot be predicted from the correlation between regulators alone and are data specific. Validating against known networks, we show that high numbers of functional links are lost by regulator interference. Performance against other methods on DREAM4 data is excellent.

Availability and implementation

The method is implemented in R and is publicly available as the NIACS package at http://www2.warwick.ac.uk/fac/sci/systemsbiology/research/software.",2014-06-19 +25819232,"New systemic agents in dermatology with respect to fertility, pregnancy, and lactation.","With the increasing use of new, predominantly biologic drugs in dermatology, questions frequently arise in clinical practice as to their safety in women wishing to conceive as well as during pregnancy and lactation. Apart from the Summary of Product Characteristics and the Physician's Desk Reference, reliable information may be obtained from databases such as the one compiled by the Center for Pharmacovigilance and Consultation on Embryonal Toxicology at Charité University Medical Center Berlin (https://www.embryotox.de). Another source of information is researching recent publications, for example via PubMed (http://www.ncbi.nlm.nih.gov/pubmed). This article presents current knowledge from the sources mentioned above, and gives detailed information about the use of new biologic agents in women wishing to conceive as well as during pregnancy and lactation. Drugs reviewed include: infliximab, adalimumab, etanercept, metastatic for psoriasis, vemurafenib, dabrafenib, imatinib, ipilimumab for melanoma, vismodegib for basal cell carcinoma, rituximab for cutaneous lymphoma as well as omalizumab and anakinra used in the treatment of allergies.",2015-04-01 +23896666,International Spinal Cord Injury Urinary Tract Infection Basic Data Set.,"

Objectives

To develop an International Spinal Cord Injury (SCI) Urinary Tract Infection (UTI) Basic Data Set presenting a standardized format for the collection and reporting of a minimal amount of information on UTIs in daily practice or research.

Setting

International working group.

Methods

The draft of the Data Set developed by a working group was reviewed by the Executive Committee of the International SCI Standards and Data Sets, and later by the International Spinal Cord Society (ISCoS) Scientific Committee and the American Spinal Injury Association (ASIA) Board. Relevant and interested scientific and professional (international) organizations and societies (∼40) were also invited to review the data set, and it was posted on the ISCoS and ASIA websites for 3 months to allow comments and suggestions. The ISCoS Scientific Committee, Executive Committee and ASIA Board received the data set for final review and approval.

Results

The International SCI UTI Basic Data Set includes the following variables: date of data collection, length of time of sign(s)/symptom(s), results of urine dipstick test for nitrite and leukocyte esterase, urine culture results and resistance pattern. The complete instructions for data collection and the data form itself are freely available on the website of ISCoS (http://www.iscos.org.uk).",2013-07-30 +25826798,Large-scale chemical similarity networks for target profiling of compounds identified in cell-based chemical screens.,"Target identification is one of the most critical steps following cell-based phenotypic chemical screens aimed at identifying compounds with potential uses in cell biology and for developing novel disease therapies. Current in silico target identification methods, including chemical similarity database searches, are limited to single or sequential ligand analysis that have limited capabilities for accurate deconvolution of a large number of compounds with diverse chemical structures. Here, we present CSNAP (Chemical Similarity Network Analysis Pulldown), a new computational target identification method that utilizes chemical similarity networks for large-scale chemotype (consensus chemical pattern) recognition and drug target profiling. Our benchmark study showed that CSNAP can achieve an overall higher accuracy (>80%) of target prediction with respect to representative chemotypes in large (>200) compound sets, in comparison to the SEA approach (60-70%). Additionally, CSNAP is capable of integrating with biological knowledge-based databases (Uniprot, GO) and high-throughput biology platforms (proteomic, genetic, etc) for system-wise drug target validation. 
To demonstrate the utility of the CSNAP approach, we combined CSNAP's target prediction with experimental ligand evaluation to identify the major mitotic targets of hit compounds from a cell-based chemical screen and we highlight novel compounds targeting microtubules, an important cancer therapeutic target. The CSNAP method is freely available and can be accessed from the CSNAP web server (http://services.mbi.ucla.edu/CSNAP/).",2015-03-31 +25443961,High-resolution modeling of protein structures based on flexible fitting of low-resolution structural data.,"To circumvent the difficulty of directly solving high-resolution biomolecular structures, low-resolution structural data from Cryo-electron microscopy (EM) and small angle solution X-ray scattering (SAXS) are increasingly used to explore multiple conformational states of biomolecular assemblies. One promising venue to obtain high-resolution structural models from low-resolution data is via data-constrained flexible fitting. To this end, we have developed a new method based on a coarse-grained Cα-only protein representation, and a modified form of the elastic network model (ENM) that allows large-scale conformational changes while maintaining the integrity of local structures including pseudo-bonds and secondary structures. Our method minimizes a pseudo-energy which linearly combines various terms of the modified ENM energy with an EM/SAXS-fitting score and a collision energy that penalizes steric collisions. Unlike some previous flexible fitting efforts using the lowest few normal modes, our method effectively utilizes all normal modes so that both global and local structural changes can be fully modeled with accuracy. This method is also highly efficient in computing time. We have demonstrated our method using adenylate kinase as a test case which undergoes a large open-to-close conformational change. 
The EM-fitting method is available at a web server (http://enm.lobos.nih.gov), and the SAXS-fitting method is available as a pre-compiled executable upon request.",2014-08-24 +21599929,CLOTU: an online pipeline for processing and clustering of 454 amplicon reads into OTUs followed by taxonomic annotation.,"

Background

The implementation of high throughput sequencing for exploring biodiversity poses high demands on bioinformatics applications for automated data processing. Here we introduce CLOTU, an online and open access pipeline for processing 454 amplicon reads. CLOTU has been constructed to be highly user-friendly and flexible, since different types of analyses are needed for different datasets.

Results

In CLOTU, the user can filter out low quality sequences, trim tags, primers, adaptors, perform clustering of sequence reads, and run BLAST against NCBInr or a customized database in a high performance computing environment. The resulting data may be browsed in a user-friendly manner and easily forwarded to downstream analyses. Although CLOTU is specifically designed for analyzing 454 amplicon reads, other types of DNA sequence data can also be processed. A fungal ITS sequence dataset generated by 454 sequencing of environmental samples is used to demonstrate the utility of CLOTU.

Conclusions

CLOTU is a flexible and easy to use bioinformatics pipeline that includes different options for filtering, trimming, clustering and taxonomic annotation of high throughput sequence reads. Some of these options are not included in comparable pipelines. CLOTU is implemented in a Linux computer cluster and is freely accessible to academic users through the Bioportal web-based bioinformatics service (http://www.bioportal.uio.no).",2011-05-20 +21544171,The Booly aliasing resource: a database of grouped biological identifiers.,"

Unlabelled

Redundancy among sequence identifiers is a recurring problem in bioinformatics. Here, we present a rapid and efficient method of fingerprinting identifiers to ascertain whether two or more aliases are identical. A number of tools and approaches have been developed to resolve differing names for the same genes and proteins, however, these methods each have their own limitations associated with their various goals. We have taken a different approach to the aliasing problem by simplifying the way aliases are stored and curated with the objective of simultaneously achieving speed and flexibility. Our approach (Booly-hashing) is to link identifiers with their corresponding hash keys derived from unique fingerprints such as gene or protein sequences. This tool has proven invaluable for designing a new data integration platform known as Booly, and has wide applicability to situations in which a dedicated efficient aliasing system is required. Compared with other aliasing techniques, Booly-hashing methodology provides 1) reduced run time complexity, 2) increased flexibility (aliasing of other data types, e.g. pharmaceutical drugs), 3) no required assumptions regarding gene clusters or hierarchies, and 4) simplicity in data addition, updating, and maintenance. The new Booly-hashing aliasing model has been incorporated as a central component of the Booly data integration platform we have recently developed and should be broadly applicable to other situations in which an efficient streamlined aliasing system is required. This aliasing tool and database, which allows users to quickly group the same genes and proteins together can be accessed at: http://booly.ucsd.edu/alias.

Availability

The database is available for free at http://booly.ucsd.edu/alias.",2011-03-26 +26656005,CASSIS and SMIPS: promoter-based prediction of secondary metabolite gene clusters in eukaryotic genomes.,"

Motivation

Secondary metabolites (SM) are structurally diverse natural products of high pharmaceutical importance. Genes involved in their biosynthesis are often organized in clusters, i.e., are co-localized and co-expressed. In silico cluster prediction in eukaryotic genomes remains problematic mainly due to the high variability of the clusters' content and lack of other distinguishing sequence features.

Results

We present Cluster Assignment by Islands of Sites (CASSIS), a method for SM cluster prediction in eukaryotic genomes, and Secondary Metabolites by InterProScan (SMIPS), a tool for genome-wide detection of SM key enzymes ('anchor' genes): polyketide synthases, non-ribosomal peptide synthetases and dimethylallyl tryptophan synthases. Unlike other tools based on protein similarity, CASSIS exploits the idea of co-regulation of the cluster genes, which assumes the existence of common regulatory patterns in the cluster promoters. The method searches for 'islands' of enriched cluster-specific motifs in the vicinity of anchor genes. It was validated in a series of cross-validation experiments and showed high sensitivity and specificity.

Availability and implementation

CASSIS and SMIPS are freely available at https://sbi.hki-jena.de/cassis

Contact

thomas.wolf@leibniz-hki.de or ekaterina.shelest@leibniz-hki.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-09 +26656003,NBSPred: a support vector machine-based high-throughput pipeline for plant resistance protein NBSLRR prediction.,"

Unlabelled

The nucleotide binding site leucine-rich repeats (NBSLRRs) belong to one of the largest known families of disease resistance genes that encode resistance proteins (R-protein) against the pathogens of plants. Various defence mechanisms have explained the regulation of plant immunity, but still, we have limited understanding about plant defence against different pathogens. Identification of R-proteins and proteins having R-protein-like features across the genome, transcriptome and proteome would be highly useful to develop the global understanding of plant defence mechanisms, but it is a laborious and time-consuming task. Therefore, we have developed a support vector machine-based high-throughput pipeline called NBSPred to differentiate NBSLRR and NBSLRR-like protein from Non-NBSLRR proteins from genome, transcriptome and protein sequences. The pipeline was tested and validated with input sequences from three dicot and two monocot plants including Arabidopsis thaliana, Boechera stricta, Brachypodium distachyon, Solanum lycopersicum and Zea mays.

Availability and implementation

The NBSPred pipeline is available at http://soilecology.biol.lu.se/nbs/

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

sandeep.kushwaha@biol.lu.se.",2015-12-09 +25711738,Teaching laryngeal electromyography.,"To achieve consensus in the methodology, interpretation, validity, and clinical application of laryngeal electromyography (LEMG), a working group on neurolaryngology from the European Laryngological Society (ELS) was founded in 2010. The main task of the working group was to teach key techniques like LEMG procedures. The objective of this study was to collect information on the teaching techniques used and describe them. A multicenter registry was created to analyze the data collected from LEMGs in 14 departments. We screened how often different departments participated in teaching events. Teaching events were classified retrospectively: presentations at conferences and meetings; workshops with hands-on training on patients; workshops with hands-on training on animal models; workshops with hands-on training on anatomic specimens; and supervision by experts to perform LEMG together. Both, supervision to perform LEMG together and the total number of PCA-LEMGs (r = 0.713), as well as supervision to perform LEMG together and the PCA/total-number-of-LEMG ratio (r = 0.814) were correlated significantly (p < 0.05). Similarly, the sum of teaching events was correlated significantly with the total number of PCA-LEMGs (r = 0.605), and so did the sum of teaching events with the PCA/total-number-of-LEMG ratio (r = 0.704). Participation in hands-on training in humans was correlated significantly with the PCA/total-number-of-LEMG ratio (r = 0.640). The data presented herein suggest that multimodal teaching techniques are most effective. 
To promote multimodal learning an interactive webpage ( http://www.lemg.org) providing videos and animations, and the possibility to discuss cases with other experts was established.",2015-02-25 +26969654,First nationwide web-based surveillance system for influenza-like illness in pregnant women: participation and representativeness of the French G-GrippeNet cohort.,"

Background

Pregnancy is a risk factor for severe influenza resulting in increased risks of hospitalisation and death in mothers and their new-borns. Our objective was to assess the representativeness and participation of French women to a new web-based collaborative tool for data collection and monitoring of Influenza Like Illness (ILI) during pregnancy.

Methods

During the 2014/2015 influenza season, pregnant women living in metropolitan France were enrolled through a web platform ( https://www.grippenet.fr/). Then throughout the season, participants were asked to report, on a weekly basis, if they had experienced symptoms of ILI. Representativeness was assessed by comparing the characteristics of participants to those of the French National Perinatal Survey. For each participant, the participation rate was the number of weekly questionnaires completed, divided by the length of follow-up (in weeks). Predictors of active participation (participation rate >15%) were assessed by multivariate logistic regression.

Results

A total of 153 women were enrolled. Participants were older (mean age 34 years vs. 29 years) and more highly educated (high school level 89% versus 52%) than the general population of pregnant women in France, but the sample did not differ on pregnancy-related characteristics (parity, history of hospitalisation during a previous pregnancy). The median rate of participation was high (78%, interquartile range: 34-96). Higher educational level and participation to a previous GrippeNet.fr season were associated with active participation.

Conclusion

Despite small sample size and lack of representativeness, the retention rate was high, suggesting that pregnant women are prone to adhere to a longitudinal follow-up of their health status via the Internet.",2016-03-11 +25841759,[Penile prosthesis: Systematic review of infectious complications].,"

Objectives

In the absence of practice recommendations, a review of the literature was carried out to establish the epidemiological and bacteriological data, prevention of infection, therapeutic approach according to the clinical situation, as well as the future prospects regarding infections of penile prostheses.

Methods

A systematic review of the scientific literature was realized by the base of Pubmed data (http://www.ncbi.nim.gov/pubmed/). The literature search was made between 1992 and 2014 using the keywords: penile prostheses, penile implant, infection. The article was developed according to the recommendations Preferred reporting items for systematic reviews and meta-analyses 2009 (Prisma).

Results

The analysis of 10 meta-analyses and series published by various expert centers allowed us to synthesize the care recommended at present. Coagulase-negative staphylococci were the germs most frequently responsible, but variations are secondarily observed in current practice. Pathophysiological knowledge (biofilm and risk factors) has allowed the development of antibiotic prophylaxis, measures to prevent infection of the operating site, the design of antimicrobial-impregnated or antibiotic-dipped prostheses, and meticulous surgical technique (""Wash-Out"", ""No Touch""). In case of real infection, it was recommended, in the absence of contraindication, to perform an immediate salvage procedure allowing a new penile prosthesis to be placed, thus avoiding penile fibrosis.

Conclusion

All these measures have induced a decrease of the infection of penile implants significantly as well in case of primary implantation as of surgical revision. The future perspectives aim at preventing the infection by inhibition of the formation of the biofilm and by a more effective action of antibiotics about germs which it contains; or to use devices intrapenile ""spacer"" when the immediate salvage procedure is not feasible to facilitate the next implantation.",2015-04-02 +26504145,NMRe: a web server for NMR protein structure refinement with high-quality structure validation scores.,"

Unlabelled

Protein structure refinement is a necessary step for the study of protein function. In particular, some nuclear magnetic resonance (NMR) structures are of lower quality than X-ray crystallographic structures. Here, we present NMRe, a web-based server for NMR structure refinement. The previously developed knowledge-based energy function STAP (Statistical Torsion Angle Potential) was used for NMRe refinement. With STAP, NMRe provides two refinement protocols using two types of distance restraints. If a user provides NOE (Nuclear Overhauser Effect) data, the refinement is performed with the NOE distance restraints as a conventional NMR structure refinement. Additionally, NMRe generates NOE-like distance restraints based on the inter-hydrogen distances derived from the input structure. The efficiency of NMRe refinement was validated on 20 NMR structures. Most of the quality assessment scores of the refined NMR structures were better than those of the original structures. The refinement results are provided as a three-dimensional structure view, a secondary structure scheme, and numerical and graphical structure validation scores.

Availability and implementation

NMRe is available at http://psb.kobic.re.kr/nmre/.",2015-10-26 +25352545,A series of PDB-related databanks for everyday needs.,"We present a series of databanks (http://swift.cmbi.ru.nl/gv/facilities/) that hold information that is computationally derived from Protein Data Bank (PDB) entries and that might augment macromolecular structure studies. These derived databanks run parallel to the PDB, i.e. they have one entry per PDB entry. Several of the well-established databanks such as HSSP, PDBREPORT and PDB_REDO have been updated and/or improved. The software that creates the DSSP databank, for example, has been rewritten to better cope with π-helices. A large number of databanks have been added to aid computational structural biology; some examples are lists of residues that make crystal contacts, lists of contacting residues using a series of contact definitions or lists of residue accessibilities. PDB files are not the optimal presentation of the underlying data for many studies. We therefore made a series of databanks that hold PDB files in an easier to use or more consistent representation. The BDB databank holds X-ray PDB files with consistently represented B-factors. We also added several visualization tools to aid the users of our databanks.",2014-10-28 +26427498,Prediction of retention characteristics of heterocyclic compounds.,"The CORAL software ( http://www.insilico.eu/coral ) was used to build up quantitative structure-property relationships (QSPRs) for the retention characteristics of 93 derivatives of three groups of heterocyclic compounds: 2-phenyl-1,3-benzoxazoles, 4-benzylsulfanylpyridines, and benzoxazines. The QSPRs are one-variable models based on the optimal descriptors calculated from the molecular structure represented by simplified molecular input-line entry systems (SMILES). Each symbol (or two undivided symbols) of SMILES is characterized by correlation weight. The optimal descriptor is the sum of the correlation weights. 
The numerical data on the correlation weights were calculated with the Monte Carlo method by the manner which provides best correlation between endpoint and optimal descriptor for the calibration set. The predictive ability of the model is checked with the validation set (compounds invisible during building up of the model). The approach has been checked with three random splits into the training, calibration, and validation sets: all models have apparent predictive potential. The mechanistic interpretation of the molecular features extracted from SMILES as the promoters of increase or decrease of examined endpoints is suggested.",2015-10-01 +21278191,Bambino: a variant detector and alignment viewer for next-generation sequencing data in the SAM/BAM format.,"

Summary

Bambino is a variant detector and graphical alignment viewer for next-generation sequencing data in the SAM/BAM format, which is capable of pooling data from multiple source files. The variant detector takes advantage of SAM-specific annotations, and produces detailed output suitable for genotyping and identification of somatic mutations. The assembly viewer can display reads in the context of either a user-provided or automatically generated reference sequence, retrieve genome annotation features from a UCSC genome annotation database, display histograms of non-reference allele frequencies, and predict protein-coding changes caused by SNPs.

Availability

Bambino is written in platform-independent Java and available from https://cgwb.nci.nih.gov/goldenPath/bamview/documentation/index.html, along with documentation and example data. Bambino may be launched online via Java Web Start or downloaded and run locally.",2011-01-28 +22417913,A new database (GCD) on genome composition for eukaryote and prokaryote genome sequences and their initial analyses.,"Eukaryote genomes contain many noncoding regions, and they are quite complex. To understand these complexities, we constructed a database, Genome Composition Database, for the whole genome composition statistics for 101 eukaryote genome data, as well as more than 1,000 prokaryote genomes. Frequencies of all possible one to ten oligonucleotides were counted for each genome, and these observed values were compared with expected values computed under observed oligonucleotide frequencies of length 1-4. Deviations from expected values were much larger for eukaryotes than prokaryotes, except for fungal genomes. Mammalian genomes showed the largest deviation among animals. The results of comparison are available online at http://esper.lab.nig.ac.jp/genome-composition-database/.",2012-03-14 +21751342,A 2-D gel reference map of the basic human heart proteome.,"We have undertaken the identification of basic proteins (pH 6-11) of the human heart using 2-DE. Tissue from the left ventricle of human heart was lysed and proteins were separated in the first dimension on pH 6-11 IPG strips using paper-bridge loading followed by separation on 12% SDS polyacrylamide gels in the second dimension. Proteins were then identified by mass spectrometry and analysed using Proline, a proteomic data analysis platform that was developed in-house. The proteome map contains 176 identified spots with 151 unique proteins and has been made available as part of the UCD-2DPAGE database at http://proteomics-portal.ucd.ie:8082. 
The associated mass spectrometry data have been submitted to PRIDE (Accession number ♯10098). This reference map, and the other heart reference maps available through the UCD-2DPAGE database, will aid further proteomic studies of heart diseases such as dilated cardiomyopathy and ischaemic heart disease.",2011-08-04 +21854555,A first-generation integrated tammar wallaby map and its use in creating a tammar wallaby first-generation virtual genome map.,"

Background

The limited (2X) coverage of the tammar wallaby (Macropus eugenii) genome sequence dataset currently presents a challenge for assembly and anchoring onto chromosomes. To provide a framework for this assembly, it would be a great advantage to have a dense map of the tammar wallaby genome. However, only limited mapping data are available for this non-model species, comprising a physical map and a linkage map.

Results

We combined all available tammar wallaby mapping data to create a tammar wallaby integrated map, using the Location DataBase (LDB) strategy. This first-generation integrated map combines all available information from the second-generation tammar wallaby linkage map with 148 loci, and extensive FISH mapping data for 492 loci, especially for genes likely to be located at the ends of wallaby chromosomes or at evolutionary breakpoints inferred from comparative information. For loci whose positions are only approximately known, their location in the integrated map was refined on the basis of comparative information from opossum (Monodelphis domestica) and human. Interpolation of segments from the opossum and human assemblies into the integrated map enabled the subsequent construction of a tammar wallaby first-generation virtual genome map, which comprises 14336 markers, including 13783 genes recruited from opossum and human assemblies. Both maps are freely available at http://compldb.angis.org.au.

Conclusions

The first-generation integrated map and the first-generation virtual genome map provide a backbone for the chromosome assembly of the tammar wallaby genome sequence. For example, 78% of the 10257 gene-scaffolds in the Ensembl annotation of the tammar wallaby genome sequence (including 10522 protein-coding genes) can now be given a chromosome location in the tammar wallaby virtual genome map.",2011-08-19 +25256930,World Endometriosis Research Foundation Endometriosis Phenome and Biobanking Harmonization Project: II. Clinical and covariate phenotype data collection in endometriosis research.,"

Objective

To harmonize the collection of nonsurgical clinical and epidemiologic data relevant to endometriosis research, allowing large-scale collaboration.

Design

An international collaboration involving 34 clinical/academic centers and three industry collaborators from 16 countries on five continents.

Setting

In 2013, two workshops followed by global consultation, bringing together 54 leaders in endometriosis research.

Patients

None.

Intervention(s)

Development of a self-administered endometriosis patient questionnaire (EPQ), based on [1] systematic comparison of questionnaires from eight centers that collect data from endometriosis cases (and controls/comparison women) on a medium to large scale (publication on >100 cases); [2] literature evidence; and [3] several global consultation rounds.

Main outcome measure(s)

Standard recommended and minimum required questionnaires to capture detailed clinical and covariate data.

Result(s)

The standard recommended (EPHect EPQ-S) and minimum required (EPHect EPQ-M) questionnaires contain questions on pelvic pain, subfertility and menstrual/reproductive history, hormone/medication use, medical history, and personal information.

Conclusion(s)

The EPQ captures the basic set of patient characteristics and exposures considered by the WERF EPHect Working Group to be most critical for the advancement of endometriosis research, but is also relevant to other female conditions with similar risk factors and/or symptomatology. The instruments will be reviewed based on feedback from investigators, and-after a first review after 1 year-triannually through systematic follow-up surveys. Updated versions will be made available through http://endometriosisfoundation.org/ephect.",2014-09-22 +27266903,Serum Levels of Persistent Organic Pollutants and Insulin Secretion among Children Age 7-9 Years: A Prospective Cohort Study.,"

Background

Persistent organic pollutants (POPs) are endocrine disruptors and have been suggested as possible risk factors for diabetes. Few studies have been performed to investigate this association among children.

Objectives

In this study, we prospectively examined the relationship between the serum concentration of POPs and glucose metabolism in children.

Methods

Data were collected from the Ewha Birth & Growth Cohort Study, an ongoing birth cohort study initially constructed between 2001 and 2006. In 2010-2012, the POP concentration was measured in serum from a total of 214 children, 7-9 years of age. Using fasting glucose and insulin measurements at both baseline and the second year of follow-up, the homeostatic model assessment of beta-cell function (HOMA-β) and homeostatic model assessment of insulin resistance (HOMA-IR) were calculated. Multiple linear regression analysis and a linear mixed-effects model were used to determine the relationship between POP tertiles and metabolic biomarkers.

Results

Compared with the lowest tertile of total marker PCBs, participants in the third tertile had decreased HOMA-β values, after adjustment for age, sex, body mass index z-score, mother's education, ponderal index, and history of breastfeeding (-18.94%; 95% CI: -32.97%, -1.98%). In a linear mixed model, the HOMA-β values were still lower in subjects in the highest compared with the lowest tertile of total PCBs at the 2-year follow-up period (108.3 vs. 135.0, respectively).

Conclusion

The results of the study suggested that exposure to POPs among children might affect insulin secretory function, which could lead to an increased risk of developing diabetes. Citation: Park SH, Ha EH, Hong YS, Park H. 2016. Serum levels of persistent organic pollutants and insulin secretion among children age 7-9 years: a prospective cohort study. Environ Health Perspect 124:1924-1930; http://dx.doi.org/10.1289/EHP147.",2016-06-07 +23176546,Development of a gene expression database and related analysis programs for evaluation of anticancer compounds.,"Genome-wide transcriptional expression analysis is a powerful strategy for characterizing the biological activity of anticancer compounds. It is often instructive to identify gene sets involved in the activity of a given drug compound for comparison with different compounds. Currently, however, there is no comprehensive gene expression database and related application system that is; (i) specialized in anticancer agents; (ii) easy to use; and (iii) open to the public. To develop a public gene expression database of antitumor agents, we first examined gene expression profiles in human cancer cells after exposure to 35 compounds including 25 clinically used anticancer agents. Gene signatures were extracted that were classified as upregulated or downregulated after exposure to the drug. Hierarchical clustering showed that drugs with similar mechanisms of action, such as genotoxic drugs, were clustered. Connectivity map analysis further revealed that our gene signature data reflected modes of action of the respective agents. Together with the database, we developed analysis programs that calculate scores for ranking changes in gene expression and for searching statistically significant pathways from the Kyoto Encyclopedia of Genes and Genomes database in order to analyze the datasets more easily. Our database and the analysis programs are available online at our website (http://scads.jfcr.or.jp/db/cs/). 
Using these systems, we successfully showed that proteasome inhibitors are selectively classified as endoplasmic reticulum stress inducers and induce atypical endoplasmic reticulum stress. Thus, our public access database and related analysis programs constitute a set of efficient tools to evaluate the mode of action of novel compounds and identify promising anticancer lead compounds.",2013-01-04 +25399415,GenoBase: comprehensive resource database of Escherichia coli K-12.,"Comprehensive experimental resources, such as ORFeome clone libraries and deletion mutant collections, are fundamental tools for elucidation of gene function. Data sets by omics analysis using these resources provide key information for functional analysis, modeling and simulation both in individual and systematic approaches. With the long-term goal of complete understanding of a cell, we have over the past decade created a variety of clone and mutant sets for functional genomics studies of Escherichia coli K-12. We have made these experimental resources freely available to the academic community worldwide. Accordingly, these resources have now been used in numerous investigations of a multitude of cell processes. Quality control is extremely important for evaluating results generated by these resources. Because the annotation has been changed since 2005, which we originally used for the construction, we have updated these genomic resources accordingly. Here, we describe GenoBase (http://ecoli.naist.jp/GB/), which contains key information about comprehensive experimental resources of E. coli K-12, their quality control and several omics data sets generated using these resources.",2014-11-15 +24026093,Estimating individual admixture proportions from next generation sequencing data.,"Inference of population structure and individual ancestry is important both for population genetics and for association studies. 
With next generation sequencing technologies it is possible to obtain genetic data for all accessible genetic variations in the genome. Existing methods for admixture analysis rely on known genotypes. However, individual genotypes cannot be inferred from low-depth sequencing data without introducing errors. This article presents a new method for inferring an individual's ancestry that takes the uncertainty introduced in next generation sequencing data into account. This is achieved by working directly with genotype likelihoods that contain all relevant information of the unobserved genotypes. Using simulations as well as publicly available sequencing data, we demonstrate that the presented method has great accuracy even for very low-depth data. At the same time, we demonstrate that applying existing methods to genotypes called from the same data can introduce severe biases. The presented method is implemented in the NGSadmix software available at http://www.popgen.dk/software.",2013-09-11 +27412868,Everolimus-Eluting Stents in Patients With Bare-Metal and Drug-Eluting In-Stent Restenosis: Results From a Patient-Level Pooled Analysis of the RIBS IV and V Trials. ,"Treatment of patients with drug-eluting stent (DES) in-stent restenosis (ISR) is more challenging than that of patients with bare-metal stent ISR. However, the results of everolimus-eluting stents (EES) in these distinct scenarios remain unsettled. A pooled analysis of the RIBS IV (Restenosis Intra-Stent of Drug-Eluting Stents: Paclitaxel-Eluting Balloon vs Everolimus-Eluting Stent) and RIBS V (Restenosis Intra-Stent of Bare Metal Stents: Paclitaxel-Eluting Balloon vs Everolimus-Eluting Stent) randomized trials was performed using patient-level data to compare the efficacy of EES in bare-metal stent ISR and DES-ISR. Inclusion and exclusion criteria were identical in both trials. 
Results of 94 patients treated with EES for bare-metal stent ISR were compared with those of 155 patients treated with EES for DES-ISR. Baseline characteristics were more adverse in patients with DES-ISR, although they presented later and more frequently with a focal pattern. After intervention, minimal lumen diameter (2.22±0.5 versus 2.38±0.5 mm, P=0.01) was smaller in the DES-ISR group. Late angiographic findings (89.3% of eligible patients), including minimal lumen diameter (2.03±0.7 versus 2.36±0.6 mm, P<0.001) and diameter stenosis (23±22 versus 13±17%, P<0.001) were poorer in patients with DES-ISR. Results were consistent in the in-segment and in-lesion analyses. On multiple linear regression analysis, minimal lumen diameter at follow-up remained significantly smaller in patients with DES-ISR. Finally, at 1-year clinical follow-up (100% of patients), mortality (2.6 versus 0%, P<0.01) and need for target vessel revascularization (8 versus 2%, P=0.03) were higher in the DES-ISR group. This patient-level pooled analysis of the RIBS IV and RIBS V randomized clinical trials suggests that EES provide favorable outcomes in patients with ISR. However, the results of EES are less satisfactory in patients with DES-ISR than in those with bare-metal stent ISR. URL: http://www.clinicaltrials.gov. Unique identifiers: NCT01239953 and NCT01239940.",2016-07-01 +25082147,Transcriptome assembly and quantification from Ion Torrent RNA-Seq data.,"

Background

High throughput RNA sequencing (RNA-Seq) can generate whole transcriptome information at the single transcript level providing a powerful tool with multiple interrelated applications including transcriptome reconstruction and quantification. The sequences of novel transcripts can be reconstructed from deep RNA-Seq data, but this is computationally challenging due to sequencing errors, uneven coverage of expressed transcripts, and the need to distinguish between highly similar transcripts produced by alternative splicing. Another challenge in transcriptomic analysis comes from the ambiguities in mapping reads to transcripts.

Results

We present MaLTA, a method for simultaneous transcriptome assembly and quantification from Ion Torrent RNA-Seq data. Our approach explores transcriptome structure and incorporates a maximum likelihood model into the assembly and quantification procedure. A new version of the IsoEM algorithm suitable for Ion Torrent RNA-Seq reads is used to accurately estimate transcript expression levels. The MaLTA-IsoEM tool is publicly available at: http://alan.cs.gsu.edu/NGS/?q=malta

Conclusions

Experimental results on both synthetic and real datasets show that Ion Torrent RNA-Seq data can be successfully used for transcriptome analyses. Experimental results suggest increased transcriptome assembly and quantification accuracy of MaLTA-IsoEM solution compared to existing state-of-the-art approaches.",2014-07-14 +25693513,Optimized distance-dependent atom-pair-based potential DOOP for protein structure prediction.,"The DOcking decoy-based Optimized Potential (DOOP) energy function for protein structure prediction is based on empirical distance-dependent atom-pair interactions. To optimize the atom-pair interactions, native protein structures are decomposed into polypeptide chain segments that correspond to structural motives involving complete secondary structure elements. They constitute near native ligand-receptor systems (or just pairs). Thus, a total of 8609 ligand-receptor systems were prepared from 954 selected proteins. For each of these hypothetical ligand-receptor systems, 1000 evenly sampled docking decoys with 0-10 Å interface root-mean-square-deviation (iRMSD) were generated with a method used before for protein-protein docking. A neural network-based optimization method was applied to derive the optimized energy parameters using these decoys so that the energy function mimics the funnel-like energy landscape for the interaction between these hypothetical ligand-receptor systems. Thus, our method hierarchically models the overall funnel-like energy landscape of native protein structures. The resulting energy function was tested on several commonly used decoy sets for native protein structure recognition and compared with other statistical potentials. In combination with a torsion potential term which describes the local conformational preference, the atom-pair-based potential outperforms other reported statistical energy functions in correct ranking of native protein structures for a variety of decoy sets. 
This is especially the case for the most challenging ROSETTA decoy set, although it does not take into account side chain orientation-dependence explicitly. The DOOP energy function for protein structure prediction, the underlying database of protein structures with hypothetical ligand-receptor systems and their decoys are freely available at http://agknapp.chemie.fu-berlin.de/doop/.",2015-03-25 +22972132,Topical cyclosporine for atopic keratoconjunctivitis.,"

Background

Atopic keratoconjunctivitis (AKC) is a chronic ocular surface non-infectious inflammatory condition that atopic dermatitis patients may suffer at any time point in the course of their dermatologic disease and is independent of its degree of severity. AKC is usually not self resolving and it poses a higher risk of corneal injuries and severe sequelae. Management of AKC should prevent or treat corneal damage. Although topical corticosteroids remain the standard treatment for patients with AKC, prolonged use may lead to complications. Topical cyclosporine A (CsA) may improve AKC signs and symptoms, and be used as a corticosteroid sparing agent.

Objectives

To determine the efficacy and gather evidence on safety from randomised controlled trials (RCTs) of topical CsA in patients with AKC.

Search methods

We searched CENTRAL (which contains the Cochrane Eyes and Vision Group Trials Register) (The Cochrane Library 2012, Issue 6), MEDLINE (January 1946 to July 2012), EMBASE (January 1980 to July 2012), Latin American and Caribbean Literature on Health Sciences (LILACS) (January 1982 to July 2012), Cumulative Index to Nursing and Allied Health Literature (CINAHL) (January 1937 to July 2012), OpenGrey (System for Information on Grey Literature in Europe) (www.opengrey.eu/), the metaRegister of Controlled Trials (mRCT) (www.controlled-trials.com), ClinicalTrials.gov (www.clinicaltrials.gov), the WHO International Clinical Trials Registry Platform (ICTRP) (www.who.int/ictrp/search/en), the IFPMA Clinical Trials Portal (http://clinicaltrials.ifpma.org/no_cache/en/myportal/index.htm) and Web of Science Conference Proceedings Citation Index- Science (CPCI-S). We did not use any date or language restrictions in the electronic searches for trials. The electronic databases were last searched on 9 July 2012. We also handsearched the following conference proceedings: American Academy of Ophthalmology, Association for Research in Vision and Ophthalmology, International Council of Ophthalmology and Societas Ophthalmologica Europaea from 2005 to July 2011.

Selection criteria

We included randomised controlled trials only.

Data collection and analysis

Two review authors independently extracted data. Due to the small number of studies and the diversity of outcome measures, interventions and participants, we presented results narratively.

Main results

We found three RCTs with a total of 58 participants that were eligible for inclusion. There was significant variability between the trials in interventions, methodology and outcome measures and therefore we did not perform meta-analysis. One study reported on the use of 2% CsA in maize oil and two on the use of a commercial emulsion of 0.05% CsA. Of these three studies, one showed a beneficial effect of topical CsA in controlling signs and symptoms of AKC, one in controlling signs of AKC and one did not show evidence of an improvement. Only two studies analysed the effect of topical CsA in reducing topical steroid use; one showed a significant reduction in topical steroid use with CsA, but the other did not show evidence of this improvement. No serious adverse events were reported in the trials.

Authors' conclusions

This systematic review highlights the relative scarcity of controlled clinical trials assessing the efficacy of topical CsA therapy in AKC and suggests that evidence on the efficacy and safety of topical CsA treatment in patients with AKC remains limited. However, the data suggest that topical CsA may provide clinical and symptomatic relief in AKC and may help to reduce topical steroid use in patients with steroid-dependent or steroid-resistant AKC. No serious adverse events were reported. Reported adverse events in patients treated with topical CsA include intense stinging and eyelid skin maceration. One patient in the placebo group developed a severe allergic response to maize antigens. However, the total number of patients in the trials was too small to assess the safety of this treatment. Additional well-designed and powered RCTs of topical CsA in AKC are needed. Ideal study designs should include adequate randomisation and concealment of allocation; masking of participants, personnel and outcome assessors; adequate follow-up periods and minimisation of attrition bias; and comparison groups with similar clinical and epidemiologic characteristics. Samples should be large enough to provide sufficient statistical power to assess the safety of CsA and to detect clinically relevant treatment effect sizes of the primary outcomes. Analyses should be appropriate to the study's design and outcome measures. Moreover, standardisation of outcome measures and follow-up periods across studies would be beneficial to maximise study comparability.",2012-09-12 +26338660,bz-rates: A Web Tool to Estimate Mutation Rates from Fluctuation Analysis.,"Fluctuation analysis is the standard experimental method for measuring mutation rates in micro-organisms. The appearance of mutants is classically described by a Luria-Delbrück distribution composed of two parameters: the number of mutations per culture (m) and the differential growth rate between mutant and wild-type cells (b). 
A precise estimation of these two parameters is a prerequisite to the calculation of the mutation rate. Here, we developed bz-rates, a Web tool to calculate mutation rates that provides three useful advances over existing Web tools. First, it allows taking into account b, the differential growth rate between mutant and wild-type cells, in the estimation of m with the generating function. Second, bz-rates allows the user to take into account a deviation from the Luria-Delbrück distribution called z, the plating efficiency, in the estimation of m. Finally, the Web site provides a graphical visualization of the goodness-of-fit between the experimental data and the model. bz-rates is accessible at http://www.lcqb.upmc.fr/bzrates.",2015-09-02 +25451770,CytoNCA: a cytoscape plugin for centrality analysis and evaluation of protein interaction networks.,"

Background and scope

Nowadays, centrality analysis has become a principal method for identifying essential proteins in biological networks. Here we present CytoNCA, a Cytoscape plugin integrating calculation, evaluation and visualization analysis for multiple centrality measures.

Implementation and performance

(i) CytoNCA supports eight different centrality measures and each can be applied to both weighted and unweighted biological networks. (ii) It allows users to upload biological information of both nodes and edges in the network, to integrate biological data with topological data to detect specific nodes. (iii) CytoNCA offers multiple potent visualization analysis modules, which generate various forms of output such as graph, table, and chart, and analyze associations among all measures. (iv) It can be utilized to quantitatively assess the calculation results, and evaluate the accuracy by statistical measures. (v) Besides current eight centrality measures, the biological characters from other sources could also be analyzed and assessed by CytoNCA. This makes CytoNCA an excellent tool for calculating centrality, evaluating and visualizing biological networks.

Availability

http://apps.cytoscape.org/apps/cytonca.",2014-11-15 +25187691,TmiRUSite and TmiROSite scripts: searching for mRNA fragments with miRNA binding sites with encoded amino acid residues.,

Unlabelled

microRNAs are small RNA molecules that inhibit the translation of target genes. microRNA binding sites are located in the untranslated regions as well as in the coding domains. We describe TmiRUSite and TmiROSite scripts developed using python as tools for the extraction of nucleotide sequences for miRNA binding sites with their encoded amino acid residue sequences. The scripts allow for retrieving a set of additional sequences at left and at right from the binding site. The scripts present all received data in table formats that are easy to analyse further. The predicted data finds utility in molecular and evolutionary biology studies. They find use in studying miRNA binding sites in animals and plants.

Availability

TmiRUSite and TmiROSite scripts are available for free from authors upon request and at https://sites.google.com/site/malaheenee/downloads for download.,2014-07-22 +24475928,geneCommittee: a web-based tool for extensively testing the discriminatory power of biologically relevant gene sets in microarray data classification.,"

Background

The diagnosis and prognosis of several diseases can be shortened through the use of different large-scale genome experiments. In this context, microarrays can generate expression data for a huge set of genes. However, to obtain solid statistical evidence from the resulting data, it is necessary to train and to validate many classification techniques in order to find the best discriminative method. This is a time-consuming process that normally depends on intricate statistical tools.

Results

geneCommittee is a web-based interactive tool for routinely evaluating the discriminative classification power of custom hypothesis in the form of biologically relevant gene sets. While the user can work with different gene set collections and several microarray data files to configure specific classification experiments, the tool is able to run several tests in parallel. Provided with a straightforward and intuitive interface, geneCommittee is able to render valuable information for diagnostic analyses and clinical management decisions based on systematically evaluating custom hypothesis over different data sets using complementary classifiers, a key aspect in clinical research.

Conclusions

geneCommittee allows the enrichment of microarrays raw data with gene functional annotations, producing integrated datasets that simplify the construction of better discriminative hypothesis, and allows the creation of a set of complementary classifiers. The trained committees can then be used for clinical research and diagnosis. Full documentation including common use cases and guided analysis workflows is freely available at http://sing.ei.uvigo.es/GC/.",2014-01-30 +23204843,Connecting research discovery with care delivery in dementia: the development of the Indianapolis Discovery Network for Dementia.,"

Background

The US Institute of Medicine has recommended an integrated, locally sensitive collaboration among the various members of the community, health care systems, and research organizations to improve dementia care and dementia research.

Methods

Using complex adaptive system theory and reflective adaptive process, we developed a professional network called the ""Indianapolis Discovery Network for Dementia"" (IDND). The IDND facilitates effective and sustainable interactions among a local and diverse group of dementia researchers, clinical providers, and community advocates interested in improving care for dementia patients in Indianapolis, Indiana.

Results

The IDND was established in February 2006 and now includes more than 250 members from more than 30 local (central Indiana) organizations representing 20 disciplines. The network uses two types of communication to connect its members. The first is a 2-hour face-to-face bimonthly meeting open to all members. The second is a web-based resource center (http://www.indydiscoverynetwork.org). To date, the network has: (1) accomplished the development of a network website with an annual average of 12,711 hits per day; (2) produced clinical tools such as the Healthy Aging Brain Care Monitor and the Anticholinergic Cognitive Burden Scale; (3) translated and implemented the collaborative dementia care model into two local health care systems; (4) created web-based tracking software, the Enhanced Medical Record for Aging Brain Care (eMR-ABC), to support care coordination for patients with dementia; (5) received more than USD$24 million in funding for members for dementia-related research studies; and (6) adopted a new group-based problem-solving process called the ""IDND consultancy round.""

Conclusion

A local interdisciplinary ""think-tank"" network focused on dementia that promotes collaboration in research projects, educational initiatives, and quality improvement efforts that meet the local research, clinical, and community needs relevant to dementia care has been built.",2012-11-16 +24399965,pySPACE-a signal processing and classification environment in Python.,"In neuroscience large amounts of data are recorded to provide insights into cerebral information processing and function. The successful extraction of the relevant signals becomes more and more challenging due to increasing complexities in acquisition techniques and questions addressed. Here, automated signal processing and machine learning tools can help to process the data, e.g., to separate signal and noise. With the presented software pySPACE (http://pyspace.github.io/pyspace), signal processing algorithms can be compared and applied automatically on time series data, either with the aim of finding a suitable preprocessing, or of training supervised algorithms to classify the data. pySPACE originally has been built to process multi-sensor windowed time series data, like event-related potentials from the electroencephalogram (EEG). The software provides automated data handling, distributed processing, modular build-up of signal processing chains and tools for visualization and performance evaluation. Included in the software are various algorithms like temporal and spatial filters, feature generation and selection, classification algorithms, and evaluation schemes. Further, interfaces to other signal processing tools are provided and, since pySPACE is a modular framework, it can be extended with new algorithms according to individual needs. In the presented work, the structural hierarchies are described. It is illustrated how users and developers can interface the software and execute offline and online modes. 
Configuration of pySPACE is realized with the YAML format, so that programming skills are not mandatory for usage. The concept of pySPACE is to have one comprehensive tool that can be used to perform complete signal processing and classification tasks. It further allows to define own algorithms, or to integrate and use already existing libraries.",2013-12-24 +23087376,IUPHAR-DB: updated database content and new features.,"The International Union of Basic and Clinical Pharmacology (IUPHAR) database, IUPHAR-DB (http://www.iuphar-db.org) is an open access, online database providing detailed, expert-driven annotation of the primary literature on human and rodent receptors and other drug targets, together with the substances that act on them. The present release includes information on the products of 646 genes from four major protein classes (G protein-coupled receptors, nuclear hormone receptors, voltage- and ligand-gated ion channels) and ∼3180 bioactive molecules (endogenous ligands, licensed drugs and key pharmacological tools) that interact with them. We have described previously the classification and curation of data for small molecule ligands in the database; in this update we have annotated 366 endogenous peptide ligands with their amino acid sequences, post-translational modifications, links to precursor genes, species differences and relationships with other molecules in the database (e.g. those derived from the same precursor). We have also matched targets with their endogenous ligands (peptides and small molecules), with particular attention paid to identifying bioactive peptide ligands generated by post-translational modification of precursor proteins. 
Other improvements to the database include enhanced information on the clinical relevance of targets and ligands in the database, more extensive links to other databases and a pilot project for the curation of enzymes as drug targets.",2012-10-18 +22637737,Proliferative and nonproliferative lesions of the rat and mouse central and peripheral nervous systems.,"Harmonization of diagnostic nomenclature used in the pathology analysis of tissues from rodent toxicity studies will enhance the comparability and consistency of data sets from different laboratories worldwide. The INHAND Project (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) is a joint initiative of four major societies of toxicologic pathology to develop a globally recognized nomenclature for proliferative and nonproliferative lesions in rodents. This article recommends standardized terms for classifying changes observed in tissues of the mouse and rat central (CNS) and peripheral (PNS) nervous systems. Sources of material include academic, government, and industrial histopathology databases from around the world. Covered lesions include frequent, spontaneous, and aging-related changes as well as principal toxicant-induced findings. Common artifacts that might be confused with genuine lesions are also illustrated. The neural nomenclature presented in this document is also available electronically on the Internet at the goRENI website (http://www.goreni.org/).",2012-06-01 +26694538,PredβTM: A Novel β-Transmembrane Region Prediction Algorithm.,"Predicting the transmembrane regions is an important aspect of understanding the structures and architecture of different β-barrel membrane proteins. Despite significant efforts, currently available β-transmembrane region predictors are still limited in terms of prediction accuracy, especially in precision. Here, we describe PredβTM, a transmembrane region prediction algorithm for β-barrel proteins. 
Using amino acid pair frequency information in known β-transmembrane protein sequences, we have trained a support vector machine classifier to predict β-transmembrane segments. Position-specific amino acid preference data is incorporated in the final prediction. The predictor does not incorporate evolutionary profile information explicitly, but is based on sequence patterns generated implicitly by encoding the protein segments using amino acid adjacency matrix. With a benchmark set of 35 β-transmembrane proteins, PredβTM shows a sensitivity and precision of 83.71% and 72.98%, respectively. The segment overlap score is 82.19%. In comparison with other state-of-art methods, PredβTM provides a higher precision and segment overlap without compromising with sensitivity. Further, we applied PredβTM to analyze the β-barrel membrane proteins without defined transmembrane regions and the uncharacterized protein sequences in eight bacterial genomes and predict possible β-transmembrane proteins. PredβTM can be freely accessed on the web at http://transpred.ki.si/.",2015-12-22 +25266225,GLAD: a mixed-membership model for heterogeneous tumor subtype classification.,"

Motivation

Genomic analyses of many solid cancers have demonstrated extensive genetic heterogeneity between as well as within individual tumors. However, statistical methods for classifying tumors by subtype based on genomic biomarkers generally entail an all-or-none decision, which may be misleading for clinical samples containing a mixture of subtypes and/or normal cell contamination.

Results

We have developed a mixed-membership classification model, called glad, that simultaneously learns a sparse biomarker signature for each subtype as well as a distribution over subtypes for each sample. We demonstrate the accuracy of this model on simulated data, in-vitro mixture experiments, and clinical samples from the Cancer Genome Atlas (TCGA) project. We show that many TCGA samples are likely a mixture of multiple subtypes.

Availability

A python module implementing our algorithm is available from http://genomics.wpi.edu/glad/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-29 +22759648,Automated extraction and semantic analysis of mutation impacts from the biomedical literature.,"

Background

Mutations as sources of evolution have long been the focus of attention in the biomedical literature. Accessing the mutational information and their impacts on protein properties facilitates research in various domains, such as enzymology and pharmacology. However, manually curating the rich and fast growing repository of biomedical literature is expensive and time-consuming. As a solution, text mining approaches have increasingly been deployed in the biomedical domain. While the detection of single-point mutations is well covered by existing systems, challenges still exist in grounding impacts to their respective mutations and recognizing the affected protein properties, in particular kinetic and stability properties together with physical quantities.

Results

We present an ontology model for mutation impacts, together with a comprehensive text mining system for extracting and analysing mutation impact information from full-text articles. Organisms, as sources of proteins, are extracted to help disambiguation of genes and proteins. Our system then detects mutation series to correctly ground detected impacts using novel heuristics. It also extracts the affected protein properties, in particular kinetic and stability properties, as well as the magnitude of the effects and validates these relations against the domain ontology. The output of our system can be provided in various formats, in particular by populating an OWL-DL ontology, which can then be queried to provide structured information. The performance of the system is evaluated on our manually annotated corpora. In the impact detection task, our system achieves a precision of 70.4%-71.1%, a recall of 71.3%-71.5%, and grounds the detected impacts with an accuracy of 76.5%-77%. The developed system, including resources, evaluation data and end-user and developer documentation is freely available under an open source license at http://www.semanticsoftware.info/open-mutation-miner.

Conclusion

We present Open Mutation Miner (OMM), the first comprehensive, fully open-source approach to automatically extract impacts and related relevant information from the biomedical literature. We assessed the performance of our work on manually annotated corpora and the results show the reliability of our approach. The representation of the extracted information into a structured format facilitates knowledge management and aids in database curation and correction. Furthermore, access to the analysis results is provided through multiple interfaces, including web services for automated data integration and desktop-based solutions for end user interactions.",2012-06-18 +26037051,Development and validation of a brain maturation index using longitudinal neuroanatomical scans.,"

Background

Major psychiatric disorders are increasingly being conceptualized as 'neurodevelopmental', because they are associated with aberrant brain maturation. Several studies have hypothesized that a brain maturation index integrating patterns of neuroanatomical measurements may reliably identify individual subjects deviating from a normative neurodevelopmental trajectory. However, while recent studies have shown great promise in developing accurate brain maturation indices using neuroimaging data and multivariate machine learning techniques, this approach has not been validated using a large sample of longitudinal data from children and adolescents.

Methods

T1-weighted scans from 303 healthy subjects aged 4.88 to 18.35years were acquired from the National Institute of Health (NIH) pediatric repository (http://www.pediatricmri.nih.gov). Out of the 303 subjects, 115 subjects were re-scanned after 2years. The least absolute shrinkage and selection operator algorithm (LASSO) was 'trained' to integrate neuroanatomical changes across chronological age and predict each individual's brain maturity. The resulting brain maturation index was developed using first-visit scans only, and was validated using second-visit scans.

Results

We report a high correlation between the first-visit chronological age and brain maturation index (r=0.82, mean absolute error or MAE=1.69years), and a high correlation between the second-visit chronological age and brain maturation index (r=0.83, MAE=1.71years). The brain maturation index captured neuroanatomical volume changes between the first and second visits with an MAE of 0.27years.

Conclusions

The brain maturation index developed in this study accurately predicted individual subjects' brain maturation longitudinally. Due to its strong clinical potentials in identifying individuals with an abnormal brain maturation trajectory, the brain maturation index may allow timely clinical interventions for individuals at risk for psychiatric disorders.",2015-05-30 +21357905,In the clinic. Herpes zoster.,"This issue provides a clinical overview of herpes zoster focusing on prevention, diagnosis, treatment, practice improvement, and patient information. Readers can complete the accompanying CME quiz for 1.5 credits. Only ACP members and individual subscribers can access the electronic features of In the Clinic. Non-subscribers who wish to access this issue of In the Clinic can elect ""Pay for View."" Subscribers can receive 1.5 category 1 CME credits by completing the CME quiz that accompanies this issue of In the Clinic. The content of In the Clinic is drawn from the clinical information and education resources of the American College of Physicians (ACP), including PIER (Physicians' Information and Education Resource) and MKSAP (Medical Knowledge and Self Assessment Program). Annals of Internal Medicine editors develop In the Clinic from these primary sources in collaboration with the ACP's Medical Education and Publishing division and with assistance of science writers and physician writers. Editorial consultants from PIER and MKSAP provide expert review of the content. Readers who are interested in these primary resources for more detail can consult www.acponline.org, http://pier.acponline.org, and other resources referenced within each issue of In the Clinic.",2011-03-01 +22406755,Genome-wide analysis of long noncoding RNA stability.,"Transcriptomic analyses have identified tens of thousands of intergenic, intronic, and cis-antisense long noncoding RNAs (lncRNAs) that are expressed from mammalian genomes. 
Despite progress in functional characterization, little is known about the post-transcriptional regulation of lncRNAs and their half-lives. Although many are easily detectable by a variety of techniques, it has been assumed that lncRNAs are generally unstable, but this has not been examined genome-wide. Utilizing a custom noncoding RNA array, we determined the half-lives of ∼800 lncRNAs and ∼12,000 mRNAs in the mouse Neuro-2a cell line. We find only a minority of lncRNAs are unstable. LncRNA half-lives vary over a wide range, comparable to, although on average less than, that of mRNAs, suggestive of complex metabolism and widespread functionality. Combining half-lives with comprehensive lncRNA annotations identified hundreds of unstable (half-life < 2 h) intergenic, cis-antisense, and intronic lncRNAs, as well as lncRNAs showing extreme stability (half-life > 16 h). Analysis of lncRNA features revealed that intergenic and cis-antisense RNAs are more stable than those derived from introns, as are spliced lncRNAs compared to unspliced (single exon) transcripts. Subcellular localization of lncRNAs indicated widespread trafficking to different cellular locations, with nuclear-localized lncRNAs more likely to be unstable. Surprisingly, one of the least stable lncRNAs is the well-characterized paraspeckle RNA Neat1, suggesting Neat1 instability contributes to the dynamic nature of this subnuclear domain. We have created an online interactive resource (http://stability.matticklab.com) that allows easy navigation of lncRNA and mRNA stability profiles and provides a comprehensive annotation of ~7200 mouse lncRNAs.",2012-03-09 +25802363,International Society of Human and Animal Mycology (ISHAM)-ITS reference DNA barcoding database--the quality controlled standard tool for routine identification of human and animal pathogenic fungi.,"Human and animal fungal pathogens are a growing threat worldwide leading to emerging infections and creating new risks for established ones. 
There is a growing need for a rapid and accurate identification of pathogens to enable early diagnosis and targeted antifungal therapy. Morphological and biochemical identification methods are time-consuming and require trained experts. Alternatively, molecular methods, such as DNA barcoding, a powerful and easy tool for rapid monophasic identification, offer a practical approach for species identification and less demanding in terms of taxonomical expertise. However, its wide-spread use is still limited by a lack of quality-controlled reference databases and the evolving recognition and definition of new fungal species/complexes. An international consortium of medical mycology laboratories was formed aiming to establish a quality controlled ITS database under the umbrella of the ISHAM working group on ""DNA barcoding of human and animal pathogenic fungi."" A new database, containing 2800 ITS sequences representing 421 fungal species, providing the medical community with a freely accessible tool at http://www.isham.org/ and http://its.mycologylab.org/ to rapidly and reliably identify most agents of mycoses, was established. The generated sequences included in the new database were used to evaluate the variation and overall utility of the ITS region for the identification of pathogenic fungi at intra-and interspecies level. The average intraspecies variation ranged from 0 to 2.25%. This highlighted selected pathogenic fungal species, such as the dermatophytes and emerging yeast, for which additional molecular methods/genetic markers are required for their reliable identification from clinical and veterinary specimens.",2015-03-22 +26723495,iSuc-PseOpt: Identifying lysine succinylation sites in proteins by incorporating sequence-coupling effects into pseudo components and optimizing imbalanced training dataset.,"Succinylation is a posttranslational modification (PTM) where a succinyl group is added to a Lys (K) residue of a protein molecule. 
Lysine succinylation plays an important role in orchestrating various biological processes, but it is also associated with some diseases. Therefore, we are challenged by the following problem from both basic research and drug development: given an uncharacterized protein sequence containing many Lys residues, which one of them can be succinylated, and which one cannot? With the avalanche of protein sequences generated in the postgenomic age, the answer to the problem has become even more urgent. Fortunately, the statistical significance experimental data for succinylated sites in proteins have become available very recently, an indispensable prerequisite for developing a computational method to address this problem. By incorporating the sequence-coupling effects into the general pseudo amino acid composition and using KNNC (K-nearest neighbors cleaning) treatment and IHTS (inserting hypothetical training samples) treatment to optimize the training dataset, a predictor called iSuc-PseOpt has been developed. Rigorous cross-validations indicated that it remarkably outperformed the existing method. A user-friendly web-server for iSuc-PseOpt has been established at http://www.jci-bioinfo.cn/iSuc-PseOpt, where users can easily get their desired results without needing to go through the complicated mathematical equations involved.",2015-12-23 +26369974,Colistin Population Pharmacokinetics after Application of a Loading Dose of 9 MU Colistin Methanesulfonate in Critically Ill Patients.,"Colistin has been revived, in the era of extensively drug-resistant (XDR) Gram-negative infections, as the last-resort treatment in critically ill patients. Recent studies focusing on the optimal dosing strategy of colistin have demonstrated the necessity of a loading dose at treatment initiation (D. Plachouras, M. Karvanen, L. E. Friberg, E. Papadomichelakis, A. Antoniadou, I. Tsangaris, I. Karaiskos, G. Poulakou, F. Kontopidou, A. Armaganidis, O. Cars, and H. 
Giamarellou, Antimicrob Agents Chemother 53:3430-3436, 2009, http://dx.doi.org/10.1128/AAC.01361-08; A. F. Mohamed, I. Karaiskos, D. Plachouras, M. Karvanen, K. Pontikis, B. Jansson, E. Papadomichelakis, A. Antoniadou, H. Giamarellou, A. Armaganidis, O. Cars, and L. E. Friberg, Antimicrob Agents Chemother 56:4241- 4249, 2012, http://dx.doi.org/10.1128/AAC.06426-11; S. M. Garonzik, J. Li, V. Thamlikitkul, D. L. Paterson, S. Shoham, J. Jacob, F. P. Silveira, A. Forrest, and R. L. Nation, Antimicrob Agents Chemother 55:3284-3294, 2011, http://dx.doi.org/10.1128/AAC.01733-10). In 19 critically ill patients with suspected or microbiologically documented infections caused by XDR Gram-negative strains, a loading dose of 9 MU colistin methanesulfonate (CMS) (∼ 270 mg colistin base activity) was administered with a maintenance dose of 4.5 MU every 12 h, commenced after 24 h. Patients on renal replacement were excluded. CMS infusion was given over 30 min or 1 h. Repeated blood sampling was performed after the loading dose and after the 5th or 6th dose. Colistin concentrations and measured CMS, determined after hydrolization to colistin and including the partially sulfomethylated derivatives, were determined with a liquid chromatography-tandem mass spectrometry assay. Population pharmacokinetic analysis was conducted in NONMEM with the new data combined with data from previous studies. Measured colistimethate concentrations were described by 4 compartments for distribution and removal of sulfomethyl groups, while colistin disposition followed a 1-compartment model. The average observed maximum colistin A plus B concentration was 2.65 mg/liter after the loading dose (maximum time was 8 h). A significantly higher availability of the measured A and B forms of colistimethate and colistin explained the higher-than-expected concentrations in the present study compared to those in previous studies. Creatinine clearance was a time-varying covariate of colistimethate clearance. 
The incidence of acute renal injury was 20%.",2015-09-14 +25910698,PDBest: a user-friendly platform for manipulating and enhancing protein structures.,"

Unlabelled

PDBest (PDB Enhanced Structures Toolkit) is a user-friendly, freely available platform for acquiring, manipulating and normalizing protein structures in a high-throughput and seamless fashion. With an intuitive graphical interface it allows users with no programming background to download and manipulate their files. The platform also exports protocols, enabling users to easily share PDB searching and filtering criteria, enhancing analysis reproducibility.

Availability and implementation

PDBest installation packages are freely available for several platforms at http://www.pdbest.dcc.ufmg.br

Contact

wellisson@dcc.ufmg.br, dpires@dcc.ufmg.br, raquelcm@dcc.ufmg.br

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-24 +24204885,RNA-CODE: a noncoding RNA classification tool for short reads in NGS data lacking reference genomes.,"The number of transcriptomic sequencing projects of various non-model organisms is still accumulating rapidly. As non-coding RNAs (ncRNAs) are highly abundant in living organism and play important roles in many biological processes, identifying fragmentary members of ncRNAs in small RNA-seq data is an important step in post-NGS analysis. However, the state-of-the-art ncRNA search tools are not optimized for next-generation sequencing (NGS) data, especially for very short reads. In this work, we propose and implement a comprehensive ncRNA classification tool (RNA-CODE) for very short reads. RNA-CODE is specifically designed for ncRNA identification in NGS data that lack quality reference genomes. Given a set of short reads, our tool classifies the reads into different types of ncRNA families. The classification results can be used to quantify the expression levels of different types of ncRNAs in RNA-seq data and ncRNA composition profiles in metagenomic data, respectively. The experimental results of applying RNA-CODE to RNA-seq of Arabidopsis and a metagenomic data set sampled from human guts demonstrate that RNA-CODE competes favorably in both sensitivity and specificity with other tools. The source codes of RNA-CODE can be downloaded at http://www.cse.msu.edu/~chengy/RNA_CODE.",2013-10-25 +25262153,switchBox: an R package for k-Top Scoring Pairs classifier development.,"

Unlabelled

k-Top Scoring Pairs (kTSP) is a classification method for prediction from high-throughput data based on a set of the paired measurements. Each of the two possible orderings of a pair of measurements (e.g. a reversal in the expression of two genes) is associated with one of two classes. The kTSP prediction rule is the aggregation of voting among such individual two-feature decision rules based on order switching. kTSP, like its predecessor, Top Scoring Pair (TSP), is a parameter-free classifier relying only on ranking of a small subset of features, rendering it robust to noise and potentially easy to interpret in biological terms. In contrast to TSP, kTSP has comparable accuracy to standard genomics classification techniques, including Support Vector Machines and Prediction Analysis for Microarrays. Here, we describe 'switchBox', an R package for kTSP-based prediction.

Availability

The 'switchBox' package is freely available from Bioconductor: http://www.bioconductor.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-26 +27517294,Occupational Asbestos Exposure and Incidence of Colon and Rectal Cancers in French Men: The Asbestos-Related Diseases Cohort (ARDCo-Nut).,"

Background

The relationships between asbestos exposure and colorectal cancer remain controversial.

Objectives

We examined the association between asbestos exposure and colorectal cancer incidence.

Methods

Volunteer retired workers previously exposed to asbestos were invited to participate in the French ARDCo screening program between 2003 and 2005. Additional data on risk factors for colorectal cancer were collected from the ARDCo-Nut subsample of 3,769 participants in 2011. Cases of colon and rectal cancer were ascertained each year through 2014 based on eligibility for free medical care following a cancer diagnosis. Survival regression based on the Cox model was used to estimate the relative risk of colon and rectal cancer separately, in relation to the time since first exposure (TSFE) and cumulative exposure index (CEI) to asbestos, and with adjustment for smoking in the overall cohort and for smoking, and certain risk factors for these cancers in the ARDCo-Nut subsample.

Results

Mean follow-up was 10.2 years among 14,515 men, including 181 colon cancer and 62 rectal cancer cases (41 and 17, respectively, in the ARDCo-Nut subsample). In the overall cohort, after adjusting for smoking, colon cancer was significantly associated with cumulative exposure (HR = 1.14; 95% CI: 1.04, 1.26 for a 1-unit increase in ln-CEI) and ≥ 20-40 years since first exposure (HR = 4.67; 95% CI: 1.92, 11.46 vs. 0-20 years TSFE), and inversely associated with 60 years TSFE (HR = 0.26; 95% CI: 0.10, 0.70). Although rectal cancer was also associated with TSFE 20-40 years (HR = 4.57; 95% CI: 1.14, 18.27), it was not associated with ln-CEI, but these findings must be interpreted cautiously due to the small number of cases.

Conclusions

Our findings provide support for an association between occupational exposure to asbestos and colon cancer incidence in men. Citation: Paris C, Thaon I, Hérin F, Clin B, Lacourt A, Luc A, Coureau G, Brochard P, Chamming's S, Gislard A, Galan P, Hercberg S, Wild P, Pairon JC, Andujar P. 2017. Occupational asbestos exposure and incidence of colon and rectal cancers in French men: the Asbestos-Related Diseases Cohort (ARDCo-Nut). Environ Health Perspect 125:409-415; http://dx.doi.org/10.1289/EHP153.",2016-08-12 +25503233,Genome-wide characterization of the routes to pluripotency.,"Somatic cell reprogramming to a pluripotent state continues to challenge many of our assumptions about cellular specification, and despite major efforts, we lack a complete molecular characterization of the reprograming process. To address this gap in knowledge, we generated extensive transcriptomic, epigenomic and proteomic data sets describing the reprogramming routes leading from mouse embryonic fibroblasts to induced pluripotency. Through integrative analysis, we reveal that cells transition through distinct gene expression and epigenetic signatures and bifurcate towards reprogramming transgene-dependent and -independent stable pluripotent states. Early transcriptional events, driven by high levels of reprogramming transcription factor expression, are associated with widespread loss of histone H3 lysine 27 (H3K27me3) trimethylation, representing a general opening of the chromatin state. Maintenance of high transgene levels leads to re-acquisition of H3K27me3 and a stable pluripotent state that is alternative to the embryonic stem cell (ESC)-like fate. Lowering transgene levels at an intermediate phase, however, guides the process to the acquisition of ESC-like chromatin and DNA methylation signature. 
Our data provide a comprehensive molecular description of the reprogramming routes and is accessible through the Project Grandiose portal at http://www.stemformatics.org.",2014-12-01 +24267485,Towards human-computer synergetic analysis of large-scale biological data.,"

Background

Advances in technology have led to the generation of massive amounts of complex and multifarious biological data in areas ranging from genomics to structural biology. The volume and complexity of such data leads to significant challenges in terms of its analysis, especially when one seeks to generate hypotheses or explore the underlying biological processes. At the state-of-the-art, the application of automated algorithms followed by perusal and analysis of the results by an expert continues to be the predominant paradigm for analyzing biological data. This paradigm works well in many problem domains. However, it also is limiting, since domain experts are forced to apply their instincts and expertise such as contextual reasoning, hypothesis formulation, and exploratory analysis after the algorithm has produced its results. In many areas where the organization and interaction of the biological processes is poorly understood and exploratory analysis is crucial, what is needed is to integrate domain expertise during the data analysis process and use it to drive the analysis itself.

Results

In context of the aforementioned background, the results presented in this paper describe advancements along two methodological directions. First, given the context of biological data, we utilize and extend a design approach called experiential computing from multimedia information system design. This paradigm combines information visualization and human-computer interaction with algorithms for exploratory analysis of large-scale and complex data. In the proposed approach, emphasis is laid on: (1) allowing users to directly visualize, interact, experience, and explore the data through interoperable visualization-based and algorithmic components, (2) supporting unified query and presentation spaces to facilitate experimentation and exploration, (3) providing external contextual information by assimilating relevant supplementary data, and (4) encouraging user-directed information visualization, data exploration, and hypotheses formulation. Second, to illustrate the proposed design paradigm and measure its efficacy, we describe two prototype web applications. The first, called XMAS (Experiential Microarray Analysis System) is designed for analysis of time-series transcriptional data. The second system, called PSPACE (Protein Space Explorer) is designed for holistic analysis of structural and structure-function relationships using interactive low-dimensional maps of the protein structure space. Both these systems promote and facilitate human-computer synergy, where cognitive elements such as domain knowledge, contextual reasoning, and purpose-driven exploration, are integrated with a host of powerful algorithmic operations that support large-scale data analysis, multifaceted data visualization, and multi-source information integration.

Conclusions

The proposed design philosophy, combines visualization, algorithmic components and cognitive expertise into a seamless processing-analysis-exploration framework that facilitates sense-making, exploration, and discovery. Using XMAS, we present case studies that analyze transcriptional data from two highly complex domains: gene expression in the placenta during human pregnancy and reaction of marine organisms to heat stress. With PSPACE, we demonstrate how complex structure-function relationships can be explored. These results demonstrate the novelty, advantages, and distinctions of the proposed paradigm. Furthermore, the results also highlight how domain insights can be combined with algorithms to discover meaningful knowledge and formulate evidence-based hypotheses during the data analysis process. Finally, user studies against comparable systems indicate that both XMAS and PSPACE deliver results with better interpretability while placing lower cognitive loads on the users. XMAS is available at: http://tintin.sfsu.edu:8080/xmas. PSPACE is available at: http://pspace.info/.",2013-10-09 +24905018,GWAS in a box: statistical and visual analytics of structured associations via GenAMap.,"With the continuous improvement in genotyping and molecular phenotyping technology and the decreasing typing cost, it is expected that in a few years, more and more clinical studies of complex diseases will recruit thousands of individuals for pan-omic genetic association analyses. Hence, there is a great need for algorithms and software tools that could scale up to the whole omic level, integrate different omic data, leverage rich structure information, and be easily accessible to non-technical users. 
We present GenAMap, an interactive analytics software platform that 1) automates the execution of principled machine learning methods that detect genome- and phenome-wide associations among genotypes, gene expression data, and clinical or other macroscopic traits, and 2) provides new visualization tools specifically designed to aid in the exploration of association mapping results. Algorithmically, GenAMap is based on a new paradigm for GWAS and PheWAS analysis, termed structured association mapping, which leverages various structures in the omic data. We demonstrate the function of GenAMap via a case study of the Brem and Kruglyak yeast dataset, and then apply it on a comprehensive eQTL analysis of the NIH heterogeneous stock mice dataset and report some interesting findings. GenAMap is available from http://sailing.cs.cmu.edu/genamap.",2014-06-06 +23766290,INMEX--a web-based tool for integrative meta-analysis of expression data.,"The widespread applications of various 'omics' technologies in biomedical research together with the emergence of public data repositories have resulted in a plethora of data sets for almost any given physiological state or disease condition. Properly combining or integrating these data sets with similar basic hypotheses can help reduce study bias, increase statistical power and improve overall biological understanding. However, the difficulties in data management and the complexities of analytical approaches have significantly limited data integration to enable meta-analysis. Here, we introduce integrative meta-analysis of expression data (INMEX), a user-friendly web-based tool designed to support meta-analysis of multiple gene-expression data sets, as well as to enable integration of data sets from gene expression and metabolomics experiments. INMEX contains three functional modules. The data preparation module supports flexible data processing, annotation and visualization of individual data sets. 
The statistical analysis module allows researchers to combine multiple data sets based on P-values, effect sizes, rank orders and other features. The significant genes can be examined in functional analysis module for enriched Gene Ontology terms or Kyoto Encyclopedia of Genes and Genomes (KEGG) pathways, or expression profile visualization. INMEX has built-in support for common gene/metabolite identifiers (IDs), as well as 45 popular microarray platforms for human, mouse and rat. Complex operations are performed through a user-friendly web interface in a step-by-step manner. INMEX is freely available at http://www.inmex.ca.",2013-06-12 +21619655,"LabKey Server NAb: a tool for analyzing, visualizing and sharing results from neutralizing antibody assays.","

Background

Multiple types of assays allow sensitive detection of virus-specific neutralizing antibodies. For example, the extent of antibody neutralization of HIV-1, SIV and SHIV can be measured in the TZM-bl cell line through the degree of luciferase reporter gene expression after infection. In the past, neutralization curves and titers for this standard assay have been calculated using an Excel macro. Updating all instances of such a macro with new techniques can be unwieldy and introduce non-uniformity across multi-lab teams. Using Excel also poses challenges in centrally storing, sharing and associating raw data files and results.

Results

We present LabKey Server's NAb tool for organizing, analyzing and securely sharing data, files and results for neutralizing antibody (NAb) assays, including the luciferase-based TZM-bl NAb assay. The customizable tool supports high-throughput experiments and includes a graphical plate template designer, allowing researchers to quickly adapt calculations to new plate layouts. The tool calculates the percent neutralization for each serum dilution based on luminescence measurements, fits a range of neutralization curves to titration results and uses these curves to estimate the neutralizing antibody titers for benchmark dilutions. Results, curve visualizations and raw data files are stored in a database and shared through a secure, web-based interface. NAb results can be integrated with other data sources based on sample identifiers. It is simple to make results public after publication by updating folder security settings.

Conclusions

Standardized tools for analyzing, archiving and sharing assay results can improve the reproducibility, comparability and reliability of results obtained across many labs. LabKey Server and its NAb tool are freely available as open source software at http://www.labkey.com under the Apache 2.0 license. Many members of the HIV research community can also access the LabKey Server NAb tool without installing the software by using the Atlas Science Portal (https://atlas.scharp.org). Atlas is an installation of LabKey Server.",2011-05-27 +24650594,Fast and accurate modelling of longitudinal and repeated measures neuroimaging data.,"Despite the growing importance of longitudinal data in neuroimaging, the standard analysis methods make restrictive or unrealistic assumptions (e.g., assumption of Compound Symmetry--the state of all equal variances and equal correlations--or spatially homogeneous longitudinal correlations). While some new methods have been proposed to more accurately account for such data, these methods are based on iterative algorithms that are slow and failure-prone. In this article, we propose the use of the Sandwich Estimator method which first estimates the parameters of interest with a simple Ordinary Least Square model and second estimates variances/covariances with the ""so-called"" Sandwich Estimator (SwE) which accounts for the within-subject correlation existing in longitudinal data. Here, we introduce the SwE method in its classic form, and we review and propose several adjustments to improve its behaviour, specifically in small samples. We use intensive Monte Carlo simulations to compare all considered adjustments and isolate the best combination for neuroimaging data. We also compare the SwE method to other popular methods and demonstrate its strengths and weaknesses. 
Finally, we analyse a highly unbalanced longitudinal dataset from the Alzheimer's Disease Neuroimaging Initiative and demonstrate the flexibility of the SwE method to fit within- and between-subject effects in a single model. Software implementing this SwE method has been made freely available at http://warwick.ac.uk/tenichols/SwE.",2014-03-18 +24451213,PIPE-CLIP: a comprehensive online tool for CLIP-seq data analysis.,"CLIP-seq is widely used to study genome-wide interactions between RNA-binding proteins and RNAs. However, there are few tools available to analyze CLIP-seq data, thus creating a bottleneck to the implementation of this methodology. Here, we present PIPE-CLIP, a Galaxy framework-based comprehensive online pipeline for reliable analysis of data generated by three types of CLIP-seq protocol: HITS-CLIP, PAR-CLIP and iCLIP. PIPE-CLIP provides both data processing and statistical analysis to determine candidate cross-linking regions, which are comparable to those regions identified from the original studies or using existing computational tools. PIPE-CLIP is available at http://pipeclip.qbrc.org/.",2014-01-22 +24919879,Blue: correcting sequencing errors using consensus and context.,"

Motivation

Bioinformatics tools, such as assemblers and aligners, are expected to produce more accurate results when given better quality sequence data as their starting point. This expectation has led to the development of stand-alone tools whose sole purpose is to detect and remove sequencing errors. A good error-correcting tool would be a transparent component in a bioinformatics pipeline, simply taking sequence data in any of the standard formats and producing a higher quality version of the same data containing far fewer errors. It should not only be able to correct all of the types of errors found in real sequence data (substitutions, insertions, deletions and uncalled bases), but it has to be both fast enough and scalable enough to be usable on the large datasets being produced by current sequencing technologies, and work on data derived from both haploid and diploid organisms.

Results

This article presents Blue, an error-correction algorithm based on k-mer consensus and context. Blue can correct substitution, deletion and insertion errors, as well as uncalled bases. It accepts both FASTQ and FASTA formats, and corrects quality scores for corrected bases. Blue also maintains the pairing of reads, both within a file and between pairs of files, making it compatible with downstream tools that depend on read pairing. Blue is memory efficient, scalable and faster than other published tools, and usable on large sequencing datasets. On the tests undertaken, Blue also proved to be generally more accurate than other published algorithms, resulting in more accurately aligned reads and the assembly of longer contigs containing fewer errors. One significant feature of Blue is that its k-mer consensus table does not have to be derived from the set of reads being corrected. This decoupling makes it possible to correct one dataset, such as a small set of 454 mate-pair reads, with the consensus derived from another dataset, such as Illumina reads derived from the same DNA sample. Such cross-correction can greatly improve the quality of small (and expensive) sets of long reads, leading to even better assemblies and higher quality finished genomes.

Availability and implementation

The code for Blue and its related tools are available from http://www.bioinformatics.csiro.au/Blue. These programs are written in C# and run natively under Windows and under Mono on Linux.",2014-06-11 +24214989,Assembly information services in the European Nucleotide Archive.,"The European Nucleotide Archive (ENA; http://www.ebi.ac.uk/ena) is a repository for the world public domain nucleotide sequence data output. ENA content covers a spectrum of data types including raw reads, assembly data and functional annotation. ENA has faced a dramatic growth in genome assembly submission rates, data volumes and complexity of datasets. This has prompted a broad reworking of assembly submission services, for which we now reach the end of a major programme of work and many enhancements have already been made available over the year to components of the submission service. In this article, we briefly review ENA content and growth over 2013, describe our rapidly developing services for genome assembly information and outline further major developments over the last year.",2013-11-08 +25786896,"A Comprehensive, Automatically Updated Fungal ITS Sequence Dataset for Reference-Based Chimera Control in Environmental Sequencing Efforts.","The nuclear ribosomal internal transcribed spacer (ITS) region is the most commonly chosen genetic marker for the molecular identification of fungi in environmental sequencing and molecular ecology studies. Several analytical issues complicate such efforts, one of which is the formation of chimeric-artificially joined-DNA sequences during PCR amplification or sequence assembly. Several software tools are currently available for chimera detection, but rely to various degrees on the presence of a chimera-free reference dataset for optimal performance. However, no such dataset is available for use with the fungal ITS region. 
This study introduces a comprehensive, automatically updated reference dataset for fungal ITS sequences based on the UNITE database for the molecular identification of fungi. This dataset supports chimera detection throughout the fungal kingdom and for full-length ITS sequences as well as partial (ITS1 or ITS2 only) datasets. The performance of the dataset on a large set of artificial chimeras was above 99.5%, and we subsequently used the dataset to remove nearly 1,000 compromised fungal ITS sequences from public circulation. The dataset is available at http://unite.ut.ee/repository.php and is subject to web-based third-party curation.",2015-03-19 +25585022,Waxholm Space atlas of the rat brain hippocampal region: three-dimensional delineations based on magnetic resonance and diffusion tensor imaging.,"Atlases of the rat brain are widely used as reference for orientation, planning of experiments, and as tools for assigning location to experimental data. Improved quality and use of magnetic resonance imaging (MRI) and other tomographical imaging techniques in rats have allowed the development of new three-dimensional (3-D) volumetric brain atlas templates. The rat hippocampal region is a commonly used model for basic research on memory and learning, and for preclinical investigations of brain disease. The region features a complex anatomical organization with multiple subdivisions that can be identified on the basis of specific cytoarchitectonic or chemoarchitectonic criteria. We here investigate the extent to which it is possible to identify boundaries of divisions of the hippocampal region on the basis of high-resolution MRI contrast. We present the boundaries of 13 divisions, identified and delineated based on multiple types of image contrast observed in the recently published Waxholm Space MRI/DTI template for the Sprague Dawley rat brain (Papp et al., Neuroimage 97:374-386, 2014). 
The new detailed delineations of the hippocampal formation and parahippocampal region (Waxholm Space atlas of the Sprague Dawley rat brain, v2.0) are shared via the INCF Software Center (http://software.incf.org/), where also the MRI/DTI reference template is available. The present update of the Waxholm Space atlas of the rat brain is intended to facilitate interpretation, analysis, and integration of experimental data from this anatomically complex region.",2015-01-10 +22110041,ProtChemSI: a network of protein-chemical structural interactions.,"Progress in structure determination methods means that the set of experimentally determined 3D structures of proteins in complex with small molecules is growing exponentially. ProtChemSI exploits and extends this useful set of structures by both collecting and annotating the existing data as well as providing models of potential complexes inferred by protein or chemical structure similarity. The database currently includes 7704 proteins from 1803 organisms, 11,324 chemical compounds and 202,289 complexes including 178,974 predicted. It is publicly available at http://pcidb.russelllab.org.",2011-11-21 +24946880,Automated peptide mapping and protein-topographical annotation of proteomics data.,"

Background

In quantitative proteomics, peptide mapping is a valuable approach to combine positional quantitative information with topographical and domain information of proteins. Quantitative proteomic analysis of cell surface shedding is an exemplary application area of this approach.

Results

We developed ImproViser (http://www.improviser.uni-freiburg.de) for fully automated peptide mapping of quantitative proteomics data in the protXML format. The tool generates sortable and graphically annotated output, which can be easily shared with further users. As an exemplary application, we show its usage in the proteomic analysis of regulated intramembrane proteolysis.

Conclusion

ImproViser is the first tool to enable automated peptide mapping of the widely-used protXML format.",2014-06-19 +22383735,GENI-DB: a database of global events for epidemic intelligence.,"

Unlabelled

We present a novel public health database (GENI-DB) in which news events on the topic of over 176 infectious diseases and chemicals affecting human and animal health are compiled from surveillance of the global online news media in 10 languages. News event frequency data were gathered systematically through the BioCaster public health surveillance system from July 2009 to the present and is available to download by the research community for purposes of analyzing trends in the global burden of infectious diseases. Database search can be conducted by year, country, disease and language.

Availability

The GENI-DB is freely available via a web portal at http://born.nii.ac.jp/.",2012-03-01 +22338386,Non-invasive grading of astrocytic tumours from the relative contents of myo-inositol and glycine measured by in vivo MRS.,"MRI and MRS are established methodologies for evaluating intracranial lesions. One MR spectral feature suggested for in vivo grading of astrocytic tumours is the apparent myo-inositol (mI) intensity (ca 3.55 ppm) at short echo times, although glycine (gly) may also contribute in vivo to this resonance. The purpose of this study was to quantitatively evaluate the mI + gly contribution to the recorded spectral pattern in vivo and correlate it with in vitro data obtained from perchloric acid extraction of tumour biopsies. Patient spectra (n = 95) at 1.5T at short (20-31 ms) and long (135-136 ms) echo times were obtained from the INTERPRET MRS database (http://gabrmn.uab.es/interpretvalidateddb/). Phantom spectra were acquired with a comparable protocol. Spectra were automatically processed and the ratios of the (mI + gly) to Cr peak heights ((mI + gly)/Cr) calculated. Perchloric acid extracts of brain tumour biopsies were analysed by high-resolution NMR at 9.4T. The ratio (mI + gly)/Cr decreased significantly with astrocytic grade in vivo between low-grade astrocytoma (A2) and glioblastoma multiforme (GBM). In vitro results displayed a somewhat different tendency, with anaplastic astrocytomas having significantly higher (mI + gly)/Cr than A2 and GBM. 
The discrepancy between in vivo and in vitro data suggests that the NMR visibility of glycine in glial brain tumours is restricted in vivo.",2011-11-01 +22002696,Pathos: a web facility that uses metabolic maps to display experimental changes in metabolites identified by mass spectrometry.,"This work describes a freely available web-based facility which can be used to analyse raw or processed mass spectrometric data from metabolomics experiments and display the metabolites identified--and changes in their experimental abundance--in the context of the metabolic pathways in which they occur. The facility, Pathos (http://motif.gla.ac.uk/Pathos/), employs Java servlets and is underpinned by a relational database populated from the Kyoto Encyclopaedia of Genes and Genomes (KEGG). Input files can contain either raw m/z values from experiments conducted in different modes, or KEGG or MetaCyc IDs assigned by the user on the basis of the m/z values and other criteria. The textual output lists the KEGG pathways on an XHTML page according to the number of metabolites or potential metabolites that they contain. Filtering by organism is also available. For metabolic pathways of interest, the user is able to retrieve a pathway map with identified metabolites highlighted. A particular feature of Pathos is its ability to process relative quantification data for metabolites identified under different experimental conditions, and to present this in an easily comprehensible manner. Results are colour-coded according to the degree of experimental change, and bar charts of the results can be generated interactively from either the text listings or the pathway maps. The visual presentation of the output from Pathos is designed to allow the rapid identification of metabolic areas of potential interest, after which particular results may be examined in detail.",2011-11-01 +25304781,PhosphoPICK: modelling cellular context to map kinase-substrate phosphorylation events.,"

Motivation

The determinants of kinase-substrate phosphorylation can be found both in the substrate sequence and the surrounding cellular context. Cell cycle progression, interactions with mediating proteins and even prior phosphorylation events are necessary for kinases to maintain substrate specificity. While much work has focussed on the use of sequence-based methods to predict phosphorylation sites, there has been very little work invested into the application of systems biology to understand phosphorylation. Lack of specificity in many kinase substrate binding motifs means that sequence methods for predicting kinase binding sites are susceptible to high false-positive rates.

Results

We present here a model that takes into account protein-protein interaction information, and protein abundance data across the cell cycle to predict kinase substrates for 59 human kinases that are representative of important biological pathways. The model shows high accuracy for substrate prediction (with an average AUC of 0.86) across the 59 kinases tested. When using the model to complement sequence-based kinase-specific phosphorylation site prediction, we found that the additional information increased prediction performance for most comparisons made, particularly on kinases from the CMGC family. We then used our model to identify functional overlaps between predicted CDK2 substrates and targets from the E2F family of transcription factors. Our results demonstrate that a model harnessing context data can account for the short-falls in sequence information and provide a robust description of the cellular events that regulate protein phosphorylation.

Availability and implementation

The method is freely available online as a web server at the website http://bioinf.scmb.uq.edu.au/phosphopick.

Contact

m.boden@uq.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-09 +22350090,Development of gene-based markers and construction of an integrated linkage map in eggplant by using Solanum orthologous (SOL) gene sets.,"We constructed an integrated DNA marker linkage map of eggplant (Solanum melongena L.) using DNA marker segregation data sets obtained from two independent intraspecific F(2) populations. The linkage map consisted of 12 linkage groups and encompassed 1,285.5 cM in total. We mapped 952 DNA markers, including 313 genomic SSR markers developed by random sequencing of simple sequence repeat (SSR)-enriched genomic libraries, and 623 single-nucleotide polymorphisms (SNP) and insertion/deletion polymorphisms (InDels) found in eggplant-expressed sequence tags (ESTs) and related genomic sequences [introns and untranslated regions (UTRs)]. Because of their co-dominant inheritance and their highly polymorphic and multi-allelic nature, the SSR markers may be more versatile than the SNP and InDel markers for map-based genetic analysis of any traits of interest using segregating populations derived from any intraspecific crosses of practical breeding materials. However, we found that the distribution of microsatellites in the genome was biased to some extent, and therefore a considerable part of the eggplant genome was first detected when gene-derived SNP and InDel markers were mapped. Of the 623 SNP and InDel markers mapped onto the eggplant integrated map, 469 were derived from eggplant unigenes contained within Solanum orthologous (SOL) gene sets (i.e., sets of orthologous unigenes from eggplant, tomato, and potato). Out of the 469 markers, 326 could also be mapped onto the tomato map. These common markers will be informative landmarks for the transfer of tomato's more saturated genomic information to eggplant and will also provide comparative information on the genome organization of the two solanaceous species. 
The data are available from the DNA marker database of vegetables, VegMarks (http://vegmarks.nivot.affrc.go.jp).",2012-02-16 +25075117,Greater power and computational efficiency for kernel-based association testing of sets of genetic variants.,"

Motivation

Set-based variance component tests have been identified as a way to increase power in association studies by aggregating weak individual effects. However, the choice of test statistic has been largely ignored even though it may play an important role in obtaining optimal power. We compared a standard statistical test-a score test-with a recently developed likelihood ratio (LR) test. Further, when correction for hidden structure is needed, or gene-gene interactions are sought, state-of-the art algorithms for both the score and LR tests can be computationally impractical. Thus we develop new computationally efficient methods.

Results

After reviewing theoretical differences in performance between the score and LR tests, we find empirically on real data that the LR test generally has more power. In particular, on 15 of 17 real datasets, the LR test yielded at least as many associations as the score test-up to 23 more associations-whereas the score test yielded at most one more association than the LR test in the two remaining datasets. On synthetic data, we find that the LR test yielded up to 12% more associations, consistent with our results on real data, but also observe a regime of extremely small signal where the score test yielded up to 25% more associations than the LR test, consistent with theory. Finally, our computational speedups now enable (i) efficient LR testing when the background kernel is full rank, and (ii) efficient score testing when the background kernel changes with each test, as for gene-gene interaction tests. The latter yielded a factor of 2000 speedup on a cohort of size 13 500.

Availability

Software available at http://research.microsoft.com/en-us/um/redmond/projects/MSCompBio/Fastlmm/.

Contact

heckerma@microsoft.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-29 +25886899,Measuring semantic similarities by combining gene ontology annotations and gene co-function networks.,"

Background

Gene Ontology (GO) has been used widely to study functional relationships between genes. The current semantic similarity measures rely only on GO annotations and GO structure. This limits the power of GO-based similarity because of the limited proportion of genes that are annotated to GO in most organisms.

Results

We introduce a novel approach called NETSIM (network-based similarity measure) that incorporates information from gene co-function networks in addition to using the GO structure and annotations. Using metabolic reaction maps of yeast, Arabidopsis, and human, we demonstrate that NETSIM can improve the accuracy of GO term similarities. We also demonstrate that NETSIM works well even for genomes with sparser gene annotation data. We applied NETSIM on large Arabidopsis gene families such as cytochrome P450 monooxygenases to group the members functionally and show that this grouping could facilitate functional characterization of genes in these families.

Conclusions

Using NETSIM as an example, we demonstrated that the performance of a semantic similarity measure could be significantly improved after incorporating genome-specific information. NETSIM incorporates both GO annotations and gene co-function network data as a priori knowledge in the model. Therefore, functional similarities of GO terms that are not explicitly encoded in GO but are relevant in a taxon-specific manner become measurable when GO annotations are limited. Supplementary information and software are available at http://www.msu.edu/~jinchen/NETSIM .",2015-02-14 +22677585,E-learning resources for vascular surgeons: a needs analysis study.,"

Objectives

To obtain the views of vascular surgeons about online resources in their specialty as a guide to future e-learning development.

Design

A focused questionnaire regarding e-learning resources in vascular surgery was circulated online. A combination of structured and open-ended questions addressed users' ranking of various resource types, examples of presently used websites, suggestions for future growth, and the opportunity to become actively involved in e-learning development. The responses were collected over a 4-week period and remained anonymous.

Setting

The study was conducted online at http://www.vasculareducation.com as part of an ongoing project on e-learning for vascular surgeons by the Department of Educational Development and Research, Faculty of Health, Medicine and Life Sciences, Maastricht University, Maastricht, The Netherlands.

Participants

The survey population consisted of vascular surgeons and surgical trainees in Europe. The participants were contacted via their membership of the European Society for Vascular Surgery and national academic or administrative vascular surgical organizations. Demographic information was collected about clinical seniority and country of work.

Results

In all, 252 responses were obtained. Respondents favored the development of a variety of online resources in vascular surgery. The strongest demand was for illustrations and videos of surgical techniques, followed by an interactive calendar and peer-reviewed multiple-choice questions. Overall, 46% of respondents wished to contribute actively toward e-learning development, with consultants being more willing than trainees to do so.

Conclusions

Members of the vascular surgical community value online resources in their specialty, especially for procedural techniques. Vascular surgeons would like to be actively involved in subsequent development of e-learning resources.",2012-03-10 +23162084,An application of a relational database system for high-throughput prediction of elemental compositions from accurate mass values.,"

Summary

High-accuracy mass values detected by high-resolution mass spectrometry analysis enable prediction of elemental compositions, and thus are used for metabolite annotations in metabolomic studies. Here, we report an application of a relational database to significantly improve the rate of elemental composition predictions. By searching a database of pre-calculated elemental compositions with fixed kinds and numbers of atoms, the approach eliminates redundant evaluations of the same formula that occur in repeated calculations with other tools. When our approach is compared with HR2, which is one of the fastest tools available, our database search times were at least 109 times shorter than those of HR2. When a solid-state drive (SSD) was applied, the search time was 488 times shorter at 5 ppm mass tolerance and 1833 times at 0.1 ppm. Even if the search by HR2 was performed with 8 threads in a high-spec Windows 7 PC, the database search times were at least 26 and 115 times shorter without and with the SSD. These improvements were enhanced in a low spec Windows XP PC. We constructed a web service 'MFSearcher' to query the database in a RESTful manner.

Availability and implementation

Available for free at http://webs2.kazusa.or.jp/mfsearcher. The web service is implemented in Java, MySQL, Apache and Tomcat, with all major browsers supported.

Contact

sakurai@kazusa.or.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-18 +26448704,"The Myriapoda and Onychophora collection (MY) of the Muséum national d'Histoire naturelle (MNHN, Paris).","The Myriapoda and Onychophora collection dataset inventories the occurrence records of the collection of myriapods and onychophorans in the Muséum national d'Histoire naturelle, Paris. The dataset currently consists of 202 lots of onychophorans, representing all of those present, and almost ten thousand (9 795) lots of myriapods, representing 33 to 40% of the MNHN Myriapoda collection. This collection, which is of key historic importance, represents the results of two centuries of myriapod and onychophoran studies. The sources of the collection are worldwide, with a high representation for metropolitan France for the myriapods. None of the occurrences are yet georeferenced. Access to the dataset via the data portals of the MNHN and the GBIF has been made possible through the e-ReColNat project (ANR-11-INBS-0004). The Myriapoda and Onychophora collection of MNHN is actively expanding, hence both the collection and dataset are in continuous growth. The dataset can be accessed through the portals of GBIF at http://www.gbif.org/dataset/3287044c-8c48-4ad6-81d4-4908071bc8db and the MNHN at http://science.mnhn.fr/institution/mnhn/collection/my/item/search/form.",2015-08-25 +22682155,European and international collaboration in affinity proteomics.,"In affinity proteomics, specific protein-binding molecules (a.k.a. binders), principally antibodies, are applied as reagents in proteome analysis. In recent years, advances in binder technologies have created the potential for an unprecedented view on protein expression and distribution patterns in plasma, cells and tissues and increasingly on protein function. 
Particular strengths of affinity proteomics methods include detecting proteins in their natural environments of cell or tissue, high sensitivity and selectivity for detection of low abundance proteins and exploiting binding actions such as functional interference in living cells. To maximise the use and impact of affinity reagents, it will be essential to create comprehensive, standardised binder collections. With this in mind, the EU FP7 programme AFFINOMICS (http://www.affinomics.org), together with the preceding EU programmes ProteomeBinders and AffinityProteome, aims to extend affinity proteomics research by generating a large-scale resource of validated protein-binding molecules for characterisation of the human proteome. Activity is directed at producing binders to about 1000 protein targets, primarily in signal transduction and cancer, by establishing a high throughput, coordinated production pipeline. An important aspect of AFFINOMICS is the development of highly efficient recombinant selection methods, based on phage, cell and ribosome display, capable of producing high quality binders at greater throughput and lower cost than hitherto. The programme also involves development of innovative and sensitive technologies for specific detection of target proteins and their interactions, and deployment of binders in proteomics studies of clinical relevance. The need for such binder generation programmes is now recognised internationally, with parallel initiatives in the USA for cancer (NCI) and transcription factors (NIH) and within the Human Proteome Organisation (HUPO). The papers in this volume of New Biotechnology are all contributed by participants at the 5th ESF Workshop on Affinity Proteomics organised by the AFFINOMICS consortium and held in Alpbach, Austria, in March 2011.",2012-06-01 +27128028,Acute Impact of Hourly Ambient Air Pollution on Preterm Birth.,"

Background

Preterm birth is a major perinatal health problem, but factors leading to it are still not completely understood.

Objectives

Our goal was to identify the relation between acute increase in ambient air pollution in a few hours before onset of labor and the risk of preterm birth.

Methods

We collected registered birth outcome data and hourly ambient air pollution measurements during 2009‒2013 in Brisbane, Australia. Using a time-stratified case-crossover design and conditional logistic regression models with natural cubic splines, we assessed the shape of air pollution-preterm birth curve, after controlling for potential confounders. We also examined the effect modification of other factors.

Results

The association between air pollution [nitrogen dioxide (NO2), sulfur dioxide (SO2), and carbon monoxide (CO)] and preterm birth was nonlinear. Threshold concentrations for the mean of 0‒24 hr NO2, 24‒48 hr SO2, and 24‒48 hr CO before onset of labor were 7.6 parts per billion (ppb), 3.8 ppb, and 162.5 ppb, respectively. Increases in air pollution concentrations above thresholds were associated with increased risks of preterm birth. The odds ratios of preterm birth at the 95th percentile of NO2, SO2, and CO against the thresholds were 1.17 (95% CI: 1.08, 1.27), 1.01 (95% CI: 0.99, 1.04), and 1.18 (95% CI: 1.06, 1.32), respectively. The associations were modified by demographic factors, such as maternal smoking and socioeconomic status.

Conclusion

Acute increases in ambient air pollution concentrations above certain levels before onset of labor may stimulate preterm birth.

Citation

Li S, Guo Y, Williams G. 2016. Acute impact of hourly ambient air pollution on preterm birth. Environ Health Perspect 124:1623-1629; http://dx.doi.org/10.1289/EHP200.",2016-04-29 +26677964,Computational identification of piRNA targets on mouse mRNAs.,"

Motivation

PIWI-interacting RNAs (piRNAs) are a class of small non-coding RNAs that are highly abundant in the germline. One important role of piRNAs is to defend genome integrity by guiding PIWI proteins to silence transposable elements (TEs), which have a high potential to cause deleterious effects on their host. The mechanism of piRNA-mediated post-transcriptional silencing was also observed to affect mRNAs, suggesting that piRNAs might play a broad role in gene expression regulation. However, there has been no systematic report with regard to how many protein-coding genes might be targeted and regulated by piRNAs.

Results

We trained a support vector machine classifier based on a combination of Miwi CLIP-Seq-derived features and position-derived features to predict the potential targets of piRNAs on mRNAs in the mouse. Reanalysis of a published microarray dataset suggested that the expression level of the 2587 protein-coding genes predicted as piRNA targets showed significant upregulation as a whole after abolishing the slicer activity of Miwi, supporting the conclusion that they are subject to piRNA-mediated regulation.

Availability and implementation

A web version of the method, called pirnaPre, as well as our results for browsing, are available at http://www.regulatoryrna.org/software/piRNA/piRNA_target_mRNA/index.php

Contact

crs@sun5.ibp.ac.cn or heshunmin@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-16 +25887214,PathwayBooster: a tool to support the curation of metabolic pathways.,"

Background

Despite several recent advances in the automated generation of draft metabolic reconstructions, the manual curation of these networks to produce high quality genome-scale metabolic models remains a labour-intensive and challenging task.

Results

We present PathwayBooster, an open-source software tool to support the manual comparison and curation of metabolic models. It combines gene annotations from GenBank files and other sources with information retrieved from the metabolic databases BRENDA and KEGG to produce a set of pathway diagrams and reports summarising the evidence for the presence of a reaction in a given organism's metabolic network. By comparing multiple sources of evidence within a common framework, PathwayBooster assists the curator in the identification of likely false positive (misannotated enzyme) and false negative (pathway hole) reactions. Reaction evidence may be taken from alternative annotations of the same genome and/or a set of closely related organisms.

Conclusions

By integrating and visualising evidence from multiple sources, PathwayBooster reduces the manual effort required in the curation of a metabolic model. The software is available online at http://www.theosysbio.bio.ic.ac.uk/resources/pathwaybooster/ .",2015-03-15 +25895970,A Heterologous Reporter Defines the Role of the Tetanus Toxin Interchain Disulfide in Light-Chain Translocation.,"Botulinum neurotoxins (BoNTs) and tetanus toxin (TeNT) are the most potent toxins for humans and elicit unique pathologies due to their ability to traffic within motor neurons. BoNTs act locally within motor neurons to elicit flaccid paralysis, while retrograde TeNT traffics to inhibitory neurons within the central nervous system (CNS) to elicit spastic paralysis. BoNT and TeNT are dichain proteins linked by an interchain disulfide bond comprised of an N-terminal catalytic light chain (LC) and a C-terminal heavy chain (HC) that encodes an LC translocation domain (HCT) and a receptor-binding domain (HCR). LC translocation is the least understood property of toxin action, but it involves low pH, proteolysis, and an intact interchain disulfide bridge. Recently, Pirazzini et al. (FEBS Lett 587:150-155, 2013, http://dx.doi.org/10.1016/j.febslet.2012.11.007) observed that inhibitors of thioredoxin reductase (TrxR) blocked TeNT and BoNT action in cerebellar granular neurons. In the current study, an atoxic TeNT LC translocation reporter was engineered by fusing β-lactamase to the N terminus of TeNT [βlac-TeNT(RY)] to investigate LC translocation in primary cortical neurons and Neuro-2a cells. βlac-TeNT(RY) retained the interchain disulfide bond, showed ganglioside-dependent binding to neurons, required acidification to promote βlac translocation, and was sensitive to auranofin, an inhibitor of thioredoxin reductase. Mutation of βlac-TeNT(RY) at C439S and C467S eliminated the interchain disulfide bond and inhibited βlac translocation. 
These data support the requirement of an intact interchain disulfide for LC translocation and imply that disulfide reduction is a prerequisite for LC delivery into the host cytosol. The data also support a model that LC translocation proceeds from the C to the N terminus. βlac-TeNT(RY) is the first reporter system to measure translocation by an AB single-chain toxin in intact cells.",2015-04-20 +26382194,ScaffoldScaffolder: solving contig orientation via bidirected to directed graph reduction.,"

Motivation

The contig orientation problem, which we formally define as the MAX-DIR problem, has at times been addressed cursorily and at times using various heuristics. In setting forth a linear-time reduction from the MAX-CUT problem to the MAX-DIR problem, we prove the latter is NP-complete. We compare the relative performance of a novel greedy approach with several other heuristic solutions.

Results

Our results suggest that our greedy heuristic algorithm not only works well but also outperforms the other algorithms due to the nature of scaffold graphs. Our results also demonstrate a novel method for identifying inverted repeats and inversion variants, both of which contradict the basic single-orientation assumption. Such inversions have previously been noted as being difficult to detect and are directly involved in the genetic mechanisms of several diseases.

Availability and implementation

http://bioresearch.byu.edu/scaffoldscaffolder.

Contact

paulmbodily@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-17 +23339513,Systematic review and meta-analysis: influence of smoking cessation on incidence of pneumonia in HIV.,"

Background

Smoking is common in people infected with HIV but cessation support is not a routine part of clinical care. The aim was to assess whether smoking is a risk factor for pneumonia in people with HIV and whether smoking cessation ameliorates excess risk.

Methods

We performed MEDLINE and Embase database searches and included cohort or case-control studies conducted in adult patients infected with HIV extracting a hazard ratio (HR) or odds ratio (OR) that compared the incidence of bacterial pneumonia or pneumonia caused by Pneumocystis jiroveci (PCP) between two smoking categories. Studies were appraised for quality and combined using inverse variance meta-analysis.

Results

Fourteen cohort and case-control studies were included. Assessment of outcome was good, but assessment of exposure status was poor. Current smokers were at higher risk of bacterial pneumonia than former smokers: HR 1.37 (95% confidence interval (CI): 1.06, 1.78). There was no evidence that former smokers were at higher risk than never smokers: HR 1.24 (95%CI: 0.96, 1.60). Current smokers were at higher risk of bacterial pneumonia than current non-smokers: HR of 1.73 (95%CI: 1.44, 2.06). There was no evidence that smoking increased the incidence of PCP. The HR for current versus non-smokers was 0.94 (95%CI: 0.79, 1.12), but from case-control studies the OR was 1.76 (95%CI: 1.25, 2.48) with heterogeneity. Confined to higher quality studies, the OR was 0.97 (95%CI: 0.81, 1.16). Residual confounding is possible, but available data suggest this is not an adequate explanation.

Conclusions

Smoking is a risk factor for bacterial pneumonia but not PCP and smoking cessation reduces this risk.See related article: http://www.biomedcentral.com/1741-7015/11/16.",2013-01-22 +23264352,The DegraBase: a database of proteolysis in healthy and apoptotic human cells.,"Proteolysis is a critical post-translational modification for regulation of cellular processes. Our lab has previously developed a technique for specifically labeling unmodified protein N termini, the α-aminome, using the engineered enzyme, subtiligase. Here we present a database, called the DegraBase (http://wellslab.ucsf.edu/degrabase/), which compiles 8090 unique N termini from 3206 proteins directly identified in subtiligase-based positive enrichment mass spectrometry experiments in healthy and apoptotic human cell lines. We include both previously published and unpublished data in our analysis, resulting in a total of 2144 unique α-amines identified in healthy cells, and 6990 in cells undergoing apoptosis. The N termini derive from three general categories of proteolysis with respect to cleavage location and functional role: translational N-terminal methionine processing (∼10% of total proteolysis), sites close to the translational N terminus that likely represent removal of transit or signal peptides (∼25% of total), and finally, other endoproteolytic cuts (∼65% of total). Induction of apoptosis causes relatively little change in the first two proteolytic categories, but dramatic changes are seen in endoproteolysis. For example, we observed 1706 putative apoptotic caspase cuts, more than double the total annotated sites in the CASBAH and MEROPS databases. In the endoproteolysis category, there are a total of nearly 3000 noncaspase nontryptic cleavages that are not currently reported in the MEROPS database. 
These studies significantly increase the annotation for all categories of proteolysis in human cells and allow public access for investigators to explore interesting proteolytic events in healthy and apoptotic human cells.",2012-12-20 +25633490,Comparative modeling and benchmarking data sets for human histone deacetylases and sirtuin families.,"Histone deacetylases (HDACs) are an important class of drug targets for the treatment of cancers, neurodegenerative diseases, and other types of diseases. Virtual screening (VS) has become fairly effective approaches for drug discovery of novel and highly selective histone deacetylase inhibitors (HDACIs). To facilitate the process, we constructed maximal unbiased benchmarking data sets for HDACs (MUBD-HDACs) using our recently published methods that were originally developed for building unbiased benchmarking sets for ligand-based virtual screening (LBVS). The MUBD-HDACs cover all four classes including Class III (Sirtuins family) and 14 HDAC isoforms, composed of 631 inhibitors and 24609 unbiased decoys. Its ligand sets have been validated extensively as chemically diverse, while the decoy sets were shown to be property-matching with ligands and maximal unbiased in terms of ""artificial enrichment"" and ""analogue bias"". We also conducted comparative studies with DUD-E and DEKOIS 2.0 sets against HDAC2 and HDAC8 targets and demonstrate that our MUBD-HDACs are unique in that they can be applied unbiasedly to both LBVS and SBVS approaches. In addition, we defined a novel metric, i.e. NLBScore, to detect the ""2D bias"" and ""LBVS favorable"" effect within the benchmarking sets. In summary, MUBD-HDACs are the only comprehensive and maximal-unbiased benchmark data sets for HDACs (including Sirtuins) that are available so far. MUBD-HDACs are freely available at http://www.xswlab.org/ .",2015-02-09 +25914847,Urinary tract infections and Candida albicans.,"

Introduction

Urinary tract candidiasis is known as the most frequent nosocomial fungal infection worldwide. Candida albicans is the most common cause of nosocomial fungal urinary tract infections; however, a rapid change in the distribution of Candida species is undergoing. Simultaneously, the increase of urinary tract candidiasis has led to the appearance of antifungal resistant Candida species. In this review, we have an in depth look into Candida albicans uropathogenesis and distribution of the three most frequent Candida species contributing to urinary tract candidiasis in different countries around the world.

Material and methods

For writing this review, Google Scholar -a scholarly search engine- (http://scholar.google.com/) and PubMed database (http://www.ncbi.nlm.nih.gov/pubmed/) were used. The most recently published original articles and reviews of literature relating to the first three Candida species causing urinary tract infections in different countries and the pathogenicity of Candida albicans were selected and studied.

Results

Although some studies show rapid changes in the uropathogenesis of Candida species causing urinary tract infections in some countries, Candida albicans is still the most important cause of candidal urinary tract infections.

Conclusions

Despite the ranking of Candida albicans as the dominant species for urinary tract candidiasis, specific changes have occurred in some countries. At this time, it is important to continue the surveillance related to Candida species causing urinary tract infections to prevent, control and treat urinary tract candidiasis in future.",2015-03-13 +24092766,iPEAP: integrating multiple omics and genetic data for pathway enrichment analysis.,"

Unlabelled

A challenge in biodata analysis is to understand the underlying phenomena among many interactions in signaling pathways. Such study is formulated as the pathway enrichment analysis, which identifies relevant pathways functional enriched in high-throughput data. The question faced here is how to analyze different data types in a unified and integrative way by characterizing pathways that these data simultaneously reveal. To this end, we developed integrative Pathway Enrichment Analysis Platform, iPEAP, which handles transcriptomics, proteomics, metabolomics and GWAS data under a unified aggregation schema. iPEAP emphasizes on the ability to aggregate various pathway enrichment results generated in different high-throughput experiments, as well as the quantitative measurements of different ranking results, thus providing the first benchmark platform for integration, comparison and evaluation of multiple types of data and enrichment methods.

Availability and implementation

iPEAP is freely available at http://www.tongji.edu.cn/~qiliu/ipeap.html.",2013-10-03 +21541350,jMOTU and Taxonerator: turning DNA Barcode sequences into annotated operational taxonomic units.,"

Background

DNA barcoding and other DNA sequence-based techniques for investigating and estimating biodiversity require explicit methods for associating individual sequences with taxa, as it is at the taxon level that biodiversity is assessed. For many projects, the bioinformatic analyses required pose problems for laboratories whose prime expertise is not in bioinformatics. User-friendly tools are required for both clustering sequences into molecular operational taxonomic units (MOTU) and for associating these MOTU with known organismal taxonomies.

Results

Here we present jMOTU, a Java program for the analysis of DNA barcode datasets that uses an explicit, determinate algorithm to define MOTU. We demonstrate its usefulness for both individual specimen-based Sanger sequencing surveys and bulk-environment metagenetic surveys using long-read next-generation sequencing data. jMOTU is driven through a graphical user interface, and can analyse tens of thousands of sequences in a short time on a desktop computer. A companion program, Taxonerator, that adds traditional taxonomic annotation to MOTU, is also presented. Clustering and taxonomic annotation data are stored in a relational database, and are thus amenable to subsequent data mining and web presentation.

Conclusions

jMOTU efficiently and robustly identifies the molecular taxa present in survey datasets, and Taxonerator decorates the MOTU with putative identifications. jMOTU and Taxonerator are freely available from http://www.nematodes.org/.",2011-04-25 +23962734,Modeling visual working memory with the MemToolbox. ,"The MemToolbox is a collection of MATLAB functions for modeling visual working memory. In support of its goal to provide a full suite of data analysis tools, the toolbox includes implementations of popular models of visual working memory, real and simulated data sets, Bayesian and maximum likelihood estimation procedures for fitting models to data, visualizations of data and fit, validation routines, model comparison metrics, and experiment scripts. The MemToolbox is released under the permissive BSD license and is available at http://memtoolbox.org.",2013-08-20 +27112575,CompNet: a GUI based tool for comparison of multiple biological interaction networks.,"

Background

Network visualization and analysis tools aid in better understanding of complex biological systems. Furthermore, to understand the differences in behaviour of system(s) under various environmental conditions (e.g. stress, infection), comparing multiple networks becomes necessary. Such comparisons between multiple networks may help in asserting causation and in identifying key components of the studied biological system(s). Although many available network comparison methods exist, which employ techniques like network alignment and querying to compute pair-wise similarity between selected networks, most of them have limited features with respect to interactive visual comparison of multiple networks.

Results

In this paper, we present CompNet - a graphical user interface based network comparison tool, which allows visual comparison of multiple networks based on various network metrics. CompNet allows interactive visualization of the union, intersection and/or complement regions of a selected set of networks. Different visualization features (e.g. pie-nodes, edge-pie matrix, etc.) aid in easy identification of the key nodes/interactions and their significance across the compared networks. The tool also allows one to perform network comparisons on the basis of neighbourhood architecture of constituent nodes and community compositions, a feature particularly useful while analyzing biological networks. To demonstrate the utility of CompNet, we have compared a (time-series) human gene-expression dataset, post-infection by two strains of Mycobacterium tuberculosis, overlaid on the human protein-protein interaction network. Using various functionalities of CompNet not only allowed us to comprehend changes in interaction patterns over the course of infection, but also helped in inferring the probable fates of the host cells upon infection by the two strains.

Conclusions

CompNet is expected to be a valuable visual data mining tool and is freely available for academic use from http://metagenomics.atc.tcs.com/compnet/ or http://121.241.184.233/compnet/.",2016-04-26 +25282642,LocalAli: an evolutionary-based local alignment approach to identify functionally conserved modules in multiple networks.,"

Motivation

Sequences and protein interaction data are of significance to understand the underlying molecular mechanism of organisms. Local network alignment is one of key systematic ways for predicting protein functions, identifying functional modules and understanding the phylogeny from these data. Most of currently existing tools, however, encounter their limitations, which are mainly concerned with scoring scheme, speed and scalability. Therefore, there are growing demands for sophisticated network evolution models and efficient local alignment algorithms.

Results

We developed a fast and scalable local network alignment tool called LocalAli for the identification of functionally conserved modules in multiple networks. In this algorithm, we firstly proposed a new framework to reconstruct the evolution history of conserved modules based on a maximum-parsimony evolutionary model. By relying on this model, LocalAli facilitates interpretation of resulting local alignments in terms of conserved modules, which have been evolved from a common ancestral module through a series of evolutionary events. A meta-heuristic method simulated annealing was used to search for the optimal or near-optimal inner nodes (i.e. ancestral modules) of the evolutionary tree. To evaluate the performance and the statistical significance, LocalAli were tested on 26 real datasets and 1040 randomly generated datasets. The results suggest that LocalAli outperforms all existing algorithms in terms of coverage, consistency and scalability, meanwhile retains a high precision in the identification of functionally coherent subnetworks.

Availability

The source code and test datasets are freely available for download under the GNU GPL v3 license at https://code.google.com/p/localali/.

Contact

jialu.hu@fu-berlin.de or knut.reinert@fu-berlin.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-04 +25086003,Detection of active transcription factor binding sites with the combination of DNase hypersensitivity and histone modifications.,"

Motivation

The identification of active transcriptional regulatory elements is crucial to understand regulatory networks driving cellular processes such as cell development and the onset of diseases. It has recently been shown that chromatin structure information, such as DNase I hypersensitivity (DHS) or histone modifications, significantly improves cell-specific predictions of transcription factor binding sites. However, no method has so far successfully combined both DHS and histone modification data to perform active binding site prediction.

Results

We propose here a method based on hidden Markov models to integrate DHS and histone modifications occupancy for the detection of open chromatin regions and active binding sites. We have created a framework that includes treatment of genomic signals, model training and genome-wide application. In a comparative analysis, our method obtained a good trade-off between sensitivity versus specificity and superior area under the curve statistics than competing methods. Moreover, our technique does not require further training or sequence information to generate binding location predictions. Therefore, the method can be easily applied on new cell types and allow flexible downstream analysis such as de novo motif finding.

Availability and implementation

Our framework is available as part of the Regulatory Genomics Toolbox. The software information and all benchmarking data are available at http://costalab.org/wp/dh-hmm.

Contact

ivan.costa@rwth-aachen.de or eduardo.gusmao@rwth-aachen.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-08-01 +21647445,"HIVToolbox, an integrated web application for investigating HIV.","Many bioinformatic databases and applications focus on a limited domain of knowledge federating links to information in other databases. This segregated data structure likely limits our ability to investigate and understand complex biological systems. To facilitate research, therefore, we have built HIVToolbox, which integrates much of the knowledge about HIV proteins and allows virologists and structural biologists to access sequence, structure, and functional relationships in an intuitive web application. HIV-1 integrase protein was used as a case study to show the utility of this application. We show how data integration facilitates identification of new questions and hypotheses much more rapid and convenient than current approaches using isolated repositories. Several new hypotheses for integrase were created as an example, and we experimentally confirmed a predicted CK2 phosphorylation site. Weblink: [http://hivtoolbox.bio-toolkit.com].",2011-05-25 +23419361,Yes-associated protein up-regulates Jagged-1 and activates the Notch pathway in human hepatocellular carcinoma.,"

Background & aims

Cancer cells often lose contact inhibition to undergo anchorage-independent proliferation and become resistant to apoptosis by inactivating the Hippo signaling pathway, resulting in activation of the transcriptional co-activator yes-associated protein (YAP). However, the oncogenic mechanisms of YAP activity are unclear.

Methods

By using cross-species analysis of expression data, the Notch ligand Jagged-1 (Jag-1) was identified as a downstream target of YAP in hepatocytes and hepatocellular carcinoma (HCC) cells. We analyzed the functions of YAP in HCC cells via overexpression and RNA silencing experiments. We used transgenic mice that overexpressed a constitutively activated form of YAP (YAP(S127A)), and measured protein levels in HCC, colorectal and pancreatic tumor samples from patients.

Results

Human HCC cell lines and mouse hepatocytes that overexpress YAP(S127A) up-regulated Jag-1, leading to activation of the Notch pathway and increased proliferation. Induction of Jag-1, activation of Notch, and cell proliferation required binding of YAP to its transcriptional partner TEA domain family member 4 (TEAD4); TEAD4 binding required the Mst1/2 but not β-catenin signaling. Levels of YAP correlated with Jag-1 expression and Notch signaling in human tumor samples and correlated with shorter survival times of patients with HCC or colorectal cancer.

Conclusions

The transcriptional regulator YAP up-regulates Jag-1 to activate Notch signaling in HCC cells and mouse hepatocytes. YAP-dependent activity of Jag-1 and Notch correlate in human HCC and colorectal tumor samples with patient survival times, suggesting the use of YAP and Notch inhibitors as therapeutics for gastrointestinal cancer. Transcript profiling: microarray information was deposited at the Gene Expression Omnibus database (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?token=jxepvsumwosqkve&acc=GSE35004).",2013-02-16 +24695404,Trimmomatic: a flexible trimmer for Illumina sequence data.,"

Motivation

Although many next-generation sequencing (NGS) read preprocessing tools already existed, we could not find any tool or combination of tools that met our requirements in terms of flexibility, correct handling of paired-end data and high performance. We have developed Trimmomatic as a more flexible and efficient preprocessing tool, which could correctly handle paired-end data.

Results

The value of NGS read preprocessing is demonstrated for both reference-based and reference-free tasks. Trimmomatic is shown to produce output that is at least competitive with, and in many cases superior to, that produced by other tools, in all scenarios tested.

Availability and implementation

Trimmomatic is licensed under GPL V3. It is cross-platform (Java 1.5+ required) and available at http://www.usadellab.org/cms/index.php?page=trimmomatic

Contact

usadel@bio1.rwth-aachen.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-01 +25893715,Profile of European adults interested in internet-based personalised nutrition: the Food4Me study.,"

Purpose

Personalised interventions may have greater potential for reducing the global burden of non-communicable diseases and for promoting better health and well-being across the lifespan than the conventional ""one size fits all"" approach. However, the characteristics of individuals interested in personalised nutrition (PN) are unclear. Therefore, the aim of this study was to describe the characteristics of European adults interested in taking part in an internet-based PN study.

Methods

Individuals from seven European countries (UK, Ireland, Germany, The Netherlands, Spain, Greece and Poland) were invited to participate in the study via the Food4Me website ( http://www.food4me.org ). Two screening questionnaires were used to collect data on socio-demographic, anthropometric and health-related characteristics as well as dietary intakes.

Results

A total of 5662 individuals expressed an interest in the study (mean age 40 ± 12.7; range 15-87 years). Of these, 65 % were female and 97 % were Caucasian. Overall, 13 % were smokers and 47 % reported the presence of a clinically diagnosed disease. Furthermore, 47 % were overweight or obese and 35 % were sedentary during leisure time. Assessment of dietary intakes showed that 54 % of individuals reported consuming at least 5 portions of fruit and vegetables per day, 46 % consumed more than 3 servings of wholegrains and 37 % limited their salt intake to <5.75 g per day.

Conclusions

Our data indicate that individuals volunteering to participate in an internet-based PN study are broadly representative of the European adult population, most of whom had adequate nutrient intakes but could benefit from improved dietary choices and greater physical activity. Future use of internet-based PN approaches is thus relevant to a wide target audience.",2015-04-17 +26589279,CDSfold: an algorithm for designing a protein-coding sequence with the most stable secondary structure.,"

Motivation

An important problem in synthetic biology is to design a nucleotide sequence of an mRNA that confers a desirable expression level of a target protein. The secondary structure of protein-coding sequences (CDSs) is one potential factor that could have both positive and negative effects on protein production. To elucidate the role of secondary structure in CDSs, algorithms for manipulating secondary structure should be developed.

Results

We developed an algorithm for designing a CDS with the most stable secondary structure among all possible ones translated into the same protein, and implemented it as the program CDSfold. The algorithm runs the Zuker algorithm under the constraint of a given amino acid sequence. The time and space complexity is O(L(3)) and O(L(2)), respectively, where L is the length of the CDS to be designed. Although our algorithm is slower than the original Zuker algorithm, it could design a relatively long (2.7-kb) CDS in approximately 1 h.

Availability and implementation

The CDSfold program is freely available for non-commercial users as stand-alone and web-based software from http://cdsfold.trahed.jp/cdsfold/

Contacts

terai-goro@aist.go.jp or asai@k.u-tokyo.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-20 +24332632,Converting positive and negative symptom scores between PANSS and SAPS/SANS.,"The Scale for the Assessment of Positive Symptoms (SAPS), the Scale for the Assessment of Negative Symptoms (SANS), and the Positive and Negative Syndrome Scale for Schizophrenia (PANSS) are the most widely used schizophrenia symptom rating scales, but despite their co-existence for 25 years no easily usable between-scale conversion mechanism exists. The aim of this study was to provide equations for between-scale symptom rating conversions. Two-hundred-and-five schizophrenia patients [mean age±SD=39.5±11.6, 156 males] were assessed with the SANS, SAPS, and PANSS. Pearson's correlations between symptom scores from each of the scales were computed. Linear regression analyses, on data from 176 randomly selected patients, were performed to derive equations for converting ratings between the scales. Intraclass correlations, on data from the remaining 29 patients, not part of the regression analyses, were performed to determine rating conversion accuracy. Between-scale positive and negative symptom ratings were highly correlated. Intraclass correlations between the original positive and negative symptom ratings and those obtained via conversion of alternative ratings using the conversion equations were moderate to high (ICCs=0.65 to 0.91). Regression-based equations may be useful for conversion between schizophrenia symptom severity as measured by the SANS/SAPS and PANSS, though additional validation is warranted. 
This study's conversion equations, implemented at http://converteasy.org, may aid in the comparison of medication efficacy studies, in meta- and mega-analyses examining symptoms as moderator variables, and in retrospective combination of symptom data in multi-center data sharing projects that need to pool symptom rating data when such data are obtained using different scales.",2013-12-11 +21864367,"Datgan, a reusable software system for facile interrogation and visualization of complex transcription profiling data.","

Background

We introduce Glaucoma Discovery Platform (GDP), an online environment for facile visualization and interrogation of complex transcription profiling datasets for glaucoma. We also report the availability of Datgan, the suite of scripts that was developed to construct GDP. This reusable software system complements existing repositories such as NCBI GEO or EBI ArrayExpress as it allows the construction of searchable databases to maximize understanding of user-selected transcription profiling datasets.

Description

Datgan scripts were used to construct both the underlying data tables and the web interface that form GDP. GDP is populated using data from a mouse model of glaucoma. The data was generated using the DBA/2J strain, a widely used mouse model of glaucoma. The DBA/2J-Gpnmb+ strain provided a genetically matched control strain that does not develop glaucoma. We separately assessed both the retina and the optic nerve head, important tissues in glaucoma. We used hierarchical clustering to identify early molecular stages of glaucoma that could not be identified using morphological assessment of disease. GDP has two components. First, an interactive search and retrieve component provides the ability to assess gene(s) of interest in all identified stages of disease in both the retina and optic nerve head. The output is returned in graphical and tabular format with statistically significant differences highlighted for easy visual analysis. Second, a bulk download component allows lists of differentially expressed genes to be retrieved as a series of files compatible with Excel. To facilitate access to additional information available for genes of interest, GDP is linked to selected external resources including Mouse Genome Informatics and Online Mendelian Inheritance in Man (OMIM).

Conclusion

Datgan-constructed databases allow user-friendly access to datasets that involve temporally ordered stages of disease or developmental stages. Datgan and GDP are available from http://glaucomadb.jax.org/glaucoma.",2011-08-24 +25273503,CSI 2.0: a significantly improved version of the Chemical Shift Index.,"Protein chemical shifts have long been used by NMR spectroscopists to assist with secondary structure assignment and to provide useful distance and torsion angle constraint data for structure determination. One of the most widely used methods for secondary structure identification is called the Chemical Shift Index (CSI). The CSI method uses a simple digital chemical shift filter to locate secondary structures along the protein chain using backbone (13)C and (1)H chemical shifts. While the CSI method is simple to use and easy to implement, it is only about 75-80% accurate. Here we describe a significantly improved version of the CSI (2.0) that uses machine-learning techniques to combine all six backbone chemical shifts ((13)Cα, (13)Cβ, (13)C, (15)N, (1)HN, (1)Hα) with sequence-derived features to perform far more accurate secondary structure identification. Our tests indicate that CSI 2.0 achieved an average identification accuracy (Q3) of 90.56% for a training set of 181 proteins in a repeated tenfold cross-validation and 89.35% for a test set of 59 proteins. This represents a significant improvement over other state-of-the-art chemical shift-based methods. In particular, the level of performance of CSI 2.0 is equal to that of standard methods, such as DSSP and STRIDE, used to identify secondary structures via 3D coordinate data. This suggests that CSI 2.0 could be used both in providing accurate NMR constraint data in the early stages of protein structure determination as well as in defining secondary structure locations in the final protein model(s). 
A CSI 2.0 web server (http://csi.wishartlab.com) is available for submitting the input queries for secondary structure identification.",2014-10-02 +26717258,Prediction of logP for Pt(II) and Pt(IV) complexes: Comparison of statistical and quantum-chemistry based approaches.,"The octanol/water partition coefficient, logP, is one of the most important physico-chemical parameters for the development of new metal-based anticancer drugs with improved pharmacokinetic properties. This study addresses an issue with the absence of publicly available models to predict logP of Pt(IV) complexes. Following data collection and subsequent development of models based on 187 complexes from literature, we validate new and previously published models on a new set of 11 Pt(II) and 35 Pt(IV) complexes, which were kept blind during the model development step. The error of the consensus model, 0.65 for Pt(IV) and 0.37 for Pt(II) complexes, indicates its good accuracy of predictions. The lower accuracy for Pt(IV) complexes was attributed to experimental difficulties with logP measurements for some poorly-soluble compounds. This model was developed using general-purpose descriptors such as extended functional groups, molecular fragments and E-state indices. Surprisingly, models based on quantum-chemistry calculations provided lower prediction accuracy. We also found that all the developed models strongly overestimate logP values for the three complexes measured in the presence of DMSO. Considering that DMSO is frequently used as a solvent to store chemicals, its effect should not be overlooked when logP measurements by means of the shake flask method are performed. The final models are freely available at http://ochem.eu/article/76903.",2015-12-11 +26794354,Extending ITC to Kinetics with kinITC.,"Isothermal titration calorimetry (ITC) has long been used for kinetic studies in chemistry, but this remained confined to enzymatic studies in the biological field. 
In fact, the biological community has long had the tendency of ignoring the kinetic possibilities of ITC considering it solely as a thermodynamic technique, whereas surface plasmon resonance is seen as the kinetic technique par excellence. However, the primary signal recorded by ITC is a heat power which is directly related to the kinetics of the reaction. Here, it is shown how this kinetic signal can be recovered by using kinITC, the kinetic extension of ITC. The theoretical basis of kinITC is detailed for the most common situation of a second-order reaction A+B ⇌ C characterized by kinetic parameters kon, koff. A simplified kinITC-ETC method based upon the determination of an ""Equilibration Time Curve"" (ETC) is presented. The ETC is obtained by automatic determination of the ""effective end"" of each injection. The method is illustrated with experimental results with a comparison to Surface Plasmon Resonance (SPR) data. kon values were obtained in a wide range, from 10(3) to 0.5×10(6) M(-1) s(-1). All procedures were implemented in the program AFFINImeter (https://www.affinimeter.com/).",2015-10-30 +22171328,seeQTL: a searchable database for human eQTLs.,"

Summary

seeQTL is a comprehensive and versatile eQTL database, including various eQTL studies and a meta-analysis of HapMap eQTL information. The database presents eQTL association results in a convenient browser, using both segmented local-association plots and genome-wide Manhattan plots.

Availability and implementation

seeQTL is freely available for non-commercial use at http://www.bios.unc.edu/research/genomic_software/seeQTL/.

Contact

fred_wright@unc.edu; kxia@bios.unc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-13 +25409663,Indel reliability in indel-based phylogenetic inference.,"It is often assumed that it is unlikely that the same insertion or deletion (indel) event occurred at the same position in two independent evolutionary lineages, and thus, indel-based inference of phylogeny should be less subject to homoplasy compared with standard inference which is based on substitution events. Indeed, indels were successfully used to solve debated evolutionary relationships among various taxonomical groups. However, indels are never directly observed but rather inferred from the alignment and thus indel-based inference may be sensitive to alignment errors. It is hypothesized that phylogenetic reconstruction would be more accurate if it relied only on a subset of reliable indels instead of the entire indel data. Here, we developed a method to quantify the reliability of indel characters by measuring how often they appear in a set of alternative multiple sequence alignments. Our approach is based on the assumption that indels that are consistently present in most alternative alignments are more reliable compared with indels that appear only in a small subset of these alignments. Using simulated and empirical data, we studied the impact of filtering and weighting indels by their reliability scores on the accuracy of indel-based phylogenetic reconstruction. The new method is available as a web-server at http://guidance.tau.ac.il/RELINDEL/.",2014-11-18 +26680011,LENS: web-based lens for enrichment and network studies of human proteins.,"

Background

Network analysis is a common approach for the study of genetic view of diseases and biological pathways. Typically, when a set of genes are identified to be of interest in relation to a disease, say through a genome wide association study (GWAS) or a different gene expression study, these genes are typically analyzed in the context of their protein-protein interaction (PPI) networks. Further analysis is carried out to compute the enrichment of known pathways and disease-associations in the network. Having tools for such analysis at the fingertips of biologists without the requirement for computer programming or curation of data would accelerate the characterization of genes of interest. Currently available tools do not integrate network and enrichment analysis and their visualizations, and most of them present results in formats not most conducive to human cognition.

Results

We developed the tool Lens for Enrichment and Network Studies of human proteins (LENS) that performs network and pathway and diseases enrichment analyses on genes of interest to users. The tool creates a visualization of the network, provides easy to read statistics on network connectivity, and displays Venn diagrams with statistical significance values of the network's association with drugs, diseases, pathways, and GWASs. We used the tool to analyze gene sets related to craniofacial development, autism, and schizophrenia.

Conclusion

LENS is a web-based tool that does not require any download or plugins to use. The tool is free and does not require login for use, and is available at http://severus.dbmi.pitt.edu/LENS.",2015-12-09 +26930707,Haemophilus ducreyi Seeks Alternative Carbon Sources and Adapts to Nutrient Stress and Anaerobiosis during Experimental Infection of Human Volunteers.,"Haemophilus ducreyi causes the sexually transmitted disease chancroid in adults and cutaneous ulcers in children. In humans, H. ducreyi resides in an abscess and must adapt to a variety of stresses. Previous studies (D. Gangaiah, M. Labandeira-Rey, X. Zhang, K. R. Fortney, S. Ellinger, B. Zwickl, B. Baker, Y. Liu, D. M. Janowicz, B. P. Katz, C. A. Brautigam, R. S. Munson Jr, E. J. Hansen, and S. M. Spinola, mBio 5:e01081-13, 2014, http://dx.doi.org/10.1128/mBio.01081-13) suggested that H. ducreyi encounters growth conditions in human lesions resembling those found in stationary phase. However, how H. ducreyi transcriptionally responds to stress during human infection is unknown. Here, we determined the H. ducreyi transcriptome in biopsy specimens of human lesions and compared it to the transcriptomes of bacteria grown to mid-log, transition, and stationary phases. Multidimensional scaling showed that the in vivo transcriptome is distinct from those of in vitro growth. Compared to the inoculum (mid-log-phase bacteria), H. ducreyi harvested from pustules differentially expressed ∼93 genes, of which 62 were upregulated. The upregulated genes encode homologs of proteins involved in nutrient transport, alternative carbon pathways (l-ascorbate utilization and metabolism), growth arrest response, heat shock response, DNA recombination, and anaerobiosis. H. ducreyi upregulated few genes (hgbA, flp-tad, and lspB-lspA2) encoding virulence determinants required for human infection. 
Most genes regulated by CpxRA, RpoE, Hfq, (p)ppGpp, and DksA, which control the expression of virulence determinants and adaptation to a variety of stresses, were not differentially expressed in vivo, suggesting that these systems are cycling on and off during infection. Taken together, these data suggest that the in vivo transcriptome is distinct from those of in vitro growth and that adaptation to nutrient stress and anaerobiosis is crucial for H. ducreyi survival in humans.",2016-04-22 +26578570,PlanMine--a mineable resource of planarian biology and biodiversity.,"Planarian flatworms are in the midst of a renaissance as a model system for regeneration and stem cells. Besides two well-studied model species, hundreds of species exist worldwide that present a fascinating diversity of regenerative abilities, tissue turnover rates, reproductive strategies and other life history traits. PlanMine (http://planmine.mpi-cbg.de/) aims to accomplish two primary missions: First, to provide an easily accessible platform for sharing, comparing and value-added mining of planarian sequence data. Second, to catalyze the comparative analysis of the phenotypic diversity amongst planarian species. Currently, PlanMine houses transcriptomes independently assembled by our lab and community contributors. Detailed assembly/annotation statistics, a custom-developed BLAST viewer and easy export options enable comparisons at the contig and assembly level. Consistent annotation of all transcriptomes by an automated pipeline, the integration of published gene expression information and inter-relational query tools provide opportunities for mining planarian gene sequences and functions. For inter-species comparisons, we include transcriptomes of, so far, six planarian species, along with images, expert-curated information on their biology and pre-calculated cross-species sequence homologies. 
PlanMine is based on the popular InterMine system in order to make the rich biology of planarians accessible to the general life sciences research community.",2015-11-17 +24823498,RNAbrowse: RNA-Seq de novo assembly results browser.,"Transcriptome analysis based on a de novo assembly of next generation RNA sequences is now performed routinely in many laboratories. The generated results, including contig sequences, quantification figures, functional annotations and variation discovery outputs are usually bulky and quite diverse. This article presents a user oriented storage and visualisation environment permitting to explore the data in a top-down manner, going from general graphical views to all possible details. The software package is based on biomart, easy to install and populate with local data. The software package is available under the GNU General Public License (GPL) at http://bioinfo.genotoul.fr/RNAbrowse.",2014-05-13 +25678934,AdmixKJump: identifying population structure in recently diverged groups.,"

Motivation

Correctly modeling population structure is important for understanding recent evolution and for association studies in humans. While pre-existing knowledge of population history can be used to specify expected levels of subdivision, objective metrics to detect population structure are important and may even be preferable for identifying groups in some situations. One such metric for genomic scale data is implemented in the cross-validation procedure of the program ADMIXTURE, but it has not been evaluated on recently diverged and potentially cryptic levels of population structure. Here, I develop a new method, AdmixKJump, and test both metrics under this scenario.

Findings

I show that AdmixKJump is more sensitive to recent population divisions compared to the cross-validation metric using both realistic simulations, as well as 1000 Genomes Project European genomic data. With two populations of 50 individuals each, AdmixKJump is able to detect two populations with 100% accuracy that split at least 10KYA, whereas cross-validation obtains this 100% level at 14KYA. I also show that AdmixKJump is more accurate with fewer samples per population. Furthermore, in contrast to the cross-validation approach, AdmixKJump is able to detect the population split between the Finnish and Tuscan populations of the 1000 Genomes Project.

Conclusion

AdmixKJump has more power to detect the number of populations in a cohort of samples with smaller sample sizes and shorter divergence times.

Availability

A java implementation can be found at https://sites.google.com/site/igsevolgenomicslab/home/downloads.",2015-02-03 +25150250,"ProtocolNavigator: emulation-based software for the design, documentation and reproduction biological experiments.","

Motivation

Experimental reproducibility is fundamental to the progress of science. Irreproducible research decreases the efficiency of basic biological research and drug discovery and impedes experimental data reuse. A major contributing factor to irreproducibility is difficulty in interpreting complex experimental methodologies and designs from written text and in assessing variations among different experiments. Current bioinformatics initiatives either are focused on computational research reproducibility (i.e. data analysis) or laboratory information management systems. Here, we present a software tool, ProtocolNavigator, which addresses the largely overlooked challenges of interpretation and assessment. It provides a biologist-friendly open-source emulation-based tool for designing, documenting and reproducing biological experiments.

Availability and implementation

ProtocolNavigator was implemented in Python 2.7, using the wx module to build the graphical user interface. It is a platform-independent software and freely available from http://protocolnavigator.org/index.html under the GPL v2 license.",2014-08-22 +22009674,The DARC site: a database of aligned ribosomal complexes.,"The ribosome is a highly dynamic machine responsible for protein synthesis within the cell. Cryo-electron microscopy (cryo-EM) and X-ray crystallography structures of ribosomal particles, alone and in complex with diverse ligands (protein factors, RNAs and small molecules), have revealed the dynamic nature of the ribosome and provided much needed insight into translation and its regulation. In the past years, there has been exponential growth in the deposition of cryo-EM maps into the Electron Microscopy Data Bank (EMDB) as well as atomic structures into the Protein Data Bank (PDB). Unfortunately, the deposited ribosomal particles usually have distinct orientations with respect to one another, which complicate the comparison of the available structures. To simplify this, we have developed a Database of Aligned Ribosomal Complexes, the DARC site (http://darcsite.genzentrum.lmu.de/darc/), which houses the available cryo-EM maps and atomic coordinates of ribosomal particles from the EMDB and PDB aligned within a common coordinate system. An easy-to-use, searchable interface allows users to access and download >130 cryo-EM maps and >300 atomic models in the format of brix and pdb files, respectively. The aligned coordinate system substantially simplifies direct visualization of conformational changes in the ribosome, such as subunit rotation and head-swiveling, as well as direct comparison of bound ligands, such as antibiotics or translation factors.",2011-10-18 +25161220,Fiona: a parallel and automatic strategy for read error correction.,"

Motivation

Automatic error correction of high-throughput sequencing data can have a dramatic impact on the amount of usable base pairs and their quality. It has been shown that the performance of tasks such as de novo genome assembly and SNP calling can be dramatically improved after read error correction. While a large number of methods specialized for correcting substitution errors as found in Illumina data exist, few methods for the correction of indel errors, common to technologies like 454 or Ion Torrent, have been proposed.

Results

We present Fiona, a new stand-alone read error-correction method. Fiona provides a new statistical approach for sequencing error detection and optimal error correction and estimates its parameters automatically. Fiona is able to correct substitution, insertion and deletion errors and can be applied to any sequencing technology. It uses an efficient implementation of the partial suffix array to detect read overlaps with different seed lengths in parallel. We tested Fiona on several real datasets from a variety of organisms with different read lengths and compared its performance with state-of-the-art methods. Fiona shows a constantly higher correction accuracy over a broad range of datasets from 454 and Ion Torrent sequencers, without compromise in speed.

Conclusion

Fiona is an accurate parameter-free read error-correction method that can be run on inexpensive hardware and can make use of multicore parallelization whenever available. Fiona was implemented using the SeqAn library for sequence analysis and is publicly available for download at http://www.seqan.de/projects/fiona.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +23129220,"SNObase, a database for S-nitrosation modification.","S-Nitros(yl)ation is a ubiquitous redox-based post-translational modification of protein cysteine thiols by nitric oxide or its derivatives, which transduces the bioactivity of nitric oxide (NO) by regulation of protein conformation, activity, stability, localization and protein-protein interactions. These years, more and more S-nitrosated proteins were identified in physiological and pathological processes and the number is still growing. Here we developed a database named SNObase ( http://www.nitrosation.org ), which collected S-nitrosation targets extracted from literatures up to June 1st, 2012. SNObase contained 2561 instances, and provided information about S-nitrosation targets, sites, biological model, related diseases, trends of S-nitrosation level and effects of S-nitrosation on protein function. With SNObase, we did functional analysis for all the SNO targets: In the gene ontology (GO) biological process category, some processes were discovered to be related to S-nitrosation (""response to drug"", ""regulation of cell motion"") besides the previously reported related processes. In the GO cellular component category, cytosol and mitochondrion were both enriched. From the KEGG pathway enrichment results, we found SNO targets were enriched in different diseases, which suggests possible significant roles of S-nitrosation in the progress of these diseases. This SNObase means to be a database with precise, comprehensive and easily accessible information, an environment to help researchers integrate data with comparison and relevancy analysis between different groups or works, and also an SNO knowledgebase offering feasibility for systemic and global analysis of S-nitrosation in interdisciplinary studies.",2012-11-06 +25414365,SAMNetWeb: identifying condition-specific networks linking signaling and transcription.,"

Motivation

High-throughput datasets such as genetic screens, mRNA expression assays and global phospho-proteomic experiments are often difficult to interpret due to inherent noise in each experimental system. Computational tools have improved interpretation of these datasets by enabling the identification of biological processes and pathways that are most likely to explain the measured results. These tools are primarily designed to analyse data from a single experiment (e.g. drug treatment versus control), creating a need for computational algorithms that can handle heterogeneous datasets across multiple experimental conditions at once.

Summary

We introduce SAMNetWeb, a web-based tool that enables functional enrichment analysis and visualization of high-throughput datasets. SAMNetWeb can analyse two distinct data types (e.g. mRNA expression and global proteomics) simultaneously across multiple experimental systems to identify pathways activated in these experiments and then visualize the pathways in a single interaction network. Through the use of a multi-commodity flow based algorithm that requires each experiment 'share' underlying protein interactions, SAMNetWeb can identify distinct and common pathways across experiments.

Availability and implementation

SAMNetWeb is freely available at http://fraenkel.mit.edu/samnetweb.",2014-11-19 +24270047,ProfileDB: a resource for proteomics and cross-omics biomarker discovery.,"The increasing size and complexity of high-throughput datasets pose a growing challenge for researchers. Often very different (cross-omics) techniques with individual data analysis pipelines are employed making a unified biomarker discovery strategy and a direct comparison of different experiments difficult and time consuming. Here we present the comprehensive web-based application ProfileDB. The application is designed to integrate data from different high-throughput 'omics' data types (Transcriptomics, Proteomics, Metabolomics) with clinical parameters and prior knowledge on pathways and ontologies. Beyond data storage, ProfileDB provides a set of dedicated tools for study inspection and data visualization. The user can gain insights into a complex experiment with just a few mouse clicks. We will demonstrate the application by presenting typical use cases for the identification of proteomics biomarkers. All presented analyses can be reproduced using the public ProfileDB web server. The ProfileDB application is available by standard browser (Firefox 18+, Internet Explorer Version 9+) technology via http://profileDB.-microdiscovery.de/ (login and pass-word: profileDB). The installation contains several public datasets including different cross-'omics' experiments. This article is part of a Special Issue entitled: Biomarkers: A Proteomic Challenge.",2013-11-20 +24740330,Analysis of ultra-deep pyrosequencing and cloning based sequencing of the basic core promoter/precore/core region of hepatitis B virus using newly developed bioinformatics tools.,"

Aims

The aims of this study were to develop bioinformatics tools to explore ultra-deep pyrosequencing (UDPS) data, to test these tools, and to use them to determine the optimum error threshold, and to compare results from UDPS and cloning based sequencing (CBS).

Methods

Four serum samples, infected with either genotype D or E, from HBeAg-positive and HBeAg-negative patients were randomly selected. UDPS and CBS were used to sequence the basic core promoter/precore region of HBV. Two online bioinformatics tools, the ""Deep Threshold Tool"" and the ""Rosetta Tool"" (http://hvdr.bioinf.wits.ac.za/tools/), were built to test and analyze the generated data.

Results

A total of 10952 reads were generated by UDPS on the 454 GS Junior platform. In the four samples, substitutions, detected at 0.5% threshold or above, were identified at 39 unique positions, 25 of which were non-synonymous mutations. Sample #2 (HBeAg-negative, genotype D) had substitutions in 26 positions, followed by sample #1 (HBeAg-negative, genotype E) in 12 positions, sample #3 (HBeAg-positive, genotype D) in 7 positions and sample #4 (HBeAg-positive, genotype E) in only four positions. The ratio of nucleotide substitutions between isolates from HBeAg-negative and HBeAg-positive patients was 3.5 ∶ 1. Compared to genotype E isolates, genotype D isolates showed greater variation in the X, basic core promoter/precore and core regions. Only 18 of the 39 positions identified by UDPS were detected by CBS, which detected 14 of the 25 non-synonymous mutations detected by UDPS.

Conclusion

UDPS data should be approached with caution. Appropriate curation of read data is required prior to analysis, in order to clean the data and eliminate artefacts. CBS detected fewer than 50% of the substitutions detected by UDPS. Furthermore it is important that the appropriate consensus (reference) sequence is used in order to identify variants correctly.",2014-04-16 +26067384,ChemCom: A Software Program for Searching and Comparing Chemical Libraries.,"An efficient chemical comparator, a computer application facilitating searching and comparing chemical libraries, is useful in drug discovery and other relevant areas. The need for an efficient and user-friendly chemical comparator prompted us to develop ChemCom (Chemical Comparator) based on Java Web Start (JavaWS) technology. ChemCom provides a user-friendly graphical interface to a number of fast algorithms including a novel algorithm termed UnionBit Tree Algorithm. It utilizes an intuitive stepwise mechanism for selecting chemical comparison parameters before starting the comparison process. UnionBit has shown approximately a 165% speedup on average compared to its closest competitive algorithm implemented in ChemCom over real data. It is approximately 11 times faster than the Open Babel FastSearch algorithm in our tests. ChemCom can be accessed free-of-charge via a user-friendly website at http://bioinformatics.org/chemcom/.",2015-06-24 +25061390,"A Linnaeus NG (TM) interactive key to the Lithocolletinae of North-West Europe aimed at accelerating the accumulation of reliable biodiversity data (Lepidoptera, Gracillariidae).","We present an interactive key that is available online through any web browser without the need to install any additional software, making it an easily accessible tool for the larger public. The key can be found at http://identify.naturalis.nl/lithocolletinae. 
The key includes all 86 North-West European Lithocolletinae, a subfamily of smaller moths (""micro-moths"") that is commonly not treated in field guides. The user can input data on several external morphological character systems in addition to distribution, host plant and even characteristics of the larval feeding traces to reach an identification. We expect that this will enable more people to contribute with reliable observation data on this group of moths and alleviate the workload of taxonomic specialists, allowing them to focus on other new keys or taxonomic work.",2014-07-03 +25341390,SeAMotE: a method for high-throughput motif discovery in nucleic acid sequences.,"

Background

The large amount of data produced by high-throughput sequencing poses new computational challenges. In the last decade, several tools have been developed for the identification of transcription and splicing factor binding sites.

Results

Here, we introduce the SeAMotE (Sequence Analysis of Motifs Enrichment) algorithm for discovery of regulatory regions in nucleic acid sequences. SeAMotE provides (i) a robust analysis of high-throughput sequence sets, (ii) a motif search based on pattern occurrences and (iii) an easy-to-use web-server interface. We applied our method to recently published data including 351 chromatin immunoprecipitation (ChIP) and 13 crosslinking immunoprecipitation (CLIP) experiments and compared our results with those of other well-established motif discovery tools. SeAMotE shows an average accuracy of 80% in finding discriminative motifs and outperforms other methods available in literature.

Conclusions

SeAMotE is a fast, accurate and flexible algorithm for the identification of sequence patterns involved in protein-DNA and protein-RNA recognition. The server can be freely accessed at http://s.tartaglialab.com/new_submission/seamote.",2014-10-23 +25170027,RVboost: RNA-seq variants prioritization using a boosting method.,"

Motivation

RNA-seq has become the method of choice to quantify genes and exons, discover novel transcripts and detect fusion genes. However, reliable variant identification from RNA-seq data remains challenging because of the complexities of the transcriptome, the challenges of accurately mapping exon boundary spanning reads and the bias introduced during the sequencing library preparation.

Method

We developed RVboost, a novel method specific for RNA variant prioritization. RVboost uses several attributes unique in the process of RNA library preparation, sequencing and RNA-seq data analyses. It uses a boosting method to train a model of 'good quality' variants using common variants from HapMap, and prioritizes and calls the RNA variants based on the trained model. We packaged RVboost in a comprehensive workflow, which integrates tools of variant calling, annotation and filtering.

Results

RVboost consistently outperforms the variant quality score recalibration from the Genome Analysis Tool Kit and the RNA-seq variant-calling pipeline SNPiR in 12 RNA-seq samples using ground-truth variants from paired exome sequencing data. Several RNA-seq-specific attributes were identified as critical to differentiate true and false variants, including the distance of the variant positions to exon boundaries, and the percent of the reads supporting the variant in the first six base pairs. The latter identifies false variants introduced by the random hexamer priming during the library construction.

Availability and implementation

The RVboost package is implemented to readily run in Mac or Linux environments. The software and user manual are available at http://bioinformaticstools.mayo.edu/research/rvboost/.",2014-08-27 +25417202,Molecular principles of human virus protein-protein interactions.,"

Motivation

Viruses, from the human protein-protein interaction network perspective, target hubs, bottlenecks and interconnected nodes enriched in certain biological pathways. However, not much is known about the general characteristic features of the human proteins interacting with viral proteins (referred to as hVIPs) as well as the motifs and domains utilized by human-virus protein-protein interactions (referred to as Hu-Vir PPIs).

Results

Our study has revealed that hVIPs are mostly disordered proteins, whereas viral proteins are mostly ordered proteins. Protein disorder in viral proteins and hVIPs varies from one subcellular location to another. In any given viral-human PPI pair, at least one of the two proteins is structurally disordered suggesting that disorder associated conformational flexibility as one of the characteristic features of virus-host interaction. Further analyses reveal that hVIPs are (i) slowly evolving proteins, (ii) associated with high centrality scores in human-PPI network, (iii) involved in multiple pathways, (iv) enriched in eukaryotic linear motifs (ELMs) associated with protein modification, degradation and regulatory processes, (v) associated with high number of splice variants and (vi) expressed abundantly across multiple tissues. These aforementioned findings suggest that conformational flexibility, spatial diversity, abundance and slow evolution are the characteristic features of the human proteins targeted by viral proteins. Hu-Vir PPIs are mostly mediated via domain-motif interactions (DMIs) where viral proteins employ motifs that mimic host ELMs to bind to domains in human proteins. DMIs are shared among viruses belonging to different families indicating a possible convergent evolution of these motifs to help viruses to adopt common strategies to subvert host cellular pathways.

Availability and implementation

Hu-Vir PPI data, DDI and DMI data for human-virus PPI can be downloaded from http://cdfd.org.in/labpages/computational_biology_datasets.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-21 +26198102,Roary: rapid large-scale prokaryote pan genome analysis.,"

Unlabelled

A typical prokaryote population sequencing study can now consist of hundreds or thousands of isolates. Interrogating these datasets can provide detailed insights into the genetic structure of prokaryotic genomes. We introduce Roary, a tool that rapidly builds large-scale pan genomes, identifying the core and accessory genes. Roary makes construction of the pan genome of thousands of prokaryote samples possible on a standard desktop without compromising on the accuracy of results. Using a single CPU Roary can produce a pan genome consisting of 1000 isolates in 4.5 hours using 13 GB of RAM, with further speedups possible using multiple processors.

Availability and implementation

Roary is implemented in Perl and is freely available under an open source GPLv3 license from http://sanger-pathogens.github.io/Roary

Contact

roary@sanger.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-20 +26317635,"Prenatal Ambient Air Pollution, Placental Mitochondrial DNA Content, and Birth Weight in the INMA (Spain) and ENVIRONAGE (Belgium) Birth Cohorts.","

Background

Mitochondria are sensitive to environmental toxicants due to their lack of repair capacity. Changes in mitochondrial DNA (mtDNA) content may represent a biologically relevant intermediate outcome in mechanisms linking air pollution and fetal growth restriction.

Objective

We investigated whether placental mtDNA content is a possible mediator of the association between prenatal nitrogen dioxide (NO2) exposure and birth weight.

Methods

We used data from two independent European cohorts: INMA (n = 376; Spain) and ENVIRONAGE (n = 550; Belgium). Relative placental mtDNA content was determined as the ratio of two mitochondrial genes (MT-ND1 and MTF3212/R3319) to two control genes (RPLP0 and ACTB). Effect estimates for individual cohorts and the pooled data set were calculated using multiple linear regression and mixed models. We also performed a mediation analysis.

Results

Pooled estimates indicated that a 10-μg/m3 increment in average NO2 exposure during pregnancy was associated with a 4.9% decrease in placental mtDNA content (95% CI: -9.3, -0.3%) and a 48-g decrease (95% CI: -87, -9 g) in birth weight. However, the association with birth weight was significant for INMA (-66 g; 95% CI: -111, -23 g) but not for ENVIRONAGE (-20 g; 95% CI: -101, 62 g). Placental mtDNA content was associated with significantly higher mean birth weight (pooled analysis, interquartile range increase: 140 g; 95% CI: 43, 237 g). Mediation analysis estimates, which were derived for the INMA cohort only, suggested that 10% (95% CI: 6.6, 13.0 g) of the association between prenatal NO2 and birth weight was mediated by changes in placental mtDNA content.

Conclusion

Our results suggest that mtDNA content can be one of the potential mediators of the association between prenatal air pollution exposure and birth weight.

Citation

Clemente DB, Casas M, Vilahur N, Begiristain H, Bustamante M, Carsin AE, Fernández MF, Fierens F, Gyselaers W, Iñiguez C, Janssen BG, Lefebvre W, Llop S, Olea N, Pedersen M, Pieters N, Santa Marina L, Souto A, Tardón A, Vanpoucke C, Vrijheid M, Sunyer J, Nawrot TS. 2016. Prenatal ambient air pollution, placental mitochondrial DNA content, and birth weight in the INMA (Spain) and ENVIRONAGE (Belgium) birth cohorts. Environ Health Perspect 124:659-665; http://dx.doi.org/10.1289/ehp.1408981.",2015-08-28 +26285222,A Granular Self-Organizing Map for Clustering and Gene Selection in Microarray Data.,"A new granular self-organizing map (GSOM) is developed by integrating the concept of a fuzzy rough set with the SOM. While training the GSOM, the weights of a winning neuron and the neighborhood neurons are updated through a modified learning procedure. The neighborhood is newly defined using the fuzzy rough sets. The clusters (granules) evolved by the GSOM are presented to a decision table as its decision classes. Based on the decision table, a method of gene selection is developed. The effectiveness of the GSOM is shown in both clustering samples and developing an unsupervised fuzzy rough feature selection (UFRFS) method for gene selection in microarray data. While the superior results of the GSOM, as compared with the related clustering methods, are provided in terms of β -index, DB-index, Dunn-index, and fuzzy rough entropy, the genes selected by the UFRFS are not only better in terms of classification accuracy and a feature evaluation index, but also statistically more significant than the related unsupervised methods. The C-codes of the GSOM and UFRFS are available online at http://avatharamg.webs.com/software-code.",2015-08-13 +25735772,14-3-3-Pred: improved methods to predict 14-3-3-binding phosphopeptides.,"

Motivation

The 14-3-3 family of phosphoprotein-binding proteins regulates many cellular processes by docking onto pairs of phosphorylated Ser and Thr residues in a constellation of intracellular targets. Therefore, there is a pressing need to develop new prediction methods that use an updated set of 14-3-3-binding motifs for the identification of new 14-3-3 targets and to prioritize the downstream analysis of >2000 potential interactors identified in high-throughput experiments.

Results

Here, a comprehensive set of 14-3-3-binding targets from the literature was used to develop 14-3-3-binding phosphosite predictors. Position-specific scoring matrix, support vector machines (SVM) and artificial neural network (ANN) classification methods were trained to discriminate experimentally determined 14-3-3-binding motifs from non-binding phosphopeptides. ANN, position-specific scoring matrix and SVM methods showed best performance for a motif window spanning from -6 to +4 around the binding phosphosite, achieving Matthews correlation coefficient of up to 0.60. Blind prediction showed that all three methods outperform two popular 14-3-3-binding site predictors, Scansite and ELM. The new methods were used for prediction of 14-3-3-binding phosphosites in the human proteome. Experimental analysis of high-scoring predictions in the FAM122A and FAM122B proteins confirms the predictions and suggests the new 14-3-3-predictors will be generally useful.

Availability and implementation

A standalone prediction web server is available at http://www.compbio.dundee.ac.uk/1433pred. Human candidate 14-3-3-binding phosphosites were integrated in ANIA: ANnotation and Integrated Analysis of the 14-3-3 interactome database.",2015-03-03 +25732075,An obstructive sleep apnea detection approach using kernel density classification based on single-lead electrocardiogram.,"Obstructive sleep apnea (OSA) is a common sleep disorder that often remains undiagnosed, leading to an increased risk of developing cardiovascular diseases. Polysomnogram (PSG) is currently used as a golden standard for screening OSA. However, because it is time consuming, expensive and causes discomfort, alternative techniques based on a reduced set of physiological signals are proposed to solve this problem. This study proposes a convenient non-parametric kernel density-based approach for detection of OSA using single-lead electrocardiogram (ECG) recordings. Selected physiologically interpretable features are extracted from segmented RR intervals, which are obtained from ECG signals. These features are fed into the kernel density classifier to detect apnea event and bandwidths for density of each class (normal or apnea) are automatically chosen through an iterative bandwidth selection algorithm. To validate the proposed approach, RR intervals are extracted from ECG signals of 35 subjects obtained from a sleep apnea database ( http://physionet.org/cgi-bin/atm/ATM ). The results indicate that the kernel density classifier, with two features for apnea event detection, achieves a mean accuracy of 82.07 %, with mean sensitivity of 83.23 % and mean specificity of 80.24 %. 
Compared with other existing methods, the proposed kernel density approach achieves a comparably good performance but by using fewer features without significantly losing discriminant power, which indicates that it could be widely used for home-based screening or diagnosis of OSA.",2015-03-03 +24790157,ReadXplorer--visualization and analysis of mapped sequences.,"

Motivation

Fast algorithms and well-arranged visualizations are required for the comprehensive analysis of the ever-growing size of genomic and transcriptomic next-generation sequencing data.

Results

ReadXplorer is a software offering straightforward visualization and extensive analysis functions for genomic and transcriptomic DNA sequences mapped on a reference. A unique specialty of ReadXplorer is the quality classification of the read mappings. It is incorporated in all analysis functions and displayed in ReadXplorer's various synchronized data viewers for (i) the reference sequence, its base coverage as (ii) normalizable plot and (iii) histogram, (iv) read alignments and (v) read pairs. ReadXplorer's analysis capability covers RNA secondary structure prediction, single nucleotide polymorphism and deletion-insertion polymorphism detection, genomic feature and general coverage analysis. Especially for RNA-Seq data, it offers differential gene expression analysis, transcription start site and operon detection as well as RPKM value and read count calculations. Furthermore, ReadXplorer can combine or superimpose coverage of different datasets.

Availability and implementation

ReadXplorer is available as open-source software at http://www.readxplorer.org along with a detailed manual.",2014-04-30 +25391397,Optimization method for obtaining nearest-neighbour DNA entropies and enthalpies directly from melting temperatures.,"

Motivation

Free energy nearest-neighbour (NN) thermodynamics is widely used in DNA biochemistry, ranging from the calculation of melting temperatures to the prediction of secondary structures. Methods to calculate NN parameters require the knowledge of total sequence entropies and enthalpies, which are not always available.

Results

Here, we implement and test a new melting temperature optimization method where we obtain the NN parameters directly from the temperatures. In this way, we bypass the constraints imposed by total sequence entropies and enthalpies. This enabled us to calculate the missing NN entropies and enthalpies for some published datasets, including salt-dependent parameters. Also this allowed us to combine 281 sequences from different types of melting temperature data for which we derived a new set of NN parameters, which have a smaller uncertainty and an improved predictive power.

Availability and implementation

C++ source code and compiled binaries for several Linux distributions are available from https://sites.google.com/site/geraldweberufmg/vargibbs and from OpenSuse build service at https://build.opensuse.org/package/show/home:drgweber/VarGibbs. The software package contains scripts and data files to reproduce all results presented here.",2014-11-12 +26420834,ANISEED 2015: a digital framework for the comparative developmental biology of ascidians.,"Ascidians belong to the tunicates, the sister group of vertebrates and are recognized model organisms in the field of embryonic development, regeneration and stem cells. ANISEED is the main information system in the field of ascidian developmental biology. This article reports the development of the system since its initial publication in 2010. Over the past five years, we refactored the system from an initial custom schema to an extended version of the Chado schema and redesigned all user and back end interfaces. This new architecture was used to improve and enrich the description of Ciona intestinalis embryonic development, based on an improved genome assembly and gene model set, refined functional gene annotation, and anatomical ontologies, and a new collection of full ORF cDNAs. The genomes of nine ascidian species have been sequenced since the release of the C. intestinalis genome. In ANISEED 2015, all nine new ascidian species can be explored via dedicated genome browsers, and searched by Blast. In addition, ANISEED provides full functional gene annotation, anatomical ontologies and some gene expression data for the six species with highest quality genomes. ANISEED is publicly available at: http://www.aniseed.cnrs.fr.",2015-09-29 +21427194,ExpEdit: a webserver to explore human RNA editing in RNA-Seq experiments.,"

Unlabelled

ExpEdit is a web application for assessing RNA editing in human at known or user-specified sites supported by transcript data obtained by RNA-Seq experiments. Mapping data (in SAM/BAM format) or directly sequence reads [in FASTQ/short read archive (SRA) format] can be provided as input to carry out a comparative analysis against a large collection of known editing sites collected in DARNED database as well as other user-provided potentially edited positions. Results are shown as dynamic tables containing University of California, Santa Cruz (UCSC) links for a quick examination of the genomic context.

Availability

ExpEdit is freely available on the web at http://www.caspur.it/ExpEdit/.",2011-03-22 +25653448,African green monkey TRIM5α restriction in simian immunodeficiency virus-specific rhesus macaque effector CD4 T cells enhances their survival and antiviral function.,"

Unlabelled

The expression of xenogeneic TRIM5α proteins can restrict infection in various retrovirus/host cell pairings. Previously, we have shown that African green monkey TRIM5α (AgmTRIM5α) potently restricts both human immunodeficiency virus type 1 (HIV-1) and simian immunodeficiency virus mac239 (SIV(mac239)) replication in a transformed human T-cell line (L. V. Coren, et al., Retrovirology 12:11, 2015, http://dx.doi.org/10.1186/s12977-015-0137-9). To assess AgmTRIM5α restriction in primary cells, we transduced AgmTRIM5α into primary rhesus macaque CD4 T cells and infected them with SIV(mac239). Experiments with T-cell clones revealed that AgmTRIM5α could reproducibly restrict SIV(mac239) replication, and that this restriction synergizes with an intrinsic resistance to infection present in some CD4 T-cell clones. AgmTRIM5α transduction of virus-specific CD4 T-cell clones increased and prolonged their ability to suppress SIV spread in CD4 target cells. This increased antiviral function was strongly linked to decreased viral replication in the AgmTRIM5α-expressing effectors, consistent with restriction preventing the virus-induced cytopathogenicity that disables effector function. Taken together, our data show that AgmTRIM5α restriction, although not absolute, reduces SIV replication in primary rhesus CD4 T cells which, in turn, increases their antiviral function. These results support prior in vivo data indicating that the contribution of virus-specific CD4 T-cell effectors to viral control is limited due to infection.

Importance

The potential of effector CD4 T cells to immunologically modulate SIV/HIV infection likely is limited by their susceptibility to infection and subsequent inactivation or elimination. Here, we show that AgmTRIM5α expression inhibits SIV spread in primary effector CD4 T cells in vitro. Importantly, protection of effector CD4 T cells by AgmTRIM5α markedly enhanced their antiviral function by delaying SIV infection, thereby extending their viability despite the presence of virus. Our in vitro data support prior in vivo HIV-1 studies suggesting that the antiviral CD4 effector response is impaired due to infection and subsequent cytopathogenicity. The ability of AgmTRIM5α expression to restrict SIV infection in primary rhesus effector CD4 T cells now opens an opportunity to use the SIV/rhesus macaque model to further elucidate the potential and scope of anti-AIDS virus effector CD4 T-cell function.",2015-02-04 +21349870,MPID-T2: a database for sequence-structure-function analyses of pMHC and TR/pMHC structures.,"

Unlabelled

Sequence-structure-function information is critical in understanding the mechanism of pMHC and TR/pMHC binding and recognition. A database for sequence-structure-function information on pMHC and TR/pMHC interactions, MHC-Peptide Interaction Database-TR version 2 (MPID-T2), is now available augmented with the latest PDB and IMGT/3Dstructure-DB data, advanced features and new parameters for the analysis of pMHC and TR/pMHC structures.

Availability

http://biolinfo.org/mpid-t2.

Contact

shoba.ranganathan@mq.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-23 +26306324,Data supporting chitosan facilitates structure formation of the salivary gland by regulating the basement membrane components.,"To investigate the role of basement membrane (BM) in chitosan-mediated morphogenesis of the salivary glands, the embryonic submandibular gland (SMG) experimental model was used. Chitosan promotes branching at distinct stages in SMG morphogenesis. When enzymes such as type IV collagenase, dispase, and cathepsin B were used to digest the BM components, the morphogenetic effect mediated by chitosan disappeared. Immunofluorescence revealed that the corresponding receptors for BM components, including CD49c, CD49f, CD29, and dystroglycan, were locally enriched at the epithelial-mesenchymal junction around BM areas. The functional roles of laminin α1 and α5 in SMG branching were explored via siRNA knockdown, and suppression was confirmed at both the RNA and protein levels (Yang and Hsiao, Biomaterials, http://dx.doi.org/10.1016/j.biomaterials.2015.06.028, 2015). This data article demonstrates the experimental approaches to investigate the role of basement membrane in the structure formation of the salivary gland engineered by biomaterials.",2015-07-17 +23390499,Fungi in Thailand: a case study of the efficacy of an ITS barcode for automatically identifying species within the Annulohypoxylon and Hypoxylon genera.,"Thailand, a part of the Indo-Burma biodiversity hotspot, has many endemic animals and plants. Some of its fungal species are difficult to recognize and separate, complicating assessments of biodiversity. We assessed species diversity within the fungal genera Annulohypoxylon and Hypoxylon, which produce biologically active and potentially therapeutic compounds, by applying classical taxonomic methods to 552 teleomorphs collected from across Thailand. 
Using probability of correct identification (PCI), we also assessed the efficacy of automated species identification with a fungal barcode marker, ITS, in the model system of Annulohypoxylon and Hypoxylon. The 552 teleomorphs yielded 137 ITS sequences; in addition, we examined 128 GenBank ITS sequences, to assess biases in evaluating a DNA barcode with GenBank data. The use of multiple sequence alignment in a barcode database like BOLD raises some concerns about non-protein barcode markers like ITS, so we also compared species identification using different alignment methods. Our results suggest the following. (1) Multiple sequence alignment of ITS sequences is competitive with pairwise alignment when identifying species, so BOLD should be able to preserve its present bioinformatics workflow for species identification for ITS, and possibly therefore with at least some other non-protein barcode markers. (2) Automated species identification is insensitive to a specific choice of evolutionary distance, contributing to resolution of a current debate in DNA barcoding. (3) Statistical methods are available to address, at least partially, the possibility of expert misidentification of species. Phylogenetic trees discovered a cryptic species and strongly supported monophyletic clades for many Annulohypoxylon and Hypoxylon species, suggesting that ITS can contribute usefully to a barcode for these fungi. The PCIs here, derived solely from ITS, suggest that a fungal barcode will require secondary markers in Annulohypoxylon and Hypoxylon, however. The URL http://tinyurl.com/spouge-barcode contains computer programs and other supplementary material relevant to this article.",2013-02-04 +26011358,Association between the receptor for advanced glycation end products gene polymorphisms and cancer risk: a systematic review and meta-analysis.,"

Purpose

Polymorphisms in the receptor for advanced glycation end products (RAGE) gene may influence the risk of cancer, but the results are inconsistent. Therefore, we performed a systematic review to identify statistical evidence of the association between the 3 polymorphisms rs2070600 G/S (82G>S), rs1800624 T/A (-374 T>A) and rs1800625 C/T (-429 C>T) and the risk of cancer.

Methods

We searched PubMed database (http://www.ncbi.nlm.nih.gov/pubmed/), EMBASE database (http://www.elsevier.com/online-tools/embase) and China National Knowledge Infrastructure (CNKI) database (http://www.cnki.net/) until Aug 30, 2014 to identify eligible studies.

Results

The pooled analysis revealed positive association between RAGE rs2070600 polymorphism and cancer risk in all genetic models (homozygous: OR=1.831, 95%CI: 1.548-2.166, p<0.001, allele: OR=1.321, 95%CI: 1.164-1.499, p<0.001, heterozygous: OR=1.42, 95%CI:1.126-1.792, p=0.003, dominant: OR=1.499, 95%CI: 1.200-1.874, p<0.001, recessive: OR=1.376, 95%CI: 1.197-1.583, p<0.001). We failed to get an effective conclusion about the association between the rs1800624 and rs1800625 polymorphisms and cancer risk in overall comparison. But in subgroup analysis, the rs1800624 polymorphism significantly increased lung cancer susceptibility in the homozygous model (OR=1.486, 95%CI:1.147-1.924, p=0.003) and the allele model (OR=1.15, 95%CI:1.029-1.285, p=0.014), but most likely contributed to decreased susceptibility to breast cancer in the allele model (OR=0.791, 95%CI: 0.648-0.965, p=0.021), the heterozygous model (OR=0.733, 95%CI:0.577-0.931, p=0.011) and the dominant model (OR=0.741, 95%CI:0.588-0.934, p=0.011). No significant association was found between RAGE rs1800625 polymorphism and cancer risk in Caucasians, but these results should be interpreted with caution.

Conclusion

The polymorphism of rs2070600 in the RAGE gene may increase the susceptibility to several human cancers, especially lung cancer, and particularly in Asians. The rs1800624 most likely contributes to decreased susceptibility to breast cancer but increased susceptibility to lung cancer. However, large-scale studies involving various cancer types and different populations are needed for a precise conclusion.",2015-03-01 +30699719,First Report of Nigrospora Leaf Blight on Tea Caused by Nigrospora sphaerica in India.,"Tea [Camellia sinensis (L.) O. Kuntze] is an economically important non-alcoholic caffeine-containing beverage crop widely cultivated for leaves in India, especially in the Darjeeling district of West Bengal. In May 2012, distinct blight symptoms were observed on leaves of popular tea cultivars AV-2, Tukdah 78, Rungli Rungliot 17/144, and Bannockburn 157 in commercial tea estates of the Darjeeling district. This disease reduces yield and quality of the leaves. The initial symptoms were frequently observed on the young leaf margins and apices. Foliar symptoms are characterized by grayish to brown, semicircular or irregular shaped lesions, often surrounded by pale yellow zones up to 9 mm in diameter. The lesions later expand and the affected leaves turn grayish to dark brown and eventually the dried tissue falls, leading to complete defoliation of the plant. The disease causes damage to leaves of all ages and is severe in young leaves. A portion of the symptomatic leaf tissues were surface sterilized in 70% ethanol for 30 s, then in 2% NaClO for 3 min, rinsed three times in sterile distilled water, and plated onto potato dextrose agar (PDA). The fungal colonies were initially white and then became grayish to brown with sporulation. Conidia were spherical to subspherical, single-celled, black, 19 to 21 μm in diameter, and were borne on a hyaline vesicle at the tip of each conidiophore. 
Morphological characteristics of the isolates were consistent with those of Nigrospora sphaerica (1). Moreover, the internal transcribed spacer (ITS) region of the ribosomal RNA was amplified by using primers ITS1 and ITS4 and sequenced (GenBank Accession No. KJ767520). The sequence was compared to the GenBank database through nucleotide BLAST search and the isolate showed 100% similarity to N. sphaerica (KC519729.1). On the basis of morphological characteristics and nucleotide homology, the isolate was identified as N. sphaerica. Koch's postulates were fulfilled in the laboratory on tea leaves inoculated with N. sphaerica conidial suspension (10⁶ conidia ml⁻¹) collected from a 7-day-old culture on PDA. Six inoculated 8-month-old seedlings of tea cultivars AV-2 and S.3/3 were incubated in a controlled environment chamber at 25°C and 80 to 85% humidity with a 12-h photoperiod. In addition, three plants of each cultivar were sprayed with sterile distilled water to serve as controls. Twelve to 14 days after inoculation, inoculated leaves developed blight symptoms similar to those observed on naturally infected tea leaves in the field. No symptoms were observed on the control leaves. The pathogen was re-isolated from lesions and its identity was confirmed by morphological characteristics. It was reported that N. sphaerica is frequently encountered as a secondary invader or as a saprophyte on many plant species and also as a causative organism of foliar disease on several hosts worldwide (2,3). To our knowledge, this is the first report of N. sphaerica as a foliar pathogen of Camellia sinensis in Darjeeling, West Bengal, India, or worldwide. References: (1) M. B. Ellis. Dematiaceous Hyphomycetes. CMI, Kew, Surrey, UK, 1971. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Syst. Mycol. Microbiol. Lab., ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ July 01, 2013. (3) E. R. Wright et al. Plant Dis. 
92:171, 2008.",2015-03-01 +30699709,"First Report of Pseudomonas syringae pv. coriandricola Causing Bacterial Leaf Spot on Carrot, Parsley, and Parsnip in Serbia.","During the spring of 2014, a severe leaf spot disease was observed on carrot (Daucus carota), parsley (Petroselinum crispum), and parsnip (Pastinaca sativa) on a 0.5-ha vegetable farm in Vojvodina Province, Serbia. The disease appeared under wet and cool conditions with 5 to 25% of plants infected for each of the three crops. Symptoms were characterized as brown angular leaf spots, ~2 mm in diameter, often limited by veins. Collected symptomatic leaves were rinsed and dried at room temperature, and leaf sections taken from the margin of necrotic tissue were macerated in sterile phosphate buffer and streaked onto nutrient agar with 5% (w/v) sucrose (NAS). After isolation, whitish, circular, dome-shaped, Levan-positive colonies consistently formed. Five strains from each host (carrot, parsley, and parsnip) were used for further study. Strains were gram-negative, aerobic, and positive for catalase and tobacco hypersensitive reaction but negative for oxidase, rot of potato slices, and arginine dihydrolase. These reactions corresponded to LOPAT group Ia, which includes Pseudomonas syringae pathovars (3). Repetitive extragenic palindromic sequence (Rep)-PCR fingerprint profiles using the REP, ERIC, and BOX primers (4) were identical for all strains. Sequence typing of the housekeeping genes gyrB and rpoD (1) was performed for three representative strains (one from each host). Sequences were deposited in the NCBI GenBank database as accessions KM979434 to KM979436 (strains from carrot, parsnip, and parsley, respectively) for the gyrB gene and KM979437 to KM979439 (strains from parsnip, parsley and carrot, respectively) for the rpoD gene. Sequences were compared with pathotype strain Pseudomonas syringae pv. 
coriandricola ICMP12471 deposited in the Plant Associated and Environmental Microbes Database ( http://genome.ppws.vt.edu/cgi-bin/MLST/home.pl ). BLAST analysis revealed 100% homology for gyrB and 99% homology for rpoD. Pathogenicity was tested with five representative strains from each host on four-week-old plants of carrot (cv. Nantes), parsley (cv. NS Molski), and parsnip (cv. Dugi beli glatki) using two methods: spraying the bacterial suspension (10⁸ CFU ml⁻¹) on the leaves until runoff (5) and injecting the bacterial suspension into leaves with a hypodermic syringe (2). Four plants were used per strain and method. Sterile distilled water was applied as a negative control treatment for each plant species. All plants were kept in a mist room with 100% humidity for 4 h, then transferred to a greenhouse at 25°C and 80% relative humidity and examined for symptom development over a period of three weeks. For all strains, inoculated leaves first developed water-soaked lesions on the leaves 5 to 7 days after inoculation (DAI); 14 DAI lesions became dark brown, often surrounded by haloes. No symptoms were observed on control plants inoculated with sterile distilled water. For fulfillment of Koch's postulates, re-isolations were done onto NAS. Re-isolated bacteria were obtained from each inoculated host and confirmed to be identical to the original isolates using the LOPAT tests and Rep-PCR fingerprinting profiles. Based on the pathogenicity test accompanied by completion of Koch's postulates, sequence analysis, and bacteriological tests, the strains were identified as P. s. pv. coriandricola. To our knowledge, this is the first report of bacterial leaf spot of carrot, parsley, and parsnip in Serbia. It may present a threat to production due to quality requirements for fresh market. References: (1) P. Ferrente and M. Scortichini. Plant Pathol. 59:954, 2010. (2) M. Gupta et al. Plant Dis. 97:418, 2013. (3) R. A. Lelliott et al. J. Appl. Bacteriol. 29:470, 1966. (4) F. J. 
Louws et al. Appl. Environ. Microb. 60:2286, 1994. (5) X. Xu and S. A. Miller. Plant Dis. 97:988, 2013.",2015-03-01 +30699723,First Report of Basil Downy Mildew Caused by Peronospora belbahrii in the Czech Republic.,"Sweet basil (Ocimum basilicum L.) is an annual aromatic and medicinal plant in the Lamiaceae that is originally native to India but is grown in warm regions all over the world. It is a popular culinary herb used fresh and dried, and is used in traditional folk medicine. In the Czech Republic, sweet basil is grown commercially in South Moravia or by home gardeners as a potted plant. In 2012, severe downy mildew was observed in a field of basil plants (cv. Dark Green) at the Crop Research Institute (CRI) in Olomouc, Czech Republic. Infected leaves each exhibited large, interveinal, chlorotic lesions, and violet-gray, fuzzy growth on the lower leaf surface. Within a few days, lesions turned necrotic and severely infected leaves dropped prematurely. Microscopic observations revealed hyaline conidiophores typical of Peronospora Corda, emerging from stomata. Conidiophores (n = 100) were usually 239.9 to 296.5 × 8.7 to 10.6 μm, straight, and were branched 4 or 5 times submonopodially at the upper ends. Ultimate branchlets (n = 100) were slightly curved and obtuse, with the longer branchlets usually 17.8 to 22.7 μm and the shorter branchlets 10.0 to 12.9 μm, and each bearing a single conidium. Conidia (n = 100) were olive-brown, mostly ellipsoidal to subglobose, and typically 29.0 to 31.0 × 23.2 to 25.4 μm, with a length/width ratio of 1.2 to 1.3. Oospores were not observed. Based on these morphological characteristics, the pathogen was identified as Peronospora belbahrii Thines (5). The specimen was deposited in a local herbarium at the CRI in Olomouc, as voucher PB-1. Genomic DNA was extracted from conidia, and the internal transcribed spacer (ITS) region of ribosomal DNA (rDNA) amplified with primers DC-6 (1) and LR-0 (4). 
A sequence was deposited in the NCBI database (GenBank Accession No. KJ960193). A BLAST search of the NCBI database revealed 99% identity to the deposited ITS sequences of P. belbahrii from basil and other host species (EU863410, FJ394334-7, GQ390794, GQ390795, HM462241, HM462242, HM486901, HQ702191, HQ730979, KC756923, KF419289, and KF419290). P. belbahrii was first described by Thines et al. (5) as a pathogen of sweet basil and coleus (Solenostemon scutellarioides), but can also infect Agastache spp. (2). There are many reports indicating the pathogen is spreading throughout the world (5). In Europe, chronologically, basil downy mildew has been reported from Italy, France, Switzerland, Germany, Hungary, and Cyprus (2,3,5). To our knowledge, this is the first report of natural occurrence of downy mildew on sweet basil in the Czech Republic. References: (1) D. E. L. Cooke et al. Fung. Genet. Biol. 30:17, 2000. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, USDA ARS. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 16 June 2014. (3) A. Garibaldi et al. Plant Dis. 89:683, 2005. (4) O. Spring et al. Eur. J. Plant Pathol. 114:309, 2006. (5) M. Thines et al. Mycol. Res. 113:532, 2009.",2015-03-01 +26485539,Femoral Access and Delivery of Continuous Renal Replacement Therapy Dose.,"The study aims to describe the use of dialysis catheters in critically ill patients treated with continuous renal replacement therapy (CRRT) and to study the impact of femoral versus non-femoral access on CRRT dose.Statistical analysis and predictive modelling of data from the Randomized Evaluation of Normal vs. Augmented Level renal replacement therapy trial.The femoral vein was the first access site in 937 (67%) of 1,399 patients. 
These patients had higher Acute Physiology and Chronic Health Evaluation and Sequential Organ Failure Assessment scores (p = 0.009) and lower pH (p < 0.001) but similar mortality to patients with non-femoral access (44 vs. 45%; p = 0.63). Lower body weight was independently associated with femoral access placement (OR 0.97, 95% CI 0.96-0.98). Femoral access was associated with a 1.03% lower CRRT dose (p = 0.05), but a 4.20% higher dose was achieved with 13.5 Fr catheters (p = 0.03).Femoral access was preferred in lighter and sicker patients. Catheter gauge had greater impact than catheter site in CRRT dose delivery. Video Journal Club ""Cappuccino with Claudio Ronco"" at http://www.karger.com/?doi=439581.",2015-10-20 +25932575,Corrections.,"Worldwide Antimalarial Resistance Network (WWARN) AL Dose Impact Study Group. The effect of dose on the antimalarial efficacy of artemetherlumefantrine: a systematic review and pooled analysis of individual patient data. Lancet Infect Dis 2015; published online March 13. http://dx.doi. org/10.1016/S1473-3099(15)70024-1— In the WWARN Dose Impact Study Group, the affiliation for S Borrmann was incorrect and has been corrected to Institute for Tropical Medicine, University of Tubingen, Tubingen. The correction has been made to the online version as of April 20.",2015-02-11 +25645238,iMiRNA-PseDPC: microRNA precursor identification with a pseudo distance-pair composition approach.,"A microRNA (miRNA) is a small non-coding RNA molecule, functioning in transcriptional and post-transcriptional regulation of gene expression. The human genome may encode over 1000 miRNAs. Albeit poorly characterized, miRNAs are widely deemed as important regulators of biological processes. Aberrant expression of miRNAs has been observed in many cancers and other disease states, indicating that they are deeply implicated with these diseases, particularly in carcinogenesis. 
Therefore, it is important for both basic research and miRNA-based therapy to discriminate the real pre-miRNAs from the false ones (such as hairpin sequences with similar stem-loops). Particularly, with the avalanche of RNA sequences generated in the post-genomic age, it is highly desired to develop computational sequence-based methods for effectively identifying the human pre-miRNAs. Here, we propose a predictor called ""iMiRNA-PseDPC"", in which the RNA sequences are formulated by a novel feature vector called ""pseudo distance-pair composition"" (PseDPC) with 10 types of structure statuses. Rigorous cross-validations on a much larger and more stringent newly constructed benchmark data-set showed that our approach has remarkably outperformed the existing ones in either prediction accuracy or efficiency, indicating the new predictor is quite promising or at least may become a complementary tool to the existing predictors in this area. For the convenience of most experimental scientists, a user-friendly web server for the new predictor has been established at http://bioinformatics.hitsz.edu.cn/iMiRNA-PseDPC/, by which users can easily get their desired results without the need to go through the mathematical details. It is anticipated that the new predictor may become a useful high throughput tool for genome analysis particularly in dealing with large-scale data.",2015-03-03 +21752800,A novel signal processing approach for the detection of copy number variations in the human genome.,"

Motivation

Human genomic variability occurs at different scales, from single nucleotide polymorphisms (SNPs) to large DNA segments. Copy number variations (CNVs) represent a significant part of our genetic heterogeneity and have also been associated with many diseases and disorders. Short, localized CNVs, which may play an important role in human disease, may be undetectable in noisy genomic data. Therefore, robust methodologies are needed for their detection. Furthermore, for meaningful identification of pathological CNVs, estimation of normal allelic aberrations is necessary.

Results

We developed a signal processing-based methodology for sequence denoising followed by pattern matching, to increase SNR in genomic data and improve CNV detection. We applied this signal-decomposition-matched filtering (SDMF) methodology to 429 normal genomic sequences, and compared detected CNVs to those in the Database of Genomic Variants. SDMF successfully detected a significant number of previously identified CNVs with frequencies of occurrence ≥10%, as well as unreported short CNVs. Its performance was also compared to circular binary segmentation (CBS) through simulations. SDMF had a significantly lower false detection rate and was significantly faster than CBS, an important advantage for handling large datasets generated with high-resolution arrays. By focusing on improving SNR (instead of the robustness of the detection algorithm), SDMF is a very promising methodology for identifying CNVs at all genomic spatial scales.

Availability

The data are available at http://tcga-data.nci.nih.gov/tcga/ The software and list of analyzed sequence IDs are available at http://www.hsph.harvard.edu/~betensky/ A Matlab code for Empirical Mode Decomposition may be found at: http://www.clear.rice.edu/elec301/Projects02/empiricalMode/code.html

Contact

caterina@mit.edu.",2011-07-12 +26049162,The pervasiveness and plasticity of circadian oscillations: the coupled circadian-oscillators framework.,"

Motivation

Circadian oscillations have been observed in animals, plants, fungi and cyanobacteria and play a fundamental role in coordinating the homeostasis and behavior of biological systems. Genetically encoded molecular clocks found in nearly every cell, based on negative transcription/translation feedback loops and involving only a dozen genes, play a central role in maintaining these oscillations. However, high-throughput gene expression experiments reveal that in a typical tissue, a much larger fraction ([Formula: see text]) of all transcripts oscillate with the day-night cycle and the oscillating species vary with tissue type suggesting that perhaps a much larger fraction of all transcripts, and perhaps also other molecular species, may bear the potential for circadian oscillations.

Results

To better quantify the pervasiveness and plasticity of circadian oscillations, we conduct the first large-scale analysis aggregating the results of 18 circadian transcriptomic studies and 10 circadian metabolomic studies conducted in mice using different tissues and under different conditions. We find that over half of protein coding genes in the cell can produce transcripts that are circadian in at least one set of conditions and similarly for measured metabolites. Genetic or environmental perturbations can disrupt existing oscillations by changing their amplitudes and phases, suppressing them or giving rise to novel circadian oscillations. The oscillating species and their oscillations provide a characteristic signature of the physiological state of the corresponding cell/tissue. Molecular networks comprise many oscillator loops that have been sculpted by evolution over two trillion day-night cycles to have intrinsic circadian frequency. These oscillating loops are coupled by shared nodes in a large network of coupled circadian oscillators where the clock genes form a major hub. Cells can program and re-program their circadian repertoire through epigenetic and other mechanisms.

Availability and implementation

High-resolution and tissue/condition specific circadian data and networks available at http://circadiomics.igb.uci.edu.

Contact

pfbaldi@ics.uci.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-06 +27084274,Proposals for enhanced health risk assessment and stratification in an integrated care scenario.,"

Objectives

Population-based health risk assessment and stratification are considered highly relevant for large-scale implementation of integrated care by facilitating services design and case identification. The principal objective of the study was to analyse five health-risk assessment strategies and health indicators used in the five regions participating in the Advancing Care Coordination and Telehealth Deployment (ACT) programme (http://www.act-programme.eu). The second purpose was to elaborate on strategies toward enhanced health risk predictive modelling in the clinical scenario.

Settings

The five ACT regions: Scotland (UK), Basque Country (ES), Catalonia (ES), Lombardy (I) and Groningen (NL).

Participants

Responsible teams for regional data management in the five ACT regions.

Primary and secondary outcome measures

We characterised and compared risk assessment strategies among ACT regions by analysing operational health risk predictive modelling tools for population-based stratification, as well as available health indicators at regional level. The analysis of the risk assessment tool deployed in Catalonia in 2015 (GMAs, Adjusted Morbidity Groups) was used as a basis to propose how population-based analytics could contribute to clinical risk prediction.

Results

There was consensus on the need for a population health approach to generate health risk predictive modelling. However, this strategy was fully in place only in two ACT regions: Basque Country and Catalonia. We found marked differences among regions in health risk predictive modelling tools and health indicators, and identified key factors constraining their comparability. The research proposes means to overcome current limitations and the use of population-based health risk prediction for enhanced clinical risk assessment.

Conclusions

The results indicate the need for further efforts to improve both comparability and flexibility of current population-based health risk predictive modelling approaches. Applicability and impact of the proposals for enhanced clinical risk assessment require prospective evaluation.",2016-04-15 +25725061,Automatic concept recognition using the human phenotype ontology reference and test suite corpora. ,"Concept recognition tools rely on the availability of textual corpora to assess their performance and enable the identification of areas for improvement. Typically, corpora are developed for specific purposes, such as gene name recognition. Gene and protein name identification are longstanding goals of biomedical text mining, and therefore a number of different corpora exist. However, phenotypes only recently became an entity of interest for specialized concept recognition systems, and hardly any annotated text is available for performance testing and training. Here, we present a unique corpus, capturing text spans from 228 abstracts manually annotated with Human Phenotype Ontology (HPO) concepts and harmonized by three curators, which can be used as a reference standard for free text annotation of human phenotypes. Furthermore, we developed a test suite for standardized concept recognition error analysis, incorporating 32 different types of test cases corresponding to 2164 HPO concepts. Finally, three established phenotype concept recognizers (NCBO Annotator, OBO Annotator and Bio-LarK CR) were comprehensively evaluated, and results are reported against both the text corpus and the test suites. The gold standard and test suites corpora are available from http://bio-lark.org/hpo_res.html. 
Database URL: http://bio-lark.org/hpo_res.html.",2015-02-27 +26342102,Deciphering the associations between gene expression and copy number alteration using a sparse double Laplacian shrinkage approach.,"MOTIVATION:Both gene expression levels (GEs) and copy number alterations (CNAs) have important biological implications. GEs are partly regulated by CNAs, and much effort has been devoted to understanding their relations. The regulation analysis is challenging with one gene expression possibly regulated by multiple CNAs and one CNA potentially regulating the expressions of multiple genes. The correlations among GEs and among CNAs make the analysis even more complicated. The existing methods have limitations and cannot comprehensively describe the regulation. RESULTS:A sparse double Laplacian shrinkage method is developed. It jointly models the effects of multiple CNAs on multiple GEs. Penalization is adopted to achieve sparsity and identify the regulation relationships. Network adjacency is computed to describe the interconnections among GEs and among CNAs. Two Laplacian shrinkage penalties are imposed to accommodate the network adjacency measures. Simulation shows that the proposed method outperforms the competing alternatives with more accurate marker identification. The Cancer Genome Atlas data are analysed to further demonstrate advantages of the proposed method. AVAILABILITY AND IMPLEMENTATION:R code is available at http://works.bepress.com/shuangge/49/.",2015-09-03 +23530628,"A benchmark server using high resolution protein structure data, and benchmark results for membrane helix predictions.","

Background

Helical membrane proteins are vital for the interaction of cells with their environment. Predicting the location of membrane helices in protein amino acid sequences provides substantial understanding of their structure and function and identifies membrane proteins in sequenced genomes. Currently there is no comprehensive benchmark tool for evaluating prediction methods, and there is no publication comparing all available prediction tools. Current benchmark literature is outdated, as recently determined membrane protein structures are not included. Current literature is also limited to global assessments, as specialised benchmarks for predicting specific classes of membrane proteins were not previously carried out.

Description

We present a benchmark server at http://sydney.edu.au/pharmacy/sbio/software/TMH_benchmark.shtml that uses recent high resolution protein structural data to provide a comprehensive assessment of the accuracy of existing membrane helix prediction methods. The server further allows a user to compare uploaded predictions generated by novel methods, permitting the comparison of these novel methods against all existing methods compared by the server. Benchmark metrics include sensitivity and specificity of predictions for membrane helix location and orientation, and many others. The server allows for customised evaluations such as assessing prediction method performances for specific helical membrane protein subtypes. We report results for custom benchmarks which illustrate how the server may be used for specialised benchmarks. Which prediction method is the best performing method depends on which measure is being benchmarked. The OCTOPUS membrane helix prediction method is consistently one of the highest performing methods across all measures in the benchmarks that we performed.

Conclusions

The benchmark server allows general and specialised assessment of existing and novel membrane helix prediction methods. Users can employ this benchmark server to determine the most suitable method for the type of prediction the user needs to perform, be it general whole-genome annotation or the prediction of specific types of helical membrane protein. Creators of novel prediction methods can use this benchmark server to evaluate the performance of their new methods. The benchmark server will be a valuable tool for researchers seeking to extract more sophisticated information from the large and growing protein sequence databases.",2013-03-27 +23286825,NeuroDNet - an open source platform for constructing and analyzing neurodegenerative disease networks.,"

Background

Genetic networks control cellular functions. Aberrations in normal cellular function are caused by mutations in genes that disrupt the fine tuning of genetic networks and cause disease or disorder. However, the large number of signalling molecules, genes and proteins that constitute such networks, and the consequent complexity of interactions, has restrained progress in research elucidating disease mechanisms. Hence, carrying out a systematic analysis of how diseases alter the character of these networks is important. We illustrate this through our work on neurodegenerative disease networks. We created a database, NeuroDNet, which brings together relevant information about signalling molecules, genes and proteins, and their interactions, for constructing neurodegenerative disease networks.

Description

NeuroDNet is a database with interactive tools that enables the creation of interaction networks for twelve neurodegenerative diseases under one portal for interrogation and analyses. It is the first of its kind, which enables the construction and analysis of neurodegenerative diseases through protein interaction networks, regulatory networks and Boolean networks. The database has a three-tier architecture - foundation, function and interface. The foundation tier contains the human genome data with 23857 protein-coding genes linked to more than 300 genes reported in clinical studies of neurodegenerative diseases. The database architecture was designed to retrieve neurodegenerative disease information seamlessly through the interface tier using specific functional information. Features of this database enable users to extract, analyze and display information related to a disease in many different ways.

Conclusions

The application of NeuroDNet was illustrated using three case studies. Through these case studies, the construction and analyses of a PPI network for angiogenin protein in amyotrophic lateral sclerosis, a signal-gene-protein interaction network for presenilin protein in Alzheimer's disease and a Boolean network for a mammalian cell cycle was demonstrated. NeuroDNet is accessible at http://bioschool.iitd.ac.in/NeuroDNet/.",2013-01-03 +25760616,Brickworx builds recurrent RNA and DNA structural motifs into medium- and low-resolution electron-density maps.,"Brickworx is a computer program that builds crystal structure models of nucleic acid molecules using recurrent motifs including double-stranded helices. In a first step, the program searches for electron-density peaks that may correspond to phosphate groups; it may also take into account phosphate-group positions provided by the user. Subsequently, comparing the three-dimensional patterns of the P atoms with a database of nucleic acid fragments, it finds the matching positions of the double-stranded helical motifs (A-RNA or B-DNA) in the unit cell. If the target structure is RNA, the helical fragments are further extended with recurrent RNA motifs from a fragment library that contains single-stranded segments. Finally, the matched motifs are merged and refined in real space to find the most likely conformations, including a fit of the sequence to the electron-density map. The Brickworx program is available for download and as a web server at http://iimcb.genesilico.pl/brickworx.",2015-02-26 +24475069,A new exhaustive method and strategy for finding motifs in ChIP-enriched regions.,"ChIP-seq, which combines chromatin immunoprecipitation (ChIP) with next-generation parallel sequencing, allows for the genome-wide identification of protein-DNA interactions. 
This technology poses new challenges for the development of novel motif-finding algorithms and methods for determining exact protein-DNA binding sites from ChIP-enriched sequencing data. State-of-the-art heuristic, exhaustive search algorithms have limited application for the identification of short (l, d) motifs (l ≤ 10, d ≤ 2) contained in ChIP-enriched regions. In this work we have developed a more powerful exhaustive method (FMotif) for finding long (l, d) motifs in DNA sequences. In conjunction with our method, we have adopted a simple ChIP-enriched sampling strategy for finding these motifs in large-scale ChIP-enriched regions. Empirical studies on synthetic samples and applications using several ChIP data sets including 16 TF (transcription factor) ChIP-seq data sets and five TF ChIP-exo data sets have demonstrated that our proposed method is capable of finding these motifs with high efficiency and accuracy. The source code for FMotif is available at http://211.71.76.45/FMotif/.",2014-01-24 +27362431,How to Direct the Edges of the Connectomes: Dynamics of the Consensus Connectomes and the Development of the Connections in the Human Brain.,"The human braingraph or the connectome is the object of an intensive research today. The advantage of the graph-approach to brain science is that the rich structures, algorithms and definitions of graph theory can be applied to the anatomical networks of the connections of the human brain. In these graphs, the vertices correspond to the small (1-1.5 cm2) areas of the gray matter, and two vertices are connected by an edge, if a diffusion-MRI based workflow finds fibers of axons, running between those small gray matter areas in the white matter of the brain. One main question of the field today is discovering the directions of the connections between the small gray matter areas. 
In a previous work we have reported the construction of the Budapest Reference Connectome Server http://connectome.pitgroup.org from the data recorded in the Human Connectome Project of the NIH. The server generates the consensus braingraph of 96 subjects in Version 2, and of 418 subjects in Version 3, according to selectable parameters. After the Budapest Reference Connectome Server had been published, we recognized a surprising and unforeseen property of the server. The server can generate the braingraph of connections that are present in at least k graphs out of the 418, for any value of k = 1, 2, …, 418. When the value of k is changed from k = 418 through 1 by moving a slider at the webserver from right to left, certainly more and more edges appear in the consensus graph. The astonishing observation is that the appearance of the new edges is not random: it is similar to a growing shrub. We refer to this phenomenon as the Consensus Connectome Dynamics. We hypothesize that this movement of the slider in the webserver may copy the development of the connections in the human brain in the following sense: the connections that are present in all subjects are the oldest ones, and those that are present only in a decreasing fraction of the subjects are gradually the newer connections in the individual brain development. An animation on the phenomenon is available at https://youtu.be/yxlyudPaVUE. Based on this observation and the related hypothesis, we can assign directions to some of the edges of the connectome as follows: Let Gk + 1 denote the consensus connectome where each edge is present in at least k+1 graphs, and let Gk denote the consensus connectome where each edge is present in at least k graphs. Suppose that vertex v is not connected to any other vertices in Gk+1, and becomes connected to a vertex u in Gk, where u was connected to other vertices already in Gk+1. 
Then we direct this (v, u) edge from v to u.",2016-06-30 +25708839,Tracking the changes in virus taxonomy.,"A database and website ( http://www.ictvonline.org/taxonomyReleases.asp ) have been established where the history of changes in virus taxonomy from 1971 to the present day can easily be traced. Each change is linked to a source document confirming the change or, for most changes since 2002, to the taxonomic proposal approved by the International Committee on Taxonomy of Viruses (ICTV).",2015-02-25 +23104888,MycPermCheck: the Mycobacterium tuberculosis permeability prediction tool for small molecules.,"

Motivation

With >8 million new cases in 2010, particularly documented in developing countries, tuberculosis (TB) is still a highly present pandemic and often terminal. This is also due to the emergence of antibiotic-resistant strains (MDR-TB and XDR-TB) of the primary causative TB agent Mycobacterium tuberculosis (MTB). Efforts to develop new effective drugs against MTB are restrained by the unique and largely impermeable composition of the mycobacterial cell wall.

Results

Based on a database of antimycobacterial substances (CDD TB), 3815 compounds were classified as active and thus permeable. A data mining approach was conducted to gather the physico-chemical similarities of these substances and delimit them from a generic dataset of drug-like molecules. On the basis of the differences in these datasets, a regression model was generated and implemented into the online tool MycPermCheck to predict the permeability probability of small organic compounds.

Discussion

Given the current lack of precise molecular criteria determining mycobacterial permeability, MycPermCheck represents an unprecedented prediction tool intended to support antimycobacterial drug discovery. It follows a novel knowledge-driven approach to estimate the permeability probability of small organic compounds. As such, MycPermCheck can be used intuitively as an additional selection criterion for potential new inhibitors against MTB. Based on the validation results, its performance is expected to be of high practical value for virtual screening purposes.

Availability

The online tool is freely accessible under the URL http://www.mycpermcheck.aksotriffer.pharmazie.uni-wuerzburg.de",2012-10-25 +25726332,Exosomal protein interactors as emerging therapeutic targets in urothelial bladder cancer.,"

Background

Exosomes are rich sources of biological material (proteins and nucleic acids) secreted by both tumor and normal cells, and found in urine of urinary bladder cancer patients.

Objective

The objective of the study was to identify interacting exosomal proteins in bladder cancer for future use in targeted therapy.

Methods

The Exocarta database (www.exocarta.org) was mined for urinary bladder cancer specific exosomal proteins. The urinary bladder cancer specific exosomal proteins (n=248) were analyzed to identify enriched pathways by Onto-tool Pathway Express (http://vortex.cs.wayne.edu/ontoexpress).

Results

Enriched pathways included cellular architecture, motility, cell to cell adhesion, tumorigenesis and metastasis. Proteins in the 9 top-ranked pathways included CTNNA1 (alpha-catenin), CTNNB1 (beta-catenin), VSAP, ITGA4, PAK1, DDR1, CDC42, RHOA, NRAS, RHO, PIK3AR1, MLC1, MMRN1, and CTTNBP2 and network analysis revealed 10 important hub proteins and identified inferred interactor NF2.

Conclusions

The importance of identifying interactors is that they can be used as targets for therapy, for example, using Bevacizumab (avastin--an angiogenesis inhibitor) against NF2 to inhibit protein-protein interactions will inhibit tumor growth and progression by hindering the exosome biogenesis.",2015-02-25 +25725126,Soft computing model for optimized siRNA design by identifying off target possibilities using artificial neural network model.,"The ability of small interfering RNA (siRNA) to do posttranscriptional gene regulation by knocking down targeted genes is an important research topic in functional genomics, biomedical research and in cancer therapeutics. Many tools had been developed to design exogenous siRNA with high experimental inhibition. Even though considerable amount of work has been done in designing exogenous siRNA, design of effective siRNA sequences is still a challenging work because the target mRNAs must be selected such that their corresponding siRNAs are likely to be efficient against that target and unlikely to accidentally silence other transcripts due to sequence similarity. In some cases, siRNAs may tolerate mismatches with the target mRNA, but knockdown of genes other than the intended target could make serious consequences. Hence to design siRNAs, two important concepts must be considered: the ability in knocking down target genes and the off target possibility on any nontarget genes. So before doing gene silencing by siRNAs, it is essential to analyze their off target effects in addition to their inhibition efficacy against a particular target. Only a few methods have been developed by considering both efficacy and off target possibility of siRNA against a gene. In this paper we present a new design of neural network model with whole stacking energy (ΔG) that enables to identify the efficacy and off target effect of siRNAs against target genes. 
The tool lists all siRNAs against a particular target with their inhibition efficacy and number of matches or sequence similarity with other genes in the database. We could achieve an excellent performance of Pearson Correlation Coefficient (R=0.74) and Area Under Curve (AUC=0.906) when the threshold of whole stacking energy is ≥-34.6 kcal/mol. To the best of the author's knowledge, this is one of the best score while considering the ""combined efficacy and off target possibility"" of siRNA for silencing a gene. The proposed model shall be useful for designing exogenous siRNA for therapeutic applications and gene silencing techniques in the area of bioinformatics. The software is developed as a desktop application and available at http://opsid.in/opsid/.",2015-02-25 +23448151,"The effect of statins on testosterone in men and women, a systematic review and meta-analysis of randomized controlled trials.","

Background

Statins are extensively used for cardiovascular disease prevention. Statins reduce mortality rates more than other lipid-modulating drugs, although evidence from randomized controlled trials also suggests that statins unexpectedly increase the risk of diabetes and improve immune function. Physiologically, statins would be expected to lower androgens because statins inhibit production of the substrate for the local synthesis of androgens and statins' pleiotropic effects are somewhat similar to the physiological effects of lowering testosterone, so we hypothesized that statins lower testosterone.

Methods

A meta-analysis of placebo-controlled randomized trials of statins to test the a priori hypothesis that statins lower testosterone. We searched the PubMed, Medline and ISI Web of Science databases until the end of 2011, using '(Testosterone OR androgen) AND (CS-514 OR statin OR simvastatin OR atorvastatin OR fluvastatin OR lovastatin OR rosuvastatin OR pravastatin)' restricted to randomized controlled trials in English, supplemented by a bibliographic search. We included studies with durations of 2+ weeks reporting changes in testosterone. Two reviewers independently searched, selected and assessed study quality. Two statisticians independently abstracted and analyzed data, using random or fixed effects models, as appropriate, with inverse variance weighting.

Results

Of the 29 studies identified 11 were eligible. In 5 homogenous trials of 501 men, mainly middle aged with hypercholesterolemia, statins lowered testosterone by -0.66 nmol/l (95% confidence interval (CI) -0.14 to -1.18). In 6 heterogeneous trials of 368 young women with polycystic ovary syndrome, statins lowered testosterone by -0.40 nmol/l (95% CI -0.05 to -0.75). Overall statins lowered testosterone by -0.44 nmol/l (95% CI -0.75 to -0.13).

Conclusions

Statins may partially operate by lowering testosterone. Whether this is a detrimental side effect or mode of action warrants investigation given the potential implications for drug development and prevention of non-communicable chronic diseases. See commentary article here http://www.biomedcentral.com/1741-7015/11/58.",2013-02-28 +24215027,Bayesian network prior: network analysis of biological data using external knowledge.,"

Motivation

Reverse engineering GI networks from experimental data is a challenging task due to the complex nature of the networks and the noise inherent in the data. One way to overcome these hurdles would be incorporating the vast amounts of external biological knowledge when building interaction networks. We propose a framework where GI networks are learned from experimental data using Bayesian networks (BNs) and the incorporation of external knowledge is also done via a BN that we call Bayesian Network Prior (BNP). BNP depicts the relation between various evidence types that contribute to the event 'gene interaction' and is used to calculate the probability of a candidate graph (G) in the structure learning process.

Results

Our simulation results on synthetic, simulated and real biological data show that the proposed approach can identify the underlying interaction network with high accuracy even when the prior information is distorted and outperforms existing methods.

Availability

Accompanying BNP software package is freely available for academic use at http://bioe.bilgi.edu.tr/BNP.

Contact

hasan.otu@bilgi.edu.tr

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-11-09 +22073123,Structural annotation of Mycobacterium tuberculosis proteome.,"Of the ∼4000 ORFs identified through the genome sequence of Mycobacterium tuberculosis (TB) H37Rv, experimentally determined structures are available for 312. Since knowledge of protein structures is essential to obtain a high-resolution understanding of the underlying biology, we seek to obtain a structural annotation for the genome, using computational methods. Structural models were obtained and validated for ∼2877 ORFs, covering ∼70% of the genome. Functional annotation of each protein was based on fold-based functional assignments and a novel binding site based ligand association. New algorithms for binding site detection and genome scale binding site comparison at the structural level, recently reported from the laboratory, were utilized. Besides these, the annotation covers detection of various sequence and sub-structural motifs and quaternary structure predictions based on the corresponding templates. The study provides an opportunity to obtain a global perspective of the fold distribution in the genome. The annotation indicates that cellular metabolism can be achieved with only 219 folds. New insights about the folds that predominate in the genome, as well as the fold-combinations that make up multi-domain proteins are also obtained. 1728 binding pockets have been associated with ligands through binding site identification and sub-structure similarity analyses. The resource (http://proline.physics.iisc.ernet.in/Tbstructuralannotation), being one of the first to be based on structure-derived functional annotations at a genome scale, is expected to be useful for better understanding of TB and for application in drug discovery. 
The reported annotation pipeline is fairly generic and can be applied to other genomes as well.",2011-10-31 +23098784,"Diagnosis, evaluation and follow-up of asymptomatic microhematuria (AMH) in adults: AUA guideline.","

Purpose

The purpose of this guideline is to provide a clinical framework for the diagnosis, evaluation and follow-up of asymptomatic microhematuria.

Materials and methods

A systematic literature review using the MEDLINE® database was conducted to identify peer reviewed publications relevant to the definition, diagnosis, evaluation and follow-up for AMH. The review yielded 191 evidence-based articles, and these publications were used to create the majority of the guideline statements. There was insufficient evidence-based data for certain concepts; therefore, clinical principles and consensus expert opinions were used for portions of the guideline statements.

Results

Guideline statements are provided for diagnosis, evaluation and follow-up. The panel identified multiphasic computed tomography as the preferred imaging technique and developed guideline statements for persistent or recurrent AMH as well as follow-up.

Conclusions

AMH is only diagnosed by microscopy; a dipstick reading suggestive of hematuria should not lead to imaging or further investigation without confirmation of three or greater red blood cells per high power field. The evaluation and follow-up algorithm and guidelines provide a systematic approach to the patient with AMH. All patients 35 years or older should undergo cystoscopy, and upper urinary tract imaging is indicated in all adults with AMH in the absence of known benign causation. The imaging modalities and physical evaluation techniques are evolving, and these guidelines will need to be updated as the effectiveness of these become available. Please visit the AUA website at http://www.auanet.org/content/media/asymptomatic_microhematuria_guideline.pdf to view this guideline in its entirety.",2012-10-24 +25712693,GlycanAnalysis Plug-in: a database search tool for N-glycan structures using mass spectrometry.,"Tandem mass spectrometry (MS/MS or MS(n)) is a potent technique for characterizing N-glycan structures. GlycanAnalysis searches a glycan database to support the identification of glycan structures from MS/MS spectra. It also calculates diagnostic ions of glycan structures registered in a glycan database (GlycomeDB or KEGG GLYCAN) and searches for MS/MS spectra of N-glycans that match diagnostic ions to determine the structures. This program functions as a plug-in for Mass++, a freeware mass spectrum visualization and analysis program.The executable files of Mass++ are available for free at http://www.first-ms3d.jp/english/. The GlycanAnalysis plug-in is included in the standard package of Mass++ for Windows.k-morimt@shimadzu.co.jp or nishikaz@shimadzu.co.jp or acyshzw@shimadzu.co.jpSupplementary material are available at Bioinformatics online.",2015-02-23 +26545029,Computational Exposure Science: An Emerging Discipline to Support 21st-Century Risk Assessment.,"

Background

Computational exposure science represents a frontier of environmental science that is emerging and quickly evolving.

Objectives

In this commentary, we define this burgeoning discipline, describe a framework for implementation, and review some key ongoing research elements that are advancing the science with respect to exposure to chemicals in consumer products.

Discussion

The fundamental elements of computational exposure science include the development of reliable, computationally efficient predictive exposure models; the identification, acquisition, and application of data to support and evaluate these models; and generation of improved methods for extrapolating across chemicals. We describe our efforts in each of these areas and provide examples that demonstrate both progress and potential.

Conclusions

Computational exposure science, linked with comparable efforts in toxicology, is ushering in a new era of risk assessment that greatly expands our ability to evaluate chemical safety and sustainability and to protect public health.

Citation

Egeghy PP, Sheldon LS, Isaacs KK, Özkaynak H, Goldsmith M-R, Wambaugh JF, Judson RS, Buckley TJ. 2016. Computational exposure science: an emerging discipline to support 21st-century risk assessment. Environ Health Perspect 124:697-702; http://dx.doi.org/10.1289/ehp.1509748.",2015-11-06 +23028852,Multi-edge gene set networks reveal novel insights into global relationships between biological themes.,"Curated gene sets from databases such as KEGG Pathway and Gene Ontology are often used to systematically organize lists of genes or proteins derived from high-throughput data. However, the information content inherent to some relationships between the interrogated gene sets, such as pathway crosstalk, is often underutilized. A gene set network, where nodes representing individual gene sets such as KEGG pathways are connected to indicate a functional dependency, is well suited to visualize and analyze global gene set relationships. Here we introduce a novel gene set network construction algorithm that integrates gene lists derived from high-throughput experiments with curated gene sets to construct co-enrichment gene set networks. Along with previously described co-membership and linkage algorithms, we apply the co-enrichment algorithm to eight gene set collections to construct integrated multi-evidence gene set networks with multiple edge types connecting gene sets. We demonstrate the utility of approach through examples of novel gene set networks such as the chromosome map co-differential expression gene set network. A total of twenty-four gene set networks are exposed via a web tool called MetaNet, where context-specific multi-edge gene set networks are constructed from enriched gene sets within user-defined gene lists. MetaNet is freely available at http://blaispathways.dfci.harvard.edu/metanet/.",2012-09-13 +26249810,NRGsuite: a PyMOL plugin to perform docking simulations in real time using FlexAID.,"

Unlabelled

Ligand protein docking simulations play a fundamental role in understanding molecular recognition. Herein we introduce the NRGsuite, a PyMOL plugin that permits the detection of surface cavities in proteins, their refinements, calculation of volume and use, individually or jointly, as target binding-sites for docking simulations with FlexAID. The NRGsuite offers the users control over a large number of important parameters in docking simulations including the assignment of flexible side-chains and definition of geometric constraints. Furthermore, the NRGsuite permits the visualization of the docking simulation in real time. The NRGsuite gives access to powerful docking simulations that can be used in structure-guided drug design and as an educational tool. The NRGsuite is implemented in Python and C/C++ with an easy to use package installer. The NRGsuite is available for Windows, Linux and MacOS.

Availability and implementation

http://bcb.med.usherbrooke.ca/flexaid.

Contact

rafael.najmanovich@usherbroke.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-06 +27157823,"Ten-Year Monitored Natural Recovery of Lead-Contaminated Mine Tailing in Klity Creek, Kanchanaburi Province, Thailand.","

Background

Klity Creek has become Thailand's first official remediation ordered by the court in 2013, 15 years after the spill of lead (Pb)-contaminated mine tailing into the creek. The Pollution Control Department (PCD) decided to restore the creek through monitored natural recovery (MNR) since 2006 but has not been successful. Interestingly, the most recent remediation plan in 2015 will still apply MNR to five out of the seven portions of the creek, despite no scientific feasibility evaluation of using MNR to restore the creek.

Objective

This study qualitatively and quantitatively evaluated the feasibility of using MNR to clean up the creek in order to protect the Klity children from excess Pb exposure.

Methods

We analyzed the physical and chemical transformation of Pb contaminated sediment in the creek and developed a remedial action goal and cleanup level using the Integrated Exposure Uptake Biokinetic model (IEUBK). We empirically determined the natural recovery (NR) potentials and rates using 10 years of data monitoring the water and sediment samples from eight monitoring stations (KC1 to KC8).

Results

Klity Creek has NR potential for water except at KC2, which is closest to the spill and the other improperly managed Pb sources. However, the creek has no NR potential for sediment except at the KC8 location (NR rate = 11.1 ± 3.0 × 10⁻³ month⁻¹) farthest from the spill.

Conclusion

The MNR method is not suitable to use as the sole remedial approach for Klity Creek (KC2 to KC7). Although MNR is applicable at KC8, it may require up to 377 ± 76 years to restore the sediment to the background Pb concentration.

Citation

Phenrat T, Otwong A, Chantharit A, Lowry GV. 2016. Ten-year monitored natural recovery of lead-contaminated mine tailing in Klity Creek, Kanchanaburi Province, Thailand. Environ Health Perspect 124:1511-1520; http://dx.doi.org/10.1289/EHP215.",2016-05-08 +27346385,Urban and Transport Planning Related Exposures and Mortality: A Health Impact Assessment for Cities.,"

Background

By 2050, nearly 70% of the global population is projected to live in urban areas. Because the environments we inhabit affect our health, urban and transport designs that promote healthy living are needed.

Objective

We estimated the number of premature deaths preventable under compliance with international exposure recommendations for physical activity (PA), air pollution, noise, heat, and access to green spaces.

Methods

We developed and applied the Urban and TranspOrt Planning Health Impact Assessment (UTOPHIA) tool to Barcelona, Spain. Exposure estimates and mortality data were available for 1,357,361 residents. We compared recommended with current exposure levels. We quantified the associations between exposures and mortality and calculated population attributable fractions to estimate the number of premature deaths preventable. We also modeled life-expectancy and economic impacts.

Results

We estimated that annually, nearly 20% of mortality could be prevented if international recommendations for performance of PA; exposure to air pollution, noise, and heat; and access to green space were followed. Estimations showed that the greatest portion of preventable deaths was attributable to increases in PA, followed by reductions of exposure to air pollution, traffic noise, and heat. Access to green spaces had smaller effects on mortality. Compliance was estimated to increase the average life expectancy by 360 (95% CI: 219, 493) days and result in economic savings of 9.3 (95% CI: 4.9, 13.2) billion EUR/year.

Conclusions

PA factors and environmental exposures can be modified by changes in urban and transport planning. We emphasize the need for a) the reduction of motorized traffic through the promotion of active and public transport and b) the provision of green infrastructure, both of which are suggested to provide opportunities for PA and for mitigation of air pollution, noise, and heat. Citation: Mueller N, Rojas-Rueda D, Basagaña X, Cirach M, Cole-Hunter T, Dadvand P, Donaire-Gonzalez D, Foraster M, Gascon M, Martinez D, Tonne C, Triguero-Mas M, Valentín A, Nieuwenhuijsen M. 2017. Urban and transport planning related exposures and mortality: a health impact assessment for cities. Environ Health Perspect 125:89-96; http://dx.doi.org/10.1289/EHP220.",2016-06-27 +24260313,Compression of structured high-throughput sequencing data.,"Large biological datasets are being produced at a rapid pace and create substantial storage challenges, particularly in the domain of high-throughput sequencing (HTS). Most approaches currently used to store HTS data are either unable to quickly adapt to the requirements of new sequencing or analysis methods (because they do not support schema evolution), or fail to provide state of the art compression of the datasets. We have devised new approaches to store HTS data that support seamless data schema evolution and compress datasets substantially better than existing approaches. Building on these new approaches, we discuss and demonstrate how a multi-tier data organization can dramatically reduce the storage, computational and network burden of collecting, analyzing, and archiving large sequencing datasets. For instance, we show that spliced RNA-Seq alignments can be stored in less than 4% the size of a BAM file with perfect data fidelity. Compared to the previous compression state of the art, these methods reduce dataset size more than 40% when storing exome, gene expression or DNA methylation datasets. 
The approaches have been integrated in a comprehensive suite of software tools (http://goby.campagnelab.org) that support common analyses for a range of high-throughput sequencing assays.",2013-11-18 +23965047,A flexible count data model to fit the wide diversity of expression profiles arising from extensively replicated RNA-seq experiments.,"

Background

High-throughput RNA sequencing (RNA-seq) offers unprecedented power to capture the real dynamics of gene expression. Experimental designs with extensive biological replication present a unique opportunity to exploit this feature and distinguish expression profiles with higher resolution. RNA-seq data analysis methods so far have been mostly applied to data sets with few replicates and their default settings try to provide the best performance under this constraint. These methods are based on two well-known count data distributions: the Poisson and the negative binomial. The way to properly calibrate them with large RNA-seq data sets is not trivial for the non-expert bioinformatics user.

Results

Here we show that expression profiles produced by extensively-replicated RNA-seq experiments lead to a rich diversity of count data distributions beyond the Poisson and the negative binomial, such as Poisson-Inverse Gaussian or Pólya-Aeppli, which can be captured by a more general family of count data distributions called the Poisson-Tweedie. The flexibility of the Poisson-Tweedie family enables a direct fitting of emerging features of large expression profiles, such as heavy-tails or zero-inflation, without the need to alter a single configuration parameter. We provide a software package for R called tweeDEseq implementing a new test for differential expression based on the Poisson-Tweedie family. Using simulations on synthetic and real RNA-seq data we show that tweeDEseq yields P-values that are equally or more accurate than competing methods under different configuration parameters. By surveying the tiny fraction of sex-specific gene expression changes in human lymphoblastoid cell lines, we also show that tweeDEseq accurately detects differentially expressed genes in a real large RNA-seq data set with improved performance and reproducibility over the previously compared methodologies. Finally, we compared the results with those obtained from microarrays in order to check for reproducibility.

Conclusions

RNA-seq data with many replicates leads to a handful of count data distributions which can be accurately estimated with the statistical model illustrated in this paper. This method provides a better fit to the underlying biological variability; this may be critical when comparing groups of RNA-seq samples with markedly different count data distributions. The tweeDEseq package forms part of the Bioconductor project and it is available for download at http://www.bioconductor.org.",2013-08-21 +26860319,LowMACA: exploiting protein family analysis for the identification of rare driver mutations in cancer.,"

Background

The increasing availability of resequencing data has led to a better understanding of the most important genes in cancer development. Nevertheless, the mutational landscape of many tumor types is heterogeneous and encompasses a long tail of potential driver genes that are systematically excluded by currently available methods due to the low frequency of their mutations. We developed LowMACA (Low frequency Mutations Analysis via Consensus Alignment), a method that combines the mutations of various proteins sharing the same functional domains to identify conserved residues that harbor clustered mutations in multiple sequence alignments. LowMACA is designed to visualize and statistically assess potential driver genes through the identification of their mutational hotspots.

Results

We analyzed the Ras superfamily exploiting the known driver mutations of the trio K-N-HRAS, identifying new putative driver mutations and genes belonging to less known members of the Rho, Rab and Rheb subfamilies. Furthermore, we applied the same concept to a list of known and candidate driver genes, and observed that low confidence genes show similar patterns of mutation compared to high confidence genes of the same protein family.

Conclusions

LowMACA is a software for the identification of gain-of-function mutations in putative oncogenic families, increasing the amount of information on functional domains and their possible role in cancer. In this context LowMACA emphasizes the role of genes mutated at low frequency otherwise undetectable by classical single gene analysis. LowMACA is an R package available at http://www.bioconductor.org/packages/release/bioc/html/LowMACA.html. It is also available as a GUI standalone downloadable at: https://cgsb.genomics.iit.it/wiki/projects/LowMACA.",2016-02-09 +27153618,R2C: improving ab initio residue contact map prediction using dynamic fusion strategy and Gaussian noise filter.,"

Motivation

Inter-residue contacts in proteins dictate the topology of protein structures. They are crucial for protein folding and structural stability. Accurate prediction of residue contacts especially for long-range contacts is important to the quality of ab initio structure modeling since they can enforce strong restraints to structure assembly.

Results

In this paper, we present a new Residue-Residue Contact predictor called R2C that combines machine learning-based and correlated mutation analysis-based methods, together with a two-dimensional Gaussian noise filter to enhance the long-range residue contact prediction. Our results show that the outputs from the machine learning-based method are concentrated with better performance on short-range contacts; while for correlated mutation analysis-based approach, the predictions are widespread with higher accuracy on long-range contacts. An effective query-driven dynamic fusion strategy proposed here takes full advantages of the two different methods, resulting in an impressive overall accuracy improvement. We also show that the contact map directly from the prediction model contains the interesting Gaussian noise, which has not been discovered before. Different from recent studies that tried to further enhance the quality of contact map by removing its transitive noise, we designed a new two-dimensional Gaussian noise filter, which was especially helpful for reinforcing the long-range residue contact prediction. Tested on recent CASP10/11 datasets, the overall top L/5 accuracy of our final R2C predictor is 17.6%/15.5% higher than the pure machine learning-based method and 7.8%/8.3% higher than the correlated mutation analysis-based approach for the long-range residue contact prediction.

Availability and implementation

http://www.csbio.sjtu.edu.cn/bioinf/R2C/ Contact: hbshen@sjtu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-10 +24051823,Visualizing change over time using dynamic hierarchies: TreeVersity2 and the StemView.,"To analyze data such as the US Federal Budget or characteristics of the student population of a University it is common to look for changes over time. This task can be made easier and more fruitful if the analysis is performed by grouping by attributes, such as by Agencies, Bureaus and Accounts for the Budget, or Ethnicity, Gender and Major in a University. We present TreeVersity2, a web based interactive data visualization tool that allows users to analyze change in datasets by creating dynamic hierarchies based on the data attributes. TreeVersity2 introduces a novel space filling visualization (StemView) to represent change in trees at multiple levels--not just at the leaf level. With this visualization users can explore absolute and relative changes, created and removed nodes, and each node's actual values, while maintaining the context of the tree. In addition, TreeVersity2 provides overviews of change over the entire time period, and a reporting tool that lists outliers in textual form, which helps users identify the major changes in the data without having to manually setup filters. We validated TreeVersity2 with 12 case studies with organizations as diverse as the National Cancer Institute, Federal Drug Administration, Department of Transportation, Office of the Bursar of the University of Maryland, or eBay. Our case studies demonstrated that TreeVersity2 is flexible enough to be used in different domains and provide useful insights for the data owners. A TreeVersity2 demo can be found at https://treeversity.cattlab.umd.edu.",2013-12-01 +25810778,A document processing pipeline for annotating chemical entities in scientific documents.,"

Background

The recognition of drugs and chemical entities in text is a very important task within the field of biomedical information extraction, given the rapid growth in the amount of published texts (scientific papers, patents, patient records) and the relevance of these and other related concepts. If done effectively, this could allow exploiting such textual resources to automatically extract or infer relevant information, such as drug profiles, relations and similarities between drugs, or associations between drugs and potential drug targets. The objective of this work was to develop and validate a document processing and information extraction pipeline for the identification of chemical entity mentions in text.

Results

We used the BioCreative IV CHEMDNER task data to train and evaluate a machine-learning based entity recognition system. Using a combination of two conditional random field models, a selected set of features, and a post-processing stage, we achieved F-measure results of 87.48% in the chemical entity mention recognition task and 87.75% in the chemical document indexing task.

Conclusions

We present a machine learning-based solution for automatic recognition of chemical and drug names in scientific documents. The proposed approach applies a rich feature set, including linguistic, orthographic, morphological, dictionary matching and local context features. Post-processing modules are also integrated, performing parentheses correction, abbreviation resolution and filtering erroneous mentions using an exclusion list derived from the training data. The developed methods were implemented as a document annotation tool and web service, freely available at http://bioinformatics.ua.pt/becas-chemicals/.",2015-01-19 +24919878,Network-guided regression for detecting associations between DNA methylation and gene expression.,"

Motivation

High-throughput profiling in biological research has resulted in the availability of a wealth of data cataloguing the genetic, epigenetic and transcriptional states of cells. These data could yield discoveries that may lead to breakthroughs in the diagnosis and treatment of human disease, but require statistical methods designed to find the most relevant patterns from millions of potential interactions. Aberrant DNA methylation is often a feature of cancer, and has been proposed as a therapeutic target. However, the relationship between DNA methylation and gene expression remains poorly understood.

Results

We propose Network-sparse Reduced-Rank Regression (NsRRR), a multivariate regression framework capable of using prior biological knowledge expressed as gene interaction networks to guide the search for associations between gene expression and DNA methylation signatures. We use simulations to show the advantage of our proposed model in terms of variable selection accuracy over alternative models that do not use prior network information. We discuss an application of NsRRR to The Cancer Genome Atlas datasets on primary ovarian tumours.

Availability and implementation

R code implementing the NsRRR model is available at http://www2.imperial.ac.uk/∼gmontana

Contact

giovanni.montana@kcl.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-06-11 +26949515,The unfolded protein response and its potential role in Huntington's disease elucidated by a systems biology approach.,"Huntington ´s disease (HD) is a progressive, neurodegenerative disease with a fatal outcome. Although the disease-causing gene (huntingtin) has been known for over 20 years, the exact mechanisms leading to neuronal cell death are still controversial. One potential mechanism contributing to the massive loss of neurons observed in the brain of HD patients could be the unfolded protein response (UPR) activated by accumulation of misfolded proteins in the endoplasmic reticulum (ER). As an adaptive response to counter-balance accumulation of un- or misfolded proteins, the UPR upregulates transcription of chaperones, temporarily attenuates new translation, and activates protein degradation via the proteasome. However, persistent ER stress and an activated UPR can also cause apoptotic cell death. Although different studies have indicated a role for the UPR in HD, the evidence remains inconclusive. Here, we present extensive bioinformatic analyses that revealed UPR activation in different experimental HD models based on transcriptomic data. Accordingly, we have identified 53 genes, including RAB5A, HMGB1, CTNNB1, DNM1, TUBB, TSG101, EEF2, DYNC1H1, SLC12A5, ATG5, AKT1, CASP7 and SYVN1 that provide a potential link between UPR and HD. To further elucidate the potential role of UPR as a disease-relevant process, we examined its connection to apoptosis based on molecular interaction data, and identified a set of 40 genes including ADD1, HSP90B1, IKBKB, IKBKG, RPS3A and LMNB1, which seem to be at the crossroads between these two important cellular processes. 
Remarkably, we also found strong correlation of UPR gene expression with the length of the polyglutamine tract of Huntingtin, which is a critical determinant of age of disease onset in human HD patients pointing to the UPR as a promising target for therapeutic intervention. The study is complemented by a newly developed web-portal called UPR-HD (http://uprhd.sysbiolab.eu) that enables visualization and interactive analysis of UPR-associated gene expression across various HD models.",2015-05-01 +26320097,The Transgenic RNAi Project at Harvard Medical School: Resources and Validation.,"To facilitate large-scale functional studies in Drosophila, the Drosophila Transgenic RNAi Project (TRiP) at Harvard Medical School (HMS) was established along with several goals: developing efficient vectors for RNAi that work in all tissues, generating a genome-scale collection of RNAi stocks with input from the community, distributing the lines as they are generated through existing stock centers, validating as many lines as possible using RT-qPCR and phenotypic analyses, and developing tools and web resources for identifying RNAi lines and retrieving existing information on their quality. With these goals in mind, here we describe in detail the various tools we developed and the status of the collection, which is currently composed of 11,491 lines and covering 71% of Drosophila genes. Data on the characterization of the lines either by RT-qPCR or phenotype is available on a dedicated website, the RNAi Stock Validation and Phenotypes Project (RSVP, http://www.flyrnai.org/RSVP.html), and stocks are available from three stock centers, the Bloomington Drosophila Stock Center (United States), National Institute of Genetics (Japan), and TsingHua Fly Center (China).",2015-08-28 +21510905,"A cluster-randomized, placebo-controlled, maternal vitamin A or beta-carotene supplementation trial in Bangladesh: design and methods.","

Background

We present the design, methods and population characteristics of a large community trial that assessed the efficacy of a weekly supplement containing vitamin A or beta-carotene, at recommended dietary levels, in reducing maternal mortality from early gestation through 12 weeks postpartum. We identify challenges faced and report solutions in implementing an intervention trial under low-resource, rural conditions, including the importance of population choice in promoting generalizability, maintaining rigorous data quality control to reduce inter- and intra- worker variation, and optimizing efficiencies in information and resources flow from and to the field.

Methods

This trial was a double-masked, cluster-randomized, dual intervention, placebo-controlled trial in a contiguous rural area of ~435 sq km with a population of ~650,000 in Gaibandha and Rangpur Districts of Northwestern Bangladesh. Approximately 120,000 married women of reproductive age underwent 5-weekly home surveillance, of whom ~60,000 were detected as pregnant, enrolled into the trial and gave birth to ~44,000 live-born infants. Upon enrollment, at ~ 9 weeks' gestation, pregnant women received a weekly oral supplement containing vitamin A (7000 ug retinol equivalents (RE)), beta-carotene (42 mg, or ~7000 ug RE) or a placebo through 12 weeks postpartum, according to prior randomized allocation of their cluster of residence. Systems described include enlistment and 5-weekly home surveillance for pregnancy based on menstrual history and urine testing, weekly supervised supplementation, periodic risk factor interviews, maternal and infant vital outcome monitoring, birth defect surveillance and clinical/biochemical substudies.

Results

The primary outcome was pregnancy-related mortality assessed for 3 months following parturition. Secondary outcomes included fetal loss due to miscarriage or stillbirth, infant mortality under three months of age, maternal obstetric and infectious morbidity, infant infectious morbidity, maternal and infant micronutrient status, fetal and infant growth and prematurity, external birth defects and postnatal infant growth to 3 months of age.

Conclusion

Aspects of study site selection and its ""resonance"" with national and rural qualities of Bangladesh, the trial's design, methods and allocation group comparability achieved by randomization, field procedures and innovative approaches to solving challenges in trial conduct are described and discussed. This trial is registered with http://Clinicaltrials.gov as protocol NCT00198822.",2011-04-21 +23173617,"MirSNP, a database of polymorphisms altering miRNA target sites, identifies miRNA-related SNPs in GWAS SNPs and eQTLs.","

Background

Numerous single nucleotide polymorphisms (SNPs) associated with complex diseases have been identified by genome-wide association studies (GWAS) and expression quantitative trait loci (eQTLs) studies. However, few of these SNPs have explicit biological functions. Recent studies indicated that the SNPs within the 3'UTR regions of susceptibility genes could affect complex traits/diseases by affecting the function of miRNAs. These 3'UTR SNPs are functional candidates and therefore of interest to GWAS and eQTL researchers.

Description

We developed a publicly available online database, MirSNP (http://cmbi.bjmu.edu.cn/mirsnp), which is a collection of human SNPs in predicted miRNA-mRNA binding sites. We identified 414,510 SNPs that might affect miRNA-mRNA binding. Annotations were added to these SNPs to predict whether a SNP within the target site would decrease/break or enhance/create an miRNA-mRNA binding site. By applying MirSNP database to three brain eQTL data sets, we identified four unreported SNPs (rs3087822, rs13042, rs1058381, and rs1058398), which might affect miRNA binding and thus affect the expression of their host genes in the brain. We also applied the MirSNP database to our GWAS for schizophrenia: seven predicted miRNA-related SNPs (p < 0.0001) were found in the schizophrenia GWAS. Our findings identified the possible functions of these SNP loci, and provide the basis for subsequent functional research.

Conclusion

MirSNP could identify the putative miRNA-related SNPs from GWAS and eQTLs researches and provide the direction for subsequent functional researches.",2012-11-23 +26967525,Incomplete Lineage Sorting and Hybridization Statistics for Large-Scale Retroposon Insertion Data.,"Ancient retroposon insertions can be used as virtually homoplasy-free markers to reconstruct the phylogenetic history of species. Inherited, orthologous insertions in related species offer reliable signals of a common origin of the given species. One prerequisite for such a phylogenetically informative insertion is that the inserted element was fixed in the ancestral population before speciation; if not, polymorphically inserted elements may lead to random distributions of presence/absence states during speciation and possibly to apparently conflicting reconstructions of their ancestry. Fortunately, such misleading fixed cases are relatively rare but nevertheless, need to be considered. Here, we present novel, comprehensive statistical models applicable for (1) analyzing any pattern of rare genomic changes, (2) testing and differentiating conflicting phylogenetic reconstructions based on rare genomic changes caused by incomplete lineage sorting or/and ancestral hybridization, and (3) differentiating between search strategies involving genome information from one or several lineages. When the new statistics are applied, in non-conflicting cases a minimum of three elements present in both of two species and absent in a third group are considered significant support (p<0.05) for the branching of the third from the other two, if all three of the given species are screened equally for genome or experimental data. Five elements are necessary for significant support (p<0.05) if a diagnostic locus derived from only one of three species is screened, and no conflicting markers are detected. 
Most potentially conflicting patterns can be evaluated for their significance and ancestral hybridization can be distinguished from incomplete lineage sorting by considering symmetric or asymmetric distribution of rare genomic changes among possible tree configurations. Additionally, we provide an R-application to make the new KKSC insertion significance test available for the scientific community at http://retrogenomics.uni-muenster.de:3838/KKSC_significance_test/.",2016-03-11 +21873645,ppiTrim: constructing non-redundant and up-to-date interactomes.,"Robust advances in interactome analysis demand comprehensive, non-redundant and consistently annotated data sets. By non-redundant, we mean that the accounting of evidence for every interaction should be faithful: each independent experimental support is counted exactly once, no more, no less. While many interactions are shared among public repositories, none of them contains the complete known interactome for any model organism. In addition, the annotations of the same experimental result by different repositories often disagree. This brings up the issue of which annotation to keep while consolidating evidences that are the same. The iRefIndex database, including interactions from most popular repositories with a standardized protein nomenclature, represents a significant advance in all aspects, especially in comprehensiveness. However, iRefIndex aims to maintain all information/annotation from original sources and requires users to perform additional processing to fully achieve the aforementioned goals. Another issue has to do with protein complexes. Some databases represent experimentally observed complexes as interactions with more than two participants, while others expand them into binary interactions using spoke or matrix model. To avoid untested interaction information buildup, it is preferable to replace the expanded protein complexes, either from spoke or matrix models, with a flat list of complex members. 
To address these issues and to achieve our goals, we have developed ppiTrim, a script that processes iRefIndex to produce non-redundant, consistently annotated data sets of physical interactions. Our script proceeds in three stages: mapping all interactants to gene identifiers and removing all undesired raw interactions, deflating potentially expanded complexes, and reconciling for each interaction the annotation labels among different source databases. As an illustration, we have processed the three largest organismal data sets: yeast, human and fruitfly. While ppiTrim can resolve most apparent conflicts between different labelings, we also discovered some unresolvable disagreements mostly resulting from different annotation policies among repositories. Database URL: http://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads/ppiTrim.html.",2011-08-27 +27153213,Assessing Temporal and Spatial Patterns of Observed and Predicted Ozone in Multiple Urban Areas.,"

Background

Ambient monitoring data show spatial gradients in ozone (O3) across urban areas. Nitrogen oxide (NOx) emissions reductions will likely alter these gradients. Epidemiological studies often use exposure surrogates that may not fully account for the impacts of spatially and temporally changing concentrations on population exposure.

Objectives

We examined the impact of large NOx decreases on spatial and temporal O3 patterns and the implications on exposure.

Methods

We used a photochemical model to estimate O3 response to large NOx reductions. We derived time series of 2006-2008 O3 concentrations consistent with 50% and 75% NOx emissions reduction scenarios in three urban areas (Atlanta, Philadelphia, and Chicago) at each monitor location and spatially interpolated O3 to census-tract centroids.

Results

We predicted that low O3 concentrations would increase and high O3 concentrations would decrease in response to NOx reductions within an urban area. O3 increases occurred across larger areas for the seasonal mean metric than for the regulatory metric (annual 4th highest daily 8-hr maximum) and were located only in urban core areas. O3 always decreased outside the urban core (e.g., at locations of maximum local ozone concentration) for both metrics and decreased within the urban core in some instances. NOx reductions led to more uniform spatial gradients and diurnal and seasonal patterns and caused seasonal peaks in midrange O3 concentrations to shift from midsummer to earlier in the year.

Conclusions

These changes have implications for how O3 exposure may change in response to NOx reductions and are informative for the design of future epidemiology studies and risk assessments.

Citation

Simon H, Wells B, Baker KR, Hubbell B. 2016. Assessing temporal and spatial patterns of observed and predicted ozone in multiple urban areas. Environ Health Perspect 124:1443-1452; http://dx.doi.org/10.1289/EHP190.",2016-05-06 +24642062,Piecewise-constant and low-rank approximation for identification of recurrent copy number variations.,"

Motivation

The post-genome era sees urgent need for more novel approaches to extracting useful information from the huge amount of genetic data. The identification of recurrent copy number variations (CNVs) from array-based comparative genomic hybridization (aCGH) data can help understand complex diseases, such as cancer. Most of the previous computational methods focused on single-sample analysis or statistical testing based on the results of single-sample analysis. Finding recurrent CNVs from multi-sample data remains a challenging topic worth further study.

Results

We present a general and robust method to identify recurrent CNVs from multi-sample aCGH profiles. We express the raw dataset as a matrix and demonstrate that recurrent CNVs will form a low-rank matrix. Hence, we formulate the problem as a matrix recovering problem, where we aim to find a piecewise-constant and low-rank approximation (PLA) to the input matrix. We propose a convex formulation for matrix recovery and an efficient algorithm to globally solve the problem. We demonstrate the advantages of PLA compared with alternative methods using synthesized datasets and two breast cancer datasets. The experimental results show that PLA can successfully reconstruct the recurrent CNV patterns from raw data and achieve better performance compared with alternative methods under a wide range of scenarios.

Availability and implementation

The MATLAB code is available at http://bioinformatics.ust.hk/pla.zip.",2014-03-17 +23894138,GALANT: a Cytoscape plugin for visualizing data as functional landscapes projected onto biological networks.,"

Summary

Network-level visualization of functional data is a key aspect of both analysis and understanding of biological systems. In a continuing effort to create clear and integrated visualizations that facilitate the gathering of novel biological insights despite the overwhelming complexity of data, we present here the GrAph LANdscape VisualizaTion (GALANT), a Cytoscape plugin that builds functional landscapes onto biological networks. By using GALANT, it is possible to project any type of numerical data onto a network to create a smoothed data map resembling the network layout. As a Cytoscape plugin, GALANT is further improved by the functionalities of Cytoscape, the popular bioinformatics package for biological network visualization and data integration.

Availability

http://www.lbbc.ibb.unesp.br/galant.",2013-07-26 +25884684,histoneHMM: Differential analysis of histone modifications with broad genomic footprints.,"

Background

ChIP-seq has become a routine method for interrogating the genome-wide distribution of various histone modifications. An important experimental goal is to compare the ChIP-seq profiles between an experimental sample and a reference sample, and to identify regions that show differential enrichment. However, comparative analysis of samples remains challenging for histone modifications with broad domains, such as heterochromatin-associated H3K27me3, as most ChIP-seq algorithms are designed to detect well defined peak-like features.

Results

To address this limitation we introduce histoneHMM, a powerful bivariate Hidden Markov Model for the differential analysis of histone modifications with broad genomic footprints. histoneHMM aggregates short-reads over larger regions and takes the resulting bivariate read counts as inputs for an unsupervised classification procedure, requiring no further tuning parameters. histoneHMM outputs probabilistic classifications of genomic regions as being either modified in both samples, unmodified in both samples or differentially modified between samples. We extensively tested histoneHMM in the context of two broad repressive marks, H3K27me3 and H3K9me3, and evaluated region calls with follow up qPCR as well as RNA-seq data. Our results show that histoneHMM outperforms competing methods in detecting functionally relevant differentially modified regions.

Conclusion

histoneHMM is a fast algorithm written in C++ and compiled as an R package. It runs in the popular R computing environment and thus seamlessly integrates with the extensive bioinformatic tool sets available through Bioconductor. This makeshistoneHMM an attractive choice for the differential analysis of ChIP-seq data. Software is available from http://histonehmm.molgen.mpg.de .",2015-02-22 +27152641,Associations between Repeated Measures of Maternal Urinary Phthalate Metabolites and Thyroid Hormone Parameters during Pregnancy.,"

Background

Maintaining thyroid homeostasis during pregnancy is essential for normal fetal growth and development. Growing evidence suggests that phthalates interfere with normal thyroid function. Few human studies have investigated the degree to which phthalates may affect thyroid hormone levels in particularly susceptible populations such as pregnant women.

Objectives

We examined the associations between repeated measures of urinary phthalate metabolites and plasma thyroid hormone levels in samples collected at up to four time points per subject in pregnancy. Additionally, we investigated the potential windows of susceptibility to thyroid hormone disturbances related to study visit of sample collection.

Methods

Data were obtained from pregnant women (n = 439) participating in a nested case-control study of preterm birth with 116 cases and 323 controls. We measured 9 phthalate metabolite concentrations in urine samples collected at up to four study visits per subject during pregnancy (median = 10, 18, 26, and 35 weeks of gestation, respectively). We also measured a panel of thyroid function markers in plasma collected at the same four time points per subject during pregnancy.

Results

Although our results were generally null, in repeated measures analyses we observed that phthalate metabolites were largely inversely associated with thyrotropin and positively associated with free and total thyroid hormones. Cross-sectional analyses by study visit revealed that the magnitude and/or direction of these relationships varied by timing of exposure during gestation.

Conclusions

These results support previous reports showing the potential for environmental phthalate exposure to alter circulating levels of thyroid hormones in pregnant women. Citation: Johns LE, Ferguson KK, McElrath TF, Mukherjee B, Meeker JD. 2016. Associations between repeated measures of maternal urinary phthalate metabolites and thyroid hormone parameters during pregnancy. Environ Health Perspect 124:1808-1815; http://dx.doi.org/10.1289/EHP170.",2016-05-06 +25694106,Systematic review and meta-analysis of Japanese familial Alzheimer's disease and FTDP-17.,"Mutations in APP, PSEN1 and PSEN2 as the genetic causes of familial Alzheimer's disease (FAD) have been found in various ethnic populations. A substantial number of FAD pedigrees with mutations have been reported in the Japanese population; however, it remains unclear whether the genetic and clinical features of FAD in the Japanese population differ from those in other populations. To address this issue, we conducted a systematic review and meta-analysis of Japanese FAD and frontotemporal dementia with parkinsonism linked to chromosome 17 (FTDP-17) by literature search. Using this analysis, we identified 39 different PSEN1 mutations in 140 patients, 5 APP mutations in 35 patients and 16 MAPT mutations in 84 patients. There was no PSEN2 mutation among Japanese patients. The age at onset in Japanese FAD patients with PSEN1 mutations was significantly younger than that in patients with APP mutations. Kaplan-Meier analysis revealed that patients with MAPT mutations showed a shorter survival than patients with PSEN1 or APP mutations. Patients with mutations in different genes exhibit characteristic clinical presentations, suggesting that mutations in causative genes may modify the clinical presentations. 
By collecting and cataloging genetic and clinical information on Japanese FAD and FTDP-17, we developed an original database designated as Japanese Familial Alzheimer's Disease Database, which is accessible at http://alzdb.bri.niigata-u.ac.jp/.",2015-02-19 +25237393,ProfileGrids: a sequence alignment visualization paradigm that avoids the limitations of Sequence Logos.,"

Background

The 2013 BioVis Contest provided an opportunity to evaluate different paradigms for visualizing protein multiple sequence alignments. Such data sets are becoming extremely large and thus taxing current visualization paradigms. Sequence Logos represent consensus sequences but have limitations for protein alignments. As an alternative, ProfileGrids are a new protein sequence alignment visualization paradigm that represents an alignment as a color-coded matrix of the residue frequency occurring at every homologous position in the aligned protein family.

Results

The JProfileGrid software program was used to analyze the BioVis contest data sets to generate figures for comparison with the Sequence Logo reference images.

Conclusions

The ProfileGrid representation allows for the clear and effective analysis of protein multiple sequence alignments. This includes both a general overview of the conservation and diversity sequence patterns as well as the interactive ability to query the details of the protein residue distributions in the alignment. The JProfileGrid software is free and available from http://www.ProfileGrid.org.",2014-08-28 +22693223,pKNOT v.2: the protein KNOT web server.,"Knotted proteins have recently received lots of attention due to their interesting topological novelty as well as its puzzling folding mechanisms. We previously published a pKNOT server, which provides a structural database of knotted proteins, analysis tools for detecting and analyzing knotted regions from structures as well as a Java-based 3D graphics viewer for visualizing knotted structures. However, there lacks a convenient platform performing similar tasks directly from 'protein sequences'. In the current version of the web server, referred to as pKNOT v.2, we implement a homology modeling tool such that the server can now accept protein sequences in addition to 3D structures or Protein Data Bank (PDB) IDs and return knot analysis. In addition, we have updated the database of knotted proteins from the current PDB with a combination of automatic and manual procedure. We believe that the updated pKNOT server with its extended functionalities will provide better service to biologists interested in the research of knotted proteins. 
The pKNOT v.2 is available from http://pknot.life.nctu.edu.tw/.",2012-06-12 +22961258,Mouse large-scale phenotyping initiatives: overview of the European Mouse Disease Clinic (EUMODIC) and of the Wellcome Trust Sanger Institute Mouse Genetics Project.,"Two large-scale phenotyping efforts, the European Mouse Disease Clinic (EUMODIC) and the Wellcome Trust Sanger Institute Mouse Genetics Project (SANGER-MGP), started during the late 2000s with the aim to deliver a comprehensive assessment of phenotypes or to screen for robust indicators of diseases in mouse mutants. They both took advantage of available mouse mutant lines but predominantly of the embryonic stem (ES) cells resources derived from the European Conditional Mouse Mutagenesis programme (EUCOMM) and the Knockout Mouse Project (KOMP) to produce and study 799 mouse models that were systematically analysed with a comprehensive set of physiological and behavioural paradigms. They captured more than 400 variables and an additional panel of metadata describing the conditions of the tests. All the data are now available through EuroPhenome database (www.europhenome.org) and the WTSI mouse portal (http://www.sanger.ac.uk/mouseportal/), and the corresponding mouse lines are available through the European Mouse Mutant Archive (EMMA), the International Knockout Mouse Consortium (IKMC), or the Knockout Mouse Project (KOMP) Repository. Overall conclusions from both studies converged, with at least one phenotype scored in at least 80% of the mutant lines. In addition, 57% of the lines were viable, 13% subviable, 30% embryonic lethal, and 7% displayed fertility impairments. These efforts provide an important underpinning for a future global programme that will undertake the complete functional annotation of the mammalian genome in the mouse model.",2012-09-09 +24373360,The national inventory of core capabilities for pandemic influenza preparedness and response: an instrument for planning and evaluation.,"

Background

Reviews of the global response to the 2009 pandemic of influenza A/H1N1 affirmed the importance of assessment of preparedness and response capabilities.

Design

The U. S. Centers for Disease Control and Prevention (CDC) and partners developed the National Inventory of Core Capabilities for Pandemic Influenza Preparedness and Response (http://www.cdc.gov/flu/international/tools.htm) to collect data on coverage, quality, and timeliness in 12 domains: country planning, research and use of findings, communications, epidemiologic capability, laboratory capability, routine influenza surveillance, national respiratory disease surveillance and reporting, outbreak response, resources for containment, community-based interventions to prevent the spread of influenza, infection control, and health sector pandemic response. For each of the capabilities, we selected four indicators. Each indicator includes four levels of performance (0-3), ranging from no or limited capability to fully capable.

Results

In 2008, 40 countries in 6 regions of the World Health Organization (WHO) collected data using the instrument. In 2010 and 2012, 36 and 39 countries did so, respectively. Data collection at regular intervals allows changes in preparedness and response capabilities to be documented. In most countries, participants used the instrument and data collected to inform discussion and planning toward improving the country's level of preparedness for pandemic influenza.

Conclusions

The National Inventory provides countries with a systematic method to document the status of their capabilities with regard to pandemic influenza and to assess progress over time. The National Inventory produces data and findings that serve a wide range of users and uses.",2013-12-23 +26521675,NEFI: Network Extraction From Images.,"Networks are amongst the central building blocks of many systems. Given a graph of a network, methods from graph theory enable a precise investigation of its properties. Software for the analysis of graphs is widely available and has been applied to study various types of networks. In some applications, graph acquisition is relatively simple. However, for many networks data collection relies on images where graph extraction requires domain-specific solutions. Here we introduce NEFI, a tool that extracts graphs from images of networks originating in various domains. Regarding previous work on graph extraction, theoretical results are fully accessible only to an expert audience and ready-to-use implementations for non-experts are rarely available or insufficiently documented. NEFI provides a novel platform allowing practitioners to easily extract graphs from images by combining basic tools from image processing, computer vision and graph theory. Thus, NEFI constitutes an alternative to tedious manual graph extraction and special purpose tools. We anticipate NEFI to enable time-efficient collection of large datasets. The analysis of these novel datasets may open up the possibility to gain new insights into the structure and function of various networks. NEFI is open source and available at http://nefi.mpi-inf.mpg.de.",2015-11-02 +24453188,The jmzQuantML programming interface and validator for the mzQuantML data standard.,"The mzQuantML standard from the HUPO Proteomics Standards Initiative has recently been released, capturing quantitative data about peptides and proteins, following analysis of MS data. 
We present a Java application programming interface (API) for mzQuantML called jmzQuantML. The API provides robust bridges between Java classes and elements in mzQuantML files and allows random access to any part of the file. The API provides read and write capabilities, and is designed to be embedded in other software packages, enabling mzQuantML support to be added to proteomics software tools (http://code.google.com/p/jmzquantml/). The mzQuantML standard is designed around a multilevel validation system to ensure that files are structurally and semantically correct for different proteomics quantitative techniques. In this article, we also describe a Java software tool (http://code.google.com/p/mzquantml-validator/) for validating mzQuantML files, which is a formal part of the data standard.",2014-02-18 +22230699,Prediction and characterization of protein-protein interaction networks in swine.,"

Background

Studying the large-scale protein-protein interaction (PPI) network is important in understanding biological processes. The current research presents the first PPI map of swine, which aims to give new insights into understanding their biological processes.

Results

We used three methods, Interolog-based prediction of porcine PPI network, domain-motif interactions from structural topology-based prediction of porcine PPI network and motif-motif interactions from structural topology-based prediction of porcine PPI network, to predict porcine protein interactions among 25,767 porcine proteins. We predicted 20,213, 331,484, and 218,705 porcine PPIs respectively, merged the three results into 567,441 PPIs, constructed four PPI networks, and analyzed the topological properties of the porcine PPI networks. Our predictions were validated with Pfam domain annotations and GO annotations. Averages of 70, 10,495, and 863 interactions were related to the Pfam domain-interacting pairs in iPfam database. For comparison, randomized networks were generated, and averages of only 4.24, 66.79, and 44.26 interactions were associated with Pfam domain-interacting pairs in iPfam database. In GO annotations, we found 52.68%, 75.54%, 27.20% of the predicted PPIs sharing GO terms respectively. However, the number of PPI pairs sharing GO terms in the 10,000 randomized networks reached 52.68%, 75.54%, 27.20% is 0. Finally, we determined the accuracy and precision of the methods. The methods yielded accuracies of 0.92, 0.53, and 0.50 at precisions of about 0.93, 0.74, and 0.75, respectively.

Conclusion

The results reveal that the predicted PPI networks are considerably reliable. The present research is an important pioneering work on protein function research. The porcine PPI data set, the confidence score of each interaction and a list of related data are available at (http://pppid.biositemap.com/).",2012-01-10 +25887972,aTRAM - automated target restricted assembly method: a fast method for assembling loci across divergent taxa from next-generation sequencing data.,"

Background

Assembling genes from next-generation sequencing data is not only time consuming but computationally difficult, particularly for taxa without a closely related reference genome. Assembling even a draft genome using de novo approaches can take days, even on a powerful computer, and these assemblies typically require data from a variety of genomic libraries. Here we describe software that will alleviate these issues by rapidly assembling genes from distantly related taxa using a single library of paired-end reads: aTRAM, automated Target Restricted Assembly Method. The aTRAM pipeline uses a reference sequence, BLAST, and an iterative approach to target and locally assemble the genes of interest.

Results

Our results demonstrate that aTRAM rapidly assembles genes across distantly related taxa. In comparative tests with a closely related taxon, aTRAM assembled the same sequence as reference-based and de novo approaches taking on average < 1 min per gene. As a test case with divergent sequences, we assembled >1,000 genes from six taxa ranging from 25 - 110 million years divergent from the reference taxon. The gene recovery was between 97 - 99% from each taxon.

Conclusions

aTRAM can quickly assemble genes across distantly-related taxa, obviating the need for draft genome assembly of all taxa of interest. Because aTRAM uses a targeted approach, loci can be assembled in minutes depending on the size of the target. Our results suggest that this software will be useful in rapidly assembling genes for phylogenomic projects covering a wide taxonomic range, as well as other applications. The software is freely available http://www.github.com/juliema/aTRAM .",2015-03-25 +24078684,INVEX--a web-based tool for integrative visualization of expression data.,"

Summary

Gene expression or metabolomics data generated from clinical settings are often associated with multiple metadata (i.e. diagnosis, genotype, gender, etc.). It is of great interest to analyze and to visualize the data in these contexts. Here, we introduce INVEX-a novel web-based tool that integrates the server-side capabilities for data analysis with the browse-based technology for data visualization. INVEX has two key features: (i) flexible differential expression analysis for a wide variety of experimental designs; and (ii) interactive visualization within the context of metadata and biological annotations. INVEX has built-in support for gene/metabolite annotation and a fully functional heatmap builder.

Availability and implementation

Freely available at http://www.invex.ca.",2013-09-26 +27146002,Creation of an Accurate Algorithm to Detect Snellen Best Documented Visual Acuity from Ophthalmology Electronic Health Record Notes.,"

Background

Visual acuity is the primary measure used in ophthalmology to determine how well a patient can see. Visual acuity for a single eye may be recorded in multiple ways for a single patient visit (eg, Snellen vs. Jäger units vs. font print size), and be recorded for either distance or near vision. Capturing the best documented visual acuity (BDVA) of each eye in an individual patient visit is an important step for making electronic ophthalmology clinical notes useful in research.

Objective

Currently, there is limited methodology for capturing BDVA in an efficient and accurate manner from electronic health record (EHR) notes. We developed an algorithm to detect BDVA for right and left eyes from defined fields within electronic ophthalmology clinical notes.

Methods

We designed an algorithm to detect the BDVA from defined fields within 295,218 ophthalmology clinical notes with visual acuity data present. About 5668 unique responses were identified and an algorithm was developed to map all of the unique responses to a structured list of Snellen visual acuities.

Results

Visual acuity was captured from a total of 295,218 ophthalmology clinical notes during the study dates. The algorithm identified all visual acuities in the defined visual acuity section for each eye and returned a single BDVA for each eye. A clinician chart review of 100 random patient notes showed a 99% accuracy detecting BDVA from these records and 1% observed error.

Conclusions

Our algorithm successfully captures best documented Snellen distance visual acuity from ophthalmology clinical notes and transforms a variety of inputs into a structured Snellen equivalent list. Our work, to the best of our knowledge, represents the first attempt at capturing visual acuity accurately from large numbers of electronic ophthalmology notes. Use of this algorithm can benefit research groups interested in assessing visual acuity for patient centered outcome. All codes used for this study are currently available, and will be made available online at https://phekb.org.",2016-05-04 +25001169,BacillusRegNet: a transcriptional regulation database and analysis platform for Bacillus species.,"As high-throughput technologies become cheaper and easier to use, raw sequence data and corresponding annotations for many organisms are becoming available. However, sequence data alone is not sufficient to explain the biological behaviour of organisms, which arises largely from complex molecular interactions. There is a need to develop new platform technologies that can be applied to the investigation of whole-genome datasets in an efficient and cost-effective manner. One such approach is the transfer of existing knowledge from well-studied organisms to closely-related organisms. In this paper, we describe a system, BacillusRegNet, for the use of a model organism, Bacillus subtilis, to infer genome-wide regulatory networks in less well-studied close relatives. The putative transcription factors, their binding sequences and predicted promoter sequences along with annotations are available from the associated BacillusRegNet website (http://bacillus.ncl.ac.uk).",2014-07-08 +26684460,Using Semantic Association to Extend and Infer Literature-Oriented Relativity Between Terms.,"Relative terms often appear together in the literature. Methods have been presented for weighting relativity of pairwise terms by their co-occurring literature and inferring new relationship. 
Terms in the literature are also in the directed acyclic graph of ontologies, such as Gene Ontology and Disease Ontology. Therefore, semantic association between terms may help for establishing relativities between terms in literature. However, current methods do not use these associations. In this paper, an adjusted R-scaled score (ARSS) based on information content (ARSSIC) method is introduced to infer new relationship between terms. First, set inclusion relationship between terms of ontology was exploited to extend relationships between these terms and literature. Next, the ARSS method was presented to measure relativity between terms across ontologies according to these extensional relationships. Then, the ARSSIC method using ratios of information shared of term's ancestors was designed to infer new relationship between terms across ontologies. The result of the experiment shows that ARSS identified more pairs of statistically significant terms based on corresponding gene sets than other methods. And the high average area under the receiver operating characteristic curve (0.9293) shows that ARSSIC achieved a high true positive rate and a low false positive rate. Data is available at http://mlg.hit.edu.cn/ARSSIC/.",2015-11-01 +23006014,"Assessment, management, and prevention of childhood temper tantrums.","

Purpose

To provide an overview of normal and abnormal temper tantrum behavior as well as give recommendations nurse practitioners (NPs) can use in counseling families.

Data sources

Articles were identified from the following databases: CINAHL, HEALTH SOURCE: Nursing/Academic edition, Medline, Social Work Abstracts, Social Science Abstracts, Psych INFO, Psychology and Behavioral Science Collection. Textbook references were also identified using Stat!Ref.

Conclusions

Temper tantrums are one of the most common behavior problems in children. Although most children will have tantrums, with NPs' support and guidance in primary care encounters, most children will not require further intervention.

Implications for practice

NPs caring for children will need to identify normal and abnormal tantrum behavior as well as rule out other causes of tantrums in order to help parents handle the tantrum behavior. To obtain CE credit for this activity, go to http://www.aanp.org and click on the CE Center. Locate the listing for this article and complete the post-test. Follow the instructions to print your CE certificate.",2012-07-02 +21699738,DADA: Degree-Aware Algorithms for Network-Based Disease Gene Prioritization.,"

Background

High-throughput molecular interaction data have been used effectively to prioritize candidate genes that are linked to a disease, based on the observation that the products of genes associated with similar diseases are likely to interact with each other heavily in a network of protein-protein interactions (PPIs). An important challenge for these applications, however, is the incomplete and noisy nature of PPI data. Information flow based methods alleviate these problems to a certain extent, by considering indirect interactions and multiplicity of paths.

Results

We demonstrate that existing methods are likely to favor highly connected genes, making prioritization sensitive to the skewed degree distribution of PPI networks, as well as ascertainment bias in available interaction and disease association data. Motivated by this observation, we propose several statistical adjustment methods to account for the degree distribution of known disease and candidate genes, using a PPI network with associated confidence scores for interactions. We show that the proposed methods can detect loosely connected disease genes that are missed by existing approaches, however, this improvement might come at the price of more false negatives for highly connected genes. Consequently, we develop a suite called DADA, which includes different uniform prioritization methods that effectively integrate existing approaches with the proposed statistical adjustment strategies. Comprehensive experimental results on the Online Mendelian Inheritance in Man (OMIM) database show that DADA outperforms existing methods in prioritizing candidate disease genes.

Conclusions

These results demonstrate the importance of employing accurate statistical models and associated adjustment methods in network-based disease gene prioritization, as well as other network-based functional inference applications. DADA is implemented in Matlab and is freely available at http://compbio.case.edu/dada/.",2011-06-24 +23061897,"Sifting through genomes with iterative-sequence clustering produces a large, phylogenetically diverse protein-family resource.","

Background

New computational resources are needed to manage the increasing volume of biological data from genome sequencing projects. One fundamental challenge is the ability to maintain a complete and current catalog of protein diversity. We developed a new approach for the identification of protein families that focuses on the rapid discovery of homologous protein sequences.

Results

We implemented fully automated and high-throughput procedures to de novo cluster proteins into families based upon global alignment similarity. Our approach employs an iterative clustering strategy in which homologs of known families are sifted out of the search for new families. The resulting reduction in computational complexity enables us to rapidly identify novel protein families found in new genomes and to perform efficient, automated updates that keep pace with genome sequencing. We refer to protein families identified through this approach as ""Sifting Families,"" or SFams. Our analysis of ~10.5 million protein sequences from 2,928 genomes identified 436,360 SFams, many of which are not represented in other protein family databases. We validated the quality of SFam clustering through statistical as well as network topology-based analyses.

Conclusions

We describe the rapid identification of SFams and demonstrate how they can be used to annotate genomes and metagenomes. The SFam database catalogs protein-family quality metrics, multiple sequence alignments, hidden Markov models, and phylogenetic trees. Our source code and database are publicly available and will be subject to frequent updates (http://edhar.genomecenter.ucdavis.edu/sifting_families/).",2012-10-13 +25152232,PHOXTRACK-a tool for interpreting comprehensive datasets of post-translational modifications of proteins.,"

Unlabelled

We introduce PHOXTRACK (PHOsphosite-X-TRacing Analysis of Causal Kinases), a user-friendly freely available software tool for analyzing large datasets of post-translational modifications of proteins, such as phosphorylation, which are commonly gained by mass spectrometry detection. In contrast to other currently applied data analysis approaches, PHOXTRACK uses full sets of quantitative proteomics data and applies non-parametric statistics to calculate whether defined kinase-specific sets of phosphosite sequences indicate statistically significant concordant differences between various biological conditions. PHOXTRACK is an efficient tool for extracting post-translational information of comprehensive proteomics datasets to decipher key regulatory proteins and to infer biologically relevant molecular pathways.

Availability

PHOXTRACK will be maintained over the next years and is freely available as an online tool for non-commercial use at http://phoxtrack.molgen.mpg.de. Users will also find a tutorial at this Web site and can additionally give feedback at https://groups.google.com/d/forum/phoxtrack-discuss.",2014-08-24 +25359889,Computational framework for next-generation sequencing of heterogeneous viral populations using combinatorial pooling.,"

Motivation

Next-generation sequencing (NGS) allows for analyzing a large number of viral sequences from infected patients, providing an opportunity to implement large-scale molecular surveillance of viral diseases. However, despite improvements in technology, traditional protocols for NGS of large numbers of samples are still highly cost and labor intensive. One of the possible cost-effective alternatives is combinatorial pooling. Although a number of pooling strategies for consensus sequencing of DNA samples and detection of SNPs have been proposed, these strategies cannot be applied to sequencing of highly heterogeneous viral populations.

Results

We developed a cost-effective and reliable protocol for sequencing of viral samples, that combines NGS using barcoding and combinatorial pooling and a computational framework including algorithms for optimal virus-specific pools design and deconvolution of individual samples from sequenced pools. Evaluation of the framework on experimental and simulated data for hepatitis C virus showed that it substantially reduces the sequencing costs and allows deconvolution of viral populations with a high accuracy.

Availability and implementation

The source code and experimental data sets are available at http://alan.cs.gsu.edu/NGS/?q=content/pooling.",2014-10-29 +24905985,Construction and analysis of high-density linkage map using high-throughput sequencing data.,"Linkage maps enable the study of important biological questions. The construction of high-density linkage maps appears more feasible since the advent of next-generation sequencing (NGS), which eases SNP discovery and high-throughput genotyping of large population. However, the marker number explosion and genotyping errors from NGS data challenge the computational efficiency and linkage map quality of linkage study methods. Here we report the HighMap method for constructing high-density linkage maps from NGS data. HighMap employs an iterative ordering and error correction strategy based on a k-nearest neighbor algorithm and a Monte Carlo multipoint maximum likelihood algorithm. Simulation study shows HighMap can create a linkage map with three times as many markers as ordering-only methods while offering more accurate marker orders and stable genetic distances. Using HighMap, we constructed a common carp linkage map with 10,004 markers. The singleton rate was less than one-ninth of that generated by JoinMap4.1. Its total map distance was 5,908 cM, consistent with reports on low-density maps. HighMap is an efficient method for constructing high-density, high-quality linkage maps from high-throughput population NGS data. It will facilitate genome assembling, comparative genomic analysis, and QTL studies. HighMap is available at http://highmap.biomarker.com.cn/.",2014-06-06 +26218856,The boundaries of genocide: Quantifying the uncertainty of the death toll during the Pol Pot regime in Cambodia (1975-79).,"The range of estimates of excess deaths under Pol Pot's rule of Cambodia (1975-79) is too wide to be useful: they range from under 1 to over 3 million, with the more plausible estimates still varying from 1 to 2 million. 
By stochastically reconstructing population dynamics in Cambodia from extant historical and demographic data, we produced interpretable distributions of the death toll and other demographic indicators. The resulting 95 per cent simulation interval (1.2-2.8 million excess deaths) demonstrates substantial uncertainty over the exact scale of mortality, yet it still excludes nearly half of the previous death-toll estimates. The 1.5-2.25 million interval contains 69 per cent of the simulations for the actual number of excess deaths, more than the wider (1-2 million) range of previous plausible estimates. The median value of 1.9 million excess deaths represents 21 per cent of the population at risk. Supplementary material for this article is available at: http://dx.doi.org/10.1080/00324728.2015.1045546.",2015-07-28 +25689795,Using circular RNA as a novel type of biomarker in the screening of gastric cancer.,"

Background

Circular RNAs (circRNAs), a class of endogenous RNAs, have emerged as an enigmatic class of RNAs. Little is known about their value in the diagnosis of cancers.

Methods

The targeted circRNA of this study was selected using two circRNA databases: CircBase (http://circbase.org/) and circ2Traits (http://gyanxet-beta.com/circdb/). Divergent primers, rather than commonly used convergent primers, for the circRNA were designed. The circRNA levels in 101 paired gastric cancer tissues and adjacent nontumorous tissues from surgical gastric cancer patients and 36 paired plasma samples from preoperative and postoperative gastric cancer patients were analyzed by real-time quantitative reverse transcription-polymerase chain reaction (qRT-PCR). The specificity of the amplified products was measured by melting curve analysis and DNA sequencing. To observe the stability of circRNA, three randomly selected samples of gastric cancer tissues were stored at room temperature, 4°C and -20°C, and then, their circRNA levels were analyzed. To verify the reproducibility of qRT-PCR, circRNA levels were detected in a set of specimens (n=15) in two independent experiments with an interval of one day. Then, the correlation of their Ct values was determined. The relationships between circRNA expression levels and clinicopathological factors of patients with gastric cancer were further analyzed by one-way analysis of variance. A receiver operating characteristic (ROC) curve was established to evaluate the diagnostic value.

Results

Hsa_circ_002059, a typical circular RNA, was first found to be significantly downregulated in gastric cancer tissues compared with paired adjacent nontumorous tissues (p<0.001). Its levels in plasma collected from postoperative gastric cancer patients were found significantly different from those from preoperative gastric cancer patients. The area under the ROC curve was 0.73. Importantly, we further found that lower expression levels were significantly correlated with distal metastasis (P=0.036), TNM stage (P=0.042), gender (P=0.002) and age (P=0.022). The stability of circRNAs and the reproducibility of the qRT-PCR method for detecting circRNA levels were determined.

Conclusion

These results suggested that circRNAs are highly stable in mammalian cells and that one specific circRNA, hsa_circ_002059, may be a potential novel and stable biomarker for the diagnosis of gastric carcinoma.",2015-02-14 +26142184,"Nsite, NsiteH and NsiteM computer tools for studying transcription regulatory elements.","

Unlabelled

Gene transcription is mostly conducted through interactions of various transcription factors and their binding sites on DNA (regulatory elements, REs). Today, we are still far from understanding the real regulatory content of promoter regions. Computer methods for identification of REs remain a widely used tool for studying and understanding transcriptional regulation mechanisms. The Nsite, NsiteH and NsiteM programs perform searches for statistically significant (non-random) motifs of known human, animal and plant one-box and composite REs in a single genomic sequence, in a pair of aligned homologous sequences and in a set of functionally related sequences, respectively.

Availability and implementation

Pre-compiled executables built under commonly used operating systems are available for download by visiting http://www.molquest.kaust.edu.sa and http://www.softberry.com.

Contact

solovictor@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-02 +26142188,Data2Dynamics: a modeling environment tailored to parameter estimation in dynamical systems.,"

Unlabelled

Modeling of dynamical systems using ordinary differential equations is a popular approach in the field of systems biology. Two of the most critical steps in this approach are to construct dynamical models of biochemical reaction networks for large datasets and complex experimental conditions and to perform efficient and reliable parameter estimation for model fitting. We present a modeling environment for MATLAB that pioneers these challenges. The numerically expensive parts of the calculations such as the solving of the differential equations and of the associated sensitivity system are parallelized and automatically compiled into efficient C code. A variety of parameter estimation algorithms as well as frequentist and Bayesian methods for uncertainty analysis have been implemented and used on a range of applications that lead to publications.

Availability and implementation

The Data2Dynamics modeling environment is MATLAB based, open source and freely available at http://www.data2dynamics.org.

Contact

andreas.raue@fdm.uni-freiburg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-03 +23272172,Signalling network construction for modelling plant defence response.,"Plant defence signalling response against various pathogens, including viruses, is a complex phenomenon. In resistant interaction a plant cell perceives the pathogen signal, transduces it within the cell and performs a reprogramming of the cell metabolism leading to the pathogen replication arrest. This work focuses on signalling pathways crucial for the plant defence response, i.e., the salicylic acid, jasmonic acid and ethylene signal transduction pathways, in the Arabidopsis thaliana model plant. The initial signalling network topology was constructed manually by defining the representation formalism, encoding the information from public databases and literature, and composing a pathway diagram. The manually constructed network structure consists of 175 components and 387 reactions. In order to complement the network topology with possibly missing relations, a new approach to automated information extraction from biological literature was developed. This approach, named Bio3graph, allows for automated extraction of biological relations from the literature, resulting in a set of (component1, reaction, component2) triplets and composing a graph structure which can be visualised, compared to the manually constructed topology and examined by the experts. Using a plant defence response vocabulary of components and reaction types, Bio3graph was applied to a set of 9,586 relevant full text articles, resulting in 137 newly detected reactions between the components. Finally, the manually constructed topology and the new reactions were merged to form a network structure consisting of 175 components and 524 reactions. The resulting pathway diagram of plant defence signalling represents a valuable source for further computational modelling and interpretation of omics data. 
The developed Bio3graph approach, implemented as an executable language processing and graph visualisation workflow, is publically available at http://ropot.ijs.si/bio3graph/ and can be utilised for modelling other biological systems, given that an adequate vocabulary is provided.",2012-12-18 +25075113,Circleator: flexible circular visualization of genome-associated data with BioPerl and SVG.,"

Summary

Circleator is a Perl application that generates circular figures of genome-associated data. It leverages BioPerl to support standard annotation and sequence file formats and produces publication-quality SVG output. It is designed to be both flexible and easy to use. It includes a library of circular track types and predefined configuration files for common use-cases, including: (i) visualizing gene annotation and DNA sequence data from a GenBank flat file, (ii) displaying patterns of gene conservation in related microbial strains, (iii) showing Single Nucleotide Polymorphisms (SNPs) and indels relative to a reference genome and gene set and (iv) viewing RNA-Seq plots.

Availability and implementation

Circleator is freely available under the Artistic License 2.0 from http://jonathancrabtree.github.io/Circleator/ and is integrated with the CloVR cloud-based sequence analysis Virtual Machine (VM), which can be downloaded from http://clovr.org or run on Amazon EC2.",2014-07-29 +24753490,Rgb: a scriptable genome browser for R.,"

Summary

Thanks to its free licensing and the development of initiatives like Bioconductor, R has become an essential part of the bioinformatics toolbox in the past years and is more and more confronted with genomically located data. While separate solutions are available to manipulate and visualize such data, no R package currently offers the efficiency required for computationally intensive tasks such as interactive genome browsing. The package proposed here fulfills this specific need, providing a multilevel interface suitable for most needs, from a completely interfaced genome browser to low-level classes and methods. Its time and memory efficiency have been challenged in a human dataset, where it outperformed existing solutions by several orders of magnitude.

Availability and implementation

R sources and packages are freely available at the CRAN repository and dedicated Web site: http://bioinformatics.ovsa.fr/Rgb. Distributed under the GPL 3 license, compatible with most operating systems (Windows, Linux, Mac OS) and architectures.

Contact

maressyl@gmail.com or fabrice.jardin@chb.unicancer.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-09 +24009883,ExoCarta as a resource for exosomal research. ,"Exosomes are a class of extracellular vesicles that are secreted by various cell types. Unlike other extracellular vesicles (ectosomes and apoptotic blebs), exosomes are of endocytic origin. The roles of exosomes in vaccine/drug delivery, intercellular communication and as a possible source of disease biomarkers have sparked immense interest in them, resulting in a plethora of studies. Whilst multidimensional datasets are continuously generated, it is difficult to harness the true potential of the data until they are compiled and made accessible to the biomedical researchers. Here, we describe ExoCarta (http://www.exocarta.org), a manually curated database of exosomal proteins, RNA and lipids. Datasets currently present in ExoCarta are integrated from both published and unpublished exosomal studies. Since its launch in 2009, ExoCarta has been accessed by more than 16,000 unique users. In this article, we discuss the utility of ExoCarta for exosomal research and urge biomedical researchers in the field to deposit their datasets directly to ExoCarta.",2012-04-16 +23152555,"Multilocus sequence typing of Candida tropicalis shows the presence of different clonal clusters and fluconazole susceptibility profiles in sequential isolates from candidemia patients in Sao Paulo, Brazil.","The profiles of 61 Candida tropicalis isolates from 43 patients (28 adults and 15 children) diagnosed with candidemia at two teaching hospitals in São Paulo, Brazil, were characterized by multilocus sequence typing (MLST). For the 14 patients who had bloodstream infections, 32 isolates were serially collected from their blood and/or catheters. Thirty-nine diploid sequence types (DSTs) were differentiated. According to the C. 
tropicalis MLST database (http://pubmlst.org/ctropicalis/), 36 DSTs and 23 genotypes identified from the 61 isolates had not previously been described. This report represents the first study to characterize sequential isolates of C. tropicalis from candidemia cases in South America. Microvariation in a single gene was found in the sequential isolates from 7 patients. The main polymorphisms occurred in the alleles of the XYR1 gene, specifically at nucleotide positions 215, 242, and 344. Macrovariation in six gene fragments was detected in the isolates from 3 patients. eBURST analysis added two new groups to this study (groups 6 and 18). Additionally, susceptibility tests indicate that 3 isolates were resistant to fluconazole. No correlation was found between the DSTs and susceptibility to fluconazole and/or selective antifungal pressure. Two patients were sequentially infected with resistant and susceptible strains. MLST is an important tool for studying the genetic diversity of multiple/sequential isolates of patients with candidemia, allowing the comparison of our data with those from other regions of the world, as well as allowing an analysis of the genetic relationship among several clones in sequential isolates from the same or different candidemia patient sites (blood or catheter).",2012-11-14 +25898129,"A multivariate genome-wide association analysis of 10 LDL subfractions, and their response to statin treatment, in 1868 Caucasians.","We conducted a genome-wide association analysis of 7 subfractions of low density lipoproteins (LDLs) and 3 subfractions of intermediate density lipoproteins (IDLs) measured by gradient gel electrophoresis, and their response to statin treatment, in 1868 individuals of European ancestry from the Pharmacogenomics and Risk of Cardiovascular Disease study. 
Our analyses identified four previously-implicated loci (SORT1, APOE, LPA, and CETP) as containing variants that are very strongly associated with lipoprotein subfractions (log(10)Bayes Factor > 15). Subsequent conditional analyses suggest that three of these (APOE, LPA and CETP) likely harbor multiple independently associated SNPs. Further, while different variants typically showed different characteristic patterns of association with combinations of subfractions, the two SNPs in CETP show strikingly similar patterns--both in our original data and in a replication cohort--consistent with a common underlying molecular mechanism. Notably, the CETP variants are very strongly associated with LDL subfractions, despite showing no association with total LDLs in our study, illustrating the potential value of the more detailed phenotypic measurements. In contrast with these strong subfraction associations, genetic association analysis of subfraction response to statins showed much weaker signals (none exceeding log(10)Bayes Factor of 6). However, two SNPs (in APOE and LPA) previously-reported to be associated with LDL statin response do show some modest evidence for association in our data, and the subfraction response profiles at the LPA SNP are consistent with the LPA association, with response likely being due primarily to resistance of Lp(a) particles to statin therapy. An additional important feature of our analysis is that, unlike most previous analyses of multiple related phenotypes, we analyzed the subfractions jointly, rather than one at a time. Comparisons of our multivariate analyses with standard univariate analyses demonstrate that multivariate analyses can substantially increase power to detect associations. Software implementing our multivariate analysis methods is available at http://stephenslab.uchicago.edu/software.html.",2015-04-21 +22962483,ReLiance: a machine learning and literature-based prioritization of receptor--ligand pairings.,"

Motivation

The prediction of receptor-ligand pairings is an important area of research as intercellular communications are mediated by the successful interaction of these key proteins. As the exhaustive assaying of receptor-ligand pairs is impractical, a computational approach to predict pairings is necessary. We propose a workflow to carry out this interaction prediction task, using a text mining approach in conjunction with a state of the art prediction method, as well as a widely accessible and comprehensive dataset. Among several modern classifiers, random forests have been found to be the best at this prediction task. The training of this classifier was carried out using an experimentally validated dataset of Database of Ligand-Receptor Partners (DLRP) receptor-ligand pairs. New examples, co-cited with the training receptors and ligands, are then classified using the trained classifier. After applying our method, we find that we are able to successfully predict receptor-ligand pairs within the GPCR family with a balanced accuracy of 0.96. Upon further inspection, we find several supported interactions that were not present in the Database of Interacting Proteins (DIPdatabase). We have measured the balanced accuracy of our method resulting in high quality predictions stored in the available database ReLiance.

Availability

http://homes.esat.kuleuven.be/~bioiuser/ReLianceDB/index.php

Contact

yves.moreau@esat.kuleuven.be; ernesto.iacucci@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-01 +22230935,Database for crude drugs and Kampo medicine.,"A wiki-based repository for crude drugs and Kampo medicine is introduced. It provides taxonomic and chemical information for 158 crude drugs and 348 prescriptions of the traditional Kampo medicine in Japan, which is a variation of ancient Chinese medicine. The system is built on MediaWiki with extensions for inline page search and for sending user-input elements to the server. These functions together realize implementation of word checks and data integration at the user-level. In this scheme, any user can participate in creating an integrated database with controlled vocabularies on the wiki system. Our implementation and data are accessible at http://metabolomics.jp/wiki/.",2011-01-01 +26063822,Tensor methods for parameter estimation and bifurcation analysis of stochastic reaction networks.,"Stochastic modelling of gene regulatory networks provides an indispensable tool for understanding how random events at the molecular level influence cellular functions. A common challenge of stochastic models is to calibrate a large number of model parameters against the experimental data. Another difficulty is to study how the behaviour of a stochastic model depends on its parameters, i.e. whether a change in model parameters can lead to a significant qualitative change in model behaviour (bifurcation). In this paper, tensor-structured parametric analysis (TPA) is developed to address these computational challenges. It is based on recently proposed low-parametric tensor-structured representations of classical matrices and vectors. This approach enables simultaneous computation of the model properties for all parameter values within a parameter space. The TPA is illustrated by studying the parameter estimation, robustness, sensitivity and bifurcation structure in stochastic models of biochemical networks. 
A Matlab implementation of the TPA is available at http://www.stobifan.org.",2015-07-01 +22369265,i-rDNA: alignment-free algorithm for rapid in silico detection of ribosomal gene fragments from metagenomic sequence data sets.,"

Background

Obtaining accurate estimates of microbial diversity using rDNA profiling is the first step in most metagenomics projects. Consequently, most metagenomic projects spend considerable amounts of time, money and manpower for experimentally cloning, amplifying and sequencing the rDNA content in a metagenomic sample. In the second step, the entire genomic content of the metagenome is extracted, sequenced and analyzed. Since DNA sequences obtained in this second step also contain rDNA fragments, rapid in silico identification of these rDNA fragments would drastically reduce the cost, time and effort of current metagenomic projects by entirely bypassing the experimental steps of primer based rDNA amplification, cloning and sequencing. In this study, we present an algorithm called i-rDNA that can facilitate the rapid detection of 16S rDNA fragments from amongst millions of sequences in metagenomic data sets with high detection sensitivity.

Results

Performance evaluation with data sets/database variants simulating typical metagenomic scenarios indicates the significantly high detection sensitivity of i-rDNA. Moreover, i-rDNA can process a million sequences in less than an hour on a simple desktop with modest hardware specifications.

Conclusions

In addition to the speed of execution, high sensitivity and low false positive rate, the utility of the algorithmic approach discussed in this paper is immense given that it would help in bypassing the entire experimental step of primer-based rDNA amplification, cloning and sequencing. Application of this algorithmic approach would thus drastically reduce the cost, time and human efforts invested in all metagenomic projects.

Availability

A web-server for the i-rDNA algorithm is available at http://metagenomics.atc.tcs.com/i-rDNA/",2011-11-30 +21646335,"The Gene3D Web Services: a platform for identifying, annotating and comparing structural domains in protein sequences.","The Gene3D structural domain database provides domain annotations for 7 million proteins, based on the manually curated structural domain superfamilies in CATH. These annotations are integrated with functional, genomic and molecular information from external resources, such as GO, EC, UniProt and the NCBI Taxonomy database. We have constructed a set of web services that provide programmatic access to this integrated database, as well as the Gene3D domain recognition tool (Gene3DScan) and protein sequence annotation pipeline for analysing novel protein sequences. Example queries include retrieving all curated GO terms for a domain superfamily or all the multi-domain architectures for the human genome. The services can be accessed using simple HTTP calls and are able to return results in a range of formats for quick downloading and easy parsing, graphical rendering and data storage. Hence, they provide a simple, but flexible means of integrating domain annotations and associated data sets into locally run pipelines and analysis software. The services can be found at http://gene3d.biochem.ucl.ac.uk/WebServices/.",2011-06-06 +25776805,Rsite: a computational method to identify the functional sites of noncoding RNAs.,"There is an increasing demand for identifying the functional sites of noncoding RNAs (ncRNAs). Here we introduce a tertiary-structure based computational approach, Rsite, which first calculates the Euclidean distances between each nucleotide and all the other nucleotides in a RNA molecule and then determines the nucleotides that are the extreme points in the distance curve as the functional sites. By analyzing two ncRNAs, tRNA (Lys) and Diels-Alder ribozyme, we demonstrated the efficiency of Rsite. 
As a result, Rsite recognized all of the known functional sites of the two ncRNAs, suggesting that Rsite could be a potentially useful tool for discovering the functional sites of ncRNAs. The source codes and data sets of Rsite are available at http://www.cuilab.cn/rsite.",2015-03-17 +24586784,RNA CoMPASS: a dual approach for pathogen and host transcriptome analysis of RNA-seq datasets.,"High-throughput RNA sequencing (RNA-seq) has become an instrumental assay for the analysis of multiple aspects of an organism's transcriptome. Further, the analysis of a biological specimen's associated microbiome can also be performed using RNA-seq data and this application is gaining interest in the scientific community. There are many existing bioinformatics tools designed for analysis and visualization of transcriptome data. Despite the availability of an array of next generation sequencing (NGS) analysis tools, the analysis of RNA-seq data sets poses a challenge for many biomedical researchers who are not familiar with command-line tools. Here we present RNA CoMPASS, a comprehensive RNA-seq analysis pipeline for the simultaneous analysis of transcriptomes and metatranscriptomes from diverse biological specimens. RNA CoMPASS leverages existing tools and parallel computing technology to facilitate the analysis of even very large datasets. RNA CoMPASS has a web-based graphical user interface with intrinsic queuing to control a distributed computational pipeline. RNA CoMPASS was evaluated by analyzing RNA-seq data sets from 45 B-cell samples. Twenty-two of these samples were derived from lymphoblastoid cell lines (LCLs) generated by the infection of naïve B-cells with the Epstein Barr virus (EBV), while another 23 samples were derived from Burkitt's lymphomas (BL), some of which arose in part through infection with EBV. Appropriately, RNA CoMPASS identified EBV in all LCLs and in a fraction of the BLs. 
Cluster analysis of the human transcriptome component of the RNA CoMPASS output clearly separated the BLs (which have a germinal center-like phenotype) from the LCLs (which have a blast-like phenotype) with evidence of activated MYC signaling and lower interferon and NF-kB signaling in the BLs. Together, this analysis illustrates the utility of RNA CoMPASS in the simultaneous analysis of transcriptome and metatranscriptome data. RNA CoMPASS is freely available at http://rnacompass.sourceforge.net/.",2014-02-25 +26944778,"Gender Preference in the Sexual Attractions, Fantasies, and Relationships of Voluntarily Castrated Men.","

Introduction

Some men seek castration outside a clear medical need. This study explored how their sexuality changed after castration.

Aim

To explore changes in preferred gender(s) of sexual attraction, fantasy, and relationships in voluntarily castrated men with or without gonadal hormone therapy.

Methods

A questionnaire was posted at http://www.eunuch.org that yielded data on men who had been voluntarily castrated physically (n = 198) or chemically (n = 96).

Main outcome measures

Respondents were asked to report retrospectively on their sexuality, including their sexual activity and which gender(s) they were sexually attracted to, fantasized about, or had sexual relations with 6 months to 1 year before and after castration.

Results

A substantial proportion of men remained sexually active after castration; 37% had sex at least several times per week. Most respondents did not report a change in preferred gender(s) of attraction (65%, n = 181), fantasies (62%, n = 169), or sexual relationships (66%, n = 163), although approximately 20% to 30% of respondents did report such changes and 8% to 11% became non-sexual after castration. Respondents who were attracted to and fantasized about ""only men"" or who had sexual relationship with ""only women"" before castration were the least likely to report a change subsequent to castration. Respondents who were taking neither supplemental testosterone nor estrogen were more likely to report (i) becoming attracted to no one, (ii) fantasizing about no one, and (iii) becoming sexually inactive.

Conclusion

Sexual changes in voluntarily castrated men vary and can be influenced by various factors including the use of supplemental testosterone or estrogen therapy.",2016-03-01 +23044540,A novel missense-mutation-related feature extraction scheme for 'driver' mutation identification.,"

Motivation

It becomes widely accepted that human cancer is a disease involving dynamic changes in the genome and that the missense mutations constitute the bulk of human genetic variations. A multitude of computational algorithms, especially the machine learning-based ones, has consequently been proposed to distinguish missense changes that contribute to the cancer progression ('driver' mutation) from those that do not ('passenger' mutation). However, the existing methods have multifaceted shortcomings, in the sense that they either adopt incomplete feature space or depend on protein structural databases which are usually far from integrated.

Results

In this article, we investigated multiple aspects of a missense mutation and identified a novel feature space that well distinguishes cancer-associated driver mutations from passenger ones. An index (DX score) was proposed to evaluate the discriminating capability of each feature, and a subset of these features which ranks top was selected to build the SVM classifier. Cross-validation showed that the classifier trained on our selected features significantly outperforms the existing ones both in precision and robustness. We applied our method to several datasets of missense mutations culled from published database and literature and obtained more reasonable results than previous studies.

Availability

The software is available online at http://www.methodisthealth.com/software and https://sites.google.com/site/drivermutationidentification/.

Contact

xzhou@tmhs.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-07 +25860434,Re-alignment of the unmapped reads with base quality score.,"

Motivation

Based on the next generation genome sequencing technologies, a variety of biological applications are developed, while alignment is the first step once the sequencing reads are obtained. In recent years, many software tools have been developed to efficiently and accurately align short reads to the reference genome. However, there are still many reads that can't be mapped to the reference genome, due to the exceeding of allowable mismatches. Moreover, besides the unmapped reads, the reads with low mapping qualities are also excluded from the downstream analysis, such as variance calling. If we can take advantages of the confident segments of these reads, not only can the alignment rates be improved, but also more information will be provided for the downstream analysis.

Results

This paper proposes a method, called RAUR (Re-align the Unmapped Reads), to re-align the reads that cannot be mapped by alignment tools. Firstly, it takes advantage of the base quality scores (reported by the sequencer) to figure out the most confident and informative segments of the unmapped reads by controlling the number of possible mismatches in the alignment. Then, combined with an alignment tool, RAUR re-aligns these segments of the reads. We run RAUR on both simulated data and real data with different read lengths. The results show that many reads which fail to be aligned by the most popular alignment tools (BWA and Bowtie2) can be correctly re-aligned by RAUR, with a similar Precision. Even compared with the BWA-MEM and the local mode of Bowtie2, which perform local alignment for long reads to improve the alignment rate, RAUR also shows advantages on the Alignment rate and Precision in some cases. Therefore, the trimming strategy used in RAUR is useful to improve the Alignment rate of alignment tools for the next-generation genome sequencing.

Availability

All source code are available at http://netlab.csu.edu.cn/bioinformatics/RAUR.html.",2015-03-18 +26834506,Semiparametric Estimation in the Secondary Analysis of Case-Control Studies.,"We study the regression relationship among covariates in case-control data, an area known as the secondary analysis of case-control studies. The context is such that only the form of the regression mean is specified, so that we allow an arbitrary regression error distribution, which can depend on the covariates and thus can be heteroscedastic. Under mild regularity conditions we establish the theoretical identifiability of such models. Previous work in this context has either (a) specified a fully parametric distribution for the regression errors, (b) specified a homoscedastic distribution for the regression errors, (c) has specified the rate of disease in the population (we refer this as true population), or (d) has made a rare disease approximation. We construct a class of semiparametric estimation procedures that rely on none of these. The estimators differ from the usual semiparametric ones in that they draw conclusions about the true population, while technically operating in a hypothetic superpopulation. We also construct estimators with a unique feature, in that they are robust against the misspecification of the regression error distribution in terms of variance structure, while all other nonparametric effects are estimated despite of the biased samples. We establish the asymptotic properties of the estimators and illustrate their finite sample performance through simulation studies, as well as through an empirical example on the relation between red meat consumption and heterocyclic amines. Our analysis verified the positive relationship between red meat consumption and two forms of HCA, indicating that increased red meat consumption leads to increased levels of MeIQA and PhiP, both being risk factors for colorectal cancer. 
Computer software as well as data to illustrate the methodology are available at http://wileyonlinelibrary.com/journal/rss-datasets.",2015-02-15 +26829645,EDISON-WMW: Exact Dynamic Programing Solution of the Wilcoxon-Mann-Whitney Test.,"In many research disciplines, hypothesis tests are applied to evaluate whether findings are statistically significant or could be explained by chance. The Wilcoxon-Mann-Whitney (WMW) test is among the most popular hypothesis tests in medicine and life science to analyze if two groups of samples are equally distributed. This nonparametric statistical homogeneity test is commonly applied in molecular diagnosis. Generally, the solution of the WMW test takes a high combinatorial effort for large sample cohorts containing a significant number of ties. Hence, P value is frequently approximated by a normal distribution. We developed EDISON-WMW, a new approach to calculate the exact permutation of the two-tailed unpaired WMW test without any corrections required and allowing for ties. The method relies on dynamic programing to solve the combinatorial problem of the WMW test efficiently. Beyond a straightforward implementation of the algorithm, we presented different optimization strategies and developed a parallel solution. Using our program, the exact P value for large cohorts containing more than 1000 samples with ties can be calculated within minutes. We demonstrate the performance of this novel approach on randomly-generated data, benchmark it against 13 other commonly-applied approaches and moreover evaluate molecular biomarkers for lung carcinoma and chronic obstructive pulmonary disease (COPD). We found that approximated P values were generally higher than the exact solution provided by EDISON-WMW. Importantly, the algorithm can also be applied to high-throughput omics datasets, where hundreds or thousands of features are included. 
To provide easy access to the multi-threaded version of EDISON-WMW, a web-based solution of our algorithm is freely available at http://www.ccb.uni-saarland.de/software/wtest/.",2016-01-29 +24982428,COSMOS: Python library for massively parallel workflows.,"

Summary

Efficient workflows to shepherd clinically generated genomic data through the multiple stages of a next-generation sequencing pipeline are of critical importance in translational biomedical science. Here we present COSMOS, a Python library for workflow management that allows formal description of pipelines and partitioning of jobs. In addition, it includes a user interface for tracking the progress of jobs, abstraction of the queuing system and fine-grained control over the workflow. Workflows can be created on traditional computing clusters as well as cloud-based services.

Availability and implementation

Source code is available for academic non-commercial research purposes. Links to code and documentation are provided at http://lpm.hms.harvard.edu and http://wall-lab.stanford.edu.

Contact

dpwall@stanford.edu or peter_tonellato@hms.harvard.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-06-30 +25051568,A flexible pinhole camera model for coherent nonuniform sampling.,"The flexible pinhole camera (FPC) allows flexible modulation of the sampling rate over the field of view. The FPC is defined by a viewpoint and a map specifying the sampling locations on the image plane. The map is constructed from known regions of interest with interactive and automatic approaches. The FPC provides inexpensive 3D projection that allows rendering complex datasets quickly, in feed-forward fashion, by projection followed by rasterization. The FPC supports many types of data, including image, height field, geometry, and volume data. The resulting image is a coherent nonuniform sampling (CoNUS) of the dataset that matches the local variation of the dataset's importance. CoNUS images have been successfully implemented for remote visualization, focus-plus-context visualization, and acceleration of expensive rendering effects such as surface geometric detail and specular reflection. A video explaining and demonstrating the FPC is at http://youtu.be/kvFe5XjOPNM.",2014-07-01 +21949797,"Tidying up international nucleotide sequence databases: ecological, geographical and sequence quality annotation of its sequences of mycorrhizal fungi.","Sequence analysis of the ribosomal RNA operon, particularly the internal transcribed spacer (ITS) region, provides a powerful tool for identification of mycorrhizal fungi. The sequence data deposited in the International Nucleotide Sequence Databases (INSD) are, however, unfiltered for quality and are often poorly annotated with metadata. To detect chimeric and low-quality sequences and assign the ectomycorrhizal fungi to phylogenetic lineages, fungal ITS sequences were downloaded from INSD, aligned within family-level groups, and examined through phylogenetic analyses and BLAST searches. 
By combining the fungal sequence database UNITE and the annotation and search tool PlutoF, we also added metadata from the literature to these accessions. Altogether 35,632 sequences belonged to mycorrhizal fungi or originated from ericoid and orchid mycorrhizal roots. Of these sequences, 677 were considered chimeric and 2,174 of low read quality. Information detailing country of collection, geographical coordinates, interacting taxon and isolation source were supplemented to cover 78.0%, 33.0%, 41.7% and 96.4% of the sequences, respectively. These annotated sequences are publicly available via UNITE (http://unite.ut.ee/) for downstream biogeographic, ecological and taxonomic analyses. In European Nucleotide Archive (ENA; http://www.ebi.ac.uk/ena/), the annotated sequences have a special link-out to UNITE. We intend to expand the data annotation to additional genes and all taxonomic groups and functional guilds of fungi.",2011-09-15 +26894674,"Fitmunk: improving protein structures by accurate, automatic modeling of side-chain conformations.","Improvements in crystallographic hardware and software have allowed automated structure-solution pipelines to approach a near-`one-click' experience for the initial determination of macromolecular structures. However, in many cases the resulting initial model requires a laborious, iterative process of refinement and validation. A new method has been developed for the automatic modeling of side-chain conformations that takes advantage of rotamer-prediction methods in a crystallographic context. The algorithm, which is based on deterministic dead-end elimination (DEE) theory, uses new dense conformer libraries and a hybrid energy function derived from experimental data and prior information about rotamer frequencies to find the optimal conformation of each side chain. 
In contrast to existing methods, which incorporate the electron-density term into protein-modeling frameworks, the proposed algorithm is designed to take advantage of the highly discriminatory nature of electron-density maps. This method has been implemented in the program Fitmunk, which uses extensive conformational sampling. This improves the accuracy of the modeling and makes it a versatile tool for crystallographic model building, refinement and validation. Fitmunk was extensively tested on over 115 new structures, as well as a subset of 1100 structures from the PDB. It is demonstrated that the ability of Fitmunk to model more than 95% of side chains accurately is beneficial for improving the quality of crystallographic protein models, especially at medium and low resolutions. Fitmunk can be used for model validation of existing structures and as a tool to assess whether side chains are modeled optimally or could be better fitted into electron density. Fitmunk is available as a web service at http://kniahini.med.virginia.edu/fitmunk/server/ or at http://fitmunk.bitbucket.org/.",2016-01-28 +24550818,A versatile software package for inter-subject correlation based analyses of fMRI.,"In the inter-subject correlation (ISC) based analysis of the functional magnetic resonance imaging (fMRI) data, the extent of shared processing across subjects during the experiment is determined by calculating correlation coefficients between the fMRI time series of the subjects in the corresponding brain locations. This implies that ISC can be used to analyze fMRI data without explicitly modeling the stimulus and thus ISC is a potential method to analyze fMRI data acquired under complex naturalistic stimuli. Despite of the suitability of ISC based approach to analyze complex fMRI data, no generic software tools have been made available for this purpose, limiting a widespread use of ISC based analysis techniques among neuroimaging community. 
In this paper, we present a graphical user interface (GUI) based software package, ISC Toolbox, implemented in Matlab for computing various ISC based analyses. Many advanced computations such as comparison of ISCs between different stimuli, time window ISC, and inter-subject phase synchronization are supported by the toolbox. The analyses are coupled with re-sampling based statistical inference. The ISC based analyses are data and computation intensive and the ISC toolbox is equipped with mechanisms to execute the parallel computations in a cluster environment automatically and with an automatic detection of the cluster environment in use. Currently, SGE-based (Oracle Grid Engine, Son of a Grid Engine, or Open Grid Scheduler) and Slurm environments are supported. In this paper, we present a detailed account on the methods behind the ISC Toolbox, the implementation of the toolbox and demonstrate the possible use of the toolbox by summarizing selected example applications. We also report the computation time experiments both using a single desktop computer and two grid environments demonstrating that parallelization effectively reduces the computing time. The ISC Toolbox is available in https://code.google.com/p/isc-toolbox/",2014-01-31 +22753137,ALSoD: A user-friendly online bioinformatics tool for amyotrophic lateral sclerosis genetics.,"Amyotrophic lateral sclerosis (ALS) is the commonest adult onset motor neuron disease, with a peak age of onset in the seventh decade. With advances in genetic technology, there is an enormous increase in the volume of genetic data produced, and a corresponding need for storage, analysis, and interpretation, particularly as our understanding of the relationships between genotype and phenotype mature. 
Here, we present a system to enable this in the form of the ALS Online Database (ALSoD at http://alsod.iop.kcl.ac.uk), a freely available database that has been transformed from a single gene storage facility recording mutations in the SOD1 gene to a multigene ALS bioinformatics repository and analytical instrument combining genotype, phenotype, and geographical information with associated analysis tools. These include a comparison tool to evaluate genes side by side or jointly with user configurable features, a pathogenicity prediction tool using a combination of computational approaches to distinguish variants with nonfunctional characteristics from disease-associated mutations with more dangerous consequences, and a credibility tool to enable ALS researchers to objectively assess the evidence for gene causation in ALS. Furthermore, integration of external tools, systems for feedback, annotation by users, and two-way links to collaborators hosting complementary databases further enhance the functionality of ALSoD.",2012-07-16 +21200033,In the clinic. Transient ischemic attack.,"This issue provides a clinical overview of transient ischemic attack focusing on prevention, diagnosis, treatment, practice improvement, and patient information. Readers can complete the accompanying CME quiz for 1.5 credits. Only ACP members and individual subscribers can access the electronic features of In the Clinic. Non-subscribers who wish to access this issue of In the Clinic can elect ""Pay for View."" Subscribers can receive 1.5 category 1 CME credits by completing the CME quiz that accompanies this issue of In the Clinic. The content of In the Clinic is drawn from the clinical information and education resources of the American College of Physicians (ACP), including PIER (Physicians' Information and Education Resource) and MKSAP (Medical Knowledge and Self Assessment Program). 
Annals of Internal Medicine editors develop In the Clinic from these primary sources in collaboration with the ACP's Medical Education and Publishing division and with assistance of science writers and physician writers. Editorial consultants from PIER and MKSAP provide expert review of the content. Readers who are interested in these primary resources for more detail can consult www.acponline.org, http://pier.acponline.org, and other resources referenced within each issue of In the Clinic.",2011-01-01 +26563468,CVTree3 Web Server for Whole-genome-based and Alignment-free Prokaryotic Phylogeny and Taxonomy.,"A faithful phylogeny and an objective taxonomy for prokaryotes should agree with each other and ultimately follow the genome data. With the number of sequenced genomes reaching tens of thousands, both tree inference and detailed comparison with taxonomy are great challenges. We now provide one solution in the latest Release 3.0 of the alignment-free and whole-genome-based web server CVTree3. The server resides in a cluster of 64 cores and is equipped with an interactive, collapsible, and expandable tree display. It is capable of comparing the tree branching order with prokaryotic classification at all taxonomic ranks from domains down to species and strains. CVTree3 allows for inquiry by taxon names and trial on lineage modifications. In addition, it reports a summary of monophyletic and non-monophyletic taxa at all ranks as well as produces print-quality subtree figures. After giving an overview of retrospective verification of the CVTree approach, the power of the new server is described for the mega-classification of prokaryotes and determination of taxonomic placement of some newly-sequenced genomes. A few discrepancies between CVTree and 16S rRNA analyses are also summarized with regard to possible taxonomic revisions. 
CVTree3 is freely accessible to all users at http://tlife.fudan.edu.cn/cvtree3/ without login requirements.",2015-10-01 +22923304,Qualitative translation of relations from BioPAX to SBML qual.,"

Motivation

The biological pathway exchange language (BioPAX) and the systems biology markup language (SBML) belong to the most popular modeling and data exchange languages in systems biology. The focus of SBML is quantitative modeling and dynamic simulation of models, whereas the BioPAX specification concentrates mainly on visualization and qualitative analysis of pathway maps. BioPAX describes reactions and relations. In contrast, SBML core exclusively describes quantitative processes such as reactions. With the SBML qualitative models extension (qual), it has recently also become possible to describe relations in SBML. Before the development of SBML qual, relations could not be properly translated into SBML. Until now, there exists no BioPAX to SBML converter that is fully capable of translating both reactions and relations.

Results

The entire nature pathway interaction database has been converted from BioPAX (Level 2 and Level 3) into SBML (Level 3 Version 1) including both reactions and relations by using the new qual extension package. Additionally, we present the new webtool BioPAX2SBML for further BioPAX to SBML conversions. Compared with previous conversion tools, BioPAX2SBML is more comprehensive, more robust and more exact.

Availability

BioPAX2SBML is freely available at http://webservices.cs.uni-tuebingen.de/ and the complete collection of the PID models is available at http://www.cogsys.cs.uni-tuebingen.de/downloads/Qualitative-Models/.",2012-08-24 +25885226,Decreased functional activity of multidrug resistance protein in primary colorectal cancer.,"

Background

The ATP-Binding Cassette (ABC)-transporter MultiDrug Resistance Protein 1 (MDR1) and Multidrug Resistance Related Protein 1 (MRP1) are expressed on the surface of enterocytes, which has led to the belief that these high capacity transporters are responsible for modulating chemosensitivity of colorectal cancer. Several immunohistochemistry and reverse transcription polymerase chain reaction (RT-PCR) studies have provided controversial results in regards to the expression levels of these two ABC-transporters in colorectal cancer. Our study was designed to determine the yet uninvestigated functional activity of MDR1 and MRP1 transporters in normal human enterocytes compared to colorectal cancer cells from surgical biopsies.

Methods

100 colorectal cancer and 28 adjacent healthy mucosa samples were obtained by intraoperative surgical sampling. Activity of MDR1 and MRP1 of viable epithelial and cancer cells were determined separately with the modified calcein-assay for multidrug resistance activity and sufficient data of 73 cancer and 11 healthy mucosa was analyzed statistically.

Results

Significantly decreased mean MDR1 activity was found in primary colorectal cancer samples compared to normal mucosa, while mean MRP1 activity showed no significant change. Functional activity was not affected by gender, age, stage or grade and localization of the tumor.

Conclusion

We found lower MDR activity in cancer cells versus adjacent, apparently, healthy control tissue, thus, contrary to general belief, MDR activity seems not to play a major role in primary drug resistance, but might rather explain preferential/selective activity of Irinotecan and/or Oxaliplatin. Still, this picture might be more complex since chemotherapy by itself might alter MDR activity, and furthermore, today limited data is available about MDR activity of cancer stem cells in colorectal cancers.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1675739129145824.",2015-04-16 +24753412,Robustly detecting differential expression in RNA sequencing data using observation weights.,"A popular approach for comparing gene expression levels between (replicated) conditions of RNA sequencing data relies on counting reads that map to features of interest. Within such count-based methods, many flexible and advanced statistical approaches now exist and offer the ability to adjust for covariates (e.g. batch effects). Often, these methods include some sort of 'sharing of information' across features to improve inferences in small samples. It is important to achieve an appropriate tradeoff between statistical power and protection against outliers. Here, we study the robustness of existing approaches for count-based differential expression analysis and propose a new strategy based on observation weights that can be used within existing frameworks. The results suggest that outliers can have a global effect on differential analyses. We demonstrate the effectiveness of our new approach with real data and simulated data that reflects properties of real datasets (e.g. dispersion-mean trend) and develop an extensible framework for comprehensive testing of current and future methods. In addition, we explore the origin of such outliers, in some cases highlighting additional biological or technical factors within the experiment. 
Further details can be downloaded from the project website: http://imlspenticton.uzh.ch/robinson_lab/edgeR_robust/.",2014-04-20 +23430897,"Identification of 11 Novel Homogentisate 1,2 Dioxygenase Variants in Alkaptonuria Patients and Establishment of a Novel LOVD-Based HGD Mutation Database.","Enzymatic loss in alkaptonuria (AKU), an autosomal recessive disorder, is caused by mutations in the homogentisate 1,2 dioxygenase (HGD) gene, which decrease or completely inactivate the function of the HGD protein to metabolize homogentisic acid (HGA). AKU shows a very low prevalence (1:100,000-250,000) in most ethnic groups, but there are countries with much higher incidence, such as Slovakia and the Dominican Republic. In this work, we report 11 novel HGD mutations identified during analysis of 36 AKU patients and 41 family members from 27 families originating from 9 different countries, mainly from Slovakia and France. In Slovak patients, we identified two additional mutations, thus a total number of HGD mutations identified in this small country is 12. In order to record AKU-causing mutations and variants of the HGD gene, we have created a HGD mutation database that is open for future submissions and is available online ( http://hgddatabase.cvtisr.sk/ ). It is founded on the Leiden Open (source) Variation Database (LOVD) system and includes data from the original AKU database ( http://www.alkaptonuria.cib.csic.es ) and also all so far reported variants and AKU patients. Where available, HGD-haplotypes associated with the mutations are also presented. Currently, this database contains 148 unique variants, of which 115 are reported pathogenic mutations. 
It provides a valuable tool for information exchange in AKU research and care fields and certainly presents a useful data source for genotype-phenotype correlations and also for future clinical trials.",2011-10-20 +25708928,Identifying DNA-binding proteins by combining support vector machine and PSSM distance transformation.,"

Background

DNA-binding proteins play a pivotal role in various intra- and extra-cellular activities ranging from DNA replication to gene expression control. Identification of DNA-binding proteins is one of the major challenges in the field of genome annotation. There have been several computational methods proposed in the literature to deal with the DNA-binding protein identification. However, most of them can't provide an invaluable knowledge base for our understanding of DNA-protein interactions.

Results

We firstly presented a new protein sequence encoding method called PSSM Distance Transformation, and then constructed a DNA-binding protein identification method (SVM-PSSM-DT) by combining PSSM Distance Transformation with support vector machine (SVM). First, the PSSM profiles are generated by using the PSI-BLAST program to search the non-redundant (NR) database. Next, the PSSM profiles are transformed into uniform numeric representations appropriately by distance transformation scheme. Lastly, the resulting uniform numeric representations are inputted into a SVM classifier for prediction. Thus whether a sequence can bind to DNA or not can be determined. In benchmark test on 525 DNA-binding and 550 non DNA-binding proteins using jackknife validation, the present model achieved an ACC of 79.96%, MCC of 0.622 and AUC of 86.50%. This performance is considerably better than most of the existing state-of-the-art predictive methods. When tested on a recently constructed independent dataset PDB186, SVM-PSSM-DT also achieved the best performance with ACC of 80.00%, MCC of 0.647 and AUC of 87.40%, and outperformed some existing state-of-the-art methods.

Conclusions

The experiment results demonstrate that PSSM Distance Transformation is an available protein sequence encoding method and SVM-PSSM-DT is a useful tool for identifying the DNA-binding proteins. A user-friendly web-server of SVM-PSSM-DT was constructed, which is freely accessible to the public at the web-site on http://bioinformatics.hitsz.edu.cn/PSSM-DT/.",2015-02-06 +25659145,UFSRAT: Ultra-fast Shape Recognition with Atom Types--the discovery of novel bioactive small molecular scaffolds for FKBP12 and 11βHSD1.,"

Motivation

Using molecular similarity to discover bioactive small molecules with novel chemical scaffolds can be computationally demanding. We describe Ultra-fast Shape Recognition with Atom Types (UFSRAT), an efficient algorithm that considers both the 3D distribution (shape) and electrostatics of atoms to score and retrieve molecules capable of making similar interactions to those of the supplied query.

Results

Computational optimization and pre-calculation of molecular descriptors enables a query molecule to be run against a database containing 3.8 million molecules and results returned in under 10 seconds on modest hardware. UFSRAT has been used in pipelines to identify bioactive molecules for two clinically relevant drug targets; FK506-Binding Protein 12 and 11β-hydroxysteroid dehydrogenase type 1. In the case of FK506-Binding Protein 12, UFSRAT was used as the first step in a structure-based virtual screening pipeline, yielding many actives, of which the most active shows a KD, app of 281 µM and contains a substructure present in the query compound. Success was also achieved running solely the UFSRAT technique to identify new actives for 11β-hydroxysteroid dehydrogenase type 1, for which the most active displays an IC50 of 67 nM in a cell based assay and contains a substructure radically different to the query. This demonstrates the valuable ability of the UFSRAT algorithm to perform scaffold hops.

Availability and implementation

A web-based implementation of the algorithm is freely available at http://opus.bch.ed.ac.uk/ufsrat/.",2015-02-06 +26351566,Tentacle: distributed quantification of genes in metagenomes.,"

Background

In metagenomics, microbial communities are sequenced at increasingly high resolution, generating datasets with billions of DNA fragments. Novel methods that can efficiently process the growing volumes of sequence data are necessary for the accurate analysis and interpretation of existing and upcoming metagenomes.

Findings

Here we present Tentacle, which is a novel framework that uses distributed computational resources for gene quantification in metagenomes. Tentacle is implemented using a dynamic master-worker approach in which DNA fragments are streamed via a network and processed in parallel on worker nodes. Tentacle is modular, extensible, and comes with support for six commonly used sequence aligners. It is easy to adapt Tentacle to different applications in metagenomics and easy to integrate into existing workflows.

Conclusions

Evaluations show that Tentacle scales very well with increasing computing resources. We illustrate the versatility of Tentacle on three different use cases. Tentacle is written for Linux in Python 2.7 and is published as open source under the GNU General Public License (v3). Documentation, tutorials, installation instructions, and the source code are freely available online at: http://bioinformatics.math.chalmers.se/tentacle.",2015-09-07 +24753421,Deciphering key features in protein structures with the new ENDscript server.,"ENDscript 2 is a friendly Web server for extracting and rendering a comprehensive analysis of primary to quaternary protein structure information in an automated way. This major upgrade has been fully re-engineered to enhance speed, accuracy and usability with interactive 3D visualization. It takes advantage of the new version 3 of ESPript, our well-known sequence alignment renderer, improved to handle a large number of data with reduced computation time. From a single PDB entry or file, ENDscript produces high quality figures displaying multiple sequence alignment of proteins homologous to the query, colored according to residue conservation. Furthermore, the experimental secondary structure elements and a detailed set of relevant biophysical and structural data are depicted. All this information and more are now mapped on interactive 3D PyMOL representations. Thanks to its adaptive and rigorous algorithm, beginner to expert users can modify settings to fine-tune ENDscript to their needs. ENDscript has also been upgraded as an open platform for the visualization of multiple biochemical and structural data coming from external biotool Web servers, with both 2D and 3D representations. ENDscript 2 and ESPript 3 are freely available at http://endscript.ibcp.fr and http://espript.ibcp.fr, respectively.",2014-04-21 +25685613,The UCSC Ebola Genome Portal. 
,"With the Ebola epidemic raging out of control in West Africa, there has been a flurry of research into the Ebola virus, resulting in the generation of much genomic data. In response to the clear need for tools that integrate multiple strands of research around molecular sequences, we have created the University of California Santa Cruz (UCSC) Ebola Genome Browser, an adaptation of our popular UCSC Genome Browser web tool, which can be used to view the Ebola virus genome sequence from GenBank and nearly 30 annotation tracks generated by mapping external data to the reference sequence. Significant annotations include a multiple alignment comprising 102 Ebola genomes from the current outbreak, 56 from previous outbreaks, and 2 Marburg genomes as an outgroup; a gene track curated by NCBI; protein annotations curated by UniProt and antibody-binding epitopes curated by IEDB. We have extended the Genome Browser's multiple alignment color-coding scheme to distinguish mutations resulting from non-synonymous coding changes, synonymous changes, or changes in untranslated regions. Our Ebola Genome portal at http://genome.ucsc.edu/ebolaPortal/ links to the Ebola virus Genome Browser and an aggregate of useful information, including a collection of Ebola antibodies we are curating.",2014-11-07 +22268964,ProBiS-database: precalculated binding site similarities and local pairwise alignments of PDB structures.,"ProBiS-Database is a searchable repository of precalculated local structural alignments in proteins detected by the ProBiS algorithm in the Protein Data Bank. Identification of functionally important binding regions of the protein is facilitated by structural similarity scores mapped to the query protein structure. PDB structures that have been aligned with a query protein may be rapidly retrieved from the ProBiS-Database, which is thus able to generate hypotheses concerning the roles of uncharacterized proteins. 
Presented with uncharacterized protein structure, ProBiS-Database can discern relationships between such a query protein and other better known proteins in the PDB. Fast access and a user-friendly graphical interface promote easy exploration of this database of over 420 million local structural alignments. The ProBiS-Database is updated weekly and is freely available online at http://probis.cmm.ki.si/database.",2012-02-07 +25380958,Development of a robust classifier for quality control of reverse-phase protein arrays.,"

Motivation

High-throughput reverse-phase protein array (RPPA) technology allows for the parallel measurement of protein expression levels in approximately 1000 samples. However, the many steps required in the complex protocol (sample lysate preparation, slide printing, hybridization, washing and amplified detection) may create substantial variability in data quality. We are not aware of any other quality control algorithm that is tuned to the special characteristics of RPPAs.

Results

We have developed a novel classifier for quality control of RPPA experiments using a generalized linear model and logistic function. The outcome of the classifier, ranging from 0 to 1, is defined as the probability that a slide is of good quality. After training, we tested the classifier using two independent validation datasets. We conclude that the classifier can distinguish RPPA slides of good quality from those of poor quality sufficiently well such that normalization schemes, protein expression patterns and advanced biological analyses will not be drastically impacted by erroneous measurements or systematic variations.

Availability and implementation

The classifier, implemented in the ""SuperCurve"" R package, can be freely downloaded at http://bioinformatics.mdanderson.org/main/OOMPA:Overview or http://r-forge.r-project.org/projects/supercurve/. The data used to develop and validate the classifier are available at http://bioinformatics.mdanderson.org/MOAR.",2014-11-06 +24438387,The epidemiology ontology: an ontology for the semantic annotation of epidemiological resources.,"

Background

Epidemiology is a data-intensive and multi-disciplinary subject, where data integration, curation and sharing are becoming increasingly relevant, given its global context and time constraints. The semantic annotation of epidemiology resources is a cornerstone to effectively support such activities. Although several ontologies cover some of the subdomains of epidemiology, we identified a lack of semantic resources for epidemiology-specific terms. This paper addresses this need by proposing the Epidemiology Ontology (EPO) and by describing its integration with other related ontologies into a semantic enabled platform for sharing epidemiology resources.

Results

The EPO follows the OBO Foundry guidelines and uses the Basic Formal Ontology (BFO) as an upper ontology. The first version of EPO models several epidemiology and demography parameters as well as transmission of infection processes, participants and related procedures. It currently has nearly 200 classes and is designed to support the semantic annotation of epidemiology resources and data integration, as well as information retrieval and knowledge discovery activities.

Conclusions

EPO is under active development and is freely available at https://code.google.com/p/epidemiology-ontology/. We believe that the annotation of epidemiology resources with EPO will help researchers to gain a better understanding of global epidemiological events by enhancing data integration and sharing.",2014-01-17 +26235613,Onset and durability of pain relief in knee osteoarthritis: pooled results from two placebo trials of naproxen/esomeprazole combination and celecoxib.,"

Objective

To further characterize time-to-first pain relief, effect size, correlations between various outcome measures and durability of relief for single-tablet naproxen 500 mg/esomeprazole 20 mg (NAP/ESO) given twice daily and celecoxib (CEL) (200 mg) given once daily versus placebo in knee osteoarthritis (OA).

Methods

Unpublished data from two double-blind, double-dummy, placebo-controlled trials in which patients aged ≥50 years with knee OA were randomized to NAP/ESO (n = 487), CEL (n = 486) or placebo (n = 246) were pooled (NCT00664560 and NCT00665431). Acute response endpoints: 1) Time to first significant pain response, 2) Western Ontario and McMaster Osteoarthritis Index (WOMAC) pain subscale and 3) American Pain Society Patient Outcome Questionnaire (APS-POQ) scores. Sustainability endpoints: 1) Routine Assessment of Patient Index Data (RAPID3) and 2) WOMAC Stiffness, Pain and Total scores; and Patient Global Assessment (PGA) at 6 and 12 weeks. Effect sizes for all measures were calculated. Rescue pain medication use also was analyzed, as was the correlation of WOMAC to RAPID3.

Results

NAP/ESO produced statistically significant decreases in WOMAC Pain on Days 2-7 and at Weeks 6 and 12 (all p < 0.05); most APS-POQ pain assessments with NAP/ESO were significantly improved on Days 2-7 compared with placebo (all p < 0.05). A good or excellent response occurred in a median of 6 days. RAPID3 and WOMAC total/stiffness/function/PGA scores decreased significantly at Weeks 6 and 12 (all p < 0.05). Placebo-adjusted WOMAC pain effect sizes were 0.44, 0.34 and 0.25 at Day 7, week 6 and week 12, respectively. RAPID3 to WOMAC total and WOMAC pain to RAPID3: Pain scores were highly correlated at 6 and 12 weeks (correlation coefficients >0.80). No significant differences in overall responses were found between CEL and NAP/ESO.

Conclusion

Naproxen/esomeprazole produced a significant absolute moderate early pain response, which was maintained for 12 weeks. RAPID3 was found to be highly correlated with the typical OA measure (WOMAC) and might be a useful clinical tool for measuring NSAID response. NCT00664560: https://clinicaltrials.gov/ct2/show/NCT00664560, NCT00665431: https://www.clinicaltrials.gov/ct2/show/NCT00665431.",2015-08-03 +25191639,Rapid publication-ready MS-Word tables for one-way ANOVA.,"

Background

Statistical tables are an important component of data analysis and reports in biological sciences. However, the traditional manual processes for computation and presentation of statistically significant results using a letter-based algorithm are tedious and prone to errors.

Results

Based on the R language, we present two web-based software for individual and summary data, freely available online, at http://shiny.stat.tamu.edu:3838/hassaad/Table_report1/ and http://shiny.stat.tamu.edu:3838/hassaad/SumAOV1/, respectively. The software are capable of rapidly generating publication-ready tables containing one-way analysis of variance (ANOVA) results. No download is required. Additionally, the software can perform multiple comparisons of means using the Duncan, Student-Newman-Keuls, Tukey Kramer, and Fisher's least significant difference (LSD) tests. If the LSD test is selected, multiple methods (e.g., Bonferroni and Holm) are available for adjusting p-values. Using the software, the procedures of ANOVA can be completed within seconds using a web-browser, preferably Mozilla Firefox or Google Chrome, and a few mouse clicks. Furthermore, the software can handle one-way ANOVA for summary data (i.e. sample size, mean, and SD or SEM per treatment group) with post-hoc multiple comparisons among treatment means. To our awareness, none of the currently available commercial (e.g., SPSS and SAS) or open-source software (e.g., R and Python) can perform such a rapid task without advanced knowledge of the corresponding programming language.

Conclusions

Our new and user-friendly software to perform statistical analysis and generate publication-ready MS-Word tables for one-way ANOVA are expected to facilitate research in agriculture, biomedicine, and other fields of life sciences.",2014-08-27 +21233089,The Biomolecular Interaction Network Database in PSI-MI 2.5.,"The Biomolecular Interaction Network Database (BIND) is a major source of curated biomolecular interactions, which has been unmaintained for the last few years, a trend which will eventually result in the loss of a significant amount of unique biomolecular interaction information, mostly as database identifiers become out of date. To help reverse this trend, we converted BIND to a standard format, Proteomics Standard Initiative-Molecular Interaction 2.5, starting from the last curated data release (from 2005) available in a custom XML format and made the core components (interactions and complexes) plus additional valuable curated information available for download (http://download.baderlab.org/BINDTranslation/). Major work during the conversion process was required to update out of date molecule identifiers resulting in a more comprehensive conversion of BIND, by measures including number of species and interactor types covered, than what is currently accessible elsewhere. This work also highlights issues of data modeling, controlled vocabulary adoption and data cleaning that can serve as a general case study on the future compatibility of interaction databases. Database URL: http://download.baderlab.org/BINDTranslation/",2011-01-12 +24694167,NEV2lkit: a new open source tool for handling neuronal event files from multi-electrode recordings.,"The analysis and discrimination of action potentials, or ""spikes"", is a central issue to systems neuroscience research. Here we introduce a free open source software for the analysis and discrimination of neural spikes based on principal component analysis and different clustering algorithms. 
The main objective is to supply a friendly user interface that links the experimental data to a basic set of routines for analysis, visualization and classification of spikes in a consistent framework. The tool has been tested on artificial data sets, on multi-electrode extracellular recordings from ganglion cell populations in isolated superfused mouse, rabbit and turtle retinas, and on electrophysiological recordings from mouse visual cortex. Our results show that NEV2lkit is very reliable and able to satisfy the experimental demands in terms of accuracy, efficiency and consistency across experiments. It performs fast unit sorting in single or multiple experiments and allows the extraction of spikes from over large time intervals in continuously recorded data streams. The tool is implemented in C++ and runs cross-platform on Linux, OS X and Windows systems. To facilitate the adaptation and extension as well as the addition of new routines, tools and algorithms for data analysis, the source code, binary distributions for different operating systems and documentation are all freely available at http://nev2lkit.sourceforge.net .",2013-12-29 +26348538,Prediction of protein disorder on amino acid substitutions.,"Intrinsically disordered regions of proteins are known to have many functional roles in cell signaling and regulatory pathways. The altered expression of these proteins due to mutations is associated with various diseases. Currently, most of the available methods focus on predicting the disordered proteins or the disordered regions in a protein. On the other hand, methods developed for predicting protein disorder on mutation showed a poor performance with a maximum accuracy of 70%. 
Hence, in this work, we have developed a novel method to classify the disorder-related amino acid substitutions using amino acid properties, substitution matrices, and the effect of neighboring residues that showed an accuracy of 90.0% with a sensitivity and specificity of 94.9 and 80.6%, respectively, in 10-fold cross-validation. The method was evaluated with a test set of 20% data using 10 iterations, which showed an average accuracy of 88.9%. Furthermore, we systematically analyzed the features responsible for the better performance of our method and observed that neighboring residues play an important role in defining the disorder of a given residue in a protein sequence. We have developed a prediction server to identify disorder-related mutations, and it is available at http://www.iitm.ac.in/bioinfo/DIM_Pred/.",2015-09-06 +26015273,A statistical framework to predict functional non-coding regions in the human genome through integrated analysis of annotation data.,"Identifying functional regions in the human genome is a major goal in human genetics. Great efforts have been made to functionally annotate the human genome either through computational predictions, such as genomic conservation, or high-throughput experiments, such as the ENCODE project. These efforts have resulted in a rich collection of functional annotation data of diverse types that need to be jointly analyzed for integrated interpretation and annotation. Here we present GenoCanyon, a whole-genome annotation method that performs unsupervised statistical learning using 22 computational and experimental annotations thereby inferring the functional potential of each position in the human genome. With GenoCanyon, we are able to predict many of the known functional regions. The ability of predicting functional regions as well as its generalizable statistical framework makes GenoCanyon a unique and powerful tool for whole-genome annotation. 
The GenoCanyon web server is available at http://genocanyon.med.yale.edu.",2015-05-27 +21398669,UniCarb-DB: a database resource for glycomic discovery.,"

Unlabelled

Glycosylation is one of the most important post-translational modifications of proteins, known to be involved in pathogen recognition, innate immune response and protection of epithelial membranes. However, when compared to the tools and databases available for the processing of high-throughput proteomic data, the glycomic domain is severely lacking. While tools to assist the analysis of mass spectrometry (MS) and HPLC are continuously improving, there are few resources available to support liquid chromatography (LC)-MS/MS techniques for glycan structure profiling. Here, we present a platform for presenting oligosaccharide structures and fragment data characterized by LC-MS/MS strategies. The database is annotated with high-quality datasets and is designed to extend and reinforce those standards and ontologies developed by existing glycomics databases.

Availability

http://www.unicarb-db.org",2011-03-12 +26342230,"Computational modeling of development by epithelia, mesenchyme and their interactions: a unified model.","

Motivation

The transformation of the embryo during development requires complex gene networks, cell signaling and gene-regulated cell behaviors (division, adhesion, polarization, apoptosis, contraction, extracellular matrix secretion, signal secretion and reception, etc.). There are several models of development implementing these phenomena, but none considers at the same time the very different bio-mechanical properties of epithelia, mesenchyme, extracellular matrix and their interactions.

Results

Here, we present a new computational model and accompanying open-source software, EmbryoMaker, that allows the user to simulate custom developmental processes by designing custom gene networks capable of regulating cell signaling and all animal basic cell behaviors. We also include an editor to implement different initial conditions, mutations and experimental manipulations. We show the applicability of the model by simulating several complex examples of animal development.

Availability and implementation

The source code can be downloaded from: http://www.biocenter.helsinki.fi/salazar/software.html.

Contact

isalazar@mappi.helsinki.fi

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-05 +26553056,Sequence information gain based motif analysis.,"

Background

The detection of regulatory regions in candidate sequences is essential for the understanding of the regulation of a particular gene and the mechanisms involved. This paper proposes a novel methodology based on information theoretic metrics for finding regulatory sequences in promoter regions.

Results

This methodology (SIGMA) has been tested on genomic sequence data for Homo sapiens and Mus musculus. SIGMA has been compared with different publicly available alternatives for motif detection, such as MEME/MAST, Biostrings (Bioconductor package), MotifRegressor, and previous work such as Qresiduals projections or information theoretic based detectors. Comparative results, in the form of Receiver Operating Characteristic curves, show how, in 70% of the studied Transcription Factor Binding Sites, the SIGMA detector has a better performance and behaves more robustly than the methods compared, while having a similar computational time. The performance of SIGMA can be explained by its parametric simplicity in the modelling of the non-linear co-variability in the binding motif positions.

Conclusions

Sequence Information Gain based Motif Analysis is a generalisation of a non-linear model of the cis-regulatory sequences detection based on Information Theory. This generalisation allows us to detect transcription factor binding sites with maximum performance disregarding the covariability observed in the positions of the training set of sequences. SIGMA is freely available to the public at http://b2slab.upc.edu.",2015-11-09 +22161423,Face-down positioning or posturing after macular hole surgery.,"

Background

Macular holes cause significant loss of central vision. With the aim of improving the outcome of surgery, a variable period of face-down positioning may be advised.

Objectives

To evaluate the evidence of the impact of postoperative face-down positioning on the outcome of surgery for macular hole.

Search methods

We searched CENTRAL (which contains the Cochrane Eyes and Vision Group Trials Register) (The Cochrane Library 2011, Issue 8), MEDLINE (January 1950 to August 2011), EMBASE (January 1980 to August 2011), the International Standard Randomised Controlled Trial Number Register (ISRCTN Register) (http://www.controlled-trials.com), the WHO International Clinical Trials Registry Platform (ICTRP) (http://www.who.int/ictrp/search/en) and ClinicalTrials.gov (http://clinicaltrials.gov). There were no date or language restrictions in the electronic searches for trials. The electronic databases were last searched on 29 August 2011.

Selection criteria

We included randomised controlled trials (RCTs) in which postoperative face-down positioning was compared to no face-down positioning following surgery for macular holes.

Data collection and analysis

Data were collected and analysed independently by two authors.

Main results

Three RCTs were identified, A, B and C; one of which was unpublished data. We were unable to conduct a meta-analysis due to study heterogeneity regarding duration of face-down positioning and surgical methods (use of inner limiting peel). All three studies suggested an overall beneficial effect of posturing in terms of closure of holes: (A: risk ratio (RR) 1.10; 95% confidence interval (CI) 1.00 to 1.20, P = 0.05); B: RR 1.58, CI 1.0 to 2.5, P = 0.01; C: RR 1.03, CI 0.9 to 1.17, P = 0.67). For holes which were smaller than 400 microns in size, all three studies reported that there was no significant effect of face-down positioning on successful hole closure (A: RR 1.03, CI 0.95 to 1.12; B: RR 1.0, CI 0.68 to 1.46; C: RR 1.03, CI 0.9 to 1.17). However, for holes which were larger than 400 microns in size, both of the studies which examined macular holes of this size agreed on the effectiveness of face-down positioning on hole closure following surgery (A: RR 1.2, CI 1.01 to 1.42, P = 0.04; B: RR 2.27, CI 1.04 to 4.97, P = 0.04).

Authors' conclusions

There is currently insufficient evidence from which to draw firm conclusions about the impact of postoperative face-down positioning on the outcome of surgery for macular hole. Of three RCTs, two suggested a benefit in larger holes but none demonstrated evidence of a benefit in smaller holes. CONSORT adherent RCTs and large scale, well designed non-randomised observational studies are needed to determine with confidence the value of this intervention.",2011-12-07 +25651832,Pulp and plaque microbiotas of children with severe early childhood caries.,"

Background and objective

Bacterial invasion into pulps of primary teeth can lead to infection and premature tooth loss in children. This pilot study aimed to explore whether the microbiota of carious exposures of dental pulps resembles that of carious dentin or that of infected root canals.

Design

Children with severe early childhood caries were studied. Children were consented and extent of caries, plaque, and gingivitis measured. Bacteria were sampled from carious lesion biofilms and vital carious exposures of pulps, and processed by anaerobic culture. Isolates were characterized from partial sequences of the 16S rRNA gene and identified by comparison with taxa in the Human Oral Microbiome Database (http://www.HOMD.org). The microbiotas of carious lesions and dental pulps were compared using univariate and multivariate approaches.

Results

The microbiota of cariously exposed pulps was similar in composition to that of carious lesion biofilms except that fewer species/taxa were identified from pulps. The major taxa identified belonged to the phyla Firmicutes (mainly streptococci) and Actinobacteria (mainly Actinomyces species). Actinomyces and Selenomonas species were associated with carious lesions whereas Veillonella species, particularly Veillonella dispar, was associated with pulps. Other bacteria detected in pulps included Streptococcus mutans, Parascardovia denticolens, Bifidobacterium longum, and several Lactobacillus and Actinomyces species. By principal component analysis, pulp microbiotas grouped together, whereas those in caries biofilms were widely dispersed.

Conclusions

We conclude that the microbiota of cariously exposed vital primary pulps is composed of a subset of species associated with carious lesions. Vital primary pulps had a dominant Firmicutes and Actinobacteria microbiota which contrasts with reports of endodontic infections which can harbor a gram-negative microbiota. The microbiota of exposed primary pulps may provide insight into bacterial species at the forefront of caries invasion in dentinal lesions that can invade into the pulp and the nature of species that need suppressing for successful pulp therapy.",2015-02-03 +25588070,Expert system for predicting reaction conditions: the Michael reaction case.,"A generic chemical transformation may often be achieved under various synthetic conditions. However, for any specific reagents, only one or a few among the reported synthetic protocols may be successful. For example, Michael β-addition reactions may proceed under different choices of solvent (e.g., hydrophobic, aprotic polar, protic) and catalyst (e.g., Brønsted acid, Lewis acid, Lewis base, etc.). Chemoinformatics methods could be efficiently used to establish a relationship between the reagent structures and the required reaction conditions, which would allow synthetic chemists to waste less time and resources in trying out various protocols in search for the appropriate one. In order to address this problem, a number of 2-classes classification models have been built on a set of 198 Michael reactions retrieved from literature. Trained models discriminate between processes that are compatible and respectively processes not feasible under a specific reaction condition option (feasible or not with a Lewis acid catalyst, feasible or not in hydrophobic solvent, etc.). Eight distinct models were built to decide the compatibility of a Michael addition process with each considered reaction condition option, while a ninth model was aimed to predict whether the assumed Michael addition is feasible at all. 
Different machine-learning methods (Support Vector Machine, Naive Bayes, and Random Forest) in combination with different types of descriptors (ISIDA fragments issued from Condensed Graphs of Reactions, MOLMAP, Electronic Effect Descriptors, and Chemistry Development Kit computed descriptors) have been used. Models have good predictive performance in 3-fold cross-validation done three times: balanced accuracy varies from 0.7 to 1. Developed models are available for the users at http://infochim.u-strasbg.fr/webserv/VSEngine.html . Eventually, these were challenged to predict feasibility conditions for ∼50 novel Michael reactions from the eNovalys database (originally from patent literature).",2015-02-03 +21224340,Chloroplast 2010: a database for large-scale phenotypic screening of Arabidopsis mutants.,"Large-scale phenotypic screening presents challenges and opportunities not encountered in typical forward or reverse genetics projects. We describe a modular database and laboratory information management system that was implemented in support of the Chloroplast 2010 Project, an Arabidopsis (Arabidopsis thaliana) reverse genetics phenotypic screen of more than 5,000 mutants (http://bioinfo.bch.msu.edu/2010_LIMS; www.plastid.msu.edu). The software and laboratory work environment were designed to minimize operator error and detect systematic process errors. The database uses Ruby on Rails and Flash technologies to present complex quantitative and qualitative data and pedigree information in a flexible user interface. Examples are presented where the database was used to find opportunities for process changes that improved data quality. 
We also describe the use of the data-analysis tools to discover mutants defective in enzymes of leucine catabolism (heteromeric mitochondrial 3-methylcrotonyl-coenzyme A carboxylase [At1g03090 and At4g34030] and putative hydroxymethylglutaryl-coenzyme A lyase [At2g26800]) based upon a syndrome of pleiotropic seed amino acid phenotypes that resembles previously described isovaleryl coenzyme A dehydrogenase (At3g45300) mutants. In vitro assay results support the computational annotation of At2g26800 as hydroxymethylglutaryl-coenzyme A lyase.",2011-01-11 +26779228,CoExpNetViz: Comparative Co-Expression Networks Construction and Visualization Tool.,"

Motivation

Comparative transcriptomics is a common approach in functional gene discovery efforts. It allows for finding conserved co-expression patterns between orthologous genes in closely related plant species, suggesting that these genes potentially share similar function and regulation. Several efficient co-expression-based tools have been commonly used in plant research but most of these pipelines are limited to data from model systems, which greatly limits their utility. Moreover, none of the existing pipelines allow plant researchers to make use of their own unpublished gene expression data for performing a comparative co-expression analysis and generate multi-species co-expression networks.

Results

We introduce CoExpNetViz, a computational tool that uses a set of query or ""bait"" genes as an input (chosen by the user) and a minimum of one pre-processed gene expression dataset. The CoExpNetViz algorithm proceeds in three main steps; (i) for every bait gene submitted, co-expression values are calculated using mutual information and Pearson correlation coefficients, (ii) non-bait (or target) genes are grouped based on cross-species orthology, and (iii) output files are generated and results can be visualized as network graphs in Cytoscape.

Availability

The CoExpNetViz tool is freely available both as a PHP web server (link: http://bioinformatics.psb.ugent.be/webtools/coexpr/) (implemented in C++) and as a Cytoscape plugin (implemented in Java). Both versions of the CoExpNetViz tool support LINUX and Windows platforms.",2015-01-01 +25897372,Optimizing the ultrasound visualization of the endometrial-myometrial junction (EMJ).,"

Objectives

The aim of this study was to find the best 3D reconstruction technique to visualize the endometrial-myometrial junction (EMJ).

Methods

Retrospective observational study on 240 stored 3D volumes of 80 patients. The first author reconstructed the 2D midcoronal image without volume contrast imaging (VCI), with VCI set at 4 mm and with VCI set at 2 mm. Three images per patient (240 images) were saved and integrated in the web-based electronic data capture software Clinical Data Miner (CDM) (http://cdm.esat.kuleuven.be). Five experienced gynaecologists analysed the images shown in random order. They scored the image quality (good, moderate, poor, insufficient) and described the EMJ of these images using IETA terminology (regular, irregular, interrupted, not defined). One of the examiners (CVP) also re-evaluated the same set of images after 12 days to assess intra-observer variability.

Results

The use of VCI significantly improved the recorded subjective image quality. The Fleiss' kappa coefficient for evaluating the inter-observer variability of the EMJ description using coronal view without VCI, with VCI at 4 mm and VCI at 2 mm were 0.36 ± 0.05, 0.34 ± 0.05 and 0.42 ± 0.05, respectively. The corresponding figures for the intra-observer variability were 0.58 ± 0.08, 0.36 ± 0.08 and 0.68 ± 0.07, respectively.

Discussion

In this study on 3D reconstructed coronal images of the uterine cavity, the 2 mm VCI slices gave the best quality images of the EMJ.",2015-01-01 +24736476,Automated protein turnover calculations from 15N partial metabolic labeling LC/MS shotgun proteomics data.,"Protein turnover is a well-controlled process in which polypeptides are constantly being degraded and subsequently replaced with newly synthesized copies. Extraction of composite spectral envelopes from complex LC/MS shotgun proteomics data can be a challenging task, due to the inherent complexity of biological samples. With partial metabolic labeling experiments this complexity increases as a result of the emergence of additional isotopic peaks. Automated spectral extraction and subsequent protein turnover calculations enable the analysis of gigabytes of data within minutes, a prerequisite for systems biology high throughput studies. Here we present a fully automated method for protein turnover calculations from shotgun proteomics data. The approach enables the analysis of complex shotgun LC/MS 15N partial metabolic labeling experiments. Spectral envelopes of 1419 peptides can be extracted within an hour. The method quantifies turnover by calculating the Relative Isotope Abundance (RIA), which is defined as the ratio between the intensity sum of all heavy (15N) to the intensity sum of all light (14N) and heavy peaks. To facilitate this process, we have developed a computer program based on our method, which is freely available to download at http://promex.pph.univie.ac.at/protover.",2014-04-15 +25422051,DISSECT: an assignment-free Bayesian discovery method for species delimitation under the multispecies coalescent.,"

Motivation

The multispecies coalescent model provides a formal framework for the assignment of individual organisms to species, where the species are modeled as the branches of the species tree. None of the available approaches so far have simultaneously co-estimated all the relevant parameters in the model, without restricting the parameter space by requiring a guide tree and/or prior assignment of individuals to clusters or species.

Results

We present DISSECT, which explores the full space of possible clusterings of individuals and species tree topologies in a Bayesian framework. It uses an approximation to avoid the need for reversible-jump Markov Chain Monte Carlo, in the form of a prior that is a modification of the birth-death prior for the species tree. It incorporates a spike near zero in the density for node heights. The model has two extra parameters: one controls the degree of approximation and the second controls the prior distribution on the numbers of species. It is implemented as part of BEAST and requires only a few changes from a standard *BEAST analysis. The method is evaluated on simulated data and demonstrated on an empirical dataset. The method is shown to be insensitive to the degree of approximation, but quite sensitive to the second parameter, suggesting that large numbers of sequences are needed to draw firm conclusions.

Availability and implementation

http://tree.bio.ed.ac.uk/software/beast/, http://www.indriid.com/dissectinbeast.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-23 +25650278,EcoliNet: a database of cofunctional gene network for Escherichia coli. ,"During the past several decades, Escherichia coli has been a treasure chest for molecular biology. The molecular mechanisms of many fundamental cellular processes have been discovered through research on this bacterium. Although much basic research now focuses on more complex model organisms, E. coli still remains important in metabolic engineering and synthetic biology. Despite its long history as a subject of molecular investigation, more than one-third of the E. coli genome has no pathway annotation supported by either experimental evidence or manual curation. Recently, a network-assisted genetics approach to the efficient identification of novel gene functions has increased in popularity. To accelerate the speed of pathway annotation for the remaining uncharacterized part of the E. coli genome, we have constructed a database of cofunctional gene network with near-complete genome coverage of the organism, dubbed EcoliNet. We find that EcoliNet is highly predictive for diverse bacterial phenotypes, including antibiotic response, indicating that it will be useful in prioritizing novel candidate genes for a wide spectrum of bacterial phenotypes. We have implemented a web server where biologists can easily run network algorithms over EcoliNet to predict novel genes involved in a pathway or novel functions for a gene. All integrated cofunctional associations can be downloaded, enabling orthology-based reconstruction of gene networks for other bacterial species as well. Database URL: http://www.inetbio.org/ecolinet.",2015-02-02 +26786290,Genome-wide methylation profiling identifies novel methylated genes in neuroblastoma tumors.,"Neuroblastoma is a very heterogeneous tumor of childhood. The clinical spectra range from very aggressive metastatic disease to spontaneous regression, even without therapy. 
Aberrant DNA methylation pattern is a common feature of most cancers. For neuroblastoma, it has been demonstrated both for single genes as well as genome-wide, where a so-called methylator phenotype has been described. Here, we present a study using Illumina 450K methylation arrays on 60 neuroblastoma tumors. We show that aggressive tumors, characterized by International Neuroblastoma Risk Group (INRG) as stage M, are hypermethylated compared to low-grade tumors. On the contrary, INRG stage L tumors display more non-CpG methylation. The genes with the highest number of hypermethylated CpG sites in INRG M tumors are TERT, PCDHGA4, DLX5, and DLX6-AS1. Gene ontology analysis showed a representation of neuronal tumor relevant gene functions among the differentially methylated genes. For validation, we used a set of independent tumors previously analyzed with the Illumina 27K methylation arrays, which confirmed the differentially methylated sites. Top candidate genes with aberrant methylation were analyzed for altered gene expression through the R2 platform ( http://r2.amc.nl), and for correlations between methylation and gene expression in a public dataset. Altered expression in nonsurvivors was found for the genes B3GALT4 and KIAA1949, CLIC5, DLX6-AS, TERT, and PIRT, and strongest correlations were found for TRIM36, KIAA0513, and PIRT. Our data indicate that methylation profiling can be used for patient stratification and informs on epigenetically deregulated genes with the potential of increasing our knowledge about the underlying mechanisms of tumor development.",2016-01-19 +25805332,Regional bronchodilator response assessed by computed tomography in chronic obstructive pulmonary disease.,"

Background and objective

The reliability of CT assessment of regional bronchodilation is not universally accepted. In this study, using our proprietary 3D-CT software, we first examined airway inner luminal area (Ai) before and after inhalation of SFC in a group of COPD patients and then evaluated the same parameters for two sets of CT data obtained from clinically stable subjects with no intervention.

Methods

We conducted CT at deep inspiration and pulmonary function tests before and one week after inhalation of SFC in 23 COPD patients. As a non-intervention group, we used two sets of CT data obtained with one-year interval in another group of subjects who demonstrated stable pulmonary function (n=8). We measured Ai at the mid-portions of 3rd to 6th generation in 8 bronchi of the right lung, a total of 32 identical sites before and after intervention.

Results

The average bronchodilation at all sites (ΔAi%: 28.2 ± 4.1 (SE)%) (r=0.65, p<0.001) and that of each generation significantly correlated with % improvement of FEV1 (ΔFEV1%), which increased from 1.40 ± 0.10 L to 1.58 ± 0.10 L. When subjects were classified into two groups in terms of mean ΔFEV1%, even the poor responders (ΔFEV1% <14% above baseline, n=13) displayed significantly larger ΔAi% compared with the non-intervention group (19.1 ± 4.6% versus 2.1 ± 3.9%). Inter-observer variability for overall ΔAi% was within acceptable levels.

Conclusions

CT can reliably detect the regional bronchodilation in 3rd to 6th generation airways when ΔFEV1 is as small as 180 ml on average. This study was registered in the UMIN Clinical Trials Registry (UMIN-CTR) system (http://www.umin.ac.jp/. No. UMIN 000002668).",2015-03-09 +25692584,Minimalistic predictor of protein binding energy: contribution of solvation factor to protein binding.,"It has long been known that solvation plays an important role in protein-protein interactions. Here, we use a minimalistic solvation-based model for predicting protein binding energy to estimate quantitatively the contribution of the solvation factor in protein binding. The factor is described by a simple linear combination of buried surface areas according to amino-acid types. Even without structural optimization, our minimalistic model demonstrates a predictive power comparable to more complex methods, making the proposed approach the basis for high throughput applications. Application of the model to a proteomic database shows that receptor-substrate complexes involved in signaling have lower affinities than enzyme-inhibitor and antibody-antigen complexes, and they differ by chemical compositions on interfaces. Also, we found that protein complexes with components that come from the same genes generally have lower affinities than complexes formed by proteins from different genes, but in this case the difference originates from different interface areas. The model was implemented in the software PYTHON, and the source code can be found on the Shakhnovich group webpage: http://faculty.chemistry.harvard.edu/shakhnovich/software.",2015-02-01 +26673785,"Integrating, summarizing and visualizing GWAS-hits and human diversity with DANCE (Disease-ANCEstry networks).","

Motivation

The 1000 Genomes Project (1KGP) and thousands of Genome-Wide Association Studies (GWAS) performed during the last years have generated an enormous amount of information that needs to be integrated to better understand the genetic architecture of complex diseases in different populations. This integration is important in areas such as genetics, epidemiology, anthropology, as well as admixture mapping design and GWAS-replications. Network-based approaches that explore the genetic bases of human diseases and traits have not yet incorporated information on genetic diversity among human populations.

Results

We propose Disease-ANCEstry networks (DANCE), a graph-based web tool that allows to integrate and visualize information on human complex phenotypes and their GWAS-hits, as well as their risk allele frequencies in different populations. DANCE provides an interactive way to explore the human SNP-Disease Network and its projection, a Disease-Disease Network. With these functionalities, DANCE fills a gap in our ability to handle and understand the knowledge generated by GWAS and 1KGP. We provide a number of case studies that show how DANCE can be used to explore the relationships between human complex diseases, their genetic bases and variability in different human populations.

Availability and implementation

DANCE is freely available at http://ldgh.com.br/dance/ We recommend using DANCE with Mozilla Firefox, Safari, Chrome or Internet Explorer (v9 or v10).

Contact

gilderlanio@gmail.com or maira.r.rodrigues@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-15 +23794914,A specialist's audit of aggregated occurrence records: An 'aggregator's' perspective.,"A recent ZooKeys' paper (Mesibov, 2013: http://www.pensoft.net/journal_home_page.php?journal_id=1&page=article&SESID=df7bcb35b02603283dcb83ee0e0af0c9&type=show&article_id=5111) has highlighted data quality issues in aggregated data sets, but did not provide a realistic way to address these issues. This paper provides an aggregator's perspective including ways that the whole community can help to address data quality issues. The establishment of GBIF and national nodes (national aggregators) such as the Atlas of Living Australia (ALA) have integrated and exposed a huge diversity of biological observations along with many associated issues. Much of the admirable work by Mesibov (2013) was enabled by having the data exposed. Data quality, one of the highest priorities for GBIF, the national nodes and other aggregators, depends on both automatic methods and community experts to detect and correct data issues. Not all issues can however be automatically detected or corrected, so community assistance is needed to help improve the quality of exposed biological data. We do need to improve the infrastructure and associated processes to more easily identify data issues and document all changes to ensure a full record is permanently and publicly available.",2013-05-30 +23962281,Hierarchical Bayesian modelling of gene expression time series across irregularly sampled replicates and clusters.,"

Background

Time course data from microarrays and high-throughput sequencing experiments require simple, computationally efficient and powerful statistical models to extract meaningful biological signal, and for tasks such as data fusion and clustering. Existing methodologies fail to capture either the temporal or replicated nature of the experiments, and often impose constraints on the data collection process, such as regularly spaced samples, or similar sampling schema across replications.

Results

We propose hierarchical Gaussian processes as a general model of gene expression time-series, with application to a variety of problems. In particular, we illustrate the method's capacity for missing data imputation, data fusion and clustering. The method can impute data which is missing both systematically and at random: in a hold-out test on real data, performance is significantly better than commonly used imputation methods. The method's ability to model inter- and intra-cluster variance leads to more biologically meaningful clusters. The approach removes the necessity for evenly spaced samples, an advantage illustrated on a developmental Drosophila dataset with irregular replications.

Conclusion

The hierarchical Gaussian process model provides an excellent statistical basis for several gene-expression time-series tasks. It has only a few additional parameters over a regular GP, has negligible additional complexity, is easily implemented and can be integrated into several existing algorithms. Our experiments were implemented in python, and are available from the authors' website: http://staffwww.dcs.shef.ac.uk/people/J.Hensman/.",2013-08-20 +25058159,iRegulon: from a gene list to a gene regulatory network using large motif and track collections.,"Identifying master regulators of biological processes and mapping their downstream gene networks are key challenges in systems biology. We developed a computational method, called iRegulon, to reverse-engineer the transcriptional regulatory network underlying a co-expressed gene set using cis-regulatory sequence analysis. iRegulon implements a genome-wide ranking-and-recovery approach to detect enriched transcription factor motifs and their optimal sets of direct targets. We increase the accuracy of network inference by using very large motif collections of up to ten thousand position weight matrices collected from various species, and linking these to candidate human TFs via a motif2TF procedure. We validate iRegulon on gene sets derived from ENCODE ChIP-seq data with increasing levels of noise, and we compare iRegulon with existing motif discovery methods. Next, we use iRegulon on more challenging types of gene lists, including microRNA target sets, protein-protein interaction networks, and genetic perturbation data. In particular, we over-activate p53 in breast cancer cells, followed by RNA-seq and ChIP-seq, and could identify an extensive up-regulated network controlled directly by p53. Similarly we map a repressive network with no indication of direct p53 regulation but rather an indirect effect via E2F and NFY. 
Finally, we generalize our computational framework to include regulatory tracks such as ChIP-seq data and show how motif and track discovery can be combined to map functional regulatory interactions among co-expressed genes. iRegulon is available as a Cytoscape plugin from http://iregulon.aertslab.org.",2014-07-24 +27003205,Self-Selected and Maximal Walking Speeds Provide Greater Insight Into Fall Status Than Walking Speed Reserve Among Community-Dwelling Older Adults.,"

Objective

To determine the degree to which self-selected walking speed (SSWS), maximal walking speed (MWS), and walking speed reserve (WSR) are associated with fall status among community-dwelling older adults.

Design

WS and 1-year falls history data were collected on 217 community-dwelling older adults (median age = 82, range 65-93 years) at a local outpatient PT clinic and local retirement communities and senior centers. WSR was calculated as a difference (WSRdiff = MWS - SSWS) and ratio (WSRratio = MWS/SSWS).

Results

SSWS (P < 0.001), MWS (P < 0.001), and WSRdiff (P < 0.01) were associated with fall status. The cutpoints identified were 0.76 m/s for SSWS (65.4% sensitivity, 70.9% specificity), 1.13 m/s for MWS (76.6% sensitivity, 60.0% specificity), and 0.24 m/s for WSRdiff (56.1% sensitivity, 70.9% specificity). SSWS and MWS better discriminated between fallers and non-fallers (SSWS: AUC = 0.69, MWS: AUC = 0.71) than WSRdiff (AUC = 0.64).

Conclusions

SSWS and MWS seem to be equally informative measures for assessing fall status in community-dwelling older adults. Older adults with SSWSs less than 0.76 m/s and those with MWSs less than 1.13 m/s may benefit from further fall risk assessment. Combining SSWS and MWS to calculate an individual's WSR does not provide additional insight into fall status in this population.

To claim cme credits

Complete the self-assessment activity and evaluation online at http://www.physiatry.org/JournalCME CME OBJECTIVES: Upon completion of this article, the reader should be able to: (1) Describe the different methods for calculating walking speed reserve and discuss the potential of the metric as an outcome measure; (2) Explain the degree to which self-selected walking speed, maximal walking speed, and walking speed reserve are associated with fall status among community-dwelling older adults; and (3) Discuss potential limitations to using walking speed reserve to identify fall status in populations without mobility restrictions.

Level

Advanced

Accreditation

: The Association of Academic Physiatrists is accredited by the Accreditation Council for Continuing Medical Education to provide continuing medical education for physicians. The Association of Academic Physiatrists designates this activity for a maximum of 1.5 AMA PRA Category 1 Credit(s). Physicians should only claim credit commensurate with the extent of their participation in the activity.",2016-07-01 +25846271,RBRIdent: An algorithm for improved identification of RNA-binding residues in proteins from primary sequences.,"Rapid and correct identification of RNA-binding residues based on the protein primary sequences is of great importance. In most prevalent machine-learning-based identification methods; however, either some features are inefficiently represented, or the redundancy between features is not effectively removed. Both problems may weaken the performance of a classifier system and raise its computational complexity. Here, we addressed the above problems and developed a better classifier (RBRIdent) to identify the RNA-binding residues. In an independent benchmark test, RBRIdent achieved an accuracy of 76.79%, Matthews correlation coefficient of 0.3819 and F-measure of 75.58%, remarkably outperforming all prevalent methods. These results suggest the necessity of proper feature description and the essential role of feature selection in this project. All source data and codes are freely available at http://166.111.152.91/RBRIdent.",2015-04-22 +25636267,MaxMod: a hidden Markov model based novel interface to MODELLER for improved prediction of protein 3D models.,"Modeling the three-dimensional (3D) structures of proteins assumes great significance because of its manifold applications in biomolecular research. 
Toward this goal, we present MaxMod, a graphical user interface (GUI) of the MODELLER program that combines profile hidden Markov model (profile HMM) method with Clustal Omega program to significantly improve the selection of homologous templates and target-template alignment for construction of accurate 3D protein models. MaxMod distinguishes itself from other existing GUIs of MODELLER software by implementing effortless modeling of proteins using templates that bear modified residues. Additionally, it provides various features such as loop optimization, express modeling (a feature where protein model can be generated directly from its sequence, without any further user intervention) and automatic update of PDB database, thus enhancing the user-friendly control of computational tasks. We find that HMM-based MaxMod performs better than other modeling packages in terms of execution time and model quality. MaxMod is freely available as a downloadable standalone tool for academic and non-commercial purpose at http://www.immt.res.in/maxmod/.",2015-01-31 +22674159,How to use the IUPHAR receptor database to navigate pharmacological data.,"Today's data-intensive, interdisciplinary research challenges scientists to keep up to date with key experimental techniques and tools reported in the literature. The International Union of Basic and Clinical Pharmacology Database (IUPHAR-DB) goes some way to addressing this need by providing expert-curated information sourced from primary literature and displayed in a user-friendly manner online. The database provides a channel for the IUPHAR Nomenclature Committee (NC-IUPHAR) to provide recommendations on the nomenclature of receptors and ion channels, to document their properties and the ligands that are useful for receptor characterization. Here we describe IUPHAR-DB's main features and provide examples of techniques for navigating and exploring the information. 
The database is freely available online at http://www.iuphar-db.org/.",2012-01-01 +26669930,CytoGEDEVO-global alignment of biological networks with Cytoscape.,"

Motivation

In the systems biology era, high-throughput omics technologies have enabled the unraveling of the interplay of some biological entities on a large scale (e.g. genes, proteins, metabolites or RNAs). Huge biological networks have emerged, where nodes correspond to these entities and edges between them model their relations. Protein-protein interaction networks, for instance, show the physical interactions of proteins in an organism. The comparison of such networks promises additional insights into protein and cell function as well as knowledge-transfer across species. Several computational approaches have been developed previously to solve the network alignment (NA) problem, but only a few concentrate on the usability of the implemented tools for the evaluation of protein-protein interactions by the end users (biologists and medical researchers).

Results

We have created CytoGEDEVO, a Cytoscape app for visual and user-assisted NA. It extends the previous GEDEVO methodology for global pairwise NAs with new graphical and functional features. Our main focus was on the usability, even by non-programmers and the interpretability of the NA results with Cytoscape.

Availability and implementation

CytoGEDEVO is publicly available from the Cytoscape app store at http://apps.cytoscape.org/apps/cytogedevo. In addition, we provide stand-alone command line executables, source code, documentation and step-by-step user instructions at http://cytogedevo.compbio.sdu.dk.

Contact

malek@tugraz.at

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-14 +22689642,BIPS: BIANA Interolog Prediction Server. A tool for protein-protein interaction inference.,"Protein-protein interactions (PPIs) play a crucial role in biology, and high-throughput experiments have greatly increased the coverage of known interactions. Still, identification of complete inter- and intraspecies interactomes is far from being complete. Experimental data can be complemented by the prediction of PPIs within an organism or between two organisms based on the known interactions of the orthologous genes of other organisms (interologs). Here, we present the BIANA (Biologic Interactions and Network Analysis) Interolog Prediction Server (BIPS), which offers a web-based interface to facilitate PPI predictions based on interolog information. BIPS benefits from the capabilities of the framework BIANA to integrate the several PPI-related databases. Additional metadata can be used to improve the reliability of the predicted interactions. Sensitivity and specificity of the server have been calculated using known PPIs from different interactomes using a leave-one-out approach. The specificity is between 72 and 98%, whereas sensitivity varies between 1 and 59%, depending on the sequence identity cut-off used to calculate similarities between sequences. BIPS is freely accessible at http://sbi.imim.es/BIPS.php.",2012-06-11 +21364759,NAViGaTing the micronome--using multiple microRNA prediction databases to identify signalling pathway-associated microRNAs.,"

Background

MicroRNAs are a class of small RNAs known to regulate gene expression at the transcript level, the protein level, or both. Since microRNA binding is sequence-based but possibly structure-specific, work in this area has resulted in multiple databases storing predicted microRNA:target relationships computed using diverse algorithms. We integrate prediction databases, compare predictions to in vitro data, and use cross-database predictions to model the microRNA:transcript interactome--referred to as the micronome--to study microRNA involvement in well-known signalling pathways as well as associations with disease. We make this data freely available with a flexible user interface as our microRNA Data Integration Portal--mirDIP (http://ophid.utoronto.ca/mirDIP).

Results

mirDIP integrates prediction databases to elucidate accurate microRNA:target relationships. Using NAViGaTOR to produce interaction networks implicating microRNAs in literature-based, KEGG-based and Reactome-based pathways, we find these signalling pathway networks have significantly more microRNA involvement compared to chance (p<0.05), suggesting microRNAs co-target many genes in a given pathway. Further examination of the micronome shows two distinct classes of microRNAs; universe microRNAs, which are involved in many signalling pathways; and intra-pathway microRNAs, which target multiple genes within one signalling pathway. We find universe microRNAs to have more targets (p<0.0001), to be more studied (p<0.0002), and to have higher degree in the KEGG cancer pathway (p<0.0001), compared to intra-pathway microRNAs.

Conclusions

Our pathway-based analysis of mirDIP data suggests microRNAs are involved in intra-pathway signalling. We identify two distinct classes of microRNAs, suggesting a hierarchical organization of microRNAs co-targeting genes both within and between pathways, and implying differential involvement of universe and intra-pathway microRNAs at the disease level.",2011-02-25 +24845652,CLImAT: accurate detection of copy number alteration and loss of heterozygosity in impure and aneuploid tumor samples using whole-genome sequencing data.,"

Motivation

Whole-genome sequencing of tumor samples has been demonstrated as an efficient approach for comprehensive analysis of genomic aberrations in cancer genome. Critical issues such as tumor impurity and aneuploidy, GC-content and mappability bias have been reported to complicate identification of copy number alteration and loss of heterozygosity in complex tumor samples. Therefore, efficient computational methods are required to address these issues.

Results

We introduce CLImAT (CNA and LOH Assessment in Impure and Aneuploid Tumors), a bioinformatics tool for identification of genomic aberrations from tumor samples using whole-genome sequencing data. Without requiring a matched normal sample, CLImAT takes integrated analysis of read depth and allelic frequency and provides extensive data processing procedures including GC-content and mappability correction of read depth and quantile normalization of B-allele frequency. CLImAT accurately identifies copy number alteration and loss of heterozygosity even for highly impure tumor samples with aneuploidy. We evaluate CLImAT on both simulated and real DNA sequencing data to demonstrate its ability to infer tumor impurity and ploidy and identify genomic aberrations in complex tumor samples.

Availability and implementation

The CLImAT software package can be freely downloaded at http://bioinformatics.ustc.edu.cn/CLImAT/.",2014-05-19 +22537153,Chemocentric informatics approach to drug discovery: identification and experimental validation of selective estrogen receptor modulators as ligands of 5-hydroxytryptamine-6 receptors and as potential cognition enhancers.,"We have devised a chemocentric informatics methodology for drug discovery integrating independent approaches to mining biomolecular databases. As a proof of concept, we have searched for novel putative cognition enhancers. First, we generated Quantitative Structure-Activity Relationship (QSAR) models of compounds binding to 5-hydroxytryptamine-6 receptor (5-HT(6)R), a known target for cognition enhancers, and employed these models for virtual screening to identify putative 5-HT(6)R actives. Second, we queried chemogenomics data from the Connectivity Map ( http://www.broad.mit.edu/cmap/ ) with the gene expression profile signatures of Alzheimer's disease patients to identify compounds putatively linked to the disease. Thirteen common hits were tested in 5-HT(6)R radioligand binding assays and ten were confirmed as actives. Four of them were known selective estrogen receptor modulators that were never reported as 5-HT(6)R ligands. Furthermore, nine of the confirmed actives were reported elsewhere to have memory-enhancing effects. The approaches discussed herein can be used broadly to identify novel drug-target-disease associations.",2012-06-11 +27504716,Genes Interacting with Occupational Exposures to Low Molecular Weight Agents and Irritants on Adult-Onset Asthma in Three European Studies.,"

Background

The biological mechanisms by which cleaning products and disinfectants-an emerging risk factor-affect respiratory health remain incompletely evaluated. Studying genes by environment interactions (G × E) may help identify new genes related to adult-onset asthma.

Objectives

We identified interactions between genetic polymorphisms of a large set of genes involved in the response to oxidative stress and occupational exposures to low molecular weight (LMW) agents or irritants on adult-onset asthma.

Methods

Our data came from three large European cohorts: Epidemiological Family-based Study of the Genetics and Environment of Asthma (EGEA), Swiss Cohort Study on Air Pollution and Lung and Heart Disease in Adults (SAPALDIA), and European Community Respiratory Health Survey in Adults (ECRHS). A candidate pathway-based strategy identified 163 genes involved in the response to oxidative stress and potentially related to exposures to LMW agents/irritants. Occupational exposures were evaluated using an asthma job-exposure matrix and job-specific questionnaires for cleaners and healthcare workers. Logistic regression models were used to detect G × E interactions, adjusted for age, sex, and population ancestry, in 2,599 adults (mean age, 47 years; 60% women, 36% exposed, 18% asthmatics). p-Values were corrected for multiple comparisons.

Results

Ever exposure to LMW agents/irritants was associated with current adult-onset asthma [OR = 1.28 (95% CI: 1.04, 1.58)]. Eight single nucleotide polymorphism (SNP) by exposure interactions at five loci were found at p < 0.005: PLA2G4A (rs932476, chromosome 1), near PLA2R1 (rs2667026, chromosome 2), near RELA (rs931127, rs7949980, chromosome 11), PRKD1 (rs1958980, rs11847351, rs1958987, chromosome 14), and PRKCA (rs6504453, chromosome 17). Results were consistent across the three studies and after accounting for smoking.

Conclusions

Using a pathway-based selection process, we identified novel genes potentially involved in adult asthma by interaction with occupational exposure. These genes play a role in the NF-κB pathway, which is involved in inflammation. Citation: Rava M, Ahmed I, Kogevinas M, Le Moual N, Bouzigon E, Curjuric I, Dizier MH, Dumas O, Gonzalez JR, Imboden M, Mehta AJ, Tubert-Bitter P, Zock JP, Jarvis D, Probst-Hensch NM, Demenais F, Nadif R. 2017. Genes interacting with occupational exposures to low molecular weight agents and irritants on adult-onset asthma in three European studies. Environ Health Perspect 125:207-214; http://dx.doi.org/10.1289/EHP376.",2016-08-09 +26079347,JSBML 1.0: providing a smorgasbord of options to encode systems biology models.,"

Unlabelled

JSBML, the official pure Java programming library for the Systems Biology Markup Language (SBML) format, has evolved with the advent of different modeling formalisms in systems biology and their ability to be exchanged and represented via extensions of SBML. JSBML has matured into a major, active open-source project with contributions from a growing, international team of developers who not only maintain compatibility with SBML, but also drive steady improvements to the Java interface and promote ease-of-use with end users.

Availability and implementation

Source code, binaries and documentation for JSBML can be freely obtained under the terms of the LGPL 2.1 from the website http://sbml.org/Software/JSBML. More information about JSBML can be found in the user guide at http://sbml.org/Software/JSBML/docs/.

Contact

jsbml-development@googlegroups.com or andraeger@eng.ucsd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-16 +25638810,Similarity-based prediction for Anatomical Therapeutic Chemical classification of drugs by integrating multiple data sources.,"

Motivation

Anatomical Therapeutic Chemical (ATC) classification system, widely applied in almost all drug utilization studies, is currently the most widely recognized classification system for drugs. Currently, new drug entries are added into the system only on users' requests, which leads to seriously incomplete drug coverage of the system, and bioinformatics prediction is helpful during this process.

Results

Here we propose a novel prediction model of drug-ATC code associations, using logistic regression to integrate multiple heterogeneous data sources including chemical structures, target proteins, gene expression, side-effects and chemical-chemical associations. The model obtains good performance for the prediction not only on ATC codes of unclassified drugs but also on new ATC codes of classified drugs assessed by cross-validation and independent test sets, and its efficacy exceeds previous methods. Further to facilitate the use, the model is developed into a user-friendly web service SPACE (Similarity-based Predictor of ATC CodE), which for each submitted compound, will give candidate ATC codes (ranked according to the decreasing probability_score predicted by the model) together with corresponding supporting evidence. This work not only contributes to knowing drugs' therapeutic, pharmacological and chemical properties, but also provides clues for drug repositioning and side-effect discovery. In addition, the construction of the prediction model also provides a general framework for similarity-based data integration which is suitable for other drug-related studies such as target, side-effect prediction etc.

Availability and implementation

The web service SPACE is available at http://www.bprc.ac.cn/space.",2015-01-31 +24651380,Integrating multiple genomic data to predict disease-causing nonsynonymous single nucleotide variants in exome sequencing studies.,"Exome sequencing has been widely used in detecting pathogenic nonsynonymous single nucleotide variants (SNVs) for human inherited diseases. However, traditional statistical genetics methods are ineffective in analyzing exome sequencing data, due to such facts as the large number of sequenced variants, the presence of non-negligible fraction of pathogenic rare variants or de novo mutations, and the limited size of affected and normal populations. Indeed, prevalent applications of exome sequencing have been appealing for an effective computational method for identifying causative nonsynonymous SNVs from a large number of sequenced variants. Here, we propose a bioinformatics approach called SPRING (Snv PRioritization via the INtegration of Genomic data) for identifying pathogenic nonsynonymous SNVs for a given query disease. Based on six functional effect scores calculated by existing methods (SIFT, PolyPhen2, LRT, MutationTaster, GERP and PhyloP) and five association scores derived from a variety of genomic data sources (gene ontology, protein-protein interactions, protein sequences, protein domain annotations and gene pathway annotations), SPRING calculates the statistical significance that an SNV is causative for a query disease and hence provides a means of prioritizing candidate SNVs. With a series of comprehensive validation experiments, we demonstrate that SPRING is valid for diseases whose genetic bases are either partly known or completely unknown and effective for diseases with a variety of inheritance styles. In applications of our method to real exome sequencing data sets, we show the capability of SPRING in detecting causative de novo mutations for autism, epileptic encephalopathies and intellectual disability. 
We further provide an online service, the standalone software and genome-wide predictions of causative SNVs for 5,080 diseases at http://bioinfo.au.tsinghua.edu.cn/spring.",2014-03-20 +25061067,Predicting protein phosphorylation from gene expression: top methods from the IMPROVER Species Translation Challenge.,"

Motivation

Using gene expression to infer changes in protein phosphorylation levels induced in cells by various stimuli is an outstanding problem. The intra-species protein phosphorylation challenge organized by the IMPROVER consortium provided the framework to identify the best approaches to address this issue.

Results

Rat lung epithelial cells were treated with 52 stimuli, and gene expression and phosphorylation levels were measured. Competing teams used gene expression data from 26 stimuli to develop protein phosphorylation prediction models and were ranked based on prediction performance for the remaining 26 stimuli. Three teams were tied in first place in this challenge achieving a balanced accuracy of about 70%, indicating that gene expression is only moderately predictive of protein phosphorylation. In spite of the similar performance, the approaches used by these three teams, described in detail in this article, were different, with the average number of predictor genes per phosphoprotein used by the teams ranging from 3 to 124. However, a significant overlap of gene signatures between teams was observed for the majority of the proteins considered, while Kyoto Encyclopedia of Genes and Genomes (KEGG) pathways were enriched in the union of the predictor genes of the three teams for multiple proteins.

Availability and implementation

Gene expression and protein phosphorylation data are available from ArrayExpress (E-MTAB-2091). Software implementations of the approach of Teams 49 and 75 are available at http://bioinformaticsprb.med.wayne.edu and http://people.cs.clemson.edu/~luofeng/sbv.rar, respectively.

Contact

gyanbhanot@gmail.com or luofeng@clemson.edu or atarca@med.wayne.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-23 +21431616,From ontology selection and semantic web to an integrated information system for food-borne diseases and food safety.,"Several factors have hindered effective use of information and resources related to food safety due to inconsistency among semantically heterogeneous data resources, lack of knowledge on profiling of food-borne pathogens, and knowledge gaps among research communities, government risk assessors/managers, and end-users of the information. This paper discusses technical aspects in the establishment of a comprehensive food safety information system consisting of the following steps: (a) computational collection and compiling publicly available information, including published pathogen genomic, proteomic, and metabolomic data; (b) development of ontology libraries on food-borne pathogens and design automatic algorithms with formal inference and fuzzy and probabilistic reasoning to address the consistency and accuracy of distributed information resources (e.g., PulseNet, FoodNet, OutbreakNet, PubMed, NCBI, EMBL, and other online genetic databases and information); (c) integration of collected pathogen profiling data, Foodrisk.org ( http://www.foodrisk.org ), PMP, Combase, and other relevant information into a user-friendly, searchable, ""homogeneous"" information system available to scientists in academia, the food industry, and government agencies; and (d) development of a computational model in semantic web for greater adaptability and robustness.",2011-01-01 +25359893,eQTL epistasis: detecting epistatic effects and inferring hierarchical relationships of genes in biological pathways.,"

Motivation

Epistasis is the interactions among multiple genetic variants. It has emerged to explain the 'missing heritability' that a marginal genetic effect does not account for by genome-wide association studies, and also to understand the hierarchical relationships between genes in the genetic pathways. The Fisher's geometric model is common in detecting the epistatic effects. However, despite the substantial successes of many studies with the model, it often fails to discover the functional dependence between genes in an epistasis study, which is an important role in inferring hierarchical relationships of genes in the biological pathway.

Results

We justify the imperfectness of Fisher's model in the simulation study and its application to the biological data. Then, we propose a novel generic epistasis model that provides a flexible solution for various biological putative epistatic models in practice. The proposed method enables one to efficiently characterize the functional dependence between genes. Moreover, we suggest a statistical strategy for determining a recessive or dominant link among epistatic expression quantitative trait locus to enable the ability to infer the hierarchical relationships. The proposed method is assessed by simulation experiments of various settings and is applied to human brain data regarding schizophrenia.

Availability and implementation

The MATLAB source codes are publicly available at: http://biomecis.uta.edu/epistasis.",2014-10-30 +24974934,"ChIPseek, a web-based analysis tool for ChIP data.","

Background

Chromatin is a dynamic but highly regulated structure. DNA-binding proteins such as transcription factors, epigenetic and chromatin modifiers are responsible for regulating specific gene expression pattern and may result in different phenotypes. To reveal the identity of the proteins associated with the specific region on DNA, chromatin immunoprecipitation (ChIP) is the most widely used technique. ChIP assay followed by next generation sequencing (ChIP-seq) or microarray (ChIP-chip) is often used to study patterns of protein-binding profiles in different cell types and in cancer samples on a genome-wide scale. However, only a limited number of bioinformatics tools are available for ChIP datasets analysis.

Results

We present ChIPseek, a web-based tool for ChIP data analysis providing summary statistics in graphs and offering several commonly demanded analyses. ChIPseek can provide statistical summary of the dataset including histogram of peak length distribution, histogram of distances to the nearest transcription start site (TSS), and pie chart (or bar chart) of genomic locations for users to have a comprehensive view on the dataset for further analysis. For examining the potential functions of peaks, ChIPseek provides peak annotation, visualization of peak genomic location, motif identification, sequence extraction, and comparison between datasets. Beyond that, ChIPseek also offers users the flexibility to filter peaks and re-analyze the filtered subset of peaks. ChIPseek supports 20 different genome assemblies for 12 model organisms including human, mouse, rat, worm, fly, frog, zebrafish, chicken, yeast, fission yeast, Arabidopsis, and rice. We use demo datasets to demonstrate the usage and intuitive user interface of ChIPseek.

Conclusions

ChIPseek provides a user-friendly interface for biologists to analyze large-scale ChIP data without requiring any programming skills. All the results and figures produced by ChIPseek can be downloaded for further analysis. The analysis tools built into ChIPseek, especially the ones for selecting and examining a subset of peaks from ChIP data, provide invaluable help for exploring the high-throughput data from either ChIP-seq or ChIP-chip. ChIPseek is freely available at http://chipseek.cgu.edu.tw.",2014-06-30 +23193223,Defining and predicting structurally conserved regions in protein superfamilies.,"

Motivation

The structures of homologous proteins are generally better conserved than their sequences. This phenomenon is demonstrated by the prevalence of structurally conserved regions (SCRs) even in highly divergent protein families. Defining SCRs requires the comparison of two or more homologous structures and is affected by their availability and divergence, and our ability to deduce structurally equivalent positions among them. In the absence of multiple homologous structures, it is necessary to predict SCRs of a protein using information from only a set of homologous sequences and (if available) a single structure. Accurate SCR predictions can benefit homology modelling and sequence alignment.

Results

Using pairwise DaliLite alignments among a set of homologous structures, we devised a simple measure of structural conservation, termed structural conservation index (SCI). SCI was used to distinguish SCRs from non-SCRs. A database of SCRs was compiled from 386 SCOP superfamilies containing 6489 protein domains. Artificial neural networks were then trained to predict SCRs with various features deduced from a single structure and homologous sequences. Assessment of the predictions via a 5-fold cross-validation method revealed that predictions based on features derived from a single structure perform similarly to ones based on homologous sequences, while combining sequence and structural features was optimal in terms of accuracy (0.755) and Matthews correlation coefficient (0.476). These results suggest that even without information from multiple structures, it is still possible to effectively predict SCRs for a protein. Finally, inspection of the structures with the worst predictions pinpoints difficulties in SCR definitions.

Availability

The SCR database and the prediction server can be found at http://prodata.swmed.edu/SCR.

Contact

91huangi@gmail.com or grishin@chop.swmed.edu

Supplementary information

Supplementary data are available at Bioinformatics Online.",2012-11-28 +24530376,Condenser: a statistical aggregation tool for multi-sample quantitative proteomic data from Matrix Science Mascot Distiller™.,"We describe Condenser, a freely available, comprehensive open-source tool for merging multidimensional quantitative proteomics data from the Matrix Science Mascot Distiller Quantitation Toolbox into a common format ready for subsequent bioinformatic analysis. A number of different relative quantitation technologies, such as metabolic (15)N and amino acid stable isotope incorporation, label-free and chemical-label quantitation are supported. The program features multiple options for curative filtering of the quantified peptides, allowing the user to choose data quality thresholds appropriate for the current dataset, and ensure the quality of the calculated relative protein abundances. Condenser also features optional global normalization, peptide outlier removal, multiple testing and calculation of t-test statistics for highlighting and evaluating proteins with significantly altered relative protein abundances. Condenser provides an attractive addition to the gold-standard quantitative workflow of Mascot Distiller, allowing easy handling of larger multi-dimensional experiments. Source code, binaries, test data set and documentation are available at http://condenser.googlecode.com/.",2014-02-13 +22174796,Calculating orthologs in bacteria and Archaea: a divide and conquer approach.,"Among proteins, orthologs are defined as those that are derived by vertical descent from a single progenitor in the last common ancestor of their host organisms. Our goal is to compute a complete set of protein orthologs derived from all currently available complete bacterial and archaeal genomes. 
Traditional approaches typically rely on all-against-all BLAST searching which is prohibitively expensive in terms of hardware requirements or computational time (requiring an estimated 18 months or more on a typical server). Here, we present xBASE-Orth, a system for ongoing ortholog annotation, which applies a ""divide and conquer"" approach and adopts a pragmatic scheme that trades accuracy for speed. Starting at species level, xBASE-Orth carefully constructs and uses pan-genomes as proxies for the full collections of coding sequences at each level as it progressively climbs the taxonomic tree using the previously computed data. This leads to a significant decrease in the number of alignments that need to be performed, which translates into faster computation, making ortholog computation possible on a global scale. Using xBASE-Orth, we analyzed an NCBI collection of 1,288 bacterial and 94 archaeal complete genomes with more than 4 million coding sequences in 5 weeks and predicted more than 700 million ortholog pairs, clustered in 175,531 orthologous groups. We have also identified sets of highly conserved bacterial and archaeal orthologs and in so doing have highlighted anomalies in genome annotation and in the proposed composition of the minimal bacterial genome. In summary, our approach allows for scalable and efficient computation of the bacterial and archaeal ortholog annotations. In addition, due to its hierarchical nature, it is suitable for incorporating novel complete genomes and alternative genome annotations. 
The computed ortholog data and a continuously evolving set of applications based on it are integrated in the xBASE database, available at http://www.xbase.ac.uk/.",2011-12-12 +21063943,"PRIDE and ""Database on Demand"" as valuable tools for computational proteomics.","The Proteomics Identifications Database (PRIDE, http://www.ebi.ac.uk/pride ) provides users with the ability to explore and compare mass spectrometry-based proteomics experiments that reveal details of the protein expression found in a broad range of taxonomic groups, tissues, and disease states. A PRIDE experiment typically includes identifications of proteins, peptides, and protein modifications. Additionally, many of the submitted experiments also include the mass spectra that provide the evidence for these identifications. Finally, one of the strongest advantages of PRIDE in comparison with other proteomics repositories is the amount of metadata it contains, a key point to put the above-mentioned data in biological and/or technical context. Several informatics tools have been developed in support of the PRIDE database. The most recent one is called ""Database on Demand"" (DoD), which allows custom sequence databases to be built in order to optimize the results from search engines. We describe the use of DoD in this chapter. Additionally, in order to show the potential of PRIDE as a source for data mining, we also explore complex queries using federated BioMart queries to integrate PRIDE data with other resources, such as Ensembl, Reactome, or UniProt.",2011-01-01 +26318087,Enhancing protein function prediction with taxonomic constraints--The Argot2.5 web server.,"Argot2.5 (Annotation Retrieval of Gene Ontology Terms) is a web server designed to predict protein function. It is an updated version of the previous Argot2 enriched with new features in order to enhance its usability and its overall performance. 
The algorithmic strategy exploits the grouping of Gene Ontology terms by means of semantic similarity to infer protein function. The tool has been challenged over two independent benchmarks and compared to Argot2, PANNZER, and a baseline method relying on BLAST, proving to obtain a better performance thanks to the contribution of some key interventions in critical steps of the working pipeline. The most effective changes regard: (a) the selection of the input data from sequence similarity searches performed against a clustered version of UniProt databank and a remodeling of the weights given to Pfam hits, (b) the application of taxonomic constraints to filter out annotations that cannot be applied to proteins belonging to the species under investigation. The taxonomic rules are derived from our in-house developed tool, FunTaxIS, that extends those provided by the Gene Ontology consortium. The web server is free for academic users and is available online at http://www.medcomp.medicina.unipd.it/Argot2-5/.",2015-08-28 +24351709,CrossMap: a versatile tool for coordinate conversion between genome assemblies.,"

Motivation

Reference genome assemblies are subject to change and refinement from time to time. Generally, researchers need to convert the results that have been analyzed according to old assemblies to newer versions, or vice versa, to facilitate meta-analysis, direct comparison, data integration and visualization. Several useful conversion tools can convert genome interval files in browser extensible data or general feature format, but none have the functionality to convert files in sequence alignment map or BigWig format. This is a significant gap in computational genomics tools, as these formats are the ones most widely used for representing high-throughput sequencing data, such as RNA-seq, chromatin immunoprecipitation sequencing, DNA-seq, etc.

Results

Here we developed CrossMap, a versatile and efficient tool for converting genome coordinates between assemblies. CrossMap supports most of the commonly used file formats, including BAM, sequence alignment map, Wiggle, BigWig, browser extensible data, general feature format, gene transfer format and variant call format.

Availability and implementation

CrossMap is written in Python and C. Source code and a comprehensive user's manual are freely available at: http://crossmap.sourceforge.net/.",2013-12-18 +25237817,A strategy for genome-wide identification of gene based polymorphisms in rice reveals non-synonymous variation and functional genotypic markers.,"The genetic diversity of plants has traditionally been employed to improve crop plants to suit human needs, and in the future feed the increasing population and protect crops from environmental stresses and climate change. Genome-wide sequencing is a reality and can be used to make association to crop traits to be utilized by high-throughput marker based selection methods. This study describes a strategy of using next generation sequencing (NGS) data from the rice genome to make comparisons to the high-quality reference genome, identify functional polymorphisms within genes that might result in function changes and be used to study correlations to traits and employed in genetic mapping. We analyzed the NGS data of Oryza sativa ssp indica cv. G4 covering 241 Mb with ∼20X coverage and compared to the reference genome of Oryza sativa ssp. japonica to describe the genome-wide distribution of gene-based single nucleotide polymorphisms (SNPs). The analysis shows that the 63% covered genome consists of 1.6 million SNPs with 6.9 SNPs/Kb, and including 80,146 insertions and 92,655 deletions (INDELs) genome-wide. There are a total of 1,139,801 intergenic SNPs, 295,136 SNPs in intronic/non-coding regions, 195,098 in coding regions, 23,242 SNPs at the five-prime (5') UTR regions and 22,686 SNPs at the three-prime (3') UTR region. SNP variation was found in 40,761 gene loci, which include 75,262 synonymous and 119,836 non-synonymous changes, and functional reading frame changes through 3,886 inducing STOP-codon (isSNP) and 729 preventing STOP-codon (psSNP) variation. 
There are quickly evolving 194 high SNP hotspot genes (>100 SNPs/gene), and 1,513 out of 2,458 transcription factors displaying 2,294 non-synonymous SNPs that can be a major source of phenotypic diversity within the species. All data is searchable at https://plantstress-pereira.uark.edu/oryza2. We envision that this strategy will be useful for the identification of genes for crop traits and molecular breeding of rice cultivars.",2014-09-19 +26671805,Detecting Protein Complexes from Signed Protein-Protein Interaction Networks.,"Identification of protein complexes is fundamental for understanding the cellular functional organization. With the accumulation of physical protein-protein interaction (PPI) data, computational detection of protein complexes from available PPI networks has drawn a lot of attentions. While most of the existing protein complex detection algorithms focus on analyzing the physical protein-protein interaction network, none of them take into account the ""signs""  (i.e., activation-inhibition relationships) of physical interactions. As the ""signs""  of interactions reflect the way proteins communicate, considering the ""signs""  of interactions can not only increase the accuracy of protein complex identification, but also deepen our understanding of the mechanisms of cell functions. In this study, we proposed a novel Signed Graph regularized Nonnegative Matrix Factorization (SGNMF) model to identify protein complexes from signed PPI networks. In our experiments, we compared the results collected by our model on signed PPI networks with those predicted by the state-of-the-art complex detection techniques on the original unsigned PPI networks. We observed that considering the ""signs""  of interactions significantly benefits the detection of protein complexes. 
Furthermore, based on the predicted complexes, we predicted a set of signed complex-complex interactions for each dataset, which provides a novel insight of the higher level organization of the cell. All the experimental results and codes can be downloaded from http://mail.sysu.edu.cn/home/stsddq@mail.sysu.edu.cn/dai/others/SGNMF.zip.",2015-11-01 +25406469,A framework for inferring fitness landscapes of patient-derived viruses using quasispecies theory.,"Fitness is a central quantity in evolutionary models of viruses. However, it remains difficult to determine viral fitness experimentally, and existing in vitro assays can be poor predictors of in vivo fitness of viral populations within their hosts. Next-generation sequencing can nowadays provide snapshots of evolving virus populations, and these data offer new opportunities for inferring viral fitness. Using the equilibrium distribution of the quasispecies model, an established model of intrahost viral evolution, we linked fitness parameters to the composition of the virus population, which can be estimated by next-generation sequencing. For inference, we developed a Bayesian Markov chain Monte Carlo method to sample from the posterior distribution of fitness values. The sampler can overcome situations where no maximum-likelihood estimator exists, and it can adaptively learn the posterior distribution of highly correlated fitness landscapes without prior knowledge of their shape. We tested our approach on simulated data and applied it to clinical human immunodeficiency virus 1 samples to estimate their fitness landscapes in vivo. The posterior fitness distributions allowed for differentiating viral haplotypes from each other, for determining neutral haplotype networks, in which no haplotype is more or less credibly fit than any other, and for detecting epistasis in fitness landscapes. 
Our implemented approach, called QuasiFit, is available at http://www.cbg.ethz.ch/software/quasifit.",2014-11-17 +25539727,A neutrality test for detecting selection on DNA methylation using single methylation polymorphism frequency spectrum.,"Inheritable epigenetic mutations (epimutations) can contribute to transmittable phenotypic variation. Thus, epimutations can be subject to natural selection and impact the fitness and evolution of organisms. Based on the framework of the modified Tajima's D test for DNA mutations, we developed a neutrality test with the statistic ""D(m)"" to detect selection forces on DNA methylation mutations using single methylation polymorphisms. With computer simulation and empirical data analysis, we compared the D(m) test with the original and modified Tajima's D tests and demonstrated that the D(m) test is suitable for detecting selection on epimutations and outperforms original/modified Tajima's D tests. Due to the higher resetting rate of epimutations, the interpretation of D(m) on epimutations and Tajima's D test on DNA mutations could be different in inferring natural selection. Analyses using simulated and empirical genome-wide polymorphism data suggested that genes under genetic and epigenetic selections behaved differently. We applied the D(m) test to recently originated Arabidopsis and human genes, and showed that newly evolved genes contain higher level of rare epialleles, suggesting that epimutation may play a role in origination and evolution of genes and genomes. Overall, we demonstrate the utility of the D(m) test to detect whether the loci are under selection regarding DNA methylation. Our analytical metrics and methodology could contribute to our understanding of evolutionary processes of genes and genomes in the field of epigenetics. 
The Perl script for the ""D(m)"" test is available at http://fanlab.wayne.edu/ (last accessed December 18, 2014).",2014-12-23 +25620112,Developing genome-wide microsatellite markers of bamboo and their applications on molecular marker assisted taxonomy for accessions in the genus Phyllostachys.,"Morphology-based taxonomy via exiguously reproductive organ has severely limitation on bamboo taxonomy, mainly owing to infrequent and unpredictable flowering events of bamboo. Here, we present the first genome-wide analysis and application of microsatellites based on the genome of moso bamboo (Phyllostachys edulis) to assist bamboo taxonomy. Of identified 127,593 microsatellite repeat-motifs, the primers of 1,451 microsatellites were designed and 1,098 markers were physically mapped on the genome of moso bamboo. A total of 917 markers were successfully validated in 9 accessions with ~39.8% polymorphic potential. Retrieved from validated microsatellite markers, 23 markers were selected for polymorphic analysis among 78 accessions and 64 alleles were detected with an average of 2.78 alleles per primers. The cluster result indicated the majority of the accessions were consistent with their current taxonomic classification, confirming the suitability and effectiveness of the developed microsatellite markers. The variations of microsatellite marker in different species were confirmed by sequencing and in silico comparative genome mapping were investigated. Lastly, a bamboo microsatellites database (http://www.bamboogdb.org/ssr) was implemented to browse and search large information of bamboo microsatellites. Consequently, our results of microsatellite marker development are valuable for assisting bamboo taxonomy and investigating genomic studies in bamboo and related grass species.",2015-01-26 +25708089,PNImodeler: web server for inferring protein-binding nucleotides from sequence data.,"

Background

Interactions between DNA and proteins are essential to many biological processes such as transcriptional regulation and DNA replication. With the increased availability of structures of protein-DNA complexes, several computational studies have been conducted to predict DNA binding sites in proteins. However, little attempt has been made to predict protein binding sites in DNA.

Results

From an extensive analysis of protein-DNA complexes, we identified powerful features of DNA and protein sequences which can be used in predicting protein binding sites in DNA sequences. We developed two support vector machine (SVM) models that predict protein binding nucleotides from DNA and/or protein sequences. One SVM model that used DNA sequence data alone achieved a sensitivity of 73.4%, a specificity of 64.8%, an accuracy of 68.9% and a correlation coefficient of 0.382 with a test dataset that was not used in training. Another SVM model that used both DNA and protein sequences achieved a sensitivity of 67.6%, a specificity of 74.3%, an accuracy of 71.4% and a correlation coefficient of 0.418.

Conclusions

Predicting binding sites in double-stranded DNAs is a more difficult task than predicting binding sites in single-stranded molecules. Our study showed that protein binding sites in double-stranded DNA molecules can be predicted with a comparable accuracy as those in single-stranded molecules. Our study also demonstrated that using both DNA and protein sequences resulted in a better prediction performance than using DNA sequence data alone. The SVM models and datasets constructed in this study are available at http://bclab.inha.ac.kr/pnimodeler.",2015-01-29 +24013927,Multi-profile Bayesian alignment model for LC-MS data analysis with integration of internal standards.,"

Motivation

Liquid chromatography-mass spectrometry (LC-MS) has been widely used for profiling expression levels of biomolecules in various '-omic' studies including proteomics, metabolomics and glycomics. Appropriate LC-MS data preprocessing steps are needed to detect true differences between biological groups. Retention time (RT) alignment, which is required to ensure that ion intensity measurements among multiple LC-MS runs are comparable, is one of the most important yet challenging preprocessing steps. Current alignment approaches estimate RT variability using either single chromatograms or detected peaks, but do not simultaneously take into account the complementary information embedded in the entire LC-MS data.

Results

We propose a Bayesian alignment model for LC-MS data analysis. The alignment model provides estimates of the RT variability along with uncertainty measures. The model enables integration of multiple sources of information including internal standards and clustered chromatograms in a mathematically rigorous framework. We apply the model to LC-MS metabolomic, proteomic and glycomic data. The performance of the model is evaluated based on ground-truth data, by measuring correlation of variation, RT difference across runs and peak-matching performance. We demonstrate that Bayesian alignment model improves significantly the RT alignment performance through appropriate integration of relevant information.

Availability and implementation

MATLAB code, raw and preprocessed LC-MS data are available at http://omics.georgetown.edu/alignLCMS.html.

Contact

hwr@georgetown.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-09-06 +26671811,Probabilistic Inference on Multiple Normalized Signal Profiles from Next Generation Sequencing: Transcription Factor Binding Sites.,"With the prevalence of chromatin immunoprecipitation (ChIP) with sequencing (ChIP-Seq) technology, massive ChIP-Seq data has been accumulated. The ChIP-Seq technology measures the genome-wide occupancy of DNA-binding proteins in vivo. It is well-known that different DNA-binding protein occupancies may result in a gene being regulated in different conditions (e.g. different cell types). To fully understand a gene's function, it is essential to develop probabilistic models on multiple ChIP-Seq profiles for deciphering the gene transcription causalities. In this work, we propose and describe two probabilistic models. Assuming the conditional independence of different DNA-binding proteins' occupancies, the first method (SignalRanker) is developed as an intuitive method for ChIP-Seq genome-wide signal profile inference. Unfortunately, such an assumption may not always hold in some gene regulation cases. Thus, we propose and describe another method (FullSignalRanker) which does not make the conditional independence assumption. The proposed methods are compared with other existing methods on ENCODE ChIP-Seq datasets, demonstrating its regression and classification ability. The results suggest that FullSignalRanker is the best-performing method for recovering the signal ranks on the promoter and enhancer regions. In addition, FullSignalRanker is also the best-performing method for peak sequence classification. We envision that SignalRanker and FullSignalRanker will become important in the era of next generation sequencing. 
FullSignalRanker program is available on the following website: http://www.cs.toronto.edu/~wkc/FullSignalRanker/.",2015-11-01 +25882187,Prediction of Protein-Protein Interactions with Physicochemical Descriptors and Wavelet Transform via Random Forests.,"Protein-protein interactions (PPIs) provide valuable insight into the inner workings of cells, and it is significant to study the network of PPIs. It is vitally important to develop an automated method as a high-throughput tool to timely predict PPIs. Based on the physicochemical descriptors, a protein was converted into several digital signals, and then wavelet transform was used to analyze them. With such a formulation frame to represent the samples of protein sequences, the random forests algorithm was adopted to conduct prediction. The results on a large-scale independent-test data set show that the proposed model can achieve a good performance with an accuracy value of about 0.86 and a geometric mean value of about 0.85. Therefore, it can be a usefully supplementary tool for PPI prediction. The predictor used in this article is freely available at http://www.jci-bioinfo.cn/PPI_RF.",2015-04-16 +25539927,Conditional mutual inclusive information enables accurate quantification of associations in gene regulatory networks.,"Mutual information (MI), a quantity describing the nonlinear dependence between two random variables, has been widely used to construct gene regulatory networks (GRNs). Despite its good performance, MI cannot separate the direct regulations from indirect ones among genes. Although the conditional mutual information (CMI) is able to identify the direct regulations, it generally underestimates the regulation strength, i.e. it may result in false negatives when inferring gene regulations. In this work, to overcome the problems, we propose a novel concept, namely conditional mutual inclusive information (CMI2), to describe the regulations between genes. 
Furthermore, with CMI2, we develop a new approach, namely CMI2NI (CMI2-based network inference), for reverse-engineering GRNs. In CMI2NI, CMI2 is used to quantify the mutual information between two genes given a third one through calculating the Kullback-Leibler divergence between the postulated distributions of including and excluding the edge between the two genes. The benchmark results on the GRNs from DREAM challenge as well as the SOS DNA repair network in Escherichia coli demonstrate the superior performance of CMI2NI. Specifically, even for gene expression data with small sample size, CMI2NI can not only infer the correct topology of the regulation networks but also accurately quantify the regulation strength between genes. As a case study, CMI2NI was also used to reconstruct cancer-specific GRNs using gene expression data from The Cancer Genome Atlas (TCGA). CMI2NI is freely accessible at http://www.comp-sysbio.org/cmi2ni.",2014-12-24 +24479843,Quantitative structure-property relationship modeling: a valuable support in high-throughput screening quality control.,"Evaluation of important pharmacokinetic properties such as hydrophobicity by high-throughput screening (HTS) methods is a major issue in drug discovery. In this paper, we present measurements of the chromatographic hydrophobicity index (CHI) on a subset of the French chemical library Chimiothèque Nationale (CN). The data were used in quantitative structure-property relationship (QSPR) modeling in order to annotate the CN. An algorithm is proposed to detect problematic molecules with large prediction errors, called outliers. In order to find an explanation for these large discrepancies between predicted and experimental values, these compounds were reanalyzed experimentally. 
As the first selected outliers indeed had experimental problems, including hydrolysis or sheer absence of expected structure, we herewith propose the use of QSPR as a support tool for quality control of screening data and encourage cooperation between experimental and theoretical teams to improve results. The corrected data were used to produce a model, which is freely available on our web server at http://infochim.u-strasbg.fr/webserv/VSEngine.html .",2014-02-12 +27258721,Particulate Matter and Subclinical Atherosclerosis: Associations between Different Particle Sizes and Sources with Carotid Intima-Media Thickness in the SAPALDIA Study.,"

Background

Subclinical atherosclerosis has been associated with long-term exposure to particulate matter (PM), but the relevance of particle size and sources of exposure remains unclear.

Objectives

We investigated the association of long-term exposure to PM10 (≤ 10 μm), PM2.5 (≤ 2.5 μm: total mass, vehicular, and crustal sources), and ultrafine particles [UFP < 0.1 μm: particle number concentration (PNC) and lung-deposited surface area (LDSA)] with carotid intima-media thickness (CIMT).

Methods

We used data from 1,503 participants ≥ 50 years old who participated in the third examination of the Swiss SAPALDIA cohort. Exposures were obtained from dispersion models and land-use regression models. Covariate information, including previous cardiovascular risk factors, was obtained from the second and third SAPALDIA examinations.

Results

The adjusted percent difference in CIMT associated with an exposure contrast between the 10th and 90th percentile was 1.58% (95% CI: -0.30, 3.47%) for PM10, 2.10% (95% CI: 0.04, 4.16%) for PM2.5, 1.67% (95% CI: -0.13, 3.48%) for the vehicular source of PM2.5, -0.58% (95% CI: -3.95, 2.79%) for the crustal source of PM2.5, 2.06% (95% CI: 0.03, 4.10%) for PNC, and 2.32% (95% CI: 0.23, 4.40%) for LDSA. Stronger associations were observed among diabetics, subjects with low-educational level, and those at higher cardiovascular risk.

Conclusions

CIMT was associated with exposure to PM10, PM2.5, and UFP. The PM2.5 source-specific analysis showed a positive association for the vehicular source but not for the crustal source. Although the effects of PNC and LDSA were similar in magnitude, two-pollutant and residual-based models suggested that LDSA may be a better marker for the health relevance of UFP. Citation: Aguilera I, Dratva J, Caviezel S, Burdet L, de Groot E, Ducret-Stich RE, Eeftens M, Keidel D, Meier R, Perez L, Rothe T, Schaffner E, Schmit-Trucksäss A, Tsai MY, Schindler C, Künzli N, Probst-Hensch N. 2016. Particulate matter and subclinical atherosclerosis: associations between different particle sizes and sources with carotid intima-media thickness in the SAPALDIA study. Environ Health Perspect 124:1700-1706; http://dx.doi.org/10.1289/EHP161.",2016-06-03 +26484145,Comparison of genomic DNA methylation pattern among septic and non-septic newborns - An epigenome wide association study.,"DNA methylation is the current strategy in the field of biomarker discovery due to its prognostic efficiency. Its role in prognosis and early diagnosis has been recognized in various types of cancer. Sepsis still remains one of the major causes of neonatal mortality. Delay in diagnosis of sepsis leads to treatment difficulties and poor outcome. In this study, we have done an epigenome wide search to identify potential markers for prognosis of neonatal sepsis which may improve the treatment strategies. We analyzed the CpG methylation status in the epigenome of three septic and non-septic babies using Illumina Infinium HumanMethylation450K methylation microarray. The microarray data was analyzed with Illumina GenomeStudio v2011.1. After screening for biological and clinical significance, we found 81 differentially methylated CpGs located in 64 genes. 
Bioinformatic analysis using DAVID and GeneMania revealed a panel of differentially methylated protocadherin beta (PCDHB) genes that play vital role in leukocyte cell adhesion and Wnt signaling pathway. Apart, genes like CCS, DNAJA3, and DEGS2 were potentially hyper/hypo methylated which can be utilized in the development of novel biomarkers. This study will be helpful in exploring the role of DNA methylation in the pathophysiology of neonatal sepsis. The complete microarray data can be accessed from the public domain, Gene Expression Omnibus of NCBI (http://www.ncbi.nlm.nih.gov/geo/). The accession number is GSE58651.",2014-11-15 +26306642,FAPI: Fast and accurate P-value Imputation for genome-wide association study.,"Imputing individual-level genotypes (or genotype imputation) is now a standard procedure in genome-wide association studies (GWAS) to examine disease associations at untyped common genetic variants. Meta-analysis of publicly available GWAS summary statistics can allow more disease-associated loci to be discovered, but these data are usually provided for various variant sets. Thus imputing these summary statistics of different variant sets into a common reference panel for meta-analyses is impossible using traditional genotype imputation methods. Here we develop a fast and accurate P-value imputation (FAPI) method that utilizes summary statistics of common variants only. Its computational cost is linear with the number of untyped variants and has similar accuracy compared with IMPUTE2 with prephasing, one of the leading methods in genotype imputation. In addition, based on the FAPI idea, we develop a metric to detect abnormal association at a variant and showed that it had a significantly greater power compared with LD-PAC, a method that quantifies the evidence of spurious associations based on likelihood ratio. 
Our method is implemented in a user-friendly software tool, which is available at http://statgenpro.psychiatry.hku.hk/fapi.",2015-08-26 +25629524,A bioinformatics tool for epitope-based vaccine design that accounts for human ethnic diversity: application to emerging infectious diseases.,"

Background

Peptide vaccination based on multiple T-cell epitopes can be used to target well-defined ethnic populations. Because the response to T-cell epitopes is restricted by HLA proteins, the HLA specificity of T-cell epitopes becomes a major consideration for epitope-based vaccine design. We have previously shown that CD4+ T-cell epitopes restricted by 95% of human MHC class II proteins can be predicted with high-specificity.

Methods

We describe here the integration of epitope prediction with population coverage and epitope selection algorithms. The population coverage assessment makes use of the Allele Frequency Net Database. We present the computational platform Predivac-2.0 for HLA class II-restricted epitope-based vaccine design, which accounts comprehensively for human genetic diversity.

Results

We validated the performance of the tool on the identification of promiscuous and immunodominant CD4+ T-cell epitopes from the human immunodeficiency virus (HIV) protein Gag. We further describe an application for epitope-based vaccine design in the context of emerging infectious diseases associated with Lassa, Nipah and Hendra viruses. Putative CD4+ T-cell epitopes were mapped on the surface glycoproteins of these pathogens and are good candidates to be experimentally tested, as they hold potential to provide cognate help in vaccination settings in their respective target populations.

Conclusion

Predivac-2.0 is a novel approach in epitope-based vaccine design, particularly suited to be applied to virus-related emerging infectious diseases, because the geographic distributions of the viruses are well defined and ethnic populations in need of vaccination can be determined (""ethnicity-oriented approach""). Predivac-2.0 is accessible through the website http://predivac.biosci.uq.edu.au/.",2015-01-25 +26680743,g-FLUA2H: a web-based application to study the dynamics of animal-to-human mutation transmission for influenza viruses.,"g-FLUA2H is a web-based application focused on the analysis of the dynamics of influenza virus animal-to-human (A2H) mutation transmissions. The application only requires the viral protein sequences from both the animal and human host populations as input datasets. The comparative analyses between the co-aligned sequences of the two viral populations is based on a sliding window approach of size nine for statistical significance and data application to the major histocompatibility complex (MHC) and T-cell receptor (TCR) immune response mechanisms. The sequences at each of the aligned overlapping nonamer positions for the respective virus hosts are classified as four patterns of characteristic diversity motifs, as a basis for quantitative analyses: (i) ""index"", the most prevalent sequence; (ii) ""major"" variant, the second most common sequence and the single most prevalent variant of the index, with at least one amino acid mutation; (iii) ""minor"" variants, multiple different sequences, each with an incidence (percent occurrence) less than that of the major variant; and (iv) ""unique"" variants, each with only one occurrence in the alignment. The diversity motifs and their incidences at each of the nonamer positions allow evaluation of the mutation transmission dynamics and selectivity of the viral sequences in relation to the animal and the human hosts. 
g-FLUA2H is facilitated by a grid back-end for parallel processing of large sequence datasets. The web-application is publicly available at http://bioinfo.perdanauniversity.edu.my/g-FLUA2H. It can be used for a detailed characterization of the composition and incidence of mutations present in the proteomes of influenza viruses from animal and human host populations, for a better understanding of host tropism.",2015-12-09 +27164619,Temporal Trends of Secondhand Smoke Exposure: Nonsmoking Workers in the United States (NHANES 2001-2010).,"

Background

The workplace is one of the major locations outside of the home for nonsmokers' exposure to secondhand smoke (SHS). New policies in many U.S. states and localities restrict or prohibit smoking in the workplace, and information on current trends in the exposure of nonsmokers to SHS across various occupational groups is therefore needed.

Objective

We evaluated temporal trends in SHS exposure among nonsmoking workers in the United States and identified those occupations with workers with the highest levels of SHS exposure.

Methods

We combined serum cotinine (sCOT) measurements and questionnaire data from five survey cycles of the National Health and Nutrition Examination Survey (NHANES: 2001-2010). Trends in SHS exposure by occupations were determined from percent changes and least-squares geometric means (LSGMs) of sCOT concentrations computed using sample-weighted multiple regression models.

Results

Between NHANES 2001-2002 and NHANES 2009-2010, LSGMs of sCOT levels had changed -25% (95% CI: -39, -7%) in nonsmoking workers. The largest decrease was identified among food preparation workers [-54% (95% CI: -74, -19%)], followed by white-collar [-40%, (95% CI: -56, -19%)] and blue-collar workers (-32%, 95% CI: -51, -5%). LSGMs of sCOT remained highest in food preparation workers in all survey cycles, but the gap between occupations narrowed in the latest survey cycle (2009-2010). For example, the gap in LSGMs of sCOT between food preparation and science/education workers dropped > 70% during 2000 to 2010.

Conclusions

During the period from 2001 to 2010, the overall SHS exposure in nonsmoking workers declined with substantial drops in food preparation/service and blue-collar workers. Although disparities persist in SHS exposure, the gaps among occupations have narrowed.

Citation

Wei B, Bernert JT, Blount BC, Sosnoff CS, Wang L, Richter P, Pirkle JL. 2016. Temporal trends of secondhand smoke exposure: nonsmoking workers in the United States (NHANES 2001-2010). Environ Health Perspect 124:1568-1574; http://dx.doi.org/10.1289/EHP165.",2016-05-10 +26315914,CoD: inferring immune-cell quantities related to disease states.,"

Motivation

The immune system comprises a complex network of genes, cells and tissues, coordinated through signaling pathways and cell-cell communications. However, the orchestrated role of the multiple immunological components in disease is still poorly understood. Classifications based on gene-expression data have revealed immune-related signaling pathways in various diseases, but how such pathways describe the immune cellular physiology remains largely unknown.

Results

We identify alterations in cell quantities discriminating between disease states using ' Cell type of Disease' (CoD), a classification-based approach that relies on computational immune-cell decomposition in gene-expression datasets. CoD attains significantly higher accuracy than alternative state-of-the-art methods. Our approach is shown to recapitulate and extend previous knowledge acquired with experimental cell-quantification technologies.

Conclusions

The results suggest that CoD can reveal disease-relevant cell types in an unbiased manner, potentially heralding improved diagnostics and treatment.

Availability and implementation

The software described in this article is available at http://www.csgi.tau.ac.il/CoD/.",2015-08-26 +25522086,"Update: influenza activity - United States, September 28- December 6, 2014.","CDC collects, compiles, and analyzes data on influenza activity year-round in the United States (http://www.cdc.gov/flu/weekly/fluactivitysurv.htm). The influenza season generally begins in the fall and continues through the winter and spring months; however, the timing and severity of circulating influenza viruses can vary by geographic location and season. Influenza activity in the United States increased starting mid-October through December. This report summarizes U.S. influenza activity during September 28-December 6, 2014.",2014-12-01 +25524593,Improving accuracy of protein contact prediction using balanced network deconvolution.,"Residue contact map is essential for protein three-dimensional structure determination. But most of the current contact prediction methods based on residue co-evolution suffer from high false-positives as introduced by indirect and transitive contacts (i.e., residues A-B and B-C are in contact, but A-C are not). Built on the work by Feizi et al. (Nat Biotechnol 2013; 31:726-733), which demonstrated a general network model to distinguish direct dependencies by network deconvolution, this study presents a new balanced network deconvolution (BND) algorithm to identify optimized dependency matrix without limit on the eigenvalue range in the applied network systems. The algorithm was used to filter contact predictions of five widely used co-evolution methods. 
On the test of proteins from three benchmark datasets of the 9th critical assessment of protein structure prediction (CASP9), CASP10, and PSICOV (precise structural contact prediction using sparse inverse covariance estimation) database experiments, the BND can improve the medium- and long-range contact predictions at the L/5 cutoff by 55.59% and 47.68%, respectively, without additional central processing unit cost. The improvement is statistically significant, with a P-value < 5.93 × 10(-3) in the Student's t-test. A further comparison with the ab initio structure predictions in CASPs showed that the usefulness of the current co-evolution-based contact prediction to the three-dimensional structure modeling relies on the number of homologous sequences existing in the sequence databases. BND can be used as a general contact refinement method, which is freely available at: http://www.csbio.sjtu.edu.cn/bioinf/BND/.",2015-01-24 +24285602,DDGni: dynamic delay gene-network inference from high-temporal data using gapped local alignment.,"

Motivation

Inferring gene-regulatory networks is very crucial in decoding various complex mechanisms in biological systems. Synthesis of a fully functional transcriptional factor/protein from DNA involves series of reactions, leading to a delay in gene regulation. The complexity increases with the dynamic delay induced by other small molecules involved in gene regulation, and noisy cellular environment. The dynamic delay in gene regulation is quite evident in high-temporal live cell lineage-imaging data. Although a number of gene-network-inference methods are proposed, most of them ignore the associated dynamic time delay.

Results

Here, we propose DDGni (dynamic delay gene-network inference), a novel gene-network-inference algorithm based on the gapped local alignment of gene-expression profiles. The local alignment can detect short-term gene regulations, that are usually overlooked by traditional correlation and mutual Information based methods. DDGni uses 'gaps' to handle the dynamic delay and non-uniform sampling frequency in high-temporal data, like live cell imaging data. Our algorithm is evaluated on synthetic and yeast cell cycle data, and Caenorhabditis elegans live cell imaging data against other prominent methods. The area under the curve of our method is significantly higher when compared to other methods on all three datasets.

Availability

The program, datasets and supplementary files are available at http://www.jjwanglab.org/DDGni/.",2013-11-27 +25620721,IMMAN: free software for information theory-based chemometric analysis.,"The features and theoretical background of a new and free computational program for chemometric analysis denominated IMMAN (acronym for Information theory-based CheMoMetrics ANalysis) are presented. This is multi-platform software developed in the Java programming language, designed with a remarkably user-friendly graphical interface for the computation of a collection of information-theoretic functions adapted for rank-based unsupervised and supervised feature selection tasks. A total of 20 feature selection parameters are presented, with the unsupervised and supervised frameworks represented by 10 approaches in each case. Several information-theoretic parameters traditionally used as molecular descriptors (MDs) are adapted for use as unsupervised rank-based feature selection methods. On the other hand, a generalization scheme for the previously defined differential Shannon's entropy is discussed, as well as the introduction of Jeffreys information measure for supervised feature selection. Moreover, well-known information-theoretic feature selection parameters, such as information gain, gain ratio, and symmetrical uncertainty are incorporated to the IMMAN software ( http://mobiosd-hub.com/imman-soft/ ), following an equal-interval discretization approach. IMMAN offers data pre-processing functionalities, such as missing values processing, dataset partitioning, and browsing. Moreover, single parameter or ensemble (multi-criteria) ranking options are provided. Consequently, this software is suitable for tasks like dimensionality reduction, feature ranking, as well as comparative diversity analysis of data matrices. Simple examples of applications performed with this program are presented. 
A comparative study between IMMAN and WEKA feature selection tools using the Arcene dataset was performed, demonstrating similar behavior. In addition, it is revealed that the use of IMMAN unsupervised feature selection methods improves the performance of both IMMAN and WEKA supervised algorithms. Graphic representation for Shannon's distribution of MD calculating software.",2015-01-26 +22146221,Minimotif Miner 3.0: database expansion and significantly improved reduction of false-positive predictions from consensus sequences.,"Minimotif Miner (MnM available at http://minimotifminer.org or http://mnm.engr.uconn.edu) is an online database for identifying new minimotifs in protein queries. Minimotifs are short contiguous peptide sequences that have a known function in at least one protein. Here we report the third release of the MnM database which has now grown 60-fold to approximately 300,000 minimotifs. Since short minimotifs are by their nature not very complex we also summarize a new set of false-positive filters and linear regression scoring that vastly enhance minimotif prediction accuracy on a test data set. This online database can be used to predict new functions in proteins and causes of disease.",2011-12-06 +21471019,Reasoning with bio-ontologies: using relational closure rules to enable practical querying.,"

Motivation

Ontologies have become indispensable in the Life Sciences for managing large amounts of knowledge. The use of logics in ontologies ranges from sound modelling to practical querying of that knowledge, thus adding a considerable value. We conceive reasoning on bio-ontologies as a semi-automated process in three steps: (i) defining a logic-based representation language; (ii) building a consistent ontology using that language; and (iii) exploiting the ontology through querying.

Results

Here, we report on how we have implemented this approach to reasoning on the OBO Foundry ontologies within BioGateway, a biological Resource Description Framework knowledge base. By separating the three steps in a manual curation effort on Metarel, a vocabulary that specifies relation semantics, we were able to apply reasoning on a large scale. Starting from an initial 401 million triples, we inferred about 158 million knowledge statements that allow for a myriad of prospective queries, potentially leading to new hypotheses about for instance gene products, processes, interactions or diseases.

Availability

SPARUL code, a query end point and curated relation types in OBO Format, RDF and OWL 2 DL are freely available at http://www.semantic-systems-biology.org/metarel.",2011-04-05 +24918764,"HIVE-hexagon: high-performance, parallelized sequence alignment for next-generation sequencing data analysis.","

Unlabelled

Due to the size of Next-Generation Sequencing data, the computational challenge of sequence alignment has been vast. Inexact alignments can take up to 90% of total CPU time in bioinformatics pipelines. High-performance Integrated Virtual Environment (HIVE), a cloud-based environment optimized for storage and analysis of extra-large data, presents an algorithmic solution: the HIVE-hexagon DNA sequence aligner. HIVE-hexagon implements novel approaches to exploit both characteristics of sequence space and CPU, RAM and Input/Output (I/O) architecture to quickly compute accurate alignments. Key components of HIVE-hexagon include non-redundification and sorting of sequences; floating diagonals of linearized dynamic programming matrices; and consideration of cross-similarity to minimize computations.

Availability

https://hive.biochemistry.gwu.edu/hive/",2014-06-11 +24123674,PLIDA: cross-platform gene expression normalization using perturbed topic models.,"

Motivation

Gene expression data are currently collected on a wide range of platforms. Differences between platforms make it challenging to combine and compare data collected on different platforms. We propose a new method of cross-platform normalization that uses topic models to summarize the expression patterns in each dataset before normalizing the topics learned from each dataset using per-gene multiplicative weights.

Results

This method allows for cross-platform normalization even when samples profiled on different platforms have systematic differences, allows the simultaneous normalization of data from an arbitrary number of platforms and, after suitable training, allows for online normalization of expression data collected individually or in small batches. In addition, our method outperforms existing state-of-the-art platform normalization tools.

Availability and implementation

MATLAB code is available at http://morrislab.med.utoronto.ca/plida/.",2013-10-11 +26069263,IBS: an illustrator for the presentation and visualization of biological sequences.,"

Unlabelled

Biological sequence diagrams are fundamental for visualizing various functional elements in protein or nucleotide sequences that enable a summarization and presentation of existing information as well as means of intuitive new discoveries. Here, we present a software package called illustrator of biological sequences (IBS) that can be used for representing the organization of either protein or nucleotide sequences in a convenient, efficient and precise manner. Multiple options are provided in IBS, and biological sequences can be manipulated, recolored or rescaled in a user-defined mode. Also, the final representational artwork can be directly exported into a publication-quality figure.

Availability and implementation

The standalone package of IBS was implemented in JAVA, while the online service was implemented in HTML5 and JavaScript. Both the standalone package and online service are freely available at http://ibs.biocuckoo.org.

Contact

renjian.sysu@gmail.com or xueyu@hust.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-10 +26644416,Improved topology prediction using the terminal hydrophobic helices rule.,"

Motivation

The translocon recognizes sufficiently hydrophobic regions of a protein and inserts them into the membrane. Computational methods try to determine what hydrophobic regions are recognized by the translocon. Although these predictions are quite accurate, many methods still fail to distinguish marginally hydrophobic transmembrane (TM) helices and equally hydrophobic regions in soluble protein domains. In vivo, this problem is most likely avoided by targeting of the TM-proteins, so that non-TM proteins never see the translocon. Proteins are targeted to the translocon by an N-terminal signal peptide. The targeting is also aided by the fact that the N-terminal helix is more hydrophobic than other TM-helices. In addition, we also recently found that the C-terminal helix is more hydrophobic than central helices. This information has not been used in earlier topology predictors.

Results

Here, we use the fact that the N- and C-terminal helices are more hydrophobic to develop a new version of the first-principle-based topology predictor, SCAMPI. The new predictor has two main advantages; first, it can be used to efficiently separate membrane and non-membrane proteins directly without the use of an extra prefilter, and second it shows improved performance for predicting the topology of membrane proteins that contain large non-membrane domains.

Availability and implementation

The predictor, a web server and all datasets are available at http://scampi.bioinfo.se/

Contact

arne@bioinfo.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-07 +22968136,Expanding the MTM1 mutational spectrum: novel variants including the first multi-exonic duplication and development of a locus-specific database.,"Myotubular myopathy (MIM#310400), the X-linked form of Centronuclear myopathy (CNM) is mainly characterized by neonatal hypotonia and inability to maintain unassisted respiration. The MTM1 gene, responsible for this disease, encodes myotubularin - a lipidic phosphatase involved in vesicle trafficking regulation and maturation. Recently, it was shown that myotubularin interacts with desmin, being a major regulator of intermediate filaments. We report the development of a locus-specific database for MTM1 using the Leiden Open Variation database software (http://www.lovd.nl/MTM1), with data collated for 474 mutations identified in 472 patients (by June 2012). Among the entries are a total of 25 new mutations, including a large deletion encompassing introns 2-15. During database implementation it was noticed that no large duplications had been reported. We tested a group of eight uncharacterized CNM patients for this specific type of mutation, by multiple ligation-dependent probe amplification (MLPA) analysis. A large duplication spanning exons 1-5 was identified in a boy with a mild phenotype, with results pointing toward possible somatic mosaicism. Further characterization revealed that this duplication causes an in-frame deletion at the mRNA level (r.343_444del). Results obtained with a next generation sequencing approach suggested that the duplication extends into the neighboring MAMLD1 gene and subsequent cDNA analysis detected the presence of a MTM1/MAMLD1 fusion transcript. A complex rearrangement involving the duplication of exon 10 has since been reported, with detection also enabled by MLPA analysis. 
It is thus conceivable that large duplications in MTM1 may account for a number of CNM cases that have remained genetically unresolved.",2012-09-12 +21854652,Haplo2Ped: a tool using haplotypes as markers for linkage analysis.,"

Background

Generally, SNPs are abundant in the genome; however, they display low power in linkage analysis because of their limited heterozygosity. Haplotype markers, on the other hand, which are composed of many SNPs, greatly increase heterozygosity and have superiority in linkage statistics.

Results

Here we developed Haplo2Ped to automatically transform SNP data into haplotype markers and then to compute the logarithm (base 10) of odds (LOD) scores of regional haplotypes that are homozygous within the disease co-segregation haploid group. The results are reported as a hypertext file and a 3D figure to help users to obtain the candidate linkage regions. The hypertext file contains parameters of the disease linked regions, candidate genes, and their links to public databases. The 3D figure clearly displays the linkage signals in each chromosome. We tested Haplo2Ped in a simulated SNP dataset and also applied it to data from a real study. It successfully and accurately located the causative genomic regions. Comparison of Haplo2Ped with other existing software for linkage analysis further indicated the high effectiveness of this software.

Conclusions

Haplo2Ped uses haplotype fragments as mapping markers in whole genome linkage analysis. The advantages of Haplo2Ped over other existing software include straightforward output files, increased accuracy and superior ability to deal with pedigrees showing incomplete penetrance. Haplo2Ped is freely available at: http://bighapmap.big.ac.cn/software.html.",2011-08-22 +25708243,SCMPSP: Prediction and characterization of photosynthetic proteins based on a scoring card method.,"

Background

Photosynthetic proteins (PSPs) greatly differ in their structure and function as they are involved in numerous subprocesses that take place inside an organelle called a chloroplast. Few studies predict PSPs from sequences due to their high variety of sequences and structures. This work aims to predict and characterize PSPs by establishing the datasets of PSP and non-PSP sequences and developing prediction methods.

Results

A novel bioinformatics method of predicting and characterizing PSPs based on scoring card method (SCMPSP) was used. First, a dataset consisting of 649 PSPs was established by using a Gene Ontology term GO:0015979 and 649 non-PSPs from the SwissProt database with sequence identity <= 25%. Several prediction methods are presented based on support vector machine (SVM), decision tree J48, Bayes, BLAST, and SCM. The SVM method using dipeptide features performed well and yielded a test accuracy of 72.31%. The SCMPSP method uses the estimated propensity scores of 400 dipeptides as PSPs and has a test accuracy of 71.54%, which is comparable to that of the SVM method. The derived propensity scores of 20 amino acids were further used to identify informative physicochemical properties for characterizing PSPs. The analytical results reveal the following four characteristics of PSPs: 1) PSPs favour hydrophobic side chain amino acids; 2) PSPs are composed of the amino acids prone to form helices in membrane environments; 3) PSPs have low interaction with water; and 4) PSPs prefer to be composed of the amino acids of electron-reactive side chains.

Conclusions

The SCMPSP method not only estimates the propensity of a sequence to be PSPs, it also discovers characteristics that further improve understanding of PSPs. The SCMPSP source code and the datasets used in this study are available at http://iclab.life.nctu.edu.tw/SCMPSP/.",2015-01-21 +22862831,Distribution and prediction of catalytic domains in 2-oxoglutarate dependent dioxygenases.,"

Background

The 2-oxoglutarate dependent superfamily is a diverse group of non-haem dioxygenases, and is present in prokaryotes, eukaryotes, and archaea. The enzymes differ in substrate preference and reaction chemistry, a factor that precludes their classification by homology studies and electronic annotation schemes alone. In this work, I propose and explore the rationale of using substrates to classify structurally similar alpha-ketoglutarate dependent enzymes.

Findings

Differential catalysis in phylogenetic clades of 2-OG dependent enzymes, is determined by the interactions of a subset of active-site amino acids. Identifying these with existing computational methods is challenging and not feasible for all proteins. A clustering protocol based on validated mechanisms of catalysis of known molecules, in tandem with group specific hidden markov model profiles is able to differentiate and sequester these enzymes. Access to this repository is by a web server that compares user defined unknown sequences to these pre-defined profiles and outputs a list of predicted catalytic domains. The server is free and is accessible at the following URL (http://comp-biol.theacms.in/H2OGpred.html).

Conclusions

The proposed stratification is a novel attempt at classifying and predicting 2-oxoglutarate dependent function. In addition, the server will provide researchers with a tool to compare their data to a comprehensive list of HMM profiles of catalytic domains. This work, will aid efforts by investigators to screen and characterize putative 2-OG dependent sequences. The profile database will be updated at regular intervals.",2012-08-04 +23935862,Improved inference of gene regulatory networks through integrated Bayesian clustering and dynamic modeling of time-course expression data.,"Inferring gene regulatory networks from expression data is difficult, but it is common and often useful. Most network problems are under-determined--there are more parameters than data points--and therefore data or parameter set reduction is often necessary. Correlation between variables in the model also contributes to confound network coefficient inference. In this paper, we present an algorithm that uses integrated, probabilistic clustering to ease the problems of under-determination and correlated variables within a fully Bayesian framework. Specifically, ours is a dynamic Bayesian network with integrated Gaussian mixture clustering, which we fit using variational Bayesian methods. We show, using public, simulated time-course data sets from the DREAM4 Challenge, that our algorithm outperforms non-clustering methods in many cases (7 out of 25) with fewer samples, rarely underperforming (1 out of 25), and often selects a non-clustering model if it better describes the data. Source code (GNU Octave) for BAyesian Clustering Over Networks (BACON) and sample data are available at: http://code.google.com/p/bacon-for-genetic-networks.",2013-07-23 +27054178,Effect of diazepam on sociability of rats submitted to neonatal seizures.,"Status epilepticus (SE), an acute condition characterized by repetitive or ongoing seizures activity, may produce long-term deleterious consequences. 
Previous data demonstrated that Wistar rats subjected to neonatal SE displayed autistic behavior, characterized by social play impairment, low preference by novelty, deficit in social discrimination; anxiety related behavior and stereotyped behavior with no changes in locomotor activity (doi: http://dx.doi.org/10.1007/s00702-010-0460-1, doi: http://dx.doi.org/10.3389/fnbeh.2013.00036, doi: http://dx.doi.org/10.1007/s00702-014-1291-2[1], [2], [3]). Taking into account the bi-directional relationship between the state of anxiety and social interaction (doi: http://dx.doi.org/10.1007/s10567-009-0062-3[4]), we evaluated the impact of the state of anxiety on social interaction. Male Wistar rats at postnatal day 9 were subjected to pilocarpine-induced neonatal SE (380 mg/kg, ip) and the controls received 0.9% saline (0.1 ml/10 g). The groups received saline or diazepam (1.0 mg/kg) 45 min prior each behavioral testing that started from 60 days of postnatal life. In the open field, rats subjected to neonatal seizure exhibited less central zone activity as compared to animals treated with diazepam, with no changes in the total locomotor activity. In elevated plus maze, rats subjected to neonatal seizure and treated with diazepam exhibited higher locomotor activity and spent more time on the open arms as compared to untreated animals. In approach phase of sociability paradigm, animals subjected to neonatal seizures similarly to controls, regardless the treatment, spent more time with social stimulus as compared to non social stimulus. 
In social novelty phase of sociability paradigm, animals subjected to neonatal seizures differently of controls, regardless the treatment, spent similar time with familiar and novel stimulus.",2016-03-12 +26510657,Identification of Microorganisms by High Resolution Tandem Mass Spectrometry with Accurate Statistical Significance.,"Correct and rapid identification of microorganisms is the key to the success of many important applications in health and safety, including, but not limited to, infection treatment, food safety, and biodefense. With the advance of mass spectrometry (MS) technology, the speed of identification can be greatly improved. However, the increasing number of microbes sequenced is challenging correct microbial identification because of the large number of choices present. To properly disentangle candidate microbes, one needs to go beyond apparent morphology or simple 'fingerprinting'; to correctly prioritize the candidate microbes, one needs to have accurate statistical significance in microbial identification. We meet these challenges by using peptidome profiles of microbes to better separate them and by designing an analysis method that yields accurate statistical significance. Here, we present an analysis pipeline that uses tandem MS (MS/MS) spectra for microbial identification or classification. We have demonstrated, using MS/MS data of 81 samples, each composed of a single known microorganism, that the proposed pipeline can correctly identify microorganisms at least at the genus and species levels. We have also shown that the proposed pipeline computes accurate statistical significances, i.e., E-values for identified peptides and unified E-values for identified microorganisms. The proposed analysis pipeline has been implemented in MiCId, a freely available software for Microorganism Classification and Identification. MiCId is available for download at http://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads.html . 
Graphical Abstract.",2015-10-28 +25521334,SG-ADVISER CNV: copy-number variant annotation and interpretation.,"

Purpose

Copy-number variants have been associated with a variety of diseases, especially cancer, autism, schizophrenia, and developmental delay. The majority of clinically relevant events occur de novo, necessitating the interpretation of novel events. In this light, we present the Scripps Genome ADVISER CNV annotation pipeline and Web server, which aims to fill the gap between copy number variant detection and interpretation by performing in-depth annotations and functional predictions for copy number variants.

Methods

The Scripps Genome ADVISER CNV suite includes a Web server interface to a high-performance computing environment for calculations of annotations and a table-based user interface that allows for the execution of numerous annotation-based variant filtration strategies and statistics.

Results

The annotation results include details regarding location, impact on the coding portion of genes, allele frequency information (including allele frequencies from the Scripps Wellderly cohort), and overlap information with other reference data sets (including ClinVar, DGV, DECIPHER). A summary variant classification is produced (ADVISER score) based on the American College of Medical Genetics and Genomics scoring guidelines. We demonstrate >90% sensitivity/specificity for detection of pathogenic events.

Conclusion

Scripps Genome ADVISER CNV is designed to allow users with no prior bioinformatics expertise to manipulate large volumes of copy-number variant data. Scripps Genome ADVISER CNV is available at http://genomics.scripps.edu/ADVISER/.",2014-12-18 +25527098,MicroRNA modules prefer to bind weak and unconventional target sites.,"

Motivation

MicroRNAs (miRNAs) play critical roles in gene regulation. Although it is well known that multiple miRNAs may work as miRNA modules to synergistically regulate common target mRNAs, the understanding of miRNA modules is still in its infancy.

Results

We employed the recently generated high throughput experimental data to study miRNA modules. We predicted 181 miRNA modules and 306 potential miRNA modules. We observed that the target sites of these predicted modules were in general weaker compared with those not bound by miRNA modules. We also discovered that miRNAs in predicted modules preferred to bind unconventional target sites rather than canonical sites. Surprisingly, contrary to a previous study, we found that most adjacent miRNA target sites from the same miRNA modules were not within the range of 10-130 nucleotides. Interestingly, the distance of target sites bound by miRNAs in the same modules was shorter when miRNA modules bound unconventional instead of canonical sites. Our study shed new light on miRNA binding and miRNA target sites, which will likely advance our understanding of miRNA regulation.

Availability and implementation

The software miRModule can be freely downloaded at http://hulab.ucf.edu/research/projects/miRNA/miRModule.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

haihu@cs.ucf.edu or xiaoman@mail.ucf.edu.",2014-12-18 +22753041,Review and update of SPRED1 mutations causing Legius syndrome.,"Legius syndrome presents as a mild neurofibromatosis type 1 (NF1) phenotype. Multiple café-au-lait spots and macrocephaly are present with or without axillary or inguinal freckling. Other typical NF1-associated features (Lisch nodules, bone abnormalities, neurofibromas, optic pathway gliomas, and malignant peripheral nerve sheath tumors) are systematically absent. Legius syndrome is caused by germline loss-of-function SPRED1 mutations, resulting in overactivation of the RAS-MAPK signal transduction cascade. The first families were identified in 2007. Here, we review all identified SPRED1 mutations and summarize molecular, clinical, and functional data. All mutations have been deposited in a database created using the Leiden Open Variation Database software and accessible at http://www.lovd.nl/SPRED1. At present, the database contains 89 different mutations identified in 146 unrelated probands, including 16 new variants described for the first time. The database contains a spectrum of mutations: 29 missense, 28 frameshift, 19 nonsense, eight copy number changes, two splicing, one silent, one in-frame deletion and a mutation affecting the initiation codon. Sixty-three mutations and deletions are definitely pathogenic or most likely pathogenic, eight SPRED1 mutations are probably benign rare variants, and 17 SPRED1 missense mutations are still unclassified and need further family and functional studies to help with the interpretation.",2012-08-01 +23933456,CloudNMF: a MapReduce implementation of nonnegative matrix factorization for large-scale biological datasets.,"In the past decades, advances in high-throughput technologies have led to the generation of huge amounts of biological data that require analysis and interpretation. 
Recently, nonnegative matrix factorization (NMF) has been introduced as an efficient way to reduce the complexity of data as well as to interpret them, and has been applied to various fields of biological research. In this paper, we present CloudNMF, a distributed open-source implementation of NMF on a MapReduce framework. Experimental evaluation demonstrated that CloudNMF is scalable and can be used to deal with huge amounts of data, which may enable various kinds of a high-throughput biological data analysis in the cloud. CloudNMF is freely accessible at http://admis.fudan.edu.cn/projects/CloudNMF.html.",2013-08-08 +21214365,DBCAT: database of CpG islands and analytical tools for identifying comprehensive methylation profiles in cancer cells.,"DBCAT (database of CpG islands and analytical tools, http://dbcat.cgm.ntu.edu.tw/ ), developed to characterize comprehensive DNA methylation profiles in human cancers, is a web-based application and methylation database containing several convenient tools for investigating epigenetic regulation in human diseases. To our knowledge, DBCAT is the first online methylation analytical tool, and is composed of three parts: a CpG island finder, a genome query browser, and a tool for analyzing methylation microarray data. The analytical tools can quickly identify genes with methylated regions from microarray data, compare the methylation status changes between different arrays, and provide functional analysis in addition to colocalizing transcription factor binding sites.",2011-01-08 +22411954,RNAimmuno: a database of the nonspecific immunological effects of RNA interference and microRNA reagents.,"The RNAimmuno database was created to provide easy access to information regarding the nonspecific effects generated in cells by RNA interference triggers and microRNA regulators. 
Various RNAi and microRNA reagents, which differ in length and structure, often cause non-sequence-specific immune responses, in addition to triggering the intended sequence-specific effects. The activation of the cellular sensors of foreign RNA or DNA may lead to the induction of type I interferon and proinflammatory cytokine release. Subsequent changes in the cellular transcriptome and proteome may result in adverse effects, including cell death during therapeutic treatments or the misinterpretation of experimental results in research applications. The manually curated RNAimmuno database gathers the majority of the published data regarding the immunological side effects that are caused in investigated cell lines, tissues, and model organisms by different reagents. The database is accessible at http://rnaimmuno.ibch.poznan.pl and may be helpful in the further application and development of RNAi- and microRNA-based technologies.",2012-03-12 +24564491,Eureka-DMA: an easy-to-operate graphical user interface for fast comprehensive investigation and analysis of DNA microarray data.,"

Background

In the past decade, the field of molecular biology has become increasingly quantitative; rapid development of new technologies enables researchers to investigate and address fundamental issues quickly and in an efficient manner which were once impossible. Among these technologies, DNA microarray provides methodology for many applications such as gene discovery, diseases diagnosis, drug development and toxicological research and it has been used increasingly since it first emerged. Multiple tools have been developed to interpret the high-throughput data produced by microarrays. However, many times, less consideration has been given to the fact that an extensive and effective interpretation requires close interplay between the bioinformaticians who analyze the data and the biologists who generate it. To bridge this gap and to simplify the usability of such tools we developed Eureka-DMA - an easy-to-operate graphical user interface that allows bioinformaticians and bench-biologists alike to initiate analyses as well as to investigate the data produced by DNA microarrays.

Results

In this paper, we describe Eureka-DMA, a user-friendly software that comprises a set of methods for the interpretation of gene expression arrays. Eureka-DMA includes methods for the identification of genes with differential expression between conditions; it searches for enriched pathways and gene ontology terms and combines them with other relevant features. It thus enables the full understanding of the data for following testing as well as generating new hypotheses. Here we show two analyses, demonstrating examples of how Eureka-DMA can be used and its capability to produce relevant and reliable results.

Conclusions

We have integrated several elementary expression analysis tools to provide a unified interface for their implementation. Eureka-DMA's simple graphical user interface provides effective and efficient framework in which the investigator has the full set of tools for the visualization and interpretation of the data with the option of exporting the analysis results for later use in other platforms. Eureka-DMA is freely available for academic users and can be downloaded at http://blue-meduza.org/Eureka-DMA.",2014-02-24 +26139635,LDlink: a web-based application for exploring population-specific haplotype structure and linking correlated alleles of possible functional variants.,"

Unlabelled

Assessing linkage disequilibrium (LD) across ancestral populations is a powerful approach for investigating population-specific genetic structure as well as functionally mapping regions of disease susceptibility. Here, we present LDlink, a web-based collection of bioinformatic modules that query single nucleotide polymorphisms (SNPs) in population groups of interest to generate haplotype tables and interactive plots. Modules are designed with an emphasis on ease of use, query flexibility, and interactive visualization of results. Phase 3 haplotype data from the 1000 Genomes Project are referenced for calculating pairwise metrics of LD, searching for proxies in high LD, and enumerating all observed haplotypes. LDlink is tailored for investigators interested in mapping common and uncommon disease susceptibility loci by focusing on output linking correlated alleles and highlighting putative functional variants.

Availability and implementation

LDlink is a free and publically available web tool which can be accessed at http://analysistools.nci.nih.gov/LDlink/.

Contact

mitchell.machiela@nih.gov.",2015-07-02 +25663124,Information and Decision-Making Needs Among People with Anxiety Disorders: Results of an Online Survey.,"

Background

People with anxiety disorders are faced with treatment decisions considerably affecting their life. Patient decision aids are aimed at enabling patients to deliberate treatment options based on individual values and to participate in medical decisions.

Objective

This is the first study to determine patients' information and decision-making needs as a pre-requisite for the development of patient decision aids for anxiety disorders.

Methods

An online cross-sectional survey was conducted between January and April 2013 on the e-health portal http://www.psychenet.de by using a self-administered questionnaire with items on internet use, online health information needs, role in decision making and important treatment decisions. Descriptive and inferential statistical as well as qualitative data analyses were performed.

Results

A total of 60 people with anxiety disorders with a mean age of 33.3 years (SD 10.5) participated in the survey. The most prevalent reasons for online health information search were the need for general information on anxiety disorders, the search for a physician or psychiatrist and the insufficiency of information given by the healthcare provider. Respondents experienced less shared and more autonomous decisions than they preferred. They assessed decisions on psychotherapy, medication, and treatment setting (inpatient or outpatient) as the most difficult decisions.

Conclusion

Our results confirm the importance of offering patient decision aids for people with anxiety disorders that encourage patients to participate in decision making by providing information about the pros and cons of evidence-based treatment options.",2015-12-01 +22954628,Application and evaluation of automated methods to extract neuroanatomical connectivity statements from free text.,"

Motivation

Automated annotation of neuroanatomical connectivity statements from the neuroscience literature would enable accessible and large-scale connectivity resources. Unfortunately, the connectivity findings are not formally encoded and occur as natural language text. This hinders aggregation, indexing, searching and integration of the reports. We annotated a set of 1377 abstracts for connectivity relations to facilitate automated extraction of connectivity relationships from neuroscience literature. We tested several baseline measures based on co-occurrence and lexical rules. We compare results from seven machine learning methods adapted from the protein interaction extraction domain that employ part-of-speech, dependency and syntax features.

Results

Co-occurrence based methods provided high recall with weak precision. The shallow linguistic kernel recalled 70.1% of the sentence-level connectivity statements at 50.3% precision. Owing to its speed and simplicity, we applied the shallow linguistic kernel to a large set of new abstracts. To evaluate the results, we compared 2688 extracted connections with the Brain Architecture Management System (an existing database of rat connectivity). The extracted connections were connected in the Brain Architecture Management System at a rate of 63.5%, compared with 51.1% for co-occurring brain region pairs. We found that precision increases with the recency and frequency of the extracted relationships.

Availability and implementation

The source code, evaluations, documentation and other supplementary materials are available at http://www.chibi.ubc.ca/WhiteText.

Contact

paul@chibi.ubc.ca.

Supplementary information

Supplementary data are available at Bioinformatics Online.",2012-09-06 +23151233,PolySac3DB: an annotated data base of 3 dimensional structures of polysaccharides.,"

Background

Polysaccharides are ubiquitously present in the living world. Their structural versatility makes them important and interesting components in numerous biological and technological processes ranging from structural stabilization to a variety of immunologically important molecular recognition events. The knowledge of polysaccharide three-dimensional (3D) structure is important in studying carbohydrate-mediated host-pathogen interactions, interactions with other bio-macromolecules, drug design and vaccine development as well as material science applications or production of bio-ethanol.

Description

PolySac3DB is an annotated database that contains the 3D structural information of 157 polysaccharide entries that have been collected from an extensive screening of scientific literature. They have been systematically organized using standard names in the field of carbohydrate research into 18 categories representing polysaccharide families. Structure-related information includes the saccharides making up the repeat unit(s) and their glycosidic linkages, the expanded 3D representation of the repeat unit, unit cell dimensions and space group, helix type, diffraction diagram(s) (when applicable), experimental and/or simulation methods used for structure description, link to the abstract of the publication, reference and the atomic coordinate files for visualization and download. The database is accompanied by a user-friendly graphical user interface (GUI). It features interactive displays of polysaccharide structures and customized search options for beginners and experts, respectively. The site also serves as an information portal for polysaccharide structure determination techniques. The web-interface also references external links where other carbohydrate-related resources are available.

Conclusion

PolySac3DB is established to maintain information on the detailed 3D structures of polysaccharides. All the data and features are available via the web-interface utilizing the search engine and can be accessed at http://polysac3db.cermav.cnrs.fr.",2012-11-14 +26306323,Identification of Drosophila centromere associated proteins by quantitative affinity purification-mass spectrometry.,"Centromeres of higher eukaryotes are epigenetically defined by the centromere specific histone H3 variant CENP-A(CID). CENP-A(CID) builds the foundation for the assembly of a large network of proteins. In contrast to mammalian systems, the protein composition of Drosophila centromeres has not been comprehensively investigated. Here we describe the proteome of Drosophila melanogaster centromeres as analyzed by quantitative affinity purification-mass spectrometry (AP-MS). The AP-MS input chromatin material was prepared from D. melanogaster cell lines expressing CENP-A(CID) or H3.3 fused to EGFP as baits. Centromere chromatin enriched proteins were identified based on their relative abundance in CENP-A(CID)-GFP compared to H3.3-GFP or mock affinity-purifications. The analysis yielded 86 proteins specifically enriched in centromere chromatin preparations. The data accompanying the manuscript on this approach (Barth et al., 2015, Proteomics 14:2167-78, DOI: 10.1002/pmic.201400052) has been deposited to the ProteomeXchange Consortium (http://www.proteomexchange.org) via the PRIDE partner repository with the dataset identifier PXD000758.",2015-07-26 +21895812,Genome-wide transcriptome dissection of the rice root system: implications for developmental and physiological functions.,"The root system is a crucial determinant of plant growth potential because of its important functions, e.g. uptake of water and nutrients, structural support and interaction with symbiotic organisms. 
Elucidating the molecular mechanism of root development and functions is therefore necessary for improving plant productivity, particularly for crop plants, including rice (Oryza sativa). As an initial step towards developing a comprehensive understanding of the root system, we performed a large-scale transcriptome analysis of the rice root via a combined laser microdissection and microarray approach. The crown root was divided into eight developmental stages along the longitudinal axis and three radial tissue types at two different developmental stages, namely: epidermis, exodermis and sclerenchyma; cortex; and endodermis, pericycle and stele. We analyzed a total of 38 microarray data and identified 22,297 genes corresponding to 17,010 loci that showed sufficient signal intensity as well as developmental- and tissue type-specific transcriptome signatures. Moreover, we clarified gene networks associated with root cap function and lateral root formation, and further revealed antagonistic and synergistic interactions of phytohormones such as auxin, cytokinin, brassinosteroids and ethylene, based on the expression pattern of genes related to phytohormone biosynthesis and signaling. Expression profiling of transporter genes defined not only major sites for uptake and transport of water and nutrients, but also distinct signatures of the radial transport system from the rhizosphere to the xylem vessel for each nutrient. All data can be accessed from our gene expression profile database, RiceXPro (http://ricexpro.dna.affrc.go.jp), thereby providing useful information for understanding the molecular mechanisms involved in root system development of crop plants.",2011-10-25 +25326237,A novel and fast approach for population structure inference using kernel-PCA and optimization.,"Population structure is a confounding factor in genome-wide association studies, increasing the rate of false positive associations. 
To correct for it, several model-based algorithms such as ADMIXTURE and STRUCTURE have been proposed. These tend to suffer from the fact that they have a considerable computational burden, limiting their applicability when used with large datasets, such as those produced by next generation sequencing techniques. To address this, nonmodel based approaches such as sparse nonnegative matrix factorization (sNMF) and EIGENSTRAT have been proposed, which scale better with larger data. Here we present a novel nonmodel-based approach, population structure inference using kernel-PCA and optimization (PSIKO), which is based on a unique combination of linear kernel-PCA and least-squares optimization and allows for the inference of admixture coefficients, principal components, and number of founder populations of a dataset. PSIKO has been compared against existing leading methods on a variety of simulation scenarios, as well as on real biological data. We found that in addition to producing results of the same quality as other tested methods, PSIKO scales extremely well with dataset size, being considerably (up to 30 times) faster for longer sequences than even state-of-the-art methods such as sNMF. PSIKO and accompanying manual are freely available at https://www.uea.ac.uk/computing/psiko.",2014-10-16 +25648210,A two-phase binning algorithm using l-mer frequency on groups of non-overlapping reads.,"

Background

Metagenomics is the study of genetic materials derived directly from complex microbial samples, instead of from culture. One of the crucial steps in metagenomic analysis, referred to as ""binning"", is to separate reads into clusters that represent genomes from closely related organisms. Among the existing binning methods, unsupervised methods base the classification on features extracted from reads, and especially taking advantage in case of the limitation of reference database availability. However, their performance, under various aspects, is still being investigated by recent theoretical and empirical studies. The one addressed in this paper is among those efforts to enhance the accuracy of the classification.

Results

This paper presents an unsupervised algorithm, called BiMeta, for binning of reads from different species in a metagenomic dataset. The algorithm consists of two phases. In the first phase of the algorithm, reads are grouped into groups based on overlap information between the reads. The second phase merges the groups by using an observation on l-mer frequency distribution of sets of non-overlapping reads. The experimental results on simulated and real datasets showed that BiMeta outperforms three state-of-the-art binning algorithms for both short and long reads (≥700 b p) datasets.

Conclusions

This paper developed a novel and efficient algorithm for binning of metagenomic reads, which does not require any reference database. The software implementing the algorithm and all test datasets mentioned in this paper can be downloaded at http://it.hcmute.edu.vn/bioinfo/bimeta/index.htm.",2015-01-16 +25592675,Biomedical question answering using semantic relations.,"

Background

The proliferation of the scientific literature in the field of biomedicine makes it difficult to keep abreast of current knowledge, even for domain experts. While general Web search engines and specialized information retrieval (IR) systems have made important strides in recent decades, the problem of accurate knowledge extraction from the biomedical literature is far from solved. Classical IR systems usually return a list of documents that have to be read by the user to extract relevant information. This tedious and time-consuming work can be lessened with automatic Question Answering (QA) systems, which aim to provide users with direct and precise answers to their questions. In this work we propose a novel methodology for QA based on semantic relations extracted from the biomedical literature.

Results

We extracted semantic relations with the SemRep natural language processing system from 122,421,765 sentences, which came from 21,014,382 MEDLINE citations (i.e., the complete MEDLINE distribution up to the end of 2012). A total of 58,879,300 semantic relation instances were extracted and organized in a relational database. The QA process is implemented as a search in this database, which is accessed through a Web-based application, called SemBT (available at http://sembt.mf.uni-lj.si ). We conducted an extensive evaluation of the proposed methodology in order to estimate the accuracy of extracting a particular semantic relation from a particular sentence. Evaluation was performed by 80 domain experts. In total 7,510 semantic relation instances belonging to 2,675 distinct relations were evaluated 12,083 times. The instances were evaluated as correct 8,228 times (68%).

Conclusions

In this work we propose an innovative methodology for biomedical QA. The system is implemented as a Web-based application that is able to provide precise answers to a wide range of questions. A typical question is answered within a few seconds. The tool has some extensions that make it especially useful for interpretation of DNA microarray results.",2015-01-16 +22983720,Are your asset data as good as you think? Conducting a comprehensive census of built assets to improve urban population health.,"Secondary data sources are widely used to measure the built asset environment, although their validity for this purpose is not well-established. Using community-engaged research methodology, this study conducted a census of public-facing, built assets via direct observation and then tested the performance of these data against widely used secondary datasets. After engaging community organizations, a community education campaign was implemented. Using web-enabled cell phones and a web-based application prepopulated with the secondary data, census workers verified, modified, and/or added assets using street-level observation, supplementing data with web searches and telephone calls. Data were uploaded to http://www.SouthSideHealth.org . Using direct observation as the criterion standard, the sensitivity of secondary datasets was calculated. Of 5,773 assets on the prepopulated list, direct observation of public-facing assets verified 1,612 as operating; another 653 operating assets were newly identified. Sensitivity of the commercial list for nonresidential, operating assets was 61 %. Using the asset census as the criterion standard, secondary datasets were incomplete and inaccurate. Comprehensive, accurate built asset data are needed to advance urban health research, inform policy, and improve individuals' access to assets.",2013-08-01 +27516747,Smartphone-Based Psychotherapeutic Micro-Interventions to Improve Mood in a Real-World Setting.,"

Background

Using mobile communication technology as new personalized approach to treat mental disorders or to more generally improve quality of life is highly promising. Knowledge about intervention components that target key psychopathological processes in terms of transdiagnostic psychotherapy approaches is urgently needed. We explored the use of smartphone-based micro-interventions based on psychotherapeutic techniques, guided by short video-clips, to elicit mood changes.

Method

As part of a larger neurofeedback study, all subjects-after being randomly assigned to an experimental or control neurofeedback condition-underwent daily smartphone-based micro-interventions for 13 consecutive days. They were free to choose out of provided techniques, including viscerosensory attention, emotional imagery, facial expression, and contemplative repetition. Changes in mood were assessed in real world using the Multidimensional Mood State Questionnaire (scales: good-bad, GB; awake-tired, AT; and calm-nervous, CN).

Results

Twenty-seven men participated on at least 11 days and were thus included in the analyses. Altogether, they underwent 335, generally well-tolerated, micro-intervention sessions, with viscerosensory attention (178 sessions, 53.13%) and contemplative repetition (68 sessions, 20.30%) being the most frequently applied techniques. Mixed models indicated that subjects showed better mood [GB: b = 0.464, 95%confidence interval (CI) [0.068, 0.860], t (613.3) = 2.298, p = 0.022] and became more awake [AT: b = 0.514, 95%CI [0.103, 0.925], t (612.4) = 2.456, p = 0.014] and calmer [CN: b = 0.685, 95%CI [0.360, 1.010], t (612.3) = 4.137, p < 0.001] from pre- to post-micro-intervention. These mood improvements from pre- to post-micro-intervention were associated with changes in mood from the 1st day until the last day with regard to GB mood (r = 0.614, 95%CI [0.297, 0.809], p < 0.001), but not AT mood (r = 0.279, 95%CI [-0.122, 0.602], p = 0.167) and CN mood (r = 0.277, 95%CI [0.124, 0.601], p = 0.170).

Discussion

Our findings provide evidence for the applicability of smartphone-based micro-interventions eliciting short-term mood changes, based on techniques used in psychotherapeutic approaches, such as mindfulness-based psychotherapy, transcendental meditation, and other contemplative therapies. The results encourage exploring these techniques' capability to improve mood in randomized controlled studies and patients. Smartphone-based micro-interventions are promising to modify mood in real-world settings, complementing other psychotherapeutic interventions, in line with the precision medicine approach. The here presented data were collected within a randomized trial, registered at ClinicalTrials.gov (Identifier: NCT01921088) https://clinicaltrials.gov/ct2/show/NCT01921088.",2016-07-28 +25452682,"In silico prediction of synthetic lethality by meta-analysis of genetic interactions, functions, and pathways in yeast and human cancer.","A major goal in cancer medicine is to find selective drugs with reduced side effect. A pair of genes is called synthetic lethality (SL) if mutations of both genes will kill a cell while mutation of either gene alone will not. Hence, a gene in SL interactions with a cancer-specific mutated gene will be a promising drug target with anti-cancer selectivity. Wet-lab screening approach is still so costly that even for yeast only a small fraction of gene pairs has been covered. Computational methods are therefore important for large-scale discovery of SL interactions. Most existing approaches focus on individual features or machine-learning methods, which are prone to noise or overfitting. In this paper, we propose an approach named MetaSL for predicting yeast SL, which integrates 17 genomic and proteomic features and the outputs of 10 classification methods. MetaSL thus combines the strengths of existing methods and achieves the highest area under the Receiver Operating Characteristics (ROC) curve (AUC) of 87.1% among all competitors on yeast data. 
Moreover, through orthologous mapping from yeast to human genes, we then predicted several lists of candidate SL pairs in human cancer. Our method and predictions would thus shed light on mechanisms of SL and lead to discovery of novel anti-cancer drugs. In addition, all the experimental results can be downloaded from http://www.ntu.edu.sg/home/zhengjie/data/MetaSL.",2014-11-05 +26742147,DISC: Deep Image Saliency Computing via Progressive Representation Learning.,"Salient object detection increasingly receives attention as an important component or step in several pattern recognition and image processing tasks. Although a variety of powerful saliency models have been intensively proposed, they usually involve heavy feature (or model) engineering based on priors (or assumptions) about the properties of objects and backgrounds. Inspired by the effectiveness of recently developed feature learning, we provide a novel deep image saliency computing (DISC) framework for fine-grained image saliency computing. In particular, we model the image saliency from both the coarse-and fine-level observations, and utilize the deep convolutional neural network (CNN) to learn the saliency representation in a progressive manner. In particular, our saliency model is built upon two stacked CNNs. The first CNN generates a coarse-level saliency map by taking the overall image as the input, roughly identifying saliency regions in the global context. Furthermore, we integrate superpixel-based local context information in the first CNN to refine the coarse-level saliency map. Guided by the coarse saliency map, the second CNN focuses on the local context to produce fine-grained and accurate saliency map while preserving object details. For a testing image, the two CNNs collaboratively conduct the saliency computing in one shot. Our DISC framework is capable of uniformly highlighting the objects of interest from complex background while preserving well object details. 
Extensive experiments on several standard benchmarks suggest that DISC outperforms other state-of-the-art methods and it also generalizes well across data sets without additional training. The executable version of DISC is available online: http://vision.sysu.edu.cn/projects/DISC.",2016-01-05 +23281827,Helminth secretome database (HSD): a collection of helminth excretory/secretory proteins predicted from expressed sequence tags (ESTs).,"

Background

Helminths are important socio-economic organisms, responsible for causing major parasitic infections in humans, other animals and plants. These infections impose a significant public health and economic burden globally. Exceptionally, some helminth organisms like Caenorhabditis elegans are free-living in nature and serve as model organisms for studying parasitic infections. Excretory/secretory proteins play an important role in parasitic helminth infections which make these proteins attractive targets for therapeutic use. In the case of helminths, large volume of expressed sequence tags (ESTs) has been generated to understand parasitism at molecular level and for predicting excretory/secretory proteins for developing novel strategies to tackle parasitic infections. However, mostly predicted ES proteins are not available for further analysis and there is no repository available for such predicted ES proteins. Furthermore, predictions have, in the main, focussed on classical secretory pathways while it is well established that helminth parasites also utilise non-classical secretory pathways.

Results

We developed a free Helminth Secretome Database (HSD), which serves as a repository for ES proteins predicted using classical and non-classical secretory pathways, from EST data for 78 helminth species (64 nematodes, 7 trematodes and 7 cestodes) ranging from parasitic to free-living organisms. Approximately 0.9 million ESTs compiled from the largest EST database, dbEST were cleaned, assembled and analysed by different computational tools in our bioinformatics pipeline and predicted ES proteins were submitted to HSD.

Conclusion

We report the large-scale prediction and analysis of classically and non-classically secreted ES proteins from diverse helminth organisms. All the Unigenes (contigs and singletons) and excretory/secretory protein datasets generated from this analysis are freely available. A BLAST server is available at http://estexplorer.biolinfo.org/hsd, for checking the sequence similarity of new protein sequences against predicted helminth ES proteins.",2012-12-13 +21486466,OryzaPG-DB: rice proteome database based on shotgun proteogenomics.,"

Background

Proteogenomics aims to utilize experimental proteome information for refinement of genome annotation. Since mass spectrometry-based shotgun proteomics approaches provide large-scale peptide sequencing data with high throughput, a data repository for shotgun proteogenomics would represent a valuable source of gene expression evidence at the translational level for genome re-annotation.

Description

Here, we present OryzaPG-DB, a rice proteome database based on shotgun proteogenomics, which incorporates the genomic features of experimental shotgun proteomics data. This version of the database was created from the results of 27 nanoLC-MS/MS runs on a hybrid ion trap-orbitrap mass spectrometer, which offers high accuracy for analyzing tryptic digests from undifferentiated cultured rice cells. Peptides were identified by searching the product ion spectra against the protein, cDNA, transcript and genome databases from Michigan State University, and were mapped to the rice genome. Approximately 3200 genes were covered by these peptides and 40 of them contained novel genomic features. Users can search, download or navigate the database per chromosome, gene, protein, cDNA or transcript and download the updated annotations in standard GFF3 format, with visualization in PNG format. In addition, the database scheme of OryzaPG was designed to be generic and can be reused to host similar proteogenomic information for other species. OryzaPG is the first proteogenomics-based database of the rice proteome, providing peptide-based expression profiles, together with the corresponding genomic origin, including the annotation of novelty for each peptide.

Conclusions

The OryzaPG database was constructed and is freely available at http://oryzapg.iab.keio.ac.jp/.",2011-04-12 +22843985,TMBB-DB: a transmembrane β-barrel proteome database.,"

Motivation

We previously reported the development of a highly accurate statistical algorithm for identifying β-barrel outer membrane proteins or transmembrane β-barrels (TMBBs), from genomic sequence data of Gram-negative bacteria (Freeman,T.C. and Wimley,W.C. (2010) Bioinformatics, 26, 1965-1974). We have now applied this identification algorithm to all available Gram-negative bacterial genomes (over 600 chromosomes) and have constructed a publicly available, searchable, up-to-date, database of all proteins in these genomes.

Results

For each protein in the database, there is information on (i) β-barrel membrane protein probability for identification of β-barrels, (ii) β-strand and β-hairpin propensity for structure and topology prediction, (iii) signal sequence score because most TMBBs are secreted through the inner membrane translocon and, thus, have a signal sequence, and (iv) transmembrane α-helix predictions, for reducing false positive predictions. This information is sufficient for the accurate identification of most β-barrel membrane proteins in these genomes. In the database there are nearly 50 000 predicted TMBBs (out of 1.9 million total putative proteins). Of those, more than 15 000 are 'hypothetical' or 'putative' proteins, not previously identified as TMBBs. This wealth of genomic information is not available anywhere else.

Availability

The TMBB genomic database is available at http://beta-barrel.tulane.edu/.

Contact

wwimley@tulane.edu.",2012-07-27 +23766287,PromoterCAD: Data-driven design of plant regulatory DNA.,"Synthetic promoters can control the timing, location and amount of gene expression for any organism. PromoterCAD is a web application for designing synthetic promoters with altered transcriptional regulation. We use a data-first approach, using published high-throughput expression and motif data from for Arabidopsis thaliana to guide DNA design. We demonstrate data mining tools for finding motifs related to circadian oscillations and tissue-specific expression patterns. PromoterCAD is built on the LinkData open platform for data publication and rapid web application development, allowing new data to be easily added, and the source code modified to add new functionality. PromoterCAD URL: http://promotercad.org. LinkData URL: http://linkdata.org.",2013-06-12 +22121216,"OriDB, the DNA replication origin database updated and extended.","OriDB (http://www.oridb.org/) is a database containing collated genome-wide mapping studies of confirmed and predicted replication origin sites. The original database collated and curated Saccharomyces cerevisiae origin mapping studies. Here, we report that the OriDB database and web site have been revamped to improve user accessibility to curated data sets, to greatly increase the number of curated origin mapping studies, and to include the collation of replication origin sites in the fission yeast Schizosaccharomyces pombe. The revised database structure underlies these improvements and will facilitate further expansion in the future. The updated OriDB for S. cerevisiae is available at http://cerevisiae.oridb.org/ and for S. pombe at http://pombe.oridb.org/.",2011-11-24 +24135263,Discretized Gaussian mixture for genotyping of microsatellite loci containing homopolymer runs.,"

Motivation

Inferring lengths of inherited microsatellite alleles with single base pair resolution from short sequence reads is challenging due to several sources of noise caused by the repetitive nature of microsatellites and the technologies used to generate raw sequence data.

Results

We have developed a program, GenoTan, using a discretized Gaussian mixture model combined with a rules-based approach to identify inherited variation of microsatellite loci from short sequence reads without paired-end information. It effectively distinguishes length variants from noise including insertion/deletion errors in homopolymer runs by addressing the bidirectional aspect of insertion and deletion errors in sequence reads. Here we first introduce a homopolymer decomposition method which estimates error bias toward insertion or deletion in homopolymer sequence runs. Combining these approaches, GenoTan was able to genotype 94.9% of microsatellite loci accurately from simulated data with 40x sequence coverage quickly while the other programs showed <90% correct calls for the same data and required 5∼30× more computational time than GenoTan. It also showed the highest true-positive rate for real data using mixed sequence data of two Drosophila inbred lines, which was a novel validation approach for genotyping.

Availability

GenoTan is open-source software available at http://genotan.sourceforge.net.",2013-10-17 +27140204,Changes in Dyspnea Status During Hospitalization and Postdischarge Health-Related Quality of Life in Patients Hospitalized for Heart Failure: Findings From the EVEREST Trial. ,"Dyspnea is the most common symptom among hospitalized patients with heart failure and represents a therapeutic target. However, the association between short-term dyspnea relief and postdischarge clinical outcomes and health-related quality of life (HRQOL) remains uncertain. A post hoc analysis was performed of the Efficacy of Vasopressin Antagonism in Heart Failure: Outcome Study with Tolvaptan (EVEREST) trial, which enrolled 4133 patients within 48 hours of admission for heart failure with an ejection fraction ≤40%. Physician-assessed dyspnea was recorded on a daily basis from baseline until discharge or day 7 as none, seldom, frequent, or continuous. Patient-reported dyspnea was measured using a 7-point Likert scale, and patients experiencing moderate or marked dyspnea improvement on day 1 were classified as early responders. The Kansas City Cardiomyopathy Questionnaire summary score, which ranges from 0 to 100, was collected postdischarge at week 1. The primary outcome was unfavorable HRQOL, defined a priori as a Kansas City Cardiomyopathy Questionnaire score <45. Secondary outcomes included 30-day all-cause mortality, and all-cause and cause-specific hospitalizations. The final analytic cohort included 1567 patients discharged alive with complete HRQOL data. Patients were 66.0±12.7 years old and had a mean ejection fraction of 25±8%. Physician-assessed dyspnea was rated as frequent or continuous in 1399 patients (90%) at baseline, which decreased to 250 patients (16%) by discharge, whereas patient-reported early dyspnea relief was reported by 610 patients (40%). The median Kansas City Cardiomyopathy Questionnaire score at week 1 was 50 (35, 65). 
All-cause mortality was 3.0%, and all-cause hospitalization was 20.5% within 30 days of discharge. Physician-assessed and patient-reported dyspnea was not independently associated with HRQOL, all-cause mortality, or all-cause or cause-specific hospitalization. In-hospital physician-assessed, and patient-reported dyspnea was not independently associated with postdischarge HRQOL, survival, or readmissions. Although dyspnea relief remains a goal of therapy for hospitalized patients with heart failure with reduced ejection fraction, this measure may not be a reliable surrogate for long-term patient-centered or hard clinical outcomes. URL: http://www.clinicaltrials.gov. Unique identifier: NCT00071331.",2016-05-01 +25189780,Estimation of GFP-tagged RNA numbers from temporal fluorescence intensity data.,"

Motivation

MS2-GFP-tagging of RNA is currently the only method to measure intervals between consecutive transcription events in live cells. For this, new transcripts must be accurately detected from intensity time traces.

Results

We present a novel method for automatically estimating RNA numbers and production intervals from temporal data of cell fluorescence intensities that reduces uncertainty by exploiting temporal information. We also derive a robust variant, more resistant to outliers caused e.g. by RNAs moving out of focus. Using Monte Carlo simulations, we show that the quantification of RNA numbers and production intervals is generally improved compared with previous methods. Finally, we analyze data from live Escherichia coli and show statistically significant differences to previous methods. The new methods can be used to quantify numbers and production intervals of any fluorescent probes, which are present in low copy numbers, are brighter than the cell background and degrade slowly.

Availability

Source code is available under Mozilla Public License at http://www.cs.tut.fi/%7ehakkin22/jumpdet/.",2014-09-03 +25801552,Pharmacokinetics of a novel sublingual spray formulation of the antimalarial drug artemether in African children with malaria.,"The pharmacokinetics of sublingual artemether (ArTiMist) was investigated in 91 young African children with severe malaria or who could not tolerate oral antimalarial therapy. Each received 3.0 mg/kg of body weight of artemether at 0, 8, 24, 36, 48, and 60 h or until the initiation of oral treatment. Few blood samples were drawn postdose. Plasma artemether and dihydroartemisinin (DHA) levels were measured using liquid chromatography-mass spectrometry, and the data were analyzed using established population compartmental pharmacokinetic models. Parasite clearance was prompt (median parasite clearance time, 24 h), and there were no serious adverse events. Consistent with studies in healthy adults (S. Salman, D. Bendel, T. C. Lee, D. Templeton, and T. M. E. Davis, Antimicrob Agents Chemother 59:3197-3207, 2015, http://dx.doi.org/10.1128/AAC.05013-14), the absorption of sublingual artemether was biphasic, and multiple dosing was associated with the autoinduction of the metabolism of artemether to DHA (which itself has potent antimalarial activity). In contrast to studies using healthy volunteers, pharmacokinetic modeling indicated that the first absorption phase did not avoid first-pass metabolism, suggesting that the drug is transferred to the upper intestine through postdose fluid/food intake. 
Simulations using the present data and those from an earlier study in older Melanesian children with uncomplicated malaria treated with artemether-lumefantrine tablets suggested that the bioavailability of sublingual artemether was at least equivalent to that after conventional oral artemether-lumefantrine (median [interquartile range] areas under the concentration-time curve for artemether, 3,403 [2,471 to 4,771] versus 3,063 [2,358 to 4,514] μg · h/liter, respectively; and for DHA, 2,958 [2,146 to 4,278] versus 2,839 [1,812 to 3,488] μg · h/liter, respectively; P ≥ 0.42). These findings suggest that sublingual artemether could be used as prereferral treatment for sick children before transfer for definitive management of severe or moderately severe malaria.",2015-03-23 +23975765,GIM3E: condition-specific models of cellular metabolism developed from metabolomics and expression data.,"

Motivation

Genome-scale metabolic models have been used extensively to investigate alterations in cellular metabolism. The accuracy of these models to represent cellular metabolism in specific conditions has been improved by constraining the model with omics data sources. However, few practical methods for integrating metabolomics data with other omics data sources into genome-scale models of metabolism have been developed.

Results

GIM(3)E (Gene Inactivation Moderated by Metabolism, Metabolomics and Expression) is an algorithm that enables the development of condition-specific models based on an objective function, transcriptomics and cellular metabolomics data. GIM(3)E establishes metabolite use requirements with metabolomics data, uses model-paired transcriptomics data to find experimentally supported solutions and provides calculations of the turnover (production/consumption) flux of metabolites. GIM(3)E was used to investigate the effects of integrating additional omics datasets to create increasingly constrained solution spaces of Salmonella Typhimurium metabolism during growth in both rich and virulence media. This integration proved to be informative and resulted in a requirement of additional active reactions (12 in each case) or metabolites (26 or 29, respectively). The addition of constraints from transcriptomics also impacted the allowed solution space, and the cellular metabolites with turnover fluxes that were necessarily altered by the change in conditions increased from 118 to 271 of 1397.

Availability

GIM(3)E has been implemented in Python and requires COBRApy 0.2.x. The algorithm and sample data described here are freely available at: http://opencobra.sourceforge.net/

Contacts

brianjamesschmidt@gmail.com",2013-08-23 +24359289,Mars for Earthlings: an analog approach to Mars in undergraduate education.,"Mars for Earthlings (MFE) is a terrestrial Earth analog pedagogical approach to teaching undergraduate geology, planetary science, and astrobiology. MFE utilizes Earth analogs to teach Mars planetary concepts, with a foundational backbone in Earth science principles. The field of planetary science is rapidly changing with new technologies and higher-resolution data sets. Thus, it is increasingly important to understand geological concepts and processes for interpreting Mars data. MFE curriculum is topically driven to facilitate easy integration of content into new or existing courses. The Earth-Mars systems approach explores planetary origins, Mars missions, rocks and minerals, active driving forces/tectonics, surface sculpting processes, astrobiology, future explorations, and hot topics in an inquiry-driven environment. Curriculum leverages heavily upon multimedia resources, software programs such as Google Mars and JMARS, as well as NASA mission data such as THEMIS, HiRISE, CRISM, and rover images. Two years of MFE class evaluation data suggest that science literacy and general interest in Mars geology and astrobiology topics increased after participation in the MFE curriculum. Students also used newly developed skills to create a Mars mission team presentation. 
The MFE curriculum, learning modules, and resources are available online at http://serc.carleton.edu/marsforearthlings/index.html.",2013-12-21 +25940629,NFFinder: an online bioinformatics tool for searching similar transcriptomics experiments in the context of drug repositioning.,"Drug repositioning, using known drugs for treating conditions different from those the drug was originally designed to treat, is an important drug discovery tool that allows for a faster and cheaper development process by using drugs that are already approved or in an advanced trial stage for another purpose. This is especially relevant for orphan diseases because they affect too few people to make drug research de novo economically viable. In this paper we present NFFinder, a bioinformatics tool for identifying potential useful drugs in the context of orphan diseases. NFFinder uses transcriptomic data to find relationships between drugs, diseases and a phenotype of interest, as well as identifying experts having published on that domain. The application shows in a dashboard a series of graphics and tables designed to help researchers formulate repositioning hypotheses and identify potential biological relationships between drugs and diseases. NFFinder is freely available at http://nffinder.cnb.csic.es.",2015-05-04 +25161234,Stronger findings for metabolomics through Bayesian modeling of multiple peaks and compound correlations.,"

Motivation

Data analysis for metabolomics suffers from uncertainty because of the noisy measurement technology and the small sample size of experiments. Noise and the small sample size lead to a high probability of false findings. Further, individual compounds have natural variation between samples, which in many cases renders them unreliable as biomarkers. However, the levels of similar compounds are typically highly correlated, which is a phenomenon that we model in this work.

Results

We propose a hierarchical Bayesian model for inferring differences between groups of samples more accurately in metabolomic studies, where the observed compounds are collinear. We discover that the method decreases the error of weak and non-existent covariate effects, and thereby reduces false-positive findings. To achieve this, the method makes use of the mass spectral peak data by clustering similar peaks into latent compounds, and by further clustering latent compounds into groups that respond in a coherent way to the experimental covariates. We demonstrate the method with three simulated studies and validate it with a metabolomic benchmark dataset.

Availability and implementation

An implementation in R is available at http://research.ics.aalto.fi/mi/software/peakANOVA/.",2014-09-01 +27591931,"EAU-ESTRO-SIOG Guidelines on Prostate Cancer. Part II: Treatment of Relapsing, Metastatic, and Castration-Resistant Prostate Cancer.","

Objective

To present a summary of the 2016 version of the European Association of Urology (EAU) - European Society for Radiotherapy & Oncology (ESTRO) - International Society of Geriatric Oncology (SIOG) Guidelines on the treatment of relapsing, metastatic, and castration-resistant prostate cancer (CRPC).

Evidence acquisition

The working panel performed a literature review of the new data (2013-2015). The guidelines were updated, and the levels of evidence and/or grades of recommendation were added based on a systematic review of the literature.

Evidence synthesis

Relapse after local therapy is defined by a rising prostate-specific antigen (PSA) level >0.2ng/ml following radical prostatectomy (RP) and >2ng/ml above the nadir after radiation therapy (RT). 11C-choline positron emission tomography/computed tomography is of limited importance if PSA is <1.0ng/ml; bone scans and computed tomography can be omitted unless PSA is >10ng/ml. Multiparametric magnetic resonance imaging and biopsy are important to assess biochemical failure following RT. Therapy for PSA relapse after RP includes salvage RT at PSA levels <0.5ng/ml and salvage RP, high-intensity focused ultrasound, cryosurgical ablation or salvage brachytherapy of the prostate in radiation failures. Androgen deprivation therapy (ADT) remains the basis for treatment of men with metastatic prostate cancer (PCa). However, docetaxel combined with ADT should be considered the standard of care for men with metastases at first presentation, provided they are fit enough to receive the drug. Follow-up of ADT should include analysis of PSA, testosterone levels, and screening for cardiovascular disease and metabolic syndrome. Level 1 evidence for the treatment of metastatic CRPC (mCRPC) includes, abiraterone acetate plus prednisone (AA/P), enzalutamide, radium 223 (Ra 223), docetaxel at 75 mg/m2 every 3 wk and sipuleucel-T. Cabazitaxel, AA/P, enzalutamide, and radium are approved for second-line treatment of CRPC following docetaxel. Zoledronic acid and denosumab can be used in men with mCRPC and osseous metastases to prevent skeletal-related complications.

Conclusions

The knowledge in the field of advanced and metastatic PCa and CRPC is changing rapidly. The 2016 EAU-ESTRO-SIOG Guidelines on PCa summarise the most recent findings and advice for use in clinical practice. These PCa guidelines are the first endorsed by the European Society for Therapeutic Radiology and Oncology and the International Society of Geriatric Oncology and reflect the multidisciplinary nature of PCa management. A full version is available from the EAU office or online (http://uroweb.org/guideline/prostate-cancer/).

Patient summary

In men with a rise in their PSA levels after prior local treatment for prostate cancer only, it is important to balance overtreatment against further progression of the disease since survival and quality of life may never be affected in many of these patients. For patients diagnosed with metastatic castrate-resistant prostate cancer, several new drugs have become available which may provide a clear survival benefit but the optimal choice will have to be made on an individual basis.",2016-08-31 +26432355,A context-aware approach for progression tracking of medical concepts in electronic medical records.,"Electronic medical records (EMRs) for diabetic patients contain information about heart disease risk factors such as high blood pressure, cholesterol levels, and smoking status. Discovering the described risk factors and tracking their progression over time may support medical personnel in making clinical decisions, as well as facilitate data modeling and biomedical research. Such highly patient-specific knowledge is essential to driving the advancement of evidence-based practice, and can also help improve personalized medicine and care. One general approach for tracking the progression of diseases and their risk factors described in EMRs is to first recognize all temporal expressions, and then assign each of them to the nearest target medical concept. However, this method may not always provide the correct associations. In light of this, this work introduces a context-aware approach to assign the time attributes of the recognized risk factors by reconstructing contexts that contain more reliable temporal expressions. The evaluation results on the i2b2 test set demonstrate the efficacy of the proposed approach, which achieved an F-score of 0.897. 
To boost the approach's ability to process unstructured clinical text and to allow for the reproduction of the demonstrated results, a set of developed .NET libraries used to develop the system is available at https://sites.google.com/site/hongjiedai/projects/nttmuclinicalnet.",2015-09-30 +25249626,"Insyght: navigating amongst abundant homologues, syntenies and gene functional annotations in bacteria, it's that symbol! ","High-throughput techniques have considerably increased the potential of comparative genomics whilst simultaneously posing many new challenges. One of those challenges involves efficiently mining the large amount of data produced and exploring the landscape of both conserved and idiosyncratic genomic regions across multiple genomes. Domains of application of these analyses are diverse: identification of evolutionary events, inference of gene functions, detection of niche-specific genes or phylogenetic profiling. Insyght is a comparative genomic visualization tool that combines three complementary displays: (i) a table for thoroughly browsing amongst homologues, (ii) a comparator of orthologue functional annotations and (iii) a genomic organization view designed to improve the legibility of rearrangements and distinctive loci. The latter display combines symbolic and proportional graphical paradigms. Synchronized navigation across multiple species and interoperability between the views are core features of Insyght. A gene filter mechanism is provided that helps the user to build a biologically relevant gene set according to multiple criteria such as presence/absence of homologues and/or various annotations. We illustrate the use of Insyght with scenarios. Currently, only Bacteria and Archaea are supported. A public instance is available at http://genome.jouy.inra.fr/Insyght. 
The tool is freely downloadable for private data set analysis.",2014-09-23 +25488297,Bacteria in solitary confinement.,"Even in clonal bacterial cultures, individual bacteria can show substantial stochastic variation, leading to pitfalls in the interpretation of data derived from millions of cells in a culture. In this issue of the Journal of Bacteriology, as part of their study on osmoadaptation in a cyanobacterium, Nanatani et al. describe employing an ingenious microfluidic device that gently cages individual cells (J Bacteriol 197:676-687, 2015, http://dx.doi.org/10.1128/JB.02276-14). The device is a welcome addition to the toolkit available to probe the responses of individual cells to environmental cues.",2014-12-08 +21276275,MEMOSys: Bioinformatics platform for genome-scale metabolic models.,"

Background

Recent advances in genomic sequencing have enabled the use of genome sequencing in standard biological and biotechnological research projects. The challenge is how to integrate the large amount of data in order to gain novel biological insights. One way to leverage sequence data is to use genome-scale metabolic models. We have therefore designed and implemented a bioinformatics platform which supports the development of such metabolic models.

Results

MEMOSys (MEtabolic MOdel research and development System) is a versatile platform for the management, storage, and development of genome-scale metabolic models. It supports the development of new models by providing a built-in version control system which offers access to the complete developmental history. Moreover, the integrated web board, the authorization system, and the definition of user roles allow collaborations across departments and institutions. Research on existing models is facilitated by a search system, references to external databases, and a feature-rich comparison mechanism. MEMOSys provides customizable data exchange mechanisms using the SBML format to enable analysis in external tools. The web application is based on the Java EE framework and offers an intuitive user interface. It currently contains six annotated microbial metabolic models.

Conclusions

We have developed a web-based system designed to provide researchers a novel application facilitating the management and development of metabolic models. The system is freely available at http://www.icbi.at/MEMOSys.",2011-01-31 +24354303,Focus: a robust workflow for one-dimensional NMR spectral analysis.,"One-dimensional (1)H NMR represents one of the most commonly used analytical techniques in metabolomic studies. The increase in the number of samples analyzed as well as the technical improvements involving instrumentation and spectral acquisition demand increasingly accurate and efficient high-throughput data processing workflows. We present FOCUS, an integrated and innovative methodology that provides a complete data analysis workflow for one-dimensional NMR-based metabolomics. This tool will allow users to easily obtain a NMR peak feature matrix ready for chemometric analysis as well as metabolite identification scores for each peak that greatly simplify the biological interpretation of the results. The algorithm development has been focused on solving the critical difficulties that appear at each data processing step and that can dramatically affect the quality of the results. As well as method integration, simplicity has been one of the main objectives in FOCUS development, requiring very little user input to perform accurate peak alignment, peak picking, and metabolite identification. The new spectral alignment algorithm, RUNAS, allows peak alignment with no need of a reference spectrum, and therefore, it reduces the bias introduced by other alignment approaches. Spectral alignment has been tested against previous methodologies obtaining substantial improvements in the case of moderate or highly unaligned spectra. Metabolite identification has also been significantly improved, using the positional and correlation peak patterns in contrast to a reference metabolite panel. 
Furthermore, the complete workflow has been tested using NMR data sets from 60 human urine samples and 120 aqueous liver extracts, reaching a successful identification of 42 metabolites from the two data sets. The open-source software implementation of this methodology is available at http://www.urr.cat/FOCUS.",2013-12-31 +22962482,An approach to describing and analysing bulk biological annotation quality: a case study using UniProtKB.,"

Motivation

Annotations are a key feature of many biological databases, used to convey our knowledge of a sequence to the reader. Ideally, annotations are curated manually, however manual curation is costly, time consuming and requires expert knowledge and training. Given these issues and the exponential increase of data, many databases implement automated annotation pipelines in an attempt to avoid un-annotated entries. Both manual and automated annotations vary in quality between databases and annotators, making assessment of annotation reliability problematic for users. The community lacks a generic measure for determining annotation quality and correctness, which we look at addressing within this article. Specifically we investigate word reuse within bulk textual annotations and relate this to Zipf's Principle of Least Effort. We use the UniProt Knowledgebase (UniProtKB) as a case study to demonstrate this approach since it allows us to compare annotation change, both over time and between automated and manually curated annotations.

Results

By applying power-law distributions to word reuse in annotation, we show clear trends in UniProtKB over time, which are consistent with existing studies of quality on free text English. Further, we show a clear distinction between manual and automated analysis and investigate cohorts of protein records as they mature. These results suggest that this approach holds distinct promise as a mechanism for judging annotation quality.

Availability

Source code is available at the authors' website: http://homepages.cs.ncl.ac.uk/m.j.bell1/annotation.

Contact

phillip.lord@newcastle.ac.uk.",2012-09-01 +26415724,Drug-set enrichment analysis: a novel tool to investigate drug mode of action.,"

Motivation

Automated screening approaches are able to rapidly identify a set of small molecules inducing a desired phenotype from large small-molecule libraries. However, the resulting set of candidate molecules is usually very diverse pharmacologically, thus little insight on the shared mechanism of action (MoA) underlying their efficacy can be gained.

Results

We introduce a computational method (Drug-Set Enrichment Analysis-DSEA) based on drug-induced gene expression profiles, which is able to identify the molecular pathways that are targeted by most of the drugs in the set. By diluting drug-specific effects unrelated to the phenotype of interest, DSEA is able to highlight phenotype-specific pathways, thus helping to formulate hypotheses on the MoA shared by the drugs in the set. We validated the method by analysing five different drug-sets related to well-known pharmacological classes. We then applied DSEA to identify the MoA shared by drugs known to be partially effective in rescuing mutant cystic fibrosis transmembrane conductance regulator (CFTR) gene function in Cystic Fibrosis.

Availability and implementation

The method is implemented as an online web tool publicly available at http://dsea.tigem.it.

Contact

dibernardo@tigem.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-28 +25513722,iDrug-Target: predicting the interactions between drug compounds and target proteins in cellular networking via benchmark dataset optimization approach.,"Information about the interactions of drug compounds with proteins in cellular networking is very important for drug development. Unfortunately, all the existing predictors for identifying drug-protein interactions were trained by a skewed benchmark data-set where the number of non-interactive drug-protein pairs is overwhelmingly larger than that of the interactive ones. Using this kind of highly unbalanced benchmark data-set to train predictors would lead to the outcome that many interactive drug-protein pairs might be mispredicted as non-interactive. Since the minority interactive pairs often contain the most important information for drug design, it is necessary to minimize this kind of misprediction. In this study, we adopted the neighborhood cleaning rule and synthetic minority over-sampling technique to treat the skewed benchmark datasets and balance the positive and negative subsets. The new benchmark datasets thus obtained are called the optimized benchmark datasets, based on which a new predictor called iDrug-Target was developed that contains four sub-predictors: iDrug-GPCR, iDrug-Chl, iDrug-Ezy, and iDrug-NR, specialized for identifying the interactions of drug compounds with GPCRs (G-protein-coupled receptors), ion channels, enzymes, and NR (nuclear receptors), respectively. Rigorous cross-validations on a set of experiment-confirmed datasets have indicated that these new predictors remarkably outperformed the existing ones for the same purpose. To maximize users' convenience, a public accessible Web server for iDrug-Target has been established at http://www.jci-bioinfo.cn/iDrug-Target/ , by which users can easily get their desired results. 
It has not escaped our notice that the aforementioned strategy can be widely used in many other areas as well.",2015-01-14 +21326365,In silico identification and characterization of microRNAs and their putative target genes in Solanaceae plants.,"MicroRNAs (miRNAs) are a class of small, single-stranded, noncoding RNAs ranging from 19 to 25 nucleotides. The miRNA control various cellular functions by negatively regulating gene expression at the post-transcriptional level. The miRNA regulation over their target genes has a central role in regulating plant growth and development; however, only a few reports have been published on the function of miRNAs in the family Solanaceae. We identified Solanaceae miRNAs and their target genes by analyzing expressed sequence tag (EST) data from five different Solanaceae species. A comprehensive bioinformatic analysis of EST data of Solanaceae species revealed the presence of at least 11 miRNAs and 54 target genes in pepper (Capsicum annuum L.), 22 miRNAs and 221 target genes in potato (Solanum tuberosum L.), 12 miRNAs and 417 target genes in tomato (Solanum lycopersicum L.), 46 miRNAs and 60 target genes in tobacco (Nicotiana tabacum L.), and 7 miRNAs and 28 target genes in Nicotiana benthamiana. The identified Solanaceae miRNAs and their target genes were deposited in the SolmiRNA database, which is freely available for academic research only at http://genepool.kribb.re.kr/SolmiRNA. Our data indicate that the Solanaceae family has both conserved and specific miRNAs and that their target genes may play important roles in growth and development of Solanaceae plants.",2011-02-01 +23144783,Ranking transitive chemical-disease inferences using local network topology in the comparative toxicogenomics database.,"Exposure to chemicals in the environment is believed to play a critical role in the etiology of many human diseases. 
To enhance understanding about environmental effects on human health, the Comparative Toxicogenomics Database (CTD; http://ctdbase.org) provides unique curated data that enable development of novel hypotheses about the relationships between chemicals and diseases. CTD biocurators read the literature and curate direct relationships between chemicals-genes, genes-diseases, and chemicals-diseases. These direct relationships are then computationally integrated to create additional inferred relationships; for example, a direct chemical-gene statement can be combined with a direct gene-disease statement to generate a chemical-disease inference (inferred via the shared gene). In CTD, the number of inferences has increased exponentially as the number of direct chemical, gene and disease interactions has grown. To help users navigate and prioritize these inferences for hypothesis development, we implemented a statistic to score and rank them based on the topology of the local network consisting of the chemical, disease and each of the genes used to make an inference. In this network, chemicals, diseases and genes are nodes connected by edges representing the curated interactions. Like other biological networks, node connectivity is an important consideration when evaluating the CTD network, as the connectivity of nodes follows the power-law distribution. Topological methods reduce the influence of highly connected nodes that are present in biological networks. We evaluated published methods that used local network topology to determine the reliability of protein-protein interactions derived from high-throughput assays. We developed a new metric that combines and weights two of these methods and uniquely takes into account the number of common neighbors and the connectivity of each entity involved. 
We present several CTD inferences as case studies to demonstrate the value of this metric and the biological relevance of the inferences.",2012-11-07 +27120296,Intrauterine Inflammation and Maternal Exposure to Ambient PM2.5 during Preconception and Specific Periods of Pregnancy: The Boston Birth Cohort.,"

Background

Prenatal exposure to ambient PM2.5, (i.e., fine particulate matter, aerodynamic diameter ≤ 2.5 μm) has been associated with preterm birth and low birth weight. The association between prenatal PM2.5 exposure and intrauterine inflammation (IUI), an important risk factor for preterm birth and neurodevelopmental outcomes, has not been evaluated.

Objectives

We aimed to investigate the association between maternal exposure to PM2.5 and IUI in the Boston Birth Cohort, a predominantly urban low-income minority population.

Methods

This analysis included 5,059 mother-infant pairs in the Boston Birth Cohort. IUI was assessed based on intrapartum fever and placenta pathology. PM2.5 exposure was assigned using data from the U.S. EPA's Air Quality System. Odds ratios (OR) and 95% confidence intervals (CI) quantified the association of maternal PM2.5 exposure during preconception and various periods of pregnancy with IUI.

Results

Comparing the highest with the lowest PM2.5 exposure quartiles, the multi-adjusted association with IUI was significant for all exposure periods considered, including 3 months before conception (OR = 1.52; 95% CI: 1.22, 1.89), first trimester (OR = 1.93; 95% CI: 1.55, 2.40), second trimester (OR = 1.67; 95% CI: 1.35, 2.08), third trimester (OR = 1.53; 95% CI: 1.24, 1.90), and whole pregnancy (OR = 1.92; 95% CI: 1.55, 2.37).

Conclusions

Despite relatively low exposures, our results suggest a monotonic positive relationship between PM2.5 exposure during preconception and pregnancy and IUI. IUI may be a sensitive biomarker for assessing early biological effect of PM2.5 exposure on the developing fetus.

Citation

Nachman RM, Mao G, Zhang X, Hong X, Chen Z, Soria CS, He H, Wang G, Caruso D, Pearson C, Biswal S, Zuckerman B, Wills-Karp M, Wang X. 2016. Intrauterine inflammation and maternal exposure to ambient PM2.5 during preconception and specific periods of pregnancy: the Boston Birth Cohort. Environ Health Perspect 124:1608-1615; http://dx.doi.org/10.1289/EHP243.",2016-04-27 +26851799,Blood Eosinophils and Outcomes in Severe Hospitalized Exacerbations of COPD.,"

Background

Patients with moderate exacerbations of COPD and the eosinophilic phenotype have better outcomes with prednisolone. Whether this outcome is similar in patients hospitalized with a severe exacerbation of COPD is unclear. We investigated the rate of recovery of eosinophilic and noneosinophilic exacerbations in patients participating in a multicenter randomized controlled trial assessing health outcomes in hospitalized exacerbations.

Methods

Patients were recruited at presentation to the hospital with an exacerbation of COPD. They were stratified into groups according to eosinophilic exacerbations if the peripheral blood eosinophil count on admission was ≥ 200 cells/μL and/or ≥ 2% of the total leukocyte count. Admission details, serum C-reactive protein levels, length of stay, and subsequent rehospitalization data were compared between groups.

Results

A total of 243 patients with COPD (117 men) with a mean age of 71 years (range, 45-93 years) were recruited. The inpatient mortality rate was 3% (median time to death, 12 days; range, 9-16 days). The median absolute eosinophil count was 100 cells/μL (range, 10-1,500 cells/μL), and 25% met our criteria for an eosinophilic exacerbation; in this population, the mean length of stay (in days) was shorter than in patients with noneosinophilic exacerbations (5.0 [range, 1-19] vs 6.5 [range, 1-33]; P = .015) following treatment with oral corticosteroids and independent of treatment prior to admission. Readmission rates at 12 months were similar between groups.

Conclusions

The study patients presenting to the hospital with a severe eosinophilic exacerbation of COPD had a shorter length of stay. The exacerbations were usually not associated with elevated C-reactive protein levels, suggesting that better treatment stratification of exacerbations can be used.

Trial registry

http://www.isrctn.com/ISRCTN05557928.",2016-02-03 +24404838,An integrated approach (CLuster Analysis Integration Method) to combine expression data and protein-protein interaction networks in agrigenomics: application on Arabidopsis thaliana.,"Experimental co-expression data and protein-protein interaction networks are frequently used to analyze the interactions among genes or proteins. Recent studies have investigated methods to integrate these two sources of information. We propose a new method to integrate co-expression data obtained through DNA microarray analysis (MA) and protein-protein interaction (PPI) network data, and apply it to Arabidopsis thaliana. The proposed method identifies small subsets of highly interacting proteins. Based on the analysis of the basis of co-localization and mRNA developmental expression, we show that these groups provide important biological insights; additionally, these subsets are significantly enriched with respect to KEGG Pathways and can be used to predict successfully whether proteins belong to known pathways. Thus, the method is able to provide relevant biological information and support the functional identification of complex genetic traits of economic value in plant agrigenomics research. The method has been implemented in a prototype software tool named CLAIM (CLuster Analysis Integration Method) and can be downloaded from http://bio.cs.put.poznan.pl/research_fields . CLAIM is based on the separate clustering of MA and PPI data; the clusters are merged in a special graph; cliques of this graph are subsets of strongly connected proteins. The proposed method was successfully compared with existing methods. 
CLAIM appears to be a useful semi-automated tool for protein functional analysis and warrants further evaluation in agrigenomics research.",2014-01-03 +26357263,Multiple 3D RNA Structure Superposition Using Neighbor Joining.,"Recent advances in RNA research and the steady growth of available RNA structures call for bioinformatics methods for handling and analyzing RNA structural data. Recently, we introduced SETTER-a fast and accurate method for RNA pairwise structure alignment. In this paper, we describe MultiSETTER, SETTER extension for multiple RNA structure alignment. MultiSETTER combines SETTER's decomposition of RNA structures into non-overlapping structural subunits with the multiple sequence alignment algorithm ClustalW adapted for the structure alignment. The accuracy of MultiSETTER was assessed by the automatic classification of RNA structures and its comparison to SCOR annotations. In addition, MultiSETTER classification was also compared to multiple sequence alignment-based and secondary structure alignment-based classifications provided by LocARNA and RNADistance tools, respectively. MultiSETTER precompiled Windows libraries, as well as the C++ source code, are freely available from http://siret.cz/multisetter.",2015-05-01 +21821666,NeuroPedia: neuropeptide database and spectral library.,"

Summary

Neuropeptides are essential for cell-cell communication in neurological and endocrine physiological processes in health and disease. While many neuropeptides have been identified in previous studies, the resulting data has not been structured to facilitate further analysis by tandem mass spectrometry (MS/MS), the main technology for high-throughput neuropeptide identification. Many neuropeptides are difficult to identify when searching MS/MS spectra against large protein databases because of their atypical lengths (e.g. shorter/longer than common tryptic peptides) and lack of tryptic residues to facilitate peptide ionization/fragmentation. NeuroPedia is a neuropeptide encyclopedia of peptide sequences (including genomic and taxonomic information) and spectral libraries of identified MS/MS spectra of homolog neuropeptides from multiple species. Searching neuropeptide MS/MS data against known NeuroPedia sequences will improve the sensitivity of database search tools. Moreover, the availability of neuropeptide spectral libraries will also enable the utilization of spectral library search tools, which are known to further improve the sensitivity of peptide identification. These will also reinforce the confidence in peptide identifications by enabling visual comparisons between new and previously identified neuropeptide MS/MS spectra.

Availability

http://proteomics.ucsd.edu/Software/NeuroPedia.html

Contact

bandeira@ucsd.edu

Supplementary information

Supplementary materials are available at Bioinformatics online.",2011-08-05 +24875471,"PTHGRN: unraveling post-translational hierarchical gene regulatory networks using PPI, ChIP-seq and gene expression data.","Interactions among transcriptional factors (TFs), cofactors and other proteins or enzymes can affect transcriptional regulatory capabilities of eukaryotic organisms. Post-translational modifications (PTMs) cooperate with TFs and epigenetic alterations to constitute a hierarchical complexity in transcriptional gene regulation. While clearly implicated in biological processes, our understanding of these complex regulatory mechanisms is still limited and incomplete. Various online software have been proposed for uncovering transcriptional and epigenetic regulatory networks, however, there is a lack of effective web-based software capable of constructing underlying interactive organizations between post-translational and transcriptional regulatory components. Here, we present an open web server, post-translational hierarchical gene regulatory network (PTHGRN) to unravel relationships among PTMs, TFs, epigenetic modifications and gene expression. PTHGRN utilizes a graphical Gaussian model with partial least squares regression-based methodology, and is able to integrate protein-protein interactions, ChIP-seq and gene expression data and to capture essential regulation features behind high-throughput data. The server provides an integrative platform for users to analyze ready-to-use public high-throughput Omics resources or upload their own data for systems biology study. Users can choose various parameters in the method, build network topologies of interests and dissect their associations with biological functions. Application of the software to stem cell and breast cancer demonstrates that it is an effective tool for understanding regulatory mechanisms in biological complex systems. 
PTHGRN web server is publically available at web site http://www.byanbioinfo.org/pthgrn.",2014-05-29 +24489370,Integrative gene set analysis of multi-platform data with sample heterogeneity.,"

Motivation

Gene set analysis is a popular method for large-scale genomic studies. Because genes that have common biological features are analyzed jointly, gene set analysis often achieves better power and generates more biologically informative results. With the advancement of technologies, genomic studies with multi-platform data have become increasingly common. Several strategies have been proposed that integrate genomic data from multiple platforms to perform gene set analysis. To evaluate the performances of existing integrative gene set methods under various scenarios, we conduct a comparative simulation analysis based on The Cancer Genome Atlas breast cancer dataset.

Results

We find that existing methods for gene set analysis are less effective when sample heterogeneity exists. To address this issue, we develop three methods for multi-platform genomic data with heterogeneity: two non-parametric methods, multi-platform Mann-Whitney statistics and multi-platform outlier robust T-statistics, and a parametric method, multi-platform likelihood ratio statistics. Using simulations, we show that the proposed multi-platform Mann-Whitney statistics method has higher power for heterogeneous samples and comparable performance for homogeneous samples when compared with the existing methods. Our real data applications to two datasets of The Cancer Genome Atlas also suggest that the proposed methods are able to identify novel pathways that are missed by other strategies.

Availability and implementation

http://www4.stat.ncsu.edu/∼jytzeng/Software/Multiplatform_gene_set_analysis/",2014-01-30 +24878920,ChIP-Enrich: gene set enrichment testing for ChIP-seq data.,"Gene set enrichment testing can enhance the biological interpretation of ChIP-seq data. Here, we develop a method, ChIP-Enrich, for this analysis which empirically adjusts for gene locus length (the length of the gene body and its surrounding non-coding sequence). Adjustment for gene locus length is necessary because it is often positively associated with the presence of one or more peaks and because many biologically defined gene sets have an excess of genes with longer or shorter gene locus lengths. Unlike alternative methods, ChIP-Enrich can account for the wide range of gene locus length-to-peak presence relationships (observed in ENCODE ChIP-seq data sets). We show that ChIP-Enrich has a well-calibrated type I error rate using permuted ENCODE ChIP-seq data sets; in contrast, two commonly used gene set enrichment methods, Fisher's exact test and the binomial test implemented in Genomic Regions Enrichment of Annotations Tool (GREAT), can have highly inflated type I error rates and biases in ranking. We identify DNA-binding proteins, including CTCF, JunD and glucocorticoid receptor α (GRα), that show different enrichment patterns for peaks closer to versus further from transcription start sites. We also identify known and potential new biological functions of GRα. ChIP-Enrich is available as a web interface (http://chip-enrich.med.umich.edu) and Bioconductor package.",2014-05-30 +30722515,"First Report of Globisporangium ultimum Causing Pythium Damping-Off on Aleppo Pine in Algeria, Africa, and the Mediterranean Region.","Globisporangium ultimum (Trow) Uzuhashi, Tojo & Kakish. (syn. Pythium ultimum Trow, syn. P. ultimum Trow var. ultimum) is a known oomycetal species from Pythium s.l. 
causing damping-off and/or root rot on a great variety of plants throughout the world, including some pine species (Pinus L.) and conifers (2,3). Aleppo pine (Pinus halepensis Mill.) is a common native forest tree in the Mediterranean region. Pre- and post-emergence damping-off disease symptoms were observed during 2008 and 2009 in four forest nurseries from northwestern Algeria (Relizane, Sidi Belabes, and Tlemcen departments). This disease occurred under cool conditions, and Aleppo pines were significantly affected, reducing seedling emergence. Disinfected segments, about 5 mm in length, from decayed root and collar, were cultured on CMA at 25°C. This oomycetal species was identified based on the species description in Pythium keys (3,4). For the molecular identification, PCR was used to amplify the ITS region of Pythium isolates. It was amplified with the flanking primers ITS1 and ITS4, and these products were directly sequenced. Sequence data were compared to known sequences deposited in the NCBI non redundant database to confirm morphological identification. A BLAST search identified U3CR, U7CR, U1RT, U2CR, U4CR, U14CR, U7RT, and U17RT isolates (GenBank Accession Nos. JX191921, 22, 27, 29, 31, and 33 to 35, respectively) as G. ultimum based on 100% similarity with corresponding sequence of the reference isolate no. UZ056 MAFF240024 (AB468781) (3). Phytopathogenicity testing was conducted in a petri dish and pot experiment. In the petri dish experiment, a 3 mm diameter plug was transferred from a 7-day-old CMA colony to the center of a CMA petri dish, with three replicates per isolate, and three control plates were inoculated with sterile agar plugs. After 72 h, 10 Aleppo pine seeds were placed equally spaced to 1 cm from the edge of each plug. After 7 days at 22°C in the dark, germination inhibition (46.1 to 87.6%) and root growth inhibition (62.3 to 92.2%) were calculated. 
In the control plates, germination failure (13.4%) and root length (27.7 cm) were observed. For the pot experiment, inocula were produced by adding a 5 mm diameter plug from a 7-day-old CMA culture to a previously sterilized 500 ml flask containing 237.5 g sand, 12.5 g cornmeal, and 80 ml SDW. Nine-day-old inoculum was mixed with sterile soil at a rate of 1:3 (v:v). Inoculum was transferred to 500 ml pot, and 10 Aleppo pine seeds were planted, with three replicates per isolate, and three control pots were used. After 2 weeks, all of the isolates tested caused typical symptoms of Aleppo pine Pythium damping-off, the percentage of inoculated plants that became infected was 36.6 to 83.3%. In the control pots, no infected plants were observed. To our knowledge, this is the first report of G. ultimum causing damping-off on Aleppo pine in Algeria, Africa, and the Mediterranean Region. Before, Aleppo pine damping-off caused by G. ultimum was reported in Australia (1). References: (1) R. P. Cook and A. J. Dubé. Host-pathogen index of plant diseases in South Australia. SADA, Melbourne, Australia, 1989. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory. ARS, USDA, Bestville, MD. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , June 24, 2012. (3) S. Uzuhashi et al. Mycoscience 51:337, 2010. (4) A. J. van der Plaats-Niterink. Stud. Mycol. 21:1, 1981.",2013-08-01 +22740138,An interethnic variability and a functional prediction of DNA repair gene polymorphisms: the example of XRCC3 (p.Thr241>Met) and XPD (p.Lys751>Gln) in a healthy Tunisian population.,"Genetic polymorphisms in DNA repair genes might influence the repair activities of the enzymes predisposing individuals to cancer risk. Owing to the presence of these genetic variants, interethnic differences in DNA repair capacity have been observed in various populations. 
The present study was undertaken to determine the allele and genotype frequencies of two common non-synonymous SNPs, XRCC3 p.Thr241>Met (C > T, rs861539) and XPD p.Lys751>Gln (T > G, rs13181) in a healthy Tunisian population and to compare them with HapMap ( http://www.hapmap.org/ ) populations. Also, we predicted their eventual functional effect based on bioinformatics tools. The genotypes of 154 healthy and unrelated individuals were determined by PCR-RFLP procedure. Our findings showed a close relatedness with Caucasians from European ancestry which might be explained by the strategic geographic location of Tunisia in the Mediterranean, thus allowing exchanges with Europeans countries. The in silico predictions showed that p.Thr241>Met substitution in XRCC3 protein was predicted as possibly damaging, indicating that it is likely to have functional consequences as well. To the best of our knowledge, this is the first study in this regard in Tunisia. So, these data could provide baseline database and help us to explore the relationship of XRCC3 and XPD polymorphisms with both cancer risk and DNA repair variability in our population.",2012-06-28 +25385275,"VariSNP, a benchmark database for variations from dbSNP.","For development and evaluation of methods for predicting the effects of variations, benchmark datasets are needed. Some previously developed datasets are available for this purpose, but newer and larger benchmark sets for benign variants have largely been missing. VariSNP datasets are selected from dbSNP. These subsets were filtered against disease-related variants in the ClinVar, UniProtKB/Swiss-Prot, and PhenCode databases, to identify neutral or nonpathogenic cases. All variant descriptions include mapping to reference sequences on chromosomal, genomic, coding DNA, and protein levels. 
The datasets will be updated with automated scripts on a regular basis and are freely available at http://structure.bmc.lu.se/VariSNP.",2015-01-08 +21738333,OntoVisT: A general purpose Ontological Visualization Tool.,"

Unlabelled

Ontologies have emerged as a fast growing research topic in the area of semantic web during last decade. Currently there are 204 ontologies that are available through OBO Foundry and BioPortal. Several excellent tools for navigating the ontological structure are available, however most of them are dedicated to a specific annotation data or integrated with specific analysis applications, and do not offer flexibility in terms of general-purpose usage for ontology exploration. We developed OntoVisT, a web based ontological visualization tool. This application is designed for interactive visualization of any ontological hierarchy for a specific node of interest, up to the chosen level of children and/or ancestor. It takes any ontology file in OBO format as input and generates output as DAG hierarchical graph for the chosen query. To enhance the navigation capabilities of complex networks, we have embedded several features such as search criteria, zoom in/out, center focus, nearest neighbor highlights and mouse hover events. The application has been tested on all 72 data sets available in OBO format through OBO foundry. The results for few of them can be accessed through OntoVisT-Gallery.

Availability

The database is available for free at http://ccbb.jnu.ac.in/OntoVisT.html.",2011-06-23 +21558324,mirConnX: condition-specific mRNA-microRNA network integrator.,"mirConnX is a user-friendly web interface for inferring, displaying and parsing mRNA and microRNA (miRNA) gene regulatory networks. mirConnX combines sequence information with gene expression data analysis to create a disease-specific, genome-wide regulatory network. A prior, static network has been constructed for all human and mouse genes. It consists of computationally predicted transcription factor (TF)-gene associations and miRNA target predictions. The prior network is supplemented with known interactions from the literature. Dynamic TF- and miRNA-gene associations are inferred from user-provided expression data using an association measure of choice. The static and dynamic networks are then combined using an integration function with user-specified weights. Visualization of the network and subsequent analysis are provided via a very responsive graphic user interface. Two organisms are currently supported: Homo sapiens and Mus musculus. The intuitive user interface and large database make mirConnX a useful tool for clinical scientists for hypothesis generation and explorations. mirConnX is freely available for academic use at http://www.benoslab.pitt.edu/mirconnx.",2011-05-10 +22369214,Construction and analysis of a plant non-specific lipid transfer protein database (nsLTPDB).,"

Background

Plant non-specific lipid transfer proteins (nsLTPs) are small and basic proteins. Recently, nsLTPs have been reported involved in many physiological functions such as mediating phospholipid transfer, participating in plant defence activity against bacterial and fungal pathogens, and enhancing cell wall extension in tobacco. However, the lipid transfer mechanism of nsLTPs is still unclear, and comprehensive information of nsLTPs is difficult to obtain.

Methods

In this study, we identified 595 nsLTPs from 121 different species and constructed an nsLTPs database--nsLTPDB--which comprises the sequence information, structures, relevant literatures, and biological data of all plant nsLTPs http://nsltpdb.life.nthu.edu.tw/.

Results

Meanwhile, bioinformatics and statistics methods were implemented to develop a classification method for nsLTPs based on the patterns of the eight highly-conserved cysteine residues, and to suggest strict Prosite-styled patterns for Type I and Type II nsLTPs. The pattern of Type I is C X2 V X5-7 C [V, L, I] × Y [L, A, V] X8-13 CC × G X12 D × [Q, K, R] X2 CXC X16-21 P X2 C X13-15C, and that of Type II is C X4 L X2 C X9-11 P [S, T] X2 CC X5 Q X2-4 C[L, F]C X2 [A, L, I] × [D, N] P X10-12 [K, R] X4-5 C X3-4 P X0-2 C. Moreover, we referred the Prosite-styled patterns to the experimental mutagenesis data that previously established by our group, and found that the residues with higher conservation played an important role in the structural stability or lipid binding ability of nsLTPs.

Conclusions

Taken together, this research has suggested potential residues that might be essential to modulate the structural and functional properties of plant nsLTPs. Finally, we proposed some biologically important sites of the nsLTPs, which are described by using a new Prosite-styled pattern that we defined.",2012-01-17 +25573618,Performance of genotypic tools for prediction of tropism in HIV-1 subtype C V3 loop sequences.,"Currently, there is no consensus on the genotypic tools to be used for tropism analysis in HIV-1 subtype C strains. Thus, the aim of the study was to evaluate the performance of the different V3 loop-based genotypic algorithms available. We compiled a dataset of 645 HIV-1 subtype C V3 loop sequences of known coreceptor phenotypes (531 R5-tropic/non-syncytium-inducing and 114 X4-tropic/R5X4-tropic/syncytium-inducing sequences) from the Los Alamos database (http://www.hiv.lanl.gov/) and previously published literature. Coreceptor usage was predicted based on this dataset using different software-based machine-learning algorithms as well as simple classical rules. All the sophisticated machine-learning methods showed a good concordance of above 85%. Geno2Pheno (false-positive rate cutoff of 5-15%) and CoRSeqV3-C were found to have a high predicting capability in determining both HIV-1 subtype C X4-tropic and R5-tropic strains. The current sophisticated genotypic tropism tools based on V3 loop perform well for tropism prediction in HIV-1 subtype C strains and can be used in clinical settings.",2015-01-07 +24755303,A power set-based statistical selection procedure to locate susceptible rare variants associated with complex traits with sequencing data.,"

Motivation

Existing association methods for rare variants from sequencing data have focused on aggregating variants in a gene or a genetic region because of the fact that analysing individual rare variants is underpowered. However, these existing rare variant detection methods are not able to identify which rare variants in a gene or a genetic region of all variants are associated with the complex diseases or traits. Once phenotypic associations of a gene or a genetic region are identified, the natural next step in the association study with sequencing data is to locate the susceptible rare variants within the gene or the genetic region.

Results

In this article, we propose a power set-based statistical selection procedure that is able to identify the locations of the potentially susceptible rare variants within a disease-related gene or a genetic region. The selection performance of the proposed selection procedure was evaluated through simulation studies, where we demonstrated the feasibility and superior power over several comparable existing methods. In particular, the proposed method is able to handle the mixed effects when both risk and protective variants are present in a gene or a genetic region. The proposed selection procedure was also applied to the sequence data on the ANGPTL gene family from the Dallas Heart Study to identify potentially susceptible rare variants within the trait-related genes.

Availability and implementation

An R package 'rvsel' can be downloaded from http://www.columbia.edu/∼sw2206/ and http://statsun.pusan.ac.kr.",2014-04-22 +26029379,A database for the monitoring of thermal anomalies over the Amazon forest and adjacent intertropical oceans.,"Advances in information technologies and accessibility to climate and satellite data in recent years have favored the development of web-based tools with user-friendly interfaces in order to facilitate the dissemination of geo/biophysical products. These products are useful for the analysis of the impact of global warming over different biomes. In particular, the study of the Amazon forest responses to drought have recently received attention by the scientific community due to the occurrence of two extreme droughts and sustained warming over the last decade. Thermal Amazoni@ is a web-based platform for the visualization and download of surface thermal anomalies products over the Amazon forest and adjacent intertropical oceans using Google Earth as a baseline graphical interface (http://ipl.uv.es/thamazon/web). This platform is currently operational at the servers of the University of Valencia (Spain), and it includes both satellite (MODIS) and climatic (ERA-Interim) datasets. Thermal Amazoni@ is composed of the viewer system and the web and ftp sites with ancillary information and access to product download.",2015-05-26 +25568936,Analysis of individual protein regions provides novel insights on cancer pharmacogenomics.,"The promise of personalized cancer medicine cannot be fulfilled until we gain better understanding of the connections between the genomic makeup of a patient's tumor and its response to anticancer drugs. Several datasets that include both pharmacologic profiles of cancer cell lines as well as their genomic alterations have been recently developed and extensively analyzed. However, most analyses of these datasets assume that mutations in a gene will have the same consequences regardless of their location. 
While this assumption might be correct in some cases, such analyses may miss subtler, yet still relevant, effects mediated by mutations in specific protein regions. Here we study such perturbations by separating effects of mutations in different protein functional regions (PFRs), including protein domains and intrinsically disordered regions. Using this approach, we have been able to identify 171 novel associations between mutations in specific PFRs and changes in the activity of 24 drugs that couldn't be recovered by traditional gene-centric analyses. Our results demonstrate how focusing on individual protein regions can provide novel insights into the mechanisms underlying the drug sensitivity of cancer cell lines. Moreover, while these new correlations are identified using only data from cancer cell lines, we have been able to validate some of our predictions using data from actual cancer patients. Our findings highlight how gene-centric experiments (such as systematic knock-out or silencing of individual genes) are missing relevant effects mediated by perturbations of specific protein regions. All the associations described here are available from http://www.cancer3d.org.",2015-01-08 +25481010,SNPlice: variants that modulate Intron retention from RNA-sequencing data.,"

Rationale

The growing recognition of the importance of splicing, together with rapidly accumulating RNA-sequencing data, demand robust high-throughput approaches, which efficiently analyze experimentally derived whole-transcriptome splice profiles.

Results

We have developed a computational approach, called SNPlice, for identifying cis-acting, splice-modulating variants from RNA-seq datasets. SNPlice mines RNA-seq datasets to find reads that span single-nucleotide variant (SNV) loci and nearby splice junctions, assessing the co-occurrence of variants and molecules that remain unspliced at nearby exon-intron boundaries. Hence, SNPlice highlights variants preferentially occurring on intron-containing molecules, possibly resulting from altered splicing. To illustrate co-occurrence of variant nucleotide and exon-intron boundary, allele-specific sequencing was used. SNPlice results are generally consistent with splice-prediction tools, but also indicate splice-modulating elements missed by other algorithms. SNPlice can be applied to identify variants that correlate with unexpected splicing events, and to measure the splice-modulating potential of canonical splice-site SNVs.

Availability and implementation

SNPlice is freely available for download from https://code.google.com/p/snplice/ as a self-contained binary package for 64-bit Linux computers and as python source-code.

Contact

pmudvari@gwu.edu or horvatha@gwu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-06 +25236464,RAMONA: a Web application for gene set analysis on multilevel omics data.,"

Summary

Decreasing costs of modern high-throughput experiments allow for the simultaneous analysis of altered gene activity on various molecular levels. However, these multi-omics approaches lead to a large amount of data, which is hard to interpret for a non-bioinformatician. Here, we present the remotely accessible multilevel ontology analysis (RAMONA). It offers an easy-to-use interface for the simultaneous gene set analysis of combined omics datasets and is an extension of the previously introduced MONA approach. RAMONA is based on a Bayesian enrichment method for the inference of overrepresented biological processes among given gene sets. Overrepresentation is quantified by interpretable term probabilities. It is able to handle data from various molecular levels, while in parallel coping with redundancies arising from gene set overlaps and related multiple testing problems. The comprehensive output of RAMONA is easy to interpret and thus allows for functional insight into the affected biological processes. With RAMONA, we provide an efficient implementation of the Bayesian inference problem such that ontologies consisting of thousands of terms can be processed in the order of seconds.

Availability and implementation

RAMONA is implemented as ASP.NET Web application and publicly available at http://icb.helmholtz-muenchen.de/ramona.",2014-09-18 +22730434,gSearch: a fast and flexible general search tool for whole-genome sequencing.,"

Background

Various processes such as annotation and filtering of variants or comparison of variants in different genomes are required in whole-genome or exome analysis pipelines. However, processing different databases and searching among millions of genomic loci is not trivial.

Results

gSearch compares sequence variants in the Genome Variation Format (GVF) or Variant Call Format (VCF) with a pre-compiled annotation or with variants in other genomes. Its search algorithms are subsequently optimized and implemented in a multi-threaded manner. The proposed method is not a stand-alone annotation tool with its own reference databases. Rather, it is a search utility that readily accepts public or user-prepared reference files in various formats including GVF, Generic Feature Format version 3 (GFF3), Gene Transfer Format (GTF), VCF and Browser Extensible Data (BED) format. Compared to existing tools such as ANNOVAR, gSearch runs more than 10 times faster. For example, it is capable of annotating 52.8 million variants with allele frequencies in 6 min.

Availability

gSearch is available at http://ml.ssu.ac.kr/gSearch. It can be used as an independent search tool or can easily be integrated to existing pipelines through various programming environments such as Perl, Ruby and Python.",2012-06-23 +24312246,"PIIKA 2: an expanded, web-based platform for analysis of kinome microarray data.","Kinome microarrays are comprised of peptides that act as phosphorylation targets for protein kinases. This platform is growing in popularity due to its ability to measure phosphorylation-mediated cellular signaling in a high-throughput manner. While software for analyzing data from DNA microarrays has also been used for kinome arrays, differences between the two technologies and associated biologies previously led us to develop Platform for Intelligent, Integrated Kinome Analysis (PIIKA), a software tool customized for the analysis of data from kinome arrays. Here, we report the development of PIIKA 2, a significantly improved version with new features and improvements in the areas of clustering, statistical analysis, and data visualization. Among other additions to the original PIIKA, PIIKA 2 now allows the user to: evaluate statistically how well groups of samples cluster together; identify sets of peptides that have consistent phosphorylation patterns among groups of samples; perform hierarchical clustering analysis with bootstrapping; view false negative probabilities and positive and negative predictive values for t-tests between pairs of samples; easily assess experimental reproducibility; and visualize the data using volcano plots, scatterplots, and interactive three-dimensional principal component analyses. Also new in PIIKA 2 is a web-based interface, which allows users unfamiliar with command-line tools to easily provide input and download the results. 
Collectively, the additions and improvements described here enhance both the breadth and depth of analyses available, simplify the user interface, and make the software an even more valuable tool for the analysis of kinome microarray data. Both the web-based and stand-alone versions of PIIKA 2 can be accessed via http://saphire.usask.ca.",2013-11-29 +22915576,Systematic prediction of cis-regulatory elements in the Chlamydomonas reinhardtii genome using comparative genomics.,"Chlamydomonas reinhardtii is one of the most important microalgae model organisms and has been widely studied toward the understanding of chloroplast functions and various cellular processes. Further exploitation of C. reinhardtii as a model system to elucidate various molecular mechanisms and pathways requires systematic study of gene regulation. However, there is a general lack of genome-scale gene regulation study, such as global cis-regulatory element (CRE) identification, in C. reinhardtii. Recently, large-scale genomic data in microalgae species have become available, which enable the development of efficient computational methods to systematically identify CREs and characterize their roles in microalgae gene regulation. Here, we performed in silico CRE identification at the whole genome level in C. reinhardtii using a comparative genomics-based method. We predicted a large number of CREs in C. reinhardtii that are consistent with experimentally verified CREs. We also discovered that a large percentage of these CREs form combinations and have the potential to work together for coordinated gene regulation in C. reinhardtii. Multiple lines of evidence from literature, gene transcriptional profiles, and gene annotation resources support our prediction. The predicted CREs will serve, to our knowledge, as the first large-scale collection of CREs in C. reinhardtii to facilitate further experimental study of microalgae gene regulation. The accompanying software tool and the predictions in C. 
reinhardtii are also made available through a Web-accessible database (http://hulab.ucf.edu/research/projects/Microalgae/sdcre/motifcomb.html).",2012-08-22 +23748957,The UCSC Interaction Browser: multidimensional data views in pathway context.,"High-throughput data sets such as genome-wide protein-protein interactions, protein-DNA interactions and gene expression data have been published for several model systems, especially for human cancer samples. The University of California, Santa Cruz (UCSC) Interaction Browser (http://sysbio.soe.ucsc.edu/nets) is an online tool for biologists to view high-throughput data sets simultaneously for the analysis of functional relationships between biological entities. Users can access several public interaction networks and functional genomics data sets through the portal as well as upload their own networks and data sets for analysis. Users can navigate through correlative relationships for focused sets of genes belonging to biological pathways using a standard web browser. Using a new visual modality called the CircleMap, multiple 'omics' data sets can be viewed simultaneously within the context of curated, predicted, directed and undirected regulatory interactions. The Interaction Browser provides an integrative viewing of biological networks based on the consensus of many observations about genes and their products, which may provide new insights about normal and disease processes not obvious from any isolated data set.",2013-06-08 +27153703,Phasing for medical sequencing using rare variants and large haplotype reference panels.,"

Motivation

There is growing recognition that estimating haplotypes from high coverage sequencing of single samples in clinical settings is an important problem. At the same time very large datasets consisting of tens and hundreds of thousands of high-coverage sequenced samples will soon be available. We describe a method that takes advantage of these huge human genetic variation resources and rare variant sharing patterns to estimate haplotypes on single sequenced samples. Sharing rare variants between two individuals is more likely to arise from a recent common ancestor and, hence, also more likely to indicate similar shared haplotypes over a substantial flanking region of sequence.

Results

Our method exploits this idea to select a small set of highly informative copying states within a Hidden Markov Model (HMM) phasing algorithm. Using rare variants in this way allows us to avoid iterative MCMC methods to infer haplotypes. Compared to other approaches that do not explicitly use rare variants we obtain significant gains in phasing accuracy, less variation over phasing runs and improvements in speed. For example, using a reference panel of 7420 haplotypes from the UK10K project, we are able to reduce switch error rates by up to 50% when phasing samples sequenced at high-coverage. In addition, a single step rephasing of the UK10K panel, using rare variant information, has a downstream impact on phasing performance. These results represent a proof of concept that rare variant sharing patterns can be utilized to phase large high-coverage sequencing studies such as the 100 000 Genomes Project dataset.

Availability and implementation

A webserver that includes an implementation of this new method and allows phasing of high-coverage clinical samples is available at https://phasingserver.stats.ox.ac.uk/

Contact

marchini@stats.ox.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-02-27 +26458889,A multi-objective optimization approach accurately resolves protein domain architectures.,"

Motivation

Given a protein sequence and a number of potential domains matching it, what are the domain content and the most likely domain architecture for the sequence? This problem is of fundamental importance in protein annotation, constituting one of the main steps of all predictive annotation strategies. On the other hand, when potential domains are several and in conflict because of overlapping domain boundaries, finding a solution for the problem might become difficult. An accurate prediction of the domain architecture of a multi-domain protein provides important information for function prediction, comparative genomics and molecular evolution.

Results

We developed DAMA (Domain Annotation by a Multi-objective Approach), a novel approach that identifies architectures through a multi-objective optimization algorithm combining scores of domain matches, previously observed multi-domain co-occurrence and domain overlapping. DAMA has been validated on a known benchmark dataset based on CATH structural domain assignments and on the set of Plasmodium falciparum proteins. When compared with existing tools on both datasets, it outperforms all of them.

Availability and implementation

DAMA software is implemented in C++ and the source code can be found at http://www.lcqb.upmc.fr/DAMA.

Contact

juliana.silva_bernardes@upmc.fr or alessandra.carbone@lip6.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-12 +24366875,CCAT: Combinatorial Code Analysis Tool for transcriptional regulation.,"Combinatorial interplay among transcription factors (TFs) is an important mechanism by which transcriptional regulatory specificity is achieved. However, despite the increasing number of TFs for which either binding specificities or genome-wide occupancy data are known, knowledge about cooperativity between TFs remains limited. To address this, we developed a computational framework for predicting genome-wide co-binding between TFs (CCAT, Combinatorial Code Analysis Tool), and applied it to Drosophila melanogaster to uncover cooperativity among TFs during embryo development. Using publicly available TF binding specificity data and DNaseI chromatin accessibility data, we first predicted genome-wide binding sites for 324 TFs across five stages of D. melanogaster embryo development. We then applied CCAT in each of these developmental stages, and identified from 19 to 58 pairs of TFs in each stage whose predicted binding sites are significantly co-localized. We found that nearby binding sites for pairs of TFs predicted to cooperate were enriched in regions bound in relevant ChIP experiments, and were more evolutionarily conserved than other pairs. Further, we found that TFs tend to be co-localized with other TFs in a dynamic manner across developmental stages. All generated data as well as source code for our front-to-end pipeline are available at http://cat.princeton.edu.",2013-12-23 +24682815,Bi-Force: large-scale bicluster editing and its application to gene expression data biclustering.,"The explosion of the biological data has dramatically reformed today's biological research. The need to integrate and analyze high-dimensional biological data on a large scale is driving the development of novel bioinformatics approaches. 
Biclustering, also known as 'simultaneous clustering' or 'co-clustering', has been successfully utilized to discover local patterns in gene expression data and similar biomedical data types. Here, we contribute a new heuristic: 'Bi-Force'. It is based on the weighted bicluster editing model, to perform biclustering on arbitrary sets of biological entities, given any kind of pairwise similarities. We first evaluated the power of Bi-Force to solve dedicated bicluster editing problems by comparing Bi-Force with two existing algorithms in the BiCluE software package. We then followed a biclustering evaluation protocol in a recent review paper from Eren et al. (2013) (A comparative analysis of biclustering algorithms for gene expression data. Brief. Bioinform., 14:279-292.) and compared Bi-Force against eight existing tools: FABIA, QUBIC, Cheng and Church, Plaid, BiMax, Spectral, xMOTIFs and ISA. To this end, a suite of synthetic datasets as well as nine large gene expression datasets from Gene Expression Omnibus were analyzed. All resulting biclusters were subsequently investigated by Gene Ontology enrichment analysis to evaluate their biological relevance. The distinct theoretical foundation of Bi-Force (bicluster editing) is more powerful than strict biclustering. We thus outperformed existing tools with Bi-Force at least when following the evaluation protocols from Eren et al. Bi-Force is implemented in Java and integrated into the open source software package of BiCluE. The software as well as all used datasets are publicly available at http://biclue.mpi-inf.mpg.de.",2014-03-20 +26084794,iSuc-PseAAC: predicting lysine succinylation in proteins by incorporating peptide position-specific propensity.,"Lysine succinylation in protein is one type of post-translational modifications (PTMs). Succinylation is associated with some diseases and succinylated sites data just has been found in recent years in experiments. 
It is highly desired to develop computational methods to identify the candidate proteins and their sites. In view of this, a new predictor called iSuc-PseAAC was proposed by incorporating the peptide position-specific propensity into the general form of pseudo amino acid composition. The accuracy is 79.94%, sensitivity 51.07%, specificity 89.42% and MCC 0.431 in leave-one-out cross validation with support vector machine algorithm. It was demonstrated by rigorous leave-one-out validation on a stringent benchmark dataset that the new predictor is quite promising and may become a useful high throughput tool in this area. Meanwhile a user-friendly web-server for iSuc-PseAAC is accessible at http://app.aporc.org/iSuc-PseAAC/. Users can easily obtain their desired results without the need to understand the complicated mathematical equations presented in this paper just for its integrity.",2015-06-18 +25951428,IPeak: An open source tool to combine results from multiple MS/MS search engines.,"Liquid chromatography coupled tandem mass spectrometry (LC-MS/MS) is an important technique for detecting peptides in proteomics studies. Here, we present an open source software tool, termed IPeak, a peptide identification pipeline that is designed to combine the Percolator post-processing algorithm and multi-search strategy to enhance the sensitivity of peptide identifications without compromising accuracy. IPeak provides a graphical user interface (GUI) as well as a command-line interface, which is implemented in JAVA and can work on all three major operating system platforms: Windows, Linux/Unix and OS X. IPeak has been designed to work with the mzIdentML standard from the Proteomics Standards Initiative (PSI) as an input and output, and also been fully integrated into the associated mzidLibrary project, providing access to the overall pipeline, as well as modules for calling Percolator on individual search engine result files. 
The integration thus enables IPeak (and Percolator) to be used in conjunction with any software packages implementing the mzIdentML data standard. IPeak is freely available and can be downloaded under an Apache 2.0 license at https://code.google.com/p/mzidentml-lib/.",2015-08-06 +22121218,The MAPPER2 Database: a multi-genome catalog of putative transcription factor binding sites.,"The mapper(2) Database (http://genome.ufl.edu/mapperdb) is a component of mapper(2), a web-based system for the analysis of transcription factor binding sites in multiple genomes. The database contains predicted binding sites identified in the promoters of all human, mouse and Drosophila genes using 1017 probabilistic models representing over 600 different transcription factors. In this article we outline the current contents of the database and we describe its web-based user interface in detail. We then discuss ongoing work to extend the database contents to experimental data and to add analysis capabilities. Finally, we provide information about recent improvements to the hardware and software platform that mapper(2) is based on.",2011-11-24 +23794635,GeneMANIA prediction server 2013 update.,"GeneMANIA (http://www.genemania.org) is a flexible user-friendly web interface for generating hypotheses about gene function, analyzing gene lists and prioritizing genes for functional assays. Given a query gene list, GeneMANIA extends the list with functionally similar genes that it identifies using available genomics and proteomics data. GeneMANIA also reports weights that indicate the predictive value of each selected data set for the query. GeneMANIA can also be used in a function prediction setting: given a query gene, GeneMANIA finds a small set of genes that are most likely to share function with that gene based on their interactions with it. Enriched Gene Ontology categories among this set can sometimes point to the function of the gene. 
Seven organisms are currently supported (Arabidopsis thaliana, Caenorhabditis elegans, Drosophila melanogaster, Mus musculus, Homo sapiens, Rattus norvegicus and Saccharomyces cerevisiae), and hundreds of data sets have been collected from GEO, BioGRID, IRefIndex and I2D, as well as organism-specific functional genomics data sets. Users can customize their search by selecting specific data sets to query and by uploading their own data sets to analyze.",2013-07-01 +24790830,SENTIA: a systematic online monitoring registry for children and adolescents treated with antipsychotics.,"

Introduction

Despite drastic increases in antipsychotic prescribing in youth, data are still limited regarding their safety in this vulnerable population, necessitating additional tools for capturing long-term, real world data.

Methods

We present SENTIA (SafEty of NeurolepTics in Infancy and Adolescence; https://SENTIA.es), an online registry created in 2010 to track antipsychotic adverse effects in Spanish youth <18 years old currently taking or initiating with any antipsychotic treatment. SENTIA collects information on sociodemographic, diagnostic and treatment characteristics, past personal medical/psychiatric history, healthy lifestyle habits and treatment adherence. Additionally, efficacy and adverse effect data are recorded including the Children's Global Assessment Scale; Clinical Global Impressions scale for Severity and Improvement, the Safety Monitoring Uniform Report Form, Simpson-Angus Scale, Abnormal Involuntary Movement Scale, vital signs, blood pressure, and EKG. Finally, fasting blood is drawn for hematology, electrolytes, renal, liver and thyroid function, glucose, insulin, lipid, prolactin and sex hormone levels. Initially, a diagnostic interview and several psychopathology scales were also included. Patients are assessed regularly and followed even beyond stopping antipsychotics.

Results

Since 01/17/2011, 85 youth (11.5 ± 2.9 (range = 4-17) years old, 70.6% male) have been included at one inaugural center. After a mean duration of 17 ± 11 (range = 1-34) months, 78.8% are still actively followed. For feasibility reasons, the diagnostic interview and detailed psychopathology scales were dropped. The remaining data can be entered in <30 minutes. Several additional centers are currently being added to SENTIA.

Conclusions

Implementation of a systematic online pharmacovigilance system for antipsychotic adverse effects in youth is feasible and promises to generate important information.",2014-04-14 +26672762,CyREST: Turbocharging Cytoscape Access for External Tools via a RESTful API.,"As bioinformatic workflows become increasingly complex and involve multiple specialized tools, so does the difficulty of reliably reproducing those workflows. Cytoscape is a critical workflow component for executing network visualization, analysis, and publishing tasks, but it can be operated only manually via a point-and-click user interface. Consequently, Cytoscape-oriented tasks are laborious and often error prone, especially with multistep protocols involving many networks. In this paper, we present the new cyREST Cytoscape app and accompanying harmonization libraries. Together, they improve workflow reproducibility and researcher productivity by enabling popular languages (e.g., Python and R, JavaScript, and C#) and tools (e.g., IPython/Jupyter Notebook and RStudio) to directly define and query networks, and perform network analysis, layouts and renderings. We describe cyREST's API and overall construction, and present Python- and R-based examples that illustrate how Cytoscape can be integrated into large scale data analysis pipelines. cyREST is available in the Cytoscape app store (http://apps.cytoscape.org) where it has been downloaded over 1900 times since its release in late 2014.",2015-08-05 +24695403,SECA: SNP effect concordance analysis using genome-wide association summary results.,"

Unlabelled

The genomics era provides opportunities to assess the genetic overlap across phenotypes at the measured genotype level; however, current approaches require individual-level genome-wide association (GWA) single nucleotide polymorphism (SNP) genotype data in one or both of a pair of GWA samples. To facilitate the discovery of pleiotropic effects and examine genetic overlap across two phenotypes, I have developed a user-friendly web-based application called SECA to perform SNP effect concordance analysis using GWA summary results. The method is validated using publicly available summary data from the Psychiatric Genomics Consortium.

Availability and implementation

http://neurogenetics.qimrberghofer.edu.au/SECA.",2014-04-01 +25622285,Correction to toporek (2014).,"Reports an error in ""Pedagogy of the privileged: Review of Deconstructing Privilege: Teaching and Learning as Allies in the Classroom"" by Rebecca L. Toporek (Cultural Diversity and Ethnic Minority Psychology, 2014[Oct], Vol 20[4], 621-622). This article was originally published online incorrectly as a Brief Report. The article authored by Rebecca L. Toporek has been published correctly as a Book Review in the October 2014 print publication (Vol. 20, No. 4, pp. 621-622. http://dx.doi.org/10.1037/a0036529). (The following abstract of the original article appeared in record 2014-42484-006.) Reviews the book, Deconstructing Privilege: Teaching and Learning as Allies in the Classroom edited by Kim A. Case (2013). The purpose of this book is to provide a collection of resources for those teaching about privilege directly, much of this volume may be useful for expanding the context within which educators teach all aspects of psychology. Understanding the history and systems of psychology, clinical practice, research methods, assessment, and all the core areas of psychology could be enhanced by consideration of the structural framework through which psychology has developed and is maintained. The book presents a useful guide for educators, and in particular, those who teach about systems of oppression and privilege directly. For psychologists, this guide provides scholarship and concrete strategies for facilitating students' awareness of multiple dimensions of privilege across content areas. 
(PsycINFO Database Record (c) 2015 APA, all rights reserved).",2015-01-01 +25753716,Glyco3D: a portal for structural glycosciences.,"The present work describes, in a detailed way, a family of databases covering the three-dimensional features of monosaccharides, disaccharides, oligosaccharides, polysaccharides, glycosyltransferases, lectins, monoclonal antibodies against carbohydrates, and glycosaminoglycan-binding proteins. These databases have been developed with non-proprietary software, and they are open freely to the scientific community. They are accessible through the common portal called ""Glyco3D"" http://www.glyco3d.cermav.cnrs.fr. The databases are accompanied by a user-friendly graphical user interface (GUI) which offers several search options. All three-dimensional structures are available for visual consultations (with basic measurements possibilities) and can be downloaded in commonly used formats for further uses.",2015-01-01 +25636622,"Gene essentiality analysis based on DEG 10, an updated database of essential genes.","The database of essential genes (DEG, available at http://www.essentialgene.org), constructed in 2003, has been timely updated to harbor essential-gene records of bacteria, archaea, and eukaryotes. DEG 10, the current release, includes not only essential protein-coding genes determined by genome-wide gene essentiality screens but also essential noncoding RNAs, promoters, regulatory sequences, and replication origins. Therefore, DEG 10 includes essential genomic elements under different conditions in three domains of life, with customizable BLAST tools. Based on the analysis of DEG 10, we show that the percentage of essential genes in bacterial genomes exhibits an exponential decay with increasing genome sizes. 
The functions, ATP binding (GO:0005524), GTP binding (GO:0005525), and DNA-directed RNA polymerase activity (GO:0003899), are likely required for organisms across life domains.",2015-01-01 +25910700,iFoldRNA v2: folding RNA with constraints.,"

Unlabelled

A key to understanding RNA function is to uncover its complex 3D structure. Experimental methods used for determining RNA 3D structures are technologically challenging and laborious, which makes the development of computational prediction methods of substantial interest. Previously, we developed the iFoldRNA server that allows accurate prediction of short (<50 nt) tertiary RNA structures starting from primary sequences. Here, we present a new version of the iFoldRNA server that permits the prediction of tertiary structure of RNAs as long as a few hundred nucleotides. This substantial increase in the server capacity is achieved by utilization of experimental information such as base-pairing and hydroxyl-radical probing. We demonstrate a significant benefit provided by integration of experimental data and computational methods.

Availability and implementation

http://ifoldrna.dokhlab.org

Contact

dokh@unc.edu.",2015-04-24 +26639688,Russian translations for Cochrane.,"

Background

Cochrane collaboration has made a huge contribution to the development of evidence-based medicine; Cochrane work is the international gold standard of independent, credible and reliable high-quality information in medicine. Over the past 20 years the Cochrane Collaboration helped transform decision-making in health and reform it significantly, saving lives and contributing to longevity [1]. Until recently, Cochrane evidence was available only in English, which represents a significant barrier to its wider use in non-English speaking countries. To provide access to evidence, obtained from Cochrane Reviews, for health professionals and general public (from non-English-speaking countries), bypassing language barriers, Cochrane collaboration in 2014 initiated an international project of translating Plain language summaries of Cochrane Reviews into other languages [2, 3]. Russian translations of Plain language summaries were started in May 2014 by the team from Kazan Federal University (Department of Basic and Clinical Pharmacology; 2014-2015 as an Affiliated Centre in Tatarstan of the Nordic Cochrane Centre, since August 2015 as Cochrane Russia, a Russian branch of Cochrane Nordic, Head - Liliya Eugenevna Ziganshina) on a voluntary basis.

Objective

To assess the quality of Russian translations of Cochrane Plain Language Summaries (PLS) and their potential impact on the Russian speaking community through user feedback with the overarching aim of furthering the translations project.

Methods

We conducted the continuous online survey via Google Docs. We invited respondents through the electronic Russian language discussion forum on Essential Medicines (E-lek), links to survey on the Russian Cochrane.org website, invitations to Cochrane contributors registered in Archie from potential Russian-speaking countries. We set up the survey in Russian and English. The respondents were asked to respond to the questionnaire regarding the relevance and potential impact of the Cochrane Russian translations project, topics of interest in the field of health and health care, the quality and clarity of translated content, the preferred style of presentation and suggestions to improve the quality of translations of Plain language summaries of Cochrane Reviews.

Results

Currently the team of translators includes volunteers from the staff, Masters and PhD students of the Department of Basic and Clinical Pharmacology of the Kazan Federal University, and Kazan Medical University, our colleagues from Kazan and other cities of Russia, from the Republic of Armenia and the USA. By September 20th 2015, 446 Plain language summaries of Cochrane Reviews were translated into Russian and published on the web-site http://www.cochrane.org/ru/evidence. Our project ""Russian translations for Cochrane"" has already covered a wide range of health priority areas with translations of Plain language summaries and abstracts of the most topical and priority Cochrane reviews. During the period from 03.03.2015 to 20.09.2015 we received 113 answers from our respondents (103 answers in Russian and 10 answers in English). These were representatives of the medical and pharmaceutical professions (60%), representatives of non-medical professions (17%), students/graduate students (16%), retirees (4%) and others categories of citizens among the respondents. Half of the respondents (50%) belonged to the age group of 36-60 years, followed by the group of 18-35 years (41%). According to the survey the vast majority of respondents consider that the Cochrane Russian translations project is needed for Russia and Russian speaking countries (94%; n = 106), it is needed for their work, studies, and life in general (91%; n = 103). Nobody answered ""No"" to the question: ""Do you think that this project is needed for Russia and Russian-speaking countries?"" Information from the Cochrane evidence can affect (change) individual practice and/or attitude to drugs or diagnostic procedures of 87% (n = 98) of respondents. Only two people answered negatively to this question. However, only one third of respondents would like to become volunteer members of the translations project. 
The Russian texts of translations of Cochrane summaries and their main message were completely understandable or mostly clear to the vast majority of respondents (92%; n = 104). Respondents, proficient in English (n = 61), answered that the Russian-language translations fully complied (43%; n = 26) or in general corresponded to (57%; n = 35) the original English text. The majority of respondents (85%, n = 96) rated the quality of the translated texts as excellent and good. More than half of respondents (61%; n = 69) would prefer the translations to be adapted to the usual style of presentation in Russian. The respondents agreed that mistakes, typos, or both were very few. Our respondents provided valuable suggestions for further improvement of the Russian translations project. We would like to present here some of these: ""More translations needed"", ""The ultimate goal... is to try to adapt the summaries to Russian language style as much as possible. This is a very challenging task, however and at present format the summaries are already great"", ""Go great as you do!"" ""Move forward and be efficient!"" ""Distribute information about the project through social networks and different means of social media"", ""Studying Cochrane Database should be included in the Russian medical school's curriculum at a much larger extent than it is included (if at all) now. It would be beneficial for high school students as well.""

Conclusions

The survey provided positive feedback on the Russian translations project concerning the clarity and quality of Russian texts and overall satisfaction of the readers. It confirmed the importance and relevance of the Russian translations project for Russian speaking audience, representing various professions and age groups. The survey results with detailed feedback contribute to further improvement of the Russian translations project.

Limitations

Selective and subjective evaluation of translations by the respondents, difficulties with clear criteria for the objective evaluation. Further quality improvement of original PLS texts would contribute to higher translation quality.

Acknowledgments

We would like to thank Juliane Reed, Coordinator of the Cochrane Translations Project, Professor Peter C Gøtzsche, Director of the Cochrane Nordic, co-founder of the Cochrane Collaboration, Cochrane leadership and the global Cochrane network together with the leadership of the Kazan Federal University for continuous encouragement, spirit and support.",2015-01-01 +25505810,Effects of context and word class on lexical retrieval in Chinese speakers with anomic aphasia.,"

Background

Differences in processing nouns and verbs have been investigated intensely in psycholinguistics and neuropsychology in past decades. However, the majority of studies examining retrieval of these word classes have involved tasks of single word stimuli or responses. While the results have provided rich information for addressing issues about grammatical class distinctions, it is unclear whether they have adequate ecological validity for understanding lexical retrieval in connected speech which characterizes daily verbal communication. Previous investigations comparing retrieval of nouns and verbs in single word production and connected speech have reported either discrepant performance between the two contexts with presence of word class dissociation in picture naming but absence in connected speech, or null effects of word class. In addition, word finding difficulties have been found to be less severe in connected speech than picture naming. However, these studies have failed to match target stimuli of the two word classes and between tasks on psycholinguistic variables known to affect performance in response latency and/or accuracy.

Aims

The present study compared lexical retrieval of nouns and verbs in picture naming and connected speech from picture description, procedural description, and story-telling among 19 Chinese speakers with anomic aphasia and their age, gender, and education matched healthy controls, to understand the influence of grammatical class on word production across speech contexts when target items were balanced for confounding variables between word classes and tasks.

Methods & procedures

Elicitation of responses followed the protocol of the AphasiaBank consortium (http://talkbank.org/AphasiaBank/). Target words for confrontation naming were based on well-established naming tests, while those for narrative were drawn from a large database of normal speakers. Selected nouns and verbs in the two contexts were matched for age-of-acquisition (AoA) and familiarity. Influence of imageability was removed through statistical control.

Outcomes & results

When AoA and familiarity were balanced, nouns were retrieved better than verbs, and performance was higher in picture naming than connected speech. When imageability was further controlled for, only the effect of task remained significant.

Conclusions

The absence of word class effects when confounding variables are controlled for is similar to many previous reports; however, the pattern of better word retrieval in naming is rare but compatible with the account that processing demands are higher in narrative than naming. The overall findings have strongly suggested the importance of including connected speech tasks in any language assessment and evaluation of language rehabilitation of individuals with aphasia.",2015-01-01 +25555720,"Improved methods for classification, prediction, and design of antimicrobial peptides.","Peptides with diverse amino acid sequences, structures, and functions are essential players in biological systems. The construction of well-annotated databases not only facilitates effective information management, search, and mining but also lays the foundation for developing and testing new peptide algorithms and machines. The antimicrobial peptide database (APD) is an original construction in terms of both database design and peptide entries. The host defense antimicrobial peptides (AMPs) registered in the APD cover the five kingdoms (bacteria, protists, fungi, plants, and animals) or three domains of life (bacteria, archaea, and eukaryota). This comprehensive database ( http://aps.unmc.edu/AP ) provides useful information on peptide discovery timeline, nomenclature, classification, glossary, calculation tools, and statistics. The APD enables effective search, prediction, and design of peptides with antibacterial, antiviral, antifungal, antiparasitic, insecticidal, spermicidal, anticancer activities, chemotactic, immune modulation, or antioxidative properties. A universal classification scheme is proposed herein to unify innate immunity peptides from a variety of biological sources. As an improvement, the upgraded APD makes predictions based on the database-defined parameter space and provides a list of the sequences most similar to natural AMPs. 
In addition, the powerful pipeline design of the database search engine laid a solid basis for designing novel antimicrobials to combat resistant superbugs, viruses, fungi, or parasites. This comprehensive AMP database is a useful tool for both research and education.",2015-01-01 +26226151,ElemeNT: a computational tool for detecting core promoter elements.,"Core promoter elements play a pivotal role in the transcriptional output, yet they are often detected manually within sequences of interest. Here, we present 2 contributions to the detection and curation of core promoter elements within given sequences. First, the Elements Navigation Tool (ElemeNT) is a user-friendly web-based, interactive tool for prediction and display of putative core promoter elements and their biologically-relevant combinations. Second, the CORE database summarizes ElemeNT-predicted core promoter elements near CAGE and RNA-seq-defined Drosophila melanogaster transcription start sites (TSSs). ElemeNT's predictions are based on biologically-functional core promoter elements, and can be used to infer core promoter compositions. ElemeNT does not assume prior knowledge of the actual TSS position, and can therefore assist in annotation of any given sequence. These resources, freely accessible at http://lifefaculty.biu.ac.il/gershon-tamar/index.php/resources, facilitate the identification of core promoter elements as active contributors to gene expression.",2015-01-01 +24685258,SPiCE: a web-based tool for sequence-based protein classification and exploration.,"

Background

Amino acid sequences and features extracted from such sequences have been used to predict many protein properties, such as subcellular localization or solubility, using classifier algorithms. Although software tools are available for both feature extraction and classifier construction, their application is not straightforward, requiring users to install various packages and to convert data into different formats. This lack of easily accessible software hampers quick, explorative use of sequence-based classification techniques by biologists.

Results

We have developed the web-based software tool SPiCE for exploring sequence-based features of proteins in predefined classes. It offers data upload/download, sequence-based feature calculation, data visualization and protein classifier construction and testing in a single integrated, interactive environment. To illustrate its use, two example datasets are included showing the identification of differences in amino acid composition between proteins yielding low and high production levels in fungi and low and high expression levels in yeast, respectively.

Conclusions

SPiCE is an easy-to-use online tool for extracting and exploring sequence-based features of sets of proteins, allowing non-experts to apply advanced classification techniques. The tool is available at http://helix.ewi.tudelft.nl/spice.",2014-03-31 +26639684,Improving data retrieval quality: Evidence based medicine perspective.,"

Background

The actively developing approach in modern medicine is the approach focused on principles of evidence-based medicine. The assessment of quality and reliability of studies is needed. However, in some cases studies corresponding to the first level of evidence may contain errors in randomized control trials (RCTs). Solution of the problem is the Grading of Recommendations Assessment, Development and Evaluation (GRADE) system. Studies both in the fields of medicine and information retrieval are conducted for developing search engines for the MEDLINE database [1]; combined techniques for summarization and information retrieval targeted to solving problems of finding the best medication based on the levels of evidence are being developed [2].

Objective

Based on the relevance and demand for studies both in the field of medicine and information retrieval, it was decided to start the development of a search engine for the MEDLINE database search on the basis of the Saint-Petersburg State University with the support of Pavlov First Saint-Petersburg State Medical University and Tashkent Institute of Postgraduate Medical Education. Novelty and value of the proposed system are characterized by the use of ranking method of relevant abstracts. It is suggested that the system will be able to perform ranking based on studies level of evidence and to apply GRADE criteria for system evaluation.

Methods

The assigned task falls within the domain of information retrieval and machine learning. Based on the results of implementation from previous work [3], in which the main goal was to cluster abstracts from MEDLINE database by subtypes of medical interventions, a set of algorithms for clustering in this study was selected: K-means, K-means ++, EM from the sklearn (http://scikit-learn.org) and WEKA (http://www.cs.waikato.ac.nz/~ml/weka/) libraries, together with the methods of Latent Semantic Analysis (LSA) [4] choosing the first 210 facts and the model ""bag of words"" [5] to represent clustered documents. During the process of abstracts classification, few algorithms were tested including: Complement Naive Bayes [6], Sequential Minimal Optimization (SMO) [7] and non linear SVM from the WEKA library.

Results

The first step of this study was to mark up abstracts of articles from the MEDLINE by containing and not containing a medical intervention. For this purpose, based on our previous work [8] a web-crawler was modified to perform the necessary markup. The next step was to evaluate the clustering algorithms on the marked-up abstracts. As a result of clustering abstracts by two groups, when applying the LSA and choosing first 210 facts, the following results were obtained: 1) K-means: Purity = 0.5598, Normalized Entropy = 0.5994; 2) K-means ++: Purity = 0.6743, Normalized Entropy = 0.4996; 3) EM: Purity = 0.5443, Normalized Entropy = 0.6344. When applying the model ""bag of words"": 1) K-means: Purity = 0.5134, Normalized Entropy = 0.6254; 2) K-means ++: Purity = 0.5645, Normalized Entropy = 0.5299; 3) EM: Purity = 0.5247, Normalized Entropy = 0.6345. Then, studies which contain medical intervention have been considered and classified by the subtypes of medical interventions. At the process of classification abstracts by subtypes of medical interventions, abstracts were presented as a ""bag of words"" model with the removal of stop words.

The results

1) Complement Naive Bayes: macro F-measure = 0.6934, micro F-measure = 0.7234; 2) Sequential Minimal Optimization: macro F-measure = 0.6543, micro F-measure = 0.7042; 3) Non linear SVM: macro F-measure = 0.6835, micro F-measure = 0.7642.

Conclusions

Based on the results of computational experiments, the best results of abstract clustering by containing and not containing medical intervention were obtained using the K-Means ++ algorithm together with LSA, choosing the first 210 facts. The quality of classification abstracts by subtypes of medical interventions value for existing ones [8] has been improved using non linear SVM algorithm, with ""bag of words"" model and the removal of stop words. The results of clustering obtained in this study will help in grouping abstracts by levels of evidence, using the classification by subtypes of medical interventions and it will be possible to extract information from the abstracts on specific types of interventions.",2015-01-01 +26030361,"A user-friendly phytoremediation database: creating the searchable database, the users, and the broader implications.","Designers, students, teachers, gardeners, farmers, landscape architects, architects, engineers, homeowners, and others have uses for the practice of phytoremediation. This research looks at the creation of a phytoremediation database which is designed for ease of use for a non-scientific user, as well as for students in an educational setting ( http://www.steviefamulari.net/phytoremediation ). During 2012, Environmental Artist & Professor of Landscape Architecture Stevie Famulari, with assistance from Kyla Witz, a landscape architecture student, created an online searchable database designed for high public accessibility. The database is a record of research of plant species that aid in the uptake of contaminants, including metals, organic materials, biodiesels & oils, and radionuclides. The database consists of multiple interconnected indexes categorized into common and scientific plant name, contaminant name, and contaminant type. 
It includes photographs, hardiness zones, specific plant qualities, full citations to the original research, and other relevant information intended to aid those designing with phytoremediation search for potential plants which may be used to address their site's need. The objective of the terminology section is to remove uncertainty for more inexperienced users, and to clarify terms for a more user-friendly experience. Implications of the work, including education and ease of browsing, as well as use of the database in teaching, are discussed.",2015-01-01 +25338716,DANN: a deep learning approach for annotating the pathogenicity of genetic variants.,"

Unlabelled

Annotating genetic variants, especially non-coding variants, for the purpose of identifying pathogenic variants remains a challenge. Combined annotation-dependent depletion (CADD) is an algorithm designed to annotate both coding and non-coding variants, and has been shown to outperform other annotation algorithms. CADD trains a linear kernel support vector machine (SVM) to differentiate evolutionarily derived, likely benign, alleles from simulated, likely deleterious, variants. However, SVMs cannot capture non-linear relationships among the features, which can limit performance. To address this issue, we have developed DANN. DANN uses the same feature set and training data as CADD to train a deep neural network (DNN). DNNs can capture non-linear relationships among features and are better suited than SVMs for problems with a large number of samples and features. We exploit Compute Unified Device Architecture-compatible graphics processing units and deep learning techniques such as dropout and momentum training to accelerate the DNN training. DANN achieves about a 19% relative reduction in the error rate and about a 14% relative increase in the area under the curve (AUC) metric over CADD's SVM methodology.

Availability and implementation

All data and source code are available at https://cbcl.ics.uci.edu/public_data/DANN/.",2014-10-22 +22509333,PolysacDB: a database of microbial polysaccharide antigens and their antibodies.,"Vaccines based on microbial cell surface polysaccharides have long been considered as attractive means to control infectious diseases. To realize this goal, detailed systematic information about the antigenic polysaccharide is necessary. However, only a few databases that provide limited knowledge in this area are available. This paper describes PolysacDB, a manually curated database of antigenic polysaccharides. We collected and compiled comprehensive information from literature and web resources about antigenic polysaccharides of microbial origin. The current version of the database has 1,554 entries of 149 different antigenic polysaccharides from 347 different microbes. Each entry provides comprehensive information about an antigenic polysaccharide, i.e., its origin, function, protocols for its conjugation to carriers, antibodies produced, details of assay systems, specificities of antibodies, proposed epitopes involved and antibody utilities. For convenience to the user, we have integrated web interface for searching, advanced searching and browsing data in database. This database will be useful for researchers working on polysaccharide-based vaccines. It is freely available from the URL: http://crdd.osdd.net/raghava/polysacdb/.",2012-04-11 +23327649,Kerfuffle: a web tool for multi-species gene colocalization analysis.,"

Background

The evolutionary pressures that underlie the large-scale functional organization of the genome are not well understood in eukaryotes. Recent evidence suggests that functionally similar genes may colocalize (cluster) in the eukaryotic genome, suggesting the role of chromatin-level gene regulation in shaping the physical distribution of coordinated genes. However, few of the bioinformatic tools currently available allow for a systematic study of gene colocalization across several, evolutionarily distant species. Furthermore, most tools require the user to input manually curated lists of gene position information, DNA sequence or gene homology relations between species. With the growing number of sequenced genomes, there is a need to provide new comparative genomics tools that can address the analysis of multi-species gene colocalization.

Results

Kerfuffle is a web tool designed to help discover, visualize, and quantify the physical organization of genomes by identifying significant gene colocalization and conservation across the assembled genomes of available species (currently up to 47, from humans to worms). Kerfuffle only requires the user to specify a list of human genes and the names of other species of interest. Without further input from the user, the software queries the e!Ensembl BioMart server to obtain positional information and discovers homology relations in all genes and species specified. Using this information, Kerfuffle performs a multi-species clustering analysis, presents downloadable lists of clustered genes, performs Monte Carlo statistical significance calculations, estimates how conserved gene clusters are across species, plots histograms and interactive graphs, allows users to save their queries, and generates a downloadable visualization of the clusters using the Circos software. These analyses may be used to further explore the functional roles of gene clusters by interrogating the enriched molecular pathways associated with each cluster.

Conclusions

Kerfuffle is a new, easy-to-use and publicly available tool to aid our understanding of functional genomics and comparative genomics. This software allows for flexibility and quick investigations of a user-defined set of genes, and the results may be saved online for further analysis. Kerfuffle is freely available at http://atwallab.org/kerfuffle, is implemented in JavaScript (using jQuery and jsCharts libraries) and PHP 5.2, runs on an Apache server, and stores data in flat files and an SQLite database.",2013-01-17 +25524787,Corrected QT changes during antipsychotic treatment of children and adolescents: a systematic review and meta-analysis of clinical trials.,"

Objective

To evaluate the effect of antipsychotics on the corrected QT (QTc) interval in youth.

Method

We searched PubMed (http://www.ncbi.nlm.nih.gov/pubmed) for randomized or open clinical trials of antipsychotics in youth <18 years with QTc data, meta-analyzing the results. Meta-regression analyses evaluated the effect of age, sex, dose, and study duration on QTc. Incidences of study-defined QTc prolongation (>440-470 milliseconds), QTc >500 milliseconds, and QTc change >60 milliseconds were also evaluated.

Results

A total of 55 studies were meta-analyzed, evaluating 108 treatment arms covering 9 antipsychotics and including 5,423 patients with QTc data (mean age = 12.8 ± 3.6 years, female = 32.1%). Treatments included aripiprazole: studies = 14; n = 814; haloperidol: studies = 1; n = 15; molindone: studies = 3; n = 125; olanzapine: studies = 5; n = 212; paliperidone: studies = 3; n = 177; pimozide: studies = 1; n = 25; quetiapine: studies = 5; n = 336; risperidone: studies = 23; n = 2,234; ziprasidone: studies = 10, n = 523; and placebo: studies = 19, n = 962. Within group, from baseline to endpoint, aripiprazole significantly decreased the QTc interval (-1.44 milliseconds, CI = -2.63 to -0.26, p = .017), whereas risperidone (+1.68, CI = +0.67 to +2.70, p = .001) and especially ziprasidone (+8.74, CI = +5.19 to +12.30, p < .001) significantly increased QTc. Compared to pooled placebo arms, aripiprazole decreased QTc (p = .007), whereas ziprasidone increased QTc (p < .001). Compared to placebo, none of the investigated antipsychotics caused a significant increase in the incidence of the 3 studied QTc prolongation measures, but there was significant reporting bias.

Conclusion

Based on these data, the risk of pathological QTc prolongation seems low during treatment with the 9 studied antipsychotics in otherwise healthy youth. Nevertheless, because individual risk factors interact with medication-related QTc effects, both medication and patient factors need to be considered when choosing antipsychotic treatment.",2014-10-16 +27235434,Evaluation of Various Campylobacter-Specific Quantitative PCR (qPCR) Assays for Detection and Enumeration of Campylobacteraceae in Irrigation Water and Wastewater via a Miniaturized Most-Probable-Number-qPCR Assay.,"

Unlabelled

Campylobacter spp. are the leading cause of bacterial gastroenteritis worldwide, and water is increasingly seen as a risk factor in transmission. Here we describe a most-probable-number (MPN)-quantitative PCR (qPCR) assay in which water samples are centrifuged and aliquoted into microtiter plates and the bacteria are enumerated by qPCR. We observed that commonly used Campylobacter molecular assays produced vastly different detection rates. In irrigation water samples, detection rates varied depending upon the PCR assay and culture method used, as follows: 0% by the de Boer Lv1-16S qPCR assay, 2.5% by the Van Dyke 16S and Jensen glyA qPCR assays, and 75% by the Linton 16S endpoint PCR when cultured at 37°C. Primer/probe specificity was the major confounder, with Arcobacter spp. routinely yielding false-positive results. The primers and PCR conditions described by Van Dyke et al. (M. I. Van Dyke, V. K. Morton, N. L. McLellan, and P. M. Huck, J Appl Microbiol 109:1053-1066, 2010, http://dx.doi.org/10.1111/j.1365-2672.2010.04730.x) proved to be the most sensitive and specific for Campylobacter detection in water. Campylobacter occurrence in irrigation water was found to be very low (<2 MPN/300 ml) when this Campylobacter-specific qPCR was used, with the most commonly detected species being C. jejuni, C. coli, and C. lari. Campylobacters in raw sewage were present at ∼10(2)/100 ml, with incubation at 42°C required for reducing microbial growth competition from arcobacters. Overall, when Campylobacter prevalence and/or concentration in water is reported using molecular methods, considerable validation is recommended when adapting methods largely developed for clinical applications. Furthermore, combining MPN methods with molecular biology-based detection algorithms allows for the detection and quantification of Campylobacter spp. 
in environmental samples and is potentially suited to quantitative microbial risk assessment for improved public health disease prevention related to food and water exposures.

Importance

The results of this study demonstrate the importance of assay validation upon data interpretation of environmental monitoring for Campylobacter when using molecular biology-based assays. Previous studies describing Campylobacter prevalence in Canada utilized primers that we have determined to be nonspecific due to their cross-amplification of Arcobacter spp. As such, Campylobacter prevalence may have been vastly overestimated in other studies. Additionally, the development of a quantitative assay described in this study will allow accurate determination of Campylobacter concentrations in environmental water samples, allowing more informed decisions to be made about water usage based on quantitative microbial risk assessment.",2016-07-15 +25855939,"Extreme Precipitation and Emergency Room Visits for Gastrointestinal Illness in Areas with and without Combined Sewer Systems: An Analysis of Massachusetts Data, 2003-2007.","

Background

Combined sewer overflows (CSOs) occur in combined sewer systems when sewage and stormwater runoff are released into water bodies, potentially contaminating water sources. CSOs are often caused by heavy precipitation and are expected to increase with increasing extreme precipitation associated with climate change.

Objectives

The aim of this study was to assess whether the association between heavy rainfall and rate of emergency room (ER) visits for gastrointestinal (GI) illness differed in the presence of CSOs.

Methods

For the study period 2003-2007, time series of daily rate of ER visits for GI illness and meteorological data were organized for three exposure regions: a) CSOs impacting drinking water sources, b) CSOs impacting recreational waters, c) no CSOs. A distributed lag Poisson regression assessed cumulative effects for an 8-day lag period following heavy (≥ 90th and ≥ 95th percentile) and extreme (≥ 99th percentile) precipitation events, controlling for temperature and long-term time trends.

Results

The association between extreme rainfall and rate of ER visits for GI illness differed among regions. Only the region with drinking water exposed to CSOs demonstrated a significant increased cumulative risk for rate (CRR) of ER visits for GI for all ages in the 8-day period following extreme rainfall: CRR: 1.13 (95% CI: 1.00, 1.28) compared with no rainfall.

Conclusions

The rate of ER visits for GI illness was associated with extreme precipitation in the area with CSO discharges to a drinking water source. Our findings suggest an increased risk for GI illness among consumers whose drinking water source may be impacted by CSOs after extreme precipitation.

Citation

Jagai JS, Li Q, Wang S, Messier KP, Wade TJ, Hilborn ED. 2015. Extreme precipitation and emergency room visits for gastrointestinal illness in areas with and without combined sewer systems: an analysis of Massachusetts data, 2003-2007. Environ Health Perspect 123:873-879; http://dx.doi.org/10.1289/ehp.1408971.",2015-04-09 +25108785,Prediction of Kunitz ion channel effectors and protease inhibitors from the Ixodes ricinus sialome.,"In the next generation sequencing era we are encountering hundreds of thousands of sequences from specific organisms. Such massive data must be accurately classified both functionally and structurally. Determining appropriate sequences with a specific function from next generation sequencing, however, is a daunting experimental task. A recent salivary gland transcriptome from the hard tick Ixodes ricinus, a European disease vector, has been made publicly available. Among the protein families sequenced by the salivary gland transcriptome of I. ricinus, the Kunitz-domain is one of the highly represented protein families. Thus far, recent tick transciptomes solely classify (computationally) Kunitz sequences as putative serine protease inhibitors. We present here a novel method using a machine-learning algorithm to ""fish"" for candidate ion-channel effectors and loss of serine protease inhibitor function within the Kunitz-domain protein family of the I. ricinus salivary gland transcriptome. The models, data and scripts used in this work are available online from http://life.bsc.es/pid/web/imoal/kunitz-classification.html.",2014-07-29 +25048627,Clinical prediction from structural brain MRI scans: a large-scale empirical study.,"Multivariate pattern analysis (MVPA) methods have become an important tool in neuroimaging, revealing complex associations and yielding powerful prediction models. 
Despite methodological developments and novel application domains, there has been little effort to compile benchmark results that researchers can reference and compare against. This study takes a significant step in this direction. We employed three classes of state-of-the-art MVPA algorithms and common types of structural measurements from brain Magnetic Resonance Imaging (MRI) scans to predict an array of clinically relevant variables (diagnosis of Alzheimer's, schizophrenia, autism, and attention deficit and hyperactivity disorder; age, cerebrospinal fluid derived amyloid-β levels and mini-mental state exam score). We analyzed data from over 2,800 subjects, compiled from six publicly available datasets. The employed data and computational tools are freely distributed ( https://www.nmr.mgh.harvard.edu/lab/mripredict), making this the largest, most comprehensive, reproducible benchmark image-based prediction experiment to date in structural neuroimaging. Finally, we make several observations regarding the factors that influence prediction performance and point to future research directions. Unsurprisingly, our results suggest that the biological footprint (effect size) has a dramatic influence on prediction performance. Though the choice of image measurement and MVPA algorithm can impact the result, there was no universally optimal selection. Intriguingly, the choice of algorithm seemed to be less critical than the choice of measurement type. 
Finally, our results showed that cross-validation estimates of performance, while generally optimistic, correlate well with generalization accuracy on a new dataset.",2015-01-01 +23082758,Structural basis of peptide recognition by the angiotensin-1 converting enzyme homologue AnCE from Drosophila melanogaster.,"Human somatic angiotensin-1 converting enzyme (ACE) is a zinc-dependent exopeptidase, that catalyses the conversion of the decapeptide angiotensin I to the octapeptide angiotensin II, by removing a C-terminal dipeptide. It is the principal component of the renin-angiotensin-aldosterone system that regulates blood pressure. Hence it is an important therapeutic target for the treatment of hypertension and cardiovascular disorders. Here, we report the structures of an ACE homologue from Drosophila melanogaster (AnCE; a proven structural model for the more complex human ACE) co-crystallized with mammalian peptide substrates (bradykinin, Thr(6) -bradykinin, angiotensin I and a snake venom peptide inhibitor, bradykinin-potentiating peptide-b). The structures determined at 2-Å resolution illustrate that both angiotensin II (the cleaved product of angiotensin I by AnCE) and bradykinin-potentiating peptide-b bind in an analogous fashion at the active site of AnCE, but also exhibit significant differences. In addition, the binding of Arg-Pro-Pro, the cleavage product of bradykinin and Thr(6) - bradykinin, provides additional detail of the general peptide binding in AnCE. Thus the new structures of AnCE complexes presented here improves our understanding of the binding of peptides and the mechanism by which peptides inhibit this family of enzymes.

Database

The atomic coordinates and structure factors for AnCE-Ang II (code 4AA1), AnCE-BPPb (code 4AA2), AnCE-BK (code 4ASQ) and AnCE-Thr6-BK (code 4ASR) complexes have been deposited in the Protein Data Bank, Research Collaboratory for Structural Bioinformatics, Rutgers University, New Brunswick, NJ (http://www.rcsb.org/)

Structured digital abstract

• AnCE cleaves Ang I by enzymatic study (View interaction) • Bradykinin and AnCE bind by x-ray crystallography (View interaction) • BPP and AnCE bind by x-ray crystallography (View interaction) • AnCE cleaves Bradykinin by enzymatic study (View interaction) • Ang II and AnCE bind by x-ray crystallography (View interaction).",2012-11-22 +24931992,A statistical approach for inferring the 3D structure of the genome.,"

Motivation

Recent technological advances allow the measurement, in a single Hi-C experiment, of the frequencies of physical contacts among pairs of genomic loci at a genome-wide scale. The next challenge is to infer, from the resulting DNA-DNA contact maps, accurate 3D models of how chromosomes fold and fit into the nucleus. Many existing inference methods rely on multidimensional scaling (MDS), in which the pairwise distances of the inferred model are optimized to resemble pairwise distances derived directly from the contact counts. These approaches, however, often optimize a heuristic objective function and require strong assumptions about the biophysics of DNA to transform interaction frequencies to spatial distance, and thereby may lead to incorrect structure reconstruction.

Methods

We propose a novel approach to infer a consensus 3D structure of a genome from Hi-C data. The method incorporates a statistical model of the contact counts, assuming that the counts between two loci follow a Poisson distribution whose intensity decreases with the physical distances between the loci. The method can automatically adjust the transfer function relating the spatial distance to the Poisson intensity and infer a genome structure that best explains the observed data.

Results

We compare two variants of our Poisson method, with or without optimization of the transfer function, to four different MDS-based algorithms-two metric MDS methods using different stress functions, a non-metric version of MDS and ChromSDE, a recently described, advanced MDS method-on a wide range of simulated datasets. We demonstrate that the Poisson models reconstruct better structures than all MDS-based methods, particularly at low coverage and high resolution, and we highlight the importance of optimizing the transfer function. On publicly available Hi-C data from mouse embryonic stem cells, we show that the Poisson methods lead to more reproducible structures than MDS-based methods when we use data generated using different restriction enzymes, and when we reconstruct structures at different resolutions.

Availability and implementation

A Python implementation of the proposed method is available at http://cbio.ensmp.fr/pastis.",2014-06-01 +24357408,Combining DGE and RNA-sequencing data to identify new polyA+ non-coding transcripts in the human genome.,"Recent sequencing technologies that allow massive parallel production of short reads are the method of choice for transcriptome analysis. Particularly, digital gene expression (DGE) technologies produce a large dynamic range of expression data by generating short tag signatures for each cell transcript. These tags can be mapped back to a reference genome to identify new transcribed regions that can be further covered by RNA-sequencing (RNA-Seq) reads. Here, we applied an integrated bioinformatics approach that combines DGE tags, RNA-Seq, tiling array expression data and species-comparison to explore new transcriptional regions and their specific biological features, particularly tissue expression or conservation. We analysed tags from a large DGE data set (designated as 'TranscriRef'). We then annotated 750,000 tags that were uniquely mapped to the human genome according to Ensembl. We retained transcripts originating from both DNA strands and categorized tags corresponding to protein-coding genes, antisense, intronic- or intergenic-transcribed regions and computed their overlap with annotated non-coding transcripts. Using this bioinformatics approach, we identified ∼34,000 novel transcribed regions located outside the boundaries of known protein-coding genes. As demonstrated using sequencing data from human pluripotent stem cells for biological validation, the method could be easily applied for the selection of tissue-specific candidate transcripts. 
DigitagCT is available at http://cractools.gforge.inria.fr/softwares/digitagct.",2013-12-18 +25610631,Macromolecular ab initio phasing enforcing secondary and tertiary structure.,"Ab initio phasing of macromolecular structures, from the native intensities alone with no experimental phase information or previous particular structural knowledge, has been the object of a long quest, limited by two main barriers: structure size and resolution of the data. Current approaches to extend the scope of ab initio phasing include use of the Patterson function, density modification and data extrapolation. The authors' approach relies on the combination of locating model fragments such as polyalanine α-helices with the program PHASER and density modification with the program SHELXE. Given the difficulties in discriminating correct small substructures, many putative groups of fragments have to be tested in parallel; thus calculations are performed in a grid or supercomputer. The method has been named after the Italian painter Arcimboldo, who used to compose portraits out of fruit and vegetables. With ARCIMBOLDO, most collections of fragments remain a 'still-life', but some are correct enough for density modification and main-chain tracing to reveal the protein's true portrait. Beyond α-helices, other fragments can be exploited in an analogous way: libraries of helices with modelled side chains, β-strands, predictable fragments such as DNA-binding folds or fragments selected from distant homologues up to libraries of small local folds that are used to enforce nonspecific tertiary structure; thus restoring the ab initio nature of the method. Using these methods, a number of unknown macromolecules with a few thousand atoms and resolutions around 2 Å have been solved. In the 2014 release, use of the program has been simplified. 
The software mediates the use of massive computing to automate the grid access required in difficult cases but may also run on a single multicore workstation (http://chango.ibmb.csic.es/ARCIMBOLDO_LITE) to solve straightforward cases.",2015-01-01 +21524302,Simrank: Rapid and sensitive general-purpose k-mer search tool.,"

Background

Terabyte-scale collections of string-encoded data are expected from consortia efforts such as the Human Microbiome Project http://nihroadmap.nih.gov/hmp. Intra- and inter-project data similarity searches are enabled by rapid k-mer matching strategies. Software applications for sequence database partitioning, guide tree estimation, molecular classification and alignment acceleration have benefited from embedded k-mer searches as sub-routines. However, a rapid, general-purpose, open-source, flexible, stand-alone k-mer tool has not been available.

Results

Here we present a stand-alone utility, Simrank, which allows users to rapidly identify database strings the most similar to query strings. Performance testing of Simrank and related tools against DNA, RNA, protein and human-languages found Simrank 10X to 928X faster depending on the dataset.

Conclusions

Simrank provides molecular ecologists with a high-throughput, open source choice for comparing large sequence sets to find similarity.",2011-04-27 +23850384,Open data in public health surveillance systems: a case study using the French Sentinelles network.,"

Introduction

Public Health Surveillance (PHS) produces an increasing number of health indicators. Exposing these data is at the core of interoperability; however no standard has yet been adopted for such information on the internet.

Method

Here, we compared two approaches to expose data from the French Sentinelles network, an information system focusing on communicable diseases surveillance in the general population. We implemented SDMX-HD (Statistical Data and Metadata Exchange-Health Domain), a standard supported by government agencies to exchange statistical data and OpenData (OData), a general purpose protocol proposed by Microsoft Corp. The same data were described using SDMX-HD (available at http://sdmx.sentiweb.fr) and using OData (http://odata.sentiweb.fr).

Discussion

These two use cases proved the feasibility of opening public health data on the internet, and highlighted difficulties: SDMX, a full-featured solution, encouraged harmonization and reusability, sustainability, but required complex developments and tools; OData was much simpler to implement but required a ""from scratch"" description and did not encourage reusability. From an end-user perspective, integration in every-day tools is not achieved yet. These two approaches are a first step to interoperability in PHS.",2013-07-10 +24681903,Census 2: isobaric labeling data analysis.,"

Motivation

We introduce Census 2, an update of a mass spectrometry data analysis tool for peptide/protein quantification. New features for analysis of isobaric labeling, such as Tandem Mass Tag (TMT) or Isobaric Tags for Relative and Absolute Quantification (iTRAQ), have been added in this version, including a reporter ion impurity correction, a reporter ion intensity threshold filter and an option for weighted normalization to correct mixing errors. TMT/iTRAQ analysis can be performed on experiments using HCD (High Energy Collision Dissociation) only, CID (Collision Induced Dissociation)/HCD (High Energy Collision Dissociation) dual scans or HCD triple-stage mass spectrometry data. To improve measurement accuracy, we implemented weighted normalization, multiple tandem spectral approach, impurity correction and dynamic intensity threshold features.

Availability and implementation

Census 2 supports multiple input file formats including MS1/MS2, DTASelect, mzXML and pepXML. It requires JAVA version 6 or later to run. Free download of Census 2 for academic users is available at http://fields.scripps.edu/census/index.php.

Contact

jyates@scripps.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-03-28 +21435986,A potential causal association mining algorithm for screening adverse drug reactions in postmarketing surveillance.,"Early detection of unknown adverse drug reactions (ADRs) in postmarketing surveillance saves lives and prevents harmful consequences. We propose a novel data mining approach to signaling potential ADRs from electronic health databases. More specifically, we introduce potential causal association rules (PCARs) to represent the potential causal relationship between a drug and ICD-9 (CDC. (2010). International Classification of Diseases, Ninth Revision (ICD-9). [Online]. Available: http://www.cdc.gov/nchs/icd/icd9.html) coded signs or symptoms representing potential ADRs. Due to the infrequent nature of ADRs, the existing frequency-based data mining methods cannot effectively discover PCARs. We introduce a new interestingness measure, potential causal leverage, to quantify the degree of association of a PCAR. This measure is based on the computational, experience-based fuzzy recognition-primed decision (RPD) model that we developed previously (Y. Ji, R. M. Massanari, J. Ager, J. Yen, R. E. Miller, and H. Ying, ""A fuzzy logic-based computational recognition-primed decision model,"" Inf. Sci., vol. 177, pp. 4338-4353, 2007) on the basis of the well-known, psychology-originated qualitative RPD model (G. A. Klein, ""A recognition-primed decision making model of rapid decision making,"" in Decision Making in Action: Models and Methods, 1993, pp. 138-147). The potential causal leverage assesses the strength of the association of a drug-symptom pair given a collection of patient cases. To test our data mining approach, we retrieved electronic medical data for 16,206 patients treated by one or more than eight drugs of our interest at the Veterans Affairs Medical Center in Detroit between 2007 and 2009. We selected enalapril as the target drug for this ADR signal generation study. 
We used our algorithm to preliminarily evaluate the associations between enalapril and all the ICD-9 codes associated with it. The experimental results indicate that our approach has a potential to better signal potential ADRs than risk ratio and leverage, two traditional frequency-based measures. Among the top 50 signal pairs (i.e., enalapril versus symptoms) ranked by the potential causal-leverage measure, the physicians on the project determined that eight of them probably represent true causal associations.",2011-03-24 +25037738,Bis-class: a new classification tool of methylation status using bayes classifier and local methylation information.,"

Background

Whole genome sequencing of bisulfite converted DNA ('methylC-seq') method provides comprehensive information of DNA methylation. An important application of these whole genome methylation maps is classifying each position as a methylated versus non-methylated nucleotide. A widely used current method for this purpose, the so-called binomial method, is intuitive and straightforward, but lacks power when the sequence coverage and the genome-wide methylation level are low. These problems present a particular challenge when analyzing sparsely methylated genomes, such as those of many invertebrates and plants.

Results

We demonstrate that the number of sequence reads per position from methylC-seq data displays a large variance and can be modeled as a shifted negative binomial distribution. We also show that DNA methylation levels of adjacent CpG sites are correlated, and this similarity in local DNA methylation levels extends several kilobases. Taking these observations into account, we propose a new method based on Bayesian classification to infer DNA methylation status while considering the neighborhood DNA methylation levels of a specific site. We show that our approach has higher sensitivity and better classification performance than the binomial method via multiple analyses, including computational simulations, Area Under Curve (AUC) analyses, and improved consistencies across biological replicates. This method is especially advantageous in the analyses of sparsely methylated genomes with low coverage.

Conclusions

Our method improves the existing binomial method for binary methylation calls by utilizing a posterior odds framework and incorporating local methylation information. This method should be widely applicable to the analyses of methylC-seq data from diverse sparsely methylated genomes. Bis-Class and example data are provided at a dedicated website (http://bibs.snu.ac.kr/software/Bisclass).",2014-07-18 +25550042,Semantic similarity measurement between gene ontology terms based on exclusively inherited shared information.,"Quantifying the semantic similarities between pairs of terms in the Gene Ontology (GO) structure can help to explore the functional relationships between biological entities. A common approach to this problem is to measure the information they have in common based on the information content of their common ancestors. However, many studies have their limitations in measuring the information two GO terms share. This study presented a new measurement, exclusively inherited shared information (EISI) that captured the information shared by two terms based on an intuitive observation on the multiple inheritance relationships among the terms in the GO graph. EISI was derived from the information content of the exclusively inherited common ancestors (EICAs), which were screened from the common ancestors according to the attribute of their direct children. The effectiveness of EISI was evaluated against some state-of-the-art measurements on both artificial and real datasets, it produced more relevant results with experts' scores on the artificial dataset, and supported the prior knowledge of gene function in pathways on the Saccharomyces genome database (SGD). 
The promising features of EISI are the following: (1) it provides a more effective way to characterize the semantic relationship between two GO terms by taking into account multiple common ancestors related, and (2) can quickly detect all EICAs with time complexity of O(n), which is much more efficient than other methods based on disjunctive common ancestors. It is a promising alternative to multiple inheritance based methods for practical applications on large-scale dataset. The algorithm EISI was implemented in Matlab and is freely available from http://treaton.evai.pl/EISI/.",2014-12-28 +26001950,Early life factors associated with the exclusivity and duration of breast feeding in an Irish birth cohort study.,"

Objective

to investigate the influence of parental and infant characteristics on exclusive breast feeding from birth to six months of age and breast feeding rates at two, six and 12 months of age in Ireland.

Methodology

secondary data analysis from the Cork BASELINE Birth Cohort Study (http://www.baselinestudy.net/). Infants were seen at birth and two, six, and 12 months of age. Maternal and paternal history, neonatal course and feeding data were collected at birth and using parental questionnaires at each time point.

Participants

1094 singleton infants of primiparous women recruited at 20 weeks' gestation who were breastfeeding on discharge from the maternity hospital.

Findings

at discharge from the maternity hospital and at two months, neonatal intensive-care unit admission had the strongest influence on exclusive breast feeding status (adjusted OR 0.17, 95% CI 0.07-0.41 at discharge) and at two months (adjusted OR=0.20, 95% CI 0.05-0.83). A shorter duration of breast feeding was significantly associated with younger maternal age, non-tertiary education, Irish nationality and neonatal intensive-care unit admission. There was a significant difference in the duration of any breast feeding between infants who were and were not admitted to the neonatal intensive-care unit, 28(10.50, 32) weeks versus 32(27, 40) weeks. Mothers whose maternity leave was between seven and 12 months (adjusted OR=2.76, 95% CI 1.51-5.05) breast fed for a longer duration compared to mothers who had less than six months of maternity leave.

Key conclusions

admission to the neonatal intensive care unit negatively influenced both exclusivity and duration of breast feeding. Length of maternity leave, and not employment status, was significantly associated with duration of breast feeding.

Implications for practice

additional support may be required to ensure continued breast feeding in infants admitted to the neonatal intensive-care unit. Length of maternity leave is a modifiable influence on breast feeding and offers the opportunity for intervention to improve our rates of breast feeding.",2015-05-05 +21586118,The Roche Cancer Genome Database 2.0.,"

Background

Cancer is a disease of genome alterations that arise through the acquisition of multiple somatic DNA sequence mutations. Some of these mutations can be critical for the development of a tumor and can be useful to characterize tumor types or predict outcome.

Description

We have constructed an integrated biological information system termed the Roche Cancer Genome Database (RCGDB) combining different human mutation databases already publicly available. This data is further extended by hand-curated information from publications. The current version of the RCGDB provides a user-friendly graphical interface that gives access to the data in different ways: (1) Single interactive search by genes, samples, cell lines, diseases, as well as pathways, (2) batch searches for genes and cell lines, (3) customized searches for regularly occurring requests, and (4) an advanced query interface enabling the user to query for samples and mutations by various filter criteria.

Conclusion

The interfaces of the presented database enable the user to search and view mutations in an intuitive and straight-forward manner. The database is freely accessible at http://rcgdb.bioinf.uni-sb.de/MutomeWeb/.",2011-05-17 +21575196,PRIN: a predicted rice interactome network.,"

Background

Protein-protein interactions play a fundamental role in elucidating the molecular mechanisms of biomolecular function, signal transductions and metabolic pathways of living organisms. Although high-throughput technologies such as yeast two-hybrid system and affinity purification followed by mass spectrometry are widely used in model organisms, the progress of protein-protein interactions detection in plants is rather slow. With this motivation, our work presents a computational approach to predict protein-protein interactions in Oryza sativa.

Results

To better understand the interactions of proteins in Oryza sativa, we have developed PRIN, a Predicted Rice Interactome Network. Protein-protein interaction data of PRIN are based on the interologs of six model organisms where large-scale protein-protein interaction experiments have been applied: yeast (Saccharomyces cerevisiae), worm (Caenorhabditis elegans), fruit fly (Drosophila melanogaster), human (Homo sapiens), Escherichia coli K12 and Arabidopsis thaliana. With certain quality controls, altogether we obtained 76,585 non-redundant rice protein interaction pairs among 5,049 rice proteins. Further analysis showed that the topology properties of predicted rice protein interaction network are more similar to yeast than to the other 5 organisms. This may not be surprising as the interologs based on yeast contribute nearly 74% of total interactions. In addition, GO annotation, subcellular localization information and gene expression data are also mapped to our network for validation. Finally, a user-friendly web interface was developed to offer convenient database search and network visualization.

Conclusions

PRIN is the first well annotated protein interaction database for the important model plant Oryza sativa. It has greatly extended the current available protein-protein interaction data of rice with a computational approach, which will certainly provide further insights into rice functional genomics and systems biology. PRIN is available online at http://bis.zju.edu.cn/prin/.",2011-05-16 +26353005,Zero-Aliasing Correlation Filters for Object Recognition.,"Correlation filters (CFs) are a class of classifiers that are attractive for object localization and tracking applications. Traditionally, CFs have been designed in the frequency domain using the discrete Fourier transform (DFT), where correlation is efficiently implemented. However, existing CF designs do not account for the fact that the multiplication of two DFTs in the frequency domain corresponds to a circular correlation in the time/spatial domain. Because this was previously unaccounted for, prior CF designs are not truly optimal, as their optimization criteria do not accurately quantify their optimization intention. In this paper, we introduce new zero-aliasing constraints that completely eliminate this aliasing problem by ensuring that the optimization criterion for a given CF corresponds to a linear correlation rather than a circular correlation. This means that previous CF designs can be significantly improved by this reformulation. We demonstrate the benefits of this new CF design approach with several important CFs. We present experimental results on diverse data sets and present solutions to the computational challenges associated with computing these CFs. Code for the CFs described in this paper and their respective zero-aliasing versions is available at http://vishnu.boddeti.net/projects/correlation-filters.html.",2015-08-01 +24260380,DAG expression: high-throughput gene expression analysis of real-time PCR data using standard curves for relative quantification.,"

Background

Real-time quantitative PCR (qPCR) is still the gold-standard technique for gene-expression quantification. Recent technological advances of this method allow for the high-throughput gene-expression analysis, without the limitations of sample space and reagent used. However, non-commercial and user-friendly software for the management and analysis of these data is not available.

Results

The recently developed commercial microarrays allow for the drawing of standard curves of multiple assays using the same n-fold diluted samples. Data Analysis Gene (DAG) Expression software has been developed to perform high-throughput gene-expression data analysis using standard curves for relative quantification and one or multiple reference genes for sample normalization. We discuss the application of DAG Expression in the analysis of data from an experiment performed with Fluidigm technology, in which 48 genes and 115 samples were measured. Furthermore, the quality of our analysis was tested and compared with other available methods.

Conclusions

DAG Expression is a freely available software that permits the automated analysis and visualization of high-throughput qPCR. A detailed manual and a demo-experiment are provided within the DAG Expression software at http://www.dagexpression.com/dage.zip.",2013-11-18 +24336804,DIVE: a data intensive visualization engine.,"

Summary

Modern scientific investigation is generating increasingly larger datasets, yet analyzing these data with current tools is challenging. DIVE is a software framework intended to facilitate big data analysis and reduce the time to scientific insight. Here, we present features of the framework and demonstrate DIVE's application to the Dynameomics project, looking specifically at two proteins.

Availability and implementation

Binaries and documentation are available at http://www.dynameomics.org/DIVE/DIVESetup.exe.",2013-12-13 +23160411,"Developing a biocuration workflow for AgBase, a non-model organism database.","AgBase provides annotation for agricultural gene products using the Gene Ontology (GO) and Plant Ontology, as appropriate. Unlike model organism species, agricultural species have a body of literature that does not just focus on gene function; to improve efficiency, we use text mining to identify literature for curation. The first component of our annotation interface is the gene prioritization interface that ranks gene products for annotation. Biocurators select the top-ranked gene and mark annotation for these genes as 'in progress' or 'completed'; links enable biocurators to move directly to our biocuration interface (BI). Our BI includes all current GO annotation for gene products and is the main interface to add/modify AgBase curation data. The BI also displays Extracting Genic Information from Text (eGIFT) results for each gene product. eGIFT is a web-based, text-mining tool that associates ranked, informative terms (iTerms) and the articles and sentences containing them, with genes. Moreover, iTerms are linked to GO terms, where they match either a GO term name or a synonym. This enables AgBase biocurators to rapidly identify literature for further curation based on possible GO terms. Because most agricultural species do not have standardized literature, eGIFT searches all gene names and synonyms to associate articles with genes. As many of the gene names can be ambiguous, eGIFT applies a disambiguation step to remove matches that do not correspond to this gene, and filtering is applied to remove abstracts that mention a gene in passing. The BI is linked to our Journal Database (JDB) where corresponding journal citations are stored. Just as importantly, biocurators also add to the JDB citations that have no GO annotation. 
The AgBase BI also supports bulk annotation upload to facilitate our Inferred from electronic annotation of agricultural gene products. All annotations must pass standard GO Consortium quality checking before release in AgBase. Database URL: http://www.agbase.msstate.edu/.",2012-11-17 +27074702,Exposure to Greenness and Mortality in a Nationwide Prospective Cohort Study of Women.,"

Background

Green, natural environments may ameliorate adverse environmental exposures (e.g., air pollution, noise, and extreme heat), increase physical activity and social engagement, and lower stress.

Objectives

We aimed to examine the prospective association between residential greenness and mortality.

Methods

Using data from the U.S.-based Nurses' Health Study prospective cohort, we defined cumulative average time-varying seasonal greenness surrounding each participant's address using satellite imagery [Normalized Difference Vegetation Index (NDVI)]. We followed 108,630 women and observed 8,604 deaths between 2000 and 2008.

Results

In models adjusted for mortality risk factors (age, race/ethnicity, smoking, and individual- and area-level socioeconomic status), women living in the highest quintile of cumulative average greenness (accounting for changes in residence during follow-up) in the 250-m area around their home had a 12% lower rate of all-cause nonaccidental mortality [95% confidence interval (CI); 0.82, 0.94] than those in the lowest quintile. The results were consistent for the 1,250-m area, although the relationship was slightly attenuated. These associations were strongest for respiratory and cancer mortality. The findings from a mediation analysis suggested that the association between greenness and mortality may be at least partly mediated by physical activity, particulate matter < 2.5 μm, social engagement, and depression.

Conclusions

Higher levels of green vegetation were associated with decreased mortality. Policies to increase vegetation may provide opportunities for physical activity, reduce harmful exposures, increase social engagement, and improve mental health. Planting vegetation may mitigate the effects of climate change; in addition, evidence of an association between vegetation and lower mortality rates suggests it also might be used to improve health.

Citation

James P, Hart JE, Banay RF, Laden F. 2016. Exposure to greenness and mortality in a nationwide prospective cohort study of women. Environ Health Perspect 124:1344-1352; http://dx.doi.org/10.1289/ehp.1510363.",2016-04-14 +22693222,BioMe: biologically relevant metals.,"In this article, we introduce BioMe (biologically relevant metals), a web-based platform for calculation of various statistical properties of metal-binding sites. Users can obtain the following statistical properties: presence of selected ligands in metal coordination sphere, distribution of coordination numbers, percentage of metal ions coordinated by the combination of selected ligands, distribution of monodentate and bidentate metal-carboxyl, bindings for ASP and GLU, percentage of particular binuclear metal centers, distribution of coordination geometry, descriptive statistics for a metal ion-donor distance and percentage of the selected metal ions coordinated by each of the selected ligands. Statistics is presented in numerical and graphical forms. The underlying database contains information about all contacts within the range of 3 Å from a metal ion found in the asymmetric crystal unit. The stored information for each metal ion includes Protein Data Bank code, structure determination method, types of metal-binding chains [protein, ribonucleic acid (RNA), deoxyribonucleic acid (DNA), water and other] and names of the bounded ligands (amino acid residue, RNA nucleotide, DNA nucleotide, water and other) and the coordination number, the coordination geometry and, if applicable, another metal(s). BioMe is on a regular weekly update schedule. It is accessible at http://metals.zesoi.fer.hr.",2012-06-12 +23802613,Rainbow: a tool for large-scale whole-genome sequencing data analysis using cloud computing.,"

Background

Technical improvements have decreased sequencing costs and, as a result, the size and number of genomic datasets have increased rapidly. Because of the lower cost, large amounts of sequence data are now being produced by small to midsize research groups. Crossbow is a software tool that can detect single nucleotide polymorphisms (SNPs) in whole-genome sequencing (WGS) data from a single subject; however, Crossbow has a number of limitations when applied to multiple subjects from large-scale WGS projects. The data storage and CPU resources that are required for large-scale whole genome sequencing data analyses are too large for many core facilities and individual laboratories to provide. To help meet these challenges, we have developed Rainbow, a cloud-based software package that can assist in the automation of large-scale WGS data analyses.

Results

Here, we evaluated the performance of Rainbow by analyzing 44 different whole-genome-sequenced subjects. Rainbow has the capacity to process genomic data from more than 500 subjects in two weeks using cloud computing provided by the Amazon Web Service. The time includes the import and export of the data using Amazon Import/Export service. The average cost of processing a single sample in the cloud was less than 120 US dollars. Compared with Crossbow, the main improvements incorporated into Rainbow include the ability: (1) to handle BAM as well as FASTQ input files; (2) to split large sequence files for better load balance downstream; (3) to log the running metrics in data processing and monitoring multiple Amazon Elastic Compute Cloud (EC2) instances; and (4) to merge SOAPsnp outputs for multiple individuals into a single file to facilitate downstream genome-wide association studies.

Conclusions

Rainbow is a scalable, cost-effective, and open-source tool for large-scale WGS data analysis. For human WGS data sequenced by either the Illumina HiSeq 2000 or HiSeq 2500 platforms, Rainbow can be used straight out of the box. Rainbow is available for third-party implementation and use, and can be downloaded from http://s3.amazonaws.com/jnj_rainbow/index.html.",2013-06-27 +24821734,Scalable Collaborative Infrastructure for a Learning Healthcare System (SCILHS): architecture.,"We describe the architecture of the Patient Centered Outcomes Research Institute (PCORI) funded Scalable Collaborative Infrastructure for a Learning Healthcare System (SCILHS, http://www.SCILHS.org) clinical data research network, which leverages the $48 billion dollar federal investment in health information technology (IT) to enable a queryable semantic data model across 10 health systems covering more than 8 million patients, plugging universally into the point of care, generating evidence and discovery, and thereby enabling clinician and patient participation in research during the patient encounter. Central to the success of SCILHS is development of innovative 'apps' to improve PCOR research methods and capacitate point of care functions such as consent, enrollment, randomization, and outreach for patient-reported outcomes. SCILHS adapts and extends an existing national research network formed on an advanced IT infrastructure built with open source, free, modular components.",2014-05-12 +25933359,Temporal Variation in Heat-Mortality Associations: A Multicountry Study.,"

Background

Recent investigations have reported a decline in the heat-related mortality risk during the last decades. However, these studies are frequently based on modeling approaches that do not fully characterize the complex temperature-mortality relationship, and are limited to single cities or countries.

Objectives

We assessed the temporal variation in heat-mortality associations in a multi-country data set using flexible modelling techniques.

Methods

We collected data for 272 locations in Australia, Canada, Japan, South Korea, Spain, the United Kingdom, and the United States, with a total 20,203,690 deaths occurring in summer months between 1985 and 2012. The analysis was based on two-stage time-series models. The temporal variation in heat-mortality relationships was estimated in each location with time-varying distributed lag nonlinear models, expressed through an interaction between the transformed temperature variables and time. The estimates were pooled by country through multivariate meta-analysis.

Results

Mortality risk due to heat appeared to decrease over time in several countries, with relative risks associated to high temperatures significantly lower in 2006 compared with 1993 in the United States, Japan, and Spain, and a nonsignificant decrease in Canada. Temporal changes are difficult to assess in Australia and South Korea due to low statistical power, and we found little evidence of variation in the United Kingdom. In the United States, the risk seems to be completely abated in 2006 for summer temperatures below their 99th percentile, but some significant excess persists for higher temperatures in all the countries.

Conclusions

We estimated a statistically significant decrease in the relative risk for heat-related mortality in 2006 compared with 1993 in the majority of countries included in the analysis.

Citation

Gasparrini A, Guo Y, Hashizume M, Kinney PL, Petkova EP, Lavigne E, Zanobetti A, Schwartz JD, Tobias A, Leone M, Tong S, Honda Y, Kim H, Armstrong BG. 2015. Temporal variation in heat-mortality associations: a multicountry study. Environ Health Perspect 123:1200-1207; http://dx.doi.org/10.1289/ehp.1409070.",2015-05-01 +22768229,MetaboSearch: tool for mass-based metabolite identification using multiple databases.,"

Unlabelled

Searching metabolites against databases according to their masses is often the first step in metabolite identification for a mass spectrometry-based untargeted metabolomics study. Major metabolite databases include Human Metabolome DataBase (HMDB), Madison Metabolomics Consortium Database (MMCD), Metlin, and LIPID MAPS. Since each one of these databases covers only a fraction of the metabolome, integration of the search results from these databases is expected to yield a more comprehensive coverage. However, the manual combination of multiple search results is generally difficult when identification of hundreds of metabolites is desired. We have implemented a web-based software tool that enables simultaneous mass-based search against the four major databases, and the integration of the results. In addition, more complete chemical identifier information for the metabolites is retrieved by cross-referencing multiple databases. The search results are merged based on IUPAC International Chemical Identifier (InChI) keys. Besides a simple list of m/z values, the software can accept the ion annotation information as input for enhanced metabolite identification. The performance of the software is demonstrated on mass spectrometry data acquired in both positive and negative ionization modes. Compared with search results from individual databases, MetaboSearch provides better coverage of the metabolome and more complete chemical identifier information.

Availability

The software tool is available at http://omics.georgetown.edu/MetaboSearch.html.",2012-06-29 +25566282,plantDARIO: web based quantitative and qualitative analysis of small RNA-seq data in plants.,"High-throughput sequencing techniques have made it possible to assay an organism's entire repertoire of small non-coding RNAs (ncRNAs) in an efficient and cost-effective manner. The moderate size of small RNA-seq datasets makes it feasible to provide free web services to the research community that provide many basic features of a small RNA-seq analysis, including quality control, read normalization, ncRNA quantification, and the prediction of putative novel ncRNAs. DARIO is one such system that so far has been focussed on animals. Here we introduce an extension of this system to plant short non-coding RNAs (sncRNAs). It includes major modifications to cope with plant-specific sncRNA processing. The current version of plantDARIO covers analyses of mapping files, small RNA-seq quality control, expression analyses of annotated sncRNAs, including the prediction of novel miRNAs and snoRNAs from unknown expressed loci and expression analyses of user-defined loci. At present Arabidopsis thaliana, Beta vulgaris, and Solanum lycopersicum are covered. The web tool links to a plant specific visualization browser to display the read distribution of the analyzed sample. The easy-to-use platform of plantDARIO quantifies RNA expression of annotated sncRNAs from different sncRNA databases together with new sncRNAs, annotated by our group. The plantDARIO website can be accessed at http://plantdario.bioinf.uni-leipzig.de/.",2014-12-23 +25540186,Knowledge-based modeling of peptides at protein interfaces: PiPreD.,"

Motivation

Protein-protein interactions (PPIs) underpin virtually all cellular processes both in health and disease. Modulating the interaction between proteins by means of small (chemical) agents is therefore a promising route for future novel therapeutic interventions. In this context, peptides are gaining momentum as emerging agents for the modulation of PPIs.

Results

We reported a novel computational, structure and knowledge-based approach to model orthosteric peptides to target PPIs: PiPreD. PiPreD relies on a precompiled and bespoken library of structural motifs, iMotifs, extracted from protein complexes and a fast structural modeling algorithm driven by the location of native chemical groups on the interface of the protein target named anchor residues. PiPreD comprehensive and systematically samples the entire interface deriving peptide conformations best suited for the given region on the protein interface. PiPreD complements the existing technologies and provides new solutions for the disruption of selected interactions.

Availability and implementation

Database and accessory scripts and programs are available upon request to the authors or at http://www.bioinsilico.org/PIPRED.

Contact

narcis.fernandez@gmail.com.",2014-12-23 +26549790,Prediction of amino acid positions specific for functional groups in a protein family based on local sequence similarity.,"The exchange of single amino acid residue in protein can substantially affect the specificity of molecular recognition. Many protein families can be divided into the groups based on specificity to recognized ligands. Prediction of group-discriminating residues within the certain family is extremely necessary for theoretical studies, enzyme engineering, drug design, and so on. The most existing methods use the multiple sequence alignment. They have the limitations in prediction accuracy due to the family sequence divergence and ligand-based grouping. We developed a new method SPrOS (Specificity Projection On Sequence) for estimating the specificity of residues to user-defined groups. SPrOS compares the sequence segments from the test protein and training proteins. Contrary to other segment-comparison approaches extracting the string motifs, SPrOS calculates the scores for single positions by the similarity of their surroundings. The method was evaluated on the simulated sequences and real protein families. The high-prediction accuracy was achieved for simulated sequences, in which SPrOS detected specific positions not predicted with the alignment-based method. For bacterial transcription factors (LacI/GalR) clearly divided into functional groups, the predicted specific residues corresponded to the published experimental data. In a more complicated case of protein kinases classified by inhibitor specificity, the positions predicted with high significance were located in ligand-binding areas. As the ligand specificity is not necessary coincided with phylogeny, evolutionary-coupled mutations could disturb the detection of ligand-specific residues. Excluding proximate homologs of the test protein kinase from the training set, we improved the prediction of the ligand-specific residues. 
The SPrOS is available at http://www.way2drug.com/spros/",2015-11-08 +24597989,p63 isoforms regulate metabolism of cancer stem cells.,"p63 is an important regulator of epithelial development expressed in different variants containing (TA) or lacking (ΔN) the N-terminal transactivation domain. The different isoforms regulate stem-cell renewal and differentiation as well as cell senescence. Several studies indicate that p63 isoforms also play a role in cancer development; however, very little is known about the role played by p63 in regulating the cancer stem phenotype. Here we investigate the cellular signals regulated by TAp63 and ΔNp63 in a model of epithelial cancer stem cells. To this end, we used colon cancer stem cells, overexpressing either TAp63 or ΔNp63 isoforms, to carry out a proteomic study by chemical-labeling approach coupled to network analysis. Our results indicate that p63 is implicated in a wide range of biological processes, including metabolism. This was further investigated by a targeted strategy at both protein and metabolite levels. The overall data show that TAp63 overexpressing cells are more glycolytic-active than ΔNp63 cells, indicating that the two isoforms may regulate the key steps of glycolysis in an opposite manner. The mass-spectrometry proteomics data of the study have been deposited to the ProteomeXchange Consortium ( http://proteomecentral.proteomexchange.org ) via the PRIDE partner repository with data set identifiers PXD000769 and PXD000768.",2014-03-19 +21693556,Inferring causative variants in microRNA target sites.,"MicroRNAs (miRNAs) regulate genes post transcription by pairing with messenger RNA (mRNA). Variants such as single nucleotide polymorphisms (SNPs) in miRNA regulatory regions might result in altered protein levels and disease. Genome-wide association studies (GWAS) aim at identifying genomic regions that contain variants associated with disease, but lack tools for finding causative variants. 
We present a computational tool that can help identifying SNPs associated with diseases, by focusing on SNPs affecting miRNA-regulation of genes. The tool predicts the effects of SNPs in miRNA target sites and uses linkage disequilibrium to map these miRNA-related variants to SNPs of interest in GWAS. We compared our predicted SNP effects in miRNA target sites with measured SNP effects from allelic imbalance sequencing. Our predictions fit measured effects better than effects based on differences in free energy or differences of TargetScan context scores. We also used our tool to analyse data from published breast cancer and Parkinson's disease GWAS and significant trait-associated SNPs from the NHGRI GWAS Catalog. A database of predicted SNP effects is available at http://www.bigr.medisin.ntnu.no/mirsnpscore/. The database is based on haplotype data from the CEU HapMap population and miRNAs from miRBase 16.0.",2011-06-21 +26496759,Development of a graphical user interface for sgRNAcas9 and its application.,"The CRISPR/Cas9 genome editing technique is a powerful tool for researchers. However, off-target effects of the Cas9 nuclease activity is a recurrent concern of the CRISPR system. Thus, designing sgRNA (single guide RNA) with minimal off-target effects is very important. sgRNAcas9 is a software package, which can be used to design sgRNA and to evaluate potential off-target cleavage sites. In this study, a graphical user interface for sgRNAcas9 was developed using the Java programming language. In addition, off-target effect for sgRNAs was evaluated according to mismatched number and ""seed sequence"" specification. Moreover, sgRNAcas9 software was used to design 34 124 sgRNAs, which can target 4691 microRNA (miRNA) precursors from human, mouse, rat, pig, and chicken. In particular, the off-target effect of a sgRNA targeting to human miR-206 precursor was analyzed, and the on/off-target activity of this sgRNA was validated by T7E1 assay in vitro. 
Taken together, these data showed that the interface can simplify the usage of the sgRNAcas9 program, which can be used to design sgRNAs for the majority of miRNA precursors. We also found that the GC% of those sgRNAs ranged from 40% to 60%. In summary, the sgRNAcas9 software can be easily used to design sgRNA with minimal off-target effects for any species. The software can be downloaded from BiooTools website (http://www.biootools.com/).",2015-10-01 +26146086,Semi-supervised Learning Predicts Approximately One Third of the Alternative Splicing Isoforms as Functional Proteins.,"Alternative splicing acts on transcripts from almost all human multi-exon genes. Notwithstanding its ubiquity, fundamental ramifications of splicing on protein expression remain unresolved. The number and identity of spliced transcripts that form stably folded proteins remain the sources of considerable debate, due largely to low coverage of experimental methods and the resulting absence of negative data. We circumvent this issue by developing a semi-supervised learning algorithm, positive unlabeled learning for splicing elucidation (PULSE; http://www.kimlab.org/software/pulse), which uses 48 features spanning various categories. We validated its accuracy on sets of bona fide protein isoforms and directly on mass spectrometry (MS) spectra for an overall AU-ROC of 0.85. We predict that around 32% of ""exon skipping"" alternative splicing events produce stable proteins, suggesting that the process engenders a significant number of previously uncharacterized proteins. 
We also provide insights into the distribution of positive isoforms in various functional classes and into the structural effects of alternative splicing.",2015-07-02 +24125644,MarinegenomicsDB: an integrated genome viewer for community-based annotation of genomes.,"We constructed a web-based genome annotation platform, MarinegenomicsDB, to integrate genome data from various marine organisms including the pearl oyster Pinctada fucata and the coral Acropora digitifera. This newly developed viewer application provides open access to published data and a user-friendly environment for community-based manual gene annotation. Development on a flexible framework enables easy expansion of the website on demand. To date, more than 2000 genes have been annotated using this system. In the future, the website will be expanded to host a wider variety of data, more species, and different types of genome-wide analyses. The website is available at the following URL: http://marinegenomics.oist.jp.",2013-10-01 +24642061,An R package to analyse LC/MS metabolomic data: MAIT (Metabolite Automatic Identification Toolkit).,"

Unlabelled

Current tools for liquid chromatography and mass spectrometry for metabolomic data cover a limited number of processing steps, whereas online tools are hard to use in a programmable fashion. This article introduces the Metabolite Automatic Identification Toolkit (MAIT) package, which makes it possible for users to perform metabolomic end-to-end liquid chromatography and mass spectrometry data analysis. MAIT is focused on improving the peak annotation stage and provides essential tools to validate statistical analysis results. MAIT generates output files with the statistical results, peak annotation and metabolite identification.

Availability and implementation

http://b2slab.upc.edu/software-and-downloads/metabolite-automatic-identification-toolkit/.",2014-03-17 +26628858,Characterizing Cancer-Specific Networks by Integrating TCGA Data.,"The Cancer Genome Atlas (TCGA) generates comprehensive genomic data for thousands of patients over more than 20 cancer types. TCGA data are typically whole-genome measurements of multiple genomic features, such as DNA copy numbers, DNA methylation, and gene expression, providing unique opportunities for investigating cancer mechanism from multiple molecular and regulatory layers. We propose a Bayesian graphical model to systemically integrate multi-platform TCGA data for inference of the interactions between different genomic features either within a gene or between multiple genes. The presence or absence of edges in the graph indicates the presence or absence of conditional dependence between genomic features. The inference is restricted to genes within a known biological network, but can be extended to any sets of genes. Applying the model to the same genes using patient samples in two different cancer types, we identify network components that are common as well as different between cancer types. The examples and codes are available at https://www.ma.utexas.edu/users/yxu/software.html.",2014-01-01 +25055913,De novo approach to classify protein-coding and noncoding transcripts based on sequence composition.,"Each day, more and more transcripts are being discovered along the genome (especially in poorly annotated species) thanks to the rapid progress of high-throughput technology such as RNA sequencing. However, this situation unravels the challenge of how to classify the newly identified transcripts into protein coding or noncoding. Here, we describe a de novo approach named coding-noncoding index (CNCI), a powerful signature tool by profiling adjoining nucleotide triplets (ANT) to effectively distinguish between protein-coding and noncoding sequences independently of known annotations. 
The main advantage of CNCI is its ability to accurately classify transcripts assembled from whole-transcriptome sequencing data in a cross-species manner, which allowed it to be used for all vertebrates and invertebrates based on the training data of well-annotated species (such as human and Arabidopsis). In this chapter, we illustrate the CNCI method in detail through an example of RNA-sequencing data generated from six biological replicates of six mouse tissues. CNCI software is available at http://www.bioinfo.org/software/cnci.",2014-01-01 +23467464,"How do German veterinarians use social networks? A study, using the example of the 'NOVICE' veterinary medicine network.","

Objective

NOVICE (Network Of Veterinary ICT in Education, http://www.noviceproject.eu/), is a professional online social network for veterinarians, lecturers and students of veterinary medicine as well as for e-Learning advisers and others working in establishments that teach veterinary medicine. This study sets out to investigate to what extent German veterinarians, lecturers, students of veterinary medicine and e-Learning representatives would accept a specialist network, what requirements would have to be met by an online social network, how to use web 2.0 tools [21], [30] and what advantages a specialist network could offer.

Methodology

The investigation was carried out by analysing data from the Elgg platform database as well as using Google Analytics. Annual focus group surveys and individual interviews were carried out in order to perform an analysis of acceptance among network users.

Results

1961 users from 73 different countries registered on the NOVICE site between 1 September 2010 and 21 March 2012. Germany represents the biggest user group, with 565 users (28.81%). During this period, most individual hits on the website came from Germany too. In total, 24.83% of all members are active, while 19.22% of German members participate actively. In terms of gender, there are significantly more female members than male members, both in the NOVICE network as a whole as well as in Germany. The most used web 2.0 tools are chat and email messaging services as well as writing wikis and contributing to forum discussions. The focus group surveys showed that respondents generally make use of other online communities too. Active members generally use more web 2.0 tools than in other networks, while passive members are generally more reluctant in all networks. All participants of the survey welcomed the idea of having a network specifically set up for the profession and believe that it could be very useful for veterinary medicine.

Conclusions

The network and its membership figures developed very positively during the assessed time period. Until now, the focus of the content of contributions in NOVICE (Network of Veterinary ICT in Education) has been on veterinary medicine teaching supported by e-Learning. An increase in the number of members would, however, be beneficial in order to further develop the network so that valuable exchange of information and informal learning can also take place in other specialist areas of veterinary medicine.",2013-02-21 +24813543,Universal dynamical properties preclude standard clustering in a large class of biochemical data.,"

Motivation

Clustering of chemical and biochemical data based on observed features is a central cognitive step in the analysis of chemical substances, in particular in combinatorial chemistry, or of complex biochemical reaction networks. Often, for reasons unknown to the researcher, this step produces disappointing results. Once the sources of the problem are known, improved clustering methods might revitalize the statistical approach of compound and reaction search and analysis. Here, we present a generic mechanism that may be at the origin of many clustering difficulties.

Results

The variety of dynamical behaviors that can be exhibited by complex biochemical reactions on variation of the system parameters are fundamental system fingerprints. In parameter space, shrimp-like or swallow-tail structures separate parameter sets that lead to stable periodic dynamical behavior from those leading to irregular behavior. We work out the genericity of this phenomenon and demonstrate novel examples for their occurrence in realistic models of biophysics. Although we elucidate the phenomenon by considering the emergence of periodicity in dependence on system parameters in a low-dimensional parameter space, the conclusions from our simple setting are shown to continue to be valid for features in a higher-dimensional feature space, as long as the feature-generating mechanism is not too extreme and the dimension of this space is not too high compared with the amount of available data.

Availability and implementation

For online versions of super-paramagnetic clustering see http://stoop.ini.uzh.ch/research/clustering.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-10 +22904610,The chordate proteome history database.,"The chordate proteome history database (http://ioda.univ-provence.fr) comprises some 20,000 evolutionary analyses of proteins from chordate species. Our main objective was to characterize and study the evolutionary histories of the chordate proteome, and in particular to detect genomic events and automatic functional searches. Firstly, phylogenetic analyses based on high quality multiple sequence alignments and a robust phylogenetic pipeline were performed for the whole protein and for each individual domain. Novel approaches were developed to identify orthologs/paralogs, and predict gene duplication/gain/loss events and the occurrence of new protein architectures (domain gains, losses and shuffling). These important genetic events were localized on the phylogenetic trees and on the genomic sequence. Secondly, the phylogenetic trees were enhanced by the creation of phylogroups, whereby groups of orthologous sequences created using OrthoMCL were corrected based on the phylogenetic trees; gene family size and gene gain/loss in a given lineage could be deduced from the phylogroups. For each ortholog group obtained from the phylogenetic or the phylogroup analysis, functional information and expression data can be retrieved. Database searches can be performed easily using biological objects: protein identifier, keyword or domain, but can also be based on events, eg, domain exchange events can be retrieved. To our knowledge, this is the first database that links group clustering, phylogeny and automatic functional searches along with the detection of important events occurring during genome evolution, such as the appearance of a new domain architecture.",2012-08-01 +25527097,CDvist: a webserver for identification and visualization of conserved domains in protein sequences.,"

Summary

Identification of domains in protein sequences allows their assigning to biological functions. Several webservers exist for identification of protein domains using similarity searches against various databases of protein domain models. However, none of them provides comprehensive domain coverage while allowing bulk querying and their visualization schemes can be improved. To address these issues, we developed CDvist (a comprehensive domain visualization tool), which combines the best available search algorithms and databases into a user-friendly framework. First, a given protein sequence is matched to domain models using high-specificity tools and only then unmatched segments are subjected to more sensitive algorithms resulting in a best possible comprehensive coverage. Bulk querying and rich visualization and download options provide improved functionality to domain architecture analysis.

Availability and implementation

Freely available on the web at http://cdvist.utk.edu

Contact

oadebali@vols.utk.edu or ijouline@utk.edu.",2014-12-18 +22155869,Identification and removal of ribosomal RNA sequences from metatranscriptomes.,"

Summary

Here, we present riboPicker, a robust framework for the rapid, automated identification and removal of ribosomal RNA sequences from metatranscriptomic datasets. The results can be exported for subsequent analysis, and the databases used for the web-based version are updated on a regular basis. riboPicker categorizes rRNA-like sequences and provides graphical visualizations and tabular outputs of ribosomal coverage, alignment results and taxonomic classifications.

Availability and implementation

This open-source application was implemented in Perl and can be used as stand-alone version or accessed online through a user-friendly web interface. The source code, user help and additional information is available at http://ribopicker.sourceforge.net/.

Contact

rschmied@sciences.sdsu.edu; rschmied@sciences.sdsu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-06 +24885641,Improving the sensitivity of sample clustering by leveraging gene co-expression networks in variable selection.,"

Background

Many variable selection techniques have been proposed for the clustering of gene expression data. While these methods tend to filter out irrelevant genes and identify informative genes that contribute to a clustering solution, they are based on criteria that do not consider the potential interactive influence among individual genes. Motivated by ensemble clustering, there is a strong interest in leveraging the structure of gene networks for gene selection, so that the relationship information between genes can be effectively utilized, while the selected genes are expected to preserve all the possible clustering structures in the data.

Results

We present a new filter method that uses the gene connectivity in the gene co-expression network as the evaluation criteria for variable selection. The gene connectivity measures the importance of the genes in term of their expression similarity with others in the co-expression network. The hard threshold and soft threshold transformations are employed to construct the gene co-expression networks. Both simulation studies and real data analysis have shown that the network based on soft thresholding is more effective in selecting relevant variables and provides better clustering results compared to the hard thresholding transformation and two other canonical filter methods for variable selection. Furthermore, a new module analysis approach is proposed to reveal the higher order organization of the gene space, where the genes of a module share significant topological similarity and are associated with a consensus partition of the sample space. We demonstrate that the identified modules can lead to biologically meaningful sample partitions that might be missed by other methods.

Conclusions

By leveraging the structure of gene co-expression network, first we propose a variable selection method that selects individual genes with top connectivity. Both simulation studies and real data application have demonstrated that our method has better performance in terms of the reliability of the selected genes and sample clustering results. In addition, we propose a module recovery method that can help discover novel sample partitions that might be hidden when performing clustering analyses using all available genes. The source code of our program is available at http://nba.uth.tmc.edu/homepage/liu/netVar/.",2014-05-20 +26000881,Pan-phylum Comparison of Nematode Metabolic Potential.,"Nematodes are among the most important causative pathogens of neglected tropical diseases. The increased availability of genomic and transcriptomic data for many understudied nematode species provides a great opportunity to investigate different aspects of their biology. Increasingly, metabolic potential of pathogens is recognized as a critical determinant governing their development, growth and pathogenicity. Comparing metabolic potential among species with distinct trophic ecologies can provide insights on overall biology or molecular adaptations. Furthermore, ascertaining gene expression at pathway level can help in understanding metabolic dynamics over development. Comparison of biochemical pathways (or subpathways, i.e. pathway modules) among related species can also retrospectively indicate potential mistakes in gene-calling and functional annotation. We show with numerous illustrative case studies that comparisons at the level of pathway modules have the potential to uncover biological insights while remaining computationally tractable. Here, we reconstruct and compare metabolic modules found in the deduced proteomes of 13 nematodes and 10 non-nematode species (including hosts of the parasitic nematode species). 
We observed that the metabolic potential is, in general, concomitant with phylogenetic and/or ecological similarity. Varied metabolic strategies are required among the nematodes, with only 8 out of 51 pathway modules being completely conserved. Enzyme comparison based on topology of metabolic modules uncovered diversification between parasite and host that can potentially guide therapeutic intervention. Gene expression data from 4 nematode species were used to study metabolic dynamics over their life cycles. We report unexpected differential metabolism between immature and mature microfilariae of the human filarial parasite Brugia malayi. A set of genes potentially important for parasitism is also reported, based on an analysis of gene expression in C. elegans and the human hookworm Necator americanus. We illustrate how analyzing and comparing metabolism at the level of pathway modules can improve existing knowledge of nematode metabolic potential and can provide parasitism related insights. Our reconstruction and comparison of nematode metabolic pathways at a pan-phylum and inter-phylum level enabled determination of phylogenetic restrictions and differential expression of pathways. A visualization of our results is available at http://nematode.net and the program for identification of module completeness (modDFS) is freely available at SourceForge. The methods reported will help biologists to predict biochemical potential of any organism with available deduced proteome, to direct experiments and test hypotheses.",2015-05-22 +25518728,Comparative genomic analysis of clinical and environmental strains provides insight into the pathogenicity and evolution of Vibrio parahaemolyticus.,"

Background

Vibrio parahaemolyticus is a Gram-negative halophilic bacterium. Infections with the bacterium could become systemic and can be life-threatening to immunocompromised individuals. Genome sequences of a few clinical isolates of V. parahaemolyticus are currently available, but the genome dynamics across the species and virulence potential of environmental strains on a genome-scale have not been described before.

Results

Here we present genome sequences of four V. parahaemolyticus clinical strains from stool samples of patients and five environmental strains in Hong Kong. Phylogenomics analysis based on single nucleotide polymorphisms revealed a clear distinction between the clinical and environmental isolates. A new gene cluster belonging to the biofilm associated proteins of V. parahaemolyticus was found in clinical strains. In addition, a novel small genomic island frequently found among clinical isolates was reported. A few environmental strains were found harboring virulence genes and prophage elements, indicating their virulence potential. A unique biphenyl degradation pathway was also reported. A database for V. parahaemolyticus (http://kwanlab.bio.cuhk.edu.hk/vp) was constructed here as a platform to access and analyze genome sequences and annotations of the bacterium.

Conclusions

We have performed a comparative genomics analysis of clinical and environmental strains of V. parahaemolyticus. Our analyses could facilitate understanding of the phylogenetic diversity and niche adaptation of this bacterium.",2014-12-18 +26410841,CustusX: an open-source research platform for image-guided therapy.,"

Purpose

CustusX is an image-guided therapy (IGT) research platform dedicated to intraoperative navigation and ultrasound imaging. In this paper, we present CustusX as a robust, accurate, and extensible platform with full access to data and algorithms and show examples of application in technological and clinical IGT research.

Methods

CustusX has been developed continuously for more than 15 years based on requirements from clinical and technological researchers within the framework of a well-defined software quality process. The platform was designed as a layered architecture with plugins based on the CTK/OSGi framework, a superbuild that manages dependencies and features supporting the IGT workflow. We describe the use of the system in several different clinical settings and characterize major aspects of the system such as accuracy, frame rate, and latency.

Results

The validation experiments show a navigation system accuracy of [Formula: see text]1.1 mm, a frame rate of 20 fps, and latency of 285 ms for a typical setup. The current platform is extensible, user-friendly and has a streamlined architecture and quality process. CustusX has successfully been used for IGT research in neurosurgery, laparoscopic surgery, vascular surgery, and bronchoscopy.

Conclusions

CustusX is now a mature research platform for intraoperative navigation and ultrasound imaging and is ready for use by the IGT research community. CustusX is open-source and freely available at http://www.custusx.org.",2015-09-26 +26321999,Speech error and tip of the tongue diary for mobile devices.,"Collections of various types of speech errors have increased our understanding of the acquisition, production, and perception of language. Although such collections of naturally occurring language errors are invaluable for a number of reasons, the process of collecting various types of speech errors presents many challenges to the researcher interested in building such a collection, among them a significant investment of time and effort to obtain a sufficient number of examples to enable statistical analysis. Here we describe a freely accessible website http://spedi.ku.edu that helps users document slips of the tongue, slips of the ear, and tip of the tongue states that they experience firsthand or observe in others. The documented errors are amassed, and made available for other users to analyze, thereby distributing the time and effort involved in collecting errors across a large number of individuals instead of saddling the lone researcher, and facilitating distribution of the collection to other researchers. This approach also addresses some issues related to data curation that hampered previous error collections, and enables the collection to continue to grow over a longer period of time than previous collections. Finally, this web-based tool creates an opportunity for language scientists to engage in outreach efforts to increase the understanding of language disorders and research in the general public.",2015-08-13 +22715304,FBIS: A regional DNA barcode archival & analysis system for Indian fishes.,"

Unlabelled

DNA barcode is a new tool for taxon recognition and classification of biological organisms based on sequence of a fragment of mitochondrial gene, cytochrome c oxidase I (COI). In view of the growing importance of the fish DNA barcoding for species identification, molecular taxonomy and fish diversity conservation, we developed a Fish Barcode Information System (FBIS) for Indian fishes, which will serve as a regional DNA barcode archival and analysis system. The database presently contains 2334 sequence records of COI gene for 472 aquatic species belonging to 39 orders and 136 families, collected from available published data sources. Additionally, it contains information on phenotype, distribution and IUCN Red List status of fishes. The web version of FBIS was designed using MySQL, Perl and PHP under Linux operating platform to (a) store and manage the acquisition (b) analyze and explore DNA barcode records (c) identify species and estimate genetic divergence. FBIS has also been integrated with appropriate tools for retrieving and viewing information about the database statistics and taxonomy. It is expected that FBIS would be useful as a potent information system in fish molecular taxonomy, phylogeny and genomics.

Availability

The database is available for free at http://mail.nbfgr.res.in/fbis/",2012-05-31 +26680539,A two-layered machine learning method to identify protein O-GlcNAcylation sites with O-GlcNAc transferase substrate motifs.,"Protein O-GlcNAcylation, involving the β-attachment of single N-acetylglucosamine (GlcNAc) to the hydroxyl group of serine or threonine residues, is an O-linked glycosylation catalyzed by O-GlcNAc transferase (OGT). Molecular level investigation of the basis for OGT's substrate specificity should aid understanding how O-GlcNAc contributes to diverse cellular processes. Due to an increasing number of O-GlcNAcylated peptides with site-specific information identified by mass spectrometry (MS)-based proteomics, we were motivated to characterize substrate site motifs of O-GlcNAc transferases. In this investigation, a non-redundant dataset of 410 experimentally verified O-GlcNAcylation sites were manually extracted from dbOGAP, OGlycBase and UniProtKB. After detection of conserved motifs by using maximal dependence decomposition, profile hidden Markov model (profile HMM) was adopted to learn a first-layered model for each identified OGT substrate motif. Support Vector Machine (SVM) was then used to generate a second-layered model learned from the output values of profile HMMs in first layer. The two-layered predictive model was evaluated using a five-fold cross validation which yielded a sensitivity of 85.4%, a specificity of 84.1%, and an accuracy of 84.7%. Additionally, an independent testing set from PhosphoSitePlus, which was really non-homologous to the training data of predictive model, was used to demonstrate that the proposed method could provide a promising accuracy (84.05%) and outperform other O-GlcNAcylation site prediction tools. 
A case study indicated that the proposed method could be a feasible means of conducting preliminary analyses of protein O-GlcNAcylation and has been implemented as a web-based system, OGTSite, which is now freely available at http://csb.cse.yzu.edu.tw/OGTSite/.",2015-12-09 +22846459,HINT: High-quality protein interactomes and their applications in understanding human disease.,"

Background

A global map of protein-protein interactions in cellular systems provides key insights into the workings of an organism. A repository of well-validated high-quality protein-protein interactions can be used in both large- and small-scale studies to generate and validate a wide range of functional hypotheses.

Results

We develop HINT (http://hint.yulab.org) - a database of high-quality protein-protein interactomes for human, Saccharomyces cerevisiae, Schizosaccharomyces pombe, and Oryza sativa. These were collected from several databases and filtered both systematically and manually to remove low-quality/erroneous interactions. The resulting datasets are classified by type (binary physical interactions vs. co-complex associations) and data source (high-throughput systematic setups vs. literature-curated small-scale experiments). We find strong sociological sampling biases in literature-curated datasets of small-scale interactions. An interactome without such sampling biases was used to understand network properties of human disease-genes - hubs are unlikely to cause disease, but if they do, they usually cause multiple disorders.

Conclusions

HINT is of significant interest to researchers in all fields of biology as it addresses the ubiquitous need of having a repository of high-quality protein-protein interactions. These datasets can be utilized to generate specific hypotheses about specific proteins and/or pathways, as well as analyzing global properties of cellular networks. HINT will be regularly updated and all versions will be tracked.",2012-07-30 +26527345,How to measure the agroecological performance of farming in order to assist with the transition process.,"The use of plant protection products enables farmers to maximize economic performance and yields, but in return, the environment and human health can be greatly affected because of their toxicity. There are currently strong calls for farmers to reduce the use of these toxic products for the preservation of the environment and the human health, and it has become urgent to invest in more sustainable models that help reduce these risks. One possible solution is the transition toward agroecological production systems. These new systems must be beneficial economically, socially, and environmentally in terms of human health. There are many tools available, based on a range of indicators, for assessing the sustainability of agricultural systems on conventional farm holdings. These methods are little suitable to agroecological farms and do not measure the performance of agroecological transition farms. In this article, we therefore develop a model for the strategic definition, guidance, and assistance for a transition to agroecological practices, capable of assessing performance of this transition and simulating the consequences of possible changes. This model was built by coupling (i) a decision-support tool and a technico-economic simulator with (ii) a conceptual model built from the dynamics of agroecological practices. 
This tool is currently being tested in the framework of a Compte d'Affectation Spéciale pour le Développement Agricole et Rural (CASDAR) project (CASDAR: project launched in 2013 by the French Ministry of Agriculture, Food and Forestry, on the theme ""collective mobilisation for agroecology,"" http://agriculture.gouv.fr/Appel-a-projets-CASDAR ) using data from farms, most of which are engaged in agroenvironmental process and reducing plant protection treatments since 2008.",2015-11-03 +21542931,The PathOlogist: an automated tool for pathway-centric analysis.,"

Background

The PathOlogist is a new tool designed to transform large sets of gene expression data into quantitative descriptors of pathway-level behavior. The tool aims to provide a robust alternative to the search for single-gene-to-phenotype associations by accounting for the complexity of molecular interactions.

Results

Molecular abundance data is used to calculate two metrics--'activity' and 'consistency'--for each pathway in a set of more than 500 canonical molecular pathways (source: Pathway Interaction Database, http://pid.nci.nih.gov). The tool then allows a detailed exploration of these metrics through integrated visualization of pathway components and structure, hierarchical clustering of pathways and samples, and statistical analyses designed to detect associations between pathway behavior and clinical features.

Conclusions

The PathOlogist provides a straightforward means to identify the functional processes, rather than individual molecules, that are altered in disease. The statistical power and biologic significance of this approach are made easily accessible to laboratory researchers and informatics analysts alike. Here we show as an example, how the PathOlogist can be used to establish pathway signatures that robustly differentiate breast cancer cell lines based on response to treatment.",2011-05-04 +26410103,An integrative structure-based framework for predicting biological effects mediated by antipeptide antibodies.,"A general framework is presented for predicting quantitative biological effects mediated by antipeptide antibodies, primarily on the basis of antigen structure (possibly featuring intrinsic disorder) analyzed to estimate epitope-paratope binding affinities, which in turn is considered within the context of dose-response relationships as regards antibody concentration. This is illustrated mainly using an approach based on protein structural energetics, whereby expected amounts of solvent-accessible surface area buried upon epitope-paratope binding are related to the corresponding binding affinity, which is estimated from putative B-cell epitope structure with implicit treatment of paratope structure, for antipeptide antibodies either reacting with peptides or cross-reacting with cognate protein antigens. Key methods described are implemented in SAPPHIRE/SUITE (Structural-energetic Analysis Program for Predicting Humoral Immune Response Epitopes/SAPPHIRE User Interface Tool Ensemble; publicly accessible via http://freeshell.de/~badong/suite.htm). 
Representative results thus obtained are compared with published experimental data on binding affinities and quantitative biological effects, with special attention to loss of paratope sidechain conformational entropy (neglected in previous analyses) and in light of key in-vivo constraints on antigen-antibody binding affinity and antibody-mediated effects. Implications for further refinement of B-cell epitope prediction methods are discussed as regards envisioned biomedical applications including the development of prophylactic and therapeutic antibodies, peptide-based vaccines and immunodiagnostics.",2015-09-26 +25995283,Palm Oil Consumption Increases LDL Cholesterol Compared with Vegetable Oils Low in Saturated Fat in a Meta-Analysis of Clinical Trials.,"

Background

Palm oil contains a high amount of saturated fat compared with most other vegetable oils, but studies have reported inconsistent effects of palm oil on blood lipids.

Objective

We systematically reviewed the effect of palm oil consumption on blood lipids compared with other cooking oils using data from clinical trials.

Methods

We searched PubMed and the Cochrane Library for trials of at least 2 wk duration that compared the effects of palm oil consumption with any of the predefined comparison oils: vegetable oils low in saturated fat, trans fat-containing partially hydrogenated vegetable oils, and animal fats. Data were pooled by using random-effects meta-analysis.

Results

Palm oil significantly increased LDL cholesterol by 0.24 mmol/L (95% CI: 0.13, 0.35 mmol/L; I(2) = 83.2%) compared with vegetable oils low in saturated fat. This effect was observed in randomized trials (0.31 mmol/L; 95% CI: 0.20, 0.42 mmol/L) but not in nonrandomized trials (0.03 mmol/L; 95% CI: -0.15, 0.20 mmol/L; P-difference = 0.02). Among randomized trials, only modest heterogeneity in study results remained after considering the test oil dose and the comparison oil type (I(2) = 27.5%). Palm oil increased HDL cholesterol by 0.02 mmol/L (95% CI: 0.01, 0.04 mmol/L; I(2) = 49.8%) compared with vegetable oils low in saturated fat and by 0.09 mmol/L (95% CI: 0.06, 0.11 mmol/L; I(2) = 47.8%) compared with trans fat-containing oils.

Conclusions

Palm oil consumption results in higher LDL cholesterol than do vegetable oils low in saturated fat and higher HDL cholesterol than do trans fat-containing oils in humans. The effects of palm oil on blood lipids are as expected on the basis of its high saturated fat content, which supports the reduction in palm oil use by replacement with vegetable oils low in saturated and trans fat. This systematic review was registered with the PROSPERO registry at http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42012002601#.VU3wvSGeDRZ as CRD42012002601.",2015-05-20 +25515150,"Cronobacter, the emergent bacterial pathogen Enterobacter sakazakii comes of age; MLST and whole genome sequence analysis.","

Background

Following the association of Cronobacter spp. to several publicized fatal outbreaks in neonatal intensive care units of meningitis and necrotising enterocolitis, the World Health Organization (WHO) in 2004 requested the establishment of a molecular typing scheme to enable the international control of the organism. This paper presents the application of Next Generation Sequencing (NGS) to Cronobacter which has led to the establishment of the Cronobacter PubMLST genome and sequence definition database (http://pubmlst.org/cronobacter/) containing over 1000 isolates with metadata along with the recognition of specific clonal lineages linked to neonatal meningitis and adult infections.

Results

Whole genome sequencing and multilocus sequence typing (MLST) supports the formal recognition of the genus Cronobacter composed of seven species to replace the former single species Enterobacter sakazakii. Applying the 7-loci MLST scheme to 1007 strains revealed 298 definable sequence types, yet only C. sakazakii clonal complex 4 (CC4) was principally associated with neonatal meningitis. This clonal lineage has been confirmed using ribosomal-MLST (51-loci) and whole genome-MLST (1865 loci) to analyse 107 whole genomes via the Cronobacter PubMLST database. This database has enabled the retrospective analysis of historic cases and outbreaks following re-identification of those strains.

Conclusions

The Cronobacter PubMLST database offers a central, open access, reliable sequence-based repository for researchers. It has the capacity to create new analysis schemes 'on the fly', and to integrate metadata (source, geographic distribution, clinical presentation). It is also expandable and adaptable to changes in taxonomy, and able to support the development of reliable detection methods of use to industry and regulatory authorities. Therefore it meets the WHO (2004) request for the establishment of a typing scheme for this emergent bacterial pathogen. Whole genome sequencing has additionally shown a range of potential virulence and environmental fitness traits which may account for the association of C. sakazakii CC4 pathogenicity, and propensity for neonatal CNS.",2014-12-16 +23652425,pyGenClean: efficient tool for genetic data clean up before association testing.,"

Unlabelled

Genetic association studies making use of high-throughput genotyping arrays need to process large amounts of data in the order of millions of markers per experiment. The first step of any analysis with genotyping arrays is typically the conduct of a thorough data clean up and quality control to remove poor quality genotypes and generate metrics to inform and select individuals for downstream statistical analysis. We have developed pyGenClean, a bioinformatics tool to facilitate and standardize the genetic data clean up pipeline with genotyping array data. In conjunction with a source batch-queuing system, the tool minimizes data manipulation errors, accelerates the completion of the data clean up process and provides informative plots and metrics to guide decision making for statistical analysis.

Availability and implementation

pyGenClean is an open source Python 2.7 software and is freely available, along with documentation and examples, from http://www.statgen.org.",2013-05-06 +24585852,Cohort profile: Wisconsin longitudinal study (WLS).,"The Wisconsin Longitudinal Study (WLS) is a longitudinal study of men and women who graduated from Wisconsin high schools in 1957 and one of their randomly selected siblings. Wisconsin is located in the upper midwest of the United States and had a population of approximately 14 000 000 in 1957, making it the 14th most populous state at that time. Data spanning almost 60 years allow researchers to link family background, adolescent characteristics, educational experiences, employment experiences, income, wealth, family formation and social and religious engagement to midlife and late-life physical health, mental health, psychological well-being, cognition, end of life planning and mortality. The WLS is one of the few longitudinal data sets that include an administrative measure of cognition from childhood. Further, recently collected saliva samples allow researchers to explore the inter-relationships among genes, behaviours and environment, including genetic determinants of behaviours (e.g. educational attainment); the interactions between genes and environment; and how these interactions predict behaviours. Most panel members were born in 1939, and the sample is broadly representative of White, non-Hispanic American men and women who have completed at least a high school education. Siblings cover several adjoining cohorts: they were born primarily between 1930 and 1948. At each interview, about two-thirds of the sample lived in Wisconsin, and about one-third lived elsewhere in the United States or abroad. The data, along with documentation, are publicly accessible and can be accessed at http://www.ssc.wisc.edu/wlsresearch/. 
Requests for protected data or assistance should be sent to wls@ssc.wisc.edu.",2014-02-01 +25003610,An integrated SNP mining and utilization (ISMU) pipeline for next generation sequencing data.,"Open source single nucleotide polymorphism (SNP) discovery pipelines for next generation sequencing data commonly requires working knowledge of command line interface, massive computational resources and expertise which is a daunting task for biologists. Further, the SNP information generated may not be readily used for downstream processes such as genotyping. Hence, a comprehensive pipeline has been developed by integrating several open source next generation sequencing (NGS) tools along with a graphical user interface called Integrated SNP Mining and Utilization (ISMU) for SNP discovery and their utilization by developing genotyping assays. The pipeline features functionalities such as pre-processing of raw data, integration of open source alignment tools (Bowtie2, BWA, Maq, NovoAlign and SOAP2), SNP prediction (SAMtools/SOAPsnp/CNS2snp and CbCC) methods and interfaces for developing genotyping assays. The pipeline outputs a list of high quality SNPs between all pairwise combinations of genotypes analyzed, in addition to the reference genome/sequence. Visualization tools (Tablet and Flapjack) integrated into the pipeline enable inspection of the alignment and errors, if any. The pipeline also provides a confidence score or polymorphism information content value with flanking sequences for identified SNPs in standard format required for developing marker genotyping (KASP and Golden Gate) assays. The pipeline enables users to process a range of NGS datasets such as whole genome re-sequencing, restriction site associated DNA sequencing and transcriptome sequencing data at a fast speed. The pipeline is very useful for plant genetics and breeding community with no computational expertise in order to discover SNPs and utilize in genomics, genetics and breeding studies. 
The pipeline has been parallelized to process huge datasets of next generation sequencing. It has been developed in Java language and is available at http://hpc.icrisat.cgiar.org/ISMU as a standalone free software.",2014-07-08 +22098693,Patterns of Long Term Care in 29 European countries: evidence from an exploratory study.,"

Background

The challenges posed by the rapidly ageing population, and the increased preponderance of disabled people in this group, coupled with the rising level of public expenditure required to service the complex organization of long term care (LTC) delivery are causing increased pressure on LTC systems in Europe. A pan-European survey was carried out to evaluate whether patterns of LTC can be identified across Europe and what are the trends of the countries along them.

Methods

An ecological study was conducted on the 27 EU Member States plus Norway and Iceland, referring to the period 2003-2007. Several variables related to organizational features, elderly needs and expenditure were drawn from OECD Health Data and the Eurostat Statistics database and combined using Multiple Factor Analysis (MFA).

Results

Two global Principal Components were taken into consideration given that their expressed total variance was greater than 60%. They were interpreted according to the higher (more than 0.5) positive or negative correlation coefficients between them and the original variables; thus patterns of LTC were identified. High alignment between old age related expenditure and elderly needs characterizes Nordic and Western European countries, the former also having a higher level of formal care than the latter. Mediterranean as well as Central and South Eastern European countries show lower alignment between old age related expenditure and elderly needs, coupled with a level of provision of formal care that is around or slightly above the average European level. In the dynamic comparison, linear, stable or unclear trends were shown for the studied countries.

Conclusions

The analysis carried out is an explorative and descriptive study, which is an attempt to reveal patterns and trends of LTC in Europe, allowing comparisons between countries. It also stimulates further researches with lower aggregated data useful to gain meaningful policy-making evidence. Please see related article: http://www.biomedcentral.com/1741-7015/9/124.",2011-11-18 +26641091,Semi-Supervised Multi-View Learning for Gene Network Reconstruction.,"The task of gene regulatory network reconstruction from high-throughput data is receiving increasing attention in recent years. As a consequence, many inference methods for solving this task have been proposed in the literature. It has been recently observed, however, that no single inference method performs optimally across all datasets. It has also been shown that the integration of predictions from multiple inference methods is more robust and shows high performance across diverse datasets. Inspired by this research, in this paper, we propose a machine learning solution which learns to combine predictions from multiple inference methods. While this approach adds additional complexity to the inference process, we expect it would also carry substantial benefits. These would come from the automatic adaptation to patterns on the outputs of individual inference methods, so that it is possible to identify regulatory interactions more reliably when these patterns occur. This article demonstrates the benefits (in terms of accuracy of the reconstructed networks) of the proposed method, which exploits an iterative, semi-supervised ensemble-based algorithm. The algorithm learns to combine the interactions predicted by many different inference methods in the multi-view learning setting. The empirical evaluation of the proposed algorithm on a prokaryotic model organism (E. coli) and on a eukaryotic model organism (S. cerevisiae) clearly shows improved performance over the state of the art methods. 
The results indicate that gene regulatory network reconstruction for the real datasets is more difficult for S. cerevisiae than for E. coli. The software, all the datasets used in the experiments and all the results are available for download at the following link: http://figshare.com/articles/Semi_supervised_Multi_View_Learning_for_Gene_Network_Reconstruction/1604827.",2015-12-07 +26070610,TeratoScore: Assessing the Differentiation Potential of Human Pluripotent Stem Cells by Quantitative Expression Analysis of Teratomas.,"Teratoma formation is the gold standard assay for testing the capacity of human pluripotent stem cells to differentiate into all embryonic germ layers. Although widely used, little effort has been made to transform this qualitative assay into a quantitative one. Using gene expression data from a wide variety of cells, we created a scorecard representing tissues from all germ layers and extraembryonic tissues. TeratoScore, an online, open-source platform based on this scorecard, distinguishes pluripotent stem cell-derived teratomas from malignant tumors, translating cell potency into a quantitative measure (http://benvenisty.huji.ac.il/teratoscore.php). The teratomas used for the algorithm also allowed us to examine gene expression differences between tumors with a diploid karyotype and those initiated by aneuploid cells. Chromosomally aberrant teratomas show a significantly different gene expression signature from that of teratomas originating from diploid cells, particularly in central nervous system-specific genes, congruent with human chromosomal syndromes.",2015-06-01 +22434829,Building a biomedical semantic network in Wikipedia with Semantic Wiki Links.,"Wikipedia is increasingly used as a platform for collaborative data curation, but its current technical implementation has significant limitations that hinder its use in biocuration applications. 
Specifically, while editors can easily link between two articles in Wikipedia to indicate a relationship, there is no way to indicate the nature of that relationship in a way that is computationally accessible to the system or to external developers. For example, in addition to noting a relationship between a gene and a disease, it would be useful to differentiate the cases where genetic mutation or altered expression causes the disease. Here, we introduce a straightforward method that allows Wikipedia editors to embed computable semantic relations directly in the context of current Wikipedia articles. In addition, we demonstrate two novel applications enabled by the presence of these new relationships. The first is a dynamically generated information box that can be rendered on all semantically enhanced Wikipedia articles. The second is a prototype gene annotation system that draws its content from the gene-centric articles on Wikipedia and exposes the new semantic relationships to enable previously impossible, user-defined queries. DATABASE URL: http://en.wikipedia.org/wiki/Portal:Gene_Wiki.",2012-03-20 +26264058,Identifying relevant group of miRNAs in cancer using fuzzy mutual information.,"MicroRNAs (miRNAs) act as a major biomarker of cancer. All miRNAs in human body are not equally important for cancer identification. We propose a methodology, called FMIMS, which automatically selects the most relevant miRNAs for a particular type of cancer. In FMIMS, miRNAs are initially grouped by using a SVM-based algorithm; then the group with highest relevance is determined and the miRNAs in that group are finally ranked for selection according to their redundancy. Fuzzy mutual information is used in computing the relevance of a group and the redundancy of miRNAs within it. Superiority of the most relevant group to all others, in deciding normal or cancer, is demonstrated on breast, renal, colorectal, lung, melanoma and prostate data. 
The merit of FMIMS as compared to several existing methods is established. While 12 out of 15 selected miRNAs by FMIMS corroborate with those of biological investigations, three of them viz., ""hsa-miR-519,"" ""hsa-miR-431"" and ""hsa-miR-320c"" are possible novel predictions for renal cancer, lung cancer and melanoma, respectively. The selected miRNAs are found to be involved in disease-specific pathways by targeting various genes. The method is also able to detect the responsible miRNAs even at the primary stage of cancer. The related code is available at http://www.jayanta.droppages.com/FMIMS.html .",2015-08-12 +26955062,Long-Term Exposure to Ambient Fine Particulate Matter and Renal Function in Older Men: The Veterans Administration Normative Aging Study.,"

Background

It is unknown if ambient fine particulate matter (PM2.5) is associated with lower renal function, a cardiovascular risk factor.

Objective

We investigated whether long-term PM2.5 exposure was associated with estimated glomerular filtration rate (eGFR) in a cohort of older men living in the Boston Metropolitan area.

Methods

This longitudinal analysis included 669 participants from the Veterans Administration Normative Aging Study with up to four visits between 2000 and 2011 (n = 1,715 visits). Serum creatinine was measured at each visit, and eGFR was calculated according to the Chronic Kidney Disease Epidemiology Collaboration equation. One-year exposure to PM2.5 prior to each visit was assessed using a validated spatiotemporal model that utilized satellite remote-sensing aerosol optical depth data. eGFR was modeled in a time-varying linear mixed-effects regression model as a continuous function of 1-year PM2.5, adjusting for important covariates.

Results

One-year PM2.5 exposure was associated with lower eGFRs; a 2.1-μg/m³ interquartile range higher 1-year PM2.5 was associated with a 1.87 mL/min/1.73 m² lower eGFR [95% confidence interval (CI): -2.99, -0.76]. A 2.1 μg/m³-higher 1-year PM2.5 was also associated with an additional annual decrease in eGFR of 0.60 mL/min/1.73 m² per year (95% CI: -0.79, -0.40).

Conclusions

In this longitudinal sample of older men, the findings supported the hypothesis that long-term PM2.5 exposure negatively affects renal function and increases renal function decline.

Citation

Mehta AJ, Zanobetti A, Bind MC, Kloog I, Koutrakis P, Sparrow D, Vokonas PS, Schwartz JD. 2016. Long-term exposure to ambient fine particulate matter and renal function in older men: the VA Normative Aging Study. Environ Health Perspect 124:1353-1360; http://dx.doi.org/10.1289/ehp.1510269.",2016-03-08 +22807434,Teaching the fluctuation test in silico by using mutate: a program to distinguish between the adaptive and spontaneous mutation hypotheses.,"Mutate is a program developed for teaching purposes to impart a virtual laboratory class for undergraduate students of Genetics in Biology. The program emulates the so-called fluctuation test whose aim is to distinguish between spontaneous and adaptive mutation hypotheses in bacteria. The plan is to train students in certain key multidisciplinary aspects of current genetics such as sequence databases, DNA mutations, and hypothesis testing, while introducing the fluctuation test. This seminal experiment was originally performed studying Escherichia coli resistance to the infection by bacteriophage T1. The fluctuation test initiated the modern bacterial genetics that 25 years later ushered in the era of the recombinant DNA. Nowadays we know that some deletions in fhuA, the gene responsible for E. coli membrane receptor of T1, could cause the E. coli resistance to this phage. For the sake of simplicity, we will introduce the assumption that a single mutation generates the resistance to T1. During the practical, the students use the program to download some fhuA gene sequences, manually introduce some stop codon mutations, and design a fluctuation test to obtain data for distinguishing between preadaptative (spontaneous) and induced (adaptive) mutation hypotheses. The program can be launched from a browser or, if preferred, its executable file can be downloaded from http://webs.uvigo.es/acraaj/MutateWeb/Mutate.html. 
It requires the Java 5.0 (or higher) Runtime Environment (freely available at http://www.java.com).",2012-06-18 +26097806,RNA-Seq analysis and whole genome DNA-binding profile of the Vibrio cholerae histone-like nucleoid structuring protein (H-NS).,"The data described in this article pertain to the genome-wide transcription profiling of a Vibrio cholerae mutant lacking the histone-like nucleoid structuring protein (H-NS) and the mapping of the H-NS chromosome binding sites [1, 2]. H-NS is a nucleoid-associated protein with two interrelated functions: organization of the bacterial nucleoid and transcriptional silencing [3]. Both functions require DNA binding and protein oligomerization [4, 5]. H-NS commonly silences the expression of virulence factors acquired by lateral gene transfer [6]. The highly pleiotropic nature of hns mutants in V. cholerae indicates that H-NS impacts a broad range of cellular processes such as virulence, stress response, surface attachment, biofilm development, motility and chemotaxis. We used a V. cholerae strain harboring a deletion of hns and a strain expressing H-NS tagged at the C-terminus with the FLAG epitope to generate datasets representing the hns transcriptome and DNA binding profile under laboratory conditions (LB medium, 37°C). The datasets are publicly available at the Gene Expression Omnibus (GEO) repository (http://www.ncbi.nlm.nih.gov/geo/) with accession numbers GSE62785 and GSE64249.",2015-09-01 +24132931,MFCompress: a compression tool for FASTA and multi-FASTA data.,"

Motivation

The data deluge phenomenon is becoming a serious problem in most genomic centers. To alleviate it, general purpose tools, such as gzip, are used to compress the data. However, although pervasive and easy to use, these tools fall short when the intention is to reduce as much as possible the data, for example, for medium- and long-term storage. A number of algorithms have been proposed for the compression of genomics data, but unfortunately only a few of them have been made available as usable and reliable compression tools.

Results

In this article, we describe one such tool, MFCompress, specially designed for the compression of FASTA and multi-FASTA files. In comparison to gzip and applied to multi-FASTA files, MFCompress can provide additional average compression gains of almost 50%, i.e. it potentially doubles the available storage, although at the cost of some more computation time. On highly redundant datasets, and in comparison with gzip, 8-fold size reductions have been obtained.

Availability

Both source code and binaries for several operating systems are freely available for non-commercial use at http://bioinformatics.ua.pt/software/mfcompress/.",2013-10-16 +21591763,GlycoFish: a database of zebrafish N-linked glycoproteins identified using SPEG method coupled with LC/MS.,"Zebrafish (Danio rerio) is a model organism that is used to study the mechanisms and pathways of human disorders. Many dysfunctions in neurological, development, and neuromuscular systems are due to glycosylation deficiencies, but the glycoproteins involved in zebrafish embryonic development have not been established. In this study, a mass spectrometry-based glycoproteomic characterization of zebrafish embryos was performed to identify the N-linked glycoproteins and N-linked glycosylation sites. To increase the number of glycopeptides, proteins from zebrafish were digested with two different proteases--chymotrypsin and trypsin--into peptides of different length. The N-glycosylated peptides of zebrafish were then captured by the solid-phase extraction of N-linked glycopeptides (SPEG) method and the peptides were identified with an LTQ OrbiTrap Velos mass spectrometer. From 265 unique glycopeptides, including 269 consensus NXT/S glycosites, we identified 169 different N-glycosylated proteins. The identified glycoproteins were highly abundant in proteins belonging to the transporter, cell adhesion, and ion channel/ion binding categories, which are important to embryonic, organ, and central nervous system development. This proteomics data will expand our knowledge about glycoproteins in zebrafish and may be used to elucidate the role that glycosylation plays in cellular processes and disease. The glycoprotein data are available through the GlycoFish database (http://betenbaugh.jhu.edu/GlycoFish) introduced in this paper.",2011-06-08 +23182485,Risk of intracerebral aneurysm rupture during carotid revascularization.,"

Objective

Robust guidelines exist for the treatment of carotid stenosis and intracranial aneurysms independently, however, the management of tandem carotid stenosis and intracranial aneurysms remains uncertain. Although the prevalence of tandem pathologies is small (1.9%-3.2%), treating carotid stenosis can alter intracranial hemodynamics potentially predisposing to aneurysm rupture. In this review, our aim was to assess the safety of intervention in this cohort, by analyzing outcomes from the published literature.

Methods

The preferred reporting items for systematic reviews and meta-analyses (PRISMA) guidelines were used to conduct the review. Articles from 1947 to 2012 were searched using EMBASE Classic and EMBASE (November, 1947 -March, 2012) and Ovid MEDLINE(R) In-Process and other NonIndexed Citations and Ovid MEDLINE(R) on Ovid SP, http://ClinicalTrials.gov, http://controlled-trials.com and the Cochrane review database using a predefined search strategy.

Results

One hundred forty-one patients from 27 articles were included. Interventions ranged from single (n=104, 74%), staged (n=26, 18%) to simultaneous procedures (n=11, 8%). The largest cohort of patients was treated by carotid endarterectomy alone (n=92, 66%). The majority of patients presented with a symptomatic carotid stenosis and an asymptomatic ipsilateral intracranial aneurysm (n=70, 50%). Five subarachnoid hemorrhages occurred (4% [5/140], three within 30 days of the procedure and two thereafter) of which two were fatal. All five occurred in patients who underwent carotid endarterectomy as a single procedure (5%). Two of the five patients presented with ruptured posterior communicating artery aneurysms.

Conclusions

Published reports of perioperative aneurysm rupture are rare in individuals with tandem carotid stenosis and intracranial aneurysms. This is the first analysis of all published cases. However, it is limited by the small number of studies and the possible underreporting due to publication bias and underdiagnosis where angiography was not performed. Although we report a low incidence of subarachnoid hemorrhage, analysis of registry data with a larger cohort is warranted to confirm these findings.",2012-12-01 +30722535,First Report of Bacterial Leaf Spot of Parsley Caused by Pseudomonas syringae pv. coriandricola in Ohio.,"A severe leaf spot of parsley (Petroselinum crispum L. cvs. Dark Green Italian and Gigante) was observed on ∼1.5 ha in 2007 and 8 ha in 2012 on three vegetable farms in northern Ohio. Tiny, water-soaked spots that enlarged to necrotic lesions (∼5 mm wide) were first observed in June of each year. Lesions often coalesced and leaf marginal necrosis was common. Disease incidence initially ranged from 20 to 50%, and a 1.5-ha field was completely lost in 2012 as a result of the disease. Bacterial streaming was observed microscopically from leaf lesions. Diseased leaf tissue was dipped briefly in 70% ethanol, rinsed in sterile water, and blotted dry. Bacteria were isolated by plating 10-fold serial dilutions of diseased tissue extracts onto yeast dextrose carbonate and Pseudomonas F (PF) agar media. Whitish, opaque, circular colonies were isolated consistently from all samples. One isolate was purified from each of four fields. They were all gram-negative, non-fluorescent on PF medium, levan positive, oxidase negative, arginine dihydrolase negative, potato rot negative, and tobacco hypersensitive reaction positive. Repetitive extragenic palindromic sequence (Rep)-PCR fingerprint profiles using the BOXA1R primer (4) were identical for the four isolates. A pathogenicity test was conducted with strain SM69-07 isolated in 2007. 
A bacterial culture was suspended in sterile potassium phosphate buffer (0.01M, pH 7.4) and adjusted to 10^8 CFU/ml. Four 4-week-old plants each of parsley and cilantro (Ferry-Morse Seed Co.) were inoculated by spraying the bacterial suspension on the leaves until runoff. Potassium phosphate buffer was applied as a negative control treatment for each plant species. Plants were kept in a mist room with 100% humidity for 4 h, then transferred to a greenhouse with average maximum and minimum temperatures of 30 and 25°C. Leaf symptoms similar to those on the original plants were observed on the inoculated parsley and cilantro plants within 14 days of inoculation, whereas no symptoms developed on the negative control plants. One bacterial isolate obtained from each inoculated host using the isolation method described above was confirmed to be identical to the original isolates using the LOPAT tests and Rep-PCR DNA fingerprint profiles; no target bacteria were isolated from the negative control plants. Multilocus sequence typing (MLST) of the housekeeping genes gap1, gltA, gyrB, and rpoD was conducted for strain SM69-07 (2,3). Sequence data were subjected to BLASTn searches in the Plant-Associated Microbes Database (PAMDB, http://genome.ppws.vt.edu/cgi-bin/MLST/home.pl ) (1). The sequences aligned with those of Pseudomonas syringae pv. coriandricola with 100% identity to alleles 101 (gyrB), 123 (rpoD), 7 (gap1), and 64 (gltA). Strain information and sequence alignment results for SM69-07 were submitted to PAMDB and assigned as isolate ID 1138. Based on bacterial culture morphology, LOPAT profile, pathogenicity test results, and MLST, the pathogen was confirmed as P. syringae pv. coriandricola. To our knowledge, this is the first report of bacterial spot of parsley caused by P. syringae pv. coriandricola in Ohio. Due to stringent quality requirements for fresh market parsley, this disease may pose a threat to the economic sustainability of parsley production in Ohio. 
References: (1) N. F. Almeida et al. Phytopathology 100:208, 2010. (2) C. T. Bull et al. Phytopathology 101:847, 2011. (3) M. S. Hwang et al. Appl. Environ. Microbiol. 71:5182, 2002. (4) J. Versalovic et al. Methods Mol. Cell Biol. 5:25, 1994.",2013-07-01 +23940252,ROBNCA: robust network component analysis for recovering transcription factor activities.,"

Motivation

Network component analysis (NCA) is an efficient method of reconstructing the transcription factor activity (TFA), which makes use of the gene expression data and prior information available about transcription factor (TF)-gene regulations. Most of the contemporary algorithms either exhibit the drawback of inconsistency and poor reliability, or suffer from prohibitive computational complexity. In addition, the existing algorithms do not possess the ability to counteract the presence of outliers in the microarray data. Hence, robust and computationally efficient algorithms are needed to enable practical applications.

Results

We propose ROBust Network Component Analysis (ROBNCA), a novel iterative algorithm that explicitly models the possible outliers in the microarray data. An attractive feature of the ROBNCA algorithm is the derivation of a closed form solution for estimating the connectivity matrix, which was not available in prior contributions. The ROBNCA algorithm is compared with FastNCA and the non-iterative NCA (NI-NCA). ROBNCA estimates the TF activity profiles as well as the TF-gene control strength matrix with a much higher degree of accuracy than FastNCA and NI-NCA, irrespective of varying noise, correlation and/or amount of outliers in case of synthetic data. The ROBNCA algorithm is also tested on Saccharomyces cerevisiae data and Escherichia coli data, and it is observed to outperform the existing algorithms. The run time of the ROBNCA algorithm is comparable with that of FastNCA, and is hundreds of times faster than NI-NCA.

Availability

The ROBNCA software is available at http://people.tamu.edu/∼amina/ROBNCA",2013-08-11 +25501940,Using the structure-function linkage database to characterize functional domains in enzymes.,"The Structure-Function Linkage Database (SFLD; http://sfld.rbvi.ucsf.edu/) is a Web-accessible database designed to link enzyme sequence, structure, and functional information. This unit describes the protocols by which a user may query the database to predict the function of uncharacterized enzymes and to correct misannotated functional assignments. The information in this unit is especially useful in helping a user discriminate functional capabilities of a sequence that is only distantly related to characterized sequences in publicly available databases.",2014-12-12 +23842811,MIG: Multi-Image Genome viewer.,"

Summary

Multi-Image Genome (MIG) viewer is a web-based application for visualizing, querying and filtering many thousands of genome browser regions as well as for exporting the data in a variety of formats. This methodology has been used successfully to analyze ChIP-Seq data and RNA-Seq data and to detect somatic mutations in genome resequencing projects.

Availability

MIG is available at https://mig.molbiol.ox.ac.uk/mig/",2013-07-10 +26023105,antaRNA: ant colony-based RNA sequence design.,"

Motivation

RNA sequence design is studied at least as long as the classical folding problem. Although for the latter the functional fold of an RNA molecule is to be found, inverse folding tries to identify RNA sequences that fold into a function-specific target structure. In combination with RNA-based biotechnology and synthetic biology, reliable RNA sequence design becomes a crucial step to generate novel biochemical components.

Results

In this article, the computational tool antaRNA is presented. It is capable of compiling RNA sequences for a given structure that comply in addition with an adjustable full range objective GC-content distribution, specific sequence constraints and additional fuzzy structure constraints. antaRNA applies ant colony optimization meta-heuristics and its superior performance is shown on biological datasets.

Availability and implementation

http://www.bioinf.uni-freiburg.de/Software/antaRNA CONTACT: backofen@informatik.uni-freiburg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-27 +25497042,DNA methylation patterns of protein coding genes and long noncoding RNAs in female schizophrenic patients.,"Schizophrenia (SCZ) is a complex mental disorder contributed by both genetic and epigenetic factors. Long noncoding RNAs (lncRNAs) was recently found playing an important regulatory role in mental disorders. However, little was known about the DNA methylation of lncRNAs, although numerous SCZ studies have been performed on genetic polymorphisms or epigenetic marks in protein coding genes. We presented a comprehensive genome wide DNA methylation study of both protein coding genes and lncRNAs in female patients with paranoid and undifferentiated SCZ. Using the methyl-CpG binding domain (MBD) protein-enriched genome sequencing (MBD-seq), 8,163 and 764 peaks were identified in paranoid and undifferentiated SCZ, respectively (p < 1 × 10-5). Gene ontology analysis showed that the hypermethylated regions were enriched in the genes related to neuron system and brain for both paranoid and undifferentiated SCZ (p < 0.05). Among these peaks, 121 peaks were located in gene promoter regions that might affect gene expression and influence the SCZ related pathways. Interestingly, DNA methylation of 136 and 23 known lncRNAs in Refseq database were identified in paranoid and undifferentiated SCZ, respectively. In addition, ∼20% of intergenic peaks annotated based on Refseq genes were overlapped with lncRNAs in UCSC and gencode databases. In order to show the results well for most biological researchers, we created an online database to display and visualize the information of DNA methyation peaks in both types of SCZ (http://www.bioinfo.org/scz/scz.htm). 
Our results showed that the aberrant DNA methylation of lncRNAs might be another important epigenetic factor for SCZ.",2014-12-11 +22973224,Automated regional behavioral analysis for human brain images.,"Behavioral categories of functional imaging experiments along with standardized brain coordinates of associated activations were used to develop a method to automate regional behavioral analysis of human brain images. Behavioral and coordinate data were taken from the BrainMap database (http://www.brainmap.org/), which documents over 20 years of published functional brain imaging studies. A brain region of interest (ROI) for behavioral analysis can be defined in functional images, anatomical images or brain atlases, if images are spatially normalized to MNI or Talairach standards. Results of behavioral analysis are presented for each of BrainMap's 51 behavioral sub-domains spanning five behavioral domains (Action, Cognition, Emotion, Interoception, and Perception). For each behavioral sub-domain the fraction of coordinates falling within the ROI was computed and compared with the fraction expected if coordinates for the behavior were not clustered, i.e., uniformly distributed. When the difference between these fractions is large behavioral association is indicated. A z-score ≥ 3.0 was used to designate statistically significant behavioral association. The left-right symmetry of ~100K activation foci was evaluated by hemisphere, lobe, and by behavioral sub-domain. Results highlighted the classic left-side dominance for language while asymmetry for most sub-domains (~75%) was not statistically significant. Use scenarios were presented for anatomical ROIs from the Harvard-Oxford cortical (HOC) brain atlas, functional ROIs from statistical parametric maps in a TMS-PET study, a task-based fMRI study, and ROIs from the ten ""major representative"" functional networks in a previously published resting state fMRI study. 
Statistically significant behavioral findings for these use scenarios were consistent with published behaviors for associated anatomical and functional regions.",2012-08-28 +26518129,The Leishmania metaphylome: a comprehensive survey of Leishmania protein phylogenetic relationships.,"

Background

Leishmaniasis is a neglected parasitic disease with diverse clinical manifestations and a complex epidemiology. It has been shown that its parasite-related traits vary between species and that they modulate infectivity, pathogenicity, and virulence. However, understanding of the species-specific adaptations responsible for these features and their evolutionary background is limited. To improve our knowledge regarding the parasite biology and adaptation mechanisms of different Leishmania species, we conducted a proteome-wide phylogenomic analysis to gain insights into Leishmania evolution.

Results

The analysis of the reconstructed phylomes (totaling 45,918 phylogenies) allowed us to detect genes that are shared in pathogenic Leishmania species, such as calpain-like cysteine peptidases and 3'a2rel-related proteins, or genes that could be associated with visceral or cutaneous development. This analysis also established the phylogenetic relationship of several hypothetical proteins whose roles remain to be characterized. Our findings demonstrated that gene duplication constitutes an important evolutionary force in Leishmania, acting on protein families that mediate host-parasite interactions, such as amastins, GP63 metallopeptidases, cathepsin L-like proteases, and our methods permitted a deeper analysis of their phylogenetic relationships.

Conclusions

Our results highlight the importance of proteome wide phylogenetic analyses to detect adaptation and evolutionary processes in different organisms and underscore the need to characterize the role of expanded and species-specific proteins in the context of Leishmania evolution by providing a framework for the phylogenetic relationships of Leishmania proteins. Phylogenomic data are publicly available for use through PhylomeDB (http://www.phylomedb.org).",2015-10-30 +25828799,ProSight Lite: graphical software to analyze top-down mass spectrometry data.,"Many top-down proteomics experiments focus on identifying and localizing PTMs and other potential sources of “mass shift” on a known protein sequence. A simple application to match ion masses and facilitate the iterative hypothesis testing of PTM presence and location would assist with the data analysis in these experiments. ProSight Lite is a free software tool for matching a single candidate sequence against a set of mass spectrometric observations. Fixed or variable modifications, including both PTMs and a select number of glycosylations, can be applied to the amino acid sequence. The application reports multiple scores and a matching fragment list. Fragmentation maps can be exported for publication in either portable network graphic (PNG) or scalable vector graphic (SVG) format. ProSight Lite can be freely downloaded from http://prosightlite.northwestern.edu, installs and updates from the web, and requires Windows 7 or a higher version.",2015-04-01 +25555998,Fiber estimation and tractography in diffusion MRI: development of simulated brain images and comparison of multi-fiber analysis methods at clinical b-values.,"Advances in diffusion-weighted magnetic resonance imaging (DW-MRI) have led to many alternative diffusion sampling strategies and analysis methodologies. 
A common objective among methods is estimation of white matter fiber orientations within each voxel, as doing so permits in-vivo fiber-tracking and the ability to study brain connectivity and networks. Knowledge of how DW-MRI sampling schemes affect fiber estimation accuracy, tractography and the ability to recover complex white-matter pathways, differences between results due to choice of analysis method, and which method(s) perform optimally for specific data sets, all remain important problems, especially as tractography-based studies become common. In this work, we begin to address these concerns by developing sets of simulated diffusion-weighted brain images which we then use to quantitatively evaluate the performance of six DW-MRI analysis methods in terms of estimated fiber orientation accuracy, false-positive (spurious) and false-negative (missing) fiber rates, and fiber-tracking. The analysis methods studied are: 1) a two-compartment ""ball and stick"" model (BSM) (Behrens et al., 2003); 2) a non-negativity constrained spherical deconvolution (CSD) approach (Tournier et al., 2007); 3) analytical q-ball imaging (QBI) (Descoteaux et al., 2007); 4) q-ball imaging with Funk-Radon and Cosine Transform (FRACT) (Haldar and Leahy, 2013); 5) q-ball imaging within constant solid angle (CSA) (Aganj et al., 2010); and 6) a generalized Fourier transform approach known as generalized q-sampling imaging (GQI) (Yeh et al., 2010). We investigate these methods using 20, 30, 40, 60, 90 and 120 evenly distributed q-space samples of a single shell, and focus on a signal-to-noise ratio (SNR = 18) and diffusion-weighting (b = 1000 s/mm(2)) common to clinical studies. We found that the BSM and CSD methods consistently yielded the least fiber orientation error and simultaneously greatest detection rate of fibers. 
Fiber detection rate was found to be the most distinguishing characteristic between the methods, and a significant factor for complete recovery of tractography through complex white-matter pathways. For example, while all methods recovered similar tractography of prominent white matter pathways of limited fiber crossing, CSD (which had the highest fiber detection rate, especially for voxels containing three fibers) recovered the greatest number of fibers and largest fraction of correct tractography for complex three-fiber crossing regions. The synthetic data sets, ground-truth, and tools for quantitative evaluation are publically available on the NITRC website as the project ""Simulated DW-MRI Brain Data Sets for Quantitative Evaluation of Estimated Fiber Orientations"" at http://www.nitrc.org/projects/sim_dwi_brain.",2014-12-30 +25491031,Integrating protein structural dynamics and evolutionary analysis with Bio3D.,"

Background

Popular bioinformatics approaches for studying protein functional dynamics include comparisons of crystallographic structures, molecular dynamics simulations and normal mode analysis. However, determining how observed displacements and predicted motions from these traditionally separate analyses relate to each other, as well as to the evolution of sequence, structure and function within large protein families, remains a considerable challenge. This is in part due to the general lack of tools that integrate information of molecular structure, dynamics and evolution.

Results

Here, we describe the integration of new methodologies for evolutionary sequence, structure and simulation analysis into the Bio3D package. This major update includes unique high-throughput normal mode analysis for examining and contrasting the dynamics of related proteins with non-identical sequences and structures, as well as new methods for quantifying dynamical couplings and their residue-wise dissection from correlation network analysis. These new methodologies are integrated with major biomolecular databases as well as established methods for evolutionary sequence and comparative structural analysis. New functionality for directly comparing results derived from normal modes, molecular dynamics and principal component analysis of heterogeneous experimental structure distributions is also included. We demonstrate these integrated capabilities with example applications to dihydrofolate reductase and heterotrimeric G-protein families along with a discussion of the mechanistic insight provided in each case.

Conclusions

The integration of structural dynamics and evolutionary analysis in Bio3D enables researchers to go beyond a prediction of single protein dynamics to investigate dynamical features across large protein families. The Bio3D package is distributed with full source code and extensive documentation as a platform independent R package under a GPL2 license from http://thegrantlab.org/bio3d/ .",2014-12-10 +25487439,An annotated database of Arabidopsis mutants of acyl lipid metabolism.,"

Key message

We have constructed and annotated a web-based database of over 280 Arabidopsis genes that have characterized mutants associated with Arabidopsis acyl lipid metabolism. Mutants have played a fundamental role in gene discovery and in understanding the function of genes involved in plant acyl lipid metabolism. The first mutant in Arabidopsis lipid metabolism (fad4) was described in 1985. Since that time, characterization of mutants in more than 280 genes associated with acyl lipid metabolism has been reported. This review provides a brief background and history on identification of mutants in acyl lipid metabolism, an analysis of the distribution of mutants in different areas of acyl lipid metabolism and presents an annotated database (ARALIPmutantDB) of these mutants. The database provides information on the phenotypes of mutants, pathways and enzymes/proteins associated with the mutants, and allows rapid access via hyperlinks to summaries of information about each mutant and to literature that provides information on the lipid composition of the mutants. In addition, the database of mutants is integrated within the ARALIP plant acyl lipid metabolism website ( http://aralip.plantbiology.msu.edu ) so that information on mutants is displayed on and can be accessed from metabolic pathway maps. Mutants for at least 30% of the genes in the database have multiple names, which have been compiled here to reduce ambiguities in searches for information. The database should also provide a tool for exploring the relationships between mutants in acyl lipid-related genes and their lipid phenotypes and point to opportunities for further research.",2014-12-10 +25143291,Poretools: a toolkit for analyzing nanopore sequence data.,"

Motivation

Nanopore sequencing may be the next disruptive technology in genomics, owing to its ability to detect single DNA molecules without prior amplification, lack of reliance on expensive optical components, and the ability to sequence long fragments. The MinION™ from Oxford Nanopore Technologies (ONT) is the first nanopore sequencer to be commercialized and is now available to early-access users. The MinION™ is a USB-connected, portable nanopore sequencer that permits real-time analysis of streaming event data. Currently, the research community lacks a standardized toolkit for the analysis of nanopore datasets.

Results

We introduce poretools, a flexible toolkit for exploring datasets generated by nanopore sequencing devices from MinION™ for the purposes of quality control and downstream analysis. Poretools operates directly on the native FAST5 (an application of the HDF5 standard) file format produced by ONT and provides a wealth of format conversion utilities and data exploration and visualization tools.

Availability and implementation

Poretools is an open-source software and is written in Python as both a suite of command line utilities and a Python application programming interface. Source code is freely available in Github at https://www.github.com/arq5x/poretools.",2014-08-20 +24931974,Methods for time series analysis of RNA-seq data with application to human Th17 cell differentiation.,"

Motivation

Gene expression profiling using RNA-seq is a powerful technique for screening RNA species' landscapes and their dynamics in an unbiased way. While several advanced methods exist for differential expression analysis of RNA-seq data, proper tools to analyze RNA-seq time-course have not been proposed.

Results

In this study, we use RNA-seq to measure gene expression during the early human T helper 17 (Th17) cell differentiation and T-cell activation (Th0). To quantify Th17-specific gene expression dynamics, we present a novel statistical methodology, DyNB, for analyzing time-course RNA-seq data. We use non-parametric Gaussian processes to model temporal correlation in gene expression and combine that with negative binomial likelihood for the count data. To account for experiment-specific biases in gene expression dynamics, such as differences in cell differentiation efficiencies, we propose a method to rescale the dynamics between replicated measurements. We develop an MCMC sampling method to make inference of differential expression dynamics between conditions. DyNB identifies several known and novel genes involved in Th17 differentiation. Analysis of differentiation efficiencies revealed consistent patterns in gene expression dynamics between different cultures. We use qRT-PCR to validate differential expression and differentiation efficiencies for selected genes. Comparison of the results with those obtained via traditional timepoint-wise analysis shows that time-course analysis together with time rescaling between cultures identifies differentially expressed genes which would not otherwise be detected.

Availability

An implementation of the proposed computational methods will be available at http://research.ics.aalto.fi/csb/software/",2014-06-01 +26484072,Genome-wide mapping of Painting of fourth on Drosophila melanogaster salivary gland polytene chromosomes.,"The protein Painting of fourth (POF) in Drosophila melanogaster specifically targets and stimulates expression output from the heterochromatic 4th chromosome, thereby representing an autosome specific protein [1,2]. Despite the high specificity for chromosome 4 genes, POF is occasionally observed binding to the cytological region 2L:31 in males and females [3] and two loci on the X-chromosome, PoX1 and PoX2 only in females [4]. Here we provide a detailed description of the experimental design and analysis of the tiling array data presented by Lundberg and colleagues in G3: Genes, Genomes, Genetics 2013 [4], where the female specific POF binding to PoX1 and PoX2 loci on the X chromosome was reported. We show the genome-wide high resolution binding profile of the POF protein where these different POF binding sites are detected. The complete data set is available at http://www.ncbi.nlm.nih.gov/geo/ (accession: GSE45402).",2014-04-28 +24848012,EvoCor: a platform for predicting functionally related genes using phylogenetic and expression profiles.,"The wealth of publicly available gene expression and genomic data provides unique opportunities for computational inference to discover groups of genes that function to control specific cellular processes. Such genes are likely to have co-evolved and be expressed in the same tissues and cells. Unfortunately, the expertise and computational resources required to compare tens of genomes and gene expression data sets make this type of analysis difficult for the average end-user. Here, we describe the implementation of a web server that predicts genes involved in affecting specific cellular processes together with a gene of interest. 
We termed the server 'EvoCor', to denote that it detects functional relationships among genes through evolutionary analysis and gene expression correlation. This web server integrates profiles of sequence divergence derived by a Hidden Markov Model (HMM) and tissue-wide gene expression patterns to determine putative functional linkages between pairs of genes. This server is easy to use and freely available at http://pilot-hmm.vbi.vt.edu/.",2014-05-21 +26382288,"Antiretroviral therapy and changing patterns of HIV stigmatisation in Entebbe, Uganda.","Antiretroviral therapy (ART) has the potential to change processes of HIV stigmatisation. In this article, changing processes of stigmatisation among a group of people living with HIV (PLWH) on ART in Wakiso District, Uganda, are analysed using qualitative data from a study of PLWH's self-management of HIV on ART. There were 38 respondents (20 women, 18 men) who had been taking ART for at least 1 year. They were purposefully selected from government and non-government ART providers. Two in-depth interviews were held with each participant. Processes of reduced self-stigmatisation were clearly evident, caused by the recovery of their physical appearance and support from health workers. However most participants continued to conceal their status because they anticipated stigma; for example, they feared gossip, rejection and their status being used against them. Anticipated stigma was gendered: women expressed greater fear of enacted forms of stigma such as rejection by their partner; in contrast men's fears focused on gossip, loss of dignity and self-stigmatisation. The evidence indicates that ART has not reduced underlying structural drivers of stigmatisation, notably gender identities and inequalities, and that interventions are still required to mitigate and tackle stigmatisation, such as counselling, peer-led education and support groups that can help PLWH reconstruct alternative and more positive identities. 
A video abstract of this article can be found at: https://youtu.be/WtIaZJQ3Y_8.",2015-09-18 +24250217,Prediction of protein essentiality by the support vector machine with statistical tests.,"Essential proteins include the minimum required set of proteins to support cell life. Identifying essential proteins is important for understanding the cellular processes of an organism. However, identifying essential proteins experimentally is extremely time-consuming and labor-intensive. Alternative methods must be developed to examine essential proteins. There were two goals in this study: identifying the important features and building learning machines for discriminating essential proteins. Data for Saccharomyces cerevisiae and Escherichia coli were used. We first collected information from a variety of sources. We next proposed a modified backward feature selection method and build support vector machines (SVM) predictors based on the selected features. To evaluate the performance, we conducted cross-validations for the originally imbalanced data set and the down-sampling balanced data set. The statistical tests were applied on the performance associated with obtained feature subsets to confirm their significance. In the first data set, our best values of F-measure and Matthews correlation coefficient (MCC) were 0.549 and 0.495 in the imbalanced experiments. For the balanced experiment, the best values of F-measure and MCC were 0.770 and 0.545, respectively. In the second data set, our best values of F-measure and MCC were 0.421 and 0.407 in the imbalanced experiments. For the balanced experiment, the best values of F-measure and MCC were 0.718 and 0.448, respectively. The experimental results show that our selected features are compact and the performance improved. 
Prediction can also be conducted by users at the following internet address: http://bio2.cse.nsysu.edu.tw/esspredict.aspx.",2013-10-03 +23920668,Exposing public health surveillance data using existing standards.,"With the growing use of information technologies, an increased volume of data is produced in Public Health Surveillance, enabling utilization of new data sources and analysis methods. Public health and research will benefit from the use of data standards promoting harmonization and data description through metadata. No data standard has yet been universally accepted for exchanging public health data. In this work, we implemented two existing standards eligible to expose public health data: Statistical Data and Metadata Exchange - Health Domain (SDMX-HD) proposed by the World Health Organization and Open Data Protocol (OData) proposed by Microsoft Corp. SDMX-HD promotes harmonization through controlled vocabulary and predefined data structure suitable for public health but requires important investment, while OData, a generic purpose standard, proposes a simple way to expose data with minimal documentation and end-user integration tools. The two solutions were implemented and are publicly available at http://sdmx.sentiweb.fr and http://odata.sentiweb.fr. These solutions show that data sharing and interoperability are already possible in Public Health Surveillance.",2013-01-01 +22618878,PocketAnnotate: towards site-based function annotation.,"A computational pipeline PocketAnnotate for functional annotation of proteins at the level of binding sites has been proposed in this study. The pipeline integrates three in-house algorithms for site-based function annotation: PocketDepth, for prediction of binding sites in protein structures; PocketMatch, for rapid comparison of binding sites and PocketAlign, to obtain detailed alignment between pair of binding sites. A novel scheme has been developed to rapidly generate a database of non-redundant binding sites. 
For a given input protein structure, putative ligand-binding sites are identified, matched in real time against the database and the query substructure aligned with the promising hits, to obtain a set of possible ligands that the given protein could bind to. The input can be either whole protein structures or merely the substructures corresponding to possible binding sites. Structure-based function annotation at the level of binding sites thus achieved could prove very useful for cases where no obvious functional inference can be obtained based purely on sequence or fold-level analyses. An attempt has also been made to analyse proteins of no known function from Protein Data Bank. PocketAnnotate would be a valuable tool for the scientific community and contribute towards structure-based functional inference. The web server can be freely accessed at http://proline.biochem.iisc.ernet.in/pocketannotate/.",2012-05-22 +24565220,Missing value imputation for microarray data: a comprehensive comparison study and a web tool.,"

Background

Microarray data are usually peppered with missing values due to various reasons. However, most of the downstream analyses for microarray data require complete datasets. Therefore, accurate algorithms for missing value estimation are needed for improving the performance of microarray data analyses. Although many algorithms have been developed, there are many debates on the selection of the optimal algorithm. The studies about the performance comparison of different algorithms are still incomprehensive, especially in the number of benchmark datasets used, the number of algorithms compared, the rounds of simulation conducted, and the performance measures used.

Results

In this paper, we performed a comprehensive comparison by using (I) thirteen datasets, (II) nine algorithms, (III) 110 independent runs of simulation, and (IV) three types of measures to evaluate the performance of each imputation algorithm fairly. First, the effects of different types of microarray datasets on the performance of each imputation algorithm were evaluated. Second, we discussed whether the datasets from different species have different impact on the performance of different algorithms. To assess the performance of each algorithm fairly, all evaluations were performed using three types of measures. Our results indicate that the performance of an imputation algorithm mainly depends on the type of a dataset but not on the species where the samples come from. In addition to the statistical measure, two other measures with biological meanings are useful to reflect the impact of missing value imputation on the downstream data analyses. Our study suggests that local-least-squares-based methods are good choices to handle missing values for most of the microarray datasets.

Conclusions

In this work, we carried out a comprehensive comparison of the algorithms for microarray missing value imputation. Based on such a comprehensive comparison, researchers could choose the optimal algorithm for their datasets easily. Moreover, new imputation algorithms could be compared with the existing algorithms using this comparison strategy as a standard protocol. In addition, to assist researchers in dealing with missing values easily, we built a web-based and easy-to-use imputation tool, MissVIA (http://cosbi.ee.ncku.edu.tw/MissVIA), which supports many imputation algorithms. Once users upload a real microarray dataset and choose the imputation algorithms, MissVIA will determine the optimal algorithm for the users' data through a series of simulations, and then the imputed results can be downloaded for the downstream data analyses.",2013-12-13 +24330401,iMir: an integrated pipeline for high-throughput analysis of small non-coding RNA data obtained by smallRNA-Seq.,"

Background

Qualitative and quantitative analysis of small non-coding RNAs by next generation sequencing (smallRNA-Seq) represents a novel technology increasingly used to investigate with high sensitivity and specificity RNA population comprising microRNAs and other regulatory small transcripts. Analysis of smallRNA-Seq data to gather biologically relevant information, i.e. detection and differential expression analysis of known and novel non-coding RNAs, target prediction, etc., requires implementation of multiple statistical and bioinformatics tools from different sources, each focusing on a specific step of the analysis pipeline. As a consequence, the analytical workflow is slowed down by the need for continuous interventions by the operator, a critical factor when large numbers of datasets need to be analyzed at once.

Results

We designed a novel modular pipeline (iMir) for comprehensive analysis of smallRNA-Seq data, comprising specific tools for adapter trimming, quality filtering, differential expression analysis, biological target prediction and other useful options by integrating multiple open source modules and resources in an automated workflow. As statistics is crucial in deep-sequencing data analysis, we devised and integrated in iMir tools based on different statistical approaches to allow the operator to analyze data rigorously. The pipeline created here proved to be more efficient and time-saving than currently available methods and, in addition, flexible enough to allow the user to select the preferred combination of analytical steps. We present here the results obtained by applying this pipeline to analyze simultaneously 6 smallRNA-Seq datasets from either exponentially growing or growth-arrested human breast cancer MCF-7 cells, that led to the rapid and accurate identification, quantitation and differential expression analysis of ~450 miRNAs, including several novel miRNAs and isomiRs, as well as identification of the putative mRNA targets of differentially expressed miRNAs. In addition, iMir allowed also the identification of ~70 piRNAs (piwi-interacting RNAs), some of which were differentially expressed in proliferating vs growth arrested cells.

Conclusion

The integrated data analysis pipeline described here is based on a reliable, flexible and fully automated workflow, useful to rapidly and efficiently analyze high-throughput smallRNA-Seq data, such as those produced by the most recent high-performance next generation sequencers. iMir is available at http://www.labmedmolge.unisa.it/inglese/research/imir.",2013-12-13 +26379232,EEGNET: An Open Source Tool for Analyzing and Visualizing M/EEG Connectome.,"The brain is a large-scale complex network often referred to as the ""connectome"". Exploring the dynamic behavior of the connectome is a challenging issue as both excellent time and space resolution is required. In this context Magneto/Electroencephalography (M/EEG) are effective neuroimaging techniques allowing for analysis of the dynamics of functional brain networks at scalp level and/or at reconstructed sources. However, a tool that can cover all the processing steps of identifying brain networks from M/EEG data is still missing. In this paper, we report a novel software package, called EEGNET, running under MATLAB (Math works, inc), and allowing for analysis and visualization of functional brain networks from M/EEG recordings. EEGNET is developed to analyze networks either at the level of scalp electrodes or at the level of reconstructed cortical sources. It includes i) Basic steps in preprocessing M/EEG signals, ii) the solution of the inverse problem to localize / reconstruct the cortical sources, iii) the computation of functional connectivity among signals collected at surface electrodes or/and time courses of reconstructed sources and iv) the computation of the network measures based on graph theory analysis. EEGNET is the unique tool that combines the M/EEG functional connectivity analysis and the computation of network measures derived from the graph theory. The first version of EEGNET is easy to use, flexible and user friendly. 
EEGNET is an open source tool and can be freely downloaded from this webpage: https://sites.google.com/site/eegnetworks/.",2015-09-17 +25522231,funRNA: a fungi-centered genomics platform for genes encoding key components of RNAi.,"

Background

RNA interference (RNAi) is involved in genome defense as well as diverse cellular, developmental, and physiological processes. Key components of RNAi are Argonaute, Dicer, and RNA-dependent RNA polymerase (RdRP), which have been functionally characterized mainly in model organisms. The key components are believed to exist throughout eukaryotes; however, there is no systematic platform for archiving and dissecting these important gene families. In addition, few fungi have been studied to date, limiting our understanding of RNAi in fungi. Here we present funRNA http://funrna.riceblast.snu.ac.kr/, a fungal kingdom-wide comparative genomics platform for putative genes encoding Argonaute, Dicer, and RdRP.

Description

To identify and archive genes encoding the abovementioned key components, protein domain profiles were determined from reference sequences obtained from UniProtKB/SwissProt. The domain profiles were searched using fungal, metazoan, and plant genomes, as well as bacterial and archaeal genomes. 1,163, 442, and 678 genes encoding Argonaute, Dicer, and RdRP, respectively, were predicted. Based on the identification results, active site variation of Argonaute, diversification of Dicer, and sequence analysis of RdRP were discussed in a fungus-oriented manner. funRNA provides results from diverse bioinformatics programs and job submission forms for BLAST, BLASTMatrix, and ClustalW. Furthermore, sequence collections created in funRNA are synced with several gene family analysis portals and databases, offering further analysis opportunities.

Conclusions

funRNA provides identification results from a broad taxonomic range and diverse analysis functions, and could be used in diverse comparative and evolutionary studies. It could serve as a versatile genomics workbench for key components of RNAi.",2014-12-08 +26513779,Recognizing Focal Liver Lesions in CEUS With Dynamically Trained Latent Structured Models.,"This work investigates how to automatically classify Focal Liver Lesions (FLLs) into three specific benign or malignant types in Contrast-Enhanced Ultrasound (CEUS) videos, and aims at providing a computational framework to assist clinicians in FLL diagnosis. The main challenge for this task is that FLLs in CEUS videos often show diverse enhancement patterns at different temporal phases. To handle these diverse patterns, we propose a novel structured model, which detects a number of discriminative Regions of Interest (ROIs) for the FLL and recognize the FLL based on these ROIs. Our model incorporates an ensemble of local classifiers in the attempt to identify different enhancement patterns of ROIs, and in particular, we make the model reconfigurable by introducing switch variables to adaptively select appropriate classifiers during inference. We formulate the model learning as a non-convex optimization problem, and present a principled optimization method to solve it in a dynamic manner: the latent structures (e.g. the selections of local classifiers, and the sizes and locations of ROIs) are iteratively determined along with the parameter learning. Given the updated model parameters in each step, the data-driven inference is also proposed to efficiently determine the latent structures by using the sequential pruning and dynamic programming method. In the experiments, we demonstrate superior performances over the state-of-the-art approaches. We also release hundreds of CEUS FLLs videos used to quantitatively evaluate this work, which to the best of our knowledge forms the largest dataset in the literature. 
Please find more information at ""http://vision.sysu.edu.cn/projects/fllrecog/"".",2015-10-26 +21333001,Southeast Asian diversity: first insights into the complex mtDNA structure of Laos.,"

Background

Vast migrations and subsequent assimilation processes have shaped the genetic composition of Southeast Asia, an area of close contact between several major ethnic groups. To better characterize the genetic variation of this region, we analyzed the entire mtDNA control region of 214 unrelated donors from Laos according to highest forensic quality standards. To detail the phylogeny, we inspected selected SNPs from the mtDNA coding region. For a posteriori data quality control, quasi-median network constructions and autosomal STR typing were performed. In order to describe the mtDNA setup of Laos more thoroughly, the data were subjected to population genetic comparisons with 16 East Asian groups.

Results

The Laos sample exhibited ample mtDNA diversity, reflecting the huge number of ethnic groups listed. We found several new, so far undescribed mtDNA lineages in this dataset and surrounding populations. The Laos population was characteristic in terms of haplotype composition and genetic structure, however, genetic comparisons with other Southeast Asian populations revealed limited, but significant genetic differentiation. Notable differences in the maternal relationship to the major indigenous Southeast Asian ethnolinguistic groups were detected.

Conclusions

In this study, we portray the great mtDNA variety of Laos for the first time. Our findings will contribute to clarify the migration history of the region. They encourage setting up regional and subpopulation databases, especially for forensic applications. The Laotian sequences will be incorporated into the collaborative EMPOP mtDNA database http://www.empop.org upon publication and will be available as the first mtDNA reference data for this country.",2011-02-18 +24039560,"Genome-wide signatures of transcription factor activity: connecting transcription factors, disease, and small molecules.","Identifying transcription factors (TF) involved in producing a genome-wide transcriptional profile is an essential step in building mechanistic model that can explain observed gene expression data. We developed a statistical framework for constructing genome-wide signatures of TF activity, and for using such signatures in the analysis of gene expression data produced by complex transcriptional regulatory programs. Our framework integrates ChIP-seq data and appropriately matched gene expression profiles to identify True REGulatory (TREG) TF-gene interactions. It provides genome-wide quantification of the likelihood of regulatory TF-gene interaction that can be used to either identify regulated genes, or as genome-wide signature of TF activity. To effectively use ChIP-seq data, we introduce a novel statistical model that integrates information from all binding ""peaks"" within 2 Mb window around a gene's transcription start site (TSS), and provides gene-level binding scores and probabilities of regulatory interaction. In the second step we integrate these binding scores and regulatory probabilities with gene expression data to assess the likelihood of True REGulatory (TREG) TF-gene interactions. We demonstrate the advantages of TREG framework in identifying genes regulated by two TFs with widely different distribution of functional binding events (ERα and E2f1). 
We also show that TREG signatures of TF activity vastly improve our ability to detect involvement of ERα in producing complex diseases-related transcriptional profiles. Through a large study of disease-related transcriptional signatures and transcriptional signatures of drug activity, we demonstrate that increase in statistical power associated with the use of TREG signatures makes the crucial difference in identifying key targets for treatment, and drugs to use for treatment. All methods are implemented in an open-source R package treg. The package also contains all data used in the analysis including 494 TREG binding profiles based on ENCODE ChIP-seq data. The treg package can be downloaded at http://GenomicsPortals.org.",2013-09-05 +25236784,Distinguishing between driver and passenger mutations in individual cancer genomes by network enrichment analysis.,"

Background

In somatic cancer genomes, delineating genuine driver mutations against a background of multiple passenger events is a challenging task. The difficulty of determining function from sequence data and the low frequency of mutations are increasingly hindering the search for novel, less common cancer drivers. The accumulation of extensive amounts of data on somatic point and copy number alterations necessitates the development of systematic methods for driver mutation analysis.

Results

We introduce a framework for detecting driver mutations via functional network analysis, which is applied to individual genomes and does not require pooling multiple samples. It probabilistically evaluates 1) functional network links between different mutations in the same genome and 2) links between individual mutations and known cancer pathways. In addition, it can employ correlations of mutation patterns in pairs of genes. The method was used to analyze genomic alterations in two TCGA datasets, one for glioblastoma multiforme and another for ovarian carcinoma, which were generated using different approaches to mutation profiling. The proportions of drivers among the reported de novo point mutations in these cancers were estimated to be 57.8% and 16.8%, respectively. Both sets also included extended chromosomal regions with synchronous duplications or losses of multiple genes. We identified putative copy number driver events within many such segments. Finally, we summarized seemingly disparate mutations and discovered a functional network of collagen modifications in the glioblastoma. In order to select the most efficient network for use with this method, we used a novel, ROC curve-based procedure for benchmarking different network versions by their ability to recover pathway membership.

Conclusions

The results of our network-based procedure were in good agreement with published gold standard sets of cancer genes and were shown to complement and expand frequency-based driver analyses. On the other hand, three sequence-based methods applied to the same data yielded poor agreement with each other and with our results. We review the difference in driver proportions discovered by different sequencing approaches and discuss the functional roles of novel driver mutations. The software used in this work and the global network of functional couplings are publicly available at http://research.scilifelab.se/andrej_alexeyenko/downloads.html.",2014-09-19 +26720473,Concordance of Macular Pigment Measurement Using Customized Heterochromatic Flicker Photometry and Fundus Autofluorescence in Age-Related Macular Degeneration.,"

Purpose

We compared macular pigment (MP) measurements using customized heterochromatic flicker photometry (Macular Metrics Densitometer) and dual-wavelength fundus autofluorescence (Heidelberg Spectralis HRA + OCT MultiColor) in subjects with early age-related macular degeneration (AMD).

Methods

Macular pigment was measured in 117 subjects with early AMD (age, 44-88 years) using the Densitometer and Spectralis, as part of the Central Retinal Enrichment Supplementation Trial (CREST; ISRCTN13894787). Baseline and 6-month study visits data were used for the analyses. Agreement was investigated at four different retinal eccentricities, graphically and using indices of agreement, including Pearson correlation coefficient (precision), accuracy coefficient, and concordance correlation coefficient (ccc).

Results

Agreement was poor between the Densitometer and Spectralis at all eccentricities, at baseline (e.g., at 0.25° eccentricity, accuracy = 0.63, precision = 0.35, ccc = 0.22) and at 6 months (e.g., at 0.25° eccentricity, accuracy = 0.52, precision = 0.43, ccc = 0.22). Agreement between the two devices was significantly greater for males at 0.5° and 1.0° of eccentricity. At all eccentricities, agreement was unaffected by cataract grade.

Conclusions

In subjects with early AMD, MP measurements obtained using the Densitometer and Spectralis are not statistically comparable and should not be used interchangeably in either the clinical or research setting. Despite this lack of agreement, statistically significant increases in MP, following 6 months of supplementation with macular carotenoids, were detected with each device, confirming that these devices are capable of measuring change in MP within subjects over time. (http://www.controlled-trials.com number, ISRCTN13894787.).",2015-12-01 +26398851,Effect of aliskiren on vascular remodelling in small retinal circulation.,"

Background

In hypertension, changes in small arterial structure are characterized by an increased wall-to-lumen ratio (WLR). These adaptive processes are modulated by the renin-angiotensin system. It is unclear whether direct renin inhibitors exert protective effects on small arteries in hypertensive patients.

Methods

In this double-blind, randomized, placebo-controlled study (http://www.clinicaltrials.gov: NCT01318395), 114 patients with primary hypertension were randomized to additional therapy with either placebo or aliskiren 300 mg for 8 weeks after 4 weeks of standardized open-label treatment with valsartan 320 mg (run-in phase). Parameter of arteriolar remodelling was WLR of retinal arterioles (80 - 140 μm) assessed noninvasively and in vivo by scanning laser Doppler flowmetry (Heidelberg Engineering, Germany). In addition, pulse wave analysis (SphygmoCor, AtCor Medical, Australia) and pulse pressure (PP) amplification were determined.

Results

In the whole study population, no clear effect of additional therapy with aliskiren on vascular parameters was documented. When analyses were restricted to patients with vascular remodelling, defined by a median of WLR more than 0.3326 (n = 57), WLR was reduced after 8 weeks by the treatment with aliskiren compared with placebo (-0.044 ± 0.07 versus 0.0043 ± 0.07, P = 0.015). Consistently, after 8 weeks of on-top treatment with aliskiren, there was an improvement of PP amplification compared with placebo (0.025 ± 0.07 versus -0.034 ± 0.08, P = 0.013), indicative of less stiff arteries in the peripheral circulation.

Conclusion

Thus, our data indicate that treatment with aliskiren, given on top of valsartan therapy, improves altered vascular remodelling in hypertensive patients.",2015-12-01 +22805427,Detecting pore-lining regions in transmembrane protein sequences.,"

Background

Alpha-helical transmembrane channel and transporter proteins play vital roles in a diverse range of essential biological processes and are crucial in facilitating the passage of ions and molecules across the lipid bilayer. However, the experimental difficulties associated with obtaining high quality crystals has led to their significant under-representation in structural databases. Computational methods that can identify structural features from sequence alone are therefore of high importance.

Results

We present a method capable of automatically identifying pore-lining regions in transmembrane proteins from sequence information alone, which can then be used to determine the pore stoichiometry. By labelling pore-lining residues in crystal structures using geometric criteria, we have trained a support vector machine classifier to predict the likelihood of a transmembrane helix being involved in pore formation. Results from testing this approach under stringent cross-validation indicate that prediction accuracy of 72% is possible, while a support vector regression model is able to predict the number of subunits participating in the pore with 62% accuracy.

Conclusion

To our knowledge, this is the first tool capable of identifying pore-lining regions in proteins and we present the results of applying it to a data set of sequences with available crystal structures. Our method provides a way to characterise pores in transmembrane proteins and may even provide a starting point for discovering novel routes of therapeutic intervention in a number of important diseases. This software is freely available as source code from: http://bioinf.cs.ucl.ac.uk/downloads/memsat-svm/.",2012-07-17 +22355226,A protein short motif search tool using amino acid sequence and their secondary structure assignment.,"

Unlabelled

We present the development of a web server, a protein short motif search tool that allows users to simultaneously search for a protein sequence motif and its secondary structure assignments. The web server is able to query very short motifs searches against PDB structural data from the RCSB Protein Databank, with the users defining the type of secondary structures of the amino acids in the sequence motif. The output utilises 3D visualisation ability that highlights the position of the motif in the structure and on the corresponding sequence. Researchers can easily observe the locations and conformation of multiple motifs among the results. Protein short motif search also has an application programming interface (API) for interfacing with other bioinformatics tools.

Availability

The database is available for free at http://birg3.fbb.utm.my/proteinsms.",2011-11-20 +21383909,Glaucoma database.,"

Unlabelled

Glaucoma, a complex heterogenous disease, is the leading cause for optic nerve-related blindness worldwide. Primary open angle glaucoma (POAG) is the most common subset and by the year 2020 it is estimated that approximately 60 million people will be affected. MYOC, OPTN, CYP1B1 and WDR36 are the important candidate genes. Nearly 4% of the glaucoma patients have a mutation in any one of these genes. Mutation in any of these genes causes disease either directly or indirectly and the severity of the disease varies according to position of the genes. We have compiled all the related mutations and SNPs in the above genes and developed a database, to help access statistical and clinical information of particular mutation. This database is available online at http://bicmku.in:8081/glaucoma. The database, constructed using SQL, contains data pertaining to the SNPs and mutation information involved in the above genes and relevant study data.

Availability

The database is available for free at http://bicmku.in:8081/glaucoma.",2011-02-07 +26046983,Elemental Constituents of Particulate Matter and Newborn's Size in Eight European Cohorts.,"

Background

The health effects of suspended particulate matter (PM) may depend on its chemical composition. Associations between maternal exposure to chemical constituents of PM and newborn's size have been little examined.

Objective

We aimed to investigate the associations of exposure to elemental constituents of PM with term low birth weight (LBW; weight < 2,500 g among births after 37 weeks of gestation), mean birth weight, and head circumference, relying on standardized fine-scale exposure assessment and with extensive control for potential confounders.

Methods

We pooled data from eight European cohorts comprising 34,923 singleton births in 1994-2008. Annual average concentrations of elemental constituents of PM ≤ 2.5 and ≤ 10 μm (PM2.5 and PM10) at maternal home addresses during pregnancy were estimated using land-use regression models. Adjusted associations between each birth measurement and concentrations of eight elements (copper, iron, potassium, nickel, sulfur, silicon, vanadium, and zinc) were calculated using random-effects regression on pooled data.

Results

A 200-ng/m3 increase in sulfur in PM2.5 was associated with an increased risk of LBW (adjusted odds ratio = 1.36; 95% confidence interval: 1.17, 1.58). Increased nickel and zinc in PM2.5 concentrations were also associated with an increased risk of LBW. Head circumference was reduced at higher exposure to all elements except potassium. All associations with sulfur were most robust to adjustment for PM2.5 mass concentration. All results were similar for PM10.

Conclusion

Sulfur, reflecting secondary combustion particles in this study, may adversely affect LBW and head circumference, independently of particle mass.

Citation

Pedersen M, Gehring U, Beelen R, Wang M, Giorgis-Allemand L, Andersen AM, Basagaña X, Bernard C, Cirach M, Forastiere F, de Hoogh K, Gražulevičienė R, Gruzieva O, Hoek G, Jedynska A, Klümper C, Kooter IM, Krämer U, Kukkonen J, Porta D, Postma DS, Raaschou-Nielsen O, van Rossem L, Sunyer J, Sørensen M, Tsai MY, Vrijkotte TG, Wilhelm M, Nieuwenhuijsen MJ, Pershagen G, Brunekreef B, Kogevinas M, Slama R. 2016. Elemental constituents of particulate matter and newborn's size in eight European cohorts. Environ Health Perspect 124:141-150; http://dx.doi.org/10.1289/ehp.1409546.",2015-06-05 +22267904,SAFE Software and FED Database to Uncover Protein-Protein Interactions using Gene Fusion Analysis.,"Domain Fusion Analysis takes advantage of the fact that certain proteins in a given proteome A, are found to have statistically significant similarity with two separate proteins in another proteome B. In other words, the result of a fusion event between two separate proteins in proteome B is a specific full-length protein in proteome A. In such a case, it can be safely concluded that the protein pair has a common biological function or even interacts physically. In this paper, we present the Fusion Events Database (FED), a database for the maintenance and retrieval of fusion data both in prokaryotic and eukaryotic organisms and the Software for the Analysis of Fusion Events (SAFE), a computational platform implemented for the automated detection, filtering and visualization of fusion events (both available at: http://www.bioacademy.gr/bioinformatics/projects/ProteinFusion/index.htm). 
Finally, we analyze the proteomes of three microorganisms using these tools in order to demonstrate their functionality.",2011-12-18 +24271385,"NGSmethDB: an updated genome resource for high quality, single-cytosine resolution methylomes.","The updated release of 'NGSmethDB' (http://bioinfo2.ugr.es/NGSmethDB) is a repository for single-base whole-genome methylome maps for the best-assembled eukaryotic genomes. Short-read data sets from NGS bisulfite-sequencing projects of cell lines, fresh and pathological tissues are first pre-processed and aligned to the corresponding reference genome, and then the cytosine methylation levels are profiled. One major improvement is the application of a unique bioinformatics protocol to all data sets, thereby assuring the comparability of all values with each other. We implemented stringent quality controls to minimize important error sources, such as sequencing errors, bisulfite failures, clonal reads or single nucleotide variants (SNVs). This leads to reliable and high-quality methylomes, all obtained under uniform settings. Another significant improvement is the detection in parallel of SNVs, which might be crucial for many downstream analyses (e.g. SNVs and differential-methylation relationships). A next-generation methylation browser allows fast and smooth scrolling and zooming, thus speeding data download/upload, at the same time requiring fewer server resources. Several data mining tools allow the comparison/retrieval of methylation levels in different tissues or genome regions. NGSmethDB methylomes are also available as native tracks through a UCSC hub, which allows comparison with a wide range of third-party annotations, in particular phenotype or disease annotations.",2013-11-22 +24753485,MRMPROBS suite for metabolomics using large-scale MRM assays.,"

Unlabelled

We developed a new software environment for the metabolome analysis of large-scale multiple reaction monitoring (MRM) assays. It supports the data format of four major mass spectrometer vendors and the mzML common data format. This program provides a process pipeline from the raw-format import to high-dimensional statistical analyses. The novel aspect is graphical user interface-based visualization to perform peak quantification, to interpolate missing values and to normalize peaks interactively based on quality control samples. Together with the software platform, the MRM standard library of 301 metabolites with 775 transitions is also available, which contributes to the reliable peak identification by using retention time and ion abundances.

Availability and implementation

MRMPROBS is available for Windows OS under the creative-commons by-attribution license at http://prime.psc.riken.jp.",2014-04-20 +24833805,RNASeqExpressionBrowser--a web interface to browse and visualize high-throughput expression data.,"

Motivation

RNA-seq techniques generate massive amounts of expression data. Several pipelines (e.g. Tophat and Cufflinks) are broadly applied to analyse these datasets. However, accessing and handling the analytical output remain challenging for non-experts.

Results

We present the RNASeqExpressionBrowser, an open-source web interface that can be used to access the output from RNA-seq expression analysis packages in different ways, as it allows browsing for genes by identifiers, annotations or sequence similarity. Gene expression information can be loaded as long as it is represented in a matrix-like format. Additionally, data can be made available by setting up the tool on a public server. For demonstration purposes, we have set up a version providing expression information from the barley genome.

Availability and implementation

The source code and a show case are accessible at http://mips.helmholtz-muenchen.de/plant/RNASeqExpressionBrowser/.",2014-05-14 +24646301,ShatterProof: operational detection and quantification of chromothripsis.,"

Background

Chromothripsis, a newly discovered type of complex genomic rearrangement, has been implicated in the evolution of several types of cancers. To date, it has been described in bone cancer, SHH-medulloblastoma and acute myeloid leukemia, amongst others, however there are still no formal or automated methods for detecting or annotating it in high throughput sequencing data. As such, findings of chromothripsis are difficult to compare and many cases likely escape detection altogether.

Results

We introduce ShatterProof, a software tool for detecting and quantifying chromothriptic events. ShatterProof takes structural variation calls (translocations, copy-number variations, short insertions and loss of heterozygosity) produced by any algorithm and using an operational definition of chromothripsis performs robust statistical tests to accurately predict the presence and location of chromothriptic events. Validation of our tool was conducted using clinical data sets including matched normal, prostate cancer samples in addition to the colorectal cancer and SCLC data sets used in the original description of chromothripsis.

Conclusions

ShatterProof is computationally efficient, having low memory requirements and near linear computation time. This allows it to become a standard component of sequencing analysis pipelines, enabling researchers to routinely and accurately assess samples for chromothripsis. Source code and documentation can be found at http://search.cpan.org/~sgovind/Shatterproof.",2014-03-19 +25411919,"Characteristics of residents living in residential care communities, by community bed size: United States, 2012.","In 2012, there was a higher percentage of older, female residents in communities with more than 25 beds compared with communities with 4–25 beds. Residents in communities with 4–25 beds were more racially diverse than residents in larger communities. The percentage of Medicaid beneficiaries was higher in communities with 4–25 beds than it was in communities with 26–50 and more than 50 beds. A higher percentage of residents living in communities with 4–25 beds had a diagnosis of Alzheimer’s disease or other dementias compared with residents in larger communities. Need for assistance with each of the activities of daily living (ADLs) examined (except walking or locomotion) was substantially higher among residents in communities with 4–25 beds, compared with residents in larger communities. Emergency department visits and discharges from an overnight hospital stay in a 90-day period did not vary across residents by community bed size. This report presents national estimates of residents living in residential care, using data from the first wave of NSLTCP. This brief profile of residential care residents provides useful information to policymakers, providers, researchers, and consumer advocates as they plan to meet the needs of an aging population. The findings also highlight the diversity of residents across the different sizes of residential care communities. 
Corresponding state estimates and their standard errors for the national figures in this data brief can be found on the NSLTCP website, available from: http://www.cdc.gov/nchs/nsltcp/nsltcp_products.htm. These national and state estimates establish a baseline for monitoring trends among residents living in residential care.",2014-11-01 +26831696,Evaluating the Quantitative Capabilities of Metagenomic Analysis Software.,"DNA sequencing technologies are applied widely and frequently today to describe metagenomes, i.e., microbial communities in environmental or clinical samples, without the need for culturing them. These technologies usually return short (100-300 base-pairs long) DNA reads, and these reads are processed by metagenomic analysis software that assign phylogenetic composition-information to the dataset. Here we evaluate three metagenomic analysis software (AmphoraNet--a webserver implementation of AMPHORA2--, MG-RAST, and MEGAN5) for their capabilities of assigning quantitative phylogenetic information for the data, describing the frequency of appearance of the microorganisms of the same taxa in the sample. The difficulties of the task arise from the fact that longer genomes produce more reads from the same organism than shorter genomes, and some software assign higher frequencies to species with longer genomes than to those with shorter ones. This phenomenon is called the ""genome length bias."" Dozens of complex artificial metagenome benchmarks can be found in the literature. Because of the complexity of those benchmarks, it is usually difficult to judge the resistance of a metagenomic software to this ""genome length bias."" Therefore, we have made a simple benchmark for the evaluation of the ""taxon-counting"" in a metagenomic sample: we have taken the same number of copies of three full bacterial genomes of different lengths, break them up randomly to short reads of average length of 150 bp, and mixed the reads, creating our simple benchmark. 
Because of its simplicity, the benchmark is not supposed to serve as a mock metagenome, but if a software fails on that simple task, it will surely fail on most real metagenomes. We applied three software for the benchmark. The ideal quantitative solution would assign the same proportion to the three bacterial taxa. We have found that AMPHORA2/AmphoraNet gave the most accurate results and the other two software were under-performers: they counted quite reliably each short read to their respective taxon, producing the typical genome length bias. The benchmark dataset is available at http://pitgroup.org/static/3RandomGenome-100kavg150bps.fna.",2016-01-30 +24795618,libNeuroML and PyLEMS: using Python to combine procedural and declarative modeling approaches in computational neuroscience.,"NeuroML is an XML-based model description language, which provides a powerful common data format for defining and exchanging models of neurons and neuronal networks. In the latest version of NeuroML, the structure and behavior of ion channel, synapse, cell, and network model descriptions are based on underlying definitions provided in LEMS, a domain-independent language for expressing hierarchical mathematical models of physical entities. While declarative approaches for describing models have led to greater exchange of model elements among software tools in computational neuroscience, a frequent criticism of XML-based languages is that they are difficult to work with directly. Here we describe two Application Programming Interfaces (APIs) written in Python (http://www.python.org), which simplify the process of developing and modifying models expressed in NeuroML and LEMS. The libNeuroML API provides a Python object model with a direct mapping to all NeuroML concepts defined by the NeuroML Schema, which facilitates reading and writing the XML equivalents. 
In addition, it offers a memory-efficient, array-based internal representation, which is useful for handling large-scale connectomics data. The libNeuroML API also includes support for performing common operations that are required when working with NeuroML documents. Access to the LEMS data model is provided by the PyLEMS API, which provides a Python implementation of the LEMS language, including the ability to simulate most models expressed in LEMS. Together, libNeuroML and PyLEMS provide a comprehensive solution for interacting with NeuroML models in a Python environment.",2014-04-23 +26615194,GWASdb v2: an update database for human genetic variants identified by genome-wide association studies.,"Genome-wide association studies (GWASs), now as a routine approach to study single-nucleotide polymorphism (SNP)-trait association, have uncovered over ten thousand significant trait/disease associated SNPs (TASs). Here, we updated GWASdb (GWASdb v2, http://jjwanglab.org/gwasdb) which provides comprehensive data curation and knowledge integration for GWAS TASs. These updates include: (i) Up to August 2015, we collected 2479 unique publications from PubMed and other resources; (ii) We further curated moderate SNP-trait associations (P-value < 1.0 × 10(-3)) from each original publication, and generated a total of 252,530 unique TASs in all GWASdb v2 collected studies; (iii) We manually mapped 1610 GWAS traits to 501 Human Phenotype Ontology (HPO) terms, 435 Disease Ontology (DO) terms and 228 Disease Ontology Lite (DOLite) terms. 
For each ontology term, we also predicted the putative causal genes; (iv) We curated the detailed sub-populations and related sample size for each study; (v) Importantly, we performed extensive function annotation for each TAS by incorporating gene-based information, ENCODE ChIP-seq assays, eQTL, population haplotype, functional prediction across multiple biological domains, evolutionary signals and disease-related annotation; (vi) Additionally, we compiled a SNP-drug response association dataset for 650 pharmacogenetic studies involving 257 drugs in this update; (vii) Last, we improved the user interface of website.",2015-11-28 +25475113,CBMAR: a comprehensive β-lactamase molecular annotation resource.,"β-Lactam antibiotics are among the most widely used antibiotics against microbial pathogens. However, enzymatic hydrolysis of these antibiotics by bacterial β-lactamases is increasingly compromising their efficiency. Although new generation β-lactam antibiotics have been developed to combat antibiotic resistance, β-lactamases have also evolved along with the new variants of the substrate. A strong selection pressure from the newer generation of β-lactam antibiotics has resulted in evolution of different families within each class of β-lactamase. To facilitate detailed characterization of different families of β-lactamases, we have created a database, CBMAR, which facilitates comprehensive molecular annotation and discovery of novel β-lactamases. As against the limited scope of other existing similar databases, CBMAR provides information useful for molecular and biochemical characterization of each family of β-lactamase. The basic architecture of CBMAR is based on Ambler classification, which divides β-lactamases as serine (Classes A, C and D) and metallo-β-lactamases (Class B). Each class is further divided into several families on the basis of their hydrolytic character. 
In CBMAR, each family is annotated with (i) sequence variability, (ii) antibiotic resistance profile, (iii) inhibitor susceptibility, (iv) active site, (v) family fingerprints, (vi) mutational profile, (vii) variants, (viii) gene location, (ix) phylogenetic tree and several other features. Each entry also has external links to the relevant protein/nucleotide sequence and structure databases. The database also supports sequence similarity searches using BLAST and assigns a new β-lactamase protein to its respective family on the basis of family-specific fingerprint. Database URL: http://14.139.227.92/mkumar/lactamasedb",2014-12-03 +22583488,3DMolNavi: a web-based retrieval and navigation tool for flexible molecular shape comparison.,"

Background

Many molecules of interest are flexible and undergo significant shape deformation as part of their function, but most existing methods of molecular shape comparison treat them as rigid shapes, which may lead to incorrect measure of the shape similarity of flexible molecules. Currently, there still is a limited effort in retrieval and navigation for flexible molecular shape comparison, which would improve data retrieval by helping users locate the desirable molecule in a convenient way.

Results

To address this issue, we develop a web-based retrieval and navigation tool, named 3DMolNavi, for flexible molecular shape comparison. This tool is based on the histogram of Inner Distance Shape Signature (IDSS) for fast retrieving molecules that are similar to a query molecule, and uses dimensionality reduction to navigate the retrieved results in 2D and 3D spaces. We tested 3DMolNavi in the Database of Macromolecular Movements (MolMovDB) and CATH. Compared to other shape descriptors, it achieves good performance and retrieval results for different classes of flexible molecules.

Conclusions

The advantages of 3DMolNavi over other existing software are that it integrates retrieval for flexible molecular shape comparison and enhances navigation for user interaction. 3DMolNavi can be accessed via https://engineering.purdue.edu/PRECISE/3dmolnavi/index.html.",2012-05-14 +24180558,DCE@urLAB: a dynamic contrast-enhanced MRI pharmacokinetic analysis tool for preclinical data.,"

Background

DCE@urLAB is a software application for analysis of dynamic contrast-enhanced magnetic resonance imaging data (DCE-MRI). The tool incorporates a friendly graphical user interface (GUI) to interactively select and analyze a region of interest (ROI) within the image set, taking into account the tissue concentration of the contrast agent (CA) and its effect on pixel intensity.

Results

Pixel-wise model-based quantitative parameters are estimated by fitting DCE-MRI data to several pharmacokinetic models using the Levenberg-Marquardt algorithm (LMA). DCE@urLAB also includes the semi-quantitative parametric and heuristic analysis approaches commonly used in practice. This software application has been programmed in the Interactive Data Language (IDL) and tested both with publicly available simulated data and preclinical studies from tumor-bearing mouse brains.

Conclusions

A user-friendly solution for applying pharmacokinetic and non-quantitative analysis DCE-MRI in preclinical studies has been implemented and tested. The proposed tool has been specially designed for easy selection of multi-pixel ROIs. A public release of DCE@urLAB, together with the open source code and sample datasets, is available at http://www.die.upm.es/im/archives/DCEurLAB/.",2013-11-04 +24174545,HapFABIA: identification of very short segments of identity by descent characterized by rare variants in large sequencing data.,"Identity by descent (IBD) can be reliably detected for long shared DNA segments, which are found in related individuals. However, many studies contain cohorts of unrelated individuals that share only short IBD segments. New sequencing technologies facilitate identification of short IBD segments through rare variants, which convey more information on IBD than common variants. Current IBD detection methods, however, are not designed to use rare variants for the detection of short IBD segments. Short IBD segments reveal genetic structures at high resolution. Therefore, they can help to improve imputation and phasing, to increase genotyping accuracy for low-coverage sequencing and to increase the power of association studies. Since short IBD segments are further assumed to be old, they can shed light on the evolutionary history of humans. We propose HapFABIA, a computational method that applies biclustering to identify very short IBD segments characterized by rare variants. HapFABIA is designed to detect short IBD segments in genotype data that were obtained from next-generation sequencing, but can also be applied to DNA microarray data. Especially in next-generation sequencing data, HapFABIA exploits rare variants for IBD detection. HapFABIA significantly outperformed competing algorithms at detecting short IBD segments on artificial and simulated data with rare variants. 
HapFABIA identified 160 588 different short IBD segments characterized by rare variants with a median length of 23 kb (mean 24 kb) in data for chromosome 1 of the 1000 Genomes Project. These short IBD segments contain 752 000 single nucleotide variants (SNVs), which account for 39% of the rare variants and 23.5% of all variants. The vast majority-152 000 IBD segments-are shared by Africans, while only 19 000 and 11 000 are shared by Europeans and Asians, respectively. IBD segments that match the Denisova or the Neandertal genome are found significantly more often in Asians and Europeans but also, in some cases exclusively, in Africans. The lengths of IBD segments and their sharing between continental populations indicate that many short IBD segments from chromosome 1 existed before humans migrated out of Africa. Thus, rare variants that tag these short IBD segments predate human migration from Africa. The software package HapFABIA is available from Bioconductor. All data sets, result files and programs for data simulation, preprocessing and evaluation are supplied at http://www.bioinf.jku.at/research/short-IBD.",2013-10-29 +24907201,SeaBase: a multispecies transcriptomic resource and platform for gene network inference.,"Marine and aquatic animals are extraordinarily useful as models for identifying mechanisms of development and evolution, regeneration, resistance to cancer, longevity and symbiosis, among many other areas of research. This is due to the great diversity of these organisms and their wide-ranging capabilities. Genomics tools are essential for taking advantage of these ""free lessons"" of nature. However, genomics and transcriptomics are challenging in emerging model systems. Here, we present SeaBase, a tool for helping to meet these needs. Specifically, SeaBase provides a platform for sharing and searching transcriptome data. More importantly, SeaBase will support a growing number of tools for inferring gene network mechanisms. 
The first dataset available on SeaBase is a developmental transcriptomic profile of the sea anemone Nematostella vectensis (Anthozoa, Cnidaria). Additional datasets are currently being prepared and we are aiming to expand SeaBase to include user-supplied data for any number of marine and aquatic organisms, thereby supporting many potentially new models for gene network studies. SeaBase can be accessed online at: http://seabase.core.cli.mbl.edu.",2014-06-06 +24886250,KRLMM: an adaptive genotype calling method for common and low frequency variants.,"

Background

SNP genotyping microarrays have revolutionized the study of complex disease. The current range of commercially available genotyping products contain extensive catalogues of low frequency and rare variants. Existing SNP calling algorithms have difficulty dealing with these low frequency variants, as the underlying models rely on each genotype having a reasonable number of observations to ensure accurate clustering.

Results

Here we develop KRLMM, a new method for converting raw intensities into genotype calls that aims to overcome this issue. Our method is unique in that it applies careful between sample normalization and allows a variable number of clusters k (1, 2 or 3) for each SNP, where k is predicted using the available data. We compare our method to four genotyping algorithms (GenCall, GenoSNP, Illuminus and OptiCall) on several Illumina data sets that include samples from the HapMap project where the true genotypes are known in advance. All methods were found to have high overall accuracy (> 98%), with KRLMM consistently amongst the best. At low minor allele frequency, the KRLMM, OptiCall and GenoSNP algorithms were observed to be consistently more accurate than GenCall and Illuminus on our test data.

Conclusions

Methods that tailor their approach to calling low frequency variants by either varying the number of clusters (KRLMM) or using information from other SNPs (OptiCall and GenoSNP) offer improved accuracy over methods that do not (GenCall and Illuminus). The KRLMM algorithm is implemented in the open-source crlmm package distributed via the Bioconductor project (http://www.bioconductor.org).",2014-05-23 +25693938,The relationship between P16 gene promoter methylation and gastric cancer: a meta-analysis based on Chinese patients.,"

Objective

To evaluate the P16 gene promoter methylation rate in gastric cancer tissue and healthy controls. And further assess the clinical value of P16 gene promoter methylation as a biomarker for gastric cancer diagnosis.

Materials and methods

Four databases, Medline, VIP, CNKI, WANFANG were searched to find the diagnostic trials about P16 gene promoter methylation in gastric cancer and healthy control. The pooled sensitivity, specificity, positive likelihood ratio (+LR), negative likelihood ratio (-LR) and the receiver operating characteristic curve (ROC) were calculated by Meta-DiSc1.4 (http://www.hrc.es/investigacion/metadisc.html) software.

Results

Nine studies involving 487 gastric cancer patients and 271 healthy controls were included in this meta-analysis. The median methylation rate for gastric cancer group was 43.3% with its range of 28.3-64.4%. And the median methylation rate for healthy control group was 0.0% with its range of 0.0-13.3%. The methylation rate in gastric cancer was statistically higher than in the healthy control (P < 0.05). The pooled sensitivity, specificity, +LR, -LR and the area under the ROC curve were 0.44 (95% confidence interval [CI]: 0.40-0.49), 0.97 (95% CI: 0.95-0.99), 13.11 (95% CI: 4.02-42.63), 0.58 (95% CI: 0.49-0.70), 23.62 (95% CI: 6.90-80.90) and 0.44, respectively.

Conclusion

Our meta-analysis indicates that P16 gene promoter methylation array is a useful method for diagnosis of gastric cancer with relatively low sensitivity and very high specificity.",2014-12-01 +25468930,The Halophile protein database.,"Halophilic archaea/bacteria adapt to different salt concentration, namely extreme, moderate and low. These type of adaptations may occur as a result of modification of protein structure and other changes in different cell organelles. Thus proteins may play an important role in the adaptation of halophilic archaea/bacteria to saline conditions. The Halophile protein database (HProtDB) is a systematic attempt to document the biochemical and biophysical properties of proteins from halophilic archaea/bacteria which may be involved in adaptation of these organisms to saline conditions. In this database, various physicochemical properties such as molecular weight, theoretical pI, amino acid composition, atomic composition, estimated half-life, instability index, aliphatic index and grand average of hydropathicity (Gravy) have been listed. These physicochemical properties play an important role in identifying the protein structure, bonding pattern and function of the specific proteins. This database is comprehensive, manually curated, non-redundant catalogue of proteins. The database currently contains 59 897 proteins properties extracted from 21 different strains of halophilic archaea/bacteria. The database can be accessed through link. Database URL: http://webapp.cabgrid.res.in/protein/",2014-12-01 +25446693,[Drug-induced interstitial lung diseases: often forgotten].,"Drug-induced interstitial lung diseases (DILD) are probably more common than diagnosed. 
Due to their potential reversibility, increased vigilance towards DILD is appropriate also from the radiologist's point of view, particularly as these diseases regularly exhibit radiological correlates in high-resolution computed tomography (HRCT) of the lungs. Based on personal experience typical relatively common manifestations of DILD are diffuse alveolar damage (DAD), eosinophilic pneumonia (EP), hypersensitivity pneumonitis (HP), organizing pneumonia (OP), non-specific interstitial pneumonia (NSIP) and usual interstitial pneumonia (UIP). These patterns are presented based on case studies, whereby emphasis is placed on the clinical context. This is to highlight the relevance of interdisciplinary communication and discussion in the diagnostic field of DILD as it is a diagnosis of exclusion or of probability in most cases. Helpful differential diagnostic indications for the presence of DILD, such as an accompanying eosinophilia or increased attenuation of pulmonary consolidations in amiodarone-induced pneumopathy are mentioned and the freely available online database http://www.pneumotox.com is presented.",2014-12-01 +22126435,"Production of a reference transcriptome and transcriptomic database (PocilloporaBase) for the cauliflower coral, Pocillopora damicornis.","

Background

Motivated by the precarious state of the world's coral reefs, there is currently a keen interest in coral transcriptomics. By identifying changes in coral gene expression that are triggered by particular environmental stressors, we can begin to characterize coral stress responses at the molecular level, which should lead to the development of more powerful diagnostic tools for evaluating the health of corals in the field. Furthermore, the identification of genetic variants that are more or less resilient in the face of particular stressors will help us to develop more reliable prognoses for particular coral populations. Toward this end, we performed deep mRNA sequencing of the cauliflower coral, Pocillopora damicornis, a geographically widespread Indo-Pacific species that exhibits a great diversity of colony forms and is able to thrive in habitats subject to a wide range of human impacts. Importantly, P. damicornis is particularly amenable to laboratory culture. We collected specimens from three geographically isolated Hawaiian populations subjected to qualitatively different levels of human impact. We isolated RNA from colony fragments (""nubbins"") exposed to four environmental stressors (heat, desiccation, peroxide, and hypo-saline conditions) or control conditions. The RNA was pooled and sequenced using the 454 platform.

Description

Both the raw reads (n=1,116,551) and the assembled contigs (n=70,786; mean length=836 nucleotides) were deposited in a new publicly available relational database called PocilloporaBase http://www.PocilloporaBase.org. Using BLASTX, 47.2% of the contigs were found to match a sequence in the NCBI database at an E-value threshold of ≤.001; 93.6% of those contigs with matches in the NCBI database appear to be of metazoan origin and 2.3% bacterial origin, while most of the remaining 4.1% match to other eukaryotes, including algae and amoebae.

Conclusions

P. damicornis now joins the handful of coral species for which extensive transcriptomic data are publicly available. Through PocilloporaBase http://www.PocilloporaBase.org, one can obtain assembled contigs and raw reads and query the data according to a wide assortment of attributes including taxonomic origin, PFAM motif, KEGG pathway, and GO annotation.",2011-11-29 +24334396,Protein function prediction using multilabel ensemble classification.,"High-throughput experimental techniques produce several kinds of heterogeneous proteomic and genomic data sets. To computationally annotate proteins, it is necessary and promising to integrate these heterogeneous data sources. Some methods transform these data sources into different kernels or feature representations. Next, these kernels are linearly (or nonlinearly) combined into a composite kernel. The composite kernel is utilized to develop a predictive model to infer the function of proteins. A protein can have multiple roles and functions (or labels). Therefore, multilabel learning methods are also adapted for protein function prediction. We develop a transductive multilabel classifier (TMC) to predict multiple functions of proteins using several unlabeled proteins. We also propose a method called transductive multilabel ensemble classifier (TMEC) for integrating the different data sources using an ensemble approach. The TMEC trains a graph-based multilabel classifier on each single data source, and then combines the predictions of the individual classifiers. We use a directed birelational graph to capture the relationships between pairs of proteins, between pairs of functions, and between proteins and functions. We evaluate the effectiveness of the TMC and TMEC to predict the functions of proteins on three benchmarks. We show that our approaches perform better than recently proposed protein function prediction methods on composite and multiple kernels. 
The code, data sets used in this paper and supplemental material are available at https://sites.google.com/site/guoxian85/tmec.",2013-07-01 +26494522,Eating behavior associated with gray matter volume alternations: A voxel based morphometry study.,"

Unlabelled

Little is known about whether eating behavior is associated with alterations of brain structure or whether the possible alterations are related to body weight status. The current study employed structural imaging from an open MRI data set (http://fcon_1000.projects.nitrc.org/indi/pro/nki.html) to examine the relationship between eating behavior traits and brain structural changes. The eating behavior traits were measured by the Three Factor Eating Questionnaire Scale. The brain structural alterations were analyzed using the Voxel Based Morphometry (VBM) method, and a multiple linear regression model was constructed to identify significant brain structural changes that related to eating behavior factors. We found that cognitive restraint of eating was positively correlated with the gray matter volume (GMV) in the dorsolateral prefrontal cortex (DLPFC) and negatively correlated with the GMV in the putamen; disinhibition scores were negatively associated with the GMV in the left middle frontal gyrus; hunger scores showed a positive correlation with the GMV in the hypothalamus and the visual memory areas and a negative association with the GMV in the inferior temporal gyrus and the bilateral middle frontal gyrus. These results indicated a close connection between the eating behavior traits and structural changes in particular brain regions. Conjunction analysis was also performed to further explore the brain structural alterations that were commonly associated with eating behavior and weight status. The findings add to our understanding of the neural basis underlying eating behaviors, and the connection between these behaviors and body weight status.",2015-10-19 +25600152,"Churchill: an ultra-fast, deterministic, highly scalable and balanced parallelization strategy for the discovery of human genetic variation in clinical and population-scale genomics.","While advances in genome sequencing technology make population-scale genomics a possibility, current approaches for analysis of these data rely upon parallelization strategies that have limited scalability, complex implementation and lack reproducibility. 
Churchill, a balanced regional parallelization strategy, overcomes these challenges, fully automating the multiple steps required to go from raw sequencing reads to variant discovery. Through implementation of novel deterministic parallelization techniques, Churchill allows computationally efficient analysis of a high-depth whole genome sample in less than two hours. The method is highly scalable, enabling full analysis of the 1000 Genomes raw sequence dataset in a week using cloud resources. http://churchill.nchri.org/.",2015-01-20 +27012178,MEGAHIT v1.0: A fast and scalable metagenome assembler driven by advanced methodologies and community practices.,"The study of metagenomics has been much benefited from low-cost and high-throughput sequencing technologies, yet the tremendous amount of data generated make analysis like de novo assembly to consume too much computational resources. In late 2014 we released MEGAHIT v0.1 (together with a brief note of Li et al. (2015) [1]), which is the first NGS metagenome assembler that can assemble genome sequences from metagenomic datasets of hundreds of Giga base-pairs (bp) in a time- and memory-efficient manner on a single server. The core of MEGAHIT is an efficient parallel algorithm for constructing succinct de Bruijn Graphs (SdBG), implemented on a graphical processing unit (GPU). The software has been well received by the assembly community, and there is interest in how to adapt the algorithms to integrate popular assembly practices so as to improve the assembly quality, as well as how to speed up the software using better CPU-based algorithms (instead of GPU). In this paper we first describe the details of the core algorithms in MEGAHIT v0.1, and then we show the new modules to upgrade MEGAHIT to version v1.0, which gives better assembly quality, runs faster and uses less memory. 
For the Iowa Prairie Soil dataset (252Gbp after quality trimming), the assembly quality of MEGAHIT v1.0, when compared with v0.1, has a significant improvement, namely, 36% increase in assembly size and 23% in N50. More interestingly, MEGAHIT v1.0 is no slower than before (even running with the extra modules). This is primarily due to a new CPU-based algorithm for SdBG construction that is faster and requires less memory. Using CPU only, MEGAHIT v1.0 can assemble the Iowa Prairie Soil sample in about 43h, reducing the running time of v0.1 by at least 25% and memory usage by up to 50%. MEGAHIT v1.0, exhibiting a smaller memory footprint, can process even larger datasets. The Kansas Prairie Soil sample (484Gbp), the largest publicly available dataset, can now be assembled using no more than 500GB of memory in 7.5days. The assemblies of these datasets (and other large metagenomic datasets), as well as the software, are available at the website https://hku-bal.github.io/megabox.",2016-03-21 +21441347,miTALOS: analyzing the tissue-specific regulation of signaling pathways by human and mouse microRNAs.,"MicroRNAs (miRNAs) are an important class of post-transcriptional regulators of gene expression that are involved in various cellular and phenotypic processes. A number of studies have shown that miRNA expression is induced by signaling pathways. Moreover, miRNAs emerge as regulators of signaling pathways. Here, we present the miTALOS web resource, which provides insight into miRNA-mediated regulation of signaling pathways. As a novel feature, miTALOS considers the tissue-specific expression signatures of miRNAs and target transcripts to improve the analysis of miRNA regulation in biological pathways. MiTALOS identifies potential pathway regulation by (i) an enrichment analysis of miRNA targets genes and (ii) by using a proximity score to evaluate the functional role of miRNAs in biological pathways by their network proximity. 
Moreover, miTALOS integrates five different miRNA target prediction tools and two different signaling pathway resources (KEGG and NCI). A graphical visualization of miRNA targets in both KEGG and NCI PID signaling pathways is provided to illustrate their respective pathway context. We perform a functional analysis on prostate cancer-related miRNAs and are able to infer a model of miRNA-mediated regulation on tumor proliferation, mobility and anti-apoptotic behavior. miTALOS provides novel features that accomplish a substantial support to systematically infer regulation of signaling pathways mediated by miRNAs. The web-server is freely accessible at http://hmgu.de/cmb/mitalos.",2011-03-25 +25432889,AlzBase: an Integrative Database for Gene Dysregulation in Alzheimer's Disease.,"Alzheimer's disease (AD) affects a significant portion of elderly people worldwide. Although the amyloid-β (Aβ) cascade hypothesis has been the prevailing theory for the molecular mechanism of AD in the past few decades, treatment strategies targeting the Aβ cascade have not demonstrated effectiveness as yet. Thus, elucidating the spatial and temporal evolution of the molecular pathways in AD remains to be a daunting task. To facilitate novel discoveries in this field, here, we have integrated information from multiple sources for the better understanding of gene functions in AD pathogenesis. Several categories of information have been collected, including (1) gene dysregulation in AD and closely related processes/diseases such as aging and neurological disorders, (2) correlation of gene dysregulation with AD severity, (3) a wealth of annotations on the functional and regulatory information, and (4) network connections for gene-gene relationship. In addition, we have also provided a comprehensive summary for the top ranked genes in AlzBase. 
By evaluating the information curated in AlzBase, researchers can prioritize genes from their own research and generate novel hypothesis regarding the molecular mechanism of AD. To demonstrate the utility of AlzBase, we examined the genes from the genetic studies of AD. It revealed links between the upstream genetic variations and downstream endo-phenotype and suggested several genes with higher priority. This integrative database is freely available on the web at http://alz.big.ac.cn/alzBase .",2014-11-29 +22568790,Augmented annotation and orthologue analysis for Oryctolagus cuniculus: Better Bunny.,"

Background

The rabbit is an important model organism used in a wide range of biomedical research. However, the rabbit genome is still sparsely annotated, thus prohibiting extensive functional analysis of gene sets derived from whole-genome experiments. We developed a web-based application that provides augmented annotation and orthologue analysis for rabbit genes. Importantly, the application allows comprehensive functional analysis through the use of orthologous relationships.

Results

Using data extracted from several public bioinformatics repositories we created Better Bunny, a database and query tool that extensively augments the available functional annotation for rabbit genes. Using the complete set of target genes from a commercial rabbit gene expression microarray as our benchmark, we are able to obtain functional information for 88 % of the genes on the microarray. Previously, functional information was available for fewer than 10 % of the rabbit genes.

Conclusions

We have developed a freely available, web-accessible bioinformatics tool that enables investigators to quickly and easily perform extensive functional analysis of rabbit genes (http://cptweb.cpt.wayne.edu). The software application fills a critical void for a wide range of biomedical research that relies on the rabbit model and requires characterization of biological function for large sets of genes.",2012-05-08 +24061930,Build your own social network laboratory with Social Lab: a tool for research in social media.,"Social networking has surpassed e-mail and instant messaging as the dominant form of online communication (Meeker, Devitt, & Wu, 2010). Currently, all large social networks are proprietary, making it difficult to impossible for researchers to make changes to such networks for the purpose of study design and access to user-generated data from the networks. To address this issue, the authors have developed and present Social Lab, an Internet-based free and open-source social network software system available from http://www.sociallab.es . Having full availability of navigation and communication data in Social Lab allows researchers to investigate behavior in social media on an individual and group level. Automated artificial users (""bots"") are available to the researcher to simulate and stimulate social networking situations. These bots respond dynamically to situations as they unfold. The bots can easily be configured with scripts and can be used to experimentally manipulate social networking situations in Social Lab. Examples for setting up, configuring, and using Social Lab as a tool for research in social media are provided.",2014-06-01 +24314207,The neurological disease ontology.,"

Background

We are developing the Neurological Disease Ontology (ND) to provide a framework to enable representation of aspects of neurological diseases that are relevant to their treatment and study. ND is a representational tool that addresses the need for unambiguous annotation, storage, and retrieval of data associated with the treatment and study of neurological diseases. ND is being developed in compliance with the Open Biomedical Ontology Foundry principles and builds upon the paradigm established by the Ontology for General Medical Science (OGMS) for the representation of entities in the domain of disease and medical practice. Initial applications of ND will include the annotation and analysis of large data sets and patient records for Alzheimer's disease, multiple sclerosis, and stroke.

Description

ND is implemented in OWL 2 and currently has more than 450 terms that refer to and describe various aspects of neurological diseases. ND directly imports the development version of OGMS, which uses BFO 2. Term development in ND has primarily extended the OGMS terms 'disease', 'diagnosis', 'disease course', and 'disorder'. We have imported and utilize over 700 classes from related ontology efforts including the Foundational Model of Anatomy, Ontology for Biomedical Investigations, and Protein Ontology. ND terms are annotated with ontology metadata such as a label (term name), term editors, textual definition, definition source, curation status, and alternative terms (synonyms). Many terms have logical definitions in addition to these annotations. Current development has focused on the establishment of the upper-level structure of the ND hierarchy, as well as on the representation of Alzheimer's disease, multiple sclerosis, and stroke. The ontology is available as a version-controlled file at http://code.google.com/p/neurological-disease-ontology along with a discussion list and an issue tracker.

Conclusion

ND seeks to provide a formal foundation for the representation of clinical and research data pertaining to neurological diseases. ND will enable its users to connect data in a robust way with related data that is annotated using other terminologies and ontologies in the biomedical domain.",2013-12-06 +26142187,PSIKO2: a fast and versatile tool to infer population stratification on various levels in GWAS.,"

Unlabelled

Genome-wide association studies are an invaluable tool for identifying genotypic loci linked with agriculturally important traits or certain diseases. The signal on which such studies rely upon can, however, be obscured by population stratification making it necessary to account for it in some way. Population stratification is dependent on when admixture happened and thus can occur at various levels. To aid in its inference at the genome level, we recently introduced psiko, and comparison with leading methods indicates that it has attractive properties. However, until now, it could not be used for local ancestry inference which is preferable in cases of recent admixture as the genome level tends to be too coarse to properly account for processes acting on small segments of a genome. To also bring the powerful ideas underpinning psiko to bear in such studies, we extended it to psiko2, which we introduce here.

Availability and implementation

Source code, binaries and user manual are freely available at https://www.uea.ac.uk/computing/psiko.

Contact

Andrei-Alin.Popescu@uea.ac.uk or Katharina.Huber@cmp.uea.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-02 +23981227,NPEBseq: nonparametric empirical bayesian-based procedure for differential expression analysis of RNA-seq data.,"

Background

RNA-seq, a massive parallel-sequencing-based transcriptome profiling method, provides digital data in the form of aligned sequence read counts. The comparative analyses of the data require appropriate statistical methods to estimate the differential expression of transcript variants across different cell/tissue types and disease conditions.

Results

We developed a novel nonparametric empirical Bayesian-based approach (NPEBseq) to model the RNA-seq data. The prior distribution of the Bayesian model is empirically estimated from the data without any parametric assumption, and hence the method is ""nonparametric"" in nature. Based on this model, we proposed a method for detecting differentially expressed genes across different conditions. We also extended this method to detect differential usage of exons from RNA-seq data. The evaluation of NPEBseq on both simulated and publicly available RNA-seq datasets and comparison with three popular methods showed improved results for experiments with or without biological replicates.

Conclusions

NPEBseq can successfully detect differential expression between different conditions not only at gene level but also at exon level from RNA-seq datasets. In addition, NPEBSeq performs significantly better than current methods and can be applied to genome-wide RNA-seq datasets. Sample datasets and R package are available at http://bioinformatics.wistar.upenn.edu/NPEBseq.",2013-08-27 +24564496,RMaNI: Regulatory Module Network Inference framework.,"

Background

Cell survival and development are orchestrated by complex interlocking programs of gene activation and repression. Understanding how this gene regulatory network (GRN) functions in normal states, and is altered in cancer subtypes, offers fundamental insight into oncogenesis and disease progression, and holds great promise for guiding clinical decisions. Inferring a GRN from empirical microarray gene expression data is a challenging task in cancer systems biology. In recent years, module-based approaches for GRN inference have been proposed to address this challenge. Despite the demonstrated success of module-based approaches in uncovering biologically meaningful regulatory interactions, their application remains limited to a single condition, without supporting the comparison of multiple disease subtypes/conditions. Also, their use remains unnecessarily restricted to computational biologists, as accurate inference of modules and their regulators requires integration of diverse tools and heterogeneous data sources, which in turn requires scripting skills, data infrastructure and powerful computational facilities. New analytical frameworks are required to make module-based GRN inference approach more generally useful to the research community.

Results

We present the RMaNI (Regulatory Module Network Inference) framework, which supports cancer subtype-specific or condition specific GRN inference and differential network analysis. It combines both transcriptomic as well as genomic data sources, and integrates heterogeneous knowledge resources and a set of complementary bioinformatic methods for automated inference of modules, their condition specific regulators and facilitates downstream network analyses and data visualization. To demonstrate its utility, we applied RMaNI to a hepatocellular microarray dataset containing normal and three disease conditions. We demonstrate how RMaNI can be employed to understand the genetic architecture underlying three disease conditions. RMaNI is freely available at http://inspect.braembl.org.au/bi/inspect/rmani

Conclusion

RMaNI makes available a workflow with comprehensive set of tools that would otherwise be challenging for non-expert users to install and apply. The framework presented in this paper is flexible and can be easily extended to analyse any dataset with multiple disease conditions.",2013-10-22 +25512689,PubstractHelper: A Web-based Text-Mining Tool for Marking Sentences in Abstracts from PubMed Using Multiple User-Defined Keywords.,"

Unlabelled

While a huge amount of information about biological literature can be obtained by searching the PubMed database, reading through all the titles and abstracts resulting from such a search for useful information is inefficient. Text mining makes it possible to increase this efficiency. Some websites use text mining to gather information from the PubMed database; however, they are database-oriented, using pre-defined search keywords while lacking a query interface for user-defined search inputs. We present the PubMed Abstract Reading Helper (PubstractHelper) website which combines text mining and reading assistance for an efficient PubMed search. PubstractHelper can accept a maximum of ten groups of keywords, within each group containing up to ten keywords. The principle behind the text-mining function of PubstractHelper is that keywords contained in the same sentence are likely to be related. PubstractHelper highlights sentences with co-occurring keywords in different colors. The user can download the PMID and the abstracts with color markings to be reviewed later. The PubstractHelper website can help users to identify relevant publications based on the presence of related keywords, which should be a handy tool for their research.

Availability

http://bio.yungyun.com.tw/ATM/PubstractHelper.aspx and http://holab.med.ncku.edu.tw/ATM/PubstractHelper.aspx.",2014-11-27 +27152464,Occupational Exposure to Endocrine-Disrupting Chemicals and Birth Weight and Length of Gestation: A European Meta-Analysis.,"

Background

Women of reproductive age can be exposed to endocrine-disrupting chemicals (EDCs) at work, and exposure to EDCs in pregnancy may affect fetal growth.

Objectives

We assessed whether maternal occupational exposure to EDCs during pregnancy as classified by application of a job exposure matrix was associated with birth weight, term low birth weight (LBW), length of gestation, and preterm delivery.

Methods

Using individual participant data from 133,957 mother-child pairs in 13 European cohorts spanning births from 1994 through 2011, we linked maternal job titles with exposure to 10 EDC groups as assessed through a job exposure matrix. For each group, we combined the two levels of exposure categories (possible and probable) and compared birth outcomes with the unexposed group (exposure unlikely). We performed meta-analyses of cohort-specific estimates.

Results

Eleven percent of pregnant women were classified as exposed to EDCs at work during pregnancy, based on job title. Classification of exposure to one or more EDC group was associated with an increased risk of term LBW [odds ratio (OR) = 1.25; 95% CI: 1.04, 1.49], as were most specific EDC groups; this association was consistent across cohorts. Further, the risk increased with increasing number of EDC groups (OR = 2.11; 95% CI: 1.10, 4.06 for exposure to four or more EDC groups). There were few associations (p < 0.05) with the other outcomes; women holding job titles classified as exposed to bisphenol A or brominated flame retardants were at higher risk for longer length of gestation.

Conclusion

Results from our large population-based birth cohort design indicate that employment during pregnancy in occupations classified as possibly or probably exposed to EDCs was associated with an increased risk of term LBW. Citation: Birks L, Casas M, Garcia AM, Alexander J, Barros H, Bergström A, Bonde JP, Burdorf A, Costet N, Danileviciute A, Eggesbø M, Fernández MF, González-Galarzo MC, Gražulevičienė R, Hanke W, Jaddoe V, Kogevinas M, Kull I, Lertxundi A, Melaki V, Andersen AM, Olea N, Polanska K, Rusconi F, Santa-Marina L, Santos AC, Vrijkotte T, Zugna D, Nieuwenhuijsen M, Cordier S, Vrijheid M. 2016. Occupational exposure to endocrine-disrupting chemicals and birth weight and length of gestation: a European meta-analysis. Environ Health Perspect 124:1785-1793; http://dx.doi.org/10.1289/EHP208.",2016-05-06 +25429972,"InParanoid 8: orthology analysis between 273 proteomes, mostly eukaryotic.","The InParanoid database (http://InParanoid.sbc.su.se) provides a user interface to orthologs inferred by the InParanoid algorithm. As there are now international efforts to curate and standardize complete proteomes, we have switched to using these resources rather than gathering and curating the proteomes ourselves. InParanoid release 8 is based on the 66 reference proteomes that the 'Quest for Orthologs' community has agreed on using, plus 207 additional proteomes from the UniProt complete proteomes--in total 273 species. These represent 246 eukaryotes, 20 bacteria and seven archaea. Compared to the previous release, this increases the number of species by 173% and the number of pairwise species comparisons by 650%. In turn, the number of ortholog groups has increased by 423%. 
We present the contents and usages of InParanoid 8, and a detailed analysis of how the proteome content has changed since the previous release.",2014-11-27 +25429973,SNP-Seek database of SNPs derived from 3000 rice genomes.,"We have identified about 20 million rice SNPs by aligning reads from the 3000 rice genomes project with the Nipponbare genome. The SNPs and allele information are organized into a SNP-Seek system (http://www.oryzasnp.org/iric-portal/), which consists of Oracle database having a total number of rows with SNP genotypes close to 60 billion (20 M SNPs × 3 K rice lines) and web interface for convenient querying. The database allows quick retrieving of SNP alleles for all varieties in a given genome region, finding different alleles from predefined varieties and querying basic passport and morphological phenotypic information about sequenced rice lines. SNPs can be visualized together with the gene structures in JBrowse genome browser. Evolutionary relationships between rice varieties can be explored using phylogenetic trees or multidimensional scaling plots.",2014-11-27 +21685093,Detection and interpretation of metabolite-transcript coresponses using combined profiling data.,"

Motivation

Studying the interplay between gene expression and metabolite levels can yield important information on the physiology of stress responses and adaptation strategies. Performing transcriptomics and metabolomics in parallel during time-series experiments represents a systematic way to gain such information. Several combined profiling datasets have been added to the public domain and they form a valuable resource for hypothesis generating studies. Unfortunately, detecting coresponses between transcript levels and metabolite abundances is non-trivial: they cannot be assumed to overlap directly with underlying biochemical pathways and they may be subject to time delays and obscured by considerable noise.

Results

Our aim was to predict pathway comemberships between metabolites and genes based on their coresponses to applied stress. We found that in the presence of strong noise and time-shifted responses, a hidden Markov model-based similarity outperforms the simpler Pearson correlation but performs comparably or worse in their absence. Therefore, we propose a supervised method that applies pathway information to summarize similarity statistics to a consensus statistic that is more informative than any of the single measures. Using four combined profiling datasets, we show that comembership between metabolites and genes can be predicted for numerous KEGG pathways; this opens opportunities for the detection of transcriptionally regulated pathways and novel metabolically related genes.

Availability

A command-line software tool is available at http://www.cin.ufpe.br/~igcf/Metabolites.

Contact

henning@psc.riken.jp; igcf@cin.ufpe.br",2011-07-01 +26217723,Chemical gas sensor array dataset.,"To address drift in chemical sensing, an extensive dataset was collected over a period of three years. An array of 16 metal-oxide gas sensors was exposed to six different volatile organic compounds at different concentration levels under tightly-controlled operating conditions. Moreover, the generated dataset is suitable to tackle a variety of challenges in chemical sensing such as sensor drift, sensor failure or system calibration. The data is related to ""Chemical gas sensor drift compensation using classifier ensembles"", by Vergara et al. [1], and ""On the calibration of sensor arrays for pattern recognition using the minimal number of experiments"", by Rodriguez-Lujan et al. [2] The dataset can be accessed publicly at the UCI repository upon citation of: http://archive.ics.uci.edu/ml/datasets/Gas+Sensor+Array+Drift+Dataset+at+Different+Concentrations.",2015-02-16 +25428365,Expanded microbial genome coverage and improved protein family annotation in the COG database.,"Microbial genome sequencing projects produce numerous sequences of deduced proteins, only a small fraction of which have been or will ever be studied experimentally. This leaves sequence analysis as the only feasible way to annotate these proteins and assign to them tentative functions. The Clusters of Orthologous Groups of proteins (COGs) database (http://www.ncbi.nlm.nih.gov/COG/), first created in 1997, has been a popular tool for functional annotation. 
Its success was largely based on (i) its reliance on complete microbial genomes, which allowed reliable assignment of orthologs and paralogs for most genes; (ii) orthology-based approach, which used the function(s) of the characterized member(s) of the protein family (COG) to assign function(s) to the entire set of carefully identified orthologs and describe the range of potential functions when there were more than one; and (iii) careful manual curation of the annotation of the COGs, aimed at detailed prediction of the biological function(s) for each COG while avoiding annotation errors and overprediction. Here we present an update of the COGs, the first since 2003, and a comprehensive revision of the COG annotations and expansion of the genome coverage to include representative complete genomes from all bacterial and archaeal lineages down to the genus level. This re-analysis of the COGs shows that the original COG assignments had an error rate below 0.5% and allows an assessment of the progress in functional genomics in the past 12 years. During this time, functions of many previously uncharacterized COGs have been elucidated and tentative functional assignments of many COGs have been validated, either by targeted experiments or through the use of high-throughput methods. A particularly important development is the assignment of functions to several widespread, conserved proteins many of which turned out to participate in translation, in particular rRNA maturation and tRNA modification. The new version of the COGs is expected to become an important tool for microbial genomics.",2014-11-26 +25428361,GRASP v2.0: an update on the Genome-Wide Repository of Associations between SNPs and phenotypes.,"Here, we present an update on the Genome-Wide Repository of Associations between SNPs and Phenotypes (GRASP) database version 2.0 (http://apps.nhlbi.nih.gov/Grasp/Overview.aspx). 
GRASP is a centralized repository of publically available genome-wide association study (GWAS) results. GRASP v2.0 contains ∼ 8.87 million SNP associations reported in 2082 studies, an increase of ∼ 2.59 million SNP associations (41.4% increase) and 693 studies (48.9% increase) from our previous version. Our goal in developing and maintaining GRASP is to provide a user-friendly means for diverse sets of researchers to query reported SNP associations (P ≤ 0.05) with human traits, including methylation and expression quantitative trait loci (QTL) studies. Therefore, in addition to making the full database available for download, we developed a user-friendly web interface that allows for direct querying of GRASP. We provide details on the use of this web interface and what information may be gleaned from using this interactive option. Additionally, we describe potential uses of GRASP and how the scientific community may benefit from the convenient availability of all SNP association results from GWAS (P ≤ 0.05). We plan to continue updating GRASP with newly published GWAS and increased annotation depth.",2014-11-26 +22434828,Biocurators and biocuration: surveying the 21st century challenges.,"Curated databases are an integral part of the tool set that researchers use on a daily basis for their work. For most users, however, how databases are maintained, and by whom, is rather obscure. The International Society for Biocuration (ISB) represents biocurators, software engineers, developers and researchers with an interest in biocuration. Its goals include fostering communication between biocurators, promoting and describing their work, and highlighting the added value of biocuration to the world. The ISB recently conducted a survey of biocurators to better understand their educational and scientific backgrounds, their motivations for choosing a curatorial job and their career goals. The results are reported here. 
From the responses received, it is evident that biocuration is performed by highly trained scientists and perceived to be a stimulating career, offering both intellectual challenges and the satisfaction of performing work essential to the modern scientific community. It is also apparent that the ISB has at least a dual role to play to facilitate biocurators' work: (i) to promote biocuration as a career within the greater scientific community; (ii) to aid the development of resources for biomedical research through promotion of nomenclature and data-sharing standards that will allow interconnection of biological databases and better exploit the pivotal contributions that biocurators are making. DATABASE URL: http://biocurator.org.",2012-03-20 +24058397,A consistency-based feature selection method allied with linear SVMs for HIV-1 protease cleavage site prediction.,"

Background

Predicting type-1 Human Immunodeficiency Virus (HIV-1) protease cleavage site in protein molecules and determining its specificity is an important task which has attracted considerable attention in the research community. Achievements in this area are expected to result in effective drug design (especially for HIV-1 protease inhibitors) against this life-threatening virus. However, some drawbacks (like the shortage of the available training data and the high dimensionality of the feature space) turn this task into a difficult classification problem. Thus, various machine learning techniques, and specifically several classification methods have been proposed in order to increase the accuracy of the classification model. In addition, for several classification problems, which are characterized by having few samples and many features, selecting the most relevant features is a major factor for increasing classification accuracy.

Results

We propose for HIV-1 data a consistency-based feature selection approach in conjunction with recursive feature elimination of support vector machines (SVMs). We used various classifiers for evaluating the results obtained from the feature selection process. We further demonstrated the effectiveness of our proposed method by comparing it with a state-of-the-art feature selection method applied on HIV-1 data, and we evaluated the reported results based on attributes which have been selected from different combinations.

Conclusion

Applying feature selection on training data before realizing the classification task seems to be a reasonable data-mining process when working with types of data similar to HIV-1. On HIV-1 data, some feature selection or extraction operations in conjunction with different classifiers have been tested and noteworthy outcomes have been reported. These facts motivate the work presented in this paper.

Software availability

The software is available at http://ozyer.etu.edu.tr/c-fs-svm.rar. The software can be downloaded at esnag.etu.edu.tr/software/hiv_cleavage_site_prediction.rar; you will find a readme file which explains how to set the software in order to work.",2013-08-23 +23895117,PARma: identification of microRNA target sites in AGO-PAR-CLIP data.,"PARma is a complete data analysis software for AGO-PAR-CLIP experiments to identify target sites of microRNAs as well as the microRNA binding to these sites. It integrates specific characteristics of the experiments into a generative model. The model and a novel pattern discovery tool are iteratively applied to data to estimate seed activity probabilities, cluster confidence scores and to assign the most probable microRNA. Based on differential PAR-CLIP analysis and comparison to RIP-Chip data, we show that PARma is more accurate than existing approaches. PARma is available from http://www.bio.ifi.lmu.de/PARma.",2013-07-29 +22336850,Interventions for drooling in children with cerebral palsy.,"

Background

Drooling is a common problem for children with cerebral palsy (CP). This can be distressing for these children as well as for their parents and caregivers. The consequences of drooling include risk of social rejection, damp and soiled clothing, unpleasant odour, irritated chapped skin, mouth infections, dehydration, interference with speech, damage to books, communication aids, computers, and the risk of social isolation (Blasco 1992; Van der Burg 2006). A range of interventions exist that aim to reduce or eliminate drooling. There is a lack of consensus regarding which interventions are most effective for children with CP.

Objectives

(1) To evaluate the effectiveness and safety of interventions aimed at reducing or eliminating drooling in children with cerebral palsy. (2) To provide the best available evidence to inform clinical practice. (3) To assist with future research planning.

Search methods

We searched the following databases from inception to December 2010: Cochrane Central Register of Controlled Trials (CENTRAL); Medline via Ovid; EMBASE; CINAHL; ERIC; Psych INFO; Web of Science; Web of Knowledge; AMED; SCOPUS; Dissertation Abstracts. We searched for ongoing clinical trials in the Clinical Trials web site (http://clinicaltrials.gov.) and in the Current Controlled Trials web site (http://www.controlled-trials.com/). We hand searched a range of relevant journals and conference proceeding abstracts.

Selection criteria

Only randomised controlled trials (RCTs) and controlled clinical trials (CCTs) were included.

Data collection and analysis

Data were extracted independently by MW, MS and LP and differences resolved through discussion.

Main results

Six studies were eligible for inclusion in the review. Four of these studies were trials using botulinum toxin-A (BoNT-A) and two were trials on the pharmacological interventions, benztropine and glycopyrrolate. No RCTs or CCTs were retrieved on surgery, physical, oro-motor and oro-sensory therapies, behavioural interventions, intra-oral appliances or acupuncture. In the studies eligible for review, there was considerable heterogeneity within and across interventions and a meta-analysis was not possible. A descriptive summary of each study is provided. All studies showed some statistically significant change for treatment groups up to 1 month post intervention. However, there were methodological flaws associated with all six studies.

Authors' conclusions

It was not possible to reach a conclusion on the effectiveness and safety of either BoNT-A or the pharmaceutical interventions, benztropine and glycopyrrolate. There is insufficient evidence to inform clinical practice on interventions for drooling in children with CP. Directions for future research are provided.",2012-02-15 +25451822,EDCs DataBank: 3D-Structure database of endocrine disrupting chemicals.,"Endocrine disrupting chemicals (EDCs) are a group of compounds that affect the endocrine system, frequently found in everyday products and epidemiologically associated with several diseases. The purpose of this work was to develop EDCs DataBank, the only database of EDCs with three-dimensional structures. This database was built on MySQL using the EU list of potential endocrine disruptors and TEDX list. It contains the three-dimensional structures available on PubChem, as well as a wide variety of information from different databases and text mining tools, useful for almost any kind of research regarding EDCs. The web platform was developed employing HTML, CSS and PHP languages, with dynamic contents in a graphic environment, facilitating information analysis. Currently EDCs DataBank has 615 molecules, including pesticides, natural and industrial products, cosmetics, drugs and food additives, among other low molecular weight xenobiotics. Therefore, this database can be used to study the toxicological effects of these molecules, or to develop pharmaceuticals targeting hormone receptors, through docking studies, high-throughput virtual screening and ligand-protein interaction analysis. EDCs DataBank is totally user-friendly and the 3D-structures of the molecules can be downloaded in several formats. This database is freely available at http://edcs.unicartagena.edu.co.",2014-11-25 +24165882,YeastNet v3: a public database of data-specific and integrated functional gene networks for Saccharomyces cerevisiae.,"Saccharomyces cerevisiae, i.e. 
baker's yeast, is a widely studied model organism in eukaryote genetics because of its simple protocols for genetic manipulation and phenotype profiling. The high abundance of publicly available data that has been generated through diverse 'omics' approaches has led to the use of yeast for many systems biology studies, including large-scale gene network modeling to better understand the molecular basis of the cellular phenotype. We have previously developed a genome-scale gene network for yeast, YeastNet v2, which has been used for various genetics and systems biology studies. Here, we present an updated version, YeastNet v3 (available at http://www.inetbio.org/yeastnet/), that significantly improves the prediction of gene-phenotype associations. The extended genome in YeastNet v3 covers up to 5818 genes (∼99% of the coding genome) wired by 362 512 functional links. YeastNet v3 provides a new web interface to run the tools for network-guided hypothesis generations. YeastNet v3 also provides edge information for all data-specific networks (∼2 million functional links) as well as the integrated networks. Therefore, users can construct alternative versions of the integrated network by applying their own data integration algorithm to the same data-specific links.",2013-10-27 +25429060,A bio-inspired computing model for ovarian carcinoma classification and oncogene detection.,"

Motivation

Ovarian cancer is the fifth leading cause of cancer deaths in women in the western world for 2013. In ovarian cancer, benign tumors turn malignant, but the point of transition is difficult to predict and diagnose. The 5-year survival rate of all types of ovarian cancer is 44%, but this can be improved to 92% if the cancer is found and treated before it spreads beyond the ovary. However, only 15% of all ovarian cancers are found at this early stage. Therefore, the ability to automatically identify and diagnose ovarian cancer precisely and efficiently as the tissue changes from benign to invasive is important for clinical treatment and for increasing the cure rate. This study proposes a new ovarian carcinoma classification model using two algorithms: a novel discretization of food sources for an artificial bee colony (DfABC), and a support vector machine (SVM). For the first time in the literature, oncogene detection using this method is also investigated.

Results

A novel bio-inspired computing model and hybrid algorithms combining DfABC and SVM was applied to ovarian carcinoma and oncogene classification. This study used the human ovarian cDNA expression database to collect 41 patient samples and 9600 genes in each pathological stage. Feature selection methods were used to detect and extract 15 notable oncogenes. We then used the DfABC-SVM model to examine these 15 oncogenes, dividing them into eight different classifications according to their gene expressions of various pathological stages. The average accuracy of the eight classification experiments was 94.76%. This research also found some oncogenes that had not been discovered or indicated in previous scientific studies. The main contribution of this research is the proof that these newly discovered oncogenes are highly related to ovarian or other cancers.

Availability and implementation

http://mht.mis.nchu.edu.tw/moodle/course/view.php?id=7.",2014-11-25 +25637559,NetExplore: a web server for modeling small network motifs.,"

Motivation

Quantitative and qualitative assessment of biological data often produces small essential recurrent networks, containing 3-5 components called network motifs. In this context, model solutions for small network motifs represent very high interest.

Results

Software package NetExplore has been created in order to generate, classify and analyze solutions for network motifs including up to six network components. NetExplore allows plotting and visualization of the solution's phase spaces and bifurcation diagrams.

Availability and implementation

The current version of NetExplore has been implemented in Perl-CGI and is accessible at the following locations: http://line.bioinfolab.net/nex/NetExplore.htm and http://nex.autosome.ru/nex/NetExplore.htm.",2015-01-30 +26187250,Challenges in clinical natural language processing for automated disorder normalization.,"

Background

Identifying key variables such as disorders within the clinical narratives in electronic health records has wide-ranging applications within clinical practice and biomedical research. Previous research has demonstrated reduced performance of disorder named entity recognition (NER) and normalization (or grounding) in clinical narratives than in biomedical publications. In this work, we aim to identify the cause for this performance difference and introduce general solutions.

Methods

We use closure properties to compare the richness of the vocabulary in clinical narrative text to biomedical publications. We approach both disorder NER and normalization using machine learning methodologies. Our NER methodology is based on linear-chain conditional random fields with a rich feature approach, and we introduce several improvements to enhance the lexical knowledge of the NER system. Our normalization method - never previously applied to clinical data - uses pairwise learning to rank to automatically learn term variation directly from the training data.

Results

We find that while the size of the overall vocabulary is similar between clinical narrative and biomedical publications, clinical narrative uses a richer terminology to describe disorders than publications. We apply our system, DNorm-C, to locate disorder mentions and in the clinical narratives from the recent ShARe/CLEF eHealth Task. For NER (strict span-only), our system achieves precision=0.797, recall=0.713, f-score=0.753. For the normalization task (strict span+concept) it achieves precision=0.712, recall=0.637, f-score=0.672. The improvements described in this article increase the NER f-score by 0.039 and the normalization f-score by 0.036. We also describe a high recall version of the NER, which increases the normalization recall to as high as 0.744, albeit with reduced precision.

Discussion

We perform an error analysis, demonstrating that NER errors outnumber normalization errors by more than 4-to-1. Abbreviations and acronyms are found to be frequent causes of error, in addition to the mentions the annotators were not able to identify within the scope of the controlled vocabulary.

Conclusion

Disorder mentions in text from clinical narratives use a rich vocabulary that results in high term variation, which we believe to be one of the primary causes of reduced performance in clinical narrative. We show that pairwise learning to rank offers high performance in this context, and introduce several lexical enhancements - generalizable to other clinical NER tasks - that improve the ability of the NER system to handle this variation. DNorm-C is a high performing, open source system for disorders in clinical text, and a promising step toward NER and normalization methods that are trainable to a wide variety of domains and entities. (DNorm-C is open source software, and is available with a trained model at the DNorm demonstration website: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmTools/#DNorm.).",2015-07-14 +26471454,3DRobot: automated generation of diverse and well-packed protein structure decoys.,"

Motivation

Computationally generated non-native protein structure conformations (or decoys) are often used for designing protein folding simulation methods and force fields. However, almost all the decoy sets currently used in literature suffer from uneven root mean square deviation (RMSD) distribution with bias to non-protein like hydrogen-bonding and compactness patterns. Meanwhile, most protein decoy sets are pre-calculated and there is a lack of methods for automated generation of high-quality decoys for any target proteins.

Results

We developed a new algorithm, 3DRobot, to create protein structure decoys by free fragment assembly with enhanced hydrogen-bonding and compactness interactions. The method was benchmarked with three widely used decoy sets from ab initio folding and comparative modeling simulations. The decoys generated by 3DRobot are shown to have significantly enhanced diversity and evenness with a continuous distribution in the RMSD space. The new energy terms introduced in 3DRobot improve the hydrogen-bonding network and compactness of decoys, which eliminates the possibility of native structure recognition by trivial potentials. Algorithms that can automatically create such diverse and well-packed non-native conformations from any protein structure should have a broad impact on the development of advanced protein force field and folding simulation methods. AVAILABILITY AND IMPLEMENTATION: http://zhanglab.ccmb.med.umich.edu/3DRobot/

Contact

jiay@phy.ccnu.edu.cn; zhng@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-14 +24395071,Development of an improved risk calculator for complications in proctectomy.,"

Background

Rectal surgery is associated with high complication rates, but tools to prospectively define surgical risk are lacking. Improved preoperative risk assessment could better inform patients and refine decision making by surgeons. Our objective was to develop a validated model for proctectomy risk prediction.

Methods

We reviewed non-emergent ACS-NSQIP proctectomy data from 2005 to 2011 (n = 13,385). Logistic regression identified variables available prior to surgery showing independent association with 30-day morbidity in 2010-2011 (n = 5,570). The resulting risk model's discrimination and calibration were tested against the NSQIP-supplied morbidity model, and performance was validated against independent 2005-2009 data.

Results

Overall morbidity for proctectomy in 2010-2011 was 40.2%; significantly higher than the 23.0% rate predicted by the NSQIP-provided general and vascular surgery risk model. Frequent complications included bleeding (16.3%), superficial infection (9.2%), and sepsis (7.4%). Our novel model incorporating 17 preoperative variables provided better discrimination and calibration (p < 0.05) than the NSQIP model and was validated against the 2005-2009 data. A web-based calculator makes this new model available for prospective risk assessment.

Conclusions

We conclude that the NSQIP-supplied risk model underestimates proctectomy morbidity and that this new, validated risk model and risk prediction tool ( http://myweb.uiowa.edu/sksherman ) may allow clinicians to counsel patients with accurate risk estimates using data available in the preoperative setting.",2014-01-07 +24490765,Using high-density DNA methylation arrays to profile copy number alterations.,"The integration of genomic and epigenomic data is an increasingly popular approach for studying the complex mechanisms driving cancer development. We have developed a method for evaluating both methylation and copy number from high-density DNA methylation arrays. Comparing copy number data from Infinium HumanMethylation450 BeadChips and SNP arrays, we demonstrate that Infinium arrays detect copy number alterations with the sensitivity of SNP platforms. These results show that high-density methylation arrays provide a robust and economic platform for detecting copy number and methylation changes in a single experiment. Our method is available in the ChAMP Bioconductor package: http://www.bioconductor.org/packages/2.13/bioc/html/ChAMP.html.",2014-02-03 +26484143,Small molecule inhibition of FOXM1: How to bring a novel compound into genomic context.,"Deregulation of transcription factor (TF) networks is emerging as a major pathogenic event in many human cancers (Darnell, 2002 [1]; Libermann and Zerbini, 2006 [2]; Laoukili et al., 2007 [3]). Small molecule intervention is an attractive avenue to understand TF regulatory mechanisms in healthy and disease state, as well as for exploiting these targets therapeutically (Koehler et al., 2003 [4]; Berg, 2008 [5]; Koehler, 2010 [6]). However, because of their physico-chemical properties, TF targeting has been proven to be difficult (Verdine and Walensky, 2007 [7]). 
The TF FOXM1 is an important mitotic player (Wonsey and Follettie, 2005 [8]; Laoukili et al., 2005 [9]; McDonald, 2005 [10]) also implicated in cancer progression (Laoukili et al., 2007 [3]; Teh, 2011 [11]; Koo, 2012 [12]) and drug resistance development (Kwok et al., 2010 [13]; Carr et al., [14]). Therefore, its inhibition is an attractive goal for cancer therapy. Here, we describe a computational biology approach, by giving detailed insights into methodologies and technical results, which was used to analyze the transcriptional RNA-Seq data presented in our previous work (Gormally et al., 2014 [20]). Our Bioinformatics analysis shed light on the cellular effect of a novel FOXM1 inhibitor (FDI-6) newly identified through a biophysical screen. The data for this report is available at the public GEO repository (accession number http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE58626).",2014-10-22 +26835476,The Effect of Rosuvastatin on Markers of Immune Activation in Treatment-Naive Human Immunodeficiency Virus-Patients.,"Background.  Immune activation has been implicated in the excess mortality in human immunodeficiency virus (HIV)-infected patients, due to cardiovascular diseases and malignancies. Statins may modulate this immune activation. We assessed the capacity of rosuvastatin to mitigate immune activation in treatment-naive HIV-infected patients. Methods.  In a randomized double-blind placebo-controlled crossover study, we explored the effects of 8 weeks of rosuvastatin 20 mg in treatment-naive male HIV-infected patients (n = 28) on immune activation markers: neopterin, soluble Toll-like receptor (TLR)2, sTLR4, interleukin (IL)-6, IL-1Ra, IL-18, d-dimer, highly sensitive C-reactive protein, and CD38 and/or human leukocyte antigen-DR expression on T cells. Baseline data were compared with healthy male controls (n = 10). 
Furthermore, the effects of rosuvastatin on HIV-1 RNA, CD4/CD8 T-cell count, and low-density lipoprotein cholesterol were examined and side effects were registered. Results.  T-cell activation levels were higher in patients than in controls. Patients had higher levels of circulating IL-18, sTLR2, and neopterin (all P < .01). Twenty patients completed the study. Rosuvastatin increased the CD4/CD8 T-cell ratio (P = .02). No effect on other markers was found. Conclusions.  Patients infected with HIV had higher levels of circulating neopterin, IL-18, sTLR2, and T-cell activation markers. Rosuvastatin had a small but significant positive effect on CD4/CD8 T-cell ratio, but no influence on other markers of T-cell activation and innate immunity was identified (The Netherlands National Trial Register [NTR] NTR 2349, http://www.trialregister.nl/trialreg/index.asp).",2015-12-23 +24049071,DEXUS: identifying differential expression in RNA-Seq studies with unknown conditions.,"Detection of differential expression in RNA-Seq data is currently limited to studies in which two or more sample conditions are known a priori. However, these biological conditions are typically unknown in cohort, cross-sectional and nonrandomized controlled studies such as the HapMap, the ENCODE or the 1000 Genomes project. We present DEXUS for detecting differential expression in RNA-Seq data for which the sample conditions are unknown. DEXUS models read counts as a finite mixture of negative binomial distributions in which each mixture component corresponds to a condition. A transcript is considered differentially expressed if modeling of its read counts requires more than one condition. DEXUS decomposes read count variation into variation due to noise and variation due to differential expression. 
Evidence of differential expression is measured by the informative/noninformative (I/NI) value, which allows differentially expressed transcripts to be extracted at a desired specificity (significance level) or sensitivity (power). DEXUS performed excellently in identifying differentially expressed transcripts in data with unknown conditions. On 2400 simulated data sets, I/NI value thresholds of 0.025, 0.05 and 0.1 yielded average specificities of 92, 97 and 99% at sensitivities of 76, 61 and 38%, respectively. On real-world data sets, DEXUS was able to detect differentially expressed transcripts related to sex, species, tissue, structural variants or quantitative trait loci. The DEXUS R package is publicly available from Bioconductor and the scripts for all experiments are available at http://www.bioinf.jku.at/software/dexus/.",2013-09-17 +25380959,PyFDAP: automated analysis of fluorescence decay after photoconversion (FDAP) experiments.,"

Unlabelled

We developed the graphical user interface PyFDAP for the fitting of linear and non-linear decay functions to data from fluorescence decay after photoconversion (FDAP) experiments. PyFDAP structures and analyses large FDAP datasets and features multiple fitting and plotting options.

Availability and implementation

PyFDAP was written in Python and runs on Ubuntu Linux, Mac OS X and Microsoft Windows operating systems. The software, a user guide and a test FDAP dataset are freely available for download from http://people.tuebingen.mpg.de/mueller-lab.",2014-11-06 +24206606,BS-Seeker2: a versatile aligning pipeline for bisulfite sequencing data.,"

Background

DNA methylation is an important epigenetic modification involved in many biological processes. Bisulfite treatment coupled with high-throughput sequencing provides an effective approach for studying genome-wide DNA methylation at base resolution. Libraries such as whole genome bisulfite sequencing (WGBS) and reduced represented bisulfite sequencing (RRBS) are widely used for generating DNA methylomes, demanding efficient and versatile tools for aligning bisulfite sequencing data.

Results

We have developed BS-Seeker2, an updated version of BS Seeker, as a full pipeline for mapping bisulfite sequencing data and generating DNA methylomes. BS-Seeker2 improves mappability over existing aligners by using local alignment. It can also map reads from RRBS library by building special indexes with improved efficiency and accuracy. Moreover, BS-Seeker2 provides additional function for filtering out reads with incomplete bisulfite conversion, which is useful in minimizing the overestimation of DNA methylation levels. We also defined CGmap and ATCGmap file formats for full representations of DNA methylomes, as part of the outputs of BS-Seeker2 pipeline together with BAM and WIG files.

Conclusions

Our evaluations on the performance show that BS-Seeker2 works efficiently and accurately for both WGBS data and RRBS data. BS-Seeker2 is freely available at http://pellegrini.mcdb.ucla.edu/BS_Seeker2/ and the Galaxy server.",2013-11-10 +21378592,Pediatric respiratory diseases: 2011 update for the Rogers' Textbook of Pediatric Intensive Care.,"

Objectives

To review articles relevant to the field of pediatric respiratory disease that were published after the 2008 Rogers' Textbook of Pediatric Intensive Care.

Data sources

The authors searched the PubMed database (http://www.ncbi.nlm.nih.gov/sites/entrez) from the National Library of Medicine for citations from the pediatric and adult literature relevant to pediatric status asthmaticus, bronchiolitis, pneumonia, acute lung injury, acute respiratory distress syndrome, and neonatal respiratory failure. The authors also searched the reference lists of key primary publications and recent review articles, and queried the National Institutes of Health's ClinicalTrials.gov Web site (www.clinicaltrials.gov) to obtain information about ongoing clinical trials for acute lung injury. The authors had knowledge of new publications in the field of respiratory monitoring, which were considered for inclusion in the review.

Study selection and data extraction

The authors reviewed the promising articles and the decision to include any article in the review was based on its potential to inform pediatric intensive care practice or future research.

Data synthesis

Articles in six categories were selected for inclusion: status asthmaticus, bronchiolitis, pneumonia, acute lung injury/acute respiratory distress syndrome, respiratory monitoring, and neonatal respiratory failure.

Conclusions

There have been important new developments relevant to the pathogenesis and management of pediatric respiratory diseases. In particular, new insights into the causal pathways of respiratory syncytial virus-induced airways disease can potentially lead to novel therapies. Computed tomography imaging of the injured lung during mechanical ventilation has opened new avenues for future research directed at testing new treatments in acute lung injury subpopulations defined according to lung mechanics. Promising new monitoring techniques may play a supporting role in the conduct of these studies. Finally, evidence from the neonatal literature recently has shown how the course and future consequences of respiratory failure in this population may be modified through more widespread use of noninvasive support.",2011-05-01 +22140108,CharProtDB: a database of experimentally characterized protein annotations.,"CharProtDB (http://www.jcvi.org/charprotdb/) is a curated database of biochemically characterized proteins. It provides a source of direct rather than transitive assignments of function, designed to support automated annotation pipelines. The initial data set in CharProtDB was collected through manual literature curation over the years by analysts at the J. Craig Venter Institute (JCVI) [formerly The Institute of Genomic Research (TIGR)] as part of their prokaryotic genome sequencing projects. The CharProtDB has been expanded by import of selected records from publicly available protein collections whose biocuration indicated direct rather than homology-based assignment of function. Annotations in CharProtDB include gene name, symbol and various controlled vocabulary terms, including Gene Ontology terms, Enzyme Commission number and TransportDB accession. 
Each annotation is referenced with the source; ideally a journal reference, or, if imported and lacking one, the original database source.",2011-12-02 +25179222,Score_set: a CAPRI benchmark for scoring protein complexes.,"Critical Assessment of PRedicted Interactions (CAPRI) has proven to be a catalyst for the development of docking algorithms. An essential step in docking is the scoring of predicted binding modes in order to identify stable complexes. In 2005, CAPRI introduced the scoring experiment, where upon completion of a prediction round, a larger set of models predicted by different groups and comprising both correct and incorrect binding modes, is made available to all participants for testing new scoring functions independently from docking calculations. Here we present an expanded benchmark data set for testing scoring functions, which comprises the consolidated ensemble of predicted complexes made available in the CAPRI scoring experiment since its inception. This consolidated scoring benchmark contains predicted complexes for 15 published CAPRI targets. These targets were subjected to 23 CAPRI assessments, due to existence of multiple binding modes for some targets. The benchmark contains more than 19,000 protein complexes. About 10% of the complexes represent docking predictions of acceptable quality or better, the remainder represent incorrect solutions (decoys). The benchmark set contains models predicted by 47 different predictor groups including web servers, which use different docking and scoring procedures, and is arguably as diverse as one may expect, representing the state of the art in protein docking. The data set is publicly available at the following URL: http://cb.iri.univ-lille1.fr/Users/lensink/Score_set.",2014-09-11 +25431328,MGAS: a powerful tool for multivariate gene-based genome-wide association analysis.,"

Motivation

Standard genome-wide association studies, testing the association between one phenotype and a large number of single nucleotide polymorphisms (SNPs), are limited in two ways: (i) traits are often multivariate, and analysis of composite scores entails loss in statistical power and (ii) gene-based analyses may be preferred, e.g. to decrease the multiple testing problem.

Results

Here we present a new method, multivariate gene-based association test by extended Simes procedure (MGAS), that allows gene-based testing of multivariate phenotypes in unrelated individuals. Through extensive simulation, we show that under most trait-generating genotype-phenotype models MGAS has superior statistical power to detect associated genes compared with gene-based analyses of univariate phenotypic composite scores (i.e. GATES, multiple regression), and multivariate analysis of variance (MANOVA). Re-analysis of metabolic data revealed 32 False Discovery Rate controlled genome-wide significant genes, and 12 regions harboring multiple genes; of these 44 regions, 30 were not reported in the original analysis.

Conclusion

MGAS allows researchers to conduct their multivariate gene-based analyses efficiently, and without the loss of power that is often associated with an incorrectly specified genotype-phenotype models.

Availability and implementation

MGAS is freely available in KGG v3.0 (http://statgenpro.psychiatry.hku.hk/limx/kgg/download.php). Access to the metabolic dataset can be requested at dbGaP (https://dbgap.ncbi.nlm.nih.gov/). The R-simulation code is available from http://ctglab.nl/people/sophie_van_der_sluis.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-26 +24174567,"MSPrep--summarization, normalization and diagnostics for processing of mass spectrometry-based metabolomic data.","

Motivation

Although R packages exist for the pre-processing of metabolomic data, they currently do not incorporate additional analysis steps of summarization, filtering and normalization of aligned data. We developed the MSPrep R package to complement other packages by providing these additional steps, implementing a selection of popular normalization algorithms and generating diagnostics to help guide investigators in their analyses.

Availability

http://www.sourceforge.net/projects/msprep",2013-10-29 +25416797,DoRiNA 2.0--upgrading the doRiNA database of RNA interactions in post-transcriptional regulation.,"The expression of almost all genes in animals is subject to post-transcriptional regulation by RNA binding proteins (RBPs) and microRNAs (miRNAs). The interactions between both RBPs and miRNAs with mRNA can be mapped on a whole-transcriptome level using experimental and computational techniques established in the past years. The combined action of RBPs and miRNAs is thought to form a post-transcriptional regulatory code. Here we present doRiNA 2.0, available at http://dorina.mdc-berlin.de. In this highly improved new version, we have completely reworked the user interface and expanded the database to improve the usability of the website. Taking into account user feedback over the past years, the input forms for both the simple and the combinatorial search function have been streamlined and combined into a single web page that will also display the search results. Especially, custom uploads is one of the key new features in doRiNA 2.0. To enable the inclusion of doRiNA into third-party analysis pipelines, all operations are accessible via a REST API. Alternatively, local installations can be queried using a Python API. Both the web application and the APIs are available under an OSI-approved Open Source license that allows research and commercial access and re-use.",2014-11-21 +25592768,Putative functional variants of XRCC1 identified by RegulomeDB were not associated with lung cancer risk in a Korean population.,"The Encyclopedia of DNA elements (ENCODE) project revealed that nearby or distantly located non-coding DNA regulates the expression of coding genes. RegulomeDB (http://regulome.stanford.edu) is a new database that can be used to predict whether a variant affects transcription factor binding and gene expression. 
We investigated the association between lung cancer risk and potentially functional polymorphisms of XRCC1 that were selected using RegulomeDB in a Korean population. A total of 185 polymorphisms of XRCC1 were evaluated using RegulomeDB. Strong evidence suggested that 10 polymorphisms, from among the 185, affected XRCC1 expression with scores of 1a-1f that were based on the RegulomeDB scoring system. The rs2854510 polymorphism was rare in Asians (minor allele frequency < 0.05). Eight polymorphisms were in strong linkage disequilibrium (LD). The rs2854509 polymorphism, which was one of the 8 polymorphisms in LD, and rs7248167, which was not in the LD block, were genotyped in 610 lung cancer patients and 607 age- and sex-matched controls. Additionally, four polymorphisms of XRCC1 (rs25487, rs25489, rs1799782, and rs3213245), which were investigated with regard to their association with lung cancer risk in previous studies, were also genotyped. Two polymorphisms (rs2854509 and rs7248167) that were predicted to affect XRCC1 expression based on their RegulomeDB scores were not associated with lung cancer risk (P = 0.31 and 0.93, respectively). When stratified according to age, gender, smoking status, and tumor histology, the two polymorphisms of XRCC1 were not associated with lung cancer risk. Among the four polymorphisms that were previously studied, only rs25489 of XRCC1 was significantly associated with lung cancer risk (dominant model, adjusted odds ratio = 0.61, 95% confidence interval = 0.46-0.83, P = 0.002). Although RegulomeDB is an attractive tool for predicting the regulatory potential of variants, the two polymorphisms that were selected using RegulomeDB were not associated with lung cancer risk.",2014-11-21 +26836922,Major Bleeding and Hemorrhagic Stroke With Direct Oral Anticoagulants in Patients With Renal Failure: Systematic Review and Meta-Analysis of Randomized Trials.,"

Background

Direct oral anticoagulants (DOACs) are used as an alternative for traditional antithrombotic therapy. However, the safety profile of DOACs in patients with renal failure (RF) has not been determined.

Methods

A systematic review was performed assessing the reported safety of DOACs compared with vitamin K antagonists (VKAs) in patients with RF and estimated creatinine clearance (eCrCL) < 50 mL/min and eCrCL 50 to 80 mL/min. MEDLINE, EMBASE, Cochrane, and the Clinical Trials Registry (ClinicalTrials.gov) were searched for randomized clinical trials up to November 2015. The data were pooled by using both traditional frequentist and Bayesian random effects models.

Results

Nine trials met the inclusion criteria. Among 94,897 participants, 54,667 (58%) had RF. Compared with VKAs, DOACs were associated with a significantly decreased risk for major bleeding in patients with eCrCL 50 to 80 mL/min (risk ratio, 0.87 [95% CI, 0.81-0.93]) and a nonsignificant decrease in the risk for major bleeding in patients with eCrCL < 50 mL/min (risk ratio, 0.83 [95% CI, 0.68-1.02]); there was evidence of significant heterogeneity. Indirect comparisons, using Bayesian network analysis, indicated that apixaban was associated with a decreased rate of major bleeding compared with other DOACs in patients with eCrCL < 50 mL/min. DOACs were associated with a significant decrease in the risk for hemorrhagic stroke compared with VKAs in patients with eCrCL < 50 mL/min and 50 to 80 mL/min.

Conclusions

As a class, DOACs are associated with a reduced risk for hemorrhagic stroke compared with VKAs in patients with RF. However, DOACs may differ from each other in their relative risk for major bleeding in patients with eCrCL < 50 mL/min.

Trial registry

PROSPERO registry; No.: CRD42014013730; URL: http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42014013730.",2016-01-18 +24363379,Ondex Web: web-based visualization and exploration of heterogeneous biological networks.,"

Summary

Ondex Web is a new web-based implementation of the network visualization and exploration tools from the Ondex data integration platform. New features such as context-sensitive menus and annotation tools provide users with intuitive ways to explore and manipulate the appearance of heterogeneous biological networks. Ondex Web is open source, written in Java and can be easily embedded into Web sites as an applet. Ondex Web supports loading data from a variety of network formats, such as XGMML, NWB, Pajek and OXL.

Availability and implementation

http://ondex.rothamsted.ac.uk/OndexWeb.",2013-12-20 +26005672,SPECTRA: An Integrated Knowledge Base for Comparing Tissue and Tumor-Specific PPI Networks in Human.,"Protein-protein interaction (PPI) networks available in public repositories usually represent relationships between proteins within the cell. They ignore the specific set of tissues or tumors where the interactions take place. Indeed, proteins can form tissue-selective complexes, while they remain inactive in other tissues. For these reasons, a great attention has been recently paid to tissue-specific PPI networks, in which nodes are proteins of the global PPI network whose corresponding genes are preferentially expressed in specific tissues. In this paper, we present SPECTRA, a knowledge base to build and compare tissue or tumor-specific PPI networks. SPECTRA integrates gene expression and protein interaction data from the most authoritative online repositories. We also provide tools for visualizing and comparing such networks, in order to identify the expression and interaction changes of proteins across tissues, or between the normal and pathological states of the same tissue. SPECTRA is available as a web server at http://alpha.dmi.unict.it/spectra.",2015-05-08 +25420551,A statistical approach for 5' splice site prediction using short sequence motifs and without encoding sequence data.,"

Background

Most of the approaches for splice site prediction are based on machine learning techniques. Though, these approaches provide high prediction accuracy, the window lengths used are longer in size. Hence, these approaches may not be suitable to predict the novel splice variants using the short sequence reads generated from next generation sequencing technologies. Further, machine learning techniques require numerically encoded data and produce different accuracy with different encoding procedures. Therefore, splice site prediction with short sequence motifs and without encoding sequence data became a motivation for the present study.

Results

An approach for finding association among nucleotide bases in the splice site motifs is developed and used further to determine the appropriate window size. Besides, an approach for prediction of donor splice sites using sum of absolute error criterion has also been proposed. The proposed approach has been compared with commonly used approaches i.e., Maximum Entropy Modeling (MEM), Maximal Dependency Decomposition (MDD), Weighted Matrix Method (WMM) and Markov Model of first order (MM1) and was found to perform equally with MEM and MDD and better than WMM and MM1 in terms of prediction accuracy.

Conclusions

The proposed prediction approach can be used in the prediction of donor splice sites with higher accuracy using short sequence motifs and hence can be used as a complementary method to the existing approaches. Based on the proposed methodology, a web server was also developed for easy prediction of donor splice sites by users and is available at http://cabgrid.res.in:8080/sspred .",2014-11-25 +23958307,PROmiRNA: a new miRNA promoter recognition method uncovers the complex regulation of intronic miRNAs.,"The regulation of intragenic miRNAs by their own intronic promoters is one of the open problems of miRNA biogenesis. Here, we describe PROmiRNA, a new approach for miRNA promoter annotation based on a semi-supervised statistical model trained on deepCAGE data and sequence features. We validate our results with existing annotation, PolII occupancy data and read coverage from RNA-seq data. Compared to previous methods PROmiRNA increases the detection rate of intronic promoters by 30%, allowing us to perform a large-scale analysis of their genomic features, as well as elucidate their contribution to tissue-specific regulation. PROmiRNA can be downloaded from http://promirna.molgen.mpg.de.",2013-08-16 +24565500,MOABS: model based analysis of bisulfite sequencing data.,"Bisulfite sequencing (BS-seq) is the gold standard for studying genome-wide DNA methylation. We developed MOABS to increase the speed, accuracy, statistical power and biological relevance of BS-seq data analysis. MOABS detects differential methylation with 10-fold coverage at single-CpG resolution based on a Beta-Binomial hierarchical model and is capable of processing two billion reads in 24 CPU hours. Here, using simulated and real BS-seq data, we demonstrate that MOABS outperforms other leading algorithms, such as Fisher's exact test and BSmooth. Furthermore, MOABS analysis can be easily extended to differential 5hmC analysis using RRBS and oxBS-seq. 
MOABS is available at http://code.google.com/p/moabs/.",2014-02-24 +25407965,Vindel: a simple pipeline for checking indel redundancy.,"

Background

With the advance of next generation sequencing (NGS) technologies, a large number of insertion and deletion (indel) variants have been identified in human populations. Despite much research into variant calling, it has been found that a non-negligible proportion of the identified indel variants might be false positives due to sequencing errors, artifacts caused by ambiguous alignments, and annotation errors.

Results

In this paper, we examine indel redundancy in dbSNP, one of the central databases for indel variants, and develop a standalone computational pipeline, dubbed Vindel, to detect redundant indels. The pipeline first applies indel position information to form candidate redundant groups, then performs indel mutations to the reference genome to generate corresponding indel variant substrings. Finally the indel variant substrings in the same candidate redundant groups are compared in a pairwise fashion to identify redundant indels. We applied our pipeline to check for redundancy in the human indels in dbSNP. Our pipeline identified approximately 8% redundancy in insertion type indels, 12% in deletion type indels, and overall 10% for insertions and deletions combined. These numbers are largely consistent across all human autosomes. We also investigated indel size distribution and adjacent indel distance distribution for a better understanding of the mechanisms generating indel variants.

Conclusions

Vindel, a simple yet effective computational pipeline, can be used to check whether a set of indels are redundant with respect to those already in the database of interest such as NCBI's dbSNP. Of the approximately 5.9 million indels we examined, nearly 0.6 million are redundant, revealing a serious limitation in the current indel annotation. Statistics results prove the consistency of the pipeline on indel redundancy detection for all 22 chromosomes. Apart from the standalone Vindel pipeline, the indel redundancy check algorithm is also implemented in the web server http://bioinformatics.cs.vt.edu/zhanglab/indelRedundant.php .",2014-11-19 +25414383,Knowledge-rich temporal relation identification and classification in clinical notes.,"

Motivation

We examine the task of temporal relation classification for the clinical domain. Our approach to this task departs from existing ones in that it is (i) 'knowledge-rich', employing sophisticated knowledge derived from discourse relations as well as both domain-independent and domain-dependent semantic relations, and (ii) 'hybrid', combining the strengths of rule-based and learning-based approaches. Evaluation results on the i2b2 Clinical Temporal Relations Challenge corpus show that our approach yields a 17-24% and 8-14% relative reduction in error over a state-of-the-art learning-based baseline system when gold-standard and automatically identified temporal relations are used, respectively. Database URL: http://www.hlt.utdallas.edu/~jld082000/temporal-relations/",2014-11-19 +24565028,PhosSA: Fast and accurate phosphorylation site assignment algorithm for mass spectrometry data.,"Phosphorylation site assignment of high throughput tandem mass spectrometry (LC-MS/MS) data is one of the most common and critical aspects of phosphoproteomics. Correctly assigning phosphorylated residues helps us understand their biological significance. The design of common search algorithms (such as Sequest, Mascot etc.) do not incorporate site assignment; therefore additional algorithms are essential to assign phosphorylation sites for mass spectrometry data. The main contribution of this study is the design and implementation of a linear time and space dynamic programming strategy for phosphorylation site assignment referred to as PhosSA. The proposed algorithm uses summation of peak intensities associated with theoretical spectra as an objective function. Quality control of the assigned sites is achieved using a post-processing redundancy criteria that indicates the signal-to-noise ratio properties of the fragmented spectra. 
The quality assessment of the algorithm was determined using experimentally generated data sets using synthetic peptides for which phosphorylation sites were known. We report that PhosSA was able to achieve a high degree of accuracy and sensitivity with all the experimentally generated mass spectrometry data sets. The implemented algorithm is shown to be extremely fast and scalable with increasing number of spectra (we report up to 0.5 million spectra/hour on a moderate workstation). The algorithm is designed to accept results from both Sequest and Mascot search engines. An executable is freely available at http://helixweb.nih.gov/ESBL/PhosSA/ for academic research purposes.",2013-11-07 +25414382,Kin-Driver: a database of driver mutations in protein kinases.,"Somatic mutations in protein kinases (PKs) are frequent driver events in many human tumors, while germ-line mutations are associated with hereditary diseases. Here we present Kin-driver, the first database that compiles driver mutations in PKs with experimental evidence demonstrating their functional role. Kin-driver is a manual expert-curated database that pays special attention to activating mutations (AMs) and can serve as a validation set to develop new generation tools focused on the prediction of gain-of-function driver mutations. It also offers an easy and intuitive environment to facilitate the visualization and analysis of mutations in PKs. Because all mutations are mapped onto a multiple sequence alignment, analogue positions between kinases can be identified and tentative new mutations can be proposed for studying by transferring annotation. Finally, our database can also be of use to clinical and translational laboratories, helping them to identify uncommon AMs that can correlate with response to new antitumor drugs. The website was developed using PHP and JavaScript, which are supported by all major browsers; the database was built using MySQL server. 
Kin-driver is available at: http://kin-driver.leloir.org.ar/",2014-11-19 +25138652,"CoroEval: a multi-platform, multi-modality tool for the evaluation of 3D coronary vessel reconstructions.","We present a software, called CoroEval, for the evaluation of 3D coronary vessel reconstructions from clinical data. It runs on multiple operating systems and is designed to be independent of the imaging modality used. At this point, its purpose is the comparison of reconstruction algorithms or acquisition protocols, not the clinical diagnosis. Implemented metrics are vessel sharpness and diameter. All measurements are taken from the raw intensity data to be independent of display windowing functions. The user can either import a vessel centreline segmentation from other software, or perform a manual segmentation in CoroEval. An automated segmentation correction algorithm is provided to improve non-perfect centrelines. With default settings, measurements are taken at 1 mm intervals along the vessel centreline and from 10 different angles at each measurement point. This allows for outlier detection and noise-robust measurements without the burden and subjectivity a manual measurement process would incur. Graphical measurement results can be directly exported to vector or bitmap graphics for integration into scientific publications. Centreline and lumen segmentations can be exported as point clouds and in various mesh formats. We evaluated the diameter measurement process using three phantom datasets. An average deviation of 0.03 ± 0.03 mm was found. The software is available in binary and source code form at http://www5.cs.fau.de/CoroEval/.",2014-08-20 +23180944,Understanding the health and social care needs of people living with IBD: a meta-synthesis of the evidence.,"

Aim

To undertake a metasynthesis of qualitative studies to understand the health and social needs of people living with inflammatory bowel disease (IBD).

Methods

A systematic search strategy identified qualitative studies exploring the phenomenon of living with inflammatory bowel disease. Databases included MEDLINE, PsychInfo, EMBASE, CINAHL and the British Nursing Index via the OVID platform. Qualitative search filters were adapted from Hedges database (http://www.urmc.rochester.edu/hslt/miner/digital_library/tip_sheets/Cinahl_eb_filters.pdf). Qualitative empirical studies exploring the health and social needs of people living with inflammatory bowel disease were selected. Study eligibility and data extraction were independently completed using the Critical Appraisal Skills Programme for qualitative studies. The studies were analysed and synthesised using metasynthesis methodology. The themes from the studies allowed for common translations into a new interpretation of the impact of living with inflammatory bowel disease.

Results

Of 1395 studies, six published studies and one unpublished thesis fulfilled the inclusion criteria. First iteration of synthesis identified 16 themes, 2nd iteration synthesised these into three main 2nd order constructs: ""detained by the disease""; ""living in a world of disease"" and ""wrestling with life"". ""Detained by the disease"" is the fear of incontinence, the behaviour the patients display due to the fear, and the impact this has on the individual, such as social isolation and missing out on life events. All of these serve to ""pull"" the patient back from normal living. ""Living in a world of disease"" is the long term effects of living with a long term condition and the fear of these effects. ""Wrestling with life"" is the continued fight to thrive, the ""push"" to continue normal living.

Conclusion

The metasynthesis provides a comprehensive representation of living with IBD. The unmistakeable burden of incontinence is exposed and its ongoing effects are demonstrated. The combined overall impact of living with IBD is the tension these patients live with: ""Pushed and pulled: a compromised life"", people living with IBD experience a constant conflict throughout their lives, they push to be normal but IBD pulls them back. The impact of the fear of incontinence and behaviour of the individual as a result, requires further qualitative enquiry.",2012-11-01 +25405774,Sexual dysfunction related to drugs: a critical review. Part IV: cardiovascular drugs.,"

Introduction

Sexual dysfunction is a potential side effect of cardiovascular drugs: this article is a critical review of the current literature. Many studies have been published on this topic. Most of these studies are not methodologically robust, few are RCTs and most did not use a validated rating scale to evaluate sexual functioning. In addition, other methodological flaws limit greatly the conclusions of these studies. Most studies relate to male populations and only a few have been conducted on women. Also, the majority of studies on sexual dysfunction induced by cardiovascular drugs relate to antihypertensive drugs. While there is evidence to suggest that older antihypertensive drugs (diuretics, beta-blockers, centrally acting agents) have a negative impact on erectile function, newer agents seem to have either neutral (ACE inhibitors, calcium antagonists) or beneficial effects (i. e., angiotensin receptor blockers, nebivolol). Other cardiovascular drugs analyzed in this review also appear to have an inhibitory action on sexual function. For men, there is some weak evidence supporting the use of specific treatment strategies for sexual dysfunction associated with these drugs.

Methods

This study was conducted in 2014 using the paper and electronic resources of the library of the ""Azienda Provinciale per i Servizi Sanitari (APSS)"" in Trento, Italy (http://atoz.ebsco.com/Titles/2793). The library has access to a wide range of databases including DYNAMED, MEDLINE Full Text, CINAHL Plus Full Text, The Cochrane Library, Micromedex healthcare series, BMJ Clinical Evidence. The full list of available journals can be viewed at http://atoz.ebsco.com/Titles/2793 or at the APSS web site (http://www.apss.tn.it). In completing this review, a literature search was conducted using the key words ""cardiovascular"", ""adrenergic beta antagonist"", ""α1-adrenoceptor antagonist"", ""angiotensin converting enzyme inhibitor"", ""angiotensin receptor antagonist"", ""angiotensin receptor blocker"", ""beta blocker"", ""beta receptor antagonist"", ""calcium channel blocker"", ""diuretic"", ""antihypertensive"", ""sexual dysfunction"", ""sexual side effects"", ""treatment-emergent sexual dysfunction"". All resulting listed articles were reviewed.

Conclusion

The review includes studies that investigated the relationship between these drug treatments and sexual dysfunction. The purpose was to identify possible intervention strategies for sexual dysfunction related to these drugs.",2014-11-18 +26973707,Estimation of genetic diversity in viral populations from next generation sequencing data with extremely deep coverage.,"

Background

In this paper we propose a method and discuss its computational implementation as an integrated tool for the analysis of viral genetic diversity on data generated by high-throughput sequencing. The main motivation for this work is to better understand the genetic diversity of viruses with high rates of nucleotide substitution, as HIV-1 and Influenza. Most methods for viral diversity estimation proposed so far are intended to take benefit of the longer reads produced by some next-generation sequencing platforms in order to estimate a population of haplotypes which represent the diversity of the original population. The method proposed here is custom-made to take advantage of the very low error rate and extremely deep coverage per site, which are the main features of some neglected technologies that have not received much attention due to the short length of its reads, which precludes haplotype estimation. This approach allowed us to avoid some hard problems related to haplotype reconstruction (need of long reads, preliminary error filtering and assembly).

Results

We propose to measure genetic diversity of a viral population through a family of multinomial probability distributions indexed by the sites of the virus genome, each one representing the distribution of nucleic bases per site. Moreover, the implementation of the method focuses on two main optimization strategies: a read mapping/alignment procedure that aims at the recovery of the maximum possible number of short-reads; the inference of the multinomial parameters in a Bayesian framework with smoothed Dirichlet estimation. The Bayesian approach provides conditional probability distributions for the multinomial parameters allowing one to take into account the prior information of the control experiment and providing a natural way to separate signal from noise, since it automatically furnishes Bayesian confidence intervals and thus avoids the drawbacks of preliminary error filtering.

Conclusions

The methods described in this paper have been implemented as an integrated tool called Tanden (Tool for Analysis of Diversity in Viral Populations) and successfully tested on samples obtained from HIV-1 strain NL4-3 (group M, subtype B) cultivations on primary human cell cultures in many distinct viral propagation conditions. Tanden is written in C# (Microsoft), runs on the Windows operating system, and can be downloaded from: http://tanden.url.ph/.",2016-03-11 +21233524,Discriminative motif finding for predicting protein subcellular localization.,"Many methods have been described to predict the subcellular location of proteins from sequence information. However, most of these methods either rely on global sequence properties or use a set of known protein targeting motifs to predict protein localization. Here, we develop and test a novel method that identifies potential targeting motifs using a discriminative approach based on hidden Markov models (discriminative HMMs). These models search for motifs that are present in a compartment but absent in other, nearby, compartments by utilizing an hierarchical structure that mimics the protein sorting mechanism. We show that both discriminative motif finding and the hierarchical structure improve localization prediction on a benchmark data set of yeast proteins. The motifs identified can be mapped to known targeting motifs and they are more conserved than the average protein sequence. Using our motif-based predictions, we can identify potential annotation errors in public databases for the location of some of the proteins. 
A software implementation and the data set described in this paper are available from http://murphylab.web.cmu.edu/software/2009_TCBB_motif/.",2011-03-01 +23923012,ParallelStructure: a R package to distribute parallel runs of the population genetics program STRUCTURE on multi-core computers.,"This software package provides an R-based framework to make use of multi-core computers when running analyses in the population genetics program STRUCTURE. It is especially addressed to those users of STRUCTURE dealing with numerous and repeated data analyses, and who could take advantage of an efficient script to automatically distribute STRUCTURE jobs among multiple processors. It also consists of additional functions to divide analyses among combinations of populations within a single data set without the need to manually produce multiple projects, as it is currently the case in STRUCTURE. The package consists of two main functions: MPI_structure() and parallel_structure() as well as an example data file. We compared the performance in computing time for this example data on two computer architectures and showed that the use of the present functions can result in several-fold improvements in terms of computation time. ParallelStructure is freely available at https://r-forge.r-project.org/projects/parallstructure/.",2013-07-29 +25322838,Differential regulation enrichment analysis via the integration of transcriptional regulatory network and gene expression data.,"

Motivation

Although many gene set analysis methods have been proposed to explore associations between a phenotype and a group of genes sharing common biological functions or involved in the same biological process, the underlying biological mechanisms of identified gene sets are typically unexplained.

Results

We propose a method called Differential Regulation-based enrichment Analysis for GENe sets (DRAGEN) to identify gene sets in which a significant proportion of genes have their transcriptional regulatory patterns changed in a perturbed phenotype. We conduct comprehensive simulation studies to demonstrate the capability of our method in identifying differentially regulated gene sets. We further apply our method to three human microarray expression datasets, two with hormone treated and control samples and one concerning different cell cycle phases. Results indicate that the capability of DRAGEN in identifying phenotype-associated gene sets is significantly superior to those of four existing methods for analyzing differentially expressed gene sets. We conclude that the proposed differential regulation enrichment analysis method, though exploratory in nature, complements the existing gene set analysis methods and provides a promising new direction for the interpretation of gene expression data.

Availability and implementation

The program of DRAGEN is freely available at http://bioinfo.au.tsinghua.edu.cn/dragen/.

Contact

ruijiang@tsinghua.edu.cn or jiang@cs.ucr.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-15 +26254434,Improving protein fold recognition with hybrid profiles combining sequence and structure evolution.,"

Motivation

Template-based modeling, the most successful approach for predicting protein 3D structure, often requires detecting distant evolutionary relationships between the target sequence and proteins of known structure. Developed for this purpose, fold recognition methods use elaborate strategies to exploit evolutionary information, mainly by encoding amino acid sequence into profiles. Since protein structure is more conserved than sequence, the inclusion of structural information can improve the detection of remote homology.

Results

Here, we present ORION, a new fold recognition method based on the pairwise comparison of hybrid profiles that contain evolutionary information from both protein sequence and structure. Our method uses the 16-state structural alphabet Protein Blocks, which provides an accurate 1D description of protein structure local conformations. ORION systematically outperforms PSI-BLAST and HHsearch on several benchmarks, including target sequences from the modeling competitions CASP8, 9 and 10, and detects ∼10% more templates at fold and superfamily SCOP levels.

Availability

Software freely available for download at http://www.dsimb.inserm.fr/orion/.

Contact

jean-christophe.gelly@univ-paris-diderot.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-07 +25667547,FUN-L: gene prioritization for RNAi screens.,"

Motivation

Most biological processes remain only partially characterized with many components still to be identified. Given that a whole genome can usually not be tested in a functional assay, identifying the genes most likely to be of interest is of critical importance to avoid wasting resources.

Results

Given a set of known functionally related genes and using a state-of-the-art approach to data integration and mining, our Functional Lists (FUN-L) method provides a ranked list of candidate genes for testing. Validation of predictions from FUN-L with independent RNAi screens confirms that FUN-L-produced lists are enriched in genes with the expected phenotypes. In this article, we describe a website front end to FUN-L.

Availability and implementation

The website is freely available to use at http://funl.org",2015-02-08 +25404129,PoSSuM v.2.0: data update and a new function for investigating ligand analogs and target proteins of small-molecule drugs.,"PoSSuM (http://possum.cbrc.jp/PoSSuM/) is a database for detecting similar small-molecule binding sites on proteins. Since its initial release in 2011, PoSSuM has grown to provide information related to 49 million pairs of similar binding sites discovered among 5.5 million known and putative binding sites. This enlargement of the database is expected to enhance opportunities for biological and pharmaceutical applications, such as predictions of new functions and drug discovery. In this release, we have provided a new service named PoSSuM drug search (PoSSuMds) at http://possum.cbrc.jp/PoSSuM/drug_search/, in which we selected 194 approved drug compounds retrieved from ChEMBL, and detected their known binding pockets and pockets that are similar to them. Users can access and download all of the search results via a new web interface, which is useful for finding ligand analogs as well as potential target proteins. Furthermore, PoSSuMds enables users to explore the binding pocket universe within PoSSuM. Additionally, we have improved the web interface with new functions, including sortable tables and a viewer for visualizing and downloading superimposed pockets.",2014-11-17 +25866705,Simbody: multibody dynamics for biomedical research.,"Multibody software designed for mechanical engineering has been successfully employed in biomedical research for many years. For real time operation some biomedical researchers have also adapted game physics engines. However, these tools were built for other purposes and do not fully address the needs of biomedical researchers using them to analyze the dynamics of biological structures and make clinically meaningful recommendations. 
We are addressing this problem through the development of an open source, extensible, high performance toolkit including a multibody mechanics library aimed at the needs of biomedical researchers. The resulting code, Simbody, supports research in a variety of fields including neuromuscular, prosthetic, and biomolecular simulation, and related research such as biologically-inspired design and control of humanoid robots and avatars. Simbody is the dynamics engine behind OpenSim, a widely used biomechanics simulation application. This article reviews issues that arise uniquely in biomedical research, and reports on the architecture, theory, and computational methods Simbody uses to address them. By addressing these needs explicitly Simbody provides a better match to the needs of researchers than can be obtained by adaptation of mechanical engineering or gaming codes. Simbody is a community resource, free for any purpose. We encourage wide adoption and invite contributions to the code base at https://simtk.org/home/simbody.",2011-01-01 +26938139,"Malaria Surveillance - United States, 2013.","

Problem/condition

Malaria in humans is caused by intraerythrocytic protozoa of the genus Plasmodium. These parasites are transmitted by the bite of an infective female Anopheles mosquito. The majority of malaria infections in the United States occur among persons who have traveled to regions with ongoing malaria transmission. However, malaria is also occasionally acquired by persons who have not traveled out of the country through exposure to infected blood products, congenital transmission, laboratory exposure, or local mosquitoborne transmission. Malaria surveillance in the United States is conducted to identify episodes of local transmission and to guide prevention recommendations for travelers.

Period covered

This report summarizes cases in persons with onset of illness in 2013 and summarizes trends during previous years.

Description of system

Malaria cases diagnosed by blood film, polymerase chain reaction, or rapid diagnostic tests are mandated to be reported to local and state health departments by health care providers or laboratory staff. Case investigations are conducted by local and state health departments, and reports are transmitted to CDC through the National Malaria Surveillance System, National Notifiable Diseases Surveillance System, or direct CDC consultations. CDC conducted antimalarial drug resistance marker testing on blood samples submitted to CDC by health care providers or local/state health departments. Data from these reporting systems serve as the basis for this report.

Results

CDC received 1,727 reported cases of malaria, including two congenital cases, with an onset of symptoms in 2013 among persons in the United States. The total number of cases represents a 2% increase from the 1,687 cases reported for 2012. Plasmodium falciparum, P. vivax, P. malariae, and P. ovale were identified in 61%, 14%, 3%, and 4% of cases, respectively. Forty (2%) patients were infected by two species. The infecting species was unreported or undetermined in 17% of cases. Polymerase chain reaction testing determined or corrected the species for 85 of the 137 (62%) samples evaluated for drug resistance marker testing. Of the 904 patients who reported purpose of travel, 635 (70%) were visiting friends or relatives (VFR). Among the 961 cases in U.S. civilians for whom information on chemoprophylaxis use and travel region was known, 42 (4%) patients reported that they had initiated and adhered to a chemoprophylaxis drug regimen recommended by CDC for the regions to which they had traveled. Thirty-six cases were reported in pregnant women, none of whom had adhered to chemoprophylaxis. Among all reported cases, approximately 270 (16%) were classified as severe illnesses in 2013. Of these, 10 persons with malaria died in 2013, the highest number since 2001. In 2013, a total of 137 blood samples submitted to CDC were tested for molecular markers associated with antimalarial drug resistance. Of the 100 P. falciparum-positive samples, 95 were tested for pyrimethamine resistance: 88 (93%) had genetic polymorphisms associated with pyrimethamine drug resistance, 74 (76%) with sulfadoxine resistance, 53 (53%) with chloroquine resistance, one (1%) with atovaquone resistance, none with mefloquine drug resistance, and none with artemisinin resistance.

Interpretation

The overall trend of malaria cases has been increasing since 1973; the number of cases reported in 2013 is the third highest annual total since then. Despite progress in reducing the global burden of malaria, the disease remains endemic in many regions, and the use of appropriate prevention measures by travelers is still inadequate.

Public health actions

Completion of data elements on the malaria case report form increased slightly in 2013 compared with 2012, but still remains unacceptably low. This incomplete reporting compromises efforts to examine trends in malaria cases and prevent infections. VFRs continue to be a difficult population to reach with effective malaria prevention strategies. Evidence-based prevention strategies that effectively target VFRs need to be developed and implemented to have a substantial impact on the numbers of imported malaria cases in the United States. Fewer patients reported taking chemoprophylaxis in 2013 (32%) compared with 2012 (34%), and adherence was poor among those who did take chemoprophylaxis. Proper use of malaria chemoprophylaxis will prevent the majority of malaria illness and reduce the risk for severe disease (http://www.cdc.gov/malaria/travelers/drugs.html). Malaria infections can be fatal if not diagnosed and treated promptly with antimalarial medications appropriate for the patient's age and medical history, the likely country of malaria acquisition, and previous use of antimalarial chemoprophylaxis. Recent molecular laboratory advances have enabled CDC to identify and conduct molecular surveillance of antimalarial drug resistance markers (http://www.cdc.gov/malaria/features/ars.html). These advances will allow CDC to track, guide treatment, and manage drug resistance in malaria parasites both domestically and globally. For this to be successful, specimens should be submitted for all cases diagnosed in the United States. Clinicians should consult the CDC Guidelines for Treatment of Malaria and contact the CDC's Malaria Hotline for case management advice, when needed. 
Malaria treatment recommendations can be obtained online (http://www.cdc.gov/malaria/diagnosis_treatment) or by calling the Malaria Hotline (770-488-7788 or toll-free at 855-856-4713).",2016-03-04 +30722630,First Report of Powdery Mildew Caused by Erysiphe platani on Sycamore (Platanus occidentalis) in South Korea.,"Platanus occidentalis L. (sycamore) is an important shade tree distributed throughout the Northern Hemisphere and in South Korea. It has been widely used as an ornamental tree, especially in urban regions and by roadsides. The average rate of roadside planting throughout South Korea covers about 5.7% (up to 38% in Seoul), equivalent to 0.36 million trees. In early July 2012, after a rainy spell in summer, an outbreak of powdery mildew on sycamore was first observed on roadside trees in Gwangju, a southern province of South Korea. A more extensive nationwide survey revealed no powdery mildew in northern or central regions of South Korea. The disease has spread rapidly within Gwangju, even though fungicide applications were carried out after the rainy spell. Major symptoms included white, superficial mycelia, grey to brown lesions on the surface of the leaves due to the presence of a hyperparasite (tentatively identified as Ampelomyces sp.), a slight chlorosis, and severe leaf distortion followed by defoliation. Conidiophores were produced singly, straight, and unbranched, with lengths of 35.2 to 315.2 μm (average 170.4 μm). Conidia were ellipsoid or doliiform, ranging in size from 34.9 to 47.4 μm (average 38.2 μm) long × 16.5 to 26.8 μm (average 23.9 μm) wide. Primary conidia had a truncate base and rounded apex; secondary conidia had both a truncate base and apex. The conidial outer surface had a reticulated wrinkling. Cleistothecia (i.e., sexual spore structures) were not found during the survey, which extended from July to October. These characteristics and the host species match those of Microsphaera platani (syn. 
Erysiphe platani), which was described on P. occidentalis in Washington State (2). Fungal rDNA was amplified using primers ITS1 and LR5F (4) for one sample (EML-PLA1, GenBank JX485651). BLASTn searches of GenBank revealed high sequence identity to E. platani (99.5% to JQ365943 and 99.3% to JQ365940). Recently, Liang et al. (3) reported the first occurrence of powdery mildew by E. platani on P. orientalis in China based only on its morphology. Thus, in this study, the authors could only use ITS sequence data from the United States and Europe to characterize the isolate. To date, nine records of powdery mildews of Platanus spp. have been reported worldwide: on P. hispanica from Brazil, Japan, Hungary, and Slovakia; P. orientalis from Israel; P. racemosa from the United States; P. × acerifolia from the United Kingdom and Germany; and Platanus sp. from Argentina and Australia (1). Interestingly, the hyperparasite, Ampelomyces sp., was found with E. platani, suggesting that there may be some level of biocontrol in nature. Pathogenicity was confirmed by gently pressing diseased leaves onto six leaves of healthy sycamore plants in the field in September. The treated leaves were sealed in a sterilized vinyl pack to maintain humid conditions for 2 days. Similar symptoms were observed on the inoculated leaves 10 days after inoculation. Koch's postulates were fulfilled by re-observing the fungal pathogen. To our knowledge, this is the first report of powdery mildew caused by E. platani on sycamore in South Korea. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. http://nt.ars-grin.gov/fungaldatabases/ , 2012. (2) D. A. Glawe. Plant Health Progress, doi:10.1094/PHP-2003-0818-01-HN, 2003. (3) C. Liang et al. Plant Pathol. 57:375, 2008. (4) T. J. White et al., pp. 315-322 in: PCR Protocols: A Guide to Methods and Applications. M. A. Innis et al., ed. 
Academic Press, New York, 1990.",2013-06-01 +21856738,Detecting biological network organization and functional gene orthologs.,"

Summary

We developed a package TripletSearch to compute relationships within triplets of genes based on Roundup, an orthologous gene database containing >1500 genomes. These relationships, derived from the coevolution of genes, provide valuable information in the detection of biological network organization from the local to the system level, in the inference of protein functions and in the identification of functional orthologs. To run the computation, users need to provide the GI IDs of the genes of interest.

Availability

http://wall.hms.harvard.edu/sites/default/files/tripletSearch.tar.gz

Contact

dpwall@hms.harvard.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-19 +24930720,"Whole genome single-nucleotide variation profile-based phylogenetic tree building methods for analysis of viral, bacterial and human genomes.","

Unlabelled

Next-generation sequencing data can be mapped to a reference genome to identify single-nucleotide polymorphisms/variations (SNPs/SNVs; called SNPs hereafter). In theory, SNPs can be compared across several samples and the differences can be used to create phylogenetic trees depicting relatedness among the samples. However, in practice this is difficult because currently there is no stand-alone tool that takes SNP data directly as input and produces phylogenetic trees. In response to this need, PhyloSNP application was created with two analysis methods 1) a quantitative method that creates the presence/absence matrix which can be directly used to generate phylogenetic trees or creates a tree from a shrunk genome alignment (includes additional bases surrounding the SNP position) and 2) a qualitative method that clusters samples based on the frequency of different bases found at a particular position. The algorithms were used to generate trees from Poliovirus, Burkholderia and human cancer genomics NGS datasets.

Availability

PhyloSNP is freely available for download at http://hive.biochemistry.gwu.edu/dna.cgi?cmd=phylosnp.",2014-06-12 +25403759,Improvements to REDCRAFT: a software tool for simultaneous characterization of protein backbone structure and dynamics from residual dipolar couplings.,"Within the past two decades, there has been an increase in the acquisition of residual dipolar couplings (RDC) for investigations of biomolecular structures. Their use however is still not as widely adopted as the traditional methods of structure determination by NMR, despite their potential for extending the limits in studies that examine both the structure and dynamics of biomolecules. This is in part due to the difficulties associated with the analysis of this information-rich data type. The software analysis tool REDCRAFT was previously introduced to address some of these challenges. Here we describe and evaluate a number of additional features that have been incorporated in order to extend its computational and analytical capabilities. REDCRAFT's more traditional enhancements integrate a modified steric collision term, as well as structural refinement in the rotamer space. Other, non-traditional improvements include: the filtering of viable structures based on relative order tensor estimates, decimation of the conformational space based on structural similarity, and forward/reverse folding of proteins. Utilizing REDCRAFT's newest features we demonstrate de-novo folding of proteins 1D3Z and 1P7E to within less than 1.6 Å of the corresponding X-ray structures, using as many as four RDCs per residue and as little as two RDCs per residue, in two alignment media. We also show the successful folding of a structure to less than 1.6 Å of the X-ray structure using {C(i-1)-N(i), N(i)-H(i), and C(i-1)-H(i)} RDCs in one alignment medium, and only {N(i)-H(i)} in the second alignment medium (a set of data which can be collected on deuterated samples). 
The program is available for download from our website at http://ifestos.cse.sc.edu .",2014-11-18 +25399422,LncRNA2Target: a database for differentially expressed genes after lncRNA knockdown or overexpression.,"Long non-coding RNAs (lncRNAs) have emerged as critical regulators of genes at epigenetic, transcriptional and post-transcriptional levels, yet what genes are regulated by a specific lncRNA remains to be characterized. To assess the effects of the lncRNA on gene expression, an increasing number of researchers profiled the genome-wide or individual gene expression level change after knocking down or overexpressing the lncRNA. Herein, we describe a curated database named LncRNA2Target, which stores lncRNA-to-target genes and is publicly accessible at http://www.lncrna2target.org. A gene was considered as a target of a lncRNA if it is differentially expressed after the lncRNA knockdown or overexpression. LncRNA2Target provides a web interface through which its users can search for the targets of a particular lncRNA or for the lncRNAs that target a particular gene. Both search types are performed either by browsing a provided catalog of lncRNA names or by inserting lncRNA/target gene IDs/names in a search box.",2014-11-15 +25399417,LncRNAWiki: harnessing community knowledge in collaborative curation of human long non-coding RNAs.,"Long non-coding RNAs (lncRNAs) perform a diversity of functions in numerous important biological processes and are implicated in many human diseases. In this report we present lncRNAWiki (http://lncrna.big.ac.cn), a wiki-based platform that is open-content and publicly editable and aimed at community-based curation and collection of information on human lncRNAs. Current related databases are dependent primarily on curation by experts, making it laborious to annotate the exponentially accumulated information on lncRNAs, which inevitably requires collective efforts in community-based curation of lncRNAs. 
Unlike existing databases, lncRNAWiki features comprehensive integration of information on human lncRNAs obtained from multiple different resources and allows not only existing lncRNAs to be edited, updated and curated by different users but also the addition of newly identified lncRNAs by any user. It harnesses community collective knowledge in collecting, editing and annotating human lncRNAs and rewards community-curated efforts by providing explicit authorship based on quantified contributions. LncRNAWiki relies on the underlying knowledge of the scientific community for collective and collaborative curation of human lncRNAs and thus has the potential to serve as an up-to-date and comprehensive knowledgebase for human lncRNAs.",2014-11-15 +24273245,A classification approach for DNA methylation profiling with bisulfite next-generation sequencing data.,"

Motivation

With the advent of high-throughput sequencing technology, bisulfite-sequencing-based DNA methylation profiling methods have emerged as the most promising approaches due to their single-base resolution and genome-wide coverage. However, statistical analysis methods for analyzing this type of methylation data are not well developed. Although the most widely used proportion-based estimation method is simple and intuitive, it is not statistically adequate in dealing with the various sources of noise in bisulfite-sequencing data. Furthermore, it is not biologically satisfactory in applications that require binary methylation status calls.

Results

In this article, we use a mixture of binomial model to characterize bisulfite-sequencing data, and based on the model, we propose to use a classification-based procedure, called the methylation status calling (MSC) procedure, to make binary methylation status calls. The MSC procedure is optimal in terms of maximizing the overall correct allocation rate, and the false discovery rate (FDR) and false non-discovery rate (FNDR) of MSC can be estimated. To control FDR at any given level, we further develop an FDR-controlled MSC procedure, which combines a local FDR-based adaptive procedure with the MSC procedure. Both simulation study and real data application are carried out to examine the performance of the proposed procedures. It is shown in our simulation study that the estimates of FDR and FNDR of the MSC procedure are appropriate. Simulation study also demonstrates that the FDR-controlled MSC procedure is valid in controlling FDR at a prespecified level and is more powerful than the individual binomial testing procedure. In the real data application, the MSC procedure exhibits an estimated FDR of 0.1426 and an estimated FNDR of 0.0067. The overall correct allocation rate is >0.97. These results suggest the effectiveness of our proposed procedures.

Availability and implementation

The proposed procedures are implemented in R and are available at http://www.stat.purdue.edu/~cheng70/code.html.",2013-11-21 +24389658,CMGRN: a web server for constructing multilevel gene regulatory networks using ChIP-seq and gene expression data.,"ChIP-seq technology provides an accurate characterization of transcription or epigenetic factors binding on genomic sequences. With integration of such ChIP-based and other high-throughput information, it would be dedicated to dissecting cross-interactions among multilevel regulators, genes and biological functions. Here, we devised an integrative web server CMGRN (constructing multilevel gene regulatory networks), to unravel hierarchical interactive networks at different regulatory levels. The newly developed method used the Bayesian network modeling to infer causal interrelationships among transcription factors or epigenetic modifications by using ChIP-seq data. Moreover, it used Bayesian hierarchical model with Gibbs sampling to incorporate binding signals of these regulators and gene expression profile together for reconstructing gene regulatory networks. The example applications indicate that CMGRN provides an effective web-based framework that is able to integrate heterogeneous high-throughput data and to reveal hierarchical 'regulome' and the associated gene expression programs.

Availability

http://bioinfo.icts.hkbu.edu.hk/cmgrn; http://www.byanbioinfo.org/cmgrn CONTACT: yanbinai6017@gmail.com or junwen@hku.hk Supplementary Information: Supplementary data are available at Bioinformatics online.",2014-01-02 +26631838,"Relative Prognostic and Predictive Value of Gene Signature and Histologic Grade in Estrogen Receptor-Positive, HER2-Negative Breast Cancer.","

Background

In estrogen receptor (ER)-positive, human epidermal growth factor receptor 2 (HER2)-negative breast cancer, first-generation genomic signatures serve predominately as prognostic biomarkers and secondarily as predictors of response to chemotherapy. We compared both the prognostic and predictive value of histologic grades and genomic markers.

Methods

We retrieved publicly available cDNA microarray data from 1373 primary ER(+)/HER2(-) breast cancers and developed a genomic signature simulated from Recurrence Online (http://www.recurrenceonline.com/) to calculate the recurrence score and risk using predefined sets of genes in the cDNA microarray. We then compared the prognostic and predictive information provided by histologic grade and genomic signature.

Results

Based on genomic signatures, 55%, 28%, and 17% of breast cancers were classified as low, intermediate, and high risk, respectively, whereas the histologic grades were I, II, and III in 22%, 59%, and 19% of breast cancers, respectively. Univariate analysis in the untreated cohort revealed that both histologic grade (overall P = .007) and genomic signature (P < .001) could predict prognosis. Results were similar using the genomic signature, with pathologic complete response rates of 4.6%, 5.7%, and 16.5% for low-, intermediate-, and high-risk cancers, respectively. Neither biomarker was statistically significant in multivariate analysis for predictive response to neoadjuvant chemotherapy (NAC).

Conclusion

Genomic signature was better at identifying low-risk cases compared to histologic grade alone, but both markers had similar predictive values for NAC response. Better predictive biomarkers for NAC response are still needed.",2015-11-10 +25398897,DDMGD: the database of text-mined associations between genes methylated in diseases from different species.,"Gathering information about associations between methylated genes and diseases is important for diseases diagnosis and treatment decisions. Recent advancements in epigenetics research allow for large-scale discoveries of associations of genes methylated in diseases in different species. Searching manually for such information is not easy, as it is scattered across a large number of electronic publications and repositories. Therefore, we developed DDMGD database (http://www.cbrc.kaust.edu.sa/ddmgd/) to provide a comprehensive repository of information related to genes methylated in diseases that can be found through text mining. DDMGD's scope is not limited to a particular group of genes, diseases or species. Using the text mining system DEMGD we developed earlier and additional post-processing, we extracted associations of genes methylated in different diseases from PubMed Central articles and PubMed abstracts. The accuracy of extracted associations is 82% as estimated on 2500 hand-curated entries. DDMGD provides a user-friendly interface facilitating retrieval of these associations ranked according to confidence scores. Submission of new associations to DDMGD is provided. A comparison analysis of DDMGD with several other databases focused on genes methylated in diseases shows that DDMGD is comprehensive and includes most of the recent information on genes methylated in diseases.",2014-11-14 +25398905,Type material in the NCBI Taxonomy Database.,"Type material is the taxonomic device that ties formal names to the physical specimens that serve as exemplars for the species. 
For the prokaryotes these are strains submitted to the culture collections; for the eukaryotes they are specimens submitted to museums or herbaria. The NCBI Taxonomy Database (http://www.ncbi.nlm.nih.gov/taxonomy) now includes annotation of type material that we use to flag sequences from type in GenBank and in Genomes. This has important implications for many NCBI resources, some of which are outlined below.",2014-11-14 +21257716,"Nuclear DNA amounts in angiosperms: targets, trends and tomorrow.","

Background and aims

The amount of DNA in an unreplicated gametic chromosome complement is known as the C-value and is a key biodiversity character of fundamental significance with many practical and predictive uses. Since 1976, Bennett and colleagues have assembled eight compilations of angiosperm C-values for reference purposes and subsequently these have been pooled into the Angiosperm DNA C-values Database (http://data.kew.org/cvalues/). Since the last compilation was published in 2005, a large amount of data on angiosperm genome size has been published. It is therefore timely to bring these data together into a ninth compilation of DNA amounts. Scope The present work lists DNA C-values for 2221 species from 151 original sources (including first values for 1860 species not listed in previous compilations). Combining these data with those published previously shows that C-values are now available for 6287 angiosperm species.

Key findings

Analysis of the dataset, which is by far the largest of the nine compilations published since 1976, shows that angiosperm C-values are now being generated at the highest rate since the first genome sizes were estimated in the 1950s. The compilation includes new record holders for the smallest (1C = 0·0648 pg in Genlisea margaretae) and largest (1C = 152·23 pg in Paris japonica) genome sizes so far reported, extending the range encountered in angiosperms to nearly 2400-fold. A review of progress in meeting targets set at the Plant Genome Size meetings shows that although representation for genera, geographical regions and some plant life forms (e.g. island floras and parasitic plants) has improved, progress to increase familial representation is still slow. In terms of technique it is now clear that flow cytometry is soon likely to become the only method available for plant genome size estimations. Fortunately, this has been accompanied by numerous careful studies to improve the quality of data generated using this technique (e.g. design of new buffers, increased awareness and understanding of problems caused by cytosolic inhibitors). It is also clear that although the speed of DNA sequencing continues to rise dramatically with the advent of next-generation and third-generation sequencing technologies, 'complete genome sequencing' projects are still unable to generate accurate plant genome size estimates.",2011-01-21 +25296554,Visual analysis of the quantitative composition of metagenomic communities: the AmphoraVizu webserver.,"Low-cost DNA sequencing methods have given rise to an enormous development of metagenomics in the past few years. One basic--and difficult--task is the phylogenetic annotation of the metagenomic samples studied. The difficulty comes from the fact that the typical environmental sample contains hundreds of unknown and still uncharacterized microorganisms. 
There are several possible methods to assign at least partial phylogenetic information to these uncharacterized data. Originally, the 16S ribosomal RNA was used as phylogenetic marker, then genome sequence alignments and similarity measures between the unknown genome and the reference genomes were applied (e.g., in the MEGAN software), and more recently, phylogeny-based methods applying suitable sets of marker genes were suggested (AMPHORA, AMPHORA2, and the webserver implementation AmphoraNet). Here, we present a visual analysis tool that is capable of demonstrating the quantitative relations gained from the output of the AMPHORA2 program or the easy-to-use AmphoraNet webserver. Our web-based tool, the AmphoraVizu webserver, makes the phylogenetic distribution of the metagenomic sample clearly visible by using the native output format of AMPHORA2 or AmphoraNet. The user may set the phylogenetic resolution (i.e., superkingdom, phylum, class, order, family, genus, and species) along with the chart type and will receive the distribution data detailed for all relevant marker genes in the sample. For publication quality results, the chart labels can be customized by the user. The visualization webserver is available at the address http://amphoravizu.pitgroup.org. The AmphoraNet webserver is available at http://amphoranet.pitgroup.org. The open-source version of the AmphoraVizu program is available for download at http://pitgroup.org/apps/amphoravizu/AmphoraVizu.pl.",2014-10-10 +25161231,HubAlign: an accurate and efficient method for global alignment of protein-protein interaction networks.,"

Motivation

High-throughput experimental techniques have produced a large amount of protein-protein interaction (PPI) data. The study of PPI networks, such as comparative analysis, shall benefit the understanding of life process and diseases at the molecular level. One way of comparative analysis is to align PPI networks to identify conserved or species-specific subnetwork motifs. A few methods have been developed for global PPI network alignment, but it still remains challenging in terms of both accuracy and efficiency.

Results

This paper presents a novel global network alignment algorithm, denoted as HubAlign, that makes use of both network topology and sequence homology information, based upon the observation that topologically important proteins in a PPI network usually are much more conserved and thus, more likely to be aligned. HubAlign uses a minimum-degree heuristic algorithm to estimate the topological and functional importance of a protein from the global network topology information. Then HubAlign aligns topologically important proteins first and gradually extends the alignment to the whole network. Extensive tests indicate that HubAlign greatly outperforms several popular methods in terms of both accuracy and efficiency, especially in detecting functionally similar proteins.

Availability

HubAlign is available freely for non-commercial purposes at http://ttic.uchicago.edu/~hashemifar/software/HubAlign.zip.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +23589649,Exome-based analysis for RNA epigenome sequencing data.,"

Motivation

Fragmented RNA immunoprecipitation combined with RNA sequencing enabled the unbiased study of RNA epigenome at a near single-base resolution; however, unique features of this new type of data call for novel computational techniques.

Result

Through examining the connections of RNA epigenome sequencing data with two well-studied data types, ChIP-Seq and RNA-Seq, we unveiled the salient characteristics of this new data type. The computational strategies were discussed accordingly, and a novel data processing pipeline was proposed that combines several existing tools with a newly developed exome-based approach 'exomePeak' for detecting, representing and visualizing the post-transcriptional RNA modification sites on the transcriptome.

Availability

The MATLAB package 'exomePeak' and additional details are available at http://compgenomics.utsa.edu/exomePeak/.",2013-04-14 +26557215,The Tolerability and Efficacy of a Three-product Anti-aging Treatment Regimen in Subjects with Moderate-to-severe Photodamage.,"Retinoids and alpha hydroxy acids differ in mechanism of action for treatment of photodamage, but concurrent use may produce a synergistic effect by combining retinoid-induced normalization of cellular differentiation with alpha hydroxy acid-induced exfoliation (in hydrophilic areas) and enhanced dermal and epidermal hydration. A recent bioengineered molecule, ethyl lactyl retinoate (alpha hydroxy acid retinoid conjugate), is the first to deliver alpha hydroxy acids and retinoids together in a hydrolysis-based time-released fashion. This could improve efficacy while minimizing irritation commonly associated with retinoid use. An eight-week clinical study was conducted to examine the efficacy and tolerability of this formulation; 25 women aged 54.1 ±8.9 years (mean ± SD) with moderate-to-severe photodamage (as determined by physician investigators using the Glogau Wrinkle Scale) employed a twice-daily regimen of cleanser (7.8% 1-lactic acid, 2% salicylic acid) and anti-aging serum (0.1% alpha hydroxy acids-retinoids, 6.5% 1-lactic acid) with concurrent use of sun protection factor 50+ sunscreen as needed. Longitudinal analysis of study data revealed statistically significant improvement in photodamage, dryness/flaking, dyschromia, and global appearance at eight weeks. All study products were well-tolerated throughout. Investigators concluded that the alpha hydroxy acid retinoid conjugate is a safe and effective topical therapy for moderate-to-severe photodamage, warranting further study, (clinicaltrials.gov, NCT02422836, https://clinicaltrials.gov/ct2/show/NCT02422836?term=NCT02422836).",2015-10-01 +27258851,Nitrate from Drinking Water and Diet and Bladder Cancer Among Postmenopausal Women in Iowa.,"

Background

Nitrate is a drinking water contaminant arising from agricultural sources, and it is a precursor in the endogenous formation of N-nitroso compounds (NOC), which are possible bladder carcinogens.

Objectives

We investigated the ingestion of nitrate and nitrite from drinking water and diet and bladder cancer risk in women.

Methods

We identified incident bladder cancers among a cohort of 34,708 postmenopausal women in Iowa (1986-2010). Dietary nitrate and nitrite intakes were estimated from a baseline food frequency questionnaire. Drinking water source and duration were assessed in a 1989 follow-up. For women using public water supplies (PWS) > 10 years (n = 15,577), we estimated average nitrate (NO3-N) and total trihalomethane (TTHM) levels and the number of years exceeding one-half the maximum contaminant level (NO3-N: 5 mg/L, TTHM: 40 μg/mL) from historical monitoring data. We computed hazard ratios (HRs) and 95% confidence intervals (CIs), and assessed nitrate interactions with TTHM and with modifiers of NOC formation (smoking, vitamin C).

Results

We identified 258 bladder cancer cases, including 130 among women > 10 years at their PWS. In multivariable-adjusted models, we observed nonsignificant associations among women in the highest versus lowest quartile of average drinking water nitrate concentration (HR = 1.48; 95% CI: 0.92, 2.40; ptrend = 0.11), and we found significant associations among those exposed ≥ 4 years to drinking water with > 5 mg/L NO3-N (HR = 1.62; 95% CI: 1.06, 2.47; ptrend = 0.03) compared with women having 0 years of comparable exposure. TTHM adjustment had little influence on associations, and we observed no modification by vitamin C intake. Relative to a common reference group of never smokers with the lowest nitrate exposures, associations were strongest for current smokers with the highest nitrate exposures (HR = 3.67; 95% CI: 1.43, 9.38 for average water NO3-N and HR = 3.48; 95% CI: 1.20, 10.06 and ≥ 4 years > 5 mg/L, respectively). Dietary nitrate and nitrite intakes were not associated with bladder cancer.

Conclusions

Long-term ingestion of elevated nitrate in drinking water was associated with an increased risk of bladder cancer among postmenopausal women. Citation: Jones RR, Weyer PJ, DellaValle CT, Inoue-Choi M, Anderson KE, Cantor KP, Krasner S, Robien K, Beane Freeman LE, Silverman DT, Ward MH. 2016. Nitrate from drinking water and diet and bladder cancer among postmenopausal women in Iowa. Environ Health Perspect 124:1751-1758; http://dx.doi.org/10.1289/EHP191.",2016-06-03 +25392422,tRFdb: a database for transfer RNA fragments.,"We have created tRFdb, the first database of transfer RNA fragments (tRFs), available at http://genome.bioch.virginia.edu/trfdb/. With over 100 small RNA libraries analyzed, the database currently contains the sequences and read counts of the three classes of tRFs for eight species: R. sphaeroides, S. pombe, D. melanogaster, C. elegans, Xenopus, zebra fish, mouse and human, for a total of 12,877 tRFs. The database can be searched by tRF ID or tRF sequence, and the results can be limited by organism. The search results show the genome coordinates and names of the tRNAs the sequence may derive from, and there are links for the sequence of the tRF and parental tRNA, and links for the read counts in all the corresponding small RNA libraries. As a case study for how this database may be used, we have shown that a certain class of tRFs, tRF-1s, is highly upregulated in B-cell malignancies.",2014-11-11 +25392407,SEVA 2.0: an update of the Standard European Vector Architecture for de-/re-construction of bacterial functionalities.,"The Standard European Vector Architecture 2.0 database (SEVA-DB 2.0, http://seva.cnb.csic.es) is an improved and expanded version of the platform released in 2013 (doi: 10.1093/nar/gks1119) aimed at assisting the choice of optimal genetic tools for de-constructing and re-constructing complex prokaryotic phenotypes. 
By adopting simple compositional rules, the SEVA standard facilitates combinations of functional DNA segments that ease both the analysis and the engineering of diverse Gram-negative bacteria for fundamental or biotechnological purposes. The large number of users of the SEVA-DB during its first two years of existence has resulted in a valuable feedback that we have exploited for fixing DNA sequence errors, improving the nomenclature of the SEVA plasmids, expanding the vector collection, adding new features to the web interface and encouraging contributions of materials from the community of users. The SEVA platform is also adopting the Synthetic Biology Open Language (SBOL) for electronic-like description of the constructs available in the collection and their interfacing with genetic devices developed by other Synthetic Biology communities. We advocate the SEVA format as one interim asset for the ongoing transition of genetic design of microorganisms from being a trial-and-error endeavor to become an authentic engineering discipline.",2014-11-11 +25392416,PubAngioGen: a database and knowledge for angiogenesis and related diseases.,"Angiogenesis is the process of generating new blood vessels based on existing ones, which is involved in many diseases including cancers, cardiovascular diseases and diabetes mellitus. Recently, great efforts have been made to explore the mechanisms of angiogenesis in various diseases and many angiogenic factors have been discovered as therapeutic targets in anti- or pro-angiogenic drug development. However, the resulted information is sparsely distributed and no systematical summarization has been made. In order to integrate these related results and facilitate the researches for the community, we conducted manual text-mining from published literature and built a database named as PubAngioGen (http://www.megabionet.org/aspd/). 
Our online application displays a comprehensive network for exploring the connection between angiogenesis and diseases at multilevels including protein-protein interaction, drug-target, disease-gene and signaling pathways among various cells and animal models recorded through text-mining. To enlarge the scope of the PubAngioGen application, our database also links to other common resources including STRING, DrugBank and OMIM databases, which will facilitate understanding the underlying molecular mechanisms of angiogenesis and drug development in clinical therapy.",2014-11-11 +25886992,Pheno2Geno - High-throughput generation of genetic markers and maps from molecular phenotypes for crosses between inbred strains.,"

Background

Genetic markers and maps are instrumental in quantitative trait locus (QTL) mapping in segregating populations. The resolution of QTL localization depends on the number of informative recombinations in the population and how well they are tagged by markers. Larger populations and denser marker maps are better for detecting and locating QTLs. Marker maps that are initially too sparse can be saturated or derived de novo from high-throughput omics data, (e.g. gene expression, protein or metabolite abundance). If these molecular phenotypes are affected by genetic variation due to a major QTL they will show a clear multimodal distribution. Using this information, phenotypes can be converted into genetic markers.

Results

The Pheno2Geno tool uses mixture modeling to select phenotypes and transform them into genetic markers suitable for construction and/or saturation of a genetic map. Pheno2Geno excludes candidate genetic markers that show evidence for multiple possibly epistatically interacting QTL and/or interaction with the environment, in order to provide a set of robust markers for follow-up QTL mapping. We demonstrate the use of Pheno2Geno on gene expression data of 370,000 probes in 148 A. thaliana recombinant inbred lines. Pheno2Geno is able to saturate the existing genetic map, decreasing the average distance between markers from 7.1 cM to 0.89 cM, close to the theoretical limit of 0.68 cM (with 148 individuals we expect a recombination every 100/148=0.68 cM); this pinpointed almost all of the informative recombinations in the population.

Conclusion

The Pheno2Geno package makes use of genome-wide molecular profiling and provides a tool for high-throughput de novo map construction and saturation of existing genetic maps. Processing of the showcase dataset takes less than 30 minutes on an average desktop PC. Pheno2Geno improves QTL mapping results at no additional laboratory cost and with minimum computational effort. Its results are formatted for direct use in R/qtl, the leading R package for QTL studies. Pheno2Geno is freely available on CRAN under ""GNU GPL v3"". The Pheno2Geno package as well as the tutorial can also be found at: http://pheno2geno.nl .",2015-02-19 +26240515,Markov Chain Monte Carlo from Lagrangian Dynamics.,"Hamiltonian Monte Carlo (HMC) improves the computational efficiency of the Metropolis-Hastings algorithm by reducing its random walk behavior. Riemannian HMC (RHMC) further improves the performance of HMC by exploiting the geometric properties of the parameter space. However, the geometric integrator used for RHMC involves implicit equations that require fixed-point iterations. In some cases, the computational overhead for solving implicit equations undermines RHMC's benefits. In an attempt to circumvent this problem, we propose an explicit integrator that replaces the momentum variable in RHMC by velocity. We show that the resulting transformation is equivalent to transforming Riemannian Hamiltonian dynamics to Lagrangian dynamics. Experimental results suggests that our method improves RHMC's overall computational efficiency in the cases considered. All computer programs and data sets are available online (http://www.ics.uci.edu/~babaks/Site/Codes.html) in order to allow replication of the results reported in this paper.",2015-04-01 +27022889,Exposure to Traffic-Related Air Pollution in Relation to Progression in Physical Disability among Older Adults.,"

Background

Physical disability is common though not inevitable in older age and has direct bearing on a person's ability to perform activities essential for self-care and independent living. Air pollution appears to increase the risk of several chronic diseases that contribute to the progression of disability.

Objective

We evaluated long-term exposure to traffic-related air pollution (TRAP) in relation to progression in physical disability.

Methods

We conducted our investigation within the Chicago Health and Aging Project. We measured participants' exposures to TRAP using two surrogates: residential proximity to major roads (1993 onwards) and ambient concentrations of oxides of nitrogen (NOX; 1999 onwards), predicted via a geographic information systems-based spatiotemporal smoothing model (cross-validation R2 = 0.87) that incorporated community-based monitoring and resolved intraurban exposure gradients at a spatial scale of tens of meters. Participants' lower-extremity physical ability was assessed every 3 years (1993-2012) via tandem stand, chair stand, and timed walking speed.

Results

In multivariable-adjusted analyses (n = 5,708), higher long-term NOX exposure was associated with significantly faster progression in disability. Compared with the 5-year decline in physical ability score among participants in the lowest quartile of NOX exposure, decline among those in the highest exposure quartile was 1.14 units greater (95% confidence interval [CI]: -1.86, -0.42), equivalent to 3 additional years of decline among those in the lowest exposure quartile. The association was linear across the continuum of NOX exposure: per 10-ppb increment in exposure, the 5-year decline in physical ability score was 0.87 unit greater (95% CI: -1.35, -0.39). Proximity to a major road was not associated with disability progression (n = 9,994).

Conclusions

These data join a growing body of evidence suggesting that TRAP exposures may accelerate aging-related declines in health.

Citation

Weuve J, Kaufman JD, Szpiro AA, Curl C, Puett RC, Beck T, Evans DA, Mendes de Leon CF. 2016. Exposure to traffic-related air pollution in relation to progression in physical disability among older adults. Environ Health Perspect 124:1000-1008; http://dx.doi.org/10.1289/ehp.1510089.",2016-03-29 +25580234,F1000Research: Tics welcomes you to 21st century biomedical publishing.,"Tics are repeated, usually suppressible movements or vocalizations. They are the defining features of tic disorders including Tourette syndrome, but many people have them for shorter durations at some point in childhood. This editorial marks the beginning of the F1000RESEARCH: Tics specialty section, an effort to provide a single portal to modern research on tics and tic disorders. Publications in F1000RESEARCH: Tics benefit from F1000RESEARCH's novel approach to publishing, in which articles can be published within days of submission. Peer review happens after publication and is fully open. When the submitted article or a revision is approved, it is promptly submitted to repositories including NIH's PubMed Central. In addition to research articles and reviews, F1000RESEARCH: Tics will publish study protocols, clinical practice articles, case reports, and data notes. The home page will also provide links to expert recommendations of articles that have appeared elsewhere, and to relevant posters from scientific meetings (http://f1000.com/posters/). F1000RESEARCH's approach is enabled by the capabilities of internet publication, including space to publish the full results of a study rather than just a few graphs selected from the data. Publishing methodologically sound studies without requiring subjective editorial judgments of novelty or broad appeal brings numerous advantages, including minimizing publication bias and shining the light of openness on peer review. 
To celebrate the launch of the Tics section, F1000RESEARCH is offering discounted article processing charges for manuscripts submitted by March 1st 2015. I have had good experiences publishing in F1000RESEARCH, and look forward to seeing a wide range of tic-related manuscripts submitted.",2014-11-12 +22913485,NetiNeti: discovery of scientific names from text using machine learning methods.,"

Background

A scientific name for an organism can be associated with almost all biological data. Name identification is an important step in many text mining tasks aiming to extract useful information from biological, biomedical and biodiversity text sources. A scientific name acts as an important metadata element to link biological information.

Results

We present NetiNeti (Name Extraction from Textual Information-Name Extraction for Taxonomic Indexing), a machine learning based approach for recognition of scientific names including the discovery of new species names from text that will also handle misspellings, OCR errors and other variations in names. The system generates candidate names using rules for scientific names and applies probabilistic machine learning methods to classify names based on structural features of candidate names and features derived from their contexts. NetiNeti can also disambiguate scientific names from other names using the contextual information. We evaluated NetiNeti on legacy biodiversity texts and biomedical literature (MEDLINE). NetiNeti performs better (precision = 98.9% and recall = 70.5%) compared to a popular dictionary based approach (precision = 97.5% and recall = 54.3%) on a 600-page biodiversity book that was manually marked by an annotator. On a small set of PubMed Central's full text articles annotated with scientific names, the precision and recall values are 98.5% and 96.2% respectively. NetiNeti found more than 190,000 unique binomial and trinomial names in more than 1,880,000 PubMed records when used on the full MEDLINE database. NetiNeti also successfully identifies almost all of the new species names mentioned within web pages.

Conclusions

We present NetiNeti, a machine learning based approach for identification and discovery of scientific names. The system implementing the approach can be accessed at http://namefinding.ubio.org.",2012-08-22 +24252878,Three-dimensional electron crystallography of protein microcrystals.,"We demonstrate that it is feasible to determine high-resolution protein structures by electron crystallography of three-dimensional crystals in an electron cryo-microscope (CryoEM). Lysozyme microcrystals were frozen on an electron microscopy grid, and electron diffraction data collected to 1.7 Å resolution. We developed a data collection protocol to collect a full-tilt series in electron diffraction to atomic resolution. A single tilt series contains up to 90 individual diffraction patterns collected from a single crystal with tilt angle increment of 0.1-1° and a total accumulated electron dose less than 10 electrons per angstrom squared. We indexed the data from three crystals and used them for structure determination of lysozyme by molecular replacement followed by crystallographic refinement to 2.9 Å resolution. This proof of principle paves the way for the implementation of a new technique, which we name 'MicroED', that may have wide applicability in structural biology. DOI: http://dx.doi.org/10.7554/eLife.01345.001.",2013-11-19 +23617841,Empirical Bayesian analysis of paired high-throughput sequencing data with a beta-binomial distribution.,"

Background

Pairing of samples arises naturally in many genomic experiments; for example, gene expression in tumour and normal tissue from the same patients. Methods for analysing high-throughput sequencing data from such experiments are required to identify differential expression, both within paired samples and between pairs under different experimental conditions.

Results

We develop an empirical Bayesian method based on the beta-binomial distribution to model paired data from high-throughput sequencing experiments. We examine the performance of this method on simulated and real data in a variety of scenarios. Our methods are implemented as part of the RbaySeq package (versions 1.11.6 and greater) available from Bioconductor (http://www.bioconductor.org).

Conclusions

We compare our approach to alternatives based on generalised linear modelling approaches and show that our method offers significant gains in performance on simulated data. In testing on real data from oral squamous cell carcinoma patients, we discover greater enrichment of previously identified head and neck squamous cell carcinoma associated gene sets than has previously been achieved through a generalised linear modelling approach, suggesting that similar gains in performance may be found in real data. Our methods thus show real and substantial improvements in analyses of high-throughput sequencing data from paired samples.",2013-04-23 +23193272,ArrayExpress update--trends in database growth and links to data analysis tools.,"The ArrayExpress Archive of Functional Genomics Data (http://www.ebi.ac.uk/arrayexpress) is one of three international functional genomics public data repositories, alongside the Gene Expression Omnibus at NCBI and the DDBJ Omics Archive, supporting peer-reviewed publications. It accepts data generated by sequencing or array-based technologies and currently contains data from almost a million assays, from over 30 000 experiments. The proportion of sequencing-based submissions has grown significantly over the last 2 years and has reached, in 2012, 15% of all new data. All data are available from ArrayExpress in MAGE-TAB format, which allows robust linking to data analysis and visualization tools, including Bioconductor and GenomeSpace. Additionally, R objects, for microarray data, and binary alignment format files, for sequencing data, have been generated for a significant proportion of ArrayExpress data.",2012-11-27 +24532840,Predicting the dynamics of protein abundance.,"Protein synthesis is finely regulated across all organisms, from bacteria to humans, and its integrity underpins many important processes. Emerging evidence suggests that the dynamic range of protein abundance is greater than that observed at the transcript level. 
Technological breakthroughs now mean that sequencing-based measurement of mRNA levels is routine, but protocols for measuring protein abundance remain both complex and expensive. This paper introduces a Bayesian network that integrates transcriptomic and proteomic data to predict protein abundance and to model the effects of its determinants. We aim to use this model to follow a molecular response over time, from condition-specific data, in order to understand adaptation during processes such as the cell cycle. With microarray data now available for many conditions, the general utility of a protein abundance predictor is broad. Whereas most quantitative proteomics studies have focused on higher organisms, we developed a predictive model of protein abundance for both Saccharomyces cerevisiae and Schizosaccharomyces pombe to explore the latitude at the protein level. Our predictor primarily relies on mRNA level, mRNA-protein interaction, mRNA folding energy and half-life, and tRNA adaptation. The combination of key features, allowing for the low certainty and uneven coverage of experimental observations, gives comparatively minor but robust prediction accuracy. The model substantially improved the analysis of protein regulation during the cell cycle: predicted protein abundance identified twice as many cell-cycle-associated proteins as experimental mRNA levels. Predicted protein abundance was more dynamic than observed mRNA expression, agreeing with experimental protein abundance from a human cell line. We illustrate how the same model can be used to predict the folding energy of mRNA when protein abundance is available, lending credence to the emerging view that mRNA folding affects translation efficiency. 
The software and data used in this research are available at http://bioinf.scmb.uq.edu.au/proteinabundance/.",2014-02-16 +24874113,Inferring host gene subnetworks involved in viral replication.,"Systematic, genome-wide loss-of-function experiments can be used to identify host factors that directly or indirectly facilitate or inhibit the replication of a virus in a host cell. We present an approach that combines an integer linear program and a diffusion kernel method to infer the pathways through which those host factors modulate viral replication. The inputs to the method are a set of viral phenotypes observed in single-host-gene mutants and a background network consisting of a variety of host intracellular interactions. The output is an ensemble of subnetworks that provides a consistent explanation for the measured phenotypes, predicts which unassayed host factors modulate the virus, and predicts which host factors are the most direct interfaces with the virus. We infer host-virus interaction subnetworks using data from experiments screening the yeast genome for genes modulating the replication of two RNA viruses. Because a gold-standard network is unavailable, we assess the predicted subnetworks using both computational and qualitative analyses. We conduct a cross-validation experiment in which we predict whether held-aside test genes have an effect on viral replication. Our approach is able to make high-confidence predictions more accurately than several baselines, and about as well as the best baseline, which does not infer mechanistic pathways. We also examine two kinds of predictions made by our method: which host factors are nearest to a direct interaction with a viral component, and which unassayed host genes are likely to be involved in viral replication. Multiple predictions are supported by recent independent experimental data, or are components or functional partners of confirmed relevant complexes or pathways. 
Integer program code, background network data, and inferred host-virus subnetworks are available at http://www.biostat.wisc.edu/~craven/chasman_host_virus/.",2014-05-29 +23625965,ValiDichro: a website for validating and quality control of protein circular dichroism spectra.,"Circular dichroism (CD) spectroscopy is widely used in structural biology as a technique for examining the structure, folding and conformational changes of proteins. A new server, ValiDichro, has been developed for checking the quality and validity of CD spectral data and metadata, both as an aid to data collection and processing and as a validation procedure for spectra to be included in publications. ValiDichro currently includes 25 tests for data completeness, consistency and quality. For each test that is done, not only is a validation report produced, but the user is also provided with suggestions for correcting or improving the data. The ValiDichro server is freely available at http://valispec.cryst.bbk.ac.uk/circularDichroism/ValiDichro/upload.html.",2013-04-26 +22434834,The mouse-human anatomy ontology mapping project.,"The overall objective of the Mouse-Human Anatomy Project (MHAP) was to facilitate the mapping and harmonization of anatomical terms used for mouse and human models by Mouse Genome Informatics (MGI) and the National Cancer Institute (NCI). The anatomy resources designated for this study were the Adult Mouse Anatomy (MA) ontology and the set of anatomy concepts contained in the NCI Thesaurus (NCIt). Several methods and software tools were identified and evaluated, then used to conduct an in-depth comparative analysis of the anatomy ontologies. Matches between mouse and human anatomy terms were determined and validated, resulting in a highly curated set of mappings between the two ontologies that has been used by other resources. These mappings will enable linking of data from mouse and human. 
As the anatomy ontologies have been expanded and refined, the mappings have been updated accordingly. Insights are presented into the overall process of comparing and mapping between ontologies, which may prove useful for further comparative analyses and ontology mapping efforts, especially those involving anatomy ontologies. Finally, issues concerning further development of the ontologies, updates to the mapping files, and possible additional applications and significance were considered. DATABASE URL: http://obofoundry.org/cgi-bin/detail.cgi?id=ma2ncit.",2012-03-20 +22369201,Liverome: a curated database of liver cancer-related gene signatures with self-contained context information.,"

Background

Hepatocellular carcinoma (HCC) is the fifth most common cancer worldwide. A number of molecular profiling studies have investigated the changes in gene and protein expression that are associated with various clinicopathological characteristics of HCC and generated a wealth of scattered information, usually in the form of gene signature tables. A database of the published HCC gene signatures would be useful to liver cancer researchers seeking to retrieve existing differential expression information on a candidate gene and to make comparisons between signatures for prioritization of common genes. A challenge in constructing such database is that a direct import of the signatures as appeared in articles would lead to a loss or ambiguity of their context information that is essential for a correct biological interpretation of a gene's expression change. This challenge arises because designation of compared sample groups is most often abbreviated, ad hoc, or even missing from published signature tables. Without manual curation, the context information becomes lost, leading to uninformative database contents. Although several databases of gene signatures are available, none of them contains informative form of signatures nor shows comprehensive coverage on liver cancer. Thus we constructed Liverome, a curated database of liver cancer-related gene signatures with self-contained context information.

Description

Liverome's data coverage is more than three times larger than any other signature database, consisting of 143 signatures taken from 98 HCC studies, mostly microarray and proteome, and involving 6,927 genes. The signatures were post-processed into an informative and uniform representation and annotated with an itemized summary so that all context information is unambiguously self-contained within the database. The signatures were further informatively named and meaningfully organized according to ten functional categories for guided browsing. Its web interface enables a straightforward retrieval of known differential expression information on a query gene and a comparison of signatures to prioritize common genes. The utility of Liverome-collected data is shown by case studies in which useful biological insights on HCC are produced.

Conclusion

Liverome database provides a comprehensive collection of well-curated HCC gene signatures and straightforward interfaces for gene search and signature comparison as well. Liverome is available at http://liverome.kobic.re.kr.",2011-11-30 +25382310,Expected degree for RNA secondary structure networks.,"Consider the network of all secondary structures of a given RNA sequence, where nodes are connected when the corresponding structures have base pair distance one. The expected degree of the network is the average number of neighbors, where average may be computed with respect to the either the uniform or Boltzmann probability. Here, we describe the first algorithm, RNAexpNumNbors, that can compute the expected number of neighbors, or expected network degree, of an input sequence. For RNA sequences from the Rfam database, the expected degree is significantly less than the constrained minimum free energy structure, defined to have minimum free energy (MFE) over all structures consistent with the Rfam consensus structure. The expected degree of structural RNAs, such as purine riboswitches, paradoxically appears to be smaller than that of random RNA, yet the difference between the degree of the MFE structure and the expected degree is larger than that of random RNA. Expected degree does not seem to correlate with standard structural diversity measures of RNA, such as positional entropy and ensemble defect. The program RNAexpNumNbors is written in C, runs in cubic time and quadratic space, and is publicly available at http://bioinformatics.bc.edu/clotelab/RNAexpNumNbors.",2014-11-07 +22537301,Towards accurate detection and genotyping of expressed variants from whole transcriptome sequencing data.,"

Background

Massively parallel transcriptome sequencing (RNA-Seq) is becoming the method of choice for studying functional effects of genetic variability and establishing causal relationships between genetic variants and disease. However, RNA-Seq poses new technical and computational challenges compared to genome sequencing. In particular, mapping transcriptome reads onto the genome is more challenging than mapping genomic reads due to splicing. Furthermore, detection and genotyping of single nucleotide variants (SNVs) requires statistical models that are robust to variability in read coverage due to unequal transcript expression levels.

Results

In this paper we present a strategy to more reliably map transcriptome reads by taking advantage of the availability of both the genome reference sequence and transcript databases such as CCDS. We also present a novel Bayesian model for SNV discovery and genotyping based on quality scores.

Conclusions

Experimental results on RNA-Seq data generated from blood cell tissue of three Hapmap individuals show that our methods yield increased accuracy compared to several widely used methods. The open source code implementing our methods, released under the GNU General Public License, is available at http://dna.engr.uconn.edu/software/NGSTools/.",2012-04-12 +21876203,A tool for biomarker discovery in the urinary proteome: a manually curated human and animal urine protein biomarker database.,"Urine is an important source of biomarkers. A single proteomics assay can identify hundreds of differentially expressed proteins between disease and control samples; however, the ability to select biomarker candidates with the most promise for further validation study remains difficult. A bioinformatics tool that allows accurate and convenient comparison of all of the existing related studies can markedly aid the development of this area. In this study, we constructed the Urinary Protein Biomarker (UPB) database to collect existing studies of urinary protein biomarkers from published literature. To ensure the quality of data collection, all literature was manually curated. The website (http://122.70.220.102/biomarker) allows users to browse the database by disease categories and search by protein IDs in bulk. Researchers can easily determine whether a biomarker candidate has already been identified by another group for the same disease or for other diseases, which allows for the confidence and disease specificity of their biomarker candidate to be evaluated. Additionally, the pathophysiological processes of the diseases can be studied using our database with the hypothesis that diseases that share biomarkers may have the same pathophysiological processes. Because of the natural relationship between urinary proteins and the urinary system, this database may be especially suitable for studying the pathogenesis of urological diseases. 
Currently, the database contains 553 and 275 records compiled from 174 and 31 publications of human and animal studies, respectively. We found that biomarkers identified by different proteomic methods had a poor overlap with each other. The differences between sample preparation and separation methods, mass spectrometers, and data analysis algorithms may be influencing factors. Biomarkers identified from animal models also overlapped poorly with those from human samples, but the overlap rate was not lower than that of human proteomics studies. Therefore, it is not clear how well the animal models mimic human diseases.",2011-08-29 +25474588,G-Bean: an ontology-graph based web tool for biomedical literature retrieval.,"

Background

Currently, most people use NCBI's PubMed to search the MEDLINE database, an important bibliographical information source for life science and biomedical information. However, PubMed has some drawbacks that make it difficult to find relevant publications pertaining to users' individual intentions, especially for non-expert users. To ameliorate the disadvantages of PubMed, we developed G-Bean, a graph based biomedical search engine, to search biomedical articles in MEDLINE database more efficiently.

Methods

G-Bean addresses PubMed's limitations with three innovations: (1) Parallel document index creation: a multithreaded index creation strategy is employed to generate the document index for G-Bean in parallel; (2) Ontology-graph based query expansion: an ontology graph is constructed by merging four major UMLS (Version 2013AA) vocabularies, MeSH, SNOMEDCT, CSP and AOD, to cover all concepts in National Library of Medicine (NLM) database; a Personalized PageRank algorithm is used to compute concept relevance in this ontology graph and the Term Frequency - Inverse Document Frequency (TF-IDF) weighting scheme is used to re-rank the concepts. The top 500 ranked concepts are selected for expanding the initial query to retrieve more accurate and relevant information; (3) Retrieval and re-ranking of documents based on user's search intention: after the user selects any article from the existing search results, G-Bean analyzes user's selections to determine his/her true search intention and then uses more relevant and more specific terms to retrieve additional related articles. The new articles are presented to the user in the order of their relevance to the already selected articles.

Results

Performance evaluation with 106 OHSUMED benchmark queries shows that G-Bean returns more relevant results than PubMed does when using these queries to search the MEDLINE database. PubMed could not even return any search result for some OHSUMED queries because it failed to form the appropriate Boolean query statement automatically from the natural language query strings. G-Bean is available at http://bioinformatics.clemson.edu/G-Bean/index.php.

Conclusions

G-Bean addresses PubMed's limitations with ontology-graph based query expansion, automatic document indexing, and user search intention discovery. It shows significant advantages in finding relevant articles from the MEDLINE database to meet the information need of the user.",2014-11-06 +25378326,Genomicus update 2015: KaryoView and MatrixView provide a genome-wide perspective to multispecies comparative genomics.,"The Genomicus web server (http://www.genomicus.biologie.ens.fr/genomicus) is a visualization tool allowing comparative genomics in four different phyla (Vertebrate, Fungi, Metazoan and Plants). It provides access to genomic information from extant species, as well as ancestral gene content and gene order for vertebrates and flowering plants. Here we present the new features available for vertebrate genome with a focus on new graphical tools. The interface to enter the database has been improved, two pairwise genome comparison tools are now available (KaryoView and MatrixView) and the multiple genome comparison tools (PhyloView and AlignView) propose three new kinds of representation and a more intuitive menu. These new developments have been implemented for Genomicus portal dedicated to vertebrates. This allows the analysis of 68 extant animal genomes, as well as 58 ancestral reconstructed genomes. 
The Genomicus server also provides access to ancestral gene orders, to facilitate evolutionary and comparative genomics studies, as well as computationally predicted regulatory interactions, thanks to the representation of conserved non-coding elements with their putative gene targets.",2014-11-06 +25378337,APASdb: a database describing alternative poly(A) sites and selection of heterogeneous cleavage sites downstream of poly(A) signals.,"Increasing amounts of genes have been shown to utilize alternative polyadenylation (APA) 3'-processing sites depending on the cell and tissue type and/or physiological and pathological conditions at the time of processing, and the construction of genome-wide database regarding APA is urgently needed for better understanding poly(A) site selection and APA-directed gene expression regulation for a given biology. Here we present a web-accessible database, named APASdb (http://mosas.sysu.edu.cn/utr), which can visualize the precise map and usage quantification of different APA isoforms for all genes. The datasets are deeply profiled by the sequencing alternative polyadenylation sites (SAPAS) method capable of high-throughput sequencing 3'-ends of polyadenylated transcripts. Thus, APASdb details all the heterogeneous cleavage sites downstream of poly(A) signals, and maintains near complete coverage for APA sites, much better than the previous databases using conventional methods. Furthermore, APASdb provides the quantification of a given APA variant among transcripts with different APA sites by computing their corresponding normalized-reads, making our database more useful. In addition, APASdb supports URL-based retrieval, browsing and display of exon-intron structure, poly(A) signals, poly(A) sites location and usage reads, and 3'-untranslated regions (3'-UTRs). 
Currently, APASdb involves APA in various biological processes and diseases in human, mouse and zebrafish.",2014-11-06 +25377257,BactPepDB: a database of predicted peptides from a exhaustive survey of complete prokaryote genomes. ,"With the recent progress in complete genome sequencing, mining the increasing amount of genomic information available should in theory provide the means to discover new classes of peptides. However, annotation pipelines often do not consider small reading frames likely to be expressed. BactPepDB, available online at http://bactpepdb.rpbs.univ-paris-diderot.fr, is a database that aims at providing an exhaustive re-annotation of all complete prokaryotic genomes-chromosomal and plasmid DNA-available in RefSeq for coding sequences ranging between 10 and 80 amino acids. The identified peptides are classified as (i) previously identified in RefSeq, (ii) entity-overlapping (intragenic) or intergenic, and (iii) potential pseudogenes-intergenic sequences corresponding to a portion of a previously annotated larger gene. Additional information is related to homologs within order, predicted signal sequence, transmembrane segments, disulfide bonds, secondary structure, and the existence of a related 3D structure in the Protein Databank. As a result, BactPepDB provides insights about candidate peptides, and provides information about their conservation, together with some of their expected biological/structural features. The BactPepDB interface allows to search for candidate peptides in the database, or to search for peptides similar to a query, according to the multiple properties predicted or related to genomic localization. Database URL: http://www.yeastgenome.org/",2014-11-06 +27091416,The European Medicines Agency Review of Decitabine (Dacogen) for the Treatment of Adult Patients With Acute Myeloid Leukemia: Summary of the Scientific Assessment of the Committee for Medicinal Products for Human Use.,"

Unlabelled

: On September 20, 2012, a marketing authorization valid throughout the European Union (EU) was issued for decitabine for the treatment of adult patients aged 65 years and older with newly diagnosed de novo or secondary acute myeloid leukemia (AML) who are not candidates for standard induction chemotherapy. Decitabine is a pyrimidine analog incorporated into DNA, where it irreversibly inhibits DNA methyltransferases through covalent adduct formation with the enzyme. The use of decitabine was studied in an open-label, randomized, multicenter phase III study (DACO-016) in patients with newly diagnosed de novo or secondary AML. Decitabine (n = 242) was compared with patient's choice with physician's advice (n = 243) of low-dose cytarabine or supportive care alone. The primary endpoint of the study was overall survival. The median overall survival in the intent-to-treat (ITT) population was 7.7 months among patients treated with decitabine compared with 5.0 months for those in the control arm (hazard ratio [HR], 0.85; 95% confidence interval [CI], 0.69-1.04; p = .1079). Mature survival data after an additional year of follow-up were consistent with these results, with a median overall survival of 7.7 months in patients treated with decitabine and 5.0 months in the control arm (HR, 0.82; 95% CI, 0.68-0.99; p = .0373). Secondary endpoints, including response rates, progression-free survival, and event-free survival, were increased in favor of decitabine when compared with control treatment. The most common adverse drug reactions reported during treatment with decitabine are pyrexia, anemia, thrombocytopenia, febrile neutropenia, neutropenia, nausea, and diarrhea. This paper summarizes the scientific review of the application leading to approval of decitabine in the EU. The detailed scientific assessment report and product information (including the summary of product characteristics) for this product are available on the EMA website (http://www.ema.europa.eu).

Implications for practice

Acute myeloid leukemia (AML) remains an area of significant unmet need, especially in older patients. Older patients and those with comorbidities are often considered ineligible for standard induction therapy, and outcome for these patients is poor. Decitabine has favorable effects in terms of overall survival, which were considered clinically meaningful in the context of a manageable toxicity profile and after consideration of the lack of therapeutic alternatives for these patients. Decitabine is widely used in the treatment of AML in patients aged >60 years, as per current guidelines, including the European LeukemiaNet and the U.S. National Cancer Comprehensive Network.",2016-04-18 +26284155,Smartphone applications: A contemporary resource for dermatopathology.,"

Introduction

Smartphone applications in medicine are becoming increasingly prevalent. Given that most pathologists and pathology trainees today use smartphones, an obvious modality for pathology education is through smartphone applications. ""MyDermPath"" is a novel smartphone application that was developed as an interactive reference tool for dermatology and dermatopathology, available for iOS and Android.

Materials and methods

""MyDermPath"" was developed using Apple Xcode and Google Android SDK. Dermatology images (static and virtual slides) were annotated and configured into an algorithmic format. Each image comprised educational data (diagnosis, clinical information, histopathology, special stains, differential diagnosis, clinical management, linked PubMed references). Added functionality included personal note taking, pop quiz, and image upload capabilities. A website was created (http://mydermpath.com) to mirror the app.

Results

The application was released in August 2011 and updated in November 2013. More than 1,100 reference diagnoses, with over 2,000 images are available via the application and website. The application has been downloaded approximately 14,000 times. The application is available for use on iOS and Android platforms.

Conclusions

Smartphone applications have tremendous potential for advancing pathology education. ""MyDermPath"" represents an interactive reference tool for dermatology and dermatopathologists.",2015-07-28 +25378306,Plastid-LCGbase: a collection of evolutionarily conserved plastid-associated gene pairs.,"Plastids carry their own genetic material that encodes a variable set of genes that are limited in number but functionally important. Aside from orthology, the lineage-specific order and orientation of these genes are also relevant. Here, we develop a database, Plastid-LCGbase (http://lcgbase.big.ac.cn/plastid-LCGbase/), which focuses on organizational variability of plastid genes and genomes from diverse taxonomic groups. The current Plastid-LCGbase contains information from 470 plastid genomes and exhibits several unique features. First, through a genome-overview page generated from OrganellarGenomeDRAW, it displays general arrangement of all plastid genes (circular or linear). Second, it shows patterns and modes of all paired plastid genes and their physical distances across user-defined lineages, which are facilitated by a step-wise stratification of taxonomic groups. Third, it divides the paired genes into three categories (co-directionally-paired genes or CDPGs, convergently-paired genes or CPGs and divergently-paired genes or DPGs) and three patterns (separation, overlap and inclusion) and provides basic statistics for each species. Fourth, the gene pairing scheme is expandable, where neighboring genes can also be included in species-/lineage-specific comparisons. We hope that Plastid-LCGbase facilitates gene variation (insertion-deletion, translocation and rearrangement) and transcription-level studies of plastid genomes.",2014-11-05 +25378311,The tmRNA website.,"The transfer-messenger RNA (tmRNA) and its partner protein SmpB act together in resolving problems arising when translating bacterial ribosomes reach the end of mRNA with no stop codon. 
Their genes have been found in nearly all bacterial genomes and in some organelles. The tmRNA Website serves tmRNA sequences, alignments and feature annotations, and has recently moved to http://bioinformatics.sandia.gov/tmrna/. New features include software used to find the sequences, an update raising the number of unique tmRNA sequences from 492 to 1716, and a database of SmpB sequences which are served along with the tmRNA sequence from the same organism.",2014-11-05 +25378302,Islander: a database of precisely mapped genomic islands in tRNA and tmRNA genes.,"Genomic islands are mobile DNAs that are major agents of bacterial and archaeal evolution. Integration into prokaryotic chromosomes usually occurs site-specifically at tRNA or tmRNA gene (together, tDNA) targets, catalyzed by tyrosine integrases. This splits the target gene, yet sequences within the island restore the disrupted gene; the regenerated target and its displaced fragment precisely mark the endpoints of the island. We applied this principle to search for islands in genomic DNA sequences. Our algorithm identifies tDNAs, finds fragments of those tDNAs in the same replicon and removes unlikely candidate islands through a series of filters. A search for islands in 2168 whole prokaryotic genomes produced 3919 candidates. The website Islander (recently moved to http://bioinformatics.sandia.gov/islander/) presents these precisely mapped candidate islands, the gene content and the island sequence. 
The algorithm further insists that each island encode an integrase, and attachment site sequence identity is carefully noted; therefore, the database also serves in the study of integrase site-specificity and its evolution.",2014-11-05 +25269767,"Sequential expression of miR-182 and miR-503 cooperatively targets FBXW7, contributing to the malignant transformation of colon adenoma to adenocarcinoma.","Genetic changes in colon cancer are known to parallel the tissue abnormalities associated with the disease, namely adenoma and adenocarcinoma. The role of microRNA dysregulation in dysplastic progression, however, is not well understood. Here, we show that miR-182 and miR-503 undergo sequential up-regulation and drive the progression of colon adenoma to adenocarcinoma by cooperatively down-regulating the tumour suppressor FBXW7. We identified that increased expression of miR-182 is a feature of adenomas. A subsequent increase in miR-503 expression works cooperatively with miR-182 to induce transformation of an adenoma to adenocarcinoma. We show that introducing miR-503 into AAC1 cells, which are derived from a benign adenoma, confers tumourigenic potential. We also demonstrated that blocking both miR-182 and miR-503 in HCT116 colon cancer cells resulted in increased FBXW7 expression and significantly reduced tumour size in xenograft models. We confirmed relevance of these results in patients by examining the expression levels of miR-182 and miR-503 in over 200 colon cancer patients with 12 year survival outcome data. Decreased patient survival was correlated with elevated expression of both miRNAs, suggesting that elevated levels of both miR-182 and miR-503 define a novel prognostic biomarker for colon cancer patients. 
In conclusion, we show that a sequential expression of miR-182 and miR-503 in benign adenoma cooperatively regulates the tumour suppressor FBXW7, contributing to the malignant transformation of colon adenoma to adenocarcinoma and miR-182 and miR-503 may prove to be novel therapeutic targets. Array data are available at: http://www.oncomir.umn.edu/",2014-10-01 +24467754,ODEion--a software module for structural identification of ordinary differential equations.,"In the systems biology field, algorithms for structural identification of ordinary differential equations (ODEs) have mainly focused on fixed model spaces like S-systems and/or on methods that require sufficiently good data so that derivatives can be accurately estimated. There is therefore a lack of methods and software that can handle more general models and realistic data. We present ODEion, a software module for structural identification of ODEs. Main characteristic features of the software are: • The model space is defined by arbitrary user-defined functions that can be nonlinear in both variables and parameters, such as for example chemical rate reactions. • ODEion implements computationally efficient algorithms that have been shown to efficiently handle sparse and noisy data. It can run a range of realistic problems that previously required a supercomputer. • ODEion is easy to use and provides SBML output. We describe the mathematical problem, the ODEion system itself, and provide several examples of how the system can be used. Available at: http://www.odeidentification.org.",2013-11-12 +22493527,Cell-culture Database: Literature-based reference tool for human and mammalian experimentallybased cell culture applications.,"

Unlabelled

Cultivation of primary cells is essential for biotechnological research and viral vaccine production. Significant advances in cell and tissue culture, more specifically, advances in the transfection and transduction of human and mammalian cells, has directly led to giant leaps forward in fields such as cancer research, genetics, and public health. At the same time, a corresponding increase has been seen in available cell culture related literature. Often times, due to the sheer number and degree of variability of available literature, it is a challenge to find specific, yet practical cell culture related information. To respond to this rising tide of information, a practical, user-friendly database containing cell-lines, plasmids, vectors, selection agents, concentrations and media was created. The database currently consists of over 3,900 cell lines (Human and Mammalian) and 1,900 plasmids/vectors collected from 2,700 pieces of published literature. The database is continually being expanded and it is hoped that through the continual addition of unique data, the database can further serve and enrich the work of cell and molecular biologists, life-science professionals, and the worldwide scientific community at large.

Availability

The database is available for free at http://cell-lines.toku-e.com/",2012-03-17 +26220960,"Canonical, stable, general mapping using context schemes.","

Motivation

Sequence mapping is the cornerstone of modern genomics. However, most existing sequence mapping algorithms are insufficiently general.

Results

We introduce context schemes: a method that allows the unambiguous recognition of a reference base in a query sequence by testing the query for substrings from an algorithmically defined set. Context schemes only map when there is a unique best mapping, and define this criterion uniformly for all reference bases. Mappings under context schemes can also be made stable, so that extension of the query string (e.g. by increasing read length) will not alter the mapping of previously mapped positions. Context schemes are general in several senses. They natively support the detection of arbitrary complex, novel rearrangements relative to the reference. They can scale over orders of magnitude in query sequence length. Finally, they are trivially extensible to more complex reference structures, such as graphs, that incorporate additional variation. We demonstrate empirically the existence of high-performance context schemes, and present efficient context scheme mapping algorithms.

Availability and implementation

The software test framework created for this study is available from https://registry.hub.docker.com/u/adamnovak/sequence-graphs/.

Contact

anovak@soe.ucsc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-07-27 +23665844,MetPP: a computational platform for comprehensive two-dimensional gas chromatography time-of-flight mass spectrometry-based metabolomics.,"

Motivation

Due to the high complexity of metabolome, the comprehensive 2D gas chromatography time-of-flight mass spectrometry (GC×GC-TOF MS) is considered as a powerful analytical platform for metabolomics study. However, the applications of GC×GC-TOF MS in metabolomics are not popular owing to the lack of bioinformatics system for data analysis.

Results

We developed a computational platform entitled metabolomics profiling pipeline (MetPP) for analysis of metabolomics data acquired on a GC×GC-TOF MS system. MetPP can process peak filtering and merging, retention index matching, peak list alignment, normalization, statistical significance tests and pattern recognition, using the peak lists deconvoluted from the instrument data as its input. The performance of MetPP software was tested with two sets of experimental data acquired in a spike-in experiment and a biomarker discovery experiment, respectively. MetPP not only correctly aligned the spiked-in metabolite standards from the experimental data, but also correctly recognized their concentration difference between sample groups. For analysis of the biomarker discovery data, 15 metabolites were recognized with significant concentration difference between the sample groups and these results agree with the literature results of histological analysis, demonstrating the effectiveness of applying MetPP software for disease biomarker discovery.

Availability

The source code of MetPP is available at http://metaopen.sourceforge.net

Contact

xiang.zhang@louisville.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-11 +23814189,DNA motif elucidation using belief propagation.,"Protein-binding microarray (PBM) is a high-throughout platform that can measure the DNA-binding preference of a protein in a comprehensive and unbiased manner. A typical PBM experiment can measure binding signal intensities of a protein to all the possible DNA k-mers (k=8∼10); such comprehensive binding affinity data usually need to be reduced and represented as motif models before they can be further analyzed and applied. Since proteins can often bind to DNA in multiple modes, one of the major challenges is to decompose the comprehensive affinity data into multimodal motif representations. Here, we describe a new algorithm that uses Hidden Markov Models (HMMs) and can derive precise and multimodal motifs using belief propagations. We describe an HMM-based approach using belief propagations (kmerHMM), which accepts and preprocesses PBM probe raw data into median-binding intensities of individual k-mers. The k-mers are ranked and aligned for training an HMM as the underlying motif representation. Multiple motifs are then extracted from the HMM using belief propagations. Comparisons of kmerHMM with other leading methods on several data sets demonstrated its effectiveness and uniqueness. Especially, it achieved the best performance on more than half of the data sets. In addition, the multiple binding modes derived by kmerHMM are biologically meaningful and will be useful in interpreting other genome-wide data such as those generated from ChIP-seq. The executables and source codes are available at the authors' websites: e.g. http://www.cs.toronto.edu/∼wkc/kmerHMM.",2013-06-29 +25368989,Diagnostics for stochastic genome-scale modeling via model slicing and debugging.,"Modeling of biological behavior has evolved from simple gene expression plots represented by mathematical equations to genome-scale systems biology networks. 
However, due to obstacles in complexity and scalability of creating genome-scale models, several biological modelers have turned to programming or scripting languages and away from modeling fundamentals. In doing so, they have traded the ability to have exchangeable, standardized model representation formats, while those that remain true to standardized model representation are faced with challenges in model complexity and analysis. We have developed a model diagnostic methodology inspired by program slicing and debugging and demonstrate the effectiveness of the methodology on a genome-scale metabolic network model published in the BioModels database. The computer-aided identification revealed specific points of interest such as reversibility of reactions, initialization of species amounts, and parameter estimation that improved a candidate cell's adenosine triphosphate production. We then compared the advantages of our methodology over other modeling techniques such as model checking and model reduction. A software application that implements the methodology is available at http://gel.ym.edu.tw/gcs/.",2014-11-04 +24564209,Towards accurate modeling of noncovalent interactions for protein rigidity analysis.,"

Background

Protein rigidity analysis is an efficient computational method for extracting flexibility information from static, X-ray crystallography protein data. Atoms and bonds are modeled as a mechanical structure and analyzed with a fast graph-based algorithm, producing a decomposition of the flexible molecule into interconnected rigid clusters. The result depends critically on noncovalent atomic interactions, primarily on how hydrogen bonds and hydrophobic interactions are computed and modeled. Ongoing research points to the stringent need for benchmarking rigidity analysis software systems, towards the goal of increasing their accuracy and validating their results, either against each other and against biologically relevant (functional) parameters. We propose two new methods for modeling hydrogen bonds and hydrophobic interactions that more accurately reflect a mechanical model, without being computationally more intensive. We evaluate them using a novel scoring method, based on the B-cubed score from the information retrieval literature, which measures how well two cluster decompositions match.

Results

To evaluate the modeling accuracy of KINARI, our pebble-game rigidity analysis system, we use a benchmark data set of 20 proteins, each with multiple distinct conformations deposited in the Protein Data Bank. Cluster decompositions for them were previously determined with the RigidFinder method from Gerstein's lab and validated against experimental data. When KINARI's default tuning parameters are used, an improvement of the B-cubed score over a crude baseline is observed in 30% of this data. With our new modeling options, improvements were observed in over 70% of the proteins in this data set. We investigate the sensitivity of the cluster decomposition score with case studies on pyruvate phosphate dikinase and calmodulin.

Conclusion

To substantially improve the accuracy of protein rigidity analysis systems, thorough benchmarking must be performed on all current systems and future extensions. We have measured the gain in performance by comparing different modeling methods for noncovalent interactions. We showed that new criteria for modeling hydrogen bonds and hydrophobic interactions can significantly improve the results. The two new methods proposed here have been implemented and made publicly available in the current version of KINARI (v1.3), together with the benchmarking tools, which can be downloaded from our software's website, http://kinari.cs.umass.edu.",2013-11-05 +25369365,A phylogeny-based benchmarking test for orthology inference reveals the limitations of function-based validation.,"Accurate orthology prediction is crucial for many applications in the post-genomic era. The lack of broadly accepted benchmark tests precludes a comprehensive analysis of orthology inference. So far, functional annotation between orthologs serves as a performance proxy. However, this violates the fundamental principle of orthology as an evolutionary definition, while it is often not applicable due to limited experimental evidence for most species. Therefore, we constructed high quality ""gold standard"" orthologous groups that can serve as a benchmark set for orthology inference in bacterial species. Herein, we used this dataset to demonstrate 1) why a manually curated, phylogeny-based dataset is more appropriate for benchmarking orthology than other popular practices and 2) how it guides database design and parameterization through careful error quantification. More specifically, we illustrate how function-based tests often fail to identify false assignments, misjudging the true performance of orthology inference methods. We also examined how our dataset can instruct the selection of a ""core"" species repertoire to improve detection accuracy. 
We conclude that including more genomes at the proper evolutionary distances can influence the overall quality of orthology detection. The curated gene families, called Reference Orthologous Groups, are publicly available at http://eggnog.embl.de/orthobench2.",2014-11-04 +25422545,Cytochrome C oxidase subunit I barcodes provide an efficient tool for Jinqian Baihua She (Bungarus parvus) authentication.,"

Objective

To test the feasibility of DNA barcoding for accurate identification of Jinqian Baihua She and its adulterants.

Materials and methods

Standard cytochrome C oxidase subunit I (COI) gene fragments were sequenced for DNA barcoding of 39 samples from 9 snake species, including Bungarus multicinctus, the officially recognized origin animal by Chinese Pharmacopoeia, and other 8 adulterate species. The aligned sequences, 658 base pairs in length, were analyzed for divergence using the Kimura-2-parameter (K2P) distance model with MEGA5.0.

Results

The mean intraspecific K2P distance was 0.0103 and the average interspecific genetic distance was 0.2178 in B. multicinctus, far greater than the minimal interspecific genetic distance of 0.027 recommended for species identification. A neighbor-joining (NJ) tree was constructed, in which each species formed a monophyletic clade with bootstrap supports of 100%. All the data were submitted to Barcode of Life Data system version 3.0 (BOLD, http://www.barcodinglife.org) under the project title ""DNA barcoding Bungarus multicinctus and its adulterants"". Ten samples of commercially available crude drugs of JBS were identified using the identification engine provided by BOLD. All the samples were clearly identified at the species level, among which five were found to be the adulterants and identified as Dinodon rufozonatum.

Conclusion

DNA barcoding using the standard COI gene fragments provides an effective and accurate means for JBS identification and authentication.",2014-10-01 +26611658,Refining the head and neck cancer referral guidelines: a two-centre analysis of 4715 referrals.,"

Objectives

To identify the set of referral criteria that will offer optimal diagnostic efficacy in patients suspected to have head and neck cancer (HNC) in the primary care setting.

Design

Statistical analysis of referral criteria and outcomes.

Setting

Two tertiary care cancer centres in the United Kingdom.

Participants

4715 patients who were referred via the fast-track system with a suspected HNC between 2007 and 2010.

Main outcome measures

Parameters of diagnostic efficacy, multivariate regression model to calculate estimated probability of HNC and area under the receiver operating characteristic curve (AUROC).

Results

The majority of referring symptoms had a positive predictive value higher than the 3% cut-off point stated to be significant for HNC detection in the 2015 NICE recommendations. Nevertheless, our multivariate analysis identified nine symptoms to be linked with HNC. Of these, only four are included in the latest NICE guidelines. The best fit predictive model for this data set included the following symptoms: hoarseness >3 weeks, dysphagia >3 weeks, odynophagia, unexplained neck mass, oral swelling >3 weeks, oral ulcer >3 weeks, prolonged otalgia with normal otoscopy, the presence of blood in mouth with concurrent sensation of lump in throat and the presence of otalgia with concurrent lump in throat sensation. Intermittent hoarseness and sensation of lump in throat were negatively associated with HNC. The AUROC demonstrated that our model had a higher predictive value (0.77) compared to those generated using the NICE 2005 (0.69) and 2015 (0.68) referral criteria (P < 0.0001). An online risk calculator based on this study is available at http://www.orlhealth.com/risk-calculator.html.

Conclusions

This study presents a significantly refined version of referral guidelines which demonstrate greater diagnostic efficacy than the current NICE guidelines. We recommend that further iterative refinements of referral criteria be considered when referring patients with suspected HNC.",2016-02-01 +24872604,Monte Carlo study of MLC fields for cobalt therapy machine.,"An automated Multi-Leaf Collimator (MLC) system has been developed as add-on for the cobalt-60 teletherapy machines available in India. The goal of the present computational study is to validate the MLC design using Monte Carlo (MC) modeling. The study was based on the Kirloskar-supplied Phoenix model machines that closely match the Atomic Energy of Canada Limited (AECL) theratron-80 machine. The MLC is a retrofit attachment to the collimator assembly, with 14 non-divergent leaf pairs of 40 mm thick, 7 mm wide, and 150 mm long tungsten alloy plates with rounded edges and 20 mm tongue and 2 mm groove in each leaf. In the present work, the source and collimator geometry has been investigated in detail to arrive at a model that best represents the measured dosimetric data. The authors have studied in detail the proto-I MLC built for cobalt-60. The MLC field sizes were MC simulated for 2 × 2 cm(2) to 14 × 14 cm(2) square fields as well as irregular fields, and the percent depth dose (PDD) and profile data were compared with ROPS(†) treatment planning system (TPS). In addition, measured profiles using the IMATRIXX system(‡) were also compared with the MC simulations. The proto-I MLC can define radiation fields up to 14 × 14 cm(2) within 3 mm accuracy. The maximum measured leakage through the leaf ends in closed condition was 3.4% and interleaf leakage observed was 7.3%. Good agreement between MC results, ROPS and IMATRIXX results has been observed. The investigation also supports the hypothesis that optical and radiation field coincidence exists for the square fields studied with the MLC. 
Plots of the percent depth dose (PDD) data and profile data for clinically significant irregular fields have also been presented. The MC model was also investigated to speed up the calculations to allow calculations of clinically relevant conformal beams. (†)Radiation Oncology Planning System (ROPS) is supplied by Tirumala Jyothi Computer Systems described at https://sites.google.com/site/tjcsrops/ (‡)IMATRIXX is supplied by IBA Dosimetry described at http://www.iba-dosimetry.com.",2014-04-01 +24286480,From small studies to precision medicine: prioritizing candidate biomarkers.,"There are still many open questions in data-analytic research pertaining to biomarker development in the era of personalized/precision medicine and big data. Among them is the question of what constitutes best practice for the extraction of prioritized lists of candidate biomarkers from smaller studies that are 'hypothesis generating' in nature. A recent comparison of methods to detect patient-specific aberrant expression events in small- to medium-sized (10 to 50 samples) studies provides results that favor the use of outlying degree methods. See related Research, http://genomemedicine.com/content/5/11/103.",2013-11-29 +24297253,MycoCosm portal: gearing up for 1000 fungal genomes.,"MycoCosm is a fungal genomics portal (http://jgi.doe.gov/fungi), developed by the US Department of Energy Joint Genome Institute to support integration, analysis and dissemination of fungal genome sequences and other 'omics' data by providing interactive web-based tools. MycoCosm also promotes and facilitates user community participation through the nomination of new species of fungi for sequencing, and the annotation and analysis of resulting data. 
By efficiently filling gaps in the Fungal Tree of Life, MycoCosm will help address important problems associated with energy and the environment, taking advantage of growing fungal genomics resources.",2013-12-01 +26540677,SALSA: A Novel Dataset for Multimodal Group Behavior Analysis.,"Studying free-standing conversational groups (FCGs) in unstructured social settings (e.g., cocktail party ) is gratifying due to the wealth of information available at the group (mining social networks) and individual (recognizing native behavioral and personality traits) levels. However, analyzing social scenes involving FCGs is also highly challenging due to the difficulty in extracting behavioral cues such as target locations, their speaking activity and head/body pose due to crowdedness and presence of extreme occlusions. To this end, we propose SALSA, a novel dataset facilitating multimodal and Synergetic sociAL Scene Analysis, and make two main contributions to research on automated social interaction analysis: (1) SALSA records social interactions among 18 participants in a natural, indoor environment for over 60 minutes, under the poster presentation and cocktail party contexts presenting difficulties in the form of low-resolution images, lighting variations, numerous occlusions, reverberations and interfering sound sources; (2) To alleviate these problems we facilitate multimodal analysis by recording the social interplay using four static surveillance cameras and sociometric badges worn by each participant, comprising the microphone, accelerometer, bluetooth and infrared sensors. In addition to raw data, we also provide annotations concerning individuals' personality as well as their position, head, body orientation and F-formation information over the entire event duration. 
Through extensive experiments with state-of-the-art approaches, we show (a) the limitations of current methods and (b) how the recorded multiple cues synergetically aid automatic analysis of social interactions. SALSA is available at http://tev.fbk.eu/salsa.",2015-10-30 +25668446,Metabolic pathway predictions for metabolomics: a molecular structure matching approach.,"Metabolic pathways are composed of a series of chemical reactions occurring within a cell. In each pathway, enzymes catalyze the conversion of substrates into structurally similar products. Thus, structural similarity provides a potential means for mapping newly identified biochemical compounds to known metabolic pathways. In this paper, we present TrackSM, a cheminformatics tool designed to associate a chemical compound to a known metabolic pathway based on molecular structure matching techniques. Validation experiments show that TrackSM is capable of associating 93% of tested structures to their correct KEGG pathway class and 88% to their correct individual KEGG pathway. This suggests that TrackSM may be a valuable tool to aid in associating previously unknown small molecules to known biochemical pathways and improve our ability to link metabolomics, proteomic, and genomic data sets. TrackSM is freely available at http://metabolomics.pharm.uconn.edu/?q=Software.html .",2015-02-24 +24356771,Lysine-specific chemical cross-linking of protein complexes and identification of cross-linking sites using LC-MS/MS and the xQuest/xProphet software pipeline.,"Chemical cross-linking in combination with LC-MS/MS (XL-MS) is an emerging technology to obtain low-resolution structural (distance) restraints of proteins and protein complexes. These restraints can also be used to characterize protein complexes by integrative modeling of the XL-MS data, either in combination with other types of structural information or by themselves, to establish spatial relationships of subunits in protein complexes. 
Here we present a protocol that has been successfully used to generate XL-MS data from a multitude of native proteins and protein complexes. It includes the experimental steps for performing the cross-linking reaction using disuccinimidyl suberate (a homobifunctional, lysine-reactive cross-linking reagent), the enrichment of cross-linked peptides by peptide size-exclusion chromatography (SEC; to remove smaller, non-cross-linked peptides), instructions for tandem MS analysis and the analysis of MS data via the open-source computational software pipeline xQuest and xProphet (available from http://proteomics.ethz.ch). Once established, this robust protocol should take ∼4 d to complete, and it is generally applicable to purified proteins and protein complexes.",2013-12-19 +25625434,The MORPH-R web server and software tool for predicting missing genes in biological pathways.,"A biological pathway is the set of molecular entities involved in a given biological process and the interrelations among them. Even though biological pathways have been studied extensively, discovering missing genes in pathways remains a fundamental challenge. Here, we present an easy-to-use tool that allows users to run MORPH (MOdule-guided Ranking of candidate PatHway genes), an algorithm for revealing missing genes in biological pathways, and demonstrate its capabilities. MORPH supports the analysis in tomato, Arabidopsis and the two new species: rice and the newly sequenced potato genome. The new tool, called MORPH-R, is available both as a web server (at http://bioinformatics.psb.ugent.be/webtools/morph/) and as standalone software that can be used locally. In the standalone version, the user can apply the tool to new organisms using any proprietary and public data sources.",2015-02-24 +30699835,First Report of Leaf Spot Caused by Myrothecium roridum on Coffea canephora in Brazil.,"Coffea canephora (conilon coffee) represents approximately 30% of the coffee marketed worldwide. 
The state of Espírito Santo is the largest conilon coffee-producing state in Brazil. In 2013 and 2014, leaves with a leaf spot were observed on most of the conilon coffee seedlings in a commercial nursery in Laranja da Terra, Espírito Santo, Brazil. The infected leaves were deposited in the VIC Herbarium (VIC 42482) and a pure single-spore culture of the pathogen was deposited in the culture collection of the Universidade Federal de Viçosa (Accession No. COAD 1729). The initial symptoms were circular, brown to dark brown lesions with yellow margins occurring on both leaf surfaces. In high humidity, concentric rings formed and the lesions expanded rapidly to reach up to 30 mm in diameter, and later became dark brown with a grayish center. Black sporodochia with white, and marginal mycelial tufts bearing black spore masses were observed in the older lesions. These symptoms were consistent with those of Myrothecium leaf spot reported on Coffea spp. (3). Microscopic observation revealed aseptate, hyaline, and cylindrical conidia, rounded at both ends, greenish to black in mass, and 5 to 6 μm long and 1 to 2 μm wide. The symptoms and morphological characteristics described above matched the description of Myrothecium roridum Tode (4). To confirm this identification, DNA was extracted using a Wizard Genomic DNA Purification Kit and the sequence of an internal transcribed spacer (ITS) region was obtained and deposited in GenBank (Accession No. KJ815095). The sequence of the ITS region exhibited 100% identity over 561 bp with another M. roridum sequence in GenBank (JF343832). To verify the pathogenicity of the fungus, healthy leaves of the C. canephora clones 12v and 14 (four seedlings each) were wounded superficially with a sterilized needle and inoculated by spraying them with a suspension of M. roridum conidia (10^6 conidia ml^-1). The seedlings were covered with plastic bags and incubated in a growth chamber at 25°C under a photoperiod of 12 h light/12 h dark for 5 days. 
The control seedlings were sprayed with distilled water and incubated similarly. Fifteen days after inoculation, symptoms in all inoculated seedlings were consistent with those initially observed on the naturally infected seedlings, whereas the controls remained healthy. Re-isolation and identification confirmed Koch's postulates. M. roridum has a wide host range, and symptoms were similar to those reported in other hosts of the pathogen in Brazil (2,3). There is only one report of M. roridum on C. canephora in Colombia (1); however, this pathogen was previously reported on C. arabica in Brazil, Colombia, Costa Rica, Guatemala, India, Indonesia, Puerto Rico, and the Virgin Islands (1,3). To our knowledge, this is the first report of a leaf spot caused by M. roridum on conilon coffee in Brazil. The cultivation of conilon coffee is increasing and the reported leaf spot disease affects the quality of the seedlings in nurseries. It is therefore important to conduct a thorough study of management strategies for this disease. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Syst. Mycol. Microbiol. Lab. ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases , 27 May 2014. (2) A. M. Quezado Duval et al. Braz. J. Microbiol. 41:246, 2010. (3) S. F. Silveira et al. Fitopatol. Bras. 32:440, 2007. (4) M. Tulloch. Mycol. Pap. No. 130. CMI, Wallingford, UK, 1972.",2014-11-01 +25747556,Developing the HTA core model for the online environment.,"

Background

A framework for collaborative production and sharing of HTA information, the HTA Core Model, was originally developed within EUnetHTA in 2006-08. In this paper, we describe the further development of the Model to allow implementation and utilization of the Model online. The aim was to capture a generic HTA process that would allow effective use of the HTA Core Model and resulting HTA information while at the same time not interfering with HTA agencies' internal processes.

Methods

The work was coordinated by a development team in Finland, supported by an international expert group. Two pilot testing rounds were organized among EUnetHTA agencies and two extensive core HTA projects tested the tool in a real setting. The final work was also formally validated by a group of HTA agencies.

Results

The HTA Core Model Online -- available at http://www.corehta.info -- is a web site hosting a) a tool to allow electronic utilization of the HTA Core Model and b) a database of produced HTA information. While access to the HTA information is free to all, the production features are currently available to EUnetHTA member agencies only. A policy was crafted to steer the use of the Model and produced information.

Conclusions

We have successfully enabled electronic use of the HTA Core Model and agreed on a policy for its utilization. The system is already being used in subsequent HTA projects within EUnetHTA Joint Action 2. Identified shortcomings and further needs will be addressed in subsequent development.",2014-11-01 +30699841,"First Report of Plum pox virus Strain M Isolates in Apricot in Sicily, Italy.","Sharka or plum pox disease is one of the most economically important virus diseases of stone fruits. Plum pox virus (PPV), the causal agent, is a member of the genus Potyvirus of the family Potyviridae transmitted by aphids in a non-persistent manner and by grafting. To date, nine PPV strains have been described on the basis of their biological, serological, and molecular properties: M and D are the most widespread and economically important strains, PPV-Rec and PPV-C have been reported mainly in Europe, PPV-EA confined to Egypt, PPV-T to Turkey, PPV-W from Canada, Ukraine, Latvia, and Russia, PPV-CR detected in Russia, and finally a putative PPV strain infecting plum in Albania described as the ancestor of the M. PPV-M is responsible for major epidemics in many Italian regions and despite phytosanitary measures, the infection rate increases each year. The D and Rec isolates are sporadically reported while PPV-C, once signaled in Apulia, has been successfully eradicated. Except for a report from the 1980s, which is no longer traceable, Sicily was considered free from the virus (2). In 2012, two new foci of sharka in a coastal area of Catania in Sicily were first reported by the national plant protection service to the European Commission (DG-SANCO). In spring 2013, plants of different varieties of apricot (Prunus armeniaca) and peach (P. persica) showing typical symptoms of flower color break, yellowing and leaf deformation, chlorotic spots or rings, and malformation on fruits were tested positive to PPV by DAS-ELISA using polyclonal antibodies. 
In order to characterize two isolates from apricot varieties (Carmen Top and Ninfa), total RNAs, extracted using the RNeasy Plant Mini Kit (Qiagen) from ELISA-positive samples, were analyzed by RT-PCR with primers P1/P2, targeting the 3'-terminal region of the coat protein (CP) gene (5) followed by RFLP analysis after digestion with Rsa1. Subsequently total RNAs were analyzed with the type-specific primers P1/PM and P1/PD (3), P3M/P4b and P3D/P4b amplifying the N-terminal region of the CP gene (1) and, finally, with primers mD5/mD3, mM5/mM3, and mD5/mM3, amplifying the region 3'NIb-5'CP, including the recombination site of Rec isolates (4). Only primer pairs P1/P2, P1/PM, P3M/P4b, and mM5/mM3 produced amplicons of the expected size (243, 198, 466, and 459 bp, respectively). The RFLP assay confirmed both isolates belonging to the M strain. Moreover, no reaction was obtained with primer pair mD5/mM3, excluding isolates belonging to Rec-type. Isolate characterization was completed by direct sequencing in both directions of the P1/P2 and P3M/P4b amplicons obtained from apricot samples L9-1 (Carmen Top isolate) and 9-335 (Ninfa isolate). The P1/P2 sequences (KJ994235, KJ994237) showed 98% similarity with PPV-M or PPV-Rec isolates. The P3M/P4b sequences (KJ994236, KJ994238) confirmed that Sicilian isolates belong to the PPV-M strain showing 99% similarity with those already present in GenBank, thus ruling out the possibility of an infection with a PPV-Rec isolate. This outbreak of the Marcus strain of PPV in Sicily represents a high risk for the expanding production of stone fruit in southern Italy. An eradication plan was quickly activated by the regional phytosanitary service. References: (1) T. Candresse et al. Phytopathology 101:611, 2011. (2) EPPO. PQR-EPPO database on quarantine pests (available online). http://www.eppo.int , 2014. (3) A. Olmos et al. J. Virol. Methods 68:127, 1997. (4) Z. Subr et al. Acta Virol. 48:173, 2004. (5) T. Wetzel et al. J. Virol. 
Methods 33:355, 1991.",2014-11-01 +21598611,[Genetic characteristics of Neisseria meningitidis strains obtained from healthy carriers during meningococcal infection outbreaks].,"

Aim

Genetic and antigenic characterization of Neisseria meningitidis strains isolated during meningococcal infection outbreaks from individuals in contact with patients with generalized form of meningococcal infection.

Materials and methods

Strains obtained in 2007 - 2009 in Moscow during examination of individuals that were in contact with patients during meningococcal infection outbreaks were analyzed. Multilocus sequence typing, genetic subtyping and typing of VR fragment (FetA) techniques were used.

Results

Data regarding investigated strains were submitted to the database at http://pubmlst.org/neisseria/. Previously undescribed sequence types were found in 12 strains, sequence-type could not be determined in 2 strains, 2 strains lacked VR fragment (FetA). Serogroup A meningococci had ""P1.5-2,10: F3-5"" antigenic profile and belonged to ST-75 and ST-3349 sequence-type, these data do not support the emergence of epidemically significant strains in the territory under surveillance. All typed serogroup C strains and 1 serogroup B strain are of ""ST-41/44 complex/Lineage 3"" clonal complex. Subtypes of serogroup C meningococci strains match subtypes of strains that cause generalized forms of infection, while serogroup B strains isolated from the carriers and strains isolated from the patients had different antigenic profiles. Ungroupable strains had notably higher level of genetic and antigenic diversity: only 6 of 16 strains (37.5%) could be sequence-typed using earlier data, all these strains are of clonal complex ""ST-53 complex"" that consists mostly of strains isolated from the carriers. CONCLUSION. Ratio of meningococci population circulating in Moscow and subpopulation capable of causing generalized form of meningococcal infection (GFMI) is different for meningococci of various serogroups. Ungroupable strains isolated from the carriers are highly different from strains causing GFMI.",2011-03-01 +24091140,Fast automated protein NMR data collection and assignment by ADAPT-NMR on Bruker spectrometers.,"ADAPT-NMR (Assignment-directed Data collection Algorithm utilizing a Probabilistic Toolkit in NMR) supports automated NMR data collection and backbone and side chain assignment for [U-(13)C, U-(15)N]-labeled proteins. 
Given the sequence of the protein and data for the orthogonal 2D (1)H-(15)N and (1)H-(13)C planes, the algorithm automatically directs the collection of tilted plane data from a variety of triple-resonance experiments so as to follow an efficient pathway toward the probabilistic assignment of (1)H, (13)C, and (15)N signals to specific atoms in the covalent structure of the protein. Data collection and assignment calculations continue until the addition of new data no longer improves the assignment score. ADAPT-NMR was first implemented on Varian (Agilent) spectrometers [A. Bahrami, M. Tonelli, S.C. Sahu, K.K. Singarapu, H.R. Eghbalnia, J.L. Markley, PLoS One 7 (2012) e33173]. Because of broader interest in the approach, we present here a version of ADAPT-NMR for Bruker spectrometers. We have developed two AU console programs (ADAPT_ORTHO_run and ADAPT_NMR_run) that run under TOPSPIN Versions 3.0 and higher. To illustrate the performance of the algorithm on a Bruker spectrometer, we tested one protein, chlorella ubiquitin (76 amino acid residues), that had been used with the Varian version: the Bruker and Varian versions achieved the same level of assignment completeness (98% in 20 h). As a more rigorous evaluation of the Bruker version, we tested a larger protein, BRPF1 bromodomain (114 amino acid residues), which yielded an automated assignment completeness of 86% in 55 h. Both experiments were carried out on a 500 MHz Bruker AVANCE III spectrometer equipped with a z-gradient 5 mm TCI probe. ADAPT-NMR is available at http://pine.nmrfam.wisc.edu/ADAPT-NMR in the form of pulse programs, the two AU programs, and instructions for installation and use.",2013-08-30 +25361973,KnotProt: a database of proteins with knots and slipknots.,"The protein topology database KnotProt, http://knotprot.cent.uw.edu.pl/, collects information about protein structures with open polypeptide chains forming knots or slipknots. 
The knotting complexity of the cataloged proteins is presented in the form of a matrix diagram that shows users the knot type of the entire polypeptide chain and of each of its subchains. The pattern visible in the matrix gives the knotting fingerprint of a given protein and permits users to determine, for example, the minimal length of the knotted regions (knot's core size) or the depth of a knot, i.e. how many amino acids can be removed from either end of the cataloged protein structure before converting it from a knot to a different type of knot. In addition, the database presents extensive information about the biological functions, families and fold types of proteins with non-trivial knotting. As an additional feature, the KnotProt database enables users to submit protein or polymer chains and generate their knotting fingerprints.",2014-10-31 +25449328,mPLR-Loc: an adaptive decision multi-label classifier based on penalized logistic regression for protein subcellular localization prediction.,"Proteins located in appropriate cellular compartments are of paramount importance to exert their biological functions. Prediction of protein subcellular localization by computational methods is required in the post-genomic era. Recent studies have been focusing on predicting not only single-location proteins but also multi-location proteins. However, most of the existing predictors are far from effective for tackling the challenges of multi-label proteins. This article proposes an efficient multi-label predictor, namely mPLR-Loc, based on penalized logistic regression and adaptive decisions for predicting both single- and multi-location proteins. Specifically, for each query protein, mPLR-Loc exploits the information from the Gene Ontology (GO) database by using its accession number (AC) or the ACs of its homologs obtained via BLAST. 
The frequencies of GO occurrences are used to construct feature vectors, which are then classified by an adaptive decision-based multi-label penalized logistic regression classifier. Experimental results based on two recent stringent benchmark datasets (virus and plant) show that mPLR-Loc remarkably outperforms existing state-of-the-art multi-label predictors. In addition to being able to rapidly and accurately predict subcellular localization of single- and multi-label proteins, mPLR-Loc can also provide probabilistic confidence scores for the prediction decisions. For readers' convenience, the mPLR-Loc server is available online (http://bioinfo.eie.polyu.edu.hk/mPLRLocServer).",2014-10-31 +25361966,ADReCS: an ontology database for aiding standardization and hierarchical classification of adverse drug reaction terms.,"Adverse drug reactions (ADRs) are noxious and unexpected effects during normal drug therapy. They have caused significant clinical burden and been responsible for a large portion of new drug development failure. Molecular understanding and in silico evaluation of drug (or candidate) safety in laboratory is thus so desired, and unfortunately has been largely hindered by misuse of ADR terms. The growing impact of bioinformatics and systems biology in toxicological research also requires a specialized ADR term system that works beyond a simple glossary. Adverse Drug Reaction Classification System (ADReCS; http://bioinf.xmu.edu.cn/ADReCS) is a comprehensive ADR ontology database that provides not only ADR standardization but also hierarchical classification of ADR terms. The ADR terms were pre-assigned with unique digital IDs and at the same time were well organized into a four-level ADR hierarchy tree for building an ADR-ADR relation. Currently, the database covers 6544 standard ADR terms and 34,796 synonyms. It also incorporates information of 1355 single active ingredient drugs and 134,022 drug-ADR pairs. 
In summary, ADReCS offers an opportunity for direct computation on ADR terms and also provides clues to mining common features underlying ADRs.",2014-10-31 +25361971,GeneFriends: a human RNA-seq-based gene and transcript co-expression database.,"Co-expression networks have proven effective at assigning putative functions to genes based on the functional annotation of their co-expressed partners, in candidate gene prioritization studies and in improving our understanding of regulatory networks. The growing number of genome resequencing efforts and genome-wide association studies often identify loci containing novel genes and there is a need to infer their functions and interaction partners. To facilitate this we have expanded GeneFriends, an online database that allows users to identify co-expressed genes with one or more user-defined genes. This expansion entails an RNA-seq-based co-expression map that includes genes and transcripts that are not present in the microarray-based co-expression maps, including over 10,000 non-coding RNAs. The results users obtain from GeneFriends include a co-expression network as well as a summary of the functional enrichment among the co-expressed genes. Novel insights can be gathered from this database for different splice variants and ncRNAs, such as microRNAs and lincRNAs. Furthermore, our updated tool allows candidate transcripts to be linked to diseases and processes using a guilt-by-association approach. GeneFriends is freely available from http://www.GeneFriends.org and can be used to quickly identify and rank candidate targets relevant to the process or disease under study.",2014-10-31 +25632258,Araneae Sloveniae: a national spider species checklist.,"The research of the spider fauna of Slovenia dates back to the very beginning of binomial nomenclature, and has gone through more and less prolific phases with authors concentrating on taxonomy, faunistics, ecology and zoogeographic reviews. 
Although the body of published works is remarkable for a small nation, the faunistic data has remained too scattered for a thorough understanding of regional biotic diversity, for comparative and ecological research, and for informed conservation purposes. A national checklist is long overdue. Here, a critical review of all published records in any language is provided. The species list currently comprises 738 species, is published online at http://www.bioportal.si/katalog/araneae.php under the title Araneae Sloveniae, and will be updated in due course. This tool will fill the void in cataloguing regional spider faunas and will facilitate further araneological research in central and southern Europe.",2015-01-21 +23560875,An AUC-based permutation variable importance measure for random forests.,"

Background

The random forest (RF) method is a commonly used tool for classification with high dimensional data as well as for ranking candidate predictors based on the so-called random forest variable importance measures (VIMs). However the classification performance of RF is known to be suboptimal in case of strongly unbalanced data, i.e. data where response class sizes differ considerably. Suggestions were made to obtain better classification performance based either on sampling procedures or on cost sensitivity analyses. However to our knowledge the performance of the VIMs has not yet been examined in the case of unbalanced response classes. In this paper we explore the performance of the permutation VIM for unbalanced data settings and introduce an alternative permutation VIM based on the area under the curve (AUC) that is expected to be more robust towards class imbalance.

Results

We investigated the performance of the standard permutation VIM and of our novel AUC-based permutation VIM for different class imbalance levels using simulated data and real data. The results suggest that the new AUC-based permutation VIM outperforms the standard permutation VIM for unbalanced data settings while both permutation VIMs have equal performance for balanced data settings.

Conclusions

The standard permutation VIM loses its ability to discriminate between associated predictors and predictors not associated with the response for increasing class imbalance. It is outperformed by our new AUC-based permutation VIM for unbalanced data settings, while the performance of both VIMs is very similar in the case of balanced classes. The new AUC-based VIM is implemented in the R package party for the unbiased RF variant based on conditional inference trees. The codes implementing our study are available from the companion website: http://www.ibe.med.uni-muenchen.de/organisation/mitarbeiter/070_drittmittel/janitza/index.html.",2013-04-05 +24938749,FGAP: an automated gap closing tool.,"

Background

The fast reduction of prices of DNA sequencing allowed rapid accumulation of genome data. However, the process of obtaining complete genome sequences is still very time consuming and labor demanding. In addition, data produced from various sequencing technologies or alternative assemblies remain underexplored to improve assembly of incomplete genome sequences.

Findings

We have developed FGAP, a tool for closing gaps of draft genome sequences that takes advantage of different datasets. FGAP uses BLAST to align multiple contigs against a draft genome assembly aiming to find sequences that overlap gaps. The algorithm selects the best sequence to fill and eliminate the gap.

Conclusions

FGAP reduced the number of gaps by 78% in an E. coli draft genome assembly using two different sequencing technologies, Illumina and 454. Using PacBio long reads, 98% of gaps were solved. In human chromosome 14 assemblies, FGAP reduced the number of gaps by 35%. All the inserted sequences were validated with a reference genome using QUAST. The source code and a web tool are available at http://www.bioinfo.ufpr.br/fgap/.",2014-06-18 +22135288,Dr.VIS: a database of human disease-related viral integration sites.,"Viral integration plays an important role in the development of malignant diseases. Viruses differ in preferred integration site and flanking sequence. Viral integration sites (VIS) have been found next to oncogenes and common fragile sites. Understanding the typical DNA features near VIS is useful for the identification of potential oncogenes, prediction of malignant disease development and assessing the probability of malignant transformation in gene therapy. Therefore, we have built a database of human disease-related VIS (Dr.VIS, http://www.scbit.org/dbmi/drvis) to collect and maintain human disease-related VIS data, including characteristics of the malignant disease, chromosome region, genomic position and viral-host junction sequence. The current build of Dr.VIS covers about 600 natural VIS of 5 oncogenic viruses representing 11 diseases. Among them, about 200 VIS have viral-host junction sequence.",2011-12-01 +27153631,MINTbase: a framework for the interactive exploration of mitochondrial and nuclear tRNA fragments.,"

Motivation

It has been known that mature transfer RNAs (tRNAs) that are encoded in the nuclear genome give rise to short molecules, collectively known as tRNA fragments or tRFs. Recently, we reported that, in healthy individuals and in patients, tRFs are constitutive, arise from mitochondrial as well as from nuclear tRNAs, and have composition and abundances that depend on a person's sex, population origin and race as well as on tissue, disease and disease subtype. Our findings as well as similar work by other groups highlight the importance of tRFs and presage an increase in the community's interest in elucidating the roles of tRFs in health and disease.

Results

We created MINTbase, a web-based framework that serves the dual-purpose of being a content repository for tRFs and a tool for the interactive exploration of these newly discovered molecules. A key feature of MINTbase is that it deterministically and exhaustively enumerates all possible genomic locations where a sequence fragment can be found and indicates which fragments are exclusive to tRNA space, and thus can be considered as tRFs: this is a very important consideration given that the genomes of higher organisms are riddled with partial tRNA sequences and with tRNA-lookalikes whose aberrant transcripts can be mistaken for tRFs. MINTbase is extremely flexible and integrates and presents tRF information from multiple yet interconnected vantage points ('vistas'). Vistas permit the user to interactively personalize the information that is returned and the manner in which it is displayed. MINTbase can report comparative information on how a tRF is distributed across all anticodon/amino acid combinations, provides alignments between a tRNA and multiple tRFs with which the user can interact, provides details on published studies that reported a tRF as expressed, etc. Importantly, we designed MINTbase to contain all possible tRFs that could ever be produced by mature tRNAs: this allows us to report on their genomic distributions, anticodon/amino acid properties, alignments, etc. while giving users the ability to at-will investigate candidate tRF molecules before embarking on focused experimental explorations. Lastly, we also introduce a new labeling scheme that is tRF-sequence-based and allows users to associate a tRF with a universally unique label ('tRF-license plate') that is independent of a genome assembly and does not require any brokering mechanism.

Availability and implementation

MINTbase is freely accessible at http://cm.jefferson.edu/MINTbase/. Dataset submissions to MINTbase can be initiated at http://cm.jefferson.edu/MINTsubmit/

Contact

isidore.rigoutsos@jefferson.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2016-04-13 +22332784,The metabolic blueprint of Phaeodactylum tricornutum reveals a eukaryotic Entner-Doudoroff glycolytic pathway.,"Diatoms are one of the most successful groups of unicellular eukaryotic algae. Successive endosymbiotic events contributed to their flexible metabolism, making them competitive in variable aquatic habitats. Although the recently sequenced genomes of the model diatoms Phaeodactylum tricornutum and Thalassiosira pseudonana have provided the first insights into their metabolic organization, the current knowledge on diatom biochemistry remains fragmentary. By means of a genome-wide approach, we developed DiatomCyc, a detailed pathway/genome database of P. tricornutum. DiatomCyc contains 286 pathways with 1719 metabolic reactions and 1613 assigned enzymes, spanning both the central and parts of the secondary metabolism of P. tricornutum. Central metabolic pathways, such as those of carbohydrates, amino acids and fatty acids, were covered. Furthermore, our understanding of the carbohydrate model in P. tricornutum was extended. In particular we highlight the discovery of a functional Entner-Doudoroff pathway, an ancient alternative for the glycolytic Embden-Meyerhof-Parnas pathway, and a putative phosphoketolase pathway, both uncommon in eukaryotes. DiatomCyc is accessible online (http://www.diatomcyc.org), and offers a range of software tools for the visualization and analysis of metabolic networks and 'omics' data. We anticipate that DiatomCyc will be key to gaining further understanding of diatom metabolism and, ultimately, will feed metabolic engineering strategies for the industrial valorization of diatoms.",2012-03-31 +25489177,RiceQTLPro: an integrated database for quantitative trait loci marker mapping in rice plant.,"

Unlabelled

The National Agricultural Biotechnology Information Center (NABIC) in South Korea reconstructed a RiceQTLPro database for gene positional analysis and structure prediction of the chromosomes. This database is an integrated web-based system providing information about quantitative trait loci (QTL) markers in rice plant. RiceQTLPro has three main features, namely (1) QTL markers list, (2) searching of markers using keyword, and (3) searching of marker position on the rice chromosomes. This updated database provides information on 112 QTL markers with 817 polymorphic markers on each of the 12 chromosomes in rice.

Availability

The database is available for free at http://nabic.rda.go.kr/gere/rice/geneticMap/",2014-10-30 +25785185,"Ontorat: automatic generation of new ontology terms, annotations, and axioms based on ontology design patterns.","

Background

It is time-consuming to build an ontology with many terms and axioms. Thus it is desired to automate the process of ontology development. Ontology Design Patterns (ODPs) provide a reusable solution to solve a recurrent modeling problem in the context of ontology engineering. Because ontology terms often follow specific ODPs, the Ontology for Biomedical Investigations (OBI) developers proposed a Quick Term Templates (QTTs) process targeted at generating new ontology classes following the same pattern, using term templates in a spreadsheet format.

Results

Inspired by the ODPs and QTTs, the Ontorat web application is developed to automatically generate new ontology terms, annotations of terms, and logical axioms based on a specific ODP(s). The inputs of an Ontorat execution include axiom expression settings, an input data file, ID generation settings, and a target ontology (optional). The axiom expression settings can be saved as a predesigned Ontorat setting format text file for reuse. The input data file is generated based on a template file created by a specific ODP (text or Excel format). Ontorat is an efficient tool for ontology expansion. Different use cases are described. For example, Ontorat was applied to automatically generate over 1,000 Japan RIKEN cell line cell terms with both logical axioms and rich annotation axioms in the Cell Line Ontology (CLO). Approximately 800 licensed animal vaccines were represented and annotated in the Vaccine Ontology (VO) by Ontorat. The OBI team used Ontorat to add assay and device terms required by ENCODE project. Ontorat was also used to add missing annotations to all existing Biobank specific terms in the Biobank Ontology. A collection of ODPs and templates with examples are provided on the Ontorat website and can be reused to facilitate ontology development.

Conclusions

With ever increasing ontology development and applications, Ontorat provides a timely platform for generating and annotating a large number of ontology terms by following design patterns.

Availability

http://ontorat.hegroup.org/.",2015-01-09 +25568282,A haplotype-based framework for group-wise transmission/disequilibrium tests for rare variant association analysis.,"

Motivation

A major focus of current sequencing studies for human genetics is to identify rare variants associated with complex diseases. Aside from reduced power of detecting associated rare variants, controlling for population stratification is particularly challenging for rare variants. Transmission/disequilibrium tests (TDT) based on family designs are robust to population stratification and admixture, and therefore provide an effective approach to rare variant association studies to eliminate spurious associations. To increase power of rare variant association analysis, gene-based collapsing methods become standard approaches for analyzing rare variants. Existing methods that extend this strategy to rare variants in families usually combine TDT statistics at individual variants and therefore lack the flexibility of incorporating other genetic models.

Results

In this study, we describe a haplotype-based framework for group-wise TDT (gTDT) that is flexible to encompass a variety of genetic models such as additive, dominant and compound heterozygous (CH) (i.e. recessive) models as well as other complex interactions. Unlike existing methods, gTDT constructs haplotypes by transmission when possible and inherently takes into account the linkage disequilibrium among variants. Through extensive simulations we showed that type I error was correctly controlled for rare variants under all models investigated, and this remained true in the presence of population stratification. Under a variety of genetic models, gTDT showed increased power compared with the single marker TDT. Application of gTDT to an autism exome sequencing data of 118 trios identified potentially interesting candidate genes with CH rare variants.

Availability and implementation

We implemented gTDT in C++ and the source code and the detailed usage are available on the authors' website (https://medschool.vanderbilt.edu/cgg).

Contact

bingshan.li@vanderbilt.edu or wei.chen@chp.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-01-06 +21576754,A memory efficient method for structure-based RNA multiple alignment.,"Structure-based RNA multiple alignment is particularly challenging because covarying mutations make sequence information alone insufficient. Existing tools for RNA multiple alignment first generate pairwise RNA structure alignments and then build the multiple alignment using only sequence information. Here we present PMFastR, an algorithm which iteratively uses a sequence-structure alignment procedure to build a structure-based RNA multiple alignment from one sequence with known structure and a database of sequences from the same family. PMFastR also has low memory consumption allowing for the alignment of large sequences such as 16S and 23S rRNA. The algorithm also provides a method to utilize a multicore environment. We present results on benchmark data sets from BRAliBase, which shows PMFastR performs comparably to other state-of-the-art programs. Finally, we regenerate 607 Rfam seed alignments and show that our automated process creates multiple alignments similar to the manually curated Rfam seed alignments. Thus, the techniques presented in this paper allow for the generation of multiple alignments using sequence-structure guidance, while limiting memory consumption. As a result, multiple alignments of long RNA sequences, such as 16S and 23S rRNAs, can easily be generated locally on a personal computer. The software and supplementary data are available at http://genome.ucf.edu/PMFastR.",2011-04-29 +25359887,Automated structural classification of lipids by machine learning.,"

Motivation

Modern lipidomics is largely dependent upon structural ontologies because of the great diversity exhibited in the lipidome, but no automated lipid classification exists to facilitate this partitioning. The size of the putative lipidome far exceeds the number currently classified, despite a decade of work. Automated classification would benefit ongoing classification efforts by decreasing the time needed and increasing the accuracy of classification while providing classifications for mass spectral identification algorithms.

Results

We introduce a tool that automates classification into the LIPID MAPS ontology of known lipids with >95% accuracy and novel lipids with 63% accuracy. The classification is based upon simple chemical characteristics and modern machine learning algorithms. The decision trees produced are intelligible and can be used to clarify implicit assumptions about the current LIPID MAPS classification scheme. These characteristics and decision trees are made available to facilitate alternative implementations. We also discovered many hundreds of lipids that are currently misclassified in the LIPID MAPS database, strongly underscoring the need for automated classification.

Availability and implementation

Source code and chemical characteristic lists as SMARTS search strings are available under an open-source license at https://www.github.com/princelab/lipid_classifier.",2014-10-29 +24876995,"The korean social life, health and aging project-health examination cohort.","The Korean Social Life, Health, and Aging Project (KSHAP) is a population-based longitudinal study of health determinants among elderly Koreans. The target population of the KSHAP are people aged 60 years or older and their spouses living in a rural community of Korea. A complete enumeration survey was conducted in the first wave of the KSHAP on 94.7% (814 of 860) of the target population between December 2011 and July 2012. The KSHAP-Health Examination (KSHAP-HE) cohort consists of 698 people who completed additional health examinations at a public health center (n=533) or at their home (n=165). Face-to-face questionnaires were used to interview participants on their demographics, social network characteristics, medical history, health behaviors, cognitive function, and depression symptoms. Health center examinations included anthropometric measures, body impedance analysis, resting blood pressure measurement, radial artery tonometry, bone densitometry, the timed up-and-go test, and fasting blood analysis. However, only anthropometric measures, blood pressure measurement, and non-fasting blood analysis were available for home health examinations. Collaboration is encouraged and access to the KSHAP baseline data will be available via the website of the Korean Social Science Data Archive (http://www.kossda.or.kr). The Korean Social Life, Health, and Aging Project (KSHAP) is a population-based longitudinal study of health determinants among elderly Koreans. The target population of the KSHAP are people aged 60 years or older and their spouses living in a rural community of Korea. 
A complete enumeration survey was conducted in the first wave of the KSHAP on 94.7% (814 of 860) of the target population between December 2011 and July 2012. The KSHAP-Health Examination (KSHAP-HE) cohort consists of 698 people who completed additional health examinations at a public health center (n=533) or at their home (n=165). Face-to-face questionnaires were used to interview participants on their demographics, social network characteristics, medical history, health behaviors, cognitive function, and depression symptoms. Health center examinations included anthropometric measures, body impedance analysis, resting blood pressure measurement, radial artery tonometry, bone densitometry, the timed up-and-go test, and fasting blood analysis. However, only anthropometric measures, blood pressure measurement, and non-fasting blood analysis were available for home health examinations. Collaboration is encouraged and access to the KSHAP baseline data will be available via the website of the Korean Social Science Data Archive (http://www.kossda.or.kr).",2014-05-13 +25411834,"Operating characteristics of residential care communities, by community bed size: United States, 2012.","In 2012, the majority of residential care communities had 4–25 beds, yet 71% of residents lived in communities with more than 50 beds. A lower percentage of communities with 4–25 beds were chain-affiliated, nonprofit, and in operation 10 years or more, compared with communities with 26–50 and more than 50 beds. Dementia-exclusive care or dementia care units were more common as community size increased. A higher percentage of communities with more than 50 beds screened for cognitive impairment and offered dementia-specific programming compared with communities with 4–25 and 26–50 beds. A higher percentage of communities with more than 50 beds screened for depression compared with communities with 4–25 beds. 
Compared with communities with 4–25 beds, a higher percentage of communities with 26–50 beds and more than 50 beds provided therapeutic, hospice, mental health, and dental services; but a lower percentage of communities with more than 50 beds provided skilled nursing services than did smaller communities. This report presents national estimates of residential care communities, using data from the first wave of NSLTCP. This brief profile of residential care communities provides useful information to policymakers, providers, researchers, and consumer advocates as they plan to meet the needs of an aging population. The findings also highlight the diversity of residential care communities across different sizes. Corresponding state estimates and their standard errors for the national figures in this data brief can be found on the NSLTCP website at http://www.cdc.gov/nchs/nsltcp/nsltcp_products.htm. These national and state estimates establish a baseline for monitoring trends among residents living in residential care.",2014-11-01 +25352549,euL1db: the European database of L1HS retrotransposon insertions in humans.,"Retrotransposons account for almost half of our genome. They are mobile genetic elements--also known as jumping genes--but only the L1HS subfamily of Long Interspersed Nuclear Elements (LINEs) has retained the ability to jump autonomously in modern humans. Their mobilization in germline--but also some somatic tissues--contributes to human genetic diversity and to diseases, such as cancer. Here, we present euL1db, the European database of L1HS retrotransposon insertions in humans (available at http://euL1db.unice.fr). euL1db provides a curated and comprehensive summary of L1HS insertion polymorphisms identified in healthy or pathological human samples and published in peer-reviewed journals. A key feature of euL1db is its sample-wise organization. Hence L1HS insertion polymorphisms are connected to samples, individuals, families and clinical conditions. 
The current version of euL1db centralizes results obtained in 32 studies. It contains >900 samples, >140,000 sample-wise insertions and almost 9000 distinct merged insertions. euL1db will help understanding the link between L1 retrotransposon insertion polymorphisms and phenotype or disease.",2014-10-28 +25792551,QSLiMFinder: improved short linear motif prediction using specific query protein data.,"

Motivation

The sensitivity of de novo short linear motif (SLiM) prediction is limited by the number of patterns (the motif space) being assessed for enrichment. QSLiMFinder uses specific query protein information to restrict the motif space and thereby increase the sensitivity and specificity of predictions.

Results

QSLiMFinder was extensively benchmarked using known SLiM-containing proteins and simulated protein interaction datasets of real human proteins. Exploiting prior knowledge of a query protein likely to be involved in a SLiM-mediated interaction increased the proportion of true positives correctly returned and reduced the proportion of datasets returning a false positive prediction. The biggest improvement was seen if a short region of the query protein flanking the interaction site was known.

Availability and implementation

All the tools and data used in this study, including QSLiMFinder and the SLiMBench benchmarking software, are freely available under a GNU license as part of SLiMSuite, at: http://bioware.soton.ac.uk.",2015-03-19 +23445565,Fast probabilistic file fingerprinting for big data.,"

Background

Biological data acquisition is raising new challenges, both in data analysis and handling. Not only is it proving hard to analyze the data at the rate it is generated today, but simply reading and transferring data files can be prohibitively slow due to their size. This primarily concerns logistics within and between data centers, but is also important for workstation users in the analysis phase. Common usage patterns, such as comparing and transferring files, are proving computationally expensive and are tying down shared resources.

Results

We present an efficient method for calculating file uniqueness for large scientific data files, that takes less computational effort than existing techniques. This method, called Probabilistic Fast File Fingerprinting (PFFF), exploits the variation present in biological data and computes file fingerprints by sampling randomly from the file instead of reading it in full. Consequently, it has a flat performance characteristic, correlated with data variation rather than file size. We demonstrate that probabilistic fingerprinting can be as reliable as existing hashing techniques, with provably negligible risk of collisions. We measure the performance of the algorithm on a number of data storage and access technologies, identifying its strengths as well as limitations.

Conclusions

Probabilistic fingerprinting may significantly reduce the use of computational resources when comparing very large files. Utilisation of probabilistic fingerprinting techniques can increase the speed of common file-related workflows, both in the data center and for workbench analysis. The implementation of the algorithm is available as an open-source tool named pfff, as a command-line tool as well as a C library. The tool can be downloaded from http://biit.cs.ut.ee/pfff.",2013-02-15 +23782512,SeqSIMLA: a sequence and phenotype simulation tool for complex disease studies.,"

Background

Association studies based on next-generation sequencing (NGS) technology have become popular, and statistical association tests for NGS data have been developed rapidly. A flexible tool for simulating sequence data in either unrelated case-control or family samples with different disease and quantitative trait models would be useful for evaluating the statistical power for planning a study design and for comparing power among statistical methods based on NGS data.

Results

We developed a simulation tool, SeqSIMLA, which can simulate sequence data with user-specified disease and quantitative trait models. We implemented two disease models, in which the user can flexibly specify the number of disease loci, effect sizes or population attributable risk, disease prevalence, and risk or protective loci. We also implemented a quantitative trait model, in which the user can specify the number of quantitative trait loci (QTL), proportions of variance explained by the QTL, and genetic models. We compiled recombination rates from the HapMap project so that genomic structures similar to the real data can be simulated.

Conclusions

SeqSIMLA can efficiently simulate sequence data with disease or quantitative trait models specified by the user. SeqSIMLA will be very useful for evaluating statistical properties for new study designs and new statistical methods using NGS. SeqSIMLA can be downloaded for free at http://seqsimla.sourceforge.net.",2013-06-20 +26040700,SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations.,"SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole genome re-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.",2015-06-03 +26042154,GAML: genome assembly by maximum likelihood.,"BACKGROUND:Resolution of repeats and scaffolding of shorter contigs are critical parts of genome assembly. 
Modern assemblers usually perform such steps by heuristics, often tailored to a particular technology for producing paired or long reads. RESULTS:We propose a new framework that allows systematic combination of diverse sequencing datasets into a single assembly. We achieve this by searching for an assembly with the maximum likelihood in a probabilistic model capturing error rate, insert lengths, and other characteristics of the sequencing technology used to produce each dataset. We have implemented a prototype genome assembler GAML that can use any combination of insert sizes with Illumina or 454 reads, as well as PacBio reads. Our experiments show that we can assemble short genomes with N50 sizes and error rates comparable to ALLPATHS-LG or Cerulean. While ALLPATHS-LG and Cerulean require each a specific combination of datasets, GAML works on any combination. CONCLUSIONS:We have introduced a new probabilistic approach to genome assembly and demonstrated that this approach can lead to superior results when used to combine diverse set of datasets from different sequencing technologies. Data and software is available at http://compbio.fmph.uniba.sk/gaml.",2015-06-03 +26382192,Component-wise gradient boosting and false discovery control in survival analysis with high-dimensional covariates.,"

Motivation

Technological advances that allow routine identification of high-dimensional risk factors have led to high demand for statistical techniques that enable full utilization of these rich sources of information for genetics studies. Variable selection for censored outcome data as well as control of false discoveries (i.e. inclusion of irrelevant variables) in the presence of high-dimensional predictors present serious challenges. This article develops a computationally feasible method based on boosting and stability selection. Specifically, we modified the component-wise gradient boosting to improve the computational feasibility and introduced random permutation in stability selection for controlling false discoveries.

Results

We have proposed a high-dimensional variable selection method by incorporating stability selection to control false discovery. Comparisons between the proposed method and the commonly used univariate and Lasso approaches for variable selection reveal that the proposed method yields fewer false discoveries. The proposed method is applied to study the associations of 2339 common single-nucleotide polymorphisms (SNPs) with overall survival among cutaneous melanoma (CM) patients. The results have confirmed that BRCA2 pathway SNPs are likely to be associated with overall survival, as reported by previous literature. Moreover, we have identified several new Fanconi anemia (FA) pathway SNPs that are likely to modulate survival of CM patients.

Availability and implementation

The related source code and documents are freely available at https://sites.google.com/site/bestumich/issues.

Contact

yili@umich.edu.",2015-09-17 +26048600,Quantitative frame analysis and the annotation of GC-rich (and other) prokaryotic genomes. An application to Anaeromyxobacter dehalogenans.,"

Motivation

Graphical representations of contrasts in GC usage among codon frame positions (frame analysis) provide evidence of genes missing from the annotations of prokaryotic genomes of high GC content but the qualitative approach of visual frame analysis prevents its applicability on a genomic scale.

Results

We developed two quantitative methods for the identification and statistical characterization in sequence regions of three-base periodicity (hits) associated with open reading frame structures. The methods were implemented in the N-Profile Analysis Computational Tool (NPACT), which highlights in graphical representations inconsistencies between newly identified ORFs and pre-existing annotations of coding-regions. We applied the NPACT procedures to two recently annotated strains of the deltaproteobacterium Anaeromyxobacter dehalogenans, identifying in both genomes numerous conserved ORFs not included in the published annotation of coding regions.

Availability and implementation

NPACT is available as a web-based service and for download at http://genome.ufl.edu/npact.

Contact

lucianob@ufl.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-04 +21278187,Identity-by-descent filtering of exome sequence data for disease-gene identification in autosomal recessive disorders.,"

Motivation

Next-generation sequencing and exome-capture technologies are currently revolutionizing the way geneticists screen for disease-causing mutations in rare Mendelian disorders. However, the identification of causal mutations is challenging due to the sheer number of variants that are identified in individual exomes. Although databases such as dbSNP or HapMap can be used to reduce the plethora of candidate genes by filtering out common variants, the remaining set of genes still remains on the order of dozens.

Results

Our algorithm uses a non-homogeneous hidden Markov model that employs local recombination rates to identify chromosomal regions that are identical by descent (IBD = 2) in children of consanguineous or non-consanguineous parents solely based on genotype data of siblings derived from high-throughput sequencing platforms. Using simulated and real exome sequence data, we show that our algorithm is able to reduce the search space for the causative disease gene to a fifth or a tenth of the entire exome.

Availability

An R script and an accompanying tutorial are available at http://compbio.charite.de/index.php/ibd2.html.",2011-01-28 +24389656,Socrates: identification of genomic rearrangements in tumour genomes by re-aligning soft clipped reads.,"

Motivation

Methods for detecting somatic genome rearrangements in tumours using next-generation sequencing are vital in cancer genomics. Available algorithms use one or more sources of evidence, such as read depth, paired-end reads or split reads to predict structural variants. However, the problem remains challenging due to the significant computational burden and high false-positive or false-negative rates.

Results

In this article, we present Socrates (SOft Clip re-alignment To idEntify Structural variants), a highly efficient and effective method for detecting genomic rearrangements in tumours that uses only split-read data. Socrates has single-nucleotide resolution, identifies micro-homologies and untemplated sequence at break points, has high sensitivity and high specificity and takes advantage of parallelism for efficient use of resources. We demonstrate using simulated and real data that Socrates performs well compared with a number of existing structural variant detection tools.

Availability and implementation

Socrates is released as open source and available from http://bioinf.wehi.edu.au/socrates CONTACT: papenfuss@wehi.edu.au Supplementary information: Supplementary data are available at Bioinformatics online.",2014-01-02 +26254489,TIPR: transcription initiation pattern recognition on a genome scale.,"

Motivation

The computational identification of gene transcription start sites (TSSs) can provide insights into the regulation and function of genes without performing expensive experiments, particularly in organisms with incomplete annotations. High-resolution general-purpose TSS prediction remains a challenging problem, with little recent progress on the identification and differentiation of TSSs which are arranged in different spatial patterns along the chromosome.

Results

In this work, we present the Transcription Initiation Pattern Recognizer (TIPR), a sequence-based machine learning model that identifies TSSs with high accuracy and resolution for multiple spatial distribution patterns along the genome, including broadly distributed TSS patterns that have previously been difficult to characterize. TIPR predicts not only the locations of TSSs but also the expected spatial initiation pattern each TSS will form along the chromosome-a novel capability for TSS prediction algorithms. As spatial initiation patterns are associated with spatiotemporal expression patterns and gene function, this capability has the potential to improve gene annotations and our understanding of the regulation of transcription initiation. The high nucleotide resolution of this model locates TSSs within 10 nucleotides or less on average.

Availability and implementation

Model source code is made available online at http://megraw.cgrb.oregonstate.edu/software/TIPR/.

Contact

megrawm@science.oregonstate.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-08 +24886210,Network topology-based detection of differential gene regulation and regulatory switches in cell metabolism and signaling.,"

Background

Common approaches to pathway analysis treat pathways merely as lists of genes disregarding their topological structures, that is, ignoring the genes' interactions on which a pathway's cellular function depends. In contrast, PathWave has been developed for the analysis of high-throughput gene expression data that explicitly takes the topology of networks into account to identify both global dysregulation of and localized (switch-like) regulatory shifts within metabolic and signaling pathways. For this purpose, it applies adjusted wavelet transforms on optimized 2D grid representations of curated pathway maps.

Results

Here, we present the new version of PathWave with several substantial improvements including a new method for optimally mapping pathway networks onto compact 2D lattice grids, a more flexible and user-friendly interface, and pre-arranged 2D grid representations. These pathway representations are assembled for several species now comprising H. sapiens, M. musculus, D. melanogaster, D. rerio, C. elegans, and E. coli. We show that PathWave is more sensitive than common approaches and apply it to RNA-seq expression data, identifying crucial metabolic pathways in lung adenocarcinoma, as well as microarray expression data, identifying pathways involved in longevity of Drosophila.

Conclusions

PathWave is a generic method for pathway analysis complementing established tools like GSEA, and the update comprises efficient new features. In contrast to the tested commonly applied approaches which do not take network topology into account, PathWave enables identifying pathways that are either known to be involved in or very likely associated with such diverse conditions as human lung cancer or aging of D. melanogaster. The PathWave R package is freely available at http://www.ichip.de/software/pathwave.html.",2014-05-16 +23956305,A Turing test for artificial expression data.,"

Motivation

The lack of reliable, comprehensive gold standards complicates the development of many bioinformatics tools, particularly for the analysis of expression data and biological networks. Simulation approaches can provide provisional gold standards, such as regulatory networks, for the assessment of network inference methods. However, this just defers the problem, as it is difficult to assess how closely simulators emulate the properties of real data.

Results

In analogy to Turing's test discriminating humans and computers based on responses to questions, we systematically compare real and artificial systems based on their gene expression output. Different expression data analysis techniques such as clustering are applied to both types of datasets. We define and extract distributions of properties from the results, for instance, distributions of cluster quality measures or transcription factor activity patterns. Distributions of properties are represented as histograms to enable the comparison of artificial and real datasets. We examine three frequently used simulators that generate expression data from parameterized regulatory networks. We identify features distinguishing real from artificial datasets that suggest how simulators could be adapted to better emulate real datasets and, thus, become more suitable for the evaluation of data analysis tools.

Availability

See http://www2.bio.ifi.lmu.de/~kueffner/attfad/ and the supplement for precomputed analyses; other compendia can be analyzed via the CRAN package attfad. The full datasets can be obtained from http://www2.bio.ifi.lmu.de/~kueffner/attfad/data.tar.gz.",2013-08-16 +22753780,AutoBind: automatic extraction of protein-ligand-binding affinity data from biological literature.,"

Motivation

Determination of the binding affinity of a protein-ligand complex is important to quantitatively specify whether a particular small molecule will bind to the target protein. Besides, collection of comprehensive datasets for protein-ligand complexes and their corresponding binding affinities is crucial in developing accurate scoring functions for the prediction of the binding affinities of previously unknown protein-ligand complexes. In the past decades, several databases of protein-ligand-binding affinities have been created via visual extraction from literature. However, such approaches are time-consuming and most of these databases are updated only a few times per year. Hence, there is an immediate demand for an automatic extraction method with high precision for binding affinity collection.

Result

We have created a new database of protein-ligand-binding affinity data, AutoBind, based on automatic information retrieval. We first compiled a collection of 1586 articles where the binding affinities have been marked manually. Based on this annotated collection, we designed four sentence patterns that are used to scan full-text articles as well as a scoring function to rank the sentences that match our patterns. The proposed sentence patterns can effectively identify the binding affinities in full-text articles. Our assessment shows that AutoBind achieved 84.22% precision and 79.07% recall on the testing corpus. Currently, 13 616 protein-ligand complexes and the corresponding binding affinities have been deposited in AutoBind from 17 221 articles.

Availability

AutoBind is automatically updated on a monthly basis, and it is freely available at http://autobind.csie.ncku.edu.tw/ and http://autobind.mc.ntu.edu.tw/. All of the deposited binding affinities have been refined and approved manually before being released.",2012-07-02 +24844244,MELK is an oncogenic kinase essential for mitotic progression in basal-like breast cancer cells.,"Despite marked advances in breast cancer therapy, basal-like breast cancer (BBC), an aggressive subtype of breast cancer usually lacking estrogen and progesterone receptors, remains difficult to treat. In this study, we report the identification of MELK as a novel oncogenic kinase from an in vivo tumorigenesis screen using a kinome-wide open reading frames (ORFs) library. Analysis of clinical data reveals a high level of MELK overexpression in BBC, a feature that is largely dependent on FoxM1, a master mitotic transcription factor that is also found to be highly overexpressed in BBC. Ablation of MELK selectively impairs proliferation of basal-like, but not luminal breast cancer cells both in vitro and in vivo. Mechanistically, depletion of MELK in BBC cells induces caspase-dependent cell death, preceded by defective mitosis. Finally, we find that Melk is not required for mouse development and physiology. Together, these data indicate that MELK is a normally non-essential kinase, but is critical for BBC and thus represents a promising selective therapeutic target for the most aggressive subtype of breast cancer.DOI: http://dx.doi.org/10.7554/eLife.01763.001.",2014-05-20 +25595311,VQone MATLAB toolbox: A graphical experiment builder for image and video quality evaluations: VQone MATLAB toolbox.,"This article presents VQone, a graphical experiment builder, written as a MATLAB toolbox, developed for image and video quality ratings. VQone contains the main elements needed for the subjective image and video quality rating process. This includes building and conducting experiments and data analysis. 
All functions can be controlled through graphical user interfaces. The experiment builder includes many standardized image and video quality rating methods. Moreover, it enables the creation of new methods or modified versions from standard methods. VQone is distributed free of charge under the terms of the GNU general public license and allows code modifications to be made so that the program's functions can be adjusted according to a user's requirements. VQone is available for download from the project page (http://www.helsinki.fi/psychology/groups/visualcognition/).",2015-01-17 +24990767,A spatial simulation approach to account for protein structure when identifying non-random somatic mutations.,"

Background

Current research suggests that a small set of ""driver"" mutations are responsible for tumorigenesis while a larger body of ""passenger"" mutations occur in the tumor but do not progress the disease. Due to recent pharmacological successes in treating cancers caused by driver mutations, a variety of methodologies that attempt to identify such mutations have been developed. Based on the hypothesis that driver mutations tend to cluster in key regions of the protein, the development of cluster identification algorithms has become critical.

Results

We have developed a novel methodology, SpacePAC (Spatial Protein Amino acid Clustering), that identifies mutational clustering by considering the protein tertiary structure directly in 3D space. By combining the mutational data in the Catalogue of Somatic Mutations in Cancer (COSMIC) and the spatial information in the Protein Data Bank (PDB), SpacePAC is able to identify novel mutation clusters in many proteins such as FGFR3 and CHRM2. In addition, SpacePAC is better able to localize the most significant mutational hotspots as demonstrated in the cases of BRAF and ALK. The R package is available on Bioconductor at: http://www.bioconductor.org/packages/release/bioc/html/SpacePAC.html.

Conclusion

SpacePAC adds a valuable tool to the identification of mutational clusters while considering protein tertiary structure.",2014-07-03 +26249808,ARResT/AssignSubsets: a novel application for robust subclassification of chronic lymphocytic leukemia based on B cell receptor IG stereotypy.,"

Motivation

An ever-increasing body of evidence supports the importance of B cell receptor immunoglobulin (BcR IG) sequence restriction, alias stereotypy, in chronic lymphocytic leukemia (CLL). This phenomenon accounts for ∼30% of studied cases, one in eight of which belong to major subsets, and extends beyond restricted sequence patterns to shared biologic and clinical characteristics and, generally, outcome. Thus, the robust assignment of new cases to major CLL subsets is a critical, and yet unmet, requirement.

Results

We introduce a novel application, ARResT/AssignSubsets, which enables the robust assignment of BcR IG sequences from CLL patients to major stereotyped subsets. ARResT/AssignSubsets uniquely combines expert immunogenetic sequence annotation from IMGT/V-QUEST with curation to safeguard quality, statistical modeling of sequence features from more than 7500 CLL patients, and results from multiple perspectives to allow for both objective and subjective assessment. We validated our approach on the learning set, and evaluated its real-world applicability on a new representative dataset comprising 459 sequences from a single institution.

Availability and implementation

ARResT/AssignSubsets is freely available on the web at http://bat.infspire.org/arrest/assignsubsets/

Contact

nikos.darzentas@gmail.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-06 +26254435,Accurate disulfide-bonding network predictions improve ab initio structure prediction of cysteine-rich proteins.,"

Motivation

Cysteine-rich proteins cover many important families in nature but there are currently no methods specifically designed for modeling the structure of these proteins. The accuracy of disulfide connectivity pattern prediction, particularly for the proteins of higher-order connections, e.g., >3 bonds, is too low to effectively assist structure assembly simulations.

Results

We propose a new hierarchical order reduction protocol called Cyscon for disulfide-bonding prediction. The most confident disulfide bonds are first identified and bonding prediction is then focused on the remaining cysteine residues based on SVR training. Compared with purely machine learning-based approaches, Cyscon improved the average accuracy of connectivity pattern prediction by 21.9%. For proteins with more than 5 disulfide bonds, Cyscon improved the accuracy by 585% on the benchmark set of PDBCYS. When applied to 158 non-redundant cysteine-rich proteins, Cyscon predictions helped increase (or decrease) the TM-score (or RMSD) of the ab initio QUARK modeling by 12.1% (or 14.4%). This result demonstrates a new avenue to improve the ab initio structure modeling for cysteine-rich proteins.

Availability and implementation

http://www.csbio.sjtu.edu.cn/bioinf/Cyscon/

Contact

zhng@umich.edu or hbshen@sjtu.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-07 +25516281,Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2.,"In comparative high-throughput sequencing assays, a fundamental task is the analysis of count data, such as read counts per gene in RNA-seq, for evidence of systematic changes across experimental conditions. Small replicate numbers, discreteness, large dynamic range and the presence of outliers require a suitable statistical approach. We present DESeq2, a method for differential analysis of count data, using shrinkage estimation for dispersions and fold changes to improve stability and interpretability of estimates. This enables a more quantitative analysis focused on the strength rather than the mere presence of differential expression. The DESeq2 package is available at http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html.",2014-01-01 +24339933,Stability-based comparison of class discovery methods for DNA copy number profiles.,"

Motivation

Array-CGH can be used to determine DNA copy number, imbalances in which are a fundamental factor in the genesis and progression of tumors. The discovery of classes with similar patterns of array-CGH profiles therefore adds to our understanding of cancer and the treatment of patients. Various input data representations for array-CGH, dissimilarity measures between tumor samples and clustering algorithms may be used for this purpose. The choice between procedures is often difficult. An evaluation procedure is therefore required to select the best class discovery method (combination of one input data representation, one dissimilarity measure and one clustering algorithm) for array-CGH. Robustness of the resulting classes is a common requirement, but no stability-based comparison of class discovery methods for array-CGH profiles has ever been reported.

Results

We applied several class discovery methods and evaluated the stability of their solutions, with a modified version of Bertoni's [Formula: see text]-based test [1]. Our version relaxes the assumption of independence required by Bertoni's original [Formula: see text]-based test. We conclude that Minimal Regions of alteration (a concept introduced by [2]) for input data representation, sim [3] or agree [4] for dissimilarity measure and the use of average group distance in the clustering algorithm produce the most robust classes of array-CGH profiles.

Availability

The software is available from http://bioinfo.curie.fr/projects/cgh-clustering. It has also been partly integrated into ""Visualization and analysis of array-CGH""(VAMP)[5]. The data sets used are publicly available from ACTuDB [6].",2013-12-05 +21828087,"OrganismTagger: detection, normalization and grounding of organism entities in biomedical documents.","

Motivation

Semantic tagging of organism mentions in full-text articles is an important part of literature mining and semantic enrichment solutions. Tagged organism mentions also play a pivotal role in disambiguating other entities in a text, such as proteins. A high-precision organism tagging system must be able to detect the numerous forms of organism mentions, including common names as well as the traditional taxonomic groups: genus, species and strains. In addition, such a system must resolve abbreviations and acronyms, assign the scientific name and if possible link the detected mention to the NCBI Taxonomy database for further semantic queries and literature navigation.

Results

We present the OrganismTagger, a hybrid rule-based/machine learning system to extract organism mentions from the literature. It includes tools for automatically generating lexical and ontological resources from a copy of the NCBI Taxonomy database, thereby facilitating system updates by end users. Its novel ontology-based resources can also be reused in other semantic mining and linked data tasks. Each detected organism mention is normalized to a canonical name through the resolution of acronyms and abbreviations and subsequently grounded with an NCBI Taxonomy database ID. In particular, our system combines a novel machine-learning approach with rule-based and lexical methods for detecting strain mentions in documents. On our manually annotated OT corpus, the OrganismTagger achieves a precision of 95%, a recall of 94% and a grounding accuracy of 97.5%. On the manually annotated corpus of Linnaeus-100, the results show a precision of 99%, recall of 97% and grounding accuracy of 97.4%.

Availability

The OrganismTagger, including supporting tools, resources, training data and manual annotations, as well as end user and developer documentation, is freely available under an open-source license at http://www.semanticsoftware.info/organism-tagger.

Contact

witte@semanticsoftware.info.",2011-08-09 +22761802,SECOM: a novel hash seed and community detection based-approach for genome-scale protein domain identification.,"With rapid advances in the development of DNA sequencing technologies, a plethora of high-throughput genome and proteome data from a diverse spectrum of organisms have been generated. The functional annotation and evolutionary history of proteins are usually inferred from domains predicted from the genome sequences. Traditional database-based domain prediction methods cannot identify novel domains, however, and alignment-based methods, which look for recurring segments in the proteome, are computationally demanding. Here, we propose a novel genome-wide domain prediction method, SECOM. Instead of conducting all-against-all sequence alignment, SECOM first indexes all the proteins in the genome by using a hash seed function. Local similarity can thus be detected and encoded into a graph structure, in which each node represents a protein sequence and each edge weight represents the shared hash seeds between the two nodes. SECOM then formulates the domain prediction problem as an overlapping community-finding problem in this graph. A backward graph percolation algorithm that efficiently identifies the domains is proposed. We tested SECOM on five recently sequenced genomes of aquatic animals. Our tests demonstrated that SECOM was able to identify most of the known domains identified by InterProScan. When compared with the alignment-based method, SECOM showed higher sensitivity in detecting putative novel domains, while it was also three orders of magnitude faster. For example, SECOM was able to predict a novel sponge-specific domain in nucleoside-triphosphatase (NTPases). Furthermore, SECOM discovered two novel domains, likely of bacterial origin, that are taxonomically restricted to sea anemone and hydra. 
SECOM is an open-source program and available at http://sfb.kaust.edu.sa/Pages/Software.aspx.",2012-06-28 +25338682,Unveiling transcription factor regulation and differential co-expression genes in Duchenne muscular dystrophy.,"

Background

Gene expression analysis is powerful for investigating the underlying mechanisms of Duchenne muscular dystrophy (DMD). Previous studies mainly neglected co-expression or transcription factor (TF) information. Here we integrated TF information into differential co-expression analysis (DCEA) to explore new understandings of DMD pathogenesis.

Methods

Using two microarray datasets from Gene Expression Omnibus (GEO) database, we firstly detected differentially expressed genes (DEGs) and pathways enriched with DEGs. Secondly, we constructed differentially regulated networks to integrate the TF-to-target information and the differential co-expression genes.

Results

A total of 454 DEGs were detected and both KEGG pathway and ingenuity pathway analysis revealed that pathways enriched with aberrantly regulated genes are mostly involved in the immune response processes. DCEA results generated 610 pairs of DEGs regulated by at least one common TF, including 78 pairs of co-expressed DEGs. A network was constructed to illustrate their relationships and a subnetwork for DMD related molecules was constructed to show genes and TFs that may play important roles in the secondary changes of DMD. Among the DEGs which shared TFs with DMD, six genes were co-expressed with DMD, including ATP1A2, C1QB, MYOF, SAT1, TRIP10, and IFI6.

Conclusion

Our results may provide a new understanding of DMD and contribute potential targets for future therapeutic tests.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_210.",2014-10-23 +27226377,Critical Role of the PA-X C-Terminal Domain of Influenza A Virus in Its Subcellular Localization and Shutoff Activity.,"

Unlabelled

PA-X is a recently identified influenza virus protein that is composed of the PA N-terminal 191 amino acids and unique C-terminal 41 or 61 residues. We and others showed that PA-X has a strong ability to suppress host protein synthesis via host mRNA decay, which is mediated by endonuclease activity in its N-terminal domain (B. W. Jagger, H. M. Wise, J. C. Kash, K. A. Walters, N. M. Wills, Y. L. Xiao, R. L. Dunfee, L. M. Schwartzman, A. Ozinsky, G. L. Bell, R. M. Dalton, A. Lo, S. Efstathiou, J. F. Atkins, A. E. Firth, J. K. Taubenberger, and P. Digard, 2012, Science 337:199-204, http://dx.doi.org/10.1126/science.1222213, and E. A. Desmet, K. A. Bussey, R. Stone, and T. Takimoto, 2013, J Virol 87:3108-3118, http://dx.doi.org/10.1128/JVI.02826-12). However, the mechanism of host mRNA degradation, especially where and how PA-X targets mRNAs, has not been analyzed. In this study, we determined the localization of PA-X and the role of the C-terminal unique region in shutoff activity. Quantitative subcellular localization analysis revealed that PA-X was located equally in both cytoplasm and nucleus. By characterizing a series of PA-X C-terminal deletion mutants, we found that the first 9 amino acids were sufficient for nuclear localization, but an additional 6 residues were required to induce the maximum shutoff activity observed with intact PA-X. Importantly, forced nuclear localization of the PA-X C-terminal deletion mutant enhanced shutoff activity, highlighting the ability of nuclear PA-X to degrade host mRNAs more efficiently. However, PA-X also inhibited luciferase expression from transfected mRNAs synthesized in vitro, suggesting that PA-X also degrades mRNAs in the cytoplasm. Among the basic amino acids in the PA-X C-terminal region, 3 residues, 195K, 198K, and 199R, were identified as key residues for inducing host shutoff and nuclear localization. 
Overall, our data indicate a critical role for the 15 residues in the PA-X C-terminal domain in degrading mRNAs in both the cytoplasm and nucleus.

Importance

Influenza A viruses express PA-X proteins to suppress global host gene expression, including host antiviral genes, to allow efficient viral replication in infected cells. However, little is known about how PA-X induces host shutoff. In this study, we showed that PA-X localized equally in both the cytoplasm and nucleus of the cells, but the nuclear localization of PA-X mediated by its C-terminal region has a significant impact on shutoff activity. Three basic residues at the C-terminal region play a critical role in nuclear localization, but additional basic residues were required for maximum shutoff activity. Our findings indicate that PA-X targets and degrades mRNAs in both the nucleus and cytoplasm, and that the first 15 residues of the PA-X unique C-terminal region play a critical role in shutoff activity.",2016-07-27 +24489366,Mobyle SNAP Workbench: a web-based analysis portal for population genetics and evolutionary genomics.,"

Summary

Previously we developed the stand-alone SNAP Workbench toolkit that integrated a wide array of bioinformatics tools for phylogenetic and population genetic analyses. We have now developed a web-based portal front-end, using the Mobyle portal framework, which executes all of the programs available in the stand-alone SNAP Workbench toolkit on a high-performance Linux cluster. Additionally, we have expanded the selection of programs to over 189 tools, including population genetic, genome assembly and analysis tools, as well as metagenomic and large-scale phylogenetic analyses. The Mobyle SNAP Workbench web portal allows end users to (i) execute and manage otherwise complex command-line programs, (ii) launch multiple exploratory analyses of parameter-rich and computationally intensive methods and (iii) track the sequence of steps and parameters that were used to perform a specific analysis. Analysis pipelines or workflows for population genetic, metagenomic and genome assembly provide automation of data conversion, analysis and graphical visualization for biological inference.

Availability

The Mobyle SNAP Workbench portal is freely available online at http://snap.hpc.ncsu.edu/. The XMLs can be downloaded at http://carbonelab.org/system/files/snap_xmls.tgz. Each XML provides links to help files, online documentation and sample data.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-01-30 +23134636,Pathway Distiller - multisource biological pathway consolidation.,"

Background

One method to understand and evaluate an experiment that produces a large set of genes, such as a gene expression microarray analysis, is to identify overrepresentation or enrichment for biological pathways. Because pathways are able to functionally describe the set of genes, much effort has been made to collect curated biological pathways into publicly accessible databases. When combining disparate databases, highly related or redundant pathways exist, making their consolidation into pathway concepts essential. This will facilitate unbiased, comprehensive yet streamlined analysis of experiments that result in large gene sets.

Methods

After gene set enrichment finds representative pathways for large gene sets, pathways are consolidated into representative pathway concepts. Three complementary, but different methods of pathway consolidation are explored. Enrichment Consolidation combines the set of the pathways enriched for the signature gene list through iterative combining of enriched pathways with other pathways with similar signature gene sets; Weighted Consolidation utilizes a Protein-Protein Interaction network based gene-weighting approach that finds clusters of both enriched and non-enriched pathways limited to the experiments' resultant gene list; and finally the de novo Consolidation method uses several measurements of pathway similarity, that finds static pathway clusters independent of any given experiment.

Results

We demonstrate that the three consolidation methods provide unified yet different functional insights of a resultant gene set derived from a genome-wide profiling experiment. Results from the methods are presented, demonstrating their applications in biological studies and comparing with a pathway web-based framework that also combines several pathway databases. Additionally a web-based consolidation framework that encompasses all three methods discussed in this paper, Pathway Distiller (http://cbbiweb.uthscsa.edu/PathwayDistiller), is established to allow researchers access to the methods and example microarray data described in this manuscript, and the ability to analyze their own gene list by using our unique consolidation methods.

Conclusions

By combining several pathway systems, implementing different, but complementary pathway consolidation methods, and providing a user-friendly web-accessible tool, we have given users the ability to extract functional explanations of their genome wide experiments.",2012-10-26 +22434846,The importance of identifying alternative splicing in vertebrate genome annotation.,"While alternative splicing (AS) can potentially expand the functional repertoire of vertebrate genomes, relatively few AS transcripts have been experimentally characterized. We describe our detailed manual annotation of vertebrate genomes, which is generating a publicly available geneset rich in AS. In order to achieve this we have adopted a highly sensitive approach to annotating gene models supported by correctly mapped, canonically spliced transcriptional evidence combined with a highly cautious approach to adding unsupported extensions to models and making decisions on their functional potential. We use information about the predicted functional potential and structural properties of every AS transcript annotated at a protein-coding or non-coding locus to place them into one of eleven subclasses. We describe the incorporation of new sequencing and proteomics technologies into our annotation pipelines, which are used to identify and validate AS. Combining all data sources has led to the production of a rich geneset containing an average of 6.3 AS transcripts for every human multi-exon protein-coding gene. The datasets produced have proved very useful in providing context to studies investigating the functional potential of genes and the effect that variation may have on gene structure and function. DATABASE URL: http://www.ensembl.org/index.html, http://vega.sanger.ac.uk/index.html.",2012-03-20 +22419843,ESMP: A high-throughput computational pipeline for mining SSR markers from ESTs.,"

Unlabelled

With the advent of high-throughput sequencing technology, sequences from many genomes are being deposited to public databases at a brisk rate. Open access to large amount of expressed sequence tag (EST) data in the public databases has provided a powerful platform for simple sequence repeat (SSR) development in species where sequence information is not available. SSRs are markers of choice for their high reproducibility, abundant polymorphism and high inter-specific transferability. The mining of SSRs from ESTs requires different high-throughput computational tools that need to be executed individually which are computationally intensive and time consuming. To reduce the time lag and to streamline the cumbersome process of SSR mining from ESTs, we have developed a user-friendly, web-based EST-SSR pipeline ""EST-SSR-MARKER PIPELINE (ESMP)"". This pipeline integrates EST pre-processing, clustering, assembly and subsequently mining of SSRs from assembled EST sequences. The mining of SSRs from ESTs provides valuable information on the abundance of SSRs in ESTs and will facilitate the development of markers for genetic analysis and related applications such as marker-assisted breeding.

Availability

The database is available for free at http://bioinfo.aau.ac.in/ESMP.",2012-02-28 +25336619,PAIDB v2.0: exploration and analysis of pathogenicity and resistance islands.,"Pathogenicity is a complex multifactorial process confounded by the concerted activity of genetic regions associated with virulence and/or resistance determinants. Pathogenicity islands (PAIs) and resistance islands (REIs) are key to the evolution of pathogens and appear to play complementary roles in the process of bacterial infection. While PAIs promote disease development, REIs give a fitness advantage to the host against multiple antimicrobial agents. The Pathogenicity Island Database (PAIDB, http://www.paidb.re.kr) has been the only database dedicated to providing comprehensive information on all reported PAIs and candidate PAIs in prokaryotic genomes. In this study, we present PAIDB v2.0, whose functionality is extended to incorporate REIs. PAIDB v2.0 contains 223 types of PAIs with 1331 accessions, and 88 types of REIs with 108 accessions. With an improved detection scheme, 2673 prokaryotic genomes were analyzed to locate candidate PAIs and REIs. With additional quantitative and qualitative advancements in database content and detection accuracy, PAIDB will continue to facilitate pathogenomic studies of both pathogenic and non-pathogenic organisms.",2014-10-21 +24555116,"JSim, an open-source modeling system for data analysis.","JSim is a simulation system for developing models, designing experiments, and evaluating hypotheses on physiological and pharmacological systems through the testing of model solutions against data. It is designed for interactive, iterative manipulation of the model code, handling of multiple data sets and parameter sets, and for making comparisons among different models running simultaneously or separately. 
Interactive use is supported by a large collection of graphical user interfaces for model writing and compilation diagnostics, defining input functions, model runs, selection of algorithms solving ordinary and partial differential equations, run-time multidimensional graphics, parameter optimization (8 methods), sensitivity analysis, and Monte Carlo simulation for defining confidence ranges. JSim uses Mathematical Modeling Language (MML), a declarative syntax specifying algebraic and differential equations. Imperative constructs written in other languages (MATLAB, FORTRAN, C++, etc.) are accessed through procedure calls. MML syntax is simple, basically defining the parameters and variables, then writing the equations in a straightforward, easily read and understood mathematical form. This makes JSim good for teaching modeling as well as for model analysis for research. For high throughput applications, JSim can be run as a batch job. JSim can automatically translate models from the repositories for Systems Biology Markup Language (SBML) and CellML models. Stochastic modeling is supported. MML supports assigning physical units to constants and variables and automates checking dimensional balance as the first step in verification testing. Automatic unit scaling follows, e.g. seconds to minutes, if needed. The JSim Project File sets a standard for reproducible modeling analysis: it includes in one file everything for analyzing a set of experiments: the data, the models, the data fitting, and evaluation of parameter confidence ranges. JSim is open source; it and about 400 human readable open source physiological/biophysical models are available at http://www.physiome.org/jsim/.",2013-12-30 +26099263,Forna (force-directed RNA): Simple and effective online RNA secondary structure diagrams.,"

Motivation

The secondary structure of RNA is integral to the variety of functions it carries out in the cell and its depiction allows researchers to develop hypotheses about which nucleotides and base pairs are functionally relevant. Current approaches to visualizing secondary structure provide an adequate platform for the conversion of static text-based representations to 2D images, but are limited in their offer of interactivity as well as their ability to display larger structures, multiple structures and pseudoknotted structures.

Results

In this article, we present forna, a web-based tool for displaying RNA secondary structure which allows users to easily convert sequences and secondary structures to clean, concise and customizable visualizations. It supports, among other features, the simultaneous visualization of multiple structures, the display of pseudoknotted structures, the interactive editing of the displayed structures, and the automatic generation of secondary structure diagrams from PDB files. It requires no software installation apart from a modern web browser.

Availability and implementation

The web interface of forna is available at http://rna.tbi.univie.ac.at/forna while the source code is available on github at www.github.com/pkerpedjiev/forna.

Contact

pkerp@tbi.univie.ac.at

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-22 +22021380,CAPS-DB: a structural classification of helix-capping motifs.,"The regions of the polypeptide chain immediately preceding or following an α-helix are known as Nt- and Ct cappings, respectively. Cappings play a central role stabilizing α-helices due to lack of intrahelical hydrogen bonds in the first and last turn. Sequence patterns of amino acid type preferences have been derived for cappings but the structural motifs associated with them are still unclassified. CAPS-DB is a database of clusters of structural patterns of different capping types. The clustering algorithm is based on the geometry and the (Φ-ψ)-space conformation of these regions. CAPS-DB is a relational database that allows the user to search, browse, inspect and retrieve structural data associated with cappings. The contents of CAPS-DB might be of interest to a wide range of scientists covering different areas such as protein design and engineering, structural biology and bioinformatics. The database is accessible at: http://www.bioinsilico.org/CAPSDB.",2011-10-22 +26589271,AccessFold: predicting RNA-RNA interactions with consideration for competing self-structure.,"

Motivation

There are numerous examples of RNA-RNA complexes, including microRNA-mRNA and small RNA-mRNA duplexes for regulation of translation, guide RNA interactions with target RNA for post-transcriptional modification and small nuclear RNA duplexes for splicing. Predicting the base pairs formed between two interacting sequences remains difficult, at least in part because of the competition between unimolecular and bimolecular structure.

Results

Two algorithms were developed for improved prediction of bimolecular RNA structure that consider the competition between self-structure and bimolecular structure. These algorithms utilize two novel approaches to evaluate accessibility: free energy density minimization and pseudo-energy minimization. Free energy density minimization minimizes the folding free energy change per nucleotide involved in an intermolecular secondary structure. Pseudo-energy minimization (called AccessFold) minimizes the sum of free energy change and a pseudo-free energy penalty for bimolecular pairing of nucleotides that are unlikely to be accessible for bimolecular structure. The pseudo-free energy, derived from unimolecular pairing probabilities, is applied per nucleotide in bimolecular pairs, and this approach is able to predict binding sites that are split by unimolecular structures. A benchmark set of 17 bimolecular RNA structures was assembled to assess structure prediction. Pseudo-energy minimization provides a statistically significant improvement in sensitivity over the method that was found in a benchmark to be the most accurate previously available method, with an improvement from 36.8% to 57.8% in mean sensitivity for base pair prediction.

Availability and implementation

Pseudo-energy minimization is available for download as AccessFold, under an open-source license and as part of the RNAstructure package, at: http://rna.urmc.rochester.edu/RNAstructure.html

Contact

david_mathews@urmc.rochester.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-20 +21723077,[Transcutaneous aortic valve implantation: Anesthetic and perioperative management].,"

Objective

To describe the perioperative management, from the point of view of the anesthesia-intensive care unit specialist, of patients with aortic stenosis who undergo transcatheter aortic valve implantation (femoral or apical TAVI).

Data source

The PubMed database (http://www.ncbi.nlm.nih.gov/entrez/query.fcgi) was queried, using the following keywords: aortic stenosis, transcatheter aortic valve implantation TAVI, outcome, complications, anesthesia.

Data synthesis

TAVI is performed in patients suffering from aortic stenosis and presenting with numerous comorbidities, high-predicted perioperative mortality and/or contraindications to conventional cardiac surgery. TAVI is performed either by percutaneous transfemoral or transapical puncture of the left ventricle (LV) apex. These patients are older, have more comorbidities than those undergoing aortic valve replacement surgery and perioperative mortality predicted by risk scores is higher. While transapical TAVI is performed with general anaesthesia, transfemoral TAVI can be performed with either general or locoregional anaesthesia and/or sedation. The choice of the anaesthetic technique for transfemoral TAVI depends on the patient's medical history, the technique chosen for valve implantation, the type of monitoring and the anticipated hemodynamic problems. The incidence of complications following TAVI is high, some are common to surgical aortic valve replacement, and others are specific to this technique. Because of the prevalence of comorbidities, the hemodynamic-specific constraints of this technique and the incidence of complications, anaesthetic and perioperative management (evaluation, anaesthetic technique, monitoring, post-surgery care) requires the same level of expertise as in cardiac surgery anaesthesia.

Conclusion

TAVI expands treatment options for patients with aortic valve stenosis. The anaesthesia team must be involved in the care of these patients with the same level of expertise and care as in heart surgery on critical patients.",2011-06-30 +21253872,TSdb: a database of transporter substrates linking metabolic pathways and transporter systems on a genome scale via their shared substrates.,"TSdb ( http://tsdb.cbi.pku.edu.cn ) is the first manually curated central repository that stores formatted information on the substrates of transporters. In total, 37608 transporters with 15075 substrates from 884 organisms were curated from UniProt functional annotation. A unique feature of TSdb is that all the substrates are mapped to identifiers from the KEGG Ligand compound database. Thus, TSdb links current metabolic pathway schema with compound transporter systems via the shared compounds in the pathways. Furthermore, all the transporter substrates in TSdb are classified according to their biochemical properties, biological roles and subcellular localizations. In addition to the functional annotation of transporters, extensive compound annotation that includes inhibitor information from the KEGG Ligand and BRENDA databases has been integrated, making TSdb a useful source for the discovery of potential inhibitory mechanisms linking transporter substrates and metabolic enzymes. User-friendly web interfaces are designed for easy access, query and download of the data. Text and BLAST searches against all transporters in the database are provided. We will regularly update the substrate data with evidence from new publications.",2011-01-21 +25111964,SLaP mapper: a webserver for identifying and quantifying spliced-leader addition and polyadenylation site usage in kinetoplastid genomes.,"The Kinetoplastida are a diverse and globally distributed class of free-living and parasitic single-celled eukaryotes that collectively cause a significant burden on human health and welfare. 
In kinetoplastids individual genes do not have promoters, but rather all genes are arranged downstream of a small number of RNA polymerase II transcription initiation sites and are thus transcribed in polycistronic gene clusters. Production of individual mRNAs from this continuous transcript occurs co-transcriptionally by trans-splicing of a ∼39 nucleotide capped RNA and subsequent polyadenylation of the upstream mRNA. SLaP mapper (Spliced-Leader and Polyadenylation mapper) is a fully automated web-service for identification, quantitation and gene-assignment of both spliced-leader and polyadenylation addition sites in Kinetoplastid genomes. SLaP mapper only requires raw read data from paired-end Illumina RNAseq and performs all read processing, mapping, quality control, quantification, and analysis in a fully automated pipeline. To provide usage examples and estimates of the quantity of sequence data required we use RNAseq obtained from two different library preparations from both Trypanosoma brucei and Leishmania mexicana to show the number of expected reads that are obtained from each preparation type. SLaP mapper is an easy to use, platform independent webserver that is freely available for use at http://www.stevekellylab.com/software/slap. Example files are provided on the website.",2014-08-08 +25431331,MetaPSICOV: combining coevolution methods for accurate prediction of contacts and long range hydrogen bonding in proteins.,"

Motivation

Recent developments of statistical techniques to infer direct evolutionary couplings between residue pairs have rendered covariation-based contact prediction a viable means for accurate 3D modelling of proteins, with no information other than the sequence required. To extend the usefulness of contact prediction, we have designed a new meta-predictor (MetaPSICOV) which combines three distinct approaches for inferring covariation signals from multiple sequence alignments, considers a broad range of other sequence-derived features and, uniquely, a range of metrics which describe both the local and global quality of the input multiple sequence alignment. Finally, we use a two-stage predictor, where the second stage filters the output of the first stage. This two-stage predictor is additionally evaluated on its ability to accurately predict the long range network of hydrogen bonds, including correctly assigning the donor and acceptor residues.

Results

Using the original PSICOV benchmark set of 150 protein families, MetaPSICOV achieves a mean precision of 0.54 for top-L predicted long range contacts-around 60% higher than PSICOV, and around 40% better than CCMpred. In de novo protein structure prediction using FRAGFOLD, MetaPSICOV is able to improve the TM-scores of models by a median of 0.05 compared with PSICOV. Lastly, for predicting long range hydrogen bonding, MetaPSICOV-HB achieves a precision of 0.69 for the top-L/10 hydrogen bonds compared with just 0.26 for the baseline MetaPSICOV.

Availability and implementation

MetaPSICOV is available as a freely available web server at http://bioinf.cs.ucl.ac.uk/MetaPSICOV. Raw data (predicted contact lists and 3D models) and source code can be downloaded from http://bioinf.cs.ucl.ac.uk/downloads/MetaPSICOV.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-26 +24906883,"LINCS Canvas Browser: interactive web app to query, browse and interrogate LINCS L1000 gene expression signatures.","For the Library of Integrated Network-based Cellular Signatures (LINCS) project many gene expression signatures using the L1000 technology have been produced. The L1000 technology is a cost-effective method to profile gene expression in large scale. LINCS Canvas Browser (LCB) is an interactive HTML5 web-based software application that facilitates querying, browsing and interrogating many of the currently available LINCS L1000 data. LCB implements two compacted layered canvases, one to visualize clustered L1000 expression data, and the other to display enrichment analysis results using 30 different gene set libraries. Clicking on an experimental condition highlights gene-sets enriched for the differentially expressed genes from the selected experiment. A search interface allows users to input gene lists and query them against over 100 000 conditions to find the top matching experiments. The tool integrates many resources for an unprecedented potential for new discoveries in systems biology and systems pharmacology. The LCB application is available at http://www.maayanlab.net/LINCS/LCB. Customized versions will be made part of the http://lincscloud.org and http://lincs.hms.harvard.edu websites.",2014-06-06 +26690490,"PEPstrMOD: structure prediction of peptides containing natural, non-natural and modified residues.","

Background

In the past, many methods have been developed for peptide tertiary structure prediction but they are limited to peptides having natural amino acids. This study describes a method PEPstrMOD, which is an updated version of PEPstr, developed specifically for predicting the structure of peptides containing natural and non-natural/modified residues.

Results

PEPstrMOD integrates Forcefield_NCAA and Forcefield_PTM force field libraries to handle 147 non-natural residues and 32 types of post-translational modifications respectively by performing molecular dynamics using AMBER. AMBER was also used to handle other modifications like peptide cyclization, use of D-amino acids and capping of terminal residues. In addition, GROMACS was used to implement 210 non-natural side-chains in peptides using SwissSideChain force field library. We evaluated the performance of PEPstrMOD on three datasets generated from Protein Data Bank; i) ModPep dataset contains 501 non-natural peptides, ii) ModPep16, a subset of ModPep, and iii) CyclicPep contains 34 cyclic peptides. We achieved backbone Root Mean Square Deviation between the actual and predicted structure of peptides in the range of 3.81-4.05 Å.

Conclusions

In summary, the method PEPstrMOD has been developed that predicts the structure of modified peptide from the sequence/structure given as input. We validated the PEPstrMOD application using a dataset of peptides having non-natural/modified residues. PEPstrMOD offers unique advantages that allow the users to predict the structures of peptides having i) natural residues, ii) non-naturally modified residues, iii) terminal modifications, iv) post-translational modifications, v) D-amino acids, and also allows extended simulation of predicted peptides. This will help the researchers to have prior structural information of modified peptides to further design the peptides for desired therapeutic property. PEPstrMOD is freely available at http://osddlinux.osdd.net/raghava/pepstrmod/.",2015-12-21 +24603985,PyRAD: assembly of de novo RADseq loci for phylogenetic analyses.,"

Motivation

Restriction-site-associated genomic markers are a powerful tool for investigating evolutionary questions at the population level, but are limited in their utility at deeper phylogenetic scales where fewer orthologous loci are typically recovered across disparate taxa. While this limitation stems in part from mutations to restriction recognition sites that disrupt data generation, an additional source of data loss comes from the failure to identify homology during bioinformatic analyses. Clustering methods that allow for lower similarity thresholds and the inclusion of indel variation will perform better at assembling RADseq loci at the phylogenetic scale.

Results

PyRAD is a pipeline to assemble de novo RADseq loci with the aim of optimizing coverage across phylogenetic datasets. It uses a wrapper around an alignment-clustering algorithm, which allows for indel variation within and between samples, as well as for incomplete overlap among reads (e.g. paired-end). Here I compare PyRAD with the program Stacks in their performance analyzing a simulated RADseq dataset that includes indel variation. Indels disrupt clustering of homologous loci in Stacks but not in PyRAD, such that the latter recovers more shared loci across disparate taxa. I show through reanalysis of an empirical RADseq dataset that indels are a common feature of such data, even at shallow phylogenetic scales. PyRAD uses parallel processing as well as an optional hierarchical clustering method, which allows it to rapidly assemble phylogenetic datasets with hundreds of sampled individuals.

Availability

Software is written in Python and freely available at http://www.dereneaton.com/software/.",2014-03-05 +26441715,Erotic subset for the Nencki Affective Picture System (NAPS ERO): cross-sexual comparison study.,"Research on the processing of sexual stimuli has proved that such material has high priority in human cognition. Yet, although sex differences in response to sexual stimuli were extensively discussed in the literature, sexual orientation was given relatively little consideration, and material suitable for relevant research is difficult to come by. With this in mind, we present a collection of 200 erotic images, accompanied by their self-report ratings of emotional valence and arousal by homo- and heterosexual males and females (n = 80, divided into four equal-sized subsamples). The collection complements the Nencki Affective Picture System (NAPS) and is intended to be used as stimulus material in experimental research. The erotic images are divided into five categories, depending on their content: opposite-sex couple (50), male couple (50), female couple (50), male (25) and female (25). Additional 100 control images from the NAPS depicting people in a non-erotic context were also used in the study. We showed that recipient sex and sexual orientation strongly influenced the evaluation of erotic content. Thus, comparisons of valence and arousal ratings in different subject groups will help researchers select stimuli set for the purpose of various experimental designs. To facilitate the use of the dataset, we provide an on-line tool, which allows the user to browse the images interactively and select proper stimuli on the basis of several parameters. 
The NAPS ERO image collection together with the data are available to the scientific community for non-commercial use at http://naps.nencki.gov.pl.",2015-09-10 +23845962,De novo transcript sequence reconstruction from RNA-seq using the Trinity platform for reference generation and analysis.,"De novo assembly of RNA-seq data enables researchers to study transcriptomes without the need for a genome sequence; this approach can be usefully applied, for instance, in research on 'non-model organisms' of ecological and evolutionary importance, cancer samples or the microbiome. In this protocol we describe the use of the Trinity platform for de novo transcriptome assembly from RNA-seq data in non-model organisms. We also present Trinity-supported companion utilities for downstream applications, including RSEM for transcript abundance estimation, R/Bioconductor packages for identifying differentially expressed transcripts across samples and approaches to identify protein-coding genes. In the procedure, we provide a workflow for genome-independent transcriptome analysis leveraging the Trinity platform. The software, documentation and demonstrations are freely available from http://trinityrnaseq.sourceforge.net. The run time of this protocol is highly dependent on the size and complexity of data to be analyzed. The example data set analyzed in the procedure detailed herein can be processed in less than 5 h.",2013-07-11 +22952610,Increasing coverage of transcription factor position weight matrices through domain-level homology.,"Transcription factor-DNA interactions, central to cellular regulation and control, are commonly described by position weight matrices (PWMs). These matrices are frequently used to predict transcription factor binding sites in regulatory regions of DNA to complement and guide further experimental investigation. 
The DNA sequence preferences of transcription factors, encoded in PWMs, are dictated primarily by select residues within the DNA binding domain(s) that interact directly with DNA. Therefore, the DNA binding properties of homologous transcription factors with identical DNA binding domains may be characterized by PWMs derived from different species. Accordingly, we have implemented a fully automated domain-level homology searching method for identical DNA binding sequences. By applying the domain-level homology search to transcription factors with existing PWMs in the JASPAR and TRANSFAC databases, we were able to significantly increase coverage in terms of the total number of PWMs associated with a given species, assign PWMs to transcription factors that did not previously have any associations, and increase the number of represented species with PWMs over an order of magnitude. Additionally, using protein binding microarray (PBM) data, we have validated the domain-level method by demonstrating that transcription factor pairs with matching DNA binding domains exhibit comparable DNA binding specificity predictions to transcription factor pairs with completely identical sequences. The increased coverage achieved herein demonstrates the potential for more thorough species-associated investigation of protein-DNA interactions using existing resources. The PWM scanning results highlight the challenging nature of transcription factors that contain multiple DNA binding domains, as well as the impact of motif discovery on the ability to predict DNA binding properties. The method is additionally suitable for identifying domain-level homology mappings to enable utilization of additional information sources in the study of transcription factors. 
The domain-level homology search method, resulting PWM mappings, web-based user interface, and web API are publicly available at http://dodoma.systemsbiology.net.",2012-08-27 +21498548,Allie: a database and a search service of abbreviations and long forms.,"Many abbreviations are used in the literature especially in the life sciences, and polysemous abbreviations appear frequently, making it difficult to read and understand scientific papers that are outside of a reader's expertise. Thus, we have developed Allie, a database and a search service of abbreviations and their long forms (a.k.a. full forms or definitions). Allie searches for abbreviations and their corresponding long forms in a database that we have generated based on all titles and abstracts in MEDLINE. When a user query matches an abbreviation, Allie returns all potential long forms of the query along with their bibliographic data (i.e. title and publication year). In addition, for each candidate, co-occurring abbreviations and a research field in which it frequently appears in the MEDLINE data are displayed. This function helps users learn about the context in which an abbreviation appears. To deal with synonymous long forms, we use a dictionary called GENA that contains domain-specific terms such as gene, protein or disease names along with their synonymic information. Conceptually identical domain-specific terms are regarded as one term, and then conceptually identical abbreviation-long form pairs are grouped taking into account their appearance in MEDLINE. To keep up with new abbreviations that are continuously introduced, Allie has an automatic update system. In addition, the database of abbreviations and their long forms with their corresponding PubMed IDs is constructed and updated weekly. 
Database URL: The Allie service is available at http://allie.dbcls.jp/.",2011-04-15 +26086064,"Health, United States, 2014: With Special Feature on Adults Aged 55–64","Health, United States, 2014 is the 38th report on the health status of the nation and is submitted by the Secretary of the Department of Health and Human Services to the President and the Congress of the United States in compliance with Section 308 of the Public Health Service Act. This report was compiled by the Centers for Disease Control and Prevention’s (CDC) National Center for Health Statistics (NCHS). The Health, United States series presents an annual overview of national trends in health statistics. The report contains a Chartbook that assesses the nation’s health by presenting trends and current information on selected measures of morbidity, mortality, health care utilization and access, health risk factors, prevention, health insurance, and personal health care expenditures. This year’s Chartbook includes a Special Feature on the health of adults aged 55–64. The report also contains 123 Trend Tables organized around four major subject areas: health status and determinants, health care utilization, health care resources, and health care expenditures. A companion report—Health, United States: In Brief—features information extracted from the full report. The complete report, In Brief, and related data products are available on the Health, United States website at: http://www.cdc.gov/nchs/hus.htm.",2015-06-19 +24499245,There are not enough data to conclude that Monomethylsilanetriol is safe.,": This article is in response to Jugdaohsingh et al.: The silicon supplement 'Monomethylsilanetriol' is safe and increases the body pool of silicon in healthy Pre-menopausal women. Nutrition & Metabolism 2013 10:37: http://www.nutritionandmetabolism.com/content/10/1/37 The response from the authors is published as Jugdaohsingh et al.: Response to Prof D. 
Vanden Berghe letter: 'There are not enough data to conclude that Monomethylsilanetriol is safe'. Nutrition & Metabolism 2013 10:65: http://www.nutritionandmetabolism.com/content/10/1/65 ABSTRACT: The authors claim that the silicon supplement 'Monomethylsilanetriol' (MMST) is safe and is converted to orthosilicic acid (OSA) after ingestion. Critical analysis of the study results indicates that the presented data are insufficient to conclude that the use of MMST in food or food supplements is safe. Long term safety studies in humans and toxicological testing in vitro and in animals are an absolute requisite for such a conclusion but these are lacking in the present study and in the literature. Furthermore, none of the presented data show that MMST is actually converted to OSA, as OSA was not analyzed in either serum or urine of supplemented subjects.",2013-10-25 +24147600,"Metabolite profiling of a NIST Standard Reference Material for human plasma (SRM 1950): GC-MS, LC-MS, NMR, and clinical laboratory analyses, libraries, and web-based resources.","Recent progress in metabolomics and the development of increasingly sensitive analytical techniques have renewed interest in global profiling, i.e., semiquantitative monitoring of all chemical constituents of biological fluids. In this work, we have performed global profiling of NIST SRM 1950, ""Metabolites in Human Plasma"", using GC-MS, LC-MS, and NMR. Metabolome coverage, difficulties, and reproducibility of the experiments on each platform are discussed. A total of 353 metabolites have been identified in this material. GC-MS provides 65 unique identifications, and most of the identifications from NMR overlap with the LC-MS identifications, except for some small sugars that are not directly found by LC-MS. 
Also, repeatability and intermediate precision analyses show that the SRM 1950 profiling is reproducible enough to consider this material as a good choice to distinguish between analytical and biological variability. Clinical laboratory data shows that most results are within the reference ranges for each assay. In-house computational tools have been developed or modified for MS data processing and interactive web display. All data and programs are freely available online at http://peptide.nist.gov/ and http://srmd.nist.gov/ .",2013-12-03 +26476782,iEnhancer-2L: a two-layer predictor for identifying enhancers and their strength by pseudo k-tuple nucleotide composition.,"

Motivation

Enhancers are of short regulatory DNA elements. They can be bound with proteins (activators) to activate transcription of a gene, and hence play a critical role in promoting gene transcription in eukaryotes. With the avalanche of DNA sequences generated in the post-genomic age, it is a challenging task to develop computational methods for timely identifying enhancers from extremely complicated DNA sequences. Although some efforts have been made in this regard, they were limited at only identifying whether a query DNA element being of an enhancer or not. According to the distinct levels of biological activities and regulatory effects on target genes, however, enhancers should be further classified into strong and weak ones in strength.

Results

In view of this, a two-layer predictor called 'iEnhancer-2L' was proposed by formulating DNA elements with the 'pseudo k-tuple nucleotide composition', into which the six DNA local parameters were incorporated. To the best of our knowledge, it is the first computational predictor ever established for identifying not only enhancers, but also their strength. Rigorous cross-validation tests have indicated that iEnhancer-2L holds very high potential to become a useful tool for genome analysis.

Availability and implementation

For the convenience of most experimental scientists, a web server for the two-layer predictor was established at http://bioinformatics.hitsz.edu.cn/iEnhancer-2L/, by which users can easily get their desired results without the need to go through the mathematical details.

Contact

bliu@gordonlifescience.org, bliu@insun.hit.edu.cn, xlan@stanford.edu, kcchou@gordonlifescience.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-17 +25329667,Accurate assignment of significance to neuropeptide identifications using Monte Carlo k-permuted decoy databases.,"In support of accurate neuropeptide identification in mass spectrometry experiments, novel Monte Carlo permutation testing was used to compute significance values. Testing was based on k-permuted decoy databases, where k denotes the number of permutations. These databases were integrated with a range of peptide identification indicators from three popular open-source database search software (OMSSA, Crux, and X! Tandem) to assess the statistical significance of neuropeptide spectra matches. Significance p-values were computed as the fraction of the sequences in the database with match indicator value better than or equal to the true target spectra. When applied to a test-bed of all known manually annotated mouse neuropeptides, permutation tests with k-permuted decoy databases identified up to 100% of the neuropeptides at p-value < 10(-5). The permutation test p-values using hyperscore (X! Tandem), E-value (OMSSA) and Sp score (Crux) match indicators outperformed all other match indicators. The robust performance to detect peptides of the intuitive indicator ""number of matched ions between the experimental and theoretical spectra"" highlights the importance of considering this indicator when the p-value was borderline significant. Our findings suggest permutation decoy databases of size 1×10(5) are adequate to accurately detect neuropeptides and this can be exploited to increase the speed of the search. The straightforward Monte Carlo permutation testing (comparable to a zero order Markov model) can be easily combined with existing peptide identification software to enable accurate and effective neuropeptide detection. 
The source code is available at http://stagbeetle.animal.uiuc.edu/pepshop/MSMSpermutationtesting.",2014-10-17 +25326331,ImmuCo: a database of gene co-expression in immune cells.,"Current gene co-expression databases and correlation networks do not support cell-specific analysis. Gene co-expression and expression correlation are subtly different phenomena, although both are likely to be functionally significant. Here, we report a new database, ImmuCo (http://immuco.bjmu.edu.cn), which is a cell-specific database that contains information about gene co-expression in immune cells, identifying co-expression and correlation between any two genes. The strength of co-expression of queried genes is indicated by signal values and detection calls, whereas expression correlation and strength are reflected by Pearson correlation coefficients. A scatter plot of the signal values is provided to directly illustrate the extent of co-expression and correlation. In addition, the database allows the analysis of cell-specific gene expression profile across multiple experimental conditions and can generate a list of genes that are highly correlated with the queried genes. Currently, the database covers 18 human cell groups and 10 mouse cell groups, including 20,283 human genes and 20,963 mouse genes. More than 8.6 × 10(8) and 7.4 × 10(8) probe set combinations are provided for querying each human and mouse cell group, respectively. Sample applications support the distinctive advantages of the database.",2014-10-17 +26396658,bPE toolkit: toolkit for computational protein engineering.,"We present a computational toolkit consisting of five utility tools, for performing basic operations on a protein structure file in PDB format. The toolkit consists of five different programs which can be integrated as part of a pipeline for computational protein structure characterization or as a standalone analysis package. 
The programs include tools for chirality check for amino acids (ProChiral), contact map generation (CoMa), data redundancy (DaRe), hydrogen bond potential energy (HyPE) and electrostatic interaction energy (EsInE). All programs in the toolkit can be accessed and downloaded through the following link: http://www.iitg.ac.in/bpetoolkit/.",2014-10-19 +25640425,Quantifying mitochondrial content in living cells.,"We describe a novel version of MitoGraph, our fully automated image processing method and software, dedicated to calculating the volume of 3D intracellular structures and organelles in live cells. MitoGraph is optimized and validated for quantifying the volume of tubular mitochondrial networks in budding yeast. We therefore include the experimental protocol, microscopy conditions, and software parameters focusing on mitochondria in budding yeast. However, MitoGraph can also be applied to mitochondria in other cell types and possibly other intracellular structures. We begin with our protocol and then include substantial discussion of the validation, requirements, and limits of MitoGraph to aid a wide range of potential users in applying MitoGraph to their data and troubleshooting any potential problems that arise. MitoGraph is freely available at the Web site http://rafelski.com/susanne/MitoGraph.",2015-01-08 +25931517,RVD2: an ultra-sensitive variant detection model for low-depth heterogeneous next-generation sequencing data.,"

Motivation

Next-generation sequencing technology is increasingly being used for clinical diagnostic tests. Clinical samples are often genomically heterogeneous due to low sample purity or the presence of genetic subpopulations. Therefore, a variant calling algorithm for calling low-frequency polymorphisms in heterogeneous samples is needed.

Results

We present a novel variant calling algorithm that uses a hierarchical Bayesian model to estimate allele frequency and call variants in heterogeneous samples. We show that our algorithm improves upon current classifiers and has higher sensitivity and specificity over a wide range of median read depth and minor allele fraction. We apply our model and identify 15 mutated loci in the PAXP1 gene in a matched clinical breast ductal carcinoma tumor sample; two of which are likely loss-of-heterozygosity events.

Availability and implementation

http://genomics.wpi.edu/rvd2/.

Contact

pjflaherty@wpi.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-29 +26223264,Predicting the combined effect of multiple genetic variants.,"

Background

Many genetic variants have been identified in the human genome. The functional effects of a single variant have been intensively studied. However, the joint effects of multiple variants in the same genes have been largely ignored due to their complexity or lack of data. This paper uses HMMvar, a hidden Markov model based approach, to investigate the combined effect of multiple variants from the 1000 Genomes Project. Two tumor suppressor genes, TP53 and phosphatase and tensin homolog (PTEN), are also studied for the joint effect of compensatory indel variants.

Results

Results show that there are cases where the joint effect of having multiple variants in the same genes is significantly different from that of a single variant. The deleterious effect of a single indel variant can be alleviated by their compensatory indels in TP53 and PTEN. Compound mutations in two genes, β-MHC and MyBP-C, leading to severer cardiovascular disease compared to single mutations, are also validated.

Conclusions

This paper extends the functionality of HMMvar, a tool for assigning a quantitative score to a variant, to measure not only the deleterious effect of a single variant but also the joint effect of multiple variants. HMMvar is the first tool that can predict the functional effects of both single and general multiple variations on proteins. The precomputed scores for multiple variants from the 1000 Genomes Project and the HMMvar package are available at https://bioinformatics.cs.vt.edu/zhanglab/HMMvar/.",2015-07-30 +22817272,"Genetic dissection of growth, wood basic density and gene expression in interspecific backcrosses of Eucalyptus grandis and E. urophylla.","

Background

F1 hybrid clones of Eucalyptus grandis and E. urophylla are widely grown for pulp and paper production in tropical and subtropical regions. Volume growth and wood quality are priority objectives in Eucalyptus tree improvement. The molecular basis of quantitative variation and trait expression in eucalypt hybrids, however, remains largely unknown. The recent availability of a draft genome sequence (http://www.phytozome.net) and genome-wide genotyping platforms, combined with high levels of genetic variation and high linkage disequilibrium in hybrid crosses, greatly facilitate the detection of quantitative trait loci (QTLs) as well as underlying candidate genes for growth and wood property traits. In this study, we used Diversity Arrays Technology markers to assess the genetic architecture of volume growth (diameter at breast height, DBH) and wood basic density in four-year-old progeny of an interspecific backcross pedigree of E. grandis and E. urophylla. In addition, we used Illumina RNA-Seq expression profiling in the E. urophylla backcross family to identify cis- and trans-acting polymorphisms (eQTLs) affecting transcript abundance of genes underlying QTLs for wood basic density.

Results

A total of five QTLs for DBH and 12 for wood basic density were identified in the two backcross families. Individual QTLs for DBH and wood basic density explained 3.1 to 12.2% of phenotypic variation. Candidate genes underlying QTLs for wood basic density on linkage groups 8 and 9 were found to share trans-acting eQTLs located on linkage groups 4 and 10, which in turn coincided with QTLs for wood basic density suggesting that these QTLs represent segregating components of an underlying transcriptional network.

Conclusion

This is the first demonstration of the use of next-generation expression profiling to quantify transcript abundance in a segregating tree population and identify candidate genes potentially affecting wood property variation. The QTLs identified in this study provide a resource for identifying candidate genes and developing molecular markers for marker-assisted breeding of volume growth and wood basic density. Our results suggest that integrated analysis of transcript and trait variation in eucalypt hybrids can be used to dissect the molecular basis of quantitative variation in wood property traits.",2012-07-20 +24747219,DSRC 2--Industry-oriented compression of FASTQ files.,"

Summary

Modern sequencing platforms produce huge amounts of data. Archiving them raises major problems but is crucial for reproducibility of results, one of the most fundamental principles of science. The widely used gzip compressor, used for reduction of storage and transfer costs, is not a perfect solution, so a few specialized FASTQ compressors were proposed recently. Unfortunately, they are often impractical because of slow processing, lack of support for some variants of FASTQ files or instability. We propose DSRC 2 that offers compression ratios comparable with the best existing solutions, while being a few times faster and more flexible.

Availability and implementation

DSRC 2 is freely available at http://sun.aei.polsl.pl/dsrc. The package contains command-line compressor, C and Python libraries for easy integration with existing software and technical documentation with examples of usage.

Contact

sebastian.deorowicz@polsl.pl

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-18 +25324314,The TTSMI database: a catalog of triplex target DNA sites associated with genes and regulatory elements in the human genome.,"A triplex target DNA site (TTS), a stretch of DNA that is composed of polypurines, is able to form a triple-helix (triplex) structure with triplex-forming oligonucleotides (TFOs) and is able to influence the site-specific modulation of gene expression and/or the modification of genomic DNA. The co-localization of a genomic TTS with gene regulatory signals and functional genome structures suggests that TFOs could potentially be exploited in antigene strategies for the therapy of cancers and other genetic diseases. Here, we present the TTS Mapping and Integration (TTSMI; http://ttsmi.bii.a-star.edu.sg) database, which provides a catalog of unique TTS locations in the human genome and tools for analyzing the co-localization of TTSs with genomic regulatory sequences and signals that were identified using next-generation sequencing techniques and/or predicted by computational models. TTSMI was designed as a user-friendly tool that facilitates (i) fast searching/filtering of TTSs using several search terms and criteria associated with sequence stability and specificity, (ii) interactive filtering of TTSs that co-localize with gene regulatory signals and non-B DNA structures, (iii) exploration of dynamic combinations of the biological signals of specific TTSs and (iv) visualization of a TTS simultaneously with diverse annotation tracks via the UCSC genome browser.",2014-10-16 +24524735,QSAR modeling of imbalanced high-throughput screening data in PubChem.,"Many of the structures in PubChem are annotated with activities determined in high-throughput screening (HTS) assays. Because of the nature of these assays, the activity data are typically strongly imbalanced, with a small number of active compounds contrasting with a very large number of inactive compounds. 
We have used several such imbalanced PubChem HTS assays to test and develop strategies to efficiently build robust QSAR models from imbalanced data sets. Different descriptor types [Quantitative Neighborhoods of Atoms (QNA) and ""biological"" descriptors] were used to generate a variety of QSAR models in the program GUSAR. The models obtained were compared using external test and validation sets. We also report on our efforts to incorporate the most predictive of our models in the publicly available NCI/CADD Group Web services ( http://cactus.nci.nih.gov/chemical/apps/cap).",2014-02-28 +26771251,Epigenome-Wide Assessment of DNA Methylation in the Placenta and Arsenic Exposure in the New Hampshire Birth Cohort Study (USA).,"

Background

Arsenic is one of the most commonly encountered environmental toxicants, and research from model systems has suggested that one mode of its toxic activity may be through alterations in DNA methylation. In utero exposure to arsenic can affect fetal, newborn, and infant health, resulting in a range of phenotypic outcomes.

Objectives

This study examined variation in placental DNA methylation and its relationship to arsenic exposure in 343 individuals enrolled in the New Hampshire Birth Cohort Study.

Methods

Linear regression models using a reference-free correction to account for cellular composition were employed to determine CpG loci affected by arsenic levels.

Results

Total arsenic measured in maternal urine during the second trimester was not associated with methylation in the placenta, whereas arsenic levels quantified through maternal toenail collected at birth were associated with methylation at a single CpG locus (p = 4.1 × 10^-8). Placenta arsenic levels were associated with 163 differentially methylated loci (false discovery rate < 0.05), with 11 probes within the LYRM2 gene reaching genome-wide significance (p < 10^-8). Measurement of LYRM2 mRNA levels indicated that methylation was weakly to moderately correlated with expression (r = 0.15, p < 0.06). In addition, we identified pathways suggesting changes in placental cell subpopulation proportions associated with arsenic exposure.

Conclusions

These data demonstrate the potential for arsenic, even at levels commonly experienced in a U.S. population, to have effects on the DNA methylation status of specific genes in the placenta and thus supports a potentially novel mechanism for arsenic to affect long-term children's health.

Citation

Green BB, Karagas MR, Punshon T, Jackson BP, Robbins DJ, Houseman EA, Marsit CJ. 2016. Epigenome-wide assessment of DNA methylation in the placenta and arsenic exposure in the New Hampshire Birth Cohort Study (USA). Environ Health Perspect 124:1253-1260; http://dx.doi.org/10.1289/ehp.1510437.",2016-01-15 +26232543,"Japanese Encephalitis Vaccines: WHO position paper, February 2015--Recommendations.","This article presents the World Health Organization's (WHO) recommendations on the use of Japanese Encephalitis (JE) vaccines excerpted from the WHO position paper on Japanese Encephalitis vaccines recently published in the Weekly Epidemiological Record [1]. This updated position paper on JE vaccines replaces the 2006 position paper on this subject [2]; it focuses on new information concerning the availability, safety, immunogenicity and effectiveness of JE vaccines and the duration of protection they confer. Recent data on global prevalence and burden of disease caused by JE and cost-effectiveness considerations regarding JE vaccination are also summarized. Footnotes to this paper provide a number of core references including references to grading tables that assess the quality of the scientific evidence. In accordance with its mandate to provide guidance to Member States on health policy matters, WHO issues a series of regularly updated position papers on vaccines and combinations of vaccines against diseases that have an international public health impact. These papers are concerned primarily with the use of vaccines in large-scale immunization programmes; they summarize essential background information on diseases and vaccines, and conclude with WHO's current position on the use of vaccines in the global context. This paper reflects the recommendations of WHO's Strategic Advisory Group of Experts (SAGE) on immunization. These recommendations were discussed by SAGE at its October 2014 meeting. 
Evidence presented at the meeting can be accessed at http://www.who.int/immunization/sage/previous/en/index.html.",2015-07-29 +24578402,ngsCAT: a tool to assess the efficiency of targeted enrichment sequencing.,"

Motivation

Targeted enrichment sequencing by next-generation sequencing is a common approach to interrogate specific loci or the whole exome in the human genome. The efficiency and the lack of bias in the enrichment process need to be assessed as a quality control step before performing downstream analysis of the sequence data. Tools that can report on the sensitivity, specificity, uniformity and other enrichment-specific features are needed.

Results

We have implemented the next-generation sequencing data Capture Assessment Tool (ngsCAT), a tool that takes the information of the mapped reads and the coordinates of the targeted regions as input files, and generates a report with metrics and figures that allows the evaluation of the efficiency of the enrichment process. The tool can also take as input the information of two samples allowing the comparison of two different experiments.

Availability and implementation

Documentation and downloads for ngsCAT can be found at http://www.bioinfomgp.org/ngscat.",2014-02-26 +26153515,LayerCake: a tool for the visual comparison of viral deep sequencing data.,"

Motivation

The advent of next-generation sequencing (NGS) has created unprecedented opportunities to examine viral populations within individual hosts, among infected individuals and over time. Comparing sequence variability across viral genomes allows for the construction of complex population structures, the analysis of which can yield powerful biological insights. However, the simultaneous display of sequence variation, coverage depth and quality scores across thousands of bases presents a unique visualization challenge that has not been fully met by current NGS analysis tools.

Results

Here, we present LayerCake, a self-contained visualization tool that allows for the rapid analysis of variation in viral NGS data. LayerCake enables the user to simultaneously visualize variations in multiple viral populations across entire genomes within a highly customizable framework, drawing attention to pertinent and interesting patterns of variation. We have successfully deployed LayerCake to assist with a variety of different genomics datasets.

Availability and implementation

Program downloads and detailed instructions are available at http://graphics.cs.wisc.edu/WP/layercake under a modified MIT license. LayerCake is a cross-platform tool written in the Processing framework for Java.

Contact

mcorrell@cs.wisc.edu.",2015-07-07 +25313158,Organ system heterogeneity DB: a database for the visualization of phenotypes at the organ system level.,"Perturbations of mammalian organisms including diseases, drug treatments and gene perturbations in mice affect organ systems differently. Some perturbations impair relatively few organ systems while others lead to highly heterogeneous or systemic effects. Organ System Heterogeneity DB (http://mips.helmholtz-muenchen.de/Organ_System_Heterogeneity/) provides information on the phenotypic effects of 4865 human diseases, 1667 drugs and 5361 genetically modified mouse models on 26 different organ systems. Disease symptoms, drug side effects and mouse phenotypes are mapped to the System Organ Class (SOC) level of the Medical Dictionary of Regulatory Activities (MedDRA). Then, the organ system heterogeneity value, a measurement of the systemic impact of a perturbation, is calculated from the relative frequency of phenotypic features across all SOCs. For perturbations of interest, the database displays the distribution of phenotypic effects across organ systems along with the heterogeneity value and the distance between organ system distributions. In this way, it allows, in an easy and comprehensible fashion, the comparison of the phenotypic organ system distributions of diseases, drugs and their corresponding genetically modified mouse models of associated disease genes and drug targets. The Organ System Heterogeneity DB is thus a platform for the visualization and comparison of organ system level phenotypic effects of drugs, diseases and genes.",2014-10-13 +24574112,Incorporating post-translational modifications and unnatural amino acids into high-throughput modeling of protein structures.,"

Motivation

Accurately predicting protein side-chain conformations is an important subproblem of the broader protein structure prediction problem. Several methods exist for generating fairly accurate models for moderate-size proteins in seconds or less. However, a major limitation of these methods is their inability to model post-translational modifications (PTMs) and unnatural amino acids. In natural living systems, the chemical groups added following translation are often critical for the function of the protein. In engineered systems, unnatural amino acids are incorporated into proteins to explore structure-function relationships and create novel proteins.

Results

We present a new version of SIDEpro to predict the side chains of proteins containing non-standard amino acids, including 15 of the most frequently observed PTMs in the Protein Data Bank and all types of phosphorylation. SIDEpro uses energy functions that are parameterized by neural networks trained from available data. For PTMs, the [Formula: see text] and [Formula: see text] accuracies are comparable with those obtained for the precursor amino acid, and so are the RMSD values for the atoms shared with the precursor amino acid. In addition, SIDEpro can accommodate any PTM or unnatural amino acid, thus providing a flexible prediction system for high-throughput modeling of proteins beyond the standard amino acids.

Availability and implementation

SIDEpro programs and Web server, rotamer libraries and data are available through the SCRATCH suite of protein structure predictors at http://scratch.proteomics.ics.uci.edu/",2014-02-25 +22817640,atBioNet--an integrated network analysis tool for genomics and biomarker discovery.,"

Background

Large amounts of mammalian protein-protein interaction (PPI) data have been generated and are available for public use. From a systems biology perspective, Proteins/genes interactions encode the key mechanisms distinguishing disease and health, and such mechanisms can be uncovered through network analysis. An effective network analysis tool should integrate different content-specific PPI databases into a comprehensive network format with a user-friendly platform to identify key functional modules/pathways and the underlying mechanisms of disease and toxicity.

Results

atBioNet integrates seven publicly available PPI databases into a network-specific knowledge base. Knowledge expansion is achieved by expanding a user supplied proteins/genes list with interactions from its integrated PPI network. The statistically significant functional modules are determined by applying a fast network-clustering algorithm (SCAN: a Structural Clustering Algorithm for Networks). The functional modules can be visualized either separately or together in the context of the whole network. Integration of pathway information enables enrichment analysis and assessment of the biological function of modules. Three case studies are presented using publicly available disease gene signatures as a basis to discover new biomarkers for acute leukemia, systemic lupus erythematosus, and breast cancer. The results demonstrated that atBioNet can not only identify functional modules and pathways related to the studied diseases, but this information can also be used to hypothesize novel biomarkers for future analysis.

Conclusion

atBioNet is a free web-based network analysis tool that provides a systematic insight into proteins/genes interactions through examining significant functional modules. The identified functional modules are useful for determining underlying mechanisms of disease and biomarker discovery. It can be accessed at: http://www.fda.gov/ScienceResearch/BioinformaticsTools/ucm285284.htm.",2012-07-20 +26794894,"Impact of Chronic Kidney Disease on Long-Term Outcomes in Type 2 Diabetic Patients With Coronary Artery Disease on Surgical, Angioplasty, or Medical Treatment.","

Background

Coronary artery disease (CAD) among patients with diabetes and chronic kidney disease (CKD) is not well studied, and the best treatment for this condition is not established. Our aim was to compare three therapeutic strategies for CAD in diabetic patients stratified by renal function.

Methods

Patients with multivessel CAD that underwent coronary artery bypass graft (CABG), angioplasty (percutaneous coronary intervention [PCI]), or medical therapy alone (MT) were included. Data were analyzed according to glomerular filtration rate in three strata: normal (>90 mL/min), mild CKD (60 to 89 mL/min), and moderate CKD (30 to 59 mL/min). End points comprised overall rate of mortality, acute myocardial infarction, and need for additional revascularization.

Results

Among patients with normal renal function (n = 270), 122 underwent CABG, 72 PCI, and 76 MT; among patients with mild CKD (n = 367), 167 underwent CABG, 92 PCI, and 108 MT; and among patients with moderate CKD (n = 126), 46 underwent CABG, 40 PCI, and 40 MT. Event-free survival was 80.4%, 75.7%, 67.5% for strata 1, 2, and 3, respectively (p = 0.037). Survival rates among patients with no, mild, and moderate CKD are 91.1%, 89.6%, and 76.2%, respectively (p = 0.001) (hazard ratio 0.69; 95% confidence interval 0.51 to 0.95; p = 0.024 for stratum 1 versus 3). We found no differences for overall number of deaths or acute myocardial infarctions irrespective of strata. The need of new revascularization was different in all strata, favoring CABG (p < 0.001, p < 0.001, and p = 0.029 for no, mild, and moderate CKD, respectively).

Conclusions

Mortality rates were higher in patients with mild and moderate CKD. Higher event-free survival was observed in the CABG group among patients with no and mild CKD. Besides, CABG was associated with less need for new revascularization compared with PCI and MT in all renal function strata. This trial was registered at http://www.controlled-trials.com as ISRCTN66068876.",2016-01-12 +25918555,ClinSeK: a targeted variant characterization framework for clinical sequencing.,"Applying genomics to patient care demands sensitive, unambiguous and rapid characterization of a known set of clinically relevant variants in patients' samples, an objective substantially different from the standard discovery process, in which every base in every sequenced read must be examined. Further, the approach must be sufficiently robust as to be able to detect multiple and potentially rare variants from heterogeneous samples. To meet this critical objective, we developed a novel variant characterization framework, ClinSeK, which performs targeted analysis of relevant reads from high-throughput sequencing data. ClinSeK is designed for efficient targeted short read alignment and is capable of characterizing a wide spectrum of genetic variants from single nucleotide variation to large-scale genomic rearrangement breakpoints. Applying ClinSeK to over a thousand cancer patients demonstrated substantively better performance, in terms of accuracy, runtime and disk storage, for clinical applications than existing variant discovery tools. ClinSeK is freely available for academic use at http://bioinformatics.mdanderson.org/main/clinsek.",2015-03-31 +25994031,Identification and Functional Assessment of a New CYP2C9 Allelic Variant CYP2C9*59.,"CYP2C9, one of the most important drug-metabolizing enzymes, is responsible for metabolizing approximately 15% of clinically important drugs, including warfarin, diclofenac, and losartan. 
Similar to other CYP members, human CYP2C9 exhibits marked genetic polymorphisms among individuals of different ethnicities. In this study, a novel missense mutation (1300A>T) was identified in a warfarin-sensitive patient after a genetic screen of three candidate genes related to high variability in response to warfarin doses. This base transversion leads to an Ile-to-Phe amino acid substitution at codon 434 within the CYP2C9 protein, and this new variant has been named a novel allele, CYP2C9*59, by the Human CYP Allele Nomenclature Committee (http://www.cypalleles.ki.se/cyp2c9.htm). The exogenous expression of CYP2C9.59 in insect cell microsomes revealed that, despite a similar protein expression level as wild-type CYP2C9, variant CYP2C9.59 exhibited significantly reduced maximal velocity, Vmax, and/or increased Michaelis constant, Km, values toward three CYP2C9-specific substrates. Our data suggest that the 1300A>T mutation can greatly decrease the enzymatic activity of the CYP2C9 protein both in vitro and in vivo.",2015-05-20 +23849655,A big data approach to the ultra-fast prediction of DFT-calculated bond energies.,"

Background

The rapid access to intrinsic physicochemical properties of molecules is highly desired for large scale chemical data mining explorations such as mass spectrum prediction in metabolomics, toxicity risk assessment and drug discovery. Large volumes of data are being produced by quantum chemistry calculations, which provide increasingly accurate estimations of several properties, e.g. by Density Functional Theory (DFT), but are still too computationally expensive for those large scale uses. This work explores the possibility of using large amounts of data generated by DFT methods for thousands of molecular structures, extracting relevant molecular properties and applying machine learning (ML) algorithms to learn from the data. Once trained, these ML models can be applied to new structures to produce ultra-fast predictions. An approach is presented for homolytic bond dissociation energy (BDE).

Results

Machine learning models were trained with a data set of >12,000 BDEs calculated by B3LYP/6-311++G(d,p)//DFTB. Descriptors were designed to encode atom types and connectivity in the 2D topological environment of the bonds. The best model, an Associative Neural Network (ASNN) based on 85 bond descriptors, was able to predict the BDE of 887 bonds in an independent test set (covering a range of 17.67-202.30 kcal/mol) with RMSD of 5.29 kcal/mol, mean absolute deviation of 3.35 kcal/mol, and R^2 = 0.953. The predictions were compared with semi-empirical PM6 calculations, and were found to be superior for all types of bonds in the data set, except for O-H, N-H, and N-N bonds. The B3LYP/6-311++G(d,p)//DFTB calculations can approach the higher-level calculations B3LYP/6-311++G(3df,2p)//B3LYP/6-31G(d,p) with an RMSD of 3.04 kcal/mol, which is less than the RMSD of ASNN (against both DFT methods). An experimental web service for on-line prediction of BDEs is available at http://joao.airesdesousa.com/bde.

Conclusion

Knowledge could be automatically extracted by machine learning techniques from a data set of calculated BDEs, providing ultra-fast access to accurate estimations of DFT-calculated BDEs. This demonstrates how to extract value from large volumes of data currently being produced by quantum chemistry calculations at an increasing speed mostly without human intervention. In this way, high-level theoretical quantum calculations can be used in large-scale applications that otherwise would not afford the intrinsic computational cost.",2013-07-12 +25650947,Statistical analysis and reporting: common errors found during peer review and how to avoid them.,"When performing statistical peer review for Swiss Medical Weekly papers there often appear to be common errors or recurring themes regarding the reporting of study designs, statistical analysis methods, results and their interpretation. In order to help authors with choosing and describing the most appropriate analysis methods and reporting their results, we have created a guide to the most common issues and how to avoid them. This guide will follow the recommended structure for original papers as provided in the guidelines for authors (http://blog.smw.ch/what-smw-has-to-offer/guidelines-for-authors/), and provide advice for each section. This paper is intended to provide an overview of statistical methods and tips for writing your paper; it is not a comprehensive review of all statistical methods. Guidance is provided about the choice of statistical methods for different situations and types of data, how to report the methods, present figures and tables, and how to correctly present and interpret the results.",2015-02-04 +27188414,Cataract.,"Cataract is the leading cause of reversible blindness and visual impairment globally. Blindness from cataract is more common in populations with low socioeconomic status and in developing countries than in developed countries. The only treatment for cataract is surgery. 
Phacoemulsification is the gold standard for cataract surgery in the developed world, whereas manual small incision cataract surgery is used frequently in developing countries. In general, the outcomes of surgery are good and complications, such as endophthalmitis, often can be prevented or have good outcomes if properly managed. Femtosecond laser-assisted cataract surgery, an advanced technology, can automate several steps; initial data show no superiority of this approach over current techniques, but the results of many large clinical trials are pending. The greatest challenge remains the growing 'backlog' of patients with cataract blindness in the developing world because of lack of access to affordable surgery. Efforts aimed at training additional cataract surgeons in these countries do not keep pace with the increasing demand associated with ageing population demographics. In the absence of strategies that can prevent or delay cataract formation, it is important to focus efforts and resources on developing models for efficient delivery of cataract surgical services in underserved regions. For an illustrated summary of this Primer, visit: http://go.nature.com/eQkKll.",2015-06-11 +23322530,The NCDR CathPCI Registry: a US national perspective on care and outcomes for percutaneous coronary intervention.,"

Aims

The NCDR CathPCI Registry collects detailed clinical, process-of-care and outcomes data for patients undergoing coronary angiography and percutaneous coronary intervention (PCI) in the USA. The registry contributes to quality of care by providing data feedback on a wide range of performance metrics to participating centres and by facilitating local and national quality improvement efforts.

Interventions

No treatments are mandated, participating centres receive routine quality-of-care and outcomes performance feedback reports and access to a quality dashboard for personalized performance reports.

Population

Patients undergoing cardiac catheterization and PCI are retrospectively identified. No informed consent is required, as data are anonymised. From inception in 1998, more than 12 million records have been submitted from 1577 participating US centres.

Baseline data

Approximately 250 fields encompassing patient demographics, medical history and risk factors, hospital presentation, initial cardiac status, procedural details, medications, laboratory values, and in-hospital outcomes. Linkages with outside sources of data have permitted longitudinal outcomes assessment in some cases. Centre personnel enter the data into the registry, in some cases facilitated by software vendors. There are non-financial incentives for centre participation. Data completeness is noteworthy with most fields missing at rates less than 5%. A comprehensive data quality program is employed to enhance data validity.

Endpoints

Main outcome measures include quality process metrics and in-hospital patient outcomes. Data are available for research by application to: http://www.ncdr.com.",2013-01-15 +24573472,The MULTICOM protein tertiary structure prediction system.,"With the expansion of genomics and proteomics data aided by the rapid progress of next-generation sequencing technologies, computational prediction of protein three-dimensional structure is an essential part of modern structural genomics initiatives. Prediction of protein structure through understanding of the theories behind protein sequence-structure relationship, however, remains one of the most challenging problems in contemporary life sciences. Here, we describe MULTICOM, a multi-level combination technique, intended to predict moderate- to high-resolution structure of a protein through a novel approach of combining multiple sources of complementary information derived from the experimentally solved protein structures in the Protein Data Bank. The MULTICOM web server is freely available at http://sysbio.rnet.missouri.edu/multicom_toolbox/.",2014-01-01 +24872422,MPBind: a Meta-motif-based statistical framework and pipeline to Predict Binding potential of SELEX-derived aptamers.,"

Unlabelled

Aptamers are 'synthetic antibodies' that can bind to target molecules with high affinity and specificity. Aptamers are chemically synthesized and their discovery can be performed completely in vitro, rather than relying on in vivo biological processes, making them well-suited for high-throughput discovery. However, a large fraction of the most enriched aptamers in Systematic Evolution of Ligands by EXponential enrichment (SELEX) rounds display poor binding activity. Here, we present MPBind, a Meta-motif-based statistical framework and pipeline to Predict the Binding potential of SELEX-derived aptamers. Using human embryonic stem cell SELEX-Seq data, MPBind achieved high prediction accuracy for binding potential. Further analysis showed that MPBind is robust to both polymerase chain reaction amplification bias and incomplete sequencing of aptamer pools. These two biases usually confound aptamer analysis.

Availability and implementation

MPBind software and documents are available at http://www.morgridge.net/MPBind.html. The human embryonic stem cells whole-cell SELEX-Seq data are available at http://www.morgridge.net/Aptamer/.",2014-05-28 +25078397,Walking the interactome for candidate prioritization in exome sequencing studies of Mendelian diseases.,"

Motivation

Whole-exome sequencing (WES) has opened up previously unheard of possibilities for identifying novel disease genes in Mendelian disorders, only about half of which have been elucidated to date. However, interpretation of WES data remains challenging.

Results

Here, we analyze protein-protein association (PPA) networks to identify candidate genes in the vicinity of genes previously implicated in a disease. The analysis, using a random-walk with restart (RWR) method, is adapted to the setting of WES by developing a composite variant-gene relevance score based on the rarity, location and predicted pathogenicity of variants and the RWR evaluation of genes harboring the variants. Benchmarking using known disease variants from 88 disease-gene families reveals that the correct gene is ranked among the top 10 candidates in ≥50% of cases, a figure which we confirmed using a prospective study of disease genes identified in 2012 and PPA data produced before that date. We implement our method in a freely available Web server, ExomeWalker, that displays a ranked list of candidates together with information on PPAs, frequency and predicted pathogenicity of the variants to allow quick and effective searches for candidates that are likely to reward closer investigation.

Availability and implementation

http://compbio.charite.de/ExomeWalker

Contact

peter.robinson@charite.de.",2014-07-30 +22209237,MTCID: a database of genetic polymorphisms in clinical isolates of Mycobacterium tuberculosis.,"Tuberculosis (TB) is a major cause of morbidity and mortality throughout the world, particularly in developing countries. The response of the patients and treatment outcome depends, in addition to diagnosis, appropriate and timely treatment and host factors, on the virulence of Mycobacterium tuberculosis and genetic polymorphism prevalent in clinical isolates of the bacterium. A number of studies have been carried out to characterize clinical isolates of M. tuberculosis obtained from TB patients. However, the data is scattered in a large number of publications. Though attempts have been made to catalog the observed variations, there is no database that has been developed for cataloging, storing and dissemination of genetic polymorphism information. MTCID (M. tuberculosis clinical isolate genetic polymorphism database) is an attempt to provide a comprehensive repository to store, access and disseminate single nucleotide polymorphism (SNPs) and spoligotyping profiles of M. tuberculosis. It can be used to automatically upload the information available with a user that adds to the existing database at the backend. Besides it may also aid in maintaining clinical profiles of TB and treatment of patients. The database has 'search' features and is available at http://ccbb.jnu.ac.in/Tb.",2011-12-29 +25190042,PONDEROSA-C/S: client-server based software package for automated protein 3D structure determination.,"Peak-picking Of Noe Data Enabled by Restriction Of Shift Assignments-Client Server (PONDEROSA-C/S) builds on the original PONDEROSA software (Lee et al. in Bioinformatics 27:1727-1728. doi: 10.1093/bioinformatics/btr200, 2011) and includes improved features for structure calculation and refinement. PONDEROSA-C/S consists of three programs: Ponderosa Server, Ponderosa Client, and Ponderosa Analyzer. 
PONDEROSA-C/S takes as input the protein sequence, a list of assigned chemical shifts, and nuclear Overhauser data sets ((13)C- and/or (15)N-NOESY). The output is a set of assigned NOEs and 3D structural models for the protein. Ponderosa Analyzer supports the visualization, validation, and refinement of the results from Ponderosa Server. These tools enable semi-automated NMR-based structure determination of proteins in a rapid and robust fashion. We present examples showing the use of PONDEROSA-C/S in solving structures of four proteins: two that enable comparison with the original PONDEROSA package, and two from the Critical Assessment of automated Structure Determination by NMR (Rosato et al. in Nat Methods 6:625-626. doi: 10.1038/nmeth0909-625 , 2009) competition. The software package can be downloaded freely in binary format from http://pine.nmrfam.wisc.edu/download_packages.html. Registered users of the National Magnetic Resonance Facility at Madison can submit jobs to the PONDEROSA-C/S server at http://ponderosa.nmrfam.wisc.edu, where instructions, tutorials, and instructions can be found. Structures are normally returned within 1-2 days.",2014-09-05 +23805196,An Evaluation of Methods for Inferring Boolean Networks from Time-Series Data.,"Regulatory networks play a central role in cellular behavior and decision making. Learning these regulatory networks is a major task in biology, and devising computational methods and mathematical models for this task is a major endeavor in bioinformatics. Boolean networks have been used extensively for modeling regulatory networks. In this model, the state of each gene can be either 'on' or 'off' and that next-state of a gene is updated, synchronously or asynchronously, according to a Boolean rule that is applied to the current-state of the entire system. 
Inferring a Boolean network from a set of experimental data entails two main steps: first, the experimental time-series data are discretized into Boolean trajectories, and then, a Boolean network is learned from these Boolean trajectories. In this paper, we consider three methods for data discretization, including a new one we propose, and three methods for learning Boolean networks, and study the performance of all possible nine combinations on four regulatory systems of varying dynamics complexities. We find that employing the right combination of methods for data discretization and network learning results in Boolean networks that capture the dynamics well and provide predictive power. Our findings are in contrast to a recent survey that placed Boolean networks on the low end of the ""faithfulness to biological reality"" and ""ability to model dynamics"" spectra. Further, contrary to the common argument in favor of Boolean networks, we find that a relatively large number of time points in the time-series data is required to learn good Boolean networks for certain data sets. Last but not least, while methods have been proposed for inferring Boolean networks, as discussed above, missing still are publicly available implementations thereof. Here, we make our implementation of the methods available publicly in open source at http://bioinfo.cs.rice.edu/.",2013-06-21 +24278218,DNA sequences at a glance.,"Data summarization and triage is one of the current top challenges in visual analytics. The goal is to let users visually inspect large data sets and examine or request data with particular characteristics. The need for summarization and visual analytics is also felt when dealing with digital representations of DNA sequences. Genomic data sets are growing rapidly, making their analysis increasingly more difficult, and raising the need for new, scalable tools. 
For example, being able to look at very large DNA sequences while immediately identifying potentially interesting regions would provide the biologist with a flexible exploratory and analytical tool. In this paper we present a new concept, the ""information profile"", which provides a quantitative measure of the local complexity of a DNA sequence, independently of the direction of processing. The computation of the information profiles is computationally tractable: we show that it can be done in time proportional to the length of the sequence. We also describe a tool to compute the information profiles of a given DNA sequence, and use the genome of the fission yeast Schizosaccharomyces pombe strain 972 h(-) and five human chromosomes 22 for illustration. We show that information profiles are useful for detecting large-scale genomic regularities by visual inspection. Several discovery strategies are possible, including the standalone analysis of single sequences, the comparative analysis of sequences from individuals from the same species, and the comparative analysis of sequences from different organisms. The comparison scale can be varied, allowing the users to zoom-in on specific details, or obtain a broad overview of a long segment. Software applications have been made available for non-commercial use at http://bioinformatics.ua.pt/software/dna-at-glance.",2013-11-21 +22802394,How well do ITS rDNA sequences differentiate species of true morels (Morchella)?,"Arguably more mycophiles hunt true morels (Morchella) during their brief fruiting season each spring in the northern hemisphere than any other wild edible fungus. Concerns about overharvesting by individual collectors and commercial enterprises make it essential that science-based management practices and conservation policies are developed to ensure the sustainability of commercial harvests and to protect and preserve morel species diversity. 
Therefore, the primary objectives of the present study were to: (i) investigate the utility of the ITS rDNA locus for identifying Morchella species, using phylogenetic species previously inferred from multilocus DNA sequence data as a reference; and (ii) clarify insufficiently identified sequences and determine whether the named sequences in GenBank were identified correctly. To this end, we generated 553 Morchella ITS rDNA sequences and downloaded 312 additional ones generated by other researchers from GenBank using emerencia and analyzed them phylogenetically. Three major findings emerged: (i) ITS rDNA sequences were useful in identifying 48/62 (77.4%) of the known phylospecies; however, they failed to identify 12 of the 22 species within the species-rich Elata Subclade and two closely related species in the Esculenta Clade; (ii) at least 66% of the named Morchella sequences in GenBank are misidentified; and (iii) ITS rDNA sequences of up to six putatively novel Morchella species were represented in GenBank. Recognizing the need for a dedicated Web-accessible reference database to facilitate the rapid identification of known and novel species, we constructed Morchella MLST (http://www.cbs.knaw.nl/morchella/), which can be queried with ITS rDNA sequences and those of the four other genes used in our prior multilocus molecular systematic studies of this charismatic genus.",2012-07-16 +25075119,"BitPAl: a bit-parallel, general integer-scoring sequence alignment algorithm.","

Motivation

Mapping of high-throughput sequencing data and other bulk sequence comparison applications have motivated a search for high-efficiency sequence alignment algorithms. The bit-parallel approach represents individual cells in an alignment scoring matrix as bits in computer words and emulates the calculation of scores by a series of logic operations composed of AND, OR, XOR, complement, shift and addition. Bit-parallelism has been successfully applied to the longest common subsequence (LCS) and edit-distance problems, producing fast algorithms in practice.

Results

We have developed BitPAl, a bit-parallel algorithm for general, integer-scoring global alignment. Integer-scoring schemes assign integer weights for match, mismatch and insertion/deletion. The BitPAl method uses structural properties in the relationship between adjacent scores in the scoring matrix to construct classes of efficient algorithms, each designed for a particular set of weights. In timed tests, we show that BitPAl runs 7-25 times faster than a standard iterative algorithm.

Availability and implementation

Source code is freely available for download at http://lobstah.bu.edu/BitPAl/BitPAl.html. BitPAl is implemented in C and runs on all major operating systems.

Contact

jloving@bu.edu or yhernand@bu.edu or gbenson@bu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-29 +26477251,Spin labeling and Double Electron-Electron Resonance (DEER) to Deconstruct Conformational Ensembles of HIV Protease.,"An understanding of macromolecular conformational equilibrium in biological systems is oftentimes essential to understand function, dysfunction, and disease. For the past few years, our lab has been utilizing site-directed spin labeling (SDSL), coupled with electron paramagnetic resonance (EPR) spectroscopy, to characterize the conformational ensemble and ligand-induced conformational shifts of HIV-1 protease (HIV-1PR). The biomedical importance of characterizing the fractional occupancy of states within the conformational ensemble critically impacts our hypothesis of a conformational selection mechanism of drug-resistance evolution in HIV-1PR. The purpose of the following chapter is to give a timeline perspective of our SDSL EPR approach to characterizing conformational sampling of HIV-1PR. We provide detailed instructions for the procedure utilized in analyzing distance profiles for HIV-1PR obtained from pulsed electron-electron double resonance (PELDOR). Specifically, we employ a version of PELDOR known as double electron-electron resonance (DEER). Data are processed with the software package ""DeerAnalysis"" (http://www.epr.ethz.ch/software), which implements Tikhonov regularization (TKR), to generate a distance profile from electron spin-echo amplitude modulations. We assign meaning to resultant distance profiles based upon a conformational sampling model, which is described herein. The TKR distance profiles are reconstructed with a linear combination of Gaussian functions, which is then statistically analyzed. In general, DEER has proven powerful for observing structural ensembles in proteins and, more recently, nucleic acids. 
Our goal is to present our advances in order to aid readers in similar applications.",2015-09-01 +25312511,Influence of diabetes mellitus on mortality in breast cancer patients.,"

Background

Breast cancer is one of the most common malignant tumours among women worldwide. Besides, diabetes mellitus is also a major health problem in developed countries. This study explores the association between diabetes mellitus and breast cancer patients' survival outcomes.

Methods

A systematic literature search in Embase (http://www.embase.com) and MEDLINE (http://www.ncbi.nlm.nih.gov/pubmed) was conducted from January 1960 to April 2014 and systematically identified clinical studies that evaluated the association between breast cancer mortality and diabetes mellitus. Clinical studies investigating the association between diabetes mellitus and breast cancer patients' survival outcomes were included.

Results

Twenty publications were chosen for the meta-analysis, of which 16 studies had all-cause mortality data and 12 studies had breast cancer mortality data. Published from 2001 to 2013, all 20 studies followed a total of 2,645,249 patients including more than 207,832 diabetic patients. Pre-existing diabetes mellitus was associated with a 37% increased risk for all-cause mortality in women with breast cancer (hazard ratio (HR) = 1.37; 95% confidence interval (CI): 1.34-1.41; P = 0.02). Diabetes mellitus was associated with a 17% increased risk for breast cancer mortality in women with breast cancer (HR = 1.17; 95% CI: 1.11-1.22; P < 0.01).

Conclusions

Women with diabetes mellitus are at higher risk of breast cancer-specific and all-cause mortality after initial breast cancer diagnosis.",2014-10-13 +24872427,DualAligner: a dual alignment-based strategy to align protein interaction networks.,"

Motivation

Given the growth of large-scale protein-protein interaction (PPI) networks obtained across multiple species and conditions, network alignment is now an important research problem. Network alignment performs comparative analysis across multiple PPI networks to understand their connections and relationships. However, PPI data in high-throughput experiments still suffer from significant false-positive and false-negatives rates. Consequently, high-confidence network alignment across entire PPI networks is not possible. At best, local network alignment attempts to alleviate this problem by completely ignoring low-confidence mappings; global network alignment, on the other hand, pairs all proteins regardless. To this end, we propose an alternative strategy: instead of full alignment across the entire network or completely ignoring low-confidence regions, we aim to perform highly specific protein-to-protein alignments where data confidence is high, and fall back on broader functional region-to-region alignment where detailed protein-protein alignment cannot be ascertained. The basic idea is to provide an alignment of multiple granularities to allow biological predictions at varying specificity.

Results

DualAligner performs dual network alignment, in which both region-to-region alignment, where whole subgraph of one network is aligned to subgraph of another, and protein-to-protein alignment, where individual proteins in networks are aligned to one another, are performed to achieve higher accuracy network alignments. Dual network alignment is achieved in DualAligner via background information provided by a combination of Gene Ontology annotation information and protein interaction network data. We tested DualAligner on the global networks from IntAct and demonstrated the superiority of our approach compared with state-of-the-art network alignment methods. We studied the effects of parameters in DualAligner in controlling the quality of the alignment. We also performed a case study that illustrates the utility of our approach.

Availability and implementation

http://www.cais.ntu.edu.sg/∼assourav/DualAligner/.",2014-05-28 +25570217,Comparative performance investigation of DICOM C-STORE and DICOM HTTP-based requests.,"Increasingly, physicians have to access clinical images distributed over multiple healthcare organizations. To this end, two DICOM protocols may be used: a regular DICOM C-STORE transaction or an HTTP-based DICOM request such as WADO or STOW. A major problem of the DICOM C-STORE transaction is that it is inefficient to transfer DICOM data sets that consist of thousands of DICOM objects (such as functional MRI data set) because of the large number of negotiations involved in the transfer. We compare the performances of C-STORE transactions with the STOW HTTP-based protocol, and show that the STOW protocol can divide the transfer time by about 50 when compared to a DICOM C-STORE transaction for studies that consists of thousands of DICOM objects.",2014-01-01 +24039558,Finding associations among histone modifications using sparse partial correlation networks.,"Histone modifications are known to play an important role in the regulation of transcription. While individual modifications have received much attention in genome-wide analyses, little is known about their relationships. Some authors have built Bayesian networks of modifications, however most often they have used discretized data, and relied on unrealistic assumptions such as the absence of feedback mechanisms or hidden confounding factors. Here, we propose to infer undirected networks based on partial correlations between histone modifications. Within the partial correlation framework, correlations among two variables are controlled for associations induced by the other variables. Partial correlation networks thus focus on direct associations of histone modifications. We apply this methodology to data in CD4+ cells. The resulting network is well supported by common knowledge. 
When pairs of modifications show a large difference between their correlation and their partial correlation, a potential confounding factor is identified and provided as explanation. Data from different cell types (IMR90, H1) is also exploited in the analysis to assess the stability of the networks. The results are remarkably similar across cell types. Based on this observation, the networks from the three cell types are integrated into a consensus network to increase robustness. The data and the results discussed in the manuscript can be found, together with code, on http://spcn.molgen.mpg.de/index.html.",2013-09-05 +24947013,Stronger findings from mass spectral data through multi-peak modeling.,"

Background

Mass spectrometry-based metabolomic analysis depends upon the identification of spectral peaks by their mass and retention time. Statistical analysis that follows the identification currently relies on one main peak of each compound. However, a compound present in the sample typically produces several spectral peaks due to its isotopic properties and the ionization process of the mass spectrometer device. In this work, we investigate the extent to which these additional peaks can be used to increase the statistical strength of differential analysis.

Results

We present a Bayesian approach for integrating data of multiple detected peaks that come from one compound. We demonstrate the approach through a simulated experiment and validate it on ultra performance liquid chromatography-mass spectrometry (UPLC-MS) experiments for metabolomics and lipidomics. Peaks that are likely to be associated with one compound can be clustered by the similarity of their chromatographic shape. Changes of concentration between sample groups can be inferred more accurately when multiple peaks are available.

Conclusions

When the sample-size is limited, the proposed multi-peak approach improves the accuracy at inferring covariate effects. An R implementation and data are available at http://research.ics.aalto.fi/mi/software/peakANOVA/.",2014-06-19 +22479466,GPS-MBA: computational analysis of MHC class II epitopes in type 1 diabetes.,"As a severe chronic metabolic disease and autoimmune disorder, type 1 diabetes (T1D) affects millions of people world-wide. Recent advances in antigen-based immunotherapy have provided a great opportunity for further treating T1D with a high degree of selectivity. It is reported that MHC class II I-A(g7) in the non-obese diabetic (NOD) mouse and human HLA-DQ8 are strongly linked to susceptibility to T1D. Thus, the identification of new I-A(g7) and HLA-DQ8 epitopes would be of great help to further experimental and biomedical manipulation efforts. In this study, a novel GPS-MBA (MHC Binding Analyzer) software package was developed for the prediction of I-A(g7) and HLA-DQ8 epitopes. Using experimentally identified epitopes as the training data sets, a previously developed GPS (Group-based Prediction System) algorithm was adopted and improved. By extensive evaluation and comparison, the GPS-MBA performance was found to be much better than other tools of this type. With this powerful tool, we predicted a number of potentially new I-A(g7) and HLA-DQ8 epitopes. Furthermore, we designed a T1D epitope database (TEDB) for all of the experimentally identified and predicted T1D-associated epitopes. Taken together, this computational prediction result and analysis provides a starting point for further experimental considerations, and GPS-MBA is demonstrated to be a useful tool for generating starting information for experimentalists. The GPS-MBA is freely accessible for academic researchers at: http://mba.biocuckoo.org.",2012-03-27 +26059715,UniAlign: protein structure alignment meets evolution.,"

Motivation

During the evolution, functional sites on the surface of the protein as well as the hydrophobic core maintaining the structural integrity are well-conserved. However, available protein structure alignment methods align protein structures based solely on the 3D geometric similarity, limiting their ability to detect functionally relevant correspondences between the residues of the proteins, especially for distantly related homologous proteins.

Results

In this article, we propose a new protein pairwise structure alignment algorithm (UniAlign) that incorporates additional evolutionary information captured in the form of sequence similarity, sequence profiles and residue conservation. We define a per-residue score (UniScore) as a weighted sum of these and other features and develop an iterative optimization procedure to search for an alignment with the best overall UniScore. Our extensive experiments on CDD, HOMSTRAD and BAliBASE benchmark datasets show that UniAlign outperforms commonly used structure alignment methods. We further demonstrate UniAlign's ability to develop family-specific models to drastically improve the quality of the alignments.

Availability and implementation

UniAlign is available as a web service at: http://sacan.biomed.drexel.edu/unialign

Contact

ahmet.sacan@drexel.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-09 +26862865,"Ambient Fine Particulate Matter, Nitrogen Dioxide, and Preterm Birth in New York City.","

Background

Recent studies have suggested associations between air pollution and various birth outcomes, but the evidence for preterm birth is mixed.

Objective

We aimed to assess the relationship between air pollution and preterm birth using 2008-2010 New York City (NYC) birth certificates linked to hospital records.

Methods

We analyzed 258,294 singleton births with 22-42 completed weeks gestation to nonsmoking mothers. Exposures to ambient fine particles (PM2.5) and nitrogen dioxide (NO2) during the first, second, and cumulative third trimesters within 300 m of maternal address were estimated using data from the NYC Community Air Survey and regulatory monitors. We estimated the odds ratio (OR) of spontaneous preterm (gestation < 37 weeks) births for the first- and second-trimester exposures in a logistic mixed model, and the third-trimester cumulative exposures in a discrete time survival model, adjusting for maternal characteristics and delivery hospital. Spatial and temporal components of estimated exposures were also separately analyzed.

Results

PM2.5 was not significantly associated with spontaneous preterm birth. NO2 in the second trimester was negatively associated with spontaneous preterm birth in the adjusted model (OR = 0.90; 95% CI: 0.83, 0.97 per 20 ppb). Neither pollutant was significantly associated with spontaneous preterm birth based on adjusted models of temporal exposures, whereas the spatial exposures showed significantly reduced odds ratios (OR = 0.80; 95% CI: 0.67, 0.96 per 10 μg/m3 PM2.5 and 0.88; 95% CI: 0.79, 0.98 per 20 ppb NO2). Without adjustment for hospital, these negative associations were stronger.

Conclusion

Neither PM2.5 nor NO2 was positively associated with spontaneous preterm delivery in NYC. Delivery hospital was an important spatial confounder.

Citation

Johnson S, Bobb JF, Ito K, Savitz DA, Elston B, Shmool JL, Dominici F, Ross Z, Clougherty JE, Matte T. 2016. Ambient fine particulate matter, nitrogen dioxide, and preterm birth in New York City. Environ Health Perspect 124:1283-1290; http://dx.doi.org/10.1289/ehp.1510266.",2016-02-05 +25096029,Using gene expression to improve the power of genome-wide association analysis.,"

Background/aims

Genome-wide association (GWA) studies have reported susceptible regions in the human genome for many common diseases and traits; however, these loci only explain a minority of trait heritability. To boost the power of a GWA study, substantial research endeavors have been focused on integrating other available genomic information in the analysis. Advances in high through-put technologies have generated a wealth of genomic data and made combining SNP and gene expression data become feasible.

Results

In this paper, we propose a novel procedure to incorporate gene expression information into GWA analysis. This procedure utilizes weights constructed by gene expression measurements to adjust p values from a GWA analysis. RESULTS from simulation analyses indicate that the proposed procedures may achieve substantial power gains, while controlling family-wise type I error rates at the nominal level. To demonstrate the implementation of our proposed approach, we apply the weight adjustment procedure to a GWA study on serum interferon-regulated chemokine levels in systemic lupus erythematosus patients. The study results can provide valuable insights for the functional interpretation of GWA signals.

Availability

The R source code for implementing the proposed weighting procedure is available at http://www.biostat.umn.edu/∼yho/research.html.",2014-07-30 +25303679,To the novel paradigm of proteome-based cell therapy of tumors: through comparative proteome mapping of tumor stem cells and tissue-specific stem cells of humans.,"We performed proteome mapping (PM), cataloging, and bioinformation analysis of protein lysates of human neural (CD133(+)) progenitor and stem cells (NPSCs) isolated from the olfactory sheath of a nose, multipotent mesenchymal (CD29(+), CD44(+), CD73(+), CD90(+), CD34(-)) stromal cells (MMSCs) isolated from human bone marrow, and tumor (CD133(+)) stem cells (TSCs) isolated from the human U87 glioblastoma (GB) cell line. We identified 1,664 proteins in the examined lysates of stem cells (SCs), 1,052 (63.2%) of which are identical in NPSCs and TSCs and 607 proteins (36.47%) of which are identical in MMSCs and TSCs. Other proteins in U87 GB TSCs are oncospecific or carcinogenesis associated. The biological processes, molecular functions, cell localization, and protein signal pathways of the proteins available in all three proteomes were annotated by PubMed (http://www.ncbi.nlm.nih.gov/pubmed/), PANTHER (http://www.pantherdb.org/), GeneOntology (http://www.geneontology.org/), and KEGG (http://www.genome.jp/kegg/) databases. It was shown that gliomaspheres of U87 GB had only 10 intracellular signal transduction pathways (ISTP) that were not modified by the neoplastic process, but only two of them (integrin and focal adhesion pathways) were accessible for regulatory action on gene candidates in the TSC nucleus. Carcinogenesis-free membrane proteins, IPST, and genes expressing proteins of these pathways in U87 GB TSCs can be viewed as main targets for regulatory effects on TSCs. We offer a novel concept of proteome-based complex therapy of tumors. 
This manuscript is published as part of the International Association of Neurorestoratology (IANR) special issue of Cell Transplantation.",2014-10-09 +25300487,Super Natural II--a database of natural products.,"Natural products play a significant role in drug discovery and development. Many topological pharmacophore patterns are common between natural products and commercial drugs. A better understanding of the specific physicochemical and structural features of natural products is important for corresponding drug development. Several encyclopedias of natural compounds have been composed, but the information remains scattered or not freely available. The first version of the Supernatural database containing ∼ 50,000 compounds was published in 2006 to face these challenges. Here we present a new, updated and expanded version of natural product database, Super Natural II (http://bioinformatics.charite.de/supernatural), comprising ∼ 326,000 molecules. It provides all corresponding 2D structures, the most important structural and physicochemical properties, the predicted toxicity class for ∼ 170,000 compounds and the vendor information for the vast majority of compounds. The new version allows a template-based search for similar compounds as well as a search for compound names, vendors, specific physical properties or any substructures. Super Natural II also provides information about the pathways associated with synthesis and degradation of the natural products, as well as their mechanism of action with respect to structurally similar drugs and their target proteins.",2014-10-09 +25434742,GenePainter v. 2.0 resolves the taxonomic distribution of intron positions.,"

Unlabelled

Conserved intron positions in eukaryotic genes can be used to reconstruct phylogenetic trees, to resolve ambiguous subfamily relationships in protein families and to infer the history of gene families. This version of GenePainter facilitates working with large datasets through options to select specific subsets for analysis and visualization, and through providing exhaustive statistics. GenePainter's application in phylogenetic analyses is considerably extended by the newly implemented integration of the exon-intron pattern conservation with phylogenetic trees.

Availability and implementation

The software along with detailed documentation is available at http://www.motorprotein.de/genepainter and as Supplementary Material.

Contact

mako@nmr.mpibpc.mpg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-29 +26595494,Stump Invagination Versus Simple Ligation in Open Appendicectomy: A Systematic Review and Meta-Analysis.,"The aim of this meta-analysis was to compare the superiority of stump closure between stump invagination (SI) and simple ligation (SL) during open appendicectomy (OA). The literature searching was conducted in MEDLINE, EMBASE, the Cochrane Library, and http://scholar.google.com . Available data was extracted by 3 independent reviewers. The clinical outcomes were analyzed by meta-analytic software to compare the difference between 2 methods during OA. The pooled odds ratios (ORs) and weighted mean differences (WMDs) with 95% confidence intervals (95% CIs) were obtained by using fixed effect model. Eleven randomized controlled trials (RCTs) were finally included in this study involving 2634 patients. Postoperative pyrexia and infections were similar between SL and SI groups, respectively, but the former group had a shorter operative time (WMD: 8.72; 95% CI: 6.87-10.56; P < 0.00001); less incidence of postoperative ileus (WMD: 2.02; 95% CI: 1.36-3.01; P = 0.0005); and quicker postoperative recovery (WMD: 0.30; 95% CI: 0.11-0.48; P = 0.002). The above results were based on 5, 11, 4, 11, and 9 articles, respectively. The clinical results revealed that SL was significantly superior to SI. SL should be suggested during OA.",2015-07-01 +25783428,Development and validation of risk prediction algorithms to estimate future risk of common cancers in men and women: prospective cohort study.,"

Objective

To derive and validate a set of clinical risk prediction algorithm to estimate the 10-year risk of 11 common cancers.

Design

Prospective open cohort study using routinely collected data from 753 QResearch general practices in England. We used 565 practices to develop the scores and 188 for validation.

Subjects

4.96 million patients aged 25-84 years in the derivation cohort; 1.64 million in the validation cohort. Patients were free of the relevant cancer at baseline.

Methods

Cox proportional hazards models in the derivation cohort to derive 10-year risk algorithms. Risk factors considered included age, ethnicity, deprivation, body mass index, smoking, alcohol, previous cancer diagnoses, family history of cancer, relevant comorbidities and medication. Measures of calibration and discrimination in the validation cohort.

Outcomes

Incident cases of blood, breast, bowel, gastro-oesophageal, lung, oral, ovarian, pancreas, prostate, renal tract and uterine cancers. Cancers were recorded on any one of four linked data sources (general practitioner (GP), mortality, hospital or cancer records).

Results

We identified 228,241 incident cases during follow-up of the 11 types of cancer. Of these 25,444 were blood; 41,315 breast; 32,626 bowel, 12,808 gastro-oesophageal; 32,187 lung; 4811 oral; 6635 ovarian; 7119 pancreatic; 35,256 prostate; 23,091 renal tract; 6949 uterine cancers. The lung cancer algorithm had the best performance with an R(2) of 64.2%; D statistic of 2.74; receiver operating characteristic curve statistic of 0.91 in women. The sensitivity for the top 10% of women at highest risk of lung cancer was 67%. Performance of the algorithms in men was very similar to that for women.

Conclusions

We have developed and validated a prediction models to quantify absolute risk of 11 common cancers. They can be used to identify patients at high risk of cancers for prevention or further assessment. The algorithms could be integrated into clinical computer systems and used to identify high-risk patients.

Web calculator

There is a simple web calculator to implement the Qcancer 10 year risk algorithm together with the open source software for download (available at http://qcancer.org/10yr/).",2015-03-17 +21558151,Multi-source and ontology-based retrieval engine for maize mutant phenotypes.,"Model Organism Databases, including the various plant genome databases, collect and enable access to massive amounts of heterogeneous information, including sequence data, gene product information, images of mutant phenotypes, etc, as well as textual descriptions of many of these entities. While a variety of basic browsing and search capabilities are available to allow researchers to query and peruse the names and attributes of phenotypic data, next-generation search mechanisms that allow querying and ranking of text descriptions are much less common. In addition, the plant community needs an innovative way to leverage the existing links in these databases to search groups of text descriptions simultaneously. Furthermore, though much time and effort have been afforded to the development of plant-related ontologies, the knowledge embedded in these ontologies remains largely unused in available plant search mechanisms. Addressing these issues, we have developed a unique search engine for mutant phenotypes from MaizeGDB. This advanced search mechanism integrates various text description sources in MaizeGDB to aid a user in retrieving desired mutant phenotype information. Currently, descriptions of mutant phenotypes, loci and gene products are utilized collectively for each search, though expansion of the search mechanism to include other sources is straightforward. The retrieval engine, to our knowledge, is the first engine to exploit the content and structure of available domain ontologies, currently the Plant and Gene Ontologies, to expand and enrich retrieval results in major plant genomic databases. 
Database URL: http:www.PhenomicsWorld.org/QBTA.php.",2011-05-10 +26066767,Mechanisms of antibiotic resistance to enrofloxacin in uropathogenic Escherichia coli in dog.,"Escherichia coli (E. coli) urinary tract infections (UTIs) are becoming a serious problem both for pets and humans (zoonosis) due to the close contact and to the increasing resistance to antibiotics. This study has been performed in order to unravel the mechanism of induced enrofloxacin resistance in canine E. coli isolates that represent a good tool to study this pathology. The isolated E. coli has been induced with enrofloxacin and studied through 2D DIGE and shotgun MS. Discovered differentially expressed proteins are principally involved in antibiotic resistance and linked to oxidative stress response, to DNA protection and to membrane permeability. Moreover, since enrofloxacin is an inhibitor of DNA gyrase, the overexpression of DNA starvation/stationary phase protection protein (Dsp) could be a central point to discover the mechanism of this clone to counteract the effects of enrofloxacin. In parallel, the dramatic decrease of the synthesis of the outer membrane protein W, which represents one of the main gates for enrofloxacin entrance, could explain additional mechanism of E. coli defense against this antibiotic. All 2D DIGE and MS data have been deposited into the ProteomeXchange Consortium with identifier PXD002000 and DOI http://dx.doi.org/10.6019/PXD002000. This article is part of a Special Issue entitled: HUPO 2014.",2015-06-09 +22058129,DistiLD Database: diseases and traits in linkage disequilibrium blocks.,"Genome-wide association studies (GWAS) have identified thousands of single nucleotide polymorphisms (SNPs) associated with the risk of hundreds of diseases. However, there is currently no database that enables non-specialists to answer the following simple questions: which SNPs associated with diseases are in linkage disequilibrium (LD) with a gene of interest? 
Which chromosomal regions have been associated with a given disease, and which are the potentially causal genes in each region? To answer these questions, we use data from the HapMap Project to partition each chromosome into so-called LD blocks, so that SNPs in LD with each other are preferentially in the same block, whereas SNPs not in LD are in different blocks. By projecting SNPs and genes onto LD blocks, the DistiLD database aims to increase usage of existing GWAS results by making it easy to query and visualize disease-associated SNPs and genes in their chromosomal context. The database is available at http://distild.jensenlab.org/.",2011-11-03 +26262165,Generating and Executing Complex Natural Language Queries across Linked Data.,"With the recent and intensive research in the biomedical area, the knowledge accumulated is disseminated through various knowledge bases. Links between these knowledge bases are needed in order to use them jointly. Linked Data, SPARQL language, and interfaces in Natural Language question-answering provide interesting solutions for querying such knowledge bases. We propose a method for translating natural language questions in SPARQL queries. We use Natural Language Processing tools, semantic resources, and the RDF triples description. The method is designed on 50 questions over 3 biomedical knowledge bases, and evaluated on 27 questions. It achieves 0.78 F-measure on the test set. The method for translating natural language questions into SPARQL queries is implemented as Perl module available at http://search.cpan.org/ thhamon/RDF-NLP-SPARQLQuery.",2015-01-01 +26357316,An Efficient Search Algorithm for Finding Genomic-Range Overlaps Based on the Maximum Range Length.,"Efficient search algorithms for finding genomic-range overlaps are essential for various bioinformatics applications. 
A majority of fast algorithms for searching the overlaps between a query range (e.g., a genomic variant) and a set of N reference ranges (e.g., exons) has time complexity of O(k + logN), where kdenotes a term related to the length and location of the reference ranges. Here, we present a simple but efficient algorithm that reduces k, based on the maximum reference range length. Specifically, for a given query range and the maximum reference range length, the proposed method divides the reference range set into three subsets: always, potentially, and never overlapping. Therefore, search effort can be reduced by excluding never overlapping subset. We demonstrate that the running time of the proposed algorithm is proportional to potentially overlapping subset size, that is proportional to the maximum reference range length if all the other conditions are the same. Moreover, an implementation of our algorithm was 13.8 to 30.0 percent faster than one of the fastest range search methods available when tested on various genomic-range data sets. The proposed algorithm has been incorporated into a disease-linked variant prioritization pipeline for WGS (http://gnome.tchlab.org) and its implementation is available at http://ml.ssu.ac.kr/gSearch.",2015-07-01 +25855375,Alterations of Functional Connectivity Among Resting-State Networks in Hypothyroidism.,"Hypothyroidism affects brain functioning as suggested by various neuroimaging studies. The primary focus of the present study was to examine whether hypothyroidism would impact connectivity among resting-state networks (RSNs) using resting-state functional magnetic resonance imaging (rsfMRI). Twenty-two patients with hypothyroidism and 22 healthy controls were recruited and scanned using rsfMRI. The data were analysed using independent component analysis and a dual regression approach that was applied on five RSNs that were identified using fsl software (http://fsl.fmrib.ox.ac.uk). 
Hypothyroid patients showed significantly decreased functional connectivity in the regions of the right frontoparietal network (frontal pole), the medial visual network (lateral occipital gyrus, precuneus cortex and cuneus) and the motor network (precentral gyrus, postcentral gyrus, precuneus cortex, paracingulate gyrus, cingulate gyrus and supramarginal gyrus) compared to healthy controls. The reduced functional connectivity in the right frontoparietal network, the medial visual network and the motor network suggests neurocognitive alterations in hypothyroid patients in the corresponding functions. However, the study would be further continued to investigate the effects of thyroxine treatment and correlation with neurocognitive scores. The findings of the present study provide further interesting insights into our understanding of the action of thyroid hormone on the adult human brain.",2015-07-01 +24632498,FSuite: exploiting inbreeding in dense SNP chip and exome data.,"

Unlabelled

FSuite is a user-friendly pipeline developed for exploiting inbreeding information derived from human genomic data. It can make use of single nucleotide polymorphism chip or exome data. Compared with other software, the advantage of FSuite is that it provides a complete suite of scripts to describe and use the inbreeding information. It includes a module to detect inbred individuals and estimate their inbreeding coefficient, a module to describe the proportion of different mating types in the population and the individual probability to be offspring of different mating types that can be useful for population genetic studies. It also allows the identification of shared regions of homozygosity between affected individuals (homozygosity mapping) that can be used to identify rare recessive mutations involved in monogenic or multifactorial diseases.

Availability and implementation

FSuite is developed in Perl and uses R functions to generate graphical outputs. This pipeline is freely available under GNU GPL license at: http://genestat.cephb.fr/software/index.php/FSuite.",2014-03-14 +25977292,BCSearch: fast structural fragment mining over large collections of protein structures.,"Resources to mine the large amount of protein structures available today are necessary to better understand how amino acid variations are compatible with conformation preservation, to assist protein design, engineering and, further, the development of biologic therapeutic compounds. BCSearch is a versatile service to efficiently mine large collections of protein structures. It relies on a new approach based on a Binet-Cauchy kernel that is more discriminative than the widely used root mean square deviation criterion. It has statistics independent of size even for short fragments, and is fast. The systematic mining of large collections of structures such as the complete SCOPe protein structural classification or comprehensive subsets of the Protein Data Bank can be performed in few minutes. Based on this new score, we propose four innovative applications: BCFragSearch and BCMirrorSearch, respectively, search for fragments similar and anti-similar to a query and return information on the diversity of the sequences of the hits. BCLoopSearch identifies candidate fragments of fixed size matching the flanks of a gaped structure. BCSpecificitySearch analyzes a complete protein structure and returns information about sites having few similar fragments. BCSearch is available at http://bioserv.rpbs.univ-paris-diderot.fr/services/BCSearch.",2015-05-14 +25304778,Integrative data analysis indicates an intrinsic disordered domain character of Argonaute-binding motifs.,"

Motivation

Argonaute-interacting WG/GW proteins are characterized by the presence of repeated sequence motifs containing glycine (G) and tryptophan (W). The motifs seem to be remarkably adaptive to amino acid substitutions and their sequences show non-contiguity. Our previous approach to the detection of GW domains, based on scoring their gross amino acid composition, allowed annotation of several novel proteins involved in gene silencing. The accumulation of new experimental data and more advanced applications revealed some deficiency of the algorithm in prediction selectivity. Additionally, W-motifs, though critical in gene regulation, have not yet been annotated in any available online resources.

Results

We present an improved set of computational tools allowing efficient management and annotation of W-based motifs involved in gene silencing. The new prediction algorithms provide novel functionalities by annotation of the W-containing domains at the local sequence motif level rather than by overall compositional properties. This approach represents a significant improvement over the previous method in terms of prediction sensitivity and selectivity. Application of the algorithm allowed annotation of a comprehensive list of putative Argonaute-interacting proteins across eukaryotes. An in-depth characterization of the domains' properties indicates its intrinsic disordered character. In addition, we created a knowledge-based portal (whub) that provides access to tools and information on RNAi-related tryptophan-containing motifs.

Availability and implementation

The web portal and tools are freely available at http://www.comgen.pl/whub.

Contact

wmk@amu.edu.pl

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-09 +25977296,NPDock: a web server for protein-nucleic acid docking.,"Protein-RNA and protein-DNA interactions play fundamental roles in many biological processes. A detailed understanding of these interactions requires knowledge about protein-nucleic acid complex structures. Because the experimental determination of these complexes is time-consuming and perhaps futile in some instances, we have focused on computational docking methods starting from the separate structures. Docking methods are widely employed to study protein-protein interactions; however, only a few methods have been made available to model protein-nucleic acid complexes. Here, we describe NPDock (Nucleic acid-Protein Docking); a novel web server for predicting complexes of protein-nucleic acid structures which implements a computational workflow that includes docking, scoring of poses, clustering of the best-scored models and refinement of the most promising solutions. The NPDock server provides a user-friendly interface and 3D visualization of the results. The smallest set of input data consists of a protein structure and a DNA or RNA structure in PDB format. Advanced options are available to control specific details of the docking process and obtain intermediate results. The web server is available at http://genesilico.pl/NPDock.",2015-05-14 +22672714,In silico polymorphism analysis for the development of simple sequence repeat and transposon markers and construction of linkage map in cultivated peanut.,"

Background

Peanut (Arachis hypogaea) is an autogamous allotetraploid legume (2n = 4x = 40) that is widely cultivated as a food and oil crop. More than 6,000 DNA markers have been developed in Arachis spp., but high-density linkage maps useful for genetics, genomics, and breeding have not been constructed due to extremely low genetic diversity. Polymorphic marker loci are useful for the construction of such high-density linkage maps. The present study used in silico analysis to develop simple sequence repeat-based and transposon-based markers.

Results

The use of in silico analysis increased the efficiency of polymorphic marker development by more than 3-fold. In total, 926 (34.2%) of 2,702 markers showed polymorphisms between parental lines of the mapping population. Linkage analysis of the 926 markers along with 253 polymorphic markers selected from 4,449 published markers generated 21 linkage groups covering 2,166.4 cM with 1,114 loci. Based on the map thus produced, 23 quantitative trait loci (QTLs) for 15 agronomical traits were detected. Another linkage map with 326 loci was also constructed and revealed a relationship between the genotypes of the FAD2 genes and the ratio of oleic/linoleic acid in peanut seed.

Conclusions

In silico analysis of polymorphisms increased the efficiency of polymorphic marker development, and contributed to the construction of high-density linkage maps in cultivated peanut. The resultant maps were applicable to QTL analysis. Marker subsets and linkage maps developed in this study should be useful for genetics, genomics, and breeding in Arachis. The data are available at the Kazusa DNA Marker Database (http://marker.kazusa.or.jp).",2012-06-06 +23855787,"Development of dimethyl sulfoxide solubility models using 163,000 molecules: using a domain applicability metric to select more reliable predictions.","The dimethyl sulfoxide (DMSO) solubility data from Enamine and two UCB pharma compound collections were analyzed using 8 different machine learning methods and 12 descriptor sets. The analyzed data sets were highly imbalanced with 1.7-5.8% nonsoluble compounds. The libraries' enrichment by soluble molecules from the set of 10% of the most reliable predictions was used to compare prediction performances of the methods. The highest accuracies were calculated using a C4.5 decision classification tree, random forest, and associative neural networks. The performances of the methods developed were estimated on individual data sets and their combinations. The developed models provided on average a 2-fold decrease of the number of nonsoluble compounds amid all compounds predicted as soluble in DMSO. However, a 4-9-fold enrichment was observed if only 10% of the most reliable predictions were considered. The structural features influencing compounds to be soluble or nonsoluble in DMSO were also determined. The best models developed with the publicly available Enamine data set are freely available online at http://ochem.eu/article/33409 .",2013-07-15 +25280560,Association between Catechol-O-methyltransferase rs4680 (G>A) polymorphism and lung cancer risk.,"

Background

The association between the Val158Met polymorphism in the catechol-O-methyltransferase (COMT) gene and lung cancer risk remains controversial and inconclusive. Therefore, the meta-analysis was performed to provide a quality reevaluation of the association between the COMT Val158Met polymorphism and the risk of lung cancer.

Methods

Two major public databases (Pubmed and Embase) and several Chinese databases were searched for eligible studies. Pooled odds ratios (OR) and 95% confidence intervals (CI) were calculated to estimate the strength of the association.

Results

Five publications, including six individual studies with a total of 4,043 subjects (1,796 cases and 2,247 controls) regarding the association of COMT Val158Met polymorphism with lung cancer susceptibility were included in this meta-analysis. Overall, pooled analysis indicated that there was no significant association between COMT Val158Met polymorphism and lung cancer susceptibility under all genetic models. Likewise, no association was observed in the stratified analysis by ethnicity and control source, either. However, Val158Met polymorphism was shown to increase lung cancer risk among women (AG vs. GG, OR=1.190, 95% CI=1.001-1.422, p=0.049).

Conclusion

These findings suggested that the COMT l58Val/Met polymorphism confer genetic susceptibility to lung cancer among women. However, no evidence was found for the association with lung cancer risk in ethnicity and smoking status.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_192.",2014-10-04 +24564280,MHC2SKpan: a novel kernel based approach for pan-specific MHC class II peptide binding prediction.,"

Background

Computational methods for the prediction of Major Histocompatibility Complex (MHC) class II binding peptides play an important role in facilitating the understanding of immune recognition and the process of epitope discovery. To develop an effective computational method, we need to consider two important characteristics of the problem: (1) the length of binding peptides is highly flexible; and (2) MHC molecules are extremely polymorphic and for the vast majority of them there are no sufficient training data.

Methods

We develop a novel string kernel MHC2SK (MHC-II String Kernel) method to measure the similarities among peptides with variable lengths. By considering the distinct features of MHC-II peptide binding prediction problem, MHC2SK differs significantly from the recently developed kernel based method, GS (Generic String) kernel, in the way of computing similarities. Furthermore, we extend MHC2SK to MHC2SKpan for pan-specific MHC-II peptide binding prediction by leveraging the binding data of various MHC molecules.

Results

MHC2SK outperformed GS in allele specific prediction using a benchmark dataset, which demonstrates the effectiveness of MHC2SK. Furthermore, we evaluated the performance of MHC2SKpan using various benckmark data sets from several different perspectives: Leave-one-allele-out (LOO), 5-fold cross validation as well as independent data testing. MHC2SKpan has achieved comparable performance with NetMHCIIpan-2.0 and outperformed NetMHCIIpan-1.0, TEPITOPEpan and MultiRTA, being statistically significant. MHC2SKpan can be freely accessed at http://datamining-iip.fudan.edu.cn/service/MHC2SKpan/index.html.",2013-10-16 +21691702,Transcriptional profile and response to neoadjuvante chemotherapy in breast cancer.,"

Objective

To improve the accuracy predictive models of response to neoadjuvante chemotherapy in breast cancer, cDNA microarray technology was used to study tumor transcriptional profile. Gene signatures associated with predicting the response to neoadjuvante chemotherapy are the subject of this review.

Methods

The data base http://www.ncbi.nlm.nih.gov/pubmed/ search was conducted by using the words ""breast cancer"" AND ""neoadjuvante/primary chemotherapy"" AND ""gene expression profile/microarray"". After excluding the repeats and selecting the publications considered most relevant by the authors to be presented, 279 publications were retrieved.

Results

The number of publications regarding this subject has been increasing over the years, reaching over 50 in 2010, including the response to different chemotherapeutic drugs, such as anthracyclines and taxanes either alone or in combination. The first studies are from early last decade and used microarray platforms produced by the investigators. Recent studies have used commercial microarray platforms whose data have been stored in public databases, allowing for the analysis of a higher number of samples. Several transcriptional profiles associated with the complete pathological response were identified. Other authors used the clinical response to treatment as an endpoint, and, in this case, a predictive panel of resistance to the chemotherapeutic regimen at issue was determined. This is also a key issue, as it can contribute to individualize treatment, allowing patients resistant to a certain chemotherapeutic agent to be offered another therapeutic regimen.

Conclusion

Identifying patients responsive to chemotherapy is of essential interest and despite major steps have been taken, the issue warrants further studies in view of its complexity.",2011-05-01 +24884954,A fast and robust iterative algorithm for prediction of RNA pseudoknotted secondary structures.,"

Background

Improving accuracy and efficiency of computational methods that predict pseudoknotted RNA secondary structures is an ongoing challenge. Existing methods based on free energy minimization tend to be very slow and are limited in the types of pseudoknots that they can predict. Incorporating known structural information can improve prediction accuracy; however, there are not many methods for prediction of pseudoknotted structures that can incorporate structural information as input. There is even less understanding of the relative robustness of these methods with respect to partial information.

Results

We present a new method, Iterative HFold, for pseudoknotted RNA secondary structure prediction. Iterative HFold takes as input a pseudoknot-free structure, and produces a possibly pseudoknotted structure whose energy is at least as low as that of any (density-2) pseudoknotted structure containing the input structure. Iterative HFold leverages strengths of earlier methods, namely the fast running time of HFold, a method that is based on the hierarchical folding hypothesis, and the energy parameters of HotKnots V2.0.Our experimental evaluation on a large data set shows that Iterative HFold is robust with respect to partial information, with average accuracy on pseudoknotted structures steadily increasing from roughly 54% to 79% as the user provides up to 40% of the input structure.Iterative HFold is much faster than HotKnots V2.0, while having comparable accuracy. Iterative HFold also has significantly better accuracy than IPknot on our HK-PK and IP-pk168 data sets.

Conclusions

Iterative HFold is a robust method for prediction of pseudoknotted RNA secondary structures, whose accuracy with more than 5% information about true pseudoknot-free structures is better than that of IPknot, and with about 35% information about true pseudoknot-free structures compares well with that of HotKnots V2.0 while being significantly faster. Iterative HFold and all data used in this work are freely available at http://www.cs.ubc.ca/~hjabbari/software.php.",2014-05-18 +26529796,Human Health Effects of Biphenyl: Key Findings and Scientific Issues.,"

Background

In support of the Integrated Risk Information System (IRIS), the U.S. Environmental Protection Agency (EPA) has evaluated the human health hazards of biphenyl exposure.

Objectives

We review key findings and scientific issues regarding expected human health effects of biphenyl.

Methods

Scientific literature from 1926 through September 2012 was critically evaluated to identify potential human health hazards associated with biphenyl exposure. Key issues related to the carcinogenicity and noncancer health hazards of biphenyl were examined based on evidence from experimental animal bioassays and mechanistic studies.

Discussion

Systematic consideration of experimental animal studies of oral biphenyl exposure took into account the variety of study designs (e.g., study sizes, exposure levels, and exposure durations) to reconcile differing reported results. The available mechanistic and toxicokinetic evidence supports the hypothesis that male rat urinary bladder tumors arise through urinary bladder calculi formation but is insufficient to hypothesize a mode of action for liver tumors in female mice. Biphenyl and its metabolites may induce genetic damage, but a role for genotoxicity in biphenyl-induced carcinogenicity has not been established.

Conclusions

The available health effects data for biphenyl provides suggestive evidence for carcinogenicity in humans, based on increased incidences of male rat urinary bladder tumors at high exposure levels and on female mouse liver tumors. Kidney toxicity is also a potential human health hazard of biphenyl exposure.

Citation

Li Z, Hogan KA, Cai C, Rieth S. 2016. Human health effects of biphenyl: key findings and scientific issues. Environ Health Perspect 124:703-712; http://dx.doi.org/10.1289/ehp.1509730.",2015-11-03 +30704012,First Report of Phytophthora hedraiandra Causing Rhododendron Dieback and Root Rot of Common Beech in the Czech Republic.,"From 2010 to 2012, Phytophthora isolates were obtained from brownish diffusion leaf lesions usually up to 2 to 3 cm in diameter of Rhododendron caucasicum 'Cheer,' from withered twigs of Rhododendron sp. with blackish elongated lesions up to ~5 cm in length, and from rotten feeder roots of 2-year-old, chlorotic, wilting seedlings of Fagus sylvatica collected from ornamental and forest nurseries in three areas (central and eastern Bohemia and northern Moravia) in the Czech Republic. Isolates formed chrysanthemum-like to slightly stellate, appressed colonies with sparse aerial mycelium on V8 agar (V8A) plates at 20°C after 5 days, whereas on carrot agar (CA) plates the pattern was vague with no aerial mycelium. The cardinal growth temperatures were: min. 3°C, optimum 23 to 27°C, and max. 31°C. Radial growth was 5.7 to 6.6 mm/day at 20°C on V8A. The isolates were homothallic and produced colorless, smooth-walled, spherical oogonia with an average diameter 29.9 to 33.8 μm on CA. Oospores were aplerotic (avg. 26.4 to 29.3 μm), oospore wall was hyaline and averaged 1.3 to 1.7 μm thick, oospore wall index was 0.26 to 0.32. Paragynous or occasionally amphigynous antheridia averaged 13.4 to 15.0 × 10.9 to 12.5 μm (height × width). Sporangia were caducous, papillate, globose, spherical to ovoid, with short pedicels (avg. 2.1 to 2.6 μm) and averaged 30.9 to 41.5 × 25.5 to 30.6 μm, L:B ratio was 1.2 to 1.4. Chlamydospores were not observed. Morphological characters resembled those described for P. hedraiandra (1). The isolates were deposited in the collection of phytopathogenic oomycetes of RILOG Pruhonice and given accession nos. 
450.11, 531.11, and 578.12. The isolates were sequenced for nuclear rDNA ITS region and partial Cox I gene. Obtained sequences were compared with sequences present in GenBank database using BLAST. The ITS sequences of all isolates (GenBank Accession Nos. KJ567081, 82, and 83) of overall length of 792 bp were identical to that of P. hedraiandra AY707987 (1). The Cox I sequences of overall length of 880 bp (KJ567084, 85, and 86) showed 99% homology (1 bp substitution) with AY769115 (1) and 100% identity with other Cox I sequences of P. hedraiandra, i.e., JN376067 (4) and EF174432 (3). Koch's postulates were confirmed by wound-inoculating of 3-year-old rhododendron and common beech plants (10 host plants per corresponding isolate). Rhododendron leaves were gently abraded near the midrib, whereas 5-mm-diameter bark plugs were removed from the beech collars. The inoculum (5-mm-diameter V8A plug with actively growing mycelium) was placed over wounds and sealed with Parafilm. Control plants were treated in the same manner with sterile agar plugs. Plants were maintained in a greenhouse at 22°C. All inoculated plants showed disease symptoms after 10 days of incubation; the lesions were up to 2 cm in rhododendron leaves and ~1 cm in beech collars. Control plants remained healthy. The pathogen was re-isolated from all infected plants. To our knowledge, this is the first report of P. hedraiandra in the Czech Republic. Besides it, the pathogen was found in southern and western Europe (Italy, Slovenia, Spain, the Netherlands) and in the United States (2). References: (1) A. W. A. M. de Cock and A. Lévesque. Stud. Mycol. 50:481, 2004. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Syst. Mycol. Microbiol. Lab., ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , May 13, 2014. (4) E. Moralejo et al. Span. J. Agric. Res. 5:82, 2007. (2) X. Yang et al. Plant Dis. 
96:915, 2012.",2014-10-01 +30703972,First Report of Downy Mildew Caused by Hyaloperonospora camelinae on Camelina sativa in Slovenia.,"Camelina or false flax (Camelina sativa), of the Brassicaceae, is an annual flowering plant native to Europe and Central Asia where it is grown commercially as an oilseed crop. At the end of May 2012, symptoms of downy mildew were observed on camelina plants grown in the Savinja Valley in Slovenia. The disease was found in four monitored fields (total area 3 ha), and the incidence ranged from 2 to 38% depending on the variety. Symptomatic plants showed whitish, abundant, and fluffy mycelia covering the stems, flowers, seed pods, and undersides of the leaves. The disease mainly affected the upper half of the plants, and the stems were reduced and distorted. During disease progression, the mycelium turned from gray to black. Microscopic observations revealed hyaline, straight conidiophores that were branched monopodially (3 to 4 times) with 6 to 12 re-curved tips/branch, and measured 140 to 300 × 12 to 20 μm. Conidia were hyaline, oval to broadly ellipsoidal, 24 to 29 × 18 to 24 μm. Oospores formed in necrotic stem and leaf tissues were dark brown and measured 30 to 38 μm in diameter. Based on these morphological characteristics, the causal agent was identified as Hyaloperonospora camelinae (1,3,4,5). DNA was extracted from mycelium and conidia collected from infected plants in two fields in the Savinja Valley (1HpC and 2HpC). Nuclear internal transcribed spacer (ITS) regions of ribosomal DNA (rDNA) were amplified by PCR assay from two isolates using the universal primers ITS4 and ITS5, and sequenced. Both samples yielded a 781-bp sequence, which showed 100% identity to H. camelinae ITS sequence JX445136 in GenBank. The nucleotide sequence was assigned to GenBank Accession No. KJ768405. Pathogenicity was confirmed by spraying 25 3-week-old plants of C. sativa cv. 
Ligena planted in pots (5 plants/pot) with a conidial suspension (105 conidia/ml) obtained from 10 infected plants of the same variety collected from the field 1HpC. Inoculated plants were covered with polyethylene bags for 2 days to maintain high humidity, and incubated at 20°C with a 12-h photoperiod/day in a growth chamber. Downy mildew symptoms first developed on leaves 6 days after inoculation. An additional 25 control plants sprayed with sterilized distilled water and otherwise treated similarly to the inoculated plants developed no symptoms. The identity of the pathogen on the inoculated plants as H. camelinae was confirmed based on the morphological features described above. Downy mildew of false flax caused by H. camelinae has been reported in Europe from Austria, Bulgaria, Germany, Poland, Portugal, Spain, and Switzerland (2); and in the United States from Florida, Oregon, Minnesota, Montana, Nebraska, and Washington (1,3,4,5). To the best of our knowledge, this is the first report of downy mildew caused by H. camelinae on C. sativa in Slovenia. The representative samples were deposited in the phytopatological herbarium of the Slovenian Institute of Hop Research and Brewing. References: (1) E. M. Babiker et al. Plant Dis. 96:1670, 2012. (2) D. F. Farr and A. Y. Rossman, Fungal Databases, Syst. Mycol. Microbiol. Lab. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ . (3) R. M. Harveson et al. Plant Health Progress. doi: 10.1094/PHP-2011-1014-01-BR, 2011. (4) M. L. Putnam et al. Plant Health Progress. doi: 10.1094/PHP-2009-0910-01-BR, 2009. (5) P. Srivastava et al. Plant Dis. 96:1692, 2012.",2014-10-01 +25178289,Native Pig and Chicken Breed Database: NPCDB.,"Indigenous (native) breeds of livestock have higher disease resistance and adaptation to the environment due to high genetic diversity. 
Even though their extinction rate is accelerated due to the increase of commercial breeds, natural disaster, and civil war, there is a lack of well-established databases for the native breeds. Thus, we constructed the native pig and chicken breed database (NPCDB) which integrates available information on the breeds from around the world. It is a nonprofit public database aimed to provide information on the genetic resources of indigenous pig and chicken breeds for their conservation. The NPCDB (http://npcdb.snu.ac.kr/) provides the phenotypic information and population size of each breed as well as its specific habitat. In addition, it provides information on the distribution of genetic resources across the country. The database will contribute to understanding of the breed's characteristics such as disease resistance and adaptation to environmental changes as well as the conservation of indigenous genetic resources.",2014-10-01 +26184874,DSSR: an integrated software tool for dissecting the spatial structure of RNA.,"Insight into the three-dimensional architecture of RNA is essential for understanding its cellular functions. However, even the classic transfer RNA structure contains features that are overlooked by existing bioinformatics tools. Here we present DSSR (Dissecting the Spatial Structure of RNA), an integrated and automated tool for analyzing and annotating RNA tertiary structures. The software identifies canonical and noncanonical base pairs, including those with modified nucleotides, in any tautomeric or protonation state. DSSR detects higher-order coplanar base associations, termed multiplets. It finds arrays of stacked pairs, classifies them by base-pair identity and backbone connectivity, and distinguishes a stem of covalently connected canonical pairs from a helix of stacked pairs of arbitrary type/linkage. DSSR identifies coaxial stacking of multiple stems within a single helix and lists isolated canonical pairs that lie outside of a stem. 
The program characterizes 'closed' loops of various types (hairpin, bulge, internal, and junction loops) and pseudoknots of arbitrary complexity. Notably, DSSR employs isolated pairs and the ends of stems, whether pseudoknotted or not, to define junction loops. This new, inclusive definition provides a novel perspective on the spatial organization of RNA. Tests on all nucleic acid structures in the Protein Data Bank confirm the efficiency and robustness of the software, and applications to representative RNA molecules illustrate its unique features. DSSR and related materials are freely available at http://x3dna.org/.",2015-07-15 +22563389,Global diversity and phylogeny of the Asteroidea (Echinodermata).,"Members of the Asteroidea (phylum Echinodermata), popularly known as starfish or sea stars, are ecologically important and diverse members of marine ecosystems in all of the world's oceans. We present a comprehensive overview of diversity and phylogeny as they have figured into the evolution of the Asteroidea from Paleozoic to the living fauna. Living post-Paleozoic asteroids, the Neoasteroidea, are morphologically separate from those in the Paleozoic. Early Paleozoic asteroid faunas were diverse and displayed morphology that foreshadowed later living taxa. Preservation presents significant difficulties, but fossil occurrence and current accounts suggests a diverse Paleozoic fauna, which underwent extinction around the Permian-Triassic interval was followed by re-diversification of at least one surviving lineage. Ongoing phylogenetic classification debates include the status of the Paxillosida and the Concentricycloidea. Fossil and molecular evidence has been and continues to be part of the ongoing evolution of asteroid phylogenetic research. The modern lineages of asteroids include the Valvatacea, the Forcipulatacea, the Spinlosida, and the Velatida. 
We present an overview of diversity in these taxa, as well as brief notes on broader significance, ecology, and functional morphology of each. Although much asteroid taxonomy is stable, many new taxa remain to be discovered with many new species currently awaiting description. The Goniasteridae is currently one of the most diverse families within the Asteroidea. New data from molecular phylogenetics and the advent of global biodiversity databases, such as the World Asteroidea Database (http://www.marinespecies.org/Asteroidea/) present important new springboards for understanding the global biodiversity and evolution of asteroids.",2012-04-27 +25379446,Independent contribution of individual white matter pathways to language function in pediatric epilepsy patients.,"

Background and purpose

Patients with epilepsy and malformations of cortical development (MCDs) are at high risk for language and other cognitive impairment. Specific impairments, however, are not well correlated with the extent and locale of dysplastic cortex; such findings highlight the relevance of aberrant cortico-cortical interactions, or connectivity, to the clinical phenotype. The goal of this study was to determine the independent contribution of well-described white matter pathways to language function in a cohort of pediatric patients with epilepsy.

Materials and methods

Patients were retrospectively identified from an existing database of pediatric epilepsy patients with the following inclusion criteria: 1. diagnosis of MCDs, 2. DTI performed at 3 T, and 3. language characterized by a pediatric neurologist. Diffusion Toolkit and Trackvis (http://www.trackvis.org) were used for segmentation and analysis of the following tracts: corpus callosum, corticospinal tracts, inferior longitudinal fasciculi (ILFs), inferior fronto-occipital fasciculi (IFOFs), uncinate fasciculi (UFs), and arcuate fasciculi (AFs). Mean diffusivity (MD) and fractional anisotropy (FA) were calculated for each tract. Wilcoxon rank sum test (corrected for multiple comparisons) was used to assess potential differences in tract parameters between language-impaired and language-intact patients. In a separate analysis, a machine learning algorithm (random forest approach) was applied to measure the independent contribution of the measured diffusion parameters for each tract to the clinical phenotype (language impairment). In other words, the importance of each tract parameter was measured after adjusting for the contribution of all other tracts.

Results

Thirty-three MCD patients were included (age range: 3-18 years). Twenty-one patients had intact language, twelve had language impairment. All tracts were identified bilaterally in all patients except for the AF, which was not identified on the right in 10 subjects and not identified on the left in 11 subjects. MD and/or FA within the left AF, UF, ILF, and IFOF differed between language-intact and language-impaired groups. However, only parameters related to the left uncinate, inferior fronto-occipital, and arcuate fasciculi were independently associated with the clinical phenotype.

Conclusions

Scalar metrics derived from the left uncinate, inferior fronto-occipital, and arcuate fasciculi were independently associated with language function. These results support the importance of these pathways in human language function in patients with MCDs.",2014-09-30 +24743308,A toolbox for representational similarity analysis.,"Neuronal population codes are increasingly being investigated with multivariate pattern-information analyses. A key challenge is to use measured brain-activity patterns to test computational models of brain information processing. One approach to this problem is representational similarity analysis (RSA), which characterizes a representation in a brain or computational model by the distance matrix of the response patterns elicited by a set of stimuli. The representational distance matrix encapsulates what distinctions between stimuli are emphasized and what distinctions are de-emphasized in the representation. A model is tested by comparing the representational distance matrix it predicts to that of a measured brain region. RSA also enables us to compare representations between stages of processing within a given brain or model, between brain and behavioral data, and between individuals and species. Here, we introduce a Matlab toolbox for RSA. The toolbox supports an analysis approach that is simultaneously data- and hypothesis-driven. It is designed to help integrate a wide range of computational models into the analysis of multichannel brain-activity measurements as provided by modern functional imaging and neuronal recording techniques. Tools for visualization and inference enable the user to relate sets of models to sets of brain regions and to statistically test and compare the models using nonparametric inference methods. The toolbox supports searchlight-based RSA, to continuously map a measured brain volume in search of a neuronal population code with a specific geometry. 
Finally, we introduce the linear-discriminant t value as a measure of representational discriminability that bridges the gap between linear decoding analyses and RSA. In order to demonstrate the capabilities of the toolbox, we apply it to both simulated and real fMRI data. The key functions are equally applicable to other modalities of brain-activity measurement. The toolbox is freely available to the community under an open-source license agreement (http://www.mrc-cbu.cam.ac.uk/methods-and-resources/toolboxes/license/).",2014-04-17 +25352731,TargetCompare: A web interface to compare simultaneous miRNAs targets.,"

Unlabelled

MicroRNAs (miRNAs) are small non-coding nucleotide sequences between 17 and 25 nucleotides in length that primarily function in the regulation of gene expression. A single miRNA has thousands of predicted targets in a complex regulatory cell signaling network. Therefore, it is of interest to study multiple target genes simultaneously. Hence, we describe a web tool (developed using Java programming language and MySQL database server) to analyse multiple targets of pre-selected miRNAs. We cross validated the tool in eight most highly expressed miRNAs in the antrum region of stomach. This helped to identify 43 potential genes that are targets of at least six of the referred miRNAs. The developed tool aims to reduce the randomness and increase the chance of selecting strong candidate target genes and miRNAs responsible for playing important roles in the studied tissue.

Availability

http://lghm.ufpa.br/targetcompare.",2014-09-30 +22449399,MotViz: a tool for sequence motif prediction in parallel to structural visualization and analyses.,"Linking similar proteins structurally is a challenging task that may help in finding the novel members of a protein family. In this respect, identification of conserved sequence can facilitate understanding and classifying the exact role of proteins. However, the exact role of these conserved elements cannot be elucidated without structural and physiochemical information. In this work, we present a novel desktop application MotViz designed for searching and analyzing the conserved sequence segments within protein structure. With MotViz, the user can extract a complete list of sequence motifs from loaded 3D structures, annotate the motifs structurally and analyze their physiochemical properties. The conservation value calculated for an individual motif can be visualized graphically. To check the efficiency, predicted motifs from the data sets of 9 protein families were analyzed and MotViz algorithm was more efficient in comparison to other online motif prediction tools. Furthermore, a database was also integrated for storing, retrieving and performing the detailed functional annotation studies. In summary, MotViz effectively predicts motifs with high sensitivity and simultaneously visualizes them into 3D strucures. Moreover, MotViz is user-friendly with optimized graphical parameters and better processing speed due to the inclusion of a database at the back end. MotViz is available at http://www.fi-pk.com/motviz.html.",2012-02-01 +24194826,Bayesian hierarchical clustering for studying cancer gene expression data with unknown statistics.,"Clustering analysis is an important tool in studying gene expression data. The Bayesian hierarchical clustering (BHC) algorithm can automatically infer the number of clusters and uses Bayesian model selection to improve clustering quality. 
In this paper, we present an extension of the BHC algorithm. Our Gaussian BHC (GBHC) algorithm represents data as a mixture of Gaussian distributions. It uses normal-gamma distribution as a conjugate prior on the mean and precision of each of the Gaussian components. We tested GBHC over 11 cancer and 3 synthetic datasets. The results on cancer datasets show that in sample clustering, GBHC on average produces a clustering partition that is more concordant with the ground truth than those obtained from other commonly used algorithms. Furthermore, GBHC frequently infers the number of clusters that is often close to the ground truth. In gene clustering, GBHC also produces a clustering partition that is more biologically plausible than several other state-of-the-art methods. This suggests GBHC as an alternative tool for studying gene expression data. The implementation of GBHC is available at https://sites.google.com/site/gaussianbhc/",2013-10-23 +24162465,Protter: interactive protein feature visualization and integration with experimental proteomic data.,"

Summary

The ability to integrate and visualize experimental proteomic evidence in the context of rich protein feature annotations represents an unmet need of the proteomics community. Here we present Protter, a web-based tool that supports interactive protein data analysis and hypothesis generation by visualizing both annotated sequence features and experimental proteomic data in the context of protein topology. Protter supports numerous proteomic file formats and automatically integrates a variety of reference protein annotation sources, which can be readily extended via modular plug-ins. A built-in export function produces publication-quality customized protein illustrations, also for large datasets. Visualizations of surfaceome datasets show the specific utility of Protter for the integrated visual analysis of membrane proteins and peptide selection for targeted proteomics.

Availability and implementation

The Protter web application is available at http://wlab.ethz.ch/protter. Source code and installation instructions are available at http://ulo.github.io/Protter/.

Contact

wbernd@ethz.ch

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-10-24 +25617416,Quantitative visualization of alternative exon expression from RNA-seq data.,"

Motivation

Analysis of RNA sequencing (RNA-Seq) data revealed that the vast majority of human genes express multiple mRNA isoforms, produced by alternative pre-mRNA splicing and other mechanisms, and that most alternative isoforms vary in expression between human tissues. As RNA-Seq datasets grow in size, it remains challenging to visualize isoform expression across multiple samples.

Results

To help address this problem, we present Sashimi plots, a quantitative visualization of aligned RNA-Seq reads that enables quantitative comparison of exon usage across samples or experimental conditions. Sashimi plots can be made using the Broad Integrated Genome Viewer or with a stand-alone command line program.

Availability and implementation

Software code and documentation freely available here: http://miso.readthedocs.org/en/fastmiso/sashimi.html",2015-01-22 +21935630,OLAF: standardization of international olfactory tests.,"Developed in the 1990 s, the ""Sniffin 'Sticks"" test for the assessment of olfactory threshold, odor identification and discrimination has become a widely used tool both in clinical and research settings. Originally pencil-and-paper documented, it may now be applied using a computer program. The ""Filemaker"" based software ""OLAF"" guides the examiner through any user-defined arrangement of the test battery, stores all data in a database, and offers results sheets to be printed out for convenience. The royalty-free program may be downloaded from http://www.tu-dresden.de/medkhno/riechen_schmecken/olaf.zip as a runtime solution application. It is currently available in four languages (English, French, German, and Italian) which can be toggled by a single mouse click, and is suitable for Windows as well as Apple platforms. In conclusion, the currently described software is expected to further facilitate and standardize olfactory testing with the ""Sniffin' Sticks"" test battery.",2011-09-21 +25269378,Streptococcus pneumoniae Genome Database (SPGDB): a database for strain specific comparative analysis of Streptococcus pneumoniae genes and proteins.,"Streptococcus pneumoniae causes pneumonia, septicemia and meningitis. S. pneumoniae is responsible for significant mortality both in children and in the elderly. In recent years, the whole genome sequencing of various S. pneumoniae strains have increased manifold and there is an urgent need to provide organism specific annotations to the scientific community. This prompted us to develop the Streptococcus pneumoniae Genome Database (SPGDB) to integrate and analyze the completely sequenced and available S. pneumoniae genome sequences. 
Further, links to several tools are provided to compare the pool of gene and protein sequences, and proteins structure across different strains of S. pneumoniae. SPGDB aids in the analysis of phenotypic variations as well as to perform extensive genomics and evolutionary studies with reference to S. pneumoniae. The database will be updated at regular intervals and is freely accessible through the URL: http://pranag.physics.iisc.ernet.in/SPGDB/.",2014-09-28 +29474593,The importance of structures and processes in determining outcomes for abdominal aortic aneurysm repair: an international perspective.,"Annual procedural mortality reports have become mandatory for vascular surgery in England, reflecting a more widespread appetite for transparency and accountability across the National Health Service (NHS) [BMJ 2013;346:f854]. The outcomes of abdominal aortic aneurysm (AAA) repair, in particular, have attracted considerable commentary: from 1999 to 2006, postoperative mortality was higher in England than in many other countries (7.9 vs. 1.9-4.5%) [European Society for Vascular Surgery. 2nd Vascunet Report. 2008]. This stimulated considerable service reconfiguration (centralization), quality improvement initiatives, the uptake of endovascular technology, and the examination of institution-level mortality data [http://www.vascularsociety.org.uk/library/quality-improvement.html], which resulted in a fall in elective AAA mortality to 1.8% by 2012 [http://www.hqip.org.uk/assets/NCAPOP-Library/NCAPOP-2013-2014/Outcomes-after-Elective-Repair-of-Infra-renal-Abdominal-Aortic-Aneurysm.pdf (February 2015)]. Despite improvements at a national level, the outcomes of AAA repair vary considerably between different hospitals in the NHS [Circ Cardiovasc Qual Outcomes 2014;7:131-141], analogous to interprovider variation that has been reported across a range of emergency medical and surgical conditions [BMC Health Serv Res 2014;14:270]. 
This suggests that underlying institution structures and processes contribute independently to patients' outcomes. There is also considerable evidence that the outcomes of AAA repair vary in different healthcare systems, both in the elective European Society for Vascular Surgery, 2008 and emergency settings. A consideration of the role of structures and processes in influencing outcomes for AAA repair can be conducted across different institutions or even different healthcare systems. This can help identify which factors are consistently associated with the best outcomes, informing efforts to better organize and deliver services for patients requiring vascular surgery.",2015-11-01 +28077930,"Before the freeze: otoliths from the Eocene of Seymour Island, Antarctica, reveal dominance of gadiform fishes (Teleostei).","The first record of fossil teleostean otoliths from Antarctica is reported. The fossils were obtained from late Early Eocene shell beds of the La Meseta Formation, Seymour Island that represent the last temperate marine climate phase in Antarctica prior to the onset of cooling and subsequent glaciation during the late Eocene. A total of 17 otolith-based teleost taxa are recognized, with 10 being identifiable to species level containing nine new species and one new genus: Argentina antarctica sp. nov., Diaphus? marambionis sp. nov., Macruronus eastmani sp. nov., Coelorinchus balushkini sp. nov., Coelorinchus nordenskjoeldi sp. nov., Palimphemus seymourensis sp. nov., Hoplobrotula? antipoda sp. nov., Notoberyx cionei gen. et sp. nov. and Cepola anderssoni sp. nov. Macruronus eastmani sp. nov. is also known from the late Eocene of Southern Australia, and Tripterophycis immutatus Schwarzhans, widespread in the southern oceans during the Eocene, has been recorded from New Zealand, southern Australia, and now Antarctica. 
The otolith assemblage shows a typical composition of temperate fishes dominated by gadiforms, very similar at genus and family levels to associations known from middle Eocene strata of New Zealand and the late Eocene of southern Australia, but also to the temperate Northern Hemisphere associations from the Paleocene of Denmark. The Seymour Island fauna bridges a gap in the record of global temperate marine teleost faunas during the early Eocene climate maximum. The dominant gadiforms are interpreted as the main temperate faunal component, as in the Paleocene of Denmark. Here they are represented by the families Moridae, Merlucciidae (Macruroninae), Macrouridae and Gadidae. Nowadays Gadidae are a chiefly Northern Hemisphere temperate family. Moridae, Macruroninae and Macrouridae live today on the lower shelf to deep-water or mesopelagically with Macruroninae being restricted to the Southern Ocean. The extant endemic Antarctic gadiform family Muraenolepididae is missing, as are the dominant modern Antarctic fishes of the perciform suborder Notothenioidei. Recently, there has been much debate on isolated jaw bones of teleost fishes found in the La Meseta Formation and whether they would represent gadiforms (Merlucciidae in this case) or some early, primitive notothenioid. Otoliths are known to often complement rather than duplicate skeletal finds. With this in mind, we conclude that our otolith data support the presence of gadiforms in the early Eocene of Antarctica while it does not rule out the presence of notothenioids at the same time. http://zoobank.org/urn:lsid:zoobank.org:pub:A30E5364-0003-4467-B902-43A41AD456CC.",2016-03-16 +25339027,Incidence of cancers in Kuzestan province of iran: trend from 2004 to 2008.,"

Background

Cancer is an increasing cause of mortality and morbidity worldwide. Incidences of common cancers have been growing in different provinces of Iran in recent years, but trends in Khuzestan, which shares a border with Iraq and is located in the south west of Iran, have not been investigated. This study aimed to assess secular changes in incidences of common cancers in Khuzestan province from 2004 to 2008.

Materials and methods

Data were collected from Khuzestan cancer registry which is a branch of Iranian Ministry of Health Cancer Registry (http://ircancer.ir) for the period 2004-2008. Data were presented as incidence rates by site, sex, age, using the crude rate and age-standardized rate (ASR) per 10^5 persons. A direct method of standardization was applied according to the WHO guideline and data analysis was performed using the SPSS package.

Results

During the 2004-2008 period, 14,893 new cases of cancer were registered in Khuzestan cancer registry. The age-standardized incidence rate of all cancers was 153.7 per 10^5 in males and 156.4 per 10^5 in females. The incidence was increased over the period of five years. The most incident cancers among males were skin cancer (ASR=18.7/10^5), stomach cancer (ASR=13.8/10^5), lung cancer (ASR=12.9/10^5), leukemia (ASR=12.6/10^5) and prostate cancer (ASR=12.4/10^5). In females, the most incident cancers were breast cancer (ASR=41/10^5), skin cancer (ASR=16.4/10^5), colorectal cancer (ASR=10.0/10^5), leukemia (ASR=8.1/10^5) and lung cancer (ASR=6.9/10^5).

Conclusions

Incidences of various cancers are rising in Khuzestan. It is necessary to develop and implement comprehensive cancer control programs in this region which could be monitored and evaluated by the future trend data from cancer registry.",2014-01-01 +21791068,miRTar: an integrated system for identifying miRNA-target interactions in human.,"

Background

MicroRNAs (miRNAs) are small non-coding RNA molecules that are ~22-nt-long sequences capable of suppressing protein synthesis. Previous research has suggested that miRNAs regulate 30% or more of the human protein-coding genes. The aim of this work is to consider various analyzing scenarios in the identification of miRNA-target interactions, as well as to provide an integrated system that will aid in facilitating investigation on the influence of miRNA targets by alternative splicing and the biological function of miRNAs in biological pathways.

Results

This work presents an integrated system, miRTar, which adopts various analyzing scenarios to identify putative miRNA target sites of the gene transcripts and elucidates the biological functions of miRNAs toward their targets in biological pathways. The system has three major features. First, the prediction system is able to consider various analyzing scenarios (1 miRNA:1 gene, 1:N, N:1, N:M, all miRNAs:N genes, and N miRNAs: genes involved in a pathway) to easily identify the regulatory relationships between interesting miRNAs and their targets, in 3'UTR, 5'UTR and coding regions. Second, miRTar can analyze and highlight a group of miRNA-regulated genes that participate in particular KEGG pathways to elucidate the biological roles of miRNAs in biological pathways. Third, miRTar can provide further information for elucidating the miRNA regulation, i.e., miRNA-target interactions, affected by alternative splicing.

Conclusions

In this work, we developed an integrated resource, miRTar, to enable biologists to easily identify the biological functions and regulatory relationships between a group of known/putative miRNAs and protein coding genes. miRTar is now available at http://miRTar.mbc.nctu.edu.tw/.",2011-07-26 +24209455,Design of RNA splicing analysis null models for post hoc filtering of Drosophila head RNA-Seq data with the splicing analysis kit (Spanki).,"

Background

The production of multiple transcript isoforms from one gene is a major source of transcriptome complexity. RNA-Seq experiments, in which transcripts are converted to cDNA and sequenced, allow the resolution and quantification of alternative transcript isoforms. However, methods to analyze splicing are underdeveloped and errors resulting in incorrect splicing calls occur in every experiment.

Results

We used RNA-Seq data to develop sequencing and aligner error models. By applying these error models to known input from simulations, we found that errors result from false alignment to minor splice motifs and antisense strands, shifted junction positions, paralog joining, and repeat-induced gaps. By using a series of quantitative and qualitative filters, we eliminated diagnosed errors in the simulation, and applied this to RNA-Seq data from Drosophila melanogaster heads. We used high-confidence junction detections to specifically interrogate local splicing differences between transcripts. This method out-performed commonly used RNA-seq methods to identify known alternative splicing events in the Drosophila sex determination pathway. We describe a flexible software package to perform these tasks called Splicing Analysis Kit (Spanki), available at http://www.cbcb.umd.edu/software/spanki.

Conclusions

Splice-junction centric analysis of RNA-Seq data provides advantages in specificity for detection of alternative splicing. Our software provides tools to better understand error profiles in RNA-Seq data and improve inference from this new technology. The splice-junction centric approach that this software enables will provide more accurate estimates of differentially regulated splicing than current tools.",2013-11-09 +21501451,Methods used in the Lives Saved Tool (LiST).,"

Background

Choosing an optimum set of child health interventions for maximum mortality impact is important within resource poor policy environments. The Lives Saved Tool (LiST) is a computer model that estimates the mortality and stillbirth impact of scaling up proven maternal and child health interventions. This paper will describe the methods used to estimate the impact of scaling up interventions on neonatal and child mortality.

Model structure and assumptions

LiST estimates mortality impact via five age bands 0 months, 1-5 months, 6-11 months, 12-23 months and 24 to 59 months. For each of these age bands reductions in cause specific mortality are estimated. Nutrition interventions can impact either nutritional statuses or directly impact mortality. In the former case, LiST acts as a cohort model where current nutritional statuses such as stunting impact the probability of stunting as the cohort ages. LiST links with a demographic projections model (DemProj) to estimate the deaths and deaths averted due to the reductions in mortality rates.

Using list

LiST can be downloaded at http://www.jhsph.edu/dept/ih/IIP/list/ where simple instructions are available for installation. LiST includes default values for coverage and effectiveness for many less developed countries obtained from credible sources.

Conclusions

The development of LiST is a continuing process. Via technical inputs from the Child Health Epidemiological Group, effectiveness values are updated, interventions are adopted and new features added.",2011-04-13 +25856537,"Top five chemicals resulting in injuries from acute chemical incidents—Hazardous Substances Emergency Events Surveillance, nine states, 1999-2008.","

Problem/condition

The Toxic Substances Control Act Chemical Substance Inventory lists >84,000 chemicals used in commerce (http://www.epa.gov/oppt/existingchemicals/pubs/tscainventory/basic.html). With chemicals having a multitude of uses, persons are potentially at risk daily for exposure to chemicals as a result of an acute chemical incident (lasting <72 hours). Depending on the level of exposure and the type of chemical, exposure can result in morbidity and, in some cases, mortality.

Reporting period

1999-2008.

Description of system

The Hazardous Substances Emergency Events Surveillance (HSEES) system was operated by the Agency for Toxic Substances and Disease Registry during January 1991-September 2009 to collect data that would enable researchers to describe the public health consequences of chemical incidents and to develop activities aimed at reducing the harm from such incidents. This report identifies the top five chemicals that caused injuries in the nine states (Colorado, Iowa, Minnesota, New York, North Carolina, Oregon, Texas, Washington, and Wisconsin) that participated in HSEES during its last 10 full years of data collection (1999-2008).

Results

Of the 57,975 incidents that were reported, 54,989 (95%) involved the release of only one chemical. The top five chemicals associated with injury were carbon monoxide (2,364), ammonia (1,153), chlorine (763), hydrochloric acid (326), and sulfuric acid (318). Carbon monoxide and ammonia by far caused the most injuries, deaths, and evacuations. Chlorine, while not in the top 10 chemicals released, was in the top five chemicals associated with injury because of its hazardous properties.

Interpretation

Multiple measures can be taken to prevent injuries associated with the top five chemicals. Because many carbon monoxide releases occur in residential settings, use of carbon monoxide detectors can prevent injuries. Substituting chemicals with less lethal alternatives can result in mitigating injuries associated with ammonia. Routine maintenance of equipment and engineering controls can reduce injuries associated with chlorine and sulfuric acid, and proper chemical handling training can reduce injuries associated with hydrochloric acid. PUBLIC HEALTH IMPLICATIONS: Understanding the most frequently reported locations where carbon monoxide, ammonia, chlorine, hydrochloric acid, and sulfuric acid are released along with the most frequently reported contributing factors can help mitigate injuries associated with these releases. Prevention initiatives should focus on educating the public and workers about the dangers of these chemicals and about proper handling of these chemicals along with routine maintenance of equipment.",2015-04-01 +22052953,"Characteristics and capabilities of emergency departments in Abuja, Nigeria.","

Objectives

Emergency departments (ED) are the basic unit of international emergency medicine, but often differ in fundamental features. This study sought to describe and characterise ED in the capital city of Nigeria, Abuja.

Methods

All ED open 24 h/day 7 days/week to the general public were surveyed using the national ED inventories survey instrument (http://www.emnet-nedi.org). ED staff were asked about ED characteristics with reference to calendar year 2008.

Results

Twenty-four ED participated (83% response). All were located in hospitals, which ranged in size from six to 250 beds. The majority (92% CI 73% to 100%) had a contiguous layout with medical and surgical care provided in one area. All ED saw both adults and children, with a median of 1500 annual visits (IQR 648-2328). Almost half of respondents (46%; CI 26% to 67%) thought their ED operated under capacity, none thought that their ED was over capacity. Only 4% of ED surveyed had dedicated CT scanners, 25% had cardiac monitoring and none had negative-pressure rooms. There was wide variation in the types of emergencies that were identified as being treatable 24 h/day 7 days/week; these appeared to correlate with ED consultant availability.

Conclusions

Although ED location and layout in Abuja do not differ greatly from that in a typical US city, ED utilisation was lower and fewer resources and capabilities were available. The lack of technological and human resources raise questions about what critical technologies are needed in resource-limited settings, and whether Nigeria should consider training emergency medicine physicians to meet its workforce needs.",2011-11-02 +23193291,DbVar and DGVa: public archives for genomic structural variation.,"Much has changed in the last two years at DGVa (http://www.ebi.ac.uk/dgva) and dbVar (http://www.ncbi.nlm.nih.gov/dbvar). We are now processing direct submissions rather than only curating data from the literature and our joint study catalog includes data from over 100 studies in 11 organisms. Studies from human dominate with data from control and case populations, tumor samples as well as three large curated studies derived from multiple sources. During the processing of these data, we have made improvements to our data model, submission process and data representation. Additionally, we have made significant improvements in providing access to these data via web and FTP interfaces.",2012-11-27 +25245994,Identification of patients with high osteoporosis risk: analysis of FRAX and phalangeal ultrasonography in a female population in North-East Italy.,"

Aim

Osteoporosis is a worldwide health problem and bone fractures from osteoporosis are among the major causes of disability, with a great impact on the national health budgets. The aim of our study was to analyze the efficacy of the FRAX algorithm and phalangeal ultrasound to predict the risk of osteoporotic fractures, in order to identify a high risk population to examine with a second level diagnostic analysis.

Methods

The study population was composed of 1208 premenopausal, perimenopausal, and postmenopausal women, between 40 and 81 years. For each woman phalangeal QUS was performed and FRAX index was calculated. The FRAX index was evaluated according to standard plots available on web (http://www.shed.ac.uk/FRAX/index.htm).

Results

Analysing the correlation between women's age and phalangeal bone ultrasound values, we know that all parameters decrease with increasing age. We found a significant difference between FRAX index and the ultrasound parameters (P<0.05) to predict a major osteoporotic fracture; we did not find differences between age and ultrasound parameters. Furthermore, we show that after correction for the age of the women the ultrasound parameters lose their significant correlation with major osteoporotic fractures. Finally, FRAX index showed a good AUC in our population, and a 10-year probability over 9.4% of major osteoporotic fractures had a good specificity (88%) and sensitivity (83%) to predict osteoporotic fractures.

Conclusion

Our data suggest that FRAX index alone could be used to address high risk women to more invasive tests but we need more data about ultrasound parameters.",2014-10-01 +25260589,FOAM (Functional Ontology Assignments for Metagenomes): a Hidden Markov Model (HMM) database with environmental focus.,"A new functional gene database, FOAM (Functional Ontology Assignments for Metagenomes), was developed to screen environmental metagenomic sequence datasets. FOAM provides a new functional ontology dedicated to classify gene functions relevant to environmental microorganisms based on Hidden Markov Models (HMMs). Sets of aligned protein sequences (i.e. 'profiles') were tailored to a large group of target KEGG Orthologs (KOs) from which HMMs were trained. The alignments were checked and curated to make them specific to the targeted KO. Within this process, sequence profiles were enriched with the most abundant sequences available to maximize the yield of accurate classifier models. An associated functional ontology was built to describe the functional groups and hierarchy. FOAM allows the user to select the target search space before HMM-based comparison steps and to easily organize the results into different functional categories and subcategories. FOAM is publicly available at http://portal.nersc.gov/project/m1317/FOAM/.",2014-09-26 +25257132,Dysregulation of cell cycle related genes and microRNAs distinguish the low- from high-risk of prostate cancer.,"

Background

Prostate cancer (PCa) is a biologically heterogeneous disease with considerable variation in clinical aggressiveness. In this study, bioinformatics was used to detect the patterns of gene expression alterations of PCa patients.

Methods

The gene expression profiles GSE21034 and GSE21036 were downloaded from Gene Expression Omnibus (GEO) database. Significantly changed mRNA transcripts and microRNAs were identified between subtypes with favorable (cluster 2) and unfavorable (cluster 5) prognosis by two-sided unequal variances t test. MicroRNAs and their potential target genes were identified by TargetScan and miRTarBase, respectively. Besides, the overlapped genes between the target genes of microRNAs and mRNA transcripts were assessed by Fisher's exact test (one side). The functional annotation was performed by DAVID, followed by construction of protein-protein interaction (PPI) network.

Results

Compared to cluster 2, 1556 up-regulated and 1288 down-regulated transcripts were identified in cluster 5. A total of 28 microRNAs were up-regulated and 30 microRNAs were down-regulated in cluster 5. Besides, 12 microRNAs target transcripts were significantly overlapped with down-regulated transcripts in cluster 5, while none of them was found to overlap with up-regulated transcripts. Functional annotation showed that cell cycle was the most significant function. In the PPI network, BRCA1, CDK1, TK1 and TRAF2 were hub proteins of signature genes in cluster 5, and TGFBR1, SMAD2 and SMAD4 were hub proteins of signature genes in cluster 2.

Conclusions

Our findings raise the possibility that genes related with cell cycle and dysregulated miRNA at diagnosis might have clinical utility in distinguishing low- from high-risk PCa patients.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_156.",2014-09-26 +25257822,Associations between estrogen receptor-beta polymorphisms and endometriosis risk: a meta-analysis.,"

Background

Many epidemiological studies have suggested an association between estrogen receptor-beta (ER-β) polymorphisms with endometriosis risk. However, the results of these studies have been inconsistent. In the present study, we performed a meta-analysis to clarify the associations between the ER-β rs4986938 and rs1256049 polymorphisms and endometriosis risk.

Methods

Eligible publications were retrieved from the PubMed, ISI Web of Science, and several Chinese language databases. Pooled odds ratios (ORs) with 95% confidence intervals (CIs) were calculated using a random or fixed effect model.

Results

A total of eight studies (1100 cases/1485 controls) for the rs4986938 polymorphism and four studies (353 cases/450 controls) for the rs1256049 polymorphism were included in this meta-analysis. Regarding the rs4986938 polymorphism, no obvious associations were found for all genetic models when all studies were pooled into the meta-analysis. In the subgroup analyses by ethnicity, study sample size, endometriosis-associated infertility, and stage of endometriosis, a significantly increased risk was observed among mixed populations (dominant model, OR=2.03, 95% CI=1.56-2.64) and among cases with endometriosis-associated infertility (dominant model, OR=1.83, 95% CI=1.26-2.67). Regarding the rs1256049 polymorphism, no obvious associations were found for all genetic models in the overall population. Subgroup analyses by ethnicity and study sample size revealed that only one study of a mixed population with small sample size showed an increased risk of endometriosis. No publication bias was found in the present study.

Conclusions

The results of this meta-analysis suggest that the ER-β rs4986938 and rs1256049 polymorphisms may not be associated with endometriosis risk, while the observed increased risk of endometriosis-associated infertility may be due to bias by the inclusion of small-scale studies.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_184.",2014-09-26 +25261406,GPAC-genome presence/absence compiler: a web application to comparatively visualize multiple genome-level changes.,"Our understanding of genome-wide and comparative sequence information has been broadened considerably by the databases available from the University of California Santa Cruz (UCSC) Genome Bioinformatics Department. In particular, the identification and visualization of genomic sequences, present in some species but absent in others, led to fundamental insights into gene and genome evolution. However, the UCSC tools currently enable one to visualize orthologous genomic loci for a range of species in only a single locus. For large-scale comparative analyses of such presence/absence patterns a multilocus view would be more desirable. Such a tool would enable us to compare thousands of relevant loci simultaneously and to resolve many different questions about, for example, phylogeny, specific aspects of genome and gene evolution, such as the gain or loss of exons and introns, the emergence of novel transposed elements, nonprotein-coding RNAs, and viral genomic particles. Here, we present the first tool to facilitate the parallel analysis of thousands of genomic loci for cross-species presence/absence patterns based on multiway genome alignments. This genome presence/absence compiler uses annotated or other compilations of coordinates of genomic locations and compiles all presence/absence patterns in a flexible, color-coded table linked to the individual UCSC Genome Browser alignments. 
We provide examples of the versatile information content of such a screening system especially for 7SL-derived transposed elements, nuclear mitochondrial DNA, DNA transposons, and miRNAs in primates (http://www.bioinformatics.uni-muenster.de/tools/gpac, last accessed October 1, 2014).",2014-09-25 +25146089,A web tool for age-period-cohort analysis of cancer incidence and mortality rates.,"

Background

Age-period-cohort (APC) analysis can inform registry-based studies of cancer incidence and mortality, but concerns about statistical identifiability and interpretability, as well as the learning curves of statistical software packages, have limited its uptake.

Methods

We implemented a panel of easy-to-interpret estimable APC functions and corresponding Wald tests in R code that can be accessed through a user-friendly Web tool.

Results

Input data for the Web tool consist of age-specific numbers of events and person-years over time, in the form of a rate matrix of paired columns. Output functions include model-based estimators of cross-sectional and longitudinal age-specific rates, period and cohort rate ratios that incorporate the overall annual percentage change (net drift), and estimators of the age-specific annual percentage change (local drifts). The Web tool includes built-in examples for teaching and demonstration. User data can be input from a Microsoft Excel worksheet or by uploading a comma-separated-value file. Model outputs can be saved in a variety of formats, including R and Excel.

Conclusions

APC methodology can now be carried out through a freely available user-friendly Web tool. The tool can be accessed at http://analysistools.nci.nih.gov/apc/.

Impact

The Web tool can help cancer surveillance researchers make important discoveries about emerging cancer trends and patterns.",2014-08-21 +26355508,Reliable and Fast Estimation of Recombination Rates by Convergence Diagnosis and Parallel Markov Chain Monte Carlo.,"Genetic recombination is an essential event during the process of meiosis resulting in an exchange of segments between paired chromosomes. Estimating recombination rate is crucial for understanding the process of recombination. Experimental methods are normally difficult and limited to small scale estimations. Thus statistical methods using population genetics data are important for large-scale analysis. LDhat is an extensively used statistical method using rjMCMC algorithm to predict recombination rates. Due to the complexity of rjMCMC scheme, LDhat may take a long time for large SNP data sets. In addition, rjMCMC parameters should be manually defined in the original program which directly impact results. To address these issues, we designed an improved algorithm based on LDhat implementing MCMC convergence diagnostic algorithms to automatically predict values of parameters and monitor the mixing process. Then parallel computation methods were employed to further accelerate the new program. The new algorithms have been tested on ten samples from HapMap phase 2 data set. The results were compared with previous code and showed nearly identical output. However, our new methods achieved significant acceleration proving that they are more efficient and reliable for the estimation of recombination rates. The stand-alone package is freely available for download http://www.ntu.edu.sg/home/zhengjie/software/CPLDhat.",2014-01-01 +26019179,sRNAtoolbox: an integrated collection of small RNA research tools.,"Small RNA research is a rapidly growing field. Apart from microRNAs, which are important regulators of gene expression, other types of functional small RNA molecules have been reported in animals and plants. 
MicroRNAs are important in host-microbe interactions and parasite microRNAs might modulate the innate immunity of the host. Furthermore, small RNAs can be detected in bodily fluids making them attractive non-invasive biomarker candidates. Given the general broad interest in small RNAs, and in particular microRNAs, a large number of bioinformatics aided analysis types are needed by the scientific community. To facilitate integrated sRNA research, we developed sRNAtoolbox, a set of independent but interconnected tools for expression profiling from high-throughput sequencing data, consensus differential expression, target gene prediction, visual exploration in a genome context as a function of read length, gene list analysis and blast search of unmapped reads. All tools can be used independently or for the exploration and downstream analysis of sRNAbench results. Workflows like the prediction of consensus target genes of parasite microRNAs in the host followed by the detection of enriched pathways can be easily established. The web-interface interconnecting all these tools is available at http://bioinfo5.ugr.es/srnatoolbox.",2015-05-27 +24728858,A change-point model for identifying 3'UTR switching by next-generation RNA sequencing.,"

Motivation

Next-generation RNA sequencing offers an opportunity to investigate the transcriptome at an unprecedented scale. Recent studies have revealed widespread alternative polyadenylation (polyA) in eukaryotes, leading to various mRNA isoforms differing in their 3' untranslated regions (3'UTR), through which the stability, localization and translation of mRNA can be regulated. However, very few, if any, methods and tools are available for directly analyzing this special alternative RNA processing event. Conventional methods rely on annotation of polyA sites; yet, such knowledge remains incomplete, and identification of polyA sites is still challenging. The goal of this article is to develop methods for detecting 3'UTR switching without any prior knowledge of polyA annotations.

Results

We propose a change-point model based on a likelihood ratio test for detecting 3'UTR switching. We develop a directional testing procedure for identifying dramatic shortening or lengthening events in 3'UTR, while controlling mixed directional false discovery rate at a nominal level. To our knowledge, this is the first approach to analyze 3'UTR switching directly without relying on any polyA annotations. Simulation studies and applications to two real datasets reveal that our proposed method is powerful, accurate and feasible for the analysis of next-generation RNA sequencing data.

Conclusions

The proposed method will fill a void among alternative RNA processing analysis tools for transcriptome studies. It can help to obtain additional insights from RNA sequencing data by understanding gene regulation mechanisms through the analysis of 3'UTR switching.

Availability and implementation

The software is implemented in Java and can be freely downloaded from http://utr.sourceforge.net/.

Contact

zhiwei@njit.edu or hongzhe@mail.med.upenn.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-11 +26406244,msCentipede: Modeling Heterogeneity across Genomic Sites and Replicates Improves Accuracy in the Inference of Transcription Factor Binding.,"Understanding global gene regulation depends critically on accurate annotation of regulatory elements that are functional in a given cell type. CENTIPEDE, a powerful, probabilistic framework for identifying transcription factor binding sites from tissue-specific DNase I cleavage patterns and genomic sequence content, leverages the hypersensitivity of factor-bound chromatin and the information in the DNase I spatial cleavage profile characteristic of each DNA binding protein to accurately infer functional factor binding sites. However, the model for the spatial profile in this framework fails to account for the substantial variation in the DNase I cleavage profiles across different binding sites. Neither does it account for variation in the profiles at the same binding site across multiple replicate DNase I experiments, which are increasingly available. In this work, we introduce new methods, based on multi-scale models for inhomogeneous Poisson processes, to account for such variation in DNase I cleavage patterns both within and across binding sites. These models account for the spatial structure in the heterogeneity in DNase I cleavage patterns for each factor. Using DNase-seq measurements assayed in a lymphoblastoid cell line, we demonstrate the improved performance of this model for several transcription factors by comparing against the Chip-seq peaks for those factors. Finally, we explore the effects of DNase I sequence bias on inference of factor binding using a simple extension to our framework that allows for a more flexible background model. The proposed model can also be easily applied to paired-end ATAC-seq and DNase-seq data. 
msCentipede, a Python implementation of our algorithm, is available at http://rajanil.github.io/msCentipede.",2015-09-25 +24084870,Exploring TCGA Pan-Cancer data at the UCSC Cancer Genomics Browser.,"The UCSC Cancer Genomics Browser (https://genome-cancer.ucsc.edu) offers interactive visualization and exploration of TCGA genomic, phenotypic, and clinical data, as produced by the Cancer Genome Atlas Research Network. Researchers can explore the impact of genomic alterations on phenotypes by visualizing gene and protein expression, copy number, DNA methylation, somatic mutation and pathway inference data alongside clinical features, Pan-Cancer subtype classifications and genomic biomarkers. Integrated Kaplan-Meier survival analysis helps investigators to assess survival stratification by any of the information.",2013-10-02 +23966761,Improved Sparse Multi-Class SVM and Its Application for Gene Selection in Cancer Classification.,"

Background

Microarray techniques provide promising tools for cancer diagnosis using gene expression profiles. However, molecular diagnosis based on high-throughput platforms presents great challenges due to the overwhelming number of variables versus the small sample size and the complex nature of multi-type tumors. Support vector machines (SVMs) have shown superior performance in cancer classification due to their ability to handle high dimensional low sample size data. The multi-class SVM algorithm of Crammer and Singer provides a natural framework for multi-class learning. Despite its effective performance, the procedure utilizes all variables without selection. In this paper, we propose to improve the procedure by imposing shrinkage penalties in learning to enforce solution sparsity.

Results

The original multi-class SVM of Crammer and Singer is effective for multi-class classification but does not conduct variable selection. We improved the method by introducing soft-thresholding type penalties to incorporate variable selection into multi-class classification for high dimensional data. The new methods were applied to simulated data and two cancer gene expression data sets. The results demonstrate that the new methods can select a small number of genes for building accurate multi-class classification rules. Furthermore, the important genes selected by the methods overlap significantly, suggesting general agreement among different variable selection schemes.

Conclusions

High accuracy and sparsity make the new methods attractive for cancer diagnostics with gene expression data and defining targets of therapeutic intervention.

Availability

The source MATLAB code are available from http://math.arizona.edu/~hzhang/software.html.",2013-08-04 +25862487,The Budapest Reference Connectome Server v2.0.,"The connectomes of different human brains are pairwise distinct: we cannot talk about an abstract ""graph of the brain"". Two typical connectomes, however, have quite a few common graph edges that may describe the same connections between the same cortical areas. The Budapest Reference Connectome Server v2.0 generates the common edges of the connectomes of 96 distinct cortexes, each with 1015 vertices, computed from 96 MRI data sets of the Human Connectome Project. The user may set numerous parameters for the identification and filtering of common edges, and the graphs are downloadable in both csv and GraphML formats; both formats carry the anatomical annotations of the vertices, generated by the FreeSurfer program. The resulting consensus graph is also automatically visualized in a 3D rotating brain model on the website. The consensus graphs, generated with various parameter settings, can be used as reference connectomes based on different, independent MRI images, therefore they may serve as reduced-error, low-noise, robust graph representations of the human brain. The webserver is available at http://connectome.pitgroup.org.",2015-04-07 +22911241,Germline transgenesis and insertional mutagenesis in Schistosoma mansoni mediated by murine leukemia virus.,"Functional studies will facilitate characterization of role and essentiality of newly available genome sequences of the human schistosomes, Schistosoma mansoni, S. japonicum and S. haematobium. To develop transgenesis as a functional approach for these pathogens, we previously demonstrated that pseudotyped murine leukemia virus (MLV) can transduce schistosomes leading to chromosomal integration of reporter transgenes and short hairpin RNA cassettes. Here we investigated vertical transmission of transgenes through the developmental cycle of S. 
mansoni after introducing transgenes into eggs. Although MLV infection of schistosome eggs from mouse livers was efficient in terms of snail infectivity, >10-fold higher transgene copy numbers were detected in cercariae derived from in vitro laid eggs (IVLE). After infecting snails with miracidia from eggs transduced by MLV, sequencing of genomic DNA from cercariae released from the snails also revealed the presence of transgenes, demonstrating that transgenes had been transmitted through the asexual developmental cycle, and thereby confirming germline transgenesis. High-throughput sequencing of genomic DNA from schistosome populations exposed to MLV mapped widespread and random insertion of transgenes throughout the genome, along each of the autosomes and sex chromosomes, validating the utility of this approach for insertional mutagenesis. In addition, the germline-transmitted transgene encoding neomycin phosphotransferase rescued cultured schistosomules from toxicity of the antibiotic G418, and PCR analysis of eggs resulting from sexual reproduction of the transgenic worms in mice confirmed that retroviral transgenes were transmitted to the next (F1) generation. These findings provide the first description of wide-scale, random insertional mutagenesis of chromosomes and of germline transmission of a transgene in schistosomes. Transgenic lines of schistosomes expressing antibiotic resistance could advance functional genomics for these significant human pathogens. DATABASE ACCESSION: Sequence data from this study have been submitted to the European Nucleotide Archive (http://www.ebi.ac.uk/embl) under accession number ERP000379.",2012-07-26 +24522603,Uncertainties in transpiration estimates.,"arising from S. Jasechko et al. 
Nature 496, 347-350 (2013); doi:10.1038/nature11983. How best to assess the respective importance of plant transpiration over evaporation from open waters, soils and short-term storage such as tree canopies and understories (interception) has long been debated. On the basis of data from lake catchments, Jasechko et al. conclude that transpiration accounts for 80-90% of total land evaporation globally (Fig. 1a). However, another choice of input data, together with more conservative accounting of the related uncertainties, reduces and widens the transpiration ratio estimation to 35-80%. Hence, climate models do not necessarily conflict with observations, but more measurements on the catchment scale are needed to reduce the uncertainty range. There is a Reply to this Brief Communications Arising by Jasechko, S. et al. Nature 506, http://dx.doi.org/10.1038/nature12926 (2014).",2014-02-01 +23525069,Robust data-driven incorporation of prior knowledge into the inference of dynamic regulatory networks.,"

Motivation

Inferring global regulatory networks (GRNs) from genome-wide data is a computational challenge central to the field of systems biology. Although the primary data currently used to infer GRNs consist of gene expression and proteomics measurements, there is a growing abundance of alternate data types that can reveal regulatory interactions, e.g. ChIP-Chip, literature-derived interactions, protein-protein interactions. GRN inference requires the development of integrative methods capable of using these alternate data as priors on the GRN structure. Each source of structure priors has its unique biases and inherent potential errors; thus, GRN methods using these data must be robust to noisy inputs.

Results

We developed two methods for incorporating structure priors into GRN inference. Both methods [Modified Elastic Net (MEN) and Bayesian Best Subset Regression (BBSR)] extend the previously described Inferelator framework, enabling the use of prior information. We test our methods on one synthetic and two bacterial datasets, and show that both MEN and BBSR infer accurate GRNs even when the structure prior used has significant amounts of error (>90% erroneous interactions). We find that BBSR outperforms MEN at inferring GRNs from expression data and noisy structure priors.

Availability and implementation

Code, datasets and networks presented in this article are available at http://bonneaulab.bio.nyu.edu/software.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-03-21 +24497942,Integrative gene network construction to analyze cancer recurrence using semi-supervised learning.,"

Background

The prognosis of cancer recurrence is an important research area in bioinformatics and is challenging due to the small sample sizes compared to the vast number of genes. There have been several attempts to predict cancer recurrence. Most studies employed a supervised approach, which uses only a few labeled samples. Semi-supervised learning can be a great alternative to solve this problem. There have been few attempts based on manifold assumptions to reveal the detailed roles of identified cancer genes in recurrence.

Results

In order to predict cancer recurrence, we proposed a novel semi-supervised learning algorithm based on a graph regularization approach. We transformed the gene expression data into a graph structure for semi-supervised learning and integrated protein interaction data with the gene expression data to select functionally-related gene pairs. Then, we predicted the recurrence of cancer by applying a regularization approach to the constructed graph containing both labeled and unlabeled nodes.

Conclusions

The average improvement rate of accuracy for three different cancer datasets was 24.9% compared to existing supervised and semi-supervised methods. We performed functional enrichment on the gene networks used for learning. We identified that those gene networks are significantly associated with cancer-recurrence-related biological functions. Our algorithm was developed with standard C++ and is available in Linux and MS Windows formats in the STL library. The executable program is freely available at: http://embio.yonsei.ac.kr/~Park/ssl.php.",2014-01-31 +24007178,Integration of advanced 3D SPECT modeling into the open-source STIR framework.,"

Purpose

The Software for Tomographic Image Reconstruction (STIR, http://stir.sourceforge.net) package is an open source object-oriented library implemented in C++. Although its modular design is suitable for reconstructing data from several modalities, it currently only supports Positron Emission Tomography (PET) data. In this work, the authors present results for Single Photon Emission Computed Tomography (SPECT) imaging.

Methods

This was achieved by the complete integration of a 3D SPECT system matrix modeling library into STIR.

Results

The authors demonstrate the flexibility of the combined software by reconstructing simulated and acquired projections from three different scanners with different iterative algorithms of STIR.

Conclusions

The extension of the open source STIR project with advanced SPECT modeling will enable the research community to study the performance of several algorithms on SPECT data, and potentially implement new algorithms by expanding the existing framework.",2013-09-01 +26002592,Predicting success or failure of brace treatment for adolescents with idiopathic scoliosis.,"Adolescent idiopathic scoliosis (AIS) is a three-dimensional spinal deformity. Brace treatment is a common non-surgical treatment, intended to prevent progression (worsening) of the condition during adolescence. Estimating a braced patient's risk of progression is an essential part of planning treatment, so method for predicting this risk would be a useful decision support tool for practitioners. This work attempts to discover whether failure of brace treatment (progression) can be predicted at the start of treatment. Records were obtained for 62 AIS patients who had completed brace treatment. Subjects were labeled as ""progressive"" if their condition had progressed despite brace treatment and ""non-progressive"" otherwise. Wrapper-based feature selection selected two useful predictor variables from a list of 14 clinical measurements taken from the records. A logistic regression model was trained to classify patients as ""progressive"" or ""non-progressive"" using these two variables. The logistic regression model's simplicity and interpretability should facilitate its clinical acceptance. The model was tested on data from an additional 28 patients and found to be 75 % accurate. This accuracy is sufficient to make the predictions clinically useful. It can be used online: http://www.ece.ualberta.ca/~dchalmer/SimpleBracePredictor.html .",2015-05-23 +26275894,Protein contact prediction by integrating joint evolutionary coupling analysis and supervised learning.,"

Motivation

Protein contact prediction is important for protein structure and functional study. Both evolutionary coupling (EC) analysis and supervised machine learning methods have been developed, making use of different information sources. However, contact prediction is still challenging especially for proteins without a large number of sequence homologs.

Results

This article presents a group graphical lasso (GGL) method for contact prediction that integrates joint multi-family EC analysis and supervised learning to improve accuracy on proteins without many sequence homologs. Different from existing single-family EC analysis that uses residue coevolution information in only the target protein family, our joint EC analysis uses residue coevolution in both the target family and its related families, which may have divergent sequences but similar folds. To implement this, we model a set of related protein families using Gaussian graphical models and then coestimate their parameters by maximum-likelihood, subject to the constraint that these parameters shall be similar to some degree. Our GGL method can also integrate supervised learning methods to further improve accuracy. Experiments show that our method outperforms existing methods on proteins without thousands of sequence homologs, and that our method performs better on both conserved and family-specific contacts.

Availability and implementation

See http://raptorx.uchicago.edu/ContactMap/ for a web server implementing the method.

Contact

j3xu@ttic.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-14 +27752427,Possibilities of using the German Federal States' permanent soil monitoring program for the monitoring of potential effects of genetically modified organisms (GMO).,"

Background

In the Directive 2001/18/EC on the deliberate release of genetically modified organisms (GMO) into the environment, a monitoring of potential risks is prescribed after their deliberate release or placing on the market. Experience and data of already existing monitoring networks should be included. The present paper summarizes the major findings of a project funded by the Federal Agency for Nature Conservation (Nutzungsmöglichkeiten der Boden-Dauerbeobachtung der Länder für das Monitoring der Umweltwirkungen gentechnisch veränderter Pflanzen. BfN Skripten, Bonn-Bad Godesberg 369, 2014). The full report in german language can be accessed on http://www.bfn.de and is available as Additional file 1. The aim of the project was to check if it is possible to use the German permanent soil monitoring program (PSM) for the monitoring of GMO. Soil organism communities are highly diverse and relevant with respect to the sustainability of soil functions. They are exposed to GMO material directly by feeding or indirectly through food chain interactions. Other impacts are possible due to their close association to soil particles.

Results

The PSM program can be considered as representative with regard to different soil types and ecoregions in Germany, but not for all habitat types relevant for soil organisms. Nevertheless, it is suitable as a basic grid for monitoring the potential effects of GMO on soil invertebrates.

Conclusions

PSM sites should be used to derive reference values, i.e. range of abundance and presence of different relevant species of soil organisms. Based on these references, it is possible to derive threshold values to define the limit of acceptable change or impact. Therefore, a minimum set of sites and minimum set of standardized methods are needed, i.e. characterization of each site, sampling of selected soil organism groups, adequate adaptation of methods for the purpose of monitoring of potential effects of GMO. Finally, and probably most demanding, it is needed to develop a harmonized evaluation concept.",2015-10-23 +23935581,HDDM: Hierarchical Bayesian estimation of the Drift-Diffusion Model in Python.,"The diffusion model is a commonly used tool to infer latent psychological processes underlying decision-making, and to link them to neural mechanisms based on response times. Although efficient open source software has been made available to quantitatively fit the model to data, current estimation methods require an abundance of response time measurements to recover meaningful parameters, and only provide point estimates of each parameter. In contrast, hierarchical Bayesian parameter estimation methods are useful for enhancing statistical power, allowing for simultaneous estimation of individual subject parameters and the group distribution that they are drawn from, while also providing measures of uncertainty in these parameters in the posterior distribution. Here, we present a novel Python-based toolbox called HDDM (hierarchical drift diffusion model), which allows fast and flexible estimation of the the drift-diffusion model and the related linear ballistic accumulator model. HDDM requires fewer data per subject/condition than non-hierarchical methods, allows for full Bayesian data analysis, and can handle outliers in the data. Finally, HDDM supports the estimation of how trial-by-trial measurements (e.g., fMRI) influence decision-making parameters. 
This paper will first describe the theoretical background of the drift diffusion model and Bayesian inference. We then illustrate usage of the toolbox on a real-world data set from our lab. Finally, parameter recovery studies show that HDDM beats alternative fitting methods like the χ(2)-quantile method as well as maximum likelihood estimation. The software and documentation can be downloaded at: http://ski.clps.brown.edu/hddm_docs/",2013-08-02 +26063651,BitMapper: an efficient all-mapper based on bit-vector computing.,"

Background

As next-generation sequencing (NGS) technologies produce hundreds of millions of reads every day, a tremendous computational challenge is to map NGS reads to a given reference genome efficiently. However, existing methods of all-mappers, which aim at finding all mapping locations of each read, are very time consuming. The majority of existing all-mappers consist of 2 main parts, filtration and verification. This work significantly reduces verification time, which is the dominant part of the running time.

Results

An efficient all-mapper, BitMapper, is developed based on a new vectorized bit-vector algorithm, which simultaneously calculates the edit distance of one read to multiple locations in a given reference genome. Experimental results on both simulated and real data sets show that BitMapper is from several times to an order of magnitude faster than the current state-of-the-art all-mappers, while achieving higher sensitivity, i.e., better quality solutions.

Conclusions

We present BitMapper, which is designed to return all mapping locations of raw reads containing indels as well as mismatches. BitMapper is implemented in C under a GPL license. Binaries are freely available at http://home.ustc.edu.cn/%7Echhy.",2015-06-11 +25229694,Search for β2 adrenergic receptor ligands by virtual screening via grid computing and investigation of binding modes by docking and molecular dynamics simulations.,"We designed a program called MolGridCal that can be used to screen small molecule database in grid computing on basis of JPPF grid environment. Based on MolGridCal program, we proposed an integrated strategy for virtual screening and binding mode investigation by combining molecular docking, molecular dynamics (MD) simulations and free energy calculations. To test the effectiveness of MolGridCal, we screened potential ligands for β2 adrenergic receptor (β2AR) from a database containing 50,000 small molecules. MolGridCal can not only send tasks to the grid server automatically, but also can distribute tasks using the screensaver function. As for the results of virtual screening, the known agonist BI-167107 of β2AR is ranked among the top 2% of the screened candidates, indicating MolGridCal program can give reasonable results. To further study the binding mode and refine the results of MolGridCal, more accurate docking and scoring methods are used to estimate the binding affinity for the top three molecules (agonist BI-167107, neutral antagonist alprenolol and inverse agonist ICI 118,551). The results indicate agonist BI-167107 has the best binding affinity. MD simulation and free energy calculation are employed to investigate the dynamic interaction mechanism between the ligands and β2AR. The results show that the agonist BI-167107 also has the lowest binding free energy. 
This study can provide a new way to perform virtual screening effectively through integrating molecular docking based on grid computing, MD simulations and free energy calculations. The source codes of MolGridCal are freely available at http://molgridcal.codeplex.com.",2014-09-17 +23852772,Identifying protein complexes from heterogeneous biological data.,"With the increasing availability of diverse biological information for proteins, integration of heterogeneous data becomes more useful for many problems in proteomics, such as annotating protein functions, predicting novel protein-protein interactions and so on. In this paper, we present an integrative approach called InteHC (Integrative Hierarchical Clustering) to identify protein complexes from multiple data sources. Although integrating multiple sources could effectively improve the coverage of current insufficient protein interactome (the false negative issue), it could also introduce potential false-positive interactions that could hurt the performance of protein complex prediction. Our proposed InteHC method can effectively address these issues to facilitate accurate protein complex prediction and it is summarized into the following three steps. First, for each individual source/feature, InteHC computes the matrices to store the affinity scores between a protein pair that indicate their propensity to interact or co-complex relationship. Second, InteHC computes a final score matrix, which is the weighted sum of affinity scores from individual sources. In particular, the weights indicating the reliability of individual sources are learned from a supervised model (i.e., a linear ranking SVM). Finally, a hierarchical clustering algorithm is performed on the final score matrix to generate clusters as predicted protein complexes. In our experiments, we compared the results collected by our hierarchical clustering on each individual feature with those predicted by InteHC on the combined matrix. 
We observed that integration of heterogeneous data significantly benefits the identification of protein complexes. Moreover, a comprehensive comparison demonstrates that InteHC performs much better than 14 state-of-the-art approaches. All the experimental data and results can be downloaded from http://www.ntu.edu.sg/home/zhengjie/data/InteHC.",2013-08-23 +26484231,Use of multiple time points to model parotid differentiation.,"In order to understand the process of terminal differentiation in salivary acinar cells, mRNA and microRNA expression was measured across the month long process of differentiation in the parotid gland of the rat. Acinar cells were isolated at either nine time points (mRNA) or four time points (microRNA) in triplicate using laser capture microdissection (LCM). One of the values of this dataset comes from the high quality RNA (RIN > 7) that was used in this study, which can be prohibitively difficult to obtain from such an RNaseI-rich tissue. Global mRNA expression was measured by rat genome microarray hybridization (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE65586), and expression of microRNAs by qPCR array (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE65324). Comparing expression at different ages, 2656 mRNAs and 64 microRNAs were identified as differentially expressed. Because mRNA expression was sampled at many time points, clustering and regression analysis were able to identify dynamic expression patterns that had not been implicated in acinar differentiation before. Integration of the two datasets allowed the identification of microRNA target genes, and a gene regulatory network. Bioinformatics R code and additional details of experimental methods and data analysis are provided.",2015-05-20 +23742983,REDItools: high-throughput RNA editing detection made easy.,"

Summary

The reliable detection of RNA editing sites from massive sequencing data remains challenging and, although several methodologies have been proposed, no computational tools have been released to date. Here, we introduce REDItools, a suite of Python scripts to perform high-throughput investigation of RNA editing using next-generation sequencing data.

Availability and implementation

REDItools are written in the Python programming language and freely available at http://code.google.com/p/reditools/.

Contact

ernesto.picardi@uniba.it or graziano.pesole@uniba.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-06-05 +24563424,Combined Analysis of Phenotypic and Target-Based Screening in Assay Networks.,"Small-molecule screens are an integral part of drug discovery. Public domain data in PubChem alone represent more than 158 million measurements, 1.2 million molecules, and 4300 assays. We conducted a global analysis of these data, building a network of assays and connecting the assays if they shared nonpromiscuous active molecules. This network spans both phenotypic and target-based screens, recapitulates known biology, and identifies new polypharmacology. Phenotypic screens are extremely important for drug discovery, contributing to the discovery of a large proportion of new drugs. Connections between phenotypic and biochemical, target-based screens can suggest strategies for repurposing both small-molecule and biologic drugs. For example, a screen for molecules that prevent cell death from a mutated version of superoxide-dismutase is linked with ALOX15. This connection suggests a therapeutic role for ALOX15 inhibitors in amyotrophic lateral sclerosis. An interactive version of the network is available online (http://swami.wustl.edu/flow/assay_network.html).",2014-02-21 +24316576,Ensembl 2014.,"Ensembl (http://www.ensembl.org) creates tools and data resources to facilitate genomic analysis in chordate species with an emphasis on human, major vertebrate model organisms and farm animals. Over the past year we have increased the number of species that we support to 77 and expanded our genome browser with a new scrollable overview and improved variation and phenotype views. We also report updates to our core datasets and improvements to our gene homology relationships from the addition of new species. Our REST service has been extended with additional support for comparative genomics and ontology information. 
Finally, we provide updated information about our methods for data access and resources for user training.",2013-12-06 +24639448,Cohort Profile: The Malawi Longitudinal Study of Families and Health (MLSFH).,"The Malawi Longitudinal Study of Families and Health (MLSFH) is one of very few long-standing, publicly available longitudinal cohort studies in a sub-Saharan African (SSA) context. It provides a rare record of more than a decade of demographic, socioeconomic and health conditions in one of the world's poorest countries. The MLSFH was initially established in 1998 to study social network influences on fertility behaviours and HIV risk perceptions, and over time the focus of the study expanded to include health, sexual behaviours, intergenerational relations and family/household dynamics. The currently available data include MLSFH rounds collected in 1998, 2001, 2004, 2006, 2008, 2010 and 2012 for up to 4000 individuals, providing information about socioeconomic and demographic characteristics, sexual behaviours, marriage, household/family structure, risk perceptions, social networks and social capital, intergenerational relations, HIV/AIDS and other dimensions of health. The MLSFH public use data can be requested on the project website: http://www.malawi.pop.upenn.edu/.",2014-03-16 +25421113,Effects of tissue decalcification on the quantification of breast cancer biomarkers by digital image analysis.,"

Background

Recent technical advances in digital image capture and analysis greatly improve the measurement of protein expression in tissues. Breast cancer biomarkers provide a unique opportunity to utilize digital image analysis to evaluate sources of variability that are caused by the tissue preparation, in particular the decalcification treatment associated with the analysis of bone metastatic breast cancer, and to develop methods for comparison of digital data and categorical scores rendered by pathologists.

Methods

Tissues were prospectively decalcified for up to 24 hours and stained by immunohistochemistry (IHC) for ER, PR, Ki-67 and p53. HER2 positive breast cancer sections were retrieved from the pathology archives, and annotated with the categorical HER2 expression scores from the pathology reports. Digital images were captured with Leica and Aperio slide scanners. The conversion of the digital to categorical scores was accomplished with a Gaussian mixture model and tested for accuracy by comparison to clinical scores.

Results

We observe significant effects of the decalcification treatment on common breast cancer biomarkers that are used in the clinic. ER, PR and p53 staining intensities decreased 15 - 20%, whereas Ki-67 decreased > 90% during the first 6 hrs of treatment and stabilized thereafter. In comparison with the Aperio images, pixel intensities generated by the Leica system are lower. A novel statistical model for conversion of digital to categorical scores provides a systematic approach for conversion of nuclear and membrane stains and demonstrated a high concordance with clinical scores.

Conclusion

Digital image analysis greatly improves the quantification of protein expression in human tissues. Decalcification affects the accuracy of immunohistochemical staining results and cannot be reversed by image analysis. Measurement data obtained on a continuous scoring scale can be converted to categorical scores for comparison with categorical datasets that are generated by pathologists.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_213.",2014-11-25 +26859738,Methods to Estimate Acclimatization to Urban Heat Island Effects on Heat- and Cold-Related Mortality.,"

Background

Investigators have examined whether heat mortality risk is increased in neighborhoods subject to the urban heat island (UHI) effect but have not identified degrees of difference in susceptibility to heat and cold between cool and hot areas, which we call acclimatization to the UHI.

Objectives

We developed methods to examine and quantify the degree of acclimatization to heat- and cold-related mortality in relation to UHI anomalies and applied these methods to London, UK.

Methods

Case-crossover analyses were undertaken on 1993-2006 mortality data from London UHI decile groups defined by anomalies from the London average of modeled air temperature at a 1-km grid resolution. We estimated how UHI anomalies modified excess mortality on cold and hot days for London overall and displaced a fixed-shape temperature-mortality function (""shifted spline"" model). We also compared the observed associations with those expected under no or full acclimatization to the UHI.

Results

The relative risk of death on hot versus normal days differed very little across UHI decile groups. A 1°C UHI anomaly multiplied the risk of heat death by 1.004 (95% CI: 0.950, 1.061) (interaction rate ratio) compared with the expected value of 1.070 (1.057, 1.082) if there were no acclimatization. The corresponding UHI interaction for cold was 1.020 (0.979, 1.063) versus 1.030 (1.026, 1.034) (actual versus expected under no acclimatization, respectively). Fitted splines for heat shifted little across UHI decile groups, again suggesting acclimatization. For cold, the splines shifted somewhat in the direction of no acclimatization, but did not exclude acclimatization.

Conclusions

We have proposed two analytical methods for estimating the degree of acclimatization to the heat- and cold-related mortality burdens associated with UHIs. The results for London suggest relatively complete acclimatization to the UHI effect on summer heat-related mortality, but less clear evidence for cold-related mortality.

Citation

Milojevic A, Armstrong BG, Gasparrini A, Bohnenstengel SI, Barratt B, Wilkinson P. 2016. Methods to estimate acclimatization to urban heat island effects on heat- and cold-related mortality. Environ Health Perspect 124:1016-1022; http://dx.doi.org/10.1289/ehp.1510109.",2016-02-09 +24997477,Addition of MR imaging features and genetic biomarkers strengthens glioblastoma survival prediction in TCGA patients.,"

Purpose

The purpose of our study was to assess whether a model combining clinical factors, MR imaging features, and genomics would better predict overall survival of patients with glioblastoma (GBM) than either individual data type.

Methods

The study was conducted leveraging The Cancer Genome Atlas (TCGA) effort supported by the National Institutes of Health. Six neuroradiologists reviewed MRI images from The Cancer Imaging Archive (http://cancerimagingarchive.net) of 102 GBM patients using the VASARI scoring system. The patients' clinical and genetic data were obtained from the TCGA website (http://www.cancergenome.nih.gov/). Patient outcome was measured in terms of overall survival time. The association between different categories of biomarkers and survival was evaluated using Cox analysis.

Results

The features that were significantly associated with survival were: (1) clinical factors: chemotherapy; (2) imaging: proportion of tumor contrast enhancement on MRI; and (3) genomics: HRAS copy number variation. The combination of these three biomarkers resulted in an incremental increase in the strength of prediction of survival, with the model that included clinical, imaging, and genetic variables having the highest predictive accuracy (area under the curve 0.679±0.068, Akaike's information criterion 566.7, P<0.001).

Conclusion

A combination of clinical factors, imaging features, and HRAS copy number variation best predicts survival of patients with GBM.",2014-07-02 +25835733,The carbon tetrachloride model in mice.,"Recently, the need for more standardized operation procedures in experimental liver fibrosis research was suggested due to dramatic changes in European animal welfare rules. Here, we present a short series of standard operation procedures (SOPs) summarizing the most relevant and widely accepted experimental models for the induction of liver injury leading to liver fibrosis. The described procedures are based on the long-term experience of the Collaborative Research Centre 'Organ Fibrosis: From Mechanisms of Injury to Modulation of Disease' (http://www.sfbtrr57.rwth-aachen.de/), which is supported by the German Research Foundation (SFB/TRR57). These SOPs will help to improve standardization of fibrosis models and to increase the comparability of data between different laboratories with the aim of reducing animal experimentation according to the principle that was proposed in 1959 by Russell and Burch as an ethical framework for conducting scientific experiments with animals, namely the replacement, refinement and reduction (3R) principle. In the first section we focus on the carbon tetrachloride (CCl4) model in mice, which is the toxic model of liver fibrosis induction most commonly used worldwide.",2015-04-01 +23522030,Cloud-based solution to identify statistically significant MS peaks differentiating sample categories.,"

Background

Mass spectrometry (MS) has evolved to become the primary high throughput tool for proteomics based biomarker discovery. Until now, multiple challenges in protein MS data analysis remain: large-scale and complex data set management; MS peak identification, indexing; and high dimensional peak differential analysis with the concurrent statistical tests based false discovery rate (FDR). ""Turnkey"" solutions are needed for biomarker investigations to rapidly process MS data sets to identify statistically significant peaks for subsequent validation.

Findings

Here we present an efficient and effective solution, which provides experimental biologists easy access to ""cloud"" computing capabilities to analyze MS data. The web portal can be accessed at http://transmed.stanford.edu/ssa/.

Conclusions

The presented web application supports online uploading and analysis of large-scale MS data through a simple user interface. This bioinformatic tool will facilitate the discovery of potential protein biomarkers using MS.

Motivation

In profiling the composition and structure of complex microbial communities via high throughput amplicon sequencing, a very low proportion of community members are typically sampled. As a result of this incomplete sampling, estimates of dissimilarity between communities are often inflated, an issue we term pseudo β-diversity.

Results

We present a set of tools to identify and correct for the presence of pseudo β-diversity in contrasts between microbial communities. The variably weighted Odum dissimilarity (DwOdum) allows for down-weighting the influence of either abundant or rare taxa in calculating a measure of similarity between two communities. We show that down-weighting the influence of rare taxa can be used to minimize pseudo β-diversity arising from incomplete sampling. Down-weighting the influence of abundant taxa can increase the sensitivity of hypothesis testing. OTUshuff is an associated test for identifying the presence of pseudo β-diversity in pairwise community contrasts.

Availability and implementation

A Perl script for calculating the DwOdum score from a taxon abundance table and performing pairwise contrasts with OTUshuff can be obtained at http://www.ars.usda.gov/services/software/software.htm?modecode=30-12-10-00.

Contact

daniel.manter@ars.usda.gov

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-30 +24957465,Reading between the lines; understanding drug response in the post genomic era.,"Following the fanfare of initial, often dramatic, success with small molecule inhibitors in the treatment of defined genomic subgroups, it can be argued that the extension of targeted therapeutics to the majority of patients with solid cancers has stalled. Despite encouraging FDA approval rates, the attrition rates of these compounds remains high in early stage clinical studies, with single agent studies repeatedly showing poor efficacy In striking contrast, our understanding of the complexity of solid neoplasms has increased in huge increments, following the publication of large-scale genomic and transcriptomic datasets from large collaborations such as the International Cancer Genome Consortium (ICGC http://www.icgc.org/) and The Cancer Genome Atlas (TCGA http://cancergenome.nih.gov/). However, there remains a clear disconnect between these rich datasets describing the genomic complexity of cancer, including both intra- and inter-tumour heterogeneity, and what a treating oncologist can consider to be a clinically ""actionable"" mutation profile. Our understanding of these data is in its infancy and we still find difficulties ascribing characteristics to tumours that consistently predict therapeutic response for the majority of small molecule inhibitors. This article will seek to explore the recent studies of the patterns and impact of mutations in drug resistance, and demonstrate how we may use this data to reshape our thinking about biological pathways, critical dependencies and their therapeutic interruption.",2014-06-10 +25903272,Thyroid function: a new road to understanding age-related macular degeneration?,"Age-related macular degeneration (AMD) continues to be amongst the leading causes of blindness and visual impairment worldwide. 
AMD remains a degenerative disorder of unknown etiology with rising prevalence. It induces retinal changes and damages those parts of the retina which are essential for central vision. The risk of developing this condition is associated with increasing age. Early stages usually progress without warning signs over years. The major identified risk factors for AMD development are age, ethnicity, family history, and current smoking. Associations of other modifiable risk factors with AMD have been widely published but these studies have reported conflicting results and showed a lack of consistency. According to recent data published in BMC Medicine from the population-based Rotterdam study, thyroid hormones may contribute to a better characterization of AMD in clinical practice. In that study serum free thyroxine levels were positively associated with development of AMD. More studies are needed to validate these findings and to understand better the role of thyroid hormones in the pathogenesis of AMD disease. Please see related article: http://dx.doi.org/10.1186/s12916-015-0329-0.",2015-04-23 +21904428,A web accessible resource for investigating cassava phenomics and genomics information: BIOGEN BASE.,"

Unlabelled

The goal of our research is to establish a unique portal to bring out the potential outcome of the research in the Cassava crop. The Biogen base for cassava clearly brings out the variations of different traits of the germplasms, maintained at the Tapioca and Castor Research Station, Tamil Nadu Agricultural University. Phenotypic and genotypic variations of the accessions are clearly depicted, for the users to browse and interpret the variations using the microsatellite markers. Database (BIOGEN BASE - CASSAVA) is designed using PHP and MySQL and is equipped with extensive search options. It is more user-friendly and made publicly available, to improve the research and development of cassava by making a wealth of genetics and genomics data available through open, common, and worldwide forum for all individuals interested in the field.

Availability

The database is available for free at http://www.tnaugenomics.com/biogenbase/casava.php.",2011-08-02 +21699217,Predicting drug-induced hepatotoxicity using QSAR and toxicogenomics approaches.,"Quantitative structure-activity relationship (QSAR) modeling and toxicogenomics are typically used independently as predictive tools in toxicology. In this study, we evaluated the power of several statistical models for predicting drug hepatotoxicity in rats using different descriptors of drug molecules, namely, their chemical descriptors and toxicogenomics profiles. The records were taken from the Toxicogenomics Project rat liver microarray database containing information on 127 drugs ( http://toxico.nibio.go.jp/datalist.html ). The model end point was hepatotoxicity in the rat following 28 days of continuous exposure, established by liver histopathology and serum chemistry. First, we developed multiple conventional QSAR classification models using a comprehensive set of chemical descriptors and several classification methods (k nearest neighbor, support vector machines, random forests, and distance weighted discrimination). With chemical descriptors alone, external predictivity (correct classification rate, CCR) from 5-fold external cross-validation was 61%. Next, the same classification methods were employed to build models using only toxicogenomics data (24 h after a single exposure) treated as biological descriptors. The optimized models used only 85 selected toxicogenomics descriptors and had CCR as high as 76%. Finally, hybrid models combining both chemical descriptors and transcripts were developed; their CCRs were between 68 and 77%. Although the accuracy of hybrid models did not exceed that of the models based on toxicogenomics data alone, the use of both chemical and biological descriptors enriched the interpretation of the models. 
In addition to finding 85 transcripts that were predictive and highly relevant to the mechanisms of drug-induced liver injury, chemical structural alerts for hepatotoxicity were identified. These results suggest that concurrent exploration of the chemical features and acute treatment-induced changes in transcript levels will both enrich the mechanistic understanding of subchronic liver injury and afford models capable of accurate prediction of hepatotoxicity from chemical structure and short-term assay results.",2011-07-21 +26254488,BLSSpeller: exhaustive comparative discovery of conserved cis-regulatory elements.,"

Motivation

The accurate discovery and annotation of regulatory elements remains a challenging problem. The growing number of sequenced genomes creates new opportunities for comparative approaches to motif discovery. Putative binding sites are then considered to be functional if they are conserved in orthologous promoter sequences of multiple related species. Existing methods for comparative motif discovery usually rely on pregenerated multiple sequence alignments, which are difficult to obtain for more diverged species such as plants. As a consequence, misaligned regulatory elements often remain undetected.

Results

We present a novel algorithm that supports both alignment-free and alignment-based motif discovery in the promoter sequences of related species. Putative motifs are exhaustively enumerated as words over the IUPAC alphabet and screened for conservation using the branch length score. Additionally, a confidence score is established in a genome-wide fashion. In order to take advantage of a cloud computing infrastructure, the MapReduce programming model is adopted. The method is applied to four monocotyledon plant species and it is shown that high-scoring motifs are significantly enriched for open chromatin regions in Oryza sativa and for transcription factor binding sites inferred through protein-binding microarrays in O.sativa and Zea mays. Furthermore, the method is shown to recover experimentally profiled ga2ox1-like KN1 binding sites in Z.mays.

Availability and implementation

BLSSpeller was written in Java. Source code and manual are available at http://bioinformatics.intec.ugent.be/blsspeller

Contact

Klaas.Vandepoele@psb.vib-ugent.be or jan.fostier@intec.ugent.be.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-08-08 +21584190,A database of six eukaryotic hypothetical genes and proteins.,"

Unlabelled

Assigning functions to proteins of unknown function is of considerable interest to the proteomic researchers as the genes encoding them are conserved over various species. Here, we describe HypoDB, a database of hypothetical genes and proteins in six eukaryotes. The database was collected and organized based on the number of entries in each chromosome with few annotations. Hypothetical protein database contains information related to gene and protein sequences, chromosome number and location, secondary and tertiary structure related data.

Availability

The database is available for free at http://www.trimslabs.com/database/hypodb/index.html.",2011-04-22 +24564791,QTREDS: a Ruby on Rails-based platform for omics laboratories.,"

Background

In recent years, the experimental aspects of the laboratory activities have been growing in complexity in terms of amount and diversity of data produced, equipment used, of computer-based workflows needed to process and analyze the raw data generated. To enhance the level of quality control over the laboratory activities and efficiently handle the large amounts of data produced, a Laboratory Management Information System (LIMS) is highly-recommended. A LIMS is a complex software platform that helps researchers to have a complete knowledge of the laboratory activities at each step encouraging them to adopt good laboratory practices.

Results

We have designed and implemented Quality and TRacEability Data System--QTREDS, a software platform born to address the specific needs of the CRS4 Sequencing and Genotyping Platform (CSGP). The system written in the Ruby programming language and developed using the Rails framework is based on four main functional blocks: a sample handler, a workflow generator, an inventory management system and a user management system. The wizard-based sample handler allows to manage one or multiple samples at a time, tracking the path of each sample and providing a full chain of custody. The workflow generator encapsulates a user-friendly JavaScript-based visual tool that allows users to design customized workflows even for those without a technical background. With the inventory management system, reagents, laboratory glassware and consumables can be easily added through their barcodes and minimum stock levels can be controlled to avoid shortages of essential laboratory supplies. QTREDS provides a system for privileges management and authorizations to create different user roles, each with a well-defined access profile.

Conclusions

Tracking and monitoring all the phases of the laboratory activities can help to identify and troubleshoot problems more quickly, reducing the risk of process failures and their related costs. QTREDS was designed to address the specific needs of the CSGP laboratory, where it has been successfully used for over a year, but thanks to its flexibility it can be easily adapted to other ""omics"" laboratories. The software is freely available for academic users from http://qtreds.crs4.it.",2014-01-10 +21738316,CytReD: A database collecting human cytokinome information.,"

Unlabelled

The cytokines/related receptors system represents a complex regulatory network that is involved in those chronic inflammatory processes which lead to many diseases such as cancers. We developed a Cytokine Receptor Database (CytReD) to collect information on cytokine receptors related to their biological activity, gene data, protein structures and diseases in which these and their ligands are implicated. This large set of information may be used by researchers as well as by physicians or clinicians to identify which cytokines, reported in the literature, are important in a given disease and, therefore, useful for purposes of diagnosis or prognosis.

Availability

The database is available for free at http://www.cro-m.eu/CytReD/",2011-05-26 +25904632,RSAT 2015: Regulatory Sequence Analysis Tools.,"RSAT (Regulatory Sequence Analysis Tools) is a modular software suite for the analysis of cis-regulatory elements in genome sequences. Its main applications are (i) motif discovery, appropriate to genome-wide data sets like ChIP-seq, (ii) transcription factor binding motif analysis (quality assessment, comparisons and clustering), (iii) comparative genomics and (iv) analysis of regulatory variations. Nine new programs have been added to the 43 described in the 2011 NAR Web Software Issue, including a tool to extract sequences from a list of coordinates (fetch-sequences from UCSC), novel programs dedicated to the analysis of regulatory variants from GWAS or population genomics (retrieve-variation-seq and variation-scan), a program to cluster motifs and visualize the similarities as trees (matrix-clustering). To deal with the drastic increase of sequenced genomes, RSAT public sites have been reorganized into taxon-specific servers. The suite is well-documented with tutorials and published protocols. The software suite is available through Web sites, SOAP/WSDL Web services, virtual machines and stand-alone programs at http://www.rsat.eu/.",2015-04-22 +26931313,A cross-sectional internet-based patient survey of the management strategies for gout.,"

Background

Almost half of the patients with gout are not prescribed urate-lowering therapy (ULT) by their health care provider and >50 % use complementary and alternative therapies. Diet modification is popular among gout patients due to known associations of certain foods with gout flares. The interplay of the use of dietary supplements, diet modification, and ULT adherence in gout patients is not known. Despite the recent interest in diet and supplements, there are limited data on their use. Our objective was to assess ULT use and adherence and patient preference for non-pharmacological interventions by patients with gout, using a cross-sectional survey.

Methods

People who self-reported physician-diagnosed gout during their visit to a gout website ( http://gouteducation.org ) were invited to participate in a brief anonymous cross-sectional Internet survey between 08/11/2014 to 04/14/2015 about the management of their gout. The survey queried ULT prescription, ULT adherence, the use of non-pharmacological interventions (cherry extract, diet modification) and the likelihood of making a lifelong diet modification for gout management.

Results

A total of 499 respondents with a mean age 56.3 years were included; 74% were males and 74% were White. Of these, 57% (285/499) participants were prescribed a ULT for gout, of whom 88% (251/285) were currently taking ULT. Of those using ULT, 78% (97/251) reported ULT adherence >80%. Gender, race, and age were not significantly associated with the likelihood of receiving a ULT prescription or ULT adherence >80%. Fifty-six percent of patients with gout preferred ULT as a lifelong treatment for gout, 24% preferred cherry extract and 16% preferred diet modification (4% preferred none). Men had significantly lower odds of preferring ULT as the lifelong treatment choice for gout vs. other choices (p = 0.03). We found that 38.3% participants were highly motivated to make a lifelong dietary modification to improve their gout (score of 9-10 on a 0-10 likelihood scale). Older age was significantly associated with high level of willingness to modify diet (p = 0.02).

Conclusion

We found that only 57% of gout patients reported being prescribed ULT. 40% of gout patients preferred non-pharmacological interventions such as cherry extract and diet modification for gout management. The latter finding requires further investigation.",2016-03-01 +21881655,Computational characterization of zeolite porous networks: an automated approach.,"An automated method has been developed to fully characterize the three-dimensional structure of zeolite porous networks. The proposed optimization-based approach starts with the crystallographic coordinates of a structure and identifies all portals, channels, and cages in a unit cell, as well as their connectivity. We apply our algorithms to known zeolites, hypothetical zeolites, and zeolite-like structures and use the characterizations to calculate important quantities such as pore size distribution, accessible volume, surface area, and largest cavity and pore limiting diameters. We aggregate this data over many framework types to gain insights about zeolite selectivity. Finally, we develop a continuous-time Markov chain model to estimate the probability of occupancy of adsorption sites throughout the porous network. ZEOMICS, an online database of structure characterizations and web tool for the automated approach is freely available to the scientific community (http://helios.princeton.edu/zeomics/).",2011-08-31 +24152242,Compact representation of k-mer de Bruijn graphs for genome read assembly.,"

Background

Processing of reads from high throughput sequencing is often done in terms of edges in the de Bruijn graph representing all k-mers from the reads. The memory requirements for storing all k-mers in a lookup table can be demanding, even after removal of read errors, but can be alleviated by using a memory efficient data structure.

Results

The FM-index, which is based on the Burrows-Wheeler transform, provides an efficient data structure providing a searchable index of all substrings from a set of strings, and is used to compactly represent full genomes for use in mapping reads to a genome: the memory required to store this is in the same order of magnitude as the strings themselves. However, reads from high throughput sequences mostly have high coverage and so contain the same substrings multiple times from different reads. I here present a modification of the FM-index, which I call the kFM-index, for indexing the set of k-mers from the reads. For DNA sequences, this requires 5 bit of information for each vertex of the corresponding de Bruijn subgraph, i.e. for each different k-1-mer, plus some additional overhead, typically 0.5 to 1 bit per vertex, for storing the equivalent of the FM-index for walking the underlying de Bruijn graph and reproducing the actual k-mers efficiently.

Conclusions

The kFM-index could replace more memory demanding data structures for storing the de Bruijn k-mer graph representation of sequence reads. A Java implementation with additional technical documentation is provided which demonstrates the applicability of the data structure (http://folk.uio.no/einarro/Projects/KFM-index/).",2013-10-23 +22832957,Association between SNPs and gene expression in multiple regions of the human brain.,"Identifying the genetic cis associations between DNA variants (single-nucleotide polymorphisms (SNPs)) and gene expression in brain tissue may be a promising approach to find functionally relevant pathways that contribute to the etiology of psychiatric disorders. In this study, we examined the association between genetic variations and gene expression in prefrontal cortex, hippocampus, temporal cortex, thalamus and cerebellum in subjects with psychiatric disorders and in normal controls. We identified cis associations between 648 transcripts and 6725 SNPs in the various brain regions. Several SNPs showed brain regional-specific associations. The expression level of only one gene, PDE4DIP, was associated with a SNP, rs12124527, in all the brain regions tested here. From our data, we generated a list of brain cis expression quantitative trait loci (eQTL) genes that we compared with a list of schizophrenia candidate genes downloaded from the Schizophrenia Forum (SZgene) database (http://www.szgene.org/). Of the SZgene candidate genes, we found that the expression levels of four genes, HTR2A, PLXNA2, SRR and TCF4, were significantly associated with cis SNPs in at least one brain region tested. One gene, SRR, was also involved in a coexpression module that we found to be associated with disease status. In addition, a substantial number of cis eQTL genes were also involved in the module, suggesting eQTL analysis of brain tissue may identify more reliable susceptibility genes for schizophrenia than case-control genetic association analyses. 
In an attempt to facilitate the identification of genetic variations that may underlie the etiology of major psychiatric disorders, we have integrated the brain eQTL results into a public and online database, Stanley Neuropathology Consortium Integrative Database (SNCID; http://sncid.stanleyresearch.org).",2012-05-08 +25157598,MIDAS: a database-searching algorithm for metabolite identification in metabolomics.,"A database searching approach can be used for metabolite identification in metabolomics by matching measured tandem mass spectra (MS/MS) against the predicted fragments of metabolites in a database. Here, we present the open-source MIDAS algorithm (Metabolite Identification via Database Searching). To evaluate a metabolite-spectrum match (MSM), MIDAS first enumerates possible fragments from a metabolite by systematic bond dissociation, then calculates the plausibility of the fragments based on their fragmentation pathways, and finally scores the MSM to assess how well the experimental MS/MS spectrum from collision-induced dissociation (CID) is explained by the metabolite's predicted CID MS/MS spectrum. MIDAS was designed to search high-resolution tandem mass spectra acquired on time-of-flight or Orbitrap mass spectrometer against a metabolite database in an automated and high-throughput manner. The accuracy of metabolite identification by MIDAS was benchmarked using four sets of standard tandem mass spectra from MassBank. On average, for 77% of original spectra and 84% of composite spectra, MIDAS correctly ranked the true compounds as the first MSMs out of all MetaCyc metabolites as decoys. MIDAS correctly identified 46% more original spectra and 59% more composite spectra at the first MSMs than an existing database-searching algorithm, MetFrag. MIDAS was showcased by searching a published real-world measurement of a metabolome from Synechococcus sp. PCC 7002 against the MetaCyc metabolite database. 
MIDAS identified many metabolites missed in the previous study. MIDAS identifications should be considered only as candidate metabolites, which need to be confirmed using standard compounds. To facilitate manual validation, MIDAS provides annotated spectra for MSMs and labels observed mass spectral peaks with predicted fragments. The database searching and manual validation can be performed online at http://midas.omicsbio.org.",2014-09-11 +25900916,MultiP-SChlo: multi-label protein subchloroplast localization prediction with Chou's pseudo amino acid composition and a novel multi-label classifier.,"

Motivation

Identifying protein subchloroplast localization in chloroplast organelle is very helpful for understanding the function of chloroplast proteins. There have existed a few computational prediction methods for protein subchloroplast localization. However, these existing works have ignored proteins with multiple subchloroplast locations when constructing prediction models, so that they can predict only one of all subchloroplast locations of this kind of multilabel proteins.

Results

To address this problem, through utilizing label-specific features and label correlations simultaneously, a novel multilabel classifier was developed for predicting protein subchloroplast location(s) with both single and multiple location sites. As an initial study, the overall accuracy of our proposed algorithm reaches 55.52%, which is quite high to be able to become a promising tool for further studies.

Availability and implementation

An online web server for our proposed algorithm named MultiP-SChlo was developed, which is freely accessible at http://biomed.zzuli.edu.cn/bioinfo/multip-schlo/.

Contact

pandaxiaoxi@gmail.com or gzli@tongji.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-20 +22901092,DTome: a web-based tool for drug-target interactome construction.,"

Background

Understanding drug bioactivities is crucial for early-stage drug discovery, toxicology studies and clinical trials. Network pharmacology is a promising approach to better understand the molecular mechanisms of drug bioactivities. With a dramatic increase of rich data sources that document drugs' structural, chemical, and biological activities, it is necessary to develop an automated tool to construct a drug-target network for candidate drugs, thus facilitating the drug discovery process.

Results

We designed a computational workflow to construct drug-target networks from different knowledge bases including DrugBank, PharmGKB, and the PINA database. To automatically implement the workflow, we created a web-based tool called DTome (Drug-Target interactome tool), which is comprised of a database schema and a user-friendly web interface. The DTome tool utilizes web-based queries to search candidate drugs and then construct a DTome network by extracting and integrating four types of interactions. The four types are adverse drug interactions, drug-target interactions, drug-gene associations, and target-/gene-protein interactions. Additionally, we provided a detailed network analysis and visualization process to illustrate how to analyze and interpret the DTome network. The DTome tool is publicly available at http://bioinfo.mc.vanderbilt.edu/DTome.

Conclusions

As demonstrated with the antipsychotic drug clozapine, the DTome tool was effective and promising for the investigation of relationships among drugs, adverse interaction drugs, drug primary targets, drug-associated genes, and proteins directly interacting with targets or genes. The resultant DTome network provides researchers with direct insights into their interest drug(s), such as the molecular mechanisms of drug actions. We believe such a tool can facilitate identification of drug targets and drug adverse interactions.",2012-06-11 +24700318,"SVAMP: sequence variation analysis, maps and phylogeny.","

Summary

SVAMP is a stand-alone desktop application to visualize genomic variants (in variant call format) in the context of geographical metadata. Users of SVAMP are able to generate phylogenetic trees and perform principal coordinate analysis in real time from variant call format (VCF) and associated metadata files. Allele frequency map, geographical map of isolates, Tajima's D metric, single nucleotide polymorphism density, GC and variation density are also available for visualization in real time. We demonstrate the utility of SVAMP in tracking a methicillin-resistant Staphylococcus aureus outbreak from published next-generation sequencing data across 15 countries. We also demonstrate the scalability and accuracy of our software on 245 Plasmodium falciparum malaria isolates from three continents.

Availability and implementation

The Qt/C++ software code, binaries, user manual and example datasets are available at http://cbrc.kaust.edu.sa/svamp

Contact

arnab.pain@kaust.edu.sa or arnab.pain@cantab.net

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-03 +23631706,"Quality control, analysis and secure sharing of Luminex® immunoassay data using the open source LabKey Server platform.","

Background

Immunoassays that employ multiplexed bead arrays produce high information content per sample. Such assays are now frequently used to evaluate humoral responses in clinical trials. Integrated software is needed for the analysis, quality control, and secure sharing of the high volume of data produced by such multiplexed assays. Software that facilitates data exchange and provides flexibility to perform customized analyses (including multiple curve fits and visualizations of assay performance over time) could increase scientists' capacity to use these immunoassays to evaluate human clinical trials.

Results

The HIV Vaccine Trials Network and the Statistical Center for HIV/AIDS Research and Prevention collaborated with LabKey Software to enhance the open source LabKey Server platform to facilitate workflows for multiplexed bead assays. This system now supports the management, analysis, quality control, and secure sharing of data from multiplexed immunoassays that leverage Luminex xMAP® technology. These assays may be custom or kit-based. Newly added features enable labs to: (i) import run data from spreadsheets output by Bio-Plex Manager™ software; (ii) customize data processing, curve fits, and algorithms through scripts written in common languages, such as R; (iii) select script-defined calculation options through a graphical user interface; (iv) collect custom metadata for each titration, analyte, run and batch of runs; (v) calculate dose-response curves for titrations; (vi) interpolate unknown concentrations from curves for titrated standards; (vii) flag run data for exclusion from analysis; (viii) track quality control metrics across runs using Levey-Jennings plots; and (ix) automatically flag outliers based on expected values. Existing system features allow researchers to analyze, integrate, visualize, export and securely share their data, as well as to construct custom user interfaces and workflows.

Conclusions

Unlike other tools tailored for Luminex immunoassays, LabKey Server allows labs to customize their Luminex analyses using scripting while still presenting users with a single, graphical interface for processing and analyzing data. The LabKey Server system also stands out among Luminex tools for enabling smooth, secure transfer of data, quality control information, and analyses between collaborators. LabKey Server and its Luminex features are freely available as open source software at http://www.labkey.com under the Apache 2.0 license.",2013-04-30 +23868748,Multiple imputation of missing covariates in NONMEM and evaluation of the method's sensitivity to η-shrinkage.,"Multiple imputation (MI) is an approach widely used in statistical analysis of incomplete data. However, its application to missing data problems in nonlinear mixed-effects modelling is limited. The objective was to implement a four-step MI method for handling missing covariate data in NONMEM and to evaluate the method's sensitivity to η-shrinkage. Four steps were needed; (1) estimation of empirical Bayes estimates (EBEs) using a base model without the partly missing covariate, (2) a regression model for the covariate values given the EBEs from subjects with covariate information, (3) imputation of covariates using the regression model and (4) estimation of the population model. Steps (3) and (4) were repeated several times. The procedure was automated in PsN and is now available as the mimp functionality ( http://psn.sourceforge.net/ ). The method's sensitivity to shrinkage in EBEs was evaluated in a simulation study where the covariate was missing according to a missing at random type of missing data mechanism. The η-shrinkage was increased in steps from 4.5 to 54%. Two hundred datasets were simulated and analysed for each scenario. When shrinkage was low the MI method gave unbiased and precise estimates of all population parameters. 
With increased shrinkage the estimates became less precise but remained unbiased.",2013-07-19 +24343026,Identifying RNA-binding residues based on evolutionary conserved structural and energetic features.,"Increasing numbers of protein structures are solved each year, but many of these structures belong to proteins whose sequences are homologous to sequences in the Protein Data Bank. Nevertheless, the structures of homologous proteins belonging to the same family contain useful information because functionally important residues are expected to preserve physico-chemical, structural and energetic features. This information forms the basis of our method, which detects RNA-binding residues of a given RNA-binding protein as those residues that preserve physico-chemical, structural and energetic features in its homologs. Tests on 81 RNA-bound and 35 RNA-free protein structures showed that our method yields a higher fraction of true RNA-binding residues (higher precision) than two structure-based and two sequence-based machine-learning methods. Because the method requires no training data set and has no parameters, its precision does not degrade when applied to 'novel' protein sequences unlike methods that are parameterized for a given training data set. It was used to predict the 'unknown' RNA-binding residues in the C-terminal RNA-binding domain of human CPEB3. The two predicted residues, F430 and F474, were experimentally verified to bind RNA, in particular F430, whose mutation to alanine or asparagine nearly abolished RNA binding. The method has been implemented in a webserver called DR_bind1, which is freely available with no login requirement at http://drbind.limlab.ibms.sinica.edu.tw.",2013-12-16 +21520333,LOVD v.2.0: the next generation in gene variant databases.,"Locus-Specific DataBases (LSDBs) store information on gene sequence variation associated with human phenotypes and are frequently used as a reference by researchers and clinicians. 
We developed the Leiden Open-source Variation Database (LOVD) as a platform-independent Web-based LSDB-in-a-Box package. LOVD was designed to be easy to set up and maintain and follows the Human Genome Variation Society (HGVS) recommendations. Here we describe LOVD v.2.0, which adds enhanced flexibility and functionality and has the capacity to store sequence variants in multiple genes per patient. To reduce redundancy, patient and sequence variant data are stored in separate tables. Tables are linked to generate connections between sequence variant data for each gene and every patient. The dynamic structure allows database managers to add custom columns. The database structure supports fast queries and allows storage of sequence variants from high-throughput sequence analysis, as demonstrated by the X-chromosomal Mental Retardation LOVD installation. LOVD contains measures to ensure database security from unauthorized access. Currently, the LOVD Website (http://www.LOVD.nl/) lists 71 public LOVD installations hosting 3,294 gene variant databases with 199,000 variants in 84,000 patients. To promote LSDB standardization and thereby database interoperability, we offer free server space and help to establish an LSDB on our Leiden server.",2011-02-22 +24363375,GPU-Meta-Storms: computing the structure similarities among massive amount of microbial community samples using GPU.,"

Motivation

The number of microbial community samples is increasing with exponential speed. Data-mining among microbial community samples could facilitate the discovery of valuable biological information that is still hidden in the massive data. However, current methods for the comparison among microbial communities are limited by their ability to process large amount of samples each with complex community structure.

Summary

We have developed an optimized GPU-based software, GPU-Meta-Storms, to efficiently measure the quantitative phylogenetic similarity among massive amount of microbial community samples. Our results have shown that GPU-Meta-Storms would be able to compute the pair-wise similarity scores for 10 240 samples within 20 min, which gained a speed-up of >17 000 times compared with single-core CPU, and >2600 times compared with 16-core CPU. Therefore, the high-performance of GPU-Meta-Storms could facilitate in-depth data mining among massive microbial community samples, and make the real-time analysis and monitoring of temporal or conditional changes for microbial communities possible.

Availability and implementation

GPU-Meta-Storms is implemented by CUDA (Compute Unified Device Architecture) and C++. Source code is available at http://www.computationalbioenergy.org/meta-storms.html.",2013-12-19 +23868073,Large-scale gene function analysis with the PANTHER classification system.,"The PANTHER (protein annotation through evolutionary relationship) classification system (http://www.pantherdb.org/) is a comprehensive system that combines gene function, ontology, pathways and statistical analysis tools that enable biologists to analyze large-scale, genome-wide data from sequencing, proteomics or gene expression experiments. The system is built with 82 complete genomes organized into gene families and subfamilies, and their evolutionary relationships are captured in phylogenetic trees, multiple sequence alignments and statistical models (hidden Markov models or HMMs). Genes are classified according to their function in several different ways: families and subfamilies are annotated with ontology terms (Gene Ontology (GO) and PANTHER protein class), and sequences are assigned to PANTHER pathways. The PANTHER website includes a suite of tools that enable users to browse and query gene functions, and to analyze large-scale experimental data with a number of statistical tests. It is widely used by bench scientists, bioinformaticians, computer scientists and systems biologists. In the 2013 release of PANTHER (v.8.0), in addition to an update of the data content, we redesigned the website interface to improve both user experience and the system's analytical capability. 
This protocol provides a detailed description of how to analyze genome-wide experimental data with the PANTHER classification system.",2013-07-18 +22102572,STAP Refinement of the NMR database: a database of 2405 refined solution NMR structures.,"According to several studies, some nuclear magnetic resonance (NMR) structures are of lower quality, less reliable and less suitable for structural analysis than high-resolution X-ray crystallographic structures. We present a public database of 2405 refined NMR solution structures [statistical torsion angle potentials (STAP) refinement of the NMR database, http://psb.kobic.re.kr/STAP/refinement] from the Protein Data Bank (PDB). A simulated annealing protocol was employed to obtain refined structures with target potentials, including the newly developed STAP. The refined database was extensively analysed using various quality indicators from several assessment programs to determine the nuclear Overhauser effect (NOE) completeness, Ramachandran appearance, χ(1)-χ(2) rotamer normality, various parameters for protein stability and other indicators. Most quality indicators are improved in our protocol mainly due to the inclusion of the newly developed knowledge-based potentials. This database can be used by the NMR structure community for further development of research and validation tools, structure-related studies and modelling in many fields of research.",2011-11-18 +24009693,DistMap: a toolkit for distributed short read mapping on a Hadoop cluster.,"With the rapid and steady increase of next generation sequencing data output, the mapping of short reads has become a major data analysis bottleneck. On a single computer, it can take several days to map the vast quantity of reads produced from a single Illumina HiSeq lane. In an attempt to ameliorate this bottleneck we present a new tool, DistMap - a modular, scalable and integrated workflow to map reads in the Hadoop distributed computing framework. 
DistMap is easy to use, currently supports nine different short read mapping tools and can be run on all Unix-based operating systems. It accepts reads in FASTQ format as input and provides mapped reads in a SAM/BAM format. DistMap supports both paired-end and single-end reads thereby allowing the mapping of read data produced by different sequencing platforms. DistMap is available from http://code.google.com/p/distmap/",2013-08-23 +25558467,Cold Climate Is a Risk Factor for Thyroid Cancer.,"

Background

The incidence rates of differentiated thyroid cancers of all sizes increased between 1988 and 2005 in both men and women. Exposure to ionizing radiation is the best-established environmental risk factor for thyroid cancer. Nonionizing radiation from cell phones has also been implicated. A positive correlation between all-cancer incidence rates and latitude and an inverse correlation between all-cancer incidence rates and temperature have been reported. In the present study, we examined the relationship between thyroid cancer incidence and average temperature in 50 U.S. states.

Methods

The age-adjusted incidence of thyroid cancer is from U.S. Cancer Statistics Working Group, United States Cancer Statistics: 1999-2010, Incidence and Mortality Web-based Report (Atlanta: Department of Health and Human Services, Centers for Disease Control and Prevention and National Cancer Institute; 2013, available at: www.cdc.gov/uscs). Average temperature by state is from the National Climatic Data Center, National Oceanic and Atmospheric Administration (http://www.ncdc.noaa.gov). Information on high-impact exposure to nuclear radiation by state is from the National Radiation Exposure Screening and Education Program, U.S. Health Resources and Services Administration (http://www.hrsa.gov/gethealthcare/conditions/radiationexposure). Cell-phone subscriber data for 2007 is from the Governing State and Local Sourcebook (http://sourcebook.governing.com). Mean elevation and latitude of U.S. states is from ""Elevations and Distances in the United States,"" Reston, VA: U.S. Geological Survey, April 29, 2005 (http://pubs.er.usgs.gov).

Results

There was a significant negative correlation between average temperature by state and the age-adjusted incidence of all thyroid cancers (r2 = -0.212, P = 0.001). Because of the possible effects of ionizing radiation exposure from nuclear testing and nonionizing radiation exposure from cell phones, multiple linear regression analysis was performed. The analysis was done only for all thyroid cancers and for thyroid cancers in whites. The data from blacks and Hispanics were too fragmentary to analyze. In all thyroid cancers and thyroid cancers in whites, there was a significant negative correlation between average temperature and incidence that was unrelated to nuclear testing, cell-phone use, altitude, and latitude and was independent of the significant correlation of cell-phone subscriptions per population with thyroid cancer in whites.

Conclusions

Living in a cold-climate state, such as Alaska, doubles the risk of thyroid cancer as compared with a warm state such as Texas. Because of climate change, a significantly raised risk of heat-related and cold-related mortality is expected in the years to come. The elderly will be most at risk. No doubt, incidence patterns of thyroid cancer and other cancers may be affected.",2014-10-01 +25638814,Factor graph analysis of live cell-imaging data reveals mechanisms of cell fate decisions.,"

Motivation

Cell fate decisions have a strong stochastic component. The identification of the underlying mechanisms therefore requires a rigorous statistical analysis of large ensembles of single cells that were tracked and phenotyped over time.

Results

We introduce a probabilistic framework for testing elementary hypotheses on dynamic cell behavior using time-lapse cell-imaging data. Factor graphs, probabilistic graphical models, are used to properly account for cell lineage and cell phenotype information. Our model is applied to time-lapse movies of murine granulocyte-macrophage progenitor (GMP) cells. It decides between competing hypotheses on the mechanisms of their differentiation. Our results theoretically substantiate previous experimental observations that lineage instruction, not selection is the cause for the differentiation of GMP cells into mature monocytes or neutrophil granulocytes.

Availability and implementation

The Matlab source code is available at http://treschgroup.de/Genealogies.html.",2015-01-31 +25086502,Phen-Gen: combining phenotype and genotype to analyze rare disorders.,"We introduce Phen-Gen, a method that combines patients' disease symptoms and sequencing data with prior domain knowledge to identify the causative genes for rare disorders. Simulations revealed that the causal variant was ranked first in 88% of cases when it was a coding variant-a 52% advantage over a genotype-only approach-and Phen-Gen outperformed other existing prediction methods by 13-58%. If disease etiology was unknown, the causal variant was assigned the top rank in 71% of simulations. Phen-Gen is available at http://phen-gen.org/.",2014-08-03 +26568622,Highly accurate sequence-based prediction of half-sphere exposures of amino acid residues in proteins.,"

Motivation

Solvent exposure of amino acid residues of proteins plays an important role in understanding and predicting protein structure, function and interactions. Solvent exposure can be characterized by several measures including solvent accessible surface area (ASA), residue depth (RD) and contact numbers (CN). More recently, an orientation-dependent contact number called half-sphere exposure (HSE) was introduced by separating the contacts within upper and down half spheres defined according to the Cα-Cβ (HSEβ) vector or neighboring Cα-Cα vectors (HSEα). HSEα calculated from protein structures was found to better describe the solvent exposure over ASA, CN and RD in many applications. Thus, a sequence-based prediction is desirable, as most proteins do not have experimentally determined structures. To our best knowledge, there is no method to predict HSEα and only one method to predict HSEβ.

Results

This study developed a novel method for predicting both HSEα and HSEβ (SPIDER-HSE) that achieved a consistent performance for 10-fold cross validation and two independent tests. The correlation coefficients between predicted and measured HSEβ (0.73 for upper sphere, 0.69 for down sphere and 0.76 for contact numbers) for the independent test set of 1199 proteins are significantly higher than existing methods. Moreover, predicted HSEα has a higher correlation coefficient (0.46) to the stability change by residue mutants than predicted HSEβ (0.37) and ASA (0.43). The results, together with its easy Cα-atom-based calculation, highlight the potential usefulness of predicted HSEα for protein structure prediction and refinement as well as function prediction.

Availability and implementation

The method is available at http://sparks-lab.org

Contact

yuedong.yang@griffith.edu.au or yaoqi.zhou@griffith.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-11-14 +22096236,ProOpDB: Prokaryotic Operon DataBase.,"The Prokaryotic Operon DataBase (ProOpDB, http://operons.ibt.unam.mx/OperonPredictor) constitutes one of the most precise and complete repositories of operon predictions now available. Using our novel and highly accurate operon identification algorithm, we have predicted the operon structures of more than 1200 prokaryotic genomes. ProOpDB offers diverse alternatives by which a set of operon predictions can be retrieved including: (i) organism name, (ii) metabolic pathways, as defined by the KEGG database, (iii) gene orthology, as defined by the COG database, (iv) conserved protein domains, as defined by the Pfam database, (v) reference gene and (vi) reference operon, among others. In order to limit the operon output to non-redundant organisms, ProOpDB offers an efficient method to select the most representative organisms based on a precompiled phylogenetic distances matrix. In addition, the ProOpDB operon predictions are used directly as the input data of our Gene Context Tool to visualize their genomic context and retrieve the sequence of their corresponding 5' regulatory regions, as well as the nucleotide or amino acid sequences of their genes.",2011-11-16 +25216490,Red blood cell cluster separation from digital images for use in sickle cell disease.,"The study of cell morphology is an important aspect of the diagnosis of some diseases, such as sickle cell disease, because red blood cell deformation is caused by these diseases. Due to the elongated shape of the erythrocyte, ellipse adjustment and concave point detection are applied widely to images of peripheral blood samples, including during the detection of cells that are partially occluded in the clusters generated by the sample preparation process. 
In the present study, we propose a method for the analysis of the shape of erythrocytes in peripheral blood smear samples of sickle cell disease, which uses ellipse adjustments and a new algorithm for detecting notable points. Furthermore, we apply a set of constraints that allow the elimination of significant image preprocessing steps proposed in previous studies. We used three types of images to validate our method: artificial images, which were automatically generated in a random manner using a computer code; real images from peripheral blood smear sample images that contained normal and elongated erythrocytes; and synthetic images generated from real isolated cells. Using the proposed method, the efficiency of detecting the two types of objects in the three image types exceeded 99.00%, 98.00%, and 99.35%, respectively. These efficiency levels were superior to the results obtained with previously proposed methods using the same database, which is available at http://erythrocytesidb.uib.es/. This method can be extended to clusters of several cells and it requires no user inputs.",2014-09-08 +25276334,The Porifera Ontology (PORO): enhancing sponge systematics with an anatomy ontology.,"

Background

Porifera (sponges) are ancient basal metazoans that lack organs. They provide insight into key evolutionary transitions, such as the emergence of multicellularity and the nervous system. In addition, their ability to synthesize unusual compounds offers potential biotechnical applications. However, much of the knowledge of these organisms has not previously been codified in a machine-readable way using modern web standards.

Results

The Porifera Ontology is intended as a standardized coding system for sponge anatomical features currently used in systematics. The ontology is available from http://purl.obolibrary.org/obo/poro.owl, or from the project homepage http://porifera-ontology.googlecode.com/. The version referred to in this manuscript is permanently available from http://purl.obolibrary.org/obo/poro/releases/2014-03-06/.

Conclusions

By standardizing character representations, we hope to facilitate more rapid description and identification of sponge taxa, to allow integration with other evolutionary database systems, and to perform character mapping across the major clades of sponges to better understand the evolution of morphological features. Future applications of the ontology will focus on creating (1) ontology-based species descriptions; (2) taxonomic keys that use the nested terms of the ontology to more quickly facilitate species identifications; and (3) methods to map anatomical characters onto molecular phylogenies of sponges. In addition to modern taxa, the ontology is being extended to include features of fossil taxa.",2014-09-08 +25196432,nDNA-Prot: identification of DNA-binding proteins based on unbalanced classification.,"

Background

DNA-binding proteins are vital for the study of cellular processes. In recent genome engineering studies, the identification of proteins with certain functions has become increasingly important and needs to be performed rapidly and efficiently. In previous years, several approaches have been developed to improve the identification of DNA-binding proteins. However, the currently available resources are insufficient to accurately identify these proteins. Because of this, the previous research has been limited by the relatively unbalanced accuracy rate and the low identification success of the current methods.

Results

In this paper, we explored the practicality of modelling DNA binding identification and simultaneously employed an ensemble classifier, and a new predictor (nDNA-Prot) was designed. The presented framework is comprised of two stages: a 188-dimension feature extraction method to obtain the protein structure and an ensemble classifier designated as imDC. Experiments using different datasets showed that our method is more successful than the traditional methods in identifying DNA-binding proteins. The identification was conducted using a feature that selected the minimum Redundancy and Maximum Relevance (mRMR). An accuracy rate of 95.80% and an Area Under the Curve (AUC) value of 0.986 were obtained in a cross validation. A test dataset was tested in our method and resulted in an 86% accuracy, versus a 76% using iDNA-Prot and a 68% accuracy using DNA-Prot.

Conclusions

Our method can help to accurately identify DNA-binding proteins, and the web server is accessible at http://datamining.xmu.edu.cn/~songli/nDNA. In addition, we also predicted possible DNA-binding protein sequences in all of the sequences from the UniProtKB/Swiss-Prot database.",2014-09-08 +25694078,Mutagenicity: QSAR - quasi-QSAR - nano-QSAR.,"Mutagenic potential of biphenyl-4-amines and multi-walled carbon nanotubes (MWCNTs) have been modeled by optimal descriptors. The optimal descriptors are calculated with the Monte Carlo method by means of the CORAL software (http://www.insilico.eu/coral). The optimal descriptor is a translator of eclectic data into prediction of various endpoints in general and into the prediction of the mutagenic potential (TA100) in particular. So-called, quasi-SMILES are suggested as representation of various circumstances which can influence the endpoint. The correlation weights of various circumstances are the basis of the approach. The statistical characteristics of models for the mutagenic potential (decimal logarithm of TA100) for the external invisible validation sets are the following (i) in the case of biphenyl-4- amines: n=7-11; r(2)=0.649±0.046; s=0.211±0.029; and (ii) in the case of MWCNTs: n=6, r(2)=0.804±0.107; s=0.048±0.01.",2015-01-01 +21546393,Molecular signatures database (MSigDB) 3.0.,"

Motivation

Well-annotated gene sets representing the universe of the biological processes are critical for meaningful and insightful interpretation of large-scale genomic data. The Molecular Signatures Database (MSigDB) is one of the most widely used repositories of such sets.

Results

We report the availability of a new version of the database, MSigDB 3.0, with over 6700 gene sets, a complete revision of the collection of canonical pathways and experimental signatures from publications, enhanced annotations and upgrades to the web site.

Availability and implementation

MSigDB is freely available for non-commercial use at http://www.broadinstitute.org/msigdb.",2011-05-05 +23749962,TASUKE: a web-based visualization program for large-scale resequencing data.,"

Summary

Because an enormous amount of sequence data is being collected, a method to effectively display sequence variation information is urgently needed. tasuke is a web application that visualizes large-scale resequencing data generated by next-generation sequencing technologies and is suitable for rapid data release to the public on the web. The variation and read depths of multiple genomes, as well as annotations, can be shown simultaneously at various scales. We demonstrate the use of TASUKE by applying it to 50 rice and 100 human genome resequencing datasets.

Availability and implementation

The tasuke program package and user manual are available from http://tasuke.dna.affrc.go.jp/.

Contact

taitoh@affrc.go.jp.",2013-06-07 +24022395,Offering fragile X syndrome carrier screening: a prospective mixed-methods observational study comparing carrier screening of pregnant and non-pregnant women in the general population.,"

Introduction

Fragile X syndrome (FXS) is the leading cause of inherited intellectual and developmental disability. Policy development relating to carrier screening programmes for FXS requires input from large studies examining not only test uptake but also psychosocial aspects. This study will compare carrier screening in pregnant and non-pregnant populations, examining informed decision-making, psychosocial issues and health economics.

Methods and analysis

Pregnant and non-pregnant women are being recruited from general practices and obstetric services. Women receive study information either in person or through clinic mail outs. Women are provided pretest counselling by a genetic counsellor and make a decision about testing in their own time. Data are being collected from two questionnaires: one completed at the time of making the decision about testing and the second 1 month later. Additional data are gathered through qualitative interviews conducted at several time points with a subset of participating women, including all women with a positive test result, and with staff from recruiting clinics. A minimum sample size of 500 women/group has been calculated to give us 88% power to detect a 10% difference in test uptake and 87% power to detect a 10% difference in informed choice between the pregnant and non-pregnant groups. Questionnaire data will be analysed using descriptive statistics and multivariate logistic regression models. Interview data will be thematically analysed. Willingness-to-pay and cost effectiveness analyses will also be performed. Recruitment started in July 2009 and data collection will be completed by December 2013.

Ethics and dissemination

Ethics approval has been granted by the Universities of Melbourne and Western Australia and by recruiting clinics, where required. Results will be reported in peer-reviewed publications, conference presentations and through a website http://www.fragilexscreening.net.au. The results of this study will make a significant contribution to discussions about the wider introduction of population carrier screening for FXS.",2013-09-10 +22339941,Congenital keratoconjunctivitis sicca and ichthyosiform dermatosis in Cavalier King Charles spaniel dogs. Part II: candidate gene study.,"PURPOSE: To identify causative mutation(s) for congenital keratoconjunctivitis sicca and ichthyosiform dermatosis (CKCSID) in Cavalier King Charles spaniel (CKCS) dogs using a candidate gene approach. METHODS: DNA samples from 21 cases/parents were collected. Canine candidate genes (CCGs) for similar inherited human diseases were chosen. Twenty-eight candidate genes were identified by searching the Pubmed OMIM database (http://www.ncbi.nlm.nih.gov/omim). Canine orthologues of human candidate genes were identified using the Ensembl orthologue prediction facility (http://www.ensembl.org/index.html). Two microsatellites flanking each candidate gene were selected, and primers to amplify each microsatellite were designed using the Whitehead Institute primer design website (http://frodo.wi.mit.edu/primer3/). The microsatellites associated with all 28 CCGs were genotyped on a panel of 21 DNA samples from CKCS dogs (13 affected and eight carriers). Genotyping data was analyzed to identify markers homozygous in affected dogs and heterozygous in carriers (homozygosity mapping). RESULTS: None of the microsatellites associated with 25 of the CCGs displayed an association with CKCSID in the 21 DNA samples tested. Three CCGs associated microsatellites were monomorphic across all samples tested. CONCLUSIONS: Twenty-five CCGs were excluded as cause of CKCSID. 
Three CCGs could not be excluded from involvement in the inheritance of CKCSID.",2012-02-16 +24886662,How informative is your kinetic model?: using resampling methods for model invalidation.,"

Background

Kinetic models can present mechanistic descriptions of molecular processes within a cell. They can be used to predict the dynamics of metabolite production, signal transduction or transcription of genes. Although there has been tremendous effort in constructing kinetic models for different biological systems, not much effort has been put into their validation. In this study, we introduce the concept of resampling methods for the analysis of kinetic models and present a statistical model invalidation approach.

Results

We based our invalidation approach on the evaluation of a kinetic model's predictive power through cross validation and forecast analysis. As a reference point for this evaluation, we used the predictive power of an unsupervised data analysis method which does not make use of any biochemical knowledge, namely Smooth Principal Components Analysis (SPCA) on the same test sets. Through a simulations study, we showed that too simple mechanistic descriptions can be invalidated by using our SPCA-based comparative approach until high amount of noise exists in the experimental data. We also applied our approach on an eicosanoid production model developed for human and concluded that the model could not be invalidated using the available data despite its simplicity in the formulation of the reaction kinetics. Furthermore, we analysed the high osmolarity glycerol (HOG) pathway in yeast to question the validity of an existing model as another realistic demonstration of our method.

Conclusions

With this study, we have successfully presented the potential of two resampling methods, cross validation and forecast analysis in the analysis of kinetic models' validity. Our approach is easy to grasp and to implement, applicable to any ordinary differential equation (ODE) type biological model and does not suffer from any computational difficulties which seems to be a common problem for approaches that have been proposed for similar purposes. Matlab files needed for invalidation using SPCA cross validation and our toy model in SBML format are provided at http://www.bdagroup.nl/content/Downloads/software/software.php.",2014-05-22 +25732605,METAXA2: improved identification and taxonomic classification of small and large subunit rRNA in metagenomic data.,"The ribosomal rRNA genes are widely used as genetic markers for taxonomic identification of microbes. Particularly the small subunit (SSU; 16S/18S) rRNA gene is frequently used for species- or genus-level identification, but also the large subunit (LSU; 23S/28S) rRNA gene is employed in taxonomic assignment. The METAXA software tool is a popular utility for extracting partial rRNA sequences from large sequencing data sets and assigning them to an archaeal, bacterial, nuclear eukaryote, mitochondrial or chloroplast origin. This study describes a comprehensive update to METAXA - METAXA2 - that extends the capabilities of the tool, introducing support for the LSU rRNA gene, a greatly improved classifier allowing classification down to genus or species level, as well as enhanced support for short-read (100 bp) and paired-end sequences, among other changes. The performance of METAXA2 was compared to other commonly used taxonomic classifiers, showing that METAXA2 often outperforms previous methods in terms of making correct predictions while maintaining a low misclassification rate. 
METAXA2 is freely available from http://microbiology.se/software/metaxa2/.",2015-03-23 +24564479,QChIPat: a quantitative method to identify distinct binding patterns for two biological ChIP-seq samples in different experimental conditions.,"

Background

Many computational programs have been developed to identify enriched regions for a single biological ChIP-seq sample. Given that many biological questions are often asked to compare the difference between two different conditions, it is important to develop new programs that address the comparison of two biological ChIP-seq samples. Despite several programs designed to address this question, these programs suffer from some drawbacks, such as inability to distinguish whether the identified differential enriched regions are indeed significantly enriched, lack of distinguishing binding patterns, and neglect of the normalization between samples.

Results

In this study, we developed a novel quantitative method for comparing two biological ChIP-seq samples, called QChIPat. Our method employs a new global normalization method: nonparametric empirical Bayes (NEB) correction normalization, utilizes pre-defined enriched regions identified from single-sample peak calling programs, uses statistical methods to define differential enriched regions, then defines binding (histone modification) pattern information for those differential enriched regions. Our program was tested on a benchmark data: histone modifications data used by ChIPDiffs. It was then applied on two study cases: one to identify differential histone modification sites for ChIP-seq of H3K27me3 and H3K9me2 data in AKT1-transfected MCF10A cells; the other to identify differential binding sites for ChIP-seq of TCF7L2 data in MCF7 and PANC1 cells.

Conclusions

Several advantages of our program include: 1) it considers a control (or input) experiment; 2) it incorporates a novel global normalization strategy: nonparametric empirical Bayes correction normalization; 3) it provides the binding pattern information among different enriched regions. QChIPat is implemented in R, Perl and C++, and has been tested under Linux. The R package is available at http://motif.bmi.ohio-state.edu/QChIPat.",2013-12-09 +24320218,Development of a model webserver for breed identification using microsatellite DNA marker.,"

Background

Identification of true to breed type animal for conservation purpose is imperative. Breed dilution is one of the major problems in sustainability except cases of commercial crossbreeding under controlled condition. Breed descriptor has been developed to identify breed but such descriptors cover only ""pure breed"" or true to the breed type animals excluding undefined or admixture population. Moreover, in case of semen, ova, embryo and breed product, the breed cannot be identified due to lack of visible phenotypic descriptors. Advent of molecular markers like microsatellite and SNP have revolutionized breed identification from even small biological tissue or germplasm. Microsatellite DNA marker based breed assignments has been reported in various domestic animals. Such methods have limitations viz. non availability of allele data in public domain, thus each time all reference breed has to be genotyped which is neither logical nor economical. Even if such data is available but computational methods needs expertise of data analysis and interpretation.

Results

We found Bayesian Networks as best classifier with highest accuracy of 98.7% using 51850 reference allele data generated by 25 microsatellite loci on 22 goat breed population of India. The FST values in the study were seen to be low ranging from 0.051 to 0.297 and overall genetic differentiation of 13.8%, suggesting more number of loci needed for higher accuracy. We report here world's first model webserver for breed identification using microsatellite DNA markers freely accessible at http://cabin.iasri.res.in/gomi/.

Conclusion

Higher number of loci is required due to less differentiable population and large number of breeds taken in this study. This server will reduce the cost with computational ease. This methodology can be a model for various other domestic animal species as a valuable tool for conservation and breed improvement programmes.",2013-12-09 +26566270,"Evolution of Minimum Mortality Temperature in Stockholm, Sweden, 1901-2009.","

Background

The mortality impacts of hot and cold temperatures have been thoroughly documented, with most locations reporting a U-shaped relationship with a minimum mortality temperature (MMT) at which mortality is lowest. How MMT may have evolved over previous decades as the global mean surface temperature has increased has not been thoroughly explored.

Objective

We used observations of daily mean temperatures to investigate whether MMT changed in Stockholm, Sweden, from the beginning of the 20th century until 2009.

Methods

Daily mortality and temperature data for the period 1901-2009 in Stockholm, Sweden, were used to model the temperature-mortality relationship. We estimated MMT using distributed lag nonlinear Poisson regression models considering lags up to 21 days of daily mean temperature as the exposure variable. To avoid large influences on the MMT from intra- and interannual climatic variability, we estimated MMT based on 30-year periods. Furthermore, we investigated whether there were trends in the absolute value of the MMT and in the relative value of the MMT (the corresponding percentile of the same-day temperature distribution) over the study period.

Results

Our findings suggest that both the absolute MMT and the relative MMT increased in Stockholm, Sweden, over the course of the 20th century.

Conclusions

The increase in the MMT over the course of the 20th century suggests autonomous adaptation within the context of the large epidemiological, demographical, and societal changes that occurred. Whether the rate of increase will be sustained with climate change is an open question.

Citation

Oudin Åström D, Tornevi A, Ebi KL, Rocklöv J, Forsberg B. 2016. Evolution of minimum mortality temperature in Stockholm, Sweden, 1901-2009. Environ Health Perspect 124:740-744; http://dx.doi.org/10.1289/ehp.1509692.",2015-11-13 +30727280,First Report of Powdery Mildew Caused by Erysiphe arcuata on Lanceleaf Coreopsis (Coreopsis lanceolata) in Korea.,"Lanceleaf coreopsis (Coreopsis lanceolata L.) is a plant species of the genus Coreopsis in Asteraceae native to the USA. This plant is a bushy perennial species with finely cut foliage and showy round flowers and is increasing as plants used in landscaping in the world. The invasive plant is also planted very commonly along roadsides in Korea. In late October 2011 and August 2012, signs and symptoms of a powdery mildew disease were observed on lanceleaf coreopsis in several land areas near Gwangju-river, Gwangju, Korea. Symptoms included typical white superficial mycelia. The conidia of the powdery mildew fungus occurred on adaxial and abaxial surfaces. Chasmothecia were not observed. Single conidia formed terminally on conidiophores. Conidial morphology was subcylindrical to oblong. Dimension of conidia was 23.6 to 41.4 (avg. 35.1) μm long × 11.3 to 18.2 (avg. 14.8) μm wide. Conidiophores were composed of five to six (up to seven) cells, ranged from 45.7 to 131.2 (avg. 98.1) μm long × 8.2 to 11.1 (avg. 8.3) μm wide with foot-cells straight to slightly flexuous. Oidium anamorph of this fungus matched that of E. arcuata U. Braun, Heluta and S. Takam. described by Pastircakova et al. (3). From extracted genomic DNA, the rDNA ITS was amplified with ITS1F (5'-CTTGGTCATTTAGAGGAAGT-3') and LR5F (5'-GCTATCCTGAGGGAAAC-3') primer set. The rDNA ITS homology of the fungus (EML-CDPW1, GenBank Accession No. JX485650) showed 100% (590/590) identity value with E. arcuata (GenBank Accession No. AB252459). The identification of the fungus as E. arcuata was based on morphological data combined with the results of sequence analysis. 
Until recently, E. arcuata has been known to widespread on Carpinus species of the family Betulaceae including European hornbeam (C. betulus L.) and Chonowski's hornbeam (C. tschonoskii Maxim.) in Asia and Europe since Braun et al. first reported it as a new species in 2006 (1). In Korea, Podosphaera fusca (= Sphaerotheca fusca) and P. fuliginea (= S. fuliginea) were reported to cause powdery mildews on Coreopsis lanceolata. E. cichoracearum (= Golovinomyces cichoracearum), Leveillula taurica, P. fusca (= S. fusca), and Oidium spp. have been reported on Coreopsis spp. in the world (3). To our knowledge, this is the first report of powdery mildew caused by Oidium anamorph of E. arcuata on lanceleaf coreopsis (C. lanceolata) in Korea or elsewhere in the world. References: (1) U. Braun et al. Mycol. Prog. 5:139, 2006. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. http://nt.ars-grin.gov/fungaldatabases/ , 2012. (3) K. Pastircakova et al. J. Phytopathol. 156:597, 2008.",2012-12-01 +26124807,Drug-drug interation prediction between ketoconazole and anti-liver cancer drug Gomisin G.,"

Background

Gomisin G, isolated from herb Schisandra chinensis, exhibits anti-tumor activities. Therefore, Gomisin G is a drug candidate for anti-liver cancer therapy.

Aims

To predict the metabolic behavior and metabolism-based drug-drug interaction of gomisin G.

Methods

Molecular docking method was used. The crystal structure of CYP3A4 with the ligand ketoconazole was chosen from protein data bank (http://www.rcsb.org/pdb). Chemdraw software was used to draw the two-dimensional structure of gomisin G with standard bond lengths and angles.

Results

Gomisin G can be well docked into the activity site of CYP3A4, and the distance between gomisin G and the heme active site was 2.75 Å. To evaluate whether the inhibitors of CYP3A4 can affect the metabolism of gomisin G, co-docking of gomisin G and ketoconazole was further performed. The distance between ketoconazole and the activity center (2.10 Å) is shorter than the distance between gomisin G and the activity center of CYP3A4, indicating the easy influence of CYP3A4's strong inhibitor towards the metabolism of gomisin G.

Conclusion

Gomisin G is a good substrate of CYP3A4, and CYP3A4 inhibitors easily affect the metabolism of Gomisin G.",2015-06-01 +24966364,CNV-guided multi-read allocation for ChIP-seq.,"

Motivation

In chromatin immunoprecipitation followed by high-throughput sequencing (ChIP-seq) and other short-read sequencing experiments, a considerable fraction of the short reads align to multiple locations on the reference genome (multi-reads). Inferring the origin of multi-reads is critical for accurately mapping reads to repetitive regions. Current state-of-the-art multi-read allocation algorithms rely on the read counts in the local neighborhood of the alignment locations and ignore the variation in the copy numbers of these regions. Copy-number variation (CNV) can directly affect the read densities and, therefore, bias allocation of multi-reads.

Results

We propose cnvCSEM (CNV-guided ChIP-Seq by expectation-maximization algorithm), a flexible framework that incorporates CNV in multi-read allocation. cnvCSEM eliminates the CNV bias in multi-read allocation by initializing the read allocation algorithm with CNV-aware initial values. Our data-driven simulations illustrate that cnvCSEM leads to higher read coverage with satisfactory accuracy and lower loss in read-depth recovery (estimation). We evaluate the biological relevance of the cnvCSEM-allocated reads and the resultant peaks with the analysis of several ENCODE ChIP-seq datasets.

Availability and implementation

Available at http://www.stat.wisc.edu/~qizhang/

Contact

: qizhang@stat.wisc.edu or keles@stat.wisc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-06-24 +26072497,In silico phenotyping via co-training for improved phenotype prediction from genotype.,"

Motivation

Predicting disease phenotypes from genotypes is a key challenge in medical applications in the postgenomic era. Large training datasets of patients that have been both genotyped and phenotyped are the key requisite when aiming for high prediction accuracy. With current genotyping projects producing genetic data for hundreds of thousands of patients, large-scale phenotyping has become the bottleneck in disease phenotype prediction.

Results

Here we present an approach for imputing missing disease phenotypes given the genotype of a patient. Our approach is based on co-training, which predicts the phenotype of unlabeled patients based on a second class of information, e.g. clinical health record information. Augmenting training datasets by this type of in silico phenotyping can lead to significant improvements in prediction accuracy. We demonstrate this on a dataset of patients with two diagnostic types of migraine, termed migraine with aura and migraine without aura, from the International Headache Genetics Consortium.

Conclusions

Imputing missing disease phenotypes for patients via co-training leads to larger training datasets and improved prediction accuracy in phenotype prediction.

Availability and implementation

The code can be obtained at: http://www.bsse.ethz.ch/mlcb/research/bioinformatics-and-computational-biology/co-training.html",2015-06-01 +25398475,MaxSSmap: a GPU program for mapping divergent short reads to genomes with the maximum scoring subsequence.,"

Background

Programs based on hash tables and Burrows-Wheeler are very fast for mapping short reads to genomes but have low accuracy in the presence of mismatches and gaps. Such reads can be aligned accurately with the Smith-Waterman algorithm but it can take hours and days to map millions of reads even for bacteria genomes.

Results

We introduce a GPU program called MaxSSmap with the aim of achieving comparable accuracy to Smith-Waterman but with faster runtimes. Similar to most programs MaxSSmap identifies a local region of the genome followed by exact alignment. Instead of using hash tables or Burrows-Wheeler in the first part, MaxSSmap calculates maximum scoring subsequence score between the read and disjoint fragments of the genome in parallel on a GPU and selects the highest scoring fragment for exact alignment. We evaluate MaxSSmap's accuracy and runtime when mapping simulated Illumina E.coli and human chromosome one reads of different lengths and 10% to 30% mismatches with gaps to the E.coli genome and human chromosome one. We also demonstrate applications on real data by mapping ancient horse DNA reads to modern genomes and unmapped paired reads from NA12878 in 1000 genomes.

Conclusions

We show that MaxSSmap attains comparable high accuracy and low error to fast Smith-Waterman programs yet has much lower runtimes. We show that MaxSSmap can map reads rejected by BWA and NextGenMap with high accuracy and low error much faster than if Smith-Waterman were used. On short read lengths of 36 and 51 both MaxSSmap and Smith-Waterman have lower accuracy compared to at higher lengths. On real data MaxSSmap produces many alignments with high score and mapping quality that are not given by NextGenMap and BWA. The MaxSSmap source code in CUDA and OpenCL is freely available from http://www.cs.njit.edu/usman/MaxSSmap.",2014-11-15 +25478846,ACHESYM: an algorithm and server for standardized placement of macromolecular models in the unit cell.,"Despite the existence of numerous useful conventions in structural crystallography, for example for the choice of the asymmetric part of the unit cell or of reciprocal space, surprisingly no standards are in use for the placement of the molecular model in the unit cell, often leading to inconsistencies or confusion. A conceptual solution for this problem has been proposed for macromolecular crystal structures based on the idea of the anti-Cheshire unit cell. Here, a program and server (called ACHESYM; http://achesym.ibch.poznan.pl) are presented for the practical implementation of this concept. In addition, the first task of ACHESYM is to find an optimal (compact) macromolecular assembly if more than one polymer chain exists. ACHESYM processes PDB (atomic parameters and TLS matrices) and mmCIF (diffraction data) input files to produce a new coordinate set and to reindex the reflections and modify their phases, if necessary.",2014-11-28 +26104745,MetaQuery: a web server for rapid annotation and quantitative analysis of specific genes in the human gut microbiome.,"

Unlabelled

Microbiome researchers frequently want to know how abundant a particular microbial gene or pathway is across different human hosts, including its association with disease and its co-occurrence with other genes or microbial taxa. With thousands of publicly available metagenomes, these questions should be easy to answer. However, computational barriers prevent most researchers from conducting such analyses. We address this problem with MetaQuery, a web application for rapid and quantitative analysis of specific genes in the human gut microbiome. The user inputs one or more query genes, and our software returns the estimated abundance of these genes across 1267 publicly available fecal metagenomes from American, European and Chinese individuals. In addition, our application performs downstream statistical analyses to identify features that are associated with gene variation, including other query genes (i.e. gene co-variation), taxa, clinical variables (e.g. inflammatory bowel disease and diabetes) and average genome size. The speed and accessibility of MetaQuery are a step toward democratizing metagenomics research, which should allow many researchers to query the abundance and variation of specific genes in the human gut microbiome.

Availability and implementation

http://metaquery.docpollard.org.

Contact

snayfach@gmail.com. Supplementary information: Supplementary data are available at Bioinformatics online.",2015-06-22 +26454278,FALCON@home: a high-throughput protein structure prediction server based on remote homologue recognition.,"

Summary

The protein structure prediction approaches can be categorized into template-based modeling (including homology modeling and threading) and free modeling. However, the existing threading tools perform poorly on remote homologous proteins. Thus, improving fold recognition for remote homologous proteins remains a challenge. Besides, the proteome-wide structure prediction poses another challenge of increasing prediction throughput. In this study, we presented FALCON@home as a protein structure prediction server focusing on remote homologue identification. The design of FALCON@home is based on the observation that a structural template, especially for remote homologous proteins, consists of conserved regions interweaved with highly variable regions. The highly variable regions lead to vague alignments in threading approaches. Thus, FALCON@home first extracts conserved regions from each template and then aligns a query protein with conserved regions only rather than the full-length template directly. This helps avoid the vague alignments rooted in highly variable regions, improving remote homologue identification. We implemented FALCON@home using the Berkeley Open Infrastructure of Network Computing (BOINC) volunteer computing protocol. With computation power donated from over 20,000 volunteer CPUs, FALCON@home shows a throughput as high as processing of over 1000 proteins per day. In the Critical Assessment of protein Structure Prediction (CASP11), the FALCON@home-based prediction was ranked the 12th in the template-based modeling category. As an application, the structures of 880 mouse mitochondria proteins were predicted, which revealed the significant correlation between protein half-lives and protein structural factors.

Availability and implementation

FALCON@home is freely available at http://protein.ict.ac.cn/FALCON/.

Contact

shuaicli@cityu.edu.hk, dbu@ict.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-10 +26254668,Massively parallel sampling of lattice proteins reveals foundations of thermal adaptation.,"Evolution of proteins in bacteria and archaea living in different conditions leads to significant correlations between amino acid usage and environmental temperature. The origins of these correlations are poorly understood, and an important question of protein theory, physics-based prediction of types of amino acids overrepresented in highly thermostable proteins, remains largely unsolved. Here, we extend the random energy model of protein folding by weighting the interaction energies of amino acids by their frequencies in protein sequences and predict the energy gap of proteins designed to fold well at elevated temperatures. To test the model, we present a novel scalable algorithm for simultaneous energy calculation for many sequences in many structures, targeting massively parallel computing architectures such as graphics processing unit. The energy calculation is performed by multiplying two matrices, one representing the complete set of sequences, and the other describing the contact maps of all structural templates. An implementation of the algorithm for the CUDA platform is available at http://www.github.com/kzeldovich/galeprot and calculates protein folding energies over 250 times faster than a single central processing unit. Analysis of amino acid usage in 64-mer cubic lattice proteins designed to fold well at different temperatures demonstrates an excellent agreement between theoretical and simulated values of energy gap. 
The theoretical predictions of temperature trends of amino acid frequencies are significantly correlated with bioinformatics data on 191 bacteria and archaea, and highlight protein folding constraints as a fundamental selection pressure during thermal adaptation in biological evolution.",2015-08-01 +26650608,Aberrant expression of long noncoding RNAs in cumulus cells isolated from PCOS patients.,"

Purpose

To describe the long noncoding RNA (lncRNA) profiles in cumulus cells isolated from polycystic ovary syndrome (PCOS) patients by employing a microarray and in-depth bioinformatics analysis. This information will help us understand the occurrence and development of PCOS.

Methods

In this study, we used a microarray to describe lncRNA profiles in cumulus cells isolated from ten patients (five PCOS and five normal women). Several differentially expressed lncRNAs were chosen to validate the microarray results by quantitative RT-PCR (qRT-PCR). Then, the differentially expressed lncRNAs were classified into three subgroups (HOX loci lncRNA, enhancer-like lncRNA, and lincRNA) to deduce their potential features. Furthermore, a lncRNA/mRNA co-expression network was constructed by using the Cytoscape software (V2.8.3, http://www.cytoscape.org/ ).

Results

We observed that 623 lncRNAs and 260 messenger RNAs (mRNAs) were significantly up- or down-regulated (≥2-fold change), and these differences could be used to discriminate cumulus cells of PCOS from those of normal patients. Five differentially expressed lncRNAs (XLOC_011402, ENST00000454271, ENST00000433673, ENST00000450294, and ENST00000432431) were selected to validate the microarray results using quantitative RT-PCR (qRT-PCR). The qRT-PCR results were consistent with the microarray data. Further analysis indicated that many differentially expressed lncRNAs were transcribed from chromosome 2 and may act as enhancers to regulate their neighboring protein-coding genes. Forty-three lncRNAs and 29 mRNAs were used to construct the coding-non-coding gene co-expression network. Most pairs positively correlated, and one mRNA correlated with one or more lncRNAs.

Conclusions

Our study is the first to determine genome-wide lncRNA expression patterns in cumulus cells isolated from PCOS patients by microarray. The results show that clusters of lncRNAs were aberrantly expressed in cumulus cells of PCOS patients compared with those of normal women, which revealed that lncRNAs differentially expressed in PCOS and normal women may contribute to the occurrence of PCOS and affect oocyte development.",2015-12-09 +25878035,PheNetic: network-based interpretation of molecular profiling data.,"Molecular profiling experiments have become standard in current wet-lab practices. Classically, enrichment analysis has been used to identify biological functions related to these experimental results. Combining molecular profiling results with the wealth of currently available interactomics data, however, offers the opportunity to identify the molecular mechanism behind an observed molecular phenotype. In this paper, we therefore introduce 'PheNetic', a user-friendly web server for inferring a sub-network based on probabilistic logical querying. PheNetic extracts from an interactome, the sub-network that best explains genes prioritized through a molecular profiling experiment. Depending on its run mode, PheNetic searches either for a regulatory mechanism that gave explains to the observed molecular phenotype or for the pathways (in)activated in the molecular phenotype. The web server provides access to a large number of interactomes, making sub-network inference readily applicable to a wide variety of organisms. The inferred sub-networks can be interactively visualized in the browser. PheNetic's method and use are illustrated using an example analysis of differential expression results of ampicillin treated Escherichia coli cells. 
The PheNetic web service is available at http://bioinformatics.intec.ugent.be/phenetic/.",2015-04-15 +24598230,"Low copy target detection by Droplet Digital PCR through application of a novel open access bioinformatic pipeline, 'definetherain'.","Droplet Digital PCR (ddPCR) represents a new and alternative platform to conventional quantitative-PCR (qPCR) for the quantitation of DNA templates. However, the proposed improvement in sensitivity and reproducibility offered by ddPCR is not yet fully proven, partly because the delineation between positive and negative responses is not always clear. Data are presented demonstrating the sensitivity of the ddPCR system to both reagent concentrations and choice of cut-off for defining positive and negative results. By implementing k-nearest clustering, cut-offs are produced that improve the accuracy of ddPCR where target DNA is present at low copy numbers, a key application of ddPCR. This approach is applied to human albumin and HIV-1 proviral DNA ddPCR quantitative protocols. This tool is coded in JavaScript and has been made available for free in a web browser at http://www.definetherain.org.uk. Optimisation of the analyses of raw ddPCR data using 'definetherain' indicates that low target number detection can be improved by its implementation. Further application to patient samples will help define the clinical utility of this approach.",2014-03-02 +24615884,In silico detection of phylogenetic informative Y-chromosomal single nucleotide polymorphisms from whole genome sequencing data.,"A state-of-the-art phylogeny of the human Y-chromosome is an essential tool for forensic genetics. The explosion of whole genome sequencing (WGS) data due to the rapid progress of next-generation sequencing facilities is useful to optimize and to increase the resolution of the phylogenetic Y-chromosomal tree. 
The most interesting Y-chromosomal variants to increase the phylogeny are SNPs (Y-SNPs) especially since the software to call them in WGS data and to genotype them in forensic assays has been optimized over the past years. The PENNY software presented here detects potentially phylogenetic interesting Y-SNPs in silico based on SNP calling data files and classifies them into different types according to their position in the currently used Y-chromosomal tree. The software utilized 790 available male WGS samples of which 172 had a high SNP calling quality. In total, 1269 Y-SNPs potentially capable of increasing the resolution of the Y-chromosomal phylogenetic tree were detected based on a first run with PENNY. Based on a test panel of 57 high-quality and 618 low-quality WGS samples, we could prove that these newly added Y-SNPs indeed increased the resolution of the phylogenetic Y-chromosomal analysis substantially. Finally, we performed a second run with PENNY whereby all samples including those of the test panel are used and this resulted in 509 additional phylogenetic promising Y-SNPs. By including these additional Y-SNPs, a final update of the present phylogenetic Y-chromosomal tree which is useful for forensic applications was generated. In order to find more convincing forensic interesting Y-SNPs with this PENNY software, the number of samples and variety of the haplogroups to which these samples belong needs to increase. The PENNY software (inclusive the user manual) is freely available on the website http://bio.kuleuven.be/eeb/lbeg/software.",2014-03-20 +26528556,Specifications of Standards in Systems and Synthetic Biology.,"Standards shape our everyday life. From nuts and bolts to electronic devices and technological processes, standardised products and processes are all around us. Standards have technological and economic benefits, such as making information exchange, production, and services more efficient. 
However, novel, innovative areas often either lack proper standards, or documents about standards in these areas are not available from a centralised platform or formal body (such as the International Standardisation Organisation). Systems and synthetic biology is a relatively novel area, and it is only in the last decade that the standardisation of data, information, and models related to systems and synthetic biology has become a community-wide effort. Several open standards have been established and are under continuous development as a community initiative. COMBINE, the ‘COmputational Modeling in BIology’ NEtwork has been established as an umbrella initiative to coordinate and promote the development of the various community standards and formats for computational models. There are yearly two meeting, HARMONY (Hackathons on Resources for Modeling in Biology), Hackathon-type meetings with a focus on development of the support for standards, and COMBINE forums, workshop-style events with oral presentations, discussion, poster, and breakout sessions for further developing the standards. For more information see http://co.mbine.org/. So far the different standards were published and made accessible through the standards’ web- pages or preprint services. The aim of this special issue is to provide a single, easily accessible and citable platform for the publication of standards in systems and synthetic biology. This special issue is intended to serve as a central access point to standards and related initiatives in systems and synthetic biology, it will be published annually to provide an opportunity for standard development groups to communicate updated specifications.",2015-09-04 +25179208,"Characterization of carbapenem-resistant Pseudomonas aeruginosa clinical isolates, carrying multiple genes coding for this antibiotic resistance.","

Background

Carbapenemase genes are one of the most frequent mechanisms reported in carbapenem-resistant P. aeruginosa; however, description of P. aeruginosa co-harbouring two or more carbapenemases is unusual.

Methods

In this study we evaluated the presence of carbapenemase genes and the clonality of P. aeruginosa isolates obtained from a hospital over a 12-year period. A total of 127 isolates of carbapenem-resistant P. aeruginosa recovered from 109 patients feces (four samples), rectal swab (three samples), nasal swab (one sample) and anal abscess (one sample), were evaluated. Minimum inhibitory concentrations of the following antibiotics: imipenem, meropenem and polymyxin E were determined by broth microdilution. The molecular profile of isolates was evaluated by pulsed field gel electrophoresis (PFGE). PCR for the following carbapenemase genes: blaIMP; blaSPM; blaVIM; blaSIM; blaNDM; blaKPC; blaGES and nucleotide sequencing to confirm the enzyme gene types were performed and compared with the database available on the Internet (BLAST-http://www.ncbi.nlm.nih.gov/blast/).

Results

All isolates were carbapenem-resistant, their MIC50 and MIC90 were respectively 64 μg/mL and 256 μg/mL to imipenem and 32 μg/mL and 256 μg/mL to meropenem, all isolates except one (MIC = 8 mg/L) were susceptible to polymyxin E. The most frequent carbapenemase genes identified were blaSPM identified in 41 isolates (32%), followed by 10 with blakpc and 5 with blaVIM (3.9%). All belonged to the class SPM-1 and VIM-2. In 2011, one isolate harbouring three carbapenemase genes (SPM-1, VIM-2 and KPC-2) that belonged to a new clone was identified in a hematopoietic stem cell transplanted patient. Then, 19 carbapenem-resistant P. aeruginosa were identified in an outbreak that occurred in the bone marrow transplant unit, all positive for SPM-1 gene, and 9 (47.3%) harbored both SPM-1 and KPC.

Conclusion

Our findings showed that PCR for KPC gene should be performed to evaluate carbapenem resistance in P. aeruginosa and that this agent can harbor more than one carbapenemase gene. Attention should be focused on the possible rapid spread of KPC in P. aeruginosa isolates and for the fact that P. aeruginosa may become a reservoir of this transmissible resistance mechanism.",2014-09-02 +27490393,The Permeability of an Artificial Membrane for Wide Range of pH in Human Gastrointestinal Tract: Experimental Measurements and Quantitative StructureActivity Relationship.,"In silico models for membrane permeability have been based on values measured for single pH. Depending on the diet (fasted/fed state) and part of human intestine the range of pH varies approximately from 2.4 to 8.0. This motivated to study and model the membrane permeability of chemicals considering the whole range of pH in the human intestine. For this, effective membrane permeability values were measured for 65 drugs and drug-like compounds using PAMPA method at four pHs (3, 5, 7.4, 9) over 48 h, introducing technological innovations for the time-dependence measurement. The highest permeability value of a compound from four pHs was used to derive QSAR analyzing a large pool of molecular descriptors and introducing new descriptor. Using stepwise forward selection approach a significant QSAR model was derived that included only two mechanistically relevant descriptors, the logarithmic octanol-water partition coefficient and hydrogen bonding surface area. Prediction confidence of the model was blind tested with a true external validation set of 15 compounds. The resulting QSAR model shows potential to combine permeability values from various pH-s into one descriptive and predictive model for estimating maximum permeability in human gastrointestinal tract. 
The QSAR model and data are available through the QsarDB repository (http://dx.doi.org/10.15152/QDB.137).",2015-06-18 +24833412,PLUS: open-source toolkit for ultrasound-guided intervention systems.,"A variety of advanced image analysis methods have been under the development for ultrasound-guided interventions. Unfortunately, the transition from an image analysis algorithm to clinical feasibility trials as part of an intervention system requires integration of many components, such as imaging and tracking devices, data processing algorithms, and visualization software. The objective of our paper is to provide a freely available open-source software platform-PLUS: Public software Library for Ultrasound-to facilitate rapid prototyping of ultrasound-guided intervention systems for translational clinical research. PLUS provides a variety of methods for interventional tool pose and ultrasound image acquisition from a wide range of tracking and imaging devices, spatial and temporal calibration, volume reconstruction, simulated image generation, and recording and live streaming of the acquired data. This paper introduces PLUS, explains its functionality and architecture, and presents typical uses and performance in ultrasound-guided intervention systems. PLUS fulfills the essential requirements for the development of ultrasound-guided intervention systems and it aspires to become a widely used translational research prototyping platform. PLUS is freely available as open source software under BSD license and can be downloaded from http://www.plustoolkit.org.",2014-05-09 +24813214,Efficient RNA isoform identification and quantification from RNA-Seq data with network flows.,"

Motivation

Several state-of-the-art methods for isoform identification and quantification are based on $\ell_1$-regularized regression, such as the Lasso. However, explicitly listing the—possibly exponentially large—set of candidate transcripts is intractable for genes with many exons. For this reason, existing approaches using the $\ell_1$-penalty are either restricted to genes with few exons or only run the regression algorithm on a small set of preselected isoforms.

Results

We introduce a new technique called FlipFlop, which can efficiently tackle the sparse estimation problem on the full set of candidate isoforms by using network flow optimization. Our technique removes the need of a preselection step, leading to better isoform identification while keeping a low computational cost. Experiments with synthetic and real RNA-Seq data confirm that our approach is more accurate than alternative methods and one of the fastest available.

Availability and implementation

Source code is freely available as an R package from the Bioconductor Web site (http://www.bioconductor.org/), and more information is available at http://cbio.ensmp.fr/flipflop.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-09 +25188327,Pep2Path: automated mass spectrometry-guided genome mining of peptidic natural products.,"Nonribosomally and ribosomally synthesized bioactive peptides constitute a source of molecules of great biomedical importance, including antibiotics such as penicillin, immunosuppressants such as cyclosporine, and cytostatics such as bleomycin. Recently, an innovative mass-spectrometry-based strategy, peptidogenomics, has been pioneered to effectively mine microbial strains for novel peptidic metabolites. Even though mass-spectrometric peptide detection can be performed quite fast, true high-throughput natural product discovery approaches have still been limited by the inability to rapidly match the identified tandem mass spectra to the gene clusters responsible for the biosynthesis of the corresponding compounds. With Pep2Path, we introduce a software package to fully automate the peptidogenomics approach through the rapid Bayesian probabilistic matching of mass spectra to their corresponding biosynthetic gene clusters. Detailed benchmarking of the method shows that the approach is powerful enough to correctly identify gene clusters even in data sets that consist of hundreds of genomes, which also makes it possible to match compounds from unsequenced organisms to closely related biosynthetic gene clusters in other genomes. Applying Pep2Path to a data set of compounds without known biosynthesis routes, we were able to identify candidate gene clusters for the biosynthesis of five important compounds. Notably, one of these clusters was detected in a genome from a different subphylum of Proteobacteria than that in which the molecule had first been identified. All in all, our approach paves the way towards high-throughput discovery of novel peptidic natural products. 
Pep2Path is freely available from http://pep2path.sourceforge.net/, implemented in Python, licensed under the GNU General Public License v3 and supported on MS Windows, Linux and Mac OS X.",2014-09-04 +30699650,First Report of Pindo Palm Heart Rot Caused by Ceratocystis paradoxa in China.,"On January 12th, 2012, a novel disease with an incidence of 50% was discovered in Pindo palm Butia capitata (Mart.) Becc from the Coconut Grant View Garden (19°33.137' N, 110°47.482' E) located in Wenchang, Hainan Province. Diseased leaflets at the base of the rotted heart leaves had reddish brown lesions; when the infection progressed, the leaves turned yellow and became blighted from the inner to the outer part of the crown. Once the growing point was destroyed, the entire tree ultimately died. Tissues from the edges of lesions from diseased leaflet samples were placed onto potato dextrose agar (PDA) and incubated at 25°C for 3 days. The color of colonies of five isolates obtained turned from white to black in 48 h. The optimum temperature for mycelium growth was from 20 to 30°C, and no growth occurred at temperatures higher than 40°C or lower than 5°C (n = 5). The cylindrical colorless to pale brown conidia were 7.5 to 17.5 μm long × 5.0 to 7.5 μm wide (n = 100); oval black chlamydospores were 12.5 to 22.5 × 7.5 to 15.0 μm (n = 100). The sequence (497 bp) of the internal transcribed spacer (ITS) region of the representative isolate BX3 (China Center for Type Culture Collection No. CCTCC AF2014002) was amplified using primer pair ITS1/ITS4 (GenBank Accession No. KF939052) and shared 99% sequence identity with Ceratocystis paradoxa strain xie331-4 (JQ039332). Based upon these biological characteristics and ITS sequence, this pathogen was identified as C. paradoxa (Dade) C. Moreau (anamorph Thielaviopsis paradoxa (de Seynes) Höhn.) (3). Pathogenicity tests were conducted on 8-cm-long sections of young leaflets excised from a 12-year-old pindo palm tree. 
One side of the midrib of 10 sections was wounded with a sterilized scalpel at the center and the other side was non-wounded, then a PDA plug (4 to 6 × 4 to 6 mm) from the edge of an actively growing colony of BX3 incubated for 3 days were inoculated onto each wounded or non-wounded site. As controls, plain PDA plugs were placed on wounded and non-wounded spots of another 10 sections following the above procedure. Pathogenicity was tested twice. Each inoculated section was then put into a 9-cm petri dish in which two filter papers (Φ = 9 cm) were placed and 8 ml of sterile water were added to maintain high humidity, and then all dishes were placed in a dark incubator at 25°C. After 5 days, typical symptoms developed only on the wounded points inoculated with mycelium plugs. C. paradoxa was re-isolated from the margins of the expanding lesions. C. paradoxa causing fruit rot of B. capitata was reported in Uruguay (2), but to our knowledge, there are no previous reports of this species in China or infecting leaves of B. capitata worldwide (1). We report here a new Ceratocystis disease on B. capitata, and it was named as pindo palm heart rot based on its symptoms. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , Feb 21, 2014. (2) V. Gepp et al. New Dis. Rep. 27:12, 2013. (3) F. Y. Yu et al. Plant Dis. 96:290, 2012.",2014-09-01 +30699635,First Detection of Puccinia ballotiflora on Salvia greggii.,"Salvia greggii, autumn sage, is grown for its bright red to white flowers that bloom in late summer and fall. In February of 2008, a rust sample was sent to the CDFA plant pathology diagnostics laboratory in Sacramento from a nursery in Santa Barbara County, CA. Pustules were abundant on older leaves causing moderate defoliation of containerized stock. Only the varieties with entirely red or pink flowers were affected. S. 
greggii 'Hotlips,' a popular white/red bicolor, was unaffected. Amphigenous uredinia were cinnamon brown, round, powdery, and sometimes surrounded by yellow halos. Pustules were found primarily on the leaves, although a few were on the stems. Urediniospores were broadly obovoid, subglobose to broadly ellipsoid, echinulate, and 22 to 27 × 24 to 32 μm (24.9 × 26.9 μm average) with one apical pore and 2 to 3 equatorial pores. Urediniospore walls were cinnamon brown in color and measured 1.0 to 2.0 μm (1.5 μm average). No telia were observed. After the initial detection, this rust was found in additional nursery sites in Santa Cruz, Santa Clara, Santa Barbara, and Ventura counties in 2008 and 2009. In November of 2011, a sample from a landscape planting in Santa Barbara County of a similar rust with telia and teliospores was submitted. Urediniospores and teliospores were present in the same lesions. Lesions with teliospores were located primarily on the stems. Mature teliospores were two-celled, verrucose, chocolate brown, and 25 to 31 × 32 to 40 μm (28.6 × 35.3 μm average) with a pedicel ranging from 8 to 12 × 38 to 104 μm, sometimes attached obliquely. The rust matched the morphological characteristics of Puccinia ballotiflora (Syn = P. ballotaeflora Long) (2). To confirm pathogenicity, three 20-cm-tall plants of S. greggii 'Navajo Red' in 3.8-liter pots were spray inoculated with 10 ml of a 2.5 × 10^3 urediniospores per ml suspension and incubated in a dew chamber at 23°C for 2 days in the dark. Plants were transferred to a growth chamber maintained at 22°C with a 12-h photoperiod. Three plants were sprayed with sterile distilled water as controls. Uredinial pustules (1 to 2 mm) appeared on the abaxial surface of the leaves after 3 weeks. The pathogenicity test was repeated with similar results. 
The internal transcribed spacer region of rDNA and a portion of the 28S rDNA were amplified with primer pairs ITS5 (5'-GGAAGTAAAAGTCGTAACAAGG-3'), Rust1 (5'-GCTTACTGCCTTCCTCAATC-3'), and Rust2inv (5'-GATGAAGAACACAGTGAAA-3'), LR6 (5'-CGCAGTTCTGCTTACC-3') as described by Aime (1) and sequenced using the amplification primers, Rust2 (5'-TTTCACTGTGTTCTTCATC-3') and Rust3 (5'-GAATCTTTGAACGCACCTTG-3'). BLAST query of the assembled sequence, GenBank KF381491, was 91% identical to P. acroptili, JN204194, its closest match of similar length. P. ballotiflora has been found in Colombia on S. cataractarum, S. petiolaris, and S. mayori (3), and in Texas and Mexico on S. ballotiflora (4). To the best of our knowledge, this is the first detection of P. ballotiflora on S. greggii worldwide. P. ballotiflora is already widespread in the nursery trade in California and frequent fungicide applications are necessary to keep plants marketable. References: (1) M. C. Aime. Mycoscience 47:112, 2006. (2) J. W. Baxter and G. B. Cummins. Lloydia 14:201, 1951. (3) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Botany and Mycology Laboratory, Online publication http://nt.ars-grin.gov/fungaldatabases ARS, USDA, 2014 (4) F. D. Kern et al. Mycologia 25:448, 1933.",2014-09-01 +30699625,First Report of Pseudoperonospora cubensis Causing Downy Mildew on Momordica balsamina and M. charantia in North Carolina.,"Momordica balsamina (balsam apple) and M. charantia L. (bitter melon/bitter gourd/balsam pear) commonly grow in the wild in Africa and Asia; bitter melon is also cultivated for food and medicinal purposes in Asia (1). In the United States, these cucurbits grow as weeds or ornamentals. Both species are found in southern states and bitter melon is also found in Pennsylvania and Connecticut (3). 
Cucurbit downy mildew (CDM), caused by the oomycete Pseudoperonospora cubensis, was observed on bitter melon and balsam apple between August and October of 2013 in six North Carolina sentinel plots belonging to the CDM ipmPIPE program (2). Plots were located at research stations in Johnston, Sampson, Lenoir, Henderson, Rowan, and Haywood counties, and contained six different commercial cucurbit species including cucumbers, melons, and squashes in addition to the Momordica spp. Leaves with symptoms typical of CDM were collected from the Momordica spp. and symptoms varied from irregular chlorotic lesions to circular lesions with chlorotic halos on the adaxial leaf surface. Sporulation on the abaxial side of the leaves was observed and a compound microscope revealed sporangiophores (180 to 200 μm height) bearing lemon-shaped, dark sporangia (20 to 35 × 10 to 20 μm diameter) with papilla on one end. Genomic DNA was extracted from lesions and regions of the NADH dehydrogenase subunit 1 (Nad1), NADH dehydrogenase subunit 5 (Nad5), and internal transcribed spacer (ITS) ribosomal RNA genes were amplified and sequenced (4). BLAST analysis revealed 100% identity to P. cubensis Nad1 (HQ636552.1, HQ636551.1), Nad5 (HQ636556.1), and ITS (HQ636491.1) sequences in GenBank. Sequences from a downy mildew isolate from each Momordica spp. were deposited in GenBank as accession nos. KJ496339 through 44. To further confirm host susceptibility, vein junctions on the abaxial leaf surface of five detached leaves of lab-grown balsam apple and bitter melon were either inoculated with a sporangia suspension (10 μl, 10^4 sporangia/ml) of a P. cubensis isolate from Cucumis sativus ('Vlaspik' cucumber), or with water as a control. Inoculated leaves were placed in humidity chambers to promote infection and incubated using a 12-h light (21°C) and dark (18°C) cycle. Seven days post inoculation, CDM symptoms and sporulation were observed on inoculated balsam apple and bitter melon leaves. P. 
cubensis has been reported as a pathogen of both hosts in Iowa (5). To our knowledge, this is the first report of P. cubensis infecting these Momordica spp. in NC in the field. Identifying these Momordica spp. as hosts for P. cubensis is important since these cucurbits may serve as a source of CDM inoculum and potentially an overwintering mechanism for P. cubensis. Further research is needed to establish the role of non-commercial cucurbits in the yearly CDM epidemic, which will aid the efforts of the CDM ipmPIPE to predict disease outbreaks. References: (1) L. K. Bharathi and K. J. John. Momordica Genus in Asia-An Overview. Springer, New Delhi, India, 2013. (2) P. S. Ojiambo et al. Plant Health Prog. doi:10.1094/PHP-2011-0411-01-RV, 2011. (3) PLANTS Database. Natural Resources Conservation Service, USDA. Retrieved from http://plants.usda.gov/ , 7 February 2014. (4) L. M. Quesada-Ocampo et al. Plant Dis. 96:1459, 2012. (5) USDA. Index of Plant Disease in the United States. Agricultural Handbook 165, 1960.",2014-09-01 +30699669,First Report of Curvularia eragrostidis Causing Postharvest Rot on Pineapple in Brazil.,"Pineapple (Ananas comosus L. Merril.) is the main plant of the Bromeliaceae, cultivated economically for the fruits' appealing flavor and a refreshing sugar-acid balance. In 2013, fruits with no initially visible symptoms began to show a postharvest rot after 3 days in a market in the municipality of Viçosa, Minas Gerais, Brazil. The rot can rarely be detected from the outside of the fruit, but a longitudinal section allows observation of extension of the affected area toward the center of the fruit. The symptoms initially appear as a dark brown to black rot on surface of the fruits, which gradually enlarges in size, leading to increased rot and disposal of infected fruits. Until now, this disease occurred sporadically and caused small losses. 
A fungus was isolated from rot observed in fruits from cultivar Pérola and a single-spore culture was deposited in the culture collection of the Universidade Federal de Viçosa (Accession No. COAD 1588). After 7 days of incubation at 25°C, the strain displayed radial growth and gray-white to black colonies. Microscopic observations revealed brown to light brown conidiophores present singly or in groups. The septate, simple or rarely branched conidiophores are straight or curved, up to 245 μm long and 5 μm wide, and some have a geniculate growth pattern near the apex. The conidia are ellipsoidal or barrel-shaped and 22 to 25 μm long and 10 to 12.5 μm wide. The median septum appears as a black band and the cells at each end of the conidia are pale, whereas the intermediate cells are brown or dark brown. Based on morphological characteristics, the fungus was identified as Curvularia eragrostidis (4). To confirm this identification, DNA was extracted and sequences of the internal transcribed spacer (ITS), 28S and 18S rDNA regions were obtained and deposited in GenBank (Accession Nos. KJ541818 to KJ541820). The sequence of the ITS region exhibited 99% identity over 530 bp with other C. eragrostidis sequence in GenBank (JN943449) and Bayesian inference analysis placed our isolate in the same clade with others C. eragrostidis (study S15670 deposited in TreeBASE). Koch's postulates were conducted by inoculating six fruits of pineapple previously disinfected with 2% sodium hypochlorite and washed in sterile distilled water. For inoculation, the isolate was grown in potato dextrose agar (PDA) for 15 days at 25°C. Six millimeter diameter disks were removed from the surface of fruits with a sterile cork borer and replaced with PDA disks containing mycelia from the margins of the culture. An agar plug was deposited in three control fruits and all fruits were maintained at 25°C in plastic trays. 
Inoculated fruits showed symptoms 7 days after inoculation that were similar to those initially observed in the infected fruits, while control fruits showed no symptoms. C. eragrostidis is a cosmopolitan pathogen that infects hosts from several botanical families (2,4). In Brazil, this fungus causes leaf spot on A. comosus (3) and also infects Allium sativum, Dioscorea alata, D. cayenensis, Oryza sativa, Sorghum bicolor, Vigna unguiculata, and Zea mays (1). To our knowledge, this is the first report of C. eragrostidis causing postharvest rot disease in pineapple in Brazil. Because invasion of the fungus can occur through minute fractures, fruits should be carefully handled to avoid mechanical damage. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases , 18 February 2014. (2) D. S. Manamgoda et al. Fungal Divers. 51:3, 2011. (3) J. J. Ponte et al. Fitopatologia 10:21, 1975. (4) A. Sivanesan. Mycological Papers 158:113, 1987.",2014-09-01 +25048473,Adverse events associated with metal contamination of traditional chinese medicines in Korea: a clinical review.,"This study was performed to review studies carried out in Korea reporting toxic reactions to traditional Chinese medicines (TCMs) as a result of heavy metal contamination. PubMed (1966-August 2013) and International Pharmaceutical Abstracts (1965-August 2013) were searched using the medical subject heading terms of ""Medicine, Chinese Traditional,"" ""Medicine, Korean Traditional,"" ""Medicine, Traditional,"" ""Metals, Heavy,"" and ""Drug Contamination"". 
For Korean literature, Korea Med (http://www.koreamed.org), the Korean Medical Database (http://kmbase.medric.or.kr), National Discovery for Science Leaders (www.ndsl.kr), Research Information Sharing Service (http://www.riss.kr), and Google Scholar were searched using the terms ""Chinese medicine,"" ""Korean medicine,"" ""herbal medicine,"" and ""metallic contamination"" in Korean. Bibliographies of case reports and case series, identified using secondary resources, were also utilized. Only literature describing cases or studies performed in Korea were included. Case reports identified clear issues with heavy metal, particularly lead, contamination of TCMs utilized in Korea. No international standardization guidelines for processing, manufacturing and marketing of herbal products exist. Unacceptably high levels of toxic metals can be present in TCM preparations. Health care providers and patients should be educated on the potential risks associated with TCMs. International advocacy for stricter standardization procedures for production of TCMs is warranted.",2014-09-01 +30699642,First Report of Fusarium equiseti Causing Damping-Off Disease on Aleppo Pine in Algeria.,"The Aleppo pine (Pinus halepensis Mill.) is a conifer native to the Mediterranean region. In 2008 and 2009, a survey of Aleppo pine seedling diseases was performed in three forest nurseries from Relizane, Sidi Bel Abbes, and Tlemcen provinces in northwestern Algeria. Aleppo pine seedlings showed symptoms of pre- and post-emergence damping-off disease, with an incidence of 64 to 77%. Four composite samples were taken from each location. Disinfested root and root collar segments, approximately 5 mm in length, were cultured on potato dextrose agar (PDA) and incubated at 25°C, and hyphal tips were transferred to PDA. Fusarium equiseti (Corda) Sacc. (teleomorph: Gibberella intricans Wollenw.) was identified from roots of two seedlings from the Sidi Bel Abbes nursery. 
Morphological identification was done according to Fusarium keys (2). PDA colonies with abundant, loosely floccose, whitish aerial mycelium and beige pigmentation were observed. Macroconidia with usually 5 to 6 septa, 31 to 45 μm long. A pronounced dorsiventral curvature, tapered and elongated apical cell, and prominent foot shape were observed. Microconidia were absent. Chlamydospores were produced in hyphae, most often intercalary, solitary, in pairs, frequently forming chains or clusters, globose (7 to 13 μm). To confirm the identity of this fungus, the internal transcribed spacer of F3RS1 and F19RS1 isolates of F. equiseti were amplified and sequenced using ITS1 and ITS4 primers (4), GenBank accession nos. JX114784 and JX114791, respectively. Those sequences bore 100% (HQ671182) similarity with sequences of F. equiseti in GenBank. Pathogenicity tests were performed to fulfill Koch's postulates. Inoculum was produced by adding a 5-mm-diameter plug from a 7-day-old CMA petri dish culture to a previously sterilized 500 ml flask (237.5 g sand, 12.5 g cornmeal, 80 ml sterile distilled water), shaken over 9 days at 25°C, and mixed with sterile sandy clay soil at 1:3 (v:v). Infested soil was then transferred to 500 ml pots, and 10 Aleppo pine seeds were planted per pot. A completely randomized design was used with three replicates per isolate and three control pots with a similar non-infested soil. After 1 month at 25°C the two tested isolates caused typical damping-off symptoms (collar rot) on seedlings and were re-isolated from recently infected tissues. The percentages of the inoculated plants that became infected were 59 to 65% among isolates (0% in control pots). To our knowledge, infection by F. equiseti is a first report on Aleppo pine in northwestern Algeria, Northern Africa, and globally, and on conifers in the Mediterranean region (1,3). In Algeria, F. equiseti is associated with black pepper (Piper nigrum L.) (3). 
These findings highlight the moderate impact of F. equiseti on the production of Aleppo seedling stock for reforestation activities in northwestern Algeria. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory. ARS, USDA, Beltsville, MD. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , February 20, 2013. (2) J. F. Leslie and B. A. Summerell. The Fusarium Laboratory Manual. Blackwell Publishing, Ames, IA, 2006. (3) D. W. Minter. Cybertruffle's Robigalia, Observations of Fungi and their Associated Organisms. Retrieved from http://www.cybertruffle.org.uk/robigalia/eng/ , February 20, 2013. (4) T. J. White et al. Page 315 in: PCR Protocols: A Guide to Methods and Applications. Academic Press, San Diego, 1990.",2014-09-01 +26110438,Better prediction of functional effects for sequence variants.,"Elucidating the effects of naturally occurring genetic variation is one of the major challenges for personalized health and personalized medicine. Here, we introduce SNAP2, a novel neural network based classifier that improves over the state-of-the-art in distinguishing between effect and neutral variants. Our method's improved performance results from screening many potentially relevant protein features and from refining our development data sets. Cross-validated on >100k experimentally annotated variants, SNAP2 significantly outperformed other methods, attaining a two-state accuracy (effect/neutral) of 83%. SNAP2 also outperformed combinations of other methods. Performance increased for human variants but much more so for other organisms. Our method's carefully calibrated reliability index informs selection of variants for experimental follow up, with the most strongly predicted half of all effect variants predicted at over 96% accuracy. As expected, the evolutionary information from automatically generated multiple sequence alignments gave the strongest signal for the prediction. 
However, we also optimized our new method to perform surprisingly well even without alignments. This feature reduces prediction runtime by over two orders of magnitude, enables cross-genome comparisons, and renders our new method as the best solution for the 10-20% of sequence orphans. SNAP2 is available at: https://rostlab.org/services/snap2web.",2015-06-18 +21841435,Trends in all-terrain vehicle-related spinal injuries in children and adolescents.,"

Background

Despite the significant morbidity and mortality associated with all-terrain vehicle (ATV) use in children, their use continues to increase dramatically. To determine the frequency and impact of spinal fractures in children and adolescents injured in ATV accidents, we reviewed all 4 of the available Kids' Inpatient Databases.

Methods

The Kids' Inpatient Databases (http://www.ahrq.gov/data/hcup/hcupkid.htm) from 1997, 2000, 2003, and 2006 were reviewed using e-codes for children (age less than 18 y) injured in ATV accidents. From the data on ATV accidents, children who sustained spinal injuries were identified by ICD-9 codes. Statistical analysis was done using SAS Windows.

Results

An estimated 4,483 children were admitted because of ATV-related accidents in 2006. Spinal injury occurred in 7.4% of patients. The most common level of fracture was thoracic (39%), followed by lumbar (29%) and cervical (16%). Pelvic fractures were the most common associated fractures, accounting for 44% of all musculoskeletal injuries, followed by forearm/wrist fractures (15%) and femoral fractures (9%). Although fewer girls were injured in ATV accidents than boys, the risk of spinal injury was higher in girls than boys (10.1% vs. 6.7%, P < 0.005), and children with spinal injuries were older than those without (14.7 y vs. 12.7 y, P < 0.001).

Conclusions

Despite educational and legislative efforts, children account for a disproportionate percentage of morbidity and mortality from ATV-related accidents. The injury rate for children from ATV accidents has increased 240% since 1997, whereas the spinal injury rate has increased 476% over the same time frame. The risk of spinal injury in ATV-related accidents is higher for girls than for boys and for older children.

Clinical relevance

Multiple injuries are frequent in children involved in ATV accidents and may be related to the high-energy nature of ATV accidents. It is important to have a high index of suspicion for multiple injuries, not only within the spine, but in other organ systems as well.",2011-09-01 +30722299,First Report of Pilidium concavum causing Leaf Necrosis on Fallopia japonica in the United States.,"Fallopia japonica (Houtt.) Ronse Decr. (= Polygonum cuspidatum Siebold & Zucc.; Japanese knotweed, JKW) is an invasive perennial forb in the Polygonaceae. It has been identified as a target for biological control in many parts of the world, including the United States. Several potted JKW plants in an outdoor study at the Oregon Department of Agriculture, Salem (44.93° N, 122.99° W) developed leaf spots. Samples collected on August 20, 2007, were sent to the FDWSRU for identification of the disease. The necrotic leaf spots were brown and large, 1 to 3 cm in diameter, and in some cases occupying 30% of the leaf area. Both hemispherical and discoid conidiomata with gloeoid spore masses (3) developed in necrotic areas of all leaves placed in moist chambers. Discoid conidiomata had dark, pedicellate bases subtending a fimbriate disc on which pale brown to brown gloeoid conidial masses were produced. Hemispherical conidiomata were black, circular, sessile, and somewhat flattened, within which similar, gloeoid conidial masses were produced. Conidia from each type of conidioma were unicellular, cylindrical to fusiform, hyaline, and 4.5 to 7.2 × 0.9 to 1.8 μm (mean 5.7 × 1.33). Artificial inoculation of 15 plants was made on two occasions with a suspension of 10^6 conidia per ml, followed by two 16-hr dew periods at 25°C that were separated by an 8-hr ""day;"" a similar set of 15 non-inoculated plants served as controls each time. Symptoms similar to those in the original sample developed within 2 months after inoculation. 
The fungus was easily reisolated, and conidia from each type of conidioma produced similar growth on artificial media and similar disease after inoculation. The characteristics of conidial size and distinctly different conidiomata are diagnostic of Pilidium concavum (Desm.) Höhn (3,4). A sequence of the ITS1-5.8S-ITS2 region DNA, extracted using a DNeasy Plant Mini Kit (QIAGEN), was found identical to that of P. concavum from Rosa sp. (BPI 1107275; GenBank Accession No. AY487094), using BLAST. This isolate, FDWSRU 07-116, has been deposited in the US National Fungus Collection (BPI 883546) and at the Centraalbureau voor Schimmelcultures (CBS 132725). Sequence data have been deposited in GenBank (JQ790789). To our knowledge, this is the first report of P. concavum causing disease on a member of the Polygonaceae in North America (1), a disease clearly different from a Japanese Mycosphaerella sp. under consideration for biological control of JKW in the United Kingdom (2). References: (1) D. F. Farr, and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , May 15, 2012. (2) D. Kurose et al. MycoSci. 50:179, 2009. (3) M. E. Palm, Mycologia 83:787, 1991. (4) A. Y. Rossman, et al. Mycol. Progr. 3:275, 2004.",2013-01-01 +25041923,bPeaks: a bioinformatics tool to detect transcription factor binding sites from ChIPseq data in yeasts and other organisms with small genomes.,"Peak calling is a critical step in ChIPseq data analysis. Choosing the correct algorithm as well as optimized parameters for a specific biological system is an essential task. In this article, we present an original peak-calling method (bPeaks) specifically designed to detect transcription factor (TF) binding sites in small eukaryotic genomes, such as in yeasts. 
As TF interactions with DNA are strong and generate high binding signals, bPeaks uses simple parameters to compare the sequences (reads) obtained from the immunoprecipitation (IP) with those from the control DNA (input). Because yeasts have small genomes (<20 Mb), our program has the advantage of using ChIPseq information at the single nucleotide level and can explore, in a reasonable computational time, results obtained with different sets of parameter values. Graphical outputs and text files are provided to rapidly assess the relevance of the detected peaks. Taking advantage of the simple promoter structure in yeasts, additional functions were implemented in bPeaks to automatically assign the peaks to promoter regions and retrieve peak coordinates on the DNA sequence for further predictions of regulatory motifs, enriched in the list of peaks. Applications of the bPeaks program to three different ChIPseq datasets from Saccharomyces cerevisiae, Candida albicans and Candida glabrata are presented. Each time, bPeaks allowed us to correctly predict the DNA binding sequence of the studied TF and provided relevant lists of peaks. The bioinformatics tool bPeaks is freely distributed to academic users. Supplementary data, together with detailed tutorials, are available online: http://bpeaks.gene-networks.net.",2014-07-28 +26338766,Learning directed acyclic graphical structures with genetical genomics data.,"

Motivation

Large amount of research efforts have been focused on estimating gene networks based on gene expression data to understand the functional basis of a living organism. Such networks are often obtained by considering pairwise correlations between genes, thus may not reflect the true connectivity between genes. By treating gene expressions as quantitative traits while considering genetic markers, genetical genomics analysis has shown its power in enhancing the understanding of gene regulations. Previous works have shown the improved performance on estimating the undirected network graphical structure by incorporating genetic markers as covariates. Knowing that gene expressions are often due to directed regulations, it is more meaningful to estimate the directed graphical network.

Results

In this article, we introduce a covariate-adjusted Gaussian graphical model to estimate the Markov equivalence class of the directed acyclic graphs (DAGs) in a genetical genomics analysis framework. We develop a two-stage estimation procedure to first estimate the regression coefficient matrix by [Formula: see text] penalization. The estimated coefficient matrix is then used to estimate the mean values in our multi-response Gaussian model to estimate the regulatory networks of gene expressions using PC-algorithm. The estimation consistency for high dimensional sparse DAGs is established. Simulations are conducted to demonstrate our theoretical results. The method is applied to a human Alzheimer's disease dataset in which differential DAGs are identified between cases and controls. R code for implementing the method can be downloaded at http://www.stt.msu.edu/~cui.

Availability and implementation

R code for implementing the method is freely available at http://www.stt.msu.edu/~cui/software.html.",2015-09-02 +25258491,miRAFinder and GeneAFinder scripts: large-scale searching for miRNA and related information in indexed literature abstracts.,"

Unlabelled

In recent times, information on miRNAs and their binding sites is gaining momentum. Therefore, there is interest in the development of tools extracting miRNA related information from known literature. Hence, we describe GeneAFinder and miRAFinder scripts (open source) developed using python programming for the semi-automatic extraction and arrangement of updated information on miRNAs, genes and additional data from published article abstracts in PubMed. The scripts are suitable for custom modification as per requirement.

Availability

miRAFinder and GeneAFinder scripts are free and available for download at http://sites.google.com/site/malaheenee/software.",2014-08-30 +24178034,IDEAL in 2014 illustrates interaction networks composed of intrinsically disordered proteins and their binding partners.,"IDEAL (Intrinsically Disordered proteins with Extensive Annotations and Literature, http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL/) is a collection of intrinsically disordered proteins (IDPs) that cannot adopt stable globular structures under physiological conditions. Since its previous publication in 2012, the number of entries in IDEAL has almost tripled (120 to 340). In addition to the increase in quantity, the quality of IDEAL has been significantly improved. The new IDEAL incorporates the interactions of IDPs and their binding partners more explicitly, and illustrates the protein-protein interaction (PPI) networks and the structures of protein complexes. Redundant experimental data are arranged based on the clustering of Protein Data Bank entries, and similar sequences with the same binding mode are grouped. As a result, the new IDEAL presents more concise and informative experimental data. Nuclear magnetic resonance (NMR) disorder is annotated in a systematic manner, by identifying the regions with large deviations among the NMR models. The ordered/disordered and new domain predictions by DICHOT are available, as well as the domain assignments by HMMER. Some examples of the PPI networks and the highly deviated regions derived from NMR models will be described, together with other advances. These enhancements will facilitate deeper understanding of IDPs, in terms of their flexibility, plasticity and promiscuity.",2013-10-30 +25944184,CS-SCORE: Rapid identification and removal of human genome contaminants from metagenomic datasets.,"

Unlabelled

Metagenomic sequencing data, obtained from host-associated microbial communities, are usually contaminated with host genome sequence fragments. Prior to performing any downstream analyses, it is necessary to identify and remove such contaminating sequence fragments. The time and memory requirements of available host-contamination detection techniques are enormous. Thus, processing of large metagenomic datasets is a challenging task. This study presents CS-SCORE--a novel algorithm that can rapidly identify host sequences contaminating metagenomic datasets. Validation results indicate that CS-SCORE is 2-6 times faster than the current state-of-the-art methods. Furthermore, the memory footprint of CS-SCORE is in the range of 2-2.5GB, which is significantly lower than other available tools. CS-SCORE achieves this efficiency by incorporating (1) a heuristic pre-filtering mechanism and (2) a directed-mapping approach that utilizes a novel sequence composition metric (cs-score). CS-SCORE is expected to be a handy 'pre-processing' utility for researchers analyzing metagenomic datasets.

Availability

For academic users, an implementation of CS-SCORE is freely available at: http://metagenomics.atc.tcs.com/cs-score (or) https://metagenomics.atc.tcs.com/preprocessing/cs-score.",2015-05-02 +26895492,"A Statewide Nested Case-Control Study of Preterm Birth and Air Pollution by Source and Composition: California, 2001-2008.","

Background

Preterm birth (PTB) has been associated with exposure to air pollution, but it is unclear whether effects might vary among air pollution sources and components.

Objectives

We studied the relationships between PTB and exposure to different components of air pollution, including gases and particulate matter (PM) by size fraction, chemical composition, and sources.

Methods

Fine and ultrafine PM (respectively, PM2.5 and PM0.1) by source and composition were modeled across California over 2000-2008. Measured PM2.5, nitrogen dioxide, and ozone concentrations were spatially interpolated using empirical Bayesian kriging. Primary traffic emissions at fine scale were modeled using CALINE4 and traffic indices. Data on maternal characteristics, pregnancies, and birth outcomes were obtained from birth certificates. Associations between PTB (n = 442,314) and air pollution exposures defined according to the maternal residence at birth were examined using a nested matched case-control approach. Analyses were adjusted for maternal age, race/ethnicity, education and neighborhood income.

Results

Adjusted odds ratios for PTB in association with interquartile range (IQR) increases in average exposure during pregnancy were 1.133 (95% CI: 1.118, 1.148) for total PM2.5, 1.096 (95% CI: 1.085, 1.108) for ozone, and 1.079 (95% CI: 1.065, 1.093) for nitrogen dioxide. For primary PM, the strongest associations per IQR by source were estimated for onroad gasoline (9-11% increase), followed by onroad diesel (6-8%) and commercial meat cooking (4-7%). For PM2.5 composition, the strongest positive associations per IQR were estimated for nitrate, ammonium, and secondary organic aerosols (11-14%), followed by elemental and organic carbon (2-4%). Associations with local traffic emissions were positive only when analyses were restricted to births with residences geocoded at the tax parcel level.

Conclusions

In our statewide nested case-control study population, exposures to both primary and secondary pollutants were associated with an increase in PTB.

Citation

Laurent O, Hu J, Li L, Kleeman MJ, Bartell SM, Cockburn M, Escobedo L, Wu J. 2016. A statewide nested case-control study of preterm birth and air pollution by source and composition: California, 2001-2008. Environ Health Perspect 124:1479-1486; http://dx.doi.org/10.1289/ehp.1510133.",2016-02-19 +25133604,Curation and analysis of multitargeting agents for polypharmacological modeling.,"In drug discovery and development, the conventional ""single drug, single target"" concept has been shifted to ""single drug, multiple targets""--a concept coined as polypharmacology. For studies in this emerging field, dedicated and high-quality databases of multitargeting ligands would be exceedingly beneficial. To this end, we conducted a comprehensive analysis of the structural and chemical/biological profiles of polypharmacological agents and present a Web-based database (Polypharma). All of these compounds curated herein have been cocrystallized with more than one unique protein with intensive reports of their multitargeting activities. The present study provides more insight of drug multitargeting and is particularly useful for polypharmacology modeling. This specialized curation has been made publically available at http://imdlab.org/polypharma/",2014-08-29 +26895509,Texture Descriptors Ensembles Enable Image-Based Classification of Maturation of Human Stem Cell-Derived Retinal Pigmented Epithelium.,"

Aims

A fast, non-invasive and observer-independent method to analyze the homogeneity and maturity of human pluripotent stem cell (hPSC) derived retinal pigment epithelial (RPE) cells is warranted to assess the suitability of hPSC-RPE cells for implantation or in vitro use. The aim of this work was to develop and validate methods to create ensembles of state-of-the-art texture descriptors and to provide a robust classification tool to separate three different maturation stages of RPE cells by using phase contrast microscopy images. The same methods were also validated on a wide variety of biological image classification problems, such as histological or virus image classification.

Methods

For image classification we used different texture descriptors, descriptor ensembles and preprocessing techniques. Also, three new methods were tested. The first approach was an ensemble of preprocessing methods, to create an additional set of images. The second was the region-based approach, where saliency detection and wavelet decomposition divide each image in two different regions, from which features were extracted through different descriptors. The third method was an ensemble of Binarized Statistical Image Features, based on different sizes and thresholds. A Support Vector Machine (SVM) was trained for each descriptor histogram and the set of SVMs combined by sum rule. The accuracy of the computer vision tool was verified in classifying the hPSC-RPE cell maturation level.

Dataset and results

The RPE dataset contains 1862 subwindows from 195 phase contrast images. The final descriptor ensemble outperformed the most recent stand-alone texture descriptors, obtaining, for the RPE dataset, an area under ROC curve (AUC) of 86.49% with the 10-fold cross validation and 91.98% with the leave-one-image-out protocol. The generality of the three proposed approaches was ascertained with 10 more biological image datasets, obtaining an average AUC greater than 97%.

Conclusions

Here we showed that the developed ensembles of texture descriptors are able to classify the RPE cell maturation stage. Moreover, we proved that preprocessing and region-based decomposition improves many descriptors' accuracy in biological dataset classification. Finally, we built the first public dataset of stem cell-derived RPE cells, which is publicly available to the scientific community for classification studies. The proposed tool is available at https://www.dei.unipd.it/node/2357 and the RPE dataset at http://www.biomeditech.fi/data/RPE_dataset/. Both are available at https://figshare.com/s/d6fb591f1beb4f8efa6f.",2016-02-19 +27303704,Infectio: a Generic Framework for Computational Simulation of Virus Transmission between Cells. ,"Viruses spread between cells, tissues, and organisms by cell-free and cell-cell mechanisms, depending on the cell type, the nature of the virus, or the phase of the infection cycle. The mode of viral transmission has a large impact on disease development, the outcome of antiviral therapies or the efficacy of gene therapy protocols. The transmission mode of viruses can be addressed in tissue culture systems using live-cell imaging. Yet even in relatively simple cell cultures, the mechanisms of viral transmission are difficult to distinguish. Here we present a cross-platform software framework called ""Infectio,"" which is capable of simulating transmission phenotypes in tissue culture of virtually any virus. Infectio can estimate interdependent biological parameters, for example for vaccinia virus infection, and differentiate between cell-cell and cell-free virus spreading. Infectio assists in elucidating virus transmission mechanisms, a feature useful for designing strategies of perturbing or enhancing viral transmission. The complexity of the Infectio software is low compared to that of other software commonly used to quantitate features of cell biological images, which yields stable and relatively error-free output from Infectio. 
The software is open source (GPLv3 license), and operates on the major platforms (Windows, Mac, and Linux). The complete source code can be downloaded from http://infectio.github.io/index.html. IMPORTANCE Infectio presents a generalized platform to analyze virus infection spread between cells. It allows the simulation of plaque phenotypes from image-based assays. Viral plaques are the result of virus spreading from primary infected cells to neighboring cells. This is a complex process and involves neighborhood effects at cell-cell contact sites or fluid dynamics in the extracellular medium. Infectio differentiates between two major modes of virus transmission between cells, allowing in silico testing of hypotheses about spreading mechanisms of any virus which can be grown in cell cultures, based on experimentally measured parameters, such as infection intensity or cell killing. The results of these tests can be compared with experimental data and allow interpretations with regard to biophysical mechanisms. Infectio also facilitates characterizations of the mode of action of therapeutic agents, such as oncolytic viruses or other infectious or cytotoxic agents.",2016-01-01 +26167452,Annotation of suprachromosomal families reveals uncommon types of alpha satellite organization in pericentromeric regions of hg38 human genome assembly.,"Centromeric alpha satellite (AS) is composed of highly identical higher-order DNA repetitive sequences, which make the standard assembly process impossible. Because of this the AS repeats were severely underrepresented in previous versions of the human genome assembly showing large centromeric gaps. The latest hg38 assembly (GCA_000001405.15) employed a novel method of approximate representation of these sequences using AS reference models to fill the gaps. Therefore, a lot more of assembled AS became available for genomic analysis. 
We used the PERCON program previously described by us to annotate various suprachromosomal families (SFs) of AS in the hg38 assembly and presented the results of our primary analysis as an easy-to-read track for the UCSC Genome Browser. The monomeric classes, characteristic of the five known SFs, were color-coded, which allowed quick visual assessment of AS composition in whole multi-megabase centromeres down to each individual AS monomer. Such comprehensive annotation of AS in the human genome assembly was performed for the first time. It showed the expected prevalence of the known major types of AS organization characteristic of the five established SFs. Also, some less common types of AS arrays were identified, such as pure R2 domains in SF5, apparent J/R and D/R mixes in SF1 and SF2, and several different SF4 higher-order repeats among reference models and in regular contigs. No new SFs or large unclassed AS domains were discovered. The dataset reveals the architecture of human centromeres and allows classification of AS sequence reads by alignment to the annotated hg38 assembly. The data were deposited here: http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg38&hgt.customText=https://dl.dropboxusercontent.com/u/22994534/AS-tracks/human-GRC-hg38-M1SFs.bed.bz2.",2015-09-01 +32647834,Accelerated Molecular Mechanical and Solvation Energetics on Multicore CPUs and Manycore GPUs.,"

Motivation

Despite several reported acceleration successes of programmable GPUs (Graphics Processing Units) for molecular modeling and simulation tools, the general focus has been on fast computation with small molecules. This was primarily due to the limited memory size on the GPU. Moreover simultaneous use of CPU and GPU cores for a single kernel execution - a necessity for achieving high parallelism - has also not been fully considered.

Results

We present fast computation methods for molecular mechanical (Lennard-Jones and Coulombic) and generalized Born solvation energetics which run on commodity multicore CPUs and manycore GPUs. The key idea is to trade off accuracy of pairwise, long-range atomistic energetics for higher speed of execution. A simple yet efficient CUDA kernel for GPU acceleration is presented which ensures high arithmetic intensity and memory efficiency. Our CUDA kernel uses a cache-friendly, recursive and linear-space octree data structure to handle very large molecular structures with up to several million atoms. Based on this CUDA kernel, we present a hybrid method which simultaneously exploits both CPU and GPU cores to provide the best performance based on selected parameters of the approximation scheme. Our CUDA kernels achieve more than two orders of magnitude speedup over serial computation for many of the molecular energetics terms. The hybrid method is shown to be able to achieve the best performance for all values of the approximation parameter.

Availability

The source code and binaries are freely available as PMEOPA (Parallel Molecular Energetic using Octree Pairwise Approximation) and downloadable from http://cvcweb.ices.utexas.edu/software.",2015-09-01 +25163418,Investigating the host specificity of Campylobacter jejuni and Campylobacter coli by sequencing gyrase subunit A.,"

Background

Surveillance and field investigations of Campylobacter infections require molecular tools with genetic markers appropriate for tracing purposes, i.e. based on the principle that some Campylobacter lineages acquire a host signature under adaptive selection pressure. We developed a sequence-based method targeting the quinolone resistance determining region within the subunit A of DNA gyrase (gyrA). Host specificity was evaluated by characterizing two collections of Campylobacter jejuni (N = 430) and Campylobacter coli (N = 302) originating from surface waters, domestic mammals and poultry.

Results

Based on nucleotide identity, a total of 80 gyrA alleles were observed. Thirty nine alleles assigned to C. coli encoding two peptides fell into three clades: two associated with surface waters and one associated with domestic mammals and poultry. The variability in GC content generated by synonymous mutations suggested that surface waters isolates originated from two distinct ecological niches. A total of 42 alleles were recorded from C. jejuni strains and encoded 8 peptides including one lying in a distinct lineage associated with wildlife. Seven of the 23 alleles encoding peptide #1 displayed the synonymous mutation G408A not identified in poultry isolates. By contrast, the substitution Ser22Gly observed in 4 different peptide groups was significantly associated with domestic birds (P = 0.001). The change in amino acid sequences Thr86Ile conferring resistance to quinolones was significantly associated with poultry (P < 0.001) in both C. jejuni and C. coli with 38.7% and 67.9% of quinolone-resistant strains, respectively.

Conclusions

The gyrA typing method presented here is an informative tool as sequences appear to be predictive of particular ecological niches. Combined with multi-locus sequence typing, it could increase the resolution of source attribution, and combined with porA/flaA typing it could be suitable for detecting temporal clusters of human cases. All gyrA alleles identified were deposited in the freely accessible online database http://pubmlst.org/campylobacter.",2014-08-28 +24678044,Shedding light on black boxes in protein identification.,"Performing a well thought-out proteomics data analysis can be a daunting task, especially for newcomers to the field. Even researchers experienced in the proteomics field can find it challenging to follow existing publication guidelines for MS-based protein identification and characterization in detail. One of the primary goals of bioinformatics is to enable any researcher to interpret the vast amounts of data generated in modern biology, by providing user-friendly and robust end-user applications, clear documentation, and corresponding teaching materials. In that spirit, we here present an extensive tutorial for peptide and protein identification, available at http://compomics.com/bioinformatics-for-proteomics. The material is completely based on freely available and open-source tools, and has already been used and refined at numerous international courses over the past 3 years. During this time, it has demonstrated its ability to allow even complete beginners to intuitively conduct advanced bioinformatics workflows, interpret the results, and understand their context. This tutorial is thus aimed at fully empowering users, by removing black boxes in the proteomics informatics pipeline.",2014-03-20 +26535109,Detecting miRNA Mentions and Relations in Biomedical Literature.,"

Introduction

MicroRNAs (miRNAs) have demonstrated their potential as post-transcriptional gene expression regulators, participating in a wide spectrum of regulatory events such as apoptosis, differentiation, and stress response. Apart from the role of miRNAs in normal physiology, their dysregulation is implicated in a vast array of diseases. Dissection of miRNA-related associations are valuable for contemplating their mechanism in diseases, leading to the discovery of novel miRNAs for disease prognosis, diagnosis, and therapy.

Motivation

Apart from databases and prediction tools, miRNA-related information is largely available as unstructured text. Manual retrieval of these associations can be labor-intensive due to steadily growing number of publications. Additionally, most of the published miRNA entity recognition methods are keyword based, further subjected to manual inspection for retrieval of relations. Despite the fact that several databases host miRNA-associations derived from text, lower sensitivity and lack of published details for miRNA entity recognition and associated relations identification has motivated the need for developing comprehensive methods that are freely available for the scientific community. Additionally, the lack of a standard corpus for miRNA-relations has caused difficulty in evaluating the available systems. We propose methods to automatically extract mentions of miRNAs, species, genes/proteins, disease, and relations from scientific literature. Our generated corpora, along with dictionaries, and miRNA regular expression are freely available for academic purposes. To our knowledge, these resources are the most comprehensive developed so far.

Results

The identification of specific miRNA mentions reaches a recall of 0.94 and precision of 0.93. Extraction of miRNA-disease and miRNA-gene relations lead to an F1 score of up to 0.76. A comparison of the information extracted by our approach to the databases miR2Disease and miRSel for the extraction of Alzheimer's disease related relations shows the capability of our proposed methods in identifying correct relations with improved sensitivity. The published resources and described methods can help the researchers for maximal retrieval of miRNA-relations and generation of miRNA-regulatory networks.

Availability

The training and test corpora, annotation guidelines, developed dictionaries, and supplementary files are available at http://www.scai.fraunhofer.de/mirna-corpora.html.",2014-08-28 +26718420,Eating difficulties in children born late and moderately preterm at 2 y of age: a prospective population-based cohort study.,"

Background

Very preterm (<32 wk of gestation) infants are at increased risk of eating difficulties compared with their term-born peers. Little is known about the impact of late and moderately preterm (LMPT; 32-36 wk of gestation) birth on eating difficulties in early childhood.

Objectives

The aims were to assess the prevalence of eating difficulties in infants born LMPT at 2 y corrected age and to explore the impact of neonatal and neurodevelopmental factors.

Design

A geographic population-based cohort of 1130 LMPT and 1255 term-born controls was recruited at birth. The parents of 651 (59%) LMPT and 771 (62%) term-born infants completed questionnaires at 2 y corrected age to assess neurodevelopmental outcomes. Parents also completed a validated questionnaire to assess eating behaviors in 4 domains: refusal/picky eating, oral motor problems, oral hypersensitivity, and eating behavior problems. Infants with scores >90th percentile were classified with eating difficulties in each domain. Neonatal data were collected at discharge, and sociodemographic information was collected via maternal interview. Poisson regression was used to assess between-group differences in eating difficulties and to explore associations with neonatal factors and neurodevelopmental outcomes at 2 y of age.

Results

In unadjusted analyses, LMPT infants were at increased risk of refusal/picky eating (RR: 1.53; 95% CI: 1.03, 2.25) and oral motor problems (RR: 1.62; 95% CI: 1.06, 2.47). Prolonged nasogastric feeding >2 wk (RR: 1.87; 95% CI: 1.07, 3.25), behavior problems (RR: 2.95; 95% CI: 1.93, 4.52), and delayed social competence (RR: 2.28; 95% CI: 1.49, 3.48) were independently associated with eating difficulties in multivariable analyses. After adjustment for these factors, there was no excess of eating difficulties in LMPT infants.

Conclusions

Infants born LMPT are at increased risk of oral motor and picky eating problems at 2 y corrected age. However, these are mediated by other neurobehavioral sequelae in this population. This trial was registered on the UK Clinical Research Network Portfolio at http://public.ukcrn.org.uk/search/ as UKCRN Study ID 7441.",2015-12-30 +25161255,Fast randomization of large genomic datasets while preserving alteration counts.,"

Motivation

Studying combinatorial patterns in cancer genomic datasets has recently emerged as a tool for identifying novel cancer driver networks. Approaches have been devised to quantify, for example, the tendency of a set of genes to be mutated in a 'mutually exclusive' manner. The significance of the proposed metrics is usually evaluated by computing P-values under appropriate null models. To this end, a Monte Carlo method (the switching-algorithm) is used to sample simulated datasets under a null model that preserves patient- and gene-wise mutation rates. In this method, a genomic dataset is represented as a bipartite network, to which Markov chain updates (switching-steps) are applied. These steps modify the network topology, and a minimal number of them must be executed to draw simulated datasets independently under the null model. This number has previously been deducted empirically to be a linear function of the total number of variants, making this process computationally expensive.

Results

We present a novel approximate lower bound for the number of switching-steps, derived analytically. Additionally, we have developed the R package BiRewire, including new efficient implementations of the switching-algorithm. We illustrate the performances of BiRewire by applying it to large real cancer genomics datasets. We report vast reductions in time requirement, with respect to existing implementations/bounds and equivalent P-value computations. Thus, we propose BiRewire to study statistical properties in genomic datasets, and other data that can be modeled as bipartite networks.

Availability and implementation

BiRewire is available on BioConductor at http://www.bioconductor.org/packages/2.13/bioc/html/BiRewire.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +25168057,LiverCancerMarkerRIF: a liver cancer biomarker interactive curation system combining text mining and expert annotations. ,"Biomarkers are biomolecules in the human body that can indicate disease states and abnormal biological processes. Biomarkers are often used during clinical trials to identify patients with cancers. Although biomedical research related to biomarkers has increased over the years and substantial effort has been expended to obtain results in these studies, the specific results obtained often contain ambiguities, and the results might contradict each other. Therefore, the information gathered from these studies must be appropriately integrated and organized to facilitate experimentation on biomarkers. In this study, we used liver cancer as the target and developed a text-mining-based curation system named LiverCancerMarkerRIF, which allows users to retrieve biomarker-related narrations and curators to curate supporting evidence on liver cancer biomarkers directly while browsing PubMed. In contrast to most of the other curation tools that require curators to navigate away from PubMed and accommodate distinct user interfaces or Web sites to complete the curation process, our system provides a user-friendly method for accessing text-mining-aided information and a concise interface to assist curators while they remain at the PubMed Web site. Biomedical text-mining techniques are applied to automatically recognize biomedical concepts such as genes, microRNA, diseases and investigative technologies, which can be used to evaluate the potential of a certain gene as a biomarker. Through the participation in the BioCreative IV user-interactive task, we examined the feasibility of using this novel type of augmented browsing-based curation method, and collaborated with curators to curate biomarker evidential sentences related to liver cancer. 
The positive feedback received from curators indicates that the proposed method can be effectively used for curation. A publicly available online database containing all the aforementioned information has been constructed at http://btm.tmu.edu.tw/livercancermarkerrif in an attempt to facilitate biomarker-related studies. http://btm.tmu.edu.tw/LiverCancerMarkerRIF/",2014-08-27 +26531126,Information maximizing component analysis of left ventricular remodeling due to myocardial infarction.,"

Background

Although adverse left ventricular shape changes (remodeling) after myocardial infarction (MI) are predictive of morbidity and mortality, current clinical assessment is limited to simple mass and volume measures, or dimension ratios such as length to width ratio. We hypothesized that information maximizing component analysis (IMCA), a supervised feature extraction method, can provide more efficient and sensitive indices of overall remodeling.

Methods

IMCA was compared to linear discriminant analysis (LDA), both supervised methods, to extract the most discriminatory global shape changes associated with remodeling after MI. Finite element shape models from 300 patients with myocardial infarction from the DETERMINE study (age 31-86, mean age 63, 20 % women) were compared with 1991 asymptomatic cases from the MESA study (age 44-84, mean age 62, 52 % women) available from the Cardiac Atlas Project. IMCA and LDA were each used to identify a single mode of global remodeling best discriminating the two groups. Logistic regression was employed to determine the association between the remodeling index and MI. Goodness-of-fit results were compared against a baseline logistic model comprising standard clinical indices.

Results

A single IMCA mode simultaneously describing end-diastolic and end-systolic shapes achieved best results (lowest Deviance, Akaike information criterion and Bayesian information criterion, and the largest area under the receiver-operating-characteristic curve). This mode provided a continuous scale where remodeling can be quantified and visualized, showing that MI patients tend to present larger size and more spherical shape, more bulging of the apex, and thinner wall thickness.

Conclusions

IMCA enables better characterization of global remodeling than LDA, and can be used to quantify progression of disease and the effect of treatment. These data and results are available from the Cardiac Atlas Project ( http://www.cardiacatlas.org ).",2015-11-03 +26528621,Modeling and Prediction of Oyster Norovirus Outbreaks along Gulf of Mexico Coast.,"

Background

Oyster norovirus outbreaks often pose high risks to human health. However, little is known about environmental factors controlling the outbreaks, and little can be done to prevent the outbreaks because they are generally considered to be unpredictable.

Objective

We sought to develop a mathematical model for predicting risks of oyster norovirus outbreaks using environmental predictors.

Methods

We developed a novel probability-based Artificial Neural Network model, called NORF model, using 21 years of environmental and norovirus outbreak data collected from Louisiana oyster harvesting areas along the Gulf of Mexico coast, USA. The NORF model involves six input variables that were selected through stepwise regression analysis and sensitivity analysis.

Results

We found that the model-based probability of norovirus outbreaks was most sensitive to gage height (the depth of water in an oyster bed) and water temperature, followed by wind, rainfall, and salinity, respectively. The NORF model predicted all historical oyster norovirus outbreaks from 1994 through 2014. Specifically, norovirus outbreaks occurred when the NORF model probability estimate was > 0.6, whereas no outbreaks occurred when the estimated probability was < 0.5. Outbreaks may also occur when the estimated probability is 0.5-0.6.

Conclusions

Our findings require further confirmation, but they suggest that oyster norovirus outbreaks may be predictable using the NORF model. The ability to predict oyster norovirus outbreaks at their onset may make it possible to prevent or at least reduce the risk of norovirus outbreaks by closing potentially affected oyster beds.

Citation

Wang J, Deng Z. 2016. Modeling and prediction of oyster norovirus outbreaks along Gulf of Mexico coast. Environ Health Perspect 124:627-633; http://dx.doi.org/10.1289/ehp.1509764.",2015-11-03 +22305189,PhosphoRice: a meta-predictor of rice-specific phosphorylation sites.,"

Background

As a result of the growing body of protein phosphorylation sites data, the number of phosphoprotein databases is constantly increasing, and dozens of tools are available for predicting protein phosphorylation sites to achieve fast automatic results. However, none of the existing tools has been developed to predict protein phosphorylation sites in rice.

Results

In this paper, the phosphorylation site predictors, NetPhos 2.0, NetPhosK, Kinasephos, Scansite, Disphos and Predphosphos, were integrated to construct meta-predictors of rice-specific phosphorylation sites using several methods, including unweighted voting, unreduced weighted voting, reduced unweighted voting and weighted voting strategies. PhosphoRice, the meta-predictor produced by using weighted voting strategy with parameters selected by restricted grid search and conditional random search, performed the best at predicting phosphorylation sites in rice. Its Matthew's Correlation Coefficient (MCC) and Accuracy (ACC) reached 0.474 and 73.8%, respectively. Compared to the best individual element predictor (Disphos_default), PhosphoRice achieved a significant increase in MCC of 0.071 (P < 0.01), and an increase in ACC of 4.6%.

Conclusions

PhosphoRice is a powerful tool for predicting unidentified phosphorylation sites in rice. Compared to the existing methods, we found that our tool showed greater robustness in ACC and MCC. PhosphoRice is available to the public at http://bioinformatics.fafu.edu.cn/PhosphoRice.",2012-02-03 +23930810,NGSPE: A pipeline for end-to-end analysis of DNA sequencing data and comparison between different platforms.,"We present NGSPE, a pipeline for variation discovery and genotyping of pair-ended Illumina next generation sequencing (NGS) data (http://ngspeanalysis.sourceforge.net/). This pipeline not only describes a set of sequential analytical steps, such as short reads alignment, genotype calling and functional variation annotation that can be conducted using open-source software tools, but also provides users a set of scripts to install the dependent software and resources and implement the pipeline on their data. A sample summary report including the concordance rate between data generated by this pipeline and different resources as well as the comparison between replication samples of two commercial platforms from Illumina and Complete Genomics is also provided. Furthermore, some of the mutations identified by the pipeline were verified using Sanger sequencing.",2013-06-13 +25169579,A web-based tool for rational screening of mutants libraries using ProSAR.,"In directed evolution experiments, it is at stake to have methods to screen efficiently the mutant libraries. We propose a web-based tool that implements an established in silico method for the rational screening of mutant libraries. The method, known as ProSAR, attempts to link sequence data to activity. The method uses statistical models trained on small experimental datasets provided by the user. These can integrate potential epistatic interactions between mutations and be used in many diverse biological contexts. It drastically improves the search for leading mutants. 
The tool is freely available to non-commercial users at http://bo-protscience.fr/prosar/.",2014-08-28 +23239403,Common data elements for clinical research in Friedreich's ataxia.,"To reduce study start-up time, increase data sharing, and assist investigators conducting clinical studies, the National Institute of Neurological Disorders and Stroke embarked on an initiative to create common data elements for neuroscience clinical research. The Common Data Element Team developed general common data elements, which are commonly collected in clinical studies regardless of therapeutic area, such as demographics. In the present project, we applied such approaches to data collection in Friedreich's ataxia (FRDA), a neurological disorder that involves multiple organ systems. To develop FRDA common data elements, FRDA experts formed a working group and subgroups to define elements in the following: ataxia and performance measures; biomarkers; cardiac and other clinical outcomes; and demographics, laboratory tests, and medical history. The basic development process included identification of international experts in FRDA clinical research, meeting by teleconference to develop a draft of standardized common data elements recommendations, vetting of recommendations across the subgroups, and dissemination of recommendations to the research community for public comment. The full recommendations were published online in September 2011 at http://www.commondataelements.ninds.nih.gov/FA.aspx. The subgroups' recommendations are classified as core, supplemental, or exploratory. Template case report forms were created for many of the core tests. The present set of data elements should ideally lead to decreased initiation time for clinical research studies and greater ability to compare and analyze data across studies. 
Their incorporation into new, ongoing studies will be assessed in an ongoing fashion to define their utility in FRDA.",2012-12-12 +26099425,Comprehensive genome-wide transcription factor analysis reveals that a combination of high affinity and low affinity DNA binding is needed for human gene regulation.,"

Background

High-throughput in vivo protein-DNA interaction experiments are currently widely used in gene regulation studies. Hitherto, comprehensive data analysis remains a challenge and for that reason most computational methods only consider the top few hundred or thousand strongest protein binding sites whereas weak protein binding sites are completely ignored.

Results

A new biophysical model of protein-DNA interactions, BayesPI2+, was developed to address the above-mentioned challenges. BayesPI2+ can be run in either a serial computation model or a parallel ensemble learning framework. BayesPI2+ allowed us to analyze all binding sites of the transcription factors, including weak binding that cannot be analyzed by other models. It is evaluated in both synthetic and real in vivo protein-DNA binding experiments. Analysing ESR1 and SPIB in breast carcinoma and activated B cell-like diffuse large B-cell lymphoma cell lines, respectively, revealed that the concerted binding to high and low affinity sites correlates best with gene expression.

Conclusions

BayesPI2+ allows us to analyze transcription factor binding on a larger scale than hitherto achieved. By this analysis, we were able to demonstrate that genes are regulated by concerted binding to high and low affinity binding sites. The program and output results are publicly available at: http://folk.uio.no/junbaiw/BayesPI2Plus.",2015-06-11 +23930024,PROcEED: Probabilistic reverse dosimetry approaches for estimating exposure distributions.,"

Unlabelled

As increasing amounts of biomonitoring survey data become available, a new discipline focused on converting such data into estimates of chemical exposures has developed. Reverse dosimetry uses a pharmacokinetic model along with measured biomarker concentrations to determine the plausible exposure concentrations-- a critical step to incorporate ground-truthing experimental data into a distribution of probable exposures that reduces model uncertainty and variability. At the population level, probabilistic reverse dosimetry can utilize a distribution of measured biomarker concentrations to identify the most likely exposure concentrations (or intake doses) experienced by the study participants. PROcEED is software that provides access to probabilistic reverse dosimetry approaches for estimating exposure distributions via a simple user interface.

Availability

PROcEED along with installation instructions is freely available for download from http://www.epa.gov/heasd/products/proceed/proceed.html.",2013-07-17 +23514123,Combining heterogeneous data sources for accurate functional annotation of proteins.,"Combining heterogeneous sources of data is essential for accurate prediction of protein function. The task is complicated by the fact that while sequence-based features can be readily compared across species, most other data are species-specific. In this paper, we present a multi-view extension to GOstruct, a structured-output framework for function annotation of proteins. The extended framework can learn from disparate data sources, with each data source provided to the framework in the form of a kernel. Our empirical results demonstrate that the multi-view framework is able to utilize all available information, yielding better performance than sequence-based models trained across species and models trained from collections of data within a given species. This version of GOstruct participated in the recent Critical Assessment of Functional Annotations (CAFA) challenge; since then we have significantly improved the natural language processing component of the method, which now provides performance that is on par with that provided by sequence information. The GOstruct framework is available for download at http://strut.sourceforge.net.",2013-02-28 +23815181,NGS-Trex: Next Generation Sequencing Transcriptome profile explorer.,"

Background

Next-Generation Sequencing (NGS) technology has exceptionally increased the ability to sequence DNA in a massively parallel and cost-effective manner. Nevertheless, NGS data analysis requires bioinformatics skills and computational resources well beyond the possibilities of many ""wet biology"" laboratories. Moreover, most of projects only require few sequencing cycles and standard tools or workflows to carry out suitable analyses for the identification and annotation of genes, transcripts and splice variants found in the biological samples under investigation. These projects can take benefits from the availability of easy to use systems to automatically analyse sequences and to mine data without the preventive need of strong bioinformatics background and hardware infrastructure.

Results

To address this issue we developed an automatic system targeted to the analysis of NGS data obtained from large-scale transcriptome studies. This system, we named NGS-Trex (NGS Transcriptome profile explorer) is available through a simple web interface http://www.ngs-trex.org and allows the user to upload raw sequences and easily obtain an accurate characterization of the transcriptome profile after the setting of few parameters required to tune the analysis procedure. The system is also able to assess differential expression at both gene and transcript level (i.e. splicing isoforms) by comparing the expression profile of different samples. By using simple query forms the user can obtain list of genes, transcripts, splice sites ranked and filtered according to several criteria. Data can be viewed as tables, text files or through a simple genome browser which helps the visual inspection of the data.

Conclusions

NGS-Trex is a simple tool for RNA-Seq data analysis mainly targeted to ""wet biology"" researchers with limited bioinformatics skills. It offers simple data mining tools to explore transcriptome profiles of samples investigated taking advantage of NGS technologies.",2013-04-22 +25322794,A novel algorithm for the precise calculation of the maximal information coefficient.,"Measuring associations is an important scientific task. A novel measurement method maximal information coefficient (MIC) was proposed to identify a broad class of associations. As foreseen by its authors, MIC implementation algorithm ApproxMaxMI is not always convergent to real MIC values. An algorithm called SG (Simulated annealing and Genetic) was developed to facilitate the optimal calculation of MIC, and the convergence of SG was proved based on Markov theory. When run on fruit fly data set including 1,000,000 pairs of gene expression profiles, the mean squared difference between SG and the exhaustive algorithm is 0.00075499, compared with 0.1834 in the case of ApproxMaxMI. The software SGMIC and its manual are freely available at http://lxy.depart.hebust.edu.cn/SGMIC/SGMIC.htm.",2014-10-17 +26095248,"Advocating for Deployed Women Veterans' Health Differences, Difficulties, and Disparities.","

Problem

The preceding article presented a glimpse of deployed women veterans, their military culture, and their experiences in the Global War on Terror (Iraq and Afghanistan) to assist civilian nurses to gain significant rapport and provide important culturally sensitive care.

Methods

Pertinent literary sources were reviewed to gather applicable data about the problem.

Findings

A confirmatory answer from the assessment question of ""Have you served in the military?"" and the use of the Military Health History Pocket Card for Clinicians (available at http://www.va.gov.oaa/pocketcard) will assist with revealing possible health risks from the increased amounts of military men and women veterans seeking (and/or returning to) a variety of community-based health services. This article about deployed women veterans examines their specific health differences (e.g., research literature, post-traumatic stress disorder, and military sexual trauma), difficulties (e.g., reproductive, gynecologic, urinary, suicide), and gender disparities (varied treatment patterns).

Conclusion

Understanding these gender situations, civilian nurses can better advocate with increasing evidence-based decisions that their physical and behavioral responses were different from their male counterparts. Continual assessment, knowledgeable care, ongoing literature review, interdisciplinary health team development, and the presence of resourceful community agencies should be a significant part of their holistic care. Conard Armstrong.",2015-06-11 +26428288,Pseudoknots in RNA folding landscapes.,"

Motivation

The function of an RNA molecule is not only linked to its native structure, which is usually taken to be the ground state of its folding landscape, but also in many cases crucially depends on the details of the folding pathways such as stable folding intermediates or the timing of the folding process itself. To model and understand these processes, it is necessary to go beyond ground state structures. The study of rugged RNA folding landscapes holds the key to answer these questions. Efficient coarse-graining methods are required to reduce the intractably vast energy landscapes into condensed representations such as barrier trees or basin hopping graphs (BHG) that convey an approximate but comprehensive picture of the folding kinetics. So far, exact and heuristic coarse-graining methods have been mostly restricted to the pseudoknot-free secondary structures. Pseudoknots, which are common motifs and have been repeatedly hypothesized to play an important role in guiding folding trajectories, were usually excluded.

Results

We generalize the BHG framework to include pseudoknotted RNA structures and systematically study the differences in predicted folding behavior depending on whether pseudoknotted structures are allowed to occur as folding intermediates or not. We observe that RNAs with pseudoknotted ground state structures tend to have more pseudoknotted folding intermediates than RNAs with pseudoknot-free ground state structures. The occurrence and influence of pseudoknotted intermediates on the folding pathway, however, appear to depend very strongly on the individual RNAs so that no general rule can be inferred.

Availability and implementation

The algorithms described here are implemented in C++ as standalone programs. Its source code and Supplemental material can be freely downloaded from http://www.tbi.univie.ac.at/bhg.html.

Contact

qin@bioinf.uni-leipzig.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-10-01 +26819872,Peptide Peak Detection for Low Resolution MALDI-TOF Mass Spectrometry.,"A new peak detection method has been developed for rapid selection of peptide and its fragment ion peaks for protein identification using tandem mass spectrometry. The algorithm applies classification of peak intensities present in the defined mass range to determine the noise level. A threshold is then given to select ion peaks according to the determined noise level in each mass range. This algorithm was initially designed for the peak detection of low resolution peptide mass spectra, such as matrix-assisted laser desorption/ionization Time-of-Flight (MALDI-TOF) mass spectra. But it can also be applied to other type of mass spectra. This method has demonstrated obtaining a good rate of number of real ions to noises for even poorly fragmented peptide spectra. The effect of using peak lists generated from this method produces improved protein scores in database search results. The reliability of the protein identifications is increased by finding more peptide identifications. This software tool is freely available at the Mass++ home page (http://www.first-ms3d.jp/english/achievement/software/).",2014-08-23 +25149164,Risk factors for venous thromboembolism in patients undergoing craniotomy for neoplastic disease.,"Patients undergoing neurosurgical procedures for neoplasia have historically been considered at higher risk for developing venous thromboembolism (VTE). We sought to identify risk factors associated with VTE in patients undergoing craniotomy for tumor resection. We reviewed a national surgical quality database (American College of Surgeons National Surgical Quality Improvement Project, ACS-NSQIP, http://site.acsnsqip.org/ ). Patients undergoing non-emergent craniotomy for neoplastic indications were identified based on current procedural terminology codes. 
Clinical factors were identified that were associated with VTE events. 3,098 patients who underwent non-emergent craniotomy were identified. 1,741 patients underwent procedures for neoplastic disease (56.2 %). The rate of DVT in these patients was 3.2 % compared to 1.4 % in other neurosurgical patients (OR 2.30, CI 2.29-2.30). The rate of pulmonary embolism was 1.8 % compared to 0.5 % (OR 3.61, CI 3.60-3.62). Univariate analysis identified several factors correlated with VTE. Pre-operative characteristics associated with VTE were the presence of impaired sensorium, dependent functional status, and age > 60 years. Total operative time > 4 h was associated with VTE. Post-operative events associated with VTE included pneumonia, unplanned intubation, fail to wean from ventilator, UTI, stroke, sepsis and septic shock. Age > 60, OR time > 4 h, UTI, and septic shock were significantly associated with VTE in multivariate analysis. Patients undergoing craniotomy for neoplasm are at increased risk of VTE. This risk appears to be modified by pre-operative medical comorbidities, longer operative time, and post-operative complications.",2014-08-23 +22359433,BiodEnz:A database of biodegrading enzymes.,"

Unlabelled

Azo dyes, which are characterized by azo bonds, are a predominant class of colorants used in tattooing, cosmetics, foods, textile and consumer products. Laccases (EC 1.10.3.2), lignin peroxidases (EC 1.11.1.14), Azo reductases (EC 1.7.1.6) of different micro organisms are mainly useful for the development of biodegradation systems as they catalyse reductive cleavage of azo groups (-N=N-). Laccases have very broad substrate specificity with respect to the electron donor and are capable of oxidizing phenols and aromatic amines. Azoreductase belongs to the family of oxidoreductases, acting on other nitrogenous compounds as donors with NAD+ or NADP+ as acceptor. Lignin peroxidase enzymes are highly non-specific and are well reported to decolourize various dyes. We have developed BiodEnz database by collecting information like strains that produce particular enzymes, azo dyes that are degraded, substrate specificity, molecular weight, the optimum temperature and pH, sequence data of the above enzymes, as the most effective inoculants used for bioremediation are able to degrade dyes over a broad concentration range, tolerate a range of environmental conditions of temperature, pH, and activity of the enzymes. The database can be searched by using a user friendly web interface.

Availability

The database is available for free at http://www.biodenzdatabase.in.",2012-01-06 +25260792,Dynamic evolution of clonal epialleles revealed by methclone.,"We describe methclone, a novel method to identify epigenetic loci that harbor large changes in the clonality of their epialleles (epigenetic alleles). Methclone efficiently analyzes genome-wide DNA methylation sequencing data. We quantify the changes using a composition entropy difference calculation and also introduce a new measure of global clonality shift, loci with epiallele shift per million loci covered, which enables comparisons between different samples to gauge overall epiallelic dynamics. Finally, we demonstrate the utility of methclone in capturing functional epiallele shifts in leukemia patients from diagnosis to relapse. Methclone is open-source and freely available at https://code.google.com/p/methclone.",2014-09-27 +24389660,Estimation of fluorescence-tagged RNA numbers from spot intensities.,"

Motivation

Present research on gene expression using live cell imaging and fluorescent proteins or tagged RNA requires accurate automated methods of quantification of these molecules from the images. Here, we propose a novel automated method for classifying pixel intensities of fluorescent spots to RNA numbers.

Results

The method relies on a new model of intensity distributions of tagged RNAs, for which we estimated parameter values in maximum likelihood sense from measurement data, and constructed a maximum a posteriori classifier to estimate RNA numbers in fluorescent RNA spots. We applied the method to estimate the number of tagged RNAs in individual live Escherichia coli cells containing a gene coding for an RNA with MS2-GFP binding sites. We tested the method using two constructs, coding for either 96 or 48 binding sites, and obtained similar distributions of RNA numbers, showing that the method is adaptive. We further show that the results agree with a method that uses time series data and with quantitative polymerase chain reaction measurements. Lastly, using simulated data, we show that the method is accurate in realistic parameter ranges. This method should, in general, be applicable to live single-cell measurements of low-copy number fluorescence-tagged molecules.

Availability and implementation

MATLAB extensions written in C for parameter estimation and finding decision boundaries are available under Mozilla public license at http://www.cs.tut.fi/%7ehakkin22/estrna/ CONTACT: andre.ribeiro@tut.fi.",2014-01-02 +25149689,FusoBase: an online Fusobacterium comparative genomic analysis platform. ,"Fusobacterium are anaerobic gram-negative bacteria that have been associated with a wide spectrum of human infections and diseases. As the biology of Fusobacterium is still not well understood, comparative genomic analysis on members of this species will provide further insights on their taxonomy, phylogeny, pathogenicity and other information that may contribute to better management of infections and diseases. To facilitate the ongoing genomic research on Fusobacterium, a specialized database with easy-to-use analysis tools is necessary. Here we present FusoBase, an online database providing access to genome-wide annotated sequences of Fusobacterium strains as well as bioinformatics tools, to support the expanding scientific community. Using our custom-developed Pairwise Genome Comparison tool, we demonstrate how differences between two user-defined genomes and how insertion of putative prophages can be identified. In addition, Pathogenomics Profiling Tool is capable of clustering predicted genes across Fusobacterium strains and visualizing the results in the form of a heat map with dendrogram. http://fusobacterium.um.edu.my.",2014-08-22 +24766403,"Mudi, a web tool for identifying mutations by bioinformatics analysis of whole-genome sequence.","In forward genetics, identification of mutations is a time-consuming and laborious process. Modern whole-genome sequencing, coupled with bioinformatics analysis, has enabled fast and cost-effective mutation identification. However, for many experimental researchers, bioinformatics analysis is still a difficult aspect of whole-genome sequencing. 
To address this issue, we developed a browser-accessible and easy-to-use bioinformatics tool called Mutation discovery (Mudi; http://naoii.nig.ac.jp/mudi_top.html), which enables 'one-click' identification of causative mutations from whole-genome sequence data. In this study, we optimized Mudi for pooled-linkage analysis aimed at identifying mutants in yeast model systems. After raw sequencing data are uploaded, Mudi performs sequential analysis, including mapping, detection of variant alleles, filtering and removal of background polymorphisms, prioritization, and annotation. In an example study of suppressor mutants of ptr1-1 in the fission yeast Schizosaccharomyces pombe, pooled-linkage analysis with Mudi identified mip1(+) , a component of Target of Rapamycin Complex 1 (TORC1), as a novel component involved in RNA interference (RNAi)-related cell-cycle control. The accessibility of Mudi will accelerate systematic mutation analysis in forward genetics.",2014-04-28 +24167155,Telling metabolic stories to explore metabolomics data: a case study on the yeast response to cadmium exposure.,"

Motivation

The increasing availability of metabolomics data enables to better understand the metabolic processes involved in the immediate response of an organism to environmental changes and stress. The data usually come in the form of a list of metabolites whose concentrations significantly changed under some conditions, and are thus not easy to interpret without being able to precisely visualize how such metabolites are interconnected.

Results

We present a method that enables to organize the data from any metabolomics experiment into metabolic stories. Each story corresponds to a possible scenario explaining the flow of matter between the metabolites of interest. These scenarios may then be ranked in different ways depending on which interpretation one wishes to emphasize for the causal link between two affected metabolites: enzyme activation, enzyme inhibition or domino effect on the concentration changes of substrates and products. Equally probable stories under any selected ranking scheme can be further grouped into a single anthology that summarizes, in a unique subnetwork, all equivalently plausible alternative stories. An anthology is simply a union of such stories. We detail an application of the method to the response of yeast to cadmium exposure. We use this system as a proof of concept for our method, and we show that we are able to find a story that reproduces very well the current knowledge about the yeast response to cadmium. We further show that this response is mostly based on enzyme activation. We also provide a framework for exploring the alternative pathways or side effects this local response is expected to have in the rest of the network. We discuss several interpretations for the changes we see, and we suggest hypotheses that could in principle be experimentally tested. Noticeably, our method requires simple input data and could be used in a wide variety of applications.

Availability and implementation

The code for the method presented in this article is available at http://gobbolino.gforge.inria.fr.",2013-10-27 +23143278,Rapid prediction of multi-dimensional NMR data sets.,"We present a computational environment for Fast Analysis of multidimensional NMR DAta Sets (FANDAS) that allows assembling multidimensional data sets from a variety of input parameters and facilitates comparing and modifying such ""in silico"" data sets during the various stages of the NMR data analysis. The input parameters can vary from (partial) NMR assignments directly obtained from experiments to values retrieved from in silico prediction programs. The resulting predicted data sets enable a rapid evaluation of sample labeling in light of spectral resolution and structural content, using standard NMR software such as Sparky. In addition, direct comparison to experimental data sets can be used to validate NMR assignments, distinguish different molecular components, refine structural models or other parameters derived from NMR data. The method is demonstrated in the context of solid-state NMR data obtained for the cyclic nucleotide binding domain of a bacterial cyclic nucleotide-gated channel and on membrane-embedded sensory rhodopsin II. FANDAS is freely available as web portal under WeNMR ( http://www.wenmr.eu/services/FANDAS ).",2012-11-10 +26315912,Hierarchical boosting: a machine-learning framework to detect and classify hard selective sweeps in human populations.,"

Motivation

Detecting positive selection in genomic regions is a recurrent topic in natural population genetic studies. However, there is little consistency among the regions detected in several genome-wide scans using different tests and/or populations. Furthermore, few methods address the challenge of classifying selective events according to specific features such as age, intensity or state (completeness).

Results

We have developed a machine-learning classification framework that exploits the combined ability of some selection tests to uncover different polymorphism features expected under the hard sweep model, while controlling for population-specific demography. As a result, we achieve high sensitivity toward hard selective sweeps while adding insights about their completeness (whether a selected variant is fixed or not) and age of onset. Our method also determines the relevance of the individual methods implemented so far to detect positive selection under specific selective scenarios. We calibrated and applied the method to three reference human populations from The 1000 Genome Project to generate a genome-wide classification map of hard selective sweeps. This study improves detection of selective sweep by overcoming the classical selection versus no-selection classification strategy, and offers an explanation to the lack of consistency observed among selection tests when applied to real data. Very few signals were observed in the African population studied, while our method presents higher sensitivity in this population demography.

Availability and implementation

The genome-wide results for three human populations from The 1000 Genomes Project and an R-package implementing the 'Hierarchical Boosting' framework are available at http://hsb.upf.edu/.",2015-08-26 +24271388,The Gene Expression Barcode 3.0: improved data processing and mining tools.,"The Gene Expression Barcode project, http://barcode.luhs.org, seeks to determine the genes expressed for every tissue and cell type in humans and mice. Understanding the absolute expression of genes across tissues and cell types has applications in basic cell biology, hypothesis generation for gene function and clinical predictions using gene expression signatures. In its current version, this project uses the abundant publicly available microarray data sets combined with a suite of single-array preprocessing, quality control and analysis methods. In this article, we present the improvements that have been made since the previous version of the Gene Expression Barcode in 2011. These include a variety of new data mining tools and summaries, estimated transcriptomes and curated annotations.",2013-11-22 +21811802,Simple database to select promoters for plant transgenesis.,"The experiments with transgenic plants frequently demand selection of promoters providing appropriate transcription patterns. The set of promoters commonly used in vectors and genetic constructs is very limited, and these promoters provide only a few variants of gene expression patterns. Moreover, identical promoters in a complex construct can induce transgene silencing. This problem can be solved using a variety of plant gene promoters with experimentally verified characteristics. However, this requires a time-consuming analysis of literature data. Here, we describe a database of plant promoters (TransGene Promoters, TGP; http://wwwmgs.bionet.nsc.ru/mgs/dbases/tgp/home.html ). 
TGP contains the information on genomic DNA segments providing certain expression patterns of reporter genes in experiments with transgenic plants. TGP was constructed on the SRS platform, and its interface allows users to search for the promoters with particular characteristics.",2011-08-03 +25990732,StarScan: a web server for scanning small RNA targets from degradome sequencing data.,"Endogenous small non-coding RNAs (sRNAs), including microRNAs, PIWI-interacting RNAs and small interfering RNAs, play important gene regulatory roles in animals and plants by pairing to the protein-coding and non-coding transcripts. However, computationally assigning these various sRNAs to their regulatory target genes remains technically challenging. Recently, a high-throughput degradome sequencing method was applied to identify biologically relevant sRNA cleavage sites. In this study, an integrated web-based tool, StarScan (sRNA target Scan), was developed for scanning sRNA targets using degradome sequencing data from 20 species. Given a sRNA sequence from plants or animals, our web server performs an ultrafast and exhaustive search for potential sRNA-target interactions in annotated and unannotated genomic regions. The interactions between small RNAs and target transcripts were further evaluated using a novel tool, alignScore. A novel tool, degradomeBinomTest, was developed to quantify the abundance of degradome fragments located at the 9-11th nucleotide from the sRNA 5' end. This is the first web server for discovering potential sRNA-mediated RNA cleavage events in plants and animals, which affords mechanistic insights into the regulatory roles of sRNAs. The StarScan web server is available at http://mirlab.sysu.edu.cn/starscan/.",2015-05-18 +23087384,Pedimap: software for the visualization of genetic and phenotypic data in pedigrees.,"Pedimap is a user-friendly software tool for visualizing phenotypic and genotypic data for related individuals linked in pedigrees. 
Genetic data can include marker scores, Identity-by-Descent probabilities, and marker linkage map positions, allowing the visualization of haplotypes through lineages. The pedigrees can accommodate all types of inheritance, including selfing, cloning, and repeated backcrossing, and all ploidy levels are supported. Visual association of the genetic data with phenotypic data simplifies the exploration of large data sets, thereby improving breeding decision making. Data are imported from text files; in addition data exchange with other software packages (FlexQTL(TM) and GenomeStudio(TM)) is possible. Instructions for use and an executable version compatible with the Windows platform are available for free from http://www.plantbreeding.wur.nl/UK/software_pedimap.html.",2012-10-19 +23706300,An integrated workflow for DNA methylation analysis.,"The analysis of cytosine methylation provides a new way to assess and describe epigenetic regulation at a whole-genome level in many eukaryotes. DNA methylation has a demonstrated role in the genome stability and protection, regulation of gene expression and many other aspects of genome function and maintenance. BS-seq is a relatively unbiased method for profiling the DNA methylation, with a resolution capable of measuring methylation at individual cytosines. Here we describe, as an example, a workflow to handle DNA methylation analysis, from BS-seq library preparation to the data visualization. We describe some applications for the analysis and interpretation of these data. 
Our laboratory provides public access to plant DNA methylation data via visualization tools available at our ""Next-Gen Sequence"" websites (http://mpss.udel.edu), along with small RNA, RNA-seq and other data types.",2013-03-30 +25577191,"CodABC: a computational framework to coestimate recombination, substitution, and molecular adaptation rates by approximate Bayesian computation.","The estimation of substitution and recombination rates can provide important insights into the molecular evolution of protein-coding sequences. Here, we present a new computational framework, called ""CodABC,"" to jointly estimate recombination, substitution and synonymous and nonsynonymous rates from coding data. CodABC uses approximate Bayesian computation with and without regression adjustment and implements a variety of codon models, intracodon recombination, and longitudinal sampling. CodABC can provide accurate joint parameter estimates from recombining coding sequences, often outperforming maximum-likelihood methods based on more approximate models. In addition, CodABC allows for the inclusion of several nuisance parameters such as those representing codon frequencies, transition matrices, heterogeneity across sites or invariable sites. CodABC is freely available from http://code.google.com/p/codabc/, includes a GUI, extensive documentation and ready-to-use examples, and can run in parallel on multicore machines.",2015-01-09 +25623496,Background intensity correction for terabyte-sized time-lapse images.,"Several computational challenges associated with large-scale background image correction of terabyte-sized fluorescent images are discussed and analysed in this paper. Dark current, flat-field and background correction models are applied over a mosaic of hundreds of spatially overlapping fields of view (FOVs) taken over the course of several days, during which the background diminishes as cell colonies grow. 
The motivation of our work comes from the need to quantify the dynamics of OCT-4 gene expression via a fluorescent reporter in human stem cell colonies. Our approach to background correction is formulated as an optimization problem over two image partitioning schemes and four analytical correction models. The optimization objective function is evaluated in terms of (1) the minimum root mean square (RMS) error remaining after image correction, (2) the maximum signal-to-noise ratio (SNR) reached after downsampling and (3) the minimum execution time. Based on the analyses with measured dark current noise and flat-field images, the most optimal GFP background correction is obtained by using a data partition based on forming a set of submosaic images with a polynomial surface background model. The resulting image after correction is characterized by an RMS of about 8, and an SNR value of a 4 × 4 downsampling above 5 by Rose criterion. The new technique generates an image with half RMS value and double SNR value when compared to an approach that assumes constant background throughout the mosaic. We show that the background noise in terabyte-sized fluorescent image mosaics can be corrected computationally with the optimized triplet (data partition, model, SNR driven downsampling) such that the total RMS value from background noise does not exceed the magnitude of the measured dark current noise. In this case, the dark current noise serves as a benchmark for the lowest noise level that an imaging system can achieve. In comparison to previous work, the past fluorescent image background correction methods have been designed for single FOV and have not been applied to terabyte-sized images with large mosaic FOVs, low SNR and diminishing access to background information over time as cell colonies span entirely multiple FOVs. 
The code is available as open-source from the following link https://isg.nist.gov/.",2014-12-30 +25326608,"The relative and absolute frequencies of angiosperm sexual systems: dioecy, monoecy, gynodioecy, and an updated online database.","

Unlabelled

Premise of the study

Separating sexual function between different individuals carries risks, especially for sedentary organisms. Nevertheless, many land plants have unisexual gametophytes or sporophytes. This study brings together data and theoretical insights from research over the past 20 yr on the occurrence and frequency of plant sexual systems, focusing on the flowering plants.

Methods

A list of genera with dioecious species, along with other information, is made available (http://www.umsl.edu/~renners/). Frequencies of other sexual systems are tabulated, and data on the genetic regulation, ecological context, and theoretical benefits of dioecy reviewed.

Key results

There are 15600 dioecious angiosperms in 987 genera and 175 families, or 5-6% of the total species (7% of genera, 43% of families), with somewhere between 871 and 5000 independent origins of dioecy. Some 43% of all dioecious angiosperms are in just 34 entirely dioecious clades, arguing against a consistent negative influence of dioecy on diversification. About 31.6% of the dioecious species are wind-pollinated, compared with 5.5-6.4% of nondioecious angiosperms. Also, 1.4% of all angiosperm genera contain dioecious and monoecious species, while 0.4% contain dioecious and gynodioecious species. All remaining angiosperm sexual systems are rare. Chromosomal sex determination is known from 40 species; environmentally modulated sex allocation is common. Few phylogenetic studies have focused on the evolution of dioecy.

Conclusions

The current focus is on the genetic mechanisms underlying unisexual flowers and individuals. Mixed strategies of sexual and vegetative dispersal, together with plants' sedentary life style, may often favor polygamous systems in which sexually inconstant individuals can persist. Nevertheless, there are huge entirely dioecious clades of tropical woody plants.",2014-09-24 +26859631,Prenatal Arsenic Exposure and Birth Outcomes among a Population Residing near a Mining-Related Superfund Site.,"

Background

Limited epidemiologic data exist on prenatal arsenic exposure and fetal growth, particularly in the context of co-exposure to other toxic metals.

Objective

We examined whether prenatal arsenic exposure predicts birth outcomes among a rural U.S. population, while adjusting for exposure to lead and manganese.

Methods

We collected maternal and umbilical cord blood samples at delivery from 622 mother-infant pairs residing near a mining-related Superfund site in Northeast Oklahoma. Whole blood arsenic, lead, and manganese were measured using inductively coupled plasma mass spectrometry. We modeled associations between arsenic concentrations and birth weight, gestational age, head circumference, and birth weight for gestational age.

Results

Median (25th-75th percentile) maternal and umbilical cord blood metal concentrations, respectively, were as follows: arsenic, 1.4 (1.0-2.3) and 2.4 (1.8-3.3) μg/L; lead, 0.6 (0.4-0.9) and 0.4 (0.3-0.6) μg/dL; manganese, 22.7 (18.8-29.3) and 41.7 (32.2-50.4) μg/L. We estimated negative associations between maternal blood arsenic concentrations and birth outcomes. In multivariable regression models adjusted for lead and manganese, an interquartile range increase in maternal blood arsenic was associated with -77.5 g (95% CI: -127.8, -27.3) birth weight, -0.13 weeks (95% CI: -0.27, 0.01) gestation, -0.22 cm (95% CI: -0.42, -0.03) head circumference, and -0.14 (95% CI: -0.24, -0.04) birth weight for gestational age z-score units. Interactions between arsenic concentrations and lead or manganese were not statistically significant.

Conclusions

In a population with environmental exposure levels similar to the U.S. general population, maternal blood arsenic was negatively associated with fetal growth. Given the potential for relatively common fetal and early childhood arsenic exposures, our finding that prenatal arsenic can adversely affect birth outcomes is of considerable public health importance.

Citation

Claus Henn B, Ettinger AS, Hopkins MR, Jim R, Amarasiriwardena C, Christiani DC, Coull BA, Bellinger DC, Wright RO. 2016. Prenatal arsenic exposure and birth outcomes among a population residing near a mining-related Superfund site. Environ Health Perspect 124:1308-1315; http://dx.doi.org/10.1289/ehp.1510070.",2016-02-09 +23559638,Bayesian hierarchical model of protein-binding microarray k-mer data reduces noise and identifies transcription factor subclasses and preferred k-mers.,"

Motivation

Sequence-specific transcription factors (TFs) regulate the expression of their target genes through interactions with specific DNA-binding sites in the genome. Data on TF-DNA binding specificities are essential for understanding how regulatory specificity is achieved.

Results

Numerous studies have used universal protein-binding microarray (PBM) technology to determine the in vitro binding specificities of hundreds of TFs for all possible 8 bp sequences (8mers). We have developed a Bayesian analysis of variance (ANOVA) model that decomposes these 8mer data into background noise, TF familywise effects and effects due to the particular TF. Adjusting for background noise improves PBM data quality and concordance with in vivo TF binding data. Moreover, our model provides simultaneous identification of TF subclasses and their shared sequence preferences, and also of 8mers bound preferentially by individual members of TF subclasses. Such results may aid in deciphering cis-regulatory codes and determinants of protein-DNA binding specificity.

Availability and implementation

Source code, compiled code and R and Python scripts are available from http://thebrain.bwh.harvard.edu/hierarchicalANOVA.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-04 +21177658,IsoBase: a database of functionally related proteins across PPI networks.,"We describe IsoBase, a database identifying functionally related proteins, across five major eukaryotic model organisms: Saccharomyces cerevisiae, Drosophila melanogaster, Caenorhabditis elegans, Mus musculus and Homo Sapiens. Nearly all existing algorithms for orthology detection are based on sequence comparison. Although these have been successful in orthology prediction to some extent, we seek to go beyond these methods by the integration of sequence data and protein-protein interaction (PPI) networks to help in identifying true functionally related proteins. With that motivation, we introduce IsoBase, the first publicly available ortholog database that focuses on functionally related proteins. The groupings were computed using the IsoRankN algorithm that uses spectral methods to combine sequence and PPI data and produce clusters of functionally related proteins. These clusters compare favorably with those from existing approaches: proteins within an IsoBase cluster are more likely to share similar Gene Ontology (GO) annotation. A total of 48,120 proteins were clustered into 12,693 functionally related groups. The IsoBase database may be browsed for functionally related proteins across two or more species and may also be queried by accession numbers, species-specific identifiers, gene name or keyword. The database is freely available for download at http://isobase.csail.mit.edu/.",2011-01-01 +26411869,CLUSTERnGO: a user-defined modelling platform for two-stage clustering of time-series data.,"

Motivation

Simple bioinformatic tools are frequently used to analyse time-series datasets regardless of their ability to deal with transient phenomena, limiting the meaningful information that may be extracted from them. This situation requires the development and exploitation of tailor-made, easy-to-use and flexible tools designed specifically for the analysis of time-series datasets.

Results

We present a novel statistical application called CLUSTERnGO, which uses a model-based clustering algorithm that fulfils this need. This algorithm involves two components of operation. Component 1 constructs a Bayesian non-parametric model (Infinite Mixture of Piecewise Linear Sequences) and Component 2 applies a novel clustering methodology (Two-Stage Clustering). The software can also assign biological meaning to the identified clusters using an appropriate ontology. It applies multiple hypothesis testing to report the significance of these enrichments. The algorithm has a four-phase pipeline. The application can be executed using either command-line tools or a user-friendly Graphical User Interface. The latter has been developed to address the needs of both specialist and non-specialist users. We use three diverse test cases to demonstrate the flexibility of the proposed strategy. In all cases, CLUSTERnGO not only outperformed existing algorithms in assigning unique GO term enrichments to the identified clusters, but also revealed novel insights regarding the biological systems examined, which were not uncovered in the original publications.

Availability and implementation

The C++ and QT source codes, the GUI applications for Windows, OS X and Linux operating systems and user manual are freely available for download under the GNU GPL v3 license at http://www.cmpe.boun.edu.tr/content/CnG.

Contact

sgo24@cam.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-26 +24594938,Genetic factors regulating lung vasculature and immune cell functions associate with resistance to pneumococcal infection.,"Streptococcus pneumoniae is an important human pathogen responsible for high mortality and morbidity worldwide. The susceptibility to pneumococcal infections is controlled by as yet unknown genetic factors. To elucidate these factors could help to develop new medical treatments and tools to identify those most at risk. In recent years genome wide association studies (GWAS) in mice and humans have proved successful in identification of causal genes involved in many complex diseases for example diabetes, systemic lupus or cholesterol metabolism. In this study a GWAS approach was used to map genetic loci associated with susceptibility to pneumococcal infection in 26 inbred mouse strains. As a result four candidate QTLs were identified on chromosomes 7, 13, 18 and 19. Interestingly, the QTL on chromosome 7 was located within S. pneumoniae resistance QTL (Spir1) identified previously in a linkage study of BALB/cOlaHsd and CBA/CaOlaHsd F2 intercrosses. We showed that only a limited number of genes encoded within the QTLs carried phenotype-associated polymorphisms (22 genes out of several hundred located within the QTLs). These candidate genes are known to regulate TGFβ signalling, smooth muscle and immune cells functions. Interestingly, our pulmonary histopathology and gene expression data demonstrated, lung vasculature plays an important role in resistance to pneumococcal infection. Therefore we concluded that the cumulative effect of these candidate genes on vasculature and immune cells functions as contributory factors in the observed differences in susceptibility to pneumococcal infection. We also propose that TGFβ-mediated regulation of fibroblast differentiation plays an important role in development of invasive pneumococcal disease. 
Gene expression data submitted to the NCBI Gene Expression Omnibus Accession No: GSE49533 SNP data submitted to NCBI dbSNP Short Genetic Variation http://www.ncbi.nlm.nih.gov/projects/SNP/snp_viewTable.cgi?handle=MUSPNEUMONIA.",2014-03-03 +26515903,STEAM - Statistical Template Estimation for Abnormality Mapping: A personalized DTI analysis technique with applications to the screening of preterm infants.,"We introduce the STEAM DTI analysis engine: a whole brain voxel-based analysis technique for the examination of diffusion tensor images (DTIs). Our STEAM analysis technique consists of two parts. First, we introduce a collection of statistical templates that represent the distribution of DTIs for a normative population. These templates include various diffusion measures from the full tensor, to fractional anisotropy, to 12 other tensor features. Second, we propose a voxel-based analysis (VBA) pipeline that is reliable enough to identify areas in individual DTI scans that differ significantly from the normative group represented in the STEAM statistical templates. We identify and justify choices in the VBA pipeline relating to multiple comparison correction, image smoothing, and dealing with non-normally distributed data. Finally, we provide a proof of concept for the utility of STEAM on a cohort of 134 very preterm infants. We generated templates from scans of 55 very preterm infants whose T1 MRI scans show no abnormalities and who have normal neurodevelopmental outcome. The remaining 79 infants were then compared to the templates using our VBA technique. We show: (a) that our statistical templates display the white matter development expected over the modeled time period, and (b) that our VBA results detect abnormalities in the diffusion measurements that relate significantly with both the presence of white matter lesions and with neurodevelopmental outcomes at 18months. 
Most notably, we show that STEAM produces personalized results while also being able to highlight abnormalities across the whole brain and at the scale of individual voxels. While we show the value of STEAM on DTI scans from a preterm infant cohort, STEAM can be equally applied to other cohorts as well. To facilitate this whole-brain personalized DTI analysis, we made STEAM publicly available at http://www.sfu.ca/bgb2/steam.",2015-10-26 +25128977,Accurate de novo and transmitted indel detection in exome-capture data using microassembly.,"We present an open-source algorithm, Scalpel (http://scalpel.sourceforge.net/), which combines mapping and assembly for sensitive and specific discovery of insertions and deletions (indels) in exome-capture data. A detailed repeat analysis coupled with a self-tuning k-mer strategy allows Scalpel to outperform other state-of-the-art approaches for indel discovery, particularly in regions containing near-perfect repeats. We analyzed 593 families from the Simons Simplex Collection and demonstrated Scalpel's power to detect long (≥30 bp) transmitted events and enrichment for de novo likely gene-disrupting indels in autistic children.",2014-08-17 +25657331,Evolutionary profiles improve protein-protein interaction prediction from sequence.,"

Motivation

Many methods predict the physical interaction between two proteins (protein-protein interactions; PPIs) from sequence alone. Their performance drops substantially for proteins not used for training.

Results

Here, we introduce a new approach to predict PPIs from sequence alone which is based on evolutionary profiles and profile-kernel support vector machines. It improved over the state-of-the-art, in particular for proteins that are sequence-dissimilar to proteins with known interaction partners. Filtering by gene expression data increased accuracy further for the few, most reliably predicted interactions (low recall). The overall improvement was so substantial that we compiled a list of the most reliably predicted PPIs in human. Our method makes a significant difference for biology because it improves most for the majority of proteins without experimental annotations.

Availability and implementation

Implementation and most reliably predicted human PPIs available at https://rostlab.org/owiki/index.php/Profppikernel.",2015-02-04 +25897123,miRiadne: a web tool for consistent integration of miRNA nomenclature.,"The miRBase is the official miRNA repository which keeps the annotation updated on newly discovered miRNAs: it is also used as a reference for the design of miRNA profiling platforms. Nomenclature ambiguities generated by loosely updated platforms and design errors lead to incompatibilities among platforms, even from the same vendor. Published miRNA lists are thus generated with different profiling platforms that refer to diverse and not updated annotations. This greatly compromises searches, comparisons and analyses that rely on miRNA names only without taking into account the mature sequences, which is particularly critic when such analyses are carried over automatically. In this paper we introduce miRiadne, a web tool to harmonize miRNA nomenclature, which takes into account the original miRBase versions from 10 up to 21, and annotations of 40 common profiling platforms from nine brands that we manually curated. miRiadne uses the miRNA mature sequence to link miRBase versions and/or platforms to prevent nomenclature ambiguities. miRiadne was designed to simplify and support biologists and bioinformaticians in re-annotating their own miRNA lists and/or data sets. As Ariadne helped Theseus in escaping the mythological maze, miRiadne will help the miRNA researcher in escaping the nomenclature maze. miRiadne is freely accessible from the URL http://www.miriadne.org.",2015-04-20 +23645815,RegaDB: community-driven data management and analysis for infectious diseases.,"

Summary

RegaDB is a free and open source data management and analysis environment for infectious diseases. RegaDB allows clinicians to store, manage and analyse patient data, including viral genetic sequences. Moreover, RegaDB provides researchers with a mechanism to collect data in a uniform format and offers them a canvas to make newly developed bioinformatics tools available to clinicians and virologists through a user friendly interface.

Availability and implementation

Source code, binaries and documentation are available on http://rega.kuleuven.be/cev/regadb. RegaDB is written in the Java programming language, using a web-service-oriented architecture.",2013-05-02 +25504850,GSDS 2.0: an upgraded gene feature visualization server.,"

Unlabelled

Visualizing genes' structure and annotated features helps biologists to investigate their function and evolution intuitively. The Gene Structure Display Server (GSDS) has been widely used by more than 60 000 users since its first publication in 2007. Here, we report the upgraded GSDS 2.0 with a newly designed interface, support for more types of annotation features and formats, as well as an integrated visual editor for editing the generated figure. Moreover, a user-specified phylogenetic tree can be added to facilitate further evolutionary analysis. The full source code is also available for downloading.

Availability and implementation

Web server and source code are freely available at http://gsds.cbi.pku.edu.cn.

Contact

gaog@mail.cbi.pku.edu.cn or gsds@mail.cbi.pku.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-10 +25128482,The effect of CTLA-4 A49G polymorphism on rheumatoid arthritis risk: a meta-analysis.,"

Background

Recently, a number of studies have been performed to explore the association between CTLA-4 A49G polymorphism and rheumatoid arthritis (RA). However, the results of previous works are still controversial and ambiguous.

Methods

In this work, we attempted to perform an updated meta-analysis of available case-control studies in order to assess the association between CTLA-4 A49G polymorphism and RA risk. We searched the various citation databases without limits on languages. Article searching was performed by screening the references of retrieved studies manually. Odds ratios (OR) and 95% confidence intervals (95% CI) were calculated to evaluate the strength of the association.

Results

In total, we compiled 27 studies in 24 articles (9805 RA patients and 10691 control subjects) into our meta-analysis work. We found significant association between CTLA-4 A49G polymorphism and RA risk (GG vs. AA: OR = 1.13, 95% CI = 1.03-1.23; GA vs. AA: OR = 1.19, 95% CI = 1.07-1.33; GA + GG vs. AA: OR = 1.18, 95% CI = 1.07-1.29). In the subgroup analysis by ethnicity, evidence of significantly increased risk was also found in both Asian (GG vs. AA: OR = 1.34, 95% CI = 1.15-1.55; GA + GG vs. AA: OR = 1.24, 95% CI = 1.08-1.41) and Caucasian population (GA vs. AA: OR = 1.19, 95% CI = 1.03-1.37; GA + GG vs. AA: OR = 1.14, 95% CI = 1.01-1.29). No evidence of publication bias was found in this work.

Conclusions

Our meta-analysis suggests that CTLA-4 A49G polymorphism was associated with RA risk.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_157.",2014-08-16 +25252781,METAINTER: meta-analysis of multiple regression models in genome-wide association studies.,"

Motivation

Meta-analysis of summary statistics is an essential approach to guarantee the success of genome-wide association studies (GWAS). Application of the fixed or random effects model to single-marker association tests is a standard practice. More complex methods of meta-analysis involving multiple parameters have not been used frequently, a gap that could be explained by the lack of a respective meta-analysis pipeline. Meta-analysis based on combining p-values can be applied to any association test. However, to be powerful, meta-analysis methods for high-dimensional models should incorporate additional information such as study-specific properties of parameter estimates, their effect directions, standard errors and covariance structure.

Results

We modified 'method for the synthesis of linear regression slopes' recently proposed in the educational sciences to the case of multiple logistic regression, and implemented it in a meta-analysis tool called METAINTER. The software handles models with an arbitrary number of parameters, and can directly be applied to analyze the results of single-SNP tests, global haplotype tests, tests for and under gene-gene or gene-environment interaction. Via simulations for two-single nucleotide polymorphisms (SNP) models we have shown that the proposed meta-analysis method has correct type I error rate. Moreover, power estimates come close to that of the joint analysis of the entire sample. We conducted a real data analysis of six GWAS of type 2 diabetes, available from dbGaP (http://www.ncbi.nlm.nih.gov/gap). For each study, a genome-wide interaction analysis of all SNP pairs was performed by logistic regression tests. The results were then meta-analyzed with METAINTER.

Availability

The software is freely available and distributed under the conditions specified on http://metainter.meb.uni-bonn.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-23 +22080562,Network of Cancer Genes (NCG 3.0): integration and analysis of genetic and network properties of cancer genes.,"The identification of a constantly increasing number of genes whose mutations are causally implicated in tumor initiation and progression (cancer genes) requires the development of tools to store and analyze them. The Network of Cancer Genes (NCG 3.0) collects information on 1494 cancer genes that have been found mutated in 16 different cancer types. These genes were collected from the Cancer Gene Census as well as from 18 whole exome and 11 whole-genome screenings of cancer samples. For each cancer gene, NCG 3.0 provides a summary of the gene features and the cross-reference to other databases. In addition, it describes duplicability, evolutionary origin, orthology, network properties, interaction partners, microRNA regulation and functional roles of cancer genes and of all genes that are related to them. This integrated network of information can be used to better characterize cancer genes in the context of the system in which they act. The data can also be used to identify novel candidates that share the same properties of known cancer genes and may therefore play a similar role in cancer. NCG 3.0 is freely available at http://bio.ifom-ieo-campus.it/ncg.",2011-11-12 +25256928,"World Endometriosis Research Foundation Endometriosis Phenome and Biobanking Harmonisation Project: IV. Tissue collection, processing, and storage in endometriosis research.","

Objective

To harmonize standard operating procedures (SOPs) and standardize the recording of associated data for collection, processing, and storage of human tissues relevant to endometriosis.

Design

An international collaboration involving 34 clinical/academic centers and three industry collaborators from 16 countries on five continents.

Setting

In 2013, two workshops were conducted followed by global consultation, bringing together 54 leaders in endometriosis research and sample processing from around the world.

Patient(s)

None.

Intervention(s)

Consensus SOPs were based on: 1) systematic comparison of SOPs from 24 global centers collecting tissue samples from women with and without endometriosis on a medium or large scale (publication on >100 cases); 2) literature evidence where available, or consultation with laboratory experts otherwise; and 3) several global consultation rounds.

Main outcome measure(s)

Standard recommended and minimum required SOPs for tissue collection, processing, and storage in endometriosis research.

Result(s)

We developed ""recommended standard"" and ""minimum required"" SOPs for the collection, processing, and storage of ectopic and eutopic endometrium, peritoneum, and myometrium, and a biospecimen data collection form necessary for interpretation of sample-derived results.

Conclusion(s)

The EPHect SOPs allow endometriosis research centers to decrease variability in tissue-based results, facilitating between-center comparisons and collaborations. The procedures are also relevant to research into other gynecologic conditions involving endometrium, myometrium, and peritoneum. The consensus SOPs are based on the best available evidence; areas with limited evidence are identified as requiring further pilot studies. The SOPs will be reviewed based on investigator feedback and through systematic triannual follow-up. Updated versions will be made available at: http://endometriosisfoundation.org/ephect.",2014-09-22 +25000815,Addressing the unmet need for visualizing conditional random fields in biological data.,"

Background

The biological world is replete with phenomena that appear to be ideally modeled and analyzed by one archetypal statistical framework - the Graphical Probabilistic Model (GPM). The structure of GPMs is a uniquely good match for biological problems that range from aligning sequences to modeling the genome-to-phenome relationship. The fundamental questions that GPMs address involve making decisions based on a complex web of interacting factors. Unfortunately, while GPMs ideally fit many questions in biology, they are not an easy solution to apply. Building a GPM is not a simple task for an end user. Moreover, applying GPMs is also impeded by the insidious fact that the ""complex web of interacting factors"" inherent to a problem might be easy to define and also intractable to compute upon.

Discussion

We propose that the visualization sciences can contribute to many domains of the bio-sciences, by developing tools to address archetypal representation and user interaction issues in GPMs, and in particular a variety of GPM called a Conditional Random Field (CRF). CRFs bring additional power, and additional complexity, because the CRF dependency network can be conditioned on the query data.

Conclusions

In this manuscript we examine the shared features of several biological problems that are amenable to modeling with CRFs, highlight the challenges that existing visualization and visual analytics paradigms induce for these data, and document an experimental solution called StickWRLD which, while leaving room for improvement, has been successfully applied in several biological research projects. Software and tutorials are available at http://www.stickwrld.org/.",2014-07-10 +22080505,PSCDB: a database for protein structural change upon ligand binding.,"Proteins are flexible molecules that undergo structural changes to function. The Protein Data Bank contains multiple entries for identical proteins determined under different conditions, e.g. with and without a ligand molecule, which provides important information for understanding the structural changes related to protein functions. We gathered 839 protein structural pairs of ligand-free and ligand-bound states from monomeric or homo-dimeric proteins, and constructed the Protein Structural Change DataBase (PSCDB). In the database, we focused on whether the motions were coupled with ligand binding. As a result, the protein structural changes were classified into seven classes, i.e. coupled domain motion (59 structural changes), independent domain motion (70), coupled local motion (125), independent local motion (135), burying ligand motion (104), no significant motion (311) and other type motion (35). PSCDB provides lists of each class. On each entry page, users can view detailed information about the motion, accompanied by a morphing animation of the structural changes. PSCDB is available at http://idp1.force.cs.is.nagoya-u.ac.jp/pscdb/.",2011-11-10 +21936021,dbHCCvar: a comprehensive database of human genetic variations in hepatocellular carcinoma.,"Hepatocellular carcinoma (HCC) is a common cancer with a high mortality rate. 
The complete pathogenesis of HCC is not completely understood, and highly efficient therapy is still unavailable. In the past several decades, various genetic variations such as mutations and polymorphisms have been reported to be associated with HCC risk, progression, survival, and recurrence. However, to our knowledge, these genetic variations have not been comprehensively and systematically compiled. In this study we constructed dbHCCvar, a free online database of human genetic variations in HCC. Eligible publications were collected from PubMed, and detailed information and major research data from each eligible study were then extracted and recorded in our database. As a result, dbHCCvar contains almost all human genetic variations reported to be associated or not associated with HCC risk, clinical pathology, drug reaction, survival, or recurrence to date. It is expected that dbHCCvar will function as a useful tool for researchers to facilitate the search and identification of new genetic markers for HCC. dbHCCvar is free for all visitors at http://GenetMed.fudan.edu.cn/dbHCCvar.",2011-09-20 +26163828,Does psychodynamic short-term psychotherapy for depressed breast cancer patients also improve fatigue? Results from a randomized controlled trial.,"The purpose of this study was to determine (a) the course of fatigue in depressed breast cancer patients, (b) the effect of a depression-focused individual psychodynamic psychotherapy on fatigue, and (c) the associations of fatigue with depression, quality of life and treatment-related variables. In a German multicentre randomized controlled trial in Leipzig and Mainz, depressed early breast cancer patients (UICC stage 0-III, age 18-70 years) were randomly assigned to a short-term psychodynamic psychotherapy (STPP, an adaptation of the Supportive-Expressive psychotherapy by Luborsky for cancer patients) or treatment as usual (TAU) and completed data assessment pre- and post-treatment. 
Fatigue was assessed with the Multidimensional Fatigue Inventory (MFI-20). All analyses were conducted as complete case analyses including 52 STPP and 54 TAU completers (n = 106). The trial is registered at http://www.controlled-trials.com , number ISRCTN96793588. Fatigue declined significantly from a high level pre-treatment to post-treatment, but remained significantly higher than among population-based controls and a mixed sample of cancer patients. Significant time by group interactions favoured STPP for the subscales reduced activity and physical fatigue and the total scale. The strength of the associations between total fatigue and depression increased from 0.49 pre-treatment to 0.63 (Quality of life -0.52 to -0.63) at follow-up. STPP is beneficial for reducing dimensions of fatigue (particularly reduced activity and physical fatigue) in depressed breast cancer patients. Chronic fatigue needs more clinical attention in this vulnerable group.",2015-07-12 +25816325,Automatic identification of mobile and rigid substructures in molecular dynamics simulations and fractional structural fluctuation analysis.,"The analysis of structural mobility in molecular dynamics plays a key role in data interpretation, particularly in the simulation of biomolecules. The most common mobility measures computed from simulations are the Root Mean Square Deviation (RMSD) and Root Mean Square Fluctuations (RMSF) of the structures. These are computed after the alignment of atomic coordinates in each trajectory step to a reference structure. This rigid-body alignment is not robust, in the sense that if a small portion of the structure is highly mobile, the RMSD and RMSF increase for all atoms, resulting possibly in poor quantification of the structural fluctuations and, often, to overlooking important fluctuations associated to biological function. The motivation of this work is to provide a robust measure of structural mobility that is practical, and easy to interpret. 
We propose a Low-Order-Value-Optimization (LOVO) strategy for the robust alignment of the least mobile substructures in a simulation. These substructures are automatically identified by the method. The algorithm consists of the iterative superposition of the fraction of structure displaying the smallest displacements. Therefore, the least mobile substructures are identified, providing a clearer picture of the overall structural fluctuations. Examples are given to illustrate the interpretative advantages of this strategy. The software for performing the alignments was named MDLovoFit and it is available as free-software at: http://leandro.iqm.unicamp.br/mdlovofit.",2015-03-27 +25125444,LeishMicrosatDB: open source database of repeat sequences detected in six fully sequenced Leishmania genomes. ,"A Leishmania Microsatellite Database (LeishMicrosatDB) is reported for genome wise mining of microsatellites in six Leishmania species, using in silico techniques. This was created to provide parasitologists a platform to understand the genome characterization, mapping, phylogeny and evolutionary analysis. The present version of the database contains 1,738,669 simple sequence repeats of which 181,756 repeats are present in compound form. The repeats can be sought in a chromosome using input parameters such as repeat type (mono- hexa), coding status, repeat unit length and repeat sequence motif. The genic repeats have been further hyperlinked with their corresponding locus id, and the database is appended with primer3 plus for primer designing of selected repeats with left and right flanking sequences up to 250 bp. Information on clustering and polymorphic repeats can also be retrieved. This database may also be adopted as a tool to study the relative occurrence and distribution of microsatellites across the parasitic genome. 
The database can enable a biologist to select markers at desired intervals over the chromosomes, and can be accessed as an open source repository at http://biomedinformri.com/leishmicrosat. http://biomedinformri.com/leishmicrosat.",2014-08-14 +25233134,"Summary of notifiable diseases--United States, 2012.","The Summary of notifiable diseases--United States, 2012 contains the official statistics, in tabular and graphic form, for the reported occurrence of nationally notifiable infectious diseases in the United States for 2012. Unless otherwise noted, the data are final totals for 2012 reported as of June 30, 2013. These statistics are collected and compiled from reports sent by state health departments and territories to the National Notifiable Diseases Surveillance System (NNDSS), which is operated by CDC in collaboration with the Council of State and Territorial Epidemiologists (CSTE). The Summary is available at http://www.cdc.gov/mmwr/mmwr_nd/index.html. This site also includes Summary publications from previous years.",2014-09-01 +26756918,Genome-Wide Analysis of DNA Methylation and Cigarette Smoking in a Chinese Population.,"

Background

Smoking is a risk factor for many human diseases. DNA methylation has been related to smoking, but genome-wide methylation data for smoking in Chinese populations is limited.

Objectives

We aimed to investigate epigenome-wide methylation in relation to smoking in a Chinese population.

Methods

We measured the methylation levels at > 485,000 CpG sites (CpGs) in DNA from leukocytes using a methylation array and conducted a genome-wide meta-analysis of DNA methylation and smoking in a total of 596 Chinese participants. We further evaluated the associations of smoking-related CpGs with internal polycyclic aromatic hydrocarbon (PAH) biomarkers and their correlations with the expression of corresponding genes.

Results

We identified 318 CpGs whose methylation levels were associated with smoking at a genome-wide significance level (false discovery rate < 0.05), among which 161 CpGs annotated to 123 genes were not associated with smoking in recent studies of Europeans and African Americans. Of these smoking-related CpGs, methylation levels at 80 CpGs showed significant correlations with the expression of corresponding genes (including RUNX3, IL6R, PTAFR, ANKRD11, CEP135 and CDH23), and methylation at 15 CpGs was significantly associated with urinary 2-hydroxynaphthalene, the most representative internal monohydroxy-PAH biomarker for smoking.

Conclusion

We identified DNA methylation markers associated with smoking in a Chinese population, including some markers that were also correlated with gene expression. Exposure to naphthalene, a byproduct of tobacco smoke, may contribute to smoking-related methylation.

Citation

Zhu X, Li J, Deng S, Yu K, Liu X, Deng Q, Sun H, Zhang X, He M, Guo H, Chen W, Yuan J, Zhang B, Kuang D, He X, Bai Y, Han X, Liu B, Li X, Yang L, Jiang H, Zhang Y, Hu J, Cheng L, Luo X, Mei W, Zhou Z, Sun S, Zhang L, Liu C, Guo Y, Zhang Z, Hu FB, Liang L, Wu T. 2016. Genome-wide analysis of DNA methylation and cigarette smoking in Chinese. Environ Health Perspect 124:966-973; http://dx.doi.org/10.1289/ehp.1509834.",2016-01-12 +24339943,ReliefSeq: a gene-wise adaptive-K nearest-neighbor feature selection tool for finding gene-gene interactions and main effects in mRNA-Seq gene expression data.,"Relief-F is a nonparametric, nearest-neighbor machine learning method that has been successfully used to identify relevant variables that may interact in complex multivariate models to explain phenotypic variation. While several tools have been developed for assessing differential expression in sequence-based transcriptomics, the detection of statistical interactions between transcripts has received less attention in the area of RNA-seq analysis. We describe a new extension and assessment of Relief-F for feature selection in RNA-seq data. The ReliefSeq implementation adapts the number of nearest neighbors (k) for each gene to optimize the Relief-F test statistics (importance scores) for finding both main effects and interactions. We compare this gene-wise adaptive-k (gwak) Relief-F method with standard RNA-seq feature selection tools, such as DESeq and edgeR, and with the popular machine learning method Random Forests. We demonstrate performance on a panel of simulated data that have a range of distributional properties reflected in real mRNA-seq data including multiple transcripts with varying sizes of main effects and interaction effects. For simulated main effects, gwak-Relief-F feature selection performs comparably to standard tools DESeq and edgeR for ranking relevant transcripts. 
For gene-gene interactions, gwak-Relief-F outperforms all comparison methods at ranking relevant genes in all but the highest fold change/highest signal situations where it performs similarly. The gwak-Relief-F algorithm outperforms Random Forests for detecting relevant genes in all simulation experiments. In addition, Relief-F is comparable to the other methods based on computational time. We also apply ReliefSeq to an RNA-Seq study of smallpox vaccine to identify gene expression changes between vaccinia virus-stimulated and unstimulated samples. ReliefSeq is an attractive tool for inclusion in the suite of tools used for analysis of mRNA-Seq data; it has power to detect both main effects and interaction effects. Software Availability: http://insilico.utulsa.edu/ReliefSeq.php.",2013-12-10 +25122463,RLIMS-P: an online text-mining tool for literature-based extraction of protein phosphorylation information. ,"Protein phosphorylation is central to the regulation of most aspects of cell function. Given its importance, it has been the subject of active research as well as the focus of curation in several biological databases. We have developed Rule-based Literature Mining System for protein Phosphorylation (RLIMS-P), an online text-mining tool to help curators identify biomedical research articles relevant to protein phosphorylation. The tool presents information on protein kinases, substrates and phosphorylation sites automatically extracted from the biomedical literature. The utility of the RLIMS-P Web site has been evaluated by curators from Phospho.ELM, PhosphoGRID/BioGrid and Protein Ontology as part of the BioCreative IV user interactive task (IAT). The system achieved F-scores of 0.76, 0.88 and 0.92 for the extraction of kinase, substrate and phosphorylation sites, respectively, and a precision of 0.88 in the retrieval of relevant phosphorylation literature. The system also received highly favorable feedback from the curators in a user survey. 
Based on the curators' suggestions, the Web site has been enhanced to improve its usability. In the RLIMS-P Web site, phosphorylation information can be retrieved by PubMed IDs or keywords, with an option for selecting targeted species. The result page displays a sortable table with phosphorylation information. The text evidence page displays the abstract with color-coded entity mentions and includes links to UniProtKB entries via normalization, i.e., the linking of entity mentions to database identifiers, facilitated by the GenNorm tool and by the links to the bibliography in UniProt. Log in and editing capabilities are offered to any user interested in contributing to the validation of RLIMS-P results. Retrieved phosphorylation information can also be downloaded in CSV format and the text evidence in the BioC format. RLIMS-P is freely available. http://www.proteininformationresource.org/rlimsp/",2014-08-13 +25750659,LocusTrack: Integrated visualization of GWAS results and genomic annotation.,"

Background

Genome-wide association studies (GWAS) are an important tool for the mapping of complex traits and diseases. Visual inspection of genomic annotations may be used to generate insights into the biological mechanisms underlying GWAS-identified loci.

Results

We developed LocusTrack, a web-based application that annotates and creates plots of regional GWAS results and incorporates user-specified tracks that display annotations such as linkage disequilibrium (LD), phylogenetic conservation, chromatin state, and other genomic and regulatory elements. Currently, LocusTrack can integrate annotation tracks from the UCSC genome-browser as well as from any tracks provided by the user.

Conclusion

LocusTrack is an easy-to-use application and can be accessed at the following URL: http://gump.qimr.edu.au/general/gabrieC/LocusTrack/. Users can upload and manage GWAS results and select from and/or provide annotation tracks using simple and intuitive menus. LocusTrack scripts and associated data can be downloaded from the website and run locally.",2015-02-03 +25165464,Ends of the line for tmRNA-SmpB.,"Genes for the RNA tmRNA and protein SmpB, partners in the trans-translation process that rescues stalled ribosomes, have previously been found in all bacteria and some organelles. During a major update of The tmRNA Website (relocated to http://bioinformatics.sandia.gov/tmrna), including addition of an SmpB sequence database, we found some bacteria that lack functionally significant regions of SmpB. Three groups with reduced genomes have lost the central loop of SmpB, which is thought to improve alanylation and EF-Tu activation: Carsonella, Hodgkinia, and the hemoplasmas (hemotropic Mycoplasma). Carsonella has also lost the SmpB C-terminal tail, thought to stimulate the decoding center of the ribosome. We validate recent identification of tmRNA homologs in oomycete mitochondria by finding partner genes from oomycete nuclei that target SmpB to the mitochondrion. We have moreover identified through exhaustive search a small number of complete, but often highly derived, bacterial genomes that appear to lack a functional copy of either the tmRNA or SmpB gene (but not both). One Carsonella isolate exhibits complete degradation of the tmRNA gene sequence yet its smpB shows no evidence for relaxed selective constraint, relative to other genes in the genome. After loss of the SmpB central loop in the hemoplasmas, one subclade apparently lost tmRNA. Carsonella also exhibits gene overlap such that tmRNA maturation should produce a non-stop smpB mRNA. At least some of the tmRNA/SmpB-deficient strains appear to further lack the ArfA and ArfB backup systems for ribosome rescue. 
The most frequent neighbors of smpB are the tmRNA gene, a ratA/rnfH unit, and the gene for RNaseR, a known physical and functional partner of tmRNA-SmpB.",2014-08-13 +22135292,A comprehensive manually curated protein-protein interaction database for the Death Domain superfamily.,"The Death Domain (DD) superfamily, which is one of the largest classes of protein interaction modules, plays a pivotal role in apoptosis, inflammation, necrosis and immune cell signaling pathways. Because aberrant or inappropriate DD superfamily-mediated signaling events are associated with various human diseases, such as cancers, neurodegenerative diseases and immunological disorders, the studies in these fields are of great biological and clinical importance. To facilitate the understanding of the molecular mechanisms by which the DD superfamily is associated with biological and disease processes, we have developed the DD database (http://www.deathdomain.org), a manually curated database that aims to offer comprehensive information on protein-protein interactions (PPIs) of the DD superfamily. The DD database was created by manually curating 295 peer-reviewed studies that were published in the literature; the current version documents 175 PPI pairs among the 99 DD superfamily proteins. The DD database provides a detailed summary of the DD superfamily proteins and their PPI data. Users can find in-depth information that is specified in the literature on relevant analytical methods, experimental resources and domain structures. Our database provides a definitive and valuable tool that assists researchers in understanding the signaling network that is mediated by the DD superfamily.",2011-12-01 +22023470,Recent advances in understanding of the immunological off-target effects of siRNA.,"Short interfering RNAs (siRNAs) are the most commonly used RNA interference (RNAi) triggers. They hold promise as potent therapeutic tools, as demonstrated by recent successful in vivo experiments. 
However, in addition to triggering intended sequence-specific silencing effects, the reagents of RNAi technology can often cause side effects, including immunological off-target effects. The cellular sensors of foreign RNA, such as RIG-I or Toll-like receptors, involved in innate immune antiviral responses, are activated by RNAi reagents. Stimulation of these pathways results in changes in the cellular transcriptome and proteome that can lead to the inhibition of cell division and growth and eventually apoptosis. An additional undesired effect in the context of research applications may be the misinterpretation of experimental results. To date, a number of the specific features of siRNA structure, sequence and delivery mode that are responsible for these effects have been identified. This knowledge may be helpful in designing safer gene-silencing reagents. In this article we discuss the recent developments in the field of non-specific toxic effects caused by RNAi triggers and their delivery vehicles. These data are critically discussed and evaluated, taking advantage of relevant information compiled in the recently launched RNAimmuno database (http://rnaimmuno.ibch.poznan.pl).",2011-12-01 +24931984,Detecting independent and recurrent copy number aberrations using interval graphs.,"

Motivation

Somatic copy number aberrations (SCNAs) are frequent in cancer genomes, but many of these are random, passenger events. A common strategy to distinguish functional aberrations from passengers is to identify those aberrations that are recurrent across multiple samples. However, the extensive variability in the length and position of SCNAs makes the problem of identifying recurrent aberrations notoriously difficult.

Results

We introduce a combinatorial approach to the problem of identifying independent and recurrent SCNAs, focusing on the key challenge of separating the overlaps in aberrations across individuals into independent events. We derive independent and recurrent SCNAs as maximal cliques in an interval graph constructed from overlaps between aberrations. We efficiently enumerate all such cliques, and derive a dynamic programming algorithm to find an optimal selection of non-overlapping cliques, resulting in a very fast algorithm, which we call RAIG (Recurrent Aberrations from Interval Graphs). We show that RAIG outperforms other methods on simulated data and also performs well on data from three cancer types from The Cancer Genome Atlas (TCGA). In contrast to existing approaches that employ various heuristics to select independent aberrations, RAIG optimizes a well-defined objective function. We show that this allows RAIG to identify rare aberrations that are likely functional, but are obscured by overlaps with larger passenger aberrations.

Availability

http://compbio.cs.brown.edu/software.",2014-06-01 +21785143,The InterPro BioMart: federated query and web service access to the InterPro Resource.,"The InterPro BioMart provides users with query-optimized access to predictions of family classification, protein domains and functional sites, based on a broad spectrum of integrated computational models ('signatures') that are generated by the InterPro member databases: Gene3D, HAMAP, PANTHER, Pfam, PIRSF, PRINTS, ProDom, PROSITE, SMART, SUPERFAMILY and TIGRFAMs. These predictions are provided for all protein sequences from both the UniProt Knowledge Base and the UniParc protein sequence archive. The InterPro BioMart is supplementary to the primary InterPro web interface (http://www.ebi.ac.uk/interpro), providing a web service and the ability to build complex, custom queries that can efficiently return thousands of rows of data in a variety of formats. This article describes the information available from the InterPro BioMart and illustrates its utility with examples of how to build queries that return useful biological information. Database URL: http://www.ebi.ac.uk/interpro/biomart/martview.",2011-07-23 +22857267,MicrobesFlux: a web platform for drafting metabolic models from the KEGG database.,"

Background

Concurrent with the efforts currently underway in mapping microbial genomes using high-throughput sequencing methods, systems biologists are building metabolic models to characterize and predict cell metabolisms. One of the key steps in building a metabolic model is using multiple databases to collect and assemble essential information about genome-annotations and the architecture of the metabolic network for a specific organism. To speed up metabolic model development for a large number of microorganisms, we need a user-friendly platform to construct metabolic networks and to perform constraint-based flux balance analysis based on genome databases and experimental results.

Results

We have developed a semi-automatic, web-based platform (MicrobesFlux) for generating and reconstructing metabolic models for annotated microorganisms. MicrobesFlux is able to automatically download the metabolic network (including enzymatic reactions and metabolites) of ~1,200 species from the KEGG database (Kyoto Encyclopedia of Genes and Genomes) and then convert it to a metabolic model draft. The platform also provides diverse customized tools, such as gene knockouts and the introduction of heterologous pathways, for users to reconstruct the model network. The reconstructed metabolic network can be formulated to a constraint-based flux model to predict and analyze the carbon fluxes in microbial metabolisms. The simulation results can be exported in the SBML format (The Systems Biology Markup Language). Furthermore, we also demonstrated the platform functionalities by developing an FBA model (including 229 reactions) for a recent annotated bioethanol producer, Thermoanaerobacter sp. strain X514, to predict its biomass growth and ethanol production.

Conclusion

MicrobesFlux is an installation-free and open-source platform that enables biologists without prior programming knowledge to develop metabolic models for annotated microorganisms in the KEGG database. Our system facilitates users to reconstruct metabolic networks of organisms based on experimental information. Through human-computer interaction, MicrobesFlux provides users with reasonable predictions of microbial metabolism via flux balance analysis. This prototype platform can be a springboard for advanced and broad-scope modeling of complex biological systems by integrating other ""omics"" data or 13C-metabolic flux analysis results. MicrobesFlux is available at http://tanglab.engineering.wustl.edu/static/MicrobesFlux.html and will be continuously improved based on feedback from users.",2012-08-02 +24812308,Detecting local haplotype sharing and haplotype association.,"A novel haplotype association method is presented, and its power is demonstrated. Relying on a statistical model for linkage disequilibrium (LD), the method first infers ancestral haplotypes and their loadings at each marker for each individual. The loadings are then used to quantify local haplotype sharing between individuals at each marker. A statistical model was developed to link the local haplotype sharing and phenotypes to test for association. We devised a novel method to fit the LD model, reducing the complexity from putatively quadratic to linear (in the number of ancestral haplotypes). Therefore, the LD model can be fitted to all study samples simultaneously, and, consequently, our method is applicable to big data sets. Compared to existing haplotype association methods, our method integrated out phase uncertainty, avoided arbitrariness in specifying haplotypes, and had the same number of tests as the single-SNP analysis. 
We applied our method to data from the Wellcome Trust Case Control Consortium and discovered eight novel associations between seven gene regions and five disease phenotypes. Among these, GRIK4, which encodes a protein that belongs to the glutamate-gated ionic channel family, is strongly associated with both coronary artery disease and rheumatoid arthritis. A software package implementing methods described in this article is freely available at http://www.haplotype.org.",2014-05-08 +25111794,A novel high-resolution single locus sequence typing scheme for mixed populations of Propionibacterium acnes in vivo.,"The Gram-positive anaerobic bacterium Propionibacterium acnes is a prevalent member of the normal skin microbiota of human adults. In addition to its suspected role in acne vulgaris it is involved in a variety of opportunistic infections. Multi-locus sequence-typing (MLST) schemes identified distinct phylotypes associated with health and disease. Being based on 8 to 9 house-keeping genes these MLST schemes have a high discriminatory power, but their application is time- and cost-intensive. Here we describe a single-locus sequence typing (SLST) scheme for P. acnes. The target locus was identified with a genome mining approach that took advantage of the availability of representative genome sequences of all known phylotypes of P. acnes. We applied this SLST on a collection of 188 P. acnes strains and demonstrated a resolution comparable to that of existing MLST schemes. Phylogenetic analysis applied to the SLST locus resulted in clustering patterns identical to a reference tree based on core genome sequences. We further demonstrate that SLST can be applied to detect multiple phylotypes in complex microbial communities by a metagenomic pyrosequencing approach. The described SLST strategy may be applied to any bacterial species with a basically clonal population structure to achieve easy typing and mapping of multiple phylotypes in complex microbiotas. The P. 
acnes SLST database can be found at http://medbac.dk/slst/pacnes.",2014-08-11 +23111096,MetabR: an R script for linear model analysis of quantitative metabolomic data.,"

Background

Metabolomics is an emerging high-throughput approach to systems biology, but data analysis tools are lacking compared to other systems level disciplines such as transcriptomics and proteomics. Metabolomic data analysis requires a normalization step to remove systematic effects of confounding variables on metabolite measurements. Current tools may not correctly normalize every metabolite when the relationships between each metabolite quantity and fixed-effect confounding variables are different, or for the effects of random-effect confounding variables. Linear mixed models, an established methodology in the microarray literature, offer a standardized and flexible approach for removing the effects of fixed- and random-effect confounding variables from metabolomic data.

Findings

Here we present a simple menu-driven program, ""MetabR"", designed to aid researchers with no programming background in statistical analysis of metabolomic data. Written in the open-source statistical programming language R, MetabR implements linear mixed models to normalize metabolomic data and analysis of variance (ANOVA) to test treatment differences. MetabR exports normalized data, checks statistical model assumptions, identifies differentially abundant metabolites, and produces output files to help with data interpretation. Example data are provided to illustrate normalization for common confounding variables and to demonstrate the utility of the MetabR program.

Conclusions

We developed MetabR as a simple and user-friendly tool for implementing linear mixed model-based normalization and statistical analysis of targeted metabolomic data, which helps to fill a lack of available data analysis tools in this field. The program, user guide, example data, and any future news or updates related to the program may be found at http://metabr.r-forge.r-project.org/.",2012-10-30 +24565104,Corbi: a new R package for biological network alignment and querying.,"In the last decade, plenty of biological networks are built from the large scale experimental data produced by the rapidly developing high-throughput techniques as well as literature and other sources. But the huge amount of network data have not been fully utilized due to the limited biological network analysis tools. As a basic and essential bioinformatics method, biological network alignment and querying have been applied in many fields such as predicting new protein-protein interactions (PPI). Although many algorithms were published, the network alignment and querying problems are not solved satisfactorily. In this paper, we extended CNetQ, a novel network querying method based on the conditional random fields model, to solve network alignment problem, by adopting an iterative bi-directional mapping strategy. The new method, called CNetA, was compared with other four methods on fifty simulated and three real PPI network alignment instances by using four structural and five biological measures. The computational experiments on the simulated data, which were generated from a biological network evolutionary model to validate the effectiveness of network alignment methods, show that CNetA gets the best accuracy in terms of both nodes and networks. 
For the real data, larger biological conserved subnetworks and larger connected subnetworks were identified, compared with the structural-dominated methods and the biological-dominated methods, respectively, which suggests that CNetA can better balances the biological and structural similarities. Further, CNetQ and CNetA have been implemented in a new R package Corbi (http://doc.aporc.org/wiki/Corbi), and freely accessible and easy used web services for CNetQ and CNetA have also been constructed based on the R package. The simulated and real datasets used in this paper are available for downloading at http://doc.aporc.org/wiki/CNetA/.",2013-10-14 +23930811,In silico identification of Gram-negative bacterial secreted proteins from primary sequence.,"In this study, we focus on different types of Gram-negative bacterial secreted proteins, and try to analyze the relationships and differences among them. Through an extensive literature search, 1612 secreted proteins have been collected as a standard data set from three data sources, including Swiss-Prot, TrEMBL and RefSeq. To explore the relationships among different types of secreted proteins, we model this data set as a sequence similarity network. Finally, a multi-classifier named SecretP is proposed to distinguish different types of secreted proteins, and yields a high total sensitivity of 90.12% for the test set. When performed on another public independent dataset for further evaluation, a promising prediction result is obtained. 
Predictions can be implemented freely online at http://cic.scu.edu.cn/bioinformatics/secretPv2_1/index.htm.",2013-06-11 +22356822,Neuropathology markers and pathways associated with molecular targets for antipsychotic drugs in postmortem brain tissues: exploration of drug targets through the Stanley Neuropathology Integrative Database.,"The atypical antipsychotics bind multiple receptor targets, including dopamine D₂ receptors (DRD2), 5-HT₂ receptors (HTR2A), α-2 adrenergic receptors (ADRA2A), and muscarinic receptors (CHRM1/4). Deficits in antipsychotic targets, their associated pathways, and the causal relationships between the various targets were explored using the Stanley Neuropathology Consortium Integrative Database (SNCID; http://sncid.stanleyresearch.org) and the Network Edge Orienting (NEO) software. There were brain region-specific deficits in the level of the antipsychotic targets, and the level of each target correlated with the mRNA level of the neurotrophic factor BDNF. While myelination was a common process correlated with both DRD2 mRNA levels and ADRA2A activity in the frontal cortex, metabolic processes were specifically correlated with DRD2 mRNA. Immune and inflammatory responses and apoptosis pathways were correlated with group II metabotropic glutamate receptors (GRM2), which are a target for the development of the next-generation antipsychotics. The NEO analysis revealed that HTR2A and GRM2 are likely to regulate BDNF levels in the hippocampus and frontal cortex, respectively, whereas DRD2 and ADRA2A activity are likely to be regulated by BDNF in the frontal cortex. BDNF may play an important role in mechanisms of action of the current antipsychotics and the next-generation antipsychotics that target GRM2. However, this data-mining approach indicates that the next-generation antipsychotics are likely to work through pathways that are distinct from those through which the current antipsychotics work. 
Exploratory analyses such as these may initiate future hypothesis-driven studies to reveal the mechanisms of action underlying the efficacy and side-effects of the antipsychotics.",2012-02-21 +24135450,ReVeaLD: a user-driven domain-specific interactive search platform for biomedical research.,"Bioinformatics research relies heavily on the ability to discover and correlate data from various sources. The specialization of life sciences over the past decade, coupled with an increasing number of biomedical datasets available through standardized interfaces, has created opportunities towards new methods in biomedical discovery. Despite the popularity of semantic web technologies in tackling the integrative bioinformatics challenge, there are many obstacles towards its usage by non-technical research audiences. In particular, the ability to fully exploit integrated information needs using improved interactive methods intuitive to the biomedical experts. In this report we present ReVeaLD (a Real-time Visual Explorer and Aggregator of Linked Data), a user-centered visual analytics platform devised to increase intuitive interaction with data from distributed sources. ReVeaLD facilitates query formulation using a domain-specific language (DSL) identified by biomedical experts and mapped to a self-updated catalogue of elements from external sources. ReVeaLD was implemented in a cancer research setting; queries included retrieving data from in silico experiments, protein modeling and gene expression. ReVeaLD was developed using Scalable Vector Graphics and JavaScript and a demo with explanatory video is available at http://www.srvgal78.deri.ie:8080/explorer. A set of user-defined graphic rules controls the display of information through media-rich user interfaces. Evaluation of ReVeaLD was carried out as a game: biomedical researchers were asked to assemble a set of 5 challenge questions and time and interactions with the platform were recorded. 
Preliminary results indicate that complex queries could be formulated under less than two minutes by unskilled researchers. The results also indicate that supporting the identification of the elements of a DSL significantly increased intuitiveness of the platform and usability of semantic web technologies by domain users.",2013-10-14 +23280134,BOLDMirror: a global mirror system of DNA barcode data.,"DNA barcoding is a novel concept for taxonomic identification using short, specific genetic markers and has been applied to study a large number of eukaryotes. The huge amount of data output generated by DNA barcoding requires well-organized information systems. Besides the Barcode of Life Data system (BOLD) established in Canada, the mirror system is also important for the international barcode of life project (iBOL). For this purpose, we developed the BOLDMirror, a global mirror system of DNA barcode data. It is open-sourced and can run on the LAMP (Linux + Apache + MySQL + PHP) environment. BOLDMirror has data synchronization, data representation and statistics modules, and also provides spaces to store user operation history. BOLDMirror can be accessed at http://www.boldmirror.net and several countries have used it to setup their site of DNA barcoding.",2012-12-27 +26485026,Mapping Atmospheric Moisture Climatologies across the Conterminous United States.,"Spatial climate datasets of 1981-2010 long-term mean monthly average dew point and minimum and maximum vapor pressure deficit were developed for the conterminous United States at 30-arcsec (~800m) resolution. Interpolation of long-term averages (twelve monthly values per variable) was performed using PRISM (Parameter-elevation Relationships on Independent Slopes Model). Surface stations available for analysis numbered only 4,000 for dew point and 3,500 for vapor pressure deficit, compared to 16,000 for previously-developed grids of 1981-2010 long-term mean monthly minimum and maximum temperature. 
Therefore, a form of Climatologically-Aided Interpolation (CAI) was used, in which the 1981-2010 temperature grids were used as predictor grids. For each grid cell, PRISM calculated a local regression function between the interpolated climate variable and the predictor grid. Nearby stations entering the regression were assigned weights based on the physiographic similarity of the station to the grid cell that included the effects of distance, elevation, coastal proximity, vertical atmospheric layer, and topographic position. Interpolation uncertainties were estimated using cross-validation exercises. Given that CAI interpolation was used, a new method was developed to allow uncertainties in predictor grids to be accounted for in estimating the total interpolation error. Local land use/land cover properties had noticeable effects on the spatial patterns of atmospheric moisture content and deficit. An example of this was relatively high dew points and low vapor pressure deficits at stations located in or near irrigated fields. The new grids, in combination with existing temperature grids, enable the user to derive a full suite of atmospheric moisture variables, such as minimum and maximum relative humidity, vapor pressure, and dew point depression, with accompanying assumptions. All of these grids are available online at http://prism.oregonstate.edu, and include 800-m and 4-km resolution data, images, metadata, pedigree information, and station inventory files.",2015-10-20 +24215028,"AVIA: an interactive web-server for annotation, visualization and impact analysis of genomic variations.","

Motivation

The plethora of information that emerges from large-scale genome characterization studies has triggered the development of computational frameworks and tools for efficient analysis, interpretation and visualization of genomic data. Functional annotation of genomic variations and the ability to visualize the data in the context of whole genome and/or multiple genomes has remained a challenging task. We have developed an interactive web-based tool, AVIA (Annotation, Visualization and Impact Analysis), to explore and interpret large sets of genomic variations (single nucleotide variations and insertion/deletions) and to help guide and summarize genomic experiments. The annotation, summary plots and tables are packaged and can be downloaded by the user from the email link provided.

Availability and implementation

http://avia.abcc.ncifcrf.gov.",2013-11-09 +26577763,"Evaluation of pedometry as a patient-centered outcome in patients undergoing hematopoietic cell transplant (HCT): a comparison of pedometry and patient reports of symptoms, health, and quality of life.","

Aims

We evaluated pedometry as a novel patient-centered outcome because it enables passive continuous assessment of activity and may provide information about the consequences of symptomatic toxicity complementary to self-report.

Methods

Adult patients undergoing hematopoietic cell transplant (HCT) wore pedometers and completed PRO assessments during transplant hospitalization (4 weeks) and 4 weeks post-discharge. Patient reports of symptomatic treatment toxicities (single items from PRO-CTCAE, http://healthcaredelivery.cancer.gov/pro-ctcae ) and symptoms, physical health, mental health, and quality of life (PROMIS(®) Global-10, http://nih.promis.org ), assessed weekly with 7-day recall on Likert scales, were compared individually with pedometry data, summarized as average daily steps per week, using linear mixed models.

Results

Thirty-two patients [mean age 55 (SD = 14), 63 % male, 84 % white, 56 % autologous, 43 % allogeneic] completed a mean 4.6 (SD = 1.5, range 1-8) evaluable assessments. Regression model coefficients (β) indicated within-person decrements in average daily steps were associated with increases in pain (β = -852; 852 fewer steps per unit increase in pain score, p < 0.001), fatigue (β = -886, p < 0.001), vomiting (β = -518, p < 0.01), shaking/chills (β = -587, p < 0.01), diarrhea (β = -719, p < 0.001), shortness of breath (β = -1018, p < 0.05), reduction in carrying out social activities (β = 705, p < 0.01) or physical activities (β = 618, p < 0.01), and global physical health (β = 101, p < 0.001), but not global mental health or quality of life.

Conclusions

In this small sample of HCT recipients, more severe symptoms, impaired physical health, and restrictions in the performance of usual daily activities were associated with statistically significant decrements in objectively measured daily steps. Pedometry may be a valuable outcome measure and validation anchor in clinical research.",2015-11-17 +22238269,GROMACS molecule & liquid database.,"

Motivation

The molecular dynamics simulation package GROMACS is a widely used tool used in a broad range of different applications within physics, chemistry and biology. It is freely available, user friendly and extremely efficient. The GROMACS software is force field agnostic, and compatible with many molecular dynamics force fields; coarse-grained, unified atom, all atom as well as polarizable models based on the charge on a spring concept. To validate simulations, it is necessary to compare results from the simulations to experimental data. To ease the process of setting up topologies and structures for simulations, as well as providing pre-calculated physical properties along with experimental values for the same we provide a web-based database, containing 145 organic molecules at present.

Results

Liquid properties of 145 organic molecules have been simulated using two different force fields, OPLS all atom and Generalized Amber Force Field. So far, eight properties have been calculated (the density, enthalpy of vaporization, surface tension, heat capacity at constant volume and pressure, isothermal compressibility, volumetric expansion coefficient and the static dielectric constant). The results, together with experimental values are available through the database, along with liquid structures and topologies for the 145 molecules, in the two force fields.

Availability

The database is freely available under http://virtualchemistry.org.",2012-01-11 +26240527,Gel Scramble: An E-Tool for Teaching Molecular Neuroscience.,"In this completely digital teaching module, students interpret the results of two separate procedures: a restriction endonuclease digestion, and a polymerase chain reaction (PCR). The first consists of matching restriction endonuclease digest protocols with images obtained from stained agarose gels. Students are given the sequence of six plasmid cDNAs, characteristics of the plasmid vector, and the endonuclease digest protocols, which specify the enzyme(s) used. Students calculate the expected lengths of digestion products using this information and free tools available on the web. Students learn how to read gels and then match their predicted fragment lengths to the digital images obtained from the gel electrophoresis of the cDNA digest. In the PCR experiment, students are given six cDNA sequences and six sets of primers. By querying NCBI BLAST, students can match the PCR fragments to the lengths of the predicted in silico PCR products. The ruse posed to students is that the gels were inadvertently mislabeled during processing. Although students know the experimental details, they do not know which gel goes with a given restriction endonuclease digest or PCR-they must deduce the answers. Because the gel images are from actual students' experiments, the data sometimes result from mishandling/mislabeling or faulty protocol execution. The most challenging part of the exercise is to explain these errors. This latter aspect requires students to use critical thinking skills to explain aberrant outcomes. This entire exercise is available in a digital format and downloadable for free at http://mdcune.psych.ucla.edu/modules/gel.",2015-07-07 +26745545,Estimating Children's Soil/Dust Ingestion Rates through Retrospective Analyses of Blood Lead Biomonitoring from the Bunker Hill Superfund Site in Idaho.,"

Background

Soil/dust ingestion rates are important variables in assessing children's health risks in contaminated environments. Current estimates are based largely on soil tracer methodology, which is limited by analytical uncertainty, small sample size, and short study duration.

Objectives

The objective was to estimate site-specific soil/dust ingestion rates through reevaluation of the lead absorption dose-response relationship using new bioavailability data from the Bunker Hill Mining and Metallurgical Complex Superfund Site (BHSS) in Idaho, USA.

Methods

The U.S. Environmental Protection Agency (EPA) in vitro bioavailability methodology was applied to archived BHSS soil and dust samples. Using age-specific biokinetic slope factors, we related bioavailable lead from these sources to children's blood lead levels (BLLs) monitored during cleanup from 1988 through 2002. Quantitative regression analyses and exposure assessment guidance were used to develop candidate soil/dust source partition scenarios estimating lead intake, allowing estimation of age-specific soil/dust ingestion rates. These ingestion rate and bioavailability estimates were simultaneously applied to the U.S. EPA Integrated Exposure Uptake Biokinetic Model for Lead in Children to determine those combinations best approximating observed BLLs.

Results

Absolute soil and house dust bioavailability averaged 33% (SD ± 4%) and 28% (SD ± 6%), respectively. Estimated BHSS age-specific soil/dust ingestion rates are 86-94 mg/day for 6-month- to 2-year-old children and 51-67 mg/day for 2- to 9-year-old children.

Conclusions

Soil/dust ingestion rate estimates for 1- to 9-year-old children at the BHSS are lower than those commonly used in human health risk assessment. A substantial component of children's exposure comes from sources beyond the immediate home environment.

Citation

von Lindern I, Spalinger S, Stifelman ML, Stanek LW, Bartrem C. 2016. Estimating children's soil/dust ingestion rates through retrospective analyses of blood lead biomonitoring from the Bunker Hill Superfund Site in Idaho. Environ Health Perspect 124:1462-1470; http://dx.doi.org/10.1289/ehp.1510144.",2016-01-08 +25437435,cellPACK: a virtual mesoscope to model and visualize structural systems biology.,"cellPACK assembles computational models of the biological mesoscale, an intermediate scale (10-100 nm) between molecular and cellular biology scales. cellPACK's modular architecture unites existing and novel packing algorithms to generate, visualize and analyze comprehensive three-dimensional models of complex biological environments that integrate data from multiple experimental systems biology and structural biology sources. cellPACK is available as open-source code, with tools for validation of models and with 'recipes' and models for five biological systems: blood plasma, cytoplasm, synaptic vesicles, HIV and a mycoplasma cell. We have applied cellPACK to model distributions of HIV envelope protein to test several hypotheses for consistency with experimental observations. Biologists, educators and outreach specialists can interact with cellPACK models, develop new recipes and perform packing experiments through scripting and graphical user interfaces at http://cellPACK.org/.",2014-12-01 +21992071,A formalized description of the standard human variant nomenclature in Extended Backus-Naur Form.,"

Background

The use of a standard human sequence variant nomenclature is advocated by the Human Genome Variation Society in order to unambiguously describe genetic variants in databases and literature. There is a clear need for tools that allow the mining of data about human sequence variants and their functional consequences from databases and literature. Existing text mining focuses on the recognition of protein variants and their effects. The recognition of variants at the DNA and RNA levels is essential for dissemination of variant data for diagnostic purposes. Development of new tools is hampered by the complexity of the current nomenclature, which requires processing at the character level to recognize the specific syntactic constructs used in variant descriptions.

Results

We approached the gene variant nomenclature as a scientific sublanguage and created two formal descriptions of the syntax in Extended Backus-Naur Form: one at the DNA-RNA level and one at the protein level. To ensure compatibility to older versions of the human sequence variant nomenclature, previously recommended variant description formats have been included. The first grammar versions were designed to help build variant description handling in the Alamut mutation interpretation software. The DNA and RNA level descriptions were then updated and used to construct the context-free parser of the Mutalyzer 2 sequence variant nomenclature checker, which has already been used to check more than one million variant descriptions.

Conclusions

The Extended Backus-Naur Form provided an overview of the full complexity of the syntax of the sequence variant nomenclature, which remained hidden in the textual format and the division of the recommendations across the DNA, RNA and protein sections of the Human Genome Variation Society nomenclature website (http://www.hgvs.org/mutnomen/). This insight into the syntax of the nomenclature could be used to design detailed and clear rules for software development. The Mutalyzer 2 parser demonstrated that it facilitated decomposition of complex variant descriptions into their individual parts. The Extended Backus-Naur Form or parts of it can be used or modified by adding rules, allowing the development of specific sequence variant text mining tools and other programs, which can generate or handle sequence variant descriptions.",2011-07-05 +24451033,A new approach to radial basis function approximation and its application to QSAR.,"We describe a novel approach to RBF approximation, which combines two new elements: (1) linear radial basis functions and (2) weighting the model by each descriptor's contribution. Linear radial basis functions allow one to achieve more accurate predictions for diverse data sets. Taking into account the contribution of each descriptor produces more accurate similarity values used for model development. The method was validated on 14 public data sets comprising nine physicochemical properties and five toxicity endpoints. We also compared the new method with five different QSAR methods implemented in the EPA T.E.S.T. program. Our approach, implemented in the program GUSAR, showed a reasonable accuracy of prediction and high coverage for all external test sets, providing more accurate prediction results than the comparison methods and even the consensus of these methods. 
Using our new method, we have created models for physicochemical and toxicity endpoints, which we have made freely available in the form of an online service at http://cactus.nci.nih.gov/chemical/apps/cap.",2014-02-28 +23788679,Novel approach for selecting the best predictor for identifying the binding sites in DNA binding proteins.,"Protein-DNA complexes play vital roles in many cellular processes by the interactions of amino acids with DNA. Several computational methods have been developed for predicting the interacting residues in DNA-binding proteins using sequence and/or structural information. These methods showed different levels of accuracies, which may depend on the choice of data sets used in training, the feature sets selected for developing a predictive model, the ability of the models to capture information useful for prediction or a combination of these factors. In many cases, different methods are likely to produce similar results, whereas in others, the predictors may return contradictory predictions. In this situation, a priori estimates of prediction performance applicable to the system being investigated would be helpful for biologists to choose the best method for designing their experiments. In this work, we have constructed unbiased, stringent and diverse data sets for DNA-binding proteins based on various biologically relevant considerations: (i) seven structural classes, (ii) 86 folds, (iii) 106 superfamilies, (iv) 194 families, (v) 15 binding motifs, (vi) single/double-stranded DNA, (vii) DNA conformation (A, B, Z, etc.), (viii) three functions and (ix) disordered regions. These data sets were culled as non-redundant with sequence identities of 25 and 40% and used to evaluate the performance of 11 different methods in which online services or standalone programs are available. We observed that the best performing methods for each of the data sets showed significant biases toward the data sets selected for their benchmark. 
Our analysis revealed important data set features, which could be used to estimate these context-specific biases and hence suggest the best method to be used for a given problem. We have developed a web server, which considers these features on demand and displays the best method that the investigator should use. The web server is freely available at http://www.biotech.iitm.ac.in/DNA-protein/. Further, we have grouped the methods based on their complexity and analyzed the performance. The information gained in this work could be effectively used to select the best method for designing experiments.",2013-06-20 +25122572,VSI: a visual saliency-induced index for perceptual image quality assessment.,"Perceptual image quality assessment (IQA) aims to use computational models to measure the image quality in consistent with subjective evaluations. Visual saliency (VS) has been widely studied by psychologists, neurobiologists, and computer scientists during the last decade to investigate, which areas of an image will attract the most attention of the human visual system. Intuitively, VS is closely related to IQA in that suprathreshold distortions can largely affect VS maps of images. With this consideration, we propose a simple but very effective full reference IQA method using VS. In our proposed IQA model, the role of VS is twofold. First, VS is used as a feature when computing the local quality map of the distorted image. Second, when pooling the quality score, VS is employed as a weighting function to reflect the importance of a local region. The proposed IQA index is called visual saliency-based index (VSI). Several prominent computational VS models have been investigated in the context of IQA and the best one is chosen for VSI. 
Extensive experiments performed on four large-scale benchmark databases demonstrate that the proposed IQA index VSI works better in terms of the prediction accuracy than all state-of-the-art IQA indices we can find while maintaining a moderate computational complexity. The MATLAB source code of VSI and the evaluation results are publicly available online at http://sse.tongji.edu.cn/linzhang/IQA/VSI/VSI.htm.",2014-08-07 +21635751,Binding site prediction for protein-protein interactions and novel motif discovery using re-occurring polypeptide sequences.,"

Background

While there are many methods for predicting protein-protein interaction, very few can determine the specific site of interaction on each protein. Characterization of the specific sequence regions mediating interaction (binding sites) is crucial for an understanding of cellular pathways. Experimental methods often report false binding sites due to experimental limitations, while computational methods tend to require data which is not available at the proteome-scale. Here we present PIPE-Sites, a novel method of protein specific binding site prediction based on pairs of re-occurring polypeptide sequences, which have been previously shown to accurately predict protein-protein interactions. PIPE-Sites operates at high specificity and requires only the sequences of query proteins and a database of known binary interactions with no binding site data, making it applicable to binding site prediction at the proteome-scale.

Results

PIPE-Sites was evaluated using a dataset of 265 yeast and 423 human interacting proteins pairs with experimentally-determined binding sites. We found that PIPE-Sites predictions were closer to the confirmed binding site than those of two existing binding site prediction methods based on domain-domain interactions, when applied to the same dataset. Finally, we applied PIPE-Sites to two datasets of 2347 yeast and 14,438 human novel interacting protein pairs predicted to interact with high confidence. An analysis of the predicted interaction sites revealed a number of protein subsequences which are highly re-occurring in binding sites and which may represent novel binding motifs.

Conclusions

PIPE-Sites is an accurate method for predicting protein binding sites and is applicable to the proteome-scale. Thus, PIPE-Sites could be useful for exhaustive analysis of protein binding patterns in whole proteomes as well as discovery of novel binding motifs. PIPE-Sites is available online at http://pipe-sites.cgmlab.org/.",2011-06-02 +26356015,Measure the Semantic Similarity of GO Terms Using Aggregate Information Content.,"The rapid development of gene ontology (GO) and huge amount of biomedical data annotated by GO terms necessitate computation of semantic similarity of GO terms and, in turn, measurement of functional similarity of genes based on their annotations. In this paper we propose a novel and efficient method to measure the semantic similarity of GO terms. The proposed method addresses the limitations in existing GO term similarity measurement techniques; it computes the semantic content of a GO term by considering the information content of all of its ancestor terms in the graph. The aggregate information content (AIC) of all ancestor terms of a GO term implicitly reflects the GO term's location in the GO graph and also represents how human beings use this GO term and all its ancestor terms to annotate genes. We show that semantic similarity of GO terms obtained by our method closely matches the human perception. Extensive experimental studies show that this novel method also outperforms all existing methods in terms of the correlation with gene expression data. We have developed web services for measuring semantic similarity of GO terms and functional similarity of genes using the proposed AIC method and other popular methods. These web services are available at http://bioinformatics.clemson.edu/G-SESAME.",2014-05-01 +24228832,Magnesium deficit – overlooked cause of low vitamin D status?,"Like vitamin D deficit, magnesium deficit is considered to be a risk factor for cardiovascular disease. 
Several steps in the vitamin D metabolism, such as vitamin D binding to its transport protein and the conversion of vitamin D into the hormonal form 1,25-dihydroxyvitamin D by hepatic and renal hydroxylation, depend on magnesium as a cofactor. A new analysis of two National Health and Nutrition Examination Surveys data sets, published in BMC Medicine, investigated potential interactions between magnesium intake, circulating 25-hydroxyvitamin D, which is the generally accepted indicator of vitamin D status, and mortality. Data indicate a reduced risk of insufficient/deficient vitamin D status at high magnesium intake and an inverse association between circulating 25-hydroxyvitamin D and mortality, particularly cardiovascular mortality, among those with magnesium intake above the median. The study provides important findings concerning potential metabolic interactions between magnesium and vitamin D and its clinical relevance. However, results should be considered preliminary since biochemical data on individual magnesium status were lacking, confounding cannot be excluded and questions on the dose–response relationship still remain to be answered. Please see related research article: http://www.biomedcentral.com/1741-7015/11/187.",2013-10-24 +26722765,Introduction of the Python script STRinNGS for analysis of STR regions in FASTQ or BAM files and expansion of the Danish STR sequence database to 11 STRs.,"This work introduces the in-house developed Python application STRinNGS for analysis of STR sequence elements in BAM or FASTQ files. STRinNGS identifies sequence reads with STR loci by their flanking sequences, it analyses the STR sequence and the flanking regions, and generates a report with the assigned SNP-STR alleles. The main output file from STRinNGS contains all sequences with read counts above 1% of the total number of reads per locus. 
STR sequences are automatically named according to the nomenclature used previously and according to the repeat unit definitions in STRBase (http://www.cstl.nist.gov/strbase/). The sequences are named with (1) the locus name, (2) the length of the repeat region divided by the length of the repeat unit, (3) the sequence(s) of the repeat unit(s) followed by the number of repeats and (4) variations in the flanking regions. Lower case letters in the main output file are used to flag sequences with previously unknown variations in the STRs. SNPs in the flanking regions are named by their ""rs"" numbers and the nucleotides in the SNP position. Data from 207 Danes sequenced with the Ion Torrent™ HID STR 10-plex that amplified nine STRs (CSF1PO, D3S1358, D5S818, D7S820, D8S1179, D16S539, TH01, TPOX, vWA), and Amelogenin was analysed with STRinNGS. Sequencing uncovered five common SNPs near four STRs and revealed 20 new alleles in the 207 Danes. Three short homopolymers in the D8S1179 flanking regions caused frequent sequencing errors. In 29 of 3726 allele calls (0.8%), sequences with homopolymer errors were falsely assigned as true alleles. An in-house developed script in R compensated for these errors by compiling sequence reads that had identical STR sequences and identical nucleotides in the five common SNPs. In the output file from the R script, all SNP-STR haplotype calls were correct. The 207 samples and six additional samples were sequenced for D3S1358, D12S391, and D21S11 using the 454 GS Junior platform in this and a previous work. Overall, next generation sequencing (NGS) of the 11 STRs lowered the mean match probability 386 times and increased the typical paternity indexes (i.e. the geometric mean) for trios and duos 47 and 23 times, respectively, compared to the traditional PCR-CE typing of the same population.",2015-12-12 +25788623,Two-group comparisons of zero-inflated intensity values: the choice of test statistic matters.,"

Motivation

A special characteristic of data from molecular biology is the frequent occurrence of zero intensity values which can arise either by true absence of a compound or by a signal that is below a technical limit of detection.

Results

While so-called two-part tests compare mixture distributions between groups, one-part tests treat the zero-inflated distributions as left-censored. The left-inflated mixture model combines these two approaches. Both types of distributional assumptions and combinations of both are considered in a simulation study to compare power and estimation of log fold change. We discuss issues of application using an example from peptidomics. The considered tests generally perform best in scenarios satisfying their respective distributional assumptions. In the absence of distributional assumptions, the two-part Wilcoxon test or the empirical likelihood ratio test is recommended. Assuming a log-normal subdistribution the left-inflated mixture model provides estimates for the proportions of the two considered types of zero intensities.

Availability

R code is available at http://cemsiis.meduniwien.ac.at/en/kb/science-research/software/",2015-03-18 +26007653,StemChecker: a web-based tool to discover and explore stemness signatures in gene sets.,"Stem cells present unique regenerative abilities, offering great potential for treatment of prevalent pathologies such as diabetes, neurodegenerative and heart diseases. Various research groups dedicated significant effort to identify sets of genes-so-called stemness signatures-considered essential to define stem cells. However, their usage has been hindered by the lack of comprehensive resources and easy-to-use tools. For this we developed StemChecker, a novel stemness analysis tool, based on the curation of nearly fifty published stemness signatures defined by gene expression, RNAi screens, Transcription Factor (TF) binding sites, literature reviews and computational approaches. StemChecker allows researchers to explore the presence of stemness signatures in user-defined gene sets, without carrying-out lengthy literature curation or data processing. To assist in exploring underlying regulatory mechanisms, we collected over 80 target gene sets of TFs associated with pluri- or multipotency. StemChecker presents an intuitive graphical display, as well as detailed statistical results in table format, which helps revealing transcriptionally regulatory programs, indicating the putative involvement of stemness-associated processes in diseases like cancer. Overall, StemChecker substantially expands the available repertoire of online tools, designed to assist the stem cell biology, developmental biology, regenerative medicine and human disease research community. 
StemChecker is freely accessible at http://stemchecker.sysbiolab.eu.",2015-05-24 +23717618,VirusFinder: software for efficient and accurate detection of viruses and their integration sites in host genomes through next generation sequencing data.,"Next generation sequencing (NGS) technologies allow us to explore virus interactions with host genomes that lead to carcinogenesis or other diseases; however, this effort is largely hindered by the dearth of efficient computational tools. Here, we present a new tool, VirusFinder, for the identification of viruses and their integration sites in host genomes using NGS data, including whole transcriptome sequencing (RNA-Seq), whole genome sequencing (WGS), and targeted sequencing data. VirusFinder's unique features include the characterization of insertion loci of virus of arbitrary type in the host genome and high accuracy and computational efficiency as a result of its well-designed pipeline. The source code as well as additional data of VirusFinder is publicly available at http://bioinfo.mc.vanderbilt.edu/VirusFinder/.",2013-05-24 +26472075,Survey statistics of automated segmentations applied to optical imaging of mammalian cells.,"

Background

The goal of this survey paper is to overview cellular measurements using optical microscopy imaging followed by automated image segmentation. The cellular measurements of primary interest are taken from mammalian cells and their components. They are denoted as two- or three-dimensional (2D or 3D) image objects of biological interest. In our applications, such cellular measurements are important for understanding cell phenomena, such as cell counts, cell-scaffold interactions, cell colony growth rates, or cell pluripotency stability, as well as for establishing quality metrics for stem cell therapies. In this context, this survey paper is focused on automated segmentation as a software-based measurement leading to quantitative cellular measurements.

Methods

We define the scope of this survey and a classification schema first. Next, all found and manually filtered publications are classified according to the main categories: (1) objects of interests (or objects to be segmented), (2) imaging modalities, (3) digital data axes, (4) segmentation algorithms, (5) segmentation evaluations, (6) computational hardware platforms used for segmentation acceleration, and (7) object (cellular) measurements. Finally, all classified papers are converted programmatically into a set of hyperlinked web pages with occurrence and co-occurrence statistics of assigned categories.

Results

The survey paper presents to a reader: (a) the state-of-the-art overview of published papers about automated segmentation applied to optical microscopy imaging of mammalian cells, (b) a classification of segmentation aspects in the context of cell optical imaging, (c) histogram and co-occurrence summary statistics about cellular measurements, segmentations, segmented objects, segmentation evaluations, and the use of computational platforms for accelerating segmentation execution, and (d) open research problems to pursue.

Conclusions

The novel contributions of this survey paper are: (1) a new type of classification of cellular measurements and automated segmentation, (2) statistics about the published literature, and (3) a web hyperlinked interface to classification statistics of the surveyed papers at https://isg.nist.gov/deepzoomweb/resources/survey/index.html.",2015-10-15 +23259851,Unlocking the potential of publicly available microarray data using inSilicoDb and inSilicoMerging R/Bioconductor packages.,"

Background

With an abundant amount of microarray gene expression data sets available through public repositories, new possibilities lie in combining multiple existing data sets. In this new context, analysis itself is no longer the problem, but retrieving and consistently integrating all this data before delivering it to the wide variety of existing analysis tools becomes the new bottleneck.

Results

We present the newly released inSilicoMerging R/Bioconductor package which, together with the earlier released inSilicoDb R/Bioconductor package, allows consistent retrieval, integration and analysis of publicly available microarray gene expression data sets. Inside the inSilicoMerging package a set of five visual and six quantitative validation measures are available as well.

Conclusions

By providing (i) access to uniformly curated and preprocessed data, (ii) a collection of techniques to remove the batch effects between data sets from different sources, and (iii) several validation tools enabling the inspection of the integration process, these packages enable researchers to fully explore the potential of combining gene expression data for downstream analysis. The power of using both packages is demonstrated by programmatically retrieving and integrating gene expression studies from the InSilico DB repository [https://insilicodb.org/app/].",2012-12-24 +25199088,Hematologic and serum biochemical values of 4 species of Peromyscus mice and their hybrids.,"Deer mice (Peromyscus maniculatus) and congeneric species are used in a wide variety of research applications, particularly studies of developmental, physiologic, and behavioral characteristics associated with habitat adaptation and speciation. Because peromyscine mice readily adapt to colony conditions, animals with traits of interest in the field are moved easily into the laboratory where they can be studied under controlled conditions. The purpose of this study was to determine the serum chemistry and hematologic parameters of 4 frequently used species from the Peromyscus Genetic Stock Center species (P. californicus, P. leucopus, P. maniculatus, and P. polionotus) and to determine quantitative differences in these parameters among species and between sexes. Triglyceride values were substantially higher in female compared with male mice in all 4 species. Similar cross-species differences in MCH were present. Overall there was considerable interspecific variation for most blood parameters, with little evidence for covariation of any 2 or more parameters. Because crosses of P. maniculatus and P. 
polionotus produce fertile offspring, segregation analyses can be applied to determine the genetic basis of any traits that differ between them, such as their 3.8- and 2.1-fold interspecific differences in cholesterol and triglyceride levels, respectively. The current data provide a set of baseline values useful for subsequent comparative studies of species experiencing different circumstances, whether due to natural variation or anthropogenic environmental degradation. To enable such comparisons, the raw data are downloadable from a site maintained by the Stock Center (http://ww2.biol.sc.edu/~peromyscus).",2014-07-01 +24655548,Analysis of growth factor signaling in genetically diverse breast cancer lines.,"

Background

Soluble growth factors present in the microenvironment play a major role in tumor development, invasion, metastasis, and responsiveness to targeted therapies. While the biochemistry of growth factor-dependent signal transduction has been studied extensively in individual cell types, relatively little systematic data are available across genetically diverse cell lines.

Results

We describe a quantitative and comparative dataset focused on immediate-early signaling that regulates the AKT (AKT1/2/3) and ERK (MAPK1/3) pathways in a canonical panel of well-characterized breast cancer lines. We also provide interactive web-based tools to facilitate follow-on analysis of the data. Our findings show that breast cancers are diverse with respect to ligand sensitivity and signaling biochemistry. Surprisingly, triple negative breast cancers (TNBCs; which express low levels of ErbB2, progesterone and estrogen receptors) are the most broadly responsive to growth factors and HER2amp cancers (which overexpress ErbB2) the least. The ratio of ERK to AKT activation varies with ligand and subtype, with a systematic bias in favor of ERK in hormone receptor positive (HR+) cells. The factors that correlate with growth factor responsiveness depend on whether fold-change or absolute activity is considered the key biological variable, and they differ between ERK and AKT pathways.

Conclusions

Responses to growth factors are highly diverse across breast cancer cell lines, even within the same subtype. A simple four-part heuristic suggests that diversity arises from variation in receptor abundance, an ERK/AKT bias that depends on ligand identity, a set of factors common to all receptors that varies in abundance or activity with cell line, and an ""indirect negative regulation"" by ErbB2. This analysis sets the stage for the development of a mechanistic and predictive model of growth factor signaling in diverse cancer lines. Interactive tools for looking up these results and downloading raw data are available at http://lincs.hms.harvard.edu/niepel-bmcbiol-2014/.",2014-03-21 +24710290,The evolution of protein structures and structural ensembles under functional constraint.,"Protein sequence, structure, and function are inherently linked through evolution and population genetics. Our knowledge of protein structure comes from solved structures in the Protein Data Bank (PDB), our knowledge of sequence through sequences found in the NCBI sequence databases (http://www.ncbi.nlm.nih.gov/), and our knowledge of function through a limited set of in-vitro biochemical studies. How these intersect through evolution is described in the first part of the review. In the second part, our understanding of a series of questions is addressed. This includes how sequences evolve within structures, how evolutionary processes enable structural transitions, how the folding process can change through evolution and what the fitness impacts of this might be. Moving beyond static structures, the evolution of protein kinetics (including normal modes) is discussed, as is the evolution of conformational ensembles and structurally disordered proteins. This ties back to a question of the role of neostructuralization and how it relates to selection on sequences for functions. 
The relationship between metastability, the fitness landscape, sequence divergence, and organismal effective population size is explored. Lastly, a brief discussion of modeling the evolution of sequences of ordered and disordered proteins is entertained.",2011-10-28 +26255308,MESSI: metabolic engineering target selection and best strain identification tool. ,"Metabolic engineering and synthetic biology are synergistically related fields for manipulating target pathways and designing microorganisms that can act as chemical factories. Saccharomyces cerevisiae's ideal bioprocessing traits make yeast a very attractive chemical factory for production of fuels, pharmaceuticals, nutraceuticals as well as a wide range of chemicals. However, future attempts of engineering S. cerevisiae's metabolism using synthetic biology need to move towards more integrative models that incorporate the high connectivity of metabolic pathways and regulatory processes and the interactions in genetic elements across those pathways and processes. To contribute in this direction, we have developed Metabolic Engineering target Selection and best Strain Identification tool (MESSI), a web server for predicting efficient chassis and regulatory components for yeast bio-based production. The server provides an integrative platform for users to analyse ready-to-use public high-throughput metabolomic data, which are transformed to metabolic pathway activities for identifying the most efficient S. cerevisiae strain for the production of a compound of interest. As input MESSI accepts metabolite KEGG IDs or pathway names. MESSI outputs a ranked list of S. cerevisiae strains based on aggregation algorithms. Furthermore, through a genome-wide association study of the metabolic pathway activities with the strains' natural variation, MESSI prioritizes genes and small variants as potential regulatory points and promising metabolic engineering targets. 
Users can choose various parameters in the whole process such as (i) weight and expectation of each metabolic pathway activity in the final ranking of the strains, (ii) Weighted AddScore Fuse or Weighted Borda Fuse aggregation algorithm, (iii) type of variants to be included, (iv) variant sets in different biological levels. Database URL: http://sbb.hku.hk/MESSI/.",2015-08-08 +25616741,Simple finite element methods for approximating predator-prey dynamics in two dimensions using MATLAB.,"We describe simple finite element schemes for approximating spatially extended predator-prey dynamics with the Holling type II functional response and logistic growth of the prey. The finite element schemes generalize 'Scheme 1' in the paper by Garvie (Bull Math Biol 69(3):931-956, 2007). We present user-friendly, open-source MATLAB code for implementing the finite element methods on arbitrary-shaped two-dimensional domains with Dirichlet, Neumann, Robin, mixed Robin-Neumann, mixed Dirichlet-Neumann, and Periodic boundary conditions. Users can download, edit, and run the codes from http://www.uoguelph.ca/~mgarvie/ . In addition to discussing the well posedness of the model equations, the results of numerical experiments are presented and demonstrate the crucial role that habitat shape, initial data, and the boundary conditions play in determining the spatiotemporal dynamics of predator-prey interactions. As most previous works on this problem have focussed on square domains with standard boundary conditions, our paper makes a significant contribution to the area.",2015-01-24 +25763178,"Similarity-based search of model organism, disease and drug effect phenotypes.","

Background

Semantic similarity measures over phenotype ontologies have been demonstrated to provide a powerful approach for the analysis of model organism phenotypes, the discovery of animal models of human disease, novel pathways, gene functions, druggable therapeutic targets, and determination of pathogenicity.

Results

We have developed PhenomeNET 2, a system that enables similarity-based searches over a large repository of phenotypes in real-time. It can be used to identify strains of model organisms that are phenotypically similar to human patients, diseases that are phenotypically similar to model organism phenotypes, or drug effect profiles that are similar to the phenotypes observed in a patient or model organism. PhenomeNET 2 is available at http://aber-owl.net/phenomenet.

Conclusions

Phenotype-similarity searches can provide a powerful tool for the discovery and investigation of molecular mechanisms underlying an observed phenotypic manifestation. PhenomeNET 2 facilitates user-defined similarity searches and allows researchers to analyze their data within a large repository of human, mouse and rat phenotypes.",2015-02-19 +23018606,"Assessment and management of pain, with particular emphasis on central neuropathic pain, in moderate to severe dementia.","In patients with dementia, undertreatment of pain, irrespective of its aetiology, is widely recognized; the risk for undertreatment increases with the severity of dementia. We argue, however, that central neuropathic pain is by far the most undertreated type of pain in patients with dementia. Central pain is a type of neuropathic pain that is known to occur in stroke patients and is caused by white matter lesions. Although white matter lesions are also a neuropathological hallmark of dementia, central neuropathic pain has hardly been described in dementia. Therefore, the goal of this review was to address assessment and management of pain, with particular emphasis on central neuropathic pain, in moderate to severe dementia. Concerning pain assessment, the findings of this review suggest that self-report pain rating scales, in particular the Verbal Rating Scale, the Horizontal Visual Analogue Scale and the Faces Pain Scale can be administered to patients in a more advanced stage of dementia. For those who are no longer able to communicate pain, pain observation scales are most appropriate. Self-report and pain observation should be combined, if possible. For an overview of assessment tools to measure pain with older people unable to verbally communicate, we refer readers to the City of Hope Pain and Palliative Care Resource Center ( http://prc.coh.org/PAIN-NOA.htm ). The review further highlights that behavioural disturbances, e.g. 
agitation and physical inactivity, as well as autonomic responses, e.g. an increase in blood pressure and heart rate, may contribute to a more reliable assessment of pain. With respect to central neuropathic pain in particular, assessment of sensory abilities (touch, pinprick, temperature and vibration), mood (e.g. anxiety) and determination of the presence of a Babinsky reflex, accelerated tendon reflexes, and spasticity may contribute to reliable assessment. Management of pain, not of a central origin, starts with paracetamol (acetaminophen), which, together with opioids, is the most frequently prescribed analgesic drug in dementia. Non-steroidal anti-inflammatory drugs are hardly prescribed in a residential setting. Some authors advise starting treatment with a low dose of opioids. Antidepressants and antiepileptic drugs appear to have a positive effect on central neuropathic pain. In the review, advantages and disadvantages of amitriptyline, carbamazepine, lamotrigine, gabapentin and pregabalin are discussed; a negative effect of these drugs on liver and kidney functions, as well as on cognitive functions in patients who already suffer from cognitive impairment is highlighted. Next to pharmacotherapy, non-pharmacological treatment strategies such as transcutaneous electrical nerve stimulation may be effective as long as afferent pathways transmitting the electrical stimulus are still intact.",2012-09-01 +25520927,Cross-species identification of in silico microsatellite biomarkers for genetic disease.,"Microsatellites appear widely in genomes of diverse species. Variants of repeat number of microsatellites often correlate with risks of genetic disorder or severity of diseases. Using cross-species comparison, the proposed system comprehensively verifies microsatellites of specific genes related to 16 genetic disorders. 
Genomic information retrieved from 14 frequently used model organisms in biomedical study was thoroughly analyzed, emphasizing conserved and diverse traits. Features of microsatellite sequences among different organisms, including appearing frequency, position, pattern and distribution, could be determined automatically for stating genetically functional conservation and evolutionary correlation. This research found that among mammals and fishes, the microsatellite sequences are conserved in the genes of epidermal growth factor receptor, ataxia telangiectasia mutated and androgen receptor corresponding to cancers, ataxia telangiectasia and hepatocellular carcinoma, respectively. Still, except fruit fly conserved CAG repeats in Huntington and Spinocerebellar ataxia type 2 genes, no microsatellites were conserved in those genes linked to neurological/neurodegenerative disorders among mammal and fish species. In comparison of mammalian species, microsatellite biomarkers identified from 17 genetic disorder-related genes revealed high repeat conservation, especially in human, gorilla and macaque. Obviously, this comparative analysis illustrates microsatellite repeats affecting genetic disorders, highly correlated to evolutionary distance of species. Chief contribution of this in silico research lies in assisting biologists to identify disease-related microsatellite biomarkers and employ appropriate model organisms for further biomedical studies relying on microsatellite conservation information. Database http://ssrtc.cs.ntou.edu.tw is for academic use.",2014-08-02 +24330589,Renal mucinous tubular and spindle cell carcinoma: a report of 8 cases and review of the literature.,"

Background

Mucinous tubular and spindle cell carcinoma of kidney (MTSCC-K) is a rare variant of renal tumor. The current data show most of MTSCCs are of low malignant potential and rare cases metastatic to lymph nodes have been reported; however, the recorded computed tomography (CT) and follow up data are limited.

Material and method

In the present study, we retrospectively analyzed CT and clinicopathological data of eight patients with renal MTSCC-K.

Results

A total of eight cases, including six females and two males, were included in this analysis with a mean age of 48.4 (range 25 to 81) years. Mean tumor size was 4.2 (range 2.5 to 10.0) cm. Preoperative CT demonstrated that all tumors were slightly enhanced on both corticomedullary and nephrographic phase, which was different from many other renal cell carcinomas. Three of them were treated with open radical nephrectomy, three with laparoscopic radical nephrectomy and the other two with laparoscopic partial nephrectomy. No postoperative therapy was applied. Patients were followed up for 15 to 64 months and there was no evidence of recurrence and metastasis.

Conclusions

The MTSCC-K has special clinicopathological characteristics, low degree of malignancy and relative good prognosis. The diagnosis mainly depends on the histopathological examination and CT may help to differentiate with papillary renal cell carcinoma. Surgical treatment is recommended and additional therapies are not necessary.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/8435581771088249.",2013-12-11 +24974203,Efficient initial volume determination from electron microscopy images of single particles.,"

Motivation

Structural information of macromolecular complexes provides key insights into the way they carry out their biological functions. The reconstruction process leading to the final 3D map requires an approximate initial model. Generation of an initial model is still an open and challenging problem in single-particle analysis.

Results

We present a fast and efficient approach to obtain a reliable, low-resolution estimation of the 3D structure of a macromolecule, without any a priori knowledge, addressing the well-known issue of initial volume estimation in the field of single-particle analysis. The input of the algorithm is a set of class average images obtained from individual projections of a biological object at random and unknown orientations by transmission electron microscopy micrographs. The proposed method is based on an initial non-linear dimensionality reduction approach, which allows automatic selection of small representative sets of class average images capturing most of the structural information of the particle under study. These reduced sets are then used to generate volumes from random orientation assignments. The best volume is determined from these guesses using a random sample consensus (RANSAC) approach. We have tested our proposed algorithm, which we will term 3D-RANSAC, with simulated and experimental data, obtaining satisfactory results under the low signal-to-noise conditions typical of cryo-electron microscopy.

Availability

The algorithm is freely available as part of the Xmipp 3.1 package [http://xmipp.cnb.csic.es].

Contact

jvargas@cnb.csic.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-06-27 +24336413,HAMMER: automated operation of mass frontier to construct in silico mass spectral fragmentation libraries.,"

Summary

Experimental MS(n) mass spectral libraries currently do not adequately cover chemical space. This limits the robust annotation of metabolites in metabolomics studies of complex biological samples. In silico fragmentation libraries would improve the identification of compounds from experimental multistage fragmentation data when experimental reference data are unavailable. Here, we present a freely available software package to automatically control Mass Frontier software to construct in silico mass spectral libraries and to perform spectral matching. Based on two case studies, we have demonstrated that high-throughput automation of Mass Frontier allows researchers to generate in silico mass spectral libraries in an automated and high-throughput fashion with little or no human intervention required.

Availability and implementation

Documentation, examples, results and source code are available at http://www.biosciences-labs.bham.ac.uk/viant/hammer/.",2013-12-11 +25081485,Regional and national guideline recommendations for digital ano-rectal examination as a means for anal cancer screening in HIV positive men who have sex with men: a systematic review.,"

Background

Although anal cancer is common in HIV positive men who have sex with men, few centres offer systematic screening. Regular digital ano-rectal examination (DARE) is a type of screening that has been recommended by some experts. How widely this forms part of HIV management guidelines is unclear.

Methods

The protocol was registered prospectively (CRD42013005188; http://www.crd.york.ac.uk/PROSPERO/). We systematically reviewed 121 regional and national HIV guidelines and searched for guidelines from http://hivinsite.ucsf.edu/global?page=cr-00-04#SauguidelineX, PubMed and Web of Science databases up to 5th August 2013 for recommendations of DARE as a means of anal cancer screening in HIV positive MSM. Guidelines were examined in detail if they were clinical guidelines, including both prevention and treatment protocols and were in English. Guidelines were excluded if they were restricted to limited areas (e.g. antiretroviral therapy only, children or pregnant women, strategies for prevention/testing). Information was extracted regarding recommendation of DARE as a screening method, the frequency of DARE recommended, target population for screening and the strength of evidence supporting this.

Results

30 regional and national guidelines were included and examined in detail. Only 2 recommended DARE. The 'European AIDS Clinical Society Guidelines' recommends DARE every 1-3 years for HIV positive MSM whilst the 'US Guideline for prevention and treatment of opportunistic infections in HIV-infected adults and adolescents' recommends an annual DARE for the HIV + population in general. None of these guidelines specify the age of commencing screening. In each case, the highest level of evidence supporting these two recommendations was expert opinion.

Conclusions

Few HIV guidelines discuss or recommend DARE as a means of anal cancer screening. Studies of the efficacy, acceptability and cost-effectiveness of DARE are needed to assess its role in anal cancer screening.",2014-08-01 +24796702,Mutation update for GNE gene variants associated with GNE myopathy.,"The GNE gene encodes the rate-limiting, bifunctional enzyme of sialic acid biosynthesis, uridine diphosphate-N-acetylglucosamine 2-epimerase/N-acetylmannosamine kinase (GNE). Biallelic GNE mutations underlie GNE myopathy, an adult-onset progressive myopathy. GNE myopathy-associated GNE mutations are predominantly missense, resulting in reduced, but not absent, GNE enzyme activities. The exact pathomechanism of GNE myopathy remains unknown, but likely involves aberrant (muscle) sialylation. Here, we summarize 154 reported and novel GNE variants associated with GNE myopathy, including 122 missense, 11 nonsense, 14 insertion/deletions, and seven intronic variants. All variants were deposited in the online GNE variation database (http://www.dmd.nl/nmdb2/home.php?select_db=GNE). We report the predicted effects on protein function of all variants as well as the predicted effects on epimerase and/or kinase enzymatic activities of selected variants. By analyzing exome sequence databases, we identified three frequently occurring, unreported GNE missense variants/polymorphisms, important for future sequence interpretations. Based on allele frequencies, we estimate the world-wide prevalence of GNE myopathy to be ∼4-21/1,000,000. This previously unrecognized high prevalence confirms suspicions that many patients may escape diagnosis. 
Awareness among physicians for GNE myopathy is essential for the identification of new patients, which is required for better understanding of the disorder's pathomechanism and for the success of ongoing treatment trials.",2014-08-01 +24489955,Citrus sinensis annotation project (CAP): a comprehensive database for sweet orange genome.,"Citrus is one of the most important and widely grown fruit crop with global production ranking firstly among all the fruit crops in the world. Sweet orange accounts for more than half of the Citrus production both in fresh fruit and processed juice. We have sequenced the draft genome of a double-haploid sweet orange (C. sinensis cv. Valencia), and constructed the Citrus sinensis annotation project (CAP) to store and visualize the sequenced genomic and transcriptome data. CAP provides GBrowse-based organization of sweet orange genomic data, which integrates ab initio gene prediction, EST, RNA-seq and RNA-paired end tag (RNA-PET) evidence-based gene annotation. Furthermore, we provide a user-friendly web interface to show the predicted protein-protein interactions (PPIs) and metabolic pathways in sweet orange. CAP provides comprehensive information beneficial to the researchers of sweet orange and other woody plants, which is freely available at http://citrus.hzau.edu.cn/.",2014-01-28 +22102575,YeTFaSCo: a database of evaluated yeast transcription factor sequence specificities.,"The yeast Saccharomyces cerevisiae is a prevalent system for the analysis of transcriptional networks. As a result, multiple DNA-binding sequence specificities (motifs) have been derived for most yeast transcription factors (TFs). However, motifs from different studies are often inconsistent with each other, making subsequent analyses complicated and confusing. Here, we have created YeTFaSCo (The Yeast Transcription Factor Specificity Compendium, http://yetfasco.ccbr.utoronto.ca/), an extensive collection of S. cerevisiae TF specificities. 
YeTFaSCo differs from related databases by being more comprehensive (including 1709 motifs for 256 proteins or protein complexes), and by evaluating the motifs using multiple objective quality metrics. The metrics include correlation between motif matches and ChIP-chip data, gene expression patterns, and GO terms, as well as motif agreement between different studies. YeTFaSCo also features an index of 'expert-curated' motifs, each associated with a confidence assessment. In addition, the database website features tools for motif analysis, including a sequence scanning function and precomputed genome-browser tracks of motif occurrences across the entire yeast genome. Users can also search the database for motifs that are similar to a query motif.",2011-11-18 +26252397,Smoke Rings: Towards a Comprehensive Tobacco Free Policy for the Olympic Games.,"

Background

The tobacco industry has long sought affiliation with major sporting events, including the Olympic Games, for marketing, advertising and promotion purposes. Since 1988, each Olympic Games has adopted a tobacco-free policy. Limited study of the effectiveness of the smoke-free policy has been undertaken to date, with none examining the tobacco industry's involvement with the Olympics or use of the Olympic brand.

Methods and findings

A comparison of the contents of Olympic tobacco-free policies from 1988 to 2014 was carried out by searching the websites of the IOC and host NOCs. The specific tobacco control measures adopted for each Games were compiled and compared with measures recommended by the WHO Tobacco Free Sports Initiative and Article 13 of the Framework Convention on Tobacco Control (FCTC). This was supported by semi-structured interviews of key informants involved with the adoption of tobacco-free policies for selected games. To understand the industry's interests in the Olympics, the Legacy Tobacco Documents Library (http://legacy.library.ucsf.edu) was systematically searched between June 2013 and August 2014. Company websites, secondary sources and media reports were also searched to triangulate the above data sources. This paper finds that, while most direct associations between tobacco and the Olympics have been prohibited since 1988, a variety of indirect associations undermine the Olympic tobacco-free policy. This is due to variation in the scope of tobacco-free policies, limited jurisdiction and continued efforts by the industry to be associated with Olympic ideals.

Conclusions

The paper concludes that, compatible with the IOC's commitment to promoting healthy lifestyles, a comprehensive tobacco-free policy with standardized and binding measures should be adopted by the International Olympic Committee and all national Olympic committees.",2015-08-07 +26674271,SLiMScape 3.x: a Cytoscape 3 app for discovery of Short Linear Motifs in protein interaction networks.,"Short linear motifs (SLiMs) are small protein sequence patterns that mediate a large number of critical protein-protein interactions, involved in processes such as complex formation, signal transduction, localisation and stabilisation. SLiMs show rapid evolutionary dynamics and are frequently the targets of molecular mimicry by pathogens. Identifying enriched sequence patterns due to convergent evolution in non-homologous proteins has proven to be a successful strategy for computational SLiM prediction. Tools of the SLiMSuite package use this strategy, using a statistical model to identify SLiM enrichment based on the evolutionary relationships, amino acid composition and predicted disorder of the input proteins. The quality of input data is critical for successful SLiM prediction. Cytoscape provides a user-friendly, interactive environment to explore interaction networks and select proteins based on common features, such as shared interaction partners. SLiMScape embeds tools of the SLiMSuite package for de novo SLiM discovery (SLiMFinder and QSLiMFinder) and identifying occurrences/enrichment of known SLiMs (SLiMProb) within this interactive framework. SLiMScape makes it easier to (1) generate high quality hypothesis-driven datasets for these tools, and (2) visualise predicted SLiM occurrences within the context of the network. To generate new predictions, users can select nodes from a protein network or provide a set of Uniprot identifiers. SLiMProb also requires additional query motif input. 
Jobs are then run remotely on the SLiMSuite server ( http://rest.slimsuite.unsw.edu.au) for subsequent retrieval and visualisation. SLiMScape can also be used to retrieve and visualise results from jobs run directly on the server. SLiMScape and SLiMSuite are open source and freely available via GitHub under GNU licenses.",2015-08-05 +23193259,The RCSB Protein Data Bank: new resources for research and education.,"The Research Collaboratory for Structural Bioinformatics Protein Data Bank (RCSB PDB) develops tools and resources that provide a structural view of biology for research and education. The RCSB PDB web site (http://www.rcsb.org) uses the curated 3D macromolecular data contained in the PDB archive to offer unique methods to access, report and visualize data. Recent activities have focused on improving methods for simple and complex searches of PDB data, creating specialized access to chemical component data and providing domain-based structural alignments. New educational resources are offered at the PDB-101 educational view of the main web site such as Author Profiles that display a researcher's PDB entries in a timeline. To promote different kinds of access to the RCSB PDB, Web Services have been expanded, and an RCSB PDB Mobile application for the iPhone/iPad has been released. These improvements enable new opportunities for analyzing and understanding structure data.",2012-11-27 +25073475,MutaCYP: Classification of missense mutations in human cytochromes P450.,"

Background

Cytochrome P450 monooxygenases (CYPs) represent a large and diverse family of enzymes involved in various biological processes in humans. Individual genome sequencing has revealed multiple mutations in human CYPs, and many missense mutations have been associated with variety of diseases. Since 3D structures are not resolved for most human CYPs, there is a need for a reliable sequence-based prediction that discriminates benign and disease causing mutations.

Methods

A new prediction method (MutaCYP) has been developed for scoring de novo missense mutations to have a deleterious effect. The method utilizes only five features, all of which are sequence-based: predicted relative solvent accessibility (RSA), variance of predicted RSA among the residues in close sequence proximity, Z-score of Shannon entropy for a given position, difference in similarity scores and weighted difference in size between wild type and new amino acids. The method is based on a single neural network.

Results

MutaCYP achieves MCC = 0.70, Q2 = 88.52%, Recall = 93.40% with Precision = 91.09%, and AUC = 0.909. Comparative evaluation with other existing methods indicates that MutaCYP outperforms SIFT and PolyPhen-2. Predictions by MutaCYP appear to be orthogonal to predictions by the evaluated methods. Potential issues on reliability of annotations of mutations in the existing databases are discussed.

Conclusions

A new accurate method, MutaCYP, for classification of missense mutations in human CYPs is presented. The prediction model consists of only five sequence-based features, including a real-valued predicted relative solvent accessibility. The method is publicly available at http://research.cchmc.org/MutaSense/.",2014-07-30 +24356726,"Austrian National CathLab Registry (ANCALAR): cardiac catheterization, coronary angiography (CA), and percutaneous coronary intervention (PCI) in Austria during the year 2011 (Registry Data with Audit including 2012).","Concerning international comparison for the year 2011, Austria is situated under the top nations with 6,383 diagnostic coronary angiographies (CA), 2,407 percutaneous coronary interventions (PCI), and 47 transarterial aortic valve implantations (TAVI) per 1 million inhabitants in Europe. Although the number of TAVI increases rapidly since its first introduction in 2007 (47 TAVI per 1 million inhabitants in 2011, not including surgical cases from the transapical route), the data for CA and PCI remained constant during the past years.The rates of stent (91%) and drug-eluting stent implantations (78% of stents) also remained constant on a high level. Little fluctuation is also reflected in the complication data (including mortality evaluation). 
An increased mortality is well known, especially in patients with the so-called ST-segment elevation myocardial infarction and consecutive shock (19-35% in the past years). The application of certain special devices increased (clot catcher) or decreased (glycoprotein IIb/IIIa receptor antagonist) in 2011 or were finally unused (Laser). Interestingly, not only in Austria, it was observed several times that scientific knowledge, recommended as Class I Indications in the guidelines, takes several years to establish itself nationwide. Our independent, purely academic activity is located in the area of health services research, and has also the option to generate benchmarks for individual centers. Participation in our surveys is voluntary. Since 1992, every year, without interruption (no missing center!), 90-100 parameters are applicable. The questionnaire will be optimized and adapted to current conditions. This is done in cooperation with the participating centers. To provide comparability, we make only minimal and absolutely most necessary modifications. The data are collected and summarized at the end of the year by each center itself. During the year, the centers are visited to perform audits and to keep personal contact to them. The data for 2011 were presented in Linz (November 23, 2012) at the autumn meeting of the working group ""Interventional Cardiology of the Austrian Society of Cardiology"" (ÖKG), as a basis for discussion. The presentation can be viewed by using private access code to the ÖKG video presentation page ( http://oekg.medroom.at/ ); the publication will also be placed under the website http://iik.i-med.ac.at.",2013-12-20 +25120561,HumanViCe: host ceRNA network in virus infected cells in human.,"Host-virus interaction via host cellular components has been an important field of research in recent times. RNA interference mediated by short interfering RNAs and microRNAs (miRNA), is a widespread anti-viral defense strategy. 
Importantly, viruses also encode their own miRNAs. In recent times miRNAs were identified as key players in host-virus interaction. Furthermore, viruses were shown to exploit the host miRNA networks to suit their own needs. The complex cross-talk between host and viral miRNAs and their cellular and viral targets forms the environment for viral pathogenesis. Apart from protein-coding mRNAs, non-coding RNAs may also be targeted by host or viral miRNAs in virus infected cells, and viruses can exploit the host miRNA mediated gene regulatory network via the competing endogenous RNA effect. A recent report showed that viral U-rich non-coding RNAs called HSUR, expressed in primate virus herpesvirus saimiri (HVS) infected T cells, were able to bind to three host miRNAs, causing significant alteration in cellular level for one of the miRNAs. We have predicted protein coding and non protein-coding targets for viral and human miRNAs in virus infected cells. We identified viral miRNA targets within host non-coding RNA loci from AGO interacting regions in three different virus infected cells. Gene ontology (GO) and pathway enrichment analysis of the genes comprising the ceRNA networks in the virus infected cells revealed enrichment of key cellular signaling pathways related to cell fate decisions and gene transcription, like Notch and Wnt signaling pathways, as well as pathways related to viral entry, replication and virulence. We identified a vast number of non-coding transcripts playing as potential ceRNAs to the immune response associated genes; e.g., APOBEC family genes, in some virus infected cells. 
All this information is compiled in HumanViCe (http://gyanxet-beta.com/humanvice), a comprehensive database that provides the potential ceRNA networks in virus infected human cells.",2014-07-29 +25344496,MP-GeneticSynth: inferring biological network regulations from time series.,"MP-GeneticSynth is a Java tool for discovering the logic and regulation mechanisms responsible for observed biological dynamics in terms of finite difference recurrent equations. The software makes use of: (i) metabolic P systems as a modeling framework, (ii) an evolutionary approach to discover flux regulation functions as linear combinations of given primitive functions, (iii) a suitable reformulation of the least squares method to estimate function parameters considering simultaneously all the reactions involved in complex dynamics. The tool is available as a plugin for the virtual laboratory MetaPlab. It has graphical and interactive interfaces for data preparation, a priori knowledge integration, and flux regulator analysis. Availability and implementation: Source code, binaries, documentation (including quick start guide and videos) and case studies are freely available at http://mplab.sci.univr.it/plugins/mpgs/index.html.",2014-10-24 +27158191,"Probability Distributome: A Web Computational Infrastructure for Exploring the Properties, Interrelations, and Applications of Probability Distributions.","Probability distributions are useful for modeling, simulation, analysis, and inference on varieties of natural processes and physical phenomena. There are uncountably many probability distributions. However, a few dozen families of distributions are commonly defined and are frequently used in practice for problem solving, experimental applications, and theoretical studies. In this paper, we present a new computational and graphical infrastructure, the Distributome, which facilitates the discovery, exploration and application of diverse spectra of probability distributions. 
The extensible Distributome infrastructure provides interfaces for (human and machine) traversal, search, and navigation of all common probability distributions. It also enables distribution modeling, applications, investigation of inter-distribution relations, as well as their analytical representations and computational utilization. The entire Distributome framework is designed and implemented as an open-source, community-built, and Internet-accessible infrastructure. It is portable, extensible and compatible with HTML5 and Web2.0 standards (http://Distributome.org). We demonstrate two types of applications of the probability Distributome resources: computational research and science education. The Distributome tools may be employed to address five complementary computational modeling applications (simulation, data-analysis and inference, model-fitting, examination of the analytical, mathematical and computational properties of specific probability distributions, and exploration of the inter-distributional relations). Many high school and college science, technology, engineering and mathematics (STEM) courses may be enriched by the use of modern pedagogical approaches and technology-enhanced methods. The Distributome resources provide enhancements for blended STEM education by improving student motivation, augmenting the classical curriculum with interactive webapps, and overhauling the learning assessment protocols.",2015-06-26 +21256977,AskHERMES: An online question answering system for complex clinical questions.,"

Objective

Clinical questions are often long and complex and take many forms. We have built a clinical question answering system named AskHERMES to perform robust semantic analysis on complex clinical questions and output question-focused extractive summaries as answers.

Design

This paper describes the system architecture and a preliminary evaluation of AskHERMES, which implements innovative approaches in question analysis, summarization, and answer presentation. Five types of resources were indexed in this system: MEDLINE abstracts, PubMed Central full-text articles, eMedicine documents, clinical guidelines and Wikipedia articles.

Measurement

We compared the AskHERMES system with Google (Google and Google Scholar) and UpToDate and asked physicians to score the three systems by ease of use, quality of answer, time spent, and overall performance.

Results

AskHERMES allows physicians to enter a question in a natural way with minimal query formulation and allows physicians to efficiently navigate among all the answer sentences to quickly meet their information needs. In contrast, physicians need to formulate queries to search for information in Google and UpToDate. The development of the AskHERMES system is still at an early stage, and the knowledge resource is limited compared with Google or UpToDate. Nevertheless, the evaluation results show that AskHERMES' performance is comparable to the other systems. In particular, when answering complex clinical questions, it demonstrates the potential to outperform both Google and UpToDate systems.

Conclusions

AskHERMES, available at http://www.AskHERMES.org, has the potential to help physicians practice evidence-based medicine and improve the quality of patient care.",2011-01-21 +22876890,AtlasT4SS: a curated database for type IV secretion systems.,"

Background

The type IV secretion system (T4SS) can be classified as a large family of macromolecule transporter systems, divided into three recognized sub-families, according to the well-known functions. The major sub-family is the conjugation system, which allows transfer of genetic material, such as a nucleoprotein, via cell contact among bacteria. Also, the conjugation system can transfer genetic material from bacteria to eukaryotic cells; such is the case with the T-DNA transfer of Agrobacterium tumefaciens to host plant cells. The system of effector protein transport constitutes the second sub-family, and the third one corresponds to the DNA uptake/release system. Genome analyses have revealed numerous T4SS in Bacteria and Archaea. The purpose of this work was to organize, classify, and integrate the T4SS data into a single database, called AtlasT4SS - the first public database devoted exclusively to this prokaryotic secretion system.

Description

The AtlasT4SS is a manually curated database that describes a large number of proteins related to the type IV secretion system reported so far in Gram-negative and Gram-positive bacteria, as well as in Archaea. The database was created using the RDBMS MySQL and the Catalyst Framework based on the Perl programming language and using the Model-View-Controller (MVC) design pattern for Web. The current version holds a comprehensive collection of 1,617 T4SS proteins from 58 Bacteria (49 Gram-negative and 9 Gram-Positive), one Archaea and 11 plasmids. By applying the bi-directional best hit (BBH) relationship in pairwise genome comparison, it was possible to obtain a core set of 134 clusters of orthologous genes encoding T4SS proteins.

Conclusions

In our database we present one way of classifying orthologous groups of T4SSs in a hierarchical classification scheme with three levels. The first level comprises four classes that are based on the organization of genetic determinants, shared homologies, and evolutionary relationships: (i) F-T4SS, (ii) P-T4SS, (iii) I-T4SS, and (iv) GI-T4SS. The second level designates either a specific well-known protein family or an uncharacterized protein family. Finally, in the third level, each protein of an ortholog cluster is classified according to its involvement in a specific cellular process. AtlasT4SS database is open access and is available at http://www.t4ss.lncc.br.",2012-08-09 +24316132,DuctApe: a suite for the analysis and correlation of genomic and OmniLog™ Phenotype Microarray data.,"Addressing the functionality of genomes is one of the most important and challenging tasks of today's biology. In particular the ability to link genotypes to corresponding phenotypes is of interest in the reconstruction and biotechnological manipulation of metabolic pathways. Over the last years, the OmniLog™ Phenotype Microarray (PM) technology has been used to address many specific issues related to the metabolic functionality of microorganisms. However, computational tools that could directly link PM data with the gene(s) of interest followed by the extraction of information on gene-phenotype correlation are still missing. Here we present DuctApe, a suite that allows the analysis of both genomic sequences and PM data, to find metabolic differences among PM experiments and to correlate them with KEGG pathways and gene presence/absence patterns. As example, an application of the program to four bacterial datasets is presented. The source code and tutorials are available at http://combogenomics.github.io/DuctApe/.",2013-12-04 +23547764,MGcV: the microbial genomic context viewer for comparative genome analysis.,"

Background

Conserved gene context is used in many types of comparative genome analyses. It is used to provide leads on gene function, to guide the discovery of regulatory sequences, but also to aid in the reconstruction of metabolic networks. We present the Microbial Genomic context Viewer (MGcV), an interactive, web-based application tailored to strengthen the practice of manual comparative genome context analysis for bacteria.

Results

MGcV is a versatile, easy-to-use tool that renders a visualization of the genomic context of any set of selected genes, genes within a phylogenetic tree, genomic segments, or regulatory elements. It is tailored to facilitate laborious tasks such as the interactive annotation of gene function, the discovery of regulatory elements, or the sequence-based reconstruction of gene regulatory networks. We illustrate that MGcV can be used in gene function annotation by visually integrating information on prokaryotic genes, like their annotation as available from NCBI with other annotation data such as Pfam domains, sub-cellular location predictions and gene-sequence characteristics such as GC content. We also illustrate the usefulness of the interactive features that allow the graphical selection of genes to facilitate data gathering (e.g. upstream regions, ID's or annotation), in the analysis and reconstruction of transcription regulation. Moreover, putative regulatory elements and their corresponding scores or data from RNA-seq and microarray experiments can be uploaded, visualized and interpreted in (ranked-) comparative context maps. The ranked maps allow the interpretation of predicted regulatory elements and experimental data in light of each other.

Conclusion

MGcV advances the manual comparative analysis of genes and regulatory elements by providing fast and flexible integration of gene related data combined with straightforward data retrieval. MGcV is available at http://mgcv.cmbi.ru.nl.",2013-04-01 +24214959,BacDive--the Bacterial Diversity Metadatabase.,"BacDive-the Bacterial Diversity Metadatabase (http://bacdive.dsmz.de) merges detailed strain-linked information on the different aspects of bacterial and archaeal biodiversity. Currently (release 9/2013), BacDive contains entries for 23 458 strains and provides information on their taxonomy, morphology, physiology, sampling and concomitant environmental conditions as well as molecular biology. Where available, links to access the respective biological resources are given. The majority of the BacDive data is manually annotated and curated. The BacDive portal offers an easy-to-use simple search and in addition powerful advanced search functionalities allowing to combine more than 30 search fields for text and numerical data. The user can compile individual sets of strains to a download selection that can easily be imported into nearly all spreadsheet applications.",2013-11-07 +24040221,FastDMA: an infinium humanmethylation450 beadchip analyzer.,"DNA methylation is vital for many essential biological processes and human diseases. Illumina Infinium HumanMethylation450 Beadchip is a recently developed platform studying genome-wide DNA methylation state on more than 480,000 CpG sites and a few CHG sites with high data quality. To analyze the data of this promising platform, we developed FastDMA which can be used to identify significantly differentially methylated probes. Besides single probe analysis, FastDMA can also do region-based analysis for identifying the differentially methylated region (DMRs). A uniformed statistical model, analysis of covariance (ANCOVA), is used to achieve all the analyses in FastDMA. 
We apply FastDMA on three large-scale DNA methylation datasets from The Cancer Genome Atlas (TCGA) and find many differentially methylated genomic sites in different types of cancer. On the testing datasets, FastDMA shows much higher computational efficiency than current tools. FastDMA can benefit the data analyses of large-scale DNA methylation studies with an integrative pipeline and a high computational efficiency. The software is freely available via http://bioinfo.au.tsinghua.edu.cn/software/fastdma/.",2013-09-05 +21296751,GOSSIP: a method for fast and accurate global alignment of protein structures.,"

Motivation

The database of known protein structures (PDB) is increasing rapidly. This results in a growing need for methods that can cope with the vast amount of structural data. To analyze the accumulating data, it is important to have a fast tool for identifying similar structures and clustering them by structural resemblance. Several excellent tools have been developed for the comparison of protein structures. These usually address the task of local structure alignment, an important yet computationally intensive problem due to its complexity. It is difficult to use such tools for comparing a large number of structures to each other at a reasonable time.

Results

Here we present GOSSIP, a novel method for a global all-against-all alignment of any set of protein structures. The method detects similarities between structures down to a certain cutoff (a parameter of the program), hence allowing it to detect similar structures at a much higher speed than local structure alignment methods. GOSSIP compares many structures in times which are several orders of magnitude faster than well-known available structure alignment servers, and it is also faster than a database scanning method. We evaluate GOSSIP both on a dataset of short structural fragments and on two large sequence-diverse structural benchmarks. Our conclusions are that for a threshold of 0.6 and above, the speed of GOSSIP is obtained with no compromise of the accuracy of the alignments or of the number of detected global similarities.

Availability

A server, as well as an executable for download, are available at http://bioinfo3d.cs.tau.ac.il/gossip/.",2011-02-03 +27418719,Dynamically pre-trained deep recurrent neural networks using environmental monitoring data for predicting PM2.5.,"Fine particulate matter (PM2.5) has a considerable impact on human health, the environment and climate change. It is estimated that with better predictions, US$9 billion can be saved over a 10-year period in the USA (State of the science fact sheet air quality. http://www.noaa.gov/factsheets/new, 2012). Therefore, it is crucial to keep developing models and systems that can accurately predict the concentration of major air pollutants. In this paper, our target is to predict PM2.5 concentration in Japan using environmental monitoring data obtained from physical sensors with improved accuracy over the currently employed prediction models. To do so, we propose a deep recurrent neural network (DRNN) that is enhanced with a novel pre-training method using auto-encoder especially designed for time series prediction. Additionally, sensors selection is performed within DRNN without harming the accuracy of the predictions by taking advantage of the sparsity found in the network. The numerical experiments show that DRNN with our proposed pre-training method is superior to using a canonical and a state-of-the-art auto-encoder training method when applied to time series prediction. The experiments confirm that when compared against the PM2.5 prediction system VENUS (National Institute for Environmental Studies. Visual Atmospheric Environment Utility System. 
http://envgis5.nies.go.jp/osenyosoku/, 2014), our technique improves the accuracy of PM2.5 concentration level predictions that are being reported in Japan.",2015-06-26 +22080564,AnimalTFDB: a comprehensive animal transcription factor database.,"Transcription factors (TFs) are proteins that bind to specific DNA sequences, thereby playing crucial roles in gene-expression regulation through controlling the transcription of genetic information from DNA to RNA. Transcription cofactors and chromatin remodeling factors are also essential in the gene transcriptional regulation. Identifying and annotating all the TFs are primary and crucial steps for illustrating their functions and understanding the transcriptional regulation. In this study, based on manual literature reviews, we collected and curated 72 TF families for animals, which is currently the most complete list of TF families in animals. Then, we systematically characterized all the TFs in 50 animal species and constructed a comprehensive animal TF database, AnimalTFDB. To better serve the community, we provided detailed annotations for each TF, including basic information, gene structure, functional domain, 3D structure hit, Gene Ontology, pathway, protein-protein interaction, paralogs, orthologs, potential TF-binding sites and targets. In addition, we collected and annotated transcription cofactors and chromatin remodeling factors. AnimalTFDB has a user-friendly web interface with multiple browse and search functions, as well as data downloading. It is freely available at http://www.bioguo.org/AnimalTFDB/.",2011-11-12 +25064571,Big data and other challenges in the quest for orthologs.,"

Unlabelled

Given the rapid increase of species with a sequenced genome, the need to identify orthologous genes between them has emerged as a central bioinformatics task. Many different methods exist for orthology detection, which makes it difficult to decide which one to choose for a particular application. Here, we review the latest developments and issues in the orthology field, and summarize the most recent results reported at the third 'Quest for Orthologs' meeting. We focus on community efforts such as the adoption of reference proteomes, standard file formats and benchmarking. Progress in these areas is good, and they are already beneficial to both orthology consumers and providers. However, a major current issue is that the massive increase in complete proteomes poses computational challenges to many of the ortholog database providers, as most orthology inference algorithms scale at least quadratically with the number of proteomes. The Quest for Orthologs consortium is an open community with a number of working groups that join efforts to enhance various aspects of orthology analysis, such as defining standard formats and datasets, documenting community resources and benchmarking.

Availability and implementation

All such materials are available at http://questfororthologs.org.",2014-07-26 +24064417,BSeQC: quality control of bisulfite sequencing experiments.,"

Motivation

Bisulfite sequencing (BS-seq) has emerged as the gold standard to study genome-wide DNA methylation at single-nucleotide resolution. Quality control (QC) is a critical step in the analysis pipeline to ensure that BS-seq data are of high quality and suitable for subsequent analysis. Although several QC tools are available for next-generation sequencing data, most of them were not designed to handle QC issues specific to BS-seq protocols. Therefore, there is a strong need for a dedicated QC tool to evaluate and remove potential technical biases in BS-seq experiments.

Results

We developed a package named BSeQC to comprehensively evaluate the quality of BS-seq experiments and automatically trim nucleotides with potential technical biases that may result in inaccurate methylation estimation. BSeQC takes standard SAM/BAM files as input and generates bias-free SAM/BAM files for downstream analysis. Evaluation based on real BS-seq data indicates that the use of the bias-free SAM/BAM file substantially improves the quantification of methylation level.

Availability and implementation

BSeQC is freely available at: http://code.google.com/p/bseqc/.",2013-09-23 +25051387,"""Magnitude-based inference"": a statistical review.","

Purpose

We consider ""magnitude-based inference"" and its interpretation by examining in detail its use in the problem of comparing two means.

Methods

We extract from the spreadsheets, which are provided to users of the analysis (http://www.sportsci.org/), a precise description of how ""magnitude-based inference"" is implemented. We compare the implemented version of the method with general descriptions of it and interpret the method in familiar statistical terms.

Results and conclusions

We show that ""magnitude-based inference"" is not a progressive improvement on modern statistics. The additional probabilities introduced are not directly related to the confidence interval but, rather, are interpretable either as P values for two different nonstandard tests (for different null hypotheses) or as approximate Bayesian calculations, which also lead to a type of test. We also discuss sample size calculations associated with ""magnitude-based inference"" and show that the substantial reduction in sample sizes claimed for the method (30% of the sample size obtained from standard frequentist calculations) is not justifiable so the sample size calculations should not be used. Rather than using ""magnitude-based inference,"" a better solution is to be realistic about the limitations of the data and use either confidence intervals or a fully Bayesian analysis.",2015-04-01 +23945090,Systematic review of genome-wide gene expression studies of bipolar disorder.,"

Background

Numerous genome-wide gene expression studies of bipolar disorder (BP) have been carried out. These studies are heterogeneous, underpowered and use overlapping samples. We conducted a systematic review of these studies to synthesize the current findings.

Methods

We identified all genome-wide gene expression studies on BP in humans. We then carried out a quantitative mega-analysis of studies done with post-mortem brain tissue. We obtained raw data from each study and used standardized procedures to process and analyze the data. We then combined the data and conducted three separate mega-analyses on samples from 1) any region of the brain (9 studies); 2) the prefrontal cortex (PFC) (6 studies); and 3) the hippocampus (2 studies). To minimize heterogeneity across studies, we focused primarily on the most numerous, recent and comprehensive studies.

Results

A total of 30 genome-wide gene expression studies of BP done with blood or brain tissue were identified. We included 10 studies with data on 211 microarrays on 57 unique BP cases and 229 microarrays on 60 unique controls in the quantitative mega-analysis. A total of 382 genes were identified as significantly differentially expressed by the three analyses. Eleven genes survived correction for multiple testing with a q-value < 0.05 in the PFC. Among these were FKBP5 and WFS1, which have been previously implicated in mood disorders. Pathway analyses suggested a role for metallothionein proteins, MAP Kinase phosphatases, and neuropeptides.

Conclusion

We provided an up-to-date summary of results from gene expression studies of the brain in BP. Our analyses focused on the highest quality data available and provided results by brain region so that similarities and differences can be examined relative to disease status. The results are available for closer inspection on-line at Metamoodics [http://metamoodics.igm.jhmi.edu/], where investigators can look up any genes of interest and view the current results in their genomic context and in relation to leading findings from other genomic experiments in bipolar disorder.",2013-08-15 +25527096,The Cyni framework for network inference in Cytoscape.,"

Motivation

Research on methods for the inference of networks from biological data is making significant advances, but the adoption of network inference in biomedical research practice is lagging behind. Here, we present Cyni, an open-source 'fill-in-the-algorithm' framework that provides common network inference functionality and user interface elements. Cyni allows the rapid transformation of Java-based network inference prototypes into apps of the popular open-source Cytoscape network analysis and visualization ecosystem. Merely placing the resulting app in the Cytoscape App Store makes the method accessible to a worldwide community of biomedical researchers by mouse click. In a case study, we illustrate the transformation of an ARACNE implementation into a Cytoscape app.

Availability and implementation

Cyni, its apps, user guides, documentation and sample code are available from the Cytoscape App Store http://apps.cytoscape.org/apps/cynitoolbox

Contact

benno.schwikowski@pasteur.fr.",2014-12-18 +25473701,Automated Real-Time Nucleic Acid Amplification Technology for Rapid and Simultaneous Detection of Tuberculosis and Rifampicin Resistance: Xpert MTB/RIF Assay for the Diagnosis of Pulmonary and Extrapulmonary TB in Adults and Children: Policy Update,"The global priorities for tuberculosis (TB) care and control are to improve case-detection and to detect cases earlier, including cases of smear-negative disease which are often associated with coinfection with the human immunodeficiency virus (HIV) and young age, and to enhance the capacity to diagnose multidrug-resistant tuberculosis (MDR-TB). In September 2010, the World Health Organization (WHO) convened an Expert Group to review the evidence on the accuracy of the Xpert MTB/RIF assay (Cepheid, Sunnyvale, CA, United States) for the purpose of formulating recommendations to guide the use of the test. Policy recommendations on using Xpert MTB/RIF were issued by WHO early in 2011, supported by an operational how-to document and a checklist for implementation at the country level. WHO's current policies and guidance recommend that Xpert MTB/RIF be used as an initial diagnostic test in individuals suspected of having MDR-TB or HIV-associated TB (strong recommendation, moderate quality of evidence). The guidance also provides a conditional recommendation that Xpert MTB/RIF be used as a follow-on test to smear microscopy in settings where MDR-TB or HIV are of lesser concern, especially for further testing of smear-negative specimens. In acknowledgement of the difficulties of obtaining microbiological confirmation of the diagnosis in children, this recommendation generalizes from data on adults to include the use of Xpert MTB/RIF in children. Since 2010, more than 85 peer-reviewed research papers have been published on using Xpert MTB/RIF to diagnose pulmonary, extrapulmonary and paediatric TB, and studies continue to be performed. 
Given the amount of additional data on Xpert MTB/RIF that have emerged since 2010, an update of WHO's policies and guidance was warranted. WHO's Global TB Programme therefore commissioned three systematic reviews to update and revise the guidance; these reviews examined the utility of Xpert MTB/RIF in diagnosing TB and rifampicin resistance in pulmonary, extrapulmonary and paediatric TB. Published studies on the affordability and cost effectiveness of Xpert MTB/RIF were also reviewed. WHO convened an Expert Group to review the evidence at Les Pensierès, Veyrier-du-Lac, France during 20–21 May 2013. The major findings and recommendations of this Expert Group are summarized below, and a detailed meeting report is available at: http://www.who.int/tb/laboratory/policy_statements/en/",2014-12-05 +24106010,RRBS-analyser: a comprehensive web server for reduced representation bisulfite sequencing data analysis.,"In reduced representation bisulfite sequencing (RRBS), genomic DNA is digested with the restriction enzyme and then subjected to next-generation sequencing, which enables detection and quantification of DNA methylation at whole-genome scale with low cost. However, the data processing, interpretation, and analysis of the huge amounts of data generated pose a bioinformatics challenge. We developed RRBS-Analyser, a comprehensive genome-scale DNA methylation analysis server based on RRBS data. RRBS-Analyser can assess sequencing quality, generate detailed statistical information, align the bisulfite-treated short reads to reference genome, identify and annotate the methylcytosines (5mCs) and associate them with different genomic features in CG, CHG, and CHH content. RRBS-Analyser supports detection, annotation, and visualization of differentially methylated regions (DMRs) for multiple samples from nine reference organisms. Moreover, RRBS-Analyser provides researchers with detailed annotation of DMR-containing genes, which will greatly aid subsequent studies. 
The input of RRBS-Analyser can be raw FASTQ reads, generic SAM format, or self-defined format containing individual 5mC sites. RRBS-Analyser can be widely used by researchers wanting to unravel the complexities of DNA methylome in the epigenetic community. RRBS-Analyser is freely available at http://122.228.158.106/RRBSAnalyser/.",2013-10-10 +21337704,GPDE: A biological proteomic database for biomarker discovery and evaluation.,"Clinical proteomics faces extremely complex and variable data. Here, we present an updated version of the Griss Proteomics Database Engine (GPDE): A free biological proteomic database specifically designed for clinical proteomics and biomarker discovery (http://gpde.sourceforge.net). It combines experiments based on investigated cell types thereby supporting customizable biological meta-analyses. Through the new features described here, the GPDE now became a powerful yet easy-to-use tool to support the fast identification and reliable evaluation of biomarker candidates.",2011-01-27 +26705418,Cohort Profile: The Framingham Heart Study (FHS): overview of milestones in cardiovascular epidemiology.,"The Framingham Heart Study (FHS) has conducted seminal research defining cardiovascular disease (CVD) risk factors and fundamentally shaping public health guidelines for CVD prevention over the past five decades. The success of the Original Cohort, initiated in 1948, paved the way for further epidemiological research in preventive cardiology. Due to the keen observations suggesting the role of shared familial factors in the development of CVD, in 1971 the FHS began enroling the second generation cohort, comprising the children of the Original Cohort and the spouses of the children. In 2002, the third generation cohort, comprising the grandchildren of the Original Cohort, was initiated to additionally explore genetic contributions to CVD in greater depth. 
Additionally, because of the predominance of White individuals of European descent in the three generations of FHS participants noted above, the Heart Study enrolled the OMNI1 and OMNI2 cohorts in 1994 and 2003, respectively, aimed to reflect the current greater racial and ethnic diversity of the town of Framingham. All FHS cohorts have been examined approximately every 2-4 years since the initiation of the study. At these periodic Heart Study examinations, we obtain a medical history and perform a cardiovascular-focused physical examination, 12-lead electrocardiography, blood and urine samples testing and other cardiovascular imaging studies reflecting subclinical disease burden.The FHS has continually evolved along the cutting edge of cardiovascular science and epidemiological research since its inception. Participant studies now additionally include study of cardiovascular imaging, serum and urine biomarkers, genetics/genomics, proteomics, metabolomics and social networks. Numerous ancillary studies have been established, expanding the phenotypes to encompass multiple organ systems including the lungs, brain, bone and fat depots, among others. Whereas the FHS was originally conceived and designed to study the epidemiology of cardiovascular disease, it has evolved over the years with staggering expanded breadth and depth that have far greater implications in the study of the epidemiology of a wide spectrum of human diseases. The FHS welcomes research collaborations using existing or new collection of data. Detailed information regarding the procedures for research application submission and review are available at [http://www.framinghamheartstudy.org/researchers/index.php].",2015-12-01 +22139928,SpliceDisease database: linking RNA splicing and disease.,"RNA splicing is an important aspect of gene regulation in many organisms. 
Splicing of RNA is regulated by complicated mechanisms involving numerous RNA-binding proteins and the intricate network of interactions among them. Mutations in cis-acting splicing elements or its regulatory proteins have been shown to be involved in human diseases. Defects in pre-mRNA splicing process have emerged as a common disease-causing mechanism. Therefore, a database integrating RNA splicing and disease associations would be helpful for understanding not only the RNA splicing but also its contribution to disease. In SpliceDisease database, we manually curated 2337 splicing mutation disease entries involving 303 genes and 370 diseases, which have been supported experimentally in 898 publications. The SpliceDisease database provides information including the change of the nucleotide in the sequence, the location of the mutation on the gene, the reference Pubmed ID and detailed description for the relationship among gene mutations, splicing defects and diseases. We standardized the names of the diseases and genes and provided links for these genes to NCBI and UCSC genome browser for further annotation and genomic sequences. For the location of the mutation, we give direct links of the entry to the respective position/region in the genome browser. The users can freely browse, search and download the data in SpliceDisease at http://cmbi.bjmu.edu.cn/sdisease.",2011-12-01 +22139925,GWASdb: a database for human genetic variants identified by genome-wide association studies.,"Recent advances in genome-wide association studies (GWAS) have enabled us to identify thousands of genetic variants (GVs) that are associated with human diseases. As next-generation sequencing technologies become less expensive, more GVs will be discovered in the near future. Existing databases, such as NHGRI GWAS Catalog, collect GVs with only genome-wide level significance. 
However, many true disease susceptibility loci have relatively moderate P values and are not included in these databases. We have developed GWASdb that contains 20 times more data than the GWAS Catalog and includes less significant GVs (P < 1.0 × 10(-3)) manually curated from the literature. In addition, GWASdb provides comprehensive functional annotations for each GV, including genomic mapping information, regulatory effects (transcription factor binding sites, microRNA target sites and splicing sites), amino acid substitutions, evolution, gene expression and disease associations. Furthermore, GWASdb classifies these GVs according to diseases using Disease-Ontology Lite and Human Phenotype Ontology. It can conduct pathway enrichment and PPI network association analysis for these diseases. GWASdb provides an intuitive, multifunctional database for biologists and clinicians to explore GVs and their functional inferences. It is freely available at http://jjwanglab.org/gwasdb and will be updated frequently.",2011-12-01 +22553387,Categorization of metabolome in bacterial systems.,"Analyses of biological databases such as those of genome, proteome, metabolome etc., have given insights in organization of biological systems. However, current efforts do not utilize the complete potential of available metabolome data. In this study, metabolome of bacterial systems with reliable annotations are analyzed and a simple method is developed to categorize pathways hierarchically, using rational approach. Ninety-four bacterial systems having for each ≥ 250 annotated metabolic pathways were used to identify a set of common pathways. 42 pathways were present in all bacteria which are termed as Core/Stage I pathways. This set of pathways was used along with interacting compounds to categorize pathways in the metabolome hierarchically. In each metabolome non-interacting pathways were identified including at each stage. 
The case study of Escherichia coli O157, having 433 annotated pathways, shows that 378 pathways interact directly or indirectly with 41 core pathways while 14 pathways are noninteracting. These 378 pathways are distributed in Stage II (289), Stage III (75), Stage IV (13) and Stage V (1) category. The approach discussed here allows understanding of the complexity of metabolic networks. It has pointed out that core pathways could be most ancient pathways and compounds that interact with maximum pathways may be compounds with high biosynthetic potential, which can be easily identified. Further, it was shown that interactions of pathways at various stages could be one to one, one to many, many to one or many to many mappings through interacting compounds. The granularity of the method discussed being high; the impact of perturbation in a pathway on the metabolome and particularly sub networks can be studied precisely. The categorizations of metabolic pathways help in identifying choke point enzymes that are useful to identify probable drug targets. The Metabolic categorizations for 94 bacteria are available at http://115.111.37.202/mpe/.",2012-04-13 +21569525,Learning sparse models for a dynamic Bayesian network classifier of protein secondary structure.,"

Background

Protein secondary structure prediction provides insight into protein function and is a valuable preliminary step for predicting the 3D structure of a protein. Dynamic Bayesian networks (DBNs) and support vector machines (SVMs) have been shown to provide state-of-the-art performance in secondary structure prediction. As the size of the protein database grows, it becomes feasible to use a richer model in an effort to capture subtle correlations among the amino acids and the predicted labels. In this context, it is beneficial to derive sparse models that discourage over-fitting and provide biological insight.

Results

In this paper, we first show that we are able to obtain accurate secondary structure predictions. Our per-residue accuracy on a well established and difficult benchmark (CB513) is 80.3%, which is comparable to the state-of-the-art evaluated on this dataset. We then introduce an algorithm for sparsifying the parameters of a DBN. Using this algorithm, we can automatically remove up to 70-95% of the parameters of a DBN while maintaining the same level of predictive accuracy on the SD576 set. At 90% sparsity, we are able to compute predictions three times faster than a fully dense model evaluated on the SD576 set. We also demonstrate, using simulated data, that the algorithm is able to recover true sparse structures with high accuracy, and using real data, that the sparse model identifies known correlation structure (local and non-local) related to different classes of secondary structure elements.

Conclusions

We present a secondary structure prediction method that employs dynamic Bayesian networks and support vector machines. We also introduce an algorithm for sparsifying the parameters of the dynamic Bayesian network. The sparsification approach yields a significant speed-up in generating predictions, and we demonstrate that the amino acid correlations identified by the algorithm correspond to several known features of protein secondary structure. Datasets and source code used in this study are available at http://noble.gs.washington.edu/proj/pssp.",2011-05-13 +25056354,A knowledge base of vasopressin actions in the kidney.,"Biological information is growing at a rapid pace, making it difficult for individual investigators to be familiar with all information that is relevant to their own research. Computers are beginning to be used to extract and curate biological information; however, the complexity of human language used in research papers continues to be a critical barrier to full automation of knowledge extraction. Here, we report a manually curated knowledge base of vasopressin actions in renal epithelial cells that is designed to be readable either by humans or by computer programs using natural language processing algorithms. The knowledge base consists of three related databases accessible at https://helixweb.nih.gov/ESBL/TinyUrls/Vaso_portal.html. One of the component databases reports vasopressin actions on individual proteins expressed in renal epithelia, including effects on phosphorylation, protein abundances, protein translocation from one subcellular compartment to another, protein-protein binding interactions, etc. The second database reports vasopressin actions on physiological measures in renal epithelia, and the third reports specific mRNA species whose abundances change in response to vasopressin. 
We illustrate the application of the knowledge base by using it to generate a protein kinase network that connects vasopressin binding in collecting duct cells to physiological effects to regulate the water channel protein aquaporin-2.",2014-07-23 +23457041,ChIP-PED enhances the analysis of ChIP-seq and ChIP-chip data.,"

Motivation

Although chromatin immunoprecipitation coupled with high-throughput sequencing (ChIP-seq) or tiling array hybridization (ChIP-chip) is increasingly used to map genome-wide-binding sites of transcription factors (TFs), it still remains difficult to generate a quality ChIPx (i.e. ChIP-seq or ChIP-chip) dataset because of the tremendous amount of effort required to develop effective antibodies and efficient protocols. Moreover, most laboratories are unable to easily obtain ChIPx data for one or more TF(s) in more than a handful of biological contexts. Thus, standard ChIPx analyses primarily focus on analyzing data from one experiment, and the discoveries are restricted to a specific biological context.

Results

We propose to enrich this existing data analysis paradigm by developing a novel approach, ChIP-PED, which superimposes ChIPx data on large amounts of publicly available human and mouse gene expression data containing a diverse collection of cell types, tissues and disease conditions to discover new biological contexts with potential TF regulatory activities. We demonstrate ChIP-PED using a number of examples, including a novel discovery that MYC, a human TF, plays an important functional role in pediatric Ewing sarcoma cell lines. These examples show that ChIP-PED increases the value of ChIPx data by allowing one to expand the scope of possible discoveries made from a ChIPx experiment.

Availability

http://www.biostat.jhsph.edu/~gewu/ChIPPED/",2013-03-01 +22123737,hiPathDB: a human-integrated pathway database with facile visualization.,"One of the biggest challenges in the study of biological regulatory networks is the systematic organization and integration of complex interactions taking place within various biological pathways. Currently, the information of the biological pathways is dispersed in multiple databases in various formats. hiPathDB is an integrated pathway database that combines the curated human pathway data of NCI-Nature PID, Reactome, BioCarta and KEGG. In total, it includes 1661 pathways consisting of 8976 distinct physical entities. hiPathDB provides two different types of integration. The pathway-level integration, conceptually a simple collection of individual pathways, was achieved by devising an elaborate model that takes distinct features of four databases into account and subsequently reformatting all pathways in accordance with our model. The entity-level integration creates a single unified pathway that encompasses all pathways by merging common components. Even though the detailed molecular-level information such as complex formation or post-translational modifications tends to be lost, such integration makes it possible to investigate signaling network over the entire pathways and allows identification of pathway cross-talks. Another strong merit of hiPathDB is the built-in pathway visualization module that supports explorative studies of complex networks in an interactive fashion. The layout algorithm is optimized for virtually automatic visualization of the pathways. hiPathDB is available at http://hiPathDB.kobic.re.kr.",2011-11-28 +26379465,Global Cicada Sound Collection I: Recordings from South Africa and Malawi by B. W. Price & M. H. Villet and harvesting of BioAcoustica data by GBIF.,"

Background

Sound collections for singing insects provide important repositories that underpin existing research (e.g. Price et al. 2007 at http://bio.acousti.ca/node/11801; Price et al. 2010) and make bioacoustic collections available for future work, including insect communication (Ordish 1992), systematics (e.g. David et al. 2003), and automated identification (Bennett et al. 2015). The BioAcoustica platform (Baker et al. 2015) is both a repository and analysis platform for bioacoustic collections: allowing collections to be available in perpetuity, and also facilitating complex analyses using the BioVeL cloud infrastructure (Vicario et al. 2011). The Global Cicada Sound Collection is a project to make recordings of the world's cicadas (Hemiptera: Cicadidae) available using open licences to maximise their potential for study and reuse. This first component of the Global Cicada Sound Collection comprises recordings made between 2006 and 2008 of Cicadidae in South Africa and Malawi.

New information

This collection of sounds includes 219 recordings of 133 voucher specimens, comprising 42 taxa (25 identified to species, all identified to genus) from South Africa and Malawi. The recordings have been used to underpin work on the species limits of cicadas in southern Africa, including Price et al. (2007) and Price et al. (2010). The specimens are deposited in the Albany Museum, Grahamstown, South Africa (AMGS). The harvesting of acoustic data as occurrence records by GBIF has been implemented by the Scratchpads Team at the Natural History Museum, London. This link increases the value of individual recordings and the BioAcoustica platform within the global infrastructure of biodiversity informatics by making specimen/occurrence records from BioAcoustica available to a wider audience, and allowing their integration with other occurrence datasets that also contribute to GBIF.",2015-09-02 +23559639,A tool for RNA sequencing sample identity check.,"

Summary

RNA sequencing data are becoming a major method of choice to study transcriptomes, including the mapping of gene expression quantitative trait loci (eQTLs). RNA sample contamination or swapping is a serious problem for downstream analysis and may result in false discovery and loss of power to detect the true biological relationships. When genetic data are available, for example, in eQTL studies or samples have been previously genotyped or DNA sequenced, it is possible to combine genetic data and RNA-seq data to detect sample contamination and resolve sample swapping problems. In this article, we introduce a tool (IDCheck) that allows easy assessment of concordance between genotype (from SNP arrays or DNA sequencing) and gene expression (RNA-seq) samples. IDCheck compares the identity of RNA-seq reads and SNP genotypes using a likelihood-based method. Based on maximum likelihood estimates of relevant parameters, we can detect sample contamination and identify correct sample pairs when swapping occurs. Our tool provides an efficient and convenient way to evaluate and resolve these problems.

Availability

A complete description of the software is included on the application home page. The software is freely available in the public domain at http://eqtl.rc.fas.harvard.edu/idcheck/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-04 +25897112,RBO Aleph: leveraging novel information sources for protein structure prediction.,"RBO Aleph is a novel protein structure prediction web server for template-based modeling, protein contact prediction and ab initio structure prediction. The server has a strong emphasis on modeling difficult protein targets for which templates cannot be detected. RBO Aleph's unique features are (i) the use of combined evolutionary and physicochemical information to perform residue-residue contact prediction and (ii) leveraging this contact information effectively in conformational space search. RBO Aleph emerged as one of the leading approaches to ab initio protein structure prediction and contact prediction during the most recent Critical Assessment of Protein Structure Prediction experiment (CASP11, 2014). In addition to RBO Aleph's main focus on ab initio modeling, the server also provides state-of-the-art template-based modeling services. Based on template availability, RBO Aleph switches automatically between template-based modeling and ab initio prediction based on the target protein sequence, facilitating use especially for non-expert users. The RBO Aleph web server offers a range of tools for visualization and data analysis, such as the visualization of predicted models, predicted contacts and the estimated prediction error along the model's backbone. The server is accessible at http://compbio.robotics.tu-berlin.de/rbo_aleph/.",2015-04-20 +22127861,RNA CoSSMos: Characterization of Secondary Structure Motifs--a searchable database of secondary structure motifs in RNA three-dimensional structures.,"RNA secondary structure is important for designing therapeutics, understanding protein-RNA binding and predicting tertiary structure of RNA. 
Several databases and downloadable programs exist that specialize in the three-dimensional (3D) structure of RNA, but none focus specifically on secondary structural motifs such as internal, bulge and hairpin loops. The RNA Characterization of Secondary Structure Motifs (RNA CoSSMos) database is a freely accessible and searchable online database and website of 3D characteristics of secondary structure motifs. To create the RNA CoSSMos database, 2156 Protein Data Bank (PDB) files were searched for internal, bulge and hairpin loops, and each loop's structural information, including sugar pucker, glycosidic linkage, hydrogen bonding patterns and stacking interactions, was included in the database. False positives were defined, identified and reclassified or omitted from the database to ensure the most accurate results possible. Users can search via general PDB information, experimental parameters, sequence and specific motif and by specific structural parameters in the subquery page after the initial search. Returned results for each search can be viewed individually or a complete set can be downloaded into a spreadsheet to allow for easy comparison. The RNA CoSSMos database is automatically updated weekly and is available at http://cossmos.slu.edu.",2011-11-29 +25052701,Assisting manual literature curation for protein-protein interactions using BioQRator. ,"The time-consuming nature of manual curation and the rapid growth of biomedical literature severely limit the number of articles that database curators can scrutinize and annotate. Hence, semi-automatic tools can be a valid support to increase annotation throughput. Although a handful of curation assistant tools are already available, to date, little has been done to formally evaluate their benefit to biocuration. Moreover, most curation tools are designed for specific problems. Thus, it is not easy to apply an annotation tool for multiple tasks. 
BioQRator is a publicly available web-based tool for annotating biomedical literature. It was designed to support general tasks, i.e. any task annotating entities and relationships. In the BioCreative IV edition, BioQRator was tailored for protein-protein interaction (PPI) annotation by migrating information from PIE the search. The results obtained from six curators showed that the precision on the top 10 documents doubled with PIE the search compared with PubMed search results. It was also observed that the annotation time for a full PPI annotation task decreased for a beginner-intermediate level annotator. This finding is encouraging because text-mining techniques were not directly involved in the full annotation task and BioQRator can be easily integrated with any text-mining resources. Database URL: http://www.bioqrator.org/.",2014-07-22 +25519381,A comparative analysis of family-based and population-based association tests using whole genome sequence data.,"The revolution in next-generation sequencing has made obtaining both common and rare high-quality sequence variants across the entire genome feasible. Because researchers are now faced with the analytical challenges of handling a massive amount of genetic variant information from sequencing studies, numerous methods have been developed to assess the impact of both common and rare variants on disease traits. In this report, whole genome sequencing data from Genetic Analysis Workshop 18 was used to compare the power of several methods, considering both family-based and population-based designs, to detect association with variants in the MAP4 gene region and on chromosome 3 with blood pressure. To prioritize variants across the genome for testing, variants were first functionally assessed using prediction algorithms and expression quantitative trait loci (eQTLs) data. 
Four set-based tests in the family-based association tests (FBAT) framework--FBAT-v, FBAT-lmm, FBAT-m, and FBAT-l--were used to analyze 20 pedigrees, and 2 variance component tests, sequence kernel association test (SKAT) and genome-wide complex trait analysis (GCTA), were used with 142 unrelated individuals in the sample. Both set-based and variance-component-based tests had high power and an adequate type I error rate. Of the various FBATs, FBAT-l demonstrated superior performance, indicating the potential for it to be used in rare-variant analysis. The updated FBAT package is available at: http://www.hsph.harvard.edu/fbat/.",2014-06-17 +21474551,CycADS: an annotation database system to ease the development and update of BioCyc databases.,"In recent years, genomes from an increasing number of organisms have been sequenced, but their annotation remains a time-consuming process. The BioCyc databases offer a framework for the integrated analysis of metabolic networks. The Pathway tool software suite allows the automated construction of a database starting from an annotated genome, but it requires prior integration of all annotations into a specific summary file or into a GenBank file. To allow the easy creation and update of a BioCyc database starting from the multiple genome annotation resources available over time, we have developed an ad hoc data management system that we called Cyc Annotation Database System (CycADS). CycADS is centred on a specific database model and on a set of Java programs to import, filter and export relevant information. Data from GenBank and other annotation sources (including for example: KAAS, PRIAM, Blast2GO and PhylomeDB) are collected into a database to be subsequently filtered and extracted to generate a complete annotation file. This file is then used to build an enriched BioCyc database using the PathoLogic program of Pathway Tools. 
The CycADS pipeline for annotation management was used to build the AcypiCyc database for the pea aphid (Acyrthosiphon pisum) whose genome was recently sequenced. The AcypiCyc database webpage includes also, for comparative analyses, two other metabolic reconstruction BioCyc databases generated using CycADS: TricaCyc for Tribolium castaneum and DromeCyc for Drosophila melanogaster. Linked to its flexible design, CycADS offers a powerful software tool for the generation and regular updating of enriched BioCyc databases. The CycADS system is particularly suited for metabolic gene annotation and network reconstruction in newly sequenced genomes. Because of the uniform annotation used for metabolic network reconstruction, CycADS is particularly useful for comparative analysis of the metabolism of different organisms. Database URL: http://www.cycadsys.org.",2011-04-07 +24771407,Bioinformatics approach to evaluate differential gene expression of M1/M2 macrophage phenotypes and antioxidant genes in atherosclerosis.,"Atherosclerosis is a pro-inflammatory process intrinsically related to systemic redox impairments. Macrophages play a major role on disease development. The specific involvement of classically activated, M1 (pro-inflammatory), or the alternatively activated, M2 (anti-inflammatory), on plaque formation and disease progression are still not established. Thus, based on meta-data analysis of public micro-array datasets, we compared differential gene expression levels of the human antioxidant genes (HAG) and M1/M2 genes between early and advanced human atherosclerotic plaques, and among peripheric macrophages (with or without foam cells induction by oxidized low density lipoprotein, oxLDL) from healthy and atherosclerotic subjects. Two independent datasets, GSE28829 and GSE9874, were selected from gene expression omnibus (http://www.ncbi.nlm.nih.gov/geo/) repository. 
Functional interactions were obtained with STRING (http://string-db.org/) and Medusa (http://coot.embl.de/medusa/). Statistical analysis was performed with ViaComplex(®) (http://lief.if.ufrgs.br/pub/biosoftwares/viacomplex/) and gene score enrichment analysis (http://www.broadinstitute.org/gsea/index.jsp). Bootstrap analysis demonstrated that the activity (expression) of HAG and M1 gene sets were significantly increased in advanced compared to early atherosclerotic plaque. Increased expressions of HAG, M1, and M2 gene sets were found in peripheric macrophages from atherosclerotic subjects compared to peripheric macrophages from healthy subjects, while only M1 gene set was increased in foam cells from atherosclerotic subjects compared to foam cells from healthy subjects. However, M1 gene set was decreased in foam cells from healthy subjects compared to peripheric macrophages from healthy subjects, while no differences were found in foam cells from atherosclerotic subjects compared to peripheric macrophages from atherosclerotic subjects. Our data suggest that, different to cancer, in atherosclerosis there is no M1 or M2 polarization of macrophages. Actually, M1 and M2 phenotype are equally induced, which is an important aspect to better understand the disease progression, and can help to develop new therapeutic approaches.",2014-11-01 +24813450,WormNet v3: a network-assisted hypothesis-generating server for Caenorhabditis elegans.,"High-throughput experimental technologies gradually shift the paradigm of biological research from hypothesis-validation toward hypothesis-generation science. Translating diverse types of large-scale experimental data into testable hypotheses, however, remains a daunting task. 
We previously demonstrated that heterogeneous genomics data can be integrated into a single genome-scale gene network with high prediction power for ribonucleic acid interference (RNAi) phenotypes in Caenorhabditis elegans, a popular metazoan model in the study of developmental biology, neurobiology and genetics. Here, we present WormNet version 3 (v3), which is a new network-assisted hypothesis-generating server for C. elegans. WormNet v3 includes major updates to the base gene network, which substantially improved predictions of RNAi phenotypes. The server generates various gene network-based hypotheses using three complementary network methods: (i) a phenotype-centric approach to 'find new members for a pathway'; (ii) a gene-centric approach to 'infer functions from network neighbors' and (iii) a context-centric approach to 'find context-associated hub genes', which is a new method to identify key genes that mediate physiology within a specific context. For example, we demonstrated that the context-centric approach can be used to identify potential molecular targets of toxic chemicals. WormNet v3 is freely accessible at http://www.inetbio.org/wormnet.",2014-05-09 +21990165,Classification of missense substitutions in the BRCA genes: a database dedicated to Ex-UVs.,"Unclassified sequence variants (UVs) arising from clinical mutation screening of cancer susceptibility genes present a frustrating issue to clinical genetics services and the patients that they serve. We created an open-access database holding missense substitutions from the breast and ovarian cancer susceptibility genes BRCA1 and BRCA2. The main inclusion criterion is that each variant should have been assessed in a published work that used the Bayesian integrated evaluation of unclassified BRCA gene variants. 
Transfer of data on these substitutions from the original publications to our database afforded an opportunity to analyze the missense substitutions under a single model and to remove inconsistencies that arose during the evolution of the integrated evaluation over the last decade. This analysis also afforded the opportunity to reclassify these missense substitutions according to the recently published IARC 5-Class system. From an initial set of 248 missense substitutions, 31 were set aside due to nonnegligible probability to interfere with splicing. Of the remaining substitutions, 28 fell into one of the two pathogenic classes (IARC Class 4 or 5), 174 fell into one of the two nonpathogenic classes (IARC Class 1 or 2), and 15 remain in IARC Class 3, ""Uncertain."" The database is available at http://brca.iarc.fr/LOVD.",2011-11-03 +21786137,Prediction of protein-protein interactions between Ralstonia solanacearum and Arabidopsis thaliana.,"Ralstonia solanacearum is a devastating bacterial pathogen that has an unusually wide host range. R. solanacearum, together with Arabidopsis thaliana, has become a model system for studying the molecular basis of plant-pathogen interactions. Protein-protein interactions (PPIs) play a critical role in the infection process, and some PPIs can initiate a plant defense response. However, experimental investigations have rarely addressed such PPIs. Using two computational methods, the interolog and the domain-based methods, we predicted 3,074 potential PPIs between 119 R. solanacearum and 1,442 A. thaliana proteins. Interestingly, we found that the potential pathogen-targeted proteins are more important in the A. thaliana PPI network. To facilitate further studies, all predicted PPI data were compiled into a database server called PPIRA (http://protein.cau.edu.cn/ppira/). We hope that our work will provide new insights for future research addressing the pathogenesis of R. 
solanacearum.",2011-07-24 +26247233,MET-XAlign: a metabolite cross-alignment tool for LC/MS-based comparative metabolomics.,"Liquid chromatography/mass spectrometry (LC/MS) metabolite profiling has been widely used in comparative metabolomics studies; however, LC/MS-based comparative metabolomics currently faces several critical challenges. One of the greatest challenges is how to effectively align metabolites across different LC/MS profiles; a single metabolite can give rise to multiple peak features, and the grouped peak features that can be used to construct a spectrum pattern of single metabolite can vary greatly between biochemical experiments and even between instrument runs. Another major challenge is that the observed retention time for a single metabolite can also be significantly affected by experimental conditions. To overcome these two key challenges, we present a novel metabolite-based alignment approach entitled MET-XAlign to align metabolites across LC/MS metabolomics profiles. MET-XAlign takes the deduced molecular mass and estimated compound retention time information that can be extracted by our previously published tool, MET-COFEA, and aligns metabolites based on this information. We demonstrate that MET-XAlign is able to cross-align metabolite compounds, either known or unknown, in LC/MS profiles not only across different samples but also across different biological experiments and different electrospray ionization modes. Therefore, our proposed metabolite-based cross-alignment approach is a great step forward and its implementation, MET-XAlign, is a very useful tool in LC/MS-based comparative metabolomics. MET-XAlign has been successfully implemented with core algorithm coding in C++, making it very efficient, and visualization interface coding in the Microsoft.NET Framework. 
The MET-XAlign software along with demonstrative data is freely available at http://bioinfo.noble.org/manuscript-support/met-xalign/ .",2015-08-31 +22073276,Disordered patterns in clustered Protein Data Bank and in eukaryotic and bacterial proteomes.,"We have constructed the clustered Protein Data Bank and obtained clusters of chains of different identity inside each cluster, http://bioinfo.protres.ru/st_pdb/. We have compiled the largest database of disordered patterns (141) from the clustered PDB where identity between chains inside of a cluster is larger or equal to 75% (version of 28 June 2010) by using simple rules of selection. The results of these analyses would help to further our understanding of the physicochemical and structural determinants of intrinsically disordered regions that serve as molecular recognition elements. We have analyzed the occurrence of the selected patterns in 97 eukaryotic and in 26 bacterial proteomes. The disordered patterns appear more often in eukaryotic than in bacterial proteomes. The matrix of correlation coefficients between numbers of proteins where a disordered pattern from the library of 141 disordered patterns appears at least once in 9 kingdoms of eukaryota and 5 phyla of bacteria have been calculated. As a rule, the correlation coefficients are higher inside of the considered kingdom than between them. The patterns with the frequent occurrence in proteomes have low complexity (PPPPP, GGGGG, EEEED, HHHH, KKKKK, SSTSS, QQQQQP), and the type of patterns vary across different proteomes, http://bioinfo.protres.ru/fp/search_new_pattern.html.",2011-11-04 +21799897,SEAS: a system for SEED-based pathway enrichment analysis.,"Pathway enrichment analysis represents a key technique for analyzing high-throughput omic data, and it can help to link individual genes or proteins found to be differentially expressed under specific conditions to well-understood biological pathways. 
We present here a computational tool, SEAS, for pathway enrichment analysis over a given set of genes in a specified organism against the pathways (or subsystems) in the SEED database, a popular pathway database for bacteria. SEAS maps a given set of genes of a bacterium to pathway genes covered by SEED through gene ID and/or orthology mapping, and then calculates the statistical significance of the enrichment of each relevant SEED pathway by the mapped genes. Our evaluation of SEAS indicates that the program provides highly reliable pathway mapping results and identifies more organism-specific pathways than similar existing programs. SEAS is publicly released under the GPL license agreement and freely available at http://csbl.bmb.uga.edu/~xizeng/research/seas/.",2011-07-22 +26324317,[Current state of medical care of polytrauma and mass casualty incidents in Germany. Are we well-prepared?].,"The white paper on the medical care of the severely injured published in 2006 is a collection of proposals and recommendations concerning structure, organization and equipment for the medical care of severely injured patients. Since its publication 50 networks ( http://www.dgu-traumanetzwerk.de/index ) have been established as part of the trauma network. This and the trauma register have helped to continuously improve the medical care of severely injured patients since 1993 [26]. Numerous studies have documented the progress made in measures required by the trauma network [4, 6]. For example, the mortality rate of severely injured patients has dropped from 25 % to approximately 10 % in the past 15 years. From the register and network data it is difficult to tell how each of these measures is implemented in the participating hospitals, who provides medical treatment to patients when, and how medical care is organized in detail. 
This is why a survey on medical care for polytrauma and in mass casualty situations was conducted among medical directors in German surgical hospitals who are members of the German Society for Trauma Surgery (DGU). Thanks to the 211 participants (most of whom specialize in orthopedic and trauma surgery) a detailed description of how medical treatment is currently organized and performed could be acquired. The survey showed that care of patients with polytrauma (i.e. medical treatment and management) is important irrespective of the level of training of physicians and of the level of patient treatment in hospitals. The central role of traumatologists was emphasized not only in terms of actual treatment but also as an administrator for organizational and management matters. Almost all hospitals have plans for a mass casualty situation; however, the levels of preparedness show considerable variation. A highly critical view is taken of the new surgical specialists with respect to interdisciplinary and comprehensive emergency medical treatment and casualty care. The survey also revealed the continual conflict between managing costs and maintaining quality and resources. It gives an overview of patient treatment in the transition from preclinical to clinical care and provides insights into the targets achieved, current problems and conflicts.",2015-10-01 +26446596,Final 3-Year Outcome of a Randomized Trial Comparing Second-Generation Drug-Eluting Stents Using Either Biodegradable Polymer or Durable Polymer: NOBORI Biolimus-Eluting Versus XIENCE/PROMUS Everolimus-Eluting Stent Trial. ,"There is a paucity of data reporting the clinical outcomes of biodegradable polymer biolimus-eluting stent (BP-BES) compared with durable polymer everolimus-eluting stent (DP-EES) beyond 1 year after stent implantation when the polymer is fully degraded. 
The NOBORI Biolimus-Eluting Versus XIENCE/PROMUS Everolimus-Eluting Stent Trial (NEXT) is a prospective, multicenter, randomized, open-label, noninferiority trial comparing BP-BES with DP-EES in patients scheduled for percutaneous coronary intervention using drug-eluting stent (DES) without any exclusion criteria among 98 participating centers in Japan. The trial was designed to evaluate noninferiority of BP-BES relative to DP-EES in terms of any target-lesion revascularization at 1 year and death or myocardial infarction at 3 years. Between May and October 2011, 3235 patients were randomly assigned to receive either BP-BES (1617 patients) or DP-EES (1618 patients). Complete 3-year follow-up was achieved in 97.6% of patients. At 3 years, the primary safety end point of death or myocardial infarction occurred in 159 patients (9.9%) in the BP-BES group and in 166 patients (10.3%) in the DP-EES group, demonstrating noninferiority of BP-BES relative to DP-EES (P noninferiority<0.0001 and P superiority=0.7). Cumulative incidence of target-lesion revascularization was not significantly different between the 2 groups (7.4% versus 7.1%; P=0.8). By a landmark analysis at 1 year, the cumulative incidences of death or myocardial infarction and target-lesion revascularization were also not significantly different between the 2 groups (4.6% versus 5.2%; P=0.46 and 3.3% versus 2.7%; P=0.39, respectively). Safety and efficacy outcomes of BP-BES were noninferior to those of DP-EES 3 years after stent implantation. URL: http://www.clinicaltrials.gov. Unique identifier: NCT01303640.",2015-10-01 +24803672,STarMir: a web server for prediction of microRNA binding sites.,"STarMir web server predicts microRNA (miRNA) binding sites on a target ribonucleic acid (RNA). STarMir is an implementation of logistic prediction models developed with miRNA binding data from crosslinking immunoprecipitation (CLIP) studies (Liu,C., Mallick, B., Long, D., Rennie, W.A., Wolenc, A., Carmack, C.S. 
and Ding, Y. (2013). CLIP-based prediction of mammalian microRNA binding sites. Nucleic Acids Res., 41(14), e138). In both intra-dataset and inter-dataset validations, the models showed major improvements over established algorithms in predictions of both seed and seedless sites. General applicability of the models was indicated by good performance in cross-species validations. The input data for STarMir is processed by the web server to perform prediction of miRNA binding sites, compute comprehensive sequence, thermodynamic and target structure features and a logistic probability as a measure of confidence for each predicted site. For each of seed and seedless sites and for all three regions of a mRNA (3' UTR, CDS and 5' UTR), STarMir output includes the computed binding site features, the logistic probability and a publication-quality diagram of the predicted miRNA:target hybrid. The prediction results are available through both an interactive viewer and downloadable text files. As an application module of the Sfold RNA package (http://sfold.wadsworth.org), STarMir is freely available to all at http://sfold.wadsworth.org/starmir.html.",2014-05-06 +25034530,X protein mutations in hepatitis B virus DNA predict postoperative survival in hepatocellular carcinoma.,"Hepatitis B virus (HBV) DNA is prone to mutations because of the proofreading deficiencies of HBV polymerase. The postoperative prognostic value of HBV mutations in HBV X protein (HBx) gene was assessed in HBV associated hepatocellular carcinoma (HCC) patients. The HBx gene was amplified and sequenced, and the HBV mutations were identified according to NCBI database ( http://www.ncbi.nlm.nih.gov/genome/5536 ). The relationship between the HBV mutations and HCC survival was compared. Survival curves were generated using the Kaplan-Meier method, and comparisons between the curves were made using the log-rank test. Multivariate survival analysis was performed using a Cox proportional hazards model. 
After adjusting for clinical characteristics, the following eight mutational sites were identified as statistically significant independent predictors of HCC survival: 1383, 1461, 1485, 1544, 1613, 1653, 1719, and 1753. In addition, the following four mutational sites were identified for their association with survival at a border-line significance level: 1527, 1637, 1674, and 1762/1764. A total of 12 mutations in HBx gene region were identified as independent predictors of postoperative survival in HCC patients. The analysis of HBV DNA mutations may help identify patient subgroups with poor prognosis and may help refine therapeutic decisions regarding HCC patients.",2014-07-19 +25956929,Maximizing antimalarial efficacy and the importance of dosing strategies.,"Artemisinin-based combination therapies (ACTs) are the cornerstone for the treatment of malaria. However, confirmed resistance to artemisinins in South-East Asia, and reports of reduced efficacy of ACTs raise major concerns for malaria treatment and control. Without new drugs to replace artemisinins, it is essential to define dosing strategies that maximize therapeutic efficacy, limit the spread of resistance, and preserve the clinical value of ACTs. It is important to determine the extent to which reduced efficacy of ACTs reflects true resistance versus sub-optimal dosing, and quantify other factors that determine treatment failure. Pooled analyses of individual patient data from multiple clinical trials, by investigators in the Worldwide Antimalarial Resistance Network, have shown high overall efficacy for three widely used ACTs, artemether-lumefantrine, artesunate-amodiaquine, and dihydroartemisinin-piperaquine. Analyses also highlight that suboptimal dosing leads to increased risk of treatment failure, especially among children. 
In the most recent study, an analysis of clinical trials of artesunate-amodiaquine, widely used among children in Africa, revealed a superior efficacy for fixed-dose combination tablets compared to loose non-fixed dose combinations. This highlights the benefits of fixed-dose combinations as a practical strategy for ensuring optimal antimalarial dosing and maximizing efficacy. Please see related article: http://www.biomedcentral.com/1741-7015/13/66.",2015-05-09 +21471017,Comprehensive and relaxed search for oligonucleotide signatures in hierarchically clustered sequence datasets.,"

Motivation

PCR, hybridization, DNA sequencing and other important methods in molecular diagnostics rely on both sequence-specific and sequence group-specific oligonucleotide primers and probes. Their design depends on the identification of oligonucleotide signatures in whole genome or marker gene sequences. Although genome and gene databases are generally available and regularly updated, collections of valuable signatures are rare. Even for single requests, the search for signatures becomes computationally expensive when working with large collections of target (and non-target) sequences. Moreover, with growing dataset sizes, the chance of finding exact group-matching signatures decreases, necessitating the application of relaxed search methods. The resultant substantial increase in complexity is exacerbated by the dearth of algorithms able to solve these problems efficiently.

Results

We have developed CaSSiS, a fast and scalable method for computing comprehensive collections of sequence- and sequence group-specific oligonucleotide signatures from large sets of hierarchically clustered nucleic acid sequence data. Based on the ARB Positional Tree (PT-)Server and a newly developed BGRT data structure, CaSSiS not only determines sequence-specific signatures and perfect group-covering signatures for every node within the cluster (i.e. target groups), but also signatures with maximal group coverage (sensitivity) within a user-defined range of non-target hits (specificity) for groups lacking a perfect common signature. An upper limit of tolerated mismatches within the target group, as well as the minimum number of mismatches with non-target sequences, can be predefined. Test runs with one of the largest phylogenetic gene sequence datasets available indicate good runtime and memory performance, and in silico spot tests have shown the usefulness of the resulting signature sequences as blueprints for group-specific oligonucleotide probes.

Availability

Software and Supplementary Material are available at http://cassis.in.tum.de/.",2011-04-05 +23289815,ASAP: an environment for automated preprocessing of sequencing data.,"

Background

Next-generation sequencing (NGS) has yielded an unprecedented amount of data for genetics research. It is a daunting task to process the data from raw sequence reads to variant calls and manually processing this data can significantly delay downstream analysis and increase the possibility for human error. The research community has produced tools to properly prepare sequence data for analysis and established guidelines on how to apply those tools to achieve the best results, however, existing pipeline programs to automate the process through its entirety are either inaccessible to investigators, or web-based and require a certain amount of administrative expertise to set up.

Findings

Advanced Sequence Automated Pipeline (ASAP) was developed to provide a framework for automating the translation of sequencing data into annotated variant calls with the goal of minimizing user involvement without the need for dedicated hardware or administrative rights. ASAP works both on computer clusters and on standalone machines with minimal human involvement and maintains high data integrity, while allowing complete control over the configuration of its component programs. It offers an easy-to-use interface for submitting and tracking jobs as well as resuming failed jobs. It also provides tools for quality checking and for dividing jobs into pieces for maximum throughput.

Conclusions

ASAP provides an environment for building an automated pipeline for NGS data preprocessing. This environment is flexible for use and future development. It is freely available at http://biostat.mc.vanderbilt.edu/ASAP.",2013-01-04 +25963830,A web portal for in-silico action potential predictions.,"

Introduction

Multiple cardiac ion channels are prone to block by pharmaceutical compounds, and this can have large implications for cardiac safety. The effect of a compound on individual ion currents can now be measured in automated patch clamp screening assays. In-silico action potential models are proposed as one way of predicting the integrated compound effects on whole-cell electrophysiology, to provide an improved indication of pro-arrhythmic risk.

Methods

We have developed open source software to run cardiac electrophysiology simulations to predict the overall effect of compounds that block IKr, ICaL, INa, IKs, IK1 and Ito to varying degrees, using a choice of mathematical electrophysiology models. To enable safety pharmacology teams to run and evaluate these simulations easily, we have also developed an open source web portal interface to this simulator.

Results

The web portal can be found at https://chaste.cs.ox.ac.uk/ActionPotential. Users can enter details of compound affinities for ion channels in the form of IC50 or pIC50 values, run simulations, store the results for later retrieval, view summary graphs of the results, and export data to a spreadsheet format.

Discussion

This web portal provides a simple interface to reference versions of mathematical models, and well-tested state-of-the-art equation solvers. It provides safety teams easy access to the emerging technology of cardiac electrophysiology simulations for use in the drug-discovery process.",2015-05-09 +24922310,CGBayesNets: conditional Gaussian Bayesian network learning and inference with mixed discrete and continuous data.,"Bayesian Networks (BN) have been a popular predictive modeling formalism in bioinformatics, but their application in modern genomics has been slowed by an inability to cleanly handle domains with mixed discrete and continuous variables. Existing free BN software packages either discretize continuous variables, which can lead to information loss, or do not include inference routines, which makes prediction with the BN impossible. We present CGBayesNets, a BN package focused around prediction of a clinical phenotype from mixed discrete and continuous variables, which fills these gaps. CGBayesNets implements Bayesian likelihood and inference algorithms for the conditional Gaussian Bayesian network (CGBNs) formalism, one appropriate for predicting an outcome of interest from, e.g., multimodal genomic data. We provide four different network learning algorithms, each making a different tradeoff between computational cost and network likelihood. CGBayesNets provides a full suite of functions for model exploration and verification, including cross validation, bootstrapping, and AUC manipulation. We highlight several results obtained previously with CGBayesNets, including predictive models of wood properties from tree genomics, leukemia subtype classification from mixed genomic data, and robust prediction of intensive care unit mortality outcomes from metabolomic profiles. We also provide detailed example analysis on public metabolomic and gene expression datasets. 
CGBayesNets is implemented in MATLAB and available as MATLAB source code, under an Open Source license and anonymous download at http://www.cgbayesnets.com.",2014-06-12 +26208538,What do patients say about emergency departments in online reviews? A qualitative study.,"

Background

Patients have adopted web-based tools to report on the quality of their healthcare experiences. We seek to examine online reviews for US emergency departments (EDs) posted on Yelp, a popular consumer ratings website.

Methods

We conducted a qualitative analysis of unstructured, publicly accessible reviews for hospitals available on http://www.yelp.com. We collected all reviews describing experiences of ED care for a stratified random sample of 100 US hospitals. We analysed the content of the reviews using themes derived from the Hospital Consumer Assessment of Healthcare Providers and Systems (HCAHPS) inpatient care survey. We also used modified grounded theory to iteratively code the text of the reviews, identifying additional themes specific to emergency care. The data were double-coded, and discrepancies were evaluated to ensure consensus.

Results

Of the 1736 total reviews, 573 (33%) described patient experiences involving the ED. The reviews contained several themes assessed by the HCAHPS survey, including communication with nurses, communication with doctors, and pain control. The reviews also contained key themes specific to emergency care: waiting and efficiency; decisions to seek care in the ED; and events following discharge, including administrative difficulties.

Conclusions

These exploratory findings suggest that online reviews for EDs contain similar themes to survey-based assessments of inpatient hospital care as well as themes specific to emergency care. Consumer rating websites allow patients to provide rapid and public feedback on their experience of medical care. Web-based platforms may offer a novel strategy for assessing patient-centred quality in emergency care.",2015-07-24 +25802408,Comparison of splice sites reveals that long noncoding RNAs are evolutionarily well conserved.,"Large-scale RNA sequencing has revealed a large number of long mRNA-like transcripts (lncRNAs) that do not code for proteins. The evolutionary history of these lncRNAs has been notoriously hard to study systematically due to their low level of sequence conservation that precludes comprehensive homology-based surveys and makes them nearly impossible to align. An increasing number of special cases, however, has been shown to be at least as old as the vertebrate lineage. Here we use the conservation of splice sites to trace the evolution of lncRNAs. We show that >85% of the human GENCODE lncRNAs were already present at the divergence of placental mammals and many hundreds of these RNAs date back even further. Nevertheless, we observe a fast turnover of intron/exon structures. We conclude that lncRNA genes are evolutionary ancient components of vertebrate genomes that show an unexpected and unprecedented evolutionary plasticity. We offer a public web service (http://splicemap.bioinf.uni-leipzig.de) that allows to retrieve sets of orthologous splice sites and to produce overview maps of evolutionarily conserved splice sites for visualization and further analysis. 
An electronic supplement containing the ncRNA data sets used in this study is available at http://www.bioinf.uni-leipzig.de/publications/supplements/12-001.",2015-03-23 +28295119,Ocular ultraviolet radiation exposure of welders.,"I read with interest a recent paper in your journal by Slagor et al on the risk of cataract in relation to metal arc welding (1). The authors highlight that even though welders are exposed to substantial levels of ultraviolet radiation (UVR), ""no studies have reported data on how much UVR welders' eyes are exposed to during a working day. Thus, we do not know whether welders are more or less exposed to UVR than outdoor workers"" (1, p451). Undertaking accurate exposure assessment of UVR from welding arcs is difficult, however, two studies have reported ocular/facial UVR levels underneath welding helmets (2, 3). In the first paper, UVR levels were measured using polysulphone film dosimeters applied to the cheeks of a patient who suffered from severe facial dermatitis (2). UVR levels of four times the American Conference of Governmental Industrial Hygienists (ACGIH) maximum permissible exposure (MPE) (4) were measured on the workers left cheek and nine times the MPE on the right cheek. The authors concluded that the workers dermatitis was likely to have been due to the UVR exposure received during welding. In the other paper, a comprehensive exposure assessment of personal UVR exposure of workers in a welding environment was reported (3). The study was conducted at a metal fabrication workshop with participants being welders, boilermakers and non-welders (eg, supervisors, fitters, machinists). Polysulphone film dosimeters were again used to measure UVR exposure of the workers, with badges worn on the clothing of workers (in the chest area), on the exterior of welding helmets, attached to 11 locations on the inside of welding helmets, and on the bridge and side-shields of safety spectacles. 
Dosimeters were also attached to surfaces throughout the workshop to measure ambient UVR levels. For welding subjects, mean 8-hour UVR doses within the welding helmets ranged from around 9 mJ/cm2 (3×MPE) on the inside of the helmets to around 15 mJ/cm2 (5×MPE) on the headband (a location to approximate ocular exposure). UVR exposures for non-welding workers were also quite substantial, with mean 8-hour doses on the outside of safety spectacles being around 36 mJ/cm2 (12×MPE) on the bridge and around 27 mJ/cm2 (9×MPE) on the sides. Exposures measured on the outside of clothing were substantial (eg, mean 8-hour UVR dose for welders was around 9795 mJ/cm2 (3265×MPE)), with mean ambient UVR levels of 16.4 mJ/cm2 (5.5×MPE). The high ambient and ""body"" exposures measured in the study by Tenkate & Collins (3) are not unexpected; however, the levels measured within the welding helmets are of concern considering this represents UVR that has penetrated or by-passed standard protection measures. It has been shown that UVR is able to infiltrate welding helmets by entering from the back and through the sides and top (5). This type of infiltration is likely to occur when welders are in close proximity to each other and the welder is receiving exposure from other welders' emissions. In addition, facial/ocular exposure is also likely to occur when welders flip-up their welding helmet and undertake other work (eg, set-up, handle materials etc). For many welders, the proportion of their welding time per day compared to these other activities has been measured at around 20% (6), which means that if welders flip-up their helmet and leave it flipped-up, their eyes and faces are directly exposed to ambient UVR and that of nearby welders for a large part of the work day. 
Wearing safety spectacles underneath welding helmets is a recommended practice (7); however, workers report that wearing safety spectacles, particularly underneath welding helmets, is uncomfortable, with visibility impacted due to fogging and sweat (8, 9). The use of auto-darkening helmets is one solution to eliminating the practice of flipping-up the helmet. The Slagor et al paper (1, p451) also states that ""we do not know whether welders are more or less exposed to UVR than outdoor workers"", with reference made to the average solar UVR exposure of a Danish outdoor worker being 22 400 J/m2 per year (min-max 5400 - 66 900 J/m2 per year) (10). For comparison, taking the mean 8-hour UVR dose within the welding helmets as 15 mJ/cm2 (3), this would equate to an annual ocular/facial UVR dose for welders of 37 500 J/m2 (at 5 days/week, 50 weeks/year). Even though this value is weighted for the ACGIH action spectrum, and the value for the Danish outdoor workers is weighted for the erythema spectrum, it provides a reasonable comparison and indicates that welders are likely to receive comparable facial/ocular UVR doses to outdoor workers. Slagor et al also state that ""it is inferred that welders are not exposed to large amounts of UVR during their work life, in spite of the photokeratoconjunctivitis incidents"" (1, p451). I would propose that the UVR dosimetry studies described above (2, 3), taken together with studies on UVR emissions of welding arcs which show that the MPE for many welding arcs can be exceeded in a matter of seconds (11-13), indicate that welders do work in an extreme UVR environment. These studies also suggest that welders are regularly exposed to levels of UVR that exceed the occupational exposure limits at body sites which are thought to be protected (eg, face and eyes) (2, 3). 
When these exposures are further considered in light of the range of eye conditions reported to occur in welders (14-17), the importance of implementing a comprehensive eye safety strategy for welders and all workers in a welding environment is imperative. References 1. Slagor RM, La Cour M, Bonde JP. The risk of cataract in relation to metal arc welding. Scan J Work Environ Health. 2016;42(5):447-53. https://doi.org/10.5271/sjweh.3572.  2. Shehade SA, Roberts PJ, Diffey BF, Foulds IS. Photodermatitis due to spot welding. Br J Dermatol. 1987;117:117-9. https://doi.org/10.1111/j.1365-2133.1987.tb04100.x.  3. Tenkate TD, Collins MJ. Personal ultraviolet radiation exposure of workers in a welding environment. Am Indust Hyg Assoc J. 1997;58:33-8. https://doi.org/10.1080/15428119791013053.  4. ACGIH. Ultraviolet radiation in: TLVs and BEIs. American Conference of Governmental Industrial Hygienists. Cincinnati; 2016. p. 153-8.  5. Tenkate TSD and Collins MJ. Angles of entry of ultraviolet radiation into welding helmets. Am Indust Hyg Assoc J, 1997; 58:54-6. https://doi.org/10.1080/15428119791013099.  6. Tenkate T. Welding arc time and UV exposure: implications for worker safety. J Occup Health Safety-Aust NZ. 2008;24(2):161-6.  7. ANSI Z49.1:2012. Safety in Welding, Cutting, and Allied Processes. American Welding Society: Miami; 2012.  8. Lombardi DA, Verma SK, Brennan MJ, Perry MJ. Factors influencing worker use of personal protective eyewear. Accident Analysis and Prevention. 2009;41:755-62. https://doi.org/10.1016/j.aap.2009.03.017.  9. Tenkate TD. Optical radiation hazards of welding arcs. Rev Environ Health. 1998;13(3):131-46. https://doi.org/10.1515/REVEH.1998.13.3.131.  10. Thieden E, Philipsen PA, Heydenreich J, Wulf HC. UV radiation exposure related to age, sex, occupation, and sun behavior based on time-stamped personal dosimeter readings. Arch Dermatol. 2004;140:197-203. https://doi.org/10.1001/archderm.140.2.197.  11. 
Gourzoulidis GA, Achtipis A, Topalis FV, Kazasidis ME, Pantelis D, Markoulis A. Artificial optical radiation photobiological hazards in arc welding. Physica Medica. 2016;32:981-6. https://doi.org/10.1016/j.ejmp.2016.07.001.  12. Mariutti G, Matzeu M. Measurement of ultraviolet radiation emitted from welding arcs. Health Physics. 1988;54(5):529-32. https://doi.org/10.1097/00004032-198805000-00004.  13. Okuno T. Measurement of ultraviolet radiation from welding arcs. Industrial Health. 1987; 25:147-56. https://doi.org/10.2486/indhealth.25.147. 14. Zlateva V, Toncheva R, Andreev A. Epidemiological studies on occupational eye pathology. Eur. J. Ophthalmol. 1996;6(4):440-5.  15. Lombardi DA, Pannala R, Sorock GS, Wellman H, Courtney TK, Verma S, Smith GS. Welding related occupational eye injuries: a narrative analysis. Injury Prevention. 2005;1:174-9. https://doi.org/10.1136/ip.2004.007088.  16. Tenkate T, Collins MJ. A survey of symptoms and eye safety practices among welders. Clin Exp Optom. 1990;73(3):79-85. https://doi.org/10.1111/j.1444-0938.1990.tb03107.x.  17. Shah CP, Weis E, Lajous M, Shields JA, Shields CL. Intermittent and chronic ultraviolet light exposure and uveal melanoma: a meta-analysis. Opthalmology. 2005;112:1599-607. https://doi.org/10.1016/j.ophtha.2005.04.020.",2017-03-15 +24991975,sc-PDB-Frag: a database of protein-ligand interaction patterns for Bioisosteric replacements.,"Bioisosteric replacement plays an important role in medicinal chemistry by keeping the biological activity of a molecule while changing either its core scaffold or substituents, thereby facilitating lead optimization and patenting. Bioisosteres are classically chosen in order to keep the main pharmacophoric moieties of the substructure to replace. However, notably when changing a scaffold, no attention is usually paid as whether all atoms of the reference scaffold are equally important for binding to the desired target. 
We herewith propose a novel database for bioisosteric replacement (scPDBFrag), capitalizing on our recently published structure-based approach to scaffold hopping, focusing on interaction pattern graphs. Protein-bound ligands are first fragmented and the interaction of the corresponding fragments with their protein environment computed-on-the-fly. Using an in-house developed graph alignment tool, interaction patterns graphs can be compared, aligned, and sorted by decreasing similarity to any reference. In the herein presented sc-PDB-Frag database ( http://bioinfo-pharma.u-strasbg.fr/scPDBFrag ), fragments, interaction patterns, alignments, and pairwise similarity scores have been extracted from the sc-PDB database of 8077 druggable protein-ligand complexes and further stored in a relational database. We herewith present the database, its Web implementation, and procedures for identifying true bioisosteric replacements based on conserved interaction patterns.",2014-07-17 +25873627,Assessing the translational landscape of myogenic differentiation by ribosome profiling.,"The formation of skeletal muscles is associated with drastic changes in protein requirements known to be safeguarded by tight control of gene transcription and mRNA processing. The contribution of regulation of mRNA translation during myogenesis has not been studied so far. We monitored translation during myogenic differentiation of C2C12 myoblasts, using a simplified protocol for ribosome footprint profiling. Comparison of ribosome footprints to total RNA showed that gene expression is mostly regulated at the transcriptional level. However, a subset of transcripts, enriched for mRNAs encoding for ribosomal proteins, was regulated at the level of translation. Enrichment was also found for specific pathways known to regulate muscle biology. 
We developed a dedicated pipeline to identify translation initiation sites (TISs) and discovered 5333 unannotated TISs, providing a catalog of upstream and alternative open reading frames used during myogenesis. We identified 298 transcripts with a significant switch in TIS usage during myogenesis, which was not explained by alternative promoter usage, as profiled by DeepCAGE. Also these transcripts were enriched for ribosomal protein genes. This study demonstrates that differential mRNA translation controls protein expression of specific subsets of genes during myogenesis. Experimental protocols, analytical workflows, tools and data are available through public repositories (http://lumc.github.io/ribosome-profiling-analysis-framework/).",2015-04-14 +26202141,Cryopreserved Subcutaneous Adipose Tissue for Fat Graft.,"

Unlabelled

Cryopreservation of subcutaneous white adipose tissue (sWAT) avoids multiple surgeries in patients subjected to reconstructive procedure. Fat grafts were performed subcutaneously on 26 mice treated with fresh (13 mice) or cryopreserved (13 mice) human sWAT. Cytofluorometry for CD marker expression of stem cells, differentiation capability, and in vivo survival of fat grafts were evaluated. In vitro analysis evidenced that cryopreservation did not affect the stem potential of samples. In vivo MRI showed that grafts were well preserved in 13 mice treated with fresh sWAT, whereas in 13 animals treated with thawed fat, graft volumes were strongly reduced after 1 week. Ultrastructural studies performed both on fresh and thawed specimens demonstrated that grafts performed with thawed sWAT are able to store lipids more slowly with respect to grafts performed with fresh sWAT and adipocytes maintained a multilocular appearance. Collected data demonstrated that the protocol of cryopreservation could maintain the regenerative capability of the sWAT, but the rate of reabsorption after fat grafting is higher using cryopreserved sWAT. Maintaining the stem potential of sWAT after cryopreservation is a very important aspect for reconstructive and regenerative medicine. The employment of cryopreserved sWAT represents an interesting goal for surgeons. Surely there is the necessity to improve the protocol of cryopreservation.

No level assigned

This journal requires that authors assign a level of evidence to each submission to which Evidence-Based Medicine rankings are applicable. This excludes Review Articles, Book Reviews, and manuscripts that concern Basic Science, Animal Studies, Cadaver Studies, and Experimental Studies. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266 .",2015-07-23 +21816034,The volatile compound BinBase mass spectral database.,"

Background

Volatile compounds comprise diverse chemical groups with wide-ranging sources and functions. These compounds originate from major pathways of secondary metabolism in many organisms and play essential roles in chemical ecology in both plant and animal kingdoms. In past decades, sampling methods and instrumentation for the analysis of complex volatile mixtures have improved; however, design and implementation of database tools to process and store the complex datasets have lagged behind.

Description

The volatile compound BinBase (vocBinBase) is an automated peak annotation and database system developed for the analysis of GC-TOF-MS data derived from complex volatile mixtures. The vocBinBase DB is an extension of the previously reported metabolite BinBase software developed to track and identify derivatized metabolites. The BinBase algorithm uses deconvoluted spectra and peak metadata (retention index, unique ion, spectral similarity, peak signal-to-noise ratio, and peak purity) from the Leco ChromaTOF software, and annotates peaks using a multi-tiered filtering system with stringent thresholds. The vocBinBase algorithm assigns the identity of compounds existing in the database. Volatile compound assignments are supported by the Adams mass spectral-retention index library, which contains over 2,000 plant-derived volatile compounds. Novel molecules that are not found within vocBinBase are automatically added using strict mass spectral and experimental criteria. Users obtain fully annotated data sheets with quantitative information for all volatile compounds for studies that may consist of thousands of chromatograms. The vocBinBase database may also be queried across different studies, comprising currently 1,537 unique mass spectra generated from 1.7 million deconvoluted mass spectra of 3,435 samples (18 species). Mass spectra with retention indices and volatile profiles are available as free download under the CC-BY agreement (http://vocbinbase.fiehnlab.ucdavis.edu).

Conclusions

The BinBase database algorithms have been successfully modified to allow for tracking and identification of volatile compounds in complex mixtures. The database is capable of annotating large datasets (hundreds to thousands of samples) and is well-suited for between-study comparisons such as chemotaxonomy investigations. This novel volatile compound database tool is applicable to research fields spanning chemical ecology to human health. The BinBase source code is freely available at http://binbase.sourceforge.net/ under the LGPL 2.0 license agreement.",2011-08-04 +26202612,The NEWMEDS rodent touchscreen test battery for cognition relevant to schizophrenia.,"

Rationale

The NEWMEDS initiative (Novel Methods leading to New Medications in Depression and Schizophrenia, http://www.newmeds-europe.com ) is a large industrial-academic collaborative project aimed at developing new methods for drug discovery for schizophrenia. As part of this project, Work package 2 (WP02) has developed and validated a comprehensive battery of novel touchscreen tasks for rats and mice for assessing cognitive domains relevant to schizophrenia.

Objectives

This article provides a review of the touchscreen battery of tasks for rats and mice for assessing cognitive domains relevant to schizophrenia and highlights validation data presented in several primary articles in this issue and elsewhere.

Methods

The battery consists of the five-choice serial reaction time task and a novel rodent continuous performance task for measuring attention, a three-stimulus visual reversal and the serial visual reversal task for measuring cognitive flexibility, novel non-matching to sample-based tasks for measuring spatial working memory and paired-associates learning for measuring long-term memory.

Results

The rodent (i.e. both rats and mice) touchscreen operant chamber and battery has high translational value across species due to its emphasis on construct as well as face validity. In addition, it offers cognitive profiling of models of diseases with cognitive symptoms (not limited to schizophrenia) through a battery approach, whereby multiple cognitive constructs can be measured using the same apparatus, enabling comparisons of performance across tasks.

Conclusion

This battery of tests constitutes an extensive tool package for both model characterisation and pre-clinical drug discovery.",2015-07-24 +22555647,AMS 4.0: consensus prediction of post-translational modifications in protein sequences.,"We present here the 2011 update of the AutoMotif Service (AMS 4.0) that predicts the wide selection of 88 different types of the single amino acid post-translational modifications (PTM) in protein sequences. The selection of experimentally confirmed modifications is acquired from the latest UniProt and Phospho.ELM databases for training. The sequence vicinity of each modified residue is represented using amino acids physico-chemical features encoded using high quality indices (HQI) obtaining by automatic clustering of known indices extracted from AAindex database. For each type of the numerical representation, the method builds the ensemble of Multi-Layer Perceptron (MLP) pattern classifiers, each optimising different objectives during the training (for example the recall, precision or area under the ROC curve (AUC)). The consensus is built using brainstorming technology, which combines multi-objective instances of machine learning algorithm, and the data fusion of different training objects representations, in order to boost the overall prediction accuracy of conserved short sequence motifs. The performance of AMS 4.0 is compared with the accuracy of previous versions, which were constructed using single machine learning methods (artificial neural networks, support vector machine). Our software improves the average AUC score of the earlier version by close to 7 % as calculated on the test datasets of all 88 PTM types. Moreover, for the selected most-difficult sequence motifs types it is able to improve the prediction performance by almost 32 %, when compared with previously used single machine learning methods. 
Summarising, the brainstorming consensus meta-learning methodology on the average boosts the AUC score up to around 89 %, averaged over all 88 PTM types. Detailed results for single machine learning methods and the consensus methodology are also provided, together with the comparison to previously published methods and state-of-the-art software tools. The source code and precompiled binaries of brainstorming tool are available at http://code.google.com/p/automotifserver/ under Apache 2.0 licensing.",2012-05-04 +24386343,BOBA FRET: bootstrap-based analysis of single-molecule FRET data.,"Time-binned single-molecule Förster resonance energy transfer (smFRET) experiments with surface-tethered nucleic acids or proteins permit to follow folding and catalysis of single molecules in real-time. Due to the intrinsically low signal-to-noise ratio (SNR) in smFRET time traces, research over the past years has focused on the development of new methods to extract discrete states (conformations) from noisy data. However, limited observation time typically leads to pronounced cross-sample variability, i.e., single molecules display differences in the relative population of states and the corresponding conversion rates. Quantification of cross-sample variability is necessary to perform statistical testing in order to assess whether changes observed in response to an experimental parameter (metal ion concentration, the presence of a ligand, etc.) are significant. However, such hypothesis testing has been disregarded to date, precluding robust biological interpretation. Here, we address this problem by a bootstrap-based approach to estimate the experimental variability. Simulated time traces are presented to assess the robustness of the algorithm in conjunction with approaches commonly used in thermodynamic and kinetic analysis of time-binned smFRET data. 
Furthermore, a pair of functionally important sequences derived from the self-cleaving group II intron Sc.ai5γ (d3'EBS1/IBS1) is used as a model system. Through statistical hypothesis testing, divalent metal ions are shown to have a statistically significant effect on both thermodynamic and kinetic aspects of their interaction. The Matlab source code used for analysis (bootstrap-based analysis of smFRET data, BOBA FRET), as well as a graphical user interface, is available via http://www.aci.uzh.ch/rna/.",2013-12-27 +25030066,Meta-analysis of postoperative efficacy in patients receiving chemoradiotherapy followed by surgery for resectable esophageal carcinoma.,"

Background

Many studies have demonstrated that chemoradiotherapy followed by surgery (CRTS) prolongs the 5-year survival rate of resectable esophageal carcinoma patients. However, the effect of CRTS on postoperative complications, local recurrence and distant metastasis remains controversial. We performed a systematic review of the literature and conducted a meta-analysis to assess the postoperative efficacy of CRTS compared with surgery alone (SA).

Methods

Pubmed, Web of Science and the Cochrane library Databases were used to identify published studies between 2000 and 2013 that directly compared CRTS with SA. The pooled relative risk (RR) and its corresponding 95% confidence interval (95% CI) constituted the principal measure of treatment effects. Heterogeneity was assessed by the χ2 and I2 statistic.

Results

The final analysis included 1930 resectable esophageal carcinoma cases from 13 randomized controlled trials (RCTs). Compared with SA, CRTS was associated with significantly decreased postoperative mortality, local recurrence and distant metastasis rates, with RR (95% CI) = 0.64 (0.49-0.84), 0.53 (0.39-0.73), 0.82 (0.68-0.98); p = 0.001, <0.00001, =0.03, respectively. However, there was no significant difference in postoperative complication incidence between the two groups (RR, 1.09; 95% CI, 0.96-1.24; p = 0.18).

Conclusions

CRTS significantly decreased postoperative mortality, local recurrence and distant metastasis rates compared to SA. Additionally, there were no increased postoperative complications for patients with resectable esophageal carcinoma.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1531519216130950.",2014-07-16 +25202399,DNA repair gene XRCC1 Arg194Trp polymorphism and susceptibility to hepatocellular carcinoma: A meta-analysis.,"The arginine194tryptophan (Arg194Trp) polymorphism in the X-ray repair cross-complementing group 1 (XRCC1) gene has been reported to be associated with hepatocellular carcinoma (HCC), however, the results from previous studies are conflicting. The present study aimed to investigate the association between the XRCC1 Arg194Trp polymorphism and the risk of HCC, using a meta-analysis of previously published studies. PubMed (http://www.ncbi.nlm.nih.gov/pubmed/), Google Scholar (http://scholar.google.co.uk/) and the China National Knowledge Infrastructure databases (http://www.cnki.net/) were systematically searched to identify relevant studies published prior to October 2013. A meta-analysis was performed to examine the association between the Arg194Trp gene polymorphism and the susceptibility to HCC. Odds ratios (ORs) and 95% confidence intervals (95% CIs) were calculated. The meta-analysis consisted of six case-control studies that included 1,451 HCC cases and 1,398 healthy controls. Meta-analysis results based on all the studies showed no significant association between the XRCC1 Arg194Trp gene polymorphism and the risk of HCC (Trp/Trp vs. Arg/Arg: OR, 1.17; 95% CI, 0.89-1.55; Trp/Trp vs. Arg/Trp: OR, 0.94; 95% CI, 0.59-1.51; dominant model: OR, 0.97; 95% CI, 0.63-1.49; recessive model: OR, 1.22; 95% CI, 0.89-1.67). In the subgroup analysis, three studies with sample sizes of >300 produced similar results that indicated that the Arg194Trp gene polymorphism had no association with an increased or decreased risk of HCC. 
The pooled ORs were not markedly different following the exclusion of two studies deviating from the Hardy-Weinberg equilibrium in the control group, which indicated the reliability of the meta-analysis results. In conclusion, the XRCC1 Arg194Trp polymorphism may not be a risk or protective factor for HCC. Further large and well-designed studies are required to confirm these results.",2014-07-15 +26908244,CERAPP: Collaborative Estrogen Receptor Activity Prediction Project.,"

Background

Humans are exposed to thousands of man-made chemicals in the environment. Some chemicals mimic natural endocrine hormones and, thus, have the potential to be endocrine disruptors. Most of these chemicals have never been tested for their ability to interact with the estrogen receptor (ER). Risk assessors need tools to prioritize chemicals for evaluation in costly in vivo tests, for instance, within the U.S. EPA Endocrine Disruptor Screening Program.

Objectives

We describe a large-scale modeling project called CERAPP (Collaborative Estrogen Receptor Activity Prediction Project) and demonstrate the efficacy of using predictive computational models trained on high-throughput screening data to evaluate thousands of chemicals for ER-related activity and prioritize them for further testing.

Methods

CERAPP combined multiple models developed in collaboration with 17 groups in the United States and Europe to predict ER activity of a common set of 32,464 chemical structures. Quantitative structure-activity relationship models and docking approaches were employed, mostly using a common training set of 1,677 chemical structures provided by the U.S. EPA, to build a total of 40 categorical and 8 continuous models for binding, agonist, and antagonist ER activity. All predictions were evaluated on a set of 7,522 chemicals curated from the literature. To overcome the limitations of single models, a consensus was built by weighting models on scores based on their evaluated accuracies.

Results

Individual model scores ranged from 0.69 to 0.85, showing high prediction reliabilities. Out of the 32,464 chemicals, the consensus model predicted 4,001 chemicals (12.3%) as high priority actives and 6,742 potential actives (20.8%) to be considered for further testing.

Conclusion

This project demonstrated the possibility to screen large libraries of chemicals using a consensus of different in silico approaches. This concept will be applied in future projects related to other end points.

Citation

Mansouri K, Abdelaziz A, Rybacka A, Roncaglioni A, Tropsha A, Varnek A, Zakharov A, Worth A, Richard AM, Grulke CM, Trisciuzzi D, Fourches D, Horvath D, Benfenati E, Muratov E, Wedebye EB, Grisoni F, Mangiatordi GF, Incisivo GM, Hong H, Ng HW, Tetko IV, Balabin I, Kancherla J, Shen J, Burton J, Nicklaus M, Cassotti M, Nikolov NG, Nicolotti O, Andersson PL, Zang Q, Politi R, Beger RD, Todeschini R, Huang R, Farag S, Rosenberg SA, Slavov S, Hu X, Judson RS. 2016.

Cerapp

Collaborative Estrogen Receptor Activity Prediction Project. Environ Health Perspect 124:1023-1033; http://dx.doi.org/10.1289/ehp.1510267.",2016-02-23 +26079349,TPpred3 detects and discriminates mitochondrial and chloroplastic targeting peptides in eukaryotic proteins.,"

Motivation

Molecular recognition of N-terminal targeting peptides is the most common mechanism controlling the import of nuclear-encoded proteins into mitochondria and chloroplasts. When experimental information is lacking, computational methods can annotate targeting peptides, and determine their cleavage sites for characterizing protein localization, function, and mature protein sequences. The problem of discriminating mitochondrial from chloroplastic propeptides is particularly relevant when annotating proteomes of photosynthetic Eukaryotes, endowed with both types of sequences.

Results

Here, we introduce TPpred3, a computational method that given any Eukaryotic protein sequence performs three different tasks: (i) the detection of targeting peptides; (ii) their classification as mitochondrial or chloroplastic and (iii) the precise localization of the cleavage sites in an organelle-specific framework. Our implementation is based on our TPpred previously introduced. Here, we integrate a new N-to-1 Extreme Learning Machine specifically designed for the classification task (ii). For the last task, we introduce an organelle-specific Support Vector Machine that exploits sequence motifs retrieved with an extensive motif-discovery analysis of a large set of mitochondrial and chloroplastic proteins. We show that TPpred3 outperforms the state-of-the-art methods in all the three tasks.

Availability and implementation

The method server and datasets are available at http://tppred3.biocomp.unibo.it.

Contact

gigi@biocomp.unibo.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-16 +23193274,ENCODE data in the UCSC Genome Browser: year 5 update.,"The Encyclopedia of DNA Elements (ENCODE), http://encodeproject.org, has completed its fifth year of scientific collaboration to create a comprehensive catalog of functional elements in the human genome, and its third year of investigations in the mouse genome. Since the last report in this journal, the ENCODE human data repertoire has grown by 898 new experiments (totaling 2886), accompanied by a major integrative analysis. In the mouse genome, results from 404 new experiments became available this year, increasing the total to 583, collected during the course of the project. The University of California, Santa Cruz, makes this data available on the public Genome Browser http://genome.ucsc.edu for visual browsing and data mining. Download of raw and processed data files are all supported. The ENCODE portal provides specialized tools and information about the ENCODE data sets.",2012-11-27 +22417303,Infrequent p53 gene mutation but UV gradient-like p53 protein positivity in keloids.,"Keloids are characterized by extreme fibroblastic overgrowth of unknown pathogenesis after skin injury. Previous studies, mostly in non-Caucasian populations, suggest that p53 mutations may be involved. To substantiate this, we performed DNA sequence analysis of exons 4-8 of the p53 gene and immunohistochemical staining of p53 protein in archived keloidal tissue samples from 23 Caucasian patients. In contrast to previous reports, we found mutated p53 in keloidal tissue in a minority of cases (2/23; 12%). The G allele frequency and C allele frequency at the p53 polymorphic codon 72 were 0.72 (33/46) and 0.28 (13/46), respectively, in our study, a finding that was similar to the 0.77 (184/240) vs. 
0.23 (56/240) (P = 0.4580; chi-squared test) observed in the Hap Map data of a European population but statistically significantly different from the 0.43 (547/1258) vs. 0.57 (711/1258) (P = 0.0002; chi-squared test) observed in the 1000 Genome project [Database of Single Nucleotide Polymorphisms (dbSNP). Bethesda (MD): National Center for Biotechnology Information, National Library of Medicine. dbSNP accession:rs1042522, (dbSNP Build ID: 132). Available from: (http://www.ncbi.nlm.nih.gov/SNP/] a difference most likely due to the different genetic background of the populations enrolled. However, one-third of the keloidal samples showed lesional nuclear p53 staining with a UV penetration gradient-like positivity (P ≤ 0.0084). Staining with an anti-cyclobutane pyrimidine dimer antibody revealed the total absence of short-term photoproducts in the epidermis as well as keloidal tissue. Furthermore, all fibroblasts expressing p53 stained negative for Ki-67, indicating that these cells were in a quiescent stage and p53 upregulation did not contribute to keloidal proliferation. We conclude that p53 plays no major role in the pathogenesis of keloids in the Caucasian population.",2012-04-01 +27468732,A cost-effectiveness modelling study of strategies to reduce risk of infection following primary hip replacement based on a systematic review.,"

Background

A deep infection of the surgical site is reported in 0.7% of all cases of total hip arthroplasty (THA). This often leads to revision surgery that is invasive, painful and costly. A range of strategies is employed in NHS hospitals to reduce risk, yet no economic analysis has been undertaken to compare the value for money of competing prevention strategies.

Objectives

To compare the costs and health benefits of strategies that reduce the risk of deep infection following THA in NHS hospitals. To make recommendations to decision-makers about the cost-effectiveness of the alternatives.

Design

The study comprised a systematic review and cost-effectiveness decision analysis.

Setting

77,321 patients who had a primary hip arthroplasty in NHS hospitals in 2012.

Interventions

Nine different treatment strategies including antibiotic prophylaxis, antibiotic-impregnated cement and ventilation systems used in the operating theatre.

Main outcome measures

Change in the number of deep infections, change in the total costs and change in the total health benefits in quality-adjusted life-years (QALYs).

Data sources

Literature searches using MEDLINE, EMBASE, Cumulative Index to Nursing and Allied Health Literature and the Cochrane Central Register of Controlled Trials were undertaken to cover the period 1966-2012 to identify infection prevention strategies. Relevant journals, conference proceedings and bibliographies of retrieved papers were hand-searched. Orthopaedic surgeons and infection prevention experts were also consulted.

Review methods

English-language papers only. The selection of evidence was by two independent reviewers. Studies were included if they were interventions that reported THA-related deep surgical site infection (SSI) as an outcome. Mixed-treatment comparisons were made to produce estimates of the relative effects of competing infection control strategies.

Results

Twelve studies, six randomised controlled trials and six observational studies, involving 123,788 total hip replacements (THRs) and nine infection control strategies, were identified. The quality of the evidence was judged against four categories developed by the National Institute for Health and Care Excellence Methods for Development of NICE Public Health Guidance ( http://publications.nice.org.uk/methods-for-the-development-of-nice-public-health-guidance-third-edition-pmg4 ), accessed March 2012. All evidence was found to fit the two highest categories of 1 and 2. Nine competing infection control interventions [treatments (Ts) 1-9] were used in a cohort simulation model of 77,321 patients who had a primary THR in 2012. Predictions were made for cases of deep infection and total costs, and QALY outcomes. Compared with a baseline of T1 (no systemic antibiotics, plain cement and conventional ventilation) all other treatment strategies reduced risk. T6 was the most effective (systemic antibiotics, antibiotic-impregnated cement and conventional ventilation) and prevented a further 1481 cases of deep infection, and led to the largest annual cost savings and the greatest gains to QALYs. The additional uses of laminar airflow and body exhaust suits indicate higher costs and worse health outcomes.

Conclusions

T6 is an optimal strategy for reducing the risk of SSI following THA. The other strategies that are commonly used among NHS hospitals lead to higher cost and worse QALY outcomes. Policy-makers, therefore, have an opportunity to save resources and improve health outcomes. The effects of laminar air flow and body exhaust suits might be further studied if policy-makers are to consider disinvesting in these technologies.

Limitations

A wide range of evidence sources was synthesised and there is large uncertainty in the conclusions.

Funding

The National Institute for Health Research Health Technology Assessment programme and the Queensland Health Quality Improvement and Enhancement Programme (grant number 2008001769).",2016-07-01 +24856452,MET-COFEA: a liquid chromatography/mass spectrometry data processing platform for metabolite compound feature extraction and annotation.,"In this paper, we present a novel liquid chromatography/mass spectrometry (LC/MS) data processing and analysis platform, MET-COFEA (METabolite COmpound Feature Extraction and Annotation). MET-COFEA detects and clusters chromatographic peak features for each metabolite compound by first comprehensively evaluating retention time and peak shape criteria and then annotating the associations between each peak's observed m/z value with the corresponding metabolite compound's molecular mass. MET-COFEA integrates a series of innovative approaches, including novel mass trace based extracted-ion chromatogram (EIC) extraction, continuous wavelet transform (CWT)-based peak detection, and compound-associated peak clustering and peak annotation algorithms. On the basis of the deduced neutral molecular mass and retention time, we have also developed a new alignment algorithm that uses compound-associated peak groups instead of individual peaks to align the same metabolite compound across samples from different electrospray ionization (ESI) modes, different instruments, even different experimental conditions. MET-COFEA has been systematically tested on a series of LC/MS profiles of mixed standards at different concentrations as well as real untargeted LC/MS plant metabolomics data. We compared the performances of MET-COFEA with the existing publicly available tools at LC/MS peak analysis level and demonstrated its excellent performance in this arena. 
MET-COFEA is freely available at http://bioinfo.noble.org/manuscript-support/met-cofea/.",2014-06-09 +21965557,QlicRice: a web interface for abiotic stress responsive QTL and loci interaction channels in rice.,"The QlicRice database is designed to host publicly accessible, abiotic stress responsive quantitative trait loci (QTLs) in rice (Oryza sativa) and their corresponding sequenced gene loci. It provides a platform for the data mining of abiotic stress responsive QTLs, as well as browsing and annotating associated traits, their location on a sequenced genome, mapped expressed sequence tags (ESTs) and tissue and growth stage-specific expressions on the whole genome. Information on QTLs related to abiotic stresses and their corresponding loci from a genomic perspective has not yet been integrated on an accessible, user-friendly platform. QlicRice offers client-responsive architecture to retrieve meaningful biological information--integrated and named 'Qlic Search'--embedded in a query phrase autocomplete feature, coupled with multiple search options that include trait names, genes and QTL IDs. A comprehensive physical and genetic map and vital statistics have been provided in a graphical manner for deciphering the position of QTLs on different chromosomes. A convenient and intuitive user interface have been designed to help users retrieve associations to agronomically important QTLs on abiotic stress response in rice. Database URL: http://nabg.iasri.res.in:8080/qlic-rice/.",2011-09-30 +25565328,Insights into the evolution of longevity from the bowhead whale genome.,"The bowhead whale (Balaena mysticetus) is estimated to live over 200 years and is possibly the longest-living mammal. These animals should possess protective molecular adaptations relevant to age-related diseases, particularly cancer. Here, we report the sequencing and comparative analysis of the bowhead whale genome and two transcriptomes from different populations. 
Our analysis identifies genes under positive selection and bowhead-specific mutations in genes linked to cancer and aging. In addition, we identify gene gain and loss involving genes associated with DNA repair, cell-cycle regulation, cancer, and aging. Our results expand our understanding of the evolution of mammalian longevity and suggest possible players involved in adaptive genetic changes conferring cancer resistance. We also found potentially relevant changes in genes related to additional processes, including thermoregulation, sensory perception, dietary adaptations, and immune response. Our data are made available online (http://www.bowhead-whale.org) to facilitate research in this long-lived species.",2015-01-01 +26672059,"Ambient Temperature and the Risk of Preterm Birth in Guangzhou, China (2001-2011).","

Background

Although effects of weather changes on human health have been widely reported, there is limited information regarding effects on pregnant women in developing countries.

Objective

We investigated the association between maternal exposure to ambient temperature and the risk of preterm birth (< 37 weeks of gestation) in Guangzhou, China.

Methods

We used a Cox proportional hazards model to estimate associations between preterm birth and average temperature during each week of gestation, with weekly temperature modeled as a time-varying exposure during four time windows: 1 week (the last week of the pregnancy), 4 weeks (the last 4 weeks of the pregnancy), late pregnancy (gestational week 20 onward), and the entire pregnancy. Information on singleton vaginal birth between 2001 and 2011 was collected. Daily meteorological data during the same period were obtained from the Guangzhou Meteorological Bureau.

Results

A total of 838,146 singleton vaginal births were included, among which 47,209 (5.6%) were preterm births. High mean temperatures during the 4 weeks, late pregnancy, and the entire pregnancy time windows were associated with an increased risk of preterm birth. Compared with the median temperature (24.4°C), weekly exposures during the last 4 weeks of the pregnancy to extreme cold (7.6°C, the 1st percentile) and extreme heat (31.9°C, the 99th percentile) were associated with 17.9% (95% CI: 10.2, 26.2%) and 10.0% (95% CI: 2.9, 17.6%) increased risks of preterm birth, respectively. The association between extreme heat and preterm birth was stronger for preterm births during weeks 20-31 and 32-34 than those during weeks 35-36.

Conclusions

These findings might have important implications in preventing preterm birth in Guangzhou as well as other areas with similar weather conditions.

Citation

He JR, Liu Y, Xia XY, Ma WJ, Lin HL, Kan HD, Lu JH, Feng Q, Mo WJ, Wang P, Xia HM, Qiu X, Muglia LJ. 2016. Ambient temperature and the risk of preterm birth in Guangzhou, China (2001-2011). Environ Health Perspect 124:1100-1106; http://dx.doi.org/10.1289/ehp.1509778.",2015-12-15 +26781082,WCOACH: Protein complex prediction in weighted PPI networks.,"Protein complexes are aggregates of protein molecules that play important roles in biological processes. Detecting protein complexes from protein-protein interaction (PPI) networks is one of the most challenging problems in computational biology, and many computational methods have been developed to solve this problem. Generally, these methods yield high false positive rates. In this article, a semantic similarity measure between proteins, based on Gene Ontology (GO) structure, is applied to weigh PPI networks. Consequently, one of the well-known methods, COACH, has been improved to be compatible with weighted PPI networks for protein complex detection. The new method, WCOACH, is compared to the COACH, ClusterOne, IPCA, CORE, OH-PIN, HC-PIN and MCODE methods on several PPI networks such as DIP, Krogan, Gavin 2002 and MIPS. WCOACH can be applied as a fast and high-performance algorithm to predict protein complexes in weighted PPI networks. All data and programs are freely available at http://bioinformatics.aut.ac.ir/wcoach.",2015-01-01 +24130465,GINI: from ISH images to gene interaction networks.,"Accurate inference of molecular and functional interactions among genes, especially in multicellular organisms such as Drosophila, often requires statistical analysis of correlations not only between the magnitudes of gene expressions, but also between their temporal-spatial patterns. The ISH (in-situ-hybridization)-based gene expression micro-imaging technology offers an effective approach to perform large-scale spatial-temporal profiling of whole-body mRNA abundance. 
However, analytical tools for discovering gene interactions from such data remain an open challenge due to various reasons, including difficulties in extracting canonical representations of gene activities from images, and in inference of statistically meaningful networks from such representations. In this paper, we present GINI, a machine learning system for inferring gene interaction networks from Drosophila embryonic ISH images. GINI builds on a computer-vision-inspired vector-space representation of the spatial pattern of gene expression in ISH images, enabled by our recently developed [Formula: see text] system; and a new multi-instance-kernel algorithm that learns a sparse Markov network model, in which, every gene (i.e., node) in the network is represented by a vector-valued spatial pattern rather than a scalar-valued gene intensity as in conventional approaches such as a Gaussian graphical model. By capturing the notion of spatial similarity of gene expression, and at the same time properly taking into account the presence of multiple images per gene via multi-instance kernels, GINI is well-positioned to infer statistically sound, and biologically meaningful gene interaction networks from image data. Using both synthetic data and a small manually curated data set, we demonstrate the effectiveness of our approach in network building. Furthermore, we report results on a large publicly available collection of Drosophila embryonic ISH images from the Berkeley Drosophila Genome Project, where GINI makes novel and interesting predictions of gene interactions. Software for GINI is available at http://sailing.cs.cmu.edu/Drosophila_ISH_images/",2013-10-10 +26063253,Standardized mappings--a framework to combine different semantic mappers into a standardized web-API.,"

Background

Automatic coding of medical terms is an important, but highly complicated and laborious task.

Objectives

To compare and evaluate different strategies a framework with a standardized web-interface was created. Two UMLS mapping strategies are compared to demonstrate the interface.

Methods

The framework is a Java Spring application running on a Tomcat application server. It accepts different parameters and returns results in JSON format. To demonstrate the framework, a list of medical data items was mapped by two different methods: similarity search in a large table of terminology codes versus search in a manually curated repository. These mappings were reviewed by a specialist.

Results

The evaluation shows that the framework is flexible (due to standardized interfaces like HTTP and JSON), performant and reliable. Accuracy of automatically assigned codes is limited (up to 40%).

Conclusion

Combining different semantic mappers into a standardized Web-API is feasible. This framework can be easily enhanced due to its modular design.",2015-01-01 +25930704,Computational identification of protein kinases and kinase-specific substrates in plants.,"The protein phosphorylation catalyzed by protein kinases (PKs) plays an essential role in almost all biological progresses in plants. Thus, the identification of PKs and kinase-specific substrates is fundamental for understanding the regulatory mechanisms of protein phosphorylation especially in controlling plant growth and development. In this chapter, we describe the computational methods and protocols for the identification of PKs and kinase-specific substrates in plants, by using Vitis vinifera as an example. First, the proteome sequences and experimentally identified phosphorylation sites (p-sites) in Vitis vinifera were downloaded. The potential PKs were computationally identified based on preconstructed Hidden Markov Model (HMM) profiles and ortholog searches, whereas the kinase-specific p-sites, or site-specific kinase-substrate relations (ssKSRs) were initially predicted by the software package of Group-based Prediction System (GPS) and further processed by the iGPS algorithm (in vivo GPS) to filter potentially false positive hits. All primary data sets and prediction results of Vitis vinifera are available at: http://ekpd.biocuckoo.org/protocol.php.",2015-01-01 +28138467,"Predicting Future Years of Life, Health, and Functional Ability: A Healthy Life Calculator for Older Adults.","Objective: To create personalized estimates of future health and ability status for older adults. Method: Data came from the Cardiovascular Health Study (CHS), a large longitudinal study. Outcomes included years of life, years of healthy life (based on self-rated health), years of able life (based on activities of daily living), and years of healthy and able life. 
We developed regression estimates using the demographic and health characteristics that best predicted the four outcomes. Internal and external validity were assessed. Results: A prediction equation based on 11 variables accounted for about 40% of the variability for each outcome. Internal validity was excellent, and external validity was satisfactory. The resulting CHS Healthy Life Calculator (CHSHLC) is available at http://healthylifecalculator.org. Conclusion: CHSHLC provides a well-documented estimate of future years of healthy and able life for older adults, who may use it in planning for the future.",2015-01-01 +23620364,HOMECAT: consensus homologs mapping for interspecific knowledge transfer and functional genomic data integration.,"

Motivation

Comparative studies are encouraged by the fast increase of data availability from the latest high-throughput techniques, in particular from functional genomic studies. Yet, the size of datasets, the challenge of complete orthologs findings and not last, the variety of identification formats, make information integration challenging. With HOMECAT, we aim to facilitate cross-species relationship identification and data mapping, by combining orthology predictions from several publicly available sources, a convenient interface for high-throughput data download and automatic identifier conversion into a Cytoscape plug-in, that provides both an integration with a large set of bioinformatics tools, as well as a user-friendly interface.

Availability

HOMECAT and the Supplementary Materials are freely available at http://www.cbmc.it/homecat/.",2013-04-24 +24906298,Efficient de novo assembly of large and complex genomes by massively parallel sequencing of Fosmid pools.,"

Background

Sampling genomes with Fosmid vectors and sequencing of pooled Fosmid libraries on the Illumina platform for massive parallel sequencing is a novel and promising approach to optimizing the trade-off between sequencing costs and assembly quality.

Results

In order to sequence the genome of Norway spruce, which is of great size and complexity, we developed and applied a new technology based on the massive production, sequencing, and assembly of Fosmid pools (FP). The spruce chromosomes were sampled with ~40,000 bp Fosmid inserts to obtain around two-fold genome coverage, in parallel with traditional whole genome shotgun sequencing (WGS) of haploid and diploid genomes. Compared to the WGS results, the contiguity and quality of the FP assemblies were high, and they allowed us to fill WGS gaps resulting from repeats, low coverage, and allelic differences. The FP contig sets were further merged with WGS data using a novel software package GAM-NGS.

Conclusions

By exploiting FP technology, the first published assembly of a conifer genome was sequenced entirely with massively parallel sequencing. Here we provide a comprehensive report on the different features of the approach and the optimization of the process.We have made public the input data (FASTQ format) for the set of pools used in this study:ftp://congenie.org/congenie/Nystedt_2013/Assembly/ProcessedData/FosmidPools/.(alternatively accessible via http://congenie.org/downloads).The software used for running the assembly process is available at http://research.scilifelab.se/andrej_alexeyenko/downloads/fpools/.",2014-06-06 +24888447,DBAASP: database of antimicrobial activity and structure of peptides.,"The Database of Antimicrobial Activity and Structure of Peptides (DBAASP) is a manually curated database for those peptides for which antimicrobial activity against particular targets has been evaluated experimentally. The database is a depository of complete information on: the chemical structure of peptides; target species; target object of cell; peptide antimicrobial/haemolytic/cytotoxic activities; and experimental conditions at which activities were estimated. The DBAASP search page allows the user to search peptides according to their structural characteristics, complexity type (monomer, dimer and two-peptide), source, synthesis type (ribosomal, nonribosomal and synthetic) and target species. The database prediction algorithm provides a tool for rational design of new antimicrobial peptides. DBAASP is accessible at http://www.biomedicine.org.ge/dbaasp/.",2014-07-10 +23303507,SIBER: systematic identification of bimodally expressed genes using RNAseq data.,"

Motivation

Identification of bimodally expressed genes is an important task, as genes with bimodal expression play important roles in cell differentiation, signalling and disease progression. Several useful algorithms have been developed to identify bimodal genes from microarray data. Currently, no method can deal with data from next-generation sequencing, which is emerging as a replacement technology for microarrays.

Results

We present SIBER (systematic identification of bimodally expressed genes using RNAseq data) for effectively identifying bimodally expressed genes from next-generation RNAseq data. We evaluate several candidate methods for modelling RNAseq count data and compare their performance in identifying bimodal genes through both simulation and real data analysis. We show that the lognormal mixture model performs best in terms of power and robustness under various scenarios. We also compare our method with alternative approaches, including profile analysis using clustering and kurtosis (PACK) and cancer outlier profile analysis (COPA). Our method is robust, powerful, invariant to shifting and scaling, has no blind spots and has a sample-size-free interpretation.

Availability

The R package SIBER is available at the website http://bioinformatics.mdanderson.org/main/OOMPA:Overview.",2013-01-09 +24868199,A sequential Monte Carlo framework for haplotype inference in CNV/SNP genotype data.,"Copy number variations (CNVs) are abundant in the human genome. They have been associated with complex traits in genome-wide association studies (GWAS) and expected to continue playing an important role in identifying the etiology of disease phenotypes. As a result of current high throughput whole-genome single-nucleotide polymorphism (SNP) arrays, we currently have datasets that simultaneously have integer copy numbers in CNV regions as well as SNP genotypes. At the same time, haplotypes that have been shown to offer advantages over genotypes in identifying disease traits even though available for SNP genotypes are largely not available for CNV/SNP data due to insufficient computational tools. We introduce a new framework for inferring haplotypes in CNV/SNP data using a sequential Monte Carlo sampling scheme 'Tree-Based Deterministic Sampling CNV' (TDSCNV). We compare our method with polyHap(v2.0), the only currently available software able to perform inference in CNV/SNP genotypes, on datasets of varying number of markers. We have found that both algorithms show similar accuracy but TDSCNV is an order of magnitude faster while scaling linearly with the number of markers and number of individuals and thus could be the method of choice for haplotype inference in such datasets. Our method is implemented in the TDSCNV package which is available for download at http://www.ee.columbia.edu/~anastas/tdscnv.",2014-04-24 +25452418,Mean of the typical decoding rates: a new translation efficiency index based on the analysis of ribosome profiling data.,"Gene translation modeling and prediction is a fundamental problem that has numerous biomedical implementations. 
In this work we present a novel, user-friendly tool/index for calculating the mean of the typical decoding rates that enables predicting translation elongation efficiency of protein coding genes for different tissue types, developmental stages, and experimental conditions. The suggested translation efficiency index is based on the analysis of the organism's ribosome profiling data. This index could be used for example to predict changes in translation elongation efficiency of lowly expressed genes that usually have relatively low and/or biased ribosomal densities and protein levels measurements, or can be used for example for predicting translation efficiency of new genetically engineered genes. We demonstrate the usability of this index via the analysis of six organisms in different tissues and developmental stages. Distributable cross platform application and guideline are available for download at: http://www.cs.tau.ac.il/~tamirtul/MTDR/MTDR_Install.html.",2014-12-01 +25741011,Diversifying Selection Analysis Predicts Antigenic Evolution of 2009 Pandemic H1N1 Influenza A Virus in Humans.,"

Unlabelled

Although a large number of immune epitopes have been identified in the influenza A virus (IAV) hemagglutinin (HA) protein using various experimental systems, it is unclear which are involved in protective immunity to natural infection in humans. We developed a data mining approach analyzing natural H1N1 human isolates to identify HA protein regions that may be targeted by the human immune system and can predict the evolution of IAV. We identified 16 amino acid sites experiencing diversifying selection during the evolution of prepandemic seasonal H1N1 strains and found that 11 sites were located in experimentally determined B-cell/antibody (Ab) epitopes, including three distinct neutralizing Caton epitopes: Sa, Sb, and Ca2 [A. J. Caton, G. G. Brownlee, J. W. Yewdell, and W. Gerhard, Cell 31:417-427, 1982, http://dx.doi.org/10.1016/0092-8674(82)90135-0]. We predicted that these diversified epitope regions would be the targets of mutation as the 2009 H1N1 pandemic (pH1N1) lineage evolves in response to the development of population-level protective immunity in humans. Using a chi-squared goodness-of-fit test, we identified 10 amino acid sites that significantly differed between the pH1N1 isolates and isolates from the recent 2012-2013 and 2013-2014 influenza seasons. Three of these sites were located in the same diversified B-cell/Ab epitope regions as identified in the analysis of prepandemic sequences, including Sa and Sb. As predicted, hemagglutination inhibition (HI) assays using human sera from subjects vaccinated with the initial pH1N1 isolate demonstrated reduced reactivity against 2013-2014 isolates. Taken together, these results suggest that diversifying selection analysis can identify key immune epitopes responsible for protective immunity to influenza virus in humans and thereby predict virus evolution.

Importance

The WHO estimates that approximately 5 to 10% of adults and 20 to 30% of children in the world are infected by influenza virus each year. While an adaptive immune response helps eliminate the virus following acute infection, the virus rapidly evolves to evade the established protective memory immune response, thus allowing for the regular seasonal cycles of influenza virus infection. The analytical approach described here, which combines an analysis of diversifying selection with an integration of immune epitope data, has allowed us to identify antigenic regions that contribute to protective immunity and are therefore the key targets of immune evasion by the virus. This information can be used to determine when sequence variations in seasonal influenza virus strains have affected regions responsible for protective immunity in order to decide when new vaccine formulations are warranted.",2015-03-04 +25740981,"Dynamic Interaction of Stress Granules, DDX3X, and IKK-α Mediates Multiple Functions in Hepatitis C Virus Infection.","The ubiquitous ATP-dependent RNA helicase DDX3X is involved in many cellular functions, including innate immunity, and is a pivotal host factor for hepatitis C virus (HCV) infection. Recently, we showed that DDX3X specifically recognizes the HCV 3' untranslated region (UTR), leading to the activation of IKK-α and a cascade of lipogenic signaling to facilitate lipid droplet biogenesis and viral assembly (Q. Li, V. Pene, S. Krishnamurthy, H. Cha, and T. J. Liang, Nat Med 19:722-729, 2013, http://dx.doi.org/10.1038/nm.3190). The interaction of DDX3X with HCV core protein seems to be dispensable for its proviral role. In this study, through systematic imaging and biochemical and virologic approaches, we identified a dynamic association between DDX3X and various cellular compartments and viral elements mediating multiple functions of DDX3X in productive HCV infection. 
Upon HCV infection, the HCV 3'UTR interacts with DDX3X and IKK-α, which redistribute to speckle-like cytoplasmic structures shown to be stress granules (SGs). As viral proteins accumulate in infected cells, DDX3X granules together with SG-associated proteins redistribute and colocalize with HCV core protein around lipid droplets (LDs). IKK-α, however, does not relocate to the LD but translocates to the nucleus. In HCV-infected cells, various HCV nonstructural proteins also interact or colocalize with DDX3X in close proximity to SGs and LDs, consistent with the tight juxtaposition of the replication complex and the assembly site at the surface of LDs. Short interfering RNA (siRNA)-mediated silencing of DDX3X and multiple SG components markedly inhibits HCV infection. Our data suggest that DDX3X initiates a multifaceted cellular program involving dynamic associations with HCV RNA and proteins, IKK-α, SG, and LD surfaces for its crucial role in the HCV life cycle. IMPORTANCE DDX3X is a proviral host factor for HCV infection. Recently, we showed that DDX3X binds to the HCV 3'UTR, activating IKK-α and cellular lipogenesis to facilitate viral assembly (Q. Li et al., Nat Med 19:722-729, 2013, http://dx.doi.org/10.1038/nm.3190). Here, we report associations of DDX3X with various cellular compartments and viral elements that mediate its multiple functions in the HCV life cycle. Upon infection, the HCV 3'UTR redistributes DDX3X and IKK-α to speckle-like cytoplasmic structures shown to be SGs. Subsequently, interactions between DDX3X, SG, and HCV proteins facilitate the translocation of DDX3X-SG complexes to the LD surface. HCV nonstructural proteins are shown to colocalize with DDX3X in close proximity to SGs and LDs, consistent with the tight juxtaposition of the HCV replication complex and assembly site at the LD surface. 
Our data demonstrate that DDX3X initiates a multifaceted cellular program involving dynamic associations with HCV elements, IKK-α, SGs, and LDs for its critical role in HCV infection.",2015-03-04 +30011741,State of the art on the initiatives and activities relevant to risk assessment and risk management of nanotechnologies in the food and agriculture sectors.,"The Food and Agriculture Organization of the United Nations (FAO) and World Health Organization (WHO) conducted an international expert meeting on the potential food safety implications of the application of nanotechnologies in the food and agriculture sectors in June 2009. The present report reviews national, regional and international activities on the risk assessment and risk management of nanomaterials in the food and agriculture sectors that have been carried out between 2009 and 2012. The full report of the work is presented in a FAO/WHO paper available at http://www.fao.org/food/food-safety-quality/a-z-index/nano. Information and data have been collected on national and international approaches that identify and implement strategies to address potential hazards associated with the use of nanotechnology-related products or techniques. Selected activities by international governmental and nongovernmental organizations were reviewed and the significant achievements are noted. Meta-analysis of scientific reviews addressing risk assessment of nanotechnologies in the food and agriculture sectors was conducted and key principles for the safety assessment of nanomaterials were identified. It was concluded that although the concepts of potential use of nanomaterials in food and the implied benefits for stakeholders including consumers have not changed significantly since 2009, there are new products being developed and claimed to enter the market and national and international interests in considering the needs for applying regulations on engineered nanomaterials are increasing. 
The number of published risk assessment of products used in foods that are nanomaterials or contain particles that fall within applicable definitions is growing slowly. Several data gaps with respect to interaction between nanomaterials and food matrices, behaviours of nanomaterials in the human body, methods to determine such interactions and behaviours, and the relevance of such data for risk assessment continue to exist. The international collaboration in the area of nanomaterials and nanotechnology in food and agriculture must be strengthened. International efforts on risk assessment and risk communication may benefit from the experience gained at the national and regional levels. Should a sufficient number of case studies of risk assessment of commercial products become available with time, a review of approaches applied and results obtained could support the development of risk assessment procedures acceptable at the international level.",2014-03-22 +21572886,A database for the predicted pharmacophoric features of medicinal compounds.,"

Unlabelled

Pharmacophore feature is defined by a set of chemical structure patterns having the active site of drug like molecule. Pharmacophore can be used to assist in building hypothesis about desirable chemical properties in drug molecule and hence it can be used to refine and modify drug candidates. We predicted the pharmacophoric features of 150 medicinal compounds from plants for anti-cancer, anti-carcinogenic, anti-diabetic, anti-microbial, and anti-oxidant. Estimation of pharmacophoric feature is necessary to ensure the optimal supramolecular interaction with a biological target and to trigger or block its biological response. We subsequently make this data available to open access using a database at the URL: http://www.hccbif.info/index.htm

Availability

The database is available for free at http://www.hccbif.info/index.htm.",2011-05-07 +23162058,VirusSeq: software to identify viruses and their integration sites using next-generation sequencing of human cancer tissue.,"

Summary

We developed a new algorithmic method, VirusSeq, for detecting known viruses and their integration sites in the human genome using next-generation sequencing data. We evaluated VirusSeq on whole-transcriptome sequencing (RNA-Seq) data of 256 human cancer samples from The Cancer Genome Atlas. Using these data, we showed that VirusSeq accurately detects the known viruses and their integration sites with high sensitivity and specificity. VirusSeq can also perform this function using whole-genome sequencing data of human tissue.

Availability

VirusSeq has been implemented in PERL and is available at http://odin.mdacc.tmc.edu/~xsu1/VirusSeq.html.

Contact

xsu1@mdanderson.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-17 +24008421,"LC-IMS-MS Feature Finder: detecting multidimensional liquid chromatography, ion mobility and mass spectrometry features in complex datasets.","

Motivation

The addition of ion mobility spectrometry to liquid chromatography-mass spectrometry experiments requires new, or updated, software tools to facilitate data processing.

Results

We introduce a command line software application LC-IMS-MS Feature Finder that searches for molecular ion signatures in multidimensional liquid chromatography-ion mobility spectrometry-mass spectrometry (LC-IMS-MS) data by clustering deisotoped peaks with similar monoisotopic mass, charge state, LC elution time and ion mobility drift time values. The software application includes an algorithm for detecting and quantifying co-eluting chemical species, including species that exist in multiple conformations that may have been separated in the IMS dimension.

Availability

LC-IMS-MS Feature Finder is available as a command-line tool for download at http://omics.pnl.gov/software/LC-IMS-MS_Feature_Finder.php. The Microsoft.NET Framework 4.0 is required to run the software. All other dependencies are included with the software package. Usage of this software is limited to non-profit research use (see README).

Contact

rds@pnnl.gov.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-09-05 +22009673,ICEberg: a web-based resource for integrative and conjugative elements found in Bacteria.,"ICEberg (http://db-mml.sjtu.edu.cn/ICEberg/) is an integrated database that provides comprehensive information about integrative and conjugative elements (ICEs) found in bacteria. ICEs are conjugative self-transmissible elements that can integrate into and excise from a host chromosome. An ICE contains three typical modules, integration and excision, conjugation, and regulation modules, that collectively promote vertical inheritance and periodic lateral gene flow. Many ICEs carry likely virulence determinants, antibiotic-resistant factors and/or genes coding for other beneficial traits. ICEberg offers a unique, highly organized, readily explorable archive of both predicted and experimentally supported ICE-relevant data. It currently contains details of 428 ICEs found in representatives of 124 bacterial species, and a collection of >400 directly related references. A broad range of similarity search, sequence alignment, genome context browser, phylogenetic and other functional analysis tools are readily accessible via ICEberg. We propose that ICEberg will facilitate efficient, multi-disciplinary and innovative exploration of bacterial ICEs and be of particular interest to researchers in the broad fields of prokaryotic evolution, pathogenesis, biotechnology and metabolism. The ICEberg database will be maintained, updated and improved regularly to ensure its ongoing maximum utility to the research community.",2011-10-18 +26731791,"Genome-Wide Analysis of DNA Methylation and Fine Particulate Matter Air Pollution in Three Study Populations: KORA F3, KORA F4, and the Normative Aging Study.","

Background

Epidemiological studies have reported associations between particulate matter (PM) concentrations and cancer and respiratory and cardiovascular diseases. DNA methylation has been identified as a possible link but so far it has only been analyzed in candidate sites.

Objectives

We studied the association between DNA methylation and short- and mid-term air pollution exposure using genome-wide data and identified potential biological pathways for additional investigation.

Methods

We collected whole blood samples from three independent studies-KORA F3 (2004-2005) and F4 (2006-2008) in Germany, and the Normative Aging Study (1999-2007) in the United States-and measured genome-wide DNA methylation proportions with the Illumina 450k BeadChip. PM concentration was measured daily at fixed monitoring stations and three different trailing averages were considered and regressed against DNA methylation: 2-day, 7-day and 28-day. Meta-analysis was performed to pool the study-specific results.

Results

Random-effect meta-analysis revealed 12 CpG (cytosine-guanine dinucleotide) sites as associated with PM concentration (1 for 2-day average, 1 for 7-day, and 10 for 28-day) at a genome-wide Bonferroni significance level (p ≤ 7.5E-8); 9 out of these 12 sites expressed increased methylation. Through estimation of I2 for homogeneity assessment across the studies, 4 of these sites (annotated in NSMAF, C1orf212, MSGN1, NXN) showed p > 0.05 and I2 < 0.5: the site from the 7-day average results and 3 for the 28-day average. Applying false discovery rate, p-value < 0.05 was observed in 8 and 1,819 additional CpGs at 7- and 28-day average PM2.5 exposure respectively.

Conclusion

The PM-related CpG sites found in our study suggest novel plausible systemic pathways linking ambient PM exposure to adverse health effect through variations in DNA methylation.

Citation

Panni T, Mehta AJ, Schwartz JD, Baccarelli AA, Just AC, Wolf K, Wahl S, Cyrys J, Kunze S, Strauch K, Waldenberger M, Peters A. 2016. A genome-wide analysis of DNA methylation and fine particulate matter air pollution in three study populations: KORA F3, KORA F4, and the Normative Aging Study. Environ Health Perspect 124:983-990; http://dx.doi.org/10.1289/ehp.1509966.",2016-01-05 +25227424,"Correlation of EGFR expression, gene copy number and clinicopathological status in NSCLC.","

Background

Epidermal Growth Factor Receptor (EGFR) targeting therapies are currently of great relevance for the treatment of lung cancer. For this reason, in addition to mutational analysis immunohistochemistry (IHC) of EGFR in lung cancer has been discussed for the decision making of according therapeutic strategies. The aim of this study was to obtain standardization of EGFR-expression methods for the selection of patients who might benefit of EGFR targeting therapies.

Methods

As a starting point of a broad investigation, aimed at elucidating the expression of EGFR on different biological levels, four EGFR specific antibodies were analyzed concerning potential differences in expression levels by Immunohistochemistry (IHC) and correlated with fluorescence in situ hybridization (FISH) analysis and clinicopathological data. 206 tumor tissues were analyzed in a tissue microarray format employing immunohistochemistry with four different antibodies including Dako PharmDx kit (clone 2-18C9), clone 31G7, clone 2.1E1 and clone SP84 using three different scoring methods. Protein expression was compared to FISH utilizing two different probes.

Results

EGFR protein expression determined by IHC with Dako PharmDx kit, clone 31G7 and clone 2.1E1 (p ≤ 0.05) correlated significantly with both FISH probes independently of the three scoring methods; best correlation is shown for 31G7 using the scoring method that defined EGFR positivity when ≥ 10% of the tumor cells show membranous staining of moderate and severe intensity (p=0.001).

Conclusion

Overall, our data show differences in EGFR expression determined by IHC, due to the applied antibody. Highest concordance with FISH is shown for antibody clone 31G7, evaluated with score B (p=0.001). On this account, this antibody clone might be utilized for standard evaluation of EGFR expression by IHC.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_165.",2014-09-17 +25923767,DIDA: Distributed Indexing Dispatched Alignment.,"One essential application in bioinformatics that is affected by the high-throughput sequencing data deluge is the sequence alignment problem, where nucleotide or amino acid sequences are queried against targets to find regions of close similarity. When queries are too many and/or targets are too large, the alignment process becomes computationally challenging. This is usually addressed by preprocessing techniques, where the queries and/or targets are indexed for easy access while searching for matches. When the target is static, such as in an established reference genome, the cost of indexing is amortized by reusing the generated index. However, when the targets are non-static, such as contigs in the intermediate steps of a de novo assembly process, a new index must be computed for each run. To address such scalability problems, we present DIDA, a novel framework that distributes the indexing and alignment tasks into smaller subtasks over a cluster of compute nodes. It provides a workflow beyond the common practice of embarrassingly parallel implementations. DIDA is a cost-effective, scalable and modular framework for the sequence alignment problem in terms of memory usage and runtime. It can be employed in large-scale alignments to draft genomes and intermediate stages of de novo assembly runs. The DIDA source code, sample files and user manual are available through http://www.bcgsc.ca/platform/bioinfo/software/dida. The software is released under the British Columbia Cancer Agency License (BCCA), and is free for academic use.",2015-04-29 +23106911,Future body mass index modelling based on macronutrient profiles and physical activity.,"

Background

An accurate system of determining the relationship of macronutrient profiles of foods and beverages to the long-term weight impacts of foods is necessary for evidence-based, unbiased front-of-the-package food labels.

Methods

Data sets on diet, physical activity, and BMI came from the Food and Agriculture Organization (FAO), the World Health Organization (WHO), the Diabetes Control and Complications Trial (DCCT), and Epidemiology Diabetes Intervention and Complications (EDIC). To predict future BMI of individuals, multiple regression derived FAO/WHO and DCCT/EDIC formulas related macronutrient profiles and physical activity (independent variables) to BMI change/year (dependent variable). Similar formulas without physical activity related macronutrient profiles of individual foods and beverages to four-year weight impacts of those items and compared those forecasts to published food group profiling estimates from three large prospective studies by Harvard nutritional epidemiologists.

Results

FAO/WHO food and beverage formula: four-year weight impact (pounds)=(0.07710 alcohol g+11.95 (381.7+carbohydrates g per serving)*4/(2,613+kilocalories per serving)-304.9 (30.38+dietary fiber g per serving)/(2,613+kilocalories per serving)+19.73 (84.44+total fat g)*9/(2,613+kilocalories per serving)-68.57 (20.45+PUFA g per serving)*9/(2,613+kilocalories per serving))*2.941-12.78 (n=334, R(2)=0.29, P < 0.0001). DCCT/EDIC formula for four-year weight impact (pounds)=(0.898 (102.2+protein g per serving)*4/(2,297+kilocalories per serving)+1.063 (264.2+carbohydrates g per serving)*4/(2,297+ kilocalories per serving)-13.19 (24.29+dietary fiber g per serving)/ (2,297+kilocalories per serving)+ 0.973 (74.59+(total fat g per serving-PUFA g per serving)*9/(2,297+kilocalories per serving))*85.82-68.11 (n=1,055, R(2)=0.03, P < 0.0001). (FAO/WHO+ DCCT/EDIC formula forecasts averaged correlated strongly with published food group profiling findings except for potatoes and dairy foods (n=12, r=0.85, P = 0.0004). Formula predictions did not correlate with food group profiling findings for potatoes and dairy products (n=10, r= -0.33 P=0.36). A formula based diet and exercise analysis tool is available to researchers and individuals: http://thehealtheconomy.com/healthTool/.

Conclusions

Two multiple regression derived formulas from dissimilar databases produced markedly similar estimates of future BMI for 1,055 individuals with type 1 diabetes and female and male cohorts from 167 countries. These formulas predicted the long-term weight impacts of foods and beverages, closely corresponding with most food group profiling estimates from three other databases. If discrepancies with potatoes and dairy products can be resolved, these formulas present a potential basis for a front-of-the-package weight impact rating system.",2012-10-29 +25015987,MAGNA: Maximizing Accuracy in Global Network Alignment.,"

Motivation

Biological network alignment aims to identify similar regions between networks of different species. Existing methods compute node similarities to rapidly identify from possible alignments the high-scoring alignments with respect to the overall node similarity. But, the accuracy of the alignments is then evaluated with some other measure that is different than the node similarity used to construct the alignments. Typically, one measures the amount of conserved edges. Thus, the existing methods align similar nodes between networks hoping to conserve many edges (after the alignment is constructed!).

Results

Instead, we introduce MAGNA to directly 'optimize' edge conservation while the alignment is constructed, without decreasing the quality of node mapping. MAGNA uses a genetic algorithm and our novel function for 'crossover' of two 'parent' alignments into a superior 'child' alignment to simulate a 'population' of alignments that 'evolves' over time; the 'fittest' alignments survive and proceed to the next 'generation', until the alignment accuracy cannot be optimized further. While we optimize our new and superior measure of the amount of conserved edges, MAGNA can optimize any alignment accuracy measure, including a combined measure of both node and edge conservation. In systematic evaluations against state-of-the-art methods (IsoRank, MI-GRAAL and GHOST), on both synthetic networks and real-world biological data, MAGNA outperforms all of the existing methods, in terms of both node and edge conservation as well as both topological and biological alignment accuracy.

Availability

Software: http://nd.edu/~cone/MAGNA CONTACT: tmilenko@nd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-10 +22369237,INDUS - a composition-based approach for rapid and accurate taxonomic classification of metagenomic sequences.,"

Background

Taxonomic classification of metagenomic sequences is the first step in metagenomic analysis. Existing taxonomic classification approaches are of two types, similarity-based and composition-based. Similarity-based approaches, though accurate and specific, are extremely slow. Since metagenomic projects generate millions of sequences, adopting similarity-based approaches becomes virtually infeasible for research groups having modest computational resources. In this study, we present INDUS - a composition-based approach that incorporates the following novel features. First, INDUS discards the 'one genome-one composition' model adopted by existing compositional approaches. Second, INDUS uses 'compositional distance' information for identifying appropriate assignment levels. Third, INDUS incorporates steps that attempt to reduce biases due to database representation.

Results

INDUS is able to rapidly classify sequences in both simulated and real metagenomic sequence data sets with classification efficiency significantly higher than existing composition-based approaches. Although the classification efficiency of INDUS is observed to be comparable to those by similarity-based approaches, the binning time (as compared to alignment based approaches) is 23-33 times lower.

Conclusion

Given its rapid execution time, and high levels of classification efficiency, INDUS is expected to be of immense interest to researchers working in metagenomics and microbial ecology.

Availability

A web-server for the INDUS algorithm is available at http://metagenomics.atc.tcs.com/INDUS/",2011-11-30 +22136275,Identification and functional characterization of a primate-specific E2F1 binding motif regulating MCPH1 expression.,"MCPH1 (also named BRIT1) is one of the known genes responsible for autosomal recessive primary microcephaly (small head syndrome), suggesting its important role in brain development. The interaction of MCPH1 with transcriptional factors like E2F1 is required for the activation of cell cycle checkpoint, DNA repair and apoptosis. However, the molecular mechanism of MCPH1 regulation is currently unclear. Here, we cloned the human MCPH1 promoter and we identified a novel E2F1 binding motif located in the proximal promoter region of MCPH1. The experiments using electrophoretic mobility shift and promoter assays showed that E2F1 could stimulate MCPH1 transcription by direct binding to the E2F1 motif. Overexpression of E2F1 led to the upregulation of MCPH1 transcription, and knocking down the endogenous E2F1 resulted in the inhibition of the MCPH1 promoter activity. Surprisingly, sequence comparison of vertebrate species suggested that the identified E2F1 binding motif is primate specific, consistent with the previous observation of rapid evolution of MCPH1 protein sequence in primates. We propose that during primate evolution MCPH1 has acquired a novel E2F1 binding motif in its promoter which may act as a parallel mechanism, acting together with the rapid protein sequence changes in primates, and eventually contributed to brain enlargement during primate evolution and human origin. Database The MCPH1 promoter region was sequenced in human, chimpanzee and rhesus macaque. 
Nucleotide sequence data are available in the GenBank database (http://www.ncbi.nlm.nih.gov/genbank/) under accession numbers JN573214, JN573215 and JN573216.",2012-01-09 +21543339,Methods and strategies for gene structure curation in WormBase.,"The Caenorhabditis elegans genome sequence was published over a decade ago; this was the first published genome of a multi-cellular organism and now the WormBase project has had a decade of experience in curating this genome's sequence and gene structures. In one of its roles as a central repository for nematode biology, WormBase continues to refine the gene structure annotations using sequence similarity and other computational methods, as well as information from the literature- and community-submitted annotations. We describe the various methods of gene structure curation that have been tried by WormBase and the problems associated with each of them. We also describe the current strategy for gene structure curation, and introduce the WormBase 'curation tool', which integrates different data sources in order to identify new and correct gene structures. Database URL: http://www.wormbase.org/.",2011-05-03 +25009129,StemSearch: RNA search tool based on stem identification and indexing.,"The discovery and functional analysis of noncoding RNA (ncRNA) systems in different organisms motivates the development of tools for aiding ncRNA research. Several tools exist that search for occurrences of a given RNA structural profile in genomic sequences. Yet, there is a need for an ""RNA BLAST"" tool, i.e., a tool that takes a putative functional RNA sequence as input, and efficiently searches for similar sequences in genomic databases, taking into consideration potential secondary structure features of the input query sequence. This work aims at providing such a tool. Our tool, denoted StemSearch, is based on a structural representation of an RNA sequence by its potential stems. 
Potential stems in genomic sequences are identified in a preprocessing stage, and indexed. A user-provided query sequence is likewise processed, and stems from the target genomes that are similar to the query stems are retrieved from the index. Then, relevant genomic regions are identified and ranked according to their similarity to the query stem-set while enforcing conservation of cross-stem topology. Experiments using RFAM families show significantly improved recall for StemSearch over BLAST, with small loss of precision. We further demonstrate our system's capability to handle eukaryotic genomes by successfully searching for members of the 7SK family in chromosome 2 of the human genome. StemSearch is freely available on the web at: http://www.cs.bgu.ac.il/∼negevcb/StemSearch.",2014-07-05 +22434844,"Argo: an integrative, interactive, text mining-based workbench supporting curation.","Curation of biomedical literature is often supported by the automatic analysis of textual content that generally involves a sequence of individual processing components. Text mining (TM) has been used to enhance the process of manual biocuration, but has been focused on specific databases and tasks rather than an environment integrating TM tools into the curation pipeline, catering for a variety of tasks, types of information and applications. Processing components usually come from different sources and often lack interoperability. The well established Unstructured Information Management Architecture is a framework that addresses interoperability by defining common data structures and interfaces. However, most of the efforts are targeted towards software developers and are not suitable for curators, or are otherwise inconvenient to use on a higher level of abstraction. 
To overcome these issues we introduce Argo, an interoperable, integrative, interactive and collaborative system for text analysis with a convenient graphic user interface to ease the development of processing workflows and boost productivity in labour-intensive manual curation. Robust, scalable text analytics follow a modular approach, adopting component modules for distinct levels of text analysis. The user interface is available entirely through a web browser that saves the user from going through often complicated and platform-dependent installation procedures. Argo comes with a predefined set of processing components commonly used in text analysis, while giving the users the ability to deposit their own components. The system accommodates various areas and levels of user expertise, from TM and computational linguistics to ontology-based curation. One of the key functionalities of Argo is its ability to seamlessly incorporate user-interactive components, such as manual annotation editors, into otherwise completely automatic pipelines. As a use case, we demonstrate the functionality of an in-built manual annotation editor that is well suited for in-text corpus annotation tasks. DATABASE URL: http://www.nactem.ac.uk/Argo.",2012-03-20 +24966516,A novel sequence and context based method for promoter recognition.,"

Unlabelled

Identification of promoters in DNA sequence using computational techniques is a significant research area because of its direct association in transcription regulation. A wide range of algorithms are available for promoter prediction. Most of them are polymerase dependent and cannot handle eukaryotes and prokaryotes alike. This study proposes a polymerase independent algorithm, which can predict whether a given DNA fragment is a promoter or not, based on the sequence features and statistical elements. This algorithm considers all possible pentamers formed from the nucleotides A, C, G, and T along with CpG islands, TATA box, initiator elements, and downstream promoter elements. The highlight of the algorithm is that it is not polymerase specific and can predict for both eukaryotes and prokaryotes in the same computational manner even though the underlying biological mechanisms of promoter recognition differ greatly. The proposed Method, Promoter Prediction System - PPS-CBM achieved a sensitivity, specificity, and accuracy percentages of 75.08, 83.58 and 79.33 on E. coli data set and 86.67, 88.41 and 87.58 on human data set. We have developed a tool based on PPS-CBM, the proposed algorithm, with which multiple sequences of varying lengths can be tested simultaneously and the result is reported in a comprehensive tabular format. The tool also reports the strength of the prediction.

Availability

The tool and source code of PPS-CBM is available at http://keralabs.org.",2014-04-23 +26573665,A fast method for calculating reliable event supports in tree reconciliations via Pareto optimality.,"

Background

Given a gene and a species tree, reconciliation methods attempt to retrieve the macro-evolutionary events that best explain the discrepancies between the two tree topologies. The DTL parsimonious approach searches for a most parsimonious reconciliation between a gene tree and a (dated) species tree, considering four possible macro-evolutionary events (speciation, duplication, transfer, and loss) with specific costs. Unfortunately, many events are erroneously predicted due to errors in the input trees, inappropriate input cost values or because of the existence of several equally parsimonious scenarios. It is thus crucial to provide a measure of the reliability for predicted events. It has been recently proposed that the reliability of an event can be estimated via its frequency in the set of most parsimonious reconciliations obtained using a variety of reasonable input cost vectors. To compute such a support, a straightforward but time-consuming approach is to generate the costs slightly departing from the original ones, independently compute the set of all most parsimonious reconciliations for each vector, and combine these sets a posteriori. Another proposed approach uses Pareto-optimality to partition cost values into regions which induce reconciliations with the same number of DTL events. The support of an event is then defined as its frequency in the set of regions. However, often, the number of regions is not large enough to provide reliable supports.

Results

We present here a method to compute efficiently event supports via a polynomial-sized graph, which can represent all reconciliations for several different costs. Moreover, two methods are proposed to take into account alternative input costs: either explicitly providing an input cost range or allowing a tolerance for the over cost of a reconciliation. Our methods are faster than the region based method, substantially faster than the sampling-costs approach, and have a higher event-prediction accuracy on simulated data.

Conclusions

We propose a new approach to improve the accuracy of event supports for parsimonious reconciliation methods to account for uncertainty in the input costs. Furthermore, because of their speed, our methods can be used on large gene families. Our algorithms are implemented in the ecceTERA program, freely available from http://mbb.univ-montp2.fr/MBB/.",2015-11-14 +23140568,VANTED v2: a framework for systems biology applications.,"

Background

Experimental datasets are becoming larger and increasingly complex, spanning different data domains, thereby expanding the requirements for respective tool support for their analysis. Networks provide a basis for the integration, analysis and visualization of multi-omics experimental datasets.

Results

Here we present VANTED (version 2), a framework for systems biology applications, which comprises a comprehensive set of seven main tasks. These range from network reconstruction, data visualization, integration of various data types, network simulation to data exploration combined with a manifold support of systems biology standards for visualization and data exchange. The offered set of functionalities is instantiated by combining several tasks in order to enable users to view and explore a comprehensive dataset from different perspectives. We describe the system as well as an exemplary workflow.

Conclusions

VANTED is a stand-alone framework which supports scientists during the data analysis and interpretation phase. It is available as a Java open source tool from http://www.vanted.org.",2012-11-10 +24753038,Assessing the goodness of fit of personal risk models.,"We describe a flexible family of tests for evaluating the goodness of fit (calibration) of a pre-specified personal risk model to the outcomes observed in a longitudinal cohort. Such evaluation involves using the risk model to assign each subject an absolute risk of developing the outcome within a given time from cohort entry and comparing subjects' assigned risks with their observed outcomes. This comparison involves several issues. For example, subjects followed only for part of the risk period have unknown outcomes. Moreover, existing tests do not reveal the reasons for poor model fit when it occurs, which can reflect misspecification of the model's hazards for the competing risks of outcome development and death. To address these issues, we extend the model-specified hazards for outcome and death, and use score statistics to test the null hypothesis that the extensions are unnecessary. Simulated cohort data applied to risk models whose outcome and mortality hazards agreed and disagreed with those generating the data show that the tests are sensitive to poor model fit, provide insight into the reasons for poor fit, and accommodate a wide range of model misspecification. We illustrate the methods by examining the calibration of two breast cancer risk models as applied to a cohort of participants in the Breast Cancer Family Registry. The methods can be implemented using the Risk Model Assessment Program, an R package freely available at http://stanford.edu/~ggong/rmap/.",2014-04-22 +26645102,Health Impacts of Climate Change in Pacific Island Countries: A Regional Assessment of Vulnerabilities and Adaptation Priorities.,"

Background

Between 2010 and 2012, the World Health Organization Division of Pacific Technical Support led a regional climate change and health vulnerability assessment and adaptation planning project, in collaboration with health sector partners, in 13 Pacific island countries-Cook Islands, Federated States of Micronesia, Fiji, Kiribati, Marshall Islands, Nauru, Niue, Palau, Samoa, Solomon Islands, Tonga, Tuvalu, and Vanuatu.

Objective

We assessed the vulnerabilities of Pacific island countries to the health impacts of climate change and planned adaptation strategies to minimize such threats to health.

Methods

This assessment involved a combination of quantitative and qualitative techniques. The former included descriptive epidemiology, time series analyses, Poisson regression, and spatial modeling of climate and climate-sensitive disease data, in the few instances where this was possible; the latter included wide stakeholder consultations, iterative consensus building, and expert opinion. Vulnerabilities were ranked using a ""likelihood versus impact"" matrix, and adaptation strategies were prioritized and planned accordingly.

Results

The highest-priority climate-sensitive health risks in Pacific island countries included trauma from extreme weather events, heat-related illnesses, compromised safety and security of water and food, vector-borne diseases, zoonoses, respiratory illnesses, psychosocial ill-health, non-communicable diseases, population pressures, and health system deficiencies. Adaptation strategies relating to these climate change and health risks could be clustered according to categories common to many countries in the Pacific region.

Conclusion

Pacific island countries are among the most vulnerable in the world to the health impacts of climate change. This vulnerability is a function of their unique geographic, demographic, and socioeconomic characteristics combined with their exposure to changing weather patterns associated with climate change, the health risks entailed, and the limited capacity of the countries to manage and adapt in the face of such risks. Citation: McIver L, Kim R, Woodward A, Hales S, Spickett J, Katscherian D, Hashizume M, Honda Y, Kim H, Iddings S, Naicker J, Bambrick H, McMichael AJ, Ebi KL. 2016. Health impacts of climate change in Pacific island countries: a regional assessment of vulnerabilities and adaptation priorities. Environ Health Perspect 124:1707-1714; http://dx.doi.org/10.1289/ehp.1509756.",2015-12-08 +25913879,Phogly-PseAAC: Prediction of lysine phosphoglycerylation in proteins incorporating with position-specific propensity.,"Large-scale characterization of post-translational modifications (PTMs), such as posphorylation, acetylation and ubiquitination, has highlighted their importance in the regulation of a myriad of signaling events. However, as another type of PTMs-lysine phosphoglycerylation, the data of phosphoglycerylated sites has just been manually experimented in recent years. Given an uncharacterized protein sequence that contains many lysine residues, which one of them can be phosphoglycerylated and which one not? This is a challenging problem. In view of this, establishing a useful computational method and developing an efficient predictor are highly desired. Here a new predictor named Phogly-PseAAC was developed which incorporated with the position specific amino acid propensity. The feature importance through F-score value has also been ranked. The predictor with the best feature set obtained the accuracy 75.10%, sensitivity 68.87%, specificity 75.57% and MCC 0.2538 in LOO test cross validation with center nearest neighbor algorithm. 
Meanwhile, a web-server for Phogly-PseAAC is accessible at http://app.aporc.org/Phogly-PseAAC/. For the convenience of most experimental scientists, we have further provided a brief instruction for the web-server, by which users can easily get their desired results without the need to follow the complicated mathematics presented in this paper. It is anticipated that Phogly-PseAAC may become a useful high throughput tool for identifying the lysine phosphoglycerylation sites.",2015-04-24 +23977981,Automated analysis of immunoglobulin genes from high-throughput sequencing: life without a template.,"

Background

Immunoglobulin (that is, antibody) and T cell receptor genes are created through somatic gene rearrangement from gene segment libraries. Immunoglobulin genes are further diversified by somatic hypermutation and selection during the immune response. Studying the repertoires of these genes yields valuable insights into immune system function in infections, aging, autoimmune diseases and cancers. The introduction of high throughput sequencing has generated unprecedented amounts of repertoire and mutation data from immunoglobulin genes. However, common analysis programs are not appropriate for pre-processing and analyzing these data due to the lack of a template or reference for the whole gene.

Results

We present here the automated analysis pipeline we created for this purpose, which integrates various software packages of our own development and others', and demonstrate its performance.

Conclusions

Our analysis pipeline presented here is highly modular, and makes it possible to analyze the data resulting from high-throughput sequencing of immunoglobulin genes, in spite of the lack of a template gene. An executable version of the Automation program (and its source code) is freely available for downloading from our website: http://immsilico2.lnx.biu.ac.il/Software.html.",2013-08-27 +24992938,Comparative analysis of human and mouse immunoglobulin variable heavy regions from IMGT/LIGM-DB with IMGT/HighV-QUEST.,"

Background

Immunoglobulin (IG) complementarity determining region (CDR) includes VH CDR1, VH CDR2, VH CDR3, VL CDR1, VL CDR2 and VL CDR3. Of these, VH CDR3 plays a dominant role in recognizing and binding antigens. Three major mechanisms are involved in the formation of the VH repertoire: germline gene rearrangement, junctional diversity and somatic hypermutation. Features of the generation mechanisms of VH repertoire in humans and mice share similarities while VH CDR3 amino acid (AA) composition differs. Previous studies have mainly focused on germline gene rearrangement and the composition and structure of the CDR3 AA in humans and mice. However the number of AA changes due to somatic hypermutation and analysis of the junctional mechanism have been ignored.

Methods

Here we analyzed 9,340 human and 6,657 murine unique productive sequences of immunoglobulin (IG) variable heavy (VH) domains derived from IMGT/LIGM-DB database to understand how VH CDR3 AA compositions significantly differed between human and mouse. These sequences were identified and analyzed by IMGT/HighV-QUEST (http://www.imgt.org), including gene usage, number of AA changes due to somatic hypermutation, AA length distribution of VH CDR3, AA composition, and junctional diversity.

Results

Analyses of human and murine IG repertoires showed significant differences. A higher number of AA changes due to somatic hypermutation and more abundant N-region addition were found in human compared to mouse, which might be an important factor leading to differences in VH CDR3 amino acid composition.

Conclusions

These findings are a benchmark for understanding VH repertoires and can be used to characterize the VH repertoire during immune responses. The study will allow standardized comparison for high throughput results obtained by IMGT/HighV-QUEST, the reference portal for NGS repertoire.",2014-07-03 +23918252,A distance-based test of association between paired heterogeneous genomic data.,"

Motivation

Due to rapid technological advances, a wide range of different measurements can be obtained from a given biological sample including single nucleotide polymorphisms, copy number variation, gene expression levels, DNA methylation and proteomic profiles. Each of these distinct measurements provides the means to characterize a certain aspect of biological diversity, and a fundamental problem of broad interest concerns the discovery of shared patterns of variation across different data types. Such data types are heterogeneous in the sense that they represent measurements taken at different scales or represented by different data structures.

Results

We propose a distance-based statistical test, the generalized RV (GRV) test, to assess whether there is a common and non-random pattern of variability between paired biological measurements obtained from the same random sample. The measurements enter the test through the use of two distance measures, which can be chosen to capture a particular aspect of the data. An approximate null distribution is proposed to compute P-values in closed-form and without the need to perform costly Monte Carlo permutation procedures. Compared with the classical Mantel test for association between distance matrices, the GRV test has been found to be more powerful in a number of simulation settings. We also demonstrate how the GRV test can be used to detect biological pathways in which genetic variability is associated to variation in gene expression levels in an ovarian cancer sample, and present results obtained from two independent cohorts.

Availability

R code to compute the GRV test is freely available from http://www2.imperial.ac.uk/~gmontana",2013-08-05 +24990605,ARBitrator: a software pipeline for on-demand retrieval of auto-curated nifH sequences from GenBank.,"

Motivation

Studies of the biochemical functions and activities of uncultivated microorganisms in the environment require analysis of DNA sequences for phylogenetic characterization and for the development of sequence-based assays for the detection of microorganisms. The numbers of sequences for genes that are indicators of environmentally important functions such as nitrogen (N2) fixation have been rapidly growing over the past few decades. Obtaining these sequences from the National Center for Biotechnology Information's GenBank database is problematic because of annotation errors, nomenclature variation and paralogues; moreover, GenBank's structure and tools are not conducive to searching solely by function. For some genes, such as the nifH gene commonly used to assess community potential for N2 fixation, manual collection and curation are becoming intractable because of the large number of sequences in GenBank and the large number of highly similar paralogues. If analysis is to keep pace with sequence discovery, an automated retrieval and curation system is necessary.

Results

ARBitrator uses a two-step process composed of a broad collection of potential homologues followed by screening with a best hit strategy to conserved domains. 34 420 nifH sequences were identified in GenBank as of November 20, 2012. The false-positive rate is ∼0.033%. ARBitrator rapidly updates a public nifH sequence database, and we show that it can be adapted for other genes.

Availability and implementation

Java source and executable code are freely available to non-commercial users at http://pmc.ucsc.edu/~wwwzehr/research/database/.

Contact

zehrj@ucsc.edu

Supplementary information

SUPPLEMENTARY INFORMATION is available at Bioinformatics online.",2014-07-02 +26447265,"Pressure UlceR Programme Of reSEarch (PURPOSE): using mixed methods (systematic reviews, prospective cohort, case study, consensus and psychometrics) to identify patient and organisational risk, develop a risk assessment tool and patient-reported outcome Quality of Life and Health Utility measures","

Background

The Pressure UlceR Programme Of reSEarch (PURPOSE) consisted of two themes. Theme 1 focused on improving our understanding of individuals’ and organisational risk factors and on improving the quality of risk assessments (work packages 1–3) and theme 2 focused on developing patient-reported outcome measures (work packages 4 and 5).

Methods

The programme comprised 21 individual pieces of work. Pain: (1) multicentre pain prevalence study in acute hospitals, (2) multicentre pain prevalence study in community localities incorporating (3) a comparison of case-finding methods, and (4) multicentre, prospective cohort study. Severe pressure ulcers: (5) retrospective case study, (6) patient involvement workshop with the Pressure Ulcer Research Service User Network for the UK (PURSUN UK) and (7) development of root cause analysis methodology. Risk assessment: (8) systematic review, (9) consensus study, (10) conceptual framework development and theoretical causal pathway, (11) design and pretesting of draft Risk Assessment Framework and (12) field test to assess reliability, validity, data completeness and clinical usability. Quality of life: (13) conceptual framework development (systematic review, patient interviews), (14 and 15) provisional instrument development, with items generated from patient interviews [from (1) above] two systematic reviews and experts, (16) pretesting of the provisional Pressure Ulcer Quality of Life (PU-QOL) instrument using mixed methods, (17) field test 1 including (18) optimal mode of administration substudy and item reduction with testing of scale formation, acceptability, scaling assumptions, reliability and validity, and (19) field test 2 – final psychometric evaluation to test scale targeting, item response categories, item fit, response bias, acceptability, scaling assumptions, reliability and validity. Cost–utility: (20) time trade-off task valuations of health states derived from selected PU-QOL items, and (21) validation of the items selected and psychometric properties of the new Pressure Ulcer Quality of Life Utility Index (PUQOL-UI).

Key findings

Pain: prevalence studies – hospital and community patients experience both pressure area-related and pressure ulcer pain; pain cohort study – indicates that pain is independently predictive of category 2 (and above) pressure ulcer development. Severe pressure ulcers: these were more likely to develop in contexts in which clinicians failed to listen to patients/carers or recognise/respond to high risk or the presence of an existing pressure ulcer and services were not effectively co-ordinated; service users found the interactive workshop format valuable; including novel components (interviews with patients and carers) in root cause analysis improves the quality of the insights captured. Risk assessment: we developed a Pressure Ulcer Risk Assessment Framework, the PURPOSE-T, incorporating the Minimum Data Set, a screening stage, a full assessment stage, use of colour to support decision-making, and decision pathways that make a clear distinction between patients with an existing pressure ulcer(s) (or scarring from previous ulcers) who require secondary prevention and treatment and those at risk who require primary prevention (http://medhealth.leeds.ac.uk/accesspurposet). Quality of life: the final PU-QOL instrument consists of 10 scales to measure pain, exudate, odour, sleep, vitality, mobility/movement, daily activities, emotional well-being, self-consciousness and appearance, and participation (http://medhealth.leeds.ac.uk/puqol-ques). Cost–utility: seven items were selected from the PU-QOL instrument for inclusion in the PUQOL-UI (http://medhealth.leeds.ac.uk/puqol-ui); secondary study analysis indicated that item selection for the PUQOL-UI was appropriate and that the index was acceptable to patients and had adequate levels of validity.

Conclusions

The PURPOSE programme has provided important insights for pressure ulcer prevention and treatment and involvement of service users in research and development, with implications for patient and public involvement, clinical practice, quality/safety/health service management and research including replication of the pain risk factor study, work exploring ‘best practice’ settings, the impact of including skin status as an indicator for escalation of preventative interventions, further psychometric evaluation of PU-QOL and PUQOL-UI the measurement of ‘disease attribution.’

Funding

The National Institute for Health Research Programme Grants for Applied Research programme.",2015-10-09 +26162567,Personalisation of breast cancer follow-up: a time-dependent prognostic nomogram for the estimation of annual risk of locoregional recurrence in early breast cancer patients.,"The objective of this study was to develop and validate a time-dependent logistic regression model for prediction of locoregional recurrence (LRR) of breast cancer and a web-based nomogram for clinical decision support. Women first diagnosed with early breast cancer between 2003 and 2006 in all Dutch hospitals were selected from the Netherlands Cancer Registry (n = 37,230). In the first 5 years following primary breast cancer treatment, 950 (2.6 %) patients developed a LRR as first event. Risk factors were determined using logistic regression and the risks were calculated per year, conditional on not being diagnosed with recurrence in the previous year. Discrimination and calibration were assessed. Bootstrapping was used for internal validation. Data on primary tumours diagnosed between 2007 and 2008 in 43 Dutch hospitals were used for external validation of the performance of the nomogram (n = 12,308). The final model included the variables grade, size, multifocality, and nodal involvement of the primary tumour, and whether patients were treated with radio-, chemo- or hormone therapy. The index cohort showed an area under the ROC curve of 0.84, 0.77, 0.70, 0.73 and 0.62, respectively, per subsequent year after primary treatment. Model predictions were well calibrated. Estimates in the validation cohort did not differ significantly from the index cohort. The results were incorporated in a web-based nomogram ( http://www.utwente.nl/mira/influence ). 
This validated nomogram can be used as an instrument to identify patients with a low or high risk of LRR who might benefit from a less or more intensive follow-up after breast cancer and to aid clinical decision making for personalised follow-up.",2015-07-11 +26649754,Maternal Occupational Exposure to Noise during Pregnancy and Hearing Dysfunction in Children: A Nationwide Prospective Cohort Study in Sweden.,"

Background

Many women of childbearing age are occupationally active, which leads to a large number of pregnancies potentially exposed to occupational exposures. Occupational noise has been identified as a risk factor for hearing impairment in adults. However, very few studies have assessed the effect of occupational noise on the fetus.

Objectives

The aim of this study was to investigate whether occupational exposure to noise during pregnancy is associated with hearing dysfunction in children.

Methods

This population based cohort study included 1,422,333 single births in Sweden 1986-2008. Data on mothers' occupation, smoking habits, age, ethnicity, body mass index, leave of absence, and socioeconomic factors were obtained from interviews performed by prenatal care unit staff at approximately 10 weeks of gestation and from national registers. Occupational noise exposure was classified by a job-exposure-matrix as < 75, 75-84, or ≥ 85 dBLAeq,8h. Diagnosed cases of hearing dysfunction (ICD-10 codes H90.3-7, 91.0, 91.2-3, 91.8, 93.1-2) were identified from a register of specialized medical care. Cox proportional hazards models were used to estimate associations.

Results

In the full sample, containing a mixture of part-time and full-time workers during pregnancy, the adjusted HR for hearing dysfunction associated with maternal occupational noise exposure ≥ 85 vs. < 75 dBLAeq,8h was 1.27 (95% CI: 0.99, 1.64; 60 exposed cases). When restricted to children whose mothers worked full-time and had < 20 days leave of absence during pregnancy, the corresponding HR was 1.82 (95% CI: 1.08, 3.08; 14 exposed cases).

Conclusions

This study showed an association between occupational noise exposure during pregnancy and hearing dysfunction in children. In view of mechanistic evidence and earlier indicative epidemiological and experimental findings, the results support that pregnant women should not be exposed to high levels of noise at work.

Citation

Selander J, Albin M, Rosenhall U, Rylander L, Lewné M, Gustavsson P. 2016. Maternal occupational exposure to noise during pregnancy and hearing dysfunction in children: a nationwide prospective cohort study in Sweden. Environ Health Perspect 124:855-860; http://dx.doi.org/10.1289/ehp.1509874.",2015-12-08 +24931751,Geographical and temporal distribution of basic research experiments in homeopathy.,"The database HomBRex (Homeopathy Basic Research experiments) was established in 2002 to provide an overview of the basic research already done on homeopathy (http://www.carstens-stiftung.de/hombrex). By this means, it facilitates the exploration of the Similia Principle and the working mechanism of homeopathy. Since 2002, the total number of experiments listed has almost doubled. The current review reports the history of basic research in homeopathy as evidenced by publication dates and origin of publications. In July 2013, the database held 1868 entries. Most publications were reported from France (n = 267), followed by Germany (n = 246) and India (n = 237). In the last ten years, the number of publications from Brazil dramatically increased from n = 13 (before 2004) to n = 164 (compared to n = 251 published in France before 2004, and n = 16 between 2004 and 2013). The oldest database entry was from Germany (1832).",2014-07-01 +24766439,Structural insights into the substrate-binding mechanism for a novel chitosanase.,"Chitosanase is able to specifically cleave β-1,4-glycosidic bond linkages in chitosan to produce a chito-oligomer product, which has found a variety of applications in many areas, including functional food and cancer therapy. Although several structures for chitosanase have been determined, the substrate-binding mechanism for this enzyme has not been fully elucidated because of the lack of a high-resolution structure of the chitosanase-substrate complex. In the present study we show the crystal structure of a novel chitosanase OU01 from Microbacterium sp. 
in complex with its substrate hexa-glucosamine (GlcN)6, which belongs to the GH46 (glycoside hydrolyase 46) family in the Carbohydrate Active Enzymes database (http://www.cazy.org/). This structure allows precise determination of the substrate-binding mechanism for the first time. The chitosanase-(GlcN)6 complex structure demonstrates that, from the -2 to +1 position of the (GlcN)6 substrate, the pyranose rings form extensive interactions with the chitosanase-binding cleft. Several residues (Ser27, Tyr37, Arg45, Thr58, Asp60, His203 and Asp235) in the binding cleft are found to form important interactions required to bind the substrate. Site-directed mutagenesis of these residues showed that mutations of Y37F and H203A abolish catalytic activity. In contrast, the mutations T58A and D235A only lead to a moderate loss of catalytic activity, whereas the S27A mutation retains ~80% of the enzymatic activity. In combination with previous mutagenesis studies, these results suggest that the -2, -1 and +1 subsites play a dominant role in substrate binding and catalysis. DSF (differential scanning fluorimetry) assays confirmed that these mutations had no significant effect on protein stability. Taken together, we present the first mechanistic interpretation for the substrate (GlcN)6 binding to chitosanase, which is critical for the design of novel chitosanase used for biomass conversion.",2014-07-01 +30708874,First Report of Recombinant Potato virus Y Strains Infecting Potato in Jordan.,"Potato (Solanum tuberosum L.) is an important vegetable crop in Jordan, occupying second position after olives. In 2012, potatoes were planted on about 6,000 ha with a production of about 141,000 t (2). Potato virus Y (PVY) is a serious problem for potato production worldwide. Recombinant strains of the virus were reported to cause tuber necrotic ringspot disease (PTNRD) in many potato-growing regions of the world. 
In the last few years, a new recombinant PVYNTN-NW that belongs to PVYZ (3) has been reported in the neighboring Syria. It included three recombination patterns, SYR-I, SYR-II, and SYR-III, and caused severe PTNRD (1). Since PVY is easily transmitted from one region to another by aphid vectors and infected potato seeds, this study was initiated to investigate the possible occurrence of PVY strains in Jordan. In October 2013, 33 leaf samples were collected from symptomatic potato plants cv. Spunta from Wadi Rum, Jordan (GPS coordinates 29°31'37.76″ N, 35°42'48.75″ E), the largest potato-producing area in Jordan. Sampled plants displayed leaf mottling and yellowing, symptoms similar to those caused by PVY. All samples were tested for PVY by DAS-ELISA using the ELISA kit (monoclonal cocktail) developed by BIOREBA (Reinach, Switzerland) to detect all PVY isolates. Twenty-nine samples were found positive for PVY by ELISA. To confirm virus infection, total RNA was extracted from all ELISA-positive samples and used as template in uniplex RT-PCR using strain-specific primers (1). The band pattern of PCR amplicons showed that 12 samples were infected with PVYNTN-NW genotype SYR-III and produced bands of 1,085, 441, and 278 bp. One sample was infected with PVYNTN (A) and produced bands of 1,307, 633, and 441 bp, and one other sample was infected with PVYNTN-NW genotype SYR-II and produced bands of 1,085 and 441 bp. Mixed infection with PVYNTN-NW genotype SYR-III and PVYNTN (B) was also detected in one sample producing bands of 278, 441, 1,085, and 1,307 bp. To confirm infection with the recombinant strains, PCR fragments of 278 bp amplified from three samples and 1,085 bp obtained from another three samples were directly sequenced and sequences were deposited in GenBank under accession numbers KJ159968, KJ159969, and KJ159970 for the 278-bp fragment and KJ159974, KJ159975, and KJ159976 for the 1,085-bp fragment. 
Sequence comparison with other PVY strains available in the NCBI database showed that the 278-bp fragment had the highest nucleotide sequence identity (100%) with PVY isolates SYR-III-A26 (AB461467) and SYR-III-2-4 (AB461457) from Syria. BLAST searches also showed that the 1,085-bp fragment shared 99% nucleotide identities with PVY isolates SYR-II-L3 (AB461482) and SYR-II-Be4 (AB461474) from Aleppo, Syria. To our knowledge, this is the first report of PVY recombinants in Jordan, and the first report of PVYNTN-NW recombinants infecting potato crop outside Syria. Since Europe is the main supplier of potato seeds for farmers in Jordan and Syria, the introduction of PVYNTN-NW to the region could have happened through infected potato seeds. Results of this study create new challenges for potato growers in Jordan as well as other countries in the region. References: (1) M. Chikh Ali et al. J. Virol. Methods 165:15, 2010. (2) FAO. http://faostat.fao.org/ (3) A. V. Karasev and S. M. Gray. Ann. Rev. Phytopathol. 51:571, 2013.",2014-07-01 +25068136,Omega-3 and omega-6 content of medicinal foods for depressed patients: implications from the Iranian Traditional Medicine.,"

Objectives

Considering the increasing prevalence of depression in modern societies and the positive effects of omega-3 polyunsaturated fatty acids on depression, this study aims to investigate the omega-3 and omega-6 content of various foodstuffs, prescribed or prohibited by Iranian Traditional Medicine (ITM).

Materials and methods

Firstly, reliable sources of Iranian Traditional Medicine were reviewed in order to identify the prescribed and prohibited foodstuffs for depressed patients. Afterwards, according to the online database of United States Department of Agriculture (URL: http://ndb.nal.usda.gov/ndb/search/list), the ratio of linoleic acid to alpha linolenic acid (as representatives of omega-6 and omega-3, respectively) was identified in each foodstuff. Finally, the ratios of omega-6 to omega-3 were compared between seven food groups of vegetables, fruits, dry goods, high protein products, dairies, breads, and spices.

Results

Based on the resources of Iranian Traditional Medicine, the following foods are prescribed for depressed patients: basil, coriander, spinach, lettuce, squash, peppermint, dill, chicory, celery, beet, quince, cucumber, watermelon, grape, peach, pomegranate, banana, apple, currant, pistachio, dried fig, almond, egg, chicken, lamb, trout, milk, bread without bran, saffron, oregano, and coriander seeds. On the other hand, cabbage, eggplant, onion, garlic, broad beans, lentils, beef, whole wheat bread, and mustard are prohibited. It should be noted that omega-3 content in some prescribed foods is more than that of the prohibited ones.

Conclusion

The present study showed that mint, basil, spinach, lettuce, squash, lamb, saffron, oregano, cucumber, pistachio, milk, and also wild trout can be considered as medicinal foods for depressed patients.",2014-07-01 +25383185,CyKEGGParser: tailoring KEGG pathways to fit into systems biology analysis workflows.,"The KEGG pathway database is a widely accepted source for biomolecular pathway maps. In this paper we present the CyKEGGParser app ( http://apps.cytoscape.org/apps/cykeggparser) for Cytoscape 3 that allows manipulation with KEGG pathway maps. Along with basic functionalities for pathway retrieval, visualization and export in KGML and BioPAX formats, the app provides unique features for computer-assisted adjustment of inconsistencies in KEGG pathway KGML files and generation of tissue- and protein-protein interaction specific pathways. We demonstrate that using biological context-specific KEGG pathways created with CyKEGGParser makes systems biology analysis more sensitive and appropriate compared to original pathways.",2014-07-01 +24995610,Clustering of gene ontology terms in genomes.,"Although protein coding genes occupy only a small fraction of genomes in higher species, they are not randomly distributed within or between chromosomes. Clustering of genes with related function(s) and/or characteristics has been evident at several different levels. To study how common the clustering of functionally related genes is and what kind of functions the end products of these genes are involved, we collected gene ontology (GO) terms for complete genomes and developed a method to detect previously undefined gene clustering. Exhaustive analysis was performed for seven widely studied species ranging from human to Escherichia coli. To overcome problems related to varying gene lengths and densities, a novel method was developed and a fixed number of genes were analyzed irrespective of the genome span covered. 
Statistically very significant GO term clustering was apparent in all the investigated genomes. The analysis window, which ranged from 5 to 50 consecutive genes, revealed extensive GO term clusters for genes with widely varying functions. Here, the most interesting and significant results are discussed and the complete dataset for each analyzed species is available at the GOme database at http://bioinf.uta.fi/GOme. The results indicated that clusters of genes with related functions are very common, not only in bacteria, in which operons are frequent, but also in all the studied species irrespective of how complex they are. There are some differences between species but in all of them GO term clusters are common and of widely differing sizes. The presented method can be applied to analyze any genome or part of a genome for which descriptive features are available, and thus is not restricted to ontology terms. This method can also be applied to investigate gene and protein expression patterns. The results pave a way for further studies of mechanisms that shape genome structure and evolutionary forces related to them.",2014-07-01 +24984610,Lack of association between a functional polymorphism (rs1800796) in the interleukin-6 gene promoter and lung cancer.,"

Background

A number of studies have examined the association between interleukin-6 (IL-6) rs1800796 polymorphism and risk of lung cancer but revealed inconsistent results. The aim of this study was to clarify the association between IL-6 rs1800796 polymorphism and risk of lung cancer.

Methods

Literature databases including PubMed, Embase and CNKI were searched up to January 2014. The pooled odds ratios (ORs) with 95% confidence intervals (CIs) under co-dominant model, dominant model and recessive model were estimated using random-effects model.

Results

A total of seven studies, including 2691 lung cancer cases and 3067 controls, were included in the meta-analysis. The results suggested that IL-6 rs1800796 polymorphism was not associated with risk of lung cancer under homogeneous co-dominant model (OR = 1.06, 95%CI = 0.73-1.54), heterogeneous co-dominant model (OR = 1.24, 95%CI = 0.96-1.60), dominant model (OR = 1.23, 95%CI = 0.95-1.58) and recessive model (OR = 0.96, 95%CI = 0.70-1.32). The association was still not significant in either never-smokers (OR = 1.19, 95%CI = 0.95-1.48) or ever-smokers (OR = 1.73, 95%CI = 0.89-3.36).

Conclusion

The present meta-analysis suggested that there was no association between IL-6 rs1800796 polymorphism and lung cancer, which was independent of smoking status.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1060061508127855.",2014-07-01 +24912776,"Modeling cancer glycolysis under hypoglycemia, and the role played by the differential expression of glycolytic isoforms.","

Unlabelled

The effect of hypoglycemia on the contents of glycolytic proteins, activities of enzymes/transporters and flux of HeLa and MCF-7 tumor cells was experimentally analyzed and modeled in silico. After 24 h hypoglycemia (2.5 mM initial glucose), significant increases in the protein levels of glucose transporters 1 and 3 (GLUT 1 and 3) (3.4 and 2.1-fold, respectively) and hexokinase I (HKI) (2.3-fold) were observed compared to the hyperglycemic standard cell culture condition (25 mM initial glucose). However, these changes did not bring about a significant increase in the total activities (Vmax) of GLUT and HK; instead, the affinity of these proteins for glucose increased, which may explain the twofold increased glycolytic flux under hypoglycemia. Thus, an increase in more catalytically efficient isoforms for two of the main controlling steps was sufficient to induce increased flux. Further, a previous kinetic model of tumor glycolysis was updated by including the ratios of GLUT and HK isoforms, modified pyruvate kinase kinetics and an oxidative phosphorylation reaction. The updated model was robust in terms of simulating most of the metabolite levels and fluxes of the cells exposed to various glycemic conditions. Model simulations indicated that the main controlling steps were glycogen degradation > HK > hexosephosphate isomerase under hyper- and normoglycemia, and GLUT > HK > glycogen degradation under hypoglycemia. These predictions were experimentally evaluated: the glycolytic flux of hypoglycemic cells was more sensitive to cytochalasin B (a GLUT inhibitor) than that of hyperglycemic cells. The results indicated that cancer glycolysis should be inhibited at multiple controlling sites, regardless of external glucose levels, to effectively block the pathway.

Database

The mathematical models described here have been submitted to the JWS Online Cellular Systems Modelling Database and can be accessed at http://jjj.mib.ac.uk/database/achcar/index.html. [Database section added 21 July 2014 after original online publication].",2014-07-01 +25100686,MEGADOCK 4.0: an ultra-high-performance protein-protein docking software for heterogeneous supercomputers.,"

Summary

The application of protein-protein docking in large-scale interactome analysis is a major challenge in structural bioinformatics and requires huge computing resources. In this work, we present MEGADOCK 4.0, an FFT-based docking software that makes extensive use of recent heterogeneous supercomputers and shows powerful, scalable performance of >97% strong scaling.

Availability and implementation

MEGADOCK 4.0 is written in C++ with OpenMPI and NVIDIA CUDA 5.0 (or later) and is freely available to all academic and non-profit users at: http://www.bi.cs.titech.ac.jp/megadock.

Contact

akiyama@cs.titech.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-08-06 +26484184,Snai1 represses Nanog to promote embryonic stem cell differentiation.,"Embryonic stem cell (ESC) self-renewal and pluripotency is maintained by an external signaling pathways and intrinsic regulatory networks involving ESC-specific transcriptional complexes (mainly formed by OCT3/4, Sox2 and Nanog proteins), the Polycomb repressive complex 2 (PRC2) and DNA methylation [1-8]. Among these, Nanog represents the more ESC specific factor and its repression correlates with the loss of pluripotency and ESC differentiation [9-11]. During ESC early differentiation, many development-associated genes become upregulated and although, in general, much is known about the pluripotency self-renewal circuitry, the molecular events that lead ESCs to exit from pluripotency and begin differentiation are largely unknown. Snai1 is one the most early induced genes during ESC differentiation in vitro and in vivo [12,13]. Here we show that Snai1 is able to directly repress several stemness-associated genes including Nanog. We use a ESC stable-line expressing a inducible Snai1 protein. We here show microarray analysis of embryonic stem cells (ESC) expressing Snail-ER at various time points of induction with 4-OH. Data were deposited in Gene Expression Omnibus (GEO) datasets under reference GSE57854 and here: http://epigenetics.hugef-research.org/data.php.",2015-03-30 +26072500,Comparing genomes with rearrangements and segmental duplications.,"

Motivation

Large-scale evolutionary events such as genomic rearrangements and segmental duplications form an important part of the evolution of genomes and are widely studied from both biological and computational perspectives. A basic computational problem is to infer these events in the evolutionary history for given modern genomes, a task for which many algorithms have been proposed under various constraints. Algorithms that can handle both rearrangements and content-modifying events such as duplications and losses remain few and limited in their applicability.

Results

We study the comparison of two genomes under a model including general rearrangements (through double-cut-and-join) and segmental duplications. We formulate the comparison as an optimization problem and describe an exact algorithm to solve it by using an integer linear program. We also devise a sufficient condition and an efficient algorithm to identify optimal substructures, which can simplify the problem while preserving optimality. Using the optimal substructures with the integer linear program (ILP) formulation yields a practical and exact algorithm to solve the problem. We then apply our algorithm to assign in-paralogs and orthologs (a necessary step in handling duplications) and compare its performance with that of the state-of-the-art method MSOAR, using both simulations and real data. On simulated datasets, our method outperforms MSOAR by a significant margin, and on five well-annotated species, MSOAR achieves high accuracy, yet our method performs slightly better on each of the 10 pairwise comparisons.

Availability and implementation

http://lcbb.epfl.ch/softwares/coser.",2015-06-01 +26072488,Genome-wide detection of intervals of genetic heterogeneity associated with complex traits.,"

Motivation

Genetic heterogeneity, the fact that several sequence variants give rise to the same phenotype, is a phenomenon that is of the utmost interest in the analysis of complex phenotypes. Current approaches for finding regions in the genome that exhibit genetic heterogeneity suffer from at least one of two shortcomings: (i) they require the definition of an exact interval in the genome that is to be tested for genetic heterogeneity, potentially missing intervals of high relevance, or (ii) they suffer from an enormous multiple hypothesis testing problem due to the large number of potential candidate intervals being tested, which results in either many false positives or a lack of power to detect true intervals.

Results

Here, we present an approach that overcomes both problems: it allows one to automatically find all contiguous sequences of single nucleotide polymorphisms in the genome that are jointly associated with the phenotype. It also solves both the inherent computational efficiency problem and the statistical problem of multiple hypothesis testing, which are both caused by the huge number of candidate intervals. We demonstrate on Arabidopsis thaliana genome-wide association study data that our approach can discover regions that exhibit genetic heterogeneity and would be missed by single-locus mapping.

Conclusions

Our novel approach can contribute to the genome-wide discovery of intervals that are involved in the genetic heterogeneity underlying complex phenotypes.

Availability and implementation

The code can be obtained at: http://www.bsse.ethz.ch/mlcb/research/bioinformatics-and-computational-biology/sis.html.",2015-06-01 +25097382,AllergenPro: an integrated database for allergenicity analysis and prediction.,"

Unlabelled

The National Agricultural Biotechnology Information Center (NABIC) reconstructed an AllergenPro database for allergenic proteins analysis and allergenicity prediction. The AllergenPro is an integrated web-based system providing information about allergen in foods, microorganisms, animals and plants. The allergen database has the three main features namely, (1) allergen list with epitopes, (2) searching of allergen using keyword, and (3) methods for allergenicity prediction. This updated AllergenPro outputs the search based allergen information through a user-friendly web interface, and users can run tools for allergenicity prediction using three different methods namely, (1) FAO/WHO, (2) motif-based and (3) epitope-based methods.

Availability

The database is available for free at http://nabic.rda.go.kr/allergen/",2014-06-30 +24980129,BioC interoperability track overview. ,"BioC is a new simple XML format for sharing biomedical text and annotations and libraries to read and write that format. This promotes the development of interoperable tools for natural language processing (NLP) of biomedical text. The interoperability track at the BioCreative IV workshop featured contributions using or highlighting the BioC format. These contributions included additional implementations of BioC, many new corpora in the format, biomedical NLP tools consuming and producing the format and online services using the format. The ease of use, broad support and rapidly growing number of tools demonstrate the need for and value of the BioC format. Database URL: http://bioc.sourceforge.net/.",2014-06-30 +25097385,SCNProDB: A database for the identification of soybean cyst nematode proteins.,"Soybean cyst nematode (Heterodera glycines, SCN) is the most destructive pathogen of soybean around the world. Crop rotation and resistant cultivars are used to mitigate the damage of SCN, but these approaches are not completely successful because of the varied SCN populations. Thus, the limitations of these practices with soybean dictate investigation of other avenues of protection of soybean against SCN, perhaps through genetically engineering of broad resistance to SCN. For better understanding of the consequences of genetic manipulation, elucidation of SCN protein composition at the subunit level is necessary. We have conducted studies to determine the composition of SCN proteins using a proteomics approach in our laboratory using twodimensional polyacrylamide gel electrophoresis (2D-PAGE) to separate SCN proteins and to characterize the proteins further using mass spectrometry. Our analysis resulted in the identification of several hundred proteins. 
In this investigation, we developed a web based database (SCNProDB) containing protein information obtained from our previous published studies. This database will be useful to scientists who wish to develop SCN resistant soybean varieties through genetic manipulation and breeding efforts. The database is freely accessible from: http://bioinformatics.towson.edu/Soybean_SCN_proteins_2D_Gel_DB/Gel1.aspx.",2014-06-30 +26115255,Estimation of the environmental dam-offspring correlation in beef cattle.,"A long standing controversy in animal breeding is related to the strong negative estimates of the direct-maternal genetic correlation obtained when fitting data on maternally influenced traits. In this article, we focused on a model that introduces a new correlation parameter among dam-offspring records. The extant theory allows estimation of the parameter when dams have at most a single offspring. Our goal was to develop an inferential procedure in a more general setting. To do so, we applied a Bayesian approach and we showed that the estimation could be accomplished by introducing a Markov chain Monte Carlo (MCMC) step embedded into a regular Gibbs sampler program. The procedure was implemented by means of an MCMC algorithm known as the Griddy-Gibbs sampler, and a Fortran 90 library was created to accomplish the task. The computer program is available from http://www.agro.uba.ar/catedras/mg_animal/software/RDBLK. With this tool at hand, we applied the inferential procedure to weaning weight records on beef cattle calves from an Argentinean Hereford herd, and we estimated the marginal distribution of the environmental dam-offspring correlation parameter. The distribution was unimodal and symmetric with a mean value of -0.14 (±0.03) and a 95% high posterior density interval between -0.20 and -0.07, indicating that the model placed a huge mass on negative values of the parameter. 
Noticeably, the magnitude of the direct-maternal genetic correlation diminished from -0.61 to -0.37 with respect to the standard maternal animal model. This result reinforces the idea that environmental covariances among dam-offspring records may bias the estimate of the direct-maternal genetic correlation.",2015-06-01 +25045344,An Elegant Algorithm for the Construction of Suffix Arrays.,"The suffix array is a data structure that finds numerous applications in string processing problems for both linguistic texts and biological data. It has been introduced as a memory efficient alternative for suffix trees. The suffix array consists of the sorted suffixes of a string. There are several linear time suffix array construction algorithms (SACAs) known in the literature. However, one of the fastest algorithms in practice has a worst case run time of O(n2). The problem of designing practically and theoretically efficient techniques remains open. In this paper we present an elegant algorithm for suffix array construction which takes linear time with high probability; the probability is on the space of all possible inputs. Our algorithm is one of the simplest of the known SACAs and it opens up a new dimension of suffix array construction that has not been explored until now. Our algorithm is easily parallelizable. We offer parallel implementations on various parallel models of computing. We prove a lemma on the ℓ-mers of a random string which might find independent applications. We also present another algorithm that utilizes the above algorithm. This algorithm is called RadixSA and has a worst case run time of O(n log n). RadixSA introduces an idea that may find independent applications as a speedup technique for other SACAs. An empirical comparison of RadixSA with other algorithms on various datasets reveals that our algorithm is one of the fastest algorithms to date. 
The C++ source code is freely available at http://www.engr.uconn.edu/~man09004/radixSA.zip.",2014-07-01 +22110036,LegumeIP: an integrative database for comparative genomics and transcriptomics of model legumes.,"Legumes play a vital role in maintaining the nitrogen cycle of the biosphere. They conduct symbiotic nitrogen fixation through endosymbiotic relationships with bacteria in root nodules. However, this and other characteristics of legumes, including mycorrhization, compound leaf development and profuse secondary metabolism, are absent in the typical model plant Arabidopsis thaliana. We present LegumeIP (http://plantgrn.noble.org/LegumeIP/), an integrative database for comparative genomics and transcriptomics of model legumes, for studying gene function and genome evolution in legumes. LegumeIP compiles gene and gene family information, syntenic and phylogenetic context and tissue-specific transcriptomic profiles. The database holds the genomic sequences of three model legumes, Medicago truncatula, Glycine max and Lotus japonicus plus two reference plant species, A. thaliana and Populus trichocarpa, with annotations based on UniProt, InterProScan, Gene Ontology and the Kyoto Encyclopedia of Genes and Genomes databases. LegumeIP also contains large-scale microarray and RNA-Seq-based gene expression data. Our new database is capable of systematic synteny analysis across M. truncatula, G. max, L. japonicus and A. thaliana, as well as construction and phylogenetic analysis of gene families across the five hosted species. Finally, LegumeIP provides comprehensive search and visualization tools that enable flexible queries based on gene annotation, gene family, synteny and relative gene expression.",2011-11-21 +25527831,Workflow4Metabolomics: a collaborative research infrastructure for computational metabolomics.,"

Summary

The complex, rapidly evolving field of computational metabolomics calls for collaborative infrastructures where the large volume of new algorithms for data pre-processing, statistical analysis and annotation can be readily integrated whatever the language, evaluated on reference datasets and chained to build ad hoc workflows for users. We have developed Workflow4Metabolomics (W4M), the first fully open-source and collaborative online platform for computational metabolomics. W4M is a virtual research environment built upon the Galaxy web-based platform technology. It enables ergonomic integration, exchange and running of individual modules and workflows. Alternatively, the whole W4M framework and computational tools can be downloaded as a virtual machine for local installation.

Availability and implementation

http://workflow4metabolomics.org homepage enables users to open a private account and access the infrastructure. W4M is developed and maintained by the French Bioinformatics Institute (IFB) and the French Metabolomics and Fluxomics Infrastructure (MetaboHUB).

Contact

contact@workflow4metabolomics.org.",2014-12-19 +26279589,Classifier Design Given an Uncertainty Class of Feature Distributions via Regularized Maximum Likelihood and the Incorporation of Biological Pathway Knowledge in Steady-State Phenotype Classification.,"Contemporary high-throughput technologies provide measurements of very large numbers of variables but often with very small sample sizes. This paper proposes an optimization-based paradigm for utilizing prior knowledge to design better performing classifiers when sample sizes are limited. We derive approximate expressions for the first and second moments of the true error rate of the proposed classifier under the assumption of two widely-used models for the uncertainty classes; ε-contamination and p-point classes. The applicability of the approximate expressions is discussed by defining the problem of finding optimal regularization parameters through minimizing the expected true error. Simulation results using the Zipf model show that the proposed paradigm yields improved classifiers that outperform traditional classifiers that use only training data. Our application of interest involves discrete gene regulatory networks possessing labeled steady-state distributions. Given prior operational knowledge of the process, our goal is to build a classifier that can accurately label future observations obtained in the steady state by utilizing both the available prior knowledge and the training data. We examine the proposed paradigm on networks containing NF-κB pathways, where it shows significant improvement in classifier performance over the classical data-only approach to classifier design. Companion website: http://gsp.tamu.edu/Publications/supplementary/shahrokh12a.",2013-10-01 +24527749,Cellular signalling of non-synonymous single-nucleotide polymorphisms of the human μ-opioid receptor (OPRM1).,"

Unlabelled

There is significant variability in individual responses to opioid drugs, which is likely to have a significant genetic component. A number of non-synonymous single-nucleotide polymorphisms (SNPs) in the coding regions of the μ-opioid receptor gene (OPRM1) have been postulated to contribute to this variability. Although many studies have investigated the clinical influences of these μ-opioid receptor variants, the outcomes are reported in the context of thousands of other genes and environmental factors, and we are no closer to being able to predict individual response to opioids based on genotype. Investigation of how μ-opioid receptor SNPs affect their expression, coupling to second messengers, desensitization and regulation is necessary to understand how subtle changes in receptor structure can impact individual responses to opioids. To date, the few functional studies that have investigated the consequences of SNPs on the signalling profile of the μ-opioid receptor in vitro have shown that the common N40D variant has altered functional responses to some opioids, while other, rarer, variants display altered signalling or agonist-dependent regulation. Here, we review the data available on the effects of μ-opioid receptor polymorphisms on receptor function, expression and regulation in vitro, and discuss the limitations of the studies to date. Whether or not μ-opioid receptor SNPs contribute to individual variability in opioid responses remains an open question, in large part because we have relatively little good data about how the amino acid changes affect μ-opioid receptor function.

Linked articles

This article is part of a themed section on Opioids: New Pathways to Functional Selectivity. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2015.172.issue-2.",2014-07-01 +24516375,Comparative genome-scale reconstruction of gapless metabolic networks for present and ancestral species.,"We introduce a novel computational approach, CoReCo, for comparative metabolic reconstruction and provide genome-scale metabolic network models for 49 important fungal species. Leveraging on the exponential growth in sequenced genome availability, our method reconstructs genome-scale gapless metabolic networks simultaneously for a large number of species by integrating sequence data in a probabilistic framework. High reconstruction accuracy is demonstrated by comparisons to the well-curated Saccharomyces cerevisiae consensus model and large-scale knock-out experiments. Our comparative approach is particularly useful in scenarios where the quality of available sequence data is lacking, and when reconstructing evolutionary distant species. Moreover, the reconstructed networks are fully carbon mapped, allowing their use in 13C flux analysis. We demonstrate the functionality and usability of the reconstructed fungal models with computational steady-state biomass production experiment, as these fungi include some of the most important production organisms in industrial biotechnology. In contrast to many existing reconstruction techniques, only minimal manual effort is required before the reconstructed models are usable in flux balance experiments. CoReCo is available at http://esaskar.github.io/CoReCo/.",2014-02-06 +21977986,ProDiGe: Prioritization Of Disease Genes with multitask machine learning from positive and unlabeled examples.,"

Background

Elucidating the genetic basis of human diseases is a central goal of genetics and molecular biology. While traditional linkage analysis and modern high-throughput techniques often provide long lists of tens or hundreds of disease gene candidates, the identification of disease genes among the candidates remains time-consuming and expensive. Efficient computational methods are therefore needed to prioritize genes within the list of candidates, by exploiting the wealth of information available about the genes in various databases.

Results

We propose ProDiGe, a novel algorithm for Prioritization of Disease Genes. ProDiGe implements a novel machine learning strategy based on learning from positive and unlabeled examples, which makes it possible to integrate various sources of information about the genes, to share information about known disease genes across diseases, and to perform genome-wide searches for new disease genes. Experiments on real data show that ProDiGe outperforms state-of-the-art methods for the prioritization of genes in human diseases.

Conclusions

ProDiGe implements a new machine learning paradigm for gene prioritization, which could help the identification of new disease genes. It is freely available at http://cbio.ensmp.fr/prodige.",2011-10-06 +23918246,MeltDB 2.0-advances of the metabolomics software system.,"

Motivation

The research area metabolomics achieved tremendous popularity and development in the last couple of years. Owing to its unique interdisciplinarity, it requires to combine knowledge from various scientific disciplines. Advances in the high-throughput technology and the consequently growing quality and quantity of data put new demands on applied analytical and computational methods. Exploration of finally generated and analyzed datasets furthermore relies on powerful tools for data mining and visualization.

Results

To cover and keep up with these requirements, we have created MeltDB 2.0, a next-generation web application addressing storage, sharing, standardization, integration and analysis of metabolomics experiments. New features improve both efficiency and effectivity of the entire processing pipeline of chromatographic raw data from pre-processing to the derivation of new biological knowledge. First, the generation of high-quality metabolic datasets has been vastly simplified. Second, the new statistics tool box allows to investigate these datasets according to a wide spectrum of scientific and explorative questions.

Availability

The system is publicly available at https://meltdb.cebitec.uni-bielefeld.de. A login is required but freely available.",2013-08-05 +26358730,Inferred miRNA activity identifies miRNA-mediated regulatory networks underlying multiple cancers.,"

Motivation

MicroRNAs (miRNAs) play a key role in regulating tumor progression and metastasis. Identifying key miRNAs, defined by their functional activities, can provide a deeper understanding of biology of miRNAs in cancer. However, miRNA expression level cannot accurately reflect miRNA activity.

Results

We developed a computational approach, ActMiR, for identifying active miRNAs and miRNA-mediated regulatory mechanisms. Applying ActMiR to four cancer datasets in The Cancer Genome Atlas (TCGA), we showed that (i) miRNA activity was tumor subtype specific; (ii) genes correlated with inferred miRNA activities were more likely to enrich for miRNA binding motifs; (iii) expression levels of these genes and inferred miRNA activities were more likely to be negatively correlated. For the four cancer types in TCGA we identified 77-229 key miRNAs for each cancer subtype and annotated their biological functions. The miRNA-target pairs, predicted by our ActMiR algorithm but not by correlation of miRNA expression levels, were experimentally validated. The functional activities of key miRNAs were further demonstrated to be associated with clinical outcomes for other cancer types using independent datasets. For ER(-)/HER2(-) breast cancers, we identified activities of key miRNAs let-7d and miR-18a as potential prognostic markers and validated them in two independent ER(-)/HER2(-) breast cancer datasets. Our work provides a novel scheme to facilitate our understanding of miRNA. In summary, inferred activity of key miRNA provided a functional link to its mediated regulatory network, and can be used to robustly predict patient's survival.

Availability and implementation

The software is freely available at http://research.mssm.edu/integrative-network-biology/Software.html.

Contact

jun.zhu@mssm.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-09-10 +25899044,Global estimation of the 3' untranslated region landscape using RNA sequencing.,"The 3' untranslated region (3' UTR) of mRNA contains elements that play regulatory roles in polyadenylation, localization, translation efficiency, and mRNA stability. Despite the significance of the 3' UTR, there is no popular method for annotating 3' UTRs and for profiling their isoforms. Recently, poly(A)-position profiling by sequencing (3P-seq) and other similar methods have successfully been used to annotate 3' UTRs; however, they contain complex RNA-biochemical experimental steps, resulting in a low yield of products. In this paper, we propose heuristic and regression methods to estimate and quantify the usage of 3' UTRs with widely profiled RNA sequencing (RNA-seq) data. With this approach, the 3' UTR usage estimated from RNA-seq was found to be highly correlated to that of 3P-seq, and poly(A) cleavage signals of 3' UTRs were detected upstream of the predicted poly(A) cleavage sites. Our methods predicted greater number of 3' UTRs than 3P-seq, which allows the profiling of the 3' UTRs of most expressed genes in diverse cell-types, stages, and species. Hence, the computational RNA-seq method for the estimation of the 3' UTR landscape would be useful as a tool for studying not only the functional roles of 3' UTR but also gene regulation by 3' UTR in a cell type-specific context. The method is implemented in open-source code, which is available at http://big.hanyang.ac.kr/GETUTR.",2015-04-18 +24194884,CorSig: a general framework for estimating statistical significance of correlation and its application to gene co-expression analysis.,"With the rapid increase of omics data, correlation analysis has become an indispensable tool for inferring meaningful associations from a large number of observations. Pearson correlation coefficient (PCC) and its variants are widely used for such purposes. 
However, it remains challenging to test whether an observed association is reliable both statistically and biologically. We present here a new method, CorSig, for statistical inference of correlation significance. CorSig is based on a biology-informed null hypothesis, i.e., testing whether the true PCC (ρ) between two variables is statistically larger than a user-specified PCC cutoff (τ), as opposed to the simple null hypothesis of ρ = 0 in existing methods, i.e., testing whether an association can be declared without a threshold. CorSig incorporates Fisher's Z transformation of the observed PCC (r), which facilitates use of standard techniques for p-value computation and multiple testing corrections. We compared CorSig against two methods: one uses a minimum PCC cutoff while the other (Zhu's procedure) controls correlation strength and statistical significance in two discrete steps. CorSig consistently outperformed these methods in various simulation data scenarios by balancing between false positives and false negatives. When tested on real-world Populus microarray data, CorSig effectively identified co-expressed genes in the flavonoid pathway, and discriminated between closely related gene family members for their differential association with flavonoid and lignin pathways. The p-values obtained by CorSig can be used as a stand-alone parameter for stratification of co-expressed genes according to their correlation strength in lieu of an arbitrary cutoff. CorSig requires one single tunable parameter, and can be readily extended to other correlation measures. Thus, CorSig should be useful for a wide range of applications, particularly for network analysis of high-dimensional genomic data.

Software availability

A web server for CorSig is provided at http://202.127.200.1:8080/probeWeb. R code for CorSig is freely available for non-commercial use at http://aspendb.uga.edu/downloads.",2013-10-23 +21887013,AnimalLectinDb: An integrated animal lectin database.,"

Unlabelled

Lectins, a class of carbohydrate-binding proteins and widely recognized to play a range of crucial roles in many cell-cell recognition events triggering several important cellular processes encompass different members that are diverse in their protein structures, carbohydrate affinities and specificities, their larger biological roles and potential applications. To attain an effective use of all the diverse data initially an animal lectin database 'AnimalLectinDb' with information pertaining to taxonomic, structural, domain architecture, molecular sequence, carbohydrate structure and blood group specificity has been developed. It is expected to be of high value not only for basic study in lectin biology but also for advanced research in pursuing several applications in biotechnology, immunology, and clinical practice.

Availability

The database is available for free at http://www.research-bioinformatics.in.",2011-04-22 +24655717,spliceR: an R package for classification of alternative splicing and prediction of coding potential from RNA-seq data.,"

Background

RNA-seq data is currently underutilized, in part because it is difficult to predict the functional impact of alternate transcription events. Recent software improvements in full-length transcript deconvolution prompted us to develop spliceR, an R package for classification of alternative splicing and prediction of coding potential.

Results

spliceR uses the full-length transcript output from RNA-seq assemblers to detect single or multiple exon skipping, alternative donor and acceptor sites, intron retention, alternative first or last exon usage, and mutually exclusive exon events. For each of these events spliceR also annotates the genomic coordinates of the differentially spliced elements, facilitating downstream sequence analysis. For each transcript isoform fraction values are calculated to identify transcript switching between conditions. Lastly, spliceR predicts the coding potential, as well as the potential nonsense mediated decay (NMD) sensitivity of each transcript.

Conclusions

spliceR is an easy-to-use tool that extends the usability of RNA-seq and assembly technologies by allowing greater depth of annotation of RNA-seq data. spliceR is implemented as an R package and is freely available from the Bioconductor repository ( http://www.bioconductor.org/packages/2.13/bioc/html/spliceR.html).",2014-03-23 +22213674,The current Salmonella-host interactome.,"Salmonella bacteria cause millions of infections and thousands of deaths every year. This pathogen has an unusually broad host range including humans, animals, and even plants. During infection, Salmonella expresses a variety of virulence factors and effectors that are delivered into the host cell triggering cellular responses through protein-protein interactions (PPI) with host cell proteins which make the pathogen's invasion and replication possible. To speed up proteomic efforts in elucidating Salmonella-host interactomes, we carried out a survey of the currently published Salmonella-host PPI. Such a list can serve as the gold standard for computational models aimed at predicting Salmonella-host interactomes through integration of large-scale biological data sources. Manual literature and database search of >2200 journal articles and >100 databases resulted in a gold standard list of currently 62 PPI, including primarily interactions of Salmonella proteins with human and mouse proteins. Only six of these interactions were directly retrievable from PPI databases and 16 were highlighted in databases featuring literature extracts. Thus, the literature survey resulted in the most complete interactome available to date for Salmonella. Pathway analysis using Ingenuity and Broad Gene Set Enrichment Analysis (GSEA) software revealed among general pathways such as MAPK signaling in particular those related to cell death as well as cell morphology, turnover, and interactions, in addition to response to not only Salmonella but also other pathogenic - viral and bacterial - infections. 
The list of interactions is available at http://www.shiprec.org/indicationslist.htm.",2011-12-27 +25190367,Closing the loop: from paper to protein annotation using supervised Gene Ontology classification. ,"Gene function curation of the literature with Gene Ontology (GO) concepts is one particularly time-consuming task in genomics, and the help from bioinformatics is highly requested to keep up with the flow of publications. In 2004, the first BioCreative challenge already designed a task of automatic GO concepts assignment from a full text. At this time, results were judged far from reaching the performances required by real curation workflows. In particular, supervised approaches produced the most disappointing results because of lack of training data. Ten years later, the available curation data have massively grown. In 2013, the BioCreative IV GO task revisited the automatic GO assignment task. For this issue, we investigated the power of our supervised classifier, GOCat. GOCat computes similarities between an input text and already curated instances contained in a knowledge base to infer GO concepts. The subtask A consisted in selecting GO evidence sentences for a relevant gene in a full text. For this, we designed a state-of-the-art supervised statistical approach, using a naïve Bayes classifier and the official training set, and obtained fair results. The subtask B consisted in predicting GO concepts from the previous output. For this, we applied GOCat and reached leading results, up to 65% for hierarchical recall in the top 20 outputted concepts. Contrary to previous competitions, machine learning has this time outperformed standard dictionary-based approaches. Thanks to BioCreative IV, we were able to design a complete workflow for curation: given a gene name and a full text, this system is able to select evidence sentences for curation and to deliver highly relevant GO concepts. 
Contrary to previous competitions, machine learning this time outperformed dictionary-based systems. Observed performances are sufficient for being used in a real semiautomatic curation workflow. GOCat is available at http://eagl.unige.ch/GOCat/. http://eagl.unige.ch/GOCat4FT/.",2014-09-04 +23657089,DDBJ read annotation pipeline: a cloud computing-based pipeline for high-throughput analysis of next-generation sequencing data.,"High-performance next-generation sequencing (NGS) technologies are advancing genomics and molecular biological research. However, the immense amount of sequence data requires computational skills and suitable hardware resources that are a challenge to molecular biologists. The DNA Data Bank of Japan (DDBJ) of the National Institute of Genetics (NIG) has initiated a cloud computing-based analytical pipeline, the DDBJ Read Annotation Pipeline (DDBJ Pipeline), for a high-throughput annotation of NGS reads. The DDBJ Pipeline offers a user-friendly graphical web interface and processes massive NGS datasets using decentralized processing by NIG supercomputers currently free of charge. The proposed pipeline consists of two analysis components: basic analysis for reference genome mapping and de novo assembly and subsequent high-level analysis of structural and functional annotations. Users may smoothly switch between the two components in the pipeline, facilitating web-based operations on a supercomputer for high-throughput data analysis. Moreover, public NGS reads of the DDBJ Sequence Read Archive located on the same supercomputer can be imported into the pipeline through the input of only an accession number. This proposed pipeline will facilitate research by utilizing unified analytical workflows applied to the NGS data. 
The DDBJ Pipeline is accessible at http://p.ddbj.nig.ac.jp/.",2013-05-08 +23455439,A community-driven global reconstruction of human metabolism.,"Multiple models of human metabolism have been reconstructed, but each represents only a subset of our knowledge. Here we describe Recon 2, a community-driven, consensus 'metabolic reconstruction', which is the most comprehensive representation of human metabolism that is applicable to computational modeling. Compared with its predecessors, the reconstruction has improved topological and functional features, including ∼2× more reactions and ∼1.7× more unique metabolites. Using Recon 2 we predicted changes in metabolite biomarkers for 49 inborn errors of metabolism with 77% accuracy when compared to experimental data. Mapping metabolomic data and drug information onto Recon 2 demonstrates its potential for integrating and analyzing diverse data types. Using protein expression data, we automatically generated a compendium of 65 cell type-specific models, providing a basis for manual curation or investigation of cell-specific metabolic properties. Recon 2 will facilitate many future biomedical studies and is freely available at http://humanmetabolism.org/.",2013-03-03 +23712657,DeMix: deconvolution for mixed cancer transcriptomes using raw measured data.,"

Motivation

Tissue samples of tumor cells mixed with stromal cells cause underdetection of gene expression signatures associated with cancer prognosis or response to treatment. In silico dissection of mixed cell samples is essential for analyzing expression data generated in cancer studies. Currently, a systematic approach is lacking to address three challenges in computational deconvolution: (i) violation of linear addition of expression levels from multiple tissues when log-transformed microarray data are used; (ii) estimation of both tumor proportion and tumor-specific expression, when neither is known a priori; and (iii) estimation of expression profiles for individual patients.

Results

We have developed a statistical method for deconvolving mixed cancer transcriptomes, DeMix, which addresses the aforementioned issues in array-based expression data. We demonstrate the performance of our model in synthetic and real, publicly available, datasets. DeMix can be applied to ongoing biomarker-based clinical studies and to the vast expression datasets previously generated from mixed tumor and stromal cell samples.

Availability

All code is written in C and integrated into an R function, which is available at http://odin.mdacc.tmc.edu/~wwang7/DeMix.html.

Contact

wwang7@mdanderson.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-27 +25440773,Automatic validation of computational models using pseudo-3D spatio-temporal model checking.,"

Background

Computational models play an increasingly important role in systems biology for generating predictions and in synthetic biology as executable prototypes/designs. For real life (clinical) applications there is a need to scale up and build more complex spatio-temporal multiscale models; these could enable investigating how changes at small scales reflect at large scales and vice versa. Results generated by computational models can be applied to real life applications only if the models have been validated first. Traditional in silico model checking techniques only capture how non-dimensional properties (e.g. concentrations) evolve over time and are suitable for small scale systems (e.g. metabolic pathways). The validation of larger scale systems (e.g. multicellular populations) additionally requires capturing how spatial patterns and their properties change over time, which are not considered by traditional non-spatial approaches.

Results

We developed and implemented a methodology for the automatic validation of computational models with respect to both their spatial and temporal properties. Stochastic biological systems are represented by abstract models which assume a linear structure of time and a pseudo-3D representation of space (2D space plus a density measure). Time series data generated by such models is provided as input to parameterised image processing modules which automatically detect and analyse spatial patterns (e.g. cell) and clusters of such patterns (e.g. cellular population). For capturing how spatial and numeric properties change over time the Probabilistic Bounded Linear Spatial Temporal Logic is introduced. Given a collection of time series data and a formal spatio-temporal specification the model checker Mudi ( http://mudi.modelchecking.org ) determines probabilistically if the formal specification holds for the computational model or not. Mudi is an approximate probabilistic model checking platform which enables users to choose between frequentist and Bayesian, estimate and statistical hypothesis testing based validation approaches. We illustrate the expressivity and efficiency of our approach based on two biological case studies namely phase variation patterning in bacterial colony growth and the chemotactic aggregation of cells.

Conclusions

The formal methodology implemented in Mudi enables the validation of computational models against spatio-temporal logic properties and is a precursor to the development and validation of more complex multidimensional and multiscale models.",2014-12-02 +24957597,PubServer: literature searches by homology.,"PubServer, available at http://pubserver.burnham.org/, is a tool to automatically collect, filter and analyze publications associated with groups of homologous proteins. Protein entries in databases such as Entrez Protein database at NCBI contain information about publications associated with a given protein. The scope of these publications varies a lot: they include studies focused on biochemical functions of individual proteins, but also reports from genome sequencing projects that introduce tens of thousands of proteins. Collecting and analyzing publications related to sets of homologous proteins help in functional annotation of novel protein families and in improving annotations of well-studied protein families or individual genes. However, performing such collection and analysis manually is a tedious and time-consuming process. PubServer automatically collects identifiers of homologous proteins using PSI-Blast, retrieves literature references from corresponding database entries and filters out publications unlikely to contain useful information about individual proteins. It also prepares simple vocabulary statistics from titles, abstracts and MeSH terms to identify the most frequently occurring keywords, which may help to quickly identify common themes in these publications. The filtering criteria applied to collected publications are user-adjustable. 
The results of the server are presented as an interactive page that allows re-filtering and different presentations of the output.",2014-06-23 +24953454,VANESA - a software application for the visualization and analysis of networks in system biology applications.,"VANESA is a modeling software for the automatic reconstruction and analysis of biological networks based on life-science database information. Using VANESA, scientists are able to model any kind of biological processes and systems as biological networks. It is now possible for scientists to automatically reconstruct important molecular systems with information from the databases KEGG, MINT, IntAct, HPRD, and BRENDA. Additionally, experimental results can be expanded with database information to better analyze the investigated elements and processes in an overall context. Users also have the possibility to use graph theoretical approaches in VANESA to identify regulatory structures and significant actors within the modeled systems. These structures can then be further investigated in the Petri net environment of VANESA. It is platform-independent, free-of-charge, and available at http://vanesa.sf.net.",2014-06-23 +21685053,CHASM and SNVBox: toolkit for detecting biologically important single nucleotide mutations in cancer.,"

Summary

Thousands of cancer exomes are currently being sequenced, yielding millions of non-synonymous single nucleotide variants (SNVs) of possible relevance to disease etiology. Here, we provide a software toolkit to prioritize SNVs based on their predicted contribution to tumorigenesis. It includes a database of precomputed, predictive features covering all positions in the annotated human exome and can be used either stand-alone or as part of a larger variant discovery pipeline.

Availability and implementation

MySQL database, source code and binaries freely available for academic/government use at http://wiki.chasmsoftware.org, Source in Python and C++. Requires 32 or 64-bit Linux system (tested on Fedora Core 8,10,11 and Ubuntu 10), 2.5*≤ Python <3.0*, MySQL server >5.0, 60 GB available hard disk space (50 MB for software and data files, 40 GB for MySQL database dump when uncompressed), 2 GB of RAM.",2011-06-17 +23966112,Strengths and limitations of microarray-based phenotype prediction: lessons learned from the IMPROVER Diagnostic Signature Challenge.,"

Motivation

After more than a decade since microarrays were used to predict phenotype of biological samples, real-life applications for disease screening and identification of patients who would best benefit from treatment are still emerging. The interest of the scientific community in identifying best approaches to develop such prediction models was reaffirmed in a competition style international collaboration called IMPROVER Diagnostic Signature Challenge whose results we describe herein.

Results

Fifty-four teams used public data to develop prediction models in four disease areas including multiple sclerosis, lung cancer, psoriasis and chronic obstructive pulmonary disease, and made predictions on blinded new data that we generated. Teams were scored using three metrics that captured various aspects of the quality of predictions, and best performers were awarded. This article presents the challenge results and introduces to the community the approaches of the best overall three performers, as well as an R package that implements the approach of the best overall team. The analyses of model performance data submitted in the challenge as well as additional simulations that we have performed revealed that (i) the quality of predictions depends more on the disease endpoint than on the particular approaches used in the challenge; (ii) the most important modeling factor (e.g. data preprocessing, feature selection and classifier type) is problem dependent; and (iii) for optimal results datasets and methods have to be carefully matched. Biomedical factors such as the disease severity and confidence in diagnostic were found to be associated with the misclassification rates across the different teams.

Availability

The lung cancer dataset is available from Gene Expression Omnibus (accession, GSE43580). The maPredictDSC R package implementing the approach of the best overall team is available at www.bioconductor.org or http://bioinformaticsprb.med.wayne.edu/.",2013-08-20 +24263090,Target analysis by integration of transcriptome and ChIP-seq data with BETA.,"The combination of ChIP-seq and transcriptome analysis is a compelling approach to unravel the regulation of gene expression. Several recently published methods combine transcription factor (TF) binding and gene expression for target prediction, but few of them provide an efficient software package for the community. Binding and expression target analysis (BETA) is a software package that integrates ChIP-seq of TFs or chromatin regulators with differential gene expression data to infer direct target genes. BETA has three functions: (i) to predict whether the factor has activating or repressive function; (ii) to infer the factor's target genes; and (iii) to identify the motif of the factor and its collaborators, which might modulate the factor's activating or repressive function. Here we describe the implementation and features of BETA to demonstrate its application to several data sets. BETA requires ~1 GB of RAM, and the procedure takes 20 min to complete. BETA is available open source at http://cistrome.org/BETA/.",2013-11-21 +23446869,Studying the evolution of transcription factor binding events using multi-species ChIP-Seq data.,"Recent technology advances make it possible to collect whole-genome transcription factor binding (TFB) profiles from multiple species through the ChIP-Seq data. This provides rich information to understand TFB evolution. However, few rigorous statistical models are available to infer TFB evolution from these data. We have developed a phylogenetic tree based method to model the on/off rates of TFB events. There are two unique features of our method compared to existing models. 
First, we mask nucleotide substitutions and focus on INDEL disruption of TFB events, which are rarer evolution events and more appropriate for divergent species and non-coding regulatory regions. Second, we correct for ascertainment bias in ChIP-Seq data by maximizing likelihood conditional on the observed (incomplete) data. Simulations show that our method works well in model selection and parameter estimation when there are sufficient aligned TFB events. When this method is applied to a ChIP-Seq data set with five vertebrates, we find that the instantaneous transition rates to INDELs are higher in TFB regions than in homologous non-binding regions. This is driven by an excess of alignment columns showing binding in one species but gaps in all other species. When we compare the inferred transition rates between the conserved and non-conserved regions, as expected, the conserved regions are estimated to have lower transition rates. The R package TFBphylo that implements the described model can be downloaded from http://bioinformatics.med.yale.edu/.",2013-03-26 +24952649,CicArMiSatDB: the chickpea microsatellite database.,"

Background

Chickpea (Cicer arietinum) is a widely grown legume crop in tropical, sub-tropical and temperate regions. Molecular breeding approaches seem to be essential for enhancing crop productivity in chickpea. Until recently, limited numbers of molecular markers were available in the case of chickpea for use in molecular breeding. However, the recent advances in genomics facilitated the development of large scale markers especially SSRs (simple sequence repeats), the markers of choice in any breeding program. Availability of genome sequence very recently opens new avenues for accelerating molecular breeding approaches for chickpea improvement.

Description

In order to assist genetic studies and breeding applications, we have developed a user friendly relational database named the Chickpea Microsatellite Database (CicArMiSatDB http://cicarmisatdb.icrisat.org). This database provides detailed information on SSRs along with their features in the genome. SSRs have been classified and made accessible through an easy-to-use web interface.

Conclusions

This database is expected to help the chickpea community in particular, and the legume community in general, to select SSRs of a particular type or from a specific region in the genome to advance both basic genomics research and applied aspects of crop improvement.",2014-06-21 +23893318,Cancer Digital Slide Archive: an informatics resource to support integrated in silico analysis of TCGA pathology data.,"

Background

The integration and visualization of multimodal datasets is a common challenge in biomedical informatics. Several recent studies of The Cancer Genome Atlas (TCGA) data have illustrated important relationships between morphology observed in whole-slide images, outcome, and genetic events. The pairing of genomics and rich clinical descriptions with whole-slide imaging provided by TCGA presents a unique opportunity to perform these correlative studies. However, better tools are needed to integrate the vast and disparate data types.

Objective

To build an integrated web-based platform supporting whole-slide pathology image visualization and data integration.

Materials and methods

All images and genomic data were directly obtained from the TCGA and National Cancer Institute (NCI) websites.

Results

The Cancer Digital Slide Archive (CDSA) produced is accessible to the public (http://cancer.digitalslidearchive.net) and currently hosts more than 20,000 whole-slide images from 22 cancer types.

Discussion

The capabilities of CDSA are demonstrated using TCGA datasets to integrate pathology imaging with associated clinical, genomic and MRI measurements in glioblastomas and can be extended to other tumor types. CDSA also allows URL-based sharing of whole-slide images, and has preliminary support for directly sharing regions of interest and other annotations. Images can also be selected on the basis of other metadata, such as mutational profile, patient age, and other relevant characteristics.

Conclusions

With the increasing availability of whole-slide scanners, analysis of digitized pathology images will become increasingly important in linking morphologic observations with genomic and clinical endpoints.",2013-07-25 +23407359,A dynamic Bayesian Markov model for phasing and characterizing haplotypes in next-generation sequencing.,"

Motivation

Next-generation sequencing (NGS) technologies have enabled whole-genome discovery and analysis of genetic variants in many species of interest. Individuals are often sequenced at low coverage for detecting novel variants, phasing haplotypes and inferring population structures. Although several tools have been developed for SNP and genotype calling in NGS data, haplotype phasing is often done separately on the called genotypes.

Results

We propose a dynamic Bayesian Markov model (DBM) for simultaneous genotype calling and haplotype phasing in low-coverage NGS data of unrelated individuals. Our method is fully probabilistic that produces consistent inference of genotypes, haplotypes and recombination probabilities. Using data from the 1000 Genomes Project, we demonstrate that DBM not only yields more accurate results than some popular methods, but also provides novel characterization of haplotype structures at the individual level for visualization, interpretation and comparison in downstream analysis. DBM is a powerful and flexible tool that can be applied to many sequencing studies. Its statistical framework can also be extended to accommodate broader scopes of data.

Availability and implementation

http://stat.psu.edu/~yuzhang/software/dbm.tar.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-13 +21450710,hmChIP: a database and web server for exploring publicly available human and mouse ChIP-seq and ChIP-chip data.,"

Unlabelled

hmChIP is a database of genome-wide chromatin immunoprecipitation (ChIP) data in human and mouse. Currently, the database contains 2016 samples from 492 ChIP-seq and ChIP-chip experiments, representing a total of 170 proteins and 11 069 914 protein-DNA interactions. A web server provides interface for database query. Protein-DNA binding intensities can be retrieved from individual samples for user-provided genomic regions. The retrieved intensities can be used to cluster samples and genomic regions to facilitate exploration of combinatorial patterns, cell-type dependencies, and cross-sample variability of protein-DNA interactions.

Availability

http://jilab.biostat.jhsph.edu/database/cgi-bin/hmChIP.pl.",2011-03-30 +25161238,CRISPRstrand: predicting repeat orientations to determine the crRNA-encoding strand at CRISPR loci.,"

Motivation

The discovery of CRISPR-Cas systems almost 20 years ago rapidly changed our perception of the bacterial and archaeal immune systems. CRISPR loci consist of several repetitive DNA sequences called repeats, inter-spaced by stretches of variable length sequences called spacers. This CRISPR array is transcribed and processed into multiple mature RNA species (crRNAs). A single crRNA is integrated into an interference complex, together with CRISPR-associated (Cas) proteins, to bind and degrade invading nucleic acids. Although existing bioinformatics tools can recognize CRISPR loci by their characteristic repeat-spacer architecture, they generally output CRISPR arrays of ambiguous orientation and thus do not determine the strand from which crRNAs are processed. Knowledge of the correct orientation is crucial for many tasks, including the classification of CRISPR conservation, the detection of leader regions, the identification of target sites (protospacers) on invading genetic elements and the characterization of protospacer-adjacent motifs.

Results

We present a fast and accurate tool to determine the crRNA-encoding strand at CRISPR loci by predicting the correct orientation of repeats based on an advanced machine learning approach. Both the repeat sequence and mutation information were encoded and processed by an efficient graph kernel to learn higher-order correlations. The model was trained and tested on curated data comprising >4500 CRISPRs and yielded a remarkable performance of 0.95 AUC ROC (area under the curve of the receiver operator characteristic). In addition, we show that accurate orientation information greatly improved detection of conserved repeat sequence families and structure motifs. We integrated CRISPRstrand predictions into our CRISPRmap web server of CRISPR conservation and updated the latter to version 2.0.

Availability

CRISPRmap and CRISPRstrand are available at http://rna.informatik.uni-freiburg.de/CRISPRmap.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +26130575,"GDFuzz3D: a method for protein 3D structure reconstruction from contact maps, based on a non-Euclidean distance function.","

Motivation

To date, only a few distinct successful approaches have been introduced to reconstruct a protein 3D structure from a map of contacts between its amino acid residues (a 2D contact map). Current algorithms can infer structures from information-rich contact maps that contain a limited fraction of erroneous predictions. However, it is difficult to reconstruct 3D structures from predicted contact maps that usually contain a high fraction of false contacts.

Results

We describe a new, multi-step protocol that predicts protein 3D structures from the predicted contact maps. The method is based on a novel distance function acting on a fuzzy residue proximity graph, which predicts a 2D distance map from a 2D predicted contact map. The application of a Multi-Dimensional Scaling algorithm transforms that predicted 2D distance map into a coarse 3D model, which is further refined by typical modeling programs into an all-atom representation. We tested our approach on contact maps predicted de novo by MULTICOM, the top contact map predictor according to CASP10. We show that our method outperforms FT-COMAR, the state-of-the-art method for 3D structure reconstruction from 2D maps. For all predicted 2D contact maps of relatively low sensitivity (60-84%), GDFuzz3D generates more accurate 3D models, with the average improvement of 4.87 Å in terms of RMSD.

Availability and implementation

GDFuzz3D server and standalone version are freely available at http://iimcb.genesilico.pl/gdserver/GDFuzz3D/.

Contact

iamb@genesilico.pl

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-30 +25954400,Pharmacovigilance on twitter? Mining tweets for adverse drug reactions.,"Recent research has shown that Twitter data analytics can have broad implications on public health research. However, its value for pharmacovigilance has been scantly studied - with health related forums and community support groups preferred for the task. We present a systematic study of tweets collected for 74 drugs to assess their value as sources of potential signals for adverse drug reactions (ADRs). We created an annotated corpus of 10,822 tweets. Each tweet was annotated for the presence or absence of ADR mentions, with the span and Unified Medical Language System (UMLS) concept ID noted for each ADR present. Using Cohen's kappa1, we calculated the inter-annotator agreement (IAA) for the binary annotations to be 0.69. To demonstrate the utility of the corpus, we attempted a lexicon-based approach for concept extraction, with promising success (54.1% precision, 62.1% recall, and 57.8% F-measure). A subset of the corpus is freely available at: http://diego.asu.edu/downloads.",2014-11-14 +24836530,MethylSig: a whole genome DNA methylation analysis pipeline.,"

Motivation

DNA methylation plays critical roles in gene regulation and cellular specification without altering DNA sequences. The wide application of reduced representation bisulfite sequencing (RRBS) and whole genome bisulfite sequencing (bis-seq) opens the door to study DNA methylation at single CpG site resolution. One challenging question is how best to test for significant methylation differences between groups of biological samples in order to minimize false positive findings.

Results

We present a statistical analysis package, methylSig, to analyse genome-wide methylation differences between samples from different treatments or disease groups. MethylSig takes into account both read coverage and biological variation by utilizing a beta-binomial approach across biological samples for a CpG site or region, and identifies relevant differences in CpG methylation. It can also incorporate local information to improve group methylation level and/or variance estimation for experiments with small sample size. A permutation study based on data from enhanced RRBS samples shows that methylSig maintains a well-calibrated type-I error when the number of samples is three or more per group. Our simulations show that methylSig has higher sensitivity compared with several alternative methods. The use of methylSig is illustrated with a comparison of different subtypes of acute leukemia and normal bone marrow samples.

Availability

methylSig is available as an R package at http://sartorlab.ccmb.med.umich.edu/software.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-16 +21791094,Bayesian semi-supervised classification of bacterial samples using MLST databases.,"

Background

Worldwide effort on sampling and characterization of molecular variation within a large number of human and animal pathogens has led to the emergence of multi-locus sequence typing (MLST) databases as an important tool for studying the epidemiology and evolution of pathogens. Many of these databases are currently harboring several thousands of multi-locus DNA sequence types (STs) enriched with metadata over traits such as serotype, antibiotic resistance, host organism etc of the isolates. Curators of the databases have thus the possibility of dividing the pathogen populations into subsets representing different evolutionary lineages, geographically associated groups, or other subpopulations, which are defined in terms of molecular similarities and dissimilarities residing within a database. When combined with the existing metadata, such subsets may provide invaluable information for assessing the position of a new set of isolates in relation to the whole pathogen population.

Results

To enable users of MLST schemes to query the databases with sets of new bacterial isolates and to automatically analyze their relation to existing curated sequences, we introduce here a Bayesian model-based method for semi-supervised classification of MLST data. Our method can use an MLST database as a training set and assign simultaneously any set of query sequences into the earlier discovered lineages/populations, while also allowing some or all of these sequences to form previously undiscovered genetically distinct groups. This tool provides probabilistic quantification of the classification uncertainty and is highly efficient computationally, thus enabling rapid analyses of large databases and sets of query sequences. The latter feature is a necessary prerequisite for an automated access through the MLST web interface. We demonstrate the versatility of our approach by analyzing both real and synthesized data from MLST databases. The introduced method for semi-supervised classification of sets of query STs is freely available for Windows, Mac OS X and Linux operating systems in BAPS 5.4 software which is downloadable at http://web.abo.fi/fak/mnf/mate/jc/software/baps.html. The query functionality is also directly available for the Staphylococcus aureus database at http://www.mlst.net and shortly will be available for other species databases hosted at this web portal.

Conclusions

We have introduced a model-based tool for automated semi-supervised classification of new pathogen samples that can be integrated into the web interface of the MLST databases. In particular, when combined with the existing metadata, the semi-supervised labeling may provide invaluable information for assessing the position of a new set of query strains in relation to the particular pathogen population represented by the curated database. Such information will be useful both for clinical and basic research purposes.",2011-07-26 +21877999,Emerging viral infections in neonatal intensive care unit.,"Nosocomial infections are the most important cause of morbidity and mortality among neonates and mostly in infants admitted to neonatal intensive care units (NICU). The total number of neonates who develop nosocomial infections per admission varies from 6.2 to 30%. The role of nosocomial virus infections is generally neglected in the actual epidemiologic scenario mostly due to the lack of data in the medical literature. Based on a worldwide database of health care-associated outbreaks (http://www.outbreak-database.com) we performed an analysis of the incidence, type of pathogens and clinical features of neonatal viral outbreaks especially those reported in NICUs. We also describe, as an example of emerging virus in NICU, a Norovirus outbreak along with clinical presentation that varies from mild to moderate clinical symptoms like vomiting, gastric remainder, diarrhoea, abdominal distension or severe presentation like necrotizing enterocolitis, and measures implemented for terminating the outbreak. 
In conclusion, our study analyses the viral origins of nosocomial infections in NICU and underlines that the role of viral agents in neonatal nosocomial infections needs to be further investigated even in diseases traditionally considered of bacterial origin like necrotizing enterocolitis.",2011-08-31 +25504849,MpTheory Java library: a multi-platform Java library for systems biology based on the Metabolic P theory.,"

Unlabelled

MpTheory Java library is an open-source project collecting a set of objects and algorithms for modeling observed dynamics by means of the Metabolic P (MP) theory, that is, a mathematical theory introduced in 2004 for modeling biological dynamics. By means of the library, it is possible to model biological systems both at continuous and at discrete time. Moreover, the library comprises a set of regression algorithms for inferring MP models starting from time series of observations. To enhance the modeling experience, beside a pure Java usage, the library can be directly used within the most popular computing environments, such as MATLAB, GNU Octave, Mathematica and R.

Availability and implementation

The library is open-source and licensed under the GNU Lesser General Public License (LGPL) Version 3.0. Source code, binaries and complete documentation are available at http://mptheory.scienze.univr.it.

Contact

luca.marchetti@univr.it, marchetti@cosbi.eu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-10 +22025760,Mutation region detection for closely related individuals without a known pedigree using high-density genotype data.,"The fundamental problem in linkage analysis is to identify regions whose allele is shared by all or almost all affected members but by none or few unaffected members. Almost all the existing methods for linkage analysis are for families with clearly given pedigrees. Little work has been done for the case where the sampled individuals are closely related, but their pedigree is not known. This situation occurs very often when the individuals share a common ancestor at least six generations ago. Solving this case will tremendously extend the use of linkage analysis for finding genes that cause genetic diseases. In this paper, we propose a mathematical model (the shared center problem) for inferring the allele-sharing status of a given set of individuals using a database of confirmed haplotypes as reference. We show the NP-completeness of the shared center problem and present a ratio-2 polynomial-time approximation algorithm. We then convert the approximation algorithm into a heuristic algorithm for the shared center problem. Based on this heuristic, we finally design a heuristic algorithm for mutation region detection. We further implement the algorithms to obtain a software package. Our experimental data shows that the software works very well. 
The package is available at http://www.cs.cityu.edu.hk/~lwang/software/LDWP/index.html for non-commercial use.",2011-10-17 +24953553,Evaluation of global sequence comparison and one-to-one FASTA local alignment in regulatory allergenicity assessment of transgenic proteins in food crops.,"To address the high false positive rate using >35% identity over 80 amino acids in the regulatory assessment of transgenic proteins for potential allergenicity and the change of E-value with database size, the Needleman-Wunsch global sequence alignment and a one-to-one (1:1) local FASTA search (one protein in the target database at a time) using FASTA were evaluated by comparing proteins randomly selected from Arabidopsis, rice, corn, and soybean with known allergens in a peer-reviewed allergen database (http://www.allergenonline.org/). Compared with the approach of searching >35%/80aa+, the false positive rate measured by specificity rate for identification of true allergens was reduced by a 1:1 global sequence alignment with a cut-off threshold of ≧30% identity and a 1:1 FASTA local alignment with a cut-off E-value of ≦1.0E-09 while maintaining the same sensitivity. Hence, a 1:1 sequence comparison, especially using the FASTA local alignment tool with a biological relevant E-value of 1.0E-09 as a threshold, is recommended for the regulatory assessment of sequence identities between transgenic proteins in food crops and known allergens.",2014-06-19 +22527523,Literature and patent analysis of the cloning and identification of human functional genes in China.,"The Human Genome Project was launched at the end of the 1980s. Since then, the cloning and identification of functional genes has been a major focus of research across the world. In China too, the potentially profound impact of such studies on the life sciences and on human health was realized, and relevant studies were initiated in the 1990s. 
To advance China's involvement in the Human Genome Project, in the mid-1990s, Committee of Experts in Biology from National High Technology Research and Development Program of China (863 Program) proposed the ""two 1%"" goal. This goal envisaged China contributing 1% of the total sequencing work, and cloning and identifying 1% of the total human functional genes. Over the past 20 years, tremendous achievement has been accomplished by Chinese scientists. It is well known that scientists in China finished the 1% of sequencing work of the Human Genome Project, whereas, there is no comprehensive report about ""whether China had finished cloning and identifying 1% of human functional genes"". In the present study, the GenBank database at the National Center of Biotechnology Information, the PubMed search tool, and the patent database of the State Intellectual Property Office, China, were used to retrieve entries based on two screening standards: (i) Were the newly cloned and identified genes first reported by Chinese scientists? (ii) Were the Chinese scientists awarded the gene sequence patent? Entries were retrieved from the databases up to the cut-off date of 30 June 2011 and the obtained data were analyzed further. The results showed that 589 new human functional genes were first reported by Chinese scientists and 159 gene sequences were patented (http://gene.fudan.sh.cn/introduction/database/chinagene/chinagene.html). This study systematically summarizes China's contributions to human functional genomics research and answers the question ""has China finished cloning and identifying 1% of human functional genes?"" in the affirmative.",2012-03-01 +24947936,XPC Lys939Gln polymorphism contributes to colorectal cancer susceptibility: evidence from a meta-analysis.,"

Background

Published studies investigating the association between XPC Lys939Gln polymorphism and colorectal cancer (CRC) risk reported inconclusive results. We performed a meta-analysis to derive a precise estimation of the relationship.

Methods

A comprehensive literature search was done in databases PubMed, EMBASE, and Cochrane library up to December 2013. The association between XPC Lys939Gln polymorphism and CRC risk was assessed by odds ratios (ORs) together with their 95% confidence intervals (CIs).

Results

Eight studies with 3,301 cases and 4,177 controls were included in the meta-analysis. We observed that the XPC Lys939Gln polymorphism was correlated with an increased CRC risk when all studies were pooled into the meta-analysis (Gln/lys vs. Lys/Lys: OR = 1.293, 95% CI 1.169-1.430, P = 0.000; Gln/Gln + Gln/lys vs. Lys/Lys: OR = 1.260, 95% CI 1.145-1.388, P = 0.000). In stratified analyses by ethnicity, smoking, and study quality, significantly increased CRC risk was found in Asians (Gln/lys vs. Lys/Lys: OR = 1.345, 95% CI 1.187-1.523, P = 0.000; Gln/Gln + Gln/lys vs. Lys/Lys: OR = 1.317, 95% CI 1.170-1.484, P = 0.000), nonsmokers (Gln/Gln + Gln/lys vs. Lys/Lys: OR = 1.286, 95% CI 1.020-1.622, P = 0.033), and high quality studies. In subgroup analysis by source of control, significantly increased CRC risk was found in both hospital-based studies and population-based studies. However, in subgroup analysis according to cancer location, no significant association was detected.

Conclusions

This meta-analysis suggests that the XPC is a candidate gene for CRC susceptibility. The XPC Lys939Gln polymorphism may play an important role in CRC development among Asians and nonsmokers. Further large and well-designed studies are needed to confirm this association.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1665902729125948.",2014-06-19 +24948179,Glutathione S-transferase M1 null genotype meta-analysis on gastric cancer risk.,"

Background

Glutathione S-transferases (GSTs) have proved to be involved in detoxifying several carcinogens and may play an important role in carcinogenesis of cancer. Previous studies on the association between Glutathione S-transferase M1 (GSTM1) polymorphism and gastric cancer (GC) risk reported inconclusive results. To get a precise result, we conducted this present meta-analysis through pooling all eligible studies.

Methods

The PubMed, Embase, Web of Science, and Chinese Biomedical Database (CBM) databases were comprehensively searched for case-control studies investigating the association between GSTM1 null genotype and GC risk. Odds ratios (OR) and 95% confidence intervals (95% CI) were used to assess this possible association. A χ2-based Q-test was used to examine the heterogeneity assumption. Begg's and Egger's tests were used to examine the potential publication bias. The leave-one-out sensitivity analysis was conducted to determine whether our assumptions or decisions have a major effect on the results of the present work. Statistical analyses were performed with the software program STATA 12.0.

Results

A total of 47 eligible case-control studies were identified, including 6,678 cases and 12,912 controls. Our analyses suggested that GSTM1 null genotype was significantly associated with increased risk of GC (OR=1.186, 95% CI=1.057-1.329, Pheterogeneity=0.000, P=0.004). Significant association was also found in Asians (OR=1.269, 95% CI=1.106-1.455, Pheterogeneity=0.002, P=0.001). However, GSTM1 null genotype did not contribute to GC risk in Caucasians (OR=1.115, 95% CI=0.937-1.326, Pheterogeneity=0.000, P=0.222). In the subgroup analysis stratified by sources of controls, significant association was detected in hospital-based studies (OR=1.355, 95% CI=1.179-1.557, Pheterogeneity=0.001, P=0.000), while there was no significant association detected in population-based studies (OR=1.017, 95% CI=0.862-1.200, Pheterogeneity=0.000, P=0.840).

Conclusion

This meta-analysis showed the evidence that GSTM1 null genotype contributed to the development of GC.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1644180505119533.",2014-06-19 +21482578,"A database of thermodynamic properties of the reactions of glycolysis, the tricarboxylic acid cycle, and the pentose phosphate pathway.","A database of thermodynamic properties is developed, which extends a previous database of glycolysis and tricarboxylic acid cycle by adding the reactions of the pentose phosphate pathway. The raw data and documented estimations of solution properties are made electronically available. The database is determined by estimation of a set of parameters representing species-level free energies of formation. The resulting calculations provide thermodynamic and network-based estimates of thermodynamic properties for six reactions of the pentose phosphate pathway for which estimates are not available in the preexisting literature. Optimized results are made available in ThermoML format. Because calculations depend on estimated hydrogen and metal cation dissociation constants, an uncertainty and sensitivity analysis is performed, revealing 23 critical dissociation constants to which the computed thermodynamic properties are particularly sensitive. DATABASE URL: http://www.biocoda.org/thermo",2011-04-11 +26284082,QueTAL: a suite of tools to classify and compare TAL effectors functionally and phylogenetically.,"Transcription Activator-Like (TAL) effectors from Xanthomonas plant pathogenic bacteria can bind to the promoter region of plant genes and induce their expression. DNA-binding specificity is governed by a central domain made of nearly identical repeats, each determining the recognition of one base pair via two amino acid residues (a.k.a. Repeat Variable Di-residue, or RVD). 
Knowing how TAL effectors differ from each other within and between strains would be useful to infer functional and evolutionary relationships, but their repetitive nature precludes reliable use of traditional alignment methods. The suite QueTAL was therefore developed to offer tailored tools for comparison of TAL effector genes. The program DisTAL considers each repeat as a unit, transforms a TAL effector sequence into a sequence of coded repeats and makes pair-wise alignments between these coded sequences to construct trees. The program FuncTAL is aimed at finding TAL effectors with similar DNA-binding capabilities. It calculates correlations between position weight matrices of potential target DNA sequence predicted from the RVD sequence, and builds trees based on these correlations. The programs accurately represented phylogenetic and functional relationships between TAL effectors using either simulated or literature-curated data. When using the programs on a large set of TAL effector sequences, the DisTAL tree largely reflected the expected species phylogeny. In contrast, FuncTAL showed that TAL effectors with similar binding capabilities can be found between phylogenetically distant taxa. This suite will help users to rapidly analyse any TAL effector genes of interest and compare them to other available TAL genes and should improve our understanding of TAL effectors evolution. It is available at http://bioinfo-web.mpl.ird.fr/cgi-bin2/quetal/quetal.cgi.",2015-08-03 +25063469,Corset: enabling differential gene expression analysis for de novo assembled transcriptomes.,"Next generation sequencing has made it possible to perform differential gene expression studies in non-model organisms. For these studies, the need for a reference genome is circumvented by performing de novo assembly on the RNA-seq data. However, transcriptome assembly produces a multitude of contigs, which must be clustered into genes prior to differential gene expression detection. 
Here we present Corset, a method that hierarchically clusters contigs using shared reads and expression, then summarizes read counts to clusters, ready for statistical testing. Using a range of metrics, we demonstrate that Corset out-performs alternative methods. Corset is available from https://code.google.com/p/corset-project/.",2014-07-26 +24336805,Causal analysis approaches in Ingenuity Pathway Analysis.,"

Motivation

Prior biological knowledge greatly facilitates the meaningful interpretation of gene-expression data. Causal networks constructed from individual relationships curated from the literature are particularly suited for this task, since they create mechanistic hypotheses that explain the expression changes observed in datasets.

Results

We present and discuss a suite of algorithms and tools for inferring and scoring regulator networks upstream of gene-expression data based on a large-scale causal network derived from the Ingenuity Knowledge Base. We extend the method to predict downstream effects on biological functions and diseases and demonstrate the validity of our approach by applying it to example datasets.

Availability

The causal analytics tools 'Upstream Regulator Analysis', 'Mechanistic Networks', 'Causal Network Analysis' and 'Downstream Effects Analysis' are implemented and available within Ingenuity Pathway Analysis (IPA, http://www.ingenuity.com).

Supplementary information

Supplementary material is available at Bioinformatics online.",2013-12-13 +24616555,"in silico identification of protein-protein interactions in Silkworm, Bombyx mori.","The Domesticated silkworm, Bombyx mori, an economically important insect has been used as a lepidopteran molecular model next only to Drosophila. Compared to the genomic information in silkworm, the protein-protein interaction data are limited. Therefore experimentally identified PPI maps from five model organisms such as E.coli, C.elegans, D.melanogaster, H. sapiens, S. cerevisiae were used to infer the PPI network of silkworm using the well-recognized Interlog based method. Among the 14623 silkworm proteins, 7736 protein-protein interaction pairs were predicted which include 2700 unique proteins of the silkworms. Using the iPfam interaction domains and the gene expression data, these predictions were validated. In that 625 PPI pairs of predicted network were associated with the iPfam domain-domain interactions and the random network has average of 9. In the gene expression method, the average PCC value of the predicted network and random network was 0.29 and 0.23100±0.00042 respectively. It reveals that the predicted PPI networks of silkworm are highly significant and reliable. This is the first PPI network for the silkworm which will provide a framework for deciphering the cellular processes governing key metabolic pathways in the silkworm, Bombyx mori and available at SilkPPI (http://210.212.197.30/SilkPPI/).",2014-02-19 +26116928,Proteny: discovering and visualizing statistically significant syntenic clusters at the proteome level.,"

Background

With more and more genomes being sequenced, detecting synteny between genomes becomes more and more important. However, for microorganisms the genomic divergence quickly becomes large, resulting in different codon usage and shuffling of gene order and gene elements such as exons.

Results

We present Proteny, a methodology to detect synteny between diverged genomes. It operates on the amino acid sequence level to be insensitive to codon usage adaptations and clusters groups of exons disregarding order to handle diversity in genomic ordering between genomes. Furthermore, Proteny assigns significance levels to the syntenic clusters such that they can be selected on statistical grounds. Finally, Proteny provides novel ways to visualize results at different scales, facilitating the exploration and interpretation of syntenic regions. We test the performance of Proteny on a standard ground truth dataset, and we illustrate the use of Proteny on two closely related genomes (two different strains of Aspergillus niger) and on two distant genomes (two species of Basidiomycota). In comparison to other tools, we find that Proteny finds clusters with more true homologies in fewer clusters that contain more genes, i.e. Proteny is able to identify a more consistent synteny. Further, we show how genome rearrangements, assembly errors, gene duplications and the conservation of specific genes can be easily studied with Proteny.

Availability and implementation

Proteny is freely available at the Delft Bioinformatics Lab website http://bioinformatics.tudelft.nl/dbl/software.

Contact

t.gehrmann@tudelft.nl

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-27 +22064861,"The LANL hemorrhagic fever virus database, a new platform for analyzing biothreat viruses.","Hemorrhagic fever viruses (HFVs) are a diverse set of over 80 viral species, found in 10 different genera comprising five different families: arena-, bunya-, flavi-, filo- and togaviridae. All these viruses are highly variable and evolve rapidly, making them elusive targets for the immune system and for vaccine and drug design. About 55,000 HFV sequences exist in the public domain today. A central website that provides annotated sequences and analysis tools will be helpful to HFV researchers worldwide. The HFV sequence database collects and stores sequence data and provides a user-friendly search interface and a large number of sequence analysis tools, following the model of the highly regarded and widely used Los Alamos HIV database [Kuiken, C., B. Korber, and R.W. Shafer, HIV sequence databases. AIDS Rev, 2003. 5: p. 52-61]. The database uses an algorithm that aligns each sequence to a species-wide reference sequence. The NCBI RefSeq database [Sayers et al. (2011) Database resources of the National Center for Biotechnology Information. Nucleic Acids Res., 39, D38-D51.] is used for this; if a reference sequence is not available, a Blast search finds the best candidate. Using this method, sequences in each genus can be retrieved pre-aligned. The HFV website can be accessed via http://hfv.lanl.gov.",2011-11-07 +23880963,Analysis of the genomic response of human prostate cancer cells to histone deacetylase inhibitors.,"Histone deacetylases (HDACs) have emerged as important targets for cancer treatment. HDAC-inhibitors (HDACis) are well tolerated in patients and have been approved for the treatment of patients with cutaneous T-cell lymphoma (CTCL). To improve the clinical benefit of HDACis in solid tumors, combination strategies with HDACis could be employed. 
In this study, we applied Analysis of Functional Annotation (AFA) to provide a comprehensive list of genes and pathways affected upon HDACi-treatment in prostate cancer cells. This approach provides an unbiased and objective approach to high throughput data mining. By performing AFA on gene expression data from prostate cancer cell lines DU-145 (an HDACi-sensitive cell line) and PC3 (a relatively HDACi-resistant cell line) treated with HDACis valproic acid or vorinostat, we identified biological processes that are affected by HDACis and are therefore potential treatment targets for combination therapy. Our analysis revealed that HDAC-inhibition resulted among others in upregulation of major histocompatibility complex (MHC) genes and deregulation of the mitotic spindle checkpoint by downregulation of genes involved in mitosis. These findings were confirmed by AFA on publicly available data sets from HDACi-treated prostate cancer cells. In total, we analyzed 375 microarrays with HDACi treated and non-treated (control) prostate cancer cells. All results from this extensive analysis are provided as an online research source (available at the journal's website and at http://luigimarchionni.org/HDACIs.html). By publishing this data, we aim to enhance our understanding of the cellular changes after HDAC-inhibition, and to identify novel potential combination strategies with HDACis for the treatment of prostate cancer patients.",2013-07-19 +21752111,The Human Protein Atlas as a proteomic resource for biomarker discovery.,"The analysis of tissue-specific expression at both the gene and protein levels is vital for understanding human biology and disease. Antibody-based proteomics provides a strategy for the systematic generation of antibodies against all human proteins to combine with protein profiling in tissues and cells using tissue microarrays, immunohistochemistry and immunofluorescence. 
The Human Protein Atlas project was launched in 2003 with the aim of creating a map of protein expression patterns in normal cells, tissues and cancer. At present, 11,200 unique proteins corresponding to over 50% of all human protein-encoding genes have been analysed. All protein expression data, including underlying high-resolution images, are published on the free and publically available Human Protein Atlas portal (http://www.proteinatlas.org). This database provides an important source of information for numerous biomedical research projects, including biomarker discovery efforts. Moreover, the global analysis of how our genome is expressed at the protein level has provided basic knowledge on the ubiquitous expression of a large proportion of our proteins and revealed the paucity of cell- and tissue-type-specific proteins.",2011-08-03 +21619640,Structator: fast index-based search for RNA sequence-structure patterns.,"

Background

The secondary structure of RNA molecules is intimately related to their function and often more conserved than the sequence. Hence, the important task of searching databases for RNAs requires to match sequence-structure patterns. Unfortunately, current tools for this task have, in the best case, a running time that is only linear in the size of sequence databases. Furthermore, established index data structures for fast sequence matching, like suffix trees or arrays, cannot benefit from the complementarity constraints introduced by the secondary structure of RNAs.

Results

We present a novel method and readily applicable software for time efficient matching of RNA sequence-structure patterns in sequence databases. Our approach is based on affix arrays, a recently introduced index data structure, preprocessed from the target database. Affix arrays support bidirectional pattern search, which is required for efficiently handling the structural constraints of the pattern. Structural patterns like stem-loops can be matched inside out, such that the loop region is matched first and then the pairing bases on the boundaries are matched consecutively. This allows to exploit base pairing information for search space reduction and leads to an expected running time that is sublinear in the size of the sequence database. The incorporation of a new chaining approach in the search of RNA sequence-structure patterns enables the description of molecules folding into complex secondary structures with multiple ordered patterns. The chaining approach removes spurious matches from the set of intermediate results, in particular of patterns with little specificity. In benchmark experiments on the Rfam database, our method runs up to two orders of magnitude faster than previous methods.

Conclusions

The presented method's sublinear expected running time makes it well suited for RNA sequence-structure pattern matching in large sequence databases. RNA molecules containing several stem-loop substructures can be described by multiple sequence-structure patterns and their matches are efficiently handled by a novel chaining method. Beyond our algorithmic contributions, we provide with Structator a complete and robust open-source software solution for index-based search of RNA sequence-structure patterns. The Structator software is available at http://www.zbh.uni-hamburg.de/Structator.",2011-05-27 +22161369,Interventions for involutional lower lid entropion.,"

Background

Entropion is a condition in which the eyelid margin turns in against the eyeball. Involutional or senile entropion is one of the most common lower lid malpositions in the elderly. The interventions described and currently used for the treatment of this condition are surgical in nature, although non-surgical temporary medical treatment for the early stages of entropion has also been reported. The relative effectiveness of these interventions has not yet been resolved.

Objectives

To examine the effect of interventions for involutional entropion and to assess whether any method is superior to any other.

Search methods

We searched CENTRAL (which contains the Cochrane Eyes and Vision Group Trials Register) (The Cochrane Library 2011, Issue 10), MEDLINE (January 1950 to November 2011), EMBASE (January 1980 to November 2011), the metaRegister of Controlled Trials (mRCT) (www.controlled-trials.com), ClinicalTrials.gov (http://clinicaltrials.gov) and the WHO International Clinical Trials Registry Platform (ICTRP) (www.who.int/ictrp/search/en). There were no date or language restrictions in the electronic searches for trials. The electronic databases were last searched on 2 November 2011. We also searched oculoplastic textbooks, conference proceedings from the European and American Society of Ophthalmic Plastic and Reconstructive Surgery (ESOPRS, ASOPRS), European Ophthalmological Society (SOE), the Association for Research in Vision and Ophthalmology (ARVO) and American Academy of Ophthalmology (AAO) for the years 2000 to 2009 to identify relevant data. We attempted to contact researchers who are active in this field for information about further published or unpublished studies.

Selection criteria

We included randomised controlled trials (RCTs) with no restriction on date or language comparing two or more surgical methods for correction of involutional lower eyelid entropion in people older than 60 years of age with involutional lower lid entropion.

Data collection and analysis

Each review author independently assessed study abstracts identified from the electronic and manual searches. Author analysis was then compared and full papers for appropriate studies were obtained according to the inclusion criteria. Disagreements between the authors were resolved by discussion.

Main results

We identified one RCT which met our inclusion criteria and was included in this review. Sixty-three participants with primary involutional lower eyelid entropion were randomised to everting sutures alone or everting sutures with a lateral tarsal strip. Eight participants were lost to follow-up. The trial indicates that the combined procedure for horizontal and vertical eyelid tightening in the form of everting sutures and lateral tarsal strip is highly curative for involutional entropion compared to vertical tightening in the form of everting sutures alone. The superiority of the combined approach is also supported by many good quality uncontrolled studies on specific surgical procedures but these were not included in the analysis as they were not part of the inclusion criteria.

Authors' conclusions

A single RCT showed that the combination of horizontal and vertical eyelid tightening with everting sutures and lateral tarsal strip is highly efficient for entropion compared to vertical tightening with everting sutures alone. Retrospective case series studies also support the combined surgical repair but details from these studies on specific surgical techniques cannot be included in the analysis. Evidence from a single RCT is unlikely to change clinical practice and thus it is still our view that there is a clear need for more randomised studies comparing two or more surgical techniques for entropion surgery addressing the recurrence and complications rate.",2011-12-07 +23705874,Evaluation and optimization of virtual screening workflows with DEKOIS 2.0--a public library of challenging docking benchmark sets.,"The application of molecular benchmarking sets helps to assess the actual performance of virtual screening (VS) workflows. To improve the efficiency of structure-based VS approaches, the selection and optimization of various parameters can be guided by benchmarking. With the DEKOIS 2.0 library, we aim to further extend and complement the collection of publicly available decoy sets. Based on BindingDB bioactivity data, we provide 81 new and structurally diverse benchmark sets for a wide variety of different target classes. To ensure a meaningful selection of ligands, we address several issues that can be found in bioactivity data. We have improved our previously introduced DEKOIS methodology with enhanced physicochemical matching, now including the consideration of molecular charges, as well as a more sophisticated elimination of latent actives in the decoy set (LADS). We evaluate the docking performance of Glide, GOLD, and AutoDock Vina with our data sets and highlight existing challenges for VS tools. 
All DEKOIS 2.0 benchmark sets will be made accessible at http://www.dekois.com.",2013-06-12 +24395755,FamAnn: an automated variant annotation pipeline to facilitate target discovery for family-based sequencing studies.,"FamAnn is an automated variant annotation pipeline designed for facilitating target discovery for family-based sequencing studies. It can apply a different inheritance pattern or a de novo mutations discovery model to each family and select single nucleotide variants and small insertions and deletions segregating in each family or shared by multiple families. It also provides a variety of variant annotations and retains and annotates all transcripts hit by a single variant. Excel-compatible outputs including all annotated variants segregating in each family or shared by multiple families will be provided for users to prioritize variants based on their customized thresholds. A list of genes that harbor the segregating variants will be provided as well for possible pathway/network analyses. FamAnn uses the de facto community standard Variant Call Format as the input format and can be applied to whole exome, genome or targeted resequencing data.

Availability

https://sites.google.com/site/famannotation/home CONTACT: jianchaoyao@gmail.com, kelvinzhang@mednet.ucla.edu, mccombie@cshl.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2014-01-05 +26325505,"Mapping the Conformation Space of Wildtype and Mutant H-Ras with a Memetic, Cellular, and Multiscale Evolutionary Algorithm.","An important goal in molecular biology is to understand functional changes upon single-point mutations in proteins. Doing so through a detailed characterization of structure spaces and underlying energy landscapes is desirable but continues to challenge methods based on Molecular Dynamics. In this paper we propose a novel algorithm, SIfTER, which is based instead on stochastic optimization to circumvent the computational challenge of exploring the breadth of a protein's structure space. SIfTER is a data-driven evolutionary algorithm, leveraging experimentally-available structures of wildtype and variant sequences of a protein to define a reduced search space from where to efficiently draw samples corresponding to novel structures not directly observed in the wet laboratory. The main advantage of SIfTER is its ability to rapidly generate conformational ensembles, thus allowing mapping and juxtaposing landscapes of variant sequences and relating observed differences to functional changes. We apply SIfTER to variant sequences of the H-Ras catalytic domain, due to the prominent role of the Ras protein in signaling pathways that control cell proliferation, its well-studied conformational switching, and abundance of documented mutations in several human tumors. Many Ras mutations are oncogenic, but detailed energy landscapes have not been reported until now. Analysis of SIfTER-computed energy landscapes for the wildtype and two oncogenic variants, G12V and Q61L, suggests that these mutations cause constitutive activation through two different mechanisms. 
G12V directly affects binding specificity while leaving the energy landscape largely unchanged, whereas Q61L has pronounced, starker effects on the landscape. An implementation of SIfTER is made available at http://www.cs.gmu.edu/~ashehu/?q=OurTools. We believe SIfTER is useful to the community to answer the question of how sequence mutations affect the function of a protein, when there is an abundance of experimental structures that can be exploited to reconstruct an energy landscape that would be computationally impractical to do via Molecular Dynamics.",2015-09-01 +24334400,Hierarchical clustering of high-throughput expression data based on general dependences.,"High-throughput expression technologies, including gene expression array and liquid chromatography--mass spectrometry (LC-MS) and so on, measure thousands of features, i.e., genes or metabolites, on a continuous scale. In such data, both linear and nonlinear relations exist between features. Nonlinear relations can reflect critical regulation patterns in the biological system. However, they are not identified and utilized by traditional clustering methods based on linear associations. Clustering based on general dependences, i.e., both linear and nonlinear relations, is hampered by the high dimensionality and high noise level of the data. We developed a sensitive nonparametric measure of general dependence between (groups of) random variables in high dimensions. Based on this dependence measure, we developed a hierarchical clustering method. In simulation studies, the method outperformed correlation- and mutual information (MI)-based hierarchical clustering methods in clustering features with nonlinear dependences. We applied the method to a microarray data set measuring the gene expression in cell-cycle time series to show it generates biologically relevant results. 
The R code is available at http://userwww.service.emory.edu/~tyu8/GDHC.",2013-07-01 +23142963,iBAG: integrative Bayesian analysis of high-dimensional multiplatform genomics data.,"

Motivation

Analyzing data from multi-platform genomics experiments combined with patients' clinical outcomes helps us understand the complex biological processes that characterize a disease, as well as how these processes relate to the development of the disease. Current data integration approaches are limited in that they do not consider the fundamental biological relationships that exist among the data obtained from different platforms. Statistical Model: We propose an integrative Bayesian analysis of genomics data (iBAG) framework for identifying important genes/biomarkers that are associated with clinical outcome. This framework uses hierarchical modeling to combine the data obtained from multiple platforms into one model.

Results

We assess the performance of our methods using several synthetic and real examples. Simulations show our integrative methods to have higher power to detect disease-related genes than non-integrative methods. Using the Cancer Genome Atlas glioblastoma dataset, we apply the iBAG model to integrate gene expression and methylation data to study their associations with patient survival. Our proposed method discovers multiple methylation-regulated genes that are related to patient survival, most of which have important biological functions in other diseases but have not been previously studied in glioblastoma.

Availability

http://odin.mdacc.tmc.edu/∼vbaladan/.

Contact

veera@mdanderson.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-09 +24392133,A computational framework to infer human disease-associated long noncoding RNAs.,"As a major class of noncoding RNAs, long noncoding RNAs (lncRNAs) have been implicated in various critical biological processes. Accumulating researches have linked dysregulations and mutations of lncRNAs to a variety of human disorders and diseases. However, to date, only a few human lncRNAs have been associated with diseases. Therefore, it is very important to develop a computational method to globally predict potential associated diseases for human lncRNAs. In this paper, we developed a computational framework to accomplish this by combining human lncRNA expression profiles, gene expression profiles, and human disease-associated gene data. Applying this framework to available human long intergenic noncoding RNAs (lincRNAs) expression data, we showed that the framework has reliable accuracy. As a result, for non-tissue-specific lincRNAs, the AUC of our algorithm is 0.7645, and the prediction accuracy is about 89%. This study will be helpful for identifying novel lncRNAs for human diseases, which will help in understanding the roles of lncRNAs in human diseases and facilitate treatment. The corresponding codes for our method and the predicted results are all available at http://asdcd.amss.ac.cn/MingXiLiu/lncRNA-disease.html.",2014-01-02 +24861616,ProBiS-ligands: a web server for prediction of ligands by examination of protein binding sites.,"The ProBiS-ligands web server predicts binding of ligands to a protein structure. Starting with a protein structure or binding site, ProBiS-ligands first identifies template proteins in the Protein Data Bank that share similar binding sites. Based on the superimpositions of the query protein and the similar binding sites found, the server then transposes the ligand structures from those sites to the query protein. Such ligand prediction supports many activities, e.g. 
drug repurposing. The ProBiS-ligands web server, an extension of the ProBiS web server, is open and free to all users at http://probis.cmm.ki.si/ligands.",2014-05-26 +22087242,Classification of sharks in the Egyptian Mediterranean waters using morphological and DNA barcoding approaches.,"The identification of species constitutes the first basic step in phylogenetic studies, biodiversity monitoring and conservation. DNA barcoding, i.e. the sequencing of a short standardized region of DNA, has been proposed as a new tool for animal species identification. The present study provides an update on the composition of sharks in the Egyptian Mediterranean waters off Alexandria, since the latest study to date was performed 30 years ago. DNA barcoding was used in addition to classical taxonomical methodologies. Thus, 51 specimens were DNA barcoded for a 667 bp region of the mitochondrial COI gene. Although DNA barcoding aims at developing species identification systems, some phylogenetic signals were apparent in the data. In the neighbor-joining tree, 8 major clusters were apparent, each of them containing individuals belonging to the same species, and most with 100% bootstrap value. This study is the first to our knowledge to use DNA barcoding of the mitochondrial COI gene in order to confirm the presence of species Squalus acanthias, Oxynotus centrina, Squatina squatina, Scyliorhinus canicula, Scyliorhinus stellaris, Mustelus mustelus, Mustelus punctulatus and Carcharhinus altimus in the Egyptian Mediterranean waters. 
Finally, our study is the starting point of a new barcoding database concerning shark composition in the Egyptian Mediterranean waters (Barcoding of Egyptian Mediterranean Sharks [BEMS], http://www.boldsystems.org/views/projectlist.php?&#Barcoding%20Fish%20%28FishBOL%29).",2011-11-02 +24931138,Coverage of protein domain families with structural protein-protein interactions: current progress and future trends.,"Protein interactions have evolved into highly precise and regulated networks adding an immense layer of complexity to cellular systems. The most accurate atomistic description of protein binding sites can be obtained directly from structures of protein complexes. The availability of structurally characterized protein interfaces significantly improves our understanding of interactomes, and the progress in structural characterization of protein-protein interactions (PPIs) can be measured by calculating the structural coverage of protein domain families. We analyze the coverage of protein domain families (defined according to CDD and Pfam databases) by structures, structural protein-protein complexes and unique protein binding sites. Structural PPI coverage of currently available protein families is about 30% without any signs of saturation in coverage growth dynamics. Given the current growth rates of domain databases and structural PPI deposition, complete domain coverage with PPIs is not expected in the near future. As a result of this study we identify families without any protein-protein interaction evidence (listed on a supporting website http://www.ncbi.nlm.nih.gov/Structure/ibis/coverage/) and propose them as potential targets for structural studies with a focus on protein interactions.",2014-06-13 +24930145,TIPdb-3D: the three-dimensional structure database of phytochemicals from Taiwan indigenous plants. ,"The rich indigenous and endemic plants in Taiwan serve as a resourceful bank for biologically active phytochemicals. 
Based on our TIPdb database curating bioactive phytochemicals from Taiwan indigenous plants, this study presents a three-dimensional (3D) chemical structure database named TIPdb-3D to support the discovery of novel pharmacologically active compounds. The Merck Molecular Force Field (MMFF94) was used to generate 3D structures of phytochemicals in TIPdb. The 3D structures could facilitate the analysis of 3D quantitative structure-activity relationship, the exploration of chemical space and the identification of potential pharmacologically active compounds using protein-ligand docking. Database URL: http://cwtung.kmu.edu.tw/tipdb.",2014-06-13 +24458951,The most informative spacing test effectively discovers biologically relevant outliers or multiple modes in expression.,"

Summary

Several outlier and subgroup identification statistics (OASIS) have been proposed to discover transcriptomic features with outliers or multiple modes in expression that are indicative of distinct biological processes or subgroups. Here, we borrow ideas from the OASIS methods in the bioinformatics and statistics literature to develop the 'most informative spacing test' (MIST) for unsupervised detection of such transcriptomic features. In an example application involving 14 cases of pediatric acute megakaryoblastic leukemia, MIST more robustly identified features that perfectly discriminate subjects according to gender or the presence of a prognostically relevant fusion-gene than did seven other OASIS methods in the analysis of RNA-seq exon expression, RNA-seq exon junction expression and microarray exon expression data. MIST was also effective at identifying features related to gender or molecular subtype in an example application involving 157 adult cases of acute myeloid leukemia.

Availability

MIST will be freely available in the OASIS R package at http://www.stjuderesearch.org/site/depts/biostats

Contact

stanley.pounds@stjude.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-01-22 +24926662,lnCeDB: database of human long noncoding RNA acting as competing endogenous RNA.,"

Unlabelled

Long noncoding RNA (lncRNA) influences post-transcriptional regulation by interfering with the microRNA (miRNA) pathways, acting as competing endogenous RNA (ceRNA). These lncRNAs have miRNA responsive elements (MRE) in them, and control endogenous miRNAs available for binding with their target mRNAs, thus reducing the repression of these mRNAs. lnCeDB provides a database of human lncRNAs (from GENCODE 19 version) that can potentially act as ceRNAs. The putative mRNA targets of human miRNAs and the targets mapped to AGO clipped regions are collected from TargetScan and StarBase respectively. The lncRNA targets of human miRNAs (up to GENCODE 11) are downloaded from miRCode database. miRNA targets on the rest of the GENCODE 19 lncRNAs are predicted by our algorithm for finding seed-matched target sites. These putative miRNA-lncRNA interactions are mapped to the Ago interacting regions within lncRNAs. To find out the likelihood of an lncRNA-mRNA pair for actually being ceRNA we take recourse to two methods. First, a ceRNA score is calculated from the ratio of the number of shared MREs between the pair with the total number of MREs of the individual candidate gene. Second, the P-value for each ceRNA pair is determined by hypergeometric test using the number of shared miRNAs between the ceRNA pair against the number of miRNAs interacting with the individual RNAs. Typically, in a pair of RNAs being targeted by common miRNA(s), there should be a correlation of expression so that the increase in level of one ceRNA results in the increased level of the other ceRNA. Near-equimolar concentration of the competing RNAs is associated with more profound ceRNA effect. In lnCeDB one can not only browse for lncRNA-mRNA pairs having common targeting miRNAs, but also compare the expression of the pair in 22 human tissues to estimate the chances of the pair for actually being ceRNAs.

Availability

Downloadable freely from http://gyanxet-beta.com/lncedb/.",2014-06-13 +23295473,International Spinal Cord Injury Data Sets for non-traumatic spinal cord injury.,"

Study design

Multifaceted: extensive discussions at workshop and conference presentations, survey of experts and feedback.

Objectives

Present the background, purpose and development of the International Spinal Cord Injury (SCI) Data Sets for Non-Traumatic SCI (NTSCI), including a hierarchical classification of aetiology.

Setting

International.

Methods

Consultation via e-mail, presentations and discussions at ISCoS conferences (2006-2009), and workshop (1 September 2008). The consultation processes aimed to: (1) clarify aspects of the classification structure, (2) determine placement of certain aetiologies and identify important missing causes of NTSCI and (3) resolve coding issues and refine definitions. Every effort was made to consider feedback and suggestions from participants.

Results

The International Data Sets for NTSCI include a basic and an extended version. The extended data set includes a two-axis classification system for the causes of NTSCI. Axis 1 consists of a five-level, two-tier (congenital-genetic and acquired) hierarchy that allows for increasing detail to specify the aetiology. Axis 2 uses the International Statistical Classification of Diseases (ICD) and Related Health Problems for coding the initiating disease(s) that may have triggered the events that resulted in the axis 1 diagnosis, where appropriate. Additional items cover the timeframe of onset of NTSCI symptoms and presence of iatrogenicity. Complete instructions for data collection, data sheet and training cases are available at the websites of ISCoS (http://www.iscos.org.uk) and ASIA (http://www.asia-spinalinjury.org).

Conclusions

The data sets should facilitate comparative research involving NTSCI participants, especially epidemiological studies and prevention projects. Further work is anticipated to refine the data sets, particularly regarding iatrogenicity.",2013-01-08 +25549614,Workshop report: Crystal City V--quantitative bioanalytical method validation and implementation: the 2013 revised FDA guidance.,"In September 2013, the FDA released a draft revision of the Bioanalytical Method Validation (BMV) Guidance, which included a number of changes to the expectations for bioanalysis, most notably the inclusion of biomarker assays and data. To provide a forum for an open, inclusive discussion of the revised draft BMV Guidance, the AAPS and FDA once again collaborated to convene a two-and-a-half day workshop during early December 2013 in Baltimore, MD, USA. The resulting format embodied extensive open discussion and each thematic session included only brief, concise descriptions by Agency and industry representatives prior to opening the floor discussion. The Workshop was built around four thematic sessions (Common Topics, Chromatographic, Ligand-Binding Assays, and Biomarkers) and a final session with international regulators, concluding with a review of the outcomes and recommendations from the thematic sessions. This Workshop report summarizes the outcomes and includes topics of agreement, those where the FDA will consider the Industry's perspective, and those where the workshop provided a first open dialogue. This article will be available to the bioanalytical community at http://www.aaps.org/BMV13 .",2014-12-31 +25002814,DOR - a Database of Olfactory Receptors - Integrated Repository for Sequence and Secondary Structural Information of Olfactory Receptors in Selected Eukaryotic Genomes.,"Olfaction is the response to odors and is mediated by a class of membrane-bound proteins called olfactory receptors (ORs). 
An understanding of these receptors serves as a good model for basic signal transduction mechanisms and also provides important clues for the strategies adopted by organisms for their ultimate survival using chemosensory perception in search of food or defense against predators. Prior research on cross-genome phylogenetic analyses from our group motivated the addressal of conserved evolutionary trends, clustering, and ortholog prediction of ORs. The database of olfactory receptors (DOR) is a repository that provides sequence and structural information on ORs of selected organisms (such as Saccharomyces cerevisiae, Drosophila melanogaster, Caenorhabditis elegans, Mus musculus, and Homo sapiens). Users can download OR sequences, study predicted membrane topology, and obtain cross-genome sequence alignments and phylogeny, including three-dimensional (3D) structural models of 100 selected ORs and their predicted dimer interfaces. The database can be accessed from http://caps.ncbs.res.in/DOR. Such a database should be helpful in designing experiments on point mutations to probe into the possible dimerization modes of ORs and to even understand the evolutionary changes between different receptors.",2014-06-12 +24925130,High-accuracy identification of incident HIV-1 infections using a sequence clustering based diversity measure.,"Accurate estimates of HIV-1 incidence are essential for monitoring epidemic trends and evaluating intervention efforts. However, the long asymptomatic stage of HIV-1 infection makes it difficult to effectively distinguish incident infections from chronic ones. Current incidence assays based on serology or viral sequence diversity are both still lacking in accuracy. 
In the present work, a sequence clustering based diversity (SCBD) assay was devised by utilizing the fact that viral sequences derived from each transmitted/founder (T/F) strain tend to cluster together at early stage, and that only the intra-cluster diversity is correlated with the time since HIV-1 infection. The dot-matrix pairwise alignment was used to eliminate the disproportional impact of insertion/deletions (indels) and recombination events, and so was the proportion of clusterable sequences (Pc) as an index to identify late chronic infections with declined viral genetic diversity. Tested on a dataset containing 398 incident and 163 chronic infection cases collected from the Los Alamos HIV database (last modified 2/8/2012), our SCBD method achieved 99.5% sensitivity and 98.8% specificity, with an overall accuracy of 99.3%. Further analysis and evaluation also suggested its performance was not affected by host factors such as the viral subtypes and transmission routes. The SCBD method demonstrated the potential of sequencing based techniques to become useful for identifying incident infections. Its use may be most advantageous for settings with low to moderate incidence relative to available resources. The online service is available at http://www.bioinfo.tsinghua.edu.cn:8080/SCBD/index.jsp.",2014-06-12 +24261665,Probabilistic alignment leads to improved accuracy and read coverage for bisulfite sequencing data.,"

Background

DNA methylation has been linked to many important biological phenomena. Researchers have recently begun to sequence bisulfite treated DNA to determine its pattern of methylation. However, sequencing reads from bisulfite-converted DNA can vary significantly from the reference genome because of incomplete bisulfite conversion, genome variation, sequencing errors, and poor quality bases. Therefore, it is often difficult to align reads to the correct locations in the reference genome. Furthermore, bisulfite sequencing experiments have the additional complexity of having to estimate the DNA methylation levels within the sample.

Results

Here, we present a highly accurate probabilistic algorithm, which is an extension of the Genomic Next-generation Universal MAPper to accommodate bisulfite sequencing data (GNUMAP-bs), that addresses the computational problems associated with aligning bisulfite sequencing data to a reference genome. GNUMAP-bs integrates uncertainty from read and mapping qualities to help resolve the difference between poor quality bases and the ambiguity inherent in bisulfite conversion. We tested GNUMAP-bs and other commonly-used bisulfite alignment methods using both simulated and real bisulfite reads and found that GNUMAP-bs and other dynamic programming methods were more accurate than the more heuristic methods.

Conclusions

The GNUMAP-bs aligner is a highly accurate alignment approach for processing the data from bisulfite sequencing experiments. The GNUMAP-bs algorithm is freely available for download at: http://dna.cs.byu.edu/gnumap. The software runs on multiple threads and multiple processors to increase the alignment speed.",2013-11-21 +22258275,Prediction of hot spots in protein interfaces using a random forest model with hybrid features.,"Prediction of hot spots in protein interfaces provides crucial information for the research on protein-protein interaction and drug design. Existing machine learning methods generally judge whether a given residue is likely to be a hot spot by extracting features only from the target residue. However, hot spots usually form a small cluster of residues which are tightly packed together at the center of protein interface. With this in mind, we present a novel method to extract hybrid features which incorporate a wide range of information of the target residue and its spatially neighboring residues, i.e. the nearest contact residue in the other face (mirror-contact residue) and the nearest contact residue in the same face (intra-contact residue). We provide a novel random forest (RF) model to effectively integrate these hybrid features for predicting hot spots in protein interfaces. Our method can achieve accuracy (ACC) of 82.4% and Matthew's correlation coefficient (MCC) of 0.482 in Alanine Scanning Energetics Database, and ACC of 77.6% and MCC of 0.429 in Binding Interface Database. In a comparison study, performance of our RF model exceeds other existing methods, such as Robetta, FOLDEF, KFC, KFC2, MINERVA and HotPoint. Of our hybrid features, three physicochemical features of target residues (mass, polarizability and isoelectric point), the relative side-chain accessible surface area and the average depth index of mirror-contact residues are found to be the main discriminative features in hot spots prediction. 
We also confirm that hot spots tend to form large contact surface areas between two interacting proteins. Source data and code are available at: http://www.aporc.org/doc/wiki/HotSpot.",2012-01-18 +24319001,ChromoHub V2: cancer genomics.,"

Summary

Cancer genomics data produced by next-generation sequencing support the notion that epigenetic mechanisms play a central role in cancer. We have previously developed Chromohub, an open access online interface where users can map chemical, structural and biological data from public repositories on phylogenetic trees of protein families involved in chromatin-mediated signaling. Here, we describe a cancer genomics interface that was recently added to Chromohub; the frequency of mutation, amplification and change in expression of chromatin factors across large cohorts of cancer patients is regularly extracted from The Cancer Genome Atlas and the International Cancer Genome Consortium and can now be mapped on phylogenetic trees of epigenetic protein families. Explorators of chromatin signaling can now easily navigate the cancer genomics landscape of writers, readers and erasers of histone marks, chromatin remodeling complexes, histones and their chaperones.

Availability and implementation

http://www.thesgc.org/chromohub/.",2013-12-06 +24025589,MLSTest: novel software for multi-locus sequence data analysis in eukaryotic organisms.,"Multi-locus sequence typing (MLST) is a frequently used genotyping method whose goal is the unambiguous assignment of microorganisms to genetic clusters. MLST typically involves analysis of DNA sequence results generated from several house-keeping gene loci. MLST remains the gold standard for molecular typing of many bacterial pathogens. Eukaryotic pathogens have also been the subject of MLST, however, few tools are available to deal with diploid sequence data. Here we present novel software for MLST data analysis tailored towards diploid Eukaryotes: MLSTest. This software meets various methods used in MLST and introduces some novel methodologies for the evaluation of the data set. In addition to construction of allelic profiles and basic clustering analysis, the MLSTest looks for network structures that suggest genetic exchange in BURST graphs. Additionally, it uses several simple methods for tree construction with the advantage of managing heterozygous or three-state sites. Additionally, the software analyses whether concatenation of fragments from different genes is suitable for the data set using different tests (bionj-incongruence length difference test, Templeton test). It evaluates how the incongruence is distributed across the tree using a variation of the localized incongruence length difference test based on a modified neighbour joining algorithm. We tested the last method in simulated datasets. We showed that is conservative (adequate type I error rate) and moderately to highly powerful as well as useful to localize incongruences in two bacterial and two eukaryotic MLST datasets. MLSTest was also designed for developing MLST schemes. It thus has tools to optimize locus combinations and to reduce the number of targets required for typing. 
MLSTest also analyses whether the discriminatory power of the typing scheme is increased by including more loci. We evaluated the software over simulated and real datasets from bacterial and eukaryotic microorganisms. The software is freely available at http://www.ipe.unsa.edu.ar/software.",2013-09-08 +26668003,Fuse: multiple network alignment via data fusion.,"

Motivation

Discovering patterns in networks of protein-protein interactions (PPIs) is a central problem in systems biology. Alignments between these networks aid functional understanding as they uncover important information, such as evolutionarily conserved pathways, protein complexes and functional orthologs. However, the complexity of the multiple network alignment problem grows exponentially with the number of networks being aligned and designing a multiple network aligner that is both scalable and that produces biologically relevant alignments is a challenging task that has not been fully addressed. The objective of multiple network alignment is to create clusters of nodes that are evolutionarily and functionally conserved across all networks. Unfortunately, the alignment methods proposed thus far do not meet this objective as they are guided by pairwise scores that do not utilize the entire functional and evolutionary information across all networks.

Results

To overcome this weakness, we propose Fuse, a new multiple network alignment algorithm that works in two steps. First, it computes our novel protein functional similarity scores by fusing information from wiring patterns of all aligned PPI networks and sequence similarities between their proteins. This is in contrast with the previous tools that are all based on protein similarities in pairs of networks being aligned. Our comprehensive new protein similarity scores are computed by Non-negative Matrix Tri-Factorization (NMTF) method that predicts associations between proteins whose homology (from sequences) and functioning similarity (from wiring patterns) are supported by all networks. Using the five largest and most complete PPI networks from BioGRID, we show that NMTF predicts a large number of protein pairs that are biologically consistent. Second, to identify clusters of aligned proteins over all networks, Fuse uses our novel maximum weight k-partite matching approximation algorithm. We compare Fuse with the state of the art multiple network aligners and show that (i) by using only sequence alignment scores, Fuse already outperforms other aligners and produces a larger number of biologically consistent clusters that cover all aligned PPI networks and (ii) using both sequence alignments and topological NMTF-predicted scores leads to the best multiple network alignments thus far.

Availability and implementation

Our dataset and software are freely available from the web site: http://bio-nets.doc.ic.ac.uk/Fuse/

Contact

natasha@imperial.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-12-14 +24966856,Comparative genomics and evolution of regulons of the LacI-family transcription factors.,"DNA-binding transcription factors (TFs) are essential components of transcriptional regulatory networks in bacteria. LacI-family TFs (LacI-TFs) are broadly distributed among certain lineages of bacteria. The majority of characterized LacI-TFs sense sugar effectors and regulate carbohydrate utilization genes. The comparative genomics approaches enable in silico identification of TF-binding sites and regulon reconstruction. To study the function and evolution of LacI-TFs, we performed genomics-based reconstruction and comparative analysis of their regulons. For over 1300 LacI-TFs from over 270 bacterial genomes, we predicted their cognate DNA-binding motifs and identified target genes. Using the genome context and metabolic subsystem analyses of reconstructed regulons, we tentatively assigned functional roles and predicted candidate effectors for 78 and 67% of the analyzed LacI-TFs, respectively. Nearly 90% of the studied LacI-TFs are local regulators of sugar utilization pathways, whereas the remaining 125 global regulators control large and diverse sets of metabolic genes. The global LacI-TFs include the previously known regulators CcpA in Firmicutes, FruR in Enterobacteria, and PurR in Gammaproteobacteria, as well as the three novel regulators-GluR, GapR, and PckR-that are predicted to control the central carbohydrate metabolism in three lineages of Alphaproteobacteria. Phylogenetic analysis of regulators combined with the reconstructed regulons provides a model of evolutionary diversification of the LacI protein family. The obtained genomic collection of in silico reconstructed LacI-TF regulons in bacteria is available in the RegPrecise database (http://regprecise.lbl.gov). 
It provides a framework for future structural and functional classification of the LacI protein family and identification of molecular determinants of the DNA and ligand specificity. The inferred regulons can be also used for functional gene annotation and reconstruction of sugar catabolic networks in diverse bacterial lineages.",2014-06-11 +24923820,Egas: a collaborative and interactive document curation platform. ,"With the overwhelming amount of biomedical textual information being produced, several manual curation efforts have been set up to extract and store concepts and their relationships into structured resources. As manual annotation is a demanding and expensive task, computerized solutions were developed to perform such tasks automatically. However, high-end information extraction techniques are still not widely used by biomedical research communities, mainly because of the lack of standards and limitations in usability. Interactive annotation tools intend to fill this gap, taking advantage of automatic techniques and existing knowledge bases to assist expert curators in their daily tasks. This article presents Egas, a web-based platform for biomedical text mining and assisted curation with highly usable interfaces for manual and automatic in-line annotation of concepts and relations. A comprehensive set of de facto standard knowledge bases are integrated and indexed to provide straightforward concept normalization features. Real-time collaboration and conversation functionalities allow discussing details of the annotation task as well as providing instant feedback of curator's interactions. Egas also provides interfaces for on-demand management of the annotation task settings and guidelines, and supports standard formats and literature services to import and export documents. 
By taking advantage of Egas, we participated in the BioCreative IV interactive annotation task, targeting the assisted identification of protein-protein interactions described in PubMed abstracts related to neuropathological disorders. When evaluated by expert curators, it obtained positive scores in terms of usability, reliability and performance. These results, together with the provided innovative features, place Egas as a state-of-the-art solution for fast and accurate curation of information, facilitating the task of creating and updating knowledge bases and annotated resources. Database URL: http://bioinformatics.ua.pt/egas.",2014-06-11 +25152231,Inter-species inference of gene set enrichment in lung epithelial cells from proteomic and large transcriptomic datasets.,"

Motivation

Translating findings in rodent models to human models has been a cornerstone of modern biology and drug development. However, in many cases, a naive 'extrapolation' between the two species has not succeeded. As a result, clinical trials of new drugs sometimes fail even after considerable success in the mouse or rat stage of development. In addition to in vitro studies, inter-species translation requires analytical tools that can predict the enriched gene sets in human cells under various stimuli from corresponding measurements in animals. Such tools can improve our understanding of the underlying biology and optimize the allocation of resources for drug development.

Results

We developed an algorithm to predict differential gene set enrichment as part of the sbv IMPROVER (systems biology verification in Industrial Methodology for Process Verification in Research) Species Translation Challenge, which focused on phosphoproteomic and transcriptomic measurements of normal human bronchial epithelial (NHBE) primary cells under various stimuli and corresponding measurements in rat (NRBE) primary cells. We find that gene sets exhibit a higher inter-species correlation compared with individual genes, and are potentially more suited for direct prediction. Furthermore, in contrast to a similar cross-species response in protein phosphorylation states 5 and 25 min after exposure to stimuli, gene set enrichment 6 h after exposure is significantly different in NHBE cells compared with NRBE cells. In spite of this difference, we were able to develop a robust algorithm to predict gene set activation in NHBE with high accuracy using simple analytical methods.

Availability and implementation

Implementation of all algorithms is available as source code (in Matlab) at http://bhanot.biomaps.rutgers.edu/wiki/codes_SC3_Predicting_GeneSets.zip, along with the relevant data used in the analysis. Gene sets, gene expression and protein phosphorylation data are available on request.

Contact

hormoz@kitp.ucsb.edu.",2014-08-24 +25686637,TarPred: a web application for predicting therapeutic and side effect targets of chemical compounds.,"

Motivation

Discovering the relevant therapeutic targets for drug-like molecules, or their unintended 'off-targets' that predict adverse drug reactions, is a daunting task by experimental approaches alone. There is thus a high demand to develop computational methods capable of detecting these potential interacting targets efficiently.

Results

As biologically annotated chemical data are becoming increasingly available, it becomes feasible to explore such existing knowledge to identify potential ligand-target interactions. Here, we introduce an online implementation of a recently published computational model for target prediction, TarPred, based on a reference library containing 533 individual targets with 179 807 active ligands. TarPred accepts interactive graphical input or input in the chemical file format of SMILES. Given a query compound structure, it provides the top ranked 30 interacting targets. For each of them, TarPred not only shows the structures of three most similar ligands that are known to interact with the target but also highlights the disease indications associated with the target. This information is useful for understanding the mechanisms of action and toxicities of active compounds and can provide drug repositioning opportunities.

Availability and implementation

TarPred is available at: http://www.dddc.ac.cn/tarpred.",2015-02-16 +30727496,First Report of Powdery Mildew Caused by Erysiphe sedi on Kalanchoe blossfeldiana in Korea.,"Kalanchoe blossfeldiana Poelln., belonging to the Crassulaceae, is a common ornamental houseplant with many cultivars. In May 2010, powdery mildew was observed on about 50% of 3,000 potted kalanchoe 'Rose Queen' plants in plastic greenhouses located in Yongin city of central Korea. Farmers producing potted kalanchoes in Yongin region stated that powdery mildew on kalanchoes was mild without causing problems for the last several years. The disease became severe from April 2010 and caused economic losses. The economic and esthetic value was reduced by the unsightly appearance of infected plants with most being unmarketable. Damage due to powdery mildew infections on kalanchoes appeared every year. A representative specimen was deposited in the Korea University herbarium (Accession No. KUS-F24911). Mycelial colonies were white, conspicuous and epiphytic on leaves and stems. Hyphae were septate, branched, and 3 to 6 μm wide. Appressoria on the hyphae were well developed, lobed, and mostly positioned in pairs. Conidiophores were cylindrical, 70 to 145 × 7 to 11.5 μm, and composed of three to four cells. Foot-cells of conidiophores were straight, cylindrical, and 28 to 48 μm long. Conidia produced singly were variable in shape, oval to cylindrical, oval or oblong-elliptical, 30 to 55 × 14 to 24 μm, lacked distinct fibrosin bodies, and showed angular/rectangular wrinkling of outer walls. Germ tubes were produced on the perihilar position of conidia. No chasmothecia were found. The morphological characteristics were consistent with descriptions of Erysiphe sedi U. Braun (1). To confirm the identity of the causal fungus, the complete ITS region of rDNA from KUS-F24911 was amplified with primers ITS5 and P3 as described by Takamatsu et al. (4) and directly sequenced. 
The resulting sequence was deposited in GenBank (Accession No. JX173288). A GenBank BLAST search using the present data revealed that the ITS sequence shares 100% (552/552 bp) similarity with those of E. sedi on Sedum spp. (Accession Nos. JX173289, JX173290). Pathogenicity was confirmed through inoculation by gently pressing diseased leaves onto leaves of five healthy potted kalanchoe plants. Five non-inoculated plants served as controls. Plants were maintained in a greenhouse at 22 ± 2°C. Inoculated plants developed signs and symptoms after 7 days, whereas the control plants remained symptomless. The fungus present on the inoculated plants was morphologically identical to that originally observed on diseased plants, fulfilling Koch's postulates. E. sedi is also known to infect Kalanchoe pinnata (Lam.) Pers. (= Bryophyllum calycinum Salisb.) in Romania (1,2) and other crassulaceous plants including Sedum spectabile in North America (3). To our knowledge, this is the first report of E. sedi infections of K. blossfeldiana in Korea. This disease seems to be a serious threat to the commercial production of kalanchoe plants which are cultivated under plastic greenhouses of poor ventilation and low light levels in Korea. References: (1) U. Braun and R. T. A. Cook. Taxonomic Manual of the Erysiphales (Powdery Mildews), CBS Biodiversity Series No. 11. CBS, Utrecht, 2012. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology & Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , June 13, 2012. (3) L. Kiss and M. L. Daughtrey. Plant Dis. 85:1207, 2001. (4) S. Takamatsu et al. Mycol. Res. 113:117, 2009.",2012-11-01 +25690659,DOTS-Finder: a comprehensive tool for assessing driver genes in cancer genomes.,"A key challenge in the analysis of cancer genomes is the identification of driver genes from the vast number of mutations present in a cohort of patients. 
DOTS-Finder is a new tool that allows the detection of driver genes through the sequential application of functional and frequentist approaches, and is specifically tailored to the analysis of few tumor samples. We have identified driver genes in the genomic data of 34 tumor types derived from existing exploratory projects such as The Cancer Genome Atlas and from studies investigating the usefulness of genomic information in the clinical settings. DOTS-Finder is available at https://cgsb.genomics.iit.it/wiki/projects/DOTS-Finder/.",2014-06-10 +23620278,User-friendly solutions for microarray quality control and pre-processing on ArrayAnalysis.org.,"Quality control (QC) is crucial for any scientific method producing data. Applying adequate QC introduces new challenges in the genomics field where large amounts of data are produced with complex technologies. For DNA microarrays, specific algorithms for QC and pre-processing including normalization have been developed by the scientific community, especially for expression chips of the Affymetrix platform. Many of these have been implemented in the statistical scripting language R and are available from the Bioconductor repository. However, application is hampered by lack of integrative tools that can be used by users of any experience level. To fill this gap, we developed a freely available tool for QC and pre-processing of Affymetrix gene expression results, extending, integrating and harmonizing functionality of Bioconductor packages. The tool can be easily accessed through a wizard-like web portal at http://www.arrayanalysis.org or downloaded for local use in R. The portal provides extensive documentation, including user guides, interpretation help with real output illustrations and detailed technical documentation. It assists newcomers to the field in performing state-of-the-art QC and pre-processing while offering data analysts an integral open-source package. 
Providing the scientific community with this easily accessible tool will allow improving data quality and reuse and adoption of standards.",2013-04-24 +23547033,PathVisio-Faceted Search: an exploration tool for multi-dimensional navigation of large pathways.,"

Purpose

The PathVisio-Faceted Search plugin helps users explore and understand complex pathways by overlaying experimental data and data from webservices, such as Ensembl BioMart, onto diagrams drawn using formalized notations in PathVisio. The plugin then provides a filtering mechanism, known as a faceted search, to find and highlight diagram nodes (e.g. genes and proteins) of interest based on imported data. The tool additionally provides a flexible scripting mechanism to handle complex queries.

Availability

The PathVisio-Faceted Search plugin is compatible with PathVisio 3.0 and above. PathVisio is compatible with Windows, Mac OS X and Linux. The plugin, documentation, example diagrams and Groovy scripts are available at http://PathVisio.org/wiki/PathVisioFacetedSearchHelp. The plugin is free, open-source and licensed by the Apache 2.0 License.",2013-04-01 +24913727,Association of CYP1B1 L432V polymorphism with urinary cancer susceptibility: a meta-analysis.,"

Background

The Cytochrome P450 1B1 (CYP1B1) is a key P450 enzyme involved in the metabolism of exogenous and endogenous substrates. Previous studies have reported the existence of CYP1B1 L432V missense polymorphism in prostate, bladder and renal cancers. However, the effects of this polymorphism on the risk of these cancers remain conflicting. Therefore, we performed a meta-analysis to assess the association between L432V polymorphism and the susceptibility of urinary cancers.

Methods

We searched the PubMed database without limits on language for studies exploring the relationship of CYP1B1 L432V polymorphism and urinary cancers. Article search was supplemented by screening the references of retrieved studies manually. Odds ratios (OR) and 95% confidence intervals (95% CI) were calculated to evaluate the strength of these associations. Simultaneously, publication bias was estimated by funnel plot and Begg's test with Stata 11 software.

Results

We observed a significant association between CYP1B1 L432V polymorphism and urinary cancers. The overall OR (95% CI) of CC versus CG was 0.937 (0.881-0.996), the overall OR (95% CI) of CC versus CG+GG was 0.942 (0.890-0.997). Furthermore, we identified reduced risk for CC versus other phenotypes in both prostate and overall urinary cancers, when studies were limited to Caucasian or Asian patients.

Conclusions

This meta-analysis suggests that the CYP1B1 L432V polymorphism is associated with urinary cancer risk.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/3108829721231527.",2014-06-09 +24911789,CELLO2GO: a web server for protein subCELlular LOcalization prediction with functional gene ontology annotation.,"CELLO2GO (http://cello.life.nctu.edu.tw/cello2go/) is a publicly available, web-based system for screening various properties of a targeted protein and its subcellular localization. Herein, we describe how this platform is used to obtain brief or detailed gene ontology (GO)-type categories, including subcellular localization(s), for the queried proteins by combining the CELLO localization-predicting and BLAST homology-searching approaches. Given a query protein sequence, CELLO2GO uses BLAST to search for homologous sequences that are GO annotated in an in-house database derived from the UniProt KnowledgeBase database. At the same time, CELLO attempts to predict at least one subcellular localization on the basis of the species in which the protein is found. When homologs for the query sequence have been identified, the number of terms found for each of their GO categories, i.e., cellular compartment, molecular function, and biological process, are summed and presented as pie charts representing possible functional annotations for the queried protein. Although the experimental subcellular localization of a protein may not be known, and thus not annotated, CELLO can confidently suggest a subcellular localization. 
CELLO2GO should be a useful tool for research involving complex subcellular systems because it combines CELLO and BLAST into one platform and its output is easily manipulated such that the user-specific questions may be readily addressed.",2014-06-09 +24911103,GASOLINE: a Greedy And Stochastic algorithm for optimal Local multiple alignment of Interaction NEtworks.,"The analysis of structure and dynamics of biological networks plays a central role in understanding the intrinsic complexity of biological systems. Biological networks have been considered a suitable formalism to extend evolutionary and comparative biology. In this paper we present GASOLINE, an algorithm for multiple local network alignment based on statistical iterative sampling in connection to a greedy strategy. GASOLINE overcomes the limits of current approaches by producing biologically significant alignments within a feasible running time, even for very large input instances. The method has been extensively tested on a database of real and synthetic biological networks. A comprehensive comparison with state-of-the art algorithms clearly shows that GASOLINE yields the best results in terms of both reliability of alignments and running time on real biological networks and results comparable in terms of quality of alignments on synthetic networks. GASOLINE has been developed in Java, and is available, along with all the computed alignments, at the following URL: http://ferrolab.dmi.unict.it/gasoline/gasoline.html.",2014-06-09 +22827839,Seqcrawler: biological data indexing and browsing platform.,"

Background

Seqcrawler takes its roots in software like SRS or Lucegene. It provides an indexing platform to ease the search of data and meta-data in biological banks and it can scale to face the current flow of data. While many biological bank search tools are available on the Internet, mainly provided by large organizations to search their data, there is a lack of free and open source solutions to browse one's own set of data with a flexible query system and able to scale from a single computer to a cloud system. A personal index platform will help labs and bioinformaticians to search their meta-data but also to build a larger information system with custom subsets of data.

Results

The software is scalable from a single computer to a cloud-based infrastructure. It has been successfully tested in a private cloud with 3 index shards (pieces of index) hosting ~400 millions of sequence information (whole GenBank, UniProt, PDB and others) for a total size of 600 GB in a fault tolerant architecture (high-availability). It has also been successfully integrated with software to add extra meta-data from blast results to enhance users' result analysis.

Conclusions

Seqcrawler provides a complete open source search and store solution for labs or platforms needing to manage large amount of data/meta-data with a flexible and customizable web interface. All components (search engine, visualization and data storage), though independent, share a common and coherent data system that can be queried with a simple HTTP interface. The solution scales easily and can also provide a high availability infrastructure.",2012-07-24 +26404905,Serum Glycoprotein Biomarker Discovery and Qualification Pipeline Reveals Novel Diagnostic Biomarker Candidates for Esophageal Adenocarcinoma.,"We report an integrated pipeline for efficient serum glycoprotein biomarker candidate discovery and qualification that may be used to facilitate cancer diagnosis and management. The discovery phase used semi-automated lectin magnetic bead array (LeMBA)-coupled tandem mass spectrometry with a dedicated data-housing and analysis pipeline; GlycoSelector (http://glycoselector.di.uq.edu.au). The qualification phase used lectin magnetic bead array-multiple reaction monitoring-mass spectrometry incorporating an interactive web-interface, Shiny mixOmics (http://mixomics-projects.di.uq.edu.au/Shiny), for univariate and multivariate statistical analysis. Relative quantitation was performed by referencing to a spiked-in glycoprotein, chicken ovalbumin. We applied this workflow to identify diagnostic biomarkers for esophageal adenocarcinoma (EAC), a life threatening malignancy with poor prognosis in the advanced setting. EAC develops from metaplastic condition Barrett's esophagus (BE). Currently diagnosis and monitoring of at-risk patients is through endoscopy and biopsy, which is expensive and requires hospital admission. Hence there is a clinical need for a noninvasive diagnostic biomarker of EAC. In total 89 patient samples from healthy controls, and patients with BE or EAC were screened in discovery and qualification stages. 
Of the 246 glycoforms measured in the qualification stage, 40 glycoforms (as measured by lectin affinity) qualified as candidate serum markers. The top candidate for distinguishing healthy from BE patients' group was Narcissus pseudonarcissus lectin (NPL)-reactive Apolipoprotein B-100 (p value = 0.0231; AUROC = 0.71); BE versus EAC, Aleuria aurantia lectin (AAL)-reactive complement component C9 (p value = 0.0001; AUROC = 0.85); healthy versus EAC, Erythroagglutinin Phaseolus vulgaris (EPHA)-reactive gelsolin (p value = 0.0014; AUROC = 0.80). A panel of 8 glycoforms showed an improved AUROC of 0.94 to discriminate EAC from BE. Two biomarker candidates were independently verified by lectin magnetic bead array-immunoblotting, confirming the validity of the relative quantitation approach. Thus, we have identified candidate biomarkers, which, following large-scale clinical evaluation, can be developed into diagnostic blood tests. A key feature of the pipeline is the potential for rapid translation of the candidate biomarkers to lectin-immunoassays.",2015-09-24 +22591066,MetaMapp: mapping and visualizing metabolomic data by integrating information from biochemical pathways and chemical and mass spectral similarity.,"

Background

Exposure to environmental tobacco smoke (ETS) leads to higher rates of pulmonary diseases and infections in children. To study the biochemical changes that may precede lung diseases, metabolomic effects on fetal and maternal lungs and plasma from rats exposed to ETS were compared to filtered air control animals. Genome- reconstructed metabolic pathways may be used to map and interpret dysregulation in metabolic networks. However, mass spectrometry-based non-targeted metabolomics datasets often comprise many metabolites for which links to enzymatic reactions have not yet been reported. Hence, network visualizations that rely on current biochemical databases are incomplete and also fail to visualize novel, structurally unidentified metabolites.

Results

We present a novel approach to integrate biochemical pathway and chemical relationships to map all detected metabolites in network graphs (MetaMapp) using KEGG reactant pair database, Tanimoto chemical and NIST mass spectral similarity scores. In fetal and maternal lungs, and in maternal blood plasma from pregnant rats exposed to environmental tobacco smoke (ETS), 459 unique metabolites comprising 179 structurally identified compounds were detected by gas chromatography time of flight mass spectrometry (GC-TOF MS) and BinBase data processing. MetaMapp graphs in Cytoscape showed much clearer metabolic modularity and complete content visualization compared to conventional biochemical mapping approaches. Cytoscape visualization of differential statistics results using these graphs showed that overall, fetal lung metabolism was more impaired than lungs and blood metabolism in dams. Fetuses from ETS-exposed dams expressed lower lipid and nucleotide levels and higher amounts of energy metabolism intermediates than control animals, indicating lower biosynthetic rates of metabolites for cell division, structural proteins and lipids that are critical for lung development.

Conclusions

MetaMapp graphs efficiently visualize mass spectrometry based metabolomics datasets as network graphs in Cytoscape, and highlight metabolic alterations that can be associated with a higher rate of pulmonary diseases and infections in children prenatally exposed to ETS. The MetaMapp scripts can be accessed at http://metamapp.fiehnlab.ucdavis.edu.",2012-05-16 +22962479,REVEAL--visual eQTL analytics.,"

Motivation

The analysis of expression quantitative trait locus (eQTL) data is a challenging scientific endeavor, involving the processing of very large, heterogeneous and complex data. Typical eQTL analyses involve three types of data: sequence-based data reflecting the genotypic variations, gene expression data and meta-data describing the phenotype. Based on these, certain genotypes can be connected with specific phenotypic outcomes to infer causal associations of genetic variation, expression and disease. To this end, statistical methods are used to find significant associations between single nucleotide polymorphisms (SNPs) or pairs of SNPs and gene expression. A major challenge lies in summarizing the large amount of data as well as statistical results and to generate informative, interactive visualizations.

Results

We present Reveal, our visual analytics approach to this challenge. We introduce a graph-based visualization of associations between SNPs and gene expression and a detailed genotype view relating summarized patient cohort genotypes with data from individual patients and statistical analyses.

Availability

Reveal is included in Mayday, our framework for visual exploration and analysis. It is available at http://it.inf.uni-tuebingen.de/software/reveal/.

Contact

guenter.jaeger@uni-tuebingen.de.",2012-09-01 +26656688,High Degree of HIV-1 Group M (HIV-1M) Genetic Diversity within Circulating Recombinant Forms: Insight into the Early Events of HIV-1M Evolution.,"The existence of various highly divergent HIV-1 lineages and of recombination-derived sequence tracts of indeterminate origin within established circulating recombinant forms (CRFs) strongly suggests that HIV-1 group M (HIV-1M) diversity is not fully represented under the current classification system. Here we used a fully exploratory screen for recombination on a set of 480 near-full-length genomes representing the full known diversity of HIV-1M. We decomposed recombinant sequences into their constituent parts and then used maximum-likelihood phylogenetic analyses of this mostly recombination-free data set to identify rare divergent sequence lineages that fall outside the major named HIV-1M taxonomic groupings. We found that many of the sequence fragments occurring within CRFs (including CRF04_cpx, CRF06_cpx, CRF11_cpx, CRF18_cpx, CRF25_cpx, CRF27_cpx, and CRF49_cpx) are in fact likely derived from divergent unclassified parental lineages that may predate the current subtypes, even though they are presently identified as derived from currently defined HIV-1M subtypes. Our evidence suggests that some of these CRFs are descended predominantly from what were or are major previously unidentified HIV-1M lineages that were likely epidemiologically relevant during the early stages of the HIV-1M epidemic. The restriction of these divergent lineages to the Congo basin suggests that they were less infectious and/or simply not present at the time and place of the initial migratory wave that triggered the global epidemic.IMPORTANCE HIV-1 group M (HIV-1M) likely spread to the rest of the world from the Congo basin in the mid-1900s (N. R. Faria et al., Science 346:56-61, 2014, http://dx.doi.org/10.1126/science.1256739) and is today the principal cause of the AIDS pandemic. 
Here, we show that large sequence fragments from several HIV-1M circulating recombinant forms (CRFs) are derived from divergent parental lineages that cannot reasonably be classified within the nine established HIV-1M subtypes. These lineages are likely to have been epidemiologically relevant in the Congo basin at the onset of the epidemic. Nonetheless, they appear not to have undergone the same explosive global spread as other HIV-1M subtypes, perhaps because they were less transmissible. Concerted efforts to characterize more of these divergent lineages could allow the accurate inference and chemical synthesis of epidemiologically key ancestral HIV-1M variants so as to directly test competing hypotheses relating to the viral genetic factors that enabled the present pandemic.",2015-12-09 +26649893,A Birth Cohort Study of Maternal and Infant Serum PCB-153 and DDE Concentrations and Responses to Infant Tuberculosis Vaccination.,"

Background

Reasons for the highly variable and often poor protection conferred by the Mycobacterium bovis bacille Calmette-Guérin (BCG) vaccine are multifaceted and poorly understood.

Objectives

We aimed to determine whether early-life exposure to PCBs (polychlorinated biphenyls) and DDE [1,1-dichloro-2,2-bis(p-chlorophenyl)ethylene] reduces 6-month infant BCG vaccine response.

Methods

Data came from families participating in a prospective birth cohort in eastern Slovakia. At birth, maternal and cord blood were collected for chemical analyses, and infants were immunized with BCG. Blood was collected from infants for chemical analyses and to determine 6-month BCG-specific immunoglobulin (Ig) G and IgA levels. Multivariable linear regression models were fit to examine chemical-BCG associations among approximately 500 mother-infant pairs, with adjustment for confounders.

Results

The median 6-month infant concentration of the prevalent congener PCB-153 was 113 ng/g lipid [interquartile range (IQR): 37-248], and 388 ng/g lipid (IQR: 115-847) for DDE. Higher 6-month infant concentrations of PCB-153 and DDE were strongly associated with lower 6-month BCG-specific antibody levels. For instance, BCG-specific IgG levels were 37% lower for infants with PCB-153 concentrations at the 75th percentile compared to the 25th percentile (95% CI: -42, -32; p < 0.001). Results were similar in magnitude and precision for DDE. There was also evidence of PCB-DDE additivity, where exposure to both compounds reduced anti-BCG levels more than exposure to either compound alone.

Conclusions

The associations observed in this study indicate that environmental exposures may be overlooked contributors to poorer responses to BCG vaccine. The overall association between these exposures and tuberculosis incidence is unknown.

Citation

Jusko TA, De Roos AJ, Lee SY, Thevenet-Morrison K, Schwartz SM, Verner MA, Palkovicova Murinova L, Drobná B, Kočan A, Fabišiková A, Čonka K, Trnovec T, Hertz-Picciotto I, Lawrence BP. 2016. A birth cohort study of maternal and infant serum PCB-153 and DDE concentrations and responses to infant tuberculosis vaccination. Environ Health Perspect 124:813-821; http://dx.doi.org/10.1289/ehp.1510101.",2015-12-09 +22335941,BLANNOTATOR: enhanced homology-based function prediction of bacterial proteins.,"

Background

Automated function prediction has played a central role in determining the biological functions of bacterial proteins. Typically, protein function annotation relies on homology, and function is inferred from other proteins with similar sequences. This approach has become popular in bacterial genomics because it is one of the few methods that is practical for large datasets and because it does not require additional functional genomics experiments. However, the existing solutions produce erroneous predictions in many cases, especially when query sequences have low levels of identity with the annotated source protein. This problem has created a pressing need for improvements in homology-based annotation.

Results

We present an automated method for the functional annotation of bacterial protein sequences. Based on sequence similarity searches, BLANNOTATOR accurately annotates query sequences with one-line summary descriptions of protein function. It groups sequences identified by BLAST into subsets according to their annotation and bases its prediction on a set of sequences with consistent functional information. We show the results of BLANNOTATOR's performance in sets of bacterial proteins with known functions. We simulated the annotation process for 3090 SWISS-PROT proteins using a database in its state preceding the functional characterisation of the query protein. For this dataset, our method outperformed the five others that we tested, and the improved performance was maintained even in the absence of highly related sequence hits. We further demonstrate the value of our tool by analysing the putative proteome of Lactobacillus crispatus strain ST1.

Conclusions

BLANNOTATOR is an accurate method for bacterial protein function prediction. It is practical for genome-scale data and does not require pre-existing sequence clustering; thus, this method suits the needs of bacterial genome and metagenome researchers. The method and a web-server are available at http://ekhidna.biocenter.helsinki.fi/poxo/blannotator/.",2012-02-15 +24775806,CHEM-PATH-TRACKER: An automated tool to analyze chemical motifs in molecular structures.,"In this article, we propose a method for locating functionally relevant chemical motifs in protein structures. The chemical motifs can be a small group of residues or structure protein fragments with highly conserved properties that have important biological functions. However, the detection of chemical motifs is rather difficult because they often consist of a set of amino acid residues separated by long, variable regions, and they only come together to form a functional group when the protein is folded into its three-dimensional structure. Furthermore, the assemblage of these residues is often dependent on non-covalent interactions among the constituent amino acids that are difficult to detect or visualize. To simplify the analysis of these chemical motifs and give access to a generalized use for all users, we developed chem-path-tracker. This software is a VMD plug-in that allows the user to highlight and reveal potential chemical motifs requiring only a few selections. The analysis is based on atoms/residues pair distances applying a modified version of Dijkstra's algorithm, and it makes possible to monitor the distances of a large pathway, even during a molecular dynamics simulation. This tool turned out to be very useful, fast, and user-friendly in the performed tests. 
The chem-path-tracker package is distributed as an independent platform and can be found at http://www.fc.up.pt/PortoBioComp/database/doku.php?id=chem-path-tracker.",2014-06-07 +21618345,miRvar: A comprehensive database for genomic variations in microRNAs.,microRNAs are a recently discovered and well studied class of small noncoding functional RNAs. The regulatory role of microRNAs (miRNAs) has been well studied in a wide variety of biological processes but there have been no systematic effort to understand and analyze the genetic variations in miRNA loci and study its functional consequences. We have comprehensively curated genetic variations in miRNA loci in the human genome and established a computational pipeline to assess potential functional consequences of these variants along with methods for systematic curation and reporting of variations in these loci. The data is made available on the Leiden Open (source) Variation Database (LOVD) platform at http://genome.igib.res.in/mirlovd to provide ease of aggregation and analysis and is open for community curation efforts.,2011-02-24 +21547743,Evaluation of the disease liability of CFTR variants.,"Over 1600 novel sequence variants in the CFTR gene have been reported to the CF Mutation Database (http://www.genet.sickkids.on.ca/cftr/Home.html). While about 25 mutations are well characterized by clinical studies and functional assays, the disease liability of most of the remaining mutations is either unclear or unknown. This gap in knowledge has implications for diagnosis, therapy selection, and counseling for patients and families carrying an uncharacterized CFTR mutation. 
This chapter will describe a critical approach to assessing the disease implications of CFTR mutations utilizing clinical data, literature review, functional testing, and bioinformatic in silico methods.",2011-01-01 +24664925,Very high-density planets: a possible remnant of gas giants.,"Data extracted from the Extrasolar Planets Encyclopaedia (see http://exoplanet.eu) show the existence of planets that are more massive than iron cores that would have the same size. After meticulous verification of the data, we conclude that the mass of the smallest of these planets is actually not known. However, the three largest planets, Kepler-52b, Kepler-52c and Kepler-57b, which are between 30 and 100 times the mass of the Earth, have indeed density larger than an iron planet of the same size. This observation triggers this study that investigates under which conditions these planets could represent the naked cores of gas giants that would have lost their atmospheres during their migration towards the star. This study shows that for moderate viscosity values (10(25) Pa s or lower), large values of escape rate and associated unloading stress rate during the atmospheric loss process lead to the explosion of extremely massive planets. However, for moderate escape rate, the bulk viscosity and finite-strain incompressibility of the cores of giant planets can be large enough to retain a very high density during geological time scales. This would make those a new kind of planet, which would help in understanding the interior structure of the gas giants. However, this new family of exoplanets adds some degeneracy for characterizing terrestrial exoplanets.",2014-03-24 +21610748,GWAS Integrator: a bioinformatics tool to explore human genetic associations reported in published genome-wide association studies.,"Genome-wide association studies (GWAS) have successfully identified numerous genetic loci that are associated with phenotypic traits and diseases. 
GWAS Integrator is a bioinformatics tool that integrates information on these associations from the National Human Genome Research institute (NHGRI) Catalog, SNAP (SNP Annotation and Proxy Search), and the Human Genome Epidemiology (HuGE) Navigator literature database. This tool includes robust search and data mining functionalities that can be used to quickly identify relevant associations from GWAS, as well as proxy single-nucleotide polymorphisms (SNPs) and potential candidate genes. Query-based University of California Santa Cruz (UCSC) Genome Browser custom tracks are generated dynamically on the basis of users' selected GWAS hits or candidate genes from HuGE Navigator literature database (http://www.hugenavigator.net/HuGENavigator/gWAHitStartPage.do). The GWAS Integrator may help enhance inference on potential genetic associations identified from GWAS studies.",2011-05-25 +26565393,Soy-Based Infant Formula Feeding and Ultrasound-Detected Uterine Fibroids among Young African-American Women with No Prior Clinical Diagnosis of Fibroids.,"

Background

Early-life soy phytoestrogen exposure has been shown in Eker rats to increase uterine fibroid incidence in adulthood. Two large epidemiologic cohorts have provided some support for increased fibroid risk with infant soy formula feeding in women, but both cohorts relied on self-report of clinically diagnosed fibroids.

Objective

We evaluated the relationship between infant soy formula feeding and ultrasound-detected fibroids.

Methods

The Study of Environment, Lifestyle & Fibroids (SELF) is an ongoing cohort study of 1,696 African-American women ages 23-34 years with baseline ultrasound screening to detect and measure fibroids ≥ 0.5 cm in diameter. Questionnaire data on soy formula feeding during infancy was ascertained for 1,553 participants (89% based on mother's report), of whom 345 were found to have fibroids. We estimated the association between soy formula feeding and fibroid prevalence and tumor number using log-binomial regression. Among those with fibroids, we compared fibroid size between soy formula-exposed and unexposed women using multivariable linear regression.

Results

We did not observe an association between soy formula feeding and fibroid prevalence [adjusted prevalence ratio (aPR) 0.9, 95% CI: 0.7, 1.3]. Nor were exposed women with fibroids more likely to have ≥ 2 tumors than unexposed women with fibroids (aPR 1.0, 95% CI: 0.7, 1.6). However, exposed women with fibroids had significantly larger fibroids than unexposed women with fibroids. On average, soy formula feeding was associated with a 32% increase in the diameter of the largest fibroid (95% CI: 6%, 65%) and a 127% increase in total tumor volume (95% CI: 12%, 358%).

Conclusions

Our observation that women fed soy formula as infants have larger fibroids than unexposed women provides further support for persistent effects of early life phytoestrogen exposure on the uterus.

Citation

Upson K, Harmon QE, Baird DD. 2016. Soy-based infant formula feeding and ultrasound-detected uterine fibroids among young African-American women with no prior clinical diagnosis of fibroids. Environ Health Perspect 124:769-775; http://dx.doi.org/10.1289/ehp.1510082.",2015-11-13 +25805722,Identification of cell types from single-cell transcriptomes using a novel clustering method.,"

Motivation

The recent advance of single-cell technologies has brought new insights into complex biological phenomena. In particular, genome-wide single-cell measurements such as transcriptome sequencing enable the characterization of cellular composition as well as functional variation in homogenic cell populations. An important step in the single-cell transcriptome analysis is to group cells that belong to the same cell types based on gene expression patterns. The corresponding computational problem is to cluster a noisy high dimensional dataset with substantially fewer objects (cells) than the number of variables (genes).

Results

In this article, we describe a novel algorithm named shared nearest neighbor (SNN)-Cliq that clusters single-cell transcriptomes. SNN-Cliq utilizes the concept of shared nearest neighbor that shows advantages in handling high-dimensional data. When evaluated on a variety of synthetic and real experimental datasets, SNN-Cliq outperformed the state-of-the-art methods tested. More importantly, the clustering results of SNN-Cliq reflect the cell types or origins with high accuracy.

Availability and implementation

The algorithm is implemented in MATLAB and Python. The source code can be downloaded at http://bioinfo.uncc.edu/SNNCliq.",2015-02-11 +24516550,Automated detection of synapses in serial section transmission electron microscopy image stacks.,"We describe a method for fully automated detection of chemical synapses in serial electron microscopy images with highly anisotropic axial and lateral resolution, such as images taken on transmission electron microscopes. Our pipeline starts from classification of the pixels based on 3D pixel features, which is followed by segmentation with an Ising model MRF and another classification step, based on object-level features. Classifiers are learned on sparse user labels; a fully annotated data subvolume is not required for training. The algorithm was validated on a set of 238 synapses in 20 serial 7197×7351 pixel images (4.5×4.5×45 nm resolution) of mouse visual cortex, manually labeled by three independent human annotators and additionally re-verified by an expert neuroscientist. The error rate of the algorithm (12% false negative, 7% false positive detections) is better than state-of-the-art, even though, unlike the state-of-the-art method, our algorithm does not require a prior segmentation of the image volume into cells. The software is based on the ilastik learning and segmentation toolkit and the vigra image processing library and is freely available on our website, along with the test data and gold standard annotations (http://www.ilastik.org/synapse-detection/sstem).",2014-02-06 +24903515,Automated semantic annotation of rare disease cases: a case study. ,"As the number of clinical reports in the peer-reviewed medical literature keeps growing, there is an increasing need for online search tools to find and analyze publications on patients with similar clinical characteristics. This problem is especially critical and challenging for rare diseases, where publications of large series are scarce. 
Through an applied example, we illustrate how to automatically identify new relevant cases and semantically annotate the relevant literature about patient case reports to capture the phenotype of a rare disease named cerebrotendinous xanthomatosis. Our results confirm that it is possible to automatically identify new relevant case reports with a high precision and to annotate them with a satisfactory quality (74% F-measure). Automated annotation with an emphasis to entirely describe all phenotypic abnormalities found in a disease may facilitate curation efforts by supplying phenotype retrieval and assessment of their frequency. Availability and Supplementary information: http://www.usc.es/keam/PhenotypeAnnotation/. Database URL: http://www.usc.es/keam/PhenotypeAnnotation/",2014-06-04 +26566198,"Modification of Heat-Related Mortality in an Elderly Urban Population by Vegetation (Urban Green) and Proximity to Water (Urban Blue): Evidence from Lisbon, Portugal.","

Background

Urban populations are highly vulnerable to the adverse effects of heat, with heat-related mortality showing intra-urban variations that are likely due to differences in urban characteristics and socioeconomic status.

Objectives

We investigated the influence of urban green and urban blue, that is, urban vegetation and water bodies, on heat-related excess mortality in the elderly > 65 years old in Lisbon, Portugal, between 1998 and 2008.

Methods

We used remotely sensed data and geographic information to determine the amount of urban vegetation and the distance to bodies of water (the Atlantic Ocean and the Tagus Estuary). Poisson generalized additive models were fitted, allowing for the interaction between equivalent temperature [universal thermal climate index (UTCI)] and quartiles of urban greenness [classified using the Normalized Difference Vegetation Index (NDVI)] and proximity to water (≤ 4 km vs. > 4 km), while adjusting for potential confounders.

Results

The association between mortality and a 1°C increase in UTCI above the 99th percentile (24.8°C) was stronger for areas in the lowest NDVI quartile (14.7% higher; 95% CI: 1.9, 17.5%) than for areas in the highest quartile (3.0%; 95% CI: 2.0, 4.0%). In areas > 4 km from water, a 1°C increase in UTCI above the 99th percentile was associated with a 7.1% increase in mortality (95% CI: 6.2, 8.1%), whereas in areas ≤ 4 km from water, the estimated increase in mortality was only 2.1% (95% CI: 1.2, 3.0%).

Conclusions

Urban green and blue appeared to have a mitigating effect on heat-related mortality in the elderly population in Lisbon. Increasing the amount of vegetation may be a good strategy to counteract the adverse effects of heat in urban areas. Our findings also suggest potential benefits of urban blue that may be present several kilometers from a body of water.

Citation

Burkart K, Meier F, Schneider A, Breitner S, Canário P, Alcoforado MJ, Scherer D, Endlicher W. 2016. Modification of heat-related mortality in an elderly urban population by vegetation (urban green) and proximity to water (urban blue): evidence from Lisbon, Portugal. Environ Health Perspect 124:927-934; http://dx.doi.org/10.1289/ehp.1409529.",2015-11-13 +26341331,Borrelia burgdorferi sensu stricto and Borrelia afzelii: Population structure and differential pathogenicity.,"MultiLocus sequence typing (MLST) is considered a powerful method to unveil relationships within bacterial populations and it constitutes an economical and fast alternative to whole genome sequencing. We used this method to understand whether there are differences in human pathogenicity within and between different Borrelia burgdorferi sensu lato species. Therefore, 136 strains from human patients or ticks from Europe were included in MLST analyses. The scheme employed used eight chromosomally located housekeeping genes (i.e. clpA, clpX, nifS, pepX, pyrG, recG, rplB and uvrA). We investigated Borrelia afzelii, one of the predominant species in Europe, and B. burgdorferi sensu stricto (s.s.), because it allowed comparative analysis to strains from the USA. We typed 113 patient isolates as well as 23 tick isolates. For further comparative purposes an additional 746 strains from Europe and the USA were included from the MLST website http://borrelia.mlst.net. We observed an overlap of the B. burgdorferi s.s. populations from Europe and the USA isolated from human patients while there was no overlap of the populations found in tick vectors. Further results indicate that B. afzelii was significantly less associated with disseminated infection than B. burgdorferi s.s. and that B. burgdorferi s.s. from Europe caused neuroborreliosis to a significantly greater extent than B. afzelii or B. burgdorferi s.s. in the USA. 
Our data suggest that there may be an evolutionary basis of differential interspecies pathogenicity in Borrelia. This was not evident within Borrelia species: we found the same sequence types in patients with disseminated or localized symptoms when the number of strains was sufficiently high. We hypothesize that the finding that B. burgdorferi s.s. in Europe is much more associated with neuroborreliosis than in the USA maybe linked to factor(s) related to the human host, the tick vector or the bacterium itself (e.g. plasmid content and structure).",2015-08-21 +21408061,Fast identification and removal of sequence contamination from genomic and metagenomic datasets.,"High-throughput sequencing technologies have strongly impacted microbiology, providing a rapid and cost-effective way of generating draft genomes and exploring microbial diversity. However, sequences obtained from impure nucleic acid preparations may contain DNA from sources other than the sample. Those sequence contaminations are a serious concern to the quality of the data used for downstream analysis, causing misassembly of sequence contigs and erroneous conclusions. Therefore, the removal of sequence contaminants is a necessary and required step for all sequencing projects. We developed DeconSeq, a robust framework for the rapid, automated identification and removal of sequence contamination in longer-read datasets (150 bp mean read length). DeconSeq is publicly available as standalone and web-based versions. The results can be exported for subsequent analysis, and the databases used for the web-based version are automatically updated on a regular basis. DeconSeq categorizes possible contamination sequences, eliminates redundant hits with higher similarity to non-contaminant genomes, and provides graphical visualizations of the alignment results and classifications. 
Using DeconSeq, we conducted an analysis of possible human DNA contamination in 202 previously published microbial and viral metagenomes and found possible contamination in 145 (72%) metagenomes with as high as 64% contaminating sequences. This new framework allows scientists to automatically detect and efficiently remove unwanted sequence contamination from their datasets while eliminating critical limitations of current methods. DeconSeq's web interface is simple and user-friendly. The standalone version allows offline analysis and integration into existing data processing pipelines. DeconSeq's results reveal whether the sequencing experiment has succeeded, whether the correct sample was sequenced, and whether the sample contains any sequence contamination from DNA preparation or host. In addition, the analysis of 202 metagenomes demonstrated significant contamination of the non-human associated metagenomes, suggesting that this method is appropriate for screening all metagenomes. DeconSeq is available at http://deconseq.sourceforge.net/.",2011-03-09 +22369715,Functional characterization of protein domains common to animal viruses and mouse.,"

Background

Many viruses contain genes that originate from their hosts. Some of these acquired genes give viruses the ability to interfere with host immune responses by various mechanisms. Genes of host origin that appear commonly in viruses code for proteins that span a wide range of functions, from kinases and phosphatases, to cytokines and their receptors, to ubiquitin ligases and proteases. While many important cases of such lateral gene transfer in viruses have been documented, there has yet to be a genome-wide survey of viral-encoded genes acquired from animal hosts.

Results

Here we carry out such a survey in order to gain insight into the host immune system. We made the results available in the form of a web-based tool that allows viral-centered or host-centered queries to be performed (http://imm.ifrec.osaka-u.ac.jp/musvirus/). We examine the relationship between acquired genes and immune function, and compare host-virus homology with gene expression data in stimulated dendritic cells and T-cells. We found that genes whose expression changes significantly during the innate antiviral immune response had more homologs in animal virus than genes whose expression did not change or genes involved in the adaptive immune response.

Conclusions

Statistics gathered from the MusVirus database support earlier reports of gene transfer from host to virus and indicate that viruses are more likely to acquire genes involved in innate antiviral immune responses than those involved in acquired immune responses.",2011-11-30 +24889152,Genome-wide survey of tissue-specific microRNA and transcription factor regulatory networks in 12 tissues.,"Tissue-specific miRNAs (TS miRNA) specifically expressed in particular tissues play an important role in tissue identity, differentiation and function. However, transcription factor (TF) and TS miRNA regulatory networks across multiple tissues have not been systematically studied. Here, we manually extracted 116 TS miRNAs and systematically investigated the regulatory network of TF-TS miRNA in 12 human tissues. We identified 2,347 TF-TS miRNA regulatory relations and revealed that most TF binding sites tend to enrich close to the transcription start site of TS miRNAs. Furthermore, we found TS miRNAs were regulated widely by non-tissue specific TFs and the tissue-specific expression level of TF have a close relationship with TF-genes regulation. Finally, we describe TSmiR (http://bioeng.swjtu.edu.cn/TSmiR), a novel and web-searchable database that houses interaction maps of TF-TS miRNA in 12 tissues. Taken together, these observations provide a new suggestion to better understand the regulatory network and mechanisms of TF-TS miRNAs underlying different tissues.",2014-06-03 +24889386,Imported Plasmodium vivax malaria ex Pakistan.,"

Background

According to WHO, 1.5 million cases of malaria are reported annually in Pakistan. Malaria distribution in Pakistan is heterogeneous, and some areas, including Punjab, are considered at low risk for malaria. The aim of this study is to describe the trend of imported malaria cases from Pakistan reported to the international surveillance systems from 2005 to 2012.

Methods

Clinics reporting malaria cases acquired after a stay in Pakistan between January 1, 2005, and December 31, 2012, were identified from the GeoSentinel (http://www.geosentinel.org) and EuroTravNet (http://www.Eurotravnet.eu) networks. Demographic and travel-related information was retrieved from the database and further information such as areas of destination within Pakistan was obtained directly from the reporting sites. Standard linear regression models were used to assess the statistical significance of the time trend.

Results

From January 2005 to December 2012, a total of 63 cases of malaria acquired in Pakistan were retrieved in six countries over three continents. A statistically significant increasing trend in imported Plasmodium vivax malaria cases acquired in Pakistan, particularly for those exposed in Punjab, was observed over time (p = 0.006).

Conclusions

Our observation may herald a variation in malaria incidence in the Punjab province of Pakistan. This is in contrast with the previously described decreasing incidence of malaria in travelers to the Indian subcontinent, and with reports that describe Punjab as a low risk area for malaria. Nevertheless, this event is considered plausible by international organizations. This has potential implications for changes in chemoprophylaxis options and reinforces the need for increased surveillance, also considering the risk of introduction of autochthonous P. vivax malaria in areas where competent vectors are present, such as Europe.",2014-06-03 +23616008,A guide to CORNET for the construction of coexpression and protein-protein interaction networks.,"To enable easy access and interpretation of heterogenous and scattered data, we have developed a user-friendly tool for data mining and integration in Arabidopsis thaliana, designated CORrelation NETworks (acronym CORNET), allowing browsing of microarray data, construction of coexpression and protein-protein interactions (PPIs), analysis of gene association and transcription factor (TF) regulatory networks, and exploration of diverse functional annotations. CORNET consists of three tools that can be used individually or in combination, namely, the coexpression tool, the PPI tool, and the TF tool. Different search options are implemented to enable the creation of networks centered around multiple input genes or proteins. Functional annotation resources are included to retrieve relevant literature, phenotypes, localization, gene ontology, plant ontology, and biological pathways. Networks and associated evidence of the majority of the currently available data types are visualized in Cytoscape. CORNET is available at https://bioinformatics.psb.ugent.be/cornet.",2013-01-01 +24796472,Clinical outcomes of patient mobility in a neuroscience intensive care unit.,"

Background

Patients treated in a neuroscience intensive care unit (NICU) are often viewed as too sick to tolerate physical activity. In this study, mobility status in NICU was assessed, and factors and outcomes associated with mobility were examined.

Methods

Using a prospective design, daily mobility status, medical history, demographics, Acute Physiology and Chronic Health Evaluation (APACHE) III score, and clinical outcomes were collected by medical records and database review. Depression, anxiety, and hostility were assessed before NICU discharge. Analyses included comparative statistics and multivariable modeling.

Results

In 228 unique patients, median (minimum, maximum) age was 64.0 (20, 95) years, 66.4% were Caucasian, and 53.6% were men. Of 246 admissions, median NICU stay was 4 (1, 61) days; APACHE III score was 56 (16, 145). Turning, range of motion, and head of bed of >30° were uniformly applied (n = 241), but 94 patients (39%) never progressed; 94 (39%) progressed to head of bed of >45° or dangling legs, 29 (12%) progressed to standing or pivoting to chair, and 24 (10%) progressed to walking. Female gender (p = .019), mechanical ventilation (p < .001), higher APACHE score (p = .004), and 30-day mortality (p = .001) were associated with less mobility. In multivariable modeling, greater mobility was associated with longer unit stay (p < .001) and discharge to home (p < .001). Psychological profile characteristics were not associated with mobility level.

Conclusion

Nearly 40% of patients never progressed beyond bed movement, and only 10% walked. Although limited mobility progression was not associated with many patient factors, it was associated with poorer clinical outcomes. Implementation and evaluation of a progressive mobility protocol are needed in NICU patients.

Video abstract

For more insights from the authors, see Supplemental Digital Content 1, at http://link.lww.com/JNN/A10.",2014-06-01 +24940783,Features of large hinge-bending conformational transitions. Prediction of closed structure from open state.,"We performed a detailed analysis of conformational transition pathways for a set of 10 proteins, which undergo large hinge-bending-type motions with 4-12 Å RMSD (root mean-square distance) between open and closed crystal structures. Anisotropic network model-Monte Carlo (ANM-MC) algorithm generates a targeted pathway between two conformations, where the collective modes from the ANM are used for deformation at each iteration and the conformational energy of the deformed structure is minimized via an MC algorithm. The target structure was approached successfully with an RMSD of 0.9-4.1 Å when a relatively low cutoff radius of 10 Å was used in ANM. Even though one predominant mode (first or second) directed the open-to-closed conformational transition, changes in the dominant mode character were observed for most cases along the transition. By imposing radius of gyration constraint during mode selection, it was possible to predict the closed structure for eight out of 10 proteins (with initial 4.1-7.1 Å and final 1.7-2.9 Å RMSD to target). Deforming along a single mode leads to most successful predictions. Based on the previously reported free energy surface of adenylate kinase, deformations along the first mode produced an energetically favorable path, which was interestingly facilitated by a change in mode shape (resembling second and third modes) at key points. Pathway intermediates are provided in our database of conformational transitions (http://safir.prc.boun.edu.tr/anmmc/method/1).",2014-06-01 +24931987,New directions for diffusion-based network prediction of protein function: incorporating pathways with confidence.,"

Motivation

It has long been hypothesized that incorporating models of network noise as well as edge directions and known pathway information into the representation of protein-protein interaction (PPI) networks might improve their utility for functional inference. However, a simple way to do this has not been obvious. We find that diffusion state distance (DSD), our recent diffusion-based metric for measuring dissimilarity in PPI networks, has natural extensions that incorporate confidence, directions and can even express coherent pathways by calculating DSD on an augmented graph.

Results

We define three incremental versions of DSD which we term cDSD, caDSD and capDSD, where the capDSD matrix incorporates confidence, known directed edges, and pathways into the measure of how similar each pair of nodes is according to the structure of the PPI network. We test four popular function prediction methods (majority vote, weighted majority vote, multi-way cut and functional flow) using these different matrices on the Baker's yeast PPI network in cross-validation. The best performing method is weighted majority vote using capDSD. We then test the performance of our augmented DSD methods on an integrated heterogeneous set of protein association edges from the STRING database. The superior performance of capDSD in this context confirms that treating the pathways as probabilistic units is more powerful than simply incorporating pathway edges independently into the network.

Availability

All source code for calculating the confidences, for extracting pathway information from KEGG XML files, and for calculating the cDSD, caDSD and capDSD matrices are available from http://dsd.cs.tufts.edu/capdsd",2014-06-01 +23996831,"The Nencki Affective Picture System (NAPS): introduction to a novel, standardized, wide-range, high-quality, realistic picture database.","Selecting appropriate stimuli to induce emotional states is essential in affective research. Only a few standardized affective stimulus databases have been created for auditory, language, and visual materials. Numerous studies have extensively employed these databases using both behavioral and neuroimaging methods. However, some limitations of the existing databases have recently been reported, including limited numbers of stimuli in specific categories or poor picture quality of the visual stimuli. In the present article, we introduce the Nencki Affective Picture System (NAPS), which consists of 1,356 realistic, high-quality photographs that are divided into five categories (people, faces, animals, objects, and landscapes). Affective ratings were collected from 204 mostly European participants. The pictures were rated according to the valence, arousal, and approach-avoidance dimensions using computerized bipolar semantic slider scales. Normative ratings for the categories are presented for each dimension. Validation of the ratings was obtained by comparing them to ratings generated using the Self-Assessment Manikin and the International Affective Picture System. In addition, the physical properties of the photographs are reported, including luminance, contrast, and entropy. The new database, with accompanying ratings and image parameters, allows researchers to select a variety of visual stimulus materials specific to their experimental questions of interest. 
The NAPS system is freely accessible to the scientific community for noncommercial use by request at http://naps.nencki.gov.pl .",2014-06-01 +30708661,First Report of Stigmina palmivora Causing Leaf Spots on Phoenix roebelenii in Brazil.,"Phoenix roebelenii (Arecaceae), known as dwarf date (tamareira-anã in Brazil), is a palm native to Southeast Asia and widely cultivated worldwide because of its ornamental value and ease of adaptation to a broad range of climates and soil types (4). In June 2012, some individuals were observed in a private garden in the municipality of Viçosa (state of Minas Gerais, Brazil) bearing numerous necrotic lesions on its leaves. Representative samples were taken, dried in a plant press, and brought to the laboratory for examination. A fungus was regularly associated with the leaf spots. Fungal structures were mounted in lactophenol and slides were examined under a microscope (Olympus BX 51). Spores were taken from sporulating colonies with a sterile fine needle and plated on PDA for isolation. A pure culture was deposited in the culture collection of the Universidade Federal de Viçosa (accession COAD1338). A dried herbarium sample was deposited in the local herbarium (VIC39741). The fungus had the following morphology: conidiophores grouped on sporodochia, cylindrical, 12 to 29 × 5 to 6 μm, dark brown; conidiogenous cells, terminal, proliferating percurrently (annellidic), 8 to 20 × 5 to 6 μm, pale to dark brown; conidia obclavate to subcylindrical, straight, 58 to 147 × 5 to 6 μm, 6 to 16 septate, hila thickened and darkened with a thin-walled projecting papilla, dark brown, and verrucose. The morphology of the Brazilian collections agrees well with the description of Stigmina palmivora (2), a species known to cause leaf spots on P. roebelenii in the United States (Florida) and Japan (3). 
Pathogenicity was demonstrated through inoculation of leaves of healthy plants by placing 6 mm diameter culture disks of COAD1338 on the leaf surface followed by incubation in a moist chamber for 48 h and then transferred to a greenhouse bench at 21 ± 3°C. Typical leaf spots were observed 15 days after inoculation. DNA was extracted from the isolate growing in pure culture and ITS and LSU sequences were generated and deposited in GenBank under the accession numbers KF656785 and KF656786, respectively. These were compared by BLASTn with other entries in GenBank, and the closest match for each region were Mycosphaerella colombiensis strain X215 and M. irregulariamosa strain CPC 1362 (EU514231, GU2114441) with 93% of nucleotide homology (over 100% query coverage) for ITS and 98% of nucleotide homology (over 100% query coverage) for LSU. There are no sequences for S. palmivora deposited in public databases for comparison, but for Stigmina platani, the type species in this genus, 86% and 96% nucleotide homology for ITS and LSU with S. palmivora were found. The genus Stigmina is regarded as being polyphyletic (1) and this is probably reflected by these low homology levels found in the BLASTn search. To our knowledge, this is the first report of Stigmina palmivora in Brazil. References: (1) P. W. Crous et al. Stud. Mycol. 75:37, 2012. (2) M. B. Ellis. Dematiaceous Hyphomycetes. Commonwealth Mycological Institute, Kew, UK, 1971. (3) D. F. Farr and A. Y. Rossman. Fungal Databases. Syst. Mycol. Microbiol. Lab. ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 2013. (4) H. Lorenzi et al. Palmeira no Brasil: Exóticas e Nativas, 2nd ed. Editora Plantarum, Nova Odessa, Brazil, 2005.",2014-06-01 +30708670,First Report of Myrothecium roridum Causing Leaf Spot on Zantedeschia aethiopica in China.,"Zantedeschia aethiopica (L.) Spreng. (calla lily), belonging to family Araceae, is a popular ornamental plant in China. 
In the summer of 2010, leaves of calla lily with typical symptoms of necrotic lesions were observed in a commercial glasshouse in Beijing, China (116°20' E, 39°44' N). The initial symptoms were circular to subcircular, 1 to 3 mm, and dark brown lesions on the leaf lamina. Under high humidity, lesions expanded rapidly to 5 to 10 mm with distinct concentric zones and produced black sporodochia, especially on the backs of leaves. Later, the infected leaves were developing a combination of leaf lesions, yellowing, and falling off; as a result, the aesthetic value of the plant was significantly impacted. Leaf samples were used in pathogen isolation. Symptomatic leaf tissues were cut into small pieces and surface sterilized with 70% ethanol for 30 s and then in 0.1% mercuric chloride solution for 1 to 3 min. After being washed in sterile distilled water three times, the pieces were plated on potato dextrose agar (PDA) and incubated at 25°C in darkness for 7 days (5). Initial colonies of isolates were white, floccose mycelium and developed dark green to black concentric rings that were sporodochia bearing viscid spore masses after incubating 5 days. Conidiophores branched repeatedly. Conidiogenous cells were hyaline, clavate, and 10.0 to 16.0 × 1.4 to 2.0 μm. Conidia were hyaline, cylindrical, both rounded ends, and 6.0 to 8.2 × 1.9 to 2.4 μm. Morphological characteristics of the fungus were consistent with the description of Myrothecium roridum Tode ex Fr. (3,4). To confirm the pathogenicity, three healthy plants of calla lily were inoculated with a conidial suspension (1 × 10^6 conidia per ml) brushed from a 7-day-old culture of the fungus. Control plants were sprayed with sterile water. The inoculated plants were individual with clear plastic bags and placed in a glass cabinet at 25°C. After 7 days, all inoculated leaves developed symptoms similar to the original samples, but control plants remained disease free. 
Re-isolation and identification confirmed Koch's postulates. For molecular identification, genomic DNA of a representative isolate (MTL07081001) was extracted by modified CTAB method (1), and the rDNA-ITS region was amplified by using primers ITS1 (5-TCCGTAGGTGAACCTGCGG-3) and ITS4 (5-TCCTCCGCTTATTGATATGC-3). The 465-bp amplicon (GenBank Accession No. KF761293) was 100% identity to the sequence of M. roridum (JF724158.1) from GenBank. M. roridum has an extensive host range, covering 294 host plants (2). To our knowledge, this is the first record of leaf spot caused by M. roridum on calla lily in China. References: (1) F. M. Ausubel et al. Current Protocols in Molecular Biology. John Wiley & Sons Inc, New York, 1994. (2) D. F. Farr and A. Y. Rossman, Fungal Databases. Syst. Mycol. Microbiol. Lab., ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , October 2013. (3) M. T. Mmbaga et al. Plant Dis. 94:1266, 2010. (4) Y. X. Zhang et al. Plant Dis. 95:1030, 2011. (5) L. Zhu et al. J. Phytopathol. 161:59, 2013.",2014-06-01 +30708660,First Detection of Tomato leaf curl New Delhi virus Infecting Zucchini in Spain.,"In September 2012, a novel disease syndrome was observed in zucchini (Cucurbita pepo L.) crops in Murcia Province (southeastern Spain). Symptoms included curling, vein swelling, and severe mosaic in young leaves, short internodes, and fruit skin roughness, resembling begomovirus infection. Similar symptoms were observed in May 2013 in Almería Province (southern Spain). DNA was isolated from 8 and 7 symptomatic leaf samples collected in Murcia and Almería, respectively, and analyzed by PCR with primers GemCP-V-5' and GemCP-C-3' designed to detect begomoviruses by amplifying the core of coat protein gene (CP) (3). DNA fragments of the expected size (~600 bp) were amplified supporting a begomovirus infection. The DNA sequences obtained from four samples were identical. 
BLAST analysis showed the highest nucleotide identity (98%) with partial CP gene sequences from isolates of Tomato leaf curl New Delhi virus (ToLCNDV) infecting cucumber in India (GenBank Accession No. KC846817). ToLCNDV, a bipartite begomovirus first reported from tomato, also infects other solanaceous and cucurbitaceous crops in India and neighboring countries (1). DNA from two samples from Murcia and three from Almería was used for rolling-circle amplification using ϕ29 DNA polymerase (TempliPhi kit, GE Healthcare, Little Chalfont, UK) and digested with a set of restriction endonucleases. All five samples yielded amplification products with identical restriction patterns. Two samples from Murcia (MU-8.1 and MU-11.1) and one from Almería (AL-661) were selected to clone the putative DNA-A and DNA-B begomovirus genome components by using single BamHI or NcoI sites. Inserts of two clones from each sample, one corresponding to DNA-A and one to DNA-B, were completely sequenced. The cloned genomes exhibited the typical organization of Old World bipartite begomoviruses (1). Sequences were aligned with begomovirus sequences available in databases using MUSCLE and pairwise identity scores were calculated with SDT (species demarcation tool [4]). DNA-A sequences obtained from Murcia (2,738 nt, KF749224 and KF749225) and Almería (2,738 nt, KF749223) shared >99% nucleotide identity, with the highest nucleotide identity (91.3 to 91.5%) with that of an Indian ToLCNDV isolate from chilli (HM007120). DNA-B sequences (2,684 nt, KF749226, KF749227, and KF749228) shared >99% nucleotide identity, and showed the highest nucleotide identity (83.1 to 83.3%) with that of a Pakistani ToLCNDV isolate from Solanum nigrum (AJ620188). Nucleotide sequence identity of DNA-A with the most closely related begomoviruses was above the 91% threshold for species demarcation (2), thus confirming that the begomoviruses found infecting zucchini in Spain are isolates of ToLCNDV. 
In fall 2013, the disease was widespread in zucchini both in Murcia and Almería, and ToLCNDV has also been found infecting melon and cucumber crops. To our knowledge, this is the first report of a bipartite begomovirus in Spain and Europe. References: (1) J. K. Brown et al. Page 351 in: Virus Taxonomy. Ninth Report of the ICTV. A. M. Q. King et al., eds. Elsevier/Academic Press, London, 2012. (2) ICTV Geminiviridae Study Group. New species and revised taxonomy proposal for the genus Begomovirus (Geminiviridae). ICTV. Retrieved from http://talk.ictvonline.org/files/proposals/ taxonomy_proposals_plant1/m/plant04/4720.aspx , 10 October 2013. (3) H. Lecoq and C. Desbiez. Adv. Virus Res. 84:67, 2012. (4) B. Muhire et al. Arch. Virol. 158:1411, 2013.",2014-06-01 +23813004,Haplotype assembly in polyploid genomes and identical by descent shared tracts.,"

Motivation

Genome-wide haplotype reconstruction from sequence data, or haplotype assembly, is at the center of major challenges in molecular biology and life sciences. For complex eukaryotic organisms like humans, the genome is vast and the population samples are growing so rapidly that algorithms processing high-throughput sequencing data must scale favorably in terms of both accuracy and computational efficiency. Furthermore, current models and methodologies for haplotype assembly (i) do not consider individuals sharing haplotypes jointly, which reduces the size and accuracy of assembled haplotypes, and (ii) are unable to model genomes having more than two sets of homologous chromosomes (polyploidy). Polyploid organisms are increasingly becoming the target of many research groups interested in the genomics of disease, phylogenetics, botany and evolution but there is an absence of theory and methods for polyploid haplotype reconstruction.

Results

In this work, we present a number of results, extensions and generalizations of compass graphs and our HapCompass framework. We prove the theoretical complexity of two haplotype assembly optimizations, thereby motivating the use of heuristics. Furthermore, we present graph theory-based algorithms for the problem of haplotype assembly using our previously developed HapCompass framework for (i) novel implementations of haplotype assembly optimizations (minimum error correction), (ii) assembly of a pair of individuals sharing a haplotype tract identical by descent and (iii) assembly of polyploid genomes. We evaluate our methods on 1000 Genomes Project, Pacific Biosciences and simulated sequence data.

Availability and implementation

HapCompass is available for download at http://www.brown.edu/Research/Istrail_Lab/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +26192348,"Mycophenolic mofetil optimized pharmacokinetic modelling, and exposure-effect associations in adult heart transplant recipients.","

Unlabelled

Mycophenolic acid (MPA) area under the curve (AUC) has been associated with graft outcome.

The aims of our study were

(1) to develop pharmacokinetic tools to optimize MPA inter-dose AUC estimation in heart transplant patients; and (2) to investigate the relationships between acute allograft rejection and MPA AUC, trough level (C0) or mycophenolate mofetil (MMF) dose. Two independent modeling approaches (parametric and non parametric) were used to fit 56 rich MPA pharmacokinetic (PK) profiles collected from 40 adult heart transplant recipients enrolled in the PIGREC study, receiving MMF and a calcineurin inhibitor (CNI), in the first year post-transplantation. In addition, associations between drug exposure (MPA C0, AUC and MMF dose) and acute rejection or MMF adverse events were investigated using time-dependent Cox models with stratification on the type of calcineurin inhibitor. Exposure threshold values were investigated using ROC curve analysis. The 2 models developed fit adequately the data and the use of their combination yielded 100% consistency with the measured AUC in terms of strategy of dose adjustment (maintain, increase or decrease). MPA measured AUC adjusted on CNI exposure was significantly associated with rejection (per unit increase: HR [95% CI]=0.97 [0.95-0.99], p=0.0122), while no effect was shown for adverse events attributable to MMF. An AUC threshold of 50 mg×h/L was proposed (sensitivity=77%, specificity=25%) beyond which the risk of rejection was significantly increased (low vs. high: HR=3.48 [1.21-10.0], p=0.0204). The tools developed have already been made available to the heart transplant community on our ISBA website (https://pharmaco.chu-limoges.fr).",2015-07-17 +26217739,Dataset from chemical gas sensor array in turbulent wind tunnel.,"The dataset includes the acquired time series of a chemical detection platform exposed to different gas conditions in a turbulent wind tunnel. The chemo-sensory elements were sampling directly the environment. 
In contrast to traditional approaches that include measurement chambers, open sampling systems are sensitive to dispersion mechanisms of gaseous chemical analytes, namely diffusion, turbulence, and advection, making the identification and monitoring of chemical substances more challenging. The sensing platform included 72 metal-oxide gas sensors that were positioned at 6 different locations of the wind tunnel. At each location, 10 distinct chemical gases were released in the wind tunnel, the sensors were evaluated at 5 different operating temperatures, and 3 different wind speeds were generated in the wind tunnel to induce different levels of turbulence. Moreover, each configuration was repeated 20 times, yielding a dataset of 18,000 measurements. The dataset was collected over a period of 16 months. The data is related to ""On the performance of gas sensor arrays in open sampling systems using Inhibitory Support Vector Machines"", by Vergara et al.[1]. The dataset can be accessed publicly at the UCI repository upon citation of [1]: http://archive.ics.uci.edu/ml/datasets/Gas+sensor+arrays+in+open+sampling+settings.",2015-03-04 +24405700,Quantitative prediction of the effect of genetic variation using hidden Markov models.,"

Background

With the development of sequencing technologies, more and more sequence variants are available for investigation. Different classes of variants in the human genome have been identified, including single nucleotide substitutions, insertion and deletion, and large structural variations such as duplications and deletions. Insertion and deletion (indel) variants comprise a major proportion of human genetic variation. However, little is known about their effects on humans. The absence of understanding is largely due to the lack of both biological data and computational resources.

Results

This paper presents a new indel functional prediction method HMMvar based on HMM profiles, which capture the conservation information in sequences. The results demonstrate that a scoring strategy based on HMM profiles can achieve good performance in identifying deleterious or neutral variants for different data sets, and can predict the protein functional effects of both single and multiple mutations.

Conclusions

This paper proposed a quantitative prediction method, HMMvar, to predict the effect of genetic variation using hidden Markov models. The HMM based pipeline program implementing the method HMMvar is freely available at https://bioinformatics.cs.vt.edu/zhanglab/hmm.",2014-01-09 +23565205,QC-Chain: fast and holistic quality control method for next-generation sequencing data.,"Next-generation sequencing (NGS) technologies have been widely used in life sciences. However, several kinds of sequencing artifacts, including low-quality reads and contaminating reads, were found to be quite common in raw sequencing data, which compromise downstream analysis. Therefore, quality control (QC) is essential for raw NGS data. However, although a few NGS data quality control tools are publicly available, there are two limitations: First, the processing speed could not cope with the rapid increase of large data volume. Second, with respect to removing the contaminating reads, none of them could identify contaminating sources de novo, and they rely heavily on prior information of the contaminating species, which is usually not available in advance. Here we report QC-Chain, a fast, accurate and holistic NGS data quality-control method. The tool synergeticly comprised of user-friendly tools for (1) quality assessment and trimming of raw reads using Parallel-QC, a fast read processing tool; (2) identification, quantification and filtration of unknown contamination to get high-quality clean reads. It was optimized based on parallel computation, so the processing speed is significantly higher than other QC methods. Experiments on simulated and real NGS data have shown that reads with low sequencing quality could be identified and filtered. Possible contaminating sources could be identified and quantified de novo, accurately and quickly. Comparison between raw reads and processed reads also showed that subsequent analyses (genome assembly, gene prediction, gene annotation, etc.) 
results based on processed reads improved significantly in completeness and accuracy. As regard to processing speed, QC-Chain achieves 7-8 time speed-up based on parallel computation as compared to traditional methods. Therefore, QC-Chain is a fast and useful quality control tool for read quality process and de novo contamination filtration of NGS reads, which could significantly facilitate downstream analysis. QC-Chain is publicly available at: http://www.computationalbioenergy.org/qc-chain.html.",2013-04-02 +25780760,VaRank: a simple and powerful tool for ranking genetic variants.,"Background. Most genetic disorders are caused by single nucleotide variations (SNVs) or small insertion/deletions (indels). High throughput sequencing has broadened the catalogue of human variation, including common polymorphisms, rare variations or disease causing mutations. However, identifying one variation among hundreds or thousands of others is still a complex task for biologists, geneticists and clinicians. Results. We have developed VaRank, a command-line tool for the ranking of genetic variants detected by high-throughput sequencing. VaRank scores and prioritizes variants annotated either by Alamut Batch or SnpEff. A barcode allows users to quickly view the presence/absence of variants (with homozygote/heterozygote status) in analyzed samples. VaRank supports the commonly used VCF input format for variants analysis thus allowing it to be easily integrated into NGS bioinformatics analysis pipelines. VaRank has been successfully applied to disease-gene identification as well as to molecular diagnostics setup for several hundred patients. Conclusions. VaRank is implemented in Tcl/Tk, a scripting language which is platform-independent but has been tested only on Unix environment. 
The source code is available under the GNU GPL, and together with sample data and detailed documentation can be downloaded from http://www.lbgi.fr/VaRank/.",2015-03-03 +24875952,3D landmarking in multiexpression face analysis: a preliminary study on eyebrows and mouth.,"

Unlabelled

The application of three-dimensional (3D) facial analysis and landmarking algorithms in the field of maxillofacial surgery and other medical applications, such as diagnosis of diseases by facial anomalies and dysmorphism, has gained a lot of attention. In a previous work, we used a geometric approach to automatically extract some 3D facial key points, called landmarks, working in the differential geometry domain, through the coefficients of fundamental forms, principal curvatures, mean and Gaussian curvatures, derivatives, shape and curvedness indexes, and tangent map. In this article we describe the extension of our previous landmarking algorithm, which is now able to extract eyebrows and mouth landmarks using both old and new meshes. The algorithm has been tested on our face database and on the public Bosphorus 3D database. We chose to work on the mouth and eyebrows as a separate study because of the role that these parts play in facial expressions. In fact, since the mouth is the part of the face that moves the most and affects mainly facial expressions, extracting mouth landmarks from various facial poses means that the newly developed algorithm is pose-independent.

No level assigned

This journal requires that authors assign a level of evidence to each submission to which Evidence-Based Medicine rankings are applicable. This excludes Review Articles, Book Reviews, and manuscripts that concern Basic Science, Animal Studies, Cadaver Studies, and Experimental Studies. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266 .",2014-05-30 +23531303,AMBIENT: Active Modules for Bipartite Networks--using high-throughput transcriptomic data to dissect metabolic response.,"

Background

With the continued proliferation of high-throughput biological experiments, there is a pressing need for tools to integrate the data produced in ways that produce biologically meaningful conclusions. Many microarray studies have analysed transcriptomic data from a pathway perspective, for instance by testing for KEGG pathway enrichment in sets of upregulated genes. However, the increasing availability of species-specific metabolic models provides the opportunity to analyse these data in a more objective, system-wide manner.

Results

Here we introduce ambient (Active Modules for Bipartite Networks), a simulated annealing approach to the discovery of metabolic subnetworks (modules) that are significantly affected by a given genetic or environmental change. The metabolic modules returned by ambient are connected parts of the bipartite network that change coherently between conditions, providing a more detailed view of metabolic changes than standard approaches based on pathway enrichment.

Conclusions

ambient is an effective and flexible tool for the analysis of high-throughput data in a metabolic context. The same approach can be applied to any system in which reactions (or metabolites) can be assigned a score based on some biological observation, without the limitation of predefined pathways. A Python implementation of ambient is available at http://www.theosysbio.bio.ic.ac.uk/ambient.",2013-03-25 +24875496,Mapping the global distribution of livestock.,"Livestock contributes directly to the livelihoods and food security of almost a billion people and affects the diet and health of many more. With estimated standing populations of 1.43 billion cattle, 1.87 billion sheep and goats, 0.98 billion pigs, and 19.60 billion chickens, reliable and accessible information on the distribution and abundance of livestock is needed for a many reasons. These include analyses of the social and economic aspects of the livestock sector; the environmental impacts of livestock such as the production and management of waste, greenhouse gas emissions and livestock-related land-use change; and large-scale public health and epidemiological investigations. The Gridded Livestock of the World (GLW) database, produced in 2007, provided modelled livestock densities of the world, adjusted to match official (FAOSTAT) national estimates for the reference year 2005, at a spatial resolution of 3 minutes of arc (about 5×5 km at the equator). Recent methodological improvements have significantly enhanced these distributions: more up-to date and detailed sub-national livestock statistics have been collected; a new, higher resolution set of predictor variables is used; and the analytical procedure has been revised and extended to include a more systematic assessment of model accuracy and the representation of uncertainties associated with the predictions. 
This paper describes the current approach in detail and presents new global distribution maps at 1 km resolution for cattle, pigs and chickens, and a partial distribution map for ducks. These digital layers are made publicly available via the Livestock Geo-Wiki (http://www.livestock.geo-wiki.org), as will be the maps of other livestock types as they are produced.",2014-05-29 +21327987,Identification and characterization of microRNAs in Trichinella spiralis by comparison with Brugia malayi and Caenorhabditis elegans.,"Trichinella spiralis is an important zoonotic nematode causing trichinellosis which is associated with human diseases such as malaise, anorexia, nausea, vomiting, abdominal pain, fever, diarrhea, and constipation. microRNAs (miRNAs) are endogenous small non-coding RNAs that play important roles in the regulation of gene expression. The objective of the present study was to examine the miRNA expression profile of the larvae of T. spiralis by Solexa deep sequencing combined with stem-loop real-time polymerase chain reaction (PCR) analysis. T. spiralis larvae were collected from the skeletal muscle of naturally infected pigs in Henan province, China, by artificial digestion using pepsin. The specific identity of the T. spiralis larvae was confirmed by PCR amplification and subsequent sequence analysis of the internal transcribed spacer of ribosomal DNA. A total of 17,851,693 reads with 2,773,254 unique reads were obtained. Eleven conserved miRNAs from 115 unique small RNAs (sRNAs) and 12 conserved miRNAs from 130 unique sRNAs were found by BLAST analysis against the known miRNAs of Caenorhabditis elegans ( ftp://ftp.ncbi.nih.gov/genomes/Caenorhabditis_elegans ) and Brugia malayi dataset ( http://www.ncbi.nlm.nih.gov/genomeprj?Db=genomeprj&cmd=ShowDetailView&TermToSearch=9549 ) in miRBase, respectively. One novel miRNA with 12 precursors was identified and certified using the reference genome of B. 
malayi, while no novel miRNA was found when using the reference genome of C. elegans. Nucleotide bias analysis showed that the uracil was the prominent nucleotide, particularly at the 1st, 6th, 18th, and 23th positions, which were almost at the beginning, middle, and the end of the conserved miRNAs. The identification and characterization of T. spiralis miRNAs provides a new resource to study regulation of genes and their networks in T. spiralis.",2011-02-17 +27019547,Taxonomy and Biogeography of Apomixis in Angiosperms and Associated Biodiversity Characteristics.,"Apomixis in angiosperms is asexual reproduction from seed. Its importance to angiospermous evolution and biodiversity has been difficult to assess mainly because of insufficient taxonomic documentation. Thus, we assembled literature reporting apomixis occurrences among angiosperms and transferred the information to an internet database (http://www.apomixis.uni-goettingen.de). We then searched for correlations between apomixis occurrences and well-established measures of taxonomic diversity and biogeography. Apomixis was found to be taxonomically widespread with no clear tendency to specific groups and to occur with sexuality at all taxonomic levels. Adventitious embryony was the most frequent form (148 genera) followed by apospory (110) and diplospory (68). All three forms are phylogenetically scattered, but this scattering is strongly associated with measures of biodiversity. Across apomictic-containing orders and families, numbers of apomict-containing genera were positively correlated with total numbers of genera. In general, apomict-containing orders, families, and subfamilies of Asteraceae, Poaceae, and Orchidaceae were larger, i.e., they possessed more families or genera, than non-apomict-containing orders, families or subfamilies. Furthermore, many apomict-containing genera were found to be highly cosmopolitan. In this respect, 62% occupy multiple geographic zones. 
Numbers of genera containing sporophytic or gametophytic apomicts decreased from the tropics to the arctic, a trend that parallels general biodiversity. While angiosperms appear to be predisposed to shift from sex to apomixis, there is also evidence of reversions to sexuality. Such reversions may result from genetic or epigenetic destabilization events accompanying hybridization, polyploidy, or other cytogenetic alterations. Because of increased within-plant genetic and genomic heterogeneity, range expansions and diversifications at the species and genus levels may occur more rapidly upon reversion to sexuality. The significantly-enriched representations of apomicts among highly diverse and geographically-extensive taxa, from genera to orders, support this conclusion.",2014-05-29 +24884893,The NQO1 Pro187Ser polymorphism and breast cancer susceptibility: evidence from an updated meta-analysis.,"

Background

NAD(P)H:quinone oxidoreductase 1 (NQO1) plays a central role in catalyzing the two-electron reduction of quinoid compounds into hydroquinones. The NQO1 Pro187Ser polymorphism was found to correlate with a lower enzymatic activity, which may result in increased incidence of carcinomas including breast cancer. Previous studies investigating the association between NQO1 Pro187Ser polymorphism and breast cancer risk showed inconsistent results. We performed a meta-analysis to summarize the possible association.

Methods

All studies published from January 1966 to February 2014 on the association between NQO1 Pro187Ser polymorphism and breast cancer risk were identified by searching electronic databases PubMed, EMBASE, Cochrane library, and Chinese Biomedical Literature database (CBM). The association between NQO1 Pro187Ser polymorphism and breast cancer risk was assessed by odds ratios (ORs) together with their 95% confidence intervals (CIs).

Results

Ten studies with 2,773 cases and 4,076 controls were finally included in the meta-analysis. We did not observe a significant association between NQO1 Pro187Ser polymorphism and breast cancer risk when all studies were pooled into the meta-analysis. In subgroup analysis by ethnicity, significantly increased breast cancer risk was found in Caucasians (Ser/Pro vs. Pro/Pro: OR=1.145, 95% CI=1.008-1.301, P=0.038; Ser/Ser+Ser/Pro vs. Pro/Pro: OR=1.177, 95% CI=1.041-1.331, P=0.009). When stratified by source of control, significantly increased breast cancer risk was found in population-based studies (Ser/Pro vs. Pro/Pro: OR=1.180, 95% CI=1.035-1.344, P=0.013; Ser/Ser+Ser/Pro vs. Pro/Pro: OR=1.191, 95% CI=1.050-1.350, P=0.007). However, in subgroup analyses according to menopausal status, quality score, and HWE in controls, no significant association was detected.

Conclusions

Our meta-analysis provides the evidence that the NQO1 Pro187Ser polymorphism contributed to the breast cancer susceptibility among Caucasians. Further large and well-designed studies are needed to confirm this association.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1248639991252504.",2014-05-29 +21993537,Fuzzy clustering of physicochemical and biochemical properties of amino acids.,"In this article, we categorize presently available experimental and theoretical knowledge of various physicochemical and biochemical features of amino acids, as collected in the AAindex database of known 544 amino acid (AA) indices. Previously reported 402 indices were categorized into six groups using hierarchical clustering technique and 142 were left unclustered. However, due to the increasing diversity of the database these indices are overlapping, therefore crisp clustering method may not provide optimal results. Moreover, in various large-scale bioinformatics analyses of whole proteomes, the proper selection of amino acid indices representing their biological significance is crucial for efficient and error-prone encoding of the short functional sequence motifs. In most cases, researchers perform exhaustive manual selection of the most informative indices. These two facts motivated us to analyse the widely used AA indices. The main goal of this article is twofold. First, we present a novel method of partitioning the bioinformatics data using consensus fuzzy clustering, where the recently proposed fuzzy clustering techniques are exploited. Second, we prepare three high quality subsets of all available indices. Superiority of the consensus fuzzy clustering method is demonstrated quantitatively, visually and statistically by comparing it with the previously proposed hierarchical clustered results. 
The processed AAindex1 database, supplementary material and the software are available at http://sysbio.icm.edu.pl/aaindex/ .",2011-10-13 +25100869,leeHom: adaptor trimming and merging for Illumina sequencing reads.,"The sequencing of libraries containing molecules shorter than the read length, such as in ancient or forensic applications, may result in the production of reads that include the adaptor, and in paired reads that overlap one another. Challenges for the processing of such reads are the accurate identification of the adaptor sequence and accurate reconstruction of the original sequence most likely to have given rise to the observed read(s). We introduce an algorithm that removes the adaptors and reconstructs the original DNA sequences using a Bayesian maximum a posteriori probability approach. Our algorithm is faster, and provides a more accurate reconstruction of the original sequence for both simulated and ancient DNA data sets, than other approaches. leeHom is released under the GPLv3 and is freely available from: https://bioinf.eva.mpg.de/leehom/",2014-08-06 +25246430,PhaseTank: genome-wide computational identification of phasiRNAs and their regulatory cascades.,"

Unlabelled

Emerging evidence has revealed phased siRNAs (phasiRNAs) as important endogenous regulators in plants. However, the integrated prediction tools for phasiRNAs are still limited. In this article, we introduce a stand-alone package PhaseTank for systematically characterizing phasiRNAs and their regulatory networks. (i) It can identify phasiRNAs/tasiRNAs functional cascades (miRNA/phasiRNA → PHAS loci → phasiRNA → target) with high sensitivity and specificity. (ii) By one command analysis, it generates comprehensive annotation and quantification of the predicted PHAS genes from any given sequences. (iii) PhaseTank has no restriction with regards to prior information of sequence homology of unrestricted organism origins.

Availability and implementation

PhaseTank is a free and open-source tool. The package is available at http://phasetank.sourceforge.net/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-21 +24861622,MORPHIN: a web tool for human disease research by projecting model organism biology onto a human integrated gene network.,"Despite recent advances in human genetics, model organisms are indispensable for human disease research. Most human disease pathways are evolutionally conserved among other species, where they may phenocopy the human condition or be associated with seemingly unrelated phenotypes. Much of the known gene-to-phenotype association information is distributed across diverse databases, growing rapidly due to new experimental techniques. Accessible bioinformatics tools will therefore facilitate translation of discoveries from model organisms into human disease biology. Here, we present a web-based discovery tool for human disease studies, MORPHIN (model organisms projected on a human integrated gene network), which prioritizes the most relevant human diseases for a given set of model organism genes, potentially highlighting new model systems for human diseases and providing context to model organism studies. Conceptually, MORPHIN investigates human diseases by an orthology-based projection of a set of model organism genes onto a genome-scale human gene network. MORPHIN then prioritizes human diseases by relevance to the projected model organism genes using two distinct methods: a conventional overlap-based gene set enrichment analysis and a network-based measure of closeness between the query and disease gene sets capable of detecting associations undetectable by the conventional overlap-based methods. MORPHIN is freely accessible at http://www.inetbio.org/morphin.",2014-05-26 +26217747,Data set from chemical sensor array exposed to turbulent gas mixtures.,"A chemical detection platform composed of 8 chemo-resistive gas sensors was exposed to turbulent gas mixtures generated naturally in a wind tunnel. The acquired time series of the sensors are provided. 
The experimental setup was designed to test gas sensors in realistic environments. Traditionally, chemical detection systems based on chemo-resistive sensors include a gas chamber to control the sample air flow and minimize turbulence. Instead, we utilized a wind tunnel with two independent gas sources that generate two gas plumes. The plumes get naturally mixed along a turbulent flow and reproduce the gas concentration fluctuations observed in natural environments. Hence, the gas sensors can capture the spatio-temporal information contained in the gas plumes. The sensor array was exposed to binary mixtures of ethylene with either methane or carbon monoxide. Volatiles were released at four different rates to induce different concentration levels in the vicinity of the sensor array. Each configuration was repeated 6 times, for a total of 180 measurements. The data are related to ""Chemical Discrimination in Turbulent Gas Mixtures with MOX Sensors Validated by Gas Chromatography-Mass Spectrometry"", by Fonollosa et al. [1]. The dataset can be accessed publicly at the UCI repository upon citation of [1]: http://archive.ics.uci.edu/ml/datasets/Gas+sensor+array+exposed+to+turbulent+gas+mixtures.",2015-03-20 +25143292,FACTERA: a practical method for the discovery of genomic rearrangements at breakpoint resolution.,"

Unlabelled

For practical and robust de novo identification of genomic fusions and breakpoints from targeted paired-end DNA sequencing data, we developed Fusion And Chromosomal Translocation Enumeration and Recovery Algorithm (FACTERA). Our method has minimal external dependencies, works directly on a preexisting Binary Alignment/Map file and produces easily interpretable output. We demonstrate FACTERA's ability to rapidly identify breakpoint-resolution fusion events with high sensitivity and specificity in patients with non-small cell lung cancer, including novel rearrangements. We anticipate that FACTERA will be broadly applicable to the discovery and analysis of clinically relevant fusions from both targeted and genome-wide sequencing datasets.

Availability and implementation

http://factera.stanford.edu.",2014-08-20 +26124554,Omics Metadata Management Software (OMMS).,"

Unlabelled

Next-generation sequencing projects have underappreciated information management tasks requiring detailed attention to specimen curation, nucleic acid sample preparation and sequence production methods required for downstream data processing, comparison, interpretation, sharing and reuse. The few existing metadata management tools for genome-based studies provide weak curatorial frameworks for experimentalists to store and manage idiosyncratic, project-specific information, typically offering no automation supporting unified naming and numbering conventions for sequencing production environments that routinely deal with hundreds, if not thousands of samples at a time. Moreover, existing tools are not readily interfaced with bioinformatics executables (e.g., BLAST, Bowtie2, custom pipelines). Our application, the Omics Metadata Management Software (OMMS), answers both needs, empowering experimentalists to generate intuitive, consistent metadata, and perform analyses and information management tasks via an intuitive web-based interface. Several use cases with short-read sequence datasets are provided to validate installation and integrated function, and suggest possible methodological road maps for prospective users. Provided examples highlight possible OMMS workflows for metadata curation, multistep analyses, and results management and downloading. The OMMS can be implemented as a stand-alone package for individual laboratories, or can be configured for web-based deployment supporting geographically-dispersed projects. The OMMS was developed using an open-source software base, is flexible, extensible and easily installed and executed. The OMMS can be obtained at http://omms.sandia.gov.

Availability

The OMMS can be obtained at http://omms.sandia.gov.",2015-04-30 +23153722,The Genetics of Sexuality and Aggression (GSA) twin samples in Finland.,"The Genetics of Sexuality and Aggression (GSA) project was launched at the Abo Akademi University in Turku, Finland in 2005 and has so far undertaken two major population-based data collections involving twins and siblings of twins. To date, it consists of about 14,000 individuals (including 1,147 informative monozygotic twin pairs, 1,042 informative same-sex dizygotic twin pairs, 741 informative opposite-sex dizygotic twin pairs). Participants have been recruited through the Central Population Registry of Finland and were 18-49 years of age at the time of the data collections. Saliva samples for DNA genotyping (n = 4,278) and testosterone analyses (n = 1,168) were collected in 2006. The primary focus of the data collections has been on sexuality (both sexual functioning and sexual behavior) and aggressive behavior. This paper provides an overview of the data collections as well as an outline of the phenotypes and biological data assembled within the project. A detailed overview of publications can be found at the project's Web site: http://www.cebg.fi/.",2012-11-16 +24223834,iSubgraph: integrative genomics for subgroup discovery in hepatocellular carcinoma using graph mining and mixture models.,"The high tumor heterogeneity makes it very challenging to identify key tumorigenic pathways as therapeutic targets. The integration of multiple omics data is a promising approach to identify driving regulatory networks in patient subgroups. Here, we propose a novel conceptual framework to discover patterns of miRNA-gene networks, observed frequently up- or down-regulated in a group of patients and to use such networks for patient stratification in hepatocellular carcinoma (HCC). We developed an integrative subgraph mining approach, called iSubgraph, and identified altered regulatory networks frequently observed in HCC patients. 
The miRNA and gene expression profiles were jointly analyzed in a graph structure. We defined a method to transform microarray data into graph representation that encodes miRNA and gene expression levels and the interactions between them as well. The iSubgraph algorithm was capable to detect cooperative regulation of miRNAs and genes even if it occurred only in some patients. Next, the miRNA-mRNA modules were used in an unsupervised class prediction model to discover HCC subgroups via patient clustering by mixture models. The robustness analysis of the mixture model showed that the class predictions are highly stable. Moreover, the Kaplan-Meier survival analysis revealed that the HCC subgroups identified by the algorithm have different survival characteristics. The pathway analyses of the miRNA-mRNA co-modules identified by the algorithm demonstrate key roles of Myc, E2F1, let-7, TGFB1, TNF and EGFR in HCC subgroups. Thus, our method can integrate various omics data derived from different platforms and with different dynamic scales to better define molecular tumor subtypes. iSubgraph is available as MATLAB code at http://www.cs.umd.edu/~ozdemir/isubgraph/.",2013-11-04 +25934264,"FEATnotator: A tool for integrated annotation of sequence features and variation, facilitating interpretation in genomics experiments.","As approaches are sought for more efficient and democratized uses of non-model and expanded model genomics references, ease of integration of genomic feature datasets is especially desirable in multidisciplinary research communities. Valuable conclusions are often missed or slowed when researchers refer experimental results to a single reference sequence that lacks integrated pan-genomic and multi-experiment data in accessible formats. 
Association of genomic positional information, such as results from an expansive variety of next-generation sequencing experiments, with annotated reference features such as genes or predicted protein binding sites, provides the context essential for conclusions and ongoing research. When the experimental system includes polymorphic genomic inputs, rapid calculation of gene structural and protein translational effects of sequence variation from the reference can be invaluable. Here we present FEATnotator, a lightweight, fast and easy to use open source software program that integrates and reports overlap and proximity in genomic information from any user-defined datasets including those from next generation sequencing applications. We illustrate use of the tool by summarizing whole genome sequence variation of a widely used natural isolate of Arabidopsis thaliana in the context of gene models of the reference accession. Previous discovery of a protein coding deletion influencing root development is replicated rapidly. Appropriate even in investigations of a single gene or genic regions such as QTL, comprehensive reports provided by FEATnotator better prepare researchers for interpretation of their experimental results. The tool is available for download at http://featnotator.sourceforge.net.",2015-04-29 +25515756,LocNES: a computational tool for locating classical NESs in CRM1 cargo proteins.,"

Motivation

Classical nuclear export signals (NESs) are short cognate peptides that direct proteins out of the nucleus via the CRM1-mediated export pathway. CRM1 regulates the localization of hundreds of macromolecules involved in various cellular functions and diseases. Due to the diverse and complex nature of NESs, reliable prediction of the signal remains a challenge despite several attempts made in the last decade.

Results

We present a new NES predictor, LocNES. LocNES scans query proteins for NES consensus-fitting peptides and assigns these peptides probability scores using Support Vector Machine model, whose feature set includes amino acid sequence, disorder propensity, and the rank of position-specific scoring matrix score. LocNES demonstrates both higher sensitivity and precision over existing NES prediction tools upon comparative analysis using experimentally identified NESs.

Availability and implementation

LocNES is freely available at http://prodata.swmed.edu/LocNES

Contact

yuhmin.chook@utsouthwestern.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-15 +24748536,An automated framework for NMR resonance assignment through simultaneous slice picking and spin system forming.,"Despite significant advances in automated nuclear magnetic resonance-based protein structure determination, the high numbers of false positives and false negatives among the peaks selected by fully automated methods remain a problem. These false positives and negatives impair the performance of resonance assignment methods. One of the main reasons for this problem is that the computational research community often considers peak picking and resonance assignment to be two separate problems, whereas spectroscopists use expert knowledge to pick peaks and assign their resonances at the same time. We propose a novel framework that simultaneously conducts slice picking and spin system forming, an essential step in resonance assignment. Our framework then employs a genetic algorithm, directed by both connectivity information and amino acid typing information from the spin systems, to assign the spin systems to residues. The inputs to our framework can be as few as two commonly used spectra, i.e., CBCA(CO)NH and HNCACB. Different from the existing peak picking and resonance assignment methods that treat peaks as the units, our method is based on 'slices', which are one-dimensional vectors in three-dimensional spectra that correspond to certain ([Formula: see text]) values. Experimental results on both benchmark simulated data sets and four real protein data sets demonstrate that our method significantly outperforms the state-of-the-art methods while using a less number of spectra than those methods. 
Our method is freely available at http://sfb.kaust.edu.sa/Pages/Software.aspx.",2014-04-19 +25852542,"'Kitchen and cooking,' a serious game for mild cognitive impairment and Alzheimer's disease: a pilot study.","Recently there has been a growing interest in employing serious games (SGs) for the assessment and rehabilitation of elderly people with mild cognitive impairment (MCI), Alzheimer's disease (AD), and related disorders. In the present study we examined the acceptability of 'Kitchen and cooking' - a SG developed in the context of the EU project VERVE (http://www.verveconsortium.eu/) - in these populations. In this game a cooking plot is employed to assess and stimulate executive functions (such as planning abilities) and praxis. The game is installed on a tablet, to be flexibly employed at home and in nursing homes. Twenty one elderly participants (9 MCI and 12 AD, including 14 outpatients and 7 patients living in nursing homes, as well as 11 apathetic and 10 non-apathetic) took part in a 1-month trail, including a clinical and neuropsychological assessment, and 4-week training where the participants were free to play as long as they wanted on a personal tablet. During the training, participants met once a week with a clinician in order to fill in self-report questionnaires assessing their overall game experience (including acceptability, motivation, and perceived emotions). The results of the self reports and of the data concerning game performance (e.g., time spent playing, number of errors, etc) confirm the overall acceptability of Kitchen and cooking for both patients with MCI and patients with AD and related disorders, and the utility to employ it for training purposes. Interestingly, the results confirm that the game is adapted also to apathetic patients.",2015-03-17 +24801556,Large eddy simulation of transitional flow in an idealized stenotic blood vessel: evaluation of subgrid scale models. 
,"In the present study, we performed large eddy simulation (LES) of axisymmetric, and 75% stenosed, eccentric arterial models with steady inflow conditions at a Reynolds number of 1000. The results obtained are compared with the direct numerical simulation (DNS) data (Varghese et al., 2007, ""Direct Numerical Simulation of Stenotic Flows. Part 1. Steady Flow,"" J. Fluid Mech., 582, pp. 253-280). An inhouse code (WenoHemo) employing high-order numerical methods for spatial and temporal terms, along with a 2nd order accurate ghost point immersed boundary method (IBM) (Mark, and Vanwachem, 2008, ""Derivation and Validation of a Novel Implicit Second-Order Accurate Immersed Boundary Method,"" J. Comput. Phys., 227(13), pp. 6660-6680) for enforcing boundary conditions on curved geometries is used for simulations. Three subgrid scale (SGS) models, namely, the classical Smagorinsky model (Smagorinsky, 1963, ""General Circulation Experiments With the Primitive Equations,"" Mon. Weather Rev., 91(10), pp. 99-164), recently developed Vreman model (Vreman, 2004, ""An Eddy-Viscosity Subgrid-Scale Model for Turbulent Shear Flow: Algebraic Theory and Applications,"" Phys. Fluids, 16(10), pp. 3670-3681), and the Sigma model (Nicoud et al., 2011, ""Using Singular Values to Build a Subgrid-Scale Model for Large Eddy Simulations,"" Phys. Fluids, 23(8), 085106) are evaluated in the present study. Evaluation of SGS models suggests that the classical constant coefficient Smagorinsky model gives best agreement with the DNS data, whereas the Vreman and Sigma models predict an early transition to turbulence in the poststenotic region. Supplementary simulations are performed using Open source field operation and manipulation (OpenFOAM) (""OpenFOAM,"" http://www.openfoam.org/) solver and the results are inline with those obtained with WenoHemo.",2014-07-01 +23825370,Leveraging reads that span multiple single nucleotide polymorphisms for haplotype inference from sequencing data.,"

Motivation

Haplotypes, defined as the sequence of alleles on one chromosome, are crucial for many genetic analyses. As experimental determination of haplotypes is extremely expensive, haplotypes are traditionally inferred using computational approaches from genotype data, i.e. the mixture of the genetic information from both haplotypes. Best performing approaches for haplotype inference rely on Hidden Markov Models, with the underlying assumption that the haplotypes of a given individual can be represented as a mosaic of segments from other haplotypes in the same population. Such algorithms use this model to predict the most likely haplotypes that explain the observed genotype data conditional on reference panel of haplotypes. With rapid advances in short read sequencing technologies, sequencing is quickly establishing as a powerful approach for collecting genetic variation information. As opposed to traditional genotyping-array technologies that independently call genotypes at polymorphic sites, short read sequencing often collects haplotypic information; a read spanning more than one polymorphic locus (multi-single nucleotide polymorphic read) contains information on the haplotype from which the read originates. However, this information is generally ignored in existing approaches for haplotype phasing and genotype-calling from short read data.

Results

In this article, we propose a novel framework for haplotype inference from short read sequencing that leverages multi-single nucleotide polymorphic reads together with a reference panel of haplotypes. The basis of our approach is a new probabilistic model that finds the most likely haplotype segments from the reference panel to explain the short read sequencing data for a given individual. We devised an efficient sampling method within a probabilistic model to achieve superior performance than existing methods. Using simulated sequencing reads from real individual genotypes in the HapMap data and the 1000 Genomes projects, we show that our method is highly accurate and computationally efficient. Our haplotype predictions improve accuracy over the basic haplotype copying model by ∼20% with comparable computational time, and over another recently proposed approach Hap-SeqX by ∼10% with significantly reduced computational time and memory usage.

Availability

Publicly available software is available at http://genetics.cs.ucla.edu/harsh

Contact

bpasaniuc@mednet.ucla.edu or eeskin@cs.ucla.edu.",2013-07-03 +24532727,Condition-specific target prediction from motifs and expression.,"

Motivation

It is commonplace to predict targets of transcription factors (TFs) by sequence matching with their binding motifs. However, this ignores the particular condition of the cells. Gene expression data can provide condition-specific information, as is, e.g. exploited in Motif Enrichment Analysis.

Results

Here, we introduce a novel tool named condition-specific target prediction (CSTP) to predict condition-specific targets for TFs from expression data measured by either microarray or RNA-seq. Based on the philosophy of guilt by association, CSTP infers the regulators of each studied gene by recovering the regulators of its co-expressed genes. In contrast to the currently used methods, CSTP does not insist on binding sites of TFs in the promoter of the target genes. CSTP was applied to three independent biological processes for evaluation purposes. By analyzing the predictions for the same TF in three biological processes, we confirm that predictions with CSTP are condition-specific. Predictions were further compared with true TF binding sites as determined by ChIP-seq/chip. We find that CSTP predictions overlap with true binding sites to a degree comparable with motif-based predictions, although the two target sets do not coincide.

Availability and implementation

CSTP is available via a web-based interface at http://cstp.molgen.mpg.de.",2014-02-14 +25566991,CD44 regulates pancreatic cancer invasion through MT1-MMP.,"

Unlabelled

Pancreatic cancer is one of the deadliest human malignancies due to its early metastatic spread and resistance to therapy. The mechanisms regulating pancreatic cancer metastasis are so far poorly understood. Here, using both in vitro and in vivo approaches, it is demonstrated that CD44, a transmembrane glycoprotein expressed on a subset of pancreatic cancer cells, is required for the induction of epithelial-mesenchymal transition (EMT) and the activation of an invasive program in pancreatic cancer. Mechanistically, the transcription factor Snail1 (SNAI1), a regulator of the EMT program, is a downstream target of CD44 in primary pancreatic cancer cells and regulates membrane bound metalloproteinase (MMP14/MT1-MMP) expression. In turn, MT1-MMP expression is required for pancreatic cancer invasion. Thus, these data establish the CD44-Snail-MMP axis as a key regulator of the EMT program and of invasion in pancreatic cancer.

Implications

This study sets the stage for CD44 and MT1-MMP as therapeutic targets in pancreatic cancer, for which small molecule or biologic inhibitors are available. Visual Overview: http://mcr.aacrjournals.org/content/early/2014/09/10/1541-7786.MCR-14-0076/F1.large.jpg.",2015-01-07 +22086911,Phylogenetic diversity of insecticolous fusaria inferred from multilocus DNA sequence data and their molecular identification via FUSARIUM-ID and Fusarium MLST.,"We constructed several multilocus DNA sequence datasets to assess the phylogenetic diversity of insecticolous fusaria, especially focusing on those housed at the Agricultural Research Service Collection of Entomopathogenic Fungi (ARSEF), and to aid molecular identifications of unknowns via the FUSARIUM-ID and Fusarium MLST online databases and analysis packages. Analyses of a 190-taxon, two-locus dataset, which included 159 isolates from insects, indicated that: (i) insect-associated fusaria were nested within 10 species complexes spanning the phylogenetic breadth of Fusarium, (ii) novel, putatively unnamed insecticolous species were nested within 8/10 species complexes and (iii) Latin binomials could be applied with confidence to only 18/58 phylogenetically distinct fusaria associated with pest insects. Phylogenetic analyses of an 82-taxon, three-locus dataset nearly fully resolved evolutionary relationships among the 10 clades containing insecticolous fusaria. Multilocus typing of isolates within four species complexes identified surprisingly high genetic diversity in that 63/65 of the fusaria typed represented newly discovered haplotypes. 
The DNA sequence data, together with corrected ABI sequence chromatograms and alignments, have been uploaded to the following websites dedicated to identifying fusaria: FUSARIUM-ID (http://isolate.fusariumdb.org) at Pennsylvania State University's Department of Plant Pathology and Fusarium MLST (http://www.cbs.knaw.nl/fusarium) at the Centraalbureau voor Schimmelcultures (CBS-KNAW) Fungal Biodiversity Center.",2011-11-15 +24886180,Current evidences on XPC polymorphisms and gastric cancer susceptibility: a meta-analysis.,"

Background

Reduced DNA repair capacities due to inherited polymorphisms may increase the susceptibility to cancers including gastric cancer. Previous studies investigating the association between Xeroderma Pigmentosum group C (XPC) gene polymorphisms and gastric cancer risk reported inconsistent results. We performed a meta-analysis to summarize the possible association.

Methods

All studies published up to January 2014 on the association between XPC polymorphisms and gastric cancer risk were identified by searching electronic databases PubMed, EMBASE, Cochrane library, and Chinese Biomedical Literature database (CBM). The association between XPC polymorphisms and gastric cancer risk was assessed by odds ratios (ORs) together with their 95% confidence intervals (CIs).

Results

Six studies with 1,355 gastric cancer cases and 2,573 controls were finally included in the meta-analysis. With respect to Lys939Gln polymorphism, we did not observe a significant association when all studies were pooled into the meta-analysis. When stratified by ethnicity, source of control, and study quality, statistical significant association was not detected in all subgroups. With respect to Ala499Val and PAT-/+polymorphisms, we also did not observe any significant association with gastric cancer risk in the pooled analysis.

Conclusions

This meta-analysis based on current evidences suggested that the XPC polymorphisms (Lys939Gln, Ala499Val, and PAT-/+) did not contribute to gastric cancer risk. Considering the limited sample size and ethnicity included in the meta-analysis, further larger scaled and well-designed studies are needed to confirm our results.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1485880312555069.",2014-05-23 +22303453,Catalog of microRNA seed polymorphisms in vertebrates.,"MicroRNAs (miRNAs) are a class of non-coding RNA that plays an important role in posttranscriptional regulation of mRNA. Evidence has shown that miRNA gene variability might interfere with its function resulting in phenotypic variation and disease susceptibility. A major role in miRNA target recognition is ascribed to complementarity with the miRNA seed region that can be affected by polymorphisms. In the present study, we developed an online tool for the detection of miRNA polymorphisms (miRNA SNiPer) in vertebrates (http://www.integratomics-time.com/miRNA-SNiPer) and generated a catalog of miRNA seed region polymorphisms (miR-seed-SNPs) consisting of 149 SNPs in six species. Although a majority of detected polymorphisms were due to point mutations, two consecutive nucleotide substitutions (double nucleotide polymorphisms, DNPs) were also identified in nine miRNAs. We determined that miR-SNPs are frequently located within the quantitative trait loci (QTL), chromosome fragile sites, and cancer susceptibility loci, indicating their potential role in the genetic control of various complex traits. To test this further, we performed an association analysis between the mmu-miR-717 seed SNP rs30372501, which is polymorphic in a large number of standard inbred strains, and all phenotypic traits in these strains deposited in the Mouse Phenome Database. Analysis showed a significant association between the mmu-miR-717 seed SNP and a diverse array of traits including behavior, blood-clinical chemistry, body weight size and growth, and immune system suggesting that seed SNPs can indeed have major pleiotropic effects. 
The bioinformatics analyses, data and tools developed in the present study can serve researchers as a starting point in testing more targeted hypotheses and designing experiments using optimal species or strains for further mechanistic studies.",2012-01-27 +24955134,iDrug: a web-accessible and interactive drug discovery and design platform.,"

Background

The progress in computer-aided drug design (CADD) approaches over the past decades accelerated the early-stage pharmaceutical research. Many powerful standalone tools for CADD have been developed in academia. As programs are developed by various research groups, a consistent user-friendly online graphical working environment, combining computational techniques such as pharmacophore mapping, similarity calculation, scoring, and target identification is needed.

Results

We presented a versatile, user-friendly, and efficient online tool for computer-aided drug design based on pharmacophore and 3D molecular similarity searching. The web interface enables binding sites detection, virtual screening hits identification, and drug targets prediction in an interactive manner through a seamless interface to all adapted packages (e.g., Cavity, PocketV.2, PharmMapper, SHAFTS). Several commercially available compound databases for hit identification and a well-annotated pharmacophore database for drug targets prediction were integrated in iDrug as well. The web interface provides tools for real-time molecular building/editing, converting, displaying, and analyzing. All the customized configurations of the functional modules can be accessed through featured session files provided, which can be saved to the local disk and uploaded to resume or update the history work.

Conclusions

iDrug is easy to use, and provides a novel, fast and reliable tool for conducting drug design experiments. By using iDrug, various molecular design processing tasks can be submitted and visualized simply in one browser without installing locally any standalone modeling softwares. iDrug is accessible free of charge at http://lilab.ecust.edu.cn/idrug.",2014-05-23 +24420968,pyOpenMS: a Python-based interface to the OpenMS mass-spectrometry algorithm library.,"pyOpenMS is an open-source, Python-based interface to the C++ OpenMS library, providing facile access to a feature-rich, open-source algorithm library for MS-based proteomics analysis. It contains Python bindings that allow raw access to the data structures and algorithms implemented in OpenMS, specifically those for file access (mzXML, mzML, TraML, mzIdentML among others), basic signal processing (smoothing, filtering, de-isotoping, and peak-picking) and complex data analysis (including label-free, SILAC, iTRAQ, and SWATH analysis tools). pyOpenMS thus allows fast prototyping and efficient workflow development in a fully interactive manner (using the interactive Python interpreter) and is also ideally suited for researchers not proficient in C++. In addition, our code to wrap a complex C++ library is completely open-source, allowing other projects to create similar bindings with ease. The pyOpenMS framework is freely available at https://pypi.python.org/pypi/pyopenms while the autowrap tool to create Cython code automatically is available at https://pypi.python.org/pypi/autowrap (both released under the 3-clause BSD licence).",2014-01-01 +23456039,Nmrglue: an open source Python package for the analysis of multidimensional NMR data.,"Nmrglue, an open source Python package for working with multidimensional NMR data, is described. 
When used in combination with other Python scientific libraries, nmrglue provides a highly flexible and robust environment for spectral processing, analysis and visualization and includes a number of common utilities such as linear prediction, peak picking and lineshape fitting. The package also enables existing NMR software programs to be readily tied together, currently facilitating the reading, writing and conversion of data stored in Bruker, Agilent/Varian, NMRPipe, Sparky, SIMPSON, and Rowland NMR Toolkit file formats. In addition to standard applications, the versatility offered by nmrglue makes the package particularly suitable for tasks that include manipulating raw spectrometer data files, automated quantitative analysis of multidimensional NMR spectra with irregular lineshapes such as those frequently encountered in the context of biomacromolecular solid-state NMR, and rapid implementation and development of unconventional data processing methods such as covariance NMR and other non-Fourier approaches. Detailed documentation, install files and source code for nmrglue are freely available at http://nmrglue.com. The source code can be redistributed and modified under the New BSD license.",2013-03-02 +25802807,GROM-RD: resolving genomic biases to improve read depth detection of copy number variants.,"Amplifications or deletions of genome segments, known as copy number variants (CNVs), have been associated with many diseases. Read depth analysis of next-generation sequencing (NGS) is an essential method of detecting CNVs. However, genome read coverage is frequently distorted by various biases of NGS platforms, which reduce predictive capabilities of existing approaches. Additionally, the use of read depth tools has been somewhat hindered by imprecise breakpoint identification. We developed GROM-RD, an algorithm that analyzes multiple biases in read coverage to detect CNVs in NGS data. 
We found non-uniform variance across distinct GC regions after using existing GC bias correction methods and developed a novel approach to normalize such variance. Although complex and repetitive genome segments complicate CNV detection, GROM-RD adjusts for repeat bias and uses a two-pipeline masking approach to detect CNVs in complex and repetitive segments while improving sensitivity in less complicated regions. To overcome a typical weakness of RD methods, GROM-RD employs a CNV search using size-varying overlapping windows to improve breakpoint resolution. We compared our method to two widely used programs based on read depth methods, CNVnator and RDXplorer, and observed improved CNV detection and breakpoint accuracy for GROM-RD. GROM-RD is available at http://grigoriev.rutgers.edu/software/.",2015-03-17 +24463184,Modeling tissue contamination to improve molecular identification of the primary tumor site of metastases.,"

Motivation

Contamination of a cancer tissue by the surrounding benign (non-cancerous) tissue is a concern for molecular cancer diagnostics. This is because an observed molecular signature will be distorted by the surrounding benign tissue, possibly leading to an incorrect diagnosis. One example is molecular identification of the primary tumor site of metastases because biopsies of metastases typically contain a significant amount of benign tissue.

Results

A model of tissue contamination is presented. This contamination model works independently of the training of a molecular predictor, and it can be combined with any predictor model. The usability of the model is illustrated on primary tumor site identification of liver biopsies, specifically, on a human dataset consisting of microRNA expression measurements of primary tumor samples, benign liver samples and liver metastases. For a predictor trained on primary tumor and benign liver samples, the contamination model decreased the test error on biopsies from liver metastases from 77 to 45%. A further reduction to 34% was obtained by including biopsies in the training data.

Availability and implementation

http://www.math.ku.dk/∼richard/msgl/.

Contact

vincent@math.ku.dk

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-01-24 +24857432,[Pediamécum: one year of experience].,"In 2011, the Spanish Association of Pediatrics decided to support the most ambitious project of its newly created Committee for Medicinal Products: Pediamécum. This is the first free on-line database with information on medicinal products for pediatric use in Spain. The web page http://pediamecum.es/ started on December 17 December 2012. One year later, Pediamécum includes 580 registered drugs. The website achieved more than one million page views by the end of 2013. Because of the first anniversary of Pediamécum, a survey was performed to request the feeling of users. Four hundred eighty-three responses were obtained. Ninety-five percent believed that it is easy to navigate through the web, and 74% said that their doubts about the use of medicines in children were always resolved. The overall rating of Pediamécum is 7.5/10. The aims of Pediamécum are being accomplished; which is reflected essentially due to it becoming a useful tool for all professionals who care for children in their daily clinical practice.",2014-05-22 +24876890,Expanding the fragrance chemical space for virtual screening.,"The properties of fragrance molecules in the public databases SuperScent and Flavornet were analyzed to define a ""fragrance-like"" (FL) property range (Heavy Atom Count ≤ 21, only C, H, O, S, (O + S) ≤ 3, Hydrogen Bond Donor ≤ 1) and the corresponding chemical space including FL molecules from PubChem (NIH repository of molecules), ChEMBL (bioactive molecules), ZINC (drug-like molecules), and GDB-13 (all possible organic molecules up to 13 atoms of C, N, O, S, Cl). 
The FL subsets of these databases were classified by MQN (Molecular Quantum Numbers, a set of 42 integer value descriptors of molecular structure) and formatted for fast MQN-similarity searching and interactive exploration of color-coded principal component maps in form of the FL-mapplet and FL-browser applications freely available at http://www.gdb.unibe.ch. MQN-similarity is shown to efficiently recover 15 different fragrance molecule families from the different FL subsets, demonstrating the relevance of the MQN-based tool to explore the fragrance chemical space.",2014-05-22 +26220359,"Characterization of the genome and transcriptome of the blue tit Cyanistes caeruleus: polymorphisms, sex-biased expression and selection signals.","Decoding genomic sequences and determining their variation within populations has potential to reveal adaptive processes and unravel the genetic basis of ecologically relevant trait variation within a species. The blue tit Cyanistes caeruleus--a long-time ecological model species--has been used to investigate fitness consequences of variation in mating and reproductive behaviour. However, very little is known about the underlying genetic changes due to natural and sexual selection in the genome of this songbird. As a step to bridge this gap, we assembled the first draft genome of a single blue tit, mapped the transcriptome of five females and five males to this reference, identified genomewide variants and performed sex-differential expression analysis in the gonads, brain and other tissues. In the gonads, we found a high number of sex-biased genes, and of those, a similar proportion were sex-limited (genes only expressed in one sex) in males and females. However, in the brain, the proportion of female-limited genes within the female-biased gene category (82%) was substantially higher than the proportion of male-limited genes within the male-biased category (6%). 
This suggests a predominant on-off switching mechanism for the female-limited genes. In addition, most male-biased genes were located on the Z-chromosome, indicating incomplete dosage compensation for the male-biased genes. We called more than 500,000 SNPs from the RNA-seq data. Heterozygote detection in the single reference individual was highly congruent between DNA-seq and RNA-seq calling. Using information from these polymorphisms, we identified potential selection signals in the genome. We list candidate genes which can be used for further sequencing and detailed selection studies, including genes potentially related to meiotic drive evolution. A public genome browser of the blue tit with the described information is available at http://public-genomes-ngs.molgen.mpg.de.",2015-08-10 +21291572,A quality metric for homology modeling: the H-factor.,"

Background

The analysis of protein structures provides fundamental insight into most biochemical functions and consequently into the cause and possible treatment of diseases. As the structures of most known proteins cannot be solved experimentally for technical or sometimes simply for time constraints, in silico protein structure prediction is expected to step in and generate a more complete picture of the protein structure universe. Molecular modeling of protein structures is a fast growing field and tremendous works have been done since the publication of the very first model. The growth of modeling techniques and more specifically of those that rely on the existing experimental knowledge of protein structures is intimately linked to the developments of high resolution, experimental techniques such as NMR, X-ray crystallography and electron microscopy. This strong connection between experimental and in silico methods is however not devoid of criticisms and concerns among modelers as well as among experimentalists.

Results

In this paper, we focus on homology-modeling and more specifically, we review how it is perceived by the structural biology community and what can be done to impress on the experimentalists that it can be a valuable resource to them. We review the common practices and provide a set of guidelines for building better models. For that purpose, we introduce the H-factor, a new indicator for assessing the quality of homology models, mimicking the R-factor in X-ray crystallography. The method for computing the H-factor is fully described and validated on a series of test cases.

Conclusions

We have developed a web service for computing the H-factor for models of a protein structure. This service is freely accessible at http://koehllab.genomecenter.ucdavis.edu/toolkit/h-factor.",2011-02-04 +22851511,Batch effect removal methods for microarray gene expression data integration: a survey.,"Genomic data integration is a key goal to be achieved towards large-scale genomic data analysis. This process is very challenging due to the diverse sources of information resulting from genomics experiments. In this work, we review methods designed to combine genomic data recorded from microarray gene expression (MAGE) experiments. It has been acknowledged that the main source of variation between different MAGE datasets is due to the so-called 'batch effects'. The methods reviewed here perform data integration by removing (or more precisely attempting to remove) the unwanted variation associated with batch effects. They are presented in a unified framework together with a wide range of evaluation tools, which are mandatory in assessing the efficiency and the quality of the data integration process. We provide a systematic description of the MAGE data integration methodology together with some basic recommendation to help the users in choosing the appropriate tools to integrate MAGE data for large-scale analysis; and also how to evaluate them from different perspectives in order to quantify their efficiency. All genomic data used in this study for illustration purposes were retrieved from InSilicoDB http://insilico.ulb.ac.be.",2012-07-31 +23958725,PRAP: an ab initio software package for automated genome-wide analysis of DNA repeats for prokaryotes.,"

Motivation

Prokaryotic genome annotation has been focused mainly on identifying all genes and their protein functions. However, <30% of the prokaryotic genomes submitted to GenBank contain partial repeat features of specific types and none of the genomes contain complete repeat annotations. Deciphering all repeats in DNA sequences is an important and open task in genome annotation and bioinformatics. Hence, there is an immediate need of a tool capable of identifying full spectrum repeats in the whole genome.

Results

We report the PRAP (Prokaryotic Repeats Annotation Program) software package to automate the analysis of repeats in both finished and draft genomes. It is aimed at identifying full spectrum repeats at the scale of the prokaryotic genome. Compared with the major existing repeat finding tools, PRAP exhibits competitive or better results. The results are consistent with manually curated and experimental data. Repeats can be identified and grouped into families to define their relevant types. The final output is parsed into the European Molecular Biology Laboratory (EMBL)/GenBank feature table format for reading and displaying in Artemis, where it can be combined or compared with other genome data. It is currently the most complete repeat finder for prokaryotes and is a valuable tool for genome annotation.

Availability

https://sites.google.com/site/prapsoftware/

Contact

hsuehc@ntu.edu.tw.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-08-19 +23389836,Obesity and deranged sleep are independently associated with increased cancer mortality in 50 US states and the District of Columbia.,"

Introduction

Proper sleep is associated with reduced cancer risk. For example, multiple studies have found that habitual sleeping pill usage is related to death from cancer, suggesting that sleep derangement may increase cancer mortality. However, other studies have not found a definite connection between sleep and cancer deaths. For this reason, we analyzed US cancer mortality data and sleep quality data to see if there was a relationship.

Methods

Age-adjusted data on sleep disturbance in 50 US states and the District of Columbia are from Perceived insufficient rest or sleep among adults--United States, 2008. Age-adjusted all-cancer mortality data are from American Cancer Society Cancer Facts and Figures. Obesity data are from Vital signs: state-specific obesity prevalence among adults--United States, 2009. Data on race by state are from the 2010 US Census (http://www.census.gov).

Results

There was a significant correlation between percentage of persons who reported insufficient sleep every day in the preceding 30 days versus all-cancer mortality in 50 US states and the District of Columbia (p < 0.001). Because cancer survival is higher in whites than blacks and lower in obese individuals, multiple linear regression was performed. The association of insufficient sleep every day in the preceding 30 days with all-cancer mortality was significant (p = 0.017), independent of the percentage obese (p < 0.001), and unrelated to percentage white population (p = 0.847).

Conclusion

Alterations in endocrine function, perhaps abnormal cortisol metabolism resulting from deranged sleep, may be in part responsible for the increased all-cancer mortality we report here. Further studies would be worthwhile.",2013-02-07 +26032848,High level of Sema3C is associated with glioma malignancy.,"

Background

Malignant gliomas are characterized by the tendency of cancerous glial cells to infiltrate into normal brain tissue, thereby complicating targeted treatment of this type of cancer. Recent studies suggested involvement of Sema3C (semaphorin 3C) protein in tumorigenesis and metastasis in a number of cancers. The role of Sema3C in gliomagenesis is currently unclear. In this study, we investigated how expression levels of Sema3C in post-operative glioma tumors are associated with the malignancy grade and the survival of the patient.

Findings

Western blot analysis was used for detection of Sema3C protein levels in 84 different grade glioma samples: 12 grade I astrocytomas, 30 grade II astrocytomas, 17 grade III astrocytomas, and 25 grade IV astrocytomas (glioblastomas). Sema3C mRNA levels in gliomas were analysed by real-time PCR. Several statistical methods have been used to investigate associations between Sema3C protein and mRNA levels and clinical variables and survival outcome. The results demonstrated that protein levels of Sema3C were markedly increased in glioblastomas compared to grade I-III astrocytoma tissues and were significantly associated with the shorter overall survival of patients. High accumulation of Sema3C positively associated with the age of patients and pathological grade, but did not correlate with patient's gender. Sema3C mRNA levels showed no association with either grade of glioma or patient survival.

Conclusions

The data presented in this work suggest that the increased levels of Sema3C protein may be associated with the progression of glioma tumor and has a potential as a prognostic marker for outcome of glioma patients.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1564066714158642.",2015-06-02 +24532723,Discrete mixture modeling to address genetic heterogeneity in time-to-event regression.,"

Motivation

Time-to-event regression models are a critical tool for associating survival time outcomes with molecular data. Despite mounting evidence that genetic subgroups of the same clinical disease exist, little attention has been given to exploring how this heterogeneity affects time-to-event model building and how to accommodate it. Methods able to diagnose and model heterogeneity should be valuable additions to the biomarker discovery toolset.

Results

We propose a mixture of survival functions that classifies subjects with similar relationships to a time-to-event response. This model incorporates multivariate regression and model selection and can be fit with an expectation maximization algorithm that we call Cox-assisted clustering (CAC). We illustrate a likely manifestation of genetic heterogeneity and demonstrate how it may affect survival models with little warning. An application to gene expression in ovarian cancer DNA repair pathways illustrates how the model may be used to learn new genetic subsets for risk stratification. We explore the implications of this model for censored observations and the effect on genomic predictors and diagnostic analysis.

Availability and implementation

R implementation of CAC using standard packages is available at https://gist.github.com/programeng/8620b85146b14b6edf8f Data used in the analysis are publicly available.",2014-02-14 +25910697,Addressing false discoveries in network inference.,"

Motivation

Experimentally determined gene regulatory networks can be enriched by computational inference from high-throughput expression profiles. However, the prediction of regulatory interactions is severely impaired by indirect and spurious effects, particularly for eukaryotes. Recently, published methods report improved predictions by exploiting the a priori known targets of a regulator (its local topology) in addition to expression profiles.

Results

We find that methods exploiting known targets show an unexpectedly high rate of false discoveries. This leads to inflated performance estimates and the prediction of an excessive number of new interactions for regulators with many known targets. These issues are hidden from common evaluation and cross-validation setups, which is due to Simpson's paradox. We suggest a confidence score recalibration method (CoRe) that reduces the false discovery rate and enables a reliable performance estimation.

Conclusions

CoRe considerably improves the results of network inference methods that exploit known targets. Predictions then display the biological process specificity of regulators more correctly and enable the inference of accurate genome-wide regulatory networks in eukaryotes. For yeast, we propose a network with more than 22 000 confident interactions. We point out that machine learning approaches outside of the area of network inference may be affected as well.

Availability and implementation

Results, executable code and networks are available via our website http://www.bio.ifi.lmu.de/forschung/CoRe.

Contact

robert.kueffner@helmholtz-muenchen.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-24 +24860169,"SSpro/ACCpro 5: almost perfect prediction of protein secondary structure and relative solvent accessibility using profiles, machine learning and structural similarity.","

Motivation

Accurately predicting protein secondary structure and relative solvent accessibility is important for the study of protein evolution, structure and function and as a component of protein 3D structure prediction pipelines. Most predictors use a combination of machine learning and profiles, and thus must be retrained and assessed periodically as the number of available protein sequences and structures continues to grow.

Results

We present newly trained modular versions of the SSpro and ACCpro predictors of secondary structure and relative solvent accessibility together with their multi-class variants SSpro8 and ACCpro20. We introduce a sharp distinction between the use of sequence similarity alone, typically in the form of sequence profiles at the input level, and the additional use of sequence-based structural similarity, which uses similarity to sequences in the Protein Data Bank to infer annotations at the output level, and study their relative contributions to modern predictors. Using sequence similarity alone, SSpro's accuracy is between 79 and 80% (79% for ACCpro) and no other predictor seems to exceed 82%. However, when sequence-based structural similarity is added, the accuracy of SSpro rises to 92.9% (90% for ACCpro). Thus, by combining both approaches, these problems appear now to be essentially solved, as an accuracy of 100% cannot be expected for several well-known reasons. These results point also to several open technical challenges, including (i) achieving on the order of ≥ 80% accuracy, without using any similarity with known proteins and (ii) achieving on the order of ≥ 85% accuracy, using sequence similarity alone.

Availability and implementation

SSpro, SSpro8, ACCpro and ACCpro20 programs, data and web servers are available through the SCRATCH suite of protein structure predictors at http://scratch.proteomics.ics.uci.edu.",2014-05-24 +22079417,Disease associated cytokine SNPs database: an annotation and dissemination model.,"Cytokines mediate crucial functions in innate and adaptive immunity. They play valuable roles in immune cell growth and lineage specification, and are associated with various disease pathologies. A large number of low, medium and high throughput studies have implicated association of single nucleotide polymorphisms (SNPs) in cytokine genes with diseases. A preponderance of such experiments has not shown any causality of an identified SNP to the associated disease. Instead, they have identified statistically significant SNP-disease associations; it is likely that some of these cytokine gene variants may directly or indirectly cause the disease phenotype(s). To fill this knowledge gap and derive study parameters for cytokine SNP-disease causality relationships, we have designed and developed the disease associated cytokine SNP database (DACS-DB). DACS-DB has data on 456 cytokine genes, approximately 63,000 SNPs, and 853 SNP-associated diseases. In DACS-DB, among other attributes, we present functional annotation, and heterozygosity allele frequency for the SNPs, and literature-validated SNP association for diseases. Users of the DB can run queries such as the ones to find disease-associated SNPs in a cytokine gene, and all the SNPs involved in a disease. We have developed a web front end (available at http://www.iupui.edu/~cytosnp) to disseminate this information for immunologists, biomedical researchers, and other interested biological researchers. 
Since there is no such comprehensive collection of disease associated cytokine SNPs, this DB will be vital to understand the role of cytokine SNPs as markers in disease, and more importantly, in causality to disease thus helping to identify drug targets for common inflammatory diseases.",2011-11-10 +22268978,Crystal structures of the Chromobacterium violaceumω-transaminase reveal major structural rearrangements upon binding of coenzyme PLP.,"The bacterial ω-transaminase from Chromobacterium violaceum (Cv-ωTA, EC2.6.1.18) catalyses industrially important transamination reactions by use of the coenzyme pyridoxal 5'-phosphate (PLP). Here, we present four crystal structures of Cv-ωTA: two in the apo form, one in the holo form and one in an intermediate state, at resolutions between 1.35 and 2.4 Å. The enzyme is a homodimer with a molecular mass of ∼ 100 kDa. Each monomer has an active site at the dimeric interface that involves amino acid residues from both subunits. The apo-Cv-ωTA structure reveals unique 'relaxed' conformations of three critical loops involved in structuring the active site that have not previously been seen in a transaminase. Analysis of the four crystal structures reveals major structural rearrangements involving elements of the large and small domains of both monomers that reorganize the active site in the presence of PLP. The conformational change appears to be triggered by binding of the phosphate group of PLP. Furthermore, one of the apo structures shows a disordered 'roof ' over the PLP-binding site, whereas in the other apo form and the holo form the 'roof' is ordered. Comparison with other known transaminase crystal structures suggests that ordering of the 'roof' structure may be associated with substrate binding in Cv-ωTA and some other transaminases.

Database

The atomic coordinates and structure factors for the Chromobacterium violaceum ω-transaminase crystal structures can be found in the RCSB Protein Data Bank (http://www.rcsb.org) under the accession codes 4A6U for the holoenzyme, 4A6R for the apo1 form, 4A6T for the apo2 form and 4A72 for the mixed form.

Structured digital abstract

• ω-transaminases and ω-transaminases bind by dynamic light scattering (View interaction) • ω-transaminase and ω-transaminase bind by x-ray crystallography (View interaction) • ω-transaminase and ω-transaminase bind by x-ray crystallography (View interaction).",2012-01-23 +26517380,Blood Cadmium Levels and Incident Cardiovascular Events during Follow-up in a Population-Based Cohort of Swedish Adults: The Malmö Diet and Cancer Study.,"

Background

Cadmium exposure may increase the risk of cardiovascular disease. The only published longitudinal study on cadmium and incident cardiovascular disease was performed in American Indians with relatively high cadmium exposure.

Objectives

Our aim was to examine the association between blood cadmium at baseline and incident cardiovascular events in a population-based study of Swedish men and women with cadmium levels similar to those of most European and U.S. populations.

Methods

A Swedish population-based cohort (n = 6,103, age 46-67 years) was recruited between 1991 and 1994. After we excluded those with missing data on smoking, 4,819 participants remained. Acute coronary events, other major cardiac events, stroke, and cardiovascular mortality were followed until 2010. Associations with blood cadmium (estimated from cadmium in erythrocytes) were analyzed using Cox proportional hazards regression including potential confounders and important cardiovascular risk factors.

Results

Hazard ratios for all cardiovascular end points were consistently increased for participants in the 4th blood cadmium quartile (median, 0.99 μg/L). In models that also included sex, smoking, waist circumference, education, physical activity, alcohol intake, serum triglycerides, HbA1c, and C-reactive protein, the hazard ratios comparing the highest and lowest quartiles of exposure were 1.8 (95% CI: 1.2, 2.7) for acute coronary events, and 1.9 (1.3, 2.9) for stroke. Hazard ratios in never-smokers were consistent with these estimates.

Conclusions

Blood cadmium in the highest quartile was associated with incident cardiovascular disease and mortality in our population-based samples of Swedish adults. The consistent results among never-smokers are important because smoking is a strong confounder. Our findings suggest that measures to reduce cadmium exposures are warranted, even in populations without unusual sources of exposure.

Citation

Barregard L, Sallsten G, Fagerberg B, Borné Y, Persson M, Hedblad B, Engström G. 2016. Blood cadmium levels and incident cardiovascular events during follow-up in a population-based cohort of Swedish adults: the Malmö Diet and Cancer Study. Environ Health Perspect 124:594-600; http://dx.doi.org/10.1289/ehp.1509735.",2015-10-30 +25637660,A statistical approach to the initial volume problem in Single Particle Analysis by Electron Microscopy.,"Cryo Electron Microscopy is a powerful Structural Biology technique, allowing the elucidation of the three-dimensional structure of biological macromolecules. In particular, the structural study of purified macromolecules -often referred to as Single Particle Analysis (SPA)- is normally performed through an iterative process that needs a first estimation of the three-dimensional structure that is progressively refined using experimental data. The local optimisation nature of this refinement is well known, so the initial choice of this first structure may substantially change the final result. Computational algorithms aiming to provide this first structure already exist. However, the question is far from settled and more robust algorithms are still needed so that the refinement process can be performed with sufficient guarantees. In this article we present a new algorithm that addresses the initial volume problem in SPA by setting it in a Weighted Least Squares framework and calculating the weights through a statistical approach based on the cumulative density function of different image similarity measures. We show that the new algorithm is significantly more robust than other state-of-the-art algorithms currently in use in the field. 
The algorithm is available as part of the software suite Xmipp (http://xmipp.cnb.csic.es) and Scipion (http://scipion.cnb.csic.es) under the name ""Significant"".",2015-01-28 +23109555,The UCSC Cancer Genomics Browser: update 2013.,"The UCSC Cancer Genomics Browser (https://genome-cancer.ucsc.edu/) is a set of web-based tools to display, investigate and analyse cancer genomics data and its associated clinical information. The browser provides whole-genome to base-pair level views of several different types of genomics data, including some next-generation sequencing platforms. The ability to view multiple datasets together allows users to make comparisons across different data and cancer types. Biological pathways, collections of genes, genomic or clinical information can be used to sort, aggregate and zoom into a group of samples. We currently display an expanding set of data from various sources, including 201 datasets from 22 TCGA (The Cancer Genome Atlas) cancers as well as data from Cancer Cell Line Encyclopedia and Stand Up To Cancer. New features include a completely redesigned user interface with an interactive tutorial and updated documentation. We have also added data downloads, additional clinical heatmap features, and an updated Tumor Image Browser based on Google Maps. New security features allow authenticated users access to private datasets hosted by several different consortia through the public website.",2012-10-29 +30727175,First Report of Nigrospora oryzae Causing Leaf Spot of Cotton in China.,"Cotton (Gossypium hirsutum L.) is widely cultivated for the important economic value of the fiber. In the summer of 2011, a leaf spot of cotton plants cv. Wanza40 was observed in 11 fields (total of about 4 ha) in Qianshan County in southwest Anhui Province, China. Approximately 30% of the plants in each field were symptomatic. Affected plants exhibited brown to reddish, irregular foliar lesions, each with a brown border near the vein of the leaves. 
A sign of fungal infection was a dark leaf mold observed on lesions on the abaxial surface of leaves. Sections of symptomatic leaf tissues were surface-sterilized (in 75% ethanol for 30 s, then 1% NaOCl for 1 min), rinsed three times in sterile distilled water, and plated onto potato dextrose agar (PDA). A fungus consistently recovered from symptomatic leaf samples produced colonies that were initially white and then became grayish brown with the onset of sporulation. Black, spherical to subspherical, single-celled conidia (10 to 12 × 14 to 16 μm) were borne on a hyaline vesicle at the tip of each conidiophore. Morphological characteristics of the fungus were similar to that of Nigrospora oryzae (2). The internal transcribed spacer (ITS) region of ribosomal DNA (rDNA) from a representative strain of the fungus, AHC-1, was amplified using the primers ITS1/ITS4 (4) and sequenced (GenBank Accession No. JQ864579). The ITS sequence had 99% identity with >553 bp of the ITS sequence of an N. oryzae isolate (GenBank Accession No. EU918714.1). On the basis of morphological data and ITS rDNA sequence, the isolate was determined to be N. oryzae. A pathogenicity test was performed on detached, young leaves of 4-month-old healthy cotton plants of cv. Wanza40. Six leaves were inoculated by placing a colonized agar piece (5 mm in diameter) from 7-day-old cultures of the fungus on pushpin-wounded leaves. Another six leaves treated with sterile PDA plugs served as a negative control treatment. Leaves were incubated in petri dishes and maintained at 25°C in a growth chamber programmed for 12 hours of fluorescent white light/day. After 5 days, brown to black lesions were observed on all inoculated leaves, whereas no symptoms developed on control leaves. N. oryzae was consistently reisolated from symptomatic leaves but not from the control leaves. N. 
oryzae is a weak pathogen on a wide range of plants, and has been described as the causal agent of lint rot on cotton (1,3), but to our knowledge this is the first report of N. oryzae causing a leaf spot of cotton in China. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA, Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , April 8, 2012. (2) H. J. Hudson. Trans. Br. Mycol. Soc. 46:355, 1963. (3) A. J. Palmatter et al. Plant Dis. 87:873, 2003. (4) T. J. White et al. In: PCR Protocols: A Guide to Methods and Applications. Academic Press, San Diego, 1990.",2012-09-01 +25908274,"MiR-21, miR-34a, miR-198 and miR-217 as diagnostic and prognostic biomarkers for chronic pancreatitis and pancreatic ductal adenocarcinoma.","

Background

Pancreatic ductal adenocarcinoma is an aggressive malignancy with late presentation, metastatic potential and very poor prognosis. Therefore, there is an urgent need for novel diagnostic and prognostic biomarkers. MicroRNAs are small non-coding RNAs that post-transcriptionally regulate gene expression. Altered expression of microRNAs has been reported in wide range of malignancies, including pancreatic ductal adenocarcinoma. The aim of this study was to analyze the expression of selected microRNAs in normal pancreas, chronic pancreatitis and pancreatic ductal adenocarcinoma tissues and evaluate their diagnostic and prognostic potential.

Findings

Using quantitative real-time PCR, expression levels of 4 microRNAs were examined in 74 tumor tissues, 18 tissues of chronic pancreatitis and 9 adjacent normal tissues and correlated with clinicopathological features of patients. Expression levels of miR-21, miR-34a and miR-198 were significantly higher, whereas levels of miR-217 were significantly lower in pancreatic ductal adenocarcinomas compared to healthy tissues and tissues of chronic pancreatitis. Moreover, increased expression of miR-21 and miR-198 was significantly associated with shorter disease free survival and overall survival.

Conclusions

Our data suggest that altered expression of examined microRNAs is related to neoplastic transformation and progression of the disease and these microRNAs could serve as diagnostic and prognostic biomarkers for pancreatic ductal adenocarcinoma.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1373952531543898.",2015-04-24 +23505293,Sorad: a systems biology approach to predict and modulate dynamic signaling pathway response from phosphoproteome time-course measurements.,"

Motivation

Signaling networks mediate responses to different stimuli using a multitude of feed-forward, feedback and cross-talk mechanisms, and malfunctions in these mechanisms have an important role in various diseases. To understand a disease and to help discover novel therapeutic approaches, we have to reveal the molecular mechanisms underlying signal transduction and use that information to design targeted perturbations.

Results

We have pursued this direction by developing an efficient computational approach, Sorad, which can estimate the structure of signal transduction networks and the associated continuous signaling dynamics from phosphoprotein time-course measurements. Further, Sorad can identify experimental conditions that modulate the signaling toward a desired response. We have analyzed comprehensive phosphoprotein time-course data from a human hepatocellular liver carcinoma cell line and demonstrate here that Sorad provides more accurate predictions of phosphoprotein responses to given stimuli than previously presented methods and, importantly, that Sorad can estimate experimental conditions to achieve a desired signaling response. Because Sorad is data driven, it has a high potential to generate novel hypotheses for further research. Our analysis of the hepatocellular liver carcinoma data predict a regulatory connection where AKT activity is dependent on IKK in TGFα stimulated cells, which is supported by the original data but not included in the original model.

Availability

An implementation of the proposed computational methods will be available at http://research.ics.aalto.fi/csb/software/.

Contact

tarmo.aijo@aalto.fi or harri.lahdesmaki@aalto.fi

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-03-16 +25893087,Split diversity in constrained conservation prioritization using integer linear programming.,"Phylogenetic diversity (PD) is a measure of biodiversity based on the evolutionary history of species. Here, we discuss several optimization problems related to the use of PD, and the more general measure split diversity (SD), in conservation prioritization. Depending on the conservation goal and the information available about species, one can construct optimization routines that incorporate various conservation constraints. We demonstrate how this information can be used to select sets of species for conservation action. Specifically, we discuss the use of species' geographic distributions, the choice of candidates under economic pressure, and the use of predator-prey interactions between the species in a community to define viability constraints. Despite such optimization problems falling into the area of NP-hard problems, it is possible to solve them in a reasonable amount of time using integer programming. We apply integer linear programming to a variety of models for conservation prioritization that incorporate the SD measure. As examples, we show the results for two data sets: the Cape region of South Africa and a Caribbean coral reef community. Finally, we provide user-friendly software at http://www.cibiv.at/software/pda.",2014-12-06 +26464967,neuTube 1.0: A New Design for Efficient Neuron Reconstruction Software Based on the SWC Format. ,"Brain circuit mapping requires digital reconstruction of neuronal morphologies in complicated networks. Despite recent advances in automatic algorithms, reconstruction of neuronal structures is still a bottleneck in circuit mapping due to a lack of appropriate software for both efficient reconstruction and user-friendly editing. 
Here we present a new software design based on the SWC format, a standardized neuromorphometric format that has been widely used for analyzing neuronal morphologies or sharing neuron reconstructions via online archives such as NeuroMorpho.org. We have also implemented the design in our open-source software called neuTube 1.0. As specified by the design, the software is equipped with parallel 2D and 3D visualization and intuitive neuron tracing/editing functions, allowing the user to efficiently reconstruct neurons from fluorescence image data and edit standard neuron structure files produced by any other reconstruction software. We show the advantages of neuTube 1.0 by comparing it to two other software tools, namely Neuromantic and Neurostudio. The software is available for free at http://www.neutracing.com, which also hosts complete software documentation and video tutorials.",2015-01-02 +21625797,Assessment of the scientific-technological production in molecular biology in Brazil (1996-2007): the contribution of genomics programs.,"Several genome sequencing programs were launched in Brazil by the end of the nineties and the early 2000s. The most important initiatives were supported by the ONSA program (http://watson.fapesp.br/onsa/Genoma3.htm) and aimed at gaining domain in genomic technology and bringing molecular biology to the state of the art. Two main sets of data were collected in the 1996-2007 period to evaluate the results of these genome programs: the scientific production (Scopus and Web of Science databases) and the register of patents (US Patent and Trademark Office), both related to the progress of molecular biology along this period. In regard to the former, Brazil took a great leap in comparison to 17 other developed and developing countries, being only surpassed by China. 
As to the register of patents in the area of molecular biology, Brazil's performance lags far behind most of the countries focused in the present study, confirming the Brazilian long-standing tendency of poor achievements in technological innovations when compared with scientific production. Possible solutions to surpass this inequality are discussed.",2011-05-27 +25018704,Preserving subject variability in group fMRI analysis: performance evaluation of GICA vs. IVA.,"Independent component analysis (ICA) is a widely applied technique to derive functionally connected brain networks from fMRI data. Group ICA (GICA) and Independent Vector Analysis (IVA) are extensions of ICA that enable users to perform group fMRI analyses; however a full comparison of the performance limits of GICA and IVA has not been investigated. Recent interest in resting state fMRI data with potentially higher degree of subject variability makes the evaluation of the above techniques important. In this paper we compare component estimation accuracies of GICA and an improved version of IVA using simulated fMRI datasets. We systematically change the degree of inter-subject spatial variability of components and evaluate estimation accuracy over all spatial maps (SMs) and time courses (TCs) of the decomposition. 
Our results indicate the following: (1) at low levels of SM variability or when just one SM is varied, both GICA and IVA perform well, (2) at higher levels of SM variability or when more than one SMs are varied, IVA continues to perform well but GICA yields SM estimates that are composites of other SMs with errors in TCs, (3) both GICA and IVA remove spatial correlations of overlapping SMs and introduce artificial correlations in their TCs, (4) if number of SMs is over estimated, IVA continues to perform well but GICA introduces artifacts in the varying and extra SMs with artificial correlations in the TCs of extra components, and (5) in the absence or presence of SMs unique to one subject, GICA produces errors in TCs and IVA estimates are accurate. In summary, our simulation experiments (both simplistic and realistic) and our holistic analyses approach indicate that IVA produces results that are closer to ground truth and thereby better preserves subject variability. The improved version of IVA is now packaged into the GIFT toolbox (http://mialab.mrn.org/software/gift).",2014-06-26 +23418726,A computational method for detecting copy number variations using scale-space filtering.,"

Background

As next-generation sequencing technology made rapid and cost-effective sequencing available, the importance of computational approaches in finding and analyzing copy number variations (CNVs) has been amplified. Furthermore, most genome projects need to accurately analyze sequences with fairly low-coverage read data. It is urgently needed to develop a method to detect the exact types and locations of CNVs from low coverage read data.

Results

Here, we propose a new CNV detection method, CNV_SS, which uses scale-space filtering. The scale-space filtering is evaluated by applying to the read coverage data the Gaussian convolution for various scales according to a given scaling parameter. Next, by differentiating twice and finding zero-crossing points, inflection points of scale-space filtered read coverage data are calculated per scale. Then, the types and the exact locations of CNVs are obtained by analyzing the finger print map, the contours of zero-crossing points for various scales.

Conclusions

The performance of CNV_SS showed that FNR and FPR stay in the range of 1.27% to 2.43% and 1.14% to 2.44%, respectively, even at a relatively low coverage (0.5x ≤C ≤2x). CNV_SS gave also much more effective results than the conventional methods in the evaluation of FNR, at 3.82% at least and 76.97% at most even when the coverage level of read data is low. CNV_SS source code is freely available from http://dblab.hallym.ac.kr/CNV SS/.",2013-02-18 +24838570,Alkemio: association of chemicals with biomedical topics by text and data mining.,"

Unlabelled

The PubMed® database of biomedical citations allows the retrieval of scientific articles studying the function of chemicals in biology and medicine. Mining millions of available citations to search reported associations between chemicals and topics of interest would require substantial human time. We have implemented the Alkemio text mining web tool and SOAP web service to help in this task. The tool uses biomedical articles discussing chemicals (including drugs), predicts their relatedness to the query topic with a naïve Bayesian classifier and ranks all chemicals by P-values computed from random simulations. Benchmarks on seven human pathways showed good retrieval performance (areas under the receiver operating characteristic curves ranged from 73.6 to 94.5%). Comparison with existing tools to retrieve chemicals associated to eight diseases showed the higher precision and recall of Alkemio when considering the top 10 candidate chemicals. Alkemio is a high performing web tool ranking chemicals for any biomedical topics and it is free to non-commercial users.

Availability

http://cbdm.mdc-berlin.de/∼medlineranker/cms/alkemio.",2014-05-16 +21216776,MADGene: retrieval and processing of gene identifier lists for the analysis of heterogeneous microarray datasets.,"

Unlabelled

MADGene is a software environment comprising a web-based database and a java application. This platform aims at unifying gene identifiers (ids) and performing gene set analysis. MADGene allows the user to perform inter-conversion of clone and gene ids over a large range of nomenclatures relative to 17 species. We propose a set of 23 functions to facilitate the analysis of gene sets and we give two microarray applications to show how MADGene can be used to conduct meta-analyses.

Availability

The MADGene resources are freely available online from http://www.madtools.org, a website dedicated to the analysis and annotation of DNA microarray data.",2011-01-06 +24885033,"Corticosteroid therapy in regressive autism: a retrospective study of effects on the Frequency Modulated Auditory Evoked Response (FMAER), language, and behavior.","

Background

Up to a third of children with Autism Spectrum Disorder (ASD) manifest regressive autism (R-ASD). They show normal early development followed by loss of language and social skills. Absent evidence-based therapies, anecdotal evidence suggests improvement following use of corticosteroids. This study examined the effects of corticosteroids for R-ASD children upon the 4 Hz frequency modulated evoked response (FMAER) arising from language cortex of the superior temporal gyrus (STG) and upon EEG background activity, language, and behavior. An untreated clinical convenience sample of ASD children served as control sample.

Methods

Twenty steroid-treated R-ASD (STAR) and 24 not-treated ASD patients (NSA), aged 3 - 5 years, were retrospectively identified from a large database. All study participants had two sequential FMAER and EEG studies; Landau-Kleffner syndrome diagnosis was excluded. All subjects' records contained clinical receptive and expressive language ratings based upon a priori developed metrics. The STAR group additionally was scored behaviorally regarding symptom severity as based on the Diagnostic and Statistical Manual IV (DSM-IV) ASD criteria list. EEGs were visually scored for abnormalities. FMAER responses were assessed quantitatively by spectral analysis. Treated and untreated group means and standard deviations for the FMAER, EEG, language, and behavior, were compared by paired t-test and Fisher's exact tests.

Results

The STAR group showed a significant increase in the 4 Hz FMAER spectral response and a significant reduction in response distortion compared to the NSA group. Star group subjects' language ratings were significantly improved and more STAR than NSA group subjects showed significant language improvement. Most STAR group children showed significant behavioral improvement after treatment. STAR group language and behavior improvement was retained one year after treatment. Groups did not differ in terms of minor EEG abnormalities. Steroid treatment produced no lasting morbidity.

Conclusions

Steroid treatment was associated with a significantly increased FMAER response magnitude, reduction of FMAER response distortion, and improvement in language and behavior scores. This was not observed in the non-treated group. These pilot findings warrant a prospective randomized validation trial of steroid treatment for R-ASD utilizing FMAER, EEG, and standardized ASD, language and behavior measures, and a longer follow-up period.Please see related article http://www.biomedcentral.com/1741-7015/12/79.",2014-05-15 +24870500,Phytoseiidae database: a website for taxonomic and distributional information on phytoseiid mites (Acari).,"This paper announces a database on the taxonomy and distribution of mites of the family Phytoseiidae Berlese, which is available online at http://www.lea.esalq.usp.br/phytoseiidae/. Synthesis of species diversity per genus, subfamily and country are given. Information about use of the database is provided.",2014-05-15 +21472892,Mutation update for the PORCN gene.,"Mutations in the PORCN gene were first identified in Goltz-Gorlin syndrome patients in 2007. Since then, several reports have been published describing a large variety of genetic defects resulting in the Goltz-Gorlin syndrome, and mutations or deletions were also reported in angioma serpiginosum, the pentalogy of Cantrell and Limb-Body Wall Complex. Here we present a review of the published mutations in the PORCN gene to date and report on seven new mutations together with the corresponding clinical data. Based on the review we have created a Web-based locus-specific database that lists all identified variants and allows the inclusion of future reports. The database is based on the Leiden Open (source) Variation Database (LOVD) software, and is accessible online at http://www.lovd.nl/porcn. 
At present, the database contains 106 variants, representing 68 different mutations, scattered along the whole coding sequence of the PORCN gene, and 12 large gene rearrangements, which brings up to 80 the number of unique mutations identified in Goltz-Gorlin syndrome patients.",2011-06-21 +21423624,Hypomethylation of intragenic LINE-1 represses transcription in cancer cells through AGO2.,"In human cancers, the methylation of long interspersed nuclear element -1 (LINE-1 or L1) retrotransposons is reduced. This occurs within the context of genome wide hypomethylation, and although it is common, its role is poorly understood. L1s are widely distributed both inside and outside of genes, intragenic and intergenic, respectively. Interestingly, the insertion of active full-length L1 sequences into host gene introns disrupts gene expression. Here, we evaluated if intragenic L1 hypomethylation influences their host gene expression in cancer. First, we extracted data from L1base (http://l1base.molgen.mpg.de), a database containing putatively active L1 insertions, and compared intragenic and intergenic L1 characters. We found that intragenic L1 sequences have been conserved across evolutionary time with respect to transcriptional activity and CpG dinucleotide sites for mammalian DNA methylation. Then, we compared regulated mRNA levels of cells from two different experiments available from Gene Expression Omnibus (GEO), a database repository of high throughput gene expression data, (http://www.ncbi.nlm.nih.gov/geo) by chi-square. The odds ratio of down-regulated genes between demethylated normal bronchial epithelium and lung cancer was high (p<1E(-27); OR = 3.14; 95% CI = 2.54-3.88), suggesting cancer genome wide hypomethylation down-regulating gene expression. Comprehensive analysis between L1 locations and gene expression showed that expression of genes containing L1s had a significantly higher likelihood to be repressed in cancer and hypomethylated normal cells. 
In contrast, many mRNAs derived from genes containing L1s are elevated in Argonaute 2 (AGO2 or EIF2C2)-depleted cells. Hypomethylated L1s increase L1 mRNA levels. Finally, we found that AGO2 targets intronic L1 pre-mRNA complexes and represses cancer genes. These findings represent one of the mechanisms of cancer genome wide hypomethylation altering gene expression. Hypomethylated intragenic L1s are a nuclear siRNA mediated cis-regulatory element that can repress genes. This epigenetic regulation of retrotransposons likely influences many aspects of genomic biology.",2011-03-15 +30727330,First Report of a Group 16SrI-B Phytoplasma Associated with Gardenia jasminoides in China.,"Gardenia jasminoides J. Ellis, (also known as common gardenia, cape jasmine, or cape jessamine) is a fragrant flowering evergreen tropical plant, a favorite in gardens worldwide. G. jasminoides were found with small, seriously yellowed leaves, stunted growth, and witches'-broom in a green belt on the Southwest University campus in October 2011. The incidence was lower than 2%. In another green belt, G. jasminoides with only slightly yellowing leaves were found. The incidence was about 5%. Five months later, most seriously yellowed leaves withered. However, no withered leaf was observed among the slightly yellowing leaves. Leaf samples from each symptomatic plant, together with asymptomatic plants from the same belt, were collected for total DNA extraction using a modified cetyltrimethylammoniumbromide method (1). The resulting DNA extracts were analyzed by a nested PCR assay using the phytoplasma 16S rRNA gene primer pairs R16mF2/R16mR1 followed by R16F2n/R16R2 (2). DNA fragments of 1.2 kb that corresponded to 16S rDNA were amplified only from the DNA samples of the five plants with the symptoms mentioned above. The purified nested PCR products were cloned in pGEM-T Easy Vector (Promega) and then sequenced. The resulting 16S rDNA sequences were found to be identical (GenBank Accession No. 
JQ675713). The consensus sequence was analyzed by the iPhyClassifier online tool ( http://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi ) and found to share 99.4% similarity with the 16S rDNA sequence of the 'Candidatus Phytoplasma asteris' reference strain (GenBank Accession No. M30790) that belongs to the 16SrI-B subgroup (3). The virtual RFLP pattern of the G. jasminoides phytoplasma 16S rDNA gene sequence showed maximum similarity to the reference pattern of NC005303 (similarity coefficient of 1.0). The phylogenetic tree based on the 16S rDNA sequences of phytoplasmas belonging to group 16SrI and other distinct phytoplasma groups also showed that our sequences clustered with members of subgroup 16SrI-B. Subsequently, the presence of the phytoplasmas in symptomatic plants was also confirmed by transmission electron microscopy. Taken together, the phytoplasma was classified as a member of subgroup 16SrI-B. To our knowledge, this is the first report of a subgroup 16SrI-B phytoplasma associated with diseased G. jasminoides in China. G. jasminoides yellowing is often considered to result from nutrient deficiency (especially iron compounds). However, our findings showed that a phytoplasma can cause G. jasminoides yellowing, which should be considered in the control of leaves yellowing. References: (1) E. Angelini et al. Vitis 40:79, 2001. (2) D. E. Gundersen and I.-M. Lee. Phytopathol. Mediterr. 35:144, 1996. (3) Y. Zhao, et al. Int. J. Syst. Evol. Microbiol. 59:2582, 2009.",2012-10-01 +25900917,Protein homology reveals new targets for bioactive small molecules.,"

Motivation

The functional impact of small molecules is increasingly being assessed in different eukaryotic species through large-scale phenotypic screening initiatives. Identifying the targets of these molecules is crucial to mechanistically understand their function and uncover new therapeutically relevant modes of action. However, despite extensive work carried out in model organisms and human, it is still unclear to what extent one can use information obtained in one species to make predictions in other species.

Results

Here, for the first time, we explore and validate at a large scale the use of protein homology relationships to predict the targets of small molecules across different species. Our results show that exploiting target homology can significantly improve the predictions, especially for molecules experimentally tested in other species. Interestingly, when considering separately orthology and paralogy relationships, we observe that mapping small molecule interactions among orthologs improves prediction accuracy, while including paralogs does not improve and even sometimes worsens the prediction accuracy. Overall, our results provide a novel approach to integrate chemical screening results across multiple species and highlight the promises and remaining challenges of using protein homology for small molecule target identification.

Availability and implementation

Homology-based predictions can be tested on our website http://www.swisstargetprediction.ch.

Contact

david.gfeller@unil.ch or vincent.zoete@isb-sib.ch.

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-21 +24829458,RBPmap: a web server for mapping binding sites of RNA-binding proteins.,"Regulation of gene expression is executed in many cases by RNA-binding proteins (RBPs) that bind to mRNAs as well as to non-coding RNAs. RBPs recognize their RNA target via specific binding sites on the RNA. Predicting the binding sites of RBPs is known to be a major challenge. We present a new webserver, RBPmap, freely accessible through the website http://rbpmap.technion.ac.il/ for accurate prediction and mapping of RBP binding sites. RBPmap has been developed specifically for mapping RBPs in human, mouse and Drosophila melanogaster genomes, though it supports other organisms too. RBPmap enables the users to select motifs from a large database of experimentally defined motifs. In addition, users can provide any motif of interest, given as either a consensus or a PSSM. The algorithm for mapping the motifs is based on a Weighted-Rank approach, which considers the clustering propensity of the binding sites and the overall tendency of regulatory regions to be conserved. In addition, RBPmap incorporates a position-specific background model, designed uniquely for different genomic regions, such as splice sites, 5' and 3' UTRs, non-coding RNA and intergenic regions. RBPmap was tested on high-throughput RNA-binding experiments and was proved to be highly accurate.",2014-05-14 +25209025,Unsupervised gene function extraction using semantic vectors. ,"Finding gene functions discussed in the literature is an important task of information extraction (IE) from biomedical documents. Automated computational methodologies can significantly reduce the need for manual curation and improve quality of other related IE systems. We propose an open-IE method for the BioCreative IV GO shared task (subtask b), focused on finding gene function terms [Gene Ontology (GO) terms] for different genes in an article. 
The proposed open-IE approach is based on distributional semantic similarity over the GO terms. The method does not require annotated data for training, which makes it highly generalizable. We achieve an F-measure of 0.26 on the test-set in the official submission for BioCreative-GO shared task, the third highest F-measure among the seven participants in the shared task. https://code.google.com/p/rainbow-nlp/",2014-09-10 +22842721,Use of imputed population-based cancer registry data as a method of accounting for missing information: application to estrogen receptor status for breast cancer.,"The National Cancer Institute's Surveillance, Epidemiology, and End Results (SEER) Program provides a rich source of data stratified according to tumor biomarkers that play an important role in cancer surveillance research. These data are useful for analyzing trends in cancer incidence and survival. These tumor markers, however, are often prone to missing observations. To address the problem of missing data, the authors employed sequential regression multivariate imputation for breast cancer variables, with a particular focus on estrogen receptor status, using data from 13 SEER registries covering the period 1992-2007. In this paper, they present an approach to accounting for missing information through the creation of imputed data sets that can be analyzed using existing software (e.g., SEER*Stat) developed for analyzing cancer registry data. Bias in age-adjusted trends in female breast cancer incidence is shown graphically before and after imputation of estrogen receptor status, stratified by age and race. The imputed data set will be made available in SEER*Stat (http://seer.cancer.gov/analysis/index.html) to facilitate accurate estimation of breast cancer incidence trends. To ensure that the imputed data set is used correctly, the authors provide detailed, step-by-step instructions for conducting analyses. 
This is the first time that a nationally representative, population-based cancer registry data set has been imputed and made available to researchers for conducting a variety of analyses of breast cancer incidence trends.",2012-07-25 +21520941,ScanRanker: Quality assessment of tandem mass spectra via sequence tagging.,"In shotgun proteomics, protein identification by tandem mass spectrometry relies on bioinformatics tools. Despite recent improvements in identification algorithms, a significant number of high quality spectra remain unidentified for various reasons. Here we present ScanRanker, an open-source tool that evaluates the quality of tandem mass spectra via sequence tagging with reliable performance in data from different instruments. The superior performance of ScanRanker enables it not only to find unassigned high quality spectra that evade identification through database search but also to select spectra for de novo sequencing and cross-linking analysis. In addition, we demonstrate that the distribution of ScanRanker scores predicts the richness of identifiable spectra among multiple LC-MS/MS runs in an experiment, and ScanRanker scores assist the process of peptide assignment validation to increase confident spectrum identifications. The source code and executable versions of ScanRanker are available from http://fenchurch.mc.vanderbilt.edu.",2011-04-26 +26130578,Integrating full spectrum of sequence features into predicting functional microRNA-mRNA interactions.,"

Motivation

MicroRNAs (miRNAs) play important roles in general biological processes and diseases pathogenesis. Identifying miRNA target genes is an essential step to fully understand the regulatory effects of miRNAs. Many computational methods based on the sequence complementary rules and the miRNA and mRNA expression profiles have been developed for this purpose. It is noted that there have been many sequence features of miRNA targets available, including the context features of the target sites, the thermodynamic stability and the accessibility energy for miRNA-mRNA interaction. However, most of current computational methods that combine sequence and expression information do not effectively integrate full spectrum of these features; instead, they perceive putative miRNA-mRNA interactions from sequence-based prediction as equally meaningful. Therefore, these sequence features have not been fully utilized for improving miRNA target prediction.

Results

We propose a novel regularized regression approach that is based on the adaptive Lasso procedure for detecting functional miRNA-mRNA interactions. Our method fully takes into account the gene sequence features and the miRNA and mRNA expression profiles. Given a set of sequence features for each putative miRNA-mRNA interaction and their expression values, our model quantifies the down-regulation effect of each miRNA on its targets while simultaneously estimating the contribution of each sequence feature to predicting functional miRNA-mRNA interactions. By applying our model to the expression datasets from two cancer studies, we have demonstrated our prediction results have achieved better sensitivity and specificity and are more biologically meaningful compared with those based on other methods.

Availability and implementation

The source code is available at: http://nba.uth.tmc.edu/homepage/liu/miRNALasso.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

Yin.Liu@uth.tmc.edu.",2015-06-30 +23153250,"MTMDAT-HADDOCK: high-throughput, protein complex structure modeling based on limited proteolysis and mass spectrometry.","

Background

MTMDAT is a program designed to facilitate analysis of mass spectrometry data of proteins and biomolecular complexes that are probed structurally by limited proteolysis. This approach can provide information about stable fragments of multidomain proteins, yield tertiary and quaternary structure data, and help determine the origin of stability changes at the amino acid residue level. Here, we introduce a pipeline between MTMDAT and HADDOCK, that facilitates protein-protein complex structure probing in a high-throughput and highly automated fashion.

Results

A new feature of MTMDAT allows for the direct identification of residues that are involved in complex formation by comparing the mass spectra of bound and unbound proteins after proteolysis. If 3D structures of the unbound components are available, this data can be used to define restraints for data-driven docking to calculate a model of the complex. We describe here a new implementation of MTMDAT, which includes a pipeline to the data-driven docking program HADDOCK, thus streamlining the entire procedure. This addition, together with usability improvements in MTMDAT, enables high-throughput modeling of protein complexes from mass spectrometry data. The algorithm has been validated by using the protein-protein interaction between the ubiquitin-binding domain of proteasome component Rpn13 and ubiquitin. The resulting structural model, based on restraints extracted by MTMDAT from limited proteolysis and modeled by HADDOCK, was compared to the published NMR structure, which relied on twelve unambiguous intermolecular NOE interactions. The MTMDAT-HADDOCK structure was of similar quality to structures generated using only chemical shift perturbation data derived by NMR titration experiments.

Conclusions

The new MTMDAT-HADDOCK pipeline enables direct high-throughput modeling of protein complexes from mass spectrometry data. MTMDAT-HADDOCK can be downloaded from http://www.ifm.liu.se/chemistry/molbiotech/maria_sunnerhagens_group/mtmdat/together with the manual and example files. The program is free for academic/non-commercial purposes.",2012-11-15 +23775795,miRspring: a compact standalone research tool for analyzing miRNA-seq data.,"High-throughput sequencing for microRNA (miRNA) profiling has revealed a vast complexity of miRNA processing variants, but these are difficult to discern for those without bioinformatics expertise and large computing capability. In this article, we present miRNA Sequence Profiling (miRspring) (http://mirspring.victorchang.edu.au), a software solution that creates a small portable research document that visualizes, calculates and reports on the complexities of miRNA processing. We designed an index-compression algorithm that allows the miRspring document to reproduce a complete miRNA sequence data set while retaining a small file size (typically <3 MB). Through analysis of 73 public data sets, we demonstrate miRspring's features in assessing quality parameters, miRNA cluster expression levels and miRNA processing. Additionally, we report on a new class of miRNA variants, which we term seed-isomiRs, identified through the novel visualization tools of the miRspring document. Further investigation identified that ∼30% of human miRBase entries are likely to have a seed-isomiR. We believe that miRspring will be a highly useful research tool that will enhance the analysis of miRNA data sets and thus increase our understanding of miRNA biology.",2013-06-17 +23166502,Inference of population splits and mixtures from genome-wide allele frequency data.,"Many aspects of the historical relationships between populations in a species are reflected in genetic data. Inferring these relationships from genetic data, however, remains a challenging task. 
In this paper, we present a statistical model for inferring the patterns of population splits and mixtures in multiple populations. In our model, the sampled populations in a species are related to their common ancestor through a graph of ancestral populations. Using genome-wide allele frequency data and a Gaussian approximation to genetic drift, we infer the structure of this graph. We applied this method to a set of 55 human populations and a set of 82 dog breeds and wild canids. In both species, we show that a simple bifurcating tree does not fully describe the data; in contrast, we infer many migration events. While some of the migration events that we find have been detected previously, many have not. For example, in the human data, we infer that Cambodians trace approximately 16% of their ancestry to a population ancestral to other extant East Asian populations. In the dog data, we infer that both the boxer and basenji trace a considerable fraction of their ancestry (9% and 25%, respectively) to wolves subsequent to domestication and that East Asian toy breeds (the Shih Tzu and the Pekingese) result from admixture between modern toy breeds and ""ancient"" Asian breeds. Software implementing the model described here, called TreeMix, is available at http://treemix.googlecode.com.",2012-11-15 +24825613,SIBIS: a Bayesian model for inconsistent protein sequence estimation.,"

Motivation

The prediction of protein coding genes is a major challenge that depends on the quality of genome sequencing, the accuracy of the model used to elucidate the exonic structure of the genes and the complexity of the gene splicing process leading to different protein variants. As a consequence, today's protein databases contain a huge amount of inconsistency, due to both natural variants and sequence prediction errors.

Results

We have developed a new method, called SIBIS, to detect such inconsistencies based on the evolutionary information in multiple sequence alignments. A Bayesian framework, combined with Dirichlet mixture models, is used to estimate the probability of observing specific amino acids and to detect inconsistent or erroneous sequence segments. We evaluated the performance of SIBIS on a reference set of protein sequences with experimentally validated errors and showed that the sensitivity is significantly higher than previous methods, with only a small loss of specificity. We also assessed a large set of human sequences from the UniProt database and found evidence of inconsistency in 48% of the previously uncharacterized sequences. We conclude that the integration of quality control methods like SIBIS in automatic analysis pipelines will be critical for the robust inference of structural, functional and phylogenetic information from these sequences.

Availability and implementation

Source code, implemented in C on a linux system, and the datasets of protein sequences are freely available for download at http://www.lbgi.fr/~julie/SIBIS.",2014-05-13 +21258060,"WIST: toolkit for rapid, customized LIMS development.","

Unlabelled

Workflow Information Storage Toolkit (WIST) is a set of application programming interfaces and web applications that allow for the rapid development of customized laboratory information management systems (LIMS). WIST provides common LIMS input components, and allows them to be arranged and configured using a flexible language that specifies each component's visual and semantic characteristics. WIST includes a complete set of web applications for adding, editing and viewing data, as well as a powerful setup tool that can build new LIMS modules by analyzing existing database schema.

Availability and implementation

WIST is implemented in Perl and may be obtained from http://vimss.sf.net under the BSD license.",2011-01-21 +24577429,Gene expression pattern for putative chloroplast localized COPII related proteins with emphasis on Rab related proteins.,"Vesicle transport occurs in the cytosol through COPI, COPII and a clathrin coated vesicle system for transport of lipids and proteins to different subcellular compartments. All three systems consist of several different protein components to maintain a functional transport. In chloroplasts photosynthesis takes place in thylakoids. Thylakoids contain a large amount of lipids and proteins but none of these components are produced there. Transport of lipids occurs from the envelope membrane where they are produced and through the aqueous stroma before being directed to the thylakoids. Nuclear encoded proteins use distinct pathways for entering thylakoids after import into chloroplasts. Transport of lipids through stroma requires either lipid transfer proteins, association between the envelope and the thylakoid membrane, or a vesicle transport system similar to the cytosolic one. No evidence exists for lipid transfer proteins in chloroplasts, nor for a consistent association between the envelope and the thylakoid membrane. However, vesicle transport has support from e.g., biochemical and genetics data as well as transelectron microscopy data. Moreover, a recent bioinformatics study revealed putatively COPII related proteins to be chloroplast localized in Arabidopsis and thus function in vesicle transport in chloroplasts. Here we present gene expression profiles of these putatively COPII related chloroplast localized proteins using Genevestigator (https://www.genevestigator.com/gv/) with special emphasis on Rab related proteins since they represent several stage of vesicle transport e.g., uncoating, tethering and fusion.",2014-02-26 +22732065,NetWalker: a contextual network analysis tool for functional genomics.,"

Background

Functional analyses of genomic data within the context of a priori biomolecular networks can give valuable mechanistic insights. However, such analyses are not a trivial task, owing to the complexity of biological networks and lack of computational methods for their effective integration with experimental data.

Results

We developed a software application suite, NetWalker, as a one-stop platform featuring a number of novel holistic (i.e. assesses the whole data distribution without requiring data cutoffs) data integration and analysis methods for network-based comparative interpretations of genome-scale data. The central analysis components, NetWalk and FunWalk, are novel random walk-based network analysis methods that provide unique analysis capabilities to assess the entire data distributions together with network connectivity to prioritize molecular and functional networks, respectively, most highlighted in the supplied data. Extensive inter-operability between the analysis components and with external applications, including R, adds to the flexibility of data analyses. Here, we present a detailed computational analysis of our microarray gene expression data from MCF7 cells treated with lethal and sublethal doses of doxorubicin.

Conclusion

NetWalker, a detailed step-by-step tutorial containing the analyses presented in this paper and a manual are available at the web site http://netwalkersuite.org.",2012-06-25 +25344051,Elevated CXCL1 expression in breast cancer stroma predicts poor prognosis and is inversely associated with expression of TGF-β signaling proteins.,"

Background

CXCL1 is a chemotactic cytokine shown to regulate breast cancer progression and chemo-resistance. However, the prognostic significance of CXCL1 expression in breast cancer has not been fully characterized. Fibroblasts are important cellular components of the breast tumor microenvironment, and recent studies indicate that this cell type is a potential source of CXCL1 expression in breast tumors. The goal of this study was to further characterize the expression patterns of CXCL1 in breast cancer stroma, determine the prognostic significance of stromal CXCL1 expression, and identify factors affecting stromal CXCL1 expression.

Methods

Stromal CXCL1 protein expression was analyzed in 54 normal and 83 breast carcinomas by immunohistochemistry staining. RNA expression of CXCL1 in breast cancer stroma was analyzed through data mining in http://www.Oncomine.org. The relationships between CXCL1 expression and prognostic factors were analyzed by univariate analysis. Co-immunofluorescence staining for CXCL1, α-Smooth Muscle Actin (α-SMA) and Fibroblast Specific Protein 1 (FSP1) expression was performed to analyze expression of CXCL1 in fibroblasts. By candidate profiling, the TGF-β signaling pathway was identified as a regulator of CXCL1 expression in fibroblasts. Expression of TGF-β and SMAD gene products were analyzed by immunohistochemistry and data mining analysis. The relationships between stromal CXCL1 and TGF-β signaling components were analyzed by univariate analysis. Carcinoma associated fibroblasts isolated from MMTV-PyVmT mammary tumors were treated with recombinant TGF-β and analyzed for CXCL1 promoter activity by luciferase assay, and protein secretion by ELISA.

Results

Elevated CXCL1 expression in breast cancer stroma correlated with tumor grade, disease recurrence and decreased patient survival. By co-immunofluorescence staining, CXCL1 expression overlapped with expression of α-SMA and FSP1 proteins. Expression of stromal CXCL1 protein expression inversely correlated with expression of TGF-β signaling components. Treatment of fibroblasts with TGF-β suppressed CXCL1 secretion and promoter activity.

Conclusions

Increased CXCL1 expression in breast cancer stroma correlates with poor patient prognosis. Furthermore, CXCL1 expression is localized to α-SMA and FSP1 positive fibroblasts, and is negatively regulated by TGF-β signaling. These studies indicate that decreased TGF-β signaling in carcinoma associated fibroblasts enhances CXCL1 expression in fibroblasts, which could contribute to breast cancer progression.",2014-10-24 +24751218,Allosteric regulation of phosphofructokinase controls the emergence of glycolytic oscillations in isolated yeast cells.,"

Unlabelled

Oscillations are widely distributed in nature and synchronization of oscillators has been described at the cellular level (e.g. heart cells) and at the population level (e.g. fireflies). Yeast glycolysis is the best known oscillatory system, although it has been studied almost exclusively at the population level (i.e. limited to observations of average behaviour in synchronized cultures). We studied individual yeast cells that were positioned with optical tweezers in a microfluidic chamber to determine the precise conditions for autonomous glycolytic oscillations. Hopf bifurcation points were determined experimentally in individual cells as a function of glucose and cyanide concentrations. The experiments were analyzed in a detailed mathematical model and could be interpreted in terms of an oscillatory manifold in a three-dimensional state-space; crossing the boundaries of the manifold coincides with the onset of oscillations and positioning along the longitudinal axis of the volume sets the period. The oscillatory manifold could be approximated by allosteric control values of phosphofructokinase for ATP and AMP.

Database

The mathematical models described here have been submitted to the JWS Online Cellular Systems Modelling Database and can be accessed at http://jjj.mib.ac.uk/webMathematica/UItester.jsp?modelName=gustavsson5. [Database section added 14 May 2014 after original online publication].",2014-05-12 +25699093,VERSE: a novel approach to detect virus integration in host genomes through reference genome customization.,"Fueled by widespread applications of high-throughput next generation sequencing (NGS) technologies and urgent need to counter threats of pathogenic viruses, large-scale studies were conducted recently to investigate virus integration in host genomes (for example, human tumor genomes) that may cause carcinogenesis or other diseases. A limiting factor in these studies, however, is rapid virus evolution and resulting polymorphisms, which prevent reads from aligning readily to commonly used virus reference genomes, and, accordingly, make virus integration sites difficult to detect. Another confounding factor is host genomic instability as a result of virus insertions. To tackle these challenges and improve our capability to identify cryptic virus-host fusions, we present a new approach that detects Virus intEgration sites through iterative Reference SEquence customization (VERSE). To the best of our knowledge, VERSE is the first approach to improve detection through customizing reference genomes. Using 19 human tumors and cancer cell lines as test data, we demonstrated that VERSE substantially enhanced the sensitivity of virus integration site detection. VERSE is implemented in the open source package VirusFinder 2 that is available at http://bioinfo.mc.vanderbilt.edu/VirusFinder/.",2015-01-20 +25042419,Predictors of adverse events among patients undergoing primary percutaneous coronary intervention: insights from a pooled analysis of the COMFORTABLE AMI and EXAMINATION trials.,"

Aims

The aim of this study was to identify predictors of adverse events among patients with ST-elevation myocardial infarction (STEMI) undergoing contemporary primary percutaneous coronary intervention (PCI).

Methods and results

Individual data of 2,655 patients from two primary PCI trials (EXAMINATION, N=1,504; COMFORTABLE AMI, N=1,161) with identical endpoint definitions and event adjudication were pooled. Predictors of all-cause death or any reinfarction and definite stent thrombosis (ST) and target lesion revascularisation (TLR) outcomes at one year were identified by multivariable Cox regression analysis. Killip class III or IV was the strongest predictor of all-cause death or any reinfarction (OR 5.11, 95% CI: 2.48-10.52), definite ST (OR 7.74, 95% CI: 2.87-20.93), and TLR (OR 2.88, 95% CI: 1.17-7.06). Impaired left ventricular ejection fraction (OR 4.77, 95% CI: 2.10-10.82), final TIMI flow 0-2 (OR 1.93, 95% CI: 1.05-3.54), arterial hypertension (OR 1.69, 95% CI: 1.11-2.59), age (OR 1.68, 95% CI: 1.41-2.01), and peak CK (OR 1.25, 95% CI: 1.02-1.54) were independent predictors of all-cause death or any reinfarction. Allocation to treatment with DES was an independent predictor of a lower risk of definite ST (OR 0.35, 95% CI: 0.16-0.74) and any TLR (OR 0.34, 95% CI: 0.21-0.54).

Conclusions

Killip class remains the strongest predictor of all-cause death or any reinfarction among STEMI patients undergoing primary PCI. DES use independently predicts a lower risk of TLR and definite ST compared with BMS. The COMFORTABLE AMI trial is registered at: http://www.clinicaltrials.gov/ct2/show/NCT00962416. The EXAMINATION trial is registered at: http://www.clinicaltrials.gov/ct2/show/NCT00828087.",2015-08-01 +25883046,RNA-Puzzles Round II: assessment of RNA structure prediction programs applied to three large RNA structures.,"This paper is a report of a second round of RNA-Puzzles, a collective and blind experiment in three-dimensional (3D) RNA structure prediction. Three puzzles, Puzzles 5, 6, and 10, represented sequences of three large RNA structures with limited or no homology with previously solved RNA molecules. A lariat-capping ribozyme, as well as riboswitches complexed to adenosylcobalamin and tRNA, were predicted by seven groups using RNAComposer, ModeRNA/SimRNA, Vfold, Rosetta, DMD, MC-Fold, 3dRNA, and AMBER refinement. Some groups derived models using data from state-of-the-art chemical-mapping methods (SHAPE, DMS, CMCT, and mutate-and-map). The comparisons between the predictions and the three subsequently released crystallographic structures, solved at diffraction resolutions of 2.5-3.2 Å, were carried out automatically using various sets of quality indicators. The comparisons clearly demonstrate the state of present-day de novo prediction abilities as well as the limitations of these state-of-the-art methods. All of the best prediction models have similar topologies to the native structures, which suggests that computational methods for RNA structure prediction can already provide useful structural information for biological problems. However, the prediction accuracy for non-Watson-Crick interactions, key to proper folding of RNAs, is low and some predicted models had high Clash Scores. 
These two difficulties point to some of the continuing bottlenecks in RNA structure prediction. All submitted models are available for download at http://ahsoka.u-strasbg.fr/rnapuzzles/.",2015-04-16 +25756864,SEnviro: a sensorized platform proposal using open hardware and open standards.,"The need for constant monitoring of environmental conditions has produced an increase in the development of wireless sensor networks (WSN). The drive towards smart cities has produced the need for smart sensors to be able to monitor what is happening in our cities. This, combined with the decrease in hardware component prices and the increase in the popularity of open hardware, has favored the deployment of sensor networks based on open hardware. The new trends in Internet Protocol (IP) communication between sensor nodes allow sensor access via the Internet, turning them into smart objects (Internet of Things and Web of Things). Currently, WSNs provide data in different formats. There is a lack of communication protocol standardization, which turns into interoperability issues when connecting different sensor networks or even when connecting different sensor nodes within the same network. This work presents a sensorized platform proposal that adheres to the principles of the Internet of Things and the Web of Things. Wireless sensor nodes were built using open hardware solutions, and communications rely on the HTTP/IP Internet protocols. The Open Geospatial Consortium (OGC) SensorThings API candidate standard was used as a neutral format to avoid interoperability issues. An environmental WSN developed following the proposed architecture was built as a proof of concept. Details on how to build each node and a study regarding energy concerns are presented.",2015-03-06 +26496690,"Developmental Effects of the ToxCast™ Phase I and Phase II Chemicals in Caenorhabditis elegans and Corresponding Responses in Zebrafish, Rats, and Rabbits.","

Background

Modern toxicology is shifting from an observational to a mechanistic science. As part of this shift, high-throughput toxicity assays are being developed using alternative, nonmammalian species to prioritize chemicals and develop prediction models of human toxicity.

Methods

The nematode Caenorhabditis elegans (C. elegans) was used to screen the U.S. Environmental Protection Agency's (EPA's) ToxCast™ Phase I and Phase II libraries, which contain 292 and 676 chemicals, respectively, for chemicals leading to decreased larval development and growth. Chemical toxicity was evaluated using three parameters: a biologically defined effect size threshold, half-maximal activity concentration (AC50), and lowest effective concentration (LEC).

Results

Across both the Phase I and Phase II libraries, 62% of the chemicals were classified as active ≤ 200 μM in the C. elegans assay. Chemical activities and potencies in C. elegans were compared with those from two zebrafish embryonic development toxicity studies and developmental toxicity data for rats and rabbits. Concordance of chemical activity was higher between C. elegans and one zebrafish assay across Phase I chemicals (79%) than with a second zebrafish assay (59%). Using C. elegans or zebrafish to predict rat or rabbit developmental toxicity resulted in balanced accuracies (the average value of the sensitivity and specificity for an assay) ranging from 45% to 53%, slightly lower than the concordance between rat and rabbit (58%).

Conclusions

Here, we present an assay that quantitatively and reliably describes the effects of chemical toxicants on C. elegans growth and development. We found significant overlap in the activity of chemicals in the ToxCast™ libraries between C. elegans and zebrafish developmental screens. Incorporating C. elegans toxicological assays as part of a battery of in vitro and in vivo assays provides additional information for the development of models to predict a chemical's potential toxicity to humans.

Citation

Boyd WA, Smith MV, Co CA, Pirone JR, Rice JR, Shockley KR, Freedman JH. 2016. Developmental effects of the ToxCast™ Phase I and II chemicals in Caenorhabditis elegans and corresponding responses in zebrafish, rats, and rabbits. Environ Health Perspect 124:586-593; http://dx.doi.org/10.1289/ehp.1409645.",2015-10-23 +24149051,Exploring high dimensional data with Butterfly: a novel classification algorithm based on discrete dynamical systems.,"

Motivation

We introduce a novel method for visualizing high dimensional data via a discrete dynamical system. This method provides a 2D representation of the relationship between subjects according to a set of variables without geometric projections, transformed axes or principal components. The algorithm exploits a memory-type mechanism inherent in a certain class of discrete dynamical systems collectively referred to as the chaos game that are closely related to iterative function systems. The goal of the algorithm was to create a human readable representation of high dimensional patient data that was capable of detecting unrevealed subclusters of patients from within anticipated classifications. This provides a mechanism to further pursue a more personalized exploration of pathology when used with medical data. For clustering and classification protocols, the dynamical system portion of the algorithm is designed to come after some feature selection filter and before some model evaluation (e.g. clustering accuracy) protocol. In the version given here, a univariate features selection step is performed (in practice more complex feature selection methods are used), a discrete dynamical system is driven by this reduced set of variables (which results in a set of 2D cluster models), these models are evaluated for their accuracy (according to a user-defined binary classification) and finally a visual representation of the top classification models are returned. Thus, in addition to the visualization component, this methodology can be used for both supervised and unsupervised machine learning as the top performing models are returned in the protocol we describe here.

Results

Butterfly, the algorithm we introduce and provide working code for, uses a discrete dynamical system to classify high dimensional data and provide a 2D representation of the relationship between subjects. We report results on three datasets (two in the article; one in the appendix) including a public lung cancer dataset that comes along with the included Butterfly R package. In the included R script, a univariate feature selection method is used for the dimension reduction step, but in the future we wish to use a more powerful multivariate feature reduction method based on neural networks (Kriesel, 2007).

Availability and implementation

A script written in R (designed to run on R studio) accompanies this article that implements this algorithm and is available at http://butterflygeraci.codeplex.com/. For details on the R package or for help installing the software refer to the accompanying document, Supporting Material and Appendix.",2013-10-21 +26740951,Injury Risk Estimation Expertise: Interdisciplinary Differences in Performance on the ACL Injury Risk Estimation Quiz.,"

Background

Simple observational assessment of movement is a potentially low-cost method for anterior cruciate ligament (ACL) injury screening and prevention. Although many individuals utilize some form of observational assessment of movement, there are currently no substantial data on group skill differences in observational screening of ACL injury risk.

Purpose/hypothesis

The purpose of this study was to compare various groups' abilities to visually assess ACL injury risk as well as the associated strategies and ACL knowledge levels. The hypothesis was that sports medicine professionals would perform better than coaches and exercise science academics/students and that these subgroups would all perform better than parents and other general population members.

Study design

Cross-sectional study; Level of evidence, 3.

Methods

A total of 428 individuals, including physicians, physical therapists, athletic trainers, strength and conditioning coaches, exercise science researchers/students, athletes, parents, and members of the general public participated in the study. Participants completed the ACL Injury Risk Estimation Quiz (ACL-IQ) and answered questions related to assessment strategy and ACL knowledge.

Results

Strength and conditioning coaches, athletic trainers, physical therapists, and exercise science students exhibited consistently superior ACL injury risk estimation ability (+2 SD) as compared with sport coaches, parents of athletes, and members of the general public. The performance of a substantial number of individuals in the exercise sciences/sports medicines (approximately 40%) was similar to or exceeded clinical instrument-based biomechanical assessment methods (eg, ACL nomogram). Parents, sport coaches, and the general public had lower ACL-IQ, likely due to their lower ACL knowledge and to rating the importance of knee/thigh motion lower and weight and jump height higher.

Conclusion

Substantial cross-professional/group differences in visual ACL injury risk estimation exist. The relatively profound differences in injury risk estimation accuracy and their potential implications for risk screening suggest the need for additional training and outreach (see http://www.ACL-IQ.org).

Clinical relevance

Parents and sport coaches would likely benefit from training or use of decision support tools such as the ACL nomogram to assess ACL injury risk. In addition, physicians and other sports medicine professionals may also benefit from improving risk estimation performance to reach clinical biomechanical standards.",2015-11-16 +21674231,"Metaxa: a software tool for automated detection and discrimination among ribosomal small subunit (12S/16S/18S) sequences of archaea, bacteria, eukaryotes, mitochondria, and chloroplasts in metagenomes and environmental sequencing datasets.","The ribosomal small subunit (SSU) rRNA gene has emerged as an important genetic marker for taxonomic identification in environmental sequencing datasets. In addition to being present in the nucleus of eukaryotes and the core genome of prokaryotes, the gene is also found in the mitochondria of eukaryotes and in the chloroplasts of photosynthetic eukaryotes. These three sets of genes are conceptually paralogous and should in most situations not be aligned and analyzed jointly. To identify the origin of SSU sequences in complex sequence datasets has hitherto been a time-consuming and largely manual undertaking. However, the present study introduces Metaxa ( http://microbiology.se/software/metaxa/ ), an automated software tool to extract full-length and partial SSU sequences from larger sequence datasets and assign them to an archaeal, bacterial, nuclear eukaryote, mitochondrial, or chloroplast origin. Using data from reference databases and from full-length organelle and organism genomes, we show that Metaxa detects and scores SSU sequences for origin with very low proportions of false positives and negatives. 
We believe that this tool will be useful in microbial and evolutionary ecology as well as in metagenomics.",2011-06-15 +25371430,IQ-TREE: a fast and effective stochastic algorithm for estimating maximum-likelihood phylogenies.,"Large phylogenomics data sets require fast tree inference methods, especially for maximum-likelihood (ML) phylogenies. Fast programs exist, but due to inherent heuristics to find optimal trees, it is not clear whether the best tree is found. Thus, there is need for additional approaches that employ different search strategies to find ML trees and that are at the same time as fast as currently available ML programs. We show that a combination of hill-climbing approaches and a stochastic perturbation method can be time-efficiently implemented. If we allow the same CPU time as RAxML and PhyML, then our software IQ-TREE found higher likelihoods between 62.2% and 87.1% of the studied alignments, thus efficiently exploring the tree-space. If we use the IQ-TREE stopping rule, RAxML and PhyML are faster in 75.7% and 47.1% of the DNA alignments and 42.2% and 100% of the protein alignments, respectively. However, the range of obtaining higher likelihoods with IQ-TREE improves to 73.3-97.1%. IQ-TREE is freely available at http://www.cibiv.at/software/iqtree.",2014-11-03 +23418184,A powerful Bayesian meta-analysis method to integrate multiple gene set enrichment studies.,"

Motivation

Much research effort has been devoted to the identification of enriched gene sets for microarray experiments. However, identified gene sets are often found to be inconsistent among independent studies. This is probably owing to the noisy data of microarray experiments coupled with small sample sizes of individual studies. Therefore, combining information from multiple studies is likely to improve the detection of truly enriched gene classes. As more and more data become available, it calls for statistical methods to integrate information from multiple studies, also known as meta-analysis, to improve the power of identifying enriched gene sets.

Results

We propose a Bayesian model that provides a coherent framework for joint modeling of both gene set information and gene expression data from multiple studies, to improve the detection of enriched gene sets by leveraging information from different sources available. One distinct feature of our method is that it directly models the gene expression data, instead of using summary statistics, when synthesizing studies. Besides, the proposed model is flexible and offers an appropriate treatment of between-study heterogeneities that frequently arise in the meta-analysis of microarray experiments. We show that under our Bayesian model, the full posterior conditionals all have known distributions, which greatly facilitates the MCMC computation. Simulation results show that the proposed method can improve the power of gene set enrichment meta-analysis, as opposed to existing methods developed by Shen and Tseng (2010, Bioinformatics, 26, 1316-1323), and it is not sensitive to mild or moderate deviations from the distributional assumption for gene expression data. We illustrate the proposed method through an application of combining eight lung cancer datasets for gene set enrichment analysis, which demonstrates the usefulness of the method.

Availability

http://qbrc.swmed.edu/software/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-15 +24816342,dbCRY: a Web-based comparative and evolutionary genomics platform for blue-light receptors.,"Cryptochromes are flavoproteins that play a central role in the circadian oscillations of all living organisms except archaea. Cryptochromes are clustered into three subfamilies: plant-type cryptochromes, animal-type cryptochromes and cryptochrome-DASH proteins. These subfamilies are composed of photolyase/cryptochrome superfamily with 6-4 photolyase and cyclobutane pyrimidine dimer photolyase. Cryptochromes have conserved domain architectures with two distinct domains, an N-terminal photolyase-related domain and a C-terminal domain. Although the molecular function and domain architecture of cryptochromes are conserved, their molecular mechanisms differ between plants and animals. Thus, cryptochromes are one of the best candidates for comparative and evolutionary studies. Here, we have developed a Web-based platform for comparative and evolutionary studies of cryptochromes, dbCRY (http://www.dbcryptochrome.org/). A pipeline built upon the consensus domain profile was applied to 1438 genomes and identified 1309 genes. To support comparative and evolutionary genomics studies, the Web interface provides diverse functions such as (i) browsing by species, (ii) protein domain analysis, (iii) multiple sequence alignment, (iv) homology search and (v) extended analysis opportunities through the implementation of 'Favorite Browser' powered by the Comparative Fungal Genomics Platform 2.0 (CFGP 2.0; http://cfgp.snu.ac.kr/). dbCRY would serve as a standardized and systematic solution for cryptochrome genomics studies. Database URL: http://www.dbcryptochrome.org/",2014-05-09 +23558750,Probabilistic error correction for RNA sequencing.,"Sequencing of RNAs (RNA-Seq) has revolutionized the field of transcriptomics, but the reads obtained often contain errors. 
Read error correction can have a large impact on our ability to accurately assemble transcripts. This is especially true for de novo transcriptome analysis, where a reference genome is not available. Current read error correction methods, developed for DNA sequence data, cannot handle the overlapping effects of non-uniform abundance, polymorphisms and alternative splicing. Here we present SEquencing Error CorrEction in Rna-seq data (SEECER), a hidden Markov Model (HMM)-based method, which is the first to successfully address these problems. SEECER efficiently learns hundreds of thousands of HMMs and uses these to correct sequencing errors. Using human RNA-Seq data, we show that SEECER greatly improves on previous methods in terms of quality of read alignment to the genome and assembly accuracy. To illustrate the usefulness of SEECER for de novo transcriptome studies, we generated new RNA-Seq data to study the development of the sea cucumber Parastichopus parvimensis. Our corrected assembled transcripts shed new light on two important stages in sea cucumber development. Comparison of the assembled transcripts to known transcripts in other species has also revealed novel transcripts that are unique to sea cucumber, some of which we have experimentally validated. Supporting website: http://sb.cs.cmu.edu/seecer/.",2013-04-04 +24273012,SpliceProt: a protein sequence repository of predicted human splice variants.,"The mechanism of alternative splicing in the transcriptome may increase the proteome diversity in eukaryotes. In proteomics, several studies aim to use protein sequence repositories to annotate MS experiments or to detect differentially expressed proteins. However, the available protein sequence repositories are not designed to fully detect protein isoforms derived from mRNA splice variants. 
To foster knowledge for the field, here we introduce SpliceProt, a new protein sequence repository of transcriptome experimental data used to investigate for putative splice variants in human proteomes. Current version of SpliceProt contains 159 719 non-redundant putative polypeptide sequences. The assessment of the potential of SpliceProt in detecting new protein isoforms resulting from alternative splicing was performed by using publicly available proteomics data. We detected 173 peptides hypothetically derived from splice variants, of which 54 are not present in the UniprotKB/TrEMBL sequence repository. In comparison to other protein sequence repositories, SpliceProt contains a greater number of unique peptides and is able to detect more splice variants. Therefore, SpliceProt provides a solution for the annotation of proteomics experiments regarding splice isoforms. The repository files containing the translated sequences of the predicted splice variants and a visualization tool are freely available at http://lbbc.inca.gov.br/spliceprot.",2014-02-01 +24172133,Inference of the properties of the recombination process from whole bacterial genomes.,"Patterns of linkage disequilibrium, homoplasy, and incompatibility are difficult to interpret because they depend on several factors, including the recombination process and the population structure. Here we introduce a novel model-based framework to infer recombination properties from such summary statistics in bacterial genomes. The underlying model is sequentially Markovian so that data can be simulated very efficiently, and we use approximate Bayesian computation techniques to infer parameters. As this does not require us to calculate the likelihood function, the model can be easily extended to investigate less probed aspects of recombination. In particular, we extend our model to account for the bias in the recombination process whereby closely related bacteria recombine more often with one another. 
We show that this model provides a good fit to a data set of Bacillus cereus genomes and estimate several recombination properties, including the rate of bias in recombination. All the methods described in this article are implemented in a software package that is freely available for download at http://code.google.com/p/clonalorigin/.",2013-10-30 +21762169,Development of an aquatic pathogen database (AquaPathogen X) and its utilization in tracking emerging fish virus pathogens in North America.,"The AquaPathogen X database is a template for recording information on individual isolates of aquatic pathogens and is freely available for download (http://wfrc.usgs.gov). This database can accommodate the nucleotide sequence data generated in molecular epidemiological studies along with the myriad of abiotic and biotic traits associated with isolates of various pathogens (e.g. viruses, parasites and bacteria) from multiple aquatic animal host species (e.g. fish, shellfish and shrimp). The cataloguing of isolates from different aquatic pathogens simultaneously is a unique feature to the AquaPathogen X database, which can be used in surveillance of emerging aquatic animal diseases and elucidation of key risk factors associated with pathogen incursions into new water systems. An application of the template database that stores the epidemiological profiles of fish virus isolates, called Fish ViroTrak, was also developed. 
Exported records for two aquatic rhabdovirus species emerging in North America were used in the implementation of two separate web-accessible databases: the Molecular Epidemiology of Aquatic Pathogens infectious haematopoietic necrosis virus (MEAP-IHNV) database (http://gis.nacse.org/ihnv/) released in 2006 and the MEAP- viral haemorrhagic septicaemia virus (http://gis.nacse.org/vhsv/) database released in 2010.",2011-08-01 +26104511,Epigenetics could explain some Moroccan population colorectal cancers peculiarities: microsatellite instability pathway exploration.,"

Background

Colorectal Cancers (CRC) are one of the most common malignancies in the world. Their incidence in Morocco, between 2005 and 2007, was 5.6 per 100000 inhabitants, which is very low compared to what is found in developed countries. In addition, CRCs show a high frequency of rectal localizations and occur in a younger population in Morocco compared to what is found in developed countries. The purpose of this study is to confirm these CRC peculiarities in Morocco and try to explain them by exploring the microsatellite instability molecular pathway.

Methods

This is a prospective observational study conducted since January 2010, including 385 patients admitted in Hassan II University Hospital of Fez. We collected clinical, radiological and pathological data. We investigated the expression of mismatch repair (MMR) proteins in 214 patients and BRAF gene mutations in 159 patients.

Results

Mean age was 55.08 +/- 15.16 years. 36.5% of patients were less than 50 years old and 49.3% of tumors were localized in the rectum. Loss of MMR protein expression was observed in 11.2% of cases. It was independently associated with individual or family history of cancer belonging to Hereditary Non-Polyposis Colorectal Cancer (HNPCC) spectrum (p = 0.01) and proximal localization (p = 0.02). No BRAF mutation was detected in all cases.

Conclusions

These results confirm the high occurrence of CRCs in young patients and the high frequency of rectal localizations in the Moroccan population. They mostly show an absence of BRAF mutation, suggesting a rarity of the MLH1 promoter hypermethylation pathway, which may even partially explain the CRC peculiarities in our context.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/5868184711716884.",2015-06-24 +22544465,Computational approaches to understanding dendritic cell responses to influenza virus infection.,"The evolution of immunology research from measurements of single entities to large-scale data-intensive assays necessitates the integration of experimental work with bioinformatics and computational approaches. The introduction of physics into immunology has led to the study of new phenomena, such as cellular noise, which is likely to prove increasingly important to understand immune system responses. The fusion of ""hard science"" and biology is also leading to a re-examination of data acquisition, analysis, and statistical validation and is resulting in the development of easy-to-access tools for immunology research. Here, we review some of our models, computational tools, and results related to studies of the innate immune response of human dendritic cells to viral infection. Our project functions on an open model across institutions with electronic record keeping and public sharing of data. Our tools, models, and data can be accessed at http://tsb.mssm.edu/primeportal/ .",2012-12-01 +22954627,Genome-wide in silico prediction of gene expression.,"

Motivation

Modelling the regulation of gene expression can provide insight into the regulatory roles of individual transcription factors (TFs) and histone modifications. Recently, Ouyang et al. in 2009 modelled gene expression levels in mouse embryonic stem (mES) cells using in vivo ChIP-seq measurements of TF binding. ChIP-seq TF binding data, however, are tissue-specific and relatively difficult to obtain. This limits the applicability of gene expression models that rely on ChIP-seq TF binding data.

Results

In this study, we build regression-based models that relate gene expression to the binding of 12 different TFs, 7 histone modifications and chromatin accessibility (DNase I hypersensitivity) in two different tissues. We find that expression models based on computationally predicted TF binding can achieve similar accuracy to those using in vivo TF binding data and that including binding at weak sites is critical for accurate prediction of gene expression. We also find that incorporating histone modification and chromatin accessibility data results in additional accuracy. Surprisingly, we find that models that use no TF binding data at all, but only histone modification and chromatin accessibility data, can be as (or more) accurate than those based on in vivo TF binding data.

Availability and implementation

All scripts, motifs and data presented in this article are available online at http://research.imb.uq.edu.au/t.bailey/supplementary_data/McLeay2011a.",2012-09-06 +24928559,Detecting overlapping protein complexes based on a generative model with functional and topological properties.,"

Background

Identification of protein complexes can help us get a better understanding of cellular mechanism. With the increasing availability of large-scale protein-protein interaction (PPI) data, numerous computational approaches have been proposed to detect complexes from the PPI networks. However, most of the current approaches do not consider overlaps among complexes or functional annotation information of individual proteins. Therefore, they might not be able to reflect the biological reality faithfully or make full use of the available domain-specific knowledge.

Results

In this paper, we develop a Generative Model with Functional and Topological Properties (GMFTP) to describe the generative processes of the PPI network and the functional profile. The model provides a working mechanism for capturing the interaction structures and the functional patterns of proteins. By combining the functional and topological properties, we formulate the problem of identifying protein complexes as that of detecting a group of proteins which frequently interact with each other in the PPI network and have similar annotation patterns in the functional profile. Using the idea of link communities, our method naturally deals with overlaps among complexes. The benefits brought by the functional properties are demonstrated by real data analysis. The results evaluated using four criteria with respect to two gold standards show that GMFTP has a competitive performance over the state-of-the-art approaches. The effectiveness of detecting overlapping complexes is also demonstrated by analyzing the topological and functional features of multi- and mono-group proteins.

Conclusions

Based on the results obtained in this study, GMFTP presents to be a powerful approach for the identification of overlapping protein complexes using both the PPI network and the functional profile. The software can be downloaded from http://mail.sysu.edu.cn/home/stsddq@mail.sysu.edu.cn/dai/others/GMFTP.zip.",2014-06-13 +24700732,Analysis of TP53 mutation status in human cancer cell lines: a reassessment.,"Tumor-derived cell lines play an important role in the investigation of tumor biology and genetics. Across a wide array of studies, they have been tools of choice for the discovery of important genes involved in cancer and for the analysis of the cellular pathways that are impaired by diverse oncogenic events. They are also invaluable for screening novel anticancer drugs. The TP53 protein is a major component of multiple pathways that regulate cellular response to various types of stress. Therefore, TP53 status affects the phenotype of tumor cell lines profoundly and must be carefully ascertained for any experimental project. In the present review, we use the 2014 release of the UMD TP53 database to show that TP53 status is still controversial for numerous cell lines, including some widely used lines from the NCI-60 panel. Our analysis clearly confirms that, despite numerous warnings, the misidentification of cell lines is still present as a silent and neglected issue, and that extreme care must be taken when determining the status of p53, because errors may lead to disastrous experimental interpretations. A novel compendium gathering the TP53 status of 2,500 cell lines has been made available (http://p53.fr). A stand-alone application can be used to browse the database and extract pertinent information on cell lines and associated TP53 mutations. 
It will be updated regularly to minimize any scientific issues associated with the use of misidentified cell lines (http://p53.fr).",2014-05-06 +24972831,"SARA-Coffee web server, a tool for the computation of RNA sequence and structure multiple alignments.","This article introduces the SARA-Coffee web server; a service allowing the online computation of 3D structure based multiple RNA sequence alignments. The server makes it possible to combine sequences with and without known 3D structures. Given a set of sequences SARA-Coffee outputs a multiple sequence alignment along with a reliability index for every sequence, column and aligned residue. SARA-Coffee combines SARA, a pairwise structural RNA aligner with the R-Coffee multiple RNA aligner in a way that has been shown to improve alignment accuracy over most sequence aligners when enough structural data is available. The server can be accessed from http://tcoffee.crg.cat/apps/tcoffee/do:saracoffee.",2014-06-27 +24798122,Extramedullary spinal cysts in dogs.,"

Objective

To (1) synthesize the terminology used to classify extramedullary spinal cysts in dogs to clarify some of the commonly reported misconceptions, and (2) propose a classification scheme to limit confusion with terminology.

Study design

Literature review.

Methods

An online bibliographic search was performed in January 2013 for articles relating to extramedullary spinal cysts in dogs using PubMed (http://www.pubmed.gov/) and Google Scholar (http://scholar.google.com/) databases. Only peer-reviewed clinical literature describing cystic lesions pertaining to the spinal cord and associated structures was included.

Results

From 1962 to 2013, 42 articles were identified; 25 (95 dogs) reported meningeal cysts, 10 (24 dogs) described 60 extradural cysts, 3 reports (18 dogs) described discal cysts or acute compressive hydrated nucleus pulposus extrusions (HNPE). Spinal cysts were categorized by location based on cross-sectional imaging as meningeal or extradural non-meningeal. Sub-classification was then performed based on surgical findings and pathology. Meningeal cysts included arachnoid diverticulae and Tarlov (perineural) cysts. Extradural non-meningeal cysts included intraspinal cysts of the vertebral joints, ligaments and discs. Discal cysts also fit this category and have been reported extensively in humans but appear rare in dogs.

Conclusions

Extramedullary spinal cysts should be first classified according to location with a sub-classification according to pathologic and surgical findings. Previous canine cases of discal cysts appear to represent a different disease entity and the term acute compressive HNPE is therefore preferred.",2014-05-05 +24799435,SymD webserver: a platform for detecting internally symmetric protein structures.,"Internal symmetry of a protein structure is the pseudo-symmetry that a single protein chain sometimes exhibits. This is in contrast to the symmetry with which monomers are arranged in many multimeric protein complexes. SymD is a program that detects proteins with internal symmetry. It proved to be useful for analyzing protein structure, function and modeling. This web-based interactive tool was developed by implementing the SymD algorithm. To the best of our knowledge, SymD webserver is the first tool of its kind with which users can easily study the symmetry of the protein they are interested in by uploading the structure or retrieving it from databases. It uses the Galaxy platform to take advantage of its extensibility and displays the symmetry properties, the symmetry axis and the sequence alignment of the structures before and after the symmetry transformation via an interactive graphical visualization environment in any modern web browser. An Example Run video displays the workflow to help users navigate. SymD webserver is publicly available at http://symd.nci.nih.gov.",2014-05-05 +23323883,MotifLab: a tools and data integration workbench for motif discovery and regulatory sequence analysis.,"

Background

Traditional methods for computational motif discovery often suffer from poor performance. In particular, methods that search for sequence matches to known binding motifs tend to predict many non-functional binding sites because they fail to take into consideration the biological state of the cell. In recent years, genome-wide studies have generated a lot of data that has the potential to improve our ability to identify functional motifs and binding sites, such as information about chromatin accessibility and epigenetic states in different cell types. However, it is not always trivial to make use of this data in combination with existing motif discovery tools, especially for researchers who are not skilled in bioinformatics programming.

Results

Here we present MotifLab, a general workbench for analysing regulatory sequence regions and discovering transcription factor binding sites and cis-regulatory modules. MotifLab supports comprehensive motif discovery and analysis by allowing users to integrate several popular motif discovery tools as well as different kinds of additional information, including phylogenetic conservation, epigenetic marks, DNase hypersensitive sites, ChIP-Seq data, positional binding preferences of transcription factors, transcription factor interactions and gene expression. MotifLab offers several data-processing operations that can be used to create, manipulate and analyse data objects, and complete analysis workflows can be constructed and automatically executed within MotifLab, including graphical presentation of the results.

Conclusions

We have developed MotifLab as a flexible workbench for motif analysis in a genomic context. The flexibility and effectiveness of this workbench has been demonstrated on selected test cases, in particular two previously published benchmark data sets for single motifs and modules, and a realistic example of genes responding to treatment with forskolin. MotifLab is freely available at http://www.motiflab.org.",2013-01-16 +22895966,Histamine type 2 receptor antagonists as adjuvant treatment for resected colorectal cancer.,"

Background

Anecdotal reports of tumour regression with histamine type 2 receptor antagonists (H(2)RAs) have led to a series of trials with this class of drug as adjuvant therapy to try and improve outcomes in patients with resected colorectal cancers. There was a plausible scientific rationale suggesting merit in this strategy. This included improved immune surveillance (by way of increasing tumour infiltrating lymphocytes), inhibiting the direct proliferative effect of histamine as a growth factor for colorectal cancer and, in the case of cimetidine, inhibiting endothelial expression of E-selectin (a cell adhesion molecule thought to be critical for metastatic spread).

Objectives

To determine if H(2)RAs improve overall survival when used as pre- and/or postoperative therapy in colorectal cancer patients who have had surgical resection with curative intent. We also stratified the results to see if there was an improvement in overall survival in terms of the specific H(2)RA used.

Search methods

Randomised controlled trials were identified using a sensitive search strategy in the following databases: MEDLINE (1964 to present), the Cochrane Central Register of Controlled Trials (CENTRAL, The Cochrane Library 2009), EMBASE (1980 to present) and Cancerlit (1983 to present).

Selection criteria

Criteria for study selection included: patients with colorectal cancer surgically resected with curative intent; H(2)RAs used i) at any dose, ii) for any length of time, iii) with any other treatment modality and iv) in the pre-, peri- or post-operative period. The results were stratified for the H(2)RA used.

Data collection and analysis

The literature search retrieved 142 articles. There were six studies included in the final analysis, published from 1995 to 2007, including a total of 1229 patients. All patients were analysed by intention to treat according to their initial allocation. Log hazard ratios and standard errors of treatment effects (on overall survival) were calculated using the Cochrane statistical package RevMan Version 5. Hazard ratios and standard errors were recorded from trial publications or, if not provided, were estimated from published actuarial survival curves using a spreadsheet designed for this purpose (http://www.biomedcentral.com/content/supplementary/1745-6215-8-16-S1.xls).

Main results

Of the six identified trials, five used cimetidine as the experimental H(2)RA, whereas one used ranitidine. There was a trend towards improved survival when H(2)RAs were utilised as adjuvant therapy in patients having curative-intent surgery for colorectal cancer (HR 0.70; 95% CI 0.48-1.03, P = 0.07). Analysis of the five cimetidine trials (n = 421) revealed a statistically significant improvement in overall survival (HR 0.53; 95% CI 0.32 to 0.87).

Authors' conclusions

Of the H(2)RAs evaluated cimetidine appears to confer a survival benefit when given as an adjunct to curative surgical resection of colorectal cancers. The trial designs were heterogeneous and adjuvant therapy has evolved since these trials were performed. Further prospective randomised studies are warranted.",2012-08-15 +25881276,VisualTE: a graphical interface for transposable element analysis at the genomic scale.,"

Background

Transposable elements are mobile DNA repeat sequences, known to have high impact on genes, genome structure and evolution. This has stimulated broad interest in the detailed biological studies of transposable elements. Hence, we have developed an easy-to-use tool for the comparative analysis of the structural organization and functional relationships of transposable elements, to help understand their functional role in genomes.

Results

We named our new software VisualTE and describe it here. VisualTE is a JAVA stand-alone graphical interface that allows users to visualize and analyze all occurrences of transposable element families in annotated genomes. VisualTE reads and extracts transposable elements and genomic information from annotation and repeat data. Result analyses are displayed in several graphical panels that include location and distribution on the chromosome, the occurrence of transposable elements in the genome, their size distribution, and neighboring genes' features and ontologies. With these hallmarks, VisualTE provides a convenient tool for studying transposable element copies and their functional relationships with genes, at the whole-genome scale, and in diverse organisms.

Conclusions

VisualTE graphical interface makes possible comparative analyses of transposable elements in any annotated sequence as well as structural organization and functional relationships between transposable elements and other genetic object. This tool is freely available at: http://lcb.cnrs-mrs.fr/spip.php?article867 .",2015-02-27 +24849577,Evol and ProDy for bridging protein sequence evolution and structural dynamics.,"

Unlabelled

Correlations between sequence evolution and structural dynamics are of utmost importance in understanding the molecular mechanisms of function and their evolution. We have integrated Evol, a new package for fast and efficient comparative analysis of evolutionary patterns and conformational dynamics, into ProDy, a computational toolbox designed for inferring protein dynamics from experimental and theoretical data. Using information-theoretic approaches, Evol coanalyzes conservation and coevolution profiles extracted from multiple sequence alignments of protein families with their inferred dynamics.

Availability and implementation

ProDy and Evol are open-source and freely available under MIT License from http://prody.csb.pitt.edu/.",2014-05-21 +22970854,"GRID-based three-dimensional pharmacophores II: PharmBench, a benchmark data set for evaluating pharmacophore elucidation methods.","To date, published pharmacophore elucidation approaches typically use a handful of data sets for validation: here, we have assembled a data set for 81 targets, containing 960 ligands aligned using their cocrystallized protein targets, to provide the experimental ""gold standard"". The two-dimensional structures are also assembled to remove conformational bias; an ideal method would be able to take these structures as input, find the common features, and reproduce the bioactive conformations and their alignments to correspond with the X-ray-determined gold standard alignments. Here we present this data set and describe three objective measures to evaluate performance: the ability to identify the bioactive conformation, the ability to identify and correctly align this conformation for 50% of the molecules in each data set, and the pharmacophoric field similarity. We have applied this validation methodology to our pharmacophore elucidation method FLAPpharm, that is published in the first paper of this series and discuss the limitations of the data set and objective success criteria. Starting from two-dimensional structures and producing unbiased models, FLAPpharm was able to identify the bioactive conformations for 67% of the ligands and also to produce successful models according to the second metric for 67% of the Pharmbench data sets. Inspection of the unsuccessful models highlighted the limitation of this root mean square (rms)-derived metric, since many were found to be pharmacophorically reasonable, increasing the overall success rate to 83%. 
The PharmBench data set is available at http://www.moldiscovery.com/PharmBench , along with a web service to enable users to score model alignments coming from external methods in the same way that we have presented here and, therefore, establishes a pharmacophore elucidation benchmark data set available to be used by the community.",2012-09-21 +24122053,MAPfastR: quantitative trait loci mapping in outbred line crosses.,MAPfastR is a software package developed to analyze quantitative trait loci data from inbred and outbred line-crosses. The package includes a number of modules for fast and accurate quantitative trait loci analyses. It has been developed in the R language for fast and comprehensive analyses of large datasets. MAPfastR is freely available at: http://www.computationalgenetics.se/?page_id=7.,2013-12-09 +25586327,Statistical interactions and Bayes estimation of log odds in case-control studies.,"This paper is concerned with the estimation of the logarithm of disease odds (log odds) when evaluating two risk factors, whether or not interactions are present. Statisticians define interaction as a departure from an additive model on a certain scale of measurement of the outcome. Certain interactions, known as removable interactions, may be eliminated by fitting an additive model under an invertible transformation of the outcome. This can potentially provide more precise estimates of log odds than fitting a model with interaction terms. In practice, we may also encounter nonremovable interactions. The model must then include interaction terms, regardless of the choice of the scale of the outcome. However, in practical settings, we do not know at the outset whether an interaction exists, and if so whether it is removable or nonremovable. Rather than trying to decide on significance levels to test for the existence of removable and nonremovable interactions, we develop a Bayes estimator based on a squared error loss function. 
We demonstrate the favorable bias-variance trade-offs of our approach using simulations, and provide empirical illustrations using data from three published endometrial cancer case-control studies. The methods are implemented in an R program, and available freely at http://www.mskcc.org/biostatistics/~satagopj .",2015-01-12 +23468881,Cell-type-specific predictive network yields novel insights into mouse embryonic stem cell self-renewal and cell fate.,"Self-renewal, the ability of a stem cell to divide repeatedly while maintaining an undifferentiated state, is a defining characteristic of all stem cells. Here, we clarify the molecular foundations of mouse embryonic stem cell (mESC) self-renewal by applying a proven Bayesian network machine learning approach to integrate high-throughput data for protein function discovery. By focusing on a single stem-cell system, at a specific developmental stage, within the context of well-defined biological processes known to be active in that cell type, we produce a consensus predictive network that reflects biological reality more closely than those made by prior efforts using more generalized, context-independent methods. In addition, we show how machine learning efforts may be misled if the tissue specific role of mammalian proteins is not defined in the training set and circumscribed in the evidential data. For this study, we assembled an extensive compendium of mESC data: ∼2.2 million data points, collected from 60 different studies, under 992 conditions. We then integrated these data into a consensus mESC functional relationship network focused on biological processes associated with embryonic stem cell self-renewal and cell fate determination. Computational evaluations, literature validation, and analyses of predicted functional linkages show that our results are highly accurate and biologically relevant. 
Our mESC network predicts many novel players involved in self-renewal and serves as the foundation for future pluripotent stem cell studies. This network can be used by stem cell researchers (at http://StemSight.org) to explore hypotheses about gene function in the context of self-renewal and to prioritize genes of interest for experimental validation.",2013-02-28 +24799331,Allergen cross-reactivity in allergic rhinitis and oral-allergy syndrome: a bioinformatic protein sequence analysis.,"

Background

Clinical allergy cross-reactivity that is seen with related inhalant allergens or between unrelated inhalant allergens and foods in oral allergy syndrome (OAS) remains poorly understood. The goal of this study is to determine whether clinical cross-reactivity can be identified from primary protein sequences in allergy epitopes and food proteins.

Methods

High-throughput analysis was performed by assembling all known allergy epitopes within the Immune Epitope Database (IEDB; http://www.iedb.org) for 5 common species from 5 inhalant allergen subclasses and comparing their protein sequences to each other, as well as to sequences of intact proteins from known cross-reactive foods in the European Molecular Biology Laboratory-European Bioinformatics Institute (EMBL-EBI) protein database (http://www.uniprot.org) that have been implicated in OAS. Computational methods were employed to allow for exact matching, gaps, and similar amino acids using multiple algorithms. A phylogenetic tree was created to determine evolutionary relationships between cross-reactive epitopes in OAS.

Results

Twenty-three common inhalant allergens had 4429 unique epitopes; the 19 foods implicated in OAS had 9497 protein sequences. The Basic Local Alignment Search Tool (BLAST) algorithm identified interclass and intraclass sequence similarities for the 5 inhalant allergy classes with high similarity for mites, grasses, and trees. Analysis of OAS proteins identified 104 matches to inhalant allergy epitopes that are known to cross-react. The phylogenetic tree displayed relationships that mostly followed organism phylogeny.

Conclusion

Use of primary protein sequences was successful in explaining clinical allergy cross-reactivity. Clinical correlation is needed for use of these epitopes as diagnostic or therapeutic entities for patients with cross-reactive allergic disease.",2014-05-02 +25890833,WHATIF: An open-source desktop application for extraction and management of the incidental findings from next-generation sequencing variant data.,"

Background

Identification and evaluation of incidental findings in patients following whole exome (WES) or whole genome sequencing (WGS) is challenging for both practicing physicians and researchers. The American College of Medical Genetics and Genomics (ACMG) recently recommended a list of reportable incidental genetic findings. However, no informatics tools are currently available to support evaluation of incidental findings in next-generation sequencing data.

Methods

The Wisconsin Hierarchical Analysis Tool for Incidental Findings (WHATIF), was developed as a stand-alone Windows-based desktop executable, to support the interactive analysis of incidental findings in the context of the ACMG recommendations. WHATIF integrates the European Bioinformatics Institute Variant Effect Predictor (VEP) tool for biological interpretation and the National Center for Biotechnology Information ClinVar tool for clinical interpretation.

Results

An open-source desktop program was created to annotate incidental findings and present the results with a user-friendly interface. Further, a meaningful index (WHATIF Index) was devised for each gene to facilitate ranking of the relative importance of the variants and estimate the potential workload associated with further evaluation of the variants. Our WHATIF application is available at: http://tinyurl.com/WHATIF-SOFTWARE CONCLUSIONS: The WHATIF application offers a user-friendly interface and allows users to investigate the extracted variant information efficiently and intuitively while always accessing the up to date information on variants via application programming interfaces (API) connections. WHATIF׳s highly flexible design and straightforward implementation aids users in customizing the source code to meet their own special needs.",2015-04-08 +25518859,VEGAS2: Software for More Flexible Gene-Based Testing.,"Gene-based tests such as versatile gene-based association study (VEGAS) are commonly used following per-single nucleotide polymorphism (SNP) GWAS (genome-wide association studies) analysis. Two limitations of VEGAS were that the HapMap2 reference set was used to model the correlation between SNPs and only autosomal genes were considered. HapMap2 has now been superseded by the 1,000 Genomes reference set, and whereas early GWASs frequently ignored the X chromosome, it is now commonly included. Here we have developed VEGAS2, an extension that uses 1,000 Genomes data to model SNP correlations across the autosomes and chromosome X. VEGAS2 allows greater flexibility when defining gene boundaries. VEGAS2 offers both a user-friendly, web-based front end and a command line Linux version. The online version of VEGAS2 can be accessed through https://vegas2.qimrberghofer.edu.au/. The command line version can be downloaded from https://vegas2.qimrberghofer.edu.au/zVEGAS2offline.tgz. 
The command line version is developed in Perl, R and shell scripting languages; source code is available for further development.",2014-12-18 +21310745,GeneReporter--sequence-based document retrieval and annotation.,"

Unlabelled

GeneReporter is a web tool that reports functional information and relevant literature on a protein-coding sequence of interest. Its purpose is to support both manual genome annotation and document retrieval. PubMed references corresponding to a sequence are detected by the extraction of query words from UniProt entries of homologous sequences. Data on protein families, domains, potential cofactors, structure, function, cellular localization, metabolic contribution and corresponding DNA binding sites complement the information on a given gene product of interest.

Availability and implementation

GeneReporter is available at http://www.genereporter.tu-bs.de. The web site integrates databases and analysis tools as SOAP-based web services from the EBI (European Bioinformatics Institute) and NCBI (National Center for Biotechnology Information).",2011-02-09 +24681909,flowFit: a Bioconductor package to estimate proliferation in cell-tracking dye studies.,"

Summary

Herein we introduce flowFit, a Bioconductor package designed to perform quantitative analysis of cell proliferation in tracking dye-based experiments. The software, distributed as an R Bioconductor library, is based on a mathematical model that takes into account the height of each peak, the size and position of the parental population (labeled but not proliferating) and the estimated distance between the brightness of a cell and the brightness of its daughter (in which the dye is assumed to undergo a 2-fold dilution). Although the algorithm does not make any inference on cell types, rates of cell divisions or rates of cell death, it deconvolutes the actual collected data into a set of peaks, whereby each peak corresponds to a subpopulation of cells that have divided N times. We validated flowFit by retrospective analysis of published proliferation-tracking experiments and demonstrated that the algorithm predicts the same percentage of cells/generation either in samples with discernible peaks (in which the peaks are visible in the collected raw data) or in samples with non-discernible peaks (in which the peaks are fused together). To the best of our knowledge, flowFit represents the first open-source algorithm in its category and might be applied to numerous areas of cell biology in which quantitative deconvolution of tracking dye-based experiments is desired, including stem cell research.

Availability and implementation

http://www.bioconductor.org/packages/devel/bioc/html/flowFit.html (Bioconductor software page). http://www.bioconductor.org/packages/2.13/bioc/vignettes/flowFit/inst/doc/HowTo-flowFit.pdf (package vignette). http://rpubs.com/tucano/flowFit (online tutorial).",2014-03-27 +24692096,Mutation update and genotype-phenotype correlations of novel and previously described mutations in TPM2 and TPM3 causing congenital myopathies.,"Mutations affecting skeletal muscle isoforms of the tropomyosin genes may cause nemaline myopathy, cap myopathy, core-rod myopathy, congenital fiber-type disproportion, distal arthrogryposes, and Escobar syndrome. We correlate the clinical picture of these diseases with novel (19) and previously reported (31) mutations of the TPM2 and TPM3 genes. Included are altogether 93 families: 53 with TPM2 mutations and 40 with TPM3 mutations. Thirty distinct pathogenic variants of TPM2 and 20 of TPM3 have been published or listed in the Leiden Open Variant Database (http://www.dmd.nl/). Most are heterozygous changes associated with autosomal-dominant disease. Patients with TPM2 mutations tended to present with milder symptoms than those with TPM3 mutations, DA being present only in the TPM2 group. Previous studies have shown that five of the mutations in TPM2 and one in TPM3 cause increased Ca(2+) sensitivity resulting in a hypercontractile molecular phenotype. Patients with hypercontractile phenotype more often had contractures of the limb joints (18/19) and jaw (6/19) than those with nonhypercontractile ones (2/22 and 1/22), whereas patients with the non-hypercontractile molecular phenotype more often (19/22) had axial contractures than the hypercontractile group (7/19). Our in silico predictions show that most mutations affect tropomyosin-actin association or tropomyosin head-to-tail binding.",2014-05-01 +30708551,"First Report of Boxwood Blight Caused by Calonectria pseudonaviculata in Delaware, Maryland, New Jersey, and New York.","Boxwood (Buxus spp.) 
are commercially important evergreen ornamental plants with an annual market value of over $103 million in the United States. The recent U.S. incursion of boxwood blight disease caused by the fungus Calonectria pseudonaviculata (syn. Cylindrocladium pseudonaviculatum, Cy. buxicola) threatens the health and productivity of boxwood in both landscape plantings and nurseries. The first confirmed U.S. reports of the disease were made from Connecticut and North Carolina in November 2011 (2,4), followed by diagnoses in 10 additional states during 2012 and 2013. By August 2013, symptoms consistent with boxwood blight had been observed from B. sempervirens in Delaware, Maryland, New Jersey, and southeastern New York. Affected plants showed rapid onset of disease symptoms: dark brown to black spots or diffuse dark areas on leaves, followed by defoliation. Narrow, elongate black cankers also formed on current season shoots. Symptomatic stems and leaves were placed in petri dishes with moistened filter paper at 22°C for 3 days under continuous light. Conidiophores were excised, then placed on potato dextrose agar amended with streptomycin and neomycin (0.3 g/l). Resultant colonies showed dark brown pigmentation at the colony center surrounded by tan to reddish brown rings with white mycelia at the advancing edge. Conidia (n = 30 per isolate) were hyaline, cylindrical, rounded at both ends, with a single septum (45 to 76 × 4 to 6 μm; avg. 63 × 5 μm). Conidiophores (n = 20 per isolate) comprised a stipe, a hyaline septate stipe extension (length 119 to 192 μm; avg. 150 μm) and a terminal ellipsoidal vesicle (diameter 4 to 10 μm; avg. 7 μm). Based on morphological characteristics, the causal agent was identified as C. pseudonaviculata (1,4). Voucher specimens were deposited in the U.S. National Fungus Collections (BPI 892698 to 701). To verify morphological diagnosis, genomic DNA was extracted from fungal biomass grown in liquid cultures of yeast extract peptone dextrose media. 
A portion of the β-tubulin gene (TUB2) was PCR amplified and sequenced bi-directionally using primers Bta/Bt2b (3). BLASTn searches of NCBI GenBank databases using the TUB2 sequences (Accession Nos. KF785808 to 11) demonstrated 96 to 100% sequence identity with other C. pseudonaviculata isolates. To confirm pathogenicity, 5-month-old B. sempervirens and B. microphylla seedlings were spray-inoculated with a spore suspension of 1 × 104 conidia/ml. One isolate from each state was independently tested with four replicates each. Non-inoculated water-sprayed plants served as negative controls. Plants were maintained in growth chambers at 22°C under constant light. Blight symptoms developed 4 to 5 days post inoculation. C. pseudonaviculata was re-isolated from inoculated plants; no symptoms or signs were observed from control plants. To our knowledge, this is the first report of C. pseudonaviculata in the states of Delaware, Maryland, New Jersey, and New York. This report demonstrates that C. pseudonaviculata is now widespread across the United States eastern seaboard, and represents a substantial threat to boxwood plants in North American landscapes and nurseries. References: (1) P. Crous et al. Sydowia 54:23, 2002. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, USDA-ARS. Retrieved from http://nt.ars-grin.gov/fungaldatabases , 30 August 2013. (3) N. L. Glass and G. C. Donaldson. Appl. Environ. Microbiol. 61:1323, 1995. (4) K. L. Ivors et al. Plant Dis. 96:1070, 2012.",2014-05-01 +30708546,First Report of Alternaria alternata Causing Leaf Spots of Tea (Camellia sinensis) in China.,"Tea is the most popular non-alcoholic beverage crop in the world, which originated in China and has been cultivated in over 45 countries. In recent years, a leaf spot disease of unknown etiology has been observed on young leaves of tea trees (Camellia sinensis) grown in Luotian county, Hubei Province, China. 
Observed symptoms display grayish brown to white spots (about 1 cm in diameter) surrounded by brown edges. Over 20% of the young leaves were affected on surveyed trees. To identify the pathogen, six symptomatic tea leaves were collected from six individual tea trees of unknown variety in August 2012. A thin section (3 to 5 mm) of symptomatic tissue was sterilized in a bleach solution of 3% hypochlorite and placed on potato dextrose agar (PDA) medium at 25°C in darkness for isolation. Six fungal colonies displaying gray-brown and gray-white aerial mycelia were consistently recovered from lesions of the six leaves, termed as T1 to T6, respectively. Conidia produced on the colonies were olive brown, obpyriform, short conical beak at the tip, 0 to 3 vertical and 1 to 6 transverse septa, and length × width of 7.1 to 31.7 (avg. 20.1) × 2.9 to 12.7 (avg. 7.2) μm. T1 to T6 were identified as Alternaria alternata on the basis of morphological characterization, respectively (2). Confirmation of the species identification was obtained by molecular characterization of their internal transcribed spacer (ITS) and glyceraldehyde-3-phosphate dehydrogenase (GAPDH) regions amplified from the genomic DNAs using the universal primers (1). The results revealed identical sequences of ITS (GenBank Accession No. KF699530) and GAPDH among the six isolates. BLAST searches showed that they had the highest similarity with A. alternata strains, with 98.3% for ITS (AJ276055) and 96.2% for GAPDH (EF513205), deposited in fungus database ( http://www.mycobank.org/ ). Pathogenicity tests were conducted on the detached leaves expanding for 10 to 20 days of two tea varieties (cvs. Fudingdabai and Taicha No. 12) in triplicate by placing 4 mm diameter discs from 5-day-old PDA plates of T3 and T6, which were incubated in an incubator at 25°C with a 12-h photoperiod for 7 days. 
All inoculated leaves with or without wound treatment developed brown spots similar to the original ones at 7 days post inoculation (dpi) while the control leaves inoculated with non-colonized PDA plugs remained asymptomatic. Isolates recovered from diseased samples were of the same morphology and ITS sequence as the inoculated ones. Alternaria alternata had been described on C. sinensis in China (3), but it was only reported as a severe foliar fungal pathogen of tea in North Bengal, India (1), and to our knowledge, this is the first report of A. alternata causing leaf spots on tea leaves (C. sinensis) in China. In addition to quantity loss, the species may result in a decrease of quality of tea crop considering that it can produce Alternaria toxins related to animal and public health. The etiologic identification of the disease is expected to provide useful information for its control. References: (1) B. N. Chakraborty et al. Plant Pathol. 55:303, 2006. (2) E. G. Simmons. Page 1 in: Alternaria Biology, Plant Diseases and Metabolites. J. Chelchowski and A. Visconti, eds. Elsevier, Amsterdam, 1992. (3) F. L. Tai. Page 1527 in: Sylloge Fungorum Sinicorum. eds. Sci. Press Acad. Sin. Beijing, 1979. (4) B. S. Weir et al. Stud. Mycol. 73:115, 2012.",2014-05-01 +25888091,Impact of missing data imputation methods on gene expression clustering and classification.,"

Background

Several missing value imputation methods for gene expression data have been proposed in the literature. In the past few years, researchers have been putting a great deal of effort into presenting systematic evaluations of the different imputation algorithms. Initially, most algorithms were assessed with an emphasis on the accuracy of the imputation, using metrics such as the root mean squared error. However, it has become clear that the success of the estimation of the expression value should be evaluated in more practical terms as well. One can consider, for example, the ability of the method to preserve the significant genes in the dataset, or its discriminative/predictive power for classification/clustering purposes.

Results and conclusions

We performed a broad analysis of the impact of five well-known missing value imputation methods on three clustering and four classification methods, in the context of 12 cancer gene expression datasets. We employed a statistical framework, for the first time in this field, to assess whether different imputation methods improve the performance of the clustering/classification methods. Our results suggest that the imputation methods evaluated have a minor impact on the classification and downstream clustering analyses. Simple methods such as replacing the missing values by mean or the median values performed as well as more complex strategies. The datasets analyzed in this study are available at http://costalab.org/Imputation/ .",2015-02-26 +25124108,CRF-based models of protein surfaces improve protein-protein interaction site predictions.,"

Background

The identification of protein-protein interaction sites is a computationally challenging task and important for understanding the biology of protein complexes. There is a rich literature in this field. A broad class of approaches assign to each candidate residue a real-valued score that measures how likely it is that the residue belongs to the interface. The prediction is obtained by thresholding this score. Some probabilistic models classify the residues on the basis of the posterior probabilities. In this paper, we introduce pairwise conditional random fields (pCRFs) in which edges are not restricted to the backbone as in the case of linear-chain CRFs utilized by Li et al. (2007). In fact, any 3D-neighborhood relation can be modeled. On grounds of a generalized Viterbi inference algorithm and a piecewise training process for pCRFs, we demonstrate how to utilize pCRFs to enhance a given residue-wise score-based protein-protein interface predictor on the surface of the protein under study. The features of the pCRF are solely based on the interface prediction scores of the predictor the performance of which shall be improved.

Results

We performed three sets of experiments with synthetic scores assigned to the surface residues of proteins taken from the data set PlaneDimers compiled by Zellner et al. (2011), from the list published by Keskin et al. (2004) and from the very recent data set due to Cukuroglu et al. (2014). That way we demonstrated that our pCRF-based enhancer is effective given the interface residue score distribution and the non-interface residue score are unimodal. Moreover, the pCRF-based enhancer is also successfully applicable, if the distributions are only unimodal over a certain sub-domain. The improvement is then restricted to that domain. Thus we were able to improve the prediction of the PresCont server devised by Zellner et al. (2011) on PlaneDimers.

Conclusions

Our results strongly suggest that pCRFs form a methodological framework to improve residue-wise score-based protein-protein interface predictors given the scores are appropriately distributed. A prototypical implementation of our method is accessible at http://ppicrf.informatik.uni-goettingen.de/index.html.",2014-08-13 +23304414,A collaborative framework for Distributed Privacy-Preserving Support Vector Machine learning.,"A Support Vector Machine (SVM) is a popular tool for decision support. The traditional way to build an SVM model is to estimate parameters based on a centralized repository of data. However, in the field of biomedicine, patient data are sometimes stored in local repositories or institutions where they were collected, and may not be easily shared due to privacy concerns. This creates a substantial barrier for researchers to effectively learn from the distributed data using machine learning tools like SVMs. To overcome this difficulty and promote efficient information exchange without sharing sensitive raw data, we developed a Distributed Privacy Preserving Support Vector Machine (DPP-SVM). The DPP-SVM enables privacy-preserving collaborative learning, in which a trusted server integrates ""privacy-insensitive"" intermediary results. The globally learned model is guaranteed to be exactly the same as learned from combined data. We also provide a free web-service (http://privacy.ucsd.edu:8080/ppsvm/) for multiple participants to collaborate and complete the SVM-learning task in an efficient and privacy-preserving manner.",2012-11-03 +26088761,High doses of garlic extract significantly attenuated the ratio of serum LDL to HDL level in rat-fed with hypercholesterolemia diet.,"

Background

Hypercholesterolemia is associated with an increased risk of heart disease. In this study, we investigated the antihyperlipidemic effects of garlic (Allium sativum L.) in rat models of hypercholesterolemia.

Methods

Wistar male rats were randomly divided into 4 diet groups with garlic supplementation. Male Wistar rats were fed by standard pellet diet (group I), standard diet supplemented with 4% garlic (group II), lipogenic diet (containing sunflower oil, cholesterol and ethanol) equivalent to 200 mg raw garlic/kg body weight (raw) (group III) and lipogenic diet equivalent to 400 mg raw garlic/kg body weight (raw) (group IV).

Results

Rats fed 400 g/kg garlic extract (GE) had a significantly lower concentration of serum low-density lipoprotein cholesterol (LDL-C) cholesterol and elevated HDL-C cholesterol at day 28 (P < 0.05). In addition, serum levels of LDL-C were lower in the III and IV group than those in the IV group (P < 0.001 for each). However, cholesterol efflux capacity was positively correlated with HDL cholesterol concentration (P < 0.0001). It was also directly correlated with garlic supplementation (P < 0.0001).

Conclusion

Taken together, the results are clearly indicative of the beneficial effects of garlic in reducing lateral side effects of hyperlipidemia. Our data demonstrate that GE has protective effects on HDL in rats with high LDL intake. Therefore, it could be used to remedy hypercholesterolemia and help reduce the risk of coronary heart disease.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1834155749171141.",2015-06-20 +21841810,Proteome reference map and regulation network of neonatal rat cardiomyocyte.,"

Aim

To study and establish a proteome reference map and regulation network of neonatal rat cardiomyocyte.

Methods

Cultured cardiomyocytes of neonatal rats were used. All proteins expressed in the cardiomyocytes were separated and identified by two-dimensional polyacrylamide gel electrophoresis (2-DE) and matrix-assisted laser desorption/ionization-time of flight mass spectrometry (MALDI-TOF MS). Biological networks and pathways of the neonatal rat cardiomyocytes were analyzed using the Ingenuity Pathway Analysis (IPA) program (www.ingenuity.com). A 2-DE database was made accessible on-line by Make2ddb package on a web server.

Results

More than 1000 proteins were separated on 2D gels, and 148 proteins were identified. The identified proteins were used for the construction of an extensible markup language-based database. Biological networks and pathways were constructed to analyze the functions associated with cardiomyocyte proteins in the database. The 2-DE database of rat cardiomyocyte proteins can be accessed at http://2d.bjmu.edu.cn.

Conclusion

A proteome reference map and regulation network of the neonatal rat cardiomyocytes have been established, which may serve as an international platform for storage, analysis and visualization of cardiomyocyte proteomic data.",2011-08-15 +24730612,Prediction of linear cationic antimicrobial peptides based on characteristics responsible for their interaction with the membranes.,"Most available antimicrobial peptides (AMP) prediction methods use common approach for different classes of AMP. Contrary to available approaches, we suggest that a strategy of prediction should be based on the fact that there are several kinds of AMP that vary in mechanisms of action, structure, mode of interaction with membrane, etc. According to our suggestion for each kind of AMP, a particular approach has to be developed in order to get high efficacy. Consequently, in this paper, a particular but the biggest class of AMP, linear cationic antimicrobial peptides (LCAP), has been considered and a newly developed simple method of LCAP prediction described. The aim of this study is the development of a simple method of discrimination of AMP from non-AMP, the efficiency of which will be determined by efficiencies of selected descriptors only and comparison the results of the discrimination procedure with the results obtained by more complicated discriminative methods. As descriptors the physicochemical characteristics responsible for capability of the peptide to interact with an anionic membrane were considered. The following characteristics such as hydrophobicity, amphiphaticity, location of the peptide in relation to membrane, charge density, propensities to disordered structure and aggregation were studied. On the basis of these characteristics, a new simple algorithm of prediction is developed and evaluation of efficacies of the characteristics as descriptors performed. 
The results show that three descriptors, hydrophobic moment, charge density and location of the peptide along the membranes, can be used as discriminators of LCAPs. For the training set, our method gives the same level of accuracy as more complicated machine learning approaches offered as CAMP database service tools. For the test set accuracy obtained by our method gives even higher value than the one obtained by CAMP prediction tools. The AMP prediction tool based on the considered method is available at http://www.biomedicine.org.ge/dbaasp/.",2014-04-29 +24782522,SWISS-MODEL: modelling protein tertiary and quaternary structure using evolutionary information.,"Protein structure homology modelling has become a routine technique to generate 3D models for proteins when experimental structures are not available. Fully automated servers such as SWISS-MODEL with user-friendly web interfaces generate reliable models without the need for complex software packages or downloading large databases. Here, we describe the latest version of the SWISS-MODEL expert system for protein structure modelling. The SWISS-MODEL template library provides annotation of quaternary structure and essential ligands and co-factors to allow for building of complete structural models, including their oligomeric structure. The improved SWISS-MODEL pipeline makes extensive use of model quality estimation for selection of the most suitable templates and provides estimates of the expected accuracy of the resulting models. The accuracy of the models generated by SWISS-MODEL is continuously evaluated by the CAMEO system. The new web site allows users to interactively search for templates, cluster them by sequence similarity, structurally compare alternative templates and select the ones to be used for model building. In cases where multiple alternative template structures are available for a protein of interest, a user-guided template selection step allows building models in different functional states. 
SWISS-MODEL is available at http://swissmodel.expasy.org/.",2014-04-29 +24782520,A multi-fingerprint browser for the ZINC database.,"To confirm the activity of an initial small molecule 'hit compound' from an activity screening, one needs to probe the structure-activity relationships by testing close analogs. The multi-fingerprint browser presented here (http://dcb-reymond23.unibe.ch:8080/MCSS/) enables one to rapidly identify such close analogs among commercially available compounds in the ZINC database (>13 million molecules). The browser retrieves nearest neighbors of any query molecule in multi-dimensional chemical spaces defined by four different fingerprints, each of which represents relevant structural and pharmacophoric features in a different way: sFP (substructure fingerprint), ECFP4 (extended connectivity fingerprint), MQNs (molecular quantum numbers) and SMIfp (SMILES fingerprint). Distances are calculated using the city-block distance, a similarity measure that performs as well as Tanimoto similarity but is much faster to compute. The list of up to 1000 nearest neighbors of any query molecule is retrieved by the browser and can be then clustered using the K-means clustering algorithm to produce a focused list of analogs with likely similar bioactivity to be considered for experimental evaluation.",2014-04-29 +24124490,NeSSM: a Next-generation Sequencing Simulator for Metagenomics.,"

Background

Metagenomics can reveal the vast majority of microbes that have been missed by traditional cultivation-based methods. Due to its extremely wide range of application areas, fast metagenome sequencing simulation systems with high fidelity are in great demand to facilitate the development and comparison of metagenomics analysis tools.

Results

We present here a customizable metagenome simulation system: NeSSM (Next-generation Sequencing Simulator for Metagenomics). Combining complete genomes currently available, a community composition table, and sequencing parameters, it can simulate metagenome sequencing better than existing systems. Sequencing error models based on the explicit distribution of errors at each base and sequencing coverage bias are incorporated in the simulation. In order to improve the fidelity of simulation, tools are provided by NeSSM to estimate the sequencing error models, sequencing coverage bias and the community composition directly from existing metagenome sequencing data. Currently, NeSSM supports single-end and pair-end sequencing for both 454 and Illumina platforms. In addition, a GPU (graphics processing units) version of NeSSM is also developed to accelerate the simulation. By comparing the simulated sequencing data from NeSSM with experimental metagenome sequencing data, we have demonstrated that NeSSM performs better in many aspects than existing popular metagenome simulators, such as MetaSim, GemSIM and Grinder. The GPU version of NeSSM is more than one-order of magnitude faster than MetaSim.

Conclusions

NeSSM is a fast simulation system for high-throughput metagenome sequencing. It can be helpful to develop tools and evaluate strategies for metagenomics analysis and it's freely available for academic users at http://cbb.sjtu.edu.cn/~ccwei/pub/software/NeSSM.php.",2013-10-04 +26826164,Whole exome sequencing in recurrent early pregnancy loss.,"

Study hypothesis

Exome sequencing can identify genetic causes of idiopathic recurrent pregnancy loss (RPL).

Study finding

We identified compound heterozygous deleterious mutations affecting DYNC2H1 and ALOX15 in two out of four families with RPL. Both genes have a role in early development. Bioinformatics analysis of all genes with rare and putatively pathogenic mutations in miscarriages and couples showed enrichment in pathways relevant to pregnancy loss, including the complement and coagulation cascades pathways.

What is known already

Next generation sequencing (NGS) is increasingly being used to identify known and novel gene mutations in children with developmental delay and in fetuses with ultrasound-detected anomalies. In contrast, NGS is rarely used to study pregnancy loss. Chromosome microarray analysis detects putatively causative DNA copy number variants (CNVs) in ∼2% of miscarriages and CNVs of unknown significance (predominantly parental in origin) in up to 40% of miscarriages. Therefore, a large number of miscarriages still have an unknown cause.

Study design, samples/materials, methods

Whole exome sequencing (WES) was performed using Illumina HiSeq 2000 platform on seven euploid miscarriages from four families with RPL. Golden Helix SVS v8.1.5 was used for data assessment and inheritance analysis for deleterious DNA variants predicted to severely disrupt protein-coding genes by introducing a frameshift, loss of the stop codon, gain of the stop codon, changes in splicing or the initial codon. Webgestalt (http://bioinfo.vanderbilt.edu/webgestalt/) was used for pathway and disease association enrichment analysis of a gene pool containing putatively pathogenic variants in miscarriages and couples in comparison to control gene pools.

Main results and the role of chance

Compound heterozygous mutations in DYNC2H1 and ALOX15 were identified in miscarriages from two families with RPL. DYNC2H1 is involved in cilia biogenesis and has been associated with fetal lethality in humans. ALOX15 is expressed in placenta and its dysregulation has been associated with inflammation, placental dysfunction, abnormal oxidative stress response and angiogenesis. The pool of putatively pathogenic single nucleotide variants (SNVs) and small insertions and deletions (indels) detected in the miscarriages showed enrichment in 'complement and coagulation cascades pathway', and 'ciliary motility disorders'. We conclude that CNVs, individual SNVs and pool of deleterious gene mutations identified by exome sequencing could contribute to RPL.

Limitations, reasons for caution

The size of our sample cohort is small. The functional effect of candidate mutations should be evaluated to determine whether the mutations are causative.

Wider implications of the findings

This is the first study to assess whether SNVs may contribute to the pathogenesis of miscarriage. Furthermore, our findings suggest that collective effect of mutations in relevant biological pathways could be implicated in RPL.

Study funding and competing interests

The study was funded by Canadian Institutes of Health Research (grant MOP 106467) and Michael Smith Foundation of Health Research Career Scholar salary award to ERS.",2016-01-28 +23278391,Sequential sentinel SNP Regional Association Plots (SSS-RAP): an approach for testing independence of SNP association signals using meta-analysis data.,"Genome-Wide Association Studies (GWAS) frequently incorporate meta-analysis within their framework. However, conditional analysis of individual-level data, which is an established approach for fine mapping of causal sites, is often precluded where only group-level summary data are available for analysis. Here, we present a numerical and graphical approach, ""sequential sentinel SNP regional association plot"" (SSS-RAP), which estimates regression coefficients (beta) with their standard errors using the meta-analysis summary results directly. Under an additive model, typical for genes with small effect, the effect for a sentinel SNP can be transformed to the predicted effect for a possibly dependent SNP through a 2×2 2-SNP haplotypes table. The approach assumes Hardy-Weinberg equilibrium for test SNPs. SSS-RAP is available as a Web-tool (http://apps.biocompute.org.uk/sssrap/sssrap.cgi). To develop and illustrate SSS-RAP we analyzed lipid and ECG traits data from the British Women's Heart and Health Study (BWHHS), evaluated a meta-analysis for ECG trait and presented several simulations. We compared results with existing approaches such as model selection methods and conditional analysis. Generally findings were consistent. SSS-RAP represents a tool for testing independence of SNP association signals using meta-analysis data, and is also a convenient approach based on biological principles for fine mapping in group level summary data.",2013-01-01 +24773593,Accelerating the scoring module of mass spectrometry-based peptide identification using GPUs.,"

Background

Tandem mass spectrometry-based database searching is currently the main method for protein identification in shotgun proteomics. The explosive growth of protein and peptide databases, which is a result of genome translations, enzymatic digestions, and post-translational modifications (PTMs), is making computational efficiency in database searching a serious challenge. Profile analysis shows that most search engines spend 50%-90% of their total time on the scoring module, and that the spectrum dot product (SDP) based scoring module is the most widely used. As a general purpose and high performance parallel hardware, graphics processing units (GPUs) are promising platforms for speeding up database searches in the protein identification process.

Results

We designed and implemented a parallel SDP-based scoring module on GPUs that exploits the efficient use of GPU registers, constant memory and shared memory. Compared with the CPU-based version, we achieved a 30 to 60 times speedup using a single GPU. We also implemented our algorithm on a GPU cluster and achieved an approximately favorable speedup.

Conclusions

Our GPU-based SDP algorithm can significantly improve the speed of the scoring module in mass spectrometry-based protein identification. The algorithm can be easily implemented in many database search engines such as X!Tandem, SEQUEST, and pFind. A software tool implementing this algorithm is available at http://www.comp.hkbu.edu.hk/~youli/ProteinByGPU.html.",2014-04-28 +22294630,Comparative effectiveness and safety of biological treatment options after tumour necrosis factor α inhibitor failure in rheumatoid arthritis: systematic review and indirect pairwise meta-analysis.,"

Background

Optimal treatment for rheumatoid arthritis (RA) after inadequate response (IR) to tumour necrosis factor α inhibitors (TNFi) remains uncertain.

Objective

To compare the efficacy and safety of biological agents after TNFi-IR.

Methods

A systematic literature search was carried out using Medline and Cochrane databases, as well as http://www.clinicaltrials.gov, and bibliographies of the retrieved literature were searched by hand. Randomised, placebo-controlled trials that enrolled patients with RA with TNFi-IR were included and American College of Rheumatology (ACR) response as primary efficacy outcome and adverse events (AEs), serious adverse events (SAEs) and serious infections (SIs) as safety measures were extracted. An indirect meta-analysis with pairwise comparisons of efficacy and safety data was then carried out using ORs or risk differences (RDs) in a random effects model.

Results

In four randomised controlled trials with 24 weeks' follow-up, direct comparisons of abatacept, golimumab, rituximab and tocilizumab versus placebo showed statistically significant mean ORs of 3.3-8.9 for ACR20, 5.5-10.2 for ACR50 and 4.1-13.5 for ACR70. Risks of AEs, SAEs and SIs versus placebo were non-significant. Indirect pairwise comparisons of the four biological agents showed no significant differences in ACR50 and ACR70. Golimumab had a significantly lower OR (0.56-0.59) for ACR20 but significantly fewer AEs (RD 0.13-0.18). Efficacy after one versus multiple TNFi failures did not differ significantly between the different biological agents.

Conclusion

In patients refractory to one or more TNFi, new biological agents provide significant improvement with good safety. Lacking head-to-head trials, indirect meta-analysis enables a comparison of effectiveness and safety of biological agents with each other and shows that all biological agents have similar effects.",2012-01-30 +22453064,Expression variation in connected recombinant populations of Arabidopsis thaliana highlights distinct transcriptome architectures.,"

Background

Expression traits can vary quantitatively between individuals and have a complex inheritance. Identification of the genetics underlying transcript variation can help in the understanding of phenotypic variation due to genetic factors regulating transcript abundance and shed light into divergence patterns. So far, only a limited number of studies have addressed this subject in Arabidopsis, with contrasting results due to dissimilar statistical power. Here, we present the transcriptome architecture in leaf tissue of two RIL sets obtained from a connected-cross design involving 3 commonly used accessions. We also present the transcriptome architecture observed in developing seeds of a third independent cross.

Results

The utilisation of the novel R/eqtl package (whose goal is to automate and extend functions from the R/qtl package) allowed us to map 4,290 and 6,534 eQTLs in the Cvi-0 × Col-0 and Bur-0 × Col-0 recombinant populations respectively. In agreement with previous studies, we observed a larger phenotypic variance explained by eQTLs in linkage with the controlled gene (potentially cis-acting), compared to distant loci (acting necessarily indirectly or in trans). Distant eQTLs hotspots were essentially not conserved between crosses, but instead, cross-specific. Accounting for confounding factors using a probabilistic approach (VBQTL) increased the mapping resolution and the number of significant associations. Moreover, using local eQTLs obtained from this approach, we detected evidence for a directional allelic effect in genes with related function, where significantly more eQTLs than expected by chance were up-regulated from one of the accessions. Primary experimental data, analysis parameters, eQTL results and visualisation of LOD score curves presented here are stored and accessible through the QTLstore service database http://qtlstore.versailles.inra.fr/.

Conclusions

Our results demonstrate the extensive diversity and moderately conserved eQTL landscape between crosses and validate the utilisation of expression traits to explore for candidates behind phenotypic variation among accessions. Furthermore, this stresses the need for a wider spectrum of diversity to fully understand expression trait variation within a species.",2012-03-27 +26079348,TENET: topological feature-based target characterization in signalling networks.,"

Motivation

Target characterization for a biochemical network is a heuristic evaluation process that produces a characterization model that may aid in predicting the suitability of each molecule for drug targeting. These approaches are typically used in drug research to identify novel potential targets using insights from known targets. Traditional approaches that characterize targets based on their molecular characteristics and biological function require extensive experimental study of each protein and are infeasible for evaluating larger networks with poorly understood proteins. Moreover, they fail to exploit network connectivity information which is now available from systems biology methods. Adopting a network-based approach by characterizing targets using network features provides greater insights that complement these traditional techniques. To this end, we present Tenet (Target charactErization using NEtwork Topology), a network-based approach that characterizes known targets in signalling networks using topological features.

Results

Tenet first computes a set of topological features and then leverages a support vector machine-based approach to identify predictive topological features that characterizes known targets. A characterization model is generated and it specifies which topological features are important for discriminating the targets and how these features should be combined to quantify the likelihood of a node being a target. We empirically study the performance of Tenet from a wide variety of aspects, using several signalling networks from BioModels with real-world curated outcomes. Results demonstrate its effectiveness and superiority in comparison to state-of-the-art approaches.

Availability and implementation

Our software is available freely for non-commercial purposes from: https://sites.google.com/site/cosbyntu/softwares/tenet

Contact

hechua@ntu.edu.sg or assourav@ntu.edu.sg

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-16 +25637557,GenoExp: a web tool for predicting gene expression levels from single nucleotide polymorphisms.,"

Unlabelled

Understanding the effect of single nucleotide polymorphisms (SNPs) on the expression level of genes is an important goal. We recently published a study in which we devised a multi-SNP predictive model for gene expression in Lymphoblastoid cell lines (LCL), and showed that it can robustly predict the expression of a small number of genes in test individuals. Here, we validate the generality of our models by predicting expression profiles for genes in LCL in an independent study, and extend the pool of predictable genes for which we are able to explain more than 25% of their expression variability to 232 genes across 14 different cell types. As the number of people who obtained their SNP profiles through companies such as 23andMe is rising rapidly, we developed GenoExp, a web-based tool in which users can upload their individual SNP data and obtain predicted expression levels for the set of predictable genes across the 14 different cell types. Our tool thus allows users with biological knowledge to study the possible effects that their set of SNPs might have on these genes and predict their cell-specific expression levels relative to the population average.

Availability and implementation

GenoExp is freely available at http://genie.weizmann.ac.il/pubs/GenoExp/.",2015-01-30 +24490896,Extending World Health Organization weight-for-age reference curves to older children.,"

Background

For ages 5-19 years, the World Health Organization (WHO) publishes reference charts based on 'core data' from the US National Center for Health Statistics (NCHS), collected from 1963-75 on 22,917 US children. To promote the use of body mass index in older children, weight-for-age was omitted after age 10. Health providers have subsequently expressed concerns about this omission and the selection of centiles. We therefore sought to extend weight-for-age reference curves from 10 to 19 years by applying WHO exclusion criteria and curve fitting methods to the core NCHS data and to revise the choice of displayed centiles.

Methods

WHO analysts first excluded ~ 3% of their reference population in order to achieve a ""non-obese sample with equal height"". Based on these exclusion criteria, 314 girls and 304 boys were first omitted for 'unhealthy' weights-for-height. By applying WHO global deviance and information criteria, optimal Box-Cox power exponential models were used to fit smoothed weight-for-age centiles. Bootstrap resampling was used to assess the precision of centile estimates. For all charts, additional centiles were included in the healthy range (3 to 97%), and the more extreme WHO centiles 0.1 and 99.9% were dropped.

Results

In addition to weight-for-age beyond 10 years, our charts provide more granularity in the centiles in the healthy range -2 to +2 SD (3-97%). For both weight and BMI, the bootstrap confidence intervals for the 99.9th centile were at least an order of magnitude wider than the corresponding 50th centile values.

Conclusions

These charts complement existing WHO charts by allowing weight-for-age to be plotted concurrently with height in older children. All modifications followed strict WHO methodology and utilized the same core data from the US NCHS. The additional centiles permit a more precise assessment of normal growth and earlier detection of aberrant growth as it crosses centiles. Elimination of extreme centiles reduces the risk of misclassification. A complete set of charts is available at the CPEG web site (http://cpeg-gcep.net).",2014-02-03 +25208583,"Finding trans-regulatory genes and protein complexes modulating meiotic recombination hotspots of human, mouse and yeast.","

Background

The regulatory mechanism of recombination is one of the most fundamental problems in genomics, with wide applications in genome wide association studies (GWAS), birth-defect diseases, molecular evolution, cancer research, etc. Recombination events cluster into short genomic regions called ""recombination hotspots"". Recently, a zinc finger protein PRDM9 was reported to regulate recombination hotspots in human and mouse genomes. In addition, a 13-mer motif contained in the binding sites of PRDM9 is found to be enriched in human hotspots. However, this 13-mer motif only covers a fraction of hotspots, indicating that PRDM9 is not the only regulator of recombination hotspots. Therefore, the challenge of discovering other regulators of recombination hotspots becomes significant. Furthermore, recombination is a complex process. Hence, multiple proteins acting as machinery, rather than individual proteins, are more likely to carry out this process in a precise and stable manner. Therefore, the extension of the prediction of individual trans-regulators to protein complexes is also highly desired.

Results

In this paper, we introduce a pipeline to identify genes and protein complexes associated with recombination hotspots. First, we prioritize proteins associated with hotspots based on their preference of binding to hotspots and coldspots. Second, using the above identified genes as seeds, we apply the Random Walk with Restart algorithm (RWR) to propagate their influences to other proteins in protein-protein interaction (PPI) networks. Hence, many proteins without DNA-binding information will also be assigned a score to implicate their roles in recombination hotspots. Third, we construct sub-PPI networks induced by top genes ranked by RWR for various species (e.g., yeast, human and mouse) and detect protein complexes in those sub-PPI networks.

Conclusions

The GO term analysis show that our prioritizing methods and the RWR algorithm are capable of identifying novel genes associated with recombination hotspots. The trans-regulators predicted by our pipeline are enriched with epigenetic functions (e.g., histone modifications), demonstrating the epigenetic regulatory mechanisms of recombination hotspots. The identified protein complexes also provide us with candidates to further investigate the molecular machineries for recombination hotspots. Moreover, the experimental data and results are available on our web site http://www.ntu.edu.sg/home/zhengjie/data/RecombinationHotspot/NetPipe/.",2014-09-11 +26372664,"Radon Exposure, IL-6 Promoter Variants, and Lung Squamous Cell Carcinoma in Former Uranium Miners.","

Background

High radon exposure is a risk factor for squamous cell carcinoma, a major lung cancer histology observed in former uranium miners. Radon exposure can cause oxidative stress, leading to pulmonary inflammation. Interleukin-6 (IL-6) is a pro-carcinogenic inflammatory cytokine that plays a pivotal role in lung cancer development.

Objectives

We assessed whether single nucleotide polymorphisms (SNPs) in the IL6 promoter are associated with lung cancer in former uranium miners with high occupational exposure to radon gas.

Methods

Genetic associations were assessed in a case-control study of former uranium miners (242 cases and 336 controls). A replication study was performed using data from the Gene Environment Association Studies (GENEVA) Genome Wide Association Study (GWAS) of Lung Cancer and Smoking. Functional relevance of the SNPs was characterized using in vitro approaches.

Results

We found that rs1800797 was associated with squamous cell carcinoma in miners and with a shorter time between the midpoint of the period of substantial exposure and diagnosis among the cases. Furthermore, rs1800797 was also associated with lung cancer among never smokers in the GENEVA dataset. Functional studies identified that the risk allele was associated with increased basal IL-6 mRNA level and greater promoter activity. Furthermore, fibroblasts with the risk allele showed greater induction of IL-6 secretion by hydrogen peroxide or benzo[a]pyrene diolepoxide treatments.

Conclusions

An IL6 promoter variant was associated with lung cancer in uranium miners and never smokers in two external study populations. The associations are strongly supported by the functional relevance that the IL6 promoter SNP affects basal expression and carcinogen-induced IL-6 secretion.

Citation

Leng S, Thomas CL, Snider AM, Picchi MA, Chen W, Willis DG, Carr TG, Krzeminski J, Desai D, Shantu A, Lin Y, Jacobson MR, Belinsky SA. 2016. Radon exposure, IL-6 promoter variants, and lung squamous cell carcinoma in former uranium miners. Environ Health Perspect 124:445-451; http://dx.doi.org/10.1289/ehp.1409437.",2015-09-15 +25187690,MICO: A meta-tool for prediction of the effects of non-synonymous mutations.,"

Unlabelled

The Next Generation Sequencing (NGS) is a state-of-the-art technology that produces high throughput data with high resolution mutation information in the genome. Numerous methods with different efficiencies have been developed to predict mutational effects in the genome. The challenge is to present the results in a balanced manner for better biological insights and interpretation. Hence, we describe a meta-tool named Mutation Information Collector (MICO) for automatically querying and collecting related information from multiple biology/bioinformatics enabled web servers with prediction capabilities. The predicted mutational results for the proteins of interest are returned and presented as an easy-to-read summary table in this service. MICO also allows for navigating the result from each website for further analysis.

Availability

http: //mico.ggc.org /MICO.",2014-07-22 +26217727,Data for a comprehensive map and functional annotation of the human cerebrospinal fluid proteome.,"Knowledge about the normal human cerebrospinal fluid (CSF) proteome serves as a baseline reference for CSF biomarker discovery and provides insight into CSF physiology. In this study, high-pH reverse-phase liquid chromatography (hp-RPLC) was first integrated with a TripleTOF 5600 mass spectrometer to comprehensively profile the normal CSF proteome. A total of 49,836 unique peptides and 3256 non-redundant proteins were identified. To obtain high-confidence results, 2513 proteins with at least 2 unique peptides were further selected as bona fide CSF proteins. Nearly 30% of the identified CSF proteins have not been previously reported in the normal CSF proteome. More than 25% of the CSF proteins were components of CNS cell microenvironments, and network analyses indicated their roles in the pathogenesis of neurological diseases. The top canonical pathway in which the CSF proteins participated was axon guidance signaling. More than one-third of the CSF proteins (788 proteins) were related to neurological diseases, and these proteins constitute potential CSF biomarker candidates. The mapping results can be freely downloaded at http://122.70.220.102:8088/csf/, which can be used to navigate the CSF proteome. For more information about the data, please refer to the related original article [1], which has been recently accepted by Journal of Proteomics.",2015-02-20 +21700675,KEGGtranslator: visualizing and converting the KEGG PATHWAY database to various formats.,"

Summary

The KEGG PATHWAY database provides a widely used service for metabolic and nonmetabolic pathways. It contains manually drawn pathway maps with information about the genes, reactions and relations contained therein. To store these pathways, KEGG uses KGML, a proprietary XML-format. Parsers and translators are needed to process the pathway maps for usage in other applications and algorithms. We have developed KEGGtranslator, an easy-to-use stand-alone application that can visualize and convert KGML formatted XML-files into multiple output formats. Unlike other translators, KEGGtranslator supports a plethora of output formats, is able to augment the information in translated documents (e.g. MIRIAM annotations) beyond the scope of the KGML document, and amends missing components to fragmentary reactions within the pathway to allow simulations on those.

Availability

KEGGtranslator is freely available as a Java(™) Web Start application and for download at http://www.cogsys.cs.uni-tuebingen.de/software/KEGGtranslator/. KGML files can be downloaded from within the application.

Contact

clemens.wrzodek@uni-tuebingen.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-23 +25701568,AIDA: ab initio domain assembly for automated multi-domain protein structure prediction and domain-domain interaction prediction.,"

Motivation

Most proteins consist of multiple domains, independent structural and evolutionary units that are often reshuffled in genomic rearrangements to form new protein architectures. Template-based modeling methods can often detect homologous templates for individual domains, but templates that could be used to model the entire query protein are often not available.

Results

We have developed a fast docking algorithm ab initio domain assembly (AIDA) for assembling multi-domain protein structures, guided by the ab initio folding potential. This approach can be extended to discontinuous domains (i.e. domains with 'inserted' domains). When tested on experimentally solved structures of multi-domain proteins, the relative domain positions were accurately found among top 5000 models in 86% of cases. AIDA server can use domain assignments provided by the user or predict them from the provided sequence. The latter approach is particularly useful for automated protein structure prediction servers. The blind test consisting of 95 CASP10 targets shows that domain boundaries could be successfully determined for 97% of targets.

Availability and implementation

The AIDA package as well as the benchmark sets used here are available for download at http://ffas.burnham.org/AIDA/.

Contact

adam@sanfordburnham.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-19 +25832298,Evidence for human norovirus infection of dogs in the United kingdom.,"Human noroviruses (HuNoVs) are a major cause of viral gastroenteritis, with an estimated 3 million cases per year in the United Kingdom. HuNoVs have recently been isolated from pet dogs in Europe (M. Summa, C.-H. von Bonsdorff, and L. Maunula, J Clin Virol 53:244-247, 2012, http://dx.doi.org/10.1016/j.jcv.2011.12.014), raising concerns about potential zoonotic infections. With 31% of United Kingdom households owning a dog, this could prove to be an important transmission route. To examine this risk, canine tissues were studied for their ability to bind to HuNoV in vitro. In addition, canine stool samples were analyzed for the presence of viral nucleic acid, and canine serum samples were tested for the presence of anti-HuNoV antibodies. The results showed that seven different genotypes of HuNoV virus-like particles (VLPs) can bind to canine gastrointestinal tissue, suggesting that infection is at least theoretically possible. Although HuNoV RNA was not identified in stool samples from 248 dogs, serological evidence of previous exposure to HuNoV was obtained in 43/325 canine serum samples. Remarkably, canine seroprevalence for different HuNoV genotypes mirrored the seroprevalence in the human population. Though entry and replication within cells have not been demonstrated, the canine serological data indicate that dogs produce an immune response to HuNoV, implying productive infection. In conclusion, this study reveals zoonotic implications for HuNoV, and to elucidate the significance of this finding, further epidemiological and molecular investigations will be essential.",2015-04-01 +25630377,Deterministic identification of specific individuals from GWAS results.,"

Motivation

Genome-wide association studies (GWASs) are commonly applied on human genomic data to understand the causal gene combinations statistically connected to certain diseases. Patients involved in these GWASs could be re-identified when the studies release statistical information on a large number of single-nucleotide polymorphisms. Subsequent work, however, found that such privacy attacks are theoretically possible but unsuccessful and unconvincing in real settings.

Results

We derive the first practical privacy attack that can successfully identify specific individuals from limited published associations from the Wellcome Trust Case Control Consortium (WTCCC) dataset. For GWAS results computed over 25 randomly selected loci, our algorithm always pinpoints at least one patient from the WTCCC dataset. Moreover, the number of re-identified patients grows rapidly with the number of published genotypes. Finally, we discuss prevention methods to disable the attack, thus providing a solution for enhancing patient privacy.

Availability and implementation

Proofs of the theorems and additional experimental results are available in the support online documents. The attack algorithm codes are publicly available at https://sites.google.com/site/zhangzhenjie/GWAS_attack.zip. The genomic dataset used in the experiments is available at http://www.wtccc.org.uk/ on request.",2015-01-27 +22914218,Qualimap: evaluating next-generation sequencing alignment data.,"

Motivation

The sequence alignment/map (SAM) and the binary alignment/map (BAM) formats have become the standard method of representation of nucleotide sequence alignments for next-generation sequencing data. SAM/BAM files usually contain information from tens to hundreds of millions of reads. Often, the sequencing technology, protocol and/or the selected mapping algorithm introduce some unwanted biases in these data. The systematic detection of such biases is a non-trivial task that is crucial to drive appropriate downstream analyses.

Results

We have developed Qualimap, a Java application that supports user-friendly quality control of mapping data, by considering sequence features and their genomic properties. Qualimap takes sequence alignment data and provides graphical and statistical analyses for the evaluation of data. Such quality-control data are vital for highlighting problems in the sequencing and/or mapping processes, which must be addressed prior to further analyses.

Availability

Qualimap is freely available from http://www.qualimap.org.",2012-08-22 +25715848,miRNA-dis: microRNA precursor identification based on distance structure status pairs.,"MicroRNA precursor identification is an important task in bioinformatics. Support Vector Machine (SVM) is one of the most effective machine learning methods used in this field. The performance of SVM-based methods depends on the vector representations of RNAs. However, the discriminative power of the existing feature vectors is limited, and many methods lack an interpretable model for analysis of characteristic sequence features. Prior studies have demonstrated that sequence or structure order effects were relevant for discrimination, but little work has explored how to use this kind of information for human pre-microRNA identification. In this study, in order to incorporate the structure-order information into the prediction, a method called ""miRNA-dis"" was proposed, in which the feature vector was constructed by the occurrence frequency of the ""distance structure status pair"" or just the ""distance-pair"". Rigorous cross-validations on a much larger and more stringent newly constructed benchmark dataset showed that the miRNA-dis outperformed some state-of-the-art predictors in this area. Remarkably, miRNA-dis trained with human data can correctly predict 87.02% of the 4022 pre-miRNAs from 11 different species ranging from animals, plants and viruses. miRNA-dis would be a useful high throughput tool for large-scale analysis of microRNA precursors. In addition, the learnt model can be easily analyzed in terms of discriminative features, and some interesting patterns were discovered, which could reflect the characteristics of microRNAs. 
A user-friendly web-server of miRNA-dis was constructed, which is freely accessible to the public at the web-site on http://bioinformatics.hitsz.edu.cn/miRNA-dis/.",2015-04-01 +24408034,Genomic and phenotypic characterization of a wild medaka population: towards the establishment of an isogenic population genetic resource in fish.,"Oryzias latipes (medaka) has been established as a vertebrate genetic model for more than a century and recently has been rediscovered outside its native Japan. The power of new sequencing methods now makes it possible to reinvigorate medaka genetics, in particular by establishing a near-isogenic panel derived from a single wild population. Here we characterize the genomes of wild medaka catches obtained from a single Southern Japanese population in Kiyosu as a precursor for the establishment of a near-isogenic panel of wild lines. The population is free of significant detrimental population structure and has advantageous linkage disequilibrium properties suitable for the establishment of the proposed panel. Analysis of morphometric traits in five representative inbred strains suggests phenotypic mapping will be feasible in the panel. In addition, high-throughput genome sequencing of these medaka strains confirms their evolutionary relationships on lines of geographic separation and provides further evidence that there has been little significant interbreeding between the Southern and Northern medaka population since the Southern/Northern population split. The sequence data suggest that the Southern Japanese medaka existed as a larger older population that went through a relatively recent bottleneck approximately 10,000 years ago. In addition, we detect patterns of recent positive selection in the Southern population. These data indicate that the genetic structure of the Kiyosu medaka samples is suitable for the establishment of a vertebrate near-isogenic panel and therefore inbreeding of 200 lines based on this population has commenced. 
Progress of this project can be tracked at http://www.ebi.ac.uk/birney-srv/medaka-ref-panel.",2014-03-20 +24762090,Follow-up of breast papillary lesion on core needle biopsy: experience in African-American population.,"

Background

The optimal course of clinical follow-up after a diagnosis of breast papillary lesion on a core needle biopsy (CNB) remains elusive. In particular, no reports in literature have addressed this question in African-American population. We describe our experience with breast papillary lesions in a primarily African-American population.

Methods

A search of our database for breast papillary lesions diagnosed on CNB between September 2002 and September 2012 was conducted. Cases were categorized into benign, atypical, and malignant. CK5/6 and CK903 stains were performed when necessary.

Results

A total of 64 breast papillary lesions were diagnosed on CNB, including 55 (86%) benign papillary lesions, 6 (9%) atypical lesions, and 3 (5%) intraductal papillary carcinomas. Of these 64 patients, 29 patients (25 African-Americans, 3 Hispanics, 1 Asian American) underwent lumpectomy within 6 months after CNB. Pathology of the lumpectomy showed: five of the 25 (20%) benign papillary lesions on needle biopsy were upgraded to intraductal or invasive papillary carcinoma; 2 of the 3 atypical papillary lesion cases on core biopsy were upgraded (67%), one into intraductal papillary carcinoma, the other invasive papillary carcinoma; the only case of malignant papillary lesion on CNB remained as intraductal papillary carcinoma on lumpectomy. The rate of upgrade in lumpectomy/mastectomy was 25%. CK5/6 and CK903 immunostains were performed on all seven core needle biopsies that were later upgraded.

Conclusions

In our predominantly African-American urban population, 25% of benign or atypical papillary lesions diagnosed on CNB was upgraded in the final excisional examination. Early excision of all papillary lesions diagnosed on CNB may be justified in this patient population.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/7950117821177201.",2014-04-24 +26484070,A definitive haplotype map of structural variations determined by microarray analysis of duplicated haploid genomes.,"Complete hydatidiform moles (CHMs) are tissues carrying duplicated haploid genomes derived from single sperms, and detecting copy number variations (CNVs) in CHMs is assumed to be sensitive and straightforward methods. We genotyped 108 CHM genomes using Affymetrix SNP 6.0 (GEO#: GSE18642) and Illumina 1 M-duo (GEO#: GSE54948). After quality control, we obtained 84 definitive haplotype consisting of 1.7 million SNPs and 2339 CNV regions. The results are presented in the database of our web site (http://orca.gen.kyushu-u.ac.jp/cgi-bin/gbrowse/humanBuild37D4_1/).",2014-04-24 +22276777,miRdSNP: a database of disease-associated SNPs and microRNA target sites on 3'UTRs of human genes.,"

Background

Single nucleotide polymorphisms (SNPs) can lead to the susceptibility and onset of diseases through their effects on gene expression at the posttranscriptional level. Recent findings indicate that SNPs could create, destroy, or modify the efficiency of miRNA binding to the 3'UTR of a gene, resulting in gene dysregulation. With the rapidly growing number of published disease-associated SNPs (dSNPs), there is a strong need for resources specifically recording dSNPs on the 3'UTRs and their nucleotide distance from miRNA target sites. We present here miRdSNP, a database incorporating three important areas of dSNPs, miRNA target sites, and diseases.

Description

miRdSNP provides a unique database of dSNPs on the 3'UTRs of human genes manually curated from PubMed. The current release includes 786 dSNP-disease associations for 630 unique dSNPs and 204 disease types. miRdSNP annotates genes with experimentally confirmed targeting by miRNAs and indexes miRNA target sites predicted by TargetScan and PicTar as well as potential miRNA target sites newly generated by dSNPs. A robust web interface and search tools are provided for studying the proximity of miRNA binding sites to dSNPs in relation to human diseases. Searches can be dynamically filtered by gene name, miRBase ID, target prediction algorithm, disease, and any nucleotide distance between dSNPs and miRNA target sites. Results can be viewed at the sequence level showing the annotated locations for miRNA target sites and dSNPs on the entire 3'UTR sequences. The integration of dSNPs with the UCSC Genome browser is also supported.

Conclusion

miRdSNP provides a comprehensive data source of dSNPs and robust tools for exploring their distance from miRNA target sites on the 3'UTRs of human genes. miRdSNP enables researchers to further explore the molecular mechanism of gene dysregulation for dSNPs at posttranscriptional level. miRdSNP is freely available on the web at http://mirdsnp.ccr.buffalo.edu.",2012-01-25 +24499498,Red blood cell distribution width is not correlated with preeclampsia among pregnant Sudanese women.,"

Background

Preeclampsia is a leading cause of maternal and perinatal mortality worldwide. The exact etiology of preeclampsia is unknown, but the inflammatory process is postulated as one of the etiologies. Red blood cell distribution width (RDW) is a measure of anisocytosis (variation of red cell size) and is associated with hypertension and diabetic ketoacidosis. There are few data on the association between RDW and preeclampsia. This study aimed to investigate the association between RDW and preeclampsia.

Methods

A case-control study was conducted at Khartoum Hospital, Sudan, during June to August 2012. Cases were women with preeclampsia and healthy women were controls. Sociodemographic characteristics, obstetrics, and clinical data were recorded. The complete blood count, including RDW, was measured using an automated hematology analyzer.

Results

The cases and controls (65 women in each arm) were matched in their basic characteristics. There was no difference in the mean (SD) RDW between women with preeclampsia and controls (14.5 ± 1.8% vs. 14.4 ± 1.4%, P = 0.710). There was also no difference in the mean RDW between women with mild and severe preeclampsia (14.7 ± 1.9% vs. 13.9 ± 1.4%, P = 0.144. In logistic regression, there was no association between RDW and preeclampsia (OR = 0.9, CI = 0.7-1.1, P = 0.952).

Conclusions

RDW levels are not associated with the presence or severity of preeclampsia.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1206247718115175.",2014-02-05 +23892401,Utilizing sequence intrinsic composition to classify protein-coding and long non-coding transcripts.,"It is a challenge to classify protein-coding or non-coding transcripts, especially those re-constructed from high-throughput sequencing data of poorly annotated species. This study developed and evaluated a powerful signature tool, Coding-Non-Coding Index (CNCI), by profiling adjoining nucleotide triplets to effectively distinguish protein-coding and non-coding sequences independent of known annotations. CNCI is effective for classifying incomplete transcripts and sense-antisense pairs. The implementation of CNCI offered highly accurate classification of transcripts assembled from whole-transcriptome sequencing data in a cross-species manner, that demonstrated gene evolutionary divergence between vertebrates, and invertebrates, or between plants, and provided a long non-coding RNA catalog of orangutan. CNCI software is available at http://www.bioinfo.org/software/cnci.",2013-07-27 +24966527,Diaretinopathy database -A Gene database for diabetic retinopathy.,"

Unlabelled

Diabetic retinopathy is a microvascular complication of diabetes mellitus and is a major cause of adult blindness. Despite advances in diagnosis and treatment the pathogenesis of diabetic retinopathy is not well understood. Results from epidemiological studies of diabetic patients suggest that there are familial predispositions to diabetes and to diabetic retinopathy. Therefore the main purpose of this database is to help both scientists and doctors in studying the candidate genes responsible for causing diabetic retinopathy. For each candidate gene official symbol, chromosome map, number of exons, GT-AG introns, motif, polymorphic variation and 3D structure are given respectively. In addition to molecular class and function of these genes, this database also provides links to download the corresponding nucleotide and amino acid sequences in FASTA format which may be further used for computational approaches. Therefore this database will increase the understanding of the genetics underlying the development or progression of diabetic retinopathy and will have an impact on future diagnostic, prevention and intervention strategies.

Availability

The database is freely available at http://diaretinopathydatabase.com.",2014-04-23 +21554380,"spyder, a new method for in silico design and assessment of 16S rRNA gene primers for molecular microbial ecology.","Molecular microbial ecology studies are heavily reliant on 'Universal' 16S rRNA gene primers for elucidating microbial community structure and composition, and yet primer design and optimization is often overlooked. Primers that exhibit minor biases due to primer-template mismatches can substantially alter the pool of amplicons from a community DNA sample, resulting in inaccurate conclusions. As a result, it is important that primers are critically evaluated against the most comprehensive data sets available before commencing molecular microbial community studies. We present a user-friendly, multi-platform (e.g. Windows, Linux, Mac) method named spyder for the in silico design and assessment of 16S rRNA gene primers. The method utilizes the Ribosomal Database Project's Probe Match feature coupled with a compact program (available at http://people.uleth.ca/~selibl/Spyder/Spyder.html) that aligns and identifies mismatches between primers and templates. To demonstrate the value of spyder, we assessed commonly used 'Universal' and phyla-specific primers and identified primer modifications that improved the coverage of target organisms by 5-42% as well as removed excessive degeneracies.",2011-05-25 +24651453,HD chromoendoscopy coupled with DNA mass spectrometry profiling identifies somatic mutations in microdissected human proximal aberrant crypt foci.,"

Unlabelled

Despite increased implementation of screening colonoscopy, interval cancers in the proximal colon remain a major public health concern. This fact underscores the limitations of current screening paradigms and the need for developing advanced endoscopic techniques. The density of aberrant crypt foci (ACF), the earliest identifiable mucosal abnormality, may serve as a surrogate marker for colon cancer risk, but has rarely been studied in the proximal colon. To this end, high-definition (HD) chromoendoscopy was conducted to define the relevance of ACF in the proximal colon. In addition, due to limited ACF size, the development of a combinatorial approach was required to maximize data acquisition obtained from individual biopsy samples. Proximal and distal ACF samples were characterized for a total of 105 mutations across 22 known tumor suppressor and proto-oncogenes using high-throughput Sequenom MassARRAY analysis. From this profiling, a discrete number of somatic mutations were identified, including APC(R876*) and FLT3(I836M), as well as a deletion within the EGFR gene. Combined, these data highlight the significance of ACF within the context of colon cancer pathogenesis, particularly in the proximal colon.

Implications

The identification of cancer-related mutations in commonly overlooked mucosal lesions underscores the preventive benefit of implementing advanced endoscopic screening to larger patient populations, particularly in the proximal colon. Visual Overview: http://mcr.aacrjournals.org/content/early/2014/05/22/1541-7786.MCR-13-0624/F1.large.jpg. Mol Cancer Res; 12(6); 823-9. ©2014 AACR.",2014-03-20 +24886930,"The HIVToolbox 2 web system integrates sequence, structure, function and mutation analysis.","There is enormous interest in studying HIV pathogenesis for improving the treatment of patients with HIV infection. HIV infection has become one of the best-studied systems for understanding how a virus can hijack a cell. To help facilitate discovery, we previously built HIVToolbox, a web system for visual data mining. The original HIVToolbox integrated information for HIV protein sequence, structure, functional sites, and sequence conservation. This web system has been used for almost 40,000 searches. We report improvements to HIVToolbox including new functions and workflows, data updates, and updates for ease of use. HIVToolbox2, is an improvement over HIVToolbox with new functions. HIVToolbox2 has new functionalities focused on HIV pathogenesis including drug-binding sites, drug-resistance mutations, and immune epitopes. The integrated, interactive view enables visual mining to generate hypotheses that are not readily revealed by other approaches. Most HIV proteins form multimers, and there are posttranslational modification and protein-protein interaction sites at many of these multimerization interfaces. Analysis of protease drug binding sites reveals an anatomy of drug resistance with different types of drug-resistance mutations regionally localized on the surface of protease. Some of these drug-resistance mutations have a high prevalence in specific HIV-1 M subtypes. 
Finally, consolidation of Tat functional sites reveals a hotspot region where there appear to be 30 interactions or posttranslational modifications. A cursory analysis with HIVToolbox2 has helped to identify several global patterns for HIV proteins. An initial analysis with this tool identifies homomultimerization of almost all HIV proteins, functional sites that overlap with multimerization sites, a global drug resistance anatomy for HIV protease, and specific distributions of some DRMs in specific HIV M subtypes. HIVToolbox2 is an open-access web application available at [http://hivtoolbox2.bio-toolkit.com].",2014-06-02 +26528712,"Ambient PM2.5, O₃, and NO₂ Exposures and Associations with Mortality over 16 Years of Follow-Up in the Canadian Census Health and Environment Cohort (CanCHEC).","

Background

Few studies examining the associations between long-term exposure to ambient air pollution and mortality have considered multiple pollutants when assessing changes in exposure due to residential mobility during follow-up.

Objective

We investigated associations between cause-specific mortality and ambient concentrations of fine particulate matter (≤ 2.5 μm; PM2.5), ozone (O3), and nitrogen dioxide (NO2) in a national cohort of about 2.5 million Canadians.

Methods

We assigned estimates of annual concentrations of these pollutants to the residential postal codes of subjects for each year during 16 years of follow-up. Historical tax data allowed us to track subjects' residential postal code annually. We estimated hazard ratios (HRs) for each pollutant separately and adjusted for the other pollutants. We also estimated the product of the three HRs as a measure of the cumulative association with mortality for several causes of death for an increment of the mean minus the 5th percentile of each pollutant: 5.0 μg/m3 for PM2.5, 9.5 ppb for O3, and 8.1 ppb for NO2.

Results

PM2.5, O3, and NO2 were associated with nonaccidental and cause-specific mortality in single-pollutant models. Exposure to PM2.5 alone was not sufficient to fully characterize the toxicity of the atmospheric mix or to fully explain the risk of mortality associated with exposure to ambient pollution. Assuming additive associations, the estimated HR for nonaccidental mortality corresponding to a change in exposure from the mean to the 5th percentile for all three pollutants together was 1.075 (95% CI: 1.067, 1.084). Accounting for residential mobility had only a limited impact on the association between mortality and PM2.5 and O3, but increased associations with NO2.

Conclusions

In this large, national-level cohort, we found positive associations between several common causes of death and exposure to PM2.5, O3, and NO2.

Citation

Crouse DL, Peters PA, Hystad P, Brook JR, van Donkelaar A, Martin RV, Villeneuve PJ, Jerrett M, Goldberg MS, Pope CA III, Brauer M, Brook RD, Robichaud A, Menard R, Burnett RT. 2015. Ambient PM2.5, O3, and NO2 exposures and associations with mortality over 16 years of follow-up in the Canadian Census Health and Environment Cohort (CanCHEC). Environ Health Perspect 123:1180-1186; http://dx.doi.org/10.1289/ehp.1409276.",2015-11-01 +26261718,"Micropublications: a semantic model for claims, evidence, arguments and annotations in biomedical communications.","

Background

Scientific publications are documentary representations of defeasible arguments, supported by data and repeatable methods. They are the essential mediating artifacts in the ecosystem of scientific communications. The institutional ""goal"" of science is publishing results. The linear document publication format, dating from 1665, has survived transition to the Web. Intractable publication volumes; the difficulty of verifying evidence; and observed problems in evidence and citation chains suggest a need for a web-friendly and machine-tractable model of scientific publications. This model should support: digital summarization, evidence examination, challenge, verification and remix, and incremental adoption. Such a model must be capable of expressing a broad spectrum of representational complexity, ranging from minimal to maximal forms.

Results

The micropublications semantic model of scientific argument and evidence provides these features. Micropublications support natural language statements; data; methods and materials specifications; discussion and commentary; challenge and disagreement; as well as allowing many kinds of statement formalization. The minimal form of a micropublication is a statement with its attribution. The maximal form is a statement with its complete supporting argument, consisting of all relevant evidence, interpretations, discussion and challenges brought forward in support of or opposition to it. Micropublications may be formalized and serialized in multiple ways, including in RDF. They may be added to publications as stand-off metadata. An OWL 2 vocabulary for micropublications is available at http://purl.org/mp. A discussion of this vocabulary along with RDF examples from the case studies, appears as OWL Vocabulary and RDF Examples in Additional file 1.

Conclusion

Micropublications, because they model evidence and allow qualified, nuanced assertions, can play essential roles in the scientific communications ecosystem in places where simpler, formalized and purely statement-based models, such as the nanopublications model, will not be sufficient. At the same time they will add significant value to, and are intentionally compatible with, statement-based formalizations. We suggest that micropublications, generated by useful software tools supporting such activities as writing, editing, reviewing, and discussion, will be of great value in improving the quality and tractability of biomedical communications.",2014-07-04 +23761450,VLDP web server: a powerful geometric tool for analysing protein structures in their environment.,"Protein structures are an ensemble of atoms determined experimentally mostly by X-ray crystallography or Nuclear Magnetic Resonance. Studying 3D protein structures is a key point for better understanding protein function at a molecular level. We propose a set of accurate tools, for analysing protein structures, based on the reliable method of Voronoi-Laguerre tessellations. The Voronoi Laguerre Delaunay Protein web server (VLDPws) computes the Laguerre tessellation on a whole given system first embedded in solvent. Through this fine description, VLDPws gives the following data: (i) Amino acid volumes evaluated with high precision, as confirmed by good correlations with experimental data. (ii) A novel definition of inter-residue contacts within the given protein. (iii) A measure of the residue exposure to solvent that significantly improves the standard notion of accessibility in some cases. At present, no equivalent web server is available. VLDPws provides output in two complementary forms: direct visualization of the Laguerre tessellation, mostly its polygonal molecular surfaces; files of volumes; and areas, contacts and similar data for each residue and each atom. 
These files are available for download for further analysis. VLDPws can be accessed at http://www.dsimb.inserm.fr/dsimb_tools/vldp.",2013-06-12 +24433564,XS: a FASTQ read simulator.,"

Background

The emerging next-generation sequencing (NGS) is bringing, besides the natural huge amounts of data, an avalanche of new specialized tools (for analysis, compression, alignment, among others) and large public and private network infrastructures. Therefore, a direct necessity of specific simulation tools for testing and benchmarking is rising, such as a flexible and portable FASTQ read simulator, without the need of a reference sequence, yet correctly prepared for producing approximately the same characteristics as real data.

Findings

We present XS, a skilled FASTQ read simulation tool, flexible, portable (does not need a reference sequence) and tunable in terms of sequence complexity. It has several running modes, depending on the time and memory available, and is aimed at testing computing infrastructures, namely cloud computing of large-scale projects, and testing FASTQ compression algorithms. Moreover, XS offers the possibility of simulating the three main FASTQ components individually (headers, DNA sequences and quality-scores).

Conclusions

XS provides an efficient and convenient method for fast simulation of FASTQ files, such as those from Ion Torrent (currently uncovered by other simulators), Roche-454, Illumina and ABI-SOLiD sequencing machines. This tool is publicly available at http://bioinformatics.ua.pt/software/xs/.",2014-01-16 +23044549,MGAviewer: a desktop visualization tool for analysis of metagenomics alignment data.,"

Summary

Numerous metagenomics projects have produced tremendous amounts of sequencing data. Aligning these sequences to reference genomes is an essential analysis in metagenomics studies. Large-scale alignment data call for intuitive and efficient visualization tools. However, current tools such as various genome browsers are highly specialized to handle intraspecies mapping results. They are not suitable for alignment data in metagenomics, which are often interspecies alignments. We have developed a web browser-based desktop application for interactively visualizing alignment data of metagenomic sequences. This viewer is easy to use on all computer systems with modern web browsers and requires no software installation.

Availability

http://weizhongli-lab.org/mgaviewer",2012-10-08 +24814382,Role of a prudent breakfast in improving cardiometabolic risk factors in subjects with hypercholesterolemia: a randomized controlled trial.,"

Background & aims

It is unclear whether advising a prudent breakfast alone is sufficient to improve blood lipids and cardiometabolic risk factors in overweight hypercholesterolemic subjects. The aim of this study was to investigate whether a prudent low-fat breakfast (PB) rich in dietary fiber lowers low-density lipoprotein cholesterol (LDL-C) and other cardiometabolic risk factors in subjects with elevated LDL-cholesterol levels.

Methods

In a parallel, controlled, 12-week study, 79 healthy overweight subjects (all regular breakfast eaters) were randomly allocated to a group that received a PB based on Nordic foods provided ad libitum or a control group that consumed their usual breakfast. The primary outcome was plasma LDL-C. Secondary outcomes were other blood lipids, body weight, sagittal abdominal diameter (SAD), glucose tolerance, insulin sensitivity and inflammation markers (C-reactive protein [CRP] and tumor necrosis factor receptor-2 [TNF-R2]), and blood pressure. The PB was in accordance with national and Nordic nutrition recommendations and included oat bran porridge with low-fat milk or yogurt, bilberry or lingonberry jam, whole grain bread, low-fat spread, poultry or fatty fish, and fruit.

Results

No differences were found in LDL-C, other blood lipids, body weight, or glucose metabolism, but SAD, plasma CRP, and TNF-R2 decreased more during PB compared with controls (p < 0.05). In the overall diet, PB increased dietary fiber and β-glucan compared with controls (p < 0.05).

Conclusions

Advising a prudent breakfast for 3 months did not influence blood lipids, body weight, or glucose metabolism but reduced markers of visceral fat and inflammation. The trial was registered in the Current Controlled Trials database (http://www.controlled-trials.com); International Standard Randomized Controlled Trial Number (ISRCTN): 84550872.",2014-04-21 +25943348,"Complement activation, placental malaria infection, and birth weight in areas characterized by unstable malaria transmission in central Sudan.","

Background

The pathogenesis of malaria during pregnancy is not completely understood. There are few published data on complement activation and malaria during pregnancy. This study aimed to investigate complement activation and malaria during pregnancy, and their association with hemoglobin and birth weight.

Methods

A cross-sectional study was conducted at Medani, Sudan. Soluble terminal complement complex (TCC) levels were measured using ELISA in maternal and cord blood samples from 126 parturient women.

Results

There were no Plasmodium falciparum-positive blood films from maternal peripheral blood, the placenta, or cord blood samples. Three (2.4%) and 22 (17.5%) of the placentas showed chronic and previous infection with histopathological examination, respectively, while 101 (80.2%) of them had no malaria infection. The mean [SD] of the maternal (22.4 [6.1] vs. 26.5 [3.5] ng/ml, P < 0.001) and cord blood (24.5 [4.5] vs. 26.8 [4.4] ng/ml, P = 0.024) TCC levels were significantly lower in cases of placental malaria infection (n = 25) than in those without placental malaria infection (n = 101). Linear regression showed that placental malaria infection was significantly associated with birth weight (-0.353 g, P = 0.013), but there were no associations between maternal and cord TCC levels and maternal hemoglobin, or between TCC levels and birth weight.

Conclusion

Maternal and cord blood TCC levels are lower in women with placental malaria infection than in those without placental malaria infection.

Virtual slide

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/9600054761463915.",2015-05-06 +25937879,Evaluating the Emotion Ontology through use in the self-reporting of emotional responses at an academic conference.,"

Background

We evaluate the application of the Emotion Ontology (EM) to the task of self-reporting of emotional experience in the context of audience response to academic presentations at the International Conference on Biomedical Ontology (ICBO). Ontology evaluation is regarded as a difficult task. Types of ontology evaluation range from gauging adherence to some philosophical principles, following some engineering method, to assessing fitness for purpose. The Emotion Ontology (EM) represents emotions and all related affective phenomena, and should enable self-reporting or articulation of emotional states and responses; how do we know if this is the case? Here we use the EM 'in the wild' in order to evaluate the EM's ability to capture people's self-reported emotional responses to a situation through use of the vocabulary provided by the EM.

Results

To achieve this evaluation we developed a tool, EmOntoTag, in which audience members were able to capture their self-reported emotional responses to scientific presentations using the vocabulary offered by the EM. We furthermore asked participants using the tool to rate the appropriateness of an EM vocabulary term for capturing their self-assessed emotional response. Participants were also able to suggest improvements to the EM using a free-text feedback facility. Here, we present the data captured and analyse the EM's fitness for purpose in reporting emotional responses to conference talks.

Conclusions

Based on our analysis of this data set, our primary finding is that the audience are able to articulate their emotional response to a talk via the EM, and reporting via the EM ontology is able to draw distinctions between the audience's response to a speaker and between the speakers (or talks) themselves. Thus we can conclude that the vocabulary provided at the leaves of the EM is fit for purpose in this setting. We additionally obtained interesting observations from the experiment as a whole, such as that the majority of emotions captured had positive valence, and the free-form feedback supplied new terms for the EM.

Availability

EmOntoTag can be seen at http://www.bioontology.ch/emontotag; source code can be downloaded from http://emotion-ontology.googlecode.com/svn/trunk/apps/emontotag/and the ontology is available at http://purl.obolibrary.org/obo/MFOEM.owl.",2014-09-03 +21901122,Multicoil2: predicting coiled coils and their oligomerization states from sequence in the twilight zone.,"The alpha-helical coiled coil can adopt a variety of topologies, among the most common of which are parallel and antiparallel dimers and trimers. We present Multicoil2, an algorithm that predicts both the location and oligomerization state (two versus three helices) of coiled coils in protein sequences. Multicoil2 combines the pairwise correlations of the previous Multicoil method with the flexibility of Hidden Markov Models (HMMs) in a Markov Random Field (MRF). The resulting algorithm integrates sequence features, including pairwise interactions, through multinomial logistic regression to devise an optimized scoring function for distinguishing dimer, trimer and non-coiled-coil oligomerization states; this scoring function is used to produce Markov Random Field potentials that incorporate pairwise correlations localized in sequence. Multicoil2 significantly improves both coiled-coil detection and dimer versus trimer state prediction over the original Multicoil algorithm retrained on a newly-constructed database of coiled-coil sequences. The new database, comprised of 2,105 sequences containing 124,088 residues, includes reliable structural annotations based on experimental data in the literature. Notably, the enhanced performance of Multicoil2 is evident when tested in stringent leave-family-out cross-validation on the new database, reflecting expected performance on challenging new prediction targets that have minimal sequence similarity to known coiled-coil families. 
The Multicoil2 program and training database are available for download from http://multicoil2.csail.mit.edu.",2011-08-25 +25316677,SNPsnap: a Web-based tool for identification and annotation of matched SNPs.,"

Summary

An important computational step following genome-wide association studies (GWAS) is to assess whether disease or trait-associated single-nucleotide polymorphisms (SNPs) enrich for particular biological annotations. SNP-based enrichment analysis needs to account for biases such as co-localization of GWAS signals to gene-dense and high linkage disequilibrium (LD) regions, and correlations of gene size, location and function. The SNPsnap Web server enables SNP-based enrichment analysis by providing matched sets of SNPs that can be used to calibrate background expectations. Specifically, SNPsnap efficiently identifies sets of randomly drawn SNPs that are matched to a set of query SNPs based on allele frequency, number of SNPs in LD, distance to nearest gene and gene density.

Availability and implementation

SNPsnap server is available at http://www.broadinstitute.org/mpg/snpsnap/.

Contact

joelh@broadinstitute.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-13 +25482723,"Classifying bio-concentration factor with random forest algorithm, influence of the bio-accumulative vs. non-bio-accumulative compound ratio to modelling result, and applicability domain for random forest model.","In environmental risk assessment, the bio-concentration factor (BCF) is a widely used parameter in the estimation of the bio-accumulation potential of chemicals. BCF data often have an uneven distribution of classes (bio-accumulative vs. non-bio-accumulative), which could severely bias the classification results towards the prevailing class. The present study focuses on the influence of uneven distribution of the classes in training phase of Random Forest (RF) classification models. Three different training set designs were used and descriptors selected to the models based on the occurrence frequency in RF trees and considering the mechanistic aspects they reflect. Models were compared and their classification performance was analysed, indicating good predictive characteristics (sensitivity = 0.90 and specificity = 0.83) for the balanced set; also imbalanced sets have their strengths in certain application scenarios. The confidence of classifications was assessed with a new schema for the applicability domain that makes use of the RF proximity matrix by analysing the similarity between the predicted compound and the training set of the model. All developed models were made available in the transparent, accessible and reproducible way in QsarDB repository (http://dx.doi.org/10.15152/QDB.116).",2014-12-06 +24362839,Interaction-based discovery of functionally important genes in cancers.,"A major challenge in cancer genomics is uncovering genes with an active role in tumorigenesis from a potentially large pool of mutated genes across patient samples. 
Here we focus on the interactions that proteins make with nucleic acids, small molecules, ions and peptides, and show that residues within proteins that are involved in these interactions are more frequently affected by mutations observed in large-scale cancer genomic data than are other residues. We leverage this observation to predict genes that play a functionally important role in cancers by introducing a computational pipeline (http://canbind.princeton.edu) for mapping large-scale cancer exome data across patients onto protein structures, and automatically extracting proteins with an enriched number of mutations affecting their nucleic acid, small molecule, ion or peptide binding sites. Using this computational approach, we show that many previously known genes implicated in cancers are enriched in mutations within the binding sites of their encoded proteins. By focusing on functionally relevant portions of proteins--specifically those known to be involved in molecular interactions--our approach is particularly well suited to detect infrequent mutations that may nonetheless be important in cancer, and should aid in expanding our functional understanding of the genomic landscape of cancer.",2013-12-19 +25548139,iScreen: Image-Based High-Content RNAi Screening Analysis Tools.,"High-throughput RNA interference (RNAi) screening has opened up a path to investigating functional genomics in a genome-wide pattern. However, such studies are often restricted to assays that have a single readout format. Recently, advanced image technologies have been coupled with high-throughput RNAi screening to develop high-content screening, in which one or more cell image(s), instead of a single readout, were generated from each well. 
This image-based high-content screening technology has led to genome-wide functional annotation in a wider spectrum of biological research studies, as well as in drug and target discovery, so that complex cellular phenotypes can be measured in a multiparametric format. Despite these advances, data analysis and visualization tools are still largely lacking for these types of experiments. Therefore, we developed iScreen (image-Based High-content RNAi Screening Analysis Tool), an R package for the statistical modeling and visualization of image-based high-content RNAi screening. Two case studies were used to demonstrate the capability and efficiency of the iScreen package. iScreen is available for download on CRAN (http://cran.cnr.berkeley.edu/web/packages/iScreen/index.html). The user manual is also available as a supplementary document.",2014-12-29 +26259808,Control of Hypertension In Pregnancy Study randomised controlled trial-are the results dependent on the choice of labetalol or methyldopa?,"

Objective

To determine whether the difference in outcomes between 'less tight' (target diastolic blood pressure [dBP] of 100 mmHg) versus 'tight' control (target dBP of 85 mmHg) in the CHIPS Trial (ISRCTN 71416914, http://pre-empt.cfri.ca/;CHIPS) depended on the choice of labetalol or methyldopa, the two most commonly used antihypertensive agents in CHIPS.

Design

Secondary analysis of CHIPS Trial data.

Setting

International multicentre randomised controlled trial (94 sites, 15 countries).

Population or sample

A total of 987 women with non-severe non-proteinuric pregnancy hypertension.

Methods

Logistic regression was used for comparisons of 'less tight' versus 'tight' control among women treated with labetalol (but not methyldopa) versus methyldopa (but not labetalol). Analyses were adjusted for the influence of baseline factors, including use of any antihypertensive therapy at randomisation.

Main outcome measures

Main CHIPS Trial outcomes: primary (perinatal loss or high-level neonatal care for > 48 hours), secondary (serious maternal complications), birthweight < 10th centile, severe maternal hypertension, pre-eclampsia, and delivery at < 34 or < 37 weeks.

Results

Of 987 women in CHIPS, antihypertensive therapy was taken by 566 women at randomisation (labetalol 111 ['less tight'] versus 127 ['tight'] or methyldopa 126 ['less tight'] versus 117 ['tight']) and 815 women after randomisation (labetalol 186 ['less tight'] versus 247 ['tight'] and methyldopa by 98 ['less tight'] versus 126 ['tight']). Following adjustment, odds ratios for outcomes in 'less tight' versus 'tight' control were similar between antihypertensive groups according to 'at randomisation' and 'after randomisation' therapy.

Conclusion

Outcomes for 'less tight' versus 'tight' control were not dependent on use of methyldopa or labetalol.

Tweetable abstract

In the CHIPS Trial, maternal and infant outcomes were not dependent on use of labetalol or methyldopa.",2015-08-11 +23772050,Joint analysis of expression profiles from multiple cancers improves the identification of microRNA-gene interactions.,"

Motivation

MicroRNAs (miRNAs) play a crucial role in tumorigenesis and development through their effects on target genes. The characterization of miRNA-gene interactions will lead to a better understanding of cancer mechanisms. Many computational methods have been developed to infer miRNA targets with/without expression data. Because expression datasets are in general limited in size, most existing methods concatenate datasets from multiple studies to form one aggregated dataset to increase sample size and power. However, such simple aggregation analysis results in identifying miRNA-gene interactions that are mostly common across datasets, whereas specific interactions may be missed by these methods. Recent releases of The Cancer Genome Atlas data provide paired expression profiling of miRNAs and genes in multiple tumors with sufficiently large sample size. To study both common and cancer-specific interactions, it is desirable to develop a method that can jointly analyze multiple cancers to study miRNA-gene interactions without combining all the data into one single dataset.

Results

We developed a novel statistical method to jointly analyze expression profiles from multiple cancers to identify miRNA-gene interactions that are both common across cancers and specific to certain cancers. The benefit of this joint analysis approach is demonstrated by both simulation studies and real data analysis of The Cancer Genome Atlas datasets. Compared with simple aggregate analysis or single sample analysis, our method can effectively use the shared information among different but related cancers to improve the identification of miRNA-gene interactions. Another useful property of our method is that it can estimate similarity among cancers through their shared miRNA-gene interactions.

Availability and implementation

The program, MCMG, implemented in R is available at http://bioinformatics.med.yale.edu/group/.",2013-06-14 +30727181,First Report of Powdery Mildew Caused by Erysiphe macleayae on Macleaya microcarpa in Poland.,"Macleaya microcarpa (Maxim.) Fedde, also known as smallfruit plume poppy, is a perennial herb belonging to the family Papaveraceae. The plant, together with the better-known species M. cordata (Willd.) R. Br., is native to central China and is now planted worldwide for medicinal purposes. In October 2008 and August 2009, dozens of smallfruit plume poppy planted in the Kraków Botanical Garden, Poland, were found to be severely infected with a powdery mildew. White colonies with abundant sporulation developed on both sides of leaves and young stems, forming circular to irregular patches. Infections caused leaf yellowing and premature defoliation. The damage has been observed every year since 2009. Representative voucher specimens were deposited in the fungal herbarium of the W. Szafer Institute of Botany of the Polish Academy of Sciences (KRAM) and the Korea University herbarium (KUS). Appressoria on the mycelia were lobed, often in pairs. Conidiophores composed of three to four cells arose from the upper part of creeping hyphae, 65 to 120 × 7 to 10 μm, attenuated toward the base, sub-straight or slightly flexuous in foot-cells, and produced conidia singly. Conidia were hyaline, oblong-elliptical to doliiform, 25 to 38 × 12 to 18 μm with a length/width ratio of 1.8 to 2.6; lacked fibrosin bodies; and produced germ tubes on the subterminal position with club-shaped or lobed appressoria. The conidial surface was wrinkled to irregularly reticulate. No chasmothecia were found. The structures described above match well with the anamorph of Erysiphe macleayae R.Y. Zheng & G.Q. Chen (3). 
To confirm the identity of the causal fungus, the internal transcribed spacer (ITS) region of rDNA from KUS-F24459 was amplified using primers ITS5 and P3 (4) and directly sequenced. The resulting sequence of 553 bp was deposited in GenBank (Accession No. JQ681217). A GenBank BLAST search using the present data revealed >99% sequence similarity of the isolate with E. macleayae on M. cordata from Japan (AB016048). Pathogenicity was confirmed through inoculation by gently pressing diseased leaves onto leaves of three healthy potted plants. Three noninoculated plants served as controls. Plants were maintained in a greenhouse at 25°C. Inoculated plants developed signs and symptoms after 7 days, whereas the control plants remained healthy. The fungus present on the inoculated plants was morphologically identical to that originally observed on diseased plants. The powdery mildew infections of M. cordata associated with E. macleayae have been recorded in China and Japan (2), and more recently in Germany (1,3). To our knowledge, this is the first report of E. macleayae on M. microcarpa globally as well as in Poland. This mildew species was described in China and is endemic to Asia, where chasmothecia of the fungus were found. Only recently have powdery mildews been found on M. cordata in Germany (1,3) and now on M. microcarpa in Poland, indicating the fungus is spreading in Europe. References: (1) N. Ale-Agha et al. Schlechtendalia 17:39, 2008. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , February 7, 2012. (3) A. Schmidt and M. Scholler. Mycotaxon 115:287, 2011. (4) S. Takamatsu et al. Mycol. Res. 113:117, 2009.",2012-09-01 +23768108,FusionQ: a novel approach for gene fusion detection and quantification from paired-end RNA-Seq.,"

Background

Gene fusions, which result from abnormal chromosome rearrangements, are a pathogenic factor in cancer development. The emerging RNA-Seq technology enables us to detect gene fusions and profile their features.

Results

In this paper, we proposed a novel fusion detection tool, FusionQ, based on paired-end RNA-Seq data. This tool can detect gene fusions, construct the structures of chimerical transcripts, and estimate their abundances. To confirm the read alignment on both sides of a fusion point, we employed a new approach, ""residual sequence extension"", which extended the short segments of the reads by aggregating their overlapping reads. We also proposed a list of filters to control the false-positive rate. In addition, we estimated fusion abundance using the Expectation-Maximization algorithm with sparse optimization, and further adopted it to improve the detection accuracy of the fusion transcripts. Simulation was performed by FusionQ and two other state-of-the-art fusion detection tools. FusionQ exceeded the other two in both sensitivity and specificity, especially in low coverage fusion detection. Using paired-end RNA-Seq data from breast cancer cell lines, FusionQ detected both the previously reported and new fusions. FusionQ reported the structures of these fusions and provided their expressions. Some highly expressed fusion genes detected by FusionQ are important biomarkers in breast cancer. The performance of FusionQ on cancer cell line data still showed better specificity and sensitivity in comparison with the other two tools.

Conclusions

FusionQ is a novel tool for fusion detection and quantification based on RNA-Seq data. It has both good specificity and sensitivity performance. FusionQ is free and available at http://www.wakehealth.edu/CTSB/Software/Software.htm.",2013-06-15 +26505805,"In Utero and Early-Life Exposure to Ambient Air Toxics and Childhood Brain Tumors: A Population-Based Case-Control Study in California, USA.","

Background

Little is known about the influence of environmental factors on the etiology of childhood brain tumors.

Objectives

We examined risks for brain tumors in children after prenatal and infant exposure to monitored ambient air toxics.

Methods

We ascertained all cases of medulloblastoma, central nervous system primitive neuroectodermal tumor (PNET), and astrocytoma before 6 years of age diagnosed in 1990-2007 from the California Cancer Registry and selected controls randomly from birth rolls matched by birth year. Exposures to air toxics during pregnancy/infancy for 43 PNET, 34 medulloblastoma, and 106 astrocytoma cases and 30,569 controls living within 5 mi of a monitor were determined. With factor analysis we assessed the correlational structures of 26 probable carcinogenic toxics, and estimated odds ratios by brain tumor type in logistic regression models.

Results

PNETs (≤ 38 cases) were positively associated with interquartile range (IQR) increases in prenatal exposure to acetaldehyde [odds ratio (OR) = 2.30; 95% CI: 1.44, 3.67], 1,3-butadiene (OR = 2.23; 95% CI: 1.28, 3.88), benzene, and toluene; and with IQR increases in exposure during the first year of life to ortho-dichlorobenzene (OR = 3.27; 95% CI: 1.17, 9.14), 1,3-butadiene (OR = 3.15; 95% CI: 1.57, 6.32), and benzene. All exposures except ortho-dichlorobenzene loaded on the same factor. Medulloblastoma (≤ 30 cases) was associated with prenatal exposure to polycyclic aromatic hydrocarbons (PAHs combined: OR = 1.44; 95% CI: 1.15, 1.80). Exposures to lead and some PAHs during the first year of life were positively associated with astrocytoma, but the confidence intervals included the null value (e.g., for lead, OR = 1.40; 95% CI: 0.97, 2.03).

Conclusions

Our data suggest that in utero and infancy exposures to air toxics generated by industrial and road traffic sources may increase the risk of PNET and medulloblastoma, with limited support for increased risks for astrocytoma in children up to age 6.

Citation

von Ehrenstein OS, Heck JE, Park AS, Cockburn M, Escobedo L, Ritz B. 2016. In Utero and early-life exposure to ambient air toxics and childhood brain tumors: a population-based case-control study in California, USA. Environ Health Perspect 124:1093-1099; http://dx.doi.org/10.1289/ehp.1408582.",2015-10-27 +23777757,Using joint ICA to link function and structure using MEG and DTI in schizophrenia.,"In this study we employed joint independent component analysis (jICA) to perform a novel multivariate integration of magnetoencephalography (MEG) and diffusion tensor imaging (DTI) data to investigate the link between function and structure. This model-free approach allows one to identify covariation across modalities with different temporal and spatial scales [temporal variation in MEG and spatial variation in fractional anisotropy (FA) maps]. Healthy controls (HC) and patients with schizophrenia (SP) participated in an auditory/visual multisensory integration paradigm to probe cortical connectivity in schizophrenia. To allow direct comparisons across participants and groups, the MEG data were registered to an average head position and regional waveforms were obtained by calculating the local field power of the planar gradiometers. Diffusion tensor images obtained in the same individuals were preprocessed to provide FA maps for each participant. The MEG/FA data were then integrated using the jICA software (http://mialab.mrn.org/software/fit). We identified MEG/FA components that demonstrated significantly different (p<0.05) covariation in MEG/FA data between diagnostic groups (SP vs. HC) and three components that captured the predominant sensory responses in the MEG data. Lower FA values in bilateral posterior parietal regions, which include anterior/posterior association tracts, were associated with reduced MEG amplitude (120-170 ms) of the visual response in occipital sensors in SP relative to HC. 
Additionally, increased FA in a right medial frontal region was linked with larger amplitude late MEG activity (300-400 ms) in bilateral central channels for SP relative to HC. Step-wise linear regression provided evidence that right temporal, occipital and late central components were significant predictors of reaction time and cognitive performance based on the Measurement and Treatment Research to Improve Cognition in Schizophrenia (MATRICS) cognitive assessment battery. These results point to dysfunction in a posterior visual processing network in schizophrenia, with reduced MEG amplitude, reduced FA and poorer overall performance on the MATRICS. Interestingly, the spatial location of the MEG activity and the associated FA regions are spatially consistent with white matter regions that subserve these brain areas. This novel approach provides evidence for significant pairing between function (neurophysiology) and structure (white matter integrity) and demonstrates that this multivariate, multimodal integration technique is sensitive to group differences in function and structure.",2013-06-15 +24986561,A prognostic analysis of pediatrics central nervous system small cell tumors: evaluation of EGFR family gene amplification and overexpression.,"

Background

Central nervous system (CNS) tumors are the most common solid tumors that occur in children, however there were few big-data follow-up analysis published in China. Overexpression of epidermal growth factor receptor (EGFR) family members was reported on glioblastoma (GBM) and medulloblastoma (MB) before. However, the correlation between EGFR family members expression with prognosis of MB, supratentorial primitive neuroectodermal tumor (PNET) and small cell GBM is unclear in Chinese children.

Methods

A retrospective and survival analysis was performed on children (age ≤ 16 years) diagnosed as CNS primary small cell tumors in the Affiliated Provincial Hospital, Shandong University from 2000 to 2012, including MB (n = 44), PNET (n = 8) and small cell GBM (n = 19). The expression of EGFR, ERBB-2, ERBB-3 and ERBB-4 were detected by immunohistochemistry (IHC). The fluorescence in situ hybridization (FISH) was used to observe the amplification of EGFR and ERBB-2 gene.

Results

Median survival times of MBs, small cell GBMs and PNETs were 23 ± 6.7 months, 8 ± 4.7 months and 10 ± 1.4 months, respectively. Expression and amplification of ERBB-2, ERBB-3 and ERBB-4 were not observed in all tumor samples. Multiple Cox regression suggested that the overexpression and amplification of EGFR were negative prognostic factors for MB. Radiotherapy had a positive effect for all pediatric patients.

Conclusion

Overexpression of EGFR predicts poor outcomes of MBs, small cell GBMs and PNETs, suggesting those three CNS tumor subtypes can be considered as one group for the potential common mechanism. The current individual treatment and big data analysis of pediatric CNS embryonal tumors and GBM continues to be very challenging in China.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/7649640001237474.",2014-07-01 +24640991,Analgesic efficacy of opioids in chronic pain: recent meta-analyses.,"

Unlabelled

Opioids are regularly administered in acute and cancer pain. In chronic non-cancer pain (CNCP), however, their use is controversial. Previous meta-analyses and randomized controlled trials (RCTs) lack methodological homogeneity and comparable data. Here we analysed the maximum analgesic efficacies of opioids and non-opioids compared with placebo, and of physiotherapy and psychotherapy compared with active or waiting-list controls. We screened 3647 citations and included RCTs if treatment duration was at least 3 weeks, data were sufficient for meta-analysis, and criteria for high quality were met. Only 46 studies (10 742 patients) met the criteria. Weighted and standardized mean differences (WMD, SMD) between pain intensities were pooled to conduct separate meta-analyses for each treatment category. At the end of treatment the WMD for pain reduction (100-point scale) was 12.0 for 'strong' opioids, 10.6 for 'weak' opioids, 8.4 for non-opioids (each vs. placebo), 5.5 for psychotherapy and 4.5 for physiotherapy (each vs. active controls). Dropout rates were high in pharmacological studies. The 95% confidence intervals using the outcomes of control groups did not indicate statistical differences between efficacies of the five interventions. Because not enough eligible head-to-head trials were available, our analysis is limited to adjusted indirect comparisons. The heterogeneity of pre-post pain differences in control groups did not allow the definition of a common comparator. In conclusion, although there were statistically significant differences between maximum treatment efficacies, no intervention per se produced clinically important improvements in average pain intensity. Thus, opioids alone are inappropriate and multimodal treatment programmes may be required for CNCP.

Linked articles

This article is part of a themed section on Opioids: New Pathways to Functional Selectivity. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2015.172.issue-2.",2014-07-01 +21962915,"Neuropathological abnormalities of astrocytes, GABAergic neurons, and pyramidal neurons in the dorsolateral prefrontal cortices of patients with major depressive disorder.","Human post-mortem brain studies have revealed reduced density and size of neurons and glial cells in the dorsolateral prefrontal cortex (dlPFC) in major depressive disorder (MDD). However, the basis of these cytoarchitectural abnormalities and the relationship between them are not understood. We hypothesized that the reduced density of GABAergic neurons and glial cells was associated with altered glutamate neurotransmission in the dlPFC. In order to test this hypothesis, we examined a specific marker type (i.e., calretinin, CR: as a marker of GABAergic neurons) and also attempted to identify the neuropathological markers that correlate with the density of CR-immunoreactive (IR) GABAergic neurons in the dlPFC, using the Stanley Neuropathology Consortium Integrative Database (SNCID, http://sncid.stanleyresearch.org/), which is a web-based tool used to integrate Stanley Medical Research Institute (SMRI) data sets. We found that the density of CR-IR GABAergic neurons was significantly lower in layer I of the dlPFC of MDD patients (n=15) than in that of unaffected controls (n=15) (p=0.021). CR-IR GABAergic neuronal changes were positively correlated with changes in several markers for glial cells and pyramidal neurons in the dlPFC of all SNC subjects (n=60). We also found that the glutamate changes negatively correlated with glial fibrillary acidic protein (GFAP) expression levels and CR-IR GABAergic neuronal density in the prefrontal cortex of all SNC subjects (P<0.05). 
These findings yield some insight into the mechanism by which increased glutamatergic neurotransmission leads to excitotoxic damage both in neurons and glial cells in the dlPFC of MDD patients.",2011-10-02 +24735618,TCMSP: a database of systems pharmacology for drug discovery from herbal medicines.,"

Background

Modern medicine often clashes with traditional medicine such as Chinese herbal medicine because of the limited understanding of the underlying mechanisms of action of the herbs. In an effort to promote integration of both sides and to accelerate drug discovery from herbal medicines, an efficient systems pharmacology platform that represents an ideal convergence of information on pharmacochemistry, ADME properties, drug-likeness, drug targets, associated diseases and interaction networks is urgently needed.

Description

The traditional Chinese medicine systems pharmacology database and analysis platform (TCMSP) was built based on the framework of systems pharmacology for herbal medicines. It consists of all the 499 Chinese herbs registered in the Chinese pharmacopoeia with 29,384 ingredients, 3,311 targets and 837 associated diseases. Twelve important ADME-related properties like human oral bioavailability, half-life, drug-likeness, Caco-2 permeability, blood-brain barrier and Lipinski's rule of five are provided for drug screening and evaluation. TCMSP also provides drug targets and diseases of each active compound, which can automatically establish the compound-target and target-disease networks that let users view and analyze the drug action mechanisms. It is designed to fuel the development of herbal medicines and to promote integration of modern medicine and traditional medicine for drug discovery and development.

Conclusions

The particular strengths of TCMSP are the composition of the large number of herbal entries, and the ability to identify drug-target networks and drug-disease networks, which will help revealing the mechanisms of action of Chinese herbs, uncovering the nature of TCM theory and developing new herb-oriented drugs. TCMSP is freely available at http://sm.nwsuaf.edu.cn/lsp/tcmsp.php.",2014-04-16 +24739306,RetrogeneDB--a database of animal retrogenes.,"Retrocopies of protein-coding genes, reverse transcribed and inserted into the genome copies of mature RNA, have commonly been categorized as pseudogenes with no biological importance. However, recent studies showed that they play important role in the genomes evolution and shaping interspecies differences. Here, we present RetrogeneDB, a database of retrocopies in 62 animal genomes. RetrogeneDB contains information about retrocopies, their genomic localization, parental genes, ORF conservation, and expression. To our best knowledge, this is the most complete retrocopies database providing information for dozens of species previously never analyzed in the context of protein-coding genes retroposition. The database is available at http://retrogenedb.amu.edu.pl.",2014-04-16 +24725256,"Genome-wide survey of transcriptional initiation in the pathogenic fungus, Candida glabrata.","DNA sequencing of the 5'-flanking region of the transcriptome effectively identifies transcription initiation sites and also aids in identifying unknown genes. This study describes a comprehensive polling of transcription start sites and an analysis of full-length complementary DNAs derived from the genome of the pathogenic fungus Candida glabrata. 
A comparison of the sequence reads derived from a cDNA library prepared from cells grown under different culture conditions against the reference genomic sequence of the Candida Genome Database (CGD: http://www.candidagenome.org/) revealed the expression of 4316 genes and their acknowledged transcription start sites (TSSs). In addition this analysis also predicted 59 new genes including 22 that showed no homology to the genome of Saccharomyces cerevisiae, a genetically close relative of C. glabrata. Furthermore, comparison of the 5'-untranslated regions (5'-UTRs) and core promoters of C. glabrata to those of S. cerevisiae showed various global similarities and differences among orthologous genes. Thus, the C. glabrata transcriptome can complement the annotation of the genome database and should provide new insights into the organization, regulation, and function of genes of this important human pathogen.",2014-04-14 +21965461,The new ISSMIC database on in vivo micronucleus and its role in assessing genotoxicity testing strategies.,"This paper presents a new curated database on in vivo micronucleus mutagenicity results, called ISSMIC. It is freely available at: http://www.iss.it/ampp/dati/cont.php?id=233&lang=1&tipo=7. The experimental results were critically reviewed, and evidence on target cell exposure was considered as well. The inspection of ISSMIC demonstrates that a large proportion of reported negative results in the literature (231 out 566 ISSMIC chemicals) lack a clear-cut, direct demonstration of toxicity at the target cells. Using this updated database, the predictive value of a compilation of Structural Alerts (SA) for in vivo micronucleus recently implemented in the expert system Toxtree was investigated. Individually, most of the SA showed a high Positive Predictivity (∼80%), but the need for further expanding the list of alerts was pointed out as well. The role of in vivo micronucleus in strategies for carcinogenicity prediction was re-evaluated. 
In agreement with previous analyses, the data point to a low overall correlation with carcinogenicity. In addition, given the cost in animal lives and the time required for the experimentation, in many programs, the in vivo tests are used only to assess in vitro positive results. The ability of in vivo micronucleus to identify real positives (i.e. carcinogens) among chemicals positive in Salmonella or among chemicals inducing in vitro chromosomal aberrations was studied. It appears that the in vivo micronucleus test does not have added value and rather impairs the prediction ability of the in vitro tests alone. The overall evidence indicates that in vivo micronucleus--in its present form--cannot be considered an useful tool for routine genotoxicity testing but should be used in targeted mechanistic studies.",2011-09-30 +22892030,SDAR: a practical tool for graphical analysis of two-dimensional data.,"

Background

Two-dimensional data needs to be processed and analysed in almost any experimental laboratory. Some tasks in this context may be performed with generic software such as spreadsheet programs which are available ubiquitously, others may require more specialised software that requires paid licences. Additionally, more complex software packages typically require more time by the individual user to understand and operate. Practical and convenient graphical data analysis software in Java with a user-friendly interface are rare.

Results

We have developed SDAR, a Java application to analyse two-dimensional data with an intuitive graphical user interface. A smart ASCII parser allows import of data into SDAR without particular format requirements. The centre piece of SDAR is the Java class GraphPanel which provides methods for generic tasks of data visualisation. Data can be manipulated and analysed with respect to the most common operations experienced in an experimental biochemical laboratory. Images of the data plots can be generated in SVG-, TIFF- or PNG-format. Data exported by SDAR is annotated with commands compatible with the Grace software.

Conclusion

Since SDAR is implemented in Java, it is truly cross-platform compatible. The software is easy to install, and very convenient to use judging by experience in our own laboratories. It is freely available to academic users at http://www.structuralchemistry.org/pcsb/. To download SDAR, users will be asked for their name, institution and email address. A manual, as well as the source code of the GraphPanel class can also be downloaded from this site.",2012-08-14 +25798933,TBI server: a web server for predicting ion effects in RNA folding.,"

Background

Metal ions play a critical role in the stabilization of RNA structures. Therefore, accurate prediction of the ion effects in RNA folding can have a far-reaching impact on our understanding of RNA structure and function. Multivalent ions, especially Mg²⁺, are essential for RNA tertiary structure formation. These ions can possibly become strongly correlated in the close vicinity of RNA surface. Most of the currently available software packages, which have widespread success in predicting ion effects in biomolecular systems, however, do not explicitly account for the ion correlation effect. Therefore, it is important to develop a software package/web server for the prediction of ion electrostatics in RNA folding by including ion correlation effects.

Results

The TBI web server http://rna.physics.missouri.edu/tbi_index.html provides predictions for the total electrostatic free energy, the different free energy components, and the mean number and the most probable distributions of the bound ions. A novel feature of the TBI server is its ability to account for ion correlation and ion distribution fluctuation effects.

Conclusions

By accounting for the ion correlation and fluctuation effects, the TBI server is a unique online tool for computing ion-mediated electrostatic properties for given RNA structures. The results can provide important data for in-depth analysis for ion effects in RNA folding including the ion-dependence of folding stability, ion uptake in the folding process, and the interplay between the different energetic components.",2015-03-23 +23842810,A comparative analysis of algorithms for somatic SNV detection in cancer.,"

Motivation

With the advent of relatively affordable high-throughput technologies, DNA sequencing of cancers is now common practice in cancer research projects and will be increasingly used in clinical practice to inform diagnosis and treatment. Somatic (cancer-only) single nucleotide variants (SNVs) are the simplest class of mutation, yet their identification in DNA sequencing data is confounded by germline polymorphisms, tumour heterogeneity and sequencing and analysis errors. Four recently published algorithms for the detection of somatic SNV sites in matched cancer-normal sequencing datasets are VarScan, SomaticSniper, JointSNVMix and Strelka. In this analysis, we apply these four SNV calling algorithms to cancer-normal Illumina exome sequencing of a chronic myeloid leukaemia (CML) patient. The candidate SNV sites returned by each algorithm are filtered to remove likely false positives, then characterized and compared to investigate the strengths and weaknesses of each SNV calling algorithm.

Results

Comparing the candidate SNV sets returned by VarScan, SomaticSniper, JointSNVMix2 and Strelka revealed substantial differences with respect to the number and character of sites returned; the somatic probability scores assigned to the same sites; their susceptibility to various sources of noise; and their sensitivities to low-allelic-fraction candidates.

Availability

Data accession number SRA081939, code at http://code.google.com/p/snv-caller-review/

Contact

david.adelson@adelaide.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-09 +25539043,Sodium channel blockers: a patent review (2010 - 2014).,"

Introduction

Abnormal activity of voltage-gated sodium channels (VGSCs) is related to several pathological processes, including cardiac arrhythmias, epilepsy, cancer, neurodegenerative diseases, spasticity, chronic and neuropathic pain. As such VGSCs are considered important therapeutic targets.

Areas covered

This review summarized > 30 patents on sodium channel blockers, having beneficial effects on a number of diseases. PubMed, http://www.sciencedirect.com/ , SciFinder Scholar, and http://ep.espacenet.com/ were used as sources for this review and patents filed between 2010 and July 2014 were examined.

Expert opinion

Over the past 4 years we have witnessed a continuous effort in the discovery of new sodium channel blockers by a large number of pharmaceutical companies. All the different chemical classes presented, and here analyzed, could represent an important breakthrough, but the lack of precise structural information, together with the incompleteness of the biological data, hampered the possibility to understand the real 'state of the art' of any of these inventions. Upon analysis of a number of patents in this review, it remains clear that the major hurdle faced by the discovery teams is the ability to develop subtype selective compounds. The development of subtype selective blockers could, in theory, lead to more effective and better tolerated compounds.
Ensembl data are accessible through the genome browser at http://www.ensembl.org and through other tools and programmatic interfaces.",2012-11-30 +24036391,Complete (1)H and (13)C NMR chemical shift assignments of mono- to tetrasaccharides as basis for NMR chemical shift predictions of oligosaccharides using the computer program CASPER.,"(1)H and (13)C NMR chemical shift data are used by the computer program CASPER to predict chemical shifts of oligo- and polysaccharides. Three types of data are used, namely, those from monosaccharides, disaccharides, and trisaccharides. To improve the accuracy of these predictions we have assigned the (1)H and (13)C NMR chemical shifts of eleven monosaccharides, eleven disaccharides, twenty trisaccharides, and one tetrasaccharide; in total 43 compounds. Five of the oligosaccharides gave two distinct sets of NMR resonances due to the α- and β-anomeric forms resulting in 48 (1)H and (13)C NMR chemical shift data sets. In addition, the pyranose ring forms of Neu5Ac were assigned at two temperatures, due to chemical shift displacements as a function of temperature. The (1)H NMR chemical shifts were refined using total line-shape analysis with the PERCH NMR software. (1)H and (13)C NMR chemical shift predictions were subsequently carried out by the CASPER program (http://www.casper.organ.su.se/casper/) for three branched oligosaccharides having different functional groups at their reducing ends, namely, a mannose-containing pentasaccharide, and two fucose-containing heptasaccharides having N-acetyllactosamine residues in the backbone of their structures. Good to excellent agreement was observed between predicted and experimental (1)H and (13)C NMR chemical shifts showing the utility of the method for structural determination or confirmation of synthesized oligosaccharides.",2013-07-10 +26241114,Long-Term Trends Worldwide in Ambient NO2 Concentrations Inferred from Satellite Observations.,"

Background

Air pollution is associated with morbidity and premature mortality. Satellite remote sensing provides globally consistent decadal-scale observations of ambient nitrogen dioxide (NO2) pollution.

Objective

We determined global population-weighted annual mean NO2 concentrations from 1996 through 2012.

Methods

We used observations of NO2 tropospheric column densities from three satellite instruments in combination with chemical transport modeling to produce a global 17-year record of ground-level NO2 at 0.1° × 0.1° resolution. We calculated linear trends in population-weighted annual mean NO2 (PWMNO2) concentrations in different regions around the world.

Results

We found that PWMNO2 in high-income North America (Canada and the United States) decreased more steeply than in any other region, having declined at a rate of -4.7%/year [95% confidence interval (CI): -5.3, -4.1]. PWMNO2 decreased in western Europe at a rate of -2.5%/year (95% CI: -3.0, -2.1). The highest PWMNO2 occurred in high-income Asia Pacific (predominantly Japan and South Korea) in 1996, with a subsequent decrease of -2.1%/year (95% CI: -2.7, -1.5). In contrast, PWMNO2 almost tripled in East Asia (China, North Korea, and Taiwan) at a rate of 6.7%/year (95% CI: 6.0, 7.3). The satellite-derived estimates of trends in ground-level NO2 were consistent with regional trends inferred from data obtained from ground-station monitoring networks in North America (within 0.7%/year) and Europe (within 0.3%/year). Our rankings of regional average NO2 and long-term trends differed from the satellite-derived estimates of fine particulate matter reported elsewhere, demonstrating the utility of both indicators to describe changing pollutant mixtures.

Conclusions

Long-term trends in satellite-derived ambient NO2 provide new information about changing global exposure to ambient air pollution. Our estimates are publicly available at http://fizz.phys.dal.ca/~atmos/martin/?page_id=232.",2015-08-04 +24953126,Homology-based prediction of interactions between proteins using Averaged One-Dependence Estimators.,"

Background

Identification of protein-protein interactions (PPIs) is essential for a better understanding of biological processes, pathways and functions. However, experimental identification of the complete set of PPIs in a cell/organism (""an interactome"") is still a difficult task. To circumvent limitations of current high-throughput experimental techniques, it is necessary to develop high-performance computational methods for predicting PPIs.

Results

In this article, we propose a new computational method to predict interaction between a given pair of protein sequences using features derived from known homologous PPIs. The proposed method is capable of predicting interaction between two proteins (of unknown structure) using Averaged One-Dependence Estimators (AODE) and three features calculated for the protein pair: (a) sequence similarities to a known interacting protein pair (FSeq), (b) statistical propensities of domain pairs observed in interacting proteins (FDom) and (c) a sum of edge weights along the shortest path between homologous proteins in a PPI network (FNet). Feature vectors were defined to lie in a half-space of the symmetrical high-dimensional feature space to make them independent of the protein order. The predictability of the method was assessed by a 10-fold cross validation on a recently created human PPI dataset with randomly sampled negative data, and the best model achieved an Area Under the Curve of 0.79 (pAUC0.5% = 0.16). In addition, the AODE trained on all three features (named PSOPIA) showed better prediction performance on a separate independent data set than a recently reported homology-based method.

Conclusions

Our results suggest that FNet, a feature representing proximity in a known PPI network between two proteins that are homologous to a target protein pair, contributes to the prediction of whether the target proteins interact or not. PSOPIA will help identify novel PPIs and estimate complete PPI networks. The method proposed in this article is freely available on the web at http://mizuguchilab.org/PSOPIA.",2014-06-23 +22041349,Antithrombotic drug therapy for IgA nephropathy: a meta analysis of randomized controlled trials.,"

Background

Antithrombotic agents, including antiplatelet agents, anticoagulants and thrombolysis agents, have been widely used in the management of immunoglobulin A (IgA) nephropathy in Chinese and Japanese populations. To systematically evaluate the effects of antithrombotic agents for IgA nephropathy.

Methods

Data sources consisted of MEDLINE, EMBASE, the Cochrane Library, Chinese Biomedical Literature Database (CBM), Chinese Science and Technology Periodicals Databases (CNKI) and Japana Centra Revuo Medicina (http://www.jamas.gr.jp) up to April 5, 2011. The quality of the studies was evaluated from the intention to treat analysis and allocation concealment, as well as by the Jadad method. Meta-analyses were performed on the outcomes of proteinuria and renal function.

Results

Six articles met the predetermined inclusion criteria. Antithrombotic agents showed statistically significant effects on proteinuria (p<0.0001) but not on the protection of renal function (p=0.07). The pooled risk ratio for proteinuria was 0.53, [95% confidence intervals (CI): 0.41-0.68; I(2)=0%] and for renal function it was 0.42 (95% CI 0.17-1.06; I(2)=72%). Subgroup analysis showed that dipyridamole was beneficial for proteinuria (p=0.0003) but had no significant effects on protecting renal function. Urokinase had statistically significant effects both on the reduction of proteinuria (p=0.0005) and protecting renal function (p<0.00001) when compared with the control group.

Conclusion

Antithrombotic agents had statistically significant effects on the reduction of proteinuria but not on the protection of renal function in patients with IgAN. Urokinase had statistically significant effects both on the reduction of proteinuria and on protecting renal function. Urokinase was shown to be a promising medication and should be investigated further.",2011-11-01 +23162081,A system for exact and approximate genetic linkage analysis of SNP data in large pedigrees.,"

Motivation

The use of dense single nucleotide polymorphism (SNP) data in genetic linkage analysis of large pedigrees is impeded by significant technical, methodological and computational challenges. Here we describe Superlink-Online SNP, a new powerful online system that streamlines the linkage analysis of SNP data. It features a fully integrated flexible processing workflow comprising both well-known and novel data analysis tools, including SNP clustering, erroneous data filtering, exact and approximate LOD calculations and maximum-likelihood haplotyping. The system draws its power from thousands of CPUs, performing data analysis tasks orders of magnitude faster than a single computer. By providing an intuitive interface to sophisticated state-of-the-art analysis tools coupled with high computing capacity, Superlink-Online SNP helps geneticists unleash the potential of SNP data for detecting disease genes.

Results

Computations performed by Superlink-Online SNP are automatically parallelized using novel paradigms, and executed on an unlimited number of private or public CPUs. One novel service is large-scale approximate Markov Chain-Monte Carlo (MCMC) analysis. The accuracy of the results is reliably estimated by running the same computation on multiple CPUs and evaluating the Gelman-Rubin Score to set aside unreliable results. Another service within the workflow is a novel parallelized exact algorithm for inferring maximum-likelihood haplotyping. The reported system enables genetic analyses that were previously infeasible. We demonstrate the system capabilities through a study of a large complex pedigree affected with metabolic syndrome.

Availability

Superlink-Online SNP is freely available for researchers at http://cbl-hap.cs.technion.ac.il/superlink-snp. The system source code can also be downloaded from the system website.

Contact

omerw@cs.technion.ac.il

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-18 +24716852,A short guide to long non-coding RNA gene nomenclature.,"The HUGO Gene Nomenclature Committee (HGNC) is the only organisation authorised to assign standardised nomenclature to human genes. Of the 38,000 approved gene symbols in our database (http://www.genenames.org), the majority represent protein-coding (pc) genes; however, we also name pseudogenes, phenotypic loci, some genomic features, and to date have named more than 8,500 human non-protein coding RNA (ncRNA) genes and ncRNA pseudogenes. We have already established unique names for most of the small ncRNA genes by working with experts for each class. Small ncRNAs can be defined into their respective classes by their shared homology and common function. In contrast, long non-coding RNA (lncRNA) genes represent a disparate set of loci related only by their size, more than 200 bases in length, share no conserved sequence homology, and have variable functions. As with pc genes, wherever possible, lncRNAs are named based on the known function of their product; a short guide is presented herein to help authors when developing novel gene symbols for lncRNAs with characterised function. Researchers must contact the HGNC with their suggestions prior to publication, to check whether the proposed gene symbol can be approved. Although thousands of lncRNAs have been predicted in the human genome, for the vast majority their function remains unresolved. lncRNA genes with no known function are named based on their genomic context. 
Working with lncRNA researchers, the HGNC aims to provide unique and, wherever possible, meaningful gene symbols to all lncRNA genes.",2014-04-09 +23013645,"Annotate-it: a Swiss-knife approach to annotation, analysis and interpretation of single nucleotide variation in human disease.","The increasing size and complexity of exome/genome sequencing data requires new tools for clinical geneticists to discover disease-causing variants. Bottlenecks in identifying the causative variation include poor cross-sample querying, constantly changing functional annotation and not considering existing knowledge concerning the phenotype. We describe a methodology that facilitates exploration of patient sequencing data towards identification of causal variants under different genetic hypotheses. Annotate-it facilitates handling, analysis and interpretation of high-throughput single nucleotide variant data. We demonstrate our strategy using three case studies. Annotate-it is freely available and test data are accessible to all users at http://www.annotate-it.org.",2012-09-26 +25068386,Markov logic networks for optical chemical structure recognition.,"Optical chemical structure recognition is the problem of converting a bitmap image containing a chemical structure formula into a standard structured representation of the molecule. We introduce a novel approach to this problem based on the pipelined integration of pattern recognition techniques with probabilistic knowledge representation and reasoning. Basic entities and relations (such as textual elements, points, lines, etc.) are first extracted by a low-level processing module. A probabilistic reasoning engine based on Markov logic, embodying chemical and graphical knowledge, is subsequently used to refine these pieces of information. An annotated connection table of atoms and bonds is finally assembled and converted into a standard chemical exchange format. 
We report a successful evaluation on two large image data sets, showing that the method compares favorably with the current state-of-the-art, especially on degraded low-resolution images. The system is available as a web server at http://mlocsr.dinfo.unifi.it.",2014-08-06 +22609187,EXP-PAC: providing comparative analysis and storage of next generation gene expression data.,"Microarrays and more recently RNA sequencing has led to an increase in available gene expression data. How to manage and store this data is becoming a key issue. In response we have developed EXP-PAC, a web based software package for storage, management and analysis of gene expression and sequence data. Unique to this package is SQL based querying of gene expression data sets, distributed normalization of raw gene expression data and analysis of gene expression data across experiments and species. This package has been populated with lactation data in the international milk genomic consortium web portal (http://milkgenomics.org/). Source code is also available which can be hosted on a Windows, Linux or Mac APACHE server connected to a private or public network (http://mamsap.it.deakin.edu.au/~pcc/Release/EXP_PAC.html).",2012-05-15 +22517427,Integrative Genomics Viewer (IGV): high-performance genomics data visualization and exploration.,"Data visualization is an essential component of genomic data analysis. However, the size and diversity of the data sets produced by today's sequencing and array-based profiling methods present major challenges to visualization tools. The Integrative Genomics Viewer (IGV) is a high-performance viewer that efficiently handles large heterogeneous data sets, while providing a smooth and intuitive user experience at all levels of genome resolution. A key characteristic of IGV is its focus on the integrative nature of genomic studies, with support for both array-based and next-generation sequencing data, and the integration of clinical and phenotypic data. 
Although IGV is often used to view genomic data from public sources, its primary emphasis is to support researchers who wish to visualize and explore their own data sets or those from colleagues. To that end, IGV supports flexible loading of local and remote data sets, and is optimized to provide high-performance data visualization and exploration on standard desktop systems. IGV is freely available for download from http://www.broadinstitute.org/igv, under a GNU LGPL open-source license.",2012-04-19 +25788626,Family genome browser: visualizing genomes with pedigree information.,"

Motivation

Families with inherited diseases are widely used in Mendelian/complex disease studies. Owing to the advances in high-throughput sequencing technologies, family genome sequencing is becoming more and more prevalent. Visualizing family genomes can greatly facilitate human genetics studies and personalized medicine. However, due to the complex genetic relationships and high similarities among genomes of consanguineous family members, family genomes are difficult to visualize in traditional genome visualization frameworks. How to visualize the family genome variants and their functions with integrated pedigree information remains a critical challenge.

Results

We developed the Family Genome Browser (FGB) to provide comprehensive analysis and visualization for family genomes. The FGB can visualize family genomes at both the individual level and the variant level effectively, through integrating genome data with pedigree information. Family genome analysis, including determination of parental origin of the variants, detection of de novo mutations, identification of potential recombination events and identical-by-descent segments, etc., can be performed flexibly. Diverse annotations for the family genome variants, such as dbSNP memberships, linkage disequilibriums, genes, variant effects, potential phenotypes, etc., are illustrated as well. Moreover, the FGB can automatically search de novo mutations and compound heterozygous variants for a selected individual, and guide investigators to find high-risk genes with flexible navigation options. These features enable users to investigate and understand family genomes intuitively and systematically.

Availability and implementation

The FGB is available at http://mlg.hit.edu.cn/FGB/.",2015-03-18 +24713059,Competing endogenous RNA and interactome bioinformatic analyses on human telomerase.,"We present a classic interactome bioinformatic analysis and a study on competing endogenous (ce) RNAs for hTERT. The hTERT gene codes for the catalytic subunit and limiting component of the human telomerase complex. Human telomerase reverse transcriptase (hTERT) is essential for the integrity of telomeres. Telomere dysfunctions have been widely reported to be involved in aging, cancer, and cellular senescence. The hTERT gene network has been analyzed using the BioGRID interaction database (http://thebiogrid.org/) and related analysis tools such as Osprey (http://biodata.mshri.on.ca/osprey/servlet/Index) and GeneMANIA (http://genemania.org/). The network of interaction of hTERT transcripts has been further analyzed following the competing endogenous (ce) RNA hypotheses (messenger [m] RNAs cross-talk via micro [mi] RNAs) using the miRWalk database and tools (www.ma.uni-heidelberg.de/apps/zmf/mirwalk/). These analyses suggest a role for Akt, nuclear factor-κB (NF-κB), heat shock protein 90 (HSP90), p70/p80 autoantigen, 14-3-3 proteins, and dynein in telomere functions. Roles for histone acetylation/deacetylation and proteoglycan metabolism are also proposed.",2014-04-08 +25667349,Benchmark for Algorithms Segmenting the Left Atrium From 3D CT and MRI Datasets.,"Knowledge of left atrial (LA) anatomy is important for atrial fibrillation ablation guidance, fibrosis quantification and biophysical modelling. Segmentation of the LA from Magnetic Resonance Imaging (MRI) and Computed Tomography (CT) images is a complex problem. This manuscript presents a benchmark to evaluate algorithms that address LA segmentation. The datasets, ground truth and evaluation code have been made publicly available through the http://www.cardiacatlas.org website. 
This manuscript also reports the results of the Left Atrial Segmentation Challenge (LASC) carried out at the STACOM'13 workshop, in conjunction with MICCAI'13. Thirty CT and 30 MRI datasets were provided to participants for segmentation. Each participant segmented the LA including a short part of the LA appendage trunk and proximal sections of the pulmonary veins (PVs). We present results for nine algorithms for CT and eight algorithms for MRI. Results showed that methodologies combining statistical models with region growing approaches were the most appropriate to handle the proposed task. The ground truth and automatic segmentations were standardised to reduce the influence of inconsistently defined regions (e.g., mitral plane, PVs end points, LA appendage). This standardisation framework, which is a contribution of this work, can be used to label and further analyse anatomical regions of the LA. By performing the standardisation directly on the left atrial surface, we can process multiple input data, including meshes exported from different electroanatomical mapping systems.",2015-02-03 +25945573,Identification of local variations within secondary structures of proteins.,"Secondary-structure elements (SSEs) play an important role in the folding of proteins. Identification of SSEs in proteins is a common problem in structural biology. A new method, ASSP (Assignment of Secondary Structure in Proteins), using only the path traversed by the C(α) atoms has been developed. The algorithm is based on the premise that the protein structure can be divided into continuous or uniform stretches, which can be defined in terms of helical parameters, and depending on their values the stretches can be classified into different SSEs, namely α-helices, 310-helices, π-helices, extended β-strands and polyproline II (PPII) and other left-handed helices. 
The methodology was validated using an unbiased clustering of these parameters for a protein data set consisting of 1008 protein chains, which suggested that there are seven well defined clusters associated with different SSEs. Apart from α-helices and extended β-strands, 310-helices and π-helices were also found to occur in substantial numbers. ASSP was able to discriminate non-α-helical segments from flanking α-helices, which were often identified as part of α-helices by other algorithms. ASSP can also lead to the identification of novel SSEs. It is believed that ASSP could provide a better understanding of the finer nuances of protein secondary structure and could make an important contribution to the better understanding of comparatively less frequently occurring structural motifs. At the same time, it can contribute to the identification of novel SSEs. A standalone version of the program for the Linux as well as the Windows operating systems is freely downloadable and a web-server version is also available at http://nucleix.mbu.iisc.ernet.in/assp/index.php.",2015-04-24 +26629599,Ischemic Heart Disease Mortality and Long-Term Exposure to Source-Related Components of U.S. Fine Particle Air Pollution.,"

Background

Fine particulate matter (PM2.5) air pollution exposure has been identified as a global health threat. However, the types and sources of particles most responsible are not yet known.

Objectives

We sought to identify the causal characteristics and sources of air pollution underlying past associations between long-term PM2.5 exposure and ischemic heart disease (IHD) mortality, as established in the American Cancer Society's Cancer Prevention Study-II cohort.

Methods

Individual risk factor data were evaluated for 445,860 adults in 100 U.S. metropolitan areas followed from 1982 through 2004 for vital status and cause of death. Using Cox proportional hazard models, we estimated IHD mortality hazard ratios (HRs) for PM2.5, trace constituents, and pollution source-associated PM2.5, as derived from air monitoring at central stations throughout the nation during 2000-2005.

Results

Associations with IHD mortality varied by PM2.5 mass constituent and source. A coal combustion PM2.5 IHD HR = 1.05 (95% CI: 1.02, 1.08) per microgram/cubic meter, versus an IHD HR = 1.01 (95% CI: 1.00, 1.02) per microgram/cubic meter PM2.5 mass, indicated a risk roughly five times higher for coal combustion PM2.5 than for PM2.5 mass in general, on a per microgram/cubic meter PM2.5 basis. Diesel traffic-related elemental carbon (EC) soot was also associated with IHD mortality (HR = 1.03; 95% CI: 1.00, 1.06 per 0.26-μg/m3 EC increase). However, PM2.5 from both wind-blown soil and biomass combustion was not associated with IHD mortality.

Conclusions

Long-term PM2.5 exposures from fossil fuel combustion, especially coal burning but also from diesel traffic, were associated with increases in IHD mortality in this nationwide population. Results suggest that PM2.5-mortality associations can vary greatly by source, and that the largest IHD health benefits per microgram/cubic meter from PM2.5 air pollution control may be achieved via reductions of fossil fuel combustion exposures, especially from coal-burning sources.

Citation

Thurston GD, Burnett RT, Turner MC, Shi Y, Krewski D, Lall R, Ito K, Jerrett M, Gapstur SM, Diver WR, Pope CA III. 2016. Ischemic heart disease mortality and long-term exposure to source-related components of U.S. fine particle air pollution. Environ Health Perspect 124:785-794; http://dx.doi.org/10.1289/ehp.1509777.",2015-12-02 +24599579,"The UMD-APC database, a model of nation-wide knowledge base: update with data from 3,581 variations.","Familial adenomatous polyposis (FAP) is a rare autosomal-inherited disease that highly predisposes to colorectal cancer, characterized by a diffuse duodenal and colorectal polyposis associated with various extradigestive tumors and linked to germline mutations within the APC gene. A French consortium of laboratories involved in APC mutation screening has progressively improved the description of the variation spectrum, inferred functional significance of nontruncating variations, and delineated phenotypic characteristics of the disease. The current version of the UMD-APC database is described here. The total number of variations has risen to 5,453 representing 1,473 distinct variations. The published records initially registered into the database were extended with 3,581 germline variations found through genetic testing performed by the eight licensed laboratories belonging to the French APC network. Sixty six of 149 variations of previously unknown significance have now been classified as (likely) causal or neutral. The database is available on the Internet (http://www.umd.be/APC/) and updated twice per year according to the consensus rules of the network. 
The UMD-APC database is thus expected to facilitate functional classification of rare synonymous, nonsynonymous, and intronic mutations and consequently improve genetic counseling and medical care in FAP families.",2014-04-07 +23056299,GenoSets: visual analytic methods for comparative genomics.,"Many important questions in biology are, fundamentally, comparative, and this extends to our analysis of a growing number of sequenced genomes. Existing genomic analysis tools are often organized around literal views of genomes as linear strings. Even when information is highly condensed, these views grow cumbersome as larger numbers of genomes are added. Data aggregation and summarization methods from the field of visual analytics can provide abstracted comparative views, suitable for sifting large multi-genome datasets to identify critical similarities and differences. We introduce a software system for visual analysis of comparative genomics data. The system automates the process of data integration, and provides the analysis platform to identify and explore features of interest within these large datasets. GenoSets borrows techniques from business intelligence and visual analytics to provide a rich interface of interactive visualizations supported by a multi-dimensional data warehouse. In GenoSets, visual analytic approaches are used to enable querying based on orthology, functional assignment, and taxonomic or user-defined groupings of genomes. GenoSets links this information together with coordinated, interactive visualizations for both detailed and high-level categorical analysis of summarized data. GenoSets has been designed to simplify the exploration of multiple genome datasets and to facilitate reasoning about genomic comparisons. Case examples are included showing the use of this system in the analysis of 12 Brucella genomes. GenoSets software and the case study dataset are freely available at http://genosets.uncc.edu. 
We demonstrate that the integration of genomic data using a coordinated multiple view approach can simplify the exploration of large comparative genomic data sets, and facilitate reasoning about comparisons and features of interest.",2012-10-03 +25150030,Is the Alma Ata vision of comprehensive primary health care viable? Findings from an international project.,"

Background

The 4-year (2007-2011) Revitalizing Health for All international research program (http://www.globalhealthequity.ca/projects/proj_revitalizing/index.shtml) supported 20 research teams located in 15 low- and middle-income countries to explore the strengths and weaknesses of comprehensive primary health care (CPHC) initiatives at their local or national levels. Teams were organized in a triad comprised of a senior researcher, a new researcher, and a 'research user' from government, health services, or other organizations with the authority or capacity to apply the research findings. Multiple regional and global team capacity-enhancement meetings were organized to refine methods and to discuss and assess cross-case findings.

Objective

Most research projects used mixed methods, incorporating analyses of qualitative data (interviews and focus groups), secondary data, and key policy and program documents. Some incorporated historical case study analyses, and a few undertook new surveys. The synthesis of findings in this report was derived through qualitative analysis of final project reports undertaken by three different reviewers.

Results

Evidence of comprehensiveness (defined in this research program as efforts to improve equity in access, community empowerment and participation, social and environmental health determinants, and intersectoral action) was found in many of the cases.

Conclusions

Despite the important contextual differences amongst the different country studies, the similarity of many of their findings, often generated using mixed methods, attests to certain transferable health systems characteristics to create and sustain CPHC practices. These include: 1. Well-trained and supported community health workers (CHWs) able to work effectively with marginalized communities; 2. Effective mechanisms for community participation, both informal (through participation in projects and programs, and meaningful consultation) and formal (through program management structures); 3. Co-partnership models in program and policy development (in which financial and knowledge supports from governments or institutions are provided to communities, which retain decision-making powers in program design and implementation); 4. Support for community advocacy and engagement in health and social systems decision making. These characteristics, in turn, require a political context that supports state responsibilities for redistributive health and social protection measures.",2014-08-21 +23751181,Using Genome Query Language to uncover genetic variation.,"

Motivation

With high-throughput DNA sequencing costs dropping <$1000 for human genomes, data storage, retrieval and analysis are the major bottlenecks in biological studies. To address the large-data challenges, we advocate a clean separation between the evidence collection and the inference in variant calling. We define and implement a Genome Query Language (GQL) that allows for the rapid collection of evidence needed for calling variants.

Results

We provide a number of cases to showcase the use of GQL for complex evidence collection, such as the evidence for large structural variations. Specifically, typical GQL queries can be written in 5-10 lines of high-level code and search large datasets (100 GB) in minutes. We also demonstrate its complementarity with other variant calling tools. Popular variant calling tools can achieve one order of magnitude speed-up by using GQL to retrieve evidence. Finally, we show how GQL can be used to query and compare multiple datasets. By separating the evidence and inference for variant calling, it frees all variant detection tools from the data intensive evidence collection and focuses on statistical inference.

Availability

GQL can be downloaded from http://cseweb.ucsd.edu/~ckozanit/gql.",2013-06-10 +25416616,FANTEN: a new web-based interface for the analysis of magnetic anisotropy-induced NMR data.,"Pseudocontact shifts (PCSs) and residual dipolar couplings (RDCs) arising from the presence of paramagnetic metal ions in proteins as well as RDCs due to partial orientation induced by external orienting media are nowadays routinely measured as a part of the NMR characterization of biologically relevant systems. PCSs and RDCs are becoming more and more popular as restraints (1) to determine and/or refine protein structures in solution, (2) to monitor the extent of conformational heterogeneity in systems composed of rigid domains which can reorient with respect to one another, and (3) to obtain structural information in protein-protein complexes. The use of both PCSs and RDCs proceeds through the determination of the anisotropy tensors which are at the origin of these NMR observables. A new user-friendly web tool, called FANTEN (Finding ANisotropy TENsors), has been developed for the determination of the anisotropy tensors related to PCSs and RDCs and has been made freely available through the WeNMR ( http://fanten-enmr.cerm.unifi.it:8080 ) gateway. The program has many new features not available in other existing programs, among which the possibility of a joint analysis of several sets of PCS and RDC data and the possibility to perform rigid body minimizations.",2014-11-22 +24699831,"MetalS(3), a database-mining tool for the identification of structurally similar metal sites.","We have developed a database search tool to identify metal sites having structural similarity to a query metal site structure within the MetalPDB database of minimal functional sites (MFSs) contained in metal-binding biological macromolecules. MFSs describe the local environment around the metal(s) independently of the larger context of the macromolecular structure. 
Such a local environment has a determinant role in tuning the chemical reactivity of the metal, ultimately contributing to the functional properties of the whole system. The database search tool, which we called MetalS(3) (Metal Sites Similarity Search), can be accessed through a Web interface at http://metalweb.cerm.unifi.it/tools/metals3/ . MetalS(3) uses a suitably adapted version of an algorithm that we previously developed to systematically compare the structure of the query metal site with each MFS in MetalPDB. For each MFS, the best superposition is kept. All these superpositions are then ranked according to the MetalS(3) scoring function and are presented to the user in tabular form. The user can interact with the output Web page to visualize the structural alignment or the sequence alignment derived from it. Options to filter the results are available. Test calculations show that the MetalS(3) output correlates well with expectations from protein homology considerations. Furthermore, we describe some usage scenarios that highlight the usefulness of MetalS(3) to obtain mechanistic and functional hints regardless of homology.",2014-04-04 +25572717,Pantograph: A template-based method for genome-scale metabolic model reconstruction.,"Genome-scale metabolic models are a powerful tool to study the inner workings of biological systems and to guide applications. The advent of cheap sequencing has brought the opportunity to create metabolic maps of biotechnologically interesting organisms. While this drives the development of new methods and automatic tools, network reconstruction remains a time-consuming process where extensive manual curation is required. This curation introduces specific knowledge about the modeled organism, either explicitly in the form of molecular processes, or indirectly in the form of annotations of the model elements. Paradoxically, this knowledge is usually lost when reconstruction of a different organism is started. 
We introduce the Pantograph method for metabolic model reconstruction. This method combines a template reaction knowledge base, orthology mappings between two organisms, and experimental phenotypic evidence, to build a genome-scale metabolic model for a target organism. Our method infers implicit knowledge from annotations in the template, and rewrites these inferences to include them in the resulting model of the target organism. The generated model is well suited for manual curation. Scripts for evaluating the model with respect to experimental data are automatically generated, to aid curators in iterative improvement. We present an implementation of the Pantograph method, as a toolbox for genome-scale model reconstruction, curation and validation. This open source package can be obtained from: http://pathtastic.gforge.inria.fr.",2015-01-08 +24700812,3PFDB+: improved search protocol and update for the identification of representatives of protein sequence domain families.,"Protein domain families are usually classified on the basis of similarity of amino acid sequences. Selection of a single representative sequence for each family provides targets for structure determination or modeling and also enables fast sequence searches to associate new members to a family. Such a selection could be challenging since some of these domain families exhibit huge variation depending on the number of members in the family, the average family sequence length or the extent of sequence divergence within a family. We had earlier created 3PFDB database as a repository of best representative sequences, selected from each PFAM domain family on the basis of high coverage. In this study, we have improved the database using more efficient strategies for the initial generation of sequence profiles and implement two independent methods, FASSM and HMMER, for identifying family members. HMMER employs a global sequence similarity search, while FASSM relies on motif identification and matching. 
This improved and updated database, 3PFDB+ generated in this study, provides representative sequences and profiles for PFAM families, with 13 519 family representatives having more than 90% family coverage. The representative sequence is also highlighted in a two-dimensional plot, which reflects the relative divergence between family members. Representatives belonging to small families with short sequences are mainly associated with low coverage. The set of sequences not recognized by the family representative profiles, highlight several potential false or weak family associations in PFAM. Partial domains and fragments dominate such cases, along with sequences that are highly diverged or different from other family members. Some of these outliers were also predicted to have different secondary structure contents, which reflect different putative structure or functional roles for these domain sequences. Database URL: http://caps.ncbs.res.in/3pfdbplus/.",2014-04-03 +24700709,The Developmental Brain Disorders Database (DBDB): a curated neurogenetics knowledge base with clinical and research applications.,"The number of single genes associated with neurodevelopmental disorders has increased dramatically over the past decade. The identification of causative genes for these disorders is important to clinical outcome as it allows for accurate assessment of prognosis, genetic counseling, delineation of natural history, inclusion in clinical trials, and in some cases determines therapy. Clinicians face the challenge of correctly identifying neurodevelopmental phenotypes, recognizing syndromes, and prioritizing the best candidate genes for testing. However, there is no central repository of definitions for many phenotypes, leading to errors of diagnosis. Additionally, there is no system of levels of evidence linking genes to phenotypes, making it difficult for clinicians to know which genes are most strongly associated with a given condition. 
We have developed the Developmental Brain Disorders Database (DBDB: https://www.dbdb.urmc.rochester.edu/home), a publicly available, online-curated repository of genes, phenotypes, and syndromes associated with neurodevelopmental disorders. DBDB contains the first referenced ontology of developmental brain phenotypes, and uses a novel system of levels of evidence for gene-phenotype associations. It is intended to assist clinicians in arriving at the correct diagnosis, select the most appropriate genetic test for that phenotype, and improve the care of patients with developmental brain disorders. For researchers interested in the discovery of novel genes for developmental brain disorders, DBDB provides a well-curated source of important genes against which research sequencing results can be compared. Finally, DBDB allows novel observations about the landscape of the neurogenetics knowledge base.",2014-04-03 +22123792,KNApSAcK family databases: integrated metabolite-plant species databases for multifaceted plant research.,"A database (DB) describing the relationships between species and their metabolites would be useful for metabolomics research, because it targets systematic analysis of enormous numbers of organic compounds with known or unknown structures in metabolomics. We constructed an extensive species-metabolite DB for plants, the KNApSAcK Core DB, which contains 101,500 species-metabolite relationships encompassing 20,741 species and 50,048 metabolites. We also developed a search engine within the KNApSAcK Core DB for use in metabolomics research, making it possible to search for metabolites based on an accurate mass, molecular formula, metabolite name or mass spectra in several ionization modes. We also have developed databases for retrieving metabolites related to plants used for a range of purposes. 
In our multifaceted plant usage DB, medicinal/edible plants are related to the geographic zones (GZs) where the plants are used, their biological activities, and formulae of Japanese and Indonesian traditional medicines (Kampo and Jamu, respectively). These data are connected to the species-metabolites relationship DB within the KNApSAcK Core DB, keyed via the species names. All databases can be accessed via the website http://kanaya.naist.jp/KNApSAcK_Family/. KNApSAcK WorldMap DB comprises 41,548 GZ-plant pair entries, including 222 GZs and 15,240 medicinal/edible plants. The KAMPO DB consists of 336 formulae encompassing 278 medicinal plants; the JAMU DB consists of 5,310 formulae encompassing 550 medicinal plants. The Biological Activity DB consists of 2,418 biological activities and 33,706 pairwise relationships between medicinal plants and their biological activities. Current statistics of the binary relationships between individual databases were characterized by the degree distribution analysis, leading to a prediction of at least 1,060,000 metabolites within all plants. In the future, the study of metabolomics will need to take this huge number of metabolites into consideration.",2011-11-28 +23158523,"InSilico DB genomic datasets hub: an efficient starting point for analyzing genome-wide studies in GenePattern, Integrative Genomics Viewer, and R/Bioconductor.","Genomics datasets are increasingly useful for gaining biomedical insights, with adoption in the clinic underway. However, multiple hurdles related to data management stand in the way of their efficient large-scale utilization. The solution proposed is a web-based data storage hub. Having clear focus, flexibility and adaptability, InSilico DB seamlessly connects genomics dataset repositories to state-of-the-art and free GUI and command-line data analysis tools. 
The InSilico DB platform is a powerful collaborative environment, with advanced capabilities for biocuration, dataset sharing, and dataset subsetting and combination. InSilico DB is available from https://insilicodb.org.",2012-11-18 +26007227,Epigenomic k-mer dictionaries: shedding light on how sequence composition influences in vivo nucleosome positioning.,"

Motivation

Information-theoretic and compositional analysis of biological sequences, in terms of k-mer dictionaries, has a well established role in genomic and proteomic studies. Much less so in epigenomics, although the role of k-mers in chromatin organization and nucleosome positioning is particularly relevant. Fundamental questions concerning the informational content and compositional structure of nucleosome favouring and disfavoring sequences with respect to their basic building blocks still remain open.

Results

We present the first analysis on the role of k-mers in the composition of nucleosome enriched and depleted genomic regions (NER and NDR for short) that is: (i) exhaustive and within the bounds dictated by the information-theoretic content of the sample sets we use and (ii) informative for comparative epigenomics. We analyze four different organisms and we propose a paradigmatic formalization of k-mer dictionaries, providing two different and complementary views of the k-mers involved in NER and NDR. The first extends well known studies in this area, its comparative nature being its major merit. The second, very novel, brings to light the rich variety of k-mers involved in influencing nucleosome positioning, for which an initial classification in terms of clusters is also provided. Although such a classification offers many insights, the following deserves to be singled-out: short poly(dA:dT) tracts are reported in the literature as fundamental for nucleosome depletion, however a global quantitative look reveals that their role is much less prominent than one would expect based on previous studies.

Availability and implementation

Dictionaries, clusters and Supplementary Material are available online at http://math.unipa.it/rombo/epigenomics/.

Contact

simona.rombo@unipa.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-25 +24612799,Evaluating the effect of ration composition on income over feed cost and milk yield.,"Feed is generally the greatest expense for milk production. With volatility in feed and milk markets, income over feed cost (IOFC) is a more advantageous measure of profit than simply feed cost per cow. The objective of this study was to evaluate the effects of ration cost and ingredient composition on IOFC and milk yield. The Pennsylvania State Extension Dairy Team IOFC tool (http://extension.psu.edu/animals/dairy/business-management/financial-tools/income-over-feed-cost/introduction-to-iofc) was used to collect data from 95 Pennsylvania lactating dairy cow herds from 2009 to 2012 and to determine the IOFC per cow per day. The data collected included average milk yield, milk income, purchased feed cost, ration ingredients, ingredient cost per ton, and amount of each ingredient fed. Feed costs for home-raised feeds for each ration were based on market values rather than on-farm cost. Actual costs were used for purchased feed for each ration. Mean lactating herd size was 170 ± 10.5 and daily milk yield per cow was 31.7 ± 0.19 kg. The mean IOFC was $7.71 ± $1.01 cost per cow, ranging from -$0.33 in March 2009 to $16.60 in September 2011. Data were analyzed using a one-way ANOVA in SPSS (IBM Corp., Armonk, NY). Values were grouped by quartiles and analyzed with all years combined as well as by individual year. Purchased feed cost per cow per day averaged $3.16 ± $1.07 for 2009 to 2012. For 2009 to 2012 combined, milk yield and IOFC did not differ with purchased feed cost. Intermediate levels (quartiles 2 and 3) of forage cost per cow per day between $1.45 and $1.97 per cow per day resulted in the greatest average IOFC of $8.19 and the greatest average milk yield of 32.3 kg. Total feed costs in the fourth quartile ($6.27 or more per cow per day) resulted in the highest IOFC. 
Thus, minimizing feed cost per cow per day did not maximize IOFC. In 2010, the IOFC was highest at $8.09 for dairies that fed 1 or more commodity by-products. Results of the study indicated that intermediate levels of forage cost and higher levels of total feed cost per cow per day resulted in both higher milk yield and higher IOFC. This suggests that optimal ration formulation rather than least cost strategies may be key to increasing milk yield and IOFC, and that profit margin may be affected more by quality of the feed rather than the cost.",2014-03-05 +24927180,Towards a quantitative OCT image analysis.,"

Background

Optical coherence tomography (OCT) is an invaluable diagnostic tool for the detection and follow-up of retinal pathology in patients and experimental disease models. However, as morphological structures and layering in health as well as their alterations in disease are complex, segmentation procedures have not yet reached a satisfactory level of performance. Therefore, raw images and qualitative data are commonly used in clinical and scientific reports. Here, we assess the value of OCT reflectivity profiles as a basis for a quantitative characterization of the retinal status in a cross-species comparative study.

Methods

Spectral-Domain Optical Coherence Tomography (OCT), confocal Scanning-Laser Ophthalmoscopy (SLO), and Fluorescein Angiography (FA) were performed in mice (Mus musculus), gerbils (Gerbillus perpadillus), and cynomolgus monkeys (Macaca fascicularis) using the Heidelberg Engineering Spectralis system, and additional SLOs and FAs were obtained with the HRA I (same manufacturer). Reflectivity profiles were extracted from 8-bit greyscale OCT images using the ImageJ software package (http://rsb.info.nih.gov/ij/).

Results

Reflectivity profiles obtained from OCT scans of all three animal species correlated well with ex vivo histomorphometric data. Each of the retinal layers showed a typical pattern that varied in relative size and degree of reflectivity across species. In general, plexiform layers showed a higher level of reflectivity than nuclear layers. A comparison of reflectivity profiles from specialized retinal regions (e.g. visual streak in gerbils, fovea in non-human primates) with respective regions of human retina revealed multiple similarities. In a model of Retinitis Pigmentosa (RP), the value of reflectivity profiles for the follow-up of therapeutic interventions was demonstrated.

Conclusions

OCT reflectivity profiles provide a detailed, quantitative description of retinal layers and structures including specialized retinal regions. Our results highlight the potential of this approach in the long-term follow-up of therapeutic strategies.",2014-06-13 +30708719,First Report of Colletotrichum capsici Causing Anthracnose on Hosta plantaginea in China.,"Fragrant plantain lily [Hosta plantaginea (Lam.) Aschers.] is an easily grown herbaceous perennial plant valued for its decorative foliage and dainty colorful flowers. From 2009 to 2011, a leaf spot disease of H. plantaginea was observed in Yuyuantan Park in Beijing, China (116°25' E, 39°55' N). The leaf spots began as small, irregular, circular, brown lesions in the middle or on the margin of leaves, which enlarged gradually up to 1 to 20 mm in diameter and were circular or irregular and brown to dark brown surrounded by yellowish borders. Occasionally, some spots cracked under dry conditions. Symptomatic leaf tissues were surface-sterilized in 1% NaOCl for 2 min, washed three times with distilled water, and then placed on potato dextrose agar (PDA). Colonies on PDA at 25°C for 7 days were grayish brown and cottony. Mycelia were hyaline to grey, septate, branched, and 2 to 7 μm wide. Acervuli were dark brown to black and 198 to 486 μm in diameter, averaging 278.5 μm. Setae were pale brown to dark brown, 2 to 4 septa, 70.0 to 120.3 × 2.5 to 5.1 μm, base cylindrical, and narrower towards the apex. Conidiophores were unicellular, hyaline, phialidic, and 5.0 to 13.5 × 1.5 to 2.8 μm. Conidia were hyaline, aseptate, falcate, apices acute, oil globules, and 16.0 to 25.2 × 2.6 to 5.0 μm. Appressoria were spherical, ovate or obclavate, pale to dark brown, edge usually entire, and 9.5 to 15.5 × 6.5 to 11.5 μm. Morphological characteristics of the fungus were similar to those of Colletotrichum capsici (Syd.) Butler & Bisby (2). 
To validate Koch's postulates, pathogenicity tests were performed by spraying leaves of 20 healthy potted H. plantaginea (60-day-old plants) with a 10^6 conidia/ml aqueous suspension. Control plants were inoculated with sterile water. Plants were put into a glass cabinet for 48 h after inoculation and maintained at 25°C, relative humidity 98%. Then the plants were moved out and incubated in greenhouse at 10 to 25°C. After 10 days, all inoculated plants showed typical symptoms, whereas water sprayed controls remained healthy. C. capsici was consistently re-isolated from these lesions. The re-isolated fungus showed the same morphological characteristics as described above. Genomic DNA was extracted from the original isolate and the re-isolate from the pathogenicity test. PCR amplification of the internal transcribed spacer (ITS) regions from ribosomal DNA was performed with primers ITS1 (5'-TCCGTAGGTGAACCTGCGG-3') and ITS4 (5'-TCCTCCGCTTATTGATATGC-3'). PCR products of 513 bp were sequenced. There was 100% nucleotide identity for sequences of the original isolate and the re-isolate. The sequence was submitted to GenBank (Accession No. HM063417.1). BLAST analysis of the fungal sequence resulted in 100% identity to the sequence of C. capsici (Accession No. JX867217.1). Isolates have been deposited at the Institute of Vegetables and Flowers, Chinese Academy of Agricultural Sciences. To our knowledge, this is the first report of anthracnose caused by C. capsici on H. plantaginea in China (1). Its confirmation is a significant step toward management recommendations for growers. References: (1) D. F. Farr and A. Y. Rossman, Fungal Databases. Syst. Mycol. Microbiol. Lab. ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , August 2013. (2) J. E. M. Mordue. CMI Description of Pathogenic Fungi and Bacteria. Commonwealth Mycol. 
Inst., Kew, UK, 1971.",2014-04-01 +30708714,First Report of Powdery Mildew Caused by Podosphaera spiraeae on Japanese Spiraea in China.,"Japanese spiraea (Spiraea japonica L.f.), belonging to Rosaceae, is widely planted for its ornamental value in China. Since July 2011, powdery mildew infections on leaves and stems of Japanese spiraea have been noticed in some parks and gardens of Chengyang District in Qingdao City, China (GPS coordinates 36°31'04.22″ N, 120°39'41.92″ E). Symptoms first appeared as white spots covered with mycelium on both side of the leaves and young stems. As the disease progressed, abundant mycelial growth covered the whole shoots and caused growth reduction and leaf distortion with or without reddening. A voucher specimen was deposited in the herbarium of Qingdao Agricultural University (Accession No. HMQAU13013). Hyphae were flexuous to straight, branched, septate, 5 to 7 μm wide, and had nipple-shaped appressoria. Conidiophores arising from the upper surface of hyphal cells produced 2 to 5 immature conidia in chains with a crenate outline. Foot-cells of conidiophores were straight, 60 to 125 × 7 to 9 μm, and followed by 1 to 2 shorter cells. Conidia were ellipsoid-ovoid to doliiform, measured 25 to 32 × 12 to 15 μm with a length/width ratio of 1.8 to 2.6, and had distinct fibrosin bodies. Chasmothecia were not found. The structures and measurements were compatible with the anamorphic state of Podosphaera spiraeae (Sawada) U. Braun & S. Takam. as described before (1). The identity of HMQAU13013 was further confirmed by analysis of nucleotide sequences of the internal transcribed spacer (ITS) regions amplified using the primers ITS1/ITS4 (4). The resulting 564-bp sequence was deposited in GenBank (Accession No. KF500426). A GenBank BLAST search of complete ITS sequence showed 100% identity with that of P. spiraeae on S. cantoniensis (AB525940). 
A pathogenicity test was conducted through inoculation by gently pressing a diseased leaf onto five healthy leaves of a potted Japanese spiraea. Five non-inoculated leaves served as controls. The plants were maintained in a greenhouse at 22°C. Inoculated leaves developed typical symptoms of powdery mildew after 5 days, but the non-inoculated leaves remained symptomless. The fungus presented on the inoculated plant was morphologically identical to that originally observed on diseased plants, fulfilling Koch's postulates. Powdery mildew of S. japonica caused by P. spiraeae has been recorded in Japan, Poland, and Switzerland (2,3). To our knowledge, this is the first report of powdery mildew caused by P. spiraeae on Japanese spiraea in China. References: (1) U. Braun and R. T. A. Cook. Taxonomic Manual of the Erysiphales (Powdery Mildews), CBS Biodiversity Series No.11. CBS, Utrecht, 2012. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ September 10, 2013. (3) T. Kobayashi. Index of Fungi Inhabiting Woody Plants in Japan. Host, Distribution and Literature. Zenkoku-Noson-Kyoiku Kyokai Publishing Co. Ltd., Tokyo, 2007. (4) S. Matsuda and S. Takamatsu. Mol. Phylogenet. Evol. 27:314, 2003.",2014-04-01 +24724606,Cancers of the upper gastro-intestinal tract: a review of somatic mutation distributions.,"Cancers of the upper gastro-intestinal tract (UGIT) comprise esophageal, esophago-gastric junction, stomach and duodenal cancers. Together, these cancers represent over 1.5 million cases and are the cause of about 1.25 million deaths annually. This group of cancers encompasses diseases with marked disparities in etiology, geographic distribution, histopathological features and frequency. 
Based on histological origin, squamous cell carcinoma of the esophagus (ESCC), which arises through a dysplasia-carcinoma sequence within the squamous mucosa, is a completely different cancer than junction, stomach and duodenal cancers, which develop within glandular epithelia through cascades involving inflammation, metaplasia, dysplasia and carcinoma. At the frontline between these two histological domains, cancers of the esophago-gastric junction constitute a mixed group of glandular tumors including distal esophageal adenocarcinomas and cancers arising within the most proximal part of the stomach - the cardia. Most of UGIT cancers are sporadic, although familial susceptibility genes have been identified for stomach and rare cases of ESCC. We have used the COSMIC database (http://www.sanger.ac.uk/genetics/CGP/cosmic/) to identify genes commonly mutated in UGIT cancers. Regardless of etiology and histopathology, three genes are mutated in at least 5% of UGIT cancers: TP53, CDKN2a and PIK3CA. Another three genes, NFE2L2, PTCH1 and NOTCH1, are mutated in ESCC only. Conversely, genes of the RAS family and of the CDH1/APC/CTNNB1 pathway are mutated only in non-squamous cancers, with differences in mutated genes according to topography. We review the potential functional significance of these observations for understanding mechanisms of UGIT carcinogenesis.",2014-04-01 +25414360,CRISPRdirect: software for designing CRISPR/Cas guide RNA with reduced off-target sites.,"

Unlabelled

CRISPRdirect is a simple and functional web server for selecting rational CRISPR/Cas targets from an input sequence. The CRISPR/Cas system is a promising technique for genome engineering which allows target-specific cleavage of genomic DNA guided by Cas9 nuclease in complex with a guide RNA (gRNA), that complementarily binds to a ∼ 20 nt targeted sequence. The target sequence requirements are twofold. First, the 5'-NGG protospacer adjacent motif (PAM) sequence must be located adjacent to the target sequence. Second, the target sequence should be specific within the entire genome in order to avoid off-target editing. CRISPRdirect enables users to easily select rational target sequences with minimized off-target sites by performing exhaustive searches against genomic sequences. The server currently incorporates the genomic sequences of human, mouse, rat, marmoset, pig, chicken, frog, zebrafish, Ciona, fruit fly, silkworm, Caenorhabditis elegans, Arabidopsis, rice, Sorghum and budding yeast.

Availability

Freely available at http://crispr.dbcls.jp/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-20 +26105823,"Antibody Informatics: IMGT, the International ImMunoGeneTics Information System. ","Antibody informatics, a part of immunoinformatics, refers to the concepts, databases, and tools developed and used to explore and to analyze the particular properties of the immunoglobulins (IG) or antibodies, compared with conventional genes and proteins. Antibody informatics is based on a unique ontology, IMGT-ONTOLOGY, created in 1989 by IMGT, the international ImMunoGeneTics information system (http://www.imgt.org). IMGT-ONTOLOGY defined, for the first time, the concept of 'genes' for the IG and the T cell receptors (TR), which led to their gene and allele nomenclature and allowed their entry in databases and tools. A second IMGT-ONTOLOGY revolutionizing and definitive concept was the IMGT unique numbering that bridged the gap between sequences and structures for the variable (V) and constant (C) domains of the IG and TR, and for the groove (G) domains of the major histocompatibility (MH). These breakthroughs contributed to the development of IMGT databases and tools for antibody informatics and its diverse applications, such as repertoire analysis in infectious diseases, antibody engineering and humanization, and study of antibody/antigen interactions. Nucleotide sequences of antibody V domains from deep sequencing (Next Generation Sequencing or High Throughput Sequencing) are analyzed with IMGT/HighV-QUEST, the high-throughput version of IMGT/V-QUEST and IMGT/JunctionAnalysis. Amino acid sequences of V and C domains are represented with the IMGT/Collier-de-Perles tool and analyzed with IMGT/DomainGapAlign. Three-dimensional (3D) structures (including contact analysis and paratope/epitope) are described in IMGT/3Dstructure-DB. 
Based on a friendly interface, IMGT/mAb-DB contains therapeutic monoclonal antibodies (INN suffix-mab) that can be queried on their specificity, for example, in infectious diseases, on bacterial or viral targets.",2014-04-01 +24619146,[Drug-drug interactions: interactions between xenobiotics].,"Drug-drug interactions (DDI) are a major topic in programs for continuous medical education (CME). Many physicians are afraid of being trapped into charges of malpractice; however, DDI cannot be avoided in many cases. They belong to routine medical practice and it is often impossible to avoid them. Moreover, they do not just occur between drugs but between any kind of foreign substance (xenobiotica), such as food (e.g. grapefruit juice, broccoli, barbecue) as well as legal (e.g. tobacco smoke, caffeine and alcohol) and illegal drugs. Therefore, the medical challenge is not just to avoid any interaction. Instead the physician faces the question of how to proceed with drug treatment in the presence of such interactions. Based on the medical education a physician has to judge first of all whether there is a risk for interactions in the prescription being planned for an individual patient. The classification of interactions proposed in this article (PD1-PD4, PK1-PK3) might help as a sort of check list. For more detailed information the physician can then consult one of the many databases available on the internet, such as PSIAConline (http://www.psiac.de) and MediQ (http://www.mediq.ch). Pharmacokinetic interactions can be easily assessed, monitored and controlled by therapeutic drug monitoring (TDM). Besides these tools it is important to keep in mind that nobody knows everything; even physicians do not know everything. So take pride in asking someone who might help and for this purpose AGATE offers a drug information service AID (http://www.amuep-agate.de). 
Just good for nothing, without being based on any kind of medical approach are computer programs that judge prescriptions without taking into account a patient's individual peculiarities. In case these types of programs produce red exclamation marks or traffic lights to underline their judgment, they might even work in a contrapuntal way by just eliciting insecurity and fear.",2014-04-01 +23508968,CytoHiC: a cytoscape plugin for visual comparison of Hi-C networks.,"

Summary

With the introduction of the Hi-C method new and fundamental properties of the nuclear architecture are emerging. The ability to interpret data generated by this method, which aims to capture the physical proximity between and within chromosomes, is crucial for uncovering the three dimensional structure of the nucleus. Providing researchers with tools for interactive visualization of Hi-C data can help in gaining new and important insights. Specifically, visual comparison can pinpoint changes in spatial organization between Hi-C datasets, originating from different cell lines or different species, or normalized by different methods. Here, we present CytoHiC, a Cytoscape plugin, which allows users to view and compare spatial maps of genomic landmarks, based on normalized Hi-C datasets. CytoHiC was developed to support intuitive visual comparison of Hi-C data and integration of additional genomic annotations.

Availability

The CytoHiC plugin, source code, user manual, example files and documentation are available at: http://apps.cytoscape.org/apps/cytohicplugin",2013-03-18 +24376713,NMRDSP: an accurate prediction of protein shape strings from NMR chemical shifts and sequence data.,"Shape string is structural sequence and is an extremely important structure representation of protein backbone conformations. Nuclear magnetic resonance chemical shifts give a strong correlation with the local protein structure, and are exploited to predict protein structures in conjunction with computational approaches. Here we demonstrate a novel approach, NMRDSP, which can accurately predict the protein shape string based on nuclear magnetic resonance chemical shifts and structural profiles obtained from sequence data. The NMRDSP uses six chemical shifts (HA, H, N, CA, CB and C) and eight elements of structure profiles as features, a non-redundant set (1,003 entries) as the training set, and a conditional random field as a classification algorithm. For an independent testing set (203 entries), we achieved an accuracy of 75.8% for S8 (the eight states accuracy) and 87.8% for S3 (the three states accuracy). This is higher than only using chemical shifts or sequence data, and confirms that the chemical shift and the structure profile are significant features for shape string prediction and their combination prominently improves the accuracy of the predictor. We have constructed the NMRDSP web server and believe it could be employed to provide a solid platform to predict other protein structures and functions. The NMRDSP web server is freely available at http://cal.tongji.edu.cn/NMRDSP/index.jsp.",2013-12-23 +24423115,kruX: matrix-based non-parametric eQTL discovery.,"

Background

The Kruskal-Wallis test is a popular non-parametric statistical test for identifying expression quantitative trait loci (eQTLs) from genome-wide data due to its robustness against variations in the underlying genetic model and expression trait distribution, but testing billions of marker-trait combinations one-by-one can become computationally prohibitive.

Results

We developed kruX, an algorithm implemented in Matlab, Python and R that uses matrix multiplications to simultaneously calculate the Kruskal-Wallis test statistic for several millions of marker-trait combinations at once. KruX is more than ten thousand times faster than computing associations one-by-one on a typical human dataset. We used kruX and a dataset of more than 500k SNPs and 20k expression traits measured in 102 human blood samples to compare eQTLs detected by the Kruskal-Wallis test to eQTLs detected by the parametric ANOVA and linear model methods. We found that the Kruskal-Wallis test is more robust against data outliers and heterogeneous genotype group sizes and detects a higher proportion of non-linear associations, but is more conservative for calling additive linear associations.

Conclusion

kruX enables the use of robust non-parametric methods for massive eQTL mapping without the need for a high-performance computing infrastructure and is freely available from http://krux.googlecode.com.",2014-01-14 +26015372,Remote Ischemic Preconditioning To Reduce Contrast-Induced Nephropathy: A Randomized Controlled Trial.,"Despite the increasing use of pre- and post-hydration protocols and low osmolar instead of high osmolar iodine containing contrast media, the incidence of contrast induced nephropathy (CIN) is still significant. There is evidence that contrast media cause ischemia reperfusion injury of the renal medulla. Remote ischemic preconditioning (RIPC) is a non-invasive, safe, and low cost method to reduce ischemia reperfusion injury. The aim of this study is to investigate whether RIPC, as an adjunct to standard preventive measures, reduces contrast induced acute kidney injury in patients at risk of CIN. The RIPCIN study is a multicenter, single blinded, randomized controlled trial in which 76 patients at risk of CIN received standard hydration combined with RIPC or hydration with sham preconditioning. RIPC was applied by four cycles of 5 min ischemia and 5 min reperfusion of the forearm. The primary outcome measure was the change in serum creatinine from baseline to 48 to 72 hours after contrast administration. With regard to the primary endpoint, no significant effect of RIPC was found. CIN occurred in four patients (2 sham and 2 RIPC). A pre-defined subgroup analysis of patients with a Mehran risk score ≥11, showed a significantly reduced change in serum creatinine from baseline to 48 to 72 hours in patients allocated to the RIPC group (Δ creatinine -3.3 ± 9.8 μmol/L) compared with the sham group (Δ creatinine +17.8 ± 20.1 μmol/L). RIPC, as an adjunct to standard preventive measures, does not improve serum creatinine levels after contrast administration in patients at risk of CIN according to the Dutch guideline. 
However, the present data indicate that RIPC might have beneficial effects in patients at a high or very high risk of CIN (Mehran score ≥ 11). The RIPCIN study is registered at: http://www.controlled-trials.com/ISRCTN76496973.",2015-05-23 +26383258,"Organophosphate Pesticide Exposures, Nitric Oxide Synthase Gene Variants, and Gene-Pesticide Interactions in a Case-Control Study of Parkinson's Disease, California (USA).","

Background

Nitric oxide synthase (NOS) genes are candidates for Parkinson's disease (PD) because NOS enzymes produce nitric oxide (NO), a pro-oxidant that can damage neurons. Widely used organophosphate (OP) pesticides can induce oxidative stress and are reported to increase PD risk. Additionally, two single nucleotide polymorphisms (SNPs) from the PON1 (paraoxonase 1) gene influence the ability to metabolize OPs.

Objective

Here, we investigated contributions of NOS genes and OP pesticides to PD risk, controlling for PON1 status.

Methods

In 357 incident PD cases and 495 population controls, we investigated eight NOS SNPs and interactions with both household and ambient agricultural OP exposures assessed with geographic information system (GIS).

Results

In comparing PD in homozygous variant carriers of NOS2A rs1060826 versus homozygous wild-type or heterozygotes, we estimate an adjusted odds ratio (OR) of 1.51 (95% CI: 0.95, 2.41). When considering interactions between NOS1 rs2682826 and OP exposure from household use, the OR for frequent OP use alone was 1.30 (95% CI: 0.72, 2.34) and for the CT+TT genotype alone was 0.89 (95% CI: 0.58, 1.39), and for frequent OP use combined with the CT+TT genotype the OR was 2.84 (95% CI: 1.49, 5.40) (interaction p-value 0.04). Similar results were seen for ambient OP exposure. Interactions between OP exposure and three other NOS1 SNPs and a genetic risk score combining all NOS1 SNPs reached statistical significance.

Conclusions

We found that OP pesticides were more strongly associated with PD among participants with variant genotypes in NOS1, consistent with the importance of oxidative stress-inducing mechanisms. Our data provide evidence for NOS1 modifying PD risk in OP exposed populations.

Citation

Paul KC, Sinsheimer JS, Rhodes SL, Cockburn M, Bronstein J, Ritz B. 2016. Organophosphate pesticide exposures, nitric oxide synthase gene variants, and gene-pesticide interactions in a case-control study of Parkinson's disease, California (USA). Environ Health Perspect 124:570-577; http://dx.doi.org/10.1289/ehp.1408976.",2015-09-18 +23511543,Parametric Bayesian priors and better choice of negative examples improve protein function prediction.,"

Motivation

Computational biologists have demonstrated the utility of using machine learning methods to predict protein function from an integration of multiple genome-wide data types. Yet, even the best performing function prediction algorithms rely on heuristics for important components of the algorithm, such as choosing negative examples (proteins without a given function) or determining key parameters. The improper choice of negative examples, in particular, can hamper the accuracy of protein function prediction.

Results

We present a novel approach for choosing negative examples, using a parameterizable Bayesian prior computed from all observed annotation data, which also generates priors used during function prediction. We incorporate this new method into the GeneMANIA function prediction algorithm and demonstrate improved accuracy of our algorithm over current top-performing function prediction methods on the yeast and mouse proteomes across all metrics tested.

Availability

Code and Data are available at: http://bonneaulab.bio.nyu.edu/funcprop.html",2013-03-19 +24712535,Co-evolution analysis to predict protein-protein interactions within influenza virus envelope.,"Interactions between integral membrane proteins hemagglutinin (HA), neuraminidase (NA), M2 and membrane-associated matrix protein M1 of influenza A virus are thought to be crucial for assembly of functionally competent virions. We hypothesized that the amino acid residues located at the interface of two different proteins are under physical constraints and thus probably co-evolve. To predict co-evolving residue pairs, the EvFold ( http://evfold.org ) program searching the (nontransitive) Direct Information scores was applied for large samplings of amino acid sequences from Influenza Research Database ( http://www.fludb.org/ ). Having focused on the HA, NA, and M2 cytoplasmic tails as well as C-terminal domain of M1 (being the less conserved among the protein domains) we captured six pairs of correlated positions. Among them, there were one, two, and three position pairs for HA-M2, HA-M1, and M2-M1 protein pairs, respectively. As expected, no co-varying positions were found for NA-HA, NA-M1, and NA-M2 pairs obviously due to high conservation of the NA cytoplasmic tail. The sum of frequencies calculated for two major amino acid patterns observed in pairs of correlated positions was up to 0.99 meaning their high to extreme evolutionary sustainability. Based on the predictions a hypothetical model of pair-wise protein interactions within the viral envelope was proposed.",2014-03-31 +25886978,Accurate prediction of RNA nucleotide interactions with backbone k-tree model.,"

Motivation

Given the importance of non-coding RNAs to cellular regulatory functions, it would be highly desirable to have accurate computational prediction of RNA 3D structure, a task which remains challenging. Even for a short RNA sequence, the space of tertiary conformations is immense; existing methods to identify native-like conformations mostly resort to random sampling of conformations to achieve computational feasibility. However, native conformations may not be examined and prediction accuracy may be compromised due to sampling. State-of-the-art methods have yet to deliver satisfactory predictions for RNAs of length beyond 50 nucleotides.

Results

This paper presents a method to tackle a key step in the RNA 3D structure prediction problem, the prediction of the nucleotide interactions that constitute the desired 3D structure. The research is based on a novel graph model, called a backbone k-tree, to tightly constrain the nucleotide interaction relationships considered for RNA 3D structures. It is shown that the new model makes it possible to efficiently predict the optimal set of nucleotide interactions (including the non-canonical interactions in all recently revealed families) from the query sequence along with known or predicted canonical basepairs. The preliminary results indicate that in most cases the new method can predict with a high accuracy the nucleotide interactions that constitute the 3D structure of the query sequence. It thus provides a useful tool for the accurate prediction of RNA 3D structure.

Availability and implementation

The source package for BkTree is available at http://rna-informatics.uga.edu/index.php?f=software&p=BkTree.

Contact

lding@uga.edu or cai@cs.uga.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-16 +23688127,Protein complex detection using interaction reliability assessment and weighted clustering coefficient.,"

Background

Predicting protein complexes from protein-protein interaction data is becoming a fundamental problem in computational biology. The identification and characterization of protein complexes implicated are crucial to the understanding of the molecular events under normal and abnormal physiological conditions. On the other hand, large datasets of experimentally detected protein-protein interactions were determined using High-throughput experimental techniques. However, experimental data is usually liable to contain a large number of spurious interactions. Therefore, it is essential to validate these interactions before exploiting them to predict protein complexes.

Results

In this paper, we propose a novel graph mining algorithm (PEWCC) to identify such protein complexes. Firstly, the algorithm assesses the reliability of the interaction data, then predicts protein complexes based on the concept of weighted clustering coefficient. To demonstrate the effectiveness of the proposed method, the performance of PEWCC was compared to several methods. PEWCC was able to detect more matched complexes than any of the state-of-the-art methods with higher quality scores.

Conclusions

The higher accuracy achieved by PEWCC in detecting protein complexes is a valid argument in favor of the proposed method. The datasets and programs are freely available at http://faculty.uaeu.ac.ae/nzaki/Research.htm.",2013-05-20 +25758402,Hi-Jack: a novel computational framework for pathway-based inference of host-pathogen interactions.,"

Motivation

Pathogens infect their host and hijack the host machinery to produce more progeny pathogens. Obligate intracellular pathogens, in particular, require resources of the host to replicate. Therefore, infections by these pathogens lead to alterations in the metabolism of the host, shifting in favor of pathogen protein production. Some computational identification of mechanisms of host-pathogen interactions have been proposed, but it seems the problem has yet to be approached from the metabolite-hijacking angle.

Results

We propose a novel computational framework, Hi-Jack, for inferring pathway-based interactions between a host and a pathogen that relies on the idea of metabolite hijacking. Hi-Jack searches metabolic network data from hosts and pathogens, and identifies candidate reactions where hijacking occurs. A novel scoring function ranks candidate hijacked reactions and identifies pathways in the host that interact with pathways in the pathogen, as well as the associated frequent hijacked metabolites. We also describe host-pathogen interaction principles that can be used in the future for subsequent studies. Our case study on Mycobacterium tuberculosis (Mtb) revealed pathways in human-e.g. carbohydrate metabolism, lipids metabolism and pathways related to amino acids metabolism-that are likely to be hijacked by the pathogen. In addition, we report interesting potential pathway interconnections between human and Mtb such as linkage of human fatty acid biosynthesis with Mtb biosynthesis of unsaturated fatty acids, or linkage of human pentose phosphate pathway with lipopolysaccharide biosynthesis in Mtb.

Availability and implementation

Datasets and codes are available at http://cloud.kaust.edu.sa/Pages/Hi-Jack.aspx",2015-03-09 +26074661,Morphometric analysis and taxonomic revision of Anisopteromalus Ruschka (Hymenoptera: Chalcidoidea: Pteromalidae) - an integrative approach.,"We use an integrative taxonomic approach to revise the genus Anisopteromalus. In particular, we apply multivariate ratio analysis (MRA), a rather new statistical method based on principal component analysis (PCA) and linear discriminant analysis (LDA), to numerous body measurements and combine the data with those from our molecular analysis of Cytb and ITS2 genetic markers (on a subset of species) and all available published data on morphology, karyology, behaviour, host associations and geographic distribution. We demonstrate that the analysis of quantitative characters using MRA plays a major role for the integration of name-bearing types and thus for the association of taxa with names. Six species are recognized, of which two are new: A. cornis Baur sp.n. and A. quinarius Gokhman & Baur sp.n. For Anisopteromalus calandrae (Howard), a well-known, cosmopolitan parasitoid of stored-product pests, we have selected a neotype to foster continuity and stability in the application of this important name. The species was sometimes confused with the related A. quinarius sp.n., another cosmopolitan species that is frequently encountered in similar environments. We also show that several species originally described or later put under Anisopteromalus actually belong to different genera: Cyrtoptyx camerunus (Risbec) comb.n.; Meraporus glaber (Szelényi) comb.n.; Dinarmus schwenkei (Roomi, Khan & Khan) comb.n. Neocatolaccus indicus Ayyar & Mani is confirmed as a junior synonym of Oxysychus sphenopterae (Ferrière) syn.n. and Anisopteromalus calandrae brasiliensis (Domenichini) stat.rev. must be considered as a valid but doubtful taxon. 
This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:BDFE96D3-D0F4-4012-90F5-9A087F7F5864.",2014-06-12 +25892709,Essential protein identification based on essential protein-protein interaction prediction by Integrated Edge Weights.,"Essential proteins play a crucial role in cellular survival and development process. Experimentally, essential proteins are identified by gene knockouts or RNA interference, which are expensive and often fatal to the target organisms. Regarding this, an alternative yet important approach to essential protein identification is through computational prediction. Existing computational methods predict essential proteins based on their relative densities in a protein-protein interaction (PPI) network. Degree, betweenness, and other appropriate criteria are often used to measure the relative density. However, no matter what criterion is used, a protein is actually ordered by the attributes of this protein per se. In this research, we presented a novel computational method, Integrated Edge Weights (IEW), to first rank protein-protein interactions by integrating their edge weights, and then identified sub PPI networks consisting of those highly-ranked edges, and finally regarded the nodes in these sub networks as essential proteins. We evaluated IEW on three model organisms: Saccharomyces cerevisiae (S. cerevisiae), Escherichia coli (E. coli), and Caenorhabditis elegans (C. elegans). The experimental results showed that IEW achieved better performance than the state-of-the-art methods in terms of precision-recall and Jackknife measures. We had also demonstrated that IEW is a robust and effective method, which can retrieve biologically significant modules by its highly-ranked protein-protein interactions for S. cerevisiae, E. coli, and C. elegans. We believe that, with sufficient data provided, IEW can be used to any other organisms' essential protein identification. 
A website about IEW can be accessed from http://digbio.missouri.edu/IEW/index.html.",2015-04-16 +21999839,De novo transcriptome assembly and polymorphism detection in the flowering plant Silene vulgaris (Caryophyllaceae).,"Members of the angiosperm genus Silene are widely used in studies of ecology and evolution, but available genomic and population genetic resources within Silene remain limited. Deep transcriptome (i.e. expressed sequence tag or EST) sequencing has proven to be a rapid and cost-effective means to characterize gene content and identify polymorphic markers in non-model organisms. In this study, we report the results of 454 GS-FLX Titanium sequencing of a polyA-selected and normalized cDNA library from Silene vulgaris. The library was generated from a single pool of transcripts, combining RNA from leaf, root and floral tissue from three genetically divergent European subpopulations of S. vulgaris. A single full-plate 454 run produced 959,520 reads totalling 363.6 Mb of sequence data with an average read length of 379.0 bp after quality trimming and removal of custom library adaptors. We assembled 832,251 (86.7%) of these reads into 40,964 contigs, which have a total length of 25.4 Mb and can be organized into 18,178 graph-based clusters or 'isogroups'. Assembled sequences were annotated based on homology to genes in multiple public databases. Analysis of sequence variants identified 13,432 putative single-nucleotide polymorphisms (SNPs) and 1320 simple sequence repeats (SSRs) that are candidates for microsatellite analysis. Estimates of nucleotide diversity from 1577 contigs were used to generate genome-wide distributions that revealed several outliers with high diversity. 
All of these resources are publicly available through NCBI and/or our website (http://silenegenomics.biology.virginia.edu) and should provide valuable genomic and population genetic tools for the Silene research community.",2011-10-17 +24913724,Home- versus hospital-based phototherapy for the treatment of non-haemolytic jaundice in infants at more than 37 weeks' gestation.,"

Background

Phototherapy is commonly used for the treatment of neonatal jaundice, and home-based phototherapy is now being used in certain centres. Home-based phototherapy offers possible advantages by avoiding prolonged hospital admissions, promoting mother-infant bonding and reducing hospitalisation costs. Potential problems include increased duration of phototherapy, increased readmission to hospital and increased risk of bilirubin encephalopathy.

Objectives

To compare exclusively home-based versus exclusively hospital-based phototherapy or a combination of home- and hospital-based phototherapy for the management of non-haemolytic jaundice in term infants up to 28 days of age. We planned to include specific subgroups for duration in hospital, method of phototherapy and criteria for readiness for discharge.

Search methods

We searched the Cochrane Neonatal Review Group Specialised Register, the Cochrane Central Register of Controlled Trials (CENTRAL) January 2013, Issue 1, part of The Cochrane Library, MEDLINE (from 1966 to 15 February 2013), CINAHL (from 1982 to 15 February 2013) and EMBASE (from 1988 to 15 February 2013). We searched for abstracts from the Pediatric Academic Societies' Annual Meetings 2000 to 2013. We searched for ongoing trials on the following websites: ClinicalTrials.gov (http://clinicaltrials.gov/) and Current Controlled Trials (http://controlled-trials.com/).

Selection criteria

Randomised and quasi-randomised studies comparing term infants who received phototherapy exclusively at home versus phototherapy exclusively in the hospital or a combination of the two for non-haemolytic jaundice.

Data collection and analysis

Two review authors independently assessed trial quality and extracted data.

Main results

No studies that met the predefined eligibility criteria were identified.

Authors' conclusions

No high-quality evidence is currently available to support or refute the practice of home-based phototherapy for non-haemolytic jaundice in infants at more than 37 weeks' gestation.",2014-06-10 +23742908,SPEDRE: a web server for estimating rate parameters for cell signaling dynamics in data-rich environments.,"Cell signaling pathways and metabolic networks are often modeled using ordinary differential equations (ODEs) to represent the production/consumption of molecular species over time. Regardless whether a model is built de novo or adapted from previous models, there is a need to estimate kinetic rate constants based on time-series experimental measurements of molecular abundance. For data-rich cases such as proteomic measurements of all species, spline-based parameter estimation algorithms have been developed to avoid solving all the ODEs explicitly. We report the development of a web server for a spline-based method. Systematic Parameter Estimation for Data-Rich Environments (SPEDRE) estimates reaction rates for biochemical networks. As input, it takes the connectivity of the network and the concentrations of the molecular species at discrete time points. SPEDRE is intended for large sparse networks, such as signaling cascades with many proteins but few reactions per protein. If data are available for all species in the network, it provides global coverage of the parameter space, at low resolution and with approximate accuracy. The output is an optimized value for each reaction rate parameter, accompanied by a range and bin plot. SPEDRE uses tools from COPASI for pre-processing and post-processing. SPEDRE is a free service at http://LTKLab.org/SPEDRE.",2013-06-05 +24806471,MIDER: network inference with mutual information distance and entropy reduction.,"The prediction of links among variables from a given dataset is a task referred to as network inference or reverse engineering. 
It is an open problem in bioinformatics and systems biology, as well as in other areas of science. Information theory, which uses concepts such as mutual information, provides a rigorous framework for addressing it. While a number of information-theoretic methods are already available, most of them focus on a particular type of problem, introducing assumptions that limit their generality. Furthermore, many of these methods lack a publicly available implementation. Here we present MIDER, a method for inferring network structures with information theoretic concepts. It consists of two steps: first, it provides a representation of the network in which the distance among nodes indicates their statistical closeness. Second, it refines the prediction of the existing links to distinguish between direct and indirect interactions and to assign directionality. The method accepts as input time-series data related to some quantitative features of the network nodes (such as e.g. concentrations, if the nodes are chemical species). It takes into account time delays between variables, and allows choosing among several definitions and normalizations of mutual information. It is general purpose: it may be applied to any type of network, cellular or otherwise. A Matlab implementation including source code and data is freely available (http://www.iim.csic.es/~gingproc/mider.html). The performance of MIDER has been evaluated on seven different benchmark problems that cover the main types of cellular networks, including metabolic, gene regulatory, and signaling. Comparisons with state of the art information-theoretic methods have demonstrated the competitive performance of MIDER, as well as its versatility. 
Its use does not demand any a priori knowledge from the user; the default settings and the adaptive nature of the method provide good results for a wide range of problems without requiring tuning.",2014-05-07 +23143611,Cohort profile: the English longitudinal study of ageing.,"The English Longitudinal Study of Ageing (ELSA) is a panel study of a representative cohort of men and women living in England aged ≥50 years. It was designed as a sister study to the Health and Retirement Study in the USA and is multidisciplinary in orientation, involving the collection of economic, social, psychological, cognitive, health, biological and genetic data. The study commenced in 2002, and the sample has been followed up every 2 years. Data are collected using computer-assisted personal interviews and self-completion questionnaires, with additional nurse visits for the assessment of biomarkers every 4 years. The original sample consisted of 11 391 members ranging in age from 50 to 100 years. ELSA is harmonized with ageing studies in other countries to facilitate international comparisons, and is linked to financial and health registry data. The data set is openly available to researchers and analysts soon after collection (http://www.esds.ac.uk/longitudinal/access/elsa/l5050.asp).",2012-11-09 +21798033,Visualizing meta-features in proteomic maps.,"

Background

The steps of a high-throughput proteomics experiment include the separation, differential expression and mass spectrometry-based identification of proteins. However, the last and more challenging step is inferring the biological role of the identified proteins through their association with interaction networks, biological pathways, analysis of the effect of post-translational modifications, and other protein-related information.

Results

In this paper, we present an integrative visualization methodology that allows combining experimentally produced proteomic features with protein meta-features, typically coming from meta-analysis tools and databases, in synthetic Proteomic Feature Maps. Using three proteomics analysis scenarios, we show that the proposed visualization approach is effective in filtering, navigating and interacting with the proteomics data in order to address visually challenging biological questions. The novelty of our approach lies in the ease of integration of any user-defined proteomic features in easy-to-comprehend visual representations that resemble the familiar 2D-gel images, and can be adapted to the user's needs. The main capabilities of the developed VIP software, which implements the presented visualization methodology, are also highlighted and discussed.

Conclusions

By using this visualization and the associated VIP software, researchers can explore a complex heterogeneous proteomics dataset from different perspectives in order to address visually important biological queries and formulate new hypotheses for further investigation. VIP is freely available at http://pelopas.uop.gr/~egian/VIP/index.html.",2011-07-28 +25491094,chromoWIZ: a web tool to query and visualize chromosome-anchored genes from cereal and model genomes.,"

Background

Over the last years reference genome sequences of several economically and scientifically important cereals and model plants became available. Despite the agricultural significance of these crops only a small number of tools exist that allow users to inspect and visualize the genomic position of genes of interest in an interactive manner.

Description

We present chromoWIZ, a web tool that allows visualizing the genomic positions of relevant genes and comparing these data between different plant genomes. Genes can be queried using gene identifiers, functional annotations, or sequence homology in four grass species (Triticum aestivum, Hordeum vulgare, Brachypodium distachyon, Oryza sativa). The distribution of the anchored genes is visualized along the chromosomes by using heat maps. Custom gene expression measurements, differential expression information, and gene-to-group mappings can be uploaded and can be used for further filtering.

Conclusions

This tool is mainly designed for breeders and plant researchers, who are interested in the location and the distribution of candidate genes as well as in the syntenic relationships between different grass species. chromoWIZ is freely available and online accessible at http://mips.helmholtz-muenchen.de/plant/chromoWIZ/index.jsp.",2014-12-10 +24682734,EpimiR: a database of curated mutual regulation between miRNAs and epigenetic modifications.,"As two kinds of important gene expression regulators, both epigenetic modification and microRNA (miRNA) can play significant roles in a wide range of human diseases. Recently, many studies have demonstrated that epigenetics and miRNA can affect each other in various ways. In this study, we established the EpimiR database, which collects 1974 regulations between 19 kinds of epigenetic modifications (such as DNA methylation, histone acetylation, H3K4me3, H3S10p) and 617 miRNAs across seven species (including Homo sapiens, Mus musculus, Rattus norvegicus, Gallus gallus, Epstein-Barr virus, Canis familiaris and Arabidopsis thaliana) from >300 references in the literature. These regulations can be divided into two parts: miR2Epi (103 entries describing how miRNA regulates epigenetic modification) and Epi2miR (1871 entries describing how epigenetic modification affects miRNA). Each entry of EpimiR not only contains basic descriptions of the validated experiment (method, species, reference and so on) but also clearly illuminates the regulatory pathway between epigenetics and miRNA. As a supplement to the curated information, the EpimiR extends to gather predicted epigenetic features (such as predicted transcription start site, upstream CpG island) associated with miRNA for users to guide their future biological experiments. Finally, EpimiR offers download and submission pages. 
Thus, EpimiR provides a fairly comprehensive repository about the mutual regulation between epigenetic modifications and miRNAs, which will promote the research on the regulatory mechanism of epigenetics and miRNA. Database URL: http://bioinfo.hrbmu.edu.cn/EpimiR/.",2014-03-28 +25064565,img2net: automated network-based analysis of imaged phenotypes.,"

Summary

Automated analysis of imaged phenotypes enables fast and reproducible quantification of biologically relevant features. Despite recent developments, recordings of complex networked structures, such as leaf venation patterns, cytoskeletal structures or traffic networks, remain challenging to analyze. Here we illustrate the applicability of img2net to automatedly analyze such structures by reconstructing the underlying network, computing relevant network properties and statistically comparing networks of different types or under different conditions. The software can be readily used for analyzing image data of arbitrary 2D and 3D network-like structures.

Availability and implementation

img2net is open-source software under the GPL and can be downloaded from http://mathbiol.mpimp-golm.mpg.de/img2net/, where supplementary information and datasets for testing are provided.

Contact

breuer@mpimp-golm.mpg.de.",2014-07-26 +27999526,"CME/CNE Article: A Framework of Care in Multiple Sclerosis, Part 1: Updated Disease Classification and Disease-Modifying Therapy Use in Specific Circumstances.","Activity Available Online: To access the article, post-test, and evaluation online, go to http://www.cmscscholar.org.

Target audience

The target audience for this activity is physicians, physician assistants, nursing professionals, and other health-care providers involved in the management of patients with multiple sclerosis (MS).

Learning objectives

Apply new information about MS to a comprehensive individualized treatment plan for patients with MSIntegrate the team approach into long-term planning in order to optimize rehabilitation care of patients with MSAccreditation Statement: This activity has been planned and implemented in accordance with the accreditation requirements and policies of the Accreditation Council for Continuing Medical Education (ACCME) through the joint providership of the Consortium of Multiple Sclerosis Centers (CMSC), Nurse Practitioner Alternatives (NPA), and Delaware Media Group. The CMSC is accredited by the ACCME to provide continuing medical education for physicians. The CMSC designates this journal-based CME activity for a maximum of 1.0 AMA PRA Category 1 Credit(s)™. Physicians should claim only the credit commensurate with the extent of their participation in the activity. Nurse Practitioner Alternatives (NPA) is accredited as a provider of continuing nursing education by the American Nurses Credentialing Center's Commission on Accreditation. NPA designates this enduring material for 1.0 Continuing Nursing Education credit. Laurie Scudder, DNP, NP, has served as Nurse Planner for this activity. She has disclosed no relevant financial relationships. Disclosures: Francois Bethoux, MD, Editor in Chief of the International Journal of MS Care (IJMSC), has served as Physician Planner for this activity. He has received royalties from Springer Publishing and has received intellectual property rights from Biogen. Laurie Scudder, DNP, NP, has served as Nurse Planner for this activity. She has disclosed no relevant financial relationships. Scott D. Newsome, DO, MSCS (author), has served on scientific advisory boards for Biogen, Genentech, Novartis, and Genzyme, and has performed contracted research (institution received funds) for Biogen, Genentech, and Novartis. Philip J. 
Aliotta, MD, MSHA, CHCQM, FACS (author), has served on speakers' bureaus for Astellas Pharma, Actavis, Augmenix, and Allergan and has performed contracted research for Allergan. Jacquelyn Bainbridge, PharmD (author), has disclosed no relevant financial relationships. Susan E. Bennett, PT, DPT, EdD, NCS, MSCS (author), has served on speakers' bureaus for Acorda Therapeutics, Biogen, and Medtronic; has received consulting fees from and performed contracted research for Acorda Therapeutics; and is chair of the Clinical Events Committee at Innovative Technologies. Gary Cutter, PhD (author), has participated on Data and Safety Monitoring Committees for AMO Pharma, Apotek, Gilead Pharmaceuticals, Horizon Pharmaceuticals, Modigenetech/Prolor, Merck, Merck/Pfizer, Opko Biologics, Neuren, Sanofi-Aventis, Reata Pharmaceuticals, Receptos/Celgene, Teva Pharmaceuticals, NHLBI (Protocol Review Committee), and NICHD (OPRU Oversight Committee); has received consulting fees from and/or served on speakers' bureaus and scientific advisory boards for Cerespir, Genzyme, Genentech, Innate Therapeutics, Janssen Pharmaceuticals, Klein-Buendel Incorporated, MedImmune, Medday, Nivalis, Novartis, Opexa Therapeutics, Roche, Savara, Somahlution, Teva Pharmaceuticals, Transparency Life Sciences, and TG Therapeutics; and is President of Pythagoras, Inc., a private consulting company located in Birmingham, AL. Kaylan Fenton, CRNP, APNP, MSCN (author), has disclosed no relevant financial relationships. 
Fred Lublin, MD (author), has received consulting fees/fees for non-CME/CE activities from Bayer HealthCare Pharmaceuticals, Biogen, EMD Serono, Novartis, Teva Neuroscience, Actelion, Sanofi/Genzyme, Acorda, Questcor/Mallinckrodt, Roche/Genentech, MedImmune, Osmotica, Xenoport, Receptos/Celgene, Forward Pharma, Akros, TG Therapeutics, AbbVie, Toyama, Amgen, Medday, Atara Biotherapeutics, Polypharma, Pfizer, Johnson & Johnson, Revalesio, Coronado Bioscience, and Bristol-Myers Squibb; has served on speakers' bureaus for Genentech/Roche and Genzyme/Sanofi; has performed contracted research for Acorda, Biogen, Novartis, Teva Neuroscience, Genzyme, Xenoport, and Receptos; is the co-chief editor of Multiple Sclerosis and Related Disorders; and has an ownership interest in Cognition Pharmaceuticals. Dorothy Northrop, MSW, ACSW (author), has disclosed no relevant financial relationships. David Rintell, EdD (author), has received consulting fees from Novartis and has served as a patient education speaker for Teva Neuroscience. He started as a salaried employee of Sanofi Genzyme in November 2015. Dr. Rintell's work on this project was completed before he became a salaried employee of Sanofi Genzyme.Bryan D. Walker, MHS, PA-C (author), has served on scientific advisory boards for EMD Serono and Sanofi Genzyme and owns stock in Biogen. Megan Weigel, DNP, ARNP-C, MSCN (author), has received consulting fees from Mallinckrodt, Genzyme, and Genentech, and has served on speakers' bureaus for Bayer Corp, Acorda Therapeutics, Teva Neuroscience, Biogen, Mallinckrodt, Genzyme, Novartis, and Pfizer. Kathleen Zackowski, PhD, OTR, MSCS (author), has performed contracted research for Acorda Therapeutics. David E. Jones, MD (author), has received consulting fees from Biogen and Novartis, and has performed contracted research for Biogen. One anonymous peer reviewer for the IJMSC has performed contracted research (institution received funds) for Novartis, Chugai, and Biogen. 
Another reviewer has received consulting fees and served on speakers' bureaus for Biogen, Sanofi Genzyme, Genentech, EMD Serono, and Novartis. The third reviewer has disclosed no relevant financial relationships. Lori Saslow, MS (medical writer), has disclosed no relevant financial relationships. The staff at the IJMSC, CMSC, NPA, and Delaware Media Group who are in a position to influence content have disclosed no relevant financial relationships. Note: Disclosures listed for authors are those applicable at the time of their work on this project and within 12 months previously. Financial relationships for some authors may have changed in the interval between the time of their work on this project and publication of the article. Funding/Support: Funding for the Framework of Care consensus conference was provided by the Consortium of Multiple Sclerosis Centers, Mallinckrodt Pharmaceuticals, and Mylan Pharmaceuticals. Method of Participation: Release Date: December 1, 2016 Valid for Credit Through: December 1, 2017 In order to receive CME/CNE credit, participants must: Review the CME/CNE information, including learning objectives and author disclosures.Study the educational content.Complete the post-test and evaluation, which are available at http://www.cmscscholar.org. Statements of Credit are awarded upon successful completion of the post-test with a passing score of >70% and the evaluation. There is no fee to participate in this activity. Disclosure of Unlabeled Use: This CME/CNE activity may contain discussion of published and/or investigational uses of agents that are not approved by the FDA. CMSC, NPA, and Delaware Media Group do not recommend the use of any agent outside of the labeled indications. The opinions expressed in the educational activity are those of the faculty and do not necessarily represent the views of CMSC, NPA, or Delaware Media Group. 
Disclaimer: Participants have an implied responsibility to use the newly acquired information to enhance patient outcomes and their own professional development. The information presented in this activity is not meant to serve as a guideline for patient management. Any medications, diagnostic procedures, or treatments discussed in this publication should not be used by clinicians or other health-care professionals without first evaluating their patients' conditions, considering possible contraindications or risks, reviewing any applicable manufacturer's product information, and comparing any therapeutic approach with the recommendations of other authorities.",2016-11-01 +23418189,Classification of mislabelled microarrays using robust sparse logistic regression.,"

Motivation

Previous studies reported that labelling errors are not uncommon in microarray datasets. In such cases, the training set may become misleading, and the ability of classifiers to make reliable inferences from the data is compromised. Yet, few methods are currently available in the bioinformatics literature to deal with this problem. The few existing methods focus on data cleansing alone, without reference to classification, and their performance crucially depends on some tuning parameters.

Results

In this article, we develop a new method to detect mislabelled arrays simultaneously with learning a sparse logistic regression classifier. Our method may be seen as a label-noise robust extension of the well-known and successful Bayesian logistic regression classifier. To account for possible mislabelling, we formulate a label-flipping process as part of the classifier. The regularization parameter is automatically set using Bayesian regularization, which not only saves the computation time that cross-validation would take, but also eliminates any unwanted effects of label noise when setting the regularization parameter. Extensive experiments with both synthetic data and real microarray datasets demonstrate that our approach is able to counter the bad effects of labelling errors in terms of predictive performance, it is effective at identifying marker genes and simultaneously it detects mislabelled arrays to high accuracy.

Availability

The code is available from http://cs.bham.ac.uk/∼jxb008.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-15 +25619995,Bayesian feature selection for high-dimensional linear regression via the Ising approximation with applications to genomics.,"

Motivation

Feature selection, identifying a subset of variables that are relevant for predicting a response, is an important and challenging component of many methods in statistics and machine learning. Feature selection is especially difficult and computationally intensive when the number of variables approaches or exceeds the number of samples, as is often the case for many genomic datasets.

Results

Here, we introduce a new approach--the Bayesian Ising Approximation (BIA)-to rapidly calculate posterior probabilities for feature relevance in L2 penalized linear regression. In the regime where the regression problem is strongly regularized by the prior, we show that computing the marginal posterior probabilities for features is equivalent to computing the magnetizations of an Ising model with weak couplings. Using a mean field approximation, we show it is possible to rapidly compute the feature selection path described by the posterior probabilities as a function of the L2 penalty. We present simulations and analytical results illustrating the accuracy of the BIA on some simple regression problems. Finally, we demonstrate the applicability of the BIA to high-dimensional regression by analyzing a gene expression dataset with nearly 30 000 features. These results also highlight the impact of correlations between features on Bayesian feature selection.

Availability and implementation

An implementation of the BIA in C++, along with data for reproducing our gene expression analyses, are freely available at http://physics.bu.edu/∼pankajm/BIACode.",2015-01-24 +24666463,Association between tumor necrosis factor-alpha gene polymorphisms and prostate cancer risk: a meta-analysis.,"

Background

Tumor necrosis factor-alpha (TNF-α) is an important inflammatory cytokine that may play a role in controlling the progression of prostate cancer. Two common polymorphisms in the TNF-α gene, -308G/A and -238C/T, have been suggested to alter the risk for prostate cancer, but the results have been inconclusive so far. In order to obtain a better understanding of the effects of these two polymorphisms on prostate cancer risk, all available studies were considered in a meta-analysis.

Methods

We conducted a comprehensive literature search in the Cochrane Library, PubMed, EMBASE, Chinese Biomedical Literature database (CBM), and the China National Knowledge Infrastructure (CNKI). The associations were evaluated by calculating the pooled odds ratio (OR) with 95% confidence interval (95% CI).

Results

In this meta-analysis, we included 14 studies with 5,757 patients and 6,137 control subjects for the TNF-α-308G/A polymorphism and 1,967 patients and 2,004 control subjects for the TNF-α-238C/T polymorphism. A significantly increased prostate cancer risk was found to be associated with the TNF-α-308C/T polymorphism in studies with healthy volunteers (AA + AG vs. GG: OR = 1.531, 95% CI = 1.093-2.145; P = 0.013; AG vs. GG: OR = 1.477, 95% CI = 1.047-2.085; P = 0.026). No significant association was found between the TNF-α-238G/A polymorphism and prostate cancer risk in the overall or subgroup analyses. There was no risk of publication bias in this meta-analysis.

Conclusions

Our results suggest that while the TNF-α-238G/A polymorphism may not be associated with prostate cancer the TNF-α-308C/T polymorphism may significantly contribute to prostate cancer susceptibility in healthy volunteers.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1629288120116301.",2014-03-25 +26484124,Gene expression in response to cyclic mechanical stretch in primary human dermal fibroblasts.,"The human dermal skin is permanently exposed to mechanical stress, for instance during facial expression, which might cause wrinkles with age. Cyclic mechanical stretching of cells results in cellular and cytoskeleton alignment perpendicular to the stretch direction regulating cellular response. With gene expression profiling it was aimed to identify the differentially expressed genes associated with the regulation of the cytoskeleton to investigate the stretch-induced cell alignment mechanism. Here, the transcription activity of the genome in response to cyclic mechanical stress was measured using DNA microarray technology with Agilent SurePrint G3 Human GE 8x60k Microarrays, based on the overall measurement of the mRNA. Gene expression was measured at the beginning of the alignment process showing first reoriented cells after 5 h stretching and at the end after 24 h, where nearly all cells are aligned. Gene expression data of control vs. 
stretched primary human dermal fibroblasts after 5 h and 24 h demonstrated the regulation of differentially expressed genes associated with metabolism, differentiation and morphology and were deposited at http://www.ncbi.nlm.nih.gov/geo with the accession number GSE58389.",2014-10-16 +23529371,An automated Pearson's correlation change classification (APC3) approach for GC/MS metabonomic data using total ion chromatograms (TICs).,"A fully automated and computationally efficient Pearson's correlation change classification (APC3) approach is proposed and shown to have overall comparable performance with both an average accuracy and an average AUC of 0.89 ± 0.08 but is 3.9 to 7 times faster, easier to use and have low outlier susceptibility in contrast to other dimensional reduction and classification combinations using only the total ion chromatogram (TIC) intensities of GC/MS data. The use of only the TIC permits the possible application of APC3 to other metabonomic data such as LC/MS TICs or NMR spectra. A RapidMiner implementation is available for download at http://padel.nus.edu.sg/software/padelapc3.",2013-05-01 +24665129,Assessing multivariate gene-metabolome associations with rare variants using Bayesian reduced rank regression.,"

Motivation

A typical genome-wide association study searches for associations between single nucleotide polymorphisms (SNPs) and a univariate phenotype. However, there is a growing interest to investigate associations between genomics data and multivariate phenotypes, for example, in gene expression or metabolomics studies. A common approach is to perform a univariate test between each genotype-phenotype pair, and then to apply a stringent significance cutoff to account for the large number of tests performed. However, this approach has limited ability to uncover dependencies involving multiple variables. Another trend in the current genetics is the investigation of the impact of rare variants on the phenotype, where the standard methods often fail owing to lack of power when the minor allele is present in only a limited number of individuals.

Results

We propose a new statistical approach based on Bayesian reduced rank regression to assess the impact of multiple SNPs on a high-dimensional phenotype. Because of the method's ability to combine information over multiple SNPs and phenotypes, it is particularly suitable for detecting associations involving rare variants. We demonstrate the potential of our method and compare it with alternatives using the Northern Finland Birth Cohort with 4702 individuals, for whom genome-wide SNP data along with lipoprotein profiles comprising 74 traits are available. We discovered two genes (XRCC4 and MTHFD2L) without previously reported associations, which replicated in a combined analysis of two additional cohorts: 2390 individuals from the Cardiovascular Risk in Young Finns study and 3659 individuals from the FINRISK study.

Availability and implementation

R-code freely available for download at http://users.ics.aalto.fi/pemartti/gene_metabolome/.",2014-03-24 +26085503,libRoadRunner: a high performance SBML simulation and analysis library.,"

Motivation

This article presents libRoadRunner, an extensible, high-performance, cross-platform, open-source software library for the simulation and analysis of models expressed using Systems Biology Markup Language (SBML). SBML is the most widely used standard for representing dynamic networks, especially biochemical networks. libRoadRunner is fast enough to support large-scale problems such as tissue models, studies that require large numbers of repeated runs and interactive simulations.

Results

libRoadRunner is a self-contained library, able to run both as a component inside other tools via its C++ and C bindings, and interactively through its Python interface. Its Python Application Programming Interface (API) is similar to the APIs of MATLAB ( WWWMATHWORKSCOM: ) and SciPy ( HTTP//WWWSCIPYORG/: ), making it fast and easy to learn. libRoadRunner uses a custom Just-In-Time (JIT) compiler built on the widely used LLVM JIT compiler framework. It compiles SBML-specified models directly into native machine code for a variety of processors, making it appropriate for solving extremely large models or repeated runs. libRoadRunner is flexible, supporting the bulk of the SBML specification (except for delay and non-linear algebraic equations) including several SBML extensions (composition and distributions). It offers multiple deterministic and stochastic integrators, as well as tools for steady-state analysis, stability analysis and structural analysis of the stoichiometric matrix.

Availability and implementation

libRoadRunner binary distributions are available for Mac OS X, Linux and Windows. The library is licensed under Apache License Version 2.0. libRoadRunner is also available for ARM-based computers such as the Raspberry Pi. http://www.libroadrunner.org provides online documentation, full build instructions, binaries and a git source repository.

Contacts

hsauro@u.washington.edu or somogyie@indiana.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-06-17 +25733578,Cohort Profile: The Shanghai Men's Health Study.,"The Shanghai Men's Health Study (SMHS) is a population-based cohort study of 61,480 men aged 40-74 years, launched in 2002 in urban Shanghai to investigate the contribution of lifestyle/environmental factors and genetic susceptibility to cancer and other non-communicable diseases (NCDs). At baseline, trained interviewers collected detailed information on personal and dietary habits, occupational/medical history and physical activity, and took anthropometric measurements (response rate: 74%). Blood, urine and DNA were collected from 75%, 89% and 89% of participants, respectively. The cohort has been followed up through a combination of in-person surveys every 3-4 years and annual record linkage with cancer and vital statistics registries. Response rates for in-person follow-up surveys were over 91% and coverage for mortality nearly 100%. SMHS participants have a high smoking rate (58.6%) and moderate alcohol-drinking rate (29.3%), but low obesity rate (2.6%). They have a low calorie intake from fat (16.2% of total calorie intake) and protein (16.4%), high calorie intake from carbohydrates (67.4%), and high intake of soy food, cruciferous vegetables and fish (156.5, 110.6 and 51.7 g/day, respectively). With its unique exposure pattern and wealth of data and biological samples, the SMHS is well positioned for long-term research into NCD aetiology and prognosis. Information about accessing the SMHS resources can be found at: http://www.mc.vanderbilt.edu/swhs-smhs/.",2015-03-02 +23256906,CAPER: a chromosome-assembled human proteome browsER.,"High-throughput mass spectrometry and antibody-based experiments have begun to produce a large amount of proteomic data sets. Chromosome-based visualization of these data sets and their annotations can help effectively integrate, organize, and analyze them. 
Therefore, we developed a web-based, user-friendly Chromosome-Assembled human Proteome browsER (CAPER). To display proteomic data sets and related annotations comprehensively, CAPER employs two distinct visualization strategies: track-view for the sequence/site information and the correspondence between proteome, transcriptome, genome, and chromosome and heatmap-view for the qualitative and quantitative functional annotations. CAPER supports data browsing at multiple scales through Google Map-like smooth navigation, zooming, and positioning with chromosomes as the reference coordinate. Both track-view and heatmap-view can mutually switch, providing a high-quality user interface. Taken together, CAPER will greatly facilitate the complete annotation and functional interpretation of the human genome by proteomic approaches, thereby making a significant contribution to the Chromosome-Centric Human Proteome Project and even the human physiology/pathology research. CAPER can be accessed at http://www.bprc.ac.cn/CAPE .",2012-12-20 +23418185,eMZed: an open source framework in Python for rapid and interactive development of LC/MS data analysis workflows.,"

Summary

The Python-based, open-source eMZed framework was developed for mass spectrometry (MS) users to create tailored workflows for liquid chromatography (LC)/MS data analysis. The goal was to establish a unique framework with comprehensive basic functionalities that are easy to apply and allow for the extension and modification of the framework in a straightforward manner. eMZed supports the iterative development and prototyping of individual evaluation strategies by providing a computing environment and tools for inspecting and modifying underlying LC/MS data. The framework specifically addresses non-expert programmers, as it requires only basic knowledge of Python and relies largely on existing successful open-source software, e.g. OpenMS.

Availability

The framework eMZed and its documentation are freely available at http://emzed.biol.ethz.ch/. eMZed is published under the GPL 3.0 license, and an online discussion group is available at https://groups.google.com/group/emzed-users.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-15 +23771147,kmer-SVM: a web server for identifying predictive regulatory sequence features in genomic data sets.,"Massively parallel sequencing technologies have made the generation of genomic data sets a routine component of many biological investigations. For example, Chromatin immunoprecipitation followed by sequence assays detect genomic regions bound (directly or indirectly) by specific factors, and DNase-seq identifies regions of open chromatin. A major bottleneck in the interpretation of these data is the identification of the underlying DNA sequence code that defines, and ultimately facilitates prediction of, these transcription factor (TF) bound or open chromatin regions. We have recently developed a novel computational methodology, which uses a support vector machine (SVM) with kmer sequence features (kmer-SVM) to identify predictive combinations of short transcription factor-binding sites, which determine the tissue specificity of these genomic assays (Lee, Karchin and Beer, Discriminative prediction of mammalian enhancers from DNA sequence. Genome Res. 2011; 21:2167-80). This regulatory information can (i) give confidence in genomic experiments by recovering previously known binding sites, and (ii) reveal novel sequence features for subsequent experimental testing of cooperative mechanisms. Here, we describe the development and implementation of a web server to allow the broader research community to independently apply our kmer-SVM to analyze and interpret their genomic datasets. We analyze five recently published data sets and demonstrate how this tool identifies accessory factors and repressive sequence elements. kmer-SVM is available at http://kmersvm.beerlab.org.",2013-06-14 +22640803,"Pyrosequencing data analysis software: a useful tool for EGFR, KRAS, and BRAF mutation analysis.","

Background

Pyrosequencing is a new technology and can be used for mutation tests. However, its data analysis is a manual process and involves sophisticated algorithms. During this process, human errors may occur. A better way of analyzing pyrosequencing data is needed in clinical diagnostic laboratory. Computer software is potentially useful for pyrosequencing data analysis. We have developed such software, which is able to perform pyrosequencing mutation data analysis for epidermal growth factor receptor, Kirsten rat sarcoma viral oncogene homolog and v-raf murine sarcoma viral oncogene homolog B1. The input data for analysis includes the targeted nucleotide sequence, common mutations in the targeted sequence, pyrosequencing dispensing order, pyrogram peak order and peak heights. The output includes mutation type and percentage of mutant gene in the specimen.

Results

The data from 1375 pyrosequencing test results were analyzed using the software in parallel with manual analysis. The software was able to generate correct results for all 1375 cases.

Conclusion

The software developed is a useful molecular diagnostic tool for pyrosequencing mutation data analysis. This software can increase laboratory data analysis efficiency and reduce data analysis error rate.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1348911657684292.",2012-05-28 +24649507,The Physiology and Metabolism of Enterococci,"When possible, the authors have provided open reading frame (ORF) numbers (EF####) from the extensively annotated E. faecalis V583 genome sequence. This information can be easily accessed at enterocyc.broadinstitute.org. If applicable, non-V583 enterococcal sequences with homology to the annotated ORF have been supplied. Additionally, we have included IUBMB nomenclature for most reactions, which are available at http://www.iubmb.org. Enterococci have been isolated and characterized for more than 113 years (MacCallum & Hastings, 1899). During the past century, the classification of this genus has been refined, with the most significant change occurring in 1984 when most members of the Group D streptococci, including Streptococcus faecalis and Streptococcus faecium, were included in the new genus Enterococcus (Schleifer & Kilpper-Bälz, 1984). This genus currently consists of 37 species that occupy a broad range of habitats that include the gastrointestinal microbiota of nearly every animal phylum (See Enterococcus Diversity, Origins in Nature, and Gut Colonization for details). An ability to widely colonize is due, at least in part, to their metabolic versatility and intrinsic resistance to inhospitable conditions. Despite being unable to form spores, enterococci are highly tolerant to desiccation and can persist for months on dried surfaces. Enterococci also tolerate extremes of pH, ionizing radiation, osmotic and oxidative stresses, high heavy metal concentrations, and antibiotics. Enterococci survive or grow over a wide range of temperatures for mesophilic bacteria, from 10 to 45°C. These bacteria, as highly evolved commensals, have been extensively used in the food industry and as probiotics to prevent or ameliorate disease. 
Finally, rogue strains of enterococci have emerged on the worldwide stage as multidrug-resistant and hospital-acquired pathogens. Enterococci are often simply described as lactic-acid–producing bacteria—a designation that understates their vast metabolic potential. The ubiquitous nature of enterococci in our environment implies this potential. Investigations into the remarkable physiology of these bacteria have fluctuated over the past century. Prior to publication of The Enterococci (Huycke M. M., 2002), and now with this volume, the last formal comprehensive review of enterococcal metabolism was written in 1964 (Deibel, 1964). At that time, substantial efforts by Gunsalus, Sokatch, Gale, Niven, and Deibel, among others, focused on the central metabolism of enterococci. Since then, research into enterococcal physiology has increasingly used the tools of molecular biology and has shifted toward understanding antibiotic resistance, pathogenesis, and genomics. With this new information, there has been increasing recognition that many metabolic genes and pathways vary, even within single species, and led investigators to question the concept of a uniform core metabolism for enterococci. To address this perspective and update the available information on enterococcal physiology, this chapter compiles and reviews the most recent findings from laboratories around the world, and integrates those results with the older literature. As will be evident, the rapid growth of genomic databases continues to offer valuable insights into the physiology of enterococci and greatly facilitates experimental designs into their metabolism.",2014-03-21 +25260903,A comparison of case volumes among urologic surgeons identified on an industry-sponsored website to an all provider peer group.,"

Introduction

Industry-sponsored websites for robotic surgery direct to surgeons listed as performing specific robotic surgical procedures. The purpose of this study was to compare average annual, surgeon-specific, case volumes for those procedures for which they were listed as performing on the commercial website with the volumes of all providers performing these same procedures across a defined geographic region.

Methods

A list of providers within the state of Wisconsin cited as performing specific urologic procedures was obtained through the Intuitive Surgical website 〈http://www.davincisurgery.com/da-vinci-urology/〉. Surgeon-specific annual case volumes from 2009 to 2013 for these same cases were obtained for all Wisconsin providers through DataBay Resources (Warrendale, PA) based on International classification of diseases-9 codes. Procedural activity was rank ordered, and surgeons were placed in ""volume deciles"" derived from the total annual number of cases performed by all surgeons. The distribution of commercially listed surgeon volumes, both 5-year average and most recent year, was compared with the average and 2013 volumes of all surgeons performing a specific procedure.

Findings

A total of 35 individual urologic surgeons listed as performing robotic surgery in Wisconsin were identified through a ""search"" using the Intuitive Surgical website. Specific procedure analysis returned 5, 12, 9, and 15 surgeon names for cystectomy, partial nephrectomy, radical nephrectomy, and prostatectomy, respectively. This compared with the total number of surgeons who had performed the listed procedure in Wisconsin at least 1 time during the prior 5 years of 123, 153, 242, and 165, respectively. When distributed by surgeon-volume deciles, surgeons listed on industry-sponsored sites varied widely in their respective volume decile. More than half of site-listed, procedure-specific surgeons fell below the fifth decile for surgeon volume. Data analysis based solely on 2013 case volumes had no effect on the number of website-listed surgeons whose volumes fell below the fifth decile.

Conclusions

Surgeons listed on an industry-sponsored website demonstrate wide variation in the actual volume of specific procedures performed. The inferred endorsement of competence by commercial sites has the potential to mislead patients seeking surgical expertise. Providers should consider the ethical and legal implications of these commercial advertising that do not have volume or outcome data.",2014-09-30 +25319962,GeneNet Toolbox for MATLAB: a flexible platform for the analysis of gene connectivity in biological networks.,"

Summary

We present GeneNet Toolbox for MATLAB (also available as a set of standalone applications for Linux). The toolbox, available as command-line or with a graphical user interface, enables biologists to assess connectivity among a set of genes of interest ('seed-genes') within a biological network of their choosing. Two methods are implemented for calculating the significance of connectivity among seed-genes: 'seed randomization' and 'network permutation'. Options include restricting analyses to a specified subnetwork of the primary biological network, and calculating connectivity from the seed-genes to a second set of interesting genes. Pre-analysis tools help the user choose the best connectivity-analysis algorithm for their network. The toolbox also enables visualization of the connections among seed-genes. GeneNet Toolbox functions execute in reasonable time for very large networks (∼10 million edges) on a desktop computer.

Availability and implementation

GeneNet Toolbox is open source and freely available from http://avigailtaylor.github.io/gntat14.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

avigail.taylor@dpag.ox.ac.uk.",2014-10-14 +26484130,A pathogenesis-based transcript signature in donor-specific antibody-positive kidney transplant patients with normal biopsies.,"Affymetrix Human Gene 1.0-ST arrays were used to assess the gene expression profiles of kidney transplant patients who presented with donor-specific antibodies (DSAs) but showed normal biopsy histopathology and did not develop antibody-mediated rejection (AMR). Biopsy and whole-blood profiles for these DSA-positive, AMR-negative (DSA +/AMR-) patients were compared to both DSA-positive, AMR-positive (DSA +/AMR +) patients as well as DSA-negative (DSA -) controls. While individual gene expression changes across sample groups were relatively subtle, gene-set enrichment analysis using previously identified pathogenesis-based transcripts (PBTs) identified a clear molecular signature involving increased rejection-associated transcripts in AMR - patients. Results from this study have been published in Kidney International (Hayde et al., 2014 [1]) and the associated data have been deposited in the GEO archive and are accessible via the following link: http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE50084.",2014-10-12 +22161415,Surgical orbital decompression for thyroid eye disease.,"

Background

Orbital decompression is an established procedure for the management of exophthalmos and visual rehabilitation from optic neuropathy in cases of thyroid eye disease. Numerous procedures for removal of orbital bony wall, fat or a combination of these for a variety of indications in different stages of the disease have been well reported in the medical literature. However, the relative effectiveness and safety of these procedures in relation to the various indications remains unclear.

Objectives

To review current published evidence for the effectiveness of surgical orbital decompression for disfiguring proptosis in adult thyroid eye disease and summarise information on possible complications and the quality of life from the studies identified.

Search methods

We searched CENTRAL (which contains the Cochrane Eyes and Vision Group Trials Register) (The Cochrane Library 2011, Issue 10), MEDLINE (January 1950 to October 2011), EMBASE (January 1980 to October 2011), the metaRegister of Controlled Trials (mRCT) (www.controlled-trials.com) and ClinicalTrials.gov (http://clinicaltrials.gov). There were no date or language restrictions in the electronic searches for trials. The electronic databases were last searched on 6 October 2011. We searched oculoplastic textbooks, conference proceedings from the European and American Society of Ophthalmic Plastic and Reconstructive Surgery (ESOPRS, ASOPRS), European Ophthalmological Society (SOE), the Association for Research in Vision and Ophthalmology (ARVO) and American Academy of Ophthalmology (AAO) for the years 2000 to 2009 to identify relevant data. We attempted to contact researchers who are active in this field for information about further published or unpublished studies.

Selection criteria

We included randomised controlled trials (RCTs) with no restriction on date or language comparing two or more surgical methods for orbital decompression with removal of bony wall, orbital fat or a combination of both for disfiguring proptosis or comparison of surgical techniques with any form of medical decompression.

Data collection and analysis

Each review author independently assessed study abstracts identified from the electronic and manual searches. Author analysis was then compared and full papers for appropriate studies were obtained according to the inclusion criteria. Disagreements between the authors were resolved by discussion.

Main results

We identified two randomised trials eligible for inclusion in the review. There was significant variability between the trials for interventions, methodology and outcome measures and therefore meta-analysis was not performed. One study suggested that the transantral approach and endoscopic transnasal technique had similar effects in reducing exophthalmos but that the endoscopic approach may be safer, relating to fewer complications. This study had short-term follow-up and lacked information on our primary outcome (success or failure of treatment). The second study provided evidence that intravenous steroids may be superior to primary surgical decompression in the management of compressive optic neuropathy requiring less secondary surgical procedures, although it relates more frequently to transient side effects. This study was weakened by a small sample size. Until more credible evidence is available recommendations as to best treatment cannot be reliably made.

Authors' conclusions

A single study showed that the transantral approach for orbital decompression was related to more complications than the endoscopic transnasal technique which is preferred by Ear, Nose and Throat (ENT) surgeons, usually as an adjunctive procedure. Intravenous steroids were reported in a single trial to be the most efficient intervention for dysthyroid optic neuropathy. The majority of published literature on orbital decompression for thyroid eye disease consists of retrospective, cohort, or case series studies. Although these provide useful descriptive information, clarification is required to show the relative effectiveness of each intervention for various indications.The two RCTs reviewed are not robust enough to provide credible evidence to our understanding of current decompressive surgery and to support recommendations for clinical practice. There is evidence from currently available uncontrolled studies that removal of the medial and lateral wall (balanced decompression) with or without fat removal may be the most effective surgical method related to only a few complications.There is a clear need for randomised studies evaluating the balanced two-wall, three-wall and orbital fat decompression techniques. Comparison with other surgical techniques for orbital decompression or with immunosuppression in cases of compressive optic neuropathy would also be important. These studies should primarily address the reduction of exophthalmos, disease severity, complication rates, quality of life and cost of the intervention.",2011-12-07 +22806579,DataPipeline: automated importing and fitting of large amounts of biophysical data.,"Raw data from experiments across the biological sciences comes in a large variety of text formats. In small or medium sized laboratories researchers often use an assorted collection of software to interpret, fit, and visualize their data. The spreadsheet is commonly the core component of such a workflow. 
The limitations of such programs for large amounts of heterogeneous data can be frustrating. We report the construction of DataPipeline, a desktop and command-line application that automates the tasks of importing, fitting, and plotting of text-based data. The software is designed to simplify the process of importing text data from various sources using simple configuration files to describe raw file formats. Once imported, curve fitting can be performed using custom fitting models designed by the user inside the application. Fitted parameters can be grouped together as new datasets to be fitted to other models and experimental uncertainties propagated to give error estimates. This software will be useful for processing of data from high through-put biological experiments or for rapid visualization of pilot data without the need for a chain of different programs to carry out each step. DataPipeline and source code is available under an open source license. The software can be freely downloaded at http://code.google.com/p/peat/downloads/list.",2012-07-17 +23228053,Derivation of HLA types from shotgun sequence datasets.,"The human leukocyte antigen (HLA) is key to many aspects of human physiology and medicine. All current sequence-based HLA typing methodologies are targeted approaches requiring the amplification of specific HLA gene segments. Whole genome, exome and transcriptome shotgun sequencing can generate prodigious data but due to the complexity of HLA loci these data have not been immediately informative regarding HLA genotype. We describe HLAminer, a computational method for identifying HLA alleles directly from shotgun sequence datasets (http://www.bcgsc.ca/platform/bioinfo/software/hlaminer). 
This approach circumvents the additional time and cost of generating HLA-specific data and capitalizes on the increasing accessibility and affordability of massively parallel sequencing.",2012-12-10 +25540182,The QDREC web server: determining dose-response characteristics of complex macroparasites in phenotypic drug screens.,"

Summary

Neglected tropical diseases (NTDs) caused by helminths constitute some of the most common infections of the world's poorest people. The etiological agents are complex and recalcitrant to standard techniques of molecular biology. Drug screening against helminths has often been phenotypic and typically involves manual description of drug effect and efficacy. A key challenge is to develop automated, quantitative approaches to drug screening against helminth diseases. The quantal dose-response calculator (QDREC) constitutes a significant step in this direction. It can be used to automatically determine quantitative dose-response characteristics and half-maximal effective concentration (EC50) values using image-based readouts from phenotypic screens, thereby allowing rigorous comparisons of the efficacies of drug compounds. QDREC has been developed and validated in the context of drug screening for schistosomiasis, one of the most important NTDs. However, it is equally applicable to general phenotypic screening involving helminths and other complex parasites.

Availability and implementation

QDREC is publicly available at: http://haddock4.sfsu.edu/qdrec2/. Source code and datasets are at: http://tintin.sfsu.edu/projects/phenotypicAssays.html.

Contact

rahul@sfsu.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-24 +23246976,DiffCorr: an R package to analyze and visualize differential correlations in biological networks.,"Large-scale ""omics"" data, such as microarrays, can be used to infer underlying cellular regulatory networks in organisms, enabling us to better understand the molecular basis of disease and important traits. Correlation approaches, such as a hierarchical cluster analysis, have been widely used to analyze omics data. In addition to the changes in the mean levels of molecules in the omics data, it is important to know about the changes in the correlation relationship among molecules between 2 experimental conditions. The development of a tool to identify differential correlation patterns in omics data in an efficient and unbiased manner is therefore desirable. We developed the DiffCorr package, a simple method for identifying pattern changes between 2 experimental conditions in correlation networks, which builds on a commonly used association measure, such as Pearson's correlation coefficient. DiffCorr calculates correlation matrices for each dataset, identifies the first principal component-based ""eigen-molecules"" in the correlation networks, and tests differential correlation between the 2 groups based on Fisher's z-test. We illustrated its utility by demonstrating biologically relevant, differentially correlated molecules in transcriptome coexpression and metabolite-to-metabolite correlation networks. DiffCorr can explore differential correlations between 2 conditions in the context of post-genomics data types, namely transcriptomics and metabolomics. DiffCorr is simple to use in calculating differential correlations and is suitable for the first step towards inferring causal relationships and detecting biomarker candidates. 
The package can be downloaded from the following website: http://diffcorr.sourceforge.net/.",2012-12-13 +24220001,"Application of TREECS Modeling System to Strontium-90 for Borschi Watershed near Chernobyl, Ukraine.","The Training Range Environmental Evaluation and Characterization System (TREECS™) (http://el.erdc.usace.army.mil/treecs/) is being developed by the U.S. Army Engineer Research and Development Center (ERDC) for the U.S. Army to forecast the fate of munitions constituents (MC) (such as high explosives (HE) and metals) found on firing/training ranges, as well as those subsequently transported to surface water and groundwater. The overall purpose of TREECS™ is to provide environmental specialists with tools to assess the potential for MC migration into surface water and groundwater systems and to assess range management strategies to ensure protection of human health and the environment. The multimedia fate/transport models within TREECS™ are mathematical models of reduced form (e.g., reduced dimensionality) that allow rapid application with less input data requirements compared with more complicated models. Although TREECS™ was developed for the fate of MC from military ranges, it has general applicability to many other situations requiring prediction of contaminant (including radionuclide) fate in multi-media environmental systems. TREECS™ was applied to the Borschi watershed near the Chernobyl Nuclear Power Plant, Ukraine. At this site, TREECS™ demonstrated its use as a modeling tool to predict the fate of strontium 90 ((90)Sr). The most sensitive and uncertain input for this application was the soil-water partitioning distribution coefficient (Kd) for (90)Sr. The TREECS™ soil model provided reasonable estimates of the surface water export flux of (90)Sr from the Borschi watershed when using a Kd for (90)Sr of 200 L/kg. 
The computed export for the year 2000 was 0.18% of the watershed inventory of (90)Sr compared to the estimated export flux of 0.14% based on field data collected during 1999-2001. The model indicated that assumptions regarding the form of the inventory, whether dissolved or in solid phase form, did not appreciably affect export rates. Also, the percentage of non-exchangeable adsorbed (90)Sr, which is uncertain and affects the amount of (90)Sr available for export, was fixed at 20% based on field data measurements. A Monte Carlo uncertainty analysis was conducted treating Kd as an uncertain input variable with a range of 100-300 L/kg. This analysis resulted in a range of 0.13-0.27% of inventory exported to surface water compared to 0.14% based on measured field data. Based on this model application, it was concluded that the export of (90)Sr from the Borschi watershed to surface water is predominantly a result of soil pore water containing dissolved (90)Sr being diverted to surface waters that eventually flow out of the watershed. The percentage of non-exchangeable adsorbed (90)Sr and the soil-water Kd are the two most sensitive and uncertain factors affecting the amount of export. The 200-year projections of the model showed an exponential decline in (90)Sr export fluxes from the watershed that should drop by a factor of 10 by the year 2100. This presentation will focus on TREECS capabilities and the case study done for the Borschi Watershed.",2013-11-09 +23661681,STRAW: Species TRee Analysis Web server.,"The coalescent methods for species tree reconstruction are increasingly popular because they can accommodate coalescence and multilocus data sets. Herein, we present STRAW, a web server that offers workflows for reconstruction of phylogenies of species using three species tree methods-MP-EST, STAR and NJst. The input data are a collection of rooted gene trees (for STAR and MP-EST methods) or unrooted gene trees (for NJst). 
The output includes the estimated species tree, modified Robinson-Foulds distances between gene trees and the estimated species tree and visualization of trees to compare gene trees with the estimated species tree. The web sever is available at http://bioinformatics.publichealth.uga.edu/SpeciesTreeAnalysis/.",2013-05-09 +24647341,HybridGO-Loc: mining hybrid features on gene ontology for predicting subcellular localization of multi-location proteins.,"Protein subcellular localization prediction, as an essential step to elucidate the functions in vivo of proteins and identify drugs targets, has been extensively studied in previous decades. Instead of only determining subcellular localization of single-label proteins, recent studies have focused on predicting both single- and multi-location proteins. Computational methods based on Gene Ontology (GO) have been demonstrated to be superior to methods based on other features. However, existing GO-based methods focus on the occurrences of GO terms and disregard their relationships. This paper proposes a multi-label subcellular-localization predictor, namely HybridGO-Loc, that leverages not only the GO term occurrences but also the inter-term relationships. This is achieved by hybridizing the GO frequencies of occurrences and the semantic similarity between GO terms. Given a protein, a set of GO terms are retrieved by searching against the gene ontology database, using the accession numbers of homologous proteins obtained via BLAST search as the keys. The frequency of GO occurrences and semantic similarity (SS) between GO terms are used to formulate frequency vectors and semantic similarity vectors, respectively, which are subsequently hybridized to construct fusion vectors. An adaptive-decision based multi-label support vector machine (SVM) classifier is proposed to classify the fusion vectors. 
Experimental results based on recent benchmark datasets and a new dataset containing novel proteins show that the proposed hybrid-feature predictor significantly outperforms predictors based on individual GO features as well as other state-of-the-art predictors. For readers' convenience, the HybridGO-Loc server, which is for predicting virus or plant proteins, is available online at http://bioinfo.eie.polyu.edu.hk/HybridGoServer/.",2014-03-19 +24647629,DBatVir: the database of bat-associated viruses.,"Emerging infectious diseases remain a significant threat to public health. Most emerging infectious disease agents in humans are of zoonotic origin. Bats are important reservoir hosts of many highly lethal zoonotic viruses and have been implicated in numerous emerging infectious disease events in recent years. It is essential to enhance our knowledge and understanding of the genetic diversity of the bat-associated viruses to prevent future outbreaks. To facilitate further research, we constructed the database of bat-associated viruses (DBatVir). Known viral sequences detected in bat samples were manually collected and curated, along with the related metadata, such as the sampling time, location, bat species and specimen type. Additional information concerning the bats, including common names, diet type, geographic distribution and phylogeny were integrated into the database to bridge the gap between virologists and zoologists. The database currently covers >4100 bat-associated animal viruses of 23 viral families detected from 196 bat species in 69 countries worldwide. It provides an overview and snapshot of the current research regarding bat-associated viruses, which is essential now that the field is rapidly expanding. 
With a user-friendly interface and integrated online bioinformatics tools, DBatVir provides a convenient and powerful platform for virologists and zoologists to analyze the virome diversity of bats, as well as for epidemiologists and public health researchers to monitor and track current and future bat-related infectious diseases. Database URL: http://www.mgc.ac.cn/DBatVir/.",2014-03-18 +24389661,"AbsCN-seq: a statistical method to estimate tumor purity, ploidy and absolute copy numbers from next-generation sequencing data.","

Motivation

Detection and quantification of the absolute DNA copy number alterations in tumor cells is challenging because the DNA specimen is extracted from a mixture of tumor and normal stromal cells. Estimates of tumor purity and ploidy are necessary to correctly infer copy number, and ploidy may itself be a prognostic factor in cancer progression. As deep sequencing of the exome or genome has become routine for characterization of tumor samples, in this work, we aim to develop a simple and robust algorithm to infer purity, ploidy and absolute copy numbers in whole numbers for tumor cells from sequencing data.

Results

A simulation study shows that estimates have reasonable accuracy, and that the algorithm is robust against the presence of segmentation errors and subclonal populations. We validated our algorithm against a panel of cell lines with experimentally determined ploidy. We also compared our algorithm with the well-established single-nucleotide polymorphism array-based method called ABSOLUTE on three sets of tumors of different types. Our method had good performance on these four benchmark datasets for both purity and ploidy estimates, and may offer a simple solution to copy number alteration quantification for cancer sequencing projects.

Availability and implementation

The R package absCNseq is available from http://biostats.mcc.ucsd.edu/files/absCNseq_1.0.tar.gz CONTACT: kmesser@ucsd.edu Supplementary information: Supplementary data are available at Bioinformatics online.",2014-01-02 +24349421,Gathering and exploring scientific knowledge in pharmacovigilance.,"Pharmacovigilance plays a key role in the healthcare domain through the assessment, monitoring and discovery of interactions amongst drugs and their effects in the human organism. However, technological advances in this field have been slowing down over the last decade due to miscellaneous legal, ethical and methodological constraints. Pharmaceutical companies started to realize that collaborative and integrative approaches boost current drug research and development processes. Hence, new strategies are required to connect researchers, datasets, biomedical knowledge and analysis algorithms, allowing them to fully exploit the true value behind state-of-the-art pharmacovigilance efforts. This manuscript introduces a new platform directed towards pharmacovigilance knowledge providers. This system, based on a service-oriented architecture, adopts a plugin-based approach to solve fundamental pharmacovigilance software challenges. With the wealth of collected clinical and pharmaceutical data, it is now possible to connect knowledge providers' analysis and exploration algorithms with real data. As a result, new strategies allow a faster identification of high-risk interactions between marketed drugs and adverse events, and enable the automated uncovering of scientific evidence behind them. With this architecture, the pharmacovigilance field has a new platform to coordinate large-scale drug evaluation efforts in a unique ecosystem, publicly available at http://bioinformatics.ua.pt/euadr/.",2013-12-11 +25411330,HyDRA: gene prioritization via hybrid distance-score rank aggregation.,"

Unlabelled

Gene prioritization refers to a family of computational techniques for inferring disease genes through a set of training genes and carefully chosen similarity criteria. Test genes are scored based on their average similarity to the training set, and the rankings of genes under various similarity criteria are aggregated via statistical methods. The contributions of our work are threefold: (i) first, based on the realization that there is no unique way to define an optimal aggregate for rankings, we investigate the predictive quality of a number of new aggregation methods and known fusion techniques from machine learning and social choice theory. Within this context, we quantify the influence of the number of training genes and similarity criteria on the diagnostic quality of the aggregate and perform in-depth cross-validation studies; (ii) second, we propose a new approach to genomic data aggregation, termed HyDRA (Hybrid Distance-score Rank Aggregation), which combines the advantages of score-based and combinatorial aggregation techniques. We also propose incorporating a new top-versus-bottom (TvB) weighting feature into the hybrid schemes. The TvB feature ensures that aggregates are more reliable at the top of the list, rather than at the bottom, since only top candidates are tested experimentally; (iii) third, we propose an iterative procedure for gene discovery that operates via successful augmentation of the set of training genes by genes discovered in previous rounds, checked for consistency.

Motivation

Fundamental results from social choice theory, political and computer sciences, and statistics have shown that there exists no consistent, fair and unique way to aggregate rankings. Instead, one has to decide on an aggregation approach using predefined set of desirable properties for the aggregate. The aggregation methods fall into two categories, score- and distance-based approaches, each of which has its own drawbacks and advantages. This work is motivated by the observation that merging these two techniques in a computationally efficient manner, and by incorporating additional constraints, one can ensure that the predictive quality of the resulting aggregation algorithm is very high.

Results

We tested HyDRA on a number of gene sets, including autism, breast cancer, colorectal cancer, endometriosis, ischaemic stroke, leukemia, lymphoma and osteoarthritis. Furthermore, we performed iterative gene discovery for glioblastoma, meningioma and breast cancer, using a sequentially augmented list of training genes related to the Turcot syndrome, Li-Fraumeni condition and other diseases. The methods outperform state-of-the-art software tools such as ToppGene and Endeavour. Despite this finding, we recommend as best practice to take the union of top-ranked items produced by different methods for the final aggregated list.

Availability and implementation

The HyDRA software may be downloaded from: http://web.engr.illinois.edu/∼mkim158/HyDRA.zip.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-18 +26815344,[Study on genotype and clinical characteristics of infection of carbapenemase-producing Enterobacter cloacae].,"

Objective

To study the genotypes and clinical characteristics of carbapenemase-producing Enterobacter cloacae (E.cloacae), and lay the foundation for active control of nosocomial infection.

Methods

E.cloacae isolates were collected from January 2007 to December 2014. Strains which showed decreased sensitivity to carbapenem were screened out by the modified Hodge test (MHT) and EDTA-disk synergy test. The genotype of blaKPC, blaIMP, blaVIM, blaOXA-48 and blaNDM-1 were detected by PCR amplification, the product of PCR was sequenced and conducted by Blast (http://blast.ncbi.nlm.nih.gov/Blast.cgi). Conjugal transfer experiment was conducted to prove horizontal transmit of carbapenemase gene produced by E.cloacae. Meanwhile, the clinical epidemiological data of patients infected by selected strains were also analyzed.

Results

Sixty-four carbapenemase producing E.cloacae were detected by MHT, EDTA-disk synergy test and PCR amplification. Forty-five strains (70.3%) out of 64 strains infection came from nosocomial infection, while 19 strains (29.7%) from the community infection. The strains were mainly isolated from secretions samples and sputum samples, which accounted for 65.6% (42/64) and 23.4% (15/64) separately. The mainly clinical departments were orthopaedics (43.8%), department of burn (21.9%), ICU (18.8%) and pediatrics (14.1%). Bed changing, invasive operation and indwelling catheter were risk factors for the transmission of carbapenemase producing E.cloacae, and infected patients had longer time of staying in hospital, lower cure rate and higher frequency of cephalosporins enzyme inhibitor compound or carbapenem agents administration (all P<0.05). Sixty-four strains showed increased MIC to most of the antibiotics except for polymyxin and tigecycline. Among the 64 strains, 29 strains were genotype blaIMP-4 and 35 strains were genotype blaIMP-8 by Blast alignment, no genotype blaVIM, blaOXA-48 and blaNDM-1 were detected. Result of conjugal transfer experiment showed that receptor strain obtained carbapenem resistance, and the sequence of resistance gene of receptor strain was the same to the donator strain.

Conclusions

The drug resistance of E.cloacae are growing, IMP-4 and IMP-8 carbapenemase are the main enzymes produced by strains. As the resistance gene can horizontal transmit between strains through conjugal transfer system, the strains have been locally spread in hospital departments, thus it is important to control risk factors of transmission timely.",2015-10-01 +25717190,Detection of significant protein coevolution.,"

Motivation

The evolution of proteins cannot be fully understood without taking into account the coevolutionary linkages entangling them. From a practical point of view, coevolution between protein families has been used as a way of detecting protein interactions and functional relationships from genomic information. The most common approach to inferring protein coevolution involves the quantification of phylogenetic tree similarity using a family of methodologies termed mirrortree. In spite of their success, a fundamental problem of these approaches is the lack of an adequate statistical framework to assess the significance of a given coevolutionary score (tree similarity). As a consequence, a number of ad hoc filters and arbitrary thresholds are required in an attempt to obtain a final set of confident coevolutionary signals.

Results

In this work, we developed a method for associating confidence estimators (P values) to the tree-similarity scores, using a null model specifically designed for the tree comparison problem. We show how this approach largely improves the quality and coverage (number of pairs that can be evaluated) of the detected coevolution in all the stages of the mirrortree workflow, independently of the starting genomic information. This not only leads to a better understanding of protein coevolution and its biological implications, but also to obtain a highly reliable and comprehensive network of predicted interactions, as well as information on the substructure of macromolecular complexes using only genomic information.

Availability and implementation

The software and datasets used in this work are freely available at: http://csbg.cnb.csic.es/pMT/.

Contact

pazos@cnb.csic.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-24 +25433465,"CompareSVM: supervised, Support Vector Machine (SVM) inference of gene regularity networks.","

Background

Predication of gene regularity network (GRN) from expression data is a challenging task. There are many methods that have been developed to address this challenge ranging from supervised to unsupervised methods. Most promising methods are based on support vector machine (SVM). There is a need for comprehensive analysis on prediction accuracy of supervised method SVM using different kernels on different biological experimental conditions and network size.

Results

We developed a tool (CompareSVM) based on SVM to compare different kernel methods for inference of GRN. Using CompareSVM, we investigated and evaluated different SVM kernel methods on simulated datasets of microarray of different sizes in detail. The results obtained from CompareSVM showed that accuracy of inference method depends upon the nature of experimental condition and size of the network.

Conclusions

For network with nodes (<200) and average (over all sizes of networks), SVM Gaussian kernel outperform on knockout, knockdown, and multifactorial datasets compared to all the other inference methods. For network with large number of nodes (~500), choice of inference method depend upon nature of experimental condition. CompareSVM is available at http://bis.zju.edu.cn/CompareSVM/ .",2014-11-30 +26418669,Prenatal Organophosphorus Pesticide Exposure and Child Neurodevelopment at 24 Months: An Analysis of Four Birth Cohorts.,"

Background

Organophosphorus pesticides (OPs) are used in agriculture worldwide. Residential use was common in the United States before 2001.

Objectives

We conducted a pooled analysis of four birth cohorts (children's centers; n = 936) to evaluate associations of prenatal exposure to OPs with child development at 24 months.

Methods

Using general linear models, we computed site-specific and pooled estimates of the association of total dialkyl (ΣDAP), diethyl (ΣDEP), and dimethylphosphate (ΣDMP) metabolite concentrations in maternal prenatal urine with mental and psychomotor development indices (MDI/PDI) and evaluated heterogeneity by children's center, race/ethnicity, and PON1 genotype.

Results

There was significant heterogeneity in the center-specific estimates of association for ΣDAP and ΣDMP and the MDI (p = 0.09, and p = 0.05, respectively), as well as heterogeneity in the race/ethnicity-specific estimates for ΣDAP (p = 0.06) and ΣDMP (p = 0.02) and the MDI. Strong MDI associations in the CHAMACOS population per 10-fold increase in ΣDAP (β = -4.17; 95% CI: -7.00, -1.33) and ΣDMP (β = -3.64; 95% CI: -5.97, -1.32) were influential, as were associations among Hispanics (β per 10-fold increase in ΣDAP = -2.91; 95% CI: -4.71, -1.12). We generally found stronger negative associations of ΣDAP and ΣDEP with the 24-month MDI for carriers of the 192Q PON1 allele, particularly among blacks and Hispanics.

Conclusions

Data pooling was complicated by center-related differences in subject characteristics, eligibility, and changes in regulations governing residential use of OPs during the study periods. Pooled summary estimates of prenatal exposure to OPs and neurodevelopment should be interpreted with caution because of significant heterogeneity in associations by center, race/ethnicity, and PON1 genotype. Subgroups with unique exposure profiles or susceptibilities may be at higher risk for adverse neurodevelopment following prenatal exposure.

Citation

Engel SM, Bradman A, Wolff MS, Rauh VA, Harley KG, Yang JH, Hoepner LA, Barr DB, Yolton K, Vedar MG, Xu Y, Hornung RW, Wetmur JG, Chen J, Holland NT, Perera FP, Whyatt RM, Lanphear BP, Eskenazi B. 2016. Prenatal organophosphorus pesticide exposure and child neurodevelopment at 24 months: an analysis of four birth cohorts. Environ Health Perspect 124:822-830; http://dx.doi.org/10.1289/ehp.1409474.",2015-09-29 +26165391,Gastroprotective efficacy and safety of single-tablet ibuprofen/famotidine vs ibuprofen in older persons.,"

Objectives

A combination tablet of ibuprofen 800 mg and famotidine 26.6 mg given three times daily is effective for the treatment of rheumatoid arthritis and osteoarthritis and decreases the risk of developing upper gastrointestinal (GI) ulcers. This analysis evaluated the gastroprotective efficacy and safety of the single-tablet combination of ibuprofen/famotidine compared with ibuprofen alone on the basis of age and the presence of one or more risk factors for development of upper GI ulcer.

Methods

Pooled data from the 24-week, randomized, double-blind, parallel-group REDUCE-1 and REDUCE-2 trials were used. Endoscopies were performed in patients aged 40-80 years. The proportion of patients who developed ≥ 1 upper GI ulcer during treatment with ibuprofen/famotidine versus ibuprofen alone stratified on the basis of age (< 60 or ≥ 60 years) was evaluated. Further, analyses were performed on additional risk factors for ulcer development.

Results

Gastroprotective efficacy of the combination was not affected by age. Pooled results demonstrated statistically significantly fewer upper GI (10.0 vs 19.5%, p < 0.0001), gastric (8.9 vs 16.8%, p = 0.0004), and duodenal ulcers (1.1 vs 5.4%, p < 0.0001) in patients < 60 years treated with ibuprofen/famotidine versus ibuprofen alone compared with 12.9 vs 26.6% (p = 0.0002), 11.9 vs 23.4% (p = 0.0011), and 1.0 vs 4.5% (p = 0.0096), respectively, in patients ≥ 60 years. The ibuprofen/famotidine combination provided nearly 51 and 59% reduction in the risk of developing a GI ulcer in patients <60 years and ≥ 60 of age, respectively. Efficacy was maintained in the presence of additional risk factors, as well.

Conclusions

These results indicate that the fixed-combination of ibuprofen/famotidine provides gastroprotection in those of older age, with or without additional risk factors for the development of upper GI ulcers, as compared with ibuprofen alone. US National Institutes of Health registry, http://www.clinicaltrials.gov, NCT00450658 and NCT00450216.",2015-07-13 +25296770,Comparative analysis of methods for genome-wide nucleosome cartography.,"Nucleosomes contribute to compacting the genome into the nucleus and regulate the physical access of regulatory proteins to DNA either directly or through the epigenetic modifications of the histone tails. Precise mapping of nucleosome positioning across the genome is, therefore, essential to understanding the genome regulation. In recent years, several experimental protocols have been developed for this purpose that include the enzymatic digestion, chemical cleavage or immunoprecipitation of chromatin followed by next-generation sequencing of the resulting DNA fragments. Here, we compare the performance and resolution of these methods from the initial biochemical steps through the alignment of the millions of short-sequence reads to a reference genome to the final computational analysis to generate genome-wide maps of nucleosome occupancy. Because of the lack of a unified protocol to process data sets obtained through the different approaches, we have developed a new computational tool (NUCwave), which facilitates their analysis, comparison and assessment and will enable researchers to choose the most suitable method for any particular purpose. NUCwave is freely available at http://nucleosome.usal.es/nucwave along with a step-by-step protocol for its use.",2014-10-08 +21383910,SeqMaT: A sequence manipulation tool for phylogenetic analysis.,"

Unlabelled

Most bioinformatics tools require specialized input formats for sequence comparison and analysis. This is particularly true for molecular phylogeny programs, which accept only certain formats. In addition, it is often necessary to eliminate highly similar sequences among the input, especially when the dataset is large. Moreover, most programs have restrictions upon the sequence name. Here we introduce SeqMaT, a Sequence Manipulation Tool. It has the following functions: data format conversion, sequence name coding and decoding, redundant and highly similar sequence removal, and data mining utilities. SeqMaT was developed using Java with two versions, web-based and standalone. A standalone program is convenient to manipulate a large number of sequences, while the web version will guarantee wide availability of the tool for researchers and practitioners throughout the Internet.

Availability

The database is available for free at http://glee.ist.unomaha.edu/seqmat.",2011-02-07 +23762278,Hidden Markov models for evolution and comparative genomics analysis.,"The problem of reconstruction of ancestral states given a phylogeny and data from extant species arises in a wide range of biological studies. The continuous-time Markov model for the discrete states evolution is generally used for the reconstruction of ancestral states. We modify this model to account for a case when the states of the extant species are uncertain. This situation appears, for example, if the states for extant species are predicted by some program and thus are known only with some level of reliability; it is common for bioinformatics field. The main idea is formulation of the problem as a hidden Markov model on a tree (tree HMM, tHMM), where the basic continuous-time Markov model is expanded with the introduction of emission probabilities of observed data (e.g. prediction scores) for each underlying discrete state. Our tHMM decoding algorithm allows us to predict states at the ancestral nodes as well as to refine states at the leaves on the basis of quantitative comparative genomics. The test on the simulated data shows that the tHMM approach applied to the continuous variable reflecting the probabilities of the states (i.e. prediction score) appears to be more accurate then the reconstruction from the discrete states assignment defined by the best score threshold. We provide examples of applying our model to the evolutionary analysis of N-terminal signal peptides and transcription factor binding sites in bacteria. The program is freely available at http://bioinf.fbb.msu.ru/~nadya/tHMM and via web-service at http://bioinf.fbb.msu.ru/treehmmweb.",2013-06-07 +25432975,GenomicusPlants: a web resource to study genome evolution in flowering plants.,"Comparative genomics combined with phylogenetic reconstructions are powerful approaches to study the evolution of genes and genomes. 
However, the current rapid expansion of the volume of genomic information makes it increasingly difficult to interrogate, integrate and synthesize comparative genome data while taking into account the maximum breadth of information available. GenomicusPlants (http://www.genomicus.biologie.ens.fr/genomicus-plants) is an extension of the Genomicus webserver that addresses this issue by allowing users to explore flowering plant genomes in an intuitive way, across the broadest evolutionary scales. Extant genomes of 26 flowering plants can be analyzed, as well as 23 ancestral reconstructed genomes. Ancestral gene order provides a long-term chronological view of gene order evolution, greatly facilitating comparative genomics and evolutionary studies. Four main interfaces ('views') are available where: (i) PhyloView combines phylogenetic trees with comparisons of genomic loci across any number of genomes; (ii) AlignView projects loci of interest against all other genomes to visualize its topological conservation; (iii) MatrixView compares two genomes in a classical dotplot representation; and (iv) Karyoview visualizes chromosome karyotypes 'painted' with colours of another genome of interest. All four views are interconnected and benefit from many customizable features.",2014-11-27 +24505437,Secondary structures of rRNAs from all three domains of life.,"Accurate secondary structures are important for understanding ribosomes, which are extremely large and highly complex. Using 3D structures of ribosomes as input, we have revised and corrected traditional secondary (2°) structures of rRNAs. We identify helices by specific geometric and molecular interaction criteria, not by co-variation. The structural approach allows us to incorporate non-canonical base pairs on parity with Watson-Crick base pairs. The resulting rRNA 2° structures are up-to-date and consistent with three-dimensional structures, and are information-rich. 
These 2° structures are relatively simple to understand and are amenable to reproduction and modification by end-users. The 2° structures made available here broadly sample the phylogenetic tree and are mapped with a variety of data related to molecular interactions and geometry, phylogeny and evolution. We have generated 2° structures for both large subunit (LSU) 23S/28S and small subunit (SSU) 16S/18S rRNAs of Escherichia coli, Thermus thermophilus, Haloarcula marismortui (LSU rRNA only), Saccharomyces cerevisiae, Drosophila melanogaster, and Homo sapiens. We provide high-resolution editable versions of the 2° structures in several file formats. For the SSU rRNA, the 2° structures use an intuitive representation of the central pseudoknot where base triples are presented as pairs of base pairs. Both LSU and SSU secondary maps are available (http://apollo.chemistry.gatech.edu/RibosomeGallery). Mapping of data onto 2° structures was performed on the RiboVision server (http://apollo.chemistry.gatech.edu/RiboVision).",2014-02-05 +26417259,Imprinting genes associated with endometriosis.,"

Purpose

Much work has been carried out to investigate the genetic and epigenetic basis of endometriosis and proposed that endometriosis has been described as an epigenetic disease. The purpose of this study was to extract the imprinting genes that are associated with endometriosis development.

Methods

The information on the imprinting genes can be accessed publicly from a web-based interface at http://www.geneimprint.com/site/genes-by-species.

Results

In the current version, the database contains 150 human imprinted genes derived from the literature. We searched gene functions and their roles in particular biological processes or events, such as development and pathogenesis of endometriosis. From the genomic imprinting database, we picked 10 genes that were highly associated with female reproduction; prominent among them were paternally expressed genes (DIRAS3, BMP8B, CYP1B1, ZFAT, IGF2, MIMT1, or MIR296) and maternally expressed genes (DVL1, FGFRL1, or CDKN1C). These imprinted genes may be associated with reproductive biology such as endometriosis, pregnancy loss, decidualization process and preeclampsia.

Discussion

This study supports the possibility that aberrant epigenetic dysregulation of specific imprinting genes may contribute to endometriosis predisposition.",2014-03-13 +24621406,The evolution of stroke rehabilitation randomized controlled trials.,"

Background

In the interest of prioritizing resources and providing future direction for researchers, a complete overview of the landscape of stroke rehabilitation literature was conducted.

Aim

We aimed to examine the evolution of stroke rehabilitation randomized controlled trials, with respect to number, sample size, and methodological quality between 1970 and September 2012.

Methods

Using the Evidence-Based Review of Stroke Rehabilitation (http://www.ebrsr.com), all randomized controlled trials related to stroke rehabilitation interventions were eligible for inclusion and were divided into five groups based on the primary outcome (i.e., motor, cognitive, medical complications, psychosocial, and 'other').

Results

One thousand sixty-three randomized controlled trials met inclusion criteria, with motor studies accounting for 58·8% of the total. The total number of randomized controlled trials grew between 1970 and 2012, with 35·2% of all the studies published in the last five-years. Motor randomized controlled trials had the smallest median sample size compared with cognitive (P < 0·018), medical complications (P < 0·001), psychosocial (P < 0·001), and 'other' (P < 0·001) randomized controlled trials. Between 1973 and 1977 and 2008 and 2012, there was no statistically significant increase in median sample sizes (P = 0·845). Psychosocial randomized controlled trials had higher median Physiotherapy Evidence Database scores when compared with motor (P = 0·002), cognitive (P = 0·035), and 'other' randomized controlled trials (P = 0·036), but not medical complication randomized controlled trials (P = 0·591). Over time, median Physiotherapy Evidence Database scores for all randomized controlled trials significantly increased from 5 (interquartile range 0·5) in 1973-1977 to 7 (interquartile range 3) in 2008-2012 (P = 0·008).

Conclusions

Randomized controlled trials in stroke rehabilitation have increased over the past four decades, with an associated increase in methodological quality, but not sample size.",2014-03-13 +23813002,A graph kernel approach for alignment-free domain-peptide interaction prediction with an application to human SH3 domains.,"

Motivation

State-of-the-art experimental data for determining binding specificities of peptide recognition modules (PRMs) is obtained by high-throughput approaches like peptide arrays. Most prediction tools applicable to this kind of data are based on an initial multiple alignment of the peptide ligands. Building an initial alignment can be error-prone, especially in the case of the proline-rich peptides bound by the SH3 domains.

Results

Here, we present a machine-learning approach based on an efficient graph-kernel technique to predict the specificity of a large set of 70 human SH3 domains, which are an important class of PRMs. The graph-kernel strategy allows us to (i) integrate several types of physico-chemical information for each amino acid, (ii) consider high-order correlations between these features and (iii) eliminate the need for an initial peptide alignment. We build specialized models for each human SH3 domain and achieve competitive predictive performance of 0.73 area under precision-recall curve, compared with 0.27 area under precision-recall curve for state-of-the-art methods based on position weight matrices. We show that better models can be obtained when we use information on the noninteracting peptides (negative examples), which is currently not used by the state-of-the art approaches based on position weight matrices. To this end, we analyze two strategies to identify subsets of high confidence negative data. The techniques introduced here are more general and hence can also be used for any other protein domains, which interact with short peptides (i.e. other PRMs).

Availability

The program with the predictive models can be found at http://www.bioinf.uni-freiburg.de/Software/SH3PepInt/SH3PepInt.tar.gz. We also provide a genome-wide prediction for all 70 human SH3 domains, which can be found under http://www.bioinf.uni-freiburg.de/Software/SH3PepInt/Genome-Wide-Predictions.tar.gz.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +23838740,The antibiotic resistance and prescribing in European Children project: a neonatal and pediatric antimicrobial web-based point prevalence survey in 73 hospitals worldwide.,"

Background

The neonatal and pediatric antimicrobial point prevalence survey (PPS) of the Antibiotic Resistance and Prescribing in European Children project (http://www.arpecproject.eu/) aims to standardize a method for surveillance of antimicrobial use in children and neonates admitted to the hospital within Europe. This article describes the audit criteria used and reports overall country-specific proportions of antimicrobial use. An analytical review presents methodologies on antimicrobial use.

Methods

A 1-day PPS on antimicrobial use in hospitalized children was organized in September 2011, using a previously validated and standardized method. The survey included all inpatient pediatric and neonatal beds and identified all children receiving an antimicrobial treatment on the day of survey. Mandatory data were age, gender, (birth) weight, underlying diagnosis, antimicrobial agent, dose and indication for treatment. Data were entered through a web-based system for data-entry and reporting, based on the WebPPS program developed for the European Surveillance of Antimicrobial Consumption project.

Results

There were 2760 and 1565 pediatric versus 1154 and 589 neonatal inpatients reported among 50 European (n = 14 countries) and 23 non-European hospitals (n = 9 countries), respectively. Overall, antibiotic pediatric and neonatal use was significantly higher in non-European (43.8%; 95% confidence interval [CI]: 41.3-46.3% and 39.4%; 95% CI: 35.5-43.4%) compared with that in European hospitals (35.4; 95% CI: 33.6-37.2% and 21.8%; 95% CI: 19.4-24.2%). Proportions of antibiotic use were highest in hematology/oncology wards (61.3%; 95% CI: 56.2-66.4%) and pediatric intensive care units (55.8%; 95% CI: 50.3-61.3%).

Conclusions

An Antibiotic Resistance and Prescribing in European Children standardized web-based method for a 1-day PPS was successfully developed and conducted in 73 hospitals worldwide. It offers a simple, feasible and sustainable way of data collection that can be used globally.",2013-06-01 +26338858,Clinical Performance of a Matrix-Assisted Laser Desorption Ionization-Time of Flight Mass Spectrometry Method for Detection of Certain blaKPC-Containing Plasmids.,"Rapid detection of blaKPC-containing organisms can significantly impact infection control and clinical practices, as well as therapeutic choices. Current molecular and phenotypic methods to detect these organisms, however, require additional testing beyond routine organism identification. In this study, we evaluated the clinical performance of matrix-assisted laser desorption ionization-time of flight mass spectrometry (MALDI-TOF MS) to detect pKpQIL_p019 (p019)-an ∼11,109-Da protein associated with certain blaKPC-containing plasmids that was previously shown to successfully track a clonal outbreak of blaKPC-pKpQIL-Klebsiella pneumoniae in a proof-of-principle study (A. F. Lau, H. Wang, R. A. Weingarten, S. K. Drake, A. F. Suffredini, M. K. Garfield, Y. Chen, M. Gucek, J. H. Youn, F. Stock, H. Tso, J. DeLeo, J. J. Cimino, K. M. Frank, and J. P. Dekker, J Clin Microbiol 52:2804-2812, 2014, http://dx.doi.org/10.1128/JCM.00694-14). PCR for the p019 gene was used as the reference method. Here, blind analysis of 140 characterized Enterobacteriaceae isolates using two protein extraction methods (plate extraction and tube extraction) and two peak detection methods (manual and automated) showed sensitivities and specificities ranging from 96% to 100% and from 95% to 100%, respectively (2,520 spectra analyzed). Feasible laboratory implementation methods (plate extraction and automated analysis) demonstrated 96% sensitivity and 99% specificity. All p019-positive isolates (n = 26) contained blaKPC and were carbapenem resistant. 
Retrospective analysis of an additional 720 clinical Enterobacteriaceae spectra found an ∼11,109-Da signal in nine spectra (1.3%), including seven from p019-containing, carbapenem-resistant isolates (positive predictive value [PPV], 78%). Instrument tuning had a significant effect on assay sensitivity, highlighting important factors that must be considered as MALDI-TOF MS moves into applications beyond microbial identification. Using a large blind clinical data set, we have shown that spectra acquired for routine organism identification can also be analyzed automatically in real time at high throughput, at no additional expense to the laboratory, to enable rapid detection of potentially blaKPC-containing carbapenem-resistant isolates, providing early and clinically actionable results.",2015-09-02 +21988420,"Pain, analgesia and genetics.","

Objectives

In the clinical setting, there is marked intersubject variability in the intensity of pain reported by patients with apparently similar pain states, as well as widely differing analgesic dosing requirements between individuals to produce satisfactory pain relief with tolerable side-effects. Genetic and environmental factors as well as their interaction are implicated, and these are discussed in this review.

Key findings

Pioneering work undertaken in mice more than a decade ago, showed a strong genetic contribution to levels of nociception/hypersensitivity as well as levels of antinociception produced by commonly available analgesic agents. To date more than 300 candidate 'pain' genes have been identified as potentially contributing to heritable differences in pain sensitivity and analgesic responsiveness in animals and humans, with this information available in a publicly accessible database http://www.jbldesign.com/jmogil/enter.html. Since then, many genetic association studies have been conducted in humans to investigate the possibility that single nucleotide polymorphisms (SNPs) in an individual gene may explain drug inefficacy or excessive toxicity experienced by a small subset of the whole population who have the rare allele for a particular SNP.

Summary

Despite the fact that SNPs in more than 20 genes that affect pain sensitivity or contribute to interindividual variability in responses to analgesic medications have been identified in the human genome, much of the data is conflicting. Apart from deficiencies in the design and conduct of human genetic association studies, recent research from other fields has implicated epigenetic mechanisms that facilitate dynamic gene-environment communication, as a possible explanation.",2011-08-19 +25784723,Regional ventricular performance and exercise training in children and young adults after repair of tetralogy of Fallot: randomized controlled pilot study. ,"Public-health guidelines recommend patients with congenital heart disease to exercise. Studies have shown that patients with congenital heart disease can improve physical exercise capacity. The effect of training on regional ventricular performance has hardly been studied. We performed a pilot study to assess whether an exercise training program would result in adverse changes of regional ventricular performance in patients with corrected tetralogy of Fallot. Multicenter prospective randomized controlled pilot study in patients with tetralogy of Fallot aged 10 to 25 years. A 12-week standardized aerobic dynamic exercise training program (3 one-hour sessions per week) was used. Pre- and post-training cardiopulmonary exercise tests, MRI, and echocardiography, including tissue-Doppler imaging, were performed. Patients were randomized to the exercise group (n=28) or control group (n=20). One patient in the exercise group dropped out. Change in tissue-Doppler imaging parameters was similar in the exercise group and control group (change in right ventricle free wall peak velocity E' exercise group, 0.8±2.6 cm/s; control group, 0.9±4.1; peak velocity A' exercise group, 0.4±2.4 m/s; control group 4.6±18.1 cm/s). 
This randomized controlled pilot study provides preliminary data suggesting that regional ventricular performance is well maintained during 3-month aerobic dynamic exercise training in children and young adults with repaired tetralogy of Fallot. This information might help patients adhere to current public-health guidelines. URL: http://www.trialregister.nl. Unique identifier: NTR2731.",2015-04-01 +24621099,Meta-analyses between 18 candidate genetic markers and overweight/obesity.,"

Aims

The goal of our study is to investigate the associations between 18 candidate genetic markers and overweight/obesity.

Methods

A total of 72 eligible articles were retrieved from literature databases including PubMed, Embase, SpringerLink, Web of Science, Chinese National Knowledge Infrastructure (CNKI), and Wanfang. Meta-analyses of 18 genetic markers among 56,738 controls and 48,148 overweight/obese persons were done by Review Manager 5.0.

Results

Our results showed that SH2B1 rs7498665 polymorphism was significantly associated with the risk of overweight/obesity (overall odds ratio (OR) = 1.21, 95% confidence interval (CI) = 1.09-1.34, P = 0.0004). Increased risk of overweight/obesity was also observed in FAIM2 rs7138803 polymorphism (overall OR = 1.11, 95% CI = 1.01-1.22, P = 0.04).

Conclusion

Our meta-analyses have shown the important role of 2 polymorphisms (SH2B1 rs7498665 and FAIM2 rs7138803) in the development of overweight/obesity. This study highlighted the importance of above two candidate genes (SH2B1 and FAIM2) in the risk of overweight/obesity.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2785487401176182.",2014-03-12 +24622092,Multilocus sequence typing of Mycoplasma hyorhinis strains identified by a real-time TaqMan PCR assay.,"A real-time TaqMan PCR assay based on the gene encoding the protein p37 was developed to detect Mycoplasma hyorhinis. Its specificity was validated with 29 epidemiologically unrelated M. hyorhinis strains (28 field strains and one reference strain) and other mycoplasma species or with other microorganisms commonly found in pigs. The estimated detection limit of this qPCR assay was 125 microorganism equivalents/μl. The same 29 epidemiologically unrelated M. hyorhinis strains and four previously fully sequenced strains were typed by two portable typing methods, the sequencing of the p37 gene and a multilocus sequence typing (MLST) scheme. The first method revealed 18 distinct nucleotide sequences and insufficient discriminatory power (0.934). The MLST scheme was developed with the sequenced genomes of the M. hyorhinis strains HUB-1, GDL-1, MCLD, and SK76 and based on the genes dnaA, rpoB, gyrB, gltX, adk, and gmk. In total, 2,304 bp of sequence was analyzed for each strain. MLST was capable of subdividing the 33 strains into 29 distinct sequence types. The discriminatory power of the method was >0.95, which is the threshold value for interpreting typing results with confidence (D=0.989). Population analysis showed that recombination in M. hyorhinis occurs and that strains are diverse but with a certain clonality (one unique clonal complex was identified). The new qPCR assay and the robust MLST scheme are available for the acquisition of new knowledge on M. hyorhinis epidemiology. A web-accessible database has been set up for the M. hyorhinis MLST scheme at http://pubmlst.org/mhyorhinis/.",2014-03-12 +22290409,miRDeepFinder: a miRNA analysis tool for deep sequencing of plant small RNAs. 
,"miRDeepFinder is a software package developed to identify and functionally analyze plant microRNAs (miRNAs) and their targets from small RNA datasets obtained from deep sequencing. The functions available in miRDeepFinder include pre-processing of raw data, identifying conserved miRNAs, mining and classifying novel miRNAs, miRNA expression profiling, predicting miRNA targets, and gene pathway and gene network analysis involving miRNAs. The fundamental design of miRDeepFinder is based on miRNA biogenesis, miRNA-mediated gene regulation and target recognition, such as perfect or near perfect hairpin structures, different read abundances of miRNA and miRNA*, and targeting patterns of plant miRNAs. To test the accuracy and robustness of miRDeepFinder, we analyzed a small RNA deep sequencing dataset of Arabidopsis thaliana published in the GEO database of NCBI. Our test retrieved 128 of 131 (97.7%) known miRNAs that have a more than 3 read count in Arabidopsis. Because many known miRNAs are not associated with miRNA*s in small RNA datasets, miRDeepFinder was also designed to recover miRNA candidates without the presence of miRNA*. To mine as many miRNAs as possible, miRDeepFinder allows users to compare mature miRNAs and their miRNA*s with other small RNA datasets from the same species. Cleaveland software package was also incorporated into miRDeepFinder for miRNA target identification using degradome sequencing analysis. Using this new computational tool, we identified 13 novel miRNA candidates with miRNA*s from Arabidopsis and validated 12 of them experimentally. Interestingly, of the 12 verified novel miRNAs, a miRNA named AC1 spans the exons of two genes (UTG71C4 and UGT71C3). Both the mature AC1 miRNA and its miRNA* were also found in four other small RNA datasets. We also developed a tool, ""miRNA primer designer"" to design primers for any type of miRNAs. 
miRDeepFinder provides a powerful tool for analyzing small RNA datasets from all species, with or without the availability of genome information. miRDeepFinder and miRNA primer designer are freely available at http://www.leonxie.com/DeepFinder.php and at http://www.leonxie.com/miRNAprimerDesigner.php , respectively. A program (called RefFinder: http://www.leonxie.com/referencegene.php ) was also developed for assessing the reliable reference genes for gene expression analysis, including miRNAs.",2012-01-31 +25358969,The phylogenetic likelihood library.,"We introduce the Phylogenetic Likelihood Library (PLL), a highly optimized application programming interface for developing likelihood-based phylogenetic inference and postanalysis software. The PLL implements appropriate data structures and functions that allow users to quickly implement common, error-prone, and labor-intensive tasks, such as likelihood calculations, model parameter as well as branch length optimization, and tree space exploration. The highly optimized and parallelized implementation of the phylogenetic likelihood function and a thorough documentation provide a framework for rapid development of scalable parallel phylogenetic software. By example of two likelihood-based phylogenetic codes we show that the PLL improves the sequential performance of current software by a factor of 2-10 while requiring only 1 month of programming time for integration. We show that, when numerical scaling for preventing floating point underflow is enabled, the double precision likelihood calculations in the PLL are up to 1.9 times faster than those in BEAGLE. On an empirical DNA dataset with 2000 taxa the AVX version of PLL is 4 times faster than BEAGLE (scaling enabled and required). 
The PLL is available at http://www.libpll.org under the GNU General Public License (GPL).",2014-10-30 +24371151,TSSer: an automated method to identify transcription start sites in prokaryotic genomes from differential RNA sequencing data.,"

Motivation

Accurate identification of transcription start sites (TSSs) is an essential step in the analysis of transcription regulatory networks. In higher eukaryotes, the capped analysis of gene expression technology enabled comprehensive annotation of TSSs in genomes such as those of mice and humans. In bacteria, an equivalent approach, termed differential RNA sequencing (dRNA-seq), has recently been proposed, but the application of this approach to a large number of genomes is hindered by the paucity of computational analysis methods. With few exceptions, when the method has been used, annotation of TSSs has been largely done manually.

Results

In this work, we present a computational method called 'TSSer' that enables the automatic inference of TSSs from dRNA-seq data. The method rests on a probabilistic framework for identifying both genomic positions that are preferentially enriched in the dRNA-seq data as well as preferentially captured relative to neighboring genomic regions. Evaluating our approach for TSS calling on several publicly available datasets, we find that TSSer achieves high consistency with the curated lists of annotated TSSs, but identifies many additional TSSs. Therefore, TSSer can accelerate genome-wide identification of TSSs in bacterial genomes and can aid in further characterization of bacterial transcription regulatory networks.

Availability

TSSer is freely available under GPL license at http://www.clipz.unibas.ch/TSSer/index.php",2013-12-25 +25424913,FunPred-1: protein function prediction from a protein interaction network using neighborhood analysis.,"Proteins are responsible for all biological activities in living organisms. Thanks to genome sequencing projects, large amounts of DNA and protein sequence data are now available, but the biological functions of many proteins are still not annotated in most cases. The unknown function of such non-annotated proteins may be inferred or deduced from their neighbors in a protein interaction network. In this paper, we propose two new methods to predict protein functions based on network neighborhood properties. FunPred 1.1 uses a combination of three simple-yet-effective scoring techniques: the neighborhood ratio, the protein path connectivity and the relative functional similarity. FunPred 1.2 applies a heuristic approach using the edge clustering coefficient to reduce the search space by identifying densely connected neighborhood regions. The overall accuracy achieved in FunPred 1.2 over 8 functional groups involving hetero-interactions in 650 yeast proteins is around 87%, which is higher than the accuracy with FunPred 1.1. It is also higher than the accuracy of many of the state-of-the-art protein function prediction methods described in the literature. The test datasets and the complete source code of the developed software are now freely available at http://code.google.com/p/cmaterbioinfo/ .",2014-11-25 +25940636,"Rationale, secondary outcome scores and 1-year follow-up of a randomised trial of platelet-rich plasma injections in acute hamstring muscle injury: the Dutch Hamstring Injection Therapy study.","

Background

Platelet-rich plasma (PRP) injections are an experimental treatment for acute muscle injuries. We examined whether PRP injections would accelerate return to play after hamstring injury. The methods and the primary outcome measure were published in the New England Journal of Medicine (NEJM) as 'Platelet-rich plasma injections in acute muscle injury' (2014). This article shares information not available in the NEJM letter or online supplement, especially the rationale behind the study and the secondary outcome measures including 1 year re-injury data.

Methods

We performed a multicentre, randomised, double-blind, placebo-controlled trial in 80 competitive and recreational athletes with acute hamstring muscle injuries. Details can be found in the NEJM (http://www.nejm.org/doi/full/10.1056/NEJMc1402340). The primary outcome measure was the time needed to return to play during 6 months of follow-up. Not previously reported secondary outcome scores included re-injury at 1 year, alteration in clinical and MRI parameters, subjective patient satisfaction and the hamstring outcome score.

Results

In the earlier NEJM publication, we reported that PRP did not accelerate return to play; nor did we find a difference in the 2-month re-injury rate. We report no significant between-group difference in the 1-year re-injury rate (HR=0.89; 95% CI, 0.38 to 2.13; p=0.80) or any other secondary outcome measure.

Conclusions

At 1-year postinjection, we found no benefit of intramuscular PRP compared with placebo injections in patients with acute hamstring injuries in the time to return to play, re-injury rate and alterations of subjective, clinical or MRI measures.",2015-05-04 +25943472,Automated band annotation for RNA structure probing experiments with numerous capillary electrophoresis profiles.,"

Motivation

Capillary electrophoresis (CE) is a powerful approach for structural analysis of nucleic acids, with recent high-throughput variants enabling three-dimensional RNA modeling and the discovery of new rules for RNA structure design. Among the steps composing CE analysis, the process of finding each band in an electrophoretic trace and mapping it to a position in the nucleic acid sequence has required significant manual inspection and remains the most time-consuming and error-prone step. The few available tools seeking to automate this band annotation have achieved limited accuracy and have not taken advantage of information across dozens of profiles routinely acquired in high-throughput measurements.

Results

We present a dynamic-programming-based approach to automate band annotation for high-throughput capillary electrophoresis. The approach is uniquely able to define and optimize a robust target function that takes into account multiple CE profiles (sequencing ladders, different chemical probes, different mutants) collected for the RNA. Over a large benchmark of multi-profile datasets for biological RNAs and designed RNAs from the EteRNA project, the method outperforms prior tools (QuSHAPE and FAST) significantly in terms of accuracy compared with gold-standard manual annotations. The amount of computation required is reasonable at a few seconds per dataset. We also introduce an 'E-score' metric to automatically assess the reliability of the band annotation and show it to be practically useful in flagging uncertainties in band annotation for further inspection.

Availability and implementation

The implementation of the proposed algorithm is included in the HiTRACE software, freely available as an online server and for download at http://hitrace.stanford.edu.

Contact

sryoon@snu.ac.kr or rhiju@stanford.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-05-05 +24023630,PhagoSight: an open-source MATLAB® package for the analysis of fluorescent neutrophil and macrophage migration in a zebrafish model.,"Neutrophil migration in zebrafish larvae is increasingly used as a model to study the response of these leukocytes to different determinants of the cellular inflammatory response. However, it remains challenging to extract comprehensive information describing the behaviour of neutrophils from the multi-dimensional data sets acquired with widefield or confocal microscopes. Here, we describe PhagoSight, an open-source software package for the segmentation, tracking and visualisation of migrating phagocytes in three dimensions. The algorithms in PhagoSight extract a large number of measurements that summarise the behaviour of neutrophils, but that could potentially be applied to any moving fluorescent cells. To derive a useful panel of variables quantifying aspects of neutrophil migratory behaviour, and to demonstrate the utility of PhagoSight, we evaluated changes in the volume of migrating neutrophils. Cell volume increased as neutrophils migrated towards the wound region of injured zebrafish. PhagoSight is openly available as MATLAB® m-files under the GNU General Public License. Synthetic data sets and a comprehensive user manual are available from http://www.phagosight.org.",2013-08-30 +24297519,Topological augmentation to infer hidden processes in biological systems.,"

Motivation

A common problem in understanding a biochemical system is to infer its correct structure or topology. This topology consists of all relevant state variables-usually molecules and their interactions. Here we present a method called topological augmentation to infer this structure in a statistically rigorous and systematic way from prior knowledge and experimental data.

Results

Topological augmentation starts from a simple model that is unable to explain the experimental data and augments its topology by adding new terms that capture the experimental behavior. This process is guided by representing the uncertainty in the model topology through stochastic differential equations whose trajectories contain information about missing model parts. We first apply this semiautomatic procedure to a pharmacokinetic model. This example illustrates that a global sampling of the parameter space is critical for inferring a correct model structure. We also use our method to improve our understanding of glutamine transport in yeast. This analysis shows that transport dynamics is determined by glutamine permeases with two different kinds of kinetics. Topological augmentation can not only be applied to biochemical systems, but also to any system that can be described by ordinary differential equations.

Availability and implementation

Matlab code and examples are available at: http://www.csb.ethz.ch/tools/index",2013-12-02 +24618044,SoyFN: a knowledge database of soybean functional networks.,"Many databases for soybean genomic analysis have been built and made publicly available, but few of them contain knowledge specifically targeting the omics-level gene-gene, gene-microRNA (miRNA) and miRNA-miRNA interactions. Here, we present SoyFN, a knowledge database of soybean functional gene networks and miRNA functional networks. SoyFN provides user-friendly interfaces to retrieve, visualize, analyze and download the functional networks of soybean genes and miRNAs. In addition, it incorporates much information about KEGG pathways, gene ontology annotations and 3'-UTR sequences as well as many useful tools including SoySearch, ID mapping, Genome Browser, eFP Browser and promoter motif scan. SoyFN is a schema-free database that can be accessed as a Web service from any modern programming language using a simple Hypertext Transfer Protocol call. The Web site is implemented in Java, JavaScript, PHP, HTML and Apache, with all major browsers supported. We anticipate that this database will be useful for members of research communities both in soybean experimental science and bioinformatics. Database URL: http://nclab.hit.edu.cn/SoyFN.",2014-03-10 +21728180,MyMolDB: a micromolecular database solution with open source and free components.,"

Background

To manage chemical structures in small laboratories is one of the important daily tasks. Few solutions are available on the internet, and most of them are closed source applications. The open-source applications typically have limited capability and basic cheminformatics functionalities. In this article, we describe an open-source solution to manage chemicals in research groups based on open source and free components. It has a user-friendly interface with the functions of chemical handling and intensive searching.

Results

MyMolDB is a micromolecular database solution that supports exact, substructure, similarity, and combined searching. This solution is mainly implemented using scripting language Python with a web-based interface for compound management and searching. Almost all the searches are in essence done with pure SQL on the database by using the high performance of the database engine. Thus, impressive searching speed has been archived in large data sets for no external Central Processing Unit (CPU) consuming languages were involved in the key procedure of the searching.

Availability

MyMolDB is an open-source software and can be modified and/or redistributed under GNU General Public License version 3 published by the Free Software Foundation (Free Software Foundation Inc. The GNU General Public License, Version 3, 2007. Available at: http://www.gnu.org/licenses/gpl.html). The software itself can be found at http://code.google.com/p/mymoldb/.",2011-07-05 +24098077,Predictive modeling of nanomaterial exposure effects in biological systems.,"

Background

Predictive modeling of the biological effects of nanomaterials is critical for industry and policymakers to assess the potential hazards resulting from the application of engineered nanomaterials.

Methods

We generated an experimental dataset on the toxic effects experienced by embryonic zebrafish due to exposure to nanomaterials. Several nanomaterials were studied, such as metal nanoparticles, dendrimer, metal oxide, and polymeric materials. The embryonic zebrafish metric (EZ Metric) was used as a screening-level measurement representative of adverse effects. Using the dataset, we developed a data mining approach to model the toxic endpoints and the overall biological impact of nanomaterials. Data mining techniques, such as numerical prediction, can assist analysts in developing risk assessment models for nanomaterials.

Results

We found several important attributes that contribute to the 24 hours post-fertilization (hpf) mortality, such as dosage concentration, shell composition, and surface charge. These findings concur with previous studies on nanomaterial toxicity using embryonic zebrafish. We conducted case studies on modeling the overall effect/impact of nanomaterials and the specific toxic endpoints such as mortality, delayed development, and morphological malformations. The results show that we can achieve high prediction accuracy for certain biological effects, such as 24 hpf mortality, 120 hpf mortality, and 120 hpf heart malformation. The results also show that the weighting scheme for individual biological effects has a significant influence on modeling the overall impact of nanomaterials. Sample prediction models can be found at http://neiminer.i-a-i.com/nei_models.

Conclusion

The EZ Metric-based data mining approach has been shown to have predictive power. The results provide valuable insights into the modeling and understanding of nanomaterial exposure effects.",2013-09-16 +23677944,ReviSTER: an automated pipeline to revise misaligned reads to simple tandem repeats.,"

Motivation

Simple tandem repeats are highly variable genetic elements and widespread in genomes of many organisms. Next-generation sequencing technologies have enabled a robust comparison of large numbers of simple tandem repeat loci; however, analysis of their variation using traditional sequence analysis approaches still remains limiting and problematic due to variants occurring in repeat sequences confusing alignment programs into mapping sequence reads to incorrect loci when the sequence reads are significantly different from the reference sequence.

Results

We have developed a program, ReviSTER, which is an automated pipeline using a 'local mapping reference reconstruction method' to revise mismapped or partially misaligned reads at simple tandem repeat loci. RevisSTER estimates alleles of repeat loci using a local alignment method and creates temporary local mapping reference sequences, and finally remaps reads to the local mapping references. Using this approach, ReviSTER was able to successfully revise reads misaligned to repeat loci from both simulated data and real data.

Availability

ReviSTER is open-source software available at http://revister.sourceforge.net.

Contact

garner@vbi.vt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-15 +25866846,MMap: Fast Billion-Scale Graph Computation on a PC via Memory Mapping.,"Graph computation approaches such as GraphChi and TurboGraph recently demonstrated that a single PC can perform efficient computation on billion-node graphs. To achieve high speed and scalability, they often need sophisticated data structures and memory management strategies. We propose a minimalist approach that forgoes such requirements, by leveraging the fundamental memory mapping (MMap) capability found on operating systems. We contribute: (1) a new insight that MMap is a viable technique for creating fast and scalable graph algorithms that surpasses some of the best techniques; (2) the design and implementation of popular graph algorithms for billion-scale graphs with little code, thanks to memory mapping; (3) extensive experiments on real graphs, including the 6.6 billion edge YahooWeb graph, and show that this new approach is significantly faster or comparable to the highly-optimized methods (e.g., 9.5× faster than GraphChi for computing PageRank on 1.47B edge Twitter graph). We believe our work provides a new direction in the design and development of scalable algorithms. Our packaged code is available at http://poloclub.gatech.edu/mmap/.",2014-10-01 +25635206,Clinical initiatives linking Japanese and Swedish healthcare resources on cancer studies utilizing Biobank Repositories.,"The Tokyo Medical University Hospital in Japan and the Lund University hospital in Sweden have recently initiated a research program with the objective to impact on patient treatment by clinical disease stage characterization (phenotyping), utilizing proteomics sequencing platforms. By sharing clinical experiences, patient treatment principles, and biobank strategies, our respective clinical teams in Japan and Sweden will aid in the development of predictive and drug related protein biomarkers. 
Data from joint lung cancer studies are presented where protein expression from Neuro- Endocrine lung cancer (LCNEC) phenotype patients can be separated from Small cell- (SCLC) and Large Cell lung cancer (LCC) patients by deep sequencing and spectral counting analysis. LCNEC, a subtype of large cell carcinoma (LCC), is characterized by neuroendocrine differentiation that small cell lung carcinoma (SCLC) shares. Pre-therapeutic histological distinction between LCNEC and SCLC has so far been problematic, leading to adverse clinical outcome. An establishment of protein targets characteristic of LCNEC is quite helpful for decision of optimal therapeutic strategy by diagnosing individual patients. Proteoform annotation and clinical biobanking is part of the HUPO initiative (http://www.hupo.org) within chromosome 10 and chromosome 19 consortia.",2014-11-22 +24616012,A novel classification and online platform for planning and documentation of medical applications of additive manufacturing.,"Additive manufacturing technologies are widely used in industrial settings and now increasingly also in several areas of medicine. Various techniques and numerous types of materials are used for these applications. There is a clear need to unify and harmonize the patterns of their use worldwide. We present a 5-class system to aid planning of these applications and related scientific work as well as communication between various actors involved in this field. An online, matrix-based platform and a database were developed for planning and documentation of various solutions. This platform will help the medical community to structurally develop both research innovations and clinical applications of additive manufacturing. 
The online platform can be accessed through http://www.medicalam.info.",2014-03-09 +24949242,FOCUS: an alignment-free model to identify organisms in metagenomes using non-negative least squares.,"One of the major goals in metagenomics is to identify the organisms present in a microbial community from unannotated shotgun sequencing reads. Taxonomic profiling has valuable applications in biological and medical research, including disease diagnostics. Most currently available approaches do not scale well with increasing data volumes, which is important because both the number and lengths of the reads provided by sequencing platforms keep increasing. Here we introduce FOCUS, an agile composition based approach using non-negative least squares (NNLS) to report the organisms present in metagenomic samples and profile their abundances. FOCUS was tested with simulated and real metagenomes, and the results show that our approach accurately predicts the organisms present in microbial communities. FOCUS was implemented in Python. The source code and web-sever are freely available at http://edwards.sdsu.edu/FOCUS.",2014-06-05 +21596783,AnnotQTL: a new tool to gather functional and comparative information on a genomic region.,"AnnotQTL is a web tool designed to aggregate functional annotations from different prominent web sites by minimizing the redundancy of information. Although thousands of QTL regions have been identified in livestock species, most of them are large and contain many genes. This tool was therefore designed to assist the characterization of genes in a QTL interval region as a step towards selecting the best candidate genes. It localizes the gene to a specific region (using NCBI and Ensembl data) and adds the functional annotations available from other databases (Gene Ontology, Mammalian Phenotype, HGNC and Pubmed). 
Both human genome and mouse genome can be aligned with the studied region to detect synteny and segment conservation, which is useful for running inter-species comparisons of QTL locations. Finally, custom marker lists can be included in the results display to select the genes that are closest to your most significant markers. We use examples to demonstrate that in just a couple of hours, AnnotQTL is able to identify all the genes located in regions identified by a full genome scan, with some highlighted based on both location and function, thus considerably increasing the chances of finding good candidate genes. AnnotQTL is available at http://annotqtl.genouest.org.",2011-05-19 +21940666,Large-scale phosphotyrosine proteomic profiling of rat renal collecting duct epithelium reveals predominance of proteins involved in cell polarity determination.,"Although extensive phosphoproteomic information is available for renal epithelial cells, previous emphasis has been on phosphorylation of serines and threonines with little focus on tyrosine phosphorylation. Here we have carried out large-scale identification of phosphotyrosine sites in pervanadate-treated native inner medullary collecting ducts of rat, with a view towards identification of physiological processes in epithelial cells that are potentially regulated by tyrosine phosphorylation. The method combined antibody-based affinity purification of tyrosine phosphorylated peptides coupled with immobilized metal ion chromatography to enrich tyrosine phosphopeptides, which were identified by LC-MS/MS. A total of 418 unique tyrosine phosphorylation sites in 273 proteins were identified. A large fraction of these sites have not been previously reported on standard phosphoproteomic databases. All results are accessible via an online database: http://helixweb.nih.gov/ESBL/Database/iPY/. 
Analysis of surrounding sequences revealed four overrepresented motifs: [D/E]xxY*, Y*xxP, DY*, and Y*E, where the asterisk symbol indicates the site of phosphorylation. These motifs plus contextual information, integrated using the NetworKIN tool, suggest that the protein tyrosine kinases involved include members of the insulin- and ephrin-receptor kinase families. Analysis of the gene ontology (GO) terms and KEGG pathways whose protein elements are overrepresented in our data set point to structures involved in epithelial cell-cell and cell-matrix interactions (""adherens junction,"" ""tight junction,"" and ""focal adhesion"") and to components of the actin cytoskeleton as major sites of tyrosine phosphorylation in these cells. In general, these findings mesh well with evidence that tyrosine phosphorylation plays a key role in epithelial polarity determination.",2011-09-21 +23671334,A new reference implementation of the PSICQUIC web service.,"The Proteomics Standard Initiative Common QUery InterfaCe (PSICQUIC) specification was created by the Human Proteome Organization Proteomics Standards Initiative (HUPO-PSI) to enable computational access to molecular-interaction data resources by means of a standard Web Service and query language. Currently providing >150 million binary interaction evidences from 28 servers globally, the PSICQUIC interface allows the concurrent search of multiple molecular-interaction information resources using a single query. Here, we present an extension of the PSICQUIC specification (version 1.3), which has been released to be compliant with the enhanced standards in molecular interactions. The new release also includes a new reference implementation of the PSICQUIC server available to the data providers. It offers augmented web service capabilities and improves the user experience. 
PSICQUIC has been running for almost 5 years, with a user base growing from only 4 data providers to 28 (April 2013) allowing access to 151 310 109 binary interactions. The power of this web service is shown in PSICQUIC View web application, an example of how to simultaneously query, browse and download results from the different PSICQUIC servers. This application is free and open to all users with no login requirement (http://www.ebi.ac.uk/Tools/webservices/psicquic/view/main.xhtml).",2013-05-13 +25095880,AliView: a fast and lightweight alignment viewer and editor for large datasets.,"

Summary

AliView is an alignment viewer and editor designed to meet the requirements of next-generation sequencing era phylogenetic datasets. AliView handles alignments of unlimited size in the formats most commonly used, i.e. FASTA, Phylip, Nexus, Clustal and MSF. The intuitive graphical interface makes it easy to inspect, sort, delete, merge and realign sequences as part of the manual filtering process of large datasets. AliView also works as an easy-to-use alignment editor for small as well as large datasets.

Availability and implementation

AliView is released as open-source software under the GNU General Public License, version 3.0 (GPLv3), and is available at GitHub (www.github.com/AliView). The program is cross-platform and extensively tested on Linux, Mac OS X and Windows systems. Downloads and help are available at http://ormbunkar.se/aliview

Contact

anders.larsson@ebc.uu.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-08-05 +27715149,Violent assaults.,"The Department of Health is conducting an audit to determine whether emergency departments (EDs) collect and share data on violent assaults, and the extent to which data sharing has become standard practice. This mandatory ED data-sharing audit must be completed by a staff member who understands data collection procedures and how the department functions. People who want to take part in the audit should access http://tinyurl.com/cm4wuc6.",2012-06-01 +24603983,Frag'r'Us: knowledge-based sampling of protein backbone conformations for de novo structure-based protein design.,"

Motivation

The remodeling of short fragment(s) of the protein backbone to accommodate new function(s), fine-tune binding specificities or change/create novel protein interactions is a common task in structure-based computational design. Alternative backbone conformations can be generated de novo or by redeploying existing fragments extracted from protein structures i.e. knowledge-based. We present Frag'r'Us, a web server designed to sample alternative protein backbone conformations in loop regions. The method relies on a database of super secondary structural motifs called smotifs. Thus, sampling of conformations reflects structurally feasible fragments compiled from existing protein structures. Availability and implementation Frag'r'Us has been implemented as web application and is available at http://www.bioinsilico.org/FRAGRUS.",2014-03-06 +21165589,[Polypharmacy in schizophrenia].,"

Background

While most guidelines recommend monotherapy with second-generation antipsychotics (SGA) in schizophrenia, the combined application of multiple psychotropic agents is very common, especially in treatment-refractory cases.

Methods

This review summarizes the evidence of combined antipsychotic treatment strategies and the augmentation of antipsychotics with mood stabilizers, antidepressants and experimental substances, based on publications accessible in public databases (Medline/Ovid, Google, http://www.clinicaltrials.gov) up to October 2009.

Results

Polypharmacy aims to address several aspects of treatment resistance and side effects of antipsychotics. Some evidence supports the augmentation of antipsychotics with antidepressants for negative symptoms and comorbid major depressive episodes. The add-on of lithium and mood stabilizers lacks compelling evidence but might be beneficial for specific subgroups. For treatment-resistant cognitive symptoms, cognitive re-mediation seems most promising as no pharmacological add-on strategy has gained convincing evidence so far. Acute dystonic movements should be treated with anticholinergic agents while agitation and anxiety might respond to short-term application of benzodiazepines. Treatment-resistant positive and/or negative symptoms should primarily lead to clozapine monotherapy; the add-on of a second SGA may be considered in single cases.

Conclusions

In general, rigorous data on combination therapy in schizophrenia are rare, and further randomized controlled trials (RCT), naturalistic and head-to-head-studies are necessary.",2011-07-01 +25266224,Sigma: strain-level inference of genomes from metagenomic analysis for biosurveillance.,"

Motivation

Metagenomic sequencing of clinical samples provides a promising technique for direct pathogen detection and characterization in biosurveillance. Taxonomic analysis at the strain level can be used to resolve serotypes of a pathogen in biosurveillance. Sigma was developed for strain-level identification and quantification of pathogens using their reference genomes based on metagenomic analysis.

Results

Sigma provides not only accurate strain-level inferences, but also three unique capabilities: (i) Sigma quantifies the statistical uncertainty of its inferences, which includes hypothesis testing of identified genomes and confidence interval estimation of their relative abundances; (ii) Sigma enables strain variant calling by assigning metagenomic reads to their most likely reference genomes; and (iii) Sigma supports parallel computing for fast analysis of large datasets. The algorithm performance was evaluated using simulated mock communities and fecal samples with spike-in pathogen strains.

Availability and implementation

Sigma was implemented in C++ with source codes and binaries freely available at http://sigma.omicsbio.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-29 +25495213,Genotype harmonizer: automatic strand alignment and format conversion for genotype data integration.,"

Background

To gain statistical power or to allow fine mapping, researchers typically want to pool data before meta-analyses or genotype imputation. However, the necessary harmonization of genetic datasets is currently error-prone because of many different file formats and lack of clarity about which genomic strand is used as reference.

Findings

Genotype Harmonizer (GH) is a command-line tool to harmonize genetic datasets by automatically solving issues concerning genomic strand and file format. GH solves the unknown strand issue by aligning ambiguous A/T and G/C SNPs to a specified reference, using linkage disequilibrium patterns without prior knowledge of the used strands. GH supports many common GWAS/NGS genotype formats including PLINK, binary PLINK, VCF, SHAPEIT2 & Oxford GEN. GH is implemented in Java and a large part of the functionality can also be used as Java 'Genotype-IO' API. All software is open source under license LGPLv3 and available from http://www.molgenis.org/systemsgenetics.

Conclusions

GH can be used to harmonize genetic datasets across different file formats and can be easily integrated as a step in routine meta-analysis and imputation pipelines.",2014-12-11 +25173705,SeqControl: process control for DNA sequencing.,"As high-throughput sequencing continues to increase in speed and throughput, routine clinical and industrial application draws closer. These 'production' settings will require enhanced quality monitoring and quality control to optimize output and reduce costs. We developed SeqControl, a framework for predicting sequencing quality and coverage using a set of 15 metrics describing overall coverage, coverage distribution, basewise coverage and basewise quality. Using whole-genome sequences of 27 prostate cancers and 26 normal references, we derived multivariate models that predict sequencing quality and depth. SeqControl robustly predicted how much sequencing was required to reach a given coverage depth (area under the curve (AUC) = 0.993), accurately classified clinically relevant formalin-fixed, paraffin-embedded samples, and made predictions from as little as one-eighth of a sequencing lane (AUC = 0.967). These techniques can be immediately incorporated into existing sequencing pipelines to monitor data quality in real time. SeqControl is available at http://labs.oicr.on.ca/Boutros-lab/software/SeqControl/.",2014-08-31 +23975764,NextGenMap: fast and accurate read mapping in highly polymorphic genomes.,"

Summary

When choosing a read mapper, one faces the trade off between speed and the ability to map reads in highly polymorphic regions. Here, we report NextGenMap, a fast and accurate read mapper, which reduces this dilemma. NextGenMap aligns reads reliably to a reference genome even when the sequence difference between target and reference genome is large, i.e. highly polymorphic genome. At the same time, NextGenMap outperforms current mapping methods with respect to runtime and to the number of correctly mapped reads. NextGenMap efficiently uses the available hardware by exploiting multi-core CPUs as well as graphic cards (GPUs), if available. In addition, NextGenMap handles automatically any read data independent of read length and sequencing technology.

Availability

NextGenMap source code and documentation are available at: http://cibiv.github.io/NextGenMap/.

Contact

fritz.sedlazeck@univie.ac.at.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-08-23 +23844010,LegumeGRN: a gene regulatory network prediction server for functional and comparative studies.,"Building accurate gene regulatory networks (GRNs) from high-throughput gene expression data is a long-standing challenge. However, with the emergence of new algorithms combined with the increase of transcriptomic data availability, it is now reachable. To help biologists to investigate gene regulatory relationships, we developed a web-based computational service to build, analyze and visualize GRNs that govern various biological processes. The web server is preloaded with all available Affymetrix GeneChip-based transcriptomic and annotation data from the three model legume species, i.e., Medicago truncatula, Lotus japonicus and Glycine max. Users can also upload their own transcriptomic and transcription factor datasets from any other species/organisms to analyze their in-house experiments. Users are able to select which experiments, genes and algorithms they will consider to perform their GRN analysis. To achieve this flexibility and improve prediction performance, we have implemented multiple mainstream GRN prediction algorithms including co-expression, Graphical Gaussian Models (GGMs), Context Likelihood of Relatedness (CLR), and parallelized versions of TIGRESS and GENIE3. Besides these existing algorithms, we also proposed a parallel Bayesian network learning algorithm, which can infer causal relationships (i.e., directionality of interaction) and scale up to several thousands of genes. Moreover, this web server also provides tools to allow integrative and comparative analysis between predicted GRNs obtained from different algorithms or experiments, as well as comparisons between legume species. The web site is available at http://legumegrn.noble.org.",2013-07-03 +25161251,Large-scale automated identification of mouse brain cells in confocal light sheet microscopy images.,"

Motivation

Recently, confocal light sheet microscopy has enabled high-throughput acquisition of whole mouse brain 3D images at the micron scale resolution. This poses the unprecedented challenge of creating accurate digital maps of the whole set of cells in a brain.

Results

We introduce a fast and scalable algorithm for fully automated cell identification. We obtained the whole digital map of Purkinje cells in mouse cerebellum consisting of a set of 3D cell center coordinates. The method is accurate and we estimated an F1 measure of 0.96 using 56 representative volumes, totaling 1.09 GVoxel and containing 4138 manually annotated soma centers.

Availability and implementation

Source code and its documentation are available at http://bcfind.dinfo.unifi.it/. The whole pipeline of methods is implemented in Python and makes use of Pylearn2 and modified parts of Scikit-learn. Brain images are available on request.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +23479353,JAWAMix5: an out-of-core HDF5-based java implementation of whole-genome association studies using mixed models.,"

Summary

We present JAWAMix5, an out-of-core open-source toolkit for association mapping using high-throughput sequence data. Taking advantage of its HDF5-based implementation, JAWAMix5 stores genotype data on disk and accesses them as though stored in main memory. Therefore, it offers a scalable and fast analysis without concerns about memory usage, whatever the size of the dataset. We have implemented eight functions for association studies, including standard methods (linear models, linear mixed models, rare variants test, analysis in nested association mapping design and local variance component analysis), as well as a novel Bayesian local variance component analysis. Application to real data demonstrates that JAWAMix5 is reasonably fast compared with traditional solutions that load the complete dataset into memory, and that the memory usage is efficient regardless of the dataset size.

Availability

The source code, a 'batteries-included' executable and user manual can be freely downloaded from http://code.google.com/p/jawamix5/.",2013-03-11 +21251524,Citations to Web pages in scientific articles: the permanence of archived references.,"

Study objective

We validate the use of archiving Internet references by comparing the accessibility of published uniform resource locators (URLs) with corresponding archived URLs over time.

Methods

We scanned the ""Articles in Press"" section in Annals of Emergency Medicine from March 2009 through June 2010 for Internet references in research articles. If an Internet reference produced the authors' expected content, the Web page was archived with WebCite (http://www.webcitation.org). Because the archived Web page does not change, we compared it with the original URL to determine whether the original Web page had changed. We attempted to access each original URL and archived Web site URL at 3-month intervals from the time of online publication during an 18-month study period. Once a URL no longer existed or failed to contain the original authors' expected content, it was excluded from further study. The number of original URLs and archived URLs that remained accessible over time was totaled and compared.

Results

A total of 121 articles were reviewed and 144 Internet references were found within 55 articles. Of the original URLs, 15% (21/144; 95% confidence interval [CI] 9% to 21%) were inaccessible at publication. During the 18-month observation period, there was no loss of archived URLs (apart from the 4% [5/123; 95% CI 2% to 9%] that could not be archived), whereas 35% (49/139) of the original URLs were lost (46% loss; 95% CI 33% to 61% by the Kaplan-Meier method; difference between curves P<.0001, log rank test).

Conclusion

Archiving a referenced Web page at publication can help preserve the authors' expected information.",2011-02-01 +22955991,ChIP-seq guidelines and practices of the ENCODE and modENCODE consortia.,"Chromatin immunoprecipitation (ChIP) followed by high-throughput DNA sequencing (ChIP-seq) has become a valuable and widely used approach for mapping the genomic location of transcription-factor binding and histone modifications in living cells. Despite its widespread use, there are considerable differences in how these experiments are conducted, how the results are scored and evaluated for quality, and how the data and metadata are archived for public use. These practices affect the quality and utility of any global ChIP experiment. Through our experience in performing ChIP-seq experiments, the ENCODE and modENCODE consortia have developed a set of working standards and guidelines for ChIP experiments that are updated routinely. The current guidelines address antibody validation, experimental replication, sequencing depth, data and metadata reporting, and data quality assessment. We discuss how ChIP quality, assessed in these ways, affects different uses of ChIP-seq data. All data sets used in the analysis have been deposited for public viewing and downloading at the ENCODE (http://encodeproject.org/ENCODE/) and modENCODE (http://www.modencode.org/) portals.",2012-09-01 +25928282,"Diagnostic and prognostic potential of miR-21, miR-29c, miR-148 and miR-203 in adenocarcinoma and squamous cell carcinoma of esophagus.","

Background

Esophageal cancer is the malignant tumor with very poor prognosis and increasing incidence often diagnosed at very late stage, so the prognosis of affected patients is unsatisfactory, despite the development of therapeutic option such as surgery, chemotherapy and radiotherapy. Consequently, there is a great need for biomarkers to allow a tailored multimodality approach with increased efficiency. Altered expression of microRNAs has been reported in wide range of malignancies, including esophageal cancer. The aim of this study was to examine the expression levels of candidate microRNAs in esophageal cancer and evaluate their diagnostic and prognostic potential.

Findings

Using quantitative real-time PCR, expression levels of 9 candidate microRNAs were examined in 62 tissue samples, 23 esophageal adenocarcinomas, 22 esophageal squamous cell carcinomas and 17 adjacent esophageal mucosa samples. MicroRNA expression levels were further analyzed in regards to clinico-pathological features of esophageal cancer patients. We observed significantly decreased levels of miR-203 and increased levels of miR-21 in adenocarcinoma tissues when compared to normal mucosa. MiR-29c and miR-148 indicated good ability to distinguish between histological subtypes of esophageal cancer. MiR-203 and miR-148 were linked to disease-free survival and overall survival in esophageal adenocarcinoma patients, and miR-148 also in esophageal squamous cell carcinoma patients.

Conclusions

Our data suggest that altered expression of miR-21, miR-29c, miR-148 and miR-203 is related to neoplastic transformation and progression of the disease and these microRNAs could serve as potential diagnostic and prognostic biomarkers in esophageal cancer.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/4646922201567057.",2015-04-28 +26357088,Identification of Protein Complexes Using Weighted PageRank-Nibble Algorithm and Core-Attachment Structure.,"Protein complexes play a significant role in understanding the underlying mechanism of most cellular functions. Recently, many researchers have explored computational methods to identify protein complexes from protein-protein interaction (PPI) networks. One group of researchers focus on detecting local dense subgraphs which correspond to protein complexes by considering local neighbors. The drawback of this kind of approach is that the global information of the networks is ignored. Some methods such as Markov Clustering algorithm (MCL), PageRank-Nibble are proposed to find protein complexes based on random walk technique which can exploit the global structure of networks. However, these methods ignore the inherent core-attachment structure of protein complexes and treat adjacent node equally. In this paper, we design a weighted PageRank-Nibble algorithm which assigns each adjacent node with different probability, and propose a novel method named WPNCA to detect protein complex from PPI networks by using weighted PageRank-Nibble algorithm and core-attachment structure. Firstly, WPNCA partitions the PPI networks into multiple dense clusters by using weighted PageRank-Nibble algorithm. Then the cores of these clusters are detected and the rest of proteins in the clusters will be selected as attachments to form the final predicted protein complexes. The experiments on yeast data show that WPNCA outperforms the existing methods in terms of both accuracy and p-value. 
The software for WPNCA is available at ""http://netlab.csu.edu.cn/bioinfomatics/weipeng/WPNCA/download.html"".",2015-01-01 +26568918,Rapid genotyping of human rotavirus using SYBR green real-time reverse transcription-polymerase chain reaction with melting curve analysis.,"

Aim

To develop a real-time reverse transcription-polymerase chain reaction (RT-PCR) assay to genotype rotavirus (G and P) in Alberta from January 2012 to June 2013.

Methods

We developed and validated a different approach to perform rotavirus G and P genotyping using a two-step SYBR green RT-PCR (rt-gPCR) by selecting genotype-specific primers of published conventional RT nested PCR (cnRT-PCR) assay and optimizing the amplification conditions. cDNA was first synthesized from total RNA with SuperScript™ II reverse transcriptase kit followed by amplification step using monoplex SYBR green real-time PCR. After the PCR reaction, melting curve analysis was used to determine specific genotype. Sixteen samples previously genotyped using cnRT-PCR were tested using the new assay and the genotyping results were compared as sensitivity analysis. Assay specificity was evaluated by testing other gastroenteritis viruses with the new assay. The amplicon size of each available genotype was determined by gel-electrophoresis and DNA sequences were obtained using Sanger-sequencing method. After validation and optimization, the new assay was used to genotype 122 pediatric clinical stool samples previously tested positive for rotavirus using electron microscopy between January 2012 and June 2013.

Results

The new rt-gPCR assay was validated and optimized. The assay detected G1 to G4, G9, G12 and P[4] and P[8] that were available as positive controls in our laboratory. A single and clear peak of melting curve was generated for each of specific G and P genotypes with a Tm ranging from 80 °C to 82 °C. The sensitivity of rt-gPCR was comparable to cnRT-PCR with 100% correlation of the 16 samples with known G and P genotypes. No cross reaction was found with other gastroenteritis viruses. Using the new rt-gPCR assay, genotypes were obtained for 121 of the 122 pediatric clinical samples tested positive for rotavirus: G1P[8] (42.6%), G2P[4] (4.9%), G3P[8] (10.7%), G9P[8] (10.7%), G9P[4] (6.6%), G12P[8] (23.0%), and unknown GP[8] (0.8%). For the first time, G12 rotavirus strains were found in Alberta and G12 was the second most common genotype during the study period. Gel electrophoresis of all the genotypes showed expected amplicon size for each genotype. The sequence data of the two G12 samples along with other genotypes were blasted in NCBI BLAST or analyzed with Rota C Genotyping tool (http://rotac.regatools.be/). All genotyping results were confirmed to be correct.

Conclusion

rt-gPCR is a useful tool for the genotyping and characterization of rotavirus. Monitoring of rotavirus genotypes is important for the identification of emerging strains and ongoing evaluation of rotavirus vaccination programs.",2015-11-01 +25636618,A statistical framework for improving genomic annotations of transposon mutagenesis (TM) assigned essential genes.,"Whole-genome transposon mutagenesis (TM) experiment followed by sequence-based identification of insertion sites is the most popular genome-wise experiment to identify essential genes in Prokaryota. However, due to the limitation of high-throughput technique, this approach yields substantial systematic biases resulting in the incorrect assignments of many essential genes. To obtain unbiased and accurate annotations of essential genes from TM experiments, we developed a novel Poisson model based statistical framework to refine these TM assignments. In the model, first we identified and incorporated several potential factors such as gene length and TM insertion information which may cause the TM assignment biases into the basic Poisson model. Then we calculated the conditional probability of an essential gene given the observed TM insertion number. By factorizing this probability through introducing a latent variable the real insertion number, we formalized the statistical framework. Through iteratively updating and optimizing model parameters to maximize the goodness-of-fit of the model to the observed TM insertion data, we finalized the model. Using this model, we are able to assign the probability score of essentiality to each individual gene given its TM assignment, which subsequently correct the experimental biases. 
To enable our model widely useable, we established a user-friendly Web-server that is accessible to the public: http://research.cchmc.org/essentialgene/.",2015-01-01 +23408797,Hierarchical and spatially explicit clustering of DNA sequences with BAPS software.,"Phylogeographical analyses have become commonplace for a myriad of organisms with the advent of cheap DNA sequencing technologies. Bayesian model-based clustering is a powerful tool for detecting important patterns in such data and can be used to decipher even quite subtle signals of systematic differences in molecular variation. Here, we introduce two upgrades to the Bayesian Analysis of Population Structure (BAPS) software, which enable 1) spatially explicit modeling of variation in DNA sequences and 2) hierarchical clustering of DNA sequence data to reveal nested genetic population structures. We provide a direct interface to map the results from spatial clustering with Google Maps using the portal http://www.spatialepidemiology.net/ and illustrate this approach using sequence data from Borrelia burgdorferi. The usefulness of hierarchical clustering is demonstrated through an analysis of the metapopulation structure within a bacterial population experiencing a high level of local horizontal gene transfer. The tools that are introduced are freely available at http://www.helsinki.fi/bsg/software/BAPS/.",2013-02-13 +24185699,GWIPS-viz: development of a ribo-seq genome browser.,"We describe the development of GWIPS-viz (http://gwips.ucc.ie), an online genome browser for viewing ribosome profiling data. Ribosome profiling (ribo-seq) is a recently developed technique that provides genome-wide information on protein synthesis (GWIPS) in vivo. It is based on the deep sequencing of ribosome-protected messenger RNA (mRNA) fragments, which allows the ribosome density along all mRNA transcripts present in the cell to be quantified. 
Since its inception, ribo-seq has been carried out in a number of eukaryotic and prokaryotic organisms. Owing to the increasing interest in ribo-seq, there is a pertinent demand for a dedicated ribo-seq genome browser. GWIPS-viz is based on The University of California Santa Cruz (UCSC) Genome Browser. Ribo-seq tracks, coupled with mRNA-seq tracks, are currently available for several genomes: human, mouse, zebrafish, nematode, yeast, bacteria (Escherichia coli K12, Bacillus subtilis), human cytomegalovirus and bacteriophage lambda. Our objective is to continue incorporating published ribo-seq data sets so that the wider community can readily view ribosome profiling information from multiple studies without the need to carry out computational processing.",2013-10-31 +26388941,BIDCHIPS: bias decomposition and removal from ChIP-seq data clarifies true binding signal and its functional correlates.,"

Background

Unraveling transcriptional regulatory networks is a central problem in molecular biology and, in this quest, chromatin immunoprecipitation and sequencing (ChIP-seq) technology has given us the unprecedented ability to identify sites of protein-DNA binding and histone modification genome wide. However, multiple systemic and procedural biases hinder harnessing the full potential of this technology. Previous studies have addressed this problem, but a thorough characterization of different, interacting biases on ChIP-seq signals is still lacking.

Results

Here, we present a novel framework where the genome-wide ChIP-seq signal is viewed as being quantifiably influenced by different, measurable sources of bias, which can then be computationally subtracted away. We use a compendium of 123 human ENCODE ChIP-seq datasets to build regression models that tell us how much of a ChIP-seq signal can be attributed to mappability, GC-content, chromatin accessibility, and factors represented in input DNA and IgG controls. When we use the model to separate out these non-binding influences from the ChIP-seq signal, we obtain a purified signal that associates better to TF-DNA-binding motifs than do other measures of peak significance. We also carry out a multiscale analysis that reveals how ChIP-seq signal biases differ across different scales. Finally, we investigate previously reported associations between gene expression and ChIP-seq signals at transcription start sites. We show that our model can be used to discriminate ChIP-seq signals that are truly related to gene expression from those that are merely correlated by virtue of bias-in particular, chromatin accessibility bias, which shows up in ChIP-seq signals and also relates to gene expression.

Conclusions

Our study provides new insights into the behavior of ChIP-seq signal biases and proposes a novel mitigation framework that improves results compared to existing techniques. With ChIP-seq now being the central technology for studying transcriptional regulation, it is most crucial to accurately characterize, quantify, and adjust for the genome-wide effects of biases affecting ChIP-seq. Our study also emphasizes that properly accounting for confounders in ChIP-seq data is of paramount importance for obtaining biologically accurate insights into the workings of the complex regulatory mechanisms in living organisms. R and MATLAB packages implementing the framework can be obtained from http://www.perkinslab.ca/Software.html.",2015-09-17 +25084126,Computational integration of genomic traits into 16S rDNA microbiota sequencing studies.,"Molecular sequencing techniques help to understand microbial biodiversity with regard to species richness, assembly structure and function. In this context, available methods are barcoding, metabarcoding, genomics and metagenomics. The first two are restricted to taxonomic assignments, whilst genomics only refers to functional capabilities of a single organism. Metagenomics by contrast yields information about organismal and functional diversity of a community. However currently it is very demanding regarding labour and costs and thus not applicable to most laboratories. Here, we show in a proof-of-concept that computational approaches are able to retain functional information about microbial communities assessed through 16S rDNA (meta)barcoding by referring to reference genomes. We developed an automatic pipeline to show that such integration may infer preliminary or supplementary genomic content of a community. We applied it to two biological datasets and delineated significantly overrepresented protein families between communities. 
The script alongside supporting data is available at http://bioapps.biozentrum.uni-wuerzburg.de.",2014-07-30 +25387969,Evolutionary optimization of transcription factor binding motif detection.,"All the cell types are under strict control of how their genes are transcribed into expressed transcripts by the temporally dynamic orchestration of the transcription factor binding activities. Given a set of known binding sites (BSs) of a given transcription factor (TF), computational TFBS screening technique represents a cost efficient and large scale strategy to complement the experimental ones. There are two major classes of computational TFBS prediction algorithms based on the tertiary and primary structures, respectively. A tertiary structure based algorithm tries to calculate the binding affinity between a query DNA fragment and the tertiary structure of the given TF. Due to the limited number of available TF tertiary structures, primary structure based TFBS prediction algorithm is a necessary complementary technique for large scale TFBS screening. This study proposes a novel evolutionary algorithm to randomly mutate the weights of different positions in the binding motif of a TF, so that the overall TFBS prediction accuracy is optimized. The comparison with the most widely used algorithm, Position Weight Matrix (PWM), suggests that our algorithm performs better or the same level in all the performance measurements, including sensitivity, specificity, accuracy and Matthews correlation coefficient. Our data also suggests that it is necessary to remove the widely used assumption of independence between motif positions. The supplementary material may be found at: http://www.healthinformaticslab.org/supp/ .",2015-01-01 +25818743,A meta-analysis of bevacizumab combined with chemotherapy in the treatment of ovarian cancer.,"

Introduction

Angiogenesis plays an important role in the biology of ovarian cancer. The clinical efficacy and side effects of bevacizumab, the vascular endothelial growth factor inhibitor, on survival and toxicity in women with ovarian cancer were not conclusive. We performed this systematic review and meta-analysis in order to clarify the efficacy of bevacizumab combined with chemotherapy in the treatment of ovarian cancer.

Materials and methods

We searched the electronic database of MEDLINE, EMBASE, Cochrane Central Register of Controlled Trials and CNKI for clinical controlled trials comparing bevacizumab combined with chemotherapy and chemotherapy alone in the treatment of ovarian cancer. The primary outcomes of eligible studies included median progression-free survival (PFS), overall survival (OS), and toxicities such as enterobrosis, hypertension, albuminuria, congestive heart failure (CHF), neutrophils, thrombosis, and bleeding. The Hazard ratio (HR) and relative risk were used for the meta-analysis and were expressed with 95% confidence intervals (CIs). All the statistical analyses were carried out with Stata 11.0 software (http://www.stata.com; Stata Corporation, College Station, TX, USA).

Results

We included 5 studies with 1798 cases in the bevacizumab combined with the chemotherapy group and 1810 subjects in the chemotherapy alone group. The pooled results showed that bevacizumab + chemotherapy compared with chemotherapy alone can significantly prolong the median PFS (HR, 0.64; 95% CI, 0.46-0.82; P < 0.05) but not the OS (HR, 0.84; 95% CI, 0.59-10.9; P > 0.05); the toxicity analysis showed that the enterobrosis, hypertension, albuminuria, neutrophils, thrombosis, and bleeding were significantly increased in the bevacizumab + chemotherapy group compared with chemotherapy alone (Pall < 0.05). But the CHF risk between the two groups was not statistically different (P > 0.05).

Conclusion

Bevacizumab combined with chemotherapy prolonged the median PFS in patients with ovarian cancer but also increase the risk of developing enterobrosis, hypertension, albuminuria, neutrophils, thrombosis, and bleeding.",2014-03-01 +23709164,ESCOLEX: a grade-level lexical database from European Portuguese elementary to middle school textbooks.,"In this article, we introduce ESCOLEX, the first European Portuguese children's lexical database with grade-level-adjusted word frequency statistics. Computed from a 3.2-million-word corpus, ESCOLEX provides 48,381 word forms extracted from 171 elementary and middle school textbooks for 6- to 11-year-old children attending the first six grades in the Portuguese educational system. Like other children's grade-level databases (e.g., Carroll, Davies, & Richman, 1971; Corral, Ferrero, & Goikoetxea, Behavior Research Methods, 41, 1009-1017, 2009; Lété, Sprenger-Charolles, & Colé, Behavior Research Methods, Instruments, & Computers, 36, 156-166, 2004; Zeno, Ivens, Millard, Duvvuri, 1995), ESCOLEX provides four frequency indices for each grade: overall word frequency (F), index of dispersion across the selected textbooks (D), estimated frequency per million words (U), and standard frequency index (SFI). It also provides a new measure, contextual diversity (CD). In addition, the number of letters in the word and its part(s) of speech, number of syllables, syllable structure, and adult frequencies taken from P-PAL (a European Portuguese corpus-based lexical database; Soares, Comesaña, Iriarte, Almeida, Simões, Costa, …, Machado, 2010; Soares, Iriarte, Almeida, Simões, Costa, França, …, Comesaña, in press) are provided. ESCOLEX will be a useful tool both for researchers interested in language processing and development and for professionals in need of verbal materials adjusted to children's developmental stages. 
ESCOLEX can be downloaded along with this article or from http://p-pal.di.uminho.pt/about/databases .",2014-03-01 +22833526,DELIMINATE--a fast and efficient method for loss-less compression of genomic sequences: sequence analysis.,"

Summary

An unprecedented quantity of genome sequence data is currently being generated using next-generation sequencing platforms. This has necessitated the development of novel bioinformatics approaches and algorithms that not only facilitate a meaningful analysis of these data but also aid in efficient compression, storage, retrieval and transmission of huge volumes of the generated data. We present a novel compression algorithm (DELIMINATE) that can rapidly compress genomic sequence data in a loss-less fashion. Validation results indicate relatively higher compression efficiency of DELIMINATE when compared with popular general purpose compression algorithms, namely, gzip, bzip2 and lzma.

Availability and implementation

Linux, Windows and Mac implementations (both 32 and 64-bit) of DELIMINATE are freely available for download at: http://metagenomics.atc.tcs.com/compression/DELIMINATE.

Contact

sharmila@atc.tcs.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-25 +28261602,TransPS: A Transcriptome Post Scaffolding Method for Assembling High Quality Contigs. ,"With the development of high-throughput sequencing technologies, transcriptomes can be sequenced at a low price and with high efficiency. Sequence assembly approaches have been renewed to meet the new requirements from new sequencing technologies. Assembly strategies are important for biologists who need to assemble the transcriptome generated in their experiments. However, some modern de novo assembly strategies generate a large section of redundant contigs due to sequence variations, which greatly affect downstream analysis and experiments. This work proposed TransPS, a post transcriptome scaffolding method to generate high quality transcriptomes. TransPS shows promising results on the test transcriptome data sets where the redundancy is greatly reduced by at least 50%, while the coverage is improved considerably. The web server and source code are available at https://bioinformatics.cs.vt.edu/zhanglab/transps/.",2014-05-28 +24336643,BETASEQ: a powerful novel method to control type-I error inflation in partially sequenced data for rare variant association testing.,"

Summary

Despite its great capability to detect rare variant associations, next-generation sequencing is still prohibitively expensive when applied to large samples. In case-control studies, it is thus appealing to sequence only a subset of cases to discover variants and genotype the identified variants in controls and the remaining cases under the reasonable assumption that causal variants are usually enriched among cases. However, this approach leads to inflated type-I error if analyzed naively for rare variant association. Several methods have been proposed in recent literature to control type-I error at the cost of either excluding some sequenced cases or correcting the genotypes of discovered rare variants. All of these approaches thus suffer from a certain extent of information loss and thus are underpowered. We propose a novel method (BETASEQ), which corrects inflation of type-I error by supplementing pseudo-variants while keeping the original sequence and genotype data intact. Extensive simulations and real data analysis demonstrate that, in most practical situations, BETASEQ leads to higher testing powers than existing approaches with guaranteed (controlled or conservative) type-I error.

Availability and implementation

BETASEQ and associated R files, including documentation, examples, are available at http://www.unc.edu/~yunmli/betaseq",2013-12-12 +26372667,Urinary Dialkyl Phosphate Concentrations and Lung Function Parameters in Adolescents and Adults: Results from the Canadian Health Measures Survey.,"

Background

Epidemiological studies have reported associations between lung function parameters and organophosphate (OP) pesticide exposures in agricultural occupations, but to our knowledge associations have not been evaluated in general populations.

Objectives

We examined associations between OP metabolite dialkyl phosphates (DAPs) and lung function using data from the Canadian Health Measures Survey (CHMS) Cycle 1.

Methods

Forced vital capacity (FVC), forced expiratory volume in 1 sec (FEV1), FEV1/FVC ratio, and forced expiratory flow between 25% and 75% of FVC (FEF25%-75%) were measured for 4,446 CHMS participants. Urinary concentrations of six DAP metabolites (DMP, DMTP, DMDTP, DEP, DETP, and DEDTP), smoking status, and other predictors of lung function were also measured in the CHMS-Cycle 1. Multiple linear regression analyses were used to examine the relationship between total DAP concentrations (ΣDAPs) and lung function in adolescents (12-19 years) and adults (20-79 years).

Results

In adults, estimates from multiple regression analyses suggested that a 1-unit increase on natural logarithmic scale (171% increase on the original scale) in the creatinine-corrected urinary concentration (nanomoles per gram creatinine) of ΣDAP was associated with a 32.6-mL (95% CI: -57.2, -8.1) reduction in FVC, 32.6-mL (95% CI: -59.0, -6.3) reduction in FEV1, 0.2% (95% CI: -0.6, 0.2) reduction in FEV1/FVC ratio, and 53.1-mL/sec (95% CI: -113.9, 7.7) reduction in FEF25%-75%. In adolescents, associations between ΣDAP and FEV1 were closer to the null and positive for FVC, whereas associations with FEV1/FVC and FEF25%-75% were negative, as in adults. However, none of the associations were significant in adolescents.

Conclusions

The negative association between ΣDAP and lung function in adult participants suggests a detrimental effect of OP pesticides on lung function in the adult general population. Further studies using prospective designs are warranted to confirm the findings reported in this study.

Citation

Ye M, Beach J, Martin JW, Senthilselvan A. 2016. Urinary dialkyl phosphate concentrations and lung function parameters in adolescents and adults: results from the Canadian Health Measures Survey. Environ Health Perspect 124:491-497; http://dx.doi.org/10.1289/ehp.1509745.",2015-09-15 +23717195,Simultaneous identification of multiple driver pathways in cancer.,"Distinguishing the somatic mutations responsible for cancer (driver mutations) from random, passenger mutations is a key challenge in cancer genomics. Driver mutations generally target cellular signaling and regulatory pathways consisting of multiple genes. This heterogeneity complicates the identification of driver mutations by their recurrence across samples, as different combinations of mutations in driver pathways are observed in different samples. We introduce the Multi-Dendrix algorithm for the simultaneous identification of multiple driver pathways de novo in somatic mutation data from a cohort of cancer samples. The algorithm relies on two combinatorial properties of mutations in a driver pathway: high coverage and mutual exclusivity. We derive an integer linear program that finds set of mutations exhibiting these properties. We apply Multi-Dendrix to somatic mutations from glioblastoma, breast cancer, and lung cancer samples. Multi-Dendrix identifies sets of mutations in genes that overlap with known pathways - including Rb, p53, PI(3)K, and cell cycle pathways - and also novel sets of mutually exclusive mutations, including mutations in several transcription factors or other genes involved in transcriptional regulation. These sets are discovered directly from mutation data with no prior knowledge of pathways or gene interactions. We show that Multi-Dendrix outperforms other algorithms for identifying combinations of mutations and is also orders of magnitude faster on genome-scale data. 
Software available at: http://compbio.cs.brown.edu/software.",2013-05-23 +22645318,METAGENassist: a comprehensive web server for comparative metagenomics.,"With recent improvements in DNA sequencing and sample extraction techniques, the quantity and quality of metagenomic data are now growing exponentially. This abundance of richly annotated metagenomic data and bacterial census information has spawned a new branch of microbiology called comparative metagenomics. Comparative metagenomics involves the comparison of bacterial populations between different environmental samples, different culture conditions or different microbial hosts. However, in order to do comparative metagenomics, one typically requires a sophisticated knowledge of multivariate statistics and/or advanced software programming skills. To make comparative metagenomics more accessible to microbiologists, we have developed a freely accessible, easy-to-use web server for comparative metagenomic analysis called METAGENassist. Users can upload their bacterial census data from a wide variety of common formats, using either amplified 16S rRNA data or shotgun metagenomic data. Metadata concerning environmental, culture, or host conditions can also be uploaded. During the data upload process, METAGENassist also performs an automated taxonomic-to-phenotypic mapping. Phenotypic information covering nearly 20 functional categories such as GC content, genome size, oxygen requirements, energy sources and preferred temperature range is automatically generated from the taxonomic input data. Using this phenotypically enriched data, users can then perform a variety of multivariate and univariate data analyses including fold change analysis, t-tests, PCA, PLS-DA, clustering and classification. To facilitate data processing, users are guided through a step-by-step analysis workflow using a variety of menus, information hyperlinks and check boxes. 
METAGENassist also generates colorful, publication quality tables and graphs that can be downloaded and used directly in the preparation of scientific papers. METAGENassist is available at http://www.metagenassist.ca.",2012-05-29 +25913205,Genome-scale strain designs based on regulatory minimal cut sets.,"

Motivation

Stoichiometric and constraint-based methods of computational strain design have become an important tool for rational metabolic engineering. One of those relies on the concept of constrained minimal cut sets (cMCSs). However, as most other techniques, cMCSs may consider only reaction (or gene) knockouts to achieve a desired phenotype.

Results

We generalize the cMCSs approach to constrained regulatory MCSs (cRegMCSs), where up/downregulation of reaction rates can be combined along with reaction deletions. We show that flux up/downregulations can virtually be treated as cuts allowing their direct integration into the algorithmic framework of cMCSs. Because of vastly enlarged search spaces in genome-scale networks, we developed strategies to (optionally) preselect suitable candidates for flux regulation and novel algorithmic techniques to further enhance efficiency and speed of cMCSs calculation. We illustrate the cRegMCSs approach by a simple example network and apply it then by identifying strain designs for ethanol production in a genome-scale metabolic model of Escherichia coli. The results clearly show that cRegMCSs combining reaction deletions and flux regulations provide a much larger number of suitable strain designs, many of which are significantly smaller relative to cMCSs involving only knockouts. Furthermore, with cRegMCSs, one may also enable the fine tuning of desired behaviours in a narrower range. The new cRegMCSs approach may thus accelerate the implementation of model-based strain designs for the bio-based production of fuels and chemicals.

Availability and implementation

MATLAB code and the examples can be downloaded at http://www.mpi-magdeburg.mpg.de/projects/cna/etcdownloads.html.

Contact

krishna.mahadevan@utoronto.ca or klamt@mpi-magdeburg.mpg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-04-25 +24688854,Biblio-MetReS for user-friendly mining of genes and biological processes in scientific documents.,"

Unlabelled

One way to initiate the reconstruction of molecular circuits is by using automated text-mining techniques. Developing more efficient methods for such reconstruction is a topic of active research, and those methods are typically included by bioinformaticians in pipelines used to mine and curate large literature datasets. Nevertheless, experimental biologists have a limited number of available user-friendly tools that use text-mining for network reconstruction and require no programming skills to use. One of these tools is Biblio-MetReS. Originally, this tool permitted an on-the-fly analysis of documents contained in a number of web-based literature databases to identify co-occurrence of proteins/genes. This approach ensured results that were always up-to-date with the latest live version of the databases. However, this 'up-to-dateness' came at the cost of large execution times. Here we report an evolution of the application Biblio-MetReS that permits constructing co-occurrence networks for genes, GO processes, Pathways, or any combination of the three types of entities and graphically representing those entities. We show that the performance of Biblio-MetReS in identifying gene co-occurrence is at least as good as that of other comparable applications (STRING and iHOP). In addition, we also show that the identification of GO processes is on par with that reported in the latest BioCreAtIvE challenge. Finally, we also report the implementation of a new strategy that combines on-the-fly analysis of new documents with preprocessed information from documents that were encountered in previous analyses. This combination simultaneously decreases program run time and maintains 'up-to-dateness' of the results.

Availability

http://metres.udl.cat/index.php/downloads,

Contact

metres.cmb@gmail.com.",2014-02-27 +24576332,Computational prediction of the human-microbial oral interactome.,"

Background

The oral cavity is a complex ecosystem where human chemical compounds coexist with a particular microbiota. However, shifts in the normal composition of this microbiota may result in the onset of oral ailments, such as periodontitis and dental caries. In addition, it is known that the microbial colonization of the oral cavity is mediated by protein-protein interactions (PPIs) between the host and microorganisms. Nevertheless, this kind of PPIs is still largely undisclosed. To elucidate these interactions, we have created a computational prediction method that allows us to obtain a first model of the Human-Microbial oral interactome.

Results

We collected high-quality experimental PPIs from five major human databases. The obtained PPIs were used to create our positive dataset and, indirectly, our negative dataset. The positive and negative datasets were merged and used for training and validation of a naïve Bayes classifier. For the final prediction model, we used an ensemble methodology combining five distinct PPI prediction techniques, namely: literature mining, primary protein sequences, orthologous profiles, biological process similarity, and domain interactions. Performance evaluation of our method revealed an area under the ROC-curve (AUC) value greater than 0.926, supporting our primary hypothesis, as no single set of features reached an AUC greater than 0.877. After subjecting our dataset to the prediction model, the classified result was filtered for very high confidence PPIs (probability $\geq 1 - 10^{-7}$), leading to a set of 46,579 PPIs to be further explored.

Conclusions

We believe this dataset holds not only important pathways involved in the onset of infectious oral diseases, but also potential drug-targets and biomarkers. The dataset used for training and validation, the predictions obtained and the final network are available at http://bioinformatics.ua.pt/software/oralint.",2014-02-27 +26372666,Cross-Talk in the Female Rat Mammary Gland: Influence of Aryl Hydrocarbon Receptor on Estrogen Receptor Signaling.,"

Background

Cross-talk between the aryl hydrocarbon receptor (AHR) and the estrogen receptor (ER) plays a major role in signaling processes in female reproductive organs.

Objectives

We investigated the influence of the AHR ligand 3-methylcholanthrene (3-MC) on ER-mediated signaling in mammary gland tissue of ovariectomized (ovx) rats.

Methods

After 14 days of hormonal decline, ovx rats were treated for 3 days with 4 μg/kg 17β-estradiol (E2), 15 mg/kg 8-prenylnaringenin (8-PN), 15 mg/kg 3-MC, or a combination of these compounds (E2 + 3-MC, 8-PN + 3-MC). Whole-mount preparations of the mammary gland were used to count terminal end buds (TEBs). Protein expression studies (immunohistochemistry, immunofluorescence), a cDNA microarray, pathway analyses, and quantitative real-time polymerase chain reaction (qPCR) were performed to evaluate the interaction between AHR- and ER-mediated signaling pathways.

Results

E2 treatment increased the number of TEBs and the levels of Ki-67 protein and progesterone receptor (PR); this treatment also changed the expression of 325 genes by more than 1.5-fold. Although 3-MC treatment alone had marginal impact on gene or protein expression, when rats were co-treated with 3-MC and E2, 3-MC strongly inhibited E2-induced TEB development, protein synthesis, and the expression of nearly half of E2-induced genes. This inhibitory effect of 3-MC was partially mirrored when 8-PN was used as an ER ligand. The anti-estrogenicity of ligand-activated AHR was at least partly due to decreased protein levels of ERα in ductal epithelial cells.

Conclusion

Our data show transcriptome-wide anti-estrogenic properties of ligand-activated AHR on ER-mediated processes in the mammary gland, thereby contributing an explanation for the chemopreventive and endocrine-disrupting potential of AHR ligands.

Citation

Helle J, Bader MI, Keiler AM, Zierau O, Vollmer G, Chittur SV, Tenniswood M, Kretzschmar G. 2016. Cross-talk in the female rat mammary gland: influence of aryl hydrocarbon receptor on estrogen receptor signaling. Environ Health Perspect 124:601-610; http://dx.doi.org/10.1289/ehp.1509680.",2015-09-15 +22829574,Computational approaches to standard-compliant biofilm data for reliable analysis and integration.,"The study of microorganism consortia, also known as biofilms, is associated to a number of applications in biotechnology, ecotechnology and clinical domains. Nowadays, biofilm studies are heterogeneous and data-intensive, encompassing different levels of analysis. Computational modelling of biofilm studies has become thus a requirement to make sense of these vast and ever-expanding biofilm data volumes. The rationale of the present work is a machine-readable format for representing biofilm studies and supporting biofilm data interchange and data integration. This format is supported by the Biofilm Science Ontology (BSO), the first ontology on biofilms information. The ontology is decomposed into a number of areas of interest, namely: the Experimental Procedure Ontology (EPO) which describes biofilm experimental procedures; the Colony Morphology Ontology (CMO) which characterises morphologically microorganism colonies; and other modules concerning biofilm phenotype, antimicrobial susceptibility and virulence traits. The overall objective behind BSO is to develop semantic resources to capture, represent and share data on biofilms and related experiments in a regularized fashion manner. 
Furthermore, the present work also introduces a framework in assistance of biofilm data interchange and analysis - BiofOmics (http://biofomics.org) - and a public repository on colony morphology signatures - MorphoCol (http://stardust.deb.uminho.pt/morphocol).",2012-07-24 +22570412,SurvNet: a web server for identifying network-based biomarkers that most correlate with patient survival data.,"An important task in biomedical research is identifying biomarkers that correlate with patient clinical data, and these biomarkers then provide a critical foundation for the diagnosis and treatment of disease. Conventionally, such an analysis is based on individual genes, but the results are often noisy and difficult to interpret. Using a biological network as the searching platform, network-based biomarkers are expected to be more robust and provide deep insights into the molecular mechanisms of disease. We have developed a novel bioinformatics web server for identifying network-based biomarkers that most correlate with patient survival data, SurvNet. The web server takes three input files: one biological network file, representing a gene regulatory or protein interaction network; one molecular profiling file, containing any type of gene- or protein-centred high-throughput biological data (e.g. microarray expression data or DNA methylation data); and one patient survival data file (e.g. patients' progression-free survival data). Given user-defined parameters, SurvNet will automatically search for subnetworks that most correlate with the observed patient survival data. As the output, SurvNet will generate a list of network biomarkers and display them through a user-friendly interface. SurvNet can be accessed at http://bioinformatics.mdanderson.org/main/SurvNet.",2012-05-08 +24587032,"Rab27a was identified as a prognostic biomaker by mRNA profiling, correlated with malignant progression and subtype preference in gliomas.","

Purpose

Rab27a belongs to the Rab small GTPase superfamily. The protein is membrane-bound and may be involved in protein transport and small GTPase-mediated signal transduction. Mutations in this gene are associated with Griscelli syndrome type 2. However, the prognostic and molecular features of gliomas with Rab27a expression are still unclear.

Experimental design

We used a whole-genome mRNA expression microarray dataset of 220 glioma samples from the Chinese Glioma Genome Atlas (CGGA) database (http://www.cgga.org.cn) as a discovery set. In this set, 220 gliomas, consisting of 97 WHO Grade II gliomas, 34 WHO Grade III gliomas, and 89 WHO Grade IV gliomas, were analyzed using the Kaplan-Meier method. To validate the protein expression of Rab27a, we assayed another 162 glioma samples by immunohistochemistry. Three additional datasets were obtained as validation sets. Gene ontology (GO) analysis and gene set variation analysis (GSVA) were used for the functional annotation of Rab27a in 89 WHO Grade IV gliomas.

Results

Rab27a was significantly associated with grade progression and high mortality in all grades of glioma in the discovery set. Rab27a also showed a mesenchymal subtype, G3 subtype and isocitrate dehydrogenase 1 (IDH1) wild-type preference and association with migration. The 3 validation datasets revealed similar findings. Rab27a was more highly expressed in gliomas than in normal brain tissues, and its expression increased with glioma grade progression.

Conclusions

Rab27a expression was significantly associated with grade progression and worse prognosis in all grades of gliomas, suggesting Rab27a as a novel biomarker with potentially important therapeutic implications.",2014-02-26 +24974200,TPpred2: improving the prediction of mitochondrial targeting peptide cleavage sites by exploiting sequence motifs.,"

Summary

Targeting peptides are N-terminal sorting signals in proteins that promote their translocation to mitochondria through the interaction with different protein machineries. We recently developed TPpred, a machine learning-based method scoring among the best ones available to predict the presence of a targeting peptide in a protein sequence and its cleavage site. Here we introduce TPpred2 that improves TPpred performances in the task of identifying the cleavage site of the targeting peptides. TPpred2 is now available as a web interface and as a stand-alone version for users who can freely download and adopt it for processing large volumes of sequences. Availability and implementation: TPpred2 is available both as web server and stand-alone version at http://tppred2.biocomp.unibo.it.

Contact

gigi@biocomp.unibo.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-06-27 +21546353,"MEGA5: molecular evolutionary genetics analysis using maximum likelihood, evolutionary distance, and maximum parsimony methods.","Comparative analysis of molecular sequence data is essential for reconstructing the evolutionary histories of species and inferring the nature and extent of selective forces shaping the evolution of genes and species. Here, we announce the release of Molecular Evolutionary Genetics Analysis version 5 (MEGA5), which is a user-friendly software for mining online databases, building sequence alignments and phylogenetic trees, and using methods of evolutionary bioinformatics in basic biology, biomedicine, and evolution. The newest addition in MEGA5 is a collection of maximum likelihood (ML) analyses for inferring evolutionary trees, selecting best-fit substitution models (nucleotide or amino acid), inferring ancestral states and sequences (along with probabilities), and estimating evolutionary rates site-by-site. In computer simulation analyses, ML tree inference algorithms in MEGA5 compared favorably with other software packages in terms of computational efficiency and the accuracy of the estimates of phylogenetic trees, substitution parameters, and rate variation among sites. The MEGA user interface has now been enhanced to be activity driven to make it easier for the use of both beginners and experienced scientists. This version of MEGA is intended for the Windows platform, and it has been configured for effective use on Mac OS X and Linux desktops. It is available free of charge from http://www.megasoftware.net.",2011-05-04 +24255646,Identification and removal of low-complexity sites in allele-specific analysis of ChIP-seq data.,"

Motivation

High-throughput sequencing technologies enable the genome-wide analysis of the impact of genetic variation on molecular phenotypes at unprecedented resolution. However, although powerful, these technologies can also introduce unexpected artifacts.

Results

We investigated the impact of library amplification bias on the identification of allele-specific (AS) molecular events from high-throughput sequencing data derived from chromatin immunoprecipitation assays (ChIP-seq). Putative AS DNA binding activity for RNA polymerase II was determined using ChIP-seq data derived from lymphoblastoid cell lines of two parent-daughter trios. We found that, at high-sequencing depth, many significant AS binding sites suffered from an amplification bias, as evidenced by a larger number of clonal reads representing one of the two alleles. To alleviate this bias, we devised an amplification bias detection strategy, which filters out sites with low read complexity and sites featuring a significant excess of clonal reads. This method will be useful for AS analyses involving ChIP-seq and other functional sequencing assays.

Availability

The R package abs filter for library clonality simulations and detection of amplification-biased sites is available from http://updepla1srv1.epfl.ch/waszaks/absfilter",2013-11-18 +26312189,Coalescent: an open-science framework for importance sampling in coalescent theory.,"Background. In coalescent theory, computer programs often use importance sampling to calculate likelihoods and other statistical quantities. An importance sampling scheme can exploit human intuition to improve statistical efficiency of computations, but unfortunately, in the absence of general computer frameworks on importance sampling, researchers often struggle to translate new sampling schemes computationally or benchmark against different schemes, in a manner that is reliable and maintainable. Moreover, most studies use computer programs lacking a convenient user interface or the flexibility to meet the current demands of open science. In particular, current computer frameworks can only evaluate the efficiency of a single importance sampling scheme or compare the efficiencies of different schemes in an ad hoc manner. Results. We have designed a general framework (http://coalescent.sourceforge.net; language: Java; License: GPLv3) for importance sampling that computes likelihoods under the standard neutral coalescent model of a single, well-mixed population of constant size over time following infinite sites model of mutation. The framework models the necessary core concepts, comes integrated with several data sets of varying size, implements the standard competing proposals, and integrates tightly with our previous framework for calculating exact probabilities. For a given dataset, it computes the likelihood and provides the maximum likelihood estimate of the mutation parameter. Well-known benchmarks in the coalescent literature validate the accuracy of the framework. The framework provides an intuitive user interface with minimal clutter. 
For performance, the framework switches automatically to modern multicore hardware, if available. It runs on three major platforms (Windows, Mac and Linux). Extensive tests and coverage make the framework reliable and maintainable. Conclusions. In coalescent theory, many studies of computational efficiency consider only effective sample size. Here, we evaluate proposals in the coalescent literature, to discover that the order of efficiency among the three importance sampling schemes changes when one considers running time as well as effective sample size. We also describe a computational technique called ""just-in-time delegation"" available to improve the trade-off between running time and precision by constructing improved importance sampling schemes from existing ones. Thus, our systems approach is a potential solution to the ""2(8) programs problem"" highlighted by Felsenstein, because it provides the flexibility to include or exclude various features of similar coalescent models or importance sampling schemes.",2015-08-18 +23413438,Efficient comparison of sets of intervals with NC-lists.,"

Motivation

High-throughput sequencing produces in a small amount of time a large amount of data, which are usually difficult to analyze. Mapping the reads to the transcripts they originate from, to quantify the expression of the genes, is a simple, yet time demanding, example of analysis. Fast genomic comparison algorithms are thus crucial for the analysis of the ever-expanding number of reads sequenced.

Results

We used NC-lists to implement an algorithm that compares a set of query intervals with a set of reference intervals in two steps. The first step, a pre-processing done once for all, requires time O[#R log(#R) + #Q log(#Q)], where Q and R are the sets of query and reference intervals. The search phase requires constant space, and time O[#R + #Q + #M], where M is the set of overlaps. We showed that our algorithm compares favorably with five other algorithms, especially when several comparisons are performed.

Availability

The algorithm has been included to S-MART, a versatile tool box for RNA-Seq analysis, freely available at http://urgi.versailles.inra.fr/Tools/S-Mart. The algorithm can be used for many kinds of data (sequencing reads, annotations, etc.) in many formats (GFF3, BED, SAM, etc.), on any operating system. It is thus readily useable for the analysis of next-generation sequencing data.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-14 +25080090,Integrative analysis reveals disease-associated genes and biomarkers for prostate cancer progression.,"

Background

Prostate cancer is one of the most common complex diseases and a leading cause of death in men. Identification of prostate cancer-associated genes and biomarkers is thus essential, as it can provide insights into the mechanisms underlying disease progression and advance early diagnosis and the development of effective therapies.

Methods

In this study, we presented an integrative analysis of gene expression profiling and protein interaction network at a systematic level to reveal candidate disease-associated genes and biomarkers for prostate cancer progression. At first, we reconstructed the human prostate cancer protein-protein interaction network (HPC-PPIN) and the network was then integrated with the prostate cancer gene expression data to identify modules related to different phases in prostate cancer. At last, the candidate module biomarkers were validated by its predictive ability of prostate cancer progression.

Results

Different phases-specific modules were identified for prostate cancer. Among these modules, transcription Androgen Receptor (AR) nuclear signaling and Epidermal Growth Factor Receptor (EGFR) signalling pathway were shown to be the pathway targets for prostate cancer progression. The identified candidate disease-associated genes showed better predictive ability of prostate cancer progression than those of published biomarkers. In context of functional enrichment analysis, interestingly candidate disease-associated genes were enriched in the nucleus and different functions were encoded for potential transcription factors, for examples key players as AR, Myc, ESR1 and hidden player as Sp1 which was considered as a potential novel biomarker for prostate cancer.

Conclusions

The successful results on prostate cancer samples demonstrated that the integrative analysis is powerful and useful approach to detect candidate disease-associate genes and modules which can be used as the potential biomarkers for prostate cancer progression. The data, tools and supplementary files for this integrative analysis are deposited at http://www.ibio-cn.org/HPC-PPIN/.",2014-05-08 +30727246,First Report of Powdery Mildew Caused by Oidium hortensiae on Mophead Hydrangea in Korea.,"Hydrangea macrophylla (Thunb.) Ser., known as mophead hydrangea, is native to Japan and is used as a potted ornamental or is planted for landscaping in gardens worldwide. In May 2011, powdery mildew occurred on potted mophead hydrangea cv. Emerald plants in polyethylene-film-covered greenhouses in Icheon, Korea. Heavily infected plantings were unmarketable, mainly due to purplish red discoloration and crinkling of leaves. Such powdery mildew symptoms on mophead hydrangea in gardens had been often found in Korea since 2001, and the collections (n = 10) were deposited in the Korea University herbarium (KUS). In all cases, there was no trace of chasmothecia formation. Mycelium was effuse on both sides of leaves, young stems, and flower petals. Appressoria were well developed, lobed, and solitary or in opposite pairs. Conidiophores were cylindrical, 70 to 145 × 7.5 to 10 μm, and composed of three to four cells. Foot-cells of conidiophores were straight to sub-straight, cylindric, short, and mostly less than 30 μm long. Conidia produced singly were ellipsoid to oval, 32 to 50 × 14 to 22 μm with a length/width ratio of 1.7 to 2.8, lacked fibrosin bodies, and showed angular/rectangular wrinkling of outer walls. Germ tubes were produced on the perihilar position of conidia. Primary conidia were apically conical, basally rounded to subtruncate, 32 to 42 × 14 to 18 μm, and thus generally smaller than the secondary conidia. 
The morphological characteristics are consistent with previous descriptions of Oidium hortensiae Jørst. (3,4). To confirm the identification, the complete internal transcribed spacer (ITS) region of rDNA from KUS-F25514 was amplified with primers ITS5 and P3 and directly sequenced. The resulting sequence of 694 bp was deposited in GenBank (Accession No. JQ669944). There was no ITS sequence data known from powdery mildews on Hydrangea. Therefore, this is the first sequence of O. hortensiae submitted to GenBank. Nevertheless, a GenBank BLAST search of this sequence showed >99% similarity with those of Oidium spp. recorded on crassulacean hosts (e.g. GenBank Accession Nos. EU185641 ex Sedum, EU185636 ex Echeveria, and EU185639 ex Dudleya) (2), suggesting their close phylogenetic relationship. Pathogenicity was confirmed through inoculation by gently pressing diseased leaves onto leaves of five healthy potted mophead hydrangea cv. Emerald plants. Five noninoculated plants of the same cultivar served as controls. Plants were maintained in a greenhouse at 22 ± 2°C. Inoculated plants developed signs and symptoms after 6 days, whereas the control plants remained healthy. The fungus present on the inoculated plants was morphologically identical to that originally observed on diseased plants, fulfilling Koch's postulates. Occurrence of powdery mildew disease on mophead hydrangea is circumglobal (1). To our knowledge, this is the first report of powdery mildew disease caused by O. hortensiae on mophead hydrangea in Korea. Powdery mildew infections in Korea pose a serious threat to the continued production of quality potted mophead hydrangea in polyethylene-film-covered greenhouses. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved March 19, 2012, from http://nt.ars-grin.gov/fungaldatabases/ . (2) B. Henricot. Plant Pathol. 57:779, 2008. (3) A. Schmidt and M. Scholler. Mycotaxon 115:287, 2011. 
(4) S. Tanda. J. Agric. Sci. Tokyo Univ. Agric. 43:253, 1999.",2012-07-01 +22914220,shortran: a pipeline for small RNA-seq data analysis.,"

Unlabelled

High-throughput sequencing currently generates a wealth of small RNA (sRNA) data, making data mining a topical issue. Processing of these large data sets is inherently multidimensional as length, abundance, sequence composition, and genomic location all hold clues to sRNA function. Analysis can be challenging because the formulation and testing of complex hypotheses requires combined use of visualization, annotation and abundance profiling. To allow flexible generation and querying of these disparate types of information, we have developed the shortran pipeline for analysis of plant or animal short RNA sequencing data. It comprises nine modules and produces both graphical and MySQL format output.

Availability

shortran is freely available and can be downloaded from http://users-mb.au.dk/pmgrp/shortran/.",2012-08-22 +21676921,Genome-wide analysis reveals the active roles of keratinocytes in oral mucosal adaptive immune response.,"To elucidate the roles of oral keratinocytes in the adaptive immune response of oral mucosa, global gene expression analysis was performed by microarray technique and integrating computational methods, including hierarchical clustering, biological process Gene Ontology analysis, Kyoto Encyclopedia of Genes and Genomes pathway analysis, self-organizing maps (SOMs) and biological association network analysis (BAN). Raw data from microarray experiments were uploaded to the Gene Expression Omnibus Database, http://www.ncbi.nlm.nih.gov/geo/ (GEO accession GSE28035). We identified 666 differentially expressed genes in the early stage (48 h) and 993 in the late stage (96 h) of the oral mucosal adaptive immune response. The analysis revealed that oral keratinocytes exerted diverse biological functions in different stages of immune response. Specifically, in 48 h the differentially expressed genes encompassed an array of biological ontology associated with immune response, such as antigen processing and presentation, and positive regulation of T-cell-mediated cytotoxicity. Several pathways which have been reported to be critical in inflammation, including mitogen-activated protein kinase pathway, were activated. Furthermore, after BAN construction, some putative hub genes and networks such as interleukin-1α and its subnetwork were recognized. 
Taken together, these results give substantial evidence to support the active roles of keratinocytes in the oral mucosal adaptive immune response.",2011-06-15 +25503918,"Incidence of sickle cell trait--United States, 2010.","Persons with sickle cell trait (SCT) are heterozygous carriers of an abnormal ß-globin gene that results in the production of an abnormal hemoglobin, Hb S, which can distort red blood cells (http://www.cdc.gov/ncbddd/sicklecell/facts.html). All state newborn screening (NBS) programs have provided universal sickle cell disease (SCD) screening for newborns since 2006. Screening for SCD detects both SCD and SCT. To obtain up-to-date measures of the occurrence of SCT among newborns by race/ethnicity and state of birth, data collected by state NBS programs in 2010 were examined. In 2010, the incidence of SCT in participating states was 15.5 per 1,000 newborns overall; 73.1 among black newborns and 6.9 among Hispanic newborns. Incidence by state ranged from 0.8 per 1,000 screened newborns in Montana to 34.1 per 1,000 in Mississippi. Although the occurrence of SCT varies greatly from state-to-state and among different races and ethnicities, every state and racial/ethnic population includes persons living with the condition. The period immediately following NBS is ideal for primary care providers and genetic counselors to begin educating the families of identified persons with SCT about potential health complications and reproductive considerations.",2014-12-01 +24817727,Crystallization and preliminary X-ray diffraction analysis of a novel β-L-arabinofuranosidase (HypBA1) from Bifidobacterium longum.,"The β-L-arabinofuranosidase (HypBA1) from Bifidobacterium longum JCM 1217 hydrolyzes the β-1,2-linked arabinofuranose disaccharide to release L-arabinoses. HypBA1 was classified into glycoside hydrolase family 127 (GH127) by the CAZy website (http://www.cazy.org/). 
The enzyme was expressed in Escherichia coli and the purified recombinant protein was crystallized. Crystals belonging to the primitive hexagonal space group P3x21, with unit-cell parameters a = b = 75.9, c = 254.0Å, were obtained by the sitting-drop vapour-diffusion method and diffracted to 2.78Å resolution. A BLASTP search (http://blast.ncbi.nlm.nih.gov/) of the Protein Data Bank did not reveal any similar crystal structures. Structural determination by using SeMet MAD and MIR methods is in progress.",2014-04-17 +23709496,PIUS: peptide identification by unbiased search.,"

Summary

We present PIUS, a tool that identifies peptides from tandem mass spectrometry data by analyzing the six-frame translation of a complete genome. It differs from earlier studies that have performed such a genomic search in two ways: (i) it considers a larger search space and (ii) it is designed for natural peptide identification rather than proteomics. Differently from other peptidomics tools designed for genome-wide searches, PIUS does not limit the analysis to a set of sequences that match a list of de novo reconstructions.

Availability

Source code, executables and a detailed technical report are freely available at http://dtai.cs.kuleuven.be/ml/systems/pius.

Contact

eduardo.costa@cs.kuleuven.be

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-24 +24559394,Health inequity in access to bariatric surgery: a protocol for a systematic review.,"

Background

Bariatric surgery is the only weight-loss treatment available that results in both sustained weight loss and improvements of obesity-related comorbidities. Individuals who meet the eligibility criteria for bariatric surgery are generally older, come from racial or ethnic minorities, are economically disadvantaged, and have low levels of education. However, the population who actually receives bariatric surgery does not reflect the individuals who need it the most. The objective is to conduct a systematic review of the literature exploring the inequities to the access of bariatric surgery.

Methods/design

EMBASE and Medline databases will be searched for observational studies that compared at least one of the PROGRESS-PLUS sociodemographic characteristics of patients eligible for bariatric surgery to those who actually received the procedure. Articles published in the year 1980 to present with no language restrictions will be included. For inclusion, studies must only include adults (≥18 years old) who meet National Institutes of Health (NIH) eligibility criteria for bariatric surgery defined as having either (1) a body mass index (BMI) of 40 kg/m² or greater; or (2) BMI of 35 kg/m² or greater with significant weight-related comorbidities. Eligible interventions will include malabsorptive, restrictive, and mixed bariatric procedures.

Discussion

There appear to be inequities in access to bariatric surgery. In order to resolve the health inequity in the treatment of obesity, a synthesis of the literature is needed to explore and identify barriers to accessing bariatric surgery. It is anticipated that the results from this systematic review will have important implications for advancing solutions to minimize inequities in the utilization of bariatric surgery. http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42013004920.",2014-02-21 +23709497,viRome: an R package for the visualization and analysis of viral small RNA sequence datasets.,"

Summary

RNA interference (RNAi) is known to play an important part in defence against viruses in a range of species. Second-generation sequencing technologies allow us to assay these systems and the small RNAs that play a key role with unprecedented depth. However, scientists need access to tools that can condense, analyse and display the resulting data. Here, we present viRome, a package for R that takes aligned sequence data and produces a range of essential plots and reports.

Availability and implementation

viRome is released under the BSD license as a package for R available for both Windows and Linux http://virome.sf.net. Additional information and a tutorial are available on the ARK-Genomics website: http://www.ark-genomics.org/bioinformatics/virome.

Contact

mick.watson@roslin.ed.ac.uk.",2013-05-24 +21989143,Accurate and fast estimation of taxonomic profiles from metagenomic shotgun sequences.,"

Background

A major goal of metagenomics is to characterize the microbial composition of an environment. The most popular approach relies on 16S rRNA sequencing, however this approach can generate biased estimates due to differences in the copy number of the gene between even closely related organisms, and due to PCR artifacts. The taxonomic composition can also be determined from metagenomic shotgun sequencing data by matching individual reads against a database of reference sequences. One major limitation of prior computational methods used for this purpose is the use of a universal classification threshold for all genes at all taxonomic levels.

Results

We propose that better classification results can be obtained by tuning the taxonomic classifier to each matching length, reference gene, and taxonomic level. We present a novel taxonomic classifier MetaPhyler (http://metaphyler.cbcb.umd.edu), which uses phylogenetic marker genes as a taxonomic reference. Results on simulated datasets demonstrate that MetaPhyler outperforms other tools commonly used in this context (CARMA, Megan and PhymmBL). We also present interesting results by analyzing a real metagenomic dataset.

Conclusions

We have introduced a novel taxonomic classification method for analyzing the microbial diversity from whole-metagenome shotgun sequences. Compared with previous approaches, MetaPhyler is much more accurate in estimating the phylogenetic composition. In addition, we have shown that MetaPhyler can be used to guide the discovery of novel organisms from metagenomic samples.",2011-07-27 +21790521,Critical incident reports concerning anaesthetic equipment: analysis of the UK National Reporting and Learning System (NRLS) data from 2006-2008*.,"Anaesthetic equipment plays a central role in anaesthetic practice but brings the potential for malfunction or misuse. We aimed to explore the national picture by reviewing patient safety incidents relating to anaesthetic equipment from the National Reporting and Learning System for England and Wales between 2006 and 2008. We searched the database using the system's own classification and by scrutinising the free text of relevant incidents. There were 1029 relevant incidents. Of these, 410 (39.8%) concerned patient monitoring, most commonly screen failure during anaesthesia, failure of one modality or failure to transfer data automatically from anaesthetic room to operating theatre. Problems relating to ventilators made up 185 (17.9%) of the reports. Sudden failures during anaesthesia accounted for 142 (13.8%) of these, with a further 10 cases (0.9%) where malfunction caused a sustained or increasing positive pressure in the patient's airway. Leaks made up 99 (9.6%) of incidents and 53 (5.2%) of incidents arose from the use of infusion pumps. Most (89%) of the incidents caused no patient harm; only 30 (2.9%) were judged to have led to moderate or severe harm. Although equipment was often faulty, user error or unfamiliarity also played a part. 
A large variety of causes led to a relatively small number of clinical scenarios, that anaesthetists should be ready, both individually and organisationally, to manage even when the cause is not apparent. We make recommendations for enhancing patient safety with respect to equipment. You can respond to this article at http://www.anaesthesiacorrespondence.com.",2011-07-25 +25113778,"Co-inheritance of the rare β hemoglobin variants Hb Yaounde, Hb Görwihl and Hb City of Hope with other alterations in globin genes: impact in genetic counseling.","

Purpose

Nearly 1183 different molecular defects of the globin genes leading to hemoglobin variants have been identified (http://globin.bx.psu.edu) over the past decades. The purpose of this study was to report three cases, never described in the literature, of co-inheritance of three β hemoglobin variants with other alterations in globin genes and to evaluate the clinical significance to conduct an appropriate genetic counseling.

Patients and methods

We report the molecular study performed in three probands and their families, sampling during the screening program conducted at the Laboratory for Molecular Prenatal Diagnosis of Hemoglobinopathies at Villa Sofia-Cervello Hospital in Palermo, Italy.

Results

This work allowed us to describe the co-inheritance of three rare β hemoglobin variants with other alterations in globin genes: the β hemoglobin variant Hb Yaounde [β134(H12)Val>Ala], found for the first time in combination with ααα(anti3.7) arrangement, and the β hemoglobin variants Hb Görwihl [β5(A2)Pro>Ala] and Hb City of Hope [β69(E13)Gly>Ser], found both in association with β(0) -thalassemia.

Conclusion

The present work emphasizes the importance of a careful evaluation of the hematological data, especially in cases of atypical hematological parameters, to carry out an adequate and complete molecular study and to formulate an appropriate genetic counseling for couples at risk.",2014-12-22 +23677614,aLeaves facilitates on-demand exploration of metazoan gene family trees on MAFFT sequence alignment server with enhanced interactivity.,"We report a new web server, aLeaves (http://aleaves.cdb.riken.jp/), for homologue collection from diverse animal genomes. In molecular comparative studies involving multiple species, orthology identification is the basis on which most subsequent biological analyses rely. It can be achieved most accurately by explicit phylogenetic inference. More and more species are subjected to large-scale sequencing, but the resultant resources are scattered in independent project-based, and multi-species, but separate, web sites. This complicates data access and is becoming a serious barrier to the comprehensiveness of molecular phylogenetic analysis. aLeaves, launched to overcome this difficulty, collects sequences similar to an input query sequence from various data sources. The collected sequences can be passed on to the MAFFT sequence alignment server (http://mafft.cbrc.jp/alignment/server/), which has been significantly improved in interactivity. This update enables to switch between (i) sequence selection using the Archaeopteryx tree viewer, (ii) multiple sequence alignment and (iii) tree inference. This can be performed as a loop until one reaches a sensible data set, which minimizes redundancy for better visibility and handling in phylogenetic inference while covering relevant taxa. 
The work flow achieved by the seamless link between aLeaves and MAFFT provides a convenient online platform to address various questions in zoology and evolutionary biology.",2013-05-15 +23435069,BioJS: an open source JavaScript framework for biological data visualization.,"

Summary

BioJS is an open-source project whose main objective is the visualization of biological data in JavaScript. BioJS provides an easy-to-use consistent framework for bioinformatics application programmers. It follows a community-driven standard specification that includes a collection of components purposely designed to require a very simple configuration and installation. In addition to the programming framework, BioJS provides a centralized repository of components available for reutilization by the bioinformatics community.

Availability and implementation

http://code.google.com/p/biojs/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-23 +25644270,Expitope: a web server for epitope expression.,"

Motivation

Adoptive T cell therapies based on introduction of new T cell receptors (TCRs) into patient recipient T cells is a promising new treatment for various kinds of cancers. A major challenge, however, is the choice of target antigens. If an engineered TCR can cross-react with self-antigens in healthy tissue, the side-effects can be devastating. We present the first web server for assessing epitope sharing when designing new potential lead targets. We enable the users to find all known proteins containing their peptide of interest. The web server returns not only exact matches, but also approximate ones, allowing a number of mismatches of the users choice. For the identified candidate proteins the expression values in various healthy tissues, representing all vital human organs, are extracted from RNA Sequencing (RNA-Seq) data as well as from some cancer tissues as control. All results are returned to the user sorted by a score, which is calculated using well-established methods and tools for immunological predictions. It depends on the probability that the epitope is created by proteasomal cleavage and its affinities to the transporter associated with antigen processing and the major histocompatibility complex class I alleles. With this framework, we hope to provide a helpful tool to exclude potential cross-reactivity in the early stage of TCR selection for use in design of adoptive T cell immunotherapy.

Availability and implementation

The Expitope web server can be accessed via http://webclu.bio.wzw.tum.de/expitope.",2015-02-01 +22922203,BIND - an algorithm for loss-less compression of nucleotide sequence data.,"Recent advances in DNA sequencing technologies have enabled the current generation of life science researchers to probe deeper into the genomic blueprint. The amount of data generated by these technologies has been increasing exponentially since the last decade. Storage, archival and dissemination of such huge data sets require efficient solutions, both from the hardware as well as software perspective. The present paper describes BIND-an algorithm specialized for compressing nucleotide sequence data. By adopting a unique 'block-length' encoding for representing binary data (as a key step), BIND achieves significant compression gains as compared to the widely used general purpose compression algorithms (gzip, bzip2 and lzma). Moreover, in contrast to implementations of existing specialized genomic compression approaches, the implementation of BIND is enabled to handle non-ATGC and lowercase characters. This makes BIND a loss-less compression approach that is suitable for practical use. More importantly, validation results of BIND (with real-world data sets) indicate reasonable speeds of compression and decompression that can be achieved with minimal processor/ memory usage. BIND is available for download at http://metagenomics.atc.tcs.com/compression/BIND. No license is required for academic or non-profit use.",2012-09-01 +24297251,"starBase v2.0: decoding miRNA-ceRNA, miRNA-ncRNA and protein-RNA interaction networks from large-scale CLIP-Seq data.","Although microRNAs (miRNAs), other non-coding RNAs (ncRNAs) (e.g. lncRNAs, pseudogenes and circRNAs) and competing endogenous RNAs (ceRNAs) have been implicated in cell-fate determination and in various human diseases, surprisingly little is known about the regulatory interaction networks among the multiple classes of RNAs. 
In this study, we developed starBase v2.0 (http://starbase.sysu.edu.cn/) to systematically identify the RNA-RNA and protein-RNA interaction networks from 108 CLIP-Seq (PAR-CLIP, HITS-CLIP, iCLIP, CLASH) data sets generated by 37 independent studies. By analyzing millions of RNA-binding protein binding sites, we identified ∼9000 miRNA-circRNA, 16 000 miRNA-pseudogene and 285,000 protein-RNA regulatory relationships. Moreover, starBase v2.0 has been updated to provide the most comprehensive CLIP-Seq experimentally supported miRNA-mRNA and miRNA-lncRNA interaction networks to date. We identified ∼10,000 ceRNA pairs from CLIP-supported miRNA target sites. By combining 13 functional genomic annotations, we developed miRFunction and ceRNAFunction web servers to predict the function of miRNAs and other ncRNAs from the miRNA-mediated regulatory networks. Finally, we developed interactive web implementations to provide visualization, analysis and downloading of the aforementioned large-scale data sets. This study will greatly expand our understanding of ncRNA functions and their coordinated regulatory networks.",2013-12-01 +22292669,TranscriptomeBrowser 3.0: introducing a new compendium of molecular interactions and a new visualization tool for the study of gene regulatory networks.,"

Background

Deciphering gene regulatory networks by in silico approaches is a crucial step in the study of the molecular perturbations that occur in diseases. The development of regulatory maps is a tedious process requiring the comprehensive integration of various evidences scattered over biological databases. Thus, the research community would greatly benefit from having a unified database storing known and predicted molecular interactions. Furthermore, given the intrinsic complexity of the data, the development of new tools offering integrated and meaningful visualizations of molecular interactions is necessary to help users drawing new hypotheses without being overwhelmed by the density of the subsequent graph.

Results

We extend the previously developed TranscriptomeBrowser database with a set of tables containing 1,594,978 human and mouse molecular interactions. The database includes: (i) predicted regulatory interactions (computed by scanning vertebrate alignments with a set of 1,213 position weight matrices), (ii) potential regulatory interactions inferred from systematic analysis of ChIP-seq experiments, (iii) regulatory interactions curated from the literature, (iv) predicted post-transcriptional regulation by micro-RNA, (v) protein kinase-substrate interactions and (vi) physical protein-protein interactions. In order to easily retrieve and efficiently analyze these interactions, we developed InteractomeBrowser, a graph-based knowledge browser that comes as a plug-in for TranscriptomeBrowser. The first objective of InteractomeBrowser is to provide a user-friendly tool to get new insight into any gene list by providing a context-specific display of putative regulatory and physical interactions. To achieve this, InteractomeBrowser relies on a ""cell compartments-based layout"" that makes use of a subset of the Gene Ontology to map gene products onto relevant cell compartments. This layout is particularly powerful for visual integration of heterogeneous biological information and is a productive avenue in generating new hypotheses. The second objective of InteractomeBrowser is to fill the gap between interaction databases and dynamic modeling. It is thus compatible with the network analysis software Cytoscape and with the Gene Interaction Network simulation software (GINsim). We provide examples underlying the benefits of this visualization tool for large gene set analysis related to thymocyte differentiation.

Conclusions

The InteractomeBrowser plugin is a powerful tool to get quick access to a knowledge database that includes both predicted and validated molecular interactions. InteractomeBrowser is available through the TranscriptomeBrowser framework and can be found at: http://tagc.univ-mrs.fr/tbrowser/. Our database is updated on a regular basis.",2012-01-31 +24075951,Xmipp 3.0: an improved software suite for image processing in electron microscopy.,"Xmipp is a specialized software package for image processing in electron microscopy, and that is mainly focused on 3D reconstruction of macromolecules through single-particles analysis. In this article we present Xmipp 3.0, a major release which introduces several improvements and new developments over the previous version. A central improvement is the concept of a project that stores the entire processing workflow from data import to final results. It is now possible to monitor, reproduce and restart all computing tasks as well as graphically explore the complete set of interrelated tasks associated to a given project. Other graphical tools have also been improved such as data visualization, particle picking and parameter ""wizards"" that allow the visual selection of some key parameters. Many standard image formats are transparently supported for input/output from all programs. Additionally, results have been standardized, facilitating the interoperation between different Xmipp programs. Finally, as a result of a large code refactoring, the underlying C++ libraries are better suited for future developments and all code has been optimized. 
Xmipp is an open-source package that is freely available for download from: http://xmipp.cnb.csic.es.",2013-09-26 +21785129,Characterization of Oncidium 'Gower Ramsey' transcriptomes using 454 GS-FLX pyrosequencing and their application to the identification of genes associated with flowering time.,"Oncidium 'Gower Ramsey' is a valuable and successful commercial orchid for the floriculture industry in Taiwan. However, no genome reference for entire sequences of the transcribed genes currently exists for Oncidium orchids, to facilitate the development of molecular biological studies and the breeding of these orchids. In this study, we generated Oncidium cDNA libraries for six different organs: leaves, pseudobulbs, young inflorescences, inflorescences, flower buds and mature flowers. We utilized 454-pyrosequencing technology to perform high-throughput deep sequencing of the Oncidium transcriptome, yielding >0.9 million reads with an average length of 328 bp, for a total of 301 million bases. De novo assembly of the sequences yielded 50,908 contig sequences with an average length of 493 bp from 796,463 reads and 120,219 singletons. The assembled sequences were annotated using BLAST, and a total of 12,757 and 13,931 unigene transcripts from the Arabidopsis and rice genomes were matched by TBLASTX, respectively. A Gene Ontology (GO) analysis of the annotated Oncidium contigs revealed that the majority of sequenced genes were associated with 'unknown molecular function', 'cellular process' and 'intracellular components'. Furthermore, a complete flowering-associated expressed sequence that included most of the genes in the photoperiod pathway and the 15 CONSTANS-LIKE (COL) homologs with the conserved CCT domain was obtained in this collection. These data revealed that the Oncidium expressed sequence tag (EST) database generated in this study has sufficient coverage to be used as a tool to investigate the flowering pathway and various other biological pathways in orchids. 
An OncidiumOrchidGenomeBase (OOGB) website has been constructed and is publicly available online (http://predictor.nchu.edu.tw/oogb/).",2011-07-23 +25428351,OrthoDB v8: update of the hierarchical catalog of orthologs and the underlying free software.,"Orthology, refining the concept of homology, is the cornerstone of evolutionary comparative studies. With the ever-increasing availability of genomic data, inference of orthology has become instrumental for generating hypotheses about gene functions crucial to many studies. This update of the OrthoDB hierarchical catalog of orthologs (http://www.orthodb.org) covers 3027 complete genomes, including the most comprehensive set of 87 arthropods, 61 vertebrates, 227 fungi and 2627 bacteria (sampling the most complete and representative genomes from over 11,000 available). In addition to the most extensive integration of functional annotations from UniProt, InterPro, GO, OMIM, model organism phenotypes and COG functional categories, OrthoDB uniquely provides evolutionary annotations including rates of ortholog sequence divergence, copy-number profiles, sibling groups and gene architectures. We re-designed the entirety of the OrthoDB website from the underlying technology to the user interface, enabling the user to specify species of interest and to select the relevant orthology level by the NCBI taxonomy. The text searches allow use of complex logic with various identifiers of genes, proteins, domains, ontologies or annotation keywords and phrases. Gene copy-number profiles can also be queried. This release comes with the freely available underlying ortholog clustering pipeline (http://www.orthodb.org/software).",2014-11-26 +24548788,DR-GAS: a database of functional genetic variants and their phosphorylation states in human DNA repair systems.,"We present DR-GAS(1), a unique, consolidated and comprehensive DNA repair genetic association studies database of human DNA repair system. 
It presents information on repair genes, assorted mechanisms of DNA repair, linkage disequilibrium, haplotype blocks, nsSNPs, phosphorylation sites, associated diseases, and pathways involved in repair systems. DNA repair is an intricate process which plays an essential role in maintaining the integrity of the genome by eradicating the damaging effect of internal and external changes in the genome. Hence, it is crucial to extensively understand the intact process of DNA repair, genes involved, non-synonymous SNPs which perhaps affect the function, phosphorylated residues and other related genetic parameters. All the corresponding entries for DNA repair genes, such as proteins, OMIM IDs, literature references and pathways are cross-referenced to their respective primary databases. DNA repair genes and their associated parameters are either represented in tabular or in graphical form through images elucidated by computational and statistical analyses. It is believed that the database will assist molecular biologists, biotechnologists, therapeutic developers and other scientific community to encounter biologically meaningful information, and meticulous contribution of genetic level information towards treacherous diseases in human DNA repair systems. DR-GAS is freely available for academic and research purposes at: http://www.bioinfoindia.org/drgas.",2014-02-16 +21515631,miRanalyzer: an update on the detection and analysis of microRNAs in high-throughput sequencing experiments.,"We present a new version of miRanalyzer, a web server and stand-alone tool for the detection of known and prediction of new microRNAs in high-throughput sequencing experiments. The new version has been notably improved regarding speed, scope and available features. 
Alignments are now based on the ultrafast short-read aligner Bowtie (granting also colour space support, allowing mismatches and improving speed) and 31 genomes, including 6 plant genomes, can now be analysed (previous version contained only 7). Differences between plant and animal microRNAs have been taken into account for the prediction models and differential expression of both, known and predicted microRNAs, between two conditions can be calculated. Additionally, consensus sequences of predicted mature and precursor microRNAs can be obtained from multiple samples, which increases the reliability of the predicted microRNAs. Finally, a stand-alone version of the miRanalyzer that is based on a local and easily customized database is also available; this allows the user to have more control on certain parameters as well as to use specific data such as unpublished assemblies or other libraries that are not available in the web server. miRanalyzer is available at http://bioinfo2.ugr.es/miRanalyzer/miRanalyzer.php.",2011-04-22 +24592289,Integrative immunoinformatics for Mycobacterial diseases in R platform.,"The sequencing of genomes of the pathogenic Mycobacterial species causing pulmonary and extrapulmonary tuberculosis, leprosy and other atypical mycobacterial infections, offer immense opportunities for discovering new therapeutics and identifying new vaccine candidates. Enhanced RV, which uses additional algorithms to Reverse Vaccinology (RV), has increased potential to reduce likelihood of undesirable features including allergenicity and immune cross reactivity to host. The starting point for MycobacRV database construction includes collection of known vaccine candidates and a set of predicted vaccine candidates identified from the whole genome sequences of 22 mycobacterium species and strains pathogenic to human and one non-pathogenic Mycobacterium tuberculosis H37Ra strain. 
These predicted vaccine candidates are the adhesins and adhesin-like proteins obtained using SPAAN at Pad > 0.6 and screening for putative extracellular or surface localization characteristics using PSORTb v.3.0 at very stringent cutoff. Subsequently, these protein sequences were analyzed through 21 publicly available algorithms to obtain Orthologs, Paralogs, BetaWrap Motifs, Transmembrane Domains, Signal Peptides, Conserved Domains, and similarity to human proteins, T cell epitopes, B cell epitopes, Discotopes and potential Allergens predictions. The Enhanced RV information was analysed in R platform through scripts following well structured decision trees to derive a set of nonredundant 233 most probable vaccine candidates. Additionally, the degree of conservation of potential epitopes across all orthologs has been obtained with reference to the M. tuberculosis H37Rv strain, the most commonly used strain in M. tuberculosis studies. Utilities for the vaccine candidate search and analysis of epitope conservation across the orthologs with reference to M. tuberculosis H37Rv strain are available in the mycobacrvR package in R platform accessible from the ""Download"" tab of MycobacRV webserver. MycobacRV an immunoinformatics database of known and predicted mycobacterial vaccine candidates has been developed and is freely available at http://mycobacteriarv.igib.res.in.",2014-02-15 +24532766,MEMOSys 2.0: an update of the bioinformatics database for genome-scale models and genomic data.,"The MEtabolic MOdel research and development System (MEMOSys) is a versatile database for the management, storage and development of genome-scale models (GEMs). Since its initial release, the database has undergone major improvements, and the new version introduces several new features. First, the novel concept of derived models allows users to create model hierarchies that automatically propagate modifications along their order. 
Second, all stored components can now be easily enhanced with additional annotations that can be directly extracted from a supplied Systems Biology Markup Language (SBML) file. Third, the web application has been substantially revised and now features new query mechanisms, an easy search system for reactions and new link-out services to publicly available databases. Fourth, the updated database now contains 20 publicly available models, which can be easily exported into standardized formats for further analysis. Fifth, MEMOSys 2.0 is now also available as a fully configured virtual image and can be found online at http://www.icbi.at/memosys and http://memoys.i-med.ac.at. Database URL: http://memosys.i-med.ac.at.",2014-02-14 +24532728,GUILDify: a web server for phenotypic characterization of genes through biological data integration and network-based prioritization algorithms.,"

Summary

Determining genetic factors underlying various phenotypes is hindered by the involvement of multiple genes acting cooperatively. Over the past years, disease-gene prioritization has been central to identify genes implicated in human disorders. Special attention has been paid on using physical interactions between the proteins encoded by the genes to link them with diseases. Such methods exploit the guilt-by-association principle in the protein interaction network to uncover novel disease-gene associations. These methods rely on the proximity of a gene in the network to the genes associated with a phenotype and require a set of initial associations. Here, we present GUILDify, an easy-to-use web server for the phenotypic characterization of genes. GUILDify offers a prioritization approach based on the protein-protein interaction network where the initial phenotype-gene associations are retrieved via free text search on biological databases. GUILDify web server does not restrict the prioritization to any predefined phenotype, supports multiple species and accepts user-specified genes. It also prioritizes drugs based on the ranking of their targets, unleashing opportunities for repurposing drugs for novel therapies.

Availability and implementation

Available online at http://sbi.imim.es/GUILDify.php",2014-02-14 +23209027,miRNA target enrichment analysis reveals directly active miRNAs in health and disease.,"microRNAs (miRNAs) are short non-coding regulatory RNA molecules. The activity of a miRNA in a biological process can often be reflected in the expression program that characterizes the outcome of the activity. We introduce a computational approach that infers such activity from high-throughput data using a novel statistical methodology, called minimum-mHG (mmHG), that examines mutual enrichment in two ranked lists. Based on this methodology, we provide a user-friendly web application that supports the statistical assessment of miRNA target enrichment analysis (miTEA) in the top of a ranked list of genes or proteins. Using miTEA, we analyze several target prediction tools by examining performance on public miRNA constitutive expression data. We also apply miTEA to analyze several integrative biology data sets, including a novel matched miRNA/mRNA data set covering nine human tissue types. Our novel findings include proposed direct activity of miR-519 in placenta, a direct activity of the oncogenic miR-15 in different healthy tissue types and a direct activity of the poorly characterized miR-768 in both healthy tissue types and cancer cell lines. The miTEA web application is available at http://cbl-gorilla.cs.technion.ac.il/miTEA/.",2012-12-02 +23047557,SCALCE: boosting sequence compression algorithms using locally consistent encoding.,"

Motivation

The high throughput sequencing (HTS) platforms generate unprecedented amounts of data that introduce challenges for the computational infrastructure. Data management, storage and analysis have become major logistical obstacles for those adopting the new platforms. The requirement for large investment for this purpose almost signalled the end of the Sequence Read Archive hosted at the National Center for Biotechnology Information (NCBI), which holds most of the sequence data generated world wide. Currently, most HTS data are compressed through general purpose algorithms such as gzip. These algorithms are not designed for compressing data generated by the HTS platforms; for example, they do not take advantage of the specific nature of genomic sequence data, that is, limited alphabet size and high similarity among reads. Fast and efficient compression algorithms designed specifically for HTS data should be able to address some of the issues in data management, storage and communication. Such algorithms would also help with analysis provided they offer additional capabilities such as random access to any read and indexing for efficient sequence similarity search. Here we present SCALCE, a 'boosting' scheme based on Locally Consistent Parsing technique, which reorganizes the reads in a way that results in a higher compression speed and compression rate, independent of the compression algorithm in use and without using a reference genome.

Results

Our tests indicate that SCALCE can improve the compression rate achieved through gzip by a factor of 4.19-when the goal is to compress the reads alone. In fact, on SCALCE reordered reads, gzip running time can improve by a factor of 15.06 on a standard PC with a single core and 6 GB memory. Interestingly even the running time of SCALCE + gzip improves that of gzip alone by a factor of 2.09. When compared with the recently published BEETL, which aims to sort the (inverted) reads in lexicographic order for improving bzip2, SCALCE + gzip provides up to 2.01 times better compression while improving the running time by a factor of 5.17. SCALCE also provides the option to compress the quality scores as well as the read names, in addition to the reads themselves. This is achieved by compressing the quality scores through order-3 Arithmetic Coding (AC) and the read names through gzip through the reordering SCALCE provides on the reads. This way, in comparison with gzip compression of the unordered FASTQ files (including reads, read names and quality scores), SCALCE (together with gzip and arithmetic encoding) can provide up to 3.34 improvement in the compression rate and 1.26 improvement in running time.

Availability

Our algorithm, SCALCE (Sequence Compression Algorithm using Locally Consistent Encoding), is implemented in C++ with both gzip and bzip2 compression options. It also supports multithreading when gzip option is selected, and the pigz binary is available. It is available at http://scalce.sourceforge.net.

Contact

fhach@cs.sfu.ca or cenk@cs.sfu.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-09 +25123902,Cross-validation under separate sampling: strong bias and how to correct it.,"

Motivation

It is commonly assumed in pattern recognition that cross-validation error estimation is 'almost unbiased' as long as the number of folds is not too small. While this is true for random sampling, it is not true with separate sampling, where the populations are independently sampled, which is a common situation in bioinformatics.

Results

We demonstrate, via analytical and numerical methods, that classical cross-validation can have strong bias under separate sampling, depending on the difference between the sampling ratios and the true population probabilities. We propose a new separate-sampling cross-validation error estimator, and prove that it satisfies an 'almost unbiased' theorem similar to that of random-sampling cross-validation. We present two case studies with previously published data, which show that the results can change drastically if the correct form of cross-validation is used.

Availability and implementation

The source code in C++, along with the Supplementary Materials, is available at: http://gsp.tamu.edu/Publications/supplementary/zollanvari13/.",2014-08-13 +25361964,iPro54-PseKNC: a sequence-based predictor for identifying sigma-54 promoters in prokaryote with pseudo k-tuple nucleotide composition.,"The σ(54) promoters are unique in prokaryotic genome and responsible for transcripting carbon and nitrogen-related genes. With the avalanche of genome sequences generated in the postgenomic age, it is highly desired to develop automated methods for rapidly and effectively identifying the σ(54) promoters. Here, a predictor called 'iPro54-PseKNC' was developed. In the predictor, the samples of DNA sequences were formulated by a novel feature vector called 'pseudo k-tuple nucleotide composition', which was further optimized by the incremental feature selection procedure. The performance of iPro54-PseKNC was examined by the rigorous jackknife cross-validation tests on a stringent benchmark data set. As a user-friendly web-server, iPro54-PseKNC is freely accessible at http://lin.uestc.edu.cn/server/iPro54-PseKNC. For the convenience of the vast majority of experimental scientists, a step-by-step protocol guide was provided on how to use the web-server to get the desired results without the need to follow the complicated mathematics that were presented in this paper just for its integrity. Meanwhile, we also discovered through an in-depth statistical analysis that the distribution of distances between the transcription start sites and the translation initiation sites were governed by the gamma distribution, which may provide a fundamental physical principle for studying the σ(54) promoters.",2014-10-31 +23846746,Inference of alternative splicing from RNA-Seq data with probabilistic splice graphs.,"

Motivation

Alternative splicing and other processes that allow for different transcripts to be derived from the same gene are significant forces in the eukaryotic cell. RNA-Seq is a promising technology for analyzing alternative transcripts, as it does not require prior knowledge of transcript structures or genome sequences. However, analysis of RNA-Seq data in the presence of genes with large numbers of alternative transcripts is currently challenging due to efficiency, identifiability and representation issues.

Results

We present RNA-Seq models and associated inference algorithms based on the concept of probabilistic splice graphs, which alleviate these issues. We prove that our models are often identifiable and demonstrate that our inference methods for quantification and differential processing detection are efficient and accurate.

Availability

Software implementing our methods is available at http://deweylab.biostat.wisc.edu/psginfer.

Contact

cdewey@biostat.wisc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-11 +22877864,PPfold 3.0: fast RNA secondary structure prediction using phylogeny and auxiliary data.,"

Unlabelled

PPfold is a multi-threaded implementation of the Pfold algorithm for RNA secondary structure prediction. Here we present a new version of PPfold, which extends the evolutionary analysis with a flexible probabilistic model for incorporating auxiliary data, such as data from structure probing experiments. Our tests show that the accuracy of single-sequence secondary structure prediction using experimental data in PPfold 3.0 is comparable to RNAstructure. Furthermore, alignment structure prediction quality is improved even further by the addition of experimental data. PPfold 3.0 therefore has the potential of producing more accurate predictions than it was previously possible.

Availability and implementation

PPfold 3.0 is available as a platform-independent Java application and can be downloaded from http://birc.au.dk/software/ppfold.",2012-08-09 +23479350,NTFD--a stand-alone application for the non-targeted detection of stable isotope-labeled compounds in GC/MS data.,"

Summary

Most current stable isotope-based methodologies are targeted and focus only on the well-described aspects of metabolic networks. Here, we present NTFD (non-targeted tracer fate detection), a software for the non-targeted analysis of all detectable compounds derived from a stable isotope-labeled tracer present in a GC/MS dataset. In contrast to traditional metabolic flux analysis approaches, NTFD does not depend on any a priori knowledge or library information. To obtain dynamic information on metabolic pathway activity, NTFD determines mass isotopomer distributions for all detected and labeled compounds. These data provide information on relative fluxes in a metabolic network. The graphical user interface allows users to import GC/MS data in netCDF format and export all information into a tab-separated format.

Availability

NTFD is C++- and Qt4-based, and it is freely available under an open-source license. Pre-compiled packages for the installation on Debian- and Redhat-based Linux distributions, as well as Windows operating systems, along with example data, are provided for download at http://ntfd.mit.edu/.",2013-03-11 +25123899,NetComm: a network analysis tool based on communicability.,"

Motivation

Set-based network similarity metrics are increasingly used to productively analyze genome-wide data. Conventional approaches, such as mean shortest path and clique-based metrics, have been useful but are not well suited to all applications. Computational scientists in other disciplines have developed communicability as a complementary metric. Network communicability considers all paths of all lengths between two network members. Given the success of previous network analyses of protein-protein interactions, we applied the concepts of network communicability to this problem. Here we show that our communicability implementation has advantages over traditional approaches. Overall, analyses suggest network communicability has considerable utility in analysis of large-scale biological networks.

Availability and implementation

We provide our method as an R package for use in both human protein-protein interaction network analyses and analyses of arbitrary networks along with a tutorial at http://www.shawlab.org/NetComm/.",2014-08-13 +23314324,NGSUtils: a software suite for analyzing and manipulating next-generation sequencing datasets.,"

Summary

NGSUtils is a suite of software tools for manipulating data common to next-generation sequencing experiments, such as FASTQ, BED and BAM format files. These tools provide a stable and modular platform for data management and analysis.

Availability and implementation

NGSUtils is available under a BSD license and works on Mac OS X and Linux systems. Python 2.6+ and virtualenv are required. More information and source code may be obtained from the website: http://ngsutils.org.",2013-01-12 +23271269,Inference of gene regulatory networks from genome-wide knockout fitness data.,"

Motivation

Genome-wide fitness is an emerging type of high-throughput biological data generated for individual organisms by creating libraries of knockouts, subjecting them to broad ranges of environmental conditions, and measuring the resulting clone-specific fitnesses. Since fitness is an organism-scale measure of gene regulatory network behaviour, it may offer certain advantages when insights into such phenotypical and functional features are of primary interest over individual gene expression. Previous works have shown that genome-wide fitness data can be used to uncover novel gene regulatory interactions, when compared with results of more conventional gene expression analysis. Yet, to date, few algorithms have been proposed for systematically using genome-wide mutant fitness data for gene regulatory network inference.

Results

In this article, we describe a model and propose an inference algorithm for using fitness data from knockout libraries to identify underlying gene regulatory networks. Unlike most prior methods, the presented approach captures not only structural, but also dynamical and non-linear nature of biomolecular systems involved. A state-space model with non-linear basis is used for dynamically describing gene regulatory networks. Network structure is then elucidated by estimating unknown model parameters. Unscented Kalman filter is used to cope with the non-linearities introduced in the model, which also enables the algorithm to run in on-line mode for practical use. Here, we demonstrate that the algorithm provides satisfying results for both synthetic data as well as empirical measurements of GAL network in yeast Saccharomyces cerevisiae and TyrR-LiuR network in bacteria Shewanella oneidensis.

Availability

MATLAB code and datasets are available to download at http://www.duke.edu/~lw174/Fitness.zip and http://genomics.lbl.gov/supplemental/fitness-bioinf/",2012-12-27 +24813542,"SNPsea: an algorithm to identify cell types, tissues and pathways affected by risk loci.","

Unlabelled

We created a fast, robust and general C++ implementation of a single-nucleotide polymorphism (SNP) set enrichment algorithm to identify cell types, tissues and pathways affected by risk loci. It tests trait-associated genomic loci for enrichment of specificity to conditions (cell types, tissues and pathways). We use a non-parametric statistical approach to compute empirical P-values by comparison with null SNP sets. As a proof of concept, we present novel applications of our method to four sets of genome-wide significant SNPs associated with red blood cell count, multiple sclerosis, celiac disease and HDL cholesterol.

Availability and implementation

http://broadinstitute.org/mpg/snpsea.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-10 +23632163,The Genomic HyperBrowser: an analysis web server for genome-scale data.,"The immense increase in availability of genomic scale datasets, such as those provided by the ENCODE and Roadmap Epigenomics projects, presents unprecedented opportunities for individual researchers to pose novel falsifiable biological questions. With this opportunity, however, researchers are faced with the challenge of how to best analyze and interpret their genome-scale datasets. A powerful way of representing genome-scale data is as feature-specific coordinates relative to reference genome assemblies, i.e. as genomic tracks. The Genomic HyperBrowser (http://hyperbrowser.uio.no) is an open-ended web server for the analysis of genomic track data. Through the provision of several highly customizable components for processing and statistical analysis of genomic tracks, the HyperBrowser opens for a range of genomic investigations, related to, e.g., gene regulation, disease association or epigenetic modifications of the genome.",2013-04-30 +24715956,Cyrface: An interface from Cytoscape to R that provides a user interface to R packages.,"There is an increasing number of software packages to analyse biological experimental data in the R environment. In particular, Bioconductor, a repository of curated R packages, is one of the most comprehensive resources for bioinformatics and biostatistics. The use of these packages is increasing, but it requires a basic understanding of the R language, as well as the syntax of the specific package used. The availability of user graphical interfaces for these packages would decrease the learning curve and broaden their application. Here, we present a Cytoscape app termed Cyrface that allows Cytoscape apps to connect to any function and package developed in R. Cyrface can be used to run R packages from within the Cytoscape environment making use of a graphical user interface. 
Moreover, it can link R packages with the capabilities of Cytoscape and its apps, in particular network visualization and analysis. Cyrface's utility has been demonstrated for two Bioconductor packages ( CellNOptR and DrugVsDisease), and here we further illustrate its usage by implementing a workflow of data analysis and visualization. Download links, installation instructions and user guides can be accessed from the Cyrface's homepage ( http://www.ebi.ac.uk/saezrodriguez/cyrface/) and from the Cytoscape app store ( http://apps.cytoscape.org/apps/cyrface).",2013-09-19 +22654753,"EEGVIS: A MATLAB Toolbox for Browsing, Exploring, and Viewing Large Datasets.","Recent advances in data monitoring and sensor technology have accelerated the acquisition of very large data sets. Streaming data sets from instrumentation such as multi-channel EEG recording usually must undergo substantial pre-processing and artifact removal. Even when using automated procedures, most scientists engage in laborious manual examination and processing to assure high quality data and to identify interesting or problematic data segments. Researchers also do not have a convenient method of visually assessing the effects of applying any stage in a processing pipeline. EEGVIS is a MATLAB toolbox that allows users to quickly explore multi-channel EEG and other large array-based data sets using multi-scale drill-down techniques. Customizable summary views reveal potentially interesting sections of data, which users can explore further by clicking to examine using detailed viewing components. The viewer and a companion browser are built on our MoBBED framework, which has a library of modular viewing components that can be mixed and matched to best reveal structure. Users can easily create new viewers for their specific data without any programming during the exploration process. These viewers automatically support pan, zoom, resizing of individual components, and cursor exploration. 
The toolbox can be used directly in MATLAB at any stage in a processing pipeline, as a plug-in for EEGLAB, or as a standalone precompiled application without MATLAB running. EEGVIS and its supporting packages are freely available under the GNU general public license at http://visual.cs.utsa.edu/eegvis.",2012-05-28 +24723761,MidMedPol: Polychaetes from midlittoral rocky shores in Greece and Italy (Mediterranean Sea).,"This paper describes a dataset of polychaetes (Annelida) from 14 midlittoral rocky shore sampling sites in Greece and Italy (Mediterranean Sea). The dataset combines the outcome of four different projects studying the hard substrate midlittoral zone in the Mediterranean between 1984 and 2009. Samples were collected by scraping and collecting the organisms from a framed area. The maximal sampling depth was 1.5 m. In total, 123 polychaete species were recorded, five of which are new records for the respective biogeographic sectors of the Mediterranean. The dataset contains 788 occurrence records, fully annotated with all required metadata. These data contribute to the knowledge of a previously very understudied regional habitat, since at present, comprehensive lists of the midlittoral communities in the Mediterranean are provided through only a few, paper-based, studies. This dataset is one of the first electronic data compilations of the Mediterranean midlittoral zone communities and certainly the most comprehensive of its kind, contributing to the ongoing efforts of the Ocean Biogeographic Information System (OBIS) which aims at filling the gaps in our current knowledge of the world's oceans. It is accessible at http://ipt.vliz.be/resource.do?r=mediterraneanpolychaetaintertidal.",2013-09-16 +25890236,Improved survival among colon cancer patients with increased differentially expressed pathways.,"

Background

Studies of colorectal cancer (CRC) have shown that hundreds to thousands of genes are differentially expressed in tumors when compared to normal tissue samples. In this study, we evaluate how genes that are differentially expressed in colon versus normal tissue influence survival.

Methods

We performed RNA-seq on tumor/normal paired samples from 175 colon cancer patients. We implemented a cross validation strategy to determine genes that were significantly differentially expressed between tumor and normal samples. Differentially expressed genes were evaluated with Ingenuity Pathway Analysis to identify key pathways that were de-regulated. A summary differential pathway expression score (DPES) was developed to summarize hazard of dying while adjusting for age, American Joint Committee on Cancer (AJCC) stage, sex, and tumor molecular phenotype, i.e., MSI, TP53, KRAS, and CIMP.

Results

A total of 1,138 genes were up-regulated and 695 were down-regulated. These de-regulated genes were enriched for 19 Ingenuity Canonical Pathways, with the most significant pathways involving cell signaling and growth. Of the enriched pathways, 16 were significantly associated with CRC-specific mortality, including 1 metabolic pathway and 15 signaling pathways. In all instances, having a higher DPES (i.e., more de-regulated genes) was associated with better survival. Further assessment showed that individuals diagnosed at AJCC Stage 1 had more de-regulated genes than individuals diagnosed at AJCC Stage 4.

Conclusions

Our data suggest that having more de-regulated pathways is associated with a good prognosis and may be a reaction to key events that are disabling to tumor progression. Please see related article: http://dx.doi.org/10.1186/s12916-015-0307-6 .",2015-04-08 +23969136,Genome compression: a novel approach for large collections.,"

Motivation

Genomic repositories are rapidly growing, as witnessed by the 1000 Genomes or the UK10K projects. Hence, compression of multiple genomes of the same species has become an active research area in the past years. The well-known large redundancy in human sequences is not easy to exploit because of huge memory requirements from traditional compression algorithms.

Results

We show how to obtain several times higher compression ratio than of the best reported results, on two large genome collections (1092 human and 775 plant genomes). Our inputs are variant call format files restricted to their essential fields. More precisely, our novel Ziv-Lempel-style compression algorithm squeezes a single human genome to ∼400 KB. The key to high compression is to look for similarities across the whole collection, not just against one reference sequence, what is typical for existing solutions.

Availability

http://sun.aei.polsl.pl/tgc (also as Supplementary Material) under a free license. Supplementary data: Supplementary data are available at Bioinformatics online.",2013-08-21 +23268441,GBSA: a comprehensive software for analysing whole genome bisulfite sequencing data.,"High-throughput sequencing is increasingly being used in combination with bisulfite (BS) assays to study DNA methylation at nucleotide resolution. Although several programmes provide genome-wide alignment of BS-treated reads, the resulting information is not readily interpretable and often requires further bioinformatic steps for meaningful analysis. Current post-alignment BS-sequencing programmes are generally focused on the gene-specific level, a restrictive feature when analysis in the non-coding regions, such as enhancers and intergenic microRNAs, is required. Here, we present Genome Bisulfite Sequencing Analyser (GBSA-http://ctrad-csi.nus.edu.sg/gbsa), a free open-source software capable of analysing whole-genome bisulfite sequencing data with either a gene-centric or gene-independent focus. Through analysis of the largest published data sets to date, we demonstrate GBSA's features in providing sequencing quality assessment, methylation scoring, functional data management and visualization of genomic methylation at nucleotide resolution. Additionally, we show that GBSA's output can be easily integrated with other high-throughput sequencing data, such as RNA-Seq or ChIP-seq, to elucidate the role of methylated intergenic regions in gene regulation. 
In essence, GBSA allows an investigator to explore not only known loci but also all the genomic regions, for which methylation studies could lead to the discovery of new regulatory mechanisms.",2012-12-24 +27485774,LECTINPred: web Server that Uses Complex Networks of Protein Structure for Prediction of Lectins with Potential Use as Cancer Biomarkers or in Parasite Vaccine Design.,"Lectins (Ls) play an important role in many diseases such as different types of cancer, parasitic infections and other diseases. Interestingly, the Protein Data Bank (PDB) contains +3000 protein 3D structures with unknown function. Thus, we can in principle, discover new Ls mining non-annotated structures from PDB or other sources. However, there are no general models to predict new biologically relevant Ls based on 3D chemical structures. We used the MARCH-INSIDE software to calculate the Markov-Shannon 3D electrostatic entropy parameters for the complex networks of protein structure of 2200 different protein 3D structures, including 1200 Ls. We have performed a Linear Discriminant Analysis (LDA) using these parameters as inputs in order to seek a new Quantitative Structure-Activity Relationship (QSAR) model, which is able to discriminate 3D structure of Ls from other proteins. We implemented this predictor in the web server named LECTINPred, freely available at http://bio-aims.udc.es/LECTINPred.php. This web server showed the following goodness-of-fit statistics: Sensitivity=96.7 % (for Ls), Specificity=87.6 % (non-active proteins), and Accuracy=92.5 % (for all proteins), considering altogether both the training and external prediction series. In mode 2, users can carry out an automatic retrieval of protein structures from PDB. We illustrated the use of this server, in operation mode 1, performing a data mining of PDB. We predicted Ls scores for +2000 proteins with unknown function and selected the top-scored ones as possible lectins. 
In operation mode 2, LECTINPred can also upload 3D structural models generated with structure-prediction tools like LOMETS or PHYRE2. The new Ls are expected to be of relevance as cancer biomarkers or useful in parasite vaccine design.",2014-03-18 +25707434,Integrating multiple networks for protein function prediction.,"

Background

High throughput techniques produce multiple functional association networks. Integrating these networks can enhance the accuracy of protein function prediction. Many algorithms have been introduced to generate a composite network, which is obtained as a weighted sum of individual networks. The weight assigned to an individual network reflects its benefit towards the protein functional annotation inference. A classifier is then trained on the composite network for predicting protein functions. However, since these techniques model the optimization of the composite network and the prediction tasks as separate objectives, the resulting composite network is not necessarily optimal for the follow-up protein function prediction.

Results

We address this issue by modeling the optimization of the composite network and the prediction problems within a unified objective function. In particular, we use a kernel target alignment technique and the loss function of a network based classifier to jointly adjust the weights assigned to the individual networks. We show that the proposed method, called MNet, can achieve a performance that is superior (with respect to different evaluation criteria) to related techniques using the multiple networks of four example species (yeast, human, mouse, and fly) annotated with thousands (or hundreds) of GO terms.

Conclusion

MNet can effectively integrate multiple networks for protein function prediction and is robust to the input parameters. Supplementary data is available at https://sites.google.com/site/guoxian85/home/mnet. The Matlab code of MNet is available upon request.",2015-01-21 +23471519,An Efficient Dynamic Programming Algorithm for Phosphorylation Site Assignment of Large-Scale Mass Spectrometry Data.,"Phosphorylation site assignment of large-scale data from high throughput tandem mass spectrometry (LC-MS/MS) data is an important aspect of phosphoproteomics. Correct assignment of phosphorylated residue(s) is important for functional interpretation of the data within a biological context. Common search algorithms (Sequest etc.) for mass spectrometry data are not designed for accurate site assignment; thus, additional algorithms are needed. In this paper, we propose a linear-time and linear-space dynamic programming strategy for phosphorylation site assignment. The algorithm, referred to as PhosSA, optimizes the objective function defined as the summation of peak intensities that are associated with theoretical phosphopeptide fragmentation ions. Quality control is achieved through the use of a post-processing criteria whose value is indicative of the signal-to-noise (S/N) properties and redundancy of the fragmentation spectra. The algorithm is tested using experimentally generated data sets of peptides with known phosphorylation sites while varying the fragmentation strategy (CID or HCD) and molar amounts of the peptides. The algorithm is also compatible with various peptide labeling strategies including SILAC and iTRAQ. PhosSA is shown to achieve > 99% accuracy with a high degree of sensitivity. The algorithm is extremely fast and scalable (able to process up to 0.5 million peptides in an hour). 
The implemented algorithm is freely available at http://helixweb.nih.gov/ESBL/PhosSA/ for academic purposes.",2012-10-01 +25891925,Structural abnormalities in benign childhood epilepsy with centrotemporal spikes (BCECTS).,"

Purpose

The aim of this study was to investigate cortical thickness and gray matter volume abnormalities in benign childhood epilepsy with centrotemporal spikes (BCECTS). We additionally assessed the effects of comorbid attention-deficit/hyperactivity (ADHD) on these abnormalities.

Methods

Surface and volumetric MR imaging data of children with newly diagnosed BCECTS (n = 20, 14 males) and age-matched healthy controls (n = 20) were analyzed using FreeSurfer (version 5.3.0, https://surfer.nmr.mgh.harvard.edu). An additional comparison was performed between BCECTS children with and without ADHD (each, n = 8). A group comparison was carried out using an analysis of covariance with a value of significance set as p < 0.01 or p < 0.05.

Results

Children with BCECTS had significantly thicker right superior frontal, superior temporal, middle temporal, and left pars triangularis cortices. Voxel-based morphometric analysis revealed significantly larger cortical gray matter volumes of the right precuneus, left orbitofrontal, pars orbitalis, precentral gyri, and bilateral putamen and the amygdala of children with BCECTS compared to healthy controls. BCECTS patients with ADHD had significantly thicker left caudal anterior and posterior cingulate gyri and a significantly larger left pars opercularis gyral volume compared to BCECTS patients without ADHD.

Conclusion

Children with BCECTS have thicker or larger gray matters in the corticostriatal circuitry at the onset of epilepsy. Comorbid ADHD is also associated with structural aberrations. These findings suggest structural disruptions of the brain network are associated with specific developmental electro-clinical syndromes.",2015-03-01 +22568834,DIPSBC--data integration platform for systems biology collaborations.,"

Background

Modern biomedical research is often organized in collaborations involving labs worldwide. In particular in systems biology, complex molecular systems are analyzed that require the generation and interpretation of heterogeneous data for their explanation, for example ranging from gene expression studies and mass spectrometry measurements to experimental techniques for detecting molecular interactions and functional assays. XML has become the most prominent format for representing and exchanging these data. However, besides the development of standards there is still a fundamental lack of data integration systems that are able to utilize these exchange formats, organize the data in an integrative way and link it with applications for data interpretation and analysis.

Results

We have developed DIPSBC, an interactive data integration platform supporting collaborative research projects, based on Foswiki, Solr/Lucene, and specific helper applications. We describe the main features of the implementation and highlight the performance of the system with several use cases. All components of the system are platform independent and open-source developments and thus can be easily adopted by researchers. An exemplary installation of the platform which also provides several helper applications and detailed instructions for system usage and setup is available at http://dipsbc.molgen.mpg.de.

Conclusions

DIPSBC is a data integration platform for medium-scale collaboration projects that has been tested already within several research collaborations. Because of its modular design and the incorporation of XML data formats it is highly flexible and easy to use.",2012-05-08 +23706020,Unsupervised genome-wide recognition of local relationship patterns.,"

Background

Phenomena such as incomplete lineage sorting, horizontal gene transfer, gene duplication and subsequent sub- and neo-functionalisation can result in distinct local phylogenetic relationships that are discordant with species phylogeny. In order to assess the possible biological roles for these subdivisions, they must first be identified and characterised, preferably on a large scale and in an automated fashion.

Results

We developed Saguaro, a combination of a Hidden Markov Model (HMM) and a Self Organising Map (SOM), to characterise local phylogenetic relationships among aligned sequences using cacti, matrices of pair-wise distance measures. While the HMM determines the genomic boundaries from aligned sequences, the SOM hypothesises new cacti in an unsupervised and iterative fashion based on the regions that were modelled least well by existing cacti. After testing the software on simulated data, we demonstrate the utility of Saguaro by testing two different data sets: (i) 181 Dengue virus strains, and (ii) 5 primate genomes. Saguaro identifies regions under lineage-specific constraint for the first set, and genomic segments that we attribute to incomplete lineage sorting in the second dataset. Intriguingly for the primate data, Saguaro also classified an additional ~3% of the genome as most incompatible with the expected species phylogeny. A substantial fraction of these regions was found to overlap genes associated with both the innate and adaptive immune systems.

Conclusions

Saguaro detects distinct cacti describing local phylogenetic relationships without requiring any a priori hypotheses. We have successfully demonstrated Saguaro's utility with two contrasting data sets, one containing many members with short sequences (Dengue viral strains: n = 181, genome size = 10,700 nt), and the other with few members but complex genomes (related primate species: n = 5, genome size = 3 Gb), suggesting that the software is applicable to a wide variety of experimental populations. Saguaro is written in C++, runs on the Linux operating system, and can be downloaded from http://saguarogw.sourceforge.net/.",2013-05-24 +22192482,Predicting RNA-protein interactions using only sequence information.,"

Background

RNA-protein interactions (RPIs) play important roles in a wide variety of cellular processes, ranging from transcriptional and post-transcriptional regulation of gene expression to host defense against pathogens. High throughput experiments to identify RNA-protein interactions are beginning to provide valuable information about the complexity of RNA-protein interaction networks, but are expensive and time consuming. Hence, there is a need for reliable computational methods for predicting RNA-protein interactions.

Results

We propose RPISeq, a family of classifiers for predicting RNA-protein interactions using only sequence information. Given the sequences of an RNA and a protein as input, RPIseq predicts whether or not the RNA-protein pair interact. The RNA sequence is encoded as a normalized vector of its ribonucleotide 4-mer composition, and the protein sequence is encoded as a normalized vector of its 3-mer composition, based on a 7-letter reduced alphabet representation. Two variants of RPISeq are presented: RPISeq-SVM, which uses a Support Vector Machine (SVM) classifier and RPISeq-RF, which uses a Random Forest classifier. On two non-redundant benchmark datasets extracted from the Protein-RNA Interface Database (PRIDB), RPISeq achieved an AUC (Area Under the Receiver Operating Characteristic (ROC) curve) of 0.96 and 0.92. On a third dataset containing only mRNA-protein interactions, the performance of RPISeq was competitive with that of a published method that requires information regarding many different features (e.g., mRNA half-life, GO annotations) of the putative RNA and protein partners. In addition, RPISeq classifiers trained using the PRIDB data correctly predicted the majority (57-99%) of non-coding RNA-protein interactions in NPInter-derived networks from E. coli, S. cerevisiae, D. melanogaster, M. musculus, and H. sapiens.

Conclusions

Our experiments with RPISeq demonstrate that RNA-protein interactions can be reliably predicted using only sequence-derived information. RPISeq offers an inexpensive method for computational construction of RNA-protein interaction networks, and should provide useful insights into the function of non-coding RNAs. RPISeq is freely available as a web-based server at http://pridb.gdcb.iastate.edu/RPISeq/.",2011-12-22 +23510108,GlycoPep Detector: a tool for assigning mass spectrometry data of N-linked glycopeptides on the basis of their electron transfer dissociation spectra.,"Electron transfer dissociation (ETD) is commonly used in fragmenting N-linked glycopeptides in their mass spectral analyses to complement collision-induced dissociation (CID) experiments. The glycan remains intact through ETD, while the peptide backbone is cleaved, providing the sequence of amino acids for a glycopeptide. Nonetheless, data analysis is a major bottleneck to high-throughput glycopeptide identification based on ETD data, due to the complexity and diversity of ETD mass spectra compared to CID counterparts. GlycoPep Detector (GPD) is a web-based tool to address this challenge. It filters out noise peaks that interfere with glycopeptide sequencing, correlates input glycopeptide compositions with the ETD spectra, and assigns a score for each candidate. By considering multiple ion series (c-, z-, and y-ions) and scoring them separately, the software gives more weighting to the ion series that matches peaks of high intensity in the spectra. This feature enables the correct glycopeptide to receive a high score while keeping scores of incorrect compositions low. GPD has been utilized to interpret data collected on six model glycoproteins (RNase B, avidin, fetuin, asialofetuin, transferrin, and AGP) as well as a clade C HIV envelope glycoprotein, C.97ZA012 gp140ΔCFI. 
In every assignment made by GPD, the correct glycopeptide composition earns a score that is about 2-fold higher than other incorrect glycopeptide candidates (decoys). The software can be accessed at http://glycopro.chem.ku.edu/ZZKHome.php .",2013-04-29 +25504848,repDNA: a Python package to generate various modes of feature vectors for DNA sequences by incorporating user-defined physicochemical properties and sequence-order effects.,"

Unlabelled

In order to develop powerful computational predictors for identifying the biological features or attributes of DNAs, one of the most challenging problems is to find a suitable approach to effectively represent the DNA sequences. To facilitate the studies of DNAs and nucleotides, we developed a Python package called representations of DNAs (repDNA) for generating the widely used features reflecting the physicochemical properties and sequence-order effects of DNAs and nucleotides. There are three feature groups composed of 15 features. The first group calculates three nucleic acid composition features describing the local sequence information by means of kmers; the second group calculates six autocorrelation features describing the level of correlation between two oligonucleotides along a DNA sequence in terms of their specific physicochemical properties; the third group calculates six pseudo nucleotide composition features, which can be used to represent a DNA sequence with a discrete model or vector yet still keep considerable sequence-order information via the physicochemical properties of its constituent oligonucleotides. In addition, these features can be easily calculated based on both the built-in and user-defined properties via using repDNA.

Availability and implementation

The repDNA Python package is freely accessible to the public at http://bioinformatics.hitsz.edu.cn/repDNA/.

Contact

bliu@insun.hit.edu.cn or kcchou@gordonlifescience.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-10 +25091586,APPEX: analysis platform for the identification of prognostic gene expression signatures in cancer.,"

Summary

Because cancer has heterogeneous clinical behaviors due to the progressive accumulation of multiple genetic and epigenetic alterations, the identification of robust molecular signatures for predicting cancer outcome is profoundly important. Here, we introduce the APPEX Web-based analysis platform as a versatile tool for identifying prognostic molecular signatures that predict cancer diversity. We incorporated most of statistical methods for survival analysis and implemented seven survival analysis workflows, including CoxSingle, CoxMulti, IntransSingle, IntransMulti, SuperPC, TimeRoc and multivariate. A total of 236 publicly available datasets were collected, processed and stored to support easy independent validation of prognostic signatures. Two case studies including disease recurrence and bladder cancer progression were described using different combinations of the seven workflows.

Availability and implementation

APPEX is freely available at http://www.appex.kr.

Contact

kimsy@kribb.re.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-08-04 +24501396,A comprehensive aligned nifH gene database: a multipurpose tool for studies of nitrogen-fixing bacteria.,"We describe a nitrogenase gene sequence database that facilitates analysis of the evolution and ecology of nitrogen-fixing organisms. The database contains 32 954 aligned nitrogenase nifH sequences linked to phylogenetic trees and associated sequence metadata. The database includes 185 linked multigene entries including full-length nifH, nifD, nifK and 16S ribosomal RNA (rRNA) gene sequences. Evolutionary analyses enabled by the multigene entries support an ancient horizontal transfer of nitrogenase genes between Archaea and Bacteria and provide evidence that nifH has a different history of horizontal gene transfer from the nifDK enzyme core. Further analyses show that lineages in nitrogenase cluster I and cluster III have different rates of substitution within nifD, suggesting that nifD is under different selection pressure in these two lineages. Finally, we find that the genetic divergence of nifH and 16S rRNA genes does not correlate well at sequence dissimilarity values used commonly to define microbial species, as strains having <3% sequence dissimilarity in their 16S rRNA genes can have up to 23% dissimilarity in nifH. The nifH database has a number of uses including phylogenetic and evolutionary analyses, the design and assessment of primers/probes and the evaluation of nitrogenase sequence diversity. Database URL: http://www.css.cornell.edu/faculty/buckley/nifh.htm.",2014-02-05 +25604690,Major depressive and anxiety disorders in visually impaired older adults.,"

Purpose

We assessed the prevalence of subthreshold depression and anxiety, and major depressive, dysthymic, and anxiety disorders (panic disorder, agoraphobia, social phobia, and general anxiety disorder) in visually impaired older adults and compared these estimates with those of normally sighted peers.

Methods

Cross-sectional data were analyzed based on telephone interviews with visually impaired older adults aged ≥ 60 years (n = 615) with a visual acuity of ≥ 0.30 logMAR (20/40 Snellen) in the best eye from outpatient low vision rehabilitation centers, and face-to-face interviews with community-dwelling normally sighted peers (n = 1232). To determine prevalence rates, the normally sighted population was weighted on sex and age to fit the visually impaired population. Logistic regression analyses were used to compare the populations and to correct for confounders.

Results

The prevalence of major depressive disorder (5.4%) and anxiety disorders (7.5%), as well as the prevalence of subthreshold depression (32.2%) and subthreshold anxiety (15.6%), were significantly higher in visually impaired older adults compared to their normally sighted peers (P < 0.05). Agoraphobia and social phobia were the most prevalent anxiety disorders in visually impaired older adults.

Conclusions

This study shows that depression and anxiety are major public health problems in visually impaired older adults. Research on psychotherapeutic and psychopharmacologic interventions to improve depression and anxiety in this population is warranted. (http://www.trialregister.nl number, NTR3296.).",2015-01-20 +23433201,"Mutation Reporter Tool: an online tool to interrogate loci of interest, with its utility demonstrated using hepatitis B virus.","

Background

An online tool, which extracts and summarises nucleotide or amino acid sequence data at specified loci of interest, was developed and tested using the basic core promoter/precore (BCP/PC) region of the hepatitis B virus (HBV). The tool is aimed at researchers without specialist computer skills.

Methods

The tool consists of a web-based front-end, with a CGI script, which runs Python code to generate an output web-page. The Python code searches the input sequence data for a specified anchor motif, after which it generates summary tables and graphs of residue and motif distributions.

Results

After the user provides an input file in FASTA format containing aligned sequence data (nucleotides or amino acids) and specifies an anchor motif at a known coordinate, the tool summarizes the nucleotides or amino acids at the specified loci, their frequency and analyzes motif patterns of the loci. The tool can output a graph that displays the frequency of mutations relative to a reference sequence. The tool was used to analyze the BCP/PC region of HBV belonging to subgenotypes A1, A2 and subgenotype D and to serotype HBV. The ""Discovery Mode"" ignores conserved loci and assists in identifying potential loci of interest.

Conclusions

Although HBV was used to demonstrate the utility of the Mutation Reporter Tool, the tool has wide application as it is genome-agnostic: nucleotide or amino acid sequence data from any organism can be processed. Rapid characterisation of many sequences can be achieved easily when the loci of interest are known. The tool is available online, without charge, at http://hvdr.bioinf.wits.ac.za/tools.",2013-02-23 +21365759,Exploring the proteome of an echinoderm nervous system: 2-DE of the sea star radial nerve cord and the synaptosomal membranes subproteome.,"We describe the first proteomic characterization of the radial nerve cord (RNC) of an echinoderm, the sea star Marthasterias glacialis. The combination of 2-DE with MS (MALDI-TOF/TOF) resulted in the identification of 286 proteins in the RNC. Additionally, 158 proteins were identified in the synaptosomal membranes enriched fraction after 1-DE separation. The 2-DE RNC reference map is available via the WORLD-2DPAGE Portal (http://www.expasy.ch/world-2dpage/) along with the associated protein identification data which are also available in the PRIDE database. The identified proteins constitute the first high-throughput evidence that seems to indicate that echinoderms nervous transmission relies primarily on chemical synapses which is similar to the synaptic activity in adult mammal's spinal cord. Furthermore, several homologous proteins known to participate in the regeneration events of other organisms were also identified, and thus can be used as targets for future studies aiming to understand the poorly uncharacterized regeneration capability of echinoderms. 
This ""echinoderm missing link"" is also a contribution to unravel the mystery of deuterostomian CNS evolution.",2011-02-17 +24502991,Automatic recognition and scoring of olympic rhythmic gymnastic movements.,"We describe a conceptually simple algorithm for assigning judgement scores to rhythmic gymnastic movements, which could improve scoring objectivity and reduce judgemental bias during competitions. Our method, implemented as a real-time computer vision software, takes a video shot or a live performance video stream as input and extracts detailed velocity field information from body movements, transforming them into specialized spatio-temporal image templates. The collection of such images over time, when projected into a velocity covariance eigenspace, trace out unique but similar trajectories for a particular gymnastic movement type. By comparing separate executions of the same atomic gymnastic routine, our method assigns a quality judgement score that is related to the distance between the respective spatio-temporal trajectories. For several standard gymnastic movements, the method accurately assigns scores that are comparable to those assigned by expert judges. We also describe our rhythmic gymnastic video shot database, which we have made freely available to the human movement research community. The database can be obtained at http://www.milegroup.net/apps/gymdb/.",2014-02-04 +24288354,Mechanisms in endocrinology: Genetics of FSH action: a 2014-and-beyond view.,"

Objective

To assess the pharmacogenetic potential of FSH for infertility treatment.

Design

Review of the literature and genomic databases.

Methods

Single-nucleotide polymorphism (SNP) assessed: rs6166 (c.2039A>G, p.N680S), rs6165 (c.919A>G, p.T307A), rs1394205 (c.-29G>A) in FSHR, and rs10835638 (c.-211G>T) in FSHB. Literature search via PubMed. Blast analysis of genomic information available in the NCBI nucleotide database. Comparison of allele frequency and haplotype distribution using the http://spsmart.cesga.es tool.

Results

All these SNPs appear first in Homo, result in reduced FSH action, and are present with variable frequencies and combinations worldwide. Stringent clinical studies demonstrate that the FSHR genotype influences serum FSH levels and gonadal response in both sexes. Serum FSH levels depend on the -211G>T SNP, influencing transcriptional activity of the FSHB promoter. Genotypes reducing FSH action are overrepresented in infertile subjects.

Conclusions

Although the clinical relevance of the FSHR polymorphisms alone is limited, the combination of FSHR and FSHB genotypes has a much stronger impact than either one alone in both sexes. About 20% of people are carriers of the alleles associated with lower serum FSH levels/reduced FSHR expression or activity, possibly less favorable for reproduction. Prospective studies need to investigate whether stratification of infertile patients according to their FSHR-FSHB genotypes improves clinical efficacy of FSH treatment compared with the current, naïve approach. A relative enrichment of less favorable FSHR-FSHB genotypes may be related to changes in human reproductive strategies and be a marker of some health-related advantage at the cost of reduced fertility.",2014-02-04 +24909817,A Web platform for the interactive visualization and analysis of the 3D fractal dimension of MRI data.,"This study presents a Web platform (http://3dfd.ujaen.es) for computing and analyzing the 3D fractal dimension (3DFD) from volumetric data in an efficient, visual and interactive way. The Web platform is specially designed for working with magnetic resonance images (MRIs) of the brain. The program estimates the 3DFD by calculating the 3D box-counting of the entire volume of the brain, and also of its 3D skeleton. All of this is done in a graphical, fast and optimized way by using novel technologies like CUDA and WebGL. The usefulness of the Web platform presented is demonstrated by its application in a case study where an analysis and characterization of groups of 3D MR images is performed for three neurodegenerative diseases: Multiple Sclerosis, Intrauterine Growth Restriction and Alzheimer's disease. 
To the best of our knowledge, this is the first Web platform that allows the users to calculate, visualize, analyze and compare the 3DFD from MRI images in the cloud.",2014-06-06 +23571760,OLego: fast and sensitive mapping of spliced mRNA-Seq reads using small seeds.,"A crucial step in analyzing mRNA-Seq data is to accurately and efficiently map hundreds of millions of reads to the reference genome and exon junctions. Here we present OLego, an algorithm specifically designed for de novo mapping of spliced mRNA-Seq reads. OLego adopts a multiple-seed-and-extend scheme, and does not rely on a separate external aligner. It achieves high sensitivity of junction detection by strategic searches with small seeds (~14 nt for mammalian genomes). To improve accuracy and resolve ambiguous mapping at junctions, OLego uses a built-in statistical model to score exon junctions by splice-site strength and intron size. Burrows-Wheeler transform is used in multiple steps of the algorithm to efficiently map seeds, locate junctions and identify small exons. OLego is implemented in C++ with fully multithreaded execution, and allows fast processing of large-scale data. We systematically evaluated the performance of OLego in comparison with published tools using both simulated and real data. OLego demonstrated better sensitivity, higher or comparable accuracy and substantially improved speed. OLego also identified hundreds of novel micro-exons (<30 nt) in the mouse transcriptome, many of which are phylogenetically conserved and can be validated experimentally in vivo. OLego is freely available at http://zhanglab.c2b2.columbia.edu/index.php/OLego.",2013-04-09 +24995852,ScreenCap3: Improving prediction of caspase-3 cleavage sites using experimentally verified noncleavage sites.,"Because of its wide range of substrates, caspase-3, a main executioner among apoptosis-related caspases, is thought to have many unknown substrates that have remained unidentified. 
This report describes our predictive method to facilitate the discovery of novel caspase-3 substrates. To develop a more reliable prediction method, we specifically examined improvement of the data quantity and quality of caspase-3 cleavage sites. The ScreenCap3 method is based on machine learning and on information not only of experimentally verified positive examples but also of negative examples, which were not cleaved by caspase-3. Using information of experimentally verified noncleavage sites, we elucidate novel patterns of amino acids around ""actual"" cleavage sites. Results show that ScreenCap3 provides substantial improvement in terms of precision, compared with existing methods. Therefore, ScreenCap3 is anticipated for use with proteomic screening and identification of novel caspase-3 substrates and their cleavage sites. ScreenCap3 is available at http://scap.cbrc.jp/ScreenCap3/.",2014-08-04 +25866549,Phenoplant: a web resource for the exploration of large chlorophyll fluorescence image datasets.,"

Background

Image analysis is increasingly used in plant phenotyping. Among the various imaging techniques that can be used in plant phenotyping, chlorophyll fluorescence imaging allows imaging of the impact of biotic or abiotic stresses on leaves. Numerous chlorophyll fluorescence parameters may be measured or calculated, but only a few can produce a contrast in a given condition. Therefore, automated procedures that help screening chlorophyll fluorescence image datasets are needed, especially in the perspective of high-throughput plant phenotyping.

Results

We developed an automatic procedure aiming at facilitating the identification of chlorophyll fluorescence parameters impacted on leaves by a stress. First, for each chlorophyll fluorescence parameter, the procedure provides an overview of the data by automatically creating contact sheets of images and/or histograms. Such contact sheets enable a fast comparison of the impact on leaves of various treatments, or of the contrast dynamics during the experiments. Second, based on the global intensity of each chlorophyll fluorescence parameter, the procedure automatically produces radial plots and box plots allowing the user to identify chlorophyll fluorescence parameters that discriminate between treatments. Moreover, basic statistical analysis is automatically generated. Third, for each chlorophyll fluorescence parameter the procedure automatically performs a clustering analysis based on the histograms. This analysis clusters images of plants according to their health status. We applied this procedure to monitor the impact of the inoculation of the root parasitic plant Phelipanche ramosa on Arabidopsis thaliana ecotypes Col-0 and Ler.

Conclusions

Using this automatic procedure, we identified eight chlorophyll fluorescence parameters discriminating between the two ecotypes of A. thaliana, and five impacted by the infection of Arabidopsis thaliana by P. ramosa. More generally, this procedure may help to identify chlorophyll fluorescence parameters impacted by various types of stresses. We implemented this procedure at http://www.phenoplant.org freely accessible to users of the plant phenotyping community.",2015-04-03 +24794934,Mirin: identifying microRNA regulatory modules in protein-protein interaction networks.,"

Unlabelled

Exploring microRNA (miRNA) regulations and protein-protein interactions could reveal the molecular mechanisms responsible for complex biological processes. Mirin is a web-based application suitable for identifying functional modules from protein-protein interaction networks regulated by aberrant miRNAs under user-defined biological conditions such as cancers. The analysis involves combining miRNA regulations, protein-protein interactions between target genes, as well as mRNA and miRNA expression profiles provided by users. Mirin has successfully uncovered oncomirs and their regulatory networks in various cancers, such as gastric and breast cancer.

Availability and implementation

Mirin is freely available at http://mirin.ym.edu.tw/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-02 +21398668,sc-PDB: a database for identifying variations and multiplicity of 'druggable' binding sites in proteins.,"

Background

The sc-PDB database is an annotated archive of druggable binding sites extracted from the Protein Data Bank. It contains all-atoms coordinates for 8166 protein-ligand complexes, chosen for their geometrical and physico-chemical properties. The sc-PDB provides a functional annotation for proteins, a chemical description for ligands and the detailed intermolecular interactions for complexes. The sc-PDB now includes a hierarchical classification of all the binding sites within a functional class.

Method

The sc-PDB entries were first clustered according to the protein name irrespective of the species. For each cluster, we identified dissimilar sites (e.g. catalytic and allosteric sites of an enzyme). SCOPE AND APPLICATIONS: The classification of sc-PDB targets by binding site diversity was intended to facilitate chemogenomics approaches to drug design. In ligand-based approaches, it avoids comparing ligands that do not share the same binding site. In structure-based approaches, it permits quantitative evaluation of the diversity of the binding site definition (variations in size, sequence and/or structure).

Availability

The sc-PDB database is freely available at: http://bioinfo-pharma.u-strasbg.fr/scPDB.",2011-03-12 +23902699,Estimating the contribution of a service delivery organisation to the national modern contraceptive prevalence rate: Marie Stopes International's Impact 2 model.,"

Background

Individual family planning service delivery organisations currently rely on service provision data and couple-years of protection as health impact measures. Due to the substitution effect and the continuation of users of long-term methods, these metrics cannot estimate an organisation's contribution to the national modern contraceptive prevalence rate (CPR), the standard metric for measuring family planning programme impacts. Increasing CPR is essential for addressing the unmet need for family planning, a recognized global health priority. Current health impact estimation models cannot isolate the impact of an organisation in these efforts. Marie Stopes International designed the Impact 2 model to measure an organisation's contribution to increases in national CPR, as well as resulting health and demographic impacts. This paper aims to describe the methodology for modelling increasing national-level CPR as well as to discuss its benefits and limitations.

Methods

Impact 2 converts service provision data into estimates of the number of family planning users, accounting for continuation among users of long-term methods and addressing the challenges of converting commodity distribution data of short-term methods into user numbers. These estimates, combined with the client profile and data on the organisation's previous year's CPR contribution, enable Impact 2 to estimate which clients maintain an organisation's baseline contribution, which ones fulfil population growth offsets, and ultimately, which ones increase CPR.

Results

Illustrative results from Marie Stopes Madagascar show how Impact 2 can be used to estimate an organisation's contribution to national changes in the CPR.

Conclusions

Impact 2 is a useful tool for service delivery organisations to move beyond cruder output measures to a better understanding of their role in meeting the global unmet need for family planning. By considering health impact from the perspective of an individual organisation, Impact 2 addresses gaps not met by other models for family planning service outcomes. Further, the model helps organisations improve service delivery by demonstrating that increases in the national CPR are not simply about expanding user numbers; rather, the type of user (e.g. adopters, provider changers) must be considered. Impact 2 can be downloaded at http://www.mariestopes.org/impact-2.",2013-06-17 +23836142,Determining the subcellular location of new proteins from microscope images using local features.,"

Motivation

Evaluation of previous systems for automated determination of subcellular location from microscope images has been done using datasets in which each location class consisted of multiple images of the same representative protein. Here, we frame a more challenging and useful problem where previously unseen proteins are to be classified.

Results

Using CD-tagging, we generated two new image datasets for evaluation of this problem, which contain several different proteins for each location class. Evaluation of previous methods on these new datasets showed that it is much harder to train a classifier that generalizes across different proteins than one that simply recognizes a protein it was trained on. We therefore developed and evaluated additional approaches, incorporating novel modifications of local features techniques. These extended the notion of local features to exploit both the protein image and any reference markers that were imaged in parallel. With these, we obtained a large accuracy improvement in our new datasets over existing methods. Additionally, these features help achieve classification improvements for other previously studied datasets.

Availability

The datasets are available for download at http://murphylab.web.cmu.edu/data/. The software was written in Python and C++ and is available under an open-source license at http://murphylab.web.cmu.edu/software/. The code is split into a library, which can be easily reused for other data and a small driver script for reproducing all results presented here. A step-by-step tutorial on applying the methods to new datasets is also available at that address.

Contact

murphy@cmu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-08 +23109552,MetaboLights--an open-access general-purpose repository for metabolomics studies and associated meta-data.,"MetaboLights (http://www.ebi.ac.uk/metabolights) is the first general-purpose, open-access repository for metabolomics studies, their raw experimental data and associated metadata, maintained by one of the major open-access data providers in molecular biology. Metabolomic profiling is an important tool for research into biological functioning and into the systemic perturbations caused by diseases, diet and the environment. The effectiveness of such methods depends on the availability of public open data across a broad range of experimental methods and conditions. The MetaboLights repository, powered by the open source ISA framework, is cross-species and cross-technique. It will cover metabolite structures and their reference spectra as well as their biological roles, locations, concentrations and raw data from metabolic experiments. Studies automatically receive a stable unique accession number that can be used as a publication reference (e.g. MTBLS1). At present, the repository includes 15 submitted studies, encompassing 93 protocols for 714 assays, and span over 8 different species including human, Caenorhabditis elegans, Mus musculus and Arabidopsis thaliana. Eight hundred twenty-seven of the metabolites identified in these studies have been mapped to ChEBI. These studies cover a variety of techniques, including NMR spectroscopy and mass spectrometry.",2012-10-29 +25176396,jvenn: an interactive Venn diagram viewer.,"

Background

Venn diagrams are commonly used to display list comparison. In biology, they are widely used to show the differences between gene lists originating from different differential analyses, for instance. They thus allow the comparison between different experimental conditions or between different methods. However, when the number of input lists exceeds four, the diagram becomes difficult to read. Alternative layouts and dynamic display features can improve its use and its readability.

Results

jvenn is a new JavaScript library. It processes lists and produces Venn diagrams. It handles up to six input lists and presents results using classical or Edwards-Venn layouts. User interactions can be controlled and customized. Finally, jvenn can easily be embedded in a web page, making it possible to display dynamic Venn diagrams.

Conclusions

jvenn is an open source component for web environments helping scientists to analyze their data. The library package, which comes with full documentation and an example, is freely available at http://bioinfo.genotoul.fr/jvenn.",2014-08-29 +23349698,The transcriptional and gene regulatory network of Lactococcus lactis MG1363 during growth in milk.,"In the present study we examine the changes in the expression of genes of Lactococcus lactis subspecies cremoris MG1363 during growth in milk. To reveal which specific classes of genes (pathways, operons, regulons, COGs) are important, we performed a transcriptome time series experiment. Global analysis of gene expression over time showed that L. lactis adapted quickly to the environmental changes. Using upstream sequences of genes with correlated gene expression profiles, we uncovered a substantial number of putative DNA binding motifs that may be relevant for L. lactis fermentative growth in milk. All available novel and literature-derived data were integrated into network reconstruction building blocks, which were used to reconstruct and visualize the L. lactis gene regulatory network. This network enables easy mining in the chrono-transcriptomics data. A freely available website at http://milkts.molgenrug.nl gives full access to all transcriptome data, to the reconstructed network and to the individual network building blocks.",2013-01-17 +30708757,"First Report of Ustilago cynodontis Causing Smut of Cynodon dactylon in Washington State, United States.","Bermudagrass (Cynodon dactylon) is an important warm-season perennial turf and forage grass that is typically grown in warm, tropical and subtropical climates. Smutted inflorescences of bermudagrass were observed and collected in Benton County, Washington, United States, in October of 2012 in an unmanaged, naturalized area located near the banks of the Columbia River and adjacent to large expanses of managed turf containing bermudagrass. 
The climate in this area is favorable to bermudagrass due to the relatively mild winters and hot, dry summers that usually occur in this region. The infected plants occurred in patches alongside healthy plants and several disease foci were observed along a 100-m transect of non-contiguous bermudagrass. The disease was severe wherever it occurred. Diseased inflorescences were covered with black-brown teliospores, distorted, and frequently failed to fully emerge and develop. Teliospores (n = 80) were irregularly globose to subglobose, 5.3 to 7.0 × 4.5 to 6.2 μm (mean 6.4 × 5.9 μm) and 6.2 to 8.8 × 5.3 to 7.0 μm (mean 7.0 × 6.5 μm), with a smooth wall approximately 1 μm thick, and were consistent with previous descriptions of Ustilago cynodontis teliospores (1,3). Teliospores germinated within 24 h when plated on 0.2% malt agar at 16°C and produced 4-celled basidia in a 3+1 arrangement, also consistent with U. cynodontis (3). Basidia gave rise to lateral and terminal, ovoid to long ellipsoidal basidiospores. Basidiospores budded or germinated by hyphae from which lateral or terminal aerial sporidia developed as previously described (3,4). DNA was extracted from sporidia of three single-spored isolates grown in malt extract broth. Complete nucleotide sequences of the 5.8S ribosomal RNA coding region and partial sequences of the internal transcribed spacer (ITS) regions 1 and 2 were obtained from the three isolates using ITS1 and ITS4 primers. The corresponding regions of the three aligned sequences (GenBank Accession Nos. KC920742 to KC920744) were identical and exhibited 99 to 100% identity with U. cynodontis strains previously deposited in GenBank (HM143013, AY740168, AF038825, and AY345000). Representative specimens were deposited in the WSU Mycological Herbarium as WSP 72345 to WSP 72348. This is the first report of U. cynodontis causing smut on bermudagrass in Washington State and represents the northernmost record of this fungus in North America (2). 
The occurrence of U. cynodontis in Washington State suggests that the pathogen may exist in other hot and dry areas of northwestern North America where bermudagrass is found associated with turf in recreational, landscape, or natural settings. References: (1) S. D. Brook. Trans. R. Soc. N. Z. 84:643, 1957. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Online. Retrieved from http://nt.ars-grin.gov/fungaldatabases , April 18, 2013. (3) C. T. Ingold. Trans. Br. Mycol. Soc. 83:251, 1984. (4) C. T. Ingold. Trans. Br. Mycol. Soc. 89:471, 1987.",2014-02-01 +30708746,"First Report of the White Pine Blister Rust Fungus, Cronartium ribicola, on Ribes odoratum in Indiana.","Cronartium ribicola J. C. Fisch., causal agent of white pine blister rust (WPBR), is one of the most damaging pathogens of five-needle pines, forming aecial states on the trunk and branches and causing cankering, topkill, and branch dieback. Infection can predispose hosts to attack by other pests such as bark beetles, and can result in host mortality. Various species of Ribes, Pedicularis, and Castilleja are alternate hosts on which C. ribicola forms its uredinial and telial states during the mid-summer to fall. In an effort to mitigate the damage caused by white pine blister rust, the planting of ornamental species of Ribes, such as R. occidentalis, is prohibited in 14 states. Indiana currently has no restrictions on the planting of Ribes spp. Since 2010, a Cronartium sp. has been observed producing uredinia and telia on R. odoratum 'Crandall' H.L. Wendl. leaves in an urban environment in West Lafayette, Indiana. Symptoms include yellow-orange lesions on the leaf upper surface with uredinia on the underside. These persist from late summer until leaf drop. Telia were collected in 2011 to establish the identity of the causal agent using morphological and molecular analyses. 
Morphological comparisons between this specimen and other Cronartium species were made using Arthur (2). Filiform telial columns ranged from 0.5 to 1.5 mm in length. Teliospores were cylindrical to sub-ventricose, truncate on either end with one end generally tapering more than the other, and measured 9.0 to 18.6 × 37.2 to 60.0 μm (average 11.9 × 47.4 μm from 30 spores across 4 leaves). These teliospore measurements overlap those of C. ribicola and C. occidentale, but are more consistent with C. ribicola, in which the spores are wider and longer (8 to 12 × 30 to 60 μm) than in C. occidentale (9 to 10 × 27 to 56 μm). For molecular analyses, two nuclear ribosomal loci were sequenced: the internal transcribed spacer regions 1, 2, and 5.8S (ITS) and the 5' end of the large subunit (28S) (1). The ITS sequence was 665 bp long (KF387533) and the 28S was 892 bp (KC876675). These sequences were queried to GenBank using a BLASTn search. The 28S shared 99% identity (891/892 bp) and the ITS shared 100% identity (663/663 bp) to other published C. ribicola sequences with no close matches to any other species with either locus. Both morphological and molecular methods indicate this species to be C. ribicola, making this a first report of white pine blister rust on R. odoratum in Indiana. This fungus has been observed previously on R. odoratum in the northeastern United States (Connecticut, Massachusetts, Rhode Island, Vermont, and New Hampshire), the Rockies (Colorado), northwestern United States (Washington), and Canada (3). In Indiana, C. ribicola has also been reported on R. cynosbati. There are no other reports of this fungus on any other host within the state. However, the aecial host, Pinus strobus, does grow within the state, and within West Lafayette. To our knowledge, WPBR has only been observed (not reported) once in Indiana in the past 30 years (Paul Pecknold, personal communication). Further monitoring of C. 
ribicola hosts is needed in Indiana to determine the extent of the disease. The specimen has been vouchered in the Arthur Herbarium (PUR N6734). References: (1) M. C. Aime. Mycoscience 47:112. 2006. (2) J. F. Arthur. Manual of the Rusts in United States and Canada. Purdue Research Foundation, 1934. (3) D. F. Farr and A. Y. Rossman. Fungal Databases Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ April 23, 2013.",2014-02-01 +25481006,Improved gene tree error correction in the presence of horizontal gene transfer.,"

Motivation

The accurate inference of gene trees is a necessary step in many evolutionary studies. Although the problem of accurate gene tree inference has received considerable attention, most existing methods are only applicable to gene families unaffected by horizontal gene transfer. As a result, the accurate inference of gene trees affected by horizontal gene transfer remains a largely unaddressed problem.

Results

In this study, we introduce a new and highly effective method for gene tree error correction in the presence of horizontal gene transfer. Our method efficiently models horizontal gene transfers, gene duplications and losses, and uses a statistical hypothesis testing framework [Shimodaira-Hasegawa (SH) test] to balance sequence likelihood with topological information from a known species tree. Using a thorough simulation study, we show that existing phylogenetic methods yield inaccurate gene trees when applied to horizontally transferred gene families and that our method dramatically improves gene tree accuracy. We apply our method to a dataset of 11 cyanobacterial species and demonstrate the large impact of gene tree accuracy on downstream evolutionary analyses.

Availability and implementation

An implementation of our method is available at http://compbio.mit.edu/treefix-dtl/

Contact

mukul@engr.uconn.edu or manoli@mit.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-12-05 +24895432,"CFM-ID: a web server for annotation, spectrum prediction and metabolite identification from tandem mass spectra.","CFM-ID is a web server supporting three tasks associated with the interpretation of tandem mass spectra (MS/MS) for the purpose of automated metabolite identification: annotation of the peaks in a spectrum for a known chemical structure; prediction of spectra for a given chemical structure and putative metabolite identification--a predicted ranking of possible candidate structures for a target spectrum. The algorithms used for these tasks are based on Competitive Fragmentation Modeling (CFM), a recently introduced probabilistic generative model for the MS/MS fragmentation process that uses machine learning techniques to learn its parameters from data. These algorithms have been extensively tested on multiple datasets and have been shown to out-perform existing methods such as MetFrag and FingerId. This web server provides a simple interface for using these algorithms and a graphical display of the resulting annotations, spectra and structures. CFM-ID is made freely available at http://cfmid.wishartlab.com.",2014-06-03 +24431986,MEG and EEG data analysis with MNE-Python.,"Magnetoencephalography and electroencephalography (M/EEG) measure the weak electromagnetic signals generated by neuronal activity in the brain. Using these signals to characterize and locate neural activation in the brain is a challenge that requires expertise in physics, signal processing, statistics, and numerical methods. As part of the MNE software suite, MNE-Python is an open-source software package that addresses this challenge by providing state-of-the-art algorithms implemented in Python that cover multiple methods of data preprocessing, source localization, statistical analysis, and estimation of functional connectivity between distributed brain regions. 
All algorithms and utility functions are implemented in a consistent manner with well-documented interfaces, enabling users to create M/EEG data analysis pipelines by writing Python scripts. Moreover, MNE-Python is tightly integrated with the core Python libraries for scientific computation (NumPy, SciPy) and visualization (matplotlib and Mayavi), as well as the greater neuroimaging ecosystem in Python via the Nibabel package. The code is provided under the new BSD license allowing code reuse, even in commercial products. Although MNE-Python has only been under heavy development for a couple of years, it has rapidly evolved with expanded analysis capabilities and pedagogical tutorials because multiple labs have collaborated during code development to help share best practices. MNE-Python also gives easy access to preprocessed datasets, helping users to get started quickly and facilitating reproducibility of methods by other researchers. Full documentation, including dozens of examples, is available at http://martinos.org/mne.",2013-12-26 +24204222,Properties of MHC class I presented peptides that enhance immunogenicity.,"T-cells have to recognize peptides presented on MHC molecules to be activated and elicit their effector functions. Several studies demonstrate that some peptides are more immunogenic than others and therefore more likely to be T-cell epitopes. We set out to determine which properties cause such differences in immunogenicity. To this end, we collected and analyzed a large set of data describing the immunogenicity of peptides presented on various MHC-I molecules. Two main conclusions could be drawn from this analysis: First, in line with previous observations, we showed that positions P4-6 of a presented peptide are more important for immunogenicity. Second, some amino acids, especially those with large and aromatic side chains, are associated with immunogenicity. 
This information was combined into a simple model that was used to demonstrate that immunogenicity is, to a certain extent, predictable. This model (made available at http://tools.iedb.org/immunogenicity/) was validated with data from two independent epitope discovery studies. Interestingly, with this model we could show that T-cells are equipped to better recognize viral than human (self) peptides. After the past successful elucidation of different steps in the MHC-I presentation pathway, the identification of variables that influence immunogenicity will be an important next step in the investigation of T-cell epitopes and our understanding of cellular immune responses.",2013-10-24 +25855680,"Efficacy and safety of a novel bioabsorbable polymer-coated, everolimus-eluting coronary stent: the EVOLVE II Randomized Trial. ","Drug eluting stents with durable polymers may be associated with hypersensitivity, delayed healing, and incomplete endothelialization, which may contribute to late/very late stent thrombosis and the need for prolonged dual antiplatelet therapy. Bioabsorbable polymers may facilitate stent healing, thus enhancing clinical safety. The SYNERGY stent is a thin-strut, platinum chromium metal alloy platform with an ultrathin bioabsorbable Poly(D,L-lactide-co-glycolide) abluminal everolimus-eluting polymer. We performed a multicenter, randomized controlled trial for regulatory approval to determine noninferiority of the SYNERGY stent to the durable polymer PROMUS Element Plus everolimus-eluting stent. Patients (n=1684) scheduled to undergo percutaneous coronary intervention for non-ST-segment-elevation acute coronary syndrome or stable coronary artery disease were randomized to receive either the SYNERGY stent or the PROMUS Element Plus stent. 
The primary end point of 12-month target lesion failure was observed in 6.7% of SYNERGY and 6.5% PROMUS Element Plus treated subjects by intention-to-treat (P=0.83 for difference; P=0.0005 for noninferiority), and 6.4% in both the groups by per-protocol analysis (P=0.0003 for noninferiority). Clinically indicated revascularization of the target lesion or definite/probable stent thrombosis were observed in 2.6% versus 1.7% (P=0.21) and 0.4% versus 0.6% (P=0.50) of SYNERGY versus PROMUS Element Plus-treated subjects, respectively. In this randomized trial, the SYNERGY bioabsorbable polymer everolimus-eluting stent was noninferior to the PROMUS Element Plus everolimus-eluting stent with respect to 1-year target lesion failure. These data support the relative safety and efficacy of SYNERGY in a broad range of patients undergoing percutaneous coronary intervention. URL: http://www.clinicaltrials.gov. Unique identifier: NCT01665053.",2015-04-01 +24481593,Parallel implementation of 3D protein structure similarity searches using a GPU and the CUDA.,"Searching for similar 3D protein structures is one of the primary processes employed in the field of structural bioinformatics. However, the computational complexity of this process means that it is constantly necessary to search for new methods that can perform such a process faster and more efficiently. Finding molecular substructures that complex protein structures have in common is still a challenging task, especially when entire databases containing tens or even hundreds of thousands of protein structures must be scanned. Graphics processing units (GPUs) and general purpose graphics processing units (GPGPUs) can perform many time-consuming and computationally demanding processes much more quickly than a classical CPU can. In this paper, we describe the GPU-based implementation of the CASSERT algorithm for 3D protein structure similarity searching. 
This algorithm is based on the two-phase alignment of protein structures when matching fragments of the compared proteins. The GPU (GeForce GTX 560Ti: 384 cores, 2GB RAM) implementation of CASSERT (""GPU-CASSERT"") parallelizes both alignment phases and yields an average 180-fold increase in speed over its CPU-based, single-core implementation on an Intel Xeon E5620 (2.40GHz, 4 cores). In this paper, we show that massive parallelization of the 3D structure similarity search process on many-core GPU devices can reduce the execution time of the process, allowing it to be performed in real time. GPU-CASSERT is available at: http://zti.polsl.pl/dmrozek/science/gpucassert/cassert.htm.",2014-01-31 +24451008,"ZINClick: a database of 16 million novel, patentable, and readily synthesizable 1,4-disubstituted triazoles.","Since Professors Sharpless, Finn, and Kolb first introduced the concept of ""click reactions"" in 2001 as powerful tools in drug discovery, 1,4-disubstituted-1,2,3-triazoles have become important in medicinal chemistry due to the simultaneous discovery by Sharpless, Fokin, and Meldal of a perfect click 1,3-dipolar cycloaddition reaction between azides and alkynes catalyzed by copper salts. Because of their chemical features, these triazoles are proposed to be aggressive pharmacophores that participate in drug-receptor interactions while maintaining an excellent chemical and metabolic profile. Surprisingly, no virtual libraries of 1,4-disubstituted-1,2,3-triazoles have been generated for the systematic investigation of the click-chemical space. In this manuscript, a database of triazoles called ZINClick is generated from literature-reported alkynes and azides that can be synthesized within three steps from commercially available products. This combinatorial database contains over 16 million 1,4-disubstituted-1,2,3-triazoles that are easily synthesizable, new, and patentable! 
The structural diversity of ZINClick ( http://www.symech.it/ZINClick ) will be explored. ZINClick will also be compared to other available databases, and its application during the design of novel bioactive molecules containing triazole nuclei will be discussed.",2014-01-31 +25387525,Further improvements to linear mixed models for genome-wide association studies.,"We examine improvements to the linear mixed model (LMM) that better correct for population structure and family relatedness in genome-wide association studies (GWAS). LMMs rely on the estimation of a genetic similarity matrix (GSM), which encodes the pairwise similarity between every two individuals in a cohort. These similarities are estimated from single nucleotide polymorphisms (SNPs) or other genetic variants. Traditionally, all available SNPs are used to estimate the GSM. In empirical studies across a wide range of synthetic and real data, we find that modifications to this approach improve GWAS performance as measured by type I error control and power. Specifically, when only population structure is present, a GSM constructed from SNPs that well predict the phenotype in combination with principal components as covariates controls type I error and yields more power than the traditional LMM. In any setting, with or without population structure or family relatedness, a GSM consisting of a mixture of two component GSMs, one constructed from all SNPs and another constructed from SNPs that well predict the phenotype again controls type I error and yields more power than the traditional LMM. Software implementing these improvements and the experimental comparisons are available at http://microsoft.com/science.",2014-11-12 +24464771,Anaesthetic regimens for day-procedure laparoscopic cholecystectomy.,"

Background

Day surgery involves admission of selected patients to hospital for a planned surgical procedure with the patients returning home on the same day. An anaesthetic regimen usually involves a combination of an anxiolytic, an induction agent, a maintenance agent, a method of maintaining the airway (laryngeal mask versus endotracheal intubation), and a muscle relaxant. The effect of anaesthesia may continue after the completion of surgery and can delay discharge. Various regimens of anaesthesia have been suggested for day-procedure laparoscopic cholecystectomy.

Objectives

To compare the benefits and harms of different anaesthetic regimens (risks of mortality and morbidity, measures of recovery after surgery) in patients undergoing day-procedure laparoscopic cholecystectomy.

Search methods

We searched the Cochrane Central Register of Controlled Trials (CENTRAL) in The Cochrane Library (Issue 10, 2013), MEDLINE (PubMed) (1987 to November 2013), EMBASE (OvidSP) (1987 to November 2013), Science Citation Index Expanded (ISI Web of Knowledge) (1987 to November 2013), LILACS (Virtual Health Library) (1987 to November 2013), metaRegister of Controlled Trials (http://www.controlled-trials.com/mrct/) (November 2013), World Health Organization (WHO) International Clinical Trials Registry Platform (ICTRP) portal (November 2013), and ClinicalTrials.gov (November 2013).

Selection criteria

We included randomized clinical trials comparing different anaesthetic regimens during elective day-procedure laparoscopic cholecystectomy (irrespective of language or publication status).

Data collection and analysis

Two authors independently assessed trials for inclusion and independently extracted the data. We calculated the risk ratio, rate ratio or mean difference with 95% confidence intervals based on intention-to-treat or available data analysis.

Main results

We included 11 trials involving 1069 participants at low anaesthetic risk. The sample size varied from 40 to 300 participants. We included 23 comparisons. All trials were at a high risk of bias. We were unable to perform a meta-analysis because there were no two trials involving the same comparison. Primary outcomes included perioperative mortality, serious morbidity and proportion of patients who were discharged on the same day. There were no perioperative deaths or serious adverse events in either group in the only trial that reported this information (0/60). There was no clear evidence of a difference in the proportion of patients who were discharged on the same day between any of the comparisons. Overall, 472/554 patients (85%) included in this review were discharged as day-procedure laparoscopic cholecystectomy patients. Secondary outcomes included hospital readmissions, health-related quality of life, pain, return to activity and return to work. There was no clear evidence of a difference in hospital readmissions within 30 days in the only comparison in which this outcome was reported. One readmission was reported in the 60 patients (2%) in whom this outcome was assessed. Quality of life was not reported in any of the trials. There was no clear evidence of a difference in the pain intensity, measured by a visual analogue scale, between comparators in the only trial which reported the pain intensity at between four and eight hours after surgery. Times to return to activity and return to work were not reported in any of the trials.

Authors' conclusions

There is currently insufficient evidence to conclude that one anaesthetic regimen for day-procedure laparoscopic cholecystectomy is to be preferred over another. However, the data are sparse (that is, there were few trials under each comparison and the trials had few participants) and further well designed randomized trials at low risk of bias and which are powered to measure differences in clinically important outcomes are necessary to determine the optimal anaesthetic regimen for day-procedure laparoscopic cholecystectomy, one of the commonest procedures performed in the western world.",2014-01-24 +22085524,POPISK: T-cell reactivity prediction using support vector machines and string kernels.,"

Background

Accurate prediction of peptide immunogenicity and characterization of relation between peptide sequences and peptide immunogenicity will be greatly helpful for vaccine designs and understanding of the immune system. In contrast to the prediction of antigen processing and presentation pathway, the prediction of subsequent T-cell reactivity is a much harder topic. Previous studies of identifying T-cell receptor (TCR) recognition positions were based on small-scale analyses using only a few peptides and concluded different recognition positions such as positions 4, 6 and 8 of peptides with length 9. Large-scale analyses are necessary to better characterize the effect of peptide sequence variations on T-cell reactivity and design predictors of a peptide's T-cell reactivity (and thus immunogenicity). The identification and characterization of important positions influencing T-cell reactivity will provide insights into the underlying mechanism of immunogenicity.

Results

This work establishes a large dataset by collecting immunogenicity data from three major immunology databases. In order to consider the effect of MHC restriction, peptides are classified by their associated MHC alleles. Subsequently, a computational method (named POPISK) using support vector machine with a weighted degree string kernel is proposed to predict T-cell reactivity and identify important recognition positions. POPISK yields a mean 10-fold cross-validation accuracy of 68% in predicting T-cell reactivity of HLA-A2-binding peptides. POPISK is capable of predicting immunogenicity with scores that can also correctly predict the change in T-cell reactivity related to point mutations in epitopes reported in previous studies using crystal structures. Thorough analyses of the prediction results identify the important positions 4, 6, 8 and 9, and yield insights into the molecular basis for TCR recognition. Finally, we relate this finding to physicochemical properties and structural features of the MHC-peptide-TCR interaction.

Conclusions

A computational method POPISK is proposed to predict immunogenicity with scores which are useful for predicting immunogenicity changes made by single-residue modifications. The web server of POPISK is freely available at http://iclab.life.nctu.edu.tw/POPISK.",2011-11-15 +25171961,Systematic discovery of cofactor motifs from ChIP-seq data by SIOMICS.,"Understanding transcriptional regulatory elements and particularly the transcription factor binding sites represents a significant challenge in computational biology. The chromatin immunoprecipitation followed by massive parallel sequencing (ChIP-seq) experiments provide an unprecedented opportunity to study transcription factor binding sites on the genome-wide scale. Here we describe a recently developed tool, SIOMICS, to systematically discover motifs and binding sites of transcription factors and their cofactors from ChIP-seq data. Unlike other tools, SIOMICS explores the co-binding properties of multiple transcription factors in short regions to predict motifs and binding sites. We have previously shown that the original SIOMICS method predicts motifs and binding sites of more cofactors in more accurate and time-effective ways than two popular methods. In this paper, we present the extended SIOMICS method, SIOMICS_Extension, and demonstrate its usage for systematic discovery of cofactor motifs and binding sites. The SIOMICS tool, including SIOMICS and SIOMICS_Extension, are available at http://hulab.ucf.edu/research/projects/SIOMICS/SIOMICS.html.",2014-08-27 +22909347,Find pairs: the module for protein quantification of the PeakQuant software suite.,"Accurate quantification of proteins is one of the major tasks in current proteomics research. To address this issue, a wide range of stable isotope labeling techniques have been developed, allowing one to quantitatively study thousands of proteins by means of mass spectrometry. In this article, the FindPairs module of the PeakQuant software suite is detailed. 
It facilitates the automatic determination of protein abundance ratios based on the automated analysis of stable isotope-coded mass spectrometric data. Furthermore, it implements statistical methods to determine outliers due to biological as well as technical variance of proteome data obtained in replicate experiments. This provides an important means to evaluate the significance in obtained protein expression data. For demonstrating the high applicability of FindPairs, we focused on the quantitative analysis of proteome data acquired in (14)N/(15)N labeling experiments. We further provide a comprehensive overview of the features of the FindPairs software, and compare these with existing quantification packages. The software presented here supports a wide range of proteomics applications, allowing one to quantitatively assess data derived from different stable isotope labeling approaches, such as (14)N/(15)N labeling, SILAC, and iTRAQ. The software is publicly available at http://www.medizinisches-proteom-center.de/software and free for academic use.",2012-08-21 +24099000,Computational approaches for discovery of common immunomodulators in fungal infections: towards broad-spectrum immunotherapeutic interventions.,"

Background

Fungi are the second most abundant type of human pathogens. Invasive fungal pathogens are leading causes of life-threatening infections in clinical settings. Toxicity to the host and drug-resistance are two major deleterious issues associated with existing antifungal agents. Increasing a host's tolerance and/or immunity to fungal pathogens has potential to alleviate these problems. A host's tolerance may be improved by modulating the immune system such that it responds more rapidly and robustly in all facets, ranging from the recognition of pathogens to their clearance from the host. An understanding of biological processes and genes that are perturbed during attempted fungal exposure, colonization, and/or invasion will help guide the identification of endogenous immunomodulators and/or small molecules that activate host-immune responses such as specialized adjuvants.

Results

In this study, we present computational techniques and approaches using publicly available transcriptional data sets, to predict immunomodulators that may act against multiple fungal pathogens. Our study analyzed data sets derived from host cells exposed to five fungal pathogens, namely, Alternaria alternata, Aspergillus fumigatus, Candida albicans, Pneumocystis jirovecii, and Stachybotrys chartarum. We observed statistically significant associations between host responses to A. fumigatus and C. albicans. Our analysis identified biological processes that were consistently perturbed by these two pathogens. These processes contained both immune response-inducing genes such as MALT1, SERPINE1, ICAM1, and IL8, and immune response-repressing genes such as DUSP8, DUSP6, and SPRED2. We hypothesize that these genes belong to a pool of common immunomodulators that can potentially be activated or suppressed (agonized or antagonized) in order to render the host more tolerant to infections caused by A. fumigatus and C. albicans.

Conclusions

Our computational approaches and methodologies described here can now be applied to newly generated or expanded data sets for further elucidation of additional drug targets. Moreover, identified immunomodulators may be used to generate experimentally testable hypotheses that could help in the discovery of broad-spectrum immunotherapeutic interventions. All of our results are available at the following supplementary website: http://bioinformatics.cs.vt.edu/~murali/supplements/2013-kidane-bmc.",2013-10-07 +23943636,DRAW+SneakPeek: analysis workflow and quality metric management for DNA-seq experiments.,"

Summary

We report our new DRAW+SneakPeek software for DNA-seq analysis. DNA resequencing analysis workflow (DRAW) automates the workflow of processing raw sequence reads including quality control, read alignment and variant calling on high-performance computing facilities such as Amazon elastic compute cloud. SneakPeek provides an effective interface for reviewing dozens of quality metrics reported by DRAW, so users can assess the quality of data and diagnose problems in their sequencing procedures. Both DRAW and SneakPeek are freely available under the MIT license, and are available as Amazon machine images to be used directly on Amazon cloud with minimal installation.

Availability

DRAW+SneakPeek is released under the MIT license and is available for academic and nonprofit use for free. The information about source code, Amazon machine images and instructions on how to install and run DRAW+SneakPeek locally and on Amazon elastic compute cloud is available at the National Institute on Aging Genetics of Alzheimer's Disease Data Storage Site (http://www.niagads.org/) and Wang lab Web site (http://wanglab.pcbi.upenn.edu/).",2013-08-13 +23813010,Learning subgroup-specific regulatory interactions and regulator independence with PARADIGM.,"

Unlabelled

High-dimensional '-omics' profiling provides a detailed molecular view of individual cancers; however, understanding the mechanisms by which tumors evade cellular defenses requires deep knowledge of the underlying cellular pathways within each cancer sample. We extended the PARADIGM algorithm (Vaske et al., 2010, Bioinformatics, 26, i237-i245), a pathway analysis method for combining multiple '-omics' data types, to learn the strength and direction of 9139 gene and protein interactions curated from the literature. Using genomic and mRNA expression data from 1936 samples in The Cancer Genome Atlas (TCGA) cohort, we learned interactions that provided support for and relative strength of 7138 (78%) of the curated links. Gene set enrichment found that genes involved in the strongest interactions were significantly enriched for transcriptional regulation, apoptosis, cell cycle regulation and response to tumor cells. Within the TCGA breast cancer cohort, we assessed different interaction strengths between breast cancer subtypes, and found interactions associated with the MYC pathway and the ER alpha network to be among the most differential between basal and luminal A subtypes. PARADIGM with the Naive Bayesian assumption produced gene activity predictions that, when clustered, found groups of patients with better separation in survival than both the original version of PARADIGM and a version without the assumption. We found that this Naive Bayes assumption was valid for the vast majority of co-regulators, indicating that most co-regulators act independently on their shared target.

Availability

http://paradigm.five3genomics.com.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +23815474,VIRGO: visualization of A-to-I RNA editing sites in genomic sequences.,"

Background

RNA Editing is a type of post-transcriptional modification that takes place in the eukaryotes. It alters the sequence of primary RNA transcripts by deleting, inserting or modifying residues. Several forms of RNA editing have been discovered including A-to-I, C-to-U, U-to-C and G-to-A. In recent years, the application of global approaches to the study of A-to-I editing, including high throughput sequencing, has led to important advances. However, in spite of enormous efforts, the real biological mechanism underlying this phenomenon remains unknown.

Description

In this work, we present VIRGO (http://atlas.dmi.unict.it/virgo/), a web-based tool that maps A-to-G mismatches between genomic and EST sequences as candidate A-to-I editing sites. VIRGO is built on top of a knowledge-base integrating information of genes from UCSC, EST of NCBI, SNPs, DARNED, and Next Generation Sequencing data. The tool is equipped with a user-friendly interface allowing users to analyze genomic sequences in order to identify candidate A-to-I editing sites.

Conclusions

VIRGO is a powerful tool allowing a systematic identification of putative A-to-I editing sites in genomic sequences. The integration of NGS data allows the computation of p-values and adjusted p-values to measure the mapped editing sites confidence. The whole knowledge base is available for download and will be continuously updated as new NGS data becomes available.",2013-04-22 +23281970,"Integration of interactive, multi-scale network navigation approach with Cytoscape for functional genomics in the big data era.","

Background

The overwhelming amount of network data in functional genomics is making its visualization cluttered with jumbling nodes and edges. Such cluttered network visualization, which is known as ""hair-balls"", is significantly hindering data interpretation and analysis of researchers. Effective navigation approaches that can always abstract network data properly and present them insightfully are hence required, to help researchers interpret the data and acquire knowledge efficiently. Cytoscape is a de facto standard platform for network visualization and analysis, which has many users around the world. Apart from its core sophisticated features, it easily allows for extension of the functionalities by loading extra plug-ins.

Results

We developed NaviClusterCS, which enables researchers to interactively navigate large biological networks of ~100,000 nodes in a ""Google Maps-like"" manner in the Cytoscape environment. NaviClusterCS rapidly and automatically identifies biologically meaningful clusters in large networks, e.g., proteins sharing similar biological functions in protein-protein interaction networks. Then, it displays not all nodes but only preferable numbers of those clusters at any magnification to avoid creating the cluttered network visualization, while its zooming and re-centering functions still enable researchers to interactively analyze the networks in detail. Its application to a real Arabidopsis co-expression network dataset illustrated a practical use of the tool for suggesting knowledge that is hidden in large biological networks and difficult to be obtained using other visualization methods.

Conclusions

NaviClusterCS provides interactive and multi-scale network navigation to a wide range of biologists in the big data era, via the de facto standard platform for network visualization. It can be freely downloaded at http://navicluster.cb.k.u-tokyo.ac.jp/cs/ and installed as a plug-in of Cytoscape.",2012-12-13 +26033131,The role of noninvasive ventilation in the management and mitigation of exacerbations and hospital admissions/readmissions for the patient with moderate to severe COPD (multimedia activity).,"As seen in this CME online activity (available at http://journal.cme.chestnet.org/home-niv-copd), COPD is a common and debilitating disease and is currently the third leading cause of death in the United States. The role of noninvasive ventilation (NIV) in the management of severe, hypercapnic COPD has been controversial. However, it was concluded that current data would support the following recommendations. Patients with COPD with a waking Paco2 > 50 to 52 mm Hg, an overnight Paco2 > 55 mm Hg, or both who are symptomatic and compliant with other therapies should be eligible for NIV. In addition, multiple previous hospital admissions for COPD exacerbation, requiring noninvasive/invasive mechanical ventilation, strongly suggest a need for chronic NIV. Patients with COPD with a BMI > 30 kg/m2 respond particularly well to this therapy. When the decision is made to start NIV, this treatment is probably best initiated during a short hospitalization, although this can be accomplished in the clinic, home, or sleep laboratory if well-trained clinicians are available. Newer modes of NIV such as volume-assured pressure support, particularly with autotitrating expiratory positive airway pressure (EPAP), may create the opportunity for home NIV initiation easier for less experienced physicians. 
Regardless of the mode selected, inspiratory pressures must be in the 20 to 25 cm H2O range to meaningfully increase tidal volume, reduce work of breathing, and, importantly, reduce waking arterial Paco2. EPAP is currently set at 4 to 5 cm H2O, although future technologies may allow this to be individualized to maximally reduce auto-positive end expiratory pressure. The NIV device should have a backup rate although it is controversial as to whether this should be set at a high (18-20 breaths/min) vs a low (8-10 breaths/min) rate. The proper use of NIV in appropriately chosen patients with COPD can improve quality of life and increase survival. Ongoing studies are assessing if the frequency of future hospitalizations can be reduced with NIV. Thus, NIV should be strongly considered in any patients with COPD meeting the criteria described here.",2015-06-01 +24489849,Improving predictions of protein-protein interfaces by combining amino acid-specific classifiers based on structural and physicochemical descriptors with their weighted neighbor averages.,"Protein-protein interactions are involved in nearly all regulatory processes in the cell and are considered one of the most important issues in molecular biology and pharmaceutical sciences but are still not fully understood. Structural and computational biology contributed greatly to the elucidation of the mechanism of protein interactions. In this paper, we present a collection of the physicochemical and structural characteristics that distinguish interface-forming residues (IFR) from free surface residues (FSR). We formulated a linear discriminative analysis (LDA) classifier to assess whether chosen descriptors from the BlueStar STING database (http://www.cbi.cnptia.embrapa.br/SMS/) are suitable for such a task. 
Receiver operating characteristic (ROC) analysis indicates that the particular physicochemical and structural descriptors used for building the linear classifier perform much better than a random classifier and in fact, successfully outperform some of the previously published procedures, whose performance indicators were recently compared by other research groups. The results presented here show that the selected set of descriptors can be utilized to predict IFRs, even when homologue proteins are missing (particularly important for orphan proteins where no homologue is available for comparative analysis/indication) or, when certain conformational changes accompany interface formation. The development of amino acid type specific classifiers is shown to increase IFR classification performance. Also, we found that the addition of an amino acid conservation attribute did not improve the classification prediction. This result indicates that the increase in predictive power associated with amino acid conservation is exhausted by adequate use of an extensive list of independent physicochemical and structural parameters that, by themselves, fully describe the nano-environment at protein-protein interfaces. The IFR classifier developed in this study is now integrated into the BlueStar STING suite of programs. Consequently, the prediction of protein-protein interfaces for all proteins available in the PDB is possible through STING_interfaces module, accessible at the following website: (http://www.cbi.cnptia.embrapa.br/SMS/predictions/index.html).",2014-01-28 +23299630,"Some case studies on application of ""r(m)2"" metrics for judging quality of quantitative structure-activity relationship predictions: emphasis on scaling of response data.","Quantitative structure-activity relationship (QSAR) techniques have found wide application in the fields of drug design, property modeling, and toxicity prediction of untested chemicals. 
A rigorous validation of the developed models plays the key role for their successful application in prediction for new compounds. The r(m)(2) metrics introduced by Roy et al. have been extensively used by different research groups for validation of regression-based QSAR models. This concept has been further advanced here with introduction of scaling of response data prior to computation of r(m)(2). Further, a web application (accessible from http://aptsoftware.co.in/rmsquare/ and http://203.200.173.43:8080/rmsquare/) for calculation of the r(m)(2) metrics has been introduced here. The present study reports that the web application can be easily used for computation of r(m)(2) metrics provided observed and QSAR-predicted data for a set of compounds are available. Further, scaling of response data is recommended prior to r(m)(2) calculation.",2013-01-08 +25884689,3-dimensional digital reconstruction of the murine coronary system for the evaluation of chronic allograft vasculopathy.,"

Background

Chronic allograft vasculopathy (CAV) is a major mechanism of graft failure of transplanted organs in humans. Morphometric analysis of coronary arteries enables the quantitation of CAV in mouse models of heart transplantation. However, conventional histological procedures using single 2-dimensional sections limit the accuracy of CAV quantification. The aim of this study is to improve the accuracy of CAV quantification by reconstructing the murine coronary system in 3-dimensions (3D) and using virtual reconstruction and volumetric analysis to precisely assess neointimal thickness.

Methods

Mouse tissue samples, native heart and transplanted hearts with chronic allograft vasculopathy, were collected and analyzed. Paraffin embedded samples were serially sectioned, stained and digitized using whole slide digital imaging techniques under normal and ultraviolet lighting. Sophisticated software tools were used to generate and manipulate 3D reconstructions of the major coronary arteries and branches.

Results

The 3D reconstruction provides not only accurate measurements but also exact volumetric data of vascular lesions. This virtual coronary arteriography demonstrates that the vasculopathy lesions in this model are localized to the proximal coronary segments. In addition, virtual rotation and volumetric analysis enabled more precise measurements of CAV than single, randomly oriented histologic sections, and offer an improved readout for this important experimental model.

Conclusions

We believe 3D reconstruction of 2D histological slides will provide new insights into pathological mechanisms in which structural abnormalities play a role in the development of a disease. The techniques we describe are applicable to the analysis of arteries, veins, bronchioles and similar sized structures in a variety of tissue types and disease model systems.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/3772457541477230 .",2015-03-28 +24618474,Characterization of p38 MAPK isoforms for drug resistance study using systems biology approach.,"

Motivation

p38 mitogen-activated protein kinase activation plays an important role in resistance to chemotherapeutic cytotoxic drugs in treating multiple myeloma (MM). However, how the p38 mitogen-activated protein kinase signaling pathway is involved in drug resistance, in particular the roles that the various p38 isoforms play, remains largely unknown.

Method

To explore the underlying mechanisms, we developed a novel systems biology approach by integrating liquid chromatography-mass spectrometry and reverse phase protein array data from human MM cell lines with computational pathway models in which the unknown parameters were inferred using a proposed novel algorithm called modularized factor graph.

Results

New mechanisms predicted by our models suggest that combined activation of various p38 isoforms may result in drug resistance in MM via regulating the related pathways including extracellular signal-regulated kinase (ERK) pathway and NFκB pathway. ERK pathway regulating cell growth is synergistically regulated by p38δ isoform, whereas nuclear factor kappa B (NFκB) pathway regulating cell apoptosis is synergistically regulated by p38α isoform. This finding that p38δ isoform promotes the phosphorylation of ERK1/2 in MM cells treated with bortezomib was validated by western blotting. Based on the predicted mechanisms, we further screened drug combinations in silico and found that a promising drug combination targeting ERK1/2 and NFκB might reduce the effects of drug resistance in MM cells. This study provides a framework of a systems biology approach to studying drug resistance and drug combination selection.

Availability and implementation

RPPA experimental Data and Matlab source codes of modularized factor graph for parameter estimation are freely available online at http://ctsb.is.wfubmc.edu/publications/modularized-factor-graph.php.",2014-03-10 +25701570,Population-scale three-dimensional reconstruction and quantitative profiling of microglia arbors.,"

Motivation

The arbor morphologies of brain microglia are important indicators of cell activation. This article fills the need for accurate, robust, adaptive and scalable methods for reconstructing 3-D microglial arbors and quantitatively mapping microglia activation states over extended brain tissue regions.

Results

Thick rat brain sections (100-300 µm) were multiplex immunolabeled for IBA1 and Hoechst, and imaged by step-and-image confocal microscopy with automated 3-D image mosaicing, producing seamless images of extended brain regions (e.g. 5903 × 9874 × 229 voxels). An over-complete dictionary-based model was learned for the image-specific local structure of microglial processes. The microglial arbors were reconstructed seamlessly using an automated and scalable algorithm that exploits microglia-specific constraints. This method detected 80.1 and 92.8% more centered arbor points, and 53.5 and 55.5% fewer spurious points than existing vesselness and LoG-based methods, respectively, and the traces were 13.1 and 15.5% more accurate based on the DIADEM metric. The arbor morphologies were quantified using Scorcioni's L-measure. Coifman's harmonic co-clustering revealed four morphologically distinct classes that concord with known microglia activation patterns. This enabled us to map spatial distributions of microglial activation and cell abundances.

Availability and implementation

Experimental protocols, sample datasets, scalable open-source multi-threaded software implementation (C++, MATLAB) in the electronic supplement, and website (www.farsight-toolkit.org). http://www.farsight-toolkit.org/wiki/Population-scale_Three-dimensional_Reconstruction_and_Quanti-tative_Profiling_of_Microglia_Arbors

Contact

broysam@central.uh.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-19 +21372188,Multilocus sequence typing of Mycoplasma agalactiae.,"Mycoplasma agalactiae is the main cause of contagious agalactia, a serious disease of sheep and goats, which has major clinical and economic impacts. We have developed a multilocus sequence typing (MLST) scheme using the sequenced genomes of the M. agalactiae strains PG2 and 5632. An MLST scheme based on the genes gltX, metS, gyrB, tufA and dnaA was designed and in total 3468 bp of sequence were analysed for each strain. MLST offers a highly discriminatory typing method for M. agalactiae and was capable of subdividing 53 strains into 17 distinct sequence types, largely according to geographical origin. MLST detected unexpected diversity in recent isolates from Spain, identifying two novel outliers, and enabled typing of novel Mongolian isolates for the first time. Genetic diversity in the sequenced regions was largely due to mutation, with recombination playing a much smaller role. A web-accessible database has been set up for this MLST scheme for M. agalactiae: http://pubmlst.org/magalactiae/. MLST offers a robust, objective molecular epidemiological tool for M. agalactiae that enables interlaboratory comparison of data.",2011-03-03 +25691081,Anatomy of the superficial layer of superficial fascia around the nipple-areola complex.,"The periareolar incision is the preferred method for mammaplasty because of the minimal scarring, and suturing of the superficial fascial system (SFS) is useful for avoiding hypertrophic scarring. 
In this report, we describe the anatomical location of the SFS around the nipple-areolar complex (NAC) and its histological structure.To define the location of the SFS, 20 healthy women were assessed by ultrasonography, and sections of the NAC of 10 female cadavers were examined under a light microscope.Ultrasonographic examination of sagittal sections of the breast revealed a hyperdense line immediately beneath the skin, which ran parallel with the skin and turned under the NAC. At the turning point, the line thickened to an average of 3.09 mm. The distance between the nipple and the thickest point of the hyperdense line was 10.14 mm on average. Histological structures of the line were collagen and elastic fibers containing smooth muscles that were connected to the dermis and adipose tissue. At the turning point, nerves, blood vessels, and mammary ducts were irregularly observed in the area of collagen and elastic fibers. These structures were intermingled, and the fiber bundle was very thick.The thickest area of the turning point is an area of the superficial layer of superficial fascia, which is a key structure around the NAC. The detailed anatomical data shown in our study provide good morphological landmarks for the closure of periareolar incisions.This journal requires that authors assign a level of evidence to each submission to which Evidence-Based Medicine rankings are applicable. This excludes Review Articles, Book Reviews, and manuscripts that concern Basic Science, Animal Studies, Cadaver Studies, and Experimental Studies. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266.",2015-02-18 +23703209,DNAshape: a method for the high-throughput prediction of DNA structural features on a genomic scale.,"We present a method and web server for predicting DNA structural features in a high-throughput (HT) manner for massive sequence data. 
This approach provides the framework for the integration of DNA sequence and shape analyses in genome-wide studies. The HT methodology uses a sliding-window approach to mine DNA structural information obtained from Monte Carlo simulations. It requires only nucleotide sequence as input and instantly predicts multiple structural features of DNA (minor groove width, roll, propeller twist and helix twist). The results of rigorous validations of the HT predictions based on DNA structures solved by X-ray crystallography and NMR spectroscopy, hydroxyl radical cleavage data, statistical analysis and cross-validation, and molecular dynamics simulations provide strong confidence in this approach. The DNAshape web server is freely available at http://rohslab.cmb.usc.edu/DNAshape/.",2013-05-22 +24475049,istar: a web platform for large-scale protein-ligand docking.,"Protein-ligand docking is a key computational method in the design of starting points for the drug discovery process. We are motivated by the desire to automate large-scale docking using our popular docking engine idock and thus have developed a publicly-accessible web platform called istar. Without tedious software installation, users can submit jobs using our website. Our istar website supports 1) filtering ligands by desired molecular properties and previewing the number of ligands to dock, 2) monitoring job progress in real time, and 3) visualizing ligand conformations and outputting free energy and ligand efficiency predicted by idock, binding affinity predicted by RF-Score, putative hydrogen bonds, and supplier information for easy purchase, three useful features commonly lacked on other online docking platforms like DOCK Blaster or iScreen. We have collected 17,224,424 ligands from the All Clean subset of the ZINC database, and revamped our docking engine idock to version 2.0, further improving docking speed and accuracy, and integrating RF-Score as an alternative rescoring function. 
To compare idock 2.0 with the state-of-the-art AutoDock Vina 1.1.2, we have carried out a rescoring benchmark and a redocking benchmark on the 2,897 and 343 protein-ligand complexes of PDBbind v2012 refined set and CSAR NRC HiQ Set 24Sept2010 respectively, and an execution time benchmark on 12 diverse proteins and 3,000 ligands of different molecular weight. Results show that, under various scenarios, idock achieves comparable success rates while outperforming AutoDock Vina in terms of docking speed by at least 8.69 times and at most 37.51 times. When evaluated on the PDBbind v2012 core set, our istar platform combining with RF-Score manages to reproduce Pearson's correlation coefficient and Spearman's correlation coefficient of as high as 0.855 and 0.859 respectively between the experimental binding affinity and the predicted binding affinity of the docked conformation. istar is freely available at http://istar.cse.cuhk.edu.hk/idock.",2014-01-24 +24356774,Validation of metal-binding sites in macromolecular structures with the CheckMyMetal web server.,"Metals have vital roles in both the mechanism and architecture of biological macromolecules. Yet structures of metal-containing macromolecules in which metals are misidentified and/or suboptimally modeled are abundant in the Protein Data Bank (PDB). This shows the need for a diagnostic tool to identify and correct such modeling problems with metal-binding environments. The CheckMyMetal (CMM) web server (http://csgid.org/csgid/metal_sites/) is a sophisticated, user-friendly web-based method to evaluate metal-binding sites in macromolecular structures using parameters derived from 7,350 metal-binding sites observed in a benchmark data set of 2,304 high-resolution crystal structures. The protocol outlines how the CMM server can be used to detect geometric and other irregularities in the structures of metal-binding sites, as well as how it can alert researchers to potential errors in metal assignment. 
The protocol also gives practical guidelines for correcting problematic sites by modifying the metal-binding environment and/or redefining metal identity in the PDB file. Several examples where this has led to meaningful results are described in the ANTICIPATED RESULTS section. CMM was designed for a broad audience--biomedical researchers studying metal-containing proteins and nucleic acids--but it is equally well suited for structural biologists validating new structures during modeling or refinement. The CMM server takes the coordinates of a metal-containing macromolecule structure in the PDB format as input and responds within a few seconds for a typical protein structure with 2-5 metal sites and a few hundred amino acids.",2013-12-19 +23599501,Network-guided sparse regression modeling for detection of gene-by-gene interactions.,"

Motivation

Genetic variants identified by genome-wide association studies to date explain only a small fraction of total heritability. Gene-by-gene interaction is one important potential source of unexplained total heritability. We propose a novel approach to detect such interactions that uses penalized regression and sparse estimation principles, and incorporates outside biological knowledge through a network-based penalty.

Results

We tested our new method on simulated and real data. Simulation showed that with reasonable outside biological knowledge, our method performs noticeably better than stage-wise strategies (i.e. selecting main effects first, and interactions second, from those main effects selected) in finding true interactions, especially when the marginal strength of main effects is weak. We applied our method to Framingham Heart Study data on total plasma immunoglobulin E (IgE) concentrations and found a number of interactions among different classes of human leukocyte antigen genes that may interact to influence the risk of developing IgE dysregulation and allergy.

Availability

The proposed method is implemented in R and available at http://math.bu.edu/people/kolaczyk/software.html.

Contact

chenlu@bu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-18 +23311571,"New data, strategies, and insights for Listeria monocytogenes dose-response models: summary of an interagency workshop, 2011.","Listeria monocytogenes is a leading cause of hospitalization, fetal loss, and death due to foodborne illnesses in the United States. A quantitative assessment of the relative risk of listeriosis associated with the consumption of 23 selected categories of ready-to-eat foods, published by the U.S. Department of Health and Human Services and the U.S. Department of Agriculture in 2003, has been instrumental in identifying the food products and practices that pose the greatest listeriosis risk and has guided the evaluation of potential intervention strategies. Dose-response models, which quantify the relationship between an exposure dose and the probability of adverse health outcomes, were essential components of the risk assessment. However, because of data gaps and limitations in the available data and modeling approaches, considerable uncertainty existed. Since publication of the risk assessment, new data have become available for modeling L. monocytogenes dose-response. At the same time, recent advances in the understanding of L. monocytogenes pathophysiology and strain diversity have warranted a critical reevaluation of the published dose-response models. To discuss strategies for modeling L. monocytogenes dose-response, the Interagency Risk Assessment Consortium (IRAC) and the Joint Institute for Food Safety and Applied Nutrition (JIFSAN) held a scientific workshop in 2011 (details available at http://foodrisk.org/irac/events/). The main findings of the workshop and the most current and relevant data identified during the workshop are summarized and presented in the context of L. monocytogenes dose-response. This article also discusses new insights on dose-response modeling for L. 
monocytogenes and research opportunities to meet future needs.",2013-01-11 +22530745,"ToF-SIMS depth profiling of cells: z-correction, 3D imaging, and sputter rate of individual NIH/3T3 fibroblasts.","Proper display of three-dimensional time-of-flight secondary ion mass spectrometry (ToF-SIMS) imaging data of complex, nonflat samples requires a correction of the data in the z-direction. Inaccuracies in displaying three-dimensional ToF-SIMS data arise from projecting data from a nonflat surface onto a 2D image plane, as well as possible variations in the sputter rate of the sample being probed. The current study builds on previous studies by creating software written in Matlab, the ZCorrectorGUI (available at http://mvsa.nb.uw.edu/), to apply the z-correction to entire 3D data sets. Three-dimensional image data sets were acquired from NIH/3T3 fibroblasts by collecting ToF-SIMS images, using a dual beam approach (25 keV Bi(3)(+) for analysis cycles and 20 keV C(60)(2+) for sputter cycles). The entire data cube was then corrected by using the new ZCorrectorGUI software, producing accurate chemical information from single cells in 3D. For the first time, a three-dimensional corrected view of a lipid-rich subcellular region, possibly the nuclear membrane, is presented. Additionally, the key assumption of a constant sputter rate throughout the data acquisition was tested by using ToF-SIMS and atomic force microscopy (AFM) analysis of the same cells. 
For the dried NIH/3T3 fibroblasts examined in this study, the sputter rate was found to not change appreciably in x, y, or z, and the cellular material was sputtered at a rate of approximately 10 nm per 1.25 × 10(13) ions C(60)(2+)/cm(2).",2012-05-11 +25378303,iBeetle-Base: a database for RNAi phenotypes in the red flour beetle Tribolium castaneum.,"The iBeetle-Base (http://ibeetle-base.uni-goettingen.de) makes available annotations of RNAi phenotypes, which were gathered in a large scale RNAi screen in the red flour beetle Tribolium castaneum (iBeetle screen). In addition, it provides access to sequence information and links for all Tribolium castaneum genes. The iBeetle-Base contains the annotations of phenotypes of several thousands of genes knocked down during embryonic and metamorphic epidermis and muscle development in addition to phenotypes linked to oogenesis and stink gland biology. The phenotypes are described according to the EQM (entity, quality, modifier) system using controlled vocabularies and the Tribolium morphological ontology (TrOn). Furthermore, images linked to the respective annotations are provided. The data are searchable either for specific phenotypes using a complex 'search for morphological defects' or a 'quick search' for gene names and IDs. The red flour beetle Tribolium castaneum has become an important model system for insect functional genetics and is a representative of the most species rich taxon, the Coleoptera, which comprise several devastating pests. It is used for studying insect typical development, the evolution of development and for research on metabolism and pest control. Besides Drosophila, Tribolium is the first insect model organism where large scale unbiased screens have been performed.",2014-11-05 +24457040,RAD51 Gene 135G/C polymorphism and the risk of four types of common cancers: a meta-analysis.,"

Objectives

RAD51 gene plays an important role in the pathogenesis of squamous cell carcinoma of the head and neck (SCCHN), colorectal cancer, ovarian cancer and acute leukaemia. A number of studies assessed the association between RAD51 135G/C polymorphism and the risk of these cancers in different populations. However, the results have been inconclusive. We performed a systematic meta-analysis to evaluate the association between RAD51 135G/C polymorphism and the risk of these four types of cancer.

Methods

Pubmed, Cochrane library and Chinese Biomedical Literature Database (CBM) were searched for case-control studies on RAD51 135G/C polymorphism and the risk of SCCHN, colorectal cancer, ovarian cancer and acute leukaemia published up to Oct 31, 2013. Odds ratios (ORs) with 95% confidence intervals (CIs) were used to assess the strength of association.

Results

A total of twenty-two published studies, with 6836 cases and 8507 controls were included. Overall, no significant association was found between RAD51 135G/C polymorphism and the risk of the four types of cancers (G/G vs. C/C: OR = 0.83, 95% CI: 0.43-1.59, P = 0.57). However, there was a significant association between this polymorphism and SCCHN risk in the subgroup analysis by cancer type (G/G vs. C/C: OR = 2.46, 95% CI: 1.08-5.61, P = 0.03).

Conclusion

The RAD51 135G/C polymorphism was associated with the risk of SCCHN.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1383180234106945.",2014-01-23 +24088188,V-Phaser 2: variant inference for viral populations.,"

Background

Massively parallel sequencing offers the possibility of revolutionizing the study of viral populations by providing ultra deep sequencing (tens to hundreds of thousand fold coverage) of complete viral genomes. However, differentiation of true low frequency variants from sequencing errors remains challenging.

Results

We developed a software package, V-Phaser 2, for inferring intrahost diversity within viral populations. This program adds three major new methodologies to the state of the art: a technique to efficiently utilize paired end read data for calling phased variants, a new strategy to represent and infer length polymorphisms, and an in line filter for erroneous calls arising from systematic sequencing artifacts. We have also heavily optimized memory and run time performance. This combination of algorithmic and technical advances allows V-Phaser 2 to fully utilize extremely deep paired end sequencing data (such as generated by Illumina sequencers) to accurately infer low frequency intrahost variants in viral populations in reasonable time on a standard desktop computer. V-Phaser 2 was validated and compared to both QuRe and the original V-Phaser on three datasets obtained from two viral populations: a mixture of eight known strains of West Nile Virus (WNV) sequenced on both 454 Titanium and Illumina MiSeq and a mixture of twenty-four known strains of WNV sequenced only on 454 Titanium. V-Phaser 2 outperformed the other two programs in both sensitivity and specificity while using more than five fold less time and memory.

Conclusions

We developed V-Phaser 2, a publicly available software tool (V-Phaser 2 can be accessed via: http://www.broadinstitute.org/scientific-community/science/projects/viral-genomics/v-phaser-2 and is freely available for academic use) that enables the efficient analysis of ultra-deep sequencing data produced by common next generation sequencing platforms for viral populations.",2013-10-03 +23046276,BirdsEyeView (BEV): graphical overviews of experimental data.,"

Background

Analyzing global experimental data can be tedious and time-consuming. Thus, helping biologists see results as quickly and easily as possible can facilitate biological research, and is the purpose of the software we describe.

Results

We present BirdsEyeView, a software system for visualizing experimental transcriptomic data using different views that users can switch among and compare. BirdsEyeView graphically maps data to three views: Cellular Map (currently a plant cell), Pathway Tree with dynamic mapping, and Gene Ontology http://www.geneontology.org Biological Processes and Molecular Functions. By displaying color-coded values for transcript levels across different views, BirdsEyeView can assist users in developing hypotheses about their experiment results.

Conclusions

BirdsEyeView is a software system available as a Java Webstart package for visualizing transcriptomic data in the context of different biological views to assist biologists in investigating experimental results. BirdsEyeView can be obtained from http://metnetdb.org/MetNet_BirdsEyeView.htm.",2012-09-11 +23704902,PeSV-Fisher: identification of somatic and non-somatic structural variants using next generation sequencing data.,"

Unlabelled

Next-generation sequencing technologies expedited research to develop efficient computational tools for the identification of structural variants (SVs) and their use to study human diseases. As deeper data is obtained, the existence of higher complexity SVs in some genomes becomes more evident, but the detection and definition of most of these complex rearrangements is still in its infancy. The full characterization of SVs is a key aspect for discovering their biological implications. Here we present a pipeline (PeSV-Fisher) for the detection of deletions, gains, intra- and inter-chromosomal translocations, and inversions, at very reasonable computational costs. We further provide comprehensive information on co-localization of SVs in the genome, a crucial aspect for studying their biological consequences. The algorithm uses a combination of methods based on paired-reads and read-depth strategies. PeSV-Fisher has been designed with the aim to facilitate identification of somatic variation, and, as such, it is capable of analysing two or more samples simultaneously, producing a list of non-shared variants between samples. We tested PeSV-Fisher on available sequencing data, and compared its behaviour to that of frequently deployed tools (BreakDancer and VariationHunter). We have also tested this algorithm on our own sequencing data, obtained from a tumour and a normal blood sample of a patient with chronic lymphocytic leukaemia, on which we have also validated the results by targeted re-sequencing of different kinds of predictions. This allowed us to determine confidence parameters that influence the reliability of breakpoint predictions.

Availability

PeSV-Fisher is available at http://gd.crg.eu/tools.",2013-05-21 +21349987,Reannotation of the genome sequence of Clostridium difficile strain 630.,"A regular update of genome annotations is a prerequisite step to help maintain the accuracy and relevance of the information they contain. Five years after the first publication of the complete genome sequence of Clostridium difficile strain 630, we manually reannotated each of the coding sequences (CDSs), using a high-level annotation platform. The functions of more than 500 genes annotated previously with putative functions were reannotated based on updated sequence similarities to proteins whose functions have been recently identified by experimental data from the literature. We also modified 222 CDS starts, detected 127 new CDSs and added the enzyme commission numbers, which were not supplied in the original annotation. In addition, an intensive project was undertaken to standardize the names of genes and gene products and thus harmonize as much as possible with the HAMAP project. The reannotation is stored in a relational database that will be available on the MicroScope web-based platform (https://www.genoscope.cns.fr/agc/microscope/mage/viewer.php?S_id=752&wwwpkgdb=a78e3466ad5db29aa8fe49e8812de8a7). The original submission stored in the (International Nucleotide Sequence Database Collaboration) INSDC nucleotide sequence databases was also updated.",2011-02-24 +24191069,Model-based clustering for RNA-seq data.,"

Motivation

RNA-seq technology has been widely adopted as an attractive alternative to microarray-based methods to study global gene expression. However, robust statistical tools to analyze these complex datasets are still lacking. By grouping genes with similar expression profiles across treatments, cluster analysis provides insight into gene functions and networks, and hence is an important technique for RNA-seq data analysis.

Results

In this manuscript, we derive clustering algorithms based on appropriate probability models for RNA-seq data. An expectation-maximization algorithm and another two stochastic versions of expectation-maximization algorithms are described. In addition, a strategy for initialization based on likelihood is proposed to improve the clustering algorithms. Moreover, we present a model-based hybrid-hierarchical clustering method to generate a tree structure that allows visualization of relationships among clusters as well as flexibility of choosing the number of clusters. Results from both simulation studies and analysis of a maize RNA-seq dataset show that our proposed methods provide better clustering results than alternative methods such as the K-means algorithm and hierarchical clustering methods that are not based on probability models.

Availability and implementation

An R package, MBCluster.Seq, has been developed to implement our proposed algorithms. This R package provides fast computation and is publicly available at http://www.r-project.org",2013-11-04 +23060617,Transcriptome assembly and isoform expression level estimation from biased RNA-Seq reads.,"

Motivation

RNA-Seq uses the high-throughput sequencing technology to identify and quantify transcriptome at an unprecedented high resolution and low cost. However, RNA-Seq reads are usually not uniformly distributed and biases in RNA-Seq data pose great challenges in many applications including transcriptome assembly and the expression level estimation of genes or isoforms. Much effort has been made in the literature to calibrate the expression level estimation from biased RNA-Seq data, but the effect of biases on transcriptome assembly remains largely unexplored.

Results

Here, we propose a statistical framework for both transcriptome assembly and isoform expression level estimation from biased RNA-Seq data. Using a quasi-multinomial distribution model, our method is able to capture various types of RNA-Seq biases, including positional, sequencing and mappability biases. Our experimental results on simulated and real RNA-Seq datasets exhibit interesting effects of RNA-Seq biases on both transcriptome assembly and isoform expression level estimation. The advantage of our method is clearly shown in the experimental analysis by its high sensitivity and precision in transcriptome assembly and the high concordance of its estimated expression levels with quantitative reverse transcription-polymerase chain reaction data.

Availability

CEM is freely available at http://www.cs.ucr.edu/~liw/cem.html.

Contact

liw@cs.ucr.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-11 +21685128,"Deep small RNA sequencing from the nematode Ascaris reveals conservation, functional diversification, and novel developmental profiles.","Eukaryotic cells express several classes of small RNAs that regulate gene expression and ensure genome maintenance. Endogenous siRNAs (endo-siRNAs) and Piwi-interacting RNAs (piRNAs) mainly control gene and transposon expression in the germline, while microRNAs (miRNAs) generally function in post-transcriptional gene silencing in both somatic and germline cells. To provide an evolutionary and developmental perspective on small RNA pathways in nematodes, we identified and characterized known and novel small RNA classes through gametogenesis and embryo development in the parasitic nematode Ascaris suum and compared them with known small RNAs of Caenorhabditis elegans. piRNAs, Piwi-clade Argonautes, and other proteins associated with the piRNA pathway have been lost in Ascaris. miRNAs are synthesized immediately after fertilization in utero, before pronuclear fusion, and before the first cleavage of the zygote. This is the earliest expression of small RNAs ever described at a developmental stage long thought to be transcriptionally quiescent. A comparison of the two classes of Ascaris endo-siRNAs, 22G-RNAs and 26G-RNAs, to those in C. elegans, suggests great diversification and plasticity in the use of small RNA pathways during spermatogenesis in different nematodes. Our data reveal conserved characteristics of nematode small RNAs as well as features unique to Ascaris that illustrate significant flexibility in the use of small RNAs pathways, some of which are likely an adaptation to Ascaris' life cycle and parasitism. 
The transcriptome assembly has been submitted to NCBI Transcriptome Shotgun Assembly Sequence Database(http://www.ncbi.nlm.nih.gov/genbank/TSA.html) under accession numbers JI163767–JI182837 and JI210738–JI257410.",2011-06-17 +24466070,Tomato genomic resources database: an integrated repository of useful tomato genomic information for basic and applied research.,"Tomato Genomic Resources Database (TGRD) allows interactive browsing of tomato genes, micro RNAs, simple sequence repeats (SSRs), important quantitative trait loci and Tomato-EXPEN 2000 genetic map altogether or separately along twelve chromosomes of tomato in a single window. The database is created using sequence of the cultivar Heinz 1706. High quality single nucleotide polymorphic (SNP) sites between the genes of Heinz 1706 and the wild tomato S. pimpinellifolium LA1589 are also included. Genes are classified into different families. 5'-upstream sequences (5'-US) of all the genes and their tissue-specific expression profiles are provided. Sequences of the microRNA loci and their putative target genes are catalogued. Genes and 5'-US show presence of SSRs and SNPs. SSRs located in the genomic, genic and 5'-US can be analysed separately for the presence of any particular motif. Primer sequences for all the SSRs and flanking sequences for all the genic SNPs have been provided. TGRD is a user-friendly web-accessible relational database and uses CMAP viewer for graphical scanning of all the features. Integration and graphical presentation of important genomic information will facilitate better and easier use of tomato genome. TGRD can be accessed as an open source repository at http://59.163.192.91/tomato2/.",2014-01-21 +24860165,Motif enrichment tool.,"The Motif Enrichment Tool (MET) provides an online interface that enables users to find major transcriptional regulators of their gene sets of interest. 
MET searches the appropriate regulatory region around each gene and identifies which transcription factor DNA-binding specificities (motifs) are statistically overrepresented. Motif enrichment analysis is currently available for many metazoan species including human, mouse, fruit fly, planaria and flowering plants. MET also leverages high-throughput experimental data such as ChIP-seq and DNase-seq from ENCODE and ModENCODE to identify the regulatory targets of a transcription factor with greater precision. The results from MET are produced in real time and are linked to a genome browser for easy follow-up analysis. Use of the web tool is free and open to all, and there is no login requirement. ADDRESS: http://veda.cs.uiuc.edu/MET/.",2014-05-23 +25757249,Operon prediction by Markov clustering.,"The prediction of operons is a critical step for the reconstruction of biochemical and regulatory networks at the whole genome level. In this paper, a novel operon prediction model is proposed based on Markov Clustering (MCL). The model employs a graph-clustering method by MCL for prediction and does not need a classifier. In the cross-species validation, the accuracies of E. coli K12, Bacillus subtilis and P. furiosus are 92.1, 86.9 and 87.3%, respectively. Experimental results show that the proposed method has a powerful capability of operon prediction. The compiled program and test data sets are publicly available at http://ccst.jlu.edu.cn/JCSB/OPMC/.",2014-01-01 +24443924,"Association of XPD Lys751Gln polymorphism with head and neck cancer susceptibility: evidence from 11,443 subjects.","

Background

Whether the single nucleotide polymorphism (SNP) Lys751Gln of xeroderma pigmentosum group D (XPD) gene increases susceptibility to head and neck cancer (HNC) is controversial and undetermined. Therefore, we conducted this meta-analysis to systematically assess the possible association between them.

Methods

The OVID, Medline, Embase, Pubmed, Web of Science databases were searched to identify the eligible studies. The odds ratio (OR) with 95% confidence interval (95% CI) was used to assess the strength of association.

Results

A total of 11,443 subjects from eighteen studies were subjected to meta-analysis. Overall, XPD Lys751Gln polymorphism had no association with increased HNC risk under all five genetic models (P > 0.05). In the subgroup analysis by ethnicity and source of controls, still no significant association was found under five genetic models (P > 0.05). In the subgroup analysis by cancer type, XPD Lys751Gln polymorphism had statistically significant association with elevated laryngeal cancer (LC) and nasopharyngeal cancer (NPC) risk under heterozygous comparison and dominant model (P<0.05) and borderline significantly increased risk was found under allele contrast for LC and NPC. Carriers of Lys allele and Lys/Lys genotype may be associated with elevated LC and NPC risk.

Conclusions

There is an overall lack of association between XPD Lys751Gln polymorphism and HNC risk under all five genetic models and still no significant association was found in the subgroup analysis by ethnicity and source of controls. However, XPD Lys751Gln polymorphism was significantly associated with susceptibility to LC and NPC and the Lys allele and Lys/Lys genotype of XPD Lys751Gln polymorphism may be a risk factor for LC and NPC. However, relatively modest sample sizes were included in this meta-analysis and studies with large sample sizes and representative population are warranted to further clarify this finding.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/5628716106316015.",2014-01-20 +24958811,TRES predicts transcription control in embryonic stem cells.,"

Summary

Unraveling transcriptional circuits controlling embryonic stem cell maintenance and fate has great potential for improving our understanding of normal development as well as disease. To facilitate this, we have developed a novel web tool called 'TRES' that predicts the likely upstream regulators for a given gene list. This is achieved by integrating transcription factor (TF) binding events from 187 ChIP-sequencing and ChIP-on-chip datasets in murine and human embryonic stem (ES) cells with over 1000 mammalian TF sequence motifs. Using 114 TF perturbation gene sets, as well as 115 co-expression clusters in ES cells, we validate the utility of this approach.

Availability and implementation

TRES is freely available at http://www.tres.roslin.ed.ac.uk.

Contact

Anagha.Joshi@roslin.ed.ac.uk or bg200@cam.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-06-23 +23686934,Epilepsy and seizure ontology: towards an epilepsy informatics infrastructure for clinical research and patient care.,"

Objective

Epilepsy encompasses an extensive array of clinical and research subdomains, many of which emphasize multi-modal physiological measurements such as electroencephalography and neuroimaging. The integration of structured, unstructured, and signal data into a coherent structure for patient care as well as clinical research requires an effective informatics infrastructure that is underpinned by a formal domain ontology.

Methods

We have developed an epilepsy and seizure ontology (EpSO) using a four-dimensional epilepsy classification system that integrates the latest International League Against Epilepsy terminology recommendations and National Institute of Neurological Disorders and Stroke (NINDS) common data elements. It imports concepts from existing ontologies, including the Neural ElectroMagnetic Ontologies, and uses formal concept analysis to create a taxonomy of epilepsy syndromes based on their seizure semiology and anatomical location.

Results

EpSO is used in a suite of informatics tools for (a) patient data entry, (b) epilepsy focused clinical free text processing, and (c) patient cohort identification as part of the multi-center NINDS-funded study on sudden unexpected death in epilepsy. EpSO is available for download at http://prism.case.edu/prism/index.php/EpilepsyOntology.

Discussion

An epilepsy ontology consortium is being created for community-driven extension, review, and adoption of EpSO. We are in the process of submitting EpSO to the BioPortal repository.

Conclusions

EpSO plays a critical role in informatics tools for epilepsy patient care and multi-center clinical research.",2013-05-18 +24407311,A combinatorial perspective of the protein inference problem.,"In a shotgun proteomics experiment, proteins are the most biologically meaningful output. The success of proteomics studies depends on the ability to accurately and efficiently identify proteins. Many methods have been proposed to facilitate the identification of proteins from peptide identification results. However, the relationship between protein identification and peptide identification has not been thoroughly explained before. In this paper, we devote ourselves to a combinatorial perspective of the protein inference problem. We employ combinatorial mathematics to calculate the conditional protein probabilities (protein probability means the probability that a protein is correctly identified) under three assumptions, which lead to a lower bound, an upper bound, and an empirical estimation of protein probabilities, respectively. The combinatorial perspective enables us to obtain an analytical expression for protein inference. Our method achieves comparable results with ProteinProphet in a more efficient manner in experiments on two data sets of standard protein mixtures and two data sets of real samples. Based on our model, we study the impact of unique peptides and degenerate peptides (degenerate peptides are peptides shared by at least two proteins) on protein probabilities. Meanwhile, we also study the relationship between our model and ProteinProphet. We name our program ProteinInfer. 
Its Java source code, our supplementary document and experimental results are available at: http://bioinformatics.ust.hk/proteininfer.",2013-11-01 +25289699,Indexes of large genome collections on a PC.,"The availability of thousands of individual genomes of one species should boost rapid progress in personalized medicine or understanding of the interaction between genotype and phenotype, to name a few applications. A key operation useful in such analyses is aligning sequencing reads against a collection of genomes, which is costly with the use of existing algorithms due to their large memory requirements. We present MuGI, Multiple Genome Index, which reports all occurrences of a given pattern, in exact and approximate matching model, against a collection of thousand(s) genomes. Its unique feature is the small index size, which is customisable. It fits in a standard computer with 16-32 GB, or even 8 GB, of RAM, for the 1000GP collection of 1092 diploid human genomes. The solution is also fast. For example, the exact matching queries (of average length 150 bp) are handled in average time of 39 µs and with up to 3 mismatches in 373 µs on the test PC with the index size of 13.4 GB. For a smaller index, occupying 7.4 GB in memory, the respective times grow to 76 µs and 917 µs. Software is available at http://sun.aei.polsl.pl/mugi under a free license. Data S1 is available at PLOS One online.",2014-10-07 +25294922,Log-odds sequence logos.,"

Motivation

DNA and protein patterns are usefully represented by sequence logos. However, the methods for logo generation in common use lack a proper statistical basis, and are non-optimal for recognizing functionally relevant alignment columns.

Results

We redefine the information at a logo position as a per-observation multiple alignment log-odds score. Such scores are positive or negative, depending on whether a column's observations are better explained as arising from relatedness or chance. Within this framework, we propose distinct normalized maximum likelihood and Bayesian measures of column information. We illustrate these measures on High Mobility Group B (HMGB) box proteins and a dataset of enzyme alignments. Particularly in the context of protein alignments, our measures improve the discrimination of biologically relevant positions.

Availability and implementation

Our new measures are implemented in an open-source Web-based logo generation program, which is available at http://www.ncbi.nlm.nih.gov/CBBresearch/Yu/logoddslogo/index.html. A stand-alone version of the program is also available from this site.

Contact

altschul@ncbi.nlm.nih.gov

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-06 +22789588,MyMiner: a web application for computer-assisted biocuration and text annotation.,"

Motivation

The exponential growth of scientific literature has resulted in a massive amount of unstructured natural language data that cannot be directly handled by means of bioinformatics tools. Such tools generally require structured data, often generated through a cumbersome process of manual literature curation. Herein, we present MyMiner, a free and user-friendly text annotation tool aimed to assist in carrying out the main biocuration tasks and to provide labelled data for the development of text mining systems. MyMiner allows easy classification and labelling of textual data according to user-specified classes as well as predefined biological entities. The usefulness and efficiency of this application have been tested for a range of real-life annotation scenarios of various research topics.

Availability

http://myminer.armi.monash.edu.au.",2012-07-12 +22639671,Functional Annotation of 2D Protein Maps: The GelMap Portal.,"In classical proteome analyses, final experimental data are (a) images of 2D protein separations obtained by gel electrophoresis and (b) corresponding lists of proteins which were identified by mass spectrometry (MS). For data annotation, software tools were developed which allow the linking of protein identity data directly to 2D gels (""clickable gels""). GelMap is a new online software tool to annotate 2D protein maps. It allows (i) functional annotation of all identified proteins according to biological categories defined by the user, e.g., subcellular localization, metabolic pathway, or assignment to a protein complex and (ii) annotation of several proteins per analyzed protein ""spot"" according to MS primary data. Options to differentially display proteins of functional categories offer new opportunities for data evaluation. For instance, if used for the annotation of 2D Blue native/SDS gels, GelMap allows the identification of protein complexes of low abundance. A web portal has been established for presentation and evaluation of protein identity data related to 2D gels and is freely accessible at http://www.gelmap.de/.",2012-05-14 +24438389,"antibacTR: dynamic antibacterial-drug-target ranking integrating comparative genomics, structural analysis and experimental annotation.","

Background

Development of novel antibacterial drugs is both an urgent healthcare necessity and a partially neglected field. The last decades have seen a substantial decrease in the discovery of novel antibiotics, which combined with the recent thrive of multi-drug-resistant pathogens have generated a scenario of general concern. The procedures involved in the discovery and development of novel antibiotics are economically challenging, time consuming and lack any warranty of success. Furthermore, the return-on-investment for an antibacterial drug is usually marginal when compared to other therapeutics, which in part explains the decrease of private investment.

Results

In this work we present antibacTR, a computational pipeline designed to aid researchers in the selection of potential drug targets, one of the initial steps in antibacterial-drug discovery. The approach was designed and implemented as part of two publicly funded initiatives aimed at discovering novel antibacterial targets, mechanisms and drugs for a priority list of Gram-negative pathogens: Acinetobacter baumannii, Escherichia coli, Helicobacter pylori, Pseudomonas aeruginosa and Stenotrophomonas maltophilia. However, at present this list has been extended to cover a total of 74 fully sequenced Gram-negative pathogens. antibacTR is based on sequence comparisons and queries to multiple databases (e.g. gene essentiality, virulence factors) to rank proteins according to their potential as antibacterial targets. The dynamic ranking of potential drug targets can easily be executed, customized and accessed by the user through a web interface which also integrates computational analyses performed in-house and visualizable on-site. These include three-dimensional modeling of protein structures and prediction of active sites among other functionally relevant ligand-binding sites.

Conclusions

Given its versatility and ease-of-use at integrating both experimental annotation and computational analyses, antibacTR may effectively assist microbiologists, medicinal-chemists and other researchers working in the field of antibacterial drug-discovery. The public web-interface for antibacTR is available at 'http://bioinf.uab.cat/antibactr'.",2014-01-17 +25416747,Multilevel regularized regression for simultaneous taxa selection and network construction with metagenomic count data.,"

Motivation

Identifying disease associated taxa and constructing networks for bacteria interactions are two important tasks usually studied separately. In reality, differentiation of disease associated taxa and correlation among taxa may affect each other. One genus can be differentiated because it is highly correlated with another highly differentiated one. In addition, network structures may vary under different clinical conditions. Permutation tests are commonly used to detect differences between networks in distinct phenotypes, and they are time-consuming.

Results

In this manuscript, we propose a multilevel regularized regression method to simultaneously identify taxa and construct networks. We also extend the framework to allow construction of a common network and differentiated network together. An efficient algorithm with dual formulation is developed to deal with the large-scale n ≪ m problem with a large number of taxa (m) and a small number of samples (n) efficiently. The proposed method is regularized with a general Lp (p ∈ [0, 2]) penalty and models the effects of taxa abundance differentiation and correlation jointly. We demonstrate that it can identify both true and biologically significant genera and network structures.

Availability and implementation

Software MLRR in MATLAB is available at http://biostatistics.csmc.edu/mlrr/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-20 +24453961,PredictSNP: robust and accurate consensus classifier for prediction of disease-related mutations.,"Single nucleotide variants represent a prevalent form of genetic variation. Mutations in the coding regions are frequently associated with the development of various genetic diseases. Computational tools for the prediction of the effects of mutations on protein function are very important for analysis of single nucleotide variants and their prioritization for experimental characterization. Many computational tools are already widely employed for this purpose. Unfortunately, their comparison and further improvement is hindered by large overlaps between the training datasets and benchmark datasets, which lead to biased and overly optimistic reported performances. In this study, we have constructed three independent datasets by removing all duplicities, inconsistencies and mutations previously used in the training of evaluated tools. The benchmark dataset containing over 43,000 mutations was employed for the unbiased evaluation of eight established prediction tools: MAPP, nsSNPAnalyzer, PANTHER, PhD-SNP, PolyPhen-1, PolyPhen-2, SIFT and SNAP. The six best performing tools were combined into a consensus classifier PredictSNP, resulting into significantly improved prediction performance, and at the same time returned results for all mutations, confirming that consensus prediction represents an accurate and robust alternative to the predictions delivered by individual tools. A user-friendly web interface enables easy access to all eight prediction tools, the consensus classifier PredictSNP and annotations from the Protein Mutant Database and the UniProt database. 
The web server and the datasets are freely available to the academic community at http://loschmidt.chemi.muni.cz/predictsnp.",2014-01-16 +24436305,Impact of human pathogenic micro-insertions and micro-deletions on post-transcriptional regulation.,"Small insertions/deletions (INDELs) of ≤21 bp comprise 18% of all recorded mutations causing human inherited disease and are evident in 24% of documented Mendelian diseases. INDELs affect gene function in multiple ways: for example, by introducing premature stop codons that either lead to the production of truncated proteins or affect transcriptional efficiency. However, the means by which they impact post-transcriptional regulation, including alternative splicing, have not been fully evaluated. In this study, we collate disease-causing INDELs from the Human Gene Mutation Database (HGMD) and neutral INDELs from the 1000 Genomes Project. The potential of these two types of INDELs to affect binding-site affinity of RNA-binding proteins (RBPs) was then evaluated. We identified several sequence features that can distinguish disease-causing INDELs from neutral INDELs. Moreover, we built a machine-learning predictor called PinPor (predicting pathogenic small insertions and deletions affecting post-transcriptional regulation, http://watson.compbio.iupui.edu/pinpor/) to ascertain which newly observed INDELs are likely to be pathogenic. Our results show that disease-causing INDELs are more likely to ablate RBP-binding sites and tend to affect more RBP-binding sites than neutral INDELs. Additionally, disease-causing INDELs give rise to greater deviations in binding affinity than neutral INDELs. We also demonstrated that disease-causing INDELs may be distinguished from neutral INDELs by several sequence features, such as their proximity to splice sites and their potential effects on RNA secondary structure. 
This predictor showed satisfactory performance in identifying numerous pathogenic INDELs, with a Matthews correlation coefficient (MCC) value of 0.51 and an accuracy of 0.75.",2014-01-16 +23677942,"PconsD: ultra rapid, accurate model quality assessment for protein structure prediction.","

Summary

Clustering methods are often needed for accurately assessing the quality of modeled protein structures. Recent blind evaluation of quality assessment methods in CASP10 showed that there is little difference between many different methods as far as ranking models and selecting best model are concerned. When comparing many models, the computational cost of the model comparison can become significant. Here, we present PconsD, a fast, stream-computing method for distance-driven model quality assessment that runs on consumer hardware. PconsD is at least one order of magnitude faster than other methods of comparable accuracy.

Availability

The source code for PconsD is freely available at http://d.pcons.net/. Supplementary benchmarking data are also available there.

Contact

arne@bioinfo.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-14 +23283715,Data resource profile: the World Health Organization Study on global AGEing and adult health (SAGE).,"Population ageing is rapidly becoming a global issue and will have a major impact on health policies and programmes. The World Health Organization's Study on global AGEing and adult health (SAGE) aims to address the gap in reliable data and scientific knowledge on ageing and health in low- and middle-income countries. SAGE is a longitudinal study with nationally representative samples of persons aged 50+ years in China, Ghana, India, Mexico, Russia and South Africa, with a smaller sample of adults aged 18-49 years in each country for comparisons. Instruments are compatible with other large high-income country longitudinal ageing studies. Wave 1 was conducted during 2007-2010 and included a total of 34 124 respondents aged 50+ and 8340 aged 18-49. In four countries, a subsample consisting of 8160 respondents participated in Wave 1 and the 2002/04 World Health Survey (referred to as SAGE Wave 0). Wave 2 data collection will start in 2012/13, following up all Wave 1 respondents. Wave 3 is planned for 2014/15. SAGE is committed to the public release of study instruments, protocols and meta- and micro-data: access is provided upon completion of a Users Agreement available through WHO's SAGE website (www.who.int/healthinfo/systems/sage) and WHO's archive using the National Data Archive application (http://apps.who.int/healthinfo/systems/surveydata).",2012-12-01 +24044702,Expanding the use of patient reports about patient-centered care.,"In an informative article on the assessment of patient care experiences, Zimlichman, Rozenblum, and Millenson describe the evolving use of surveys that elicit patient reports about medical care experiences in Israel, a trend that parallels developments in the U.S. This commentary summarizes some of experiences in the U.S. 
that might inform the development of more consistent and extensive strategies for assessing and promoting patient-centered care in Israel. More comprehensive patient experience surveys, the results of which would be publicly available, as Zimlichman and colleagues advocate, would facilitate quality improvements, especially if users are provided with support for the use and interpretation of the data. Developing more efficient survey methods will facilitate the broader use of such surveys, although it is important to use methods that yield results that are as representative of the target population as possible and to account for survey mode effects when data are reported. Although the surveys need to be appropriate for the Israeli context, the use of standard questions used in other countries would facilitate comparisons that could help to identify best practices that can be adopted in different settings. Those who work on assessing patient-centered care in the U.S. look forward to learning from the work of their Israeli colleagues. This is a commentary on http://www.ijhpr.org/content/2/1/35/.",2013-09-17 +21531804,Inference of the transcriptional regulatory network in Staphylococcus aureus by integration of experimental and genomics-based evidence.,"Transcriptional regulatory networks are fine-tuned systems that help microorganisms respond to changes in the environment and cell physiological state. We applied the comparative genomics approach implemented in the RegPredict Web server combined with SEED subsystem analysis and available information on known regulatory interactions for regulatory network reconstruction for the human pathogen Staphylococcus aureus and six related species from the family Staphylococcaceae. 
The resulting reference set of 46 transcription factor regulons contains more than 1,900 binding sites and 2,800 target genes involved in the central metabolism of carbohydrates, amino acids, and fatty acids; respiration; the stress response; metal homeostasis; drug and metal resistance; and virulence. The inferred regulatory network in S. aureus includes ∼320 regulatory interactions between 46 transcription factors and ∼550 candidate target genes comprising 20% of its genome. We predicted ∼170 novel interactions and 24 novel regulons for the control of the central metabolic pathways in S. aureus. The reconstructed regulons are largely variable in the Staphylococcaceae: only 20% of S. aureus regulatory interactions are conserved across all studied genomes. We used a large-scale gene expression data set for S. aureus to assess relationships between the inferred regulons and gene expression patterns. The predicted reference set of regulons is captured within the Staphylococcus collection in the RegPrecise database (http://regprecise.lbl.gov).",2011-04-29 +22638579,SteinerNet: a web server for integrating 'omic' data to discover hidden components of response pathways.,"High-throughput technologies including transcriptional profiling, proteomics and reverse genetics screens provide detailed molecular descriptions of cellular responses to perturbations. However, it is difficult to integrate these diverse data to reconstruct biologically meaningful signaling networks. Previously, we have established a framework for integrating transcriptional, proteomic and interactome data by searching for the solution to the prize-collecting Steiner tree problem. Here, we present a web server, SteinerNet, to make this method available in a user-friendly format for a broad range of users with data from any species. 
At a minimum, a user only needs to provide a set of experimentally detected proteins and/or genes and the server will search for connections among these data from the provided interactomes for yeast, human, mouse, Drosophila melanogaster and Caenorhabditis elegans. More advanced users can upload their own interactome data as well. The server provides interactive visualization of the resulting optimal network and downloadable files detailing the analysis and results. We believe that SteinerNet will be useful for researchers who would like to integrate their high-throughput data for a specific condition or cellular response and to find biologically meaningful pathways. SteinerNet is accessible at http://fraenkel.mit.edu/steinernet.",2012-05-25 +25747436,Using Online Tool (iPrior) for Modeling ToxCast™ Assays Towards Prioritization of Animal Toxicity Testing.,"The use of long-term animal studies for human and environmental toxicity estimation is more discouraged than ever before. Alternative models for toxicity prediction, including QSAR studies, are gaining more ground. A recent approach is to combine in vitro chemical profiling and in silico chemical descriptors with the knowledge about toxicity pathways to derive a unique signature for toxicity endpoints. In this study we investigate the ToxCast™ Phase I data regarding their ability to predict long-term animal toxicity. We investigated thousands of models constructed in an effort to predict 61 toxicity endpoints using multiple descriptor packages and hundreds of in vitro assays. We investigated the use of in vitro assays and biochemical pathways on model performance. We identified 10 toxicity endpoints where biologically derived descriptors from in vitro assays or pathway perturbations improved the model prediction ability. In vivo toxicity endpoints proved generally challenging to model. Few models were possible to readily model with a balanced accuracy (BA) above 0.7. 
We also constructed in silico models to predict the outcome of 144 in vitro assays. This showed better statistical metrics with 79 out of 144 assays having median balanced accuracy above 0.7. This suggests that the in vitro datasets have a better modelability than in vivo animal toxicities for the given datasets. Moreover, we published an online platform (http://iprior.ochem.eu) that automates large-scale model building and analysis.",2015-01-01 +26184112,Relative Blood Volume Monitoring during Renal Replacement Therapy in Critically Ill Patients with Septic Shock: A Preliminary Report.,"

Background

Volume management during renal replacement therapy (RRT) in septic shock is always in the conflict between aggravating hypovolemia by undue ultrafiltration (UF) and insufficient reduction of fluid overload which is associated with adverse outcome. Relative blood volume (RBV) monitoring could be helpful for timely transition from fluid resuscitation to fluid removal.

Methods

Data of RBV were continuously monitored and used for guidance of UF and fluid resuscitation in 21 consecutive patients with severe septic multiple organ failure. RRT was applied with extended daily hemodiafiltration for median 11 h (range 6-23). Changes in RBV were analyzed during the first 4 treatment sessions.

Results

During 26 treatments, RBV monitoring revealed an internal volume loss substituted by a median infusion volume of 2.38 l (maximum 8.07 l) per treatment to keep the RBV constant. In the remaining 40 sessions, a median net-UF of 1.00 l (range 0.40-4.40) was achieved. In the first 2 days predominantly substitution was necessary whereas from the third day UF became increasingly possible. The 28-day survival rate was 81%.

Conclusion

Blood volume monitoring proved to be an easy and feasible tool for safe guidance of fluid management maintaining the balance between UF and vascular refilling. Video Journal Club ‘Cappuccino with Claudio Ronco' at http://www.karger.com/?doi=433415",2015-01-01 +25417090,Knowledge-Based Personal Health System to empower outpatients of diabetes mellitus by means of P4 Medicine.,"Diabetes Mellitus (DM) affects hundreds of millions of people worldwide and it imposes a large economic burden on healthcare systems. We present a web patient empowering system (PHSP4) that ensures continuous monitoring and assessment of the health state of patients with DM (type I and II). PHSP4 is a Knowledge-Based Personal Health System (PHS) which follows the trend of P4 Medicine (Personalized, Predictive, Preventive, and Participative). It provides messages to outpatients and clinicians about the achievement of objectives, follow-up, and treatments adjusted to the patient condition. Additionally, it calculates a four-component risk vector of the associated pathologies with DM: Nephropathy, Diabetic retinopathy, Diabetic foot, and Cardiovascular event. The core of the system is a Rule-Based System which Knowledge Base is composed by a set of rules implementing the recommendations of the American Diabetes Association (ADA) (American Diabetes Association: http://www.diabetes.org/ ) clinical guideline. The PHSP4 is designed to be standardized and to facilitate its interoperability by means of terminologies (SNOMED-CT [The International Health Terminology Standards Development Organization: http://www.ihtsdo.org/snomed-ct/ ] and UCUM [The Unified Code for Units of Measure: http://unitsofmeasure.org/ ]), standardized clinical documents (HL7 CDA R2 [Health Level Seven International: http://www.hl7.org/index.cfm ]) for managing Electronic Health Record (EHR). 
We have evaluated the functionality of the system and its users' acceptance of the system using simulated and real data, and a questionnaire based on the Technology Acceptance Model methodology (TAM). Finally, the results show the reliability of the system and the high acceptance of clinicians.",2015-01-01 +26091605,Decision-Oriented Health Technology Assessment: One Step Forward in Supporting the Decision-Making Process in Hospitals.,"

Objectives

This article outlines the Decision-Oriented Health Technology Assessment: a new implementation of the European network for Health Technology Assessment Core Model, integrating the multicriteria decision-making analysis by using the analytic hierarchy process to introduce a standardized methodological approach as a valued and shared tool to support health care decision making within a hospital.

Methods

Following the Core Model as guidance (European network for Health Technology Assessment. HTA core model for medical and surgical interventions. Available from: http://www.eunethta.eu/outputs/hta-core-model-medical-and-surgical-interventions-10r. [Accessed May 27, 2014]), it is possible to apply the analytic hierarchy process to break down a problem into its constituent parts and identify priorities (i.e., assigning a weight to each part) in a hierarchical structure. Thus, it quantitatively compares the importance of multiple criteria in assessing health technologies and how the alternative technologies perform in satisfying these criteria. The verbal ratings are translated into a quantitative form by using the Saaty scale (Saaty TL. Decision making with the analytic hierarchy process. Int J Serv Sci 2008;1:83-98). An eigenvectors analysis is used for deriving the weights' systems (i.e., local and global weights' system) that reflect the importance assigned to the criteria and the priorities related to the performance of the alternative technologies.

Results

Compared with the Core Model, this methodological approach supplies a more timely as well as contextualized evidence for a specific technology, making it possible to obtain data that are more relevant and easier to interpret, and therefore more useful for decision makers to make investment choices with greater awareness.

Conclusions

We reached the conclusion that although there may be scope for improvement, this implementation is a step forward toward the goal of building a ""solid bridge"" between the scientific evidence and the final decision maker's choice.",2015-03-16 +23463597,GEnomes Management Application (GEM.app): a new software tool for large-scale collaborative genome analysis.,"Novel genes are now identified at a rapid pace for many Mendelian disorders, and increasingly, for genetically complex phenotypes. However, new challenges have also become evident: (1) effectively managing larger exome and/or genome datasets, especially for smaller labs; (2) direct hands-on analysis and contextual interpretation of variant data in large genomic datasets; and (3) many small and medium-sized clinical and research-based investigative teams around the world are generating data that, if combined and shared, will significantly increase the opportunities for the entire community to identify new genes. To address these challenges, we have developed GEnomes Management Application (GEM.app), a software tool to annotate, manage, visualize, and analyze large genomic datasets (https://genomics.med.miami.edu/). GEM.app currently contains ∼1,600 whole exomes from 50 different phenotypes studied by 40 principal investigators from 15 different countries. The focus of GEM.app is on user-friendly analysis for nonbioinformaticians to make next-generation sequencing data directly accessible. Yet, GEM.app provides powerful and flexible filter options, including single family filtering, across family/phenotype queries, nested filtering, and evaluation of segregation in families. In addition, the system is fast, obtaining results within 4 sec across ∼1,200 exomes. We believe that this system will further enhance identification of genetic causes of human disease.",2013-04-03 +22748112,"ReadqPCR and NormqPCR: R packages for the reading, quality checking and normalisation of RT-qPCR quantification cycle (Cq) data.","

Background

Measuring gene transcription using real-time reverse transcription polymerase chain reaction (RT-qPCR) technology is a mainstay of molecular biology. Technologies now exist to measure the abundance of many transcripts in parallel. The selection of the optimal reference gene for the normalisation of this data is a recurring problem, and several algorithms have been developed in order to solve it. So far nothing in R exists to unite these methods, together with other functions to read in and normalise the data using the chosen reference gene(s).

Results

We have developed two R/Bioconductor packages, ReadqPCR and NormqPCR, intended for a user with some experience with high-throughput data analysis using R, who wishes to use R to analyse RT-qPCR data. We illustrate their potential use in a workflow analysing a generic RT-qPCR experiment, and apply this to a real dataset. Packages are available from http://www.bioconductor.org/packages/release/bioc/html/ReadqPCR.html and http://www.bioconductor.org/packages/release/bioc/html/NormqPCR.html

Conclusions

These packages increase the repertoire of RT-qPCR analysis tools available to the R user and allow them to (amongst other things) read their data into R, hold it in an ExpressionSet compatible R object, choose appropriate reference genes, normalise the data and look for differential expression between samples.",2012-07-02 +21655956,Diversity of killer cell immunoglobulin-like receptor genes in Southern Turkey.,"Killer cell immunoglobulin-like receptors (KIRs) are a family of inhibitory and activating receptors expressed by natural killer (NK) cells and regulate NK cells' activity. KIR genes are highly polymorphic markers, characterized by a wide diversity, and can therefore be considered as good population genetic markers. The aim of this study was to determine KIR gene frequencies, ratios of haplotypes and genotypes in Southern Turkey and also to compare the data with other worldwide populations studied previously. The study group consisted of 200 non-related individuals from Southern Turkey. The percentage of each KIR gene in the population group was determined by direct counting. Differences between populations in the distribution of each KIR gene and genotype profile were estimated by two-tailed Fisher Exact test. The most frequent non-framework KIR genes detected in Southern Turkey population were: KIR 2DL1 (97%), KIR 3DL1 (91%), KIR 2DS4 (92%) and the pseudogene 2DP1 (96%). Forty different genotypes were found in 200 subjects and AA1 genotype was the most frequent (27%). Among 40 different genotypes, ten of these were described for the first time in this study and were added to the database ( http://www.allelefrequencies.net ) numerized as genotype ID from 400 to 409. Gene frequencies and found genotypes demonstrated similarity of Southern Turkey's KIR repertoire with the KIR repertoires of Middle East and European population. 
High variability seen in KIR genome in this region is thought to be formed as a result of migration and settlement of different civilizations in this region and heterogenity formed in time.",2011-06-08 +24729426,Cohort profile update: The 1993 Pelotas (Brazil) birth cohort follow-up visits in adolescence.,"In this paper we update the profile of the 1993 Pelotas (Brazil) Birth Cohort Study, with emphasis on a shift of priority from maternal and child health research topics to four main categories of outcome variables, collected throughout adolescence: (i) mental health; (ii) body composition; (iii) risk factors for non-communicable diseases (NCDs); (iv) human capital. We were able to trace 81.3% (n = 4106) of the original cohort at 18 years of age. For the first time, the 18-years visit took place entirely on the university premises, in a clinic equipped with state-of-the-art equipment for the assessment of body composition. We welcome requests for data analyses from outside scientists. For more information, refer to our website (http://www.epidemio-ufpel.org.projetos_de_pesquisas/estudos/coorte_1993) or e-mail the corresponding author.",2014-04-11 +22937822,ggbio: an R package for extending the grammar of graphics for genomic data.,"We introduce ggbio, a new methodology to visualize and explore genomics annotations and high-throughput data. The plots provide detailed views of genomic regions, summary views of sequence alignments and splicing patterns, and genome-wide overviews with karyogram, circular and grand linear layouts. The methods leverage the statistical functionality available in R, the grammar of graphics and the data handling capabilities of the Bioconductor project. The plots are specified within a modular framework that enables users to construct plots in a systematic way, and are generated directly from Bioconductor data structures. 
The ggbio R package is available at http://www.bioconductor.org/packages/2.11/bioc/html/ggbio.html.",2012-08-31 +24096080,A wavelet-based method to exploit epigenomic language in the regulatory region.,"

Motivation

Epigenetic landscapes in the regulatory regions reflect binding condition of transcription factors and their co-factors. Identifying epigenetic condition and its variation is important in understanding condition-specific gene regulation. Computational approaches to explore complex multi-dimensional landscapes are needed.

Results

To study epigenomic condition for gene regulation, we developed a method, AWNFR, to classify epigenomic landscapes based on the detected epigenomic landscapes. Assuming mixture of Gaussians for a nucleosome, the proposed method captures the shape of histone modification and identifies potential regulatory regions in the wavelet domain. For accuracy estimation as well as enhanced computational speed, we developed a novel algorithm based on down-sampling operation and footprint in wavelet. We showed the algorithmic advantages of AWNFR using the simulated data. AWNFR identified regulatory regions more effectively and accurately than the previous approaches with the epigenome data in mouse embryonic stem cells and human lung fibroblast cells (IMR90). Based on the detected epigenomic landscapes, AWNFR classified epigenomic status and studied epigenomic codes. We studied co-occurring histone marks and showed that AWNFR captures the epigenomic variation across time.

Availability and implementation

The source code and supplemental document of AWNFR are available at http://wonk.med.upenn.edu/AWNFR.",2013-10-04 +24678936,Two cases of multiple ossifying fibromas in the jaws.,"

Background

The clinicopathologic characteristics of multiple ossifying fibroma (OF) are unclear due to the condition's rarity, making diagnosis challenging. Sporadic multiple OFs must be distinguished from hyperparathyroidism-jaw tumour syndrome (HPT-JT) related OF and other fibro-osseous lesions.

Methods

Multiple OF cases were identified from ossifying fibroma cases. Clinical data including age, sex, anatomic site, radiographic features, clinical impression, treatment and available follow-up data as well as serum calcium, phosphorus, and parathyroid hormone (PTH) were recorded. GNAS and HRPT2 genetic mutations were examined in the two present cases. Case reports of sporadic multiple ossifying fibroma and HPT-JT-related OF were also reviewed.

Results

The two present cases were confirmed as sporadic multiple OF, with no genetic GNAS and HRPT2 mutations found. The incidence of sporadic multiple ossifying fibroma was 2.0% (2/102). The total 18 sporadic multiform OF cases were characterized as follows: 13 (72.2%) female; 5 (27.8%) male; mean age 28.6 years; 2/18 (11.1%) cases only in the mandible; 4/18 (22.2%) cases only in the maxilla; and 12/18 (66.7%) cases in both the maxilla and mandible. Radiographically, the lesions were radiolucent in 5/18 (27.8%) cases and mixed density in 13/18 (72.2%) cases. Of the 24 reviewed cases of HPT-JT related OF, sixteen (66.7%) patients were diagnosed with a single lesion, and 8 patients (33.3%) were diagnosed with multiple jaw lesions.

Conclusions

Sporadic multiple OFs are very rare, but must be distinguished from HPT-JT related OF. We strongly recommend that patients diagnosed with multiple ossifying fibromas receive serum PTH testing and mutation screening of HRPT2.

Virtual slides

http://www.diagnosticpathology.diagnomx.eu/vs/1194507146115753.",2014-03-28 +25189778,BigDataScript: a scripting language for data pipelines.,"

Motivation

The analysis of large biological datasets often requires complex processing pipelines that run for a long time on large computational infrastructures. We designed and implemented a simple script-like programming language with a clean and minimalist syntax to develop and manage pipeline execution and provide robustness to various types of software and hardware failures as well as portability.

Results

We introduce the BigDataScript (BDS) programming language for data processing pipelines, which improves abstraction from hardware resources and assists with robustness. Hardware abstraction allows BDS pipelines to run without modification on a wide range of computer architectures, from a small laptop to multi-core servers, server farms, clusters and clouds. BDS achieves robustness by incorporating the concepts of absolute serialization and lazy processing, thus allowing pipelines to recover from errors. By abstracting pipeline concepts at programming language level, BDS simplifies implementation, execution and management of complex bioinformatics pipelines, resulting in reduced development and debugging cycles as well as cleaner code.

Availability and implementation

BigDataScript is available under open-source license at http://pcingola.github.io/BigDataScript.",2014-09-03 +24141494,Metagenomic species profiling using universal phylogenetic marker genes.,"To quantify known and unknown microorganisms at species-level resolution using shotgun sequencing data, we developed a method that establishes metagenomic operational taxonomic units (mOTUs) based on single-copy phylogenetic marker genes. Applied to 252 human fecal samples, the method revealed that on average 43% of the species abundance and 58% of the richness cannot be captured by current reference genome-based methods. An implementation of the method is available at http://www.bork.embl.de/software/mOTU/.",2013-10-20 +22982574,Efficient methods for identifying mutated driver pathways in cancer.,"

Motivation

The first step for clinical diagnostics, prognostics and targeted therapeutics of cancer is to comprehensively understand its molecular mechanisms. Large-scale cancer genomics projects are providing a large volume of data about genomic, epigenomic and gene expression aberrations in multiple cancer types. One of the remaining challenges is to identify driver mutations, driver genes and driver pathways promoting cancer proliferation and filter out the unfunctional and passenger ones.

Results

In this study, we propose two methods to solve the so-called maximum weight submatrix problem, which is designed to de novo identify mutated driver pathways from mutation data in cancer. The first one is an exact method that can be helpful for assessing other approximate or/and heuristic algorithms. The second one is a stochastic and flexible method that can be employed to incorporate other types of information to improve the first method. Particularly, we propose an integrative model to combine mutation and expression data. We first apply our methods onto simulated data to show their efficiency. We further apply the proposed methods onto several real biological datasets, such as the mutation profiles of 74 head and neck squamous cell carcinomas samples, 90 glioblastoma tumor samples and 313 ovarian carcinoma samples. The gene expression profiles were also considered for the latter two datasets. The results show that our integrative model can identify more biologically relevant gene sets. We have implemented all these methods and made a package called mutated driver pathway finder, which can be easily used by other researchers.

Availability

A MATLAB package of MDPFinder is available at http://zhangroup.aporc.org/ShiHuaZhang.

Contact

zsh@amss.ac.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-14 +24586132,GAGA: a new algorithm for genomic inference of geographic ancestry reveals fine level population substructure in Europeans.,"Attempts to detect genetic population substructure in humans are troubled by the fact that the vast majority of the total amount of observed genetic variation is present within populations rather than between populations. Here we introduce a new algorithm for transforming a genetic distance matrix that reduces the within-population variation considerably. Extensive computer simulations revealed that the transformed matrix captured the genetic population differentiation better than the original one which was based on the T1 statistic. In an empirical genomic data set comprising 2,457 individuals from 23 different European subpopulations, the proportion of individuals that were determined as a genetic neighbour to another individual from the same sampling location increased from 25% with the original matrix to 52% with the transformed matrix. Similarly, the percentage of genetic variation explained between populations by means of Analysis of Molecular Variance (AMOVA) increased from 1.62% to 7.98%. Furthermore, the first two dimensions of a classical multidimensional scaling (MDS) using the transformed matrix explained 15% of the variance, compared to 0.7% obtained with the original matrix. Application of MDS with Mclust, SPA with Mclust, and GemTools algorithms to the same dataset also showed that the transformed matrix gave a better association of the genetic clusters with the sampling locations, and particularly so when it was used in the AMOVA framework with a genetic algorithm. Overall, the new matrix transformation introduced here substantially reduces the within population genetic differentiation, and can be broadly applied to methods such as AMOVA to enhance their sensitivity to reveal population substructure. 
We herewith provide a publicly available (http://www.erasmusmc.nl/fmb/resources/GAGA) model-free method for improved genetic population substructure detection that can be applied to human as well as any other species data in future studies relevant to evolutionary biology, behavioural ecology, medicine, and forensics.",2014-02-20 +23574738,DP2: Distributed 3D image segmentation using micro-labor workforce.,"

Summary

This application note describes a new scalable semi-automatic approach, the Dual Point Decision Process, for segmentation of 3D structures contained in 3D microscopy. The segmentation problem is distributed to many individual workers such that each receives only simple questions regarding whether two points in an image are placed on the same object. A large pool of micro-labor workers available through Amazon's Mechanical Turk system provides the labor in a scalable manner.

Availability and implementation

Python-based code for non-commercial use and test data are available in the source archive at https://sites.google.com/site/imagecrowdseg/.

Contact

rgiuly@ucsd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-10 +22800377,Slim-filter: an interactive Windows-based application for illumina genome analyzer data assessment and manipulation.,"

Background

The emergence of Next Generation Sequencing technologies has made it possible for individual investigators to generate gigabases of sequencing data per week. Effective analysis and manipulation of these data is limited due to large file sizes, so even simple tasks such as data filtration and quality assessment have to be performed in several steps. This requires (potentially problematic) interaction between the investigator and a bioinformatics/computational service provider. Furthermore, such services are often performed using specialized computational facilities.

Results

We present a Windows-based application, Slim-Filter designed to interactively examine the statistical properties of sequencing reads produced by Illumina Genome Analyzer and to perform a broad spectrum of data manipulation tasks including: filtration of low quality and low complexity reads; filtration of reads containing undesired subsequences (such as parts of adapters and PCR primers used during the sample and sequencing libraries preparation steps); excluding duplicated reads (while keeping each read's copy number information in a specialized data format); and sorting reads by copy numbers allowing for easy access and manual editing of the resulting files. Slim-Filter is organized as a sequence of windows summarizing the statistical properties of the reads. Each data manipulation step has roll-back abilities, allowing for return to previous steps of the data analysis process. Slim-Filter is written in C++ and is compatible with fasta, fastq, and specialized AS file formats presented in this manuscript. Setup files and a user's manual are available for download at the supplementary web site ( https://www.bioinfo.uh.edu/Slim_Filter/).

Conclusion

The presented Windows-based application has been developed with the goal of providing individual investigators with integrated sequencing reads analysis, curation, and manipulation capabilities.",2012-07-16 +23816409,BIMLR: a method for constructing rooted phylogenetic networks from rooted phylogenetic trees.,"Rooted phylogenetic trees constructed from different datasets (e.g. from different genes) are often conflicting with one another, i.e. they cannot be integrated into a single phylogenetic tree. Phylogenetic networks have become an important tool in molecular evolution, and rooted phylogenetic networks are able to represent conflicting rooted phylogenetic trees. Hence, the development of appropriate methods to compute rooted phylogenetic networks from rooted phylogenetic trees has attracted considerable research interest of late. The CASS algorithm proposed by van Iersel et al. is able to construct much simpler networks than other available methods, but it is extremely slow, and the networks it constructs are dependent on the order of the input data. Here, we introduce an improved CASS algorithm, BIMLR. We show that BIMLR is faster than CASS and less dependent on the input data order. Moreover, BIMLR is able to construct much simpler networks than almost all other methods. BIMLR is available at http://nclab.hit.edu.cn/wangjuan/BIMLR/.",2013-06-28 +24421695,Canadian Pediatric Endocrine Group extension to WHO growth charts: Why bother?,"The Canadian Pediatric Endocrinology Group (CPEG) has produced complementary growth curves based on the 2010 'WHO Growth Charts for Canada'. In response to concerns from CPEG members and the general paediatric community regarding the presentation of the WHO data, complementary curves were generated, which the authors believe will enhance clarity, reduce potential errors in classification and enable users to better track short-term changes, particularly for weight in older children. 
Specifically, these curves extend weight-for-age beyond 10 years of age, restore additional percentiles within the normal range, remove extreme percentiles and harmonize the choice of body mass index percentiles with adult definitions of overweight and obesity. All modifications followed strict WHO methodology and used core data from the United States National Center for Health Statistics. The curves retain the clean appearance of the 2010 Canadian curves and are available from the CPEG website (http://cpeg-gcep.net).",2013-06-01 +21643562,Toxicity tests aiming to protect Brazilian aquatic systems: current status and implications for management.,"The current status of toxicological tests performed with Brazilian native species was evaluated through a survey of the scientific data available in the literature. The information gathered was processed and an electronic toxicology database (http://www.inct-ta.furg.br/bd_toxicologico.php) was generated. This database provides valuable information for researchers to select sensitive and tolerant aquatic species to a large variety of aquatic pollutants. Furthermore, the toxicology database allows researchers to select species representative of an ecosystem of interest. Analysis of the toxicology database showed that ecotoxicological assays have significantly improved in Brazil over the last decade, in spite of the still relatively low number of tests performed and the restricted number of native species tested. This is because most of the research is developed in a few laboratories concentrated in certain regions of Brazil, especially in Southern and Southeast regions. Considering the extremely rich biodiversity and the large variety of aquatic ecosystems in Brazil, this finding points to the urgent need for the development of ecotoxicological studies with other groups of aquatic animals, such as insects, foraminifera, cnidarians, worms, amphibians, among others. 
This would help to derive more realistic water quality criteria (WQC) values, which would better protect the different aquatic ecosystems in Brazil. Finally, the toxicology database generated presents solid and science based information, which can encourage and drive the Environmental Regulatory Agencies in Brazil to derive WQC based on native species. In this context, the present paper discusses the historical evolution of ecotoxicological studies in Brazil, and how they have contributed to the improvement of the Brazilian Federal and Regional regulations for environment.",2011-06-03 +24010822,L_RNA_scaffolder: scaffolding genomes with transcripts.,"

Background

Generation of large mate-pair libraries is necessary for de novo genome assembly but the procedure is complex and time-consuming. Furthermore, in some complex genomes, it is hard to increase the N50 length even with large mate-pair libraries, which leads to low transcript coverage. Thus, it is necessary to develop other simple scaffolding approaches, to at least solve the elongation of transcribed fragments.

Results

We describe L_RNA_scaffolder, a novel genome scaffolding method that uses long transcriptome reads to order, orient and combine genomic fragments into larger sequences. To demonstrate the accuracy of the method, the zebrafish genome was scaffolded. With expanded human transcriptome data, the N50 of human genome was doubled and L_RNA_scaffolder out-performed most scaffolding results by existing scaffolders which employ mate-pair libraries. In these two examples, the transcript coverage was almost complete, especially for long transcripts. We applied L_RNA_scaffolder to the highly polymorphic pearl oyster draft genome and the gene model length significantly increased.

Conclusions

The simplicity and high-throughput of RNA-seq data makes this approach suitable for genome scaffolding. L_RNA_scaffolder is available at http://www.fishbrowser.org/software/L_RNA_scaffolder.",2013-09-08 +24352068,"Update: influenza activity - United States, September 29-December 7, 2013.","CDC collects, compiles, and analyzes data on influenza activity year-round in the United States (http://www.cdc.gov/flu/weekly/fluactivitysurv.htm). The influenza season generally begins in the fall and continues through the winter and spring months; however, the timing and severity of circulating influenza viruses can vary by geographic location and season. Influenza activity in the United States continued to increase from mid-November through the beginning of December. This report summarizes U.S. influenza activity* during September 29-December 7, 2013.",2013-12-01 +25872223,"Ambient Coarse Particulate Matter and Hospital Admissions in the Medicare Cohort Air Pollution Study, 1999-2010.","

Background

In recent years a number of studies have examined the short-term association between coarse particulate matter (PM(10-2.5)) and mortality and morbidity outcomes. These studies, however, have produced inconsistent conclusions.

Objectives

We estimated both the national- and regional-level associations between PM(10-2.5) and emergency hospitalizations for both cardiovascular and respiratory disease among Medicare enrollees ≥ 65 years of age during the 12-year period 1999 through 2010.

Methods

Using air pollution data obtained from the U.S. Environmental Protection Agency air quality monitoring network and daily emergency hospitalizations for 110 large urban U.S. counties assembled from the Medicare Cohort Air Pollution Study (MCAPS), we estimated the association between short-term exposure to PM(10-2.5) and hospitalizations using a two-stage Bayesian hierarchical model and Poisson log-linear regression models.

Results

A 10-μg/m3 increase in PM(10-2.5) was associated with a significant increase in same-day cardiovascular hospitalizations [0.69%; 95% posterior interval (PI): 0.45, 0.92]. After adjusting for PM2.5, this association remained significant (0.63%; 95% PI: 0.38, 0.88). A 10-μg/m3 increase in PM(10-2.5) was not associated with a significant increase in respiratory-related hospitalizations.

Conclusions

We found statistically significant evidence that daily variation in PM(10-2.5) is associated with emergency hospitalizations for cardiovascular diseases among Medicare enrollees ≥ 65 years of age. This association was robust to adjustment for concentrations of PM2.5.

Citation

Powell H, Krall JR, Wang Y, Bell ML, Peng RD. 2015. Ambient coarse particulate matter and hospital admissions in the Medicare Cohort Air Pollution Study, 1999-2010. Environ Health Perspect 123:1152-1158; http://dx.doi.org/10.1289/ehp.1408720.",2015-04-14 +24295440,DeNovoGUI: an open source graphical user interface for de novo sequencing of tandem mass spectra.,"De novo sequencing is a popular technique in proteomics for identifying peptides from tandem mass spectra without having to rely on a protein sequence database. Despite the strong potential of de novo sequencing algorithms, their adoption threshold remains quite high. We here present a user-friendly and lightweight graphical user interface called DeNovoGUI for running parallelized versions of the freely available de novo sequencing software PepNovo+, greatly simplifying the use of de novo sequencing in proteomics. Our platform-independent software is freely available under the permissible Apache2 open source license. Source code, binaries, and additional documentation are available at http://denovogui.googlecode.com .",2014-01-07 +25147449,"An interactive multi-entry key to the species of Megalostomis Chevrolat, with description of a new species from Paraguay (Chrysomelidae, Cryptocephalinae).","The main goal of this contribution is to release an interactive multi-entry key to all known species of the genus Megalostomis Chevrolat. This key constitutes a new tool created to aid the identification of the species of this diverse genus, which occasionally may be difficult to identify to the species-level, due to the lack of reference collections for most countries within its distribution range, and to the presence of intra-specific variation and secondary sexual characters. It is expected that this on-line key will facilitate future periodic updates, and will benefit all those persons interested in identifying these taxa. The present paper also includes the description of Megalostomis juanenrique sp. 
n., a new species from Paraguay. In addition, Megalostomis gigas Lacordaire, and Megalostomis robustipes Monrós are newly cited for the fauna of Paraguay. The online interactive Lucid key is available at http://keys.lucidcentral.org/keys/v3/megalostomis. Offline Lucid data files in LIF and SDD formats are also available at doi: 10.3897/zookeys.425.7631.app1 and doi: 10.3897/zookeys.425.7631.app2.",2014-07-10 +30727418,First Report of Downy Mildew of Spider Flower Caused by a Hyaloperonospora sp. in Korea.,"Spider flower, Tarenaya hassleriana (Chodat) H. H. Iltis (synonym Cleome hassleriana; C. spinosa), which is native to South America, is now cultivated as an ornamental plant worldwide. In Korea, this plant has recently become popular in gardens and parks because of its beautiful flowers. During July 2010, plants showing typical symptoms of downy mildew were observed in public gardens along the lakeside in Ganghwa, South Korea. Infection resulted in chlorotic areas on the leaves with a white mildew developing on the abaxial surface and finally leading to necrosis of the lesions. Representative samples of infected leaves were deposited at the herbarium of Korea University, Seoul, Korea (KUS-F25091 and F25462). Microscopic examination of fresh material was performed under a light microscope. Conidiophores emerging from stomata were hyaline, 250 to 650 × 10 to 15 μm, straight, and monopodially branched in five to eight orders. Ultimate branchlets were mostly in pairs, flexuous to sigmoid, 15 to 30 μm long, and had obtuse or subtruncate tips. Conidia were hyaline, subglobose, and measured 23 to 26.5 × 21 to 24 μm with a length/width ratio (L/W) of 1.05 to 1.15. Up to now, the downy mildew pathogen of the spider flower has been considered to be Hyaloperonospora parasitica, Peronospora capparidis or P. cleomes, but the latter two names were considered as synonyms of the former (1). In the current study, the spider flower pathogen was morphologically distinct from H. 
parasitica; in the Korean specimen, conidia were subglobose with a low L/W value, while in H. parasitica sensu stricto, originated from Capsella bursa-pastoris, conidia were broadly ellipsoidal and measured 22.5 to 26.5 × 18 to 21.5 μm with a L/W ratio of 1.17 to 1.31 (1). To confirm this morphological difference, the amplification and sequencing of the internal transcribed spacer (ITS) region of rDNA of the Korean specimen were performed using procedures outlined by Göker et al. (3). The resulting 874-bp sequence of the region was deposited in GenBank (Accession No. JQ301468). A comparison with the ITS sequences available in the GenBank database revealed that the Korean accession exhibits a high dissimilarity of approximately 11% (99 of 882 characters are different) from that of H. parasitica from C. bursa-pastoris (AY210987). On the basis of morphological and molecular data, the spider flower pathogen found in Korea was clearly distinct from H. parasitica. Therefore, we provisionally indicate this pathogen as a Hyaloperonospora sp. To our knowledge, there is no previous record of a downy mildew on spider flower in Asia, although this disease has been previously recorded in Malawi, South Africa, Uganda, New Zealand, Poland, Romania, the United States, and Venezuela (2). The presence of a downy mildew on spider flower in Asia can be considered a potentially new and serious threat to this ornamental plant. References: (1) O. Constantinescu and J. Fatehi. Nova Hedwigia 74:291, 2002. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , December 13, 2011. (3) M. Göker et al. Mycol. Res. 113:308, 2009.",2012-04-01 +25005749,MCMC_CLIB-an advanced MCMC sampling package for ODE models.,"

Summary

We present a new C implementation of an advanced Markov chain Monte Carlo (MCMC) method for the sampling of ordinary differential equation (ode) model parameters. The software mcmc_clib uses the simplified manifold Metropolis-adjusted Langevin algorithm (SMMALA), which is locally adaptive; it uses the parameter manifold's geometry (the Fisher information) to make efficient moves. This adaptation does not diminish with MC length, which is highly advantageous compared with adaptive Metropolis techniques when the parameters have large correlations and/or posteriors substantially differ from multivariate Gaussians. The software is standalone (not a toolbox), though dependencies include the GNU scientific library and sundials libraries for ode integration and sensitivity analysis.

Availability and implementation

The source code and binary files are freely available for download at http://a-kramer.github.io/mcmc_clib/. This also includes example files and data. A detailed documentation, an example model and user manual are provided with the software.

Contact

andrei.kramer@ist.uni-stuttgart.de.",2014-07-07 +26069051,Suicide and Ambient Temperature in East Asian Countries: A Time-Stratified Case-Crossover Analysis.,"

Background

A limited number of studies suggest that ambient temperature contributes to suicide; these studies typically focus on a single nation and use temporally and spatially aggregated data.

Objective

We evaluated the association between ambient temperature and suicide in multiple cities in three East Asian countries.

Methods

A time-stratified case-crossover method was used to explore the relationship between temperature and suicide, adjusting for potential time-varying confounders and time-invariant individual characteristics. Sex- and age-specific associations of temperature with suicide were estimated, as were interactions between temperature and these variables. A random-effects meta-analysis was used to estimate country-specific pooled associations of temperature with suicide.

Results

An increase in temperature corresponding to half of the city-specific standard deviation was positively associated with suicide in most cities, although average suicide rates varied substantially. Pooled country-level effect estimates were 7.8% (95% CI: 5.0, 10.8%) for a 2.3°C increase in ambient temperature in Taiwan, 6.8% (95% CI: 5.4, 8.2%) for a 4.7°C increase in Korea, and 4.5% (95% CI: 3.3, 5.7%) for a 4.2°C increase in Japan. The association between temperature and suicide was significant even after adjusting for sunshine duration; the association between sunshine and suicide was not significant. The associations were greater among men than women in 12 of the 15 cities although not significantly so. There was little evidence of a consistent pattern of associations with age. In general, associations were strongest with temperature on the same day or the previous day, with little evidence of associations with temperature over longer lags (up to 5 days).

Conclusions

We estimated consistent positive associations between suicide and elevated ambient temperature in three East Asian countries, regardless of country, sex, and age.

Citation

Kim Y, Kim H, Honda Y, Guo YL, Chen BY, Woo JM, Ebi KL. 2016. Suicide and ambient temperature in East Asian countries: a time-stratified case-crossover analysis. Environ Health Perspect 124:75-80; http://dx.doi.org/10.1289/ehp.1409392.",2015-06-12 +21464845,An ANN model for the identification of deleterious nsSNPs in tumor suppressor genes.,"

Unlabelled

Human genetic variations primarily result from single nucleotide polymorphisms (SNPs) that occur approximately every 1000 bases in the overall human population. The non-synonymous SNPs (nsSNPs), which lead to amino acid changes in the protein product, may account for nearly half of the known genetic variations linked to inherited human diseases and cancer. One of the main problems of medical genetics today is to identify nsSNPs that underlie disease-related phenotypes in humans. An attempt was made to develop a new approach to predict such nsSNPs. This would enhance our understanding of genetic diseases and help to predict the disease. We detect nsSNPs and all possible and reliable alleles by ANN, a soft computing model using potential SNP information. Reliable nsSNPs are identified, based on the reconstructed alleles and on sequence redundancy. The model gives good results with mean specificity (95.85%), sensitivity (97.40%) and accuracy (96.25%). Our results indicate that ANNs can serve as a useful method to analyze quantitative effect of nsSNPs on protein function and would be useful for large-scale analysis of genomic nsSNP data.

Availability

The database is available for free at http://www.snp.mirworks.in.",2011-03-02 +23178636,Efficient statistical significance approximation for local similarity analysis of high-throughput time series data.,"

Motivation

Local similarity analysis of biological time series data helps elucidate the varying dynamics of biological systems. However, its applications to large scale high-throughput data are limited by slow permutation procedures for statistical significance evaluation.

Results

We developed a theoretical approach to approximate the statistical significance of local similarity analysis based on the approximate tail distribution of the maximum partial sum of independent identically distributed (i.i.d.) random variables. Simulations show that the derived formula approximates the tail distribution reasonably well (starting at time points > 10 with no delay and > 20 with delay) and provides P-values comparable with those from permutations. The new approach enables efficient calculation of statistical significance for pairwise local similarity analysis, making possible all-to-all local association studies otherwise prohibitive. As a demonstration, local similarity analysis of human microbiome time series shows that core operational taxonomic units (OTUs) are highly synergetic and some of the associations are body-site specific across samples.

Availability

The new approach is implemented in our eLSA package, which now provides pipelines for faster local similarity analysis of time series data. The tool is freely available from eLSA's website: http://meta.usc.edu/softs/lsa.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

fsun@usc.edu.",2012-11-23 +25966491,Social Science Collaboration with Environmental Health.,"

Background

Social science research has been central in documenting and analyzing community discovery of environmental exposure and consequential processes. Collaboration with environmental health science through team projects has advanced and improved our understanding of environmental health and justice.

Objective

We sought to identify diverse methods and topics in which social scientists have expanded environmental health understandings at multiple levels, to examine how transdisciplinary environmental health research fosters better science, and to learn how these partnerships have been able to flourish because of the support from National Institute of Environmental Health Sciences (NIEHS).

Methods

We analyzed various types of social science research to investigate how social science contributes to environmental health. We also examined NIEHS programs that foster social science. In addition, we developed a case study of a community-based participation research project in Akwesasne in order to demonstrate how social science has enhanced environmental health science.

Results

Social science has informed environmental health science through ethnographic studies of contaminated communities, analysis of spatial distribution of environmental injustice, psychological experience of contamination, social construction of risk and risk perception, and social impacts of disasters. Social science-environmental health team science has altered the way scientists traditionally explore exposure by pressing for cumulative exposure approaches and providing research data for policy applications.

Conclusions

A transdisciplinary approach for environmental health practice has emerged that engages the social sciences to paint a full picture of the consequences of contamination so that policy makers, regulators, public health officials, and other stakeholders can better ameliorate impacts and prevent future exposure.

Citation

Hoover E, Renauld M, Edelstein MR, Brown P. 2015. Social science collaboration with environmental health. Environ Health Perspect 123:1100-1106; http://dx.doi.org/10.1289/ehp.1409283.",2015-05-12 +24580807,Kraken: ultrafast metagenomic sequence classification using exact alignments.,"Kraken is an ultrafast and highly accurate program for assigning taxonomic labels to metagenomic DNA sequences. Previous programs designed for this task have been relatively slow and computationally expensive, forcing researchers to use faster abundance estimation programs, which only classify small subsets of metagenomic data. Using exact alignment of k-mers, Kraken achieves classification accuracy comparable to the fastest BLAST program. In its fastest mode, Kraken classifies 100 base pair reads at a rate of over 4.1 million reads per minute, 909 times faster than Megablast and 11 times faster than the abundance estimation program MetaPhlAn. Kraken is available at http://ccb.jhu.edu/software/kraken/.",2014-03-03 +26430387,Markedly divergent estimates of Amazon forest carbon density from ground plots and satellites.,"

Aim

The accurate mapping of forest carbon stocks is essential for understanding the global carbon cycle, for assessing emissions from deforestation, and for rational land-use planning. Remote sensing (RS) is currently the key tool for this purpose, but RS does not estimate vegetation biomass directly, and thus may miss significant spatial variations in forest structure. We test the stated accuracy of pantropical carbon maps using a large independent field dataset.

Location

Tropical forests of the Amazon basin. The permanent archive of the field plot data can be accessed at: http://dx.doi.org/10.5521/FORESTPLOTS.NET/2014_1.

Methods

Two recent pantropical RS maps of vegetation carbon are compared to a unique ground-plot dataset, involving tree measurements in 413 large inventory plots located in nine countries. The RS maps were compared directly to field plots, and kriging of the field data was used to allow area-based comparisons.

Results

The two RS carbon maps fail to capture the main gradient in Amazon forest carbon detected using 413 ground plots, from the densely wooded tall forests of the north-east, to the light-wooded, shorter forests of the south-west. The differences between plots and RS maps far exceed the uncertainties given in these studies, with whole regions over- or under-estimated by > 25%, whereas regional uncertainties for the maps were reported to be < 5%.

Main conclusions

Pantropical biomass maps are widely used by governments and by projects aiming to reduce deforestation using carbon offsets, but may have significant regional biases. Carbon-mapping techniques must be revised to account for the known ecological variation in tree wood density and allometry to create maps suitable for carbon accounting. The use of single relationships between tree canopy height and above-ground biomass inevitably yields large, spatially correlated errors. This presents a significant challenge to both the forest conservation and remote sensing communities, because neither wood density nor species assemblages can be reliably mapped from space.",2014-04-22 +24441765,TR-DB: an open-access database of compounds affecting the ethylene-induced triple response in Arabidopsis.,"Small molecules which act as hormone agonists or antagonists represent useful tools in fundamental research and are widely applied in agriculture to control hormone effects. High-throughput screening of large chemical compound libraries has yielded new findings in plant biology, with possible future applications in agriculture and horticulture. To further understand ethylene biosynthesis/signaling and its crosstalk with other hormones, we screened a 12,000 compound chemical library based on an ethylene-related bioassay of dark-grown Arabidopsis thaliana (L.) Heynh. seedlings. From the initial screening, 1313 (∼11%) biologically active small molecules altering the phenotype triggered by the ethylene precursor 1-aminocyclopropane-1-carboxylic acid (ACC), were identified. Selection and sorting in classes were based on the angle of curvature of the apical hook, the length and width of the hypocotyl and the root. A MySQL-database was constructed (https://chaos.ugent.be/WE15/) including basic chemical information on the compounds, images illustrating the phenotypes, phenotype descriptions and classification. 
The research perspectives for different classes of hit compounds will be evaluated, and some general screening tips for customized high-throughput screening and pitfalls will be discussed.",2014-01-04 +25399029,E-MEM: efficient computation of maximal exact matches for very large genomes.,"

Motivation

Alignment of similar whole genomes is often performed using anchors given by the maximal exact matches (MEMs) between their sequences. In spite of significant amount of research on this problem, the computation of MEMs for large genomes remains a challenging problem. The leading current algorithms employ full text indexes, the sparse suffix array giving the best results. Still, their memory requirements are high, the parallelization is not very efficient, and they cannot handle very large genomes.

Results

We present a new algorithm, efficient computation of MEMs (E-MEM) that does not use full text indexes. Our algorithm uses much less space and is highly amenable to parallelization. It can compute all MEMs of minimum length 100 between the whole human and mouse genomes on a 12 core machine in 10 min and 2 GB of memory; the required memory can be as low as 600 MB. It can run efficiently genomes of any size. Extensive testing and comparison with currently best algorithms is provided.

Availability and implementation

The source code of E-MEM is freely available at: http://www.csd.uwo.ca/~ilie/E-MEM/

Contact

ilie@csd.uwo.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-17 +22914221,An empirical Bayes mixture model for SNP detection in pooled sequencing data.,"

Motivation

Detecting single-nucleotide polymorphism (SNP) in pooled sequencing data is more challenging than in individual sequencing because of sampling variations across pools. To effectively differentiate SNP signal from sequencing error, appropriate estimation of the sequencing error is necessary. In this article, we propose an empirical Bayes mixture (EBM) model for SNP detection and allele frequency estimation in pooled sequencing data.

Results

The proposed model reliably learns the error distribution by pooling information across pools and genomic positions. In addition, the proposed EBM model builds in characteristics unique to the pooled sequencing data, boosting the sensitivity of SNP detection. For large-scale inference in SNP detection, the EBM model provides a flexible and robust way for estimation and control of local false discovery rate. We demonstrate the performance of the proposed method through simulation studies and real data application.

Availability

Implementation of this method is available at https://sites.google.com/site/zhouby98.",2012-08-22 +21418615,RefGenes: identification of reliable and condition specific reference genes for RT-qPCR data normalization.,"

Background

RT-qPCR is a sensitive and increasingly used method for gene expression quantification. To normalize RT-qPCR measurements between samples, most laboratories use endogenous reference genes as internal controls. There is increasing evidence, however, that the expression of commonly used reference genes can vary significantly in certain contexts.

Results

Using the Genevestigator database of normalized and well-annotated microarray experiments, we describe the expression stability characteristics of the transcriptomes of several organisms. The results show that a) no genes are universally stable, b) most commonly used reference genes yield very high transcript abundances as compared to the entire transcriptome, and c) for each biological context a subset of stable genes exists that has smaller variance than commonly used reference genes or genes that were selected for their stability across all conditions.

Conclusion

We therefore propose the normalization of RT-qPCR data using reference genes that are specifically chosen for the conditions under study. RefGenes is a community tool developed for that purpose. Validation RT-qPCR experiments across several organisms showed that the candidates proposed by RefGenes generally outperformed commonly used reference genes. RefGenes is available within Genevestigator at http://www.genevestigator.com.",2011-03-21 +24292361,ABACUS: an entropy-based cumulative bivariate statistic robust to rare variants and different direction of genotype effect.,"

Motivation

In the past years, both sequencing and microarray have been widely used to search for relations between genetic variations and predisposition to complex pathologies such as diabetes or neurological disorders. These studies, however, have been able to explain only a small fraction of disease heritability, possibly because complex pathologies cannot be referred to few dysfunctional genes, but are rather heterogeneous and multicausal, as a result of a combination of rare and common variants possibly impairing multiple regulatory pathways. Rare variants, though, are difficult to detect, especially when the effects of causal variants are in different directions, i.e. with protective and detrimental effects.

Results

Here, we propose ABACUS, an Algorithm based on a BivAriate CUmulative Statistic to identify single nucleotide polymorphisms (SNPs) significantly associated with a disease within predefined sets of SNPs such as pathways or genomic regions. ABACUS is robust to the concurrent presence of SNPs with protective and detrimental effects and of common and rare variants; moreover, it is powerful even when few SNPs in the SNP-set are associated with the phenotype. We assessed ABACUS performance on simulated and real data and compared it with three state-of-the-art methods. When ABACUS was applied to type 1 and 2 diabetes data, besides observing a wide overlap with already known associations, we found a number of biologically sound pathways, which might shed light on diabetes mechanism and etiology.

Availability and implementation

ABACUS is available at http://www.dei.unipd.it/∼dicamill/pagine/Software.html.",2013-11-28 +24364790,DiSCuS: an open platform for (not only) virtual screening results management.,"DiSCuS, a ""Database System for Compound Selection"", has been developed. The primary goal of DiSCuS is to aid researchers in the steps subsequent to generating high-throughput virtual screening (HTVS) results, such as selection of compounds for further study, purchase, or synthesis. To do so, DiSCuS provides (1) a storage facility for ligand-receptor complexes (generated with external programs), (2) a number of tools for validating these complexes, such as scoring functions, potential energy contributions, and med-chem features with ligand similarity estimates, and (3) powerful searching and filtering options with logical operators. DiSCuS supports multiple receptor targets for a single ligand, so it can be used either to evaluate different variants of an active site or for selectivity studies. DiSCuS documentation, installation instructions, and source code can be found at http://discus.ibb.waw.pl .",2014-01-03 +24404173,ClassyFlu: classification of influenza A viruses with Discriminatively trained profile-HMMs.,"Accurate and rapid characterization of influenza A virus (IAV) hemagglutinin (HA) and neuraminidase (NA) sequences with respect to subtype and clade is at the basis of extended diagnostic services and implicit to molecular epidemiologic studies. ClassyFlu is a new tool and web service for the classification of IAV sequences of the HA and NA gene into subtypes and phylogenetic clades using discriminatively trained profile hidden Markov models (HMMs), one for each subtype or clade. ClassyFlu merely requires as input unaligned, full-length or partial HA or NA DNA sequences. 
It enables rapid and highly accurate assignment of HA sequences to subtypes H1-H17 but particularly focusses on the finer grained assignment of sequences of highly pathogenic avian influenza viruses of subtype H5N1 according to the cladistics proposed by the H5N1 Evolution Working Group. NA sequences are classified into subtypes N1-N10. ClassyFlu was compared to semiautomatic classification approaches using BLAST and phylogenetics and additionally for H5 sequences to the new ""Highly Pathogenic H5N1 Clade Classification Tool"" (IRD-CT) proposed by the Influenza Research Database. Our results show that both web tools (ClassyFlu and IRD-CT), although based on different methods, are nearly equivalent in performance and both are more accurate and faster than semiautomatic classification. A retraining of ClassyFlu to altered cladistics as well as an extension of ClassyFlu to other IAV genome segments or fragments thereof is undemanding. This is exemplified by unambiguous assignment to a distinct cluster within subtype H7 of sequences of H7N9 viruses which emerged in China early in 2013 and caused more than 130 human infections. http://bioinf.uni-greifswald.de/ClassyFlu is a free web service. For local execution, the ClassyFlu source code in PERL is freely available.",2014-01-03 +25865072,Radiopacity for Contemporary Luting Cements Using Digital Radiography under Various Exposure Conditions.,"

Purpose

This study examined the radiopacity of contemporary luting cements using direct digital radiography under a range of exposure conditions.

Materials and methods

Disc specimens (N = 80, n = 10 per group, ø5 mm × 1 mm) were prepared from 8 resin-based luting cements (BisCem, Clearfil SA Luting, Duolink, Maxcem Elite, Multilink Speed, Panavia F 2.0, RelyX Unicem Clicker, V-link). The specimens were radiographed using a charge-coupled device sensor along with an 11-step aluminum step wedge (1.5-mm incremental steps) and 1-mm-thick tooth cut using five tube voltage/exposure time setups (60 kVp, 0.10/0.08 seconds; 70 kVp, 0.10/0.08/0.06 seconds) at 4 mA and 30 cm. The radiopacity of the specimens was compared with that of the aluminum step wedge and human enamel and dentin using NIH ImageJ software (available at http://rsb.info.nih.gov/ij/). A linear regression model for the aluminum step wedge was constructed, and the data were analyzed by ANOVA and Duncan post hoc test.

Results

Maxcem Elite (5.142 to 5.441) showed the highest radiopacity of all materials, followed in order by Multilink Speed (3.731 to 3.396) and V-link (2.763 to 3.103). The radiopacity of Panavia F 2.0 (2.025 to 2.429), BisCem (1.825 to 2.218), Clearfil SA Luting (1.692 to 2.145), Duolink (1.707 to 1.993), and RelyX Unicem Clicker (1.586 to 1.979) were between enamel (2.117 to 2.330) and dentin (1.302 to 1.685). The radiopacity of 70 kVp conditions was higher than that of the 60 kVp conditions.

Conclusions

The radiopacities of the tested luting materials were greater than those of dentin or aluminum, satisfying the criteria of the International Organization for Standardization, and they differed significantly from each other in the exposure setups.",2015-04-10 +23595663,EPSILON: an eQTL prioritization framework using similarity measures derived from local networks.,"

Motivation

When genomic data are associated with gene expression data, the resulting expression quantitative trait loci (eQTL) will likely span multiple genes. eQTL prioritization techniques can be used to select the most likely causal gene affecting the expression of a target gene from a list of candidates. As an input, these techniques use physical interaction networks that often contain highly connected genes and unreliable or irrelevant interactions that can interfere with the prioritization process. We present EPSILON, an extendable framework for eQTL prioritization, which mitigates the effect of highly connected genes and unreliable interactions by constructing a local network before a network-based similarity measure is applied to select the true causal gene.

Results

We tested the new method on three eQTL datasets derived from yeast data using three different association techniques. A physical interaction network was constructed, and each eQTL in each dataset was prioritized using the EPSILON approach: first, a local network was constructed using a k-trials shortest path algorithm, followed by the calculation of a network-based similarity measure. Three similarity measures were evaluated: random walks, the Laplacian Exponential Diffusion kernel and the Regularized Commute-Time kernel. The aim was to predict knockout interactions from a yeast knockout compendium. EPSILON outperformed two reference prioritization methods, random assignment and shortest path prioritization. Next, we found that using a local network significantly increased prioritization performance in terms of predicted knockout pairs when compared with using exactly the same network similarity measures on the global network, with an average increase in prioritization performance of 8 percentage points (P < 10(-5)).

Availability

The physical interaction network and the source code (Matlab/C++) of our implementation can be downloaded from http://bioinformatics.intec.ugent.be/epsilon.

Contact

lieven.verbeke@intec.ugent.be, kamar@psb.ugent.be, jan.fostier@intec.ugent.be

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-16 +22909249,"Bio.Phylo: a unified toolkit for processing, analyzing and visualizing phylogenetic trees in Biopython.","

Background

Ongoing innovation in phylogenetics and evolutionary biology has been accompanied by a proliferation of software tools, data formats, analytical techniques and web servers. This brings with it the challenge of integrating phylogenetic and other related biological data found in a wide variety of formats, and underlines the need for reusable software that can read, manipulate and transform this information into the various forms required to build computational pipelines.

Results

We built a Python software library for working with phylogenetic data that is tightly integrated with Biopython, a broad-ranging toolkit for computational biology. Our library, Bio.Phylo, is highly interoperable with existing libraries, tools and standards, and is capable of parsing common file formats for phylogenetic trees, performing basic transformations and manipulations, attaching rich annotations, and visualizing trees. We unified the modules for working with the standard file formats Newick, NEXUS and phyloXML behind a consistent and simple API, providing a common set of functionality independent of the data source.

Conclusions

Bio.Phylo meets a growing need in bioinformatics for working with heterogeneous types of phylogenetic data. By supporting interoperability with multiple file formats and leveraging existing Biopython features, this library simplifies the construction of phylogenetic workflows. We also provide examples of the benefits of building a community around a shared open-source project. Bio.Phylo is included with Biopython, available through the Biopython website, http://biopython.org.",2012-08-21 +22640003,Mapping breastfeeding services: a method to inform effective implementation and evaluation of evidence-based policy in practice.,"This paper aims to introduce a method for mapping local service provision to local demographic and health outcome data, to inform evidence-based policy and practice in public health. A mapping exercise was conducted in London, England with the aims of: (1) describing services provided for breastfeeding women in primary and tertiary health care sectors and government, voluntary and private sectors; and (2) linking this information with routine data on deprivation, breastfeeding rates and health outcomes. Quantitative data on local breastfeeding services were collected via an online questionnaire by a designated 'mapping lead' in each locality. Data were collected at the level of individual health care organisations on the provision, nature and management of breastfeeding services, and related organisational inputs such as leadership, staffing, accreditation and policy. Demographic and health outcome data were identified from existing routine national data collections. Ninety-one per cent of eligible acute and primary care organisations participated in the mapping exercise. A range of mapping tools and profile were developed and launched in 2009 (http://atlas.chimat.org.uk/IAS/dataviews/view?viewId=66). These tools can be used for descriptive analyses of service provision on the basis of local need. 
Comparative analyses on the impact of service provision on breastfeeding or health outcomes will be feasible from 18 months of data collection onwards. This case study has demonstrated the potential utility of this mapping method to inform effective implementation and evaluation of public health policy in practice consistent with the World Health Organisation framework. Formal evaluation of the utility of the tools is recommended.",2012-05-28 +24843009,A sphingolipid-dependent diffusion barrier confines ER stress to the yeast mother cell.,"In many cell types, lateral diffusion barriers compartmentalize the plasma membrane and, at least in budding yeast, the endoplasmic reticulum (ER). However, the molecular nature of these barriers, their mode of action and their cellular functions are unclear. Here, we show that misfolded proteins of the ER remain confined into the mother compartment of budding yeast cells. Confinement required the formation of a lateral diffusion barrier in the form of a distinct domain of the ER-membrane at the bud neck, in a septin-, Bud1 GTPase- and sphingolipid-dependent manner. The sphingolipids, but not Bud1, also contributed to barrier formation in the outer membrane of the dividing nucleus. Barrier-dependent confinement of ER stress into the mother cell promoted aging. Together, our data clarify the physical nature of lateral diffusion barriers in the ER and establish the role of such barriers in the asymmetric segregation of proteotoxic misfolded proteins during cell division and aging. DOI: http://dx.doi.org/10.7554/eLife.01883.001.",2014-05-06 +21184613,"MIND-BEST: Web server for drugs and target discovery; design, synthesis, and assay of MAO-B inhibitors and theoretical-experimental study of G3PDH protein from Trichomonas gallinae.","Many drugs with very different affinity to a large number of receptors are described. 
Thus, in this work, we selected drug-target pairs (DTPs/nDTPs) of drugs with high affinity/nonaffinity for different targets. Quantitative structure-activity relationship (QSAR) models become a very useful tool in this context because they substantially reduce time and resource-consuming experiments. Unfortunately, most QSAR models predict activity against only one protein target and/or they have not been implemented on a public Web server yet, freely available online to the scientific community. To solve this problem, we developed a multitarget QSAR (mt-QSAR) classifier combining the MARCH-INSIDE software for the calculation of the structural parameters of drug and target with the linear discriminant analysis (LDA) method in order to seek the best model. The accuracy of the best LDA model was 94.4% (3,859/4,086 cases) for training and 94.9% (1,909/2,012 cases) for the external validation series. In addition, we implemented the model into the Web portal Bio-AIMS as an online server entitled MARCH-INSIDE Nested Drug-Bank Exploration & Screening Tool (MIND-BEST), located at http://miaja.tic.udc.es/Bio-AIMS/MIND-BEST.php . This online tool is based on PHP/HTML/Python and MARCH-INSIDE routines. Finally, we illustrated two practical uses of this server with two different experiments. In experiment 1, we report for the first time a MIND-BEST prediction, synthesis, characterization, and MAO-A and MAO-B pharmacological assay of eight rasagiline derivatives, promising for anti-Parkinson drug design. 
In experiment 2, we report sampling, parasite culture, sample preparation, 2-DE, MALDI-TOF and -TOF/TOF MS, MASCOT search, 3D structure modeling with LOMETS, and MIND-BEST prediction for different peptides of a new protein found in the proteome of the bird parasite Trichomonas gallinae, which is promising for antiparasite drug targets discovery.",2011-02-24 +22889837,uAnalyze: web-based high-resolution DNA melting analysis with comparison to thermodynamic predictions.,"uAnalyze(SM) is a web-based tool for analyzing high-resolution melting data of PCR products. PCR product sequence is input by the user and recursive nearest neighbor thermodynamic calculations used to predict a melting curve similar to uMELT (http://www.dna.utah.edu/umelt/umelt.html). Unprocessed melting data are input directly from LightScanner-96, LS32, or HR-1 data files or via a generic format for other instruments. A fluorescence discriminator identifies low intensity samples to prevent analysis of data that cannot be adequately normalized. Temperature regions that define fluorescence background are initialized by prediction and optionally adjusted by the user. Background is removed either as an exponential or by linear baseline extrapolation. The precision, or “curve spread,” of experimental melting curves is quantified as the average of the maximum helicity difference of all curve pairs. Melting curve accuracy is quantified as the area or “2D offset” between the average experimental and predicted melting curves. Optional temperature overlay (temperature shifting) is provided to focus on curve shape. Using 14 amplicons of CYBB, the mean ± standard deviation of the difference between experimental and predicted fluorescence at 50 percent helicity was 0.04 ± 0.48°C. 
uAnalyze requires Flash, is not browser specific and can be accessed at http://www.dna.utah.edu/uv/uanalyze.html.",2012-11-01 +23249167,Chromosome 19 annotations with disease speciation: a first report from the Global Research Consortium.,"A first research development progress report of the Chromosome 19 Consortium with members from Sweden, Norway, Spain, United States, China and India, a part of the Chromosome-centric Human Proteome Project (C-HPP) global initiative, is presented ( http://www.c-hpp.org ). From the chromosome 19 peptide-targeted library constituting 6159 peptides, a pilot study was conducted using a subset with 125 isotope-labeled peptides. We applied an annotation strategy with triple quadrupole, ESI-Qtrap, and MALDI mass spectrometry platforms, comparing the quality of data within and in between these instrumental set-ups. LC-MS conditions were outlined by multiplex assay developments, followed by MRM assay developments. SRM was applied to biobank samples, quantifying kallikrein 3 (prostate specific antigen) in plasma from prostate cancer patients. The antibody production has been initiated for more than 1200 genes from the entire chromosome 19, and the progress developments are presented. We developed a dedicated transcript microarray to serve as the mRNA identifier by screening cancer cell lines. NAPPA protein arrays were built to align with the transcript data with the Chromosome 19 NAPPA chip, dedicated to 90 proteins, as the first development delivery. We have introduced an IT-infrastructure utilizing a LIMS system that serves as the key interface for the research teams to share and explore data generated within the project. 
The cross-site data repository will form the basis for sample processing, including biological samples as well as patient samples from national Biobanks.",2012-12-18 +25601846,Warsaw set of emotional facial expression pictures: a validation study of facial display photographs.,"Emotional facial expressions play a critical role in theories of emotion and figure prominently in research on almost every aspect of emotion. This article provides a background for a new database of basic emotional expressions. The goal in creating this set was to provide high quality photographs of genuine facial expressions. Thus, after proper training, participants were inclined to express ""felt"" emotions. The novel approach taken in this study was also used to establish whether a given expression was perceived as intended by untrained judges. The judgment task for perceivers was designed to be sensitive to subtle changes in meaning caused by the way an emotional display was evoked and expressed. Consequently, this allowed us to measure the purity and intensity of emotional displays, which are parameters that validation methods used by other researchers do not capture. The final set is comprised of those pictures that received the highest recognition marks (e.g., accuracy with intended display) from independent judges, totaling 210 high quality photographs of 30 individuals. Descriptions of the accuracy, intensity, and purity of displayed emotion as well as FACS AU's codes are provided for each picture. Given the unique methodology applied to gathering and validating this set of pictures, it may be a useful tool for research using face stimuli. 
The Warsaw Set of Emotional Facial Expression Pictures (WSEFEP) is freely accessible to the scientific community for non-commercial use by request at http://www.emotional-face.org.",2014-01-01 +24573477,3D-SURFER 2.0: web platform for real-time search and characterization of protein surfaces.,"The increasing number of uncharacterized protein structures necessitates the development of computational approaches for function annotation using the protein tertiary structures. Protein structure database search is the basis of any structure-based functional elucidation of proteins. 3D-SURFER is a web platform for real-time protein surface comparison of a given protein structure against the entire PDB using 3D Zernike descriptors. It can smoothly navigate the protein structure space in real-time from one query structure to another. A major new feature of Release 2.0 is the ability to compare the protein surface of a single chain, a single domain, or a single complex against databases of protein chains, domains, complexes, or a combination of all three in the latest PDB. Additionally, two types of protein structures can now be compared: all-atom-surface and backbone-atom-surface. The server can also accept a batch job for a large number of database searches. Pockets in protein surfaces can be identified by VisGrid and LIGSITE (csc) . The server is available at http://kiharalab.org/3d-surfer/.",2014-01-01 +24218216,Elucidation of metabolic pathways from enzyme classification data.,"The IUBMB Enzyme List is widely used by other databases as a source for avoiding ambiguity in the recognition of enzymes as catalytic entities. However, it was not designed for metabolic pathway tracing, which has become increasingly important in systems biology. A Reactions Database has been created from the material in the Enzyme List to allow reactions to be searched by substrate/product, and pathways to be traced from any selected starting/seed substrate. 
An extensive synonym glossary allows searches by many of the alternative names, including accepted abbreviations, by which a chemical compound may be known. This database was necessary for the development of the application Reaction Explorer ( http://www.reaction-explorer.org ), which was written in Real Studio ( http://www.realsoftware.com/realstudio/ ) to search the Reactions Database and draw metabolic pathways from reactions selected by the user. Having input the name of the starting compound (the ""seed""), the user is presented with a list of all reactions containing that compound and then selects the product of interest as the next point on the ensuing graph. The pathway diagram is then generated as the process iterates. A contextual menu is provided, which allows the user: (1) to remove a compound from the graph, along with all associated links; (2) to search the reactions database again for additional reactions involving the compound; (3) to search for the compound within the Enzyme List.",2014-01-01 +24170408,PROMALS3D: multiple protein sequence alignment enhanced with evolutionary and three-dimensional structural information.,"Multiple sequence alignment (MSA) is an essential tool with many applications in bioinformatics and computational biology. Accurate MSA construction for divergent proteins remains a difficult computational task. The constantly increasing protein sequences and structures in public databases could be used to improve alignment quality. PROMALS3D is a tool for protein MSA construction enhanced with additional evolutionary and structural information from database searches. PROMALS3D automatically identifies homologs from sequence and structure databases for input proteins, derives structure-based constraints from alignments of three-dimensional structures, and combines them with sequence-based constraints of profile-profile alignments in a consistency-based framework to construct high-quality multiple sequence alignments. 
PROMALS3D output is a consensus alignment enriched with sequence and structural information about input proteins and their homologs. PROMALS3D Web server and package are available at http://prodata.swmed.edu/PROMALS3D.",2014-01-01 +24170403,Multiple sequence alignment with DIALIGN.,DIALIGN is a software tool for multiple sequence alignment by combining global and local alignment features. It composes multiple alignments from local pairwise sequence similarities. This approach is particularly useful to discover conserved functional regions in sequences that share only local homologies but are otherwise unrelated. An anchoring option allows to use external information and expert knowledge in addition to primary-sequence similarity alone. The latest version of DIALIGN optionally uses matches to the PFAM database to detect weak homologies. Various versions of the program are available through Göttingen Bioinformatics Compute Server (GOBICS) at http://www.gobics.de/department/software.,2014-01-01 +25516636,Nonproliferative and proliferative lesions of the rat and mouse female reproductive system.,"The INHAND (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) Project (www.toxpath.org/inhand.asp) is a joint initiative of the Societies of Toxicological Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP) and North America (STP) to develop an internationally accepted nomenclature for proliferative and nonproliferative lesions in laboratory animals. The purpose of this publication is to provide a standardized nomenclature for classifying microscopic lesions observed in the female reproductive tract of laboratory rats and mice, with color photomicrographs illustrating examples of some lesions. The standardized nomenclature presented in this document is also available electronically on the internet (http://www.goreni.org/). 
Sources of material included histopathology databases from government, academia, and industrial laboratories throughout the world. Content includes spontaneous and aging lesions as well as lesions induced by exposure to test materials. There is also a section on normal cyclical changes observed in the ovary, uterus, cervix and vagina to compare normal physiological changes with pathological lesions. A widely accepted and utilized international harmonization of nomenclature for female reproductive tract lesions in laboratory animals will decrease confusion among regulatory and scientific research organizations in different countries and provide a common language to increase and enrich international exchanges of information among toxicologists and pathologists.",2014-01-01 +25022454,Disease-specific target gene expression profiling of molecular imaging probes: database development and clinical validation. ,"Molecular imaging probes can target abnormal gene expression patterns in patients and allow early diagnosis of disease. For selecting a suitable imaging probe, the current Molecular Imaging and Contrast Agent Database (MICAD) provides descriptive and qualitative information on imaging probe characteristics and properties. However, MICAD does not support linkage with the expression profiles of target genes. The proposed Disease-specific Imaging Probe Profiling (DIPP) database quantitatively archives and presents the gene expression profiles of targets across different diseases, anatomic regions, and subcellular locations, providing an objective reference for selecting imaging probes. The DIPP database was validated with a clinical positron emission tomography (PET) study on lung cancer and an in vitro study on neuroendocrine cancer. The retrieved records show that choline kinase beta and glucose transporters were positively and significantly associated with lung cancer among the targets of 11C-choline and [18F]fluoro-2-deoxy-2-d-glucose (FDG), respectively. 
Their significant overexpressions corresponded to the findings that the uptake rate of FDG increased with tumor size but that of 11C-choline remained constant. Validated with the in vitro study, the expression profiles of disease-associated targets can indicate the eligibility of patients for clinical trials of the treatment probe. A Web search tool of the DIPP database is available at http://www.polyu.edu.hk/bmi/dipp/.",2014-01-01 +24047114,DigitalVHI--a freeware open-source software application to capture the Voice Handicap Index and other questionnaire data in various languages.,"In this short report we introduce DigitalVHI, a free open-source software application for obtaining Voice Handicap Index (VHI) and other questionnaire data, which can be put on a computer in clinics and used in clinical practice. The software can simplify performing clinical studies since it makes the VHI scores directly available for analysis in a digital form. It can be downloaded from http://www.christian-herbst.org/DigitalVHI/.",2013-09-19 +30708570,"First Report of Rust Caused by Puccinia nakanishikii on Lemongrass, Cymbopogon citratus, in Florida.","Lemongrass, Cymbopogon citratus (DC.) Stapf. (Poaceae), is grown widely in the tropics and subtropics as an ornamental, flavoring ingredient in Asian cooking, and for tea and fragrant oil (3). In February 2013, rust symptoms were observed on lemongrass in several gardens in Miami-Dade County, Florida. Symptoms began as small chlorotic flecks on both leaf surfaces that became crimson and enlarged to streaks ~1 cm in length. On the abaxial side of leaves, erumpent streaks ruptured to produce pustules in which urediniospores formed. Eventually, streaks coalesced to produce large patches of tan to purplish necrotic tissue that blighted most of the leaf surface and was often surrounded by chlorotic borders. 
These symptoms, fungal morphology, and nuclear ribosomal large subunit (28S) DNA analysis were used to identify the pathogen as Puccinia nakanishikii Dietel. Urediniospores were pyriform to globose, orange to crimson, slightly echinulate, and somewhat longer than a previous report (32.1 ± 3.4 (27 to 42) × 23.3 ± 2.4 (21 to 27) μm vs. 22 to 28 × 22 to 25 μm) (2). Uredinia contained clavate paraphyses, but teliospores were not observed. No aecial host is known for this pathogen. A 28S DNA sequence that was generated with the NL1 and LR3 primers (1,4) was deposited in GenBank under accession no. KC990123; it shared 99% identity with GenBank accession GU058002, which came from a specimen of P. nakanishikii in Hawaii. Voucher specimens of affected leaves of lemongrass have been deposited at the Arthur Herbarium, Purdue University. Although this disease has been reported in California, Hawaii, New Zealand, and Thailand, this is believed to be the first report from Florida (2). Based on rainfall and temperature conditions that are conducive to its development in South Florida, it has the potential to significantly reduce the health and production of this plant in area gardens. References: (1) C. P. Kurtzman and C. J. Robnett. Antonie Van Leeuwenhoek 73:331. 1998. (2) S. Nelson. Rust of Lemongrass. Univ. Hawaii PD-57, 2008. (3) USDA, ARS, GRIN Online Database. URL: http://www.ars-grin.gov/cgi-bin/npgs/html/taxon.pl?12797 , accessed 25 April 2013. (4) R. Vilgalys and M. Hester. J Bacteriol. 172:4238, 1990.",2014-01-01 +24573480,Prediction of intrinsic disorder in proteins using MFDp2.,"Intrinsically disordered proteins (IDPs) are either entirely disordered or contain disordered regions in their native state. IDPs were found to be abundant across all kingdoms of life, particularly in eukaryotes, and are implicated in numerous cellular processes. 
Experimental annotation of disorder lags behind the rapidly growing sizes of the protein databases and thus computational methods are used to close this gap and to investigate the disorder. MFDp2 is a novel webserver for accurate sequence-based prediction of protein disorder which also outputs well-described sequence-derived information that allows profiling the predicted disorder. We conveniently visualize sequence conservation, predicted secondary structure, relative solvent accessibility, and alignments to chains with annotated disorder. The webserver allows predictions for multiple proteins at the same time, includes help pages and tutorial, and the results can be downloaded as text-based (parsable) file. MFDp2 is freely available at http://biomine.ece.ualberta.ca/MFDp2/.",2014-01-01 +24077912,"The Human Gene Mutation Database: building a comprehensive mutation repository for clinical and molecular genetics, diagnostic testing and personalized genomic medicine.","The Human Gene Mutation Database (HGMD®) is a comprehensive collection of germline mutations in nuclear genes that underlie, or are associated with, human inherited disease. By June 2013, the database contained over 141,000 different lesions detected in over 5,700 different genes, with new mutation entries currently accumulating at a rate exceeding 10,000 per annum. HGMD was originally established in 1996 for the scientific study of mutational mechanisms in human genes. However, it has since acquired a much broader utility as a central unified disease-oriented mutation repository utilized by human molecular geneticists, genome scientists, molecular biologists, clinicians and genetic counsellors as well as by those specializing in biopharmaceuticals, bioinformatics and personalized genomics. 
The public version of HGMD (http://www.hgmd.org) is freely available to registered users from academic institutions/non-profit organizations whilst the subscription version (HGMD Professional) is available to academic, clinical and commercial users under license via BIOBASE GmbH.",2014-01-01 +23427986,From binding motifs in ChIP-Seq data to improved models of transcription factor binding sites.,"Chromatin immunoprecipitation followed by deep sequencing (ChIP-Seq) became a method of choice to locate DNA segments bound by different regulatory proteins. ChIP-Seq produces extremely valuable information to study transcriptional regulation. The wet-lab workflow is often supported by downstream computational analysis including construction of models of nucleotide sequences of transcription factor binding sites in DNA, which can be used to detect binding sites in ChIP-Seq data at a single base pair resolution. The most popular TFBS model is represented by positional weight matrix (PWM) with statistically independent positional weights of nucleotides in different columns; such PWMs are constructed from a gapless multiple local alignment of sequences containing experimentally identified TFBSs. Modern high-throughput techniques, including ChIP-Seq, provide enough data for careful training of advanced models containing more parameters than PWM. Yet, many suggested multiparametric models often provide only incremental improvement of TFBS recognition quality comparing to traditional PWMs trained on ChIP-Seq data. We present a novel computational tool, diChIPMunk, that constructs TFBS models as optimal dinucleotide PWMs, thus accounting for correlations between nucleotides neighboring in input sequences. diChIPMunk utilizes many advantages of ChIPMunk, its ancestor algorithm, accounting for ChIP-Seq base coverage profiles (""peak shape"") and using the effective subsampling-based core procedure which allows processing of large datasets. 
We demonstrate that diPWMs constructed by diChIPMunk outperform traditional PWMs constructed by ChIPMunk from the same ChIP-Seq data. Software website: http://autosome.ru/dichipmunk/",2013-01-16 +23155465,"Formalization, annotation and analysis of diverse drug and probe screening assay datasets using the BioAssay Ontology (BAO).","Huge amounts of high-throughput screening (HTS) data for probe and drug development projects are being generated in the pharmaceutical industry and more recently in the public sector. The resulting experimental datasets are increasingly being disseminated via publically accessible repositories. However, existing repositories lack sufficient metadata to describe the experiments and are often difficult to navigate by non-experts. The lack of standardized descriptions and semantics of biological assays and screening results hinder targeted data retrieval, integration, aggregation, and analyses across different HTS datasets, for example to infer mechanisms of action of small molecule perturbagens. To address these limitations, we created the BioAssay Ontology (BAO). BAO has been developed with a focus on data integration and analysis enabling the classification of assays and screening results by concepts that relate to format, assay design, technology, target, and endpoint. Previously, we reported on the higher-level design of BAO and on the semantic querying capabilities offered by the ontology-indexed triple store of HTS data. Here, we report on our detailed design, annotation pipeline, substantially enlarged annotation knowledgebase, and analysis results. We used BAO to annotate assays from the largest public HTS data repository, PubChem, and demonstrate its utility to categorize and analyze diverse HTS results from numerous experiments. BAO is publically available from the NCBO BioPortal at http://bioportal.bioontology.org/ontologies/1533. 
BAO provides controlled terminology and uniform scope to report probe and drug discovery screening assays and results. BAO leverages description logic to formalize the domain knowledge and facilitate the semantic integration with diverse other resources. As a consequence, BAO offers the potential to infer new knowledge from a corpus of assay results, for example molecular mechanisms of action of perturbagens.",2012-11-14 +26158044,Nonparametric estimation receiver operating characteristic analysis for performance evaluation on combined detection and estimation tasks.,"In an effort to generalize task-based assessment beyond traditional signal detection, there is a growing interest in performance evaluation for combined detection and estimation tasks, in which signal parameters, such as size, orientation, and contrast are unknown and must be estimated. One motivation for studying such tasks is their rich complexity, which offers potential advantages for imaging system optimization. To evaluate observer performance on combined detection and estimation tasks, Clarkson introduced the estimation receiver operating characteristic (EROC) curve and the area under the EROC curve as a summary figure of merit. This work provides practical tools for EROC analysis of experimental data. In particular, we propose nonparametric estimators for the EROC curve, the area under the EROC curve, and for the variance/covariance matrix of a vector of correlated EROC area estimates. In addition, we show that reliable confidence intervals can be obtained for EROC area, and we validate these intervals with Monte Carlo simulation. Application of our methodology is illustrated with an example comparing magnetic resonance imaging [Formula: see text]-space sampling trajectories. 
MATLAB® software implementing the EROC analysis estimators described in this work is publicly available at http://code.google.com/p/iqmodelo/.",2014-08-26 +25855032,Understanding the effects of establishing various cutoff criteria in the definition of men with premature ejaculation.,"

Introduction

Over the past decade, professional organizations and consensus groups have offered a variety of definitions for premature ejaculation (PE), all generally including a set of common concepts but all varying in specific language and operationalization. Clearly articulated definitions of such conditions are important because they not only affect prevalence rates but also diagnostic inclusion—who is deemed to have the condition and therefore who might be eligible for treatment.

Aim

The current study had two goals: (i) to examine the effects on prevalence rates of moving the cutoff points from more stringent to less stringent for each of three PE criteria—ejaculatory latency, distress, and ejaculating before desired; and (ii) to explore in detail the relationships among the three criteria.

Methods

Using an Internet-based sample of 1,183 men, we examined the responses of 374 with PE-type symptoms based on consensus definitions, and determined the effect of decreasing restrictions on the cutoff criteria. In addition, we calculated both correlations and concordance rates among criteria.

Results

Numeric and graphic depiction of the effects of moving the cutoff point for each of the three criteria is provided in the URL ""PE Prevalence,"" a dynamic tool developed specifically for this study (https://sites.google.com/a/valpo.edu/PEprevalence/). In addition, statistical relationships among the PE criteria suggest sufficient independence to warrant inclusion of all three in a diagnostic procedure as well as to consider a 2-minute ejaculatory latency as the threshold for a PE diagnosis.

Conclusions

Based on our data, clinicians should approach the 1-minute ejaculatory latency time (ELT) criterion with flexibility, considering ELTs up to 2 minutes for a PE diagnosis. At the same time, frequency of occurrence of either ejaculating before desired or of distress about the condition, as long as they reach at least 50% of the time, had only minor impact on PE diagnostic inclusion.",2015-04-08 +30727400,First Report of Leaf Spot of Rudbeckia hirta var. pulcherrima Caused by Septoria rudbeckiae in Korea.,"Rudbeckia hirta L. var. pulcherrima Farw. (synonym R. bicolor Nutt.), known as the black-eyed Susan, is a flowering plant belonging to the family Asteraceae. The plant is native to North America and was introduced to Korea for ornamental purposes in the 1950s. In July 2011, a previously unknown leaf spot was first observed on the plants in a public garden in Namyangju, Korea. Leaf spot symptoms developed from lower leaves as small, blackish brown lesions, which enlarged to 6 mm in diameter. In the later stages of disease development, each lesion was usually surrounded with a yellow halo, detracting from the beauty of the green leaves of the plant. A number of black pycnidia were present in diseased leaf tissue. Later, the disease was observed in several locations in Korea, including Pyeongchang, Hoengseong, and Yangpyeong. Voucher specimens were deposited at the Korea University Herbarium (KUS-F25894 and KUS-F26180). An isolate was obtained from KUS-F26180 and deposited at the Korean Agricultural Culture Collection (Accession No. KACC46694). Pycnidia were amphigenous, but mostly hypogenous, scattered, dark brown-to-rusty brown, globose, embedded in host tissue or partly erumpent, 50 to 80 μm in diameter, with ostioles 15 to 25 μm in diameter. Conidia were substraight to mildly curved, guttulate, hyaline, 25 to 50 × 1.5 to 2.5 μm, and one- to three-septate. Based on the morphological characteristics, the fungus was consistent with Septoria rudbeckiae Ellis & Halst. 
(1,3,4). Morphological identification of the fungus was confirmed by molecular data. Genomic DNA was extracted using the DNeasy Plant Mini DNA Extraction Kit (Qiagen Inc., Valencia, CA.). The internal transcribed spacer (ITS) region of rDNA was amplified using the ITS1/ITS4 primers and sequenced. The resulting sequence of 528 bp was deposited in GenBank (Accession No. JQ677043). A BLAST search showed that there was no matching sequence of S. rudbeckiae; therefore, this is the first ITS sequence of the species submitted to GenBank. The ITS sequence showed >99% similarity with those of many Septoria species, indicating their close phylogenetic relationship. Pathogenicity was tested by spraying leaves of three potted young plants with a conidial suspension (2 × 10^5 conidia/ml), which was harvested from a 4-week-old culture on potato dextrose agar. Control leaves were sprayed with sterile water. The plants were covered with plastic bags to maintain 100% relative humidity (RH) for the first 24 h. Plants were then maintained in a greenhouse (22 to 28°C and 70 to 80% RH). After 5 days, leaf spot symptoms identical to those observed in the field started to develop on the leaves inoculated with the fungus. No symptoms were observed on control plants. S. rudbeckiae was reisolated from the lesions of inoculated plants, confirming Koch's postulates. A leaf spot disease associated with S. rudbeckiae has been reported on several species of Rudbeckia in the United States, Romania, and Bulgaria (1-4). To our knowledge, this is the first report of leaf spot on R. hirta var. pulcherrima caused by S. rudbeckiae in Korea. References: (1) J. B. Ellis and B. D. Halsted. J. Mycol. 6:33, 1890. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ February 2, 2012. (3) E. Radulescu et al. Septoriozele din Romania. Ed. Acad. Rep. Soc. Romania, Bucuresti, Romania, 1973. (4) S. G. 
Vanev et al. Fungi Bulgaricae 3:1, 1997.",2012-06-01 +23331614,Genecentric: a package to uncover graph-theoretic structure in high-throughput epistasis data.,"

Background

New technology has resulted in high-throughput screens for pairwise genetic interactions in yeast and other model organisms. For each pair in a collection of non-essential genes, an epistasis score is obtained, representing how much sicker (or healthier) the double-knockout organism will be compared to what would be expected from the sickness of the component single knockouts. Recent algorithmic work has identified graph-theoretic patterns in this data that can indicate functional modules, and even sets of genes that may occur in compensatory pathways, such as a BPM-type schema first introduced by Kelley and Ideker. However, to date, any algorithms for finding such patterns in the data were implemented internally, with no software being made publicly available.

Results

Genecentric is a new package that implements a parallelized version of the Leiserson et al. algorithm (J Comput Biol 18:1399-1409, 2011) for generating generalized BPMs from high-throughput genetic interaction data. Given a matrix of weighted epistasis values for a set of double knock-outs, Genecentric returns a list of generalized BPMs that may represent compensatory pathways. Genecentric also has an extension, GenecentricGO, to query FuncAssociate (Bioinformatics 25:3043-3044, 2009) to retrieve GO enrichment statistics on generated BPMs. Python is the only dependency, and our web site provides working examples and documentation.

Conclusion

We find that Genecentric can be used to find coherent functional and perhaps compensatory gene sets from high throughput genetic interaction data. Genecentric is made freely available for download under the GPLv2 from http://bcb.cs.tufts.edu/genecentric.",2013-01-18 +24147091,Combining position weight matrices and document-term matrix for efficient extraction of associations of methylated genes and diseases from free text.,"

Background

In a number of diseases, certain genes are reported to be strongly methylated and thus can serve as diagnostic markers in many cases. Scientific literature in digital form is an important source of information about methylated genes implicated in particular diseases. The large volume of the electronic text makes it difficult and impractical to search for this information manually.

Methodology

We developed a novel text mining methodology based on a new concept of position weight matrices (PWMs) for text representation and feature generation. We applied PWMs in conjunction with the document-term matrix to extract with high accuracy associations between methylated genes and diseases from free text. The performance results are based on large manually-classified data. Additionally, we developed a web-tool, DEMGD, which automates extraction of these associations from free text. DEMGD presents the extracted associations in summary tables and full reports in addition to evidence tagging of text with respect to genes, diseases and methylation words. The methodology we developed in this study can be applied to similar association extraction problems from free text.

Conclusion

The new methodology developed in this study allows for efficient identification of associations between concepts. Our method applied to methylated genes in different diseases is implemented as a Web-tool, DEMGD, which is freely available at http://www.cbrc.kaust.edu.sa/demgd/. The data is available for online browsing and download.",2013-10-16 +22838505,A Bayesian method for comparing and combining binary classifiers in the absence of a gold standard.,"

Background

Many problems in bioinformatics involve classification based on features such as sequence, structure or morphology. Given multiple classifiers, two crucial questions arise: how does their performance compare, and how can they best be combined to produce a better classifier? A classifier can be evaluated in terms of sensitivity and specificity using benchmark, or gold standard, data, that is, data for which the true classification is known. However, a gold standard is not always available. Here we demonstrate that a Bayesian model for comparing medical diagnostics without a gold standard can be successfully applied in the bioinformatics domain, to genomic scale data sets. We present a new implementation, which unlike previous implementations is applicable to any number of classifiers. We apply this model, for the first time, to the problem of finding the globally optimal logical combination of classifiers.

Results

We compared three classifiers of protein subcellular localisation, and evaluated our estimates of sensitivity and specificity against estimates obtained using a gold standard. The method overestimated sensitivity and specificity with only a small discrepancy, and correctly ranked the classifiers. Diagnostic tests for swine flu were then compared on a small data set. Lastly, classifiers for a genome-wide association study of macular degeneration with 541094 SNPs were analysed. In all cases, run times were feasible, and results precise. The optimal logical combination of classifiers was also determined for all three data sets. Code and data are available from http://bioinformatics.monash.edu.au/downloads/.

Conclusions

The examples demonstrate the methods are suitable for both small and large data sets, applicable to the wide range of bioinformatics classification problems, and robust to dependence between classifiers. In all three test cases, the globally optimal logical combination of the classifiers was found to be their union, according to three out of four ranking criteria. We propose as a general rule of thumb that the union of classifiers will be close to optimal.",2012-07-27 +21928249,Development of a website and biobank database for the Nanosized Cancer Polymarker Biochip Project: a Multicenter Italian Experience.,"The Nanosized Cancer Polymarker Biochip Project (RBLA03S4SP) funded by an Italian MIUR-FIRB grant (Italian Ministry of University and Research - Investment Funds for Basic Research) has led to the creation of a free-access dynamic website, available at the web address https://serviziweb.ulss12.ve.it/firbabo, and of a centralized database with password-restricted access. The project network is composed of 9 research units (RUs) and has been active since 2005. The aim of the FIRB project was the design, production and validation of optoelectronic and chemoelectronic biosensors for the simultaneous detection of a novel class of cancer biomarkers associated with immunoglobulins of the M class (IgM) for early diagnosis of cancer. Biomarker immune complexes (BM-ICs) were assessed on samples of clinical cases and matched controls for breast, colorectal, liver, ovarian and prostate malignancies. This article describes in detail the architecture of the project website, the central database application, and the biobank developed for the FIRB Nanosized Cancer Polymarker Biochip Project. The article also illustrates many unique aspects that should be considered when developing a database within a multidisciplinary scenario. 
The main deliverables of the project were numerous, including the development of an online database which archived 1400 case report forms (700 cases and 700 matched controls) and more than 2700 experimental results relative to the BM-ICs assayed. The database also allowed for the traceability and retrieval of 21,000 aliquots archived in the centralized bank and stored as backup in the RUs, and for the development of a centralized biological bank in the coordinating unit with 6300 aliquots of serum. The constitution of the website and biobank database enabled optimal coordination of the RUs involved, highlighting the importance of sharing samples and scientific data in a multicenter setting for the achievement of the project goals.",2011-07-01 +22592381,IRView: a database and viewer for protein interacting regions.,"

Unlabelled

Protein-protein interactions (PPIs) are mediated through specific regions on proteins. Some proteins have two or more protein interacting regions (IRs) and some IRs are competitively used for interactions with different proteins. IRView currently contains data for 3417 IRs in human and mouse proteins. The data were obtained from different sources and combined with annotated region data from InterPro. Information on non-synonymous single nucleotide polymorphism sites and variable regions owing to alternative mRNA splicing is also included. The IRView web interface displays all IR data, including user-uploaded data, on reference sequences so that the positional relationship between IRs can be easily understood. IRView should be useful for analyzing underlying relationships between the proteins behind the PPI networks.

Availability

IRView is publicly available on the web at http://ir.hgc.jp/",2012-05-15 +25073924,GSK-3 signaling in developing cortical neurons is essential for radial migration and dendritic orientation.,"GSK-3 is an essential mediator of several signaling pathways that regulate cortical development. We therefore created conditional mouse mutants lacking both GSK-3α and GSK-3β in newly born cortical excitatory neurons. Gsk3-deleted neurons expressing upper layer markers exhibited striking migration failure in all areas of the cortex. Radial migration in hippocampus was similarly affected. In contrast, tangential migration was not grossly impaired after Gsk3 deletion in interneuron precursors. Gsk3-deleted neurons extended axons and developed dendritic arbors. However, the apical dendrite was frequently branched while basal dendrites exhibited abnormal orientation. GSK-3 regulation of migration in neurons was independent of Wnt/β-catenin signaling. Importantly, phosphorylation of the migration mediator, DCX, at ser327, and phosphorylation of the semaphorin signaling mediator, CRMP-2, at Thr514 were markedly decreased. Our data demonstrate that GSK-3 signaling is essential for radial migration and dendritic orientation and suggest that GSK-3 mediates these effects by phosphorylating key microtubule regulatory proteins.DOI: http://dx.doi.org/10.7554/eLife.02663.001.",2014-07-29 +24388451,Safety of ankle arthroscopy for the treatment of anterolateral soft-tissue impingement.,"

Purpose

To quantify the overall incidence of complications related to arthroscopic treatment of anterolateral soft-tissue ankle impingement.

Methods

Electronic databases and relevant peer-reviewed sources, including OvidSP/Medline (http://ovidsp.tx.ovid.com) and Google Scholar, were systematically searched for the terms ""anterolateral"" AND ""ankle"" AND ""impingement"" OR ""soft-tissue impingement"" AND ""arthroscopy."" In addition, we manually searched common American and European (including British) orthopaedic and podiatric scientific literature for relevant articles. Studies were eligible for inclusion only if they included the following: a standard 2-portal anterior arthroscopic approach, a minimum mean follow-up of 12 months, and detailed descriptions of all complications encountered.

Results

After we considered all the potentially eligible articles, 15 (28.8%) met our inclusion criteria for the study. There were a total of 396 patients (397 ankles), with 16 total complications (4%), 3 (0.8%) of which were major. The weighted mean patient age was 31.2 years, and the weighted mean follow-up was 33.7 months.

Conclusions

The results of this systematic review showed an overall incidence of complications of 4%. The complications were categorically divided into major and minor complications, with a very low incidence of major complications (0.8%) and an acceptably low incidence of minor complications (3.3%). On the basis of these findings, arthroscopic treatment of anterolateral ankle soft-tissue impingement is a very safe procedure when indicated.

Level of evidence

Level IV, systematic review of Level IV studies.",2013-12-30 +24013470,"Laparoscopic-assisted versus open total mesorectal excision with anal sphincter preservation for mid and low rectal cancer: a prospective, randomized trial.","

Background

This single-center, prospective, randomized trial was designed to compare the short-term clinical outcome between laparoscopic-assisted versus open total mesorectal excision (TME) with anal sphincter preservation (ASP) in patients with mid and low rectal cancer. Long-term morbidity and survival data also were recorded and compared between the two groups.

Methods

Between August 2001 and August 2007, 80 patients with mid and low rectal cancer were randomized to receive either laparoscopic-assisted (40 patients) or open (40 patients) TME with ASP. The median follow-up time for all patients was 75.7 (range 16.9-115.7) months for the laparoscopic-assisted group and 76.1 (range 4.7-126.6) months for the open group. The primary endpoint of the study was short-term clinical outcome. Secondary endpoints included long-term morbidity rate and survival. Data were analyzed by intention-to-treat principle.

Results

The demographic data of the two groups were comparable. Postoperative recovery was better after laparoscopic surgery, with less analgesic requirement (P < 0.001), earlier mobilization (P = 0.001), lower short-term morbidity rate (P = 0.043), and a trend towards shorter hospital stay (P = 0.071). The cumulative long-term morbidity rate also was lower in the laparoscopic-assisted group (P = 0.019). The oncologic clearance in terms of macroscopic quality of the TME specimen, circumferential resection margin involvement, and number of lymph nodes removed was similar between both groups. After curative resection, the probabilities of survival at 5 years of the laparoscopic-assisted and open groups were 85.9 and 91.3 %, respectively (P = 0.912). The respective probabilities of being disease-free were 83.3 and 74.5 % (P = 0.114).

Conclusions

Laparoscopic-assisted TME with ASP improves postoperative recovery, reduces short-term and long-term morbidity rates, and seemingly does not jeopardize survival compared with open surgery for mid and low rectal cancer ( http://ClinicalTrials.gov Identifier: NCT00485316).",2013-09-07 +25363655,Enhanced depth imaging-OCT of the choroid: a review of the current literature.,"

Background

With the advent of enhanced depth imaging optical coherence tomography (EDI-OCT), detailed visualisation of the choroid in vivo is now possible. Measurements of choroidal thickness (CT) have also enabled new directions in research to study normal and pathological processes within the choroid. The aim of the present study is to review the current literature on choroidal imaging using EDI-OCT.

Methods

Studies were identified by a systematic search using Medline ( http://www.ncbi.nlm.nih.gov/pubmed ). Papers were also identified based on the reference lists of relevant publications. Papers were included in the review if the focus of the study involved imaging of the choroid using EDI-OCT.

Results

Recent studies have demonstrated successful imaging of the choroid and high reproducibility of measurements of CT using EDI-OCT. There are much data confirming that abnormalities in choroidal structure and function contribute to major ocular diseases and patterns of CT variation may be observed in certain disease states and may be influenced by treatment. However, it is not clear whether these variations are a contributing factor or a consequence of the disease.

Conclusion

While more invasive methods such as indocyanine green (ICG) angiography remain the gold standard for detecting abnormalities of the choroidal vasculature in normal eyes and disease states, EDI-OCT has become an important adjunctive clinical tool in providing three-dimensional anatomical information of the choroid.",2014-11-04 +23620358,A novel web server predicts amino acid residue protection against hydrogen-deuterium exchange.,"

Motivation

To clarify the relationship between structural elements and polypeptide chain mobility, a set of statistical analyses of structures is necessary. Because at present proteins with determined spatial structures are much less numerous than those with amino acid sequence known, it is important to be able to predict the extent of proton protection from hydrogen-deuterium (HD) exchange basing solely on the protein primary structure.

Results

Here we present a novel web server aimed to predict the degree of amino acid residue protection against HD exchange solely from the primary structure of the protein chain under study. On the basis of the amino acid sequence, the presented server offers the following three possibilities (predictors) for user's choice. First, prediction of the number of contacts occurring in this protein, which is shown to be helpful in estimating the number of protons protected against HD exchange (sensitivity 0.71). Second, probability of H-bonding in this protein, which is useful for finding the number of unprotected protons (specificity 0.71). The last is the use of an artificial predictor. Also, we report on mass spectrometry analysis of HD exchange that has been first applied to free amino acids. Its results showed a good agreement with theoretical data (number of protons) for 10 globular proteins (correlation coefficient 0.73). We pioneered in compiling two datasets of experimental HD exchange data for 35 proteins.

Availability

The H-Protection server is available for users at http://bioinfo.protres.ru/ogp/

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-24 +26219103,Desert Dust Outbreaks in Southern Europe: Contribution to Daily PM₁₀ Concentrations and Short-Term Associations with Mortality and Hospital Admissions.,"

Background

Evidence on the association between short-term exposure to desert dust and health outcomes is controversial.

Objectives

We aimed to estimate the short-term effects of particulate matter ≤ 10 μm (PM10) on mortality and hospital admissions in 13 Southern European cities, distinguishing between PM10 originating from the desert and from other sources.

Methods

We identified desert dust advection days in multiple Mediterranean areas for 2001-2010 by combining modeling tools, back-trajectories, and satellite data. For each advection day, we estimated PM10 concentrations originating from desert, and computed PM10 from other sources by difference. We fitted city-specific Poisson regression models to estimate the association between PM from different sources (desert and non-desert) and daily mortality and emergency hospitalizations. Finally, we pooled city-specific results in a random-effects meta-analysis.

Results

On average, 15% of days were affected by desert dust at ground level (desert PM10 > 0 μg/m3). Most episodes occurred in spring-summer, with increasing gradient of both frequency and intensity north-south and west-east of the Mediterranean basin. We found significant associations of both PM10 concentrations with mortality. Increases of 10 μg/m3 in non-desert and desert PM10 (lag 0-1 days) were associated with increases in natural mortality of 0.55% (95% CI: 0.24, 0.87%) and 0.65% (95% CI: 0.24, 1.06%), respectively. Similar associations were estimated for cardio-respiratory mortality and hospital admissions.

Conclusions

PM10 originating from the desert was positively associated with mortality and hospitalizations in Southern Europe. Policy measures should aim at reducing population exposure to anthropogenic airborne particles even in areas with large contribution from desert dust advections.

Citation

Stafoggia M, Zauli-Sajani S, Pey J, Samoli E, Alessandrini E, Basagaña X, Cernigliaro A, Chiusolo M, Demaria M, Díaz J, Faustini A, Katsouyanni K, Kelessis AG, Linares C, Marchesi S, Medina S, Pandolfi P, Pérez N, Querol X, Randi G, Ranzi A, Tobias A, Forastiere F, MED-PARTICLES Study Group. 2016. Desert dust outbreaks in Southern Europe: contribution to daily PM10 concentrations and short-term associations with mortality and hospital admissions. Environ Health Perspect 124:413-419; http://dx.doi.org/10.1289/ehp.1409164.",2015-07-24 +25623781,A scoring model for phosphopeptide site localization and its impact on the question of whether to use MSA.,"The production of structurally significant product ions during the dissociation of phosphopeptides is a key to the successful determination of phosphorylation sites. These diagnostic ions can be generated using the widely adopted MS/MS approach, MS3 (Data Dependent Neutral Loss - DDNL), or by multistage activation (MSA). The main purpose of this work is to introduce a false-localization rate (FLR) probabilistic model to enable unbiased phosphoproteomics studies. Briefly, our algorithm infers a probabilistic function from the distribution of the identified phosphopeptides' XCorr Delta scores (XD-Scores) in the current experiment. Our module infers p-values by relying on Gaussian mixture models and a logistic function. We demonstrate the usefulness of our probabilistic model by revisiting the ""to MSA, or not to MSA"" dilemma. For this, we use human leukemia-derived cells (K562) as a study model and enriched for phosphopeptides using the hydroxyapatite (HAP) chromatography. The aliquots were analyzed with and without MSA on an Orbitrap-XL. Our XD-Scoring analysis revealed that the MS/MS approach provides more identifications because of its faster scan rate, but that for the same given scan rate higher-confidence spectra can be achieved with MSA. 
Our software is integrated into the PatternLab for proteomics freely available for academic community at http://www.patternlabforproteomics.org. Biological significance Assigning statistical confidence to phosphorylation sites is necessary for proper phosphoproteomic assessment. Here we present a rigorous statistical model, based on Gaussian mixture models and a logistic function, which overcomes shortcomings of previous tools. The algorithm described herein is made readily available to the scientific community by integrating it into the widely adopted PatternLab for proteomics. This article is part of a Special Issue entitled: Computational Proteomics.",2015-01-23 +24180377,Inferring species trees from incongruent multi-copy gene trees using the Robinson-Foulds distance.,"

Background

Constructing species trees from multi-copy gene trees remains a challenging problem in phylogenetics. One difficulty is that the underlying genes can be incongruent due to evolutionary processes such as gene duplication and loss, deep coalescence, or lateral gene transfer. Gene tree estimation errors may further exacerbate the difficulties of species tree estimation.

Results

We present a new approach for inferring species trees from incongruent multi-copy gene trees that is based on a generalization of the Robinson-Foulds (RF) distance measure to multi-labeled trees (mul-trees). We prove that it is NP-hard to compute the RF distance between two mul-trees; however, it is easy to calculate this distance between a mul-tree and a singly-labeled species tree. Motivated by this, we formulate the RF problem for mul-trees (MulRF) as follows: Given a collection of multi-copy gene trees, find a singly-labeled species tree that minimizes the total RF distance from the input mul-trees. We develop and implement a fast SPR-based heuristic algorithm for the NP-hard MulRF problem. We compare the performance of the MulRF method (available at http://genome.cs.iastate.edu/CBL/MulRF/) with several gene tree parsimony approaches using gene tree simulations that incorporate gene tree error, gene duplications and losses, and/or lateral transfer. The MulRF method produces more accurate species trees than gene tree parsimony approaches. We also demonstrate that the MulRF method infers in minutes a credible plant species tree from a collection of nearly 2,000 gene trees.

Conclusions

Our new phylogenetic inference method, based on a generalized RF distance, makes it possible to quickly estimate species trees from large genomic data sets. Since the MulRF method, unlike gene tree parsimony, is based on a generic tree distance measure, it is appealing for analyses of genomic data sets, in which many processes such as deep coalescence, recombination, gene duplication and losses as well as phylogenetic error may contribute to gene tree discord. In experiments, the MulRF method estimated species trees accurately and quickly, demonstrating MulRF as an efficient alternative approach for phylogenetic inference from large-scale genomic data sets.",2013-11-01 +24312116,The power of regional heritability analysis for rare and common variant detection: simulations and application to eye biometrical traits.,"Genome-wide association studies (GWAS) have provided valuable insights into the genetic basis of complex traits. However, they have explained relatively little trait heritability. Recently, we proposed a new analytical approach called regional heritability mapping (RHM) that captures more of the missing genetic variation. This method is applicable both to related and unrelated populations. Here, we demonstrate the power of RHM in comparison with single-SNP GWAS and gene-based association approaches under a wide range of scenarios with variable numbers of quantitative trait loci (QTL) with common and rare causal variants in a narrow genomic region. Simulations based on real genotype data were performed to assess power to capture QTL variance, and we demonstrate that RHM has greater power to detect rare variants and/or multiple alleles in a region than other approaches. In addition, we show that RHM can capture more accurately the QTL variance, when it is caused by multiple independent effects and/or rare variants. 
We applied RHM to analyze three biometrical eye traits for which single-SNP GWAS have been published or performed to evaluate the effectiveness of this method in real data analysis and detected some additional loci which were not detected by other GWAS methods. RHM has the potential to explain some of missing heritability by capturing variance caused by QTL with low MAF and multiple independent QTL in a region, not captured by other GWAS methods. RHM analyses can be implemented using the software REACTA (http://www.epcc.ed.ac.uk/projects-portfolio/reacta).",2013-11-19 +24843029,Head-to-tail interactions of the coiled-coil domains regulate ClpB activity and cooperation with Hsp70 in protein disaggregation.,"The hexameric AAA+ chaperone ClpB reactivates aggregated proteins in cooperation with the Hsp70 system. Essential for disaggregation, the ClpB middle domain (MD) is a coiled-coil propeller that binds Hsp70. Although the ClpB subunit structure is known, positioning of the MD in the hexamer and its mechanism of action are unclear. We obtained electron microscopy (EM) structures of the BAP variant of ClpB that binds the protease ClpP, clearly revealing MD density on the surface of the ClpB ring. Mutant analysis and asymmetric reconstructions show that MDs adopt diverse positions in a single ClpB hexamer. Adjacent, horizontally oriented MDs form head-to-tail contacts and repress ClpB activity by preventing Hsp70 interaction. Tilting of the MD breaks this contact, allowing Hsp70 binding, and releasing the contact in adjacent subunits. 
Our data suggest a wavelike activation of ClpB subunits around the ring.DOI: http://dx.doi.org/10.7554/eLife.02481.001.",2014-04-30 +23761448,HiTRACE-Web: an online tool for robust analysis of high-throughput capillary electrophoresis.,"To facilitate the analysis of large-scale high-throughput capillary electrophoresis data, we previously proposed a suite of efficient analysis software named HiTRACE (High Throughput Robust Analysis of Capillary Electrophoresis). HiTRACE has been used extensively for quantitating data from RNA and DNA structure mapping experiments, including mutate-and-map contact inference, chromatin footprinting, the Eterna RNA design project and other high-throughput applications. However, HiTRACE is based on a suite of command-line MATLAB scripts that requires nontrivial efforts to learn, use and extend. Here, we present HiTRACE-Web, an online version of HiTRACE that includes standard features previously available in the command-line version and additional features such as automated band annotation and flexible adjustment of annotations, all via a user-friendly environment. By making use of parallelization, the on-line workflow is also faster than software implementations available to most users on their local computers. Free access: http://hitrace.org.",2013-06-12 +22480257,VESPA: software to facilitate genomic annotation of prokaryotic organisms through integration of proteomic and transcriptomic data.,"

Background

The procedural aspects of genome sequencing and assembly have become relatively inexpensive, yet the full, accurate structural annotation of these genomes remains a challenge. Next-generation sequencing transcriptomics (RNA-Seq), global microarrays, and tandem mass spectrometry (MS/MS)-based proteomics have demonstrated immense value to genome curators as individual sources of information, however, integrating these data types to validate and improve structural annotation remains a major challenge. Current visual and statistical analytic tools are focused on a single data type, or existing software tools are retrofitted to analyze new data forms. We present Visual Exploration and Statistics to Promote Annotation (VESPA), a new interactive visual analysis software tool focused on assisting scientists with the annotation of prokaryotic genomes through the integration of proteomics and transcriptomics data with current genome location coordinates.

Results

VESPA is a desktop Java™ application that integrates high-throughput proteomics data (peptide-centric) and transcriptomics (probe or RNA-Seq) data into a genomic context, all of which can be visualized at three levels of genomic resolution. Data is interrogated via searches linked to the genome visualizations to find regions with high likelihood of mis-annotation. Search results are linked to exports for further validation outside of VESPA or potential coding-regions can be analyzed concurrently with the software through interaction with BLAST. VESPA is demonstrated on two use cases (Yersinia pestis Pestoides F and Synechococcus sp. PCC 7002) to demonstrate the rapid manner in which mis-annotations can be found and explored in VESPA using either proteomics data alone, or in combination with transcriptomic data.

Conclusions

VESPA is an interactive visual analytics tool that integrates high-throughput data into a genomic context to facilitate the discovery of structural mis-annotations in prokaryotic genomes. Data is evaluated via visual analysis across multiple levels of genomic resolution, linked searches and interaction with existing bioinformatics tools. We highlight the novel functionality of VESPA and core programming requirements for visualization of these large heterogeneous datasets for a client-side application. The software is freely available at https://www.biopilot.org/docs/Software/Vespa.php.",2012-04-05 +23231464,A novel method to discover fluoroquinolone antibiotic resistance (qnr) genes in fragmented nucleotide sequences.,"

Background

Broad-spectrum fluoroquinolone antibiotics are central in modern health care and are used to treat and prevent a wide range of bacterial infections. The recently discovered qnr genes provide a mechanism of resistance with the potential to rapidly spread between bacteria using horizontal gene transfer. As for many antibiotic resistance genes present in pathogens today, qnr genes are hypothesized to originate from environmental bacteria. The vast amount of data generated by shotgun metagenomics can therefore be used to explore the diversity of qnr genes in more detail.

Results

In this paper we describe a new method to identify qnr genes in nucleotide sequence data. We show, using cross-validation, that the method has a high statistical power of correctly classifying sequences from novel classes of qnr genes, even for fragments as short as 100 nucleotides. Based on sequences from public repositories, the method was able to identify all previously reported plasmid-mediated qnr genes. In addition, several fragments from novel putative qnr genes were identified in metagenomes. The method was also able to annotate 39 chromosomal variants of which 11 have previously not been reported in literature.

Conclusions

The method described in this paper significantly improves the sensitivity and specificity of identification and annotation of qnr genes in nucleotide sequence data. The predicted novel putative qnr genes in the metagenomic data support the hypothesis of a large and uncharacterized diversity within this family of resistance genes in environmental bacterial communities. An implementation of the method is freely available at http://bioinformatics.math.chalmers.se/qnr/.",2012-12-11 +23172861,DOOSS: a tool for visual analysis of data overlaid on secondary structures.,

Motivation

DOOSS (Data Overlaid On Secondary Structures) is a tool for visualizing annotated secondary structures of large single-stranded nucleotide sequences (such as full-length virus genomes). The purpose of this tool is to assist investigators in evaluating the biological relevance of secondary structures within particular sequences.

Availability and implementation

DOOSS is written in Java and is available from: http://dooss.computingforbiology.org

Contact

michaelgolden0@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.,2012-11-21 +25708947,A linear time algorithm for detecting long genomic regions enriched with a specific combination of epigenetic states.,"

Background

Epigenetic modifications are essential for controlling gene expression. Recent studies have shown that not only single epigenetic modifications but also combinations of multiple epigenetic modifications play vital roles in gene regulation. A striking example is the long hypomethylated regions enriched with modified H3K27me3 (called, ""K27HMD"" regions), which are exposed to suppress the expression of key developmental genes relevant to cellular development and differentiation during embryonic stages in vertebrates. It is thus a biologically important issue to develop an effective optimization algorithm for detecting long DNA regions (e.g., >4 kbp in size) that harbor a specific combination of epigenetic modifications (e.g., K27HMD regions). However, to date, optimization algorithms for these purposes have received little attention, and available methods are still heuristic and ad hoc.

Results

In this paper, we propose a linear time algorithm for calculating a set of non-overlapping regions that maximizes the sum of similarities between the vector of focal epigenetic states and the vectors of raw epigenetic states at DNA positions in the set of regions. The average elapsed time to process the epigenetic data of any of human chromosomes was less than 2 seconds on an Intel Xeon CPU. To demonstrate the effectiveness of the algorithm, we estimated large K27HMD regions in the medaka and human genomes using our method, ChromHMM, and a heuristic method.

Conclusions

We confirmed the advantages of our method over the two other methods. Our method is flexible enough to handle other types of epigenetic combinations. The program that implements the method is called ""CSMinfinder"" and is made available at: http://mlab.cb.k.u-tokyo.ac.jp/~ichikawa/Segmentation/",2015-01-21 +25660415,Presentation of a nationwide multicenter registry of intestinal failure and intestinal transplantation.,"

Background & aims

Exact data on Dutch patients with chronic intestinal failure (CIF) and after intestinal transplantation (ITx) have been lacking. To improve standard care of these patients, a nationwide collaboration has been established. Objectives of this study were obtaining an up-to-date prevalence of CIF and characterizing these patients using the specially developed multicenter web-based Dutch Registry of Intestinal Failure and Intestinal Transplantation (DRIFT).

Methods

Cross-sectional study. CIF was defined as type 3 intestinal failure in which >75% of nutritional requirements were given as home parenteral nutrition (HPN) for ≥ 4 weeks in children and >50% for ≥3 months in adults. All patients with CIF receiving HPN care by the three Dutch specialized centers on January 1, 2013 and all ITx patients were registered in DRIFT (https://drift.darmfalen.nl).

Results

In total, 195 patients with CIF (158 adults, 37 children) were identified, of whom 184 were registered in DRIFT. The Dutch point prevalence of CIF was 11.62 per million (12.24 for adults, 9.56 for children) on January 1, 2013. Fifty-seven patients (31%) had one or more indications for ITx, while 12 patients actually underwent ITx since its Dutch introduction. Four patients required transplantectomy of their intestinal graft and 3 intestinal transplant patients died.

Conclusion

The multicenter registry DRIFT revealed an up-to-date prevalence of CIF and provided nationwide insight into the patients with CIF during HPN and after ITx in the Netherlands. DRIFT will facilitate the multicenter monitoring of individual patients, thereby supporting multidisciplinary care and decision-making.",2015-01-21 +23981350,Inferring nucleosome positions with their histone mark annotation from ChIP data.,"

Motivation

The nucleosome is the basic repeating unit of chromatin. It contains two copies each of the four core histones H2A, H2B, H3 and H4 and about 147 bp of DNA. The residues of the histone proteins are subject to numerous post-translational modifications, such as methylation or acetylation. Chromatin immunoprecipitation followed by sequencing (ChIP-seq) is a technique that provides genome-wide occupancy data of these modified histone proteins, and it requires appropriate computational methods.

Results

We present NucHunter, an algorithm that uses the data from ChIP-seq experiments directed against many histone modifications to infer positioned nucleosomes. NucHunter annotates each of these nucleosomes with the intensities of the histone modifications. We demonstrate that these annotations can be used to infer nucleosomal states with distinct correlations to underlying genomic features and chromatin-related processes, such as transcriptional start sites, enhancers, elongation by RNA polymerase II and chromatin-mediated repression. Thus, NucHunter is a versatile tool that can be used to predict positioned nucleosomes from a panel of histone modification ChIP-seq experiments and infer distinct histone modification patterns associated to different chromatin states.

Availability

The software is available at http://epigen.molgen.mpg.de/nuchunter/.",2013-08-26 +25304777,Tabhu: tools for antibody humanization.,"

Summary

Antibodies are rapidly becoming essential tools in the clinical practice, given their ability to recognize their cognate antigens with high specificity and affinity, and a high yield at reasonable costs in model animals. Unfortunately, when administered to human patients, xenogeneic antibodies can elicit unwanted and dangerous immunogenic responses. Antibody humanization methods are designed to produce molecules with a better safety profile still maintaining their ability to bind the antigen. This can be accomplished by grafting the non-human regions determining the antigen specificity into a suitable human template. Unfortunately, this procedure may result in a partial or complete loss of affinity of the grafted molecule that can be restored by back-mutating some of the residues of human origin to the corresponding murine ones. This trial-and-error procedure is hard and involves expensive and time-consuming experiments. Here we present tools for antibody humanization (Tabhu), a web server for antibody humanization. Tabhu includes tools for human template selection, grafting, back-mutation evaluation, antibody modelling and structural analysis, helping the user in all the critical steps of the humanization experiment protocol.

Availability

http://www.biocomputing.it/tabhu

Contact

anna.tramontano@uniroma1.it, pierpaolo.olimpieri@uniroma1.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-09 +25862716,Incremental value of left atrial structural and functional characteristics for prediction of atrial fibrillation in patients receiving cardiac pacing. ,"Better prediction of cardiac pacing patients at risk of atrial fibrillation (AF) would enable more effective prophylaxis. We sought whether left atrial (LA) electromechanical conduction time (EMT) and myocardial mechanics were associated with incident AF in patients undergoing dual chamber pacemaker implantation, independent of left atrial volume (LAV). Clinical data were obtained prospectively in 146 enrollees (73±10 years) undergoing dual chamber pacemaker implantation in the Protect-Pace study. Echocardiograms and 2-dimensional strain analysis were obtained post implantation and at 2 years. Complete ascertainment of AF during follow-up was identified from interrogation of permanent pacemakers. Cox regression was used to identify correlates of AF. Incident AF (n=29, 20%) was associated with higher systolic blood pressure (P=0.01), lower left ventricular ejection fraction (P=0.03), lower LA strain at atrial contraction (LASac; P<0.001), higher LAV (P<0.003), and longer septal electromechanical conduction time (P<0.01). The associations of LAV and LASac with incident AF were independent of age, sex, systolic blood pressure, and left ventricular size and function. However, the combination of the 3 strongest predictors showed LASac (P=0.02) and systolic blood pressure (P=0.01) were independently associated with incident AF, but LAV was not (P=0.07). Using the optimal cut points from receiver operator characteristic curves (62 mL for LAV and 8.6% for LASac), we demonstrated that a significantly greater rate of AF was associated with both lower LASac at higher LAV and with lower LASac at lower LAV. 
The risk of AF in patients receiving dual chamber pacing is independently associated with LA size and function, not left ventricular structural and functional characteristics or right ventricular lead location. URL: http://www.clinicaltrials.gov. Unique identifier: NCT00461734.",2015-04-01 +23811095,LNETWORK: an efficient and effective method for constructing phylogenetic networks.,"

Motivation

The evolutionary history of species is traditionally represented with a rooted phylogenetic tree. Each tree comprises a set of clusters, i.e. subsets of the species that are descended from a common ancestor. When rooted phylogenetic trees are built from several different datasets (e.g. from different genes), the clusters are often conflicting. These conflicting clusters cannot be expressed as a simple phylogenetic tree; however, they can be expressed in a phylogenetic network. Phylogenetic networks are a generalization of phylogenetic trees that can account for processes such as hybridization, horizontal gene transfer and recombination, which are difficult to represent in standard tree-like models of evolutionary histories. There is currently a large body of research aimed at developing appropriate methods for constructing phylogenetic networks from cluster sets. The Cass algorithm can construct a much simpler network than other available methods, but is extremely slow for large datasets or for datasets that need lots of reticulate nodes. The networks constructed by Cass are also greatly dependent on the order of input data, i.e. it generally derives different phylogenetic networks for the same dataset when different input orders are used.

Results

In this study, we introduce an improved Cass algorithm, Lnetwork, which can construct a phylogenetic network for a given set of clusters. We show that Lnetwork is significantly faster than Cass and effectively weakens the influence of input data order. Moreover, we show that Lnetwork can construct a much simpler network than most of the other available methods.

Availability

Lnetwork has been built as a Java software package and is freely available at http://nclab.hit.edu.cn/~wangjuan/Lnetwork/.

Contact

maozuguo@hit.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-06-29 +24253915,MEGANTE: a web-based system for integrated plant genome annotation.,"The recent advancement of high-throughput genome sequencing technologies has resulted in a considerable increase in demands for large-scale genome annotation. While annotation is a crucial step for downstream data analyses and experimental studies, this process requires substantial expertise and knowledge of bioinformatics. Here we present MEGANTE, a web-based annotation system that makes plant genome annotation easy for researchers unfamiliar with bioinformatics. Without any complicated configuration, users can perform genomic sequence annotations simply by uploading a sequence and selecting the species to query. MEGANTE automatically runs several analysis programs and integrates the results to select the appropriate consensus exon-intron structures and to predict open reading frames (ORFs) at each locus. Functional annotation, including a similarity search against known proteins and a functional domain search, are also performed for the predicted ORFs. The resultant annotation information is visualized with a widely used genome browser, GBrowse. For ease of analysis, the results can be downloaded in Microsoft Excel format. All of the query sequences and annotation results are stored on the server side so that users can access their own data from virtually anywhere on the web. The current release of MEGANTE targets 24 plant species from the Brassicaceae, Fabaceae, Musaceae, Poaceae, Salicaceae, Solanaceae, Rosaceae and Vitaceae families, and it allows users to submit a sequence up to 10 Mb in length and to save up to 100 sequences with the annotation information on the server. The MEGANTE web service is available at https://megante.dna.affrc.go.jp/.",2013-11-18 +25138169,Pepper: cytoscape app for protein complex expansion using protein-protein interaction networks.,"

Unlabelled

We introduce Pepper (Protein complex Expansion using Protein-Protein intERactions), a Cytoscape app designed to identify protein complexes as densely connected subnetworks from seed lists of proteins derived from proteomic studies. Pepper identifies connected subgraph by using multi-objective optimization involving two functions: (i) the coverage, a solution must contain as many proteins from the seed as possible, (ii) the density, the proteins of a solution must be as connected as possible, using only interactions from a proteome-wide interaction network. Comparisons based on gold standard yeast and human datasets showed Pepper's integrative approach as superior to standard protein complex discovery methods. The visualization and interpretation of the results are facilitated by an automated post-processing pipeline based on topological analysis and data integration about the predicted complex proteins. Pepper is a user-friendly tool that can be used to analyse any list of proteins.

Availability

Pepper is available from the Cytoscape plug-in manager or online (http://apps.cytoscape.org/apps/pepper) and released under GNU General Public License version 3.",2014-08-18 +23208516,Structure verification through computer-assisted spectral assignment of NMR spectra.,"The validation of a molecular organic structure on the basis of 1D and 2D HSQC, COSY and HMBC NMR spectra is proposed as an alternative to the methods that are mainly based on chemical shift prediction. The CCASA software was written for this purpose. It provides an updated and improved implementation of the preceding computer-assisted spectral assignment software. CCASA can be downloaded freely from http://www.univ-reims.fr/LSD/JmnSoft/CASA. Two bioactive natural products, a triterpene and a benzophenone, were selected from literature data as examples. The tentative matching between the structure and the NMR data interpretation of the triterpene unexpectedly leads to the hypothesis of an incorrect structure. The LSD software was used to find an alternative structure that improved the 2D NMR data interpretation and the carbon-13 chemical shift matching between experimental values and those produced by the nmrshiftdb2 prediction tool. The benzophenone example showed that signal assignment by means of chemical shift prediction can be replaced by elementary user-supplied chemical shift and multiplicity constraints.",2012-12-04 +23595697,Utilization of genomic signatures to identify high-efficacy candidate drugs for chemorefractory endometrial cancers.,"Endometrial cancer, one of the most common gynecologic malignancies, is increasing in Japan, nearly doubling over the last decade. High-grade disease patients are often resistant to conventional chemotherapy with platinum agents; therefore, discovery of efficacious new drugs in this setting is required to benefit chemorefractory cases. 
The 50% growth-inhibitory (GI50) concentration of 27 clinically relevant drugs was measured in the NCI60 panel of cell lines. Gene expression data were analyzed using Bayesian binary regression, to first generate a response signature for each drug and then to calculate individual susceptibility scores using in vivo endometrial cancer data (GSE2109; http://www.ncbi.nlm.nih.gov/geo) and in vitro data (GSE25458), as well as to identify candidate drugs for chemorefractory cases. Using these candidates, cell proliferation, apoptosis and caspase assays were performed in vitro. The tumor growth-inhibitory effect of the candidate was also assessed in vivo using nude mice. Through microarray analysis, fludarabine and temsirolimus showed higher susceptibility scores in high-grade cases compared to cisplatin, doxorubicin and paclitaxel. Fludarabine significantly inhibited cell proliferation and increased apoptosis in the cisplatin-resistant endometrial cancer cell line, HEC1A, relative to HEC50B (p < 0.001). Fludarabine treatment also enhanced caspase-3/7 activity in HEC1A relative to HEC50B cells (p < 0.001), and inhibited the growth of HEC1A xenograft tumors relative to cisplatin (p < 0.05). These results support that identification and use of genomic signatures can lead to identification of new therapeutic candidates that may prove beneficial to chemoresistant cases. Fludarabine may be useful in targeting high-grade, chemorefractory endometrial cancer.",2013-05-25 +25621171,VizBin - an application for reference-independent visualization and human-augmented binning of metagenomic data.,"

Background

Metagenomics is limited in its ability to link distinct microbial populations to genetic potential due to a current lack of representative isolate genome sequences. Reference-independent approaches, which exploit for example inherent genomic signatures for the clustering of metagenomic fragments (binning), offer the prospect to resolve and reconstruct population-level genomic complements without the need for prior knowledge.

Results

We present VizBin, a Java™-based application which offers efficient and intuitive reference-independent visualization of metagenomic datasets from single samples for subsequent human-in-the-loop inspection and binning. The method is based on nonlinear dimension reduction of genomic signatures and exploits the superior pattern recognition capabilities of the human eye-brain system for cluster identification and delineation. We demonstrate the general applicability of VizBin for the analysis of metagenomic sequence data by presenting results from two cellulolytic microbial communities and one human-borne microbial consortium. The superior performance of our application compared to other analogous metagenomic visualization and binning methods is also presented.

Conclusions

VizBin can be applied de novo for the visualization and subsequent binning of metagenomic datasets from single samples, and it can be used for the post hoc inspection and refinement of automatically generated bins. Due to its computational efficiency, it can be run on common desktop machines and enables the analysis of complex metagenomic datasets in a matter of minutes. The software implementation is available at https://claczny.github.io/VizBin under the BSD License (four-clause) and runs under Microsoft Windows™, Apple Mac OS X™ (10.7 to 10.10), and Linux.",2015-01-20 +24764463,GenCLiP 2.0: a web server for functional clustering of genes and construction of molecular networks based on free terms.,"

Unlabelled

Identifying biological functions and molecular networks in a gene list and how the genes may relate to various topics is of considerable value to biomedical researchers. Here, we present a web-based text-mining server, GenCLiP 2.0, which can analyze human genes with enriched keywords and molecular interactions. Compared with other similar tools, GenCLiP 2.0 offers two unique features: (i) analysis of gene functions with free terms (i.e. any terms in the literature) generated by literature mining or provided by the user and (ii) accurate identification and integration of comprehensive molecular interactions from Medline abstracts, to construct molecular networks and subnetworks related to the free terms.

Availability and implementation

http://ci.smu.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-23 +25495332,SPoRE: a mathematical model to predict double strand breaks and axis protein sites in meiosis.,"

Background

Meiotic recombination between homologous chromosomes provides natural combinations of genetic variations and is a main driving force of evolution. It is initiated via programmed DNA double-strand breaks (DSB) and involves a specific axial chromosomal structure. So far, recombination regions have been mainly determined by experiments, both expensive and time-consuming.

Results

SPoRE is a mathematical model that describes the non-uniform localisation of DSB and axis proteins sites, and distinguishes high versus low protein density. It is based on a combination of genomic signals, based on what is known from wet-lab experiments, whose contribution is precisely quantified. It models axis proteins accumulation at gene 5'-ends with a discrete approximation of their diffusion and convection along genes. It models DSB accumulation at approximated gene promoter positions with intergenic region length and GC-content. SPoRE can be used for prediction and it is parameterised in an obvious way that makes it easy to understand from a biological viewpoint.

Conclusions

When compared to Saccharomyces cerevisiae experimental data, SPoRE predicts axis protein and DSB positions with high sensitivity and precision, axis protein density with an average local correlation r = 0.63 and DSB density with an average local correlation r = 0.62. SPoRE outperforms previous DSB predictors, which are based on nucleotide patterning, and it reaches an 85% success rate in DSB prediction compared to 54% obtained by available tools on a benchmarked dataset. SPoRE is available at the address http://www.lcqb.upmc.fr/SPoRE/.",2014-12-11 +23825367,CellMix: a comprehensive toolbox for gene expression deconvolution.,"

Unlabelled

Gene expression data are typically generated from heterogeneous biological samples that are composed of multiple cell or tissue types, in varying proportions, each contributing to global gene expression. This heterogeneity is a major confounder in standard analysis such as differential expression analysis, where differences in the relative proportions of the constituent cells may prevent or bias the detection of cell-specific differences. Computational deconvolution of global gene expression is an appealing alternative to costly physical sample separation techniques and enables a more detailed analysis of the underlying biological processes at the cell-type level. To facilitate and popularize the application of such methods, we developed CellMix, an R package that incorporates most state-of-the-art deconvolution methods, into an intuitive and extendible framework, providing a single entry point to explore, assess and disentangle gene expression data from heterogeneous samples.

Availability and implementation

The CellMix package builds on R/BioConductor and is available from http://web.cbio.uct.ac.za/∼renaud/CRAN/web/CellMix. It is currently being submitted to BioConductor. The package's vignettes notably contain additional information, examples and references.",2013-07-03 +23922726,Optimizing information in Next-Generation-Sequencing (NGS) reads for improving de novo genome assembly.,"Next-Generation-Sequencing is advantageous because of its much higher data throughput and much lower cost compared with the traditional Sanger method. However, NGS reads are shorter than Sanger reads, making de novo genome assembly very challenging. Because genome assembly is essential for all downstream biological studies, great efforts have been made to enhance the completeness of genome assembly, which requires the presence of long reads or long distance information. To improve de novo genome assembly, we develop a computational program, ARF-PE, to increase the length of Illumina reads. ARF-PE takes as input Illumina paired-end (PE) reads and recovers the original DNA fragments from which two ends the paired reads are obtained. On the PE data of four bacteria, ARF-PE recovered >87% of the DNA fragments and achieved >98% of perfect DNA fragment recovery. Using Velvet, SOAPdenovo, Newbler, and CABOG, we evaluated the benefits of recovered DNA fragments to genome assembly. For all four bacteria, the recovered DNA fragments increased the assembly contiguity. For example, the N50 lengths of the P. brasiliensis contigs assembled by SOAPdenovo and Newbler increased from 80,524 bp to 166,573 bp and from 80,655 bp to 193,388 bp, respectively. ARF-PE also increased assembly accuracy in many cases. On the PE data of two fungi and a human chromosome, ARF-PE doubled and tripled the N50 length. However, the assembly accuracies dropped, but still remained >91%. In general, ARF-PE can increase both assembly contiguity and accuracy for bacterial genomes. 
For complex eukaryotic genomes, ARF-PE is promising because it raises assembly contiguity. But future error correction is needed for ARF-PE to also increase the assembly accuracy. ARF-PE is freely available at http://140.116.235.124/~tliu/arf-pe/.",2013-07-29 +22736877,"500,000 fish phenotypes: The new informatics landscape for evolutionary and developmental biology of the vertebrate skeleton.","The rich phenotypic diversity that characterizes the vertebrate skeleton results from evolutionary changes in regulation of genes that drive development. Although relatively little is known about the genes that underlie the skeletal variation among fish species, significant knowledge of genetics and development is available for zebrafish. Because developmental processes are highly conserved, this knowledge can be leveraged for understanding the evolution of skeletal diversity. We developed the Phenoscape Knowledgebase (KB; http://kb.phenoscape.org) to yield testable hypotheses of candidate genes involved in skeletal evolution. We developed a community anatomy ontology for fishes and ontology-based methods to represent complex free-text character descriptions of species in a computable format. With these tools, we populated the KB with comparative morphological data from the literature on over 2,500 teleost fishes (mainly Ostariophysi) resulting in over 500,000 taxon phenotype annotations. The KB integrates these data with similarly structured phenotype data from zebrafish genes (http://zfin.org). Using ontology-based reasoning, candidate genes can be inferred for the phenotypes that vary across taxa, thereby uniting genetic and phenotypic data to formulate evo-devo hypotheses. 
The morphological data in the KB can be browsed, sorted, and aggregated in ways that provide unprecedented possibilities for data mining and discovery.",2012-05-21 +24098326,Identifying cancer specific functionally relevant miRNAs from gene expression and miRNA-to-gene networks using regularized regression.,"Identifying microRNA signatures for the different types and subtypes of cancer can result in improved detection, characterization and understanding of cancer and move us towards more personalized treatment strategies. However, using microRNA's differential expression (tumour versus normal) to determine these signatures may lead to inaccurate predictions and low interpretability because of the noisy nature of miRNA expression data. We present a method for the selection of biologically active microRNAs using gene expression data and microRNA-to-gene interaction network. Our method is based on a linear regression with an elastic net regularization. Our simulations show that, with our method, the active miRNAs can be detected with high accuracy and our approach is robust to high levels of noise and missing information. Furthermore, our results on real datasets for glioblastoma and prostate cancer are confirmed by microRNA expression measurements. Our method leads to the selection of potentially functionally important microRNAs. The associations of some of our identified miRNAs with cancer mechanisms are already confirmed in other studies (hypoxia related hsa-mir-210 and apoptosis-related hsa-mir-296-5p). We have also identified additional miRNAs that were not previously studied in the context of cancer but are coherently predicted as active by our method and may warrant further investigation. 
The code is available in Matlab and R and can be downloaded on http://www.cs.toronto.edu/goldenberg/Anna_Goldenberg/Current_Research.html.",2013-10-02 +25252785,CASPER: context-aware scheme for paired-end reads from high-throughput amplicon sequencing.,"Merging the forward and reverse reads from paired-end sequencing is a critical task that can significantly improve the performance of downstream tasks, such as genome assembly and mapping, by providing them with virtually elongated reads. However, due to the inherent limitations of most paired-end sequencers, the chance of observing erroneous bases grows rapidly as the end of a read is approached, which becomes a critical hurdle for accurately merging paired-end reads. Although there exist several sophisticated approaches to this problem, their performance in terms of quality of merging often remains unsatisfactory. To address this issue, here we present a context-aware scheme for paired-end reads (CASPER): a computational method to rapidly and robustly merge overlapping paired-end reads. Being particularly well suited to amplicon sequencing applications, CASPER is thoroughly tested with both simulated and real high-throughput amplicon sequencing data. According to our experimental results, CASPER significantly outperforms existing state-of-the art paired-end merging tools in terms of accuracy and robustness. CASPER also exploits the parallelism in the task of paired-end merging and effectively speeds up by multithreading. CASPER is freely available for academic use at http://best.snu.ac.kr/casper.",2014-09-10 +26006758,"The development of a Simplified, Effective, Labour Monitoring-to-Action (SELMA) tool for Better Outcomes in Labour Difficulty (BOLD): study protocol.","

Background

The partograph is currently the main tool available to support decision-making of health professionals during labour. However, the rate of appropriate use of the partograph is disappointingly low. Apart from limitations that are associated with partograph use, evidence of positive impact on labour-related health outcomes is lacking. The main goal of this study is to develop a Simplified, Effective, Labour Monitoring-to-Action (SELMA) tool. The primary objectives are: to identify the essential elements of intrapartum monitoring that trigger the decision to use interventions aimed at preventing poor labour outcomes; to develop a simplified, monitoring-to-action algorithm for labour management; and to compare the diagnostic performance of SELMA and partograph algorithms as tools to identify women who are likely to develop poor labour-related outcomes.

Methods/design

A prospective cohort study will be conducted in eight health facilities in Nigeria and Uganda (four facilities from each country). All women admitted for vaginal birth will comprise the study population (estimated sample size: 7,812 women). Data will be collected on maternal characteristics on admission, labour events and pregnancy outcomes by trained research assistants at the participating health facilities. Prediction models will be developed to identify women at risk of intrapartum-related perinatal death or morbidity (primary outcomes) throughout the course of labour. These predictions models will be used to assemble a decision-support tool that will be able to suggest the best course of action to avert adverse outcomes during the course of labour. To develop this set of prediction models, we will use up-to-date techniques of prognostic research, including identification of important predictors, assigning of relative weights to each predictor, estimation of the predictive performance of the model through calibration and discrimination, and determination of its potential for application using internal validation techniques.

Discussion

This research offers an opportunity to revisit the theoretical basis of the partograph. It is envisioned that the final product would help providers overcome the challenging tasks of promptly interpreting complex labour information and deriving appropriate clinical actions, and thus increase efficiency of the care process, enhance providers' competence and ultimately improve labour outcomes. Please see related articles ' http://dx.doi.org/10.1186/s12978-015-0027-6 ' and ' http://dx.doi.org/10.1186/s12978-015-0028-5 '.",2015-05-26 +24683370,Efficient Parameter Estimation of Generalizable Coarse-Grained Protein Force Fields Using Contrastive Divergence: A Maximum Likelihood Approach.,"Maximum Likelihood (ML) optimization schemes are widely used for parameter inference. They maximize the likelihood of some experimentally observed data, with respect to the model parameters iteratively, following the gradient of the logarithm of the likelihood. Here, we employ a ML inference scheme to infer a generalizable, physics-based coarse-grained protein model (which includes Go̅-like biasing terms to stabilize secondary structure elements in room-temperature simulations), using native conformations of a training set of proteins as the observed data. Contrastive divergence, a novel statistical machine learning technique, is used to efficiently approximate the direction of the gradient ascent, which enables the use of a large training set of proteins. Unlike previous work, the generalizability of the protein model allows the folding of peptides and a protein (protein G) which are not part of the training set. We compare the same force field with different van der Waals (vdW) potential forms: a hard cutoff model, and a Lennard-Jones (LJ) potential with vdW parameters inferred or adopted from the CHARMM or AMBER force fields. 
Simulations of peptides and protein G show that the LJ model with inferred parameters outperforms the hard cutoff potential, which is consistent with previous observations. Simulations using the LJ potential with inferred vdW parameters also outperforms the protein models with adopted vdW parameter values, demonstrating that model parameters generally cannot be used with force fields with different energy functions. The software is available at https://sites.google.com/site/crankite/.",2013-11-15 +24243847,Phylo SI: a new genome-wide approach for prokaryotic phylogeny.,"The evolutionary history of all life forms is usually represented as a vertical tree-like process. In prokaryotes, however, the vertical signal is partly obscured by the massive influence of horizontal gene transfer (HGT). The HGT creates widespread discordance between evolutionary histories of different genes as genomes become mosaics of gene histories. Thus, the Tree of Life (TOL) has been questioned as an appropriate representation of the evolution of prokaryotes. Nevertheless a common hypothesis is that prokaryotic evolution is primarily tree-like, and a routine effort is made to place new isolates in their appropriate location in the TOL. Moreover, it appears desirable to exploit non-tree-like evolutionary processes for the task of microbial classification. In this work, we present a novel technique that builds on the straightforward observation that gene order conservation ('synteny') decreases in time as a result of gene mobility. This is particularly true in prokaryotes, mainly due to HGT. Using a 'synteny index' (SI) that measures the average synteny between a pair of genomes, we developed the phylogenetic reconstruction tool 'Phylo SI'. Phylo SI offers several attractive properties such as easy bootstrapping, high sensitivity in cases where phylogenetic signal is weak and computational efficiency. 
Phylo SI was tested both on simulated data and on two bacterial data sets and compared with two well-established phylogenetic methods. Phylo SI is particularly efficient on short evolutionary distances where synteny footprints remain detectable, whereas the nucleotide substitution signal is too weak for reliable sequence-based phylogenetic reconstruction. The method is publicly available at http://research.haifa.ac.il/ssagi/software/PhyloSI.zip.",2013-11-15 +24475057,HTSstation: a web application and open-access libraries for high-throughput sequencing data analysis.,"The HTSstation analysis portal is a suite of simple web forms coupled to modular analysis pipelines for various applications of High-Throughput Sequencing including ChIP-seq, RNA-seq, 4C-seq and re-sequencing. HTSstation offers biologists the possibility to rapidly investigate their HTS data using an intuitive web application with heuristically pre-defined parameters. A number of open-source software components have been implemented and can be used to build, configure and run HTS analysis pipelines reactively. Besides, our programming framework empowers developers with the possibility to design their own workflows and integrate additional third-party software. The HTSstation web application is accessible at http://htsstation.epfl.ch.",2014-01-27 +24810850,"mrsFAST-Ultra: a compact, SNP-aware mapper for high performance sequencing applications.","High throughput sequencing (HTS) platforms generate unprecedented amounts of data that introduce challenges for processing and downstream analysis. While tools that report the 'best' mapping location of each read provide a fast way to process HTS data, they are not suitable for many types of downstream analysis such as structural variation detection, where it is important to report multiple mapping loci for each read. 
For this purpose we introduce mrsFAST-Ultra, a fast, cache oblivious, SNP-aware aligner that can handle the multi-mapping of HTS reads very efficiently. mrsFAST-Ultra improves mrsFAST, our first cache oblivious read aligner capable of handling multi-mapping reads, through new and compact index structures that reduce not only the overall memory usage but also the number of CPU operations per alignment. In fact the size of the index generated by mrsFAST-Ultra is 10 times smaller than that of mrsFAST. As importantly, mrsFAST-Ultra introduces new features such as being able to (i) obtain the best mapping loci for each read, and (ii) return all reads that have at most n mapping loci (within an error threshold), together with these loci, for any user specified n. Furthermore, mrsFAST-Ultra is SNP-aware, i.e. it can map reads to reference genome while discounting the mismatches that occur at common SNP locations provided by db-SNP; this significantly increases the number of reads that can be mapped to the reference genome. Notice that all of the above features are implemented within the index structure and are not simple post-processing steps and thus are performed highly efficiently. Finally, mrsFAST-Ultra utilizes multiple available cores and processors and can be tuned for various memory settings. Our results show that mrsFAST-Ultra is roughly five times faster than its predecessor mrsFAST. In comparison to newly enhanced popular tools such as Bowtie2, it is more sensitive (it can report 10 times or more mappings per read) and much faster (six times or more) in the multi-mapping mode. Furthermore, mrsFAST-Ultra has an index size of 2GB for the entire human reference genome, which is roughly half of that of Bowtie2. mrsFAST-Ultra is open source and it can be accessed at http://mrsfast.sourceforge.net.",2014-05-08 +24048357,miREval 2.0: a web tool for simple microRNA prediction in genome sequences.,"

Result

We have developed miREval 2.0, an online tool that can simultaneously search up to 100 sequences for novel microRNAs (miRNAs) in multiple organisms. miREval 2.0 uses multiple published in silico approaches to detect miRNAs in sequences of interest. This tool can be used to discover miRNAs from DNA sequences or to validate candidates from sequencing data.

Availability

http://mimirna.centenary.org.au/mireval/.",2013-09-18 +21688258,Neurodevelopmental MRI brain templates for children from 2 weeks to 4 years of age.,"Spatial normalization and segmentation of pediatric brain magnetic resonance images (MRI) data with adult templates may impose biases and limitations in pediatric neuroimaging work. To remedy this issue, we created a single database made up of a series of pediatric, age-specific MRI average brain templates. These average, age-specific templates were constructed from brain scans of individual children obtained from two sources: (1) the NIH MRI Study of Normal Brain Development and (2) MRIs from University of South Carolina's McCausland Brain Imaging Center. Participants included young children enrolled at ages ranging from 8 days through 4.3 years of age. A total of 13 age group cohorts spanning the developmental progression from birth through 4.3 years of age were used to construct age-specific MRI brain templates (2 weeks, 3, 4.5, 6, 7.5, 9, 12, 15, 18 months, 2, 2.5, 3, 4 years). Widely used processing programs (FSL, SPM, and ANTS) extracted the brain and constructed average templates separately for 1.5T and 3T MRI volumes. The resulting age-specific, average templates showed clear changes in head and brain size across ages and between males and females, as well as changes in regional brain structural characteristics (e.g., myelin development). This average brain template database is available via our website (http://jerlab.psych.sc.edu/neurodevelopmentalmridatabase) for use by other researchers. Use of these age-specific, average pediatric brain templates by the research community will enhance our ability to gain a clearer understanding of the early postnatal development of the human brain in health and in disease.",2011-06-17 +25819136,An audit of best evidence topic reviews in the International Journal of Surgery.,"

Introduction

IJS launched best evidence topic reviews (BETs) in 2011, when the guidelines for conducting and reporting these reviews were published in the journal.

Aims

(1) Audit the adherence of all published BETs in IJS to these guidelines. (2) Assess the reach and impact of BETs published in IJS.

Methods

BETs published between 2011 and February 2014 were identified from http://www.journal-surgery.net/. Standards audited included: completeness of description of study attrition, and independent verification of searches. Other extracted data included: relevant subspecialty, duration between searches and publication, and between acceptance and publication. Each BET's number of citations (http://scholar.google.co.uk/), number of tweets (http://www.altmetric.com/) and number of Researchgate views (https://www.researchgate.net/) were recorded.

Results

Thirty-four BETs were identified: the majority, 19 (56%), relating to upper gastrointestinal surgery and none to cardiothoracic, orthopaedic or paediatric surgery. Twenty-nine BETs (82%) fully described study attrition. Twenty-one (62%) had independently verified search results. The mean times from literature searching to publication and acceptance to publication were 38.5 weeks and 13 days respectively. There were a mean 40 (range 0-89) Researchgate views/article, mean 2 (range 0-7) citations/article and mean 0.36 (range 0-2) tweets/article.

Conclusions

Adherence to BET guidelines has been variable. Authors are encouraged to adhere to journal guidelines and reviewers and editors to enforce them. BETs have received similar citation levels to other IJS articles. Means of increasing the visibility of published BETs such as social media sharing, conference presentation and deposition of abstracts in public repositories should be explored. More work is required to encourage more submissions from other surgical subspecialties other than gastrointestinal specialties.",2015-03-25 +21438606,"Anisotropic solvent model of the lipid bilayer. 2. Energetics of insertion of small molecules, peptides, and proteins in membranes.","A new computational approach to calculating binding energies and spatial positions of small molecules, peptides, and proteins in the lipid bilayer has been developed. The method combines an anisotropic solvent representation of the lipid bilayer and universal solvation model, which predicts transfer energies of molecules from water to an arbitrary medium with defined polarity properties. The universal solvation model accounts for hydrophobic, van der Waals, hydrogen-bonding, and electrostatic solute-solvent interactions. The lipid bilayer is represented as a fluid anisotropic environment described by profiles of dielectric constant (ε), solvatochromic dipolarity parameter (π*), and hydrogen bonding acidity and basicity parameters (α and β). The polarity profiles were calculated using published distributions of quasi-molecular segments of lipids determined by neutron and X-ray scattering for DOPC bilayer and spin-labeling data that define concentration of water in the lipid acyl chain region. The model also accounts for the preferential solvation of charges and polar groups by water and includes the effect of the hydrophobic mismatch for transmembrane proteins. 
The method was tested on calculations of binding energies and preferential positions in membranes for small-molecules, peptides and peripheral membrane proteins that have been experimentally studied. The new theoretical approach was implemented in a new version (2.0) of our PPM program and applied for the large-scale calculations of spatial positions in membranes of more than 1000 peripheral and integral proteins. The results of calculations are deposited in the updated OPM database ( http://opm.phar.umich.edu ).",2011-03-25 +24352679,Constructing and characterizing a bioactive small molecule and microRNA association network for Alzheimer's disease.,"Alzheimer's disease (AD) is an incurable neurodegenerative disorder. Much effort has been devoted to developing effective therapeutic agents. Recently, targeting microRNAs (miRNAs) with small molecules has become a novel therapy for human diseases. In this study, we present a systematic computational approach to construct a bioactive Small molecule and miRNA association Network in AD (SmiRN-AD), which is based on the gene expression signatures of bioactive small molecule perturbation and AD-related miRNA regulation. We also performed topological and functional analysis of the SmiRN-AD from multiple perspectives. At the significance level of p ≤ 0.01, 496 small molecule-miRNA associations, including 25 AD-related miRNAs and 275 small molecules, were recognized and used to construct the SmiRN-AD. The drugs that were connected with the same miRNA tended to share common drug targets (p = 1.72 × 10(-4)) and belong to the same therapeutic category (p = 4.22 × 10(-8)). The miRNAs that were linked to the same small molecule regulated more common miRNA targets (p = 6.07 × 10(-3)). 
Further analysis of the positive connections (quinostatin and miR-148b, amantadine and miR-15a) and the negative connections (melatonin and miR-30e-5p) indicated that our large-scale predictions afforded specific biological insights into AD pathogenesis and therapy. This study proposes a holistic strategy for deciphering the associations between small molecules and miRNAs in AD, which may be helpful for developing a novel effective miRNA-associated therapeutic strategy for AD. A comprehensive database for the SmiRN-AD and the differential expression patterns of the miRNA targets in AD is freely available at http://bioinfo.hrbmu.edu.cn/SmiRN-AD/.",2013-12-18 +21865298,Use of array CGH to detect exonic copy number variants throughout the genome in autism families detects a novel deletion in TMLHE.,"Autism is a neurodevelopmental disorder with increasing evidence of heterogeneous genetic etiology including de novo and inherited copy number variants (CNVs). We performed array comparative genomic hybridization using a custom Agilent 1 M oligonucleotide array intended to cover 197 332 unique exons in RefSeq genes; 98% were covered by at least one probe and 95% were covered by three or more probes with the focus on detecting relatively small CNVs that would implicate a single protein-coding gene. The study group included 99 trios from the Simons Simplex Collection. The analysis identified and validated 55 potentially pathogenic CNVs, categorized as de novo autosomal heterozygous, inherited homozygous autosomal, complex autosomal and hemizygous deletions on the X chromosome of probands. Twenty percent (11 of 55) of these CNV calls were rare when compared with the Database of Genomic Variants. Thirty-six percent (20 of 55) of the CNVs were also detected in the same samples in an independent analysis using the 1 M Illumina single-nucleotide polymorphism array. 
Findings of note included a common and sometimes homozygous 61 bp exonic deletion in SLC38A10, three CNVs found in lymphoblast-derived DNA but not present in whole-blood derived DNA and, most importantly, in a male proband, an exonic deletion of the TMLHE (trimethyllysine hydroxylase epsilon) that encodes the first enzyme in the biosynthesis of carnitine. Data for CNVs present in lymphoblasts but absent in fresh blood DNA suggest that these represent clonal outgrowth of individual B cells with pre-existing somatic mutations rather than artifacts arising in cell culture. GEO accession number GSE23765 (http://www.ncbi.nlm.nih.gov/geo/, date last accessed on 30 August 2011). Genboree accession: http://genboree.org/java-bin/gbrowser.jsp?refSeqId=1868&entryPointId=chr17&from=53496072&to=53694382&isPublic=yes, date last accessed on 30 August 2011.",2011-08-24 +24597945,DB2: a probabilistic approach for accurate detection of tandem duplication breakpoints using paired-end reads.,"

Background

With the advent of paired-end high throughput sequencing, it is now possible to identify various types of structural variation on a genome-wide scale. Although many methods have been proposed for structural variation detection, most do not provide precise boundaries for identified variants. In this paper, we propose a new method, Distribution Based detection of Duplication Boundaries (DB2), for accurate detection of tandem duplication breakpoints, an important class of structural variation, with high precision and recall.

Results

Our computational experiments on simulated data show that DB2 outperforms state-of-the-art methods in terms of finding breakpoints of tandem duplications, with a higher positive predictive value (precision) in calling the duplications' presence. In particular, DB2's prediction of tandem duplications is correct 99% of the time even for very noisy data, while narrowing down the space of possible breakpoints within a margin of 15 to 20 bps on the average. Most of the existing methods provide boundaries in ranges that extend to hundreds of bases with lower precision values. Our method is also highly robust to varying properties of the sequencing library and to the sizes of the tandem duplications, as shown by its stable precision, recall and mean boundary mismatch performance. We demonstrate our method's efficacy using both simulated paired-end reads, and those generated from a melanoma sample and two ovarian cancer samples. Newly discovered tandem duplications are validated using PCR and Sanger sequencing.

Conclusions

Our method, DB2, uses discordantly aligned reads, taking into account the distribution of fragment length to predict tandem duplications along with their breakpoints on a donor genome. The proposed method fine tunes the breakpoint calls by applying a novel probabilistic framework that incorporates the empirical fragment length distribution to score each feasible breakpoint. DB2 is implemented in Java programming language and is freely available at http://mendel.gene.cwru.edu/laframboiselab/software.php.",2014-03-05 +24512684,"A multi-split mapping algorithm for circular RNA, splicing, trans-splicing and fusion detection.","Numerous high-throughput sequencing studies have focused on detecting conventionally spliced mRNAs in RNA-seq data. However, non-standard RNAs arising through gene fusion, circularization or trans-splicing are often neglected. We introduce a novel, unbiased algorithm to detect splice junctions from single-end cDNA sequences. In contrast to other methods, our approach accommodates multi-junction structures. Our method compares favorably with competing tools for conventionally spliced mRNAs and, with a gain of up to 40% of recall, systematically outperforms them on reads with multiple splits, trans-splicing and circular products. The algorithm is integrated into our mapping tool segemehl (http://www.bioinf.uni-leipzig.de/Software/segemehl/).",2014-02-10 +23300918,Huntington's disease mouse models online: high-resolution MRI images with stereotaxic templates for computational neuroanatomy.,"Magnetic resonance imaging (MRI) has proved to be an ideal modality for non-destructive and highly detailed assessment of structural morphology in biological tissues. Here we used MRI to make a dataset of ex vivo brains from two different rodent models of Huntington's disease (HD), the R6/2 line and the YAC 128 mouse. We are making the whole dataset (399 transgenic HD and wildtype (WT) brains, from mice aged 9-80 weeks) publicly available. 
These data will be useful, not only to investigators interested in the study of HD, but also to researchers of computational neuroanatomy who may not have access to such large datasets from mouse models. Here we demonstrate a number of uses of such data, for example to produce maps of grey and white matter and cortical thickness. As an example of how the library might provide insights in mouse models of HD, we calculated whole brain grey matter volumes across different age groups with different numbers of cytosine-adenine-guanine (CAG) repeats in a fragment of the gene responsible for HD in humans. (The R6/2 dataset was obtained from an allelic series of R6/2 mice carrying a range of CAG repeat lengths between 109 and 464.) This analysis revealed different trajectories for each fragment length. In particular there was a gradient of decreasing pathology with longer CAG repeat lengths, reflecting our previous findings with behavioural and histological studies. There will be no constraints placed on the use of the datasets included here. The original data will be easily and permanently accessible via the University of Cambridge data repository (http://www.dspace.cam.ac.uk/handle/1810/243361).",2012-12-31 +25160973,Poly peak parser: Method and software for identification of unknown indels using sanger sequencing of polymerase chain reaction products.,"

Background

Genome editing techniques, including ZFN, TALEN, and CRISPR, have created a need to rapidly screen many F1 individuals to identify carriers of indels and determine the sequences of the mutations. Current techniques require multiple clones of the targeted region to be sequenced for each individual, which is inefficient when many individuals must be analyzed. Direct Sanger sequencing of a polymerase chain reaction (PCR) amplified region surrounding the target site is efficient, but Sanger sequencing genomes heterozygous for an indel results in a string of ""double peaks"" due to the mismatched region.

Results

To facilitate indel identification, we developed an online tool called Poly Peak Parser (available at http://yost.genetics.utah.edu/software.php) that is able to separate chromatogram data containing ambiguous base calls into wild-type and mutant allele sequences. This tool allows the nature of the indel to be determined from a single sequencing run per individual performed directly on a PCR product spanning the targeted site, without cloning.

Conclusions

The method and algorithm described here facilitate rapid identification and sequence characterization of heterozygous mutant carriers generated by genome editing. Although designed for screening F1 individuals, this tool can also be used to identify heterozygous indels in many contexts.",2014-09-30 +23409703,SOAPfuse: an algorithm for identifying fusion transcripts from paired-end RNA-Seq data.,"We have developed a new method, SOAPfuse, to identify fusion transcripts from paired-end RNA-Seq data. SOAPfuse applies an improved partial exhaustion algorithm to construct a library of fusion junction sequences, which can be used to efficiently identify fusion events, and employs a series of filters to nominate high-confidence fusion transcripts. Compared with other released tools, SOAPfuse achieves higher detection efficiency and consumed less computing resources. We applied SOAPfuse to RNA-Seq data from two bladder cancer cell lines, and confirmed 15 fusion transcripts, including several novel events common to both cell lines. SOAPfuse is available at http://soap.genomics.org.cn/soapfuse.html.",2013-02-14 +24376502,Inferring short-range linkage information from sequencing chromatograms.,"Direct Sanger sequencing of viral genome populations yields multiple ambiguous sequence positions. It is not straightforward to derive linkage information from sequencing chromatograms, which in turn hampers the correct interpretation of the sequence data. We present a method for determining the variants existing in a viral quasispecies in the case of two nearby ambiguous sequence positions by exploiting the effect of sequence context-dependent incorporation of dideoxynucleotides. The computational model was trained on data from sequencing chromatograms of clonal variants and was evaluated on two test sets of in vitro mixtures. 
The approach achieved high accuracies in identifying the mixture components of 97.4% on a test set in which the positions to be analyzed are only one base apart from each other, and of 84.5% on a test set in which the ambiguous positions are separated by three bases. In silico experiments suggest two major limitations of our approach in terms of accuracy. First, due to a basic limitation of Sanger sequencing, it is not possible to reliably detect minor variants with a relative frequency of no more than 10%. Second, the model cannot distinguish between mixtures of two or four clonal variants, if one of two sets of linear constraints is fulfilled. Furthermore, the approach requires repetitive sequencing of all variants that might be present in the mixture to be analyzed. Nevertheless, the effectiveness of our method on the two in vitro test sets shows that short-range linkage information of two ambiguous sequence positions can be inferred from Sanger sequencing chromatograms without any further assumptions on the mixture composition. Additionally, our model provides new insights into the established and widely used Sanger sequencing technology. The source code of our method is made available at http://bioinf.mpi-inf.mpg.de/publications/beggel/linkageinformation.zip.",2013-12-20 +24472492,"Collaborative development for setup, execution, sharing and analytics of complex NMR experiments.","Factory settings of NMR pulse sequences are rarely ideal for every scenario in which they are utilised. The optimisation of NMR experiments has for many years been performed locally, with implementations often specific to an individual spectrometer. Furthermore, these optimised experiments are normally retained solely for the use of an individual laboratory, spectrometer or even single user. Here we introduce a web-based service that provides a database for the deposition, annotation and optimisation of NMR experiments. 
The application uses a Wiki environment to enable the collaborative development of pulse sequences. It also provides a flexible mechanism to automatically generate NMR experiments from deposited sequences. Multidimensional NMR experiments of proteins and other macromolecules consume significant resources, in terms of both spectrometer time and effort required to analyse the results. Systematic analysis of simulated experiments can enable optimal allocation of NMR resources for structural analysis of proteins. Our web-based application (http://nmrplus.org) provides all the necessary information, including the auxiliaries (waveforms, decoupling sequences etc.), for analysis of experiments by accurate numerical simulation of multidimensional NMR experiments. The online database of the NMR experiments, together with a systematic evaluation of their sensitivity, provides a framework for selection of the most efficient pulse sequences. The development of such a framework provides a basis for the collaborative optimisation of pulse sequences by the NMR community, with the benefits of this collective effort being available to the whole community.",2013-12-16 +26090776,Use of the Adaptive LASSO Method to Identify PM2.5 Components Associated with Blood Pressure in Elderly Men: The Veterans Affairs Normative Aging Study.,"

Background

PM2.5 (particulate matter ≤ 2.5 μm) has been associated with adverse cardiovascular outcomes, but it is unclear whether specific PM2.5 components, particularly metals, may be responsible for cardiovascular effects.

Objectives

We aimed to determine which PM2.5 components are associated with blood pressure in a longitudinal cohort.

Methods

We fit linear mixed-effects models with the adaptive LASSO penalty to longitudinal data from 718 elderly men in the Veterans Affairs Normative Aging Study, 1999-2010. We controlled for PM2.5 mass, age, body mass index, use of antihypertensive medication (ACE inhibitors, non-ophthalmic beta blockers, calcium channel blockers, diuretics, and angiotensin receptor antagonists), smoking status, alcohol intake, years of education, temperature, and season as fixed effects in the models, and additionally applied the adaptive LASSO method to select PM2.5 components associated with blood pressure. Final models were identified by the Bayesian Information Criterion (BIC).

Results

For systolic blood pressure (SBP), nickel (Ni) and sodium (Na) were selected by the adaptive LASSO, whereas only Ni was selected for diastolic blood pressure (DBP). An interquartile range increase (2.5 ng/m3) in 7-day moving-average Ni was associated with a 2.48-mmHg (95% CI: 1.45, 3.50 mmHg) increase in SBP and a 2.22-mmHg (95% CI: 1.69, 2.75 mmHg) increase in DBP, respectively. Associations were comparable when the analysis was restricted to study visits with PM2.5 below the 75th percentile of the distribution (12 μg/m3).

Conclusions

Our study suggested that exposure to ambient Ni was associated with increased blood pressure independent of PM2.5 mass in our study population of elderly men. Further research is needed to confirm our findings, assess generalizability to other populations, and identify potential mechanisms for Ni effects.

Citation

Dai L, Koutrakis P, Coull BA, Sparrow D, Vokonas PS, Schwartz JD. 2016. Use of the adaptive LASSO method to identify PM2.5 components associated with blood pressure in elderly men: the Veterans Affairs Normative Aging Study. Environ Health Perspect 124:120-125; http://dx.doi.org/10.1289/ehp.1409021.",2015-06-19 +23377977,Discovery of microRNA regulatory networks by integrating multidimensional high-throughput data.,"MicroRNAs (miRNAs) are endogenous non-coding RNAs (ncRNAs) of approximately 22 nt that regulate the expression of a large fraction of genes by targeting messenger RNAs (mRNAs). However, determining the biologically significant targets of miRNAs is an ongoing challenge. In this chapter, we describe how to identify miRNA-target interactions and miRNA regulatory networks from high-throughput deep sequencing, CLIP-Seq (HITS-CLIP, PAR-CLIP) and degradome sequencing data using starBase platforms. In starBase, several web-based and stand-alone computational tools were developed to discover Argonaute (Ago) binding and cleavage sites, miRNA-target interactions, perform enrichment analysis of miRNA target genes in Gene Ontology (GO) categories and biological pathways, and identify combinatorial effects between Ago and other RNA-binding proteins (RBPs). Investigating target pathways of miRNAs in human CLIP-Seq data, we found that many cancer-associated miRNAs modulate cancer pathways. Performing an enrichment analysis of genes targeted by highly expressed miRNAs in the mouse brain showed that many miRNAs are involved in cancer-associated MAPK signaling and glioma pathways, as well as neuron-associated neurotrophin signaling and axon guidance pathways. Moreover, thousands of combinatorial binding sites between Ago and RBPs were identified from CLIP-Seq data suggesting RBPs and miRNAs coordinately regulate mRNA transcripts. 
As a means of comprehensively integrating CLIP-Seq and Degradome-Seq data, the starBase platform is expected to identify clinically relevant miRNA-target regulatory relationships, and reveal multi-dimensional post-transcriptional regulatory networks involving miRNAs and RBPs. starBase is available at http://starbase.sysu.edu.cn/ .",2013-01-01 +26009470,A Case-Control Study of Prenatal Thallium Exposure and Low Birth Weight in China.,"

Background

Thallium (Tl) is a highly toxic heavy metal widely present in the environment. Case reports have suggested that maternal exposure to high levels of Tl during pregnancy is associated with low birth weight (LBW), but epidemiological data are limited.

Objectives

This study was designed to evaluate whether prenatal Tl exposure is associated with an increased risk of LBW.

Methods

This case-control study involving 816 study participants (204 LBW cases and 612 matched controls) was conducted in Hubei Province, China, in 2012-2014. Tl concentrations were measured in maternal urine collected at delivery, and associations with LBW were evaluated using conditional logistic regression.

Results

Higher maternal urinary Tl levels were significantly associated with increased risk of LBW [crude odds ratio (OR) = 1.52; 95% CI: 1.00, 2.30 for the highest vs. lowest tertile], and the association was similarly elevated after adjustment for potential confounders (adjusted OR = 1.90; 95% CI: 1.01, 3.58 for the highest vs. lowest tertile). Stratified analyses showed slightly higher risk estimates for LBW associated with higher Tl levels for mothers < 28 years old and for mothers with lower household income; however, there was no statistical evidence of heterogeneity in risk according to maternal age (p for heterogeneity = 0.18) or household income (p for heterogeneity = 0.28).

Conclusion

To our knowledge, ours is the first case-control study to investigate the association between prenatal Tl exposure and LBW. The results suggest that prenatal exposure to high levels of Tl may be associated with an increased risk of LBW.

Citation

Xia W, Du X, Zheng T, Zhang B, Li Y, Bassig BA, Zhou A, Wang Y, Xiong C, Li Z, Yao Y, Hu J, Zhou Y, Liu J, Xue W, Ma Y, Pan X, Peng Y, Xu S. 2016. A case-control study of prenatal thallium exposure and low birth weight in China. Environ Health Perspect 124:164-169; http://dx.doi.org/10.1289/ehp.1409202.",2015-05-22 +24451197,GraphProt: modeling binding preferences of RNA-binding proteins.,"We present GraphProt, a computational framework for learning sequence- and structure-binding preferences of RNA-binding proteins (RBPs) from high-throughput experimental data. We benchmark GraphProt, demonstrating that the modeled binding preferences conform to the literature, and showcase the biological relevance and two applications of GraphProt models. First, estimated binding affinities correlate with experimental measurements. Second, predicted Ago2 targets display higher levels of expression upon Ago2 knockdown, whereas control targets do not. Computational binding models, such as those provided by GraphProt, are essential for predicting RBP binding sites and affinities in all tissues. GraphProt is freely available at http://www.bioinf.uni-freiburg.de/Software/GraphProt.",2014-01-22 +23658418,PconsC: combination of direct information methods and alignments improves contact prediction.,"

Summary

Recently, several new contact prediction methods have been published. They use (i) large sets of multiple aligned sequences and (ii) assume that correlations between columns in these alignments can be the results of indirect interaction. These methods are clearly superior to earlier methods when it comes to predicting contacts in proteins. Here, we demonstrate that combining predictions from two prediction methods, PSICOV and plmDCA, and two alignment methods, HHblits and jackhmmer at four different e-value cut-offs, provides a relative improvement of 20% in comparison with the best single method, exceeding 70% correct predictions for one contact prediction per residue.

Availability

The source code for PconsC along with supplementary data is freely available at http://c.pcons.net/

Contact

arne@bioinfo.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-08 +25336501,LIGSIFT: an open-source tool for ligand structural alignment and virtual screening.,"

Motivation

Shape-based alignment of small molecules is a widely used approach in computer-aided drug discovery. Most shape-based ligand structure alignment applications, both commercial and freely available ones, use the Tanimoto coefficient or similar functions for evaluating molecular similarity. Major drawbacks of using such functions are the size dependence of the score and the fact that the statistical significance of the molecular match using such metrics is not reported.

Results

We describe a new open-source ligand structure alignment and virtual screening (VS) algorithm, LIGSIFT, that uses Gaussian molecular shape overlay for fast small molecule alignment and a size-independent scoring function for efficient VS based on the statistical significance of the score. LIGSIFT was tested against the compounds for 40 protein targets available in the Directory of Useful Decoys and the performance was evaluated using the area under the ROC curve (AUC), the Enrichment Factor (EF) and Hit Rate (HR). LIGSIFT-based VS shows an average AUC of 0.79, average EF values of 20.8 and a HR of 59% in the top 1% of the screened library.

Availability and implementation

LIGSIFT software, including the source code, is freely available to academic users at http://cssb.biology.gatech.edu/LIGSIFT.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

skolnick@gatech.edu.",2014-10-21 +23734678,De novo inference of stratification and local admixture in sequencing studies.,"Analysis of population structures and genome local ancestry has become increasingly important in population and disease genetics. With the advance of next generation sequencing technologies, complete genetic variants in individuals' genomes are quickly generated, providing unprecedented opportunities for learning population evolution histories and identifying local genetic signatures at the SNP resolution. The successes of those studies critically rely on accurate and powerful computational tools that can fully utilize the sequencing information. Although many algorithms have been developed for population structure inference and admixture mapping, many of them only work for independent SNPs in genotype or haplotype format, and require a large panel of reference individuals. In this paper, we propose a novel probabilistic method for detecting population structure and local admixture. The method takes input of sequencing data, genotype data and haplotype data. The method characterizes the dependence of genetic variants via haplotype segmentation, such that all variants detected in a sequencing study can be fully utilized for inference. The method further utilizes a infinite-state Bayesian Markov model to perform de novo stratification and admixture inference. Using simulated datasets from HapMapII and 1000Genomes, we show that our method performs superior than several existing algorithms, particularly when limited or no reference individuals are available. Our method is applicable to not only human studies but also studies of other species of interests, for which little reference information is available.Software Availability: http://stat.psu.edu/~yuzhang/software/dbm.tar.",2013-04-10 +24067102,DaGO-Fun: tool for Gene Ontology-based functional analysis using term information content measures.,"

Background

The use of Gene Ontology (GO) data in protein analyses has largely contributed to the improved outcomes of these analyses. Several GO semantic similarity measures have been proposed in recent years and provide tools that allow the integration of biological knowledge embedded in the GO structure into different biological analyses. There is a need for a unified tool that provides the scientific community with the opportunity to explore these different GO similarity measure approaches and their biological applications.

Results

We have developed DaGO-Fun, an online tool available at http://web.cbio.uct.ac.za/ITGOM, which incorporates many different GO similarity measures for exploring, analyzing and comparing GO terms and proteins within the context of GO. It uses GO data and UniProt proteins with their GO annotations as provided by the Gene Ontology Annotation (GOA) project to precompute GO term information content (IC), enabling rapid response to user queries.

Conclusions

The DaGO-Fun online tool presents the advantage of integrating all the relevant IC-based GO similarity measures, including topology- and annotation-based approaches to facilitate effective exploration of these measures, thus enabling users to choose the most relevant approach for their application. Furthermore, this tool includes several biological applications related to GO semantic similarity scores, including the retrieval of genes based on their GO annotations, the clustering of functionally related genes within a set, and term enrichment analysis.",2013-09-25 +23282330,ChemEx: information extraction system for chemical data curation.,"

Background

Manual chemical data curation from publications is error-prone and time-consuming, and the resulting data sets are hard to keep up to date. Automatic information extraction can be used as a tool to reduce these problems. Since chemical structures are usually described in images, information extraction needs to combine structure image recognition with text mining.

Results

We have developed ChemEx, a chemical information extraction system. ChemEx processes both text and images in publications. Text annotator is able to extract compound, organism, and assay entities from text content while structure image recognition enables translation of chemical raster images to machine readable format. A user can view annotated text along with summarized information of compounds, organism that produces those compounds, and assay tests.

Conclusions

ChemEx facilitates and speeds up chemical data curation by extracting compounds, organisms, and assays from a large collection of publications. The software and corpus can be downloaded from http://www.biotec.or.th/isl/ChemEx.",2012-12-13 +25775999,Hepatitis B virus pre-S2 mutant large surface protein inhibits DNA double-strand break repair and leads to genome instability in hepatocarcinogenesis.,"Although hepatitis B virus (HBV) has been established to cause hepatocellular carcinoma (HCC), the exact mechanism remains to be clarified. Type II ground glass hepatocytes (GGHs) harbouring the HBV pre-S2 mutant large surface protein (LHBS) have been recognized as a morphologically distinct hallmark of HCC in the advanced stages of chronic HBV infection. Considering its preneoplastic nature, we hypothesized that type II GGH may exhibit high genomic instability, which is important for the carcinogenic process in chronic HBV carriers. In this study we found that pre-S2 mutant LHBS directly interacted with importin α1, the key factor that recognizes cargos undergoing nuclear transportation mediated by the importin α/β-associated nuclear pore complex (NPC). By interacting with importin α1, which inhibits its function as an NPC factor, pre-S2 mutant LHBS blocked nuclear transport of an essential DNA repair and recombination factor, Nijmegen breakage syndrome 1 (NBS1), upon DNA damage, thereby delaying the formation of nuclear foci at the sites of DNA double-strand breaks (DSBs). Pre-S2 mutant LHBS was also found to block NBS1-mediated homologous recombination repair and induce multi-nucleation of cells. In addition, pre-S2 mutant LHBS transgenic mice showed genomic instability, indicated by increased global gene copy number variations (CNVs), which were significantly higher than those in hepatitis B virus X mice, indicating that pre-S2 mutant LHBS is the major viral oncoprotein inducing genomic instability in HBV-infected hepatocytes. 
Consistently, the human type II GGHs in HCC patients exhibited increased DNA DSBs representing significant genomic instability. In conclusion, type II GGHs harbouring HBV pre-S2 mutant oncoprotein represent a high-risk marker for the loss of genome integrity in chronic HBV carriers and explain the complex chromosome changes in HCCs. Mouse array CGH raw data: GEO Accession No. GSE61378 (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE61378).",2015-04-22 +24336809,NeuroPID: a predictor for identifying neuropeptide precursors from metazoan proteomes.,"

Motivation

The evolution of multicellular organisms is associated with increasing variability of molecules governing behavioral and physiological states. This is often achieved by neuropeptides (NPs) that are produced in neurons from a longer protein, named neuropeptide precursor (NPP). The maturation of NPs occurs through a sequence of proteolytic cleavages. The difficulty in identifying NPPs is a consequence of their diversity and the lack of applicable sequence similarity among the short functionally related NPs.

Results

Herein, we describe Neuropeptide Precursor Identifier (NeuroPID), a machine learning scheme that predicts metazoan NPPs. NeuroPID was trained on hundreds of identified NPPs from the UniProtKB database. Some 600 features were extracted from the primary sequences and processed using support vector machines (SVM) and ensemble decision tree classifiers. These features combined biophysical, chemical and informational-statistical properties of NPs and NPPs. Other features were guided by the defining characteristics of the dibasic cleavage sites motif. NeuroPID reached 89-94% accuracy and 90-93% precision in cross-validation blind tests against known NPPs (with an emphasis on Chordata and Arthropoda). NeuroPID also identified NPP-like proteins from extensively studied model organisms as well as from poorly annotated proteomes. We then focused on the most significant sets of features that contribute to the success of the classifiers. We propose that NPPs are attractive targets for investigating and modulating behavior, metabolism and homeostasis and that a rich repertoire of NPs remains to be identified.

Availability

NeuroPID source code is freely available at http://www.protonet.cs.huji.ac.il/neuropid",2013-12-13 +26311872,In Vitro Evidence Supports Membrane Alanyl Aminopeptidase N as a Receptor for a Plant Virus in the Pea Aphid Vector.,"

Unlabelled

Insect-borne plant viruses cause significant agricultural losses and jeopardize sustainable global food production. Although blocking plant virus transmission would allow for crop protection, virus receptors in insect vectors are unknown. Here we identify membrane alanyl aminopeptidase N (APN) as a receptor for pea enation mosaic virus (PEMV) coat protein (CP) in the gut of the pea aphid, Acyrthosiphon pisum, using a far-Western blot method. Pulldown and immunofluorescence binding assays and surface plasmon resonance were used to confirm and characterize CP-APN interaction. PEMV virions and a peptide comprised of PEMV CP fused to a proline-rich hinge (-P-) and green fluorescent protein (CP-P-GFP) specifically bound to APN. Recombinant APN expressed in Sf9 cells resulted in internalization of CP-P-GFP, which was visualized by confocal microscopy; such internalization is an expected hallmark of a functional gut receptor. Finally, in assays with aphid gut-derived brush border membrane vesicles, binding of CP-P-GFP competed with binding of GBP3.1, a peptide previously demonstrated to bind to APN in the aphid gut and to impede PEMV uptake into the hemocoel; this finding supports the hypothesis that GBP3.1 and PEMV bind to and compete for the same APN receptor. These in vitro data combined with previously published in vivo experiments (S. Liu, S. Sivakumar, W. O. Sparks, W. A. Miller, and B. C. Bonning, Virology 401:107-116, 2010, http://dx.doi.org/10.1016/j.virol.2010.02.009) support the identification of APN as the first receptor in a plant virus vector. Knowledge of this receptor will provide for technologies based on PEMV-APN interaction designed to block plant virus transmission and to suppress aphid populations.

Importance

A significant proportion of global food production is lost to insect pests. Aphids, in addition to weakening plants by feeding on their sap, are responsible for transmitting about half of the plant viruses vectored by insects. Growers rely heavily on the application of chemical insecticides to manage both aphids and aphid-vectored plant viral disease. To increase our understanding of plant virus-aphid vector interaction, we provide in vitro evidence supporting earlier in vivo work for identification of a receptor protein in the aphid gut called aminopeptidase N, which is responsible for entry of the plant virus pea enation mosaic virus into the pea aphid vector. Enrichment of proteins found on the surface of the aphid gut epithelium resulted in identification of this first aphid gut receptor for a plant virus. This discovery is particularly important since the disruption of plant virus binding to such a receptor may enable the development of a nonchemical strategy for controlling aphid-vectored plant viruses to maximize food production.",2015-08-26 +22962492,Finding differentially expressed regions of arbitrary length in quantitative genomic data based on marked point process model.,"

Motivation

High-throughput nucleotide sequencing technologies provide large amounts of quantitative genomic data at nucleotide resolution, which are important for present and future biomedical research; for example, differential analysis of base-level RNA expression data will improve our understanding of the transcriptome, including both coding and non-coding genes. However, most studies of these data have relied on existing genome annotations and thus are limited to the analysis of known transcripts.

Results

In this article, we propose a novel method based on a marked point process model to find differentially expressed genomic regions of arbitrary length without using genome annotations. The presented method conducts a statistical test for differential analysis in regions of various lengths at each nucleotide and searches the optimal configuration of the regions by using a Monte Carlo simulation. We applied the proposed method to both synthetic and real genomic data, and their results demonstrate the effectiveness of our method.

Availability

The program used in this study is available at https://sites.google.com/site/hiroshihatsuda/.

Contact

H.Hatsuda@warwick.ac.uk.",2012-09-01 +23109181,ADAM: automated data management for research datasets.,"

Unlabelled

Existing repositories for experimental datasets typically capture snapshots of data acquired using a single experimental technique and often require manual population and continual curation. We present a storage system for heterogeneous research data that performs dynamic automated indexing to provide powerful search, discovery and collaboration features without the restrictions of a structured repository. ADAM is able to index many commonly used file formats generated by laboratory assays and therefore offers specific advantages to the experimental biology community. However, it is not domain specific and can promote sharing and re-use of working data across scientific disciplines.

Availability and implementation

ADAM is implemented using Java and supported on Linux. It is open source under the GNU General Public License v3.0. Installation instructions, binary code, a demo system and a virtual machine image are available at http://www.imperial.ac.uk/bioinfsupport/resources/software/adam.",2012-10-29 +24339369,Insight into IKBKG/NEMO locus: report of new mutations and complex genomic rearrangements leading to incontinentia pigmenti disease.,"Incontinentia pigmenti (IP) is an X-linked-dominant Mendelian disorder caused by mutation in the IKBKG/NEMO gene, encoding for NEMO/IKKgamma, a regulatory protein of nuclear factor kappaB (NF-kB) signaling. In more than 80% of cases, IP is due to recurrent or nonrecurrent deletions causing loss-of-function (LoF) of NEMO/IKKgamma. We review how the local architecture of the IKBKG/NEMO locus with segmental duplication and a high frequency of repetitive elements favor de novo aberrant recombination through different mechanisms producing genomic microdeletion. We report here a new microindel (c.436_471delinsT, p.Val146X) arising through a DNA-replication-repair fork-stalling-and-template-switching and microhomology-mediated-end-joining mechanism in a sporadic IP case. The LoF mutations of IKBKG/NEMO leading to IP include small insertions/deletions (indel) causing frameshift and premature stop codons, which account for 10% of cases. We here present 21 point mutations previously unreported, which further extend the spectrum of pathologic variants: 14/21 predict LoF because of premature stop codon (6/14) or frameshift (8/14), whereas 7/21 predict a partial loss of NEMO/IKKgamma activity (two splicing and five missense). We review how the analysis of IP-associated IKBKG/NEMO hypomorphic mutants has contributed to the understanding of the pathophysiological mechanism of IP disease and has provided important information on affected NF-kB signaling. 
We built a locus-specific database listing all IKBKG/NEMO variants, accessible at http://IKBKG.lovd.nl.",2013-12-12 +25368622,Sustained mitogen-activated protein kinase activation reprograms defense metabolism and phosphoprotein profile in Arabidopsis thaliana.,"Mitogen-activated protein kinases (MAPKs) target a variety of protein substrates to regulate cellular signaling processes in eukaryotes. In plants, the number of identified MAPK substrates that control plant defense responses is still limited. Here, we generated transgenic Arabidopsis thaliana plants with an inducible system to simulate in vivo activation of two stress-activated MAPKs, MPK3, and MPK6. Metabolome analysis revealed that this artificial MPK3/6 activation (without any exposure to pathogens or other stresses) is sufficient to drive the production of major defense-related metabolites, including various camalexin, indole glucosinolate and agmatine derivatives. An accompanying (phospho)proteome analysis led to detection of hundreds of potential phosphoproteins downstream of MPK3/6 activation. Besides known MAPK substrates, many candidates on this list possess typical MAPK-targeted phosphosites and in many cases, the corresponding phosphopeptides were detected by mass spectrometry. Notably, several of these putative phosphoproteins have been reported to be associated with the biosynthesis of antimicrobial defense substances (e.g., WRKY transcription factors and proteins encoded by the genes from the ""PEN"" pathway required for penetration resistance to filamentous pathogens). Thus, this work provides an inventory of candidate phosphoproteins, including putative direct MAPK substrates, for future analysis of MAPK-mediated defense control. (Proteomics data are available with the identifier PXD001252 via ProteomeXchange, http://proteomecentral.proteomexchange.org).",2014-10-20 +25336138,Detection of atypical genes in virus families using a one-class SVM.,"

Background

The diversity of viruses, the absence of universally common genes in them, and their ability to act as carriers of genetic material make assessment of evolutionary paths of viral genes very difficult. One important factor contributing to this complexity is horizontal gene transfer.

Results

We explore the possibility of systematically identifying atypical genes within virus families, including viruses whose genome is not encoded by double-stranded DNA. Our method is based on gene statistical features that differ in genes that were subject to recent horizontal gene transfer from those of the genome in which they are observed. We employ a one-class SVM approach to detect atypical genes within a virus family based on their statistical signatures and without explicit knowledge of the source species. The simplicity of the statistical features used makes the method applicable to various viruses irrespective of their genome size or type.

Conclusions

On simulated data, the method can robustly identify alien genes irrespective of the coding nucleic acid found in a virus. It also compares well to results obtained in related studies for double-stranded DNA viruses. Its value in practice is confirmed by the identification of isolated examples of horizontal gene transfer events that have already been described in the literature. A Python package implementing the method and the results for the analyzed virus families are available at http://svm-agp.bioinf.mpi-inf.mpg.de.",2014-10-20 +24330590,Polysaccharides utilization in human gut bacterium Bacteroides thetaiotaomicron: comparative genomics reconstruction of metabolic and regulatory networks.,"

Background

Bacteroides thetaiotaomicron, a predominant member of the human gut microbiota, is characterized by its ability to utilize a wide variety of polysaccharides using the extensive saccharolytic machinery that is controlled by an expanded repertoire of transcription factors (TFs). The availability of genomic sequences for multiple Bacteroides species opens an opportunity for their comparative analysis to enable characterization of their metabolic and regulatory networks.

Results

A comparative genomics approach was applied for the reconstruction and functional annotation of the carbohydrate utilization regulatory networks in 11 Bacteroides genomes. Bioinformatics analysis of promoter regions revealed putative DNA-binding motifs and regulons for 31 orthologous TFs in the Bacteroides. Among the analyzed TFs there are 4 SusR-like regulators, 16 AraC-like hybrid two-component systems (HTCSs), and 11 regulators from other families. Novel DNA motifs of HTCSs and SusR-like regulators in the Bacteroides have the common structure of direct repeats with a long spacer between two conserved sites.

Conclusions

The inferred regulatory network in B. thetaiotaomicron contains 308 genes encoding polysaccharide and sugar catabolic enzymes, carbohydrate-binding and transport systems, and TFs. The analyzed TFs control pathways for utilization of host and dietary glycans to monosaccharides and their further interconversions to intermediates of the central metabolism. The reconstructed regulatory network allowed us to suggest and refine specific functional assignments for sugar catabolic enzymes and transporters, providing a substantial improvement to the existing metabolic models for B. thetaiotaomicron. The obtained collection of reconstructed TF regulons is available in the RegPrecise database (http://regprecise.lbl.gov).",2013-12-12 +26583988,PTRAJ and CPPTRAJ: Software for Processing and Analysis of Molecular Dynamics Trajectory Data.,"We describe PTRAJ and its successor CPPTRAJ, two complementary, portable, and freely available computer programs for the analysis and processing of time series of three-dimensional atomic positions (i.e., coordinate trajectories) and the data therein derived. Common tools include the ability to manipulate the data to convert among trajectory formats, process groups of trajectories generated with ensemble methods (e.g., replica exchange molecular dynamics), image with periodic boundary conditions, create average structures, strip subsets of the system, and perform calculations such as RMS fitting, measuring distances, B-factors, radii of gyration, radial distribution functions, and time correlations, among other actions and analyses. Both the PTRAJ and CPPTRAJ programs and source code are freely available under the GNU General Public License version 3 and are currently distributed within the AmberTools 12 suite of support programs that make up part of the Amber package of computer programs (see http://ambermd.org ). 
This overview describes the general design, features, and history of these two programs, as well as algorithmic improvements and new features available in CPPTRAJ.",2013-06-25 +23662895,MetaboQuant: a tool combining individual peak calibration and outlier detection for accurate metabolite quantification in 1D (1)H and (1)H-(13)C HSQC NMR spectra.,"Solution nuclear magnetic resonance (NMR) spectroscopy is widely used to analyze complex mixtures of organic compounds such as biological fluids and tissue extracts. Targeted profiling approaches with reliable compound quantification are hampered, however, by signal overlap and other interferences. Here, we present a tool named MetaboQuant for automated compound quantification from pre-processed 1D and 2D heteronuclear single quantum coherence (HSQC) NMR spectral data and concomitant validation of results. Performance of MetaboQuant was tested on a urinary spike-in data set and compared with other quantification strategies. The use of individual calibration factors in combination with the validation algorithms of MetaboQuant raises the reliability of the quantification results. MetaboQuant can be downloaded at http://genomics.uni-regensburg.de/site/institute/software/metaboquant/ as stand-alone software for Windows or run on other operating systems from within Matlab. Separate software for peak fitting and integration is necessary in order to use MetaboQuant.",2013-05-01 +24449789,"Sphingomonas daechungensis sp. nov., isolated from sediment of a eutrophic reservoir.","Strain CH15-11(T), isolated from a sediment sample taken from Daechung Reservoir, South Korea, during the late-blooming period of cyanobacteria, was found to be a Gram-stain-negative, non-motile, non-spore-forming, rod-shaped and aerobic bacterium. Strain CH15-11(T) grew optimally at pH 7 and 28-30 °C. 
According to a phylogenetic tree based on 16S rRNA gene sequences, strain CH15-11(T) belonged to the genus Sphingomonas and clustered with Sphingomonas sediminicola Dae 20(T), with which it shared the highest 16S rRNA gene sequence similarity (97.6 %). Chemotaxonomic analysis showed that strain CH15-11(T) had characteristics typical of members of the genus Sphingomonas, such as the presence of sphingoglycolipid, ubiquinone Q-10 and sym-homospermidine. Plus, strain CH15-11(T) included summed feature 8 (C18 : 1ω7c and/or C18 : 1ω6c) and C16 : 0 as the major fatty acids. The genomic DNA G+C content was 65.6 mol%. Sequence data showed that strain CH15-11(T) was most closely related to Sphingomonas sediminicola Dae 20(T) (97.6 %), Sphingomonas ginsengisoli Gsoil 634(T) (97.2 %) and http://www.genebank.go.kr/eng/microbe/microbe_search_view.jsp?sStrainsn=4602Sphingomonas jaspi TDMA-16(T) (97.0 %). However, the DNA-DNA relatedness values between strain CH15-11(T) and the most closely related type strains were within a range of 35-59 %. Thus, based on the phylogenetic, phenotypic and genetic data, strain CH15-11(T) was classified as a member of the genus Sphingomonas as a representative of a novel species, for which the name Sphingomonas daechungensis sp. nov. is proposed. The type strain is CH15-11(T) ( = KCTC 23718(T) = JCM 17887(T)).",2014-01-21 +24333540,EcoliOverExpressionDB: a database of recombinant protein overexpression in E. coli.,"

Unlabelled

Recombinant protein production is a significant biotechnological process as it allows researchers to produce a specific protein in desired quantities. Escherichia coli (E. coli) is the most popular heterologous expression host for the production of recombinant proteins due to its advantages such as low cost, high-productivity, well-characterized genetics, simple growth requirements and rapid growth. There are a number of factors that influence the expression level of a recombinant protein in E. coli which are the gene to be expressed, the expression vector, the expression host, and the culture condition. The major motivation to develop our database, EcoliOverExpressionDB, is to provide a means for researchers to quickly locate key factors in the overexpression of certain proteins. Such information would be a useful guide for the overexpression of similar proteins in E. coli. To the best of the present researchers' knowledge, in general and specifically in E. coli, EcoliOverExpressionDB is the first database of recombinant protein expression experiments which gathers the influential parameters on protein overexpression and the results in one place.

Availability

EcoliOverExpressionDB is freely available and accessible using all major browsers at http://birg4.fbb.utm.my:8080/EcoliOverExpressionDB/.",2013-12-11 +24372040,Automatic phylogenetic classification of bacterial beta-lactamase sequences including structural and antibiotic substrate preference information.,"Beta lactams comprise the largest and still most effective group of antibiotics, but bacteria can gain resistance through different beta lactamases that can degrade these antibiotics. We developed a user friendly tree building web server that allows users to assign beta lactamase sequences to their respective molecular classes and subclasses. Further clinically relevant information includes if the gene is typically chromosomal or transferable through plasmids as well as listing the antibiotics which the most closely related reference sequences are known to target and cause resistance against. This web server can automatically build three phylogenetic trees: the first tree with closely related sequences from a Tachyon search against the NCBI nr database, the second tree with curated reference beta lactamase sequences, and the third tree built specifically from substrate binding pocket residues of the curated reference beta lactamase sequences. We show that the latter is better suited to recover antibiotic substrate assignments through nearest neighbor annotation transfer. The users can also choose to build a structural model for the query sequence and view the binding pocket residues of their query relative to other beta lactamases in the sequence alignment as well as in the 3D structure relative to bound antibiotics. 
This web server is freely available at http://blac.bii.a-star.edu.sg/.",2013-12-11 +24326699,Fast and simple epidemiological typing of Pseudomonas aeruginosa using the double-locus sequence typing (DLST) method.,"Although the molecular typing of Pseudomonas aeruginosa is important to understand the local epidemiology of this opportunistic pathogen, it remains challenging. Our aim was to develop a simple typing method based on the sequencing of two highly variable loci. Single-strand sequencing of three highly variable loci (ms172, ms217, and oprD) was performed on a collection of 282 isolates recovered between 1994 and 2007 (from patients and the environment). As expected, the resolution of each locus alone [number of types (NT) = 35-64; index of discrimination (ID) = 0.816-0.964] was lower than the combination of two loci (NT = 78-97; ID = 0.966-0.971). As each pairwise combination of loci gave similar results, we selected the most robust combination with ms172 [reverse; R] and ms217 [R] to constitute the double-locus sequence typing (DLST) scheme for P. aeruginosa. This combination gave: (i) a complete genotype for 276/282 isolates (typability of 98%), (ii) 86 different types, and (iii) an ID of 0.968. Analysis of multiple isolates from the same patients or taps showed that DLST genotypes are generally stable over a period of several months. The high typability, discriminatory power, and ease of use of the proposed DLST scheme makes it a method of choice for local epidemiological analyses of P. aeruginosa. Moreover, the possibility to give unambiguous definition of types allowed to develop an Internet database ( http://www.dlst.org ) accessible by all.",2013-12-11 +25887779,OTO: Ontology Term Organizer.,"

Background

The need to create controlled vocabularies such as ontologies for knowledge organization and access has been widely recognized in various domains. Despite the indispensable need of thorough domain knowledge in ontology construction, most software tools for ontology construction are designed for knowledge engineers and not for domain experts to use. The differences in the opinions of different domain experts and in the terminology usages in source literature are rarely addressed by existing software.

Methods

OTO software was developed based on the Agile principles. Through iterations of software release and user feedback, new features are added and existing features modified to make the tool more intuitive and efficient to use for small and large data sets. The software is open source and built in Java.

Results

Ontology Term Organizer (OTO; http://biosemantics.arizona.edu/OTO/ ) is a user-friendly, web-based, consensus-promoting, open source application for organizing domain terms by dragging and dropping terms to appropriate locations. The application is designed for users with specific domain knowledge such as biology but not in-depth ontology construction skills. Specifically OTO can be used to establish is_a, part_of, synonym, and order relationships among terms in any domain that reflects the terminology usage in source literature and based on multiple experts' opinions. The organized terms may be fed into formal ontologies to boost their coverage. All datasets organized on OTO are publicly available.

Conclusion

OTO has been used to organize the terms extracted from thirty volumes of Flora of North America and Flora of China combined, in addition to some smaller datasets of different taxon groups. User feedback indicates that the tool is efficient and user friendly. Being open source software, the application can be modified to fit varied term organization needs for different domains.",2015-02-15 +25573913,Integrating alignment-based and alignment-free sequence similarity measures for biological sequence classification.,"

Motivation

Alignment-based sequence similarity searches, while accurate for some type of sequences, can produce incorrect results when used on more divergent but functionally related sequences that have undergone the sequence rearrangements observed in many bacterial and viral genomes. Here, we propose a classification model that exploits the complementary nature of alignment-based and alignment-free similarity measures with the aim to improve the accuracy with which DNA and protein sequences are characterized.

Results

Our model classifies sequences using a combined sequence similarity score calculated by adaptively weighting the contribution of different sequence similarity measures. Weights are determined independently for each sequence in the test set and reflect the discriminatory ability of individual similarity measures in the training set. Because the similarity between some sequences is determined more accurately with one type of measure rather than another, our classifier allows different sets of weights to be associated with different sequences. Using five different similarity measures, we show that our model significantly improves the classification accuracy over the current composition- and alignment-based models, when predicting the taxonomic lineage for both short viral sequence fragments and complete viral sequences. We also show that our model can be used effectively for the classification of reads from a real metagenome dataset as well as protein sequences.

Availability and implementation

All the datasets and the code used in this study are freely available at https://collaborators.oicr.on.ca/vferretti/borozan_csss/csss.html.

Contact

ivan.borozan@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-01-07 +24334957,The Transformer database: biotransformation of xenobiotics.,"As the number of prescribed drugs is constantly rising, drug-drug interactions are an important issue. The simultaneous administration of several drugs can cause severe adverse effects based on interactions with the same metabolizing enzyme(s). The Transformer database (http://bioinformatics.charite.de/transformer) contains integrated information on the three phases of biotransformation (modification, conjugation and excretion) of 3000 drugs and >350 relevant food ingredients (e.g. grapefruit juice) and herbs, which are catalyzed by 400 proteins. A total of 100,000 interactions were found through text mining and manual validation. The 3D structures of 200 relevant proteins are included. The database enables users to search for drugs with a visual display of known interactions with phase I (Cytochrome P450) and phase II enzymes, transporters, food and herbs. For each interaction, PubMed references are given. To detect mutual impairments of drugs, the drug-cocktail tool displays interactions between selected drugs. By choosing the indication for a drug, the tool offers suggestions for alternative medications to avoid metabolic conflicts. Drug interactions can also be visualized in an interactive network view. Additionally, prodrugs, including their mechanisms of activation, and further information on enzymes of biotransformation, including 3D models, can be viewed.",2013-12-10 +21685572,Modeling of Cell-to-Cell Communication Processes with Petri Nets Using the Example of Quorum Sensing.,"The understanding of the molecular mechanism of cell-to-cell communication is fundamental for system biology. Up to now, the main objectives of bioinformatics have been reconstruction, modeling and analysis of metabolic, regulatory and signaling processes, based on data generated from high-throughput technologies. 
Cell-to-cell communication or quorum sensing (QS), the use of small molecule signals to coordinate complex patterns of behavior in bacteria, has been the focus of many reports over the past decade. Based on the quorum sensing process of the organism Aliivibrio salmonicida, we aim at developing a functional Petri net, which will allow modeling and simulating cell-to-cell communication processes. Using a new editor-controlled information system called VANESA (http://vanesa.sf.net), we present how to combine different fields of studies such as life-science, database consulting, modeling, visualization and simulation for a semi-automatic reconstruction of the complex signaling quorum sensing network. We show how cell-to-cell communication processes and information-flow within a cell and across cell colonies can be modeled using VANESA and how those models can be simulated with Petri net network structures in a sophisticated way.",2011-01-01 +27481523,QSAR and Predictors of Eye and Skin Effects.,"In this study, the ensemble of features and training samples was examined with a collection of support vector machines. The effects of data sampling methods, ratio of positive to negative compounds, and types of base models combiner to produce ensemble models were explored. The ensemble method was applied to produce four separate in silico models to classify the labels for eye/skin corrosion (H314), skin irritation (H315), serious eye damage (H318), and eye irritation (H319), which are defined in the ""Globally Harmonized System of Classification and Labelling of Chemicals"". To the best of our knowledge, the training set used in this work is one of the largest (made of publicly available data) with acceptable prediction performances. 
These models were distributed via PaDEL-DDPredictor (http://padel.nus.edu.sg/software/padelddpredictor) that can be downloaded freely for public use.",2013-03-08 +24339831,Circ2Traits: a comprehensive database for circular RNA potentially associated with disease and traits.,"Circular RNAs are new players in regulation of post transcriptional gene expression. Animal genomes express many circular RNAs from diverse genomic locations. A recent study has validated a fairly large number of circular RNAs in human, mouse, and nematode. Circular RNAs play a crucial role in fine tuning the level of miRNA mediated regulation of gene expression by sequestering the miRNAs. Their interaction with disease associated miRNAs indicates that circular RNAs are important for disease regulation. In this paper we studied the potential association of circular RNAs (circRNA) with human diseases in two different ways. Firstly, the interactions of circRNAs with disease associated miRNAs were identified, following which the likelihood of a circRNA being associated with a disease was calculated. For the miRNAs associated with individual diseases, we constructed a network of predicted interactions between the miRNAs and protein coding, long non-coding and circular RNA genes. We carried out gene ontology (GO) enrichment analysis on the set of protein coding genes in the miRNA- circRNA interactome of individual diseases to check the enrichment of genes associated with particular biological processes. Secondly, disease associated SNPs were mapped on circRNA loci, and Argonaute (Ago) interaction sites on circular RNAs were identified. 
We compiled a database of disease-circRNA association in Circ2Traits (http://gyanxet-beta.com/circdb/), the first comprehensive knowledgebase of potential association of circular RNAs with diseases in human.",2013-12-10 +25022716,Impact of prophylactic central neck dissection on oncologic outcomes of papillary thyroid carcinoma: a review.,"Prophylactic neck dissection (PND) for papillary thyroid carcinoma (PTC) is controversial. Our aim was to assess current levels of evidence (LE) according to the Oxford Centre for Evidence-based Medicine ( http://www.cebm.net/?O=1025 ) regarding the oncologic benefits of PND. Data were analyzed via MEDLINE keywords: PTC, differentiated thyroid carcinoma, PND, central lymph node metastases, central compartment, recurrence-free survival. There was conflicting evidence regarding the rate of reoperation for recurrence, with some studies showing a lower rate after PND with increased recurrence-free survival and a higher rate of undetectable pre- and post-ablation thyroglobulin levels (LE 4), whereas other studies did not show a difference (LE 4). Only one study (LE 4) showed improved disease-specific survival with PND. PND may improve recurrence-free survival, although this is supported by only a low LE. Current recommendations can only be based on low-level evidence.",2014-06-11 +24564201,Rigidity analysis of protein biological assemblies and periodic crystal structures.,"

Background

We initiate in silico rigidity-theoretical studies of biological assemblies and small crystals for protein structures. The goal is to determine if, and how, the interactions among neighboring cells and subchains affect the flexibility of a molecule in its crystallized state. We use experimental X-ray crystallography data from the Protein Data Bank (PDB). The analysis relies on an effcient graph-based algorithm. Computational experiments were performed using new protein rigidity analysis tools available in the new release of our KINARI-Web server http://kinari.cs.umass.edu.

Results

We provide two types of results: on biological assemblies and on crystals. We found that when only isolated subchains are considered, structural and functional information may be missed. Indeed, the rigidity of biological assemblies is sometimes dependent on the count and placement of hydrogen bonds and other interactions among the individual subchains of the biological unit. Similarly, the rigidity of small crystals may be affected by the interactions between atoms belonging to different unit cells.

Conclusion

The rigidity analysis of a single asymmetric unit may not accurately reflect the protein's behavior in the tightly packed crystal environment. Using our KINARI software, we demonstrated that additional functional and rigidity information can be gained by analyzing a protein's biological assembly and/or crystal structure. However, performing a larger scale study would be computationally expensive (due to the size of the molecules involved). Overcoming this limitation will require novel mathematical and computational extensions to our software.",2013-11-05 +24320595,Automated workflow-based exploitation of pathway databases provides new insights into genetic associations of metabolite profiles.,"

Background

Genome-wide association studies (GWAS) have identified many common single nucleotide polymorphisms (SNPs) that associate with clinical phenotypes, but these SNPs usually explain just a small part of the heritability and have relatively modest effect sizes. In contrast, SNPs that associate with metabolite levels generally explain a higher percentage of the genetic variation and demonstrate larger effect sizes. Still, the discovery of SNPs associated with metabolite levels is challenging since testing all metabolites measured in typical metabolomics studies with all SNPs comes with a severe multiple testing penalty. We have developed an automated workflow approach that utilizes prior knowledge of biochemical pathways present in databases like KEGG and BioCyc to generate a smaller SNP set relevant to the metabolite. This paper explores the opportunities and challenges in the analysis of GWAS of metabolomic phenotypes and provides novel insights into the genetic basis of metabolic variation through the re-analysis of published GWAS datasets.

Results

Re-analysis of the published GWAS dataset from Illig et al. (Nature Genetics, 2010) using a pathway-based workflow (http://www.myexperiment.org/packs/319.html), confirmed previously identified hits and identified a new locus of human metabolic individuality, associating Aldehyde dehydrogenase family1 L1 (ALDH1L1) with serine/glycine ratios in blood. Replication in an independent GWAS dataset of phospholipids (Demirkan et al., PLoS Genetics, 2012) identified two novel loci supported by additional literature evidence: GPAM (Glycerol-3 phosphate acyltransferase) and CBS (Cystathionine beta-synthase). In addition, the workflow approach provided novel insight into the affected pathways and relevance of some of these gene-metabolite pairs in disease development and progression.

Conclusions

We demonstrate the utility of automated exploitation of background knowledge present in pathway databases for the analysis of GWAS datasets of metabolomic phenotypes. We report novel loci and potential biochemical mechanisms that contribute to our understanding of the genetic basis of metabolic variation and its relationship to disease development and progression.",2013-12-09 +22823139,|SE|S|AM|E| Barcode: NGS-oriented software for amplicon characterization--application to species and environmental barcoding.,"Progress in NGS technologies has opened up new opportunities for characterizing biodiversity, both for individual specimen identification and for environmental barcoding. Although the amount of data available to biologist is increasing, user-friendly tools to facilitate data analysis have yet to be developed. Our aim, with |SE|S|AM|E| Barcode, is to provide such support through a unified platform. The sequences are analysed through a pipeline that (i) processes NGS amplicon runs, filtering markers and samples, (ii) builds reference libraries and finally (iii) identifies (barcodes) the sequences in each amplicon from the reference library. We use a simulated data set for specimen identification and a recently published data set for environmental barcoding to validate the method. The results obtained are consistent with the expected characterizations (in silico and previously published, respectively). |SE|S|AM|E| Barcode and its documentation are freely available under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported Licence for Windows and Linux from http://www1.montpellier.inra.fr/CBGP/NGS/.",2012-07-23 +24522604,Jasechko et al. reply.,"replying to A. M. J. Coenders-Gerrits et al. 506, http://dx.doi.org/10.1038/nature12925 (2014)In their Comment, Coenders-Gerrits et al. 
suggest that our conclusion that transpiration dominates the terrestrial water cycle is biased by unrepresentative input data and optimistic uncertainty ranges related to runoff, interception and the isotopic compositions of transpired and evaporated moisture. We clearly presented the uncertainties applied in our Monte-Carlo sensitivity analysis, we reported percentile ranges of results rather than standard deviations to best communicate the nonlinear nature of the isotopic evaporation model, and we highlighted that the uncertainty in our calculation remains large, particularly in humid catchments (for example, figure 2 in our paper).",2014-02-01 +23758618,Web-based visual analysis for high-throughput genomics.,"

Background

Visualization plays an essential role in genomics research by making it possible to observe correlations and trends in large datasets as well as communicate findings to others. Visual analysis, which combines visualization with analysis tools to enable seamless use of both approaches for scientific investigation, offers a powerful method for performing complex genomic analyses. However, there are numerous challenges that arise when creating rich, interactive Web-based visualizations/visual analysis applications for high-throughput genomics. These challenges include managing data flow from Web server to Web browser, integrating analysis tools and visualizations, and sharing visualizations with colleagues.

Results

We have created a platform simplifies the creation of Web-based visualization/visual analysis applications for high-throughput genomics. This platform provides components that make it simple to efficiently query very large datasets, draw common representations of genomic data, integrate with analysis tools, and share or publish fully interactive visualizations. Using this platform, we have created a Circos-style genome-wide viewer, a generic scatter plot for correlation analysis, an interactive phylogenetic tree, a scalable genome browser for next-generation sequencing data, and an application for systematically exploring tool parameter spaces to find good parameter values. All visualizations are interactive and fully customizable. The platform is integrated with the Galaxy (http://galaxyproject.org) genomics workbench, making it easy to integrate new visual applications into Galaxy.

Conclusions

Visualization and visual analysis play an important role in high-throughput genomics experiments, and approaches are needed to make it easier to create applications for these activities. Our framework provides a foundation for creating Web-based visualizations and integrating them into Galaxy. Finally, the visualizations we have created using the framework are useful tools for high-throughput genomics experiments.",2013-06-13 +26068977,"Chronic Exposure to Arsenic and Markers of Cardiometabolic Risk: A Cross-Sectional Study in Chihuahua, Mexico.","

Background

Exposure to arsenic (As) concentrations in drinking water > 150 μg/L has been associated with risk of diabetes and cardiovascular disease, but little is known about the effects of lower exposures.

Objective

This study aimed to examine whether moderate As exposure, or indicators of individual As metabolism at these levels of exposure, are associated with cardiometabolic risk.

Methods

We analyzed cross-sectional associations between arsenic exposure and multiple markers of cardiometabolic risk using drinking-water As measurements and urinary As species data obtained from 1,160 adults in Chihuahua, Mexico, who were recruited in 2008-2013. Fasting blood glucose and lipid levels, the results of an oral glucose tolerance test, and blood pressure were used to characterize cardiometabolic risk. Multivariable logistic, multinomial, and linear regression were used to assess associations between cardiometabolic outcomes and water As or the sum of inorganic and methylated As species in urine.

Results

After multivariable adjustment, concentrations in the second quartile of water As (25.5 to < 47.9 μg/L) and concentrations of total speciated urinary As (< 55.8 μg/L) below the median were significantly associated with elevated triglycerides, high total cholesterol, and diabetes. However, moderate water and urinary As levels were also positively associated with HDL cholesterol. Associations between arsenic exposure and both dysglycemia and triglyceridemia were higher among individuals with higher proportions of dimethylarsenic in urine.

Conclusions

Moderate exposure to As may increase cardiometabolic risk, particularly in individuals with high proportions of urinary dimethylarsenic. In this cohort, As exposure was associated with several markers of increased cardiometabolic risk (diabetes, triglyceridemia, and cholesterolemia), but exposure was also associated with higher rather than lower HDL cholesterol.

Citation

Mendez MA, González-Horta C, Sánchez-Ramírez B, Ballinas-Casarrubias L, Hernández Cerón R, Viniegra Morales D, Baeza Terrazas FA, Ishida MC, Gutiérrez-Torres DS, Saunders RJ, Drobná Z, Fry RC, Buse JB, Loomis D, García-Vargas GG, Del Razo LM, Stýblo M. 2016. Chronic exposure to arsenic and markers of cardiometabolic risk: a cross-sectional study in Chihuahua, Mexico. Environ Health Perspect 124:104-111; http://dx.doi.org/10.1289/ehp.1408742.",2015-06-12 +30727408,First Report of Powdery Mildew Caused by Erysiphe betae on the Invasive Weed Dysphania ambrosioides in Korea.,"Dysphania ambrosioides (L.) Mosyakin & Clemants (formerly Chenopodium ambrosioides L.), commonly known as epazote, is an herb that is native to Central America, South America, and southern Mexico. As well as in its native areas, it is used as an herb, tea, and food commodity in warm temperate to subtropical areas of Europe, the United States, and Asia. In Korea, however, this plant was accidentally introduced around the 1970s and has become widely naturalized by replacing indigenous plants and disrupting native ecosystems (3). Since 2006, powdery mildew infections of epazote have been consistently found in the southern part of Korea, including Jeju Island. Specimens (n = 8) have been deposited in the Korea University Herbarium (KUS). White mycelial and conidial growth was present mostly on leaf surfaces with sparse growth on young stems and inflorescences. Severely infected leaves were malformed. Slight purplish discoloration was present on the leaves contiguous with colony growth. Mycelial colonies were conspicuous, amphigenous, and epiphytic. Appressoria on the mycelia were lobed. Conidiophores were 110 to 200 μm long and produced conidia singly. Conidia were hyaline, oblong-elliptical, measured 30 to 48 × 13 to 18 μm, lacked fibrosin bodies, and produced germ tubes on the subterminal position. 
Chasmothecia were amphigenous, scattered or partly clustered, dark brown, spherical, 110 to 130 μm in diameter, and contained four to seven asci. Appendages were mycelioid, numbered 50 to 80 per chasmothecium, 0.5 to 1.5 times as long as the chasmothecial diameter, one- to three-septate, and brown at the base while becoming paler toward the tip. Asci were short stalked, 60 to 75 × 30 to 38 μm, and contained three to five spores. Ascospores were ellipsoid-ovoid with dimensions of 20 to 28 × 14 to 18 μm. On the basis of these morphological features, this fungus was identified as Erysiphe betae (Vanha) Weltzien (1). To confirm the identification, the complete internal transcribed spacer (ITS) region of rDNA from KUS-F23213 was amplified with primers ITS5 and P3 and sequenced (4). The resulting sequence of 560 bp was deposited in GenBank (Accession No. JQ041419). A GenBank BLAST search with the current data showed >99% (558 of 560 bp) similarity with the results for E. betae ex Beta vulgaris (sugar beet). Therefore, the sequence analysis verified the pathogen to be E. betae. Previous epazote infections by E. betae have been recorded in Argentina, Mexico, Romania, India, and Japan (1,2). In Taiwan, an epazote powdery mildew associated with Oidium erysiphoides f. sp. chenopodii J.M. Yen, an anamorph of E. betae, was recorded (1,2). To our knowledge, this is the first record of E. betae on epazote in Korea, and the first confirmation of epazote powdery mildew being identified as E. betae on the basis of holomorphic characteristics and ITS rDNA sequences. Our field observation suggests that the powdery mildew is acting as one of several limiting factors to suppress the expansion of this invasive weed in Korea. References: (1) U. Braun. Beih. Nova Hedw. 89:1, 1987. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , November 22, 2011. (3) C. G. Song and Y. H. 
Yang. The Naturalized Plants in Jeju Island. Nam-Jeju County, Jeju, Korea, 2005. (4) S. Takamatsu et al. Mycol. Res. 113:117, 2009.",2012-04-01 +25143305,Proteomic analyses of ethanol tolerance in Lactobacillus buchneri NRRL B-30929.,"The Lactobacillus buchneri NRRL B-30929 strain, isolated from a fuel ethanol (EtOH) production facility, exhibits high tolerance to environmental EtOH concentrations. This study aimed to identify proteins produced by B-30929 in response to environmental EtOH. Cellular proteins expressed by B-30929 growing in media with 10 versus 0% EtOH were compared by 2DE, followed by in-gel digestion and MALDI-MS analyses. Twenty EtOH responsive proteins were identified. These include a proline-specific peptidase (Lbuc_1852); a membrane protein (Lbuc_0921), two general stress-related proteins including a 10 kDa chaperonin (GroESL Lbuc_1359) and a 29 kDa member of the HK 97 family (Lbuc_1523); metabolic enzymes involving redox potential balances (Lbuc_2051 and Lbuc_0522) and carbohydrate fermentation (Lbuc_1319 and Lbuc_2157); nitrogen, amino acid, and fatty acid metabolism proteins (Lbuc_1994, Lbuc_0446, Lbuc_0858, Lbuc_0707, and Lbuc_0787). These changes suggested B-30929 cells respond to EtOH by degradation of available proteins and fatty acids and increased production of specific enzymes and molecular chaperons. These results can be used to guide genetic modifications to increase EtOH tolerance in industrial biocatalysts. The data have been deposited to World-2DPAGE (http://world-2dpage.expasy.org/repository/0068/; username liu, password 1h8d6Mg1).",2014-09-22 +25312811,Spread and impact of the Schmallenberg virus epidemic in France in 2012-2013.,"

Background

The Schmallenberg virus (SBV) emerged in Europe in 2011 and caused a widespread epidemic in ruminants.In France, SBV emergence was monitored through a national multi-stakeholder surveillance and investigation system. Based on the monitoring data collected from January 2012 to August 2013, we describe the spread of SBV in France during two seasons of dissemination (vector seasons 2011 and 2012) and we provide a large-scale assessment of the impact of this new disease in ruminants.

Results

SBV impact in infected herds was primarily due to the birth of stillborns or deformed foetuses and neonates. Congenital SBV morbidity level was on average moderate, although higher in sheep than in other ruminant species. On average, 8% of lambs, 3% of calves and 2% of kids born in SBV-infected herds showed typical congenital SBV deformities. In addition, in infected herds, farmers reported retrospectively a lower prolificacy during the vector season, suggesting a potential impact of acute SBV infection during mating and early stages of gestation.

Conclusions

Due to the lack of available control and prevention measures, SBV spread quickly in the naive ruminant population. France continues to monitor for SBV, and updated information is made available online on a regular basis [http://www.plateforme-esa.fr/]. Outbreaks of congenital SBV are expected to occur sporadically from now on, but further epidemics may also occur if immunity at population level declines.",2014-10-14 +21544197,CORE: a phylogenetically-curated 16S rDNA database of the core oral microbiome.,"Comparing bacterial 16S rDNA sequences to GenBank and other large public databases via BLAST often provides results of little use for identification and taxonomic assignment of the organisms of interest. The human microbiome, and in particular the oral microbiome, includes many taxa, and accurate identification of sequence data is essential for studies of these communities. For this purpose, a phylogenetically curated 16S rDNA database of the core oral microbiome, CORE, was developed. The goal was to include a comprehensive and minimally redundant representation of the bacteria that regularly reside in the human oral cavity with computationally robust classification at the level of species and genus. Clades of cultivated and uncultivated taxa were formed based on sequence analyses using multiple criteria, including maximum-likelihood-based topology and bootstrap support, genetic distance, and previous naming. A number of classification inconsistencies for previously named species, especially at the level of genus, were resolved. The performance of the CORE database for identifying clinical sequences was compared to that of three publicly available databases, GenBank nr/nt, RDP and HOMD, using a set of sequencing reads that had not been used in creation of the database. CORE offered improved performance compared to other public databases for identification of human oral bacterial 16S sequences by a number of criteria. 
In addition, the CORE database and phylogenetic tree provide a framework for measures of community divergence, and the focused size of the database offers advantages of efficiency for BLAST searching of large datasets. The CORE database is available as a searchable interface and for download at http://microbiome.osu.edu.",2011-04-22 +24316579,The 2014 Nucleic Acids Research Database Issue and an updated NAR online Molecular Biology Database Collection.,"The 2014 Nucleic Acids Research Database Issue includes descriptions of 58 new molecular biology databases and recent updates to 123 databases previously featured in NAR or other journals. For convenience, the issue is now divided into eight sections that reflect major subject categories. Among the highlights of this issue are six databases of the transcription factor binding sites in various organisms and updates on such popular databases as CAZy, Database of Genomic Variants (DGV), dbGaP, DrugBank, KEGG, miRBase, Pfam, Reactome, SEED, TCDB and UniProt. There is a strong block of structural databases, which includes, among others, the new RNA Bricks database, updates on PDBe, PDBsum, ArchDB, Gene3D, ModBase, Nucleic Acid Database and the recently revived iPfam database. An update on the NCBI's MMDB describes VAST+, an improved tool for protein structure comparison. Two articles highlight the development of the Structural Classification of Proteins (SCOP) database: one describes SCOPe, which automates assignment of new structures to the existing SCOP hierarchy; the other one describes the first version of SCOP2, with its more flexible approach to classifying protein structures. This issue also includes a collection of articles on bacterial taxonomy and metagenomics, which includes updates on the List of Prokaryotic Names with Standing in Nomenclature (LPSN), Ribosomal Database Project (RDP), the Silva/LTP project and several new metagenomics resources. 
The NAR online Molecular Biology Database Collection, http://www.oxfordjournals.org/nar/database/c/, has been expanded to 1552 databases. The entire Database Issue is freely available online on the Nucleic Acids Research website (http://nar.oxfordjournals.org/).",2013-12-06 +24170502,"Modeling bioconcentration factor (BCF) using mechanistically interpretable descriptors computed from open source tool ""PaDEL-Descriptor"".","Predictive regression-based models for bioconcentration factor (BCF) have been developed using mechanistically interpretable descriptors computed from open source tool PaDEL-Descriptor ( http://padel.nus.edu.sg/software/padeldescriptor/ ). A data set of 522 diverse chemicals has been used for this modeling study, and extended topochemical atom (ETA) indices developed by the present authors' group were chosen as the descriptors. Due to the importance of lipohilicity in modeling BCF, XLogP (computed partition coefficient) was also tried as an additional descriptor. Genetic function approximation followed by multiple linear regression algorithm was applied to select descriptors, and subsequent partial least squares analyses were performed to establish mathematical equations for BCF prediction. The model generated from only ETA indices shows importance of seven descriptors in model development, while the model generated from ETA descriptors along with XlogP shows importance of four descriptors in model development. In general, BCF depends on lipophilicity, presence of heteroatoms, presence of halogens, fused ring system, hydrogen bonding groups, etc. The developed models show excellent statistical qualities and predictive ability. The developed models were used also for prediction of an external data set available from the literature, and good quality of predictions (R (2) pred = 0.812 and 0.826) was demonstrated. 
Thus, BCF can be predicted using ETA and XlogP descriptors calculated from open source PaDEL-Descriptor software in the context of aquatic chemical toxicity management.",2013-10-30 +24311565,mVOC: a database of microbial volatiles.,"Scents are well known to be emitted from flowers and animals. In nature, these volatiles are responsible for inter- and intra-organismic communication, e.g. attraction and defence. Consequently, they influence and improve the establishment of organisms and populations in ecological niches by acting as single compounds or in mixtures. Despite the known wealth of volatile organic compounds (VOCs) from species of the plant and animal kingdom, in the past, less attention has been focused on volatiles of microorganisms. Although fast and affordable sequencing methods facilitate the detection of microbial diseases, however, the analysis of signature or fingerprint volatiles will be faster and easier. Microbial VOCs (mVOCs) are presently used as marker to detect human diseases, food spoilage or moulds in houses. Furthermore, mVOCs exhibited antagonistic potential against pathogens in vitro, but their biological roles in the ecosystems remain to be investigated. Information on volatile emission from bacteria and fungi is presently scattered in the literature, and no public and up-to-date collection on mVOCs is available. To address this need, we have developed mVOC, a database available online at http://bioinformatics.charite.de/mvoc.",2013-12-05 +24907367,MAGI: a Node.js web service for fast microRNA-Seq analysis in a GPU infrastructure.,"

Summary

MAGI is a web service for fast MicroRNA-Seq data analysis in a graphics processing unit (GPU) infrastructure. Using just a browser, users have access to results as web reports in just a few hours, a >600% end-to-end performance improvement over the state of the art. MAGI's salient features are (i) transfer of large input files in native FASTA with Qualities (FASTQ) format through drag-and-drop operations, (ii) rapid prediction of microRNA target genes leveraging parallel computing with GPU devices, (iii) all-in-one analytics with novel feature extraction, statistical test for differential expression and diagnostic plot generation for quality control and (iv) interactive visualization and exploration of results in web reports that are readily available for publication.

Availability and implementation

MAGI relies on the Node.js JavaScript framework, along with NVIDIA CUDA C, PHP: Hypertext Preprocessor (PHP), Perl and R. It is freely available at http://magi.ucsd.edu.",2014-06-06 +24048352,Optimizing a global alignment of protein interaction networks.,"

Motivation

The global alignment of protein interaction networks is a widely studied problem. It is an important first step in understanding the relationship between the proteins in different species and identifying functional orthologs. Furthermore, it can provide useful insights into the species' evolution.

Results

We propose a novel algorithm, PISwap, for optimizing global pairwise alignments of protein interaction networks, based on a local optimization heuristic that has previously demonstrated its effectiveness for a variety of other intractable problems. PISwap can begin with different types of network alignment approaches and then iteratively adjust the initial alignments by incorporating network topology information, trading it off for sequence information. In practice, our algorithm efficiently refines other well-studied alignment techniques with almost no additional time cost. We also show the robustness of the algorithm to noise in protein interaction data. In addition, the flexible nature of this algorithm makes it suitable for different applications of network alignment. This algorithm can yield interesting insights into the evolutionary dynamics of related species.

Availability

Our software is freely available for non-commercial purposes from our Web site, http://piswap.csail.mit.edu/.

Contact

bab@csail.mit.edu or csliao@ie.nthu.edu.tw.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-09-17 +25790785,Computationally predicting protein-RNA interactions using only positive and unlabeled examples.,"Protein-RNA interactions (PRIs) are considerably important in a wide variety of cellular processes, ranging from transcriptional and post-transcriptional regulations of gene expression to the active defense of host against virus. With the development of high throughput technology, large amounts of PRI information is available for computationally predicting unknown PRIs. In recent years, a number of computational methods for predicting PRIs have been developed in the literature, which usually artificially construct negative samples based on verified nonredundant datasets of PRIs to train classifiers. However, such negative samples are not real negative samples, some even may be unknown positive samples. Consequently, the classifiers trained with such training datasets cannot achieve satisfactory prediction performance. In this paper, we propose a novel method PRIPU that employs biased-support vector machine (SVM) for predicting Protein-RNA Interactions using only Positive and Unlabeled examples. To the best of our knowledge, this is the first work that predicts PRIs using only positive and unlabeled samples. We first collect known PRIs as our benchmark datasets and extract sequence-based features to represent each PRI. To reduce the dimension of feature vectors for lowering computational cost, we select a subset of features by a filter-based feature selection method. Then, biased-SVM is employed to train prediction models with different PRI datasets. To evaluate the new method, we also propose a new performance measure called explicit positive recall (EPR), which is specifically suitable for the task of learning positive and unlabeled data. Experimental results over three datasets show that our method not only outperforms four existing methods, but also is able to predict unknown PRIs. 
Source code, datasets and related documents of PRIPU are available at: http://admis.fudan.edu.cn/projects/pripu.htm .",2015-02-08 +22689387,SAAP-RRBS: streamlined analysis and annotation pipeline for reduced representation bisulfite sequencing.,"

Unlabelled

Reduced representation bisulfite sequencing (RRBS) is a cost-effective approach for genome-wide methylation pattern profiling. Analyzing RRBS sequencing data is challenging and specialized alignment/mapping programs are needed. Although such programs have been developed, a comprehensive solution that provides researchers with good quality and analyzable data is still lacking. To address this need, we have developed a Streamlined Analysis and Annotation Pipeline for RRBS data (SAAP-RRBS) that integrates read quality assessment/clean-up, alignment, methylation data extraction, annotation, reporting and visualization. This package facilitates a rapid transition from sequencing reads to a fully annotated CpG methylation report to biological interpretation.

Availability and implementation

SAAP-RRBS is freely available to non-commercial users at the web site http://ndc.mayo.edu/mayo/research/biostat/stand-alone-packages.cfm.",2012-06-10 +24843289,URJC GB dataset: Community-based seed bank of Mediterranean high-mountain and semi-arid plant species at Universidad Rey Juan Carlos (Spain).,"The Germplasm Bank of Universidad Rey Juan Carlos was created in 2008 and currently holds 235 accessions and 96 species. This bank focuses on the conservation of wild-plant communities and aims to conserve ex situ a representative sample of the plant biodiversity present in a habitat, emphasizing priority ecosystems identified by the Habitats Directive. It is also used to store plant material for research and teaching purposes. The collection consists of three subcollections, two representative of typical habitats in the center of the Iberian Peninsula: high-mountain pastures (psicroxerophylous pastures) and semi-arid habitats (gypsophylic steppes), and a third representative of the genus Lupinus. The high-mountain subcollection currently holds 153 accessions (63 species), the semi-arid subcollection has 76 accessions (29 species,) and the Lupinus subcollection has 6 accessions (4 species). All accessions are stored in a freezer at -18 °C in Kilner jars with silica gel. The Germplasm Bank of Universidad Rey Juan Carlos follows a quality control protocol which describes the workflow performed with seeds from seed collection to storage. All collectors are members of research groups with great experience in species identification. Herbarium specimens associated with seed accessions are preserved and 63% of the records have been georreferenced with GPS and radio points. The dataset provides unique information concerning the location of populations of plant species that form part of the psicroxerophylous pastures and gypsophylic steppes of Central Spain as well as populations of genus Lupinus in the Iberian Peninsula. 
It also provides relevant information concerning mean seed weight and seed germination values under specific incubation conditions. This dataset has already been used by researchers of the Area of Biodiversity and Conservation of URJC as a source of information for the design and implementation of experimental designs in these plant communities. Since they are all active subcollections in continuous growth, data is updated regularly every six months and the latest version can be accessed through the GBIF data portal at http://www.gbif.es:8080/ipt/resource.do?r=germoplasma-urjc. This paper describes the URJC Germplasm Bank and its associated dataset with the aim of disseminating the dataset and explaining how it was derived.",2014-03-25 +24602174,The Semanticscience Integrated Ontology (SIO) for biomedical research and knowledge discovery.,"The Semanticscience Integrated Ontology (SIO) is an ontology to facilitate biomedical knowledge discovery. SIO features a simple upper level comprised of essential types and relations for the rich description of arbitrary (real, hypothesized, virtual, fictional) objects, processes and their attributes. SIO specifies simple design patterns to describe and associate qualities, capabilities, functions, quantities, and informational entities including textual, geometrical, and mathematical entities, and provides specific extensions in the domains of chemistry, biology, biochemistry, and bioinformatics. SIO provides an ontological foundation for the Bio2RDF linked data for the life sciences project and is used for semantic integration and discovery for SADI-based semantic web services. SIO is freely available to all users under a creative commons by attribution license. 
See website for further information: http://sio.semanticscience.org.",2014-03-06 +23979084,The transfer of drugs and therapeutics into human breast milk: an update on selected topics.,"Many mothers are inappropriately advised to discontinue breastfeeding or avoid taking essential medications because of fears of adverse effects on their infants. This cautious approach may be unnecessary in many cases, because only a small proportion of medications are contraindicated in breastfeeding mothers or associated with adverse effects on their infants. Information to inform physicians about the extent of excretion for a particular drug into human milk is needed but may not be available. Previous statements on this topic from the American Academy of Pediatrics provided physicians with data concerning the known excretion of specific medications into breast milk. More current and comprehensive information is now available on the Internet, as well as an application for mobile devices, at LactMed (http://toxnet.nlm.nih.gov). Therefore, with the exception of radioactive compounds requiring temporary cessation of breastfeeding, the reader will be referred to LactMed to obtain the most current data on an individual medication. This report discusses several topics of interest surrounding lactation, such as the use of psychotropic therapies, drugs to treat substance abuse, narcotics, galactagogues, and herbal products, as well as immunization of breastfeeding women. A discussion regarding the global implications of maternal medications and lactation in the developing world is beyond the scope of this report. The World Health Organization offers several programs and resources that address the importance of breastfeeding (see http://www.who.int/topics/breastfeeding/en/).",2013-08-26 +24304895,BacMet: antibacterial biocide and metal resistance genes database.,"Antibiotic resistance has become a major human health concern due to widespread use, misuse and overuse of antibiotics. 
In addition to antibiotics, antibacterial biocides and metals can contribute to the development and maintenance of antibiotic resistance in bacterial communities through co-selection. Information on metal and biocide resistance genes, including their sequences and molecular functions, is, however, scattered. Here, we introduce BacMet (http://bacmet.biomedicine.gu.se)--a manually curated database of antibacterial biocide- and metal-resistance genes based on an in-depth review of the scientific literature. The BacMet database contains 470 experimentally verified resistance genes. In addition, the database also contains 25 477 potential resistance genes collected from public sequence repositories. All resistance genes in the BacMet database have been organized according to their molecular function and induced resistance phenotype.",2013-12-03 +23637070,glyXalign: high-throughput migration time alignment preprocessing of electrophoretic data retrieved via multiplexed capillary gel electrophoresis with laser-induced fluorescence detection-based glycoprofiling.,"Glycomics has become a rapidly emerging field and monitoring of protein glycosylation is needed to ensure quality and consistency during production processes of biologicals such as therapeutic antibodies or vaccines. Glycoanalysis via multiplexed CGE with LIF detection (xCGE-LIF) represents a powerful technique featuring high resolution, high sensitivity as well as high-throughput performance. However, sample data retrieved from this method exhibit challenges for downstream computational analysis due to intersample migration time shifts as well as stretching and compression of electropherograms. Here, we present glyXalign, a freely available and easy-to-use software package to automatically correct for distortions in xCGE-LIF based glycan data. 
We demonstrate its ability to outperform conventional algorithms such as dynamic time warping and correlation optimized warping in terms of processing time and alignment accuracy for high-resolution datasets. Built upon a set of rapid algorithms, the tool includes an intuitive graphical user interface and allows full control over all parameters. Additionally, it visualizes the alignment process and enables the user to readjust misaligned results. Software and documentation are available at http://www.glyxera.com.",2013-07-08 +22044556,Global robotic experience and the type of surgical system impact the types of robotic malfunctions and their clinical consequences: an FDA MAUDE review.,"

Objectives

To assess annual rates of robotic system malfunctions and compare the da Vinci S(®) system (dVS) and da Vinci(®) surgical system (dV). To assess the types of malfunctions and associated outcomes for robotic cases and determine the extent to which experience and technological improvements impact these.

Patients and methods

This study is a retrospective review of the US Food and Drug Administration (FDA) MAUDE (Manufacturer and User Facility Device Experience) database, a publicly available, voluntary reporting system (http://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfmaude/search.cfm). The database was searched using the two phrases 'da Vinci' and 'Intuitive Surgical' from 2003 to 2009. Malfunctions of the instruments, console, patient-side cart, camera and cannula were recorded. Data on intraoperative injuries, case delays and conversions were also collected.

Results

In all, 1914 reports were reviewed (991 dVS and 878 dV, 45 unclassified) with peak years for reports of 2008 for dVS (571) and 2007 for dV (211), P < 0.001. With respect to time, the proportion of console and patient-side cart malfunctions declined from 2007 onward compared with the proportions prior to 2007 (5.1% vs 9.4% and 6.6% vs 10.9%). Patient injury did not change with year of surgery (0.5-5.4% of malfunctions, P= 0.358), open conversions declined (21.3% of malfunctions before 2007 vs 9.9% from 2007 onward, P < 0.001) and patient deaths increased (0.0013% of cases before 2007 vs 0.0061% of cases from 2007 onward, P < 0.001). With regard to robotic system, console and patient-side cart malfunctions were more frequent with the dV than the dVS: 82/878 vs 39/991 and 100/878 vs 48/991, P < 0.001. Open conversion was more frequent with dV than dVS (19.3% vs 7.7% of reported malfunctions, P < 0.001), while patient injury was less with dV than dVS (3.5% vs 5.9%, P= 0.021).

Conclusions

The dVS decreased console and patient-side cart errors relative to total malfunctions, which were also influenced by surgical year. Open conversions were reduced by increased robotic experience and newer surgical system. Differences in patient injury may reflect changes in reporting or case complexity.",2011-11-01 +23375235,Learning the local Bayesian network structure around the ZNF217 oncogene in breast tumours.,"In this study, we discuss and apply a novel and efficient algorithm for learning a local Bayesian network model in the vicinity of the ZNF217 oncogene from breast cancer microarray data without having to decide in advance which genes have to be included in the learning process. ZNF217 is a candidate oncogene located at 20q13, a chromosomal region frequently amplified in breast and ovarian cancer, and correlated with shorter patient survival in these cancers. To properly address the difficulties in managing complex gene interactions given our limited sample, statistical significance of edge strengths was evaluated using bootstrapping and the less reliable edges were pruned to increase the network robustness. We found that 13 out of the 35 genes associated with deregulated ZNF217 expression in breast tumours have been previously associated with survival and/or prognosis in cancers. Identifying genes involved in lipid metabolism opens new fields of investigation to decipher the molecular mechanisms driven by the ZNF217 oncogene. Moreover, nine of the 13 genes have already been identified as putative ZNF217 targets by independent biological studies. We therefore suggest that the algorithms for inferring local BNs are valuable data mining tools for unraveling complex mechanisms of biological pathways from expression data. 
The source code is available at http://www710.univ-lyon1.fr/~aaussem/Software.html.",2013-01-31 +22804616,"A critical appraisal of techniques, software packages, and standards for quantitative proteomic analysis.","New methods for performing quantitative proteome analyses based on differential labeling protocols or label-free techniques are reported in the literature on an almost monthly basis. In parallel, a correspondingly vast number of software tools for the analysis of quantitative proteomics data has also been described in the literature and produced by private companies. In this article we focus on the review of some of the most popular techniques in the field and present a critical appraisal of several software packages available to process and analyze the data produced. We also describe the importance of community standards to support the wide range of software, which may assist researchers in the analysis of data using different platforms and protocols. It is intended that this review will serve bench scientists both as a useful reference and a guide to the selection and use of different pipelines to perform quantitative proteomics data analysis. We have produced a web-based tool ( http://www.proteosuite.org/?q=other_resources ) to help researchers find appropriate software for their local instrumentation, available file formats, and quantitative methodology.",2012-07-17 +23275695,Simplifier: a web tool to eliminate redundant NGS contigs.,"

Unlabelled

Modern genomic sequencing technologies produce a large amount of data with reduced cost per base; however, this data consists of short reads. This reduction in the size of the reads, compared to those obtained with previous methodologies, presents new challenges, including a need for efficient algorithms for the assembly of genomes from short reads and for resolving repetitions. Additionally, after ab initio assembly, curation of the hundreds or thousands of contigs generated by assemblers demands considerable time and computational resources. We developed Simplifier, a stand-alone software that selectively eliminates redundant sequences from the collection of contigs generated by ab initio assembly of genomes. Application of Simplifier to data generated by assembly of the genome of Corynebacterium pseudotuberculosis strain 258 reduced the number of contigs generated by ab initio methods from 8,004 to 5,272, a reduction of 34.14%; in addition, N50 increased from 1 kb to 1.5 kb. Processing the contigs of Escherichia coli DH10B with Simplifier reduced the mate-paired library 17.47% and the fragment library 23.91%. Simplifier removed redundant sequences from datasets produced by assemblers, thereby reducing the effort required for finalization of genome assembly in tests with data from Prokaryotic organisms.

Availability

Simplifier is available at http://www.genoma.ufpa.br/rramos/softwares/simplifier.xhtml. It requires Sun jdk 6 or higher.",2012-10-13 +21456054,Cloning and functional characterization of the UDP-glucosyltransferase UgtB1 involved in sophorolipid production by Candida bombicola and creation of a glucolipid-producing yeast strain.,"Sophorolipids produced by the non-pathogenic yeast Candida bombicola ATCC 22214 are glycolipid biosurfactants applied commercially as biodegradable and eco-friendly detergents. Their low cell toxicity, excellent wetting capability and antimicrobial activity attract the attention of high-value markets, such as the cosmetic and pharmaceutical industries. Although sophorolipid production yields have been increased by the optimization of fermentation parameters and feed sources, the biosynthetic pathway and genetic mechanism behind sophorolipid production still remains unclear. Here we identify a UDP-glucosyltransferase gene, UGTB1, with a key function in this economically important pathway. The protein shows sequence and structural homology to several bacterial glycosyltransferases involved in macrolide antibiotic synthesis. Deletion of UGTB1 in C. bombicola did not affect cell growth and resulted in a yeast producing glucolipids, thereby opening the route for in vivo production of these glycolipid intermediates. Activity assays on cell lysates confirmed that the identified gene is responsible for the second glucosylation step during sophorolipid production and illustrated that sophorolipid production in C. bombicola involves the stepwise action of two independent glucosyltransferases. The complete UGTB1 sequence data have been submitted to the GenBank database (http://www.ncbi.nlm.nih.gov) under Accession No. HM440974.",2011-01-16 +25551621,Decreased expression of microRNA-126 is associated with poor prognosis in patients with cervical cancer.,"

Background

MicroRNA-126 (miR-126) has been shown to be frequently down-regulated in a variety of malignancies and to act as a potential tumor suppressor. However, its correlations with the clinicopathological characteristics of cervical cancer remain unclear.

Methods

TaqMan quantitative RT-PCR was used to determine the expression level of miR-126 in tissue samples. The associations of miR-126 expression with clinicopathologic variables were analyzed. Kaplan-Meier survival analysis was performed to analyze the association of miR-126 expression with overall survival (OS) of patients. Univariate and multivariate Cox regression analyses were performed.

Results

miR-126 expression level in human cervical cancer tissues was significantly lower than that in adjacent nontumorous tissues (mean ± SD: 0.59 ± 0.44 vs. 1.00 ± 0.51, P < 0.0001). Decreased miR-126 expression in cervical cancer was found to be significantly associated with lymphatic invasion (P = 0.002), distant metastasis (P < 0.001), FIGO stage (P = 0.009), and histological grade (P = 0.005). Kaplan-Meier analysis showed that patients with lower levels of miR-126 had significantly poorer survival than those with higher expression of this miRNA in patients, with a 5-year OS of 45.7% and 70.9%, respectively (P = 0.002). Multivariate analysis revealed that miR-126 expression (HR = 3.97, 95% CI: 2.01-20.22; P = 0.003) was independently associated with the OS.

Conclusion

Our data suggests the potential of miR-126 as a prognostic biomarker for cervical cancer.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_220.",2014-12-31 +26714369,WHO Expert Committee on Biological Standardization. Sixty-fifth report.,"This report presents the recommendations of a WHO Expert Committee commissioned to coordinate activities leading to the adoption of international recommendations for the production and control of vaccines and other biological substances, and the establishment of international biological reference materials. Following a brief introduction, the report summarizes a number of general issues brought to the attention of the Committee. The next part of the report, of particular relevance to manufacturers and national regulatory authorities, outlines the discussions held on the development and adoption of new and revised WHO Recommendations, Guidelines and guidance documents. Following these discussions, a WHO guidance document on the Scientific principles for regulatory risk evaluation on finding an adventitious agent in a marketed vaccine was adopted along with WHO Guidelines on procedures and data requirements for changes to approved vaccines and revised WHO Recommendations to assure the quality, safety and efficacy of poliomyelitis vaccines (inactivated). Subsequent sections of the report provide information on the current status and proposed development of international reference materials in the areas of antibiotics; biotherapeutics other than blood products; blood products and related substances; in vitro diagnostic device reagents; and vaccines and related substances. A series of annexes are then presented which include an updated list of all WHO Recommendations, Guidelines and other documents on biological substances used in medicine (Annex 1) followed by the above three WHO documents adopted on the advice of the Committee (Annexes 2-4). 
All additions and discontinuations made during the 2014 meeting to the list of International Standards, Reference Reagents and Reference Panels for biological substances maintained by WHO are summarized in Annex 5. The updated full catalogue of WHO International Reference Preparations is available at: http://www.who.int/bloodproducts/catalogue/en/.",2015-01-01 +24990610,ccSOL omics: a webserver for solubility prediction of endogenous and heterologous expression in Escherichia coli.,"

Summary

Here we introduce ccSOL omics, a webserver for large-scale calculations of protein solubility. Our method allows (i) proteome-wide predictions; (ii) identification of soluble fragments within each sequence; and (iii) exhaustive single-point mutation analysis.

Results

Using coil/disorder, hydrophobicity, hydrophilicity, β-sheet and α-helix propensities, we built a predictor of protein solubility. Our approach shows an accuracy of 79% on the training set (36 990 Target Track entries). Validation on three independent sets indicates that ccSOL omics discriminates soluble and insoluble proteins with an accuracy of 74% on 31 760 proteins sharing <30% sequence similarity.

Availability and implementation

ccSOL omics can be freely accessed on the web at http://s.tartaglialab.com/page/ccsol_group. Documentation and tutorial are available at http://s.tartaglialab.com/static_files/shared/tutorial_ccsol_omics.html.

Contact

gian.tartaglia@crg.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-01 +30716823,First Report of Leaf Spot on Switchgrass Caused by Pithomyces chartarum in the United States.,"There are few reports on diseases of switchgrass. In November 2009, light brown to white bleached spots (1 to 2 × 3 to 4 μm) were observed on 'Alamo' switchgrass (Panicum virgatum L.) grown in a growth chamber in Knoxville, TN, from surface-disinfested seed produced in Colorado. Symptomatic leaf tissue was surface sterilized, air dried, and plated on 2% water agar (WA) amended with 6.9 mg fenpropathrin/liter (Danitol 2.4 EC, Valent Chemical, Walnut Creek, CA) and 10 mg/liter rifampicin (Sigma-Aldrich, St. Louis, MO). Plates were incubated at 26°C in the dark for 5 days. A sporulating, dematiaceous, mitosporic fungus was observed and transferred to potato dextrose agar. Colonies were white to gray, with brown as conidia increased. Conidia ranged in size from 10 to 22.5 × 20 to 37.5 (average 15.2 × 26.5) μm. Conidia were golden to dark brown, broadly ellipsoidal, some pyriform, with one longitudinal septum and two to three transverse septa, sometimes constricted at the transverse septa. Based on microscopic examination, the fungus was identified as Pithomyces chartarum (Berk. & Curt.) M.B. Ellis (1); observations were consistent with the authority (2). Pathogenicity assays were conducted with 5-week-old 'Alamo' switchgrass grown from seed scarified with 60% sulfuric acid and surface-sterilized with 50% bleach. Seed were sown in 9 × 9-cm pots containing 50% (v/v) ProMix Potting and Seeding Mix (Premier Tech Horticulture, Québec, Canada) and 50% Turface ProLeague (Profile Products, Buffalo Grove, IL). Eight replicate pots with ~20 plants each were sprayed with a spore suspension of 5.7 × 105 spores/ml sterile water prepared from 6-day-old cultures grown on V8 juice agar in the dark. Two more pots were sprayed with sterile water to serve as controls. 
All plants were subjected to high humidity for 72 h by enclosure in a plastic bag. Plants were placed in a growth chamber at 25/20°C with a 12-h photoperiod. Leaf spot symptoms similar to the original disease were evident on plants in each of the eight replicate pots 6 to 10 days post-inoculation. Control plants had no symptoms. Lesions were excised from leaves, surface sterilized, and plated on WA. The resulting cultures were again identified as P. chartarum based on morphology. The internal transcribed spacer (ITS) region of rDNA from the original isolate and the pathogen recovered from plants in the pathogenicity tests were amplified with PCR using primers ITS4 and ITS5. PCR amplicons were obtained from both isolates, sequenced, and found to have 100% identity. A 580-bp sequence was deposited at GenBank (Accession No. JQ406588). The nucleotide sequence had 98 to 100% identity to the ITS sequences of isolates of Leptosphaerulina chartarum (anamorph: P. chartarum), including isolate Mxg-KY09-s4 (GU195649) from leaf spot on Miscanthus × giganteus in Kentucky (1), and isolates from leaf lesions on wheat (EF489400 and JX442978). To our knowledge, leaf spot caused by P. chartarum has not been described on switchgrass (3). Pithomyces chartarum is a seedborne pathogen of switchgrass, and may play a role in stand establishment. References: (1) M. O. Ahonsi et al. Plant Dis. 94:480, 2010. (2) M. B. Ellis. Dematiaceous Hyphomycetes. Commonwealth Mycological Institute, Kew, Surrey, England. 1971. (3) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA, Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 18 January 2013.",2013-12-01 +30716824,First Report of Leaf Spot caused by Bipolaris oryzae on Switchgrass in Tennessee.,"Knowledge of pathogens in switchgrass, a potential biofuels crop, is limited. 
In December 2007, dark brown to black irregularly shaped foliar spots were observed on 'Alamo' switchgrass (Panicum virgatum L.) on the campus of the University of Tennessee. Symptomatic leaf samples were surface-sterilized (95% ethanol, 1 min; 20% commercial bleach, 3 min; 95% ethanol, 1 min), rinsed in sterile water, air-dried, and plated on 2% water agar amended with 3.45 mg fenpropathrin/liter (Danitol 2.4 EC, Valent Chemical, Walnut Creek, CA) and 10 mg/liter rifampicin (Sigma-Aldrich, St. Louis, MO). A sparsely sporulating, dematiaceous mitosporic fungus was observed. Fungal plugs were transferred to surface-sterilized detached 'Alamo' leaves on sterile filter paper in a moist chamber to increase spore production. Conidia were ovate, oblong, mostly straight to slightly curved, and light to olive-brown with 3 to 10 septa. Conidial dimensions were 12.5 to 17 × 27.5 to 95 (average 14.5 × 72) μm. Conidiophores were light brown, single, multiseptate, and geniculate. Conidial production was polytretic. Morphological characteristics and disease symptoms were similar to those described for Bipolaris oryzae (Breda de Haan) Shoemaker (2). Disease assays were done with 6-week-old 'Alamo' switchgrass grown from seed scarified with 60% sulfuric acid and surface-sterilized in 50% bleach. Nine 9 × 9-cm square pots with approximately 20 plants per pot were inoculated with a mycelial slurry (due to low spore production) prepared from cultures grown on potato dextrose agar for 7 days. Cultures were flooded with sterile water and rubbed gently to loosen mycelium. Two additional pots were inoculated with sterile water and subjected to the same conditions to serve as controls. Plants were exposed to high humidity by enclosure in a plastic bag for 72 h. Bags were removed, and plants were incubated at 25/20°C with 50 to 60% relative humidity. During the disease assay, plants were kept in a growth chamber with a 12-h photoperiod of fluorescent and incandescent lighting. 
Foliar leaf spot symptoms appeared 5 to 14 days post-inoculation for eight of nine replicates. Control plants had no symptoms. Symptomatic leaf tissue was processed and plated as described above. The original fungal isolate and the pathogen recovered in the disease assay were identified using internal transcribed spacer (ITS) region sequences. The ITS region of rDNA was amplified with PCR and primer pairs ITS4 and ITS5 (4). PCR amplicons of 553 bp were sequenced, and sequences from the original isolate and the reisolated pathogen were identical (GenBank Accession No. JQ237248). The sequence had 100% nucleotide identity to B. oryzae from switchgrass in Mississippi (GU222690, GU222691, GU222692, and GU222693) and New York (JF693908). Leaf spot caused by B. oryzae on switchgrass has also been described in North Dakota (1) and was seedborne in Mississippi (3). To our knowledge, this is the first report of B. oryzae from switchgrass in Tennessee. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/, 28 June 2012. (2) J. M. Krupinsky et al. Can. J. Plant Pathol. 26:371, 2004. (3) M. Tomaso-Peterson and C. J. Balbalian. Plant Dis. 94:643, 2010. (4) T. J. White et al. Pages 315-322 in: PCR Protocols: a Guide to Methods and Applications. M. A. Innis et al. (eds), Acad. Press, San Diego, 1990.",2013-12-01 +30716847,First Report of Pepper Fruit Rot Caused by Fusarium concentricum in China.,"Pepper (Capsicum annuum L.) is an important vegetable crop worldwide. Some Fusarium species can cause pepper fruit rot, leading to significant yield losses of pepper production and, for some Fusarium species, potential risk of mycotoxin contamination. 
A total of 106 diseased pepper fruit samples were collected from various pepper cultivars from seven provinces (Gansu, Hainan, Heilongjiang, Hunan, Shandong, Shanghai, and Zhejiang) in China during the 2012 growing season, where pepper production occurs on approximately 25,000 ha. Pepper fruit rot symptom incidence ranged from 5 to 20% in individual fields. Symptomatic fruit tissue was surface-sterilized in 0.1% HgCl2 for 1 min, dipped in 70% ethanol for 30 s, then rinsed in sterilized distilled water three times, dried, and plated in 90 mm diameter petri dishes containing potato dextrose agar (PDA). After incubation for 5 days at 28°C in the dark, putative Fusarium colonies were purified by single-sporing. Forty-three Fusarium strains were isolated and identified to species as described previously (1,2). Morphological characteristics of one strain were identical to those of F. concentricum. Aerial mycelium was reddish-white with an average growth rate of 4.2 to 4.3 mm/day at 25°C in the dark on PDA. Pigments in the agar were formed in alternating red and orange concentric rings. Microconidia were 0- to 1-septate, mostly 0-septate, and oval, obovoid to allantoid. Macroconidia were relatively slender with no significant curvature, 3- to 5-septate, with a beaked apical cell and a foot-shaped basal cell. To confirm the species identity, the partial TEF gene sequence (646 bp) was amplified and sequenced (GenBank Accession No. KC816735). A BLASTn search with TEF gene sequences in NCBI and the Fusarium ID databases revealed 99.7 and 100% sequence identity, respectively, to known TEF sequences of F. concentricum. Thus, both morphological and molecular criteria supported identification of the strain as F. concentricum. This strain was deposited as Accession MUCL 54697 (http://bccm.belspo.be/about/mucl.php). 
Pathogenicity of the strain was confirmed by inoculating 10 wounded, mature pepper fruits that had been harvested 70 days after planting the cultivar Zhongjiao-5 with a conidial suspension (1 × 106 spores/ml), as described previously (3). A control treatment consisted of inoculating 10 pepper fruits of the same cultivar with sterilized distilled water. The fruit were incubated at 25°C in a moist chamber, and the experiment was repeated independently in triplicate. Initially, green to dark brown lesions were observed on the outer surface of inoculated fruit. Typical soft-rot symptoms and lesions were observed on the inner wall when the fruit were cut open 10 days post-inoculation. Some infected seeds in the fruits were grayish-black and covered by mycelium, similar to the original fruit symptoms observed at the sampling sites. The control fruit remained healthy after 10 days of incubation. The same fungus was isolated from the inoculated infected fruit using the method described above, but no fungal growth was observed from the control fruit. To our knowledge, this is the first report of F. concentricum causing a pepper fruit rot. References: (1) J. F. Leslie and B. A. Summerell. The Fusarium Laboratory Manual. Blackwell Publishing, Ames, IA, 2006. (2) K. O'Donnell et al. Proc. Nat. Acad. Sci. USA 95:2044, 1998. (3) Y. Yang et al. 2011. Int. J. Food Microbiol. 151:150, 2011.",2013-12-01 +25411684,"Geographic Variation in Potentially Preventable Hospitalizations for Acute and Chronic Conditions, 2005–2011","This Statistical Brief presents data from the Healthcare Cost and Utilization Project (HCUP) on the characteristics of potentially preventable hospitalizations from 2005 through 2011. The Agency for Healthcare Research and Quality (AHRQ) Prevention Quality Indicators (PQIs) were used to develop estimates of the number of potentially preventable hospitalizations for overall PQIs, acute PQIs, and chronic PQIs from 2005 through 2011. 
An earlier Statistical Brief on potentially preventable hospitalizations presented trends from 2005 through 2010 for adults and children. This Statistical Brief is the latest in a series on potentially preventable hospitalizations that have focused on a range of topics that include acute and chronic conditions, individuals who are dually eligible for Medicare and Medicaid, older adults, nationwide frequency and costs, racial and ethnic disparities,, and trends among adults and children from 1997–2004. (See http://www.hcup-us.ahrq.gov/reports/statbriefs/sb_preventable.jsp for a complete list of Statistical Briefs in this series.) Rates of hospitalization for acute PQIs were based on admissions for dehydration, bacterial pneumonia, and urinary tract infections. Rates of hospitalization for chronic PQIs were based on admissions for diabetes, angina, congestive heart failure, hypertension, asthma, and chronic obstructive pulmonary disease. The rates for potentially preventable hospitalizations are adjusted for age and sex. With respect to geographic characteristics, this Statistical Brief examines geographic region and urban and rural areas using four categories ranging from large metropolitan to remote rural areas. All differences between estimates noted in the text, table, and figures are statistically significant at the 0.05 level or better.",2014-11-21 +25552483,Radiotherapy combined with the immunocytokine L19-IL2 provides long-lasting antitumor effects.,"

Purpose

Radiotherapy modifies the tumor microenvironment and causes the release of tumor antigens, which can enhance the effect of immunotherapy. L19 targets the extra domain B (ED-B) of fibronectin, a marker for tumor neoangiogenesis, and can be used as immunocytokine when coupled to IL2. We hypothesize that radiotherapy in combination with L19-IL2 provides an enhanced antitumor effect, which is dependent on ED-B expression.

Experimental design

Mice were injected with syngeneic C51 colon carcinoma, Lewis lung carcinoma (LLC), or 4T1 mammary carcinoma cells. Tumor growth delay, underlying immunologic parameters, and treatment toxicity were evaluated after single-dose local tumor irradiation and systemic administration of L19-IL2 or equimolar controls.

Results

ED-B expression was high, intermediate, and low for C51, LLC, and 4T1, respectively. The combination therapy showed (i) a long-lasting synergistic effect for the C51 model with 75% of tumors being cured, (ii) an additive effect for the LLC model, and (iii) no effect for the 4T1 model. The combination treatment resulted in a significantly increased cytotoxic (CD8(+)) T-cell population for both C51 and LLC. Depletion of CD8(+) T cells abolished the benefit of the combination therapy.

Conclusions

These data provide the first evidence for an increased therapeutic potential by combining radiotherapy with L19-IL2 in ED-B-positive tumors. This new opportunity in cancer treatment will be investigated in a phase I clinical study for patients with an oligometastatic solid tumor (NCT02086721). An animation summarizing our results is available at https://www.youtube.com/watch?v=xHbwQuCTkRc.",2014-12-31 +23969134,Bayesian Network Webserver: a comprehensive tool for biological network modeling.,"

Summary

The Bayesian Network Webserver (BNW) is a platform for comprehensive network modeling of systems genetics and other biological datasets. It allows users to quickly and seamlessly upload a dataset, learn the structure of the network model that best explains the data and use the model to understand relationships between network variables. Many datasets, including those used to create genetic network models, contain both discrete (e.g. genotype) and continuous (e.g. gene expression traits) variables, and BNW allows for modeling hybrid datasets. Users of BNW can incorporate prior knowledge during structure learning through an easy-to-use structural constraint interface. After structure learning, users are immediately presented with an interactive network model, which can be used to make testable hypotheses about network relationships.

Availability and implementation

BNW, including a downloadable structure learning package, is available at http://compbio.uthsc.edu/BNW. (The BNW interface for adding structural constraints uses HTML5 features that are not supported by current version of Internet Explorer. We suggest using other browsers (e.g. Google Chrome or Mozilla Firefox) when accessing BNW).

Contact

ycui2@uthsc.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-08-21 +23282024,TranSeqAnnotator: large-scale analysis of transcriptomic data.,"

Background

The transcriptome of an organism can be studied with the analysis of expressed sequence tag (EST) data sets that offers a rapid and cost effective approach with several new and updated bioinformatics approaches and tools for assembly and annotation. The comprehensive analyses comprehend an organism along with the genome and proteome analysis. With the advent of large-scale sequencing projects and generation of sequence data at protein and cDNA levels, automated analysis pipeline is necessary to store, organize and annotate ESTs.

Results

TranSeqAnnotator is a workflow for large-scale analysis of transcriptomic data with the most appropriate bioinformatics tools for data management and analysis. The pipeline automatically cleans, clusters, assembles and generates consensus sequences, conceptually translates these into possible protein products and assigns putative function based on various DNA and protein similarity searches. Excretory/secretory (ES) proteins inferred from ESTs/short reads are also identified. The TranSeqAnnotator accepts FASTA format raw and quality ESTs along with protein and short read sequences and are analysed with user selected programs. After pre-processing and assembly, the dataset is annotated at the nucleotide, protein and ES protein levels.

Conclusion

TranSeqAnnotator has been developed in a Linux cluster, to perform an exhaustive and reliable analysis and provide detailed annotation. TranSeqAnnotator outputs gene ontologies, protein functional identifications in terms of mapping to protein domains and metabolic pathways. The pipeline is applied to annotate large EST datasets to identify several novel and known genes with therapeutic experimental validations and could serve as potential targets for parasite intervention. TransSeqAnnotator is freely available for the scientific community at http://estexplorer.biolinfo.org/TranSeqAnnotator/.",2012-12-13 +24467261,Comparison and analysis of the animal models used to study the effect of morphine on tumour growth and metastasis.,"

Unlabelled

The effect of opioids on tumour growth and metastasis has been debated for many years, with recent emphasis on the possibility that they might influence the rate of disease-free survival after tumour resection when used in the perioperative pain management of cancer surgery patients. The literature presents conflicting and inconclusive in vitro and in vivo data about the potential effect of opioids, especially morphine, on tumour growth and metastasis. To inform clinical practice, appropriate animal models are needed to test whether opioids alter the course of tumour growth and metastasis. Here, we review the literature on animal-based studies testing the effect of morphine on cancer so far, and analyse differences between the models used that may explain the discrepancies in published results. Such analysis should elucidate the role of opioids in cancer and help define ideal pre-clinical models to provide definitive answers.

Linked articles

This article is part of a themed section on Opioids: New Pathways to Functional Selectivity. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2015.172.issue-2.",2014-07-01 +23610369,Automated annotation and quantification of glycans using liquid chromatography-mass spectrometry.,"

Unlabelled

As a common post-translational modification, protein glycosylation plays an important role in many biological processes, and it is known to be associated with human diseases. Mass spectrometry (MS)-based glycomic profiling techniques have been developed to measure the abundances of glycans in complex biological samples and applied to the discovery of putative glycan biomarkers. To automate the annotation of glycomic profiles in the liquid chromatography-MS (LC-MS) data, we present here a user-friendly software tool, MultiGlycan, implemented in C# on Windows systems. We tested MultiGlycan by using several glycomic profiling datasets acquired using LC-MS under different preparations and show that MultiGlycan executes fast and generates robust and reliable results.

Availability

MultiGlycan can be freely downloaded at http://darwin.informatics.indiana.edu/MultiGlycan/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-22 +24293656,SCOP2 prototype: a new approach to protein structure mining.,"We present a prototype of a new structural classification of proteins, SCOP2 (http://scop2.mrc-lmb.cam.ac.uk/), that we have developed recently. SCOP2 is a successor to the Structural Classification of Proteins (SCOP, http://scop.mrc-lmb.cam.ac.uk/scop/) database. Similarly to SCOP, the main focus of SCOP2 is to organize structurally characterized proteins according to their structural and evolutionary relationships. SCOP2 was designed to provide a more advanced framework for protein structure annotation and classification. It defines a new approach to the classification of proteins that is essentially different from SCOP, but retains its best features. The SCOP2 classification is described in terms of a directed acyclic graph in which nodes form a complex network of many-to-many relationships and are represented by a region of protein structure and sequence. The new classification project is expected to ensure new advances in the field and open new areas of research.",2013-11-29 +24289158,wKinMut: an integrated tool for the analysis and interpretation of mutations in human protein kinases.,"

Background

Protein kinases are involved in relevant physiological functions and a broad number of mutations in this superfamily have been reported in the literature to affect protein function and stability. Unfortunately, the exploration of the consequences on the phenotypes of each individual mutation remains a considerable challenge.

Results

The wKinMut web-server offers direct prediction of the potential pathogenicity of the mutations from a number of methods, including our recently developed prediction method based on the combination of information from a range of diverse sources, including physicochemical properties and functional annotations from FireDB and Swissprot and kinase-specific characteristics such as the membership to specific kinase groups, the annotation with disease-associated GO terms or the occurrence of the mutation in PFAM domains, and the relevance of the residues in determining kinase subfamily specificity from S3Det. This predictor yields interesting results that compare favourably with other methods in the field when applied to protein kinases. Together with the predictions, wKinMut offers a number of integrated services for the analysis of mutations. These include: the classification of the kinase, information about associations of the kinase with other proteins extracted from iHop, the mapping of the mutations onto PDB structures, pathogenicity records from a number of databases and the classification of mutations in large-scale cancer studies. Importantly, wKinMut is connected with the SNP2L system that extracts mentions of mutations directly from the literature, and therefore increases the possibilities of finding interesting functional information associated to the studied mutations.

Conclusions

wKinMut facilitates the exploration of the information available about individual mutations by integrating prediction approaches with the automatic extraction of information from the literature (text mining) and several state-of-the-art databases. wKinMut has been used during the last year for the analysis of the consequences of mutations in the context of a number of cancer genome projects, including the recent analysis of Chronic Lymphocytic Leukemia cases and is publicly available at http://wkinmut.bioinfo.cnio.es.",2013-11-29 +25956004,Biased Exposure-Health Effect Estimates from Selection in Cohort Studies: Are Environmental Studies at Particular Risk?,"

Background

The process of creating a cohort or cohort substudy may induce misleading exposure-health effect associations through collider stratification bias (i.e., selection bias) or bias due to conditioning on an intermediate. Studies of environmental risk factors may be at particular risk.

Objectives

We aimed to demonstrate how such biases of the exposure-health effect association arise and how one may mitigate them.

Methods

We used directed acyclic graphs and the example of bone lead and mortality (all-cause, cardiovascular, and ischemic heart disease) among 835 white men in the Normative Aging Study (NAS) to illustrate potential bias related to recruitment into the NAS and the bone lead substudy. We then applied methods (adjustment, restriction, and inverse probability of attrition weighting) to mitigate these biases in analyses using Cox proportional hazards models to estimate adjusted hazard ratios (HRs) and 95% confidence intervals (CIs).

Results

Analyses adjusted for age at bone lead measurement, smoking, and education among all men found HRs (95% CI) for the highest versus lowest tertile of patella lead of 1.34 (0.90, 2.00), 1.46 (0.86, 2.48), and 2.01 (0.86, 4.68) for all-cause, cardiovascular, and ischemic heart disease mortality, respectively. After applying methods to mitigate the biases, the HR (95% CI) among the 637 men analyzed were 1.86 (1.12, 3.09), 2.47 (1.23, 4.96), and 5.20 (1.61, 16.8), respectively.

Conclusions

Careful attention to the underlying structure of the observed data is critical to identifying potential biases and methods to mitigate them. Understanding factors that influence initial study participation and study loss to follow-up is critical. Recruitment of population-based samples and enrolling participants at a younger age, before the potential onset of exposure-related health effects, can help reduce these potential pitfalls.

Citation

Weisskopf MG, Sparrow D, Hu H, Power MC. 2015. Biased exposure-health effect estimates from selection in cohort studies: are environmental studies at particular risk? Environ Health Perspect 123:1113-1122; http://dx.doi.org/10.1289/ehp.1408888.",2015-05-08 +25859686,An Assessment of Participatory Integrated Vector Management for Malaria Control in Kenya.,"

Background

The World Health Organization (WHO) recommends integrated vector management (IVM) as a strategy to improve and sustain malaria vector control. However, this approach has not been widely adopted.

Objectives

We comprehensively assessed experiences and findings on IVM in Kenya with a view to sharing lessons that might promote its wider application.

Methods

The assessment used information from a qualitative external evaluation of two malaria IVM projects implemented between 2006 and 2011 and an analysis of their accumulated entomological and malaria case data. The project sites were Malindi and Nyabondo, located in coastal and western Kenya, respectively. The assessment focused on implementation of five key elements of IVM: integration of vector control methods, evidence-based decision making, intersectoral collaboration, advocacy and social mobilization, and capacity building.

Results

IVM was more successfully implemented in Malindi than in Nyabondo owing to greater community participation and multistakeholder engagement. There was a significant decline in the proportion of malaria cases among children admitted to Malindi Hospital, from 23.7% in 2006 to 10.47% in 2011 (p < 0.001). However, the projects' operational research methodology did not allow statistical attribution of the decline in malaria and malaria vectors to specific IVM interventions or other factors.

Conclusions

Sustaining IVM is likely to require strong participation and support from multiple actors, including community-based groups, non-governmental organizations, international and national research institutes, and various government ministries. A cluster-randomized controlled trial would be essential to quantify the effectiveness and impact of specific IVM interventions, alone or in combination.

Citation

Mutero CM, Mbogo C, Mwangangi J, Imbahale S, Kibe L, Orindi B, Girma M, Njui A, Lwande W, Affognon H, Gichuki C, Mukabana WR. 2015. An assessment of participatory integrated vector management for malaria control in Kenya. Environ Health Perspect 123:1145-1151; http://dx.doi.org/10.1289/ehp.1408748.",2015-04-10 +24564336,ProphNet: a generic prioritization method through propagation of information.,"

Background

Prioritization methods have become a useful tool for mining large amounts of data to suggest promising hypotheses in early research stages. Particularly, network-based prioritization tools use a network representation for the interactions between different biological entities to identify novel indirect relationships. However, current network-based prioritization tools are strongly tailored to specific domains of interest (e.g. gene-disease prioritization) and they do not allow to consider networks with more than two types of entities (e.g. genes and diseases). Therefore, the direct application of these methods to accomplish new prioritization tasks is limited.

Results

This work presents ProphNet, a generic network-based prioritization tool that allows to integrate an arbitrary number of interrelated biological entities to accomplish any prioritization task. We tested the performance of ProphNet in comparison with leading network-based prioritization methods, namely rcNet and DomainRBF, for gene-disease and domain-disease prioritization, respectively. The results obtained by ProphNet show a significant improvement in terms of sensitivity and specificity for both tasks. We also applied ProphNet to disease-gene prioritization on Alzheimer, Diabetes Mellitus Type 2 and Breast Cancer to validate the results and identify putative candidate genes involved in these diseases.

Conclusions

ProphNet works on top of any heterogeneous network by integrating information of different types of biological entities to rank entities of a specific type according to their degree of relationship with a query set of entities of another type. Our method works by propagating information across data networks and measuring the correlation between the propagated values for a query and a target sets of entities. ProphNet is available at: http://genome2.ugr.es/prophnet. A Matlab implementation of the algorithm is also available at the website.",2014-01-10 +25664223,3DProIN: Protein-Protein Interaction Networks and Structure Visualization.,"3DProIN is a computational tool to visualize protein-protein interaction networks in both two dimensional (2D) and three dimensional (3D) view. It models protein-protein interactions in a graph and explores the biologically relevant features of the tertiary structures of each protein in the network. Properties such as color, shape and name of each node (protein) of the network can be edited in either 2D or 3D views. 3DProIN is implemented using 3D Java and C programming languages. The internet crawl technique is also used to parse dynamically grasped protein interactions from protein data bank (PDB). It is a java applet component that is embedded in the web page and it can be used on different platforms including Linux, Mac and Window using web browsers such as Firefox, Internet Explorer, Chrome and Safari. It also was converted into a mac app and submitted to the App store as a free app. Mac users can also download the app from our website. 3DProIN is available for academic research at http://bicompute.appspot.com.",2014-06-01 +23681122,CAPITO--a web server-based analysis and plotting tool for circular dichroism data.,"

Motivation

Circular dichroism (CD) spectroscopy is one of the most versatile tools to study protein folding and to validate the proper fold of purified proteins. Here, we aim to provide a readily accessible, user-friendly and platform-independent tool capable of analysing multiple CD datasets of virtually any format and returning results as high-quality graphical output to the user.

Results

CAPITO (CD Analysis and Plotting Tool) is a novel web server-based tool for analysing and plotting CD data. It allows reliable estimation of secondary structure content utilizing different approaches. CAPITO accepts multiple CD datasets and, hence, is well suited for a wide application range such as the analysis of temperature or pH-dependent (un)folding and the comparison of mutants.

Availability

http://capito.nmr.fli-leibniz.de.

Contact

cwiede@fli-leibniz.de or mago@fli-leibniz.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-15 +24293647,ASD v2.0: updated content and novel features focusing on allosteric regulation.,"Allostery is the most direct and efficient way for regulation of biological macromolecule function and is induced by the binding of a ligand at an allosteric site topographically distinct from the orthosteric site. AlloSteric Database (ASD, http://mdl.shsmu.edu.cn/ASD) has been developed to provide comprehensive information on allostery. Owing to the inherent high receptor selectivity and lower target-based toxicity, allosteric regulation is expected to assume a more prominent role in drug discovery and bioengineering, leading to the rapid growth of allosteric findings. In this updated version, ASD v2.0 has expanded to 1286 allosteric proteins, 565 allosteric diseases and 22 008 allosteric modulators. A total of 907 allosteric site-modulator structural complexes and >200 structural pairs of orthosteric/allosteric sites in the allosteric proteins were constructed for researchers to develop allosteric site and pathway tools in response to community demands. Up-to-date allosteric pathways were manually curated in the updated version. In addition, both the front-end and the back-end of ASD have been redesigned and enhanced to allow more efficient access. Taken together, these updates are useful for facilitating the investigation of allosteric mechanisms, allosteric target identification and allosteric drug discovery.",2013-11-28 +23677617,SGAtools: one-stop analysis and visualization of array-based genetic interaction screens.,"Screening genome-wide sets of mutants for fitness defects provides a simple but powerful approach for exploring gene function, mapping genetic networks and probing mechanisms of drug action. 
For yeast and other microorganisms with global mutant collections, genetic or chemical-genetic interactions can be effectively quantified by growing an ordered array of strains on agar plates as individual colonies, and then scoring the colony size changes in response to a genetic or environmental perturbation. To do so, requires efficient tools for the extraction and analysis of quantitative data. Here, we describe SGAtools (http://sgatools.ccbr.utoronto.ca), a web-based analysis system for designer genetic screens. SGAtools outlines a series of guided steps that allow the user to quantify colony sizes from images of agar plates, correct for systematic biases in the observations and calculate a fitness score relative to a control experiment. The data can also be visualized online to explore the colony sizes on individual plates, view the distribution of resulting scores, highlight genes with the strongest signal and perform Gene Ontology enrichment analysis.",2013-05-15 +22270192,International spinal cord injury pulmonary function basic data set.,"

Objectives

To develop the International Spinal Cord Injury (SCI) Pulmonary Function Basic Data Set within the framework of the International SCI Data Sets in order to facilitate consistent collection and reporting of basic bronchopulmonary findings in the SCI population.

Setting

International.

Methods

The SCI Pulmonary Function Data Set was developed by an international working group. The initial data set document was revised on the basis of suggestions from members of the Executive Committee of the International SCI Standards and Data Sets, the International Spinal Cord Society (ISCoS) Executive and Scientific Committees, American Spinal Injury Association (ASIA) Board, other interested organizations and societies and individual reviewers. In addition, the data set was posted for 2 months on ISCoS and ASIA websites for comments.

Results

The final International SCI Pulmonary Function Data Set contains questions on the pulmonary conditions diagnosed before spinal cord lesion, if available, to be obtained only once; smoking history; pulmonary complications and conditions after the spinal cord lesion, which may be collected at any time. These data include information on pneumonia, asthma, chronic obstructive pulmonary disease and sleep apnea. Current utilization of ventilator assistance including mechanical ventilation, diaphragmatic pacing, phrenic nerve stimulation and Bi-level positive airway pressure can be reported, as well as results from pulmonary function testing, including: forced vital capacity, forced expiratory volume in one second and peak expiratory flow. The complete instructions for data collection and the data sheet itself are freely available on the website of ISCoS (http://www.iscos.org.uk).",2012-01-24 +24967953,Trans3D: a free tool for dynamical visualization of EEG activity transmission in the brain.,"The problem of functional connectivity in the brain is in the focus of attention nowadays, since it is crucial for understanding information processing in the brain. A large repertoire of measures of connectivity have been devised, some of them being capable of estimating time-varying directed connectivity. Hence, there is a need for a dedicated software tool for visualizing the propagation of electrical activity in the brain. To this aim, the Trans3D application was developed. It is an open access tool based on widely available libraries and supporting both Windows XP/Vista/7(™), Linux and Mac environments. Trans3D can create animations of activity propagation between electrodes/sensors, which can be placed by the user on the scalp/cortex of a 3D model of the head. Various interactive graphic functions for manipulating and visualizing components of the 3D model and input data are available. 
An application of the Trans3D tool has helped to elucidate the dynamics of the phenomena of information processing in motor and cognitive tasks, which otherwise would have been very difficult to observe. Trans3D is available at: http://www.eeg.pl/.",2014-05-29 +25712836,A meta-analysis of limb-salvage versus amputation in the treatment of patients with Enneking II pathologic fracture osteosarcoma.,"INTRODUCTION: The aim of this meta-analysis was to further explore whether the relapse, 5-year survival and metastasis the same or not between limb-salvage and amputation in the treatment of patients with limited stage Enneking II pathologic fracture osteosarcoma. MATERIALS AND METHODS: An electronic search of the Medline, EMBASE and CNKI was done on October 2014. The clinical studies about amputation or limb-salvage surgery in the treatment of patients with limited stage Enneking II pathologic fracture osteosarcoma were searched and reviewed. The effect size of relapse, 5-year survival and metastasis between the amputation and limb-salvage surgery were pooled by stata11.0 software (Stata Corporation, College Station, TX, USA, http://www.stata.com;) using random or fixed effect model. The funnel plot and Egger's line regression test were used for evaluation of publication bias. RESULTS: A total of 89 studies were identified and seven articles with 200 cases in the limb-salvage surgery group and 84 subjects in the amputation group were finally included in the meta-analysis. The pooled data indicated that no statistical different of risk for developing relapse between limb-salvage and amputation was found relative risk (RR) =1.40, 95% confidence interval (CI): 0.71-2.79, (P = 0.33). 
The 5-year survival rate of patients underwent limb-salvage surgery was smaller than patients received amputation RR = 1.86, 95%CI: 1.19-2.89, (P = 0.01); the metastasis rate of patients underwent limb-salvage surgery was significant decreased compared with patients received amputation RR = 0.56, 95% CI: 0.34-0.94, (P = 0.03). No publication bias was existed in this meta-analysis. CONCLUSION: Limb-salvage surgery does not increased the risk of relapse compared with amputation in the treatment of patients with limited stage Enneking II pathologic fracture osteosarcoma.",2015-02-01 +22674656,Biases and errors on allele frequency estimation and disease association tests of next-generation sequencing of pooled samples.,"Next-generation sequencing is widely used to study complex diseases because of its ability to identify both common and rare variants without prior single nucleotide polymorphism (SNP) information. Pooled sequencing of implicated target regions can lower costs and allow more samples to be analyzed, thus improving statistical power for disease-associated variant detection. Several methods for disease association tests of pooled data and for optimal pooling designs have been developed under certain assumptions of the pooling process, for example, equal/unequal contributions to the pool, sequencing depth variation, and error rate. However, these simplified assumptions may not portray the many factors affecting pooled sequencing data quality, such as PCR amplification during target capture and sequencing, reference allele preferential bias, and others. As a result, the properties of the observed data may differ substantially from those expected under the simplified assumptions. Here, we use real datasets from targeted sequencing of pooled samples, together with microarray SNP genotypes of the same subjects, to identify and quantify factors (biases and errors) affecting the observed sequencing data. 
Through simulations, we find that these factors have a significant impact on the accuracy of allele frequency estimation and the power of association tests. Furthermore, we develop a workflow protocol to incorporate these factors in data analysis to reduce the potential biases and errors in pooled sequencing data and to gain better estimation of allele frequencies. The workflow, Psafe, is available at http://bioinformatics.med.yale.edu/group/.",2012-06-06 +24447569,CapR: revealing structural specificities of RNA-binding protein target recognition using CLIP-seq data.,"RNA-binding proteins (RBPs) bind to their target RNA molecules by recognizing specific RNA sequences and structural contexts. The development of CLIP-seq and related protocols has made it possible to exhaustively identify RNA fragments that bind to RBPs. However, no efficient bioinformatics method exists to reveal the structural specificities of RBP-RNA interactions using these data. We present CapR, an efficient algorithm that calculates the probability that each RNA base position is located within each secondary structural context. Using CapR, we demonstrate that several RBPs bind to their target RNA molecules under specific structural contexts. CapR is available at https://sites.google.com/site/fukunagatsu/software/capr.",2014-01-21 +24276536,Disentangling the population structure and evolution of the clam pathogen Vibrio tapetis.,"Vibrio tapetis is a fastidious slow-growing microorganism that causes the Brown Ring Disease in clams. Recently, two subspecies for this bacterial pathogen have been proposed. We have developed a multilocus sequence typing scheme and performed evolutionary studies of V. tapetis population using the great majority of isolates of V. tapetis obtained worldwide until now (30 isolates). V. tapetis constitutes a high polymorphic population, showing low diversity indexes and some genetic discontinuity among the isolates. 
Mutation events are more frequent than recombination, although both are approximately equally important for genetic diversification. In fact, the divergence between subspecies occurred exclusively by mutation but the diversity observed among isolates of the same subspecies appeared to be generated mostly by recombination. Between the subspecies, genetic distance is very high and almost no recurrent gene flow exists. This pathogen displays a non-clonal population structure with an ancient spatial segregation population and some degree of geographical isolation, followed by a population expansion, at least for V. tapetis subsp. tapetis. A database from this study was created and hosted on pubmlst.org ( http://pubmlst.org/vtapetis/ ).",2013-11-26 +24285297,rSNPBase: a database for curated regulatory SNPs.,"In recent years, human regulatory SNPs (rSNPs) have been widely studied. Here, we present database rSNPBase, freely available at http://rsnp.psych.ac.cn/, to provide curated rSNPs that analyses the regulatory features of all SNPs in the human genome with reference to experimentally supported regulatory elements. In contrast with previous SNP functional annotation databases, rSNPBase is characterized by several unique features. (i) To improve reliability, all SNPs in rSNPBase are annotated with reference to experimentally supported regulatory elements. (ii) rSNPBase focuses on rSNPs involved in a wide range of regulation types, including proximal and distal transcriptional regulation and post-transcriptional regulation, and identifies their potentially regulated genes. (iii) Linkage disequilibrium (LD) correlations between SNPs were analysed so that the regulatory feature is annotated to SNP-set rather than a single SNP. (iv) rSNPBase provides the spatio-temporal labels and experimental eQTL labels for SNPs. 
In summary, rSNPBase provides more reliable, comprehensive and user-friendly regulatory annotations on rSNPs and will assist researchers in selecting candidate SNPs for further genetic studies and in exploring causal SNPs for in-depth molecular mechanisms of complex phenotypes.",2013-11-26 +24285302,Locus Reference Genomic: reference sequences for the reporting of clinically relevant sequence variants.,"Locus Reference Genomic (LRG; http://www.lrg-sequence.org/) records contain internationally recognized stable reference sequences designed specifically for reporting clinically relevant sequence variants. Each LRG is contained within a single file consisting of a stable 'fixed' section and a regularly updated 'updatable' section. The fixed section contains stable genomic DNA sequence for a genomic region, essential transcripts and proteins for variant reporting and an exon numbering system. The updatable section contains mapping information, annotation of all transcripts and overlapping genes in the region and legacy exon and amino acid numbering systems. LRGs provide a stable framework that is vital for reporting variants, according to Human Genome Variation Society (HGVS) conventions, in genomic DNA, transcript or protein coordinates. To enable translation of information between LRG and genomic coordinates, LRGs include mapping to the human genome assembly. LRGs are compiled and maintained by the National Center for Biotechnology Information (NCBI) and European Bioinformatics Institute (EBI). LRG reference sequences are selected in collaboration with the diagnostic and research communities, locus-specific database curators and mutation consortia. Currently >700 LRGs have been created, of which >400 are publicly available. The aim is to create an LRG for every locus with clinical implications.",2013-11-26 +22761696,NormaCurve: a SuperCurve-based method that simultaneously quantifies and normalizes reverse phase protein array data.,"

Motivation

Reverse phase protein array (RPPA) is a powerful dot-blot technology that allows studying protein expression levels as well as post-translational modifications in a large number of samples simultaneously. Yet, correct interpretation of RPPA data has remained a major challenge for its broad-scale application and its translation into clinical research. Satisfying quantification tools are available to assess a relative protein expression level from a serial dilution curve. However, appropriate tools allowing the normalization of the data for external sources of variation are currently missing.

Results

Here we propose a new method, called NormaCurve, that allows simultaneous quantification and normalization of RPPA data. For this, we modified the quantification method SuperCurve in order to include normalization for (i) background fluorescence, (ii) variation in the total amount of spotted protein and (iii) spatial bias on the arrays. Using a spike-in design with a purified protein, we test the capacity of different models to properly estimate normalized relative expression levels. The best performing model, NormaCurve, takes into account a negative control array without primary antibody, an array stained with a total protein stain and spatial covariates. We show that this normalization is reproducible and we discuss the number of serial dilutions and the number of replicates that are required to obtain robust data. We thus provide a ready-to-use method for reliable and reproducible normalization of RPPA data, which should facilitate the interpretation and the development of this promising technology.

Availability

The raw data, the scripts and the normacurve package are available at the following web site: http://microarrays.curie.fr.",2012-06-28 +23515528,PHISTO: pathogen-host interaction search tool.,"

Summary

Knowledge of pathogen-host protein interactions is required to better understand infection mechanisms. The pathogen-host interaction search tool (PHISTO) is a web-accessible platform that provides relevant information about pathogen-host interactions (PHIs). It enables access to the most up-to-date PHI data for all pathogen types for which experimentally verified protein interactions with human are available. The platform also offers integrated tools for visualization of PHI networks, graph-theoretical analysis of targeted human proteins, BLAST search and text mining for detecting missing experimental methods. PHISTO will facilitate PHI studies that provide potential therapeutic targets for infectious diseases.

Availability

http://www.phisto.org.

Contact

saliha.durmus@boun.edu.tr

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-03-20 +23379655,puma 3.0: improved uncertainty propagation methods for gene and transcript expression analysis.,"

Background

Microarrays have been a popular tool for gene expression profiling at genome-scale for over a decade due to the low cost, short turn-around time, excellent quantitative accuracy and ease of data generation. The Bioconductor package puma incorporates a suite of analysis methods for determining uncertainties from Affymetrix GeneChip data and propagating these uncertainties to downstream analysis. As isoform level expression profiling receives more and more interest within genomics in recent years, exon microarray technology offers an important tool to quantify expression level of the majority of exons and enables the possibility of measuring isoform level expression. However, puma does not include methods for the analysis of exon array data. Moreover, the current expression summarisation method for Affymetrix 3' GeneChip data suffers from instability for low expression genes. For the downstream analysis, the method for differential expression detection is computationally intensive and the original expression clustering method does not consider the variance across the replicated technical and biological measurements. It is therefore necessary to develop improved uncertainty propagation methods for gene and transcript expression analysis.

Results

We extend the previously developed Bioconductor package puma with a new method especially designed for GeneChip Exon arrays and a set of improved downstream approaches. The improvements include: (i) a new gamma model for exon arrays which calculates isoform and gene expression measurements and a level of uncertainty associated with the estimates, using the multi-mappings between probes, isoforms and genes, (ii) a variant of the existing approach for the probe-level analysis of Affymetrix 3' GeneChip data to produce more stable gene expression estimates, (iii) an improved method for detecting differential expression which is computationally more efficient than the existing approach in the package and (iv) an improved method for robust model-based clustering of gene expression, which takes technical and biological replicate information into consideration.

Conclusions

With the extensions and improvements, the puma package is now applicable to the analysis of both Affymetrix 3' GeneChips and Exon arrays for gene and isoform expression estimation. It propagates the uncertainty of expression measurements into more efficient and comprehensive downstream analysis at both gene and isoform level. Downstream methods are also applicable to other expression quantification platforms, such as RNA-Seq, when uncertainty information is available from expression measurements. puma is available through Bioconductor and can be found at http://www.bioconductor.org.",2013-02-05 +22986687,CING: an integrated residue-based structure validation program suite.,"We present a suite of programs, named CING for Common Interface for NMR Structure Generation that provides for a residue-based, integrated validation of the structural NMR ensemble in conjunction with the experimental restraints and other input data. External validation programs and new internal validation routines compare the NMR-derived models with empirical data, measured chemical shifts, distance- and dihedral restraints and the results are visualized in a dynamic Web 2.0 report. A red-orange-green score is used for residues and restraints to direct the user to those critiques that warrant further investigation. Overall green scores below ~20 % accompanied by red scores over ~50 % are strongly indicative of poorly modelled structures. The publically accessible, secure iCing webserver ( https://nmr.le.ac.uk ) allows individual users to upload the NMR data and run a CING validation analysis.",2012-09-18 +24511080,HiBrowse: multi-purpose statistical analysis of genome-wide chromatin 3D organization.,"

Unlabelled

Recently developed methods that couple next-generation sequencing with chromosome conformation capture-based techniques, such as Hi-C and ChIA-PET, allow for characterization of genome-wide chromatin 3D structure. Understanding the organization of chromatin in three dimensions is a crucial next step in the unraveling of global gene regulation, and methods for analyzing such data are needed. We have developed HiBrowse, a user-friendly web-tool consisting of a range of hypothesis-based and descriptive statistics, using realistic assumptions in null-models.

Availability and implementation

HiBrowse is supported by all major browsers, and is freely available at http://hyperbrowser.uio.no/3d. Software is implemented in Python, and source code is available for download by following instructions on the main site.",2014-02-07 +24275496,CyanoBase and RhizoBase: databases of manually curated annotations for cyanobacterial and rhizobial genomes.,"To understand newly sequenced genomes of closely related species, comprehensively curated reference genome databases are becoming increasingly important. We have extended CyanoBase (http://genome.microbedb.jp/cyanobase), a genome database for cyanobacteria, and newly developed RhizoBase (http://genome.microbedb.jp/rhizobase), a genome database for rhizobia, nitrogen-fixing bacteria associated with leguminous plants. Both databases focus on the representation and reusability of reference genome annotations, which are continuously updated by manual curation. Domain experts have extracted names, products and functions of each gene reported in the literature. To ensure effectiveness of this procedure, we developed the TogoAnnotation system offering a web-based user interface and a uniform storage of annotations for the curators of the CyanoBase and RhizoBase databases. The number of references investigated for CyanoBase increased from 2260 in our previous report to 5285, and for RhizoBase, we perused 1216 references. The results of these intensive annotations are displayed on the GeneView pages of each database. Advanced users can also retrieve this information through the representational state transfer-based web application programming interface in an automated manner.",2013-11-25 +24275491,PhylomeDB v4: zooming into the plurality of evolutionary histories of a genome.,"Phylogenetic trees representing the evolutionary relationships of homologous genes are the entry point for many evolutionary analyses. 
For instance, the use of a phylogenetic tree can aid in the inference of orthology and paralogy relationships, and in the detection of relevant evolutionary events such as gene family expansions and contractions, horizontal gene transfer, recombination or incomplete lineage sorting. Similarly, given the plurality of evolutionary histories among genes encoded in a given genome, there is a need for the combined analysis of genome-wide collections of phylogenetic trees (phylomes). Here, we introduce a new release of PhylomeDB (http://phylomedb.org), a public repository of phylomes. Currently, PhylomeDB hosts 120 public phylomes, comprising >1.5 million maximum likelihood trees and multiple sequence alignments. In the current release, phylogenetic trees are annotated with taxonomic, protein-domain arrangement, functional and evolutionary information. PhylomeDB is also a major source for phylogeny-based predictions of orthology and paralogy, covering >10 million proteins across 1059 sequenced species. Here we describe newly implemented PhylomeDB features, and discuss a benchmark of the orthology predictions provided by the database, the impact of proteome updates and the use of the phylome approach in the analysis of newly sequenced genomes and transcriptomes.",2013-11-25 +22374109,Analysis of the IJCNN 2011 UTL challenge.,"We organized a challenge in ""Unsupervised and Transfer Learning"": the UTL challenge (http://clopinet.com/ul). We made available large datasets from various application domains: handwriting recognition, image recognition, video processing, text processing, and ecology. The goal was to learn data representations that capture regularities of an input space for re-use across tasks. The representations were evaluated on supervised learning ""target tasks"" unknown to the participants. The first phase of the challenge was dedicated to ""unsupervised transfer learning"" (the competitors were given only unlabeled data). 
The second phase was dedicated to ""cross-task transfer learning"" (the competitors were provided with a limited amount of labeled data from ""source tasks"", distinct from the ""target tasks""). The analysis indicates that learned data representations yield significantly better results than those obtained with original data or data preprocessed with standard normalizations and functional transforms.",2012-02-14 +21624156,Integration and publication of heterogeneous text-mined relationships on the Semantic Web.,"

Background

Advances in Natural Language Processing (NLP) techniques enable the extraction of fine-grained relationships mentioned in biomedical text. The variability and the complexity of natural language in expressing similar relationships causes the extracted relationships to be highly heterogeneous, which makes the construction of knowledge bases difficult and poses a challenge in using these for data mining or question answering.

Results

We report on the semi-automatic construction of the PHARE relationship ontology (the PHArmacogenomic RElationships Ontology) consisting of 200 curated relations from over 40,000 heterogeneous relationships extracted via text-mining. These heterogeneous relations are then mapped to the PHARE ontology using synonyms, entity descriptions and hierarchies of entities and roles. Once mapped, relationships can be normalized and compared using the structure of the ontology to identify relationships that have similar semantics but different syntax. We compare and contrast the manual procedure with a fully automated approach using WordNet to quantify the degree of integration enabled by iterative curation and refinement of the PHARE ontology. The result of such integration is a repository of normalized biomedical relationships, named PHARE-KB, which can be queried using Semantic Web technologies such as SPARQL and can be visualized in the form of a biological network.

Conclusions

The PHARE ontology serves as a common semantic framework to integrate more than 40,000 relationships pertinent to pharmacogenomics. The PHARE ontology forms the foundation of a knowledge base named PHARE-KB. Once populated with relationships, PHARE-KB (i) can be visualized in the form of a biological network to guide human tasks such as database curation and (ii) can be queried programmatically to guide bioinformatics applications such as the prediction of molecular interactions. PHARE is available at http://purl.bioontology.org/ontology/PHARE.",2011-05-17 +22559291,PhenoLink--a web-tool for linking phenotype to ~omics data for bacteria: application to gene-trait matching for Lactobacillus plantarum strains.,"

Background

Linking phenotypes to high-throughput molecular biology information generated by ~omics technologies allows revealing cellular mechanisms underlying an organism's phenotype. ~Omics datasets are often very large and noisy with many features (e.g., genes, metabolite abundances). Thus, associating phenotypes to ~omics data requires an approach that is robust to noise and can handle large and diverse data sets.

Results

We developed a web-tool PhenoLink (http://bamics2.cmbi.ru.nl/websoftware/phenolink/) that links phenotype to ~omics data sets using well-established as well as new techniques. PhenoLink imputes missing values and preprocesses input data (i) to decrease inherent noise in the data and (ii) to counterbalance pitfalls of the Random Forest algorithm, on which feature (e.g., gene) selection is based. Preprocessed data is used in feature (e.g., gene) selection to identify relations to phenotypes. We applied PhenoLink to identify gene-phenotype relations based on the presence/absence of 2847 genes in 42 Lactobacillus plantarum strains and phenotypic measurements of these strains in several experimental conditions, including growth on sugars and nitrogen-dioxide production. Genes were ranked based on their importance (predictive value) to correctly predict the phenotype of a given strain. In addition to known gene to phenotype relations we also found novel relations.

Conclusions

PhenoLink is an easily accessible web-tool to facilitate identifying relations from large and often noisy phenotype and ~omics datasets. Visualization of links to phenotypes offered in PhenoLink allows prioritizing links, finding relations between features, finding relations between phenotypes, and identifying outliers in phenotype data. PhenoLink can be used to uncover phenotype links to a multitude of ~omics data, e.g., gene presence/absence (determined by e.g.: CGH or next-generation sequencing), gene expression (determined by e.g.: microarrays or RNA-seq), or metabolite abundance (determined by e.g.: GC-MS).",2012-05-04 +21349868,Gene List significance at-a-glance with GeneValorization.,"

Motivation

High-throughput technologies provide fundamental information concerning thousands of genes. Many of the current research laboratories daily use one or more of these technologies and end up with lists of genes. Assessing the originality of the results obtained includes being aware of the number of publications available concerning individual or multiple genes and accessing information about these publications. Faced with the exponential growth of publications available and number of genes involved in a study, this task is becoming particularly difficult to achieve.

Results

We introduce GeneValorization, a web-based tool that gives a clear and handful overview of the bibliography available corresponding to the user input formed by (i) a gene list (expressed by gene names or ids from EntrezGene) and (ii) a context of study (expressed by keywords). From this input, GeneValorization provides a matrix containing the number of publications with co-occurrences of gene names and keywords. Graphics are automatically generated to assess the relative importance of genes within various contexts. Links to publications and other databases offering information on genes and keywords are also available. To illustrate how helpful GeneValorization is, we will consider the gene list of the OncotypeDX prognostic marker test.

Availability

http://bioguide-project.net/gv

Contact

cohen@lri.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-23 +23511542,FunFrame: functional gene ecological analysis pipeline.,"

Summary

Pyrosequencing of 16S rDNA is widely used to study microbial communities, and a rich set of software tools support this analysis. Pyrosequencing of protein-coding genes, which can help elucidate functional differences among microbial communities, significantly lags behind 16S rDNA in availability of sequence analysis software. In both settings, frequent homopolymer read errors inflate the estimation of microbial diversity, and de-noising is required to reduce that bias. Here we describe FunFrame, an R-based data-analysis pipeline that uses recently described algorithms to de-noise functional gene pyrosequences and performs ecological analysis on de-noised sequence data. The novelty of this pipeline is that it provides users a unified set of tools, adapted from disparate sources and designed for different applications, that can be used to examine a particular protein coding gene of interest. We evaluated FunFrame on functional genes from four PCR-amplified clones with sequence depths ranging from 9084 to 14 494 sequences. FunFrame produced from one to nine Operational Taxonomic Units for each clone, resulting in an error rate ranging from 0 to 0.18%. Importantly, FunFrame reduced spurious diversity while retaining more sequences than a commonly used de-noising method that discards sequences with frameshift errors.

Availability

Software, documentation and a complete set of sample data files are available at http://faculty.www.umb.edu/jennifer.bowen/software/FunFrame.zip.",2013-03-19 +24963138,Diagnostically relevant facial gestalt information from ordinary photos.,"Craniofacial characteristics are highly informative for clinical geneticists when diagnosing genetic diseases. As a first step towards the high-throughput diagnosis of ultra-rare developmental diseases we introduce an automatic approach that implements recent developments in computer vision. This algorithm extracts phenotypic information from ordinary non-clinical photographs and, using machine learning, models human facial dysmorphisms in a multidimensional 'Clinical Face Phenotype Space'. The space locates patients in the context of known syndromes and thereby facilitates the generation of diagnostic hypotheses. Consequently, the approach will aid clinicians by greatly narrowing (by 27.6-fold) the search space of potential diagnoses for patients with suspected developmental disorders. Furthermore, this Clinical Face Phenotype Space allows the clustering of patients by phenotype even when no known syndrome diagnosis exists, thereby aiding disease identification. We demonstrate that this approach provides a novel method for inferring causative genetic variants from clinical sequencing data through functional genetic pathway comparisons.DOI: http://dx.doi.org/10.7554/eLife.02020.001.",2014-06-24 +22962486,Improving HIV coreceptor usage prediction in the clinic using hints from next-generation sequencing data.,"

Motivation

Due to the high mutation rate of human immunodeficiency virus (HIV), drug-resistant-variants emerge frequently. Therefore, researchers are constantly searching for new ways to attack the virus. One new class of anti-HIV drugs is the class of coreceptor antagonists that block cell entry by occupying a coreceptor on CD4 cells. This type of drug just has an effect on the subset of HIVs that use the inhibited coreceptor. A good prediction of whether the viral population inside a patient is susceptible to the treatment is hence very important for therapy decisions and pre-requisite to administering the respective drug. The first prediction models were based on data from Sanger sequencing of the V3 loop of HIV. Recently, a method based on next-generation sequencing (NGS) data was introduced that predicts labels for each read separately and decides on the patient label through a percentage threshold for the resistant viral minority.

Results

We model the prediction problem on the patient level taking the information of all reads from NGS data jointly into account. This enables us to improve prediction performance for NGS data, but we can also use the trained model to improve predictions based on Sanger sequencing data. Therefore, also laboratories without NGS capabilities can benefit from the improvements. Furthermore, we show which amino acids at which position are important for prediction success, giving clues on how the interaction mechanism between the V3 loop and the particular coreceptors might be influenced.

Availability

A webserver is available at http://coreceptor.bioinf.mpi-inf.mpg.de.

Contact

nico.pfeifer@mpi-inf.mpg.de.",2012-09-01 +24271400,"ModBase, a database of annotated comparative protein structure models and associated resources.","ModBase (http://salilab.org/modbase) is a database of annotated comparative protein structure models. The models are calculated by ModPipe, an automated modeling pipeline that relies primarily on Modeller for fold assignment, sequence-structure alignment, model building and model assessment (http://salilab.org/modeller/). ModBase currently contains almost 30 million reliable models for domains in 4.7 million unique protein sequences. ModBase allows users to compute or update comparative models on demand, through an interface to the ModWeb modeling server (http://salilab.org/modweb). ModBase models are also available through the Protein Model Portal (http://www.proteinmodelportal.org/). Recently developed associated resources include the AllosMod server for modeling ligand-induced protein dynamics (http://salilab.org/allosmod), the AllosMod-FoXS server for predicting a structural ensemble that fits an SAXS profile (http://salilab.org/allosmod-foxs), the FoXSDock server for protein-protein docking filtered by an SAXS profile (http://salilab.org/foxsdock), the SAXS Merge server for automatic merging of SAXS profiles (http://salilab.org/saxsmerge) and the Pose & Rank server for scoring protein-ligand complexes (http://salilab.org/poseandrank). In this update, we also highlight two applications of ModBase: a PSI:Biology initiative to maximize the structural coverage of the human alpha-helical transmembrane proteome and a determination of structural determinants of human immunodeficiency virus-1 protease specificity.",2013-11-23 +23812975,Stability selection for regression-based models of transcription factor-DNA binding specificity.,"

Motivation

The DNA binding specificity of a transcription factor (TF) is typically represented using a position weight matrix model, which implicitly assumes that individual bases in a TF binding site contribute independently to the binding affinity, an assumption that does not always hold. For this reason, more complex models of binding specificity have been developed. However, these models have their own caveats: they typically have a large number of parameters, which makes them hard to learn and interpret.

Results

We propose novel regression-based models of TF-DNA binding specificity, trained using high resolution in vitro data from custom protein-binding microarray (PBM) experiments. Our PBMs are specifically designed to cover a large number of putative DNA binding sites for the TFs of interest (yeast TFs Cbf1 and Tye7, and human TFs c-Myc, Max and Mad2) in their native genomic context. These high-throughput quantitative data are well suited for training complex models that take into account not only independent contributions from individual bases, but also contributions from di- and trinucleotides at various positions within or near the binding sites. To ensure that our models remain interpretable, we use feature selection to identify a small number of sequence features that accurately predict TF-DNA binding specificity. To further illustrate the accuracy of our regression models, we show that even in the case of paralogous TF with highly similar position weight matrices, our new models can distinguish the specificities of individual factors. Thus, our work represents an important step toward better sequence-based models of individual TF-DNA binding specificity.

Availability

Our code is available at http://genome.duke.edu/labs/gordan/ISMB2013. The PBM data used in this article are available in the Gene Expression Omnibus under accession number GSE47026.",2013-07-01 +25540017,Magnetic resonance image tissue classification using an automatic method.,"

Background

Brain segmentation in magnetic resonance images (MRI) is an important stage in clinical studies for different issues such as diagnosis, analysis, 3-D visualizations for treatment and surgical planning. MR Image segmentation remains a challenging problem in spite of different existing artifacts such as noise, bias field, partial volume effects and complexity of the images. Some of the automatic brain segmentation techniques are complex and some of them are not sufficiently accurate for certain applications. The goal of this paper is proposing an algorithm that is more accurate and less complex).

Methods

In this paper we present a simple and more accurate automated technique for brain segmentation into White Matter, Gray Matter and Cerebrospinal fluid (CSF) in three-dimensional MR images. The algorithm's three steps are histogram based segmentation, feature extraction and final classification using SVM. The integrated algorithm has more accurate results than what can be obtained with its individual components. To produce much more efficient segmentation method our framework captures different types of features in each step that are of special importance for MRI, i.e., distributions of tissue intensities, textural features, and relationship with neighboring voxels or spatial features.

Results

Our method has been validated on real images and simulated data, with desirable performance in the presence of noise and intensity inhomogeneities.

Conclusions

The experimental results demonstrate that our proposed method is a simple and accurate technique to define brain tissues with high reproducibility in comparison with other techniques.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_207.",2014-12-24 +24301943,Phylogenetic analysis of substrate-binding subunits in an osmoprotectant system.,"Substrate-binding subunits are important components of the solute importation system, known as the osmoprotectant system, which consists of a membrane protein belonging to the ABC superfamily. These molecules recognize specific substrates that have different physiological roles in prokaryotes, i.e., roles that contribute to the survival of these organisms in environments with high concentrations of salt. Using the MEGA software, this study performed a phylogenetic analysis of 431 nucleotide sequences of these subunits, orthologous to each other, collected from the http://www.genome.jp/kegg/ database. This analysis allowed phylogenetic trees to be generated, clearly demonstrating that there was horizontal transfer of some genes through sharing by different organisms. Furthermore, two probable ancestral sequences were generated that showed homology with permeases that transport choline, glycine betaine, and carnitine, which are trimethylamines currently present in various prokaryotes. Therefore, this system probably arose in prokaryotic organisms with the basic function of capturing nutrients, and by performing this basal function and being shared with other organisms, it was fixed in the genome. 
However, because of prokaryote habitat diversification, this system contributed decisively to the adaptation of these organisms to different environments, especially environments that had a high salt concentration, thus acting as an osmoprotection system, which is how they are currently categorized.",2013-11-22 +24271391,SuperPain--a resource on pain-relieving compounds targeting ion channels.,"Pain is more than an unpleasant sensory experience associated with actual or potential tissue damage: it is the most common reason for physician consultation and often dramatically affects quality of life. The management of pain is often difficult and new targets are required for more effective and specific treatment. SuperPain (http://bioinformatics.charite.de/superpain/) is freely available database for pain-stimulating and pain-relieving compounds, which bind or potentially bind to ion channels that are involved in the transmission of pain signals to the central nervous system, such as TRPV1, TRPM8, TRPA1, TREK1, TRESK, hERG, ASIC, P2X and voltage-gated sodium channels. The database consists of ∼8700 ligands, which are characterized by experimentally measured binding affinities. Additionally, 100 000 putative ligands are included. Moreover, the database provides 3D structures of receptors and predicted ligand-binding poses. These binding poses and a structural classification scheme provide hints for the design of new analgesic compounds. A user-friendly graphical interface allows similarity searching, visualization of ligands docked into the receptor, etc.",2013-11-22 +24264865,Database of protein complexes with multivalent binding ability: Bival-Bind.,"Phenomena of multivalent binding of ligands with receptors are ubiquitous in biology and of growing interest in material sciences. Multivalency can enhance binding affinity dramatically. 
To understand the mechanism of multivalent binding in more detail model systems of bi- and multivalent receptors are needed, but are difficult to find. Furthermore it is useful to know about multivalent receptors, which can serve as targets to design multivalent drugs. The present contribution tries to close this gap. The Bival-Bind database (http://agknapp.chemie.fu-berlin.de/bivalbind) provides a relatively complete list - 2073 protein complexes with less than 90% sequence identity - out of the protein database, which can serve as bi- or multivalent receptors. Steric clashes of molecular spacers - necessary to connect the monomeric ligand units - with the receptor surface can diminish binding affinity dramatically and, thus, abolish the expected enhancement of binding affinity due to the multivalency. The potential multivalent receptors in the Bival-Bind database are characterized with respect to the receptor surface topography. A height profile between the receptor binding pockets is provided, which is an important information to estimate the influence of unfavorable spacer receptor interaction.",2013-11-22 +25837693,Models for predicting the adult height and age at first menstruation of girls with idiopathic central precocious puberty.,"

Background

It is difficult to determine whether to treat a given girl who has idiopathic central precocious puberty (CPP) with gonadotropin-releasing hormone analog (GnRHa) in terms of adult height (AH). The objective was to provide an easy tool for predicting AH and age at first menstruation at initial evaluation to help guide the decision regarding whether to treat.

Methods

Data analysis using multiple linear regression models was performed in 134 girls with CPP. Among them 78 were given GnRHa because of low predicted AH (n=45), pubertal luteinising hormone (LH)/follicle-stimulating hormone peaks (FSH) ratio (n=50) and/or high plasma estradiol concentration (n=45). 56 girls were followed without treatment.

Results

In the whole population, the actual AH (162.1±5.61 cm) was similar to target height (161.7±4.91 cm) and to AH predicted by the Bayley and Pinneau method (161.9±7.98 cm). Separated models for treated and untreated girls provide very close estimations, leading to a unique formula for both groups. The AH (cm) could be calculated at the initial evaluation: 2.21 (height at initial evaluation, SD) + 2.32 (target height, SD) - 1.83 (LH/FSH peaks ratio) + 159.68. The actual AH was lower than the calculated AH by more than 1 SD (5.6 cm) in 11 girls (8.0%). The time between onset of puberty and first menstruation (in untreated girls) can be estimated with: 10.9 - 0.57 (LH/FSH peaks ratio). The formulae are available at http://www.kamick.org/lemaire/med/girls-cpp15.html.

Conclusions

We established formulae that can be used at an initial evaluation to predict the AH, and the time between onset of puberty and first menstruation after spontaneous puberty. The similarity of the formulae for both groups suggests that the treatment had no significant effect on the AH. However, the criteria used to select treatment suggest that it prevents the deterioration of AH in cases with rapidly evolving form of CPP.",2015-04-02 +25152854,STRUCTURE PLOT: a program for drawing elegant STRUCTURE bar plots in user friendly interface.,"

Background

Understanding structure of the population is one of the major objective of many genetic studies. The program STRUCTURE is commonly used to infer population structure using multi-locus genotype data. However, a tool with graphical-user interface is currently not available to visualize STRUCTURE bar plots.

Results

We introduce STRUCTURE PLOT, a program for drawing STRUCTURE bar plots. The program generates publication ready, aesthetic STRUCTURE bar plots by using individual Q matrix from STRUCTURE or CLUMPP output. The program is very simple to use and includes variety of options like sorting bar by original order or by K, and selection of colors from R colors or RColorBrewer palette. Individual or population labels can be printed below or above the plot in any angle. Size of the graph and label can be defined, and option is provided to save plot in variety of picture formats in user defined resolution.

Conclusion

The program is implemented as a web application for online users and also as a standalone shiny application. Web application is compatible to majority of leading web browsers and standalone version can be launched using a simple R command. The program can be freely accessed at http://btismysore.in/strplot.",2014-08-13 +25733524,Escherichia coli EDL933 requires gluconeogenic nutrients to successfully colonize the intestines of streptomycin-treated mice precolonized with E. coli Nissle 1917.,"Escherichia coli MG1655, a K-12 strain, uses glycolytic nutrients exclusively to colonize the intestines of streptomycin-treated mice when it is the only E. coli strain present or when it is confronted with E. coli EDL933, an O157:H7 strain. In contrast, E. coli EDL933 uses glycolytic nutrients exclusively when it is the only E. coli strain in the intestine but switches in part to gluconeogenic nutrients when it colonizes mice precolonized with E. coli MG1655 (R. L. Miranda et al., Infect Immun 72:1666-1676, 2004, http://dx.doi.org/10.1128/IAI.72.3.1666-1676.2004). Recently, J. W. Njoroge et al. (mBio 3:e00280-12, 2012, http://dx.doi.org/10.1128/mBio.00280-12) reported that E. coli 86-24, an O157:H7 strain, activates the expression of virulence genes under gluconeogenic conditions, suggesting that colonization of the intestine with a probiotic E. coli strain that outcompetes O157:H7 strains for gluconeogenic nutrients could render them nonpathogenic. Here we report that E. coli Nissle 1917, a probiotic strain, uses both glycolytic and gluconeogenic nutrients to colonize the mouse intestine between 1 and 5 days postfeeding, appears to stop using gluconeogenic nutrients thereafter in a large, long-term colonization niche, but continues to use them in a smaller niche to compete with invading E. coli EDL933. Evidence is also presented suggesting that invading E. 
coli EDL933 uses both glycolytic and gluconeogenic nutrients and needs the ability to perform gluconeogenesis in order to colonize mice precolonized with E. coli Nissle 1917. The data presented here therefore rule out the possibility that E. coli Nissle 1917 can starve the O157:H7 E. coli strain EDL933 of gluconeogenic nutrients, even though E. coli Nissle 1917 uses such nutrients to compete with E. coli EDL933 in the mouse intestine.",2015-03-02 +23024288,SNVerGUI: a desktop tool for variant analysis of next-generation sequencing data.,"

Background

Advances in next generation sequencing (NGS) technology have made it possible to interrogate comprehensively genome-wide genetic variations. However, most existing tools for variation detection are based on command-line interface, which discourages the main end users of NGS data, such as biologists, geneticists and clinicians, from utilising the software.

Method and results

We have developed the SNVerGUI, a graphical user interface (GUI) based tool for variant detection and analysis. Compared with other methods for variant calling, our approach is unique in that it is applicable to both individual and pooled sequencing data. With friendly GUI, end users can easily adjust running parameters to optimise variant calling for their specific needs. SNVerGUI supports commonly used input and output file formats that allows SNVerGUI to be seamlessly integrated into common NGS data analysis pipelines. SNVerGUI is implemented in Java, which is platform-independent and therefore easy to install and run on the commonly used operating systems, such as Linux, Mac, and Windows. Using two real datasets, we have shown that SNVerGUI is capable of analysing very high volume NGS data in a feasible time on personal computers.

Conclusions

SNVerGUI is a fast and easy desktop GUI tool for the identification of genomic variants from pooled sequencing and individual sequencing data. Using this software, users can perform sophisticated variant detection by simply configuring several parameters in a friendly graphical user interface. SNVerGUI makes variant analysis as simple and effortless as possible, and we expect it to become popular among geneticists, clinicians, and biologists. SNVerGUI can be freely downloaded from http://snver.sourceforge.net/snvergui/, and will be continuously updated upon users' feedback.",2012-09-28 +24270786,The carbohydrate-active enzymes database (CAZy) in 2013.,"The Carbohydrate-Active Enzymes database (CAZy; http://www.cazy.org) provides online and continuously updated access to a sequence-based family classification linking the sequence to the specificity and 3D structure of the enzymes that assemble, modify and breakdown oligo- and polysaccharides. Functional and 3D structural information is added and curated on a regular basis based on the available literature. In addition to the use of the database by enzymologists seeking curated information on CAZymes, the dissemination of a stable nomenclature for these enzymes is probably a major contribution of CAZy. The past few years have seen the expansion of the CAZy classification scheme to new families, the development of subfamilies in several families and the power of CAZy for the analysis of genomes and metagenomes. This article outlines the changes that have occurred in CAZy during the past 5 years and presents our novel effort to display the resolution and the carbohydrate ligands in crystallographic complexes of CAZymes.",2013-11-21 +24265220,CAMP: Collection of sequences and structures of antimicrobial peptides.,"Antimicrobial peptides (AMPs) are gaining importance as anti-infective agents. 
Here we describe the updated Collection of Antimicrobial Peptide (CAMP) database, available online at http://www.camp.bicnirrh.res.in/. The 3D structures of peptides are known to influence antimicrobial activity. Although there exists databases of AMPs, information on structures of AMPs is limited in these databases. CAMP is manually curated and currently holds 6756 sequences and 682 3D structures of AMPs. Sequence and structure analysis tools have been incorporated to enhance the usefulness of the database.",2013-11-21 +24273243,Protein-driven inference of miRNA-disease associations.,"

Motivation

MicroRNAs (miRNAs) are a highly abundant class of non-coding RNA genes involved in cellular regulation and thus also diseases. Despite miRNAs being important disease factors, miRNA-disease associations remain low in number and of variable reliability. Furthermore, existing databases and prediction methods do not explicitly facilitate forming hypotheses about the possible molecular causes of the association, thereby making the path to experimental follow-up longer.

Results

Here we present miRPD in which miRNA-Protein-Disease associations are explicitly inferred. Besides linking miRNAs to diseases, it directly suggests the underlying proteins involved, which can be used to form hypotheses that can be experimentally tested. The inference of miRNAs and diseases is made by coupling known and predicted miRNA-protein associations with protein-disease associations text mined from the literature. We present scoring schemes that allow us to rank miRNA-disease associations inferred from both curated and predicted miRNA targets by reliability and thereby to create high- and medium-confidence sets of associations. Analyzing these, we find statistically significant enrichment for proteins involved in pathways related to cancer and type I diabetes mellitus, suggesting either a literature bias or a genuine biological trend. We show by example how the associations can be used to extract proteins for disease hypothesis.

Availability and implementation

All datasets, software and a searchable Web site are available at http://mirpd.jensenlab.org.",2013-11-21 +24390885,Colorectal cancer predicted risk online (CRC-PRO) calculator using data from the multi-ethnic cohort study.,"

Background

Better risk predictions for colorectal cancer (CRC) could improve prevention strategies by allowing clinicians to more accurately identify high-risk individuals. The National Cancer Institute's CRC risk calculator was created by Freedman et al using case control data.

Methods

An online risk calculator was created using data from the Multi-Ethnic Cohort Study, which followed >180,000 patients for the development of CRC for up to 11.5 years through linkage with cancer registries. Forward stepwise regression tuned to the c statistic was used to select the most important variables for use in separate Cox survival models for men and women. Model accuracy was assessed using 10-fold cross-validation.

Results

Patients in the cohort experienced 2762 incident cases of CRC. The final model for men contained age, ethnicity, pack-years of smoking, alcoholic drinks per day, body mass index, years of education, regular use of aspirin, family history of colon cancer, regular use of multivitamins, ounces of red meat intake per day, history of diabetes, and hours of moderate physical activity per day. The final model for women included age, ethnicity, years of education, use of estrogen, history of diabetes, pack-years of smoking, family history of colon cancer, regular use of multivitamins, body mass index, regular use of nonsteroidal anti-inflammatory drugs, and alcoholic drinks per day. The calculator demonstrated good accuracy with a cross-validated c statistic of 0.681 in men and 0.679 in women, and it seems to be well calibrated graphically. An electronic version of the calculator is available at http://rcalc.ccf.org.

Conclusion

This calculator seems to be accurate, is user friendly, and has been internally validated in a diverse population.",2014-01-01 +25889956,"Anti-inflammatory and protective investigations on the effects of Theranekron® ""an alcoholic extract of the Tarantula cubensis"" on wound healing of peritoneal in the rat: an in vivo comparative study.","

Background

The present study sought to investigate the effects of Tarantula cubensis extract (TC; Theranekron®) on the histopathological scores of peritoneal wound healing after laparotomy in the rats.

Methods

This study was designed to investigate the effects of Theranekron on the peritoneal wound healing after wound creation, on days9, 14, 19, 24 and 29 post-injury in rats. Twenty-four mature Wister-albino male rats were randomly divided into two groups. In the experimental group, TC was repeatedly injected subcutaneously (SC) over the lesion 9, 14,19 and 24 days after laparotomy, whereas the control group received only normal saline by subcutaneous injection and then the animal groups were euthanized9, 14, 19, 24, and 29 days after wounding respectively by intravenous injections of pentobarbital (50 mg/kg). Finally, assessment of the peritoneal wound healing between the groups was carried out by histopathologic data and statistical tests as Mann-Whitney U, Wilcoxon W and Z RESULTS: Histopathological examination indicated significant improvement in angiogenesis, re-epithelialization and less inflammatory response in comparison to control and also, revealed matured, compact and parallel deposition of collagen fibrils on day 29. So, at long term, treatment reduced the inflammation and increased the quality and rate of wound re- epithelialization compared to controls(P < 0.05). Furthermore, excluding the control group, rats exhibited the most pronounced effect on wound closure, with the statistically significant improvement in wound healing being seen at post-operative day 29. Moreover, collagen content on days 24 and 29 in the test group was found to be higher than in the healthy group. To warp up, treated groups had a significant increase in peritoneal wound healing area compared to the control group on all days (P < 0.05).

Conclusions

Our results suggested that Theranekron have delivered a novel therapeutic route for wound treatment in clinical practice.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2958770714954315 .",2015-04-02 +22343432,Meta-analysis of untargeted metabolomic data from multiple profiling experiments.,"metaXCMS is a software program for the analysis of liquid chromatography/mass spectrometry-based untargeted metabolomic data. It is designed to identify the differences between metabolic profiles across multiple sample groups (e.g., 'healthy' versus 'active disease' versus 'inactive disease'). Although performing pairwise comparisons alone can provide physiologically relevant data, these experiments often result in hundreds of differences, and comparison with additional biologically meaningful sample groups can allow for substantial data reduction. By performing second-order (meta-) analysis, metaXCMS facilitates the prioritization of interesting metabolite features from large untargeted metabolomic data sets before the rate-limiting step of structural identification. Here we provide a detailed step-by-step protocol for going from raw mass spectrometry data to metaXCMS results, visualized as Venn diagrams and exported Microsoft Excel spreadsheets. There is no upper limit to the number of sample groups or individual samples that can be compared with the software, and data from most commercial mass spectrometers are supported. The speed of the analysis depends on computational resources and data volume, but will generally be less than 1 d for most users. metaXCMS is freely available at http://metlin.scripps.edu/metaxcms/.",2012-02-16 +25305539,In-depth proteomic analysis of carp (Cyprinus carpio L) spermatozoa.,"Using a combination of protein fractionation by one-dimensional gel electrophoresis and high performance liquid chromatography-electrospray ionization tandem mass spectrometry, we identified 348 proteins in carp spermatozoa, most of which were for the first time identified in fish. 
Dynein, tubulin, HSP90, HSP70, HSP60, adenosylhomocysteinase, NKEF-B, brain type creatine kinase, mitochondrial ATP synthase, and valosin containing enzyme represent high abundance proteins in carp spermatozoa. These proteins are functionally related to sperm motility and energy production as well as the protection of sperm against oxidative injury and stress. Moreover, carp spermatozoa are equipped with functionally diverse proteins involved in signal transduction, transcription, translation, protein turnover and transport. About 15% of proteins from carp spermatozoa identified here were also detected in seminal plasma which may be a result of leakage from spermatozoa into seminal plasma, adsorption of seminal plasma proteins on spermatozoa surface, and expression in both spermatozoa and cells secreting seminal plasma proteins. The availability of a catalog of carp sperm proteins provides substantial advances for an understanding of sperm function and for future development of molecular diagnostic tests of carp sperm quality, the evaluation of which is currently limited to certain parameters such as sperm count, morphology and motility or viability. The mass spectrometry data are available at ProteomeXchange with the dataset identifier PXD000877 (DOI: http://dx.doi.org/10.6019/PXD000877).",2014-09-28 +24256100,A mouse protein that localizes to acrosome and sperm tail is regulated by Y-chromosome.,"

Background

Acrosomal proteins play crucial roles in the physiology of fertilization. Identification of proteins localizing to the acrosome is fundamental to the understanding of its contribution to fertilization. Novel proteins are still being reported from acrosome. In order to capture yet unreported proteins localizing to acrosome in particular and sperm in general, 2D-PAGE and mass spectrometry analysis of mouse sperm proteins was done.

Results

One of the protein spots identified in the above study was reported in the NCBI database as a hypothetical protein from Riken cDNA 1700026L06 that localizes to chromosome number 2. Immunofluorescence studies using the antibody raised in rabbit against the recombinant protein showed that it localized to mouse acrosome and sperm tail. Based on the localization of this protein, it has been named mouse acrosome and sperm tail protein (MAST, [Q7TPM5 (http://www.ncbi.nlm.nih.gov/protein/Q7TPM5)]). This protein shows 96% identity to the rat spermatid specific protein RSB66. Western blotting showed that MAST is expressed testis-specifically. Co-immunoprecipitation studies using the MAST antibody identified two calcium-binding proteins, caldendrin and calreticulin as interacting partners of MAST. Caldendrin and calreticulin genes localize to mouse chromosomes 5 and 8 respectively. In a Yq-deletion mutant mouse, that is subfertile and has a deletion of 2/3rd of the long arm of the Y chromosome, MAST failed to localize to the acrosome. Western blot analysis however, revealed equal expression of MAST in the testes of wild type and mutant mice. The acrosomal calcium-binding proteins present in the MAST IP-complex were upregulated in sperms of Yq-del mice.

Conclusions

We have identified a mouse acrosomal protein, MAST, that is expressed testis specifically. MAST does not contain any known motifs for protein interactions; yet it complexes with calcium-binding proteins localizing to the acrosome. The misexpression of all the proteins identified in a complex in the Yq-del mice invokes the hypothesis of a putative pathway regulated by the Y chromosome. The role of Y chromosome in the regulation of this complex is however not clear from the current study.",2013-11-20 +22846331,Estimation of sequencing error rates in short reads.,"

Background

Short-read data from next-generation sequencing technologies are now being generated across a range of research projects. The fidelity of this data can be affected by several factors and it is important to have simple and reliable approaches for monitoring it at the level of individual experiments.

Results

We developed a fast, scalable and accurate approach to estimating error rates in short reads, which has the added advantage of not requiring a reference genome. We build on the fundamental observation that there is a linear relationship between the copy number for a given read and the number of erroneous reads that differ from the read of interest by one or two bases. The slope of this relationship can be transformed to give an estimate of the error rate, both by read and by position. We present simulation studies as well as analyses of real data sets illustrating the precision and accuracy of this method, and we show that it is more accurate than alternatives that count the difference between the sample of interest and a reference genome. We show how this methodology led to the detection of mutations in the genome of the PhiX strain used for calibration of Illumina data. The proposed method is implemented in an R package, which can be downloaded from http://bcb.dfci.harvard.edu/∼vwang/shadowRegression.html.

Conclusions

The proposed method can be used to monitor the quality of sequencing pipelines at the level of individual experiments without the use of reference genomes. Furthermore, having an estimate of the error rates gives one the opportunity to improve analyses and inferences in many applications of next-generation sequencing data.",2012-07-30 +24756070,Investigating microRNA-target interaction-supported tissues in human cancer tissues based on miRNA and target gene expression profiling.,"

Unlabelled

Recent studies have revealed that a small non-coding RNA, microRNA (miRNA) down-regulates its mRNA targets. This effect is regarded as an important role in various biological processes. Many studies have been devoted to predicting miRNA-target interactions. These studies indicate that the interactions may only be functional in some specific tissues, which depend on the characteristics of an miRNA. No systematic methods have been established in the literature to investigate the correlation between miRNA-target interactions and tissue specificity through microarray data. In this study, we propose a method to investigate miRNA-target interaction-supported tissues, which is based on experimentally validated miRNA-target interactions. The tissue specificity results by our method are in accordance with the experimental results in the literature.

Availability and implementation

Our analysis results are available at http://tsmti.mbc.nctu.edu.tw/ and http://www.stat.nctu.edu.tw/hwang/tsmti.html.",2014-04-22 +24259684,BrassiBase: introduction to a novel knowledge database on Brassicaceae evolution.,"The Brassicaceae family (mustards or crucifers) includes Arabidopsis thaliana as one of the most important model species in plant biology and a number of important crop plants such as the various Brassica species (e.g. cabbage, canola and mustard). Moreover, the family comprises an increasing number of species that serve as study systems in many fields of plant science and evolutionary research. However, the systematics and taxonomy of the family are very complex and access to scientifically valuable and reliable information linked to species and genus names and its interpretation are often difficult. BrassiBase is a continuously developing and growing knowledge database (http://brassibase.cos.uni-heidelberg.de) that aims at providing direct access to many different types of information ranging from taxonomy and systematics to phylo- and cytogenetics. Providing critically revised key information, the database intends to optimize comparative evolutionary research in this family and supports the introduction of the Brassicaceae as the model family for evolutionary biology and plant sciences. Some features that should help to accomplish these goals within a comprehensive taxonomic framework have now been implemented in the new version 1.1.9. A 'Phylogenetic Placement Tool' should help to identify critical accessions and germplasm and provide a first visualization of phylogenetic relationships. 
The 'Cytogenetics Tool' provides in-depth information on genome sizes, chromosome numbers and polyploidy, and sets this information into a Brassicaceae-wide context.",2013-11-19 +24311580,LigSearch: a knowledge-based web server to identify likely ligands for a protein target.,"Identifying which ligands might bind to a protein before crystallization trials could provide a significant saving in time and resources. LigSearch, a web server aimed at predicting ligands that might bind to and stabilize a given protein, has been developed. Using a protein sequence and/or structure, the system searches against a variety of databases, combining available knowledge, and provides a clustered and ranked output of possible ligands. LigSearch can be accessed at http://www.ebi.ac.uk/thornton-srv/databases/LigSearch.",2013-11-19 +24164321,Developmental biology of Streptomyces from the perspective of 100 actinobacterial genome sequences.,"To illuminate the evolution and mechanisms of actinobacterial complexity, we evaluate the distribution and origins of known Streptomyces developmental genes and the developmental significance of actinobacteria-specific genes. As an aid, we developed the Actinoblast database of reciprocal blastp best hits between the Streptomyces coelicolor genome and more than 100 other actinobacterial genomes (http://streptomyces.org.uk/actinoblast/). We suggest that the emergence of morphological complexity was underpinned by special features of early actinobacteria, such as polar growth and the coupled participation of regulatory Wbl proteins and the redox-protecting thiol mycothiol in transducing a transient nitric oxide signal generated during physiologically stressful growth transitions. It seems that some cell growth and division proteins of early actinobacteria have acquired greater importance for sporulation of complex actinobacteria than for mycelial growth, in which septa are infrequent and not associated with complete cell separation. 
The acquisition of extracellular proteins with structural roles, a highly regulated extracellular protease cascade, and additional regulatory genes allowed early actinobacterial stationary phase processes to be redeployed in the emergence of aerial hyphae from mycelial mats and in the formation of spore chains. These extracellular proteins may have contributed to speciation. Simpler members of morphologically diverse clades have lost some developmental genes.",2013-11-19 +25725498,L-GRAAL: Lagrangian graphlet-based network aligner.,"

Motivation

Discovering and understanding patterns in networks of protein-protein interactions (PPIs) is a central problem in systems biology. Alignments between these networks aid functional understanding as they uncover important information, such as evolutionary conserved pathways, protein complexes and functional orthologs. A few methods have been proposed for global PPI network alignments, but because of NP-completeness of underlying sub-graph isomorphism problem, producing topologically and biologically accurate alignments remains a challenge.

Results

We introduce a novel global network alignment tool, Lagrangian GRAphlet-based ALigner (L-GRAAL), which directly optimizes both the protein and the interaction functional conservations, using a novel alignment search heuristic based on integer programming and Lagrangian relaxation. We compare L-GRAAL with the state-of-the-art network aligners on the largest available PPI networks from BioGRID and observe that L-GRAAL uncovers the largest common sub-graphs between the networks, as measured by edge-correctness and symmetric sub-structures scores, which allow transferring more functional information across networks. We assess the biological quality of the protein mappings using the semantic similarity of their Gene Ontology annotations and observe that L-GRAAL best uncovers functionally conserved proteins. Furthermore, we introduce for the first time a measure of the semantic similarity of the mapped interactions and show that L-GRAAL also uncovers best functionally conserved interactions. In addition, we illustrate on the PPI networks of baker's yeast and human the ability of L-GRAAL to predict new PPIs. Finally, L-GRAAL's results are the first to show that topological information is more important than sequence information for uncovering functionally conserved interactions.

Availability and implementation

L-GRAAL is coded in C++. Software is available at: http://bio-nets.doc.ic.ac.uk/L-GRAAL/.

Contact

n.malod-dognin@imperial.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-02-28 +24758252,PTRcombiner: mining combinatorial regulation of gene expression from post-transcriptional interaction maps.,"

Background

The progress in mapping RNA-protein and RNA-RNA interactions at the transcriptome-wide level paves the way to decipher possible combinatorial patterns embedded in post-transcriptional regulation of gene expression.

Results

Here we propose an innovative computational tool to extract clusters of mRNA trans-acting co-regulators (RNA binding proteins and non-coding RNAs) from pairwise interaction annotations. In addition, the tool allows analysis of the binding site similarity of co-regulators belonging to the same cluster, given their positional binding information. The tool has been tested on experimental collections of human and yeast interactions, identifying modules that coordinate functionally related messages.

Conclusions

This tool is an original attempt to uncover combinatorial patterns using all the post-transcriptional interaction data available so far. PTRcombiner is available at http://disi.unitn.it/~passerini/software/PTRcombiner/.",2014-04-23 +23269662,ATARiS: computational quantification of gene suppression phenotypes from multisample RNAi screens.,"Genome-scale RNAi libraries enable the systematic interrogation of gene function. However, the interpretation of RNAi screens is complicated by the observation that RNAi reagents designed to suppress the mRNA transcripts of the same gene often produce a spectrum of phenotypic outcomes due to differential on-target gene suppression or perturbation of off-target transcripts. Here we present a computational method, Analytic Technique for Assessment of RNAi by Similarity (ATARiS), that takes advantage of patterns in RNAi data across multiple samples in order to enrich for RNAi reagents whose phenotypic effects relate to suppression of their intended targets. By summarizing only such reagent effects for each gene, ATARiS produces quantitative, gene-level phenotype values, which provide an intuitive measure of the effect of gene suppression in each sample. This method is robust for data sets that contain as few as 10 samples and can be used to analyze screens of any number of targeted genes. We used this analytic approach to interrogate RNAi data derived from screening more than 100 human cancer cell lines and identified HNF1B as a transforming oncogene required for the survival of cancer cells that harbor HNF1B amplifications. ATARiS is publicly available at http://broadinstitute.org/ataris.",2012-12-26 +24902906,Rhinoplasty as a medicalized phenomenon: a 25-center survey on quality of life before and after cosmetic rhinoplasty.,"

Background

Cosmetic surgery, including rhinoplasty, has been dramatically increasing in Iran through the last two decades. It is performed mainly on the youth for the purpose of beauty, an area not directly related to medicine but strongly medicalized. This study aimed to explore the effects of rhinoplasty on the quality of life experienced by individuals who have undergone the surgery.

Methods

From all the plastic surgery clinics in Teheran, 25 were selected randomly as sites for the study. In the next step, 110 patients who had undergone rhinoplasty were selected randomly from these clinics. Only patients whose surgery had been performed 3 to 18 months before the interview were included in the statistical population. Data were collected through a Likert-type questionnaire that queries three major quality-of-life dimensions: general benefit, social support, and physical health. The collected data were analyzed by SPSS.

Results

The mean scores for quality of life before and after surgery were 66.54 and 61.11, respectively. The difference was statistically significant (P = 0.008), showing a decline in quality of life after rhinoplasty. Whereas the mean score for social support and physical health decreased, the score for general benefit increased after the surgery. The main motivating factors for surgery were external factors (e.g., friends, family). In addition, the date of the surgery and the time of the interview were positively correlated. In other words, the longer the time since surgery, the greater was the increase in the patients' satisfaction and quality of life.

Conclusions

The overall quality of life among the statistical population decreased. This could be attributed to unnecessary surgeries, medical errors, and performance of rhinoplasty because of its recognized popularity. The reduction in social support may have resulted from unacceptable consequences of rhinoplasty, particularly in terms of appearance, and reactions of family and peer group. The physical health of the respondents was negatively affected by rhinoplasty. Malfunction of the upper respiratory system after rhinoplasty is a known main reason for the negative effect of rhinoplasty on the quality of life. Correlations between the times of the surgery and the interview suggest a long-term rather than an immediate assessment of effects that such surgeries have on the quality of life.

Level of evidence v

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266.",2014-06-06 +23614162,"Communicable diseases report, NSW, September and October 2012.","For updated information, including data and facts on specific diseases, visit www.health.nsw.gov.au and click on Public Health and then Infectious Diseases. The communicable diseases site is available at: http://www.health.nsw.gov.au/publichealth/infectious/index.asp.",2013-03-01 +23587322,AXIOME: automated exploration of microbial diversity.,"

Background

Although high-throughput sequencing of small subunit rRNA genes has revolutionized our understanding of microbial ecosystems, these technologies generate data at depths that benefit from automated analysis. Here we present AXIOME (Automation, eXtension, and Integration Of Microbial Ecology), a highly flexible and extensible management tool for popular microbial ecology analysis packages that promotes reproducibility and customization in microbial research.

Findings

AXIOME streamlines and manages analysis of small subunit (SSU) rRNA marker data in QIIME and mothur. AXIOME also implements features including the PAired-eND Assembler for Illumina sequences (PANDAseq), non-negative matrix factorization (NMF), multi-response permutation procedures (MRPP), exploring and recovering phylogenetic novelty (SSUnique) and indicator species analysis. AXIOME has a companion graphical user interface (GUI) and is designed to be easily extended to facilitate customized research workflows.

Conclusions

AXIOME is an actively developed, open source project written in Vala and available from GitHub (http://neufeld.github.com/axiome) and as a Debian package. Axiometic, a GUI companion tool is also freely available (http://neufeld.github.com/axiometic). Given that data analysis has become an important bottleneck for microbial ecology studies, the development of user-friendly computational tools remains a high priority. AXIOME represents an important step in this direction by automating multi-step bioinformatic analyses and enabling the customization of procedures to suit the diverse research needs of the microbial ecology community.",2013-03-13 +22651224,"CORNET 2.0: integrating plant coexpression, protein-protein interactions, regulatory interactions, gene associations and functional annotations.","To enable easy access and interpretation of heterogeneous and scattered data, we have developed a user-friendly tool for data mining and integration in Arabidopsis, named CORNET. This tool allows the browsing of microarray data, the construction of coexpression and protein-protein interaction (PPI) networks and the exploration of diverse functional annotations. Here, we present the new functionalities of CORNET 2.0 for data integration in plants. First of all, CORNET allows the integration of regulatory interaction datasets accessible through the new transcription factor (TF) tool that can be used in combination with the coexpression tool or the PPI tool. In addition, we have extended the PPI tool to enable the analysis of gene-gene associations from AraNet as well as newly identified PPIs. Different search options are implemented to enable the construction of networks centered around multiple input genes or proteins. New functional annotation resources are included to retrieve relevant literature, phenotypes, plant ontology and biological pathways. We have also extended CORNET to attain the construction of coexpression and PPI networks in the crop species maize. 
Networks and associated evidence of the majority of currently available data types are visualized in Cytoscape. CORNET is available at https://bioinformatics.psb.ugent.be/cornet.",2012-05-31 +24273246,Sequence alignment visualization in HTML5 without Java.,"

Motivation

Java has been extensively used for the visualization of biological data on the web. However, the Java runtime environment is an additional layer of software with its own set of technical problems and security risks. HTML in its new version 5 provides features that for some tasks may render Java unnecessary.

Results

Alignment-To-HTML is the first HTML-based interactive visualization for annotated multiple sequence alignments. The server side script interpreter can perform all tasks like (i) sequence retrieval, (ii) alignment computation, (iii) rendering, (iv) identification of homologous structural models and (v) communication with BioDAS-servers. The rendered alignment can be included in web pages and is displayed in all browsers on all platforms including touch screen tablets. The functionality of the user interface is similar to legacy Java applets and includes color schemes, highlighting of conserved and variable alignment positions, row reordering by drag and drop, interlinked 3D visualization and sequence groups. Novel features are (i) support for multiple overlapping residue annotations, such as chemical modifications, single nucleotide polymorphisms and mutations, (ii) mechanisms to quickly hide residue annotations, (iii) export to MS-Word and (iv) sequence icons.

Conclusion

Alignment-To-HTML, the first interactive alignment visualization that runs in web browsers without additional software, confirms that to some extent HTML5 is already sufficient to display complex biological data. The low speed at which programs are executed in browsers is still the main obstacle. Nevertheless, we envision an increased use of HTML and JavaScript for interactive biological software.

Availability and implementation

Under GPL at: http://www.bioinformatics.org/strap/toHTML/.",2013-11-21 +23940838,Kinome Render: a stand-alone and web-accessible tool to annotate the human protein kinome tree.,"Human protein kinases play fundamental roles mediating the majority of signal transduction pathways in eukaryotic cells as well as a multitude of other processes involved in metabolism, cell-cycle regulation, cellular shape, motility, differentiation and apoptosis. The human protein kinome contains 518 members. Most studies that focus on the human kinome require, at some point, the visualization of large amounts of data. The visualization of such data within the framework of a phylogenetic tree may help identify key relationships between different protein kinases in view of their evolutionary distance and the information used to annotate the kinome tree. For example, studies that focus on the promiscuity of kinase inhibitors can benefit from the annotations to depict binding affinities across kinase groups. Images involving the mapping of information into the kinome tree are common. However, producing such figures manually can be a long arduous process prone to errors. To circumvent this issue, we have developed a web-based tool called Kinome Render (KR) that produces customized annotations on the human kinome tree. KR allows the creation and automatic overlay of customizable text or shape-based annotations of different sizes and colors on the human kinome tree. The web interface can be accessed at: http://bcb.med.usherbrooke.ca/kinomerender. A stand-alone version is also available and can be run locally.",2013-08-08 +23640332,Memoir: template-based structure prediction for membrane proteins.,"Membrane proteins are estimated to be the targets of 50% of drugs that are currently in development, yet we have few membrane protein crystal structures. As a result, for a membrane protein of interest, the much-needed structural information usually comes from a homology model. 
Current homology modelling software is optimized for globular proteins, and ignores the constraints that the membrane is known to place on protein structure. Our Memoir server produces homology models using alignment and coordinate generation software that has been designed specifically for transmembrane proteins. Memoir is easy to use, with the only inputs being a structural template and the sequence that is to be modelled. We provide a video tutorial and a guide to assessing model quality. Supporting data aid manual refinement of the models. These data include a set of alternative conformations for each modelled loop, and a multiple sequence alignment that incorporates the query and template. Memoir works with both α-helical and β-barrel types of membrane proteins and is freely available at http://opig.stats.ox.ac.uk/webapps/memoir.",2013-05-02 +22121219,MitoMiner: a data warehouse for mitochondrial proteomics data.,"MitoMiner (http://mitominer.mrc-mbu.cam.ac.uk/) is a data warehouse for the storage and analysis of mitochondrial proteomics data gathered from publications of mass spectrometry and green fluorescent protein tagging studies. In MitoMiner, these data are integrated with data from UniProt, Gene Ontology, Online Mendelian Inheritance in Man, HomoloGene, Kyoto Encyclopaedia of Genes and Genomes and PubMed. The latest release of MitoMiner stores proteomics data sets from 46 studies covering 11 different species from eumetazoa, viridiplantae, fungi and protista. MitoMiner is implemented by using the open source InterMine data warehouse system, which provides a user interface allowing users to upload data for analysis, personal accounts to store queries and results and enables queries of any data in the data model. MitoMiner also provides lists of proteins for use in analyses, including the new MitoMiner mitochondrial proteome reference sets that specify proteins with substantial experimental evidence for mitochondrial localization. 
As further mitochondrial proteomics data sets from normal and diseased tissue are published, MitoMiner can be used to characterize the variability of the mitochondrial proteome between tissues and investigate how changes in the proteome may contribute to mitochondrial dysfunction and mitochondrial-associated diseases such as cancer, neurodegenerative diseases, obesity, diabetes, heart failure and the ageing process.",2011-11-24 +24849626,The inhibitory microcircuit of the substantia nigra provides feedback gain control of the basal ganglia output.,"Dysfunction of the basal ganglia produces severe deficits in the timing, initiation, and vigor of movement. These diverse impairments suggest a control system gone awry. In engineered systems, feedback is critical for control. By contrast, models of the basal ganglia highlight feedforward circuitry and ignore intrinsic feedback circuits. In this study, we show that feedback via axon collaterals of substantia nigra projection neurons control the gain of the basal ganglia output. Through a combination of physiology, optogenetics, anatomy, and circuit mapping, we elaborate a general circuit mechanism for gain control in a microcircuit lacking interneurons. Our data suggest that diverse tonic firing rates, weak unitary connections and a spatially diffuse collateral circuit with distinct topography and kinetics from feedforward input is sufficient to implement divisive feedback inhibition. The importance of feedback for engineered systems implies that the intranigral microcircuit, despite its absence from canonical models, could be essential to basal ganglia function. DOI: http://dx.doi.org/10.7554/eLife.02397.001.",2014-05-21 +24642063,Prokka: rapid prokaryotic genome annotation.,"

Unlabelled

The multiplex capability and high yield of current day DNA-sequencing instruments has made bacterial whole genome sequencing a routine affair. The subsequent de novo assembly of reads into contigs has been well addressed. The final step of annotating all relevant genomic features on those contigs can be achieved slowly using existing web- and email-based systems, but these are not applicable for sensitive data or integrating into computational pipelines. Here we introduce Prokka, a command line software tool to fully annotate a draft bacterial genome in about 10 min on a typical desktop computer. It produces standards-compliant output files for further analysis or viewing in genome browsers.

Availability and implementation

Prokka is implemented in Perl and is freely available under an open source GPLv2 license from http://vicbioinformatics.com/.",2014-03-18 +24243843,"DEG 10, an update of the database of essential genes that includes both protein-coding genes and noncoding genomic elements.","The combination of high-density transposon-mediated mutagenesis and high-throughput sequencing has led to significant advancements in research on essential genes, resulting in a dramatic increase in the number of identified prokaryotic essential genes under diverse conditions and a revised essential-gene concept that includes all essential genomic elements, rather than focusing on protein-coding genes only. DEG 10, a new release of the Database of Essential Genes (available at http://www.essentialgene.org), has been developed to accommodate these quantitative and qualitative advancements. In addition to increasing the number of bacterial and archaeal essential genes determined by genome-wide gene essentiality screens, DEG 10 also harbors essential noncoding RNAs, promoters, regulatory sequences and replication origins. These essential genomic elements are determined not only in vitro, but also in vivo, under diverse conditions including those for survival, pathogenesis and antibiotic resistance. We have developed customizable BLAST tools that allow users to perform species- and experiment-specific BLAST searches for a single gene, a list of genes, annotated or unannotated genomes. 
Therefore, DEG 10 includes essential genomic elements under different conditions in three domains of life, with customizable BLAST tools.",2013-11-15 +24243842,LPSN--list of prokaryotic names with standing in nomenclature.,"The List of Prokaryotic Names with Standing in Nomenclature (LPSN; http://www.bacterio.net) is a database that lists the names of prokaryotes (Bacteria and Archaea) that have been validly published in the International Journal of Systematic and Evolutionary Microbiology directly or by inclusion in a Validation List, under the Rules of International Code of Nomenclature of Bacteria. Currently there are 15 974 taxa listed. In addition, LPSN has an up-to-date classification of prokaryotes and information on prokaryotic nomenclature and culture collections.",2013-11-15 +23981296,Three-dimensional chemical mapping by EFTEM-TomoJ including improvement of SNR by PCA and ART reconstruction of volume by noise suppression.,"Electron tomography is becoming one of the most used methods for structural analysis at nanometric scale in biological and materials sciences. Combined with chemical mapping, it provides qualitative and semiquantitative information on the distribution of chemical elements on a given sample. Due to the current difficulties in obtaining three-dimensional (3D) maps by energy-filtered transmission electron microscopy (EFTEM), the use of 3D chemical mapping has not been widely adopted by the electron microscopy community. The lack of specialized software further complicates the issue, especially in the case of data with a low signal-to-noise ratio (SNR). Moreover, data interpretation is rendered difficult by the absence of efficient segmentation tools. Thus, specialized software for the computation of 3D maps by EFTEM needs to include optimized methods for image series alignment, algorithms to improve SNR, different background subtraction models, and methods to facilitate map segmentation. 
Here we present a software package (EFTEM-TomoJ, which can be downloaded from http://u759.curie.fr/fr/download/softwares/EFTEM-TomoJ), specifically dedicated to computation of EFTEM 3D chemical maps including noise filtering by image reconstitution based on multivariate statistical analysis. We also present an algorithm named BgART (for background removing algebraic reconstruction technique) allowing the discrimination between background and signal and improving the reconstructed volume in an iterative way.",2013-08-28 +24229347,"NeuroGeM, a knowledgebase of genetic modifiers in neurodegenerative diseases.","

Background

Neurodegenerative diseases (NDs) are characterized by the progressive loss of neurons in the human brain. Although the majority of NDs are sporadic, evidence is accumulating that they have a strong genetic component. Therefore, significant efforts have been made in recent years to not only identify disease-causing genes but also genes that modify the severity of NDs, so-called genetic modifiers. To date there exists no compendium that lists and cross-links genetic modifiers of different NDs.

Description

In order to address this need, we present NeuroGeM, the first comprehensive knowledgebase providing integrated information on genetic modifiers of nine different NDs in the model organisms D. melanogaster, C. elegans, and S. cerevisiae. NeuroGeM cross-links curated genetic modifier information from the different NDs and provides details on experimental conditions used for modifier identification, functional annotations, links to homologous proteins and color-coded protein-protein interaction networks to visualize modifier interactions. We demonstrate how this database can be used to generate new understanding through meta-analysis. For instance, we reveal that the Drosophila genes DnaJ-1, thread, Atx2, and mub are generic modifiers that affect multiple if not all NDs.

Conclusion

As the first compendium of genetic modifiers, NeuroGeM will assist experimental and computational scientists in their search for the pathophysiological mechanisms underlying NDs. http://chibi.ubc.ca/neurogem.",2013-11-14 +24239490,An evidence-based clinical guideline for the diagnosis and treatment of lumbar disc herniation with radiculopathy.,"

Background context

The objective of the North American Spine Society's (NASS) Evidence-Based Clinical Guideline for the Diagnosis and Treatment of Lumbar Disc Herniation with Radiculopathy is to provide evidence-based recommendations to address key clinical questions surrounding the diagnosis and treatment of lumbar disc herniation with radiculopathy. The guideline is intended to reflect contemporary treatment concepts for symptomatic lumbar disc herniation with radiculopathy as reflected in the highest quality clinical literature available on this subject as of July 2011. The goals of the guideline recommendations are to assist in delivering optimum efficacious treatment and functional recovery from this spinal disorder.

Purpose

To provide an evidence-based educational tool to assist spine specialists in the diagnosis and treatment of lumbar disc herniation with radiculopathy.

Study design

Systematic review and evidence-based clinical guideline.

Methods

This guideline is a product of the Lumbar Disc Herniation with Radiculopathy Work Group of NASS' Evidence-Based Guideline Development Committee. The work group consisted of multidisciplinary spine care specialists trained in the principles of evidence-based analysis. A literature search addressing each question and using a specific search protocol was performed on English-language references found in Medline, Embase (Drugs and Pharmacology), and four additional evidence-based databases to identify articles. The relevant literature was then independently rated using the NASS-adopted standardized levels of evidence. An evidentiary table was created for each of the questions. Final recommendations to answer each clinical question were developed via work group discussion, and grades were assigned to the recommendations using standardized grades of recommendation. In the absence of Level I to IV evidence, work group consensus statements have been developed using a modified nominal group technique, and these statements are clearly identified as such in the guideline.

Results

Twenty-nine clinical questions were formulated and addressed, and the answers are summarized in this article. The respective recommendations were graded by strength of the supporting literature, which was stratified by levels of evidence.

Conclusions

The clinical guideline has been created using the techniques of evidence-based medicine and best available evidence to aid practitioners in the care of patients with symptomatic lumbar disc herniation with radiculopathy. The entire guideline document, including the evidentiary tables, suggestions for future research, and all the references, is available electronically on the NASS Web site at http://www.spine.org/Pages/PracticePolicy/ClinicalCare/ClinicalGuidlines/Default.aspx and will remain updated on a timely schedule.",2013-11-14 +22595208,Robust identification of transcriptional regulatory networks using a Gibbs sampler on outlier sum statistic.,"

Motivation

Identification of transcriptional regulatory networks (TRNs) is of significant importance in computational biology for cancer research, providing a critical building block to unravel disease pathways. However, existing methods for TRN identification suffer from the inclusion of excessive 'noise' in microarray data and false-positives in binding data, especially when applied to human tumor-derived cell line studies. More robust methods that can counteract the imperfection of data sources are therefore needed for reliable identification of TRNs in this context.

Results

In this article, we propose to establish a link between the quality of one target gene to represent its regulator and the uncertainty of its expression to represent other target genes. Specifically, an outlier sum statistic was used to measure the aggregated evidence for regulation events between target genes and their corresponding transcription factors. A Gibbs sampling method was then developed to estimate the marginal distribution of the outlier sum statistic, hence, to uncover underlying regulatory relationships. To evaluate the effectiveness of our proposed method, we compared its performance with that of an existing sampling-based method using both simulation data and yeast cell cycle data. The experimental results show that our method consistently outperforms the competing method in different settings of signal-to-noise ratio and network topology, indicating its robustness for biological applications. Finally, we applied our method to breast cancer cell line data and demonstrated its ability to extract biologically meaningful regulatory modules related to estrogen signaling and action in breast cancer.

Availability and implementation

The Gibbs sampler MATLAB package is freely available at http://www.cbil.ece.vt.edu/software.htm.

Contact

xuan@vt.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-05-17 +24842874,T cell-intrinsic role of IL-6 signaling in primary and memory responses.,"Innate immune recognition is critical for the induction of adaptive immune responses; however the underlying mechanisms remain incompletely understood. In this study, we demonstrate that T cell-specific deletion of the IL-6 receptor α chain (IL-6Rα) results in impaired Th1 and Th17 T cell responses in vivo, and a defect in Tfh function. Depletion of Tregs in these mice rescued the Th1 but not the Th17 response. Our data suggest that IL-6 signaling in effector T cells is required to overcome Treg-mediated suppression in vivo. We show that IL-6 cooperates with IL-1β to block the suppressive effect of Tregs on CD4(+) T cells, at least in part by controlling their responsiveness to IL-2. In addition, although IL-6Rα-deficient T cells mount normal primary Th1 responses in the absence of Tregs, they fail to mature into functional memory cells, demonstrating a key role for IL-6 in CD4(+) T cell memory formation. DOI: http://dx.doi.org/10.7554/eLife.01949.001.",2014-05-19 +24225386,[COMMODE] a large-scale database of molecular descriptors using compounds from PubChem.,"

Background

Molecular descriptors have been extensively used in the field of structure-oriented drug design and structural chemistry. They have been applied in QSPR and QSAR models to predict ADME-Tox properties, which specify essential features for drugs. Molecular descriptors capture chemical and structural information, but investigating their interpretation and meaning remains very challenging.

Results

This paper introduces a large-scale database of molecular descriptors called COMMODE containing more than 25 million compounds originated from PubChem. About 2500 DRAGON-descriptors have been calculated for all compounds and integrated into this database, which is accessible through a web interface at http://commode.i-med.ac.at.",2013-11-13 +24227675,PhosphoNetworks: a database for human phosphorylation networks.,"

Summary

Phosphorylation plays an important role in cellular signal transduction. Current phosphorylation-related databases often focus on the phosphorylation sites, which are mainly determined by mass spectrometry. Here, we present PhosphoNetworks, a phosphorylation database built on a high-resolution map of phosphorylation networks. This high-resolution map of phosphorylation networks provides not only the kinase-substrate relationships (KSRs), but also the specific phosphorylation sites on which the kinases act on the substrates. The database contains the most comprehensive dataset for KSRs, including the relationships from a recent high-throughput project for identification of KSRs using protein microarrays, as well as known KSRs curated from the literature. In addition, the database also includes several analytical tools for dissecting phosphorylation networks. PhosphoNetworks is expected to play a prominent role in proteomics and phosphorylation-related disease research.

Availability and implementation

http://www.phosphonetworks.org",2013-11-13 +24930141,Cloud4Psi: cloud computing for 3D protein structure similarity searching.,"

Summary

Popular methods for 3D protein structure similarity searching, especially those that generate high-quality alignments such as Combinatorial Extension (CE) and Flexible structure Alignment by Chaining Aligned fragment pairs allowing Twists (FATCAT) are still time consuming. As a consequence, performing similarity searching against large repositories of structural data requires increased computational resources that are not always available. Cloud computing provides huge amounts of computational power that can be provisioned on a pay-as-you-go basis. We have developed the cloud-based system that allows scaling of the similarity searching process vertically and horizontally. Cloud4Psi (Cloud for Protein Similarity) was tested in the Microsoft Azure cloud environment and provided good, almost linearly proportional acceleration when scaled out onto many computational units.

Availability and implementation

Cloud4Psi is available as Software as a Service for testing purposes at: http://cloud4psi.cloudapp.net/. For source code and software availability, please visit the Cloud4Psi project home page at http://zti.polsl.pl/dmrozek/science/cloud4psi.htm.",2014-06-14 +23846748,Modeling nucleosome position distributions from experimental nucleosome positioning maps.,"

Motivation

Recent experimental advancements allow determining positions of nucleosomes for complete genomes. However, the resulting nucleosome occupancy maps are averages of heterogeneous cell populations. Accordingly, they represent a snapshot of a dynamic ensemble at a single time point with an overlay of many configurations from different cells. To study the organization of nucleosomes along the genome and to understand the mechanisms of nucleosome translocation, it is necessary to retrieve features of specific conformations from the population average.

Results

Here, we present a method for identifying non-overlapping nucleosome configurations that combines binary-variable analysis and a Monte Carlo approach with a simulated annealing scheme. In this manner, we obtain specific nucleosome configurations and optimized solutions for the complex positioning patterns from experimental data. We apply the method to compare nucleosome positioning at transcription factor binding sites in different mouse cell types. Our method can model nucleosome translocations at regulatory genomic elements and generate configurations for simulations of the spatial folding of the nucleosome chain.

Availability

Source code, precompiled binaries, test data and a web-based test installation are freely available at http://bioinformatics.fh-stralsund.de/nucpos/",2013-07-11 +24225317,The transporter classification database.,"The Transporter Classification Database (TCDB; http://www.tcdb.org) serves as a common reference point for transport protein research. The database contains more than 10,000 non-redundant proteins that represent all currently recognized families of transmembrane molecular transport systems. Proteins in TCDB are organized in a five level hierarchical system, where the first two levels are the class and subclass, the second two are the family and subfamily, and the last one is the transport system. Superfamilies that contain multiple families are included as hyperlinks to the five tier TC hierarchy. TCDB includes proteins from all types of living organisms and is the only transporter classification system that is both universal and recognized by the International Union of Biochemistry and Molecular Biology. It has been expanded by manual curation, contains extensive text descriptions providing structural, functional, mechanistic and evolutionary information, is supported by unique software and is interconnected to many other relevant databases. TCDB is of increasing usefulness to the international scientific community and can serve as a model for the expansion of database technologies. This manuscript describes an update of the database descriptions previously featured in NAR database issues.",2013-11-12 +24265686,"PANADA: protein association network annotation, determination and analysis.","Increasingly large numbers of proteins require methods for functional annotation. This is typically based on pairwise inference from the homology of either protein sequence or structure. Recently, similarity networks have been presented to leverage both the ability to visualize relationships between proteins and assess the transferability of functional inference. 
Here we present PANADA, a novel toolkit for the visualization and analysis of protein similarity networks in Cytoscape. Networks can be constructed based on pairwise sequence or structural alignments either on a set of proteins or, alternatively, by database search from a single sequence. The Panada web server, executable for download and examples and extensive help files are available at URL: http://protein.bio.unipd.it/panada/.",2013-11-12 +23840333,Vitamin D3 receptor ( VDR ) gene rs2228570 (Fok1) and rs731236 (Taq1) variants are not associated with the risk for multiple sclerosis: results of a new study and a meta-analysis.,"

Background

Some epidemiological, genetic, and experimental data suggest a possible role of vitamin D in the pathogenesis of multiple sclerosis (MS) and in experimental autoimmune encephalomyelitis. Data on the possible contribution of several single nucleotide polymorphisms (SNP) in the vitamin D receptor (VDR) gene to the risk for MS are controversial. Several studies suggested an interaction between some SNPs in the VDR gene and HLADRB1*1501 in the risk for MS.

Objectives

The aim of this study was to investigate a possible influence of the SNPs rs2228570 and rs731236 in the VDR gene in the risk for MS. A secondary objective was to address the possible interactions between VDR genes and HLADRB1*1501.

Methods

We analyzed the allelic and genotype frequency of VDR rs2228570, rs731236, and HLADRB1*1501 (rs3135388) in 303 patients with MS and 310 healthy controls, using TaqMan Assays. We also conducted a meta-analysis, that was carried out by using the software Meta-Disc 1.1.1 (http://www.hrc.es/investigacion/metadisc.html; Unit of Clinical Statistics, Hospital Ramón y Cajal, Madrid, Spain). Heterogeneity between studies in terms of degree of association was tested using the Q-statistic.

Results

VDR rs2228570 and rs731236 allelic and genotype frequencies did not differ significantly between MS patients and controls, and were unrelated with the age of onset of MS, gender, and course of MS. HLADRB1*1501 showed a high association with the risk of developing MS 4.76(95% C.I.  = 3.14-7.27; p<0.0001). The meta-analysis, after excluding data of one study that was responsible of heterogeneity for rs731236 polymorphism, showed lack of relation of both SNPs with the risk for MS. HLADRB1*1501 showed lack of interaction with VDR rs2228570 and rs731236 in increasing MS risk.

Conclusions

These results suggest that VDR rs2228570 and rs731236 polymorphisms are not related with the risk for MS, and did not confirm interaction between these VDR SNPs and HLADRB1 in the risk for MS.",2013-06-20 +23574736,OKVAR-Boost: a novel boosting algorithm to infer nonlinear dynamics and interactions in gene regulatory networks.,"

Motivation

Reverse engineering of gene regulatory networks remains a central challenge in computational systems biology, despite recent advances facilitated by benchmark in silico challenges that have aided in calibrating their performance. A number of approaches using either perturbation (knock-out) or wild-type time-series data have appeared in the literature addressing this problem, with the latter using linear temporal models. Nonlinear dynamical models are particularly appropriate for this inference task, given the generation mechanism of the time-series data. In this study, we introduce a novel nonlinear autoregressive model based on operator-valued kernels that simultaneously learns the model parameters, as well as the network structure.

Results

A flexible boosting algorithm (OKVAR-Boost) that shares features from L2-boosting and randomization-based algorithms is developed to perform the tasks of parameter learning and network inference for the proposed model. Specifically, at each boosting iteration, a regularized Operator-valued Kernel-based Vector AutoRegressive model (OKVAR) is trained on a random subnetwork. The final model consists of an ensemble of such models. The empirical estimation of the ensemble model's Jacobian matrix provides an estimation of the network structure. The performance of the proposed algorithm is first evaluated on a number of benchmark datasets from the DREAM3 challenge and then on real datasets related to the In vivo Reverse-Engineering and Modeling Assessment (IRMA) and T-cell networks. The high-quality results obtained strongly indicate that it outperforms existing approaches.

Availability

The OKVAR-Boost Matlab code is available as the archive: http://amis-group.fr/sourcecode-okvar-boost/OKVARBoost-v1.0.zip.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-10 +24706045,MicroRNAs in the development and pathobiology of uterine leiomyomata: does evidence support future strategies for clinical intervention?,"

Background

Human leiomyomata (fibroids) are benign tumors of the uterus, represent the most common neoplasms of reproductive-aged women and have a prevalence of ∼70% in the general population. This disorder conveys a significant degree of morbidity and remains the leading indication for hysterectomy in the USA. Prior investigations of aberrant microRNA (miRNA) expression in various malignancies have provided invaluable insight into the role of this class of small non-coding RNAs in tumor growth. Evidence of irregular miRNA expression in uterine fibroids has garnered recent interest for diagnostic and therapeutic applications. Since miRNA gene targets modulate several processes implicated in the genesis of uterine fibroids, more focused investigation has the potential to elucidate the functional significance of miRNA in the genesis and pathology of the disease.

Methods

Comprehensive electronic searches of peer reviewed published literature in PubMed (US National Library of Medicine, National Institute of Health; http://www.ncbi.nlm.nih.gov/pubmed/) were performed for content related to the biologic functions of miRNA, the roles of miRNA in human disease and studies investigating miRNA in the context of uterine leiomyomata. Herein, this article will review the current evidence supporting the use of miRNA expression profiling as an investigative tool to assess the pathobiology of uterine fibroids and will discuss potential future applications of miRNAs as biomarkers and therapeutic targets.

Results

Mounting evidence supports a functional role for miRNA as either indirect or direct regulators of gene expression which impacts the pathobiology of uterine fibroids. Specifically, miRNAs let-7, 200a, 200c, 93, 106b and 21 have been implicated in cellular proliferation, apoptosis, extracellular matrix turnover, angiogenesis and inflammation. Preliminary data provide evidence to suggest that respective in vitro miRNA expression in leiomyomata and myometrium is regulated by sex steroids.

Conclusions

Collectively, the identification of aberrantly expressed miRNAs in uterine leiomyomata and accumulating data derived from mining of gene target prediction models and recent functional studies support the concept that miRNAs might impact the genesis and progression of disease. However, the specific biologic functions of differential miRNA expression have yet to be confirmed in vivo. Further functional studies and developing miRNA technology may provide the basis for future applications of miRNAs in clinical medicine as biomarkers and therapeutic targets.",2014-04-04 +24217916,NPInter v2.0: an updated database of ncRNA interactions.,"NPInter (http://www.bioinfo.org/NPInter) is a database that integrates experimentally verified functional interactions between noncoding RNAs (excluding tRNAs and rRNAs) and other biomolecules (proteins, RNAs and genomic DNAs). Extensive studies on ncRNA interactions have shown that ncRNAs could act as part of enzymatic or structural complexes, gene regulators or other functional elements. With the development of high-throughput biotechnology, such as cross-linking immunoprecipitation and high-throughput sequencing (CLIP-seq), the number of known ncRNA interactions, especially those formed by protein binding, has grown rapidly in recent years. In this work, we updated NPInter to version 2.0 by collecting ncRNA interactions from recent literature and related databases, expanding the number of entries to 201 107 covering 18 species. In addition, NPInter v2.0 incorporated a service for the BLAST alignment search as well as visualization of interactions.",2013-11-11 +24277981,Grouped False-Discovery Rate for Removing the Gene-Set-Level Bias of RNA-seq.,"In recent years, RNA-seq has become a very competitive alternative to microarrays. In RNA-seq experiments, the expected read count for a gene is proportional to its expression level multiplied by its transcript length. 
Even when two genes are expressed at the same level, differences in length will yield differing numbers of total reads. The characteristics of these RNA-seq experiments create a gene-level bias such that the proportion of significantly differentially expressed genes increases with the transcript length, whereas such bias is not present in microarray data. Gene-set analysis seeks to identify the gene sets that are enriched in the list of the identified significant genes. In the gene-set analysis of RNA-seq, the gene-level bias subsequently yields the gene-set-level bias that a gene set with genes of long length will be more likely to show up as enriched than will a gene set with genes of shorter length. Because gene expression is not related to its transcript length, any gene set containing long genes is not of biologically greater interest than gene sets with shorter genes. Accordingly the gene-set-level bias should be removed to accurately calculate the statistical significance of each gene-set enrichment in the RNA-seq. We present a new gene set analysis method of RNA-seq, called FDRseq, which can accurately calculate the statistical significance of a gene-set enrichment score by the grouped false-discovery rate. Numerical examples indicated that FDRseq is appropriate for controlling the transcript length bias in the gene-set analysis of RNA-seq data. To implement FDRseq, we developed the R program, which can be downloaded at no cost from http://home.mju.ac.kr/home/index.action?siteId=tyang.",2013-11-13 +24307774,DIACAN: Integrated Database for Antidiabetic and Anticancer Medicinal Plants.,

Unlabelled

Medicinal plants and plant derived molecules are widely used in traditional cultures all over the world and they are becoming largely popular among biomedical researchers and pharmaceutical companies as a natural alternative to synthetic medicine. Information related to medicinal plants and herbal drugs accumulated over the ages is scattered and unstructured which makes it prudent to develop a curated database for medicinal plants. The Antidiabetic and Anticancer Medicinal Plants Database (DIACAN) aims to collect and provide an integrated platform for plants and phytochemicals having antidiabetic or anticancer activity.

Availability

http://www.kaubic.in/diacan.,2013-11-11 +24218542,CoIN: a network analysis for document triage.,"In recent years, there was a rapid increase in the number of medical articles. The number of articles in PubMed has increased exponentially. Thus, the workload for biocurators has also increased exponentially. Under these circumstances, a system that can automatically determine in advance which article has a higher priority for curation can effectively reduce the workload of biocurators. Determining how to effectively find the articles required by biocurators has become an important task. In the triage task of BioCreative 2012, we proposed the Co-occurrence Interaction Nexus (CoIN) for learning and exploring relations in articles. We constructed a co-occurrence analysis system, which is applicable to PubMed articles and suitable for gene, chemical and disease queries. CoIN uses co-occurrence features and their network centralities to assess the influence of curatable articles from the Comparative Toxicogenomics Database. The experimental results show that our network-based approach combined with co-occurrence features can effectively classify curatable and non-curatable articles. CoIN also allows biocurators to survey the ranking lists for specific queries without reviewing meaningless information. At BioCreative 2012, CoIN achieved a 0.778 mean average precision in the triage task, thus finishing in second place out of all participants. Database URL: http://ikmbio.csie.ncku.edu.tw/coin/home.php.",2013-11-11 +24829462,DUET: a server for predicting effects of mutations on protein stability using an integrated computational approach.,"Cancer genome and other sequencing initiatives are generating extensive data on non-synonymous single nucleotide polymorphisms (nsSNPs) in human and other genomes. 
In order to understand the impacts of nsSNPs on the structure and function of the proteome, as well as to guide protein engineering, accurate in silico methodologies are required to study and predict their effects on protein stability. Despite the diversity of available computational methods in the literature, none has proven accurate and dependable on its own under all scenarios where mutation analysis is required. Here we present DUET, a web server for an integrated computational approach to study missense mutations in proteins. DUET consolidates two complementary approaches (mCSM and SDM) in a consensus prediction, obtained by combining the results of the separate methods in an optimized predictor using Support Vector Machines (SVM). We demonstrate that the proposed method improves overall accuracy of the predictions in comparison with either method individually and performs as well as or better than similar methods. The DUET web server is freely and openly available at http://structure.bioc.cam.ac.uk/duet.",2014-05-14 +24090431,SFARI Gene 2.0: a community-driven knowledgebase for the autism spectrum disorders (ASDs).,"New technologies enabling genome-wide interrogation have led to a large and rapidly growing number of autism spectrum disorder (ASD) candidate genes. Although encouraging, the volume and complexity of these data make it challenging for scientists, particularly non-geneticists, to comprehensively evaluate available evidence for individual genes. Described here is the Gene Scoring module within SFARI Gene 2.0 (https://gene.sfari.org/autdb/GS_Home.do), a platform developed to enable systematic community driven assessment of genetic evidence for individual genes with regard to ASD.",2013-10-03 +24153109,PDBsum additions.,"PDBsum, http://www.ebi.ac.uk/pdbsum, is a website providing numerous pictorial analyses of each entry in the Protein Data Bank. 
It portrays the structural features of all proteins, DNA and ligands in the entry, as well as depicting the interactions between them. The latest features, described here, include annotation of human protein sequences with their naturally occurring amino acid variants, dynamic graphs showing the relationships between related protein domain architectures, analyses of ligand binding clusters across different experimental determinations of the same protein, analyses of tunnels in proteins and new search options.",2013-10-22 +24127212,Nonrandom template segregation: a way to break the symmetry of stem cells.,"Whether DNA segregates in a biased way has been a subject of intense controversy and debate. Although highly provocative in its biological implications, if true, technical problems have limited researchers from drawing firm conclusions from the data. Elabd et al. (2013. J. Cell Biol. http://dx.doi.org/10.1083/jcb.201307110/DC1) now show a high frequency of nonrandom template segregation during differentiation of embryonic stem cells using rigorous experimentation and implicate the methyltransferase Dnmt3 as a key regulator of this process.",2013-10-01 +24209780,CEG: a database of essential gene clusters.,"

Background

Essential genes are indispensable for the survival of living entities. They are the cornerstones of synthetic biology, and are potential candidate targets for antimicrobial and vaccine design.

Description

Here we describe the Cluster of Essential Genes (CEG) database, which contains clusters of orthologous essential genes. Based on the size of a cluster, users can easily decide whether an essential gene is conserved in multiple bacterial species or is species-specific. It contains the similarity value of every essential gene cluster against human proteins or genes. The CEG_Match tool is based on the CEG database, and was developed for prediction of essential genes according to function. The database is available at http://cefg.uestc.edu.cn/ceg.

Conclusions

Properties contained in the CEG database, such as cluster size, and the similarity of essential gene clusters against human proteins or genes, are very important for evolutionary research and drug design. An advantage of CEG is that it clusters essential genes based on function, and therefore decreases false positive results when predicting essential genes in comparison with using the similarity alignment method.",2013-11-09 +21417267,Data-driven high-throughput prediction of the 3-D structure of small molecules: review and progress.,"Accurate prediction of the 3-D structure of small molecules is essential in order to understand their physical, chemical, and biological properties, including how they interact with other molecules. Here, we survey the field of high-throughput methods for 3-D structure prediction and set up new target specifications for the next generation of methods. We then introduce COSMOS, a novel data-driven prediction method that utilizes libraries of fragment and torsion angle parameters. We illustrate COSMOS using parameters extracted from the Cambridge Structural Database (CSD) by analyzing their distribution and then evaluating the system's performance in terms of speed, coverage, and accuracy. Results show that COSMOS represents a significant improvement when compared to state-of-the-art prediction methods, particularly in terms of coverage of complex molecular structures, including metal-organics. COSMOS can predict structures for 96.4% of the molecules in the CSD (99.6% organic, 94.6% metal-organic), whereas the widely used commercial method CORINA predicts structures for 68.5% (98.5% organic, 51.6% metal-organic). 
On the common subset of molecules predicted by both methods, COSMOS makes predictions with an average speed per molecule of 0.15 s (0.10 s organic, 0.21 s metal-organic) and an average rmsd of 1.57 Å (1.26 Å organic, 1.90 Å metal-organic), and CORINA makes predictions with an average speed per molecule of 0.13s (0.18s organic, 0.08s metal-organic) and an average rmsd of 1.60 Å (1.13 Å organic, 2.11 Å metal-organic). COSMOS is available through the ChemDB chemoinformatics Web portal at http://cdb.ics.uci.edu/ .",2011-03-18 +23975194,A modular framework for gene set analysis integrating multilevel omics data.,"Modern high-throughput methods allow the investigation of biological functions across multiple 'omics' levels. Levels include mRNA and protein expression profiling as well as additional knowledge on, for example, DNA methylation and microRNA regulation. The reason for this interest in multi-omics is that actual cellular responses to different conditions are best explained mechanistically when taking all omics levels into account. To map gene products to their biological functions, public ontologies like Gene Ontology are commonly used. Many methods have been developed to identify terms in an ontology, overrepresented within a set of genes. However, these methods are not able to appropriately deal with any combination of several data types. Here, we propose a new method to analyse integrated data across multiple omics-levels to simultaneously assess their biological meaning. We developed a model-based Bayesian method for inferring interpretable term probabilities in a modular framework. Our Multi-level ONtology Analysis (MONA) algorithm performed significantly better than conventional analyses of individual levels and yields best results even for sophisticated models including mRNA fine-tuning by microRNAs. The MONA framework is flexible enough to allow for different underlying regulatory motifs or ontologies. 
It is ready-to-use for applied researchers and is available as a standalone application from http://icb.helmholtz-muenchen.de/mona.",2013-08-23 +25229688,A new supervised over-sampling algorithm with application to protein-nucleotide binding residue prediction.,"Protein-nucleotide interactions are ubiquitous in a wide variety of biological processes. Accurately identifying interaction residues solely from protein sequences is useful for both protein function annotation and drug design, especially in the post-genomic era, as large volumes of protein data have not been functionally annotated. Protein-nucleotide binding residue prediction is a typical imbalanced learning problem, where binding residues are extremely fewer in number than non-binding residues. Alleviating the severity of class imbalance has been demonstrated to be a promising means of improving the prediction performance of a machine-learning-based predictor for class imbalance problems. However, little attention has been paid to the negative impact of class imbalance on protein-nucleotide binding residue prediction. In this study, we propose a new supervised over-sampling algorithm that synthesizes additional minority class samples to address class imbalance. The experimental results from protein-nucleotide interaction datasets demonstrate that the proposed supervised over-sampling algorithm can relieve the severity of class imbalance and help to improve prediction performance. Based on the proposed over-sampling algorithm, a predictor, called TargetSOS, is implemented for protein-nucleotide binding residue prediction. Cross-validation tests and independent validation tests demonstrate the effectiveness of TargetSOS. The web-server and datasets used in this study are freely available at http://www.csbio.sjtu.edu.cn/bioinf/TargetSOS/.",2014-09-17 +23175756,A beta-mixture quantile normalization method for correcting probe design bias in Illumina Infinium 450 k DNA methylation data.,"

Motivation

The Illumina Infinium 450 k DNA Methylation Beadchip is a prime candidate technology for Epigenome-Wide Association Studies (EWAS). However, a difficulty associated with these beadarrays is that probes come in two different designs, characterized by widely different DNA methylation distributions and dynamic range, which may bias downstream analyses. A key statistical issue is therefore how best to adjust for the two different probe designs.

Results

Here we propose a novel model-based intra-array normalization strategy for 450 k data, called BMIQ (Beta MIxture Quantile dilation), to adjust the beta-values of type2 design probes into a statistical distribution characteristic of type1 probes. The strategy involves application of a three-state beta-mixture model to assign probes to methylation states, subsequent transformation of probabilities into quantiles and finally a methylation-dependent dilation transformation to preserve the monotonicity and continuity of the data. We validate our method on cell-line data, fresh frozen and paraffin-embedded tumour tissue samples and demonstrate that BMIQ compares favourably with two competing methods. Specifically, we show that BMIQ improves the robustness of the normalization procedure, reduces the technical variation and bias of type2 probe values and successfully eliminates the type1 enrichment bias caused by the lower dynamic range of type2 probes. BMIQ will be useful as a preprocessing step for any study using the Illumina Infinium 450 k platform.

Availability

BMIQ is freely available from http://code.google.com/p/bmiq/.

Contact

a.teschendorff@ucl.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-21 +24213601,EPITRANS: a database that integrates epigenome and transcriptome data.,"Epigenetic modifications affect gene expression and thereby govern a wide range of biological processes such as differentiation, development and tumorigenesis. Recent initiatives to define genome-wide DNA methylation and histone modification profiles by microarray and sequencing methods have led to the construction of databases. These databases are repositories for international epigenetic consortiums or provide mining results from PubMed, but do not integrate the epigenetic information with gene expression changes. In order to overcome this limitation, we constructed EPITRANS, a novel database that visualizes the relationships between gene expression and epigenetic modifications. EPITRANS uses combined analysis of epigenetic modification and gene expression to search for cell function-related epigenetic and transcriptomic alterations (Freely available on the web at http://epitrans.org ).",2013-11-08 +24214996,"Negatome 2.0: a database of non-interacting proteins derived by literature mining, manual annotation and protein structure analysis.","Knowledge about non-interacting proteins (NIPs) is important for training the algorithms to predict protein-protein interactions (PPIs) and for assessing the false positive rates of PPI detection efforts. We present the second version of Negatome, a database of proteins and protein domains that are unlikely to engage in physical interactions (available online at http://mips.helmholtz-muenchen.de/proj/ppi/negatome). Negatome is derived by manual curation of literature and by analyzing three-dimensional structures of protein complexes. The main methodological innovation in Negatome 2.0 is the utilization of an advanced text mining procedure to guide the manual annotation process. 
Potential non-interactions were identified by a modified version of Excerbt, a text mining tool based on semantic sentence analysis. Manual verification shows that nearly a half of the text mining results with the highest confidence values correspond to NIP pairs. Compared to the first version the contents of the database have grown by over 300%.",2013-11-08 +23764085,Multilocus sequence typing of Scedosporium apiospermum and Pseudallescheria boydii isolates from cystic fibrosis patients.,"

Background

Scedosporium and Pseudallescheria species are the second most common lung-colonising fungi in cystic fibrosis (CF) patients. For epidemiological reasons it is important to trace sources of infection, routes of transmission and to determine whether these fungi are transient or permanent colonisers of the respiratory tract. Molecular typing methods like multilocus sequence typing (MLST) help provide this data.

Methods

Clinical isolates of the P. boydii complex (including S. apiospermum and P. boydii) from CF patients in different regions of Germany were studied using MLST. Five gene loci, ACT, CAL, RPB2, BT2 and SOD2, were analysed.

Results

The S. apiospermum isolates from 34 patients were assigned to 32 sequence types (STs), and the P. boydii isolates from 14 patients to 8 STs. The results revealed that patients can be colonised by individual strains for years.

Conclusions

The MLST scheme developed for S. apiospermum and P. boydii is a highly effective tool for epidemiologic studies worldwide. The MLST data are accessible at http://mlst.mycologylab.org/.",2013-06-12 +25150822,LRRsearch: An asynchronous server-based application for the prediction of leucine-rich repeat motifs and an integrative database of NOD-like receptors.,"The leucine-rich repeat (LRR) motifs of the nucleotide-binding oligomerization domain like receptors (NLRs) play key roles in recognizing and binding various pathogen associated molecular patterns (PAMPs) resulting in the activation of downstream signaling and innate immunity. Therefore, identification of LRR motifs is very important to study ligand-receptor interaction. To date, available resources pose restrictions including both false negative and false positive prediction of LRR motifs from the primary protein sequence as their algorithms are relied either only on sequence based comparison or alignment techniques or are over biased for a particular LRR containing protein family. Therefore, to minimize the error (≤5%) and to identify a maximum number of LRR motifs in the wide range of proteins, we have developed ""LRRsearch"" web-server using position specific scoring matrix (PSSM) of 11 residue LRR-HCS (highly conserved segment) which are frequently observed motifs in the most divergent classes of LRR containing proteins. A data library of 421 proteins, distributed among five known NLR families has also been integrated with the ""LRRsearch"" for the rich user experience. The access to the ""LRRsearch"" program is freely available at http://www.lrrsearch.com/.",2014-07-31 +22040322,MolabIS--an integrated information system for storing and managing molecular genetics data.,"

Background

Long-term sample storage, tracing of data flow and data export for subsequent analyses are of great importance in genetics studies. Therefore, molecular labs do need a proper information system to handle an increasing amount of data from different projects.

Results

We have developed a molecular labs information management system (MolabIS). It was implemented as a web-based system allowing the users to capture original data at each step of their workflow. MolabIS provides essential functionality for managing information on individuals, tracking samples and storage locations, capturing raw files, importing final data from external files, searching results, accessing and modifying data. Further important features are options to generate ready-to-print reports and convert sequence and microsatellite data into various data formats, which can be used as input files in subsequent analyses. Moreover, MolabIS also provides a tool for data migration.

Conclusions

MolabIS is designed for small-to-medium sized labs conducting Sanger sequencing and microsatellite genotyping to store and efficiently handle a relative large amount of data. MolabIS not only helps to avoid time consuming tasks but also ensures the availability of data for further analyses. The software is packaged as a virtual appliance which can run on different platforms (e.g. Linux, Windows). MolabIS can be distributed to a wide range of molecular genetics labs since it was developed according to a general data model. Released under GPL, MolabIS is freely available at http://www.molabis.org.",2011-10-31 +24196694,2P2I HUNTER: a tool for filtering orthosteric protein-protein interaction modulators via a dedicated support vector machine.,"Over the last 10 years, protein-protein interactions (PPIs) have shown increasing potential as new therapeutic targets. As a consequence, PPIs are today the most screened target class in high-throughput screening (HTS). The development of broad chemical libraries dedicated to these particular targets is essential; however, the chemical space associated with this 'high-hanging fruit' is still under debate. Here, we analyse the properties of 40 non-redundant small molecules present in the 2P2I database (http://2p2idb.cnrs-mrs.fr/) to define a general profile of orthosteric inhibitors and propose an original protocol to filter general screening libraries using a support vector machine (SVM) with 11 standard Dragon molecular descriptors. The filtering protocol has been validated using external datasets from PubChem BioAssay and results from in-house screening campaigns. This external blind validation demonstrated the ability of the SVM model to reduce the size of the filtered chemical library by eliminating up to 96% of the compounds as well as enhancing the proportion of active compounds by up to a factor of 8. 
We believe that the resulting chemical space identified in this paper will provide the scientific community with a concrete support to search for PPI inhibitors during HTS campaigns.",2013-11-06 +24203712,TISdb: a database for alternative translation initiation in mammalian cells.,"Proper selection of the translation initiation site (TIS) on mRNAs is crucial for the production of desired protein products. Recent studies using ribosome profiling technology uncovered a surprising variety of potential TIS sites in addition to the annotated start codon. The prevailing alternative translation reshapes the landscape of the proteome in terms of diversity and complexity. To identify the hidden coding potential of the transcriptome in mammalian cells, we developed global translation initiation sequencing (GTI-Seq) that maps genome-wide TIS positions at nearly a single nucleotide resolution. To facilitate studies of alternative translation, we created a database of alternative TIS sites identified from human and mouse cell lines based on multiple GTI-Seq replicates. The TISdb, available at http://tisdb.human.cornell.edu, includes 6991 TIS sites from 4961 human genes and 9973 TIS sites from 5668 mouse genes. The TISdb website provides a simple browser interface for query of high-confidence TIS sites and their associated open reading frames. The output of search results provides a user-friendly visualization of TIS information in the context of transcript isoforms. Together, the information in the database provides an easy reference for alternative translation in mammalian cells and will support future investigation of novel translational products.",2013-11-06 +23419374,Non-parametric Bayesian approach to post-translational modification refinement of predictions from tandem mass spectrometry.,"

Motivation

Tandem mass spectrometry (MS/MS) is a dominant approach for large-scale high-throughput post-translational modification (PTM) profiling. Although current state-of-the-art blind PTM spectral analysis algorithms can predict thousands of modified peptides (PTM predictions) in an MS/MS experiment, a significant percentage of these predictions have inaccurate modification mass estimates and false modification site assignments. This problem can be addressed by post-processing the PTM predictions with a PTM refinement algorithm. We developed a novel PTM refinement algorithm, iPTMClust, which extends a recently introduced PTM refinement algorithm PTMClust and uses a non-parametric Bayesian model to better account for uncertainties in the quantity and identity of PTMs in the input data. The use of this new modeling approach enables iPTMClust to provide a confidence score per modification site that allows fine-tuning and interpreting resulting PTM predictions.

Results

The primary goal behind iPTMClust is to improve the quality of the PTM predictions. First, to demonstrate that iPTMClust produces sensible and accurate cluster assignments, we compare it with k-means clustering, mixtures of Gaussians (MOG) and PTMClust on a synthetically generated PTM dataset. Second, in two separate benchmark experiments using PTM data taken from a phosphopeptide and a yeast proteome study, we show that iPTMClust outperforms state-of-the-art PTM prediction and refinement algorithms, including PTMClust. Finally, we illustrate the general applicability of our new approach on a set of human chromatin protein complex data, where we are able to identify putative novel modified peptides and modification sites that may be involved in the formation and regulation of protein complexes. Our method facilitates accurate PTM profiling, which is an important step in understanding the mechanisms behind many biological processes and should be an integral part of any proteomic study.

Availability

Our algorithm is implemented in Java and is freely available for academic use from http://genes.toronto.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-17 +26058082,"Association of Perfluoroalkyl Substances, Bone Mineral Density, and Osteoporosis in the U.S. Population in NHANES 2009-2010.","

Background

Perfluoroalkyl substances (PFASs), including perfluorooctanoic acid (PFOA), perfluorooctane sulfonic acid (PFOS), perfluorohexane sulfonic acid (PFHxS), and perfluorononanoic acid (PFNA), are detectable in the serum of 95% of the U.S.

Objective

Considering the role of PFASs as endocrine disruptors, we examined their relationships with bone health.

Methods

The association between serum PFAS concentration and bone mineral density at total femur (TFBMD), femoral neck (FNBMD), lumbar spine (LSBMD), and physician-diagnosed osteoporosis was assessed in 1,914 participants using data from the National Health and Nutritional Examination Survey 2009-2010.

Results

The mean age of the participants was 43 years. Men had higher serum PFAS concentrations than women (p < 0.001) except for PFNA. In both sexes, serum PFOS concentrations were inversely associated with FNBMD (p < 0.05). In women, significant negative associations were observed for natural log (ln)-transformed PFOS exposure with TFBMD and FNBMD, and for ln-transformed PFOA exposure with TFBMD (p < 0.05). In postmenopausal women, serum PFOS was negatively associated with TFBMD and FNBMD, and PFNA was negatively associated with TFBMD, FNBMD, and LSBMD (all p < 0.05). With one log unit increase in serum PFOA, PFHxS, and PFNA, osteoporosis prevalence in women increased as follows: [adjusted odds ratios (aORs)] 1.84 (95% CI: 1.17, 2.905), 1.64 (95% CI: 1.14, 2.38), and 1.45 (95% CI: 1.02, 2.05), respectively. In women, the prevalence of osteoporosis was significantly higher in the highest versus the lowest quartiles of PFOA, PFHxS, and PFNA, with aORs of 2.59 (95% CI: 1.01, 6.67), 13.20 (95% CI: 2.72, 64.15), and 3.23 (95% CI: 1.44, 7.21), respectively, based on 77 cases in the study sample.

Conclusion

In a representative sample of the U.S. adult population, serum PFAS concentrations were associated with lower bone mineral density, which varied according to the specific PFAS and bone site assessed. Most associations were limited to women. Osteoporosis in women was also associated with PFAS exposure, based on a small number of cases.

Citation

Khalil N, Chen A, Lee M, Czerwinski SA, Ebert JR, DeWitt JC, Kannan K. 2016. Association of perfluoroalkyl substances, bone mineral density, and osteoporosis in the U.S. population in NHANES 2009-2010. Environ Health Perspect 124:81-87; http://dx.doi.org/10.1289/ehp.1307909.",2015-06-09 +24842998,Re-examining how complexin inhibits neurotransmitter release.,"Complexins play activating and inhibitory functions in neurotransmitter release. The complexin accessory helix inhibits release and was proposed to insert into SNARE complexes to prevent their full assembly. This model was supported by 'superclamp' and 'poor-clamp' mutations that enhanced or decreased the complexin-I inhibitory activity in cell-cell fusion assays, and by the crystal structure of a superclamp mutant bound to a synaptobrevin-truncated SNARE complex. NMR studies now show that the complexin-I accessory helix does not insert into synaptobrevin-truncated SNARE complexes in solution, and electrophysiological data reveal that superclamp mutants have slightly stimulatory or no effects on neurotransmitter release, whereas a poor-clamp mutant inhibits release. Importantly, increasing or decreasing the negative charge of the complexin-I accessory helix inhibits or stimulates release, respectively. These results suggest a new model whereby the complexin accessory helix inhibits release through electrostatic (and perhaps steric) repulsion enabled by its location between the vesicle and plasma membranes.DOI: http://dx.doi.org/10.7554/eLife.02391.001.",2014-05-08 +24223923,Viral IRES prediction system - a web server for prediction of the IRES secondary structure in silico.,"The internal ribosomal entry site (IRES) functions as cap-independent translation initiation sites in eukaryotic cells. IRES elements have been applied as useful tools for bi-cistronic expression vectors. Current RNA structure prediction programs are unable to predict precisely the potential IRES element. 
We have designed a viral IRES prediction system (VIPS) to perform the IRES secondary structure prediction. In order to obtain better results for the IRES prediction, the VIPS can evaluate and predict for all four different groups of IRESs with a higher accuracy. RNA secondary structure prediction, comparison, and pseudoknot prediction programs were implemented to form the three-stage procedure for the VIPS. The backbone of VIPS includes: the RNAL fold program, aimed to predict local RNA secondary structures by minimum free energy method; the RNA Align program, intended to compare predicted structures; and pknotsRG program, used to calculate the pseudoknot structure. VIPS was evaluated by using UTR database, IRES database and Virus database, and the accuracy rate of VIPS was assessed as 98.53%, 90.80%, 82.36% and 80.41% for IRES groups 1, 2, 3, and 4, respectively. This advance useful search approach for IRES structures will facilitate IRES related studies. The VIPS on-line website service is available at http://140.135.61.250/vips/.",2013-11-05 +24188373,Clinicopathological and prognostic significance of S100A4 overexpression in colorectal cancer: a meta-analysis.,"

Background

Accumulated evidence has indicated a correlation between S100A4 expression and colorectal cancer (CRC) progression. However, its prognostic significance for patients with CRC remains inconclusive. To clarify their relationship, a meta-analysis of the relevant published studies was performed.

Method

PubMed, Cochrane Library, and Web of Science databases were electronically searched. All studies evaluating the prognostic value of S100A4 expression in CRC patients regarding survival and a series of clinicopathological parameters were included. The effect of S100A4 expression on the overall survival (OS) and disease-free survival (DFS) were measured by pooled hazard ratios (HRs) and 95% confidence intervals (CIs), while the effect of S100A4 expression on the clinicopathological parameters were measured by the pooled odds ratios (ORs) and their 95% CIs.

Results

Eleven studies (2,824 patients in total) were included in the meta-analysis. Overall, S100A4 overexpression was significantly associated with worse OS (HR = 1.90, 95% CI: 1.58-2.29, P <0.001), and worse DFS (HR = 2.16, 95% CI: 1.53-3.05, P <0.001) in patients with CRC. Subgroup analyses showed that S100A4 overexpression was significantly correlated with poor OS in Asian, European, and Australian patients and patients treated with surgery or chemotherapy. Additionally, there were significant associations between S100A4 expression and several clinicopathological parameters (tumour location, lymph node metastasis, nodal status, TNM stage, and tumour depth).

Conclusions

This meta-analysis indicates that S100A4 overexpression seems to correlate with tumour progression and poor prognosis of CRC patients. It may be a useful marker to predict progression and prognosis of CRC.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/8643820431072915.",2013-11-04 +24194603,dbPSHP: a database of recent positive selection across human populations.,"The dbPSHP database (http://jjwanglab.org/dbpshp) aims to help researchers to efficiently identify, validate and visualize putative positively selected loci in human evolution and further discover the mechanism governing these natural selections. Recent evolution of human populations at the genomic level reflects the adaptations to the living environments, including climate change and availability and stability of nutrients. Many genetic regions under positive selection have been identified, which assist us to understand how natural selection has shaped population differences. Here, we manually collect recent positive selections in different human populations, consisting of 15,472 loci from 132 publications. We further compiled a database that used 15 statistical terms of different evolutionary attributes for single nucleotide variant sites from the HapMap 3 and 1000 Genomes Project to identify putative regions under positive selection. These attributes include variant allele/genotype properties, variant heterozygosity, within population diversity, long-range haplotypes, pairwise population differentiation and evolutionary conservation. We also provide interactive pages for visualization and annotation of different selective signals. The database is freely available to the public and will be frequently updated.",2013-11-04 +24194607,"TreeFam v9: a new website, more species and orthology-on-the-fly.","TreeFam (http://www.treefam.org) is a database of phylogenetic trees inferred from animal genomes. For every TreeFam family we provide homology predictions together with the evolutionary history of the genes. Here we describe an update of the TreeFam database. The TreeFam project was resurrected in 2012 and has seen two releases since. 
The latest release (TreeFam 9) was made available in March 2013. It has orthology predictions and gene trees for 109 species in 15,736 families covering ∼2.2 million sequences. With release 9 we made modifications to our production pipeline and redesigned our website with improved gene tree visualizations and Wikipedia integration. Furthermore, we now provide an HMM-based sequence search that places a user-provided protein sequence into a TreeFam gene tree and provides quick orthology prediction. The tool uses Mafft and RAxML for the fast insertion into a reference alignment and tree, respectively. Besides the aforementioned technical improvements, we present a new approach to visualize gene trees and alternative displays that focuses on showing homology information from a species tree point of view. From release 9 onwards, TreeFam is now hosted at the EBI.",2013-11-04 +24194596,"MP:PD--a data base of internal packing densities, internal packing defects and internal waters of helical membrane proteins.","The membrane protein packing database (MP:PD) (http://proteinformatics.charite.de/mppd) is a database of helical membrane proteins featuring internal atomic packing densities, cavities and waters. Membrane proteins are not tightly packed but contain a considerable number of internal cavities that differ in volume, polarity and solvent accessibility as well as in their filling with internal water. Internal cavities are supposed to be regions of high physical compressibility. By serving as mobile hydrogen bonding donors or acceptors, internal waters likely facilitate transition between different functional states. Despite these distinct functional roles, internal cavities of helical membrane proteins are not well characterized, mainly because most internal waters are not resolved by crystal structure analysis. 
Here we combined various computational biophysical techniques to characterize internal cavities, reassign positions of internal waters and calculate internal packing densities of all available helical membrane protein structures and stored them in MP:PD. The database can be searched using keywords and entries can be downloaded. Each entry can be visualized in Provi, a Jmol-based protein viewer that provides an integrated display of low energy waters alongside membrane planes, internal packing density, hydrophobic cavities and hydrogen bonds.",2013-11-04 +24194591,LenVarDB: database of length-variant protein domains.,"Protein domains are functionally and structurally independent modules, which add to the functional variety of proteins. This array of functional diversity has been enabled by evolutionary changes, such as amino acid substitutions or insertions or deletions, occurring in these protein domains. Length variations (indels) can introduce changes at structural, functional and interaction levels. LenVarDB (freely available at http://caps.ncbs.res.in/lenvardb/) traces these length variations, starting from structure-based sequence alignments in our Protein Alignments organized as Structural Superfamilies (PASS2) database, across 731 structural classification of proteins (SCOP)-based protein domain superfamilies connected to 2 730 625 sequence homologues. Alignment of sequence homologues corresponding to a structural domain is available, starting from a structure-based sequence alignment of the superfamily. Orientation of the length-variant (indel) regions in protein domains can be visualized by mapping them on the structure and on the alignment. Knowledge about location of length variations within protein domains and their visual representation will be useful in predicting changes within structurally or functionally relevant sites, which may ultimately regulate protein function. 
Non-technical summary: Evolutionary changes bring about natural changes to proteins that may be found in many organisms. Such changes could be reflected as amino acid substitutions or insertions-deletions (indels) in protein sequences. LenVarDB is a database that provides an early overview of observed length variations that were set among 731 protein families and after examining >2 million sequences. Indels are followed up to observe if they are close to the active site such that they can affect the activity of proteins. Inclusion of such information can aid the design of bioengineering experiments.",2013-11-04 +24990571,Osiris: accessible and reproducible phylogenetic and phylogenomic analyses within the Galaxy workflow management system.,"

Background

Phylogenetic tools and 'tree-thinking' approaches increasingly permeate all biological research. At the same time, phylogenetic data sets are expanding at breakneck pace, facilitated by increasingly economical sequencing technologies. Therefore, there is an urgent need for accessible, modular, and sharable tools for phylogenetic analysis.

Results

We developed a suite of wrappers for new and existing phylogenetics tools for the Galaxy workflow management system that we call Osiris. Osiris and Galaxy provide a sharable, standardized, modular user interface, and the ability to easily create complex workflows using a graphical interface. Osiris enables all aspects of phylogenetic analysis within Galaxy, including de novo assembly of high throughput sequencing reads, ortholog identification, multiple sequence alignment, concatenation, phylogenetic tree estimation, and post-tree comparative analysis. The open source files are available on in the Bitbucket public repository and many of the tools are demonstrated on a public web server (http://galaxy-dev.cnsi.ucsb.edu/osiris/).

Conclusions

Osiris can serve as a foundation for other phylogenomic and phylogenetic tool development within the Galaxy platform.",2014-07-02 +22353882,Efficient key pathway mining: combining networks and OMICS data.,"Systems biology has emerged over the last decade. Driven by the advances in sophisticated measurement technology the research community generated huge molecular biology data sets. These comprise rather static data on the interplay of biological entities, for instance protein-protein interaction network data, as well as quite dynamic data collected for studying the behavior of individual cells or tissues in accordance with changing environmental conditions, such as DNA microarrays or RNA sequencing. Here we bring the two different data types together in order to gain higher level knowledge. We introduce a significantly improved version of the KeyPathwayMiner software framework. Given a biological network modelled as a graph and a set of expression studies, KeyPathwayMiner efficiently finds and visualizes connected sub-networks where most components are expressed in most cases. It finds all maximal connected sub-networks where all nodes but k exceptions are expressed in all experimental studies but at most l exceptions. We demonstrate the power of the new approach by comparing it to similar approaches with gene expression data previously used to study Huntington's disease. In addition, we demonstrate KeyPathwayMiner's flexibility and applicability to non-array data by analyzing genome-scale DNA methylation profiles from colorectal tumor cancer patients. KeyPathwayMiner release 2 is available as a Cytoscape plugin and online at http://keypathwayminer.mpi-inf.mpg.de.",2012-02-21 +23647742,Iterative rank-order normalization of gene expression microarray data.,"

Background

Many gene expression normalization algorithms exist for Affymetrix GeneChip microarrays. The most popular of these is RMA, primarily due to the precision and low noise produced during the process. A significant strength of this and similar approaches is the use of the entire set of arrays during both normalization and model-based estimation of signal. However, this leads to differing estimates of expression based on the starting set of arrays, and estimates can change when a single, additional chip is added to the set. Additionally, outlier chips can impact the signals of other arrays, and can themselves be skewed by the majority of the population.

Results

We developed an approach, termed IRON, which uses the best-performing techniques from each of several popular processing methods while retaining the ability to incrementally renormalize data without altering previously normalized expression. This combination of approaches results in a method that performs comparably to existing approaches on artificial benchmark datasets (i.e. spike-in) and demonstrates promising improvements in segregating true signals within biologically complex experiments.

Conclusions

By combining approaches from existing normalization techniques, the IRON method offers several advantages. First, IRON normalization occurs pair-wise, thereby avoiding the need for all chips to be normalized together, which can be important for large data analyses. Secondly, the technique does not require similarity in signal distribution across chips for normalization, which can be important for maintaining biologically relevant differences in a heterogeneous background. Lastly, IRON introduces fewer post-processing artifacts, particularly in data whose behavior violates common assumptions. Thus, the IRON method provides a practical solution to common needs of expression analysis. A software implementation of IRON is available at [http://gene.moffitt.org/libaffy/].",2013-05-07 +22692830,HDX workbench: software for the analysis of H/D exchange MS data.,"Hydrogen/deuterium exchange mass spectrometry (HDX-MS) is an established method for the interrogation of protein conformation and dynamics. While the data analysis challenge of HDX-MS has been addressed by a number of software packages, new computational tools are needed to keep pace with the improved methods and throughput of this technique. To address these needs, we report an integrated desktop program titled HDX Workbench, which facilitates automation, management, visualization, and statistical cross-comparison of large HDX data sets. Using the software, validated data analysis can be achieved at the rate of generation. The application is available at the project home page http://hdx.florida.scripps.edu .",2012-06-13 +24330602,The environment ontology: contextualising biological and biomedical entities.,"As biological and biomedical research increasingly reference the environmental context of the biological entities under study, the need for formalisation and standardisation of environment descriptors is growing. 
The Environment Ontology (ENVO; http://www.environmentontology.org) is a community-led, open project which seeks to provide an ontology for specifying a wide range of environments relevant to multiple life science disciplines and, through an open participation model, to accommodate the terminological requirements of all those needing to annotate data using ontology classes. This paper summarises ENVO's motivation, content, structure, adoption, and governance approach. The ontology is available from http://purl.obolibrary.org/obo/envo.owl - an OBO format version is also available by switching the file suffix to ""obo"".",2013-12-11 +21511767,A novel multilocus sequence typing scheme for the opportunistic pathogen Propionibacterium acnes and characterization of type I cell surface-associated antigens.,"We have developed a novel multilocus sequence typing (MLST) scheme and database (http://pubmlst.org/pacnes/) for Propionibacterium acnes based on the analysis of seven core housekeeping genes. The scheme, which was validated against previously described antibody, single locus and random amplification of polymorphic DNA typing methods, displayed excellent resolution and differentiated 123 isolates into 37 sequence types (STs). An overall clonal population structure was detected with six eBURST groups representing the major clades I, II and III, along with two singletons. Two highly successful and global clonal lineages, ST6 (type IA) and ST10 (type IB(1)), representing 64 % of this current MLST isolate collection were identified. The ST6 clone and closely related single locus variants, which comprise a large clonal complex CC6, dominated isolates from patients with acne, and were also significantly associated with ophthalmic infections. Our data therefore support an association between acne and P. acnes strains from the type IA cluster and highlight the role of a widely disseminated clonal genotype in this condition. 
Characterization of type I cell surface-associated antigens that are not detected in ST10 or strains of type II and III identified two dermatan-sulphate-binding proteins with putative phase/antigenic variation signatures. We propose that the expression of these proteins by type IA organisms contributes to their role in the pathophysiology of acne and helps explain the recurrent nature of the disease. The MLST scheme and database described in this study should provide a valuable platform for future epidemiological and evolutionary studies of P. acnes.",2011-04-21 +24894464,Complete morphologies of basal forebrain cholinergic neurons in the mouse.,"The basal forebrain cholinergic system modulates neuronal excitability and vascular tone throughout the cerebral cortex and hippocampus. This system is severely affected in Alzheimer's disease (AD), and drug treatment to enhance cholinergic signaling is widely used as symptomatic therapy in AD. Defining the full morphologies of individual basal forebrain cholinergic neurons has, until now, been technically beyond reach due to their large axon arbor sizes. Using genetically-directed sparse labeling, we have characterized the complete morphologies of basal forebrain cholinergic neurons in the mouse. Individual arbors were observed to span multiple cortical columns, and to have >1000 branch points and total axon lengths up to 50 cm. In an AD model, cholinergic axons were slowly lost and there was an accumulation of axon-derived material in discrete puncta. Calculations based on published morphometric data indicate that basal forebrain cholinergic neurons in humans have a mean axon length of ∼100 meters.DOI: http://dx.doi.org/10.7554/eLife.02444.001.",2014-05-07 +25466449,Upregulation of microRNA-106b is associated with poor prognosis in hepatocellular carcinoma.,"

Background

MicroRNA-106b (miR-106b) is a member of the miR-106b ~ 25 cluster. It has been reported that miR-106b acts as an oncogene and is upregulated in many human cancers. However, the prognostic value of miR-106b in hepatocellular carcinoma (HCC) remains unclear. The aim of this study was to investigate the clinical significance of miR-106b expression in HCC.

Methods

We determined the expression level of miR-106b in 104 cases of paired HCC and adjacent non-tumor tissues by quantitative real-time PCR (qRT-PCR). The correlation between miR-106b expression and prognosis of HCC was studied by univariate and multivariate analysis. Multivariate analysis of the prognostic factors was performed with Cox proportional hazards model.

Results

MiR-106b expression was significantly upregulated in as high as 76.0% of HCC tissues, compared with their non-tumor counterparts (P < 0.001). High miR-106b expression was significantly associated with large tumor size (P = 0.019) and vascular invasion (P = 0.016). Kaplan-Meier analysis showed that patients with high miR-106b expression had a worse overall survival than patients with low miR-106b expression (log-rank P = 0.004). The multivariate Cox regression analysis indicated that miR-106b expression was an independent prognostic factor for overall survival (HR, 2.002; 95% CI, 1.130-6.977; P = 0.027).

Conclusion

Our data indicated that miR-106b expression was significantly upregulated in HCC and could serve as a potential unfavorable prognostic biomarker.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_226.",2014-12-03 +30708477,First Report of Fusarium chlamydosporum Causing Damping-Off Disease on Aleppo Pine in Algeria.,"The Aleppo pine (Pinus halepensis Mill.) is a conifer native to the Mediterranean region. In 2008 and 2009, a survey of Aleppo pine seedling diseases was performed in three forest nurseries from the Relizane, Sidi Bel Abbes, and Tlemcen departments in northwestern Algeria. One- to two-month-old Aleppo pine seedlings showed symptoms of damping-off in pre- and post-emergence (typical seedling collar rot). The problem was widespread with a disease incidence of 64 to 77% and an annual impact of US$50,000. Disinfested root and root collar segments (from four composite samples per location), approximately 5 mm in length, were cultured on PDA and incubated at 25°C and day/night light. Two (from 21) isolates were identified morphologically (2) as the anamorph Fusarium chlamydosporum Wollenw. & Reinking and isolated from collar rots of Relizane forest nursery seedlings. Colony development on PDA media was fast; 32 mm diameter colonies developed after 3 days. Colonies were white. Mycelia were floccose, fairly dense, off-white, and turned a lilac color in older portions of the colony. Macroconidia were thick-walled and moderately curved with unequal dorsiventral curvature (the lower wall is almost straight), short, curved and pointed apical cell, usually notched, but occasionally foot shaped basal cell, 3- to 5-septate, and 2 × 8 to 21 μm. Microconidia were abundant, 0-septate, and 2 × 6 to 9 μm. Chlamydospores were abundant, formed rapidly in single chains or clusters, and 8 to 15 μm diameter. To confirm the identity of this fungus, the internal transcribed spacer of F12RR and F4SR isolates of F. chlamydosporum were amplified and sequenced using ITS1 and ITS4 primers (4). 
Sequences were deposited in GenBank under accessions JX114795 and JX114789, respectively. Those sequences bore 99% similarity with reference sequence AY213655 (2) and 100% with HQ671187, also found 99 to 100% similarity with F. equiseti (Corda) Sacc. but with different conidia. Pathogenicity tests were performed to fulfill Koch's postulates. Inoculum was produced by adding a 5 mm diam. plug from a 7-day-old CMA petri dish culture to a previously sterilized 500 ml flask (237.5 g sand, 12.5 g cornmeal, 80 ml SDW), shaken over 9 days, and mixed with sterile soil at 1:3 (v:v). Infested soil was then transferred to 500 ml pots, and 10 seeds were planted. A completely randomized design was used with three replicates per isolate and three control pots. After 1 month, two tested isolates caused typical damping-off symptoms on seedlings. The percentage of the plants that became infected was 65 to 77%. To our knowledge (1,3), this is the first report of F. chlamydosporum on Aleppo pine in northwestern Algeria. It is also the first report of this fungal species affecting the Aleppo pine throughout the world, and on conifers in Africa and the Mediterranean region (1,3). References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Syst. Mycol. Microbiol. Lab. ARS, USDA, Beltsville, MD. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , February 20, 2013. (2) J. F. Leslie and B. A. Summerell. The Fusarium Laboratory Manual. Blackwell Publishing, Ames, IA, 2006. (3) D. W. Minter. Cybertruffle's Robigalia, Observations of Fungi and their Associated Organisms. Retrieved from http://www.cybertruffle.org.uk/robigalia/eng/ , February 20, 2013. (4) T. J. White et al. Page 315 in: PCR Protocols: A Guide to Methods and Applications. 
Academic Press, San Diego, 1990.",2013-11-01 +24223973,"SWEETLEAD: an in silico database of approved drugs, regulated chemicals, and herbal isolates for computer-aided drug discovery.","In the face of drastically rising drug discovery costs, strategies promising to reduce development timelines and expenditures are being pursued. Computer-aided virtual screening and repurposing approved drugs are two such strategies that have shown recent success. Herein, we report the creation of a highly-curated in silico database of chemical structures representing approved drugs, chemical isolates from traditional medicinal herbs, and regulated chemicals, termed the SWEETLEAD database. The motivation for SWEETLEAD stems from the observance of conflicting information in publicly available chemical databases and the lack of a highly curated database of chemical structures for the globally approved drugs. A consensus building scheme surveying information from several publicly accessible databases was employed to identify the correct structure for each chemical. Resulting structures are filtered for the active pharmaceutical ingredient, standardized, and differing formulations of the same drug were combined in the final database. The publically available release of SWEETLEAD (https://simtk.org/home/sweetlead) provides an important tool to enable the successful completion of computer-aided repurposing and drug discovery campaigns.",2013-11-01 +21590420,Large scale identification of genes involved in plant-fungal interactions using Illumina's sequencing-by-synthesis technology.,"Deep transcriptome profiling of pathogen-infected tissues enhances the understanding of molecular mechanisms underlying host-pathogen interactions. Illumina's next generation sequencing technology sequencing-by-synthesis (SBS) is a powerful tool to rapidly sequence genomes and transcriptomes at an affordable rate. 
We modified the procedure for SBS library construction to significantly increase the efficiency of library construction. Using our improved method, two Sclerotinia homoeocarpa libraries were constructed from mycelia grown in potato dextrose broth (PDB) or potato dextrose agar (PDA) for 96 h, respectively, and two creeping bentgrass libraries were constructed from leaves 96 h after inoculation with S. homoeocarpa or water sprayed, respectively. About 4-7 million mRNA signatures were sequenced from each library. Sequence analysis using BLAST was performed against sequenced fungal genomes and rice genomic sequence to identify the expressed genes in both S. homoeocarpa mycelia and creeping bentgrass. Bioinformatic analysis identified many expressed genes in the pathogen and host. A public database to access the sequence data was developed at http://www.dstidb.org . Our results demonstrate how SBS technology can unravel transcriptome complexity during the creeping bentgrass-S. homoeocarpa interaction.",2011-01-01 +22493695,"The Digital Fish Library: using MRI to digitize, database, and document the morphological diversity of fish.","Museum fish collections possess a wealth of anatomical and morphological data that are essential for documenting and understanding biodiversity. Obtaining access to specimens for research, however, is not always practical and frequently conflicts with the need to maintain the physical integrity of specimens and the collection as a whole. Non-invasive three-dimensional (3D) digital imaging therefore serves a critical role in facilitating the digitization of these specimens for anatomical and morphological analysis as well as facilitating an efficient method for online storage and sharing of this imaging data. 
Here we describe the development of the Digital Fish Library (DFL, http://www.digitalfishlibrary.org), an online digital archive of high-resolution, high-contrast, magnetic resonance imaging (MRI) scans of the soft tissue anatomy of an array of fishes preserved in the Marine Vertebrate Collection of Scripps Institution of Oceanography. We have imaged and uploaded MRI data for over 300 marine and freshwater species, developed a data archival and retrieval system with a web-based image analysis and visualization tool, and integrated these into the public DFL website to disseminate data and associated metadata freely over the web. We show that MRI is a rapid and powerful method for accurately depicting the in-situ soft-tissue anatomy of preserved fishes in sufficient detail for large-scale comparative digital morphology. However these 3D volumetric data require a sophisticated computational and archival infrastructure in order to be broadly accessible to researchers and educators.",2012-04-06 +24175949,Insulin therapy contributes to the increased risk of colorectal cancer in diabetes patients: a meta-analysis.,"

Background

Recent epidemiological studies suggest that treatment with insulin may promote cancer growth. The present systematic review and meta-analysis of published observational studies was conducted to assess the risk of cancer during treatment with insulin.

Materials and methods

A compressive search was conducted through MEDLINE, PubMed, Web of Science, EMBASE, and Chinese Biomedical Literature databases (CBM). Pooled relative risks (RRs) and 95% confidence intervals (CIs) were calculated with a random-effects model.

Results

A total of four studies with one case-controls study and three cohort studies comparing the insulin therapy and colorectal cancer susceptibility were identified. When all four studies were analyzed, the summary RRs were 1.61 (95% CI = 1.18-1.35) in a random-effects model for individuals with insulin therapy, compared with individuals without insulin therapy, which suggests a statistically significant association between insulin use and colorectal cancer.

Conclusions

Our findings provides the evidence that insulin therapy may contribute to the risk of colorectal cancer.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/9339731010859509.",2013-10-31 +25113321,Comparing algorithms for automated vessel segmentation in computed tomography scans of the lung: the VESSEL12 study.,"The VESSEL12 (VESsel SEgmentation in the Lung) challenge objectively compares the performance of different algorithms to identify vessels in thoracic computed tomography (CT) scans. Vessel segmentation is fundamental in computer aided processing of data generated by 3D imaging modalities. As manual vessel segmentation is prohibitively time consuming, any real world application requires some form of automation. Several approaches exist for automated vessel segmentation, but judging their relative merits is difficult due to a lack of standardized evaluation. We present an annotated reference dataset containing 20 CT scans and propose nine categories to perform a comprehensive evaluation of vessel segmentation algorithms from both academia and industry. Twenty algorithms participated in the VESSEL12 challenge, held at International Symposium on Biomedical Imaging (ISBI) 2012. All results have been published at the VESSEL12 website http://vessel12.grand-challenge.org. The challenge remains ongoing and open to new participants. Our three contributions are: (1) an annotated reference dataset available online for evaluation of new algorithms; (2) a quantitative scoring system for objective comparison of algorithms; and (3) performance analysis of the strengths and weaknesses of the various vessel segmentation methods in the presence of various lung diseases.",2014-07-23 +24607742,A protein structural classes prediction method based on PSI-BLAST profile.,"Knowledge of protein structural classes plays an important role in understanding protein folding patterns. Prediction of protein structural class based solely on sequence data remains to be a challenging problem. 
In this study, we extract the long-range correlation information and linear correlation information from position-specific score matrix (PSSM). A total of 3600 features are extracted, then, 278 features are selected by a filter feature selection method based on 1189 dataset. To verify the performance of our method (named by LCC-PSSM), jackknife tests are performed on three widely used low similarity benchmark datasets. Comparison of our results with the existing methods shows that our method provides the favorable performance for protein structural class prediction. Stand-alone version of the proposed method (LCC-PSSM) is written in MATLAB language and it can be downloaded from http://bioinfo.zstu.edu.cn/LCC-PSSM/.",2014-03-04 +22870189,ChIPnorm: a statistical method for normalizing and identifying differential regions in histone modification ChIP-seq libraries.,"The advent of high-throughput technologies such as ChIP-seq has made possible the study of histone modifications. A problem of particular interest is the identification of regions of the genome where different cell types from the same organism exhibit different patterns of histone enrichment. This problem turns out to be surprisingly difficult, even in simple pairwise comparisons, because of the significant level of noise in ChIP-seq data. In this paper we propose a two-stage statistical method, called ChIPnorm, to normalize ChIP-seq data, and to find differential regions in the genome, given two libraries of histone modifications of different cell types. We show that the ChIPnorm method removes most of the noise and bias in the data and outperforms other normalization methods. We correlate the histone marks with gene expression data and confirm that histone modifications H3K27me3 and H3K4me3 act as respectively a repressor and an activator of genes. 
Compared to what was previously reported in the literature, we find that a substantially higher fraction of bivalent marks in ES cells for H3K27me3 and H3K4me3 move into a K27-only state. We find that most of the promoter regions in protein-coding genes have differential histone-modification sites. The software for this work can be downloaded from http://lcbb.epfl.ch/software.html.",2012-08-03 +25616254,Individualized assessment of preterm birth risk using two modified prediction models.,"

Objectives

To construct two prediction models for individualized assessment of preterm delivery risk within 48h and before completed 32 weeks of gestation and to test the validity of modified and previously published models.

Study design

Data on 617 consecutive women with preterm labor transferred to a tertiary care center for threatened preterm delivery between 22 and 32 weeks of gestation were analysed. Variables predicting the risk of delivery within 48h and before completed 32 weeks of gestation were assessed and applied to previously published prediction models. Multivariate analyses identified variables that were incorporated into two modified models that were subsequently validated.

Results

Two modified prediction models were developed and internally validated, incorporating four and six of the following variables to predict the risk of delivery within 48h and before completed 32 weeks of gestation, respectively: presence of preterm premature rupture of membranes and/or vaginal bleeding, sonographic cervical length, week of gestation, fetal fibronectin, and serum C-reactive protein. The correspondence between the actual and the predicted preterm birth rates suggests excellent calibration of the models. Internal validation analyses for the modified 48h and 32 week prediction models revealed considerably high concordance-indices of 0.8 (95%CI: [0.70-0.81]) and 0.85 (95%CI: [0.82-0.90]), respectively.

Conclusions

Two modified prediction models to assess the risk of preterm birth were constructed and validated. The models can be used for individualized prediction of preterm birth and allow more accurate risk assessment than based upon a single risk factor. An online-based risk-calculator was constructed and can be assessed through: http://cemsiis.meduniwien.ac.at/en/kb/science-research/software/clinical-software/prematurebirth/.",2015-01-08 +22824380,Sets2Networks: network inference from repeated observations of sets.,"

Background

The skeleton of complex systems can be represented as networks where vertices represent entities, and edges represent the relations between these entities. Often it is impossible, or expensive, to determine the network structure by experimental validation of the binary interactions between every vertex pair. It is usually more practical to infer the network from surrogate observations. Network inference is the process by which an underlying network of relations between entities is determined from indirect evidence. While many algorithms have been developed to infer networks from quantitative data, less attention has been paid to methods which infer networks from repeated co-occurrence of entities in related sets. This type of data is ubiquitous in the field of systems biology and in other areas of complex systems research. Hence, such methods would be of great utility and value.

Results

Here we present a general method for network inference from repeated observations of sets of related entities. Given experimental observations of such sets, we infer the underlying network connecting these entities by generating an ensemble of networks consistent with the data. The frequency of occurrence of a given link throughout this ensemble is interpreted as the probability that the link is present in the underlying real network conditioned on the data. Exponential random graphs are used to generate and sample the ensemble of consistent networks, and we take an algorithmic approach to numerically execute the inference method. The effectiveness of the method is demonstrated on synthetic data before employing this inference approach to problems in systems biology and systems pharmacology, as well as to construct a co-authorship collaboration network. We predict direct protein-protein interactions from high-throughput mass-spectrometry proteomics, integrate data from Chip-seq and loss-of-function/gain-of-function followed by expression data to infer a network of associations between pluripotency regulators, extract a network that connects 53 cancer drugs to each other and to 34 severe adverse events by mining the FDA's Adverse Events Reporting Systems (AERS), and construct a co-authorship network that connects Mount Sinai School of Medicine investigators. The predicted networks and online software to create networks from entity-set libraries are provided online at http://www.maayanlab.net/S2N.

Conclusions

The network inference method presented here can be applied to resolve different types of networks in current systems biology and systems pharmacology as well as in other fields of research.",2012-07-23 +24168386,Identification of B-cell epitopes in an antigen for inducing specific class of antibodies.,"

Background

In the past, numerous methods have been developed for predicting antigenic regions or B-cell epitopes that can induce B-cell response. To the best of authors' knowledge, no method has been developed for predicting B-cell epitopes that can induce a specific class of antibody (e.g., IgA, IgG) except allergenic epitopes (IgE). In this study, an attempt has been made to understand the relation between primary sequence of epitopes and the class of antibodies generated.

Results

The dataset used in this study has been derived from Immune Epitope Database and consists of 14725 B-cell epitopes that include 11981 IgG, 2341 IgE, 403 IgA specific epitopes and 22835 non-B-cell epitopes. In order to understand the preference of residues or motifs in these epitopes, we computed and compared amino acid and dipeptide composition of IgG, IgE, IgA inducing epitopes and non-B-cell epitopes. Differences in composition profiles of different classes of epitopes were observed, and few residues were found to be preferred. Based on these observations, we developed models for predicting antibody class-specific B-cell epitopes using various features like amino acid composition, dipeptide composition, and binary profiles. Among these, dipeptide composition-based support vector machine model achieved maximum Matthews correlation coefficient of 0.44, 0.70 and 0.45 for IgG, IgE and IgA specific epitopes respectively. All models were developed on experimentally validated non-redundant dataset and evaluated using five-fold cross validation. In addition, the performance of dipeptide-based model was also evaluated on independent dataset.

Conclusion

Present study utilizes the amino acid sequence information for predicting the tendencies of antigens to induce different classes of antibodies. For the first time, in silico models have been developed for predicting B-cell epitopes, which can induce specific class of antibodies. A web service called IgPred has been developed to serve the scientific community. This server will be useful for researchers working in the field of subunit/epitope/peptide-based vaccines and immunotherapy (http://crdd.osdd.net/raghava/igpred/).",2013-10-30 +23233655,Characterization of disordered proteins with ENSEMBLE.,"

Unlabelled

ENSEMBLE is a computational approach for determining a set of conformations that represents the structural ensemble of a disordered protein based on input experimental data. The disordered protein can be an unfolded or intrinsically disordered state. Here, we introduce the latest version of the program, which has been enhanced to facilitate its general release and includes an intuitive user interface, as well as new approaches to treat data and analyse results.

Availability and implementation

ENSEMBLE is a program implemented in C and embedded in a Perl wrapper. It is supported on main Linux distributions. Source codes and installation files, including a detailed example, can be freely downloaded at http://abragam.med.utoronto.ca/∼JFKlab.",2012-12-11 +25056320,VNTRseek-a computational tool to detect tandem repeat variants in high-throughput sequencing data.,"DNA tandem repeats (TRs) are ubiquitous genomic features which consist of two or more adjacent copies of an underlying pattern sequence. The copies may be identical or approximate. Variable number of tandem repeats or VNTRs are polymorphic TR loci in which the number of pattern copies is variable. In this paper we describe VNTRseek, our software for discovery of minisatellite VNTRs (pattern size ≥ 7 nucleotides) using whole genome sequencing data. VNTRseek maps sequencing reads to a set of reference TRs and then identifies putative VNTRs based on a discrepancy between the copy number of a reference and its mapped reads. VNTRseek was used to analyze the Watson and Khoisan genomes (454 technology) and two 1000 Genomes family trios (Illumina). In the Watson genome, we identified 752 VNTRs with pattern sizes ranging from 7 to 84 nt. In the Khoisan genome, we identified 2572 VNTRs with pattern sizes ranging from 7 to 105 nt. In the trios, we identified between 2660 and 3822 VNTRs per individual and found nearly 100% consistency with Mendelian inheritance. VNTRseek is, to the best of our knowledge, the first software for genome-wide detection of minisatellite VNTRs. It is available at http://orca.bu.edu/vntrseek/.",2014-07-23 +25474034,"Respiratory syncytial virus--United States, July 2012-June 2014.","Respiratory syncytial virus (RSV) causes lower respiratory infection among infants and young children worldwide. Annually in the United States, RSV infection has been associated with an estimated 57,527 hospitalizations and 2.1 million outpatient visits among children aged <5 years. 
In temperate climate zones, RSV generally circulates during the fall, winter, and spring. However, the exact timing and duration of RSV seasons vary by region and from year-to-year. Knowing the start of the RSV season in any given locality is important to health care providers and public health officials who use RSV seasonality data to guide diagnostic testing and the timing of RSV immunoprophylaxis for children at high risk for severe respiratory infection. To describe RSV seasonality (defined as onset, offset, peak, and duration) nationally, by U.S. Department of Health and Human Services (HHS) regions and for the state of Florida, CDC analyzes RSV laboratory detections reported to the National Respiratory and Enteric Virus Surveillance System (NREVSS). Florida is reported separately because it has an earlier season onset and longer season duration than the rest of the country. For 2012-13, the RSV season onset ranged from late October to late December, and season offset ranged from late December to late April, excluding Florida. For 2013-14, the RSV season onset ranged from late October to late January, and season offset from late January to early April, excluding Florida. Weekly updates of RSV national, regional, and state RSV trends are available from NREVSS at http://www.cdc.gov/surveillance/nrevss.",2014-12-01 +25538467,Knowledge about the availability of the pharmacist in the Nuclear Medicine Department: A questionnaire-based study among health-care professionals.,"

Objective

The objective of this study was to analyze the knowledge about the availability of the pharmacist in the nuclear medicine department among health-care professionals through a prospective cohort study.

Methods

A total of 741 health-care professionals participated in the study by answering 10 simple questions about the role of the pharmacist in the nuclear medicine department and the availability of pharmacist in the nuclear medicine department. An online questionnaire system was used to conduct the study, and participants were invited to participate through personal communications and by promoting the study through social websites including Facebook, LinkedIn and Google (including Gmail and Google+). The study was conducted between April 2013 and March 2014 using the http://www.freeonlinesurveys.com/Webserver. Finally, the data provided by 621 participants was analyzed. Group frequency analysis was performed using Statistical Package for the Social Sciences (SPSS) version 16 (SPSS Inc. USA).

Results

The participants were from Malaysia, India, Pakistan, Sri Lanka, Bangladesh, UAE and Nepal. In total, 312 (50.2%) female health-care professionals and 309 (49.8%) male health-care professionals participated in the study. Of the 621 participants, 390 were working in hospitals, and 231 were not working in hospitals. Of the participants who were working in hospitals, 57.6% were pharmacists. The proportion of study participants who were aware of nuclear pharmacists was 55.39%. Awareness about the role of the pharmacist in nuclear medicine was poor.

Conclusion

The role of the pharmacist in a nuclear medicine unit needs to be highlighted and promoted among health-care professionals and hence that the nuclear medicine team can provide better pharmaceutical care.",2014-12-01 +24174541,P-MITE: a database for plant miniature inverted-repeat transposable elements.,"Miniature inverted-repeat transposable elements (MITEs) are prevalent in eukaryotic species including plants. MITE families vary dramatically and usually cannot be identified based on homology. In this study, we de novo identified MITEs from 41 plant species, using computer programs MITE Digger, MITE-Hunter and/or Repetitive Sequence with Precise Boundaries (RSPB). MITEs were found in all, but one (Cyanidioschyzon merolae), species. Combined with the MITEs identified previously from the rice genome, >2.3 million sequences from 3527 MITE families were obtained from 41 plant species. In general, higher plants contain more MITEs than lower plants, with a few exceptions such as papaya, with only 538 elements. The largest number of MITEs is found in apple, with 237 302 MITE sequences. The number of MITE sequences in a genome is significantly correlated with genome size. A series of databases (plant MITE databases, P-MITE), available online at http://pmite.hzau.edu.cn/django/mite/, was constructed to host all MITE sequences from the 41 plant genomes. The databases are available for sequence similarity searches (BLASTN), and MITE sequences can be downloaded by family or by genome. The databases can be used to study the origin and amplification of MITEs, MITE-derived small RNAs and roles of MITEs on gene and genome evolution.",2013-10-29 +22593950,Designing the Microbial Research Commons: Proceedings of an International Symposium,"The Board on Research Data and Information held an International Symposium on Designing the Microbial Research Commons at the National Academy of Sciences in Washington, DC on 8–9 October 2009. 
Organized by a separately appointed Steering Committee, this symposium expanded on prior international discussions on the same topic at a conference in June 2008 in Ghent, Belgium (see: http://www.microbialcommons.ugent.be/). The October 2009 symposium addressed topics such as models to lower the transaction costs and support access to and use of microbiological materials and digital resources from the perspective of publicly funded research, public-private interactions, and developing country concerns. The overall goal of the symposium was to stimulate more research and implementation of improved legal and institutional models for publicly funded research in microbiology. The International Symposium on Designing the Microbial Research Commons focused on accomplishing the following tasks: 1. Delineate the research and applications opportunities from improved integration of microbial data, information, and materials and from enhanced collaboration within the global microbial community. 2. Identify the global challenges and barriers—the scientific, technical, institutional, legal, economic, and socio-cultural—that hinder the integration of microbial resources and the collaborative practice of scientific communities in the microbial commons. 3. Characterize the alternative legal and policy approaches developed and implemented by other research communities, such as common-use licensing for scientific data and information, standard-form material transfer agreements, open access publishing, and open data networks that could be applied successfully by the microbial research community. 4. Define the contributions of new information and communication technology (ICT) tools in building federated information infrastructures, such as ontologies, data and text mining, and web 2.0. 5. 
Discuss and evaluate the institutional design and governance principles of data and information sharing among information infrastructures, drawing upon and analyzing successful and failed case studies in the life sciences. 6. Identify the range of policy issues that need to be addressed for maximizing open access to materials, data and literature information in an integrated microbial research commons.",2012-05-18 +24931999,Stochastic EM-based TFBS motif discovery with MITSU.,"

Motivation

The Expectation-Maximization (EM) algorithm has been successfully applied to the problem of transcription factor binding site (TFBS) motif discovery and underlies the most widely used motif discovery algorithms. In the wider field of probabilistic modelling, the stochastic EM (sEM) algorithm has been used to overcome some of the limitations of the EM algorithm; however, the application of sEM to motif discovery has not been fully explored.

Results

We present MITSU (Motif discovery by ITerative Sampling and Updating), a novel algorithm for motif discovery, which combines sEM with an improved approximation to the likelihood function, which is unconstrained with regard to the distribution of motif occurrences within the input dataset. The algorithm is evaluated quantitatively on realistic synthetic data and several collections of characterized prokaryotic TFBS motifs and shown to outperform EM and an alternative sEM-based algorithm, particularly in terms of site-level positive predictive value.

Availability and implementation

Java executable available for download at http://www.sourceforge.net/p/mitsu-motif/, supported on Linux/OS X.",2014-06-01 +23927696,COBRApy: COnstraints-Based Reconstruction and Analysis for Python.,"

Background

COnstraint-Based Reconstruction and Analysis (COBRA) methods are widely used for genome-scale modeling of metabolic networks in both prokaryotes and eukaryotes. Due to the successes with metabolism, there is an increasing effort to apply COBRA methods to reconstruct and analyze integrated models of cellular processes. The COBRA Toolbox for MATLAB is a leading software package for genome-scale analysis of metabolism; however, it was not designed to elegantly capture the complexity inherent in integrated biological networks and lacks an integration framework for the multiomics data used in systems biology. The openCOBRA Project is a community effort to promote constraints-based research through the distribution of freely available software.

Results

Here, we describe COBRA for Python (COBRApy), a Python package that provides support for basic COBRA methods. COBRApy is designed in an object-oriented fashion that facilitates the representation of the complex biological processes of metabolism and gene expression. COBRApy does not require MATLAB to function; however, it includes an interface to the COBRA Toolbox for MATLAB to facilitate use of legacy codes. For improved performance, COBRApy includes parallel processing support for computationally intensive processes.

Conclusion

COBRApy is an object-oriented framework designed to meet the computational challenges associated with the next generation of stoichiometric constraint-based models and high-density omics data sets.

Availability

http://opencobra.sourceforge.net/",2013-08-08 +22856879,A web-based multi-genome synteny viewer for customized data.,"

Background

Web-based synteny visualization tools are important for sharing data and revealing patterns of complicated genome conservation and rearrangements. Such tools should allow biologists to upload genomic data for their own analysis. This requirement is critical because individual biologists are generating large amounts of genomic sequences that quickly overwhelm any centralized web resources to collect and display all those data. Recently, we published a web-based synteny viewer, GSV, which was designed to satisfy the above requirement. However, GSV can only compare two genomes at a given time. Extending the functionality of GSV to visualize multiple genomes is important to meet the increasing demand of the research community.

Results

We have developed a multi-Genome Synteny Viewer (mGSV). Similar to GSV, mGSV is a web-based tool that allows users to upload their own genomic data files for visualization. Multiple genomes can be presented in a single integrated view with an enhanced user interface. Users can navigate through all the selected genomes in either pairwise or multiple viewing mode to examine conserved genomic regions as well as the accompanying genome annotations. Besides serving users who manually interact with the web server, mGSV also provides Web Services for machine-to-machine communication to accept data sent by other remote resources. The entire mGSV package can also be downloaded for easy local installation.

Conclusions

mGSV significantly enhances the original functionalities of GSV. A web server hosting mGSV is provided at http://cas-bioinfo.cas.unt.edu/mgsv.",2012-08-02 +22863767,Identifying multi-layer gene regulatory modules from multi-dimensional genomic data.,"

Motivation

Eukaryotic gene expression (GE) is subjected to precisely coordinated multi-layer controls, across the levels of epigenetic, transcriptional and post-transcriptional regulations. Recently, the emerging multi-dimensional genomic dataset has provided unprecedented opportunities to study the cross-layer regulatory interplay. In these datasets, the same set of samples is profiled on several layers of genomic activities, e.g. copy number variation (CNV), DNA methylation (DM), GE and microRNA expression (ME). However, suitable analysis methods for such data are currently sparse.

Results

In this article, we introduced a sparse Multi-Block Partial Least Squares (sMBPLS) regression method to identify multi-dimensional regulatory modules from this new type of data. A multi-dimensional regulatory module contains sets of regulatory factors from different layers that are likely to jointly contribute to a local 'gene expression factory'. We demonstrated the performance of our method on the simulated data as well as on The Cancer Genome Atlas Ovarian Cancer datasets including the CNV, DM, ME and GE data measured on 230 samples. We showed that the majority of the identified modules have significant functional and transcriptional enrichment, higher than that observed in modules identified using only a single type of genomic data. Our network analysis of the modules revealed that the CNV, DM and microRNA can have coupled impact on expression of important oncogenes and tumor suppressor genes.

Availability and implementation

The source code implemented by MATLAB is freely available at: http://zhoulab.usc.edu/sMBPLS/.

Contact

xjzhou@usc.edu

Supplementary information

Supplementary material are available at Bioinformatics online.",2012-08-03 +22904078,Compression of next-generation sequencing reads aided by highly efficient de novo assembly.,"

Unlabelled

We present Quip, a lossless compression algorithm for next-generation sequencing data in the FASTQ and SAM/BAM formats. In addition to implementing reference-based compression, we have developed, to our knowledge, the first assembly-based compressor, using a novel de novo assembly algorithm. A probabilistic data structure is used to dramatically reduce the memory required by traditional de Bruijn graph assemblers, allowing millions of reads to be assembled very efficiently. Read sequences are then stored as positions within the assembled contigs. This is combined with statistical compression of read identifiers, quality scores, alignment information and sequences, effectively collapsing very large data sets to <15% of their original size with no loss of information.

Availability

Quip is freely available under the 3-clause BSD license from http://cs.washington.edu/homes/dcjones/quip.",2012-08-16 +23938102,Starch biosynthesis in cassava: a genome-based pathway reconstruction and its exploitation in data integration.,"

Background

Cassava is a well-known starchy root crop utilized for food, feed and biofuel production. However, a comprehensive understanding of the process of starch production in cassava is not yet available.

Results

In this work, we exploited the recently released genome information and utilized the post-genomic approaches to reconstruct the metabolic pathway of starch biosynthesis in cassava using multiple plant templates. The quality of pathway reconstruction was assured by the employed parsimonious reconstruction framework and the collective validation steps. Our reconstructed pathway is presented in the form of an informative map, which describes all important information of the pathway, and an interactive map, which facilitates the integration of omics data into the metabolic pathway. Additionally, to demonstrate the advantage of the reconstructed pathways beyond just the schematic presentation, the pathway could be used for incorporating the gene expression data obtained from various developmental stages of cassava roots. Our results exhibited the distinct activities of the starch biosynthesis pathway in different stages of root development at the transcriptional level whereby the activity of the pathway is higher toward the development of mature storage roots.

Conclusions

To expand its applications, the interactive map of the reconstructed starch biosynthesis pathway is available for download at the SBI group's website (http://sbi.pdti.kmutt.ac.th/?page_id=33). This work is considered a big step in the quantitative modeling pipeline aiming to investigate the dynamic regulation of starch biosynthesis in cassava roots.",2013-08-10 +22302147,cn.MOPS: mixture of Poissons for discovering copy number variations in next-generation sequencing data with a low false discovery rate.,"Quantitative analyses of next-generation sequencing (NGS) data, such as the detection of copy number variations (CNVs), remain challenging. Current methods detect CNVs as changes in the depth of coverage along chromosomes. Technological or genomic variations in the depth of coverage thus lead to a high false discovery rate (FDR), even upon correction for GC content. In the context of association studies between CNVs and disease, a high FDR means many false CNVs, thereby decreasing the discovery power of the study after correction for multiple testing. We propose 'Copy Number estimation by a Mixture Of PoissonS' (cn.MOPS), a data processing pipeline for CNV detection in NGS data. In contrast to previous approaches, cn.MOPS incorporates modeling of depths of coverage across samples at each genomic position. Therefore, cn.MOPS is not affected by read count variations along chromosomes. Using a Bayesian approach, cn.MOPS decomposes variations in the depth of coverage across samples into integer copy numbers and noise by means of its mixture components and Poisson distributions, respectively. The noise estimate allows for reducing the FDR by filtering out detections having high noise that are likely to be false detections. 
We compared cn.MOPS with the five most popular methods for CNV detection in NGS data using four benchmark datasets: (i) simulated data, (ii) NGS data from a male HapMap individual with implanted CNVs from the X chromosome, (iii) data from HapMap individuals with known CNVs, (iv) high coverage data from the 1000 Genomes Project. cn.MOPS outperformed its five competitors in terms of precision (1-FDR) and recall for both gains and losses in all benchmark data sets. The software cn.MOPS is publicly available as an R package at http://www.bioinf.jku.at/software/cnmops/ and at Bioconductor.",2012-02-01 +21245076,PubMed and beyond: a survey of web tools for searching biomedical literature.,"The past decade has witnessed the modern advances of high-throughput technology and rapid growth of research capacity in producing large-scale biological data, both of which were concomitant with an exponential growth of biomedical literature. This wealth of scholarly knowledge is of significant importance for researchers in making scientific discoveries and healthcare professionals in managing health-related matters. However, the acquisition of such information is becoming increasingly difficult due to its large volume and rapid growth. In response, the National Center for Biotechnology Information (NCBI) is continuously making changes to its PubMed Web service for improvement. Meanwhile, different entities have devoted themselves to developing Web tools for helping users quickly and efficiently search and retrieve relevant publications. These practices, together with maturity in the field of text mining, have led to an increase in the number and quality of various Web tools that provide comparable literature search service to PubMed. In this study, we review 28 such tools, highlight their respective innovations, compare them to the PubMed system and one another, and discuss directions for future development. 
Furthermore, we have built a website dedicated to tracking existing systems and future advances in the field of biomedical literature search. Taken together, our work serves information seekers in choosing tools for their needs and service providers and developers in keeping current in the field. Database URL: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/search.",2011-01-18 +21360289,PSI:Biology-materials repository: a biologist's resource for protein expression plasmids.,"The Protein Structure Initiative:Biology-Materials Repository (PSI:Biology-MR; MR; http://psimr.asu.edu ) sequence-verifies, annotates, stores, and distributes the protein expression plasmids and vectors created by the Protein Structure Initiative (PSI). The MR has developed an informatics and sample processing pipeline that manages this process for thousands of samples per month from nearly a dozen PSI centers. DNASU ( http://dnasu.asu.edu ), a freely searchable database, stores the plasmid annotations, which include the full-length sequence, vector information, and associated publications for over 130,000 plasmids created by our laboratory, by the PSI and other consortia, and by individual laboratories for distribution to researchers worldwide. Each plasmid links to external resources, including the PSI Structural Biology Knowledgebase ( http://sbkb.org ), which facilitates cross-referencing of a particular plasmid to additional protein annotations and experimental data. To expedite and simplify plasmid requests, the MR uses an expedited material transfer agreement (EP-MTA) network, where researchers from network institutions can order and receive PSI plasmids without institutional delays. As of March 2011, over 39,000 protein expression plasmids and 78 empty vectors from the PSI are available upon request from DNASU. 
Overall, the MR's repository of expression-ready plasmids, its automated pipeline, and the rapid process for receiving and distributing these plasmids more effectively allows the research community to dissect the biological function of proteins whose structures have been studied by the PSI.",2011-03-01 +21366916,PoPoolation DB: a user-friendly web-based database for the retrieval of natural polymorphisms in Drosophila.,"

Background

The enormous potential of natural variation for the functional characterization of genes has been neglected for a long time. Only recently have functional geneticists started to account for natural variation in their analyses. With the new sequencing technologies it has become feasible to collect sequence information for multiple individuals on a genomic scale. In particular, sequencing pooled DNA samples has been shown to provide a cost-effective approach for characterizing variation in natural populations. While a range of software tools have been developed for mapping these reads onto a reference genome and extracting SNPs, linking this information to population genetic estimators and functional information still poses a major challenge to many researchers.

Results

We developed PoPoolation DB, a user-friendly integrated database. PoPoolation DB links variation in natural populations with functional information, allowing a wide range of researchers to take advantage of population genetic data. PoPoolation DB provides the user with population genetic parameters (Watterson's θ or Tajima's π), Tajima's D, SNPs, allele frequencies and indels in regions of interest. The database can be queried by gene name, chromosomal position, or a user-provided query sequence or GTF file. We anticipate that PoPoolation DB will be a highly versatile tool for functional geneticists as well as evolutionary biologists.

Conclusions

PoPoolation DB, available at http://www.popoolation.at/pgt, provides an integrated platform for researchers to investigate natural polymorphism and associated functional annotations from UCSC and Flybase genome browsers, population genetic estimators and RNA-seq information.",2011-03-02 +24339764,Understanding variation in transcription factor binding by modeling transcription factor genome-epigenome interactions.,"Despite explosive growth in genomic datasets, the methods for studying epigenomic mechanisms of gene regulation remain primitive. Here we present a model-based approach to systematically analyze the epigenomic functions in modulating transcription factor-DNA binding. Based on the first principles of statistical mechanics, this model considers the interactions between epigenomic modifications and a cis-regulatory module, which contains multiple binding sites arranged in any configurations. We compiled a comprehensive epigenomic dataset in mouse embryonic stem (mES) cells, including DNA methylation (MeDIP-seq and MRE-seq), DNA hydroxymethylation (5-hmC-seq), and histone modifications (ChIP-seq). We discovered correlations of transcription factors (TFs) for specific combinations of epigenomic modifications, which we term epigenomic motifs. Epigenomic motifs explained why some TFs appeared to have different DNA binding motifs derived from in vivo (ChIP-seq) and in vitro experiments. Theoretical analyses suggested that the epigenome can modulate transcriptional noise and boost the cooperativity of weak TF binding sites. ChIP-seq data suggested that epigenomic boost of binding affinities in weak TF binding sites can function in mES cells. We showed in theory that the epigenome should suppress the TF binding differences on SNP-containing binding sites in two people. 
Using personal data, we identified strong associations between H3K4me2/H3K9ac and the degree of personal differences in NFκB binding in SNP-containing binding sites, which may explain why some SNPs introduce much smaller personal variations on TF binding than other SNPs. In summary, this model presents a powerful approach to analyze the functions of epigenomic modifications. This model was implemented into an open source program APEG (Affinity Prediction by Epigenome and Genome, http://systemsbio.ucsd.edu/apeg).",2013-12-05 +23408855,TriageTools: tools for partitioning and prioritizing analysis of high-throughput sequencing data.,"High-throughput sequencing is becoming a popular research tool but carries with it considerable costs in terms of computation time, data storage and bandwidth. Meanwhile, some research applications focusing on individual genes or pathways do not necessitate processing of a full sequencing dataset. Thus, it is desirable to partition a large dataset into smaller, manageable, but relevant pieces. We present a toolkit for partitioning raw sequencing data that includes a method for extracting reads that are likely to map onto pre-defined regions of interest. We show the method can be used to extract information about genes of interest from DNA or RNA sequencing samples in a fraction of the time and disk space required to process and store a full dataset. We report speedup factors between 2.6 and 96, depending on settings and samples used. The software is available at http://www.sourceforge.net/projects/triagetools/.",2013-02-13 +24163250,RADAR: a rigorously annotated database of A-to-I RNA editing.,"We present RADAR--a rigorously annotated database of A-to-I RNA editing (available at http://RNAedit.com). The identification of A-to-I RNA editing sites has been dramatically accelerated in the past few years by high-throughput RNA sequencing studies. 
RADAR includes a comprehensive collection of A-to-I RNA editing sites identified in humans (Homo sapiens), mice (Mus musculus) and flies (Drosophila melanogaster), together with extensive manually curated annotations for each editing site. RADAR also includes an expandable listing of tissue-specific editing levels for each editing site, which will facilitate the assignment of biological functions to specific editing sites.",2013-10-25 +22238653,SASqPCR: robust and rapid analysis of RT-qPCR data in SAS.,"Reverse transcription quantitative real-time PCR (RT-qPCR) is a key method for measurement of relative gene expression. Analysis of RT-qPCR data requires many iterative computations for data normalization and analytical optimization. Currently no computer program for RT-qPCR data analysis is suitable for analytical optimization and user-controllable customization based on data quality, experimental design as well as specific research aims. Here I introduce an all-in-one computer program, SASqPCR, for robust and rapid analysis of RT-qPCR data in SAS. This program has multiple macros for assessment of PCR efficiencies, validation of reference genes, optimization of data normalizers, normalization of confounding variations across samples, and statistical comparison of target gene expression in parallel samples. Users can simply change the macro variables to test various analytical strategies, optimize results and customize the analytical processes. In addition, it is highly automatic and functionally extendable. Thus users are the actual decision-makers controlling RT-qPCR data analyses. SASqPCR and its tutorial are freely available at http://code.google.com/p/sasqpcr/downloads/list.",2012-01-06 +24307700,HEFT: eQTL analysis of many thousands of expressed genes while simultaneously controlling for hidden factors.,"

Motivation

Identification of expression Quantitative Trait Loci (eQTL), the genetic loci that contribute to heritable variation in gene expression, can be obstructed by factors that produce variation in expression profiles if these factors are unmeasured or hidden from direct analysis.

Methods

We have developed a method for Hidden Expression Factor analysis (HEFT) that identifies individual and pleiotropic effects of eQTL in the presence of hidden factors. The HEFT model is a combined multivariate regression and factor analysis, where the complete likelihood of the model is used to derive a ridge estimator for simultaneous factor learning and detection of eQTL. HEFT requires no pre-estimation of hidden factor effects; it provides P-values and is extremely fast, requiring just a few hours to complete an eQTL analysis of thousands of expression variables when analyzing hundreds of thousands of single nucleotide polymorphisms on a standard 8 core 2.6 G desktop.

Results

By analyzing simulated data, we demonstrate that HEFT can correct for an unknown number of hidden factors and significantly outperforms all related hidden factor methods for eQTL analysis when there are eQTL with univariate and multivariate (pleiotropic) effects. To demonstrate a real-world application, we applied HEFT to identify eQTL affecting gene expression in the human lung for a study that included presumptive hidden factors. HEFT identified all of the cis-eQTL found by other hidden factor methods and 91 additional cis-eQTL. HEFT also identified a number of eQTLs with direct relevance to lung disease that could not be found without a hidden factor analysis, including cis-eQTL for GTF2H1 and MTRR, genes that have been independently associated with lung cancer.

Availability

Software is available at http://mezeylab.cb.bscb.cornell.edu/Software.aspx.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-12-04 +24163100,uORFdb--a comprehensive literature database on eukaryotic uORF biology.,"Approximately half of all human transcripts contain at least one upstream translational initiation site that precedes the main coding sequence (CDS) and gives rise to an upstream open reading frame (uORF). We generated uORFdb, publicly available at http://cbdm.mdc-berlin.de/tools/uorfdb, to serve as a comprehensive literature database on eukaryotic uORF biology. Upstream ORFs affect downstream translation by interfering with the unrestrained progression of ribosomes across the transcript leader sequence. Although the first uORF-related translational activity was observed >30 years ago, and an increasing number of studies link defective uORF-mediated translational control to the development of human diseases, the features that determine uORF-mediated regulation of downstream translation are not well understood. The uORFdb was manually curated from all uORF-related literature listed at the PubMed database. It categorizes individual publications by a variety of denominators including taxon, gene and type of study. Furthermore, the database can be filtered for multiple structural and functional uORF-related properties to allow convenient and targeted access to the complex field of eukaryotic uORF biology.",2013-10-24 +24154671,Tissue-specific gene expression and functional regulation of uncoupling protein 2 (UCP2) by hypoxia and nutrient availability in gilthead sea bream (Sparus aurata): implications on the physiological significance of UCP1-3 variants.,"The aim of this study was to assess in an integrative manner the physiological regulation of uncoupling protein 2 (UCP2) in gilthead sea bream. A contig of 1,325 nucleotides in length with an open reading frame of 307 amino acids was recognized as UCP2 after searches in our transcriptome reference database ( http://www.nutrigroup-iats.org/seabreamdb ). 
Gene expression mapping by quantitative real-time PCR revealed a ubiquitous profile that clearly differs from that of UCP1 and UCP3 variants with the greatest abundance in liver and white skeletal muscle, respectively. The greatest abundance of UCP2 transcripts was found in the heart, with a relatively high expression level in blood cells, where UCP1 and UCP3 transcripts were practically undetectable. Functional studies revealed that UCP2 mRNA expression remains either unaltered or up-regulated upon feed restriction in glycolytic (white skeletal muscle) and highly oxidative muscle tissues (heart and red skeletal muscle), respectively. In contrast, exposure to hypoxic conditions (18-19% oxygen saturation) markedly down-regulated the UCP2 mRNA expression in blood cells in a cellular environment with increased haematocrit, blood haemoglobin content, and circulating levels of glucose and lactate, and total plasma antioxidant activity. These findings demonstrated that UCP2 expression is highly regulated at the transcriptional level, arising this UCP variant as an important piece of the complex trade-off between metabolic and redox sensors. This feature would avoid the activation of futile cycles of energy wastage if changes in tissue oxidative and antioxidant metabolic capabilities are able to maintain the production of reactive oxygen species at a low regulated level.",2013-10-24 +23809014,Integrating multi-platform genomic data using hierarchical Bayesian relevance vector machines.,"

Background

Recent advances in genome technologies and the subsequent collection of genomic information at various molecular resolutions hold promise to accelerate the discovery of new therapeutic targets. A critical step in achieving these goals is to develop efficient clinical prediction models that integrate these diverse sources of high-throughput data. This step is challenging due to the presence of high-dimensionality and complex interactions in the data. For predicting relevant clinical outcomes, we propose a flexible statistical machine learning approach that acknowledges and models the interaction between platform-specific measurements through nonlinear kernel machines and borrows information within and between platforms through a hierarchical Bayesian framework. Our model has parameters with direct interpretations in terms of the effects of platforms and data interactions within and across platforms. The parameter estimation algorithm in our model uses a computationally efficient variational Bayes approach that scales well to large high-throughput datasets.

Results

We apply our methods of integrating gene/mRNA expression and microRNA profiles for predicting patient survival times to The Cancer Genome Atlas (TCGA) based glioblastoma multiforme (GBM) dataset. In terms of prediction accuracy, we show that our non-linear and interaction-based integrative methods perform better than linear alternatives and non-integrative methods that do not account for interactions between the platforms. We also find several prognostic mRNAs and microRNAs that are related to tumor invasion and are known to drive tumor metastasis and severe inflammatory response in GBM. In addition, our analysis reveals several interesting mRNA and microRNA interactions that have known implications in the etiology of GBM.

Conclusions

Our approach gains its flexibility and power by modeling the non-linear interaction structures between and within the platforms. Our framework is a useful tool for biomedical researchers, since clinical prediction using multi-platform genomic information is an important step towards personalized treatment of many cancers. We have a freely available software at: http://odin.mdacc.tmc.edu/~vbaladan.",2013-06-28 +22693437,"Exploring massive, genome scale datasets with the GenometriCorr package.","

Unlabelled

We have created a statistically grounded tool for determining the correlation of genomewide data with other datasets or known biological features, intended to guide biological exploration of high-dimensional datasets, rather than providing immediate answers. The software enables several biologically motivated approaches to these data and here we describe the rationale and implementation for each approach. Our models and statistics are implemented in an R package that efficiently calculates the spatial correlation between two sets of genomic intervals (data and/or annotated features), for use as a metric of functional interaction. The software handles any type of pointwise or interval data and instead of running analyses with predefined metrics, it computes the significance and direction of several types of spatial association; this is intended to suggest potentially relevant relationships between the datasets.

Availability and implementation

The package, GenometriCorr, can be freely downloaded at http://genometricorr.sourceforge.net/. Installation guidelines and examples are available from the sourceforge repository. The package is pending submission to Bioconductor.",2012-05-31 +23325622,CluePedia Cytoscape plugin: pathway insights using integrated experimental and in silico data.,"

Summary

The CluePedia Cytoscape plugin is a search tool for new markers potentially associated to pathways. CluePedia calculates linear and non-linear statistical dependencies from experimental data. Genes, proteins and miRNAs can be connected based on in silico and/or experimental information and integrated into a ClueGO network of terms/pathways. Interrelations within each pathway can be investigated, and new potential associations may be revealed through gene/protein/miRNA enrichments. A pathway-like visualization can be created using the Cerebral plugin layout. Combining all these features is essential for data interpretation and the generation of new hypotheses. The CluePedia Cytoscape plugin is user-friendly and has an expressive and intuitive visualization.

Availability

http://www.ici.upmc.fr/cluepedia/ and via the Cytoscape plugin manager. The user manual is available at the CluePedia website.",2013-01-16 +23202746,Musket: a multistage k-mer spectrum-based error corrector for Illumina sequence data.,"

Motivation

The imperfect sequence data produced by next-generation sequencing technologies have motivated the development of a number of short-read error correctors in recent years. The majority of methods focus on the correction of substitution errors, which are the dominant error source in data produced by Illumina sequencing technology. Existing tools either score high in terms of recall or precision but not consistently high in terms of both measures.

Results

In this article, we present Musket, an efficient multistage k-mer-based corrector for Illumina short-read data. We use the k-mer spectrum approach and introduce three correction techniques in a multistage workflow: two-sided conservative correction, one-sided aggressive correction and voting-based refinement. Our performance evaluation results, in terms of correction quality and de novo genome assembly measures, reveal that Musket is consistently one of the top performing correctors. In addition, Musket is multi-threaded using a master-slave model and demonstrates superior parallel scalability compared with all other evaluated correctors as well as a highly competitive overall execution time.

Availability

Musket is available at http://musket.sourceforge.net.",2012-11-29 +24158836,RAvariome: a genetic risk variants database for rheumatoid arthritis based on assessment of reproducibility between or within human populations.,"Rheumatoid arthritis (RA) is a common autoimmune inflammatory disease of the joints and is caused by both genetic and environmental factors. In the past six years, genome-wide association studies (GWASs) have identified many risk variants associated with RA. However, not all associations reported from GWASs are reproduced when tested in follow-up studies. To establish a reliable set of RA risk variants, we systematically classified common variants identified in GWASs by the degree of reproducibility among independent studies. We collected comprehensive genetic associations from 90 papers of GWASs and meta-analysis. The genetic variants were assessed according to the statistical significance and reproducibility between or within nine geographical populations. As a result, 82 and 19 single nucleotide polymorphisms (SNPs) were confirmed as intra- and inter-population-reproduced variants, respectively. Interestingly, the majority of the intra-population-reproduced variants from European and East Asian populations were not common in two populations, but their nearby genes appeared to be the components of common pathways. Furthermore, a tool to predict the individual's genetic risk of RA was developed to facilitate personalized medicine and preventive health care. For further clinical research, the list of reliable genetic variants of RA and the genetic risk prediction tool are provided by open access database RAvariome. DATABASE URL: http://hinv.jp/hinv/rav/.",2013-10-23 +25030374,The lncRNA PCAT29 inhibits oncogenic phenotypes in prostate cancer.,"

Unlabelled

Long noncoding RNAs (lncRNA) have recently been associated with the development and progression of a variety of human cancers. However, to date, the interplay between known oncogenic or tumor-suppressive events and lncRNAs has not been well described. Here, the novel lncRNA, prostate cancer-associated transcript 29 (PCAT29), is characterized along with its relationship to the androgen receptor. PCAT29 is suppressed by DHT and upregulated upon castration therapy in a prostate cancer xenograft model. PCAT29 knockdown significantly increased proliferation and migration of prostate cancer cells, whereas PCAT29 overexpression conferred the opposite effect and suppressed growth and metastases of prostate tumors in chick chorioallantoic membrane assays. Finally, in prostate cancer patient specimens, low PCAT29 expression correlated with poor prognostic outcomes. Taken together, these data expose PCAT29 as an androgen-regulated tumor suppressor in prostate cancer.

Implications

This study identifies PCAT29 as the first androgen receptor-repressed lncRNA that functions as a tumor suppressor and that its loss may identify a subset of patients at higher risk for disease recurrence. Visual Overview: http://mcr.aacrjournals.org/content/early/2014/07/31/1541-7786.MCR-14-0257/F1.large.jpg.",2014-07-16 +25747448,LiSIs: An Online Scientific Workflow System for Virtual Screening.,"Modern methods of drug discovery and development in recent years make a wide use of computational algorithms. These methods utilise Virtual Screening (VS), which is the computational counterpart of experimental screening. In this manner the in silico models and tools initially replace the wet lab methods saving time and resources. This paper presents the overall design and implementation of a web based scientific workflow system for virtual screening called the Life Sciences Informatics (LiSIs) platform. The LiSIs platform consists of the following layers: the input layer covering the data file input; the pre-processing layer covering the descriptors calculation, and the docking preparation components; the processing layer covering the attribute filtering, compound similarity, substructure matching, docking prediction, predictive modelling and molecular clustering; post-processing layer covering the output reformatting and binary file merging components; output layer covering the storage component. The potential of LiSIs platform has been demonstrated through two case studies designed to illustrate the preparation of tools for the identification of promising chemical structures. The first case study involved the development of a Quantitative Structure Activity Relationship (QSAR) model on a literature dataset while the second case study implemented a docking-based virtual screening experiment. 
Our results show that VS workflows utilizing docking, predictive models and other in silico tools as implemented in the LiSIs platform can identify compounds in line with expert expectations. We anticipate that the deployment of LiSIs, as currently implemented and available for use, can enable drug discovery researchers to more easily use state of the art computational techniques in their search for promising chemical compounds. The LiSIs platform is freely accessible (i) under the GRANATUM platform at: http://www.granatum.org and (ii) directly at: http://lisis.cs.ucy.ac.cy.",2015-01-01 +24859754,miR-142 orchestrates a network of actin cytoskeleton regulators during megakaryopoiesis.,"Genome-encoded microRNAs (miRNAs) provide a posttranscriptional regulatory layer that controls the differentiation and function of various cellular systems, including hematopoietic cells. miR-142 is one of the most prevalently expressed miRNAs within the hematopoietic lineage. To address the in vivo functions of miR-142, we utilized a novel reporter and a loss-of-function mouse allele that we have recently generated. In this study, we show that miR-142 is broadly expressed in the adult hematopoietic system. Our data further reveal that miR-142 is critical for megakaryopoiesis. Genetic ablation of miR-142 caused impaired megakaryocyte maturation, inhibition of polyploidization, abnormal proplatelet formation, and thrombocytopenia. Finally, we characterized a network of miR-142-3p targets which collectively control actin filament homeostasis, thereby ensuring proper execution of actin-dependent proplatelet formation. 
Our study reveals a pivotal role for miR-142 activity in megakaryocyte maturation and function, and demonstrates a critical contribution of a single miRNA in orchestrating cytoskeletal dynamics and normal hemostasis. DOI: http://dx.doi.org/10.7554/eLife.01964.001.",2014-05-23 +25592589,Microtask crowdsourcing for disease mention annotation in PubMed abstracts.,"Identifying concepts and relationships in biomedical text enables knowledge to be applied in computational analyses. Many biological natural language processing (BioNLP) projects attempt to address this challenge, but the state of the art still leaves much room for improvement. Progress in BioNLP research depends on large, annotated corpora for evaluating information extraction systems and training machine learning models. Traditionally, such corpora are created by small numbers of expert annotators often working over extended periods of time. Recent studies have shown that workers on microtask crowdsourcing platforms such as Amazon's Mechanical Turk (AMT) can, in aggregate, generate high-quality annotations of biomedical text. Here, we investigated the use of the AMT in capturing disease mentions in PubMed abstracts. We used the NCBI Disease corpus as a gold standard for refining and benchmarking our crowdsourcing protocol. After several iterations, we arrived at a protocol that reproduced the annotations of the 593 documents in the 'training set' of this gold standard with an overall F measure of 0.872 (precision 0.862, recall 0.883). The output can also be tuned to optimize for precision (max = 0.984 when recall = 0.269) or recall (max = 0.980 when precision = 0.436). Each document was completed by 15 workers, and their annotations were merged based on a simple voting method. In total 145 workers combined to complete all 593 documents in the span of 9 days at a cost of $.066 per abstract per worker. 
The quality of the annotations, as judged with the F measure, increases with the number of workers assigned to each task; however minimal performance gains were observed beyond 8 workers per task. These results add further evidence that microtask crowdsourcing can be a valuable tool for generating well-annotated corpora in BioNLP. Data produced for this analysis are available at http://figshare.com/articles/Disease_Mention_Annotation_with_Mechanical_Turk/1126402.",2015-01-01 +24771767,Genetic interactions affecting human gene expression identified by variance association mapping.,"Non-additive interaction between genetic variants, or epistasis, is a possible explanation for the gap between heritability of complex traits and the variation explained by identified genetic loci. Interactions give rise to genotype dependent variance, and therefore the identification of variance quantitative trait loci can be an intermediate step to discover both epistasis and gene by environment effects (GxE). Using RNA-sequence data from lymphoblastoid cell lines (LCLs) from the TwinsUK cohort, we identify a candidate set of 508 variance associated SNPs. Exploiting the twin design we show that GxE plays a role in ∼70% of these associations. Further investigation of these loci reveals 57 epistatic interactions that replicated in a smaller dataset, explaining on average 4.3% of phenotypic variance. In 24 cases, more variance is explained by the interaction than their additive contributions. Using molecular phenotypes in this way may provide a route to uncovering genetic interactions underlying more complex traits. DOI: http://dx.doi.org/10.7554/eLife.01381.001.",2014-04-25 +25161246,OncodriveROLE classifies cancer driver genes in loss of function and activating mode of action.,"

Motivation

Several computational methods have been developed to identify cancer driver genes--genes responsible for cancer development upon specific alterations. These alterations can cause the loss of function (LoF) of the gene product, for instance, in tumor suppressors, or increase or change its activity or function, if it is an oncogene. Distinguishing between these two classes is important to understand tumorigenesis in patients and has implications for therapy decision making. Here, we assess the capacity of multiple gene features related to the pattern of genomic alterations across tumors to distinguish between activating and LoF cancer genes, and we present an automated approach to aid the classification of novel cancer drivers according to their role.

Result

OncodriveROLE is a machine learning-based approach that classifies driver genes according to their role, using several properties related to the pattern of alterations across tumors. The method shows an accuracy of 0.93 and Matthew's correlation coefficient of 0.84 classifying genes in the Cancer Gene Census. The OncodriveROLE classifier, its results when applied to two lists of predicted cancer drivers and TCGA-derived mutation and copy number features used by the classifier are available at http://bg.upf.edu/oncodrive-role.

Availability and implementation

The R implementation of the OncodriveROLE classifier is available at http://bg.upf.edu/oncodrive-role.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +25161221,Towards a piRNA prediction using multiple kernel fusion and support vector machine.,"

Motivation

Piwi-interacting RNA (piRNA) is the most recently discovered and the least investigated class of Argonaute/Piwi protein-interacting small non-coding RNAs. The piRNAs are mostly known to be involved in protecting the genome from invasive transposable elements. But recent discoveries suggest their involvement in the pathophysiology of diseases, such as cancer. Their identification is therefore an important task, and computational methods are needed. However, the lack of conserved piRNA sequences and structural elements makes this identification challenging and difficult.

Results

In the present study, we propose a new modular and extensible machine learning method based on multiple kernels and a support vector machine (SVM) classifier for piRNA identification. Very few piRNA features are known to date. The use of a multiple kernels approach allows editing, adding or removing piRNA features that can be heterogeneous in a modular manner according to their relevance in a given species. Our algorithm is based on a combination of the previously identified features [sequence features (k-mer motifs and a uridine at the first position) and piRNAs cluster feature] and a new telomere/centromere vicinity feature. These features are heterogeneous, and the kernels allow to unify their representation. The proposed algorithm, named piRPred, gives promising results on Drosophila and Human data and outscores previously published piRNA identification algorithms.

Availability and implementation

piRPred is freely available to non-commercial users on our Web server EvryRNA http://EvryRNA.ibisc.univ-evry.fr.",2014-09-01 +24149052,BEReX: Biomedical Entity-Relationship eXplorer.,"

Summary

Biomedical Entity-Relationship eXplorer (BEReX) is a new biomedical knowledge integration, search and exploration tool. BEReX integrates eight popular databases (STRING, DrugBank, KEGG, PharmGKB, BioGRID, GO, HPRD and MSigDB) and delineates an integrated network by combining the information available from these databases. Users search the integrated network by entering key words, and BEReX returns a sub-network matching the key words. The resulting graph can be explored interactively. BEReX allows users to find the shortest paths between two remote nodes, find the most relevant drugs, diseases, pathways and so on related to the current network, expand the network by particular types of entities and relations and modify the network by removing or adding selected nodes. BEReX is implemented as a standalone Java application.

Availability and implementation

BEReX and a detailed user guide are available for download at our project Web site (http://infos.korea.ac.kr/berex).",2013-10-21 +23402649,A practical method to target individuals for outbreak detection and control.,"Identification of individuals or subpopulations that contribute the most to disease transmission is key to target surveillance and control efforts. In a recent study in BMC Medicine, Smieszek and Salathé introduced a novel method based on readily available information about spatial proximity in high schools, to help identify individuals at higher risk of infection and those more likely to be infected early in the outbreak. By combining simulation models for influenza transmission with high-resolution data on school contact patterns, the authors showed that their proximity method compares favorably to more sophisticated methods using detailed contact tracing information. The proximity method is simple and promising, but further research is warranted to confront this method against real influenza outbreak data, and to assess the generalizability of the approach to other important transmission units, such as work, households, and transportation systems. See related research article here http://www.biomedcentral.com/1741-7015/11/35.",2013-02-12 +25684987,Novel agents and associated toxicities of inhibitors of the pi3k/Akt/mtor pathway for the treatment of breast cancer.,"

Unlabelled

The pi3k/Akt/mtor (phosphatidylinositol 3 kinase/ Akt/mammalian target of rapamycin) signalling pathway is an established driver of oncogenic activity in human malignancies. Therapeutic targeting of this pathway holds significant promise as a treatment strategy. Everolimus, an mtor inhibitor, is the first of this class of agents approved for the treatment of hormone receptor-positive, human epidermal growth factor receptor 2-negative advanced breast cancer. Everolimus has been associated with significant improvements in progression-free survival; however, it is also associated with increased toxicity related to its specific mechanism of action.

Methods

A comprehensive review of the literature conducted using a focused medline search was combined with a search of current trials at http://ClinicalTrials.gov/. Summary tables of the toxicities of the various classes of pi3k/Akt/mtor inhibitors were created. A broad group of Canadian health care professionals was assembled to review the data and to produce expert opinion and summary recommendations for possible best practices in managing the adverse events associated with these pathway inhibitors.

Results

Differing toxicities are associated with the various classes of pi3k/Akt/mtor pathway inhibitors. The most common unique adverse events observed in everolimus clinical trials in breast cancer include stomatitis (all grades: approximately 60%), noninfectious pneumonitis (15%), rash (40%), hyperglycemia (15%), and immunosuppression (40%). To minimize grades 3 and 4 toxicities and to attempt to attain optimal outcomes, effective management of those adverse events is critical. Management should be interdisciplinary and should use approaches that include education, early recognition, active intervention, and potentially prophylactic strategies.

Discussion

Everolimus likely represents the first of many complex oral targeted therapies for the treatment of breast cancer. Using this agent as a template, it is essential to establish best practices involving and integrating multiple disciplines for the management of future pi3k/Akt/mtor signalling pathway inhibitors.",2015-02-01 +23772653,Accurate models for P-gp drug recognition induced from a cancer cell line cytotoxicity screen.,"P-glycoprotein (P-gp, MDR1) is a promiscuous drug efflux pump of substantial pharmacological importance. Taking advantage of large-scale cytotoxicity screening data involving 60 cancer cell lines, we correlated the differential biological activities of ∼13,000 compounds against cellular P-gp levels. We created a large set of 934 high-confidence P-gp substrates or nonsubstrates by enforcing agreement with an orthogonal criterion involving P-gp overexpressing ADR-RES cells. A support vector machine (SVM) was 86.7% accurate in discriminating P-gp substrates on independent test data, exceeding previous models. Two molecular features had an overarching influence: nearly all P-gp substrates were large (>35 atoms including H) and dense (specific volume of <7.3 Å(3)/atom) molecules. Seven other descriptors and 24 molecular fragments (""effluxophores"") were found enriched in the (non)substrates and incorporated into interpretable rule-based models. Biological experiments on an independent P-gp overexpressing cell line, the vincristine-resistant VK2, allowed us to reclassify six compounds previously annotated as substrates, validating our method's predictive ability. Models are freely available at http://pgp.biozyne.com .",2013-07-08 +23509278,The human gene connectome as a map of short cuts for morbid allele discovery.,"High-throughput genomic data reveal thousands of gene variants per patient, and it is often difficult to determine which of these variants underlies disease in a given individual. 
However, at the population level, there may be some degree of phenotypic homogeneity, with alterations of specific physiological pathways underlying the pathogenesis of a particular disease. We describe here the human gene connectome (HGC) as a unique approach for human mendelian genetic research, facilitating the interpretation of abundant genetic data from patients with the same disease, and guiding subsequent experimental investigations. We first defined the set of the shortest plausible biological distances, routes, and degrees of separation between all pairs of human genes by applying a shortest distance algorithm to the full human gene network. We then designed a hypothesis-driven application of the HGC, in which we generated a Toll-like receptor 3-specific connectome useful for the genetic dissection of inborn errors of Toll-like receptor 3 immunity. In addition, we developed a functional genomic alignment approach from the HGC. In functional genomic alignment, the genes are clustered according to biological distance (rather than the traditional molecular evolutionary genetic distance), as estimated from the HGC. Finally, we compared the HGC with three state-of-the-art methods: String, FunCoup, and HumanNet. We demonstrated that the existing methods are more suitable for polygenic studies, whereas HGC approaches are more suitable for monogenic studies. The HGC and functional genomic alignment data and computer programs are freely available to noncommercial users from http://lab.rockefeller.edu/casanova/HGC and should facilitate the genome-wide selection of disease-causing candidate alleles for experimental validation.",2013-03-18 +24843014,APOBEC3A deaminates transiently exposed single-strand DNA during LINE-1 retrotransposition.,"Long INterspersed Element-1 (LINE-1 or L1) retrotransposition poses a mutagenic threat to human genomes. Human cells have therefore evolved strategies to regulate L1 retrotransposition. 
The APOBEC3 (A3) gene family consists of seven enzymes that catalyze deamination of cytidine nucleotides to uridine nucleotides (C-to-U) in single-strand DNA substrates. Among these enzymes, APOBEC3A (A3A) is the most potent inhibitor of L1 retrotransposition in cultured cell assays. However, previous characterization of L1 retrotransposition events generated in the presence of A3A did not yield evidence of deamination. Thus, the molecular mechanism by which A3A inhibits L1 retrotransposition has remained enigmatic. Here, we have used in vitro and in vivo assays to demonstrate that A3A can inhibit L1 retrotransposition by deaminating transiently exposed single-strand DNA that arises during the process of L1 integration. These data provide a mechanistic explanation of how the A3A cytidine deaminase protein can inhibit L1 retrotransposition. DOI: http://dx.doi.org/10.7554/eLife.02008.001.",2014-04-24 +21343142,A common layer of interoperability for biomedical ontologies based on OWL EL.,"

Motivation

Ontologies are essential in biomedical research due to their ability to semantically integrate content from different scientific databases and resources. Their application improves capabilities for querying and mining biological knowledge. An increasing number of ontologies is being developed for this purpose, and considerable effort is invested into formally defining them in order to represent their semantics explicitly. However, current biomedical ontologies do not facilitate data integration and interoperability yet, since reasoning over these ontologies is very complex and cannot be performed efficiently or is even impossible. We propose the use of less expressive subsets of ontology representation languages to enable efficient reasoning and achieve the goal of genuine interoperability between ontologies.

Results

We present and evaluate EL Vira, a framework that transforms OWL ontologies into the OWL EL subset, thereby enabling the use of tractable reasoning. We illustrate which OWL constructs and inferences are kept and lost following the conversion and demonstrate the performance gain of reasoning indicated by the significant reduction of processing time. We applied EL Vira to the open biomedical ontologies and provide a repository of ontologies resulting from this conversion. EL Vira creates a common layer of ontological interoperability that, for the first time, enables the creation of software solutions that can employ biomedical ontologies to perform inferences and answer complex queries to support scientific analyses.

Availability and implementation

The EL Vira software is available from http://el-vira.googlecode.com and converted OBO ontologies and their mappings are available from http://bioonto.gen.cam.ac.uk/el-ont.",2011-02-21 +23408991,Oncodrive-CIS: a method to reveal likely driver genes based on the impact of their copy number changes on expression.,"A well-established approach for detecting genes involved in tumorigenesis due to copy number alterations (CNAs) is to assess the recurrence of the alteration across multiple samples. Expression data can be used to filter this list of candidates by assessing whether the gene expression significantly differs between tumors depending on the copy number status. A drawback of this approach is that it may fail to detect low-recurrent drivers. Furthermore, this analysis does not provide information about expression changes for each gene as compared to the whole data set and does not take into consideration the expression of normal samples. Here we describe a novel method (Oncodrive-CIS) aimed at ranking genes according to the expression impact caused by the CNAs. The rationale of Oncodrive-CIS is based on the hypothesis that genes involved in cancer due to copy number changes are more biased towards misregulation than are bystanders. Moreover, to gain insight into the expression changes caused by gene dosage, the expression of samples with CNAs is compared to that of tumor samples with diploid genotype and also to that of normal samples. Oncodrive-CIS demonstrated better performance in detecting putative associations between copy-number and expression in simulated data sets as compared to other methods aimed to this purpose, and picked up genes likely to be related with tumorigenesis when applied to real cancer samples. In summary, Oncodrive-CIS provides a statistical framework to evaluate the in cis effect of CNAs that may be useful to elucidate the role of these aberrations in driving oncogenesis. 
An implementation of this method and the corresponding user guide are freely available at http://bg.upf.edu/oncodrivecis.",2013-02-08 +24149049,"Cascleave 2.0, a new approach for predicting caspase and granzyme cleavage targets.","

Motivation

Caspases and granzyme B (GrB) are important proteases involved in fundamental cellular processes and play essential roles in programmed cell death, necrosis and inflammation. Although a number of substrates for both types have been experimentally identified, the complete repertoire of caspases and granzyme B substrates remained to be fully characterized. Accordingly, systematic bioinformatics studies of known cleavage sites may provide important insights into their substrate specificity and facilitate the discovery of novel substrates.

Results

We develop a new bioinformatics tool, termed Cascleave 2.0, which builds on previous success of the Cascleave tool for predicting generic caspase cleavage sites. It can be efficiently used to predict potential caspase-specific cleavage sites for the human caspase-1, 3, 6, 7, 8 and GrB. In particular, we integrate heterogeneous sequence and protein functional information from various sources to improve the prediction accuracy of Cascleave 2.0. During classification, we use both maximum relevance minimum redundancy and forward feature selection techniques to quantify the relative contribution of each feature to prediction and thus remove redundant as well as irrelevant features. A systematic evaluation of Cascleave 2.0 using the benchmark data and comparison with other state-of-the-art tools using independent test data indicate that Cascleave 2.0 outperforms other tools on protease-specific cleavage site prediction of caspase-1, 3, 6, 7 and GrB. Cascleave 2.0 is anticipated to be used as a powerful tool for identifying novel substrates and cleavage sites of caspases and GrB and help understand the functional roles of these important proteases in human proteolytic cascades.

Availability and implementation

http://www.structbioinfor.org/cascleave2/.",2013-10-21 +25417203,Efficient searching and annotation of metabolic networks using chemical similarity.,"

Motivation

The urgent need for efficient and sustainable biological production of fuels and high-value chemicals has elicited a wave of in silico techniques for identifying promising novel pathways to these compounds in large putative metabolic networks. To date, these approaches have primarily used general graph search algorithms, which are prohibitively slow as putative metabolic networks may exceed 1 million compounds. To alleviate this limitation, we report two methods--SimIndex (SI) and SimZyme--which use chemical similarity of 2D chemical fingerprints to efficiently navigate large metabolic networks and propose enzymatic connections between the constituent nodes. We also report a Byers-Waterman type pathway search algorithm for further paring down pertinent networks.

Results

Benchmarking tests run with SI show it can reduce the number of nodes visited in searching a putative network by 100-fold with a computational time improvement of up to 10(5)-fold. Subsequent Byers-Waterman search application further reduces the number of nodes searched by up to 100-fold, while SimZyme demonstrates ∼ 90% accuracy in matching query substrates with enzymes. Using these modules, we have designed and annotated an alternative to the methylerythritol phosphate pathway to produce isopentenyl pyrophosphate with more favorable thermodynamics than the native pathway. These algorithms will have a significant impact on our ability to use large metabolic networks that lack annotation of promiscuous reactions.

Availability and implementation

Python files will be available for download at http://tyolab.northwestern.edu/tools/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-11-21 +24144535,MHC-NP: predicting peptides naturally processed by the MHC.,"We present MHC-NP, a tool for predicting peptides naturally processed by the MHC pathway. The method was part of the 2nd Machine Learning Competition in Immunology and yielded state-of-the-art accuracy for the prediction of peptides eluted from human HLA-A*02:01, HLA-B*07:02, HLA-B*35:01, HLA-B*44:03, HLA-B*53:01, HLA-B*57:01 and mouse H2-D(b) and H2-K(b) MHC molecules. We briefly explain the theory and motivations that have led to developing this tool. General applicability in the field of immunology and specifically epitope-based vaccine are expected. Our tool is freely available online and hosted by the Immune Epitope Database at http://tools.immuneepitope.org/mhcnp/.",2013-10-18 +25298093,OncoCis: annotation of cis-regulatory mutations in cancer.,"Whole genome sequencing has enabled the identification of thousands of somatic mutations within non-coding genomic regions of individual cancer samples. However, identification of mutations that potentially alter gene regulation remains a major challenge. Here we present OncoCis, a new method that enables identification of potential cis-regulatory mutations using cell type-specific genome and epigenome-wide datasets along with matching gene expression data. We demonstrate that the use of cell type-specific information and gene expression can significantly reduce the number of candidate cis-regulatory mutations compared with existing tools designed for the annotation of cis-regulatory SNPs. The OncoCis webserver is freely accessible at https://powcs.med.unsw.edu.au/OncoCis/.",2014-01-01 +24511328,Characterization of the Asian Citrus Psyllid Transcriptome.,"The Asian citrus psyllid, Diaphorina citri Kuwayama (Hemiptera: Psyllidae) is a vector for the causative agents of Huanglongbing, which threatens citrus production worldwide. This study reports and discusses the first D. 
citri transcriptomes, encompassing the three main life stages of D. citri, egg, nymph and adult. The transcriptomes were annotated using Gene Ontology (GO) and insecticide-related genes within each life stage were identified to aid the development of future D. citri insecticides. Transcriptome assemblies and other sequence data are available for download at the International Asian Citrus Psyllid Genome Consortium website [http://psyllid.org/download] and at NCBI [http://www.ncbi.nlm.nih.gov/bioproject/29447].",2014-01-01 +24136089,In silico models for designing and discovering novel anticancer peptides.,"Use of therapeutic peptides in cancer therapy has been receiving considerable attention in the recent years. Present study describes the development of computational models for predicting and discovering novel anticancer peptides. Preliminary analysis revealed that Cys, Gly, Ile, Lys, and Trp are dominated at various positions in anticancer peptides. Support vector machine models were developed using amino acid composition and binary profiles as input features on main dataset that contains experimentally validated anticancer peptides and random peptides derived from SwissProt database. In addition, models were developed on alternate dataset that contains antimicrobial peptides instead of random peptides. Binary profiles-based model achieved maximum accuracy 91.44% with MCC 0.83. We have developed a webserver, which would be helpful in: (i) predicting minimum mutations required for improving anticancer potency; (ii) virtual screening of peptides for discovering novel anticancer peptides, and (iii) scanning natural proteins for identification of anticancer peptides (http://crdd.osdd.net/raghava/anticp/).",2013-10-18 +25095881,Thresher: an improved algorithm for peak height thresholding of microbial community profiles.,"

Motivation

This article presents Thresher, an improved technique for finding peak height thresholds for automated rRNA intergenic spacer analysis (ARISA) profiles. We argue that thresholds must be sample dependent, taking community richness into account. In most previous fragment analyses, a common threshold is applied to all samples simultaneously, ignoring richness variations among samples and thereby compromising cross-sample comparison. Our technique solves this problem, and at the same time provides a robust method for outlier rejection, selecting for removal any replicate pairs that are not valid replicates.

Results

Thresholds are calculated individually for each replicate in a pair, and separately for each sample. The thresholds are selected to be the ones that minimize the dissimilarity between the replicates after thresholding. If a choice of threshold results in the two replicates in a pair failing a quantitative test of similarity, either that threshold or that sample must be rejected. We compare thresholded ARISA results with sequencing results, and demonstrate that the Thresher algorithm outperforms conventional thresholding techniques.

Availability and implementation

The software is implemented in R, and the code is available at http://verenastarke.wordpress.com or by contacting the author.

Contact

vstarke@ciw.edu or http://verenastarke.wordpress.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-08-05 +22983621,ChIP-Seq and the complexity of bacterial transcriptional regulation.,"Transcription factors (TFs) play a central role in regulating gene expression in all bacteria. Yet, until recently, studies of TF binding were limited to a small number of factors at a few genomic locations. Chromatin immunoprecipitation followed by sequencing enables mapping of binding sites for TFs in a global and high-throughput fashion. The NIAID funded TB systems biology project http://www.broadinstitute.org/annotation/tbsysbio/home.html aims to map the binding sites for every transcription factor in the genome of Mycobacterium tuberculosis (MTB), the causative agent of human TB. ChIP-Seq data already released through TBDB.org have provided new insight into the mechanisms of TB pathogenesis. But in addition, data from MTB are beginning to challenge many simplifying assumptions associated with gene regulation in all bacteria. In this chapter, we review the global aspects of TF binding in MTB and discuss the implications of these data for our understanding of bacterial gene regulation. We begin by reviewing the canonical model of bacterial transcriptional regulation using the lac operon as the standard paradigm. We then review the use of ChIP-Seq to map the binding sites of DNA-binding proteins and the application of this method to mapping TF binding sites in MTB. Finally, we discuss two aspects of the binding discovered by ChIP-Seq that were unexpected given the canonical model: the substantial binding outside the proximal promoter region and the large number of weak binding sites.",2013-01-01 +24149050,ATHENA: the analysis tool for heritable and environmental network associations.,"

Motivation

Advancements in high-throughput technology have allowed researchers to examine the genetic etiology of complex human traits in a robust fashion. Although genome-wide association studies have identified many novel variants associated with hundreds of traits, a large proportion of the estimated trait heritability remains unexplained. One hypothesis is that the commonly used statistical techniques and study designs are not robust to the complex etiology that may underlie these human traits. This etiology could include non-linear gene × gene or gene × environment interactions. Additionally, other levels of biological regulation may play a large role in trait variability.

Results

To address the need for computational tools that can explore enormous datasets to detect complex susceptibility models, we have developed a software package called the Analysis Tool for Heritable and Environmental Network Associations (ATHENA). ATHENA combines various variable filtering methods with machine learning techniques to analyze high-throughput categorical (i.e. single nucleotide polymorphisms) and quantitative (i.e. gene expression levels) predictor variables to generate multivariable models that predict either a categorical (i.e. disease status) or quantitative (i.e. cholesterol levels) outcomes. The goal of this article is to demonstrate the utility of ATHENA using simulated and biological datasets that consist of both single nucleotide polymorphisms and gene expression variables to identify complex prediction models. Importantly, this method is flexible and can be expanded to include other types of high-throughput data (i.e. RNA-seq data and biomarker measurements).

Availability

ATHENA is freely available for download. The software, user manual and tutorial can be downloaded from http://ritchielab.psu.edu/ritchielab/software.",2013-10-21 +26355510,Evolution and Controllability of Cancer Networks: A Boolean Perspective.,"Cancer forms a robust system capable of maintaining stable functioning (cell sustenance and proliferation) despite perturbations. Cancer progresses as stages over time typically with increasing aggressiveness and worsening prognosis. Characterizing these stages and identifying the genes driving transitions between them is critical to understand cancer progression and to develop effective anti-cancer therapies. In this work, we propose a novel model for the `cancer system' as a Boolean state space in which a Boolean network, built from protein-interaction and gene-expression data from different stages of cancer, transits between Boolean satisfiability states by ""editing"" interactions and ""flipping"" genes. Edits reflect rewiring of the PPI network while flipping of genes reflect activation or silencing of genes between stages. We formulate a minimization problem min flip to identify these genes driving the transitions. The application of our model (called BoolSpace) on three case studies-pancreatic and breast tumours in human and post spinal-cord injury (SCI) in rats-reveals valuable insights into the phenomenon of cancer progression: (i) interactions involved in core cell-cycle and DNA-damage repair pathways are significantly rewired in tumours, indicating significant impact to key genome-stabilizing mechanisms; (ii) several of the genes flipped are serine/threonine kinases which act as biological switches, reflecting cellular switching mechanisms between stages; and (iii) different sets of genes are flipped during the initial and final stages indicating a pattern to tumour progression. 
Based on these results, we hypothesize that robustness of cancer partly stems from ""passing of the baton"" between genes at different stages-genes from different biological processes and/or cellular components are involved in different stages of tumour progression thereby allowing tumour cells to evade targeted therapy, and therefore an effective therapy should target a ""cover set"" of these genes. A C/C++ implementation of BoolSpace is freely available at: http://www.bioinformatics.org.au/tools-data.",2014-01-01 +23311589,Pool-hmm: a Python program for estimating the allele frequency spectrum and detecting selective sweeps from next generation sequencing of pooled samples.,"Due to its cost effectiveness, next generation sequencing of pools of individuals (Pool-Seq) is becoming a popular strategy for genome-wide estimation of allele frequencies in population samples. As the allele frequency spectrum provides information about past episodes of selection, Pool-seq is also a promising design for genomic scans for selection. However, no software tool has yet been developed for selection scans based on Pool-Seq data. We introduce Pool-hmm, a Python program for the estimation of allele frequencies and the detection of selective sweeps in a Pool-Seq sample. Pool-hmm includes several options that allow a flexible analysis of Pool-Seq data, and can be run in parallel on several processors. Source code and documentation for Pool-hmm is freely available at https://qgsp.jouy.inra.fr/.",2013-01-11 +22672254,The gastrointestinal electrical mapping suite (GEMS): software for analyzing and visualizing high-resolution (multi-electrode) recordings in spatiotemporal detail.,"

Background

Gastrointestinal contractions are controlled by an underlying bioelectrical activity. High-resolution spatiotemporal electrical mapping has become an important advance for investigating gastrointestinal electrical behaviors in health and motility disorders. However, research progress has been constrained by the low efficiency of the data analysis tasks. This work introduces a new efficient software package: GEMS (Gastrointestinal Electrical Mapping Suite), for analyzing and visualizing high-resolution multi-electrode gastrointestinal mapping data in spatiotemporal detail.

Results

GEMS incorporates a number of new and previously validated automated analytical and visualization methods into a coherent framework coupled to an intuitive and user-friendly graphical user interface. GEMS is implemented using MATLAB®, which combines sophisticated mathematical operations and GUI compatibility. Recorded slow wave data can be filtered via a range of inbuilt techniques, efficiently analyzed via automated event-detection and cycle clustering algorithms, and high quality isochronal activation maps, velocity field maps, amplitude maps, frequency (time interval) maps and data animations can be rapidly generated. Normal and dysrhythmic activities can be analyzed, including initiation and conduction abnormalities. The software is distributed free to academics via a community user website and forum (http://sites.google.com/site/gimappingsuite).

Conclusions

This software allows for the rapid analysis and generation of critical results from gastrointestinal high-resolution electrical mapping data, including quantitative analysis and graphical outputs for qualitative analysis. The software is designed to be used by non-experts in data and signal processing, and is intended to be used by clinical researchers as well as physiologists and bioengineers. The use and distribution of this software package will greatly accelerate efforts to improve the understanding of the causes and clinical consequences of gastrointestinal electrical disorders, through high-resolution electrical mapping.",2012-06-06 +24499537,Significant variance in genetic diversity among populations of Schistosoma haematobium detected using microsatellite DNA loci from a genome-wide database.,"

Background

Urogenital schistosomiasis caused by Schistosoma haematobium is widely distributed across Africa and is increasingly being targeted for control. Genome sequences and population genetic parameters can give insight into the potential for population- or species-level drug resistance. Microsatellite DNA loci are genetic markers in wide use by Schistosoma researchers, but there are few primers available for S. haematobium.

Methods

We sequenced 1,058,114 random DNA fragments from clonal cercariae collected from a snail infected with a single Schistosoma haematobium miracidium. We assembled and aligned the S. haematobium sequences to the genomes of S. mansoni and S. japonicum, identifying microsatellite DNA loci across all three species and designing primers to amplify the loci in S. haematobium. To validate our primers, we screened 32 randomly selected primer pairs with population samples of S. haematobium.

Results

We designed >13,790 primer pairs to amplify unique microsatellite loci in S. haematobium, (available at http://www.cebio.org/projetos/schistosoma-haematobium-genome). The three Schistosoma genomes contained similar overall frequencies of microsatellites, but the frequency and length distributions of specific motifs differed among species. We identified 15 primer pairs that amplified consistently and were easily scored. We genotyped these 15 loci in S. haematobium individuals from six locations: Zanzibar had the highest levels of diversity; Malawi, Mauritius, Nigeria, and Senegal were nearly as diverse; but the sample from South Africa was much less diverse.

Conclusions

About half of the primers in the database of Schistosoma haematobium microsatellite DNA loci should yield amplifiable and easily scored polymorphic markers, thus providing thousands of potential markers. Sequence conservation among S. haematobium, S. japonicum, and S. mansoni is relatively high, thus it should now be possible to identify markers that are universal among Schistosoma species (i.e., using DNA sequences conserved among species), as well as other markers that are specific to species or species-groups (i.e., using DNA sequences that differ among species). Full genome-sequencing of additional species and specimens of S. haematobium, S. japonicum, and S. mansoni is desirable to better characterize differences within and among these species, to develop additional genetic markers, and to examine genes as well as conserved non-coding elements associated with drug resistance.",2013-10-17 +23589399,"Fragment-based modeling of membrane protein loops: successes, failures, and prospects for the future.","Membrane proteins (MPs) have become a major focus in structure prediction, due to their medical importance. There is, however, a lack of fast and reliable methods that specialize in the modeling of MP loops. Often methods designed for soluble proteins (SPs) are applied directly to MPs. In this article, we investigate the validity of such an approach in the realm of fragment-based methods. We also examined the differences in membrane and soluble protein loops that might affect accuracy. We test our ability to predict soluble and MP loops with the previously published method FREAD. We show that it is possible to predict accurately the structure of MP loops using a database of MP fragments (0.5-1 Å median root-mean-square deviation). The presence of homologous proteins in the database helps prediction accuracy. However, even when homologues are removed better results are still achieved using fragments of MPs (0.8-1.6 Å) rather than SPs (1-4 Å) to model MP loops. 
We find that many fragments of SPs have shapes similar to their MP counterparts but have very different sequences; however, they do not appear to differ in their substitution patterns. Our findings may allow further improvements to fragment-based loop modeling algorithms for MPs. The current version of our proof-of-concept loop modeling protocol produces high-accuracy loop models for MPs and is available as a web server at http://medeller.info/fread.",2013-10-17 +24330842,TRAPID: an efficient online tool for the functional and comparative analysis of de novo RNA-Seq transcriptomes.,"Transcriptome analysis through next-generation sequencing technologies allows the generation of detailed gene catalogs for non-model species, at the cost of new challenges with regards to computational requirements and bioinformatics expertise. Here, we present TRAPID, an online tool for the fast and efficient processing of assembled RNA-Seq transcriptome data, developed to mitigate these challenges. TRAPID offers high-throughput open reading frame detection, frameshift correction and includes a functional, comparative and phylogenetic toolbox, making use of 175 reference proteomes. Benchmarking and comparison against state-of-the-art transcript analysis tools reveals the efficiency and unique features of the TRAPID system. TRAPID is freely available at http://bioinformatics.psb.ugent.be/webtools/trapid/.",2013-12-13 +22871049,Visualisation in imaging mass spectrometry using the minimum noise fraction transform.,"

Background

Imaging Mass Spectrometry (IMS) provides a means to measure the spatial distribution of biochemical features on the surface of a sectioned tissue sample. IMS datasets are typically huge and visualisation and subsequent analysis can be challenging. Principal component analysis (PCA) is one popular data reduction technique that has been used and we propose another; the minimum noise fraction (MNF) transform which is popular in remote sensing.

Findings

The MNF transform is able to extract spatially coherent information from IMS data. The MNF transform is implemented through an R-package which is available together with example data from http://staff.scm.uws.edu.au/~glenn/#Software.

Conclusions

In our example, the MNF transform was able to find additional images of interest. The extracted information forms a useful basis for subsequent analyses.",2012-08-07 +24555475,A genome-wide MeSH-based literature mining system predicts implicit gene-to-gene relationships and networks.,"

Background

The large amount of literature in the post-genomics era enables the study of gene interactions and networks using all available articles published for a specific organism. MeSH is a controlled vocabulary of medical and scientific terms that is used by biomedical scientists to manually index articles in the PubMed literature database. We hypothesized that genome-wide gene-MeSH term associations from the PubMed literature database could be used to predict implicit gene-to-gene relationships and networks. While the gene-MeSH associations have been used to detect gene-gene interactions in some studies, different methods have not been well compared, and such a strategy has not been evaluated for a genome-wide literature analysis. Genome-wide literature mining of gene-to-gene interactions allows ranking of the best gene interactions and investigation of comprehensive biological networks at a genome level.

Results

The genome-wide GenoMesh literature mining algorithm was developed by sequentially generating a gene-article matrix, a normalized gene-MeSH term matrix, and a gene-gene matrix. The gene-gene matrix relies on the calculation of pairwise gene dissimilarities based on gene-MeSH relationships. An optimized dissimilarity score was identified from six well-studied functions based on a receiver operating characteristic (ROC) analysis. Based on the studies with well-studied Escherichia coli and less-studied Brucella spp., GenoMesh was found to accurately identify gene functions using weighted MeSH terms, predict gene-gene interactions not reported in the literature, and cluster all the genes studied from an organism using the MeSH-based gene-gene matrix. A web-based GenoMesh literature mining program is also available at: http://genomesh.hegroup.org. GenoMesh also predicts gene interactions and networks among genes associated with specific MeSH terms or user-selected gene lists.

Conclusions

The GenoMesh algorithm and web program provide the first genome-wide, MeSH-based literature mining system that effectively predicts implicit gene-gene interaction relationships and networks in a genome-wide scope.",2013-10-16 +24250118,NABIC marker database: A molecular markers information network of agricultural crops.,"

Unlabelled

In 2013, National Agricultural Biotechnology Information Center (NABIC) reconstructs a molecular marker database for useful genetic resources. The web-based marker database consists of three major functional categories: map viewer, RSN marker and gene annotation. It provides 7250 marker locations, 3301 RSN marker property, 3280 molecular marker annotation information in agricultural plants. The individual molecular marker provides information such as marker name, expressed sequence tag number, gene definition and general marker information. This updated marker-based database provides useful information through a user-friendly web interface that assisted in tracing any new structures of the chromosomes and gene positional functions using specific molecular markers.

Availability

The database is available for free at http://nabic.rda.go.kr/gere/rice/molecularMarkers/",2013-10-16 +24131758,The current status of ethnobiological research in Latin America: gaps and perspectives.,"

Background

Recent reviews have demonstrated an increase in the number of papers on ethnobiology in Latin America. Among factors that have influenced this increase are the biological and cultural diversity of these countries and the general scientific situation in some countries. This study aims to assess the panorama of ethnobiological research in Latin America by analyzing its evolution, trends, and future prospects.

Methods

To conduct this study, we searched for papers in the Scopus (http://www.scopus.com) and Web of Science (http://www.isiknowledge.com) databases. The search was performed using combinations of keywords and the name of each Latin American country. The following countries were included in this study: Argentina, Bolivia, Brazil, Chile, Colombia, Costa Rica, Cuba, Ecuador, Guatemala, Haiti, Honduras, Mexico, Panama, Paraguay, Peru, Venezuela, and Uruguay.

Results and conclusions

According to our inclusion criteria, 679 ethnobiological studies conducted in Latin America were found for the period between 1963 and 2012. Of these studies, 289 (41%) were conducted in Brazil, 153 in Mexico (22%), 61 in Peru (9%), 58 in Argentina (8%), 45 in Bolivia (6%), and 97 (14%) in other Latin American countries. The increased number of publications related to this area of knowledge in recent years demonstrates the remarkable growth of ethnobiology as a science. Ethnobiological research may be stimulated by an increase in the number of scientific events and journals for study dissemination and by the creation of undergraduate courses and graduate programs to train ethnoscientists who will produce high-quality studies, especially in certain countries.",2013-10-16 +24564380,Simple re-instantiation of small databases using cloud computing.,"

Background

Small bioinformatics databases, unlike institutionally funded large databases, are vulnerable to discontinuation and many reported in publications are no longer accessible. This leads to irreproducible scientific work and redundant effort, impeding the pace of scientific progress.

Results

We describe a Web-accessible system, available online at http://biodb100.apbionet.org, for archival and future on demand re-instantiation of small databases within minutes. Depositors can rebuild their databases by downloading a Linux live operating system (http://www.bioslax.com), preinstalled with bioinformatics and UNIX tools. The database and its dependencies can be compressed into an "".lzm"" file for deposition. End-users can search for archived databases and activate them on dynamically re-instantiated BioSlax instances, run as virtual machines over the two popular full virtualization standard cloud-computing platforms, Xen Hypervisor or vSphere. The system is adaptable to increasing demand for disk storage or computational load and allows database developers to use the re-instantiated databases for integration and development of new databases.

Conclusions

Herein, we demonstrate that a relatively inexpensive solution can be implemented for archival of bioinformatics databases and their rapid re-instantiation should the live databases disappear.",2013-10-16 +22641854,DroPNet: a web portal for integrated analysis of Drosophila protein-protein interaction networks.,"DroPNet (Drosophila Protein Network) is a Drosophila-dedicated web portal for generating and analyzing protein-protein interaction (PPI) networks. This platform integrates users' experimental data provided as one or two lists of genes with PPI data from Drosophila and other species. These experimental data can, for example, come from RNAi screens, for which this approach is known to be valuable. DroPNet, therefore, provides an essential basis for further biological analysis by linking functional and physical interactions and reinforcing the relevance of each. DroPNet focuses on the search of PPIs between genes of the entry list, and includes the possibility of searching for intermediate genes for which the corresponding protein indirectly links two entry data. It also offers multiple functions for editing the networks obtained, providing users with interactive possibilities to progressively improve and refine the results. This approach gives a global view of the studied process and makes it possible to highlight specific interactions that have so far been understudied. DroPNet is freely available at http://dropnet.isima.fr.",2012-05-27 +30731826,First Asian Report of Leaf Spot of Ambrosia trifida Caused by Septoria epambrosiae.,"Ambrosia trifida L., known as giant ragweed, is native to North America and was introduced in Korea in the 1970s (4). It is now widely naturalized, and since 1999, has been designated as one of 11 most 'harmful nonindigenous plants' by the Korean Ministry of Environment because of its adverse effects on native plants. Various strategies to eradicate this noxious weed have been unsuccessful (4). 
In June 2008, leaf spot symptoms on this weed were found in Inje, Korea. Hundreds of giant ragweed growing along stream banks contained leaf spots with leaf yellowing and premature defoliation. Leaf lesions were 1 to 5 mm in diameter, angular to irregular, dark brown without a distinct margin, later becoming pallid with a brown margin. Between 2008 and 2011, the authors observed the same symptoms in Dongducheon, Yangku, Namyangju, and Pocheon, Korea. Voucher specimens have been housed in the herbarium of Korea University. Numerous black pycnidia were formed on the lesion. Pycnidia were amphigenous, globose, dark brown, ostiolate, and measured 80 to 130 μm in diameter. Conidia were filiform, straight to mildly curved, eguttulate, hyaline, 18 to 36 × 1.5 to 2.5 μm, one to three septate, subtruncate at the base, and tapering to a rounded apex. Single-conidial isolations onto potato dextrose agar formed dark grayish colonies. Pycnidia matured after 5 weeks when plates were incubated under fluorescent illumination for 12-h photoperiods at 25°C. On the basis of morphological and cultural characteristics, the fungus was identified as Septoria epambrosiae D.F. Farr (2). Three isolates were deposited in the Korean Agricultural Culture Collection (KACC). Preliminary morphological identification of the fungal isolates was confirmed by molecular data. The internal transcribed spacer (ITS) region of rDNA was amplified using the ITS1/ITS4 primers and sequenced. The resulting sequences of 449 bp obtained from the three isolates were identical to each other. They showed 100% similarity when compared with a sequence of S. epambrosiae (GenBank No. AF279582). The nucleotide sequence of a representative isolate (KACC43850) was deposited in GenBank (No. JN695498). Pathogenicity was confirmed by inoculating the leaves of three seedlings with a conidial suspension (~2 × 10^5 conidia/ml). Three noninoculated seedlings served as controls. 
Plants were maintained in a glasshouse at 100% relative humidity for 48 h. After 6 days, typical leaf spots, identical to the one observed in the field, started to develop on the leaves of the inoculated plants. No symptoms were observed on the control plants. The fungus was successfully reisolated from the symptomatic plants, fulfilling Koch's postulates. A leaf spot disease associated with S. epambrosiae has previously been recorded on A. artemisiifolia in Hungary (1-3) and A. trifida in North America (2,3). To our knowledge, this is the first report of S. epambrosiae on giant ragweed in Asia. Because of its potential as a biocontrol agent, further studies are needed. References: (1) G. Bohar and I. Schwarczinger. Plant Dis. 83:696, 1999. (2) D. F. Farr and L. A. Castlebury. Sydowia 53:81, 2001. (3) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , September 20, 2011. (4) S. M. Oh et al. Impacts of Invasive Alien Weeds and Control Strategies of Noxious Weeds in Korea. National Institute of Agricultural Science and Technology, Suwon, Korea, 2007.",2012-02-01 +21961731,Maize microarray annotation database.,"

Background

Microarray technology has matured over the past fifteen years into a cost-effective solution with established data analysis protocols for global gene expression profiling. The Agilent-016047 maize 44 K microarray was custom-designed from EST sequences, but only reporter sequences with EST accession numbers are publicly available. The following information is lacking: (a) reporter - gene model match, (b) number of reporters per gene model, (c) potential for cross hybridization, (d) sense/antisense orientation of reporters, (e) position of reporter on B73 genome sequence (for eQTL studies), and (f) functional annotations of genes represented by reporters. To address this, we developed a strategy to annotate the Agilent-016047 maize microarray, and built a publicly accessible annotation database.

Description

Genomic annotation of the 42,034 reporters on the Agilent-016047 maize microarray was based on BLASTN results of the 60-mer reporter sequences and their corresponding ESTs against the maize B73 RefGen v2 ""Working Gene Set"" (WGS) predicted transcripts and the genome sequence. The agreement between the EST, WGS transcript and gDNA BLASTN results was used to assign the reporters into six genomic annotation groups. These annotation groups were: (i) ""annotation by sense gene model"" (23,668 reporters), (ii) ""annotation by antisense gene model"" (4,330); (iii) ""annotation by gDNA"" without a WGS transcript hit (1,549); (iv) ""annotation by EST"", in which case the EST from which the reporter was designed, but not the reporter itself, has a WGS transcript hit (3,390); (v) ""ambiguous annotation"" (2,608); and (vi) ""inconclusive annotation"" (6,489). Functional annotations of reporters were obtained by BLASTX and Blast2GO analysis of corresponding WGS transcripts against GenBank. The annotations are available in the Maize Microarray Annotation Database http://MaizeArrayAnnot.bi.up.ac.za/, as well as through a GBrowse annotation file that can be uploaded to the MaizeGDB genome browser as a custom track. The database was used to re-annotate lists of differentially expressed genes reported in case studies of published work using the Agilent-016047 maize microarray. Up to 85% of reporters in each list could be annotated with confidence by a single gene model, however up to 10% of reporters had ambiguous annotations. Overall, more than 57% of reporters gave a measurable signal in tissues as diverse as anthers and leaves.

Conclusions

The Maize Microarray Annotation Database will assist users of the Agilent-016047 maize microarray in (i) refining gene lists for global expression analysis, and (ii) confirming the annotation of candidate genes before functional studies.",2011-10-01 +21548948,Thermodynamically consistent model calibration in chemical kinetics.,"

Background

The dynamics of biochemical reaction systems are constrained by the fundamental laws of thermodynamics, which impose well-defined relationships among the reaction rate constants characterizing these systems. Constructing biochemical reaction systems from experimental observations often leads to parameter values that do not satisfy the necessary thermodynamic constraints. This can result in models that are not physically realizable and may lead to inaccurate, or even erroneous, descriptions of cellular function.

Results

We introduce a thermodynamically consistent model calibration (TCMC) method that can be effectively used to provide thermodynamically feasible values for the parameters of an open biochemical reaction system. The proposed method formulates the model calibration problem as a constrained optimization problem that takes thermodynamic constraints (and, if desired, additional non-thermodynamic constraints) into account. By calculating thermodynamically feasible values for the kinetic parameters of a well-known model of the EGF/ERK signaling cascade, we demonstrate the qualitative and quantitative significance of imposing thermodynamic constraints on these parameters and the effectiveness of our method for accomplishing this important task. MATLAB software, using the Systems Biology Toolbox 2.1, can be accessed from http://www.cis.jhu.edu/~goutsias/CSS lab/software.html. An SBML file containing the thermodynamically feasible EGF/ERK signaling cascade model can be found in the BioModels database.

Conclusions

TCMC is a simple and flexible method for obtaining physically plausible values for the kinetic parameters of open biochemical reaction systems. It can be effectively used to recalculate a thermodynamically consistent set of parameter values for existing thermodynamically infeasible biochemical reaction models of cellular function as well as to estimate thermodynamically feasible values for the parameters of new models. Furthermore, TCMC can provide dimensionality reduction, better estimation performance, and lower computational complexity, and can help to alleviate the problem of data overfitting.",2011-05-06 +22576171,The Hematopoietic Expression Viewer: expanding mobile apps as a scientific tool.,"

Unlabelled

Many important data in current biological science comprise hundreds, thousands or more individual results. These massive data require computational tools to navigate results and effectively interact with the content. Mobile device apps are an increasingly important tool in the everyday lives of scientists and non-scientists alike. These software present individuals with compact and efficient tools to interact with complex data at meetings or other locations remote from their main computing environment. We believe that apps will be important tools for biologists, geneticists and physicians to review content while participating in biomedical research or practicing medicine. We have developed a prototype app for displaying gene expression data using the iOS platform. To present the software engineering requirements, we review the model-view-controller schema for Apple's iOS. We apply this schema to a simple app for querying locally developed microarray gene expression data. The challenge of this application is to balance between storing content locally within the app versus obtaining it dynamically via a network connection.

Availability

The Hematopoietic Expression Viewer is available at http://www.shawlab.org/he_viewer. The source code for this project and any future information on how to obtain the app can be accessed at http://www.shawlab.org/he_viewer.",2012-05-09 +24125570,New insights into pediatric idiopathic pulmonary hemosiderosis: the French RespiRare(®) cohort.,"

Background

Idiopathic pulmonary hemosiderosis (IPH) is a rare cause of alveolar hemorrhage in children and its pathophysiology remains obscure. Classically, diagnosis is based on a triad including hemoptysis, diffuse parenchymal infiltrates on chest X-rays, and iron-deficiency anemia. We present the French pediatric cohort of IPH collected through the French Reference Center for Rare Lung Diseases (RespiRare®, http://www.respirare.fr).

Methods

Since 2008, a national network/web-linked RespiRare® database has been set up in 12 French pediatric respiratory centres. It is structured as a medical recording tool with extended disease-specific datasets containing clinical information relevant to all forms of rare lung diseases including IPH.

Results

We identified 25 reported cases of IPH in children from the database (20 females and 5 males). Among them, 5 presented with Down syndrome. Upon diagnosis, median age was 4.3 [0.8-14.0] yrs, and the main manifestations were: dyspnea (n = 17, 68%), anemia (n = 16, 64%), cough (n = 12, 48%), febrile pneumonia (n = 11, 44%) and hemoptysis (n = 11, 44%). Half of the patients demonstrated diffuse parenchymal infiltrates on chest imaging, and diagnosis was ascertained either by broncho-alveolar lavage indicating the presence of hemosiderin-laden macrophages (19/25 cases), or lung biopsy (6/25). In screened patients, initial auto-immune screening revealed positive antineutrophilic cytoplasmic antibodies (ANCA) (n = 6, 40%), antinuclear antibodies (ANA) (n = 5, 45%) and specific coeliac disease antibodies (n = 4, 28%). All the patients were initially treated by corticosteroids. In 13 cases, immunosuppressants were introduced due to corticoresistance and/or major side effects. Median length of follow-up was 5.5 yrs, with a satisfactory respiratory outcome in 23/25 patients. One patient developed severe pulmonary fibrosis, and another with Down syndrome died as a result of severe pulmonary hemorrhage.

Conclusion

The present cohort provides substantial information on clinical expression and outcomes of pediatric IPH. Analysis of potential contributors supports a role of auto-immunity in disease development and highlights the importance of genetic factors.",2013-10-14 +24134945,MISIS: a bioinformatics tool to view and analyze maps of small RNAs derived from viruses and genomic loci generating multiple small RNAs.,"In eukaryotes, diverse small RNA (sRNA) populations including miRNAs, siRNAs and piRNAs regulate gene expression and repress transposons, transgenes and viruses. Functional sRNAs are associated with effector proteins based on their size and nucleotide composition. The sRNA populations are currently analyzed by deep sequencing that generates millions of reads which are then mapped to a reference sequence or database. Here we developed a tool called MISIS to view and analyze sRNA maps of genomic loci and viruses which spawn multiple sRNAs. MISIS displays sRNA reads as a histogram where the x-axis indicates positions of the 5'- or 3'-terminal nucleotide of sense and antisense sRNAs, respectively, along a given reference sequence or its selected region and the y-axis the number of reads starting (for sense sRNA) or ending (for antisense sRNA) at each position. Size-classes of sRNAs can be visualized and compared separately or in combination. Thus, MISIS gives an overview of sRNA distribution along the reference sequence as well as detailed information on single sRNA species of different size-classes and abundances. MISIS reads standard BAM/SAM files outputted by mapping tools and generates table files containing counts of sRNA reads at each position of the reference sequence forward and reverse strand and for each of the chosen size-classes of sRNAs. These table files can be used by other tools such as Excel for further quantitative analysis and visualization. MISIS is a Java standalone program. 
It is freely available along with the source code at the following website: http://www.fasteris.com/apps.",2013-10-14 +24122041,DGIdb: mining the druggable genome.,The Drug-Gene Interaction database (DGIdb) mines existing resources that generate hypotheses about how mutated genes might be targeted therapeutically or prioritized for drug development. It provides an interface for searching lists of genes against a compendium of drug-gene interactions and potentially 'druggable' genes. DGIdb can be accessed at http://dgidb.org/.,2013-10-13 +25132825,Morphometric differences in planum temporale in schizophrenia and bipolar disorder revealed by statistical analysis of labeled cortical depth maps.,"Differences in cortical thickness in the lateral temporal lobe, including the planum temporale (PT), have been reported in MRI studies of schizophrenia (SCZ) and bipolar disorder (BPD) patients. Most of these studies have used a single-valued global or local measure for thickness. However, additional and complementary information can be obtained by generating labeled cortical distance maps (LCDMs), which are distances of labeled gray matter (GM) voxels from the nearest point on the GM/white matter (WM) (inner) cortical surface. Statistical analyses of pooled and censored LCDM distances reveal subtle differences in PT between SCZ and BPD groups from data generated by Ratnanather et al. (Schizophrenia Research, http://dx.doi.org/10.1016/j.schres.2013.08.014). These results confirm that the left planum temporale (LPT) is more sensitive than the right PT in distinguishing between SCZ, BPD, and healthy controls. Also confirmed is a strong gender effect, with a thicker PT seen in males than in females. The differences between groups at smaller distances in the LPT revealed by pooled and censored LCDM analysis suggest that SCZ and BPD have different effects on the cortical mantle close to the GM/WM surface. 
This is consistent with reported subtle changes in the cortical mantle observed in post-mortem studies.",2014-08-01 +23034175,BSmooth: from whole genome bisulfite sequencing reads to differentially methylated regions.,"DNA methylation is an important epigenetic modification involved in gene regulation, which can now be measured using whole-genome bisulfite sequencing. However, cost, complexity of the data, and lack of comprehensive analytical tools are major challenges that keep this technology from becoming widely applied. Here we present BSmooth, an alignment, quality control and analysis pipeline that provides accurate and precise results even with low coverage data, appropriately handling biological replicates. BSmooth is open source software, and can be downloaded from http://rafalab.jhsph.edu/bsmooth.",2012-10-03 +24955109,Software for pre-processing Illumina next-generation sequencing short read sequences.,"

Background

When compared to Sanger sequencing technology, next-generation sequencing (NGS) technologies are hindered by shorter sequence read length, higher base-call error rate, non-uniform coverage, and platform-specific sequencing artifacts. These characteristics lower the quality of their downstream analyses, e.g. de novo and reference-based assembly, by introducing sequencing artifacts and errors that may contribute to incorrect interpretation of data. Although many tools have been developed for quality control and pre-processing of NGS data, none of them provide flexible and comprehensive trimming options in conjunction with parallel processing to expedite pre-processing of large NGS datasets.

Methods

We developed ngsShoRT (next-generation sequencing Short Reads Trimmer), a flexible and comprehensive open-source software package written in Perl that provides a set of algorithms commonly used for pre-processing NGS short read sequences. We compared the features and performance of ngsShoRT with existing tools: CutAdapt, NGS QC Toolkit and Trimmomatic. We also compared the effects of using pre-processed short read sequences generated by different algorithms on de novo and reference-based assembly for three different genomes: Caenorhabditis elegans, Saccharomyces cerevisiae S288c, and Escherichia coli O157 H7.

Results

Several combinations of ngsShoRT algorithms were tested on publicly available Illumina GA II, HiSeq 2000, and MiSeq eukaryotic and bacteria genomic short read sequences with the focus on removing sequencing artifacts and low-quality reads and/or bases. Our results show that across three organisms and three sequencing platforms, trimming improved the mean quality scores of trimmed sequences. Using trimmed sequences for de novo and reference-based assembly improved assembly quality as well as assembler performance. In general, ngsShoRT outperformed comparable trimming tools in terms of trimming speed and improvement of de novo and reference-based assembly as measured by assembly contiguity and correctness.

Conclusions

Trimming of short read sequences can improve the quality of de novo and reference-based assembly and assembler performance. The parallel processing capability of ngsShoRT reduces trimming time and improves the memory efficiency when dealing with large datasets. We recommend combining sequencing artifacts removal, and quality score based read filtering and base trimming as the most consistent method for improving sequence quality and downstream assemblies. ngsShoRT source code, user guide and tutorial are available at http://research.bioinformatics.udel.edu/genomics/ngsShoRT/. ngsShoRT can be incorporated as a pre-processing step in genome and transcriptome assembly projects.",2014-05-03 +30727176,"First Report of 'Candidatus Phytoplasma asteris' Affecting Woody Hosts (Fraxinus uhdei, Populus nigra, Pittosporum undulatum, and Croton spp.) in Colombia.","Phytoplasmas of the 16SrVII group in ornamental Fraxinus uhdei trees (1) growing in different cities of the Colombian Andes have been reported (2). In surveys made in Bogotá during March and May 2011, symptoms suggestive of phytoplasma infection were observed in ornamental woody species: Croton spp. (Euphorbiaceae), Pittosporum undulatum (Pittosporaceae) and Populus nigra (Salicaceae) trees, growing close to infected F. uhdei (Oleaceae). Symptoms included witches' broom, yellowing, dieback, epicormic sprouts, tufted foliage, abnormal elongation or shortening of internodes, and deliquescent branching leading to dramatic changes in crown architecture. P. undulatum and F. uhdei are introduced species representing the second and third most abundant trees in the city. P. nigra is an introduced species and Croton spp. is an Andean genus. In order to screen for the presence of phytoplasmas in Croton spp., P. undulatum, and P. nigra, four individuals of each species and two F. uhdei trees were sampled. For DNA extraction, 1 g of vascular tissue from young stems was used. 
Samples were tested by nested PCR with primers P1A/P7A (4) followed by R16F2n/R16R2 (3). The frequency of phytoplasma detection varied among species; P. undulatum and Croton spp. had three positives each, while P. nigra had one positive. Both F. uhdei were positive. Sequences from the amplicons (three reads) were aligned. BLAST analysis of 16S rDNA sequences from the four species tested had 99.2 to 99.7% similarity to 16SrI group sequences. Phylogenetic analysis further confirmed this relationship. Virtual sequence analysis using the iPhyclassifier tool ( http://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi ) showed that the sequence derived from P. undulatum (JQ730861) produced an identical RFLP pattern to group 16SrI-B (reference sequence NC_005303). RFLP similarity coefficients of the phytoplasmas from F. uhdei, Croton spp., and P. nigra (JQ730859, JQ730859 and JQ730861) were less than 0.97, suggesting the presence of a new subgroup within group 16SrI. The vectors of phytoplasmas are unknown in the region. Phytoplasma hosts previously reported in Colombia are: Solanum quitoense (16SrIII), Manihot esculenta (16SrIII), Liquidambar styraciflua (16SrI and 16SrVII), Elaeis guineensis (16SrI and 16SrIII), Coffea arabica (16SrIII), Cordia alliodora (16SrIII), Solanum tuberosum (16SrV and 16SrXII), and Zea mays (16SrI). To our knowledge, this is the first report of Croton spp. and P. undulatum as phytoplasma hosts. Phytoplasmas of group 16SrI are known to infect more than 100 species of different families worldwide. Detection of this group in several tree species and the observation of similar symptoms in other trees species raises concerns about a possible epidemic affecting plants in the Andean region. 
Implications are at several levels: i) epidemiological, with infected trees representing a potential inoculum source for other ornamental plants or crops growing in the agricultural surrounding areas; ii) economic, since eventually it will be necessary to replace diseased plants; and iii) environmental, because of the negative impact on the services provided by trees and green areas. References: (1) J. J. Filgueira et al. Plant Pathology 53:520, 2004. (2) L. Franco-Lara et al. Fitopatología Colombiana 29:32, 2005. (3) D. E Gundersen et al. Phytopathol. Mediterr. 35:144, 1996. (4) I-M. Lee et al. Int. J. Syst. Evol. Microbiol. 54:1037, 2004.",2012-09-01 +22689758,Xenome--a tool for classifying reads from xenograft samples.,"

Motivation

Shotgun sequence read data derived from xenograft material contains a mixture of reads arising from the host and reads arising from the graft. Classifying the read mixture to separate the two allows for more precise analysis to be performed.

Results

We present a technique, with an associated tool Xenome, which performs fast, accurate and specific classification of xenograft-derived sequence read data. We have evaluated it on RNA-Seq data from human, mouse and human-in-mouse xenograft datasets.

Availability

Xenome is available for non-commercial use from http://www.nicta.com.au/bioinformatics.",2012-06-01 +23041463,Easy parameter identifiability analysis with COPASI.,"

Background and scope

Differential equation systems modeling biochemical reaction networks can only give quantitative predictions when they are in accordance with experimental data. However, even if a model can well recapitulate given data, it is often the case that some of its kinetic parameters can be arbitrarily chosen without significantly affecting the simulation results. This indicates a lack of appropriate data to determine those parameters. In this case, the parameter is said to be practically non-identifiable. Well-identified parameters are paramount for reliable quantitative predictions and, therefore, identifiability analysis is an important topic in modeling of biochemical reaction networks. Here, we describe a hidden feature of the free modeling software COPASI, which can be exploited to easily and quickly conduct a parameter identifiability analysis of differential equation systems by calculating likelihood profiles. The proposed combination of an established method for parameter identifiability analysis with the user-friendly features of COPASI offers easy and rapid access to parameter identifiability analysis even for non-experts.

Availability

COPASI is freely available for academic use at http://www.copasi.org.",2012-10-04 +23457040,EBARDenovo: highly accurate de novo assembly of RNA-Seq with efficient chimera-detection.,"

Motivation

High-accuracy de novo assembly of the short sequencing reads from RNA-Seq technology is very challenging. We introduce a de novo assembly algorithm, EBARDenovo, which stands for Extension, Bridging And Repeat-sensing Denovo. This algorithm uses an efficient chimera-detection function to abrogate the effect of aberrant chimeric reads in RNA-Seq data.

Results

EBARDenovo resolves the complications of RNA-Seq assembly arising from sequencing errors, repetitive sequences and aberrant chimeric amplicons. In a series of assembly experiments, our algorithm is the most accurate among the examined programs, including de Bruijn graph assemblers, Trinity and Oases.

Availability and implementation

EBARDenovo is available at http://ebardenovo.sourceforge.net/. This software package (with patent pending) is free of charge for academic use only.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-03-01 +23161680,"TropGeneDB, the multi-tropical crop information system updated and extended.","TropGeneDB (http://tropgenedb.cirad.fr) was created to store genetic, molecular and phenotypic data on tropical crop species. The most common data stored in TropGeneDB are molecular markers, quantitative trait loci, genetic and physical maps, genetic diversity, phenotypic diversity studies and information on genetic resources (geographic origin, parentage, collection). TropGeneDB is organized on a crop basis with currently nine public modules (banana, cocoa, coconut, coffee, cotton, oil palm, rice, rubber tree, sugarcane). Crop-specific Web consultation interfaces have been designed to allow quick consultations and personalized complex queries. TropGeneDB is a component of the South Green Bioinformatics Platform (http://southgreen.cirad.fr/).",2012-11-17 +24828895,Alleviating manoeuvres (sensory tricks) in cervical dystonia.,"

Background

There is limited information on the phenomenology, clinical characteristics and pathophysiology of alleviating manoeuvres (AM), also called 'sensory tricks' in cervical dystonia (CD).

Methods

Individual data, collected from 10 sites participating in the Dystonia Coalition (http://clinicaltrials.gov/show/NCT01373424), included description of localisation and phenomenology of AM collected by systematic review of standardised video examinations. Analyses correlated demographic, neurologic, and psychiatric features of CD patients with or without effective AM.

Results

Of 154 people studied, 138 (89.6%) used AM, of which 60 (43.4%) reported partial improvement, 55 (39.8%) marked improvement, and 4 (2.9%) no effect on dystonic posture. Light touch, usually to the lower face or neck, was used by >90%. The presence or location of AM did not correlate with the severity of the dystonia.

Conclusions

In this large and comprehensive study of CD, we found no clinical predictors of effective AM. Further studies of sensorimotor integration in dystonia are needed to better understand the pathophysiology of AM.",2014-05-14 +25078396,Quantitative method for the assignment of hinge and shear mechanism in protein domain movements.,"

Motivation

A popular method for classification of protein domain movements apportions them into two main types: those with a 'hinge' mechanism and those with a 'shear' mechanism. The intuitive assignment of domain movements to these classes has limited the number of domain movements that can be classified in this way. Furthermore, whether intended or not, the term 'shear' is often interpreted to mean a relative translation of the domains.

Results

Numbers of occurrences of four different types of residue contact changes between domains were optimally combined by logistic regression using the training set of domain movements intuitively classified as hinge and shear to produce a predictor for hinge and shear. This predictor was applied to give a 10-fold increase in the number of examples over the number previously available with a high degree of precision. It is shown that overall a relative translation of domains is rare, and that there is no difference between hinge and shear mechanisms in this respect. However, the shear set contains significantly more examples of domains having a relative twisting movement than the hinge set. The angle of rotation is also shown to be a good discriminator between the two mechanisms.

Availability and implementation

Results are free to browse at http://www.cmp.uea.ac.uk/dyndom/interface/.

Contact

sjh@cmp.uea.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-07-30 +23748951,CoPAP: Coevolution of presence-absence patterns.,"Evolutionary analysis of phyletic patterns (phylogenetic profiles) is widely used in biology, representing presence or absence of characters such as genes, restriction sites, introns, indels and methylation sites. The phyletic pattern observed in extant genomes is the result of ancestral gain and loss events along the phylogenetic tree. Here we present CoPAP (coevolution of presence-absence patterns), a user-friendly web server, which performs accurate inference of coevolving characters as manifested by co-occurring gains and losses. CoPAP uses state-of-the-art probabilistic methodologies to infer coevolution and allows for advanced network analysis and visualization. We developed a platform for comparing different algorithms that detect coevolution, which includes simulated data with pairs of coevolving sites and independent sites. Using these simulated data we demonstrate that CoPAP performance is higher than alternative methods. We exemplify CoPAP utility by analyzing coevolution among thousands of bacterial genes across 681 genomes. Clusters of coevolving genes that were detected using our method largely coincide with known biosynthesis pathways and cellular modules, thus exhibiting the capability of CoPAP to infer biologically meaningful interactions. CoPAP is freely available for use at http://copap.tau.ac.il/.",2013-06-08 +22372975,Transparent mediation-based access to multiple yeast data sources using an ontology driven interface.,"

Background

Saccharomyces cerevisiae is recognized as a model system representing a simple eukaryote whose genome can be easily manipulated. Information solicited by scientists on its biological entities (Proteins, Genes, RNAs...) is scattered within several data sources like SGD, Yeastract, CYGD-MIPS, BioGrid, PhosphoGrid, etc. Because of the heterogeneity of these sources, querying them separately and then manually combining the returned results is a complex and time-consuming task for biologists, most of whom are not bioinformatics experts. It also reduces and limits the use that can be made of the available data.

Results

To provide transparent and simultaneous access to yeast sources, we have developed YeastMed: an XML and mediator-based system. In this paper, we present our approach in developing this system which takes advantage of SB-KOM to perform the query transformation needed and a set of Data Services to reach the integrated data sources. The system is composed of a set of modules that depend heavily on XML and Semantic Web technologies. User queries are expressed in terms of a domain ontology through a simple form-based web interface.

Conclusions

YeastMed is the first mediation-based system specific for integrating yeast data sources. It was conceived mainly to help biologists to find simultaneously relevant data from multiple data sources. It has a biologist-friendly interface easy to use. The system is available at http://www.khaos.uma.es/yeastmed/.",2012-01-25 +24047445,Searching for likeness in a database of macromolecular complexes.,"A software tool and workflow based on distance geometry is presented that can be used to search for local similarity in substructures in a comprehensive database of experimentally derived macromolecular structure. The method does not rely on fold annotation, specific secondary structure assignments, or sequence homology and may be used to locate compound substructures of multiple segments spanning different macromolecules that share a queried backbone geometry. This generalized substructure searching capability is intended to allow users to play an active part in exploring the role specific substructures play in larger protein domains, quaternary assemblies of proteins, and macromolecular complexes of proteins and polynucleotides. The user may select any portion or portions of an existing structure or complex to serve as a template for searching, and other structures that share the same structural features are identified, retrieved and overlaid to emphasize substructural likeness. Matching structures may be compared using a variety of integrated tools including molecular graphics for structure visualization and matching substructure sequence logos. A number of examples are provided that illustrate how generalized substructure searching may be used to understand both the similarity, and individuality of specific macromolecular structures. Web-based access to our substructure searching services is freely available at https://drugsite.msi.umn.edu.",2013-10-09 +22760304,OmegaPlus: a scalable tool for rapid detection of selective sweeps in whole-genome datasets.,"

Unlabelled

Recent advances in sequencing technologies have led to the rapid accumulation of molecular sequence data. Analyzing whole-genome data (as obtained from next-generation sequencers) from intra-species samples makes it possible to detect signatures of positive selection along the genome and therefore to identify potentially advantageous genes in the course of the evolution of a population. We introduce OmegaPlus, an open-source tool for rapid detection of selective sweeps in whole-genome data based on linkage disequilibrium. The tool is up to two orders of magnitude faster than existing programs for this purpose and also exhibits up to two orders of magnitude smaller memory requirements.

Availability

OmegaPlus is available under GNU GPL at http://www.exelixis-lab.org/software.html.",2012-07-03 +22275896,COINS: An Innovative Informatics and Neuroimaging Tool Suite Built for Large Heterogeneous Datasets.,"The availability of well-characterized neuroimaging data with large numbers of subjects, especially for clinical populations, is critical to advancing our understanding of the healthy and diseased brain. Such data enables questions to be answered in a much more generalizable manner and also has the potential to yield solutions derived from novel methods that were conceived after the original studies' implementation. Though there is currently growing interest in data sharing, the neuroimaging community has been struggling for years with how to best encourage sharing data across brain imaging studies. With the advent of studies that are much more consistent across sites (e.g., resting functional magnetic resonance imaging, diffusion tensor imaging, and structural imaging) the potential of pooling data across studies continues to gain momentum. At the mind research network, we have developed the collaborative informatics and neuroimaging suite (COINS; http://coins.mrn.org) to provide researchers with an information system based on an open-source model that includes web-based tools to manage studies, subjects, imaging, clinical data, and other assessments. The system currently hosts data from nine institutions, over 300 studies, over 14,000 subjects, and over 19,000 MRI, MEG, and EEG scan sessions in addition to more than 180,000 clinical assessments. In this paper we provide a description of COINS with comparison to a valuable and popular system known as XNAT. 
Although there are many similarities between COINS and other electronic data management systems, the differences that may concern researchers in the context of multi-site, multi-organizational data sharing environments with intuitive ease of use and PHI security are emphasized as important attributes.",2011-12-23 +23572549,Genome-wide prediction of nucleosome occupancy in maize reveals plant chromatin structural features at genes and other elements at multiple scales.,"The nucleosome is a fundamental structural and functional chromatin unit that affects nearly all DNA-templated events in eukaryotic genomes. It is also a biochemical substrate for higher order, cis-acting gene expression codes and the monomeric structural unit for chromatin packaging at multiple scales. To predict the nucleosome landscape of a model plant genome, we used a support vector machine computational algorithm trained on human chromatin to predict the nucleosome occupancy likelihood (NOL) across the maize (Zea mays) genome. Experimentally validated NOL plots provide a novel genomic annotation that highlights gene structures, repetitive elements, and chromosome-scale domains likely to reflect regional gene density. We established a new genome browser (http://www.genomaize.org) for viewing support vector machine-based NOL scores. This annotation provides sequence-based comprehensive coverage across the entire genome, including repetitive genomic regions typically excluded from experimental genomics data. We find that transposable elements often displayed family-specific NOL profiles that included distinct regions, especially near their termini, predicted to have strong affinities for nucleosomes. We examined transcription start site consensus NOL plots for maize gene sets and discovered that most maize genes display a typical +1 nucleosome positioning signal just downstream of the start site but not upstream. 
This overall lack of a -1 nucleosome positioning signal was also predicted by our method for Arabidopsis (Arabidopsis thaliana) genes and verified by additional analysis of previously published Arabidopsis MNase-Seq data, revealing a general feature of plant promoters. Our study advances plant chromatin research by defining the potential contribution of the DNA sequence to observed nucleosome positioning and provides an invariant baseline annotation against which other genomic data can be compared.",2013-04-09 +22539430,jmzReader: A Java parser library to process and visualize multiple text and XML-based mass spectrometry data formats.,"We here present the jmzReader library: a collection of Java application programming interfaces (APIs) to parse the most commonly used peak list and XML-based mass spectrometry (MS) data formats: DTA, MS2, MGF, PKL, mzXML, mzData, and mzML (based on the already existing API jmzML). The library is optimized to be used in conjunction with mzIdentML, the recently released standard data format for reporting protein and peptide identifications, developed by the HUPO proteomics standards initiative (PSI). mzIdentML files do not contain spectra data but contain references to different kinds of external MS data files. As a key functionality, all parsers implement a common interface that supports the various methods used by mzIdentML to reference external spectra. Thus, when developing software for mzIdentML, programmers no longer have to support multiple MS data file formats but only this one interface. The library (which includes a viewer) is open source and, together with detailed documentation, can be downloaded from http://code.google.com/p/jmzreader/.",2012-03-01 +23282463,Compareads: comparing huge metagenomic experiments.,"

Background

Nowadays, metagenomic sample analyses are mainly achieved by comparing them with a priori knowledge stored in data banks. While powerful, such approaches do not allow the exploitation of unknown and/or ""unculturable"" species, for instance estimated at 99% for Bacteria.

Methods

This work introduces Compareads, a de novo comparative metagenomic approach that returns the reads that are similar between two possibly metagenomic datasets generated by High Throughput Sequencers. One originality of this work consists in its ability to deal with huge datasets. The second main contribution presented in this paper is the design of a probabilistic data structure based on Bloom filters enabling to index millions of reads with a limited memory footprint and a controlled error rate.

Results

We show that Compareads enables the retrieval of biological information while being able to scale to huge datasets. Its time and memory features make Compareads usable on read sets each composed of more than 100 million Illumina reads in a few hours and consuming 4 GB of memory, and thus usable on today's personal computers.

Conclusion

Using a new data structure, Compareads is a practical solution for comparing de novo huge metagenomic samples. Compareads is released under the CeCILL license and can be freely downloaded from http://alcovna.genouest.org/compareads/.",2012-12-19 +24093723,The Gene Ontology (GO) Cellular Component Ontology: integration with SAO (Subcellular Anatomy Ontology) and other recent developments.,"

Background

The Gene Ontology (GO) (http://www.geneontology.org/) contains a set of terms for describing the activity and actions of gene products across all kingdoms of life. Each of these activities is executed in a location within a cell or in the vicinity of a cell. In order to capture this context, the GO includes a sub-ontology called the Cellular Component (CC) ontology (GO-CCO). The primary use of this ontology is for GO annotation, but it has also been used for phenotype annotation, and for the annotation of images. Another ontology with similar scope to the GO-CCO is the Subcellular Anatomy Ontology (SAO), part of the Neuroscience Information Framework Standard (NIFSTD) suite of ontologies. The SAO also covers cell components, but in the domain of neuroscience.

Description

Recently, the GO-CCO was enriched in content and links to the Biological Process and Molecular Function branches of GO as well as to other ontologies. This was achieved in several ways. We carried out an amalgamation of SAO terms with GO-CCO ones; as a result, nearly 100 new neuroscience-related terms were added to the GO. The GO-CCO also contains relationships to GO Biological Process and Molecular Function terms, as well as connecting to external ontologies such as the Cell Ontology (CL). Terms representing protein complexes in the Protein Ontology (PRO) reference GO-CCO terms for their species-generic counterparts. GO-CCO terms can also be used to search a variety of databases.

Conclusions

In this publication we provide an overview of the GO-CCO, its overall design, and some recent extensions that make use of additional spatial information. One of the most recent developments of the GO-CCO was the merging in of the SAO, resulting in a single unified ontology designed to serve the needs of GO annotators as well as the specific needs of the neuroscience community.",2013-10-07 +25607983,Generation of silver standard concept annotations from biomedical texts with special relevance to phenotypes.,"Electronic health records and scientific articles possess differing linguistic characteristics that may impact the performance of natural language processing tools developed for one or the other. In this paper, we investigate the performance of four extant concept recognition tools: the clinical Text Analysis and Knowledge Extraction System (cTAKES), the National Center for Biomedical Ontology (NCBO) Annotator, the Biomedical Concept Annotation System (BeCAS) and MetaMap. Each of the four concept recognition systems is applied to four different corpora: the i2b2 corpus of clinical documents, a PubMed corpus of Medline abstracts, a clinical trials corpus and the ShARe/CLEF corpus. In addition, we assess the individual system performances with respect to one gold standard annotation set, available for the ShARe/CLEF corpus. Furthermore, we built a silver standard annotation set from the individual systems' output and assess the quality as well as the contribution of individual systems to the quality of the silver standard. Our results demonstrate that mainly the NCBO annotator and cTAKES contribute to the silver standard corpora (F1-measures in the range of 21% to 74%) and their quality (best F1-measure of 33%), independent from the type of text investigated. While BeCAS and MetaMap can contribute to the precision of silver standard annotations (precision of up to 42%), the F1-measure drops when combined with NCBO Annotator and cTAKES due to a low recall. 
In conclusion, the performances of individual systems need to be improved independently from the text types, and the leveraging strategies to best take advantage of individual systems' annotations need to be revised. The textual content of the PubMed corpus, accession numbers for the clinical trials corpus, and assigned annotations of the four concept recognition systems as well as the generated silver standard annotation sets are available from http://purl.org/phenotype/resources. The textual content of the ShARe/CLEF (https://sites.google.com/site/shareclefehealth/data) and i2b2 (https://i2b2.org/NLP/DataSets/) corpora needs to be requested with the individual corpus providers.",2015-01-21 +24722405,Characterization of ancient and modern genomes by SNP detection and phylogenomic and metagenomic analysis using PALEOMIX.,"Next-generation sequencing technologies have revolutionized the field of paleogenomics, allowing the reconstruction of complete ancient genomes and their comparison with modern references. However, this requires the processing of vast amounts of data and involves a large number of steps that use a variety of computational tools. Here we present PALEOMIX (http://geogenetics.ku.dk/publications/paleomix), a flexible and user-friendly pipeline applicable to both modern and ancient genomes, which largely automates the in silico analyses behind whole-genome resequencing. Starting with next-generation sequencing reads, PALEOMIX carries out adapter removal, mapping against reference genomes, PCR duplicate removal, characterization of and compensation for postmortem damage, SNP calling and maximum-likelihood phylogenomic inference, and it profiles the metagenomic contents of the samples. As such, PALEOMIX allows for a series of potential applications in paleogenomics, comparative genomics and metagenomics. 
Applying the PALEOMIX pipeline to the three ancient and seven modern Phytophthora infestans genomes as described here takes 5 d using a 16-core server.",2014-04-10 +22589089,Massive human co-expression network and its medical applications.,"Network-based analysis is indispensable in analyzing high-throughput biological data. Based on the assumption that the variation of gene interactions under given biological conditions could be better interpreted in the context of a large-scale and wide variety of developmental, tissue, and disease conditions, we leverage the large quantity of publicly available transcriptomic data >40,000 HG U133A Affymetrix microarray chips stored in ArrayExpress (http://www.ebi.ac.uk/arrayexpress/) using MetaOmGraph (http://metnet.vrac.iastate.edu/MetNet_MetaOmGraph.htm). From this data, 18,637 chips encompassing over 500 experiments containing high-quality data (18637 Hu-dataset) were used to create a globally stable gene co-expression network (18637 Hu-co-expression-network). Regulons, groups of highly and consistently co-expressed genes, were obtained by partitioning the 18637 Hu-co-expression-network using a Markov clustering algorithm (MCL). The regulons were demonstrated to be statistically significant using a gene ontology (GO) term overrepresentation test combined with evaluation of the effects of gene permutations. The regulons include ca. 12% of human genes, interconnected by 31,471 correlations. All network data and metadata are publicly available (http://metnet.vrac.iastate.edu/MetNet_MetaOmGraph.htm). Text mining of these metadata, GO term overrepresentation analysis, and statistical analysis of transcriptomic experiments across multiple environmental, tissue, and disease conditions, has revealed novel fingerprints distinguishing central nervous system (CNS)-related conditions. 
This study demonstrates the value of mega-scale network-based analysis for biologists to further refine transcriptomic data, derived from a particular condition, to study the global relationships between genes and diseases, and to develop hypotheses that can inform future research.",2012-05-01 +24990609,Genome editing assessment using CRISPR Genome Analyzer (CRISPR-GA).,"

Summary

Clustered regularly interspaced short palindromic repeats (CRISPR)-based technologies have revolutionized human genome engineering and opened countless possibilities to basic science, synthetic biology and gene therapy. Despite the enormous potential of these tools, their performance is far from perfect. It is essential to perform a careful a posteriori analysis of the gene editing experiment. However, there are no computational tools for genome editing assessment yet, and current experimental tools lack sensitivity and flexibility. We present a platform to assess the quality of a genome editing experiment only with three mouse clicks. The method evaluates next-generation data to quantify and characterize insertions, deletions and homologous recombination. CRISPR Genome Analyzer provides a report for the locus selected, which includes a quantification of the edited site and the analysis of the different alterations detected. The platform maps the reads, estimates and locates insertions and deletions, computes the allele replacement efficiency and provides a report integrating all the information.

Availability and implementation

CRISPR-GA Web is available at http://crispr-ga.net. Documentation on CRISPR-GA instructions can be found at http://crispr-ga.net/documentation.html

Contact

mguell@genetics.med.harvard.edu.",2014-07-01 +23250826,Reliably assessing prediction reliability for high dimensional QSAR data.,"Predictability and prediction reliability are of utmost importance to characterize a good Quantitative structure-activity relationships (QSAR) model. However, validation methods are insufficient to guarantee the prediction reliability of QSAR models. Moreover, high dimensional samples also pose great challenge to traditional methods in terms of predictive power. Therefore, this study presents a predictive classifier (i.e., TreeEC) that can assess prediction reliability with high confidence, especially for facing high dimensional QSAR data. Two approaches for assessing prediction reliability are provided, i.e., applicability domain and prediction confidence. We demonstrate that the applicability domain has difficulty to guarantee the models' prediction reliability, where samples intensively close to the domain center are often more poorly predicted than those outside the domain. Instead, prediction confidence is more promising for assessing prediction reliability. Based on a large data set assessed by prediction confidence, external samples assessed with high confidence greater than 95 % can be reliably predicted with an accuracy of 94 %, in contrast to the average accuracy of 84 %. We also illustrate that TreeEC is less affected by high dimensionality than other popular methods according to 11 public data sets. A free version of TreeEC with a user-friendly interface can also be downloaded from the website http://pharminfo.zju.edu.cn/computation/TreeEC/TreeEC.html.",2012-12-19 +23281813,iLOCi: a SNP interaction prioritization technique for detecting epistasis in genome-wide association studies.,"

Background

Genome-wide association studies (GWAS) do not provide a full account of the heritability of genetic diseases since gene-gene interactions, also known as epistasis are not considered in single locus GWAS. To address this problem, a considerable number of methods have been developed for identifying disease-associated gene-gene interactions. However, these methods typically fail to identify interacting markers explaining more of the disease heritability over single locus GWAS, since many of the interactions significant for disease are obscured by uninformative marker interactions e.g., linkage disequilibrium (LD).

Results

In this study, we present a novel SNP interaction prioritization algorithm, named iLOCi (Interacting Loci). This algorithm accounts for marker dependencies separately in case and control groups. Disease-associated interactions are then prioritized according to a novel ranking score calculated from the difference in marker dependencies for every possible pair between case and control groups. The analysis of a typical GWAS dataset can be completed in less than a day on a standard workstation with parallel processing capability. The proposed framework was validated using simulated data and applied to real GWAS datasets using the Wellcome Trust Case Control Consortium (WTCCC) data. The results from simulated data showed the ability of iLOCi to identify various types of gene-gene interactions, especially for high-order interaction. From the WTCCC data, we found that among the top ranked interacting SNP pairs, several mapped to genes previously known to be associated with disease, and interestingly, other previously unreported genes with biologically related roles.

Conclusion

iLOCi is a powerful tool for uncovering true disease interacting markers and thus can provide a more complete understanding of the genetic basis underlying complex disease. The program is available for download at http://www4a.biotec.or.th/GI/tools/iloci.",2012-12-13 +27481276,CORAL: Monte Carlo Method as a Tool for the Prediction of the Bioconcentration Factor of Industrial Pollutants.,"The CORAL software (http://www.insilico.eu/coral/) has been evaluated for application in QSAR modeling of the bioconcentration factor in fish (logBCF). The data used include 237 organic substances (industrial pollutants). Six random splits of the data into sub-training (30-50 %), calibration (20-30 %), test (13-30 %), and validation sets (7-25 %) have been carried out. The following numbers display the average statistical characteristics of the models for the external validation set: correlation coefficient r(2) =0.880±0.017 and standard error of estimation s=0.559±0.131. The best models were obtained with a combined representation of the molecular structure by SMILES together with hydrogen suppressed graph.",2012-11-30 +24462246,Examining the performance of the brief addiction monitor.,"The Center for Excellence in Drug Abuse Treatment and Education (Center for Excellence in Substance Abuse Treatment and Education (CESATE; 2010). Brief Addiction Monitor: Manual of Operations. Philadelphia, PA) recently suggested that Veterans Affairs' (VA) addictions treatment programs, in order to encourage measurement based care, begin using a new measure of substance abuse, the Brief Addictions Monitor (BAM). To date, only one study Caccolia et al, 2013. Development and initial evaluation of the Brief Addiction Monitor (BAM). Journal of Substance Abuse Treatment, 44, 256-63. doi: http://dx.doi.org/10.1016/j.jsat.2012.07.013) has examined the psychometric properties of a version of this instrument. 
However, this study did not use the version of the BAM currently available to most VA providers via the mental health assistant software; rather, the authors reported the properties of a BAM where most of the items had continuous (or near continuous) response options. The current study seeks to provide data on the version of the BAM which uses 5 point Likert scale response options for its questions, the version available on the mental health assistant software. Based on data from more than 700 veterans enrolled in out-patient (OP) and in-patient (IP) addictions treatment programs, this study examined the factor structure, reliability, and validity of this version of the BAM. Across both groups, results suggested that the BAM lacked a reliable factor structure, in contrast to the findings from the earlier study. However, a single scale, composed of a minority of items on the BAM, showed promise. A minority of the items (five) provided valid information across both OP and IP samples when applied individually, as indicated by convergent and divergent validity comparisons with other measures of functioning; tracking changes in functioning over the course of treatment; and correlating with changes in convergent and divergent validity measures. This partially supported the CESATE (CESATE; 2010). Brief Addiction Monitor: Manual of Operations. Philadelphia, PA) call to use the individual BAM items. Overall, results suggested that changing the structure of the response options may have had a negative impact on the psychometric properties of the BAM.",2014-01-23 +23918849,"The Kaya HDSS, Burkina Faso: a platform for epidemiological studies and health programme evaluation.","The Kaya Health and Demographic Surveillance System (Kaya HDSS) is located in the North Central region of Burkina Faso in the Kaya health district. 
The main purposes of the Kaya HDSS are to study demographic, infectious and chronic disease indicators in the district, to observe changes in health over time, evaluate health programmes and to provide a basis for policy decisions and capacity building in order to enhance the health of the community. Kaya HDSS was established in late 2007 following a baseline census of the population of the HDSS area. Homes were visited every 6 months to collect demographic information and data on morbidity and mortality. A verbal autopsy questionnaire is used to collect information on the causes of death. The Kaya HDSS reached 64,480 residents in 10,587 households by the end of 2011, with an average of 6.1 ± 4.3 persons per household. The site is 70% urban and 30% rural. The population is 51.8% female. Over 55% of deaths occur outside health facilities. Malaria is the leading cause of death, primarily affecting children under 5 years of age (44%) and those 5 to 14 years old (36%). The Kaya HDSS data can be obtained by sending a request via the HDSS website (http://kaya-hdss.org/).",2013-06-01 +24812338,RNASeqGUI: a GUI for analysing RNA-Seq data.,"

Unlabelled

We present RNASeqGUI R package, a graphical user interface (GUI) for the identification of differentially expressed genes across multiple biological conditions. This R package includes some well-known RNA-Seq tools, available at www.bioconductor.org. RNASeqGUI package is not just a collection of some known methods and functions, but it is designed to guide the user during the entire analysis process. RNASeqGUI package is mainly addressed to those users who have little experience with command-line software. Therefore, thanks to RNASeqGUI, they can conduct analogous analyses using this simple graphical interface. Moreover, RNASeqGUI is also helpful for those who are expert R-users because it speeds up the usage of the included RNASeq methods drastically.

Availability and implementation

RNASeqGUI package needs the RGTK2 graphical library to run. This package is open source and is freely available under General Public License at http://bioinfo.na.iac.cnr.it/RNASeqGUI/Download.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-07 +21414989,Structure-based de novo prediction of zinc-binding sites in proteins of unknown function.,"

Motivation

Zinc-binding proteins are the most abundant metallo-proteins in Protein Data Bank (PDB). Accurate prediction of zinc-binding sites in proteins of unknown function may provide important clues for the inference of protein function. As zinc binding is often associated with characteristic 3D arrangements of zinc ligand residues, its prediction may benefit from using not only the sequence information but also the structure information of proteins.

Results

In this work, we present a structure-based method, TEMSP (3D TEmplate-based Metal Site Prediction), to predict zinc-binding sites. TEMSP significantly improves over previously reported best methods in predicting as many as possible true ligand residues for zinc with minimum overpredictions: if only those results in which all zinc ligand residues have been correctly predicted are defined as true positives, our method improves sensitivity from less than 30% to above 60%, and selectivity from around 25% to 80%. These results are for predictions based on apo state structures. In addition, the method can predict the zinc-bound local structures reliably, generating predictions useful for function inference. We applied TEMSP to 1888 protein structures of the 'Unknown Function' class in the PDB database. A number of zinc-binding sites have been discovered de novo, i.e. based solely on the protein structures. Using the predicted local structures of these sites, possible functional roles were analyzed.

Availability

TEMSP is freely available from http://netalign.ustc.edu.cn/temsp/.",2011-03-16 +25063002,"Cohort profile update: 2004 Pelotas (Brazil) Birth Cohort Study. Body composition, mental health and genetic assessment at the 6 years follow-up.","This is an update of the 2004 Pelotas Birth Cohort profile, originally published in 2011. In view of the high prevalence of overweight and mental health problems among Brazilian children, together with the availability of state-of-the-art equipment to assess body composition and diagnostic tests for mental health in childhood, the main outcomes measured in the fifth follow-up (mean age 6.8 years) included child body composition, mental health and cognitive ability. A total of 3722 (90.2%) of the original mothers/carers were interviewed and their children examined in a clinic where they underwent whole-body dual X-ray absorptiometry (DXA), air displacement plethysmography and a 3D photonic scan. Saliva samples for DNA were obtained. Clinical psychologists applied the Development and Well-Being Assessment questionnaire and the Wechsler Intelligence Scale for Children to all children. Results are being compared with those of the two earlier cohorts to assess the health effects of economic growth and full implementation of public policies aimed at reducing social inequalities in the past 30 years. For further information visit the programme website at [http://www.epidemio-ufpel.org.br/site/content/coorte_2004/questionarios.php]. Applications to use the data should be made by contacting 2004 cohort researchers and filling in the application form available at [http://www.epidemio-ufpel.org.br/site/content/estudos/formularios.php].",2014-07-25 +22238270,BESC knowledgebase public portal.,"

Unlabelled

The BioEnergy Science Center (BESC) is undertaking large experimental campaigns to understand the biosynthesis and biodegradation of biomass and to develop biofuel solutions. BESC is generating large volumes of diverse data, including genome sequences, omics data and assay results. The purpose of the BESC Knowledgebase is to serve as a centralized repository for experimentally generated data and to provide an integrated, interactive and user-friendly analysis framework. The Portal makes available tools for visualization, integration and analysis of data either produced by BESC or obtained from external resources.

Availability

http://besckb.ornl.gov.",2012-01-11 +23228031,A network module-based method for identifying cancer prognostic signatures.,"Discovering robust prognostic gene signatures as biomarkers using genomics data can be challenging. We have developed a simple but efficient method for discovering prognostic biomarkers in cancer gene expression data sets using modules derived from a highly reliable gene functional interaction network. When applied to breast cancer, we discover a novel 31-gene signature associated with patient survival. The signature replicates across 5 independent gene expression studies, and outperforms 48 published gene signatures. When applied to ovarian cancer, the algorithm identifies a 75-gene signature associated with patient survival. A Cytoscape plugin implementation of the signature discovery method is available at http://wiki.reactome.org/index.php/Reactome_FI_Cytoscape_Plugin.",2012-12-10 +23929867,Multilabel learning via random label selection for protein subcellular multilocations prediction.,"Prediction of protein subcellular localization is an important but challenging problem, particularly when proteins may simultaneously exist at, or move between, two or more different subcellular location sites. Most of the existing protein subcellular localization methods are only used to deal with the single-location proteins. In the past few years, only a few methods have been proposed to tackle proteins with multiple locations. However, they only adopt a simple strategy, that is, transforming the multilocation proteins to multiple proteins with single location, which does not take correlations among different subcellular locations into account. In this paper, a novel method named random label selection (RALS) (multilabel learning via RALS), which extends the simple binary relevance (BR) method, is proposed to learn from multilocation proteins in an effective and efficient way. 
RALS does not explicitly find the correlations among labels, but rather implicitly attempts to learn the label correlations from data by augmenting original feature space with randomly selected labels as its additional input features. Through the fivefold cross-validation test on a benchmark data set, we demonstrate our proposed method with consideration of label correlations obviously outperforms the baseline BR method without consideration of label correlations, indicating correlations among different subcellular locations really exist and contribute to improvement of prediction performance. Experimental results on two benchmark data sets also show that our proposed methods achieve significantly higher performance than some other state-of-the-art methods in predicting subcellular multilocations of proteins. The prediction web server is available at http://levis.tongji.edu.cn:8080/bioinfo/MLPred-Euk/ for public use.",2013-03-01 +24389662,CasOT: a genome-wide Cas9/gRNA off-target searching tool.,"The CRISPR/Cas or Cas9/guide RNA system is a newly developed, easily engineered and highly effective tool for gene targeting; it has considerable off-target effects in cultured human cells and in several organisms. However, the Cas9/guide RNA target site is too short for existing alignment tools to exhaustively and effectively identify potential off-target sites. CasOT is a local tool designed to find potential off-target sites in any given genome or user-provided sequence, with user-specified types of protospacer adjacent motif, and number of mismatches allowed in the seed and non-seed regions.

Availability

http://eendb.zfgenetics.org/casot/ CONTACT: zfgenetics@gmail.com or bzhang@pku.edu.cn Supplementary Information: Supplementary data are available at Bioinformatics online.",2014-01-02 +24967476,"Health, United States, 2013: With Special Feature on Prescription Drugs","Health, United States, 2013 is the 37th report on the health status of the nation and is submitted by the Secretary of the Department of Health and Human Services to the President and the Congress of the United States in compliance with Section 308 of the Public Health Service Act. This report was compiled by the Centers for Disease Control and Prevention's (CDC) National Center for Health Statistics (NCHS). The National Committee on Vital and Health Statistics served in a review capacity. The Health, United States series presents an annual overview of national trends in health statistics. The report contains a Chartbook that assesses the nation's health by presenting trends and current information on selected measures of morbidity, mortality, health care utilization and access, health risk factors, prevention, health insurance, and personal health care expenditures. This year's Chartbook includes a Special Feature on Prescription Drugs. The report also contains 135 Trend Tables organized around four major subject areas: health status and determinants, health care utilization, health care resources, and health care expenditures. A companion product—Health, United States: In Brief—features information extracted from the full report. The complete report, In Brief, and related data products are available on the Health, United States website at: http://www.cdc.gov/nchs/hus.htm.",2014-06-27 +24932008,A combinatorial approach for analyzing intra-tumor heterogeneity from high-throughput sequencing data.,"

Motivation

High-throughput sequencing of tumor samples has shown that most tumors exhibit extensive intra-tumor heterogeneity, with multiple subpopulations of tumor cells containing different somatic mutations. Recent studies have quantified this intra-tumor heterogeneity by clustering mutations into subpopulations according to the observed counts of DNA sequencing reads containing the variant allele. However, these clustering approaches do not consider that the population frequencies of different tumor subpopulations are correlated by their shared ancestry in the same population of cells.

Results

We introduce the binary tree partition (BTP), a novel combinatorial formulation of the problem of constructing the subpopulations of tumor cells from the variant allele frequencies of somatic mutations. We show that finding a BTP is an NP-complete problem; derive an approximation algorithm for an optimization version of the problem; and present a recursive algorithm to find a BTP with errors in the input. We show that the resulting algorithm outperforms existing clustering approaches on simulated and real sequencing data.

Availability and implementation

Python and MATLAB implementations of our method are available at http://compbio.cs.brown.edu/software/ .",2014-06-01 +24088321,"Online drug user-led harm reduction in Hungary: a review of ""Daath"".","Harm reduction has been increasingly finding its way into public drug policies and healthcare practices worldwide, with successful intervention measures justifiably focussing on the highest-risk groups, such as injecting drug users. However, there are also other types of drug users in need for harm reduction, even though they pose less, low, or no public health risk. Occasionally, drug users may autonomously organise themselves into groups to provide advocacy, harm reduction, and peer-help services, sometimes online. The http://www.daath.hu website has been operated since 2001 by the ""Hungarian Psychedelic Community"", an unorganised drug user group with a special interest in hallucinogenic and related substances. As of today, the website serves about 1200 visitors daily, and the online community comprises of more than 8000 registered members. The Daath community is driven by a strong commitment to the policy of harm reduction in the form of various peer-help activities that aim to expand harm reduction without promoting drug use. Our review comprehensively summarises Daath's user-led harm reduction services and activities from the last ten years, firstly outlining the history and growth phases of Daath, along with its self-set guidelines and policies. Online services (such as a discussion board, and an Ecstasy pill database) and offline activities (such as Ecstasy pill field testing, and a documentary film about psychedelics) are described. In order to extend its harm reduction services and activities in the future, Daath has several social, commercial, and legislative challenges to face. Starting with a need to realign its focus, outlooks for the upcoming operation of Daath are pondered. 
Future trends in harm reduction, such as separating harm-decreasing from benefit-increasing, are also discussed. We aim to share these innovative harm reduction measures and good practices in order to be critically assessed, and--if found useful--adapted and applied elsewhere.",2013-10-02 +24709941,TraV: a genome context sensitive transcriptome browser.,"Next-generation sequencing (NGS) technologies like Illumina and ABI Solid enable the investigation of transcriptional activities of genomes. While read mapping tools have been continually improved to enable the processing of the increasing number of reads generated by NGS technologies, analysis and visualization tools are struggling with the amount of data they are presented with. Current tools are capable of handling at most two to three datasets simultaneously before they are limited by available memory or due to processing overhead. In order to process fifteen transcriptome sequencing experiments of Bacillus licheniformis DSM13 obtained in a previous study, we developed TraV, a RNA-Seq analysis and visualization tool. The analytical methods are designed for prokaryotic RNA-seq experiments. TraV calculates single nucleotide activities from the mapping information to visualize and analyze multiple transcriptome sequencing experiments. The use of nucleotide activities instead of single read mapping information is highly memory efficient without incurring a processing overhead. TraV is available at http://appmibio.uni-goettingen.de/index.php?sec=serv.",2014-04-07 +24714491,Encounter complexes and dimensionality reduction in protein-protein association.,"An outstanding challenge has been to understand the mechanism whereby proteins associate. We report here the results of exhaustively sampling the conformational space in protein-protein association using a physics-based energy function. 
The agreement between experimental intermolecular paramagnetic relaxation enhancement (PRE) data and the PRE profiles calculated from the docked structures shows that the method captures both specific and non-specific encounter complexes. To explore the energy landscape in the vicinity of the native structure, the nonlinear manifold describing the relative orientation of two solid bodies is projected onto a Euclidean space in which the shape of low energy regions is studied by principal component analysis. Results show that the energy surface is canyon-like, with a smooth funnel within a two dimensional subspace capturing over 75% of the total motion. Thus, proteins tend to associate along preferred pathways, similar to sliding of a protein along DNA in the process of protein-DNA recognition. DOI: http://dx.doi.org/10.7554/eLife.01370.001.",2014-04-08 +24515137,Shack-Hartmann spot dislocation map determination using an optical flow method.,"We present a robust, dense, and accurate Shack-Hartmann spot dislocation map determination method based on a regularized optical flow algorithm that does not require obtaining the spot centroids. The method is capable to measure in presence of strong noise, background illumination and spot modulating signals, which are typical limiting factors of traditional centroid detection algorithms. Moreover, the proposed approach is able to face cases where some of the reference beam spots have not a corresponding one in the distorted Hartmann diagram, and it can expand the dynamic range of the Shack-Hartmann sensor unwrapping the obtained dense dislocation maps. We have tested the algorithm with both simulations and experimental data obtaining satisfactory results. 
A complete MATLAB package that can reproduce all the results can be downloaded from [http://goo.gl/XbZVOr].",2014-01-01 +23193288,CFGP 2.0: a versatile web-based platform for supporting comparative and evolutionary genomics of fungi and Oomycetes.,"In 2007, Comparative Fungal Genomics Platform (CFGP; http://cfgp.snu.ac.kr/) was publicly open with 65 genomes corresponding to 58 fungal and Oomycete species. The CFGP provided six bioinformatics tools, including a novel tool entitled BLASTMatrix that enables search homologous genes to queries in multiple species simultaneously. CFGP also introduced Favorite, a personalized virtual space for data storage and analysis with these six tools. Since 2007, CFGP has grown to archive 283 genomes corresponding to 152 fungal and Oomycete species as well as 201 genomes that correspond to seven bacteria, 39 plants and 105 animals. In addition, the number of tools in Favorite increased to 27. The Taxonomy Browser of CFGP 2.0 allows users to interactively navigate through a large number of genomes according to their taxonomic positions. The user interface of BLASTMatrix was also improved to facilitate subsequent analyses of retrieved data. A newly developed genome browser, Seoul National University Genome Browser (SNUGB), was integrated into CFGP 2.0 to support graphical presentation of diverse genomic contexts. Based on the standardized genome warehouse of CFGP 2.0, several systematic platforms designed to support studies on selected gene families have been developed. Most of them are connected through Favorite to allow of sharing data across the platforms.",2012-11-27 +24237440,[Evaluation of zygomycosis cases by pooled analysis method reported from Turkey].,"Zygomycosis is a rapidly-progressive invasive fungal disease with high mortality rates. Mucor, Rhizopus, Rhizomucor and Absidia species classified in Mucorales order, are the main causative agents of zygomycosis. 
Uncontrolled diabetes, hematologic malignancies, long term corticosteroid use and immunosuppressive therapies are the main predisposing factors for mucormycosis. In this study, we aimed to evaluate the mucormycosis cases from Turkey published in national and international databases in the last 17 years by means of age, gender, co-morbidities, signs and symptoms, diagnostic methods, therapeutic modalities, and mortality rate by pooling analysis. In our study, two national (http://uvt.ulakbim.gov.tr, http://www.turkmedline.net) and two international (www.ncbi.nlm.nih.gov, http://apps.webofknowledge.com) databases were used. A total of 64 manuscripts (34 from national and 30 from international databases) published between 1995 and 2012, which were eligible for the study criteria and accessible as full text were included in the study. A total of 151 mucormycosis patients (71 female, 80 male; mean age: 45.4 ± 21.4 years) from these studies, with definitive diagnosis of invasive fungal infections according to the criteria of European Organization for Research and Treatment of Cancer (EORTC) have been evaluated. Of 151 patients 91 (60%) were diagnosed as rhinocerebral, 42 (27.8%) were sinoorbital, 7 (4.6%) were pulmonary, 6 (3.9%) were disseminated, 3 (1.9%) were skin, and 2 (1.3%) were gastrointestinal mucormycosis. The most common symptoms and signs were; swelling of eye and face (n= 95, 63%), fever (n= 72, 48%), nasal obstruction (n= 60, 40%), headache (n= 58, 38%) and ophthalmoplegia (n= 48, 32%). The most common co-morbidity was diabetes (49%) followed by hematological malignancies (39.7%). Mycological cultures were performed for 82 patients, and fungal growth were detected in the clinical specimens of 51 cases. The distribution of strains isolated in culture were as follows: Mucor spp. (n= 19, 37.2%), Rhizopus spp. (n= 13, 25.5%), Zygomycetes (n= 9, 17.6%), Rhizopus oryzae (n= 4, 7.8%), Rhizopus spp. + yeast (n= 3, 5.9%), Rhizomucor spp. 
(n= 2, 3.9%) and Rhizosporium spp. (n= 1, 1.9%). In 133 patients, histopathological investigation and in 126 patients radiological examinations were performed for diagnosis. Both surgical debridement and antifungal therapy were employed in 115 patients. Four patients had received only surgical debridement and 30 only antifungal therapies. Classical amphotericin B (AMP-B) therapy for 77 cases, liposomal AMP-B for 60 cases, liposomal AMP-B + posaconazole for six cases and lipid complex AMP-B for two cases have been started as antifungal therapies. Total mortality rate was detected as 54.3% (82/151). In conclusion, despite new diagnostic tools and therapeutic agents, mortality rates in mucormycosis are still very high. For the management, mucormycosis should be considered early in risky patients, and surgical debridement together with effective antifungal therapy should be applied as soon as possible.",2013-10-01 +23826885,Comparison of ultra-fast 2D and 3D ligand and target descriptors for side effect prediction and network analysis in polypharmacology.,"

Background and purpose

Some existing computational methods are used to infer protein targets of small molecules and can therefore be used to find new targets for existing drugs, with the goals of re-directing the molecule towards a different therapeutic purpose or explaining off-target effects due to multiple targeting. Inherent limitations, however, arise from the fact that chemical analogy is calculated on the basis of common frameworks or scaffolds and also because target information is neglected. The method we present addresses these issues by taking into account 3D information from both the ligand and the target.

Experimental approach

ElectroShape is an established method for ultra-fast comparison of the shapes and charge distributions of ligands that is validated here for prediction of on-target activities, off-target profiles and adverse effects of drugs and drug-like molecules taken from the DrugBank database.

Key results

The method is shown to predict polypharmacology profiles and relate targets from two complementary viewpoints (ligand- and target-based networks).

Conclusions and implications

The open-access web tool presented here (http://ub.cbm.uam.es/chemogenomics/) allows interactive navigation in a unified 'pharmacological space' from the viewpoints of both ligands and targets. It also enables prediction of pharmacological profiles, including likely side effects, for new compounds. We hope this web interface will help many pharmacologists to become aware of this new paradigm (up to now mostly used in the realm of the so-called 'chemical biology') and encourage its use with a view to revealing 'hidden' relationships between new and existing compounds and pharmacologically relevant targets.",2013-10-01 +24450533,A modular computational framework for automated peak extraction from ion mobility spectra.,"

Background

An ion mobility (IM) spectrometer coupled with a multi-capillary column (MCC) measures volatile organic compounds (VOCs) in the air or in exhaled breath. This technique is utilized in several biotechnological and medical applications. Each peak in an MCC/IM measurement represents a certain compound, which may be known or unknown. For clustering and classification of measurements, the raw data matrix must be reduced to a set of peaks. Each peak is described by its coordinates (retention time in the MCC and reduced inverse ion mobility) and shape (signal intensity, further shape parameters). This fundamental step is referred to as peak extraction. It is the basis for identifying discriminating peaks, and hence putative biomarkers, between two classes of measurements, such as a healthy control group and a group of patients with a confirmed disease. Current state-of-the-art peak extraction methods require human interaction, such as hand-picking approximate peak locations, assisted by a visualization of the data matrix. In a high-throughput context, however, it is preferable to have robust methods for fully automated peak extraction.

Results

We introduce PEAX, a modular framework for automated peak extraction. The framework consists of several steps in a pipeline architecture. Each step performs a specific sub-task and can be instantiated by different methods implemented as modules. We provide open-source software for the framework and several modules for each step. Additionally, an interface that allows easy extension by a new module is provided. Combining the modules in all reasonable ways leads to a large number of peak extraction methods. We evaluate all combinations using intrinsic error measures and by comparing the resulting peak sets with an expert-picked one.

Conclusions

Our software PEAX is able to automatically extract peaks from MCC/IM measurements within a few seconds. The automatically obtained results keep up with the results provided by current state-of-the-art peak extraction methods. This opens a high-throughput context for the MCC/IM application field. Our software is available at http://www.rahmannlab.de/research/ims.",2014-01-22 +30722155,First Report of Powdery Mildew on Spanish Needles (Bidens bipinnata) Caused by Podosphaera xanthii in Korea.,"Spanish needles (Bidens bipinnata L.) is an annual herb that belongs to a genus of flowering plants in family Asteraceae native to United States, and tropical regions around world. The plant produces important flavonoid compounds quercitin and hyperoside that function as anti-allergens, anti-inflammatories, anti-microbials, and anti-cancer agents. Between July and October 2011 and 2012, white superficial mycelia were observed initially on leaf and stem portions, but later progressed to the flower head. Surveys showed that the disease was widespread in Gwangju and most areas of South Korea. Abundant, necrotic, dark brown spots showing chasmothecia were frequently observed in October and were abundant on the adaxial surface of leaves. Chasmothecia were blackish brown to yellow without typical appendages. They ranged from 51.2 to 71.1 (mean 66.8) μm in diameter. Conidia were formed singly and the primary conidia were ellipsoid, rounded at the apex, truncated base, and ranged from 25.4 to 33.2 (mean 27.3) μm long × 10.2 to 12.2 (mean 11.3) μm wide. Conidiophores were erect, 60.1 to 101.3 (mean 98.3) μm long × 6.2 to 9.2 (mean 7.3) μm wide. From extracted genomic DNA, the internal transcribed spacer (ITS) region inclusive of 5.8S and 28S rDNA was amplified with ITS1F (5'-TCCGTAGGTGAACCTGCGG-3') and LR5F (5'-GCTATCCTGAGGGAAAC-3'), and LROR (5'-ACCCGCTGAACTTAAGC-3') and LR5F primer sets, respectively. rDNA ITS (GenBank Accession No. 
JX512555) and 28S (JX512556) homologies of the fungus (EML-BBPW1) represented 99.6% (532/534) and 100% (661/661) identity values with Podosphaera xanthii (syn. P. fusca) AB040349 and P. xanthii (syn. P. fusca) AB462798, respectively. The rDNA sequence analysis revealed that the causal fungus matched P. xanthii (syn. P. fusca), forming a xanthii/fusca group (3,4). A pathogenicity test was performed on three plants in a greenhouse. The treated leaves were sealed in vinyl pack in humid condition for 2 days. Seven days after inoculation, similar symptoms were observed on the inoculated Spanish needles plant leaves. No symptoms were observed on control plants treated with distilled water. Koch's postulates were fulfilled by re-observing the fungal pathogen on the inoculated leaves. Podosphaera (syn. Sphaerotheca) xanthii (or fusca) has been known as an ubiquitous species with a broad host range. So far, five records regarding P. xanthii (=P. fusca) have been found in plants of genus Bidens. P. xanthii has been reported to occur on B. cernua in Belarus and Switzerland. In addition, the powdery mildew species was reported to occur on B. frondosa and B. tripartita in Korea, Russia, and Switzerland (2). To our knowledge, this is the first report of powdery mildew caused by P. xanthii on Spanish needles (B. bipinnata) in Korea. References: (1) U. Braun et al. Schlechtendalia 10:91, 2003. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 2012. (3) H. B. Lee. J. Microbiol. 51:1075, 2012. (4) S. Takamatsu, et al. Persoonia 24:38, 2010.",2013-10-01 +22576172,BEDOPS: high-performance genomic feature operations.,"

Unlabelled

The large and growing number of genome-wide datasets highlights the need for high-performance feature analysis and data comparison methods, in addition to efficient data storage and retrieval techniques. We introduce BEDOPS, a software suite for common genomic analysis tasks which offers improved flexibility, scalability and execution time characteristics over previously published packages. The suite includes a utility to compress large inputs into a lossless format that can provide greater space savings and faster data extractions than alternatives.

Availability

http://code.google.com/p/bedops/ includes binaries, source and documentation.",2012-05-09 +25370817,A combination of gene expression ranking and co-expression network analysis increases discovery rate in large-scale mutant screens for novel Arabidopsis thaliana abiotic stress genes.,"As challenges to food security increase, the demand for lead genes for improving crop production is growing. However, genetic screens of plant mutants typically yield very low frequencies of desired phenotypes. Here, we present a powerful computational approach for selecting candidate genes for screening insertion mutants. We combined ranking of Arabidopsis thaliana regulatory genes according to their expression in response to multiple abiotic stresses (Multiple Stress [MST] score), with stress-responsive RNA co-expression network analysis to select candidate multiple stress regulatory (MSTR) genes. Screening of 62 T-DNA insertion mutants defective in candidate MSTR genes, for abiotic stress germination phenotypes yielded a remarkable hit rate of up to 62%; this gene discovery rate is 48-fold greater than that of other large-scale insertional mutant screens. Moreover, the MST score of these genes could be used to prioritize them for screening. To evaluate the contribution of the co-expression analysis, we screened 64 additional mutant lines of MST-scored genes that did not appear in the RNA co-expression network. The screening of these MST-scored genes yielded a gene discovery rate of 36%, which is much higher than that of classic mutant screens but not as high as when picking candidate genes from the co-expression network. The MSTR co-expression network that we created, AraSTressRegNet is publicly available at http://netbio.bgu.ac.il/arnet. 
This systems biology-based screening approach combining gene ranking and network analysis could be generally applicable to enhancing identification of genes regulating additional processes in plants and other organisms provided that suitable transcriptome data are available.",2014-11-05 +26115033,Lead Exposure during Early Human Development and DNA Methylation of Imprinted Gene Regulatory Elements in Adulthood.,"

Background

Lead exposure during early development causes neurodevelopmental disorders by unknown mechanisms. Epidemiologic studies have focused recently on determining associations between lead exposure and global DNA methylation; however, such approaches preclude the identification of loci that may alter human disease risk.

Objectives

The objective of this study was to determine whether maternal, postnatal, and early childhood lead exposure can alter the differentially methylated regions (DMRs) that control the monoallelic expression of imprinted genes involved in metabolism, growth, and development.

Methods

Questionnaire data and serial blood lead levels were obtained from 105 participants (64 females, 41 males) of the Cincinnati Lead Study from birth to 78 months. When participants were adults, we used Sequenom EpiTYPER assays to test peripheral blood DNA to quantify CpG methylation in peripheral blood leukocytes at DMRs of 22 human imprinted genes. Statistical analyses were conducted using linear regression.

Results

Mean blood lead concentration from birth to 78 months was associated with a significant decrease in PEG3 DMR methylation (β = -0.0014; 95% CI: -0.0023, -0.0005, p = 0.002), stronger in males (β = -0.0024; 95% CI: -0.0038, -0.0009, p = 0.003) than in females (β = -0.0009; 95% CI: -0.0020, 0.0003, p = 0.1). Elevated mean childhood blood lead concentration was also associated with a significant decrease in IGF2/H19 (β = -0.0013; 95% CI: -0.0023, -0.0003, p = 0.01) DMR methylation, but primarily in females, (β = -0.0017; 95% CI: -0.0029, -0.0006, p = 0.005) rather than in males, (β = -0.0004; 95% CI: -0.0023, 0.0015, p = 0.7). Elevated blood lead concentration during the neonatal period was associated with higher PLAGL1/HYMAI DMR methylation regardless of sex (β = 0.0075; 95% CI: 0.0018, 0.0132, p = 0.01). The magnitude of associations between cumulative lead exposure and CpG methylation remained unaltered from 30 to 78 months.

Conclusions

Our findings provide evidence that early childhood lead exposure results in sex-dependent and gene-specific DNA methylation differences in the DMRs of PEG3, IGF2/H19, and PLAGL1/HYMAI in adulthood.

Citation

Li Y, Xie C, Murphy SK, Skaar D, Nye M, Vidal AC, Cecil KM, Dietrich KN, Puga A, Jirtle RL, Hoyo C. 2016. Lead exposure during early human development and DNA methylation of imprinted gene regulatory elements in adulthood. Environ Health Perspect 124:666-673; http://dx.doi.org/10.1289/ehp.1408577.",2015-06-26 +24878919,"R. S. WebTool, a web server for random sampling-based significance evaluation of pairwise distances.","Pairwise comparison of data vectors represents a large part of computational biology, especially with the continuous increase in genome-wide approaches yielding more information from more biological samples simultaneously. Gene clustering for function prediction as well as analyses of signalling pathways and the time-dependent dynamics of a system are common biological approaches that often rely on large dataset comparison. Different metrics can be used to evaluate the similarity between entities to be compared, such as correlation coefficients and distances. While the latter offers a more flexible way of measuring potential biological relationships between datasets, the significance of any given distance is highly dependent on the dataset and cannot be easily determined. Monte Carlo methods are robust approaches for evaluating the significance of distance values by multiple random permutations of the dataset followed by distance calculation. We have developed R. S. WebTool (http://rswebtool.kwaklab.org), a user-friendly online server for random sampling-based evaluation of distance significances that features an array of visualization and analysis tools to help non-bioinformaticist users extract significant relationships from random noise in distance-based dataset analyses.",2014-05-30 +22506599,SPAdes: a new genome assembly algorithm and its applications to single-cell sequencing.,"The lion's share of bacteria in various environments cannot be cloned in the laboratory and thus cannot be sequenced using existing technologies. 
A major goal of single-cell genomics is to complement gene-centric metagenomic data with whole-genome assemblies of uncultivated organisms. Assembly of single-cell data is challenging because of highly non-uniform read coverage as well as elevated levels of sequencing errors and chimeric reads. We describe SPAdes, a new assembler for both single-cell and standard (multicell) assembly, and demonstrate that it improves on the recently released E+V-SC assembler (specialized for single-cell data) and on popular assemblers Velvet and SoapDeNovo (for multicell data). SPAdes generates single-cell assemblies, providing information about genomes of uncultivatable bacteria that vastly exceeds what may be obtained via traditional metagenomics studies. SPAdes is available online ( http://bioinf.spbau.ru/spades ). It is distributed as open source software.",2012-04-16 +26108914,Clinicopathologic features of plasmablastic lymphoma: Single-center series of 8 cases from Saudi Arabia.,"

Background

Plasmablastic lymphoma (PBL) is a rare subtype of non-Hodgkin's lymphoma. Characterized by its aggressive nature and plasmacytic differentiation, PBL remains a therapeutic and diagnostic challenge; it generally has a poor prognosis with very few long-term survivors and most patients dying within 2 years from initial presentation. PBL has been reported in several other countries; however, there have been no reported cases from Saudi Arabia. Here, we report 8 cases of PBL depicting the clinical presentation, immunocompetency, immunophenotypic characterization, diagnostic challenges and treatment outcome.

Methods

The medical records were reviewed for clinical presentation, staging, laboratory data, radiological studies, treatments, and outcomes. A broad immunohistochemical panel consisting of CD45, CD3, CD20, CD79a, Pax5, CD38, CD138, MUM1, EMA, Kappa, Lambda, CD 56, CD30, Bcl-2, Bcl-6, Alk-1, Ki-67, EBV-LMP-1, and HHV8 was performed.

Results

The tumors predominantly exhibited immunoblastic/plasmablastic or plasmacytic morphologic features and had a plasma cell-like immunophenotype. All cases were immunoreactive for CD38, CD138 and MUM1 confirming plasma cell differentiation of the tumor cells. CD20 was negative for all cases; whereas CD79a and Pax5 were weakly positive in 2 cases. All 8 cases were EBV-LMP-1/EBER-1 negative, and 1 case was HHV8 positive. Similar to previously published studies, PBL in Saudi Arabia is characterized by male predominance (6/8), median age 51.5 years (mean age 46 years), associated with early dissemination, poor response to therapy, and limited survival (average survival time, 6.4 months, median overall survival 5.5 months). However, it does have some unique features. It occurs more commonly in immunocompetent persons (6/8, 75%), is not associated with EBV infection (0/8), and nodal involvement (either primary or secondary) is common among patients (6/8). In addition, extra-oral sites are more common than oral/nasal cavities (7/8) and the c-myc gene is not common (1/8, 12.5%).

Conclusion

It appears that PBL is heterogeneous in terms of clinical presentation and morphology. PBL is a therapeutic challenge with a clinical course that is characterized by its high rate of relapse and death. To date, treatment responses are usually partial and temporary. Therapies that are more intensive than CHOP do not seem to prolong survival. Further research is needed to understand the biology and molecular pathogenesis of PBL in order to improve therapies.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1465801416161912.",2015-06-25 +24078714,A C library for retrieving specific reactions from the BioModels database.,"

Summary

We describe libSBMLReactionFinder, a C library for retrieving specific biochemical reactions from the curated systems biology markup language models contained in the BioModels database. The library leverages semantic annotations in the database to associate reactions with human-readable descriptions, making the reactions retrievable through simple string searches. Our goal is to provide a useful tool for quantitative modelers who seek to accelerate modeling efforts through the reuse of previously published representations of specific chemical reactions.

Availability and implementation

The library is open-source and dual licensed under the Mozilla Public License Version 2.0 and GNU General Public License Version 2.0. Project source code, downloads and documentation are available at http://code.google.com/p/lib-sbml-reaction-finder.",2013-09-29 +24077687,Rigid intramedullary nail fixation of femoral fractures in adolescents: what evidence is available?,"

Background

Femoral fracture in adolescents is a significant injury. It is generally agreed that operative fixation is the treatment of choice, and rigid intramedullary nail fixation is a treatment option. However, numerous types of rigid nails to fix adolescent femoral fractures have been described. Hence, the aim of this paper was to collate and evaluate the available evidence for managing diaphyseal femoral fractures in adolescents using rigid intramedullary nails.

Materials and methods

A literature search was undertaken using the healthcare database website ( http://www.library.nhs.uk/hdas ). Medline, CINAHL, Embase, and the Cochrane Library databases were searched to identify prospective and retrospective studies of rigid intramedullary nail fixation in the adolescent population.

Results

The literature search returned 1,849 articles, among which 51 relevant articles were identified. Of these 51 articles, 23 duplicates were excluded, so a total of 28 articles were reviewed. First-generation nails had a high incidence of limb length discrepancy (Küntscher 5.8 %, Grosse-Kempf 9 %), whilst second-generation nails had a lower incidence (Russell-Taylor 1.7 %, AO 2.6 %). Avascular necrosis was noted with solid Ti nails (2.6 %), AO femoral nails (1.3 %) and Russell-Taylor nails (0.85 %). These complications have not been reported with the current generation of nails.

Conclusions

Rigid intramedullary nail fixation of femoral fractures in adolescents is a useful procedure with good clinical results. A multiplanar design and lateral trochanteric entry are key to a successful outcome of titanium alloy nail fixation.",2013-09-29 +24206655,Association of a polymorphism in PON-1 gene with steroid-induced osteonecrosis of femoral head in Chinese Han population.,"

Background

Treatment with steroids covers a wide spectrum of diseases in clinic. However, some users are suffering from serious side effects of steroid administration, while we enjoy the benefit it brings about. Osteonecrosis of the femoral head (ONFH) is a troublesome one among them. Recent studies have demonstrated that lipid metabolism disorder may play a vital role in pathogenesis of ONFH and mutation of the paraoxonase-1 (PON-1) gene may be involved in the occurrence of this disease. However, the relationship between polymorphisms of PON-1 and ONFH has not been thoroughly studied. The aim of this study was to determine whether PON-1 polymorphisms are associated with steroid-induced ONFH through a cohort study among Chinese Han population.

Methods

This trial applied a case-control scheme to compare the clinical data including PON-1 SNP among 94 patients and 106 control subjects to analyze the association between SNP and risk of steroid-induced ONFH. Time-of-flight mass spectrometry was used for genotyping, and the results were analyzed in multivariate analysis models.

Results

According to polymorphism test of rs662, its SNP was significantly associated with the risk of ONFH in overdominant analysis model [P value: 0.022; odds ratio (OR): 0.39]. However, genotype frequencies of rs662 of PON-1 gene between case and control group showed no differences (P > 0.05).

Conclusions

Our data suggest for the first time that SNP (rs662) of the PON-1 gene was associated with the risk of steroid-induced ONFH. In addition, PAI-1 SNPs may play an important role in pathogenesis of ONFH.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticphatology.diagnomx.eu/vs/1501829501107336.",2013-11-08 +21249673,Steroids for traumatic optic neuropathy.,"

Background

Traumatic optic neuropathy (TON) is an important cause of severe visual loss following blunt or penetrating head trauma. Following the initial injury, optic nerve swelling within the optic nerve canal can result in secondary retinal ganglion cell loss. Optic nerve decompression with steroids or surgical interventions or both has therefore been advocated as a means of improving visual prognosis in TON.

Objectives

The aim of this review was to examine the effectiveness and safety of using steroids in TON.

Search strategy

We searched CENTRAL (which contains the Cochrane Eyes and Vision Group Trials Register) (The Cochrane Library 2010, Issue 11), MEDLINE (January 1950 to November 2010), EMBASE (January 1980 to November 2010), Latin American and Caribbean Literature on Health Sciences (LILACS) (January 1982 to November 2010), the metaRegister of Controlled Trials (mRCT) (www.controlled-trials.com), ClinicalTrials.gov (http://clinicaltrials.gov) and Web of Science Conference Proceedings Citation Index- Science (CPCI-S). There were no language or date restrictions in the search for trials. The electronic databases were last searched on 23 November 2010. We also searched the reference lists of included studies, other reviews and book chapters on TON to find references to additional trials. The Science Citation Index was used to look for papers that cited the studies included in this review. We did not manually search any journals or conference proceedings. We contacted trial investigators and experts in the field to identify additional published and unpublished studies.

Selection criteria

We planned to include only randomised controlled trials (RCTs) of TON in which any steroid regime, either on its own or in combination with surgical optic nerve decompression, was compared to surgery alone or no treatment.

Data collection and analysis

Two review authors independently assessed the titles and abstracts identified from the electronic searches.

Main results

We included one study that met our selection criteria; a double-masked, placebo-controlled, randomised trial of high dose intravenous steroids in patients with indirect TON diagnosed within seven days of the initial injury. A total of 31 eligible participants were randomised to receive either high dose intravenous steroids (n = 16) or placebo (n = 15), and they were all followed-up for three months. Mean final best corrected visual acuity (BCVA) was 1.78±1.23 Logarithm of the Minimum Angle of Resolution (LogMAR) in the placebo group, and 1.11±1.14 LogMAR in the steroid group. The mean difference in BCVA between the placebo and steroid groups was 0.67 LogMAR (95% confidence interval -1.54 to 0.20), and this difference was not statistically significant (P = 0.13). At three months follow-up, an improvement in BCVA of 0.40 LogMAR occurred in eight eyes (8/15, 53.3%) in the placebo group, and in 11 eyes (11/16, 68.8%) in the treatment group. This difference was not statistically significant (P = 0.38).

Authors' conclusions

There is a relatively high rate of spontaneous visual recovery in TON and there is no convincing data that steroids provide any additional visual benefit over observation alone. Recent evidence also suggests a possible detrimental effect of steroids in TON and further studies are urgently needed to clarify this important issue. Each case therefore needs to be assessed on an individual basis and proper informed consent is paramount.",2011-01-19 +22936215,Identifying ChIP-seq enrichment using MACS.,"Model-based analysis of ChIP-seq (MACS) is a computational algorithm that identifies genome-wide locations of transcription/chromatin factor binding or histone modification from ChIP-seq data. MACS consists of four steps: removing redundant reads, adjusting read position, calculating peak enrichment and estimating the empirical false discovery rate (FDR). In this protocol, we provide a detailed demonstration of how to install MACS and how to use it to analyze three common types of ChIP-seq data sets with different characteristics: the sequence-specific transcription factor FoxA1, the histone modification mark H3K4me3 with sharp enrichment and the H3K36me3 mark with broad enrichment. We also explain how to interpret and visualize the results of MACS analyses. The algorithm requires ∼3 GB of RAM and 1.5 h of computing time to analyze a ChIP-seq data set containing 30 million reads, an estimate that increases with sequence coverage. MACS is open source and is available from http://liulab.dfci.harvard.edu/MACS/.",2012-08-30 +24459619,Hospitalized injuries and deaths in a trauma unit in upper Egypt.,"

Context

It is predicted that injuries will be among the top 20 leading causes of death worldwide by 2030. In Egypt, the burden of injuries is significant: injury was the fifth leading cause of death in 2004. It is also considered a hidden epidemic due to under-reporting.

Aims

To identify the patterns of hospitalized injury cases at Trauma Unit in Assiut University Hospitals and to provide an indication about who are at increased risk of hospitalization or death due to injury.

Settings and design

A descriptive retrospective study.

Materials and methods

Registered data of all hospitalized injuries from January 2002 to December 2009 at Trauma Unit of Assiut University Hospitals in Upper Egypt were included.

Statistical analysis

Advanced statistical package for social sciences (SPSS) program version 16 (IBM Corporation - http://www.spss.com) was used for data analysis. Descriptive statistics and tests of significance were used. P value was considered statistically significant when it was less than 0.05 and highly significant when it was less than 0.001.

Results

Of the attended injury cases, 31.8% were admitted. Most admissions were below the age of 30 years (58.4%). Male to female ratio was 3:1. Falls were the most common injuries (43.6%), followed by transport accidents (31.1%). More than half of deaths (56.4%) were due to transport accidents. Transport accidents, falls, interpersonal violence and gunshot injuries had an early ranking throughout the study period.

Conclusion

Road traffic injuries, falls and violence are areas of priority in preventive strategies. Paying special attention for young adults is recommended.",2013-10-01 +22870267,SIMPLEX: cloud-enabled pipeline for the comprehensive analysis of exome sequencing data.,"In recent studies, exome sequencing has proven to be a successful screening tool for the identification of candidate genes causing rare genetic diseases. Although underlying targeted sequencing methods are well established, necessary data handling and focused, structured analysis still remain demanding tasks. Here, we present a cloud-enabled autonomous analysis pipeline, which comprises the complete exome analysis workflow. The pipeline combines several in-house developed and published applications to perform the following steps: (a) initial quality control, (b) intelligent data filtering and pre-processing, (c) sequence alignment to a reference genome, (d) SNP and DIP detection, (e) functional annotation of variants using different approaches, and (f) detailed report generation during various stages of the workflow. The pipeline connects the selected analysis steps, exposes all available parameters for customized usage, performs required data handling, and distributes computationally expensive tasks either on a dedicated high-performance computing infrastructure or on the Amazon cloud environment (EC2). The presented application has already been used in several research projects including studies to elucidate the role of rare genetic diseases. The pipeline is continuously tested and is publicly available under the GPL as a VirtualBox or Cloud image at http://simplex.i-med.ac.at; additional supplementary data is provided at http://www.icbi.at/exome.",2012-08-01 +23539302,Improved ancestry inference using weights from external reference panels.,"

Motivation

Inference of ancestry using genetic data is motivated by applications in genetic association studies, population genetics and personal genomics. Here, we provide methods and software for improved ancestry inference using genome-wide single nucleotide polymorphism (SNP) weights from external reference panels. This approach makes it possible to leverage the rich ancestry information that is available from large external reference panels, without the administrative and computational complexities of re-analyzing the raw genotype data from the reference panel in subsequent studies.

Results

We extensively validate our approach in multiple African American, Latino American and European American datasets, making use of genome-wide SNP weights derived from large reference panels, including HapMap 3 populations and 6546 European Americans from the Framingham Heart Study. We show empirically that our approach provides much greater accuracy than either the prevailing ancestry-informative marker (AIM) approach or the analysis of genome-wide target genotypes without a reference panel. For example, in an independent set of 1636 European American genome-wide association study samples, we attained prediction accuracy (R(2)) of 1.000 and 0.994 for the first two principal components using our method, compared with 0.418 and 0.407 using 150 published AIMs or 0.955 and 0.003 by applying principal component analysis directly to the target samples. We finally show that the higher accuracy in inferring ancestry using our method leads to more effective correction for population stratification in association studies.

Availability

The SNPweights software is available online at http://www.hsph.harvard.edu/faculty/alkes-price/software/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-03-28 +22689781,GenomeRing: alignment visualization based on SuperGenome coordinates.,"

Motivation

The number of completely sequenced genomes is continuously rising, allowing for comparative analyses of genomic variation. Such analyses are often based on whole-genome alignments to elucidate structural differences arising from insertions, deletions or from rearrangement events. Computational tools that can visualize genome alignments in a meaningful manner are needed to help researchers gain new insights into the underlying data. Such visualizations typically are either realized in a linear fashion as in genome browsers or by using a circular approach, where relationships between genomic regions are indicated by arcs. Both methods allow for the integration of additional information such as experimental data or annotations. However, providing a visualization that still allows for a quick and comprehensive interpretation of all important genomic variations together with various supplemental data, which may be highly heterogeneous, remains a challenge.

Results

Here, we present two complementary approaches to tackle this problem. First, we propose the SuperGenome concept for the computation of a common coordinate system for all genomes in a multiple alignment. This coordinate system allows for the consistent placement of genome annotations in the presence of insertions, deletions and rearrangements. Second, we present the GenomeRing visualization that, based on the SuperGenome, creates an interactive overview visualization of the multiple genome alignment in a circular layout. We demonstrate our methods by applying them to an alignment of Campylobacter jejuni strains for the discovery of genomic islands as well as to an alignment of Helicobacter pylori, which we visualize in combination with gene expression data.

Availability

GenomeRing and example data is available at http://it.inf.uni-tuebingen.de/software/genomering/.",2012-06-01 +25244936,Prediction of nucleosome rotational positioning in yeast and human genomes based on sequence-dependent DNA anisotropy.,"

Background

An organism's DNA sequence is one of the key factors guiding the positioning of nucleosomes within a cell's nucleus. Sequence-dependent bending anisotropy dictates how DNA is wrapped around a histone octamer. One of the best established sequence patterns consistent with this anisotropy is the periodic occurrence of AT-containing dinucleotides (WW) and GC-containing dinucleotides (SS) in the nucleosomal locations where DNA is bent in the minor and major grooves, respectively. Although this simple pattern has been observed in nucleosomes across eukaryotic genomes, its use for prediction of nucleosome positioning was not systematically tested.

Results

We present a simple computational model, termed the W/S scheme, implementing this pattern, without using any training data. This model accurately predicts the rotational positioning of nucleosomes both in vitro and in vivo, in yeast and human genomes. About 65 - 75% of the experimentally observed nucleosome positions are predicted with the precision of one to two base pairs. The program is freely available at http://people.rit.edu/fxcsbi/WS_scheme/. We also introduce a simple and efficient way to compare the performance of different models predicting the rotational positioning of nucleosomes.

Conclusions

This paper presents the W/S scheme to achieve accurate prediction of rotational positioning of nucleosomes, solely based on the sequence-dependent anisotropic bending of nucleosomal DNA. This method successfully captures DNA features critical for the rotational positioning of nucleosomes, and can be further improved by incorporating additional terms related to the translational positioning of nucleosomes in a species-specific manner.",2014-09-22 +23088505,Cloud CPFP: a shotgun proteomics data analysis pipeline using cloud and high performance computing.,"We have extended the functionality of the Central Proteomics Facilities Pipeline (CPFP) to allow use of remote cloud and high performance computing (HPC) resources for shotgun proteomics data processing. CPFP has been modified to include modular local and remote scheduling for data processing jobs. The pipeline can now be run on a single PC or server, a local cluster, a remote HPC cluster, and/or the Amazon Web Services (AWS) cloud. We provide public images that allow easy deployment of CPFP in its entirety in the AWS cloud. This significantly reduces the effort necessary to use the software, and allows proteomics laboratories to pay for compute time ad hoc, rather than obtaining and maintaining expensive local server clusters. Alternatively the Amazon cloud can be used to increase the throughput of a local installation of CPFP as necessary. We demonstrate that cloud CPFP allows users to process data at higher speed than local installations but with similar cost and lower staff requirements. In addition to the computational improvements, the web interface to CPFP is simplified, and other functionalities are enhanced. 
The software is under active development at two leading institutions and continues to be released under an open-source license at http://cpfp.sourceforge.net.",2012-10-29 +23243115,Cohort profile: the China Health and Retirement Longitudinal Study (CHARLS).,"The China Health and Retirement Longitudinal Study (CHARLS) is a nationally representative longitudinal survey of persons in China 45 years of age or older and their spouses, including assessments of social, economic, and health circumstances of community-residents. CHARLS examines health and economic adjustments to rapid ageing of the population in China. The national baseline survey for the study was conducted between June 2011 and March 2012 and involved 17 708 respondents. CHARLS respondents are followed every 2 years, using a face-to-face computer-assisted personal interview (CAPI). Physical measurements are made at every 2-year follow-up, and blood sample collection is done once in every two follow-up periods. A pilot survey for CHARLS was conducted in two provinces of China in 2008, on 2685 individuals, who were resurveyed in 2012. To ensure the adoption of best practices and international comparability of results, CHARLS was harmonized with leading international research studies in the Health and Retirement Study (HRS) model. Requests for collaborations should be directed to Dr Yaohui Zhao (yhzhao@nsd.edu.cn). All data in CHARLS are maintained at the National School of Development of Peking University and will be accessible to researchers around the world at the study website. The 2008 pilot data for CHARLS are available at: http://charls.ccer.edu.cn/charls/. National baseline data for the study are expected to be released in January 2013.",2012-12-12 +23531787,EDGE-pro: Estimated Degree of Gene Expression in Prokaryotic Genomes.,"

Background

The expression levels of bacterial genes can be measured directly using next-generation sequencing (NGS) methods, offering much greater sensitivity and accuracy than earlier, microarray-based methods. Most bioinformatics software for estimating levels of gene expression from NGS data has been designed for eukaryotic genomes, with algorithms focusing particularly on detection of splicing patterns. These methods do not perform well on bacterial genomes.

Results

Here we describe the first software system designed explicitly for quantifying the degree of gene expression in bacteria and other prokaryotes. EDGE-pro (Estimated Degree of Gene Expression in PROkaryotes) processes the raw data from an RNA-seq experiment on a bacterial or archaeal species and produces estimates of the expression levels for each gene in these gene-dense genomes.

Software

The EDGE-pro tool is implemented as a pipeline of C++ and Perl programs and is freely available as open-source code at http://www.genomics.jhu.edu/software/EDGE/index.shtml.",2013-03-10 +22519468,IsoQuant: a software tool for stable isotope labeling by amino acids in cell culture-based mass spectrometry quantitation.,"Accurate protein identification and quantitation are critical when interpreting the biological relevance of large-scale shotgun proteomics data sets. Although significant technical advances in peptide and protein identification have been made, accurate quantitation of high-throughput data sets remains a key challenge in mass spectrometry data analysis and is a labor intensive process for many proteomics laboratories. Here, we report a new SILAC-based proteomics quantitation software tool, named IsoQuant, which is used to process high mass accuracy mass spectrometry data. IsoQuant offers a convenient quantitation framework to calculate peptide/protein relative abundance ratios. At the same time, it also includes a visualization platform that permits users to validate the quality of SILAC peptide and protein ratios. The program is written in the C# programming language under the Microsoft .NET framework version 4.0 and has been tested to be compatible with both 32-bit and 64-bit Windows 7. It is freely available to noncommercial users at http://www.proteomeumb.org/MZw.html .",2012-05-03 +23323831,GSVA: gene set variation analysis for microarray and RNA-seq data.,"

Background

Gene set enrichment (GSE) analysis is a popular framework for condensing information from gene expression profiles into a pathway or signature summary. The strengths of this approach over single gene analysis include noise and dimension reduction, as well as greater biological interpretability. As molecular profiling experiments move beyond simple case-control studies, robust and flexible GSE methodologies are needed that can model pathway activity within highly heterogeneous data sets.

Results

To address this challenge, we introduce Gene Set Variation Analysis (GSVA), a GSE method that estimates variation of pathway activity over a sample population in an unsupervised manner. We demonstrate the robustness of GSVA in a comparison with current state of the art sample-wise enrichment methods. Further, we provide examples of its utility in differential pathway activity and survival analysis. Lastly, we show how GSVA works analogously with data from both microarray and RNA-seq experiments.

Conclusions

GSVA provides increased power to detect subtle pathway activity changes over a sample population in comparison to corresponding methods. While GSE methods are generally regarded as end points of a bioinformatic analysis, GSVA constitutes a starting point to build pathway-centric models of biology. Moreover, GSVA contributes to the current need of GSE methods for RNA-seq data. GSVA is an open source software package for R which forms part of the Bioconductor project and can be downloaded at http://www.bioconductor.org.",2013-01-16 +23282032,A Bayesian decision fusion approach for microRNA target prediction.,"MicroRNAs (miRNAs) are 19-25 nucleotides non-coding RNAs known to have important post-transcriptional regulatory functions. The computational target prediction algorithm is vital to effective experimental testing. However, since different existing algorithms rely on different features and classifiers, there is a poor agreement among the results of different algorithms. To benefit from the advantages of different algorithms, we proposed an algorithm called BCmicrO that combines the prediction of different algorithms with Bayesian Network. BCmicrO was evaluated using the training data and the proteomic data. The results show that BCmicrO improves both the sensitivity and the specificity of each individual algorithm. All the related materials including genome-wide prediction of human targets and a web-based tool are available at http://compgenomics.utsa.edu/gene/gene_1.php.",2012-12-17 +24078374,"Levels of advertised unprotected vaginal and oral sex by independent indoor female sex workers in West Yorkshire, UK.","

Objectives

To assess the proportion of independent indoor female sex workers (FSW) in West Yorkshire, UK who advertise unprotected sex, and to investigate any association with cost, location and provision of anal sex.

Methods

Data on whether independent indoor FSW (defined as those not advertising via an escort agency or through a parlour) advertised unprotected sexual services, along with demographic data, were collected from 462 advertisement profiles of FSW in West Yorkshire from the website http://www.adultwork.com. Independent t test and χ(2) statistics were used to test the association between advertised unprotected vaginal and oral sex, and FSW age, cost of services, location and whether they advertised anal sex.

Results

Unprotected vaginal sex was advertised by 8% of FSW, and unprotected oral sex by 74% of FSW. FSW advertising unprotected vaginal sex were more likely to live in Wakefield and Bradford than in Leeds, had significantly lower hourly rates, and were more likely to advertise anal sex.

Conclusions

Advertised condom use for vaginal and oral sex by independent indoor FSW in West Yorkshire was significantly lower than reported rates of protected sex found in previous studies based in London and the south of England. The advertisement of unprotected vaginal sex is associated with factors such as lower hourly rates and the advertisement of higher risk anal sex, which may signify greater economic need. FSW offering unprotected sex therefore represent an at-risk target group for health promotion.",2013-09-27 +25953014,Prediction of in-hospital mortality after ruptured abdominal aortic aneurysm repair using an artificial neural network.,"

Objective

Ruptured abdominal aortic aneurysm (rAAA) carries a high mortality rate, even with prompt transfer to a medical center. An artificial neural network (ANN) is a computational model that improves predictive ability through pattern recognition while continually adapting to new input data. The goal of this study was to effectively use ANN modeling to provide vascular surgeons a discriminant adjunct to assess the likelihood of in-hospital mortality on a pending rAAA admission using easily obtainable patient information from the field.

Methods

Of 332 total patients from a single institution from 1998 to 2013 who had attempted rAAA repair, 125 were reviewed for preoperative factors associated with in-hospital mortality; 108 patients received an open operation, and 17 patients received endovascular repair. Five variables were found significant on multivariate analysis (P < .05), and four of these five (preoperative shock, loss of consciousness, cardiac arrest, and age) were modeled by multiple logistic regression and an ANN. These predictive models were compared against the Glasgow Aneurysm Score. All models were assessed by generation of receiver operating characteristic curves and actual vs predicted outcomes plots, with area under the curve and Pearson r(2) value as the primary measures of discriminant ability.

Results

Of the 125 patients, 53 (42%) did not survive to discharge. Five preoperative factors were significant (P < .05) independent predictors of in-hospital mortality in multivariate analysis: advanced age, renal disease, loss of consciousness, cardiac arrest, and shock, although renal disease was excluded from the models. The sequential accumulation of zero to four of these risk factors progressively increased overall mortality rate, from 11% to 16% to 44% to 76% to 89% (age ≥ 70 years considered a risk factor). Algorithms derived from multiple logistic regression, ANN, and Glasgow Aneurysm Score models generated area under the curve values of 0.85 ± 0.04, 0.88 ± 0.04 (training set), and 0.77 ± 0.06 and Pearson r(2) values of .36, .52 and .17, respectively. The ANN model represented the most discriminant of the three.

Conclusions

An ANN-based predictive model may represent a simple, useful, and highly discriminant adjunct to the vascular surgeon in accurately identifying those patients who may carry a high mortality risk from attempted repair of rAAA, using only easily definable preoperative variables. Although still requiring external validation, our model is available for demonstration at https://redcap.vanderbilt.edu/surveys/?s=NN97NM7DTK.",2015-05-05 +25102856,Cohort profile: The Cork BASELINE Birth Cohort Study: Babies after SCOPE: Evaluating the Longitudinal Impact on Neurological and Nutritional Endpoints.,"

Unlabelled

The Cork BASELINE Birth Cohort Study (Babies After SCOPE: Evaluating the Longitudinal Impact on Neurological and Nutritional Endpoints) was established with three main objectives: to investigate the effects of intrauterine growth restriction and early nutrition on metabolic health and neurodevelopment; to ascertain the incidence and determinants of food allergy and eczema in early childhood; and to describe early infant feeding, supplementation and nutritional status and their effects on physical and neurological growth and health outcomes. The SCOPE Ireland pregnancy cohort formed the basis of recruitment of infants to BASELINE [n 1537] and an additional 600 infants were recruited after delivery providing a total sample of 2137 between 2008 and 2011. Assessments were at day 2 and at 2, 6, 12 and 24 months, with 5-year assessments ongoing. Blood and DNA samples were biobanked at 15 and 20 weeks' gestation, birth, 24 months and 5 years. Body composition data were collected at 2 days and 2 months (air-displacement plethysmography) and at 5 years (dual-energy X-ray absorptiometry). Trans-epidermal water loss was measured at 2 days, 2, 6 and 24 months. Detailed dietary and validated developmental assessments were conducted at 24 months. Researchers interested in collaboration can contact [baseline@ucc.ie] and further information can be found at [http://www.baselinestudy.net/ or http://www.birthcohorts.net/].",2014-08-07 +24778108,Power analysis and sample size estimation for sequence-based association studies.,"

Motivation

Statistical methods have been developed to test for complex trait rare variant (RV) associations, in which variants are aggregated across a region, which is typically a gene. Power analysis and sample size estimation for sequence-based RV association studies are challenging because of the necessity to realistically model the underlying allelic architecture of complex diseases within a suitable analytical framework to assess the performance of a variety of RV association methods in an unbiased manner.

Summary

We developed SEQPower, a software package to perform statistical power analysis for sequence-based association data under a variety of genetic variant and disease phenotype models. It aids epidemiologists in determining the best study design, sample size and statistical tests for sequence-based association studies. It also provides biostatisticians with a platform to fairly compare RV association methods and to validate and assess novel association tests.

Availability and implementation

The SEQPower program, source code, multi-platform executables, documentation, list of association tests, examples and tutorials are available at http://bioinformatics.org/spower.",2014-04-28 +22962478,Trajectory-oriented Bayesian experiment design versus Fisher A-optimal design: an in depth comparison study.,"

Motivation

Experiment design strategies for biomedical models with the purpose of parameter estimation or model discrimination are the focus of intense research. Experimental limitations such as sparse and noisy data result in unidentifiable parameters and render related design tasks challenging problems. Often, the temporal resolution of data is a limiting factor and the amount of possible experimental interventions is finite. To address this issue, we propose a Bayesian experiment design algorithm to minimize the prediction uncertainty for a given set of experiments and compare it to traditional A-optimal design.

Results

In an in depth numerical study involving an ordinary differential equation model of the trans-Golgi network with 12 partly non-identifiable parameters, we minimized the prediction uncertainty efficiently for predefined scenarios. The introduced method results in twice the prediction precision as the same amount of A-optimal designed experiments while introducing a useful stopping criterion. The simulation intensity of the algorithm's major design step is thereby reasonably affordable. Besides smaller variances in the predicted trajectories compared with Fisher design, we could also achieve smaller parameter posterior distribution entropies, rendering this method superior to A-optimal Fisher design also in the parameter space.

Availability

Necessary software/toolbox information are available in the supplementary material. The project script including example data can be downloaded from http://www.ist.uni-stuttgart.de/%7eweber/BayesFisher2012.

Contact

patrick.weber@ist.uni-stuttgart.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-01 +24750536,Exploiting extension bias in polymerase chain reaction to improve primer specificity in ensembles of nearly identical DNA templates.,"We describe a semi-empirical framework that combines thermodynamic models of primer hybridization with experimentally determined elongation biases introduced by 3'-end mismatches for improving polymerase chain reaction (PCR)-based sequence discrimination. The framework enables rational and automatic design of primers for optimal targeting of one or more sequences in ensembles of nearly identical DNA templates. In situations where optimal targeting is not feasible, the framework accurately predicts non-target sequences that are difficult to distinguish with PCR alone. Based on the synergistic effects of disparate sources of PCR bias, we used our framework to robustly distinguish between two alleles that differ by a single base pair. To demonstrate the applicability to environmental microbiology, we designed primers specific to all recognized archaeal and bacterial genera in the Ribosomal Database Project, and have made these primers available online. We applied these primers experimentally to obtain genus-specific amplification of 16S rRNA genes representing minor constituents of an environmental DNA sample. Our results demonstrate that inherent PCR biases can be reliably employed in an automatic fashion to maximize sequence discrimination and accurately identify potential cross-amplifications. We have made our framework accessible online as a programme for designing primers targeting one group of sequences in a set with many other sequences (http://DECIPHER.cee.wisc.edu).",2013-09-24 +25104813,A method for de novo nucleic acid diagnostic target discovery.,"

Motivation

A proper target or marker is essential in any diagnosis (e.g. an infection or cancer). An ideal diagnostic target should be both conserved in and unique to the pathogen. Currently, these targets can only be identified manually, which is time-consuming and usually error-prone. Because of the increasingly frequent occurrences of emerging epidemics and multidrug-resistant 'superbugs', a rapid diagnostic target identification process is needed.

Results

A new method that can identify uniquely conserved regions (UCRs) as candidate diagnostic targets for a selected group of organisms solely from their genomic sequences has been developed and successfully tested. Using a sequence-indexing algorithm to identify UCRs and a k-mer integer-mapping model for computational efficiency, this method has successfully identified UCRs within the bacteria domain for 15 test groups, including pathogenic, probiotic, commensal and extremophilic bacterial species or strains. Based on the identified UCRs, new diagnostic primer sets were designed, and their specificity and efficiency were tested by polymerase chain reaction amplifications from both pure isolates and samples containing mixed cultures.

Availability and implementation

The UCRs identified for the 15 bacterial species are now freely available at http://ucr.synblex.com. The source code of the programs used in this study is accessible at http://ucr.synblex.com/bacterialIdSourceCode.d.zip

Contact

yazhousun@synblex.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-08-07 +24947750,Omega: an overlap-graph de novo assembler for metagenomics.,"

Motivation

Metagenomic sequencing allows reconstruction of microbial genomes directly from environmental samples. Omega (overlap-graph metagenome assembler) was developed for assembling and scaffolding Illumina sequencing data of microbial communities.

Results

Omega found overlaps between reads using a prefix/suffix hash table. The overlap graph of reads was simplified by removing transitive edges and trimming short branches. Unitigs were generated based on minimum cost flow analysis of the overlap graph and then merged to contigs and scaffolds using mate-pair information. In comparison with three de Bruijn graph assemblers (SOAPdenovo, IDBA-UD and MetaVelvet), Omega provided comparable overall performance on a HiSeq 100-bp dataset and superior performance on a MiSeq 300-bp dataset. In comparison with Celera on the MiSeq dataset, Omega provided more continuous assemblies overall using a fraction of the computing time of existing overlap-layout-consensus assemblers. This indicates Omega can more efficiently assemble longer Illumina reads, and at deeper coverage, for metagenomic datasets.

Availability and implementation

Implemented in C++ with source code and binaries freely available at http://omega.omicsbio.org.",2014-06-19 +21258066,TOMATOMA: a novel tomato mutant database distributing Micro-Tom mutant collections.,"The tomato is an excellent model for studies of plants bearing berry-type fruits and for experimental studies of the Solanaceae family of plants due to its conserved genetic organization. In this study, a comprehensive mutant tomato population was generated in the background of Micro-Tom, a dwarf, rapid-growth variety. In this and previous studies, a family including 8,598 and 6,422 M(2) mutagenized lines was produced by ethylmethane sulfonate (EMS) mutagenesis and γ-ray irradiation, and this study developed and investigated these M(2) plants for alteration of visible phenotypes. A total of 9,183 independent M(2) families comprising 91,830 M(2) plants were inspected for phenotypic alteration, and 1,048 individual mutants were isolated. Subsequently, the observed mutant phenotypes were classified into 15 major categories and 48 subcategories. Overall, 1,819 phenotypic categories were found in 1,048 mutants. Of these mutants, 549 were pleiotropic, whereas 499 were non-pleiotropic. Multiple different mutant alleles per locus were found in the mutant libraries, suggesting that the mutagenized populations were nearly saturated. Additionally, genetic analysis of backcrosses indicated the successful inheritance of the mutations in BC(1)F(2) populations, confirming the reproducibility in the morphological phenotyping of the M(2) plants. To integrate and manage the visible phenotypes of mutants and other associated data, we developed the in silico database TOMATOMA, a relational system interfacing modules between mutant line names and phenotypic categories. 
TOMATOMA is a freely accessible database, and these mutant recourses are available through the TOMATOMA (http://tomatoma.nbrp.jp/index.jsp).",2011-01-21 +24064416,BioServices: a common Python package to access biological Web Services programmatically.,"

Motivation

Web interfaces provide access to numerous biological databases. Many can be accessed to in a programmatic way thanks to Web Services. Building applications that combine several of them would benefit from a single framework.

Results

BioServices is a comprehensive Python framework that provides programmatic access to major bioinformatics Web Services (e.g. KEGG, UniProt, BioModels, ChEMBLdb). Wrapping additional Web Services based either on Representational State Transfer or Simple Object Access Protocol/Web Services Description Language technologies is eased by the usage of object-oriented programming.

Availability and implementation

BioServices releases and documentation are available at http://pypi.python.org/pypi/bioservices under a GPL-v3 license.",2013-09-23 +25104515,Identifying causal variants at loci with multiple signals of association.,"Although genome-wide association studies have successfully identified thousands of risk loci for complex traits, only a handful of the biologically causal variants, responsible for association at these loci, have been successfully identified. Current statistical methods for identifying causal variants at risk loci either use the strength of the association signal in an iterative conditioning framework or estimate probabilities for variants to be causal. A main drawback of existing methods is that they rely on the simplifying assumption of a single causal variant at each risk locus, which is typically invalid at many risk loci. In this work, we propose a new statistical framework that allows for the possibility of an arbitrary number of causal variants when estimating the posterior probability of a variant being causal. A direct benefit of our approach is that we predict a set of variants for each locus that under reasonable assumptions will contain all of the true causal variants with a high confidence level (e.g., 95%) even when the locus contains multiple causal variants. We use simulations to show that our approach provides 20-50% improvement in our ability to identify the causal variants compared to the existing methods at loci harboring multiple causal variants. We validate our approach using empirical data from an expression QTL study of CHI3L2 to identify new causal variants that affect gene expression at this locus. CAVIAR is publicly available online at http://genetics.cs.ucla.edu/caviar/.",2014-08-07 +25095882,DNAApp: a mobile application for sequencing data analysis.,"

Summary

There have been numerous applications developed for decoding and visualization of ab1 DNA sequencing files for Windows and MAC platforms, yet none exists for the increasingly popular smartphone operating systems. The ability to decode sequencing files cannot easily be carried out using browser accessed Web tools. To overcome this hurdle, we have developed a new native app called DNAApp that can decode and display ab1 sequencing file on Android and iOS. In addition to in-built analysis tools such as reverse complementation, protein translation and searching for specific sequences, we have incorporated convenient functions that would facilitate the harnessing of online Web tools for a full range of analysis. Given the high usage of Android/iOS tablets and smartphones, such bioinformatics apps would raise productivity and facilitate the high demand for analyzing sequencing data in biomedical research.

Availability and implementation

The Android version of DNAApp is available in Google Play Store as 'DNAApp', and the iOS version is available in the App Store. More details on the app can be found at www.facebook.com/APDLab; www.bii.a-star.edu.sg/research/trd/apd.php The DNAApp user guide is available at http://tinyurl.com/DNAAppuser, and a video tutorial is available on Google Play Store and App Store, as well as on the Facebook page.

Contact

samuelg@bii.a-star.edu.sg.",2014-08-05 +22543369,Optimal simultaneous superpositioning of multiple structures with missing data.,"

Motivation

Superpositioning is an essential technique in structural biology that facilitates the comparison and analysis of conformational differences among topologically similar structures. Performing a superposition requires a one-to-one correspondence, or alignment, of the point sets in the different structures. However, in practice, some points are usually 'missing' from several structures, for example, when the alignment contains gaps. Current superposition methods deal with missing data simply by superpositioning a subset of points that are shared among all the structures. This practice is inefficient, as it ignores important data, and it fails to satisfy the common least-squares criterion. In the extreme, disregarding missing positions prohibits the calculation of a superposition altogether.

Results

Here, we present a general solution for determining an optimal superposition when some of the data are missing. We use the expectation-maximization algorithm, a classic statistical technique for dealing with incomplete data, to find both maximum-likelihood solutions and the optimal least-squares solution as a special case.

Availability and implementation

The methods presented here are implemented in THESEUS 2.0, a program for superpositioning macromolecular structures. ANSI C source code and selected compiled binaries for various computing platforms are freely available under the GNU open source license from http://www.theseus3d.org.

Contact

dtheobald@brandeis.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-04-27 +22570414,RPF: a quality assessment tool for protein NMR structures.,"We describe the RPF web server, a quality assessment tool for protein NMR structures. The RPF server measures the 'goodness-of-fit' of the 3D structure with NMR chemical shift and unassigned NOESY data, and calculates a discrimination power (DP) score, which estimates the differences between the fits of the query structures and random coil structures to these experimental data. The DP-score is an accuracy predictor of the query structure. The RPF server also maps local structure quality measures onto the 3D structure using an online molecular viewer, and onto the NMR spectra, allowing refinement of the structure and/or NOESY peak list data. The RPF server is available at: http://nmr.cabm.rutgers.edu/rpf.",2012-05-08 +24675726,The Geogenomic Mutational Atlas of Pathogens (GoMAP) web system.,"We present a new approach for pathogen surveillance we call Geogenomics. Geogenomics examines the geographic distribution of the genomes of pathogens, with a particular emphasis on those mutations that give rise to drug resistance. We engineered a new web system called Geogenomic Mutational Atlas of Pathogens (GoMAP) that enables investigation of the global distribution of individual drug resistance mutations. As a test case we examined mutations associated with HIV resistance to FDA-approved antiretroviral drugs. GoMAP-HIV makes use of existing public drug resistance and HIV protein sequence data to examine the distribution of 872 drug resistance mutations in ∼ 502,000 sequences for many countries in the world. We also implemented a broadened classification scheme for HIV drug resistance mutations. Several patterns for geographic distributions of resistance mutations were identified by visual mining using this web tool. 
GoMAP-HIV is an open access web application available at http://www.bio-toolkit.com/GoMap/project/",2014-03-27 +25349151,Incident disability in older adults: prediction models based on two British prospective cohort studies.,"

Objective

To develop and validate a prediction model for incident locomotor disability after 7 years in older adults.

Setting

Prospective British cohort studies: British Women's Heart and Health Study (BWHHS) for development and the English Longitudinal Study of Ageing (ELSA) for validation.

Subjects

Community-dwelling older adults.

Methods

Multivariable logistic regression models after selection of predictors with backward elimination. Model performance was assessed using metrics of discrimination and calibration. Models were internally and externally validated.

Results

Locomotor disability was reported in BWHHS by 861 of 1,786 (48%) women after 7 years. Age, a history of arthritis and low physical activity levels were the most important predictors of locomotor disability. Models using routine measures as predictors had satisfactory calibration and discrimination (c-index 0.73). Addition of 31 blood markers did not increase the predictive performance. External validation in ELSA showed reduced discrimination (c-index 0.65) and an underestimation of disability risks. A web-based calculator for locomotor disability is available (http://www.sealedenvelope.com/trials/bwhhsmodel/).

Conclusions

We developed and externally validated a prediction model for incident locomotor disability in older adults based on routine measures available to general practitioners, patients and public health workers, and showed an adequate discrimination. Addition of blood markers from major biological pathways did not improve the performance of the model. Further replication in additional data sets may lead to further enhancement of the current model.",2014-10-27 +23097419,Simultaneous alignment and clustering of peptide data using a Gibbs sampling approach.,"

Motivation

Proteins recognizing short peptide fragments play a central role in cellular signaling. As a result of high-throughput technologies, peptide-binding protein specificities can be studied using large peptide libraries at dramatically lower cost and time. Interpretation of such large peptide datasets, however, is a complex task, especially when the data contain multiple receptor binding motifs, and/or the motifs are found at different locations within distinct peptides.

Results

The algorithm presented in this article, based on Gibbs sampling, identifies multiple specificities in peptide data by performing two essential tasks simultaneously: alignment and clustering of peptide data. We apply the method to de-convolute binding motifs in a panel of peptide datasets with different degrees of complexity spanning from the simplest case of pre-aligned fixed-length peptides to cases of unaligned peptide datasets of variable length. Example applications described in this article include mixtures of binders to different MHC class I and class II alleles, distinct classes of ligands for SH3 domains and sub-specificities of the HLA-A*02:01 molecule.

Availability

The Gibbs clustering method is available online as a web server at http://www.cbs.dtu.dk/services/GibbsCluster.",2012-10-24 +23813014,A high-throughput framework to detect synapses in electron microscopy images.,"

Motivation

Synaptic connections underlie learning and memory in the brain and are dynamically formed and eliminated during development and in response to stimuli. Quantifying changes in overall density and strength of synapses is an important pre-requisite for studying connectivity and plasticity in these cases or in diseased conditions. Unfortunately, most techniques to detect such changes are either low-throughput (e.g. electrophysiology), prone to error and difficult to automate (e.g. standard electron microscopy) or too coarse (e.g. magnetic resonance imaging) to provide accurate and large-scale measurements.

Results

To facilitate high-throughput analyses, we used a 50-year-old experimental technique to selectively stain for synapses in electron microscopy images, and we developed a machine-learning framework to automatically detect synapses in these images. To validate our method, we experimentally imaged brain tissue of the somatosensory cortex in six mice. We detected thousands of synapses in these images and demonstrate the accuracy of our approach using cross-validation with manually labeled data and by comparing against existing algorithms and against tools that process standard electron microscopy images. We also used a semi-supervised algorithm that leverages unlabeled data to overcome sample heterogeneity and improve performance. Our algorithms are highly efficient and scalable and are freely available for others to use.

Availability

Code is available at http://www.cs.cmu.edu/∼saketn/detect_synapses/",2013-07-01 +25568279,"GlycoMine: a machine learning-based approach for predicting N-, C- and O-linked glycosylation in the human proteome.","

Motivation

Glycosylation is a ubiquitous type of protein post-translational modification (PTM) in eukaryotic cells, which plays vital roles in various biological processes (BPs) such as cellular communication, ligand recognition and subcellular recognition. It is estimated that >50% of the entire human proteome is glycosylated. However, it is still a significant challenge to identify glycosylation sites, which requires expensive/laborious experimental research. Thus, bioinformatics approaches that can predict the glycan occupancy at specific sequons in protein sequences would be useful for understanding and utilizing this important PTM.

Results

In this study, we present a novel bioinformatics tool called GlycoMine, which is a comprehensive tool for the systematic in silico identification of C-linked, N-linked, and O-linked glycosylation sites in the human proteome. GlycoMine was developed using the random forest algorithm and evaluated based on a well-prepared up-to-date benchmark dataset that encompasses all three types of glycosylation sites, which was curated from multiple public resources. Heterogeneous sequences and functional features were derived from various sources, and subjected to further two-step feature selection to characterize a condensed subset of optimal features that contributed most to the type-specific prediction of glycosylation sites. Five-fold cross-validation and independent tests show that this approach significantly improved the prediction performance compared with four existing prediction tools: NetNGlyc, NetOGlyc, EnsembleGly and GPP. We demonstrated that this tool could identify candidate glycosylation sites in case study proteins and applied it to identify many high-confidence glycosylation target proteins by screening the entire human proteome.

Availability and implementation

The webserver, Java Applet, user instructions, datasets, and predicted glycosylation sites in the human proteome are freely available at http://www.structbioinfor.org/Lab/GlycoMine/.

Contact

Jiangning.Song@monash.edu or James.Whisstock@monash.edu or zhangyang@nwsuaf.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2015-01-06 +23609545,OrganellarGenomeDRAW--a suite of tools for generating physical maps of plastid and mitochondrial genomes and visualizing expression data sets.,"Mitochondria and plastids (chloroplasts) are cell organelles of endosymbiotic origin that possess their own genetic information. Most organellar DNAs map as circular double-stranded genomes. Across the eukaryotic kingdom, organellar genomes display great size variation, ranging from ∼15 to 20 kb (the size of the mitochondrial genome in most animals) to >10 Mb (the size of the mitochondrial genome in some lineages of flowering plants). We have developed OrganellarGenomeDraw (OGDRAW), a suite of software tools that enable users to create high-quality visual representations of both circular and linear annotated genome sequences provided as GenBank files or accession numbers. Although all types of DNA sequences are accepted as input, the software has been specifically optimized to properly depict features of organellar genomes. A recent extension facilitates the plotting of quantitative gene expression data, such as transcript or protein abundance data, directly onto the genome map. OGDRAW has already become widely used and is available as a free web tool (http://ogdraw.mpimp-golm.mpg.de/). The core processing components can be downloaded as a Perl module, thus also allowing for convenient integration into custom processing pipelines.",2013-04-22 +24852251,StemCellNet: an interactive platform for network-oriented investigations in stem cell biology.,"Stem cells are characterized by their potential for self-renewal and their capacity to differentiate into mature cells. These two key features emerge through the interplay of various factors within complex molecular networks. 
To provide researchers with a dedicated tool to investigate these networks, we have developed StemCellNet, a versatile web server for interactive network analysis and visualization. It rapidly generates focused networks based on a large collection of physical and regulatory interactions identified in human and murine stem cells. The StemCellNet web-interface has various easy-to-use tools for selection and prioritization of network components, as well as for integration of expression data provided by the user. As a unique feature, the networks generated can be screened against a compendium of stemness-associated genes. StemCellNet can also indicate novel candidate genes by evaluating their connectivity patterns. Finally, an optional dataset of generic interactions, which provides large coverage of the human and mouse proteome, extends the versatility of StemCellNet to other biomedical research areas in which stem cells play important roles, such as in degenerative diseases or cancer. The StemCellNet web server is freely accessible at http://stemcellnet.sysbiolab.eu.",2014-05-22 +24048906,Mutation analysis of PAH gene in patients with PKU in western Iran and its association with polymorphisms: identification of four novel mutations.,"Phenylketonuria (PKU) is an autosomal recessive disorder characterized by a mutation in the phenylalanine hydroxylase (PAH) gene. Untreated PKU can lead to mental retardation, seizures, and other serious medical problems. This study was designed to investigate the status of molecular defects in the PAH gene and their association with polymorphisms in Kurdish patients with PKU in the Kermanshah province, western Iran. The study was conducted on 27 unrelated patients with PKU over a 2-year period (from 2010 to 2012). All 13 exons plus exon-intron boundaries of the PAH gene were analyzed and we identified 15 different mutations, including two novel mutations, in 51 of the 54 mutant alleles (diagnostic efficiency of 94.4 %). 
IVS4 + 1G > C (c.441 + 1G > C) and IVS7 - 5 T > C (c.843 - 5 T > C) are novel mutations that have not been reported in the academic literature or the PAH locus database ( http://www.pahdb.mcgill.ca ); therefore, they may be specific to the Kurdish population. IVS2 + 5G > C and IVS9 + 5G > A were the two most prevalent mutations in our sample, with frequencies of 26 % and 17 %, respectively. The second most common mutations were p.R261X, IVS10 - 11G > A, p.K363 > Nfs and IVS7 - 5 T > C, with each showing a relative frequency of 7.4 %. All other detected mutations, including p.F55 > Lfs, p.R176X, p.R243Q, p.V230I, p.R243X, p.R261Q, IVS8 - 7A > G and p.E390G had frequencies of less than 4 %. The present study showed that there is a distinct difference in the characteristics of PAH mutations between the Kermanshah province and other parts of Iran, suggesting that Kermanshah may have a unique population distribution of PAH gene mutations. Iran lies on the route of major ancient movements of the Caucasian people toward the Mediterranean basin, and Kermanshah has previously been called the gateway to Asia. Most of the mutations identified in this study are common in the Mediterranean region. Therefore, our findings are consistent with the historical and geographical links between the Iranian population and the populations of Mediterranean region.",2013-09-19 +25144544,Update on recommendations for use of herpes zoster vaccine.,"Herpes zoster vaccine (Zostavax [Merck & Co., Inc.]) was licensed in 2006 and recommended by the Advisory Committee on Immunization Practices (ACIP) in 2008 for prevention of herpes zoster (shingles) and its complications among adults aged ≥60 years. The Food and Drug Administration (FDA) approved the use of Zostavax in 2011 for adults aged 50 through 59 years based on a large study of safety and efficacy in this age group. 
ACIP initially considered the use of herpes zoster vaccine among adults aged 50 through 59 years in June 2011, but declined to recommend the vaccine in this age group, citing shortages of Zostavax and limited data on long-term protection afforded by herpes zoster vaccine. In October 2013, ACIP reviewed the epidemiology of herpes zoster and its complications, herpes zoster vaccine supply, short-term vaccine efficacy in adults aged 50 through 59 years, short- and long- term vaccine efficacy and effectiveness in adults aged ≥60 years, an updated cost-effectiveness analysis, and deliberations of the ACIP herpes zoster work group, all of which are summarized in this report. No vote was taken, and ACIP maintained its current recommendation that herpes zoster vaccine be routinely recommended for adults aged ≥60 years. Meeting minutes are available at http://www.cdc.gov/vaccines/acip/meetings/meetings-info.html.",2014-08-01 +24848016,PASTA 2.0: an improved server for protein aggregation prediction.,"The formation of amyloid aggregates upon protein misfolding is related to several devastating degenerative diseases. The propensities of different protein sequences to aggregate into amyloids, how they are enhanced by pathogenic mutations, the presence of aggregation hot spots stabilizing pathological interactions, the establishing of cross-amyloid interactions between co-aggregating proteins, all rely at the molecular level on the stability of the amyloid cross-beta structure. Our redesigned server, PASTA 2.0, provides a versatile platform where all of these different features can be easily predicted on a genomic scale given input sequences. The server provides other pieces of information, such as intrinsic disorder and secondary structure predictions, that complement the aggregation data. The PASTA 2.0 energy function evaluates the stability of putative cross-beta pairings between different sequence stretches. It was re-derived on a larger dataset of globular protein domains. 
The resulting algorithm was benchmarked on comprehensive peptide and protein test sets, leading to improved, state-of-the-art results with more amyloid forming regions correctly detected at high specificity. The PASTA 2.0 server can be accessed at http://protein.bio.unipd.it/pasta2/.",2014-05-21 +23504936,Using MEMo to discover mutual exclusivity modules in cancer.,"Although individual tumors show surprisingly diverse genomic alterations, these events tend to occur in a limited number of pathways, and alterations that affect the same pathway tend to not co-occur in the same patient. While pathway analysis has been a powerful tool in cancer genomics, our knowledge of oncogenic pathway modules is incomplete. To systematically identify such modules, we have developed a novel method, Mutual Exclusivity Modules in Cancer (MEMo). The method searches and identifies modules characterized by three properties: (1) member genes are recurrently altered across a set of tumor samples; (2) member genes are known to or are likely to participate in the same biological process; and (3) alteration events within the modules are mutually exclusive. MEMo integrates multiple data types and maps genomic alterations to biological pathways. MEMo's mutual exclusivity uses a statistical model that preserves the number of alterations per gene and per sample. The MEMo software, source code and sample data sets are available for download at: http://cbio.mskcc.org/memo.",2013-03-01 +24668168,The Kinesin-12 Kif15 is a processive track-switching tetramer.,"Kinesin-12 motors are a little studied branch of the kinesin superfamily with the human protein (Kif15) implicated in spindle mechanics and chromosome movement. In this study, we reconstitute full-length hKif15 and its microtubule-targeting factor hTpx2 in vitro to gain insight into the motors mode of operation. We reveal that hKif15 is a plus-end-directed processive homotetramer that can step against loads of up to 3.5 pN. 
We further show that hKif15 is the first kinesin that effectively switches microtubule tracks at intersections, enabling it to navigate microtubule networks, such as the spindle. hKif15 tetramers are also capable of cross-linking microtubules, but unexpectedly, this does not depend on hTpx2. Instead, we find that hTpx2 inhibits hKif15 stepping when microtubule-bound. Our data reveal that hKif15 is a second tetrameric spindle motor in addition to the kinesin-5 Eg5 and provides insight into the mechanisms by which hKif15 and its inhibitor hTpx2 modulate spindle microtubule architecture. DOI: http://dx.doi.org/10.7554/eLife.01724.001.",2014-03-25 +23988161,LeView: automatic and interactive generation of 2D diagrams for biomacromolecule/ligand interactions.,": 2D diagrams are widely used in the scientific literature to represent interactions between ligands and biomacromolecules. Such schematic diagrams are very helpful to better understand the chemical interactions and biological processes in which ligands are involved. Here, a new tool for automatic and interactive generation of 2D diagrams for biomacromolecule/ligand interactions is presented. LeView (Ligand-Environment Viewer) produces customised and high-quality figures, with a good compromise between a faithful representation of the 3D data (structures and interactions) and aesthetic criteria. LeView can be freely downloaded at http://www.pegase-biosciences.com/tools/leview/.",2013-08-29 +24056076,SPSSM8: an accurate approach for predicting eight-state secondary structures of proteins.,"Protein eight-state secondary structure prediction is challenging, but is necessary to determine protein structure and function. Here, we report the development of a novel approach, SPSSM8, to predict eight-state secondary structures of proteins accurately from sequences based on the structural position-specific scoring matrix (SPSSM). The SPSSM has been successfully utilized to predict three-state secondary structures. 
Now we employ an eight-state SPSSM as a feature that is obtained from sequence structure alignment against a large database of 9 million sequences with putative structural information. The SPSSM8 uses a low sequence identity dataset (9062 entries) as a training set and conditional random field for the classification algorithm. The SPSSM8 achieved an average eight-state secondary structure accuracy (Q8) of 71.7% (Q3, 81.6%) for an independent testing set (463 entries), which had an improved accuracy of 10.1% and 4.6% compared with SSPro8 and CNF, respectively, and significantly improved the accuracy of eight-state secondary structure prediction. For CASP 9 dataset (92 entries) the SPSSM8 achieved a Q8 accuracy of 80.1% (Q3, 83.0%). The SPSSM8 was confirmed as an outstanding predictor for eight-state secondary structures of proteins. SPSSM8 is freely available at http://cal.tongji.edu.cn/SPSSM8.",2013-09-18 +22819274,Point-of-care differentiation of Kawasaki disease from other febrile illnesses.,"

Objective

To test whether statistical learning on clinical and laboratory test patterns would lead to an algorithm for Kawasaki disease (KD) diagnosis that could aid clinicians.

Study design

Demographic, clinical, and laboratory data were prospectively collected for subjects with KD and febrile controls (FCs) using a standardized data collection form.

Results

Our multivariate models were trained with a cohort of 276 patients with KD and 243 FCs (who shared some features of KD) and validated with a cohort of 136 patients with KD and 121 FCs using either clinical data, laboratory test results, or their combination. Our KD scoring method stratified the subjects into subgroups with low (FC diagnosis, negative predictive value >95%), intermediate, and high (KD diagnosis, positive predictive value >95%) scores. Combining both clinical and laboratory test results, the algorithm diagnosed 81.2% of all training and 74.3% of all testing of patients with KD in the high score group and 67.5% of all training and 62.8% of all testing FCs in the low score group.

Conclusions

Our KD scoring metric and the associated data system with online (http://translationalmedicine.stanford.edu/cgi-bin/KD/kd.pl) and smartphone applications are easily accessible, inexpensive tools to improve the differentiation of most children with KD from FCs with other pediatric illnesses.",2012-07-20 +22655076,Reconsideration of in-silico siRNA design based on feature selection: a cross-platform data integration perspective.,"RNA interference via exogenous short interference RNAs (siRNA) is increasingly more widely employed as a tool in gene function studies, drug target discovery and disease treatment. Currently there is a strong need for rational siRNA design to achieve more reliable and specific gene silencing; and to keep up with the increasing needs for a wider range of applications. While progress has been made in the ability to design siRNAs with specific targets, we are clearly at an infancy stage towards achieving rational design of siRNAs with high efficacy. Among the many obstacles to overcome, lack of general understanding of what sequence features of siRNAs may affect their silencing efficacy and of large-scale homogeneous data needed to carry out such association analyses represents two challenges. To address these issues, we investigated a feature-selection based in-silico siRNA design from a novel cross-platform data integration perspective. An integration analysis of 4,482 siRNAs from ten meta-datasets was conducted for ranking siRNA features, according to their possible importance to the silencing efficacy of siRNAs across heterogeneous data sources. Our ranking analysis revealed for the first time the most relevant features based on cross-platform experiments, which compares favorably with the traditional in-silico siRNA feature screening based on the small samples of individual platform data. We believe that our feature ranking analysis can offer more creditable suggestions to help improving the design of siRNA with specific silencing targets. 
Data and scripts are available at http://csbl.bmb.uga.edu/publications/materials/qiliu/siRNA.html.",2012-05-24 +23143269,SynSysNet: integration of experimental data on synaptic protein-protein interactions with drug-target relations.,"We created SynSysNet, available online at http://bioinformatics.charite.de/synsysnet, to provide a platform that creates a comprehensive 4D network of synaptic interactions. Neuronal synapses are fundamental structures linking nerve cells in the brain and they are responsible for neuronal communication and information processing. These processes are dynamically regulated by a network of proteins. New developments in interaction proteomics and yeast two-hybrid methods allow unbiased detection of interactors. The consolidation of data from different resources and methods is important to understand the relation to human behaviour and disease and to identify new therapeutic approaches. To this end, we established SynSysNet from a set of ∼1000 synapse specific proteins, their structures and small-molecule interactions. For two-thirds of these, 3D structures are provided (from Protein Data Bank and homology modelling). Drug-target interactions for 750 approved drugs and 50 000 compounds, as well as 5000 experimentally validated protein-protein interactions, are included. The resulting interaction network and user-selected parts can be viewed interactively and exported in XGMML. Approximately 200 involved pathways can be explored regarding drug-target interactions. Homology-modelled structures are downloadable in Protein Data Bank format, and drugs are available as MOL-files. Protein-protein interactions and drug-target interactions can be viewed as networks; corresponding PubMed IDs or sources are given.",2012-11-11 +23497159,Promzea: a pipeline for discovery of co-regulatory motifs in maize and other plant species and its application to the anthocyanin and phlobaphene biosynthetic pathways and the Maize Development Atlas.,"

Background

The discovery of genetic networks and cis-acting DNA motifs underlying their regulation is a major objective of transcriptome studies. The recent release of the maize genome (Zea mays L.) has facilitated in silico searches for regulatory motifs. Several algorithms exist to predict cis-acting elements, but none have been adapted for maize.

Results

A benchmark data set was used to evaluate the accuracy of three motif discovery programs: BioProspector, Weeder and MEME. Analysis showed that each motif discovery tool had limited accuracy and appeared to retrieve a distinct set of motifs. Therefore, using the benchmark, statistical filters were optimized to reduce the false discovery ratio, and then remaining motifs from all programs were combined to improve motif prediction. These principles were integrated into a user-friendly pipeline for motif discovery in maize called Promzea, available at http://www.promzea.org and on the Discovery Environment of the iPlant Collaborative website. Promzea was subsequently expanded to include rice and Arabidopsis. Within Promzea, a user enters cDNA sequences or gene IDs; corresponding upstream sequences are retrieved from the maize genome. Predicted motifs are filtered, combined and ranked. Promzea searches the chosen plant genome for genes containing each candidate motif, providing the user with the gene list and corresponding gene annotations. Promzea was validated in silico using a benchmark data set: the Promzea pipeline showed a 22% increase in nucleotide sensitivity compared to the best standalone program tool, Weeder, with equivalent nucleotide specificity. Promzea was also validated by its ability to retrieve the experimentally defined binding sites of transcription factors that regulate the maize anthocyanin and phlobaphene biosynthetic pathways. Promzea predicted additional promoter motifs, and genome-wide motif searches by Promzea identified 127 non-anthocyanin/phlobaphene genes that each contained all five predicted promoter motifs in their promoters, perhaps uncovering a broader co-regulated gene network. Promzea was also tested against tissue-specific microarray data from maize.

Conclusions

An online tool customized for promoter motif discovery in plants has been generated called Promzea. Promzea was validated in silico by its ability to retrieve benchmark motifs and experimentally defined motifs and was tested using tissue-specific microarray data. Promzea predicted broader networks of gene regulation associated with the historic anthocyanin and phlobaphene biosynthetic pathways. Promzea is a new bioinformatics tool for understanding transcriptional gene regulation in maize and has been expanded to include rice and Arabidopsis.",2013-03-15 +22492649,Estimating the order of mutations during tumorigenesis from tumor genome sequencing data.,"

Motivation

Tumors are thought to develop and evolve through a sequence of genetic and epigenetic somatic alterations to progenitor cells. Early stages of human tumorigenesis are hidden from view. Here, we develop a method for inferring some aspects of the order of mutational events during tumorigenesis based on genome sequencing data for a set of tumors. This method does not assume that the sequence of driver alterations is the same for each tumor, but enables the degree of similarity or difference in the sequence to be evaluated.

Results

To evaluate the new method, we applied it to colon cancer tumor sequencing data and the results are consistent with the multi-step tumorigenesis model previously developed based on comparing stages of cancer. We then applied the new method to DNA sequencing data for a set of lung cancers. The model may be a useful tool for better understanding the process of tumorigenesis.

Availability

The software is available at: http://linus.nci.nih.gov/Data/YounA/OrderMutation.zip.",2012-04-06 +23449253,Using ProtMAX to create high-mass-accuracy precursor alignments from label-free quantitative mass spectrometry data generated in shotgun proteomics experiments.,"Recently, new software tools have been developed for improved protein quantification using mass spectrometry (MS) data. However, there are still limitations especially in high-sample-throughput quantification methods, and most of these relate to extensive computational calculations. The mass accuracy precursor alignment (MAPA) strategy has been shown to be a robust method for relative protein quantification. Its major advantages are high resolution, sensitivity and sample throughput. Its accuracy is data dependent and thus best suited for precursor mass-to-charge precision of ∼1 p.p.m. This protocol describes how to use a software tool (ProtMAX) that allows for the automated alignment of precursors from up to several hundred MS runs within minutes without computational restrictions. It comprises features for 'ion intensity count' and 'target search' of a distinct set of peptides. This procedure also includes the recommended MS settings for complex quantitative MAPA analysis using ProtMAX (http://www.univie.ac.at/mosys/software.html).",2013-02-28 +25344116,OvMark: a user-friendly system for the identification of prognostic biomarkers in publically available ovarian cancer gene expression datasets.,"

Background

Ovarian cancer has the lowest survival rate of all gynaecologic cancers and is characterised by a lack of early symptoms and frequent late stage diagnosis. There is a paucity of robust molecular markers that are independent of and complementary to clinical parameters such as disease stage and tumour grade.

Methods

We have developed a user-friendly, web-based system to evaluate the association of genes/miRNAs with outcome in ovarian cancer. The OvMark algorithm combines data from multiple microarray platforms (including probesets targeting miRNAs) and correlates them with clinical parameters (e.g. tumour grade, stage) and outcomes (disease free survival (DFS), overall survival). In total, OvMark combines 14 datasets from 7 different array platforms measuring the expression of ~17,000 genes and 341 miRNAs across 2,129 ovarian cancer samples.

Results

To demonstrate the utility of the system we confirmed the prognostic ability of 14 genes and 2 miRNAs known to play a role in ovarian cancer. Of these genes, CXCL12 was the most significant predictor of DFS (HR = 1.42, p-value = 2.42x10-6). Surprisingly, those genes found to have the greatest correlation with outcome have not been heavily studied in ovarian cancer, or in some cases in any cancer. For instance, the three genes with the greatest association with survival are SNAI3, VWA3A and DNAH12.

Conclusions/impact

OvMark is a powerful tool for examining putative gene/miRNA prognostic biomarkers in ovarian cancer (available at http://glados.ucd.ie/OvMark/index.html). The impact of this tool will be in the preliminary assessment of putative biomarkers in ovarian cancer, particularly for research groups with limited bioinformatics facilities.",2014-10-24 +25142782,Effective discrimination between biologically relevant contacts and crystal packing contacts using new determinants.,"In the structural models determined by X-ray crystallography, contacts between molecules can be divided into two categories: biologically relevant contacts and crystal packing contacts. With the growth in the number and quality of available large crystal packing contacts structures, distinguishing crystal packing contacts from biologically relevant contacts remains a difficult task, which can lead to wrong interpretation of structural models. In this study, we performed a systematic analysis on the biologically relevant contacts and crystal packing contacts. The analysis results reveal that biologically contacts are more tightly packed than crystal packing contacts. This property of biologically contacts may contribute to the formation of their interfacial core region. Meanwhile, the differences between the core and surface region of biologically contacts in amino acid composition and evolutionary measure are more dramatic than crystal packing contacts and these differences appear to be useful in distinguishing these two categories of contacts. On the basis of the features derived from our analysis, we developed a random forest model to classify biological relevant contacts and crystal packing contacts. Our method can achieve a high receiver operating curve of 0.923 in the 5-fold cross-validation and accuracies of 91.4% and 91.7% for two different test sets. Moreover, in a comparison study, our model outperforms other existing methods, such as DiMoVo, Pita, Pisa, and Eppic. 
We believe that this study will provide useful help in the validation of oligomeric proteins and protein complexes. The model and all data used in this paper are freely available at http://cic.scu.edu.cn/bioinformatics/bio-cry.zip.",2014-09-13 +30731835,First Report of Embellisia allii Causing Skin Blotch and Bulb Canker on Garlic in California.,"In April 2011, commercial garlic (Allium sativum) in Monterey County, CA showed symptoms of an undocumented disease. Bulb and stem sheaths were dark, decayed, and sloughing off the plants. Dissection of diseased sheaths revealed black hyphae between layers. Lower leaves wilted, turned tan, and dried up. Disease occurred in small patches scattered in two fields. In the patches, disease incidence was as much as 50%; however, overall field incidence was less than 1%. Isolations from 80% (16 of 20 plants) of collected plants resulted in the recovery of a dark olivaceous black fungus. Conidiophores were geniculate and brown and conidia were borne singly, brown, and ellipsoidal to cylindrical. Conidia had two to five but mostly three transverse septa. Longitudinal septa were infrequent and apical cells were rounded. Conidia measured (19.0-) 26.3 to 36.6 (-42.8) × (6.7-) 9.2 to 9.9 (-12.9) μm. Dark, intercalary chlamydospores were observed as colonies aged. DNA sequencing of the internal transcribed spacer (ITS) regions of four, single-spored isolates was completed with primers ITS1 and ITS4 (3). Sequences of all isolates (GenBank Nos. JN588614 to JN588617) were identical and 100% similar to Embellisia allii (AY278840). On the basis of morphological and molecular data, the fungus was identified as E. allii (Campanile) Simmons (1). Pathogenicity of four of the sequenced E. allii isolates and one additional E. allii isolate was tested using inoculum grown on acidified potato dextrose agar and garlic (cv. California Late) planted into 15-cm pots. 
A transverse incision was made at a point 2 cm above the garlic bulb so that a colonized agar plug could be inserted between the second and third sheath layer. The stem was then wrapped with Parafilm. Ten plants per isolate were inoculated and kept in a greenhouse (24 to 26°C). Seven to eight days after inoculation, the tissue around the incision turned tan and dark fungal growth was observed. Fourteen days after inoculation, the inoculated area was necrotic and dark fungal growth developed between stem layers. E. allii was reisolated from all inoculated plants and matched the morphological characteristics of the original isolates. Control plants, inoculated with uncolonized agar plugs, developed no symptoms. This experiment was repeated with similar results. In addition, one isolate was used to inoculate leek (A. porrum cv. Lancelot) and onion (A. cepa cv. Evergreen). Similar symptoms developed on these two species and E. allii was reisolated from all plants. To our knowledge, this is the first documentation of skin blotch and bulb canker caused by E. allii on garlic in California. Affected plants were of poor quality and could not be harvested. Our findings that garlic isolates of E. allii can infect leek and onion provide preliminary evidence that this pathogen is not restricted to garlic; this information may be useful to growers when considering crop rotations. E. allii has been reported on garlic in a number of places in Africa, Asia, Europe, the Middle East, and North and South America (2). The sequenced E. allii isolates are deposited in the fungal collection at the CDFA Plant Pest Diagnostics Lab (CDFA798-801). References: (1) J. C. David. Mycopathologia 116:59, 1991. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , August 8, 2011, (3) B. M. Pryor and D. M. Bigelow. 
Mycologia 95:1141, 2003.",2012-02-01 +22312429,NGS QC Toolkit: a toolkit for quality control of next generation sequencing data.,"Next generation sequencing (NGS) technologies provide a high-throughput means to generate large amount of sequence data. However, quality control (QC) of sequence data generated from these technologies is extremely important for meaningful downstream analysis. Further, highly efficient and fast processing tools are required to handle the large volume of datasets. Here, we have developed an application, NGS QC Toolkit, for quality check and filtering of high-quality data. This toolkit is a standalone and open source application freely available at http://www.nipgr.res.in/ngsqctoolkit.html. All the tools in the application have been implemented in Perl programming language. The toolkit is comprised of user-friendly tools for QC of sequencing data generated using Roche 454 and Illumina platforms, and additional tools to aid QC (sequence format converter and trimming tools) and analysis (statistics tools). A variety of options have been provided to facilitate the QC at user-defined parameters. The toolkit is expected to be very useful for the QC of NGS data to facilitate better downstream analysis.",2012-02-01 +24755290,Gating of neural error signals during motor learning.,"Cerebellar climbing fiber activity encodes performance errors during many motor learning tasks, but the role of these error signals in learning has been controversial. We compared two motor learning paradigms that elicited equally robust putative error signals in the same climbing fibers: learned increases and decreases in the gain of the vestibulo-ocular reflex (VOR). During VOR-increase training, climbing fiber activity on one trial predicted changes in cerebellar output on the next trial, and optogenetic activation of climbing fibers to mimic their encoding of performance errors was sufficient to implant a motor memory. 
In contrast, during VOR-decrease training, there was no trial-by-trial correlation between climbing fiber activity and changes in cerebellar output, and climbing fiber activation did not induce VOR-decrease learning. Our data suggest that the ability of climbing fibers to induce plasticity can be dynamically gated in vivo, even under conditions where climbing fibers are robustly activated by performance errors. DOI: http://dx.doi.org/10.7554/eLife.02076.001.",2014-04-22 +24066126,SurvExpress: an online biomarker validation tool and database for cancer gene expression data using survival analysis.,"Validation of multi-gene biomarkers for clinical outcomes is one of the most important issues for cancer prognosis. An important source of information for virtual validation is the high number of available cancer datasets. Nevertheless, assessing the prognostic performance of a gene expression signature along datasets is a difficult task for Biologists and Physicians and also time-consuming for Statisticians and Bioinformaticians. Therefore, to facilitate performance comparisons and validations of survival biomarkers for cancer outcomes, we developed SurvExpress, a cancer-wide gene expression database with clinical outcomes and a web-based tool that provides survival analysis and risk assessment of cancer datasets. The main input of SurvExpress is only the biomarker gene list. We generated a cancer database collecting more than 20,000 samples and 130 datasets with censored clinical information covering tumors over 20 tissues. We implemented a web interface to perform biomarker validation and comparisons in this database, where a multivariate survival analysis can be accomplished in about one minute. We show the utility and simplicity of SurvExpress in two biomarker applications for breast and lung cancer. Compared to other tools, SurvExpress is the largest, most versatile, and quickest free tool available. 
SurvExpress web can be accessed in http://bioinformatica.mty.itesm.mx/SurvExpress (a tutorial is included). The website was implemented in JSP, JavaScript, MySQL, and R.",2013-09-16 +24921091,PhotoCloud: Interactive remote exploration of joint 2D and 3D datasets.,"PhotoCloud is a real-time client-server system for interactive visualization and exploration of large datasets comprising thousands of calibrated 2D photographs of a scene and a complex 3D description of the scene. The system isn't tailored to any specific data acquisition process; it aims at generality and flexibility. PhotoCloud achieves scalability through a multiresolution dynamic hierarchical representation of the data, which is remotely stored and accessed by the client through an efficient cache system. The system includes a compact image browser and a multiresolution model renderer. PhotoCloud employs iconic visualization of the images in the 3D space and projects images onto the 3D scene on the fly. Users can navigate the 2D and 3D spaces with smooth, integrated, seamless transitions between them. A study with differently skilled users confirms PhotoCloud's effectiveness and communication power. The Web extras at http://www.youtube.com/playlist?list=PLHJB2bhmgB7cmYD0ST9CEDMRv1JlX4xPH are videos demonstrating PhotoCloud, a real-time client-server system for interactive exploration of large datasets comprising 2D photos and 3D models.",2013-03-01 +22533540,XCMS Online: a web-based platform to process untargeted metabolomic data.,"Recently, interest in untargeted metabolomics has become prevalent in the general scientific community among an increasing number of investigators. The majority of these investigators, however, do not have the bioinformatic expertise that has been required to process metabolomic data by using command-line driven software programs. 
Here we introduce a novel platform to process untargeted metabolomic data that uses an intuitive graphical interface and does not require installation or technical expertise. This platform, called XCMS Online, is a web-based version of the widely used XCMS software that allows users to easily upload and process liquid chromatography/mass spectrometry data with only a few mouse clicks. XCMS Online provides a solution for the complete untargeted metabolomic workflow including feature detection, retention time correction, alignment, annotation, statistical analysis, and data visualization. Results can be browsed online in an interactive, customizable table showing statistics, chromatograms, and putative METLIN identities for each metabolite. Additionally, all results and images can be downloaded as zip files for offline analysis and publication. XCMS Online is available at https://xcmsonline.scripps.edu.",2012-05-10 +22888776,Cloudgene: a graphical execution platform for MapReduce programs on private and public clouds.,"

Background

The MapReduce framework enables a scalable processing and analyzing of large datasets by distributing the computational load on connected computer nodes, referred to as a cluster. In Bioinformatics, MapReduce has already been adopted to various case scenarios such as mapping next generation sequencing data to a reference genome, finding SNPs from short read data or matching strings in genotype files. Nevertheless, tasks like installing and maintaining MapReduce on a cluster system, importing data into its distributed file system or executing MapReduce programs require advanced knowledge in computer science and could thus prevent scientists from usage of currently available and useful software solutions.

Results

Here we present Cloudgene, a freely available platform to improve the usability of MapReduce programs in Bioinformatics by providing a graphical user interface for the execution, the import and export of data and the reproducibility of workflows on in-house (private clouds) and rented clusters (public clouds). The aim of Cloudgene is to build a standardized graphical execution environment for currently available and future MapReduce programs, which can all be integrated by using its plug-in interface. Since Cloudgene can be executed on private clusters, sensitive datasets can be kept in house at all time and data transfer times are therefore minimized.

Conclusions

Our results show that MapReduce programs can be integrated into Cloudgene with little effort and without adding any computational overhead to existing programs. This platform gives developers the opportunity to focus on the actual implementation task and provides scientists a platform with the aim to hide the complexity of MapReduce. In addition to MapReduce programs, Cloudgene can also be used to launch predefined systems (e.g. Cloud BioLinux, RStudio) in public clouds. Currently, five different bioinformatic programs using MapReduce and two systems are integrated and have been successfully deployed. Cloudgene is freely available at http://cloudgene.uibk.ac.at.",2012-08-13 +22443413,Analysis of high-depth sequence data for studying viral diversity: a comparison of next generation sequencing platforms using Segminator II.,"

Background

Next generation sequencing provides detailed insight into the variation present within viral populations, introducing the possibility of treatment strategies that are both reactive and predictive. Current software tools, however, need to be scaled up to accommodate for high-depth viral data sets, which are often temporally or spatially linked. In addition, due to the development of novel sequencing platforms and chemistries, each with implicit strengths and weaknesses, it will be helpful for researchers to be able to routinely compare and combine data sets from different platforms/chemistries. In particular, error associated with a specific sequencing process must be quantified so that true biological variation may be identified.

Results

Segminator II was developed to allow for the efficient comparison of data sets derived from different sources. We demonstrate its usage by comparing large data sets from 12 influenza H1N1 samples sequenced on both the 454 Life Sciences and Illumina platforms, permitting quantification of platform error. For mismatches median error rates at 0.10 and 0.12%, respectively, suggested that both platforms performed similarly. For insertions and deletions median error rates within the 454 data (at 0.3 and 0.2%, respectively) were significantly higher than those within the Illumina data (0.004 and 0.006%, respectively). In agreement with previous observations these higher rates were strongly associated with homopolymeric stretches on the 454 platform. Outside of such regions both platforms had similar indel error profiles. Additionally, we apply our software to the identification of low frequency variants.

Conclusion

We have demonstrated, using Segminator II, that it is possible to distinguish platform specific error from biological variation using data derived from two different platforms. We have used this approach to quantify the amount of error present within the 454 and Illumina platforms in relation to genomic location as well as location on the read. Given that next generation data is increasingly important in the analysis of drug-resistance and vaccine trials, this software will be useful to the pathogen research community. A zip file containing the source code and jar file is freely available for download from http://www.bioinf.manchester.ac.uk/segminator/.",2012-03-23 +23093610,Genotype calling and phasing using next-generation sequencing reads and a haplotype scaffold.,"

Motivation

Given the current costs of next-generation sequencing, large studies carry out low-coverage sequencing followed by application of methods that leverage linkage disequilibrium to infer genotypes. We propose a novel method that assumes study samples are sequenced at low coverage and genotyped on a genome-wide microarray, as in the 1000 Genomes Project (1KGP). We assume polymorphic sites have been detected from the sequencing data and that genotype likelihoods are available at these sites. We also assume that the microarray genotypes have been phased to construct a haplotype scaffold. We then phase each polymorphic site using an MCMC algorithm that iteratively updates the unobserved alleles based on the genotype likelihoods at that site and local haplotype information. We use a multivariate normal model to capture both allele frequency and linkage disequilibrium information around each site. When sequencing data are available from trios, Mendelian transmission constraints are easily accommodated into the updates. The method is highly parallelizable, as it analyses one position at a time.

Results

We illustrate the performance of the method compared with other methods using data from Phase 1 of the 1KGP in terms of genotype accuracy, phasing accuracy and downstream imputation performance. We show that the haplotype panel we infer in African samples, which was based on a trio-phased scaffold, increases downstream imputation accuracy for rare variants (R2 increases by >0.05 for minor allele frequency <1%), and this will translate into a boost in power to detect associations. These results highlight the value of incorporating microarray genotypes when calling variants from next-generation sequence data.

Availability

The method (called MVNcall) is implemented in a C++ program and is available from http://www.stats.ox.ac.uk/∼marchini/#software.",2012-10-23 +25562476,Vision screening for children 36 to <72 months: recommended practices.,"

Purpose

This article provides recommendations for screening children aged 36 to younger than 72 months for eye and visual system disorders. The recommendations were developed by the National Expert Panel to the National Center for Children's Vision and Eye Health, sponsored by Prevent Blindness, and funded by the Maternal and Child Health Bureau of the Health Resources and Services Administration, United States Department of Health and Human Services. The recommendations describe both best and acceptable practice standards. Targeted vision disorders for screening are primarily amblyopia, strabismus, significant refractive error, and associated risk factors. The recommended screening tests are intended for use by lay screeners, nurses, and other personnel who screen children in educational, community, public health, or primary health care settings. Characteristics of children who should be examined by an optometrist or ophthalmologist rather than undergo vision screening are also described.

Results

There are two current best practice vision screening methods for children aged 36 to younger than 72 months: (1) monocular visual acuity testing using single HOTV letters or LEA Symbols surrounded by crowding bars at a 5-ft (1.5 m) test distance, with the child responding by either matching or naming, or (2) instrument-based testing using the Retinomax autorefractor or the SureSight Vision Screener with the Vision in Preschoolers Study data software installed (version 2.24 or 2.25 set to minus cylinder form). Using the Plusoptix Photoscreener is acceptable practice, as is adding stereoacuity testing using the PASS (Preschool Assessment of Stereopsis with a Smile) stereotest as a supplemental procedure to visual acuity testing or autorefraction.

Conclusions

The National Expert Panel recommends that children aged 36 to younger than 72 months be screened annually (best practice) or at least once (accepted minimum standard) using one of the best practice approaches. Technological updates will be maintained at http://nationalcenter.preventblindness.org.",2015-01-01 +22515559,Quantitative comparison of immunohistochemical staining measured by digital image analysis versus pathologist visual scoring.,"Immunohistochemical (IHC) assays performed on formalin-fixed paraffin-embedded (FFPE) tissue sections traditionally have been semi-quantified by pathologist visual scoring of staining. IHC is useful for validating biomarkers discovered through genomics methods as large clinical repositories of FFPE specimens support the construction of tissue microarrays (TMAs) for high throughput studies. Due to the ubiquitous availability of IHC techniques in clinical laboratories, validated IHC biomarkers may be translated readily into clinical use. However, the method of pathologist semi-quantification is costly, inherently subjective, and produces ordinal rather than continuous variable data. Computer-aided analysis of digitized whole slide images may overcome these limitations. Using TMAs representing 215 ovarian serous carcinoma specimens stained for S100A1, we assessed the degree to which data obtained using computer-aided methods correlated with data obtained by pathologist visual scoring. To evaluate computer-aided image classification, IHC staining within pathologist annotated and software-classified areas of carcinoma were compared for each case. Two metrics for IHC staining were used: the percentage of carcinoma with S100A1 staining (%Pos), and the product of the staining intensity (optical density [OD] of staining) multiplied by the percentage of carcinoma with S100A1 staining (OD*%Pos). 
A comparison of the IHC staining data obtained from manual annotations and software-derived annotations showed strong agreement, indicating that software efficiently classifies carcinomatous areas within IHC slide images. Comparisons of IHC intensity data derived using pixel analysis software versus pathologist visual scoring demonstrated high Spearman correlations of 0.88 for %Pos (p < 0.0001) and 0.90 for OD*%Pos (p < 0.0001). This study demonstrated that computer-aided methods to classify image areas of interest (e.g., carcinomatous areas of tissue specimens) and quantify IHC staining intensity within those areas can produce highly similar data to visual evaluation by a pathologist.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1649068103671302.",2012-06-20 +23396124,Quantifying spatial relationships from whole retinal images.,"

Motivation

Microscopy advances have enabled the acquisition of large-scale biological images that capture whole tissues in situ. This in turn has fostered the study of spatial relationships between cells and various biological structures, which has proved enormously beneficial toward understanding organ and organism function. However, the unique nature of biological images and tissues precludes the application of many existing spatial mining and quantification methods necessary to make inferences about the data. Especially difficult is attempting to quantify the spatial correlation between heterogeneous structures and point objects, which often occurs in many biological tissues.

Results

We develop a method to quantify the spatial correlation between a continuous structure and point data in large (17 500 × 17 500 pixel) biological images. We use this method to study the spatial relationship between the vasculature and a type of cell in the retina called astrocytes. We use a geodesic feature space based on vascular structures and embed astrocytes into the space by spatial sampling. We then propose a quantification method in this feature space that enables us to empirically demonstrate that the spatial distribution of astrocytes is often correlated with vascular structure. Additionally, these patterns are conserved in the retina after injury. These results prove the long-assumed patterns of astrocyte spatial distribution and provide a novel methodology for conducting other spatial studies of similar tissue and structures.

Availability

The Matlab code for the method described in this article can be found at http://www.cs.ucsb.edu/~dbl/software.php.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-08 +22949484,TreeFix: statistically informed gene tree error correction using species trees.,"Accurate gene tree reconstruction is a fundamental problem in phylogenetics, with many important applications. However, sequence data alone often lack enough information to confidently support one gene tree topology over many competing alternatives. Here, we present a novel framework for combining sequence data and species tree information, and we describe an implementation of this framework in TreeFix, a new phylogenetic program for improving gene tree reconstructions. Given a gene tree (preferably computed using a maximum-likelihood phylogenetic program), TreeFix finds a ""statistically equivalent"" gene tree that minimizes a species tree-based cost function. We have applied TreeFix to 2 clades of 12 Drosophila and 16 fungal genomes, as well as to simulated phylogenies and show that it dramatically improves reconstructions compared with current state-of-the-art programs. Given its accuracy, speed, and simplicity, TreeFix should be applicable to a wide range of analyses and have many important implications for future investigations of gene evolution. The source code and a sample data set are available at http://compbio.mit.edu/treefix.",2012-09-04 +24058439,"The opportunistic pathogen Propionibacterium acnes: insights into typing, human disease, clonal diversification and CAMP factor evolution.","We previously described a Multilocus Sequence Typing (MLST) scheme based on eight genes that facilitates population genetic and evolutionary analysis of P. acnes. While MLST is a portable method for unambiguous typing of bacteria, it is expensive and labour intensive. 
Against this background, we now describe a refined version of this scheme based on two housekeeping (aroE; guaA) and two putative virulence (tly; camp2) genes (MLST4) that correctly predicted the phylogroup (IA1, IA2, IB, IC, II, III), clonal complex (CC) and sequence type (ST) (novel or described) status for 91% isolates (n = 372) via cross-referencing of the four gene allelic profiles to the full eight gene versions available in the MLST database (http://pubmlst.org/pacnes/). Even in the small number of cases where specific STs were not completely resolved, the MLST4 method still correctly determined phylogroup and CC membership. Examination of nucleotide changes within all the MLST loci provides evidence that point mutations generate new alleles approximately 1.5 times as frequently as recombination; although the latter still plays an important role in the bacterium's evolution. The secreted/cell-associated 'virulence' factors tly and camp2 show no clear evidence of episodic or pervasive positive selection and have diversified at a rate similar to housekeeping loci. The co-evolution of these genes with the core genome might also indicate a role in commensal/normal existence constraining their diversity and preventing their loss from the P. acnes population. The possibility that members of the expanded CAMP factor protein family, including camp2, may have been lost from other propionibacteria, but not P. acnes, would further argue for a possible role in niche/host adaption leading to their retention within the genome. These evolutionary insights may prove important for discussions surrounding camp2 as an immunotherapy target for acne, and the effect such treatments may have on commensal lineages.",2013-09-13 +24058508,In silico approach for predicting toxicity of peptides and proteins.,"

Background

Over the past few decades, scientific research has been focused on developing peptide/protein-based therapies to treat various diseases. With the several advantages over small molecules, including high specificity, high penetration, ease of manufacturing, peptides have emerged as promising therapeutic molecules against many diseases. However, one of the bottlenecks in peptide/protein-based therapy is their toxicity. Therefore, in the present study, we developed in silico models for predicting toxicity of peptides and proteins.

Description

We obtained toxic peptides having 35 or fewer residues from various databases for developing prediction models. Non-toxic or random peptides were obtained from SwissProt and TrEMBL. It was observed that certain residues like Cys, His, Asn, and Pro are abundant as well as preferred at various positions in toxic peptides. We developed models based on machine learning technique and quantitative matrix using various properties of peptides for predicting toxicity of peptides. The performance of dipeptide-based model in terms of accuracy was 94.50% with MCC 0.88. In addition, various motifs were extracted from the toxic peptides and this information was combined with dipeptide-based model for developing a hybrid model. In order to evaluate the over-optimization of the best model based on dipeptide composition, we evaluated its performance on independent datasets and achieved accuracy around 90%. Based on above study, a web server, ToxinPred has been developed, which would be helpful in predicting (i) toxicity or non-toxicity of peptides, (ii) minimum mutations in peptides for increasing or decreasing their toxicity, and (iii) toxic regions in proteins.

Conclusion

ToxinPred is a unique in silico method of its kind, which will be useful in predicting toxicity of peptides/proteins. In addition, it will be useful in designing least toxic peptides and discovering toxic regions in proteins. We hope that the development of ToxinPred will provide momentum to peptide/protein-based drug discovery (http://crdd.osdd.net/raghava/toxinpred/).",2013-09-13 +24034841,IDOMAL: the malaria ontology revisited.,"

Background

With about half a billion cases, of which nearly one million fatal ones, malaria constitutes one of the major infectious diseases worldwide. A recently revived effort to eliminate the disease also focuses on IT resources for its efficient control, which prominently includes the control of the mosquito vectors that transmit the Plasmodium pathogens. As part of this effort, IDOMAL has been developed and it is continually being updated.

Findings

In addition to the improvement of IDOMAL's structure and the correction of some inaccuracies, there were some major subdomain additions such as a section on natural products and remedies, and the import, from other, higher order ontologies, of several terms, which were merged with IDOMAL terms. Effort was put on rendering IDOMAL fully compatible as an extension of IDO, the Infectious Disease Ontology. The reason for the difficulties in fully reaching that target were the inherent differences between vector-borne diseases and ""classical"" infectious diseases, which make it necessary to specifically adjust the ontology's architecture in order to comprise vectors and their populations.

Conclusions

In addition to a higher coverage of domain-specific terms and optimizing its usage by databases and decision-support systems, the new version of IDOMAL described here allows for more cross-talk between it and other ontologies, and in particular IDO. The malaria ontology is available for downloading at the OBO Foundry (http://www.obofoundry.org/cgi-bin/detail.cgi?id=malaria_ontology) and the NCBO BioPortal (http://bioportal.bioontology.org/ontologies/1311).",2013-09-13 +25552204,Up-regulation of miR-9 expression predicate advanced clinicopathological features and poor prognosis in patients with hepatocellular carcinoma.,"

Background

MicroRNAs (miRNAs) are endogenous small (19-24 nt long) noncoding RNAs that regulate gene expression in a sequence specific manner. An increasing association between miRNA and cancer has been recently reported. Hepatocellular carcinoma (HCC), as the fifth most common cancer and the most common cause of death in men, has become the third leading cause of cancer-related deaths globally. In this study, we investigated the miR-9 expression in HCC to evaluate their value in prognosis of this tumor.

Methods

The expression of miR-9 in matched normal and tumor tissues of HCC was evaluated using a quantitative real-time RT-PCR. A Kaplan-Meier survival curve was generated following a log-rank test.

Results

It was observed that miR-9 expression was upregulated in HCC tissues compared with noncancerous liver tissues (7.26 ± 1.30 vs. 3.14 ± 1.08, P < 0.001). The up-regulation of miR-9 in HCC cancer tissues was also significantly correlated with aggressive clinicopathological features. We found that the patients with high miR-9 expression have a higher tumor staging (P = 0.0389) and are in higher risk of venous infiltration (P < 0.0001). Moreover, the results of Kaplan-Meier analyses showed that HCC patients with the high miR-9 expression tend to have shorter overall survival (P < 0.0001). The multivariate analysis clearly indicated that the high miR-9 expression in biopsy samples may be considered as an independent prognostic factor in HCC for decreased survival (4.28; 95%CI, 2.77-7.23, P < 0.001).

Conclusion

Our data indicate the potential of miR-9 as a novel prognostic biomarker for HCC. Large well-designed studies with diverse populations and functional evaluations are warranted to confirm and extend our findings.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_228.",2014-12-31 +23358824,Fast and accurate read mapping with approximate seeds and multiple backtracking.,"We present Masai, a read mapper representing the state-of-the-art in terms of speed and accuracy. Our tool is an order of magnitude faster than RazerS 3 and mrFAST, 2-4 times faster and more accurate than Bowtie 2 and BWA. The novelties of our read mapper are filtration with approximate seeds and a method for multiple backtracking. Approximate seeds, compared with exact seeds, increase filtration specificity while preserving sensitivity. Multiple backtracking amortizes the cost of searching a large set of seeds by taking advantage of the repetitiveness of next-generation sequencing data. Combined together, these two methods significantly speed up approximate search on genomic data sets. Masai is implemented in C++ using the SeqAn library. The source code is distributed under the BSD license and binaries for Linux, Mac OS X and Windows can be freely downloaded from http://www.seqan.de/projects/masai.",2013-01-28 +21217125,ATTED-II updates: condition-specific gene coexpression to extend coexpression analyses and applications to a broad range of flowering plants.,"ATTED-II (http://atted.jp) is a gene coexpression database for a wide variety of experimental designs, such as prioritizations of genes for functional identification and analyses of the regulatory relationships among genes. Here, we report updates of ATTED-II focusing on two new features: condition-specific coexpression and homologous coexpression with rice. To analyze a broad range of biological phenomena, it is important to collect data under many diverse experimental conditions, but the meaning of coexpression can become ambiguous under these conditions. One approach to overcome this difficulty is to calculate the coexpression for each set of conditions with a clear biological meaning. 
With this viewpoint, we prepared five sets of experimental conditions (tissue, abiotic stress, biotic stress, hormones and light conditions), and users can evaluate the coexpression by employing comparative gene lists and switchable gene networks. We also developed an interactive visualization system, using the Cytoscape web system, to improve the network representation. As the second update, rice coexpression is now available. The previous version of ATTED-II was specifically developed for Arabidopsis, and thus coexpression analyses for other useful plants have been difficult. To solve this problem, we extended ATTED-II by including comparison tables between Arabidopsis and rice. This representation will make it possible to analyze the conservation of coexpression among flowering plants. With the ability to investigate condition-specific coexpression and species conservation, ATTED-II can help researchers to clarify the functional and regulatory networks of genes in a broad array of plant species.",2011-01-07 +23991755,"In silico enzymatic synthesis of a 400,000 compound biochemical database for nontargeted metabolomics.","Current methods of structure identification in mass-spectrometry-based nontargeted metabolomics rely on matching experimentally determined features of an unknown compound to those of candidate compounds contained in biochemical databases. A major limitation of this approach is the relatively small number of compounds currently included in these databases. If the correct structure is not present in a database, it cannot be identified, and if it cannot be identified, it cannot be included in a database. Thus, there is an urgent need to augment metabolomics databases with rationally designed biochemical structures using alternative means. Here we present the In Vivo/In Silico Metabolites Database (IIMDB), a database of in silico enzymatically synthesized metabolites, to partially address this problem. 
The database, which is available at http://metabolomics.pharm.uconn.edu/iimdb/, includes ~23,000 known compounds (mammalian metabolites, drugs, secondary plant metabolites, and glycerophospholipids) collected from existing biochemical databases plus more than 400,000 computationally generated human phase-I and phase-II metabolites of these known compounds. IIMDB features a user-friendly web interface and a programmer-friendly RESTful web service. Ninety-five percent of the computationally generated metabolites in IIMDB were not found in any existing database. However, 21,640 were identical to compounds already listed in PubChem, HMDB, KEGG, or HumanCyc. Furthermore, the vast majority of these in silico metabolites were scored as biological using BioSM, a software program that identifies biochemical structures in chemical structure space. These results suggest that in silico biochemical synthesis represents a viable approach for significantly augmenting biochemical databases for nontargeted metabolomics applications.",2013-09-12 +25547242,WEBnm@ v2.0: Web server and services for comparing protein flexibility.,"

Background

Normal mode analysis (NMA) using elastic network models is a reliable and cost-effective computational method to characterise protein flexibility and by extension, their dynamics. Further insight into the dynamics-function relationship can be gained by comparing protein motions between protein homologs and functional classifications. This can be achieved by comparing normal modes obtained from sets of evolutionary related proteins.

Results

We have developed an automated tool for comparative NMA of a set of pre-aligned protein structures. The user can submit a sequence alignment in the FASTA format and the corresponding coordinate files in the Protein Data Bank (PDB) format. The computed normalised squared atomic fluctuations and atomic deformation energies of the submitted structures can be easily compared on graphs provided by the web user interface. The web server provides pairwise comparison of the dynamics of all proteins included in the submitted set using two measures: the Root Mean Squared Inner Product and the Bhattacharyya Coefficient. The Comparative Analysis has been implemented on our web server for NMA, WEBnm@, which also provides recently upgraded functionality for NMA of single protein structures. This includes new visualisations of protein motion, visualisation of inter-residue correlations and the analysis of conformational change using the overlap analysis. In addition, programmatic access to WEBnm@ is now available through a SOAP-based web service. Webnm@ is available at http://apps.cbu.uib.no/webnma .

Conclusion

WEBnm@ v2.0 is an online tool offering unique capability for comparative NMA on multiple protein structures. Along with a convenient web interface, powerful computing resources, and several methods for mode analyses, WEBnm@ facilitates the assessment of protein flexibility within protein families and superfamilies. These analyses can give a good view of how the structures move and how the flexibility is conserved over the different structures.",2014-12-30 +24735558,CABS-flex predictions of protein flexibility compared with NMR ensembles.,"

Motivation

Identification of flexible regions of protein structures is important for understanding of their biological functions. Recently, we have developed a fast approach for predicting protein structure fluctuations from a single protein model: the CABS-flex. CABS-flex was shown to be an efficient alternative to conventional all-atom molecular dynamics (MD). In this work, we evaluate CABS-flex and MD predictions by comparison with protein structural variations within NMR ensembles.

Results

Based on a benchmark set of 140 proteins, we show that the relative fluctuations of protein residues obtained from CABS-flex are well correlated to those of NMR ensembles. On average, this correlation is stronger than that between MD and NMR ensembles. In conclusion, CABS-flex is useful and complementary to MD in predicting protein regions that undergo conformational changes as well as the extent of such changes.

Availability and implementation

The CABS-flex is freely available to all users at http://biocomp.chem.uw.edu.pl/CABSflex.

Contact

sekmi@chem.uw.edu.pl

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-15 +24523134,Cofactory: sequence-based prediction of cofactor specificity of Rossmann folds.,"Obtaining optimal cofactor balance to drive production is a challenge in metabolically engineered microbial production strains. To facilitate identification of heterologous enzymes with desirable altered cofactor requirements from native content, we have developed Cofactory, a method for prediction of enzyme cofactor specificity using only primary amino acid sequence information. The algorithm identifies potential cofactor binding Rossmann folds and predicts the specificity for the cofactors FAD(H2), NAD(H), and NADP(H). The Rossmann fold sequence search is carried out using hidden Markov models whereas artificial neural networks are used for specificity prediction. Training was carried out using experimental data from protein-cofactor structure complexes. The overall performance was benchmarked against an independent evaluation set obtaining Matthews correlation coefficients of 0.94, 0.79, and 0.65 for FAD(H2), NAD(H), and NADP(H), respectively. The Cofactory method is made publicly available at http://www.cbs.dtu.dk/services/Cofactory.",2014-03-20 +23592716,Two-stage sampling designs for external validation of personal risk models.,"We propose a cost-effective sampling design and estimating procedure for validating personal risk models using right-censored cohort data. Validation involves using each subject's covariates, as ascertained at cohort entry, in a risk model (specified independently of the data) to assign him/her a probability of an adverse outcome within a future time period. Subjects are then grouped according to the magnitudes of their assigned risks, and within each group, the mean assigned risk is compared with the probability of outcome occurrence as estimated using the follow-up data. Such validation presents two complications. 
First, in the presence of right-censoring, estimating the probability of developing the outcomes before death requires competing risk analysis. Second, for rare outcomes, validation using the full cohort requires assembling covariates and assigning risks to thousands of subjects. This can be costly when some covariates involve analyzing biological specimens. A two-stage sampling design addresses this problem by assembling covariates and assigning risks only to those subjects most informative for estimating key parameters. We use this design to estimate the outcome probabilities needed to evaluate model performance and we provide theoretical and bootstrap estimates of their variances. We also describe how to choose two-stage designs with minimal efficiency loss for a parameter of interest when the quantities determining optimality are unknown at the time of design. We illustrate these methods by using subjects in the California Teachers Study to validate ovarian cancer risk models. We find that a design with optimal efficiency for one performance parameter need not be so for others, and trade-offs will be required. A two-stage design that samples all outcome-positive subjects and more outcome-negative than censored subjects will perform well in most circumstances. The methods are implemented in Risk Model Assessment Program, an R program freely available at http://med.stanford.edu/epidemiology/two-stage.html.",2013-04-16 +24737859,"The influence of dynein processivity control, MAPs, and microtubule ends on directional movement of a localising mRNA.","Many cellular constituents travel along microtubules in association with multiple copies of motor proteins. How the activity of these motors is regulated during cargo sorting is poorly understood. In this study, we address this issue using a novel in vitro assay for the motility of localising Drosophila mRNAs bound to native dynein-dynactin complexes. 
High precision tracking reveals that individual RNPs within a population undergo either diffusive, or highly processive, minus end-directed movements along microtubules. RNA localisation signals stimulate the processive movements, with regulation of dynein-dynactin's activity rather than its total copy number per RNP, responsible for this effect. Our data support a novel mechanism for multi-motor translocation based on the regulation of dynein processivity by discrete cargo-associated features. Studying the in vitro responses of RNPs to microtubule-associated proteins (MAPs) and microtubule ends provides insights into how an RNA population could navigate the cytoskeletal network and become anchored at its destination in cells. DOI: http://dx.doi.org/10.7554/eLife.01596.001.",2014-04-15 +25423109,"System-level insights into the cellular interactome of a non-model organism: inferring, modelling and analysing functional gene network of soybean (Glycine max).","Cellular interactome, in which genes and/or their products interact on several levels, forming transcriptional regulatory-, protein interaction-, metabolic-, signal transduction networks, etc., has attracted decades of research focuses. However, such a specific type of network alone can hardly explain the various interactive activities among genes. These networks characterize different interaction relationships, implying their unique intrinsic properties and defects, and covering different slices of biological information. Functional gene network (FGN), a consolidated interaction network that models fuzzy and more generalized notion of gene-gene relations, have been proposed to combine heterogeneous networks with the goal of identifying functional modules supported by multiple interaction types. There are yet no successful precedents of FGNs on sparsely studied non-model organisms, such as soybean (Glycine max), due to the absence of sufficient heterogeneous interaction data. 
We present an alternative solution for inferring the FGNs of soybean (SoyFGNs), in a pioneering study on the soybean interactome, which is also applicable to other organisms. SoyFGNs exhibit the typical characteristics of biological networks: scale-free, small-world architecture and modularization. Verified by co-expression and KEGG pathways, SoyFGNs are more extensive and accurate than an orthology network derived from Arabidopsis. As a case study, network-guided disease-resistance gene discovery indicates that SoyFGNs can provide system-level studies on gene functions and interactions. This work suggests that inferring and modelling the interactome of a non-model plant are feasible. It will speed up the discovery and definition of the functions and interactions of other genes that control important functions, such as nitrogen fixation and protein or lipid synthesis. The efforts of the study are the basis of our further comprehensive studies on the soybean functional interactome at the genome and microRNome levels. Additionally, a web tool for information retrieval and analysis of SoyFGNs can be accessed at SoyFN: http://nclab.hit.edu.cn/SoyFN.",2014-11-25 +22975077,"Evaluation of qPCR curve analysis methods for reliable biomarker discovery: bias, resolution, precision, and implications.","RNA transcripts such as mRNA or microRNA are frequently used as biomarkers to determine disease state or response to therapy. Reverse transcription (RT) in combination with quantitative PCR (qPCR) has become the method of choice to quantify small amounts of such RNA molecules. In parallel with the democratization of RT-qPCR and its increasing use in biomedical research or biomarker discovery, we witnessed a growth in the number of gene expression data analysis methods. 
Most of these methods are based on the principle that the position of the amplification curve with respect to the cycle-axis is a measure for the initial target quantity: the later the curve, the lower the target quantity. However, most methods differ in the mathematical algorithms used to determine this position, as well as in the way the efficiency of the PCR reaction (the fold increase of product per cycle) is determined and applied in the calculations. Moreover, there is dispute about whether the PCR efficiency is constant or continuously decreasing. Together this has lead to the development of different methods to analyze amplification curves. In published comparisons of these methods, available algorithms were typically applied in a restricted or outdated way, which does not do them justice. Therefore, we aimed at development of a framework for robust and unbiased assessment of curve analysis performance whereby various publicly available curve analysis methods were thoroughly compared using a previously published large clinical data set (Vermeulen et al., 2009) [11]. The original developers of these methods applied their algorithms and are co-author on this study. We assessed the curve analysis methods' impact on transcriptional biomarker identification in terms of expression level, statistical significance, and patient-classification accuracy. The concentration series per gene, together with data sets from unpublished technical performance experiments, were analyzed in order to assess the algorithms' precision, bias, and resolution. While large differences exist between methods when considering the technical performance experiments, most methods perform relatively well on the biomarker data. 
The data and the analysis results per method are made available to serve as benchmark for further development and evaluation of qPCR curve analysis methods (http://qPCRDataMethods.hfrc.nl).",2012-09-03 +23493323,BRANCH: boosting RNA-Seq assemblies with partial or related genomic sequences.,"

Motivation

De novo transcriptome assemblies of RNA-Seq data are important for genomics applications of unsequenced organisms. Owing to the complexity and often incomplete representation of transcripts in sequencing libraries, the assembly of high-quality transcriptomes can be challenging. However, with the rapidly growing number of sequenced genomes, it is now feasible to improve RNA-Seq assemblies by guiding them with genomic sequences.

Results

This study introduces BRANCH, an algorithm designed for improving de novo transcriptome assemblies by using genomic information that can be partial or complete genome sequences from the same or a related organism. Its input includes assembled RNA reads (transfrags), genomic sequences (e.g. contigs) and the RNA reads themselves. It uses a customized version of BLAT to align the transfrags and RNA reads to the genomic sequences. After identifying exons from the alignments, it defines a directed acyclic graph and maps the transfrags to paths on the graph. It then joins and extends the transfrags by applying an algorithm that solves a combinatorial optimization problem, called the Minimum weight Minimum Path Cover with given Paths. In performance tests on real data from Caenorhabditis elegans and Saccharomyces cerevisiae, assisted by genomic contigs from the same species, BRANCH improved the sensitivity and precision of transfrags generated by Velvet/Oases or Trinity by 5.1-56.7% and 0.3-10.5%, respectively. These improvements added 3.8-74.1% complete transcripts and 8.3-3.8% proteins to the initial assembly. Similar improvements were achieved when guiding the BRANCH processing of a transcriptome assembly from a more complex organism (mouse) with genomic sequences from a related species (rat).

Availability

The BRANCH software can be downloaded for free from this site: http://manuals.bioinformatics.ucr.edu/home/branch.

Contact

thomas.girke@ucr.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-03-14 +22719821,A case study for large-scale human microbiome analysis using JCVI's metagenomics reports (METAREP).,"As metagenomic studies continue to increase in their number, sequence volume and complexity, the scalability of biological analysis frameworks has become a rate-limiting factor to meaningful data interpretation. To address this issue, we have developed JCVI Metagenomics Reports (METAREP) as an open source tool to query, browse, and compare extremely large volumes of metagenomic annotations. Here we present improvements to this software including the implementation of a dynamic weighting of taxonomic and functional annotation, support for distributed searches, advanced clustering routines, and integration of additional annotation input formats. The utility of these improvements to data interpretation are demonstrated through the application of multiple comparative analysis strategies to shotgun metagenomic data produced by the National Institutes of Health Roadmap for Biomedical Research Human Microbiome Project (HMP) (http://nihroadmap.nih.gov). Specifically, the scalability of the dynamic weighting feature is evaluated and established by its application to the analysis of over 400 million weighted gene annotations derived from 14 billion short reads as predicted by the HMP Unified Metabolic Analysis Network (HUMAnN) pipeline. Further, the capacity of METAREP to facilitate the identification and simultaneous comparison of taxonomic and functional annotations including biological pathway and individual enzyme abundances from hundreds of community samples is demonstrated by providing scenarios that describe how these data can be mined to answer biological questions related to the human microbiome. 
These strategies provide users with a reference of how to conduct similar large-scale metagenomic analyses using METAREP with their own sequence data, while in this study they reveal insights into the nature and extent of variation in taxonomic and functional profiles across body habitats and individuals. Over one thousand HMP WGS datasets and the latest open source code are available at http://www.jcvi.org/hmp-metarep.",2012-06-13 +24250782,Somatic point mutation calling in low cellularity tumors.,"Somatic mutation calling from next-generation sequencing data remains a challenge due to the difficulties of distinguishing true somatic events from artifacts arising from PCR, sequencing errors or mis-mapping. Tumor cellularity or purity, sub-clonality and copy number changes also confound the identification of true somatic events against a background of germline variants. We have developed a heuristic strategy and software (http://www.qcmg.org/bioinformatics/qsnp/) for somatic mutation calling in samples with low tumor content and we show the superior sensitivity and precision of our approach using a previously sequenced cell line, a series of tumor/normal admixtures, and 3,253 putative somatic SNVs verified on an orthogonal platform.",2013-11-08 +21342555,GARNET--gene set analysis with exploration of annotation relations.,"

Background

Gene set analysis is a powerful method of deducing biological meaning for an a priori defined set of genes. Numerous tools have been developed to test statistical enrichment or depletion in specific pathways or gene ontology (GO) terms. Major difficulties towards biological interpretation are integrating diverse types of annotation categories and exploring the relationships between annotation terms of similar information.

Results

GARNET (Gene Annotation Relationship NEtwork Tools) is an integrative platform for gene set analysis with many novel features. It includes tools for retrieval of genes from annotation database, statistical analysis & visualization of annotation relationships, and managing gene sets. In an effort to allow access to a full spectrum of amassed biological knowledge, we have integrated a variety of annotation data that include the GO, domain, disease, drug, chromosomal location, and custom-defined annotations. Diverse types of molecular networks (pathways, transcription and microRNA regulations, protein-protein interaction) are also included. The pair-wise relationship between annotation gene sets was calculated using kappa statistics. GARNET consists of three modules--gene set manager, gene set analysis and gene set retrieval, which are tightly integrated to provide virtually automatic analysis for gene sets. A dedicated viewer for annotation network has been developed to facilitate exploration of the related annotations.

Conclusions

GARNET (gene annotation relationship network tools) is an integrative platform for diverse types of gene set analysis, where complex relationships among gene annotations can be easily explored with an intuitive network visualization tool (http://garnet.isysbio.org/ or http://ercsb.ewha.ac.kr/garnet/).",2011-02-15 +23175603,HOCOMOCO: a comprehensive collection of human transcription factor binding sites models.,"Transcription factor (TF) binding site (TFBS) models are crucial for computational reconstruction of transcription regulatory networks. In existing repositories, a TF often has several models (also called binding profiles or motifs), obtained from different experimental data. Having a single TFBS model for a TF is more pragmatic for practical applications. We show that integration of TFBS data from various types of experiments into a single model typically results in the improved model quality probably due to partial correction of source specific technique bias. We present the Homo sapiens comprehensive model collection (HOCOMOCO, http://autosome.ru/HOCOMOCO/, http://cbrc.kaust.edu.sa/hocomoco/) containing carefully hand-curated TFBS models constructed by integration of binding sequences obtained by both low- and high-throughput methods. To construct position weight matrices to represent these TFBS models, we used ChIPMunk software in four computational modes, including newly developed periodic positional prior mode associated with DNA helix pitch. We selected only one TFBS model per TF, unless there was a clear experimental evidence for two rather distinct TFBS models. We assigned a quality rating to each model. 
HOCOMOCO contains 426 systematically curated TFBS models for 401 human TFs, where 172 models are based on more than one data source.",2012-11-21 +25205853,Cohort Profile: The Social Environment and Biomarkers of Aging Study (SEBAS) in Taiwan.,"The Social Environment and Biomarkers of Aging Study (SEBAS) is a nationally representative longitudinal survey of Taiwanese middle-aged and older adults. It adds the collection of biomarkers and performance assessments to the Taiwan Longitudinal Study of Aging (TLSA), a nationally representative study of adults aged 60 and over, including the institutionalized population. The TLSA began in 1989, with follow-ups approximately every 3 years; younger refresher cohorts were added in 1996 and 2003. The first wave of SEBAS, based on a sub-sample of respondents from the 1999 TLSA, was conducted in 2000. A total of 1023 respondents completed both a face-to-face home interview and, several weeks later, a hospital-based physical examination. In addition to a 12-h (7 pm-7 am) urine specimen collected the night before and a fasting blood specimen collected during the examination, trained staff measured blood pressure, height, weight and waist and hip circumferences. A second wave of SEBAS was conducted in 2006 using a similar protocol to SEBAS 2000, but with the addition of performance assessments conducted by the interviewers at the end of the home interview. Both waves of SEBAS also included measures of health status (physical, emotional, cognitive), health behaviours, social relationships and exposure to stressors. 
The SEBAS data, which are publicly available at [http://www.icpsr.umich.edu/icpsrweb/NACDA/studies/3792/version/5], allow researchers to explore the relationships among life challenges, the social environment and health and to examine the antecedents, correlates and consequences of change in biological measures and health.",2014-09-08 +23913812,"The Canadian ""National Program for hemophilia mutation testing"" database: a ten-year review.","A reference genotyping laboratory was established in 2000 at Queen's University, Kingston, to provide genetic testing for Hemophilia A (HA) and B (HB) and create a Canadian mutation database. Canadian hemophilia treatment centers and genetics clinics provided DNA and clinical information from November 2000 to March 2011. The factor VIII (F8) gene was analyzed in 1,177 patients (47% of HA population) and 787 female family members and the factor IX (F9) gene in 267 patients (47% of HB population) and 123 female family members, using Southern Blot, PCR, conformation sensitive gel electrophoresis, and/or direct sequencing. The mutation detection rates for HA and HB were 91% and 94%, respectively. 380 different F8 mutations were identified: inversions of intron 22 and intron 1, 229 missense, 45 nonsense, eight deletions, 70 frameshifts, 25 splice site, and one compound mutation with a splice site and intron 1 inversion. Of these mutations, 228 were novel to the Hemophilia A Database (HADB, http://hadb.org.uk/). A total 125 different F9 mutations were identified: 80 missense, 12 frameshift, 12 splice site, nine nonsense and seven promoter mutations, three large deletions, and two compound mutations with both missense and nonsense changes. Of these mutations, 36 were novel to the International Haemophilia B Mutation database (http://www.kcl.ac.uk/ip/petergreen/haemBdatabase.html). The Canadian F8 and F9 mutation database reflects the allelic heterogeneity of HA and HB, and is similar to previously described populations. 
This report represents the largest and longest duration experience of a national hemophilia genotyping program documented, to date.",2013-09-09 +24976038,Structure solution with ARCIMBOLDO using fragments derived from distant homology models.,"Molecular replacement, one of the general methods used to solve the crystallographic phase problem, relies on the availability of suitable models for placement in the unit cell of the unknown structure in order to provide initial phases. ARCIMBOLDO, originally conceived for ab initio phasing, operates at the limit of this approach, using small, very accurate fragments such as polyalanine α-helices. A distant homolog may contain accurate building blocks, but it may not be evident which sub-structure is the most suitable purely from the degree of conservation. Trying out all alternative possibilities in a systematic way is computationally expensive, even if effective. In the present study, the solution of the previously unknown structure of MltE, an outer membrane-anchored endolytic peptidoglycan lytic transglycosylase from Escherichia coli, is described. The asymmetric unit contains a dimer of this 194 amino acid protein. The closest available homolog was the catalytic domain of Slt70 (PDB code 1QTE). Originally, this template was used omitting contiguous spans of aminoacids and setting as many ARCIMBOLDO runs as models, each aiming to locate two copies sequentially with PHASER. Fragment trimming against the correlation coefficient prior to expansion through density modification and autotracing in SHELXE was essential. Analysis of the figures of merit led to the strategy to optimize the search model against the experimental data now implemented within ARCIMBOLDO-SHREDDER (http://chango.ibmb.csic.es/SHREDDER). In this strategy, the initial template is systematically shredded, and fragments are scored against each unique solution of the rotation function. 
Results are combined into a score per residue and the template is trimmed accordingly.",2014-09-06 +23592216,"Suppression of the LMP2A target gene, EGR-1, protects Hodgkin's lymphoma cells from entry to the EBV lytic cycle.","Hodgkin's lymphoma is unusual among B cell lymphomas, in so far as the malignant Hodgkin/Reed-Sternberg (HRS) cells lack a functional B cell receptor (BCR), as well as many of the required downstream signalling components. In Epstein-Barr virus (EBV)-positive cases of Hodgkin's lymphoma, HRS cells express the viral latent membrane proteins (LMP)-1 and -2A. LMP2A is thought to contribute to the pathogenesis of Hodgkin's lymphoma by providing a surrogate BCR-like survival signal. However, LMP2A has also been shown to induce the virus-replicative cycle in B cells, an event presumably incompatible with lymphomagenesis. In an attempt to resolve this apparent paradox, we compared the transcriptional changes observed in primary HRS cells with those induced by LMP2A and by BCR activation in primary human germinal centre (GC) B cells, the presumed progenitors of HRS cells. We found a subset of genes that were up-regulated by both LMP2A expression and BCR activation but which were down-regulated in primary HRS cells. These genes included EGR1, an immediate-early gene that is required for BCR-induced entry to the virus-replicative cycle. We present data supporting a model for the pathogenesis of EBV-positive Hodgkin's lymphoma in which LMP2A-expressing HRS cells lacking BCR signalling functions cannot induce EGR1 and are consequently protected from entry to the virus lytic cycle. The primary microarray data are available from GEO (http://www.ncbi.nlm.nih.gov/geo/) under series Accession No 46143.",2013-08-01 +22199384,MeQA: a pipeline for MeDIP-seq data quality assessment and analysis.,"

Motivation

We present a pipeline for the pre-processing, quality assessment, read distribution and methylation estimation for methylated DNA immunoprecipitation (MeDIP)-sequence datasets. This is the first MeDIP-seq-specific analytic pipeline that starts at the output of the sequencers. This pipeline will reduce the data analysis load on staff and allows the easy and straightforward analysis of sequencing data for DNA methylation. The pipeline integrates customized scripting and several existing tools, which can deal with both paired and single end data.

Availability

The package and extensive documentation, and comparison to public data is available at http://life.tongji.edu.cn/meqa/.",2011-12-22 +24010892,Novel drug-regulated transcriptional networks in brain reveal pharmacological properties of psychotropic drugs.,"

Background

Despite their widespread use, the biological mechanisms underlying the efficacy of psychotropic drugs are still incompletely known; improved understanding of these is essential for development of novel more effective drugs and rational design of therapy. Given the large number of psychotropic drugs available and their differential pharmacological effects, it would be important to establish specific predictors of response to various classes of drugs.

Results

To identify the molecular mechanisms that may initiate therapeutic effects, whole-genome expression profiling (using 324 Illumina Mouse WG-6 microarrays) of drug-induced alterations in the mouse brain was undertaken, with a focus on the time-course (1, 2, 4 and 8 h) of gene expression changes produced by eighteen major psychotropic drugs: antidepressants, antipsychotics, anxiolytics, psychostimulants and opioids. The resulting database is freely accessible at http://www.genes2mind.org. Bioinformatics approaches led to the identification of three main drug-responsive genomic networks and indicated neurobiological pathways that mediate the alterations in transcription. Each tested psychotropic drug was characterized by a unique gene network expression profile related to its neuropharmacological properties. Functional links that connect expression of the networks to the development of neuronal adaptations (MAPK signaling pathway), control of brain metabolism (adipocytokine pathway), and organization of cell projections (mTOR pathway) were found.

Conclusions

The comparison of gene expression alterations between various drugs opened a new means to classify the different psychoactive compounds and to predict their cellular targets; this is well exemplified in the case of tianeptine, an antidepressant with unknown mechanisms of action. This work represents the first proof-of-concept study of a molecular classification of psychoactive drugs.",2013-09-08 +21031599,PRO-MINE: A bioinformatics repository and analytical tool for TARDBP mutations.,"TDP-43 is a multifunctional RNA-binding protein found to be a major protein component of intracellular inclusions found in neurodegenerative disorders such as Fronto Temporal Lobar Degeneration, Amyotrophic Lateral Sclerosis, and Alzheimer Disease. PRO-MINE (PROtein Mutations In NEurodegeneration) is a database populated with manually curated data from the literature regarding all TDP-43/TDP43/TARDBP gene disease-associated mutations identified to date. A web server interface has been developed to query the database and to provide tools for the analysis of already reported or novel TDP-43 gene mutations. As is usually the case with genetic association studies, assessing the potential impact of identified mutations is of crucial importance, and in order to avoid prediction biases it is essential to compare the prediction results. However, in most cases mutations have to be submitted separately to various prediction tools and the individual results manually merged together afterwards. The implemented web server aims to overcome the problem by providing simultaneous access to several prediction tools and by displaying the results into a single output. Furthermore, the results are displayed together in a comprehensive output for a more convenient analysis and are enriched with additional information about mutations. In addition, our web server can also display the mutation(s) of interest within an alignment of annotated TDP-43 protein sequences from different vertebrate species. 
In this way, the degree of sequence conservation where the mutation(s) occur can be easily tracked and visualized. The web server is freely available to researchers and can be accessed at http://bioinfo.hr/pro-mine.",2011-01-01 +25237650,"Human resources for cancer control in uttar pradesh, India: a case study for low and middle income countries.","For addressing the growing burden of cancer in low and middle income countries, an important first step is to estimate the human resources required for cancer control in a country, province, or city. However, few guidelines are available to decision makers in that regard. Here, we propose a methodology for estimating the human and other resources needed in the state of Uttar Pradesh (UP), India as a case study. Information about the population of UP and its cities was obtained from http://citypopulation.de/. The number of new cancer cases annually for the commonest cancers was estimated from GLOBOCAN 2008. For estimating the human resources needed, the following assumptions were made: newly diagnosed cancer patients need pathology for diagnosis and for treatment surgery, chemotherapy, and/or radiotherapy. The percentage of patients requiring each of those modalities, their average lengths of stay as in-patients, and number of in-patient oncology beds were estimated. The resources already available in UP were determined by a telephone survey and by searching the websites of radiation therapy centers and medical colleges. Twenty-four radiation oncologists at 24 cancer centers in 10 cities responded to the survey. As detailed in this manuscript, an enormous shortage of human resources for cancer control exists in UP. Human resources are the key to diagnosing cancers early and treating them appropriately. Addressing the shortage will not be easy but we hope that the methodology described here can guide decision makers and form a framework for discussion among the various stakeholders. 
This methodology is readily adaptable to local practices and data.",2014-09-04 +23195311,Accessing and mining data from large-scale mouse phenotyping projects.,"Comprehensive phenotyping through the International Mouse Phenotyping Consortium (IMPC)-www.mousephenotype.org-will reveal the pleiotropic functions of every gene in the mouse genome and uncover the wider role of genetic loci within diverse biological systems. The informatics challenge will be to develop an infrastructure to acquire the diverse and complex data sets generated from broad-based phenotyping and disseminate these data in an integrated manner to the scientific community. We describe here the current methodologies implemented to capture and disseminate these data, and plans within the Knockout Mouse Phenotyping Project (KOMP2) (http://commonfund.nih.gov/KOMP2/)-funded informatics consortium to scale these implementations to manage the surge in data from the IMPC.",2012-01-01 +24013926,The Xeno-glycomics database (XDB): a relational database of qualitative and quantitative pig glycome repertoire.,"

Summary

In recent years, the improvement of mass spectrometry-based glycomics techniques (i.e. highly sensitive, quantitative and high-throughput analytical tools) has enabled us to obtain a large dataset of glycans. Here we present a database named Xeno-glycomics database (XDB) that contains cell- or tissue-specific pig glycomes analyzed with mass spectrometry-based techniques, including a comprehensive pig glycan information on chemical structures, mass values, types and relative quantities. It was designed as a user-friendly web-based interface that allows users to query the database according to pig tissue/cell types or glycan masses. This database will contribute in providing qualitative and quantitative information on glycomes characterized from various pig cells/organs in xenotransplantation and might eventually provide new targets in the α1,3-galactosyltransferase gene-knock out pigs era.

Availability

The database can be accessed on the web at http://bioinformatics.snu.ac.kr/xdb.",2013-09-06 +22373911,"OMERO: flexible, model-driven data management for experimental biology.","Data-intensive research depends on tools that manage multidimensional, heterogeneous datasets. We built OME Remote Objects (OMERO), a software platform that enables access to and use of a wide range of biological data. OMERO uses a server-based middleware application to provide a unified interface for images, matrices and tables. OMERO's design and flexibility have enabled its use for light-microscopy, high-content-screening, electron-microscopy and even non-image-genotype data. OMERO is open-source software, available at http://openmicroscopy.org/.",2012-02-28 +24005040,Automated identification of RNA 3D modules with discriminative power in RNA structural alignments.,"Recent progress in predicting RNA structure is moving towards filling the 'gap' in 2D RNA structure prediction where, for example, predicted internal loops often form non-canonical base pairs. This is increasingly recognized with the steady increase of known RNA 3D modules. There is a general interest in matching structural modules known from one molecule to other molecules for which the 3D structure is not known yet. We have created a pipeline, metaRNAmodules, which completely automates extracting putative modules from the FR3D database and mapping of such modules to Rfam alignments to obtain comparative evidence. Subsequently, the modules, initially represented by a graph, are turned into models for the RMDetect program, which allows to test their discriminative power using real and randomized Rfam alignments. An initial extraction of 22 495 3D modules in all PDB files results in 977 internal loop and 17 hairpin modules with clear discriminatory power. Many of these modules describe only minor variants of each other. Indeed, mapping of the modules onto Rfam families results in 35 unique locations in 11 different families. 
The metaRNAmodules pipeline source for the internal loop modules is available at http://rth.dk/resources/mrm.",2013-09-04 +24008419,Infernal 1.1: 100-fold faster RNA homology searches.,"

Summary

Infernal builds probabilistic profiles of the sequence and secondary structure of an RNA family called covariance models (CMs) from structurally annotated multiple sequence alignments given as input. Infernal uses CMs to search for new family members in sequence databases and to create potentially large multiple sequence alignments. Version 1.1 of Infernal introduces a new filter pipeline for RNA homology search based on accelerated profile hidden Markov model (HMM) methods and HMM-banded CM alignment methods. This enables ∼100-fold acceleration over the previous version and ∼10 000-fold acceleration over exhaustive non-filtered CM searches.

Availability

Source code, documentation and the benchmark are downloadable from http://infernal.janelia.org. Infernal is freely licensed under the GNU GPLv3 and should be portable to any POSIX-compliant operating system, including Linux and Mac OS/X. Documentation includes a user's guide with a tutorial, a discussion of file formats and user options and additional details on methods implemented in the software.

Contact

nawrockie@janelia.hhmi.org",2013-09-04 +25554623,Behavioral and cognitive effects of the N-methyl-D-aspartate receptor co-agonist D-serine in healthy humans: initial findings.,"The efficacy of compounds having agonistic activity at the glycine site associated with the N-methyl-D-aspartate receptor (NMDAR) is presently assessed in psychiatric disorders. In contrast to NMDAR antagonists, the neuropsychiatric effects of NMDAR agonists in the healthy human organism are not known. We studied neuropsychiatric and neurochemical effects of the NMDAR-glycine site obligatory co-agonist d-serine (DSR) in healthy subjects using a randomized, controlled crossover challenge design including a baseline assessment day and two DSR/placebo administration days. Thirty-five subjects aged 23-29 years participated in the study and received a 2.1 g orally administered DSR dose. The main outcome measures were the changes in scores of mood-related Visual Analogue Scale (VAS), Continuous Performance Test-Identical Pairs (CPT-IP), and Rey Auditory Verbal Learning Test (RAVLT). DSR acute administration: (1) was well tolerated and resulted at 2 h in ≥ 200 times increase in DSR serum levels; (2) elicited reduced VAS-measured depression and anxiety feelings; (3) improved attention and vigilance as measured by CPT-IP D-prime score; (4) preferentially improved performance in RAVLT list 7 reflecting ability to retain information over interference; (5) had significant but nonspecific effects on Category Fluency and Benton Visual Retention tests; and (6) did not affect glycine and glutamate serum levels. These data indicate that in healthy subjects, DSR reduces subjective feelings of sadness and anxiety and has procognitive effects that are overall opposed to the known effects of NMDAR antagonists. The findings are relevant to translational research of NMDAR function and the development of NMDAR-glycine site treatments for specific psychiatric entities. 
ClinicalTrials.gov: Behavioral and Cognitive Effects of the N-methyl-D-aspartate Receptor (NMDAR) Co-agonist D-serine in Healthy Humans; http://www.clinicaltrials.gov/ct2/show/NCT02051426?term=NCT02051426&rank=1; NCT02051426.",2014-12-24 +24023812,The human urine metabolome.,"Urine has long been a ""favored"" biofluid among metabolomics researchers. It is sterile, easy-to-obtain in large volumes, largely free from interfering proteins or lipids and chemically complex. However, this chemical complexity has also made urine a particularly difficult substrate to fully understand. As a biological waste material, urine typically contains metabolic breakdown products from a wide range of foods, drinks, drugs, environmental contaminants, endogenous waste metabolites and bacterial by-products. Many of these compounds are poorly characterized and poorly understood. In an effort to improve our understanding of this biofluid we have undertaken a comprehensive, quantitative, metabolome-wide characterization of human urine. This involved both computer-aided literature mining and comprehensive, quantitative experimental assessment/validation. The experimental portion employed NMR spectroscopy, gas chromatography mass spectrometry (GC-MS), direct flow injection mass spectrometry (DFI/LC-MS/MS), inductively coupled plasma mass spectrometry (ICP-MS) and high performance liquid chromatography (HPLC) experiments performed on multiple human urine samples. This multi-platform metabolomic analysis allowed us to identify 445 and quantify 378 unique urine metabolites or metabolite species. The different analytical platforms were able to identify (quantify) a total of: 209 (209) by NMR, 179 (85) by GC-MS, 127 (127) by DFI/LC-MS/MS, 40 (40) by ICP-MS and 10 (10) by HPLC. Our use of multiple metabolomics platforms and technologies allowed us to identify several previously unknown urine metabolites and to substantially enhance the level of metabolome coverage. 
It also allowed us to critically assess the relative strengths and weaknesses of different platforms or technologies. The literature review led to the identification and annotation of another 2206 urinary compounds and was used to help guide the subsequent experimental studies. An online database containing the complete set of 2651 confirmed human urine metabolite species, their structures (3079 in total), concentrations, related literature references and links to their known disease associations are freely available at http://www.urinemetabolome.ca.",2013-09-04 +25161237,PconsFold: improved contact predictions improve protein models.,"

Motivation

Recently it has been shown that the quality of protein contact prediction from evolutionary information can be improved significantly if direct and indirect information is separated. Given sufficiently large protein families, the contact predictions contain sufficient information to predict the structure of many protein families. However, since the first studies contact prediction methods have improved. Here, we ask how much the final models are improved if improved contact predictions are used.

Results

In a small benchmark of 15 proteins, we show that the TM-scores of top-ranked models are improved by on average 33% using PconsFold compared with the original version of EVfold. In a larger benchmark, we find that the quality is improved with 15-30% when using PconsC in comparison with earlier contact prediction methods. Further, using Rosetta instead of CNS does not significantly improve global model accuracy, but the chemistry of models generated with Rosetta is improved.

Availability

PconsFold is a fully automated pipeline for ab initio protein structure prediction based on evolutionary information. PconsFold is based on PconsC contact prediction and uses the Rosetta folding protocol. Due to its modularity, the contact prediction tool can be easily exchanged. The source code of PconsFold is available on GitHub at https://www.github.com/ElofssonLab/pcons-fold under the MIT license. PconsC is available from http://c.pcons.net/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +26588955,EMMA: A Software Package for Markov Model Building and Analysis.,"The study of folding and conformational changes of macromolecules by molecular dynamics simulations often requires the generation of large amounts of simulation data that are difficult to analyze. Markov (state) models (MSMs) address this challenge by providing a systematic way to decompose the state space of the molecular system into substates and to estimate a transition matrix containing the transition probabilities between these substates. This transition matrix can be analyzed to reveal the metastable, i.e., long-living, states of the system, its slowest relaxation time scales, and transition pathways and rates, e.g., from unfolded to folded, or from dissociated to bound states. Markov models can also be used to calculate spectroscopic data and thus serve as a way to reconcile experimental and simulation data. To reduce the technical burden of constructing, validating, and analyzing such MSMs, we provide the software framework EMMA that is freely available at https://simtk.org/home/emma .",2012-06-18 +24146609,Reconstructing the genomic content of microbiome taxa through shotgun metagenomic deconvolution.,"Metagenomics has transformed our understanding of the microbial world, allowing researchers to bypass the need to isolate and culture individual taxa and to directly characterize both the taxonomic and gene compositions of environmental samples. However, associating the genes found in a metagenomic sample with the specific taxa of origin remains a critical challenge. Existing binning methods, based on nucleotide composition or alignment to reference genomes allow only a coarse-grained classification and rely heavily on the availability of sequenced genomes from closely related taxa. 
Here, we introduce a novel computational framework, integrating variation in gene abundances across multiple samples with taxonomic abundance data to deconvolve metagenomic samples into taxa-specific gene profiles and to reconstruct the genomic content of community members. This assembly-free method is not bounded by various factors limiting previously described methods of metagenomic binning or metagenomic assembly and represents a fundamentally different approach to metagenomic-based genome reconstruction. An implementation of this framework is available at http://elbo.gs.washington.edu/software.html. We first describe the mathematical foundations of our framework and discuss considerations for implementing its various components. We demonstrate the ability of this framework to accurately deconvolve a set of metagenomic samples and to recover the gene content of individual taxa using synthetic metagenomic samples. We specifically characterize determinants of prediction accuracy and examine the impact of annotation errors on the reconstructed genomes. We finally apply metagenomic deconvolution to samples from the Human Microbiome Project, successfully reconstructing genus-level genomic content of various microbial genera, based solely on variation in gene count. These reconstructed genera are shown to correctly capture genus-specific properties. With the accumulation of metagenomic data, this deconvolution framework provides an essential tool for characterizing microbial taxa never before seen, laying the foundation for addressing fundamental questions concerning the taxa comprising diverse microbial communities.",2013-10-17 +25938125,In vivo neuronal co-expression of mu and delta opioid receptors uncovers new therapeutic perspectives. ,"Opioid receptors belong to the G protein coupled receptor family. They modulate brain function at all levels of neural integration and therefore impact on autonomous, sensory, emotional and cognitive processing. 
In vivo functional interaction between mu and delta opioid receptors are known to take place though it is still debated whether interactions occur at circuitry, cellular or molecular level. Also, the notion of receptor crosstalk via mu-delta heteromers is well documented in vitro but in vivo evidence remains scarce. To identify neurons in which receptor interactions could take place, we designed a unique double mutant knock-in mouse line that expresses functional red-fluorescent mu receptors and green-fluorescent delta receptors. We mapped mu and delta receptor distribution and co-localization throughout the nervous system and created the first interactive brain atlas with concomitant mu-delta visualization at subcellular resolution (http://mordor.ics-mci.fr/). Mu and delta receptors co-localize in neurons from subcortical networks but are mainly detected in separate neurons in the forebrain. Also, co-immunoprecipitation experiments indicated physical proximity in the hippocampus, a prerequisite to mu-delta heteromerization. Altogether, data suggest that mu-delta functional interactions take place at systems level for high-order emotional and cognitive processing whereas mu-delta may interact at cellular level in brain networks essential for survival, which has potential implications for innovative drug design in pain control, drug addiction and eating disorders.",2014-09-01 +24003033,E-TALEN: a web tool to design TALENs for genome engineering.,"Use of transcription activator-like effector nucleases (TALENs) is a promising new technique in the field of targeted genome engineering, editing and reverse genetics. Its applications span from introducing knockout mutations to endogenous tagging of proteins and targeted excision repair. Owing to this wide range of possible applications, there is a need for fast and user-friendly TALEN design tools. We developed E-TALEN (http://www.e-talen.org), a web-based tool to design TALENs for experiments of varying scale. 
E-TALEN enables the design of TALENs against a single target or a large number of target genes. We significantly extended previously published design concepts to consider genomic context and different applications. E-TALEN guides the user through an end-to-end design process of de novo TALEN pairs, which are specific to a certain sequence or genomic locus. Furthermore, E-TALEN offers a functionality to predict targeting and specificity for existing TALENs. Owing to the computational complexity of many of the steps in the design of TALENs, particular emphasis has been put on the implementation of fast yet accurate algorithms. We implemented a user-friendly interface, from the input parameters to the presentation of results. An additional feature of E-TALEN is the in-built sequence and annotation database available for many organisms, including human, mouse, zebrafish, Drosophila and Arabidopsis, which can be extended in the future.",2013-09-03 +24002112,M2SG: mapping human disease-related genetic variants to protein sequences and genomic loci.,"

Summary

Online Mendelian Inheritance in Man (OMIM) is a manually curated compendium of human genetic variants and the corresponding phenotypes, mostly human diseases. Instead of directly documenting the native sequences for gene entries, OMIM links its entries to protein and DNA sequences in other databases. However, because of the existence of gene isoforms and errors in OMIM records, mapping a specific OMIM mutation to its corresponding protein sequence is not trivial. Combining computer programs and extensive manual curation of OMIM full-text descriptions and original literature, we mapped 98% of OMIM amino acid substitutions (AASs) and all SwissProt Variant (SwissVar) disease-related AASs to reference sequences and confidently mapped 99.96% of all AASs to the genomic loci. Based on the results, we developed an online database and interactive web server (M2SG) to (i) retrieve the mapped OMIM and SwissVar variants for a given protein sequence; and (ii) obtain related proteins and mutations for an input disease phenotype. This database will be useful for analyzing sequences, understanding the effect of mutations, identifying important genetic variations and designing experiments on a protein of interest.

Availability and implementation

The database and web server are freely available at http://prodata.swmed.edu/M2S/mut2seq.cgi.",2013-09-03 +22046276,myKaryoView: a light-weight client for visualization of genomic data.,"The Distributed Annotation System (DAS) is a protocol for easy sharing and integration of biological annotations. In order to visualize feature annotations in a genomic context a client is required. Here we present myKaryoView, a simple light-weight DAS tool for visualization of genomic annotation. myKaryoView has been specifically configured to help analyse data derived from personal genomics, although it can also be used as a generic genome browser visualization. Several well-known data sources are provided to facilitate comparison of known genes and normal variation regions. The navigation experience is enhanced by simultaneous rendering of different levels of detail across chromosomes. A simple interface is provided to allow searches for any SNP, gene or chromosomal region. User-defined DAS data sources may also be added when querying the system. We demonstrate myKaryoView capabilities for adding user-defined sources with a set of genetic profiles of family-related individuals downloaded directly from 23andMe. myKaryoView is a web tool for visualization of genomic data specifically designed for direct-to-consumer genomic data that uses publicly available data distributed throughout the Internet. It does not require data to be held locally and it is capable of rendering any feature as long as it conforms to DAS specifications. Configuration and addition of sources to myKaryoView can be done through the interface. Here we show a proof of principle of myKaryoView's ability to display personal genomics data with 23andMe genome data sources. 
The tool is available at: http://mykaryoview.com.",2011-10-26 +25046746,A novel feature extraction scheme with ensemble coding for protein-protein interaction prediction.,"Protein-protein interactions (PPIs) play key roles in most cellular processes, such as cell metabolism, immune response, endocrine function, DNA replication, and transcription regulation. PPI prediction is one of the most challenging problems in functional genomics. Although PPI data have been increasing because of the development of high-throughput technologies and computational methods, many problems are still far from being solved. In this study, a novel predictor was designed by using the Random Forest (RF) algorithm with the ensemble coding (EC) method. To reduce computational time, a feature selection method (DX) was adopted to rank the features and search the optimal feature combination. The DXEC method integrates many features and physicochemical/biochemical properties to predict PPIs. On the Gold Yeast dataset, the DXEC method achieves 67.2% overall precision, 80.74% recall, and 70.67% accuracy. On the Silver Yeast dataset, the DXEC method achieves 76.93% precision, 77.98% recall, and 77.27% accuracy. On the human dataset, the prediction accuracy reaches 80% for the DXEC-RF method. We extended the experiment to a bigger and more realistic dataset that maintains 50% recall on the Yeast All dataset and 80% recall on the Human All dataset. These results show that the DXEC method is suitable for performing PPI prediction. The prediction service of the DXEC-RF classifier is available at http://ailab.ahu.edu.cn:8087/DXECPPI/index.jsp.",2014-07-18 +25161230,Alignment-free protein interaction network comparison.,"

Motivation

Biological network comparison software largely relies on the concept of alignment where close matches between the nodes of two or more networks are sought. These node matches are based on sequence similarity and/or interaction patterns. However, because of the incomplete and error-prone datasets currently available, such methods have had limited success. Moreover, the results of network alignment are in general not amenable for distance-based evolutionary analysis of sets of networks. In this article, we describe Netdis, a topology-based distance measure between networks, which offers the possibility of network phylogeny reconstruction.

Results

We first demonstrate that Netdis is able to correctly separate different random graph model types independent of network size and density. The biological applicability of the method is then shown by its ability to build the correct phylogenetic tree of species based solely on the topology of current protein interaction networks. Our results provide new evidence that the topology of protein interaction networks contains information about evolutionary processes, despite the lack of conservation of individual interactions. As Netdis is applicable to all networks because of its speed and simplicity, we apply it to a large collection of biological and non-biological networks where it clusters diverse networks by type.

Availability and implementation

The source code of the program is freely available at http://www.stats.ox.ac.uk/research/proteins/resources.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-09-01 +24060102,Comparative genomics of metabolic capacities of regulons controlled by cis-regulatory RNA motifs in bacteria.,"

Background

In silico comparative genomics approaches have been efficiently used for functional prediction and reconstruction of metabolic and regulatory networks. Riboswitches are metabolite-sensing structures often found in bacterial mRNA leaders controlling gene expression on transcriptional or translational levels. An increasing number of riboswitches and other cis-regulatory RNAs have been recently classified into numerous RNA families in the Rfam database. High conservation of these RNA motifs provides a unique advantage for their genomic identification and comparative analysis.

Results

A comparative genomics approach implemented in the RegPredict tool was used for reconstruction and functional annotation of regulons controlled by RNAs from 43 Rfam families in diverse taxonomic groups of Bacteria. The inferred regulons include ~5200 cis-regulatory RNAs and more than 12000 target genes in 255 microbial genomes. All predicted RNA-regulated genes were classified into specific and overall functional categories. Analysis of taxonomic distribution of these categories allowed us to establish major functional preferences for each analyzed cis-regulatory RNA motif family. Overall, most RNA motif regulons showed predictable functional content in accordance with their experimentally established effector ligands. Our results suggest that some RNA motifs (including thiamin pyrophosphate and cobalamin riboswitches that control the cofactor metabolism) are widespread and likely originated from the last common ancestor of all bacteria. However, many more analyzed RNA motifs are restricted to a narrow taxonomic group of bacteria and likely represent more recent evolutionary innovations.

Conclusions

The reconstructed regulatory networks for major known RNA motifs substantially expand the existing knowledge of transcriptional regulation in bacteria. The inferred regulons can be used for genetic experiments, functional annotations of genes, metabolic reconstruction and evolutionary analysis. The obtained genome-wide collection of reference RNA motif regulons is available in the RegPrecise database (http://regprecise.lbl.gov/).",2013-09-02 +24002109,Novel human lncRNA-disease association inference based on lncRNA expression profiles.,"

Motivation

More and more evidences have indicated that long-non-coding RNAs (lncRNAs) play critical roles in many important biological processes. Therefore, mutations and dysregulations of these lncRNAs would contribute to the development of various complex diseases. Developing powerful computational models for potential disease-related lncRNAs identification would benefit biomarker identification and drug discovery for human disease diagnosis, treatment, prognosis and prevention.

Results

In this article, we proposed the assumption that similar diseases tend to be associated with functionally similar lncRNAs. Then, we further developed the method of Laplacian Regularized Least Squares for LncRNA-Disease Association (LRLSLDA) in the semisupervised learning framework. Although known disease-lncRNA associations in the database are rare, LRLSLDA still obtained an AUC of 0.7760 in the leave-one-out cross validation, significantly improving the performance of previous methods. We also illustrated the performance of LRLSLDA is not sensitive (even robust) to the parameters selection and it can obtain a reliable performance in all the test classes. Plenty of potential disease-lncRNA associations were publicly released and some of them have been confirmed by recent results in biological experiments. It is anticipated that LRLSLDA could be an effective and important biological tool for biomedical research.

Availability

The code of LRLSLDA is freely available at http://asdcd.amss.ac.cn/Software/Details/2.",2013-09-02 +22830445,DrugLogit: logistic discrimination between drugs and nondrugs including disease-specificity by assigning probabilities based on molecular properties.,"The increasing knowledge of both structure and activity of compounds provides a good basis for enhancing the pharmacological characterization of chemical libraries. In addition, pharmacology can be seen as incorporating both advances from molecular biology as well as chemical sciences, with innovative insight provided from studying target-ligand data from a ligand molecular point of view. Predictions and profiling of libraries of drug candidates have previously focused mainly on certain cases of oral bioavailability. Inclusion of other administration routes and disease-specificity would improve the precision of drug profiling. In this work, recent data are extended, and a probability-based approach is introduced for quantitative and gradual classification of compounds into categories of drugs/nondrugs, as well as for disease- or organ-specificity. Using experimental data of over 1067 compounds and multivariate logistic regressions, the classification shows good performance in training and independent test cases. The regressions have high statistical significance in terms of the robustness of coefficients and 95% confidence intervals provided by a 1000-fold bootstrapping resampling. Besides their good predictive power, the classification functions remain chemically interpretable, containing only one to five variables in total, and the physicochemical terms involved can be easily calculated. The present approach is useful for an improved description and filtering of compound libraries. It can also be applied sequentially or in combinations of filters, as well as adapted to particular use cases. The scores and equations may be able to suggest possible routes for compound or library modification. 
The data is made available for reuse by others, and the equations are freely accessible at http://hermes.chem.ut.ee/~alfx/druglogit.html.",2012-08-07 +24177716,"pyGCluster, a novel hierarchical clustering approach.","

Summary

pyGCluster is a clustering algorithm focusing on noise injection for subsequent cluster validation. The reproducibility of a large amount of clusters obtained with agglomerative hierarchical clustering is assessed. Furthermore, a multitude of different distance-linkage combinations are evaluated. Finally, highly reproducible clusters are meta-clustered into communities. Graphical illustration of the results as node and expression maps is implemented.

Availability and implementation

pyGCluster requires Python 2.7, it is freely available at http://pyGCluster.github.io and published under MIT license. Dependencies are NumPy, SciPy and optionally fastcluster and rpy2.

Contact

christan@fufezan.net

Supplementary information

Supplementary data is available at Bioinformatics online and at http://pyGCluster.github.io.",2013-10-31 +22689750,Statistical model-based testing to evaluate the recurrence of genomic aberrations.,"

Motivation

In cancer genomes, chromosomal regions harboring cancer genes are often subjected to genomic aberrations like copy number alteration and loss of heterozygosity. Given this, finding recurrent genomic aberrations is considered an apt approach for screening cancer genes. Although several permutation-based tests have been proposed for this purpose, none of them are designed to find recurrent aberrations from the genomic dataset without paired normal sample controls. Their application to unpaired genomic data may lead to false discoveries, because they retrieve pseudo-aberrations that exist in normal genomes as polymorphisms.

Results

We develop a new parametric method named parametric aberration recurrence test (PART) to test for the recurrence of genomic aberrations. The introduction of Poisson-binomial statistics allow us to compute small P-values more efficiently and precisely than the previously proposed permutation-based approach. Moreover, we extended PART to cover unpaired data (PART-up) so that there is a statistical basis for analyzing unpaired genomic data. PART-up uses information from unpaired normal sample controls to remove pseudo-aberrations in unpaired genomic data. Using PART-up, we successfully predict recurrent genomic aberrations in cancer cell line samples whose paired normal sample controls are unavailable. This article thus proposes a powerful statistical framework for the identification of driver aberrations, which would be applicable to ever-increasing amounts of cancer genomic data seen in the era of next generation sequencing.

Availability

Our implementations of PART and PART-up are available from http://www.hgc.jp/~niiyan/PART/manual.html.",2012-06-01 +25274406,Dynamic classification using case-specific training cohorts outperforms static gene expression signatures in breast cancer.,"The molecular diversity of breast cancer makes it impossible to identify prognostic markers that are applicable to all breast cancers. To overcome limitations of previous multigene prognostic classifiers, we propose a new dynamic predictor: instead of using a single universal training cohort and an identical list of informative genes to predict the prognosis of new cases, a case-specific predictor is developed for each test case. Gene expression data from 3,534 breast cancers with clinical annotation including relapse-free survival is analyzed. For each test case, we select a case-specific training subset including only molecularly similar cases and a case-specific predictor is generated. This method yields different training sets and different predictors for each new patient. The model performance was assessed in leave-one-out validation and also in 325 independent cases. Prognostic discrimination was high for all cases (n = 3,534, HR = 3.68, p = 1.67 E-56). The dynamic predictor showed higher overall accuracy (0.68) than genomic surrogates for Oncotype DX (0.64), Genomic Grade Index (0.61) or MammaPrint (0.47). The dynamic predictor was also effective in triple-negative cancers (n = 427, HR = 3.08, p = 0.0093) where the above classifiers all failed. Validation in independent patients yielded similar classification power (HR = 3.57). The dynamic classifier is available online at http://www.recurrenceonline.com/?q=Re_training. In summary, we developed a new method to make personalized prognostic prediction using case-specific training cohorts. 
The dynamic predictors outperform static models developed from single historical training cohorts and they also predict well in triple-negative cancers.",2014-10-11 +30722414,First Report of Myrothecium roridum Causing Myrothecium Leaf Spot on Dieffenbachia picta 'Camilla' in Taiwan.,"Dumb cane (Dieffenbachia picta (Lodd.) Schott 'Camilla'), family Araceae, is a popular houseplant in Taiwan. During the winter of 2012, dumb canes with dark brown concentric spots on leaves and bright yellow borders were found in a protected ornamental nursery in Wandan township, Pingtung County, Taiwan. On diseased leaves, fungal fruiting bodies were sometimes observed in the concentric lesions and a fungal isolate was consistently isolated from the lesions. A single spore isolate, myr 2-2, was maintained on potato dextrose agar (PDA) for further tests. To fulfill Koch's postulates, the spores of myr 2-2 were suspended in sterilized distilled water containing 0.05% of Tween 20, 1 × 105 conidia ml-1, and then sprayed on leaves of D. picta 'Camilla' growing in polypropylene plant pots (about 7 cm in diameter), three plants per treatment. For the control, three plants were sprayed with sterilized distilled water containing 0.05% of Tween 20. Both inoculated and non-inoculated plants were covered with plastic bags and incubated in a growth chamber at 26 ± 1°C. Nine to 12 days after inoculation, symptoms described above were observed on inoculated plants whereas the plants in control remained healthy. The same fungus was reisolated from inoculated plants but not from the controls. Furthermore, the fungal pathogen was identified using its physiological, morphological, and molecular characteristics. In the mycelial growth test, the diameter of the fungal colony reaches 58.2 mm on PDA at 25°C after 14 days. The colonies were floccose, white to buff, and sporulate in concentric zones with olivaceous black to black sporodochia bearing viscid masses of conidia. 
Conidia were narrowly ellipsoid with rounded ends. The average size of 100 conidia was 6.25 ± 0.04 × 1.63 ± 0.02 μm. For molecular identification, the rDNA internal transcribed spacer (ITS) of isolate myr 2-2 was PCR amplified using ITS1 (5'-TCCGTAGGTGAACCTGCGG-3') and ITS4 (5'- TCCTCCGCTTATTGATATGC-3') primer pairs (3) and sequenced. The rDNA sequence was deposited in GenBank (KC469695) and showed 100% identity to the Myrothecium roridum isolates BBA 71015 (AJ302001) and BBA 67679 (AJ301995) (4). According to the physiological, morphological (1,2), and molecular characteristics, the fungal isolate was identified as M. roridum Tode ex Fr. To the best of our knowledge, this is the first report of Myrothecium leaf spot caused by M. roridum on D. picta 'Camilla' in Taiwan. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , January 31, 2013. (2) M. Tulloch. Mycol. Pap. 130: 1-42, 1972. (3) T. J. White et al. Page 315 in: PCR Protocols: A Guide to Methods and Applications. M. A. Innis et al., eds. Academic Press, New York, 1990. (4) Y. X. Zhang et al. Plant Dis. 95:1030, 2011.",2013-09-01 +23449093,SplicingCompass: differential splicing detection using RNA-seq data.,"

Motivation

Alternative splicing is central for cellular processes and substantially increases transcriptome and proteome diversity. Aberrant splicing events often have pathological consequences and are associated with various diseases and cancer types. The emergence of next-generation RNA sequencing (RNA-seq) provides an exciting new technology to analyse alternative splicing on a large scale. However, algorithms that enable the analysis of alternative splicing from short-read sequencing are not fully established yet and there are still no standard solutions available for a variety of data analysis tasks.

Results

We present a new method and software to predict genes that are differentially spliced between two different conditions using RNA-seq data. Our method uses geometric angles between the high dimensional vectors of exon read counts. With this, differential splicing can be detected even if the splicing events are composed of higher complexity and involve previously unknown splicing patterns. We applied our approach to two case studies including neuroblastoma tumour data with favourable and unfavourable clinical courses. We show the validity of our predictions as well as the applicability of our method in the context of patient clustering. We verified our predictions by several methods including simulated experiments and complementary in silico analyses. We found a significant number of exons with specific regulatory splicing factor motifs for predicted genes and a substantial number of publications linking those genes to alternative splicing. Furthermore, we could successfully exploit splicing information to cluster tissues and patients. Finally, we found additional evidence of splicing diversity for many predicted genes in normalized read coverage plots and in reads that span exon-exon junctions.

Availability

SplicingCompass is licensed under the GNU GPL and freely available as a package in the statistical language R at http://www.ichip.de/software/SplicingCompass.html",2013-02-28 +22492627,SAVoR: a server for sequencing annotation and visualization of RNA structures.,"RNA secondary structure is required for the proper regulation of the cellular transcriptome. This is because the functionality, processing, localization and stability of RNAs are all dependent on the folding of these molecules into intricate structures through specific base pairing interactions encoded in their primary nucleotide sequences. Thus, as the number of RNA sequencing (RNA-seq) data sets and the variety of protocols for this technology grow rapidly, it is becoming increasingly pertinent to develop tools that can analyze and visualize this sequence data in the context of RNA secondary structure. Here, we present Sequencing Annotation and Visualization of RNA structures (SAVoR), a web server, which seamlessly links RNA structure predictions with sequencing data and genomic annotations to produce highly informative and annotated models of RNA secondary structure. SAVoR accepts read alignment data from RNA-seq experiments and computes a series of per-base values such as read abundance and sequence variant frequency. These values can then be visualized on a customizable secondary structure model. SAVoR is freely available at http://tesla.pcbi.upenn.edu/savor.",2012-04-06 +23775700,DockTrina: docking triangular protein trimers.,"In spite of the abundance of oligomeric proteins within a cell, the structural characterization of protein-protein interactions is still a challenging task. In particular, many of these interactions involve heteromeric complexes, which are relatively difficult to determine experimentally. Hence there is growing interest in using computational techniques to model such complexes. However, assembling large heteromeric complexes computationally is a highly combinatorial problem. 
Nonetheless the problem can be simplified greatly by considering interactions between protein trimers. After dimers and monomers, triangular trimers (i.e. trimers with pair-wise contacts between all three pairs of proteins) are the most frequently observed quaternary structural motifs according to the three-dimensional (3D) complex database. This article presents DockTrina, a novel protein docking method for modeling the 3D structures of nonsymmetrical triangular trimers. The method takes as input pair-wise contact predictions from a rigid body docking program. It then scans and scores all possible combinations of pairs of monomers using a very fast root mean square deviation test. Finally, it ranks the predictions using a scoring function which combines triples of pair-wise contact terms and a geometric clash penalty term. The overall approach takes less than 2 min per complex on a modern desktop computer. The method is tested and validated using a benchmark set of 220 bound and seven unbound protein trimer structures. DockTrina will be made available at http://nano-d.inrialpes.fr/software/docktrina.",2013-08-31 +24465922,A self-report risk index to predict occurrence of dementia in three independent cohorts of older adults: the ANU-ADRI.,"

Background and aims

The Australian National University AD Risk Index (ANU-ADRI, http://anuadri.anu.edu.au) is a self-report risk index developed using an evidence-based medicine approach to measure risk of Alzheimer's disease (AD). We aimed to evaluate the extent to which the ANU-ADRI can predict the risk of AD in older adults and to compare the ANU-ADRI to the dementia risk index developed from the Cardiovascular Risk Factors, Aging and Dementia (CAIDE) study for middle-aged cohorts.

Methods

This study included three validation cohorts, i.e., the Rush Memory and Aging Study (MAP) (n = 903, age ≥53 years), the Kungsholmen Project (KP) (n = 905, age ≥75 years), and the Cardiovascular Health Cognition Study (CVHS) (n = 2496, age ≥65 years) that were each followed for dementia. Baseline data were collected on exposure to the 15 risk factors included in the ANU-ADRI of which MAP had 10, KP had 8 and CVHS had 9. Risk scores and C-statistics were computed for individual participants for the ANU-ADRI and the CAIDE index.

Results

For the ANU-ADRI using available data, the MAP study c-statistic was 0·637 (95% CI 0·596-0·678), for the KP study it was 0·740 (0·712-0·768) and for the CVHS it was 0·733 (0·691-0·776) for predicting AD. When a common set of risk and protective factors were used c-statistics were 0.689 (95% CI 0.650-0.727), 0.666 (0.628-0.704) and 0.734 (0.707-0.761) for MAP, KP and CVHS respectively. Results for CAIDE ranged from c-statistics of 0.488 (0.427-0.554) to 0.595 (0.565-0.625).

Conclusion

A composite risk score derived from the ANU-ADRI weights including 8-10 risk or protective factors is a valid, self-report tool to identify those at risk of AD and dementia. The accuracy can be further improved in studies including more risk factors and younger cohorts with long-term follow-up.",2014-01-23 +23145042,"Systematic determination of replication activity type highlights interconnections between replication, chromatin structure and nuclear localization.","DNA replication is a highly regulated process, with each genomic locus replicating at a distinct time of replication (ToR). Advances in ToR measurement technology enabled several genome-wide profiling studies that revealed tight associations between ToR and general genomic features and a remarkable ToR conservation in mammals. Genome wide studies further showed that at the hundreds kb-to-megabase scale the genome can be divided into constant ToR regions (CTRs) in which the replication process propagates at a faster pace due to the activation of multiple origins and temporal transition regions (TTRs) in which the replication process propagates at a slower pace. We developed a computational tool that assigns a ToR to every measured locus and determines its replication activity type (CTR versus TTR). Our algorithm, ARTO (Analysis of Replication Timing and Organization), uses signal processing methods to fit a constant piece-wise linear curve to the measured raw data. We tested our algorithm and provide performance and usability results. A Matlab implementation of ARTO is available at http://bioinfo.cs.technion.ac.il/people/zohar/ARTO/. Applying our algorithm to ToR data measured in multiple mouse and human samples allowed precise genome-wide ToR determination and replication activity type characterization. Analysis of the results highlighted the plasticity of the replication program. For example, we observed significant ToR differences in 10-25% of the genome when comparing different tissue types. 
Our analyses also provide evidence for activity type differences in up to 30% of the probes. Integration of the ToR data with multiple aspects of chromosome organization characteristics suggests that ToR plays a role in shaping the regional chromatin structure. Namely, repressive chromatin marks, are associated with late ToR both in TTRs and CTRs. Finally, characterization of the differences between TTRs and CTRs, with matching ToR, revealed that TTRs are associated with compact chromatin and are located significantly closer to the nuclear envelope. Supplementary material is available. Raw and processed data were deposited in Geo (GSE17236).",2012-11-07 +25869025,The impact of two different inspiratory to expiratory ratios (1:1 and 1:2) on respiratory mechanics and oxygenation during volume-controlled ventilation in robot-assisted laparoscopic radical prostatectomy: a randomized controlled trial.,"

Background

Volume-controlled ventilation with a prolonged inspiratory to expiratory ratio (I:E ratio) has been used to optimize gas exchange and respiratory mechanics in various surgical settings. We hypothesized that, when compared with an I:E ratio of 1:2, a prolonged I:E ratio of 1:1 would improve respiratory mechanics without reducing cardiac output (CO) during pneumoperitoneum and steep Trendelenburg positioning, both of which can impair respiratory function in robot-assisted laparoscopic radical prostatectomy. Furthermore, we evaluated its effect on oxygenation during robot-assisted laparoscopic radical prostatectomy.

Methods

Eighty patients undergoing robot-assisted laparoscopic radical prostatectomy were randomly allocated to receive an I:E ratio of either 1:1 (group 1:1) or 1:2 (group 1:2). The primary endpoint, peak airway pressure (Ppeak), as well as hemodynamic data, including cardiac output (CO) and arterial oxygen tension (PaO2), were compared between groups at four time points: ten minutes after anesthesia induction (T1), 30 and 60 min after pneumoperitoneum with steep Trendelenburg positioning (T2 and T3), and ten minutes after supine positioning (T4). Overall comparisons were made between groups using linear mixed model analysis with post hoc testing of individual time points adjusted using a Bonferroni correction.

Results

Linear mixed model analysis showed a significant overall difference in Ppeak between the two groups (P < 0.001). Post hoc analysis showed a significantly lower mean (SD) Ppeak in group 1:1 than in group 1:2 at T2 [28.4 (4.0) cm H2O vs 32.8 (5.2) cm H2O, respectively; mean difference, 4.3 cm H2O; 95% confidence interval (CI), 2.3 to 6.4; P < 0.001] and T3 [27.8 (3.9) cm H2O vs 32.6 (5.0) cm H2O, respectively; mean difference, 4.7 cm H2O; 95% CI, 2.7 to 6.7; P < 0.001]. The CO assessed over these time points was comparable in both groups (P = 0.784). In addition, there were no significant differences in PaO2 between the two groups (P = 0.521).

Conclusions

Compared with an I:E ratio of 1:2, a ratio of 1:1 lowered Ppeak without reducing CO during pneumoperitoneum and steep Trendelenburg positioning. Nevertheless, our results did not support its use solely for improving oxygenation. This trial was registered at http://clinicaltrials.gov/ (NCT01892449).",2015-04-14 +23998809,LongevityMap: a database of human genetic variants associated with longevity.,"Understanding the genetic basis of human longevity remains a challenge but could lead to life-extending interventions and better treatments for age-related diseases. Toward this end we developed the LongevityMap (http://genomics.senescence.info/longevity/), the first database of genes, loci, and variants studied in the context of human longevity and healthy ageing. We describe here its content and interface, and discuss how it can help to unravel the genetics of human longevity.",2013-08-30 +23989920,[Critical incidents in preclinical emergency airway management : Evaluation of the CIRS emergency medicine databank].,"

Background

Many patients are victims of disastrous incidents during medical interventions. One of the obligations of physicians is to identify these incidents and to subsequently develop preventive strategies in order to prevent future events. Airway management and prehospital emergency medicine are of particular interest as both categories frequently show very dynamic developments. Incidents in this particular area can lead to serious injury but at the same time it has never been analyzed what kind of incidents might harm patients during prehospital airway management.

Materials and methods

The German website http://www.cirs-notfallmedizin.de (CIRS critical incident reporting systems) offers anonymous reporting of critical incidents in prehospital emergency medicine. All incidents reported between 2005 and 2012 were screened to identify those which were concerned with airway management and four experts in this field analyzed the incidents and performed a root cause analysis.

Results

The database contained 845 reports. The authors considered 144 reports to be airway management related and identified 10 root causes: indications for intubation but no intubation performed (n = 8), no indications for intubation but intubation attempt performed (n = 7), wrong medication (n = 25), insufficient practical skills (n = 46), no use of alternative airway management (n = 7), insufficient handling before or after intubation (n = 27), defect equipment (n = 28), lack of equipment (n = 31), others (n = 18) and factors that cannot be influenced (n = 12).

Conclusions

The incidents that were reported via the website http://www.cirs-notfallmedizin.de and that occurred during airway management in prehospital emergency medicine are described. To improve practical airway management skills of emergency physicians are one of the most important tasks in order to prevent critical incidents and are discussed in the article.",2013-08-30 +25494203,Global protected area expansion is compromised by projected land-use and parochialism.,"Protected areas are one of the main tools for halting the continuing global biodiversity crisis caused by habitat loss, fragmentation and other anthropogenic pressures. According to the Aichi Biodiversity Target 11 adopted by the Convention on Biological Diversity, the protected area network should be expanded to at least 17% of the terrestrial world by 2020 (http://www.cbd.int/sp/targets). To maximize conservation outcomes, it is crucial to identify the best expansion areas. Here we show that there is a very high potential to increase protection of ecoregions and vertebrate species by expanding the protected area network, but also identify considerable risk of ineffective outcomes due to land-use change and uncoordinated actions between countries. We use distribution data for 24,757 terrestrial vertebrates assessed under the International Union for the Conservation of Nature (IUCN) 'red list of threatened species', and terrestrial ecoregions (827), modified by land-use models for the present and 2040, and introduce techniques for global and balanced spatial conservation prioritization. First, we show that with a coordinated global protected area network expansion to 17% of terrestrial land, average protection of species ranges and ecoregions could triple. Second, if projected land-use change by 2040 (ref. 11) takes place, it becomes infeasible to reach the currently possible protection levels, and over 1,000 threatened species would lose more than 50% of their present effective ranges worldwide. 
Third, we demonstrate a major efficiency gap between national and global conservation priorities. Strong evidence is shown that further biodiversity loss is unavoidable unless international action is quickly taken to balance land-use and biodiversity conservation. The approach used here can serve as a framework for repeatable and quantitative assessment of efficiency, gaps and expansion of the global protected area network globally, regionally and nationally, considering current and projected land-use pressures.",2014-11-14 +23435658,BLIND: a set of semantic feature norms from the congenitally blind.,"Feature-based descriptions of concepts produced by subjects in a property generation task are widely used in cognitive science to develop empirically grounded concept representations and to study systematic trends in such representations. This article introduces BLIND, a collection of parallel semantic norms collected from a group of congenitally blind Italian subjects and comparable sighted subjects. The BLIND norms comprise descriptions of 50 nouns and 20 verbs. All the materials have been semantically annotated and translated into English, to make them easily accessible to the scientific community. The article also presents a preliminary analysis of the BLIND data that highlights both the large degree of overlap between the groups and interesting differences. The complete BLIND norms are freely available and can be downloaded from http://sesia.humnet.unipi.it/blind_data .",2013-12-01 +23373753,MyCompoundID: using an evidence-based metabolome library for metabolite identification.,"Identification of unknown metabolites is a major challenge in metabolomics. Without the identities of the metabolites, the metabolome data generated from a biological sample cannot be readily linked with the proteomic and genomic information for studies in systems biology and medicine. 
We have developed a web-based metabolite identification tool ( http://www.mycompoundid.org ) that allows searching and interpreting mass spectrometry (MS) data against a newly constructed metabolome library composed of 8,021 known human endogenous metabolites and their predicted metabolic products (375,809 compounds from one metabolic reaction and 10,583,901 from two reactions). As an example, in the analysis of a simple extract of human urine or plasma and the whole human urine by liquid chromatography-mass spectrometry and MS/MS, we are able to identify at least two times more metabolites in these samples than by using a standard human metabolome library. In addition, it is shown that the evidence-based metabolome library (EML) provides a much superior performance in identifying putative metabolites from a human urine sample, compared to the use of the ChemPub and KEGG libraries.",2013-03-06 +24170674,Structure-based barcoding of proteins.,"A reduced representation in the format of a barcode has been developed to provide an overview of the topological nature of a given protein structure from 3D coordinate file. The molecular structure of a protein coordinate file from Protein Data Bank is first expressed in terms of an alpha-numero code and further converted to a barcode image. The barcode representation can be used to compare and contrast different proteins based on their structure. The utility of this method has been exemplified by comparing structural barcodes of proteins that belong to same fold family, and across different folds. In addition to this, we have attempted to provide an illustration to (i) the structural changes often seen in a given protein molecule upon interaction with ligands and (ii) Modifications in overall topology of a given protein during evolution. 
The program is fully downloadable from the website http://www.iitg.ac.in/probar/.",2014-01-01 +24282333,Fibrinogen species as resolved by HPLC-SAXS data processing within the UltraScan Solution Modeler (US-SOMO) enhanced SAS module.,"Fibrinogen is a large heterogeneous aggregation/degradation-prone protein playing a central role in blood coagulation and associated pathologies, whose structure is not completely resolved. When a high-molecular-weight fraction was analyzed by size-exclusion high-performance liquid chromatography/small-angle X-ray scattering (HPLC-SAXS), several composite peaks were apparent and because of the stickiness of fibrinogen the analysis was complicated by severe capillary fouling. Novel SAS analysis tools developed as a part of the UltraScan Solution Modeler (US-SOMO; http://somo.uthscsa.edu/), an open-source suite of utilities with advanced graphical user interfaces whose initial goal was the hydrodynamic modeling of biomacromolecules, were implemented and applied to this problem. They include the correction of baseline drift due to the accumulation of material on the SAXS capillary walls, and the Gaussian decomposition of non-baseline-resolved HPLC-SAXS elution peaks. It was thus possible to resolve at least two species co-eluting under the fibrinogen main monomer peak, probably resulting from in-column degradation, and two others under an oligomers peak. The overall and cross-sectional radii of gyration, molecular mass and mass/length ratio of all species were determined using the manual or semi-automated procedures available within the US-SOMO SAS module. Differences between monomeric species and linear and sideways oligomers were thus identified and rationalized. 
This new US-SOMO version additionally contains several computational and graphical tools, implementing functionalities such as the mapping of residues contributing to particular regions of P(r), and an advanced module for the comparison of primary I(q) versus q data with model curves computed from atomic level structures or bead models. It should be of great help in multi-resolution studies involving hydrodynamics, solution scattering and crystallographic/NMR data.",2013-11-15 +22073191,NNAlign: a web-based prediction method allowing non-expert end-user discovery of sequence motifs in quantitative peptide data.,"Recent advances in high-throughput technologies have made it possible to generate both gene and protein sequence data at an unprecedented rate and scale thereby enabling entirely new ""omics""-based approaches towards the analysis of complex biological processes. However, the amount and complexity of data that even a single experiment can produce seriously challenges researchers with limited bioinformatics expertise, who need to handle, analyze and interpret the data before it can be understood in a biological context. Thus, there is an unmet need for tools allowing non-bioinformatics users to interpret large data sets. We have recently developed a method, NNAlign, which is generally applicable to any biological problem where quantitative peptide data is available. This method efficiently identifies underlying sequence patterns by simultaneously aligning peptide sequences and identifying motifs associated with quantitative readouts. Here, we provide a web-based implementation of NNAlign allowing non-expert end-users to submit their data (optionally adjusting method parameters), and in return receive a trained method (including a visual representation of the identified motif) that subsequently can be used as prediction method and applied to unknown proteins/peptides. 
We have successfully applied this method to several different data sets including peptide microarray-derived sets containing more than 100,000 data points. NNAlign is available online at http://www.cbs.dtu.dk/services/NNAlign.",2011-11-02 +24493034,SegAnnDB: interactive Web-based genomic segmentation.,"

Motivation

DNA copy number profiles characterize regions of chromosome gains, losses and breakpoints in tumor genomes. Although many models have been proposed to detect these alterations, it is not clear which model is appropriate before visual inspection the signal, noise and models for a particular profile.

Results

We propose SegAnnDB, a Web-based computer vision system for genomic segmentation: first, visually inspect the profiles and manually annotate altered regions, then SegAnnDB determines the precise alteration locations using a mathematical model of the data and annotations. SegAnnDB facilitates collaboration between biologists and bioinformaticians, and uses the University of California, Santa Cruz genome browser to visualize copy number alterations alongside known genes.

Availability and implementation

The breakpoints project on INRIA GForge hosts the source code, an Amazon Machine Image can be launched and a demonstration Web site is http://bioviz.rocq.inria.fr.",2014-02-03 +23985157,MONA - Interactive manipulation of molecule collections.,": Working with small-molecule datasets is a routine task for cheminformaticians and chemists. The analysis and comparison of vendor catalogues and the compilation of promising candidates as starting points for screening campaigns are but a few very common applications. The workflows applied for this purpose usually consist of multiple basic cheminformatics tasks such as checking for duplicates or filtering by physico-chemical properties. Pipelining tools allow to create and change such workflows without much effort, but usually do not support interventions once the pipeline has been started. In many contexts, however, the best suited workflow is not known in advance, thus making it necessary to take the results of the previous steps into consideration before proceeding.To support intuition-driven processing of compound collections, we developed MONA, an interactive tool that has been designed to prepare and visualize large small-molecule datasets. Using an SQL database common cheminformatics tasks such as analysis and filtering can be performed interactively with various methods for visual support. Great care was taken in creating a simple, intuitive user interface which can be instantly used without any setup steps. MONA combines the interactivity of molecule database systems with the simplicity of pipelining tools, thus enabling the case-to-case application of chemistry expert knowledge. 
The current version is available free of charge for academic use and can be downloaded at http://www.zbh.uni-hamburg.de/mona.",2013-08-28 +22792381,ROCS: receiver operating characteristic surface for class-skewed high-throughput data.,"The receiver operating characteristic (ROC) curve is an important tool to gauge the performance of classifiers. In certain situations of high-throughput data analysis, the data is heavily class-skewed, i.e. most features tested belong to the true negative class. In such cases, only a small portion of the ROC curve is relevant in practical terms, rendering the ROC curve and its area under the curve (AUC) insufficient for the purpose of judging classifier performance. Here we define an ROC surface (ROCS) using true positive rate (TPR), false positive rate (FPR), and true discovery rate (TDR). The ROC surface, together with the associated quantities, volume under the surface (VUS) and FDR-controlled area under the ROC curve (FCAUC), provide a useful approach for gauging classifier performance on class-skewed high-throughput data. The implementation as an R package is available at http://userwww.service.emory.edu/~tyu8/ROCS/.",2012-07-06 +23634657,A protocol for a systematic review on the impact of unpublished studies and studies published in the gray literature in meta-analyses.,"

Background

Meta-analyses are particularly vulnerable to the effects of publication bias. Despite methodologists' best efforts to locate all evidence for a given topic the most comprehensive searches are likely to miss unpublished studies and studies that are published in the gray literature only. If the results of the missing studies differ systematically from the published ones, a meta-analysis will be biased with an inaccurate assessment of the intervention's effects.As part of the OPEN project (http://www.open-project.eu) we will conduct a systematic review with the following objectives:▪ To assess the impact of studies that are not published or published in the gray literature on pooled effect estimates in meta-analyses (quantitative measure).▪ To assess whether the inclusion of unpublished studies or studies published in the gray literature leads to different conclusions in meta-analyses (qualitative measure).

Methods/design

Inclusion criteria

Methodological research projects of a cohort of meta-analyses which compare the effect of the inclusion or exclusion of unpublished studies or studies published in the gray literature.

Literature search

To identify relevant research projects we will conduct electronic searches in Medline, Embase and The Cochrane Library; check reference lists; and contact experts.

Outcomes

1) The extent to which the effect estimate in a meta-analyses changes with the inclusion or exclusion of studies that were not published or published in the gray literature; and 2) the extent to which the inclusion of unpublished studies impacts the meta-analyses' conclusions.

Data collection

Information will be collected on the area of health care; the number of meta-analyses included in the methodological research project; the number of studies included in the meta-analyses; the number of study participants; the number and type of unpublished studies; studies published in the gray literature and published studies; the sources used to retrieve studies that are unpublished, published in the gray literature, or commercially published; and the validity of the methodological research project.

Data synthesis

DATA SYNTHESIS will involve descriptive and statistical summaries of the findings of the included methodological research projects.

Discussion

Results are expected to be publicly available in the middle of 2013.",2013-05-02 +25170025,Cell population identification using fluorescence-minus-one controls with a one-class classifying algorithm.,"

Motivation

The tried and true approach of flow cytometry data analysis is to manually gate on each biomarker separately, which is feasible for a small number of biomarkers, e.g. less than five. However, this rapidly becomes confusing as the number of biomarker increases. Furthermore, multivariate structure is not taken into account. Recently, automated gating algorithms have been implemented, all of which rely on unsupervised learning methodology. However, all unsupervised learning outputs suffer the same difficulties in validation in the absence of external knowledge, regardless of application domain.

Results

We present a new semi-automated algorithm for population discovery that is based on comparison to fluorescence-minus-one controls, thus transferring the problem into that of one-class classification, as opposed to being an unsupervised learning problem. The novel one-class classification algorithm is based on common principal components and can accommodate complex mixtures of multivariate densities. Computational time is short, and the simple nature of the calculations means the algorithm can easily be adapted to process large numbers of cells (10(6)). Furthermore, we are able to find rare cell populations as well as populations with low biomarker concentration, both of which are inherently hard to do in an unsupervised learning context without prior knowledge of the samples' composition.

Availability and implementation

R scripts are available via https://fccf.mpiib-berlin.mpg.de/daten/drfz/bioinformatics/with{username,password}={bioinformatics,Sar=Gac4}.",2014-08-27 +24782517,NEP: web server for epitope prediction based on antibody neutralization of viral strains with diverse sequences.,"Delineation of the antigenic site, or epitope, recognized by an antibody can provide clues about functional vulnerabilities and resistance mechanisms, and can therefore guide antibody optimization and epitope-based vaccine design. Previously, we developed an algorithm for antibody-epitope prediction based on antibody neutralization of viral strains with diverse sequences and validated the algorithm on a set of broadly neutralizing HIV-1 antibodies. Here we describe the implementation of this algorithm, NEP (Neutralization-based Epitope Prediction), as a web-based server. The users must supply as input: (i) an alignment of antigen sequences of diverse viral strains; (ii) neutralization data for the antibody of interest against the same set of antigen sequences; and (iii) (optional) a structure of the unbound antigen, for enhanced prediction accuracy. The prediction results can be downloaded or viewed interactively on the antigen structure (if supplied) from the web browser using a JSmol applet. Since neutralization experiments are typically performed as one of the first steps in the characterization of an antibody to determine its breadth and potency, the NEP server can be used to predict antibody-epitope information at no additional experimental costs. NEP can be accessed on the internet at http://exon.niaid.nih.gov/nep.",2014-04-29 +22155946,GWAtoolbox: an R package for fast quality control and handling of genome-wide association studies meta-analysis data.,"

Summary

The GWAtoolbox is an R package that standardizes and accelerates the handling of data from genome-wide association studies (GWAS), particularly in the context of large-scale GWAS meta-analyses. A key feature of GWAtoolbox is its ability to perform quality control (QC) of any number of files in a matter of minutes. The implemented workflow has been structured to check three particular data quality aspects: (i) data formatting, (ii) quality of the GWAS results and (iii) data consistency across studies. Output consists of an extensive list of quality statistics and plots which allow inspection of individual files and between-study comparison to identify systematic bias.

Availability

http://www.eurac.edu/GWAtoolbox

Contact

cfuchsb@umich.edu; daniel.taliun@eurac.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-08 +23988461,An evidence-based clinical guideline for antibiotic prophylaxis in spine surgery.,"

Background context

The North American Spine Society's (NASS) Evidence-Based Clinical Guideline on Antibiotic Prophylaxis in Spine Surgery provides evidence-based recommendations to address key clinical questions regarding the efficacy and the appropriate antibiotic prophylaxis protocol to prevent surgical site infections in patients undergoing spine surgery. The guideline is intended to address these questions based on the highest quality clinical literature available on this subject as of June 2011.

Purpose

Provide an evidence-based educational tool to assist spine surgeons in preventing surgical site infections.

Study design

Systematic review and evidence-based clinical guideline.

Methods

This guideline is a product of the Antibiotic Prophylaxis in Spine Surgery Work Group of NASS Evidence-Based Guideline Development Committee. The work group consisted of neurosurgeons and orthopedic surgeons who specialize in spine surgery and are trained in the principles of evidence-based analysis. A literature search addressing each question and using a specific search protocol was performed on English language references found in MEDLINE (PubMed), ACP Journal Club, Cochrane Database of Systematic Reviews Database of Abstracts of Reviews of Effectiveness, Cochrane Central Register of Controlled Trials, EMBASE (Drugs and Pharmacology), and Web of Science to identify articles published since the search performed for the original guideline. The relevant literature was then independently rated using the NASS-adopted standardized levels of evidence. An evidentiary table was created for each of the questions. Final recommendations to answer each clinical question were developed via work group discussion, and grades were assigned to the recommendations using standardized grades of recommendation. In the absence of Levels I to IV evidence, work group consensus statements have been developed using a modified nominal group technique, and these statements are clearly identified as such in the guideline.

Results

Sixteen clinical questions were formulated and addressed, and the answers are summarized in this article. The respective recommendations were graded by the strength of the supporting literature, which was stratified by levels of evidence.

Conclusions

The clinical guideline for antibiotic prophylaxis in spine surgery has been created using the techniques of evidence-based medicine and best available evidence to aid practitioners in the care of patients undergoing spine surgery. The entire guideline document, including the evidentiary tables, suggestions for future research, and all the references, is available electronically on the NASS Web site at http://www.spine.org/Pages/PracticePolicy/ClinicalCare/ClinicalGuidlines/Default.aspx and will remain updated on a timely schedule.",2013-08-27 +25286919,Computing autocatalytic sets to unravel inconsistencies in metabolic network reconstructions.,"

Motivation

Genome-scale metabolic network reconstructions have been established as a powerful tool for the prediction of cellular phenotypes and metabolic capabilities of organisms. In recent years, the number of network reconstructions has been constantly increasing, mostly because of the availability of novel (semi-)automated procedures, which enabled the reconstruction of metabolic models based on individual genomes and their annotation. The resulting models are widely used in numerous applications. However, the accuracy and predictive power of network reconstructions are commonly limited by inherent inconsistencies and gaps.

Results

Here we present a novel method to validate metabolic network reconstructions based on the concept of autocatalytic sets. Autocatalytic sets correspond to collections of metabolites that, besides enzymes and a growth medium, are required to produce all biomass components in a metabolic model. These autocatalytic sets are well-conserved across all domains of life, and their identification in specific genome-scale reconstructions allows us to draw conclusions about potential inconsistencies in these models. The method is capable of detecting inconsistencies, which are neglected by other gap-finding methods. We tested our method on the Model SEED, which is the largest repository for automatically generated genome-scale network reconstructions. In this way, we were able to identify a significant number of missing pathways in several of these reconstructions. Hence, the method we report represents a powerful tool to identify inconsistencies in large-scale metabolic networks.

Availability and implementation

The method is available as source code on http://users.minet.uni-jena.de/∼m3kach/ASBIG/ASBIG.zip.

Contact

christoph.kaleta@uni-jena.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-10-05 +28825060,Traversing the k-mer Landscape of NGS Read Datasets for Quality Score Sparsification.,"It is becoming increasingly impractical to indefinitely store raw sequencing data for later processing in an uncompressed state. In this paper, we describe a scalable compressive framework, Read-Quality-Sparsifier (RQS), which substantially outperforms the compression ratio and speed of other de novo quality score compression methods while maintaining SNP-calling accuracy. Surprisingly, RQS also improves the SNP-calling accuracy on a gold-standard, real-life sequencing dataset (NA12878) using a k-mer density profile constructed from 77 other individuals from the 1000 Genomes Project. This improvement in downstream accuracy emerges from the observation that quality score values within NGS datasets are inherently encoded in the k-mer landscape of the genomic sequences. To our knowledge, RQS is the first scalable sequence based quality compression method that can efficiently compress quality scores of terabyte-sized and larger sequencing datasets.

Availability

An implementation of our method, RQS, is available for download at: http://rqs.csail.mit.edu/.",2014-04-01 +24171431,Nonlinear scoring functions for similarity-based ligand docking and binding affinity prediction.,"A common strategy for virtual screening considers a systematic docking of a large library of organic compounds into the target sites in protein receptors with promising leads selected based on favorable intermolecular interactions. Despite a continuous progress in the modeling of protein-ligand interactions for pharmaceutical design, important challenges still remain, thus the development of novel techniques is required. In this communication, we describe eSimDock, a new approach to ligand docking and binding affinity prediction. eSimDock employs nonlinear machine learning-based scoring functions to improve the accuracy of ligand ranking and similarity-based binding pose prediction, and to increase the tolerance to structural imperfections in the target structures. In large-scale benchmarking using the Astex/CCDC data set, we show that 53.9% (67.9%) of the predicted ligand poses have RMSD of <2 Å (<3 Å). Moreover, using binding sites predicted by recently developed eFindSite, eSimDock models ligand binding poses with an RMSD of 4 Å for 50.0-39.7% of the complexes at the protein homology level limited to 80-40%. Simulations against non-native receptor structures, whose mean backbone rearrangements vary from 0.5 to 5.0 Å Cα-RMSD, show that the ratio of docking accuracy and the estimated upper bound is at a constant level of ∼0.65. Pearson correlation coefficient between experimental and predicted by eSimDock Ki values for a large data set of the crystal structures of protein-ligand complexes from BindingDB is 0.58, which decreases only to 0.46 when target structures distorted to 3.0 Å Cα-RMSD are used. Finally, two case studies demonstrate that eSimDock can be customized to specific applications as well. 
These encouraging results show that the performance of eSimDock is largely unaffected by the deformations of ligand binding regions, thus it represents a practical strategy for across-proteome virtual screening using protein models. eSimDock is freely available to the academic community as a Web server at http://www.brylinski.org/esimdock .",2013-11-11 +23691189,AliquotG: an improved heuristic algorithm for genome aliquoting.,"An extant genome can be the descendant of an ancient polyploid genome. The genome aliquoting problem is to reconstruct the latter from the former such that the rearrangement distance (i.e., the number of genome rearrangements necessary to transform the former into the latter) is minimal. Though several heuristic algorithms have been published, here, we sought improved algorithms for the problem with respect to the double cut and join (DCJ) distance. The new algorithm makes use of partial and contracted partial graphs, and locally minimizes the distance. Our test results with simulation data indicate that it reliably recovers gene order of the ancestral polyploid genome even when the ancestor is ancient. We also compared the performance of our method with an earlier method using simulation data sets and found that our algorithm has higher accuracy. It is known that vertebrates had undergone two rounds of whole-genome duplication (2R-WGD) during early vertebrate evolution. We used the new algorithm to calculate the DCJ distance between three modern vertebrate genomes and their 2R-WGD ancestor and found that the rearrangement rate might have slowed down significantly since the 2R-WGD. The software AliquotG implementing the algorithm is available as an open-source package from our website (http://mosas.sysu.edu.cn/genome/download_softwares.php).",2013-05-14 +23329413,Grape RNA-Seq analysis pipeline environment.,"

Motivation

The avalanche of data arriving since the development of NGS technologies has prompted the need for developing fast, accurate and easily automated bioinformatic tools capable of dealing with massive datasets. Among the most productive applications of NGS technologies is the sequencing of cellular RNA, known as RNA-Seq. Although RNA-Seq provides similar or superior dynamic range than microarrays at similar or lower cost, the lack of standard and user-friendly pipelines is a bottleneck preventing RNA-Seq from becoming the standard for transcriptome analysis.

Results

In this work we present a pipeline for processing and analyzing RNA-Seq data, that we have named Grape (Grape RNA-Seq Analysis Pipeline Environment). Grape supports raw sequencing reads produced by a variety of technologies, either in FASTA or FASTQ format, or as prealigned reads in SAM/BAM format. A minimal Grape configuration consists of the file location of the raw sequencing reads, the genome of the species and the corresponding gene and transcript annotation. Grape first runs a set of quality control steps, and then aligns the reads to the genome, a step that is omitted for prealigned read formats. Grape next estimates gene and transcript expression levels, calculates exon inclusion levels and identifies novel transcripts. Grape can be run on a single computer or in parallel on a computer cluster. It is distributed with specific mapping and quantification tools, but given its modular design, any tool supporting popular data interchange formats can be integrated.

Availability

Grape can be obtained from the Bioinformatics and Genomics website at: http://big.crg.cat/services/grape.",2013-01-17 +24607570,MetaCAA: A clustering-aided methodology for efficient assembly of metagenomic datasets.,"A key challenge in analyzing metagenomics data pertains to assembly of sequenced DNA fragments (i.e. reads) originating from various microbes in a given environmental sample. Several existing methodologies can assemble reads originating from a single genome. However, these methodologies cannot be applied for efficient assembly of metagenomic sequence datasets. In this study, we present MetaCAA - a clustering-aided methodology which helps in improving the quality of metagenomic sequence assembly. MetaCAA initially groups sequences constituting a given metagenome into smaller clusters. Subsequently, sequences in each cluster are independently assembled using CAP3, an existing single genome assembly program. Contigs formed in each of the clusters along with the unassembled reads are then subjected to another round of assembly for generating the final set of contigs. Validation using simulated and real-world metagenomic datasets indicates that MetaCAA aids in improving the overall quality of assembly. A software implementation of MetaCAA is available at https://metagenomics.atc.tcs.com/MetaCAA.",2014-02-01 +24469285,[Learning from regional differences: online platform: http://www.versorgungsatlas.de].,"In 2011, the Central Research Institute of Ambulatory Health Care in Germany (ZI) published the website http://www.versorgungsatlas.de, a portal that presents research results from regional health services in Germany. The Web portal provides a publicly accessible source of information and a growing number of selected analyses focusing on regional variation in health care. Each topic is presented in terms of interactive maps, tables, and diagrams and is supplemented by a paper that examines the results in detail and provides an explanation of the findings. 
The portal has been designed to provide a forum on which health service researchers can publish their results derived from various data sources of different institutions in Germany and can comment on results already available on http://www.versorgungsatlas.de. For health policy actors, the discussion of regional differences offers a new, previously unavailable basis for determining the region-specific treatment needs and for providing health-care management with the goal of high-quality care for each resident.",2014-02-01 +22622817,Biomedical application of fuzzy association rules for identifying breast cancer biomarkers.,"Current breast cancer research involves the study of many different prognosis factors: primary tumor size, lymph node status, tumor grade, tumor receptor status, p53, and ki67 levels, among others. High-throughput microarray technologies are allowing to better understand and identify prognostic factors in breast cancer. But the massive amounts of data derived from these technologies require the use of efficient computational techniques to unveil new and relevant biomedical knowledge. Furthermore, integrative tools are needed that effectively combine heterogeneous types of biomedical data, such as prognosis factors and expression data. The objective of this study was to integrate information from the main prognostic factors in breast cancer with whole-genome microarray data to identify potential associations among them. We propose the application of a data mining approach, called fuzzy association rule mining, to automatically unveil these associations. This paper describes the proposed methodology and illustrates how it can be applied to different breast cancer datasets. The obtained results support known associations involving the number of copies of chromosome-17, HER2 amplification, or the expression level of estrogen and progesterone receptors in breast cancer patients. 
They also confirm the correspondence between the HER2 status predicted by different testing methodologies (immunohistochemistry and fluorescence in situ hybridization). In addition, other interesting rules involving CDC6, SOX11, and EFEMP1 genes are identified, although further detailed studies are needed to statistically confirm these findings. As part of this study, a web platform implementing the fuzzy association rule mining approach has been made freely available at: http://www.genome2.ugr.es/biofar .",2012-05-24 +21362185,CSI-OMIM--Clinical Synopsis Search in OMIM.,"

Background

The OMIM database is a tool used daily by geneticists. Syndrome pages include a Clinical Synopsis section containing a list of known phenotypes comprising a clinical syndrome. The phenotypes are in free text and different phrases are often used to describe the same phenotype, the differences originating in spelling variations or typing errors, varying sentence structures and terminological variants. These variations hinder searching for syndromes or using the large amount of phenotypic information for research purposes. In addition, negation forms also create false positives when searching the textual description of phenotypes and induce noise in text mining applications.

Description

Our method allows efficient and complete search of OMIM phenotypes as well as improved data-mining of the OMIM phenome. Applying natural language processing, each phrase is tagged with additional semantic information using UMLS and MESH. Using a grammar based method, annotated phrases are clustered into groups denoting similar phenotypes. These groups of synonymous expressions enable precise search, as query terms can be matched with the many variations that appear in OMIM, while avoiding over-matching expressions that include the query term in a negative context. On the basis of these clusters, we computed pair-wise similarity among syndromes in OMIM. Using this new similarity measure, we identified 79,770 new connections between syndromes, an average of 16 new connections per syndrome. Our project is Web-based and available at http://fohs.bgu.ac.il/s2g/csiomim

Conclusions

The resulting enhanced search functionality provides clinicians with an efficient tool for diagnosis. This search application is also used for finding similar syndromes for the candidate gene prioritization tool S2G.The enhanced OMIM database we produced can be further used for bioinformatics purposes such as linking phenotypes and genes based on syndrome similarities and the known genes in Morbidmap.",2011-03-01 +24848019,LocTree3 prediction of localization.,"The prediction of protein sub-cellular localization is an important step toward elucidating protein function. For each query protein sequence, LocTree2 applies machine learning (profile kernel SVM) to predict the native sub-cellular localization in 18 classes for eukaryotes, in six for bacteria and in three for archaea. The method outputs a score that reflects the reliability of each prediction. LocTree2 has performed on par with or better than any other state-of-the-art method. Here, we report the availability of LocTree3 as a public web server. The server includes the machine learning-based LocTree2 and improves over it through the addition of homology-based inference. Assessed on sequence-unique data, LocTree3 reached an 18-state accuracy Q18=80±3% for eukaryotes and a six-state accuracy Q6=89±4% for bacteria. The server accepts submissions ranging from single protein sequences to entire proteomes. Response time of the unloaded server is about 90 s for a 300-residue eukaryotic protein and a few hours for an entire eukaryotic proteome not considering the generation of the alignments. For over 1000 entirely sequenced organisms, the predictions are directly available as downloads. The web server is available at http://www.rostlab.org/services/loctree3.",2014-05-21 +22451270,"ADMIT: a toolbox for guaranteed model invalidation, estimation and qualitative-quantitative modeling.","

Summary

Often competing hypotheses for biochemical networks exist in the form of different mathematical models with unknown parameters. Considering available experimental data, it is then desired to reject model hypotheses that are inconsistent with the data, or to estimate the unknown parameters. However, these tasks are complicated because experimental data are typically sparse, uncertain, and are frequently only available in form of qualitative if-then observations. ADMIT (Analysis, Design and Model Invalidation Toolbox) is a MatLab(TM)-based tool for guaranteed model invalidation, state and parameter estimation. The toolbox allows the integration of quantitative measurement data, a priori knowledge of parameters and states, and qualitative information on the dynamic or steady-state behavior. A constraint satisfaction problem is automatically generated and algorithms are implemented for solving the desired estimation, invalidation or analysis tasks. The implemented methods built on convex relaxation and optimization and therefore provide guaranteed estimation results and certificates for invalidity.

Availability

ADMIT, tutorials and illustrative examples are available free of charge for non-commercial use at http://ifatwww.et.uni-magdeburg.de/syst/ADMIT/",2012-03-25 +25142064,IRBIS: a systematic search for conserved complementarity.,"IRBIS is a computational pipeline for detecting conserved complementary regions in unaligned orthologous sequences. Unlike other methods, it follows the ""first-fold-then-align"" principle in which all possible combinations of complementary k-mers are searched for simultaneous conservation. The novel trimming procedure reduces the size of the search space and improves the performance to the point where large-scale analyses of intra- and intermolecular RNA-RNA interactions become possible. In this article, I provide a rigorous description of the method, benchmarking on simulated and real data, and a set of stringent predictions of intramolecular RNA structure in placental mammals, drosophilids, and nematodes. I discuss two particular cases of long-range RNA structures that are likely to have a causal effect on single- and multiple-exon skipping, one in the mammalian gene Dystonin and the other in the insect gene Ca-α1D. In Dystonin, one of the two complementary boxes contains a binding site of Rbfox protein similar to one recently described in Enah gene. I also report that snoRNAs and long noncoding RNAs (lncRNAs) have a high capacity of base-pairing to introns of protein-coding genes, suggesting possible involvement of these transcripts in splicing regulation. I also find that conserved sequences that occur equally likely on both strands of DNA (e.g., transcription factor binding sites) contribute strongly to the false-discovery rate and, therefore, would confound every such analysis. 
IRBIS is an open-source software that is available at http://genome.crg.es/~dmitri/irbis/.",2014-08-20 +24889180,"Large-scale gene expression profiling data for the model moss Physcomitrella patens aid understanding of developmental progression, culture and stress conditions.","The moss Physcomitrella patens is an important model organism for studying plant evolution, development, physiology and biotechnology. Here we have generated microarray gene expression data covering the principal developmental stages, culture forms and some environmental/stress conditions. Example analyses of developmental stages and growth conditions as well as abiotic stress treatments demonstrate that (i) growth stage is dominant over culture conditions, (ii) liquid culture is not stressful for the plant, (iii) low pH might aid protoplastation by reduced expression of cell wall structure genes, (iv) largely the same gene pool mediates response to dehydration and rehydration, and (v) AP2/EREBP transcription factors play important roles in stress response reactions. With regard to the AP2 gene family, phylogenetic analysis and comparison with Arabidopsis thaliana shows commonalities as well as uniquely expressed family members under drought, light perturbations and protoplastation. Gene expression profiles for P. patens are available for the scientific community via the easy-to-use tool at https://www.genevestigator.com. By providing large-scale expression profiles, the usability of this model organism is further enhanced, for example by enabling selection of control genes for quantitative real-time PCR. Now, gene expression levels across a broad range of conditions can be accessed online for P. patens.",2014-07-09 +22548786,FluxMap: a VANTED add-on for the visual exploration of flux distributions in biological networks.,"

Background

The quantification of metabolic fluxes is gaining increasing importance in the analysis of the metabolic behavior of biological systems such as organisms, tissues or cells. Various methodologies (wetlab or drylab) result in sets of fluxes which require an appropriate visualization for interpretation by scientists. The visualization of flux distributions is a necessary prerequisite for intuitive flux data exploration in the context of metabolic networks.

Results

We present FluxMap, a tool for the advanced visualization and exploration of flux data in the context of metabolic networks. The template-based flux data import assigns flux values and optional quality parameters (e. g. the confidence interval) to biochemical reactions. It supports the discrimination between mass and substance fluxes, such as C- or N-fluxes. After import, flux data mapping and network-based visualization allow the interactive exploration of the dataset. Various visualization options enable the user to adapt layout and network representation according to individual purposes.

Conclusions

The Vanted add-on FluxMap comprises a comprehensive set of functionalities for visualization and advanced visual exploration of flux distributions in biological networks. It is available as a Java open source tool from http://www.vanted.org/fluxmap.",2012-05-01 +22621308,Towards the identification of protein complexes and functional modules by integrating PPI network and gene expression data.,"

Background

Identification of protein complexes and functional modules from protein-protein interaction (PPI) networks is crucial to understanding the principles of cellular organization and predicting protein functions. In the past few years, many computational methods have been proposed. However, most of them considered the PPI networks as static graphs and overlooked the dynamics inherent within these networks. Moreover, few of them can distinguish between protein complexes and functional modules.

Results

In this paper, a new framework is proposed to distinguish between protein complexes and functional modules by integrating gene expression data into protein-protein interaction (PPI) data. A series of time-sequenced subnetworks (TSNs) is constructed according to the time that the interactions were activated. The algorithm TSN-PCD was then developed to identify protein complexes from these TSNs. As protein complexes are significantly related to functional modules, a new algorithm DFM-CIN is proposed to discover functional modules based on the identified complexes. The experimental results show that the combination of temporal gene expression data with PPI data contributes to identifying protein complexes more precisely. A quantitative comparison based on f-measure reveals that our algorithm TSN-PCD outperforms the other previous protein complex discovery algorithms. Furthermore, we evaluate the identified functional modules by using ""Biological Process"" annotated in GO (Gene Ontology). The validation shows that the identified functional modules are statistically significant in terms of ""Biological Process"". More importantly, the relationship between protein complexes and functional modules are studied.

Conclusions

The proposed framework based on the integration of PPI data and gene expression data makes it possible to identify protein complexes and functional modules more effectively. Moreover, the proposed new framework and algorithms can distinguish between protein complexes and functional modules. Our findings suggest that functional modules are closely related to protein complexes and a functional module may consist of one or multiple protein complexes. The program is available at http://netlab.csu.edu.cn/bioinfomatics/limin/DFM-CIN/index.html.",2012-05-23 +23243572,Accelerating mesh-based Monte Carlo method on modern CPU architectures.,"In this report, we discuss the use of contemporary ray-tracing techniques to accelerate 3D mesh-based Monte Carlo photon transport simulations. Single Instruction Multiple Data (SIMD) based computation and branch-less design are exploited to accelerate ray-tetrahedron intersection tests and yield a 2-fold speed-up for ray-tracing calculations on a multi-core CPU. As part of this work, we have also studied SIMD-accelerated random number generators and math functions. The combination of these techniques achieved an overall improvement of 22% in simulation speed as compared to using a non-SIMD implementation. We applied this new method to analyze a complex numerical phantom and both the phantom data and the improved code are available as open-source software at http://mcx.sourceforge.net/mmc/.",2012-11-12 +24209909,Prediction of microRNA-regulated protein interaction pathways in Arabidopsis using machine learning algorithms.,"MicroRNAs are small, endogenous RNAs found in many different species and are known to have an influence on diverse biological phenomena. They also play crucial roles in plant biological processes, such as metabolism, leaf sidedness and flower development. However, the functional roles of most microRNAs are still unknown. 
The identification of closely related microRNAs and target genes can be an essential first step towards the discovery of their combinatorial effects on different cellular states. A lot of research has tried to discover microRNAs and target gene interactions by implementing machine learning classifiers with target prediction algorithms. However, high rates of false positives have been reported as a result of undetermined factors which will affect recognition. Therefore, integrating diverse techniques could improve the prediction. In this paper we propose identifying microRNAs target of Arabidopsis thaliana by integrating prediction scores from PITA, miRanda and RNAHybrid algorithms used as a feature vector of microRNA-target interactions, and then implementing SVM, random forest tree and neural network machine learning algorithms to make final predictions by majority voting. Furthermore, microRNA target genes are linked with their protein-protein interaction (PPI) partners. We focus on plant resistance genes and transcription factor information to provide new insights into plant pathogen interaction networks. Downstream pathways are characterized by the Jaccard coefficient, which is implemented based on Gene Ontology. The database is freely accessible at http://ppi.bioinfo.asia.edu.tw/At_miRNA/.",2013-08-22 +24425099,"Temporal trends in phthalate exposures: findings from the National Health and Nutrition Examination Survey, 2001-2010.","

Background

Phthalates are ubiquitous environmental contaminants. Because of potential adverse effects on human health, butylbenzyl phthalate [BBzP; metabolite, monobenzyl phthalate (MBzP)], di-n-butyl phthalate [DnBP; metabolite, mono-n-butyl phthalate (MnBP)], and di(2-ethylhexyl) phthalate (DEHP) are being replaced by substitutes including other phthalates; however, little is known about consequent trends in population-level exposures.

Objective

We examined temporal trends in urinary concentrations of phthalate metabolites in the general U.S. population and whether trends vary by sociodemographic characteristics.

Methods

We combined data on 11 phthalate metabolites for 11,071 participants from five cycles of the National Health and Nutrition Examination Survey (2001-2010). Percent changes and least square geometric means (LSGMs) were calculated from multivariate regression models.

Results

LSGM concentrations of monoethyl phthalate, MnBP, MBzP, and ΣDEHP metabolites decreased between 2001-2002 and 2009-2010 [percent change (95% CI): -42% (-49, -34); -17% (-23, -9); -32% (-39, -23) and -37% (-46, -26), respectively]. In contrast, LSGM concentrations of monoisobutyl phthalate, mono(3-carboxypropyl) phthalate (MCPP), monocarboxyoctyl phthalate, and monocarboxynonyl phthalate (MCNP) increased over the study period [percent change (95% CI): 206% (178, 236); 25% (8, 45); 149% (102, 207); and 15% (1, 30), respectively]. Trends varied by subpopulations for certain phthalates. For example, LSGM concentrations of ΣDEHP metabolites, MCPP, and MCNP were higher in children than adults, but the gap between groups narrowed over time (p for interaction < 0.01).

Conclusions

Exposure of the U.S. population to phthalates has changed in the last decade. Data gaps make it difficult to explain trends, but legislative activity and advocacy campaigns by nongovernmental organizations may play a role in changing trends.

Citation

Zota AZ, Calafat AM, Woodruff TJ. 2014. Temporal trends in phthalate exposures: findings from the National Health and Nutrition Examination Survey, 2001-2010. Environ Health Perspect 122:235-241; http://dx.doi.org/10.1289/ehp.1306681.",2014-01-15 +23980025,MITIE: Simultaneous RNA-Seq-based transcript identification and quantification in multiple samples.,"

Motivation

High-throughput sequencing of mRNA (RNA-Seq) has led to tremendous improvements in the detection of expressed genes and reconstruction of RNA transcripts. However, the extensive dynamic range of gene expression, technical limitations and biases, as well as the observed complexity of the transcriptional landscape, pose profound computational challenges for transcriptome reconstruction.

Results

We present the novel framework MITIE (Mixed Integer Transcript IdEntification) for simultaneous transcript reconstruction and quantification. We define a likelihood function based on the negative binomial distribution, use a regularization approach to select a few transcripts collectively explaining the observed read data and show how to find the optimal solution using Mixed Integer Programming. MITIE can (i) take advantage of known transcripts, (ii) reconstruct and quantify transcripts simultaneously in multiple samples, and (iii) resolve the location of multi-mapping reads. It is designed for genome- and assembly-based transcriptome reconstruction. We present an extensive study based on realistic simulated RNA-Seq data. When compared with state-of-the-art approaches, MITIE proves to be significantly more sensitive and overall more accurate. Moreover, MITIE yields substantial performance gains when used with multiple samples. We applied our system to 38 Drosophila melanogaster modENCODE RNA-Seq libraries and estimated the sensitivity of reconstructing omitted transcript annotations and the specificity with respect to annotated transcripts. Our results corroborate that a well-motivated objective paired with appropriate optimization techniques lead to significant improvements over the state-of-the-art in transcriptome reconstruction.

Availability

MITIE is implemented in C++ and is available from http://bioweb.me/mitie under the GPL license.",2013-08-25 +22668690,Neonatal nurse practitioner workforce survey executive summary.,"The Neonatal Nurse Practitioner Workforce Survey, led by Paula Timoney, DNP, ARNP, NNP-BC, and Debra Sansoucie, EdD, RN, NNP-BC, with the National Association of Neonatal Nurse Practitioners (NANNP), provides data collected from more than 600 neonatal nurse practitioners to examine workforce characteristics and needs. NANNP commissioned the survey because no comprehensive data existed for the neonatal nurse practitioner workforce. The executive summary given in this article highlights some of the survey's key findings in the areas of demographics, practice environment, scope of responsibilities, and job satisfaction. Readers are encouraged to review the complete text of the Neonatal Nurse Practitioner Workforce Survey for more in-depth data and recommendations regarding NNP education, scope of practice, and scope of responsibility in the ever-changing health care environment. The report will be available for purchase at http://www.nannstore.org in summer 2012.",2012-06-01 +22350557,"Chiral pesticides: identification, description, and environmental implications.","Of the 1,693 pesticides considered in this review, 1,594 are organic chemicals, 47 are inorganic chemicals, 53 are of biological origin (largely non chemical; insect, fungus, bacteria, virus, etc.), and 2 have an undetermined structure. Considering that the EPA's Office of Pesticide Programs found 1,252 pesticide active ingredients (EPA Pesticides Customer Service 2011), we consider this dataset to be comprehensive; however, no direct comparison of the compound lists was undertaken. Of all pesticides reviewed, 482 (28%) are chiral; 30% are chiral when considering only the organic chemical pesticides. A graph of this distribution is shown in Fig. 7a. 
Each pesticide is classified with up to three pesticidal utilities (e.g., fungicide, plant growth regulator, rodenticide, etc.), taken first from the Pesticide Manual as a primary source, and the Compendium of Common Pesticide Names website as a secondary source. Of the chiral pesticides, 195 (34%) are insecticides (including attractants, pheromones, and repellents), 150 (27%) are herbicides (including plant growth regulators and herbicide safeners), 104 (18%) are fungicides, and 55 (10%) are acaricides. The distribution of chiral pesticides by utility is shown in Fig. 7b, including categories of pesticides that make up 3% or less of the usage categories. Figure 7c shows a similar distribution of non chiral pesticide usage categories. Of the chiral pesticides, 270 (56%) have one chiral feature, 105 (22%) have two chiral features, 30 (6.2%) have three chiral features, and 29 (6.0%) have ten or more chiral features. Chiral chemicals pose many difficulties in stereospecific synthesis, characterization, and analysis. When these compounds are purposely put into the environment, even more interesting complications arise in tracking, monitoring, and predicting their fate and risks. More than 475 pesticides are chiral, as are other chiral contaminants such as pharmaceuticals, polychlorinated biphenyls, brominated flame retardants, synthetic musks, and their degradates (Kallenborn and Hühnerfuss 2001; Heeb et al. 2007; Hühnerfuss and Shah 2009). The stereoisomers of pesticides can have widely different efficacy, toxicity to nontarget organisms, and metabolic rates in biota. For these reasons, it is important to first be aware of likely fate and effect differences, to incorporate molecular asymmetry insights into research projects, and to study the individual stereoisomers of the applied pesticide material. With the advent of enantioselective chromatography techniques, the chirality of pesticides has been increasingly studied. 
While the ChirBase (Advanced Chemistry Development 1997-2010) database does not include all published chiral analytical separations, it does contain more than 3,500 records for 146 of the 482 chiral pesticides (30%). The majority of the records are found in the liquid chromatography database (2,677 or 76%), followed by the gas chromatography database (652 or 18%), and the capillary electrophoresis database (203 or 6%). The finding that only 30% of the chiral pesticides covered in this review have entries in ChirBase highlights the need for expanded efforts to develop additional enantioselective chromatographic methods. Other techniques (e.g., nuclear magnetic resonance and other spectroscopy) are available for investigation of chiral compounds, but often are not utilized because of cost, complexity, or simply not recognizing that a pesticide is chiral. In this review, we have listed and have briefly described the general nature of chiral fungicides, herbicides, insecticides, and other miscellaneous classes. A dataset generated for this review contains 1,693 pesticides, the number of enantioselective separation records in ChirBase, pesticide usage class, SMILES structure string and counts of stereogenic centers. This dataset is publicly available for download at the following website: http://www.epa.gov/heasd/products/products.html. With the information herein coupled to the publicly accessible dataset, we can begin to develop the tools to handle molecular asymmetry as it applies to agrochemicals. Additional structure-based resources would allow further analysis of key parameters (e.g., exposure, toxicity, environmental fate, degradation, and risks) for individual stereoisomers of chiral compounds.",2012-01-01 +24334622,Metagenomic frameworks for monitoring antibiotic resistance in aquatic environments.,"

Background

High-throughput genomic technologies offer new approaches for environmental health monitoring, including metagenomic surveillance of antibiotic resistance determinants (ARDs). Although natural environments serve as reservoirs for antibiotic resistance genes that can be transferred to pathogenic and human commensal bacteria, monitoring of these determinants has been infrequent and incomplete. Furthermore, surveillance efforts have not been integrated into public health decision making.

Objectives

We used a metagenomic epidemiology-based approach to develop an ARD index that quantifies antibiotic resistance potential, and we analyzed this index for common modal patterns across environmental samples. We also explored how metagenomic data such as this index could be conceptually framed within an early risk management context.

Methods

We analyzed 25 published data sets from shotgun pyrosequencing projects. The samples consisted of microbial community DNA collected from marine and freshwater environments across a gradient of human impact. We used principal component analysis to identify index patterns across samples.

Results

We observed significant differences in the overall index and index subcategory levels when comparing ecosystems more proximal versus distal to human impact. The selection of different sequence similarity thresholds strongly influenced the index measurements. Unique index subcategory modes distinguished the different metagenomes.

Conclusions

Broad-scale screening of ARD potential using this index revealed utility for framing environmental health monitoring and surveillance. This approach holds promise as a screening tool for establishing baseline ARD levels that can be used to inform and prioritize decision making regarding management of ARD sources and human exposure routes.

Citation

Port JA, Cullen AC, Wallace JC, Smith MN, Faustman EM. 2014. Metagenomic frameworks for monitoring antibiotic resistance in aquatic environments. Environ Health Perspect 122:222–228; http://dx.doi.org/10.1289/ehp.1307009",2013-12-13 +21884625,The NFI-Regulome Database: A tool for annotation and analysis of control regions of genes regulated by Nuclear Factor I transcription factors.,"

Background

Genome annotation plays an essential role in the interpretation and use of genome sequence information. While great strides have been made in the annotation of coding regions of genes, less success has been achieved in the annotation of the regulatory regions of genes, including promoters, enhancers/silencers, and other regulatory elements. One reason for this disparity in annotated information is that coding regions can be assessed using high-throughput techniques such as EST sequencing, while annotation of regulatory regions often requires a gene-by-gene approach.

Results

The NFI-Regulome database http://nfiregulome.ccr.buffalo.edu was designed to promote easy annotation of the regulatory regions of genes that contain binding sites for the NFI (Nuclear Factor I) family of transcription factors, using data from the published literature. Binding sites are annotated together with the sequence of the gene, obtained from the UCSC Genome site, and the locations of all binding sites for multiple genes can be displayed in a number of formats designed to facilitate inter-gene comparisons. Classes of genes based on expression pattern, disease involvement, or types of binding sites present can be readily compared in order to assess common ""architectural"" structures in the regulatory regions.

Conclusions

The NFI-Regulome database allows rapid display of the relative locations and number of transcription factor binding sites of individual or defined sets of genes that contain binding sites for NFI transcription factors. This database may in the future be expanded into a distributed database structure including other families of transcription factors. Such databases may be useful for identifying common regulatory structures in genes essential for organ development, tissue-specific gene expression or those genes related to specific diseases.",2011-01-20 +24752294,Bioinformatics pipelines for targeted resequencing and whole-exome sequencing of human and mouse genomes: a virtual appliance approach for instant deployment.,"Targeted resequencing by massively parallel sequencing has become an effective and affordable way to survey small to large portions of the genome for genetic variation. Despite the rapid development in open source software for analysis of such data, the practical implementation of these tools through construction of sequencing analysis pipelines still remains a challenging and laborious activity, and a major hurdle for many small research and clinical laboratories. We developed TREVA (Targeted REsequencing Virtual Appliance), making pre-built pipelines immediately available as a virtual appliance. Based on virtual machine technologies, TREVA is a solution for rapid and efficient deployment of complex bioinformatics pipelines to laboratories of all sizes, enabling reproducible results. The analyses that are supported in TREVA include: somatic and germline single-nucleotide and insertion/deletion variant calling, copy number analysis, and cohort-based analyses such as pathway and significantly mutated genes analyses. TREVA is flexible and easy to use, and can be customised by Linux-based extensions if required. TREVA can also be deployed on the cloud (cloud computing), enabling instant access without investment overheads for additional hardware. 
TREVA is available at http://bioinformatics.petermac.org/treva/.",2014-04-21 +24922057,BioAssemblyModeler (BAM): user-friendly homology modeling of protein homo- and heterooligomers.,"

Unlabelled

Many if not most proteins function in oligomeric assemblies of one or more protein sequences. The Protein Data Bank provides coordinates for biological assemblies for each entry, at least 60% of which are dimers or larger assemblies. BioAssemblyModeler (BAM) is a graphical user interface to the basic steps in homology modeling of protein homooligomers and heterooligomers from the biological assemblies provided in the PDB. BAM takes as input up to six different protein sequences and begins by assigning Pfam domains to the target sequences. The program utilizes a complete assignment of Pfam domains to sequences in the PDB, PDBfam (http://dunbrack2.fccc.edu/protcid/pdbfam), to obtain templates that contain any or all of the domains assigned to the target sequence(s). The contents of the biological assemblies of potential templates are provided, and alignments of the target sequences to the templates are produced with a profile-profile alignment algorithm. BAM provides for visual examination and mouse-editing of the alignments supported by target and template secondary structure information and a 3D viewer of the template biological assembly. Side-chain coordinates for a model of the biological assembly are built with the program SCWRL4. A built-in protocol navigation system guides the user through all stages of homology modeling from input sequences to a three-dimensional model of the target complex.

Availability

http://dunbrack.fccc.edu/BAM.",2014-06-12 +24684691,In vitro and modelling approaches to risk assessment from the U.S. Environmental Protection Agency ToxCast programme.,"A significant challenge in toxicology is the 'too many chemicals' problem. Human beings and environmental species are exposed to tens of thousands of chemicals, only a small percentage of which have been tested thoroughly using standard in vivo test methods. This study reviews several approaches that are being developed to deal with this problem by the U.S. Environmental Protection Agency, under the umbrella of the ToxCast programme (http://epa.gov/ncct/toxcast/). The overall approach is broken into seven tasks: (i) identifying biological pathways that, when perturbed, can lead to toxicity; (ii) developing high-throughput in vitro assays to test chemical perturbations of these pathways; (iii) identifying the universe of chemicals with likely human or ecological exposure; (iv) testing as many of these chemicals as possible in the relevant in vitro assays; (v) developing hazard models that take the results of these tests and identify chemicals as being potential toxicants; (vi) generating toxicokinetics data on these chemicals to predict the doses at which these hazard pathways would be activated; and (vii) developing exposure models to identify chemicals for which these hazardous dose levels could be achieved. This overall strategy is described and briefly illustrated with recent examples from the ToxCast programme.",2014-04-22 +24320472,Blood vessel-based liver segmentation using the portal phase of an abdominal CT dataset.,"

Purpose

Blood vessel (BV) information can be used to guide body organ segmentation on computed tomography (CT) imaging. The proposed method uses abdominal BVs (ABVs) to segment the liver through the portal phase of an abdominal CT dataset. This method aims to address the wide variability in liver shape and size, separate liver from other organs of similar intensity, and segment hepatic low-intensity tumors (LITs).

Methods

Thin ABVs are enhanced using three-dimensional (3D) opening. ABVs are extracted and classified into hepatic BVs (HBVs) and nonhepatic BVs (non-HBVs) with a small number of interactions, and HBVs and non-HBVs are used for constraining automatic liver segmentation. HBVs are used to individually segment the core region of the liver. To separate the liver from other organs, this core region and non-HBVs are used to construct an initial 3D boundary surface. To segment LITs, the core region is classified into non-LIT- and LIT-parts by fitting the histogram of the core region using a variational Bayesian Gaussian mixture model. Each part of the core region is extended based on its corresponding component of the mixture, and extension is completed when it reaches a variation in intensity or the constructed boundary surface, which is reconfirmed to fit robustly between the liver and neighboring organs of similar intensity. A solid-angle technique is used to refine main BVs at the entrances to the inferior vena cava and the portal vein.

Results

The proposed method was applied to 80 datasets: 30 Medical Image Computing and Computer Assisted Intervention (MICCAI) and 50 non-MICCAI; 30 datasets of non-MICCAI data include tumors. Our results for MICCAI-test data were evaluated by sliver07 (http://www.sliver07.org/) organizers with an overall score of 85.7, which ranks best on the site as of July 2013. These results (average ± standard deviation) include the five error measures of the 2007 MICCAI workshop for liver segmentation as follows. Results for volume overlap error, relative volume difference, average symmetric surface distance, root mean square symmetric surface distance, and maximum symmetric surface distance were 4.33 ± 0.73, 0.28 ± 0.87, 0.63 ± 0.16, 1.19 ± 0.28, and 14.01 ± 2.88, respectively; and when applying our method to non-MICCAI data, results were 3.21 ± 0.75, 0.06 ± 1.29, 0.45 ± 0.17, 0.98 ± 0.26, and 12.69 ± 3.89, respectively. These results demonstrate high performance of the method when applied to different CT datasets.

Conclusions

BVs can be used to address the wide variability in liver shape and size, as BVs provide unique details for the structure of each studied liver. Constructing a boundary surface using HBVs and non-HBVs can separate liver from its neighboring organs of similar intensity. By fitting the histogram of the core region using a variational Bayesian Gaussian mixture model, LITs are segmented and measuring the volumetry of non-LIT- and LIT-parts becomes possible. Further examination of the proposed method on a large number of datasets is required for clinical applications, and development of the method for full automation may be possible and useful in the clinic.",2013-11-01 +23104891,RIP-chip enrichment analysis.,"

Motivation

RIP-chip is a high-throughput method to identify mRNAs that are targeted by RNA-binding proteins. The protein of interest is immunoprecipitated, and the identity and relative amount of mRNA associated with it is measured on microarrays. Even if a variety of methods is available to analyse microarray data, e.g. to detect differentially regulated genes, the additional experimental steps in RIP-chip require specialized methods. Here, we focus on two aspects of RIP-chip data: First, the efficiency of the immunoprecipitation step performed in the RIP-chip protocol varies in between different experiments introducing bias not existing in standard microarray experiments. This requires an additional normalization step to compare different samples and even technical replicates. Second, in contrast to standard differential gene expression experiments, the distribution of measurements is not normal. We exploit this fact to define a set of biologically relevant genes in a statistically meaningful way.

Results

Here, we propose two methods to analyse RIP-chip data: We model the measurement distribution as a Gaussian mixture distribution, which allows us to compute false discovery rates (FDRs) for any cut-off. Thus, cut-offs can be chosen for any desired FDR. Furthermore, we use principal component analysis to determine the normalization factors necessary to remove immunoprecipitation bias. Both methods are evaluated on a large RIP-chip dataset measuring targets of Ago2, the major component of the microRNA guided RNA-induced silencing complex (RISC). Using published HITS-CLIP experiments performed with the same cell line as used for RIP-chip, we show that the mixture modelling approach is a necessary step to remove background, that computed FDRs are valid, and that the additional normalization is a necessary step to make experiments comparable.

Availability

An R implementation of REA is available on the project website (http://www.bio.ifi.lmu.de/REA) and as supplementary data file.",2012-10-26 +23220573,ADAPT-NMR Enhancer: complete package for reduced dimensionality in protein NMR spectroscopy.,"

Summary

ADAPT-nuclear magnetic resonance (ADAPT-NMR) offers an automated approach to the concurrent acquisition and processing of protein NMR data with the goal of complete backbone and side chain assignments. What the approach lacks is a useful graphical interface for reviewing results and for searching for missing peaks that may have prevented assignments or led to incorrect assignments. Because most of the data ADAPT-NMR collects are 2D tilted planes used to find peaks in 3D spectra, it would be helpful to have a tool that reconstructs the 3D spectra. The software package reported here, ADAPT-NMR Enhancer, supports the visualization of both 2D tilted planes and reconstructed 3D peaks on each tilted plane. ADAPT-NMR Enhancer can be used interactively with ADAPT-NMR to automatically assign selected peaks, or it can be used to produce PINE-SPARKY-like graphical dialogs that support atom-by-atom and peak-by-peak assignment strategies. Results can be exported in various formats, including XEASY proton file (.prot), PINE pre-assignment file (.str), PINE probabilistic output file, SPARKY peak list file (.list) and TALOS+ input file (.tab). As an example, we show how ADAPT-NMR Enhancer was used to extend the automated data collection and assignment results for the protein Aedes aegypti sterol carrier protein 2.

Availability

The program, in the form of binary code along with tutorials and reference manuals, is available at http://pine.nmrfam.wisc.edu/adapt-nmr-enhancer.",2012-12-07 +24209914,Predicting protein-binding RNA nucleotides using the feature-based removal of data redundancy and the interaction propensity of nucleotide triplets.,"Several learning approaches have been used to predict RNA-binding amino acids in a protein sequence, but there has been little attempt to predict protein-binding nucleotides in an RNA sequence. One of the reasons is that the differences between nucleotides in their interaction propensity are much smaller than those between amino acids. Another reason is that RNA exhibits less diverse sequence patterns than protein. Therefore, predicting protein-binding RNA nucleotides is much harder than predicting RNA-binding amino acids. We developed a new method that removes data redundancy in a training set of sequences based on their features. The new method constructs a larger and more informative training set than the standard redundancy removal method based on sequence similarity, and the constructed dataset is guaranteed to be redundancy-free. We computed the interaction propensity (IP) of nucleotide triplets by applying a new definition of IP to an extensive dataset of protein-RNA complexes, and developed a support vector machine (SVM) model to predict protein binding sites in RNA sequences. In a 5-fold cross-validation with 812 RNA sequences, the SVM model predicted protein-binding nucleotides with an accuracy of 86.4%, an F-measure of 84.8%, and a Matthews correlation coefficient of 0.66. With an independent dataset of 56 RNA sequences that were not used in training, the resulting accuracy was 68.1% with an F-measure of 71.7% and a Matthews correlation coefficient of 0.35. To the best of our knowledge, this is the first attempt to predict protein-binding RNA nucleotides in a given RNA sequence from the sequence data alone. 
The SVM model and datasets are freely available for academics at http://bclab.inha.ac.kr/primer.",2013-08-21 +23958726,RDAVIDWebService: a versatile R interface to DAVID.,"

Summary

The RDAVIDWebService package provides a class-based interface from R programs/scripts to fully access/control the database for annotation, visualization and integrated discovery, without the need for human interaction on its Web site (http://david.abcc.ncifcrf.gov). The library enhances the database for annotation, visualization and integrated discovery capabilities for Gene Ontology analysis by means of GOstats-based direct acyclic graph conversion methods, in addition to the usual many-genes-to-many-terms visualization.

Availability and implementation

RDAVIDWebService is available as an R package from the Bioconductor project (www.bioconductor.org) and on the authors' Web site (www.bdmg.com.ar) under GPL-2 license, subjected to the terms of use of DAVID (http://david.abcc.ncifcrf.gov/content.jsp?file=WS.html).

Contact

cfresno@bdmg.com.ar or efernandez@bdmg.com.ar.",2013-08-19 +24753424,TargetRNA2: identifying targets of small regulatory RNAs in bacteria.,"Many small, noncoding RNAs (sRNAs) in bacteria act as posttranscriptional regulators of messenger RNAs. TargetRNA2 is a web server that identifies mRNA targets of sRNA regulatory action in bacteria. As input, TargetRNA2 takes the sequence of an sRNA and the name of a sequenced bacterial replicon. When searching for targets of RNA regulation, TargetRNA2 uses a variety of features, including conservation of the sRNA in other bacteria, the secondary structure of the sRNA, the secondary structure of each candidate mRNA target and the hybridization energy between the sRNA and each candidate mRNA target. TargetRNA2 outputs a ranked list of likely regulatory targets for the input sRNA. When evaluated on a comprehensive set of sRNA-target interactions, TargetRNA2 was found to be both accurate and efficient in identifying targets of sRNA regulatory action. Furthermore, TargetRNA2 has the ability to integrate RNA-seq data, if available. If an sRNA is differentially expressed in two or more RNA-seq experiments, TargetRNA2 considers co-differential gene expression when searching for regulatory targets, significantly improving the accuracy of target identifications. The TargetRNA2 web server is freely available for use at http://cs.wellesley.edu/~btjaden/TargetRNA2.",2014-04-21 +25995255,Respiratory Syncytial Virus Inhibitor AZ-27 Differentially Inhibits Different Polymerase Activities at the Promoter.,"

Unlabelled

Respiratory syncytial virus (RSV) is the leading cause of pediatric respiratory disease. RSV has an RNA-dependent RNA polymerase that transcribes and replicates the viral negative-sense RNA genome. The large polymerase subunit (L) has multiple enzymatic activities, having the capability to synthesize RNA and add and methylate a cap on each of the viral mRNAs. Previous studies (H. Xiong et al., Bioorg Med Chem Lett, 23:6789-6793, 2013, http://dx.doi.org/10.1016/j.bmcl.2013.10.018; C. L. Tiong-Yip et al., Antimicrob Agents Chemother, 58:3867-3873, 2014, http://dx.doi.org/10.1128/AAC.02540-14) had identified a small-molecule inhibitor, AZ-27, that targets the L protein. In this study, we examined the effect of AZ-27 on different aspects of RSV polymerase activity. AZ-27 was found to inhibit equally both mRNA transcription and genome replication in cell-based minigenome assays, indicating that it inhibits a step common to both of these RNA synthesis processes. Analysis in an in vitro transcription run-on assay, containing RSV nucleocapsids, showed that AZ-27 inhibits synthesis of transcripts from the 3' end of the genome to a greater extent than those from the 5' end, indicating that it inhibits transcription initiation. Consistent with this finding, experiments that assayed polymerase activity on the promoter showed that AZ-27 inhibited transcription and replication initiation. The RSV polymerase also can utilize the promoter sequence to perform a back-priming reaction. Interestingly, addition of AZ-27 had no effect on the addition of up to three nucleotides by back-priming but inhibited further extension of the back-primed RNA. These data provide new information regarding the mechanism of inhibition by AZ-27. They also suggest that the RSV polymerase adopts different conformations to perform its different activities at the promoter.

Importance

Currently, there are no effective antiviral drugs to treat RSV infection. The RSV polymerase is an attractive target for drug development, but this large enzymatic complex is poorly characterized, hampering drug development efforts. AZ-27 is a small-molecule inhibitor previously shown to target the RSV large polymerase subunit (C. L. Tiong-Yip et al., Antimicrob Agents Chemother, 58:3867-3873, 2014, http://dx.doi.org/10.1128/AAC.02540-14), but its inhibitory mechanism was unknown. Understanding this would be valuable both for characterizing the polymerase and for further development of inhibitors. Here, we show that AZ-27 inhibits an early stage in mRNA transcription, as well as genome replication, by inhibiting initiation of RNA synthesis from the promoter. However, the compound does not inhibit back priming, another RNA synthesis activity of the RSV polymerase. These findings provide insight into the different activities of the RSV polymerase and will aid further development of antiviral agents against RSV.",2015-05-20 +23314326,Reconciling differential gene expression data with molecular interaction networks.,"

Motivation

Many techniques have been developed to compute the response network of a cell. A recent trend in this area is to compute response networks of small size, with the rationale that only part of a pathway is often changed by disease and that interpreting small subnetworks is easier than interpreting larger ones. However, these methods may not uncover the spectrum of pathways perturbed in a particular experiment or disease.

Results

To avoid these difficulties, we propose to use algorithms that reconcile case-control DNA microarray data with a molecular interaction network by modifying per-gene differential expression P-values such that two genes connected by an interaction show similar changes in their gene expression values. We provide a novel evaluation of four methods from this class of algorithms. We enumerate three desirable properties that this class of algorithms should address. These properties seek to maintain that the returned gene rankings are specific to the condition being studied. Moreover, to ease interpretation, highly ranked genes should participate in coherent network structures and should be functionally enriched with relevant biological pathways. We comprehensively evaluate the extent to which each algorithm addresses these properties on a compendium of gene expression data for 54 diverse human diseases. We show that the reconciled gene rankings can identify novel disease-related functions that are missed by analyzing expression data alone.

Availability

C++ software implementing our algorithms is available in the NetworkReconciliation package as part of the Biorithm software suite under the GNU General Public License: http://bioinformatics.cs.vt.edu/~murali/software/biorithm-docs.",2013-01-12 +22199380,Probe mapping across multiple microarray platforms.,"Access to gene expression data has become increasingly common in recent years; however, analysis has become more difficult as it is often desirable to integrate data from different platforms. Probe mapping across microarray platforms is the first and most crucial step for data integration. In this article, we systematically review and compare different approaches to map probes across seven platforms from different vendors: U95A, U133A and U133 Plus 2.0 from Affymetrix, Inc.; HT-12 v1, HT-12v2 and HT-12v3 from Illumina, Inc.; and 4112A from Agilent, Inc. We use a unique data set, which contains 56 lung cancer cell line samples-each of which has been measured by two different microarray platforms-to evaluate the consistency of expression measurement across platforms using different approaches. Based on the evaluation from the empirical data set, the BLAST alignment of the probe sequences to a recent revision of the Transcriptome generated better results than using annotations provided by Vendors or from Bioconductor's Annotate package. However, a combination of all three methods (deemed the 'Consensus Annotation') yielded the most consistent expression measurement across platforms. To facilitate data integration across microarray platforms for the research community, we develop a user-friendly web-based tool, an API and an R package to map data across different microarray platforms from Affymetrix, Illumina and Agilent. 
Information on all three can be found at http://qbrc.swmed.edu/software/probemapper/.",2011-12-23 +21448735,SHIFTX2: significantly improved protein chemical shift prediction.,"A new computer program, called SHIFTX2, is described which is capable of rapidly and accurately calculating diamagnetic (1)H, (13)C and (15)N chemical shifts from protein coordinate data. Compared to its predecessor (SHIFTX) and to other existing protein chemical shift prediction programs, SHIFTX2 is substantially more accurate (up to 26% better by correlation coefficient with an RMS error that is up to 3.3× smaller) than the next best performing program. It also provides significantly more coverage (up to 10% more), is significantly faster (up to 8.5×) and capable of calculating a wider variety of backbone and side chain chemical shifts (up to 6×) than many other shift predictors. In particular, SHIFTX2 is able to attain correlation coefficients between experimentally observed and predicted backbone chemical shifts of 0.9800 ((15)N), 0.9959 ((13)Cα), 0.9992 ((13)Cβ), 0.9676 ((13)C'), 0.9714 ((1)HN), 0.9744 ((1)Hα) and RMS errors of 1.1169, 0.4412, 0.5163, 0.5330, 0.1711, and 0.1231 ppm, respectively. The correlation between SHIFTX2's predicted and observed side chain chemical shifts is 0.9787 ((13)C) and 0.9482 ((1)H) with RMS errors of 0.9754 and 0.1723 ppm, respectively. SHIFTX2 is able to achieve such a high level of accuracy by using a large, high quality database of training proteins (>190), by utilizing advanced machine learning techniques, by incorporating many more features (χ(2) and χ(3) angles, solvent accessibility, H-bond geometry, pH, temperature), and by combining sequence-based with structure-based chemical shift prediction techniques. With this substantial improvement in accuracy we believe that SHIFTX2 will open the door to many long-anticipated applications of chemical shift prediction to protein structure determination, refinement and validation. 
SHIFTX2 is available both as a standalone program and as a web server ( http://www.shiftx2.ca ).",2011-03-30 +24919880,Predicting dynamic signaling network response under unseen perturbations.,"

Motivation

Predicting trajectories of signaling networks under complex perturbations is one of the most valuable, but challenging, tasks in systems biology. Signaling networks are involved in most of the biological pathways, and modeling their dynamics has wide applications including drug design and treatment outcome prediction.

Results

In this paper, we report a novel model for predicting the cell type-specific time course response of signaling proteins under unseen perturbations. This algorithm achieved the top performance in the 2013 8th Dialogue for Reverse Engineering Assessments and Methods (DREAM 8) subchallenge: time course prediction in breast cancer cell lines. We formulate the trajectory prediction problem into a standard regularization problem; the solution becomes solving this discrete ill-posed problem. This algorithm includes three steps: denoising, estimating regression coefficients and modeling trajectories under unseen perturbations. We further validated the accuracy of this method against simulation and experimental data. Furthermore, this method reduces computational time by magnitudes compared to state-of-the-art methods, allowing genome-wide modeling of signaling pathways and time course trajectories to be carried out in a practical time.

Availability and implementation

Source code is available at http://guanlab.ccmb.med.umich.edu/DREAM/code.html and as supplementary file online.",2014-06-11 +24065654,PANNOTATOR: an automated tool for annotation of pan-genomes.,"Due to next-generation sequence technologies, sequencing of bacterial genomes is no longer one of the main bottlenecks in bacterial research and the number of new genomes deposited in public databases continues to increase at an accelerating rate. Among these new genomes, several belong to the same species and were generated for pan-genomic studies. A pan-genomic study allows investigation of strain phenotypic differences based on genotypic differences. Along with a need for good assembly quality, it is also fundamental to guarantee good functional genome annotation of the different strains. In order to ensure quality and standards for functional genome annotation among different strains, we developed and made available PANNOTATOR (http://bnet.egr.vcu.edu/iioab/agenote.php), a web-based automated pipeline for the annotation of closely related and well-suited genomes for pan-genome studies, aiming at reducing the manual work to generate reports and corrections of various genome strains. PANNOTATOR achieved 98 and 76% of correctness for gene name and function, respectively, as result of an annotation transfer, with a similarity cut-off of 70%, compared with a gold standard annotation for the same species. These results surpassed the RAST and BASys softwares by 41 and 21% and 66 and 17% for gene name and function annotation, respectively, when there were reliable genome annotations of closely related species. PANNOTATOR provides fast and reliable pan-genome annotation; thereby allowing us to maintain the research focus on the main genotype differences between strains.",2013-08-16 +23952586,A database for Mycobacterium secretome analysis: 'MycoSec' to accelerate global health research.,"Abstract Members of the genus Mycobacterium are notorious for their pathogenesis. 
Investigations from various perspectives have identified the pathogenic strategies employed by these lethal pathogens. Secretomes are believed to play crucial roles in host cell recognition and cross-talks, in cellular attachment, and in triggering other functions related to host pathogen interactions. However, a proper idea of the mycobacterial secretomes and their mechanism of functionality still remains elusive. In the present study, we have developed a comprehensive database of potential mycobacterial secretomes (MycoSec) using pre-existing algorithms for secretome prediction for researchers interested in this particular field. The database provides a platform for retrieval and analysis of identified secretomes in all finished genomes of the family Mycobacteriaceae. The database contains valuable information regarding secretory signal peptides (Sec type), lipoprotein signal peptides (Lipo type), and Twin arginine (RR/KR) signal peptides (TAT type), prevalent in mycobacteria. Information pertaining to COG analysis, codon usage, and gene expression of the predicted secretomes has also been incorporated in the database. MycoSec promises to be a useful repertoire providing a plethora of information regarding mycobacterial secretomes and may well be a platform to speed global health research. MycoSec is freely accessible at http://www.bicnbu.in/mycosec .",2013-08-16 +23959061,A systematic approach for identifying and presenting mechanistic evidence in human health assessments.,"Clear documentation of literature search and presentation methodologies can improve transparency in chemical hazard assessments. We sought to improve clarity for the scientific support for cancer mechanisms of action using a systematic approach to literature retrieval, selection, and presentation of studies. The general question was ""What are the mechanisms by which a chemical may cause carcinogenicity in the target tissue?"". 
Di(2-ethylhexyl)phthalate was used as a case study chemical with a complex database of >3000 publications. Relevant mechanistic events were identified from published reviews. The PubMed search strategy included relevant synonyms and wildcards for DEHP and its metabolites, mechanistic events, and species of interest. Tiered exclusion/inclusion criteria for study pertinence were defined, and applied to the retrieved literature. Manual curation was conducted for mechanistic events with large literature databases. Literature trees documented identification and selection of the literature evidence. The selected studies were summarized in evidence tables accompanied by succinct narratives. Primary publications were deposited into the Health and Environmental Research Online (http://hero.epa.gov/) database and identified by pertinence criteria and key terms to permit organized retrieval. This approach contributes to human health assessment by effectively managing a large volume of literature, improving transparency, and facilitating subsequent synthesis of information across studies.",2013-08-16 +24659106,Improved transcript isoform discovery using ORF graphs.,"

Motivation

High-throughput sequencing of RNA in vivo facilitates many applications, not the least of which is the cataloging of variant splice isoforms of protein-coding messenger RNAs. Although many solutions have been proposed for reconstructing putative isoforms from deep sequencing data, these generally take as their substrate the collective alignment structure of RNA-seq reads and ignore the biological signals present in the actual nucleotide sequence. The majority of these solutions are graph-theoretic, relying on a splice graph representing the splicing patterns and exon expression levels indicated by the spliced-alignment process.

Results

We show how to augment splice graphs with additional information reflecting the biology of transcription, splicing and translation, to produce what we call an ORF (open reading frame) graph. We then show how ORF graphs can be used to produce isoform predictions with higher accuracy than current state-of-the-art approaches.

Availability and implementation

RSVP is available as C++ source code under an open-source licence: http://ohlerlab.mdc-berlin.de/software/RSVP/.",2014-03-22 +24448410,p97-dependent retrotranslocation and proteolytic processing govern formation of active Nrf1 upon proteasome inhibition.,"Proteasome inhibition elicits an evolutionarily conserved response wherein proteasome subunit mRNAs are upregulated, resulting in recovery (i.e., 'bounce-back') of proteasome activity. We previously demonstrated that the transcription factor Nrf1/NFE2L1 mediates this homeostatic response in mammalian cells. We show here that Nrf1 is initially translocated into the lumen of the ER, but is rapidly and efficiently retrotranslocated to the cytosolic side of the membrane in a manner that depends on p97/VCP. Normally, retrotranslocated Nrf1 is degraded promptly by the proteasome and active species do not accumulate. However, in cells with compromised proteasomes, retrotranslocated Nrf1 escapes degradation and is cleaved N-terminal to Leu-104 to yield a fragment that is no longer tethered to the ER membrane. Importantly, this cleavage event is essential for Nrf1-dependent activation of proteasome gene expression upon proteasome inhibition. Our data uncover an unexpected role for p97 in activation of a transcription factor by relocalizing it from the ER lumen to the cytosol. DOI: http://dx.doi.org/10.7554/eLife.01856.001.",2014-01-21 +26356333,Beyond Fixed-Resolution Alignment-Free Measures for Mammalian Enhancers Sequence Comparison.,"

Unlabelled

The cell-type diversity is to a large degree driven by transcription regulation, i.e., enhancers. It has been recently shown that in high-level eukaryotes enhancers rarely work alone, instead they collaborate by forming clusters of cis-regulatory modules (CRMs). Even if the binding of transcription factors is sequence-specific, the identification of functionally similar enhancers is very difficult. A similarity measure to detect related regulatory sequences is crucial to understand functional correlation between two enhancers. This will allow large-scale analyses, clustering and genome-wide classifications. In this paper we present Under2, a parameter-free alignment-free statistic based on variable-length words. As opposed to traditional alignment-free methods, which are based on fixed-length patterns or, in other words, tied to a fixed resolution, our statistic is built upon variable-length words, and thus multiple resolutions are allowed. This will capture the great variability of lengths of CRMs. We evaluate several alignment-free statistics on simulated data and real ChIP-seq sequences. The new statistic is highly successful in discriminating functionally related enhancers and, in almost all experiments, it outperforms fixed-resolution methods. Finally, experiments on mouse enhancers show that Under2 can separate enhancers active in different tissues.

Availability

http://www.dei.unipd.it/~ciompin/main/UnderIICRMS.html.",2014-07-01 +23326485,mirTarPri: improved prioritization of microRNA targets through incorporation of functional genomics data.,"MicroRNAs (miRNAs) are a class of small (19-25 nt) non-coding RNAs. This important class of gene regulator downregulates gene expression through sequence-specific binding to the 3'untranslated regions (3'UTRs) of target mRNAs. Several computational target prediction approaches have been developed for predicting miRNA targets. However, the predicted target lists often have high false positive rates. To construct a workable target list for subsequent experimental studies, we need novel approaches to properly rank the candidate targets from traditional methods. We performed a systematic analysis of experimentally validated miRNA targets using functional genomics data, and found significant functional associations between genes that were targeted by the same miRNA. Based on this finding, we developed a miRNA target prioritization method named mirTarPri to rank the predicted target lists from commonly used target prediction methods. Leave-one-out cross validation has proved to be successful in identifying known targets, achieving an AUC score up to 0. 84. Validation in high-throughput data proved that mirTarPri was an unbiased method. Applying mirTarPri to prioritize results of six commonly used target prediction methods allowed us to find more positive targets at the top of the prioritized candidate list. In comparison with other methods, mirTarPri had an outstanding performance in gold standard and CLIP data. mirTarPri was a valuable method to improve the efficacy of current miRNA target prediction methods. We have also developed a web-based server for implementing mirTarPri method, which is freely accessible at http://bioinfo.hrbmu.edu.cn/mirTarPri.",2013-01-09 +24666391,In vivo opioid receptor heteromerization: where do we stand?,"

Unlabelled

Opioid receptors are highly homologous GPCRs that modulate brain function at all levels of neural integration, including autonomous, sensory, emotional and cognitive processing. Opioid receptors functionally interact in vivo, but the underlying mechanisms involving direct receptor-receptor interactions, affecting signalling pathways or engaging different neuronal circuits, remain unsolved. Heteromer formation through direct physical interaction between two opioid receptors or between an opioid receptor and a non-opioid one has been postulated and can be characterized by specific ligand binding, receptor signalling and trafficking properties. However, despite numerous studies in heterologous systems, evidence for physical proximity in vivo is only available for a limited number of opioid heteromers, and their physiopathological implication remains largely unknown mostly due to the lack of appropriate tools. Nonetheless, data collected so far using endogenous receptors point to a crucial role for opioid heteromers as a molecular entity that could underlie human pathologies such as alcoholism, acute or chronic pain as well as psychiatric disorders. Opioid heteromers therefore stand as new therapeutic targets for the drug discovery field.

Linked articles

This article is part of a themed section on Opioids: New Pathways to Functional Selectivity. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2015.172.issue-2.",2014-07-01 +23949945,A 3p interstitial deletion in two monozygotic twin brothers and an 18-year-old man: further characterization and review.,"An increasing number of patients with 3p proximal deletions were reported in the previous decade, but the region responsible for the main features such as intellectual disability (ID) and developmental delay is not yet characterized. Here we report on two monozygotic twin brothers of 2 10/12 years and an 18-year-old man, all three of them displaying severe ID, psychomotoric delay, autistic features, and only mild facial dysmorphisms. Array CGH (aCGH), revealed a 6.55 Mb de novo interstitial deletion of 3p14.1p14.3 in the twin brothers and a 4.76 Mb interstitial deletion of 3p14.1p14.2 in the 18-year-old patient, respectively. We compared the malformation spectrum with previous molecularly well-defined patients in the literature and in the DECIPHER database (Database of Chromosomal Imbalance and Phenotype in Humans using Ensembl Resources; http://decipher.sanger.ac.uk/). In conclusion, the deletion of a region containing 3p14.2 seems to be associated with a relative concise phenotype including ID and developmental delay. Thus, we hypothesize that 3p14.2 is the potential core region in 3p proximal deletions. The knowledge of this potential core region could be helpful in the genetic counselling of patients with 3p proximal deletions, especially concerning their phenotype.",2013-08-15 +23945046,kClust: fast and sensitive clustering of large protein sequence databases.,"

Background

Fueled by rapid progress in high-throughput sequencing, the size of public sequence databases doubles every two years. Searching the ever larger and more redundant databases is getting increasingly inefficient. Clustering can help to organize sequences into homologous and functionally similar groups and can improve the speed, sensitivity, and readability of homology searches. However, because the clustering time is quadratic in the number of sequences, standard sequence search methods are becoming impracticable.

Results

Here we present a method to cluster large protein sequence databases such as UniProt within days down to 20%-30% maximum pairwise sequence identity. kClust owes its speed and sensitivity to an alignment-free prefilter that calculates the cumulative score of all similar 6-mers between pairs of sequences, and to a dynamic programming algorithm that operates on pairs of similar 4-mers. To increase sensitivity further, kClust can run in profile-sequence comparison mode, with profiles computed from the clusters of a previous kClust iteration. kClust is two to three orders of magnitude faster than clustering based on NCBI BLAST, and on multidomain sequences of 20%-30% maximum pairwise sequence identity it achieves comparable sensitivity and a lower false discovery rate. It also compares favorably to CD-HIT and UCLUST in terms of false discovery rate, sensitivity, and speed.

Conclusions

kClust fills the need for a fast, sensitive, and accurate tool to cluster large protein sequence databases to below 30% sequence identity. kClust is freely available under GPL at http://toolkit.lmb.uni-muenchen.de/pub/kClust/.",2013-08-15 +24451625,MEMBPLUGIN: studying membrane complexity in VMD.,"

Summary

Computer simulations are giving way to more complex and accurate studies of biological membranes by molecular dynamics (MD) simulations. The analysis of MD trajectories comprises the biophysical characterization of membrane properties or the study of protein-lipid interactions and dynamics. However, there is a lack of automated tools to analyse MD simulations of complex membrane or membrane-protein systems. Here we present MEMBPLUGIN, a plugin for the Visual Molecular Dynamics package that provides algorithms to measure a host of essential biophysical properties in simulated membranes. MEMBPLUGIN features are accessible both through a user-friendly graphical interface and as command-line procedures to be invoked in analysis scripts.

Availability and implementation

MEMBPLUGIN is a VMD extension written in Tcl. Multi-platform source code, documentation and tutorials are freely available at http://membplugin.sourceforge.net.

Contact

toni.giorgino@isib.cnr.it or jana.selent@upf.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-01-21 +24363378,SplicePlot: a utility for visualizing splicing quantitative trait loci.,"

Summary

RNA sequencing has provided unprecedented resolution of alternative splicing and splicing quantitative trait loci (sQTL). However, there are few tools available for visualizing the genotype-dependent effects of splicing at a population level. SplicePlot is a simple command line utility that produces intuitive visualization of sQTLs and their effects. SplicePlot takes mapped RNA sequencing reads in BAM format and genotype data in VCF format as input and outputs publication-quality Sashimi plots, hive plots and structure plots, enabling better investigation and understanding of the role of genetics on alternative splicing and transcript structure.

Availability and implementation

Source code and detailed documentation are available at http://montgomerylab.stanford.edu/spliceplot/index.html under Resources and at Github. SplicePlot is implemented in Python and is supported on Linux and Mac OS. A VirtualBox virtual machine running Ubuntu with SplicePlot already installed is also available.",2013-12-19 +25245835,Intraductal tubular adenomas (pyloric gland-type) of the pancreas: clinicopathologic features are similar to gastric-type intraductal papillary mucinous neoplasms and different from intraductal tubulopapillary neoplasms.,"

Background

Intraductal tubular adenoma of the pancreas, pyloric gland type (ITA), is an infrequent intraductal benign lesion located in the main duct and large branch duct of the pancreas. The purpose of this report is to introduce seven new cases and to compare their clinicopathologic features and KRAS mutations to gastric-type intraductal papillary mucinous neoplasms (IPMNs) and intraductal tubulopapillary neoplasms (ITPNs).

Methods

Clinical findings, morphologic features, immunophenotypes and KRAS alterations were investigated in 7 patients with intraductal tubular adenomas, 16 patients with gastric-type intraductal papillary mucinous neoplasms and 6 patients with intraductal tubulopapillary neoplasms.

Results

There were more female patients in the ITA and gastric-type IPMN groups, whereas the opposite pattern was observed in the ITPN group. ITAs and gastric-type IPMNs were lined by columnar cells, similar to pyloric glands, with large extracellular deposits of mucin. ITPNs were polypoid and papillary mass located in the pancreatic ducts, which did not show large deposits of mucin. All ITAs and gastric-type IPMNs expressed MUC5AC strongly and diffusely, and 3/6 ITPNs expressed MUC5AC focally and weakly. KRAS mutations were identified in 4 ITAs (4/7, 57%), 9 IPMNs (9/16, 56%) and 2 ITPNs (2/6, 33%).

Conclusion

The intraductal tubular adenoma should not be considered a precursor lesion of intraductal tubulopapillary neoplasms. No adequate data established ITA should separate as a specific entity from IPMNs.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_172.",2014-09-23 +23277275,EasyProt--an easy-to-use graphical platform for proteomics data analysis.,"High throughput protein identification and quantification analysis based on mass spectrometry are fundamental steps in most proteomics projects. Here, we present EasyProt (available at http://easyprot.unige.ch), a new platform for mass spectrometry data processing, protein identification, quantification and unexpected post-translational modification characterization. EasyProt provides a fully integrated graphical experience to perform a large part of the proteomic data analysis workflow. Our goal was to develop a software platform that would fulfill the needs of scientists in the field, while emphasizing ease-of-use for non-bioinformatician users. Protein identification is based on OLAV scoring schemes and protein quantification is implemented for both, isobaric labeling and label-free methods. Additional features are available, such as peak list processing, isotopic correction, spectra filtering, charge-state deconvolution and spectra merging. To illustrate the EasyProt platform, we present two identification and quantification workflows based on isobaric tagging and label-free methods.",2012-12-28 +24464816,PRIMSIPLR: prediction of inner-membrane situated pore-lining residues for alpha-helical transmembrane proteins.,"Transmembrane proteins such as transporters and channels mediate the passage of inorganic and organic substances across biological membranes through their central pore. Pore-lining residues (PLRs) that make direct contacts to the substrates have a crucial impact on the function of the protein and, hence, their identification is a key step in mechanistic studies. 
Here, we established a nonredundant data set containing the three-dimensional (3D) structures of 90 α-helical transmembrane proteins and annotated the PLRs of these proteins by a pore identification software. A support vector machine was then trained to distinguish PLRs from other residues based on the protein sequence alone. Using sixfold cross-validation, our best performing predictor gave a Matthews's correlation coefficient of 0.41 with an accuracy of 0.86, sensitivity of 0.61, and specificity of 0.89, respectively. We provide a novel software tool that will aid biomedical scientists working on transmembrane proteins with unknown 3D structures. Both standalone version and web service are freely available from the URL http://service.bioinformatik.uni-saarland.de/PRIMSIPLR/.",2014-02-18 +24982016,Cohort Profile: The Applied Research Group for Kids (TARGet Kids!).,"The Applied Research Group for Kids (TARGet Kids!) is an ongoing open longitudinal cohort study enrolling healthy children (from birth to 5 years of age) and following them into adolescence. The aim of the TARGet Kids! cohort is to link early life exposures to health problems including obesity, micronutrient deficiencies and developmental problems. The overarching goal is to improve the health of Canadians by optimizing growth and developmental trajectories through preventive interventions in early childhood. TARGet Kids!, the only child health research network embedded in primary care practices in Canada, leverages the unique relationship between children and families and their trusted primary care practitioners, with whom they have at least seven health supervision visits in the first 5 years of life. Children are enrolled during regularly scheduled well-child visits. To date, we have enrolled 5062 children. In addition to demographic information, we collect physical measurements (e.g. 
height, weight), lifestyle factors (nutrition, screen time and physical activity), child behaviour and developmental screening and a blood sample (providing measures of cardiometabolic, iron and vitamin D status, and trace metals). All data are collected at each well-child visit: twice a year until age 2 and every year until age 10. Information can be found at: http://www.targetkids.ca/contact-us/.",2014-06-30 +23946501,isomiRID: a framework to identify microRNA isoforms.,"

Summary

MicroRNAs (miRNAs) have been extensively studied owing to their important regulatory roles in genic expression. An increasingly number of reports are performing extensive data mining in small RNA sequencing libraries to detect miRNAs isoforms and also 5' and 3' post-transcriptional nucleotide additions, as well as edited miRNAs sequences. A ready to use pipeline, isomiRID, was developed to standardize and automatize the search for miRNAs isoforms in high-throughput small RNA sequencing libraries.

Availability

isomiRID is a command line Python script available at http://www.ufrgs.br/RNAi/isomiRID/.",2013-08-14 +25160088,Credentialing features: a platform to benchmark and optimize untargeted metabolomic methods.,"The aim of untargeted metabolomics is to profile as many metabolites as possible, yet a major challenge is comparing experimental method performance on the basis of metabolome coverage. To date, most published approaches have compared experimental methods by counting the total number of features detected. Due to artifactual interference, however, this number is highly variable and therefore is a poor metric for comparing metabolomic methods. Here we introduce an alternative approach to benchmarking metabolome coverage which relies on mixed Escherichia coli extracts from cells cultured in regular and (13)C-enriched media. After mass spectrometry-based metabolomic analysis of these extracts, we ""credential"" features arising from E. coli metabolites on the basis of isotope spacing and intensity. This credentialing platform enables us to accurately compare the number of nonartifactual features yielded by different experimental approaches. We highlight the value of our platform by reoptimizing a published untargeted metabolomic method for XCMS data processing. Compared to the published parameters, the new XCMS parameters decrease the total number of features by 15% (a reduction in noise features) while increasing the number of true metabolites detected and grouped by 20%. Our credentialing platform relies on easily generated E. coli samples and a simple software algorithm that is freely available on our laboratory Web site (http://pattilab.wustl.edu/software/credential/). We have validated the credentialing platform with reversed-phase and hydrophilic interaction liquid chromatography as well as Agilent, Thermo Scientific, AB SCIEX, and LECO mass spectrometers. 
Thus, the credentialing platform can readily be applied by any laboratory to optimize their untargeted metabolomic pipeline for metabolite extraction, chromatographic separation, mass spectrometric detection, and bioinformatic processing.",2014-09-22 +25474679,Prediction of protein-protein interactions from amino acid sequences using a novel multi-scale continuous and discontinuous feature set.,"

Background

Identifying protein-protein interactions (PPIs) is essential for elucidating protein functions and understanding the molecular mechanisms inside the cell. However, the experimental methods for detecting PPIs are both time-consuming and expensive. Therefore, computational prediction of protein interactions are becoming increasingly popular, which can provide an inexpensive way of predicting the most likely set of interactions at the entire proteome scale, and can be used to complement experimental approaches. Although much progress has already been achieved in this direction, the problem is still far from being solved and new approaches are still required to overcome the limitations of the current prediction models.

Results

In this work, a sequence-based approach is developed by combining a novel Multi-scale Continuous and Discontinuous (MCD) feature representation and Support Vector Machine (SVM). The MCD representation gives adequate consideration to the interactions between sequentially distant but spatially close amino acid residues, thus it can sufficiently capture multiple overlapping continuous and discontinuous binding patterns within a protein sequence. An effective feature selection method mRMR was employed to construct an optimized and more discriminative feature set by excluding redundant features. Finally, a prediction model is trained and tested based on SVM algorithm to predict the interaction probability of protein pairs.

Conclusions

When performed on the yeast PPIs data set, the proposed approach achieved 91.36% prediction accuracy with 91.94% precision at the sensitivity of 90.67%. Extensive experiments are conducted to compare our method with the existing sequence-based method. Experimental results show that the performance of our predictor is better than several other state-of-the-art predictors, whose average prediction accuracy is 84.91%, sensitivity is 83.24%, and precision is 86.12%. Achieved results show that the proposed approach is very promising for predicting PPI, so it can be a useful supplementary tool for future proteomics studies. The source code and the datasets are freely available at http://csse.szu.edu.cn/staff/youzh/MCDPPI.zip for academic use.",2014-12-03 +23690949,Semi-supervised prediction of SH2-peptide interactions from imbalanced high-throughput data.,"Src homology 2 (SH2) domains are the largest family of the peptide-recognition modules (PRMs) that bind to phosphotyrosine containing peptides. Knowledge about binding partners of SH2-domains is key for a deeper understanding of different cellular processes. Given the high binding specificity of SH2, in-silico ligand peptide prediction is of great interest. Currently however, only a few approaches have been published for the prediction of SH2-peptide interactions. Their main shortcomings range from limited coverage, to restrictive modeling assumptions (they are mainly based on position specific scoring matrices and do not take into consideration complex amino acids inter-dependencies) and high computational complexity. We propose a simple yet effective machine learning approach for a large set of known human SH2 domains. We used comprehensive data from micro-array and peptide-array experiments on 51 human SH2 domains. In order to deal with the high data imbalance problem and the high signal-to-noise ration, we casted the problem in a semi-supervised setting. We report competitive predictive performance w.r.t. 
state-of-the-art. Specifically we obtain 0.83 AUC ROC and 0.93 AUC PR in comparison to 0.71 AUC ROC and 0.87 AUC PR previously achieved by the position specific scoring matrices (PSSMs) based SMALI approach. Our work provides three main contributions. First, we showed that better models can be obtained when the information on the non-interacting peptides (negative examples) is also used. Second, we improve performance when considering high order correlations between the ligand positions employing regularization techniques to effectively avoid overfitting issues. Third, we developed an approach to tackle the data imbalance problem using a semi-supervised strategy. Finally, we performed a genome-wide prediction of human SH2-peptide binding, uncovering several findings of biological relevance. We make our models and genome-wide predictions, for all the 51 SH2-domains, freely available to the scientific community under the following URLs: http://www.bioinf.uni-freiburg.de/Software/SH2PepInt/SH2PepInt.tar.gz and http://www.bioinf.uni-freiburg.de/Software/SH2PepInt/Genome-wide-predictions.tar.gz, respectively.",2013-05-17 +22442124,Optimized data fusion for kernel k-means clustering.,"This paper presents a novel optimized kernel k-means algorithm (OKKC) to combine multiple data sources for clustering analysis. The algorithm uses an alternating minimization framework to optimize the cluster membership and kernel coefficients as a nonconvex problem. In the proposed algorithm, the problem to optimize the cluster membership and the problem to optimize the kernel coefficients are all based on the same Rayleigh quotient objective; therefore the proposed algorithm converges locally. OKKC has a simpler procedure and lower complexity than other algorithms proposed in the literature. 
Simulated and real-life data fusion applications are experimentally studied, and the results validate that the proposed algorithm has comparable performance, moreover, it is more efficient on large-scale data sets. (The Matlab implementation of OKKC algorithm is downloadable from http://homes.esat.kuleuven.be/~sistawww/bio/syu/okkc.html.).",2012-05-01 +23951158,FmMDb: a versatile database of foxtail millet markers for millets and bioenergy grasses research.,"The prominent attributes of foxtail millet (Setaria italica L.) including its small genome size, short life cycle, inbreeding nature, and phylogenetic proximity to various biofuel crops have made this crop an excellent model system to investigate various aspects of architectural, evolutionary and physiological significances in Panicoid bioenergy grasses. After release of its whole genome sequence, large-scale genomic resources in terms of molecular markers were generated for the improvement of both foxtail millet and its related species. Hence it is now essential to congregate, curate and make available these genomic resources for the benefit of researchers and breeders working towards crop improvement. In view of this, we have constructed the Foxtail millet Marker Database (FmMDb; http://www.nipgr.res.in/foxtail.html), a comprehensive online database for information retrieval, visualization and management of large-scale marker datasets with unrestricted public access. FmMDb is the first database which provides complete marker information to the plant science community attempting to produce elite cultivars of millet and bioenergy grass species, thus addressing global food insecurity.",2013-08-12 +25699092,High-resolution genome-wide DNA methylation maps of mouse primary female dermal fibroblasts and keratinocytes.,"

Background

Genome-wide DNA methylation at a single nucleotide resolution in different primary cells of the mammalian genome helps to determine the characteristics and functions of tissue-specific hypomethylated regions (TS-HMRs). We determined genome-wide cytosine methylation maps at 91X and 36X coverage of newborn female mouse primary dermal fibroblasts and keratinocytes and compared with mRNA-seq gene expression data.

Results

These high coverage methylation maps were used to identify HMRs in both cell types. A total of 2.91% of the genome are in keratinocyte HMRs, and 2.15% of the genome are in fibroblast HMRs with 1.75% being common. Half of the TS-HMRs are extensions of common HMRs, and the remaining are unique TS-HMRs. Four levels of CG methylation are observed: 1) total unmethylation for CG dinucleotides in HMRs in CGIs that are active in all tissues; 2) 10% to 40% methylation for TS-HMRs; 3) 60% methylation for TS-HMRs in cells types where they are not in HMRs; and 4) 70% methylation for the nonfunctioning part of the genome. SINE elements are depleted inside the TS-HMRs, while highly enriched in the surrounding regions. Hypomethylation at the last exon shows gene repression, while demethylation toward the gene body positively correlates with gene expression. The overlapping HMRs have a more complex relationship with gene expression. The common HMRs and TS-HMRs are each enriched for distinct Transcription Factor Binding Sites (TFBS). C/EBPβ binds to methylated regions outside of HMRs while CTCF prefers to bind in HMRs, highlighting these two parts of the genome and their potential interactions.

Conclusions

Keratinocytes and fibroblasts are of epithelial and mesenchymal origin. High-resolution methylation maps in these two cell types can be used as reference methylomes for analyzing epigenetic mechanisms in several diseases including cancer. Please see related article at the following link: http://www.epigeneticsandchromatin.com/content/7/1/34.",2014-12-02 +24779372,Applying genome-wide gene-based expression quantitative trait locus mapping to study population ancestry and pharmacogenetics.,"

Background

Gene-based analysis has become popular in genomic research because of its appealing biological and statistical properties compared with those of a single-locus analysis. However, only a few, if any, studies have discussed a mapping of expression quantitative trait loci (eQTL) in a gene-based framework. Neither study has discussed ancestry-informative eQTL nor investigated their roles in pharmacogenetics by integrating single nucleotide polymorphism (SNP)-based eQTL (s-eQTL) and gene-based eQTL (g-eQTL).

Results

In this g-eQTL mapping study, the transcript expression levels of genes (transcript-level genes; T-genes) were correlated with the SNPs of genes (sequence-level genes; S-genes) by using a method of gene-based partial least squares (PLS). Ancestry-informative transcripts were identified using a rank-score-based multivariate association test, and ancestry-informative eQTL were identified using Fisher's exact test. Furthermore, key ancestry-predictive eQTL were selected in a flexible discriminant analysis. We analyzed SNPs and gene expression of 210 independent people of African-, Asian- and European-descent. We identified numerous cis- and trans-acting g-eQTL and s-eQTL for each population by using PLS. We observed ancestry information enriched in eQTL. Furthermore, we identified 2 ancestry-informative eQTL associated with adverse drug reactions and/or drug response. Rs1045642, located on MDR1, is an ancestry-informative eQTL (P = 2.13E-13, using Fisher's exact test) associated with adverse drug reactions to amitriptyline and nortriptyline and drug responses to morphine. Rs20455, located in KIF6, is an ancestry-informative eQTL (P = 2.76E-23, using Fisher's exact test) associated with the response to statin drugs (e.g., pravastatin and atorvastatin). The ancestry-informative eQTL of drug biotransformation genes were also observed; cross-population cis-acting expression regulators included SPG7, TAP2, SLC7A7, and CYP4F2. Finally, we also identified key ancestry-predictive eQTL and established classification models with promising training and testing accuracies in separating samples from close populations.

Conclusions

In summary, we developed a gene-based PLS procedure and a SAS macro for identifying g-eQTL and s-eQTL. We established data archives of eQTL for global populations. The program and data archives are accessible at http://www.stat.sinica.edu.tw/hsinchou/genetics/eQTL/HapMapII.htm. Finally, the results from our investigations regarding the interrelationship between eQTL, ancestry information, and pharmacodynamics provide rich resources for future eQTL studies and practical applications in population genetics and medical genetics.",2014-04-29 +23026555,"MetaSAMS--a novel software platform for taxonomic classification, functional annotation and comparative analysis of metagenome datasets.","Metagenomics aims at exploring microbial communities concerning their composition and functioning. Application of high-throughput sequencing technologies for the analysis of environmental DNA-preparations can generate large sets of metagenome sequence data which have to be analyzed by means of bioinformatics tools to unveil the taxonomic composition of the analyzed community as well as the repertoire of genes and gene functions. A bioinformatics software platform is required that allows the automated taxonomic and functional analysis and interpretation of metagenome datasets without manual effort. To address current demands in metagenome data analyses, the novel platform MetaSAMS was developed. MetaSAMS automatically accomplishes the tasks necessary for analyzing the composition and functional repertoire of a given microbial community from metagenome sequence data by implementing two software pipelines: (i) the first pipeline consists of three different classifiers performing the taxonomic profiling of metagenome sequences and (ii) the second functional pipeline accomplishes region predictions on assembled contigs and assigns functional information to predicted coding sequences. 
Moreover, MetaSAMS provides tools for statistical and comparative analyses based on the taxonomic and functional annotations. The capabilities of MetaSAMS are demonstrated for two metagenome datasets obtained from a biogas-producing microbial community of a production-scale biogas plant. The MetaSAMS web interface is available at https://metasams.cebitec.uni-bielefeld.de.",2012-09-29 +23237667,Development of a teledermatopathology consultation system using virtual slides.,"

Background

An online consultation system using virtual slides (whole slide images; WSI) has been developed for pathological diagnosis, and could help compensate for the shortage of pathologists, especially in the field of dermatopathology and in other fields dealing with difficult cases. This study focused on the performance and future potential of the system.

Method

In our system, histological specimens on slide glasses are digitalized by a virtual slide instrument, converted into web data, and up-loaded to an open server. Using our own purpose-built online system, we then input patient details such as age, gender, affected region, clinical data, past history and other related items. We next select up to ten consultants. Finally we send an e-mail to all consultants simultaneously through a single command. The consultant receives an e-mail containing an ID and password which is used to access the open server and inspect the images and other data associated with the case. The consultant makes a diagnosis, which is sent to us along with comments. Because this was a pilot study, we also conducted several questionnaires with consultants concerning the quality of images, operability, usability, and other issues.

Results

We solicited consultations for 36 cases, including cases of tumor, and involving one to eight consultants in the field of dermatopathology. No problems were noted concerning the images or the functioning of the system on the sender or receiver sides. The quickest diagnosis was received only 18 minutes after sending our data. This is much faster than in conventional consultation using glass slides. There were no major problems relating to the diagnosis, although there were some minor differences of opinion between consultants. The results of questionnaires answered by many consultants confirmed the usability of this system for pathological consultation. (16 out of 23 consultants.)

Conclusion

We have developed a novel teledermatopathological consultation system using virtual slides, and investigated the usefulness of the system. The results demonstrate that our system can be a useful tool for international medical work, and we anticipate its wider application in the future.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1902376044831574.",2012-12-13 +23941207,PKIS: computational identification of protein kinases for experimentally discovered protein phosphorylation sites.,"

Background

Dynamic protein phosphorylation is an essential regulatory mechanism in various organisms. In this capacity, it is involved in a multitude of signal transduction pathways. Kinase-specific phosphorylation data lay the foundation for reconstruction of signal transduction networks. For this reason, precise annotation of phosphorylated proteins is the first step toward simulating cell signaling pathways. However, the vast majority of kinase-specific phosphorylation data remain undiscovered and existing experimental methods and computational phosphorylation site (P-site) prediction tools have various limitations with respect to addressing this problem.

Results

To address this issue, a novel protein kinase identification web server, PKIS, is here presented for the identification of the protein kinases responsible for experimentally verified P-sites at high specificity, which incorporates the composition of monomer spectrum (CMS) encoding strategy and support vector machines (SVMs). Compared to widely used P-site prediction tools including KinasePhos 2.0, Musite, and GPS2.1, PKIS largely outperformed these tools in identifying protein kinases associated with known P-sites. In addition, PKIS was used on all the P-sites in Phospho.ELM that currently lack kinase information. It successfully identified 14 potential SYK substrates with 36 known P-sites. Further literature search showed that 5 of them were indeed phosphorylated by SYK. Finally, an enrichment analysis was performed and 6 significant SYK-related signal pathways were identified.

Conclusions

In general, PKIS can identify protein kinases for experimental phosphorylation sites efficiently. It is a valuable bioinformatics tool suitable for the study of protein phosphorylation. The PKIS web server is freely available at http://bioinformatics.ustc.edu.cn/pkis.",2013-08-13 +23940251,Badger--an accessible genome exploration environment.,"

Summary

High-quality draft genomes are now easy to generate, as sequencing and assembly costs have dropped dramatically. However, building a user-friendly searchable Web site and database for a newly annotated genome is not straightforward. Here we present Badger, a lightweight and easy-to-install genome exploration environment designed for next generation non-model organism genomes.

Availability

Badger is released under the GPL and is available at http://badger.bio.ed.ac.uk/. We show two working examples: (i) a test dataset included with the source code, and (ii) a collection of four filarial nematode genomes.

Contact

mark.blaxter@ed.ac.uk.",2013-08-11 +23700313,PyroHMMsnp: an SNP caller for Ion Torrent and 454 sequencing data.,"Both 454 and Ion Torrent sequencers are capable of producing large amounts of long high-quality sequencing reads. However, as both methods sequence homopolymers in one cycle, they both suffer from homopolymer uncertainty and incorporation asynchronization. In mapping, such sequencing errors could shift alignments around homopolymers and thus induce incorrect mismatches, which have become a critical barrier against the accurate detection of single nucleotide polymorphisms (SNPs). In this article, we propose a hidden Markov model (HMM) to statistically and explicitly formulate homopolymer sequencing errors by the overcall, undercall, insertion and deletion. We use a hierarchical model to describe the sequencing and base-calling processes, and we estimate parameters of the HMM from resequencing data by an expectation-maximization algorithm. Based on the HMM, we develop a realignment-based SNP-calling program, termed PyroHMMsnp, which realigns read sequences around homopolymers according to the error model and then infers the underlying genotype by using a Bayesian approach. Simulation experiments show that the performance of PyroHMMsnp is exceptional across various sequencing coverages in terms of sensitivity, specificity and F1 measure, compared with other tools. Analysis of the human resequencing data shows that PyroHMMsnp predicts 12.9% more SNPs than Samtools while achieving a higher specificity. (http://code.google.com/p/pyrohmmsnp/).",2013-05-21 +25815770,Association of Parkinson's Disease and Its Subtypes with Agricultural Pesticide Exposures in Men: A Case-Control Study in France.,"

Background

Pesticides have been associated with Parkinson's disease (PD), but there are few data on important exposure characteristics such as dose-effect relations. It is unknown whether associations depend on clinical PD subtypes.

Objectives

We examined quantitative aspects of occupational pesticide exposure associated with PD and investigated whether associations were similar across PD subtypes.

Methods

As part of a French population-based case-control study including men enrolled in the health insurance plan for farmers and agricultural workers, cases with clinically confirmed PD were identified through antiparkinsonian drug claims. Two controls were matched to each case. Using a comprehensive occupational questionnaire, we computed indicators for different dimensions of exposure (duration, cumulative exposure, intensity). We used conditional logistic regression to compute odds ratios (ORs) and 95% confidence intervals (CIs) among exposed male farmers (133 cases, 298 controls). We examined the relation between pesticides and PD subtypes (tremor dominant/non-tremor dominant) using polytomous logistic regression.

Results

There appeared to be a stronger association with intensity than duration of pesticide exposure based on separate models, as well as a synergistic interaction between duration and intensity (p-interaction = 0.04). High-intensity exposure to insecticides was positively associated with PD among those with low-intensity exposure to fungicides and vice versa, suggesting independent effects. Pesticide exposure in farms that specialized in vineyards was associated with PD (OR = 2.56; 95% CI: 1.31, 4.98). The association with intensity of pesticide use was stronger, although not significantly (p-heterogeneity = 0.60), for tremor-dominant (p-trend < 0.01) than for non-tremor-dominant PD (p-trend = 0.24).

Conclusions

This study helps to better characterize different aspects of pesticide exposure associated with PD, and shows a significant association of pesticides with tremor-dominant PD in men, the most typical PD presentation.

Citation

Moisan F, Spinosi J, Delabre L, Gourlet V, Mazurie JL, Bénatru I, Goldberg M, Weisskopf MG, Imbernon E, Tzourio C, Elbaz A. 2015. Association of Parkinson's disease and its subtypes with agricultural pesticide exposures in men: a case-control study in France. Environ Health Perspect 123:1123-1129; http://dx.doi.org/10.1289/ehp.1307970.",2015-03-27 +25549982,Accuracy of Igenity genomically estimated breeding values for predicting Australian Angus BREEDPLAN traits.,"Genomically estimated breeding values (GEBV) for Angus beef cattle are available from at least 2 commercial suppliers (Igenity [http://www.igenity.com] and Zoetis [http://www.zoetis.com]). The utility of these GEBV for improving genetic evaluation depends on their accuracies, which can be estimated by the genetic correlation with phenotypic target traits. Genomically estimated breeding values of 1,032 Angus bulls calculated from prediction equations (PE) derived by 2 different procedures in the U.S. Angus population were supplied by Igenity. Both procedures were based on Illuminia BovineSNP50 BeadChip genotypes. In procedure sg, GEBV were calculated from PE that used subsets of only 392 SNP, where these subsets were individually selected for each trait by BayesCπ. In procedure rg GEBV were calculated from PE derived in a ridge regression approach using all available SNP. Because the total set of 1,032 bulls with GEBV contained 732 individuals used in the Igenity training population, GEBV subsets were formed characterized by a decreasing average relationship between individuals in the subsets and individuals in the training population. Accuracies of GEBV were estimated as genetic correlations between GEBV and their phenotypic target traits modeling GEBV as trait observations in a bivariate REML approach, in which phenotypic observations were those recorded in the commercial Australian Angus seed stock sector. 
Using results from the GEBV subset excluding all training individuals as a reference, estimated accuracies were generally in agreement with those already published, with both types of GEBV (sg and rg) yielding similar results. Accuracies for growth traits ranged from 0.29 to 0.45, for reproductive traits from 0.11 to 0.53, and for carcass traits from 0.3 to 0.75. Accuracies generally decreased with an increasing genetic distance between the training and the validation population. However, for some carcass traits characterized by a low number of phenotypic records (weight, intramuscular fat, and eye muscle area), accuracies were observed to increase but had large SE. Therefore, Igenity GEBV can be useful to Australian Angus breeders, either for blending EBV or as the sole basis for selection decisions if no other information is available. However, for carcass traits, additional phenotypic data are required.",2015-02-01 +22302572,pymzML--Python module for high-throughput bioinformatics on mass spectrometry data.,"

Summary

pymzML is an extension to Python that offers (i) an easy access to mass spectrometry (MS) data that allows the rapid development of tools, (ii) a very fast parser for mzML data, the standard data format in MS and (iii) a set of functions to compare or handle spectra.

Availability and implementation

pymzML requires Python2.6.5+ and is fully compatible with Python3. The module is freely available on http://pymzml.github.com or pypi, is published under LGPL license and requires no additional modules to be installed.

Contact

christian@fufezan.net.",2012-02-02 +26521770,Rehabilitative treatments for chronic fatigue syndrome: long-term follow-up from the PACE trial.,"

Background

The PACE trial found that, when added to specialist medical care (SMC), cognitive behavioural therapy (CBT), or graded exercise therapy (GET) were superior to adaptive pacing therapy (APT) or SMC alone in improving fatigue and physical functioning in people with chronic fatigue syndrome 1 year after randomisation. In this pre-specified follow-up study, we aimed to assess additional treatments received after the trial and investigate long-term outcomes (at least 2 years after randomisation) within and between original treatment groups in those originally included in the PACE trial.

Methods

The PACE trial was a parallel-group randomised controlled trial of patients meeting Oxford criteria for chronic fatigue syndrome who were recruited from six secondary care clinics in the UK between March 18, 2005, and Nov 28, 2008. Participants were randomly allocated to receive SMC alone or plus APT, CBT, or GET. Primary outcomes were fatigue (measured with Chalder fatigue questionnaire score) and physical functioning (measured with short form-36 subscale score), assessed 1 year after randomisation. In this long-term follow-up, we sent postal questionnaires to assess treatment received after the trial and outcomes a minimum of 2 years after randomisation. We assessed long-term differences in outcomes within and between originally randomised groups. The PACE trial is registered at http://isrctn.org, number ISRCTN54285094.

Findings

Between May 8, 2008, and April 26, 2011, 481 (75%) participants from the PACE trial returned questionnaires. Median time from randomisation to return of long-term follow-up assessment was 31 months (IQR 30-32; range 24-53). 210 (44%) participants received additional treatment (mostly CBT or GET) after the trial; with participants originally assigned to SMC alone (73 [63%] of 115) or APT (60 [50%] of 119) more likely to seek treatment than those originally assigned to GET (41 [32%] of 127) or CBT (36 [31%] of 118; p<0·0001). Improvements in fatigue and physical functioning reported by participants originally assigned to CBT and GET were maintained (within-group comparison of fatigue and physical functioning, respectively, at long-term follow-up as compared with 1 year: CBT -2·2 [95% CI -3·7 to -0·6], 3·3 [0·02 to 6·7]; GET -1·3 [-2·7 to 0·1], 0·5 [-2·7 to 3·6]). Participants allocated to APT and to SMC alone in the trial improved over the follow-up period compared with 1 year (fatigue and physical functioning, respectively: APT -3·0 [-4·4 to -1·6], 8·5 [4·5 to 12·5]; SMC -3·9 [-5·3 to -2·6], 7·1 [4·0 to 10·3]). There was little evidence of differences in outcomes between the randomised treatment groups at long-term follow-up.

Interpretation

The beneficial effects of CBT and GET seen at 1 year were maintained at long-term follow-up a median of 2·5 years after randomisation. Outcomes with SMC alone or APT improved from the 1 year outcome and were similar to CBT and GET at long-term follow-up, but these data should be interpreted in the context of additional therapies having been given according to physician choice and patient preference after the 1 year trial final assessment. Future research should identify predictors of response to CBT and GET and also develop better treatments for those who respond to neither.

Funding

UK Medical Research Council, Department of Health for England, Scottish Chief Scientist Office, Department for Work and Pensions, National Institute for Health Research (NIHR), NIHR Biomedical Research Centre for Mental Health at South London and Maudsley NHS Foundation Trust, King's College London.",2015-10-28 +21535883,Genome-wide analysis of the mouse lung transcriptome reveals novel molecular gene interaction networks and cell-specific expression signatures.,"

Background

The lung is critical in surveillance and initial defense against pathogens. In humans, as in mice, individual genetic differences strongly modulate pulmonary responses to infectious agents, severity of lung disease, and potential allergic reactions. In a first step towards understanding genetic predisposition and pulmonary molecular networks that underlie individual differences in disease vulnerability, we performed a global analysis of normative lung gene expression levels in inbred mouse strains and a large family of BXD strains that are widely used for systems genetics. Our goal is to provide a key community resource on the genetics of the normative lung transcriptome that can serve as a foundation for experimental analysis and allow predicting genetic predisposition and response to pathogens, allergens, and xenobiotics.

Methods

Steady-state polyA+ mRNA levels were assayed across a diverse and fully genotyped panel of 57 isogenic strains using the Affymetrix M430 2.0 array. Correlations of expression levels between genes were determined. Global expression QTL (eQTL) analysis and network covariance analysis were performed using tools and resources in GeneNetwork http://www.genenetwork.org.

Results

Expression values were highly variable across strains and in many cases exhibited a high heritability factor. Several genes which showed a restricted expression to lung tissue were identified. Using correlations between gene expression values across all strains, we defined and extended memberships of several important molecular networks in the lung. Furthermore, we were able to extract signatures of immune cell subpopulations and characterize co-variation and shared genetic modulation. Known QTL regions for respiratory infection susceptibility were investigated and several cis-eQTL genes were identified. Numerous cis- and trans-regulated transcripts and chromosomal intervals with strong regulatory activity were mapped. The Cyp1a1 P450 transcript had a strong trans-acting eQTL (LOD 11.8) on Chr 12 at 36 ± 1 Mb. This interval contains the transcription factor Ahr that has a critical mis-sense allele in the DBA/2J haplotype and evidently modulates transcriptional activation by AhR.

Conclusions

Large-scale gene expression analyses in genetic reference populations revealed lung-specific and immune-cell gene expression profiles and suggested specific gene regulatory interactions.",2011-05-02 +22113085,"MSnbase-an R/Bioconductor package for isobaric tagged mass spectrometry data visualization, processing and quantitation.","

Unlabelled

MSnbase is an R/Bioconductor package for the analysis of quantitative proteomics experiments that use isobaric tagging. It provides an exploratory data analysis framework for reproducible research, allowing raw data import, quality control, visualization, data processing and quantitation. MSnbase allows direct integration of quantitative proteomics data with additional facilities for statistical analysis provided by the Bioconductor project.

Availability

MSnbase is implemented in R (version ≥ 2.13.0) and available at the Bioconductor web site (http://www.bioconductor.org/). Vignettes outlining typical workflows, input/output capabilities and detailing underlying infrastructure are included in the package.",2011-11-22 +22897824,DREM 2.0: Improved reconstruction of dynamic regulatory networks from time-series expression data.,"

Background

Modeling dynamic regulatory networks is a major challenge since much of the protein-DNA interaction data available is static. The Dynamic Regulatory Events Miner (DREM) uses a Hidden Markov Model-based approach to integrate this static interaction data with time series gene expression leading to models that can determine when transcription factors (TFs) activate genes and what genes they regulate. DREM has been used successfully in diverse areas of biological research. However, several issues were not addressed by the original version.

Results

DREM 2.0 is a comprehensive software for reconstructing dynamic regulatory networks that supports interactive graphical or batch mode. With version 2.0 a set of new features that are unique in comparison with other software is introduced. First, we provide static interaction data for additional species. Second, DREM 2.0 now accepts continuous binding values and we added a new method to utilize TF expression levels when searching for dynamic models. Third, we added support for discriminative motif discovery, which is particularly powerful for species with limited experimental interaction data. Finally, we improved the visualization to support the new features. Combined, these changes improve the ability of DREM 2.0 to accurately recover dynamic regulatory networks and make it much easier to use it for analyzing such networks in several species with varying degrees of interaction information.

Conclusions

DREM 2.0 provides a unique framework for constructing and visualizing dynamic regulatory networks. DREM 2.0 can be downloaded from: http://www.sb.cs.cmu.edu/drem.",2012-08-16 +22537045,An improved approach for accurate and efficient calling of structural variations with low-coverage sequence data.,"

Background

Recent advances in sequencing technologies make it possible to comprehensively study structural variations (SVs) using sequence data of large-scale populations. Currently, more efforts have been taken to develop methods that call SVs with exact breakpoints. Among these approaches, split-read mapping methods can be applied on low-coverage sequence data. With increasing amount of data generated, more efficient split-read mapping methods are still needed. Also, since sequence errors can not be avoided for the current sequencing technologies, more accurate split-read mapping methods are still needed to better handle sequence errors.

Results

In this paper, we present a split-read mapping method implemented in the program SVseq2 which improves our previous work SVseq1. Similar to SVseq1, SVseq2 calls deletions (and insertions) with exact breakpoints. SVseq2 achieves more accurate calling through split-read mapping within focal regions. SVseq2 also has a much desired feature: there is no need to specify the maximum deletion size, while some existing split-read mapping methods need more memory and longer running time when larger maximum deletion size is chosen. SVseq2 is also much faster because it only needs to examine a small number of ways of splitting the reads. Moreover, SVseq2 supports insertion calling from low-coverage sequence data, while SVseq1 only supports deletion finding. The program SVseq2 can be downloaded at http://www.engr.uconn.edu/~jiz08001/.

Conclusions

SVseq2 enables accurate and efficient SV calling through split-read mapping within focal regions using paired-end reads. For many simulated data and real sequence data, SVseq2 outperforms some other existing approaches in accuracy and efficiency, especially when sequence coverage is low.",2012-04-19 +22368248,Unipro UGENE: a unified bioinformatics toolkit.,"

Unlabelled

Unipro UGENE is a multiplatform open-source software with the main goal of assisting molecular biologists without much expertise in bioinformatics to manage, analyze and visualize their data. UGENE integrates widely used bioinformatics tools within a common user interface. The toolkit supports multiple biological data formats and allows the retrieval of data from remote data sources. It provides visualization modules for biological objects such as annotated genome sequences, Next Generation Sequencing (NGS) assembly data, multiple sequence alignments, phylogenetic trees and 3D structures. Most of the integrated algorithms are tuned for maximum performance by the usage of multithreading and special processor instructions. UGENE includes a visual environment for creating reusable workflows that can be launched on local resources or in a High Performance Computing (HPC) environment. UGENE is written in C++ using the Qt framework. The built-in plugin system and structured UGENE API make it possible to extend the toolkit with new functionality.

Availability and implementation

UGENE binaries are freely available for MS Windows, Linux and Mac OS X at http://ugene.unipro.ru/download.html. UGENE code is licensed under the GPLv2; the information about the code licensing and copyright of integrated tools can be found in the LICENSE.3rd_party file provided with the source bundle.",2012-02-24 +23935056,Version 6 of the consensus yeast metabolic network refines biochemical coverage and improves model performance.,"Updates to maintain a state-of-the-art reconstruction of the yeast metabolic network are essential to reflect our understanding of yeast metabolism and functional organization, to eliminate any inaccuracies identified in earlier iterations, to improve predictive accuracy and to continue to expand into novel subsystems to extend the comprehensiveness of the model. Here, we present version 6 of the consensus yeast metabolic network (Yeast 6) as an update to the community effort to computationally reconstruct the genome-scale metabolic network of Saccharomyces cerevisiae S288c. Yeast 6 comprises 1458 metabolites participating in 1888 reactions, which are annotated with 900 yeast genes encoding the catalyzing enzymes. Compared with Yeast 5, Yeast 6 demonstrates improved sensitivity, specificity and positive and negative predictive values for predicting gene essentiality in glucose-limited aerobic conditions when analyzed with flux balance analysis. Additionally, Yeast 6 improves the accuracy of predicting the likelihood that a mutation will cause auxotrophy. The network reconstruction is available as a Systems Biology Markup Language (SBML) file enriched with Minimum Information Requested in the Annotation of Biochemical Models (MIRIAM)-compliant annotations. Small- and macromolecules in the network are referenced to authoritative databases such as Uniprot or ChEBI. Molecules and reactions are also annotated with appropriate publications that contain supporting evidence. 
Yeast 6 is freely available at http://yeast.sf.net/ as three separate SBML files: a model using the SBML level 3 Flux Balance Constraint package, a model compatible with the MATLAB® COBRA Toolbox for backward compatibility and a reconstruction containing only reactions for which there is experimental evidence (without the non-biological reactions necessary for simulating growth). Database URL: http://yeast.sf.net/",2013-08-09 +23407360,Hybrid regulatory models: a statistically tractable approach to model regulatory network dynamics.,"

Motivation

Computational modelling of the dynamics of gene regulatory networks is a central task of systems biology. For networks of small/medium scale, the dominant paradigm is represented by systems of coupled non-linear ordinary differential equations (ODEs). ODEs afford great mechanistic detail and flexibility, but calibrating these models to data is often an extremely difficult statistical problem.

Results

Here, we develop a general statistical inference framework for stochastic transcription-translation networks. We use a coarse-grained approach, which represents the system as a network of stochastic (binary) promoter and (continuous) protein variables. We derive an exact inference algorithm and an efficient variational approximation that allows scalable inference and learning of the model parameters. We demonstrate the power of the approach on two biological case studies, showing that the method allows a high degree of flexibility and is capable of testable novel biological predictions.

Availability and implementation

http://homepages.inf.ed.ac.uk/gsanguin/software.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-13 +23245209,Gene family matters: expanding the HGNC resource.,"The HUGO Gene Nomenclature Committee (HGNC) assigns approved gene symbols to human loci. There are currently over 33,000 approved gene symbols, the majority of which represent protein-coding genes, but we also name other locus types such as non-coding RNAs, pseudogenes and phenotypic loci. Where relevant, the HGNC organise these genes into gene families and groups. The HGNC website http://www.genenames.org/ is an online repository of HGNC-approved gene nomenclature and associated resources for human genes, and includes links to genomic, proteomic and phenotypic information. In addition to this, we also have dedicated gene family web pages and are currently expanding and generating more of these pages using data curated by the HGNC and from information derived from external resources that focus on particular gene families. Here, we review our current online resources with a particular focus on our gene family data, using it to highlight our new Gene Symbol Report and gene family data downloads.",2012-07-05 +25433580,"Effects of Calcium Fructoborate on Levels of C-Reactive Protein, Total Cholesterol, Low-Density Lipoprotein, Triglycerides, IL-1β, IL-6, and MCP-1: a Double-blind, Placebo-controlled Clinical Study.","Calcium fructoborate (CFB) has been reported as supporting healthy inflammatory response. In this study, we assess the effects of CFB on blood parameters and proinflammatory cytokines in healthy subjects. This was a randomized, double-blinded, placebo-controlled trial. Participants received placebo or CFB at a dose of 112 mg/day (CFB-1) or 56 mg/day (CFB-2) for 30 days. 
Glucose, total cholesterol (TC), low-density lipoprotein (LDL), high-density lipoprotein (HDL), triglycerides (TG), C-reactive protein (CRP), homocysteine, interleukin 1 beta (IL-1β), IL-6, and monocyte chemoattractant protein-1 (MCP-1) were determined before and after supplementation. CFB-1 showed a reduction in blood levels of CRP by 31.3 % compared to baseline. CFB-1 and CFB-2 reduced LDL levels by 9.8 and 9.4 %, respectively. CFB-1 decreased blood homocysteine by 5.5 % compared with baseline, whereas CFB-2 did not have a significant effect. Blood levels of TG were reduced by 9.1 and 8.8 % for CFB-1 and CFB-2, respectively. Use of both CFB-1 and CFB-2 resulted in significantly reduced IL-6 levels, when compared within and between groups. IL-1β was reduced by 29.2 % in the CFB-1 group. Finally, CFB-1 and CFB-2 reduced MCP-1 by 31 and 26 %, respectively. Our data indicate that 30-day supplementation with 112 mg/day CFB (CFB-1) resulted in a significant reduction of LDL, TG, TC, IL-1β, IL-6, MCP-1, and CRP. HDL levels were increased, when compared to baseline and placebo. These results suggest that CFB might provide beneficial support to healthy cardiovascular systems by positively affecting these blood markers (ClinicalTrials.gov, ISRCTN90543844; May 24, 2012 ( http://www.controlled-trials.com/ISRCTN90543844 )).",2014-11-30 +22796958,A regression model for estimating DNA copy number applied to capture sequencing data.,"

Motivation

Target enrichment, also referred to as DNA capture, provides an effective way to focus sequencing efforts on a genomic region of interest. Capture data are typically used to detect single-nucleotide variants. It can also be used to detect copy number alterations, which is particularly useful in the context of cancer, where such changes occur frequently. In copy number analysis, it is a common practice to determine log-ratios between test and control samples, but this approach results in a loss of information as it disregards the total coverage or intensity at a locus.

Results

We modeled the coverage or intensity of the test sample as a linear function of the control sample. This regression approach is able to deal with regions that are completely deleted, which are problematic for methods that use log-ratios. To demonstrate the utility of our approach, we used capture data to determine copy number for a set of 600 genes in a panel of nine breast cancer cell lines. We found high concordance between our results and those generated using a single-nucleotide polymorphism genotyping platform. When we compared our results with other log-ratio-based methods, including ExomeCNV, we found that our approach produced better overall correlation with SNP data.

Availability

The algorithm is implemented in C and R and the code can be downloaded from http://bioinformatics.nki.nl/ocs/

Contact

l.wessels@nki.nl

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-13 +23813015,A framework for scalable parameter estimation of gene circuit models using structural information.,"

Motivation

Systematic and scalable parameter estimation is a key to construct complex gene regulatory models and to ultimately facilitate an integrative systems biology approach to quantitatively understand the molecular mechanisms underpinning gene regulation.

Results

Here, we report a novel framework for efficient and scalable parameter estimation that focuses specifically on modeling of gene circuits. Exploiting the structure commonly found in gene circuit models, this framework decomposes a system of coupled rate equations into individual ones and efficiently integrates them separately to reconstruct the mean time evolution of the gene products. The accuracy of the parameter estimates is refined by iteratively increasing the accuracy of numerical integration using the model structure. As a case study, we applied our framework to four gene circuit models with complex dynamics based on three synthetic datasets and one time series microarray data set. We compared our framework to three state-of-the-art parameter estimation methods and found that our approach consistently generated higher quality parameter solutions efficiently. Although many general-purpose parameter estimation methods have been applied for modeling of gene circuits, our results suggest that the use of more tailored approaches to use domain-specific information may be a key to reverse engineering of complex biological systems.

Availability

http://sfb.kaust.edu.sa/Pages/Software.aspx.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +22884039,EasyLCMS: an asynchronous web application for the automated quantification of LC-MS data.,"

Background

Downstream applications in metabolomics, as well as mathematical modelling, require data in a quantitative format, which may also necessitate the automated and simultaneous quantification of numerous metabolites. Although numerous applications have been previously developed for metabolomics data handling, automated calibration and calculation of the concentrations in terms of μmol have not been carried out. Moreover, most of the metabolomics applications are designed for GC-MS, and would not be suitable for LC-MS, since in LC, the deviation in the retention time is not linear, which is not taken into account in these applications. Moreover, only a few are web-based applications, which could improve stand-alone software in terms of compatibility, sharing capabilities and hardware requirements, even though a strong bandwidth is required. Furthermore, none of these incorporate asynchronous communication to allow real-time interaction with pre-processed results.

Findings

Here, we present EasyLCMS (http://www.easylcms.es/), a new application for automated quantification which was validated using more than 1000 concentration comparisons in real samples with manual operation. The results showed that only 1% of the quantifications presented a relative error higher than 15%. Using clustering analysis, the metabolites with the highest relative error distributions were identified and studied to solve recurrent mistakes.

Conclusions

EasyLCMS is a new web application designed to quantify numerous metabolites, simultaneously integrating LC distortions and asynchronous web technology to present a visual interface with dynamic interaction which allows checking and correction of LC-MS raw data pre-processing results. Moreover, quantified data obtained with EasyLCMS are fully compatible with numerous downstream applications, as well as for mathematical modelling in the systems biology field.",2012-08-11 +27493549,LigandBox: A database for 3D structures of chemical compounds.,"A database for the 3D structures of available compounds is essential for the virtual screening by molecular docking. We have developed the LigandBox database (http://ligandbox.protein.osaka-u.ac.jp/ligandbox/) containing four million available compounds, collected from the catalogues of 37 commercial suppliers, and approved drugs and biochemical compounds taken from KEGG_DRUG, KEGG_COMPOUND and PDB databases. Each chemical compound in the database has several 3D conformers with hydrogen atoms and atomic charges, which are ready to be docked into receptors using docking programs. The 3D conformations were generated using our molecular simulation program package, myPresto. Various physical properties, such as aqueous solubility (LogS) and carcinogenicity have also been calculated to characterize the ADME-Tox properties of the compounds. The Web database provides two services for compound searches: a property/chemical ID search and a chemical structure search. The chemical structure search is performed by a descriptor search and a maximum common substructure (MCS) search combination, using our program kcombu. By specifying a query chemical structure, users can find similar compounds among the millions of compounds in the database within a few minutes. 
Our database is expected to assist a wide range of researchers, in the fields of medical science, chemical biology, and biochemistry, who are seeking to discover active chemical compounds by the virtual screening.",2013-08-07 +24931994,BlockClust: efficient clustering and classification of non-coding RNAs from short read RNA-seq profiles.,"

Summary

Non-coding RNAs (ncRNAs) play a vital role in many cellular processes such as RNA splicing, translation, gene regulation. However the vast majority of ncRNAs still have no functional annotation. One prominent approach for putative function assignment is clustering of transcripts according to sequence and secondary structure. However sequence information is changed by post-transcriptional modifications, and secondary structure is only a proxy for the true 3D conformation of the RNA polymer. A different type of information that does not suffer from these issues and that can be used for the detection of RNA classes, is the pattern of processing and its traces in small RNA-seq reads data. Here we introduce BlockClust, an efficient approach to detect transcripts with similar processing patterns. We propose a novel way to encode expression profiles in compact discrete structures, which can then be processed using fast graph-kernel techniques. We perform both unsupervised clustering and develop family specific discriminative models; finally we show how the proposed approach is scalable, accurate and robust across different organisms, tissues and cell lines.

Availability

The whole BlockClust galaxy workflow including all tool dependencies is available at http://toolshed.g2.bx.psu.edu/view/rnateam/blockclust_workflow.",2014-06-01 +23173819,TIGRESS: Trustful Inference of Gene REgulation using Stability Selection.,"

Background

Inferring the structure of gene regulatory networks (GRN) from a collection of gene expression data has many potential applications, from the elucidation of complex biological processes to the identification of potential drug targets. It is however a notoriously difficult problem, for which the many existing methods reach limited accuracy.

Results

In this paper, we formulate GRN inference as a sparse regression problem and investigate the performance of a popular feature selection method, least angle regression (LARS) combined with stability selection, for that purpose. We introduce a novel, robust and accurate scoring technique for stability selection, which improves the performance of feature selection with LARS. The resulting method, which we call TIGRESS (for Trustful Inference of Gene REgulation with Stability Selection), was ranked among the top GRN inference methods in the DREAM5 gene network inference challenge. In particular, TIGRESS was evaluated to be the best linear regression-based method in the challenge. We investigate in depth the influence of the various parameters of the method, and show that a fine parameter tuning can lead to significant improvements and state-of-the-art performance for GRN inference, in both directed and undirected settings.

Conclusions

TIGRESS reaches state-of-the-art performance on benchmark data, including both in silico and in vivo (E. coli and S. cerevisiae) networks. This study confirms the potential of feature selection techniques for GRN inference. Code and data are available on http://cbio.ensmp.fr/tigress. Moreover, TIGRESS can be run online through the GenePattern platform (GP-DREAM, http://dream.broadinstitute.org).",2012-11-22 +25809590,"SCAI/AATS/ACC/STS operator and institutional requirements for transcatheter valve repair and replacement, Part III: Pulmonic valve.","With the evolution of transcatheter valve replacement, an important opportunity has arisen for cardiologists and surgeons to collaborate in identifying the criteria for performing these procedures. Therefore, The Society for Cardiovascular Angiography and Interventions (SCAI), American Association for Thoracic Surgery (AATS), American College of Cardiology (ACC), and The Society of Thoracic Surgeons (STS) have partnered to provide recommendations for institutions to assess their potential for instituting and/or maintaining a transcatheter valve program. This article concerns transcatheter pulmonic valve replacement (tPVR). tPVR procedures are in their infancy with few reports available on which to base an expert consensus statement. Therefore, many of these recommendations are based on expert consensus and the few reports available. As the procedures evolve, technology advances, experience grows, and more data accumulate, there will certainly be a need to update this consensus statement. The writing committee and participating societies believe that the recommendations in this report serve as appropriate requisites. In some ways, these recommendations apply to institutions more than to individuals. There is a strong consensus that these new valve therapies are best performed using a Heart Team approach; thus, these credentialing criteria should be applied at the institutional level. 
Partnering societies used the ACC's policy on relationships with industry (RWI) and other entities to author this document (http://www.acc.org/guidelines/about-guidelines-and-clinical-documents). To avoid actual, potential, or perceived conflicts of interest due to industry relationships or personal interests, all members of the writing committee, as well as peer reviewers of the document, were asked to disclose all current healthcare-related relationships including those existing 12 months before the initiation of the writing effort. A committee of interventional cardiologists and surgeons was formed to include a majority of members with no relevant RWI and to be led by an interventional cardiology cochair and a surgical cochair with no relevant RWI. Authors with relevant RWI were not permitted to draft or vote on text or recommendations pertaining to their RWI. RWI were reviewed on all conference calls and updated as changes occurred. Author and peer reviewer RWI pertinent to this document are disclosed in the Appendices. In addition, to ensure complete transparency, authors' comprehensive disclosure information (including RWI not pertinent to this document) is available in Appendix AII. The work of the writing committee was supported exclusively by the partnering societies without commercial support. SCAI, AATS, ACC, and STS believe that adherence to these recommendations will maximize the chances that these therapies will become a successful part of the armamentarium for treating valvular heart disease in the United States. In addition, these recommendations will hopefully facilitate optimum quality during the delivery of this therapy, which will be important to the development and successful implementation of future, less invasive approaches to structural heart disease.",2015-03-24 +23740742,WhichCyp: prediction of cytochromes P450 inhibition.,"

Summary

In this work we present WhichCyp, a tool for prediction of which cytochromes P450 isoforms (among 1A2, 2C9, 2C19, 2D6 and 3A4) a given molecule is likely to inhibit. The models are built from experimental high-throughput data using support vector machines and molecular signatures.

Availability

The WhichCyp server is freely available for use on the web at http://drug.ku.dk/whichcyp, where the WhichCyp Java program and source code is also available for download.",2013-06-05 +23813001,IDBA-tran: a more robust de novo de Bruijn graph assembler for transcriptomes with uneven expression levels.,"

Motivation

RNA sequencing based on next-generation sequencing technology is effective for analyzing transcriptomes. Like de novo genome assembly, de novo transcriptome assembly does not rely on any reference genome or additional annotation information, but is more difficult. In particular, isoforms can have very uneven expression levels (e.g. 1:100), which make it very difficult to identify low-expressed isoforms. One challenge is to remove erroneous vertices/edges with high multiplicity (produced by high-expressed isoforms) in the de Bruijn graph without removing correct ones with not-so-high multiplicity from low-expressed isoforms. Failing to do so will result in the loss of low-expressed isoforms or having complicated subgraphs with transcripts of different genes mixed together due to erroneous vertices/edges. Contributions: Unlike existing tools, which remove erroneous vertices/edges with multiplicities lower than a global threshold, we use a probabilistic progressive approach to iteratively remove them with local thresholds. This enables us to decompose the graph into disconnected components, each containing a few genes, if not a single gene, while retaining many correct vertices/edges of low-expressed isoforms. Combined with existing techniques, IDBA-Tran is able to assemble both high-expressed and low-expressed transcripts and outperform existing assemblers in terms of sensitivity and specificity for both simulated and real data.

Availability

http://www.cs.hku.hk/~alse/idba_tran.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +24520165,Escherichia coli swimming is robust against variations in flagellar number.,"Bacterial chemotaxis is a paradigm for how environmental signals modulate cellular behavior. Although the network underlying this process has been studied extensively, we do not yet have an end-to-end understanding of chemotaxis. Specifically, how the rotational states of a cell's flagella cooperatively determine whether the cell 'runs' or 'tumbles' remains poorly characterized. Here, we measure the swimming behavior of individual E. coli cells while simultaneously detecting the rotational states of each flagellum. We find that a simple mathematical expression relates the cell's run/tumble bias to the number and average rotational state of its flagella. However, due to inter-flagellar correlations, an 'effective number' of flagella-smaller than the actual number-enters into this relation. Data from a chemotaxis mutant and stochastic modeling suggest that fluctuations of the regulator CheY-P are the source of flagellar correlations. A consequence of inter-flagellar correlations is that run/tumble behavior is only weakly dependent on number of flagella. DOI: http://dx.doi.org/10.7554/eLife.01916.001.",2014-02-11 +23918248,Pclust: protein network visualization highlighting experimental data.,"

Summary

One approach to infer functions of new proteins from their homologs utilizes visualization of an all-against-all pairwise similarity network (A2ApsN) that exploits the speed of BLAST and avoids the complexity of multiple sequence alignment. However, identifying functions of the protein clusters in A2ApsN is never trivial, due to a lack of linking characterized proteins to their relevant information in current software packages. Given the database errors introduced by automatic annotation transfer, functional deduction should be made from proteins with experimental studies, i.e. 'reference proteins'. Here, we present a web server, termed Pclust, which provides a user-friendly interface to visualize the A2ApsN, placing emphasis on such 'reference proteins' and providing access to their full information in source databases, e.g. articles in PubMed. The identification of 'reference proteins' and the ease of cross-database linkage will facilitate understanding the functions of protein clusters in the network, thus promoting interpretation of proteins of interest.

Availability

The Pclust server is freely available at http://prodata.swmed.edu/pclust",2013-08-05 +23924163,MatrixCatch--a novel tool for the recognition of composite regulatory elements in promoters.,"

Background

Accurate recognition of regulatory elements in promoters is an essential prerequisite for understanding the mechanisms of gene regulation at the level of transcription. Composite regulatory elements represent a particular type of such transcriptional regulatory elements consisting of pairs of individual DNA motifs. In contrast to the present approach, most available recognition techniques are based purely on statistical evaluation of the occurrence of single motifs. Such methods are limited in application, since the accuracy of recognition is greatly dependent on the size and quality of the sequence dataset. Methods that exploit available knowledge and have broad applicability are evidently needed.

Results

We developed a novel method to identify composite regulatory elements in promoters using a library of known examples. In depth investigation of regularities encoded in known composite elements allowed us to introduce a new characteristic measure and to improve the specificity compared with other methods. Tests on an established benchmark and real genomic data show that our method outperforms other available methods based either on known examples or statistical evaluations. In addition to better recognition, a practical advantage of this method is first the ability to detect a high number of different types of composite elements, and second direct biological interpretation of the identified results. The program is available at http://gnaweb.helmholtz-hzi.de/cgi-bin/MCatch/MatrixCatch.pl and includes an option to extend the provided library by user supplied data.

Conclusions

The novel algorithm for the identification of composite regulatory elements presented in this paper was proved to be superior to existing methods. Its application to tissue specific promoters identified several highly specific composite elements with relevance to their biological function. This approach together with other methods will further advance the understanding of transcriptional regulation of genes.",2013-08-08 +22859915,Assessing drug target association using semantic linked data.,"The rapidly increasing amount of public data in chemistry and biology provides new opportunities for large-scale data mining for drug discovery. Systematic integration of these heterogeneous sets and provision of algorithms to data mine the integrated sets would permit investigation of complex mechanisms of action of drugs. In this work we integrated and annotated data from public datasets relating to drugs, chemical compounds, protein targets, diseases, side effects and pathways, building a semantic linked network consisting of over 290,000 nodes and 720,000 edges. We developed a statistical model to assess the association of drug target pairs based on their relation with other linked objects. Validation experiments demonstrate the model can correctly identify known direct drug target pairs with high precision. Indirect drug target pairs (for example drugs which change gene expression level) are also identified but not as strongly as direct pairs. We further calculated the association scores for 157 drugs from 10 disease areas against 1683 human targets, and measured their similarity using a [Formula: see text] score matrix. The similarity network indicates that drugs from the same disease area tend to cluster together in ways that are not captured by structural similarity, with several potential new drug pairings being identified. This work thus provides a novel, validated alternative to existing drug target prediction algorithms. 
The web service is freely available at: http://chem2bio2rdf.org/slap.",2012-07-05 +21177655,The 2011 Nucleic Acids Research Database Issue and the online Molecular Biology Database Collection.,"The current 18th Database Issue of Nucleic Acids Research features descriptions of 96 new and 83 updated online databases covering various areas of molecular biology. It includes two editorials, one that discusses COMBREX, a new exciting project aimed at figuring out the functions of the 'conserved hypothetical' proteins, and one concerning BioDBcore, a proposed description of the 'minimal information about a biological database'. Papers from the members of the International Nucleotide Sequence Database collaboration (INSDC) describe each of the participating databases, DDBJ, ENA and GenBank, principles of data exchange within the collaboration, and the recently established Sequence Read Archive. A testament to the longevity of databases, this issue includes updates on the RNA modification database, Definition of Secondary Structure of Proteins (DSSP) and Homology-derived Secondary Structure of Proteins (HSSP) databases, which have not been featured here in >12 years. There is also a block of papers describing recent progress in protein structure databases, such as Protein DataBank (PDB), PDB in Europe (PDBe), CATH, SUPERFAMILY and others, as well as databases on protein structure modeling, protein-protein interactions and the organization of inter-protein contact sites. Other highlights include updates of the popular gene expression databases, GEO and ArrayExpress, several cancer gene databases and a detailed description of the UK PubMed Central project. The Nucleic Acids Research online Database Collection, available at: http://www.oxfordjournals.org/nar/database/a/, now lists 1330 carefully selected molecular biology databases. 
The full content of the Database Issue is freely available online at the Nucleic Acids Research web site (http://nar.oxfordjournals.org/).",2011-01-01 +24143964,Expression of microRNA-497 and its prognostic significance in human breast cancer.,"

Objective

Dysregulation of microRNAs (miRNAs) plays critical roles in tumor progression. The aim of this study was to investigate the clinicopathologic and prognostic significance of miR-497 expression in human breast cancer (BC).

Methods

Taqman qRT-PCR assay was performed to detect the expression of microRNA (miR)-497 in 30 pairs of BC tissues and corresponding noncancerous breast tissues. Additionally, the expression of this miRNA was detected in another 128 BC tissues and its correlations with clinicopathologic features of patients were analyzed. Kaplan-Meier analyses were used to assess survival of patients. Univariate and multivariate analyses were performed using the Cox proportional hazards model to analyze the prognostic significance of miR-497 expression.

Results

Our data indicated that the relative level of miR-497 expression in BC tissues was significantly lower than that in corresponding noncancerous breast tissues (P = 0.0046). Of 128 BC patients, 74 (57.8%) were placed in the high-miR-497 group and 54 (42.2%) were placed in the low-miR-497 group. By statistical analyses, low miR-497 expression was observed to be closely correlated with higher differentiation grade, positive HER-2 expression, higher incidence of lymph node metastasis and advanced clinical stage. Moreover, patients with high miR-497 expression had better 5-year disease-free and overall survival compared with the low miR-497 group (P = 0.0124 and 0.0018, respectively). Univariate and multivariate analyses indicated that low miR-497 expression was an independent poor prognostic factor for BC patients.

Conclusions

Our data provided the first evidence that downregulation of miR-497 was correlated with BC progression, and miR-497 might be a potential molecular biomarker for predicting the prognosis of patients.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2025828761093488.",2013-10-21 +23609541,"CNA web server: rigidity theory-based thermal unfolding simulations of proteins for linking structure, (thermo-)stability, and function.","The Constraint Network Analysis (CNA) web server provides a user-friendly interface to the CNA approach developed in our laboratory for linking results from rigidity analyses to biologically relevant characteristics of a biomolecular structure. The CNA web server provides a refined modeling of thermal unfolding simulations that considers the temperature dependence of hydrophobic tethers and computes a set of global and local indices for quantifying biomacromolecular stability. From the global indices, phase transition points are identified where the structure switches from a rigid to a floppy state; these phase transition points can be related to a protein's (thermo-)stability. Structural weak spots (unfolding nuclei) are automatically identified, too; this knowledge can be exploited in data-driven protein engineering. The local indices are useful in linking flexibility and function and to understand the impact of ligand binding on protein flexibility. The CNA web server robustly handles small-molecule ligands in general. To overcome issues of sensitivity with respect to the input structure, the CNA web server allows performing two ensemble-based variants of thermal unfolding simulations. The web server output is provided as raw data, plots and/or Jmol representations. The CNA web server, accessible at http://cpclab.uni-duesseldorf.de/cna or http://www.cnanalysis.de, is free and open to all users with no login requirement.",2013-04-22 +22445902,Using Tablet for visual exploration of second-generation sequencing data.,"The advent of second-generation sequencing (2GS) has provided a range of significant new challenges for the visualization of sequence assemblies. 
These include the large volume of data being generated, short-read lengths and different data types and data formats associated with the diversity of new sequencing technologies. This article illustrates how Tablet-a high-performance graphical viewer for visualization of 2GS assemblies and read mappings-plays an important role in the analysis of these data. We present Tablet, and through a selection of use cases, demonstrate its value in quality assurance and scientific discovery, through features such as whole-reference coverage overviews, variant highlighting, paired-end read mark-up, GFF3-based feature tracks and protein translations. We discuss the computing and visualization techniques utilized to provide a rich and responsive graphical environment that enables users to view a range of file formats with ease. Tablet installers can be freely downloaded from http://bioinf.hutton.ac.uk/tablet in 32 or 64-bit versions for Windows, OS X, Linux or Solaris. For further details on the Tablet, contact tablet@hutton.ac.uk.",2012-03-24 +23692254,Characterization of the novel broad-spectrum kinase inhibitor CTx-0294885 as an affinity reagent for mass spectrometry-based kinome profiling.,"Kinase enrichment utilizing broad-spectrum kinase inhibitors enables the identification of large proportions of the expressed kinome by mass spectrometry. However, the existing inhibitors are still inadequate in covering the entire kinome. Here, we identified a novel bisanilino pyrimidine, CTx-0294885, exhibiting inhibitory activity against a broad range of kinases in vitro, and further developed it into a Sepharose-supported kinase capture reagent. Use of a quantitative proteomics approach confirmed the selectivity of CTx-0294885-bound beads for kinase enrichment. 
Large-scale CTx-0294885-based affinity purification followed by LC-MS/MS led to the identification of 235 protein kinases from MDA-MB-231 cells, including all members of the AKT family that had not been previously detected by other broad-spectrum kinase inhibitors. Addition of CTx-0294885 to a mixture of three kinase inhibitors commonly used for kinase-enrichment increased the number of kinase identifications to 261, representing the largest kinome coverage from a single cell line reported to date. Coupling phosphopeptide enrichment with affinity purification using the four inhibitors enabled the identification of 799 high-confidence phosphosites on 183 kinases, ∼10% of which were localized to the activation loop, and included previously unreported phosphosites on BMP2K, MELK, HIPK2, and PRKDC. Therefore, CTx-0294885 represents a powerful new reagent for analysis of kinome signaling networks that may facilitate development of targeted therapeutic strategies. Proteomics data have been deposited to the ProteomeXchange Consortium ( http://proteomecentral.proteomexchange.org ) via the PRIDE partner repository with the data set identifier PXD000239.",2013-06-25 +25188452,Treatment including anthracyclines versus treatment not including anthracyclines for childhood cancer.,"

Background

One of the most important adverse effects of anthracyclines is cardiotoxicity. A well-informed decision on the use of anthracyclines in the treatment of childhood cancers should be based on evidence regarding both antitumour efficacy and cardiotoxicity. This review is the second update of a previously published Cochrane review.

Objectives

To compare antitumour efficacy (survival and tumour response) and cardiotoxicity of treatment including or not including anthracyclines in children with childhood cancer.

Search methods

We searched the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library 2013, Issue 6), MEDLINE (1966 to July 2013) and EMBASE (1980 to July 2013). In addition, we searched reference lists of relevant articles and conference proceedings, the International Society for Paediatric Oncology (SIOP) (from 2002 to 2012) and American Society of Clinical Oncology (ASCO) (from 2002 to 2013). We have searched for ongoing trials in the ISRCTN register and the National Institute of Health register (both screened August 2013) (http://www.controlled-trials.com).

Selection criteria

Randomised controlled trials (RCTs) comparing treatment of any type of childhood cancer with and without anthracyclines and reporting outcomes concerning antitumour efficacy or cardiotoxicity.

Data collection and analysis

Two review authors independently performed the study selection, risk of bias assessment and data extraction. Analyses were performed according to the guidelines of the Cochrane Handbook for Systematic Reviews of Interventions.

Main results

We identified RCTs for seven types of tumour, acute lymphoblastic leukaemia (ALL) (three trials; 912 children), Wilms' tumour (one trial; 316 children), rhabdomyosarcoma and undifferentiated sarcoma (one trial; 413 children), Ewing's sarcoma (one trial; 94 children), non-Hodgkin lymphoma (one trial; 284 children), hepatoblastoma (one trial; 255 children) and acute myeloid leukaemia (AML) (one trial; 394 children). All studies had methodological limitations. For ALL no evidence of a significant difference in antitumour efficacy was identified in the meta-analyses, but in most individual studies there was a suggestion of better antitumour efficacy in patients treated with anthracyclines. For both Wilms' tumour and Ewing's sarcoma a significant difference in event-free and overall survival in favour of treatment with anthracyclines was identified, although for Wilms' tumour the significant difference in overall survival disappeared with long-term follow-up. For rhabdomyosarcoma and undifferentiated sarcoma, non-Hodgkin lymphoma and hepatoblastoma no difference in antitumour efficacy between the treatment groups was identified. The same was true for AML, with the exception of overall survival in a post hoc analysis in a subgroup of patients with relapsed core binding factor (CBF)-AML in which patients treated with anthracyclines did better. Clinical cardiotoxicity was evaluated in four RCTs; no significant difference between the treatment groups was identified, but in all individual studies there was a suggestion of a lower rate of clinical cardiotoxicity in patients who did not receive anthracyclines. None of the studies evaluated asymptomatic cardiac dysfunction. No RCTs were identified for other childhood cancers.

Authors' conclusions

At the moment no evidence from RCTs is available which underscores the use of anthracyclines in ALL. However, 'no evidence of effect', as identified in this review, is not the same as 'evidence of no effect'. For Wilms' tumour, rhabdomyosarcoma and undifferentiated sarcoma, Ewing's sarcoma, non-Hodgkin lymphoma, hepatoblastoma and AML only one RCT was available for each type and, therefore, no definitive conclusions can be made about the antitumour efficacy of treatment with or without anthracyclines in these tumours. For other childhood cancers no RCTs were identified and therefore no conclusions can be made about the antitumour efficacy of treatment with or without anthracyclines in these tumours.",2014-09-04 +30731661,First Report of Powdery Mildew Caused by Golovinomyces cichoracearum on Zinnia elegans in Turkey.,"Powdery mildews are one of the most common diseases of plants growing in many nurseries, city parks, and home gardens in Turkey. Common zinnia (Zinnia elegans Jacq.) is widely cultivated in Turkey for ornamental purposes. In September 2010, zinnia plants grown in Hatay, Turkey were found to be heavily infected with a powdery mildew. Pathogen mycelia and sporulation were observed as circular to irregular, white patches on both sides of the leaves and on stems and flower petals. As the disease progressed, infected leaves turned yellow and died. Hyphae were straight to wavy and 4 to 7 μm wide. Conidiophores arose from the upper part of the hyphae, measured 120 to 190 × 10 to 13 μm, were simple, and produced two to six immature conidia in chains with a sinuate edge, followed by two to three straight cells. Conidia were hyaline, ellipsoid to barrel-shaped, measured 25 to 42 × 14 to 22 μm (length/width ratio = 1.3 to 2.5), lacked distinct fibrosin bodies, and produced germ tubes on the perihilar position, with reticulate wrinkling of the outer walls. No chasmothecia were observed. 
The structures described above were typical of the Oidium subgenus Reticuloidium, anamorph of the genus Golovinomyces, and the fungus measurements were compatible with those of G. cichoracearum (DC.) V.P. Heluta described previously (1,3). To confirm the tentative identification based on morphological characteristics, molecular analysis of internal transcribed spacer (ITS) rDNA sequences from a representative material (MKU-ZK311077, duplicate KUS-F25655) was conducted. The complete ITS regions of rDNA were amplified using primers ITS5 and P3 as described by S. Takamatsu (4) and sequenced. The resulting sequence of 508 bp from MKU-ZK311077 was deposited in GenBank (Accession No. JN051414). A GenBank BLAST search using the current data revealed an exact match for several sequences of G. cichoracearum, including Australian and Korean powdery mildews on zinnia plants, with a 100% sequence similarity. Pathogenicity was confirmed through inoculation by gently pressing diseased leaves onto leaves of three healthy, potted zinnia plants. Three noninoculated plants served as controls. Plants were maintained in a greenhouse at 25°C. Inoculated plants developed signs and symptoms after 10 days, whereas the control plants remained healthy. The fungus present on the inoculated plants was morphologically identical to that originally observed on diseased plants. The powdery mildew infections of Z. elegans associated with G. cichoracearum are nearly circumglobal, including Europe, North America, South America, Africa, Oceania, and Western Asian localities like India, Nepal, Jordan, and Israel (1,2). The current work confirmed the occurrence of G. cichoracearum infecting Z. elegans in Turkey using detailed morphological and molecular analysis. References: (1) U. Braun. Beih. Nova Hedw. 89:1, 1987. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , May 24, 2011. 
(3) M. J. Park et al. Plant Pathol. J. 27:85, 2011. (4) S. Takamatsu et al. Mycol. Res. 113:117, 2009.",2011-10-01 +24985530,Bridging islands of information to establish an integrated knowledge base of drugs and health outcomes of interest.,"The entire drug safety enterprise has a need to search, retrieve, evaluate, and synthesize scientific evidence more efficiently. This discovery and synthesis process would be greatly accelerated through access to a common framework that brings all relevant information sources together within a standardized structure. This presents an opportunity to establish an open-source community effort to develop a global knowledge base, one that brings together and standardizes all available information for all drugs and all health outcomes of interest (HOIs) from all electronic sources pertinent to drug safety. To make this vision a reality, we have established a workgroup within the Observational Health Data Sciences and Informatics (OHDSI, http://ohdsi.org) collaborative. The workgroup's mission is to develop an open-source standardized knowledge base for the effects of medical products and an efficient procedure for maintaining and expanding it. The knowledge base will make it simpler for practitioners to access, retrieve, and synthesize evidence so that they can reach a rigorous and accurate assessment of causal relationships between a given drug and HOI. Development of the knowledge base will proceed with the measureable goal of supporting an efficient and thorough evidence-based assessment of the effects of 1,000 active ingredients across 100 HOIs. This non-trivial task will result in a high-quality and generally applicable drug safety knowledge base. 
It will also yield a reference standard of drug-HOI pairs that will enable more advanced methodological research that empirically evaluates the performance of drug safety analysis methods.",2014-08-01 +25486693,[Improving vaccination social marketing by monitoring the web].,"Immunisation is one of the most important and cost- effective interventions in Public Health because of their significant positive impact on population health.However, since Jenner's discovery there always been a lively debate between supporters and opponents of vaccination; Today the antivaccination movement spreads its message mostly on the web, disseminating inaccurate data through blogs and forums, increasing vaccine rejection.In this context, the Società Italiana di Igiene (SItI) created a web project in order to fight the misinformation on the web regarding vaccinations, through a series of information tools, including scientific articles, educational information, video and multimedia presentations The web portal (http://www.vaccinarsi.org) was published in May 2013 and now is already available over one hundred web pages related to vaccinations Recently a Forum, a periodic newsletter and a Twitter page have been created. There has been an average of 10,000 hits per month. Currently our users are mostly healthcare professionals. The visibility of the site is very good and it currently ranks first in the Google's search engine, taping the word ""vaccinarsi"" The results of the first four months of activity are extremely encouraging and show the importance of this project; furthermore the application for quality certification by independent international Organizations has been submitted.",2014-05-01 +22289480,BIGpre: a quality assessment package for next-generation sequencing data.,"The emergence of next-generation sequencing (NGS) technologies has significantly improved sequencing throughput and reduced costs. 
However, the short read length, duplicate reads and massive volume of data make the data processing much more difficult and complicated than the first-generation sequencing technology. Although there are some software packages developed to assess the data quality, those packages either are not easily available to users or require bioinformatics skills and computer resources. Moreover, almost all the quality assessment software currently available didn't taken into account the sequencing errors when dealing with the duplicate assessment in NGS data. Here, we present a new user-friendly quality assessment software package called BIGpre, which works for both Illumina and 454 platforms. BIGpre contains all the functions of other quality assessment software, such as the correlation between forward and reverse reads, read GC-content distribution, and base Ns quality. More importantly, BIGpre incorporates associated programs to detect and remove duplicate reads after taking sequencing errors into account and trimming low quality reads from raw data as well. BIGpre is primarily written in Perl and integrates graphical capability from the statistics package R. This package produces both tabular and graphical summaries of data quality for sequencing datasets from Illumina and 454 platforms. Processing hundreds of millions reads within minutes, this package provides immediate diagnostic information for user to manipulate sequencing data for downstream analyses. BIGpre is freely available at http://bigpre.sourceforge.net.",2011-12-01 +24694260,HGCS: an online tool for prioritizing disease-causing gene variants by biological distance.,"

Background

Identifying the genotypes underlying human disease phenotypes is a fundamental step in human genetics and medicine. High-throughput genomic technologies provide thousands of genetic variants per individual. The causal genes of a specific phenotype are usually expected to be functionally close to each other. According to this hypothesis, candidate genes are picked from high-throughput data on the basis of their biological proximity to core genes - genes already known to be responsible for the phenotype. There is currently no effective gene-centric online interface for this purpose.

Results

We describe here the human gene connectome server (HGCS), a powerful, easy-to-use interactive online tool enabling researchers to prioritize any list of genes according to their biological proximity to core genes associated with the phenotype of interest. We also make available an updated and extended version for all human gene-specific connectomes. The HGCS is freely available to noncommercial users from: http://hgc.rockefeller.edu.

Conclusions

The HGCS should help investigators from diverse fields to identify new disease-causing candidate genes more effectively, via a user-friendly online interface.",2014-04-03 +23504705,Accurate prediction of hot spot residues through physicochemical characteristics of amino acid sequences.,"Hot spot residues of proteins are fundamental interface residues that help proteins perform their functions. Detecting hot spots by experimental methods is costly and time-consuming. Sequential and structural information has been widely used in the computational prediction of hot spots. However, structural information is not always available. In this article, we investigated the problem of identifying hot spots using only physicochemical characteristics extracted from amino acid sequences. We first extracted 132 relatively independent physicochemical features from a set of the 544 properties in AAindex1, an amino acid index database. Each feature was utilized to train a classification model with a novel encoding schema for hot spot prediction by the IBk algorithm, an extension of the K-nearest neighbor algorithm. The combinations of the individual classifiers were explored and the classifiers that appeared frequently in the top performing combinations were selected. The hot spot predictor was built based on an ensemble of these classifiers and to work in a voting manner. Experimental results demonstrated that our method effectively exploited the feature space and allowed flexible weights of features for different queries. On the commonly used hot spot benchmark sets, our method significantly outperformed other machine learning algorithms and state-of-the-art hot spot predictors. 
The program is available at http://sfb.kaust.edu.sa/pages/software.aspx.",2013-08-01 +25474882,Large-scale development of PIP and SSR markers and their complementary applied in Nicotiana.,"PIP (Potential Intron Polymorphism) and SSR (Simple Sequence Repeats) were used in many species, but large-scale development and combined use of these two markers have not been reported in tobacco. In this study, a total of 12,388 PIP and 76,848 SSR markers were designed and uploaded to a web-accessible database (http://yancao.sdau.edu.cn/tgb/). E-PCR analysis showed that PIP and SSR rarely overlapped and were strongly complementary in the tobacco genome. The density was 3.07 PIP and 1.72 SSR markers per 10 kb of the known sequences. A total of 153 and 166 alleles were detectedby 22 PIP and 22 SSR markers in 64 Nicotiana accessions. SSR produced higher PIC (polymorphism information content) values and identified more alleles than PIP, whereas PIP could identify larger numbers of rare alleles. Mantel testing demonstrated a high correlation coefficient (r = 0.949, P < 0.001) between PIP and SSR. The UPGMA dendrogram created from the combined PIP and SSR markers was clearer and more reliable than the individual PIP or SSR dendrograms. It suggested that PIP and SSR can make up the deficiency of molecular markers not only in tobacco but other plant.",2013-08-01 +23948415,[The expression of CARD18 in apoptin-transfected gastric cancer cells and gastric adenocarcinoma tissues].,"

Objective

To study the expression of caspase recruitment domain family member 18 (CARD18) in apoptin-induced apoptosis of gastric cancer cells and its role in the development of gastric cancer.

Methods

After gastric cancer cells were transfected with apoptin, differentially expressed proteins between the apoptin-expression SGC7901 group and the control group were separated using two-dimensional gel electrophoresis and identified using matrix-assisted laser desorption/ionization time-of-flight mass spectrometry (MALDI-TOF-MS) and Mascot database (http://www.matrixscience.com/). The expression level and location of CARD18 in 56 cases of gastric adenocarcinoma and adjacent cancer-free tissues were respectively detected by RT-PCR, Western blotting and immunohistochemistry to analyze the relationship between CARD18 and the clinical pathological characteristics.

Results

RT-PCR and Western blotting showed that the level of CARD18 mRNA and protein was down-regulated the most significantly after apoptin treatment. The expression of CARD18 in gastric adenocarcinoma tissues was significantly higher than that in adjacent cancer-free tissues (P<0.05), and it was proved that the CARD18 expression was related to the gastric cancer lymph node metastasis and TNM stage (P<0.05). CONCLUSION CARD18 may be both a promising marker for prognosis and a target protein for treatment of gastric cancer.",2013-08-01 +24465054,Web-based tools for modelling and analysis of multivariate data: California ozone pollution activity.,"This article presents a hands-on web-based activity motivated by the relation between human health and ozone pollution in California. This case study is based on multivariate data collected monthly at 20 locations in California between 1980 and 2006. Several strategies and tools for data interrogation and exploratory data analysis, model fitting and statistical inference on these data are presented. All components of this case study (data, tools, activity) are freely available online at: http://wiki.stat.ucla.edu/socr/index.php/SOCR_MotionCharts_CAOzoneData. Several types of exploratory (motion charts, box-and-whisker plots, spider charts) and quantitative (inference, regression, analysis of variance (ANOVA)) data analyses tools are demonstrated. Two specific human health related questions (temporal and geographic effects of ozone pollution) are discussed as motivational challenges.",2011-09-01 +22441573,Cancer gene prioritization by integrative analysis of mRNA expression and DNA copy number data: a comparative review.,"A variety of genome-wide profiling techniques are available to investigate complementary aspects of genome structure and function. Integrative analysis of heterogeneous data sources can reveal higher level interactions that cannot be detected based on individual observations. 
A standard integration task in cancer studies is to identify altered genomic regions that induce changes in the expression of the associated genes based on joint analysis of genome-wide gene expression and copy number profiling measurements. In this review, we highlight common approaches to genomic data integration and provide a transparent benchmarking procedure to quantitatively compare method performances in cancer gene prioritization. Algorithms, data sets and benchmarking results are available at http://intcomp.r-forge.r-project.org.",2012-03-22 +25285153,Graph-distance distribution of the Boltzmann ensemble of RNA secondary structures.,"

Background

Large RNA molecules are often composed of multiple functional domains whose spatial arrangement strongly influences their function. Pre-mRNA splicing, for instance, relies on the spatial proximity of the splice junctions that can be separated by very long introns. Similar effects appear in the processing of RNA virus genomes. Albeit a crude measure, the distribution of spatial distances in thermodynamic equilibrium harbors useful information on the shape of the molecule that in turn can give insights into the interplay of its functional domains.

Result

Spatial distance can be approximated by the graph-distance in RNA secondary structure. We show here that the equilibrium distribution of graph-distances between a fixed pair of nucleotides can be computed in polynomial time by means of dynamic programming. While a naïve implementation would yield recursions with a very high time complexity of O(n^6 D^5) for sequence length n and D distinct distance values, it is possible to reduce this to O(n^4) for practical applications in which predominantly small distances are of interest. Further reductions, however, seem to be difficult. Therefore, we introduced sampling approaches that are much easier to implement. They are also theoretically favorable for several real-life applications, in particular since these primarily concern long-range interactions in very large RNA molecules.

Conclusions

The graph-distance distribution can be computed using a dynamic programming approach. Although a crude approximation of reality, our initial results indicate that the graph-distance can be related to the smFRET data. The additional file and the software of our paper are available from http://www.rna.uni-jena.de/RNAgraphdist.html.",2014-09-11 +24177720,"MetaboNetworks, an interactive Matlab-based toolbox for creating, customizing and exploring sub-networks from KEGG.","

Summary

MetaboNetworks is a tool to create custom sub-networks in Matlab using main reaction pairs as defined by the Kyoto Encyclopaedia of Genes and Genomes and can be used to explore transgenomic interactions, for example mammalian and bacterial associations. It calculates the shortest path between a set of metabolites (e.g. biomarkers from a metabonomic study) and plots the connectivity between metabolites as links in a network graph. The resulting graph can be edited and explored interactively. Furthermore, nodes and edges in the graph are linked to the Kyoto Encyclopaedia of Genes and Genomes compound and reaction pair web pages.

Availability and implementation

MetaboNetworks is available from http://www.mathworks.com/matlabcentral/fileexchange/42684.

Contact

jmp111@ic.ac.uk or j.nicholson@imperial.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-10-30 +24914969,Ten years of probabilistic estimates of biocrystal solvent content: new insights via nonparametric kernel density estimate.,"The probabilistic estimate of the solvent content (Matthews probability) was first introduced in 2003. Given that the Matthews probability is based on prior information, revisiting the empirical foundation of this widely used solvent-content estimate is appropriate. The parameter set for the original Matthews probability distribution function employed in MATTPROB has been updated after ten years of rapid PDB growth. A new nonparametric kernel density estimator has been implemented to calculate the Matthews probabilities directly from empirical solvent-content data, thus avoiding the need to revise the multiple parameters of the original binned empirical fit function. The influence and dependency of other possible parameters determining the solvent content of protein crystals have been examined. Detailed analysis showed that resolution is the primary and dominating model parameter correlated with solvent content. Modifications of protein specific density for low molecular weight have no practical effect, and there is no correlation with oligomerization state. A weak, and in practice irrelevant, dependency on symmetry and molecular weight is present, but cannot be satisfactorily explained by simple linear or categorical models. The Bayesian argument that the observed resolution represents only a lower limit for the true diffraction potential of the crystal is maintained. The new kernel density estimator is implemented as the primary option in the MATTPROB web application at http://www.ruppweb.org/mattprob/.",2014-05-24 +23793751,Differential gene expression analysis using coexpression and RNA-Seq data.,"

Motivation

RNA-Seq is increasingly being used for differential gene expression analysis, which was dominated by the microarray technology in the past decade. However, inferring differential gene expression based on the observed difference of RNA-Seq read counts has unique challenges that were not present in microarray-based analysis. The differential expression estimation may be biased against low read count values such that the differential expression of genes with high read counts is more easily detected. The estimation bias may further propagate in downstream analyses at the systems biology level if it is not corrected.

Results

To obtain a better inference of differential gene expression, we propose a new efficient algorithm based on a Markov random field (MRF) model, called MRFSeq, that uses additional gene coexpression data to enhance the prediction power. Our main technical contribution is the careful selection of the clique potential functions in the MRF so its maximum a posteriori estimation can be reduced to the well-known maximum flow problem and thus solved in polynomial time. Our extensive experiments on simulated and real RNA-Seq datasets demonstrate that MRFSeq is more accurate and less biased against genes with low read counts than the existing methods based on RNA-Seq data alone. For example, on the well-studied MAQC dataset, MRFSeq improved the sensitivity from 11.6 to 38.8% for genes with low read counts.

Availability

MRFSeq is implemented in C and available at http://www.cs.ucr.edu/~yyang027/mrfseq.htm",2013-06-21 +22815738,Gene Expression Commons: an open platform for absolute gene expression profiling.,"Gene expression profiling using microarrays has been limited to comparisons of gene expression between small numbers of samples within individual experiments. However, the unknown and variable sensitivities of each probeset have rendered the absolute expression of any given gene nearly impossible to estimate. We have overcome this limitation by using a very large number (>10,000) of varied microarray data as a common reference, so that statistical attributes of each probeset, such as the dynamic range and threshold between low and high expression, can be reliably discovered through meta-analysis. This strategy is implemented in a web-based platform named ""Gene Expression Commons"" (https://gexc.stanford.edu/) which contains data of 39 distinct highly purified mouse hematopoietic stem/progenitor/differentiated cell populations covering almost the entire hematopoietic system. Since the Gene Expression Commons is designed as an open platform, investigators can explore the expression level of any gene, search by expression patterns of interest, submit their own microarray data, and design their own working models representing biological relationship among samples.",2012-07-18 +23906817,The DDI corpus: an annotated corpus with pharmacological substances and drug-drug interactions.,"The management of drug-drug interactions (DDIs) is a critical issue resulting from the overwhelming amount of information available on them. Natural Language Processing (NLP) techniques can provide an interesting way to reduce the time spent by healthcare professionals on reviewing biomedical literature. However, NLP techniques rely mostly on the availability of the annotated corpora. 
While there are several annotated corpora with biological entities and their relationships, there is a lack of corpora annotated with pharmacological substances and DDIs. Moreover, other works in this field have focused in pharmacokinetic (PK) DDIs only, but not in pharmacodynamic (PD) DDIs. To address this problem, we have created a manually annotated corpus consisting of 792 texts selected from the DrugBank database and other 233 Medline abstracts. This fined-grained corpus has been annotated with a total of 18,502 pharmacological substances and 5028 DDIs, including both PK as well as PD interactions. The quality and consistency of the annotation process has been ensured through the creation of annotation guidelines and has been evaluated by the measurement of the inter-annotator agreement between two annotators. The agreement was almost perfect (Kappa up to 0.96 and generally over 0.80), except for the DDIs in the MedLine database (0.55-0.72). The DDI corpus has been used in the SemEval 2013 DDIExtraction challenge as a gold standard for the evaluation of information extraction techniques applied to the recognition of pharmacological substances and the detection of DDIs from biomedical texts. DDIExtraction 2013 has attracted wide attention with a total of 14 teams from 7 different countries. For the task of recognition and classification of pharmacological names, the best system achieved an F1 of 71.5%, while, for the detection and classification of DDIs, the best result was F1 of 65.1%. These results show that the corpus has enough quality to be used for training and testing NLP techniques applied to the field of Pharmacovigilance. 
The DDI corpus and the annotation guidelines are free for use for academic research and are available at http://labda.inf.uc3m.es/ddicorpus.",2013-07-29 +25458812,LocFuse: human protein-protein interaction prediction via classifier fusion using protein localization information.,"Protein-protein interaction (PPI) detection is one of the central goals of functional genomics and systems biology. Knowledge about the nature of PPIs can help fill the widening gap between sequence information and functional annotations. Although experimental methods have produced valuable PPI data, they also suffer from significant limitations. Computational PPI prediction methods have attracted tremendous attentions. Despite considerable efforts, PPI prediction is still in its infancy in complex multicellular organisms such as humans. Here, we propose a novel ensemble learning method, LocFuse, which is useful in human PPI prediction. This method uses eight different genomic and proteomic features along with four types of different classifiers. The prediction performance of this classifier selection method was found to be considerably better than methods employed hitherto. This confirms the complex nature of the PPI prediction problem and also the necessity of using biological information for classifier fusion. The LocFuse is available at: http://lbb.ut.ac.ir/Download/LBBsoft/LocFuse.

Biological significance

The results revealed that if we divide proteome space according to the cellular localization of proteins, then the utility of some classifiers in PPI prediction can be improved. Therefore, to predict the interaction for any given protein pair, we can select the most accurate classifier with regard to the cellular localization information. Based on the results, we can say that the importance of different features for PPI prediction varies between differently localized proteins; however in general, our novel features, which were extracted from position-specific scoring matrices (PSSMs), are the most important ones and the Random Forest (RF) classifier performs best in most cases. LocFuse was developed with a user-friendly graphic interface and it is freely available for Linux, Mac OSX and MS Windows operating systems.",2014-10-16 +22923299,GFOLD: a generalized fold change for ranking differentially expressed genes from RNA-seq data.,"

Motivation

RNA-seq has been widely used in transcriptome analysis to effectively measure gene expression levels. Although sequencing costs are rapidly decreasing, almost 70% of all the human RNA-seq samples in the gene expression omnibus do not have biological replicates and more unreplicated RNA-seq data were published than replicated RNA-seq data in 2011. Despite the large amount of single replicate studies, there is currently no satisfactory method for detecting differentially expressed genes when only a single biological replicate is available.

Results

We present the GFOLD (generalized fold change) algorithm to produce biologically meaningful rankings of differentially expressed genes from RNA-seq data. GFOLD assigns reliable statistics for expression changes based on the posterior distribution of log fold change. In this way, GFOLD overcomes the shortcomings of P-value and fold change calculated by existing RNA-seq analysis methods and gives more stable and biological meaningful gene rankings when only a single biological replicate is available.

Availability

The open source C/C++ program is available at http://www.tongji.edu.cn/~zhanglab/GFOLD/index.html",2012-08-24 +23044543,ncPRO-seq: a tool for annotation and profiling of ncRNAs in sRNA-seq data.,"

Summary

Non-coding RNA (ncRNA) PROfiling in small RNA (sRNA)-seq (ncPRO-seq) is a stand-alone, comprehensive and flexible ncRNA analysis pipeline. It can interrogate and perform detailed profiling analysis on sRNAs derived from annotated non-coding regions in miRBase, Rfam and RepeatMasker, as well as specific regions defined by users. The ncPRO-seq pipeline performs both gene-based and family-based analyses of sRNAs. It also has a module to identify regions significantly enriched with short reads, which cannot be classified under known ncRNA families, thus enabling the discovery of previously unknown ncRNA- or small interfering RNA (siRNA)-producing regions. The ncPRO-seq pipeline supports input read sequences in fastq, fasta and color space format, as well as alignment results in BAM format, meaning that sRNA raw data from the three current major platforms (Roche-454, Illumina-Solexa and Life technologies-SOLiD) can be analyzed with this pipeline. The ncPRO-seq pipeline can be used to analyze read and alignment data, based on any sequenced genome, including mammals and plants.

Availability

Source code, annotation files, manual and online version are available at http://ncpro.curie.fr/.

Contact

bioinfo.ncproseq@curie.fr or cciaudo@ethz.ch

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-07 +22641853,CombFunc: predicting protein function using heterogeneous data sources.,"Only a small fraction of known proteins have been functionally characterized, making protein function prediction essential to propose annotations for uncharacterized proteins. In recent years many function prediction methods have been developed using various sources of biological data from protein sequence and structure to gene expression data. Here we present the CombFunc web server, which makes Gene Ontology (GO)-based protein function predictions. CombFunc incorporates ConFunc, our existing function prediction method, with other approaches for function prediction that use protein sequence, gene expression and protein-protein interaction data. In benchmarking on a set of 1686 proteins CombFunc obtains precision and recall of 0.71 and 0.64 respectively for gene ontology molecular function terms. For biological process GO terms precision of 0.74 and recall of 0.41 is obtained. CombFunc is available at http://www.sbg.bio.ic.ac.uk/combfunc.",2012-05-27 +23894139,mpMoRFsDB: a database of molecular recognition features in membrane proteins.,"

Summary

Molecular recognition features (MoRFs) are small, intrinsically disordered regions in proteins that undergo a disorder-to-order transition on binding to their partners. MoRFs are involved in protein-protein interactions and may function as the initial step in molecular recognition. The aim of this work was to collect, organize and store all membrane proteins that contain MoRFs. Membrane proteins constitute ∼30% of fully sequenced proteomes and are responsible for a wide variety of cellular functions. MoRFs were classified according to their secondary structure, after interacting with their partners. We identified MoRFs in transmembrane and peripheral membrane proteins. The position of transmembrane protein MoRFs was determined in relation to a protein's topology. All information was stored in a publicly available mySQL database with a user-friendly web interface. A Jmol applet is integrated for visualization of the structures. mpMoRFsDB provides valuable information related to disorder-based protein-protein interactions in membrane proteins.

Availability

http://bioinformatics.biol.uoa.gr/mpMoRFsDB",2013-07-26 +22318370,msCompare: a framework for quantitative analysis of label-free LC-MS data for comparative candidate biomarker studies.,"Data processing forms an integral part of biomarker discovery and contributes significantly to the ultimate result. To compare and evaluate various publicly available open source label-free data processing workflows, we developed msCompare, a modular framework that allows the arbitrary combination of different feature detection/quantification and alignment/matching algorithms in conjunction with a novel scoring method to evaluate their overall performance. We used msCompare to assess the performance of workflows built from modules of publicly available data processing packages such as SuperHirn, OpenMS, and MZmine and our in-house developed modules on peptide-spiked urine and trypsin-digested cerebrospinal fluid (CSF) samples. We found that the quality of results varied greatly among workflows, and interestingly, heterogeneous combinations of algorithms often performed better than the homogenous workflows. Our scoring method showed that the union of feature matrices of different workflows outperformed the original homogenous workflows in some cases. msCompare is open source software (https://trac.nbic.nl/mscompare), and we provide a web-based data processing service for our framework by integration into the Galaxy server of the Netherlands Bioinformatics Center (http://galaxy.nbic.nl/galaxy) to allow scientists to determine which combination of modules provides the most accurate processing for their particular LC-MS data sets.",2012-02-07 +23886610,BmTEdb: a collective database of transposable elements in the silkworm genome.,"The silkworm, Bombyx mori, is one of the major insect model organisms, and its draft and fine genome sequences became available in 2004 and 2008, respectively. Transposable elements (TEs) constitute ~40% of the silkworm genome. 
To better understand the roles of TEs in organization, structure and evolution of the silkworm genome, we used a combination of de novo, structure-based and homology-based approaches for identification of the silkworm TEs and identified 1308 silkworm TE families. These TE families and their classification information were organized into a comprehensive and easy-to-use web-based database, BmTEdb. Users are entitled to browse, search and download the sequences in the database. Sequence analyses such as BLAST, HMMER and EMBOSS GetORF were also provided in BmTEdb. This database will facilitate studies for the silkworm genomics, the TE functions in the silkworm and the comparative analysis of the insect TEs. Database URL: http://gene.cqu.edu.cn/BmTEdb/.",2013-07-25 +24170398,T-Coffee: Tree-based consistency objective function for alignment evaluation.,"T-Coffee, for Tree-based consistency objective function for alignment evaluation, is a versatile multiple sequence alignment (MSA) method suitable for aligning virtually any type of biological sequences. T-Coffee provides more than a simple sequence aligner; rather it is a framework in which alternative alignment methods and/or extra information (i.e., structural, evolutionary, or experimental information) can be combined to reach more accurate and more meaningful MSAs. T-Coffee can be used either by running input data via the Web server ( http://tcoffee.crg.cat/apps/tcoffee/index.html ) or by downloading the T-Coffee package. Here, we present how the package can be used in its command line mode to carry out the most common tasks and multiply align proteins, DNA, and RNA sequences. 
This chapter particularly emphasizes on the description of T-Coffee special flavors also called ""modes,"" designed to address particular biological problems.",2014-01-01 +24386124,SiBIC: a web server for generating gene set networks based on biclusters obtained by maximal frequent itemset mining.,"Detecting biclusters from expression data is useful, since biclusters are coexpressed genes under only part of all given experimental conditions. We present a software called SiBIC, which from a given expression dataset, first exhaustively enumerates biclusters, which are then merged into rather independent biclusters, which finally are used to generate gene set networks, in which a gene set assigned to one node has coexpressed genes. We evaluated each step of this procedure: 1) significance of the generated biclusters biologically and statistically, 2) biological quality of merged biclusters, and 3) biological significance of gene set networks. We emphasize that gene set networks, in which nodes are not genes but gene sets, can be more compact than usual gene networks, meaning that gene set networks are more comprehensible. SiBIC is available at http://utrecht.kuicr.kyoto-u.ac.jp:8080/miami/faces/index.jsp.",2013-12-30 +24117467,MetalS2: a tool for the structural alignment of minimal functional sites in metal-binding proteins and nucleic acids.,"We developed a new software tool, MetalS(2), for the structural alignment of Minimal Functional Sites (MFSs) in metal-binding biological macromolecules. MFSs are 3D templates that describe the local environment around the metal(s) independently of the larger context of the macromolecular structure. Such local environment has a determinant role in tuning the chemical reactivity of the metal, ultimately contributing to the functional properties of the whole system. 
On our example data sets, MetalS(2) unveiled structural similarities that other programs for protein structure comparison do not consistently point out and overall identified a larger number of structurally similar MFSs. MetalS(2) supports the comparison of MFSs harboring different metals and/or with different nuclearity and is available both as a stand-alone program and a Web tool ( http://metalweb.cerm.unifi.it/tools/metals2/).",2013-10-28 +23809576,iBIG: an integrative network tool for supporting human disease mechanism studies.,"Understanding the mechanism of complex human diseases is a major scientific challenge. Towards this end, we developed a web-based network tool named iBIG (stands for integrative BIoloGy), which incorporates a variety of information on gene interaction and regulation. The generated network can be annotated with various types of information and visualized directly online. In addition to the gene networks based on physical and pathway interactions, networks at a functional level can also be constructed. Furthermore, a supplementary R package is provided to process microarray data and generate a list of important genes to be used as input for iBIG. To demonstrate its usefulness, we collected 54 microarrays on common human diseases including cancer, neurological disorders, infectious diseases and other common diseases. We processed the microarray data with our R package and constructed a network of functional modules perturbed in common human diseases. Networks at the functional level in combination with gene networks may provide new insight into the mechanism of human diseases. iBIG is freely available at http://lei.big.ac.cn/ibig.",2013-01-04 +24112435,cudaMap: a GPU accelerated program for gene expression connectivity mapping.,"

Background

Modern cancer research often involves large datasets and the use of sophisticated statistical techniques. Together these add a heavy computational load to the analysis, which is often coupled with issues surrounding data accessibility. Connectivity mapping is an advanced bioinformatic and computational technique dedicated to therapeutics discovery and drug re-purposing around differential gene expression analysis. On a normal desktop PC, it is common for the connectivity mapping task with a single gene signature to take > 2h to complete using sscMap, a popular Java application that runs on standard CPUs (Central Processing Units). Here, we describe new software, cudaMap, which has been implemented using CUDA C/C++ to harness the computational power of NVIDIA GPUs (Graphics Processing Units) to greatly reduce processing times for connectivity mapping.

Results

cudaMap can identify candidate therapeutics from the same signature in just over thirty seconds when using an NVIDIA Tesla C2050 GPU. Results from the analysis of multiple gene signatures, which would previously have taken several days, can now be obtained in as little as 10 minutes, greatly facilitating candidate therapeutics discovery with high throughput. We are able to demonstrate dramatic speed differentials between GPU assisted performance and CPU executions as the computational load increases for high accuracy evaluation of statistical significance.

Conclusion

Emerging 'omics' technologies are constantly increasing the volume of data and information to be processed in all areas of biomedical research. Embracing the multicore functionality of GPUs represents a major avenue of local accelerated computing. cudaMap will make a strong contribution in the discovery of candidate therapeutics by enabling speedy execution of heavy duty connectivity mapping tasks, which are increasingly required in modern cancer research. cudaMap is open source and can be freely downloaded from http://purl.oclc.org/NET/cudaMap.",2013-10-11 +24801104,QSAR models for anti-malarial activity of 4-aminoquinolines.,"In the present study, predictive quantitative structure - activity relationship (QSAR) models for anti-malarial activity of 4-aminoquinolines have been developed. CORAL, which is freely available on internet (http://www.insilico.eu/coral), has been used as a tool of QSAR analysis to establish statistically robust QSAR model of anti-malarial activity of 4-aminoquinolines. Six random splits into the visible sub-system of the training and invisible subsystem of validation were examined. Statistical qualities for these splits vary, but in all these cases, statistical quality of prediction for anti-malarial activity was quite good. The optimal SMILES-based descriptor was used to derive the single descriptor based QSAR model for a data set of 112 aminoquinolones. All the splits had r(2)> 0.85 and r(2)> 0.78 for subtraining and validation sets, respectively. The three parametric multilinear regression (MLR) QSAR model has Q(2) = 0.83, R(2) = 0.84 and F = 190.39. The anti-malarial activity has strong correlation with presence/absence of nitrogen and oxygen at a topological distance of six.",2014-03-01 +22110245,PGDSpider: an automated data conversion tool for connecting population genetics and genomics programs.,"

Unlabelled

The analysis of genetic data often requires a combination of several approaches using different and sometimes incompatible programs. In order to facilitate data exchange and file conversions between population genetics programs, we introduce PGDSpider, a Java program that can read 27 different file formats and export data into 29, partially overlapping, other file formats. The PGDSpider package includes both an intuitive graphical user interface and a command-line version allowing its integration in complex data analysis pipelines.

Availability

PGDSpider is freely available under the BSD 3-Clause license on http://cmpg.unibe.ch/software/PGDSpider/.",2011-11-21 +24754707,Teaching moral reasoning through gesture.,"Stem-cell research. Euthanasia. Personhood. Marriage equality. School shootings. Gun control. Death penalty. Ethical dilemmas regularly spark fierce debate about the underlying moral fabric of societies. How do we prepare today's children to be fully informed and thoughtful citizens, capable of moral and ethical decisions? Current approaches to moral education are controversial, requiring adults to serve as either direct ('top-down') or indirect ('bottom-up') conduits of information about morality. A common thread weaving throughout these two educational initiatives is the ability to take multiple perspectives - increases in perspective taking ability have been found to precede advances in moral reasoning. We propose gesture as a behavior uniquely situated to augment perspective taking ability. Requiring gesture during spatial tasks has been shown to catalyze the production of more sophisticated problem-solving strategies, allowing children to profit from instruction. Our data demonstrate that requiring gesture during moral reasoning tasks has similar effects, resulting in increased perspective taking ability subsequent to instruction. A video abstract of this article can be viewed at http://www.youtube.com/watch?v/gAcRIClU_GY.",2014-04-23 +24916385,MetAssign: probabilistic annotation of metabolites from LC-MS data using a Bayesian clustering approach.,"

Motivation

The use of liquid chromatography coupled to mass spectrometry has enabled the high-throughput profiling of the metabolite composition of biological samples. However, the large amount of data obtained can be difficult to analyse and often requires computational processing to understand which metabolites are present in a sample. This article looks at the dual problem of annotating peaks in a sample with a metabolite, together with putatively annotating whether a metabolite is present in the sample. The starting point of the approach is a Bayesian clustering of peaks into groups, each corresponding to putative adducts and isotopes of a single metabolite.

Results

The Bayesian modelling introduced here combines information from the mass-to-charge ratio, retention time and intensity of each peak, together with a model of the inter-peak dependency structure, to increase the accuracy of peak annotation. The results inherently contain a quantitative estimate of confidence in the peak annotations and allow an accurate trade-off between precision and recall. Extensive validation experiments using authentic chemical standards show that this system is able to produce more accurate putative identifications than other state-of-the-art systems, while at the same time giving a probabilistic measure of confidence in the annotations.

Availability and implementation

The software has been implemented as part of the mzMatch metabolomics analysis pipeline, which is available for download at http://mzmatch.sourceforge.net/.",2014-06-09 +24755304,The cell behavior ontology: describing the intrinsic biological behaviors of real and model cells seen as active agents.,"

Motivation

Currently, there are no ontologies capable of describing both the spatial organization of groups of cells and the behaviors of those cells. The lack of a formalized method for describing the spatiality and intrinsic biological behaviors of cells makes it difficult to adequately describe cells, tissues and organs as spatial objects in living tissues, in vitro assays and in computational models of tissues.

Results

We have developed an OWL-2 ontology to describe the intrinsic physical and biological characteristics of cells and tissues. The Cell Behavior Ontology (CBO) provides a basis for describing the spatial and observable behaviors of cells and extracellular components suitable for describing in vivo, in vitro and in silico multicell systems. Using the CBO, a modeler can create a meta-model of a simulation of a biological model and link that meta-model to experiment or simulation results. Annotation of a multicell model and its computational representation, using the CBO, makes the statement of the underlying biology explicit. The formal representation of such biological abstraction facilitates the validation, falsification, discovery, sharing and reuse of both models and experimental data.

Availability and implementation

The CBO, developed using Protégé 4, is available at http://cbo.biocomplexity.indiana.edu/cbo/ and at BioPortal (http://bioportal.bioontology.org/ontologies/CBO).",2014-04-22 +25183489,ballaxy: web services for structural bioinformatics.,"

Motivation

Web-based workflow systems have gained considerable momentum in sequence-oriented bioinformatics. In structural bioinformatics, however, such systems are still relatively rare; while commercial stand-alone workflow applications are common in the pharmaceutical industry, academic researchers often still rely on command-line scripting to glue individual tools together.

Results

In this work, we address the problem of building a web-based system for workflows in structural bioinformatics. For the underlying molecular modelling engine, we opted for the BALL framework because of its extensive and well-tested functionality in the field of structural bioinformatics. The large number of molecular data structures and algorithms implemented in BALL allows for elegant and sophisticated development of new approaches in the field. We hence connected the versatile BALL library and its visualization and editing front end BALLView with the Galaxy workflow framework. The result, which we call ballaxy, enables the user to simply and intuitively create sophisticated pipelines for applications in structure-based computational biology, integrated into a standard tool for molecular modelling.

Availability and implementation

 ballaxy consists of three parts: some minor modifications to the Galaxy system, a collection of tools and an integration into the BALL framework and the BALLView application for molecular modelling. Modifications to Galaxy will be submitted to the Galaxy project, and the BALL and BALLView integrations will be integrated in the next major BALL release. After acceptance of the modifications into the Galaxy project, we will publish all ballaxy tools via the Galaxy toolshed. In the meantime, all three components are available from http://www.ball-project.org/ballaxy. Also, docker images for ballaxy are available at https://registry.hub.docker.com/u/anhi/ballaxy/dockerfile/. ballaxy is licensed under the terms of the GPL.",2014-09-02 +23419378,PleioGRiP: genetic risk prediction with pleiotropy.,"

Motivation

Although several studies have used Bayesian classifiers for risk prediction using genome-wide single nucleotide polymorphism (SNP) datasets, no software can efficiently perform these analyses on massive genetic datasets and can accommodate multiple traits.

Results

We describe the program PleioGRiP that performs a genome-wide Bayesian model search to identify SNPs associated with a discrete phenotype and uses SNPs ranked by Bayes factor to produce nested Bayesian classifiers. These classifiers can be used for genetic risk prediction, either selecting the classifier with optimal number of features or using an ensemble of classifiers. In addition, PleioGRiP implements an extension to the Bayesian search and classification and can search for pleiotropic relationships in which SNPs are simultaneously associated with two or more distinct phenotypes. These relationships can be used to generate connected Bayesian classifiers to predict the phenotype of interest either using genetic data alone or in combination with the secondary phenotype(s).

Availability

PleioGRiP is implemented in Java, and it is available from http://hdl.handle.net/2144/4367.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-17 +22610855,Inferring direct DNA binding from ChIP-seq.,"Genome-wide binding data from transcription factor ChIP-seq experiments is the best source of information for inferring the relative DNA-binding affinity of these proteins in vivo. However, standard motif enrichment analysis and motif discovery approaches sometimes fail to correctly identify the binding motif for the ChIP-ed factor. To overcome this problem, we propose 'central motif enrichment analysis' (CMEA), which is based on the observation that the positional distribution of binding sites matching the direct-binding motif tends to be unimodal, well centered and maximal in the precise center of the ChIP-seq peak regions. We describe a novel visualization and statistical analysis tool--CentriMo--that identifies the region of maximum central enrichment in a set of ChIP-seq peak regions and displays the positional distributions of predicted sites. Using CentriMo for motif enrichment analysis, we provide evidence that one transcription factor (Nanog) has different binding affinity in vivo than in vitro, that another binds DNA cooperatively (E2f1), and confirm the in vivo affinity of NFIC, rescuing a difficult ChIP-seq data set. In another data set, CentriMo strongly suggests that there is no evidence of direct DNA binding by the ChIP-ed factor (Smad1). CentriMo is now part of the MEME Suite software package available at http://meme.nbcr.net. All data and output files presented here are available at: http://research.imb.uq.edu.au/t.bailey/sd/Bailey2011a.",2012-05-18 +25161241,A new statistical framework to assess structural alignment quality using information compression.,"

Motivation

Progress in protein biology depends on the reliability of results from a handful of computational techniques, structural alignments being one. Recent reviews have highlighted substantial inconsistencies and differences between alignment results generated by the ever-growing stock of structural alignment programs. The lack of consensus on how the quality of structural alignments must be assessed has been identified as the main cause for the observed differences. Current methods assess structural alignment quality by constructing a scoring function that attempts to balance conflicting criteria, mainly alignment coverage and fidelity of structures under superposition. This traditional approach to measuring alignment quality, the subject of considerable literature, has failed to solve the problem. Further development along the same lines is unlikely to rectify the current deficiencies in the field.

Results

This paper proposes a new statistical framework to assess structural alignment quality and significance based on lossless information compression. This is a radical departure from the traditional approach of formulating scoring functions. It links the structural alignment problem to the general class of statistical inductive inference problems, solved using the information-theoretic criterion of minimum message length. Based on this, we developed an efficient and reliable measure of structural alignment quality, I-value. The performance of I-value is demonstrated in comparison with a number of popular scoring functions, on a large collection of competing alignments. Our analysis shows that I-value provides a rigorous and reliable quantification of structural alignment quality, addressing a major gap in the field.

Availability

http://lcb.infotech.monash.edu.au/I-value.

Supplementary information

Online supplementary data are available at http://lcb.infotech.monash.edu.au/I-value/suppl.html.",2014-09-01 +24829448,ToppMiR: ranking microRNAs and their mRNA targets based on biological functions and context.,"Identifying functionally significant microRNAs (miRs) and their correspondingly most important messenger RNA targets (mRNAs) in specific biological contexts is a critical task to improve our understanding of molecular mechanisms underlying organismal development, physiology and disease. However, current miR-mRNA target prediction platforms rank miR targets based on estimated strength of physical interactions and lack the ability to rank interactants as a function of their potential to impact a given biological system. To address this, we have developed ToppMiR (http://toppmir.cchmc.org), a web-based analytical workbench that allows miRs and mRNAs to be co-analyzed via biologically centered approaches in which gene function associated annotations are used to train a machine learning-based analysis engine. ToppMiR learns about biological contexts based on gene associated information from expression data or from a user-specified set of genes that relate to context-relevant knowledge or hypotheses. Within the biological framework established by the genes in the training set, its associated information content is then used to calculate a features association matrix composed of biological functions, protein interactions and other features. This scoring matrix is then used to jointly rank both the test/candidate miRs and mRNAs. Results of these analyses are provided as downloadable tables or network file formats usable in Cytoscape.",2014-05-14 +21984755,Computational network analysis of the anatomical and genetic organizations in the mouse brain.,"

Motivation

The mammalian central nervous system (CNS) generates high-level behavior and cognitive functions. Elucidating the anatomical and genetic organizations in the CNS is a key step toward understanding the functional brain circuitry. The CNS contains an enormous number of cell types, each with unique gene expression patterns. Therefore, it is of central importance to capture the spatial expression patterns in the brain. Currently, a genome-wide atlas of spatial expression patterns in the mouse brain has been made available, and the data are in the form of aligned 3D data arrays. The sheer volume and complexity of these data pose significant challenges for efficient computational analysis.

Results

We employ data reduction and network modeling techniques to explore the anatomical and genetic organizations in the mouse brain. First, to reduce the volume of data, we propose to apply tensor factorization techniques. This tensor formulation treats the stack of 3D volumes as a 4D data array, thereby preserving the mouse brain geometry. We then model the anatomical and genetic organizations as graphical models. To improve the robustness and efficiency of network modeling, we employ stable model selection and efficient sparsity-regularized formulation. Results on network modeling show that our efforts recover known interactions and predict novel putative correlations.

Availability

The complete results are available at the project website: http://compbio.cs.odu.edu/mouse/

Contact

sji@cs.odu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-07 +24009496,How microRNA and transcription factor co-regulatory networks affect osteosarcoma cell proliferation.,"Osteosarcomas (OS) are complex bone tumors with various genomic alterations. These alterations affect the expression and function of several genes due to drastic changes in the underlying gene regulatory network. However, we know little about critical gene regulators and their functional consequences on the pathogenesis of OS. Therefore, we aimed to determine microRNA and transcription factor (TF) co-regulatory networks in OS cell proliferation. Cell proliferation is an essential part in the pathogenesis of OS and deeper understanding of its regulation might help to identify potential therapeutic targets. Based on expression data of OS cell lines divided according to their proliferative activity, we obtained 12 proliferation-related microRNAs and corresponding target genes. Therewith, microRNA and TF co-regulatory networks were generated and analyzed regarding their structure and functional influence. We identified key co-regulators comprising the microRNAs miR-9-5p, miR-138, and miR-214 and the TFs SP1 and MYC in the derived networks. These regulators are implicated in NFKB- and RB1-signaling and focal adhesion processes based on their common or interacting target genes (e.g., CDK6, CTNNB1, E2F4, HES1, ITGA6, NFKB1, NOTCH1, and SIN3A). Thus, we proposed a model of OS cell proliferation which is primarily co-regulated through the interactions of the mentioned microRNA and TF combinations. This study illustrates the benefit of systems biological approaches in the analysis of complex diseases. We integrated experimental data with publicly available information to unravel the coordinated (post)-transcriptional control of microRNAs and TFs to identify potential therapeutic targets in OS. 
The resulting microRNA and TF co-regulatory networks are publicly available for further exploration to generate or evaluate own hypotheses of the pathogenesis of OS (http://www.complex-systems.uni-muenster.de/co_networks.html).",2013-08-29 +23926227,OncoSNP-SEQ: a statistical approach for the identification of somatic copy number alterations from next-generation sequencing of cancer genomes.,"

Summary

Recent major cancer genome sequencing studies have used whole-genome sequencing to detect various types of genomic variation. However, a number of these studies have continued to rely on SNP array information to provide additional results for copy number and loss-of-heterozygosity estimation and assessing tumour purity. OncoSNP-SEQ is a statistical model-based approach for inferring copy number profiles directly from high-coverage whole genome sequencing data that is able to account for unknown tumour purity and ploidy.

Availability

MATLAB code is available at the following URL: https://sites.google.com/site/oncosnpseq/.",2013-08-07 +24818602,A novel method for identification and quantification of consistently differentially methylated regions.,"Advances in biotechnology have resulted in large-scale studies of DNA methylation. A differentially methylated region (DMR) is a genomic region with multiple adjacent CpG sites that exhibit different methylation statuses among multiple samples. Many so-called ""supervised"" methods have been established to identify DMRs between two or more comparison groups. Methods for the identification of DMRs without reference to phenotypic information are, however, less well studied. An alternative ""unsupervised"" approach was proposed, in which DMRs in studied samples were identified with consideration of nature dependence structure of methylation measurements between neighboring probes from tiling arrays. Through simulation study, we investigated effects of dependencies between neighboring probes on determining DMRs where a lot of spurious signals would be produced if the methylation data were analyzed independently of the probe. In contrast, our newly proposed method could successfully correct for this effect with a well-controlled false positive rate and a comparable sensitivity. By applying to two real datasets, we demonstrated that our method could provide a global picture of methylation variation in studied samples. R source codes to implement the proposed method were freely available at http://www.csjfann.ibms.sinica.edu.tw/eag/programlist/ICDMR/ICDMR.html.",2014-05-12 +24172939,"Malaria surveillance--United States, 2011.","

Problem/condition

Malaria in humans is caused by intraerythrocytic protozoa of the genus Plasmodium. These parasites are transmitted by the bite of an infective female Anopheles mosquito. The majority of malaria infections in the United States occur among persons who have traveled to regions with ongoing malaria transmission. However, malaria is also occasionally acquired by persons who have not traveled out of the country, through exposure to infected blood products, congenital transmission, laboratory exposure, or local mosquitoborne transmission. Malaria surveillance in the United States is conducted to identify episodes of local transmission and to guide prevention recommendations for travelers.

Period covered

This report summarizes cases in persons with onset of illness in 2011 and summarizes trends during previous years.

Description of system

Malaria cases diagnosed by blood film, polymerase chain reaction, or rapid diagnostic tests are mandated to be reported to local and state health departments by health-care providers or laboratory staff. Case investigations are conducted by local and state health departments, and reports are transmitted to CDC through the National Malaria Surveillance System, National Notifiable Diseases Surveillance System, or direct CDC consults. Data from these reporting systems serve as the basis for this report.

Results

CDC received 1,925 reported cases of malaria with an onset of symptoms in 2011 among persons in the United States, including 1,920 cases classified as imported, one laboratory-acquired case, one transfusion-related case, two congenital cases, and one cryptic case. The total number of cases represents an increase of 14% from the 1,691 cases reported for 2010 and the largest number of reported cases since 1971. Plasmodium falciparum, P. vivax, P. malariae, and P. ovale were identified in 49%, 22%, 3%, and 3% of cases, respectively. Twenty-one (1%) patients were infected by two species. The infecting species was unreported or undetermined in 23% of cases, an increase of 5 percentage points from 2010. Of the 871 patients who reported purpose of travel, 607 (70%) were visiting friends or relatives (VFR). Among the 929 cases in U.S. civilians for whom information on chemoprophylaxis use and travel region was known, 57 (6%) patients reported that they had followed and adhered to a chemoprophylactic drug regimen recommended by CDC for the regions to which they had traveled. Thirty-seven cases were reported in pregnant women, among whom only one adhered to chemoprophylaxis. Among all reported cases, significantly more cases (n=275 [14%]) were classified as severe infections in 2011 compared with 2010 (n=183 [11%]; p=0.0018; chi square). Five persons with malaria died in 2011. After 2 years of improvement in completion of data elements on the malaria case form, higher percentages of incomplete data in 2011 for residential status (from 11% in 2010 to 19% in 2011) and species (from 18% in 2010 to 22% in 2011) were noted.

Interpretation

The number of cases reported in 2011 marked the largest number of cases since 1971 (N = 3,180). Despite progress in reducing the global burden of malaria, the disease remains endemic in many regions, and the use of appropriate prevention measures by travelers is still inadequate.

Public health actions

Completion of data elements on the malaria case report form decreased in 2011 compared with 2010. This incomplete reporting compromises efforts to examine trends in malaria cases and prevent infections. VFR travelers continue to be a difficult population to reach with effective malaria prevention strategies. Evidence-based prevention strategies that effectively target VFR travelers need to be developed and implemented to have a substantial impact on the numbers of imported malaria cases in the United States. Although more persons with cases reported taking chemoprophylaxis to prevent malaria, the majority reported not taking it, and adherence was poor among those who did take chemoprophylaxis. Proper use of malaria chemoprophylaxis will prevent the majority of malaria illness and reduce the risk for severe disease (http://www.cdc.gov/malaria/travelers/drugs.html). Malaria infections can be fatal if not diagnosed and treated promptly with antimalarial medications appropriate for the patient's age and medical history, the likely country of malaria acquisition, and previous use of antimalarial chemoprophylaxis. Clinicians should consult the CDC Guidelines for Treatment of Malaria and contact the CDC's Malaria Hotline for case management advice, when needed. Malaria treatment recommendations can be obtained online (http://www.cdc.gov/malaria/diagnosis_treatment) or by calling the Malaria Hotline (770-488-7788 or toll-free at 855-856-4713).",2013-11-01 +24739488,PharmDock: a pharmacophore-based docking program.,"

Background

Protein-based pharmacophore models are enriched with the information of potential interactions between ligands and the protein target. We have shown in a previous study that protein-based pharmacophore models can be applied for ligand pose prediction and pose ranking. In this publication, we present a new pharmacophore-based docking program PharmDock that combines pose sampling and ranking based on optimized protein-based pharmacophore models with local optimization using an empirical scoring function.

Results

Tests of PharmDock on ligand pose prediction, binding affinity estimation, compound ranking and virtual screening yielded comparable or better performance to existing and widely used docking programs. The docking program comes with an easy-to-use GUI within PyMOL. Two features have been incorporated in the program suite that allow for user-defined guidance of the docking process based on previous experimental data. Docking with those features demonstrated superior performance compared to unbiased docking.

Conclusion

A protein pharmacophore-based docking program, PharmDock, has been made available with a PyMOL plugin. PharmDock and the PyMOL plugin are freely available from http://people.pharmacy.purdue.edu/~mlill/software/pharmdock.",2014-04-16 +22920415,Combining peak- and chromatogram-based retention time alignment algorithms for multiple chromatography-mass spectrometry datasets.,"

Background

Modern analytical methods in biology and chemistry use separation techniques coupled to sensitive detectors, such as gas chromatography-mass spectrometry (GC-MS) and liquid chromatography-mass spectrometry (LC-MS). These hyphenated methods provide high-dimensional data. Comparing such data manually to find corresponding signals is a laborious task, as each experiment usually consists of thousands of individual scans, each containing hundreds or even thousands of distinct signals. In order to allow for successful identification of metabolites or proteins within such data, especially in the context of metabolomics and proteomics, an accurate alignment and matching of corresponding features between two or more experiments is required. Such a matching algorithm should capture fluctuations in the chromatographic system which lead to non-linear distortions on the time axis, as well as systematic changes in recorded intensities. Many different algorithms for the retention time alignment of GC-MS and LC-MS data have been proposed and published, but all of them focus either on aligning previously extracted peak features or on aligning and comparing the complete raw data containing all available features.

Results

In this paper we introduce two algorithms for retention time alignment of multiple GC-MS datasets: multiple alignment by bidirectional best hits peak assignment and cluster extension (BIPACE) and center-star multiple alignment by pairwise partitioned dynamic time warping (CeMAPP-DTW). We show how the similarity-based peak group matching method BIPACE may be used for multiple alignment calculation individually and how it can be used as a preprocessing step for the pairwise alignments performed by CeMAPP-DTW. We evaluate the algorithms individually and in combination on a previously published small GC-MS dataset studying the Leishmania parasite and on a larger GC-MS dataset studying grains of wheat (Triticum aestivum).

Conclusions

We have shown that BIPACE achieves very high precision and recall and a very low number of false positive peak assignments on both evaluation datasets. CeMAPP-DTW finds a high number of true positives when executed on its own, but achieves even better results when BIPACE is used to constrain its search space. The source code of both algorithms is included in the OpenSource software framework Maltcms, which is available from http://maltcms.sf.net. The evaluation scripts of the present study are available from the same source.",2012-08-27 +23251396,Using answer set programming to integrate RNA expression with signalling pathway information to infer how mutations affect ageing.,"A challenge of systems biology is to integrate incomplete knowledge on pathways with existing experimental data sets and relate these to measured phenotypes. Research on ageing often generates such incomplete data, creating difficulties in integrating RNA expression with information about biological processes and the phenotypes of ageing, including longevity. Here, we develop a logic-based method that employs Answer Set Programming, and use it to infer signalling effects of genetic perturbations, based on a model of the insulin signalling pathway. We apply our method to RNA expression data from Drosophila mutants in the insulin pathway that alter lifespan, in a foxo dependent fashion. We use this information to deduce how the pathway influences lifespan in the mutant animals. We also develop a method for inferring the largest common sub-paths within each of our signalling predictions. Our comparisons reveal consistent homeostatic mechanisms across both long- and short-lived mutants. The transcriptional changes observed in each mutation usually provide negative feedback to signalling predicted for that mutation. We also identify an S6K-mediated feedback in two long-lived mutants that suggests a crosstalk between these pathways in mutants of the insulin pathway, in vivo. 
By formulating the problem as a logic-based theory in a qualitative fashion, we are able to use the efficient search facilities of Answer Set Programming, allowing us to explore larger pathways, combine molecular changes with pathways and phenotype and infer effects on signalling in in vivo, whole-organism, mutants, where direct signalling stimulation assays are difficult to perform. Our methods are available in the web-service NetEffects: http://www.ebi.ac.uk/thornton-srv/software/NetEffects.",2012-12-10 +24524857,A novel process simulation model (PSM) for anaerobic digestion using Aspen Plus.,"A novel process simulation model (PSM) was developed for biogas production in anaerobic digesters using Aspen Plus®. The PSM is a library model of anaerobic digestion, which predicts the biogas production from any substrate at any given process condition. A total of 46 reactions were used in the model, which include inhibitions, rate-kinetics, pH, ammonia, volume, loading rate, and retention time. The hydrolysis reactions were based on the extent of the reaction, while the acidogenic, acetogenic, and methanogenic reactions were based on the kinetics. The PSM was validated against a variety of lab and industrial data on anaerobic digestion. The P-value after statistical analysis was found to be 0.701, which showed that there was no significant difference between discrete validations and processing conditions. The sensitivity analysis for a ±10% change in composition of substrate and extent of reaction results in 5.285% higher value than the experimental value. The model is available at http://hdl.handle.net/2320/12358 (Rajendran et al., 2013b).",2014-01-24 +22543367,Geneious Basic: an integrated and extendable desktop software platform for the organization and analysis of sequence data.,"

Unlabelled

The two main functions of bioinformatics are the organization and analysis of biological data using computational resources. Geneious Basic has been designed to be an easy-to-use and flexible desktop software application framework for the organization and analysis of biological data, with a focus on molecular sequences and related data types. It integrates numerous industry-standard discovery analysis tools, with interactive visualizations to generate publication-ready images. One key contribution to researchers in the life sciences is the Geneious public application programming interface (API) that affords the ability to leverage the existing framework of the Geneious Basic software platform for virtually unlimited extension and customization. The result is an increase in the speed and quality of development of computation tools for the life sciences, due to the functionality and graphical user interface available to the developer through the public API. Geneious Basic represents an ideal platform for the bioinformatics community to leverage existing components and to integrate their own specific requirements for the discovery, analysis and visualization of biological data.

Availability and implementation

Binaries and public API freely available for download at http://www.geneious.com/basic, implemented in Java and supported on Linux, Apple OSX and MS Windows. The software is also available from the Bio-Linux package repository at http://nebc.nerc.ac.uk/news/geneiousonbl.",2012-04-27 +23865810,Fast online and index-based algorithms for approximate search of RNA sequence-structure patterns.,"

Background

It is well known that the search for homologous RNAs is more effective if both sequence and structure information is incorporated into the search. However, current tools for searching with RNA sequence-structure patterns cannot fully handle mutations occurring on both these levels or are simply not fast enough for searching large sequence databases because of the high computational costs of the underlying sequence-structure alignment problem.

Results

We present new fast index-based and online algorithms for approximate matching of RNA sequence-structure patterns supporting a full set of edit operations on single bases and base pairs. Our methods efficiently compute semi-global alignments of structural RNA patterns and substrings of the target sequence whose costs satisfy a user-defined sequence-structure edit distance threshold. For this purpose, we introduce a new computing scheme to optimally reuse the entries of the required dynamic programming matrices for all substrings and combine it with a technique for avoiding the alignment computation of non-matching substrings. Our new index-based methods exploit suffix arrays preprocessed from the target database and achieve running times that are sublinear in the size of the searched sequences. To support the description of RNA molecules that fold into complex secondary structures with multiple ordered sequence-structure patterns, we use fast algorithms for the local or global chaining of approximate sequence-structure pattern matches. The chaining step removes spurious matches from the set of intermediate results, in particular of patterns with little specificity. In benchmark experiments on the Rfam database, our improved online algorithm is faster than the best previous method by up to factor 45. Our best new index-based algorithm achieves a speedup of factor 560.

Conclusions

The presented methods achieve considerable speedups compared to the best previous method. This, together with the expected sublinear running time of the presented index-based algorithms, allows for the first time approximate matching of RNA sequence-structure patterns in large sequence databases. Beyond the algorithmic contributions, we provide with RaligNAtor a robust and well documented open-source software package implementing the algorithms presented in this manuscript. The RaligNAtor software is available at http://www.zbh.uni-hamburg.de/ralignator.",2013-07-17 +23864220,MisPred: a resource for identification of erroneous protein sequences in public databases.,"Correct prediction of the structure of protein-coding genes of higher eukaryotes is still a difficult task; therefore, public databases are heavily contaminated with mispredicted sequences. The high rate of misprediction has serious consequences because it significantly affects the conclusions that may be drawn from genome-scale sequence analyses of eukaryotic genomes. Here we present the MisPred database and computational pipeline that provide efficient means for the identification of erroneous sequences in public databases. The MisPred database contains a collection of abnormal, incomplete and mispredicted protein sequences from 19 metazoan species identified as erroneous by MisPred quality control tools in the UniProtKB/Swiss-Prot, UniProtKB/TrEMBL, NCBI/RefSeq and EnsEMBL databases. Major releases of the database are automatically generated and updated regularly. The database (http://www.mispred.com) is easily accessible through a simple web interface coupled to a powerful query engine and a standard web service. The content is completely or partially downloadable in a variety of formats. 
DATABASE URL: http://www.mispred.com.",2013-07-17 +25038083,Cross kingdom functional conservation of the core universally conserved threonylcarbamoyladenosine tRNA synthesis enzymes.,"Threonylcarbamoyladenosine (t(6)A) is a universal modification located in the anticodon stem-loop of tRNAs. In yeast, both cytoplasmic and mitochondrial tRNAs are modified. The cytoplasmic t(6)A synthesis pathway was elucidated and requires Sua5p, Kae1p, and four other KEOPS complex proteins. Recent in vitro work suggested that the mitochondrial t(6)A machinery of Saccharomyces cerevisiae is composed of only two proteins, Sua5p and Qri7p, a member of the Kae1p/TsaD family (L. C. K. Wan et al., Nucleic Acids Res. 41:6332-6346, 2013, http://dx.doi.org/10.1093/nar/gkt322). Sua5p catalyzes the first step leading to the threonyl-carbamoyl-AMP intermediate (TC-AMP), while Qri7 transfers the threonyl-carbamoyl moiety from TC-AMP to tRNA to form t(6)A. Qri7p localizes to the mitochondria, but Sua5p was reported to be cytoplasmic. We show that Sua5p is targeted to both the cytoplasm and the mitochondria through the use of alternative start sites. The import of Sua5p into the mitochondria is required for this organelle to be functional, since the TC-AMP intermediate produced by Sua5p in the cytoplasm is not transported into the mitochondria in sufficient amounts. This minimal t(6)A pathway was characterized in vitro and, for the first time, in vivo by heterologous complementation studies in Escherichia coli. The data revealed a potential for TC-AMP channeling in the t(6)A pathway, as the coexpression of Qri7p and Sua5p is required to complement the essentiality of the E. coli tsaD mutant. Our results firmly established that Qri7p and Sua5p constitute the mitochondrial pathway for the biosynthesis of t(6)A and bring additional advancement in our understanding of the reaction mechanism.",2014-07-18 +23749960,Network2Canvas: network visualization on a canvas with enrichment analysis.,"

Motivation

Networks are vital to computational systems biology research, but visualizing them is a challenge. For networks larger than ∼100 nodes and ∼200 links, ball-and-stick diagrams fail to convey much information. To address this, we developed Network2Canvas (N2C), a web application that provides an alternative way to view networks. N2C visualizes networks by placing nodes on a square toroidal canvas. The network nodes are clustered on the canvas using simulated annealing to maximize local connections where a node's brightness is made proportional to its local fitness. The interactive canvas is implemented in HyperText Markup Language (HTML)5 with the JavaScript library Data-Driven Documents (D3). We applied N2C to visualize 30 canvases made from human and mouse gene-set libraries and 6 canvases made from the Food and Drug Administration (FDA)-approved drug-set libraries. Given lists of genes or drugs, enriched terms are highlighted on the canvases, and their degree of clustering is computed. Because N2C produces visual patterns of enriched terms on canvases, a trained eye can detect signatures instantly. In summary, N2C provides a new flexible method to visualize large networks and can be used to perform and visualize gene-set and drug-set enrichment analyses.

Availability

N2C is freely available at http://www.maayanlab.net/N2C and is open source.

Contact

avi.maayan@mssm.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-06-07 +22839576,Genome-wide identification of significant aberrations in cancer genome.,"

Background

Somatic Copy Number Alterations (CNAs) in human genomes are present in almost all human cancers. Systematic efforts to characterize such structural variants must effectively distinguish significant consensus events from random background aberrations. Here we introduce Significant Aberration in Cancer (SAIC), a new method for characterizing and assessing the statistical significance of recurrent CNA units. Three main features of SAIC include: (1) exploiting the intrinsic correlation among consecutive probes to assign a score to each CNA unit instead of single probes; (2) performing permutations on CNA units that preserve correlations inherent in the copy number data; and (3) iteratively detecting Significant Copy Number Aberrations (SCAs) and estimating an unbiased null distribution by applying an SCA-exclusive permutation scheme.

Results

We test and compare the performance of SAIC against four peer methods (GISTIC, STAC, KC-SMART, CMDS) on a large number of simulation datasets. Experimental results show that SAIC outperforms peer methods in terms of larger area under the Receiver Operating Characteristics curve and increased detection power. We then apply SAIC to analyze structural genomic aberrations acquired in four real cancer genome-wide copy number data sets (ovarian cancer, metastatic prostate cancer, lung adenocarcinoma, glioblastoma). When compared with previously reported results, SAIC successfully identifies most SCAs known to be of biological significance and associated with oncogenes (e.g., KRAS, CCNE1, and MYC) or tumor suppressor genes (e.g., CDKN2A/B). Furthermore, SAIC identifies a number of novel SCAs in these copy number data that encompass tumor related genes and may warrant further studies.

Conclusions

Supported by a well-grounded theoretical framework, SAIC has been developed and used to identify SCAs in various cancer copy number data sets, providing useful information to study the landscape of cancer genomes. Open-source and platform-independent SAIC software is implemented using C++, together with R scripts for data formatting and Perl scripts for user interfacing, and it is easy to install and efficient to use. The source code and documentation are freely available at http://www.cbil.ece.vt.edu/software.htm.",2012-07-27 +23856170,"The association between the PPARγ2 Pro12Ala polymorphism and nephropathy susceptibility in type 2 diabetes: a meta-analysis based on 9,176 subjects.","

Background

The polymorphism Pro12Ala in peroxisome proliferator-activated receptor-γ2 gene (PPARγ2) has been reported to be associated with diabetic nephropathy (DN) in some studies, though the results remain inconclusive. To explore this relationship between PPARγ2 Pro12Ala polymorphism and the susceptibility for DN, a cumulative meta-analysis was performed in this study.

Method

PubMed, Medline, Embase and Web of Science databases have been systematically searched to identify relevant studies. Odds ratios (ORs) and 95% confidence intervals (CIs) were calculated.

Results

18 studies were included in this meta-analysis, involving 3,361 cases and 5,815 controls. The PPARγ2 Ala12 allele was significantly associated with decreased risk of DN based on dominant model (OR=0.778; 95%CI=0.618-0.981; P_heterogeneity=0.008; P=0.034). In the stratified analysis by ethnicity, significantly decreased risks were found among Caucasians for dominant model (OR=0.674; 95%CI=0.500-0.909; P_heterogeneity=0.079; P=0.010), while no significant association was found in Asians.

Conclusions

The results from the present meta-analysis indicated that the Pro12Ala polymorphism in PPARγ2 gene is not a risk factor for DN in type 2 diabetes (T2D). Further large and well-designed studies are needed to confirm this conclusion.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/7491348341027320.",2013-07-15 +23865855,CELDA -- an ontology for the comprehensive representation of cells in complex systems.,"

Background

The need for detailed description and modeling of cells drives the continuous generation of large and diverse datasets. Unfortunately, there exists no systematic and comprehensive way to organize these datasets and their information. CELDA (Cell: Expression, Localization, Development, Anatomy) is a novel ontology for the association of primary experimental data and derived knowledge to various types of cells of organisms.

Results

CELDA is a structure that can help to categorize cell types based on species, anatomical localization, subcellular structures, developmental stages and origin. It targets cells in vitro as well as in vivo. Instead of developing a novel ontology from scratch, we carefully designed CELDA in such a way that existing ontologies were integrated as much as possible, and only minimal extensions were performed to cover those classes and areas not present in any existing model. Currently, ten existing ontologies and models are linked to CELDA through the top-level ontology BioTop. Together with 15,439 newly created classes, CELDA contains more than 196,000 classes and 233,670 relationship axioms. CELDA is primarily used as a representational framework for modeling, analyzing and comparing cells within and across species in CellFinder, a web based data repository on cells (http://cellfinder.org).

Conclusions

CELDA can semantically link diverse types of information about cell types. It has been integrated within the research platform CellFinder, where it exemplarily relates cell types from liver and kidney during development on the one hand and anatomical locations in humans on the other, integrating information on all spatial and temporal stages. CELDA is available from the CellFinder website: http://cellfinder.org/about/ontology.",2013-07-17 +24861193,[Quality guidelines for presurgical epilepsy diagnosis and operative epilepsy therapy: 1st revised version].,"In patients with pharmacorefractory epilepsy, preoperative epilepsy evaluation and subsequent epilepsy surgery lead to a significant improvement of seizure control, proportion of seizure-free patients, quality of life and social participation. The aims of preoperative epilepsy evaluation are to define the chance of complete seizure freedom and the likelihood of inducing new neurological deficits in a given patient. As epilepsy surgery is an elective procedure quality standards are particularly high. As detailed in the first edition of these practice guidelines, quality control relates to seven different domains: (1) establishing centres with a sufficient number of sufficiently and specifically trained personnel, (2) minimum technical standards and equipment, (3) continuing medical education of employees, (4) surveillance by trained personnel during the video electroencephalography (EEG) monitoring (VEM), (5) systematic acquisition of clinical and outcome data, (6) the minimum number of preoperative evaluations and epilepsy surgery procedures and (7) cooperation of epilepsy centres. In the first edition of these practice guidelines published in 2000 it was defined which standards were desirable and that their implementation should be aimed for. These standards related especially to the certification required for different groups of medical doctors involved and to the minimum numbers of procedures required. 
In the subsequent decade quite a number of colleagues have been certified by the trinational Working Group (Arbeitsgemeinschaft, AG) for Presurgical Epilepsy Diagnosis and Operative Epilepsy Treatment (http://www.ag-epilepsiechirurgie.de) and therefore, on 8 May 2013 the executive board of the AG decided to now make these standards obligatory.",2014-06-01 +22962465,Protein domain recurrence and order can enhance prediction of protein functions.,"

Motivation

Burgeoning sequencing technologies have generated massive amounts of genomic and proteomic data. Annotating the functions of proteins identified in this data has become a big and crucial problem. Various computational methods have been developed to infer the protein functions based on either the sequences or domains of proteins. The existing methods, however, ignore the recurrence and the order of the protein domains in this function inference.

Results

We developed two new methods to infer protein functions based on protein domain recurrence and domain order. Our first method, DRDO, calculates the posterior probability of the Gene Ontology terms based on domain recurrence and domain order information, whereas our second method, DRDO-NB, relies on the naïve Bayes methodology using the same domain architecture information. Our large-scale benchmark comparisons show strong improvements in the accuracy of the protein function inference achieved by our new methods, demonstrating that domain recurrence and order can provide important information for inference of protein functions.

Availability

The new models are provided as open source programs at http://sfb.kaust.edu.sa/Pages/Software.aspx.

Contact

dkihara@cs.purdue.edu, xin.gao@kaust.edu.sa

Supplementary information

Supplementary data are available at Bioinformatics Online.",2012-09-01 +24894502,PePr: a peak-calling prioritization pipeline to identify consistent or differential peaks from replicated ChIP-Seq data.,"

Motivation

ChIP-Seq is the standard method to identify genome-wide DNA-binding sites for transcription factors (TFs) and histone modifications. There is a growing need to analyze experiments with biological replicates, especially for epigenomic experiments where variation among biological samples can be substantial. However, tools that can perform group comparisons are currently lacking.

Results

We present a peak-calling prioritization pipeline (PePr) for identifying consistent or differential binding sites in ChIP-Seq experiments with biological replicates. PePr models read counts across the genome among biological samples with a negative binomial distribution and uses a local variance estimation method, ranking consistent or differential binding sites more favorably than sites with greater variability. We compared PePr with commonly used and recently proposed approaches on eight TF datasets and show that PePr uniquely identifies consistent regions with enriched read counts, high motif occurrence rate and known characteristics of TF binding based on visual inspection. For histone modification data with broadly enriched regions, PePr identified differential regions that are consistent within groups and outperformed other methods in scaling False Discovery Rate (FDR) analysis.

Availability and implementation

http://code.google.com/p/pepr-chip-seq/.",2014-06-03 +23740234,"Treating axial and peripheral spondyloarthritis, including psoriatic arthritis, to target: results of a systematic literature search to support an international treat-to-target recommendation in spondyloarthritis.","

Background

Current recommendations for the management of axial spondyloarthritis (SpA) and psoriatic arthritis are to monitor disease activity and adjust therapy accordingly. However, treatment targets and timeframes of change have not been defined. An international expert panel has been convened to develop 'treat-to-target' recommendations, based on published evidence and expert opinion.

Objective

To review evidence on targeted treatment for axial and peripheral SpA, as well as for psoriatic skin disease.

Methods

We performed a systematic literature search covering Medline, Embase and Cochrane, conference abstracts and studies in http://www.clinicaltrials.gov.

Results

Randomised comparisons of targeted versus routine treatment are lacking. Some studies implemented treatment targets before escalating therapy: in ankylosing spondylitis, most trials used a decrease in Bath Ankylosing Spondylitis Disease Activity Index; in psoriatic arthritis, protocols primarily considered a reduction in swollen and tender joints; in psoriasis, the Modified Psoriasis Severity Score and the Psoriasis Area and Severity Index were used. Complementary evidence correlating these factors with function and radiographic damage at follow-up is sparse and equivocal.

Conclusions

There is a need for randomised trials that investigate the value of treat-to-target recommendations in SpA and psoriasis. Several trials have used thresholds of disease activity measures to guide treatment decisions. However, evidence on the effect of these data on long-term outcome is scarce. The search data informed the expert committee regarding the formulation of recommendations and a research agenda.",2013-06-05 +24932001,Accurate viral population assembly from ultra-deep sequencing data.,"

Motivation

Next-generation sequencing technologies sequence viruses with ultra-deep coverage, thus promising to revolutionize our understanding of the underlying diversity of viral populations. While the sequencing coverage is high enough that even rare viral variants are sequenced, the presence of sequencing errors makes it difficult to distinguish between rare variants and sequencing errors.

Results

In this article, we present a method to overcome the limitations of sequencing technologies and assemble a diverse viral population that allows for the detection of previously undiscovered rare variants. The proposed method consists of a high-fidelity sequencing protocol and an accurate viral population assembly method, referred to as Viral Genome Assembler (VGA). The proposed protocol is able to eliminate sequencing errors by using individual barcodes attached to the sequencing fragments. Highly accurate data in combination with deep coverage allow VGA to assemble rare variants. VGA uses an expectation-maximization algorithm to estimate abundances of the assembled viral variants in the population. Results on both synthetic and real datasets show that our method is able to accurately assemble an HIV viral population and detect rare variants previously undetectable due to sequencing errors. VGA outperforms state-of-the-art methods for genome-wide viral assembly. Furthermore, our method is the first viral assembly method that scales to millions of sequencing reads.

Availability

Our tool VGA is freely available at http://genetics.cs.ucla.edu/vga/",2014-06-01 +23303509,ALE: a generic assembly likelihood evaluation framework for assessing the accuracy of genome and metagenome assemblies.,"

Motivation

Researchers need general purpose methods for objectively evaluating the accuracy of single and metagenome assemblies and for automatically detecting any errors they may contain. Current methods do not fully meet this need because they require a reference, only consider one of the many aspects of assembly quality or lack statistical justification, and none are designed to evaluate metagenome assemblies.

Results

In this article, we present an Assembly Likelihood Evaluation (ALE) framework that overcomes these limitations, systematically evaluating the accuracy of an assembly in a reference-independent manner using rigorous statistical methods. This framework is comprehensive, and integrates read quality, mate pair orientation and insert length (for paired-end reads), sequencing coverage, read alignment and k-mer frequency. ALE pinpoints synthetic errors in both single and metagenomic assemblies, including single-base errors, insertions/deletions, genome rearrangements and chimeric assemblies presented in metagenomes. At the genome level with real-world data, ALE identifies three large misassemblies from the Spirochaeta smaragdinae finished genome, which were all independently validated by Pacific Biosciences sequencing. At the single-base level with Illumina data, ALE recovers 215 of 222 (97%) single nucleotide variants in a training set from a GC-rich Rhodobacter sphaeroides genome. Using real Pacific Biosciences data, ALE identifies 12 of 12 synthetic errors in a Lambda Phage genome, surpassing even Pacific Biosciences' own variant caller, EviCons. In summary, the ALE framework provides a comprehensive, reference-independent and statistically rigorous measure of single genome and metagenome assembly accuracy, which can be used to identify misassemblies or to optimize the assembly process.

Availability

ALE is released as open source software under the UoI/NCSA license at http://www.alescore.org. It is implemented in C and Python.",2013-01-09 +22688413,Metagenomic microbial community profiling using unique clade-specific marker genes.,"Metagenomic shotgun sequencing data can identify microbes populating a microbial community and their proportions, but existing taxonomic profiling methods are inefficient for increasingly large data sets. We present an approach that uses clade-specific marker genes to unambiguously assign reads to microbial clades more accurately and >50× faster than current approaches. We validated our metagenomic phylogenetic analysis tool, MetaPhlAn, on terabases of short reads and provide the largest metagenomic profiling to date of the human gut. It can be accessed at http://huttenhower.sph.harvard.edu/metaphlan/.",2012-06-10 +25521044,Using hidden Markov models to investigate G-quadruplex motifs in genomic sequences.,"

Background

G-quadruplexes are four-stranded structures formed in guanine-rich nucleotide sequences. Several functional roles of DNA G-quadruplexes have so far been investigated, where their putative functional roles during DNA replication and transcription have been suggested. A necessary condition for G-quadruplex formation is the presence of four regions of tandem guanines called G-runs and three nucleotide subsequences called loops that connect G-runs. A simple computational way to detect potential G-quadruplex regions in a given genomic sequence is pattern matching with regular expression. Although many putative G-quadruplex motifs can be found in most genomes by the regular expression-based approach, the majority of these sequences are unlikely to form G-quadruplexes because they are unstable as compared with canonical double helix structures.

Results

Here we present elaborate computational models for representing DNA G-quadruplex motifs using hidden Markov models (HMMs). Use of HMMs enables us to evaluate G-quadruplex motifs quantitatively by a probabilistic measure. In addition, the parameters of HMMs can be trained by using experimentally verified data. Computational experiments in discriminating between positive and negative G-quadruplex sequences as well as reducing putative G-quadruplexes in the human genome were carried out, indicating that HMM-based models can discern bona fide G-quadruplex structures well and one of them has the possibility of reducing false positive G-quadruplexes predicted by existing regular expression-based methods. Furthermore, our results show that one of our models can be specialized to detect G-quadruplex sequences whose functional roles are expected to be involved in DNA transcription.

Conclusions

The HMM-based method along with the conventional pattern matching approach can contribute to reducing costly and laborious wet-lab experiments to perform functional analysis on a given set of potential G-quadruplexes of interest. The C++ and Perl programs are available at http://tcs.cira.kyoto-u.ac.jp/~ykato/program/g4hmm/.",2014-12-08 +22389011,The Bluejay genome browser.,"The Bluejay genome browser is a stand-alone visualization tool for the multi-scale viewing of annotated genomes and other genomic elements. Bluejay allows users to customize display features to suit their needs, and produces publication-quality graphics. Bluejay provides a multitude of ways to interrelate biological data at the genome scale. Users can load gene expression data into a genome display for expression visualization in context. Multiple genomes can be compared concurrently, including time series expression data, based on Gene Ontology labels. External, context-sensitive biological Web Services are linked to the displayed genomic elements ad hoc for in-depth genomic data analysis and interpretation. Users can mark multiple points of interest in a genome by creating waypoints, and exploit them for easy navigation of single or multiple genomes. Using this comprehensive visual environment, users can study a gene not just in relation to its genome, but also its transcriptome and evolutionary origins. Written in Java, Bluejay is platform-independent and is freely available from http://bluejay.ucalgary.ca.",2012-03-01 +24896117,3D hydrophobic moment vectors as a tool to characterize the surface polarity of amphiphilic peptides.,"The interaction of membranes with peptides and proteins is largely determined by their amphiphilic character. Hydrophobic moments of helical segments are commonly derived from their two-dimensional helical wheel projections, and the same is true for β-sheets. 
However, to the best of our knowledge, there exists no method to describe structures in three dimensions or molecules with irregular shape. Here, we define the hydrophobic moment of a molecule as a vector in three dimensions by evaluating the surface distribution of all hydrophilic and lipophilic regions over any given shape. The electrostatic potential on the molecular surface is calculated based on the atomic point charges. The resulting hydrophobic moment vector is specific for the instantaneous conformation, and it takes into account all structural characteristics of the molecule, e.g., partial unfolding, bending, and side-chain torsion angles. Extended all-atom molecular dynamics simulations are then used to calculate the equilibrium hydrophobic moments for two antimicrobial peptides, gramicidin S and PGLa, under different conditions. We show that their effective hydrophobic moment vectors reflect the distribution of polar and nonpolar patches on the molecular surface and the calculated electrostatic surface potential. A comparison of simulations in solution and in lipid membranes shows how the peptides undergo internal conformational rearrangement upon binding to the bilayer surface. A good correlation with solid-state NMR data indicates that the hydrophobic moment vector can be used to predict the membrane binding geometry of peptides. This method is available as a web application on http://www.ibg.kit.edu/HM/.",2014-06-01 +24809449,Calculating Kolmogorov complexity from the output frequency distributions of small Turing machines.,"Drawing on various notions from theoretical computer science, we present a novel numerical approach, motivated by the notion of algorithmic probability, to the problem of approximating the Kolmogorov-Chaitin complexity of short strings. The method is an alternative to the traditional lossless compression algorithms, which it may complement, the two being serviceable for different string lengths. 
We provide a thorough analysis for all $\sum_{n=1}^{11} 2^n$ binary strings of length $n<12$ and for most strings of length $12 \le n \le 16$ by running all $\sim 2.5 \times 10^{13}$ Turing machines with 5 states and 2 symbols ($8 \times 22^{9}$ with reduction techniques) using the most standard formalism of Turing machines, used in, for example, the Busy Beaver problem. We address the question of stability and error estimation, the sensitivity of the continued application of the method for wider coverage and better accuracy, and provide statistical evidence suggesting robustness. As with compression algorithms, this work promises to deliver a range of applications, and to provide insight into the question of complexity calculation of finite (and short) strings. Additional material can be found at the Algorithmic Nature Group website at http://www.algorithmicnature.org. An Online Algorithmic Complexity Calculator implementing this technique and making the data available to the research community is accessible at http://www.complexitycalculator.com.",2014-05-08 +21846375,An R package implementation of multifactor dimensionality reduction.,"

Background

A breadth of high-dimensional data is now available with unprecedented numbers of genetic markers and data-mining approaches to variable selection are increasingly being utilized to uncover associations, including potential gene-gene and gene-environment interactions. One of the most commonly used data-mining methods for case-control data is Multifactor Dimensionality Reduction (MDR), which has displayed success in both simulations and real data applications. Additional software applications in alternative programming languages can improve the availability and usefulness of the method for a broader range of users.

Results

We introduce a package for the R statistical language to implement the Multifactor Dimensionality Reduction (MDR) method for nonparametric variable selection of interactions. This package is designed to provide an alternative implementation for R users, with great flexibility and utility for both data analysis and research. The 'MDR' package is freely available online at http://www.r-project.org/. We also provide data examples to illustrate the use and functionality of the package.

Conclusions

MDR is a frequently-used data-mining method to identify potential gene-gene interactions, and alternative implementations will further increase this usage. We introduce a flexible software package for R users.",2011-08-16 +24245831,Does cancer deserve special treatment when health technologies are prioritized?,"Despite most new cancer treatments having relatively high costs and low health benefits, they are often funded ahead of treatments for other illnesses. And yet, according to the article by Dan Greenberg and colleagues, most Israeli oncologists and family physicians think that new cancer treatments should not receive such a high priority and that cost-effectiveness data should be used to support funding decisions. In this commentary, I point out that the increasing pressure worldwide when prioritizing health technologies to widen the scope of the benefits that are recognized beyond just narrowly-defined health benefits would almost certainly include the special characteristics of cancer. Future research would be worthwhile into how the criteria for prioritizing technologies should be incorporated into prioritization frameworks in practice, including, in particular, how to resolve the inherent trade-offs. This is a commentary on http://www.ijhpr.org/content/2/2/44/",2013-11-18 +22541597,Linking genes to diseases with a SNPedia-Gene Wiki mashup.,"

Background

A variety of topic-focused wikis are used in the biomedical sciences to enable the mass-collaborative synthesis and distribution of diverse bodies of knowledge. To address complex problems such as defining the relationships between genes and disease, it is important to bring the knowledge from many different domains together. Here we show how advances in wiki technology and natural language processing can be used to automatically assemble 'meta-wikis' that present integrated views over the data collaboratively created in multiple source wikis.

Results

We produced a semantic meta-wiki called the Gene Wiki+ that automatically mirrors and integrates data from the Gene Wiki and SNPedia. The Gene Wiki+, available at (http://genewikiplus.org/), captures 8,047 distinct gene-disease relationships. SNPedia accounts for 4,149 of the gene-disease pairs, the Gene Wiki provides 4,377 and only 479 appear independently in both sources. All of this content is available to query and browse and is provided as linked open data.

Conclusions

Wikis contain increasing amounts of diverse, biological information useful for elucidating the connections between genes and disease. The Gene Wiki+ shows how wiki technology can be used in concert with natural language processing to provide integrated views over diverse underlying data sources.",2012-04-24 +24077092,Causal inference in occupational epidemiology: accounting for the healthy worker effect by using structural nested models.,"In a recent issue of the Journal, Kirkeleit et al. (Am J Epidemiol. 2013;177(11):1218-1224) provided empirical evidence for the potential of the healthy worker effect in a large cohort of Norwegian workers across a range of occupations. In this commentary, we provide some historical context, define the healthy worker effect by using causal diagrams, and use simulated data to illustrate how structural nested models can be used to estimate exposure effects while accounting for the healthy worker survivor effect in 4 simple steps. We provide technical details and annotated SAS software (SAS Institute, Inc., Cary, North Carolina) code corresponding to the example analysis in the Web Appendices, available at http://aje.oxfordjournals.org/.",2013-09-27 +24432194,Pathway-GPS and SIGORA: identifying relevant pathways based on the over-representation of their gene-pair signatures.,"Motivation. Predominant pathway analysis approaches treat pathways as collections of individual genes and consider all pathway members as equally informative. As a result, at times spurious and misleading pathways are inappropriately identified as statistically significant, solely due to components that they share with the more relevant pathways. Results. We introduce the concept of Pathway Gene-Pair Signatures (Pathway-GPS) as pairs of genes that, as a combination, are specific to a single pathway. 
We devised and implemented a novel approach to pathway analysis, Signature Over-representation Analysis (SIGORA), which focuses on the statistically significant enrichment of Pathway-GPS in a user-specified gene list of interest. In a comparative evaluation of several published datasets, SIGORA outperformed traditional methods by delivering biologically more plausible and relevant results. Availability. An efficient implementation of SIGORA, as an R package with precompiled GPS data for several human and mouse pathway repositories is available for download from http://sigora.googlecode.com/svn/.",2013-12-19 +23888127,An analysis of a 'community-driven' reconstruction of the human metabolic network.,"Following a strategy similar to that used in baker's yeast (Herrgård et al. Nat Biotechnol 26:1155-1160, 2008). A consensus yeast metabolic network obtained from a community approach to systems biology (Herrgård et al. 2008; Dobson et al. BMC Syst Biol 4:145, 2010). Further developments towards a genome-scale metabolic model of yeast (Dobson et al. 2010; Heavner et al. BMC Syst Biol 6:55, 2012). Yeast 5-an expanded reconstruction of the Saccharomyces cerevisiae metabolic network (Heavner et al. 2012) and in Salmonella typhimurium (Thiele et al. BMC Syst Biol 5:8, 2011). A community effort towards a knowledge-base and mathematical model of the human pathogen Salmonella typhimurium LT2 (Thiele et al. 2011), a recent paper (Thiele et al. Nat Biotechnol 31:419-425, 2013). A community-driven global reconstruction of human metabolism (Thiele et al. 2013) described a much improved 'community consensus' reconstruction of the human metabolic network, called Recon 2, and the authors (that include the present ones) have made it freely available via a database at http://humanmetabolism.org/ and in SBML format at Biomodels (http://identifiers.org/biomodels.db/MODEL1109130000). 
This short analysis summarises the main findings, and suggests some approaches that will be able to exploit the availability of this model to advantage.",2013-07-12 +24655681,Future talk in later life.,"This article focuses on the relevance that the dimension of the future has for promoting healthy and active aging. Older people generally have difficulties in talking about the future and when they do they generally express very negative perspectives on it. The data analyzed in this paper are part of an on-going interdisciplinary research project: ""Aging, poverty and social exclusion: an interdisciplinary study on innovative support services"" (https://apseclunl.wordpress.com/). The project aims at documenting good practices in social intervention with older people who are at risk of exclusion. This study describes and critically discusses an activity carried out in Portugal among older women in a poor area in the suburb of Lisbon entitled ""self-awareness workshop on the future"". Through a detailed discourse analysis within an ethnomethodological framework the study shows age membership categorizations in use and categorization processes, examining the workshop interaction. In particular, the article describes how the psychologist works at deconstructing and problematizing the negative connotations related to age membership categories. Taking into consideration the interactionally constructed nature of aging and the material consequences that different attitudes towards aging can imply is very important in particular in relation to the provision of services to older people.",2014-03-15 +23846594,DoSA: Database of Structural Alignments.,"Protein structure alignment is a crucial step in protein structure-function analysis. Despite the advances in protein structure alignment algorithms, some of the local conformationally similar regions are mislabeled as structurally variable regions (SVRs). 
These regions are not well superimposed because of differences in their spatial orientations. The Database of Structural Alignments (DoSA) addresses this gap in identification of local structural similarities obscured in global protein structural alignments by realigning SVRs using an algorithm based on protein blocks. A set of protein blocks is a structural alphabet that abstracts protein structures into 16 unique local structural motifs. DoSA provides unique information about 159,780 conformationally similar and 56,140 conformationally dissimilar SVRs in 74,705 pairwise structural alignments of homologous proteins. The information provided on conformationally similar and dissimilar SVRs can be helpful to model loop regions. It is also conceivable that conformationally similar SVRs with conserved residues could potentially contribute toward functional integrity of homologues, and hence identifying such SVRs could be helpful in understanding the structural basis of protein function. Database URL: http://bo-protscience.fr/dosa/",2013-07-11 +23846593,lncRNome: a comprehensive knowledgebase of human long noncoding RNAs.,"The advent of high-throughput genome scale technologies has enabled us to unravel a large amount of the previously unknown transcriptionally active regions of the genome. Recent genome-wide studies have provided annotations of a large repertoire of various classes of noncoding transcripts. Long noncoding RNAs (lncRNAs) form a major proportion of these novel annotated noncoding transcripts, and are presently known to be involved in a number of functionally distinct biological processes. Over 18,000 transcripts are presently annotated as lncRNA, and encompass previously annotated classes of noncoding transcripts including large intergenic noncoding RNA, antisense RNA and processed pseudogenes. There is a significant gap in the resources providing a stable annotation, cross-referencing and biologically relevant information. 
lncRNome has been envisioned with the aim of filling this gap by integrating annotations on a wide variety of biologically significant information into a comprehensive knowledgebase. To the best of our knowledge, lncRNome is one of the largest and most comprehensive resources for lncRNAs. Database URL: http://genome.igib.res.in/lncRNome.",2013-07-11 +24072003,THRIVE score predicts outcomes with a third-generation endovascular stroke treatment device in the TREVO-2 trial.,"

Background and purpose

Several outcome prediction scores have been tested in patients receiving acute stroke treatment with previous generations of endovascular stroke treatment devices. The TREVO-2 trial was a randomized controlled trial comparing a novel endovascular stroke treatment device (the Trevo device) to a previous-generation endovascular stroke treatment device (the Merci device).

Methods

We used data from the TREVO-2 trial to validate the Totaled Health Risks in Vascular Events (THRIVE) score in patients receiving treatment with a third-generation endovascular stroke treatment device and to compare THRIVE to other predictive scores. We used logistic regression to model outcomes and compared score performance with receiver operating characteristic curve analysis.

Results

In the TREVO-2 trial, the THRIVE score strongly predicts clinical outcome and mortality. The relationship between THRIVE score and outcome is not influenced by either success of recanalization or the type of device used (Trevo versus Merci). The superiority of the Trevo device to the Merci device is evident particularly among patients with a low-to-moderate THRIVE score (0-5; 53.8% good outcome with Trevo versus 27.5% good outcome with Merci). In receiver operating characteristic curve analysis, the THRIVE score was comparable or superior to several other outcome prediction scores (HIAT, HIAT-2, SPAN-100, and iScore).

Conclusions

The THRIVE score strongly predicts clinical outcome and mortality in the TREVO-2 trial. Taken together with THRIVE validation data from patients receiving intravenous tissue-type plasminogen activator or no acute treatment, the THRIVE score has broad predictive power in patients with acute ischemic stroke, which is likely because THRIVE reflects a set of strong nonmodifiable predictors of stroke outcome. A free Web calculator for the THRIVE score is available at http://www.thrivescore.org.",2013-09-26 +23860256,Membrane protein structure determination - the next generation.,"The field of Membrane Protein Structural Biology has grown significantly since its first landmark in 1985 with the first three-dimensional atomic resolution structure of a membrane protein. Nearly twenty-six years later, the crystal structure of the beta2 adrenergic receptor in complex with G protein has contributed to another landmark in the field leading to the 2012 Nobel Prize in Chemistry. At present, more than 350 unique membrane protein structures solved by X-ray crystallography (http://blanco.biomol.uci.edu/mpstruc/exp/list, Stephen White Lab at UC Irvine) are available in the Protein Data Bank. The advent of genomics and proteomics initiatives combined with high-throughput technologies, such as automation, miniaturization, integration and third-generation synchrotrons, has enhanced membrane protein structure determination rate. X-ray crystallography is still the only method capable of providing detailed information on how ligands, cofactors, and ions interact with proteins, and is therefore a powerful tool in biochemistry and drug discovery. Yet the growth of membrane protein crystals suitable for X-ray diffraction studies amazingly remains a fine art and a major bottleneck in the field. It is often necessary to apply as many innovative approaches as possible. 
In this review we draw attention to the latest methods and strategies for the production of suitable crystals for membrane protein structure determination. In addition we also highlight the impact that third-generation synchrotron radiation has made in the field, summarizing the latest strategies used at synchrotron beamlines for screening and data collection from such demanding crystals. This article is part of a Special Issue entitled: Structural and biophysical characterisation of membrane protein-ligand binding.",2013-07-13 +23842462,DBATE: database of alternative transcripts expression.,"The use of high-throughput RNA sequencing technology (RNA-seq) allows whole transcriptome analysis, providing an unbiased and unabridged view of alternative transcript expression. Coupling splicing variant-specific expression with its functional inference is still an open and difficult issue for which we created the DataBase of Alternative Transcripts Expression (DBATE), a web-based repository storing expression values and functional annotation of alternative splicing variants. We processed 13 large RNA-seq panels from human healthy tissues and in disease conditions, reporting expression levels and functional annotations gathered and integrated from different sources for each splicing variant, using a variant-specific annotation transfer pipeline. The possibility to perform complex queries by cross-referencing different functional annotations permits the retrieval of desired subsets of splicing variant expression values that can be visualized in several ways, from simple to more informative. DBATE is intended as a novel tool to help appreciate how, and possibly why, the transcriptome expression is shaped. DATABASE URL: http://bioinformatica.uniroma2.it/DBATE/.",2013-07-09 +23842809,nhmmer: DNA homology search with profile HMMs.,"

Summary

Sequence database searches are an essential part of molecular biology, providing information about the function and evolutionary history of proteins, RNA molecules and DNA sequence elements. We present a tool for DNA/DNA sequence comparison that is built on the HMMER framework, which applies probabilistic inference methods based on hidden Markov models to the problem of homology search. This tool, called nhmmer, enables improved detection of remote DNA homologs, and has been used in combination with Dfam and RepeatMasker to improve annotation of transposable elements in the human genome.

Availability

nhmmer is a part of the new HMMER3.1 release. Source code and documentation can be downloaded from http://hmmer.org. HMMER3.1 is freely licensed under the GNU GPLv3 and should be portable to any POSIX-compliant operating system, including Linux and Mac OS/X.",2013-07-09 +24764461,relax: the analysis of biomolecular kinetics and thermodynamics using NMR relaxation dispersion data.,"

Unlabelled

Nuclear magnetic resonance (NMR) is a powerful tool for observing the motion of biomolecules at the atomic level. One technique, the analysis of relaxation dispersion phenomenon, is highly suited for studying the kinetics and thermodynamics of biological processes. Built on top of the relax computational environment for NMR dynamics is a new dispersion analysis designed to be comprehensive, accurate and easy-to-use. The software supports more models, both numeric and analytic, than current solutions. An automated protocol, available for scripting and driving the graphical user interface (GUI), is designed to simplify the analysis of dispersion data for NMR spectroscopists. Decreases in optimization time are granted by parallelization for running on computer clusters and by skipping an initial grid search by using parameters from one solution as the starting point for another -using analytic model results for the numeric models, taking advantage of model nesting, and using averaged non-clustered results for the clustered analysis.

Availability and implementation

The software relax is written in Python with C modules and is released under the GPLv3+ license. Source code and precompiled binaries for all major operating systems are available from http://www.nmr-relax.com.

Contact

edward@nmr-relax.com.",2014-04-09 +22308147,IDEOM: an Excel interface for analysis of LC-MS-based metabolomics data.,"

Summary

The application of emerging metabolomics technologies to the comprehensive investigation of cellular biochemistry has been limited by bottlenecks in data processing, particularly noise filtering and metabolite identification. IDEOM provides a user-friendly data processing application that automates filtering and identification of metabolite peaks, paying particular attention to common sources of noise and false identifications generated by liquid chromatography-mass spectrometry (LC-MS) platforms. Building on advanced processing tools such as mzMatch and XCMS, it allows users to run a comprehensive pipeline for data analysis and visualization from a graphical user interface within Microsoft Excel, a familiar program for most biological scientists.

Availability and implementation

IDEOM is provided free of charge at http://mzmatch.sourceforge.net/ideom.html, as a macro-enabled spreadsheet (.xlsb). Implementation requires Microsoft Excel (2007 or later). R is also required for full functionality.

Contact

michael.barrett@glasgow.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-02-04 +22563068,Joint Bayesian inference of condition-specific miRNA and transcription factor activities from combined gene and microRNA expression data.,"

Motivation

There have been many successful experimental and bioinformatics efforts to elucidate transcription factor (TF)-target networks in several organisms. For many organisms, these annotations are complemented by miRNA-target networks of good quality. Attempts that use these networks in combination with gene expression data to draw conclusions on TF or miRNA activity are, however, still relatively sparse.

Results

In this study, we propose Bayesian inference of regulation of transcriptional activity (BIRTA) as a novel approach to infer both, TF and miRNA activities, from combined miRNA and mRNA expression data in a condition specific way. That means our model explains mRNA and miRNA expression for a specific experimental condition by the activities of certain miRNAs and TFs, hence allowing for differentiating between switches from active to inactive (negative switch) and inactive to active (positive switch) forms. Extensive simulations of our model reveal its good prediction performance in comparison to other approaches. Furthermore, the utility of BIRTA is demonstrated at the example of Escherichia coli data comparing aerobic and anaerobic growth conditions, and by human expression data from pancreas and ovarian cancer.

Availability and implementation

The method is implemented in the R package birta, which is freely available for Bio-conductor (>=2.10) on http://www.bioconductor.org/packages/release/bioc/html/birta.html.",2012-05-04 +23895164,THetA: inferring intra-tumor heterogeneity from high-throughput DNA sequencing data.,"Tumor samples are typically heterogeneous, containing admixture by normal, non-cancerous cells and one or more subpopulations of cancerous cells. Whole-genome sequencing of a tumor sample yields reads from this mixture, but does not directly reveal the cell of origin for each read. We introduce THetA (Tumor Heterogeneity Analysis), an algorithm that infers the most likely collection of genomes and their proportions in a sample, for the case where copy number aberrations distinguish subpopulations. THetA successfully estimates normal admixture and recovers clonal and subclonal copy number aberrations in real and simulated sequencing data. THetA is available at http://compbio.cs.brown.edu/software/.",2013-07-29 +21999641,Chipster: user-friendly analysis software for microarray and other high-throughput data.,"

Background

The growth of high-throughput technologies such as microarrays and next generation sequencing has been accompanied by active research in data analysis methodology, producing new analysis methods at a rapid pace. While most of the newly developed methods are freely available, their use requires substantial computational skills. In order to enable non-programming biologists to benefit from the method development in a timely manner, we have created the Chipster software.

Results

Chipster (http://chipster.csc.fi/) brings a powerful collection of data analysis methods within the reach of bioscientists via its intuitive graphical user interface. Users can analyze and integrate different data types such as gene expression, miRNA and aCGH. The analysis functionality is complemented with rich interactive visualizations, allowing users to select datapoints and create new gene lists based on these selections. Importantly, users can save the performed analysis steps as reusable, automatic workflows, which can also be shared with other users. Being a versatile and easily extendable platform, Chipster can be used for microarray, proteomics and sequencing data. In this article we describe its comprehensive collection of analysis and visualization tools for microarray data using three case studies.

Conclusions

Chipster is a user-friendly analysis software for high-throughput data. Its intuitive graphical user interface enables biologists to access a powerful collection of data analysis and integration tools, and to visualize data interactively. Users can collaborate by sharing analysis sessions and workflows. Chipster is open source, and the server installation package is freely available.",2011-10-14 +24063684,"Impaired binding of standard initiation factors eIF3b, eIF4G and eIF4B to domain V of the live-attenuated coxsackievirus B3 Sabin3-like IRES--alternatives for 5'UTR-related cardiovirulence mechanisms.","Internal ribosome entry site (IRES) elements fold into highly organized conserved secondary and probably tertiary structures that guide the ribosome to an internal site of the RNA at the IRES 3'end. The composition of the cellular proteome is under the control of multiple processes, one of the most important being translation initiation. In each poliovirus Sabin vaccine strain, a single point mutation in the IRES secondary-structure domain V is a major determinant of neurovirulence and translation attenuation. Here we are extrapolating poliovirus findings to a genomic related virus named coxsackievirus B3 CVB3); a causative agent of viral myocarditis. We have previously reported that Sabin3-like mutation (U473 → C) introduced in the domain V sequence of the CVB3 IRES led to a defective mutant with a serious reduction in translation efficiency and ribosomal initiation complex assembly, besides an impaired RNA-protein binding pattern. With the aim to identify proteins interacting with both CVB3 wild-type and Sabin3-like domain V RNAs and to assess the effect of the Sabin3-like mutation on these potential interactions, we have used a proteomic approach. This procedure allowed the identification of three RNA-binding proteins interacting with the domain V: eIF4G (p220), eIF3b (p116) and eIF4B (p80). 
Moreover, we report that this single-nucleotide exchange impairs the interaction pattern and the binding affinity of these standard translation initiation factors within the IRES domain V of the mutant strain. Taken together, these data indicate how this decisive Sabin3-like mutation mediates viral translation attenuation; playing a key role in the understanding of the cardiovirulence attenuation within this construct. Hence, these data provide further evidence for the crucial role of RNA structure for the IRES activity, and reinforce the idea of a distribution of function between the different IRES structural domains.

Virtual slide

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/6160165131045880.",2013-09-24 +30732046,First Report of Leaf Blight Caused by Nigrospora sphaerica on Curcuma in China.,"Curcuma (family Zingiberaceae) is commonly cultivated for the use of rhizomes within traditional Chinese medicines. In October 2009 and 2010, severe leaf blight was observed on Curcuma wenyujin Y.H. Chen & C. Ling (4) in fields located in Ruian, China. The area of cultivation in Ruian encompasses 90% of the production in Zhejiang Province. Disease incidence was approximately 90% of plants observed in affected fields. Early symptoms were yellow-to-brown, irregular-shaped lesions on the leaf margin or tip. After several days, lesions expanded along the mid-vein until the entire leaf was destroyed. Blighted leaves turned grayish to dark brown and withered, and severely affected plants died. Eight fungal isolates were recovered from symptomatic C. wenyujin leaves, collected from eight different fields, on potato dextrose agar (PDA). These fungal colonies were initially white, becoming light to dark gray and produced black, spherical to subspherical, single-celled conidia (14 to 17 × 12 to 15 μm), which were borne on a hyaline vesicle at the tip of the conidiophores. On the basis of these morphological features, the isolates appeared to be similar to Nigrospora sphaerica (2). Strain ZJW-1 was selected as a representative for molecular identification. Genomic DNA was extracted from the isolate, and the internal transcribed spacer (ITS) region of the ribosomal DNA (ITS1-5.8S-ITS2) was amplified using ITS1 (5'-TCCGTAGGTGAACCTGCGG-3') and ITS4 (5'-TCCTCCGCTTATTGATATGC-3') primers (3). The ITS region was further cloned and sequenced (GenBank Accession No. JF738028) and was 99% identical to N. sphaerica (GenBank Accession No. FJ478134.1). On the basis of morphological data and the ITS rDNA sequence, the isolate was determined to be N. sphaerica. 
Pathogenicity tests were conducted on four leaves of four C. wenyujin plants by placing agar pieces (5 mm in diameter) from 8-day-old cultures on pushpin-wounded leaves. An equal number of control plants were wounded and inoculated with noncolonized PDA agar pieces. Plants were placed in moist chambers at 25°C with a 12-h photoperiod. Brown-to-black lesions were observed on wounded leaves after 3 days and expanded to an average of 56 × 40 mm 15 days after inoculation. No symptoms developed on the control leaves. The pathogen was reisolated from the margins of necrotic tissues but not from the controls. The pathogen has been reported as a leaf pathogen on several hosts worldwide (1). To our knowledge, this is the first report of N. sphaerica as a leaf pathogen of C. wenyujin in China. Future research will focus primarily on management of this disease. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, USDA-ARS, Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , March 31, 2011. (2) E. W. Mason. Trans. Brit. Mycol. Soc. 12:152, 1927. (3) T. J. White et al. PCR Protocols: A Guide to Methods and Applications. Academic Press, San Diego, 1990. (4) J. Zhao et al. Molecules 15:7547, 2010.",2011-09-01 +25257702,Down-regulated miR-22 as predictive biomarkers for prognosis of epithelial ovarian cancer.,"

Background

Recent studies have demonstrated that microRNA-22 (miR-22) was deregulated in many types of cancers and was involved in various cellular processes related to carcinogenesis. However, the clinical significance and prognostic value of miR-22 in epithelial ovarian cancer (EOC) haven't been investigated.

Methods

109 pairs of fresh EOC tissue and matched adjacent normal tissue specimens were collected between May 2007 and March 2013. Real-time quantitative RT-PCR assay was performed to evaluate the expression levels of miR-22. The chi-square test was used to assess miR-22 expression with respect to clinicopathological parameters. The survival curves of the patients were determined using the Kaplan-Meier method and Cox regression, and the log-rank test was used for statistical evaluations.

Results

miR-22 expression in EOC tissues was significantly lower than that in matched normal adjacent tissues (mean ± SD: 1.944 ± 1.026 vs. 4.981 ± 1.507, P<0.0001). Low miR-22 expression level was correlated with FIGO stage (P=0.006), tumor grade (P=0.03), and lymph node metastases (P=0.01). Kaplan-Meier analysis with the log-rank test indicated that low miR-22 expression had a significant impact on overall survival (44.4% vs. 64.5%; P=0.005) and progression-free survival (23.5% vs. 52.6%; P=0.004).

Conclusions

Our data demonstrated that the expression of miR-22 was downregulated in EOC, and associated with overall survival as well as progression-free survival, suggesting that miR-22 could serve as an efficient prognostic factor for EOC patients.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_178.",2014-09-26 +25361681,"Expression of P16 in high-risk human papillomavirus related lesions of the uterine cervix in a government hospital, Malaysia.","

Background

Cervical cancer is one of the most common cancers affecting women worldwide. It is well established that human papilloma virus (HPV) infection is the prime risk factor in the development of cervical cancer. The current screening and diagnostic tests have limitations in identifying the range of lesions caused by HPV. The current study aims to evaluate the diagnostic value of p16 immunohistochemical (IHC) investigation in high-risk human papillomavirus (HR-HPV) related lesions of the uterine cervix in Hospital Tuanku Jaafar, Seremban, Malaysia.

Methods

A total of 75 cases were selected from the records of Pathology services, Hospital Tuanku Ja'afar, Seremban. The samples were collected in three separate groups (n=25 per group) as Carcinoma cervix, Carcinoma in situ and Chronic cervicitis. The demographic data of the patients and the representative paraffin blocks were retrieved from Hospital Tuanku Ja'afar, Seremban. The immunohistochemical staining with p16 and HPV 16 L1 were done on all cases. The staining intensity and density were observed and compared among the three groups of cases.

Results

Immunohistochemistry of p16INK4A staining shows nil (0/25) expression in the cervicitis patients, 72% (18/25) in CIN patients and 100% (25/25) in cervical carcinoma. HPV 16 L1 was positive in 100% (25/25) of cervicitis patients, 96% (24/25) of CIN patients and 40% (10/25) of cervical cancers patients. A chi square test was used to analyze the result and the obtained p value was <0.05.

Conclusion

p16 expression was strongly observed in cervical cancer and minimally observed in cervicitis. This indicates that p16 immunohistochemistry investigations can aid in classifying cervical lesions as benign, in situ or malignant.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_202.",2014-11-01 +23734622,Biotea: RDFizing PubMed Central in support for the paper as an interface to the Web of Data.,"

Background

The World Wide Web has become a dissemination platform for scientific and non-scientific publications. However, most of the information remains locked up in discrete documents that are not always interconnected or machine-readable. The connectivity tissue provided by RDF technology has not yet been widely used to support the generation of self-describing, machine-readable documents.

Results

In this paper, we present our approach to the generation of self-describing machine-readable scholarly documents. We understand the scientific document as an entry point and interface to the Web of Data. We have semantically processed the full-text, open-access subset of PubMed Central. Our RDF model and resulting dataset make extensive use of existing ontologies and semantic enrichment services. We expose our model, services, prototype, and datasets at http://biotea.idiginfo.org/

Conclusions

The semantic processing of biomedical literature presented in this paper embeds documents within the Web of Data and facilitates the execution of concept-based queries against the entire digital library. Our approach delivers a flexible and adaptable set of tools for metadata enrichment and semantic processing of biomedical documents. Our model delivers a semantically rich and highly interconnected dataset with self-describing content so that software can make effective use of it.",2013-04-15 +23716195,A method for finding consensus breakpoints in the cancer genome from copy number data.,"

Motivation

Recurrent DNA breakpoints in cancer genomes indicate the presence of critical functional elements for tumor development. Identifying them can help determine new therapeutic targets. High-dimensional DNA microarray experiments like arrayCGH afford the identification of DNA copy number breakpoints with high precision, offering a solid basis for computational estimation of recurrent breakpoint locations.

Results

We introduce a method for identification of recurrent breakpoints (consensus breakpoints) from copy number aberration datasets. The method is based on weighted kernel counting of breakpoints around genomic locations. Counts larger than expected by chance are considered significant. We show that the consensus breakpoints facilitate consensus segmentation of the samples. We apply our method to three arrayCGH datasets and show that by using consensus segmentation we achieve significant dimension reduction, which is useful for the task of prediction of tumor phenotype based on copy number data. We use our approach for classification of neuroblastoma tumors from different age groups and confirm the recent recommendation for the choice of age cut-off for differential treatment of 18 months. We also investigate the (epi)genetic properties at consensus breakpoint locations for seven datasets and show enrichment in overlap with important functional genomic regions.

Availability

Implementation in R of our approach can be found at http://www.mpi-inf.mpg.de/~laura/FeatureGrouping.html.

Contact

laura@mpi-inf.mpg.de.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-28 +23376350,Filtering duplicate reads from 454 pyrosequencing data.,"

Motivation

Throughout the recent years, 454 pyrosequencing has emerged as an efficient alternative to traditional Sanger sequencing and is widely used in both de novo whole-genome sequencing and metagenomics. Especially the latter application is extremely sensitive to sequencing errors and artificially duplicated reads. Both are common in 454 pyrosequencing and can create a strong bias in the estimation of diversity and composition of a sample. To date, there are several tools that aim to remove both sequencing noise and duplicates. Nevertheless, duplicate removal is often based on nucleotide sequences rather than on the underlying flow values, which contain additional information.

Results

With the novel tool JATAC, we present an approach towards a more accurate duplicate removal by analysing flow values directly. Making use of previous findings on 454 flow data characteristics, we combine read clustering with Bayesian distance measures. Finally, we provide a benchmark with an existing algorithm.

Availability

JATAC is freely available under the General Public License from http://malde.org/ketil/jatac/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-01 +23828785,Rapid similarity search of proteins using alignments of domain arrangements.,"

Motivation

Homology search methods are dominated by the central paradigm that sequence similarity is a proxy for common ancestry and, by extension, functional similarity. For determining sequence similarity in proteins, most widely used methods use models of sequence evolution and compare amino-acid strings in search for conserved linear stretches. Probabilistic models or sequence profiles capture the position-specific variation in an alignment of homologous sequences and can identify conserved motifs or domains. While profile-based search methods are generally more accurate than simple sequence comparison methods, they tend to be computationally more demanding. In recent years, several methods have emerged that perform protein similarity searches based on domain composition. However, few methods have considered the linear arrangements of domains when conducting similarity searches, despite strong evidence that domain order can harbour considerable functional and evolutionary signal.

Results

Here, we introduce an alignment scheme that uses a classical dynamic programming approach to the global alignment of domains. We illustrate that representing proteins as strings of domains (domain arrangements) and comparing these strings globally allows for a both fast and sensitive homology search. Further, we demonstrate that the presented methods complement existing methods by finding similar proteins missed by popular amino-acid-based comparison methods.

Availability

An implementation of the presented algorithms, a web-based interface as well as a command-line program for batch searching against the UniProt database can be found at http://rads.uni-muenster.de. Furthermore, we provide a JAVA API for programmatic access to domain-string–based search methods.",2013-07-04 +24618469,pRESTO: a toolkit for processing high-throughput sequencing raw reads of lymphocyte receptor repertoires.,"

Unlabelled

Driven by dramatic technological improvements, large-scale characterization of lymphocyte receptor repertoires via high-throughput sequencing is now feasible. Although promising, the high germline and somatic diversity, especially of B-cell immunoglobulin repertoires, presents challenges for analysis requiring the development of specialized computational pipelines. We developed the REpertoire Sequencing TOolkit (pRESTO) for processing reads from high-throughput lymphocyte receptor studies. pRESTO processes raw sequences to produce error-corrected, sorted and annotated sequence sets, along with a wealth of metrics at each step. The toolkit supports multiplexed primer pools, single- or paired-end reads and emerging technologies that use single-molecule identifiers. pRESTO has been tested on data generated from Roche and Illumina platforms. It has a built-in capacity to parallelize the work between available processors and is able to efficiently process millions of sequences generated by typical high-throughput projects.

Availability and implementation

pRESTO is freely available for academic use. The software package and detailed tutorials may be downloaded from http://clip.med.yale.edu/presto.",2014-03-10 +25341068,"Udock, the interactive docking entertainment system.","Protein-protein interactions play a crucial role in biological processes. Protein docking calculations' goal is to predict, given two proteins of known structures, the associate conformation of the corresponding complex. Here, we present a new interactive protein docking system, Udock, that makes use of users' cognitive capabilities added up. In Udock, the users tackle simplified representations of protein structures and explore protein-protein interfaces' conformational space using a gamified interactive docking system with on the fly scoring. We assumed that if given appropriate tools, a naïve user's cognitive capabilities could provide relevant data for (1) the prediction of correct interfaces in binary protein complexes and (2) the identification of the experimental partner in interaction among a set of decoys. To explore this approach experimentally, we conducted a preliminary two week long playtest where the registered users could perform a cross-docking on a dataset comprising 4 binary protein complexes. The users explored almost all the surface of the proteins that were available in the dataset but favored certain regions that seemed more attractive as potential docking spots. These favored regions were located inside or nearby the experimental binding interface for 5 out of the 8 proteins in the dataset. For most of them, the best scores were obtained with the experimental partner. The alpha version of Udock is freely accessible at http://udock.fr.",2014-05-22 +23698862,ShereKhan--calculating exchange parameters in relaxation dispersion data from CPMG experiments.,"

Summary

Dynamics governing the function of biomolecules are usually described as exchange processes and can be monitored at atomic resolution with nuclear magnetic resonance (NMR) relaxation dispersion data. Here, we present a new tool for the analysis of CPMG relaxation dispersion profiles (ShereKhan). The web interface to ShereKhan provides a user-friendly environment for the analysis.

Availability

A stable version of ShereKhan, the web application and documentation are available at http://sherekhan.bionmr.org.

Contact

dole@nmr.mpibpc.mpg.de or mako@nmr.mpibpc.mpg.de.",2013-05-21 +24516529,Robust regression analysis of copy number variation data based on a univariate score.,"

Motivation

The discovery that copy number variants (CNVs) are widespread in the human genome has motivated development of numerous algorithms that attempt to detect CNVs from intensity data. However, all approaches are plagued by high false discovery rates. Further, because CNVs are characterized by two dimensions (length and intensity) it is unclear how to order called CNVs to prioritize experimental validation.

Results

We developed a univariate score that correlates with the likelihood that a CNV is true. This score can be used to order CNV calls in such a way that calls having larger scores are more likely to overlap a true CNV. We developed cnv.beast, a computationally efficient algorithm for calling CNVs that uses robust backward elimination regression to keep CNV calls with scores that exceed a user-defined threshold. Using an independent dataset that was measured using a different platform, we validated our score and showed that our approach performed better than six other currently-available methods.

Availability

cnv.beast is available at http://www.duke.edu/~asallen/Software.html.",2014-02-07 +23819639,The association between XPC Lys939Gln gene polymorphism and urinary bladder cancer susceptibility: a systematic review and meta-analysis.,"

Background

Numerous epidemiological studies have been conducted to explore the association between the Lys939Gln polymorphism of the Xeroderma pigmentosum group C (XPC) gene and urinary bladder cancer susceptibility. However, the results remain inconclusive. In order to derive a more precise estimation of this relationship, a large and updated meta-analysis was performed in this study.

Methods

A comprehensive search was conducted through researching MEDLINE, EMBASE, PubMed, Web of Science, China Biomedical Literature database (CBM) and China National Knowledge Infrastructure (CNKI) databases before June 2013. Crude odds ratios (ORs) with 95% confidence intervals (CIs) were calculated to estimate the strength of the association.

Results

A total of 12 studies with 4828 cases and 4890 controls for evaluating the XPC Lys939Gln polymorphism and urinary bladder cancer were included. Overall, significant associations between the XPC Lys939Gln polymorphism and urinary bladder cancer risk were found for the homozygous model (OR = 1.352, 95% CI = 1.088-1.681), the heterozygous model (OR = 1.354, 95% CI = 1.085-1.688), and the allele comparison (OR = 1.109, 95% CI = 1.013-1.214). In subgroup analysis by ethnicity and source of controls, there were still significant associations detected in some genetic models.

Conclusion

Our meta-analysis suggested that the XPC Lys939Gln polymorphism contributed to the risk of urinary bladder cancer.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1001118393101798.",2013-07-02 +23878509,"Circumpolar dataset of sequenced specimens of Promachocrinus kerguelensis (Echinodermata, Crinoidea).","This circumpolar dataset of the comatulid (Echinodermata: Crinoidea) Promachocrinus kerguelensis (Carpenter, 1888) from the Southern Ocean, documents biodiversity associated with the specimens sequenced in Hemery et al. (2012). The aim of Hemery et al. (2012) paper was to use phylogeographic and phylogenetic tools to assess the genetic diversity, demographic history and evolutionary relationships of this very common and abundant comatulid, in the context of the glacial history of the Antarctic and Sub-Antarctic shelves (Thatje et al. 2005, 2008). Over one thousand three hundred specimens (1307) used in this study were collected during seventeen cruises from 1996 to 2010, in eight regions of the Southern Ocean: Kerguelen Plateau, Davis Sea, Dumont d'Urville Sea, Ross Sea, Amundsen Sea, West Antarctic Peninsula, East Weddell Sea and Scotia Arc including the tip of the Antarctic Peninsula and the Bransfield Strait. We give here the metadata of this dataset, which lists sampling sources (cruise ID, ship name, sampling date, sampling gear), sampling sites (station, geographic coordinates, depth) and genetic data (phylogroup, haplotype, sequence ID) for each of the 1307 specimens. The identification of the specimens was controlled by an expert taxonomist specialist of crinoids (Marc Eléaume, Muséum national d'Histoire naturelle, Paris) and all the COI sequences were matched against those available on the Barcode of Life Data System (BOLD: http://www.boldsystems.org/index.php/IDS_OpenIdEngine). This dataset can be used by studies dealing with, among other interests, Antarctic and/or crinoid diversity (species richness, distribution patterns), biogeography or habitat / ecological niche modeling. 
This dataset is accessible through the GBIF network at http://ipt.biodiversity.aq/resource.do?r=proke.",2013-07-04 +23819658,OvidSP Medline-to-PubMed search filter translation: a methodology for extending search filter range to include PubMed's unique content.,"

Background

PubMed translations of OvidSP Medline search filters offer searchers improved ease of access. They may also facilitate access to PubMed's unique content, including citations for the most recently published biomedical evidence. Retrieving this content requires a search strategy comprising natural language terms ('textwords'), rather than Medical Subject Headings (MeSH). We describe a reproducible methodology that uses a validated PubMed search filter translation to create a textword-only strategy to extend retrieval to PubMed's unique heart failure literature.

Methods

We translated an OvidSP Medline heart failure search filter for PubMed and established version equivalence in terms of indexed literature retrieval. The PubMed version was then run within PubMed to identify citations retrieved by the filter's MeSH terms (Heart failure, Left ventricular dysfunction, and Cardiomyopathy). It was then rerun with the same MeSH terms restricted to searching on title and abstract fields (i.e. as 'textwords'). Citations retrieved by the MeSH search but not the textword search were isolated. Frequency analysis of their titles/abstracts identified natural language alternatives for those MeSH terms that performed less effectively as textwords. These terms were tested in combination to determine the best performing search string for reclaiming this 'lost set'. This string, restricted to searching on PubMed's unique content, was then combined with the validated PubMed translation to extend the filter's performance in this database.

Results

The PubMed heart failure filter retrieved 6829 citations. Of these, 834 (12%) failed to be retrieved when MeSH terms were converted to textwords. Frequency analysis of the 834 citations identified five high frequency natural language alternatives that could improve retrieval of this set (cardiac failure, cardiac resynchronization, left ventricular systolic dysfunction, left ventricular diastolic dysfunction, and LV dysfunction). Together these terms reclaimed 157/834 (18.8%) of lost citations.

Conclusions

MeSH terms facilitate precise searching in PubMed's indexed subset. They may, however, work less effectively as search terms prior to subject indexing. A validated PubMed search filter can be used to develop a supplementary textword-only search strategy to extend retrieval to PubMed's unique content. A PubMed heart failure search filter is available on the CareSearch website (http://www.caresearch.com.au) providing access to both indexed and non-indexed heart failure evidence.",2013-07-02 +23812998,The RNA Newton polytope and learnability of energy parameters.,"

Motivation

Computational RNA structure prediction is a mature and important problem that has received a new wave of attention with the discovery of regulatory non-coding RNAs and the advent of high-throughput transcriptome sequencing. Despite nearly two score years of research on RNA secondary structure and RNA-RNA interaction prediction, the accuracy of the state-of-the-art algorithms is still far from satisfactory. So far, researchers have proposed increasingly complex energy models and improved parameter estimation methods, experimental and/or computational, in anticipation of endowing their methods with enough power to solve the problem. The output has disappointingly been only modest improvements, not matching the expectations. Even recent massively featured machine learning approaches were not able to break the barrier. Why is that?

Approach

The first step toward high-accuracy structure prediction is to pick an energy model that is inherently capable of predicting each and every one of known structures to date. In this article, we introduce the notion of learnability of the parameters of an energy model as a measure of such an inherent capability. We say that the parameters of an energy model are learnable iff there exists at least one set of such parameters that renders every known RNA structure to date the minimum free energy structure. We derive a necessary condition for the learnability and give a dynamic programming algorithm to assess it. Our algorithm computes the convex hull of the feature vectors of all feasible structures in the ensemble of a given input sequence. Interestingly, that convex hull coincides with the Newton polytope of the partition function as a polynomial in energy parameters. To the best of our knowledge, this is the first approach toward computing the RNA Newton polytope and a systematic assessment of the inherent capabilities of an energy model. The worst case complexity of our algorithm is exponential in the number of features. However, dimensionality reduction techniques can provide approximate solutions to avoid the curse of dimensionality.

Results

We demonstrated the application of our theory to a simple energy model consisting of a weighted count of A-U, C-G and G-U base pairs. Our results show that this simple energy model satisfies the necessary condition for more than half of the input unpseudoknotted sequence-structure pairs (55%) chosen from the RNA STRAND v2.0 database and severely violates the condition for ~ 13%, which provide a set of hard cases that require further investigation. From 1350 RNA strands, the observed 3D feature vector for 749 strands is on the surface of the computed polytope. For 289 RNA strands, the observed feature vector is not on the boundary of the polytope but its distance from the boundary is not more than one. A distance of one essentially means one base pair difference between the observed structure and the closest point on the boundary of the polytope, which need not be the feature vector of a structure. For 171 sequences, this distance is larger than two, and for only 11 sequences, this distance is larger than five.

Availability

The source code is available on http://compbio.cs.wayne.edu/software/rna-newton-polytope.",2013-07-01 +23818526,CHD@ZJU: a knowledgebase providing network-based research platform on coronary heart disease.,"Coronary heart disease (CHD), the leading cause of global morbidity and mortality in adults, has been reported to be associated with hundreds of genes. A comprehensive understanding of the CHD-related genes and their corresponding interactions is essential to advance the translational research on CHD. Accordingly, we construct this knowledgebase, CHD@ZJU, which records CHD-related information (genes, pathways, drugs and references) collected from different resources and through text-mining method followed by manual confirmation. In current release, CHD@ZJU contains 660 CHD-related genes, 45 common pathways and 1405 drugs accompanied with >8000 supporting references. Almost half of the genes collected in CHD@ZJU were novel to other publicly available CHD databases. Additionally, CHD@ZJU incorporated the protein-protein interactions to investigate the cross-talk within the pathways from a multi-layer network view. These functions offered by CHD@ZJU would allow researchers to dissect the molecular mechanism of CHD in a systematic manner and therefore facilitate the research on CHD-related multi-target therapeutic discovery. Database URL: http://tcm.zju.edu.cn/chd/",2013-07-01 +27480062,Prediction of Milk/Plasma Concentration Ratios of Drugs and Environmental Pollutants Using In Silico Tools: Classification and Regression Based QSARs and Pharmacophore Mapping.,"A large set of 185 compounds with diverse molecular structures and different mechanisms of therapeutic actions was used to develop and validate statistically significant classification and regression based QSTR models for predicting partitioning of drugs/chemicals into breast milk. 
Pharmacophore mapping was also carried out which showed four important features required for lower risk of secretion into milk: (i) hydrophobic group (HYD), (ii) ring aromatic group (RA), (iii) negative ionizable (NegIon) and (iv) hydrogen bond donor (HBA). QSTR and pharmacophore models were rigorously validated internally as well as externally to check the possibilities of any chance correlation and judge the predictive potential of the models. Pharmacological distribution diagrams (PDDs) were used for the classification model as a visualizing technique for the identification and selection of chemicals with lower partitioning into milk. Our in silico models enable identification of the essential structural attributes and quantification of the prime molecular pre-requisites which were chiefly responsible for secretion into milk. The developed models were also implemented to screen milk/plasma partitioning potential for a huge number of DrugBank database (http://www.drugbank.ca/) compounds.",2013-07-01 +23815072,Jenner-predict server: prediction of protein vaccine candidates (PVCs) in bacteria based on host-pathogen interactions.,"

Background

Subunit vaccines based on recombinant proteins have been effective in preventing infectious diseases and are expected to meet the demands of future vaccine development. Computational approach, especially reverse vaccinology (RV) method has enormous potential for identification of protein vaccine candidates (PVCs) from a proteome. The existing protective antigen prediction software and web servers have low prediction accuracy leading to limited applications for vaccine development. Besides machine learning techniques, those software and web servers have considered only protein's adhesin-likeliness as criterion for identification of PVCs. Several non-adhesin functional classes of proteins involved in host-pathogen interactions and pathogenesis are known to provide protection against bacterial infections. Therefore, knowledge of bacterial pathogenesis has potential to identify PVCs.

Results

A web server, Jenner-Predict, has been developed for prediction of PVCs from proteomes of bacterial pathogens. The web server targets host-pathogen interactions and pathogenesis by considering known functional domains from protein classes such as adhesin, virulence, invasin, porin, flagellin, colonization, toxin, choline-binding, penicillin-binding, transferrin-binding, fibronectin-binding and solute-binding. It predicts non-cytosolic proteins containing the above domains as PVCs. It also provides vaccine potential of PVCs in terms of their possible immunogenicity by comparing with experimentally known IEDB epitopes, absence of autoimmunity and conservation in different strains. Predicted PVCs are prioritized so that only few prospective PVCs could be validated experimentally. The performance of web server was evaluated against known protective antigens from diverse classes of bacteria reported in Protegen database and datasets used for VaxiJen server development. The web server efficiently predicted known vaccine candidates reported from Streptococcus pneumoniae and Escherichia coli proteomes. The Jenner-Predict server outperformed NERVE, Vaxign and VaxiJen methods. It has sensitivity of 0.774 and 0.711 for Protegen and VaxiJen dataset, respectively while specificity of 0.940 has been obtained for the latter dataset.

Conclusions

Better prediction accuracy of Jenner-Predict web server signifies that domains involved in host-pathogen interactions and pathogenesis are better criteria for prediction of PVCs. The web server has successfully predicted maximum known PVCs belonging to different functional classes. Jenner-Predict server is freely accessible at http://117.211.115.67/vaccine/home.html.",2013-07-01 +22075998,ENCODE whole-genome data in the UCSC Genome Browser: update 2012.,"The Encyclopedia of DNA Elements (ENCODE) Consortium is entering its 5th year of production-level effort generating high-quality whole-genome functional annotations of the human genome. The past year has brought the ENCODE compendium of functional elements to critical mass, with a diverse set of 27 biochemical assays now covering 200 distinct human cell types. Within the mouse genome, which has been under study by ENCODE groups for the past 2 years, 37 cell types have been assayed. Over 2000 individual experiments have been completed and submitted to the Data Coordination Center for public use. UCSC makes this data available on the quality-reviewed public Genome Browser (http://genome.ucsc.edu) and on an early-access Preview Browser (http://genome-preview.ucsc.edu). Visual browsing, data mining and download of raw and processed data files are all supported. An ENCODE portal (http://encodeproject.org) provides specialized tools and information about the ENCODE data sets.",2011-11-09 +24649511,Enterococcal Genomics,"Enterococcal genomics is a rapidly growing area of study. The first enterococcal genome sequence—that of Enterococcus faecalis V583—was published ten years ago (McShan & Shankar, 2002; Paulsen, et al., 2003), and complete or draft genome sequences of various enterococcal strains and species now number in the hundreds (http://www.ncbi.nlm.nih.gov/genome). 
Concurrent with rapid advances in genome sequencing, the sequencing-based classification scheme of multilocus sequence typing (MLST) has been used to interrogate population structures of the two enterococcal species that are most associated with human health and disease, E. faecalis and Enterococcus faecium. These two species also constitute the bulk of enterococcal genome sequence data that has been generated to date. This wealth of genomic data has allowed for an investigation of enterococcal diversity at a depth not previously achievable. Genomic studies in enterococci have been driven by overarching questions, such as: Why do multiple species of enterococci exist that inhabit seemingly identical niches, such as E. faecalis and E. faecium in the human gut, and what ecological factors have contributed to their divergence from a common ancestor? Within an enterococcal species such as E. faecalis or E. faecium, what qualities distinguish one strain from another? Are infection- or hospital-derived strains evolutionarily distinct from strains that benignly co-exist in the complex microbial consortium of the healthy human intestine? Related to this, have antibiotic use and the nosocomial environment led to changes in the enterococcal genome and/or its population structure? This chapter highlights major advances in enterococcal genomics, including the development of MLST schemes to study the population structures of E. faecalis and E. faecium; comparative genome hybridization (CGH) studies to catalog the genomic contents of hundreds of E. faecalis and E. faecium strains; and significant findings from genome sequencing of multiple enterococcal species, beginning with the discovery and sequencing of the E. faecalis pathogenicity island (PAI). Additionally, we review the use of genome resequencing as a tool to study the short-term evolution of E. faecalis and the use of metagenomics to assemble in situ enterococcal genomes. 
In concluding the chapter, we discuss future perspectives in enterococcal genomics, including pressing questions that should drive future research in this field. While comparative genomics in enterococci has rapidly advanced over the last ten years, the number of genomes discussed here pales in comparison to what has been emerging—136 enterococcal genomes have been sequenced as part of the Human Microbiome Project (http://www.hmpdacc.org/), and 406 more were sequenced in a large-scale enterococcal genome sequencing endeavor performed in a multi-national collaboration with the Broad Institute (Cambridge, MA). Clearly, our foray into enterococcal genomics has only just begun.",2014-03-21 +22085896,ARSyN: a method for the identification and removal of systematic noise in multifactorial time course microarray experiments.,"Transcriptomic profiling experiments that aim to the identification of responsive genes in specific biological conditions are commonly set up under defined experimental designs that try to assess the effects of factors and their interactions on gene expression. Data from these controlled experiments, however, may also contain sources of unwanted noise that can distort the signal under study, affect the residuals of applied statistical models, and hamper data analysis. Commonly, normalization methods are applied to transcriptomics data to remove technical artifacts, but these are normally based on general assumptions of transcript distribution and greatly ignore both the characteristics of the experiment under consideration and the coordinative nature of gene expression. In this paper, we propose a novel methodology, ARSyN, for the preprocessing of microarray data that takes into account these 2 last aspects. 
By combining analysis of variance (ANOVA) modeling of gene expression values and multivariate analysis of estimated effects, the method identifies the nonstructured part of the signal associated to the experimental factors (the noise within the signal) and the structured variation of the ANOVA errors (the signal of the noise). By removing these noise fractions from the original data, we create a filtered data set that is rich in the information of interest and includes only the random noise required for inferential analysis. In this work, we focus on multifactorial time course microarray (MTCM) experiments with 2 factors: one quantitative such as time or dosage and the other qualitative, as tissue, strain, or treatment. However, the method can be used in other situations such as experiments with only one factor or more complex designs with more than 2 factors. The filtered data obtained after applying ARSyN can be further analyzed with the appropriate statistical technique to obtain the biological information required. To evaluate the performance of the filtering strategy, we have applied different statistical approaches for MTCM analysis to several real and simulated data sets, studying also the efficiency of these techniques. By comparing the results obtained with the original and ARSyN filtered data and also with other filtering techniques, we can conclude that the proposed method increases the statistical power to detect biological signals, especially in cases where there are high levels of structural noise. Software for ARSyN is freely available at http://www.ua.es/personal/mj.nueda.",2011-11-14 +23300665,Crumple: a method for complete enumeration of all possible pseudoknot-free RNA secondary structures.,"The diverse landscape of RNA conformational space includes many canyons and crevices that are distant from the lowest minimum free energy valley and remain unexplored by traditional RNA structure prediction methods. 
A complete description of the entire RNA folding landscape can facilitate identification of biologically important conformations. The Crumple algorithm rapidly enumerates all possible non-pseudoknotted structures for an RNA sequence without consideration of thermodynamics while filtering the output with experimental data. The Crumple algorithm provides an alternative approach to traditional free energy minimization programs for RNA secondary structure prediction. A complete computation of all non-pseudoknotted secondary structures can reveal structures that would not be predicted by methods that sample the RNA folding landscape based on thermodynamic predictions. The free energy minimization approach is often successful but is limited by not considering RNA tertiary and protein interactions and the possibility that kinetics rather than thermodynamics determines the functional RNA fold. Efficient parallel computing and filters based on experimental data make practical the complete enumeration of all non-pseudoknotted structures. Efficient parallel computing for Crumple is implemented in a ring graph approach. Filters for experimental data include constraints from chemical probing of solvent accessibility, enzymatic cleavage of paired or unpaired nucleotides, phylogenetic covariation, and the minimum number and lengths of helices determined from crystallography or cryo-electron microscopy. The minimum number and length of helices has a significant effect on reducing conformational space. Pairing constraints reduce conformational space more than single nucleotide constraints. Examples with Alfalfa Mosaic Virus RNA and Trypanosoma brucei guide RNA demonstrate the importance of evaluating all possible structures when pseudoknots, RNA-protein interactions, and metastable structures are important for biological function. 
Crumple software is freely available at http://adenosine.chem.ou.edu/software.html.",2012-12-27 +24397582,X13CMS: global tracking of isotopic labels in untargeted metabolomics.,"Studies of isotopically labeled compounds have been fundamental to understanding metabolic pathways and fluxes. They have traditionally, however, been used in conjunction with targeted analyses that identify and quantify a limited number of labeled downstream metabolites. Here we describe an alternative workflow that leverages recent advances in untargeted metabolomic technologies to track the fates of isotopically labeled metabolites in a global, unbiased manner. This untargeted approach can be applied to discover novel biochemical pathways and characterize changes in the fates of labeled metabolites as a function of altered biological conditions such as disease. To facilitate the data analysis, we introduce X(13)CMS, an extension of the widely used mass spectrometry-based metabolomic software package XCMS. X(13)CMS uses the XCMS platform to detect metabolite peaks and perform retention-time alignment in liquid chromatography/mass spectrometry (LC/MS) data. With the use of the XCMS output, the program then identifies isotopologue groups that correspond to isotopically labeled compounds. The retrieval of these groups is done without any a priori knowledge besides the following input parameters: (i) the mass difference between the unlabeled and labeled isotopes, (ii) the mass accuracy of the instrument used in the analysis, and (iii) the estimated retention-time reproducibility of the chromatographic method. Despite its name, X(13)CMS can be used to track any isotopic label. Additionally, it detects differential labeling patterns in biological samples collected from parallel control and experimental conditions. 
We validated the ability of X(13)CMS to accurately retrieve labeled metabolites from complex biological matrices both with targeted LC/MS/MS analysis of a subset of the hits identified by the program and with labeled standards spiked into cell extracts. We demonstrate the full functionality of X(13)CMS with an analysis of cultured rat astrocytes treated with uniformly labeled (U-)(13)C-glucose during lipopolysaccharide (LPS) challenge. Our results show that out of 223 isotopologue groups enriched from U-(13)C-glucose, 95 have statistically significant differential labeling patterns in astrocytes challenged with LPS compared to unchallenged control cells. Only two of these groups overlap with the 32 differentially regulated peaks identified by XCMS, indicating that X(13)CMS uncovers different and complementary information from untargeted metabolomic studies. Like XCMS, X(13)CMS is implemented in R. It is available from our laboratory website at http://pattilab.wustl.edu/x13cms.php .",2014-01-24 +23821598,Robust identification of local adaptation from allele frequencies.,"Comparing allele frequencies among populations that differ in environment has long been a tool for detecting loci involved in local adaptation. However, such analyses are complicated by an imperfect knowledge of population allele frequencies and neutral correlations of allele frequencies among populations due to shared population history and gene flow. Here we develop a set of methods to robustly test for unusual allele frequency patterns and correlations between environmental variables and allele frequencies while accounting for these complications based on a Bayesian model previously implemented in the software Bayenv. Using this model, we calculate a set of ""standardized allele frequencies"" that allows investigators to apply tests of their choice to multiple populations while accounting for sampling and covariance due to population history. 
We illustrate this first by showing that these standardized frequencies can be used to detect nonparametric correlations with environmental variables; these correlations are also less prone to spurious results due to outlier populations. We then demonstrate how these standardized allele frequencies can be used to construct a test to detect SNPs that deviate strongly from neutral population structure. This test is conceptually related to FST and is shown to be more powerful, as we account for population history. We also extend the model to next-generation sequencing of population pools-a cost-efficient way to estimate population allele frequencies, but one that introduces an additional level of sampling noise. The utility of these methods is demonstrated in simulations and by reanalyzing human SNP data from the Human Genome Diversity Panel populations and pooled next-generation sequencing data from Atlantic herring. An implementation of our method is available from http://gcbias.org.",2013-07-02 +23888102,Graphical contig analyzer for all sequencing platforms (G4ALL): a new stand-alone tool for finishing and draft generation of bacterial genomes.,"

Unlabelled

Genome assembly has always been complicated due to the inherent difficulties of sequencing technologies, as well as the computational methods used to process sequences. Although many of the problems for the generation of contigs from reads are well known, especially those involving short reads, the orientation and ordination of contigs in the finishing stages are still very challenging and time consuming, as it requires the manual curation of the contigs to guarantee their correct identification and prevent misassembly. Due to the large numbers of sequences that are produced, especially from the reads produced by next generation sequencers, this process demands considerable manual effort, and there are few software options available to facilitate the process. To address this problem, we have developed the Graphic Contig Analyzer for All Sequencing Platforms (G4ALL): a stand-alone multi-user tool that facilitates the editing of the contigs produced in the assembly process. Besides providing information on the gene products contained in each contig, obtained through a search of the available biological databases, G4ALL produces a scaffold of the genome, based on the overlap of the contigs after curation.

Availability

THE SOFTWARE IS AVAILABLE AT: http://www.genoma.ufpa.br/rramos/softwares/g4all.xhtml.",2013-06-29 +25161663,A hidden Markov model for haplotype inference for present-absent data of clustered genes using identified haplotypes and haplotype patterns.,"The majority of killer cell immunoglobin-like receptor (KIR) genes are detected as either present or absent using locus-specific genotyping technology. Ambiguity arises from the presence of a specific KIR gene since the exact copy number (one or two) of that gene is unknown. Therefore, haplotype inference for these genes is becoming more challenging due to such large portion of missing information. Meantime, many haplotypes and partial haplotype patterns have been previously identified due to tight linkage disequilibrium (LD) among these clustered genes thus can be incorporated to facilitate haplotype inference. In this paper, we developed a hidden Markov model (HMM) based method that can incorporate identified haplotypes or partial haplotype patterns for haplotype inference from present-absent data of clustered genes (e.g., KIR genes). We compared its performance with an expectation maximization (EM) based method previously developed in terms of haplotype assignments and haplotype frequency estimation through extensive simulations for KIR genes. The simulation results showed that the new HMM based method outperformed the previous method when some incorrect haplotypes were included as identified haplotypes and/or the standard deviation of haplotype frequencies were small. We also compared the performance of our method with two methods that do not use previously identified haplotypes and haplotype patterns, including an EM based method, HPALORE, and a HMM based method, MaCH. Our simulation results showed that the incorporation of identified haplotypes and partial haplotype patterns can improve accuracy for haplotype inference. 
The new software package HaploHMM is available and can be downloaded at http://www.soph.uab.edu/ssg/files/People/KZhang/HaploHMM/haplohmm-index.html.",2014-08-12 +21831268,Efficient counting of k-mers in DNA sequences using a bloom filter.,"

Background

Counting k-mers (substrings of length k in DNA sequence data) is an essential component of many methods in bioinformatics, including for genome and transcriptome assembly, for metagenomic sequencing, and for error correction of sequence reads. Although simple in principle, counting k-mers in large modern sequence data sets can easily overwhelm the memory capacity of standard computers. In current data sets, a large fraction-often more than 50%-of the storage capacity may be spent on storing k-mers that contain sequencing errors and which are typically observed only a single time in the data. These singleton k-mers are uninformative for many algorithms without some kind of error correction.

Results

We present a new method that identifies all the k-mers that occur more than once in a DNA sequence data set. Our method does this using a Bloom filter, a probabilistic data structure that stores all the observed k-mers implicitly in memory with greatly reduced memory requirements. We then make a second sweep through the data to provide exact counts of all nonunique k-mers. For example data sets, we report up to 50% savings in memory usage compared to current software, with modest costs in computational speed. This approach may reduce memory requirements for any algorithm that starts by counting k-mers in sequence data with errors.

Conclusions

A reference implementation for this methodology, BFCounter, is written in C++ and is GPL licensed. It is available for free download at http://pritch.bsd.uchicago.edu/bfcounter.html.",2011-08-10 +24497547,Integrating influenza antigenic dynamics with molecular evolution.,"Influenza viruses undergo continual antigenic evolution allowing mutant viruses to evade host immunity acquired to previous virus strains. Antigenic phenotype is often assessed through pairwise measurement of cross-reactivity between influenza strains using the hemagglutination inhibition (HI) assay. Here, we extend previous approaches to antigenic cartography, and simultaneously characterize antigenic and genetic evolution by modeling the diffusion of antigenic phenotype over a shared virus phylogeny. Using HI data from influenza lineages A/H3N2, A/H1N1, B/Victoria and B/Yamagata, we determine patterns of antigenic drift across viral lineages, showing that A/H3N2 evolves faster and in a more punctuated fashion than other influenza lineages. We also show that year-to-year antigenic drift appears to drive incidence patterns within each influenza lineage. This work makes possible substantial future advances in investigating the dynamics of influenza and other antigenically-variable pathogens by providing a model that intimately combines molecular and antigenic evolution. DOI: http://dx.doi.org/10.7554/eLife.01914.001.",2014-02-04 +23171871,Cohort profile: the skin cancer after organ transplant study.,"The Skin Cancer after Organ Transplant (SCOT) study was designed to investigate the link between genus beta human papillomavirus (HPV) and squamous cell skin cancer (SCSC). We focused on a population receiving immunosuppressive therapy for extended periods, transplant patients, as they are at extremely high risk for developing SCSC. 
Two complementary projects were conducted in the Seattle area: (i) a retrospective cohort with interview data from 2004 recipients of renal or cardiac transplants between 1995 and 2010 and (ii) a prospective cohort with interview data from 328 people on the transplant waiting lists between 2009 and 2011. Within the retrospective cohort, we developed a nested case-control study (172 cases and 337 control subjects) to assess risk of SCSC associated with markers of HPV in SCSC tumour tissue and eyebrow hair bulb DNA (HPV genotypes) and blood (HPV antibodies). In the prospective cohort, 135 participants had a 1-year post-transplant visit and 71 completed a 2-year post-transplant visit. In both arms of the cohort, we collected samples to assess markers of HPV infection such as acquisition of new types, proportion positive for each type, persistence of types at consecutive visits and number of HPV types detected. In the prospective cohort, we will also examine these HPV markers in relation to levels of cell-mediated immunity. The goal of the SCOT study is to use the data we collected to gain a more complete understanding of the role of immune suppression in HPV kinetics and of genus beta HPV types in SCSC. For more information, please contact the principal investigator through the study website: http://www.fhcrc.org/science/phs/cerc/The_SCOT_Study.html.",2012-11-21 +24339543,Phenotypic spectrum in uniparental disomy: Low incidence or lack of study?,"

Context

Alterations in the human chromosomal complement are expressed phenotypically ranging from (i) normal, via (ii) frequent fetal loss in otherwise normal person, to (iii) sub-clinical to severe mental retardation and dysmorphism in live births. A subtle and microscopically undetectable chromosomal alteration is uniparental disomy (UPD), which is known to be associated with distinct birth defects as per the chromosome involved and parental origin. UPD can be evident due to imprinted genes and/or activation of recessive mutations.

Aims

The present study comprises of data mining of published UPD cases with a focus on associated phenotypes. The goal was to identify non-random and recurrent associations between UPD and various genetic conditions, which can possibly indicate the presence of new imprinted genes.

Settings and design

Data mining was carried out using the homepage ""http://www.fish.uniklinikum-jena.de/UPD.html."", an online catalog of published cases with UPD.

Materials and methods

The UPD cases having normal karyotype and with or without clinical findings were selected to analyze the associated phenotypes for each chromosome, maternal or paternal involved in UPD.

Results

Our results revealed many genetic conditions (other than the known UPD syndromes) to be associated with UPD. Even in cases of bad obstetric history as well as normal individuals chance detection of UPD has been reported.

Conclusions

The role of UPD in human genetic disorders needs to be studied by involving larger cohorts of individuals with birth defects as well as normal population. The genetic conditions were scrutinized in terms of inheritance patterns; majority of these were autosomal recessive indicating the role of UPD as an underlying mechanism.",2013-07-01 +23812988,Identifying proteins controlling key disease signaling pathways.,"

Motivation

Several types of studies, including genome-wide association studies and RNA interference screens, strive to link genes to diseases. Although these approaches have had some success, genetic variants are often only present in a small subset of the population, and screens are noisy with low overlap between experiments in different labs. Neither provides a mechanistic model explaining how identified genes impact the disease of interest or the dynamics of the pathways those genes regulate. Such mechanistic models could be used to accurately predict downstream effects of knocking down pathway members and allow comprehensive exploration of the effects of targeting pairs or higher-order combinations of genes.

Results

We developed methods to model the activation of signaling and dynamic regulatory networks involved in disease progression. Our model, SDREM, integrates static and time series data to link proteins and the pathways they regulate in these networks. SDREM uses prior information about proteins' likelihood of involvement in a disease (e.g. from screens) to improve the quality of the predicted signaling pathways. We used our algorithms to study the human immune response to H1N1 influenza infection. The resulting networks correctly identified many of the known pathways and transcriptional regulators of this disease. Furthermore, they accurately predict RNA interference effects and can be used to infer genetic interactions, greatly improving over other methods suggested for this task. Applying our method to the more pathogenic H5N1 influenza allowed us to identify several strain-specific targets of this infection.

Availability

SDREM is available from http://sb.cs.cmu.edu/sdrem.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +23060610,CD-HIT: accelerated for clustering the next-generation sequencing data.,"

Summary

CD-HIT is a widely used program for clustering biological sequences to reduce sequence redundancy and improve the performance of other sequence analyses. In response to the rapid increase in the amount of sequencing data produced by the next-generation sequencing technologies, we have developed a new CD-HIT program accelerated with a novel parallelization strategy and some other techniques to allow efficient clustering of such datasets. Our tests demonstrated very good speedup derived from the parallelization for up to ∼24 cores and a quasi-linear speedup for up to ∼8 cores. The enhanced CD-HIT is capable of handling very large datasets in much shorter time than previous versions.

Availability

http://cd-hit.org.

Contact

liwz@sdsc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-11 +23918250,Scaffold network generator: a tool for mining molecular structures.,"

Summary

Scaffold network generator (SNG) is an open-source command-line utility that computes the hierarchical network of scaffolds that define a large set of input molecules. Scaffold networks are useful for visualizing, analysing and understanding the chemical data that is increasingly available through large public repositories like PubChem. For example, some groups have used scaffold networks to identify missed-actives in high-throughput screens of small molecules with bioassays. Substantially improving on existing software, SNG is robust enough to work on millions of molecules at a time with a simple command-line interface.

Availability and implementation

SNG is accessible at http://swami.wustl.edu/sng",2013-08-05 +23050565,Cluster-based assessment of protein-protein interaction confidence.,"

Background

Protein-protein interaction networks are key to a systems-level understanding of cellular biology. However, interaction data can contain a considerable fraction of false positives. Several methods have been proposed to assess the confidence of individual interactions. Most of them require the integration of additional data like protein expression and interaction homology information. While being certainly useful, such additional data are not always available and may introduce additional bias and ambiguity.

Results

We propose a novel, network topology based interaction confidence assessment method called CAPPIC (cluster-based assessment of protein-protein interaction confidence). It exploits the network's inherent modular architecture for assessing the confidence of individual interactions. Our method determines algorithmic parameters intrinsically and does not require any parameter input or reference sets for confidence scoring.

Conclusions

On the basis of five yeast and two human physical interactome maps inferred using different techniques, we show that CAPPIC reliably assesses interaction confidence and its performance compares well to other approaches that are also based on network topology. The confidence score correlates with the agreement in localization and biological process annotations of interacting proteins. Moreover, it corroborates experimental evidence of physical interactions. Our method is not limited to physical interactome maps as we exemplify with a large yeast genetic interaction network. An implementation of CAPPIC is available at http://intscore.molgen.mpg.de.",2012-10-10 +23554899,DendroBLAST: approximate phylogenetic trees in the absence of multiple sequence alignments.,"The rapidly growing availability of genome information has created considerable demand for both fast and accurate phylogenetic inference algorithms. We present a novel method called DendroBLAST for reconstructing phylogenetic dendrograms/trees from protein sequences using BLAST. This method differs from other methods by incorporating a simple model of sequence evolution to test the effect of introducing sequence changes on the reliability of the bipartitions in the inferred tree. Using realistic simulated sequence data we demonstrate that this method produces phylogenetic trees that are more accurate than other commonly-used distance based methods though not as accurate as maximum likelihood methods from good quality multiple sequence alignments. In addition to tests on simulated data, we use DendroBLAST to generate input trees for a supertree reconstruction of the phylogeny of the Archaea. This independent analysis produces an approximate phylogeny of the Archaea that has both high precision and recall when compared to previously published analysis of the same dataset using conventional methods. 
Taken together these results demonstrate that approximate phylogenetic trees can be produced in the absence of multiple sequence alignments, and we propose that these trees will provide a platform for improving and informing downstream bioinformatic analysis. A web implementation of the DendroBLAST method is freely available for use at http://www.dendroblast.com/.",2013-03-15 +24364864,Engineered DNA sequence syntax inspector.,"DNAs encoding polypeptides often contain design errors that cause experiments to prematurely fail. One class of design errors is incorrect or missing elements in the DNA, here termed syntax errors. We have identified three major causes of syntax errors: point mutations from sequencing or manual data entry, gene structure misannotation, and unintended open reading frames (ORFs). The Engineered DNA Sequence Syntax Inspector (EDSSI) is an online bioinformatics pipeline that checks for syntax errors through three steps. First, ORF prediction in input DNA sequences is done by GeneMark; next, homologous sequences are retrieved by BLAST, and finally, syntax errors in the protein sequence are predicted by using the SIFT algorithm. We show that the EDSSI is able to identify previously published examples of syntactical errors and also show that our indel addition to the SIFT program is 97% accurate on a test set of Escherichia coli proteins. The EDSSI is available at http://andersonlab.qb3.berkeley.edu/Software/EDSSI/ .",2014-01-03 +23803311,Consensus and conflict cards for metabolic pathway databases.,"

Background

The metabolic network of H. sapiens and many other organisms is described in multiple pathway databases. The level of agreement between these descriptions, however, has proven to be low. We can use these different descriptions to our advantage by identifying conflicting information and combining their knowledge into a single, more accurate, and more complete description. This task is, however, far from trivial.

Results

We introduce the concept of Consensus and Conflict Cards (C₂Cards) to provide concise overviews of what the databases do or do not agree on. Each card is centered at a single gene, EC number or reaction. These three complementary perspectives make it possible to distinguish disagreements on the underlying biology of a metabolic process from differences that can be explained by different decisions on how and in what detail to represent knowledge. As a proof-of-concept, we implemented C₂Cards(Human), as a web application http://www.molgenis.org/c2cards, covering five human pathway databases.

Conclusions

C₂Cards can contribute to ongoing reconciliation efforts by simplifying the identification of consensus and conflicts between pathway databases and lowering the threshold for experts to contribute. Several case studies illustrate the potential of the C₂Cards in identifying disagreements on the underlying biology of a metabolic process. The overviews may also point out controversial biological knowledge that should be subject of further research. Finally, the examples provided emphasize the importance of manual curation and the need for a broad community involvement.",2013-06-26 +24835488,BosFinder: a novel pre-microRNA gene prediction algorithm in Bos taurus.,"MicroRNAs (miRNAs) are small non-coding RNAs that modulate gene expression transcriptionally (transcriptional activation or inactivation) and/or post-transcriptionally (translation inhibition or degradation of their target mRNAs). This phenomenon has significant roles in growth and developmental processes in plants and animals. Bos taurus is one of the most important livestock animals, having great importance in food and economical sciences and industries. However, limited information is available on Bos taurus constituent miRNAs because its whole genome assembly has been only recently published. Therefore, computational methods have been essential tools in miRNA gene prediction and discovery. Among these, machine-learning-based approaches are used to characterize genome scale pre-miRNAs from expressed sequence tags (ESTs). In this study, a support vector machine model was used to classify 33 structural and thermodynamic features of pre-miRNA genes. Public bovine EST data were obtained from different tissues in various developmental stages. A new algorithm, called BosFinder, was developed to identify and annotate the whole genome's derived pre-miRNAs. We found 18 776 highly potential pre-miRNA sequences. 
This is the first genome survey report of Bos taurus based on a machine-learning method for pre-miRNA gene finding. The bosfinder program is freely available at http://lbb.ut.ac.ir/Download/LBBsoft/BosFinder/.",2014-05-17 +22262732,A powerful test for multiple rare variants association studies that incorporates sequencing qualities.,"Next-generation sequencing data will soon become routinely available for association studies between complex traits and rare variants. Sequencing data, however, are characterized by the presence of sequencing errors at each individual genotype. This makes it especially challenging to perform association studies of rare variants, which, due to their low minor allele frequencies, can be easily perturbed by genotype errors. In this article, we develop the quality-weighted multivariate score association test (qMSAT), a new procedure that allows powerful association tests between complex traits and multiple rare variants under the presence of sequencing errors. Simulation results based on quality scores from real data show that the qMSAT often dominates over current methods, that do not utilize quality information. In particular, the qMSAT can dramatically increase power over existing methods under moderate sample sizes and relatively low coverage. Moreover, in an obesity data study, we identified using the qMSAT two functional regions (MGLL promoter and MGLL 3'-untranslated region) where rare variants are associated with extreme obesity. Due to the high cost of sequencing data, the qMSAT is especially valuable for large-scale studies involving rare variants, as it can potentially increase power without additional experimental cost. qMSAT is freely available at http://qmsat.sourceforge.net/.",2012-01-19 +22962468,Techniques to cope with missing data in host-pathogen protein interaction prediction.,"

Motivation

Approaches that use supervised machine learning techniques for protein-protein interaction (PPI) prediction typically use features obtained by integrating several sources of data. Often certain attributes of the data are not available, resulting in missing values. In particular, our host-pathogen PPI datasets have a large fraction, in the range of 58-85% of missing values, which makes it challenging to apply machine learning algorithms.

Results

We show that specialized techniques for missing value imputation can improve the performance of the models significantly. We use cross species information in combination with machine learning techniques like Group lasso with ℓ(1)/ℓ(2) regularization. We demonstrate the benefits of our approach on two PPI prediction problems. In our first example of Salmonella-human PPI prediction, we are able to obtain high prediction accuracies with 77.6% precision and 84% recall. Comparison with various other techniques shows an improvement of 9 in F1 score over the next best technique. We also apply our method to Yersinia-human PPI prediction successfully, demonstrating the generality of our approach.

Availability

Predicted interactions, datasets, features are available at: http://www.cs.cmu.edu/~mkshirsa/eccb2012_paper46.html.

Contact

judithks@cs.cmu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-01 +24596152,Structure of the SAS-6 cartwheel hub from Leishmania major.,"Centrioles are cylindrical cell organelles with a ninefold symmetric peripheral microtubule array that is essential to template cilia and flagella. They are built around a central cartwheel assembly that is organized through homo-oligomerization of the centriolar protein SAS-6, but whether SAS-6 self-assembly can dictate cartwheel and thereby centriole symmetry is unclear. Here we show that Leishmania major SAS-6 crystallizes as a 9-fold symmetric cartwheel and provide the X-ray structure of this assembly at a resolution of 3.5 Å. We furthermore demonstrate that oligomerization of Leishmania SAS-6 can be inhibited by a small molecule in vitro and provide indications for its binding site. Our results firmly establish that SAS-6 can impose cartwheel symmetry on its own and indicate how this process might occur mechanistically in vivo. Importantly, our data also provide a proof-of-principle that inhibition of SAS-6 oligomerization by small molecules is feasible. DOI: http://dx.doi.org/10.7554/eLife.01812.001.",2014-01-01 +25412848,Metaplastic breast carcinomas display genomic and transcriptomic heterogeneity [corrected]. .,"Metaplastic breast carcinoma is a rare and aggressive histologic type of breast cancer, preferentially displaying a triple-negative phenotype. We sought to define the transcriptomic heterogeneity of metaplastic breast cancers on the basis of current gene expression microarray-based classifiers, and to determine whether these tumors display gene copy number profiles consistent with those of BRCA1-associated breast cancers. Twenty-eight consecutive triple-negative metaplastic breast carcinomas were reviewed, and the metaplastic component present in each frozen specimen was defined (ie, spindle cell, squamous, chondroid metaplasia). 
RNA and DNA extracted from frozen sections with tumor cell content >60% were subjected to gene expression (Illumina HumanHT-12 v4) and copy number profiling (Affymetrix SNP 6.0), respectively. Using the best practice PAM50/claudin-low microarray-based classifier, all metaplastic breast carcinomas with spindle cell metaplasia were of claudin-low subtype, whereas those with squamous or chondroid metaplasia were preferentially of basal-like subtype. Triple-negative breast cancer subtyping using a dedicated website (http://cbc.mc.vanderbilt.edu/tnbc/) revealed that all metaplastic breast carcinomas with chondroid metaplasia were of mesenchymal-like subtype, spindle cell carcinomas preferentially of unstable or mesenchymal stem-like subtype, and those with squamous metaplasia were of multiple subtypes. None of the cases was classified as immunomodulatory or luminal androgen receptor subtype. Integrative clustering, combining gene expression and gene copy number data, revealed that metaplastic breast carcinomas with spindle cell and chondroid metaplasia were preferentially classified as of integrative clusters 4 and 9, respectively, whereas those with squamous metaplasia were classified into six different clusters. Eight of the 26 metaplastic breast cancers subjected to SNP6 analysis were classified as BRCA1-like. The diversity of histologic features of metaplastic breast carcinomas is reflected at the transcriptomic level, and an association between molecular subtypes and histology was observed. BRCA1-like genomic profiles were found only in a subset (31%) of metaplastic breast cancers, and were not associated with a specific molecular or histologic subtype.",2014-11-21 +24875684,Positive technology: a free mobile platform for the self-management of psychological stress.,"We describe the main features and preliminary evaluation of Positive Technology, a free mobile platform for the self-management of psychological stress (http://positiveapp.info/). 
The mobile platform features three main components: (i) guided relaxation, which provides the user with the opportunity of browsing a gallery of relaxation music and video-narrative resources for reducing stress; (ii) 3D biofeedback, which helps the user learning to control his/her responses, by visualizing variations of heart rate in an engaging 3D environment; (iii) stress tracking, by the recording of heart rate and self-reports. We evaluated the Positive Technology app in an online trial involving 32 participants, out of which 7 used the application in combination with the wrist sensor. Overall, feedback from users was satisfactory and the analysis of data collected online indicated the capability of the app for reducing perceived stress levels. A future goal is to improve the usability of the application and include more advanced stress monitoring features, based on the analysis of heart rate variability indexes.",2014-01-01 +23826173,Genomer--a Swiss army knife for genome scaffolding.,"The increasing accessibility and reduced costs of sequencing has made genome analysis accessible to more and more researchers. Yet there remains a steep learning curve in the subsequent computational steps required to process raw reads into a database-deposited genome sequence. Here we describe ""Genomer,"" a tool to simplify the manual tasks of finishing and uploading a genome sequence to a database. Genomer can format a genome scaffold into the common files required for submission to GenBank. This software also simplifies updating a genome scaffold by allowing a human-readable YAML format file to be edited instead of large sequence files. Genomer is written as a command line tool and is an effort to make the manual process of genome scaffolding more robust and reproducible. 
Extensive documentation and video tutorials are available at http://next.gs.",2013-06-24 +22484508,KOMA: ELISA-microarray calibration and data analysis based on kinetic signal amplification.,"Antibody microarrays with enzyme-linked immunosorbent technology are used for quantitative, simultaneous and high-throughput analysis of multiple proteins in a single probe. Kinetic detection can significantly improve precision and quantification range of microarray measurements. Here we present the open source software Kinetic Operating Microarray Analyzer (KOMA) that enables calibration and high-throughput analysis of quantitative microarray data collected using a time-resolved kinetic detection protocol of the enzymatic signal. This tool can also be helpful for analyzing data from any other analytical assays employing enzymatic signal amplification, in which a broader range of quantification is reached by the time-resolved recording of readouts. KOMA is open for download at http://www.uni-heidelberg.de/fakultaeten/biowissenschaften/ipmb/biologie/woelfl/Research.html together with a set of test raw data and requires R version 2.12 and Java RE version 6.0.",2012-03-30 +23542069,Radiofrequency ablation and endoscopic mucosal resection for dysplastic barrett's esophagus and early esophageal adenocarcinoma: outcomes of the UK National Halo RFA Registry.,"

Background & aims

Patients with Barrett's esophagus (BE) and high-grade dysplasia (HGD) or early neoplasia increasingly receive endoscopic mucosal resection and radiofrequency ablation (RFA) therapy. We analyzed data from a UK registry that follows the outcomes of patients with BE who have undergone RFA for neoplasia.

Methods

We collected data on 335 patients with BE and neoplasia (72% with HGD, 24% with intramucosal cancer, 4% with low-grade dysplasia [mean age, 69 years; 81% male]), treated at 19 centers in the United Kingdom from July 2008 through August 2012. Mean length of BE segments was 5.8 cm (range, 1-20 cm). Patients' nodules were removed by endoscopic mucosal resection, and the patients then underwent RFA every 3 months until all areas of BE were ablated or cancer developed. Biopsies were collected 12 months after the first RFA; clearance of HGD, dysplasia, and BE were assessed.

Results

HGD was cleared from 86% of patients, all dysplasia from 81%, and BE from 62% at the 12-month time point, after a mean of 2.5 (range, 2-6) RFA procedures. Complete reversal dysplasia was 15% less likely for every 1-cm increment in BE length (odds ratio = 1.156; SE = 0.048; 95% confidence interval: 1.07-1.26; P < .001). Endoscopic mucosal resection before RFA did not provide any benefit. Invasive cancer developed in 10 patients (3%) by the 12-month time point and disease had progressed in 17 patients (5.1%) after a median follow-up time of 19 months. Symptomatic strictures developed in 9% of patients and were treated by endoscopic dilatation. Nineteen months after therapy began, 94% of patients remained clear of dysplasia.

Conclusions

We analyzed data from a large series of patients in the United Kingdom who underwent RFA for BE-related neoplasia and found that by 12 months after treatment, dysplasia was cleared from 81%. Shorter segments of BE respond better to RFA; http://www.controlled-trials.com, number ISRCTN93069556.",2013-03-28 +22962477,Comprehensive estimation of input signals and dynamics in biochemical reaction networks.,"

Motivation

Cellular information processing can be described mathematically using differential equations. Often, external stimulation of cells by compounds such as drugs or hormones leading to activation has to be considered. Mathematically, the stimulus is represented by a time-dependent input function. Parameters such as rate constants of the molecular interactions are often unknown and need to be estimated from experimental data, e.g. by maximum likelihood estimation. For this purpose, the input function has to be defined for all times of the integration interval. This is usually achieved by approximating the input by interpolation or smoothing of the measured data. This procedure is suboptimal since the input uncertainties are not considered in the estimation process which often leads to overoptimistic confidence intervals of the inferred parameters and the model dynamics.

Results

This article presents a new approach which includes the input estimation into the estimation process of the dynamical model parameters by minimizing an objective function containing all parameters simultaneously. We applied this comprehensive approach to an illustrative model with simulated data and compared it to alternative methods. Statistical analyses revealed that our method improves the prediction of the model dynamics and the confidence intervals leading to a proper coverage of the confidence intervals of the dynamic parameters. The method was applied to the JAK-STAT signaling pathway.

Availability

MATLAB code is available on the authors' website http://www.fdmold.uni-freiburg.de/~schelker/.

Contact

max.schelker@fdm.uni-freiburg.de

Supplementary information

Additional information is available at Bioinformatics Online.",2012-09-01 +23129301,HD-CNV: hotspot detector for copy number variants.,"

Summary

Copy number variants (CNVs) are a major source of genetic variation. Comparing CNVs between samples is important in elucidating their potential effects in a wide variety of biological contexts. HD-CNV (hotspot detector for copy number variants) is a tool for downstream analysis of previously identified CNV regions from multiple samples, and it detects recurrent regions by finding cliques in an interval graph generated from the input. It creates a unique graphical representation of the data, as well as summary spreadsheets and UCSC (University of California, Santa Cruz) Genome Browser track files. The interval graph, when viewed with other software or by automated graph analysis, is useful in identifying genomic regions of interest for further study.

Availability and implementation

HD-CNV is an open source Java code and is freely available, with tutorials and sample data from http://daleylab.org.

Contact

jcamer7@uwo.ca",2012-11-04 +23413433,An approximate Bayesian approach for mapping paired-end DNA reads to a reference genome.,"

Summary

Many high-throughput sequencing experiments produce paired DNA reads. Paired-end DNA reads provide extra positional information that is useful in reliable mapping of short reads to a reference genome, as well as in downstream analyses of structural variations. Given the importance of paired-end alignments, it is surprising that there have been no previous publications focusing on this topic. In this article, we present a new probabilistic framework to predict the alignment of paired-end reads to a reference genome. Using both simulated and real data, we compare the performance of our method with six other read-mapping tools that provide a paired-end option. We show that our method provides a good combination of accuracy, error rate and computation time, especially in more challenging and practical cases, such as when the reference genome is incomplete or unavailable for the sample, or when there are large variations between the reference genome and the source of the reads. An open-source implementation of our method is available as part of Last, a multi-purpose alignment program freely available at http://last.cbrc.jp.

Contact

martin@cbrc.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-14 +22238575,CATCHprofiles: clustering and alignment tool for ChIP profiles.,"Chromatin Immuno Precipitation (ChIP) profiling detects in vivo protein-DNA binding, and has revealed a large combinatorial complexity in the binding of chromatin associated proteins and their post-translational modifications. To fully explore the spatial and combinatorial patterns in ChIP-profiling data and detect potentially meaningful patterns, the areas of enrichment must be aligned and clustered, which is an algorithmically and computationally challenging task. We have developed CATCHprofiles, a novel tool for exhaustive pattern detection in ChIP profiling data. CATCHprofiles is built upon a computationally efficient implementation for the exhaustive alignment and hierarchical clustering of ChIP profiling data. The tool features a graphical interface for examination and browsing of the clustering results. CATCHprofiles requires no prior knowledge about functional sites, detects known binding patterns ""ab initio"", and enables the detection of new patterns from ChIP data at a high resolution, exemplified by the detection of asymmetric histone and histone modification patterns around H2A.Z-enriched sites. CATCHprofiles' capability for exhaustive analysis combined with its ease-of-use makes it an invaluable tool for explorative research based on ChIP profiling data. CATCHprofiles and the CATCH algorithm run on all platforms and is available for free through the CATCH website: http://catch.cmbi.ru.nl/. 
User support is available by subscribing to the mailing list catch-users@bioinformatics.org.",2012-01-04 +23793516,"MuPIT interactive: webserver for mapping variant positions to annotated, interactive 3D structures.","Mutation position imaging toolbox (MuPIT) interactive is a browser-based application for single-nucleotide variants (SNVs), which automatically maps the genomic coordinates of SNVs onto the coordinates of available three-dimensional (3D) protein structures. The application is designed for interactive browser-based visualization of the putative functional relevance of SNVs by biologists who are not necessarily experts either in bioinformatics or protein structure. Users may submit batches of several thousand SNVs and review all protein structures that cover the SNVs, including available functional annotations such as binding sites, mutagenesis experiments, and common polymorphisms. Multiple SNVs may be mapped onto each structure, enabling 3D visualization of SNV clusters and their relationship to functionally annotated positions. We illustrate the utility of MuPIT interactive in rationalizing the impact of selected polymorphisms in the PharmGKB database, somatic mutations identified in the Cancer Genome Atlas study of invasive breast carcinomas, and rare variants identified in the exome sequencing project. MuPIT interactive is freely available for non-profit use at http://mupit.icm.jhu.edu .",2013-06-23 +24569478,Synaptotagmin 7 functions as a Ca2+-sensor for synaptic vesicle replenishment.,"Synaptotagmin (syt) 7 is one of three syt isoforms found in all metazoans; it is ubiquitously expressed, yet its function in neurons remains obscure. Here, we resolved Ca(2+)-dependent and Ca(2+)-independent synaptic vesicle (SV) replenishment pathways, and found that syt 7 plays a selective and critical role in the Ca(2+)-dependent pathway. 
Mutations that disrupt Ca(2+)-binding to syt 7 abolish this function, suggesting that syt 7 functions as a Ca(2+)-sensor for replenishment. The Ca(2+)-binding protein calmodulin (CaM) has also been implicated in SV replenishment, and we found that loss of syt 7 was phenocopied by a CaM antagonist. Moreover, we discovered that syt 7 binds to CaM in a highly specific and Ca(2+)-dependent manner; this interaction requires intact Ca(2+)-binding sites within syt 7. Together, these data indicate that a complex of two conserved Ca(2+)-binding proteins, syt 7 and CaM, serve as a key regulator of SV replenishment in presynaptic nerve terminals. DOI: http://dx.doi.org/10.7554/eLife.01524.001.",2014-02-25 +22216865,Constructing logical models of gene regulatory networks by integrating transcription factor-DNA interactions with expression data: an entropy-based approach.,"Models of gene regulatory networks (GRNs) attempt to explain the complex processes that determine cells' behavior, such as differentiation, metabolism, and the cell cycle. The advent of high-throughput data generation technologies has allowed researchers to fit theoretical models to experimental data on gene-expression profiles. GRNs are often represented using logical models. These models require that real-valued measurements be converted to discrete levels, such as on/off, but the discretization often introduces inconsistencies into the data. Dimitrova et al. posed the problem of efficiently finding a parsimonious resolution of the introduced inconsistencies. We show that reconstruction of a logical GRN that minimizes the errors is NP-complete, so that an efficient exact algorithm for the problem is not likely to exist. We present a probabilistic formulation of the problem that circumvents discretization of expression data. We phrase the problem of error reduction as a minimum entropy problem, develop a heuristic algorithm for it, and evaluate its performance on mouse embryonic stem cell data. 
The constructed model displays high consistency with prior biological knowledge. Despite the oversimplification of a discrete model, we show that it is superior to raw experimental measurements and demonstrates a highly significant level of identical regulatory logic among co-regulated genes. A software implementing the method is freely available at: http://acgt.cs.tau.ac.il/modent.",2012-01-01 +23851377,CoLIde: a bioinformatics tool for CO-expression-based small RNA Loci Identification using high-throughput sequencing data.,"Small RNAs (sRNAs) are 20-25 nt non-coding RNAs that act as guides for the highly sequence-specific regulatory mechanism known as RNA silencing. Due to the recent increase in sequencing depth, a highly complex and diverse population of sRNAs in both plants and animals has been revealed. However, the exponential increase in sequencing data has also made the identification of individual sRNA transcripts corresponding to biological units (sRNA loci) more challenging when based exclusively on the genomic location of the constituent sRNAs, hindering existing approaches to identify sRNA loci. To infer the location of significant biological units, we propose an approach for sRNA loci detection called CoLIde (Co-expression based sRNA Loci Identification) that combines genomic location with the analysis of other information such as variation in expression levels (expression pattern) and size class distribution. For CoLIde, we define a locus as a union of regions sharing the same pattern and located in close proximity on the genome. Biological relevance, detected through the analysis of size class distribution, is also calculated for each locus. CoLIde can be applied on ordered (e.g., time-dependent) or un-ordered (e.g., organ, mutant) series of samples both with or without biological/technical replicates. The method reliably identifies known types of loci and shows improved performance on sequencing data from both plants (e.g., A. thaliana, S. 
lycopersicum) and animals (e.g., D. melanogaster) when compared with existing locus detection techniques. CoLIde is available for use within the UEA Small RNA Workbench which can be downloaded from: http://srna-workbench.cmp.uea.ac.uk.",2013-06-28 +22327835,Comparative analysis of algorithms for integration of copy number and expression data.,"Chromosomal instability is a hallmark of cancer, and genes that display abnormal expression in aberrant chromosomal regions are likely to be key players in tumor progression. Identifying such driver genes reliably requires computational methods that can integrate genome-scale data from several sources. We compared the performance of ten algorithms that integrate copy-number and transcriptomics data from 15 head and neck squamous cell carcinoma cell lines, 129 lung squamous cell carcinoma primary tumors and simulated data. Our results revealed clear differences between the methods in terms of sensitivity and specificity as well as in their performance in small and large sample sizes. Results of the comparison are available at http://csbi.ltdk.helsinki.fi/cn2gealgo/.",2012-02-12 +23203885,Factorbook.org: a Wiki-based database for transcription factor-binding data generated by the ENCODE consortium.,"The Encyclopedia of DNA Elements (ENCODE) consortium aims to identify all functional elements in the human genome including transcripts, transcriptional regulatory regions, along with their chromatin states and DNA methylation patterns. The ENCODE project generates data utilizing a variety of techniques that can enrich for regulatory regions, such as chromatin immunoprecipitation (ChIP), micrococcal nuclease (MNase) digestion and DNase I digestion, followed by deeply sequencing the resulting DNA. As part of the ENCODE project, we have developed a Web-accessible repository accessible at http://factorbook.org. 
In Wiki format, factorbook is a transcription factor (TF)-centric repository of all ENCODE ChIP-seq datasets on TF-binding regions, as well as the rich analysis results of these data. In the first release, factorbook contains 457 ChIP-seq datasets on 119 TFs in a number of human cell lines, the average profiles of histone modifications and nucleosome positioning around the TF-binding regions, sequence motifs enriched in the regions and the distance and orientation preferences between motif sites.",2012-11-29 +21719554,British Cardiovascular Intervention Society Registry for audit and quality assessment of percutaneous coronary interventions in the United Kingdom.,"

Aims

To create an inclusive and accurate registry of all percutaneous coronary intervention (PCI) procedures performed in the UK for audit to assess quality of care, drive improvements in this care and to provide data for research.

Interventions

Feedback to PCI centres with 'live' online data analysis and structured monthly and quarterly reports of PCI activity, including process of care measures and assessment of risk-adjusted outcome. Annual national reports focused on the structure of the provision of PCI across the UK, the appropriateness and process of its delivery and outcomes.

Setting

All hospitals performing PCI in the UK.

Years

1994 to present.

Population

Consecutive patients treated by PCI. Approximately 80,000 new procedures each year in recent years.

Startpoints

All attempts to perform a PCI procedure. This is defined as when any coronary device is used to approach, probe or cross one or more coronary lesions, with the intention of performing a coronary intervention.

Baseline data

113 variables defining patient demographic features, indications for PCI, procedural details and outcomes up to time of hospital discharge.

Data capture

Data entry into local software systems by caregivers and data clerks, with subsequent encryption and internet transfer to central data servers.

Data quality

Local validation, range checks and consistency assessments during upload. No external validation. Feedback of data completeness to all units.

Access to data

Available for research by application to British Cardiovascular Intervention Society using a data sharing agreement which can be obtained at http://www.bcis.org.uk.",2011-06-30 +24965847,H-DROP: an SVM based helical domain linker predictor trained with features optimized by combining random forest and stepwise selection.,"Domain linker prediction is attracting much interest as it can help identifying novel domains suitable for high throughput proteomics analysis. Here, we report H-DROP, an SVM-based Helical Domain linker pRediction using OPtimal features. H-DROP is, to the best of our knowledge, the first predictor for specifically and effectively identifying helical linkers. This was made possible first because a large training dataset became available from IS-Dom, and second because we selected a small number of optimal features from a huge number of potential ones. The training helical linker dataset, which included 261 helical linkers, was constructed by detecting helical residues at the boundary regions of two independent structural domains listed in our previously reported IS-Dom dataset. 45 optimal feature candidates were selected from 3,000 features by random forest, which were further reduced to 26 optimal features by stepwise selection. The prediction sensitivity and precision of H-DROP were 35.2 and 38.8%, respectively. These values were over 10.7% higher than those of control methods including our previously developed DROP, which is a coil linker predictor, and PPRODO, which is trained with un-differentiated domain boundary sequences. Overall, these results indicated that helical linkers can be predicted from sequence information alone by using a strictly curated training data set for helical linkers and carefully selected set of optimal features. H-DROP is available at http://domserv.lab.tuat.ac.jp.",2014-06-26 +21551150,Creating views on integrated multidomain data.,"

Motivation

Modern data acquisition methods in biology allow the procurement of different types of data in increasing quantity, facilitating a comprehensive view of biological systems. As data are usually gathered and interpreted by separate domain scientists, it is hard to grasp multidomain properties and structures. Consequently, there is a need for the integration of biological data from different sources and of different types in one application, providing various visualization approaches.

Results

In this article, methods for the integration and visualization of multimodal biological data are presented. This is achieved based on two graphs representing the meta-relations between biological data and the measurement combinations, respectively. Both graphs are linked and serve as different views of the integrated data with navigation and exploration possibilities. Data can be combined and visualized multifariously, resulting in views of the integrated biological data.

Availability

http://vanted.ipk-gatersleben.de/hive/.

Contact

rohn@ipk-gatersleben.de.",2011-05-06 +22080549,GeneWeaver: a web-based system for integrative functional genomics.,"High-throughput genome technologies have produced a wealth of data on the association of genes and gene products to biological functions. Investigators have discovered value in combining their experimental results with published genome-wide association studies, quantitative trait locus, microarray, RNA-sequencing and mutant phenotyping studies to identify gene-function associations across diverse experiments, species, conditions, behaviors or biological processes. These experimental results are typically derived from disparate data repositories, publication supplements or reconstructions from primary data stores. This leaves bench biologists with the complex and unscalable task of integrating data by identifying and gathering relevant studies, reanalyzing primary data, unifying gene identifiers and applying ad hoc computational analysis to the integrated set. The freely available GeneWeaver (http://www.GeneWeaver.org) powered by the Ontological Discovery Environment is a curated repository of genomic experimental results with an accompanying tool set for dynamic integration of these data sets, enabling users to interactively address questions about sets of biological functions and their relations to sets of genes. Thus, large numbers of independently published genomic results can be organized into new conceptual frameworks driven by the underlying, inferred biological relationships rather than a pre-existing semantic framework. 
An empirical 'ontology' is discovered from the aggregate of experimental knowledge around user-defined areas of biological inquiry.",2011-11-12 +23805260,Prediction and Analysis of Post-Translational Pyruvoyl Residue Modification Sites from Internal Serines in Proteins.,"Most of pyruvoyl-dependent proteins observed in prokaryotes and eukaryotes are critical regulatory enzymes, which are primary targets of inhibitors for anti-cancer and anti-parasitic therapy. These proteins undergo an autocatalytic, intramolecular self-cleavage reaction in which a covalently bound pyruvoyl group is generated on a conserved serine residue. Traditional detections of the modified serine sites are performed by experimental approaches, which are often labor-intensive and time-consuming. In this study, we initiated in an attempt for the computational predictions of such serine sites with Feature Selection based on a Random Forest. Since only a small number of experimentally verified pyruvoyl-modified proteins are collected in the protein database at its current version, we only used a small dataset in this study. After removing proteins with sequence identities >60%, a non-redundant dataset was generated and was used, which contained only 46 proteins, with one pyruvoyl serine site for each protein. Several types of features were considered in our method including PSSM conservation scores, disorders, secondary structures, solvent accessibilities, amino acid factors and amino acid occurrence frequencies. As a result, a pretty good performance was achieved in our dataset. The best 100.00% accuracy and 1.0000 MCC value were obtained from the training dataset, and 93.75% accuracy and 0.8441 MCC value from the testing dataset. The optimal feature set contained 9 features. Analysis of the optimal feature set indicated the important roles of some specific features in determining the pyruvoyl-group-serine sites, which were consistent with several results of earlier experimental studies. 
These selected features may shed some light on the in-depth understanding of the mechanism of the post-translational self-maturation process, providing guidelines for experimental validation. Future work should be made as more pyruvoyl-modified proteins are found and the method should be evaluated on larger datasets. At last, the predicting software can be downloaded from http://www.nkbiox.com/sub/pyrupred/index.html.",2013-06-21 +23805255,Elevation of Eosinophil-Derived Neurotoxin in Plasma of the Subjects with Aspirin-Exacerbated Respiratory Disease: A Possible Peripheral Blood Protein Biomarker.,"Aspirin-exacerbated respiratory disease (AERD) remains widely underdiagnosed in asthmatics, primarily due to insufficient awareness of the relationship between aspirin ingestion and asthma exacerbation. The identification of aspirin hypersensitivity is therefore essential to avoid serious aspirin complications. The goal of the study was to develop plasma biomarkers to predict AERD. We identified differentially expressed genes in peripheral blood mononuclear cells (PBMC) between subjects with AERD and those with aspirin-tolerant asthma (ATA). The genes were matched with the secreted protein database (http://spd.cbi.pku.edu.cn/) to select candidate proteins in the plasma. Plasma levels of the candidate proteins were then measured in AERD (n = 40) and ATA (n = 40) subjects using an enzyme-linked immunosorbent assay (ELISA). Target genes were validated as AERD biomarkers using an ROC curve analysis. From 175 differentially expressed genes (p-value <0.0001) that were queried to the secreted protein database, 11 secreted proteins were retrieved. The gene expression patterns were predicted as elevated for 7 genes and decreased for 4 genes in AERD as compared with ATA subjects. Among these genes, significantly higher levels of plasma eosinophil-derived neurotoxin (RNASE2) were observed in AERD as compared with ATA subjects (70(14.62∼311.92) µg/ml vs. 
12(2.55∼272.84) µg/ml, p-value <0.0003). Based on the ROC curve analysis, the AUC was 0.74 (p-value = 0.0001, asymptotic 95% confidence interval [lower bound: 0.62, upper bound: 0.83]) with 95% sensitivity, 60% specificity, and a cut-off value of 27.15 µg/ml. Eosinophil-derived neurotoxin represents a novel biomarker to distinguish AERD from ATA.",2013-06-21 +22931062,The simple fool's guide to population genomics via RNA-Seq: an introduction to high-throughput sequencing data analysis.,"High-throughput sequencing technologies are currently revolutionizing the field of biology and medicine, yet bioinformatic challenges in analysing very large data sets have slowed the adoption of these technologies by the community of population biologists. We introduce the 'Simple Fool's Guide to Population Genomics via RNA-seq' (SFG), a document intended to serve as an easy-to-follow protocol, walking a user through one example of high-throughput sequencing data analysis of nonmodel organisms. It is by no means an exhaustive protocol, but rather serves as an introduction to the bioinformatic methods used in population genomics, enabling a user to gain familiarity with basic analysis steps. The SFG consists of two parts. This document summarizes the steps needed and lays out the basic themes for each and a simple approach to follow. The second document is the full SFG, publicly available at http://sfg.stanford.edu, that includes detailed protocols for data processing and analysis, along with a repository of custom-made scripts and sample files. Steps included in the SFG range from tissue collection to de novo assembly, blast annotation, alignment, gene expression, functional enrichment, SNP detection, principal components and F(ST) outlier analyses. 
Although the technical aspects of population genomics are changing very quickly, our hope is that this document will help population biologists with little to no background in high-throughput sequencing and bioinformatics to more quickly adopt these new techniques.",2012-08-29 +22905315,jPopGen Suite: population genetic analysis of DNA polymorphism from nucleotide sequences with errors.,"1. Next-generation sequencing (NGS) is being increasingly used in ecological and evolutionary studies. Though promising, NGS is known to be error-prone. Sequencing error can cause significant bias for population genetic analysis of a sequence sample.2. We present jPopGen Suite, an integrated tool for population genetic analysis of DNA polymorphisms from nucleotide sequences. It is specially designed for data with a non-negligible error rate, although it serves well for ""error-free"" data. It implements several methods for estimating the population mutation rate, population growth rate, and conducting neutrality tests.3. jPopGen Suite facilitates the population genetic analysis of NGS data in various applications, and is freely available for non-commercial users at http://sites.google.com/site/jpopgen/.",2012-03-02 +22923307,FacPad: Bayesian sparse factor modeling for the inference of pathways responsive to drug treatment.,"

Motivation

It is well recognized that the effects of drugs are far beyond targeting individual proteins, but rather influencing the complex interactions among many relevant biological pathways. Genome-wide expression profiling before and after drug treatment has become a powerful approach for capturing a global snapshot of cellular response to drugs, as well as to understand drugs' mechanism of action. Therefore, it is of great interest to analyze this type of transcriptomic profiling data for the identification of pathways responsive to different drugs. However, few computational tools exist for this task.

Results

We have developed FacPad, a Bayesian sparse factor model, for the inference of pathways responsive to drug treatments. This model represents biological pathways as latent factors and aims to describe the variation among drug-induced gene expression alterations in terms of a much smaller number of latent factors. We applied this model to the Connectivity Map data set (build 02) and demonstrated that FacPad is able to identify many drug-pathway associations, some of which have been validated in the literature. Although this method was originally designed for the analysis of drug-induced transcriptional alteration data, it can be naturally applied to many other settings beyond polypharmacology.

Availability and implementation

The R package 'FacPad' is publically available at: http://cran.open-source-solution.org/web/packages/FacPad/.",2012-08-24 +25447629,"Update on pharmacological cardiac stress testing: efficacy, risk stratification and patient selection.","Despite greater control of risk factors and improved treatments, coronary heart disease (CHD) remains a significant cause of mortality with 1 in every 4 deaths in the United States due to this disorder.(1) Cardiac stress tests have long been one of the most often utilized testing modalities used to identify patients suspected of having CHD, specifically coronary artery disease (CAD). These tests allow for noninvasive assessment of the coronary circulation and its ability to augment flow in response to physiologic demand. As with any diagnostic testing however, potential health risks as well as the financial burden of cardiovascular stress testing, must be weighed against the benefits and utility of the data procured. Given the rapidly evolving field of cardiac stress testing with respect to new risk stratification guidelines, new agents, and new assessment methods, it is difficult for physicians to remain up to date on the latest research and the benefits and risks of different testing modalities. A recent survey of primary care physicians and cardiologists conducted by the Elsevier Office of Continuing Medical Education found that approximately one-quarter of the cardiologists and primary care physicians surveyed do not feel confident identifying the factors which should be considered before ordering a cardiac stress test as part of pre-operative screening for a patient. Additionally, this survey also reported that primary care physicians reported a high degree of confidence in ordering the appropriate cardiac screening tests for patients yet, cardiologists reported that they frequently/somewhat frequently felt the need to change the test ordered by the internist. 
This educational intervention focuses on patient selection, exercise vs. pharmacologic stress testing, pharmacologic agents, and the importance of patient and doctor communication in ensuring the right test is recommended for the right patient. This CME Multimedia Activity is also available through the Website of The American Journal of Cardiology (www.amjmed.com). Click on the Multimedia button in the navigation bar for full access. Or access: http://elseviercme.com/538.",2014-10-15 +23842804,Allosite: a method for predicting allosteric sites.,"

Motivation

The use of allosteric modulators as preferred therapeutic agents against classic orthosteric ligands has colossal advantages, including higher specificity, fewer side effects and lower toxicity. Therefore, the computational prediction of allosteric sites in proteins is receiving increased attention in the field of drug discovery. Allosite is a newly developed automatic tool for the prediction of allosteric sites in proteins of interest and is now available through a web server.

Availability

The Allosite server and tutorials are freely available at http://mdl.shsmu.edu.cn/AST

Contact

jian.zhang@sjtu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-09 +24958812,A multiobjective method for robust identification of bacterial small non-coding RNAs.,"

Motivation

Small non-coding RNAs (sRNAs) have major roles in the post-transcriptional regulation in prokaryotes. The experimental validation of a relatively small number of sRNAs in few species requires developing computational algorithms capable of robustly encoding the available knowledge and using this knowledge to predict sRNAs within and across species.

Results

We present a novel methodology designed to identify bacterial sRNAs by incorporating the knowledge encoded by different sRNA prediction methods and optimally aggregating them as potential predictors. Because some of these methods emphasize specificity, whereas others emphasize sensitivity while detecting sRNAs, their optimal aggregation constitutes trade-off solutions between these two contradictory objectives that enhance their individual merits. Many non-redundant optimal aggregations uncovered by using multiobjective optimization techniques are then combined into a multiclassifier, which ensures robustness during detection and prediction even in genomes with distinct nucleotide composition. By training with sRNAs in Salmonella enterica Typhimurium, we were able to successfully predict sRNAs in Sinorhizobium meliloti, as well as in multiple and poorly annotated species. The proposed methodology, like a meta-analysis approach, may begin to lay a possible foundation for developing robust predictive methods across a wide spectrum of genomic variability.

Availability and implementation

Scripts created for the experimentation are available at http://m4m.ugr.es/SupInfo/sRNAOS/sRNAOSscripts.zip.

Contact

delval@decsai.ugr.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-06-23 +22689785,Minimum message length inference of secondary structure from protein coordinate data.,"

Motivation

Secondary structure underpins the folding pattern and architecture of most proteins. Accurate assignment of the secondary structure elements is therefore an important problem. Although many approximate solutions of the secondary structure assignment problem exist, the statement of the problem has resisted a consistent and mathematically rigorous definition. A variety of comparative studies have highlighted major disagreements in the way the available methods define and assign secondary structure to coordinate data.

Results

We report a new method to infer secondary structure based on the Bayesian method of minimum message length inference. It treats assignments of secondary structure as hypotheses that explain the given coordinate data. The method seeks to maximize the joint probability of a hypothesis and the data. There is a natural null hypothesis and any assignment that cannot better it is unacceptable. We developed a program SST based on this approach and compared it with popular programs, such as DSSP and STRIDE among others. Our evaluation suggests that SST gives reliable assignments even on low-resolution structures.

Availability

http://www.csse.monash.edu.au/~karun/sst.",2012-06-01 +23777655,β2 agonist for the treatment of acute lung injury: a systematic review and meta-analysis.,"

Background

The use of β2 agonist as an intervention for acute lung injury (ALI) and ARDS patients is controversial, so we performed a systematic review and meta-analysis of the published randomized controlled trials of using β2 agonists to improve outcomes (mortality and ventilator free days) among patients with ALI/ARDS.

Methods

A comprehensive search of 7 major databases (Ovid MEDLINE In-Process and other non-indexed citations, Ovid MEDLINE, Ovid EMBASE, Ovid Cochrane Central Register of Controlled Trials (CENTRAL), Ovid Cochrane Database of Systematic Reviews, Web of Science, and Scopus) for randomized controlled trials using β2 agonists for ALI from their origin to March 2013 was conducted. The effect size was measured by relative risk for dichotomous outcomes, and mean difference for continuous outcomes, with 95% CI. The statistical heterogeneity between the studies was assessed with the Cochran Q test and I(2) statistic. The heterogeneity of > 50% was considered significant for the analysis. The Cochrane risk of bias tool was used to ascertain the quality of the included studies.

Results

Out of 219 studies screened, 3 randomized controlled trials reported mortality and ventilator-free days, in 646 ALI/ARDS subjects. Of the 646 subjects, 334 (51.7%) received β2 agonist and 312 (48.3%) received placebo. There was no significant decrease in 28-day mortality or hospital mortality in the β2-agonist group: relative risk 1.04, 95% CI 0.50-2.16, and relative risk 1.22, 95% CI 0.95-1.56, respectively. The ventilator-free days and organ-failure-free days were significantly lower for the ALI subjects who received β2 agonists: mean difference -2.19 days (95% CI -3.68 to -1.99 d) and mean difference -2.04 days (95% CI -3.74 to -0.35 d), respectively.

Conclusions

In subjects with ALI/ARDS, β2 agonists were not only nonbeneficial in improving the survival, but were harmful and increased morbidity (reduced organ-failure-free days and ventilator-free days). The current evidence discourages the use of β2 agonist in ALI/ARDS patients. (International Prospective Register of Systematic Reviews, http://www.crd.york.ac.uk/prospero, 2012:CRD42012002616.).",2013-06-18 +23734609,VAMMPIRE: a matched molecular pairs database for structure-based drug design and optimization.,Structure-based optimization to improve the affinity of a lead compound is an established approach in drug discovery. Knowledge-based databases holding molecular replacements can be supportive in the optimization process. We introduce a strategy to relate the substitution effect within matched molecular pairs (MMPs) to the atom environment within the cocrystallized protein-ligand complex. Virtually Aligned Matched Molecular Pairs Including Receptor Environment (VAMMPIRE) database and the supplementary web interface ( http://vammpire.pharmchem.uni-frankfurt.de ) provide valuable information for structure-based lead optimization.,2013-06-18 +22923301,Bayesian inference of signaling network topology in a cancer cell line.,"

Motivation

Protein signaling networks play a key role in cellular function, and their dysregulation is central to many diseases, including cancer. To shed light on signaling network topology in specific contexts, such as cancer, requires interrogation of multiple proteins through time and statistical approaches to make inferences regarding network structure.

Results

In this study, we use dynamic Bayesian networks to make inferences regarding network structure and thereby generate testable hypotheses. We incorporate existing biology using informative network priors, weighted objectively by an empirical Bayes approach, and exploit a connection between variable selection and network inference to enable exact calculation of posterior probabilities of interest. The approach is computationally efficient and essentially free of user-set tuning parameters. Results on data where the true, underlying network is known place the approach favorably relative to existing approaches. We apply these methods to reverse-phase protein array time-course data from a breast cancer cell line (MDA-MB-468) to predict signaling links that we independently validate using targeted inhibition. The methods proposed offer a general approach by which to elucidate molecular networks specific to biological context, including, but not limited to, human cancers.

Availability

http://mukherjeelab.nki.nl/DBN (code and data).",2012-08-24 +23658419,DAPPLE: a pipeline for the homology-based prediction of phosphorylation sites.,"

Summary

While many experimentally characterized phosphorylation sites exist for certain organisms, such as human, rat and mouse, few sites are known for other organisms, hampering related research efforts. We have developed a software pipeline called DAPPLE that automates the process of using known phosphorylation sites from other organisms to identify putative sites in an organism of interest.

Availability

DAPPLE is available as a web server at http://saphire.usask.ca.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-08 +23789978,Communities' knowledge and perceptions of type two diabetes mellitus in Rwanda: a questionnaire survey.,"

Aims and objectives

To explore the level of knowledge and perceptions of T2DM among people in the Rwamagana district.

Background

Diabetes is one of the leading causes of death in the world. Knowledge of type 2 diabetes mellitus (T2DM) can assist in early detection of the disease and reduce the incidence of complications. Therefore, a descriptive study was conducted to determine the level of knowledge and perceptions of T2DM among people in the Rwamagana district, Rwanda.

Design

The study used a cluster multistage sampling technique to obtain a representative sample. The clusters were provinces, districts, sectors, household clusters and sample units selection. The Kigabiro sector was studied, and a sample size of 355 respondents was calculated using Raosoft Sample Size Calculator (Raosoft, Inc 2004, http://www.raosoft.com/samplesize.html).

Methods

A descriptive method, using questionnaires, was used for data collection. Data were analysed using descriptive statistics, contingency tables and chi-square test. The target population comprised 4556 people (women and men aged between 15-65 years) living in a sampled sector of Kigabiro.

Results

The level of knowledge of respondents was inadequate. Few respondents got a high score on questions intended to explore the knowledge of definition, signs, causes and risk factors of diabetes.

Conclusion

The perceptions were also poor and inadequate. The recommendations focused on education campaigns by the Kigabiro sector authorities.

Relevance to clinical practice

If people are knowledgeable on managing long-term conditions such as diabetes, there will be less expenditure on curative care. The healthcare services will have fewer burdens, and the focus will be on specific and relevant ailments.",2013-06-21 +23773402,"Quantitative assessment of the association between MHTFR C677T (rs1801133, Ala222Val) polymorphism and susceptibility to bladder cancer.","

Background

The association between Methylenetetrahydrofolate reductase (MTHFR) Ala222Val (rs1801133) has been implicated to alter the risk of bladder cancer, but the results are controversial.

Methods

A comprehensive databases of Pubmed, Embase, Web of Science, and the Chinese Biomedical Database (CBM) were searched for case-control studies investigating the association between MTHFR Ala222Val polymorphism and bladder cancer susceptibility. Odds ratios (OR) and 95% confidence intervals (95%CI) were used to assess this possible association. A χ2-based Q-test was used to examine the heterogeneity assumption. Begg's and Egger's test were used to examine the potential publication bias. The leave-one-out sensitivity analysis was conducted to determine whether our assumptions or decisions have a major effect on the results of the review. Statistical analysis was performed with the software program Stata 12.0.

Results

A total of 15 independent studies were identified, including 3,570 cases and 3,926 controls. Our analysis suggested that Ala222Val was not associated with bladder cancer risk in overall population under additive model (OR=0.96, 95%CI=0.76-1.21, P=0.731), dominant model (OR=1.00, 95%CI=0.87-1.15, P=0.975), recessive model (OR=0.92, 95%CI=0.79-1.07, P=0.279), and Ala allele versus Val allele (OR=0.96, 95%CI=0.86-1.07, P=0.427). In the subgroup analysis stratified by ethnicity and sources of controls, there were also no significant associations detected among different descent populations, population-based studies and hospital-based studies.

Conclusion

This meta-analysis showed the evidence that MTHFR Ala222Val polymorphism was not contributed to the development of bladder cancer.

Virtual slide

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2117182849994994.",2013-06-17 +24843006,Prospective identification of functionally distinct stem cells and neurosphere-initiating cells in adult mouse forebrain.,"Neurosphere formation is commonly used as a surrogate for neural stem cell (NSC) function but the relationship between neurosphere-initiating cells (NICs) and NSCs remains unclear. We prospectively identified, and isolated by flow cytometry, adult mouse lateral ventricle subventricular zone (SVZ) NICs as Glast(mid)EGFR(high)PlexinB2(high)CD24(-/low)O4/PSA-NCAM(-/low)Ter119/CD45(-) (GEPCOT) cells. They were highly mitotic and short-lived in vivo based on fate-mapping with Ascl1(CreERT2) and Dlx1(CreERT2). In contrast, pre-GEPCOT cells were quiescent, expressed higher Glast, and lower EGFR and PlexinB2. Pre-GEPCOT cells could not form neurospheres but expressed the stem cell markers Slc1a3-CreER(T), GFAP-CreER(T2), Sox2(CreERT2), and Gli1(CreERT2) and were long-lived in vivo. While GEPCOT NICs were ablated by temozolomide, pre-GEPCOT cells survived and repopulated the SVZ. Conditional deletion of the Bmi-1 polycomb protein depleted pre-GEPCOT and GEPCOT cells, though pre-GEPCOT cells were more dependent upon Bmi-1 for Cdkn2a (p16(Ink4a)) repression. Our data distinguish quiescent NSCs from NICs and make it possible to study their properties in vivo.DOI: http://dx.doi.org/10.7554/eLife.02669.001.",2014-05-07 +22600740,Cyber-T web server: differential analysis of high-throughput data.,"The Bayesian regularization method for high-throughput differential analysis, described in Baldi and Long (A Bayesian framework for the analysis of microarray expression data: regularized t-test and statistical inferences of gene changes. Bioinformatics 2001: 17: 509-519) and implemented in the Cyber-T web server, is one of the most widely validated. 
Cyber-T implements a t-test using a Bayesian framework to compute a regularized variance of the measurements associated with each probe under each condition. This regularized estimate is derived by flexibly combining the empirical measurements with a prior, or background, derived from pooling measurements associated with probes in the same neighborhood. This approach flexibly addresses problems associated with low replication levels and technology biases, not only for DNA microarrays, but also for other technologies, such as protein arrays, quantitative mass spectrometry and next-generation sequencing (RNA-seq). Here we present an update to the Cyber-T web server, incorporating several useful new additions and improvements. Several preprocessing data normalization options including logarithmic and (Variance Stabilizing Normalization) VSN transforms are included. To augment two-sample t-tests, a one-way analysis of variance is implemented. Several methods for multiple tests correction, including standard frequentist methods and a probabilistic mixture model treatment, are available. Diagnostic plots allow visual assessment of the results. The web server provides comprehensive documentation and example data sets. The Cyber-T web server, with R source code and data sets, is publicly available at http://cybert.ics.uci.edu/.",2012-05-16 +23517579,Estimating the similarity of alternative Affymetrix probe sets using transcriptional networks.,"

Background

The usefulness of the data from Affymetrix microarray analysis depends largely on the reliability of the files describing the correspondence between probe sets, genes and transcripts. Particularly, when a gene is targeted by several probe sets, these files should give information about the similarity of each alternative probe set pair. Transcriptional networks integrate the multiple correlations that exist between all probe sets and supply much more information than a simple correlation coefficient calculated for two series of signals. In this study, we used the PSAWN (Probe Set Assignment With Networks) programme we developed to investigate whether similarity of alternative probe sets resulted in some specific properties.

Findings

PSAWNpy delivered a full textual description of each probe set and information on the number and properties of secondary targets. PSAWNml calculated the similarity of each alternative probe set pair and allowed finding relationships between similarity and localisation of probes in common transcripts or exons. Similar alternative probe sets had very low negative correlation, high positive correlation and similar neighbourhood overlap. Using these properties, we devised a test that allowed grouping similar probe sets in a given network. By considering several networks, additional information concerning the similarity reproducibility was obtained, which allowed defining the actual similarity of alternative probe set pairs. In particular, we calculated the common localisation of probes in exons and in known transcripts and we showed that similarity was correctly correlated with them. The information collected on all pairs of alternative probe sets in the most popular 3' IVT Affymetrix chips is available in tabular form at http://bns.crbm.cnrs.fr/download.html.

Conclusions

These processed data can be used to obtain a finer interpretation when comparing microarray data between biological conditions. They are particularly well adapted for searching 3' alternative poly-adenylation events and can be also useful for studying the structure of transcriptional networks. The PSAWNpy, (in Python) and PSAWNml (in Matlab) programmes are freely available and can be downloaded at http://code.google.com/p/arraymatic. Tutorials and reference manuals are available at BMC Research Notes online (Additional file 1) or from http://bns.crbm.cnrs.fr/softwares.html.",2013-03-21 +24911613,IndividualizedPath: identifying genetic alterations contributing to the dysfunctional pathways in glioblastoma individuals.,"Due to the extensive complexity and high genetic heterogeneity of genetic alterations in cancer, comprehensively depicting the molecular mechanisms of cancer remains difficult. Characterizing personalized pathogenesis in cancer individuals can help to reveal new details of the complex mechanisms. In this study, we proposed an integrative method called IndividualizedPath to identify genetic alterations and their downstream risk pathways from the perspective of individuals through combining the DNA copy number, gene expression data and topological structures of biological pathways. By applying the method to TCGA glioblastoma multiforme (GBM) samples, we identified 394 gene-pathway pairs in 252 GBM individuals. We found that genes with copy number alterations showed high heterogeneity across GBM individuals, whereas they affected relatively consistent biological pathways. A global landscape of gene-pathway pairs showed that EGFR linked with multiple cancer-related biological pathways confers the highest risk of GBM. GBM individuals with MET-pathway pairs showed significantly shorter survival times than those with only MET amplification. 
Importantly, we found that the same risk pathways were affected by different genes in distinct groups of GBM individuals with a significant pattern of mutual exclusivity. Similarly, GBM subtype analysis revealed some subtype-specific gene-pathway pairs. In addition, we found that some rare copy number alterations had a large effect on contribution to numerous cancer-related pathways. In summary, our method offers the possibility to identify personalized cancer mechanisms, which can be applied to other types of cancer through the web server (http://bioinfo.hrbmu.edu.cn/IndividualizedPath/).",2014-08-01 +23769720,Daily vs. intermittent inhaled corticosteroids for recurrent wheezing and mild persistent asthma: a systematic review with meta-analysis.,"

Background

Intermittent ICS treatment with SABA in response to symptoms, is an emerging strategy for control of mild-to-moderate asthma, and recurrent wheezing. This systematic revue compares the efficacy of daily vs. intermittent ICS among preschoolers, children and adults with persistent wheezing and mild to moderate stable persistent asthma.

Methods

Systematic review of randomized, placebo-controlled trials with a minimum of 8 weeks of daily (daily ICS with rescue SABA during exacerbations) vs. intermittent ICS (ICS plus SABA at the onset of symptoms), were retrieved through different databases. Primary outcome was asthma exacerbations; secondary outcomes were pulmonary function tests, symptoms, days without symptoms, SABA use, corticosteroids use, days without rescue medication use, expired nitric oxide and serious adverse events.

Results

Seven trials (1367 participants) met inclusion criteria there was no statistically significant difference in the rate of asthma exacerbations between those with daily vs. intermittent ICS (0.96; 95% CI: 0.86, 1.06, I(2) = 0%). In the sub-group analysis, no differences were seen in duration of studies, step-up strategy or age. However, compared to intermittent ICS, the daily ICS group had a significant increase in asthma-free days and non-significant decreases in rescue SABA use and exhaled nitric oxide measurement.

Conclusions

No significant differences between daily and intermittent ICS in reducing the incidence of asthma exacerbations was found. However, the daily ICS strategy was superior in many secondary outcomes. Therefore, this study suggests to not change daily for intermittent ICS use among preschoolers, children with persistent wheezing and adults with mild-to-moderate stable persistent asthma. International prospective register of systematic reviews http://www.crd.york.ac.uk/PROSPERO/ (CRD42012003228).",2013-06-14 +23599922,LASAGNA-Search: an integrated web tool for transcription factor binding site search and visualization.,"The release of ChIP-seq data from the ENCyclopedia Of DNA Elements (ENCODE) and Model Organism ENCyclopedia Of DNA Elements (modENCODE) projects has significantly increased the amount of transcription factor (TF) binding affinity information available to researchers. However, scientists still routinely use TF binding site (TFBS) search tools to scan unannotated sequences for TFBSs, particularly when searching for lesser-known TFs or TFs in organisms for which ChIP-seq data are unavailable. The sequence analysis often involves multiple steps such as TF model collection, promoter sequence retrieval, and visualization; thus, several different tools are required. We have developed a novel integrated web tool named LASAGNA-Search that allows users to perform TFBS searches without leaving the web site. LASAGNA-Search uses the LASAGNA (Length-Aware Site Alignment Guided by Nucleotide Association) algorithm for TFBS alignment. Important features of LASAGNA-Search include (i) acceptance of unaligned variable-length TFBSs, (ii) a collection of 1726 TF models, (iii) automatic promoter sequence retrieval, (iv) visualization in the UCSC Genome Browser, and (v) gene regulatory network inference and visualization based on binding specificities. 
LASAGNA-Search is freely available at http://biogrid.engr.uconn.edu/lasagna_search/.",2013-03-01 +23162082,Stability analysis of phylogenetic trees.,"

Motivation

Phylogenetics, or reconstructing the evolutionary relationships of organisms, is critical for understanding evolution. A large number of heuristic algorithms for phylogenetics have been developed, some of which enable estimates of trees with tens of thousands of taxa. Such trees may not be robust, as small changes in the input data can cause major differences in the optimal topology. Tools that can assess the quality and stability of phylogenetic tree estimates and identify the most reliable parts of the tree are needed.

Results

We define measures that assess the stability of trees, subtrees and individual taxa with respect to changes in the input sequences. Our measures consider changes at the finest granularity in the input data (i.e. individual nucleotides). We demonstrate the effectiveness of our measures on large published datasets. Our measures are computationally feasible for phylogenetic datasets consisting of tens of thousands of taxa.

Availability

This software is available at http://bioinformatics.cise.ufl.edu/phylostab

Contact

sheikh@cise.ufl.edu",2012-11-18 +24728853,Codon Optimization OnLine (COOL): a web-based multi-objective optimization platform for synthetic gene design.,"

Summary

Codon optimization has been widely used for designing synthetic genes to improve their expression in heterologous host organisms. However, most of the existing codon optimization tools consider a single design criterion and/or implement a rather rigid user interface to yield only one optimal sequence, which may not be the best solution. Hence, we have developed Codon Optimization OnLine (COOL), which is the first web tool that provides the multi-objective codon optimization functionality to aid systematic synthetic gene design. COOL supports a simple and flexible interface for customizing various codon optimization parameters such as codon adaptation index, individual codon usage and codon pairing. In addition, users can visualize and compare the optimal synthetic sequences with respect to various fitness measures. User-defined DNA sequences can also be compared against the COOL optimized sequences to show the extent by which the user's sequences can be further improved.

Availability and implementation

COOL is free to academic and non-commercial users and licensed to others for a fee by the National University of Singapore. Accessible at http://bioinfo.bti.a-star.edu.sg/COOL/ CONTACT: cheld@nus.edu.sg

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-10 +23785434,Structural modeling and in silico analysis of human superoxide dismutase 2.,"Aging in the world population has increased every year. Superoxide dismutase 2 (Mn-SOD or SOD2) protects against oxidative stress, a main factor influencing cellular longevity. Polymorphisms in SOD2 have been associated with the development of neurodegenerative diseases, such as Alzheimer's and Parkinson's disease, as well as psychiatric disorders, such as schizophrenia, depression and bipolar disorder. In this study, all of the described natural variants (S10I, A16V, E66V, G76R, I82T and R156W) of SOD2 were subjected to in silico analysis using eight different algorithms: SNPeffect, PolyPhen-2, PhD-SNP, PMUT, SIFT, SNAP, SNPs&GO and nsSNPAnalyzer. This analysis revealed disparate results for a few of the algorithms. The results showed that, from at least one algorithm, each amino acid substitution appears to harmfully affect the protein. Structural theoretical models were created for variants through comparative modelling performed using the MHOLline server (which includes MODELLER and PROCHECK) and ab initio modelling, using the I-Tasser server. The predicted models were evaluated using TM-align, and the results show that the models were constructed with high accuracy. The RMSD values of the modelled mutants indicated likely pathogenicity for all missense mutations. Structural phylogenetic analysis using ConSurf revealed that human SOD2 is highly conserved. As a result, a human-curated database was generated that enables biologists and clinicians to explore SOD2 nsSNPs, including predictions of their effects and visualisation of the alignment of both the wild-type and mutant structures. The database is freely available at http://bioinfogroup.com/database and will be regularly updated.",2013-06-13 +23766418,Metingear: a development environment for annotating genome-scale metabolic models.,"

Unlabelled

Genome-scale metabolic models often lack annotations that would allow them to be used for further analysis. Previous efforts have focused on associating metabolites in the model with a cross reference, but this can be problematic if the reference is not freely available, multiple resources are used or the metabolite is added from a literature review. Associating each metabolite with chemical structure provides unambiguous identification of the components and a more detailed view of the metabolism. We have developed an open-source desktop application that simplifies the process of adding database cross references and chemical structures to genome-scale metabolic models. Annotated models can be exported to the Systems Biology Markup Language open interchange format.

Availability

Source code, binaries, documentation and tutorials are freely available at http://johnmay.github.com/metingear. The application is implemented in Java with bundles available for MS Windows and Macintosh OS X.",2013-06-13 +23418540,canEvolve: a web portal for integrative oncogenomics.,"

Background & objective

Genome-wide profiles of tumors obtained using functional genomics platforms are being deposited to the public repositories at an astronomical scale, as a result of focused efforts by individual laboratories and large projects such as the Cancer Genome Atlas (TCGA) and the International Cancer Genome Consortium. Consequently, there is an urgent need for reliable tools that integrate and interpret these data in light of current knowledge and disseminate results to biomedical researchers in a user-friendly manner. We have built the canEvolve web portal to meet this need.

Results

canEvolve query functionalities are designed to fulfill most frequent analysis needs of cancer researchers with a view to generate novel hypotheses. canEvolve stores gene, microRNA (miRNA) and protein expression profiles, copy number alterations for multiple cancer types, and protein-protein interaction information. canEvolve allows querying of results of primary analysis, integrative analysis and network analysis of oncogenomics data. The querying for primary analysis includes differential gene and miRNA expression as well as changes in gene copy number measured with SNP microarrays. canEvolve provides results of integrative analysis of gene expression profiles with copy number alterations and with miRNA profiles as well as generalized integrative analysis using gene set enrichment analysis. The network analysis capability includes storage and visualization of gene co-expression, inferred gene regulatory networks and protein-protein interaction information. Finally, canEvolve provides correlations between gene expression and clinical outcomes in terms of univariate survival analysis.

Conclusion

At present canEvolve provides different types of information extracted from 90 cancer genomics studies comprising of more than 10,000 patients. The presence of multiple data types, novel integrative analysis for identifying regulators of oncogenesis, network analysis and ability to query gene lists/pathways are distinctive features of canEvolve. canEvolve will facilitate integrative and meta-analysis of oncogenomics datasets.

Availability

The canEvolve web portal is available at http://www.canevolve.org/.",2013-02-13 +23761452,DigSee: Disease gene search engine with evidence sentences (version cancer).,"Biological events such as gene expression, regulation, phosphorylation, localization and protein catabolism play important roles in the development of diseases. Understanding the association between diseases and genes can be enhanced with the identification of involved biological events in this association. Although biological knowledge has been accumulated in several databases and can be accessed through the Web, there is no specialized Web tool yet allowing for a query into the relationship among diseases, genes and biological events. For this task, we developed DigSee to search MEDLINE abstracts for evidence sentences describing that 'genes' are involved in the development of 'cancer' through 'biological events'. DigSee is available through http://gcancer.org/digsee.",2013-06-12 +24934485,"KRAS, EGFR, PDGFR-α, KIT and COX-2 status in carcinoma showing thymus-like elements (CASTLE).","

Background

CASTLE (Carcinoma showing thymus-like elements) is a rare malignant neoplasm of the thyroid resembling lymphoepithelioma-like and squamous cell carcinoma of the thymus with different biological behaviour and a better prognosis than anaplastic carcinoma of the thyroid.

Methods

We retrospectively investigated 6 cases of this very rare neoplasm in order to investigate the mutational status of KRAS, EGFR, PDGFR-α and KIT, as well as the immunohistochemical expression pattern of CD117, EGFR and COX-2, and possibly find new therapeutic targets.

Results

Diagnosis was confirmed by a moderate to strong expression of CD5, CD117 and CK5/6, whereas thyroglobulin, calcitonin and TTF-1 were negative in all cases. Tumors were also positive for COX-2 and in nearly all cases for EGFR. In four cases single nucleotide polymorphisms (SNPs) could be detected in exon 12 of the PDGFR-α gene (rs1873778), in three cases SNPs were found in exon 20 of the EGFR gene (rs1050171). No mutations were found in the KIT and KRAS gene.

Conclusions

All tumors showed a COX-2 expression as well as an EGFR expression except for one case and a wild-type KRAS status. No activating mutations in the EGFR, KIT and PDGFR-α gene could be detected. Our data may indicate a potential for targeted therapies, but if these therapeutic strategies are of benefit in CASTLE remains to be determined.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1658499296115016.",2014-06-16 +23444319,Mixed modeling and sample size calculations for identifying housekeeping genes.,"Normalization of gene expression data using internal control genes that have biologically stable expression levels is an important process for analyzing reverse transcription polymerase chain reaction data. We propose a three-way linear mixed-effects model to select optimal housekeeping genes. The mixed-effects model can accommodate multiple continuous and/or categorical variables with sample random effects, gene fixed effects, systematic effects, and gene by systematic effect interactions. We propose using the intraclass correlation coefficient among gene expression levels as the stability measure to select housekeeping genes that have low within-sample variation. Global hypothesis testing is proposed to ensure that selected housekeeping genes are free of systematic effects or gene by systematic effect interactions. A gene combination with the highest lower bound of 95% confidence interval for intraclass correlation coefficient and no significant systematic effects is selected for normalization. Sample size calculation based on the estimation accuracy of the stability measure is offered to help practitioners design experiments to identify housekeeping genes. We compare our methods with geNorm and NormFinder by using three case studies. A free software package written in SAS (Cary, NC, U.S.A.) is available at http://d.web.umkc.edu/daih under software tab.",2013-02-26 +25184013,Adaptation of the CARE Guidelines for Therapeutic Massage and Bodywork Publications: Efforts To Improve the Impact of Case Reports.,"Case reports provide the foundation of practice-based evidence for therapeutic massage and bodywork (TMB), as well as many other health-related fields. 
To improve the consistency of information contained in case reports, the CARE (CAse REport) Group developed and published a set of guidelines for the medical community to facilitate systematic data collection (http://www.care-statement.org/#). Because of the differences between the practice of medicine and TMB, modifying some sections of the CARE guidelines is necessary to make them compatible with TMB case reports. Accordingly, the objectives of this article are to present the CARE guidelines, apply each section of the guidelines to TMB practice and reporting with suggested adaptations, and highlight concerns, new ideas, and other resources for potential authors of TMB case reports. The primary sections of the CARE guidelines adapted for TMB case reports are diagnostic assessment, follow-up and outcomes, and therapeutic intervention. Specifically, because diagnosis falls outside of the scope of most TMB practitioners, suggestions are made as to how diagnoses made by other health care providers should be included in the context of a TMB case report. Additionally, two new aspects of the case presentation section are recommended: a) assessment measures, which outline and describe the outcome measures on which the case report will focus, and b) a description of the TMB provider (i.e., scope of practice, practice environment, experience level, training, credentialing, and/or expertise) as part of the intervention description. This article culminates with practical resources for TMB practitioners writing case reports, including a TMB Case Report Template-a single document that TMB practitioners can use to guide his or her process of writing a case report. 
Once the template is adopted by authors of TMB case reports, future efforts can explore the impact on the quality and quantity of case reports and how they impact TMB practice, research, education and, ultimately, the clients.",2014-09-03 +22471441,PhosphoSiteAnalyzer: a bioinformatic platform for deciphering phospho proteomes using kinase predictions retrieved from NetworKIN.,"Phosphoproteomic experiments are routinely conducted in laboratories worldwide, and because of the fast development of mass spectrometric techniques and efficient phosphopeptide enrichment methods, researchers frequently end up having lists with tens of thousands of phosphorylation sites for further interrogation. To answer biologically relevant questions from these complex data sets, it becomes essential to apply computational, statistical, and predictive analytical methods. Here we provide an advanced bioinformatic platform termed ""PhosphoSiteAnalyzer"" to explore large phosphoproteomic data sets that have been subjected to kinase prediction using the previously published NetworKIN algorithm. NetworKIN applies sophisticated linear motif analysis and contextual network modeling to obtain kinase-substrate associations with high accuracy and sensitivity. PhosphoSiteAnalyzer provides an algorithm to retrieve kinase predictions from the public NetworKIN webpage in a semiautomated way and applies hereafter advanced statistics to facilitate a user-tailored in-depth analysis of the phosphoproteomic data sets. The interface of the software provides a high degree of analytical flexibility and is designed to be intuitive for most users. PhosphoSiteAnalyzer is a freeware program available at http://phosphosite.sourceforge.net .",2012-05-23 +25183767,Photoplethysmographic measurement of various retinal vascular pulsation parameters and measurement of the venous phase delay.,"

Purpose

Retinal vein pulsation properties are altered by glaucoma, intracranial pressure (ICP) changes, and retinal venous occlusion, but measurements are limited to threshold measures or manual observation from video frames. We developed an objective retinal vessel pulsation measurement technique, assessed its repeatability, and used it to determine the phase relations between retinal arteries and veins.

Methods

Twenty-three eyes of 20 glaucoma patients had video photograph recordings from their optic nerve and peripapillary retina. A modified photoplethysmographic system using video recordings taken through an ophthalmodynamometer and timed to the cardiac cycle was used. Aligned video frames of vessel segments were analyzed for blood column light absorbance, and waveform analysis was applied. Coefficient of variation (COV) was calculated from data series using recordings taken within ±1 unit ophthalmodynamometric force of each other. The time in cardiac cycles and seconds of the peak (dilation) and trough (constriction) points of the retinal arterial and vein pulse waveforms were measured.

Results

Mean vein peak time COV was 3.4%, and arterial peak time COV was 4.4%. Lower vein peak occurred at 0.044 cardiac cycles (0.040 seconds) after the arterial peak (P = 0.0001), with upper vein peak an insignificant 0.019 cardiac cycles later. No difference in COV for any parameter was found between upper or lower hemiveins. Mean vein amplitude COV was 12.6%, and mean downslope COV was 17.7%.

Conclusions

This technique demonstrates a small retinal venous phase lag behind arterial pulse. It is objective and applicable to any eye with clear ocular media and has moderate to high reproducibility. ( http://www.anzctr.org.au number, ACTRN12608000274370.).",2014-09-02 +22693224,PsRobot: a web-based plant small RNA meta-analysis toolbox.,"Small RNAs (smRNAs) in plants, mainly microRNAs and small interfering RNAs, play important roles in both transcriptional and post-transcriptional gene regulation. The broad application of high-throughput sequencing technology has made routinely generation of bulk smRNA sequences in laboratories possible, thus has significantly increased the need for batch analysis tools. PsRobot is a web-based easy-to-use tool dedicated to the identification of smRNAs with stem-loop shaped precursors (such as microRNAs and short hairpin RNAs) and their target genes/transcripts. It performs fast analysis to identify smRNAs with stem-loop shaped precursors among batch input data and predicts their targets using a modified Smith-Waterman algorithm. PsRobot integrates the expression data of smRNAs in major plant smRNA biogenesis gene mutants and smRNA-associated protein complexes to give clues to the smRNA generation and functional processes. Besides improved specificity, the reliability of smRNA target prediction results can also be evaluated by mRNA cleavage (degradome) data. The cross species conservation statuses and the multiplicity of smRNA target sites are also provided. PsRobot is freely accessible at http://omicslab.genetics.ac.cn/psRobot/.",2012-06-12 +24782338,"Development and validation of the HScore, a score for the diagnosis of reactive hemophagocytic syndrome.","

Objective

Because it has no unique clinical, biologic, or histologic features, reactive hemophagocytic syndrome may be difficult to distinguish from other diseases such as severe sepsis or hematologic malignancies. This study was undertaken to develop and validate a diagnostic score for reactive hemophagocytic syndrome.

Methods

A multicenter retrospective cohort of 312 patients who were judged by experts to have reactive hemophagocytic syndrome (n = 162), were judged by experts to not have reactive hemophagocytic syndrome (n = 104), or in whom the diagnosis of reactive hemophagocytic syndrome was undetermined (n = 46) was used to construct and validate the reactive hemophagocytic syndrome diagnostic score, called the HScore. Ten explanatory variables were evaluated for their association with the diagnosis of hemophagocytic syndrome, and logistic regression was used to calculate the weight of each criterion included in the score. Performance of the score was assessed using developmental and validation data sets.

Results

Nine variables (3 clinical [i.e., known underlying immunosuppression, high temperature, organomegaly], 5 biologic [i.e., triglyceride, ferritin, serum glutamic oxaloacetic transaminase, and fibrinogen levels, cytopenia], and 1 cytologic [i.e., hemophagocytosis features on bone marrow aspirate]) were retained in the HScore. The possible number of points assigned to each variable ranged from 0-18 for known underlying immunosuppression to 0-64 for triglyceride level. The median HScore was 230 (interquartile range [IQR] 203-257) for patients with a positive diagnosis of reactive hemophagocytic syndrome and 125 (IQR 91-150) for patients with a negative diagnosis. The probability of having hemophagocytic syndrome ranged from <1% with an HScore of ≤90 to >99% with an HScore of ≥250.

Conclusion

The HScore can be used to estimate an individual's risk of having reactive hemophagocytic syndrome. This scoring system is freely available online (http://saintantoine.aphp.fr/score/).",2014-09-01 +22018222,"GeoSymbio: a hybrid, cloud-based web application of global geospatial bioinformatics and ecoinformatics for Symbiodinium-host symbioses.","The genus Symbiodinium encompasses a group of unicellular, photosynthetic dinoflagellates that are found free living or in hospite with a wide range of marine invertebrate hosts including scleractinian corals. We present GeoSymbio, a hybrid web application that provides an online, easy to use and freely accessible interface for users to discover, explore and utilize global geospatial bioinformatic and ecoinformatic data on Symbiodinium-host symbioses. The novelty of this application lies in the combination of a variety of query and visualization tools, including dynamic searchable maps, data tables with filter and grouping functions, and interactive charts that summarize the data. Importantly, this application is hosted remotely or 'in the cloud' using Google Apps, and therefore does not require any specialty GIS, web programming or data programming expertise from the user. The current version of the application utilizes Symbiodinium data based on the ITS2 genetic marker from PCR-based techniques, including denaturing gradient gel electrophoresis, sequencing and cloning of specimens collected during 1982-2010. All data elements of the application are also downloadable as spatial files, tables and nucleic acid sequence files in common formats for desktop analysis. The application provides a unique tool set to facilitate research on the basic biology of Symbiodinium and expedite new insights into their ecology, biogeography and evolution in the face of a changing global climate. GeoSymbio can be accessed at https://sites.google.com/site/geosymbio/.",2011-10-24 +23328955,Measurement of lifespan in Drosophila melanogaster. 
,"Aging is a phenomenon that results in steady physiological deterioration in nearly all organisms in which it has been examined, leading to reduced physical performance and increased risk of disease. Individual aging is manifest at the population level as an increase in age-dependent mortality, which is often measured in the laboratory by observing lifespan in large cohorts of age-matched individuals. Experiments that seek to quantify the extent to which genetic or environmental manipulations impact lifespan in simple model organisms have been remarkably successful for understanding the aspects of aging that are conserved across taxa and for inspiring new strategies for extending lifespan and preventing age-associated disease in mammals. The vinegar fly, Drosophila melanogaster, is an attractive model organism for studying the mechanisms of aging due to its relatively short lifespan, convenient husbandry, and facile genetics. However, demographic measures of aging, including age-specific survival and mortality, are extraordinarily susceptible to even minor variations in experimental design and environment, and the maintenance of strict laboratory practices for the duration of aging experiments is required. These considerations, together with the need to practice careful control of genetic background, are essential for generating robust measurements. Indeed, there are many notable controversies surrounding inference from longevity experiments in yeast, worms, flies and mice that have been traced to environmental or genetic artifacts(1-4). In this protocol, we describe a set of procedures that have been optimized over many years of measuring longevity in Drosophila using laboratory vials. We also describe the use of the dLife software, which was developed by our laboratory and is available for download (http://sitemaker.umich.edu/pletcherlab/software). 
dLife accelerates throughput and promotes good practices by incorporating optimal experimental design, simplifying fly handling and data collection, and standardizing data analysis. We will also discuss the many potential pitfalls in the design, collection, and interpretation of lifespan data, and we provide steps to avoid these dangers.",2013-01-07 +23429380,HIV-2EU: supporting standardized HIV-2 drug resistance interpretation in Europe.,"Considering human immunodeficiency virus type 2 (HIV-2) phenotypic data and experience from HIV type 1 and from the follow-up of HIV-2-infected patients, a panel of European experts voted on a rule set for interpretation of mutations in HIV-2 protease, reverse transcriptase, and integrase and an automated tool for HIV-2 drug resistance analyses freely available on the Internet (http://www.hiv-grade.de).",2013-02-19 +22540149,"EUCAST technical note on Aspergillus and amphotericin B, itraconazole, and posaconazole.","The European Committee on Antimicrobial Susceptibility Testing Subcommittee on Antifungal Susceptibility Testing (EUCAST-AFST) has determined breakpoints for amphotericin B, itraconazole and posaconazole for Aspergillus species. This Technical Note is based on the EUCAST amphotericin B, itraconazole and posaconazole rationale documents (available on the EUCAST website: http://www.eucast.org/antifungal_susceptibility_testing_afst/rationale_documents_for_antifungals/). The amphotericin B and itraconazole breakpoints are based on epidemiological cut-off values and clinical experience. The posaconazole breakpoints are also based on pharmacokinetic and pharmacodynamic data. Breakpoints will be reviewed regularly or when new data emerge.",2012-04-30 +25395861,Five-year tracking of Plasmodium falciparum allele frequencies in a holoendemic area with indistinct seasonal transitions.,"

Background

The renewed malaria eradication efforts require an understanding of the seasonal patterns of frequency of polymorphic variants in order to focus limited funds productively. Although cross-sectional studies in holoendemic areas spanning a single year could be useful in describing parasite genotype status at a given point, such information is inadequate in describing temporal trends in genotype polymorphisms. For Plasmodium falciparum isolates from Kisumu District Hospital, Plasmodium falciparum chloroquine-resistance transporter gene (Pfcrt-K76T) and P. falciparum multidrug resistance gene 1 (PfMDR1-N86Y), were analyzed for polymorphisms and parasitemia changes in the 53 months from March 2008 to August 2012. Observations were compared with prevailing climatic factors, including humidity, rainfall, and temperature.

Methods

Parasitemia (the percentage of infected red blood cells per total red blood cells) was established by microscopy for P. falciparum malaria-positive samples. P. falciparum DNA was extracted from whole blood using a Qiagen DNA Blood Mini Kit. Single nucleotide polymorphism identification at positions Pfcrt-K76T and PfMDR1-N86Y was performed using real-time polymerase chain reaction and/or sequencing. Data on climatic variables were obtained from http://www.tutiempo.net/en/.

Results

A total of 895 field isolates from 2008 (n=169), 2009 (n=161), 2010 (n=216), 2011 (n=223), and 2012 (n=126) showed large variations in monthly frequency of PfMDR1-N86Y and Pfcrt-K76T as the mutant genotypes decreased from 68.4%±15% and 38.1%±13% to 29.8%±18% and 13.3%±9%, respectively. The mean percentage of parasitemia was 2.61%±1.01% (coefficient of variation 115.86%; n=895). There was no correlation between genotype or parasitemia and climatic factors.

Conclusion

This study shows variability in the frequency of Pfcrt-K76T and PfMDR1-N86Y polymorphisms during the study period, bringing into focus the role of cross-sectional studies in describing temporal genotype trends. The lack of correlation between genotypes and climatic changes, especially precipitation, emphasizes the cost of investment in genotype change.",2014-11-06 +24526832,OmicCircos: A Simple-to-Use R Package for the Circular Visualization of Multidimensional Omics Data.,"

Summary

OmicCircos is an R software package used to generate high-quality circular plots for visualizing genomic variations, including mutation patterns, copy number variations (CNVs), expression patterns, and methylation patterns. Such variations can be displayed as scatterplot, line, or text-label figures. Relationships among genomic features in different chromosome positions can be represented in the forms of polygons or curves. Utilizing the statistical and graphic functions in an R/Bioconductor environment, OmicCircos performs statistical analyses and displays results using cluster, boxplot, histogram, and heatmap formats. In addition, OmicCircos offers a number of unique capabilities, including independent track drawing for easy modification and integration, zoom functions, link-polygons, and position-independent heatmaps supporting detailed visualization.

Availability and implementation

OmicCircos is available through Bioconductor at http://www.bioconductor.org/packages/devel/bioc/html/OmicCircos.html. An extensive vignette in the package describes installation, data formatting, and workflow procedures. The software is open source under the Artistic-2.0 license.",2014-01-16 +23981351,"A combined omics study on activated macrophages--enhanced role of STATs in apoptosis, immunity and lipid metabolism.","

Background

Macrophage activation by lipopolysaccharide and adenosine triphosphate (ATP) has been studied extensively because this model system mimics the physiological context of bacterial infection and subsequent inflammatory responses. Previous studies on macrophages elucidated the biological roles of caspase-1 in post-translational activation of interleukin-1β and interleukin-18 in inflammation and apoptosis. However, the results from these studies focused only on a small number of factors. To better understand the host response, we have performed a high-throughput study of Kdo2-lipid A (KLA)-primed macrophages stimulated with ATP.

Results

The study suggests that treating mouse bone marrow-derived macrophages with KLA and ATP produces 'synergistic' effects that are not seen with treatment of KLA or ATP alone. The synergistic regulation of genes related to immunity, apoptosis and lipid metabolism is observed in a time-dependent manner. The synergistic effects are produced by nuclear factor kappa-light-chain-enhancer of activated B cells (NF-kB) and activator protein (AP)-1 through regulation of their target cytokines. The synergistically regulated cytokines then activate signal transducer and activator of transcription (STAT) factors that result in enhanced immunity, apoptosis and lipid metabolism; STAT1 enhances immunity by promoting anti-microbial factors; and STAT3 contributes to downregulation of cell cycle and upregulation of apoptosis. STAT1 and STAT3 also regulate glycerolipid and eicosanoid metabolism, respectively. Further, western blot analysis for STAT1 and STAT3 showed that the changes in transcriptomic levels were consistent with their proteomic levels. In summary, this study shows the synergistic interaction between the toll-like receptor and purinergic receptor signaling during macrophage activation on bacterial infection.

Availability

Time-course data of transcriptomics and lipidomics can be queried or downloaded from http://www.lipidmaps.org.

Contact

shankar@ucsd.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-08-26 +21292511,"Robot-assisted laparoscopic partial nephrectomy for tumors greater than 4 cm and high nephrometry score: feasibility, renal functional, and oncological outcomes with minimum 1 year follow-up.","

Objectives

Minimally invasive robotic assistance is being increasingly utilized to treat larger complex renal masses. We report on the technical feasibility and renal functional and oncologic outcomes with minimum 1 year follow-up of robot-assisted laparoscopic partial nephrectomy (RALPN) for tumors greater than 4 cm.

Materials and methods

The urologic oncology database was queried to identify patients treated with RALPN for tumors greater than 4 cm and a minimum follow-up of 12 months. We identified 19 RALPN on 17 patients treated between June 2007 and July 2009. Two patients underwent staged bilateral RALPN. Demographic, operative, and pathologic data were collected. Renal function was assessed by serum creatinine levels, estimated glomerular filtration rate, and nuclear renal scans assessed at baseline, 3, and 12 months postoperatively. All tumors were assigned R.E.N.A.L. nephrometry scores (http://www.nephrometry.com).

Results

The median nephrometry score for the largest tumor from each kidney was 9 (range 6-11) while the median size was 5 cm (range 4.1-15). Three of 19 cases (16%) required intraoperative conversion to open partial nephrectomy. No renal units were lost. There were no statistically significant differences between preoperative and postoperative creatinine and eGFR. A statistically significant decline of ipsilateral renal scan function (49% vs. 46.5%, P = 0.006) was observed at 3 months and at 12 months postoperatively (49% vs. 45.5%, P = 0.014). None of the patients had evidence of recurrence or metastatic disease at a median follow-up of 22 months (range 12-36).

Conclusions

RALPN is feasible for renal tumors greater than 4 cm with moderate or high nephrometry scores. Although there was a modest decline in renal function of the operated unit, RALPN may afford the ability to resect challenging tumors requiring complex renal reconstruction. The renal functional and oncologic outcomes are promising at a median follow-up of 22 months, but longer follow-up is required.",2011-02-02 +23104896,Rare variant discovery and calling by sequencing pooled samples with overlaps.,"

Motivation

For many complex traits/diseases, it is believed that rare variants account for some of the missing heritability that cannot be explained by common variants. Sequencing a large number of samples through DNA pooling is a cost-effective strategy to discover rare variants and to investigate their associations with phenotypes. Overlapping pool designs provide further benefit because such approaches can potentially identify variant carriers, which is important for downstream applications of association analysis of rare variants. However, existing algorithms for analysing sequence data from overlapping pools are limited.

Results

We propose a complete data analysis framework for overlapping pool designs, with novelties in all three major steps: variant pool and variant locus identification, variant allele frequency estimation and variant sample decoding. The framework can be used in combination with any design matrix. We have investigated its performance based on two different overlapping designs and have compared it with three state-of-the-art methods, by simulating targeted sequencing and by pooling real sequence data. Results on both datasets show that our algorithm has made significant improvements over existing ones. In conclusion, successful discovery of rare variants and identification of variant carriers using overlapping pool strategies critically depend on many steps, from generation of design matrixes to decoding algorithms. The proposed framework in combination with the design matrixes generated based on the Chinese remainder theorem achieves best overall results.

Availability

Source code of the program, termed VIP for Variant Identification by Pooling, is available at http://cbc.case.edu/VIP.",2012-10-27 +24700317,Fast alignment-free sequence comparison using spaced-word frequencies.,"

Motivation

Alignment-free methods for sequence comparison are increasingly used for genome analysis and phylogeny reconstruction; they circumvent various difficulties of traditional alignment-based approaches. In particular, alignment-free methods are much faster than pairwise or multiple alignments. They are, however, less accurate than methods based on sequence alignment. Most alignment-free approaches work by comparing the word composition of sequences. A well-known problem with these methods is that neighbouring word matches are far from independent.

Results

To reduce the statistical dependency between adjacent word matches, we propose to use 'spaced words', defined by patterns of 'match' and 'don't care' positions, for alignment-free sequence comparison. We describe a fast implementation of this approach using recursive hashing and bit operations, and we show that further improvements can be achieved by using multiple patterns instead of single patterns. To evaluate our approach, we use spaced-word frequencies as a basis for fast phylogeny reconstruction. Using real-world and simulated sequence data, we demonstrate that our multiple-pattern approach produces better phylogenies than approaches relying on contiguous words.

Availability and implementation

Our program is freely available at http://spaced.gobics.de/.",2014-04-03 +24344194,SILVER: an efficient tool for stable isotope labeling LC-MS data quantitative analysis with quality control methods.,"

Summary

With the advance of experimental technologies, different stable isotope labeling methods have been widely applied to quantitative proteomics. Here, we present an efficient tool named SILVER for processing the stable isotope labeling mass spectrometry data. SILVER implements novel methods for quality control of quantification at spectrum, peptide and protein levels, respectively. Several new quantification confidence filters and indices are used to improve the accuracy of quantification results. The performance of SILVER was verified and compared with MaxQuant and Proteome Discoverer using a large-scale dataset and two standard datasets. The results suggest that SILVER shows high accuracy and robustness while consuming much less processing time. Additionally, SILVER provides user-friendly interfaces for parameter setting, result visualization, manual validation and some useful statistics analyses.

Availability and implementation

SILVER and its source codes are freely available under the GNU General Public License v3.0 at http://bioinfo.hupo.org.cn/silver.",2013-12-15 +24524870,Impaired white-matter integrity in photosensitive epilepsy: a DTI study using tract-based spatial statistics.,"

Background and purpose

The present study was designed to map alterations in brain white-matter in photosensitive epilepsy (PSE) by applying tract-based spatial statistics (TBSS) analysis.

Methods

Diffusion tensor-imaging (DTI) data from MRI brain scans were collected from eight PSE patients and 16 gender- and age-matched non-epileptic controls using a SIEMENS Trio 3.0-Tesla scanner. For the white-matter analysis, DTI scans were processed using FSL software (http://www.fmrib.ox.ac.uk/fsl/index.html). Fractional anisotropy (FA) values in the PSE and control groups were compared using TBSS analysis corrected for multiple comparisons using threshold-free cluster enhancement.

Results

Compared with the control subjects, the corpus callosum of PSE patients had significantly lower FA values.

Conclusion

Our DTI study indicates that white-matter in the corpus callosum was abnormal in PSE patients, and that DTI methods can serve as useful non-invasive tools to evaluate white-matter changes in PSE patients.",2014-02-10 +23876513,A comparative study of covariance selection models for the inference of gene regulatory networks.,"

Motivation

The inference, or 'reverse-engineering', of gene regulatory networks from expression data and the description of the complex dependency structures among genes are open issues in modern molecular biology.

Results

In this paper we compared three regularized methods of covariance selection for the inference of gene regulatory networks, developed to circumvent the problems arising when the number of observations n is smaller than the number of genes p. The examined approaches provided three alternative estimates of the inverse covariance matrix: (a) the 'PINV' method is based on the Moore-Penrose pseudoinverse, (b) the 'RCM' method performs correlation between regression residuals and (c) 'ℓ(2C)' method maximizes a properly regularized log-likelihood function. Our extensive simulation studies showed that ℓ(2C) outperformed the other two methods having the most predictive partial correlation estimates and the highest values of sensitivity to infer conditional dependencies between genes even when only a small number of observations was available. The application of this method for inferring gene networks of the isoprenoid biosynthesis pathways in Arabidopsis thaliana revealed a negative partial correlation coefficient between the two hubs in the two isoprenoid pathways and, more importantly, provided evidence of cross-talk between genes in the plastidial and the cytosolic pathways. When applied to gene expression data relative to a signature of HRAS oncogene in human cell cultures, the method revealed 9 genes (p-value<0.0005) directly interacting with HRAS, sharing the same Ras-responsive binding site for the transcription factor RREB1. This result suggests that the transcriptional activation of these genes is mediated by a common transcription factor downstream of Ras signaling.

Availability

Software implementing the methods in the form of Matlab scripts are available at: http://users.ba.cnr.it/issia/iesina18/CovSelModelsCodes.zip.",2013-07-20 +24307375,Robust diagnostic genetic testing using solution capture enrichment and a novel variant-filtering interface.,"Targeted hybridization enrichment prior to next-generation sequencing is a widespread method for characterizing sequence variation in a research setting, and is being adopted by diagnostic laboratories. However, the number of variants identified can overwhelm clinical laboratories with strict time constraints, the final interpretation of likely pathogenicity being a particular bottleneck. To address this, we have developed an approach in which, after automatic variant calling on a standard unix pipeline, subsequent variant filtering is performed interactively, using AgileExomeFilter and AgilePindelFilter (http://dna.leeds.ac.uk/agile), tools designed for clinical scientists with standard desktop computers. To demonstrate the method's diagnostic efficacy, we tested 128 patients using (1) a targeted capture of 36 cancer-predisposing genes or (2) whole-exome capture for diagnosis of the genetically heterogeneous disorder primary ciliary dyskinesia (PCD). In the cancer cohort, complete concordance with previous diagnostic data was achieved across 793 variant genotypes. A high yield (42%) was also achieved for exome-based PCD diagnosis, underscoring the scalability of our method. Simple adjustments to the variant filtering parameters further allowed the identification of a homozygous truncating mutation in a presumptive new PCD gene, DNAH8. These tools should allow diagnostic laboratories to expand their testing portfolios flexibly, using a standard set of reagents and techniques.",2014-04-01 +22748151,GenomeGems: evaluation of genetic variability from deep sequencing data.,"

Background

Detection of disease-causing mutations using Deep Sequencing technologies possesses great challenges. In particular, organizing the great amount of sequences generated so that mutations, which might possibly be biologically relevant, are easily identified is a difficult task. Yet, for this assignment only limited automatic accessible tools exist.

Findings

We developed GenomeGems to gap this need by enabling the user to view and compare Single Nucleotide Polymorphisms (SNPs) from multiple datasets and to load the data onto the UCSC Genome Browser for an expanded and familiar visualization. As such, via automatic, clear and accessible presentation of processed Deep Sequencing data, our tool aims to facilitate ranking of genomic SNP calling. GenomeGems runs on a local Personal Computer (PC) and is freely available at http://www.tau.ac.il/~nshomron/GenomeGems.

Conclusions

GenomeGems enables researchers to identify potential disease-causing SNPs in an efficient manner. This enables rapid turnover of information and leads to further experimental SNP validation. The tool allows the user to compare and visualize SNPs from multiple experiments and to easily load SNP data onto the UCSC Genome browser for further detailed information.",2012-07-02 +24265680,"SomatiCA: identifying, characterizing and quantifying somatic copy number aberrations from cancer genome sequencing data.","Whole genome sequencing of matched tumor-normal sample pairs is becoming routine in cancer research. However, analysis of somatic copy-number changes from sequencing data is still challenging because of insufficient sequencing coverage, unknown tumor sample purity and subclonal heterogeneity. Here we describe a computational framework, named SomatiCA, which explicitly accounts for tumor purity and subclonality in the analysis of somatic copy-number profiles. Taking read depths (RD) and lesser allele frequencies (LAF) as input, SomatiCA will output 1) admixture rate for each tumor sample, 2) somatic allelic copy-number for each genomic segment, 3) fraction of tumor cells with subclonal change in each somatic copy number aberration (SCNA), and 4) a list of substantial genomic aberration events including gain, loss and LOH. SomatiCA is available as a Bioconductor R package at http://www.bioconductor.org/packages/2.13/bioc/html/SomatiCA.html.",2013-11-12 +22113810,A RESTful image gateway for multiple medical image repositories.,"Mobile technologies are increasingly important components in telemedicine systems and are becoming powerful decision support tools. Universal access to data may already be achieved by resorting to the latest generation of tablet devices and smartphones. However, the protocols employed for communicating with image repositories are not suited to exchange data with mobile devices. 
In this paper, we present an extensible approach to solving the problem of querying and delivering data in a format that is suitable for the bandwidth and graphic capacities of mobile devices. We describe a three-tiered component-based gateway that acts as an intermediary between medical applications and a number of Picture Archiving and Communication Systems (PACS). The interface with the gateway is accomplished using Hypertext Transfer Protocol (HTTP) requests following a Representational State Transfer (REST) methodology, which relieves developers from dealing with complex medical imaging protocols and allows the processing of data on the server side.",2011-11-18 +23806045,Genetic parts to a preventive medicine whole.,"Integration of clinical evaluations and whole-genome sequence data from eight individuals in a recent study demonstrates that genetic and clinical information can be combined and applied to preventive medicine. Statistical and graphical tools were developed to assess and visualize the genetic risk of common chronic conditions and to show the changes in disease risk that result from monitoring clinical symptoms over time. This approach provides a direction to consider in the adoption of genetic information in health care, but, like all provocative scientific articles, it raises as many questions as it answers.

Please see related research

http://genomemedicine.com/content/5/6/58.",2013-06-27 +25480758,"Traditional health practitioners' perceptions, herbal treatment and management of HIV and related opportunistic infections.","

Background

In South Africa, traditional health practitioners' (THPs) explanatory frameworks concerning illness aetiologies are much researched. However there is a gap in the literature on how THPs understand HIV-related opportunistic infections (OIs), i.e. tuberculosis, candidiasis and herpes zoster. This study aimed to comprehend THPs' understandings of the aforementioned; to ascertain and better understand the treatment methods used by THPs for HIV and OIs, while also contributing to the documentation of South African medicinal plants for future conservation.

Methods

The study was conducted in two locations: Strand, Western Cape where THPs are trained and Mpoza village, Mount Frere, Eastern Cape from where medicinal plants are ordered or collected. Semi-structured interviews were conducted with 53 THPs of whom 36 were diviners (amagrirha: isangoma) and 17 herbalists (inyanga). THPs were selected through a non-probability ""snowball"" method. Data were analysed using a thematic content analysis approach. An ethnobotanical survey was conducted and plants used to manage HIV and OIs were collected. A complete set of voucher specimens was deposited at the University of the Western Cape Herbarium for identification. Plant names were checked and updated with Kew's online website http://www.theplantlist.org .

Results

THPs conceptualise the aetiology of HIV and OIs at two related levels. The first involves the immediate manifestation of the illness/condition because of a viral infection in the blood (HIV), the presence of bacteria in the lungs (tuberculosis), or weakened state of the body making it susceptible to OIs. The presence of OIs is indicative of the probable presence of HIV. The second level of causation affects the first, which includes pollution, changes in cultural sexual norms, witchcraft, environmental factors, and lack of adherence to ancestral rituals. THPs reported using 17 plants belonging to 12 families. Remedies included mixes of up to five plants.

Conclusion

This study explored the THPs' perspectives on HIV and commonly associated OIs and their herbal treatment methods. THPs generally rely on biomedical diagnosis before treating a client. They also seek guidance from the ancestors for a particular diagnosis, the plants to use for a specific treatment, when to harvest, and how to administer herbal remedies.",2014-12-05 +23428640,Making automated multiple alignments of very large numbers of protein sequences.,"

Motivation

Recent developments in sequence alignment software have made possible multiple sequence alignments (MSAs) of >100 000 sequences in reasonable times. At present, there are no systematic analyses concerning the scalability of the alignment quality as the number of aligned sequences is increased.

Results

We benchmarked a wide range of widely used MSA packages using a selection of protein families with some known structures and found that the accuracy of such alignments decreases markedly as the number of sequences grows. This is more or less true of all packages and protein families. The phenomenon is mostly due to the accumulation of alignment errors, rather than problems in guide-tree construction. This is partly alleviated by using iterative refinement or selectively adding sequences. The average accuracy of progressive methods by comparison with structure-based benchmarks can be improved by incorporating information derived from high-quality structural alignments of sequences with solved structures. This suggests that the availability of high quality curated alignments will have to complement algorithmic and/or software developments in the long-term.

Availability and implementation

Benchmark data used in this study are available at http://www.clustal.org/omega/homfam-20110613-25.tar.gz and http://www.clustal.org/omega/bali3fam-26.tar.gz.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-21 +21856757,The modENCODE Data Coordination Center: lessons in harvesting comprehensive experimental details.,"The model organism Encyclopedia of DNA Elements (modENCODE) project is a National Human Genome Research Institute (NHGRI) initiative designed to characterize the genomes of Drosophila melanogaster and Caenorhabditis elegans. A Data Coordination Center (DCC) was created to collect, store and catalog modENCODE data. An effective DCC must gather, organize and provide all primary, interpreted and analyzed data, and ensure the community is supplied with the knowledge of the experimental conditions, protocols and verification checks used to generate each primary data set. We present here the design principles of the modENCODE DCC, and describe the ramifications of collecting thorough and deep metadata for describing experiments, including the use of a wiki for capturing protocol and reagent information, and the BIR-TAB specification for linking biological samples to experimental results. modENCODE data can be found at http://www.modencode.org.",2011-08-19 +24914762,The subjective burden of informal caregivers of persons with dementia: extended validation of the German language version of the Burden Scale for Family Caregivers (BSFC).,"

Objective

In research as well as in the practice of caregiver counselling, the subjective burden of family caregivers is considered an important characteristic of home care. The objective of the present study was to provide an extended validation of the German language version of the Burden Scale for Family Caregivers (BSFC) published in 2001.

Methods

Extended validation was performed using medical, interview, and health insurance data of 351 informal caregivers and their relatives who had dementia. Cronbach's alpha was calculated to assess the internal consistency of the items, and an exploratory factor analysis was conducted to determine the structure of the BSFC. The discriminatory power and item difficulties of the 28 BSFC items were examined. Five research questions addressed construct validity. Question six addressed the BSFC score as a potential predictor of institutionalization at a follow-up time of 2.5 years (predictive validity).

Results

Exploratory factor analysis indicated that the BSFC had a single-factor structure. Cronbach's alpha for the total scale was 0.93. A significant increase in the BSFC score was observed when the severity of cognitive impairment increased, persons with dementia showed disturbing behaviour, caregivers were diagnosed with depression, care requirements increased, or the family caregiver and the person with dementia lived together. Caregiver burden emerged as a significant predictor of institutionalization. The validity of the BSFC was confirmed by the results.

Conclusion

The BSFC score allows for a valid assessment of the total caregiver burden in both research and practice. The BSFC is available for free in 20 languages ( http://www.caregiver-burden.eu ).",2014-06-10 +23736530,Whole-Genome rVISTA: a tool to determine enrichment of transcription factor binding sites in gene promoters from transcriptomic data.,"

Summary

We have developed a web-based query tool, Whole-Genome rVISTA (WGRV), that determines enrichment of transcription factors (TFs) and associated target genes in sets of co-regulated genes. WGRV enables users to query databases containing pre-computed genome coordinates of evolutionarily conserved transcription factor binding sites in the proximal promoters (from 100 bp to 5 kb upstream) of human, mouse and Drosophila genomes. TF binding sites are based on position-weight matrices from the TRANSFAC Professional database. For a given set of co-regulated genes, WGRV returns statistically enriched and evolutionarily conserved binding sites, mapped by the regulatory VISTA (rVISTA) algorithm. Users can then retrieve a list of genes from the query set containing the enriched TF binding sites and their location in the query set promoters. Results are exported in a BED format for rapid visualization in the UCSC genome browser. Flat files of mapped conserved sites and their genomic coordinates are also available for analysis with stand-alone software.

Availability

http://genome.lbl.gov/cgi-bin/WGRVistaInputCommon.pl.",2013-06-04 +23736528,BeCAS: biomedical concept recognition services and visualization.,"

Summary

The continuous growth of the biomedical scientific literature has been motivating the development of text-mining tools able to efficiently process all this information. Although numerous domain-specific solutions are available, there is no web-based concept-recognition system that combines the ability to select multiple concept types to annotate, to reference external databases and to automatically annotate nested and intercepted concepts. BeCAS, the Biomedical Concept Annotation System, is an API for biomedical concept identification and a web-based tool that addresses these limitations. MEDLINE abstracts or free text can be annotated directly in the web interface, where identified concepts are enriched with links to reference databases. Using its customizable widget, it can also be used to augment external web pages with concept highlighting features. Furthermore, all text-processing and annotation features are made available through an HTTP REST API, allowing integration in any text-processing pipeline.

Availability

BeCAS is freely available for non-commercial use at http://bioinformatics.ua.pt/becas.

Contacts

tiago.nunes@ua.pt or jlo@ua.pt.",2013-06-04 +23620362,RSVSim: an R/Bioconductor package for the simulation of structural variations.,"

Unlabelled

RSVSim is a tool for the simulation of deletions, insertions, inversions, tandem duplications and translocations of various sizes in any genome available as FASTA-file or data package in R. The structural variations can be generated randomly, based on user-supplied genomic coordinates or associated to various kinds of repeats. The package further comprises functions to estimate the distribution of structural variation sizes from real datasets.

Availability

RSVSim is implemented in R and available at http://www.bioconductor.org. A vignette with detailed descriptions of the functions and examples is included.",2013-04-25 +24913607,"Low neonatal blood glucose levels in cesarean-delivered term newborns at Khartoum Hospital, Sudan.","

Background

Glucose is the main source of energy for organ function in neonates. There are few published recent data on neonatal glucose levels during cesarean delivery.

Methods

A case (cesarean delivery) -control (vaginal delivery) study was conducted at Khartoum Hospital Sudan to compare blood glucose levels of term newborns born after elective cesarean delivery with those born vaginally.

Results

Cord blood glucose levels at delivery were significantly lower in women who had a cesarean delivery compared with those who delivered vaginally (99.8 ± 20.6 vs. 106.8 ± 11.1 mg/dl, P=0.026), but there was no significant difference (97.8 ± 16.7 vs. 102.1 ± 9.6, P=0.110) in newborn glucose levels at 2 hours after delivery between the groups. In linear regression, cesarean delivery (-6.475 mg/dl, P=0.013) and maternal blood glucose levels at the time of delivery (+0.619 mg, P<0.001) were significantly associated with mean cord glucose levels.

Conclusion

This study shows that cord blood glucose levels are significantly lower in cesarean-delivered neonates than vaginally-delivered neonates. In addition, cord blood glucose levels are significantly associated with cesarean delivery and maternal blood glucose levels at delivery.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2011479878124993.",2014-06-09 +22669910,MAGNET: MicroArray Gene expression and Network Evaluation Toolkit.,"MicroArray Gene expression and Network Evaluation Toolkit (MAGNET) is a web-based application that provides tools to generate and score both protein-protein interaction networks and coexpression networks. MAGNET integrates user-provided experimental measurements with high-throughput proteomic datasets, generating weighted gene-gene and protein-protein interaction networks. MAGNET allows users to weight edges of protein-protein interaction networks using a logistic regression model integrating tissue-specific gene expression data, sub-cellular localization data, co-clustering of interacting proteins and the number of observations of the interaction. This provides a way to quantitatively measure the plausibility of interactions in protein-protein interaction networks given protein/gene expression measurements. Secondly, MAGNET generates filtered coexpression networks, where genes are represented as nodes, and their correlations are represented with edges. Overall, MAGNET provides researchers with a new framework with which to analyze and generate gene-gene and protein-protein interaction networks, based on both the user's own data and publicly available -omics datasets. The freely available service and documentation can be accessed at http://gurkan.case.edu/software or http://magnet.case.edu.",2012-06-04 +22270228,BootstRatio: A web-based statistical analysis of fold-change in qPCR and RT-qPCR data using resampling methods.,"Real-time quantitative polymerase chain reaction (qPCR) is widely used in biomedical sciences quantifying its results through the relative expression (RE) of a target gene versus a reference one. Obtaining significance levels for RE assuming an underlying probability distribution of the data may be difficult to assess. 
We have developed the web-based application BootstRatio, which tackles the statistical significance of the RE and the probability that RE>1 through resampling methods without any assumption on the underlying probability distribution for the data analyzed. BootstRatio perform these statistical analyses of gene expression ratios in two settings: (1) when data have been already normalized against a control sample and (2) when the data control samples are provided. Since the estimation of the probability that RE>1 is an important feature for this type of analysis, as it is used to assign statistical significance and it can be also computed under the Bayesian framework, a simulation study has been carried out comparing the performance of BootstRatio versus a Bayesian approach in the estimation of that probability. In addition, two analyses, one for each setting, carried out with data from real experiments are presented showing the performance of BootstRatio. Our simulation study suggests that Bootstratio approach performs better than the Bayesian one excepting in certain situations of very small sample size (N≤12). The web application BootstRatio is accessible through http://regstattools.net/br and developed for the purpose of these intensive computation statistical analyses.",2012-01-24 +25304792,Comparison and consensus guidelines for delineation of clinical target volume for CT- and MR-based brachytherapy in locally advanced cervical cancer.,"

Objective

To create and compare consensus clinical target volume (CTV) contours for computed tomography (CT) and 3-Tesla (3-T) magnetic resonance (MR) image-based cervical-cancer brachytherapy.

Methods and materials

Twenty-three experts in gynecologic radiation oncology contoured the same 3 cervical cancer brachytherapy cases: 1 stage IIB near-complete response (CR) case with a tandem and ovoid, 1 stage IIB partial response (PR) case with tandem and ovoid with needles, and 1 stage IB2 CR case with a tandem and ring applicator. The CT contours were completed before the MRI contours. These were analyzed for consistency and clarity of target delineation using an expectation maximization algorithm for simultaneous truth and performance level estimation (STAPLE), with κ statistics as a measure of agreement between participants. The conformity index was calculated for each of the 6 data sets. Dice coefficients were generated to compare the CT and MR contours of the same case.

Results

For all 3 cases, the mean tumor volume was smaller on MR than on CT (P<.001). The κ and conformity index estimates were slightly higher for CT, indicating a higher level of agreement on CT. The Dice coefficients were 89% for the stage IB2 case with a CR, 74% for the stage IIB case with a PR, and 57% for the stage IIB case with a CR.

Conclusion

In a comparison of MR-contoured with CT-contoured CTV volumes, the higher level of agreement on CT may be due to the more distinct contrast medium visible on the images at the time of brachytherapy. MR at the time of brachytherapy may be of greatest benefit in patients with large tumors with parametrial extension that have a partial or complete response to external beam. On the basis of these results, a 95% consensus volume was generated for CT and for MR. Online contouring atlases are available for instruction at http://www.nrgoncology.org/Resources/ContouringAtlases/GYNCervicalBrachytherapy.aspx.",2014-10-01 +22369459,A hidden Markov model-based algorithm for identifying tumour subtype using array CGH data.,

Background

The recent advancement in array CGH (aCGH) research has significantly improved tumor identification using DNA copy number data. A number of unsupervised learning methods have been proposed for clustering aCGH samples. Two of the major challenges for developing aCGH sample clustering are the high spatial correlation between aCGH markers and the low computing efficiency. A mixture hidden Markov model based algorithm was developed to address these two challenges.

Results

The hidden Markov model (HMM) was used to model the spatial correlation between aCGH markers. A fast clustering algorithm was implemented and real data analysis on glioma aCGH data has shown that it converges to the optimal cluster rapidly and the computation time is proportional to the sample size. Simulation results showed that this HMM based clustering (HMMC) method has a substantially lower error rate than NMF clustering. The HMMC results for glioma data were significantly associated with clinical outcomes.

Conclusions

We have developed a fast clustering algorithm to identify tumor subtypes based on DNA copy number aberrations. The performance of the proposed HMMC method has been evaluated using both simulated and real aCGH data. The software for HMMC in both R and C++ is available in ND INBRE website http://ndinbre.org/programs/bioinformatics.php.,2011-12-23 +22645166,An integrated open framework for thermodynamics of reactions that combines accuracy and coverage.,"

Motivation

The laws of thermodynamics describe a direct, quantitative relationship between metabolite concentrations and reaction directionality. Despite great efforts, thermodynamic data suffer from limited coverage, scattered accessibility and non-standard annotations. We present a framework for unifying thermodynamic data from multiple sources and demonstrate two new techniques for extrapolating the Gibbs energies of unmeasured reactions and conditions.

Results

Both methods account for changes in cellular conditions (pH, ionic strength, etc.) by using linear regression over the ΔG(○) of pseudoisomers and reactions. The Pseudoisomeric Reactant Contribution method systematically infers compound formation energies using measured K' and pK(a) data. The Pseudoisomeric Group Contribution method extends the group contribution method and achieves a high coverage of unmeasured reactions. We define a continuous index that predicts the reversibility of a reaction under a given physiological concentration range. In the characteristic physiological range 3μM-3mM, we find that roughly half of the reactions in Escherichia coli's metabolism are reversible. These new tools can increase the accuracy of thermodynamic-based models, especially in non-standard pH and ionic strengths. The reversibility index can help modelers decide which reactions are reversible in physiological conditions.

Availability

Freely available on the web at: http://equilibrator.weizmann.ac.il. Website implemented in Python, MySQL, Apache and Django, with all major browsers supported. The framework is open-source (code.google.com/p/milo-lab), implemented in pure Python and tested mainly on Linux.

Contact

ron.milo@weizmann.ac.il

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-05-29 +23553446,Antipsychotics and torsadogenic risk: signals emerging from the US FDA Adverse Event Reporting System database.,"

Background

Drug-induced torsades de pointes (TdP) and related clinical entities represent a current regulatory and clinical burden.

Objective

As part of the FP7 ARITMO (Arrhythmogenic Potential of Drugs) project, we explored the publicly available US FDA Adverse Event Reporting System (FAERS) database to detect signals of torsadogenicity for antipsychotics (APs).

Methods

Four groups of events in decreasing order of drug-attributable risk were identified: (1) TdP, (2) QT-interval abnormalities, (3) ventricular fibrillation/tachycardia, and (4) sudden cardiac death. The reporting odds ratio (ROR) with 95 % confidence interval (CI) was calculated through a cumulative analysis from group 1 to 4. For groups 1+2, ROR was adjusted for age, gender, and concomitant drugs (e.g., antiarrhythmics) and stratified for AZCERT drugs, lists I and II (http://www.azcert.org , as of June 2011). A potential signal of torsadogenicity was defined if a drug met all the following criteria: (a) four or more cases in group 1+2; (b) significant ROR in group 1+2 that persists through the cumulative approach; (c) significant adjusted ROR for group 1+2 in the stratum without AZCERT drugs; (d) not included in AZCERT lists (as of June 2011).

Results

Over the 7-year period, 37 APs were reported in 4,794 cases of arrhythmia: 140 (group 1), 883 (group 2), 1,651 (group 3), and 2,120 (group 4). Based on our criteria, the following potential signals of torsadogenicity were found: amisulpride (25 cases; adjusted ROR in the stratum without AZCERT drugs = 43.94, 95 % CI 22.82-84.60), cyamemazine (11; 15.48, 6.87-34.91), and olanzapine (189; 7.74, 6.45-9.30).

Conclusions

This pharmacovigilance analysis on the FAERS found 3 potential signals of torsadogenicity for drugs previously unknown for this risk.",2013-06-01 +30722615,First Report of Powdery Mildew Caused by Podosphaera xanthii (syn. P. fusca) on Cocklebur in Korea.,"Cocklebur (Xanthium strumarium L., Asteraceae) is an annual broadleaf weed native to the Americas and eastern Asia. The plant is known as one of the worst competitive weeds in soybean fields and also is known to have some phytopharmacological or toxicological properties. In October 2011, a powdery mildew disease was observed on cocklebur growing in a natural landscape at Geomun Oreum located in Jeju Island, South Korea. Initial signs appeared as thin white colonies, which subsequently developed abundant growth on adaxial leaf surfaces. As the disease progressed, brown discoloration extended down infected leaves which withered. Conidia were formed singly and terminally on conidiophores. Primary conidia (20.3 to 28.6 [average 25.1] μm long × 11.1 to 15.2 [14.3] μm wide, n = 30) were ellipsoid with a round apex and truncate base. Conidiophores were straight or slightly curved and 60.1 to 101.7 (97.3) μm long × 8.2 to 13.2 (11.3) μm wide. Chasmothecia were not observed. No fibrosin bodies were observed in the conidia. Morphological characteristics were consistent with descriptions of Podosphaera xanthii (syn. P. fusca) (2,4). To confirm the identity of the causal fungus, the internal transcribed spacer (ITS) region inclusive of 5.8S and 28S rDNA was amplified from white patches consisting of mycelia and conidia on one leaf using ITS1 (5'-TCCGTAGGTGAACCTGCGG-3') and LR5F (5'-GCTATCCTGAGGGAAAC-3'), and LROR (5'-ACCCGCTGAACTTAAGC-3') and LR5F primer sets, respectively. The resulting sequences were deposited in GenBank (Accession Nos. JX502022 and JX964999). 
A NCBI BLASTn search revealed that the rDNA ITS (JX502022) and 28S (JX964999) homologies of isolate EML-XSPW1 represented 99.6% (512/514) and 100% (803/803) identity values with those of P. xanthii (AB040330 and AB462792, respectively). The rDNA ITS and 28S sequence analysis revealed that the causal fungus clustered with P. xanthii (syn. P. fusca), falling into the Xanthii/Fusca phylogenetic group (2,4). Pathogenicity was confirmed through inoculations made by gently pressing infected leaves onto mature leaves of healthy cocklebur plants in the field in August. The six inoculated leaves were sealed in sterilized vinyl bags to maintain humid conditions for 2 days. Seven days after inoculation, symptoms similar to those observed under natural infection were observed on the inoculated plant leaves. No symptoms developed on the uninoculated control plants. A fungal pathogen that was morphologically identical to the fungus originally observed on diseased plants was also observed on inoculated plants. Erysiphe cichoracearum, E. communis, Oidium asteris-punicei, O. xanthimi, P. xanthii, and P. fuliginea have all been reported to cause powdery mildew on cocklebur (1). P. xanthii was first reported on X. strumarium in Russia (3). To our knowledge, this is the first report of powdery mildew on cocklebur caused by P. xanthii in Korea. The powdery mildew pathogen may represent an option for biocontrol of the noxious weed in the near future. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases , December 11, 2012. (2) H. B. Lee. J. Microbiol. 51:1075, 2012. (3) V. A. Rusanov and T. S. Bulgakov. Mikol. Fitopatol. 42:314, 2008. (4) S. Takamatsu et al. Persoonia 24:38, 2010.",2013-06-01 +24215021,A MATLAB-based tool for accurate detection of perfect overlapping and nested inverted repeats in DNA sequences.,"

Summary

Palindromic sequences, or inverted repeats (IRs), in DNA sequences involve important biological processes such as DNA-protein binding, DNA replication and DNA transposition. Development of bioinformatics tools that are capable of accurately detecting perfect IRs can enable genome-wide studies of IR patterns in both prokaryotes and eukaryotes. Different from conventional string-comparison approaches, we propose a novel algorithm that uses a cumulative score system based on a prime number representation of nucleotide bases. We then implemented this algorithm as a MATLAB-based program for perfect IR detection. In comparison with other existing tools, our program demonstrates a high accuracy in detecting nested and overlapping IRs.

Availability and implementation

The source code is freely available on (http://bioinfolab.miamioh.edu/bioinfolab/palindrome.php)

Contact

liangc@miamioh.edu or karroje@miamioh.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-11-08 +23721660,HIM-herbal ingredients in-vivo metabolism database.,"

Background

Herbal medicine has long been viewed as a valuable asset for potential new drug discovery and herbal ingredients' metabolites, especially the in vivo metabolites were often found to gain better pharmacological, pharmacokinetic and even better safety profiles compared to their parent compounds. However, these herbal metabolite information is still scattered and waiting to be collected.

Description

HIM database manually collected so far the most comprehensive available in-vivo metabolism information for herbal active ingredients, as well as their corresponding bioactivity, organs and/or tissues distribution, toxicity, ADME and the clinical research profile. Currently HIM contains 361 ingredients and 1104 corresponding in-vivo metabolites from 673 reputable herbs. Tools of structural similarity, substructure search and Lipinski's Rule of Five are also provided. Various links were made to PubChem, PubMed, TCM-ID (Traditional Chinese Medicine Information database) and HIT (Herbal ingredients' targets databases).

Conclusions

A curated database HIM is set up for the in vivo metabolites information of the active ingredients for Chinese herbs, together with their corresponding bioactivity, toxicity and ADME profile. HIM is freely accessible to academic researchers at http://www.bioinformatics.org.cn/.",2013-05-31 +23613489,Systematic tracking of dysregulated modules identifies novel genes in cancer.,"

Motivation

Deciphering the modus operandi of dysregulated cellular mechanisms in cancer is critical to implicate novel cancer genes and develop effective anti-cancer therapies. Fundamental to this is meticulous tracking of the behavior of core modules, including complexes and pathways across specific conditions in cancer.

Results

Here, we performed a straightforward yet systematic identification and comparison of modules across pancreatic normal and cancer tissue conditions by integrating PPI, gene-expression and mutation data. Our analysis revealed interesting change-patterns in gene composition and expression correlation particularly affecting modules responsible for genome stability. Although in most cases these changes indicated impairment of essential functions (e.g., of DNA damage repair), in several other cases we noticed strengthening of modules possibly abetting cancer. Some of these compensatory modules showed switches in transcription regulation and recruitment of tumor inducers (e.g., SOX2 through overexpression). In-depth analysis revealed novel genes in pancreatic cancer, which showed susceptibility to copy-number alterations (e.g., for USP15 in 17 of 67 cases), supported by literature evidence for their involvement in other tumors (e.g., USP15 in glioblastoma). Two of the identified genes, YWHAE and DISC1, further supported the nexus between neural genes and pancreatic carcinogenesis. Extension of this assessment to BRCA1 and BRCA2 breast tumors showed specific differences even across the two sub-types and revealed novel genes involved therein (e.g., TRIM5 and NCOA6).

Availability

Our software CONTOURv1 is available at: http://bioinformatics.org.au/tools-data/.",2013-04-23 +23720012,DNA variant databases improve test accuracy and phenotype prediction in Alport syndrome.,"X-linked Alport syndrome is a form of progressive renal failure caused by pathogenic variants in the COL4A5 gene. More than 700 variants have been described and a further 400 are estimated to be known to individual laboratories but are unpublished. The major genetic testing laboratories for X-linked Alport syndrome worldwide have established a Web-based database for published and unpublished COL4A5 variants ( https://grenada.lumc.nl/LOVD2/COL4A/home.php?select_db=COL4A5 ). This conforms with the recommendations of the Human Variome Project: it uses the Leiden Open Variation Database (LOVD) format, describes variants according to the human reference sequence with standardized nomenclature, indicates likely pathogenicity and associated clinical features, and credits the submitting laboratory. The database includes non-pathogenic and recurrent variants, and is linked to another COL4A5 mutation database and relevant bioinformatics sites. Access is free. Increasing the number of COL4A5 variants in the public domain helps patients, diagnostic laboratories, clinicians, and researchers. The database improves the accuracy and efficiency of genetic testing because its variants are already categorized for pathogenicity. The description of further COL4A5 variants and clinical associations will improve our ability to predict phenotype and our understanding of collagen IV biochemistry. The database for X-linked Alport syndrome represents a model for databases in other inherited renal diseases.",2013-05-30 +25618864,Prediction of potential disease-associated microRNAs based on random walk.,"

Motivation

Identifying microRNAs associated with diseases (disease miRNAs) is helpful for exploring the pathogenesis of diseases. Because miRNAs fulfill function via the regulation of their target genes and because the current number of experimentally validated targets is insufficient, some existing methods have inferred potential disease miRNAs based on the predicted targets. It is difficult for these methods to achieve excellent performance due to the high false-positive and false-negative rates for the target prediction results. Alternatively, several methods have constructed a network composed of miRNAs based on their associated diseases and have exploited the information within the network to predict the disease miRNAs. However, these methods have failed to take into account the prior information regarding the network nodes and the respective local topological structures of the different categories of nodes. Therefore, it is essential to develop a method that exploits the more useful information to predict reliable disease miRNA candidates.

Results

miRNAs with similar functions are normally associated with similar diseases and vice versa. Therefore, the functional similarity between a pair of miRNAs is calculated based on their associated diseases to construct a miRNA network. We present a new prediction method based on random walk on the network. For the diseases with some known related miRNAs, the network nodes are divided into labeled nodes and unlabeled nodes, and the transition matrices are established for the two categories of nodes. Furthermore, different categories of nodes have different transition weights. In this way, the prior information of nodes can be completely exploited. Simultaneously, the various ranges of topologies around the different categories of nodes are integrated. In addition, how far the walker can go away from the labeled nodes is controlled by restarting the walking. This is helpful for relieving the negative effect of noisy data. For the diseases without any known related miRNAs, we extend the walking on a miRNA-disease bilayer network. During the prediction process, the similarity between diseases, the similarity between miRNAs, the known miRNA-disease associations and the topology information of the bilayer network are exploited. Moreover, the importance of information from different layers of network is considered. Our method achieves superior performance for 18 human diseases with AUC values ranging from 0.786 to 0.945. Moreover, case studies on breast neoplasms, lung neoplasms, prostatic neoplasms and 32 diseases further confirm the ability of our method to discover potential disease miRNAs.

Availability and implementation

A web service for the prediction and analysis of disease miRNAs is available at http://bioinfolab.stx.hk/midp/.",2015-01-23 +22809386,Using rigidity analysis to probe mutation-induced structural changes in proteins.,"Predicting the effect of a single amino acid substitution on the stability of a protein structure is a fundamental task in macromolecular modeling. It has relevance to drug design and understanding of disease-causing protein variants. We present KINARI-Mutagen, a web server for performing in silico mutation experiments on protein structures from the Protein Data Bank. Our rigidity-theoretical approach permits fast evaluation of the effects of mutations that may not be easy to perform in vitro, because it is not always possible to express a protein with a specific amino acid substitution. We use KINARI-Mutagen to identify critical residues, and we show that our predictions correlate with destabilizing mutations to glycine. In two in-depth case studies we show that the mutated residues identified by KINARI-Mutagen as critical correlate with experimental data, and would not have been identified by other methods such as Solvent Accessible Surface Area measurements or residue ranking by contributions to stabilizing interactions. We also generate 48 mutants for 14 proteins, and compare our rigidity-based results against experimental mutation stability data. KINARI-Mutagen is available at http://kinari.cs.umass.edu.",2012-06-01 +22843986,zCall: a rare variant caller for array-based genotyping: genetics and population analysis.,

Summary

zCall is a variant caller specifically designed for calling rare single-nucleotide polymorphisms from array-based technology. This caller is implemented as a post-processing step after a default calling algorithm has been applied. The algorithm uses the intensity profile of the common allele homozygote cluster to define the location of the other two genotype clusters. We demonstrate improved detection of rare alleles when applying zCall to samples that have both Illumina Infinium HumanExome BeadChip and exome sequencing data available.

Availability

http://atguweb.mgh.harvard.edu/apps/zcall.

Contact

bneale@broadinstitute.org

Supplementary information

Supplementary data are available at Bioinformatics online.,2012-07-27 +24893587,Genomic V exons from whole genome shotgun data in reptiles.,"Reptiles and mammals diverged over 300 million years ago, creating two parallel evolutionary lineages amongst terrestrial vertebrates. In reptiles, two main evolutionary lines emerged: one gave rise to Squamata, while the other gave rise to Testudines, Crocodylia, and Aves. In this study, we determined the genomic variable (V) exons from whole genome shotgun sequencing (WGS) data in reptiles corresponding to the three main immunoglobulin (IG) loci and the four main T cell receptor (TR) loci. We show that Squamata lack the TRG and TRD genes, and snakes lack the IGKV genes. In representative species of Testudines and Crocodylia, the seven major IG and TR loci are maintained. As in mammals, genes of the IG loci can be grouped into well-defined IMGT clans through a multi-species phylogenetic analysis. We show that the reptilian IGHV and IGLV genes are distributed amongst the established mammalian clans, while their IGKV genes are found within a single clan, nearly exclusive from the mammalian sequences. The reptilian and mammalian TRAV genes cluster into six common evolutionary clades (since IMGT clans have not been defined for TR). In contrast, the reptilian TRBV genes cluster into three clades, which have few mammalian members. In this locus, the V exon sequences from mammals appear to have undergone different evolutionary diversification processes that occurred outside these shared reptilian clans. These sequences can be obtained in a freely available public repository (http://vgenerepertoire.org).",2014-06-04 +23720490,Drug-target interaction prediction through domain-tuned network-based inference.,"

Motivation

The identification of drug-target interaction (DTI) represents a costly and time-consuming step in drug discovery and design. Computational methods capable of predicting reliable DTI play an important role in the field. Recently, recommendation methods relying on network-based inference (NBI) have been proposed. However, such approaches implement naive topology-based inference and do not take into account important features within the drug-target domain.

Results

In this article, we present a new NBI method, called domain tuned-hybrid (DT-Hybrid), which extends a well-established recommendation technique by domain-based knowledge including drug and target similarity. DT-Hybrid has been extensively tested using the last version of an experimentally validated DTI database obtained from DrugBank. Comparison with other recently proposed NBI methods clearly shows that DT-Hybrid is capable of predicting more reliable DTIs.

Availability

DT-Hybrid has been developed in R and it is available, along with all the results on the predictions, through an R package at the following URL: http://sites.google.com/site/ehybridalgo/.",2013-05-29 +24400097,Sebnif: an integrated bioinformatics pipeline for the identification of novel large intergenic noncoding RNAs (lincRNAs)--application in human skeletal muscle cells.,"Ab initio assembly of transcriptome sequencing data has been widely used to identify large intergenic non-coding RNAs (lincRNAs), a novel class of gene regulators involved in many biological processes. To differentiate real lincRNA transcripts from thousands of assembly artifacts, a series of filtering steps such as filters of transcript length, expression level and coding potential, need to be applied. However, an easy-to-use and publicly available bioinformatics pipeline that integrates these filters is not yet available. Hence, we implemented sebnif, an integrative bioinformatics pipeline to facilitate the discovery of bona fide novel lincRNAs that are suitable for further functional characterization. Specifically, sebnif is the only pipeline that implements an algorithm for identifying high-quality single-exonic lincRNAs that were often omitted in many studies. To demonstrate the usage of sebnif, we applied it on a real biological RNA-seq dataset from Human Skeletal Muscle Cells (HSkMC) and built a novel lincRNA catalog containing 917 highly reliable lincRNAs. Sebnif is available at http://sunlab.lihs.cuhk.edu.hk/sebnif/.",2014-01-06 +22963430,Increased numbers of P63-positive/CD117-positive cells in advanced adenoid cystic carcinoma give a poorer prognosis.,"

Objectives

This study consisted of two parts. One part was to analyze the survival rates of adenoid cystic carcinoma (ACC) in Chinese and explain the difference between our data and the literature. The other was to analyze the relationship between the expression of CD117 and the histological grade and the prognosis.

Methods

A retrospective study of 80 ACC patients was performed. Clinical data were collected, and p63, CD117 were detected by immunohistochemical staining.

Results

Eighty patients received follow-ups 3 to 216 months after initial diagnosis. ACC occurred in the lacrimal gland (26.3%, n = 21), nasal cavity and parasinus (33.8%, n = 27) and other sites (40.0%, n = 33). The 5-year and 10-year survival rates were 66.41% and 10.16%, respectively. Over expression of CD117 was detected in p63-negative cells in 94.3% of cases and in p63-positive cells in 45.8%. The expression of CD117 in p63-positive cells was significantly associated with the histological grade (P<0.001) and prognosis (P = 0.037) in patients in the advanced stage.

Conclusions

ACC had a good 5-year survival but poor 10-year survival in Chinese, which differed from the occidental data. More p63+/CD117+ cells were associated with a higher histological grade and poorer outcome.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1701457278762097.",2012-09-10 +23853063,Exhaustively characterizing feasible logic models of a signaling network using Answer Set Programming.,"

Motivation

Logic modeling is a useful tool to study signal transduction across multiple pathways. Logic models can be generated by training a network containing the prior knowledge to phospho-proteomics data. The training can be performed using stochastic optimization procedures, but these are unable to guarantee a global optima or to report the complete family of feasible models. This, however, is essential to provide precise insight in the mechanisms underlaying signal transduction and generate reliable predictions.

Results

We propose the use of Answer Set Programming to explore exhaustively the space of feasible logic models. Toward this end, we have developed caspo, an open-source Python package that provides a powerful platform to learn and characterize logic models by leveraging the rich modeling language and solving technologies of Answer Set Programming. We illustrate the usefulness of caspo by revisiting a model of pro-growth and inflammatory pathways in liver cells. We show that, if experimental error is taken into account, there are thousands (11 700) of models compatible with the data. Despite the large number, we can extract structural features from the models, such as links that are always (or never) present or modules that appear in a mutual exclusive fashion. To further characterize this family of models, we investigate the input-output behavior of the models. We find 91 behaviors across the 11 700 models and we suggest new experiments to discriminate among them. Our results underscore the importance of characterizing in a global and exhaustive manner the family of feasible models, with important implications for experimental design.

Availability

caspo is freely available for download (license GPLv3) and as a web service at http://caspo.genouest.org/.

Supplementary information

Supplementary materials are available at Bioinformatics online.

Contact

santiago.videla@irisa.fr.",2013-07-12 +25028721,Modeling time-dependent transcription effects of HER2 oncogene and discovery of a role for E2F2 in breast cancer cell-matrix adhesion.,"

Motivation

Oncogenes are known drivers of cancer phenotypes and targets of molecular therapies; however, the complex and diverse signaling mechanisms regulated by oncogenes and potential routes to targeted therapy resistance remain to be fully understood. To this end, we present an approach to infer regulatory mechanisms downstream of the HER2 driver oncogene in SUM-225 metastatic breast cancer cells from dynamic gene expression patterns using a succession of analytical techniques, including a novel MP grammars method to mathematically model putative regulatory interactions among sets of clustered genes.

Results

Our method highlighted regulatory interactions previously identified in the cell line and a novel finding that the HER2 oncogene, as opposed to the proto-oncogene, upregulates expression of the E2F2 transcription factor. By targeted gene knockdown we show the significance of this, demonstrating that cancer cell-matrix adhesion and outgrowth were markedly inhibited when E2F2 levels were reduced. Thus, validating in this context that upregulation of E2F2 represents a key intermediate event in a HER2 oncogene-directed gene expression-based signaling circuit. This work demonstrates how predictive modeling of longitudinal gene expression data combined with multiple systems-level analyses can be used to accurately predict downstream signaling pathways. Here, our integrated method was applied to reveal insights as to how the HER2 oncogene drives a specific cancer cell phenotype, but it is adaptable to investigate other oncogenes and model systems.

Availability and implementation

Accessibility of various tools is listed in methods; the Log-Gain Stoichiometric Stepwise algorithm is accessible at http://www.cbmc.it/software/Software.php.",2014-07-15 +21249531,The DIADEM data sets: representative light microscopy images of neuronal morphology to advance automation of digital reconstructions.,"The comprehensive characterization of neuronal morphology requires tracing extensive axonal and dendritic arbors imaged with light microscopy into digital reconstructions. Considerable effort is ongoing to automate this greatly labor-intensive and currently rate-determining process. Experimental data in the form of manually traced digital reconstructions and corresponding image stacks play a vital role in developing increasingly more powerful reconstruction algorithms. The DIADEM challenge (short for DIgital reconstruction of Axonal and DEndritic Morphology) successfully stimulated progress in this area by utilizing six data set collections from different animal species, brain regions, neuron types, and visualization methods. The original research projects that provided these data are representative of the diverse scientific questions addressed in this field. At the same time, these data provide a benchmark for the types of demands automated software must meet to achieve the quality of manual reconstructions while minimizing human involvement. The DIADEM data underwent extensive curation, including quality control, metadata annotation, and format standardization, to focus the challenge on the most substantial technical obstacles. 
This data set package is now freely released ( http://diademchallenge.org ) to train, test, and aid development of automated reconstruction algorithms.",2011-09-01 +22207815,Engaging the broader community in biodiversity research: the concept of the COMBER pilot project for divers in ViBRANT.,"This paper discusses the design and implementation of a citizen science pilot project, COMBER (Citizens' Network for the Observation of Marine BiodivERsity, http://www.comber.hcmr.gr), which has been initiated under the ViBRANT EU e-infrastructure. It is designed and implemented for divers and snorkelers who are interested in participating in marine biodiversity citizen science projects. It shows the necessity of engaging the broader community in the marine biodiversity monitoring and research projects, networks and initiatives. It analyses the stakeholders, the industry and the relevant markets involved in diving activities and their potential to sustain these activities. The principles, including data policy and rewards for the participating divers through their own data, upon which this project is based are thoroughly discussed. The results of the users analysis and lessons learned so far are presented. Future plans include promotion, links with citizen science web developments, data publishing tools, and development of new scientific hypotheses to be tested by the data collected so far.",2011-11-28 +22954633,Mendel-GPU: haplotyping and genotype imputation on graphics processing units.,"

Motivation

In modern sequencing studies, one can improve the confidence of genotype calls by phasing haplotypes using information from an external reference panel of fully typed unrelated individuals. However, the computational demands are so high that they prohibit researchers with limited computational resources from haplotyping large-scale sequence data.

Results

Our graphics processing unit based software delivers haplotyping and imputation accuracies comparable to competing programs at a fraction of the computational cost and peak memory demand.

Availability

Mendel-GPU, our OpenCL software, runs on Linux platforms and is portable across AMD and nVidia GPUs. Users can download both code and documentation at http://code.google.com/p/mendel-gpu/.

Contact

gary.k.chen@usc.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-05 +22453911,Protein interaction data curation: the International Molecular Exchange (IMEx) consortium.,"The International Molecular Exchange (IMEx) consortium is an international collaboration between major public interaction data providers to share literature-curation efforts and make a nonredundant set of protein interactions available in a single search interface on a common website (http://www.imexconsortium.org/). Common curation rules have been developed, and a central registry is used to manage the selection of articles to enter into the dataset. We discuss the advantages of such a service to the user, our quality-control measures and our data-distribution practices.",2012-04-01 +21984758,Sparse distance-based learning for simultaneous multiclass classification and feature selection of metagenomic data.,"

Motivation

Direct sequencing of microbes in human ecosystems (the human microbiome) has complemented single genome cultivation and sequencing to understand and explore the impact of commensal microbes on human health. As sequencing technologies improve and costs decline, the sophistication of data has outgrown available computational methods. While several existing machine learning methods have been adapted for analyzing microbiome data recently, there is not yet an efficient and dedicated algorithm available for multiclass classification of human microbiota.

Results

By combining instance-based and model-based learning, we propose a novel sparse distance-based learning method for simultaneous class prediction and feature (variable or taxa, which is used interchangeably) selection from multiple treatment populations on the basis of 16S rRNA sequence count data. Our proposed method simultaneously minimizes the intraclass distance and maximizes the interclass distance with many fewer estimated parameters than other methods. It is very efficient for problems with small sample sizes and unbalanced classes, which are common in metagenomic studies. We implemented this method in a MATLAB toolbox called MetaDistance. We also propose several approaches for data normalization and variance stabilization transformation in MetaDistance. We validate this method on several real and simulated 16S rRNA datasets to show that it outperforms existing methods for classifying metagenomic data. This article is the first to address simultaneous multifeature selection and class prediction with metagenomic count data.

Availability

The MATLAB toolbox is freely available online at http://metadistance.igs.umaryland.edu/.

Contact

zliu@umm.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-07 +24931991,An efficient parallel algorithm for accelerating computational protein design.,"

Motivation

Structure-based computational protein design (SCPR) is an important topic in protein engineering. Under the assumption of a rigid backbone and a finite set of discrete conformations of side-chains, various methods have been proposed to address this problem. A popular method is to combine the dead-end elimination (DEE) and A* tree search algorithms, which provably finds the global minimum energy conformation (GMEC) solution.

Results

In this article, we improve the efficiency of computing A* heuristic functions for protein design and propose a variant of A* algorithm in which the search process can be performed on a single GPU in a massively parallel fashion. In addition, we make some efforts to address the memory exceeding problem in A* search. As a result, our enhancements can achieve a significant speedup of the A*-based protein design algorithm by four orders of magnitude on large-scale test data through pre-computation and parallelization, while still maintaining an acceptable memory overhead. We also show that our parallel A* search algorithm could be successfully combined with iMinDEE, a state-of-the-art DEE criterion, for rotamer pruning to further improve SCPR with the consideration of continuous side-chain flexibility.

Availability

Our software is available and distributed open-source under the GNU Lesser General License Version 2.1 (GNU, February 1999). The source code can be downloaded from http://www.cs.duke.edu/donaldlab/osprey.php or http://iiis.tsinghua.edu.cn/∼compbio/software.html.",2014-06-01 +23095127,Bag of Naïve Bayes: biomarker selection and classification from genome-wide SNP data.,"

Background

Multifactorial diseases arise from complex patterns of interaction between a set of genetic traits and the environment. To fully capture the genetic biomarkers that jointly explain the heritability component of a disease, thus, all SNPs from a genome-wide association study should be analyzed simultaneously.

Results

In this paper, we present Bag of Naïve Bayes (BoNB), an algorithm for genetic biomarker selection and subjects classification from the simultaneous analysis of genome-wide SNP data. BoNB is based on the Naïve Bayes classification framework, enriched by three main features: bootstrap aggregating of an ensemble of Naïve Bayes classifiers, a novel strategy for ranking and selecting the attributes used by each classifier in the ensemble and a permutation-based procedure for selecting significant biomarkers, based on their marginal utility in the classification process. BoNB is tested on the Wellcome Trust Case-Control study on Type 1 Diabetes and its performance is compared with the ones of both a standard Naïve Bayes algorithm and HyperLASSO, a penalized logistic regression algorithm from the state-of-the-art in simultaneous genome-wide data analysis.

Conclusions

The significantly higher classification accuracy obtained by BoNB, together with the significance of the biomarkers identified from the Type 1 Diabetes dataset, prove the effectiveness of BoNB as an algorithm for both classification and biomarker selection from genome-wide SNP data.

Availability

Source code of the BoNB algorithm is released under the GNU General Public Licence and is available at http://www.dei.unipd.it/~sambofra/bonb.html.",2012-09-07 +24300438,ISRNA: an integrative online toolkit for short reads from high-throughput sequencing data.,"

Unlabelled

Integrative Short Reads NAvigator (ISRNA) is an online toolkit for analyzing high-throughput small RNA sequencing data. Besides the high-speed genome mapping function, ISRNA provides statistics for genomic location, length distribution and nucleotide composition bias analysis of sequence reads. Number of reads mapped to known microRNAs and other classes of short non-coding RNAs, coverage of short reads on genes, expression abundance of sequence reads as well as some other analysis functions are also supported. The versatile search functions enable users to select sequence reads according to their sub-sequences, expression abundance, genomic location, relationship to genes, etc. A specialized genome browser is integrated to visualize the genomic distribution of short reads. ISRNA also supports management and comparison among multiple datasets.

Availability

ISRNA is implemented in Java/C++/Perl/MySQL and can be freely accessed at http://omicslab.genetics.ac.cn/ISRNA/.",2013-12-03 +24863641,A novel tool to detect behavioural symptoms in ALS.,"There is need for a valid, sensitive and short instrument capable of detecting and quantifying behavioural changes in ALS, which can be utilized in clinical and research settings. This study aimed to 1) develop and validate such an instrument; 2) verify the most common behavioural symptoms; and 3) investigate longitudinal changes over a six-month period. Two hundred and nineteen patients were included. The development sample (n = 140) was used to determine the most appropriate items to include in the new tool, the Motor Neuron Disease Behavioural Instrument (MiND-B) * , via a data-driven approach. An independent sample (n = 79) validated the tool. A more comprehensive sample (n = 50, sub-classified into ALS and ALS plus) was utilized to verify if the MiND-B could detect ALS plus patients. Finally, 20 ALS patients completed the MiND-B after a six-month period. Apathy, disinhibition and stereotypical behaviour were all found to be very common symptoms in ALS occurring in 75%, 66% and 58%, respectively, of cases. Notably, the MiND-B could identify ALS plus patients without standard cognitive assessments. In conclusion, the MiND-B tool can detect patients with ALS plus reliably, by means of questions to the informant. This test could enable ALS centres to evaluate non-motor symptoms and adapt management and decision-making approaches as necessary. *only available in the online version of the journal. Please find this material with the following direct link to the article: http://www.informahealthcare.com/(DOI: 10.3109/21678421.2014.896927).",2014-06-01 +24564704,Random forests on Hadoop for genome-wide association studies of multivariate neuroimaging phenotypes.,"

Motivation

Multivariate quantitative traits arise naturally in recent neuroimaging genetics studies, in which both structural and functional variability of the human brain is measured non-invasively through techniques such as magnetic resonance imaging (MRI). There is growing interest in detecting genetic variants associated with such multivariate traits, especially in genome-wide studies. Random forests (RFs) classifiers, which are ensembles of decision trees, are amongst the best performing machine learning algorithms and have been successfully employed for the prioritisation of genetic variants in case-control studies. RFs can also be applied to produce gene rankings in association studies with multivariate quantitative traits, and to estimate genetic similarities measures that are predictive of the trait. However, in studies involving hundreds of thousands of SNPs and high-dimensional traits, a very large ensemble of trees must be inferred from the data in order to obtain reliable rankings, which makes the application of these algorithms computationally prohibitive.

Results

We have developed a parallel version of the RF algorithm for regression and genetic similarity learning tasks in large-scale population genetic association studies involving multivariate traits, called PaRFR (Parallel Random Forest Regression). Our implementation takes advantage of the MapReduce programming model and is deployed on Hadoop, an open-source software framework that supports data-intensive distributed applications. Notable speed-ups are obtained by introducing a distance-based criterion for node splitting in the tree estimation process. PaRFR has been applied to a genome-wide association study on Alzheimer's disease (AD) in which the quantitative trait consists of a high-dimensional neuroimaging phenotype describing longitudinal changes in the human brain structure. PaRFR provides a ranking of SNPs associated to this trait, and produces pair-wise measures of genetic proximity that can be directly compared to pair-wise measures of phenotypic proximity. Several known AD-related variants have been identified, including APOE4 and TOMM40. We also present experimental evidence supporting the hypothesis of a linear relationship between the number of top-ranked mutated states, or frequent mutation patterns, and an indicator of disease severity.

Availability

The Java codes are freely available at http://www2.imperial.ac.uk/~gmontana.",2013-10-22 +23369266,CpGIMethPred: computational model for predicting methylation status of CpG islands in human genome.,"DNA methylation is an inheritable chemical modification of cytosine, and represents one of the most important epigenetic events. Computational prediction of the DNA methylation status can be employed to speed up the genome-wide methylation profiling, and to identify the key features that are correlated with various methylation patterns. Here, we develop CpGIMethPred, the support vector machine-based models to predict the methylation status of the CpG islands in the human genome under normal conditions. The features for prediction include those that have been previously demonstrated effective (CpG island specific attributes, DNA sequence composition patterns, DNA structure patterns, distribution patterns of conserved transcription factor binding sites and conserved elements, and histone methylation status) as well as those that have not been extensively explored but are likely to contribute additional information from a biological point of view (nucleosome positioning propensities, gene functions, and histone acetylation status). Statistical tests are performed to identify the features that are significantly correlated with the methylation status of the CpG islands, and principal component analysis is then performed to decorrelate the selected features. Data from the Human Epigenome Project (HEP) are used to train, validate and test the predictive models. Specifically, the models are trained and validated by using the DNA methylation data obtained in the CD4 lymphocytes, and are then tested for generalizability using the DNA methylation data obtained in the other 11 normal tissues and cell types. 
Our experiments have shown that (1) an eight-dimensional feature space that is selected via the principal component analysis and that combines all categories of information is effective for predicting the CpG island methylation status, (2) by incorporating the information regarding the nucleosome positioning, gene functions, and histone acetylation, the models can achieve higher specificity and accuracy than the existing models while maintaining a comparable sensitivity measure, (3) the histone modification (methylation and acetylation) information contributes significantly to the prediction, without which the performance of the models deteriorate, and, (4) the predictive models generalize well to different tissues and cell types. The developed program CpGIMethPred is freely available at http://users.ece.gatech.edu/~hzheng7/CGIMetPred.zip.",2013-01-23 +24742008,Goodness-of-Fit Tests and Nonparametric Adaptive Estimation for Spike Train Analysis.,"When dealing with classical spike train analysis, the practitioner often performs goodness-of-fit tests to test whether the observed process is a Poisson process, for instance, or if it obeys another type of probabilistic model (Yana et al. in Biophys. J. 46(3):323-330, 1984; Brown et al. in Neural Comput. 14(2):325-346, 2002; Pouzat and Chaffiol in Technical report, http://arxiv.org/abs/arXiv:0909.2785, 2009). In doing so, there is a fundamental plug-in step, where the parameters of the supposed underlying model are estimated. The aim of this article is to show that plug-in has sometimes very undesirable effects. We propose a new method based on subsampling to deal with those plug-in issues in the case of the Kolmogorov-Smirnov test of uniformity. The method relies on the plug-in of good estimates of the underlying model that have to be consistent with a controlled rate of convergence. Some nonparametric estimates satisfying those constraints in the Poisson or in the Hawkes framework are highlighted. 
Moreover, they share adaptive properties that are useful from a practical point of view. We show the performance of those methods on simulated data. We also provide a complete analysis with these tools on single unit activity recorded on a monkey during a sensory-motor task. Electronic Supplementary Material: The online version of this article (doi:10.1186/2190-8567-4-3) contains supplementary material.",2014-04-17 +22843982,Estimation of pleiotropy between complex diseases using single-nucleotide polymorphism-derived genomic relationships and restricted maximum likelihood.,"

Summary

Genetic correlations are the genome-wide aggregate effects of causal variants affecting multiple traits. Traditionally, genetic correlations between complex traits are estimated from pedigree studies, but such estimates can be confounded by shared environmental factors. Moreover, for diseases, low prevalence rates imply that even if the true genetic correlation between disorders was high, co-aggregation of disorders in families might not occur or could not be distinguished from chance. We have developed and implemented statistical methods based on linear mixed models to obtain unbiased estimates of the genetic correlation between pairs of quantitative traits or pairs of binary traits of complex diseases using population-based case-control studies with genome-wide single-nucleotide polymorphism data. The method is validated in a simulation study and applied to estimate genetic correlation between various diseases from Wellcome Trust Case Control Consortium data in a series of bivariate analyses. We estimate a significant positive genetic correlation between risk of Type 2 diabetes and hypertension of ~0.31 (SE 0.14, P = 0.024).

Availability

Our methods, appropriate for both quantitative and binary traits, are implemented in the freely available software GCTA (http://www.complextraitgenomics.com/software/gcta/reml_bivar.html).

Contact

hong.lee@uq.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-26 +22923290,Positive-unlabeled learning for disease gene identification.,"

Background

Identifying disease genes from the human genome is an important but challenging task in biomedical research. Machine learning methods can be applied to discover new disease genes based on the known ones. Existing machine learning methods typically use the known disease genes as the positive training set P and the unknown genes as the negative training set N (a non-disease gene set does not exist) to build classifiers to identify new disease genes from the unknown genes. However, such classifiers are actually built from a noisy negative set N as there can be unknown disease genes in N itself. As a result, the classifiers do not perform as well as they could.

Result

Instead of treating the unknown genes as negative examples in N, we treat them as an unlabeled set U. We design a novel positive-unlabeled (PU) learning algorithm PUDI (PU learning for disease gene identification) to build a classifier using P and U. We first partition U into four sets, namely, reliable negative set RN, likely positive set LP, likely negative set LN and weak negative set WN. The weighted support vector machines are then used to build a multi-level classifier based on the four training sets and positive training set P to identify disease genes. Our experimental results demonstrate that our proposed PUDI algorithm outperformed the existing methods significantly.

Conclusion

The proposed PUDI algorithm is able to identify disease genes more accurately by treating the unknown data more appropriately as unlabeled set U instead of negative set N. Given that many machine learning problems in biomedical research do involve positive and unlabeled data instead of negative data, it is possible that the machine learning methods for these problems can be further improved by adopting PU learning methods, as we have done here for disease gene identification.

Availability and implementation

The executable program and data are available at http://www1.i2r.a-star.edu.sg/~xlli/PUDI/PUDI.html.",2012-08-24 +21936510,SERAPhiC: a benchmark for in silico fragment-based drug design.,"Our main objective was to compile a data set of high-quality protein-fragment complexes and make it publicly available. Once assembled, the data set was challenged using docking procedures to address the following questions: (i) Can molecular docking correctly reproduce the experimentally solved structures? (ii) How thorough must the sampling be to replicate the experimental data? (iii) Can commonly used scoring functions discriminate between the native pose and other energy minima? The data set, named SERAPhiC (Selected Fragment Protein Complexes), is publicly available in a ready-to-dock format ( http://www.iit.it/en/drug-discovery-and-development/seraphic.html ). It offers computational medicinal chemists a reliable test set for both in silico protocol assessment and software development.",2011-10-12 +23462073,FDA-iRISK--a comparative risk assessment system for evaluating and ranking food-hazard pairs: case studies on microbial hazards.,"Stakeholders in the system of food safety, in particular federal agencies, need evidence-based, transparent, and rigorous approaches to estimate and compare the risk of foodborne illness from microbial and chemical hazards and the public health impact of interventions. FDA-iRISK (referred to here as iRISK), a Web-based quantitative risk assessment system, was developed to meet this need. The modeling tool enables users to assess, compare, and rank the risks posed by multiple food-hazard pairs at all stages of the food supply system, from primary production, through manufacturing and processing, to retail distribution and, ultimately, to the consumer. 
Using standard data entry templates, built-in mathematical functions, and Monte Carlo simulation techniques, iRISK integrates data and assumptions from seven components: the food, the hazard, the population of consumers, process models describing the introduction and fate of the hazard up to the point of consumption, consumption patterns, dose-response curves, and health effects. Beyond risk ranking, iRISK enables users to estimate and compare the impact of interventions and control measures on public health risk. iRISK provides estimates of the impact of proposed interventions in various ways, including changes in the mean risk of illness and burden of disease metrics, such as losses in disability-adjusted life years. Case studies for Listeria monocytogenes and Salmonella were developed to demonstrate the application of iRISK for the estimation of risks and the impact of interventions for microbial hazards. iRISK was made available to the public at http://irisk.foodrisk.org in October 2012.",2013-03-01 +24470570,Parseq: reconstruction of microbial transcription landscape from RNA-Seq read counts using state-space models.,"

Motivation

The most common RNA-Seq strategy consists of random shearing, amplification and high-throughput sequencing of the RNA fraction. Methods to analyze transcription level variations along the genome from the read count profiles generated by the RNA-Seq protocol are needed.

Results

We developed a statistical approach to estimate the local transcription levels and to identify transcript borders. This transcriptional landscape reconstruction relies on a state-space model to describe transcription level variations in terms of abrupt shifts and more progressive drifts. A new emission model is introduced to capture not only the read count variance inside a transcript but also its short-range autocorrelation and the fraction of positions with zero counts. The estimation relies on a particle Gibbs algorithm whose running time makes it more suited to microbial genomes. The approach outperformed read-overlapping strategies on synthetic and real microbial datasets.

Availability

A program named Parseq is available at: http://www.lgm.upmc.fr/parseq/.

Contact

bodgan.mirauta@upmc.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-01-27 +25611331,Distribution and frequencies of post-transcriptional modifications in tRNAs.,"Functional tRNA molecules always contain a wide variety of post-transcriptionally modified nucleosides. These modifications stabilize tRNA structure, allow for proper interaction with other macromolecules and fine-tune the decoding of mRNAs during translation. Their presence in functionally important regions of tRNA is conserved in all domains of life. However, the identities of many of these modified residues depend much on the phylogeny of organisms the tRNAs are found in, attesting for domain-specific strategies of tRNA maturation. In this work we present a new tool, tRNAmodviz web server (http://genesilico.pl/trnamodviz) for easy comparative analysis and visualization of modification patterns in individual tRNAs, as well as in groups of selected tRNA sequences. We also present results of comparative analysis of tRNA sequences derived from 7 phylogenetically distinct groups of organisms: Gram-negative bacteria, Gram-positive bacteria, cytosol of eukaryotic single cell organisms, Fungi and Metazoa, cytosol of Viridiplantae, mitochondria, plastids and Euryarchaeota. These data update the study conducted 20 y ago with the tRNA sequences available at that time.",2014-01-01 +23717556,HGPGD: the human gene population genetic difference database.,"Demographic events such as migration, and evolutionary events like mutation and recombination, have contributed to the genetic variations that are found in the human genome. During the evolution and differentiation of human populations, different functional genes and pathways (a group of genes that act together to perform specific biological tasks) would have displayed different degrees of genetic diversity or evolutionary conservatism. 
To query the genetic differences of functional genes or pathways in populations, we have developed the human gene population genetic difference (HGPGD) database. Currently, 11 common population genetic features, 18,158 single human genes, 220 KEGG (Kyoto Encyclopedia of Genes and Genomes) human pathways and 4,639 Gene Ontology (GO) categories (3,269 in biological process; 862 in molecular function; and 508 in cellular component) are available in the HGPGD database. The 11 population genetic features are related mainly to three aspects: allele frequency, linkage disequilibrium pattern, and transferability of tagSNPs. By entering a list of Gene IDs, KEGG pathway IDs or GO category IDs and selecting a population genetic feature, users can search the genetic differences between pairwise HapMap populations. We hope that, when the researchers carry out gene-based, KEGG pathway-based or GO category-based research, they can take full account of the genetic differences between populations. The HGPGD database (V1.0) is available at http://www.bioapp.org/hgpgd.",2013-05-22 +23703206,PubTator: a web-based text mining tool for assisting biocuration.,"Manually curating knowledge from biomedical literature into structured databases is highly expensive and time-consuming, making it difficult to keep pace with the rapid growth of the literature. There is therefore a pressing need to assist biocuration with automated text mining tools. Here, we describe PubTator, a web-based system for assisting biocuration. PubTator is different from the few existing tools by featuring a PubMed-like interface, which many biocurators find familiar, and being equipped with multiple challenge-winning text mining algorithms to ensure the quality of its automatic results. Through a formal evaluation with two external user groups, PubTator was shown to be capable of improving both the efficiency and accuracy of manual curation. 
PubTator is publicly available at http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/PubTator/.",2013-05-22 +22955990,Sequence features and chromatin structure around the genomic regions bound by 119 human transcription factors.,"Chromatin immunoprecipitation coupled with high-throughput sequencing (ChIP-seq) has become the dominant technique for mapping transcription factor (TF) binding regions genome-wide. We performed an integrative analysis centered around 457 ChIP-seq data sets on 119 human TFs generated by the ENCODE Consortium. We identified highly enriched sequence motifs in most data sets, revealing new motifs and validating known ones. The motif sites (TF binding sites) are highly conserved evolutionarily and show distinct footprints upon DNase I digestion. We frequently detected secondary motifs in addition to the canonical motifs of the TFs, indicating tethered binding and cobinding between multiple TFs. We observed significant position and orientation preferences between many cobinding TFs. Genes specifically expressed in a cell line are often associated with a greater occurrence of nearby TF binding in that cell line. We observed cell-line-specific secondary motifs that mediate the binding of the histone deacetylase HDAC2 and the enhancer-binding protein EP300. TF binding sites are located in GC-rich, nucleosome-depleted, and DNase I sensitive regions, flanked by well-positioned nucleosomes, and many of these features show cell type specificity. The GC-richness may be beneficial for regulating TF binding because, when unoccupied by a TF, these regions are occupied by nucleosomes in vivo. 
We present the results of our analysis in a TF-centric web repository Factorbook (http://factorbook.org) and will continually update this repository as more ENCODE data are generated.",2012-09-01 +24200767,A urologic oncology roundtable discussion: how to choose among the available therapies for the treatment of castration-resistant prostate cancer.,"Results from a recent survey of 100 urologists and 100 oncologists who treat patients with castration-resistant prostate cancer (CRPC) identified a lack of physician confidence in choosing among the variety of new anticancer therapies available, and incorporating these therapies into their clinical decision-making process. In response to a survey conducted by Urologic Oncology, a physician roundtable discussion was convened and this companion summary article created to provide a knowledge-based perspective for optimizing CRPC treatment and improving communication between urologists and oncologists (http://prostatecancer.urologiconcology.org/). The participating experts described the importance of a documented testosterone level, despite androgen-deprivation therapy, and an increase in prostate-specific antigen level when diagnosing patients with CRPC. Recently published data and personal clinical experience in CRPC management using approved chemotherapeutics, immunotherapies, and oral agents were discussed, as were management of bone metastases and the overall survival improvement in patients undergoing treatment.",2013-11-01 +23707439,Adjuvant and salvage radiotherapy after prostatectomy: AUA/ASTRO Guideline.,"

Purpose

The purpose of this guideline is to provide a clinical framework for the use of radiotherapy after radical prostatectomy as adjuvant or salvage therapy.

Materials and methods

A systematic literature review using the PubMed®, Embase, and Cochrane databases was conducted to identify peer-reviewed publications relevant to the use of radiotherapy after prostatectomy. The review yielded 294 articles; these publications were used to create the evidence-based guideline statements. Additional guidance is provided as Clinical Principles when insufficient evidence existed.

Results

Guideline statements are provided for patient counseling, the use of radiotherapy in the adjuvant and salvage contexts, defining biochemical recurrence, and conducting a re-staging evaluation.

Conclusions

Physicians should offer adjuvant radiotherapy to patients with adverse pathologic findings at prostatectomy (i.e., seminal vesicle invasion, positive surgical margins, extraprostatic extension) and should offer salvage radiotherapy to patients with prostatic specific antigen or local recurrence after prostatectomy in whom there is no evidence of distant metastatic disease. The offer of radiotherapy should be made in the context of a thoughtful discussion of possible short- and long-term side effects of radiotherapy as well as the potential benefits of preventing recurrence. The decision to administer radiotherapy should be made by the patient and the multi-disciplinary treatment team with full consideration of the patient's history, values, preferences, quality of life, and functional status. Please visit the ASTRO and AUA websites (http://www.redjournal.org/webfiles/images/journals/rob/RAP%20Guideline.pdf and http://www.auanet.org/education/guidelines/radiation-after-prostatectomy.cfm) to view this guideline in its entirety, including the full literature review.",2013-05-21 +23698860,TiPs: a database of therapeutic targets in pathogens and associated tools.,"

Motivation

The need for new drugs and new targets is particularly compelling in an era that is witnessing an alarming increase of drug resistance in human pathogens. The identification of new targets of known drugs is a promising approach, which has proven successful in several cases. Here, we describe a database that includes information on 5153 putative drug-target pairs for 150 human pathogens derived from available drug-target crystallographic complexes.

Availability and implementation

The TiPs database is freely available at http://biocomputing.it/tips.

Contact

anna.tramontano@uniroma1.it or allegra.via@uniroma1.it.",2013-05-21 +23489488,Application of fast Fourier transform cross-correlation and mass spectrometry data for accurate alignment of chromatograms.,"Chromatography has been established as one of the most important analytical methods in the modern analytical laboratory. However, preprocessing of the chromatograms, especially peak alignment, is usually a time-consuming task prior to extracting useful information from the datasets because of the small unavoidable differences in the experimental conditions caused by minor changes and drift. Most of the alignment algorithms are performed on reduced datasets using only the detected peaks in the chromatograms, which means a loss of data and introduces the problem of extraction of peak data from the chromatographic profiles. These disadvantages can be overcome by using the full chromatographic information that is generated from hyphenated chromatographic instruments. A new alignment algorithm called CAMS (Chromatogram Alignment via Mass Spectra) is presented here to correct the retention time shifts among chromatograms accurately and rapidly. In this report, peaks of each chromatogram were detected based on Continuous Wavelet Transform (CWT) with Haar wavelet and were aligned against the reference chromatogram via the correlation of mass spectra. The aligning procedure was accelerated by Fast Fourier Transform cross correlation (FFT cross correlation). This approach has been compared with several well-known alignment methods on real chromatographic datasets, which demonstrates that CAMS can preserve the shape of peaks and achieve a high quality alignment result. 
Furthermore, the CAMS method was implemented in the Matlab language and available as an open source package at http://www.github.com/matchcoder/CAMS.",2013-02-28 +23155066,DiffSplice: the genome-wide detection of differential splicing events with RNA-seq.,"The RNA transcriptome varies in response to cellular differentiation as well as environmental factors, and can be characterized by the diversity and abundance of transcript isoforms. Differential transcription analysis, the detection of differences between the transcriptomes of different cells, may improve understanding of cell differentiation and development and enable the identification of biomarkers that classify disease types. The availability of high-throughput short-read RNA sequencing technologies provides in-depth sampling of the transcriptome, making it possible to accurately detect the differences between transcriptomes. In this article, we present a new method for the detection and visualization of differential transcription. Our approach does not depend on transcript or gene annotations. It also circumvents the need for full transcript inference and quantification, which is a challenging problem because of short read lengths, as well as various sampling biases. Instead, our method takes a divide-and-conquer approach to localize the difference between transcriptomes in the form of alternative splicing modules (ASMs), where transcript isoforms diverge. Our approach starts with the identification of ASMs from the splice graph, constructed directly from the exons and introns predicted from RNA-seq read alignments. The abundance of alternative splicing isoforms residing in each ASM is estimated for each sample and is compared across sample groups. A non-parametric statistical test is applied to each ASM to detect significant differential transcription with a controlled false discovery rate. 
The sensitivity and specificity of the method have been assessed using simulated data sets and compared with other state-of-the-art approaches. Experimental validation using qRT-PCR confirmed a selected set of genes that are differentially expressed in a lung differentiation study and a breast cancer data set, demonstrating the utility of the approach applied on experimental biological data sets. The software of DiffSplice is available at http://www.netlab.uky.edu/p/bioinfo/DiffSplice.",2012-11-15 +23175613,"BioGPS and MyGene.info: organizing online, gene-centric information.","Fast-evolving technologies have enabled researchers to easily generate data at genome scale, and using these technologies to compare biological states typically results in a list of candidate genes. Researchers are then faced with the daunting task of prioritizing these candidate genes for follow-up studies. There are hundreds, possibly even thousands, of web-based gene annotation resources available, but it quickly becomes impractical to manually access and review all of these sites for each gene in a candidate gene list. BioGPS (http://biogps.org) was created as a centralized gene portal for aggregating distributed gene annotation resources, emphasizing community extensibility and user customizability. BioGPS serves as a convenient tool for users to access known gene-centric resources, as well as a mechanism to discover new resources that were previously unknown to the user. This article describes updates to BioGPS made after its initial release in 2008. We summarize recent additions of features and data, as well as the robust user activity that underlies this community intelligence application. Finally, we describe MyGene.info (http://mygene.info) and related web services that provide programmatic access to BioGPS.",2012-11-21 +23044542,VIPR HMM: a hidden Markov model for detecting recombination with microbial detection microarrays.,"

Motivation

Current methods in diagnostic microbiology typically focus on the detection of a single genomic locus or protein in a candidate agent. The presence of the entire microbe is then inferred from this isolated result. Problematically, the presence of recombination in microbial genomes would go undetected unless other genomic loci or protein components were specifically assayed. Microarrays lend themselves well to the detection of multiple loci from a given microbe; furthermore, the inherent nature of microarrays facilitates highly parallel interrogation of multiple microbes. However, none of the existing methods for analyzing diagnostic microarray data has the capacity to specifically identify recombinant microbes. In previous work, we developed a novel algorithm, VIPR, for analyzing diagnostic microarray data.

Results

We have expanded upon our previous implementation of VIPR by incorporating a hidden Markov model (HMM) to detect recombinant genomes. We trained our HMM on a set of non-recombinant parental viruses and applied our method to 11 recombinant alphaviruses and 4 recombinant flaviviruses hybridized to a diagnostic microarray in order to evaluate performance of the HMM. VIPR HMM correctly identified 95% of the 62 inter-species recombination breakpoints in the validation set and only two false-positive breakpoints were predicted. This study represents the first description and validation of an algorithm capable of detecting recombinant viruses based on diagnostic microarray hybridization patterns.

Availability

VIPR HMM is freely available for academic use and can be downloaded from http://ibridgenetwork.org/wustl/vipr.

Contact

davewang@borcim.wustl.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-07 +22120689,dopOSCCI: a functional transcranial Doppler ultrasonography summary suite for the assessment of cerebral lateralization of cognitive function.,"We present a description of a new software package, 'dopOSCCI', which summarises data from experimental studies where functional transcranial Doppler ultrasonography (fTCD) is used to compare hemispheric rates of blood flow in order to assess lateralization of a cognitive process. The software provides a graphical user interface to summarise analogue and digital data collected using Multi-Dop Doppler Ultrasound devices (DWL Multidop T2: manufacturer, DWL Elektronische Systeme, Singen, Germany). The unique aspects of dopOSCCI allow multi-file processing, multi-event marker processing, behavioural and multi-session summaries, image file data visualization, and tab-delimited output files which includes split-half, single-trial summaries and data quality variables. The Matlab based software is available under the GNU GPL license and can be accessed online at https://databank.ora.ox.ac.uk/general/datasets/dopOSCCI, the Oxford University DataBank.",2011-11-19 +23812989,Predicting protein interactions via parsimonious network history inference.,"

Motivation

Reconstruction of the network-level evolutionary history of protein-protein interactions provides a principled way to relate interactions in several present-day networks. Here, we present a general framework for inferring such histories and demonstrate how it can be used to determine what interactions existed in the ancestral networks, which present-day interactions we might expect to exist based on evolutionary evidence and what information extant networks contain about the order of ancestral protein duplications.

Results

Our framework characterizes the space of likely parsimonious network histories. It results in a structure that can be used to find probabilities for a number of events associated with the histories. The framework is based on a directed hypergraph formulation of dynamic programming that we extend to enumerate many optimal and near-optimal solutions. The algorithm is applied to reconstructing ancestral interactions among bZIP transcription factors, imputing missing present-day interactions among the bZIPs and among proteins from five herpes viruses, and determining relative protein duplication order in the bZIP family. Our approach more accurately reconstructs ancestral interactions than existing approaches. In cross-validation tests, we find that our approach ranks the majority of the left-out present-day interactions among the top 2 and 17% of possible edges for the bZIP and herpes networks, respectively, making it a competitive approach for edge imputation. It also estimates relative bZIP protein duplication orders, using only interaction data and phylogenetic tree topology, which are significantly correlated with sequence-based estimates.

Availability

The algorithm is implemented in C++, is open source and is available at http://www.cs.cmu.edu/ckingsf/software/parana2.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +23812987,Multitask learning for host-pathogen protein interactions.,"

Motivation

An important aspect of infectious disease research involves understanding the differences and commonalities in the infection mechanisms underlying various diseases. Systems biology-based approaches study infectious diseases by analyzing the interactions between the host species and the pathogen organisms. This work aims to combine the knowledge from experimental studies of host-pathogen interactions in several diseases to build stronger predictive models. Our approach is based on a formalism from machine learning called 'multitask learning', which considers the problem of building models across tasks that are related to each other. A 'task' in our scenario is the set of host-pathogen protein interactions involved in one disease. To integrate interactions from several tasks (i.e. diseases), our method exploits the similarity in the infection process across the diseases. In particular, we use the biological hypothesis that similar pathogens target the same critical biological processes in the host, in defining a common structure across the tasks.

Results

Our current work on host-pathogen protein interaction prediction focuses on human as the host, and four bacterial species as pathogens. The multitask learning technique we develop uses a task-based regularization approach. We find that the resulting optimization problem is a difference of convex (DC) functions. To optimize, we implement a Convex-Concave procedure-based algorithm. We compare our integrative approach to baseline methods that build models on a single host-pathogen protein interaction dataset. Our results show that our approach outperforms the baselines on the training data. We further analyze the protein interaction predictions generated by the models, and find some interesting insights.

Availability

The predictions and code are available at: http://www.cs.cmu.edu/~mkshirsa/ismb2013_paper320.html .

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +23444967,A novel method for cross-species gene expression analysis.,"

Background

Analysis of gene expression from different species is a powerful way to identify evolutionarily conserved transcriptional responses. However, due to evolutionary events such as gene duplication, there is no one-to-one correspondence between genes from different species which makes comparison of their expression profiles complex.

Results

In this paper we describe a new method for cross-species meta-analysis of gene expression. The method takes the homology structure between compared species into account and can therefore compare expression data from genes with any number of orthologs and paralogs. A simulation study shows that the proposed method results in a substantial increase in statistical power compared to previously suggested procedures. As a proof of concept, we analyzed microarray data from heat stress experiments performed in eight species and identified several well-known evolutionarily conserved transcriptional responses. The method was also applied to gene expression profiles from five studies of estrogen exposed fish and both known and potentially novel responses were identified.

Conclusions

The method described in this paper will further increase the potential and reliability of meta-analysis of gene expression profiles from evolutionarily distant species. The method has been implemented in R and is freely available at http://bioinformatics.math.chalmers.se/Xspecies/.",2013-02-27 +27485201,Modeling the Biodegradability of Chemical Compounds Using the Online CHEmical Modeling Environment (OCHEM).,"Biodegradability describes the capacity of substances to be mineralized by free-living bacteria. It is a crucial property in estimating a compound's long-term impact on the environment. The ability to reliably predict biodegradability would reduce the need for laborious experimental testing. However, this endpoint is difficult to model due to unavailability or inconsistency of experimental data. Our approach makes use of the Online Chemical Modeling Environment (OCHEM) and its rich supply of machine learning methods and descriptor sets to build classification models for ready biodegradability. These models were analyzed to determine the relationship between characteristic structural properties and biodegradation activity. The distinguishing feature of the developed models is their ability to estimate the accuracy of prediction for each individual compound. The models developed using seven individual descriptor sets were combined in a consensus model, which provided the highest accuracy. The identified overrepresented structural fragments can be used by chemists to improve the biodegradability of new chemical compounds. 
The consensus model, the datasets used, and the calculated structural fragments are publicly available at http://ochem.eu/article/31660.",2013-11-28 +26736126,Abstract: An International Comparison of Private and Public Schools Using Multilevel Propensity Score Methods and Graphics.,"As can be seen from the recent Special Issue of MBR on propensity score analysis (PSA) methods, the use of PSA has gained increasing popularity for estimating causal effects in observational studies. However, PSA use with multilevel or clustered data has been limited, and to date there seems to have been no development of specialized graphics for such data. This paper introduces the multilevelPSA ( http://multilevelPSA.r-forge.r-project.org ) package for R that provides cluster-based functions for estimating propensity scores as well as graphics to exhibit results for multilevel data. This work extends to the multilevel case the framework for visualizing propensity score analysis introduced by Helmreich and Pruzek (2009). International data from the Programme for International Student Assessment (Organization for Economic Co-operation and Development, 2009) are comprehensively examined to compare private with public schools on reading, mathematics, and science outcomes after adjusting for covariate differences in the multilevel context. Particularly for analyses of large data sets, focusing on statistical significance is limiting. As can readily be seen, overall results favor ""private"" over ""public"" schools, at least for end of secondary school math achievement. But the graphics provide a more nuanced understanding of the nature and magnitude of adjusted differences for countries. Furthermore, the graphics are readily interpreted by a nontechnical audience. 
Broadly speaking, it is seen that modern graphics can enhance and extend conventional numerical summaries by focusing on details of what data have to say for multilevel comparisons of many countries based on propensity score methods.",2011-11-01 +24727276,hypD as a marker for [NiFe]-hydrogenases in microbial communities of surface waters.,"Hydrogen is an important trace gas in the atmosphere. Soil microorganisms are known to be an important part of the biogeochemical H2 cycle, contributing 80 to 90% of the annual hydrogen uptake. Different aquatic ecosystems act as either sources or sinks of hydrogen, but the contribution of their microbial communities is unknown. [NiFe]-hydrogenases are the best candidates for hydrogen turnover in these environments since they are able to cope with oxygen. As they lack sufficiently conserved sequence motifs, reliable markers for these enzymes are missing, and consequently, little is known about their environmental distribution. We analyzed the essential maturation genes of [NiFe]-hydrogenases, including their frequency of horizontal gene transfer, and found hypD to be an applicable marker for the detection of the different known hydrogenase groups. Investigation of two freshwater lakes showed that [NiFe]-hydrogenases occur in many prokaryotic orders. We found that the respective hypD genes cooccur with oxygen-tolerant [NiFe]-hydrogenases (groups 1 and 5) mainly of Actinobacteria, Acidobacteria, and Burkholderiales; cyanobacterial uptake hydrogenases (group 2a) of cyanobacteria; H2-sensing hydrogenases (group 2b) of Burkholderiales, Rhizobiales, and Rhodobacterales; and two groups of multimeric soluble hydrogenases (groups 3b and 3d) of Legionellales and cyanobacteria. These findings support and expand a previous analysis of metagenomic data (M. 
Barz et al., PLoS One 5:e13846, 2010, http://dx.doi.org/10.1371/journal.pone.0013846) and further identify [NiFe]-hydrogenases that could be involved in hydrogen cycling in aquatic surface waters.",2014-04-11 +23001322,Microbase2.0: a generic framework for computationally intensive bioinformatics workflows in the cloud.,"As bioinformatics datasets grow ever larger, and analyses become increasingly complex, there is a need for data handling infrastructures to keep pace with developing technology. One solution is to apply Grid and Cloud technologies to address the computational requirements of analysing high throughput datasets. We present an approach for writing new, or wrapping existing applications, and a reference implementation of a framework, Microbase2.0, for executing those applications using Grid and Cloud technologies. We used Microbase2.0 to develop an automated Cloud-based bioinformatics workflow executing simultaneously on two different Amazon EC2 data centres and the Newcastle University Condor Grid. Several CPU years' worth of computational work was performed by this system in less than two months. The workflow produced a detailed dataset characterising the cellular localisation of 3,021,490 proteins from 867 taxa, including bacteria, archaea and unicellular eukaryotes. Microbase2.0 is freely available from http://www.microbase.org.uk/.",2012-09-24 +23568340,SCL-Epred: a generalised de novo eukaryotic protein subcellular localisation predictor.,"Knowledge of the subcellular location of a protein provides valuable information about its function, possible interaction with other proteins and drug targetability, among other things. The experimental determination of a protein's location in the cell is expensive, time consuming and open to human error. Fast and accurate predictors of subcellular location have an important role to play if the abundance of sequence data which is now available is to be fully exploited. 
In the post-genomic era, genomes in many diverse organisms are available. Many of these organisms are important in human and veterinary disease and fall outside of the well-studied plant, animal and fungi groups. We have developed a general eukaryotic subcellular localisation predictor (SCL-Epred) which predicts the location of eukaryotic proteins into three classes which are important, in particular, for determining the drug targetability of a protein-secreted proteins, membrane proteins and proteins that are neither secreted nor membrane. The algorithm powering SCL-Epred is a N-to-1 neural network and is trained on very large non-redundant sets of protein sequences. SCL-Epred performs well on training data achieving a Q of 86 % and a generalised correlation of 0.75 when tested in tenfold cross-validation on a set of 15,202 redundancy reduced protein sequences. The three class accuracy of SCL-Epred and LocTree2, and in particular a consensus predictor comprising both methods, surpasses that of other widely used predictors when benchmarked using a large redundancy reduced independent test set of 562 proteins. SCL-Epred is publicly available at http://distillf.ucd.ie/distill/ .",2013-04-09 +23683922,RTEL1 tagging SNPs and haplotypes were associated with glioma development.,"As glioma ranks as the first most prevalent solid tumors in primary central nervous system, certain single-nucleotide polymorphisms (SNPs) may be related to increased glioma risk, and have implications in carcinogenesis. The present case-control study was carried out to elucidate how common variants contribute to glioma susceptibility. Ten candidate tagging SNPs (tSNPs) were selected from seven genes whose polymorphisms have been proven by classical literatures and reliable databases to be tended to relate with gliomas, and with the minor allele frequency (MAF)>5% in the HapMap Asian population. 
The selected tSNPs were genotyped in 629 glioma patients and 645 controls from a Han Chinese population using the multiplexed SNP MassEXTEND assay calibrated. Two significant tSNPs in RTEL1 gene were observed to be associated with glioma risk (rs6010620, P=0.0016, OR: 1.32, 95% CI: 1.11-1.56; rs2297440, P=0.001, OR: 1.33, 95% CI: 1.12-1.58) by χ2 test. It was identified the genotype ""GG"" of rs6010620 acted as the protective genotype for glioma (OR, 0.46; 95% CI, 0.31-0.7; P=0.0002), while the genotype ""CC"" of rs2297440 as the protective genotype in glioma (OR, 0.47; 95% CI, 0.31-0.71; P=0.0003). Furthermore, haplotype ""GCT"" in RTEL1 gene was found to be associated with risk of glioma (OR, 0.7; 95% CI, 0.57-0.86; Fisher's P=0.0005; Pearson's P=0.0005), and haplotype ""ATT"" was detected to be associated with risk of glioma (OR, 1.32; 95% CI, 1.12-1.57; Fisher's P=0.0013; Pearson's P=0.0013). Two single variants, the genotypes of ""GG"" of rs6010620 and ""CC"" of rs2297440 (rs6010620 and rs2297440) in the RTEL1 gene, together with two haplotypes of GCT and ATT, were identified to be associated with glioma development. And it might be used to evaluate the glioma development risks to screen the above RTEL1 tagging SNPs and haplotypes.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1993021136961998.",2013-05-17 +23686350,International Union of Basic and Clinical Pharmacology. LXXXVIII. G protein-coupled receptor list: recommendations for new pairings with cognate ligands.,"In 2005, the International Union of Basic and Clinical Pharmacology Committee on Receptor Nomenclature and Drug Classification (NC-IUPHAR) published a catalog of all of the human gene sequences known or predicted to encode G protein-coupled receptors (GPCRs), excluding sensory receptors. This review updates the list of orphan GPCRs and describes the criteria used by NC-IUPHAR to recommend the pairing of an orphan receptor with its cognate ligand(s). The following recommendations are made for new receptor names based on 11 pairings for class A GPCRs: hydroxycarboxylic acid receptors [HCA₁ (GPR81) with lactate, HCA₂ (GPR109A) with 3-hydroxybutyric acid, HCA₃ (GPR109B) with 3-hydroxyoctanoic acid]; lysophosphatidic acid receptors [LPA₄ (GPR23), LPA₅ (GPR92), LPA₆ (P2Y5)]; free fatty acid receptors [FFA4 (GPR120) with omega-3 fatty acids]; chemerin receptor (CMKLR1; ChemR23) with chemerin; CXCR7 (CMKOR1) with chemokines CXCL12 (SDF-1) and CXCL11 (ITAC); succinate receptor (SUCNR1) with succinate; and oxoglutarate receptor [OXGR1 with 2-oxoglutarate]. Pairings are highlighted for an additional 30 receptors in class A where further input is needed from the scientific community to validate these findings. Fifty-seven human class A receptors (excluding pseudogenes) are still considered orphans; information has been provided where there is a significant phenotype in genetically modified animals. In class B, six pairings have been reported by a single publication, with 28 (excluding pseudogenes) still classified as orphans. Seven orphan receptors remain in class C, with one pairing described by a single paper. 
The objective is to stimulate research into confirming pairings of orphan receptors where there is currently limited information and to identify cognate ligands for the remaining GPCRs. Further information can be found on the IUPHAR Database website (http://www.iuphar-db.org).",2013-05-17 +22238636,An analysis approach for high-field fMRI data from awake non-human primates.,"fMRI experiments with awake non-human primates (NHP) have seen a surge of applications in recent years. However, the standard fMRI analysis tools designed for human experiments are not optimal for analysis of NHP fMRI data collected at high fields. There are several reasons for this, including the trial-based nature of NHP experiments, with inter-trial periods being of no interest, and segmentation artefacts and distortions that may result from field changes due to movement. We demonstrate an approach that allows us to address some of these issues consisting of the following steps: 1) Trial-based experimental design. 2) Careful control of subject movement. 3) Computer-assisted selection of trials devoid of artefacts and animal motion. 4) Nonrigid between-trial and rigid within-trial realignment of concatenated data from temporally separated trials and sessions. 5) Linear interpolation of inter-trial intervals and high-pass filtering of temporally continuous data 6) Removal of interpolated data and reconcatenation of datasets before statistical analysis with SPM. We have implemented a software toolbox, fMRI Sandbox (http://code.google.com/p/fmri-sandbox/), for semi-automated application of these processing steps that interfaces with SPM software. Here, we demonstrate that our methodology provides significant improvements for the analysis of awake monkey fMRI data acquired at high-field. 
The method may also be useful for clinical applications with subjects that are unwilling or unable to remain motionless for the whole duration of a functional scan.",2012-01-06 +21577217,The international spinal cord injury endocrine and metabolic function basic data set.,"

Objective

To develop the International Spinal Cord Injury (SCI) Endocrine and Metabolic Function Basic Data Set within the framework of the International SCI Data Sets that would facilitate consistent collection and reporting of basic endocrine and metabolic findings in the SCI population.

Setting

International.

Methods

The International SCI Endocrine and Metabolic Function Data Set was developed by a working group. The initial data set document was revised on the basis of suggestions from members of the Executive Committee of the International SCI Standards and Data Sets, the International Spinal Cord Society (ISCoS) Executive and Scientific Committees, American Spinal Injury Association (ASIA) Board, other interested organizations and societies, and individual reviewers. In addition, the data set was posted for 2 months on ISCoS and ASIA websites for comments.

Results

The final International SCI Endocrine and Metabolic Function Data Set contains questions on the endocrine and metabolic conditions diagnosed before and after spinal cord lesion. If available, information collected before injury is to be obtained only once, whereas information after injury may be collected at any time. These data include information on diabetes mellitus, lipid disorders, osteoporosis, thyroid disease, adrenal disease, gonadal disease and pituitary disease. The question of gonadal status includes stage of sexual development and that for females also includes menopausal status. Data will be collected for body mass index and for the fasting serum lipid profile. The complete instructions for data collection and the data sheet itself are freely available on the websites of ISCoS (http://www.iscos.org.uk) and ASIA (http://www.asia-spinalinjury.org).",2011-05-17 +23958730,HippDB: a database of readily targeted helical protein-protein interactions.,"

Summary

HippDB catalogs every protein-protein interaction whose structure is available in the Protein Data Bank and which exhibits one or more helices at the interface. The Web site accepts queries on variables such as helix length and sequence, and it provides computational alanine scanning and change in solvent-accessible surface area values for every interfacial residue. HippDB is intended to serve as a starting point for structure-based small molecule and peptidomimetic drug development.

Availability and implementation

HippDB is freely available on the web at http://www.nyu.edu/projects/arora/hippdb. The Web site is implemented in PHP, MySQL and Apache. Source code freely available for download at http://code.google.com/p/helidb, implemented in Perl and supported on Linux.

Contact

arora@nyu.edu.",2013-08-19 +23281601,snpTree--a web-server to identify and construct SNP trees from whole genome sequence data.,"

Background

The advances and decreasing economical cost of whole genome sequencing (WGS), will soon make this technology available for routine infectious disease epidemiology. In epidemiological studies, outbreak isolates have very little diversity and require extensive genomic analysis to differentiate and classify isolates. One of the successfully and broadly used methods is analysis of single nucleotide polymorphisms (SNPs). Currently, there are different tools and methods to identify SNPs including various options and cut-off values. Furthermore, all current methods require bioinformatic skills. Thus, we lack a standard and simple automatic tool to determine SNPs and construct phylogenetic tree from WGS data.

Results

Here we introduce snpTree, a server for online-automatic SNPs analysis. This tool is composed of different SNPs analysis suites, perl and python scripts. snpTree can identify SNPs and construct phylogenetic trees from WGS as well as from assembled genomes or contigs. WGS data in fastq format are aligned to reference genomes by BWA while contigs in fasta format are processed by Nucmer. SNPs are concatenated based on position on reference genome and a tree is constructed from concatenated SNPs using FastTree and a perl script. The online server was implemented by HTML, Java and python script. The server was evaluated using four published bacterial WGS data sets (V. cholerae, S. aureus CC398, S. Typhimurium and M. tuberculosis). The evaluation results for the first three cases were consistent and concordant for both raw reads and assembled genomes. In the latter case the original publication involved extensive filtering of SNPs, which could not be repeated using snpTree.

Conclusions

The snpTree server is an easy to use option for rapid standardised and automatic SNP analysis in epidemiological studies also for users with limited bioinformatic experience. The web server is freely accessible at http://www.cbs.dtu.dk/services/snpTree-1.0/.",2012-12-13 +22898240,Getting DNA copy numbers without control samples.,"

Unlabelled

Background

The selection of the reference to scale the data in a copy number analysis has paramount importance to achieve accurate estimates. Usually this reference is generated using control samples included in the study. However, these control samples are not always available and in these cases, an artificial reference must be created. A proper generation of this signal is crucial in terms of both noise and bias. We propose NSA (Normality Search Algorithm), a scaling method that works with and without control samples. It is based on the assumption that genomic regions enriched in SNPs with identical copy numbers in both alleles are likely to be normal. These normal regions are predicted for each sample individually and used to calculate the final reference signal. NSA can be applied to any CN data regardless of the microarray technology and preprocessing method. It also finds an optimal weighting of the samples minimizing possible batch effects.

Results

Five human datasets (a subset of HapMap samples, Glioblastoma Multiforme (GBM), Ovarian, Prostate and Lung Cancer experiments) have been analyzed. It is shown that using only tumoral samples, NSA is able to remove the bias in the copy number estimation, to reduce the noise and therefore, to increase the ability to detect copy number aberrations (CNAs). These improvements allow NSA to also detect recurrent aberrations more accurately than other state of the art methods.

Conclusions

NSA provides a robust and accurate reference for scaling probe signals data to CN values without the need of control samples. It minimizes the problems of bias, noise and batch effects in the estimation of CNs. Therefore, NSA scaling approach helps to better detect recurrent CNAs than current methods. The automatic selection of references makes it useful to perform bulk analysis of many GEO or ArrayExpress experiments without the need of developing a parser to find the normal samples or possible batches within the data. The method is available in the open-source R package NSA, which is an add-on to the aroma.cn framework. http://www.aroma-project.org/addons.",2012-08-16 +24713439,ProQM-resample: improved model quality assessment for membrane proteins by limited conformational sampling.,"

Summary

Model Quality Assessment Programs (MQAPs) are used to predict the quality of modeled protein structures. These usually use two approaches: methods using consensus of many alternative models and methods requiring only a single model to do its prediction. The consensus methods are useful to improve overall accuracy; however, they frequently fail to pick out the best possible model and cannot be used to generate and score new structures. Single-model methods, on the other hand, do not have these inherent shortcomings and can be used to both sample new structures and improve existing consensus methods. Here, we present ProQM-resample, a membrane protein-specific single-model MQAP, that couples side-chain resampling with MQAP rescoring by ProQM to improve model selection. The side-chain resampling is able to improve side-chain packing for 96% of all models, and improve model selection by 24% as measured by the sum of the Z-score for the first-ranked model (from 25.0 to 31.1), even better than the state-of-the-art consensus method Pcons. The improved model selection can be attributed to the improved side-chain quality, which enables the MQAP to rescue good backbone models with poor side-chain packing.

Availability and implementation

http://proqm.wallnerlab.org/download/.

Contact

bjornw@ifm.liu.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-08 +24712382,Recommendation to increase the test concentration of methylchloroisothiazolinone/methylisothiazolinone in the European baseline patch test series - on behalf of the European Society of Contact Dermatitis and the European Environmental and Contact Dermatitis Research Group.,"

Background

Methylchloroisothiazolinone (MCI)/methylisothiazolinone (MI) in aqua is present in the European baseline patch test series at 100 ppm, whereas 200 ppm has been used in Sweden since 1986, in Spain in the late 1980s, and, in recent years, also in the United Kingdom and Ireland.

Objectives

With regard to MCI/MI, to investigate the data on contact allergy rates in dermatitis patients, the frequencies of allergic contact dermatitis in the same group, and adverse reactions, particularly patch test sensitization in tested dermatitis patients, and to find the optimal patch test concentration as dose in mg/cm(2) .

Materials and methods

We performed a survey of the literature found via the National Library of Medicine (PubMed, http://www.ncbi.nlm.nih.gov/pubmed, last accessed 20 February 2014).

Results

MCI/MI at 200 ppm aq. diagnoses substantially more contact allergy and allergic contact dermatitis, without any registered increase in patch test sensitization, than the presently used concentration of 100 ppm.

Conclusion

MCI/MI at 200 ppm aq. is recommended to be included in the European baseline patch test series. To avoid patch test sensitization, a dose of 0.006 mg/cm(2) must not be exceeded, which means a volume of 15 µl for Finn Chambers(®) (Ø 8 mm).",2014-04-09 +22595236,Predicting protein function by multi-label correlated semi-supervised learning.,"Assigning biological functions to uncharacterized proteins is a fundamental problem in the postgenomic era. The increasing availability of large amounts of data on protein-protein interactions (PPIs) has led to the emergence of a considerable number of computational methods for determining protein function in the context of a network. These algorithms, however, treat each functional class in isolation and thereby often suffer from the difficulty of the scarcity of labeled data. In reality, different functional classes are naturally dependent on one another. We propose a new algorithm, Multi-label Correlated Semi-supervised Learning (MCSL), to incorporate the intrinsic correlations among functional classes into protein function prediction by leveraging the relationships provided by the PPI network and the functional class network. The guiding intuition is that the classification function should be sufficiently smooth on subgraphs where the respective topologies of these two networks are a good match. We encode this intuition as regularized learning with intraclass and interclass consistency, which can be understood as an extension of the graph-based learning with local and global consistency (LGC) method. Cross validation on the yeast proteome illustrates that MCSL consistently outperforms several state-of-the-art methods. Most notably, it effectively overcomes the problem associated with scarcity of label data. The supplementary files are freely available at http://sites.google.com/site/csaijiang/MCSL.",2012-07-01 +23320449,Phylogenetic search through partial tree mixing.,"

Background

Recent advances in sequencing technology have created large data sets upon which phylogenetic inference can be performed. Current research is limited by the prohibitive time necessary to perform tree search on a reasonable number of individuals. This research develops new phylogenetic algorithms that can operate on tens of thousands of species in a reasonable amount of time through several innovative search techniques.

Results

When compared to popular phylogenetic search algorithms, better trees are found much more quickly for large data sets. These algorithms are incorporated in the PSODA application available at http://dna.cs.byu.edu/psoda

Conclusions

The use of Partial Tree Mixing in a partition based tree space allows the algorithm to quickly converge on near optimal tree regions. These regions can then be searched in a methodical way to determine the overall optimal phylogenetic solution.",2012-08-24 +23142964,Density parameter estimation for finding clusters of homologous proteins--tracing actinobacterial pathogenicity lifestyles.,"

Motivation

Homology detection is a long-standing challenge in computational biology. To tackle this problem, typically all-versus-all BLAST results are coupled with data partitioning approaches resulting in clusters of putative homologous proteins. One of the main problems, however, has been widely neglected: all clustering tools need a density parameter that adjusts the number and size of the clusters. This parameter is crucial but hard to estimate without gold standard data at hand. Developing a gold standard, however, is a difficult and time consuming task. Having a reliable method for detecting clusters of homologous proteins between a huge set of species would open opportunities for better understanding the genetic repertoire of bacteria with different lifestyles.

Results

Our main contribution is a method for identifying a suitable and robust density parameter for protein homology detection without a given gold standard. Therefore, we study the core genome of 89 actinobacteria. This allows us to incorporate background knowledge, i.e. the assumption that a set of evolutionarily closely related species should share a comparably high number of evolutionarily conserved proteins (emerging from phylum-specific housekeeping genes). We apply our strategy to find genes/proteins that are specific for certain actinobacterial lifestyles, i.e. different types of pathogenicity. The whole study was performed with transitivity clustering, as it only requires a single intuitive density parameter and has been shown to be well applicable for the task of protein sequence clustering. Note, however, that the presented strategy generally does not depend on our clustering method but can easily be adapted to other clustering approaches.

Availability

All results are publicly available at http://transclust.mmci.uni-saarland.de/actino_core/ or as Supplementary Material of this article.

Contact

roettger@mpi-inf.mpg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-09 +23677608,BAGEL3: Automated identification of genes encoding bacteriocins and (non-)bactericidal posttranslationally modified peptides.,"Identifying genes encoding bacteriocins and ribosomally synthesized and posttranslationally modified peptides (RiPPs) can be a challenging task. Especially those peptides that do not have strong homology to previously identified peptides can easily be overlooked. Extensive use of BAGEL2 and user feedback has led us to develop BAGEL3. BAGEL3 features genome mining of prokaryotes, which is largely independent of open reading frame (ORF) predictions and has been extended to cover more (novel) classes of posttranslationally modified peptides. BAGEL3 uses an identification approach that combines direct mining for the gene and indirect mining via context genes. Especially for heavily modified peptides like lanthipeptides, sactipeptides, glycocins and others, this genetic context harbors valuable information that is used for mining purposes. The bacteriocin and context protein databases have been updated and it is now easy for users to submit novel bacteriocins or RiPPs. The output has been simplified to allow user-friendly analysis of the results, in particular for large (meta-genomic) datasets. The genetic context of identified candidate genes is fully annotated. As input, BAGEL3 uses FASTA DNA sequences or folders containing multiple FASTA formatted files. BAGEL3 is freely accessible at http://bagel.molgenrug.nl.",2013-05-15 +23684732,Colon tumour secretopeptidome: insights into endogenous proteolytic cleavage events in the colon tumour microenvironment.,"The secretopeptidome comprises endogenous peptides derived from proteins secreted into the tumour microenvironment through classical and non-classical secretion. 
This study characterised the low-Mr (<3kDa) component of the human colon tumour (LIM1215, LIM1863) secretopeptidome, as a first step towards gaining insights into extracellular proteolytic cleavage events in the tumour microenvironment. Based on two biological replicates, this secretopeptidome isolation strategy utilised differential centrifugal ultrafiltration in combination with analytical RP-HPLC and nanoLC-MS/MS. Secreted peptides were identified using a combination of Mascot and post-processing analyses including MSPro re-scoring, extended feature sets and Percolator, resulting in 474 protein identifications from 1228 peptides (≤1% q-value, ≤5% PEP) - a 36% increase in peptide identifications when compared with conventional Mascot (homology ionscore thresholding). In both colon tumour models, 122 identified peptides were derived from 41 cell surface protein ectodomains, 23 peptides (12 proteins) from regulated intramembrane proteolysis (RIP), and 12 peptides (9 proteins) generated from intracellular domain proteolysis. Further analyses using the protease/substrate database MEROPS, (http://merops.sanger.ac.uk/), revealed 335 (71%) proteins classified as originating from classical/non-classical secretion, or the cell membrane. Of these, peptides were identified from 42 substrates in MEROPS with defined protease cleavage sites, while peptides generated from a further 205 substrates were fragmented by hitherto unknown proteases. A salient finding was the identification of peptides from 88 classical/non-classical secreted substrates in MEROPS, implicated in tumour progression and angiogenesis (FGFBP1, PLXDC2), cell-cell recognition and signalling (DDR1, GPA33), and tumour invasiveness and metastasis (MACC1, SMAGP); the nature of the proteases responsible for these proteolytic events is unknown. 
To confirm reproducibility of peptide fragment abundance in this study, we report the identification of a specific cleaved peptide fragment in the secretopeptidome from the colon-specific GPA33 antigen in 4/14 human CRC models. This improved secretopeptidome isolation and characterisation strategy has extended our understanding of endogenous peptides generated through proteolysis of classical/non-classical secreted proteins, extracellular proteolytic processing of cell surface membrane proteins, and peptides generated through RIP. The novel peptide cleavage site information in this study provides a useful first step in detailing proteolytic cleavage associated with tumourigenesis and the extracellular environment. This article is part of a Special Issue entitled: An Updated Secretome.",2013-05-15 +23677616,CovalentDock Cloud: a web server for automated covalent docking.,"Covalent binding is an important mechanism for many drugs to gain its function. We developed a computational algorithm to model this chemical event and extended it to a web server, the CovalentDock Cloud, to make it accessible directly online without any local installation and configuration. It provides a simple yet user-friendly web interface to perform covalent docking experiments and analysis online. The web server accepts the structures of both the ligand and the receptor uploaded by the user or retrieved from online databases with valid access id. It identifies the potential covalent binding patterns, carries out the covalent docking experiments and provides visualization of the result for user analysis. This web server is free and open to all users at http://docking.sce.ntu.edu.sg/.",2013-05-15 +24158600,Mouse model phenotypes provide information about human drug targets.,"

Motivation

Methods for computational drug target identification use information from diverse information sources to predict or prioritize drug targets for known drugs. One set of resources that has been relatively neglected for drug repurposing is animal model phenotype.

Results

We investigate the use of mouse model phenotypes for drug target identification. To achieve this goal, we first integrate mouse model phenotypes and drug effects, and then systematically compare the phenotypic similarity between mouse models and drug effect profiles. We find a high similarity between phenotypes resulting from loss-of-function mutations and drug effects resulting from the inhibition of a protein through a drug action, and demonstrate how this approach can be used to suggest candidate drug targets.

Availability and implementation

Analysis code and supplementary data files are available on the project Web site at https://drugeffects.googlecode.com.",2013-10-24 +24532722,fast_protein_cluster: parallel and optimized clustering of large-scale protein modeling data.,"

Motivation

fast_protein_cluster is a fast, parallel and memory efficient package used to cluster 60 000 sets of protein models (with up to 550 000 models per set) generated by the Nutritious Rice for the World project.

Results

fast_protein_cluster is an optimized and extensible toolkit that supports Root Mean Square Deviation after optimal superposition (RMSD) and Template Modeling score (TM-score) as metrics. RMSD calculations using a laptop CPU are 60× faster than qcprot and 3× faster than current graphics processing unit (GPU) implementations. New GPU code further increases the speed of RMSD and TM-score calculations. fast_protein_cluster provides novel k-means and hierarchical clustering methods that are up to 250× and 2000× faster, respectively, than Clusco, and identify significantly more accurate models than Spicker and Clusco.

Availability and implementation

fast_protein_cluster is written in C++ using OpenMP for multi-threading support. Custom streaming Single Instruction Multiple Data (SIMD) extensions and advanced vector extension intrinsics code accelerate CPU calculations, and OpenCL kernels support AMD and Nvidia GPUs. fast_protein_cluster is available under the M.I.T. license. (http://software.compbio.washington.edu/fast_protein_cluster)",2014-02-14 +22581178,iFad: an integrative factor analysis model for drug-pathway association inference.,"

Motivation

Pathway-based drug discovery considers the therapeutic effects of compounds in the global physiological environment. This approach has been gaining popularity in recent years because the target pathways and mechanism of action for many compounds are still unknown, and there are also some unexpected off-target effects. Therefore, the inference of drug-pathway associations is a crucial step to fully realize the potential of system-based pharmacological research. Transcriptome data offer valuable information on drug-pathway targets because the pathway activities may be reflected through gene expression levels. Hence, it is of great interest to jointly analyze the drug sensitivity and gene expression data from the same set of samples to investigate the gene-pathway-drug-pathway associations.

Results

We have developed iFad, a Bayesian sparse factor analysis model to jointly analyze the paired gene expression and drug sensitivity datasets measured across the same panel of samples. The model enables direct incorporation of prior knowledge regarding gene-pathway and/or drug-pathway associations to aid the discovery of new association relationships. We use a collapsed Gibbs sampling algorithm for inference. Satisfactory performance of the proposed model was found for both simulated datasets and real data collected on the NCI-60 cell lines. Our results suggest that iFad is a promising approach for the identification of drug targets. This model also provides a general statistical framework for pathway-based integrative analysis of other types of -omics data.

Availability

The R package 'iFad' and real NCI-60 dataset used are available at http://bioinformatics.med.yale.edu/group.",2012-05-10 +24849576,FamLBL: detecting rare haplotype disease association based on common SNPs using case-parent triads.,"

Motivation

In recent years, there has been an increasing interest in using common single-nucleotide polymorphisms (SNPs) amassed in genome-wide association studies to investigate rare haplotype effects on complex diseases. Evidence has suggested that rare haplotypes may tag rare causal single-nucleotide variants, making SNP-based rare haplotype analysis not only cost effective, but also more valuable for detecting causal variants. Although a number of methods for detecting rare haplotype association have been proposed in recent years, they are population based and thus susceptible to population stratification.

Results

We propose family-triad-based logistic Bayesian Lasso (famLBL) for estimating effects of haplotypes on complex diseases using SNP data. By choosing an appropriate prior distribution, effect sizes of unassociated haplotypes can be shrunk toward zero, allowing for more precise estimation of associated haplotypes, especially those that are rare, thereby achieving greater detection power. We evaluate famLBL using simulation to gauge its type I error and power. Comparison with its population counterpart, LBL, highlights famLBL's robustness property in the presence of population substructure. Further investigation by comparing famLBL with Family-Based Association Test (FBAT) reveals its advantage for detecting rare haplotype association.

Availability and implementation

famLBL is implemented as an R-package available at http://www.stat.osu.edu/~statgen/SOFTWARE/LBL/.",2014-05-21 +23671333,IgBLAST: an immunoglobulin variable domain sequence analysis tool.,"The variable domain of an immunoglobulin (IG) sequence is encoded by multiple genes, including the variable (V) gene, the diversity (D) gene and the joining (J) gene. Analysis of IG sequences typically requires identification of each gene, as well as a comparison of sequence variations in the context of defined regions. General purpose tools, such as the BLAST program, have only limited use for such tasks, as the rearranged nature of an IG sequence and the variable length of each gene requires multiple rounds of BLAST searches for a single IG sequence. Additionally, manual assembly of different genes is difficult and error-prone. To address these issues and to facilitate other common tasks in analysing IG sequences, we have developed the sequence analysis tool IgBLAST (http://www.ncbi.nlm.nih.gov/igblast/). With this tool, users can view the matches to the germline V, D and J genes, details at rearrangement junctions, the delineation of IG V domain framework regions and complementarity determining regions. IgBLAST has the capability to analyse nucleotide and protein sequences and can process sequences in batches. Furthermore, IgBLAST allows searches against the germline gene databases and other sequence databases simultaneously to minimize the chance of missing possibly the best matching germline V gene.",2013-05-13 +21498401,"datPAV--an online processing, analysis and visualization tool for exploratory investigation of experimental data.","

Summary

Data processing, analysis and visualization (datPAV) is an exploratory tool that allows experimentalists to quickly assess the general characteristics of the data. This platform-independent software is designed as a generic tool to process and visualize data matrices. This tool explores organization of the data, detects errors and supports basic statistical analyses. Processed data can be reused whereby different step-by-step data processing/analysis workflows can be created to carry out detailed investigation. The visualization option provides publication-ready graphics. Applications of this tool are demonstrated at the web site for three cases of metabolomics, environmental and hydrodynamic data analysis.

Availability

datPAV is available free for academic use at http://www.sdwa.nus.edu.sg/datPAV/.",2011-04-15 +25318848,Automated quantification of Ki-67 proliferative index of excised neuroendocrine tumors of the lung.,"

Background

The histopathologic distinction between typical carcinoid (TC) and atypical carcinoid (AC) of the lung is based largely on mitotic index. Ki-67 may aid in separation of these tumors, as well as the distinction from large cell neuroendocrine carcinoma (LCNEC).

Methods

We identified 55 surgically resected primary neuroendocrine lung tumors (39 TC, 7 AC, 9 LCNEC) based on mitotic rate and histologic features. Ki-67 proliferative index based on automated image analysis, tumor necrosis, nodal metastases, local or distant recurrence, and survival were compared across groups.

Results

The mean mitotic count and Ki-67 index for TC, AC, and LCNEC were 0.1 and 2.3%, 3.4 and 16.8%, and 56.1 and 81.3% respectively. The Ki-67 index did not overlap among groups, with ranges of 0-6.7% for TC, 9.9-25.7% for AC, and 63.2-91.9% for LCNEC. Nodal metastases were identified in 4/39 (10%) TC, 2/7 (22%) AC, and 2/8 (25%) LCNEC. There was no survival difference between TC and AC, but there was a significant survival difference between LCNEC and TC and AC combined (p<0.001). There was a step-wise increase in disease free survival with tumor grade: no TC recurred, 2/7 AC recurred or progressed (median interval 35.5 months), and all LCNEC recurred or progressed (median interval 10.1 months). No patient with TC or AC died of disease, compared to 7/8 LCNEC with follow-up data.

Conclusions

We conclude that Ki-67 index is a useful diagnostic marker for neuroendocrine tumors, with 7% a divider between AC and TC, and 50% a divider between LCNEC and AC. LCNEC is biologically different from AC and TC, with a much more aggressive course, and a high Ki-67 index.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_174.",2014-10-16 +22784572,Extended local similarity analysis (eLSA) of microbial community and other time series data with replicates.,"

Background

The increasing availability of time series microbial community data from metagenomics and other molecular biological studies has enabled the analysis of large-scale microbial co-occurrence and association networks. Among the many analytical techniques available, the Local Similarity Analysis (LSA) method is unique in that it captures local and potentially time-delayed co-occurrence and association patterns in time series data that cannot otherwise be identified by ordinary correlation analysis. However LSA, as originally developed, does not consider time series data with replicates, which hinders the full exploitation of available information. With replicates, it is possible to understand the variability of local similarity (LS) score and to obtain its confidence interval.

Results

We extended our LSA technique to time series data with replicates and termed it extended LSA, or eLSA. Simulations showed the capability of eLSA to capture subinterval and time-delayed associations. We implemented the eLSA technique into an easy-to-use analytic software package. The software pipeline integrates data normalization, statistical correlation calculation, statistical significance evaluation, and association network construction steps. We applied the eLSA technique to microbial community and gene expression datasets, where unique time-dependent associations were identified.

Conclusions

The extended LSA analysis technique was demonstrated to reveal statistically significant local and potentially time-delayed association patterns in replicated time series data beyond that of ordinary correlation analysis. These statistically significant associations can provide insights to the real dynamics of biological systems. The newly designed eLSA software efficiently streamlines the analysis and is freely available from the eLSA homepage, which can be accessed at http://meta.usc.edu/softs/lsa.",2011-12-14 +22783576,Statistical analysis of results obtained by real-time PCR for improvement of absolute quantification of target sequences.,"

Background

Real-time quantitative PCR is increasingly used in clinical laboratories. Genomic DNA or plasmids containing cloned target sequences are necessary to generate data for standard curves. These data must be analysed to obtain the relative or absolute quantity of the target concentration in a sample. The method chosen for data analysis can strongly influence results of the quantification. Absolute quantification is important especially in clinical settings. For different reasons estimating the copy number of the gene of interest based on DNA concentration measurements is vague and tends toward overestimation, especially if cell lines are used.

Methods

Data gained by limiting dilution and multiple-tube approach were analyzed using our new Poisson distribution based software and were compared with results from DNA concentration measurement. Data from different cell sources (peripheral blood mononuclear cells and two cell lines) were compared.

Results

Limiting dilution and multiple-tube approach analyzed by a Poisson distribution simplifies and improves the generation of standard curves for real time PCR if cell lines are used. The absolute target copy number in a sample, the standard deviation, and a 95% confidence interval are calculated by the software.

Conclusions

With this easy to use program a target copy number can be reliably quantified. The program is available free of charge from: http://www.medizin.uni-greifswald.de/InnereC/index.php?id=18 (link will be activated after acceptance of the paper).",2012-01-01 +24850854,MetaImprint: an information repository of mammalian imprinted genes.,"Genomic imprinting is a complex genetic and epigenetic phenomenon that plays important roles in mammalian development and diseases. Mammalian imprinted genes have been identified widely by experimental strategies or predicted using computational methods. Systematic information for these genes would be necessary for the identification of novel imprinted genes and the analysis of their regulatory mechanisms and functions. Here, a well-designed information repository, MetaImprint (http://bioinfo.hrbmu.edu.cn/MetaImprint), is presented, which focuses on the collection of information concerning mammalian imprinted genes. The current version of MetaImprint incorporates 539 imprinted genes, including 255 experimentally confirmed genes, and their detailed research courses from eight mammalian species. MetaImprint also hosts genome-wide genetic and epigenetic information of imprinted genes, including imprinting control regions, single nucleotide polymorphisms, non-coding RNAs, DNA methylation and histone modifications. Information related to human diseases and functional annotation was also integrated into MetaImprint. To facilitate data extraction, MetaImprint supports multiple search options, such as by gene ID and disease name. Moreover, a configurable Imprinted Gene Browser was developed to visualize the information on imprinted genes in a genomic context. In addition, an Epigenetic Changes Analysis Tool is provided for online analysis of DNA methylation and histone modification differences of imprinted genes among multiple tissues and cell types. 
MetaImprint provides a comprehensive information repository of imprinted genes, allowing researchers to investigate systematically the genetic and epigenetic regulatory mechanisms of imprinted genes and their functions in development and diseases.",2014-05-21 +23420840,"FUBAR: a fast, unconstrained bayesian approximation for inferring selection.","Model-based analyses of natural selection often categorize sites into a relatively small number of site classes. Forcing each site to belong to one of these classes places unrealistic constraints on the distribution of selection parameters, which can result in misleading inference due to model misspecification. We present an approximate hierarchical Bayesian method using a Markov chain Monte Carlo (MCMC) routine that ensures robustness against model misspecification by averaging over a large number of predefined site classes. This leaves the distribution of selection parameters essentially unconstrained, and also allows sites experiencing positive and purifying selection to be identified orders of magnitude faster than by existing methods. We demonstrate that popular random effects likelihood methods can produce misleading results when sites assigned to the same site class experience different levels of positive or purifying selection--an unavoidable scenario when using a small number of site classes. Our Fast Unconstrained Bayesian AppRoximation (FUBAR) is unaffected by this problem, while achieving higher power than existing unconstrained (fixed effects likelihood) methods. The speed advantage of FUBAR allows us to analyze larger data sets than other methods: We illustrate this on a large influenza hemagglutinin data set (3,142 sequences). 
FUBAR is available as a batch file within the latest HyPhy distribution (http://www.hyphy.org), as well as on the Datamonkey web server (http://www.datamonkey.org/).",2013-02-18 +22161569,Installation and use of LabKey Server for proteomics.,"LabKey Server (formerly CPAS, the Computational Proteomics Analysis System) provides a Web-based platform for mining data from liquid chromatography-tandem mass spectrometry (LC-MS/MS) proteomic experiments. This open source platform supports systematic proteomic analyses and secure data management, integration, and sharing. LabKey Server incorporates several tools currently used in proteomic analysis, including the X! Tandem search engine, the ProteoWizard toolkit, and the PeptideProphet and ProteinProphet data mining tools. These tools and others are integrated into LabKey Server, which provides an extensible architecture for developing high-throughput biological applications. The LabKey Server analysis pipeline acts on data in standardized file formats, so that researchers may use LabKey Server with other search engines, including Mascot or SEQUEST, that follow a standardized format for reporting search engine results. Supported builds of LabKey Server are freely available at http://www.labkey.com/. Documentation and source code are available under the Apache License 2.0 at http://www.labkey.org.",2011-12-01 +22576177,CPSS: a computational platform for the analysis of small RNA deep sequencing data.,"

Unlabelled

Next generation sequencing (NGS) techniques have been widely used to document the small ribonucleic acids (RNAs) implicated in a variety of biological, physiological and pathological processes. An integrated computational tool is needed for handling and analysing the enormous datasets from small RNA deep sequencing approach. Herein, we present a novel web server, CPSS (a computational platform for the analysis of small RNA deep sequencing data), designed to completely annotate and functionally analyse microRNAs (miRNAs) from NGS data on one platform with a single data submission. Small RNA NGS data can be submitted to this server with analysis results being returned in two parts: (i) annotation analysis, which provides the most comprehensive analysis for small RNA transcriptome, including length distribution and genome mapping of sequencing reads, small RNA quantification, prediction of novel miRNAs, identification of differentially expressed miRNAs, piwi-interacting RNAs and other non-coding small RNAs between paired samples and detection of miRNA editing and modifications and (ii) functional analysis, including prediction of miRNA targeted genes by multiple tools, enrichment of gene ontology terms, signalling pathway involvement and protein-protein interaction analysis for the predicted genes. CPSS, a ready-to-use web server that integrates most functions of currently available bioinformatics tools, provides all the information wanted by the majority of users from small RNA deep sequencing datasets.

Availability

CPSS is implemented in PHP/PERL+MySQL+R and can be freely accessed at http://mcg.ustc.edu.cn/db/cpss/index.html or http://mcg.ustc.edu.cn/sdap1/cpss/index.html.",2012-05-09 +25204741,"Association between cyclin D1 G870A polymorphism and cervical cancer risk: a cumulative meta-analysis involving 2,864 patients and 3,898 controls.","

Background

Published findings on the association between the Cyclin D1 (CCND1) polymorphism and cervical cancer risk are conflicting. We performed a meta-analysis to investigate the association between CCND1 G870A polymorphism and cervical cancer risk.

Methods

PubMed, Embase and CNKI databases were searched to conduct a meta-analysis on the associations between CCND1 G870A polymorphism and cervical cancer risk. Ten published case-control studies including 2,864 patients with cervical cancer and 3,898 controls were collected in this meta-analysis. Odds ratios (OR) with 95% confidence intervals (CI) were applied to assess the relationship; meta-regression, sensitivity analysis and cumulative analysis were also conducted to guarantee the strength of results.

Results

Overall, no significant association between CCND1 G870A polymorphism and cervical cancer risk was found in allele contrast (A vs. G: OR=1.02, 95% CI=0.88-1.19, P=0.76 I2=74.5%), codominant model (GA vs. GG: OR=0.98, 95% CI=0.77-1.26, P=0.90 I2=69.1%; AA vs GG: OR=1.03, 95% CI=0.75-1.41, P=0.85 I2=75.9%), dominant model (GA + AA vs. GG: OR=1.00, 95% CI=0.78-1.28, P=0.99 I2=72.3%) and recessive model (AA vs GG + GA: OR=1.06, 95% CI=0.85-1.23, P=0.62, I2=70.1%). Similarly, in the stratified analysis by ethnicity, study design and genotyping type, no significant association was detected in any genetic model.

Conclusions

Our meta-analysis indicated that CCND1 G870A might not be a crucial risk factor for the development of cervical cancer.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_168.",2014-09-10 +21602921,Gitools: analysis and visualisation of genomic data using interactive heat-maps.,"Intuitive visualization of data and results is very important in genomics, especially when many conditions are to be analyzed and compared. Heat-maps have proven very useful for the representation of biological data. Here we present Gitools (http://www.gitools.org), an open-source tool to perform analyses and visualize data and results as interactive heat-maps. Gitools contains data import systems from several sources (i.e. IntOGen, Biomart, KEGG, Gene Ontology), which facilitate the integration of novel data with previous knowledge.",2011-05-13 +23660286,"RiceSRTFDB: a database of rice transcription factors containing comprehensive expression, cis-regulatory element and mutant information to facilitate gene function analysis.","Rice is one of the most important crop plants, representing the staple food for more than half the world's population. However, its productivity is challenged by various stresses, including drought and salinity. Transcription factors (TFs) represent a regulatory component of the genome and are the most important targets for engineering stress tolerance. Here, we constructed a database, RiceSRTFDB, which provides comprehensive expression information for rice TFs during drought and salinity stress conditions and various stages of development. This information will be useful to identify the target TF(s) involved in stress response at a particular stage of development. The curated information for cis-regulatory elements present in their promoters has also been provided, which will be important to study the binding proteins. In addition, we have provided the available mutants and their phenotype information for rice TFs. 
All this information has been integrated in the database to facilitate the selection of target TFs of interest for functional analysis. This database aims to accelerate functional genomics research of rice TFs and understand the regulatory mechanisms underlying abiotic stress responses. Database URL: http://www.nipgr.res.in/RiceSRTFDB.html",2013-05-09 +25315959,Postnatal germ cell development during mini-puberty in the mouse does not require androgen receptor: implications for managing cryptorchidism.,"Undescended testis leads to infertility and malignancy resulting from aberrant germ cell development. Androgens are proposed to control early germ cell development during the transient postnatal surge of gonadotropins and androgen, known as mini-puberty. We assessed the effect of androgen receptor on perinatal germ cell development in mice. Testes from androgen receptor knockout mice and wild-type littermates (3 to 4 per group) were collected at embryonic day 17 and postnatal days 0 (birth), 2, 4, 6, 8 and 10 for immunohistochemical analysis. Antibodies against mouse VASA homologue (germ cell marker), antimüllerian hormone (Sertoli cell marker), Ki67 (proliferating cell marker) and DAPI (nuclei) were used and visualized by confocal microscopy. Number of germ cells per tubule, germ cells on the tubular basement membrane and Sertoli cells per tubule, and percentage of proliferating germ cells (Ki67(+)) per tubule and germ cells (Ki67(+)) on the basement membrane on confocal images were counted using Image J, version 1.44 (http://imagej.nih.gov/ij/). Data were analyzed using nonparametric one-way ANOVA with GraphPad Prism® 5.02 software. In wild-type and androgen receptor knockout testes germ cells per tubule decreased from embryonic day 17 to postnatal day 2, then increased normally. 
Number of mouse VASA homologue positive germ cells per tubule and germ cells on the basement membrane were similar in androgen receptor knockout and wild-type testes (p > 0.05) at each age, and percentages of proliferating germ cells (Ki67(+)) per tubule and proliferating germ cells on the basement membrane were similar at each age (p > 0.05).Androgen receptors are not required for gonocyte migration from the center of the testicular tubules to the basement membrane and transformation into spermatogonia stem cells up to day 10 in androgen receptor knockout mice. Identifying nonandrogenic factors might improve the fertility potential of boys with undescended testis who are undergoing orchiopexy.",2014-10-12 +22039460,ConDeTri--a content dependent read trimmer for Illumina data.,"

Unlabelled

During the last few years, DNA and RNA sequencing have started to play an increasingly important role in biological and medical applications, especially due to the greater amount of sequencing data yielded from the new sequencing machines and the enormous decrease in sequencing costs. Particularly, Illumina/Solexa sequencing has had an increasing impact on gathering data from model and non-model organisms. However, accurate and easy to use tools for quality filtering have not yet been established. We present ConDeTri, a method for content dependent read trimming for next generation sequencing data using quality scores of each individual base. The main focus of the method is to remove sequencing errors from reads so that sequencing reads can be standardized. Another aspect of the method is to incorporate read trimming in next-generation sequencing data processing and analysis pipelines. It can process single-end and paired-end sequence data of arbitrary length and it is independent from sequencing coverage and user interaction. ConDeTri is able to trim and remove reads with low quality scores to save computational time and memory usage during de novo assemblies. Low coverage or large genome sequencing projects will especially gain from trimming reads. The method can easily be incorporated into preprocessing and analysis pipelines for Illumina data.

Availability and implementation

Freely available on the web at http://code.google.com/p/condetri.",2011-10-19 +23832570,Integrating pathways of Parkinson's disease in a molecular interaction map.,"Parkinson's disease (PD) is a major neurodegenerative chronic disease, most likely caused by a complex interplay of genetic and environmental factors. Information on various aspects of PD pathogenesis is rapidly increasing and needs to be efficiently organized, so that the resulting data is available for exploration and analysis. Here we introduce a computationally tractable, comprehensive molecular interaction map of PD. This map integrates pathways implicated in PD pathogenesis such as synaptic and mitochondrial dysfunction, impaired protein degradation, alpha-synuclein pathobiology and neuroinflammation. We also present bioinformatics tools for the analysis, enrichment and annotation of the map, allowing the research community to open new avenues in PD research. The PD map is accessible at http://minerva.uni.lu/pd_map .",2013-07-07 +22908217,FACETS: multi-faceted functional decomposition of protein interaction networks.,"

Motivation

The availability of large-scale curated protein interaction datasets has given rise to the opportunity to investigate higher level organization and modularity within the protein-protein interaction (PPI) network using graph theoretic analysis. Despite the recent progress, systems level analysis of high-throughput PPIs remains a daunting task because of the amount of data they present. In this article, we propose a novel PPI network decomposition algorithm called FACETS in order to make sense of the deluge of interaction data using Gene Ontology (GO) annotations. FACETS finds not just a single functional decomposition of the PPI network, but a multi-faceted atlas of functional decompositions that portray alternative perspectives of the functional landscape of the underlying PPI network. Each facet in the atlas represents a distinct interpretation of how the network can be functionally decomposed and organized. Our algorithm maximizes interpretative value of the atlas by optimizing inter-facet orthogonality and intra-facet cluster modularity.

Results

We tested our algorithm on the global networks from IntAct, and compared it with gold standard datasets from MIPS and KEGG. We demonstrated the performance of FACETS. We also performed a case study that illustrates the utility of our approach.

Supplementary information

Supplementary data are available at the Bioinformatics online.

Availability

Our software is available freely for non-commercial purposes from: http://www.cais.ntu.edu.sg/~assourav/Facets/",2012-08-20 +23656909,Discovering chromatin motifs using FAIRE sequencing and the human diploid genome.,"

Background

Specific chromatin structures are associated with active or inactive gene transcription. The gene regulatory elements are intrinsically dynamic and alternate between inactive and active states through the recruitment of DNA binding proteins, such as chromatin-remodeling proteins.

Results

We developed a unique genome-wide method to discover DNA motifs associated with chromatin accessibility using formaldehyde-assisted isolation of regulatory elements with high-throughput sequencing (FAIRE-seq). We aligned the FAIRE-seq reads to the GM12878 diploid genome and subsequently identified differential chromatin-state regions (DCSRs) using heterozygous SNPs. The DCSR pairs represent the locations of imbalances of chromatin accessibility between alleles and are ideal to reveal chromatin motifs that may directly modulate chromatin accessibility. In this study, we used DNA 6-10mer sequences to interrogate all DCSRs, and subsequently discovered conserved chromatin motifs with significant changes in the occurrence frequency. To investigate their likely roles in biology, we studied the annotated protein associated with each of the top ten chromatin motifs genome-wide, in the intergenic regions and in genes, respectively. As a result, we found that most of these annotated motifs are associated with chromatin remodeling, reflecting their significance in biology.

Conclusions

Our method is the first one using fully phased diploid genome and FAIRE-seq to discover motifs associated with chromatin accessibility. Our results were collected to construct the first chromatin motif database (CMD), providing the potential DNA motifs recognized by chromatin-remodeling proteins and is freely available at http://syslab.nchu.edu.tw/chromatin.",2013-05-08 +24966410,Biomarkers of nutrition for development--iodine review.,"The objective of the Biomarkers of Nutrition for Development (BOND) project is to provide state-of-the-art information and service with regard to selection, use, and interpretation of biomarkers of nutrient exposure, status, function, and effect. Specifically, the BOND project seeks to develop consensus on accurate assessment methodologies that are applicable to researchers (laboratory/clinical/surveillance), clinicians, programmers, and policy makers (data consumers). The BOND project is also intended to develop targeted research agendas to support the discovery and development of biomarkers through improved understanding of nutrient biology within relevant biologic systems. In phase I of the BOND project, 6 nutrients (iodine, vitamin A, iron, zinc, folate, and vitamin B-12) were selected for their high public health importance because they typify the challenges faced by users in the selection, use, and interpretation of biomarkers. For each nutrient, an expert panel was constituted and charged with the development of a comprehensive review covering the respective nutrient's biology, existing biomarkers, and specific issues of use with particular reference to the needs of the individual user groups. In addition to the publication of these reviews, materials from each will be extracted to support the BOND interactive Web site (http://www.nichd.nih.gov/global_nutrition/programs/bond/pages/index.aspx). This review represents the first in the series of reviews and covers all relevant aspects of iodine biology and biomarkers. 
The article is organized to provide the reader with a full appreciation of iodine's background history as a public health issue, its biology, and an overview of available biomarkers and specific considerations for the use and interpretation of iodine biomarkers across a range of clinical and population-based uses. The review also includes a detailed research agenda to address priority gaps in our understanding of iodine biology and assessment.",2014-06-25 +23667458,Improved method for linear B-cell epitope prediction using antigen's primary sequence.,"One of the major challenges in designing a peptide-based vaccine is the identification of antigenic regions in an antigen that can stimulate B-cell's response, also called B-cell epitopes. In the past, several methods have been developed for the prediction of conformational and linear (or continuous) B-cell epitopes. However, the existing methods for predicting linear B-cell epitopes are far from perfection. In this study, an attempt has been made to develop an improved method for predicting linear B-cell epitopes. We have retrieved experimentally validated B-cell epitopes as well as non B-cell epitopes from Immune Epitope Database and derived two types of datasets called Lbtope_Variable and Lbtope_Fixed length datasets. The Lbtope_Variable dataset contains 14876 B-cell epitope and 23321 non-epitopes of variable length where as Lbtope_Fixed length dataset contains 12063 B-cell epitopes and 20589 non-epitopes of fixed length. We also evaluated the performance of models on above datasets after removing highly identical peptides from the datasets. In addition, we have derived third dataset Lbtope_Confirm having 1042 epitopes and 1795 non-epitopes where each epitope or non-epitope has been experimentally validated in at least two studies. A number of models have been developed to discriminate epitopes and non-epitopes using different machine-learning techniques like Support Vector Machine, and K-Nearest Neighbor. 
We achieved accuracy from ∼54% to 86% using diverse s features like binary profile, dipeptide composition, AAP (amino acid pair) profile. In this study, for the first time experimentally validated non B-cell epitopes have been used for developing method for predicting linear B-cell epitopes. In previous studies, random peptides have been used as non B-cell epitopes. In order to provide service to scientific community, a web server LBtope has been developed for predicting and designing B-cell epitopes (http://crdd.osdd.net/raghava/lbtope/).",2013-05-07 +23658416,A poor man's BLASTX--high-throughput metagenomic protein database search using PAUDA.,"

Summary

In the context of metagenomics, we introduce a new approach to protein database search called PAUDA, which runs ~10,000 times faster than BLASTX, while achieving about one-third of the assignment rate of reads to KEGG orthology groups, and producing gene and taxon abundance profiles that are highly correlated to those obtained with BLASTX. PAUDA requires <80 CPU hours to analyze a dataset of 246 million Illumina DNA reads from permafrost soil for which a previous BLASTX analysis (on a subset of 176 million reads) reportedly required 800,000 CPU hours, leading to the same clustering of samples by functional profiles.

Availability

PAUDA is freely available from: http://ab.inf.uni-tuebingen.de/software/pauda. Also supplementary method details are available from this website.",2013-05-07 +22257670,AnnTools: a comprehensive and versatile annotation toolkit for genomic variants.,"

Unlabelled

AnnTools is a versatile bioinformatics application designed for comprehensive annotation of a full spectrum of human genome variation: novel and known single-nucleotide substitutions (SNP/SNV), short insertions/deletions (INDEL) and structural variants/copy number variation (SV/CNV). The variants are interpreted by interrogating data compiled from 15 constantly updated sources. In addition to detailed functional characterization of the coding variants, AnnTools searches for overlaps with regulatory elements, disease/trait associated loci, known segmental duplications and artifact prone regions, thereby offering an integrated and comprehensive analysis of genomic data. The tool conveniently accepts user-provided tracks for custom annotation and offers flexibility in input data formats. The output is generated in the universal Variant Call Format. High annotation speed makes AnnTools suitable for high-throughput sequencing facilities, while a low-memory footprint and modest CPU requirements allow it to operate on a personal computer. The application is freely available for public use; the package includes installation scripts and a set of helper tools.

Availability

http://anntools.sourceforge.net/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-18 +22978657,bgc: Software for Bayesian estimation of genomic clines.,"Introgression in admixed populations can be used to identify candidate loci that might underlie adaptation or reproductive isolation. The Bayesian genomic cline model provides a framework for quantifying variable introgression in admixed populations and identifying regions of the genome with extreme introgression that are potentially associated with variation in fitness. Here we describe the bgc software, which uses Markov chain Monte Carlo to estimate the joint posterior probability distribution of the parameters in the Bayesian genomic cline model and designate outlier loci. This software can be used with next-generation sequence data, accounts for uncertainty in genotypic state, and can incorporate information from linked loci on a genetic map. Output from the analysis is written to an HDF5 file for efficient storage and manipulation. This software is written in C++. The source code, software manual, compilation instructions and example data sets are available under the GNU Public License at http://sites.google.com/site/bgcsoftware/.",2012-09-14 +23614390,Peppy: proteogenomic search software.,"Proteogenomic searching is a useful method for identifying novel proteins, annotating genes and detecting peptides unique to an individual genome. The approach, however, can be laborious, as it often requires search segmentation and the use of several unintegrated tools. Furthermore, many proteogenomic efforts have been limited to small genomes, as large genomes can prove impractical due to the required amount of computer memory and computation time. We present Peppy, a software tool designed to perform every necessary task of proteogenomic searches quickly, accurately and automatically. 
The software generates a peptide database from a genome, tracks peptide loci, matches peptides to MS/MS spectra and assigns confidence values to those matches. Peppy automatically performs a decoy database generation, search and analysis to return identifications at the desired false discovery rate threshold. Written in Java for cross-platform execution, the software is fully multithreaded for enhanced speed. The program can run on regular desktop computers, opening the doors of proteogenomic searching to a wider audience of proteomics and genomics researchers. Peppy is available at http://geneffects.com/peppy .",2013-05-06 +24685661,Psychological correlates to dysfunctional eating patterns among morbidly obese patients accepted for bariatric surgery.,"

Objective

To examine the relationships between dysfunctional eating patterns, personality, anxiety and depression in morbidly obese patients accepted for bariatric surgery.

Design

The study used cross-sectional data collected by running a randomized controlled trial (http://clinicaltrials.gov/ct2/show/NCT01403558).

Subjects

A total of 102 patients (69 women, 33 men) with a mean (SD) age of 42.6 (9.8) years and a mean BMI of 43.5 (4.4) kg/m(2) participated.

Measurements

Measurements included the NEO-PI-R (personality: neuroticism, extroversion, openness, conscientiousness and agreeableness), the TFEQ-R-21 (dysfunctional eating: emotional eating (EE), uncontrolled eating (UE) and cognitive restraint of eating (CR)) and the HADS (anxiety and depression).

Results

The personality traits neuroticism and conscientiousness were more strongly correlated with dysfunctional eating than anxiety and depression. These differences were most pronounced for emotional and cognitive restraint of eating. Emotional eating occurred more often in female than in male patients, a finding that was partially mediated by neuroticism but not by anxiety and depression.

Conclusion

Personality traits may be important to address in the clinical management of morbidly obese patients seeking bariatric surgery as neuroticism is particularly salient in female patients displaying an emotional eating behaviour.",2014-03-29 +24602349,"Diagnosis of Trichomonous vaginalis by microscopy, latex agglutination, diamond's media, and PCR in symptomatic women, Khartoum, Sudan.","

Background

Trichomoniasis is the most common sexually transmitted disease. However, limited data are available on an effective technique for the diagnosis of Trichomonas vaginalis.

Methods

A cross-sectional study was conducted to evaluate the accuracy of wet mount microscopy, latex agglutination, Diamond's media, and polymerase chain reaction (PCR) for detection of T. vaginalis among symptomatic women who attended the gynecological clinic at Khartoum, Sudan.

Results

Of the 297 women studied, 252 (84.8%) were positive for T. vaginalis by wet mount microscopy, 257 (86.5%) by latex agglutination, 253 (85.2%) by Diamond's media, and 253 (85.2%) by PCR. The sensitivity and specificity of wet mount microscopy were 99.2% and 97.7%, respectively, compared with PCR. The sensitivity and specificity of latex agglutination and Diamond's media were 99.6% and 88.6%, and 100.0% and 86.4%, respectively, compared with PCR.

Conclusions

In this study, wet mount microscopy, latex agglutination, and Diamond's media were found to be highly sensitive and specific. However, the availability and cost effectiveness might limit the use of Diamond's media and PCR in routine practice.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/7859723851211496.",2014-03-06 +23298314,Proteome-wide analysis of amino acid variations that influence protein lysine acetylation.,"Next-generation sequencing (NGS) technologies are yielding ever higher volumes of genetic variation data. Given this large amount of data, it has become both a possibility and a priority to determine what the functional implication of genetic variations is. Considering the essential roles of acetylation in protein functions, it is highly likely that acetylation related genetic variations change protein functions. In this work, we performed a proteome-wide analysis of amino acid variations that could potentially influence protein lysine acetylation characteristics in human variant proteins. Here, we defined the AcetylAAVs as acetylation related amino acid variations that affect acetylation sites or their interacting acetyltransferases, and categorized three types of AcetylAAVs. Using the developed prediction system, named KAcePred, we detected that 50.87% of amino acid variations are potential AcetylAAVs and 12.32% of disease mutations could result in AcetylAAVs. More interestingly, from the statistical analysis, we found that the amino acid variations that directly create new potential lysine acetylation sites have more chance to cause diseases. It can be anticipated that the analysis of AcetylAAVs might be useful to screen important polymorphisms and help to identify the mechanism of genetic diseases. A user-friendly web interface for analysis of AcetylAAVs is now freely available at http://bioinfo.ncu.edu.cn/AcetylAAVs_Home.aspx .",2013-01-18 +21383760,International spinal cord injury female sexual and reproductive function basic data set.,"

Objective

To create the International Spinal Cord Injury (SCI) Female Sexual and Reproductive Function Basic Data Set within the International SCI Data Sets.

Setting

An international working group.

Methods

The draft of the data set was developed by an international working group consisting of members appointed by the International Spinal Cord Society (ISCoS), the American Spinal Injury Association (ASIA), and a representative from the Executive Committee of the International SCI Standards and Data Sets. The data set was developed in an iterative process with review and comments by members of the Executive Committee of the International SCI Standards and Data Sets, ISCoS Scientific Committee, ASIA Board and the ISCoS Council, as well as all interested organizations and individuals. In addition, the data set was posted for 2 months at the ISCoS and ASIA websites for comments. ISCoS and ASIA approved the final version of the data set. To make the data set uniform, each variable and each response category within each variable have been specifically designed to promote the collection and reporting of comparable minimal data.

Results

Variables included in the International SCI Female Sexual and Reproductive Function Basic Data Set are as follows: date of data collection, interest in discussing sexual issues, sexual issues unrelated to spinal cord lesion, sexual dysfunction related to spinal cord lesion, psychogenic genital arousal, reflex genital arousal, orgasmic function and menstruation. Complete instruction for data collection, data sheet and training cases are available at the websites of ISCoS (http://www.iscos.org.uk) and ASIA (http://www.asia-spinalinjury.org).",2011-03-08 +24227676,Track data hubs enable visualization of user-defined genome-wide annotations on the UCSC Genome Browser.,"

Summary

Track data hubs provide an efficient mechanism for visualizing remotely hosted Internet-accessible collections of genome annotations. Hub datasets can be organized, configured and fully integrated into the University of California Santa Cruz (UCSC) Genome Browser and accessed through the familiar browser interface. For the first time, individuals can use the complete browser feature set to view custom datasets without the overhead of setting up and maintaining a mirror.

Availability and implementation

Source code for the BigWig, BigBed and Genome Browser software is freely available for non-commercial use at http://hgdownload.cse.ucsc.edu/admin/jksrc.zip, implemented in C and supported on Linux. Binaries for the BigWig and BigBed creation and parsing utilities may be downloaded at http://hgdownload.cse.ucsc.edu/admin/exe/. Binary Alignment/Map (BAM) and Variant Call Format (VCF)/tabix utilities are available from http://samtools.sourceforge.net/ and http://vcftools.sourceforge.net/. The UCSC Genome Browser is publicly accessible at http://genome.ucsc.edu.",2013-11-13 +21998156,Extraction of data deposition statements from the literature: a method for automatically tracking research results.,"

Motivation

Research in the biomedical domain can have a major impact through open sharing of the data produced. For this reason, it is important to be able to identify instances of data production and deposition for potential re-use. Herein, we report on the automatic identification of data deposition statements in research articles.

Results

We apply machine learning algorithms to sentences extracted from full-text articles in PubMed Central in order to automatically determine whether a given article contains a data deposition statement, and retrieve the specific statements. With an Support Vector Machine classifier using conditional random field determined deposition features, articles containing deposition statements are correctly identified with 81% F-measure. An error analysis shows that almost half of the articles classified as containing a deposition statement by our method but not by the gold standard do indeed contain a deposition statement. In addition, our system was used to process articles in PubMed Central, predicting that a total of 52 932 articles report data deposition, many of which are not currently included in the Secondary Source Identifier [si] field for MEDLINE citations.

Availability

All annotated datasets described in this study are freely available from the NLM/NCBI website at http://www.ncbi.nlm.nih.gov/CBBresearch/Fellows/Neveol/DepositionDataSets.zip

Contact

aurelie.neveol@nih.gov; john.wilbur@nih.gov; zhiyong.lu@nih.gov

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-13 +22371128,An integrated WebGIS framework for volunteered geographic information and social media in soil and water conservation.,"Volunteered geographic information and social networking in a WebGIS has the potential to increase public participation in soil and water conservation, promote environmental awareness and change, and provide timely data that may be otherwise unavailable to policymakers in soil and water conservation management. The objectives of this study were: (1) to develop a framework for combining current technologies, computing advances, data sources, and social media; and (2) develop and test an online web mapping interface. The mapping interface integrates Microsoft Silverlight, Bing Maps, ArcGIS Server, Google Picasa Web Albums Data API, RSS, Google Analytics, and Facebook to create a rich user experience. The website allows the public to upload photos and attributes of their own subdivisions or sites they have identified and explore other submissions. The website was made available to the public in early February 2011 at http://www.AbandonedDevelopments.com and evaluated for its potential long-term success in a pilot study.",2012-02-28 +23640984,PCorral--interactive mining of protein interactions from MEDLINE.,"The extraction of information from the scientific literature is a complex task-for researchers doing manual curation and for automatic text processing solutions. The identification of protein-protein interactions (PPIs) requires the extraction of protein named entities and their relations. Semi-automatic interactive support is one approach to combine both solutions for efficient working processes to generate reliable database content. In principle, the extraction of PPIs can be achieved with different methods that can be combined to deliver high precision and/or high recall results in different combinations at the same time. 
Interactive use can be achieved, if the analytical methods are fast enough to process the retrieved documents. PCorral provides interactive mining of PPIs from the scientific literature allowing curators to skim MEDLINE for PPIs at low overheads. The keyword query to PCorral steers the selection of documents, and the subsequent text analysis generates high recall and high precision results for the curator. The underlying components of PCorral process the documents on-the-fly and are available, as well, as web service from the Whatizit infrastructure. The human interface summarizes the identified PPI results, and the involved entities are linked to relevant resources and databases. Altogether, PCorral serves curator at both the beginning and the end of the curation workflow for information retrieval and information extraction. Database URL: http://www.ebi.ac.uk/Rebholz-srv/pcorral.",2013-05-02 +23640335,CMCompare webserver: comparing RNA families via covariance models.,"A standard method for the identification of novel non-coding RNAs is homology search by covariance models. Covariance models are constructed for specific RNA families with common sequence and structure (e.g. transfer RNAs). Currently, there are models for 2208 families available from Rfam. Before being included into a database, a proposed family should be tested for specificity (finding only true homolog sequences), sensitivity (finding remote homologs) and uniqueness. The CMCompare webserver (CMCws) compares Infernal RNA family models to (i) identify models with poor specificity and (ii) explore the relationship between models. The CMCws provides options to compare new models against all existing models in the current Rfam database to avoid the construction of duplicate models for the same non-coding RNA family. In addition, the user can explore the relationship between two or more models, including whole sets of user-created family models. 
Visualization of family relationships provides help in evaluating candidates for clusters of biologically related families, called clans. The CMCws is freely available, without any login requirements, at http://rna.tbi.univie.ac.at/cmcws, and the underlying software is available under the GPL-3 license.",2013-05-02 +24663061,Fast and accurate multivariate Gaussian modeling of protein families: predicting residue contacts and protein-interaction partners.,"In the course of evolution, proteins show a remarkable conservation of their three-dimensional structure and their biological function, leading to strong evolutionary constraints on the sequence variability between homologous proteins. Our method aims at extracting such constraints from rapidly accumulating sequence data, and thereby at inferring protein structure and function from sequence information alone. Recently, global statistical inference methods (e.g. direct-coupling analysis, sparse inverse covariance estimation) have achieved a breakthrough towards this aim, and their predictions have been successfully implemented into tertiary and quaternary protein structure prediction methods. However, due to the discrete nature of the underlying variable (amino-acids), exact inference requires exponential time in the protein length, and efficient approximations are needed for practical applicability. Here we propose a very efficient multivariate Gaussian modeling approach as a variant of direct-coupling analysis: the discrete amino-acid variables are replaced by continuous Gaussian random variables. The resulting statistical inference problem is efficiently and exactly solvable. We show that the quality of inference is comparable or superior to the one achieved by mean-field approximations to inference with discrete variables, as done by direct-coupling analysis. 
This is true for (i) the prediction of residue-residue contacts in proteins, and (ii) the identification of protein-protein interaction partner in bacterial signal transduction. An implementation of our multivariate Gaussian approach is available at the website http://areeweb.polito.it/ricerca/cmp/code.",2014-03-24 +23635100,Software for selecting the most informative sets of genomic loci for multi-target microbial typing.,"

Background

High-throughput sequencing can identify numerous potential genomic targets for microbial strain typing, but identification of the most informative combinations requires the use of computational screening tools. This paper describes novel software-- Automated Selection of Typing Target Subsets (AuSeTTS)--that allows intelligent selection of optimal targets for pathogen strain typing. The objective of this software is to maximise both discriminatory power, using Simpson's index of diversity (D), and concordance with existing typing methods, using the adjusted Wallace coefficient (AW). The program interrogates molecular typing results for panels of isolates, based on large target sets, and iteratively examines each target, one-by-one, to determine the most informative subset.

Results

AuSeTTS was evaluated using three target sets: 51 binary targets (13 toxin genes, 16 phage-related loci and 22 SCCmec elements), used for multilocus typing of 153 methicillin-resistant Staphylococcus aureus (MRSA) isolates; 17 MLVA loci in 502 Streptococcus pneumoniae isolates from the MLVA database (http://www.mlva.eu) and 12 MLST loci for 98 Cryptococcus spp. isolates.The maximum D for MRSA, 0.984, was achieved with a subset of 20 targets and a D value of 0.954 with 7 targets. Twelve targets predicted MLST with a maximum AW of 0.9994. All 17 S. pneumoniae MLVA targets were required to achieve maximum D of 0.997, but 4 targets reached D of 0.990. Twelve targets predicted pneumococcal serotype with a maximum AW of 0.899 and 9 predicted MLST with maximum AW of 0.963. Eight of the 12 MLST loci were sufficient to achieve the maximum D of 0.963 for Cryptococcus spp.

Conclusions

Computerised analysis with AuSeTTS allows rapid selection of the most discriminatory targets for incorporation into typing schemes. Output of the program is presented in both tabular and graphical formats and the software is available for free download from http://www.cidmpublichealth.org/pages/ausetts.html.",2013-05-01 +23625438,Effect of missense mutations on structure and interaction of anaplastic Lymphoma kinase (ALK) in neuroblastom.,"Neuroblastoma is a cancer of the sympathetic nervous system, accounting for upto 15% of childhood cancer mortality. It can occur in many areas but most of them begin in the abdomen in the adrenal gland and can spread to the bones and other areas. http://en.wikipedia.org/wiki/Neuroblastoma-cite_note-pmid19383347-3. Unfortunately, like other cancers, its causes are still poorly understood. Anaplastic lymphoma kinase (ALK), a membrane associated tyrosine kinase was recently found to be mutated in neuroblastoma. Protein sequence of ALK was retrieved from UniProt and the seven identified mutations were substituted in native sequence to get its mutant proteins. Significant changes were explored in the mutant secondary structures when compared with the native protein. Changes were also observed in the physiochemical properties and it can therefore be inferred that, these changes may be translated in the tertiary structures due to their effects on the folding pattern. Tertiary structure of the protein modeled after refinement and validation was submitted to Protein Model Database (PMDB) and was assigned with the PMDB ID P0077827. RMSD values of the mutant structures were observed deviated from the native structure when compared with probability < 0.05. It was observed that there are a total of 15 Disordered Regions in the protein having a total of 290 Disordered Residues. 
Protein-ligand interaction analysis was performed to investigate the effects of mutations damaging its interactions and it was observed that the mutations understudy affects its interactions with ATP which ultimately results in causing neuroblastoma. This study was based on the in silico mutation analysis of Seven missense mutations of anaplastic lymphoma kinase which can better explain why missense mutations in ALK protein cause neuroblastoma. Structure and sequence based computations were systematically and comprehensively evaluated applied to the mutants in anaplastic lymphoma kinase and on the basis of our observations a detailed structural explanations have been developed for the measured and predicted impact of these missense substitutions.",2013-05-01 +23758891,Utilizing protein structure to identify non-random somatic mutations.,"

Background

Human cancer is caused by the accumulation of somatic mutations in tumor suppressors and oncogenes within the genome. In the case of oncogenes, recent theory suggests that there are only a few key ""driver"" mutations responsible for tumorigenesis. As there have been significant pharmacological successes in developing drugs that treat cancers that carry these driver mutations, several methods that rely on mutational clustering have been developed to identify them. However, these methods consider proteins as a single strand without taking their spatial structures into account. We propose an extension to current methodology that incorporates protein tertiary structure in order to increase our power when identifying mutation clustering.

Results

We have developed iPAC (identification of Protein Amino acid Clustering), an algorithm that identifies non-random somatic mutations in proteins while taking into account the three dimensional protein structure. By using the tertiary information, we are able to detect both novel clusters in proteins that are known to exhibit mutation clustering as well as identify clusters in proteins without evidence of clustering based on existing methods. For example, by combining the data in the Protein Data Bank (PDB) and the Catalogue of Somatic Mutations in Cancer, our algorithm identifies new mutational clusters in well known cancer proteins such as KRAS and PI3KC α. Further, by utilizing the tertiary structure, our algorithm also identifies clusters in EGFR, EIF2AK2, and other proteins that are not identified by current methodology. The R package is available at: http://www.bioconductor.org/packages/2.12/bioc/html/iPAC.html.

Conclusion

Our algorithm extends the current methodology to identify oncogenic activating driver mutations by utilizing tertiary protein structure when identifying nonrandom somatic residue mutation clusters.",2013-06-13 +22039151,ALFRED: an allele frequency resource for research and teaching.,"ALFRED (http://alfred.med.yale.edu) is a free, web accessible, curated compilation of allele frequency data on DNA sequence polymorphisms in anthropologically defined human populations. Currently, ALFRED has allele frequency tables on over 663,400 polymorphic sites; 170 of them have frequency tables for more than 100 different population samples. In ALFRED, a population may have multiple samples with each 'sample' consisting of many individuals on which an allele frequency is based. There are 3566 population samples from 710 different populations with allele frequency tables on at least one polymorphism. Fifty of those population samples have allele frequency data for over 650,000 polymorphisms. Records also have active links to relevant resources (dbSNP, PharmGKB, OMIM, Ethnologue, etc.). The flexible search options and data display and download capabilities available through the web interface allow easy access to the large quantity of high-quality data in ALFRED.",2011-10-28 +21851592,The taming of an impossible child: a standardized all-in approach to the phylogeny of Hymenoptera using public database sequences.,"

Background

Enormous molecular sequence data have been accumulated over the past several years and are still exponentially growing with the use of faster and cheaper sequencing techniques. There is high and widespread interest in using these data for phylogenetic analyses. However, the amount of data that one can retrieve from public sequence repositories is virtually impossible to tame without dedicated software that automates processes. Here we present a novel bioinformatics pipeline for downloading, formatting, filtering and analyzing public sequence data deposited in GenBank. It combines some well-established programs with numerous newly developed software tools (available at http://software.zfmk.de/).

Results

We used the bioinformatics pipeline to investigate the phylogeny of the megadiverse insect order Hymenoptera (sawflies, bees, wasps and ants) by retrieving and processing more than 120,000 sequences and by selecting subsets under the criteria of compositional homogeneity and defined levels of density and overlap. Tree reconstruction was done with a partitioned maximum likelihood analysis from a supermatrix with more than 80,000 sites and more than 1,100 species. In the inferred tree, consistent with previous studies, ""Symphyta"" is paraphyletic. Within Apocrita, our analysis suggests a topology of Stephanoidea + (Ichneumonoidea + (Proctotrupomorpha + (Evanioidea + Aculeata))). Despite the huge amount of data, we identified several persistent problems in the Hymenoptera tree. Data coverage is still extremely low, and additional data have to be collected to reliably infer the phylogeny of Hymenoptera.

Conclusions

While we applied our bioinformatics pipeline to Hymenoptera, we designed the approach to be as general as possible. With this pipeline, it is possible to produce phylogenetic trees for any taxonomic group and to monitor new data and tree robustness in a taxon of interest. It therefore has great potential to meet the challenges of the phylogenomic era and to deepen our understanding of the tree of life.",2011-08-18 +24320163,Tissue-specific alternative splicing analysis reveals the diversity of chromosome 18 transcriptome.,"The Chromosome-centric Human Proteome Project (C-HPP) is aimed to identify the variety of protein products and transcripts of the number of chromosomes. The Russian part of C-HPP is devoted to the study of the human chromosome 18. Using widely accepted Tophat and SpliceGrapher, a tool for accurate splice sites and alternative mRNA isoforms prediction, we performed the extensive mining of the splice variants of chromosome 18 transcripts and encoded protein products in liver, brain, lung, kidney, blood, testis, derma, and skeletal muscles. About 6.1 billion of the reads represented by 450 billion of the bases have been analyzed. The relative frequencies of splice events as well as gene expression profiles in normal tissues are evaluated. Using ExPASy PROSITE, the novel features and possible functional sites of previously unknown splice variants were highlighted. A set of unique proteotypic peptides enabling the identification of novel alternative protein species using mass-spectrometry is constructed. The revealed data will be integrated into the gene-centric knowledgebase of the Russian part of C-HPP available at http://kb18.ru and http://www.splicing.zz.mu/.",2013-12-09 +22531217,Accurate extension of multiple sequence alignments using a phylogeny-aware graph algorithm.,"

Motivation

Accurate alignment of large numbers of sequences is demanding and the computational burden is further increased by downstream analyses depending on these alignments. With the abundance of sequence data, an integrative approach of adding new sequences to existing alignments without their full re-computation and maintaining the relative matching of existing sequences is an attractive option. Another current challenge is the extension of reference alignments with fragmented sequences, as those coming from next-generation metagenomics, that contain relatively little information. Widely used methods for alignment extension are based on profile representation of reference sequences. These do not incorporate and use phylogenetic information and are affected by the composition of the reference alignment and the phylogenetic positions of query sequences.

Results

We have developed a method for phylogeny-aware alignment of partial-order sequence graphs and apply it here to the extension of alignments with new data. Our new method, called PAGAN, infers ancestral sequences for the reference alignment and adds new sequences in their phylogenetic context, either to predefined positions or by finding the best placement for sequences of unknown origin. Unlike profile-based alternatives, PAGAN considers the phylogenetic relatedness of the sequences and is not affected by inclusion of more diverged sequences in the reference set. Our analyses show that PAGAN outperforms alternative methods for alignment extension and provides superior accuracy for both DNA and protein data, the improvement being especially large for fragmented sequences. Moreover, PAGAN-generated alignments of noisy next-generation sequencing (NGS) sequences are accurate enough for the use of RNA-seq data in evolutionary analyses.

Availability

PAGAN is written in C++, licensed under the GPL and its source code is available at http://code.google.com/p/pagan-msa.",2012-04-23 +23658631,MabsBase: a Mycobacterium abscessus genome and annotation database.,"

Summary

Mycobacterium abscessus is a rapidly growing non-tuberculous mycobacterial species that has been associated with a wide spectrum of human infections. As the classification and biology of this organism is still not well understood, comparative genomic analysis on members of this species may provide further insights on their taxonomy, phylogeny, pathogenicity and other information that may contribute to better management of infections. The MabsBase described in this paper is a user-friendly database providing access to whole-genome sequences of newly discovered M. abscessus strains as well as resources for whole-genome annotations and computational predictions, to support the expanding scientific community interested in M. abscessus research. The MabsBase is freely available at http://mabscessus.um.edu.my.",2013-04-29 +23630321,The RNAsnp web server: predicting SNP effects on local RNA secondary structure.,"The function of many non-coding RNA genes and cis-regulatory elements of messenger RNA largely depends on the structure, which is in turn determined by their sequence. Single nucleotide polymorphisms (SNPs) and other mutations may disrupt the RNA structure, interfere with the molecular function and hence cause a phenotypic effect. RNAsnp is an efficient method to predict the effect of SNPs on local RNA secondary structure based on the RNA folding algorithms implemented in the Vienna RNA package. The SNP effects are quantified in terms of empirical P-values, which, for computational efficiency, are derived from extensive pre-computed tables of distributions of substitution effects as a function of gene length and GC content. Here, we present a web service that not only provides an interface for RNAsnp but also features a graphical output representation. 
In addition, the web server is connected to a local mirror of the UCSC genome browser database that enables the users to select the genomic sequences for analysis and visualize the results directly in the UCSC genome browser. The RNAsnp web server is freely available at: http://rth.dk/resources/rnasnp/.",2013-04-29 +22086953,IMG/M: the integrated metagenome data management and comparative analysis system.,The integrated microbial genomes and metagenomes (IMG/M) system provides support for comparative analysis of microbial community aggregate genomes (metagenomes) in a comprehensive integrated context. IMG/M integrates metagenome data sets with isolate microbial genomes from the IMG system. IMG/M's data content and analytical capabilities have been extended through regular updates since its first release in 2007. IMG/M is available at http://img.jgi.doe.gov/m. A companion IMG/M systems provide support for annotation and expert review of unpublished metagenomic data sets (IMG/M ER: http://img.jgi.doe.gov/mer).,2011-11-15 +24387046,Identification of RNA silencing components in soybean and sorghum.,"

Background

RNA silencing is a process triggered by 21-24 small RNAs to repress gene expression. Many organisms including plants use RNA silencing to regulate development and physiology, and to maintain genome stability. Plants possess two classes of small RNAs: microRNAs (miRNAs) and small interfering RNAs (siRNAs). The frameworks of miRNA and siRNA pathways have been established in the model plant, Arabidopsis thaliana (Arabidopsis).

Results

Here we report the identification of putative genes that are required for the generation and function of miRNAs and siRNAs in soybean and sorghum, based on knowledge obtained from Arabidopsis. The gene families, including DCL, HEN1, SE, HYL1, HST, RDR, NRPD1, NRPD2/NRPE2, NRPE1, and AGO, were analyzed for gene structures, phylogenetic relationships, and protein motifs. The gene expression was validated using RNA-seq, expressed sequence tags (EST), and reverse transcription PCR (RT-PCR).

Conclusions

The identification of these components could provide not only insight into RNA silencing mechanism in soybean and sorghum but also basis for further investigation. All data are available at http://sysbio.unl.edu/.",2014-01-04 +23868932,Quarterly vs. yearly clinical follow-up of remotely monitored recipients of prophylactic implantable cardioverter-defibrillators: results of the REFORM trial.,"

Aims

The rapidly increasing number of patients with implantable cardioverter-defibrillators (ICD) places a large burden on follow-up providers. This study investigated the possibility of longer in-office follow-up intervals in primary prevention ICD patients under remote monitoring with automatic daily data transmissions from the implant memory.

Methods and results

Conducted in 155 ICD recipients with MADIT II indications, the study compared the burden of scheduled and unscheduled ICD follow-up visits, quality of life (SF-36), and clinical outcomes in patients randomized to either 3- or 12-month follow-up intervals in the period between 3 and 27 months after implantation. Remote monitoring (Biotronik Home Monitoring) was used equally in all patients. In contrast to previous clinical studies, no calendar-based remote data checks were performed between scheduled in-office visits. Compared with the 3-month follow-up interval, the 12-month interval resulted in a minor increase in the number of unscheduled follow-ups (0.64 vs. 0.27 per patient-year; P = 0.03) and in a major reduction in the total number of in-office ICD follow-ups (1.60 vs. 3.85 per patient-year; P < 0.001). No significant difference was found in mortality, hospitalization rate, or hospitalization length during the 2-year observation period, but more patients were lost to follow-up in the 12-month group (10 vs. 3; P = 0.04). The SF-36 scores favoured the 12-month intervals in the domains 'social functioning' and 'mental health'.

Conclusion

In prophylactic ICD recipients under automatic daily remote monitoring, the extension of the 3-month in-office follow-up interval to 12 months appeared to safely reduce the ICD follow-up burden during 27 months after implantation.

Clinicaltrialsgov identifier

NCT00401466 (http://www.clinicaltrials.gov/ct2/show/NCT00401466).",2013-07-18 +21896507,wapRNA: a web-based application for the processing of RNA sequences.,"

Summary

mRNA/miRNA-seq technology is becoming the leading technology to globally profile gene expression and elucidate the transcriptional regulation mechanisms in living cells. Although there are many tools available for analyzing RNA-seq data, few of them are available as easy accessible online web tools for processing both mRNA and miRNA data for the RNA-seq based user community. As such, we have developed a comprehensive web application tool for processing mRNA-seq and miRNA-seq data. Our web tool wapRNA includes four different modules: mRNA-seq and miRNA-seq sequenced from SOLiD or Solexa platform and all the modules were tested on previously published experimental data. We accept raw sequence data with an optional reads filter, followed by mapping and gene annotation or miRNA prediction. wapRNA also integrates downstream functional analyses such as Gene Ontology, KEGG pathway, miRNA targets prediction and comparison of gene's or miRNA's different expression in different samples. Moreover, we provide the executable packages for installation on user's local server.

Availability

wapRNA is freely available for use at http://waprna.big.ac.cn. The executable packages and the instruction for installation can be downloaded from our web site.

Contact

husn@big.ac.cn; songshh@big.ac.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-06 +25432794,"Solitary fibrous tumor - clinicopathologic, immunohistochemical and molecular analysis of 28 cases.","

Background

Solitary fibrous tumor is a mesenchymal tumor of fibroblastic type, which can affect any region of the body. Recently, a recurrent gene fusion NAB2-STAT6 has been identified as molecular hallmark. The NAB2-STAT6 fusion leads to EGR1 activation and transcriptional deregulation of EGR1-dependent target genes and is a driving event in initiation of SFT. In this study, we report the clinicopathologic and RT-PCR findings and evaluated expression of STAT6 and EGR1 protein in a cohort of 28 SFTs.

Methods

28 patients with a median age of 54 years were included with SFTs originating at different sites, most occurring in the lung and pleura (9, 32%), 5 in soft tissues of the lower extremities (18%) and 5 in the head and neck (18%). For detection of the NAB2-STAT6 fusion gene, RT-PCR was performed using RNA extracted from formalin-fixed and paraffin-embedded tissues. Immunohistochemistry was performed on all cases with antibodies against STAT6 and EGR1.

Results

All patients were treated by surgery, 3 with adjuvant chemo- or radiotherapy. Follow-up data of 18 patients could be obtained of which 2 patients died of metastatic disease 13 months and 52 years after first diagnosis. Sixteen patients have no evidence of disease with a median follow up of 29.5 months (range 7 - 120 months). NAB2-STAT6 fusion transcripts were found in 19/28 cases (68%). The most common fusion was between NAB2 exon 4 and STAT6 exon 3 (11/19, 58%), mainly occurring in pleuropulmonary lesions. All cases showed strong nuclear expression of STAT6 (28/28, 100%) while EGR1 showed low-level variable nuclear expression in all samples, comparable with the EGR1 expression results of the control group.

Conclusions

The identification of the NAB2-STAT6 fusion in SFTs can provide important diagnostic information, especially in cases with aberrant morphology or when biopsy material is limited. STAT6 immunohistochemistry is another useful tool in diagnosing SFT. EGR1 immunohistochemistry indicates low-level protein expression in accordance with EGR1 activation due to distorted NAB2 activity.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/13000_2014_224.",2014-11-29 +22893372,omniBiomarker: A Web-Based Application for Knowledge-Driven Biomarker Identification.,"We have developed omniBiomarker, a web-based application that uses knowledge from the NCI Cancer Gene Index to guide the selection of biologically relevant algorithms for identifying biomarkers. Biomarker identification from high-throughput genomic expression data is difficult because of data properties (i.e., small-sample size compared to large-feature size) as well as the large number of available feature selection algorithms. Thus, it is unclear which algorithm should be used for a particular dataset. These factors lead to instability in biomarker identification and affect the reproducibility of results. We introduce a method for computing the biological relevance of feature selection algorithms using an externally validated knowledge base of manually curated cancer biomarkers. Results suggest that knowledge-driven biomarker identification can improve microarray-based clinical prediction performance. omniBiomarker can be accessed at http://omnibiomarker.bme.gatech.edu/.",2012-08-08 +22499705,Sample size calculations for designing clinical proteomic profiling studies using mass spectrometry.,"In cancer clinical proteomics, MALDI and SELDI profiling are used to search for biomarkers of potentially curable early-stage disease. A given number of samples must be analysed in order to detect clinically relevant differences between cancers and controls, with adequate statistical power. From clinical proteomic profiling studies, expression data for each peak (protein or peptide) from two or more clinically defined groups of subjects are typically available. Typically, both exposure and confounder information on each subject are also available, and usually the samples are not from randomized subjects. 
Moreover, the data is usually available in replicate. At the design stage, however, covariates are not typically available and are often ignored in sample size calculations. This leads to the use of insufficient numbers of samples and reduced power when there are imbalances in the numbers of subjects between different phenotypic groups. A method is proposed for accommodating information on covariates, data imbalances and design-characteristics, such as the technical replication and the observational nature of these studies, in sample size calculations. It assumes knowledge of a joint distribution for the protein expression values and the covariates. When discretized covariates are considered, the effect of the covariates enters the calculations as a function of the proportions of subjects with specific attributes. This makes it relatively straightforward (even when pilot data on subject covariates is unavailable) to specify and to adjust for the effect of the expected heterogeneities. The new method suggests certain experimental designs which lead to the use of a smaller number of samples when planning a study. Analysis of data from the proteomic profiling of colorectal cancer reveals that fewer samples are needed when a study is balanced than when it is unbalanced, and when the IMAC30 chip-type is used. The method is implemented in the clippda package and is available in R at: http://www.bioconductor.org/help/bioc-views/release/bioc/html/clippda.html.",2012-02-10 +24797299,Development of a comprehensive prognostic index for patients with chronic lymphocytic leukemia.,"In addition to clinical staging, a number of biomarkers predicting overall survival (OS) have been identified in chronic lymphocytic leukemia (CLL). The multiplicity of markers, limited information on their independent prognostic value, and a lack of understanding of how to interpret discordant markers are major barriers to use in routine clinical practice. 
We therefore performed an analysis of 23 prognostic markers based on prospectively collected data from 1948 CLL patients participating in phase 3 trials of the German CLL Study Group to develop a comprehensive prognostic index. A multivariable Cox regression model identified 8 independent predictors of OS: sex, age, ECOG status, del(17p), del(11q), IGHV mutation status, serum β2-microglobulin, and serum thymidine kinase. Using a weighted grading system, a prognostic index was derived that separated 4 risk categories with 5-year OS ranging from 18.7% to 95.2% and having a C-statistic of 0.75. The index stratified OS within all analyzed subgroups, including all Rai/Binet stages. The validity of the index was externally confirmed in a series of 676 newly diagnosed CLL patients from Mayo Clinic. Using this multistep process including external validation, we developed a comprehensive prognostic index with high discriminatory power and prognostic significance on the individual patient level. The studies were registered as follows: CLL1 trial (NCT00262782, http://clinicaltrials.gov), CLL4 trial (ISRCTN 75653261, http://www.controlled-trials.com), and CLL8 trial (NCT00281918, http://clinicaltrials.gov).",2014-05-05 +23624946,The Protein Model Portal--a comprehensive resource for protein structure and model information.,"The Protein Model Portal (PMP) has been developed to foster effective use of 3D molecular models in biomedical research by providing convenient and comprehensive access to structural information for proteins. Both experimental structures and theoretical models for a given protein can be searched simultaneously and analyzed for structural variability. By providing a comprehensive view on structural information, PMP offers the opportunity to apply consistent assessment and validation criteria to the complete set of structural models available for proteins. 
PMP is an open project so that new methods developed by the community can contribute to PMP, for example, new modeling servers for creating homology models and model quality estimation servers for model validation. The accuracy of participating modeling servers is continuously evaluated by the Continuous Automated Model EvaluatiOn (CAMEO) project. The PMP offers a unique interface to visualize structural coverage of a protein combining both theoretical models and experimental structures, allowing straightforward assessment of the model quality and hence their utility. The portal is updated regularly and actively developed to include latest methods in the field of computational structural biology. Database URL: http://www.proteinmodelportal.org.",2013-04-26 +23629049,Multi-site genetic analysis of diffusion images and voxelwise heritability analysis: a pilot project of the ENIGMA-DTI working group.,"The ENIGMA (Enhancing NeuroImaging Genetics through Meta-Analysis) Consortium was set up to analyze brain measures and genotypes from multiple sites across the world to improve the power to detect genetic variants that influence the brain. Diffusion tensor imaging (DTI) yields quantitative measures sensitive to brain development and degeneration, and some common genetic variants may be associated with white matter integrity or connectivity. DTI measures, such as the fractional anisotropy (FA) of water diffusion, may be useful for identifying genetic variants that influence brain microstructure. However, genome-wide association studies (GWAS) require large populations to obtain sufficient power to detect and replicate significant effects, motivating a multi-site consortium effort. As part of an ENIGMA-DTI working group, we analyzed high-resolution FA images from multiple imaging sites across North America, Australia, and Europe, to address the challenge of harmonizing imaging data collected at multiple sites. 
Four hundred images of healthy adults aged 18-85 from four sites were used to create a template and corresponding skeletonized FA image as a common reference space. Using twin and pedigree samples of different ethnicities, we used our common template to evaluate the heritability of tract-derived FA measures. We show that our template is reliable for integrating multiple datasets by combining results through meta-analysis and unifying the data through exploratory mega-analyses. Our results may help prioritize regions of the FA map that are consistently influenced by additive genetic factors for future genetic discovery studies. Protocols and templates are publicly available at (http://enigma.loni.ucla.edu/ongoing/dti-working-group/).",2013-04-28 +24651462,iNR-Drug: predicting the interaction of drugs with nuclear receptors in cellular networking.,"Nuclear receptors (NRs) are closely associated with various major diseases such as cancer, diabetes, inflammatory disease, and osteoporosis. Therefore, NRs have become a frequent target for drug development. During the process of developing drugs against these diseases by targeting NRs, we are often facing a problem: Given a NR and chemical compound, can we identify whether they are really in interaction with each other in a cell? To address this problem, a predictor called ""iNR-Drug"" was developed. In the predictor, the drug compound concerned was formulated by a 256-D (dimensional) vector derived from its molecular fingerprint, and the NR by a 500-D vector formed by incorporating its sequential evolution information and physicochemical features into the general form of pseudo amino acid composition, and the prediction engine was operated by the SVM (support vector machine) algorithm. 
Compared with the existing prediction methods in this area, iNR-Drug not only can yield a higher success rate, but is also featured by a user-friendly web-server established at http://www.jci-bioinfo.cn/iNR-Drug/, which is particularly useful for most experimental scientists to obtain their desired data in a timely manner. It is anticipated that the iNR-Drug server may become a useful high throughput tool for both basic research and drug development, and that the current approach may be easily extended to study the interactions of drug with other targets as well.",2014-03-19 +23300135,MaSC: mappability-sensitive cross-correlation for estimating mean fragment length of single-end short-read sequencing data.,"

Motivation

Reliable estimation of the mean fragment length for next-generation short-read sequencing data is an important step in next-generation sequencing analysis pipelines, most notably because of its impact on the accuracy of the enriched regions identified by peak-calling algorithms. Although many peak-calling algorithms include a fragment-length estimation subroutine, the problem has not been adequately solved, as demonstrated by the variability of the estimates returned by different algorithms.

Results

In this article, we investigate the use of strand cross-correlation to estimate mean fragment length of single-end data and show that traditional estimation approaches have mixed reliability. We observe that the mappability of different parts of the genome can introduce an artificial bias into cross-correlation computations, resulting in incorrect fragment-length estimates. We propose a new approach, called mappability-sensitive cross-correlation (MaSC), which removes this bias and allows for accurate and reliable fragment-length estimation. We analyze the computational complexity of this approach, and evaluate its performance on a test suite of NGS datasets, demonstrating its superiority to traditional cross-correlation analysis.

Availability

An open-source Perl implementation of our approach is available at http://www.perkinslab.ca/Software.html.",2013-01-07 +23620293,DIALIGN at GOBICS--multiple sequence alignment using various sources of external information.,"DIALIGN is an established tool for multiple sequence alignment that is particularly useful to detect local homologies in sequences with low overall similarity. In recent years, various versions of the program have been developed, some of which are fully automated, whereas others are able to accept user-specified external information. In this article, we review some versions of the program that are available through 'Göttingen Bioinformatics Compute Server'. In addition to previously described implementations, we present a new release of DIALIGN called 'DIALIGN-PFAM', which uses hits to the PFAM database for improved protein alignment. Our software is available through http://dialign.gobics.de/.",2013-04-24 +24302573,Hdac6 regulates Tip60-p400 function in stem cells.,"In embryonic stem cells (ESCs), the Tip60 histone acetyltransferase activates genes required for proliferation and silences genes that promote differentiation. Here we show that the class II histone deacetylase Hdac6 co-purifies with Tip60-p400 complex from ESCs. Hdac6 is necessary for regulation of most Tip60-p400 target genes, particularly those repressed by the complex. Unlike differentiated cells, where Hdac6 is mainly cytoplasmic, Hdac6 is largely nuclear in ESCs, neural stem cells (NSCs), and some cancer cell lines, and interacts with Tip60-p400 in each. Hdac6 localizes to promoters bound by Tip60-p400 in ESCs, binding downstream of transcription start sites. Surprisingly, Hdac6 does not appear to deacetylate histones, but rather is required for Tip60-p400 binding to many of its target genes. Finally, we find that, like canonical subunits of Tip60-p400, Hdac6 is necessary for robust ESC differentiation. 
These data suggest that Hdac6 plays a major role in the modulation of Tip60-p400 function in stem cells. DOI: http://dx.doi.org/10.7554/eLife.01557.001.",2013-12-03 +23623159,Viral outbreaks in neonatal intensive care units: what we do not know.,"

Background

Nosocomial infection is among the most important causes of morbidity, prolonged hospital stay, increased hospital costs, and mortality in neonates, particularly those born preterm. The vast majority of scientific articles dealing with nosocomial infections address bacterial or fungal infections, and viral agents are often disregarded. This analysis reviews the medical literature in an effort to establish the incidence, types of pathogens, and clinical features of noncongenital neonatal viral infections.

Methods

This analysis was performed using the worldwide database of health care-associated outbreaks (http://www.outbreak-database.com). Items analyzed included causative pathogens, types of infection, source of outbreaks, and measures taken to stop outbreaks.

Results

The outbreak database contained a total of 590 neonatal outbreaks, of which 64 were originated by viruses, 44 of which (68.75%) were reported from neonatal intensive care units (NICUs). The 5 most frequent viral agents were rotavirus (23.44%), respiratory syncytial virus (17.19%), enterovirus (15.63%), hepatitis A virus (10.94%), and adenovirus (9.38%).

Conclusion

Our analysis of the viral origins of nosocomial infections in NICUs can be a valuable tool in the investigation of neonatal infections. The mortality rates reported in this analysis demonstrate the significance of noncongenital viral infections in NICUs and the need for more effective outbreak prevention strategies.",2013-04-23 +23093680,Methods for calculating sensitivity and specificity of clustered data: a tutorial.,"

Unlabelled

The performance of a diagnostic test is often expressed in terms of sensitivity and specificity compared with the reference standard. Calculations of sensitivity and specificity commonly involve multiple observations per patient, which implies that the data are clustered. Whether analysis of sensitivity and specificity per patient or using multiple observations per patient is preferable depends on the clinical context and consequences. The purpose of this article was to discuss and illustrate the most common statistical methods that calculate sensitivity and specificity of clustered data, adjusting for the possible correlation between observations within each patient. This tutorial presents and illustrates the following methods: (a) analysis at different levels ignoring correlation, (b) variance adjustment, (c) logistic random-effects models, and (d) generalized estimating equations. The choice of method and the level of reporting should correspond with the clinical decision problem. If multiple observations per patient are relevant to the clinical decision problem, the potential correlation between observations should be explored and taken into account in the statistical analysis.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12120509/-/DC1.",2012-10-23 +24757667,"Development and evaluation of an open-source software package ""CGITA"" for quantifying tumor heterogeneity with molecular images.","

Background

The quantification of tumor heterogeneity with molecular images, by analyzing the local or global variation in the spatial arrangements of pixel intensity with texture analysis, possesses a great clinical potential for treatment planning and prognosis. To address the lack of available software for computing the tumor heterogeneity on the public domain, we develop a software package, namely, Chang-Gung Image Texture Analysis (CGITA) toolbox, and provide it to the research community as a free, open-source project.

Methods

With a user-friendly graphical interface, CGITA provides users with an easy way to compute more than seventy heterogeneity indices. To test and demonstrate the usefulness of CGITA, we used a small cohort of eighteen locally advanced oral cavity (ORC) cancer patients treated with definitive radiotherapies.

Results

In our case study of ORC data, we found that more than ten of the current implemented heterogeneity indices outperformed SUVmean for outcome prediction in the ROC analysis with a higher area under curve (AUC). Heterogeneity indices provide a better area under the curve up to 0.9 than the SUVmean and TLG (0.6 and 0.52, resp.).

Conclusions

CGITA is a free and open-source software package to quantify tumor heterogeneity from molecular images. CGITA is available for free for academic use at http://code.google.com/p/cgita.",2014-03-17 +24920577,Cigarette company trade secrets are not secret: an analysis of reverse engineering reports in internal tobacco industry documents released as a result of litigation.,"

Objectives

Use previously secret tobacco industry documents to assess tobacco companies' routine claims of trade secret protection for information on cigarette ingredients, additives and construction made to regulatory agencies, as well as the companies' refusal to publicly disclose this information.

Methods

We analysed previously secret tobacco industry documents available at (http://legacy.library.ucsf.edu) to identify 100 examples of seven major tobacco companies' reverse engineering of their competitors' brands between 1937 and 2001.

Results

These reverse engineering reports contain detailed data for 142 different measurements for at least two companies, including physical parameters of the cigarettes, tobacco types, humectants, additives, flavourings, and smoke constituents of competitors' cigarettes. These 100 documents were distributed to 564 employees, including top managers in domestic and foreign offices across multiple departments, including executive leadership, research and design, product development, marketing and legal. These documents reported new competitors' products, measured ingredient changes over time, and informed companies' decisions regarding ingredients in their own products.

Conclusions

Because cigarette companies routinely analyse their competitors' cigarettes in great detail, this information is neither secret nor commercially valuable and, thus, does not meet the legal definition of a 'trade secret.' This information is only being kept 'secret' from the people consuming cigarettes and the scientific community. Public agencies should release this detailed information because it would provide valuable information about how ingredients affect addictiveness and toxicity, and would help the public health community and consumers better understand the impact of cigarette design on human health.",2014-06-11 +22811343,Alcaftadine for the prevention of itching associated with allergic conjunctivitis.,"

Objective

To evaluate the safety and efficacy of alcaftadine for the prevention of itching associated with allergic conjunctivitis.

Data sources

A medical literature search was conducted in MEDLINE/PubMed (2006-February 2012) and EMBASE (2006-February 2012) using the search terms alcaftadine and Lastacaft. References from these publications were reviewed for additional resources. Additional information was collected from Web sites of the US government (http://www.clinicaltrials.gov, http://www.fda.gov) and of Allergan Inc., the manufacturer of Lastacaft (http://www.lastacaft.com).

Study selection and data extraction

All identified articles and publications in English were reviewed for pharmacology, pharmacokinetics, efficacy, and safety data. Priority was placed on clinical trials.

Data synthesis

Two published clinical trials evaluated the efficacy of alcaftadine in the prevention of ocular itching and conjunctival redness associated with allergic conjunctivitis. One trial compared alcaftadine to placebo, and another trial compared alcaftadine to placebo and olopatadine HCl to placebo. Both studies showed superior efficacy, both clinically and statistically, in the prevention of ocular itching associated with allergic conjunctivitis compared to placebo. Although conjunctival redness was evaluated in the 2 trials, neither trial demonstrated both clinical and statistical significance. Both trials demonstrated a rapid onset of action of less than 15 minutes, as well as a duration of action greater than 16 hours, which supports the use of once-daily administration. Overall, alcaftadine was well tolerated, and common adverse effects, reported in less than 4% of patients, included ocular irritation, pruritus, erythema, and stinging or burning upon instillation. Ocular adverse effects were typically mild in severity and self-limiting.

Conclusions

Alcaftadine is a safe and effective option for the prevention of ocular itching associated with allergic conjunctivitis, is dosed once daily, and is competitively priced among prescription medications for allergic conjunctivitis. Additional studies are needed to further evaluate the comparative efficacy among ocular antihistamine/mast cell stabilizing medications.",2012-07-17 +24794930,Structural and energetic determinants of tyrosylprotein sulfotransferase sulfation specificity.,"

Motivation

Tyrosine sulfation is a type of post-translational modification (PTM) catalyzed by tyrosylprotein sulfotransferases (TPST). The modification plays a crucial role in mediating protein-protein interactions in many biologically important processes. There is no well-defined sequence motif for TPST sulfation, and the underlying determinants of TPST sulfation specificity remain elusive. Here, we perform molecular modeling to uncover the structural and energetic determinants of TPST sulfation specificity.

Results

We estimate the binding affinities between TPST and peptides around tyrosines of both sulfated and non-sulfated proteins to differentiate them. We find that better differentiation is achieved after including energy costs associated with local unfolding of the tyrosine-containing peptide in a host protein, which depends on both the peptide's secondary structures and solvent accessibility. Local unfolding renders buried peptide—with ordered structures—thermodynamically available for TPST binding. Our results suggest that both thermodynamic availability of the peptide and its binding affinity to the enzyme are important for TPST sulfation specificity, and their interplay results in great variations in sequences and structures of sulfated peptides. We expect our method to be useful in predicting potential sulfation sites and transferable to other TPST variants. Our study may also shed light on other PTM systems without well-defined sequence and structural specificities.

Availability and implementation

All the data and scripts used in the work are available at http://dlab.clemson.edu/research/Sulfation.",2014-05-02 +27395897,"First report of the land planarian Diversibipalium multilineatum (Makino & Shirasawa, 1983) (Platyhelminthes, Tricladida, Continenticola) in Europe.","Introduction of alien species may significantly affect soil ecosystems, through predation or disruption of components of native ecosystems (Winsor et al. 2004; Álvarez-Presas et al. 2014; Justine et al. 2014). Land planarians have been reported as alien species in soils throughout the world and, among those, some species are considered to be successful invaders, e.g. Platydemus manokwari de Beauchamp, 1963, Arthurdendyus triangulatus (Dendy, 1894), Bipalium adventitium Hyman, 1943, Bipalium kewense Moseley, 1878 and Dolichoplana striata Moseley, 1877 (Winsor et al. 2004; Álvarez-Presas et al. 2014; Justine et al. 2014, 2015). Soil moisture status seems to be an important element for their successful invasion (Fraser & Boag 1998). In Europe at least 18 species of alien land planarians have been recorded since now and some of them are considered as invasive ones, e.g. P. manokwari (cf. Justine et al. 2014). Although the alien land planarian B. kewense has been reported to occur in many greenhouses in Italy (Bello et al. 1995), no data are available on its establishment and/or impact on natural environments. On 28th September 2014, 20 specimens (~1 individual/m2) of the land planarian Diversibipalium multilineatum (Makino & Shirasawa, 1983) (Fig. 1), native to Japan, were collected under pots, branches and plastic materials in a private garden located in the center of Bologna (Emilia Romagna, Central Italy), near the urban park Giardini Margherita (44°29' N, 11°21' E; WGS84). Thirty plant species (both indigenous and alien), mainly cultivated as bonsai (e.g. Lagerstroemia indica L., Juniperus procumbens (Siebold ex Endl.) Miquel), were present in this shady, wet garden (25 m2). 
Between March 2014 and June 2015, 70 more specimens of D. multilineatum were collected at the same site, mainly at dusk and dawn after rain. Reproduction by fission and regeneration processes were observed in several of those specimens, which were kept for some time in captivity. A specimen of D. multilineatum was also collected in a garden in Léguevin (Haute-Garonne, France), which will be described in a forthcoming paper by Justine et al. (in prep.) (see also Kawakatsu et al. 2014). Specimens without a genital pore were initially ascribed to D. multilineatum on the basis of their external appearance: the dorsal surface was brownish yellow and presented five longitudinal stripes at the head plate and the neck, showing the typical appearance of the species. The middorsal stripe was widened at its anterior end, on the head plate, and at the pharynx level. The ventral pattern of the animals at the pharyngeal region was also characteristic, with the middorsal stripe widened at this level. The Italian Diversibipalium specimens used for the molecular analysis were fixed and preserved in absolute ethanol. Fragments of the mitochondrial gene COI and 28S ribosomal RNA nuclear gene (GenBank Acc. Numbers KU245358 and KU245357, respectively) were obtained using the procedure and COI primers described in Álvarez-Presas et al. (2008) and Solà et al. (2013). The French specimen's COI (Specimen MNHN JL177, GenBank Acc. Number KT922162) was obtained as described in Justine et al. (2015). 28S sequences of 14 Bipaliinae specimens and four Microplana species (outgroup) retrieved from GenBank were included in the phylogenetic analyses (Fig. 2). 
Sequence alignment was obtained by using the online software MAFFT version 7 (Katoh & Standley 2013), while ambiguously aligned positions were removed using the program Gblocks (Talavera & Castresana 2007) with default settings, excepting the minimum number of sequences for a flank position at the minimum value (set at 10) and with half of the allowed gap positions. The final alignment had a length of 1589 bp. We used two phylogenetic inference approaches: maximum likelihood (ML), using the RaxML 8.2.3 software (Stamatakis 2014), and Bayesian inferences (BI), using MrBayes 3.2.4 (Ronquist et al. 2012). The evolutionary model used, GTR+I+G, was estimated to be the best with the software jModeltest 2.1.7 (Darriba et al. 2012; Guindon & Gascuel 2003), using the Akaike Information Criterion (AIC). MrBayes analyses were performed for 10 million generations with sampling parameters every 10^3 and a 25% default burn-in value for the final trees. Convergence of the two runs (average standard deviation of split frequencies << 0.01) and likelihood stationarity were checked. The maximum likelihood analyses were performed under 1000 bootstrap pseudoreplicates. The phylogenetic results show a close and highly supported relationship of the Italian Diversibipalium specimens with those from Japan and South Korea that have been identified as D. multilineatum (Fig. 2). Diversibipalium multilineatum is the sister-group of B. nobile Kawakatsu & Makino, 1982, but with low support. The COI sequences of the French (MNHN JL177) and the Italian Diversibipalium specimens were compared in Geneious v. 8.0.5 (
http://www.geneious.com, Kearse et al. 2012) and were found to be identical. These results indicate that the species introduced in both countries is the same, and most probably concerns the species D. multilineatum. The pathways of introduction of D. multilineatum are currently unknown, although a relationship between the horticultural trade and the introduction of alien land planarians is well known (Álvarez-Presas et al. 2014 and references therein). Here we report the first occurrence of individuals of D. multilineatum outside Asia. The GenBank sequence of D. multilineatum from South Korea is not yet supported by a published description of the specimen, while it is debatable whether South Korea should be considered part of the natural range of D. multilineatum, which only seems to include Japan. In the present paper, we consider the South Korean animal to be an introduced specimen. Soil moisture status, temperature, and food availability are considered to be the main factors determining the presence of terrestrial planarians (Boag et al. 1998); the microclimatic conditions of the Italian garden were similar to plant nurseries and greenhouses, while an abundance of food was available, such as isopods [Porcellionides pruinosus (Brandt, 1833)], oligochaetes [Dendrobaena attemsi (Michaelsen, 1902) and several juveniles of Lumbricus spp.] and gastropods [Cernuella cisalpina (Rossmassler, 1837), Cornu aspersum (O.F. Müller 1774), Deroceras reticulatum (O.F. Müller, 1774), Discus rotundatus (O.F. Müller, 1774), Limacus flavus (Linnaeus, 1758), Milax nigricans (Philippi, 1836), Papillifera papillaris (Linnaeus, 1758), Pomatias elegans (O.F. Müller, 1774)]. Moreover, winter 2014 reached the highest temperatures and rainfall of the last two decades (source: CNR-ISAC, Bologna), thus favouring establishment and spread of D. multilineatum. The potential environmental impacts of some invasive flatworms are well documented (Álvarez-Presas et al. 2014; Justine et al. 
2014) and, even if these effects have not yet been assessed for D. multilineatum, the adoption of precautionary measures and of early intervention is here strongly recommended (Genovesi & Shine 2004). Finally, knowledge of the introduction pathway(s), together with the analysis of prey preference and possible impact on the invertebrate fauna, will be essential to halt or at least to limit the spread of this introduced land flatworm.",2016-01-26 +25352020,"Revisiting the J shaped curve, exploring the association between cardiovascular risk factors and concurrent depressive symptoms in patients with cardiometabolic disease: Findings from a large cross-sectional study.","

Background

Depression is common in patients with cardiometabolic diseases but little is known about the relationship, if any, between cardiovascular risk factor values and depressive symptoms in patients with these conditions. The objective of this paper is to study the association between cardiovascular risk factors and concurrent depressive symptoms in patients with three common cardiometabolic conditions: coronary heart disease (CHD), stroke and diabetes.

Methods

We retrospectively reviewed primary care data for N = 35537 with 1 of the above 3 conditions who underwent depression screening using the depressive subscale of hospital anxiety and depression score (HADS-D). We reviewed 4 cardiometabolic risk factors (Systolic Blood Pressure [SBP], Diastolic Blood Pressure [DBP], BMI and total cholesterol) recorded concurrently in all patients and HbA1c in patients with diabetes (n = 18453). We analysed the association between individual risk factor value and a positive HADS-D screening result (>7) using logistic regression.

Results

SBP and BMI were noted to have a non-linear ""J-shaped"" relationship with the probability of having a positive HADS-D and observed nadirs (levels with the lowest probability) of 148 mm Hg and 30.70 kg/m2, respectively. Total cholesterol and DBP were found to have a weaker curvilinear association with concurrent depression symptoms and nadirs of 3.60 mmol/l and 74 mmHg. Among patients with Diabetes, HbA1c was also found to have a ""J-shaped"" relationship with probability of having a positive HADS-D with an observed nadir of 7.06% DCCT. The above relationships remain significant after adjusting for age, sex, socio-economic status and number of co-morbid conditions.

Conclusion

In patients with cardiometabolic disease, cardiovascular risk factor values at both extremes were associated with higher positive depression screening after adjusting for confounders. These findings have potentially important implications for clinical practice in relation to both risk stratification for depression and approaches to secondary prevention in individuals with cardiometabolic disease and merit further investigation to determine the nature and direction of the observed association. Please see related article: http://www.biomedcentral.com/1741-7015/12/199.",2014-10-28 +23613486,A unifying kinetic framework for modeling oxidoreductase-catalyzed reactions.,"

Motivation

Oxidoreductases are a fundamental class of enzymes responsible for the catalysis of oxidation-reduction reactions, crucial in most bioenergetic metabolic pathways. From their common root in the ancient prebiotic environment, oxidoreductases have evolved into diverse and elaborate protein structures with specific kinetic properties and mechanisms adapted to their individual functional roles and environmental conditions. While accurate kinetic modeling of oxidoreductases is thus important, current models suffer from limitations to the steady-state domain, lack empirical validation or are too specialized to a single system or set of conditions.

Results

To address these limitations, we introduce a novel unifying modeling framework for kinetic descriptions of oxidoreductases. The framework is based on a set of seven elementary reactions that (i) form the basis for 69 pairs of enzyme state transitions for encoding various specific microscopic intra-enzyme reaction networks (micro-models), and (ii) lead to various specific macroscopic steady-state kinetic equations (macro-models) via thermodynamic assumptions. Thus, a synergistic bridge between the micro and macro kinetics can be achieved, enabling us to extract unitary rate constants, simulate reaction variance and validate the micro-models using steady-state empirical data. To help facilitate the application of this framework, we make available RedoxMech: a Mathematica™ software package that automates the generation and customization of micro-models.

Availability

The Mathematica™ source code for RedoxMech, the documentation and the experimental datasets are all available from: http://www.igb.uci.edu/tools/sb/metabolic-modeling.

Contact

pfbaldi@ics.uci.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-23 +24916671,EFIN: predicting the functional impact of nonsynonymous single nucleotide polymorphisms in human genome.,"

Background

Predicting the functional impact of amino acid substitutions (AAS) caused by nonsynonymous single nucleotide polymorphisms (nsSNPs) is becoming increasingly important as more and more novel variants are being discovered. Bioinformatics analysis is essential to predict potentially causal or contributing AAS to human diseases for further analysis, as for each genome, thousands of rare or private AAS exist and only a very small number of which are related to an underlying disease. Existing algorithms in this field still have high false prediction rate and novel development is needed to take full advantage of vast amount of genomic data.

Results

Here we report a novel algorithm that features two innovative changes: 1. making better use of sequence conservation information by grouping the homologous protein sequences into six blocks according to evolutionary distances to human and evaluating sequence conservation in each block independently, and 2. including as many such homologous sequences as possible in analyses. Random forests are used to evaluate sequence conservation in each block and to predict potential impact of an AAS on protein function. Testing of this algorithm on a comprehensive dataset showed significant improvement on prediction accuracy upon currently widely-used programs. The algorithm and a web-based application tool implementing it, EFIN (Evaluation of Functional Impact of Nonsynonymous SNPs) were made freely available (http://paed.hku.hk/efin/) to the public.

Conclusions

Grouping homologous sequences into different blocks according to the evolutionary distance of the species to human and evaluating sequence conservation in each group independently significantly improved prediction accuracy. This approach may help us better understand the roles of genetic variants in human disease and health.",2014-06-10 +23608491,Isolation and characterization of Staphylococcus aureus strains from a Paso del Norte dairy.,"The primary purpose of this study was to determine if methicillin-resistant Staphylococcus aureus (MRSA) strains could be identified in the milk of dairy cattle in a Paso del Norte region dairy of the United States. Using physiological and PCR-based identification schemes, a total of 40 Staph. aureus strains were isolated from 29 raw milk samples of 133 total samples analyzed. Pulsed-field gel electrophoresis after digestion with the SmaI enzyme revealed that the 40 confirmed strains were represented by 5 pulsed-field types, which each contained 3 or more strains. Of 7 hospital strains isolated from cows undergoing antibiotic therapy, 3 demonstrated resistance to 3 or more antimicrobial classes and displayed similar pulsed-field gel electrophoresis patterns. A secondary purpose of this study was to elucidate the evolutionary relationships of strains isolated in this study to genomically characterized Staph. aureus strains. Therefore, Roche 454 GS (Roche Diagnostics Corp., Dallas, TX) pyrosequencing was used to produce draft genome sequences of an MRSA raw milk isolate (H29) and a methicillin-susceptible Staph. aureus (PB32). Analysis using the BLASTn database (http://blast.ncbi.nlm.nih.gov/) demonstrated that the H29 draft genome was highly homologous to the human MRSA strain JH1, yet the β-lactamase plasmid carried by H29 was different from that carried by JH1. Genomic analysis of H29 also clearly explained the multidrug resistance phenotype of this raw milk isolate. 
Analysis of the PB32 draft genome (using BLASTn) demonstrated that this raw milk isolate was most related to human MRSA strain 04-02981. Although PB32 is not a MRSA, the PB32 draft genome did reveal the presence of a unique staphylococcal cassette mec (SCCmec) remnant. In addition, the PB32 draft genome revealed the presence of a novel bovine staphylococcal pathogenicity island, SaPIbovPB32. This study demonstrates the presence of clones closely related to human and (or) bovine Staph. aureus strains circulating in a dairy herd.",2013-04-19 +23603847,The challenge of increasing Pfam coverage of the human proteome.,"It is a worthy goal to completely characterize all human proteins in terms of their domains. Here, using the Pfam database, we asked how far we have progressed in this endeavour. Ninety per cent of proteins in the human proteome matched at least one of 5494 manually curated Pfam-A families. In contrast, human residue coverage by Pfam-A families was <45%, with 9418 automatically generated Pfam-B families adding a further 10%. Even after excluding predicted signal peptide regions and short regions (<50 consecutive residues) unlikely to harbour new families, for ∼38% of the human protein residues, there was no information in Pfam about conservation and evolutionary relationship with other protein regions. This uncovered portion of the human proteome was found to be distributed over almost 25 000 distinct protein regions. Comparison with proteins in the UniProtKB database suggested that the human regions that exhibited similarity to thousands of other sequences were often either divergent elements or N- or C-terminal extensions of existing families. Thirty-four per cent of regions, on the other hand, matched fewer than 100 sequences in UniProtKB. Most of these did not appear to share any relationship with existing Pfam-A families, suggesting that thousands of new families would need to be generated to cover them. 
Also, these latter regions were particularly rich in amino acid compositional bias such as the one associated with intrinsic disorder. This could represent a significant obstacle toward their inclusion into new Pfam families. Based on these observations, a major focus for increasing Pfam coverage of the human proteome will be to improve the definition of existing families. New families will also be built, prioritizing those that have been experimentally functionally characterized. Database URL: http://pfam.sanger.ac.uk/",2013-04-19 +24854765,RBRDetector: improved prediction of binding residues on RNA-binding protein structures using complementary feature- and template-based strategies.,"Computational prediction of RNA-binding residues is helpful in uncovering the mechanisms underlying protein-RNA interactions. Traditional algorithms individually applied feature- or template-based prediction strategy to recognize these crucial residues, which could restrict their predictive power. To improve RNA-binding residue prediction, herein we propose the first integrative algorithm termed RBRDetector (RNA-Binding Residue Detector) by combining these two strategies. We developed a feature-based approach that is an ensemble learning predictor comprising multiple structure-based classifiers, in which well-defined evolutionary and structural features in conjunction with sequential or structural microenvironment were used as the inputs of support vector machines. Meanwhile, we constructed a template-based predictor to recognize the putative RNA-binding regions by structurally aligning the query protein to the RNA-binding proteins with known structures. The final RBRDetector algorithm is an ingenious fusion of our feature- and template-based approaches based on a piecewise function. 
By validating our predictors with diverse types of structural data, including bound and unbound structures, native and simulated structures, and protein structures binding to different RNA functional groups, we consistently demonstrated that RBRDetector not only had clear advantages over its component methods, but also significantly outperformed the current state-of-the-art algorithms. Nevertheless, the major limitation of our algorithm is that it performed relatively well on DNA-binding proteins and thus incorrectly predicted the DNA-binding regions as RNA-binding interfaces. Finally, we implemented the RBRDetector algorithm as a user-friendly web server, which is freely accessible at http://ibi.hzau.edu.cn/rbrdetector.",2014-06-09 +25148528,"PredPPCrys: accurate prediction of sequence cloning, protein production, purification and crystallization propensity from protein sequences using multi-step heterogeneous feature fusion and selection.","X-ray crystallography is the primary approach to solve the three-dimensional structure of a protein. However, a major bottleneck of this method is the failure of multi-step experimental procedures to yield diffraction-quality crystals, including sequence cloning, protein material production, purification, crystallization and ultimately, structural determination. Accordingly, prediction of the propensity of a protein to successfully undergo these experimental procedures based on the protein sequence may help narrow down laborious experimental efforts and facilitate target selection. A number of bioinformatics methods based on protein sequence information have been developed for this purpose. However, our knowledge on the important determinants of propensity for a protein sequence to produce high diffraction-quality crystals remains largely incomplete. In practice, most of the existing methods display poorer performance when evaluated on larger and updated datasets. 
To address this problem, we constructed an up-to-date dataset as the benchmark, and subsequently developed a new approach termed 'PredPPCrys' using the support vector machine (SVM). Using a comprehensive set of multifaceted sequence-derived features in combination with a novel multi-step feature selection strategy, we identified and characterized the relative importance and contribution of each feature type to the prediction performance of five individual experimental steps required for successful crystallization. The resulting optimal candidate features were used as inputs to build the first-level SVM predictor (PredPPCrys I). Next, prediction outputs of PredPPCrys I were used as the input to build second-level SVM classifiers (PredPPCrys II), which led to significantly enhanced prediction performance. Benchmarking experiments indicated that our PredPPCrys method outperforms most existing procedures on both up-to-date and previous datasets. In addition, the predicted crystallization targets of currently non-crystallizable proteins were provided as compendium data, which are anticipated to facilitate target selection and design for the worldwide structural genomics consortium. PredPPCrys is freely available at http://www.structbioinfor.org/PredPPCrys.",2014-08-22 +23716645,IMAAAGINE: a webserver for searching hypothetical 3D amino acid side chain arrangements in the Protein Data Bank.,"We describe a server that allows the interrogation of the Protein Data Bank for hypothetical 3D side chain patterns that are not limited to known patterns from existing 3D structures. A minimal side chain description allows a variety of side chain orientations to exist within the pattern, and generic side chain types such as acid, base and hydroxyl-containing can be additionally deployed in the search query. Moreover, only a subset of distances between the side chains need be specified. 
We illustrate these capabilities in case studies involving arginine stacks, serine-acid group arrangements and multiple catalytic triad-like configurations. The IMAAAGINE server can be accessed at http://mfrlab.org/grafss/imaaagine/.",2013-05-28 +23710727,REAPR: a universal tool for genome assembly evaluation.,"Methods to reliably assess the accuracy of genome sequence data are lacking. Currently completeness is only described qualitatively and mis-assemblies are overlooked. Here we present REAPR, a tool that precisely identifies errors in genome assemblies without the need for a reference sequence. We have validated REAPR on complete genomes or de novo assemblies from bacteria, malaria and Caenorhabditis elegans, and demonstrate that 86% and 82% of the human and mouse reference genomes are error-free, respectively. When applied to an ongoing genome project, REAPR provides corrected assembly statistics allowing the quantitative comparison of multiple assemblies. REAPR is available at http://www.sanger.ac.uk/resources/software/reapr/.",2013-05-27 +22645320,Integrative analysis of gene and miRNA expression profiles with transcription factor-miRNA feed-forward loops identifies regulators in human cancers.,"We describe here a novel method for integrating gene and miRNA expression profiles in cancer using feed-forward loops (FFLs) consisting of transcription factors (TFs), miRNAs and their common target genes. The dChip-GemiNI (Gene and miRNA Network-based Integration) method statistically ranks computationally predicted FFLs by their explanatory power to account for differential gene and miRNA expression between two biological conditions such as normal and cancer. GemiNI integrates not only gene and miRNA expression data but also computationally derived information about TF-target gene and miRNA-mRNA interactions. 
Literature validation shows that the integrated modeling of expression data and FFLs better identifies cancer-related TFs and miRNAs compared to existing approaches. We have utilized GemiNI for analyzing six data sets of solid cancers (liver, kidney, prostate, lung and germ cell) and found that top-ranked FFLs account for ∼20% of transcriptome changes between normal and cancer. We have identified common FFL regulators across multiple cancer types, such as known FFLs consisting of MYC and miR-15/miR-17 families, and novel FFLs consisting of ARNT, CREB1 and their miRNA partners. The results and analysis web server are available at http://www.canevolve.org/dChip-GemiNi.",2012-05-29 +23735058,AllerTOP--a server for in silico prediction of allergens.,"

Background

Allergy is a form of hypersensitivity to normally innocuous substances, such as dust, pollen, foods or drugs. Allergens are small antigens that commonly provoke an IgE antibody response. There are two types of bioinformatics-based allergen prediction. The first approach follows FAO/WHO Codex alimentarius guidelines and searches for sequence similarity. The second approach is based on identifying conserved allergenicity-related linear motifs. Both approaches assume that allergenicity is a linearly coded property. In the present study, we applied ACC pre-processing to sets of known allergens, developing alignment-independent models for allergen recognition based on the main chemical properties of amino acid sequences.

Results

A set of 684 food, 1,156 inhalant and 555 toxin allergens was collected from several databases. A set of non-allergens from the same species were selected to mirror the allergen set. The amino acids in the protein sequences were described by three z-descriptors (z1, z2 and z3) and by auto- and cross-covariance (ACC) transformation were converted into uniform vectors. Each protein was presented as a vector of 45 variables. Five machine learning methods for classification were applied in the study to derive models for allergen prediction. The methods were: discriminant analysis by partial least squares (DA-PLS), logistic regression (LR), decision tree (DT), naïve Bayes (NB) and k nearest neighbours (kNN). The best performing model was derived by kNN at k = 3. It was optimized, cross-validated and implemented in a server named AllerTOP, freely accessible at http://www.pharmfac.net/allertop. AllerTOP also predicts the most probable route of exposure. In comparison to other servers for allergen prediction, AllerTOP outperforms them with 94% sensitivity.

Conclusions

AllerTOP is the first alignment-free server for in silico prediction of allergens based on the main physicochemical properties of proteins. Significantly, as well as allergenicity AllerTOP is able to predict the route of allergen exposure: food, inhalant or toxin.",2013-04-17 +23603108,Pride-asap: automatic fragment ion annotation of identified PRIDE spectra.,"We present an open source software application and library written in Java that provides a uniform annotation of identified spectra stored in the PRIDE database. Pride-asap can be run in a command line mode for automated processing of multiple PRIDE experiments, but also has a graphical user interface that allows end users to annotate the spectra in PRIDE experiments and to inspect the results in detail. Pride-asap binaries, source code and additional information can be downloaded from http://pride-asa-pipeline.googlecode.com. This article is part of a Special Issue entitled: Standardization and Quality Control in Proteomics.",2013-04-17 +25030022,Metaplastic carcinoma of the breast: an immunohistochemical study.,"

Background

Metaplastic breast carcinoma is a rare entity of breast cancer expressing epithelial and/or mesenchymal tissue within the same tumor. The aim of this study is to evaluate the clinicopathological features of metaplastic breast carcinoma and to confirm the triple negative, basal-like and/or luminal phenotype of this type of tumor by using immunohistochemical staining.

Methods

Seven cases of MBC were evaluated for clinico-pathological features including follow up data. Cases were studied immunohistochemically by CK-Pan, Vimentin, ER, PR, HER2, basal markers (CK5/6, p63, EGFR, SMA and S-100), luminal cytokeratins (CK8, CK18 and CK19), markers for syncytial cells (β-HCG and PLAP), as well as prognostic markers (p53, ki-67 and calretinin).

Results

The mean age of the patients was 36 years. Three cases showed choriocarcinomatous features. All of our cases were negative for ER, PR and HER2. Six out of the 7 cases showed basal-like differentiation by demonstrating positivity with at least one of the basal/myoepithelial markers. Also 6 out of the 7 cases expressed luminal type cytokeratins (CK8, CK18 and/or CK19). P53 was positive in 3 cases, ki-67 was strongly expressed in only one case, while calretinin was expressed in 6 cases.

Conclusion

Metaplastic breast carcinoma presents in our population at a younger age than reported in other international studies. All cases are categorized immunohistochemically under the triple negative group of breast cancer and 86% of them exhibited basal-like and luminal phenotype. The majority of cases developed local recurrence and distant metastasis in a relatively short period of time.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1101289295115804.",2014-07-16 +22494792,"EggLib: processing, analysis and simulation tools for population genetics and genomics.","

Background

With the considerable growth of available nucleotide sequence data over the last decade, integrated and flexible analytical tools have become a necessity. In particular, in the field of population genetics, there is a strong need for automated and reliable procedures to conduct repeatable and rapid polymorphism analyses, coalescent simulations, data manipulation and estimation of demographic parameters under a variety of scenarios.

Results

In this context, we present EggLib (Evolutionary Genetics and Genomics Library), a flexible and powerful C++/Python software package providing efficient and easy to use computational tools for sequence data management and extensive population genetic analyses on nucleotide sequence data. EggLib is a multifaceted project involving several integrated modules: an underlying computationally efficient C++ library (which can be used independently in pure C++ applications); two C++ programs; a Python package providing, among other features, a high level Python interface to the C++ library; and the egglib script which provides direct access to pre-programmed Python applications.

Conclusions

EggLib has been designed to be both efficient and easy to use. A wide array of methods are implemented, including file format conversion, sequence alignment editing, coalescent simulations, neutrality tests and estimation of demographic parameters by Approximate Bayesian Computation (ABC). Classes implementing different demographic scenarios for ABC analyses can easily be developed by the user and included in the package. EggLib source code is distributed freely under the GNU General Public License (GPL) from its website http://egglib.sourceforge.net/ where full documentation and a manual can also be found and downloaded.",2012-04-11 +21283085,International spinal cord injury male sexual function basic data set.,"

Objective

To create the International Spinal Cord Injury (SCI) Male Sexual Function Basic Data Set within the International SCI Data Sets.

Setting

An international working group.

Methods

The draft of the data set was developed by an international working group consisting of members appointed by the International Spinal Cord Society (ISCoS), the American Spinal Injury Association (ASIA) and a representative from the executive committee of the International SCI Standards and Data Sets. The data set was developed in an iterative process with review and comments by the members of the executive committee of the International SCI Standards and Data Sets, ISCoS scientific committee, ASIA Board and the ISCoS Council, as well as all the interested organizations and individuals. Next, the data set was posted for 2 months at the ISCoS and ASIA's websites for comments. ISCoS and ASIA approved the final version of the data set. To make the data set uniform, each variable and each response category within each variable have been specifically defined in a way that is designed to promote the collection and reporting of comparable minimal data.

Results

Variables included in the International SCI Male Sexual Function Basic Data Set are as follows: date of data collection, interest in discussing sexual issues, sexual issues unrelated to spinal cord lesion, sexual dysfunction related to spinal cord lesion, psychogenic erection, reflex erection, ejaculation and orgasmic function. Complete instructions for data collection, data sheet and training cases are available at the website of ISCoS (http://www.iscos.org.uk) and ASIA (http://www.asia-spinalinjury.org).",2011-02-01 +20479508,TCLUST: a fast method for clustering genome-scale expression data.,"Genes with a common function are often hypothesized to have correlated expression levels in mRNA expression data, motivating the development of clustering algorithms for gene expression data sets. We observe that existing approaches do not scale well for large data sets, and indeed did not converge for the data set considered here. We present a novel clustering method TCLUST that exploits coconnectedness to efficiently cluster large, sparse expression data. We compare our approach with two existing clustering methods CAST and K-means which have been previously applied to clustering of gene-expression data with good performance results. Using a number of metrics, TCLUST is shown to be superior to or at least competitive with the other methods, while being much faster. We have applied this clustering algorithm to a genome-scale gene-expression data set and used gene set enrichment analysis to discover highly significant biological clusters. (Source code for TCLUST is downloadable at http://www.cse.ucsd.edu/~bdost/tclust.)",2011-05-01 +22300319,An S-System Parameter Estimation Method (SPEM) for biological networks.,"Advances in experimental biology, coupled with advances in computational power, bring new challenges to the interdisciplinary field of computational biology. 
One such broad challenge lies in the reverse engineering of gene networks, and goes from determining the structure of static networks, to reconstructing the dynamics of interactions from time series data. Here, we focus our attention on the latter area, and in particular, on parameterizing a dynamic network of oriented interactions between genes. By basing the parameterizing approach on a known power-law relationship model between connected genes (S-system), we are able to account for non-linearity in the network, without compromising the ability to analyze network characteristics. In this article, we introduce the S-System Parameter Estimation Method (SPEM). SPEM, a freely available R software package (http://www.picb.ac.cn/ClinicalGenomicNTW/temp3.html), takes gene expression data in time series and returns the network of interactions as a set of differential equations. The methods, which are presented and tested here, are shown to provide accurate results not only on synthetic data, but more importantly on real and therefore noisy by nature, biological data. In summary, SPEM shows high sensitivity and positive predicted values, as well as free availability and expansibility (because based on open source software). We expect these characteristics to make it a useful and broadly applicable software in the challenging reconstruction of dynamic gene networks.",2012-02-01 +22130873,Analyzing cancer samples with SNP arrays.,"Single nucleotide polymorphism (SNP) arrays are powerful tools to delineate genomic aberrations in cancer genomes. 
However, the analysis of these SNP array data of cancer samples is complicated by three phenomena: (a) aneuploidy: due to massive aberrations, the total DNA content of a cancer cell can differ significantly from its normal two copies; (b) nonaberrant cell admixture: samples from solid tumors do not exclusively contain aberrant tumor cells, but always contain some portion of nonaberrant cells; (c) intratumor heterogeneity: different cells in the tumor sample may have different aberrations. We describe here how these phenomena impact the SNP array profile, and how these can be accounted for in the analysis. In an extended practical example, we apply our recently developed and further improved ASCAT (allele-specific copy number analysis of tumors) suite of tools to analyze SNP array data using data from a series of breast carcinomas as an example. We first describe the structure of the data, how it can be plotted and interpreted, and how it can be segmented. The core ASCAT algorithm next determines the fraction of nonaberrant cells and the tumor ploidy (the average number of DNA copies), and calculates an ASCAT profile. We describe how these ASCAT profiles visualize both copy number aberrations as well as copy-number-neutral events. Finally, we touch upon regions showing intratumor heterogeneity, and how they can be detected in ASCAT profiles. All source code and data described here can be found at our ASCAT Web site ( http://www.ifi.uio.no/forskning/grupper/bioinf/Projects/ASCAT/).",2012-01-01 +22711795,BioContext: an integrated text mining system for large-scale extraction and contextualization of biomolecular events.,"

Motivation

Although the amount of data in biology is rapidly increasing, critical information for understanding biological events like phosphorylation or gene expression remains locked in the biomedical literature. Most current text mining (TM) approaches to extract information about biological events are focused on either limited-scale studies and/or abstracts, with data extracted lacking context and rarely available to support further research.

Results

Here we present BioContext, an integrated TM system which extracts, extends and integrates results from a number of tools performing entity recognition, biomolecular event extraction and contextualization. Application of our system to 10.9 million MEDLINE abstracts and 234 000 open-access full-text articles from PubMed Central yielded over 36 million mentions representing 11.4 million distinct events. Event participants included over 290 000 distinct genes/proteins that are mentioned more than 80 million times and linked where possible to Entrez Gene identifiers. Over a third of events contain contextual information such as the anatomical location of the event occurrence or whether the event is reported as negated or speculative.

Availability

The BioContext pipeline is available for download (under the BSD license) at http://www.biocontext.org, along with the extracted data which is also available for online browsing.",2012-06-17 +23587063,Retroperitoneal composite pheochromocytoma-ganglioneuroma : a case report and review of literature.,"Composite pheochromocytoma/paraganglioma is a rare tumor with elements of pheochromocytoma/paraganglioma and neurogenic tumor. Most were located in the adrenal glands, and extra-adrenal composite pheochromocytoma is extremely rare. Only 4 cases in the retroperitoneum have been described in the online database PUBMED. Here, we report a case of retroperitoneal extra-adrenal composite pheochromocytoma and review the related literature.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1700539911908679.",2013-04-15 +23282103,Prediction and analysis of protein solubility using a novel scoring card method with dipeptide composition.,"

Background

Existing methods for predicting protein solubility on overexpression in Escherichia coli advance performance by using ensemble classifiers such as two-stage support vector machine (SVM) based classifiers and a number of feature types such as physicochemical properties, amino acid and dipeptide composition, accompanied with feature selection. It is desirable to develop a simple and easily interpretable method for predicting protein solubility, compared to existing complex SVM-based methods.

Results

This study proposes a novel scoring card method (SCM) by using dipeptide composition only to estimate solubility scores of sequences for predicting protein solubility. SCM calculates the propensities of 400 individual dipeptides to be soluble using statistic discrimination between soluble and insoluble proteins of a training data set. Consequently, the propensity scores of all dipeptides are further optimized using an intelligent genetic algorithm. The solubility score of a sequence is determined by the weighted sum of all propensity scores and dipeptide composition. To evaluate SCM by performance comparisons, four data sets with different sizes and variation degrees of experimental conditions were used. The results show that the simple method SCM with interpretable propensities of dipeptides has promising performance, compared with existing SVM-based ensemble methods with a number of feature types. Furthermore, the propensities of dipeptides and solubility scores of sequences can provide insights to protein solubility. For example, the analysis of dipeptide scores shows high propensity of α-helix structure and thermophilic proteins to be soluble.

Conclusions

The propensities of individual dipeptides to be soluble are varied for proteins under altered experimental conditions. For accurately predicting protein solubility using SCM, it is better to customize the score card of dipeptide propensities by using a training data set under the same specified experimental conditions. The proposed method SCM with solubility scores and dipeptide propensities can be easily applied to the protein function prediction problems in which dipeptide composition features play an important role.

Availability

The used datasets, source codes of SCM, and supplementary files are available at http://iclab.life.nctu.edu.tw/SCM/.",2012-12-13 +23750084,InDiaMed: A Comprehensive Database of Indian Medicinal plants for Diabetes.,"

Unlabelled

According to International Diabetes Federation (IDF), India has 62.4 million people with diabetes and by 2030 it is predicted that the number will rise to 100 million. Studies claim that there are around 410 experimentally proven Indian medicinal plants which have anti-diabetic activity, of which the mechanism of action of 109 plants has been elucidated or reported. So, the need of the hour is to explore the claims of Indian medicinal flora and open up the facets of many Indian plants which are being examined for their beneficial role in diabetes. So, we created a database (InDiaMed) of Indian medicinal plants that captures their role in anti-diabetic activity. InDiaMed's features include chemical, pharmacological, biochemical and geographical information of the medicinal plant, scientifically relevant information of the plant, and the coherent research done on it in the field of diabetes. The database also includes the list of poly-herbal formulations which are used for treatment of diabetes in India.

Availability

http://www.indiamed.info.",2013-04-13 +24161808,MNE software for processing MEG and EEG data.,"Magnetoencephalography and electroencephalography (M/EEG) measure the weak electromagnetic signals originating from neural currents in the brain. Using these signals to characterize and locate brain activity is a challenging task, as evidenced by several decades of methodological contributions. MNE, whose name stems from its capability to compute cortically-constrained minimum-norm current estimates from M/EEG data, is a software package that provides comprehensive analysis tools and workflows including preprocessing, source estimation, time-frequency analysis, statistical analysis, and several methods to estimate functional connectivity between distributed brain regions. The present paper gives detailed information about the MNE package and describes typical use cases while also warning about potential caveats in analysis. The MNE package is a collaborative effort of multiple institutes striving to implement and share best methods and to facilitate distribution of analysis pipelines to advance reproducibility of research. Full documentation is available at http://martinos.org/mne.",2013-10-24 +23828786,Towards building a disease-phenotype knowledge base: extracting disease-manifestation relationship from literature.,"

Motivation

Systems approaches to studying phenotypic relationships among diseases are emerging as an active area of research for both novel disease gene discovery and drug repurposing. Currently, systematic study of disease phenotypic relationships on a phenome-wide scale is limited because large-scale machine-understandable disease-phenotype relationship knowledge bases are often unavailable. Here, we present an automatic approach to extract disease-manifestation (D-M) pairs (one specific type of disease-phenotype relationship) from the wide body of published biomedical literature.

Data and methods

Our method leverages external knowledge and limits the amount of human effort required. For the text corpus, we used 119 085 682 MEDLINE sentences (21 354 075 citations). First, we used D-M pairs from existing biomedical ontologies as prior knowledge to automatically discover D-M-specific syntactic patterns. We then extracted additional pairs from MEDLINE using the learned patterns. Finally, we analysed correlations between disease manifestations and disease-associated genes and drugs to demonstrate the potential of this newly created knowledge base in disease gene discovery and drug repurposing.

Results

In total, we extracted 121 359 unique D-M pairs with a high precision of 0.924. Among the extracted pairs, 120 419 (99.2%) have not been captured in existing structured knowledge sources. We have shown that disease manifestations correlate positively with both disease-associated genes and drug treatments.

Conclusions

The main contribution of our study is the creation of a large-scale and accurate D-M phenotype relationship knowledge base. This unique knowledge base, when combined with existing phenotypic, genetic and proteomic datasets, can have profound implications in our deeper understanding of disease etiology and in rapid drug repurposing.

Availability

http://nlp.case.edu/public/data/DMPatternUMLS/",2013-07-04 +22962487,An accurate paired sample test for count data.,"

Motivation

Recent technology platforms in proteomics and genomics produce count data for quantitative analysis. Previous works on statistical significance analysis for count data have mainly focused on the independent sample setting, which does not cover the case where pairs of measurements are taken from individual patients before and after treatment. This experimental setting requires paired sample testing such as the paired t-test often used for continuous measurements. A state-of-the-art method uses a negative binomial distribution in a generalized linear model framework for paired sample testing. A paired sample design assumes that the relative change within each pair is constant across biological samples. This model can be used as an approximation to the true model in cases of heterogeneity of response in complex biological systems. We aim to specify the variation in response explicitly in combination with the inherent technical variation.

Results

We formulate the problem of paired sample test for count data in a framework of statistical combination of multiple contingency tables. In particular, we specify explicitly a random distribution for the effect with an inverted beta model. The technical variation can be modeled by either a standard Poisson distribution or an exponentiated Poisson distribution, depending on the reproducibility of the acquisition workflow. The new statistical test is evaluated on both proteomics and genomics datasets, showing a comparable performance to the state-of-the-art method in general, and in several cases where the two methods differ, the proposed test returns more reasonable p-values.

Availability

Available for download at http://www.oncoproteomics.nl/.

Contact

t.pham@vumc.nl.",2012-09-01 +24936511,"CTSC and Papillon-Lefèvre syndrome: detection of recurrent mutations in Hungarian patients, a review of published variants and database update.","Papillon-Lefèvre syndrome (PLS; OMIM 245000) is an autosomal recessive condition characterized by palmoplantar hyperkeratosis and periodontitis. In 1997, the gene locus for PLS was mapped to 11q14-21, and in 1999, variants in the cathepsin C gene (CTSC) were identified as causing PLS. To date, a total of 75 different disease-causing mutations have been published for the CTSC gene. A summary of recurrent mutations identified in Hungarian patients and a review of published mutations is presented in this update. Comparison of clinical features in affected families with the same mutation strongly confirm that identical mutations of the CTSC gene can give rise to multiple different phenotypes, making genotype-phenotype correlations difficult. Variable expression of the phenotype associated with the same CTSC mutation may reflect the influence of other genetic and/or environmental factors. Most mutations are missense (53%), nonsense (23%), or frameshift (17%); however, in-frame deletions, one splicing variant, and one 5' untranslated region (UTR) mutation have also been reported. The majority of the mutations are located in exons 5-7, which encodes the heavy chain of the cathepsin C protein, suggesting that tetramerization is important for cathepsin C enzymatic activity. All the data reviewed here have been submitted to the CTSC base, a mutation registry for PLS at http://bioinf.uta.fi/CTSCbase/.",2014-02-11 +24075082,MCF: a tool to find multi-scale community profiles in biological networks.,"Recent developments of complex graph clustering methods have implicated the practical applications with biological networks in different settings. Multi-scale Community Finder (MCF) is a tool to profile network communities (i.e., clusters of nodes) with the control of community sizes. 
The controlling parameter is referred to as the scale of the network community profile. MCF is able to find communities in all major types of networks including directed, signed, bipartite, and multi-slice networks. The fast computation promotes the practicability of the tool for large-scaled analysis (e.g., protein-protein interaction and gene co-expression networks). MCF is distributed as an open-source C++ package for academic use with both command line and user interface options, and can be downloaded at http://bsdxd.cpsc.ucalgary.ca/MCF. Detailed user manual and sample data sets are also available at the project website.",2013-08-19 +22238266,Detecting genomic indel variants with exact breakpoints in single- and paired-end sequencing data using SplazerS.,"

Motivation

The reliable detection of genomic variation in resequencing data is still a major challenge, especially for variants larger than a few base pairs. Sequencing reads crossing boundaries of structural variation carry the potential for their identification, but are difficult to map.

Results

Here we present a method for 'split' read mapping, where prefix and suffix match of a read may be interrupted by a longer gap in the read-to-reference alignment. We use this method to accurately detect medium-sized insertions and long deletions with precise breakpoints in genomic resequencing data. Compared with alternative split mapping methods, SplazerS significantly improves sensitivity for detecting large indel events, especially in variant-rich regions. Our method is robust in the presence of sequencing errors as well as alignment errors due to genomic mutations/divergence, and can be used on reads of variable lengths. Our analysis shows that SplazerS is a versatile tool applicable to unanchored or single-end as well as anchored paired-end reads. In addition, application of SplazerS to targeted resequencing data led to the interesting discovery of a complete, possibly functional gene retrocopy variant.

Availability

SplazerS is available from http://www.seqan.de/projects/ splazers.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-11 +22829625,Nebula--a web-server for advanced ChIP-seq data analysis.,"

Motivation

ChIP-seq consists of chromatin immunoprecipitation and deep sequencing of the extracted DNA fragments. It is the technique of choice for accurate characterization of the binding sites of transcription factors and other DNA-associated proteins. We present a web service, Nebula, which allows inexperienced users to perform a complete bioinformatics analysis of ChIP-seq data.

Results

Nebula was designed for both bioinformaticians and biologists. It is based on the Galaxy open source framework. Galaxy already includes a large number of functionalities for mapping reads and peak calling. We added the following to Galaxy: (i) peak calling with FindPeaks and a module for immunoprecipitation quality control, (ii) de novo motif discovery with ChIPMunk, (iii) calculation of the density and the cumulative distribution of peak locations relative to gene transcription start sites, (iv) annotation of peaks with genomic features and (v) annotation of genes with peak information. Nebula generates the graphs and the enrichment statistics at each step of the process. During Steps 3-5, Nebula optionally repeats the analysis on a control dataset and compares these results with those from the main dataset. Nebula can also incorporate gene expression (or gene modulation) data during these steps. In summary, Nebula is an innovative web service that provides an advanced ChIP-seq analysis pipeline providing ready-to-publish results.

Availability

Nebula is available at http://nebula.curie.fr/

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-24 +23596449,The Mitochondrial Complexome of Medicago truncatula.,"Legumes (Fabaceae, Leguminosae) are unique in their ability to carry out an elaborate endosymbiotic nitrogen fixation process with rhizobia proteobacteria. The symbiotic nitrogen fixation enables the host plants to grow almost independently of any other nitrogen source. Establishment of symbiosis requires adaptations of the host cellular metabolism, here foremost of the energy metabolism mainly taking place in mitochondria. Since the early 1990s, the galegoid legume Medicago truncatula Gaertn. is a well-established model for studying legume biology, but little is known about the protein complement of mitochondria from this species. An initial characterization of the mitochondrial proteome of M. truncatula (Jemalong A17) was published recently. In the frame of this study, mitochondrial protein complexes were characterized using Two-dimensional (2D) Blue native (BN)/SDS-PAGE. From 139 detected spots, the ""first hit"" (=most abundant) proteins of 59 spots were identified by mass spectrometry. Here, we present a comprehensive analysis of the mitochondrial ""complexome"" (the ""protein complex proteome"") of M. truncatula via 2D BN/SDS-PAGE in combination with highly sensitive MS protein identification. In total, 1,485 proteins were identified within 158 gel spots, representing 467 unique proteins. Data evaluation by the novel GelMap annotation tool allowed recognition of protein complexes of low abundance. Overall, at least 36 mitochondrial protein complexes were found. To our knowledge several of these complexes were described for the first time in Medicago. The data set is accessible under http://www.gelmap.de/medicago/. 
The mitochondrial protein complex proteomes of Arabidopsis available at http://www.gelmap.de/arabidopsis/ and Medicago are compared.",2013-04-15 +22302575,Predicting kinase substrates using conservation of local motif density.,"

Motivation

Protein kinases represent critical links in cell signaling. A central problem in computational biology is to systematically identify their substrates.

Results

This study introduces a new method to predict kinase substrates by extracting evolutionary information from multiple sequence alignments in a manner that is tolerant to degenerate motif positioning. Given a known consensus, the new method (ConDens) compares the observed density of matches to a null model of evolution and does not require labeled training data. We confirmed that ConDens has improved performance compared with several existing methods in the field. Further, we show that it is generalizable and can predict interesting substrates for several important eukaryotic kinases where training data is not available.

Availability and implementation

ConDens can be found at http://www.moseslab.csb.utoronto.ca/andyl/.

Contact

alan.moses@utoronto.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-02-01 +23748950,CABS-fold: Server for the de novo and consensus-based prediction of protein structure.,"The CABS-fold web server provides tools for protein structure prediction from sequence only (de novo modeling) and also using alternative templates (consensus modeling). The web server is based on the CABS modeling procedures ranked in previous Critical Assessment of techniques for protein Structure Prediction competitions as one of the leading approaches for de novo and template-based modeling. Except for template data, fragmentary distance restraints can also be incorporated into the modeling process. The web server output is a coarse-grained trajectory of generated conformations, its Jmol representation and predicted models in all-atom resolution (together with accompanying analysis). CABS-fold can be freely accessed at http://biocomp.chem.uw.edu.pl/CABSfold.",2013-06-08 +23859271,De novo sequencing with limited number of post-translational modifications per peptide.,"De novo sequencing derives the peptide sequence from a tandem mass spectrum without the assistance of protein databases. This analysis has been indispensable for the identification of novel or modified peptides in a biological sample. Currently, the speed of de novo sequencing algorithms is not heavily affected by the number of post-translational modification (PTM) types in consideration. However, the accuracy of the algorithms can be degraded due to the increased search space. Most peptides in a proteomics research contain only a small number of PTMs per peptide, yet the types of PTMs can come from a large number of choices. Therefore, it is desirable to include a large number of PTM types in a de novo sequencing algorithm, yet to limit the number of PTM occurrences in each peptide to increase the accuracy. In this paper, we present an efficient de novo sequencing algorithm, DeNovoPTM, for such a purpose. 
The implemented software is downloadable from http://www.cs.uwaterloo.ca/~l22he/denovo_ptm .",2013-04-11 +23593247,DomHR: accurately identifying domain boundaries in proteins using a hinge region strategy.,"

Motivation

The precise prediction of protein domains, which are the structural, functional and evolutionary units of proteins, has been a research focus in recent years. Although many methods have been presented for predicting protein domains and boundaries, the accuracy of predictions could be improved.

Results

In this study we present a novel approach, DomHR, which is an accurate predictor of protein domain boundaries based on a creative hinge region strategy. A hinge region was defined as a segment of amino acids that covers part of a domain region and a boundary region. We developed a strategy to construct profiles of domain-hinge-boundary (DHB) features generated by sequence-domain/hinge/boundary alignment against a database of known domain structures. The DHB features had three elements: normalized domain, hinge, and boundary probabilities. The DHB features were used as input to identify domain boundaries in a sequence. DomHR used a nonredundant dataset as the training set, the DHB and predicted shape string as features, and a conditional random field as the classification algorithm. In predicted hinge regions, a residue was determined to be a domain or a boundary according to a decision threshold. After decision thresholds were optimized, DomHR was evaluated by cross-validation, large-scale prediction, independent test and CASP (Critical Assessment of Techniques for Protein Structure Prediction) tests. All results confirmed that DomHR outperformed other well-established, publicly available domain boundary predictors for prediction accuracy.

Availability

The DomHR is available at http://cal.tongji.edu.cn/domain/.",2013-04-11 +24931973,Cross-study validation for the assessment of prediction algorithms.,"

Motivation

Numerous competing algorithms for prediction in high-dimensional settings have been developed in the statistical and machine-learning literature. Learning algorithms and the prediction models they generate are typically evaluated on the basis of cross-validation error estimates in a few exemplary datasets. However, in most applications, the ultimate goal of prediction modeling is to provide accurate predictions for independent samples obtained in different settings. Cross-validation within exemplary datasets may not adequately reflect performance in the broader application context.

Methods

We develop and implement a systematic approach to 'cross-study validation', to replace or supplement conventional cross-validation when evaluating high-dimensional prediction models in independent datasets. We illustrate it via simulations and in a collection of eight estrogen-receptor positive breast cancer microarray gene-expression datasets, where the objective is predicting distant metastasis-free survival (DMFS). We computed the C-index for all pairwise combinations of training and validation datasets. We evaluate several alternatives for summarizing the pairwise validation statistics, and compare these to conventional cross-validation.

Results

Our data-driven simulations and our application to survival prediction with eight breast cancer microarray datasets, suggest that standard cross-validation produces inflated discrimination accuracy for all algorithms considered, when compared to cross-study validation. Furthermore, the ranking of learning algorithms differs, suggesting that algorithms performing best in cross-validation may be suboptimal when evaluated through independent validation.

Availability

The survHD: Survival in High Dimensions package (http://www.bitbucket.org/lwaldron/survhd) will be made available through Bioconductor.",2014-06-01 +22072382,Epigenetic priors for identifying active transcription factor binding sites.,"

Motivation

Accurate knowledge of the genome-wide binding of transcription factors in a particular cell type or under a particular condition is necessary for understanding transcriptional regulation. Using epigenetic data such as histone modification and DNase I, accessibility data has been shown to improve motif-based in silico methods for predicting such binding, but this approach has not yet been fully explored.

Results

We describe a probabilistic method for combining one or more tracks of epigenetic data with a standard DNA sequence motif model to improve our ability to identify active transcription factor binding sites (TFBSs). We convert each data type into a position-specific probabilistic prior and combine these priors with a traditional probabilistic motif model to compute a log-posterior odds score. Our experiments, using histone modifications H3K4me1, H3K4me3, H3K9ac and H3K27ac, as well as DNase I sensitivity, show conclusively that the log-posterior odds score consistently outperforms a simple binary filter based on the same data. We also show that our approach performs competitively with a more complex method, CENTIPEDE, and suggest that the relative simplicity of the log-posterior odds scoring method makes it an appealing and very general method for identifying functional TFBSs on the basis of DNA and epigenetic evidence.

Availability and implementation

FIMO, part of the MEME Suite software toolkit, now supports log-posterior odds scoring using position-specific priors for motif search. A web server and source code are available at http://meme.nbcr.net. Utilities for creating priors are at http://research.imb.uq.edu.au/t.bailey/SD/Cuellar2011.

Contact

t.bailey@uq.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-08 +24753487,Multiscale DNA partitioning: statistical evidence for segments.,"

Motivation

DNA segmentation, i.e. the partitioning of DNA in compositionally homogeneous segments, is a basic task in bioinformatics. Different algorithms have been proposed for various partitioning criteria such as Guanine/Cytosine (GC) content, local ancestry in population genetics or copy number variation. A critical component of any such method is the choice of an appropriate number of segments. Some methods use model selection criteria and do not provide a suitable error control. Other methods that are based on simulating a statistic under a null model provide suitable error control only if the correct null model is chosen.

Results

Here, we focus on partitioning with respect to GC content and propose a new approach that provides statistical error control: as in statistical hypothesis testing, it guarantees with a user-specified probability [Formula: see text] that the number of identified segments does not exceed the number of actually present segments. The method is based on a statistical multiscale criterion, rendering this as a segmentation method that searches segments of any length (on all scales) simultaneously. It is also accurate in localizing segments: under benchmark scenarios, our approach leads to a segmentation that is more accurate than the approaches discussed in the comparative review of Elhaik et al. In our real data examples, we find segments that often correspond well to features taken from standard University of California at Santa Cruz (UCSC) genome annotation tracks.

Availability and implementation

Our method is implemented in function smuceR of the R-package stepR available at http://www.stochastik.math.uni-goettingen.de/smuce.",2014-04-21 +24266945,Identification and characterization of plastid-type proteins from sequence-attributed features using machine learning.,"

Background

Plastids are an important component of plant cells, being the site of manufacture and storage of chemical compounds used by the cell, and contain pigments such as those used in photosynthesis, starch synthesis/storage, cell color etc. They are essential organelles of the plant cell, also present in algae. Recent advances in genomic technology and sequencing efforts is generating a huge amount of DNA sequence data every day. The predicted proteome of these genomes needs annotation at a faster pace. In view of this, one such annotation need is to develop an automated system that can distinguish between plastid and non-plastid proteins accurately, and further classify plastid-types based on their functionality. We compared the amino acid compositions of plastid proteins with those of non-plastid ones and found significant differences, which were used as a basis to develop various feature-based prediction models using similarity-search and machine learning.

Results

In this study, we developed separate Support Vector Machine (SVM) trained classifiers for characterizing the plastids in two steps: first distinguishing the plastid vs. non-plastid proteins, and then classifying the identified plastids into their various types based on their function (chloroplast, chromoplast, etioplast, and amyloplast). Five diverse protein features: amino acid composition, dipeptide composition, the pseudo amino acid composition, N(terminal)-Center-C(terminal) composition and the protein physicochemical properties are used to develop SVM models. Overall, the dipeptide composition-based module shows the best performance with an accuracy of 86.80% and Matthews Correlation Coefficient (MCC) of 0.74 in phase-I and 78.60% with a MCC of 0.44 in phase-II. On independent test data, this model also performs better with an overall accuracy of 76.58% and 74.97% in phase-I and phase-II, respectively. The similarity-based PSI-BLAST module shows very low performance with about 50% prediction accuracy for distinguishing plastid vs. non-plastids and only 20% in classifying various plastid-types, indicating the need and importance of machine learning algorithms.

Conclusion

The current work is a first attempt to develop a methodology for classifying various plastid-type proteins. The prediction modules have also been made available as a web tool, PLpred available at http://bioinfo.okstate.edu/PLpred/ for real time identification/characterization. We believe this tool will be very useful in the functional annotation of various genomes.",2013-10-09 +22694900,Genotyping hepatitis B virus dual infections using population-based sequence data.,"The hepatitis B virus (HBV) is classified into distinct genotypes A-H that are characterized by different progression of hepatitis B and sensitivity to interferon treatment. Previous computational genotyping methods are not robust enough regarding HBV dual infections with different genotypes. The correct classification of HBV sequences into the present genotypes is impaired due to multiple ambiguous sequence positions. We present a computational model that is able to identify and genotype inter- and intragenotype dual infections using population-based sequencing data. Model verification on synthetic data showed 100 % accuracy for intergenotype dual infections and 36.4 % sensitivity in intragenotype dual infections. Screening patient sera (n = 241) revealed eight putative cases of intergenotype dual infection (one A-D, six A-G and one D-G) and four putative cases of intragenotype dual infection (one A-A, two D-D and one E-E). Clonal experiments from the original patient material confirmed three out of three of our predictions. The method has been integrated into geno2pheno([hbv]), an established web-service in clinical use for analysing HBV sequence data. It offers exact and detailed identification of HBV genotypes in patients with dual infections that helps to optimize antiviral therapy regimens. geno2pheno([hbv]) is available under http://www.genafor.org/g2p_hbv/index.php.",2012-06-13 +24603986,Estimating optimal window size for analysis of low-coverage next-generation sequence data.,"

Motivation

Current high-throughput sequencing has greatly transformed genome sequence analysis. In the context of very low-coverage sequencing (<0.1×), performing 'binning' or 'windowing' on mapped short sequences ('reads') is critical to extract genomic information of interest for further evaluation, such as copy-number alteration analysis. If the window size is too small, many windows will exhibit zero counts and almost no pattern can be observed. In contrast, if the window size is too wide, the patterns or genomic features will be 'smoothed out'. Our objective is to identify an optimal window size in between the two extremes.

Results

We assume the reads density to be a step function. Given this model, we propose a data-based estimation of optimal window size based on Akaike's information criterion (AIC) and cross-validation (CV) log-likelihood. By plotting the AIC and CV log-likelihood curve as a function of window size, we are able to estimate the optimal window size that minimizes AIC or maximizes CV log-likelihood. The proposed methods are of general purpose and we illustrate their application using low-coverage next-generation sequence datasets from real tumour samples and simulated datasets.

Availability and implementation

An R package to estimate optimal window size is available at http://www1.maths.leeds.ac.uk/∼arief/R/win/.",2014-03-05 +24753484,Efficient Bayesian inference under the structured coalescent.,"

Motivation

Population structure significantly affects evolutionary dynamics. Such structure may be due to spatial segregation, but may also reflect any other gene-flow-limiting aspect of a model. In combination with the structured coalescent, this fact can be used to inform phylogenetic tree reconstruction, as well as to infer parameters such as migration rates and subpopulation sizes from annotated sequence data. However, conducting Bayesian inference under the structured coalescent is impeded by the difficulty of constructing Markov Chain Monte Carlo (MCMC) sampling algorithms (samplers) capable of efficiently exploring the state space.

Results

In this article, we present a new MCMC sampler capable of sampling from posterior distributions over structured trees: timed phylogenetic trees in which lineages are associated with the distinct subpopulation in which they lie. The sampler includes a set of MCMC proposal functions that offer significant mixing improvements over a previously published method. Furthermore, its implementation as a BEAST 2 package ensures maximum flexibility with respect to model and prior specification. We demonstrate the usefulness of this new sampler by using it to infer migration rates and effective population sizes of H3N2 influenza between New Zealand, New York and Hong Kong from publicly available hemagglutinin (HA) gene sequences under the structured coalescent.

Availability and implementation

The sampler has been implemented as a publicly available BEAST 2 package that is distributed under version 3 of the GNU General Public License at http://compevol.github.io/MultiTypeTree.",2014-04-20 +22720667,m:Explorer: multinomial regression models reveal positive and negative regulators of longevity in yeast quiescence.,"We developed m:Explorer for identifying process-specific transcription factors (TFs) from multiple genome-wide sources, including transcriptome, DNA-binding and chromatin data. m:Explorer robustly outperforms similar techniques in finding cell cycle TFs in Saccharomyces cerevisiae. We predicted and experimentally tested regulators of quiescence (G0), a model of ageing, over a six-week time-course. We validated nine of top-12 predictions as novel G0 TFs, including Δmga2, Δcst6, Δbas1 with higher viability and G0-essential TFs Tup1, Swi3. Pathway analysis associates longevity to reduced growth, reprogrammed metabolism and cell wall remodeling. m:Explorer (http://biit.cs.ut.ee/mexplorer/) is instrumental in interrogating eukaryotic regulatory systems using heterogeneous data.",2012-06-21 +23550138,The Biotinidase Gene Variants Registry: A Paradigm Public Database.,"The BTD gene codes for production of biotinidase, the enzyme responsible for helping the body reuse and recycle the biotin found in foods. Biotinidase deficiency is an autosomal recessively inherited disorder resulting in the inability to recycle the vitamin biotin and affects approximately 1 in 60,000 newborns. If untreated, the depletion of intracellular biotin leads to impaired activities of the biotin-dependent carboxylases and can result in cutaneous and neurological abnormalities in individuals with the disorder. Mutations in the biotinidase gene (BTD) alter enzymatic function. To date, more than 165 mutations in BTD have been reported. 
Our group has developed a database that characterizes the known mutations and sequence variants in BTD (http://arup.utah.edu/database/BTD/BTD_welcome.php). All sequence variants have been verified for their positions within the BTD gene and designated according to standard nomenclature suggested by Human Genome Variation Society (HGVS). In addition, we describe the change in the protein, indicate whether the variant is a known or likely mutation vs. a benign polymorphism, and include the reference that first described the alteration. We also indicate whether the alteration is known to be clinically pathological based on an observation of a known symptomatic individual or predicted to be pathological based on enzymatic activity or putative disruption of the protein structure. We incorporated the published phenotype to help establish genotype-phenotype correlations and facilitate this process for those performing mutation analysis and/or interpreting results. Other features of this database include disease information, relevant links about biotinidase deficiency, reference sequences, ability to query by various criteria, and the process for submitting novel variations. This database is free to the public and will be updated quarterly. This database is a paradigm for formulating databases for other inherited metabolic disorders.",2013-04-09 +23568222,Using the candidate gene approach for detecting genes underlying seed oil concentration and yield in soybean.,"Increasing the oil concentration in soybean seeds has been given more attention in recent years because of demand for both edible oil and biodiesel production. Oil concentration in soybean is a complex quantitative trait regulated by many genes as well as environmental conditions. 
To identify genes governing seed oil concentration in soybean, 16 putative candidate genes of three important gene families (GPAT: acyl-CoA:sn-glycerol-3-phosphate acyltransferase, DGAT: acyl-CoA:diacylglycerol acyltransferase, and PDAT: phospholipid:diacylglycerol acyltransferase) involved in triacylglycerol (TAG) biosynthesis pathways were selected and their sequences retrieved from the soybean database ( http://www.phytozome.net/soybean ). Three sequence mutations were discovered in either coding or noncoding regions of three DGAT soybean isoforms when comparing the parents of a 203 recombinant inbreed line (RIL) population; OAC Wallace and OAC Glencoe. The RIL population was used to study the effects of these mutations on seed oil concentration and other important agronomic and seed composition traits, including seed yield and protein concentration across three field locations in Ontario, Canada, in 2009 and 2010. An insertion/deletion (indel) mutation in the GmDGAT2B gene in OAC Wallace was significantly associated with reduced seed oil concentration across three environments and reduced seed yield at Woodstock in 2010. A mutation in the 3' untranslated (3'UTR) region of GmDGAT2C was associated with seed yield at Woodstock in 2009. A mutation in the intronic region of GmDGAR1B was associated with seed yield and protein concentration at Ottawa in 2010. The genes identified in this study had minor effects on either seed yield or oil concentration, which was in agreement with the quantitative nature of the traits. However, the novel gene-specific markers designed in the present study can be used in soybean breeding for marker-assisted selection aimed at increasing seed yield and oil concentration with no significant impact on seed protein concentration.",2013-04-09 +22333270,"Yabi: An online research environment for grid, high performance and cloud computing.","

Background

There is a significant demand for creating pipelines or workflows in the life science discipline that chain a number of discrete compute and data intensive analysis tasks into sophisticated analysis procedures. This need has led to the development of general as well as domain-specific workflow environments that are either complex desktop applications or Internet-based applications. Complexities can arise when configuring these applications in heterogeneous compute and storage environments if the execution and data access models are not designed appropriately. These complexities manifest themselves through limited access to available HPC resources, significant overhead required to configure tools and inability for users to simply manage files across heterogenous HPC storage infrastructure.

Results

In this paper, we describe the architecture of a software system that is adaptable to a range of both pluggable execution and data backends in an open source implementation called Yabi. Enabling seamless and transparent access to heterogenous HPC environments at its core, Yabi then provides an analysis workflow environment that can create and reuse workflows as well as manage large amounts of both raw and processed data in a secure and flexible way across geographically distributed compute resources. Yabi can be used via a web-based environment to drag-and-drop tools to create sophisticated workflows. Yabi can also be accessed through the Yabi command line which is designed for users that are more comfortable with writing scripts or for enabling external workflow environments to leverage the features in Yabi. Configuring tools can be a significant overhead in workflow environments. Yabi greatly simplifies this task by enabling system administrators to configure as well as manage running tools via a web-based environment and without the need to write or edit software programs or scripts. In this paper, we highlight Yabi's capabilities through a range of bioinformatics use cases that arise from large-scale biomedical data analysis.

Conclusion

The Yabi system encapsulates considered design of both execution and data models, while abstracting technical details away from users who are not skilled in HPC and providing an intuitive drag-and-drop scalable web-based workflow environment where the same tools can also be accessed via a command line. Yabi is currently in use and deployed at multiple institutions and is available at http://ccg.murdoch.edu.au/yabi.",2012-02-15 +24932004,Automated detection and tracking of many cells by using 4D live-cell imaging data.,"

Motivation

Automated fluorescence microscopes produce massive amounts of images observing cells, often in four dimensions of space and time. This study addresses two tasks of time-lapse imaging analyses; detection and tracking of the many imaged cells, and it is especially intended for 4D live-cell imaging of neuronal nuclei of Caenorhabditis elegans. The cells of interest appear as slightly deformed ellipsoidal forms. They are densely distributed, and move rapidly in a series of 3D images. Thus, existing tracking methods often fail because more than one tracker will follow the same target or a tracker transits from one to other of different targets during rapid moves.

Results

The present method begins by performing the kernel density estimation in order to convert each 3D image into a smooth, continuous function. The cell bodies in the image are assumed to lie in the regions near the multiple local maxima of the density function. The tasks of detecting and tracking the cells are then addressed with two hill-climbing algorithms. The positions of the trackers are initialized by applying the cell-detection method to an image in the first frame. The tracking method keeps attracting them to near the local maxima in each subsequent image. To prevent the tracker from following multiple cells, we use a Markov random field (MRF) to model the spatial and temporal covariation of the cells and to maximize the image forces and the MRF-induced constraint on the trackers. The tracking procedure is demonstrated with dynamic 3D images that each contain >100 neurons of C.elegans.

Availability

http://daweb.ism.ac.jp/yoshidalab/crest/ismb2014 SUPPLEMENTARY INFORMATION: Supplementary data are available at http://daweb.ism.ac.jp/yoshidalab/crest/ismb2014",2014-06-01 +23341021,Spatial modeling of visual field data for assessing glaucoma progression.,"

Purpose

In order to reduce noise and account for spatial correlation, we applied disease mapping techniques to visual field (VF) data. We compared our calculated rates of progression to other established techniques.

Methods

Conditional autoregressive (CAR) priors, weighted to account for physiologic correlations, were employed to describe spatial and spatiotemporal correlation over the VF. Our model is extended to account for several physiologic features, such as the nerve fibers serving adjacent loci on the VF not mapping to the adjacent optic disc regions, the presence of the blind spot, and large measurement fluctuation. The models were applied to VFs from 194 eyes and fitted within a Bayesian framework using Metropolis-Hastings algorithms.

Results

Our method (SPROG for Spatial PROGgression) showed progression in 42% of eyes. Using a clinical reference, our method had the best receiver operating characteristics compared with the point-wise linear regression methods. Because our model intrinsically accounts for the large variation of VF data, by adjusting for spatial correlation, the effects of outliers are minimized, and spurious trends are avoided.

Conclusions

By using CAR priors, we have modeled the spatial correlation in the eye. Combining this with physiologic information, we are able to provide a novel method for VF analysis. Model diagnostics, sensitivity, and specificity show our model to be apparently superior to current point-wise linear regression methods. (http://www.anzctr.org.au number, ACTRN12608000274370.).",2013-02-28 +21742634,PREDA: an R-package to identify regional variations in genomic data.,"

Summary

Chromosomal patterns of genomic signals represent molecular fingerprints that may reveal how the local structural organization of a genome impacts the functional control mechanisms. Thus, the integrative analysis of multiple sources of genomic data and information deepens the resolution and enhances the interpretation of stand-alone high-throughput data. In this note, we present PREDA (Position RElated Data Analysis), an R package for detecting regional variations in genomics data. PREDA identifies relevant chromosomal patterns in high-throughput data using a smoothing approach that accounts for distance and density variability of genomics features. Custom-designed data structures allow efficiently managing diverse signals in different genomes. A variety of smoothing functions and statistics empower flexible and robust workflows. The modularity of package design allows an easy deployment of custom analytical pipelines. Tabular and graphical representations facilitate downstream biological interpretation of results.

Availability

PREDA is available in Bioconductor and at http://www.xlab.unimo.it/PREDA.

Contact

silvio.bicciato@unimore.it

Supplementary information

Supplementary information is available at Bioinformatics online.",2011-07-07 +24048353,Assessing the validity and reproducibility of genome-scale predictions.,"

Motivation

Validation and reproducibility of results is a central and pressing issue in genomics. Several recent embarrassing incidents involving the irreproducibility of high-profile studies have illustrated the importance of this issue and the need for rigorous methods for the assessment of reproducibility.

Results

Here, we describe an existing statistical model that is very well suited to this problem. We explain its utility for assessing the reproducibility of validation experiments, and apply it to a genome-scale study of adenosine deaminase acting on RNA (ADAR)-mediated RNA editing in Drosophila. We also introduce a statistical method for planning validation experiments that will obtain the tightest reproducibility confidence limits, which, for a fixed total number of experiments, returns the optimal number of replicates for the study.

Availability

Downloadable software and a web service for both the analysis of data from a reproducibility study and for the optimal design of these studies is provided at http://ccmbweb.ccv.brown.edu/reproducibility.html .",2013-09-17 +22684630,"RobiNA: a user-friendly, integrated software solution for RNA-Seq-based transcriptomics.","Recent rapid advances in next generation RNA sequencing (RNA-Seq)-based provide researchers with unprecedentedly large data sets and open new perspectives in transcriptomics. Furthermore, RNA-Seq-based transcript profiling can be applied to non-model and newly discovered organisms because it does not require a predefined measuring platform (like e.g. microarrays). However, these novel technologies pose new challenges: the raw data need to be rigorously quality checked and filtered prior to analysis, and proper statistical methods have to be applied to extract biologically relevant information. Given the sheer volume of data, this is no trivial task and requires a combination of considerable technical resources along with bioinformatics expertise. To aid the individual researcher, we have developed RobiNA as an integrated solution that consolidates all steps of RNA-Seq-based differential gene-expression analysis in one user-friendly cross-platform application featuring a rich graphical user interface. RobiNA accepts raw FastQ files, SAM/BAM alignment files and counts tables as input. It supports quality checking, flexible filtering and statistical analysis of differential gene expression based on state-of-the art biostatistical methods developed in the R/Bioconductor projects. In-line help and a step-by-step manual guide users through the analysis. Installer packages for Mac OS X, Windows and Linux are available under the LGPL licence from http://mapman.gabipd.org/web/guest/robin.",2012-06-08 +22123829,PathVar: analysis of gene and protein expression variance in cellular pathways using microarray data.,"

Summary

Finding significant differences between the expression levels of genes or proteins across diverse biological conditions is one of the primary goals in the analysis of functional genomics data. However, existing methods for identifying differentially expressed genes or sets of genes by comparing measures of the average expression across predefined sample groups do not detect differential variance in the expression levels across genes in cellular pathways. Since corresponding pathway deregulations occur frequently in microarray gene or protein expression data, we present a new dedicated web application, PathVar, to analyze these data sources. The software ranks pathway-representing gene/protein sets in terms of the differences of the variance in the within-pathway expression levels across different biological conditions. Apart from identifying new pathway deregulation patterns, the tool exploits these patterns by combining different machine learning methods to find clusters of similar samples and build sample classification models.

Availability

freely available at http://pathvar.embl.de

Contact

enrico.glaab@uni.lu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-28 +24637495,Contextual and cued fear conditioning test using a video analyzing system in mice. ,"The contextual and cued fear conditioning test is one of the behavioral tests that assesses the ability of mice to learn and remember an association between environmental cues and aversive experiences. In this test, mice are placed into a conditioning chamber and are given pairings of a conditioned stimulus (an auditory cue) and an aversive unconditioned stimulus (an electric footshock). After a delay time, the mice are exposed to the same conditioning chamber and a differently shaped chamber with presentation of the auditory cue. Freezing behavior during the test is measured as an index of fear memory. To analyze the behavior automatically, we have developed a video analyzing system using the ImageFZ application software program, which is available as a free download at http://www.mouse-phenotype.org/. Here, to show the details of our protocol, we demonstrate our procedure for the contextual and cued fear conditioning test in C57BL/6J mice using the ImageFZ system. In addition, we validated our protocol and the video analyzing system performance by comparing freezing time measured by the ImageFZ system or a photobeam-based computer measurement system with that scored by a human observer. As shown in our representative results, the data obtained by ImageFZ were similar to those analyzed by a human observer, indicating that the behavioral analysis using the ImageFZ system is highly reliable. The present movie article provides detailed information regarding the test procedures and will promote understanding of the experimental situation.",2014-03-01 +22559792,ADMET evaluation in drug discovery. 11. 
PharmacoKinetics Knowledge Base (PKKB): a comprehensive database of pharmacokinetic and toxic properties for drugs.,"Good and extensive experimental ADMET (absorption, distribution, metabolism, excretion, and toxicity) data is critical for developing reliable in silico ADMET models. Here we develop a PharmacoKinetics Knowledge Base (PKKB) to compile comprehensive information about ADMET properties into a single electronic repository. We incorporate more than 10 000 experimental ADMET measurements of 1685 drugs into the PKKB. The ADMET properties in the PKKB include octanol/water partition coefficient, solubility, dissociation constant, intestinal absorption, Caco-2 permeability, human bioavailability, plasma protein binding, blood-plasma partitioning ratio, volume of distribution, metabolism, half-life, excretion, urinary excretion, clearance, toxicity, half lethal dose in rat or mouse, etc. The PKKB provides the most extensive collection of freely available data for ADMET properties up to date. All these ADMET properties, as well as the pharmacological information and the calculated physiochemical properties are integrated into a web-based information system. Eleven separated data sets for octanol/water partition coefficient, solubility, blood-brain partitioning, intestinal absorption, Caco-2 permeability, human oral bioavailability, and P-glycoprotein inhibitors have been provided for free download and can be used directly for ADMET modeling. The PKKB is available online at http://cadd.suda.edu.cn/admet.",2012-05-15 +21903626,TopiaryExplorer: visualizing large phylogenetic trees with environmental metadata.,"

Motivation

Microbial community profiling is a highly active area of research, but tools that facilitate visualization of phylogenetic trees and associated environmental data have not kept up with the increasing quantity of data generated in these studies.

Results

TopiaryExplorer supports the visualization of very large phylogenetic trees, including features such as the automated coloring of branches by environmental data, manipulation of trees and incorporation of per-tip metadata (e.g. taxonomic labels).

Availability

http://topiaryexplorer.sourceforge.net.

Contact

rob.knight@colorado.edu.",2011-09-08 +23564842,tmVar: a text mining approach for extracting sequence variants in biomedical literature.,"

Motivation

Text-mining mutation information from the literature becomes a critical part of the bioinformatics approach for the analysis and interpretation of sequence variations in complex diseases in the post-genomic era. It has also been used for assisting the creation of disease-related mutation databases. Most of existing approaches are rule-based and focus on limited types of sequence variations, such as protein point mutations. Thus, extending their extraction scope requires significant manual efforts in examining new instances and developing corresponding rules. As such, new automatic approaches are greatly needed for extracting different kinds of mutations with high accuracy.

Results

Here, we report tmVar, a text-mining approach based on conditional random field (CRF) for extracting a wide range of sequence variants described at protein, DNA and RNA levels according to a standard nomenclature developed by the Human Genome Variation Society. By doing so, we cover several important types of mutations that were not considered in past studies. Using a novel CRF label model and feature set, our method achieves higher performance than a state-of-the-art method on both our corpus (91.4 versus 78.1% in F-measure) and their own gold standard (93.9 versus 89.4% in F-measure). These results suggest that tmVar is a high-performance method for mutation extraction from biomedical literature.

Availability

tmVar software and its corpus of 500 manually curated abstracts are available for download at http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/pub/tmVar",2013-04-05 +24874013,Finding novel molecular connections between developmental processes and disease.,"Identifying molecular connections between developmental processes and disease can lead to new hypotheses about health risks at all stages of life. Here we introduce a new approach to identifying significant connections between gene sets and disease genes, and apply it to several gene sets related to human development. To overcome the limits of incomplete and imperfect information linking genes to disease, we pool genes within disease subtrees in the MeSH taxonomy, and we demonstrate that such pooling improves the power and accuracy of our approach. Significance is assessed through permutation. We created a web-based visualization tool to facilitate multi-scale exploration of this large collection of significant connections (http://gda.cs.tufts.edu/development). High-level analysis of the results reveals expected connections between tissue-specific developmental processes and diseases linked to those tissues, and widespread connections to developmental disorders and cancers. Yet interesting new hypotheses may be derived from examining the unexpected connections. We highlight and discuss the implications of three such connections, linking dementia with bone development, polycystic ovary syndrome with cardiovascular development, and retinopathy of prematurity with lung development. Our results provide additional evidence that TGFB plays a key role in the early pathogenesis of polycystic ovary syndrome. Our evidence also suggests that the VEGF pathway and downstream NFKB signaling may explain the complex relationship between bronchopulmonary dysplasia and retinopathy of prematurity, and may form a bridge between two currently-competing hypotheses about the molecular origins of bronchopulmonary dysplasia. 
Further data exploration and similar queries about other gene sets may generate a variety of new information about the molecular relationships between additional diseases.",2014-05-29 +23220899,Accuracy and reliability assessment of CT and MR perfusion analysis software using a digital phantom.,"

Purpose

To design a digital phantom data set for computed tomography (CT) perfusion and perfusion-weighted imaging on the basis of the widely accepted tracer kinetic theory in which the true values of cerebral blood flow (CBF), cerebral blood volume (CBV), mean transit time (MTT), and tracer arrival delay are known and to evaluate the accuracy and reliability of postprocessing programs using this digital phantom.

Materials and methods

A phantom data set was created by generating concentration-time curves reflecting true values for CBF (2.5-87.5 mL/100 g per minute), CBV (1.0-5.0 mL/100 g), MTT (3.4-24 seconds), and tracer delays (0-3.0 seconds). These curves were embedded in human brain images. The data were analyzed by using 13 algorithms each for CT and magnetic resonance (MR), including five commercial vendors and five academic programs. Accuracy was assessed by using the Pearson correlation coefficient (r) for true values. Delay-, MTT-, or CBV-dependent errors and correlations between time to maximum of residue function (Tmax) were also evaluated.

Results

In CT, CBV was generally well reproduced (r > 0.9 in 12 algorithms), but not CBF and MTT (r > 0.9 in seven and four algorithms, respectively). In MR, good correlation (r > 0.9) was observed in one-half of commercial programs, while all academic algorithms showed good correlations for all parameters. Most algorithms had delay-dependent errors, especially for commercial software, as well as CBV dependency for CBF or MTT calculation and MTT dependency for CBV calculation. Correlation was good in Tmax except for one algorithm.

Conclusion

The digital phantom readily evaluated the accuracy and characteristics of the CT and MR perfusion analysis software. All commercial programs had delay-induced errors and/or insufficient correlations with true values, while academic programs for MR showed good correlations with true values.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12112618/-/DC1.",2012-12-06 +25474160,"Malaria surveillance--United States, 2012.","

Problem/condition

Malaria in humans is caused by intraerythrocytic protozoa of the genus Plasmodium. These parasites are transmitted by the bite of an infective female Anopheles mosquito. The majority of malaria infections in the United States occur among persons who have traveled to regions with ongoing malaria transmission. However, malaria is also occasionally acquired by persons who have not traveled out of the country, through exposure to infected blood products, congenital transmission, laboratory exposure, or local mosquitoborne transmission. Malaria surveillance in the United States is conducted to identify episodes of local transmission and to guide prevention recommendations for travelers.

Period covered

This report summarizes cases in persons with onset of symptoms in 2012 and summarizes trends during previous years.

Description of system

Malaria cases diagnosed by blood film, polymerase chain reaction, or rapid diagnostic tests are mandated to be reported to local and state health departments by health-care providers or laboratory staff. Case investigations are conducted by local and state health departments, and reports are transmitted to CDC through the National Malaria Surveillance System (NMSS), National Notifiable Diseases Surveillance System (NNDSS), or direct CDC consults. For the first time, CDC conducted antimalarial drug resistance testing on blood samples submitted to CDC by health-care providers or local/state health departments. Data from these reporting systems serve as the basis for this report.

Results

CDC received 1,687 reported cases of malaria with an onset of symptoms in 2012 among persons in the United States, including 1,683 cases classified as imported, one laboratory-acquired case, one nosocomial case, and two cryptic cases. The total number of cases represents a 12% decrease from the 1,925 cases reported for 2011. Plasmodium falciparum, P. vivax, P. malariae, and P. ovale were identified in 58%, 17%, 3%, and 3% of cases, respectively. Twenty (1%) patients were infected by two species. The infecting species was unreported or undetermined in 17% of cases, a decrease of 6 percentage points from 2011. Polymerase chain reaction testing determined or corrected the species for 45 (43%) of the 104 samples submitted for drug resistance testing. Of the 909 patients who reported purpose of travel, 604 (66%) were visiting friends or relatives (VFR). Among the 983 cases in U.S. civilians for whom information on chemoprophylaxis use and travel region was known, 63 (6%) patients reported that they had followed and adhered to a chemoprophylaxis drug regimen recommended by CDC for the regions to which they had traveled. Thirty-two cases were reported in pregnant women, among whom only one adhered to chemoprophylaxis. Among all reported cases, 231 (14%) were classified as severe infections in 2012. Of these, six persons with malaria died in 2012. Beginning in 2012, there were 104 blood samples submitted to CDC that were tested for molecular markers associated with antimalarial drug resistance. Of the 65 P. falciparum-positive samples, 53 (82%) had genetic polymorphisms associated with pyrimethamine drug resistance, 61 (94%) with sulfadoxine resistance, 29 (45%) with chloroquine resistance, 1 (2%) with mefloquine drug resistance, 2 (3%) with atovaquone resistance, and none with artemisinin resistance.

Interpretation

Despite the 12% decline in the number of cases reported in 2012 compared with 2011, the overall trend in malaria cases has been increasing since 1973. Although progress has been made in reducing the global burden of malaria, the disease remains endemic in many regions, and the use of appropriate prevention measures by travelers is still inadequate.

Public health actions

Completion of data elements on the malaria case report form increased slightly in 2012 compared with 2011, but still remains unacceptably low. This incomplete reporting compromises efforts to examine trends in malaria cases and prevent infections. VFRs continue to be a difficult population to reach with effective malaria prevention strategies. Evidence-based prevention strategies that effectively target VFRs need to be developed and implemented to have a substantial impact on the numbers of imported malaria cases in the United States. Although more patients reported taking chemoprophylaxis to prevent malaria, the majority reported not taking it, and adherence was poor among those who did take chemoprophylaxis. Proper use of malaria chemoprophylaxis will prevent the majority of malaria illness and reduce the risk for severe disease (http://www.cdc.gov/malaria/travelers/drugs.html). Malaria infections can be fatal if not diagnosed and treated promptly with antimalarial medications appropriate for the patient's age and medical history, the likely country of malaria acquisition, and previous use of antimalarial chemoprophylaxis. Recent molecular laboratory advances have enabled CDC to identify and conduct molecular surveillance of antimalarial drug resistance (http://www.cdc.gov/malaria/features/ars.html). These advances will allow CDC to track, guide treatment, and manage drug resistant malaria parasites both domestically and globally. For this to be successful, specimens should be submitted for cases diagnosed in the United States and for ongoing specimen collection and testing globally. Clinicians should consult the CDC Guidelines for Treatment of Malaria and contact the CDC's Malaria Hotline for case management advice when needed. 
Malaria treatment recommendations can be obtained online (http://www.cdc.gov/malaria/diagnosis_treatment) or by calling the Malaria Hotline (770-488-7788 or toll-free at 855-856-4713).",2014-12-01 +21685088,StructHDP: automatic inference of number of clusters and population structure from admixed genotype data.,"

Motivation

Clustering of genotype data is an important way of understanding similarities and differences between populations. A summary of populations through clustering allows us to make inferences about the evolutionary history of the populations. Many methods have been proposed to perform clustering on multilocus genotype data. However, most of these methods do not directly address the question of how many clusters the data should be divided into and leave that choice to the user.

Methods

We present StructHDP, which is a method for automatically inferring the number of clusters from genotype data in the presence of admixture. Our method is an extension of two existing methods, Structure and Structurama. Using a Hierarchical Dirichlet Process (HDP), we model the presence of admixture of an unknown number of ancestral populations in a given sample of genotype data. We use a Gibbs sampler to perform inference on the resulting model and infer the ancestry proportions and the number of clusters that best explain the data.

Results

To demonstrate our method, we simulated data from an island model using the neutral coalescent. Comparing the results of StructHDP with Structurama shows the utility of combining HDPs with the Structure model. We used StructHDP to analyze a dataset of 155 Taita thrush, Turdus helleri, which has been previously analyzed using Structure and Structurama. StructHDP correctly picks the optimal number of populations to cluster the data. The clustering based on the inferred ancestry proportions also agrees with that inferred using Structure for the optimal number of populations. We also analyzed data from 1048 individuals from the Human Genome Diversity project from 53 world populations. We found that the clusters obtained correspond with major geographical divisions of the world, which is in agreement with previous analyses of the dataset.

Availability

StructHDP is written in C++. The code will be available for download at http://www.sailing.cs.cmu.edu/structhdp.

Contact

suyash@cs.cmu.edu; epxing@cs.cmu.edu.",2011-07-01 +23596339,Oversampling smoothness: an effective algorithm for phase retrieval of noisy diffraction intensities.,"Coherent diffraction imaging (CDI) is high-resolution lensless microscopy that has been applied to image a wide range of specimens using synchrotron radiation, X-ray free-electron lasers, high harmonic generation, soft X-ray lasers and electrons. Despite recent rapid advances, it remains a challenge to reconstruct fine features in weakly scattering objects such as biological specimens from noisy data. Here an effective iterative algorithm, termed oversampling smoothness (OSS), for phase retrieval of noisy diffraction intensities is presented. OSS exploits the correlation information among the pixels or voxels in the region outside of a support in real space. By properly applying spatial frequency filters to the pixels or voxels outside the support at different stages of the iterative process (i.e. a smoothness constraint), OSS finds a balance between the hybrid input-output (HIO) and error reduction (ER) algorithms to search for a global minimum in solution space, while reducing the oscillations in the reconstruction. Both numerical simulations with Poisson noise and experimental data from a biological cell indicate that OSS consistently outperforms the HIO, ER-HIO and noise robust (NR)-HIO algorithms at all noise levels in terms of accuracy and consistency of the reconstructions. It is expected that OSS will find application in the rapidly growing CDI field, as well as other disciplines where phase retrieval from noisy Fourier magnitudes is needed. The MATLAB (The MathWorks Inc., Natick, MA, USA) source code of the OSS algorithm is freely available from http://www.physics.ucla.edu/research/imaging.",2013-02-23 +24209899,Validation of a seizure-related injury model.,"

Objective

Persons with epilepsy (PWEs) are more prone to accidents than healthy people. A previous study provided an online tool to predict the risk of seizure-related injury (SRI) in individual PWEs. There is, however, no validation of the formula.

Methods

This is a cross-sectional study conducted in 10 community hospitals in Thailand. PWEs with an age of over 18 years were enrolled and defined by having had a seizure related injury (SRI). The probability of individual PWEs having a SRI was calculated by the online tool (http://sribykku.webs.com). The probability of this happening in all patients was calculated for sensitivity and specificity when compared with real data.

Results

There were 316 patients enrolled in the study. Of those, 122 patients (38.6%) had a SRI. The sensitivity and specificity of having a SRI by the online formula were 93.44% and 43.30%, respectively.

Conclusion

The online formula to predict SRI in PWEs is valid and provided comparable sensitivity and specificity with a previous study that was conducted in the tertiary care hospital.",2013-10-17 +22982435,MITOS: improved de novo metazoan mitochondrial genome annotation.,"About 2000 completely sequenced mitochondrial genomes are available from the NCBI RefSeq data base together with manually curated annotations of their protein-coding genes, rRNAs, and tRNAs. This annotation information, which has accumulated over two decades, has been obtained with a diverse set of computational tools and annotation strategies. Despite all efforts of manual curation it is still plagued by misassignments of reading directions, erroneous gene names, and missing as well as false positive annotations in particular for the RNA genes. Taken together, this causes substantial problems for fully automatic pipelines that aim to use these data comprehensively for studies of animal phylogenetics and the molecular evolution of mitogenomes. The MITOS pipeline is designed to compute a consistent de novo annotation of the mitogenomic sequences. We show that the results of MITOS match RefSeq and MitoZoa in terms of annotation coverage and quality. At the same time we avoid biases, inconsistencies of nomenclature, and typos originating from manual curation strategies. The MITOS pipeline is accessible online at http://mitos.bioinf.uni-leipzig.de.",2012-09-07 +24416128,"ZODET: software for the identification, analysis and visualisation of outlier genes in microarray expression data.","

Summary

Complex human diseases can show significant heterogeneity between patients with the same phenotypic disorder. An outlier detection strategy was developed to identify variants at the level of gene transcription that are of potential biological and phenotypic importance. Here we describe a graphical software package (z-score outlier detection (ZODET)) that enables identification and visualisation of gross abnormalities in gene expression (outliers) in individuals, using whole genome microarray data. Mean and standard deviation of expression in a healthy control cohort is used to detect both over and under-expressed probes in individual test subjects. We compared the potential of ZODET to detect outlier genes in gene expression datasets with a previously described statistical method, gene tissue index (GTI), using a simulated expression dataset and a publicly available monocyte-derived macrophage microarray dataset. Taken together, these results support ZODET as a novel approach to identify outlier genes of potential pathogenic relevance in complex human diseases. The algorithm is implemented using R packages and Java.

Availability

The software is freely available from http://www.ucl.ac.uk/medicine/molecular-medicine/publications/microarray-outlier-analysis.",2014-01-08 +26270911,Gradient Magnitude Similarity Deviation: A Highly Efficient Perceptual Image Quality Index.,"It is an important task to faithfully evaluate the perceptual quality of output images in many applications, such as image compression, image restoration, and multimedia streaming. A good image quality assessment (IQA) model should not only deliver high quality prediction accuracy, but also be computationally efficient. The efficiency of IQA metrics is becoming particularly important due to the increasing proliferation of high-volume visual data in high-speed networks. We present a new effective and efficient IQA model, called gradient magnitude similarity deviation (GMSD). The image gradients are sensitive to image distortions, while different local structures in a distorted image suffer different degrees of degradations. This motivates us to explore the use of global variation of gradient based local quality map for overall image quality prediction. We find that the pixel-wise gradient magnitude similarity (GMS) between the reference and distorted images combined with a novel pooling strategy-the standard deviation of the GMS map-can predict accurately perceptual image quality. The resulting GMSD algorithm is much faster than most state-of-the-art IQA methods, and delivers highly competitive prediction accuracy. MATLAB source code of GMSD can be downloaded at http://www4.comp.polyu.edu.hk/~cslzhang/IQA/GMSD/GMSD.htm.",2014-02-01 +24585853,Data resource profile: the Korea National Health and Nutrition Examination Survey (KNHANES).,"The Korea National Health and Nutrition Examination Survey (KNHANES) is a national surveillance system that has been assessing the health and nutritional status of Koreans since 1998. 
Based on the National Health Promotion Act, the surveys have been conducted by the Korea Centers for Disease Control and Prevention (KCDC). This nationally representative cross-sectional survey includes approximately 10 000 individuals each year as a survey sample and collects information on socioeconomic status, health-related behaviours, quality of life, healthcare utilization, anthropometric measures, biochemical and clinical profiles for non-communicable diseases and dietary intakes with three component surveys: health interview, health examination and nutrition survey. The health interview and health examination are conducted by trained staff members, including physicians, medical technicians and health interviewers, at a mobile examination centre, and dieticians' visits to the homes of the study participants are followed up. KNHANES provides statistics for health-related policies in Korea, which also serve as the research infrastructure for studies on risk factors and diseases by supporting over 500 publications. KCDC has also supported researchers in Korea by providing annual workshops for data users. KCDC has published the Korea Health Statistics each year, and microdata are publicly available through the KNHANES website (http://knhanes.cdc.go.kr).",2014-02-01 +24727480,Exploring the associations between drug side-effects and therapeutic indications.,"Drug therapeutic indications and side-effects are both measurable patient phenotype changes in response to the treatment. Inferring potential drug therapeutic indications and identifying clinically interesting drug side-effects are both important and challenging tasks. Previous studies have utilized either chemical structures or protein targets to predict indications and side-effects. In this study, we compared drug therapeutic indication prediction using various information including chemical structures, protein targets and side-effects. 
We also compared drug side-effect prediction with various information sources including chemical structures, protein targets and therapeutic indication. Prediction performance based on 10-fold cross-validation demonstrates that drug side-effects and therapeutic indications are the most predictive information source for each other. In addition, we extracted 6706 statistically significant indication-side-effect associations from all known drug-disease and drug-side-effect relationships. We further developed a novel user interface that allows the user to interactively explore these associations in the form of a dynamic bipartite graph. Many relationship pairs provide explicit repositioning hypotheses (e.g., drugs causing postural hypotension are potential candidates for hypertension) and clear adverse-reaction watch lists (e.g., drugs for heart failure possibly cause impotence). All data sets and highly correlated disease-side-effect relationships are available at http://astro.temple.edu/~tua87106/druganalysis.html.",2014-04-13 +23480444,Key issues in addressing the clinical and humanistic burden of short bowel syndrome in the US.,"

Background

The purpose of this analysis was to provide a concise report of the literature on the burden of intestinal failure associated with short bowel syndrome (SBS-IF) in adults, focused on clinical and humanistic issues important to clinicians and payers.

Scope

A literature search was performed using the National Library of Medicine PubMed database ( http://www.ncbi.nlm.nih.gov/pubmed ) with the search term 'short bowel syndrome' limited to adult populations and English-language reports published from January 1, 1965, to January 18, 2013. Citations were assessed for relevance and excluded articles focused on single case studies, colon fermentation, absorption of medications with PN/IV, surgical technique, mesenteric artery complications/surgery, and transplantation focus. Additional hand searches were performed using the terms 'short bowel syndrome' AND 'cost', and 'home parenteral nutrition' AND 'cost', along with the exclusion criteria described above.

Findings

Despite advances in management in recent decades, SBS-IF continues to carry a high burden of morbidity and mortality. In the absence of sufficient intestinal adaptation following resection, many patients remain dependent on long-term parenteral nutrition and/or intravenous fluids (PN/IV). Although potentially life saving, PN/IV is costly, invasive, and associated with numerous complications and deleterious effects on health and quality of life. Surgical interventions, especially intestinal transplantation, are costly and are associated with substantial morbidity and high mortality. New therapies, which show promise in promoting intestinal rehabilitation and reducing dependence on PN/IV therapy, are the subject of active research.

Conclusions

Overall, the available literature suggests that although SBS-IF affects a relatively small population, the clinical and humanistic burden is significant, and there is an unmet need for effective therapeutic options that target the underlying problem of inadequate absorptive capacity of the remaining intestine. Consequently, many patients with SBS-IF remain dependent on long-term PN/IV support, adding to the burden imposed by the underlying disorder.",2013-04-02 +25204697,5th National Audit Project (NAP5) on accidental awareness during general anaesthesia: summary of main findings and risk factors.,"We present the main findings of the 5th National Audit Project (NAP5) on accidental awareness during general anaesthesia (AAGA). Incidences were estimated using reports of accidental awareness as the numerator, and a parallel national anaesthetic activity survey to provide denominator data. The incidence of certain/probable and possible accidental awareness cases was ~1:19,600 anaesthetics (95% confidence interval 1:16,700-23,450). However, there was considerable variation across subtypes of techniques or subspecialities. The incidence with neuromuscular block (NMB) was ~1:8200 (1:7030-9700), and without, it was ~1:135,900 (1:78,600-299,000). The cases of AAGA reported to NAP5 were overwhelmingly cases of unintended awareness during NMB. The incidence of accidental awareness during Caesarean section was ~1:670 (1:380-1300). Two-thirds (82, 66%) of cases of accidental awareness experiences arose in the dynamic phases of anaesthesia, namely induction of and emergence from anaesthesia. During induction of anaesthesia, contributory factors included: use of thiopental, rapid sequence induction, obesity, difficult airway management, NMB, and interruptions of anaesthetic delivery during movement from anaesthetic room to theatre. 
During emergence from anaesthesia, residual paralysis was perceived by patients as accidental awareness, and commonly related to a failure to ensure full return of motor capacity. One-third (43, 33%) of accidental awareness events arose during the maintenance phase of anaesthesia, mostly due to problems at induction or towards the end of anaesthesia. Factors increasing the risk of accidental awareness included: female sex, age (younger adults, but not children), obesity, anaesthetist seniority (junior trainees), previous awareness, out-of-hours operating, emergencies, type of surgery (obstetric, cardiac, thoracic), and use of NMB. The following factors were not risk factors for accidental awareness: ASA physical status, race, and use or omission of nitrous oxide. We recommend that an anaesthetic checklist, to be an integral part of the World Health Organization Safer Surgery checklist, is introduced as an aid to preventing accidental awareness. This paper is a shortened version describing the main findings from NAP5--the full report can be found at http://www.nationalauditprojects.org.uk/NAP5_home.",2014-09-09 +24259248,Multiple comparisons in genetic association studies: a hierarchical modeling approach.,"Multiple comparisons or multiple testing has been viewed as a thorny issue in genetic association studies aiming to detect disease-associated genetic variants from a large number of genotyped variants. We alleviate the problem of multiple comparisons by proposing a hierarchical modeling approach that is fundamentally different from the existing methods. The proposed hierarchical models simultaneously fit as many variables as possible and shrink unimportant effects towards zero. 
Thus, the hierarchical models yield more efficient estimates of parameters than the traditional methods that analyze genetic variants separately, and also coherently address the multiple comparisons problem due to largely reducing the effective number of genetic effects and the number of statistically ""significant"" effects. We develop a method for computing the effective number of genetic effects in hierarchical generalized linear models, and propose a new adjustment for multiple comparisons, the hierarchical Bonferroni correction, based on the effective number of genetic effects. Our approach not only increases the power to detect disease-associated variants but also controls the Type I error. We illustrate and evaluate our method with real and simulated data sets from genetic association studies. The method has been implemented in our freely available R package BhGLM (http://www.ssg.uab.edu/bhglm/).",2014-02-01 +23666447,Developing topic-specific search filters for PubMed with click-through data.,"

Objectives

Search filters have been developed and demonstrated for better information access to the immense and ever-growing body of publications in the biomedical domain. However, to date the number of filters remains quite limited because the current filter development methods require significant human efforts in manual document review and filter term selection. In this regard, we aim to investigate automatic methods for generating search filters.

Methods

We present an automated method to develop topic-specific filters on the basis of users' search logs in PubMed. Specifically, for a given topic, we first detect its relevant user queries and then include their corresponding clicked articles to serve as the topic-relevant document set accordingly. Next, we statistically identify informative terms that best represent the topic-relevant document set using a background set composed of topic irrelevant articles. Lastly, the selected representative terms are combined with Boolean operators and evaluated on benchmark datasets to derive the final filter with the best performance.

Results

We applied our method to develop filters for four clinical topics: nephrology, diabetes, pregnancy, and depression. For the nephrology filter, our method obtained performance comparable to the state of the art (sensitivity of 91.3%, specificity of 98.7%, precision of 94.6%, and accuracy of 97.2%). Similarly, high-performing results (over 90% in all measures) were obtained for the other three search filters.

Conclusion

Based on PubMed click-through data, we successfully developed a high-performance method for generating topic-specific search filters that is significantly more efficient than existing manual methods. All data sets (topic-relevant and irrelevant document sets) used in this study and a demonstration system are publicly available at http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/downloads/CQ_filter/",2013-05-13 +30722243,First Report of Downy Mildew Caused by Peronospora sordida on Butterflybush (Buddleja stachyoides) in Brazil.,"Butterflybush (common name in Brazil, verbasco), Buddleja stachyoides Cham. & Schltdl. (Buddlejaceae), is an erect herb or small shrub, native to Brazil, that is listed both as a folk medicinal plant and as a pasture weed (4). In March 2012, a group of B. stachyoides plants growing in a pasture in Viçosa (state of Minas Gerais, Brazil), were found bearing typical downy mildew symptoms. The only pathogen reported associated with this plant species is Podosphaera xanthii (1) and because there is no record of downy mildew on members of Buddleja in Brazil, an investigation was carried out to clarify the pathogen identity. Diseased plants had lesions on living leaves that were vein-delimited, chlorotic, coalescing, and becoming necrotic adaxially and bearing downy mildew-like colonies over diseased tissues abaxially. The samples were dried in a plant press and a representative specimen was deposited in the local herbarium at the Universidade Federal de Viçosa (Accession No. VIC 31836). Reproductive structures were scraped from leaves with a scalpel and mounted in lactophenol. Preliminary observations indicated the pathogen as belonging to Peronospora (Peronosporaceae). 
The pathogen had the following morphology: Sporangiophores are 288 to 641 μm long, dichotomously branching up to seven times, hyaline, smooth, 5 to 16 μm wide at the trunk, branches 63 to 202 μm long; tips subacute, in pairs or rarely single, 5 to 19 μm long; sporangia subglobose to ellipsoidal, 12 to 22 × 11 to 17 μm, pale yellowish brown, non-papillate. Only one species of Peronospora is known to infect members of Buddleja, namely Peronospora hariotii Gäum. (1). Nevertheless, the pathogen on B. stachyoides has smaller sporangia as compared to those of P. hariotii (20 to 26 × 16 to 21 μm) (2) and it was closer to P. sordida (3). DNA of the pathogen from B. stachyoides was extracted and the cytochrome c oxidase subunit II (COX2) and internal transcribed spacer (ITS) regions were sequenced. Sequences were deposited in GenBank (COX2 as JX982637; ITS as JX982638). A BLAST search yielded 99% and 100% of maximum identity with P. sordida for COX2 and ITS, respectively. A more detailed phylogenetic study is necessary to clarify the relationship between P. sordida, P. hariotii, and related species causing downy mildew on closely related hosts. To our knowledge, this is the first report of P. sordida occurring on a member of the genus Buddleja. This is also the first time that P. sordida is reported from South America. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 19 October 2012. (2) S. M. Francis. C.M.I. Descr. Pathog. Fungi Bact. 767:1, 1983. (3) G. Hall. I.M.I. Descr. Fungi Bact. 1062:1, 1991. (4) K. G. Kissmann and D. Groth. Plantas Infestantes e Nocivas. São Paulo, BASF, 1997.",2013-04-01 +30722249,First Report of Fusarium acuminatum Causing Damping-Off Disease on Aleppo Pine in Algeria.,"The Aleppo pine (Pinus halepensis Mill.) is a conifer native to the Mediterranean region. 
In autumn and spring of 2008 to 2009, a survey of Aleppo pine seedling diseases was carried out in three forest nurseries from the Relizane, Sidi Bel Abbes, and Tlemcen departments in northwestern Algeria. Aleppo pine seedlings were potted from the soil. In all three nurseries, 1- to 2-month old seedlings showed symptoms of damping-off disease in pre- and post-emergence (collar rot) with a disease incidence of 64, 77, and 72%, respectively. Disinfected collar segments, about 5 mm in length, were plated on PDA and petri dishes incubated at 25°C. A Fusarium sp. was consistently isolated from tissues and all isolates were morphologically identified as Fusarium acuminatum Ellis & Everh. (teleomorph: Gibberella acuminata Wollenw.) according to Fusarium keys (2). Colony growth was 43 mm after 3 days on PDA; the aerial mycelium was white, developing a brownish tinge in the center on PDA; macroconidia were formed in orange sporodochia, broadly falcate, strongly septate, 3 to 5 septa, the apical cell with an incurved elongation, distinct foot shape, 3 to 4 × 20 to 50 μm; microconidia were usually absent for isolates other than F12SS1, reniform, septate, 5 to 6 × 6 to 10 μm, in monophialides; chlamydospores were formed in chains, 6 to 13 μm. For the molecular identification, ITS regions of Fusarium isolates were amplified with the primers ITS1 and ITS4, and products were directly sequenced in both strands using the same primers ITS 1 and ITS4. Sequences were compared to known sequences deposited in the NCBI non redundant database to confirm morphological identification. An NCBI BLAST search identified isolates F12SS1, F14SS3, F30SS3, and F25SR as F. acuminatum based on 100% similarity with corresponding sequences. GenBank Accession Nos. were JX114788, JX114785, JX114782, and JX114790, respectively. Pathogenicity tests were performed to fulfill Koch's postulates. 
Inocula were produced by adding a 5-mm diameter plug from a 7-day-old CMA petri dish culture to a previously sterilized 500-ml flask (237.5 g sand, 12.5 g cornmeal, 80 ml SDW), shaken over 9 days, and mixed with sterile soil at 1:3 (v:v). The inocula were transferred to a 500-ml pot, and 10 Aleppo pine seeds were planted with three replicates. After 1 month, all tested isolates caused typical symptoms on seedlings and the proportion of infected seedlings per each isolate was 50, 53.33, 56.66, 60, and 63.33%, respectively. There are many reports of F. acuminatum associated to conifer seedlings in nurseries (1,3) and most of them are conflicting because in some reports this species is considered non-pathogenic or only a seed contaminant and others consider it as a pathogen. To our knowledge, F. acuminatum is a first report on the Aleppo pine in northwestern Algeria, northern Africa. It is also the first report of this fungal species affecting the Aleppo pine throughout the world, and on conifers in Africa and the Mediterranean region. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory. ARS, USDA., Bestville, Maryland, USA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , June 18, 2012. (2) J. F. Leslie and B. A. Summerell. The Fusarium Laboratory Manual. Blackwell Publishing, Ames, Iowa, USA, 2006. (3) D. W. Minter. Cybertruffle's Robigalia, Observations of Fungi and their Associated Organisms. Retrieved from http://www.cybertruffle.org.uk/robigalia/eng/ , June 18, 2012.",2013-04-01 +22974169,Co-processed excipients: a patent review.,"The introduction of high speed tableting machines and the preference of direct compression as a method of tableting have increased the demands on the functionality of excipients mainly in terms of flowability and compressibility. 
Co-processed excipients, wherein excipients are combined by virtue of sub-particle level interaction, have provided an attractive tool for developing high functionality excipients. The multifold advantages offered by co-processed excipients such as production of synergism in functionality of individual components, reduction of company's regulatory concern because of absence of chemical change during co-processing and improvement in physico-chemical properties have expanded their use in the pharmaceutical industry. In the recent years, there has been a spurt in the number of patents filed on co-processed excipients. Hence, the present review focuses on co-processed excipients and their application in pharmaceutical industry. The worldwide databases of European patent office (http://ep.espacenet.com) and United States patent office (www.uspto.gov) were employed to collect the patents and patent applications. The advantages, limitations, basis for the selection of excipients to be co-processed, methods of co-processing and regulatory perspective of co-processed excipients are also briefly discussed.",2013-04-01 +24220508,Position of UNC-13 in the active zone regulates synaptic vesicle release probability and release kinetics.,"The presynaptic active zone proteins UNC-13/Munc13s are essential for synaptic vesicle (SV) exocytosis by directly interacting with SV fusion apparatus. An open question is how their association with active zones, hence their position to Ca(2+) entry sites, regulates SV release. The N-termini of major UNC-13/Munc13 isoforms contain a non-calcium binding C2A domain that mediates protein homo- or hetero-meric interactions. Here, we show that the C2A domain of Caenorhabditis elegans UNC-13 regulates release probability of evoked release and its precise active zone localization. 
Kinetics analysis of SV release supports that the proximity of UNC-13 to Ca(2+) entry sites, mediated by the C2A-domain containing N-terminus, is critical for accelerating neurotransmitter release. Additionally, the C2A domain is specifically required for spontaneous release. These data reveal multiple roles of UNC-13 C2A domain, and suggest that spontaneous release and the fast phase of evoked release may involve a common pool of SVs at the active zone. DOI: http://dx.doi.org/10.7554/eLife.01180.001.",2013-11-12 +25476405,The sunflower downy mildew pathogen Plasmopara halstedii.,"Downy mildew of sunflower is caused by Plasmopara halstedii (Farlow) Berlese & de Toni. Plasmopara halstedii is an obligate biotrophic oomycete pathogen that attacks annual Helianthus species and cultivated sunflower, Helianthus annuus. Depending on the sunflower developmental stage at which infection occurs, the characteristic symptoms range from young seedling death, plant dwarfing, leaf bleaching and sporulation to the production of infertile flowers. Downy mildew attacks can have a great economic impact on sunflower crops, and several Pl resistance genes are present in cultivars to protect them against the disease. Nevertheless, some of these resistances have been overcome by the occurrence of novel isolates of the pathogen showing increased virulence. A better characterization of P. halstedii infection and dissemination mechanisms, and the identification of the molecular basis of the interaction with sunflower, is a prerequisite to efficiently fight this pathogen. This review summarizes what is currently known about P. halstedii, provides new insights into its infection cycle on resistant and susceptible sunflower lines using scanning electron and light microscopy imaging, and sheds light on the pathogenicity factors of P. halstedii obtained from recent molecular data.

Taxonomy

Kingdom Stramenopila; Phylum Oomycota; Class Oomycetes; Order Peronosporales; Family Peronosporaceae; Genus Plasmopara; Species Plasmopara halstedii.

Disease symptoms

Sunflower seedling damping off, dwarfing of the plant, bleaching of leaves, starting from veins, and visible white sporulation, initially on the lower side of cotyledons and leaves. Plasmopara halstedii infection may severely impact sunflower seed yield.

Infection process

In spring, germination of overwintered sexual oospores leads to sunflower root infection. Intercellular hyphae are responsible for systemic plant colonization and the induction of disease symptoms. Under humid and fresh conditions, dissemination structures are produced by the pathogen on all plant organs to release asexual zoosporangia. These zoosporangia play an important role in pathogen dissemination, as they release motile zoospores that are responsible for leaf infections on neighbouring plants.

Disease control

Disease control is obtained by both chemical seed treatment (mefenoxam) and the deployment of dominant major resistance genes, denoted Pl. However, the pathogen has developed fungicide resistance and has overcome some plant resistance genes. Research into more sustainable strategies based on the identification of the molecular basis of the interaction is in progress.

Useful websites

http://www.heliagene.org/HP, http://lipm-helianthus.toulouse.inra.fr/dokuwiki/doku.php?id=start, https://www.heliagene.org/PlasmoparaSpecies (soon available).",2014-12-04 +23351593,Informed consent in the era of biobanks.,"Biorepositories collecting human specimens and health information have proliferated in recent years. Efforts to set a range of policies related to biorepositories, including those related to procedures for obtaining informed consent and recontacting participants, have been hindered by a paucity of data on the diverse forms biorepositories take and the variety of institutional settings where they are established. A recent survey demonstrates in detail, for the first time, the diversity of biorepositories in the USA.

See research article

http://genomemedicine.com/content/5/1/3.",2013-01-25 +26353306,Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments.,"We introduce a new dataset, Human3.6M, of 3.6 Million accurate 3D Human poses, acquired by recording the performance of 5 female and 6 male subjects, under 4 different viewpoints, for training realistic human sensing systems and for evaluating the next generation of human pose estimation models and algorithms. Besides increasing the size of the datasets in the current state-of-the-art by several orders of magnitude, we also aim to complement such datasets with a diverse set of motions and poses encountered as part of typical human activities (taking photos, talking on the phone, posing, greeting, eating, etc.), with additional synchronized image, human motion capture, and time of flight (depth) data, and with accurate 3D body scans of all the subject actors involved. We also provide controlled mixed reality evaluation scenarios where 3D human models are animated using motion capture and inserted using correct 3D geometry, in complex real environments, viewed with moving cameras, and under occlusion. Finally, we provide a set of large-scale statistical models and detailed evaluation baselines for the dataset illustrating its diversity and the scope for improvement by future work in the research community. Our experiments show that our best large-scale model can leverage our full training set to obtain a 20% improvement in performance compared to a training set of the scale of the largest existing public dataset for this problem. Yet the potential for improvement by leveraging higher capacity, more complex models with our large dataset, is substantially vaster and should stimulate future research. 
The dataset together with code for the associated large-scale learning models, features, visualization tools, as well as the evaluation server, is available online at http://vision.imar.ro/human3.6m.",2014-07-01 +22355082,Allele-specific expression analysis methods for high-density SNP microarray data.,"

Motivation

In the past decade, a number of technologies to quantify allele-specific expression (ASE) in a genome-wide manner have become available to researchers. We investigate the application of single-nucleotide polymorphism (SNP) microarrays to this task, exploring data obtained from both cell lines and primary tissue for which both RNA and DNA profiles are available.

Results

We analyze data from two experiments that make use of high-density Illumina Infinium II genotyping arrays to measure ASE. We first preprocess each data set, which involves removal of outlier samples, careful normalization and a two-step filtering procedure to remove SNPs that show no evidence of expression in the samples being analyzed and calls that are clear genotyping errors. We then compare three different tests for detecting ASE, one of which has been previously published and two novel approaches. These tests vary at the level at which they operate (per SNP per individual or per SNP) and in the input data they require. Using SNPs from imprinted genes as true positives for ASE, we observe varying sensitivity for the different testing procedures that improves with increasing sample size. Methods that rely on RNA signal alone were found to perform best across a range of metrics. The top ranked SNPs recovered by all methods appear to be reasonable candidates for ASE.

Availability and implementation

Analysis was carried out in R (http://www.R-project.org/) using existing functions.",2012-02-21 +21892150,FaST linear mixed models for genome-wide association studies.,"We describe factored spectrally transformed linear mixed models (FaST-LMM), an algorithm for genome-wide association studies (GWAS) that scales linearly with cohort size in both run time and memory use. On Wellcome Trust data for 15,000 individuals, FaST-LMM ran an order of magnitude faster than current efficient algorithms. Our algorithm can analyze data for 120,000 individuals in just a few hours, whereas current algorithms fail on data for even 20,000 individuals (http://mscompbio.codeplex.com/).",2011-09-04 +24215029,HPC-CLUST: distributed hierarchical clustering for large sets of nucleotide sequences.,"

Motivation

Nucleotide sequence data are being produced at an ever increasing rate. Clustering such sequences by similarity is often an essential first step in their analysis-intended to reduce redundancy, define gene families or suggest taxonomic units. Exact clustering algorithms, such as hierarchical clustering, scale relatively poorly in terms of run time and memory usage, yet they are desirable because heuristic shortcuts taken during clustering might have unintended consequences in later analysis steps.

Results

Here we present HPC-CLUST, a highly optimized software pipeline that can cluster large numbers of pre-aligned DNA sequences by running on distributed computing hardware. It allocates both memory and computing resources efficiently, and can process more than a million sequences in a few hours on a small cluster.

Availability and implementation

Source code and binaries are freely available at http://meringlab.org/software/hpc-clust/; the pipeline is implemented in C++ and uses the Message Passing Interface (MPI) standard for distributed computing.",2013-11-09 +22139913,GeoPCA: a new tool for multivariate analysis of dihedral angles based on principal component geodesics.,"The GeoPCA package is the first tool developed for multivariate analysis of dihedral angles based on principal component geodesics. Principal component geodesic analysis provides a natural generalization of principal component analysis for data distributed in non-Euclidean space, as in the case of angular data. GeoPCA presents projection of angular data on a sphere composed of the first two principal component geodesics, allowing clustering based on dihedral angles as opposed to Cartesian coordinates. It also provides a measure of the similarity between input structures based on only dihedral angles, in analogy to the root-mean-square deviation of atoms based on Cartesian coordinates. The principal component geodesic approach is shown herein to reproduce clusters of nucleotides observed in an η-θ plot. GeoPCA can be accessed via http://pca.limlab.ibms.sinica.edu.tw.",2011-12-01 +23545399,Epidemiological and phylogenetic analysis of institutional mouse parvoviruses.,"Mouse parvoviruses (MPVs) are small, single-stranded, 5 kb DNA viruses that are subclinical and endemic in many laboratory mouse colonies. MPVs cause more distinctive deleterious effects in immune-compromised or genetically-engineered mice than immuno-competent mice. At the University of Louisville (U of L), there was an unexpected increase of MPV sero-positivity for MPV infections in mouse colonies between January 2006 and February 2007, resulting in strategic husbandry changes aimed at controlling MPV spread throughout the animal facility. To investigate these MPVs, VP2 genes of seven MPVs were cloned and sequenced from eight documented incidences by PCR technology. 
The mutations in these VP2 genes were compared to those found at the Genbank database (NCBI; http://www.ncbi.nlm.nih.gov) and an intra-institutional phylogenetic tree for MPV infections at U of L was constructed. We discovered that the seven MPV isolates were different from those in Genbank and were not identical to each other. These MPVs were designated MPV-UL1 to 7; none of them were minute virus of mice (MVMs). Four isolates could be classified as MPV1, one was classified as MPV2, and two were defined as novel types with less than 96% and 94% homology with existing MPV types. Considering that all seven isolates had mutations in their VP2 genes and no mutations were observed in VP2 genes of MPV during a four-month time period of incubation, we concluded that all seven MPVs isolated at U of L between 2006 and 2007 probably originated from different sources. Serological survey for MPV infections verified that each MPV outbreak was controlled without further contamination within the institution.",2013-03-29 +24794927,A fast and powerful tree-based association test for detecting complex joint effects in case-control studies.,"

Motivation

Multivariate tests derived from the logistic regression model are widely used to assess the joint effect of multiple predictors on a disease outcome in case-control studies. These tests become less optimal if the joint effect cannot be approximated adequately by the additive model. The tree-structure model is an attractive alternative, as it is more apt to capture non-additive effects. However, the tree model is used most commonly for prediction and seldom for hypothesis testing, mainly because of the computational burden associated with the resampling-based procedure required for estimating the significance level.

Results

We designed a fast algorithm for building the tree-structure model and proposed a robust TREe-based Association Test (TREAT) that incorporates an adaptive model selection procedure to identify the optimal tree model representing the joint effect. We applied TREAT as a multilocus association test on >20 000 genes/regions in a study of esophageal squamous cell carcinoma (ESCC) and detected a highly significant novel association between the gene CDKN2B and ESCC ([Formula: see text]). We also demonstrated, through simulation studies, the power advantage of TREAT over other commonly used tests.

Availability and implementation

 The package TREAT is freely available for download at http://www.hanzhang.name/softwares/treat, implemented in C++ and R and supported on 64-bit Linux and 64-bit MS Windows.

Contact

yuka@mail.nih.gov

Supplementary information

 Supplementary data are available at Bioinformatics online.",2014-04-09 +23825550,Complementing the Eukaryotic Protein Interactome.,"

Unlabelled

Protein interaction networks are important for the understanding of regulatory mechanisms, for the explanation of experimental data and for the prediction of protein functions. Unfortunately, most interaction data is available only for model organisms. As a possible remedy, the transfer of interactions to organisms of interest is common practice, but it is not clear when interactions can be transferred from one organism to another and, thus, the confidence in the derived interactions is low. Here, we propose to use a rich set of features to train Random Forests in order to score transferred interactions. We evaluated the transfer from a range of eukaryotic organisms to S. cerevisiae using orthologs. Directly transferred interactions to S. cerevisiae are on average only 24% consistent with the current S. cerevisiae interaction network. By using commonly applied filter approaches the transfer precision can be improved, but at the cost of a large decrease in the number of transferred interactions. Our Random Forest approach uses various features derived from both the target and the source network as well as the ortholog annotations to assign confidence values to transferred interactions. Thereby, we could increase the average transfer consistency to 85%, while still transferring almost 70% of all correctly transferable interactions. We tested our approach for the transfer of interactions to other species and showed that our approach outperforms competing methods for the transfer of interactions to species where no experimental knowledge is available. Finally, we applied our predictor to score transferred interactions to 83 targets species and we were able to extend the available interactome of B. taurus, M. musculus and G. gallus with over 40,000 interactions each. 
Our transferred interaction networks are publicly available via our web interface, which allows users to inspect and download transferred interaction sets of different sizes, for various species, and at specified expected precision levels.

Availability

http://services.bio.ifi.lmu.de/coin-db/.",2013-06-18 +22039212,MissForest--non-parametric missing value imputation for mixed-type data.,"

Motivation

Modern data acquisition based on high-throughput technology is often facing the problem of missing data. Algorithms commonly used in the analysis of such large-scale data often depend on a complete set. Missing value imputation offers a solution to this problem. However, the majority of available imputation methods are restricted to one type of variable only: continuous or categorical. For mixed-type data, the different types are usually handled separately. Therefore, these methods ignore possible relations between variable types. We propose a non-parametric method which can cope with different types of variables simultaneously.

Results

We compare several state of the art methods for the imputation of missing values. We propose and evaluate an iterative imputation method (missForest) based on a random forest. By averaging over many unpruned classification or regression trees, random forest intrinsically constitutes a multiple imputation scheme. Using the built-in out-of-bag error estimates of random forest, we are able to estimate the imputation error without the need of a test set. Evaluation is performed on multiple datasets coming from a diverse selection of biological fields with artificially introduced missing values ranging from 10% to 30%. We show that missForest can successfully handle missing values, particularly in datasets including different types of variables. In our comparative study, missForest outperforms other methods of imputation especially in data settings where complex interactions and non-linear relations are suspected. The out-of-bag imputation error estimates of missForest prove to be adequate in all settings. Additionally, missForest exhibits attractive computational efficiency and can cope with high-dimensional data.

Availability

The package missForest is freely available from http://stat.ethz.ch/CRAN/.

Contact

stekhoven@stat.math.ethz.ch; buhlmann@stat.math.ethz.ch",2011-10-28 +21810899,SEED: efficient clustering of next-generation sequences.,"

Motivation

Similarity clustering of next-generation sequences (NGS) is an important computational problem to study the population sizes of DNA/RNA molecules and to reduce the redundancies in NGS data. Currently, most sequence clustering algorithms are limited by their speed and scalability, and thus cannot handle data with tens of millions of reads.

Results

Here, we introduce SEED-an efficient algorithm for clustering very large NGS sets. It joins sequences into clusters that can differ by up to three mismatches and three overhanging residues from their virtual center. It is based on a modified spaced seed method, called block spaced seeds. Its clustering component operates on the hash tables by first identifying virtual center sequences and then finding all their neighboring sequences that meet the similarity parameters. SEED can cluster 100 million short read sequences in <4 h with a linear time and memory performance. When using SEED as a preprocessing tool on genome/transcriptome assembly data, it was able to reduce the time and memory requirements of the Velvet/Oasis assembler for the datasets used in this study by 60-85% and 21-41%, respectively. In addition, the assemblies contained longer contigs than non-preprocessed data as indicated by 12-27% larger N50 values. Compared with other clustering tools, SEED showed the best performance in generating clusters of NGS data similar to true cluster results with a 2- to 10-fold better time performance. While most of SEED's utilities fall into the preprocessing area of NGS data, our tests also demonstrate its efficiency as stand-alone tool for discovering clusters of small RNA sequences in NGS data from unsequenced organisms.

Availability

The SEED software can be downloaded for free from this site: http://manuals.bioinformatics.ucr.edu/home/seed.

Contact

thomas.girke@ucr.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-02 +22492314,Eoulsan: a cloud computing-based framework facilitating high throughput sequencing analyses.,"

Unlabelled

We developed a modular and scalable framework called Eoulsan, based on the Hadoop implementation of the MapReduce algorithm dedicated to high-throughput sequencing data analysis. Eoulsan allows users to easily set up a cloud computing cluster and automate the analysis of several samples at once using various software solutions available. Our tests with Amazon Web Services demonstrated that the computation cost is linear with the number of instances booked as is the running time with the increasing amounts of data.

Availability and implementation

Eoulsan is implemented in Java, supported on Linux systems and distributed under the LGPL License at: http://transcriptome.ens.fr/eoulsan/",2012-04-05 +22645098,A reference microsatellite kit to assess for genetic diversity of Sorghum bicolor (Poaceae).,"

Premise of the study

Discrepancies in terms of genotyping data are frequently observed when comparing simple sequence repeat (SSR) data sets across genotyping technologies and laboratories. This technical concern introduces biases that hamper any synthetic studies or comparison of genetic diversity between collections. To prevent this for Sorghum bicolor, we developed a control kit of 48 SSR markers.

Methods and results

One hundred seventeen markers were selected along the genome to provide coverage across the length of all 10 sorghum linkage groups. They were tested for polymorphism and reproducibility across two laboratories (Centre de Cooperation Internationale en Recherche Agronomique pour le Developpement [CIRAD], France, and International Crops Research Institute for the Semi-Arid Tropics [ICRISAT], India) using two commonly used genotyping technologies (polyacrylamide gel-based technology with LI-COR sequencing machines and capillary systems with ABI sequencing apparatus) with DNA samples from a diverse set of 48 S. bicolor accessions.

Conclusions

A kit for diversity analysis (http://sat.cirad.fr/sat/sorghum_SSR_kit/) was developed. It contains information on 48 technically robust sorghum microsatellite markers and 10 DNA controls. It can further be used to calibrate sorghum SSR genotyping data acquired with different technologies and compare those to genetic diversity references.",2012-05-29 +23878283,Triple-negative and non-triple-negative invasive breast cancer: association between MR and fluorine 18 fluorodeoxyglucose PET imaging.,"

Purpose

To assess the relationship between parameters measured on dynamic contrast material-enhanced (DCE) magnetic resonance (MR) imaging and fluorine 18 fluorodeoxyglucose (FDG) positron emission tomography (PET)/computed tomography (CT) in primary invasive breast cancer.

Materials and methods

This HIPAA-compliant study was a retrospective review of medical records and therefore approved by the institutional review board without the requirement for informed consent. Patients with a diagnosis of invasive breast cancer from January 2005 through December 2009 who underwent both DCE MR imaging and FDG PET/CT before treatment initiation were retrospectively identified. Fractional volumes were measured for ranges of signal enhancement ratio (SER) values from DCE MR imaging data and compared with maximum standardized uptake values (SUVmax) from FDG PET/CT data. Linear regression analysis was performed to clarify the relationship between SER and SUVmax, adjusting for tumor size, pathologic grade, and receptor status.

Results

Analyzed were 117 invasive breast cancers in 117 patients. Overall, a higher percentage of high washout kinetics was positively associated with SUVmax (1.57% increase in SUVmax per 1% increase in high washout; P = .020), and a higher percentage of low plateau kinetics was negatively associated with SUVmax (1.19% decrease in SUVmax per 1% increase in low plateau; P = .003). These relationships were strongest among triple-negative (TN) tumors (4.34% increase in SUVmax per 1% increase in high washout and 2.65% decrease in SUVmax per 1% increase in low plateau; P = .018 and .004, respectively).

Conclusion

In invasive breast carcinoma, there is a positive relationship between the percentage of high washout and SUVmax and a negative relationship between the percentage of low plateau and SUVmax. These results are stronger in TN tumors.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.13130058/-/DC1.",2013-07-22 +21816082,rapmad: Robust analysis of peptide microarray data.,"

Background

Peptide microarrays offer an enormous potential as a screening tool for peptidomics experiments and have recently seen an increased field of application ranging from immunological studies to systems biology. By allowing the parallel analysis of thousands of peptides in a single run they are suitable for high-throughput settings. Since data characteristics of peptide microarrays differ from DNA oligonucleotide microarrays, computational methods need to be tailored to these specifications to allow a robust and automated data analysis. While follow-up experiments can ensure the specificity of results, sensitivity cannot be recovered in later steps. Providing sensitivity is thus a primary goal of data analysis procedures. To this end we created rapmad (Robust Alignment of Peptide MicroArray Data), a novel computational tool implemented in R.

Results

We evaluated rapmad in antibody reactivity experiments for several thousand peptide spots and compared it to two existing algorithms for the analysis of peptide microarrays. rapmad displays competitive and superior behavior to existing software solutions. Particularly, it shows substantially improved sensitivity for low intensity settings without sacrificing specificity. It thereby contributes to increasing the effectiveness of high throughput screening experiments.

Conclusions

rapmad allows the robust and sensitive, automated analysis of high-throughput peptide array data. The rapmad R-package as well as the data sets are available from http://www.tron-mz.de/compmed.",2011-08-04 +21777450,BioIMAX: a Web 2.0 approach for easy exploratory and collaborative access to multivariate bioimage data.,"

Background

Innovations in biological and biomedical imaging produce complex high-content and multivariate image data. For decision-making and generation of hypotheses, scientists need novel information technology tools that enable them to visually explore and analyze the data and to discuss and communicate results or findings with collaborating experts from various places.

Results

In this paper, we present a novel Web 2.0 approach, BioIMAX, for the collaborative exploration and analysis of multivariate image data by combining the web's collaboration and distribution architecture with the interface interactivity and computation power of desktop applications, recently called rich internet application.

Conclusions

BioIMAX allows scientists to discuss and share data or results with collaborating experts and to visualize, annotate, and explore multivariate image data within one web-based platform from any location via a standard web browser requiring only a username and a password. BioIMAX can be accessed at http://ani.cebitec.uni-bielefeld.de/BioIMAX with the username ""test"" and the password ""test1"" for testing purposes.",2011-07-21 +22537039,Reconstructing cancer genomes from paired-end sequencing data.,"

Background

A cancer genome is derived from the germline genome through a series of somatic mutations. Somatic structural variants - including duplications, deletions, inversions, translocations, and other rearrangements - result in a cancer genome that is a scrambling of intervals, or ""blocks"" of the germline genome sequence. We present an efficient algorithm for reconstructing the block organization of a cancer genome from paired-end DNA sequencing data.

Results

By aligning paired reads from a cancer genome - and a matched germline genome, if available - to the human reference genome, we derive: (i) a partition of the reference genome into intervals; (ii) adjacencies between these intervals in the cancer genome; (iii) an estimated copy number for each interval. We formulate the Copy Number and Adjacency Genome Reconstruction Problem of determining the cancer genome as a sequence of the derived intervals that is consistent with the measured adjacencies and copy numbers. We design an efficient algorithm, called Paired-end Reconstruction of Genome Organization (PREGO), to solve this problem by reducing it to an optimization problem on an interval-adjacency graph constructed from the data. The solution to the optimization problem results in an Eulerian graph, containing an alternating Eulerian tour that corresponds to a cancer genome that is consistent with the sequencing data. We apply our algorithm to five ovarian cancer genomes that were sequenced as part of The Cancer Genome Atlas. We identify numerous rearrangements, or structural variants, in these genomes, analyze reciprocal vs. non-reciprocal rearrangements, and identify rearrangements consistent with known mechanisms of duplication such as tandem duplications and breakage/fusion/bridge (B/F/B) cycles.

Conclusions

We demonstrate that PREGO efficiently identifies complex and biologically relevant rearrangements in cancer genome sequencing data. An implementation of the PREGO algorithm is available at http://compbio.cs.brown.edu/software/.",2012-04-19 +22253293,SNP calling using genotype model selection on high-throughput sequencing data.,"

Motivation

A review of the available single nucleotide polymorphism (SNP) calling procedures for Illumina high-throughput sequencing (HTS) platform data reveals that most rely mainly on base-calling and mapping qualities as sources of error when calling SNPs. Thus, errors not involved in base-calling or alignment, such as those in genomic sample preparation, are not accounted for.

Results

A novel method of consensus and SNP calling, Genotype Model Selection (GeMS), is given which accounts for the errors that occur during the preparation of the genomic sample. Simulations and real data analyses indicate that GeMS has the best performance balance of sensitivity and positive predictive value among the tested SNP callers.

Availability

The GeMS package can be downloaded from https://sites.google.com/a/bioinformatics.ucr.edu/xinping-cui/home/software or http://computationalbioenergy.org/software.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-16 +22583024,TOPPAS: a graphical workflow editor for the analysis of high-throughput proteomics data.,"Mass spectrometry coupled to high-performance liquid chromatography (HPLC-MS) is evolving more quickly than ever. A wide range of different instrument types and experimental setups are commonly used. Modern instruments acquire huge amounts of data, thus requiring tools for an efficient and automated data analysis. Most existing software for analyzing HPLC-MS data is monolithic and tailored toward a specific application. A more flexible alternative consists of pipeline-based tool kits allowing the construction of custom analysis workflows from small building blocks, e.g., the Trans Proteomics Pipeline (TPP) or The OpenMS Proteomics Pipeline (TOPP). One drawback, however, is the hurdle of setting up complex workflows using command line tools. We present TOPPAS, The OpenMS Proteomics Pipeline ASsistant, a graphical user interface (GUI) for rapid composition of HPLC-MS analysis workflows. Workflow construction reduces to simple drag-and-drop of analysis tools and adding connections in between. Integration of external tools into these workflows is possible as well. Once workflows have been developed, they can be deployed in other workflow management systems or batch processing systems in a fully automated fashion. The implementation is portable and has been tested under Windows, Mac OS X, and Linux. TOPPAS is open-source software and available free of charge at http://www.OpenMS.de/TOPPAS .",2012-05-24 +22559164,GeSICA: genome segmentation from intra-chromosomal associations.,"

Background

Various aspects of genome organization have been explored based on data from distinct technologies, including histone modification ChIP-Seq, 3C, and its derivatives. Recently developed Hi-C techniques enable the genome wide mapping of DNA interactomes, thereby providing the opportunity to study genome organization in detail, but these methods also pose challenges in methodology development.

Results

We developed Genome Segmentation from Intra Chromosomal Associations, or GeSICA, to explore genome organization and applied the method to Hi-C data in human GM06990 and K562 cells. GeSICA calculates a simple logged ratio to efficiently segment the human genome into regions with two distinct states that correspond to rich and poor functional element states. Inside the rich regions, Markov Clustering was subsequently applied to segregate the regions into more detailed clusters. The binding sites of the insulator, cohesion, and transcription complexes are enriched in the boundaries between neighboring clusters, indicating that inferred clusters may have fine organizational features.

Conclusions

Our study presents a novel analysis method, known as GeSICA, which gives insight into genome organization based on Hi-C data. GeSICA is open source and freely available at: http://web.tongji.edu.cn/~zhanglab/GeSICA/",2012-05-04 +23527686,Systematic investigation of predicted effect of nonsynonymous SNPs in human prion protein gene: a molecular modeling and molecular dynamics study.,"Nonsynonymous mutations in the human prion protein (HuPrP) gene contribute to the conversion of HuPrP(C) to HuPrP(Sc) and amyloid formation which in turn leads to prion diseases such as familial Creutzfeldt-Jakob disease and Gerstmann-Straussler-Scheinker disease. In order to better understand and predict the role of HuPrP mutations, we developed the following procedure: first, we consulted the Human Genome Variation database and dbSNP databases, and we reviewed literature for the retrieval of aggregation-related nsSNPs of the HuPrP gene. Next, we used three different methods - Polymorphism Phenotyping (PolyPhen), PANTHER, and Auto-Mute - to predict the effect of nsSNPs on the phenotype. We compared the predictions against experimentally reported effects of these nsSNPs to evaluate the accuracy of the three methods: PolyPhen predicted 17 out of 22 nsSNPs as ""probably damaging"" or ""possibly damaging""; PANTHER predicted 8 out of 22 nsSNPs as ""Deleterious""; and Auto-Mute predicted 9 out of 20 nsSNPs as ""Disease"". Finally, structural analyses of the native protein against mutated models were investigated using molecular modeling and molecular dynamics (MD) simulation methods. In addition to comparing predictor methods, our results show the applicability of our procedure for the prediction of damaging nsSNPs. Our study also elucidates the obvious relationship between predicted values of aggregation-related nsSNPs in HuPrP gene and molecular modeling and MD simulations results. 
In conclusion, this procedure would enable researchers to select outstanding candidates for extensive MD simulations in order to decipher more details of HuPrP aggregation. An animated interactive 3D complement (I3DC) is available in Proteopedia at http://proteopedia.org/w/Journal:JBSD:34.",2013-03-25 +24970840,Induced lexico-syntactic patterns improve information extraction from online medical forums.,"

Objective

To reliably extract two entity types, symptoms and conditions (SCs), and drugs and treatments (DTs), from patient-authored text (PAT) by learning lexico-syntactic patterns from data annotated with seed dictionaries.

Background and significance

Despite the increasing quantity of PAT (eg, online discussion threads), tools for identifying medical entities in PAT are limited. When applied to PAT, existing tools either fail to identify specific entity types or perform poorly. Identification of SC and DT terms in PAT would enable exploration of efficacy and side effects for not only pharmaceutical drugs, but also for home remedies and components of daily care.

Materials and methods

We use SC and DT term dictionaries compiled from online sources to label several discussion forums from MedHelp (http://www.medhelp.org). We then iteratively induce lexico-syntactic patterns corresponding strongly to each entity type to extract new SC and DT terms.

Results

Our system is able to extract symptom descriptions and treatments absent from our original dictionaries, such as 'LADA', 'stabbing pain', and 'cinnamon pills'. Our system extracts DT terms with 58-70% F1 score and SC terms with 66-76% F1 score on two forums from MedHelp. We show improvements over MetaMap, OBA, a conditional random field-based classifier, and a previous pattern learning approach.

Conclusions

Our entity extractor based on lexico-syntactic patterns is a successful and preferable technique for identifying specific entity types in PAT. To the best of our knowledge, this is the first paper to extract SC and DT entities from PAT. We exhibit learning of informal terms often used in PAT but missing from typical dictionaries.",2014-06-26 +23522376,LASAGNA: a novel algorithm for transcription factor binding site alignment.,"

Background

Scientists routinely scan DNA sequences for transcription factor (TF) binding sites (TFBSs). Most of the available tools rely on position-specific scoring matrices (PSSMs) constructed from aligned binding sites. Because of the resolutions of assays used to obtain TFBSs, databases such as TRANSFAC, ORegAnno and PAZAR store unaligned variable-length DNA segments containing binding sites of a TF. These DNA segments need to be aligned to build a PSSM. While the TRANSFAC database provides scoring matrices for TFs, nearly 78% of the TFs in the public release do not have matrices available. As work on TFBS alignment algorithms has been limited, it is highly desirable to have an alignment algorithm tailored to TFBSs.

Results

We designed a novel algorithm named LASAGNA, which is aware of the lengths of input TFBSs and utilizes position dependence. Results on 189 TFs of 5 species in the TRANSFAC database showed that our method significantly outperformed ClustalW2 and MEME. We further compared a PSSM method dependent on LASAGNA to an alignment-free TFBS search method. Results on 89 TFs whose binding sites can be located in genomes showed that our method is significantly more precise at fixed recall rates. Finally, we described LASAGNA-ChIP, a more sophisticated version for ChIP (Chromatin immunoprecipitation) experiments. Under the one-per-sequence model, it showed comparable performance with MEME in discovering motifs in ChIP-seq peak sequences.

Conclusions

We conclude that the LASAGNA algorithm is simple and effective in aligning variable-length binding sites. It has been integrated into a user-friendly webtool for TFBS search and visualization called LASAGNA-Search. The tool currently stores precomputed PSSM models for 189 TFs and 133 TFs built from TFBSs in the TRANSFAC Public database (release 7.0) and the ORegAnno database (08Nov10 dump), respectively. The webtool is available at http://biogrid.engr.uconn.edu/lasagna_search/.",2013-03-24 +22974120,De novo assembly of highly diverse viral populations.,"

Background

Extensive genetic diversity in viral populations within infected hosts and the divergence of variants from existing reference genomes impede the analysis of deep viral sequencing data. A de novo population consensus assembly is valuable both as a single linear representation of the population and as a backbone on which intra-host variants can be accurately mapped. The availability of consensus assemblies and robustly mapped variants are crucial to the genetic study of viral disease progression, transmission dynamics, and viral evolution. Existing de novo assembly techniques fail to robustly assemble ultra-deep sequence data from genetically heterogeneous populations such as viruses into full-length genomes due to the presence of extensive genetic variability, contaminants, and variable sequence coverage.

Results

We present VICUNA, a de novo assembly algorithm suitable for generating consensus assemblies from genetically heterogeneous populations. We demonstrate its effectiveness on Dengue, Human Immunodeficiency and West Nile viral populations, representing a range of intra-host diversity. Compared to state-of-the-art assemblers designed for haploid or diploid systems, VICUNA recovers full-length consensus and captures insertion/deletion polymorphisms in diverse samples. Final assemblies maintain a high base calling accuracy. VICUNA program is publicly available at: http://www.broadinstitute.org/scientific-community/science/projects/viral-genomics/viral-genomics-analysis-software.

Conclusions

We developed VICUNA, a publicly available software tool, that enables consensus assembly of ultra-deep sequence derived from diverse viral populations. While VICUNA was developed for the analysis of viral populations, its application to other heterogeneous sequence data sets such as metagenomic or tumor cell population samples may prove beneficial in these fields of research.",2012-09-13 +24060135,A user's guide to quantitative and comparative analysis of metagenomic datasets.,"Metagenomics has revolutionized microbiological studies during the past decade and provided new insights into the diversity, dynamics, and metabolic potential of natural microbial communities. However, metagenomics still represents a field in development, and standardized tools and approaches to handle and compare metagenomes have not been established yet. An important reason accounting for the latter is the continuous changes in the type of sequencing data available, for example, long versus short sequencing reads. Here, we provide a guide to bioinformatic pipelines developed to accomplish the following tasks, focusing primarily on those developed by our team: (i) assemble a metagenomic dataset; (ii) determine the level of sequence coverage obtained and the amount of sequencing required to obtain complete coverage; (iii) identify the taxonomic affiliation of a metagenomic read or assembled contig; and (iv) determine differentially abundant genes, pathways, and species between different datasets. Most of these pipelines do not depend on the type of sequences available or can be easily adjusted to fit different types of sequences, and are freely available (for instance, through our lab Web site: http://www.enve-omics.gatech.edu/). The limitations of current approaches, as well as the computational aspects that can be further improved, will also be briefly discussed. 
The work presented here provides practical guidelines on how to perform metagenomic analysis of microbial communities characterized by varied levels of diversity and establishes approaches to handle the resulting data, independent of the sequencing platform employed.",2013-01-01 +22563066,Identifying differentially expressed transcripts from RNA-seq data with biological variation.,"

Motivation

High-throughput sequencing enables expression analysis at the level of individual transcripts. The analysis of transcriptome expression levels and differential expression (DE) estimation requires a probabilistic approach to properly account for ambiguity caused by shared exons and finite read sampling as well as the intrinsic biological variance of transcript expression.

Results

We present Bayesian inference of transcripts from sequencing data (BitSeq), a Bayesian approach for estimation of transcript expression level from RNA-seq experiments. Inferred relative expression is represented by Markov chain Monte Carlo samples from the posterior probability distribution of a generative model of the read data. We propose a novel method for DE analysis across replicates which propagates uncertainty from the sample-level model while modelling biological variance using an expression-level-dependent prior. We demonstrate the advantages of our method using simulated data as well as an RNA-seq dataset with technical and biological replication for both studied conditions.

Availability

The implementation of the transcriptome expression estimation and differential expression analysis, BitSeq, has been written in C++ and Python. The software is available online from http://code.google.com/p/bitseq/, version 0.4 was used for generating results presented in this article.",2012-05-03 +23407358,GROMACS 4.5: a high-throughput and highly parallel open source molecular simulation toolkit.,"

Motivation

Molecular simulation has historically been a low-throughput technique, but faster computers and increasing amounts of genomic and structural data are changing this by enabling large-scale automated simulation of, for instance, many conformers or mutants of biomolecules with or without a range of ligands. At the same time, advances in performance and scaling now make it possible to model complex biomolecular interaction and function in a manner directly testable by experiment. These applications share a need for fast and efficient software that can be deployed on massive scale in clusters, web servers, distributed computing or cloud resources.

Results

Here, we present a range of new simulation algorithms and features developed during the past 4 years, leading up to the GROMACS 4.5 software package. The software now automatically handles wide classes of biomolecules, such as proteins, nucleic acids and lipids, and comes with all commonly used force fields for these molecules built-in. GROMACS supports several implicit solvent models, as well as new free-energy algorithms, and the software now uses multithreading for efficient parallelization even on low-end systems, including windows-based workstations. Together with hand-tuned assembly kernels and state-of-the-art parallelization, this provides extremely high performance and cost efficiency for high-throughput as well as massively parallel simulations.

Availability

GROMACS is an open source and free software available from http://www.gromacs.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-13 +22563065,BRAT-BW: efficient and accurate mapping of bisulfite-treated reads.,"

Summary

We introduce BRAT-BW, a fast, accurate and memory-efficient tool that maps bisulfite-treated short reads (BS-seq) to a reference genome using the FM-index (Burrows-Wheeler transform). BRAT-BW is significantly more memory efficient and faster on longer reads than current state-of-the-art tools for BS-seq data, without compromising on accuracy. BRAT-BW is a part of a software suite for genome-wide single base-resolution methylation data analysis that supports single and paired-end reads and includes a tool for estimation of methylation level at each cytosine.

Availability

The software is available in the public domain at http://compbio.cs.ucr.edu/brat/.",2012-05-03 +25204236,The 5th National Audit Project (NAP5) on accidental awareness during general anaesthesia: summary of main findings and risk factors.,"We present the main findings of the 5th National Audit Project on accidental awareness during general anaesthesia. Incidences were estimated using reports of accidental awareness as the numerator, and a parallel national anaesthetic activity survey to provide denominator data. The incidence of certain/probable and possible accidental awareness cases was ~1:19 600 anaesthetics (95% CI 1:16 700-23 450). However, there was considerable variation across subtypes of techniques or subspecialties. The incidence with neuromuscular blockade was ~1:8200 (1:7030-9700), and without it was ~1:135 900 (1:78 600-299 000). The cases of accidental awareness during general anaesthesia reported to 5th National Audit Project were overwhelmingly cases of unintended awareness during neuromuscular blockade. The incidence of accidental awareness during caesarean section was ~1:670 (1:380-1300). Two thirds (82, 66%) of cases of accidental awareness experiences arose in the dynamic phases of anaesthesia, namely induction of and emergence from anaesthesia. During induction of anaesthesia, contributory factors included: use of thiopental; rapid sequence induction; obesity; difficult airway management; neuromuscular blockade; and interruptions of anaesthetic delivery during movement from anaesthetic room to theatre. During emergence from anaesthesia, residual paralysis was perceived by patients as accidental awareness, and commonly related to a failure to ensure full return of motor capacity. One third (43, 33%) of accidental awareness events arose during the maintenance phase of anaesthesia, most due to problems at induction or towards the end of anaesthesia. 
Factors increasing the risk of accidental awareness included: female sex; age (younger adults, but not children); obesity; anaesthetist seniority (junior trainees); previous awareness; out-of-hours operating; emergencies; type of surgery (obstetric, cardiac, thoracic); and use of neuromuscular blockade. The following factors were not risk factors for accidental awareness: ASA physical status; race; and use or omission of nitrous oxide. We recommend that an anaesthetic checklist, to be an integral part of the World Health Organization Safer Surgery checklist, is introduced as an aid to preventing accidental awareness. This paper is a shortened version describing the main findings from 5th National Audit Project - the full report can be found at http://www.nationalauditprojects.org.uk/NAP5_home#pt.",2014-10-01 +23531354,Patchwork: allele-specific copy number analysis of whole-genome sequenced tumor tissue.,"Whole-genome sequencing of tumor tissue has the potential to provide comprehensive characterization of genomic alterations in tumor samples. We present Patchwork, a new bioinformatic tool for allele-specific copy number analysis using whole-genome sequencing data. Patchwork can be used to determine the copy number of homologous sequences throughout the genome, even in aneuploid samples with moderate sequence coverage and tumor cell content. No prior knowledge of average ploidy or tumor cell content is required. Patchwork is freely available as an R package, installable via R-Forge (http://patchwork.r-forge.r-project.org/).",2013-03-25 +22308096,xQTL workbench: a scalable web environment for multi-level QTL analysis.,"

Summary

xQTL workbench is a scalable web platform for the mapping of quantitative trait loci (QTLs) at multiple levels: for example gene expression (eQTL), protein abundance (pQTL), metabolite abundance (mQTL) and phenotype (phQTL) data. Popular QTL mapping methods for model organism and human populations are accessible via the web user interface. Large calculations scale easily on to multi-core computers, clusters and Cloud. All data involved can be uploaded and queried online: markers, genotypes, microarrays, NGS, LC-MS, GC-MS, NMR, etc. When new data types come available, xQTL workbench is quickly customized using the Molgenis software generator.

Availability

xQTL workbench runs on all common platforms, including Linux, Mac OS X and Windows. An online demo system, installation guide, tutorials, software and source code are available under the LGPL3 license from http://www.xqtl.org.

Contact

m.a.swertz@rug.nl.",2012-02-03 +24456584,Expression of glucosylceramide synthase in invasive ductal breast cancer may be correlated with high estrogen receptor status and low HER-2 status.,"

Background and objectives

Breast cancer is one of the most common causes of cancer-related deaths in women worldwide. Studies on glucosylceramide synthase (GCS) activity suggest that this enzyme has a role in the development of multidrug resistance in many cancer cells. However, few studies have shown the expression of GCS in invasive ductal breast cancer and breast intraductal proliferative lesions.

Methods

In total, 196 samples from patients with invasive ductal breast cancer and 61 samples of breast intraductal proliferative lesions were collected. Immunohistochemical analyses were conducted to determine the expression of GCS and other related proteins.

Results

Expression of GCS was high in estrogen receptor (ER)-positive and HER-2 negative samples. In contrast, the expression of GCS in invasive ductal cancer was significantly lower than that in intraductal proliferative lesions.

Conclusion

Our data demonstrate a correlation between the expression of the GCS protein and ER-positive/HER-2 negative breast cancer. Furthermore, in contrast to previous reports, the expression of GCS protein was shown to be much higher in ductal carcinoma in-situ than that in invasive ductal cancer.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1559854430111589.",2014-01-23 +23517090,Application of text-mining for updating protein post-translational modification annotation in UniProtKB.,"

Background

The annotation of protein post-translational modifications (PTMs) is an important task of UniProtKB curators and, with continuing improvements in experimental methodology, an ever greater number of articles are being published on this topic. To help curators cope with this growing body of information we have developed a system which extracts information from the scientific literature for the most frequently annotated PTMs in UniProtKB.

Results

The procedure uses a pattern-matching and rule-based approach to extract sentences with information on the type and site of modification. A ranked list of protein candidates for the modification is also provided. For PTM extraction, precision varies from 57% to 94%, and recall from 75% to 95%, according to the type of modification. The procedure was used to track new publications on PTMs and to recover potential supporting evidence for phosphorylation sites annotated based on the results of large scale proteomics experiments.

Conclusions

The information retrieval and extraction method we have developed in this study forms the basis of a simple tool for the manual curation of protein post-translational modifications in UniProtKB/Swiss-Prot. Our work demonstrates that even simple text-mining tools can be effectively adapted for database curation tasks, providing that a thorough understanding of the working process and requirements are first obtained. This system can be accessed at http://eagl.unige.ch/PTM/.",2013-03-22 +25646324,"A high whey protein-, leucine-, and vitamin D-enriched supplement preserves muscle mass during intentional weight loss in obese older adults: a double-blind randomized controlled trial.","

Background

Intentional weight loss in obese older adults is a risk factor for muscle loss and sarcopenia.

Objective

The objective was to examine the effect of a high whey protein-, leucine-, and vitamin D-enriched supplement on muscle mass preservation during intentional weight loss in obese older adults.

Design

We included 80 obese older adults in a double-blind randomized controlled trial. During a 13-wk weight loss program, all subjects followed a hypocaloric diet (-600 kcal/d) and performed resistance training 3×/wk. Subjects were randomly allocated to a high whey protein-, leucine-, and vitamin D-enriched supplement including a mix of other macro- and micronutrients (150 kcal, 21 g protein; 10×/wk, intervention group) or an isocaloric control. The primary outcome was change in appendicular muscle mass. The secondary outcomes were body composition, handgrip strength, and physical performance. Data were analyzed by using ANCOVA and mixed linear models with sex and baseline value as covariates.

Results

At baseline, mean ± SD age was 63 ± 5.6 y, and body mass index (in kg/m(2)) was 33 ± 4.4. During the trial, protein intake was 1.11 ± 0.28 g · kg body weight(-1) · d(-1) in the intervention group compared with 0.85 ± 0.24 g · kg body weight(-1) · d(-1) in the control group (P < 0.001). Both intervention and control groups decreased in body weight (-3.4 ± 3.6 kg and -2.8 ± 2.8 kg; both P < 0.001) and fat mass (-3.2 ± 3.1 kg and -2.5 ± 2.4 kg; both P < 0.001), with no differences between groups. The 13-wk change in appendicular muscle mass, however, was different in the intervention and control groups [+0.4 ± 1.2 kg and -0.5 ± 2.1 kg, respectively; β = 0.95 kg (95% CI: 0.09, 1.81); P = 0.03]. Muscle strength and function improved over time without significant differences between groups.

Conclusion

A high whey protein-, leucine-, and vitamin D-enriched supplement compared with isocaloric control preserves appendicular muscle mass in obese older adults during a hypocaloric diet and resistance exercise program and might therefore reduce the risk of sarcopenia. This trial was registered at the Dutch Trial Register (http://www.trialregister.nl) as NTR2751.",2014-11-26 +23514094,Expansion of the enzymatic repertoire of the CAZy database to integrate auxiliary redox enzymes.,"

Background

Since its inception, the carbohydrate-active enzymes database (CAZy; http://www.cazy.org) has described the families of enzymes that cleave or build complex carbohydrates, namely the glycoside hydrolases (GH), the polysaccharide lyases (PL), the carbohydrate esterases (CE), the glycosyltransferases (GT) and their appended non-catalytic carbohydrate-binding modules (CBM). The recent discovery that members of families CBM33 and family GH61 are in fact lytic polysaccharide monooxygenases (LPMO), demands a reclassification of these families into a suitable category.

Results

Because lignin is invariably found together with polysaccharides in the plant cell wall and because lignin fragments are likely to act in concert with (LPMO), we have decided to join the families of lignin degradation enzymes to the LPMO families and launch a new CAZy class that we name ""Auxiliary Activities"" in order to accommodate a range of enzyme mechanisms and substrates related to lignocellulose conversion. Comparative analyses of these auxiliary activities in 41 fungal genomes reveal a pertinent division of several fungal groups and subgroups combining their phylogenetic origin and their nutritional mode (white vs. brown rot).

Conclusions

The new class introduced in the CAZy database extends the traditional CAZy families, and provides a better coverage of the full extent of the lignocellulose breakdown machinery.",2013-03-21 +22641453,CORAL: QSPR modeling of rate constants of reactions between organic aromatic pollutants and hydroxyl radical.,"The rate constants (K(OH)) of reactions between 78 organic aromatic pollutants and hydroxyl radical were examined. Simplified molecular input line entry system was used as representation of the molecular structure of the pollutants. Quantitative structure-property relationships was developed using CORAL software (http://www.insilico.eu/CORAL) for four random splits of the data into the subtraining, calibration, and test sets. The obtained results reveal good predictive potential of the applied approach: correlation coefficients (r(2)) for the test sets of the four random splits are 0.75, 0.91, 0.84, and 0.80. Using the Monte Carlo method CORAL software generated the optimal descriptors for one-variable models. The reproducibility of each model was tested performing three runs of the Monte Carlo optimization. The current data were compared to previous results and discussed.",2012-05-28 +24292360,RNASurface: fast and accurate detection of locally optimal potentially structured RNA segments.,"

Motivation

During the past decade, new classes of non-coding RNAs (ncRNAs) and their unexpected functions were discovered. Stable secondary structure is the key feature of many non-coding RNAs. Taking into account huge amounts of genomic data, development of computational methods to survey genomes for structured RNAs remains a pressing problem, especially when homologous sequences are not available for comparative analysis. Existing programs scan genomes with a fixed window by efficiently constructing a matrix of RNA minimum free energies. A wide range of lengths of structured RNAs necessitates the use of many different window lengths, which substantially increases the output size and computational effort.

Results

In this article, we present an algorithm RNASurface to efficiently scan genomes by constructing a matrix of significance of RNA secondary structures and to identify all locally optimal structured RNA segments up to a predefined size. RNASurface significantly improves precision of identification of known ncRNA in Bacillus subtilis.

Availability and implementation

RNASurface C source code is available from http://bioinf.fbb.msu.ru/RNASurface/downloads.html.",2013-11-28 +22809306,Generation of synthetic data and experimental designs in evaluating interactions for association studies.,"Complex diseases, by definition, involve multiple factors, including gene-gene interactions and gene-environment interactions. Researchers commonly rely on simulated data to evaluate their approaches for detecting high-order interactions in disease gene mapping. A publicly available simulation program to generate samples involving complex genetic and environmental interactions is of great interest to the community. We have developed a software package named gs1.0, which has been widely used since its publication. In this article, we present an upgraded version gs2.0, which not only inherits its capacity to generate realistic genotype data but also provides great functionality and flexibility to simulate various interaction models. In addition to a standalone version, a user-friendly web server (http://cbc.case.edu/gs) has been set up to help users to build complex interaction models. Furthermore, by utilizing three three-locus models as an example, we have shown how realistic model parameters can be chosen in generating simulated data.",2012-02-01 +23628380,EMu: probabilistic inference of mutational processes and their localization in the cancer genome.,"The spectrum of mutations discovered in cancer genomes can be explained by the activity of a few elementary mutational processes. We present a novel probabilistic method, EMu, to infer the mutational signatures of these processes from a collection of sequenced tumors. EMu naturally incorporates the tumor-specific opportunity for different mutation types according to sequence composition. Applying EMu to breast cancer data, we derive detailed maps of the activity of each process, both genome-wide and within specific local regions of the genome. 
Our work provides new opportunities to study the mutational processes underlying cancer development. EMu is available at http://www.sanger.ac.uk/resources/software/emu/.",2013-04-29 +22536855,PROSO II--a new method for protein solubility prediction.,"Many fields of science and industry depend on efficient production of active protein using heterologous expression in Escherichia coli. The solubility of proteins upon expression is dependent on their amino acid sequence. Prediction of solubility from sequence is therefore highly valuable. We present a novel machine-learning-based model called PROSO II which makes use of new classification methods and growth in experimental data to improve coverage and accuracy of solubility predictions. The classification algorithm is organized as a two-layered structure in which the output of a primary Parzen window model for sequence similarity and a logistic regression classifier of amino acid k-mer composition serve as input for a second-level logistic regression classifier. Compared with previously published research our model is trained on five times more data than used by any other method before (82 000 proteins). When tested on a separate holdout set not used at any point of method development our server attained the best results in comparison with other currently available methods: accuracy 75.4%, Matthew's correlation coefficient 0.39, sensitivity 0.731, specificity 0.759, gain (soluble) 2.263. In summary, due to utilization of cutting edge machine learning technologies combined with the largest currently available experimental data set the PROSO II server constitutes a substantial improvement in protein solubility predictions. PROSO II is available at http://mips.helmholtz-muenchen.de/prosoII.",2012-05-21 +24009897,EVpedia: an integrated database of high-throughput data for systemic analyses of extracellular vesicles. 
,"Secretion of extracellular vesicles is a general cellular activity that spans the range from simple unicellular organisms (e.g. archaea; Gram-positive and Gram-negative bacteria) to complex multicellular ones, suggesting that this extracellular vesicle-mediated communication is evolutionarily conserved. Extracellular vesicles are spherical bilayered proteolipids with a mean diameter of 20-1,000 nm, which are known to contain various bioactive molecules including proteins, lipids, and nucleic acids. Here, we present EVpedia, which is an integrated database of high-throughput datasets from prokaryotic and eukaryotic extracellular vesicles. EVpedia provides high-throughput datasets of vesicular components (proteins, mRNAs, miRNAs, and lipids) present on prokaryotic, non-mammalian eukaryotic, and mammalian extracellular vesicles. In addition, EVpedia also provides an array of tools, such as the search and browse of vesicular components, Gene Ontology enrichment analysis, network analysis of vesicular proteins and mRNAs, and a comparison of vesicular datasets by ortholog identification. Moreover, publications on extracellular vesicle studies are listed in the database. This free web-based database of EVpedia (http://evpedia.info) might serve as a fundamental repository to stimulate the advancement of extracellular vesicle studies and to elucidate the novel functions of these complex extracellular organelles.",2013-03-19 +23510227,Saliva as a potential tool for cystic fibrosis diagnosis.,"

Background

Saliva and sweat are modified by cystic fibrosis (CF). In both cases the chloride and sodium ion concentrations for healthy subjects and CF patients differ, this representing a possible alternative tool for CF diagnosis. In this context, the aim of this study was to compare the concentrations of these ions in saliva samples taken from CF patients and healthy subjects.

Methods

A case-control study was carried out at a university CF center, in which the saliva samples were analyzed on an ABL 835 Radiometer® to determine the ion concentration.

Results

For the CF patients (n = 80) the values for the biochemical parameters of chloride, potassium and sodium ion concentration were higher (p < 0.009) and the volume and pH of the saliva were lower than in the case of healthy subjects (p < 0.009). For the healthy subjects group (n = 84) versus CF patients, according to the ROC curve, the values for sodium were: cutoff: 13.5 mmol/L, sensitivity: 73.4%, specificity: 70.6%; and for chloride: cutoff: 20 mmol/L, sensitivity: 68.1%, specificity: 72.9%.

Conclusions

The chloride and sodium concentrations in the saliva samples were higher for CF patients in comparison with healthy subjects. Thus, saliva as a tool for CF diagnosis can be considered a new challenge, and a population study including patients in all age classes needs to be performed, in different countries over the world, to extend the database to include a broad spectrum of information in order to identify normal ion concentration ranges for CF patients according to age, genotype and environment.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2614233148750145.",2013-03-19 +23740747,Pathway Processor 2.0: a web resource for pathway-based analysis of high-throughput data.,"

Summary

Pathway Processor 2.0 is a web application designed to analyze high-throughput datasets, including but not limited to microarray and next-generation sequencing, using a pathway centric logic. In addition to well-established methods such as the Fisher's test and impact analysis, Pathway Processor 2.0 offers innovative methods that convert gene expression into pathway expression, leading to the identification of differentially regulated pathways in a dataset of choice.

Availability and implementation

Pathway Processor 2.0 is available as a web service at http://compbiotoolbox.fmach.it/pathwayProcessor/. Sample datasets to test the functionality can be used directly from the application.

Contact

duccio.cavalieri@fmach.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-06-05 +24946687,Silencing BMP-2 expression inhibits A549 and H460 cell proliferation and migration.,"

Background

Bone morphogenetic protein 2 (BMP-2) is a member of the TGF-β superfamily that is closely correlated with many malignancies, particularly lung cancer. However, the effects of silenced BMP-2 on lung cancer cell proliferation and migration are not clear.

Methods

Using quantitative real-time RT-PCR, BMP-2 mRNA expression was detected in 61 non-small cell lung cancer (NSCLC) samples. Survival curves were generated using follow-up data. Relationships between clinical or pathological characteristics and prognosis were analyzed. Cell viability assays and transwell migration assays were used to evaluate the effects of BMP-2 silencing on cell proliferation and migration of A549 and H460 cells.

Results

BMP-2 mRNA expression was higher in NSCLC tissues compared to matched adjacent normal tissues (P<0.01). High BMP-2 expression levels were significantly associated with the occurrence of lymph node metastases and tumor stage (P<0.05). There were significant differences in survival curves between groups with metastatic lymph nodes and non-metastatic lymph nodes, as well as between groups with low BMP-2 expression and groups with high BMP-2 expression. In addition, we observed decreased proliferation and migration rates of the NSCLC-derived cell lines A549 and H460 that were transfected with siBMP-2 (P<0.05).

Conclusion

BMP-2 mRNA is overexpressed in NSCLC samples and is a risk factor for survival in patients with NSCLC. BMP-2 silencing can significantly inhibit A549 and H460 cell proliferation and migration.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/4263254471298866.",2014-06-19 +23589683,Knowledge discovery in variant databases using inductive logic programming.,"Understanding the effects of genetic variation on the phenotype of an individual is a major goal of biomedical research, especially for the development of diagnostics and effective therapeutic solutions. In this work, we describe the use of a recent knowledge discovery from database (KDD) approach using inductive logic programming (ILP) to automatically extract knowledge about human monogenic diseases. We extracted background knowledge from MSV3d, a database of all human missense variants mapped to 3D protein structure. In this study, we identified 8,117 mutations in 805 proteins with known three-dimensional structures that were known to be involved in human monogenic disease. Our results help to improve our understanding of the relationships between structural, functional or evolutionary features and deleterious mutations. Our inferred rules can also be applied to predict the impact of any single amino acid replacement on the function of a protein. The interpretable rules are available at http://decrypthon.igbmc.fr/kd4v/.",2013-03-18 +24885616,Insulin therapy and colorectal cancer risk among type 2 diabetes mellitus patients: a systemic review and meta-analysis.,"

Background

Insulin is widely used in patients with type 2 diabetes mellitus (T2DM). More attention was focused on its higher risk of colorectal cancer (CRC). This meta-analysis examined the relationship between levels of insulin use and the risk of CRC.

Methods

A meta-analysis using data from 12 published epidemiologic studies (7 case-control, and 5 cohort studies) published before Jan. 2014 was done to examine the association between insulin use and CRC. Random effects analyses were done to calculate relative risk (RR) and 95% confidence intervals (CI). Heterogeneity among studies was measured by the χ2 and I2 statistic.

Results

Overall, the risk of CRC was significantly associated with insulin use in a random-effects model (RR, 1.69; 95% CI, 1.25-2.27). When subgroup analyses were conducted according to study type, no association was detected in the cohort group (RR, 1.25; 95% CI, 0.95-1.65; I2, 75.7%); however, a significant association was detected in the case-control group (RR, 2.15; 95% CI, 1.41-3.26; I2, 89.1%).

Conclusions

A significant harmful effect of insulin, observed mainly among case-control studies, may result from study design differences and the number of included studies. Although these results suggest a harmful effect of insulin use for CRC risk, additional large studies are warranted to support this preliminary evidence.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2194715731194123.",2014-05-12 +24821260,Transatlantic peer-to-peer learning: an initial feasibility analysis.,"

Introduction

Peer-to-peer learning is a well-established learning modality, which has been shown to improve learning outcomes, with positive implications for clinical practice. The purpose of this pilot study was to explore the feasibility of linking students from North America and Europe with a peer-to-peer learning approach.

Methods

Face and content validity studies were completed on the previously designed and validated online repository http://www.pilgrimshospital.com. Four medical students from the University of Toronto, Canada, were paired with four students from University College Cork, Ireland. Each student was invited to upload two pieces of information learned from a senior colleague that day. Each student was asked to review the information uploaded by their partner, editing with references if needed. Quantitative and qualitative evaluations of the e-peer system were conducted.

Results

Over the study period, the system recorded a total of 10 079 individual page views. Questionnaires completed by participants demonstrated that 6/8 found the system either ""very easy"" or ""easy"" to use, whereas all found that the system promoted evidence-based and self-directed learning. Structured interviews revealed 3 main themes: The Peer Connection, Trust in Data Veracity, and Aid to Clinical Learning.

Conclusion

This pilot study demonstrates it is feasible to link students from separate continents in a community of peer-to-peer learning. This is viewed positively by students and enhances evidenced-based learning, and the aspect of peer connectivity was important to participating students. Such an approach encourages peer cooperation and has the potential to disseminate key clinical learning experiences widely.",2014-05-12 +24303318,Platform for Personalized Oncology: Integrative analyses reveal novel molecular signatures associated with colorectal cancer relapse.,"Approximately 80% of Stage II colon cancer patients are cured by appropriate surgery. However, 20% relapse, and virtually all of these people will die due to metastatic disease. Adjuvant chemotherapy has little or no impact on relapse or survival in Stage II colon cancer, and can only add toxicity without benefit for 80% of the target population that has been cured by surgery. Despite much effort, it is difficult to identify clinical or molecular determinants of outcome in Stage II colon cancer, defeating attempts to target treatments to the 20% of individuals who are destined to relapse. We hypothesized that a multidimensional molecular analysis will identify a combination of factors that serve as prognostic biomarkers in Stage II adenocarcinoma of the colon. The Georgetown informatics team generated and analyzed multi-omics profiling datasets in stage II CRC patients with or without relapse to identify molecular signatures in CRC that may serve both as prognostic markers of recurrence, and also allow for identification of the subgroup of patients who might benefit from adjuvant chemotherapy. The datasets were loaded to GDOC® (Georgetown Database of Cancer) for further mining and analysis. The G-DOC web portal (http://gdoc.georgetown.edu) includes a broad collection of bioinformatics and systems biology tools for analysis and visualization of four major ""omics"" types: DNA, mRNA, microRNA, and metabolites. 
Through technology re-use, the G-DOC infrastructure will accelerate progress for a variety of ongoing programs in need of integrative multi-omics analysis, and advance our opportunities to practice effective personalized oncology in the near future.",2013-03-18 +21957981,caCORRECT2: Improving the accuracy and reliability of microarray data in the presence of artifacts.,"

Background

In previous work, we reported the development of caCORRECT, a novel microarray quality control system built to identify and correct spatial artifacts commonly found on Affymetrix arrays. We have made recent improvements to caCORRECT, including the development of a model-based data-replacement strategy and integration with typical microarray workflows via caCORRECT's web portal and caBIG grid services. In this report, we demonstrate that caCORRECT improves the reproducibility and reliability of experimental results across several common Affymetrix microarray platforms. caCORRECT represents an advance over state-of-art quality control methods such as Harshlighting, and acts to improve gene expression calculation techniques such as PLIER, RMA and MAS5.0, because it incorporates spatial information into outlier detection as well as outlier information into probe normalization. The ability of caCORRECT to recover accurate gene expressions from low quality probe intensity data is assessed using a combination of real and synthetic artifacts with PCR follow-up confirmation and the affycomp spike in data. The caCORRECT tool can be accessed at the website: http://cacorrect.bme.gatech.edu.

Results

We demonstrate that (1) caCORRECT's artifact-aware normalization avoids the undesirable global data warping that happens when any damaged chips are processed without caCORRECT; (2) When used upstream of RMA, PLIER, or MAS5.0, the data imputation of caCORRECT generally improves the accuracy of microarray gene expression in the presence of artifacts more than using Harshlighting or not using any quality control; (3) Biomarkers selected from artifactual microarray data which have undergone the quality control procedures of caCORRECT are more likely to be reliable, as shown by both spike in and PCR validation experiments. Finally, we present a case study of the use of caCORRECT to reliably identify biomarkers for renal cell carcinoma, yielding two diagnostic biomarkers with potential clinical utility, PRKAB1 and NNMT.

Conclusions

caCORRECT is shown to improve the accuracy of gene expression, and the reproducibility of experimental results in clinical application. This study suggests that caCORRECT will be useful to clean up possible artifacts in new as well as archived microarray data.",2011-09-29 +22139581,EpiRegNet: constructing epigenetic regulatory network from high throughput gene expression data for humans.,"The advances of high throughput profiling methods, such as microarray gene profiling and RNA-seq, have enabled researchers to identify thousands of differentially expressed genes under a certain perturbation. Much work has been done to understand the genetic factors that contribute to the expression changes by searching the over-represented regulatory motifs in the promoter regions of these genes. However, the changes could also be caused by epigenetic regulation, especially histone modifications, and no web server has been constructed to study the epigenetic factors responsible for gene expression changes. Here, we present a web tool for this purpose. Provided with different categories of genes (e.g., up-regulated, down-regulated or unchanged genes), the server will find epigenetic factors responsible for the difference among the categories and construct an epigenetic regulatory network. Furthermore, it will perform co-localization analyses between these epigenetic factors and transcription factors, which were collected from large scale experimental ChIP-seq or computational predicted data. In addition, for users who want to analyze dynamic change of a histone modification mark under different cell conditions, the server will find direct and indirect target genes of this mark by integrative analysis of experimental data and computational prediction, and present a regulatory network around this mark. Both networks can be visualized by a user friendly interface and the data are downloadable in batch. 
The server currently supports 12 cell types in human, including ESC and CD4+ T cells, and will expand as more public data are available. It also allows user to create a self-defined cell type, upload and analyze multiple ChIP-seq data. It is freely available to academic users at http://jjwanglab.org/EpiRegNet.",2011-12-01 +22224429,Annotator: postprocessing software for generating function-based signatures from quantitative mass spectrometry.,"Mass spectrometry is used to investigate global changes in protein abundance in cell lysates. Increasingly powerful methods of data collection have emerged over the past decade, but this has left researchers with the task of sifting through mountains of data for biologically significant results. Often, the end result is a list of proteins with no obvious quantitative relationships to define the larger context of changes in cell behavior. Researchers are often forced to perform a manual analysis from this list or to fall back on a range of disparate tools, which can hinder the communication of results and their reproducibility. To address these methodological problems, we developed Annotator, an application that filters validated mass spectrometry data and applies a battery of standardized heuristic and statistical tests to determine significance. To address systems-level interpretations, we incorporated UniProt and Gene Ontology keywords as statistical units of analysis, yielding quantitative information about changes in abundance for an entire functional category. This provides a consistent and quantitative method for formulating conclusions about cellular behavior, independent of network models or standard enrichment analyses. Annotator allows for ""bottom-up"" annotations that are based on experimental data and not inferred by comparison to external or hypothetical models. 
Annotator was developed as an independent postprocessing platform that runs on all common operating systems, thereby providing a useful tool for establishing the inherently dynamic nature of functional annotations, which depend on results from ongoing proteomic experiments. Annotator is available for download at http://people.cs.uchicago.edu/~tyler/annotator/annotator_desktop_0.1.tar.gz .",2012-02-03 +21498398,OTUbase: an R infrastructure package for operational taxonomic unit data.,"

Summary

OTUbase is an R package designed to facilitate the analysis of operational taxonomic unit (OTU) data and sequence classification (taxonomic) data. Currently there are programs that will cluster sequence data into OTUs and/or classify sequence data into known taxonomies. However, there is a need for software that can take the summarized output of these programs and organize it into easily accessed and manipulated formats. OTUbase provides this structure and organization within R, to allow researchers to easily manipulate the data with the rich library of R packages currently available for additional analysis.

Availability

OTUbase is an R package available through Bioconductor. It can be found at http://www.bioconductor.org/packages/release/bioc/html/OTUbase.html.",2011-04-15 +23988793,Protein subcellular localization in human and hamster cell lines: employing local ternary patterns of fluorescence microscopy images.,"Discriminative feature extraction technique is always required for the development of accurate and efficient prediction systems for protein subcellular localization so that effective drugs can be developed. In this work, we showed that Local Ternary Patterns (LTPs) effectively exploit small variations in pixel intensities; present in fluorescence microscopy based protein images of human and hamster cell lines. Further, Synthetic Minority Oversampling Technique is applied to balance the feature space for the classification stage. We observed that LTPs coupled with data balancing technique could enable a classifier, in this case support vector machine, to yield good performance. The proposed ensemble based prediction system, using 10-fold cross-validation, has yielded better performance compared to existing techniques in predicting various subcellular compartments for both 2D HeLa and CHO datasets. The proposed predictor is available online at: http://111.68.99.218/Protein_SubLoc/, which is freely accessible to the public.",2013-08-27 +24678734,Research resource: EPSLiM: ensemble predictor for short linear motifs in nuclear hormone receptors.,"Nuclear receptors (NRs) are a superfamily of transcription factors central to regulating many biological processes, including cell growth, death, metabolism, and immune responses. NR-mediated gene expression can be modulated by coactivators and corepressors through direct physical interaction or protein complexes with functional domains in NRs. 
One class of these domains includes short linear motifs (SLiMs), which facilitate protein-protein interactions, phosphorylation, and ligand binding primarily in the intrinsically disordered regions (IDRs) of proteins. Across all proteins, the number of known SLiMs is limited due to the difficulty in studying IDRs experimentally. Computational tools provide a systematic and data-driven approach for predicting functional motifs that can be used to prioritize experimental efforts. Accordingly, several tools have been developed based on sequence conservation or biophysical features; however, discrepancies in predictions make it difficult to determine the true candidate SLiMs. In this work, we present the ensemble predictor for short linear motifs (EPSLiM), a novel strategy to prioritize the residues that are most likely to be SLiMs in IDRs. EPSLiM applies a generalized linear model to integrate predictions from individual methodologies. We show that EPSLiM outperforms individual predictors, and we apply our method to NRs. The androgen receptor is an example with an N-terminal domain of 559 disordered amino acids that contains several validated SLiMs important for transcriptional activation. We use the androgen receptor to illustrate the predictive performance of EPSLiM and make the results of all human and mouse NRs publicly available through the web service http://epslim.bwh.harvard.edu.",2014-03-28 +23504016,Genomic reconstruction of the transcriptional regulatory network in Bacillus subtilis.,"The adaptation of microorganisms to their environment is controlled by complex transcriptional regulatory networks (TRNs), which are still only partially understood even for model species. Genome scale annotation of regulatory features of genes and TRN reconstruction are challenging tasks of microbial genomics. 
We used the knowledge-driven comparative-genomics approach implemented in the RegPredict Web server to infer TRN in the model Gram-positive bacterium Bacillus subtilis and 10 related Bacillales species. For transcription factor (TF) regulons, we combined the available information from the DBTBS database and the literature with bioinformatics tools, allowing inference of TF binding sites (TFBSs), comparative analysis of the genomic context of predicted TFBSs, functional assignment of target genes, and effector prediction. For RNA regulons, we used known RNA regulatory motifs collected in the Rfam database to scan genomes and analyze the genomic context of new RNA sites. The inferred TRN in B. subtilis comprises regulons for 129 TFs and 24 regulatory RNA families. First, we analyzed 66 TF regulons with previously known TFBSs in B. subtilis and projected them to other Bacillales genomes, resulting in refinement of TFBS motifs and identification of novel regulon members. Second, we inferred motifs and described regulons for 28 experimentally studied TFs with previously unknown TFBSs. Third, we discovered novel motifs and reconstructed regulons for 36 previously uncharacterized TFs. The inferred collection of regulons is available in the RegPrecise database (http://regprecise.lbl.gov/) and can be used in genetic experiments, metabolic modeling, and evolutionary analysis.",2013-03-15 +23626001,Incorporating key position and amino acid residue features to identify general and species-specific Ubiquitin conjugation sites.,"

Motivation

Systematic dissection of the ubiquitylation proteome is emerging as an appealing but challenging research topic because of the significant roles ubiquitylation plays not only in protein degradation but also in many other cellular functions. High-throughput experimental studies using mass spectrometry have identified many ubiquitylation sites, primarily from eukaryotes. However, the vast majority of ubiquitylation sites remain undiscovered, even in well-studied systems. Because mass spectrometry-based experimental approaches for identifying ubiquitylation events are costly, time-consuming and biased toward abundant proteins and proteotypic peptides, in silico prediction of ubiquitylation sites is a potentially useful alternative strategy for whole proteome annotation. Because of various limitations, current ubiquitylation site prediction tools were not well designed to comprehensively assess proteomes.

Results

We present a novel tool known as UbiProber, specifically designed for large-scale predictions of both general and species-specific ubiquitylation sites. We collected proteomics data for ubiquitylation from multiple species from several reliable sources and used them to train prediction models by a comprehensive machine-learning approach that integrates the information from key positions and key amino acid residues. Cross-validation tests reveal that UbiProber achieves some improvement over existing tools in predicting species-specific ubiquitylation sites. Moreover, independent tests show that UbiProber improves the areas under receiver operating characteristic curves by ~15% by using the Combined model.

Availability

The UbiProber server is freely available on the web at http://bioinfo.ncu.edu.cn/UbiProber.aspx. The software system of UbiProber can be downloaded at the same site.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-26 +23727957,Circular code motifs in transfer RNAs.,"In 1996, a trinucleotide circular code X is identified in genes of prokaryotes and eukaryotes (Arquès and Michel, 1996). In 2012, X motifs are identified in the transfer RNA (tRNA) Phe and 16S ribosomal RNA (Michel, 2012). A statistical analysis of X motifs in all available tRNAs of prokaryotes and eukaryotes in the genomic tRNA database (September 2012, http://lowelab.ucsc.edu/GtRNAdb/, Lowe and Eddy, 1997) is carried out here. For this purpose, a search algorithm of X motifs in a DNA sequence is developed. Two definitions allow to determine the occurrence probabilities of X motifs and the circular codes X, X1=P(X) and X2=P(2)(X) (P being a circular permutation map applied on X) in a population of tRNAs. This approach identifies X motifs in the 5' and/or 3' regions of 16 isoaccepting tRNAs (except for the tRNAs Arg, His, Ser and Trp). The statistical analyses are performed on different and large tRNA populations according to the taxonomy (prokaryotes and eukaryotes), tRNA length and tRNA score. Finally, a circular code property observed in genes of prokaryotes and eukaryotes is identified in the 3' regions of 19 isoaccepting tRNAs of prokaryotes and eukaryotes (except for the tRNA Leu). The identification of X motifs and a gene circular code property in tRNAs strengthens the concept proposed in Michel (2012) of a possible translation (framing) code based on a circular code.",2013-03-15 +22389013,Identification of peptide features in precursor spectra using Hardklör and Krönik.,"Hardklör and Krönik are software tools for feature detection and data reduction of high-resolution mass spectra. Hardklör is used to reduce peptide isotope distributions to a single monoisotopic mass and charge state, and can deconvolve overlapping peptide isotope distributions. 
Krönik filters, validates, and summarizes peptide features identified with Hardklör from data obtained during liquid chromatography mass spectrometry (LC-MS). Both software tools contain a simple user interface and can be run from nearly any desktop computer. These tools are freely available from http://proteome.gs.washington.edu/software/hardklor.",2012-03-01 +24807056,Neisseria adhesin A variation and revised nomenclature scheme.,"Neisseria adhesin A (NadA), involved in the adhesion and invasion of Neisseria meningitidis into host tissues, is one of the major components of Bexsero, a novel multicomponent vaccine licensed for protection against meningococcal serogroup B in Europe, Australia, and Canada. NadA has been identified in approximately 30% of clinical isolates and in a much lower proportion of carrier isolates. Three protein variants were originally identified in invasive meningococci and named NadA-1, NadA-2, and NadA-3, whereas most carrier isolates either lacked the gene or harbored a different variant, NadA-4. Further analysis of isolates belonging to the sequence type 213 (ST-213) clonal complex identified NadA-5, which was structurally similar to NadA-4, but more distantly related to NadA-1, -2, and -3. At the time of this writing, more than 89 distinct nadA allele sequences and 43 distinct peptides have been described. Here, we present a revised nomenclature system, taking into account the complete data set, which is compatible with previous classification schemes and is expandable. The main features of this new scheme include (i) the grouping of the previously named NadA-2 and NadA-3 variants into a single NadA-2/3 variant, (ii) the grouping of the previously assigned NadA-4 and NadA-5 variants into a single NadA-4/5 variant, (iii) the introduction of an additional variant (NadA-6), and (iv) the classification of the variants into two main groups, named groups I and II. 
To facilitate querying of the sequences and submission of new allele sequences, the nucleotide and amino acid sequences are available at http://pubmlst.org/neisseria/NadA/.",2014-05-07 +21906109,Genotypechecker: an interactive tool for checking the inheritance consistency of genotyped pedigrees.,"Datapoint errors in pedigree genotype data sets are difficult to identify and adversely affect downstream genetic analyses. We present GenotypeChecker, a desktop software tool for assisting data cleansing. The application identifies likely data errors in pedigree/genotype data sets by performing an inheritance-checking algorithm for each marker across the pedigree, and highlights inconsistently inherited genotypes in an exploratory user interface. By 'masking' suspect datapoints and rechecking inheritance consistency, erroneous datapoints can be confirmed and cleansed from the data set. The software, examples and documentation are freely available at http://bioinformatics.roslin.ac.uk/genotypechecker.",2011-03-24 +24665131,Prediction of individualized therapeutic vulnerabilities in cancer from genomic profiles.,"

Motivation

Somatic homozygous deletions of chromosomal regions in cancer, while not necessarily oncogenic, may lead to therapeutic vulnerabilities specific to cancer cells compared with normal cells. A recently reported example is the loss of one of the two isoenzymes in glioblastoma cancer cells such that the use of a specific inhibitor selectively inhibited growth of the cancer cells, which had become fully dependent on the second isoenzyme. We have now made use of the unprecedented conjunction of large-scale cancer genomics profiling of tumor samples in The Cancer Genome Atlas (TCGA) and of tumor-derived cell lines in the Cancer Cell Line Encyclopedia, as well as the availability of integrated pathway information systems, such as Pathway Commons, to systematically search for a comprehensive set of such epistatic vulnerabilities.

Results

Based on homozygous deletions affecting metabolic enzymes in 16 TCGA cancer studies and 972 cancer cell lines, we identified 4104 candidate metabolic vulnerabilities present in 1019 tumor samples and 482 cell lines. Up to 44% of these vulnerabilities can be targeted with at least one Food and Drug Administration-approved drug. We suggest focused experiments to test these vulnerabilities and clinical trials based on personalized genomic profiles of those that pass preclinical filters. We conclude that genomic profiling will in the future provide a promising basis for network pharmacology of epistatic vulnerabilities as a promising therapeutic strategy.

Availability and implementation

A web-based tool for exploring all vulnerabilities and their details is available at http://cbio.mskcc.org/cancergenomics/statius/ along with supplemental data files.",2014-03-24 +24500204,EDDY: a novel statistical gene set test method to detect differential genetic dependencies.,"Identifying differential features between conditions is a popular approach to understanding molecular features and their mechanisms underlying a biological process of particular interest. Although many tests for identifying differential expression of gene or gene sets have been proposed, there was limited success in developing methods for differential interactions of genes between conditions because of its computational complexity. We present a method for Evaluation of Dependency DifferentialitY (EDDY), which is a statistical test for differential dependencies of a set of genes between two conditions. Unlike previous methods focused on differential expression of individual genes or correlation changes of individual gene-gene interactions, EDDY compares two conditions by evaluating the probability distributions of dependency networks from genes. The method has been evaluated and compared with other methods through simulation studies, and application to glioblastoma multiforme data resulted in informative cancer and glioblastoma multiforme subtype-related findings. The comparison with Gene Set Enrichment Analysis, a differential expression-based method, revealed that EDDY identifies the gene sets that are complementary to those identified by Gene Set Enrichment Analysis. EDDY also showed much lower false positives than Gene Set Co-expression Analysis, a method based on correlation changes of individual gene-gene interactions, thus providing more informative results. The Java implementation of the algorithm is freely available to noncommercial users. 
Download from: http://biocomputing.tgen.org/software/EDDY.",2014-02-05 +25033270,Effective automated feature construction and selection for classification of biological sequences.,"

Background

Many open problems in bioinformatics involve elucidating underlying functional signals in biological sequences. DNA sequences, in particular, are characterized by rich architectures in which functional signals are increasingly found to combine local and distal interactions at the nucleotide level. Problems of interest include detection of regulatory regions, splice sites, exons, hypersensitive sites, and more. These problems naturally lend themselves to formulation as classification problems in machine learning. When classification is based on features extracted from the sequences under investigation, success is critically dependent on the chosen set of features.

Methodology

We present an algorithmic framework (EFFECT) for automated detection of functional signals in biological sequences. We focus here on classification problems involving DNA sequences which state-of-the-art work in machine learning shows to be challenging and involve complex combinations of local and distal features. EFFECT uses a two-stage process to first construct a set of candidate sequence-based features and then select a most effective subset for the classification task at hand. Both stages make heavy use of evolutionary algorithms to efficiently guide the search towards informative features capable of discriminating between sequences that contain a particular functional signal and those that do not.

Results

To demonstrate its generality, EFFECT is applied to three separate problems of importance in DNA research: the recognition of hypersensitive sites, splice sites, and ALU sites. Comparisons with state-of-the-art algorithms show that the framework is both general and powerful. In addition, a detailed analysis of the constructed features shows that they contain valuable biological information about DNA architecture, allowing biologists and other researchers to directly inspect the features and potentially use the insights obtained to assist wet-laboratory studies on retainment or modification of a specific signal. Code, documentation, and all data for the applications presented here are provided for the community at http://www.cs.gmu.edu/~ashehu/?q=OurTools.",2014-07-17 +24564446,AnnotateGenomicRegions: a web application.,"

Background

Modern genomic technologies produce large amounts of data that can be mapped to specific regions in the genome. Among the first steps in interpreting the results is annotation of genomic regions with known features such as genes, promoters, CpG islands etc. Several tools have been published to perform this task. However, using these tools often requires a significant amount of bioinformatics skills and/or downloading and installing dedicated software.

Results

Here we present AnnotateGenomicRegions, a web application that accepts genomic regions as input and outputs a selection of overlapping and/or neighboring genome annotations. Supported organisms include human (hg18, hg19), mouse (mm8, mm9, mm10), zebrafish (danRer7), and Saccharomyces cerevisiae (sacCer2, sacCer3). AnnotateGenomicRegions is accessible online on a public server or can be installed locally. Some frequently used annotations and genomes are embedded in the application while custom annotations may be added by the user.

Conclusions

The increasing spread of genomic technologies generates the need for a simple-to-use annotation tool for genomic regions that can be used by biologists and bioinformaticians alike. AnnotateGenomicRegions meets this demand. AnnotateGenomicRegions is an open-source web application that can be installed on any personal computer or institute server. AnnotateGenomicRegions is available at: http://cru.genomics.iit.it/AnnotateGenomicRegions.",2014-01-10 +23520492,A statistical framework for improving genomic annotations of prokaryotic essential genes.,"Large-scale systematic analysis of gene essentiality is an important step closer toward unraveling the complex relationship between genotypes and phenotypes. Such analysis cannot be accomplished without unbiased and accurate annotations of essential genes. In current genomic databases, most of the essential gene annotations are derived from whole-genome transposon mutagenesis (TM), the most frequently used experimental approach for determining essential genes in microorganisms under defined conditions. However, there are substantial systematic biases associated with TM experiments. In this study, we developed a novel Poisson model-based statistical framework to simulate the TM insertion process and subsequently correct the experimental biases. We first quantitatively assessed the effects of major factors that potentially influence the accuracy of TM and subsequently incorporated relevant factors into the framework. Through iteratively optimizing parameters, we inferred the actual insertion events occurred and described each gene's essentiality on probability measure. Evaluated by the definite mapping of essential gene profile in Escherichia coli, our model significantly improved the accuracy of original TM datasets, resulting in more accurate annotations of essential genes. Our method also showed encouraging results in improving subsaturation level TM datasets. 
To test our model's broad applicability to other bacteria, we applied it to Pseudomonas aeruginosa PAO1 and Francisella tularensis novicida TM datasets. We validated our predictions by literature as well as allelic exchange experiments in PAO1. Our model was correct on six of the seven tested genes. Remarkably, among all three cases that our predictions contradicted the TM assignments, experimental validations supported our predictions. In summary, our method will be a promising tool in improving genomic annotations of essential genes and enabling large-scale explorations of gene essentiality. Our contribution is timely considering the rapidly increasing essential gene sets. A Webserver has been set up to provide convenient access to this tool. All results and source codes are available for download upon publication at http://research.cchmc.org/essentialgene/.",2013-03-08 +24650035,High expression of DEK predicts poor prognosis of gastric adenocarcinoma.,"

Background

DEK, as an oncoprotein, plays an important role in cancer development and progression. This study aimed to investigate the clinicopathological significance of DEK overexpression in patients with gastric cancer.

Materials and methods

The expression of DEK protein was evaluated by immunohistochemical (IHC) staining of 172 gastric cancer samples with complete clinicopathological features, and the correlation between DEK expression and clinicopathological features was examined. Survival rates were also calculated using the Kaplan-Meier method in gastric cancer patients with complete survival data.

Results

DEK protein showed a strictly nuclear staining pattern in gastric cancers with IHC and immunofluorescence. The strongly positive rate of DEK protein was 60.5% (104/172) in gastric cancers, which was significantly higher than that in either gastric dysplasia (19.4%, 7/36) or adjacent normal mucosa (0%, 0/27). DEK expression in gastric cancer correlated to tumor size, differentiation, clinical stage, disease-free survival, and overall survival rates. Further analysis showed that patients with early-stage gastric cancer and high DEK expression had shorter disease-free survival and overall survival duration than those with low DEK expression.

Conclusion

High level of DEK protein expression predicts the poor prognosis of patients with gastric cancer. DEK expression might be potentially used as an independent effective biomarker for prognostic evaluation of gastric cancers.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/5050145571193097.",2014-03-20 +21949842,REST: a toolkit for resting-state functional magnetic resonance imaging data processing.,"Resting-state fMRI (RS-fMRI) has been drawing more and more attention in recent years. However, a publicly available, systematically integrated and easy-to-use tool for RS-fMRI data processing is still lacking. We developed a toolkit for the analysis of RS-fMRI data, namely the RESting-state fMRI data analysis Toolkit (REST). REST was developed in MATLAB with graphical user interface (GUI). After data preprocessing with SPM or AFNI, a few analytic methods can be performed in REST, including functional connectivity analysis based on linear correlation, regional homogeneity, amplitude of low frequency fluctuation (ALFF), and fractional ALFF. A few additional functions were implemented in REST, including a DICOM sorter, linear trend removal, bandpass filtering, time course extraction, regression of covariates, image calculator, statistical analysis, and slice viewer (for result visualization, multiple comparison correction, etc.). REST is an open-source package and is freely available at http://www.restfmri.net.",2011-09-20 +24655567,Genes associated with agronomic traits in non-heading Chinese cabbage identified by expression profiling.,"

Background

The genomes of non-heading Chinese cabbage (Brassica rapa ssp. chinensis), heading Chinese cabbage (Brassica rapa ssp. pekinensis) and their close relative Arabidopsis thaliana have provided important resources for studying the evolution and genetic improvement of cruciferous plants. Natural growing conditions present these plants with a variety of physiological challenges for which they have a repertoire of genes that ensure adaptability and normal growth. We investigated the differential expressions of genes that control adaptability and development in plants growing in the natural environment to study underlying mechanisms of their expression.

Results

Using digital gene expression tag profiling, we constructed an expression profile to identify genes related to important agronomic traits under natural growing conditions. Among three non-heading Chinese cabbage cultivars, we found thousands of genes that exhibited significant differences in expression levels at five developmental stages. Through comparative analysis and previous reports, we identified several candidate genes associated with late flowering, cold tolerance, self-incompatibility, and leaf color. Two genes related to cold tolerance were verified using quantitative real-time PCR.

Conclusions

We identified a large number of genes associated with important agronomic traits of non-heading Chinese cabbage. This analysis will provide a wealth of resources for molecular-assisted breeding of cabbage. The raw data and detailed results of this analysis are available at the website http://nhccdata.njau.edu.cn.",2014-03-22 +22285562,JointSNVMix: a probabilistic model for accurate detection of somatic mutations in normal/tumour paired next-generation sequencing data.,"

Motivation

Identification of somatic single nucleotide variants (SNVs) in tumour genomes is a necessary step in defining the mutational landscapes of cancers. Experimental designs for genome-wide ascertainment of somatic mutations now routinely include next-generation sequencing (NGS) of tumour DNA and matched constitutional DNA from the same individual. This allows investigators to control for germline polymorphisms and distinguish somatic mutations that are unique to the tumour, thus reducing the burden of labour-intensive and expensive downstream experiments needed to verify initial predictions. In order to make full use of such paired datasets, computational tools for simultaneous analysis of tumour-normal paired sequence data are required, but are currently under-developed and under-represented in the bioinformatics literature.

Results

In this contribution, we introduce two novel probabilistic graphical models called JointSNVMix1 and JointSNVMix2 for jointly analysing paired tumour-normal digital allelic count data from NGS experiments. In contrast to independent analysis of the tumour and normal data, our method allows statistical strength to be borrowed across the samples and therefore amplifies the statistical power to identify and distinguish both germline and somatic events in a unified probabilistic framework.

Availability

The JointSNVMix models and four other models discussed in the article are part of the JointSNVMix software package available for download at http://compbio.bccrc.ca

Contact

sshah@bccrc.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-27 +21610212,Sharing and archiving nucleic acid structure mapping data.,"Nucleic acids are particularly amenable to structural characterization using chemical and enzymatic probes. Each individual structure mapping experiment reveals specific information about the structure and/or dynamics of the nucleic acid. Currently, there is no simple approach for making these data publically available in a standardized format. We therefore developed a standard for reporting the results of single nucleotide resolution nucleic acid structure mapping experiments, or SNRNASMs. We propose a schema for sharing nucleic acid chemical probing data that uses generic public servers for storing, retrieving, and searching the data. We have also developed a consistent nomenclature (ontology) within the Ontology of Biomedical Investigations (OBI), which provides unique identifiers (termed persistent URLs, or PURLs) for classifying the data. Links to standardized data sets shared using our proposed format along with a tutorial and links to templates can be found at http://snrnasm.bio.unc.edu.",2011-05-24 +23476021,PLncDB: plant long non-coding RNA database.,"

Summary

Plant long non-coding RNA database (PLncDB) attempts to provide the following functions related to long non-coding RNAs (lncRNAs): (i) Genomic information for a large number of lncRNAs collected from various resources; (ii) an online genome browser for plant lncRNAs based on a platform similar to that of the UCSC Genome Browser; (iii) Integration of transcriptome datasets derived from various samples including different tissues, developmental stages, mutants and stress treatments; and (iv) A list of epigenetic modification datasets and small RNA datasets. Currently, our PLncDB provides a comprehensive genomic view of Arabidopsis lncRNAs for the plant research community. This database will be regularly updated with new plant genome when available so as to greatly facilitate future investigations on plant lncRNAs.

Availability

PLncDB is freely accessible at http://chualab.rockefeller.edu/gbrowse2/homepage.html and all results can be downloaded for free at the website.",2013-03-07 +23464379,Retinal amino acid neurochemistry in health and disease.,"Advances in basic retinal anatomy, genetics, biochemical pathways and neurochemistry have not only provided a better understanding of retinal function but have also allowed us to link basic science to retinal disease. The link with disease allowed measures to be developed that now provide an opportunity to intervene and slow down or even restore sight in previously 'untreatable' retinal diseases. One of the critical advances has been the understanding of the retinal amino acid neurotransmitters, related amino acids, their metabolites and functional receptors. This review provides an overview of amino acid localisation in the retina and examples of how retinal anatomy and amino acid neurochemistry directly links to understanding retinal disease. Also, the implications of retinal remodelling involving amino acid (glutamate) receptors are outlined in this review and insights are presented on how understanding of detrimental and beneficial retinal remodelling will provide better outcomes for patients using strategies for the preservation or restoration of vision. An internet-based database of retinal images of amino acid labelling patterns and other amino acid-related images in health and disease is located at http://www.aminoacidimmunoreactivity.com.",2013-03-07 +23595661,NetworkPrioritizer: a versatile tool for network-based prioritization of candidate disease genes or other molecules.,"

Summary

The prioritization of candidate disease genes is often based on integrated datasets and their network representation with genes as nodes connected by edges for biological relationships. However, the majority of prioritization methods does not allow for a straightforward integration of the user's own input data. Therefore, we developed the Cytoscape plugin NetworkPrioritizer that particularly supports the integrative network-based prioritization of candidate disease genes or other molecules. Our versatile software tool computes a number of important centrality measures to rank nodes based on their relevance for network connectivity and provides different methods to aggregate and compare rankings.

Availability

NetworkPrioritizer and the online documentation are freely available at http://www.networkprioritizer.de",2013-04-16 +24871251,"Million hearts: prevalence of leading cardiovascular disease risk factors--United States, 2005-2012.","Each year, approximately 1.5 million U.S. adults have a heart attack or stroke, resulting in approximately 30 deaths every hour and, for nonfatal events, often leading to long-term disability. Overall, an estimated 14 million survivors of heart attacks and strokes are living in the United States. In 2011, the U.S. Department of Health and Human Services, in collaboration with nonprofit and private organizations, launched Million Hearts (http://www.millionhearts.hhs.gov), an initiative focused on implementing clinical and community-level evidence-based strategies to reduce cardiovascular disease (CVD) risk factors and prevent a total of 1 million heart attacks and strokes during the 5-year period 2012-2016. From 2005-2006 to the period with the most current data, analysis of the Million Hearts four ""ABCS"" clinical measures (for aspirin, blood pressure, cholesterol, and smoking) showed 1) no statistically significant change in the prevalence of aspirin use for secondary prevention (53.8% in 2009-2010), 2) an increase to 51.9% in the prevalence of blood pressure control (in 2011-2012), 3) an increase to 42.8% in the prevalence of cholesterol management (in 2011-2012), and 4) no statistically significant change in the prevalence of smoking assessment and treatment (22.2% in 2009-2010). In addition, analysis of two community-level indicators found 1) a decrease in current tobacco product smoking (including cigarette, cigar, or pipe use) prevalence to 25.1% in 2011-2012 and 2) minimal change in mean daily sodium intake (3,594 mg/day in 2009-2010). 
Although trends in some measures are encouraging, further reductions of CVD risk factors will be needed to meet Million Hearts goals by 2017.",2014-05-01 +22695826,A pilot project for the Japan arthroplasty register.,"

Background

National arthroplasty registers are valuable tools for reporting on an updated epidemiologic survey of arthroplasties and for evaluating the performance of implants and operative procedures through the early identification of failure risk factors. More than ten registers have been launched globally, but no national register has been reported in Asia.

Methods

In February 2006, a pilot project of the Japan Arthroplasty Register (JAR) for total hip arthroplasty (THA) and total knee arthroplasty/unicompartmental knee arthroplasty (TKA/UKA) was launched by the Japanese Orthopaedic Association (JOA). Data obtained include information about patients, primary and revision arthroplasty operative procedures, and implants and materials used. The JAR office accumulated and processed all data and reports annually.

Results

Up to May 2011, 83 of 130 hospitals nominated by the JOA (64 %) participated in the JAR pilot project. From 2006 to 2011, 33,080 data collection forms were submitted; 17,534 for THA and 17,269 for TKA/UKA. A brief summary of the annual report of the JAR is available from The Japanese Society for Replacement Arthroplasty web site at http://jsra.info/ .

Conclusion

A national arthroplasty register is a useful tool for evaluating the outcomes of interventions and the materials used in arthroplasties and for providing rapid feedback to practitioners and patients about any failure of THA and TKA/UKA. As the first national arthroplasty register in Asia, the JAR will help guide the development of registers of arthroplasty characteristics specific to Asian populations.",2012-06-14 +23497033,"phiBIOTICS: catalogue of therapeutic enzybiotics, relevant research studies and practical applications.","

Background

The incidence of bacterial infections in humans along with the growing problem of antibiotic resistance is a major public health concern worldwide. Therefore it is necessary to develop novel therapeutic agents to control microbial pathogens. In this regard, enzybiotics, lytic enzymes endowed with the capacity to degrade bacterial cell wall, are a very promising group of alternative antimicrobials.

Description

Numerous experimental studies have confirmed unique therapeutic capabilities of enzybiotics and hence they are worth of wider attention of the medical community. In order to summarize the state of current knowledge of enzybiotics, we have developed phiBIOTICS, an information portal about known and studied therapeutic enzybiotics. phiBIOTICS contains information on chemical and biological properties of enzybiotics together with compendium of facts retrieved from research studies, where enzybiotics were applied. Our auxiliary phiBiScan program utility is dedicated for prediction of novel potential enzybiotics.

Conclusions

phiBIOTICS presents a solid body of knowledge about all studied therapeutic enzybiotics to date. The database brings high-value information on outcomes of applied research and pre-clinical trials of these prospective antimicrobial agents. This information which was scattered in research papers with heterogeneous quality and relevance is now available in the form of manually curated database. phiBIOTICS and phiBiScan are freely accessible at http://www.phibiotics.org/.",2013-03-06 +22244038,MIPHENO: data normalization for high throughput metabolite analysis.,"

Background

High throughput methodologies such as microarrays, mass spectrometry and plate-based small molecule screens are increasingly used to facilitate discoveries from gene function to drug candidate identification. These large-scale experiments are typically carried out over the course of months and years, often without the controls needed to compare directly across the dataset. Few methods are available to facilitate comparisons of high throughput metabolic data generated in batches where explicit in-group controls for normalization are lacking.

Results

Here we describe MIPHENO (Mutant Identification by Probabilistic High throughput-Enabled Normalization), an approach for post-hoc normalization of quantitative first-pass screening data in the absence of explicit in-group controls. This approach includes a quality control step and facilitates cross-experiment comparisons that decrease the false non-discovery rates, while maintaining the high accuracy needed to limit false positives in first-pass screening. Results from simulation show an improvement in both accuracy and false non-discovery rate over a range of population parameters (p < 2.2 × 10(-16)) and a modest but significant (p < 2.2 × 10(-16)) improvement in area under the receiver operator characteristic curve of 0.955 for MIPHENO vs 0.923 for a group-based statistic (z-score). Analysis of the high throughput phenotypic data from the Arabidopsis Chloroplast 2010 Project (http://www.plastid.msu.edu/) showed ~ 4-fold increase in the ability to detect previously described or expected phenotypes over the group based statistic.

Conclusions

Results demonstrate MIPHENO offers substantial benefit in improving the ability to detect putative mutant phenotypes from post-hoc analysis of large data sets. Additionally, it facilitates data interpretation and permits cross-dataset comparison where group-based controls are missing. MIPHENO is applicable to a wide range of high throughput screenings and the code is freely available as Additional file 1 as well as through an R package in CRAN.",2012-01-13 +22877863,BioJava: an open-source framework for bioinformatics in 2012.,"

Unlabelled

BioJava is an open-source project for processing of biological data in the Java programming language. We have recently released a new version (3.0.5), which is a major update to the code base that greatly extends its functionality.

Results

BioJava now consists of several independent modules that provide state-of-the-art tools for protein structure comparison, pairwise and multiple sequence alignments, working with DNA and protein sequences, analysis of amino acid properties, detection of protein modifications and prediction of disordered regions in proteins as well as parsers for common file formats using a biologically meaningful data model.

Availability

BioJava is an open-source project distributed under the Lesser GPL (LGPL). BioJava can be downloaded from the BioJava website (http://www.biojava.org). BioJava requires Java 1.6 or higher. All inquiries should be directed to the BioJava mailing lists. Details are available at http://biojava.org/wiki/BioJava:MailingLists.",2012-08-09 +23497081,Learning a peptide-protein binding affinity predictor with kernel ridge regression.,"

Background

The cellular function of a vast majority of proteins is performed through physical interactions with other biomolecules, which, most of the time, are other proteins. Peptides represent templates of choice for mimicking a secondary structure in order to modulate protein-protein interaction. They are thus an interesting class of therapeutics since they also display strong activity, high selectivity, low toxicity and few drug-drug interactions. Furthermore, predicting peptides that would bind to a specific MHC alleles would be of tremendous benefit to improve vaccine based therapy and possibly generate antibodies with greater affinity. Modern computational methods have the potential to accelerate and lower the cost of drug and vaccine discovery by selecting potential compounds for testing in silico prior to biological validation.

Results

We propose a specialized string kernel for small bio-molecules, peptides and pseudo-sequences of binding interfaces. The kernel incorporates physico-chemical properties of amino acids and elegantly generalizes eight kernels, comprised of the Oligo, the Weighted Degree, the Blended Spectrum, and the Radial Basis Function. We provide a low complexity dynamic programming algorithm for the exact computation of the kernel and a linear time algorithm for its approximation. Combined with kernel ridge regression and SupCK, a novel binding pocket kernel, the proposed kernel yields biologically relevant and good prediction accuracy on the PepX database. For the first time, a machine learning predictor is capable of predicting the binding affinity of any peptide to any protein with reasonable accuracy. The method was also applied to both single-target and pan-specific Major Histocompatibility Complex class II benchmark datasets and three Quantitative Structure Affinity Model benchmark datasets.

Conclusion

On all benchmarks, our method significantly (p-value ≤ 0.057) outperforms the current state-of-the-art methods at predicting peptide-protein binding affinities. The proposed approach is flexible and can be applied to predict any quantitative biological activity. Moreover, generating reliable peptide-protein binding affinities will also improve system biology modelling of interaction pathways. Lastly, the method should be of value to a large segment of the research community with the potential to accelerate the discovery of peptide-based drugs and facilitate vaccine development. The proposed kernel is freely available at http://graal.ift.ulaval.ca/downloads/gs-kernel/.",2013-03-05 +23812974,Genome-wide identification and predictive modeling of tissue-specific alternative polyadenylation.,"

Motivation

Pre-mRNA cleavage and polyadenylation are essential steps for 3'-end maturation and subsequent stability and degradation of mRNAs. This process is highly controlled by cis-regulatory elements surrounding the cleavage/polyadenylation sites (polyA sites), which are frequently constrained by sequence content and position. More than 50% of human transcripts have multiple functional polyA sites, and the specific use of alternative polyA sites (APA) results in isoforms with variable 3'-untranslated regions, thus potentially affecting gene regulation. Elucidating the regulatory mechanisms underlying differential polyA preferences in multiple cell types has been hindered both by the lack of suitable data on the precise location of cleavage sites, as well as of appropriate tests for determining APAs with significant differences across multiple libraries.

Results

We applied a tailored paired-end RNA-seq protocol to specifically probe the position of polyA sites in three human adult tissue types. We specified a linear-effects regression model to identify tissue-specific biases indicating regulated APA; the significance of differences between tissue types was assessed by an appropriately designed permutation test. This combination allowed to identify highly specific subsets of APA events in the individual tissue types. Predictive models successfully classified constitutive polyA sites from a biologically relevant background (auROC = 99.6%), as well as tissue-specific regulated sets from each other. We found that the main cis-regulatory elements described for polyadenylation are a strong, and highly informative, hallmark for constitutive sites only. Tissue-specific regulated sites were found to contain other regulatory motifs, with the canonical polyadenylation signal being nearly absent at brain-specific polyA sites. Together, our results contribute to the understanding of the diversity of post-transcriptional gene regulation.

Availability

Raw data are deposited on SRA, accession numbers: brain SRX208132, kidney SRX208087 and liver SRX208134. Processed datasets as well as model code are published on our website: http://www.genome.duke.edu/labs/ohler/research/UTR/.

Contact

uwe.ohler@duke.edu.",2013-07-01 +22784576,A high performance profile-biomarker diagnosis for mass spectral profiles.,"

Background

Although mass spectrometry based proteomics demonstrates an exciting promise in complex diseases diagnosis, it remains an important research field rather than an applicable clinical routine for its diagnostic accuracy and data reproducibility. Relatively less investigation has been done yet in attaining high-performance proteomic pattern classification compared with the amount of endeavours in enhancing data reproducibility.

Methods

In this study, we present a novel machine learning approach to achieve a clinical level disease diagnosis for mass spectral data. We propose multi-resolution independent component analysis, a novel feature selection algorithm to tackle the large dimensionality of mass spectra, by following our local and global feature selection framework. We also develop high-performance classifiers by embedding multi-resolution independent component analysis in linear discriminant analysis and support vector machines.

Results

Our multi-resolution independent component based support vector machines not only achieve clinical level classification accuracy, but also overcome the weakness in traditional peak-selection based biomarker discovery. In addition to rigorous theoretical analysis, we demonstrate our method's superiority by comparing it with nine state-of-the-art classification and regression algorithms on six heterogeneous mass spectral profiles.

Conclusions

Our work not only suggests an alternative direction from machine learning to accelerate mass spectral proteomic technologies into a clinical routine by treating an input profile as a 'profile-biomarker', but also has positive impacts on large scale 'omics' data mining. Related source codes and data sets can be found at: https://sites.google.com/site/heyaumbioinformatics/home/proteomics.",2011-12-14 +24776231,SMOQ: a tool for predicting the absolute residue-specific quality of a single protein model with support vector machines.,"

Background

It is important to predict the quality of a protein structural model before its native structure is known. The method that can predict the absolute local quality of individual residues in a single protein model is rare, yet particularly needed for using, ranking and refining protein models.

Results

We developed a machine learning tool (SMOQ) that can predict the distance deviation of each residue in a single protein model. SMOQ uses support vector machines (SVM) with protein sequence and structural features (i.e. basic feature set), including amino acid sequence, secondary structures, solvent accessibilities, and residue-residue contacts to make predictions. We also trained a SVM model with two new additional features (profiles and SOV scores) on 20 CASP8 targets and found that including them can only improve the performance when real deviations between native and model are higher than 5Å. The SMOQ tool finally released uses the basic feature set trained on 85 CASP8 targets. Moreover, SMOQ implemented a way to convert predicted local quality scores into a global quality score. SMOQ was tested on the 84 CASP9 single-domain targets. The average difference between the residue-specific distance deviation predicted by our method and the actual distance deviation on the test data is 2.637Å. The global quality prediction accuracy of the tool is comparable to other good tools on the same benchmark.

Conclusion

SMOQ is a useful tool for protein single model quality assessment. Its source code and executable are available at: http://sysbio.rnet.missouri.edu/multicom_toolbox/.",2014-04-28 +21810900,A powerful and flexible approach to the analysis of RNA sequence count data.,"

Motivation

A number of penalization and shrinkage approaches have been proposed for the analysis of microarray gene expression data. Similar techniques are now routinely applied to RNA sequence transcriptional count data, although the value of such shrinkage has not been conclusively established. If penalization is desired, the explicit modeling of mean-variance relationships provides a flexible testing regimen that 'borrows' information across genes, while easily incorporating design effects and additional covariates.

Results

We describe BBSeq, which incorporates two approaches: (i) a simple beta-binomial generalized linear model, which has not been extensively tested for RNA-Seq data and (ii) an extension of an expression mean-variance modeling approach to RNA-Seq data, involving modeling of the overdispersion as a function of the mean. Our approaches are flexible, allowing for general handling of discrete experimental factors and continuous covariates. We report comparisons with other alternate methods to handle RNA-Seq data. Although penalized methods have advantages for very small sample sizes, the beta-binomial generalized linear model, combined with simple outlier detection and testing approaches, appears to have favorable characteristics in power and flexibility.

Availability

An R package containing examples and sample datasets is available at http://www.bios.unc.edu/research/genomic_software/BBSeq

Contact

yzhou@bios.unc.edu; fwright@bios.unc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-02 +23369147,Shape and secondary structure prediction for ncRNAs including pseudoknots based on linear SVM.,"

Background

Accurate secondary structure prediction provides important information to understanding the tertiary structures and thus the functions of ncRNAs. However, the accuracy of the native structure derivation of ncRNAs is still not satisfactory, especially on sequences containing pseudoknots. It is recently shown that using the abstract shapes, which retain adjacency and nesting of structural features but disregard the length details of helix and loop regions, can improve the performance of structure prediction. In this work, we use SVM-based feature selection to derive the consensus abstract shape of homologous ncRNAs and apply the predicted shape to structure prediction including pseudoknots.

Results

Our approach was applied to predict shapes and secondary structures on hundreds of ncRNA data sets with and without pseudoknots. The experimental results show that we can achieve 18% higher accuracy in shape prediction than the state-of-the-art consensus shape prediction tools. Using predicted shapes in structure prediction allows us to achieve approximate 29% higher sensitivity and 10% higher positive predictive value than other pseudoknot prediction tools.

Conclusions

Extensive analysis of RNA properties based on SVM allows us to identify important properties of sequences and structures related to their shapes. The combination of mass data analysis and SVM-based feature selection makes our approach a promising method for shape and structure prediction. The implemented tools, Knot Shape and Knot Structure are open source software and can be downloaded at: http://www.cse.msu.edu/~achawana/KnotShape.",2013-01-21 +22962467,LocTree2 predicts localization for all domains of life.,"

Motivation

Subcellular localization is one aspect of protein function. Despite advances in high-throughput imaging, localization maps remain incomplete. Several methods accurately predict localization, but many challenges remain to be tackled.

Results

In this study, we introduced a framework to predict localization in life's three domains, including globular and membrane proteins (3 classes for archaea; 6 for bacteria and 18 for eukaryota). The resulting method, LocTree2, works well even for protein fragments. It uses a hierarchical system of support vector machines that imitates the cascading mechanism of cellular sorting. The method reaches high levels of sustained performance (eukaryota: Q18=65%, bacteria: Q6=84%). LocTree2 also accurately distinguishes membrane and non-membrane proteins. In our hands, it compared favorably with top methods when tested on new data.

Availability

Online through PredictProtein (predictprotein.org); as standalone version at http://www.rostlab.org/services/loctree2.

Contact

localization@rostlab.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-01 +23949542,"Report on the 2nd Annual Infinium Humanmethylation450 Array Workshop: 15 April 2013 QMUL, London, UK.","The Illumina Infinium HumanMethylation450 BeadChip - the successor to their hugely popular HumanMethylation27 BeadChip - is arguably the most prevalent platform for large-scale studies of DNA methylome analysis. After the success of last year's meeting (1) that discussed initial analysis strategies for this then-new platform, this year's meeting (held at Queen Mary, University of London) included the presentation of now established pipelines and normalization methods for data analysis, as well as some exciting tools for down-stream analysis. The importance of defining cell composition was a new topic mentioned by most speakers. The epigenome varies between cell types and insuring that methylation differences are related to sample treatment and not a differing cell population is essential. The meeting was attended by 215 computational and bench scientists from 18 countries. There were 11 speakers, a small poster session, and a discussion session. Talks were recorded and are now freely available at http://www.illumina.com/applications/epigenetics/array-based_methylation_analysis/methylation-array-analysis-education.ilmn.",2013-08-15 +23466241,Is there a relationship between National Institutes of Health funding and research impact on academic urology?,"

Purpose

Scholarly productivity in the form of research contributions is important for appointment and promotion in academic urology. Some believe that this production may require significant funding. We evaluated the relationship between National Institutes of Health (NIH) funding, academic rank and research productivity, as measured by the h-index, an objective indicator of research impact on a field.

Materials and methods

A total of 361 faculty members from the top 20 NIH funded academic urology departments were examined for research productivity, as measured by the h-index and calculated from the Scopus database (http://www.info.sciverse.com/scopus). Research productivity was compared to individual funding totals, the terminal degree and academic rank.

Results

NIH funded faculty members had statistically higher research productivity than nonfunded colleagues. Research productivity increased with increasing NIH funding. Departmental NIH funding correlated poorly with the mean department h-index. Successive academic rank was associated with increasing research productivity. Full professors had higher NIH funding awards than their junior NIH funded colleagues.

Conclusions

There is an association among the h-index, NIH funding and academic rank. The h-index is a reliable method of assessing the impact of scholarly contributions toward the discourse in academic urology. It may be used as an adjunct for evaluating the scholarly productivity of academic urologists.",2013-03-01 +22238264,Interactome-transcriptome integration for predicting distant metastasis in breast cancer.,"

Motivation

High-throughput gene expression profiling yields genomic signatures that allow the prediction of clinical conditions including patient outcome. However, these signatures have limitations, such as dependency on the training set, and worse, lack of generalization.

Results

We propose a novel algorithm called ITI (interactome-transcriptome integration), to extract a genomic signature predicting distant metastasis in breast cancer by superimposition of large-scale protein-protein interaction data over a compendium of several gene expression datasets. Training on two different compendia showed that the estrogen receptor-specific signatures obtained are more stable (11-35% stability), can be generalized on independent data and performs better than previously published methods (53-74% accuracy).

Availability

The ITI algorithm source code from analysis are available under CeCILL from the ITI companion website: http://bioinformatique.marseille.inserm.fr/iti.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-11 +23370425,CYP21A2 p.E238 deletion as result of multiple microconversion events: a genetic study on an Italian congenital adrenal hyperplasia (CAH) family.,"More than 90% of congenital adrenal hyperplasia (CAH) cases are associated with mutations in the 21-hydroxylase gene (CYP21A2) in the HLA class III area on the short arm of chromosome 6p21.3. The major part of disease-causing mutations in CYP21A2 alleles are CYP21A1P-derived sequence transferred to the active gene by macroconversion or microconversion events. Only around 5% of all disease-causing CYP21A2 alleles harbor rare mutations that do not originate from the pseudogene. A complete list of all reported CYP21A2 mutations can be found in the CYP21A2 database created by the Human Cytochrome P450 (CYP) Allele Nomenclature Committee (http://www.imm.Ki.se/CYPalleles/cyp21.htm). In this report, we describe clinical and genetic findings regarding an Italian woman suffering from a classic salt-wasting form of CAH due to a severe 21-hydroxylase deficiency. A complex genetic family study was performed including a prenatal diagnosis. The patient was found to be heterozygous for p.I172N (exon 4), p.E238del (exon 6), p.M239K (exon 6), and p.F306insT (exon 7) mutations and homozygous for p.I236N (exon 6) and p.V237E (exon 6) mutations. The deletion of glutamic acid 238 is a new mutation not reported before in the literature. CYP21A2 genotyping has become a valuable complement to biochemical CAH investigation. We highlight the contribution of molecular genetic advancements to the clinical management of patients with 21-hydroxylase deficiency.",2013-03-01 +30722371,First Report of Black Stem Caused by Botryosporium longibrachiatum on Sweet Basil in Korea.,"Sweet basil, Ocimum basilicum L., is cultivated mainly for fresh consumption in Korea. 
In March 2009, in Icheon, Korea, several dozen plants showing symptoms of black stems were found in an organic farm that used polyethylene tunnels for production. The black stems were usually covered with a fungus that gave the appearance of hoar-frost on the stems, especially when plants were grown under a cool and humid environment. According to the farmer, black stems appear during the winter season of November to March when the tunnels were mostly closed. The relative humidity (RH) during that period was around 100% every night due to poor ventilation. Beginning the middle of April when both sides of the tunnels were open, providing good ventilation, no further disease development was observed. The fungus on the stems had an elongate, upright conidiophore, reaching 5 mm in length. At intervals along its length, the main axis of conidiophores produced lateral fertile branches in acropetal succession. Each lateral branch terminated in a cluster of four or five ampullae. Conidia were hyaline, oval, and 5.5 to 9.5 × 3.5 to 6 μm. The fungus was non-pigmented and colonies on potato dextrose agar were chalk white. Morphological and cultural characteristics of the fungus were consistent with the previous reports of Botryosporium longibrachiatum (Oudem.) Maire (3,4). Voucher specimens (n = 4) were housed at Korea University Herbarium (KUS). An isolate from KUS-F24010 was deposited in the Korean Agricultural Culture Collection (Accession No. KACC44849) and used for molecular analysis and pathogenicity tests. The complete internal transcribed spacer (ITS) region of rDNA was amplified with the primers ITS1/ITS4 and sequenced. The resulting sequence of 592 bp was deposited in GenBank (Accession No. JX666334). A BLAST search in GenBank showed that there was no comparable sequence of B. longibrachiatum and thus this was the first ITS sequence for the species submitted in GenBank. 
To confirm the pathogenicity, colonized mycelial plugs (3 mm in diameter) from 10-day-old PDA cultures were placed onto the stem apices (n = 10) of 2-month-old sweet basil pot plants, which were topped as normally harvested. Control plants were inoculated with uncolonized agar plugs. All plants were incubated at 22 ± 2°C in a humidified chamber with a 12-h photoperiod for 48 h, and then maintained in a greenhouse (22 ± 2°C). Three to four days after inoculation, necrotic lesions developed around the points of inoculation on all stems and expanded downwards, leading to black stems covered with the hoar-frost like fungus after 14 days. B. longibrachiatum was successfully reisolated from all inoculated stems, while control plants remained symptomless. The pathogenicity test was conducted twice with the same result. The association of B. longibrachiatum and sweet basil was previously reported (4). Several other plants including burley tobacco are also reported to be infected by this fungus (1,2). To our knowledge, this is the first etiological report of B. longibrachiatum on sweet basil globally as well as in Korea. References: (1) T. R. Anderson. Plant Dis. 67:1158, 1983. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology & Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , September 7, 2012. (3) C. V. Subramanian. Hyphomycetes. Indian Council of Agricultural Research, New Delhi, India, 1971. (4) H. T. Tribe and R. W. S. Weber. Mycologist 15:158, 2001.",2013-03-01 +22191855,JGromacs: a Java package for analyzing protein simulations.,"

Unlabelled

In this paper, we introduce JGromacs, a Java API (Application Programming Interface) that facilitates the development of cross-platform data analysis applications for Molecular Dynamics (MD) simulations. The API supports parsing and writing file formats applied by GROMACS (GROningen MAchine for Chemical Simulations), one of the most widely used MD simulation packages. JGromacs builds on the strengths of object-oriented programming in Java by providing a multilevel object-oriented representation of simulation data to integrate and interconvert sequence, structure, and dynamics information. The easy-to-learn, easy-to-use, and easy-to-extend framework is intended to simplify and accelerate the implementation and development of complex data analysis algorithms. Furthermore, a basic analysis toolkit is included in the package. The programmer is also provided with simple tools (e.g., XML-based configuration) to create applications with a user interface resembling the command-line interface of GROMACS applications.

Availability

JGromacs and detailed documentation is freely available from http://sbcb.bioch.ox.ac.uk/jgromacs under a GPLv3 license .",2012-01-09 +25048119,"Immunoinformatics of the V, C, and G domains: IMGT® definitive system for IG, TR and IgSF, MH, and MhSF.","By its creation in 1989, IMGT(®), the international ImMunoGeneTics information system(®) ( http://www.imgt.org , CNRS and Université Montpellier 2), marked the advent of immunoinformatics, which emerged at the interface between immunogenetics and bioinformatics. IMGT(®) is the global reference in immunogenetics and immunoinformatics. The accuracy and the consistency of the IMGT(®) data are based on the IMGT Scientific chart rules generated from the IMGT-ONTOLOGY axioms and concepts, which comprise IMGT standardized labels (DESCRIPTION), IMGT gene and allele nomenclature (CLASSIFICATION), IMGT unique numbering, and IMGT Collier de Perles (NUMEROTATION). The IMGT(®) standards have bridged the gap between genes, sequences, and three-dimensional (3D) structures for the receptors, chains, and domains. Started specifically for the immunoglobulins (IG) or antibodies and T cell receptors (TR), the IMGT-ONTOLOGY concepts have been extended to conventional genes of the immunoglobulin superfamily (IgSF) and major histocompatibility (MH) superfamily (MhSF), members of which are defined by the presence of at least one variable (V) or constant (C) domain, or two groove (G) domains, respectively. In this chapter, we review the IMGT(®) definitive system for the V, C, and G domains, based on the IMGT-ONTOLOGY concepts of IMGT unique numbering and IMGT Collier de Perles.",2014-01-01 +23468878,Towards improved quality of GPCR models by usage of multiple templates and profile-profile comparison.,"

Unlabelled

G-protein coupled receptors (GPCRs) are targets of nearly one third of the drugs at the current pharmaceutical market. Despite their importance in many cellular processes the crystal structures are available for less than 20 unique GPCRs of the Rhodopsin-like class. Fortunately, even though involved in different signaling cascades, this large group of membrane proteins has preserved a uniform structure comprising seven transmembrane helices that allows quite reliable comparative modeling. Nevertheless, low sequence similarity between the GPCR family members is still a serious obstacle not only in template selection but also in providing theoretical models of acceptable quality. An additional level of difficulty is the prediction of kinks and bulges in transmembrane helices. Usage of multiple templates and generation of alignments based on sequence profiles may increase the rate of success in difficult cases of comparative modeling in which the sequence similarity between GPCRs is exceptionally low. Here, we present GPCRM, a novel method for fast and accurate generation of GPCR models using averaging of multiple template structures and profile-profile comparison. In particular, GPCRM is the first GPCR structure predictor incorporating two distinct loop modeling techniques: Modeller and Rosetta together with the filtering of models based on the Z-coordinate. We tested our approach on all unique GPCR structures determined to date and report its performance in comparison with other computational methods targeting the Rhodopsin-like class. We also provide a database of precomputed GPCR models of the human receptors from that class.

Availability

GPCRM SERVER AND DATABASE: http://gpcrm.biomodellab.eu.",2013-02-28 +21554765,Enhancing genome assemblies by integrating non-sequence based data.,"

Introduction

Many genome projects were underway before the advent of high-throughput sequencing and have thus been supported by a wealth of genome information from other technologies. Such information frequently takes the form of linkage and physical maps, both of which can provide a substantial amount of data useful in de novo sequencing projects. Furthermore, the recent abundance of genome resources enables the use of conserved synteny maps identified in related species to further enhance genome assemblies.

Methods

The tammar wallaby (Macropus eugenii) is a model marsupial mammal with a low coverage genome. However, we have access to extensive comparative maps containing over 14,000 markers constructed through the physical mapping of conserved loci, chromosome painting and comprehensive linkage maps. Using a custom Bioperl pipeline, information from the maps was aligned to assembled tammar wallaby contigs using BLAT. This data was used to construct pseudo paired-end libraries with intervals ranging from 5-10 MB. We then used Bambus (a program designed to scaffold eukaryotic genomes by ordering and orienting contigs through the use of paired-end data) to scaffold our libraries. To determine how map data compares to sequence based approaches to enhance assemblies, we repeated the experiment using a 0.5× coverage of unique reads from 4 KB and 8 KB Illumina paired-end libraries. Finally, we combined both the sequence and non-sequence-based data to determine how a combined approach could further enhance the quality of the low coverage de novo reconstruction of the tammar wallaby genome.

Results

Using the map data alone, we were able to order 2.2% of the initial contigs into scaffolds, and increase the N50 scaffold size to 39 KB (36 KB in the original assembly). Using only the 0.5× paired-end sequence based data, 53% of the initial contigs were assigned to scaffolds. Combining both data sets resulted in a further 2% increase in the number of initial contigs integrated into a scaffold (55% total) but a 35% increase in N50 scaffold size over the use of sequence-based data alone.

Conclusions

We provide a relatively simple pipeline utilizing existing bioinformatics tools to integrate map data into a genome assembly which is available at http://www.mcb.uconn.edu/fac.php?name=paska. While the map data only contributed minimally to assigning the initial contigs to scaffolds in the new assembly, it greatly increased the N50 size. This process added structure to our low coverage assembly, greatly increasing its utility in further analyses.",2011-05-28 +24336862,The Global Invertebrate Genomics Alliance (GIGA): developing community resources to study diverse invertebrate genomes.,"Over 95% of all metazoan (animal) species comprise the ""invertebrates,"" but very few genomes from these organisms have been sequenced. We have, therefore, formed a ""Global Invertebrate Genomics Alliance"" (GIGA). Our intent is to build a collaborative network of diverse scientists to tackle major challenges (e.g., species selection, sample collection and storage, sequence assembly, annotation, analytical tools) associated with genome/transcriptome sequencing across a large taxonomic spectrum. We aim to promote standards that will facilitate comparative approaches to invertebrate genomics and collaborations across the international scientific community. Candidate study taxa include species from Porifera, Ctenophora, Cnidaria, Placozoa, Mollusca, Arthropoda, Echinodermata, Annelida, Bryozoa, and Platyhelminthes, among others. GIGA will target 7000 noninsect/nonnematode species, with an emphasis on marine taxa because of the unrivaled phyletic diversity in the oceans. Priorities for selecting invertebrates for sequencing will include, but are not restricted to, their phylogenetic placement; relevance to organismal, ecological, and conservation research; and their importance to fisheries and human health. 
We highlight benefits of sequencing both whole genomes (DNA) and transcriptomes and also suggest policies for genomic-level data access and sharing based on transparency and inclusiveness. The GIGA Web site (http://giga.nova.edu) has been launched to facilitate this collaborative venture.",2014-01-01 +23859003,Finding simple rules for discriminating folding rate change upon single mutation by statistical and learning methods.,"Protein folding rate is a valuable clue for understanding the variations in protein folding kinetics. The ability to accurately discriminate protein folding rate change is very helpful in protein design. However, there are fewer studies on the influence of amino acid substitution to protein folding rates. In our earlier studies, we constructed a dataset of 467 mutants upon amino acid substitution and proposed novel methods for discriminating and predicting the accelerating and decelerating mutants during the folding process. This study aimed to effectively develop simple rules for discriminating accelerating mutants from decelerating ones upon single amino acid substitution. The main points of the study were to build a more general dataset F661 with 661 mutants, analyze the dataset systematically, and then implement different data mining techniques to build discrimination rules. Furthermore, the rules obtained from different methods were interpreted, evaluated, compared and integrated. The results appeared that the present approach may effectively develop simple rules from these mutants and the quality of the rules may be improved by combining the statistical and learning methods. These results suggest that the present method, as well as the rules, may advance the understanding of discriminating protein folding rate change. 
The details of the rules along with relevant information have been integrated and available freely at http://bioinformatics.myweb.hinet.net/rulefr.htm.",2014-01-01 +23453308,Jacaric acid and its octadecatrienoic acid geoisomers induce apoptosis selectively in cancerous human prostate cells: a mechanistic and 3-D structure-activity study.,"Plant-derived non-essential fatty acids are important dietary nutrients, and some are purported to have chemopreventive properties against various cancers, including that of the prostate. In this study, we determined the ability of seven dietary C-18 fatty acids to cause cytotoxicity and induce apoptosis in various types of human prostate cancer cells. These fatty acids included jacaric and punicic acid found in jacaranda and pomegranate seed oil, respectively, three octadecatrienoic geometric isomers (alpha- and beta-calendic and catalpic acid) and two mono-unsaturated C-18 fatty acids (trans- and cis-vaccenic acid). Jacaric acid and four of its octadecatrienoic geoisomers selectively induced apoptosis in hormone-dependent (LNCaP) and -independent (PC-3) human prostate cancer cells, whilst not affecting the viability of normal human prostate epithelial cells (RWPE-1). Jacaric acid induced concentration- and time-depedent LNCaP cell death through activation of intrinsic and extrinsic apoptotic pathways resulting in cleavage of PARP-1, modulation of pro- and antiapoptotic Bcl-2 family of proteins and increased cleavage of caspase-3, -8 and -9. Moreover, activation of a cell death-inducing signalling cascade involving death receptor 5 was observed. Jacaric acid induced apoptosis in PC-3 cells by activation of the intrinsic pathway only. The spatial conformation cis, trans, cis of jacaric and punicic acid was shown to play a key role in the increased potency and efficacy of these two fatty acids in comparison to the five other C-18 fatty acids tested. 
Three-dimensional conformational analysis using the PubChem Database (http://pubchem.ncbi.nlm.nih.gov) showed that the cytotoxic potency of the C-18 fatty acids was related to their degree of conformational similarity to our cytotoxic reference compound, punicic acid, based on optimized shape (ST) and feature (CT) similarity scores, with jacaric acid being most 'biosimilar' (ST(ST-opt)=0.81; CT(CT-opt)=0.45). This 3-D analysis of structural similarity enabled us to rank geoisomeric fatty acids according to cytotoxic potency, whereas a 2-D positional assessment of cis/trans structure did not. Our findings provide mechanistic evidence that nutrition-derived non-essential fatty acids have chemopreventive biological activities and exhibit 3-D structure-activity relationships that could be exploited to develop new strategies for the prevention or treatment of prostate cancer regardless of hormone dependency.",2013-02-27 +23446039,CoRSeqV3-C: a novel HIV-1 subtype C specific V3 sequence based coreceptor usage prediction algorithm.,"

Background

The majority of HIV-1 subjects worldwide are infected with HIV-1 subtype C (C-HIV). Although C-HIV predominates in developing regions of the world such as Southern Africa and Central Asia, C-HIV is also spreading rapidly in countries with more developed economies and health care systems, whose populations are more likely to have access to wider treatment options, including the CCR5 antagonist maraviroc (MVC). The ability to reliably determine C-HIV coreceptor usage is therefore becoming increasingly more important. In silico V3 sequence based coreceptor usage prediction algorithms are a relatively rapid and cost effective method for determining HIV-1 coreceptor specificity. In this study, we elucidated the V3 sequence determinants of C-HIV coreceptor usage, and used this knowledge to develop and validate a novel, user friendly, and highly sensitive C-HIV specific coreceptor usage prediction algorithm.

Results

We characterized every phenotypically-verified C-HIV gp120 V3 sequence available in the Los Alamos HIV Database. Sequence analyses revealed that compared to R5 C-HIV V3 sequences, CXCR4-using C-HIV V3 sequences have significantly greater amino acid variability, increased net charge, increased amino acid length, increased frequency of insertions and substitutions within the GPGQ crown motif, and reduced frequency of glycosylation sites. Based on these findings, we developed a novel C-HIV specific coreceptor usage prediction algorithm (CoRSeqV3-C), which we show has superior sensitivity for determining CXCR4 usage by C-HIV strains compared to all other available algorithms and prediction rules, including Geno2pheno[coreceptor] and WebPSSMSINSI-C, which has been designed specifically for C-HIV.

Conclusions

CoRSeqV3-C is now openly available for public use at http://www.burnet.edu.au/coreceptor. Our results show that CoRSeqV3-C is the most sensitive V3 sequence based algorithm presently available for predicting CXCR4 usage of C-HIV strains, without compromising specificity. CoRSeqV3-C may be potentially useful for assisting clinicians to decide the best treatment options for patients with C-HIV infection, and will be helpful for basic studies of C-HIV pathogenesis.",2013-02-27 +24447135,Quantitative analysis of colony morphology in yeast.,"Microorganisms often form multicellular structures such as biofilms and structured colonies that can influence the organism's virulence, drug resistance, and adherence to medical devices. Phenotypic classification of these structures has traditionally relied on qualitative scoring systems that limit detailed phenotypic comparisons between strains. Automated imaging and quantitative analysis have the potential to improve the speed and accuracy of experiments designed to study the genetic and molecular networks underlying different morphological traits. For this reason, we have developed a platform that uses automated image analysis and pattern recognition to quantify phenotypic signatures of yeast colonies. Our strategy enables quantitative analysis of individual colonies, measured at a single time point or over a series of time-lapse images, as well as the classification of distinct colony shapes based on image-derived features. Phenotypic changes in colony morphology can be expressed as changes in feature space trajectories over time, thereby enabling the visualization and quantitative analysis of morphological development. 
To facilitate data exploration, results are plotted dynamically through an interactive Yeast Image Analysis web application (YIMAA; http://yimaa.cs.tut.fi) that integrates the raw and processed images across all time points, allowing exploration of the image-based features and principal components associated with morphological development.",2014-01-01 +22433281,JETTA: junction and exon toolkits for transcriptome analysis.,"

Summary

High-throughput genome-wide studies of alternatively spliced mRNA transcripts have become increasingly important in clinical research. Consequently, easy-to-use software tools are required to process data from these studies, for example, using exon and junction arrays. Here, we introduce JETTA, an integrated software package for the calculation of gene expression indices as well as the identification and visualization of alternative splicing events. We demonstrate the software using data of human liver and muscle samples hybridized on an exon-junction array.

Availability

JETTA and its demonstrations are freely available at http://igenomed.stanford.edu/~junhee/JETTA/index.html",2012-03-19 +26589047,Systematic Parametrization of Polarizable Force Fields from Quantum Chemistry Data.,"We introduce ForceBalance, a method and free software package for systematic force field optimization with the ability to parametrize a wide variety of functional forms using flexible combinations of reference data. We outline several important challenges in force field development and how they are addressed in ForceBalance, and present an example calculation where these methods are applied to develop a highly accurate polarizable water model. ForceBalance is available for free download at https://simtk.org/home/forcebalance.",2012-11-29 +23442184,De novo assembly and characterization of transcriptome using Illumina paired-end sequencing and identification of CesA gene in ramie (Boehmeria nivea L. Gaud).,"

Background

Ramie fiber, extracted from vegetative organ stem bast, is one of the most important natural fibers. Understanding the molecular mechanisms of the vegetative growth of the ramie and the formation and development of bast fiber is essential for improving the yield and quality of the ramie fiber. However, only 418 expressed tag sequences (ESTs) of ramie deposited in public databases are far from sufficient to understand the molecular mechanisms. Thus, high-throughput transcriptome sequencing is essential to generate enormous ramie transcript sequences for the purpose of gene discovery, especially genes such as the cellulose synthase (CesA) gene.

Results

Using Illumina paired-end sequencing, about 53 million sequencing reads were generated. De novo assembly yielded 43,990 unigenes with an average length of 824 bp. By sequence similarity searching for known proteins, a total of 34,192 (77.7%) genes were annotated for their function. Out of these annotated unigenes, 16,050 and 13,042 unigenes were assigned to gene ontology and clusters of orthologous group, respectively. Searching against the Kyoto Encyclopedia of Genes and Genomes Pathway database (KEGG) indicated that 19,846 unigenes were mapped to 126 KEGG pathways, and 565 genes were assigned to starch and sucrose metabolic pathway which was related with cellulose biosynthesis. Additionally, 51 CesA genes involved in cellulose biosynthesis were identified. Analysis of tissue-specific expression pattern of the 51 CesA genes revealed that there were 36 genes with relatively high expression levels in the stem bark, which suggests that they are most likely responsible for the biosynthesis of bast fiber.

Conclusion

To the best of our knowledge, this study is the first to characterize the ramie transcriptome and the substantial amount of transcripts obtained will accelerate the understanding of the ramie vegetative growth and development mechanism. Moreover, discovery of the 36 CesA genes with relatively high expression levels in the stem bark will present an opportunity to understand the ramie bast fiber formation and development mechanisms.",2013-02-26 +23181585,A Monte Carlo-based framework enhances the discovery and interpretation of regulatory sequence motifs.,"

Background

Discovery of functionally significant short, statistically overrepresented subsequence patterns (motifs) in a set of sequences is a challenging problem in bioinformatics. Oftentimes, not all sequences in the set contain a motif. These non-motif-containing sequences complicate the algorithmic discovery of motifs. Filtering the non-motif-containing sequences from the larger set of sequences while simultaneously determining the identity of the motif is, therefore, desirable and a non-trivial problem in motif discovery research.

Results

We describe MotifCatcher, a framework that extends the sensitivity of existing motif-finding tools by employing random sampling to effectively remove non-motif-containing sequences from the motif search. We developed two implementations of our algorithm; each built around a commonly used motif-finding tool, and applied our algorithm to three diverse chromatin immunoprecipitation (ChIP) data sets. In each case, the motif finder with the MotifCatcher extension demonstrated improved sensitivity over the motif finder alone. Our approach organizes candidate functionally significant discovered motifs into a tree, which allowed us to make additional insights. In all cases, we were able to support our findings with experimental work from the literature.

Conclusions

Our framework demonstrates that additional processing at the sequence entry level can significantly improve the performance of existing motif-finding tools. For each biological data set tested, we were able to propose novel biological hypotheses supported by experimental work from the literature. Specifically, in Escherichia coli, we suggested binding site motifs for 6 non-traditional LexA protein binding sites; in Saccharomyces cerevisiae, we hypothesize 2 disparate mechanisms for novel binding sites of the Cse4p protein; and in Halobacterium sp. NRC-1, we discovered subtle differences in a general transcription factor (GTF) binding site motif across several data sets. We suggest that small differences in our discovered motif could confer specificity for one or more homologous GTF proteins. We offer a free implementation of the MotifCatcher software package at http://www.bme.ucdavis.edu/facciotti/resources_data/software/.",2012-11-27 +23842806,BioSmalltalk: a pure object system and library for bioinformatics.,"

Summary

We have developed BioSmalltalk, a new environment system for pure object-oriented bioinformatics programming. Adaptive end-user programming systems tend to become more important for discovering biological knowledge, as is demonstrated by the emergence of open-source programming toolkits for bioinformatics in the past years. Our software is intended to bridge the gap between bioscientists and rapid software prototyping while preserving the possibility of scaling to whole-system biology applications. BioSmalltalk performs better in terms of execution time and memory usage than Biopython and BioPerl for some classical situations.

Availability

BioSmalltalk is cross-platform and freely available (MIT license) through the Google Project Hosting at http://code.google.com/p/biosmalltalk

Contact

hernan.morales@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-09 +21995452,Bayesian hierarchical clustering for microarray time series data with replicates and outlier measurements.,"

Background

Post-genomic molecular biology has resulted in an explosion of data, providing measurements for large numbers of genes, proteins and metabolites. Time series experiments have become increasingly common, necessitating the development of novel analysis tools that capture the resulting data structure. Outlier measurements at one or more time points present a significant challenge, while potentially valuable replicate information is often ignored by existing techniques.

Results

We present a generative model-based Bayesian hierarchical clustering algorithm for microarray time series that employs Gaussian process regression to capture the structure of the data. By using a mixture model likelihood, our method permits a small proportion of the data to be modelled as outlier measurements, and adopts an empirical Bayes approach which uses replicate observations to inform a prior distribution of the noise variance. The method automatically learns the optimum number of clusters and can incorporate non-uniformly sampled time points. Using a wide variety of experimental data sets, we show that our algorithm consistently yields higher quality and more biologically meaningful clusters than current state-of-the-art methodologies. We highlight the importance of modelling outlier values by demonstrating that noisy genes can be grouped with other genes of similar biological function. We demonstrate the importance of including replicate information, which we find enables the discrimination of additional distinct expression profiles.

Conclusions

By incorporating outlier measurements and replicate values, this clustering algorithm for time series microarray data provides a step towards a better treatment of the noise inherent in measurements from high-throughput genomic technologies. Timeseries BHC is available as part of the R package 'BHC' (version 1.5), which is available for download from Bioconductor (version 2.9 and above) via http://www.bioconductor.org/packages/release/bioc/html/BHC.html?pagewanted=all.",2011-10-13 +22936970,Insights from classifying visual concepts with multiple kernel learning.,"Combining information from various image features has become a standard technique in concept recognition tasks. However, the optimal way of fusing the resulting kernel functions is usually unknown in practical applications. Multiple kernel learning (MKL) techniques allow to determine an optimal linear combination of such similarity matrices. Classical approaches to MKL promote sparse mixtures. Unfortunately, 1-norm regularized MKL variants are often observed to be outperformed by an unweighted sum kernel. The main contributions of this paper are the following: we apply a recently developed non-sparse MKL variant to state-of-the-art concept recognition tasks from the application domain of computer vision. We provide insights on benefits and limits of non-sparse MKL and compare it against its direct competitors, the sum-kernel SVM and sparse MKL. We report empirical results for the PASCAL VOC 2009 Classification and ImageCLEF2010 Photo Annotation challenge data sets. Data sets (kernel matrices) as well as further information are available at http://doc.ml.tu-berlin.de/image_mkl/(Accessed 2012 Jun 25).",2012-08-24 +24618471,Turtle: identifying frequent k-mers with cache-efficient algorithms.,"

Motivation

Counting the frequencies of k-mers in read libraries is often a first step in the analysis of high-throughput sequencing data. Infrequent k-mers are assumed to be a result of sequencing errors. The frequent k-mers constitute a reduced but error-free representation of the experiment, which can inform read error correction or serve as the input to de novo assembly methods. Ideally, the memory requirement for counting should be linear in the number of frequent k-mers and not in the, typically much larger, total number of k-mers in the read library.

Results

We present a novel method that balances time, space and accuracy requirements to efficiently extract frequent k-mers even for high-coverage libraries and large genomes such as human. Our method is designed to minimize cache misses in a cache-efficient manner by using a pattern-blocked Bloom filter to remove infrequent k-mers from consideration in combination with a novel sort-and-compact scheme, instead of a hash, for the actual counting. Although this increases theoretical complexity, the savings in cache misses reduce the empirical running times. A variant of the method can resort to a counting Bloom filter for even larger savings in memory at the expense of false-negative rates in addition to the false-positive rates common to all Bloom filter-based approaches. A comparison with the state-of-the-art shows reduced memory requirements and running times.

Availability and implementation

The tools are freely available for download at http://bioinformatics.rutgers.edu/Software/Turtle and http://figshare.com/articles/Turtle/791582.",2014-03-10 +24618466,FISH: fast and accurate diploid genotype imputation via segmental hidden Markov model.,"

Motivation

Fast and accurate genotype imputation is necessary for facilitating gene-mapping studies, especially with the ever increasing numbers of both common and rare variants generated by high-throughput-sequencing experiments. However, most of the existing imputation approaches suffer from either inaccurate results or heavy computational demand.

Results

In this article, aiming to perform fast and accurate genotype-imputation analysis, we propose a novel, fast and yet accurate method to impute diploid genotypes. Specifically, we extend a hidden Markov model that is widely used to describe haplotype structures. But we model hidden states onto single reference haplotypes rather than onto pairs of haplotypes. Consequently the computational complexity is linear to size of reference haplotypes. We further develop an algorithm 'merge-and-recover (MAR)' to speed up the calculation. Working on compact representation of segmental reference haplotypes, the MAR algorithm always calculates an exact form of transition probabilities regardless of partition of segments. Both simulation studies and real-data analyses demonstrated that our proposed method was comparable to most of the existing popular methods in terms of imputation accuracy, but was much more efficient in terms of computation. The MAR algorithm can further speed up the calculation by several folds without loss of accuracy. The proposed method will be useful in large-scale imputation studies with a large number of reference subjects.

Availability

The implemented multi-threading software FISH is freely available for academic use at https://sites.google.com/site/lzhanghomepage/FISH.",2014-03-10 +24447531,MafFilter: a highly flexible and extensible multiple genome alignment files processor.,"

Background

Sequence alignments are the starting point for most evolutionary and comparative analyses. Full genome sequences can be compared to study patterns of within and between species variation. Genome sequence alignments are complex structures containing information such as coordinates, quality scores and synteny structure, which are stored in Multiple Alignment Format (MAF) files. Processing these alignments therefore involves parsing and manipulating typically large MAF files in an efficient way.

Results

MafFilter is a command-line driven program written in C++ that enables the processing of genome alignments stored in the Multiple Alignment Format in an efficient and extensible manner. It provides an extensive set of tools which can be parametrized and combined by the user via option files. We demonstrate the software's functionality and performance on several biological examples covering Primate genomics and fungal population genomics. Example analyses involve window-based alignment filtering, feature extractions and various statistics, phylogenetics and population genomics calculations.

Conclusions

MafFilter is a highly efficient and flexible tool to analyse multiple genome alignments. By allowing the user to combine a large set of available methods, as well as designing his/her own, it enables the design of custom data filtering and analysis pipelines for genomic studies. MafFilter is an open source software available at http://bioweb.me/maffilter.",2014-01-22 +22271825,Subspace learning from image gradient orientations.,"We introduce the notion of subspace learning from image gradient orientations for appearance-based object recognition. As image data are typically noisy and noise is substantially different from Gaussian, traditional subspace learning from pixel intensities very often fails to estimate reliably the low-dimensional subspace of a given data population. We show that replacing pixel intensities with gradient orientations and the ℓ₂ norm with a cosine-based distance measure offers, to some extend, a remedy to this problem. Within this framework, which we coin Image Gradient Orientations (IGO) subspace learning, we first formulate and study the properties of Principal Component Analysis of image gradient orientations (IGO-PCA). We then show its connection to previously proposed robust PCA techniques both theoretically and experimentally. Finally, we derive a number of other popular subspace learning techniques, namely, Linear Discriminant Analysis (LDA), Locally Linear Embedding (LLE), and Laplacian Eigenmaps (LE). Experimental results show that our algorithms significantly outperform popular methods such as Gabor features and Local Binary Patterns and achieve state-of-the-art performance for difficult problems such as illumination and occlusion-robust face recognition. In addition to this, the proposed IGO-methods require the eigendecomposition of simple covariance matrices and are as computationally efficient as their corresponding ℓ₂ norm intensity-based counterparts. 
Matlab code for the methods presented in this paper can be found at http://ibug.doc.ic.ac.uk/resources.",2012-12-01 +22623377,PepDistiller: A quality control tool to improve the sensitivity and accuracy of peptide identifications in shotgun proteomics.,"In this study, we presented a quality control tool named PepDistiller to facilitate the validation of MASCOT search results. By including the number of tryptic termini, and integrating a refined false discovery rate (FDR) calculation method, we demonstrated the improved sensitivity of peptide identifications obtained from semitryptic search results. Based on the analysis of a complex data set, approximately 7% more peptide identifications were obtained using PepDistiller than using MASCOT Percolator. Moreover, the refined method generated lower FDR estimations than the percentage of incorrect target (PIT) fixed method applied in Percolator. Using a standard data set, we further demonstrated the increased accuracy of the refined FDR estimations relative to the PIT-fixed FDR estimations. PepDistiller is fast and convenient to use, and is freely available for academic access. The software can be downloaded from http://www.bprc.ac.cn/pepdistiller.",2012-06-01 +21622664,RightField: embedding ontology annotation in spreadsheets.,"

Motivation

In the Life Sciences, guidelines, checklists and ontologies describing what metadata is required for the interpretation and reuse of experimental data are emerging. Data producers, however, may have little experience in the use of such standards and require tools to support this form of data annotation.

Results

RightField is an open source application that provides a mechanism for embedding ontology annotation support for Life Science data in Excel spreadsheets. Individual cells, columns or rows can be restricted to particular ranges of allowed classes or instances from chosen ontologies. The RightField-enabled spreadsheet presents selected ontology terms to the users as a simple drop-down list, enabling scientists to consistently annotate their data. The result is 'semantic annotation by stealth', with an annotation process that is less error-prone, more efficient, and more consistent with community standards.

Availability and implementation

RightField is open source under a BSD license and freely available from http://www.rightfield.org.uk",2011-05-26 +22155870,Control-FREEC: a tool for assessing copy number and allelic content using next-generation sequencing data.,"

Summary

More and more cancer studies use next-generation sequencing (NGS) data to detect various types of genomic variation. However, even when researchers have such data at hand, single-nucleotide polymorphism arrays have been considered necessary to assess copy number alterations and especially loss of heterozygosity (LOH). Here, we present the tool Control-FREEC that enables automatic calculation of copy number and allelic content profiles from NGS data, and consequently predicts regions of genomic alteration such as gains, losses and LOH. Taking as input aligned reads, Control-FREEC constructs copy number and B-allele frequency profiles. The profiles are then normalized, segmented and analyzed in order to assign genotype status (copy number and allelic content) to each genomic region. When a matched normal sample is provided, Control-FREEC discriminates somatic from germline events. Control-FREEC is able to analyze overdiploid tumor samples and samples contaminated by normal cells. Low mappability regions can be excluded from the analysis using provided mappability tracks.

Availability

C++ source code is available at: http://bioinfo.curie.fr/projects/freec/

Contact

freec@curie.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-06 +21813454,SNVer: a statistical tool for variant calling in analysis of pooled or individual next-generation sequencing data.,"We develop a statistical tool SNVer for calling common and rare variants in analysis of pooled or individual next-generation sequencing (NGS) data. We formulate variant calling as a hypothesis testing problem and employ a binomial-binomial model to test the significance of observed allele frequency against sequencing error. SNVer reports one single overall P-value for evaluating the significance of a candidate locus being a variant based on which multiplicity control can be obtained. This is particularly desirable because tens of thousands loci are simultaneously examined in typical NGS experiments. Each user can choose the false-positive error rate threshold he or she considers appropriate, instead of just the dichotomous decisions of whether to 'accept or reject the candidates' provided by most existing methods. We use both simulated data and real data to demonstrate the superior performance of our program in comparison with existing methods. SNVer runs very fast and can complete testing 300 K loci within an hour. This excellent scalability makes it feasible for analysis of whole-exome sequencing data, or even whole-genome sequencing data using high performance computing cluster. SNVer is freely available at http://snver.sourceforge.net/.",2011-08-03 +22135461,Sparse linear modeling of next-generation mRNA sequencing (RNA-Seq) data for isoform discovery and abundance estimation.,"Since the inception of next-generation mRNA sequencing (RNA-Seq) technology, various attempts have been made to utilize RNA-Seq data in assembling full-length mRNA isoforms de novo and estimating abundance of isoforms. However, for genes with more than a few exons, the problem tends to be challenging and often involves identifiability issues in statistical modeling. 
We have developed a statistical method called ""sparse linear modeling of RNA-Seq data for isoform discovery and abundance estimation"" (SLIDE) that takes exon boundaries and RNA-Seq data as input to discern the set of mRNA isoforms that are most likely to present in an RNA-Seq sample. SLIDE is based on a linear model with a design matrix that models the sampling probability of RNA-Seq reads from different mRNA isoforms. To tackle the model unidentifiability issue, SLIDE uses a modified Lasso procedure for parameter estimation. Compared with deterministic isoform assembly algorithms (e.g., Cufflinks), SLIDE considers the stochastic aspects of RNA-Seq reads in exons from different isoforms and thus has increased power in detecting more novel isoforms. Another advantage of SLIDE is its flexibility of incorporating other transcriptomic data such as RACE, CAGE, and EST into its model to further increase isoform discovery accuracy. SLIDE can also work downstream of other RNA-Seq assembly algorithms to integrate newly discovered genes and exons. Besides isoform discovery, SLIDE sequentially uses the same linear model to estimate the abundance of discovered isoforms. Simulation and real data studies show that SLIDE performs as well as or better than major competitors in both isoform discovery and abundance estimation. The SLIDE software package is available at https://sites.google.com/site/jingyijli/SLIDE.zip.",2011-12-01 +23202747,ToxPi GUI: an interactive visualization tool for transparent integration of data from diverse sources of evidence.,"

Motivation

Scientists and regulators are often faced with complex decisions, where use of scarce resources must be prioritized using collections of diverse information. The Toxicological Prioritization Index (ToxPi™) was developed to enable integration of multiple sources of evidence on exposure and/or safety, transformed into transparent visual rankings to facilitate decision making. The rankings and associated graphical profiles can be used to prioritize resources in various decision contexts, such as testing chemical toxicity or assessing similarity of predicted compound bioactivity profiles. The amount and types of information available to decision makers are increasing exponentially, while the complex decisions must rely on specialized domain knowledge across multiple criteria of varying importance. Thus, the ToxPi bridges a gap, combining rigorous aggregation of evidence with ease of communication to stakeholders.

Results

An interactive ToxPi graphical user interface (GUI) application has been implemented to allow straightforward decision support across a variety of decision-making contexts in environmental health. The GUI allows users to easily import and recombine data, then analyze, visualize, highlight, export and communicate ToxPi results. It also provides a statistical metric of stability for both individual ToxPi scores and relative prioritized ranks.

Availability

The ToxPi GUI application, complete user manual and example data files are freely available from http://comptox.unc.edu/toxpi.php.",2012-11-29 +21266471,A novel compression tool for efficient storage of genome resequencing data.,"With the advent of DNA sequencing technologies, more and more reference genome sequences are available for many organisms. Analyzing sequence variation and understanding its biological importance are becoming a major research aim. However, how to store and process the huge amount of eukaryotic genome data, such as those of the human, mouse and rice, has become a challenge to biologists. Currently available bioinformatics tools used to compress genome sequence data have some limitations, such as the requirement of the reference single nucleotide polymorphisms (SNPs) map and information on deletions and insertions. Here, we present a novel compression tool for storing and analyzing Genome ReSequencing data, named GRS. GRS is able to process the genome sequence data without the use of the reference SNPs and other sequence variation information and automatically rebuild the individual genome sequence data using the reference genome sequence. When its performance was tested on the first Korean personal genome sequence data set, GRS was able to achieve ∼159-fold compression, reducing the size of the data from 2986.8 to 18.8 MB. While being tested against the sequencing data from rice and Arabidopsis thaliana, GRS compressed the 361.0 MB rice genome data to 4.4 MB, and the A. thaliana genome data from 115.1 MB to 6.5 KB. This de novo compression tool is available at http://gmdd.shgmo.org/Computational-Biology/GRS.",2011-01-25 +21487532,SOLiDzipper: A High Speed Encoding Method for the Next-Generation Sequencing Data.,"

Background

Next-generation sequencing (NGS) methods pose computational challenges of handling large volumes of data. Although cloud computing offers a potential solution to these challenges, transferring a large data set across the internet is the biggest obstacle, which may be overcome by efficient encoding methods. When encoding is used to facilitate data transfer to the cloud, the time factor is equally as important as the encoding efficiency. Moreover, to take advantage of parallel processing in cloud computing, a parallel technique to decode and split compressed data in the cloud is essential. Hence in this review, we present SOLiDzipper, a new encoding method for NGS data.

Methods

The basic strategy of SOLiDzipper is to divide and encode. NGS data files contain both the sequence and non-sequence information whose encoding efficiencies are different. In SOLiDzipper, encoded data are stored in binary data block that does not contain the characteristic information of a specific sequence platform, which means that data can be decoded according to a desired platform even in cases of Illumina, Solexa or Roche 454 data.

Results

The main calculation time using Crossbow was 173 minutes when 40 EC2 nodes were involved. In that case, an analysis preparation time of 464 minutes is required to encode data in the latest DNA compression method like G-SQZ and transmit it on a 183 Mbit/s bandwidth. However, it takes 194 minutes to encode and transmit data with SOLiDzipper under the same bandwidth conditions. These results indicate that the entire processing time can be reduced according to the encoding methods used, under the same network bandwidth conditions. Considering the limited network bandwidth, high-speed, high-efficiency encoding methods such as SOLiDzipper can make a significant contribution to higher productivity in labs seeking to take advantage of the cloud as an alternative to local computing.

Availability

http://szipper.dinfree.com. Academic/non-profit: Binary available for direct download at no cost. For-profit: Submit request for for-profit license from the web-site.",2011-03-10 +22697456,QuaMeter: multivendor performance metrics for LC-MS/MS proteomics instrumentation.,"LC-MS/MS-based proteomics studies rely on stable analytical system performance that can be evaluated by objective criteria. The National Institute of Standards and Technology (NIST) introduced the MSQC software to compute diverse metrics from experimental LC-MS/MS data, enabling quality analysis and quality control (QA/QC) of proteomics instrumentation. In practice, however, several attributes of the MSQC software prevent its use for routine instrument monitoring. Here, we present QuaMeter, an open-source tool that improves MSQC in several aspects. QuaMeter can directly read raw data from instruments manufactured by different vendors. The software can work with a wide variety of peptide identification software for improved reliability and flexibility. Finally, QC metrics implemented in QuaMeter are rigorously defined and tested. The source code and binary versions of QuaMeter are available under Apache 2.0 License at http://fenchurch.mc.vanderbilt.edu.",2012-06-27 +23483122,Isolation and identification of Geosmithia argillacea from a fungal ball in the lung of a tuberculosis patient.,"Geosmithia argillacea, an anamorph of Talaromyces eburneus, is a thermophilic filamentous fungus that has a phenotype similar to that of the Penicillium species, except for the creamy-white colonies and cylindrical conidia. Recently, a new genus called Rasamsonia has been proposed, which is to accommodate the Talaromyces and Geosmithia species. Here, we report the first Korean case of G. argillacea isolated from a patient with a fungal ball. The patient was a 44-yr-old Korean man with a history of pulmonary tuberculosis and aspergilloma. 
The newly developed fungal ball in his lung was removed and cultured to identify the fungus. The fungal colonies were white and slow-growing, and the filaments resembled those of Penicillium. Molecular identification was carried out by sequencing the internal transcribed spacer (ITS) region of the 28S rDNA and the β-tubulin genes. A comparative sequence analysis using the GenBank (http://blast.ncbi.nlm.nih.gov/) database was performed with the basic local alignment search tool (BLAST) algorithm. The results revealed a 97-100% similarity with the G. argillacea ITS sequence. This case should increase awareness among physicians about the pathogenic potential of G. argillacea in humans and help them accurately identify this fungus, because it can be easily confused with Penicillium and Paecilomyces species owing to their similar phenotypic and microscopic characteristics. A molecular approach should be employed to enable accurate identification of G. argillacea.",2013-02-21 +23825368,MMuFLR: missense mutation and frameshift location reporter.,"

Motivation

Cancer researchers seeking immunotherapy targets in cancer cells need tools to locate highly expressed proteins unique to cancer cells. Missense mutation and frameshift location reporter (MMuFLR), a Galaxy-based workflow, analyzes next-generation sequencing paired read RNA-seq output to reliably identify small frameshift mutations and missense mutations in highly expressed protein-coding genes. MMuFLR ignores known SNPs, low quality reads and poly-A/T sequences. For each frameshift and missense mutation identified, MMuFLR provides the location and sequence of the amino acid substitutions in the novel protein candidates for direct input into epitope evaluation tools.

Availability

http://toolshed.g2.bx.psu.edu/

Contact

rath0096@umn.edu or johns198@umn.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-03 +21699401,A topology-preserving selection and clustering approach to multidimensional biological data.,"Multidimensional genome-wide data (e.g., gene expression microarray data) provide rich information and widespread applications in integrative biology. However, little attention has been paid to the inherent relationships within these natural data. By simply viewing multidimensional microarray data scattered over hyperspace, the spatial properties (topological structure) of the data clouds may reveal the underlying relationships. Based on this idea, we herein make analytical improvements by introducing a topology-preserving selection and clustering (TPSC) approach to complex large-scale microarray data. Specifically, the integration of self-organizing map (SOM) and singular value decomposition allows genome-wide selection on sound foundations of statistical inference. Moreover, this approach is complemented with an SOM-based two-phase gene clustering procedure, allowing the topology-preserving identification of gene clusters. These gene clusters with highly similar expression patterns can facilitate many aspects of biological interpretations in terms of functional and regulatory relevance. As demonstrated by processing large and complex datasets of the human cell cycle, stress responses, and host cell responses to pathogen infection, our proposed method can yield better characteristic features from the whole datasets compared to conventional routines. We hence conclude that the topology-preserving selection and clustering without a priori assumption on data structure allow the in-depth mining of biological information in a more accurate and unbiased manner. A Web server ( http://www.cs.bris.ac.uk/~hfang/TPSC ) hosting a MATLAB package that implements the methodology is freely available to both academic and nonacademic users. 
These advances will expand the scope of omics applications.",2011-06-23 +23437146,Hierarchical classification of protein folds using a novel ensemble classifier.,"The analysis of biological information from protein sequences is important for the study of cellular functions and interactions, and protein fold recognition plays a key role in the prediction of protein structures. Unfortunately, the prediction of protein fold patterns is challenging due to the existence of compound protein structures. Here, we processed the latest release of the Structural Classification of Proteins (SCOP, version 1.75) database and exploited novel techniques to impressively increase the accuracy of protein fold classification. The techniques proposed in this paper include ensemble classifying and a hierarchical framework, in the first layer of which similar or redundant sequences were deleted in two manners; a set of base classifiers, fused by various selection strategies, divides the input into seven classes; in the second layer of which, an analogous ensemble method is adopted to predict all protein folds. To our knowledge, it is the first time all protein folds can be intelligently detected hierarchically. Compared with prior studies, our experimental results demonstrated the efficiency and effectiveness of our proposed method, which achieved a success rate of 74.21%, which is much higher than results obtained with previous methods (ranging from 45.6% to 70.5%). When applied to the second layer of classification, the prediction accuracy was in the range between 23.13% and 46.05%. This value, which may not be remarkably high, is scientifically admirable and encouraging as compared to the relatively low counts of proteins from most fold recognition programs. 
The web server Hierarchical Protein Fold Prediction (HPFP) is available at http://datamining.xmu.edu.cn/software/hpfp.",2013-02-20 +24743329,FANSe2: a robust and cost-efficient alignment tool for quantitative next-generation sequencing applications.,"Correct and bias-free interpretation of the deep sequencing data is inevitably dependent on the complete mapping of all mappable reads to the reference sequence, especially for quantitative RNA-seq applications. Seed-based algorithms are generally slow but robust, while Burrows-Wheeler Transform (BWT) based algorithms are fast but less robust. To have both advantages, we developed an algorithm FANSe2 with iterative mapping strategy based on the statistics of real-world sequencing error distribution to substantially accelerate the mapping without compromising the accuracy. Its sensitivity and accuracy are higher than the BWT-based algorithms in the tests using both prokaryotic and eukaryotic sequencing datasets. The gene identification results of FANSe2 are experimentally validated, while the previous algorithms have false positives and false negatives. FANSe2 showed remarkably better consistency to the microarray than most other algorithms in terms of gene expression quantifications. We implemented a scalable and almost maintenance-free parallelization method that can utilize the computational power of multiple office computers, a novel feature not present in any other mainstream algorithm. With three normal office computers, we demonstrated that FANSe2 mapped an RNA-seq dataset generated from an entire Illumina HiSeq 2000 flowcell (8 lanes, 608 M reads) to masked human genome within 4.1 hours with higher sensitivity than Bowtie/Bowtie2. FANSe2 thus provides robust accuracy, full indel sensitivity, fast speed, versatile compatibility and economical computational utilization, making it a useful and practical tool for deep sequencing applications. 
FANSe2 is freely available at http://bioinformatics.jnu.edu.cn/software/fanse2/.",2014-04-17 +24439719,A pilot study on peak systolic velocity monitoring of fetal anemia after administration of chemotherapy during pregnancy.,"

Objectives

To monitor fetal anemia during administration of chemotherapy to the fetus's mother.

Study design

Between 2007 and 2012 six patients with malignancy diagnosed during pregnancy were included in our prospective study. For evaluation of fetal anemia, peak systolic velocimetry (PSV) of the middle cerebral artery is considered the best method. The patients were repeatedly examined one day before and on the third day after the administration of chemotherapy. At least three measurements were performed and the highest value was used as appropriate. Multiples of the median (MoM) were calculated using the website http://www.perinatology.com/calculators/MCA.htm. When the MoM reached 1.29, moderate anemia was diagnosed.

Results

The women's average age was 30 years. The average gestational age at diagnosis was 20.7 weeks of pregnancy. Borderline fetal anemia was detected in only one patient. After delivery newborns were examined by standard pediatric evaluation and blood count was provided. There was no evidence of any newborn anemia.

Conclusions

Chemotherapy administered during pregnancy is becoming more frequent due to increasing knowledge and data on such cases. Close monitoring of the fetus should be performed in specialized centers. For detection of chemotherapy-induced anemia, PSV measurement should be employed.",2013-12-24 +23712658,ChemMapper: a versatile web server for exploring pharmacology and chemical structure association based on molecular 3D similarity method.,"

Summary

ChemMapper is an online platform to predict polypharmacology effect and mode of action for small molecules based on 3D similarity computation. ChemMapper collects >350 000 chemical structures with bioactivities and associated target annotations (as well as >3 000 000 non-annotated compounds for virtual screening). Taking the user-provided chemical structure as the query, the top most similar compounds in terms of 3D similarity are returned with associated pharmacology annotations. ChemMapper is designed to provide versatile services in a variety of chemogenomics, drug repurposing, polypharmacology, novel bioactive compounds identification and scaffold hopping studies.

Availability

http://lilab.ecust.edu.cn/chemmapper/.

Contact

xfliu@ecust.edu.cn or hlli@ecust.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-27 +22245546,"Dynamo: a flexible, user-friendly development tool for subtomogram averaging of cryo-EM data in high-performance computing environments.","Dynamo is a new software package for subtomogram averaging of cryo Electron Tomography (cryo-ET) data with three main goals: first, Dynamo allows user-transparent adaptation to a variety of high-performance computing platforms such as GPUs or CPU clusters. Second, Dynamo implements user-friendliness through GUI interfaces and scripting resources. Third, Dynamo offers user-flexibility through a plugin API. Besides the alignment and averaging procedures, Dynamo includes native tools for visualization and analysis of results and data, as well as support for third party visualization software, such as Chimera UCSF or EMAN2. As a demonstration of these functionalities, we studied bacterial flagellar motors and showed automatically detected classes with absent and present C-rings. Subtomogram averaging is a common task in current cryo-ET pipelines, which requires extensive computational resources and follows a well-established workflow. However, due to the data diversity, many existing packages offer slight variations of the same algorithm to improve results. One of the main purposes behind Dynamo is to provide explicit tools to allow the user the insertion of custom designed procedures - or plugins - to replace or complement the native algorithms in the different steps of the processing pipeline for subtomogram averaging without the burden of handling parallelization. Custom scripts that implement new approaches devised by the user are integrated into the Dynamo data management system, so that they can be controlled by the GUI or the scripting capacities. Dynamo executables do not require licenses for third party commercial software. 
Sources, executables and documentation are freely distributed on http://www.dynamo-em.org.",2012-01-08 +24209380,Selective of informative metabolites using random forests based on model population analysis.,"One of the main goals of metabolomics studies is to discover informative metabolites or biomarkers, which may be used to diagnose diseases and to find out pathology. Sophisticated feature selection approaches are required to extract the information hidden in such complex 'omics' data. In this study, it is proposed a new and robust selective method by combining random forests (RF) with model population analysis (MPA), for selecting informative metabolites from three metabolomic datasets. According to the contribution to the classification accuracy, the metabolites were classified into three kinds: informative, no-informative, and interfering metabolites. Based on the proposed method, some informative metabolites were selected for three datasets; further analyses of these metabolites between healthy and diseased groups were then performed, showing by T-test that the P values for all these selected metabolites were lower than 0.05. Moreover, the informative metabolites identified by the current method were demonstrated to be correlated with the clinical outcome under investigation. The source codes of MPA-RF in Matlab can be freely downloaded from http://code.google.com/p/my-research-list/downloads/list.",2013-10-03 +22084010,A new genotype calling method for affymetrix SNP arrays.,"Current genotype-calling methods such as Robust Linear Model with Mahalanobis Distance Classifier (RLMM) and Corrected Robust Linear Model with Maximum Likelihood Classification (CRLMM) provide accurate calling results for Affymetrix Single Nucleotide Polymorphisms (SNP) chips. However, these methods are computationally expensive as they employ preprocess procedures, including chip data normalization and other sophisticated statistical techniques. 
In the small sample case the accuracy rate may drop significantly. We develop a new genotype calling method for Affymetrix 100 k and 500 k SNP chips. A two-stage classification scheme is proposed to obtain a fast genotype calling algorithm. The first stage uses unsupervised classification to quickly discriminate genotypes with high accuracy for the majority of the SNPs. And the second stage employs a supervised classification method to incorporate allele frequency information either from the HapMap data or from a self-training scheme. Confidence score is provided for every genotype call. The overall performance is shown to be comparable to that of CRLMM as verified by the known gold standard HapMap data and is superior in small sample cases. The new algorithm is computationally simple and standalone in the sense that a self-training scheme can be used without employing any other training data. A package implementing the calling algorithm is freely available at http://www.sfs.ecnu.edu.cn/teachers/xuj_en.html.",2011-12-01 +22451269,CpGassoc: an R function for analysis of DNA methylation microarray data.,"

Summary

With the increasing availability of high-density methylation microarrays, there has been growing interest in analysis of DNA methylation data. We have developed CpGassoc, an R package that can efficiently perform the statistical analysis needed for increasingly large methylation datasets. CpGassoc is a modular, expandable package with functions to perform rapid analyses of DNA methylation data via fixed or mixed effects models, to perform basic quality control, to carry out permutation tests, and to display results via an array of publication-quality plots.

Availability and implementation

CpGassoc is implemented in R and is freely available at http://genetics.emory.edu/conneely; we are in the process of submitting it to CRAN.",2012-03-25 +24593846,Upregulation of microRNA-25 associates with prognosis in hepatocellular carcinoma.,"

Background

Accumulating evidence has shown that up-regulation of microRNA-25(miR-25) is associated with the prognosis of several types of human malignant solid tumors. However, whether miR-25 expression has influence on the prognosis of hepatocellular carcinoma (HCC) is still unknown.

Methods

The differentially expressed amount of the miR-25 was validated in triplicate by quantitative reverse-transcription polymerase chain reaction (qRT-PCR). Survival rate was analyzed by log-rank test, and survival curves were plotted according to Kaplan-Meier. Multivariate analysis of the prognostic factors was performed with Cox regression model.

Results

The expression of miR-25 was significantly upregulated in HCC tissues when compared with adjacent normal tissues (p<0.0001). Patients who had high miR-25 expression had a shorter overall survival than patients who had low miR-25 expression (median overall survival, 31.0 months versus 42.9 months, p=0.0192). The multivariate Cox regression analysis indicated that miR-25 expression (HR=2.179; p=0.001), TNM stage (HR=1.782; p=0.014), and vein invasion (HR=1.624; p=0.020) were independent prognostic factors for overall survival.

Conclusion

Our data suggests that the overexpression of miR-25 in HCC tissues is of predictive value on poor prognosis.

Virtual slide

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1989618421114309.",2014-03-04 +24599324,MOSAIK: a hash-based algorithm for accurate next-generation sequencing short-read mapping.,"MOSAIK is a stable, sensitive and open-source program for mapping second and third-generation sequencing reads to a reference genome. Uniquely among current mapping tools, MOSAIK can align reads generated by all the major sequencing technologies, including Illumina, Applied Biosystems SOLiD, Roche 454, Ion Torrent and Pacific BioSciences SMRT. Indeed, MOSAIK was the only aligner to provide consistent mappings for all the generated data (sequencing technologies, low-coverage and exome) in the 1000 Genomes Project. To provide highly accurate alignments, MOSAIK employs a hash clustering strategy coupled with the Smith-Waterman algorithm. This method is well-suited to capture mismatches as well as short insertions and deletions. To support the growing interest in larger structural variant (SV) discovery, MOSAIK provides explicit support for handling known-sequence SVs, e.g. mobile element insertions (MEIs) as well as generating outputs tailored to aid in SV discovery. All variant discovery benefits from an accurate description of the read placement confidence. To this end, MOSAIK uses a neural-network based training scheme to provide well-calibrated mapping quality scores, demonstrated by a correlation coefficient between MOSAIK assigned and actual mapping qualities greater than 0.98. In order to ensure that studies of any genome are supported, a training pipeline is provided to ensure optimal mapping quality scores for the genome under investigation. 
MOSAIK is multi-threaded, open source, and incorporated into our command and pipeline launcher system GKNO (http://gkno.me).",2014-03-05 +22340508,Automatic assessment of the motor state of the Parkinson's disease patient--a case study.,"This paper presents a novel methodology in which the Unified Parkinson's Disease Rating Scale (UPDRS) data processed with a rule-based decision algorithm is used to predict the state of the Parkinson's Disease patients. The research was carried out to investigate whether the advancement of the Parkinson's Disease can be automatically assessed. For this purpose, past and current UPDRS data from 47 subjects were examined. The results show that, among other classifiers, the rough set-based decision algorithm turned out to be most suitable for such automatic assessment.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1563339375633634.",2012-02-19 +23658222,CABS-flex: Server for fast simulation of protein structure fluctuations.,"The CABS-flex server (http://biocomp.chem.uw.edu.pl/CABSflex) implements CABS-model-based protocol for the fast simulations of near-native dynamics of globular proteins. In this application, the CABS model was shown to be a computationally efficient alternative to all-atom molecular dynamics--a classical simulation approach. The simulation method has been validated on a large set of molecular dynamics simulation data. Using a single input (user-provided file in PDB format), the CABS-flex server outputs an ensemble of protein models (in all-atom PDB format) reflecting the flexibility of the input structure, together with the accompanying analysis (residue mean-square-fluctuation profile and others). The ensemble of predicted models can be used in structure-based studies of protein functions and interactions.",2013-05-08 +24248751,High-fertility phenotypes: two outbred mouse models exhibit substantially different molecular and physiological strategies warranting improved fertility.,"Animal models are valuable tools in fertility research. Worldwide, there are more than 400 transgenic or knockout mouse models available showing a reproductive phenotype; almost all of them exhibit an infertile or at least subfertile phenotype. By contrast, animal models revealing an improved fertility phenotype are barely described. This article summarizes data on two outbred mouse models exhibiting a 'high-fertility' phenotype. These mouse lines were generated via selection over a time period of more than 40 years and 161 generations. During this selection period, the number of offspring per litter and the total birth weight of the entire litter nearly doubled. Concomitantly with the increased fertility phenotype, several endocrine parameters (e.g. 
serum testosterone concentrations in male animals), physiological parameters (e.g. body weight, accelerated puberty, and life expectancy), and behavioral parameters (e.g. behavior in an open field and endurance fitness on a treadmill) were altered. We demonstrate that the two independently bred high-fertility mouse lines warranted their improved fertility phenotype using different molecular and physiological strategies. The fertility lines display female- as well as male-specific characteristics. These genetically heterogeneous mouse models provide new insights into molecular and cellular mechanisms that enhance fertility. In view of decreasing fertility in men, these models will therefore be a precious information source for human reproductive medicine. Translated abstract A German translation of abstract is freely available at http://www.reproduction-online.org/content/147/4/427/suppl/DC1.",2014-03-02 +22813356,LXtoo: an integrated live Linux distribution for the bioinformatics community.,"

Background

Recent advances in high-throughput technologies dramatically increase biological data generation. However, many research groups lack computing facilities and specialists. This is an obstacle that remains to be addressed. Here, we present a Linux distribution, LXtoo, to provide a flexible computing platform for bioinformatics analysis.

Findings

Unlike most of the existing live Linux distributions for bioinformatics limiting their usage to sequence analysis and protein structure prediction, LXtoo incorporates a comprehensive collection of bioinformatics software, including data mining tools for microarray and proteomics, protein-protein interaction analysis, and computationally complex tasks like molecular dynamics. Moreover, most of the programs have been configured and optimized for high performance computing.

Conclusions

LXtoo aims to provide well-supported computing environment tailored for bioinformatics research, reducing duplication of efforts in building computing infrastructure. LXtoo is distributed as a Live DVD and freely available at http://bioinformatics.jnu.edu.cn/LXtoo.",2012-07-19 +22039183,Changing trends in the UK management of childhood ITP.,"

Objective

To compare the treatment of childhood immune thrombocytopenia (ITP) with historical practice in the UK.

Design

Data collection through a national UK Childhood ITP registry (http://www.uk-itp.org) started in January 2007.

Settings

UK hospitals.

Participants

Children admitted with a new diagnosis of acute ITP and their treating physicians.

Main outcome measures

Bleeding severity, platelet count, disease management and outcome from the time of presentation to 6 months.

Results

Data from 225 children were analysed. 54% of children had clinically mild, 42% had moderate and 4% had severe disease as defined previously. The mean platelet counts at diagnosis for these groups were 14, 8 and 6×10(9)/l respectively. Children with mild disease had fewer bleeding sites (1.9), compared with moderate (2.5) or severe disease (3.6). There was one intracranial haemorrhage reported. At 6 months' follow-up, 32% had a persistent platelet count <150×10(9)/l, but only 4.8% had a count <20. The proportion of UK children receiving platelet-raising treatment was noted to decrease from 61% in 1995 to 38% in 2000. The current UK 2009 registry data show a continued decrease in treatment to 16% of all the children. In contrast, historical international data report 69% of children receiving interventional therapy.

Conclusion

The current UK practice has shown a continued reduction in the number of children receiving treatment in comparison with historical data and international practice.",2011-10-28 +25072489,Adaptive scenarios: a training model for today's public health workforce.,"

Introduction

With the current economic climate, money for training is scarce. In addition, time is a major barrier to participation in trainings. To meet the public health workforce's rising demand for training, while struggling with less time and fewer resources, the Upper Midwest Preparedness and Emergency Response Learning Center has developed a model of online training that provides the public health workforce with individually customized, needs-based training experiences.

Background/rationale

Adaptive scenarios are rooted in case-based reasoning, a learning approach that focuses on the specific knowledge needed to solve a problem. Proponents of case-based reasoning argue that learners benefit from being able to remember previous similar situations and reusing information and knowledge from that situation. Adaptive scenarios based on true-to-life job performance provide an opportunity to assess skills by presenting the user with choices to make in a problem-solving context.

Methods/activities

A team approach was used to develop the adaptive scenarios. Storylines were developed that incorporated situations aligning with the knowledge, skills, and attitudes outlined in the Public Health Preparedness and Response Core Competency Model. This article examines 2 adaptive scenarios: ""Ready or Not? A Family Preparedness Scenario"" and ""Responding to a Crisis: Managing Emotions and Stress Scenario.""

Results/outcomes

The scenarios are available on Upper Midwest Preparedness and Emergency Response Learning Center's Learning Management System, the Training Source (http://training-source.org). Evaluation data indicate that users' experiences have been positive.

Discussion

Integrating the assessment and training elements of the scenarios so that the training experience is uniquely adaptive to each user is one of the most efficient ways to provide training. The opportunity to provide individualized, needs-based training without having to administer separate assessments has the potential to save time and resources.

Lessons learned/next steps

These adaptive scenarios continue to be marketed to target audiences through partner organizations, various Web sites, electronic newsletters, and social media. Next steps include the implementation of a 6-month follow-up evaluation, using Kirkpatrick level III. Kirkpatrick level III evaluation measures whether there was actual transfer of learning to the work setting.",2014-09-01 +23457606,Barcode server: a visualization-based genome analysis system.,"We have previously developed a computational method for representing a genome as a barcode image, which makes various genomic features visually apparent. We have demonstrated that this visual capability has made some challenging genome analysis problems relatively easy to solve. We have applied this capability to a number of challenging problems, including (a) identification of horizontally transferred genes, (b) identification of genomic islands with special properties and (c) binning of metagenomic sequences, and achieved highly encouraging results. These application results inspired us to develop this barcode-based genome analysis server for public service, which supports the following capabilities: (a) calculation of the k-mer based barcode image for a provided DNA sequence; (b) detection of sequence fragments in a given genome with distinct barcodes from those of the majority of the genome, (c) clustering of provided DNA sequences into groups having similar barcodes; and (d) homology-based search using Blast against a genome database for any selected genomic regions deemed to have interesting barcodes. The barcode server provides a job management capability, allowing processing of a large number of analysis jobs for barcode-based comparative genome analyses. The barcode server is accessible at http://csbl1.bmb.uga.edu/Barcode.",2013-02-15 +21419654,Assessment of trending ability of cardiac output monitors by polar plot methodology.,"

Objectives

To develop a valid statistical method of showing acceptable cardiac output (CO) trending ability when new CO monitors are compared to a reference standard, such as thermodilution, using polar coordinates.

Design

Developing a new statistical analytic method using historic data.

Setting

University Hospital Anesthesia and Intensive Care Department.

Participants

Data taken from previously published CO validation studies.

Interventions

Cartesian data were reanalyzed, being uplifted using Data Thief 3.0 software (http://datathief.org/). Polar plots were constructed from this data. Central zone data (<0.5 L/min or <10% change) were excluded because they introduced statistical noise. Trial polar criteria were set using data from a study that compared 5 CO monitors against thermodilution. Then, these criteria were further validated using data extracted from 15 other studies. Mean (95% confidence intervals) polar angles were used.

Measurements and main results

Trial data suggest ±5° (angle) ±30° (95% confidence interval) as acceptance limits. Concordance rates (ie, >95%-90%) from 5 articles supported trending, and polar data from these studies concurred with the authors' pilot criteria. Favorable comments on trending also were found in 8 of 15 articles in which radial limits were less than ±32°. Good calibration was associated with a mean polar angle of less than ±5°.

Conclusions

Polar plots can be used to show the trending ability of CO monitors in comparative validation studies. They overcome the deficiencies of concordance analysis, which uses the direction of change as a statistic and ignores the magnitude of change in CO.",2011-03-17 +22815360,DanteR: an extensible R-based tool for quantitative analysis of -omics data.,"

Motivation

The size and complex nature of mass spectrometry-based proteomics datasets motivate development of specialized software for statistical data analysis and exploration. We present DanteR, a graphical R package that features extensive statistical and diagnostic functions for quantitative proteomics data analysis, including normalization, imputation, hypothesis testing, interactive visualization and peptide-to-protein rollup. More importantly, users can easily extend the existing functionality by including their own algorithms under the Add-On tab.

Availability

DanteR and its associated user guide are available for download free of charge at http://omics.pnl.gov/software/. We have an updated binary source for the DanteR package up on our website together with a vignettes document. For Windows, a single click automatically installs DanteR along with the R programming environment. For Linux and Mac OS X, users must install R and then follow instructions on the DanteR website for package installation.

Contact

rds@pnnl.gov.",2012-07-19 +23411719,YY1TargetDB: an integral information resource for Yin Yang 1 target loci.,"Yin Yang 1 (YY1), a ubiquitously expressed transcription factor, plays a critical role in regulating cell development, differentiation, cellular proliferation and tumorigenesis. Previous studies identified many YY1-regulated target genes in both human and mouse. Emerging global mapping by Chromatin ImmnoPrecipitation (ChIP)-based high-throughput experiments indicate that YY1 binds to a vast number of loci genome-wide. However, the information is widely scattered in many disparate poorly cross-indexed literatures; a large portion was only published recently by the ENCODE consortium with limited annotation. A centralized database, which annotates and organizes YY1-binding loci and target motifs in a systematic way with easy access, will be valuable resources for the research community. We therefore implemented a web-based YY1 Target loci Database (YY1TargetDB). This database contains YY1-binding loci (binding peaks) from ChIP-seq and ChIP-on-chip experiments, computationally predicated YY1 and cofactor motifs within each locus. It also collects the experimentally verified YY1-binding motifs from individual researchers. The current version of YY1TargetDB contains 92 314 binding loci identified by ChIP-based experiments; 157 200 YY1-binding motifs in which 42 are experimentally verified and 157 158 are computationally predicted; and 130 759 binding motifs for 47 cofactors. Database URL: http://www.myogenesisdb.org/YY1TargetDB.",2013-02-14 +22144253,SRMBuilder: a user-friendly tool for selected reaction monitoring data analysis.,"With high sensitivity and reproducibility, selected reaction monitoring (SRM) has become increasingly popular in proteome research for targeted quantification of low abundance proteins and post translational modification. 
SRM is also well accepted in other mass-spectrometry based research areas such as lipidomics and metabolomics, which necessitates the development of easy-to-use software for both post-acquisition SRM data analysis and quantification result validation. Here, we introduce a software tool SRMBuilder, which can automatically parse SRM data in multiple file formats, assign transitions to compounds, match light/heavy transition/compound pairs and provide a user-friendly graphic interface to manually validate the quantification result at transition/compound/sample level. SRMBuilder will greatly facilitate processing of the post-acquisition data files and validation of quantification result for SRM. The software can be downloaded for free from http://www.proteomics.ac.cn/software/proteomicstools/index.htm as part of the software suite ProteomicsTools.",2011-12-01 +24932669,Forcefield_NCAA: ab initio charge parameters to aid in the discovery and design of therapeutic proteins and peptides with unnatural amino acids and their application to complement inhibitors of the compstatin family.,"We describe the development and testing of ab initio derived, AMBER ff03 compatible charge parameters for a large library of 147 noncanonical amino acids including β- and N-methylated amino acids for use in applications such as protein structure prediction and de novo protein design. The charge parameter derivation was performed using the RESP fitting approach. Studies were performed assessing the suitability of the derived charge parameters in discriminating the activity/inactivity between 63 analogs of the complement inhibitor Compstatin on the basis of previously published experimental IC50 data and a screening procedure involving short simulations and binding free energy calculations. 
We found that both the approximate binding affinity (K*) and the binding free energy calculated through MM-GBSA are capable of discriminating between active and inactive Compstatin analogs, with MM-GBSA performing significantly better. Key interactions between the most potent Compstatin analog that contains a noncanonical amino acid are presented and compared to the most potent analog containing only natural amino acids and native Compstatin. We make the derived parameters and an associated web interface that is capable of performing modifications on proteins using Forcefield_NCAA and outputting AMBER-ready topology and parameter files freely available for academic use at http://selene.princeton.edu/FFNCAA . The forcefield allows one to incorporate these customized amino acids into design applications with control over size, van der Waals, and electrostatic interactions.",2014-01-14 +22611131,Gossamer--a resource-efficient de novo assembler.,"

Motivation

The de novo assembly of short read high-throughput sequencing data poses significant computational challenges. The volume of data is huge; the reads are tiny compared to the underlying sequence, and there are significant numbers of sequencing errors. There are numerous software packages that allow users to assemble short reads, but most are either limited to relatively small genomes (e.g. bacteria) or require large computing infrastructure or employ greedy algorithms and thus often do not yield high-quality results.

Results

We have developed Gossamer, an implementation of the de Bruijn approach to assembly that requires close to the theoretical minimum of memory, but still allows efficient processing. Our results show that it is space efficient and produces high-quality assemblies.

Availability

Gossamer is available for non-commercial use from http://www.genomics.csse.unimelb.edu.au/product-gossamer.php.",2012-05-18 +23398941,Genomic reconstruction of transcriptional regulatory networks in lactic acid bacteria.,"

Background

Genome scale annotation of regulatory interactions and reconstruction of regulatory networks are the crucial problems in bacterial genomics. The Lactobacillales order of bacteria collates various microorganisms having a large economic impact, including both human and animal pathogens and strains used in the food industry. Nonetheless, no systematic genome-wide analysis of transcriptional regulation has been previously made for this taxonomic group.

Results

A comparative genomics approach was used for reconstruction of transcriptional regulatory networks in 30 selected genomes of lactic acid bacteria. The inferred networks comprise regulons for 102 orthologous transcription factors (TFs), including 47 novel regulons for previously uncharacterized TFs. Numerous differences between regulatory networks of the Streptococcaceae and Lactobacillaceae groups were described on several levels. The two groups are characterized by substantially different sets of TFs encoded in their genomes. Content of the inferred regulons and structure of their cognate TF binding motifs differ for many orthologous TFs between the two groups. Multiple cases of non-orthologous displacements of TFs that control specific metabolic pathways were reported.

Conclusions

The reconstructed regulatory networks substantially expand the existing knowledge of transcriptional regulation in lactic acid bacteria. In each of 30 studied genomes the obtained regulatory network contains on average 36 TFs and 250 target genes that are mostly involved in carbohydrate metabolism, stress response, metal homeostasis and amino acids biosynthesis. The inferred networks can be used for genetic experiments, functional annotations of genes, metabolic reconstruction and evolutionary analysis. All reconstructed regulons are captured within the Streptococcaceae and Lactobacillaceae collections in the RegPrecise database (http://regprecise.lbl.gov).",2013-02-12 +23162087,Scaffolding low quality genomes using orthologous protein sequences.,"

Motivation

The ready availability of next-generation sequencing has led to a situation where it is easy to produce very fragmentary genome assemblies. We present a pipeline, SWiPS (Scaffolding With Protein Sequences), that uses orthologous proteins to improve low quality genome assemblies. The protein sequences are used as guides to scaffold existing contigs, while simultaneously allowing the gene structure to be predicted by homology.

Results

To perform, SWiPS does not depend on a high N50 or whole proteins being encoded on a single contig. We tested our algorithm on simulated next-generation data from Ciona intestinalis, real next-generation data from Drosophila melanogaster, a complex genome assembly of Homo sapiens and the low coverage Sanger sequence assembly of Callorhinchus milii. The improvements in N50 are of the order of ∼20% for the C.intestinalis and H.sapiens assemblies, which is significant, considering the large size of intergenic regions in these eukaryotes. Using the CEGMA pipeline to assess the gene space represented in the genome assemblies, the number of genes retrieved increased by >110% for C.milii and from 20 to 40% for C.intestinalis. The scaffold error rates are low: 85-90% of scaffolds are fully correct, and >95% of local contig joins are correct.

Availability

SWiPS is available freely for download at http://www.well.ox.ac.uk/∼yli142/swips.html.

Contact

yang.li@well.ox.ac.uk or copley@well.ox.ac.uk",2012-11-18 +23413436,SPINAL: scalable protein interaction network alignment.,"

Motivation

Given protein-protein interaction (PPI) networks of a pair of species, a pairwise global alignment corresponds to a one-to-one mapping between their proteins. Based on the presupposition that such a mapping provides pairs of functionally orthologous proteins accurately, the results of the alignment may then be used in comparative systems biology problems such as function prediction/verification or construction of evolutionary relationships.

Results

We show that the problem is NP-hard even for the case where the pair of networks are simply paths. We next provide a polynomial time heuristic algorithm, SPINAL, which consists of two main phases. In the first coarse-grained alignment phase, we construct all pairwise initial similarity scores based on pairwise local neighborhood matchings. Using the produced similarity scores, the fine-grained alignment phase produces the final one-to-one mapping by iteratively growing a locally improved solution subset. Both phases make use of the construction of neighborhood bipartite graphs and the contributors as a common primitive. We assess the performance of our algorithm on the PPI networks of yeast, fly, human and worm. We show that based on the accuracy measures used in relevant work, our method outperforms the state-of-the-art algorithms. Furthermore, our algorithm does not suffer from scalability issues, as such accurate results are achieved in reasonable running times as compared with the benchmark algorithms.

Availability

Supplementary Document, open source codes, useful scripts, all the experimental data and the results are freely available at http://code.google.com/p/spinal/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-14 +24346285,"BOADICEA breast cancer risk prediction model: updates to cancer incidences, tumour pathology and web interface.","

Background

The Breast and Ovarian Analysis of Disease Incidence and Carrier Estimation Algorithm (BOADICEA) is a risk prediction model that is used to compute probabilities of carrying mutations in the high-risk breast and ovarian cancer susceptibility genes BRCA1 and BRCA2, and to estimate the future risks of developing breast or ovarian cancer. In this paper, we describe updates to the BOADICEA model that extend its capabilities, make it easier to use in a clinical setting and yield more accurate predictions.

Methods

We describe: (1) updates to the statistical model to include cancer incidences from multiple populations; (2) updates to the distributions of tumour pathology characteristics using new data on BRCA1 and BRCA2 mutation carriers and women with breast cancer from the general population; (3) improvements to the computational efficiency of the algorithm so that risk calculations now run substantially faster; and (4) updates to the model's web interface to accommodate these new features and to make it easier to use in a clinical setting.

Results

We present results derived using the updated model, and demonstrate that the changes have a significant impact on risk predictions.

Conclusion

All updates have been implemented in a new version of the BOADICEA web interface that is now available for general use: http://ccge.medschl.cam.ac.uk/boadicea/.",2013-12-17 +24267822,HuPSON: the human physiology simulation ontology.,"

Background

Large biomedical simulation initiatives, such as the Virtual Physiological Human (VPH), are substantially dependent on controlled vocabularies to facilitate the exchange of information, of data and of models. Hindering these initiatives is a lack of a comprehensive ontology that covers the essential concepts of the simulation domain.

Results

We propose a first version of a newly constructed ontology, HuPSON, as a basis for shared semantics and interoperability of simulations, of models, of algorithms and of other resources in this domain. The ontology is based on the Basic Formal Ontology, and adheres to the MIREOT principles; the constructed ontology has been evaluated via structural features, competency questions and use case scenarios. The ontology is freely available at: http://www.scai.fraunhofer.de/en/business-research-areas/bioinformatics/downloads.html (owl files) and http://bishop.scai.fraunhofer.de/scaiview/ (browser).

Conclusions

HuPSON provides a framework for a) annotating simulation experiments, b) retrieving relevant information that are required for modelling, c) enabling interoperability of algorithmic approaches used in biomedical simulation, d) comparing simulation results and e) linking knowledge-based approaches to simulation-based approaches. It is meant to foster a more rapid uptake of semantic technologies in the modelling and simulation domain, with particular focus on the VPH domain.",2013-11-22 +22743225,Targeted retrieval of gene expression measurements using regulatory models.,"

Motivation

Large public repositories of gene expression measurements offer the opportunity to position a new experiment into the context of earlier studies. While previous methods rely on experimental annotation or global similarity of expression profiles across genes or gene sets, we compare experiments by measuring similarity based on an unsupervised, data-driven regulatory model around pre-specified genes of interest. Our experiment retrieval approach is novel in two conceptual respects: (i) targetable focus and interpretability: the analysis is targeted at regulatory relationships of genes that are relevant to the analyst or come from prior knowledge; (ii) regulatory model-based similarity measure: related experiments are retrieved based on the strength of inferred regulatory links between genes.

Results

We learn a model for the regulation of specific genes from a data repository and exploit it to construct a similarity metric for an information retrieval task. We use the Fisher kernel, a rigorous similarity measure that typically has been applied to use generative models in discriminative classifiers. Results on human and plant microarray collections indicate that our method is able to substantially improve the retrieval of related experiments against standard methods. Furthermore, it allows the user to interpret biological conditions in terms of changes in link activity patterns. Our study of the osmotic stress network for Arabidopsis thaliana shows that the method successfully identifies relevant relationships around given key genes.

Availability

The code (R) is available at http://research.ics.tkk.fi/mi/software.shtml.

Contact

elisabeth.georgii@aalto.fi; jarkko.salojarvi@helsinki.fi; samuel.kaski@hiit.fi

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-06-27 +23411029,"The ""Vampirome"": Transcriptome and proteome analysis of the principal and accessory submaxillary glands of the vampire bat Desmodus rotundus, a vector of human rabies.","Vampire bats are notorious for being the sole mammals that strictly feed on fresh blood for their survival. While their saliva has been historically associated with anticoagulants, only one antihemostatic (plasminogen activator) has been molecularly and functionally characterized. Here, RNAs from both principal and accessory submaxillary (submandibular) salivary glands of Desmodus rotundus were extracted, and ~200 million reads were sequenced by Illumina. The principal gland was enriched with plasminogen activators with fibrinolytic properties, members of lipocalin and secretoglobin families, which bind prohemostatic prostaglandins, and endonucleases, which cleave neutrophil-derived procoagulant NETs. Anticoagulant (tissue factor pathway inhibitor, TFPI), vasodilators (PACAP and C-natriuretic peptide), and metalloproteases (ADAMTS-1) were also abundantly expressed. Members of the TSG-6 (anti-inflammatory), antigen 5/CRISP, and CCL28-like (antimicrobial) protein families were also sequenced. Apyrases (which remove platelet agonist ADP), phosphatases (which degrade procoagulant polyphosphates), and sphingomyelinase were found at lower transcriptional levels. Accessory glands were enriched with antimicrobials (lysozyme, defensin, lactotransferrin) and protease inhibitors (TIL-domain, cystatin, Kazal). Mucins, heme-oxygenase, and IgG chains were present in both glands. Proteome analysis by nano LC-MS/MS confirmed that several transcripts are expressed in the glands. The database presented herein is accessible online at http://exon.niaid.nih.gov/transcriptome/D_rotundus/Supplemental-web.xlsx. These results reveal that bat saliva emerges as a novel source of modulators of vascular biology.

Biological significance

Vampire bat saliva emerges as a novel source of antihemostatics which modulate several aspects of vascular biology.",2013-02-11 +23401504,Expression profiling of mouse subplate reveals a dynamic gene network and disease association with autism and schizophrenia.,"The subplate zone is a highly dynamic transient sector of the developing cerebral cortex that contains some of the earliest generated neurons and the first functional synapses of the cerebral cortex. Subplate cells have important functions in early establishment and maturation of thalamocortical connections, as well as in the development of inhibitory cortical circuits in sensory areas. So far no role has been identified for cells in the subplate in the mature brain and disease association of the subplate-specific genes has not been analyzed systematically. Here we present gene expression evidence for distinct roles of the mouse subplate across development as well as unique molecular markers to extend the repertoire of subplate labels. Performing systematic comparisons between different ages (embryonic days 15 and 18, postnatal day 8, and adult), we reveal the dynamic and constant features of the markers labeling subplate cells during embryonic and early postnatal development and in the adult. This can be visualized using the online database of subplate gene expression at https://molnar.dpag.ox.ac.uk/subplate/. We also identify embryonic similarities in gene expression between the ventricular zones, intermediate zone, and subplate, and distinct postnatal similarities between subplate, layer 5, and layers 2/3. The genes expressed in a subplate-specific manner at some point during development show a statistically significant enrichment for association with autism spectrum disorders and schizophrenia. 
Our report emphasizes the importance of the study of transient features of the developing brain to better understand neurodevelopmental disorders.",2013-02-11 +24076250,BaCoCa--a heuristic software tool for the parallel assessment of sequence biases in hundreds of gene and taxon partitions.,"BaCoCa (BAse COmposition CAlculator) is a user-friendly software that combines multiple statistical approaches (like RCFV and C value calculations) to identify biases in aligned sequence data which potentially mislead phylogenetic reconstructions. As a result of its speed and flexibility, the program provides the possibility to analyze hundreds of pre-defined gene partitions and taxon subsets in one single process run. BaCoCa is command-line driven and can be easily integrated into automatic process pipelines of phylogenomic studies. Moreover, given the tab-delimited output style the results can be easily used for further analyses in programs like Excel or statistical packages like R. A built-in option of BaCoCa is the generation of heat maps with hierarchical clustering of certain results using R. As input files BaCoCa can handle FASTA and relaxed PHYLIP, which are commonly used in phylogenomic pipelines. BaCoCa is implemented in Perl and works on Windows PCs, Macs and Linux operating systems. The executable source code as well as example test files and a detailed documentation of BaCoCa are freely available at http://software.zfmk.de.",2013-09-25 +24722556,Transcriptomic analysis of Petunia hybrida in response to salt stress using high throughput RNA sequencing.,"Salinity and drought stress are the primary cause of crop losses worldwide. In sodic saline soils sodium chloride (NaCl) disrupts normal plant growth and development. The complex interactions of plant systems with abiotic stress have made RNA sequencing a more holistic and appealing approach to study transcriptome level responses in a single cell and/or tissue. 
In this work, we determined the Petunia transcriptome response to NaCl stress by sequencing leaf samples and assembling 196 million Illumina reads with Trinity software. Using our reference transcriptome we identified more than 7,000 genes that were differentially expressed within 24 h of acute NaCl stress. The proposed transcriptome can also be used as an excellent tool for biological and bioinformatics in the absence of an available Petunia genome and it is available at the SOL Genomics Network (SGN) http://solgenomics.net. Genes related to regulation of reactive oxygen species, transport, and signal transductions as well as novel and undescribed transcripts were among those differentially expressed in response to salt stress. The candidate genes identified in this study can be applied as markers for breeding or to genetically engineer plants to enhance salt tolerance. Gene Ontology analyses indicated that most of the NaCl damage happened at 24 h inducing genotoxicity, affecting transport and organelles due to the high concentration of Na+ ions. Finally, we report a modification to the library preparation protocol whereby cDNA samples were bar-coded with non-HPLC purified primers, without affecting the quality and quantity of the RNA-seq data. 
The methodological improvement presented here could substantially reduce the cost of sample preparation for future high-throughput RNA sequencing experiments.",2014-04-10 +25068684,"A Eu(III) tetrakis(β-diketonate) dimeric complex: photophysical properties, structural elucidation by Sparkle/AM1 calculations, and doping into PMMA films and nanowires.","Reaction of Ln(III) with a tetrakis(diketone) ligand H4L [1,1'-(4,4'-(2,2-bis((4-(4,4,4-trifluoro-3-oxobutanoyl) phenoxy)methyl)propane-1,3-diyl)bis(oxy)bis(4,1-phenylene))bis(4,4,4-trifluorobutane-1,3-dione)] gives new podates which, according to mass spectral data and Sparkle/AM1 calculations, can be described as dimers, (NBu4[LnL])2 (Ln = Eu, Tb, Gd:Eu), in both solid-state and dimethylformamide (DMF) solution. The photophysical properties of the Eu(III) podate are compared with those of the mononuclear diketonate (NBu4[Eu(BTFA)4], BTFA = benzoyltrifluoroacetonate), the crystal structure of which is also reported. The new Eu(III) dimeric complex displays bright red luminescence upon irradiation at the ligand-centered band in the range of 250-400 nm, irrespective of the medium. The emission quantum yields and the luminescence lifetimes of (NBu4[EuL])2 (solid state: 51% ± 8% and 710 ± 2 μs; DMF: 31% ± 5% and 717 ± 1 μs) at room temperature are comparable to those obtained for NBu4[Eu(BTFA)4] (solid state: 60 ± 9% and 730 ± 5 μs; DMF: 30 ± 5% and 636 ± 1 μs). Sparkle/AM1 calculations were utilized for predicting the ground-state geometries of the Eu(III) dimer. Theoretical Judd-Ofelt and photoluminescence parameters, including quantum yields, predicted from this model are in good agreement with the experimental values, proving the efficiency of this theoretical approach implemented in the LUMPAC software (http://lumpac.pro.br). 
The kinetic scheme for modeling energy transfer processes show that the main donor state is the ligand triplet state and that energy transfer occurs on both the (5)D1 (44.2%) and (5)D0 (55.8%) levels. Furthermore, the newly obtained Eu(III) complex was doped into a PMMA matrix to form highly luminescent films and one-dimensional nanowires having emission quantum yield as high as 67%-69% (doping concentration = 4% by weight); these materials display bright red luminescence even under sunlight, so that interesting photonic applications can be foreseen.",2014-07-28 +24194827,PSI: a comprehensive and integrative approach for accurate plant subcellular localization prediction.,"Predicting the subcellular localization of proteins conquers the major drawbacks of high-throughput localization experiments that are costly and time-consuming. However, current subcellular localization predictors are limited in scope and accuracy. In particular, most predictors perform well on certain locations or with certain data sets while poorly on others. Here, we present PSI, a novel high accuracy web server for plant subcellular localization prediction. PSI derives the wisdom of multiple specialized predictors via a joint-approach of group decision making strategy and machine learning methods to give an integrated best result. The overall accuracy obtained (up to 93.4%) was higher than best individual (CELLO) by ~10.7%. The precision of each predicable subcellular location (more than 80%) far exceeds that of the individual predictors. It can also deal with multi-localization proteins. PSI is expected to be a powerful tool in protein location engineering as well as in plant sciences, while the strategy employed could be applied to other integrative problems. 
A user-friendly web server, PSI, has been developed for free access at http://bis.zju.edu.cn/psi/.",2013-10-23 +22559876,Quantitatively integrating molecular structure and bioactivity profile evidence into drug-target relationship analysis.,"

Background

Public resources of chemical compounds are in a rapid growth both in quantity and the types of data-representation. To comprehensively understand the relationship between the intrinsic features of chemical compounds and protein targets is an essential task to evaluate potential protein-binding function for virtual drug screening. In previous studies, correlations were proposed between bioactivity profiles and target networks, especially when chemical structures were similar. With the lack of effective quantitative methods to uncover such correlation, it is demanding and necessary for us to integrate the information from multiple data sources to produce a comprehensive assessment of the similarity between small molecules, as well as quantitatively uncover the relationship between compounds and their targets by such integrated schema.

Results

In this study a multi-view based clustering algorithm was introduced to quantitatively integrate compound similarity from both bioactivity profiles and structural fingerprints. Firstly, a hierarchy clustering was performed with the fused similarity on 37 compounds curated from PubChem. Compared to clustering in a single view, the overall common target number within fused classes has been improved by using the integrated similarity, which indicated that the present multi-view based clustering is more efficient by successfully identifying clusters with its members sharing a larger number of common targets. Analysis in certain classes reveals that mutual complement of the two views for compound description helps to discover missing similar compound when only single view was applied. Then, a large-scale drug virtual screen was performed on 1267 compounds curated from Connectivity Map (CMap) dataset based on the fused similarity, which obtained a better ranking result compared to that of single-view. These comprehensive tests indicated that by combining different data representations, an improved assessment of target-specific compound similarity can be achieved.

Conclusions

Our study presented an efficient, extendable and quantitative computational model for integration of different compound representations, and expected to provide new clues to improve the virtual drug screening from various pharmacological properties. Scripts, supplementary materials and data used in this study are publicly available at http://lifecenter.sgst.cn/fusion/.",2012-05-04 +23396298,PIPEMicroDB: microsatellite database and primer generation tool for pigeonpea genome.,"Molecular markers play a significant role for crop improvement in desirable characteristics, such as high yield, resistance to disease and others that will benefit the crop in long term. Pigeonpea (Cajanus cajan L.) is the recently sequenced legume by global consortium led by ICRISAT (Hyderabad, India) and been analysed for gene prediction, synteny maps, markers, etc. We present PIgeonPEa Microsatellite DataBase (PIPEMicroDB) with an automated primer designing tool for pigeonpea genome, based on chromosome wise as well as location wise search of primers. Total of 123 387 Short Tandem Repeats (STRs) were extracted from pigeonpea genome, available in public domain using MIcroSAtellite tool (MISA). The database is an online relational database based on 'three-tier architecture' that catalogues information of microsatellites in MySQL and user-friendly interface is developed using PHP. Search for STRs may be customized by limiting their location on chromosome as well as number of markers in that range. This is a novel approach and is not been implemented in any of the existing marker database. This database has been further appended with Primer3 for primer designing of selected markers with left and right flankings of size up to 500 bp. This will enable researchers to select markers of choice at desired interval over the chromosome. Furthermore, one can use individual STRs of a targeted region over chromosome to narrow down location of gene of interest or linked Quantitative Trait Loci (QTLs). 
Although it is an in silico approach, markers' search based on characteristics and location of STRs is expected to be beneficial for researchers. Database URL: http://cabindb.iasri.res.in/pigeonpea/",2013-02-08 +23396301,CrossTope: a curate repository of 3D structures of immunogenic peptide: MHC complexes.,"The CrossTope is a highly curate repository of three-dimensional structures of peptide:major histocompatibility complex (MHC) class I complexes (pMHC-I). The complexes hosted by this databank were obtained in protein databases and by large-scale in silico construction of pMHC-I structures, using a new approach developed by our group. At this moment, the database contains 182 'non-redundant' pMHC-I complexes from two human and two murine alleles. A web server provides interface for database query. The user can download (i) structure coordinate files and (ii) topological and charges distribution maps images from the T-cell receptor-interacting surface of pMHC-I complexes. The retrieved structures and maps can be used to cluster similar epitopes in cross-reactivity approaches, to analyse viral escape mutations in a structural level or even to improve the immunogenicity of tumour antigens. Database URL: http://www.crosstope.com.br.",2013-02-08 +23874353,MicroRNA discovery by similarity search to a database of RNA-seq profiles.,"In silico generated search for microRNAs (miRNAs) has been driven by methods compiling structural features of the miRNA precursor hairpin, as well as to some degree combining this with the analysis of RNA-seq profiles for which the miRNA typically leave the drosha/dicer fingerprint of 1-2 ~22 nt blocks of reads corresponding to the mature and star miRNA. In complement to the previous methods, we present a study where we systematically exploit these patterns of read profiles. We created two datasets comprised of 2540 and 4795 read profiles obtained after preprocessing short RNA-seq data from miRBase and ENCODE, respectively. 
Out of 4795 ENCODE read profiles, 1361 are annotated as non-coding RNAs (ncRNAs) and of which 285 are further annotated as miRNAs. Using deepBlockAlign (dba), we align ncRNA read profiles from ENCODE against the miRBase read profiles (cleaned for ""self-matches"") and are able to separate ENCODE miRNAs from the other ncRNAs by a Matthews Correlation Coefficient (MCC) of 0.8 and obtain an area under the curve of 0.93. Based on the dba score cut-off of 0.7 at which we observed the maximum MCC of 0.8, we predict 523 novel miRNA candidates. An additional RNA secondary structure analysis reveal that 42 of the candidates overlap with predicted conserved secondary structure. Further analysis reveal that the 523 miRNA candidates are located in genomic regions with MAF block (UCSC) fragmentation and poor sequence conservation, which in part might explain why they have been overlooked in previous efforts. We further analyzed known human and mouse miRNA read profiles and found two distinct classes; the first containing two blocks and the second containing >2 blocks of reads. Also the latter class holds read profiles that have less well defined arrangement of reads in comparison to the former class. On comparison of miRNA read profiles from plants and animals, we observed kingdom specific read profiles that are distinct in terms of both length and distribution of reads within the read profiles to each other. All the data, as well as a server to search miRBase read profiles by uploading a BED file, is available at http://rth.dk/resources/mirdba.",2013-07-11 +23307004,"TG13 current terminology, etiology, and epidemiology of acute cholangitis and cholecystitis.","While referring to the evidence adopted in the Tokyo Guidelines 2007 (TG07) as well as subsequently obtained evidence, further discussion took place on terminology, etiology, and epidemiological data. 
In particular, new findings have accumulated on the occurrence of symptoms in patients with gallstones, frequency of severe cholecystitis and cholangitis, onset of cholecystitis and cholangitis after endoscopic retrograde cholangiopancreatography and medications, mortality rate, and recurrence rate. The primary etiology of acute cholangitis/cholecystitis is the presence of stones. Next to stones, the most significant etiology of acute cholangitis is benign/malignant stenosis of the biliary tract. On the other hand, there is another type of acute cholecystitis, acute acalculous cholecystitis, in which stones are not involved as causative factors. Risk factors for acute acalculous cholecystitis include surgery, trauma, burn, and parenteral nutrition. After 2000, the mortality rate of acute cholangitis has been about 10 %, while that of acute cholecystitis has generally been less than 1 %. After the publication of TG07, diagnostic criteria and severity assessment criteria were standardized, and the distribution of cases according to severity and comparison of clinical data among target populations have become more subjective. The concept of healthcare-associated infections is important in the current treatment of infection. The treatment of acute cholangitis and cholecystitis substantially differs from that of community-acquired infections. Cholangitis and cholecystitis as healthcare-associated infections are clearly described in the updated Tokyo Guidelines (TG13). Free full-text articles and a mobile application of TG13 are available via http://www.jshbps.jp/en/guideline/tg13.html.",2013-01-01 +24064419,An alignment-free test for recombination.,"

Motivation

Why recombination? is one of the central questions in biology. This has led to a host of methods for quantifying recombination from sequence data. These methods are usually based on aligned DNA sequences. Here, we propose an efficient alignment-free alternative.

Results

Our method is based on the distribution of match lengths, which we look up using enhanced suffix arrays. By eliminating the alignment step, the test becomes fast enough for application to whole bacterial genomes. Using simulations we show that our test has similar power as established tests when applied to long pairs of sequences. When applied to 58 genomes of Escherichia coli, we pick up the strongest recombination signal from a 125 kb horizontal gene transfer engineered 20 years ago.

Availability and implementation

We have implemented our method in the command-line program rush. Its C sources and documentation are available under the GNU General Public License from http://guanine.evolbio.mpg.de/rush/.",2013-09-23 +22238272,A scalable and portable framework for massively parallel variable selection in genetic association studies.,"

Unlabelled

The deluge of data emerging from high-throughput sequencing technologies poses large analytical challenges when testing for association to disease. We introduce a scalable framework for variable selection, implemented in C++ and OpenCL, that fits regularized regression across multiple Graphics Processing Units. Open source code and documentation can be found at a Google Code repository under the URL http://bioinformatics.oxfordjournals.org/content/early/2012/01/10/bioinformatics.bts015.abstract.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-11 +24885235,Women participating in a web-based preconception study have a high prevalence of risk factors for adverse pregnancy outcomes.,"

Background

Adverse pregnancy outcomes (APOs) can be increased by preconception risk factors and lifestyles. We measured the prevalence of preconception risk factors for APOs in a population of Italian women of childbearing age enrolled in a web-based study.

Methods

Participants were enrolled through a web platform (http://www.mammainforma.it). After enrollment, participants filled in a questionnaire regarding socio-demographic characteristics, clinical data and preconception risk factors for adverse pregnancy outcomes. Through logistic regression, we explored how the prevalence of risk factors was affected by age, education level, employment, parity, physician's recommendation and knowledge of the specific risk factor.

Results

We enrolled a total of 728 women. Sixty-two percent had a University degree, 84% were employed and 77% were planning their first pregnancy. Nearly 70% drank alcohol in any quantity; 16% were smokers; 6% were underweight; 21.4% were overweight; 51.6% did not take folic acid; 22% were susceptible to rubella, 44.5% to hepatitis B and 13.2% to varicella. According to the multivariate analysis, compared to women who already had at least one pregnancy, nulliparous women had a higher BMI [OR 1.60 (CI 1.02;2.48)] and were less likely to be susceptible to rubella [OR 0.33 (CI 0.20;0.58)] and to be consuming alcohol [OR 0.47 (CI 0.31;0.70)] or cigarettes [OR 0.48 (CI 0.26;0.90)]. Appropriate knowledge was associated with a correct behavior regarding smoking, drinking alcohol and folic acid supplementation.

Conclusions

This study shows that the prevalence of risk factors for APOs in our population is high. Interventions aimed at reducing risk factors for APOs are needed and, to this purpose, a web intervention may represent a feasible tool to integrate tailored information and to inform preconception counseling targeting a specific group of women planning a pregnancy who are engaged on the web.

Summary

Genome-wide expression analysis can result in large numbers of clusters of co-expressed genes. Although there are tools for ab initio discovery of transcription factor-binding sites, most do not provide a quick and easy way to study large numbers of clusters. To address this, we introduce a web tool called MEME-LaB. The tool wraps MEME (an ab initio motif finder), providing an interface for users to input multiple gene clusters, retrieve promoter sequences, run motif finding and then easily browse and condense the results, facilitating better interpretation of the results from large-scale datasets.

Availability

MEME-LaB is freely accessible at: http://wsbc.warwick.ac.uk/wsbcToolsWebpage/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-16 +22592379,OSA: a fast and accurate alignment tool for RNA-Seq.,"

Unlabelled

Accurately mapping RNA-Seq reads to the reference genome is a critical step for performing downstream analysis such as transcript assembly, isoform detection and quantification. Many tools have been developed; however, given the huge size of the next generation sequencing datasets and the complexity of the transcriptome, RNA-Seq read mapping remains a challenge with the ever-increasing amount of data. We develop Omicsoft sequence aligner (OSA), a fast and accurate alignment tool for RNA-Seq data. Benchmarked with existing methods, OSA improves mapping speed 4-10-fold with better sensitivity and less false positives.

Availability

OSA can be downloaded from http://omicsoft.com/osa. It is free to academic users. OSA has been tested extensively on Linux, Mac OS X and Windows platforms.",2012-05-15 +23389242,Assignment of Y-chromosomal SNPs found in Japanese population to Y-chromosomal haplogroup tree.,"The relationship between Y-chromosome single-nucleotide polymorphisms (SNPs) registered in the Japanese SNP (JSNP) database (http://snp.ims.u-tokyo.ac.jp) and Y-binary haplogroup lineages was investigated to identify new Y-chromosomal binary haplogroup markers and further refine Y-chromosomal haplogroup classification in the Japanese population. We used SNPs for which it was possible to construct primers to make Y-specific PCR product sizes small enough to obtain amplification products even from degraded DNA, as this would allow their use not only in genetic but also in archeological and forensic studies. The genotype of 35 JSNP markers were determined, of which 14 were assigned to appropriate positions on the Y-chromosomal haplogroup tree, together with 5 additional new non-JSNP markers. These markers defined 14 new branches (C3/64562+13, C3/2613-27, D2a1b/006841*, D2a1b/119166-11A, D2a/022456*, D2a/119166-11A, D2a/119167rec/119167-40rec*, D2a/75888-GC, O3a3c/075888-9T/10T*, O3a3c/075888-9T/9T, O3a3/8425+6, O3a3/119166-13A*, O3a3/008002 and O3a4/037852) and 21 new internal markers on the 2008 Y-chromosome haplogroup tree. These results will provide useful information for Y-chromosomal polymorphic studies of East Asian populations, particularly those in and around Japan, in the fields of anthropology, genetics and forensics.",2013-02-07 +21330291,libfbi: a C++ implementation for fast box intersection and application to sparse mass spectrometry data.,"

Motivation

Algorithms for sparse data require fast search and subset selection capabilities for the determination of point neighborhoods. A natural data representation for such cases are space partitioning data structures. However, the associated range queries assume noise-free observations and cannot take into account observation-specific uncertainty estimates that are present in e.g. modern mass spectrometry data. In order to accommodate the inhomogeneous noise characteristics of sparse real-world datasets, point queries need to be reformulated in terms of box intersection queries, where box sizes correspond to uncertainty regions for each observation.

Results

This contribution introduces libfbi, a standard C++, header-only template implementation for fast box intersection in an arbitrary number of dimensions, with arbitrary data types in each dimension. The implementation is applied to a data aggregation task on state-of-the-art liquid chromatography/mass spectrometry data, where it shows excellent run time properties.

Availability

The library is available under an MIT license and can be downloaded from http://software.steenlab.org/libfbi.

Contact

marc.kirchner@childrens.harvard.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-16 +22681780,Patho-Genes.org: a website dedicated to gene sequences of potential bioterror bacteria and PCR primers used to amplify them.,"Pathogenic agents can be very hard to detect, and usually they do not cause illness for several hours or days. To improve the speed and the accuracy of detection tests and satisfy the needs of early diagnosis, molecular biology methods such as PCR are now used. However, selecting a proper target gene and designing good primers is often not easy. We present a dedicated website, http://patho-genes.org, where we provide every sequence, functional annotation, published primer and relevant article for every annotated gene of major pathogenic bacterial species listed as key agents to be used for a bioterrorism attack. Each published primer was analysed to determine its melting temperature, its specificity and its coverage (i.e. its sensitivity against every allele of its target gene). Data generated have been organized in the form of data sheet for each gene, which are available through multiple browser panels and query systems.",2012-06-10 +24565437,Functional approach to high-throughput plant growth analysis.,"

Method

Taking advantage of the current rapid development in imaging systems and computer vision algorithms, we present HPGA, a high-throughput phenotyping platform for plant growth modeling and functional analysis, which produces better understanding of energy distribution in regards of the balance between growth and defense. HPGA has two components, PAE (Plant Area Estimation) and GMA (Growth Modeling and Analysis). In PAE, by taking the complex leaf overlap problem into consideration, the area of every plant is measured from top-view images in four steps. Given the abundant measurements obtained with PAE, in the second module GMA, a nonlinear growth model is applied to generate growth curves, followed by functional data analysis.

Results

Experimental results on model plant Arabidopsis thaliana show that, compared to an existing approach, HPGA reduces the error rate of measuring plant area by half. The application of HPGA on the cfq mutant plants under fluctuating light reveals the correlation between low photosynthetic rates and small plant area (compared to wild type), which raises a hypothesis that knocking out cfq changes the sensitivity of the energy distribution under fluctuating light conditions to repress leaf growth.

Availability

HPGA is available at http://www.msu.edu/~jinchen/HPGA.",2013-12-13 +23395470,Incidence of nonunion after isolated arthroscopic ankle arthrodesis.,"

Purpose

To determine the incidence of nonunion after isolated arthroscopic ankle arthrodesis.

Methods

Electronic databases and relevant peer-reviewed sources, including OvidSP/Medline (http://ovidsp.tx.ovid.com) and Google, were systematically searched for the terms ""arthroscopic ankle arthrodesis"" AND ""nonunion"". Additionally, we manually searched common American, British, and European orthopaedic and podiatric scientific literature for relevant articles. Studies were eligible for inclusion only if they included the following: isolated ankle arthrodesis, greater than 20 ankles, minimum mean follow-up of 12-months, a 2-portal anterior arthroscopic approach, fixation with 2 or 3 large-diameter cannulated cancellous screws, and the nonunion rate with no restriction on cause.

Results

After considering all the potentially eligible articles, 7 (25.9%) met the inclusion criteria. A total of 244 patients (244 ankles)—148 (60.7%) male and 96 (39.3%) female patients, with a weighted mean age of 49.2 years—were included. For those studies that specified the exact follow-up, the weighted mean was 24.1 months. A total of 21 nonunions (8.6%) were reported, with 14 (66.7%) being symptomatic and requiring further intervention.

Conclusions

The results of this systematic review reveal an acceptable incidence of nonunion of 8.6%. However it is important to recognize that of these nonunions, 66.7% were symptomatic. This supports the belief that regardless of approach, nonunion of an ankle arthrodesis is problematic. In light of this finding, additional prospective studies are warranted to compare directly the incidence of nonunion between open, minimum incision, and arthroscopic approaches with a variety of fixation constructs.

Level of evidence

Level IV, systematic review of level IV studies.",2013-02-06 +23405067,Benchmarking human protein complexes to investigate drug-related systems and evaluate predicted protein complexes.,"Protein complexes are key entities to perform cellular functions. Human diseases are also revealed to associate with some specific human protein complexes. In fact, human protein complexes are widely used for protein function annotation, inference of human protein interactome, disease gene prediction, and so on. Therefore, it is highly desired to build an up-to-date catalogue of human complexes to support the research in these applications. Protein complexes from different databases are as expected to be highly redundant. In this paper, we designed a set of concise operations to compile these redundant human complexes and built a comprehensive catalogue called CHPC2012 (Catalogue of Human Protein Complexes). CHPC2012 achieves a higher coverage for proteins and protein complexes than those individual databases. It is also verified to be a set of complexes with high quality as its co-complex protein associations have a high overlap with protein-protein interactions (PPI) in various existing PPI databases. We demonstrated two distinct applications of CHPC2012, that is, investigating the relationship between protein complexes and drug-related systems and evaluating the quality of predicted protein complexes. In particular, CHPC2012 provides more insights into drug development. For instance, proteins involved in multiple complexes (the overlapping proteins) are potential drug targets; the drug-complex network is utilized to investigate multi-target drugs and drug-drug interactions; and the disease-specific complex-drug networks will provide new clues for drug repositioning. 
With this up-to-date reference set of human protein complexes, we believe that the CHPC2012 catalogue is able to enhance the studies for protein interactions, protein functions, human diseases, drugs, and related fields of research. CHPC2012 complexes can be downloaded from http://www1.i2r.a-star.edu.sg/xlli/CHPC2012/CHPC2012.htm.",2013-02-06 +23080116,NARROMI: a noise and redundancy reduction technique improves accuracy of gene regulatory network inference.,"

Motivation

Reconstruction of gene regulatory networks (GRNs) is of utmost interest to biologists and is vital for understanding the complex regulatory mechanisms within the cell. Despite various methods developed for reconstruction of GRNs from gene expression profiles, they are notorious for high false positive rate owing to the noise inherited in the data, especially for the dataset with a large number of genes but a small number of samples.

Results

In this work, we present a novel method, namely NARROMI, to improve the accuracy of GRN inference by combining ordinary differential equation-based recursive optimization (RO) and information theory-based mutual information (MI). In the proposed algorithm, the noisy regulations with low pairwise correlations are first removed by using MI, and the redundant regulations from indirect regulators are further excluded by RO to improve the accuracy of inferred GRNs. In particular, the RO step can help to determine regulatory directions without prior knowledge of regulators. The results on benchmark datasets from Dialogue for Reverse Engineering Assessments and Methods challenge and experimentally determined GRN of Escherichia coli show that NARROMI significantly outperforms other popular methods in terms of false positive rates and accuracy.

Availability

All the source data and code are available at: http://csb.shu.edu.cn/narromi.htm.",2012-10-18 +23384165,Bioinformatic analysis of protein families for identification of variable amino acid residues responsible for functional diversity.,"Proteins within a single family usually share a common function but differ in more specific features and can be divided into subfamilies with different properties. Availability of genomic, structural, and functional information implemented into numerous databases provides new opportunities for bioinformatic analysis of homologous proteins. In this work, new method of bioinformatic analysis has been developed to identify subfamily-specific positions (SSPs)--conserved only within protein subfamilies, but different between subfamilies--that seem to play important role in functional diversity. A novel scoring function is suggested to consider structural information as well as physicochemical and residue conservation in protein subfamilies. Random shuffling is performed to rank results by significance, and Bernoulli statistics is applied to calculate p-values. Algorithm does not require predefined subfamily classification and can propose it automatically by graph-based clustering. This method can be used as a tool to explore SSPs with different structural localization in order to understand their implication to structure-function relationship and protein function. Web interface to the program is available at http://biokinet.belozersky.msu.ru/zebra.",2013-02-05 +23384242,The roles of the monomer length and nucleotide context of plant tandem repeats in nucleosome positioning.,"Similar to regularly spaced nucleosomes in chromatin, long tandem DNA arrays are composed of regularly alternating monomers that have almost identical primary DNA structures. Such a similarity in the structural organization makes these arrays especially interesting for studying the role of intrinsic DNA preferences in nucleosome positioning. 
We have studied the nucleosome formation potential of DNA tandem repeat families with different monomer lengths (ML). In total, 165 plant tandem repeat families from the PlantSat database (http://w3lamc.umbr.cas.cz/PlantSat/) were divided into two classes based on the number of nucleosome repeats in one DNA monomer. For predicting nucleosome formation potential, we developed the Phase method, which combines the advantages of multiple bioinformatics models. The Phase method was able to distinguish interfamily differences and intrafamily monomer variation and identify the influence of nucleotide context on nucleosome formation potential. Three main types of nucleosome arrangement in DNA tandem repeat arrays--regular, partially regular (partial), and flexible--were distinguished among a great variety of Phase profiles. The regular type, in which all nucleosomes of the monomer array are positioned in a context-dependent manner, is the most representative type of the class 1 families, with ML equal to or a multiple of the nucleosome repeat length (NRL). In the partially regular type, nucleotide context influences the positioning of only a subset of nucleosomes. The influence of the nucleotide context on nucleosome positioning has the least effect in the flexible type, which contains the greatest number of families (65). The majority of these families belong to class 2 and have nonmultiple ML to NRL ratios.",2013-02-05 +23387433,"M-pick, a modularity-based method for OTU picking of 16S rRNA sequences.","

Background

Binning 16S rRNA sequences into operational taxonomic units (OTUs) is an initial crucial step in analyzing large sequence datasets generated to determine microbial community compositions in various environments including that of the human gut. Various methods have been developed, but most suffer from either inaccuracies or from being unable to handle millions of sequences generated in current studies. Furthermore, existing binning methods usually require a priori decisions regarding binning parameters such as a distance level for defining an OTU.

Results

We present a novel modularity-based approach (M-pick) to address the aforementioned problems. The new method utilizes ideas from community detection in graphs, where sequences are viewed as vertices on a weighted graph, each pair of sequences is connected by an imaginary edge, and the similarity of a pair of sequences represents the weight of the edge. M-pick first generates a graph based on pairwise sequence distances and then applies a modularity-based community detection technique on the graph to generate OTUs to capture the community structures in sequence data. To compare the performance of M-pick with that of existing methods, specifically CROP and ESPRIT-Tree, sequence data from different hypervariable regions of 16S rRNA were used and binning results were compared.

Conclusions

A new modularity-based clustering method for OTU picking of 16S rRNA sequences is developed in this study. The algorithm does not require a predetermined cut-off level, and our simulation studies suggest that it is superior to existing methods that require specified distance levels to define OTUs. The source code is available at http://plaza.ufl.edu/xywang/Mpick.htm.",2013-02-07 +23782611,GAT: a simulation framework for testing the association of genomic intervals.,"

Motivation

A common question in genomic analysis is whether two sets of genomic intervals overlap significantly. This question arises, for example, when interpreting ChIP-Seq or RNA-Seq data in functional terms. Because genome organization is complex, answering this question is non-trivial.

Summary

We present Genomic Association Test (GAT), a tool for estimating the significance of overlap between multiple sets of genomic intervals. GAT implements a null model that the two sets of intervals are placed independently of one another, but allows each set's density to depend on external variables, for example, isochore structure or chromosome identity. GAT estimates statistical significance based on simulation and controls for multiple tests using the false discovery rate.

Availability

GAT's source code, documentation and tutorials are available at http://code.google.com/p/genomic-association-tester.",2013-06-18 +24561587,Predicting the concentration of verotoxin-producing Escherichia coli bacteria during processing and storage of fermented raw-meat sausages.,"A model to predict the population density of verotoxigenic Escherichia coli (VTEC) throughout the elaboration and storage of fermented raw-meat sausages (FRMS) was developed. Probabilistic and kinetic measurement data sets collected from publicly available resources were completed with new measurements when required and used to quantify the dependence of VTEC growth and inactivation on the temperature, pH, water activity (aw), and concentration of lactic acid. Predictions were compared with observations in VTEC-contaminated FRMS manufactured in a pilot plant. Slight differences in the reduction of VTEC were predicted according to the fermentation temperature, 24 or 34°C, with greater inactivation at the highest temperature. The greatest reduction was observed during storage at high temperatures. A population decrease greater than 6 decimal logarithmic units was observed after 66 days of storage at 25°C, while a reduction of only ca. 1 logarithmic unit was detected at 12°C. The performance of our model and other modeling approaches was evaluated throughout the processing of dry and semidry FRMS. The greatest inactivation of VTEC was predicted in dry FRMS with long drying periods, while the smallest reduction was predicted in semidry FMRS with short drying periods. The model is implemented in a computing tool, E. coli SafeFerment (EcSF), freely available from http://www.ifr.ac.uk/safety/EcoliSafeFerment. 
EcSF integrates growth, probability of growth, and thermal and nonthermal inactivation models to predict the VTEC concentration throughout FRMS manufacturing and storage under constant or fluctuating environmental conditions.",2014-02-21 +24948027,"Inter-observer variability between general pathologists and a specialist in breast pathology in the diagnosis of lobular neoplasia, columnar cell lesions, atypical ductal hyperplasia and ductal carcinoma in situ of the breast.","

Background

This study aimed to assess inter-observer variability between the original diagnostic reports and later review by a specialist in breast pathology considering lobular neoplasias (LN), columnar cell lesions (CCL), atypical ductal hyperplasia (ADH), and ductal carcinoma in situ (DCIS) of the breast.

Methods

A retrospective, observational, cross-sectional study was conducted. A total of 610 breast specimens that had been formally sent for consultation and/or second opinions to the Breast Pathology Laboratory of Federal University of Minas Gerais were analysed between January 2005 and December 2010. The inter-observer variability between the original report and later review was compared regarding the diagnoses of LN, CCL, ADH, and DCIS. Statistical analyses were conducted using the Kappa index.

Results

Weak correlations were observed for the diagnoses of columnar cell change (CCC; Kappa=0.38), columnar cell hyperplasia (CCH; Kappa=0.32), while a moderate agreement (Kappa=0.47) was observed for the diagnoses of flat epithelial atypia (FEA). Good agreement was observed in the diagnoses of atypical lobular hyperplasia (ALH; Kappa=0.62) and lobular carcinoma in situ (LCIS; Kappa=0.66). However, poor agreement was observed for the diagnoses of pleomorphic LCIS (Kappa=0.22). Moderate agreement was observed for the diagnoses of ADH (Kappa=0.44), low-grade DCIS (Kappa=0.47), intermediate-grade DCIS (Kappa=0.45), and DCIS with microinvasion (Kappa=0.56). Good agreement was observed between the diagnoses of high-grade DCIS (Kappa=0.68).

Conclusions

According to our data, the best diagnostic agreements were observed for high-grade DCIS, ALH, and LCIS. CCL without atypia and pleomorphic LCIS had the worst agreement indices.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1640072350119725.",2014-06-19 +22171329,Transformations for the compression of FASTQ quality scores of next-generation sequencing data.,"

Motivation

The growth of next-generation sequencing means that more effective and efficient archiving methods are needed to store the generated data for public dissemination and in anticipation of more mature analytical methods later. This article examines methods for compressing the quality score component of the data to partly address this problem.

Results

We compare several compression policies for quality scores, in terms of both compression effectiveness and overall efficiency. The policies employ lossy and lossless transformations with one of several coding schemes. Experiments show that both lossy and lossless transformations are useful, and that simple coding methods, which consume less computing resources, are highly competitive, especially when random access to reads is needed.

Availability and implementation

Our C++ implementation, released under the Lesser General Public License, is available for download at http://www.cb.k.u-tokyo.ac.jp/asailab/members/rwan.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-13 +22962493,PARADIGM-SHIFT predicts the function of mutations in multiple cancers using pathway impact analysis.,"

Motivation

A current challenge in understanding cancer processes is to pinpoint which mutations influence the onset and progression of disease. Toward this goal, we describe a method called PARADIGM-SHIFT that can predict whether a mutational event is neutral, gain- or loss-of-function in a tumor sample. The method uses a belief-propagation algorithm to infer gene activity from gene expression and copy number data in the context of a set of pathway interactions.

Results

The method was found to be both sensitive and specific on a set of positive and negative controls for multiple cancers for which pathway information was available. Application to the Cancer Genome Atlas glioblastoma, ovarian and lung squamous cancer datasets revealed several novel mutations with predicted high impact including several genes mutated at low frequency suggesting the approach will be complementary to current approaches that rely on the prevalence of events to reach statistical significance.

Availability

All source code is available at the github repository http:github.org/paradigmshift.

Contact

jstuart@soe.ucsc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-01 +23388151,A methodical microarray design enables surveying of expression of a broader range of genes in Ciona intestinalis.,"We provide a new oligo-microarray for Ciona intestinalis, based on the NimbleGen 12-plex×135k format. The array represents 106,285 probes, which is more than double the probe number of the currently available 44k microarray. These probes cover 99.2% of the transcripts in the KyotoHoya (KH) models, published in 2008, and they contain 81.1% of the entries in the UniGene database that are not included in the KH models. In this paper, we show that gene expression levels measured by this new 135k microarray are highly correlated with those obtained by the existing 44k microarray for genes common to both arrays. We also investigated gene expression using samples obtained from the ovary and the neural complex of adult C. intestinalis, showing that the expression of tissue-specific genes is consistent with previous reports. Approximately half of the highly expressed genes identified in the 135k microarray are not included in the previous microarray. The high coverage of gene models by this microarray made it possible to identify splicing variants for a given transcript. The 135k microarray is useful in investigating the functions of genes that are not yet well characterized. Detailed information about this 135k microarray is accessible at no charge from supplemental materials, NCBI Gene Expression Omnibus (GEO), and http://marinegenomics.oist.jp.",2013-02-04 +23379937,Colloid or crystalloid solution on maternal and neonatal hemodynamics for cesarean section: a meta-analysis of randomized controlled trials.,"

Aim

To compare the effect of colloid and crystalloid on maternal and neonatal hemodynamics in cesarean section.

Material and methods

We searched MEDLINE (PubMed, 1966-2011), EMBASE (1974-2011), http://www.clinicaltrials.gov, the Cochrane Controlled Clinical Trials Register Database, Biosis Preview, and the Chinese Biomedical Database (1980-2011). Randomized controlled trials involving healthy term patients undergoing scheduled cesarean delivery that compared the effect of colloid and crystalloid on hypotension, need for vasopressors, cardiac output, neonatal outcomes, and other adverse effects were analyzed.

Results

Ten trials of 853 patients were eligible for analysis. When colloid was used, significantly fewer hypotensive events occurred (odds ratio [OR] 3.21, 95% CI 2.15-4.53, number needed to treat = 4), less demand for vasopressors (standard mean difference [SMD] 0.77, 95% CI 0.34-1.21) and improved cardiac output (SMD -1.08, 95% CI -2.00 - -0.17). In subgroup analysis, the use of colloid reduced hypotensive events and adverse effects in Asian patients.

Conclusion

Colloid hydration should be considered first, especially in Asian patients, focusing on dosage and type of fluids. Preventive or therapeutic vasopressors may be required in a significant proportion of patients.",2013-02-04 +24828116,Supervised learning classification models for prediction of plant virus encoded RNA silencing suppressors.,"Viral encoded RNA silencing suppressor proteins interfere with the host RNA silencing machinery, facilitating viral infection by evading host immunity. In plant hosts, the viral proteins have several basic science implications and biotechnology applications. However in silico identification of these proteins is limited by their high sequence diversity. In this study we developed supervised learning based classification models for plant viral RNA silencing suppressor proteins in plant viruses. We developed four classifiers based on supervised learning algorithms: J48, Random Forest, LibSVM and Naïve Bayes algorithms, with enriched model learning by correlation based feature selection. Structural and physicochemical features calculated for experimentally verified primary protein sequences were used to train the classifiers. The training features include amino acid composition; auto correlation coefficients; composition, transition, and distribution of various physicochemical properties; and pseudo amino acid composition. Performance analysis of predictive models based on 10 fold cross-validation and independent data testing revealed that the Random Forest based model was the best and achieved 86.11% overall accuracy and 86.22% balanced accuracy with a remarkably high area under the Receivers Operating Characteristic curve of 0.95 to predict viral RNA silencing suppressor proteins. 
The prediction models for plant viral RNA silencing suppressors can potentially aid identification of novel viral RNA silencing suppressors, which will provide valuable insights into the mechanism of RNA silencing and could be further explored as potential targets for designing novel antiviral therapeutics. Also, the key subset of identified optimal features may help in determining compositional patterns in the viral proteins which are important determinants for RNA silencing suppressor activities. The best prediction model developed in the study is available as a freely accessible web server pVsupPred at http://bioinfo.icgeb.res.in/pvsup/.",2014-05-14 +20714018,Greedy learning of binary latent trees.,"Inferring latent structures from observations helps to model and possibly also understand underlying data generating processes. A rich class of latent structures is the latent trees, i.e., tree-structured distributions involving latent variables where the visible variables are leaves. These are also called hierarchical latent class (HLC) models. Zhang and Kocka proposed a search algorithm for learning such models in the spirit of Bayesian network structure learning. While such an approach can find good solutions, it can be computationally expensive. As an alternative, we investigate two greedy procedures: the BIN-G algorithm determines both the structure of the tree and the cardinality of the latent variables in a bottom-up fashion. The BIN-A algorithm first determines the tree structure using agglomerative hierarchical clustering, and then determines the cardinality of the latent variables as for BIN-G. We show that even with restricting ourselves to binary trees, we obtain HLC models of comparable quality to Zhang's solutions (in terms of cross-validated log-likelihood), while being generally faster to compute. This claim is validated by a comprehensive comparison on several data sets. 
Furthermore, we demonstrate that our methods are able to estimate interpretable latent structures on real-world data with a large number of variables. By applying our method to a restricted version of the 20 newsgroups data, these models turn out to be related to topic models, and on data from the PASCAL Visual Object Classes (VOC) 2007 challenge, we show how such tree-structured models help us understand how objects co-occur in images. For reproducibility of all experiments in this paper, all code and data sets (or links to data) are available at http://people.kyb.tuebingen.mpg.de/harmeling/code/ltt-1.4.tar.",2011-06-01 +23622142,Global remodeling of nucleosome positions in C. elegans.,"

Background

Eukaryotic chromatin architecture is affected by intrinsic histone-DNA sequence preferences, steric exclusion between nucleosome particles, formation of higher-order structures, and in vivo activity of chromatin remodeling enzymes.

Results

To disentangle sequence-dependent nucleosome positioning from the other factors, we have created two high-throughput maps of nucleosomes assembled in vitro on genomic DNA from the nematode worm Caenorhabditis elegans. A comparison of in vitro nucleosome positions with those observed in a mixed-stage, mixed-tissue population of C. elegans cells reveals that in vivo sequence preferences are modified on the genomic scale. Indeed, G/C dinucleotides are predicted to be most favorable for nucleosome formation in vitro but not in vivo. Nucleosome sequence read coverage in vivo is distinctly lower in chromosome arms than in central regions; the observed changes in apparent nucleosome sequence specificity, likely due to genome-wide chromatin remodeler activity, contribute to the formation of these megabase-scale chromatin domains. We also observe that the majority of well-positioned in vivo nucleosomes do not occupy thermodynamically favorable sequences observed in vitro. Finally, we find that exons are intrinsically more amenable to nucleosome formation compared to introns. Nucleosome occupancy of introns and exons consistently increases with G/C content in vitro but not in vivo, in agreement with our observation that G/C dinucleotide enrichment does not strongly promote in vivo nucleosome formation.

Conclusions

Our findings highlight the importance of both sequence specificity and active nucleosome repositioning in creating large-scale chromatin domains, and the antagonistic roles of intrinsic sequence preferences and chromatin remodelers in C. elegans. Sequence read data has been deposited into Sequence Read Archive (http://www.ncbi.nlm.nih.gov/sra; accession number SRA050182). Additional data, software and computational predictions are available on the Nucleosome Explorer website (http://nucleosome.rutgers.edu).",2013-04-26 +24978117,"Reliability, validity and sensitivity of a computerized visual analog scale measuring state anxiety.","

Background and objectives

Assessment of state anxiety is frequently required in clinical and research settings, but its measurement using standard multi-item inventories entails practical challenges. Such inventories are increasingly complemented by paper-and-pencil, single-item visual analog scales measuring state anxiety (VAS-A), which allow rapid assessment of current anxiety states. Computerized versions of VAS-A offer additional advantages, including facilitated and accurate data collection and analysis, and applicability to computer-based protocols. Here, we establish the psychometric properties of a computerized VAS-A.

Methods

Experiment 1 assessed the reliability, convergent validity, and discriminant validity of the computerized VAS-A in a non-selected sample. Experiment 2 assessed its sensitivity to increase in state anxiety following social stress induction, in participants with high levels of social anxiety.

Results

Experiment 1 demonstrated the computerized VAS-A's test-retest reliability (r = .44, p < .001); convergent validity with the State-Trait Anxiety Inventory's state subscale (STAI-State; r = .60, p < .001); and discriminant validity as indicated by significantly lower correlations between VAS-A and different psychological measures relative to the correlation between VAS-A and STAI-State. Experiment 2 demonstrated the VAS-A's sensitivity to changes in state anxiety via a significant pre- to during-stressor rise in VAS-A scores (F(1,48) = 25.13, p < .001).

Limitations

Set-order administration of measures, absence of clinically-anxious population, and gender-unbalanced samples.

Conclusions

The adequate psychometric characteristics, combined with simple and rapid administration, make the computerized VAS-A a valuable self-rating tool for state anxiety. It may prove particularly useful for clinical and research settings where multi-item inventories are less applicable, including computer-based treatment and assessment protocols. The VAS-A is freely available: http://people.socsci.tau.ac.il/mu/anxietytrauma/visual-analog-scale/.",2014-06-18 +23137144,Isotope pattern deconvolution for peptide mass spectrometry by non-negative least squares/least absolute deviation template matching.,"

Background

The robust identification of isotope patterns originating from peptides being analyzed through mass spectrometry (MS) is often significantly hampered by noise artifacts and the interference of overlapping patterns arising e.g. from post-translational modifications. As the classification of the recorded data points into either 'noise' or 'signal' lies at the very root of essentially every proteomic application, the quality of the automated processing of mass spectra can significantly influence the way the data might be interpreted within a given biological context.

Results

We propose non-negative least squares/non-negative least absolute deviation regression to fit a raw spectrum by templates imitating isotope patterns. In a carefully designed validation scheme, we show that the method exhibits excellent performance in pattern picking. It is demonstrated that the method is able to disentangle complicated overlaps of patterns.

Conclusions

We find that regularization is not necessary to prevent overfitting and that thresholding is an effective and user-friendly way to perform feature selection. The proposed method avoids problems inherent in regularization-based approaches, comes with a set of well-interpretable parameters whose default configuration is shown to generalize well without the need for fine-tuning, and is applicable to spectra of different platforms. The R package IPPD implements the method and is available from the Bioconductor platform (http://bioconductor.fhcrc.org/help/bioc-views/devel/bioc/html/IPPD.html).",2012-11-08 +23142965,DLocalMotif: a discriminative approach for discovering local motifs in protein sequences.,"

Motivation

Local motifs are patterns of DNA or protein sequences that occur within a sequence interval relative to a biologically defined anchor or landmark. Current protein motif discovery methods do not adequately consider such constraints to identify biologically significant motifs that are only weakly over-represented but spatially confined. Using negatives, i.e. sequences known to not contain a local motif, can further increase the specificity of their discovery.

Results

This article introduces the method DLocalMotif that makes use of positional information and negative data for local motif discovery in protein sequences. DLocalMotif combines three scoring functions, measuring degrees of motif over-representation, entropy and spatial confinement, specifically designed to discriminatively exploit the availability of negative data. The method is shown to outperform current methods that use only a subset of these motif characteristics. We apply the method to several biological datasets. The analysis of peroxisomal targeting signals uncovers several novel motifs that occur immediately upstream of the dominant peroxisomal targeting signal-1 signal. The analysis of proline-tyrosine nuclear localization signals uncovers multiple novel motifs that overlap with C2H2 zinc finger domains. We also evaluate the method on classical nuclear localization signals and endoplasmic reticulum retention signals and find that DLocalMotif successfully recovers biologically relevant sequence properties.

Availability

http://bioinf.scmb.uq.edu.au/dlocalmotif/",2012-11-09 +21389073,Baking a mass-spectrometry data PIE with McMC and simulated annealing: predicting protein post-translational modifications from integrated top-down and bottom-up data.,"

Motivation

Post-translational modifications are vital to the function of proteins, but are hard to study, especially since several modified isoforms of a protein may be present simultaneously. Mass spectrometers are a great tool for investigating modified proteins, but the data they provide is often incomplete, ambiguous and difficult to interpret. Combining data from multiple experimental techniques-especially bottom-up and top-down mass spectrometry-provides complementary information. When integrated with background knowledge this allows a human expert to interpret what modifications are present and where on a protein they are located. However, the process is arduous and for high-throughput applications needs to be automated.

Results

This article explores a data integration methodology based on Markov chain Monte Carlo and simulated annealing. Our software, the Protein Inference Engine (the PIE) applies these algorithms using a modular approach, allowing multiple types of data to be considered simultaneously and for new data types to be added as needed. Even for complicated data representing multiple modifications and several isoforms, the PIE generates accurate modification predictions, including location. When applied to experimental data collected on the L7/L12 ribosomal protein the PIE was able to make predictions consistent with manual interpretation for several different L7/L12 isoforms using a combination of bottom-up data with experimentally identified intact masses.

Availability

Software, demo projects and source can be downloaded from http://pie.giddingslab.org/",2011-03-01 +24936036,Cancer and comparative imaging.,"Comparative oncology research is gaining traction as a method for streamlining the drug discovery and development strategies currently in place worldwide. This approach uses the tumor-bearing pet dog as a relevant and complementary model alongside the traditional use of rodents, non-human primates, and other large mammalian species such as purpose-bred dogs or pigs. To date, most comparative oncology studies have been designed and executed to evaluate new anticancer drugs using tumor-bearing dogs with specific naturally occurring cancers as models for humans. These studies have proved extremely valuable for modeling pharmacokinetic-pharmacodynamic relationships, refining drug doses and schedules, and validating an individual drug's target in vivo. The National Cancer Institute's Comparative Oncology Trials Consortium (http://ccr.cancer.gov/resources/cop/COTC.asp) is a cooperative effort that provides infrastructure and resources to support this effort. To complement ongoing efforts in this field, we propose expansion of comparative cancer imaging as a component to drug discovery and development. Diagnostic imaging is critical to diagnosis and management of malignancy in both humans and animals. Molecular imaging techniques allow for detection of disease-specific signals that provide individualized data to aid in patient selection, response to therapy, and prognostication. In this review, we will highlight the comparative oncology studies that have used molecular imaging techniques, demonstrating the value of spontaneous canine cancers as a research tool in drug and imaging agent development.",2014-01-01 +24317974,Shared subgenome dominance following polyploidization explains grass genome evolutionary plasticity from a seven protochromosome ancestor with 16K protogenes.,"Modern plant genomes are diploidized paleopolyploids. 
We revisited grass genome paleohistory in response to the diploidization process through a detailed investigation of the evolutionary fate of duplicated blocks. Ancestrally duplicated genes can be conserved, deleted, and shuffled, defining dominant (bias toward duplicate retention) and sensitive (bias toward duplicate erosion) chromosomal fragments. We propose a new grass genome paleohistory deriving from an ancestral karyotype structured in seven protochromosomes containing 16,464 protogenes and following evolutionary rules where 1) ancestral shared polyploidizations shaped conserved dominant (D) and sensitive (S) subgenomes, 2) subgenome dominance is revealed by both gene deletion and shuffling from the S blocks, 3) duplicate deletion/movement may have been mediated by single-/double-stranded illegitimate recombination mechanisms, 4) modern genomes arose through centromeric fusion of protochromosomes, leading to functional monocentric neochromosomes, 5) the fusion of two dominant blocks leads to supradominant neochromosomes (D + D = D) with higher ancestral gene retention compared with D + S = D (i.e., fusion of blocks with opposite sensitivity) or even S + S = S (i.e., fusion of two sensitive ancestral blocks). A new user-friendly online tool named ""PlantSyntenyViewer,"" available at http://urgi.versailles.inra.fr/synteny-cereal, presents the refined comparative genomics data.",2014-01-01 +24250114,SNPAAMapper: An efficient genome-wide SNP variant analysis pipeline for next-generation sequencing data.,"

Unlabelled

Many NGS analysis tools focusing on read alignment and variant calling functions for exome sequencing data have been developed in recent years. However, publicly available tools dealing with the downstream analysis of genome-wide variants are fewer and have limited functionality. We developed SNPAAMapper, a novel variant analysis pipeline that can effectively classify variants by region (e.g. CDS, UTRs, intron, upstream, downstream), predict amino acid change type (e.g. synonymous, non-synonymous mutation), and prioritize mutation effects (e.g. CDS versus UTRs). Additional functionality afforded by our pipeline includes: checking variants at exon/intron junctions, customized homozygosity and allele frequency cutoff parameters, and annotation of known variants with dbSNP information, listing original and mutated amino acid sequences containing variants. The final result is reported in a spreadsheet format table containing all variant associated information and prioritized amino acids effects for investigators to examine.

Availability

Perl scripts and required input files are available on the web at http://www.ccmb.med.umich.edu/ccdu /SNPAAMapper.",2013-10-16 +30722336,First Report of Oidium anamorph of Erysiphe hypophylla Causing Powdery Mildew on Leafy Lespedeza (Lespedeza cyrtobotrya) in Korea.,"Leafy lespedeza (Lespedeza cyrtobotrya Miq.) is a deciduous shrub in the pea family (Fabaceae) that occurs in areas of East Asia including Korea, China, and Japan. It has been commonly used as a fence plant and for sand control in Korea. In late October 2011, a powdery mildew disease was observed on leafy lespedeza in several areas near Gwangju River, Gwangju, Korea. Symptoms appeared late in October when temperature fluctuation was high. Major symptoms included scattered white powdery to cottony colonies on both surfaces of the leaves which spread to stems, causing a minor chlorosis and distortion. Conidia were formed singly on conidiophores with 2 to 4 (commonly 3) septa including basal septum, primary conidia ellipsoid, apex rounded to subtruncate, base truncate; and secondary conidia subcylindrical to oblong when mature, and ends truncate. The size was 26.4 to 43.2 (av. 35.1) × 11.2 to 13.2 (av. 11.3) μm. Conidiophores were erect, cylindrical, wider at apex than foot cell, and straight or slightly flexuous in foot cells. The size was 60.1 to 81.3 (av. 78.1) × 6.2 to 12.1 (av. 8.3) μm. Chasmothecia were not observed. Morphologically, the conidia and conidiophores of our strain (EML-LCPW1) were very similar to those of Erysiphe hypophylla (syn. Microsphaera hypophylla) (4). From extracted genomic DNA, the internal transcribed spacer (ITS) region inclusive of 5.8S and 28S rDNA were amplified with ITS1 (5'-TCCGTAGGTGAACCTGCGG-3'), LR5F (5'-GCTATCCTGAGGGAAAC-3'), LROR (5'-ACCCGCTGAACTTAAGC-3'), and LR5F primer sets, respectively. Based on the morphology and ITS rDNA sequence analysis, the fungus was identified as E. hypophylla. 
rDNA ITS and 28S homologies of the fungus (EML-LCPW1, GenBank Accession Nos. JX512557 and JX512558) represented 100% (771/771) and 100% (775/775) identity values with E. hypophylla (AB292712 and AB292716, respectively) via NCBI BLASTn search of each isolate. The rDNA ITS (JX512557) and 28S (JX512558) sequence analysis revealed that the causal fungus matched E. hypophylla, forming a HypophyllaAlphitoides clade as Takamatsu et al. suggested that E. hypophylla is conspecific to E. alphitoides (3). So far, it has been known that E. communis, E. glycines var. lespedezae, and E. lespedezae cause powdery mildews on Lespedeza plants in the world (1). In Korea, only one Erysiphe species, E. lespedezae (= E. pisi), has been reported to cause powdery mildew on Lespedeza plants including L. bicolor and L. cyrtobotrya (2). In addition, 10 records with respect to Oidium sp. have been found on Lespedeza spp., including L. cyrtobotrya from Japan and L. chinensis from China (1). However, powdery mildew on Lespedeza plants, including leafy lespedeza caused by E. hypophylla, has not been reported in Korea or elsewhere in the world. This fungus has been reported in association with numerous oak (Quercus) species in nearby countries such as China and Russia (Far East), showing that it may be a potential source of inoculum in Korea as well. To our knowledge, this is the first report of Oidium anamorph of E. hypophylla on leafy lespedeza (L. cyrtobotrya) in Korea. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Syst. Mycol. Microbiol. Lab., ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , October 9, 2012. (2) H. D. Shin. Page 320 in: Erysiphaceae of Korea. National Institute of Agricultural Science & Technology, Suwon, Korea, 2000. (3) S. Takamatsu et al. Mycoscience 47:367, 2006. (4) S. Takamatsu et al. Mycol. Res. 
111:809, 2007.",2013-02-01 +23355455,Gene therapy clinical trials worldwide to 2012 - an update.,"To date, over 1800 gene therapy clinical trials have been completed, are ongoing or have been approved worldwide. Our database brings together global information on gene therapy clinical trials from official agency sources, published literature, conference presentations and posters kindly provided to us by individual investigators or trial sponsors. This review presents our analysis of clinical trials that, to the best of our knowledge, have been or are being performed worldwide. As of our June 2012 update, we have entries on 1843 trials undertaken in 31 countries. We have analysed the geographical distribution of trials, the disease indications (or other reasons) for trials, the proportions to which different vector types are used, and which genes have been transferred. Details of the analyses presented, and our searchable database are available on The Journal of Gene Medicine Gene Therapy Clinical Trials Worldwide website at: http://www.wiley.co.uk/genmed/clinical. We also provide an overview of the progress being made in clinical trials of gene therapy approaches around the world and discuss the prospects for the future.",2013-02-01 +24297529,Tumor haplotype assembly algorithms for cancer genomics.,"The growing availability of inexpensive high-throughput sequence data is enabling researchers to sequence tumor populations within a single individual at high coverage. But, cancer genome sequence evolution and mutational phenomena like driver mutations and gene fusions are difficult to investigate without first reconstructing tumor haplotype sequences. Haplotype assembly of single individual tumor populations is an exceedingly difficult task complicated by tumor haplotype heterogeneity, tumor or normal cell sequence contamination, polyploidy, and complex patterns of variation. 
While computational and experimental haplotype phasing of diploid genomes has seen much progress in recent years, haplotype assembly in cancer genomes remains uncharted territory. In this work, we describe HapCompass-Tumor a computational modeling and algorithmic framework for haplotype assembly of copy number variable cancer genomes containing haplotypes at different frequencies and complex variation. We extend our polyploid haplotype assembly model and present novel algorithms for (1) complex variations, including copy number changes, as varying numbers of disjoint paths in an associated graph, (2) variable haplotype frequencies and contamination, and (3) computation of tumor haplotypes using simple cycles of the compass graph which constrain the space of haplotype assembly solutions. The model and algorithm are implemented in the software package HapCompass-Tumor which is available for download from http://www.brown.edu/Research/Istrail_Lab/.",2014-01-01 +24319002,RNA-seq differential expression studies: more sequence or more replication?,"

Motivation

RNA-seq is replacing microarrays as the primary tool for gene expression studies. Many RNA-seq studies have used insufficient biological replicates, resulting in low statistical power and inefficient use of sequencing resources.

Results

We show the explicit trade-off between more biological replicates and deeper sequencing in increasing power to detect differentially expressed (DE) genes. In the human cell line MCF7, adding more sequencing depth after 10 M reads gives diminishing returns on power to detect DE genes, whereas adding biological replicates improves power significantly regardless of sequencing depth. We also propose a cost-effectiveness metric for guiding the design of large-scale RNA-seq DE studies. Our analysis showed that sequencing less reads and performing more biological replication is an effective strategy to increase power and accuracy in large-scale differential expression RNA-seq studies, and provided new insights into efficient experiment design of RNA-seq studies.

Availability and implementation

The code used in this paper is provided on: http://home.uchicago.edu/~jiezhou/replication/. The expression data is deposited in the Gene Expression Omnibus under the accession ID GSE51403.",2013-12-06 +21982300,Open Babel: An open chemical toolbox.,"

Background

A frequent problem in computational modeling is the interconversion of chemical structures between different formats. While standard interchange formats exist (for example, Chemical Markup Language) and de facto standards have arisen (for example, SMILES format), the need to interconvert formats is a continuing problem due to the multitude of different application areas for chemistry data, differences in the data stored by different formats (0D versus 3D, for example), and competition between software along with a lack of vendor-neutral formats.

Results

We discuss, for the first time, Open Babel, an open-source chemical toolbox that speaks the many languages of chemical data. Open Babel version 2.3 interconverts over 110 formats. The need to represent such a wide variety of chemical and molecular data requires a library that implements a wide range of cheminformatics algorithms, from partial charge assignment and aromaticity detection, to bond order perception and canonicalization. We detail the implementation of Open Babel, describe key advances in the 2.3 release, and outline a variety of uses both in terms of software products and scientific research, including applications far beyond simple format interconversion.

Conclusions

Open Babel presents a solution to the proliferation of multiple chemical file formats. In addition, it provides a variety of useful utilities from conformer searching and 2D depiction, to filtering, batch conversion, and substructure and similarity searching. For developers, it can be used as a programming library to handle chemical data in areas such as organic chemistry, drug design, materials science, and computational chemistry. It is freely available under an open-source license from http://openbabel.org.",2011-10-07 +22072384,Using Poisson mixed-effects model to quantify transcript-level gene expression in RNA-Seq.,"

Motivation

RNA sequencing (RNA-Seq) is a powerful new technology for mapping and quantifying transcriptomes using ultra high-throughput next-generation sequencing technologies. Using deep sequencing, gene expression levels of all transcripts including novel ones can be quantified digitally. Although extremely promising, the massive amounts of data generated by RNA-Seq, substantial biases and uncertainty in short read alignment pose challenges for data analysis. In particular, large base-specific variation and between-base dependence make simple approaches, such as those that use averaging to normalize RNA-Seq data and quantify gene expressions, ineffective.

Results

In this study, we propose a Poisson mixed-effects (POME) model to characterize base-level read coverage within each transcript. The underlying expression level is included as a key parameter in this model. Since the proposed model is capable of incorporating base-specific variation as well as between-base dependence that affect read coverage profile throughout the transcript, it can lead to improved quantification of the true underlying expression level.

Availability and implementation

POME can be freely downloaded at http://www.stat.purdue.edu/~yuzhu/pome.html.

Contact

yuzhu@purdue.edu; zhaohui.qin@emory.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-08 +23376577,GOASVM: a subcellular location predictor by incorporating term-frequency gene ontology into the general form of Chou's pseudo-amino acid composition.,"Prediction of protein subcellular localization is an important yet challenging problem. Recently, several computational methods based on Gene Ontology (GO) have been proposed to tackle this problem and have demonstrated superiority over methods based on other features. Existing GO-based methods, however, do not fully use the GO information. This paper proposes an efficient GO method called GOASVM that exploits the information from the GO term frequencies and distant homologs to represent a protein in the general form of Chou's pseudo-amino acid composition. The method first selects a subset of relevant GO terms to form a GO vector space. Then for each protein, the method uses the accession number (AC) of the protein or the ACs of its homologs to find the number of occurrences of the selected GO terms in the Gene Ontology annotation (GOA) database as a means to construct GO vectors for support vector machines (SVMs) classification. With the advantages of GO term frequencies and a new strategy to incorporate useful homologous information, GOASVM can achieve a prediction accuracy of 72.2% on a new independent test set comprising novel proteins that were added to Swiss-Prot six years later than the creation date of the training set. GOASVM and Supplementary materials are available online at http://bioinfo.eie.polyu.edu.hk/mGoaSvmServer/GOASVM.html.",2013-01-29 +23368100,CORAL: classification model for predictions of anti-sarcoma activity.,"A modified version of the CORAL software (http://www.insilico.eu/coral) allows building up the classification model for the case of the Yes/No data on the anti-sarcoma activity of organic compounds. 
Three random splits into the sub-training, calibration, and test sets of the data for 3017 compounds were examined. The performance of the proposed approach is satisfactory. The average values of the statistical characteristics for external test set on three random splits are as follows: n=1173-1234, sensitivity = 0.8903±0.0390, specificity = 0.9869±0.0013, and accuracy = 0.9759±0.0043. Mechanistic interpretation of the suggested model is discussed.",2012-01-01 +23047560,KASpOD--a web service for highly specific and explorative oligonucleotide design.,"

Summary

KASpOD is a web service dedicated to the design of signature sequences using a k-mer-based algorithm. Such highly specific and explorative oligonucleotides are then suitable for various goals, including Phylogenetic Oligonucleotide Arrays.

Availability

http://g2im.u-clermont1.fr/kaspod.

Contact

eric.peyretaillade@udamail.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-09 +23360652,CGAL: computing genome assembly likelihoods.,"Assembly algorithms have been extensively benchmarked using simulated data so that results can be compared to ground truth. However, in de novo assembly, only crude metrics such as contig number and size are typically used to evaluate assembly quality. We present CGAL, a novel likelihood-based approach to assembly assessment in the absence of a ground truth. We show that likelihood is more accurate than other metrics currently used for evaluating assemblies, and describe its application to the optimization and comparison of assembly algorithms. Our methods are implemented in software that is freely available at http://bio.math.berkeley.edu/cgal/.",2013-01-29 +21775306,GaggleBridge: collaborative data analysis.,"

Motivation

Tools aiding in collaborative data analysis are becoming ever more important as researchers work together over long distances. We present an extension to the Gaggle framework, which has been widely adopted as a tool to enable data exchange between different analysis programs on one computer.

Results

Our program, GaggleBridge, transparently extends this functionality to allow data exchange between Gaggle users at different geographic locations using network communication. GaggleBridge can automatically set up SSH tunnels to traverse firewalls while adding some security features to the Gaggle communication.

Availability

GaggleBridge is available as open-source software implemented in the Java language at http://it.inf.uni-tuebingen.de/gb.

Contact

florian.battke@uni-tuebingen.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-19 +23650886,The reliability of methodological ratings for speechBITE using the PEDro-P scale.,"

Background

speechBITE (http://www.speechbite.com) is an online database established in order to help speech and language therapists gain faster access to relevant research that can be used in clinical decision-making. In addition to containing more than 3000 journal references, the database also provides methodological ratings on the PEDro-P (an adapted version of the PEDro) scale to assist clinicians in identifying the scientific quality of randomized (RCTs) and non-randomized control trials (NRCTs). While reliability of the PEDro scale has been established by similar allied health databases, the reliability of the PEDro-P scale has yet to be reported.

Aims

To examine the reliability of PEDro-P scale ratings undertaken by raters on the speechBITE database and benchmark these results to the published reliability for the original PEDro scale. Both the total score (out of ten) as well as each of the 11 scale items were included in this analysis.

Methods & procedures

speechBITE's volunteer rater network of 17 members rated the first 100 RCTs and NRCTs on the website. The criterion and overall scores for these ratings were compared with previously published reliability studies using the PEDro scale. Intra-class correlations and per cent agreement measures were used to establish and benchmark reliability.

Outcomes & results

The speechBITE PEDro-P ratings ranged from fair to excellent for both the total score and for each of the 11 scale items. Furthermore, reliability was equal to that of other databases.

Conclusions & implications

speechBITE users can be confident of the reliability of ratings published on the website. Further analysis of differences between this study and previous PEDro scale reliability studies are discussed.",2013-01-28 +23356573,Seq2Ref: a web server to facilitate functional interpretation.,"

Background

The size of the protein sequence database has been exponentially increasing due to advances in genome sequencing. However, experimentally characterized proteins only constitute a small portion of the database, such that the majority of sequences have been annotated by computational approaches. Current automatic annotation pipelines inevitably introduce errors, making the annotations unreliable. Instead of such error-prone automatic annotations, functional interpretation should rely on annotations of 'reference proteins' that have been experimentally characterized or manually curated.

Results

The Seq2Ref server uses BLAST to detect proteins homologous to a query sequence and identifies the reference proteins among them. Seq2Ref then reports publications with experimental characterizations of the identified reference proteins that might be relevant to the query. Furthermore, a plurality-based rating system is developed to evaluate the homologous relationships and rank the reference proteins by their relevance to the query.

Conclusions

The reference proteins detected by our server will lend insight into proteins of unknown function and provide extensive information to develop in-depth understanding of uncharacterized proteins. Seq2Ref is available at: http://prodata.swmed.edu/seq2ref.",2013-01-28 +21493657,'Sciencenet'--towards a global search and share engine for all scientific knowledge.,"

Summary

Modern biological experiments create vast amounts of data which are geographically distributed. These datasets consist of petabytes of raw data and billions of documents. Yet to the best of our knowledge, a search engine technology that searches and cross-links all different data types in life sciences does not exist. We have developed a prototype distributed scientific search engine technology, 'Sciencenet', which facilitates rapid searching over this large data space. By 'bringing the search engine to the data', we do not require server farms. This platform also allows users to contribute to the search index and publish their large-scale data to support e-Science. Furthermore, a community-driven method guarantees that only scientific content is crawled and presented. Our peer-to-peer approach is sufficiently scalable for the science web without performance or capacity tradeoff.

Availability and implementation

The free to use search portal web page and the downloadable client are accessible at: http://sciencenet.kit.edu. The web portal for index administration is implemented in ASP.NET, the 'AskMe' experiment publisher is written in Python 2.7, and the backend 'YaCy' search engine is based on Java 1.6.",2011-04-14 +24843822,Pocket pathologist: A mobile application for rapid diagnostic surgical pathology consultation.,"

Introduction

Telepathology allows the digital transmission of images for rapid access to pathology experts. Recent technologic advances in smartphones have allowed them to be used to acquire and transmit digital images of the glass slide, representing cost savings and efficiency gains over traditional forms of telepathology. We report our experience with developing an iPhone application (App - Pocket Pathologist) to facilitate rapid diagnostic pathology teleconsultation utilizing a smartphone.

Materials and methods

A secure, web-based portal (http://pathconsult.upmc.com/) was created to facilitate remote transmission of digital images for teleconsultation. The App augments functionality of the web-based portal and allows the user to quickly and easily upload digital images for teleconsultation. Image quality of smartphone cameras was evaluated by capturing images using different adapters that directly attach phones to a microscope ocular lens.

Results

The App was launched in August 2013. The App facilitated easy submission of cases for teleconsultation by limiting the number of data entry fields for users and enabling uploading of images from their smartphone's gallery wirelessly. Smartphone cameras properly attached to a microscope create static digital images of similar quality to a commercial digital microscope camera.

Conclusion

Smartphones have great potential to support telepathology because they are portable, provide ubiquitous internet connectivity, contain excellent digital cameras, and can be easily attached to a microscope. The Pocket Pathologist App represents a significant reduction in the cost of creating digital images and submitting them for teleconsultation. The iPhone App provides an easy solution for global users to submit digital pathology images to pathology experts for consultation.",2014-03-28 +23244467,"COEUS: ""semantic web in a box"" for biomedical applications.","

Unlabelled

Background

As the ""omics"" revolution unfolds, the growth in data quantity and diversity is bringing about the need for pioneering bioinformatics software, capable of significantly improving the research workflow. To cope with these computer science demands, biomedical software engineers are adopting emerging semantic web technologies that better suit the life sciences domain. The latter's complex relationships are easily mapped into semantic web graphs, enabling a superior understanding of collected knowledge. Despite increased awareness of semantic web technologies in bioinformatics, their use is still limited.

Results

COEUS is a new semantic web framework, aiming at a streamlined application development cycle and following a ""semantic web in a box"" approach. The framework provides a single package including advanced data integration and triplification tools, base ontologies, a web-oriented engine and a flexible exploration API. Resources can be integrated from heterogeneous sources, including CSV and XML files or SQL and SPARQL query results, and mapped directly to one or more ontologies. Advanced interoperability features include REST services, a SPARQL endpoint and LinkedData publication. These enable the creation of multiple applications for web, desktop or mobile environments, and empower a new knowledge federation layer.

Conclusions

The platform, targeted at biomedical application developers, provides a complete skeleton ready for rapid application deployment, enhancing the creation of new semantic information systems. COEUS is available as open source at http://bioinformatics.ua.pt/coeus/.",2012-12-17 +21352572,BIO::Phylo-phyloinformatic analysis using perl.,"

Background

Phyloinformatic analyses involve large amounts of data and metadata of complex structure. Collecting, processing, analyzing, visualizing and summarizing these data and metadata should be done in steps that can be automated and reproduced. This requires flexible, modular toolkits that can represent, manipulate and persist phylogenetic data and metadata as objects with programmable interfaces.

Results

This paper presents Bio::Phylo, a Perl5 toolkit for phyloinformatic analysis. It implements classes and methods that are compatible with the well-known BioPerl toolkit, but is independent from it (making it easy to install) and features a richer API and a data model that is better able to manage the complex relationships between different fundamental data and metadata objects in phylogenetics. It supports commonly used file formats for phylogenetic data including the novel NeXML standard, which allows rich annotations of phylogenetic data to be stored and shared. Bio::Phylo can interact with BioPerl, thereby giving access to the file formats that BioPerl supports. Many methods for data simulation, transformation and manipulation, the analysis of tree shape, and tree visualization are provided.

Conclusions

Bio::Phylo is composed of 59 richly documented Perl5 modules. It has been deployed successfully on a variety of computer architectures (including various Linux distributions, Mac OS X versions, Windows, Cygwin and UNIX-like systems). It is available as open source (GPL) software from http://search.cpan.org/dist/Bio-Phylo.",2011-02-27 +22373022,iGepros: an integrated gene and protein annotation server for biological nature exploration.,"

Background

In the post-genomic era, transcriptomics and proteomics provide important information to understand the genomes. With fast development of high-throughput technology, more and more transcriptomics and proteomics data are generated at an unprecedented rate. Therefore, requirement of software to annotate those omics data and explore their biological nature arises. In the past decade, some pioneer works were presented to address this issue, but limitations still exist. For example, some of these tools offer command line only, which is not suitable for those users with little or no experience in programming. Besides, some tools don't support large scale gene and protein analysis.

Results

To overcome these limitations, an integrated gene and protein annotation server named iGepros has been developed. The server provides user-friendly interfaces and detailed on-line examples, so most researchers even those with little or no programming experience can use it smoothly. Moreover, the server provides many functionalities to compare transcriptomics and proteomics data. Especially, the server is constructed under a model-view-control framework, which makes it easy to incorporate more functions to the server in the future.

Conclusions

In this paper, we present a server with powerful capability not only for gene and protein functional annotation, but also for transcriptomics and proteomics data comparison. Researchers can survey biological characters behind gene and protein datasets and accelerate their investigation of transcriptome and proteome by applying the server. The server is publicly available at http://www.biosino.org/iGepros/.",2011-12-14 +23282411,Revealing functionally coherent subsets using a spectral clustering and an information integration approach.,"

Background

Contemporary high-throughput analyses often produce lengthy lists of genes or proteins. It is desirable to divide the genes into functionally coherent subsets for further investigation, by integrating heterogeneous information regarding the genes. Here we report a principled approach for managing and integrating multiple data sources within the framework of graph-spectrum analysis in order to identify coherent gene subsets.

Results

We investigated several approaches to integrate information derived from different sources that reflect distinct aspects of gene functional relationships including: functional annotations of genes in the form of the Gene Ontology, co-mentioning of genes in the literature, and shared transcription factor binding sites among genes. Given a list of genes, we construct a graph containing the genes in each information space; then the graphs were kernel transformed so they could be integrated; finally functionally coherent subsets were identified using a spectral clustering algorithm. In a series of simulation experiments, known functionally coherent gene sets were mixed and recovered using our approach.

Conclusions

The results indicate that spectral clustering approaches are capable of recovering coherent gene modules even under noisy conditions, and that information integration serves to further enhance this capability. When applied to a real-world data set, our methods revealed biologically sensible modules, and highlighted the importance of information integration. The implementation of the statistical model is provided under the GNU general public license, as an installable Python module, at: http://code.google.com/p/spectralmix.",2012-12-17 +21458441,The FaceBase Consortium: a comprehensive program to facilitate craniofacial research.,"The FaceBase Consortium consists of ten interlinked research and technology projects whose goal is to generate craniofacial research data and technology for use by the research community through a central data management and integrated bioinformatics hub. Funded by the National Institute of Dental and Craniofacial Research (NIDCR) and currently focused on studying the development of the middle region of the face, the Consortium will produce comprehensive datasets of global gene expression patterns, regulatory elements and sequencing; will generate anatomical and molecular atlases; will provide human normative facial data and other phenotypes; conduct follow up studies of a completed genome-wide association study; generate independent data on the genetics of craniofacial development, build repositories of animal models and of human samples and data for community access and analysis; and will develop software tools and animal models for analyzing and functionally testing and integrating these data. 
The FaceBase website (http://www.facebase.org) will serve as a web home for these efforts, providing interactive tools for exploring these datasets, together with discussion forums and other services to support and foster collaboration within the craniofacial research community.",2011-03-31 +22551205,Fractal MapReduce decomposition of sequence alignment.,"

Background

The dramatic fall in the cost of genomic sequencing, and the increasing convenience of distributed cloud computing resources, positions the MapReduce coding pattern as a cornerstone of scalable bioinformatics algorithm development. In some cases an algorithm will find a natural distribution via use of map functions to process vectorized components, followed by a reduce of aggregate intermediate results. However, for some data analysis procedures such as sequence analysis, a more fundamental reformulation may be required.

Results

In this report we describe a solution to sequence comparison that can be thoroughly decomposed into multiple rounds of map and reduce operations. The route taken makes use of iterated maps, a fractal analysis technique, that has been found to provide an ""alignment-free"" solution to sequence analysis and comparison. That is, a solution that does not require dynamic programming, relying on a numeric Chaos Game Representation (CGR) data structure. This claim is demonstrated in this report by calculating the length of the longest similar segment by inspecting only the USM coordinates of two analogous units: with no resort to dynamic programming.

Conclusions

The procedure described is an attempt at extreme decomposition and parallelization of sequence alignment in anticipation of a volume of genomic sequence data that cannot be met by current algorithmic frameworks. The solution found is delivered with a browser-based application (webApp), highlighting the browser's emergence as an environment for high performance distributed computing.

Availability

Public distribution of accompanying software library with open source and version control at http://usm.github.com. Also available as a webApp through Google Chrome's WebStore http://chrome.google.com/webstore: search with ""usm"".",2012-05-02 +24634371,Course of depressive symptoms and treatment in the longitudinal assessment of bariatric surgery (LABS-2) study.,"

Objective

To examine changes in depressive symptoms and treatment in the first 3 years following bariatric surgery.

Methods

The longitudinal assessment of bariatric surgery-2 (LABS-2) is an observational cohort study of adults (n = 2,458) who underwent a bariatric surgical procedure at 1 of 10 US hospitals between 2006 and 2009. This study includes 2,148 participants who completed the Beck depression inventory (BDI) at baseline and ≥ one follow-up visit in years 1-3.

Results

At baseline, 40.4% self-reported treatment for depression. At least mild depressive symptoms (BDI score ≥ 10) were reported by 28.3%; moderate (BDI score 19-29) and severe (BDI score ≥30) symptoms were uncommon (4.2 and 0.5%, respectively). Mild-to-severe depressive symptoms independently increased the odds (OR = 1.75; P = 0.03) of a major adverse event within 30 days of surgery. Compared with baseline, symptom severity was significantly lower at all follow-up time points (e.g., mild-to-severe symptomatology was 8.9%, 6 months; 8.4%, 1 year; 12.2%, 2 years; 15.6%, 3 years; ps < 0.001), but increased between 1 and 3 years postoperatively (P < 0.01). Change in depressive symptoms was significantly related to change in body mass index (r = 0.42; P < 0.0001).

Conclusion

Bariatric surgery has a positive impact on depressive features. However, data suggest some deterioration in improvement after the first postoperative year. LABS-2, #NCT00465829, http://www.clinicaltrials.gov/ct2/show/NCT00465829.",2014-03-25 +21365761,Visualize: a free and open source multifunction tool for proteomics data analysis.,"A major challenge in the field of high-throughput proteomics is the conversion of the large volume of experimental data that is generated into biological knowledge. Typically, proteomics experiments involve the combination and comparison of multiple data sets and the analysis and annotation of these combined results. Although there are some commercial applications that provide some of these functions, there is a need for a free, open source, multifunction tool for advanced proteomics data analysis. We have developed the Visualize program that provides users with the abilities to visualize, analyze, and annotate proteomics data; combine data from multiple runs, and quantitate differences between individual runs and combined data sets. Visualize is licensed under GNU GPL and can be downloaded from http://proteomics.mcw.edu/visualize. It is available as compiled client-based executable files for both Windows and Mac OS X platforms as well as PERL source code.",2011-02-07 +22649056,IntScore: a web tool for confidence scoring of biological interactions.,"Knowledge of all molecular interactions that potentially take place in the cell is a key for a detailed understanding of cellular processes. Currently available interaction data, such as protein-protein interaction maps, are known to contain false positives that inevitably diminish the accuracy of network-based inferences. Interaction confidence scoring is thus a crucial intermediate step after obtaining interaction data and before using it in an interaction network-based inference approach. 
It enables weighting of individual interactions according to the likelihood that they actually take place in the cell, and can be used to filter out false positives. We describe a web tool called IntScore which calculates confidence scores for user-specified sets of interactions. IntScore provides six network topology- and annotation-based confidence scoring methods. It also enables the integration of scores calculated by the different methods into an aggregate score using machine learning approaches. IntScore is user-friendly and extensively documented. It is freely available at http://intscore.molgen.mpg.de.",2012-05-30 +23369322,HDAM: a resource of human disease associated mutations from next generation sequencing studies.,"

Background

Next generation sequencing (NGS) technologies have greatly facilitated the rapid and economical detection of pathogenic mutations in human disorders. However, mutation descriptions are hard to be compared and integrated due to various reference sequences and annotation tools adopted in different articles as well as the nomenclature of diseases/traits.

Description

The Human Disease Associated Mutation (HDAM) database is dedicated to collect, standardize and re-annotate mutations for human diseases discovered by NGS studies. In the current release, HDAM contains 1,114 mutations, located in 669 genes and associated with 125 human diseases through literature mining. All mutation records have uniform and unequivocal descriptions of sequence changes according to the Human Genome Sequence Variation Society (HGVS) nomenclature recommendations. Each entry displays comprehensive information, including mutation location in genome (hg18/hg19), gene functional annotation, protein domain annotation, susceptible diseases, the first literature report of the mutation and etc. Moreover, new mutation-disease relationships predicted by Bayesian network are also presented under each mutation.

Conclusion

HDAM contains hundreds of rigorously curated human mutations from NGS studies and was created to provide a comprehensive view of these mutations that confer susceptibility to the common disorders. HDAM can be freely accessed at http://www.megabionet.org/HDAM.",2013-01-23 +23842807,iLoops: a protein-protein interaction prediction server based on structural features.,"

Summary

Protein-protein interactions play a critical role in many biological processes. Despite that, the number of servers that provide an easy and comprehensive method to predict them is still limited. Here, we present iLoops, a web server that predicts whether a pair of proteins can interact using local structural features. The inputs of the server are as follows: (i) the sequences of the query proteins and (ii) the pairs to be tested. Structural features are assigned to the query proteins by sequence similarity. Pairs of structural features (formed by loops or domains) are classified according to their likelihood to favor or disfavor a protein-protein interaction, depending on their observation in known interacting and non-interacting pairs. The server evaluates the putative interaction using a random forest classifier.

Availability

iLoops is available at http://sbi.imim.es/iLoops.php

Contact

baldo.oliva@upf.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-09 +23875887,"A peptide-spectrum scoring system based on ion alignment, intensity, and pair probabilities.","Peppy, the proteogenomic/proteomic search software, employs a novel method for assessing the match quality between an MS/MS spectrum and a theorized peptide sequence. The scoring system uses three score factors calculated with binomial probabilities: the probability that a fragment ion will randomly align with a peptide ion, the probability that the aligning ions will be selected from subsets of the most intense peaks, and the probability that the intensities of fragment ions identified as y-ions are greater than those of their counterpart b-ions. The scores produced by the method act as global confidence scores, which facilitate the accurate comparison of results and the estimation of false discovery rates. Peppy has been integrated into the meta-search engine PepArML to produce meaningful comparisons with Mascot, MSGF+, OMSSA, X!Tandem, k-Score and s-Score. For two of the four data sets examined with the PepArML analysis, Peppy exceeded the accuracy performance of the other scoring systems. Peppy is available for download at http://geneffects.com/peppy .",2013-08-08 +24667142,Prognostic significance of HLA EMR8-5 immunohistochemically analyzed expression in osteosarcoma.,"

Background

Defects in Human Leukocyte Antigen (HLA) class I antigen expression and/or function in tumor cells have been extensively investigated, because of their potential role in the escape of tumor cells from T cell recognition and destruction. The researchers evaluated HLA class I expression in tumor tissue as a prognostic factor in osteosarcoma patients and as a predictor of their survival. This retrospective cohort study was conducted at the pathology laboratory of Ain Shams University Hospital, and Ain Shams University Specialized Hospital during the period between January 2009 and January 2012.

Methods

The researchers investigated HLA class I expression in primary osteosarcoma by immunohistochemistry using EMR8-5 mAbs. Furthermore, researchers evaluated the correlation between HLA class I expression and the clinicopathological status and outcome in formalin fixed paraffin embedded tissues from thirty six (36) patients with osteosarcoma.

Results

A high expression of HLA class I was detected in 18 (50%) of tumor samples examined; while tumors with low or negative expression represented 9 (25%) cases each. Data indicate that the overall survival rate of patients with tumors highly expressing HLA class I was significantly higher than those with low or negative expression.

Conclusion

Down-regulation of class I antigen expression is associated with features of aggressive disease and a poorer prognosis. Therefore, it is imperative to identify HLA as a prognostic factor at the time of diagnosis to detect chemotherapy-resistant tumors and to generate a modified treatment regimen.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1159334857109547.",2014-03-25 +23480664,ChemCalc: a building block for tomorrow's chemical infrastructure.,"Web services, as an aspect of cloud computing, are becoming an important part of the general IT infrastructure, and scientific computing is no exception to this trend. We propose a simple approach to develop chemical Web services, through which servers could expose the essential data manipulation functionality that students and researchers need for chemical calculations. These services return their results as JSON (JavaScript Object Notation) objects, which facilitates their use for Web applications. The ChemCalc project http://www.chemcalc.org demonstrates this approach: we present three Web services related with mass spectrometry, namely isotopic distribution simulation, peptide fragmentation simulation, and molecular formula determination. We also developed a complete Web application based on these three Web services, taking advantage of modern HTML5 and JavaScript libraries (ChemDoodle and jQuery).",2013-04-30 +21884583,Clustering with position-specific constraints on variance: applying redescending M-estimators to label-free LC-MS data analysis.,"

Background

Clustering is a widely applicable pattern recognition method for discovering groups of similar observations in data. While there are a large variety of clustering algorithms, very few of these can enforce constraints on the variation of attributes for data points included in a given cluster. In particular, a clustering algorithm that can limit variation within a cluster according to that cluster's position (centroid location) can produce effective and optimal results in many important applications ranging from clustering of silicon pixels or calorimeter cells in high-energy physics to label-free liquid chromatography based mass spectrometry (LC-MS) data analysis in proteomics and metabolomics.

Results

We present MEDEA (M-Estimator with DEterministic Annealing), an M-estimator based, new unsupervised algorithm that is designed to enforce position-specific constraints on variance during the clustering process. The utility of MEDEA is demonstrated by applying it to the problem of ""peak matching""--identifying the common LC-MS peaks across multiple samples--in proteomic biomarker discovery. Using real-life datasets, we show that MEDEA not only outperforms current state-of-the-art model-based clustering methods, but also results in an implementation that is significantly more efficient, and hence applicable to much larger LC-MS data sets.

Conclusions

MEDEA is an effective and efficient solution to the problem of peak matching in label-free LC-MS data. The program implementing the MEDEA algorithm, including datasets, clustering results, and supplementary information is available from the author website at http://www.hephy.at/user/fru/medea/.",2011-08-31 +22338662,Computation of small-angle scattering profiles with three-dimensional Zernike polynomials.,"Small-angle X-ray scattering (SAXS) methods are extensively used for characterizing macromolecular structure and dynamics in solution. The computation of theoretical scattering profiles from three-dimensional models is crucial in order to test structural hypotheses. Here, a new approach is presented to efficiently compute SAXS profiles that are based on three-dimensional Zernike polynomial expansions. Comparison with existing methods and experimental data shows that the Zernike method can be used to effectively validate three-dimensional models against experimental data. For molecules with large cavities or complicated surfaces, the Zernike method more accurately accounts for the solvent contributions. The program is available as open-source software at http://sastbx.als.lbl.gov.",2012-02-09 +23635342,Hydroa vacciniforme-like lymphoma of an adult: a case report with review of the literature.,"Hydroa vacciniforme-like lymphoma (HVL) is a rare type of Epstein-Barr virus (EBV)-positive lymphoma of cytotoxic T-cell or natural killer cell origin that mainly affect children, characterized by a vesicopapular skin eruption that clinically resemble hydroa vacciniforme (HV). In current study, we report an adult patient with the tumor. The patient presented similar morphologic, immunophenotypic and genotypic changes of the disease with that occurred in children, whereas clinically, he showed a prolonged clinical course without hepatosplenomegaly or generalized lymphadenopathy. 
Whether there are some differences in biologic behavior between children and adults still remains unknown and it is necessary to collect more data to observe and to investigate in the future.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/7644172219178472.",2013-05-01 +22135419,DMAN: a Java tool for analysis of multi-well differential scanning fluorimetry experiments.,"

Summary

Differential scanning fluorimetry (DSF) is a rapid technique that can be used in structural biology to study protein-ligand interactions. We have developed DMAN, a novel tool to analyse multi-well plate data obtained in DSF experiments. DMAN is easy to install and provides a user-friendly interface. Multi-well plate layouts can be designed by the user and experimental data can be annotated and analysed by DMAN according to the specified plate layout. Statistical tests for significance are performed automatically, and graphical tools are also provided to assist in data analysis. The modular concept of this software will allow easy development of other multi-well plate analysis applications in the future.

Availability and implementation

DMAN is implemented in Java to provide a cross-platform compatibility. It is freely available to academic users at http://www.structuralchemistry.org/pcsb/. To download DMAN, users will be asked for their name, institution and email address. A manual can also be downloaded from this site.

Contact

conan.wang@griffith.edu.au; a.hofmann@griffith.edu.au.",2011-11-30 +23336252,Predicting PDZ domain mediated protein interactions from structure.,"

Background

PDZ domains are structural protein domains that recognize simple linear amino acid motifs, often at protein C-termini, and mediate protein-protein interactions (PPIs) in important biological processes, such as ion channel regulation, cell polarity and neural development. PDZ domain-peptide interaction predictors have been developed based on domain and peptide sequence information. Since domain structure is known to influence binding specificity, we hypothesized that structural information could be used to predict new interactions compared to sequence-based predictors.

Results

We developed a novel computational predictor of PDZ domain and C-terminal peptide interactions using a support vector machine trained with PDZ domain structure and peptide sequence information. Performance was estimated using extensive cross validation testing. We used the structure-based predictor to scan the human proteome for ligands of 218 PDZ domains and show that the predictions correspond to known PDZ domain-peptide interactions and PPIs in curated databases. The structure-based predictor is complementary to the sequence-based predictor, finding unique known and novel PPIs, and is less dependent on training-testing domain sequence similarity. We used a functional enrichment analysis of our hits to create a predicted map of PDZ domain biology. This map highlights PDZ domain involvement in diverse biological processes, some only found by the structure-based predictor. Based on this analysis, we predict novel PDZ domain involvement in xenobiotic metabolism and suggest new interactions for other processes including wound healing and Wnt signalling.

Conclusions

We built a structure-based predictor of PDZ domain-peptide interactions, which can be used to scan C-terminal proteomes for PDZ interactions. We also show that the structure-based predictor finds many known PDZ mediated PPIs in human that were not found by our previous sequence-based predictor and is less dependent on training-testing domain sequence similarity. Using both predictors, we defined a functional map of human PDZ domain biology and predict novel PDZ domain function. Users may access our structure-based and previous sequence-based predictors at http://webservice.baderlab.org/domains/POW.",2013-01-21 +23231371,Exome-assistant: a rapid and easy detection of disease-related genes and genetic variations from exome sequencing.,"

Background

Protein-coding regions in human genes harbor 85% of the mutations that are associated with disease-related traits. Compared with whole-genome sequencing of complex samples, exome sequencing serves as an alternative option because of its dramatically reduced cost. In fact, exome sequencing has been successfully applied to identify the cause of several Mendelian disorders, such as Miller and Schinzel-Giedio syndrome. However, there remain great challenges in handling the huge data generated by exome sequencing and in identifying potential disease-related genetic variations.

Results

In this study, Exome-assistant (http://122.228.158.106/exomeassistant), a convenient tool for submitting and annotating single nucleotide polymorphisms (SNPs) and insertion/deletion variations (InDels), was developed to rapidly detect candidate disease-related genetic variations from exome sequencing projects. Versatile filter criteria are provided by Exome-assistant to meet different users' requirements. Exome-assistant consists of four modules: the single case module, the two cases module, the multiple cases module, and the reanalysis module. The two cases and multiple cases modules allow users to identify sample-specific and common variations. The multiple cases module also supports family-based studies and Mendelian filtering. The identified candidate disease-related genetic variations can be annotated according to their sample features.

Conclusions

In summary, by exploring exome sequencing data, Exome-assistant can provide researchers with detailed biological insights into genetic variation events and permits the identification of potential genetic causes of human diseases and related traits.",2012-12-11 +23341643,A structural bioinformatics approach for identifying proteins predisposed to bind linear epitopes on pre-selected target proteins.,"We have developed a protocol for identifying proteins that are predisposed to bind linear epitopes on target proteins of interest. The protocol searches through the protein database for proteins (scaffolds) that are bound to peptides with sequences similar to accessible, linear epitopes on the target protein. The sequence match is considered more significant if residues calculated to be important in the scaffold-peptide interaction are present in the target epitope. The crystal structure of the scaffold-peptide complex is then used as a template for creating a model of the scaffold bound to the target epitope. This model can then be used in conjunction with sequence optimization algorithms or directed evolution methods to search for scaffold mutations that further increase affinity for the target protein. To test the applicability of this approach we targeted three disease-causing proteins: a tuberculosis virulence factor (TVF), the apical membrane antigen (AMA) from malaria, and hemagglutinin from influenza. In each case the best scoring scaffold was tested, and binders with Kds equal to 37 μM and 50 nM for TVF and AMA, respectively, were identified. A web server (http://rosettadesign.med.unc.edu/scaffold/) has been created for performing the scaffold search process with user-defined target sequences.",2013-01-21 +23800230,Quantitation of small intestinal permeability during normal human drug absorption.,"

Background

Understanding the quantitative relationship between a drug's physical chemical properties and its rate of intestinal absorption (QSAR) is critical for selecting candidate drugs. Because of limited experimental human small intestinal permeability data, approximate surrogates such as the fraction absorbed or Caco-2 permeability are used, both of which have limitations.

Methods

Given the blood concentration following an oral and intravenous dose, the time course of intestinal absorption in humans was determined by deconvolution and related to the intestinal permeability by the use of a new 3 parameter model function (""Averaged Model"" (AM)). The theoretical validity of this AM model was evaluated by comparing it to the standard diffusion-convection model (DC). This analysis was applied to 90 drugs using previously published data. Only drugs that were administered in oral solution form to fasting subjects were considered so that the rate of gastric emptying was approximately known. All the calculations are carried out using the freely available routine PKQuest Java (http://www.pkquest.com) which has an easy to use, simple interface.

Results

Theoretically, the AM permeability provides an accurate estimate of the intestinal DC permeability for solutes whose absorption ranges from 1% to 99%. The experimental human AM permeabilities determined by deconvolution are similar to those determined by direct human jejunal perfusion. The small intestinal pH varies with position and the results are interpreted in terms of the pH dependent octanol partition. The permeability versus partition relations are presented separately for the uncharged, basic, acidic and charged solutes. The small uncharged solutes caffeine, acetaminophen and antipyrine have very high permeabilities (about 20 × 10⁻⁴ cm/sec) corresponding to an unstirred layer of only 45 μm. The weak acid aspirin also has a large AM permeability despite its low octanol partition at pH 7.4, suggesting that it is nearly completely absorbed in the first part of the intestine where the pH is about 5.4.

Conclusions

The AM deconvolution method provides an accurate estimate of the human intestinal permeability. The results for these 90 drugs should provide a useful benchmark for evaluating QSAR models.",2013-06-24 +21513519,MI-GWAS: a SAS platform for the analysis of inherited and maternal genetic effects in genome-wide association studies using log-linear models.,"

Background

Several platforms for the analysis of genome-wide association data are available. However, these platforms focus on the evaluation of the genotype inherited by affected (i.e. case) individuals, whereas for some conditions (e.g. birth defects) the genotype of the mothers of affected individuals may also contribute to risk. For such conditions, it is critical to evaluate associations with both the maternal and the inherited (i.e. case) genotype. When genotype data are available for case-parent triads, a likelihood-based approach using log-linear modeling can be used to assess both the maternal and inherited genotypes. However, available software packages for log-linear analyses are not well suited to the analysis of typical genome-wide association data (e.g. including missing data).

Results

An integrated platform, Maternal and Inherited Analyses for Genome-wide Association Studies (MI-GWAS) for log-linear analyses of maternal and inherited genetic effects in large, genome-wide datasets, is described. MI-GWAS uses SAS and LEM software in combination to appropriately format data, perform the log-linear analyses and summarize the results. This platform was evaluated using existing genome-wide data and was shown to perform accurately and relatively efficiently.

Conclusions

The MI-GWAS platform provides a valuable tool for the analysis of association of a phenotype or condition with maternal and inherited genotypes using genome-wide data from case-parent triads. The source code for this platform is freely available at http://www.sph.uth.tmc.edu/sbrr/mi-gwas.htm.",2011-04-22 +24899658,"Acetobacteroides hydrogenigenes gen. nov., sp. nov., an anaerobic hydrogen-producing bacterium in the family Rikenellaceae isolated from a reed swamp.","A strictly anaerobic, mesophilic, carbohydrate-fermenting, hydrogen-producing bacterium, designated strain RL-C(T), was isolated from a reed swamp in China. Cells were Gram-stain-negative, catalase-negative, non-spore-forming, non-motile rods measuring 0.7-1.0 µm in width and 3.0-8.0 µm in length. The optimum temperature for growth of strain RL-C(T) was 37 °C (range 25-40 °C) and pH 7.0-7.5 (range pH 5.7-8.0). The strain could grow fermentatively on yeast extract, tryptone, arabinose, glucose, galactose, mannose, maltose, lactose, glycogen, pectin and starch. The main end products of glucose fermentation were acetate, H2 and CO2. Organic acids, alcohols and amino acids were not utilized for growth. Yeast extract was not required for growth; however, it stimulated growth slightly. Nitrate, sulfate, sulfite, thiosulfate, elemental sulfur and Fe(III) nitrilotriacetate were not reduced as terminal electron acceptors. Aesculin was hydrolysed but not gelatin. Indole and H2S were produced from yeast extract. The G+C content of the genomic DNA was 51.2 mol%. The major cellular fatty acids were iso-C15 : 0, anteiso-C15 : 0 and C16 : 0. The most abundant polar lipid of strain RL-C(T) was phosphatidylethanolamine. 
16S rRNA gene sequence analysis revealed that the isolate belongs to the uncultured Blvii28 wastewater-sludge group (http://www.arb-silva.de/) in the family Rikenellaceae of the phylum Bacteroidetes, and shared low sequence similarities with the related species Alistipes shahii WAL 8301(T) (81.8 %), Rikenella microfusus ATCC 29728(T) (81.7 %) and Anaerocella delicata WN081(T) (80.9 %). On the basis of these data, a novel species in a new genus of the family Rikenellaceae is proposed, Acetobacteroides hydrogenigenes gen. nov., sp. nov. The type strain of the type species is RL-C(T) ( = JCM 17603(T) = DSM 24657(T) = CGMCC 1.5173(T)).",2014-06-04 +22905221,WinHAP: an efficient haplotype phasing algorithm based on scalable sliding windows.,"Haplotype phasing represents an essential step in studying the association of genomic polymorphisms with complex genetic diseases, and in determining targets for drug designing. In recent years, huge amounts of genotype data are produced from the rapidly evolving high-throughput sequencing technologies, and the data volume challenges the community with more efficient haplotype phasing algorithms, in the senses of both running time and overall accuracy. 2SNP is one of the fastest haplotype phasing algorithms with comparable low error rates with the other algorithms. The most time-consuming step of 2SNP is the construction of a maximum spanning tree (MST) among all the heterozygous SNP pairs. We simplified this step by replacing the MST with the initial haplotypes of adjacent heterozygous SNP pairs. The multi-SNP haplotypes were estimated within a sliding window along the chromosomes. The comparative studies on four different-scale genotype datasets suggest that our algorithm WinHAP outperforms 2SNP and most of the other haplotype phasing algorithms in terms of both running speeds and overall accuracies. 
To facilitate the WinHAP's application in more practical biological datasets, we released the software for free at: http://staff.ustc.edu.cn/~xuyun/winhap/index.htm.",2012-08-14 +23933392,Particle quality assessment and sorting for automatic and semiautomatic particle-picking techniques.,"Three-dimensional reconstruction of biological specimens using electron microscopy by single particle methodologies requires the identification and extraction of the imaged particles from the acquired micrographs. Automatic and semiautomatic particle selection approaches can localize these particles, minimizing the user interaction, but at the cost of selecting a non-negligible number of incorrect particles, which can corrupt the final three-dimensional reconstruction. In this work, we present a novel particle quality assessment and sorting method that can separate most erroneously picked particles from correct ones. The proposed method is based on multivariate statistical analysis of a particle set that has been picked previously using any automatic or manual approach. The new method uses different sets of particle descriptors, which are morphology-based, histogram-based and signal to noise analysis based. We have tested our proposed algorithm with experimental data obtaining very satisfactory results. The algorithm is freely available as a part of the Xmipp 3.0 package [http://xmipp.cnb.csic.es].",2013-08-06 +23336431,In silico mining of putative microsatellite markers from whole genome sequence of water buffalo (Bubalus bubalis) and development of first BuffSatDB.,"

Background

Though India has sequenced water buffalo genome but its draft assembly is based on cattle genome BTau 4.0, thus de novo chromosome wise assembly is a major pending issue for global community. The existing radiation hybrid of buffalo and these reported STR can be used further in final gap plugging and ""finishing"" expected in de novo genome assembly. QTL and gene mapping needs mining of putative STR from buffalo genome at equal interval on each and every chromosome. Such markers have potential role in improvement of desirable characteristics, such as high milk yields, resistance to diseases, high growth rate. The STR mining from whole genome and development of user friendly database is yet to be done to reap the benefit of whole genome sequence.

Description

By in silico microsatellite mining of the whole genome, we have developed the first STR database of water buffalo, BuffSatDb (Buffalo MicroSatellite Database; http://cabindb.iasri.res.in/buffsatdb/), which is a web based relational database of 910529 microsatellite markers, developed using PHP and MySQL database. Microsatellite markers have been generated using MIcroSAtellite tool. It is simple and systematic web based search for customised retrieval of chromosome wise and genome-wide microsatellites. Search has been enabled based on chromosomes, motif type (mono-hexa), repeat motif and repeat kind (simple and composite). The search may be customised by limiting location of STR on chromosome as well as number of markers in that range. This is a novel approach and has not been implemented in any of the existing marker databases. This database has been further appended with Primer3 for primer designing of the selected markers enabling researcher to select markers of choice at desired interval over the chromosome. The unique add-on of degenerate bases further helps in resolving presence of degenerate bases in current buffalo assembly.

Conclusion

Being first buffalo STR database in the world , this would not only pave the way in resolving current assembly problem but shall be of immense use for global community in QTL/gene mapping critically required to increase knowledge in the endeavour to increase buffalo productivity, especially for third world country where rural economy is significantly dependent on buffalo productivity.",2013-01-19 +23731828,Proceedings of the 2013 Rheumatology Winter Clinical Symposia.,"Advances in rheumatology occur at a rapid pace and staying abreast of important changes is a challenge for all. Both novel drug development and enhanced understanding of conventional or historic therapies have molded current day rheumatologic practice. Rheumatology has led the way in the use of outcome measures and imaging modalities in common disorders like rheumatoid arthritis, osteoarthritis, and gout. The expertise of the rheumatologist has widened such that knowledge of economics, legal issues, related disorders and extraarticular disease is essential. In February 2013, the 6th annual Rheumatology Winter Clinical Symposium was held. At this meeting, faculty and participants held discussions and exchanged knowledge about new scientific data and how it may impact the care of rheumatology patients. Excerpts from some of the lectures from the Rheumatology Winter Clinical Symposium 2013 are included in this review. These and other presentations can be viewed in their entirety at http://www.r-w-c-s.com.",2013-06-01 +23330984,Development of a natural products database from the biodiversity of Brazil.,"We describe herein the design and development of an innovative tool called the NuBBE database (NuBBEDB), a new Web-based database, which incorporates several classes of secondary metabolites and derivatives from the biodiversity of Brazil. This natural product database incorporates botanical, chemical, pharmacological, and toxicological compound information. 
The NuBBEDB provides specialized information to the worldwide scientific community and can serve as a useful tool for studies on the multidisciplinary interfaces related to chemistry and biology, including virtual screening, dereplication, metabolomics, and medicinal chemistry. The NuBBEDB site is at http://nubbe.iq.unesp.br/nubbeDB.html .",2013-01-18 +23327938,Use of Gene Ontology Annotation to understand the peroxisome proteome in humans.,"The Gene Ontology (GO) is the de facto standard for the functional description of gene products, providing a consistent, information-rich terminology applicable across species and information repositories. The UniProt Consortium uses both manual and automatic GO annotation approaches to curate UniProt Knowledgebase (UniProtKB) entries. The selection of a protein set prioritized for manual annotation has implications for the characteristics of the information provided to users working in a specific field or interested in particular pathways or processes. In this article, we describe an organelle-focused, manual curation initiative targeting proteins from the human peroxisome. We discuss the steps taken to define the peroxisome proteome and the challenges encountered in defining the boundaries of this protein set. We illustrate with the use of examples how GO annotations now capture cell and tissue type information and the advantages that such an annotation approach provides to users. Database URL: http://www.ebi.ac.uk/GOA/ and http://www.uniprot.org.",2013-01-17 +23331578,Discrimination between Streptococcus pneumoniae and Streptococcus mitis based on sorting of their MALDI mass spectra.,"Accurate species-level identification of alpha-hemolytic (viridans) streptococci (VGS) is very important for understanding their pathogenicity and virulence. However, an extremely high level of similarity between VGS within the mitis group (S. pneumoniae, S. mitis, S. oralis and S. 
pseudopneumoniae) often results in misidentification of these organisms. Earlier, matrix-assisted laser desorption ionization-time of flight mass spectrometry (MALDI-TOF MS) has been suggested as a tool for the rapid identification of S. pneumoniae. However, by using Biotyper 3.0 (Bruker) or Vitek MS (bioMérieux) databases, Streptococcus mitis/oralis species can be erroneously identified as S. pneumoniae. ClinProTools 2.1 software was used for the discrimination of MALDI-TOF mass spectra of 25 S. pneumoniae isolates, 34 S. mitis and three S. oralis. Phenotypical tests and multilocus gene typing schemes for the S. pneumoniae (http://spneumoniae.mlst.net/) and viridans streptococci (http://viridans.emlsa.net/) were used for the identification of isolates included in the study. The classifying model was generated based on different algorithms (Genetic Algorithm, Supervised Neural Network and QuickClassifier). In all cases, values of sensitivity and specificity were found to be equal or close to 100%, allowing discrimination of mass spectra of different species. Three peaks (6949, 9876 and 9975 m/z) were determined conferring the maximal statistical weight onto each model built. We find this approach to be promising for viridans streptococci discrimination.",2013-01-17 +23325619,miRCancer: a microRNA-cancer association database constructed by text mining on literature.,"

Motivation

Research interests in microRNAs have increased rapidly in the past decade. Many studies have shown that microRNAs have close relationships with various human cancers, and they potentially could be used as cancer indicators in diagnosis or as a suppressor for treatment purposes. There are several databases that contain microRNA-cancer associations predicted by computational methods but few from empirical results. Despite the fact that abundant experiments investigating microRNA expressions in cancer cells have been carried out, the results have remained scattered in the literature. We propose to extract microRNA-cancer associations by text mining and store them in a database called miRCancer.

Results

The text mining is based on 75 rules we have constructed, which represent the common sentence structures typically used to state microRNA expressions in cancers. The microRNA-cancer association database, miRCancer, is updated regularly by running the text mining algorithm against PubMed. All miRNA-cancer associations are confirmed manually after automatic extraction. miRCancer currently documents 878 relationships between 236 microRNAs and 79 human cancers through the processing of >26 000 published articles.

Availability

miRCancer is freely available on the web at http://mircancer.ecu.edu/",2013-01-16 +22947028,Likelihood based observability analysis and confidence intervals for predictions of dynamic models.,"

Background

Predicting a system's behavior based on a mathematical model is a primary task in Systems Biology. If the model parameters are estimated from experimental data, the parameter uncertainty has to be translated into confidence intervals for model predictions. For dynamic models of biochemical networks, the nonlinearity in combination with the large number of parameters hampers the calculation of prediction confidence intervals and renders classical approaches as hardly feasible.

Results

In this article reliable confidence intervals are calculated based on the prediction profile likelihood. Such prediction confidence intervals of the dynamic states can be utilized for a data-based observability analysis. The method is also applicable if there are non-identifiable parameters leading to some insufficiently specified model predictions that can be interpreted as non-observability. Moreover, a validation profile likelihood is introduced that should be applied when noisy validation experiments are to be interpreted.

Conclusions

The presented methodology allows the propagation of uncertainty from experimental to model predictions. Although presented in the context of ordinary differential equations, the concept is general and also applicable to other types of models. Matlab code which can be used as a template to implement the method is provided at http://www.fdmold.uni-freiburg.de/~ckreutz/PPL.",2012-09-05 +23977990,EvoSNP-DB: A database of genetic diversity in East Asian populations.,"Genome-wide association studies (GWAS) have become popular as an approach for the identification of large numbers of phenotype-associated variants. However, differences in genetic architecture and environmental factors mean that the effect of variants can vary across populations. Understanding population genetic diversity is valuable for the investigation of possible population specific and independent effects of variants. EvoSNP-DB aims to provide information regarding genetic diversity among East Asian populations, including Chinese, Japanese, and Korean. Non-redundant SNPs (1.6 million) were genotyped in 54 Korean trios (162 samples) and were compared with 4 million SNPs from HapMap phase II populations. EvoSNP-DB provides two user interfaces for data query and visualization, and integrates scores of genetic diversity (Fst and VarLD) at the level of SNPs, genes, and chromosome regions. EvoSNP-DB is a web-based application that allows users to navigate and visualize measurements of population genetic differences in an interactive manner, and is available online at [http://biomi.cdc.go.kr/EvoSNP/].",2013-08-01 +24881795,Short communication: Staphylococcus aureus isolated from colostrum of dairy heifers represent a closely related group exhibiting highly homogeneous genomic and antimicrobial resistance features.,"In heifers, intramammary infections caused by Staphylococcus aureus affect milk production and udder health in the first and subsequent lactations, and can lead to premature culling. 
Not much is known about Staph. aureus isolated from heifers and it is also unclear whether or not these strains are readily transmitted between heifers and lactating herd mates. In this study, we compared phenotypic characteristics, spa types, and DNA microarray virulence and resistance gene profiles of Staph. aureus isolates obtained from colostrum samples of dairy heifers with isolates obtained from lactating cows. Our objective was to (1) characterize Staph. aureus strains associated with mastitis in heifers and (2) determine relatedness of Staph. aureus strains from heifers and lactating cows to provide data on transmission. We analyzed colostrum samples of 501 heifers and milk samples of 68 lactating cows within the same herd, isolating 48 and 9 Staph. aureus isolates, respectively. Staphylococcus aureus strains from heifers, lactating herd mates, and an unrelated collection of 78 strains from bovine mastitis milk of mature cows were compared. With 1 exception each, characterization of all strains from heifers and lactating cows in the same herd yielded highly similar phenotypic and genotypic results. The strains were Staphaurex latex agglutination test negative (Oxoid AG, Basel, Switzerland) and belonged to agr type II, CC705, and spa types tbl 2645 and t12926. They were susceptible to all antimicrobial agents tested. In contrast, the strains from mature cows in other herds were spread across different clonal complexes, spa types, and SplitsTree clusters (http://www.splitstree.org/), thus displaying a far higher degree of heterogeneity. 
We conclude that strains isolated from colostrum of heifers and mastitis milk of lactating cows in the same herd feature highly similar phenotypic and genomic characteristics, suggesting persistence of the organism during the first and potentially subsequent lactations or transmission between heifers and mature herd mates.",2014-06-02 +23325628,PPInterFinder--a mining tool for extracting causal relations on human proteins from literature.,"One of the most common and challenging problem in biomedical text mining is to mine protein-protein interactions (PPIs) from MEDLINE abstracts and full-text research articles because PPIs play a major role in understanding the various biological processes and the impact of proteins in diseases. We implemented, PPInterFinder--a web-based text mining tool to extract human PPIs from biomedical literature. PPInterFinder uses relation keyword co-occurrences with protein names to extract information on PPIs from MEDLINE abstracts and consists of three phases. First, it identifies the relation keyword using a parser with Tregex and a relation keyword dictionary. Next, it automatically identifies the candidate PPI pairs with a set of rules related to PPI recognition. Finally, it extracts the relations by matching the sentence with a set of 11 specific patterns based on the syntactic nature of PPI pair. We find that PPInterFinder is capable of predicting PPIs with the accuracy of 66.05% on AIMED corpus and outperforms most of the existing systems. DATABASE URL: http://www.biomining-bu.in/ppinterfinder/",2013-01-15 +24994575,A mathematical model for predicting the adult height of girls with advanced puberty after spontaneous growth.,"

Background

Advanced puberty in girls is defined as the onset of puberty between the ages of 8 yr and 10 yr. The objective was to predict adult height (AH) at initial evaluation and to characterize patients with an actual AH below -2 SD (152 cm) and/or lower than their target height (TH) by > one SD (5.6 cm).

Methods

Data analysis using multiple linear regression models was performed in 50 girls with advanced puberty who reached their AH after spontaneous puberty.

Results

The actual AH (159.0 ± 6.1 cm) was similar to the TH (161.2 ± 4.6 cm) and to the AH predicted at the initial evaluation (160.8 ± 6.0 cm), and the actual AH correlated positively with both (R = 0.76, P = 0.0003; R = 0.71, P = 0.008, respectively). The AH was below 152 cm in 7 girls, of whom 3 were characterized by paternal transmission of the advanced puberty. The AH was lower than the TH by >5.6 cm in 8 girls. The AH (cm) could be calculated at the initial evaluation: 1.8822 age + 3.3510 height (SD) - 0.7465 bone age - 1.7993 pubic hair stage + 2.8409 TH (SD) + 150.32. The formula is available online at http://www.kamick.org/lemaire/med/girls-advpub.html. The calculated AH (159.0 ± 5.7 cm) and the actual AH were highly correlated (R = 0.93). The actual AH was lower than the calculated AH by > 0.5 SD in only one case (4.35 cm).

Conclusion

We established a formula that can be used at an initial evaluation to predict the AH, and then to assess the risk of reduced AH as a result of advanced puberty. According to this formula, the actual AH was lower than the calculated AH by more than 2.8 cm (0.5 SD) in only one girl. The AHs of the untreated girls with advanced puberty did not differ from those predicted at the initial evaluation by the Bayley and Pinneau table or from the THs. However, this study provides a useful and ready-to-use formula that can be an additional assessment of girls with advanced puberty.",2014-07-03 +21984769,KABOOM! A new suffix array based algorithm for clustering expression data.,"

Motivation

Second-generation sequencing technology has reinvigorated research using expression data, and clustering such data remains a significant challenge, with much larger datasets and with different error profiles. Algorithms that rely on all-versus-all comparison of sequences are not practical for large datasets.

Results

We introduce a new filter for string similarity which has the potential to eliminate the need for all-versus-all comparison in clustering of expression data and other similar tasks. Our filter is based on multiple long exact matches between the two strings, with the additional constraint that these matches must be sufficiently far apart. We give details of its efficient implementation using modified suffix arrays. We demonstrate its efficiency by presenting our new expression clustering tool, wcd-express, which uses this heuristic. We compare it to other current tools and show that it is very competitive both with respect to quality and run time.

Availability

Source code and binaries available under GPL at http://code.google.com/p/wcdest. Runs on Linux and MacOS X.

Contact

scott.hazelhurst@wits.ac.za; zsuzsa@cebitec.uni-bielefeld.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-08 +22072383,"NARWHAL, a primary analysis pipeline for NGS data.",

Unlabelled

The NARWHAL software pipeline has been developed to automate the primary analysis of Illumina sequencing data. This pipeline combines a new and flexible de-multiplexing tool with open-source aligners and automated quality assessment. The entire pipeline can be run using only one simple sample-sheet for diverse sequencing applications. NARWHAL creates a sample-oriented data structure and outperforms existing tools in speed.

Availability

https://trac.nbic.nl/narwhal/.,2011-11-08 +24635884,Identifying large sets of unrelated individuals and unrelated markers.,"

Background

Genetic Analyses in large sample populations are important for a better understanding of the variation between populations, for designing conservation programs, for detecting rare mutations which may be risk factors for a variety of diseases, among other reasons. However, these analyses frequently assume that the participating individuals or animals are mutually unrelated, which may not be the case in large samples, leading to erroneous conclusions. In order to retain as much data as possible while minimizing the risk of false positives it is useful to identify a large subset of relatively unrelated individuals in the population. This can be done using a heuristic for finding a large set of independent nodes in an undirected graph. We describe a fast randomized heuristic for this purpose. The same methodology can also be used for identifying a suitable set of markers for analyzing population stratification, and other instances where a rapid heuristic for maximal independent sets in large graphs is needed.

Results

We present FastIndep, a fast random heuristic algorithm for finding a maximal independent set of nodes in an arbitrary undirected graph along with an efficient implementation in C++. On a 64 bit Linux or MacOS platform the execution time is a few minutes, even with a graph of several thousand nodes. The algorithm can discover multiple solutions of the same cardinality. FastIndep can be used to discover unlinked markers, and unrelated individuals in populations.

Conclusions

The methods presented here provide a quick and efficient method for identifying sets of unrelated individuals in large populations and unlinked markers in marker panels. The C++ source code and instructions along with utilities for generating the input files in the appropriate format are available at http://taurus.ansci.iastate.edu/wiki/people/jabr/Joseph_Abraham.html.",2014-03-17 +21501472,SAQC: SNP array quality control.,"

Background

Genome-wide single-nucleotide polymorphism (SNP) arrays containing hundreds of thousands of SNPs from the human genome have proven useful for studying important human genome questions. Data quality of SNP arrays plays a key role in the accuracy and precision of downstream data analyses. However, good indices for assessing data quality of SNP arrays have not yet been developed.

Results

We developed new quality indices to measure the quality of SNP arrays and/or DNA samples and investigated their statistical properties. The indices quantify a departure of estimated individual-level allele frequencies (AFs) from expected frequencies via standardized distances. The proposed quality indices followed lognormal distributions in several large genomic studies that we empirically evaluated. AF reference data and quality index reference data for different SNP array platforms were established based on samples from various reference populations. Furthermore, a confidence interval method based on the underlying empirical distributions of quality indices was developed to identify poor-quality SNP arrays and/or DNA samples. Analyses of authentic biological data and simulated data show that this new method is sensitive and specific for the detection of poor-quality SNP arrays and/or DNA samples.

Conclusions

This study introduces new quality indices, establishes references for AFs and quality indices, and develops a detection method for poor-quality SNP arrays and/or DNA samples. We have developed a new computer program that utilizes these methods called SNP Array Quality Control (SAQC). SAQC software is written in R and R-GUI and was developed as a user-friendly tool for the visualization and evaluation of data quality of genome-wide SNP arrays. The program is available online (http://www.stat.sinica.edu.tw/hsinchou/genetics/quality/SAQC.htm).",2011-04-18 +23148687,The Schistosoma mansoni phylome: using evolutionary genomics to gain insight into a parasite's biology.,"

Background

Schistosoma mansoni is one of the causative agents of schistosomiasis, a neglected tropical disease that affects about 237 million people worldwide. Despite recent efforts, we still lack a general understanding of the relevant host-parasite interactions, and the possible treatments are limited by the emergence of resistant strains and the absence of a vaccine. The S. mansoni genome has been completely sequenced and is still under continuous annotation. Nevertheless, more than 45% of the encoded proteins remain without experimental characterization or even functional prediction. To improve our knowledge regarding the biology of this parasite, we conducted a proteome-wide evolutionary analysis to provide a broad view of S. mansoni proteome evolution and to improve its functional annotation.

Results

Using a phylogenomic approach, we reconstructed the S. mansoni phylome, which comprises the evolutionary histories of all parasite proteins and their homologs across 12 other organisms. The analysis of a total of 7,964 phylogenies allowed a deeper understanding of genomic complexity and evolutionary adaptations to a parasitic lifestyle. In particular, the identification of lineage-specific gene duplications pointed to the diversification of several protein families that are relevant for host-parasite interaction, including proteases, tetraspanins, fucosyltransferases, venom allergen-like proteins, and tegumental-allergen-like proteins. In addition to the evolutionary knowledge, the phylome data enabled us to automatically re-annotate 3,451 proteins through a phylogenetic-based approach rather than solely sequence similarity searches. To allow further exploitation of this valuable data, all information has been made available at PhylomeDB (http://www.phylomedb.org).

Conclusions

In this study, we used an evolutionary approach to assess S. mansoni parasite biology, improve genome/proteome functional annotation, and provide insights into host-parasite interactions. Taking advantage of a proteome-wide perspective rather than focusing on individual proteins, we identified that this parasite has experienced specific gene duplication events, particularly affecting genes that are potentially related to the parasitic lifestyle. These innovations may be related to the mechanisms that protect S. mansoni against host immune responses being important adaptations for the parasite survival in a potentially hostile environment. Continuing this work, a comparative analysis involving genomic, transcriptomic, and proteomic data from other helminth parasites, other parasites, and vectors will supply more information regarding parasite's biology as well as host-parasite interactions.",2012-11-13 +24078703,MS2PIP: a tool for MS/MS peak intensity prediction.,

Motivation

Tandem mass spectrometry provides the means to match mass spectrometry signal observations with the chemical entities that generated them. The technology produces signal spectra that contain information about the chemical dissociation pattern of a peptide that was forced to fragment using methods like collision-induced dissociation. The ability to predict these MS(2) signals and to understand this fragmentation process is important for sensitive high-throughput proteomics research.

Results

We present a new tool called MS(2)PIP for predicting the intensity of the most important fragment ion signal peaks from a peptide sequence. MS(2)PIP pre-processes a large dataset with confident peptide-to-spectrum matches to facilitate data-driven model induction using a random forest regression learning algorithm. The intensity predictions of MS(2)PIP were evaluated on several independent evaluation sets and found to correlate significantly better with the observed fragment-ion intensities as compared with the current state-of-the-art PeptideART tool.

Availability

MS(2)PIP code is available for both training and predicting at http://compomics.com/.,2013-09-27 +21789201,"Interoperability between biomedical ontologies through relation expansion, upper-level ontologies and automatic reasoning.","Researchers design ontologies as a means to accurately annotate and integrate experimental data across heterogeneous and disparate data- and knowledge bases. Formal ontologies make the semantics of terms and relations explicit such that automated reasoning can be used to verify the consistency of knowledge. However, many biomedical ontologies do not sufficiently formalize the semantics of their relations and are therefore limited with respect to automated reasoning for large scale data integration and knowledge discovery. We describe a method to improve automated reasoning over biomedical ontologies and identify several thousand contradictory class definitions. Our approach aligns terms in biomedical ontologies with foundational classes in a top-level ontology and formalizes composite relations as class expressions. We describe the semi-automated repair of contradictions and demonstrate expressive queries over interoperable ontologies. Our work forms an important cornerstone for data integration, automatic inference and knowledge discovery based on formal representations of knowledge. Our results and analysis software are available at http://bioonto.de/pmwiki.php/Main/ReasonableOntologies.",2011-07-18 +24762057,Inhibitory receptor immunoglobulin-like transcript 4 was highly expressed in primary ductal and lobular breast cancer and significantly correlated with IL-10.,"

Background

Immunoglobulin-like transcript 4 (ILT4) is an inhibitory molecule involved in immune response and has recently been identified to be strongly inducible by IL-10. The aim of the present study was to examine the associations of ILT4 expression with clinicopathological characteristics and IL-10 expression in primary ductal and lobular breast cancer.

Methods

We studied the expression of ILT4 in 4 cancer cell lines, 117 primary tumor tissues and 97 metastatic lymph nodes from patients with primary ductal and lobular breast cancer by reverse transcription-polymerase chain reaction, western blot or immunohistochemistry analysis. Additionally, IL-10 expression was also investigated using immunohistochemistry in primary tumor tissues. Then the relationship between ILT4 expression and clinicopathological characteristics/IL-10 expression was evaluated.

Results

ILT4 was highly expressed in all 4 human breast cancer cell lines on both mRNA and protein levels. In primary tumor tissues, ILT4 or IL-10 was expressed in the cell membrane, cytoplasm, or both; the positive rate of ILT4 and IL-10 expression was 60.7% (71/117) and 80.34% (94/117), respectively. ILT4 level was significantly correlated with IL-10 (r = 0.577; p<0.01). Furthermore, the expression of ILT4 or IL-10 was associated with fewer Tumor Infiltrating Lymphocytes (TILs) (p=0.004 and 0.018, respectively) and more lymph node metastasis (p=0.046 and 0.035, respectively).

Conclusion

Our data demonstrated the association of ILT4 and IL-10 expression in human breast cancer, suggesting their important roles in immune dysfunction and lymph node metastases.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1692652692107916.",2014-04-24 +21782820,Dana-Farber repository for machine learning in immunology.,"The immune system is characterized by high combinatorial complexity that necessitates the use of specialized computational tools for analysis of immunological data. Machine learning (ML) algorithms are used in combination with classical experimentation for the selection of vaccine targets and in computational simulations that reduce the number of necessary experiments. The development of ML algorithms requires standardized data sets, consistent measurement methods, and uniform scales. To bridge the gap between the immunology community and the ML community, we designed a repository for machine learning in immunology named Dana-Farber Repository for Machine Learning in Immunology (DFRMLI). This repository provides standardized data sets of HLA-binding peptides with all binding affinities mapped onto a common scale. It also provides a list of experimentally validated naturally processed T cell epitopes derived from tumor or virus antigens. The DFRMLI data were preprocessed and ensure consistency, comparability, detailed descriptions, and statistically meaningful sample sizes for peptides that bind to various HLA molecules. The repository is accessible at http://bio.dfci.harvard.edu/DFRMLI/.",2011-07-18 +24476358,SHEAR: sample heterogeneity estimation and assembly by reference.,"

Background

Personal genome assembly is a critical process when studying tumor genomes and other highly divergent sequences. The accuracy of downstream analyses, such as RNA-seq and ChIP-seq, can be greatly enhanced by using personal genomic sequences rather than standard references. Unfortunately, reads sequenced from these types of samples often have a heterogeneous mix of various subpopulations with different variants, making assembly extremely difficult using existing assembly tools. To address these challenges, we developed SHEAR (Sample Heterogeneity Estimation and Assembly by Reference; http://vk.cs.umn.edu/SHEAR), a tool that predicts SVs, accounts for heterogeneous variants by estimating their representative percentages, and generates personal genomic sequences to be used for downstream analysis.

Results

By making use of structural variant detection algorithms, SHEAR offers improved performance in the form of a stronger ability to handle difficult structural variant types and better computational efficiency. We compare against the lead competing approach using a variety of simulated scenarios as well as real tumor cell line data with known heterogeneous variants. SHEAR is shown to successfully estimate heterogeneity percentages in both cases, and demonstrates an improved efficiency and better ability to handle tandem duplications.

Conclusion

SHEAR allows for accurate and efficient SV detection and personal genomic sequence generation. It is also able to account for heterogeneous sequencing samples, such as from tumor tissue, by estimating the subpopulation percentage for each heterogeneous variant.",2014-01-29 +22762711,Percutaneous coronary intervention with second-generation paclitaxel-eluting stents versus everolimus-eluting stents in United States contemporary practice (REWARDS TLX Trial).,"Registry Experience at the Washington Hospital Center, DES - Taxus Liberte Versus Xience V (REWARDS TLX) is a physician-initiated, retrospective, real-world, multicenter, observational study for all patients >18 years of age subjected to percutaneous coronary intervention with everolimus-eluting stents (EESs) or paclitaxel-eluting stents (PESs). Outcomes of patients receiving a TAXUS Liberté or XIENCE V drug-eluting stent were compared. Baseline clinical, procedural, and follow-up data at 12 months were collected from 10 clinical centers by an electronic data capture system. The study's primary end point was major adverse cardiac events: a composite of all-cause death, Q-wave myocardial infarction, target vessel revascularization, and stent thrombosis. The trial is registered with http://www.clinicaltrials.gov (NCT01134159). Data were entered for 1,195 patients (PES, n = 595; EES, n = 600). Baseline clinical characteristics were similar except for higher dyslipidemia, systemic hypertension, and family history of coronary artery disease in the EES group. In-hospital outcome was similar between groups, with an overall in-hospital stent thrombosis rate of 0.2%. The primary end point at 12 months was similar (EES 7.8% vs 10.8%, p = 0.082). Overall stent thrombosis rate was lower in the EES group (0.3% vs 1.2%, respectively, p = 0.107); however, target lesion revascularization was similar (PES, hazard ratio 1.46, 95% confidence interval 0.98 to 2.19, p = 0.064). 
There was no difference in overall mortality between groups. In conclusion, second-generation EESs and PESs demonstrated similar efficacy and safety profiles for broadened patient and lesion subsets compared to a selected population from the pivotal trials. However, for composite efficacy and safety end points, EESs outperformed second-generation PESs.",2012-07-03 +24158599,Fast pairwise IBD association testing in genome-wide association studies.,"

Motivation

Recently, investigators have proposed state-of-the-art Identity-by-descent (IBD) mapping methods to detect IBD segments between purportedly unrelated individuals. The IBD information can then be used for association testing in genetic association studies. One approach for this IBD association testing strategy is to test for excessive IBD between pairs of cases ('pairwise method'). However, this approach is inefficient because it requires a large number of permutations. Moreover, a limited number of permutations define a lower bound for P-values, which makes fine-mapping of associated regions difficult because, in practice, a much larger genomic region is implicated than the region that is actually associated.

Results

In this article, we introduce a new pairwise method 'Fast-Pairwise'. Fast-Pairwise uses importance sampling to improve efficiency and enable approximation of extremely small P-values. Fast-Pairwise method takes only days to complete a genome-wide scan. In the application to the WTCCC type 1 diabetes data, Fast-Pairwise successfully fine-maps a known human leukocyte antigen gene that is known to cause the disease.

Availability

Fast-Pairwise is publicly available at: http://genetics.cs.ucla.edu/graphibd.",2013-10-24 +24611578,Variation in surgical quality measure adherence within hospital referral regions: do publicly reported surgical quality measures distinguish among hospitals that patients are likely to compare?,"

Objective

To determine whether surgical quality measures that Medicare publicly reports provide a basis for patients to choose a hospital from within their geographic region.

Data source

The Department of Health and Human Services' public reporting website, http://www.medicare.gov/hospitalcompare.

Study design

We identified hospitals (n = 2,953) reporting adherence rates to the quality measures intended to reduce surgical site infections (Surgical Care Improvement Project, 1-3) in 2012. We defined regions within which patients were likely to compare hospitals using the hospital referral regions (HRRs) from the Dartmouth Atlas of Health Care Project. We described distributions of reported SCIP adherence within each HRR, including medians, interquartile ranges (IQRs), skewness, and outliers.

Principal findings

Ninety-seven percent of HRRs had median SCIP-1 scores ≥95 percent. In 93 percent of HRRs, half of the hospitals in the HRR were within 5 percent of the median hospital's score. In 62 percent of HRRs, hospitals were skewed toward the higher rates (negative skewness). Seven percent of HRRs demonstrated positive skewness. Only 1 percent had a positive outlier. SCIP-2 and SCIP-3 demonstrated similar distributions.

Conclusions

Publicly reported quality measures for surgical site infection prevention do not distinguish the majority of hospitals that patients are likely to choose from when selecting a surgical provider. More studies are needed to improve public reporting's ability to positively impact patient decision making.",2014-03-11 +24064420,"QSSPN: dynamic simulation of molecular interaction networks describing gene regulation, signalling and whole-cell metabolism in human cells.","

Motivation

Dynamic simulation of genome-scale molecular interaction networks will enable the mechanistic prediction of genotype-phenotype relationships. Despite advances in quantitative biology, full parameterization of whole-cell models is not yet possible. Simulation methods capable of using available qualitative data are required to develop dynamic whole-cell models through an iterative process of modelling and experimental validation.

Results

We formulate quasi-steady state Petri nets (QSSPN), a novel method integrating Petri nets and constraint-based analysis to predict the feasibility of qualitative dynamic behaviours in qualitative models of gene regulation, signalling and whole-cell metabolism. We present the first dynamic simulations including regulatory mechanisms and a genome-scale metabolic network in a human cell, using bile acid homeostasis in human hepatocytes as a case study. QSSPN simulations reproduce experimentally determined qualitative dynamic behaviours and permit mechanistic analysis of genotype-phenotype relationships.

Availability and implementation

The model and simulation software implemented in C++ are available in supplementary material and at http://sysbio3.fhms.surrey.ac.uk/qsspn/.",2013-09-23 +21750706,GenExp: an interactive web-based genomic DAS client with client-side data rendering.,"

Background

The Distributed Annotation System (DAS) offers a standard protocol for sharing and integrating annotations on biological sequences. There are more than 1000 DAS sources available and the number is steadily increasing. Clients are an essential part of the DAS system and integrate data from several independent sources in order to create a useful representation to the user. While web-based DAS clients exist, most of them do not have direct interaction capabilities such as dragging and zooming with the mouse.

Results

Here we present GenExp, a web based and fully interactive visual DAS client. GenExp is a genome oriented DAS client capable of creating informative representations of genomic data zooming out from base level to complete chromosomes. It proposes a novel approach to genomic data rendering and uses the latest HTML5 web technologies to create the data representation inside the client browser. Thanks to client-side rendering most position changes do not need a network request to the server and so responses to zooming and panning are almost immediate. In GenExp it is possible to explore the genome intuitively moving it with the mouse just like geographical map applications. Additionally, in GenExp it is possible to have more than one data viewer at the same time and to save the current state of the application to revisit it later on.

Conclusions

GenExp is a new interactive web-based client for DAS and addresses some of the short-comings of the existing clients. It uses client-side data rendering techniques resulting in easier genome browsing and exploration. GenExp is open source under the GPL license and it is freely available at http://gralggen.lsi.upc.edu/recerca/genexp.",2011-07-05 +22468708,The k partition-distance problem.,"Many applications of data partitioning (clustering) have been well studied in bioinformatics. Consider, for instance, a set N of organisms (elements) based on DNA marker data. A partition divides all elements in N into two or more disjoint clusters that cover all elements, where a cluster contains a non-empty subset of N. Different partitioning algorithms may produce different partitions. To compute the distance and find the consensus partition (also called consensus clustering) between two or more partitions are important and interesting problems that arise frequently in bioinformatics and data mining, in which different distance functions may be considered in different partition algorithms. In this article, we discuss the k partition-distance problem. Given a set of elements N with k partitions of N, the k partition-distance problem is to delete the minimum number of elements from each partition such that all remaining partitions become identical. This problem is NP-complete for general k > 2 partitions, and no algorithms are known at present. We design the first known heuristic and approximation algorithms with performance ratios 2 to solve the k partition-distance problem in O(k · ρ · |N|) time, where ρ is the maximum number of clusters of these k partitions and |N| is the number of elements in N. We also present the first known exact algorithm in O(ℓ · 2(ℓ)·k(2) · |N|(2)) time, where ℓ is the partition-distance of the optimal solution for this problem. 
Performances of our exact and approximation algorithms in testing the random data with actual sets of organisms based on DNA markers are compared and discussed. Experimental results reveal that our algorithms can improve the computational speed of the exact algorithm for the two partition-distance problem in practice if the maximum number of elements per cluster is less than ρ. From both theoretical and computational points of view, our solutions are at most twice the partition-distance of the optimal solution. A website offering the interactive service of solving the k partition-distance problem using our and previous algorithms is available (see http://mail.tmue.edu.tw/~yhchen/KPDP.html).",2012-04-01 +21609962,"PRI-CAT: a web-tool for the analysis, storage and visualization of plant ChIP-seq experiments.","Although several tools for the analysis of ChIP-seq data have been published recently, there is a growing demand, in particular in the plant research community, for computational resources with which such data can be processed, analyzed, stored, visualized and integrated within a single, user-friendly environment. To accommodate this demand, we have developed PRI-CAT (Plant Research International ChIP-seq analysis tool), a web-based workflow tool for the management and analysis of ChIP-seq experiments. PRI-CAT is currently focused on Arabidopsis, but will be extended with other plant species in the near future. Users can directly submit their sequencing data to PRI-CAT for automated analysis. A QuickLoad server compatible with genome browsers is implemented for the storage and visualization of DNA-binding maps. Submitted datasets and results can be made publicly available through PRI-CAT, a feature that will enable community-based integrative analysis and visualization of ChIP-seq experiments. Secondary analysis of data can be performed with the aid of GALAXY, an external framework for tool and data integration. 
PRI-CAT is freely available at http://www.ab.wur.nl/pricat. No login is required.",2011-05-24 +23249312,PCAdmix: principal components-based assignment of ancestry along each chromosome in individuals with admixed ancestry from two or more populations.,"Identifying ancestry along each chromosome in admixed individuals provides a wealth of information for understanding the population genetic history of admixture events and is valuable for admixture mapping and identifying recent targets of selection. We present PCAdmix (available at https://sites.google.com/site/pcadmix/home ), a Principal Components-based algorithm for determining ancestry along each chromosome from a high-density, genome-wide set of phased single-nucleotide polymorphism (SNP) genotypes of admixed individuals. We compare our method to HAPMIX on simulated data from two ancestral populations, and we find high concordance between the methods. Our method also has better accuracy than LAMP when applied to three-population admixture, a situation as yet unaddressed by HAPMIX. Finally, we apply our method to a data set of four Latino populations with European, African, and Native American ancestry. We find evidence of assortative mating in each of the four populations, and we identify regions of shared ancestry that may be recent targets of selection and could serve as candidate regions for admixture-based association mapping.",2012-08-01 +23297300,Genome-wide landscape of alternative splicing events in Brachypodium distachyon.,"Recently, Brachypodium distachyon has emerged as a model plant for studying monocot grasses and cereal crops. Using assembled expressed transcript sequences and subsequent mapping to the corresponding genome, we identified 1219 alternative splicing (AS) events spanning across 2021 putatively assembled transcripts generated from 941 genes. Approximately, 6.3% of expressed genes are alternatively spliced in B. distachyon. 
We observed that a majority of the identified AS events were related to retained introns (55.5%), followed by alternative acceptor sites (16.7%). We also observed a low percentage of exon skipping (5.0%) and alternative donor site events (8.8%). The 'complex event' that consists of a combination of two or more basic splicing events accounted for ∼14.0%. Comparative AS transcript analysis revealed 163 and 39 homologous pairs between B. distachyon and Oryza sativa and between B. distachyon and Arabidopsis thaliana, respectively. In all, we found 16 AS transcripts to be conserved in all 3 species. AS events and related putative assembled transcripts annotation can be systematically browsed at Plant Alternative Splicing Database (http://proteomics.ysu.edu/altsplice/plant/).",2013-01-07 +24551427,Exploring the relationship between drug side-effects and therapeutic indications.,"Therapeutic indications and drug side-effects are both measureable human behavioral or physiological changes in response to the treatment. In modern drug development, both inferring potential therapeutic indications and identifying clinically important drug side-effects are challenging tasks. Previous studies have utilized either chemical structures or protein targets to predict indications and side-effects. In this study, we compared indication prediction using side-effect information and side-effect prediction using indication information against models using only chemical structures and protein targets. Experimental results based on 10-fold cross-validation, show that drug side-effects and therapeutic indications are the most predictive features for each other. In addition, we extracted 6,706 statistically highly correlated disease-side-effect pairs from all known drug-disease and drug-side-effect relationships. 
Many relationship pairs provide explicit repositioning hypotheses (e.g., drugs causing postural hypotension are potential candidates for hypertension) and clear adverse-reaction watch lists (e.g., drugs for heart failure possibly cause impotence). All data sets and highly correlated disease-side-effect relationships are available at http://astro.temple.edu/~tua87106/druganalysis.html.",2013-11-16 +22155871,"RRBSMAP: a fast, accurate and user-friendly alignment tool for reduced representation bisulfite sequencing.","

Summary

Reduced representation bisulfite sequencing (RRBS) is a powerful yet cost-efficient method for studying DNA methylation on a genomic scale. RRBS involves restriction-enzyme digestion, bisulfite conversion and size selection, resulting in DNA sequencing data that require special bioinformatic handling. Here, we describe RRBSMAP, a short-read alignment tool that is designed for handling RRBS data in a user-friendly and scalable way. RRBSMAP uses wildcard alignment, and avoids the need for any preprocessing or post-processing steps. We benchmarked RRBSMAP against a well-validated MAQ-based pipeline for RRBS read alignment and observed similar accuracy but much improved runtime performance, easier handling and better scaling to large sample sets. In summary, RRBSMAP removes bioinformatic hurdles and reduces the computational burden of large-scale epigenome association studies performed with RRBS.

Availability

http://rrbsmap.computational-epigenetics.org/ http://code.google.com/p/bsmap/

Contact

wl1@bcm.tmc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-06 +23297037,BETAWARE: a machine-learning tool to detect and predict transmembrane beta-barrel proteins in prokaryotes.,"

Summary

The annotation of membrane proteins in proteomes is an important problem of Computational Biology, especially after the development of high-throughput techniques that allow fast and efficient genome sequencing. Among membrane proteins, transmembrane β-barrels (TMBBs) are poorly represented in the database of protein structures (PDB) and difficult to identify with experimental approaches. They are, however, extremely important, playing key roles in several cell functions and bacterial pathogenicity. TMBBs are included in the lipid bilayer with a β-barrel structure and are presently found in the outer membranes of Gram-negative bacteria, mitochondria and chloroplasts. Recently, we developed two top-performing methods based on machine-learning approaches to tackle both the detection of TMBBs in sets of proteins and the prediction of their topology. Here, we present our BETAWARE program that includes both approaches and can run as a standalone program on a linux-based computer to easily address in-home massive protein annotation or filtering.

Availability and implementation

http://www.biocomp.unibo.it/∼savojard/betawarecl .",2013-01-06 +24145223,Using machine learning and high-throughput RNA sequencing to classify the precursors of small non-coding RNAs.,"Recent advances in high-throughput sequencing allow researchers to examine the transcriptome in more detail than ever before. Using a method known as high-throughput small RNA-sequencing, we can now profile the expression of small regulatory RNAs such as microRNAs and small interfering RNAs (siRNAs) with a great deal of sensitivity. However, there are many other types of small RNAs (<50nt) present in the cell, including fragments derived from snoRNAs (small nucleolar RNAs), snRNAs (small nuclear RNAs), scRNAs (small cytoplasmic RNAs), tRNAs (transfer RNAs), and transposon-derived RNAs. Here, we present a user's guide for CoRAL (Classification of RNAs by Analysis of Length), a computational method for discriminating between different classes of RNA using high-throughput small RNA-sequencing data. Not only can CoRAL distinguish between RNA classes with high accuracy, but it also uses features that are relevant to small RNA biogenesis pathways. By doing so, CoRAL can give biologists a glimpse into the characteristics of different RNA processing pathways and how these might differ between tissue types, biological conditions, or even different species. CoRAL is available at http://wanglab.pcbi.upenn.edu/coral/.",2013-10-18 +24564637,Molecular pathway identification using biological network-regularized logistic models.,"

Background

Selecting genes and pathways indicative of disease is a central problem in computational biology. This problem is especially challenging when parsing multi-dimensional genomic data. A number of tools, such as L1-norm based regularization and its extensions elastic net and fused lasso, have been introduced to deal with this challenge. However, these approaches tend to ignore the vast amount of a priori biological network information curated in the literature.

Results

We propose the use of graph Laplacian regularized logistic regression to integrate biological networks into disease classification and pathway association problems. Simulation studies demonstrate that the performance of the proposed algorithm is superior to elastic net and lasso analyses. Utility of this algorithm is also validated by its ability to reliably differentiate breast cancer subtypes using a large breast cancer dataset recently generated by the Cancer Genome Atlas (TCGA) consortium. Many of the protein-protein interaction modules identified by our approach are further supported by evidence published in the literature. Source code of the proposed algorithm is freely available at http://www.github.com/zhandong/Logit-Lapnet.

Conclusion

Logistic regression with graph Laplacian regularization is an effective algorithm for identifying key pathways and modules associated with disease subtypes. With the rapid expansion of our knowledge of biological regulatory networks, this approach will become more accurate and increasingly useful for mining transcriptomic, epi-genomic, and other types of genome wide association studies.",2013-12-09 +21810901,Mauve assembly metrics.,"

Summary

High-throughput DNA sequencing technologies have spurred the development of numerous novel methods for genome assembly. With few exceptions, these algorithms are heuristic and require one or more parameters to be manually set by the user. One approach to parameter tuning involves assembling data from an organism with an available high-quality reference genome, and measuring assembly accuracy using some metrics. We developed a system to measure assembly quality under several scoring metrics, and to compare assembly quality across a variety of assemblers, sequence data types, and parameter choices. When used in conjunction with training data such as a high-quality reference genome and sequence reads from the same organism, our program can be used to manually identify an optimal sequencing and assembly strategy for de novo sequencing of related organisms.

Availability

GPL source code and a usage tutorial are available at http://ngopt.googlecode.com

Contact

aarondarling@ucdavis.edu

Supplementary information

Supplementary data is available at Bioinformatics online.",2011-08-02 +23292739,KEGGParser: parsing and editing KEGG pathway maps in Matlab.,"

Summary

KEGG pathway database is a collection of manually drawn pathway maps accompanied by KGML format files intended for use in automatic analysis. KGML files, however, do not contain the required information for complete reproduction of all the events indicated in the static image of a pathway map. Several parsers and editors of KEGG pathways exist for processing KGML files. We introduce KEGGParser, a MATLAB-based tool for KEGG pathway parsing, semiautomatic fixing, editing, visualization and analysis in the MATLAB environment. It also works with Scilab.

Availability and implementation

The source code is available at http://www.mathworks.com/matlabcentral/fileexchange/37561.",2013-01-03 +23292603,KNApSAcK-3D: a three-dimensional structure database of plant metabolites.,"Studies on plant metabolites have attracted significant attention in recent years. Over the past 8 years, we have constructed a unique metabolite database, called KNApSAcK, that contains information on the relationships between metabolites and their expressing organism(s). In the present paper, we introduce KNApSAcK-3D, which contains the three-dimensional (3D) structures of all of the metabolic compounds included in the original KNApSAcK database. The 3D structure for each compound was optimized using the Merck Molecular Force Field (MMFF94), and a multiobjective genetic algorithm was used to search extensively for possible conformations and locate the global minimum. The resulting set of structures may be used for docking studies to identify new and potentially unexpected binding sites for target proteins. The 3D structures may also be utilized for more qualitative studies, such as the estimation of biological activities using 3D-QSAR. The database can be accessed via a link from the KNApSAcK Family website (http://kanaya.naist.jp/KNApSAcK_Family/) or directory at http://kanaya.naist.jp/knapsack3d/.",2013-01-03 +23418672,Optimal precursor ion selection for LC-MALDI MS/MS.,"

Background

Liquid chromatography mass spectrometry (LC-MS) maps in shotgun proteomics are often too complex to select every detected peptide signal for fragmentation by tandem mass spectrometry (MS/MS). Standard methods for precursor ion selection, commonly based on data dependent acquisition, select highly abundant peptide signals in each spectrum. However, these approaches produce redundant information and are biased towards high-abundance proteins.

Results

We present two algorithms for inclusion list creation that formulate precursor ion selection as an optimization problem. Given an LC-MS map, the first approach maximizes the number of selected precursors given constraints such as a limited number of acquisitions per RT fraction. Second, we introduce a protein sequence-based inclusion list that can be used to monitor proteins of interest. Given only the protein sequences, we create an inclusion list that optimally covers the whole protein set. Additionally, we propose an iterative precursor ion selection that aims at reducing the redundancy obtained with data dependent LC-MS/MS. We overcome the risk of erroneous assignments by including methods for retention time and proteotypicity predictions. We show that our method identifies a set of proteins requiring fewer precursors than standard approaches. Thus, it is well suited for precursor ion selection in experiments with limited sample amount or analysis time.

Conclusions

We present three approaches to precursor ion selection with LC-MALDI MS/MS. Using a well-defined protein standard and a complex human cell lysate, we demonstrate that our methods outperform standard approaches. Our algorithms are implemented as part of OpenMS and are available under http://www.openms.de.",2013-02-18 +23292738,A comprehensive SNP and indel imputability database.,"

Motivation

Genotype imputation has become an indispensable step in genome-wide association studies (GWAS). Imputation accuracy, directly influencing downstream analysis, has been shown to be improved using re-sequencing-based reference panels; however, this comes at the cost of high computational burden due to the huge number of potentially imputable markers (tens of millions) discovered through sequencing a large number of individuals. Therefore, there is an increasing need for access to imputation quality information without actually conducting imputation. To facilitate this process, we have established a publicly available SNP and indel imputability database, aiming to provide direct access to imputation accuracy information for markers identified by the 1000 Genomes Project across four major populations and covering multiple GWAS genotyping platforms.

Results

SNP and indel imputability information can be retrieved through a user-friendly interface by providing the ID(s) of the desired variant(s) or by specifying the desired genomic region. The query results can be refined by selecting relevant GWAS genotyping platform(s). This is the first database providing variant imputability information specific to each continental group and to each genotyping platform. In Filipino individuals from the Cebu Longitudinal Health and Nutrition Survey, our database can achieve an area under the receiver-operating characteristic curve of 0.97, 0.91, 0.88 and 0.79 for markers with minor allele frequency >5%, 3-5%, 1-3% and 0.5-1%, respectively. Specifically, by filtering out 48.6% of markers (corresponding to a reduction of up to 48.6% in computational costs for actual imputation) based on the imputability information in our database, we can remove 77%, 58%, 51% and 42% of the poorly imputed markers at the cost of only 0.3%, 0.8%, 1.5% and 4.6% of the well-imputed markers with minor allele frequency >5%, 3-5%, 1-3% and 0.5-1%, respectively.

Availability

http://www.unc.edu/~yunmli/imputability.html",2013-01-03 +23682804,mtDNAprofiler: a Web application for the nomenclature and comparison of human mitochondrial DNA sequences.,"Mitochondrial DNA (mtDNA) is a valuable tool in the fields of forensic, population, and medical genetics. However, recording and comparing mtDNA control region or entire genome sequences would be difficult if researchers are not familiar with mtDNA nomenclature conventions. Therefore, mtDNAprofiler, a Web application, was designed for the analysis and comparison of mtDNA sequences in a string format or as a list of mtDNA single-nucleotide polymorphisms (mtSNPs). mtDNAprofiler, which comprises four mtDNA sequence-analysis tools (mtDNA nomenclature, mtDNA assembly, mtSNP conversion, and mtSNP concordance-check), supports not only the accurate analysis of mtDNA sequences via an automated nomenclature function, but also consistent management of mtSNP data via direct comparison and validity-check functions. Since mtDNAprofiler consists of four tools that are associated with key steps of mtDNA sequence analysis, mtDNAprofiler will be helpful for researchers working with mtDNA. mtDNAprofiler is freely available at http://mtprofiler.yonsei.ac.kr.",2013-05-17 +22914219,Intensity quantile estimation and mapping--a novel algorithm for the correction of image non-uniformity bias in HCS data.,"

Motivation

Image non-uniformity (NU) refers to systematic, slowly varying spatial gradients in images that result in a bias that can affect all downstream image processing, quantification and statistical analysis steps. Image NU is poorly modeled in the field of high-content screening (HCS), however, such that current conventional correction algorithms may be either inappropriate for HCS or fail to take advantage of the information available in HCS image data.

Results

A novel image NU bias correction algorithm, termed intensity quantile estimation and mapping (IQEM), is described. The algorithm estimates the full non-linear form of the image NU bias by mapping pixel intensities to a reference intensity quantile function. IQEM accounts for the variation in NU bias over broad cell intensity ranges and data acquisition times, both of which are characteristic of HCS image datasets. Validation of the method, using simulated and HCS microtubule polymerization screen images, is presented. Two requirements of IQEM are that the dataset consists of large numbers of images acquired under identical conditions and that cells are distributed with no within-image spatial preference.

Availability and implementation

MATLAB function files are available at http://nadon-mugqic.mcgill.ca/.",2012-08-22 +23893473,IsoDesign: a software for optimizing the design of 13C-metabolic flux analysis experiments.,"The growing demand for (13) C-metabolic flux analysis ((13) C-MFA) in the field of metabolic engineering and systems biology is driving the need to rationalize expensive and time-consuming (13) C-labeling experiments. Experimental design is a key step in improving both the number of fluxes that can be calculated from a set of isotopic data and the precision of flux values. We present IsoDesign, a software that enables these parameters to be maximized by optimizing the isotopic composition of the label input. It can be applied to (13) C-MFA investigations using a broad panel of analytical tools (MS, MS/MS, (1) H NMR, (13) C NMR, etc.) individually or in combination. It includes a visualization module to intuitively select the optimal label input depending on the biological question to be addressed. Applications of IsoDesign are described, with an example of the entire (13) C-MFA workflow from the experimental design to the flux map including important practical considerations. IsoDesign makes the experimental design of (13) C-MFA experiments more accessible to a wider biological community. IsoDesign is distributed under an open source license at http://metasys.insa-toulouse.fr/software/isodes/",2013-09-18 +25035577,Proliferative and non-proliferative lesions of the rat and mouse integument.,"The INHAND (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) project is a joint initiative of the societies of toxicological pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP) and North America (STP). Its aim is to develop an internationally-accepted nomenclature for proliferative and non-proliferative lesions in laboratory rodents. 
A widely accepted international harmonization of nomenclature in laboratory animals will decrease confusion among regulatory and scientific research organizations in different countries and will provide a common language to increase and enrich international exchanges of information among toxicologists and pathologists. The purpose of this publication is to provide a standardized nomenclature for classifying microscopical lesions observed in the integument of laboratory rats and mice. Example colour images are provided for most lesions. The standardized nomenclature presented in this document and additional colour images are also available electronically at http://www.goreni.org. The nomenclature presented herein is based on histopathology databases from government, academia, and industrial laboratories throughout the world, and covers lesions that develop spontaneously as well as those induced by exposure to various test materials. (DOI: 10.1293/tox.26.27S; J Toxicol Pathol 2013; 26: 27S-57S).",2013-01-01 +23568467,Databases and in silico tools for vaccine design.,"In vaccine design, databases and in silico tools play different but complementary roles. Databases collect experimentally verified vaccines and vaccine components, and in silico tools provide computational methods to predict and design new vaccines and vaccine components. Vaccine-related databases include databases of vaccines and vaccine components. In the USA, the Food and Drug Administration (FDA) maintains a database of licensed human vaccines, and the US Department of Agriculture keeps a database of licensed animal vaccines. Databases of vaccine clinical trials and vaccines in research also exist. The important vaccine components include vaccine antigens, vaccine adjuvants, vaccine vectors, and -vaccine preservatives. The vaccine antigens can be whole proteins or immune epitopes. Various in silico vaccine design tools are also available. 
The Vaccine Investigation and Online Information Network (VIOLIN; http://www.violinet.org ) is a comprehensive vaccine database and analysis system. The VIOLIN database includes various types of vaccines and vaccine components. VIOLIN also includes Vaxign, a Web-based in silico vaccine design program based on the reverse vaccinology strategy. Vaccine information and resources can be integrated with Vaccine Ontology (VO). This chapter introduces databases and in silico tools that facilitate vaccine design, especially those in the VIOLIN system.",2013-01-01 +23486013,CancerDR: cancer drug resistance database.,"Cancer therapies are limited by the development of drug resistance, and mutations in drug targets is one of the main reasons for developing acquired resistance. The adequate knowledge of these mutations in drug targets would help to design effective personalized therapies. Keeping this in mind, we have developed a database ""CancerDR"", which provides information of 148 anti-cancer drugs, and their pharmacological profiling across 952 cancer cell lines. CancerDR provides comprehensive information about each drug target that includes; (i) sequence of natural variants, (ii) mutations, (iii) tertiary structure, and (iv) alignment profile of mutants/variants. A number of web-based tools have been integrated in CancerDR. This database will be very useful for identification of genetic alterations in genes encoding drug targets, and in turn the residues responsible for drug resistance. CancerDR allows user to identify promiscuous drug molecules that can kill wide range of cancer cells. CancerDR is freely accessible at http://crdd.osdd.net/raghava/cancerdr/",2013-01-01 +23192546,Analysis strategy of protein-protein interaction networks.,"Protein interactions, as well as the networks they formed, play a key role in many cellular processes and the distortion of the protein interacting interfaces may lead to the development of many diseases. 
In this chapter, we will briefly introduce the background knowledge of the protein-protein interaction, followed by the detailed explanation of varied analyses, from basic to advanced, as well as related tools and databases. VisANT (http://visant.bu.edu), a free Web-based software platform for the integrative visualization, mining, analysis, and modeling of the biological networks, will be used as a main tool for all examples used in this section.",2013-01-01 +22220553,Early progress in epigenetic regulation of endothelin pathway genes.,Control of gene transcription is a major regulatory determinant for function of the endothelin pathway. Epigenetic mechanisms act on tissue-specific gene expression during development and in response to physiological stimuli. Most of the limited evidence available on epigenetic regulation of the endothelin pathway focuses on the EDN1 and EDNRB genes. Examination of whole genome databases suggests that both genes are influenced by histone modifications and DNA methylation. This interpretation is supported by studies directed at detecting epigenetic action on the two genes. The clearest illustration of epigenetic factors altering endothelin signalling is DNA methylation-associated EDNRB silencing during tumourigenesis. This review summarizes our current understanding of epigenetic regulation of the endothelin pathway genes. LINKED ARTICLES This article is part of a themed section on Endothelin. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2013.168.issue-1.,2013-01-01 +23963915,Metabolomics and dereplication strategies in natural products.,"Metabolomic methods can be utilized to screen diverse biological sources of potentially novel and sustainable sources of antibiotics and pharmacologically-active drugs. 
Dereplication studies by high resolution Fourier transform mass spectrometry coupled to liquid chromatography (LC-HRFTMS) and nuclear magnetic resonance (NMR) spectroscopy can establish the chemical profile of endophytic and/or endozoic microbial extracts and their plant or animal sources. Identifying the compounds of interest at an early stage will aid in the isolation of the bioactive components. Therefore metabolite profiling is important for functional genomics and in the search for new pharmacologically active compounds. Using the tools of metabolomics through the employment of LC-HRFTMS as well as high resolution NMR will be a very efficient approach. Metabolomic profiling has found its application in screening extracts of macroorganisms as well as in the isolation and cultivation of suspected microbial producers of bioactive natural products. Metabolomics is being applied to identify and biotechnologically optimize the production of pharmacologically active secondary metabolites. The links between metabolome evolution during optimization and processing factors can be identified through metabolomics. Information obtained from a metabolomics dataset can efficiently establish cultivation and production processes at a small scale which will be finally scaled up to a fermenter system, while maintaining or enhancing synthesis of the desired compounds. MZmine (BMC Bioinformatics 11:395-399, 2010; http://mzmine.sourceforge.net/download.shtml ) and SIEVE ( http://www.vastscientific.com/resources/index.html ; Rapid Commun Mass Spectrom 22:1912-1918, 2008) softwares are utilized to perform differential analysis of sample populations to find significant expressed features of complex biomarkers between parameter variables. Metabolomes are identified with the aid of existing high resolution MS and NMR records from online or in-house databases like AntiMarin, a merger database of Antibase (Laatsch H. Antibase Version 4.0 - The Natural Compound Identifier. 
Wiley-VCH Verlag GmbH & Co. KGaA, 2012) for microbial secondary metabolites as well as higher fungi and MarinLit for marine natural products (Blunt J. MarinLit. University of Canterbury, New Zealand, 2012). This is further validated through available reference standards and NMR experiments. Metabolomics has become a powerful tool in systems biology which allows us to gain insights into the potential of natural isolates for synthesis of significant quantities of promising new agents and allows us to manipulate the environment within fermentation systems in a rational manner to select a desired metabolome.",2013-01-01 +23907417,A previously undescribed mutation detected by sequence analysis of CYP21A2 gene in an infant with salt wasting congenital adrenal hyperplasia.,The Human Cytochrome P450 (CYP) Allele Nomenclature Committee (http://www.imm.Ki.se/CYPalleles/cyp21.htm) has created a CYP21A2 database which includes a list of all reported CYP21A2 mutations and the last update of this database was in 2006. The most up to date list of the CYP21A2 mutations reported over the last four years was published in a recent article by Concolino et al. We report a previously undescribed mutation detected by sequence analysis of CYP21A2 gene in an infant resulting in salt wasting congenital adrenal hyperplasia.,2013-01-01 +25035576,"Proliferative and non-proliferative lesions of the rat and mouse soft tissue, skeletal muscle and mesothelium.","The INHAND Project (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) is a joint initiative of the Societies of Toxicologic Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP), and North America (STP) to develop an internationally accepted nomenclature for proliferative and nonproliferative lesions in laboratory animals. 
The purpose of this publication is to provide a standardized nomenclature for classifying lesions observed in the soft tissues including skeletal muscle as well as the mesothelium of rats and mice. The standardized nomenclature of lesions presented in this document is also available electronically on the Internet (http://www.goreni.org/). Sources of material included histopathology databases from government, academia, and industrial laboratories throughout the world. Content includes spontaneous developmental and aging lesions as well as those induced by exposure to test materials. A widely accepted and utilized international harmonization of nomenclature for lesions in soft tissues, skeletal muscle and mesothelium in laboratory animals will decrease confusion among regulatory and scientific research organizations in different countries and provide a common language to increase and enrich international exchanges of information among toxicologists and pathologists. (DOI: 10.1293/tox.26.1S; J Toxicol Pathol 2013; 26: 1S-26S).",2013-01-01 +23027045,Designing functional siRNA with reduced off-target effects.,"RNA interference (RNAi) mediated by small interfering RNA (siRNA) is now widely used to knock down gene expression in a sequence-specific manner, making it a powerful tool not only for studying gene functions but also for therapeutic applications. siRNA decreases the expression level of the intended target gene with complete complementarity by cleaving its mRNA. However, the efficacy of each siRNA widely varies depending on its sequence in mammalian cells; only a limited fraction of randomly designed siRNAs is functional. Moreover, off-target silencing effects arise when the siRNA has partial complementarity in the seed region with unintended genes. Here, we describe the rational designing of functional, off-target effect-reduced siRNAs using siDirect 2.0 Web server (http://siDirect2.RNAi.jp/). 
By using the default parameters, siDirect 2.0 can design at least one qualified siRNA for >94% of human mRNA sequences in the RefSeq database.",2013-01-01 +30722291,First Report of Frosty Mildew Caused by Mycopappus alni on Asian Pear in Korea.,"Asian pear (Pyrus pyrifolia Nakai), also known as Japanese or Korean pear, is widely cultivated in East Asia. On September 2011, irregularly shaped necrotic lesions were observed on leaves of cv. Shinheung growing in an orchard in Gangneung City, Korea. At 40× magnification under a microscope, the white to cream colored propagules were epiphyllous, conical, scattered to aggregated, and composed of stroma-like bases, globose to subglobose, 55 to 100 μm wide and 35 to 75 μm high with filamentous and claviform hyphae. The filamentous hyphae were cylindrical, 125 to 425 × 3.5 to 6 μm, 2- to 8-septate, and obtuse to subobtuse at the apex. The claviform hyphae were clavate to cylindrical, 35 to 125 × 5 to 12.5 μm, aseptate to 3-septate, and obtuse at the apex. The fungus was isolated from leaf lesions and cultured on potato dextrose agar (PDA). The colonies consisted of thin mycelia colored whitish at first and then pale brown on PDA. Sclerotia were produced on PDA after 2 weeks incubation at 15°C, but conidia were not observed in culture. An isolate from KUS-F26196 was deposited in the Korean Agricultural Culture Collection (Accession No. 25 KACC46693). These morphological and cultural characteristics were consistent with Mycopappus alni (Dearn. & Barthol.) Redhead & G.P. White (1,3,4). Fungal DNA was extracted with DNeasy Plant Mini DNA Extraction Kits (Qiagen Inc., Valencia, CA). The complete internal transcribed spacer (ITS) region of rDNA was amplified with the primers ITS1/ITS4 and sequenced. The resulting sequence product of 520 bp was deposited in GenBank (Accession No. JX458815). A BLAST search in GenBank revealed that the sequence was 99% similar to M. alni (AB254190, AB254177, AB254189). 
To determine the pathogenicity of the fungus, propagules were detached from lesions on the naturally infected leaves using fine needles. Each propagule was transferred individually onto five places of six detached healthy leaves. Control treatment comprised placing small agar blocks onto five places of six detached healthy leaves. The plants were incubated in a humid chamber at RH 100% and 18°C. Symptoms were observed after 2 days on all inoculated leaves. The pathogen was reisolated from lesions on the inoculated leaves, confirming Koch's postulates. No symptoms were observed on control leaves. The fungus has been associated with frosty mildew on Alnus spp., Betula spp., Crataegus spp., and Pyrus spp. in North America, Turkey, Russia, and Japan (1,2,4). To our knowledge, this is the first report of frosty mildew on P. pyrifolia caused by M. alni globally as well as in Korea. Since the infections may be limited to the mountainous area with low night temperature and high humidity, economic losses seem to be negligible. However, the disease could be a potential threat to the safe production of Korean pears in case of prolonged period of cool and moist weather. References: (1) U. Braun et al. Mikologiya i Fitopatologiya 34(6):1, 2000. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology & Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , August 2, 2012. (3) S. A. Redhead and G. P. White. Can. J. Bot. 63:1429, 1985. (4) Y. Takahashi et al. Mycoscience 47:388, 2006.",2013-01-01 +24479118,PFMFind: a system for discovery of peptide homology and function.,"Protein Fragment Motif Finder (PFMFind) is a system that enables efficient discovery of relationships between short fragments of protein sequences using similarity search. It supports queries based on amino acid similarity matrices and position specific score matrices (PSSMs) obtained through an iterative procedure. 
PSSM construction is customisable through plugins written in Python. PFMFind consists of a GUI client, an index for fast similarity search and a relational database for storing search results and sequence annotations. It is written mostly in Python. The components of PFMFind communicate through TCP/IP sockets and can be located on different physical machines. PFMFind is freely available for download (under a GPL licence) from http://pfmfind.stojmirovic.org.",2013-01-01 +23269131,"Clinical practice guidelines for the management of pain, agitation, and delirium in adult patients in the intensive care unit.","

Objective

To revise the ""Clinical Practice Guidelines for the Sustained Use of Sedatives and Analgesics in the Critically Ill Adult"" published in Critical Care Medicine in 2002.

Methods

The American College of Critical Care Medicine assembled a 20-person, multidisciplinary, multi-institutional task force with expertise in guideline development, pain, agitation and sedation, delirium management, and associated outcomes in adult critically ill patients. The task force, divided into four subcommittees, collaborated over 6 yr in person, via teleconferences, and via electronic communication. Subcommittees were responsible for developing relevant clinical questions, using the Grading of Recommendations Assessment, Development and Evaluation method (http://www.gradeworkinggroup.org) to review, evaluate, and summarize the literature, and to develop clinical statements (descriptive) and recommendations (actionable). With the help of a professional librarian and Refworks database software, they developed a Web-based electronic database of over 19,000 references extracted from eight clinical search engines, related to pain and analgesia, agitation and sedation, delirium, and related clinical outcomes in adult ICU patients. The group also used psychometric analyses to evaluate and compare pain, agitation/sedation, and delirium assessment tools. All task force members were allowed to review the literature supporting each statement and recommendation and provided feedback to the subcommittees. Group consensus was achieved for all statements and recommendations using the nominal group technique and the modified Delphi method, with anonymous voting by all task force members using E-Survey (http://www.esurvey.com). All voting was completed in December 2010. Relevant studies published after this date and prior to publication of these guidelines were referenced in the text. The quality of evidence for each statement and recommendation was ranked as high (A), moderate (B), or low/very low (C). The strength of recommendations was ranked as strong (1) or weak (2), and either in favor of (+) or against (-) an intervention. 
A strong recommendation (either for or against) indicated that the intervention's desirable effects either clearly outweighed its undesirable effects (risks, burdens, and costs) or it did not. For all strong recommendations, the phrase ""We recommend …"" is used throughout. A weak recommendation, either for or against an intervention, indicated that the trade-off between desirable and undesirable effects was less clear. For all weak recommendations, the phrase ""We suggest …"" is used throughout. In the absence of sufficient evidence, or when group consensus could not be achieved, no recommendation (0) was made. Consensus based on expert opinion was not used as a substitute for a lack of evidence. A consistent method for addressing potential conflict of interest was followed if task force members were coauthors of related research. The development of this guideline was independent of any industry funding.

Conclusion

These guidelines provide a roadmap for developing integrated, evidence-based, and patient-centered protocols for preventing and treating pain, agitation, and delirium in critically ill patients.",2013-01-01 +26120664,Phytoseiidae in European grape (Vitis vinifera L.): bio-ecological aspects and keys to species (Acari: Mesostigmata).,"The family Phytoseiidae includes many species of predators involved in the control of mite pests of crops all over the world. In European vineyards, these natural enemies play a key role in plant protection as their presence usually makes the use of acaricides unnecessary. Each species has its specific biological features. It is thus of great interest to identify the species reported on grape, Vitis vinifera. The present paper, based on a world database of the family Phytoseiidae and on an analysis of more than 250 publications, presents the 54 species of Phytoseiidae belonging to 15 genera reported on V. vinifera in Europe, and identification keys to species. Online versions of the key (dichotomous and polytomous) with illustrations are available at http://wwwl.montpellier.inra.fr/CBGP/phytoseiidae/sitewebvineyards2/index.htm. An analysis of the biogeographic distribution of these species and their prey has also been carried out. Most species reported on V. vinifera in Europe are rare; only five species are frequently observed: Kampimodromus aberrans, Typhlodromus (Typhlodromus) pyri, Typhlodromus (Typhlodromus) exhilaratus, Euseius finlandicus and Phytoseius finitimus. The 12 countries where Phytoseiidae have been reported from grapes have been unevenly surveyed, the most well-known faunas being from Italy, Greece and France. 
These species are reported to prey upon the main species of mite pests of grapes.",2013-01-01 +25267934,Improving Software Sustainability: Lessons Learned from Profiles in Science.,"The Profiles in Science® digital library features digitized surrogates of historical items selected from the archival collections of the U.S. National Library of Medicine as well as collaborating institutions. In addition, it contains a database of descriptive, technical and administrative metadata. It also contains various software components that allow creation of the metadata, management of the digital items, and access to the items and metadata through the Profiles in Science Web site [1]. The choices made building the digital library were designed to maximize the sustainability and long-term survival of all of the components of the digital library [2]. For example, selecting standard and open digital file formats rather than proprietary formats increases the sustainability of the digital files [3]. Correspondingly, using non-proprietary software may improve the sustainability of the software--either through in-house expertise or through the open source community. Limiting our digital library software exclusively to open source software or to software developed in-house has not been feasible. For example, we have used proprietary operating systems, scanning software, a search engine, and office productivity software. We did this when either lack of essential capabilities or the cost-benefit trade-off favored using proprietary software. We also did so knowing that in the future we would need to replace or upgrade some of our proprietary software, analogous to migrating from an obsolete digital file format to a new format as the technological landscape changes. Since our digital library's start in 1998, all of its software has been upgraded or replaced, but the digitized items have not yet required migration to other formats. 
Technological changes that compelled us to replace proprietary software included the cost of product licensing, product support, incompatibility with other software, prohibited use due to evolving security policies, and product abandonment. Sometimes these changes happen on short notice, so we continually monitor our library's software for signs of endangerment. We have attempted to replace proprietary software with suitable in-house or open source software. When the replacement involves a standalone piece of software with a nearly equivalent version, such as replacing a commercial HTTP server with an open source HTTP server, the replacement is straightforward. Recently we replaced software that functioned not only as our search engine but also as the backbone of the architecture of our Web site. In this paper, we describe the lessons learned and the pros and cons of replacing this software with open source software.",2013-01-01 +24584870,"Peptides, specific proteolysis products, as molecular markers of allergenic proteins - in silico studies.","The objective of this study was to analyse allergenic proteins by identifying their molecular biomarkers for detection in food using bioinformatics tools. The protein and epitope sequences were from BIOPEP database, proteolysis was simulated using BIOPEP program and UniProt database screening via BLAST and FASTA programs. The biomarkers of food proteins were proposed: for example for whey proteins - TPEVDDEALEKFDKALKALPMHIR (β-Lg: fragment 141-164), chicken egg - AAVSVDCSEYPKPDCTAEDRPL (ovomucoid: 156-177), wheat - KCNGTVEQVESIVNTLNAGQIASTDVVEVVVSPPY (triose phosphate isomerase: 12-46) and peanuts - QARQLKNNNPFKFFVPPFQQSPRAVA (arachin: 505-530). The results are annotated in the BIOPEP database of allergenic proteins and epitopes, available at http://www.uwm.edu.pl/biochemia. 
The epitope-receptor interactions are attributed to the epitope's sequence and suggest that in silico proteolysis products showing the highest degree of sequence identity with an epitope or its part are characteristic of a given protein or a group of cross-reactive homologs. The protein markers from basic food groups were proposed based on the above assumption.",2013-01-01 +23185449,Molecular pathways involved in prostate carcinogenesis: insights from public microarray datasets.,"

Background

Prostate cancer is currently the most frequently diagnosed malignancy in men and the second leading cause of cancer-related deaths in industrialized countries. Worldwide, an increase in prostate cancer incidence is expected due to an increased life-expectancy, aging of the population and improved diagnosis. Although the specific underlying mechanisms of prostate carcinogenesis remain unknown, prostate cancer is thought to result from a combination of genetic and environmental factors altering key cellular processes. To elucidate these complex interactions and to contribute to the understanding of prostate cancer progression and metastasis, analysis of large scale gene expression studies using bioinformatics approaches is used to decipher regulation of core processes.

Methodology/principal findings

In this study, a standardized quality control procedure and statistical analysis (http://www.arrayanalysis.org/) were applied to multiple prostate cancer datasets retrieved from the ArrayExpress data repository and pathway analysis using PathVisio (http://www.pathvisio.org/) was performed. The results led to the identification of three core biological processes that are strongly affected during prostate carcinogenesis: cholesterol biosynthesis, the process of epithelial-to-mesenchymal transition and an increased metabolic activity.

Conclusions

This study illustrates how a standardized bioinformatics evaluation of existing microarray data and subsequent pathway analysis can quickly and cost-effectively provide essential information about important molecular pathways and cellular processes involved in prostate cancer development and disease progression. The presented results may assist in biomarker profiling and the development of novel treatment approaches.",2012-11-20 +23333734,ProCoCoA: A quantitative approach for analyzing protein core composition.,"Defining the amino acid composition of protein cores is fundamental for understanding protein folding, as different architectures might achieve structural stability only in the presence of specific amino acid networks. Quantitative characterization of protein cores in relation to the corresponding structures and dynamics is needed to increase the reliability of protein engineering procedures. Unambiguous criteria based on atom depth considerations were established to assign amino acid residues to protein cores and, hence, for classifying inner and outer molecular moieties. These criteria were summarized in a new tool named ProCoCoA, Protein Core Composition Analyzer. An user-friendly web interface was developed, available at the URL: http://www.sbl.unisi.it/prococoa. An accurate estimate of protein core composition for six protein architectures selected from the CATH database of solved structures has been carried out, and the obtained results indicate the presence of specific patterns of amino acid core composition in different protein folds.",2012-12-31 +21803805,ContEst: estimating cross-contamination of human samples in next-generation sequencing data.,"

Summary

Here, we present ContEst, a tool for estimating the level of cross-individual contamination in next-generation sequencing data. We demonstrate the accuracy of ContEst across a range of contamination levels, sources and read depths using sequencing data mixed in silico at known concentrations. We applied our tool to published cancer sequencing datasets and report their estimated contamination levels.

Availability and implementation

ContEst is a GATK module, and distributed under a BSD style license at http://www.broadinstitute.org/cancer/cga/contest

Contact

kcibul@broadinstitute.org; gadgetz@broadinstitute.org

Supplementary information

Supplementary data is available at Bioinformatics online.",2011-07-29 +22761941,FusionFinder: a software tool to identify expressed gene fusion candidates from RNA-Seq data.,"The hallmarks of many haematological malignancies and solid tumours are chromosomal translocations, which may lead to gene fusions. Recently, next-generation sequencing techniques at the transcriptome level (RNA-Seq) have been used to verify known and discover novel transcribed gene fusions. We present FusionFinder, a Perl-based software designed to automate the discovery of candidate gene fusion partners from single-end (SE) or paired-end (PE) RNA-Seq read data. FusionFinder was applied to data from a previously published analysis of the K562 chronic myeloid leukaemia (CML) cell line. Using FusionFinder we successfully replicated the findings of this study and detected additional previously unreported fusion genes in their dataset, which were confirmed experimentally. These included two isoforms of a fusion involving the genes BRK1 and VHL, whose co-deletion has previously been associated with the prevalence and severity of renal-cell carcinoma. FusionFinder is made freely available for non-commercial use and can be downloaded from the project website (http://bioinformatics.childhealthresearch.org.au/software/fusionfinder/).",2012-06-27 +22210868,VarSifter: visualizing and analyzing exome-scale sequence variation data on a desktop computer.,"

Unlabelled

VarSifter is a graphical software tool for desktop computers that allows investigators of varying computational skills to easily and quickly sort, filter, and sift through sequence variation data. A variety of filters and a custom query framework allow filtering based on any combination of sample and annotation information. By simplifying visualization and analyses of exome-scale sequence variation data, this program will help bring the power and promise of massively-parallel DNA sequencing to a broader group of researchers.

Availability and implementation

VarSifter is written in Java, and is freely available in source and binary versions, along with a User Guide, at http://research.nhgri.nih.gov/software/VarSifter/.",2011-12-30 +21063949,imzML: Imaging Mass Spectrometry Markup Language: A common data format for mass spectrometry imaging.,"Imaging mass spectrometry is the method of scanning a sample of interest and generating an ""image"" of the intensity distribution of a specific analyte. The data sets consist of a large number of mass spectra which are usually acquired with identical settings. Existing data formats are not sufficient to describe an MS imaging experiment completely. The data format imzML was developed to allow the flexible and efficient exchange of MS imaging data between different instruments and data analysis software.For this purpose, the MS imaging data is divided in two separate files. The mass spectral data is stored in a binary file to ensure efficient storage. All metadata (e.g., instrumental parameters, sample details) are stored in an XML file which is based on the standard data format mzML developed by HUPO-PSI. The original mzML controlled vocabulary was extended to include specific parameters of imaging mass spectrometry (such as x/y position and spatial resolution). The two files (XML and binary) are connected by offset values in the XML file and are unambiguously linked by a universally unique identifier. The resulting datasets are comparable in size to the raw data and the separate metadata file allows flexible handling of large datasets.Several imaging MS software tools already support imzML. This allows choosing from a (growing) number of processing tools. One is no longer limited to proprietary software, but is able to use the processing software which is best suited for a specific question or application. On the other hand, measurements from different instruments can be compared within one software application using identical settings for data processing. 
All necessary information for evaluating and implementing imzML can be found at http://www.imzML.org .",2011-01-01 +21685091,ccSVM: correcting Support Vector Machines for confounding factors in biological data classification.,"

Motivation

Classifying biological data into different groups is a central task of bioinformatics: for instance, to predict the function of a gene or protein, the disease state of a patient or the phenotype of an individual based on its genotype. Support Vector Machines are a wide spread approach for classifying biological data, due to their high accuracy, their ability to deal with structured data such as strings, and the ease to integrate various types of data. However, it is unclear how to correct for confounding factors such as population structure, age or gender or experimental conditions in Support Vector Machine classification.

Results

In this article, we present a Support Vector Machine classifier that can correct the prediction for observed confounding factors. This is achieved by minimizing the statistical dependence between the classifier and the confounding factors. We prove that this formulation can be transformed into a standard Support Vector Machine with rescaled input data. In our experiments, our confounder correcting SVM (ccSVM) improves tumor diagnosis based on samples from different labs, tuberculosis diagnosis in patients of varying age, ethnicity and gender, and phenotype prediction in the presence of population structure and outperforms state-of-the-art methods in terms of prediction accuracy.

Availability

A ccSVM-implementation in MATLAB is available from http://webdav.tuebingen.mpg.de/u/karsten/Forschung/ISMB11_ccSVM/.

Contact

limin.li@tuebingen.mpg.de; karsten.borgwardt@tuebingen.mpg.de.",2011-07-01 +24725959,"Genetic characterization of Toxoplasma gondii from cats in Yunnan Province, Southwestern China.","

Background

Cats are the definitive hosts of Toxoplasma gondii. The distribution of genetic diversity of T. gondii in cats is of importance to understand the transmission of this parasite. The objective of this study was to genetically characterize T. gondii isolates from cats in Yunnan province, southwestern China.

Methods

Genomic DNA was extracted from 5-10 g cat tissue samples (brain, tongue, heart, and liver). Using multilocous polymerase chain reaction-restriction fragment length polymorphism (PCR-RFLP) technology, we determined genetic diversity of T. gondii isolates from cats in Yunnan province.

Result

In total, 175 stray cats were tested for T. gondii DNA, respectively, 44 (25.14%) of which were found to be positive for the T. gondii B1 gene by PCR amplification. The positive DNA samples were typed at 11 genetic markers, including 10 nuclear markers, namely, SAG1, 5'-3'SAG2, alternative SAG2, SAG3, GRA6, L358, PK1, BTUB, c22-8, c29-2 and an apicoplast locus Apico. Of these, 16 isolates from cats were genotyped with data for more than 9 loci, revealed 5 genotypes in total, of which 11 of 16 samples were identified as ToxoDB#9, two samples may belong to genotye #225, one was Type II, one was ToxoDB#3, and one was ToxoDB#20 (http://toxodb.org/toxo/).

Conclusions

The results of the present study indicated a wide distribution of T. gondii infection in cats in Yunnan province, which may pose significant public health concerns. To our knowledge, the present study is the first report of T. gondii prevalence and genotypes in cats in southwestern China, and the first report of Type II T. gondii from cats in China.",2014-04-11 +23273493,The Foundational Model of Anatomy in OWL 2 and its use.,"

Objective

The objective is to represent the Foundational Model of Anatomy (FMA) in the OWL 2 Web Ontology Language (informally OWL 2), and to use it in a European cross-lingual portal of health terminologies for indexing and searching Web resources. Formalizing the FMA in OWL 2 is essential for semantic interoperability, to improve its design, and to ensure its reliability and correctness, which is particularly important for medical applications.

Method and material

The native FMA was implemented in frames and stored in a MySQL database backend. The main strength of the method is to leverage OWL 2 expressiveness and to rely on the naming conventions of the FMA, to make explicit some implicit semantics, while improving its ontological model and fixing some errors. Doing so, the semantics (meaning) of the formal definitions and axioms are anatomically correct. A flexible tool enables the generation of a new version in OWL 2 at each Protégé FMA update. While it creates by default a 'standard' version of the FMA in OWL 2 (FMA-OWL), many options allow for producing other variants customized to users' applications. Once formalized in OWL 2, it was possible to use an inference engine to check the ontology and detect inconsistencies. Next, the FMA-OWL was used to derive a lightweight FMA terminology for a European cross-lingual portal of terminologies/ontologies for indexing and searching resources. The transformation is mainly based on a reification process.

Result

Complete representations of the entire FMA in OWL 1 or OWL 2 are now available. The formalization tool is flexible and easy to use, making it possible to obtain an OWL 2 version for all existing public FMA. A number of errors were detected in the native FMA and several patterns of recurrent errors were identified in the original FMA. This shows how the underlying OWL 2 ontology is essential to ensure that the lightweight derived terminology is reliable. The FMA OWL 2 ontology has been applied to derive an anatomy terminology that is used in a European cross-lingual portal of health terminologies. This portal is daily used by librarians to index Web health resources. In August 2011, 6481 out of 81,450 health resources of CISMeF catalog (http://www.chu-rouen.fr/cismef/--accessed 29.08.12) (7.96%) were indexed with at least one FMA entity.

Conclusion

The FMA is a central terminology used to index and search Web resources. To the best of our knowledge, neither a complete representation of the entire FMA in OWL 2, nor an anatomy terminology available in a cross-lingual portal, has been developed to date. The method designed to represent the FMA ontology in OWL 2 presented in this article is general and may be extended to other ontologies. Using a formal ontology for quality assurance and deriving a lightweight terminology for biomedical applications is a general and promising strategy.",2012-12-28 +23240691,"FINDSITE(comb): a threading/structure-based, proteomic-scale virtual ligand screening approach.","Virtual ligand screening is an integral part of the modern drug discovery process. Traditional ligand-based, virtual screening approaches are fast but require a set of structurally diverse ligands known to bind to the target. Traditional structure-based approaches require high-resolution target protein structures and are computationally demanding. In contrast, the recently developed threading/structure-based FINDSITE-based approaches have the advantage that they are as fast as traditional ligand-based approaches and yet overcome the limitations of traditional ligand- or structure-based approaches. These new methods can use predicted low-resolution structures and infer the likelihood of a ligand binding to a target by utilizing ligand information excised from the target's remote or close homologous proteins and/or libraries of ligand binding databases. Here, we develop an improved version of FINDSITE, FINDSITE(filt), that filters out false positive ligands in threading identified templates by a better binding site detection procedure that includes information about the binding site amino acid similarity. We then combine FINDSITE(filt) with FINDSITE(X) that uses publicly available binding databases ChEMBL and DrugBank for virtual ligand screening. 
The combined approach, FINDSITE(comb), is compared to two traditional docking methods, AUTODOCK Vina and DOCK 6, on the DUD benchmark set. It is shown to be significantly better in terms of enrichment factor, dependence on target structure quality, and speed. FINDSITE(comb) is then tested for virtual ligand screening on a large set of 3576 generic targets from the DrugBank database as well as a set of 168 Human GPCRs. Excluding close homologues, FINDSITE(comb) gives an average enrichment factor of 52.1 for generic targets and 22.3 for GPCRs within the top 1% of the screened compound library. Around 65% of the targets have better than random enrichment factors. The performance is insensitive to target structure quality, as long as it has a TM-score ≥ 0.4 to native. Thus, FINDSITE(comb) makes the screening of millions of compounds across entire proteomes feasible. The FINDSITE(comb) web service is freely available for academic users at http://cssb.biology.gatech.edu/skolnick/webservice/FINDSITE-COMB/index.html.",2012-12-28 +21926123,Bambus 2: scaffolding metagenomes.,"

Motivation

Sequencing projects increasingly target samples from non-clonal sources. In particular, metagenomics has enabled scientists to begin to characterize the structure of microbial communities. The software tools developed for assembling and analyzing sequencing data for clonal organisms are, however, unable to adequately process data derived from non-clonal sources.

Results

We present a new scaffolder, Bambus 2, to address some of the challenges encountered when analyzing metagenomes. Our approach relies on a combination of a novel method for detecting genomic repeats and algorithms that analyze assembly graphs to identify biologically meaningful genomic variants. We compare our software to current assemblers using simulated and real data. We demonstrate that the repeat detection algorithms have higher sensitivity than current approaches without sacrificing specificity. In metagenomic datasets, the scaffolder avoids false joins between distantly related organisms while obtaining long-range contiguity. Bambus 2 represents a first step toward automated metagenomic assembly.

Availability

Bambus 2 is open source and available from http://amos.sf.net.

Contact

mpop@umiacs.umd.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-16 +23276706,PRASA: an integrated web server that analyzes protein interaction types.,"This work presents the Protein Association Analyzer (PRASA) (http://zoro.ee.ncku.edu.tw/prasa/) that predicts protein interactions as well as interaction types. Protein interactions are essential to most biological functions. The existence of diverse interaction types, such as physically contacted or functionally related interactions, makes protein interactions complex. Different interaction types are distinct and should not be confused. However, most existing tools focus on a specific interaction type or mix different interaction types. This work collected 7234058 associations with experimentally verified interaction types from five databases and compiled individual probabilistic models for different interaction types. The PRASA result page shows predicted associations and their related references by interaction type. Experimental results demonstrate the performance difference when distinguishing between different interaction types. The PRASA provides a centralized and organized platform for easy browsing, downloading and comparing of interaction types, which helps reveal insights into the complex roles that proteins play in organisms.",2012-12-28 +24280312,WikiPathways for plants: a community pathway curation portal and a case study in rice and arabidopsis seed development networks.,"

Background

Next-generation sequencing and 'omics' platforms are used extensively in plant biology research to unravel new genomes and study their interactions with abiotic and biotic agents in the growth environment. Despite the availability of a large and growing number of genomic data sets, there are only limited resources providing highly-curated and up-to-date metabolic and regulatory networks for plant pathways.

Results

Using PathVisio, a pathway editor tool associated with WikiPathways, we created a gene interaction network of 430 rice (Oryza sativa) genes involved in the seed development process by curating interactions reported in the published literature. We then applied an InParanoid-based homology search to these genes and used the resulting gene clusters to identify 351 Arabidopsis thaliana genes. Using this list of homologous genes, we constructed a seed development network in Arabidopsis by processing the gene list and the rice network through a Perl utility software called Pathway GeneSWAPPER developed by us. In order to demonstrate the utility of these networks in generating testable hypotheses and preliminary analysis prior to more in-depth downstream analysis, we used the expression viewer and statistical analysis features of PathVisio to analyze publicly-available and published microarray gene expression data sets on diurnal photoperiod response and the seed development time course to discover patterns of coexpressed genes found in the rice and Arabidopsis seed development networks. These seed development networks described herein, along with other plant pathways and networks, are freely available on the plant pathways portal at WikiPathways (http://plants.wikipathways.org).

Conclusion

In collaboration with the WikiPathways project we present a community curation and analysis platform for plant biologists where registered users can freely create, edit, share and monitor pathways supported by published literature. We describe the curation and annotation of a seed development network in rice, and the projection of a similar, gene homology-based network in Arabidopsis. We also demonstrate the utility of the Pathway GeneSWAPPER (PGS) application in saving valuable time and labor when a reference network in one species compiled in GPML format is used to project a similar network in another species based on gene homology.",2013-05-29 +22824207,ACPYPE - AnteChamber PYthon Parser interfacE.,"

Background

ACPYPE (or AnteChamber PYthon Parser interfacE) is a wrapper script around the ANTECHAMBER software that simplifies the generation of small molecule topologies and parameters for a variety of molecular dynamics programmes like GROMACS, CHARMM and CNS. It is written in the Python programming language and was developed as a tool for interfacing with other Python based applications such as the CCPN software suite (for NMR data analysis) and ARIA (for structure calculations from NMR data). ACPYPE is open source code, under GNU GPL v3, and is available as a stand-alone application at http://www.ccpn.ac.uk/acpype and as a web portal application at http://webapps.ccpn.ac.uk/acpype.

Findings

We verified the topologies generated by ACPYPE in three ways: by comparing with default AMBER topologies for standard amino acids; by generating and verifying topologies for a large set of ligands from the PDB; and by recalculating the structures for 5 protein-ligand complexes from the PDB.

Conclusions

ACPYPE is a tool that simplifies the automatic generation of topology and parameters in different formats for different molecular mechanics programmes, including calculation of partial charges, while being object oriented for integration with other applications.",2012-07-23 +24292941,AISAIC: a software suite for accurate identification of significant aberrations in cancers.,"

Unlabelled

Accurate identification of significant aberrations in cancers (AISAIC) is a systematic effort to discover potential cancer-driving genes such as oncogenes and tumor suppressors. Two major confounding factors against this goal are the normal cell contamination and random background aberrations in tumor samples. We describe a Java AISAIC package that provides comprehensive analytic functions and graphic user interface for integrating two statistically principled in silico approaches to address the aforementioned challenges in DNA copy number analyses. In addition, the package provides a command-line interface for users with scripting and programming needs to incorporate or extend AISAIC to their customized analysis pipelines. This open-source multiplatform software offers several attractive features: (i) it implements a user friendly complete pipeline from processing raw data to reporting analytic results; (ii) it detects deletion types directly from copy number signals using a Bayes hypothesis test; (iii) it estimates the fraction of normal contamination for each sample; (iv) it produces unbiased null distribution of random background alterations by iterative aberration-exclusive permutations; and (v) it identifies significant consensus regions and the percentage of homozygous/hemizygous deletions across multiple samples. AISAIC also provides users with a parallel computing option to leverage ubiquitous multicore machines.

Availability and implementation

 AISAIC is available as a Java application, with a user's guide and source code, at https://code.google.com/p/aisaic/.",2013-11-29 +24303983,A role for PPARα in the medial prefrontal cortex in formalin-evoked nociceptive responding in rats.,"

Background and purpose

The nuclear hormone receptor, PPARα, and its endogenous ligands, are involved in pain modulation. PPARα is expressed in the medial prefrontal cortex (mPFC), a key brain region involved in both the cognitive-affective component of pain and in descending modulation of pain. However, the role of PPARα in the mPFC in pain responding has not been investigated. Here, we investigated the effects of pharmacological modulation of PPARα in the rat mPFC on formalin-evoked nociceptive behaviour and the impact of formalin-induced nociception on components of PPARα signalling in the mPFC.

Experimental approach

The effects of intra-mPFC microinjection of a PPARα agonist (GW7647) or a PPARα antagonist (GW6471) on formalin-evoked nociceptive behaviour in rats were studied. Quantitative real-time PCR and LC-MS/MS were used to study the effects of intraplantar injection of formalin on PPARα mRNA expression and levels of endogenous ligands, respectively, in the mPFC.

Key results

Intra-mPFC administration of GW6471, but not GW7647, resulted in delayed onset of the early second phase of formalin-evoked nociceptive behaviour. Furthermore, formalin-evoked nociceptive behaviour was associated with significant reductions in mPFC levels of endogenous PPARα ligands (N-palmitoylethanolamide and N-oleoylethanolamide) and a 70% reduction in PPARα mRNA but not protein expression.

Conclusions and implications

These data suggest that endogenous ligands may act at PPARα in the mPFC to play a facilitatory/permissive role in second phase formalin-evoked nociceptive behaviour in rats.

Linked articles

This article is part of a themed section on Cannabinoids 2013. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2014.171.issue-6.",2014-03-01 +23772049,"Porter, PaleAle 4.0: high-accuracy prediction of protein secondary structure and relative solvent accessibility.","

Summary

Protein secondary structure and solvent accessibility predictions are a fundamental intermediate step towards protein structure and function prediction. We present new systems for the ab initio prediction of protein secondary structure and solvent accessibility, Porter 4.0 and PaleAle 4.0. Porter 4.0 predicts secondary structure correctly for 82.2% of residues. PaleAle 4.0's accuracy is 80.0% for prediction in two classes with a 25% accessibility threshold. We show that the increasing training set sizes that come with the continuing growth of the Protein Data Bank keep yielding prediction quality improvements and examine the impact of protein resolution on prediction performances.

Availability

Porter 4.0 and PaleAle 4.0 are freely available for academic users at http://distillf.ucd.ie/porterpaleale/. Up to 64 kb of input in FASTA format can be processed in a single submission, with predictions now being returned to the user within a single web page and, optionally, a single email.",2013-06-14 +24292362,Finding consistent disease subnetworks using PFSNet.,"

Motivation

Microarray data analysis is often applied to characterize disease populations by identifying individual genes linked to the disease. In recent years, efforts have shifted to focus on sets of genes known to perform related biological functions (i.e. in the same pathways). Evaluating gene sets reduces the need to correct for false positives in multiple hypothesis testing. However, pathways are often large, and genes in the same pathway that do not contribute to the disease can cause a method to miss the pathway. In addition, large pathways may not give much insight to the cause of the disease. Moreover, when such a method is applied independently to two datasets of the same disease phenotypes, the two resulting lists of significant pathways often have low agreement.

Results

We present a powerful method, PFSNet, that identifies smaller parts of pathways (which we call subnetworks), and show that significant subnetworks (and the genes therein) discovered by PFSNet are up to 51% (64%) more consistent across independent datasets of the same disease phenotypes, even for datasets based on different platforms, than previously published methods. We further show that those methods which initially declared some large pathways to be insignificant would declare subnetworks detected by PFSNet in those large pathways to be significant, if they were given those subnetworks as input instead of the entire large pathways.

Availability

http://compbio.ddns.comp.nus.edu.sg:8080/pfsnet/",2013-11-28 +23394478,Multiple consensus trees: a method to separate divergent genes.,"

Background

It is generally admitted that the species tree cannot be inferred from the genetic sequences of a single gene because the evolution of different genes, and thus the gene tree topologies, may vary substantially. Gene trees can differ, for example, because of horizontal transfer events or because some of them correspond to paralogous instead of orthologous sequences. A variety of methods has been proposed to tackle the problem of the reconciliation of gene trees in order to reconstruct a species tree. When the taxa in all the trees are identical, the problem can be stated as a consensus tree problem.

Results

In this paper we define a new method for deciding whether a unique consensus tree or multiple consensus trees can best represent a set of given phylogenetic trees. If the given trees are all congruent, they should be compatible into a single consensus tree. Otherwise, several consensus trees corresponding to divergent genetic patterns can be identified. We introduce a method optimizing the generalized score, over a set of tree partitions in order to decide whether the given set of gene trees is homogeneous or not.

Conclusions

The proposed method has been validated with simulated data (random trees organized in three topological groups) as well as with real data (bootstrap trees, homogeneous set of trees, and a set of non homogeneous gene trees of 30 E. Coli strains; it is worth noting that some of the latter genes underwent horizontal gene transfers). A computer program, MCT - Multiple Consensus Trees, written in C was made freely available for the research community (it can be downloaded from http://bioinformatics.lif.univ-mrs.fr/consensus/index.html). It handles trees in a standard Newick format, builds three hierarchies corresponding to RF and QS similarities between trees and the greedy ascending algorithm. The generalized score values of all tree partitions are computed.",2013-02-09 +25394054,Evolution of Framingham cardiovascular risk score in HIV-infected patients initiating EFV- and LPV/r-based HAART in a Latin American cohort.,"

Introduction

Epidemiological studies suggest that some antiretroviral drugs may contribute to increase cardiovascular risk in HIV-infected patients. However, data from Latin American countries are limited, as impact of HAART on cardiovascular risk remains understudied. In this context, we aimed to evaluate if 10-year Framingham Cardiovascular Risk Score (FCRS) increases in patients following exposure to EFV- and LPV/r-based HAART in a Latin American cohort.

Materials and methods

Retrospective 48-week cohort study. We reviewed clinical charts of randomly selected samples of patients initiating (according to national guidelines) EFV first-line HAART and LPV/r first- or second-line (but first PI-based) HAART assisted at a reference HIV centre in Buenos Aires, Argentina (period 2004-2012). Each patient could only be included in one arm. FCRS was calculated according to National Institutes of Health risk assessment tool (http://cvdrisk.nhlbi.nih.gov/).

Results

A total of 357 patients were included: 249 in EFV arm and 108 in LPV/r arm (80 as first line and 28 as second line, but first PI-based HAART). Baseline characteristics (median, interquartile range): age, 38 (33-45) years; male, 247 (69%); viral load, 98200 (20550-306000) copies/mL; CD4 T-cell count, 115 (60-175) cel/µL; total cholesterol, 159 (135-194) mg/dL; HDL: 39 (31-41) mg/dL; LDL: 94 (72-123) mg/dL; current smoker, 29%; on antihypertensive drugs: 14 (4%), diabetic: 4 (1%). Most frequent accompanying nucleoside reverse transcriptase inhibitors (NRTIs) were 3TC (92%) and zidovudine (AZT; 76%). Baseline FCRS was low, moderate and high for 93%, 7% and 0% of patients on EFV arm and 96.7%, 1.7% and 1.7% on LPV/r arm. On EFV arm, an increase in FCRS category (low to moderate or moderate to high) was observed in 1 patient (0.9%) at 24 weeks and 6 (5,6%) at 48 weeks; 5 (4.7%) decreased category. On LPV/r arm no one varied FCRS category at 24 weeks and 2 (3.4%) increased from low to moderate at 48 weeks (no patient decreased FCRS category). Cumulative incidence of overall cardiovascular events was 1.6% on EFV and 1.8% on LPV/r arms respectively. Probability of increasing FCRS category or having a cardiovascular event did not differ between arms at a significance level of 5%.

Conclusions

Probability of increasing FCRS category and cardiovascular events was low and similar in patients exposed to EFV versus LPV/r-based HAART in a Latin American cohort. ClinicalTrials.gov Identifier: NCT01705873.",2014-11-02 +23280990,The CDC Hemophilia A Mutation Project (CHAMP) mutation list: a new online resource.,"Genotyping efforts in hemophilia A (HA) populations in many countries have identified large numbers of unique mutations in the Factor VIII gene (F8). To assist HA researchers conducting genotyping analyses, we have developed a listing of F8 mutations including those listed in existing locus-specific databases as well as those identified in patient populations and reported in the literature. Each mutation was reviewed and uniquely identified using Human Genome Variation Society (HGVS) nomenclature standards for coding DNA and predicted protein changes as well as traditional nomenclature based on the mature, processed protein. Listings also include the associated hemophilia severity classified by International Society of Thrombosis and Haemostasis (ISTH) criteria, associations of the mutations with inhibitors, and reference information. The mutation list currently contains 2,537 unique mutations known to cause HA. HA severity caused by the mutation is available for 2,022 mutations (80%) and information on inhibitors is available for 1,816 mutations (72%). The CDC Hemophilia A Mutation Project (CHAMP) Mutation List is available at http://www.cdc.gov/hemophiliamutations for download and search and will be updated quarterly based on periodic literature reviews and submitted reports.",2012-12-26 +22174743,A simple and objective method for reproducible resting state network (RSN) detection in fMRI.,"Spatial Independent Component Analysis (ICA) decomposes the time by space functional MRI (fMRI) matrix into a set of 1-D basis time courses and their associated 3-D spatial maps that are optimized for mutual independence. 
When applied to resting state fMRI (rsfMRI), ICA produces several spatial independent components (ICs) that seem to have biological relevance - the so-called resting state networks (RSNs). The ICA problem is well posed when the true data generating process follows a linear mixture of ICs model in terms of the identifiability of the mixing matrix. However, the contrast function used for promoting mutual independence in ICA is dependent on the finite amount of observed data and is potentially non-convex with multiple local minima. Hence, each run of ICA could produce potentially different IC estimates even for the same data. One technique to deal with this run-to-run variability of ICA was proposed by [1] in their algorithm RAICAR which allows for the selection of only those ICs that have a high run-to-run reproducibility. We propose an enhancement to the original RAICAR algorithm that enables us to assign reproducibility p-values to each IC and allows for an objective assessment of both within subject and across subjects reproducibility. We call the resulting algorithm RAICAR-N (N stands for null hypothesis test), and we have applied it to publicly available human rsfMRI data (http://www.nitrc.org). Our reproducibility analyses indicated that many of the published RSNs in rsfMRI literature are highly reproducible. However, we found several other RSNs that are highly reproducible but not frequently listed in the literature.",2011-12-12 +23162054,mzMatch-ISO: an R tool for the annotation and relative quantification of isotope-labelled mass spectrometry data.,"

Motivation

Stable isotope-labelling experiments have recently gained increasing popularity in metabolomics studies, providing unique insights into the dynamics of metabolic fluxes, beyond the steady-state information gathered by routine mass spectrometry. However, most liquid chromatography-mass spectrometry data analysis software lacks features that enable automated annotation and relative quantification of labelled metabolite peaks. Here, we describe mzMatch-ISO, a new extension to the metabolomics analysis pipeline mzMatch.R.

Results

Targeted and untargeted isotope profiling using mzMatch-ISO provides a convenient visual summary of the quality and quantity of labelling for every metabolite through four types of diagnostic plots that show (i) the chromatograms of the isotope peaks of each compound in each sample group; (ii) the ratio of mono-isotopic and labelled peaks indicating the fraction of labelling; (iii) the average peak area of mono-isotopic and labelled peaks in each sample group; and (iv) the trend in the relative amount of labelling in a predetermined isotopomer. To aid further statistical analyses, the values used for generating these plots are also provided as a tab-delimited file. We demonstrate the power and versatility of mzMatch-ISO by analysing a (13)C-labelled metabolome dataset from trypanosomal parasites.

Availability

mzMatch.R and mzMatch-ISO are available free of charge from http://mzmatch.sourceforge.net and can be used on Linux and Windows platforms running the latest version of R.

Contact

rainer.breitling@manchester.ac.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-17 +22160811,GUARDD: user-friendly MATLAB software for rigorous analysis of CPMG RD NMR data.,"Molecular dynamics are essential for life, and nuclear magnetic resonance (NMR) spectroscopy has been used extensively to characterize these phenomena since the 1950s. For the past 15 years, the Carr-Purcell Meiboom-Gill relaxation dispersion (CPMG RD) NMR experiment has afforded advanced NMR labs access to kinetic, thermodynamic, and structural details of protein and RNA dynamics in the crucial μs-ms time window. However, analysis of RD data is challenging because datasets are often large and require many non-linear fitting parameters, thereby confounding assessment of accuracy. Moreover, novice CPMG experimentalists face an additional barrier because current software options lack an intuitive user interface and extensive documentation. Hence, we present the open-source software package GUARDD (Graphical User-friendly Analysis of Relaxation Dispersion Data), which is designed to organize, automate, and enhance the analytical procedures which operate on CPMG RD data ( http://code.google.com/p/guardd/). This MATLAB-based program includes a graphical user interface, permits global fitting to multi-field, multi-temperature, multi-coherence data, and implements χ (2)-mapping procedures, via grid-search and Monte Carlo methods, to enhance and assess fitting accuracy. The presentation features allow users to seamlessly traverse the large amount of results, and the RD Simulator feature can help design future experiments as well as serve as a teaching tool for those unfamiliar with RD phenomena. 
Based on these innovative features, we expect that GUARDD will fill a well-defined gap in service of the RD NMR community.",2011-12-11 +22390928,ParaAT: a parallel tool for constructing multiple protein-coding DNA alignments.,"Constructing multiple homologous alignments for protein-coding DNA sequences is crucial for a variety of bioinformatic analyses but remains computationally challenging. With the growing amount of sequence data available and the ongoing efforts largely dependent on protein-coding DNA alignments, there is an increasing demand for a tool that can process a large number of homologous groups and generate multiple protein-coding DNA alignments. Here we present a parallel tool - ParaAT that is capable of parallelly constructing multiple protein-coding DNA alignments for a large number of homologs. As testified on empirical datasets, ParaAT is well suited for large-scale data analysis in the high-throughput era, providing good scalability and exhibiting high parallel efficiency for computationally demanding tasks. ParaAT is freely available for academic use only at http://cbb.big.ac.cn/software.",2012-02-27 +21504571,ENGINES: exploring single nucleotide variation in entire human genomes.,"

Background

Next generation ultra-sequencing technologies are starting to produce extensive quantities of data from entire human genome or exome sequences, and therefore new software is needed to present and analyse this vast amount of information. The 1000 Genomes project has recently released raw data for 629 complete genomes representing several human populations through their Phase I interim analysis and, although there are certain public tools available that allow exploration of these genomes, to date there is no tool that permits comprehensive population analysis of the variation catalogued by such data.

Description

We have developed a genetic variant site explorer able to retrieve data for Single Nucleotide Variation (SNVs), population by population, from entire genomes without compromising future scalability and agility. ENGINES (ENtire Genome INterface for Exploring SNVs) uses data from the 1000 Genomes Phase I to demonstrate its capacity to handle large amounts of genetic variation (>7.3 billion genotypes and 28 million SNVs), as well as deriving summary statistics of interest for medical and population genetics applications. The whole dataset is pre-processed and summarized into a data mart accessible through a web interface. The query system allows the combination and comparison of each available population sample, while searching by rs-number list, chromosome region, or genes of interest. Frequency and FST filters are available to further refine queries, while results can be visually compared with other large-scale Single Nucleotide Polymorphism (SNP) repositories such as HapMap or Perlegen.

Conclusions

ENGINES is capable of accessing large-scale variation data repositories in a fast and comprehensive manner. It allows quick browsing of whole genome variation, while providing statistical information for each variant site such as allele frequency, heterozygosity or FST values for genetic differentiation. Access to the data mart generating scripts and to the web interface is granted from http://spsmart.cesga.es/engines.php.",2011-04-19 +22811546,Fast and accurate read alignment for resequencing.,"

Motivation

Next-generation sequence analysis has become an important task both in laboratory and clinical settings. A key stage in the majority sequence analysis workflows, such as resequencing, is the alignment of genomic reads to a reference genome. The accurate alignment of reads with large indels is a computationally challenging task for researchers.

Results

We introduce SeqAlto as a new algorithm for read alignment. For reads longer than or equal to 100 bp, SeqAlto is up to 10 × faster than existing algorithms, while retaining high accuracy and the ability to align reads with large (up to 50 bp) indels. This improvement in efficiency is particularly important in the analysis of future sequencing data where the number of reads approaches many billions. Furthermore, SeqAlto uses less than 8 GB of memory to align against the human genome. SeqAlto is benchmarked against several existing tools with both real and simulated data.

Availability

Linux and Mac OS X binaries free for academic use are available at http://www.stanford.edu/group/wonglab/seqalto

Contact

whwong@stanford.edu.",2012-07-18 +25023870,Annotation of gene function in citrus using gene expression information and co-expression networks.,"

Background

The genus Citrus encompasses major cultivated plants such as sweet orange, mandarin, lemon and grapefruit, among the world's most economically important fruit crops. With increasing volumes of transcriptomics data available for these species, Gene Co-expression Network (GCN) analysis is a viable option for predicting gene function at a genome-wide scale. GCN analysis is based on a ""guilt-by-association"" principle whereby genes encoding proteins involved in similar and/or related biological processes may exhibit similar expression patterns across diverse sets of experimental conditions. While bioinformatics resources such as GCN analysis are widely available for efficient gene function prediction in model plant species including Arabidopsis, soybean and rice, in citrus these tools are not yet developed.

Results

We have constructed a comprehensive GCN for citrus inferred from 297 publicly available Affymetrix Genechip Citrus Genome microarray datasets, providing gene co-expression relationships at a genome-wide scale (33,000 transcripts). The comprehensive citrus GCN consists of a global GCN (condition-independent) and four condition-dependent GCNs that survey the sweet orange species only, all citrus fruit tissues, all citrus leaf tissues, or stress-exposed plants. All of these GCNs are clustered using genome-wide, gene-centric (guide) and graph clustering algorithms for flexibility of gene function prediction. For each putative cluster, gene ontology (GO) enrichment and gene expression specificity analyses were performed to enhance gene function, expression and regulation pattern prediction. The guide-gene approach was used to infer novel roles of genes involved in disease susceptibility and vitamin C metabolism, and graph-clustering approaches were used to investigate isoprenoid/phenylpropanoid metabolism in citrus peel, and citric acid catabolism via the GABA shunt in citrus fruit.

Conclusions

Integration of citrus gene co-expression networks, functional enrichment analysis and gene expression information provide opportunities to infer gene function in citrus. We present a publicly accessible tool, Network Inference for Citrus Co-Expression (NICCE, http://citrus.adelaide.edu.au/nicce/home.aspx), for the gene co-expression analysis in citrus.",2014-07-15 +23314327,Scotty: a web tool for designing RNA-Seq experiments to measure differential gene expression.,

Motivation

A common question arises at the beginning of every experiment where RNA-Seq is used to detect differential gene expression between two conditions: How many reads should we sequence?

Results

Scotty is an interactive web-based application that assists biologists to design an experiment with an appropriate sample size and read depth to satisfy the user-defined experimental objectives. This design can be based on data available from either pilot samples or publicly available datasets.

Availability

Scotty can be freely accessed on the web at http://euler.bc.edu/marthlab/scotty/scotty.php,2013-01-12 +21496247,CDAO-store: ontology-driven data integration for phylogenetic analysis.,"

Background

The Comparative Data Analysis Ontology (CDAO) is an ontology developed, as part of the EvoInfo and EvoIO groups supported by the National Evolutionary Synthesis Center, to provide semantic descriptions of data and transformations commonly found in the domain of phylogenetic analysis. The core concepts of the ontology enable the description of phylogenetic trees and associated character data matrices.

Results

Using CDAO as the semantic back-end, we developed a triple-store, named CDAO-Store. CDAO-Store is a RDF-based store of phylogenetic data, including a complete import of TreeBASE. CDAO-Store provides a programmatic interface, in the form of web services, and a web-based front-end, to perform both user-defined as well as domain-specific queries; domain-specific queries include search for nearest common ancestors, minimum spanning clades, filter multiple trees in the store by size, author, taxa, tree identifier, algorithm or method. In addition, CDAO-Store provides a visualization front-end, called CDAO-Explorer, which can be used to view both character data matrices and trees extracted from the CDAO-Store. CDAO-Store provides import capabilities, enabling the addition of new data to the triple-store; files in PHYLIP, MEGA, nexml, and NEXUS formats can be imported and their CDAO representations added to the triple-store.

Conclusions

CDAO-Store is made up of a versatile and integrated set of tools to support phylogenetic analysis. To the best of our knowledge, CDAO-Store is the first semantically-aware repository of phylogenetic data with domain-specific querying capabilities. The portal to CDAO-Store is available at http://www.cs.nmsu.edu/~cdaostore.",2011-04-15 +24594419,Design and implementation of handheld and desktop software for the structured reporting of hepatic masses using the LI-RADS schema.,"

Rationale and objectives

The Liver Imaging Reporting and Data System (LI-RADS) can enhance communication between radiologists and clinicians if applied consistently. We identified an institutional need to improve liver imaging report standardization and developed handheld and desktop software to serve this purpose.

Materials and methods

We developed two complementary applications that implement the LI-RADS schema. A mobile application for iOS devices written in the Objective-C language allows for rapid characterization of hepatic observations under a variety of circumstances. A desktop application written in the Java language allows for comprehensive observation characterization and standardized report text generation. We chose the applications' languages and feature sets based on the computing resources of target platforms, anticipated usage scenarios, and ease of application installation, deployment, and updating.

Results

Our primary results are the publication of the core source code implementing the LI-RADS algorithm and the availability of the applications for use worldwide via our website, http://www.liradsapp.com/. The Java application is free open-source software that can be integrated into nearly any vendor's reporting system. The iOS application is distributed through Apple's iTunes App Store. Observation categorizations of both programs have been manually validated to be correct. The iOS application has been used to characterize liver tumors during multidisciplinary conferences of our institution, and several faculty members, fellows, and residents have adopted the generated text of Java application into their diagnostic reports.

Conclusions

Although these two applications were developed for the specific reporting requirements of our liver tumor service, we intend to apply this development model to other diseases as well. Through semiautomated structured report generation and observation characterization, we aim to improve patient care while increasing radiologist efficiency.",2014-04-01 +23256889,CYPSI: a structure-based interface for cytochrome P450s and ligands in Arabidopsis thaliana.,"

Background

The cytochrome P450 (CYP) superfamily enables terrestrial plants to adapt to harsh environments. CYPs are key enzymes involved in a wide range of metabolic pathways. It is particularly useful to be able to analyse the three-dimensional (3D) structure when investigating the interactions between CYPs and their substrates. However, only two plant CYP structures have been resolved. In addition, no currently available databases contain structural information on plant CYPs and ligands. Fortunately, the 3D structure of CYPs is highly conserved and this has made it possible to obtain structural information from template-based modelling (TBM).

Description

The CYP Structure Interface (CYPSI) is a platform for CYP studies. CYPSI integrated the 3D structures for 266 A. thaliana CYPs predicted by three TBM methods: BMCD, which we developed specifically for CYP TBM; and two well-known web-servers, MUSTER and I-TASSER. After careful template selection and optimization, the models built by BMCD were accurate enough for practical application, which we demonstrated using a docking example aimed at searching for the CYPs responsible for ABA 8'-hydroxylation. CYPSI also provides extensive resources for A. thaliana CYP structure and function studies, including 400 PDB entries for solved CYPs, 48 metabolic pathways associated with A. thaliana CYPs, 232 reported CYP ligands and 18 A. thaliana CYPs docked with ligands (61 complexes in total). In addition, CYPSI also includes the ability to search for similar sequences and chemicals.

Conclusions

CYPSI provides comprehensive structure and function information for A. thaliana CYPs, which should facilitate investigations into the interactions between CYPs and their substrates. CYPSI has a user-friendly interface, which is available at http://bioinfo.cau.edu.cn/CYPSI.",2012-12-20 +22034520,SVGMap: configurable image browser for experimental data.,"

Summary

Spatial data visualization is very useful to represent biological data and quickly interpret the results. For instance, to show the expression pattern of a gene in different tissues of a fly, an intuitive approach is to draw the fly with the corresponding tissues and color the expression of the gene in each of them. However, the creation of these visual representations may be a burdensome task. Here we present SVGMap, a java application that automatizes the generation of high-quality graphics for singular data items (e.g. genes) and biological conditions. SVGMap contains a browser that allows the user to navigate the different images created and can be used as a web-based results publishing tool.

Availability

SVGMap is freely available as precompiled java package as well as source code at http://bg.upf.edu/svgmap. It requires Java 6 and any recent web browser with JavaScript enabled. The software can be run on Linux, Mac OS X and Windows systems.

Contact

nuria.lopez@upf.edu",2011-10-27 +24051875,Surgical rehearsal platform: potential uses in microsurgery.,"Surgical training has remained remarkably similar in many respects since the early days of halstedian training. Neurosurgery is a demanding field that requires extensive cognitive, perceptive, and technical training. Surgical simulation is a promising approach to facilitate acquiring proficiency in neurosurgical procedures. Simulation can permit mentoring trainees in a ""safe"" environment. By incorporating images that depict specific abnormalities in actual patients, simulation can provide realistic rehearsal for any given case for both novice and experienced surgeons in much the same way that data acquired from drones can be used to allow pilots to rehearse mission-critical maneuvers in a simulator before taking flight. Most neurosurgical simulators to date have focused on endovascular procedures, spinal procedures, temporal bone dissection, and stereotactic procedures. The use of simulator technology for microsurgery is in its infancy. This article describes a novel simulator technology developed by Surgical Theater LLC (http://www.surgicaltheater.net/home.html) called the Selman Surgical Rehearsal Platform. The platform shows promise for use in intracranial microvascular procedures, which require experience that is becoming increasingly limited for trainees who have to become proficient in more procedures in much less time than ever before.",2013-10-01 +22178299,"SimTB, a simulation toolbox for fMRI data under a model of spatiotemporal separability.","We introduce SimTB, a MATLAB toolbox designed to simulate functional magnetic resonance imaging (fMRI) datasets under a model of spatiotemporal separability. The toolbox meets the increasing need of the fMRI community to more comprehensively understand the effects of complex processing strategies by providing a ground truth that estimation methods may be compared against. 
SimTB captures the fundamental structure of real data, but data generation is fully parameterized and fully controlled by the user, allowing for accurate and precise comparisons. The toolbox offers a wealth of options regarding the number and configuration of spatial sources, implementation of experimental paradigms, inclusion of tissue-specific properties, addition of noise and head movement, and much more. A straightforward data generation method and short computation time (3-10 seconds for each dataset) allow a practitioner to simulate and analyze many datasets to potentially understand a problem from many angles. Beginning MATLAB users can use the SimTB graphical user interface (GUI) to design and execute simulations while experienced users can write batch scripts to automate and customize this process. The toolbox is freely available at http://mialab.mrn.org/software together with sample scripts and tutorials.",2011-12-08 +23376705,Bother and distress associated with Peyronie's disease: validation of the Peyronie's disease questionnaire.,"

Purpose

We validated the Peyronie's Disease Questionnaire (http://www.auxilium.com/PDQ), a 15-question self-reported survey that measures the impact and severity of Peyronie's disease symptoms in 3 domains, including 1) psychological and physical symptoms, 2) penile pain and 3) symptom bother.

Materials and methods

We used baseline data from 2 phase 3 clinical trials (334 and 345 patients, respectively) of collagenase clostridium histolyticum treatment for Peyronie's disease associated penile curvature and bother. Collected data included PDQ domain scores, International Index of Erectile Function scores, objective penile curvature measures and patient reported Peyronie's disease symptom severity. Psychometric analyses included confirmatory factor analysis, inter-item reliability, and tests of convergent and discriminant validity, all related to the overall construct validity of the scale.

Results

Confirmatory factor analysis supported the conceptual framework of the PDQ with 3 confirmed subdomains. Each scale showed good consistency, ie internal reliability (each Cronbach α >0.70). Convergent and discriminant validity were noted in the pattern of associations between PDQ domains and other Peyronie's disease measures. PDQ domain scores significantly differed between patients with vs without erectile dysfunction and between patients with vs without Peyronie's disease related symptom distress, further supporting PDQ construct validity.

Conclusions

This study confirms the conceptual framework, factor structure, and convergent and discriminant validity of the PDQ psychological and physical symptoms, penile pain, and symptom bother domains. Used in conjunction with objective penile curvature measurements, the PDQ can serve as a valuable diagnostic tool or outcome measure to assess treatment related improvements in Peyronie's disease symptoms.",2013-01-31 +22685074,Empirical Bayes conditional independence graphs for regulatory network recovery.,"

Motivation

Computational inference methods that make use of graphical models to extract regulatory networks from gene expression data can have difficulty reconstructing dense regions of a network, a consequence of both computational complexity and unreliable parameter estimation when sample size is small. As a result, identification of hub genes is of special difficulty for these methods.

Methods

We present a new algorithm, Empirical Light Mutual Min (ELMM), for large network reconstruction that has properties well suited for recovery of graphs with high-degree nodes. ELMM reconstructs the undirected graph of a regulatory network using empirical Bayes conditional independence testing with a heuristic relaxation of independence constraints in dense areas of the graph. This relaxation allows only one gene of a pair with a putative relation to be aware of the network connection, an approach that is aimed at easing multiple testing problems associated with recovering densely connected structures.

Results

Using in silico data, we show that ELMM has better performance than commonly used network inference algorithms including GeneNet, ARACNE, FOCI, GENIE3 and GLASSO. We also apply ELMM to reconstruct a network among 5492 genes expressed in human lung airway epithelium of healthy non-smokers, healthy smokers and individuals with chronic obstructive pulmonary disease assayed using microarrays. The analysis identifies dense sub-networks that are consistent with known regulatory relationships in the lung airway and also suggests novel hub regulatory relationships among a number of genes that play roles in oxidative stress and secretion.

Availability and implementation

Software for running ELMM is made available at http://mezeylab.cb.bscb.cornell.edu/Software.aspx.

Contact

ramimahdi@yahoo.com or jgm45@cornell.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-06-08 +23640334,"The Taverna workflow suite: designing and executing workflows of Web Services on the desktop, web or in the cloud.","The Taverna workflow tool suite (http://www.taverna.org.uk) is designed to combine distributed Web Services and/or local tools into complex analysis pipelines. These pipelines can be executed on local desktop machines or through larger infrastructure (such as supercomputers, Grids or cloud environments), using the Taverna Server. In bioinformatics, Taverna workflows are typically used in the areas of high-throughput omics analyses (for example, proteomics or transcriptomics), or for evidence gathering methods involving text mining or data mining. Through Taverna, scientists have access to several thousand different tools and resources that are freely available from a large range of life science institutions. Once constructed, the workflows are reusable, executable bioinformatics protocols that can be shared, reused and repurposed. A repository of public workflows is available at http://www.myexperiment.org. This article provides an update to the Taverna tool suite, highlighting new features and developments in the workbench and the Taverna Server.",2013-05-02 +23282288,GLAD4U: deriving and prioritizing gene lists from PubMed literature.,"

Background

Answering questions such as ""Which genes are related to breast cancer?"" usually requires retrieving relevant publications through the PubMed search engine, reading these publications, and creating gene lists. This process is not only time-consuming, but also prone to errors.

Results

We report GLAD4U (Gene List Automatically Derived For You), a new, free web-based gene retrieval and prioritization tool. GLAD4U takes advantage of existing resources of the NCBI to ensure computational efficiency. The quality of gene lists created by GLAD4U for three Gene Ontology (GO) terms and three disease terms was assessed using corresponding ""gold standard"" lists curated in public databases. For all queries, GLAD4U gene lists showed very high recall but low precision, leading to low F-measure. As a comparison, EBIMed's recall was consistently lower than GLAD4U, but its precision was higher. To present the most relevant genes at the top of a list, we studied two prioritization methods based on publication count and the hypergeometric test, and compared the ranked lists and those generated by EBIMed to the gold standards. Both GLAD4U methods outperformed EBIMed for all queries based on a variety of quality metrics. Moreover, the hypergeometric method allowed for a better performance by thresholding genes with low scores. In addition, manual examination suggests that many false-positives could be explained by the incompleteness of the gold standards. The GLAD4U user interface accepts any valid queries for PubMed, and its output page displays the ranked gene list and information associated with each gene, chronologically-ordered supporting publications, along with a summary of the run and links for file export and functional enrichment and protein interaction network analysis.

Conclusions

GLAD4U has a high overall recall. Although precision is generally low, the prioritization methods successfully rank truly relevant genes at the top of the lists to facilitate efficient browsing. GLAD4U is simple to use, and its interface can be found at: http://bioinfo.vanderbilt.edu/glad4u.",2012-12-17 +22328783,The identification of short linear motif-mediated interfaces within the human interactome.,"

Motivation

Eukaryotic proteins are highly modular, containing multiple interaction interfaces that mediate binding to a network of regulators and effectors. Recent advances in high-throughput proteomics have rapidly expanded the number of known protein-protein interactions (PPIs); however, the molecular basis for the majority of these interactions remains to be elucidated. There has been a growing appreciation of the importance of a subset of these PPIs, namely those mediated by short linear motifs (SLiMs), particularly the canonical and ubiquitous SH2, SH3 and PDZ domain-binding motifs. However, these motif classes represent only a small fraction of known SLiMs and outside these examples little effort has been made, either bioinformatically or experimentally, to discover the full complement of motif instances.

Results

In this article, interaction data are analysed to identify and characterize an important subset of PPIs, those involving SLiMs binding to globular domains. To do this, we introduce iELM, a method to identify interactions mediated by SLiMs and add molecular details of the interaction interfaces to both interacting proteins. The method identifies SLiM-mediated interfaces from PPI data by searching for known SLiM-domain pairs. This approach was applied to the human interactome to identify a set of high-confidence putative SLiM-mediated PPIs.

Availability

iELM is freely available at http://elmint.embl.de

Contact

toby.gibson@embl.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-02-10 +22156918,A quantitative structure-activity relationship (QSAR) study on glycan array data to determine the specificities of glycan-binding proteins.,"Advances in glycan array technology have provided opportunities to automatically and systematically characterize the binding specificities of glycan-binding proteins. However, there is still a lack of robust methods for such analyses. In this study, we developed a novel quantitative structure-activity relationship (QSAR) method to analyze glycan array data. We first decomposed glycan chains into mono-, di-, tri- or tetrasaccharide subtrees. The bond information was incorporated into subtrees to help distinguish glycan chain structures. Then, we performed partial least-squares (PLS) regression on glycan array data using the subtrees as features. The application of QSAR to the glycan array data of different glycan-binding proteins demonstrated that PLS regression using subtree features can obtain higher R(2) values and a higher percentage of variance explained in glycan array intensities. Based on the regression coefficients of PLS, we were able to effectively identify subtrees that indicate the binding specificities of a glycan-binding protein. Our approach will facilitate the glycan-binding specificity analysis using the glycan array. A user-friendly web tool of the QSAR method is available at http://bci.clemson.edu/tools/glycan_array.",2011-12-07 +21352538,"A lightweight, flow-based toolkit for parallel and distributed bioinformatics pipelines.","

Background

Bioinformatic analyses typically proceed as chains of data-processing tasks. A pipeline, or 'workflow', is a well-defined protocol, with a specific structure defined by the topology of data-flow interdependencies, and a particular functionality arising from the data transformations applied at each step. In computer science, the dataflow programming (DFP) paradigm defines software systems constructed in this manner, as networks of message-passing components. Thus, bioinformatic workflows can be naturally mapped onto DFP concepts.

Results

To enable the flexible creation and execution of bioinformatics dataflows, we have written a modular framework for parallel pipelines in Python ('PaPy'). A PaPy workflow is created from re-usable components connected by data-pipes into a directed acyclic graph, which together define nested higher-order map functions. The successive functional transformations of input data are evaluated on flexibly pooled compute resources, either local or remote. Input items are processed in batches of adjustable size, allowing one to tune the trade-off between parallelism and lazy-evaluation (memory consumption). An add-on module ('NuBio') facilitates the creation of bioinformatics workflows by providing domain specific data-containers (e.g., for biomolecular sequences, alignments, structures) and functionality (e.g., to parse/write standard file formats).

Conclusions

PaPy offers a modular framework for the creation and deployment of parallel and distributed data-processing workflows. Pipelines derive their functionality from user-written, data-coupled components, so PaPy also can be viewed as a lightweight toolkit for extensible, flow-based bioinformatics data-processing. The simplicity and flexibility of distributed PaPy pipelines may help users bridge the gap between traditional desktop/workstation and grid computing. PaPy is freely distributed as open-source Python code at http://muralab.org/PaPy, and includes extensive documentation and annotated usage examples.",2011-02-25 +23248008,McGenus: a Monte Carlo algorithm to predict RNA secondary structures with pseudoknots.,"We present McGenus, an algorithm to predict RNA secondary structures with pseudoknots. The method is based on a classification of RNA structures according to their topological genus. McGenus can treat sequences of up to 1000 bases and performs an advanced stochastic search of their minimum free energy structure allowing for non-trivial pseudoknot topologies. Specifically, McGenus uses a Monte Carlo algorithm with replica exchange for minimizing a general scoring function which includes not only free energy contributions for pair stacking, loop penalties, etc. but also a phenomenological penalty for the genus of the pairing graph. The good performance of the stochastic search strategy was successfully validated against TT2NE which uses the same free energy parametrization and performs exhaustive or partially exhaustive structure search, albeit for much shorter sequences (up to 200 bases). Next, the method was applied to other RNA sets, including an extensive tmRNA database, yielding results that are competitive with existing algorithms. Finally, it is shown that McGenus highlights possible limitations in the free energy scoring function. 
The algorithm is available as a web server at http://ipht.cea.fr/rna/mcgenus.php.",2012-12-16 +23918251,Distilled single-cell genome sequencing and de novo assembly for sparse microbial communities.,"

Motivation

Identification of every single genome present in a microbial sample is an important and challenging task with crucial applications. It is challenging because there are typically millions of cells in a microbial sample, the vast majority of which elude cultivation. The most accurate method to date is exhaustive single-cell sequencing using multiple displacement amplification, which is simply intractable for a large number of cells. However, there is hope for breaking this barrier, as the number of different cell types with distinct genome sequences is usually much smaller than the number of cells.

Results

Here, we present a novel divide and conquer method to sequence and de novo assemble all distinct genomes present in a microbial sample with a sequencing cost and computational complexity proportional to the number of genome types, rather than the number of cells. The method is implemented in a tool called Squeezambler. We evaluated Squeezambler on simulated data. The proposed divide and conquer method successfully reduces the cost of sequencing in comparison with the naïve exhaustive approach.

Availability

Squeezambler and datasets are available at http://compbio.cs.wayne.edu/software/squeezambler/.",2013-08-05 +23281852,Space-related pharma-motifs for fast search of protein binding motifs and polypharmacological targets.,"

Background

To discover a compound inhibiting multiple proteins (i.e. polypharmacological targets) is a new paradigm for the complex diseases (e.g. cancers and diabetes). In general, the polypharmacological proteins often share similar local binding environments and motifs. As the exponential growth of the number of protein structures, to find the similar structural binding motifs (pharma-motifs) is an emergency task for drug discovery (e.g. side effects and new uses for old drugs) and protein functions.

Results

We have developed a Space-Related Pharmamotifs (called SRPmotif) method to recognize the binding motifs by searching against protein structure database. SRPmotif is able to recognize conserved binding environments containing spatially discontinuous pharma-motifs which are often short conserved peptides with specific physico-chemical properties for protein functions. Among 356 pharma-motifs, 56.5% interacting residues are highly conserved. Experimental results indicate that 81.1% and 92.7% polypharmacological targets of each protein-ligand complex are annotated with same biological process (BP) and molecular function (MF) terms, respectively, based on Gene Ontology (GO). Our experimental results show that the identified pharma-motifs often consist of key residues in functional (active) sites and play the key roles for protein functions. The SRPmotif is available at http://gemdock.life.nctu.edu.tw/SRP/.

Conclusions

SRPmotif is able to identify similar pharma-interfaces and pharma-motifs sharing similar binding environments for polypharmacological targets by rapidly searching against the protein structure database. Pharma-motifs describe the conservations of binding environments for drug discovery and protein functions. Additionally, these pharma-motifs provide the clues for discovering new sequence-based motifs to predict protein functions from protein sequence databases. We believe that SRPmotif is useful for elucidating protein functions and drug discovery.",2012-12-13 +22859502,A robust approach to extract biomedical events from literature.,"

Motivation

The abundance of biomedical literature has attracted significant interest in novel methods to automatically extract biomedical relations from the literature. Until recently, most research was focused on extracting binary relations such as protein-protein interactions and drug-disease relations. However, these binary relations cannot fully represent the original biomedical data. Therefore, there is a need for methods that can extract fine-grained and complex relations known as biomedical events.

Results

In this article we propose a novel method to extract biomedical events from text. Our method consists of two phases. In the first phase, training data are mapped into structured representations. Based on that, templates are used to extract rules automatically. In the second phase, extraction methods are developed to process the obtained rules. When evaluated against the Genia event extraction abstract and full-text test datasets (Task 1), we obtain results with F-scores of 52.34 and 53.34, respectively, which are comparable to the state-of-the-art systems. Furthermore, our system achieves superior performance in terms of computational efficiency.

Availability

Our source code is available for academic use at http://dl.dropbox.com/u/10256952/BioEvent.zip.",2012-08-01 +23281853,FastAnnotator--an efficient transcript annotation web tool.,"

Background

Recent developments in high-throughput sequencing (HTS) technologies have made it feasible to sequence the complete transcriptomes of non-model organisms or metatranscriptomes from environmental samples. The challenge after generating hundreds of millions of sequences is to annotate these transcripts and classify the transcripts based on their putative functions. Because many biological scientists lack the knowledge to install Linux-based software packages or maintain databases used for transcript annotation, we developed an automatic annotation tool with an easy-to-use interface.

Methods

To elucidate the potential functions of gene transcripts, we integrated well-established annotation tools: Blast2GO, PRIAM and RPS BLAST in a web-based service, FastAnnotator, which can assign Gene Ontology (GO) terms, Enzyme Commission numbers (EC numbers) and functional domains to query sequences.

Results

Using six transcriptome sequence datasets as examples, we demonstrated the ability of FastAnnotator to assign functional annotations. FastAnnotator annotated 88.1% and 81.3% of the transcripts from the well-studied organisms Caenorhabditis elegans and Streptococcus parasanguinis, respectively. Furthermore, FastAnnotator annotated 62.9%, 20.4%, 53.1% and 42.0% of the sequences from the transcriptomes of sweet potato, clam, amoeba, and Trichomonas vaginalis, respectively, which lack reference genomes. We demonstrated that FastAnnotator can complete the annotation process in a reasonable amount of time and is suitable for the annotation of transcriptomes from model organisms or organisms for which annotated reference genomes are not available.

Conclusions

The sequencing process no longer represents the bottleneck in the study of genomics, and automatic annotation tools have become invaluable as the annotation procedure has become the limiting step. We present FastAnnotator, which was an automated annotation web tool designed to efficiently annotate sequences with their gene functions, enzyme functions or domains. FastAnnotator is useful in transcriptome studies and especially for those focusing on non-model organisms or metatranscriptomes. FastAnnotator does not require local installation and is freely available at http://fastannotator.cgu.edu.tw.",2012-12-13 +23281624,A computational tool for the design of live attenuated virus vaccine based on microRNA-mediated gene silencing.,"

Background

The microRNA-based gene-silencing machinery has been recognized as a promising approach to control viral replication and used for improving safety for the live attenuated virus vaccines. The effective host microRNA response elements (MREs) have been incorporated into a virus sequence mainly based on the experimental trials for identifying both microRNA binding sites and effective mutations. The design of MREs for viral genomes or with multiple host microRNAs of interest, then, will be time and cost consuming.

Results

In this paper, we introduced a computational flow that could be used to design MREs of human microRNAs within Influenza A H1N1 virus gene segments. The main steps of the flow includes locating possible binding sites; MREs, of human microRNAs within the viral sequences using a miRNA target prediction tool (miranda), performing various mutations among mismatched binding positions, calculating the binding energy, score, identity, and the effects of changed physical properties of amino acids according to the changed bases in RNA level, and prioritizing the mutated binding sites. The top ranked MREs of human microRNA hsa-miR-93 is consistent with previous literature while other results waited to be experimentally verified. To make the computational flow easily accessible by virologists, we also developed MicroLive, a web server version of the MRE design flow together with the database of miranda-predicted MREs within gene sequences of seven RNA viruses including Influenza A, dengue, hepatitis C, measles, mumps, poliovirus, and rabies. Users may design MREs of specific human microRNAs for their input viral sequences using MRE design tool or optimize the miranda-predicted MREs of seven viruses available on the system. Also, users could design varied number of MREs for multiple human microRNAs to modulate the degree of live vaccine attenuation and reduce the likelihood of escape mutants.

Conclusions

The computational design of MREs helps reduce time and cost for experimental trials. While the flow was demonstrated using human microRNAs and Influenza A H1N1 virus, it could be flexibly applied to other hosts (e.g., animals) and viruses of interest for constructing host-specific live attenuated vaccines. Also, it could be deployed for engineering tissue-specific oncolytic viruses in cancer virotherapeutics. The MicroLive web server is freely accessible at http://www.biotec.or.th/isl/microlive.",2012-12-13 +23281648,C-mii: a tool for plant miRNA and target identification.,"

Background

MicroRNAs (miRNAs) have been known to play an important role in several biological processes in both animals and plants. Although several tools for miRNA and target identification are available, the number of tools tailored towards plants is limited, and those that are available have specific functionality, lack graphical user interfaces, and restrict the number of input sequences. Large-scale computational identifications of miRNAs and/or targets of several plants have been also reported. Their methods, however, are only described as flow diagrams, which require programming skills and the understanding of input and output of the connected programs to reproduce.

Results

To overcome these limitations and programming complexities, we proposed C-mii as a ready-made software package for both plant miRNA and target identification. C-mii was designed and implemented based on established computational steps and criteria derived from previous literature with the following distinguishing features. First, software is easy to install with all-in-one programs and packaged databases. Second, it comes with graphical user interfaces (GUIs) for ease of use. Users can identify plant miRNAs and targets via step-by-step execution, explore the detailed results from each step, filter the results according to proposed constraints in plant miRNA and target biogenesis, and export sequences and structures of interest. Third, it supplies bird's eye views of the identification results with infographics and grouping information. Fourth, in terms of functionality, it extends the standard computational steps of miRNA target identification with miRNA-target folding and GO annotation. Fifth, it provides helper functions for the update of pre-installed databases and automatic recovery. Finally, it supports multi-project and multi-thread management.

Conclusions

C-mii constitutes the first complete software package with graphical user interfaces enabling computational identification of both plant miRNA genes and miRNA targets. With the provided functionalities, it can help accelerate the study of plant miRNAs and targets, especially for small and medium plant molecular labs without bioinformaticians. C-mii is freely available at http://www.biotec.or.th/isl/c-mii for both Windows and Ubuntu Linux platforms.",2012-12-13 +25483703,Immune camouflage: relevance to vaccines and human immunology.,"High strain sequence variability, interference with innate immune mechanisms, and epitope deletion are all examples of strategies that pathogens have evolved to subvert host defenses. To this list we would add another strategy: immune camouflage. Pathogens whose epitope sequences are cross-conserved with multiple human proteins at the TCR-facing residues may be exploiting ""ignorance and tolerance,"" which are mechanisms by which mature T cells avoid immune responses to self-antigens. By adopting amino acid configurations that may be recognized by autologous regulatory T cells, pathogens may be actively suppressing protective immunity. Using the new JanusMatrix TCR-homology-mapping tool, we have identified several such 'camouflaged' tolerizing epitopes that are present in the viral genomes of pathogens such as emerging H7N9 influenza. Thus in addition to the overall low number of T helper epitopes that is present in H7 hemaglutinin (as described previously, see http://dx.doi.org/10.4161/hv.24939), the presence of such tolerizing epitopes in H7N9 could explain why, in recent vaccine trials, whole H7N9-HA was poorly immunogenic and associated with low seroconversion rates (see http://dx.doi.org/10.4161/hv.28135). 
In this commentary, we provide an overview of the immunoinformatics process leading to the discovery of tolerizing epitopes in pathogen genomic sequences, provide a brief summary of laboratory data that validates the discovery, and point the way forward. Removal of viral, bacterial and parasite tolerizing epitopes may permit researchers to develop more effective vaccines and immunotherapeutics in the future.",2014-01-01 +23792804,"Web-based educational activities developed by the Society for Neuroscience in Anesthesiology and Critical Care (SNACC): the experience of process, utilization, and expert evaluation.","

Background

Web-based delivery of educational material by scientific societies appears to have increased recently. However, the utilization of such efforts by the members of professional societies is unknown. We report the experience with delivery of educational resources on the Web site of the Society for Neuroscience in Anesthesiology and Critical Care (SNACC), and utilization of those resources by members.

Methods

Three web-based educational initiatives were developed over 1 year to be disseminated through the SNACC Web site (http://www.snacc.org) for society members: (1) The SNACC Bibliography; (2) ""Chat with the Author""; and (3) Clinical Case Discussions. Content experts and authors of important new research publications were invited to contribute. Member utilization data were abstracted with the help of the webmaster.

Results

For the bibliography, there were 1175 page requests during the 6-month period after its launch by 122/664 (19%) distinct SNACC members. The bibliography was utilized by 107/553 (19%) of the active members and 15/91 (16.5%) of the trainee members. The ""Chats with the Authors"" were viewed by 56 (9%) members and the Clinical Case Discussions by 51 (8%) members.

Conclusions

Educational resources can be developed in a timely manner utilizing member contributions without additional financial implications. However, the member utilization of these resources was lower than expected. These are first estimates of utilization of web-based educational resources by members of a scientific society. Further evaluation of such utilization by members of other societies as well as measures of the effectiveness and impact of such activities is needed.",2014-01-01 +24389653,SNPdryad: predicting deleterious non-synonymous human SNPs using only orthologous protein sequences.,"

Motivation

The recent advances in genome sequencing have revealed an abundance of non-synonymous polymorphisms among human individuals; subsequently, it is of immense interest and importance to predict whether such substitutions are functionally neutral or have deleterious effects. The accuracy of such prediction algorithms depends on the quality of the multiple-sequence alignment, which is used to infer how an amino acid substitution is tolerated at a given position. Because of the scarcity of orthologous protein sequences in the past, the existing prediction algorithms all include sequences of protein paralogs in the alignment, which can dilute the conservation signal and affect prediction accuracy. However, we believe that, with the sequencing of a large number of mammalian genomes, it is now feasible to include only protein orthologs in the alignment and improve the prediction performance.

Results

We have developed a novel prediction algorithm, named SNPdryad, which only includes protein orthologs in building a multiple sequence alignment. Among many other innovations, SNPdryad uses different conservation scoring schemes and uses Random Forest as a classifier. We have tested SNPdryad on several datasets. We found that SNPdryad consistently outperformed other methods in several performance metrics, which is attributed to the exclusion of paralogous sequence. We have run SNPdryad on the complete human proteome, generating prediction scores for all the possible amino acid substitutions.

Availability and implementation

The algorithm and the prediction results can be accessed from the Web site: http://snps.ccbr.utoronto.ca:8080/SNPdryad/ CONTACT: Zhaolei.Zhang@utoronto.ca Supplementary information: Supplementary data are available at Bioinformatics online.",2014-01-02 +23233129,Development of universal genetic markers based on single-copy orthologous (COSII) genes in Poaceae.,"KEY MESSAGE : We develop a set of universal genetic markers based on single-copy orthologous (COSII) genes in Poaceae. Being evolutionary conserved, single-copy orthologous (COSII) genes are particularly useful in comparative mapping and phylogenetic investigation among species. In this study, we identified 2,684 COSII genes based on five sequenced Poaceae genomes including rice, maize, sorghum, foxtail millet, and brachypodium, and then developed 1,072 COSII markers whose transferability and polymorphism among five bamboo species were further evaluated with 46 pairs of randomly selected primers. 91.3 % of the 46 primers obtained clear amplification in at least one bamboo species, and 65.2 % of them produced polymorphism in more than one species. We also used 42 of them to construct the phylogeny for the five bamboo species, and it might reflect more precise evolutionary relationship than the one based on the vegetative morphology. The results indicated a promising prospect of applying these markers to the investigation of genetic diversity and the classification of Poaceae. To ease and facilitate access of the information of common interest to readers, a web-based database of the COSII markers is provided ( http://www.sicau.edu.cn/web/yms/PCOSWeb/PCOS.html ).",2012-12-12 +21777084,Discovery of protein complexes with core-attachment structures from Tandem Affinity Purification (TAP) data.,"Many cellular functions involve protein complexes that are formed by multiple interacting proteins. Tandem Affinity Purification (TAP) is a popular experimental method for detecting such multi-protein interactions. 
However, current computational methods that predict protein complexes from TAP data require converting the co-complex relationships in TAP data into binary interactions. The resulting pairwise protein-protein interaction (PPI) network is then mined for densely connected regions that are identified as putative protein complexes. Converting the TAP data into PPI data not only introduces errors but also loses useful information about the underlying multi-protein relationships that can be exploited to detect the internal organization (i.e., core-attachment structures) of protein complexes. In this article, we propose a method called CACHET that detects protein complexes with Core-AttaCHment structures directly from bipartitE TAP data. CACHET models the TAP data as a bipartite graph in which the two vertex sets are the baits and the preys, respectively. The edges between the two vertex sets represent bait-prey relationships. CACHET first focuses on detecting high-quality protein-complex cores from the bipartite graph. To minimize the effects of false positive interactions, the bait-prey relationships are indexed with reliability scores. Only non-redundant, reliable bicliques computed from the TAP bipartite graph are regarded as protein-complex cores. CACHET constructs protein complexes by including attachment proteins into the cores. We applied CACHET on large-scale TAP datasets and found that CACHET outperformed existing methods in terms of prediction accuracy (i.e., F-measure and functional homogeneity of predicted complexes). In addition, the protein complexes predicted by CACHET are equipped with core-attachment structures that provide useful biological insights into the inherent functional organization of protein complexes. Our supplementary material can be found at http://www1.i2r.a-star.edu.sg/~xlli/CACHET/CACHET.htm ; binary executables can also be found there. 
Supplementary Material is also available at www.liebertonline.com/cmb.",2011-07-21 +24940857,Congenital diseases of the gastrointestinal tract.,"With the rapid increase in knowledge on the genetic origin of diseases within the gastrointestinal tract the number of congenital diseases, which already manifest during childhood have drastically increased. Due to the large application of molecular genetics the number is steadily increasing. To make the access to these rare diseases fast and efficient the data base of the National Library of Medicine (Online Mendelian Inheritance of Man - OMIN) is a very helpful online tool, with which all these disease entities can be found easily (http://www.ncbi.nlm.nih.gov/omim). Detailed tables are given to find most of the congenitally inherited disease, which affect the gastrointestinal tract. A variety of congenital diarrheas with disturbances of digestion, hydrolysis, absorption and secretion is described in detail: lactose intolerance, sucrose intolerance, glucose-galactose malabsorption, fructose malabsorption, trehalase and enterokinase deficiency, congenital chloride and sodium diarrhea, congenital hypomagnesaemia, primary bile acid malabsorption, acrodermatitis enteropathica and Menke's syndrome. Also described in detail are diseases with structural anomalies of the intestine like microvillous inclusion disease, congenital tufting enteropathy and IPEX syndrome. The diagnosis in the disturbances of carbohydrate hydrolysis or absorption can be established by H2-breath tests after appropriate sugar challenge. Treatment consists of elimination of the responsible sugar from the diet. The diagnosis of the congenital secretory diarrheas is established by investigation of electrolytes in blood and stool. Substitution of high doses of the responsible mineral can improve the clinical outcome. In acrodermatitis enteropathica low serum zinc level together with the typical skin lesions guide to the diagnosis. 
High doses of oral zinc aspartate can cure the symptoms of the disease. The diagnosis of structural congenital lesions of the intestine can be established by histology and/or electron microscopy and molecular identification of the responsible mutations. The treatment of these diseases is difficult and therefore the prognosis remains poor. Immunosupressive therapy, total parenteral nutrition and even intestinal or bone marrow transplantation are the only choice for treatment.",2014-05-01 +25327087,An assessment of standardisation of HbA1c testing across clinical laboratories in India and its impact on diabetes management.,"

Objectives

This study is aimed at evaluating the degree of standardisation of HbA1c and glucose testing across accredited laboratories in India.

Methods

The information declared on the scope of testing by 147 medical laboratories accredited by the National Accreditation Board for Testing and Calibration Laboratories (NABL) across India was used by the authors for this study (http://www.nabl-india.org). This information on the scope of testing is available within the public domain and is accessible through the NABL website and covered laboratories accredited between 2009 and 2012. We focussed on HbA1c and glucose tests offered by laboratories and documented the way tests were named, the methodologies used and the degree of confidence in testing based on the coefficient of variation (CV). The data was independently reviewed by two medical biochemists and then subjected to analysis.

Results

Although the glucose test appeared to be ubiquitous, HbA1c assays appeared on the scope of testing in 87.1% of the laboratories. The HbA1c tests however appear to be poorly standardised across laboratories. We noted gross differences in test nomenclature, methodology and analytical performance across laboratories.

Conclusion

This is one of the first studies that has focussed on the standards of laboratory care for diabetes management in India. The study highlights the lack of standardisation in nomenclature, analytical performance and methodology of tests used for HbA1c in NABL accredited laboratories across India. Affirmative actions in terms of improved regulation, patient advocacy, further studies on impact of laboratory quality and education of physicians, healthcare providers, laboratorians may improve harmonisation and quality of patient care in diabetes in India.",2014-01-01 +23281941,Personalized cloud-based bioinformatics services for research and education: use cases and the elasticHPC package.,"

Background

Bioinformatics services have been traditionally provided in the form of a web-server that is hosted at institutional infrastructure and serves multiple users. This model, however, is not flexible enough to cope with the increasing number of users, increasing data size, and new requirements in terms of speed and availability of service. The advent of cloud computing suggests a new service model that provides an efficient solution to these problems, based on the concepts of ""resources-on-demand"" and ""pay-as-you-go"". However, cloud computing has not yet been introduced within bioinformatics servers due to the lack of usage scenarios and software layers that address the requirements of the bioinformatics domain.

Results

In this paper, we provide different use case scenarios for providing cloud computing based services, considering both the technical and financial aspects of the cloud computing service model. These scenarios are for individual users seeking computational power as well as bioinformatics service providers aiming at provision of personalized bioinformatics services to their users. We also present elasticHPC, a software package and a library that facilitates the use of high performance cloud computing resources in general and the implementation of the suggested bioinformatics scenarios in particular. Concrete examples that demonstrate the suggested use case scenarios with whole bioinformatics servers and major sequence analysis tools like BLAST are presented. Experimental results with large datasets are also included to show the advantages of the cloud model.

Conclusions

Our use case scenarios and the elasticHPC package are steps towards the provision of cloud based bioinformatics services, which would help in overcoming the data challenge of recent biological research. All resources related to elasticHPC and its web-interface are available at http://www.elasticHPC.org.",2012-12-13 +24172184,A risk scoring system to predict in-hospital mortality in patients with cirrhosis presenting with upper gastrointestinal bleeding.,"

Goals

We aimed to develop a simple and practical risk scoring system to predict in-hospital mortality in cirrhotics presenting with upper gastrointestinal (GI) bleeding.

Study

Extensive clinical data were captured in patients with documented cirrhosis who underwent endoscopic evaluation for upper GI bleeding between January 1, 2003 and June 30, 2011 at Parkland Memorial Hospital. Predictors of mortality were identified by multivariate regression analysis.

Results

A total of 884 patients with cirrhosis admitted for upper GI bleeding were identified; 809 patients survived and 75 died (8.4%). The etiology of bleeding was similar in both groups, with bleeding attributed to esophageal varices in 59% of survivors and 60% of non-survivors (ulcer disease and other etiologies of bleeding accounted for the other causes of bleeding). Mortality was 8.6% and 8.3% in patients with variceal bleeding and nonvariceal bleeding, respectively. While survivors and those who died were similarly matched with regard to gender, age, ethnicity and etiology of cirrhosis, patients who died had lower systolic blood pressures, higher pulse rates and lower mean arterial pressures at admission than patients who survived. Non-survivors were more likely to be Childs C (61% vs. 19%, P<0.001). Multivariate regression analysis identified the following 4 predictors of in-hospital mortality: use of vasoactive pressors, number of packed red blood cells transfused, model for end-stage liver disease (MELD) score, and serum albumin. A receiver operating characteristic curve including these 4 variables yielded an area under the receiver operating characteristic (AUROC) curve of 0.94 (95% confidence interval, 0.91-0.98). Classification and Regression Tree analysis yielded similar results, identifying vasoactive pressors and then MELD>21 as the most important decision nodes for predicting death. By comparison, using the Rockall scoring system in the same patients, the AUROC curve was 0.70 (95% confidence interval, 0.64-0.76 and the comparison of the University of Texas Southwestern model to the Rockall model revealed P<0.0001). A validation set comprised of 150 unique admissions between July 1, 2011 and July 31, 2012, had an AUROC of 0.92, and the outcomes of 97% of the subjects in this set were accurately predicted by the risk score model.

Conclusions

Use of vasoactive agents, packed red blood cell transfusion, albumin, and MELD score were highly predictive of in-hospital mortality in cirrhotics presenting with upper GI bleeding. These variables were used to formulate a clinical risk scoring system for in-hospital mortality, which is available at: http://medweb.musc.edu/LogisticModelPredictor.",2014-09-01 +21948615,NTP-CERHR expert panel report on the developmental toxicity of soy infant formula.,"Soy infant formula contains soy protein isolates and is fed to infants as a supplement to or replacement for human milk or cow milk. Soy protein isolates contains estrogenic isoflavones (phytoestrogens) that occur naturally in some legumes, especially soybeans. Phytoestrogens are nonsteroidal, estrogenic compounds. In plants, nearly all phytoestrogens are bound to sugar molecules and these phytoestrogen-sugar complexes are not generally considered hormonally active. Phytoestrogens are found in many food products in addition to soy infant formula, especially soy-based foods such as tofu, soy milk, and in some over-the-counter dietary supplements. Soy infant formula was selected for National Toxicology Program (NTP) evaluation because of (1) the availability of large number of developmental toxicity studies in laboratory animals exposed to the isoflavones found in soy infant formula (namely, genistein) or other soy products, as well as few studies on human infants fed soy infant formula, (2) the availability of information on exposures in infants fed soy infant formula, and (3) public concern for effects on infant or child development. On October 2, 2008 (73 FR 57360), the NTP Center for the Evaluation of Risks to Human Reproduction (CERHR) announced its intention to conduct an updated review of soy infant formula to complete a previous evaluation that was initiated in 2005. 
Both the current and previous evaluations relied on expert panels to assist the NTP in developing its conclusions on the potential developmental effects associated with the use of soy infant formula, presented in the NTP Brief on Soy Infant Formula. The initial expert panel met on March 15 to 17, 2006, to reach conclusions on the potential developmental and reproductive toxicities of soy infant formula and its predominant isoflavone constituent genistein. The expert panel reports were released for public comment on May 5, 2006 (71 FR 28368). On November 8, 2006 (71 FR 65537), CERHR staff released draft NTP Briefs on Genistein and Soy Formula that provided the NTP's interpretation of the potential for genistein and soy infant formula to cause adverse reproductive and/or developmental effects in exposed humans. However, CERHR did not complete these evaluations, finalize the briefs, or issue NTP Monographs on these substances based on this initial evaluation. Between 2006 and 2009, a substantial number of new publications related to human exposure or reproductive and/or developmental toxicity were published for these substances. Thus, CERHR determined that updated evaluations of genistein and soy infant formula were needed. However, the current evaluation focuses only on soy infant formula and the potential developmental toxicity of its major isoflavone components, e.g. genistein, daidzein (and estrogenic metabolite, equol), and glycitein. This updated evaluation does not include an assessment on the potential reproductive toxicity of genistein following exposures during adulthood as was carried out in the 2006 evaluation. CERHR narrowed the scope of the evaluation because the assessment of reproductive effects of genistein following exposure to adults was not considered relevant to the consideration of soy infant formula use in infants during the 2006 evaluation. 
To obtain updated information about soy infant formula for the CERHR evaluation, the PubMed (Medline) database was searched from February 2006 to August 2009 with genistein/genistin, daidzein/daidzin, glycitein/glycitin, equol, soy, and other relevant keywords. References were also identified from the bibliographies of published literature. The updated expert panel report represents the efforts of a 14-member panel of government and nongovernment scientists, and was prepared with assistance from NTP staff. The finalized report, released on January 15, 2010 (75 FR 2545), reflects consideration of public comments received on a draft report that was released on October 19, 2009, for public comment and discussions that occurred at a public meeting of the expert panel held December 16 to 18, 2009 (74 FR 53509). The finalized report presents conclusions on (1) the strength of scientific evidence that soy infant formula or its isoflavone constituents are developmental toxicants based on data from in vitro, animal, or human studies; (2) the extent of exposures in infants fed soy infant formula; (3) the assessment of the scientific evidence that adverse developmental health effects may be associated with such exposures; and (4) knowledge gaps that will help establish research and testing priorities to reduce uncertainties and increase confidence in future evaluations. The Expert Panel expressed minimal concern for adverse developmental effects in infants fed soy infant formula. This level of concern represents a ""2"" on the five-level scale of concern used by the NTP that ranges from negligible concern (""1"") to serious concern (""5""). The Expert Panel Report on Soy Infant Formula was considered extensively by NTP staff in preparing the 2010 NTP Brief on Soy Infant Formula, which represents the NTP's opinion on the potential for exposure to soy infant formula to cause adverse developmental effects in humans. 
The NTP concurred with the expert panel that there is minimal concern for adverse effects on development in infants who consume soy infant formula. This conclusion was based on information about soy infant formula provided in the expert panel report, public comments received during the course of the expert panel evaluation, additional scientific information made available since the expert panel meeting, and peer reviewer critiques of the draft NTP Brief by the NTP Board of Scientific Counselors (BSC) on May 10, 2010 (Meeting materials are available at http://ntp.niehs.nih.gov/go/9741.). The BSC voted in favor of the minimal concern conclusion with 7 yes votes, 3 no votes, and 0 abstentions. One member thought that the conclusion should be negligible concern and two members thought that the level of concern should be higher than minimal concern. The NTP's response to the May 10, 2010 review (""peer-review report"") is available on the NTP website at http://ntp.niehs.nih.gov/go/9741. The monograph includes the NTP Brief on Soy Infant Formula as well as the entire final Expert Panel Report on Soy Infant Formula. Public comments received as part of the NTP's evaluation of soy infant formula and other background materials are available at http://cerhr.niehs.nih.gov/evals/index.html.",2011-09-21 +23468467,"A novel model-based meta-analysis to indirectly estimate the comparative efficacy of two medications: an example using DPP-4 inhibitors, sitagliptin and linagliptin, in treatment of type 2 diabetes mellitus. ","To develop a longitudinal statistical model to indirectly estimate the comparative efficacies of two drugs, using model-based meta-analysis (MBMA). Comparison of two oral dipeptidyl peptidase (DPP)-4 inhibitors, sitagliptin and linagliptin, for type 2 diabetes mellitus (T2DM) treatment was used as an example. Systematic review with MBMA. 
MEDLINE, EMBASE, http://www.ClinicalTrials.gov, Cochrane review of DPP-4 inhibitors for T2DM, sitagliptin trials on Food and Drug Administration website to December 2011 and linagliptin data from the manufacturer. Double-blind, randomised controlled clinical trials, ≥12 weeks' duration, that analysed sitagliptin or linagliptin efficacies as changes in glycated haemoglobin (HbA1c) levels, in adults with T2DM and HbA1c >7%, irrespective of background medication. MODEL DEVELOPMENT AND APPLICATION: A Bayesian model was fitted (Markov Chain Monte Carlo method). The final model described HbA1c levels as function of time, dose, baseline HbA1c, washout status/duration and ethnicity. Other covariates showed no major impact on model parameters and were not included. For the indirect comparison, a population of 1000 patients was simulated from the model with a racial composition reflecting the average racial distribution of the linagliptin trials, and baseline HbA1c of 8%. The model was developed using longitudinal data from 11 234 patients (10 linagliptin, 15 sitagliptin trials), and assessed by internal evaluation techniques, demonstrating that the model adequately described the observations. Simulations showed both linagliptin 5 mg and sitagliptin 100 mg reduced HbA1c by 0.81% (placebo-adjusted) at week 24. Credible intervals for participants without washout were -0.88 to -0.75 (linagliptin) and -0.89 to -0.73 (sitagliptin), and for those with washout, -0.91 to -0.76 (linagliptin) and -0.91 to -0.75 (sitagliptin). This study demonstrates the use of longitudinal MBMA in the field of diabetes treatment. 
Based on an example evaluating HbA1c reduction with linagliptin versus sitagliptin, the model used seems a valid approach for indirect drug comparisons.",2013-03-05 +24787002,Stability of bivariate GWAS biomarker detection.,"Given the difficulty and effort required to confirm candidate causal SNPs detected in genome-wide association studies (GWAS), there is no practical way to definitively filter false positives. Recent advances in algorithmics and statistics have enabled repeated exhaustive search for bivariate features in a practical amount of time using standard computational resources, allowing us to use cross-validation to evaluate the stability. We performed 10 trials of 2-fold cross-validation of exhaustive bivariate analysis on seven Wellcome-Trust Case-Control Consortium GWAS datasets, comparing the traditional [Formula: see text] test for association, the high-performance GBOOST method and the recently proposed GSS statistic (Available at http://bioinformatics.research.nicta.com.au/software/gwis/). We use Spearman's correlation to measure the similarity between the folds of cross validation. To compare incomplete lists of ranks we propose an extension to Spearman's correlation. The extension allows us to consider a natural threshold for feature selection where the correlation is zero. This is the first reported cross-validation study of exhaustive bivariate GWAS feature selection. We found that stability between ranked lists from different cross-validation folds was higher for GSS in the majority of diseases. A thorough analysis of the correlation between SNP-frequency and univariate [Formula: see text] score demonstrated that the [Formula: see text] test for association is highly confounded by main effects: SNPs with high univariate significance replicably dominate the ranked results. We show that removal of the univariately significant SNPs improves [Formula: see text] replicability but risks filtering pairs involving SNPs with univariate effects. 
We empirically confirm that the stability of GSS and GBOOST were not affected by removal of univariately significant SNPs. These results suggest that the GSS and GBOOST tests are successfully targeting bivariate association with phenotype and that GSS is able to reliably detect a larger set of SNP-pairs than GBOOST in the majority of the data we analysed. However, the [Formula: see text] test for association was confounded by main effects.",2014-04-30 +23986568,lDDT: a local superposition-free score for comparing protein structures and models using distance difference tests.,"

Motivation

The assessment of protein structure prediction techniques requires objective criteria to measure the similarity between a computational model and the experimentally determined reference structure. Conventional similarity measures based on a global superposition of carbon α atoms are strongly influenced by domain motions and do not assess the accuracy of local atomic details in the model.

Results

The Local Distance Difference Test (lDDT) is a superposition-free score that evaluates local distance differences of all atoms in a model, including validation of stereochemical plausibility. The reference can be a single structure, or an ensemble of equivalent structures. We demonstrate that lDDT is well suited to assess local model quality, even in the presence of domain movements, while maintaining good correlation with global measures. These properties make lDDT a robust tool for the automated assessment of structure prediction servers without manual intervention.

Availability and implementation

Source code, binaries for Linux and MacOSX, and an interactive web server are available at http://swissmodel.expasy.org/lddt.

Contact

torsten.schwede@unibas.ch.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-08-27 +25247743,Perceptions about e-cigarette safety may lead to e-smoking during pregnancy.,"Electronic cigarettes (e-cigarettes) are nicotine-delivery devices that are increasingly used, especially by young people. Because e-cigarettes lack many of the substances found in regular tobacco, they are often perceived as a safer smoking alternative, especially in high-risk situations such as pregnancy. However, studies suggest that it is exposure to nicotine that is most detrimental to prenatal development. The authors studied perceptions of tobacco and e-cigarette health risks using a multiple-choice survey. To study the perceived safety of e-cigarettes versus tobacco cigarettes, 184 modified Global Health Youth Surveys (WHO, http://www.who.int/tobacco/surveillance/gyts/en/ ) were completed electronically or on paper. Age range, smoking status, and perceptions about tobacco cigarettes and e-cigarettes were studied. The results verified that younger people use e-cigarettes more than older people. Tobacco cigarettes were perceived as more harmful than e-cigarettes to health in general, including lung cancer and pregnancy. Although more research is necessary, the authors postulate that the perception that e-cigarettes are safer during pregnancy may induce pregnant women to use these devices more freely. Given that nicotine is known to cause fetal harm, pregnant mothers who smoke e-cigarettes could cause even greater harm to the fetus because e-cigarettes are perceived as being safer than tobacco cigarettes. Until more data about the effects of nicotine during pregnancy are available, the authors advocate for labeling of e-cigarettes as potentially harmful, at least during pregnancy.",2014-01-01 +30743683,First Report of Erysiphe quercicola Causing Powdery Mildew on Ubame Oak in Korea.,"Ubame oak (Quercus phillyraeoides A. Gray) is native to eastern Asia, including China, Korea, and Japan. 
In 2009 and 2010, a powdery mildew on Q. phillyraeoides growing in clusters and singly was observed in three locations on the campus of Chonnam National University, Gwangju, Korea. White superficial conidia of the powdery mildew fungus occurred on adaxial and abaxial surfaces. However, the white powdery growth was more abundant on the adaxial surface. Leaf symptoms commonly appeared white from May to October. Along with the typical white powdery mildew, spot and/or necrotic symptoms with irregular violet-to-wine red surfaces were also frequently observed on overwintered leaves. A voucher specimen has been deposited in EML (Environmental Microbiology Laboratory) herbarium collection, Chonnam National University (EML-QUP1). Conidia were commonly formed singly but also occurred in chains. Primary conidia were obovoid to ellipsoid, with a rounded apex and subtruncate base. Secondary conidia were generally obovoid to ellipsoid or sometimes cylindrical but dolioform when mature. The size was 30.1 to 43.2 (average 37.7) × 14.1 to 21.1 (average 18.1) μm with length/width ratio of 1.8 to 2.4 (average 2.1). Conidiophores were erect and up to 102.2 μm long. No chasmothecia were found. From extracted genomic DNA, the internal transcribed spacer (ITS) region inclusive of 5.8S rDNA was amplified with ITS1F (5'-CTTGGT CATTTAGAGGAAGT-3') and LR5F (5'-GCTATCCTGAGGGAAAC-3') primers (4). Sequence analysis by BLASTN search indicated that EML-QUP1 (GenBank Accession No. HQ328834) was closest to E. quercicola (GenBank Accession No. AB292691) with >99% identity (478 of 480), forming a monophyletic quercicola clade in the resulting phylogenetic analysis. The causal fungus was determined to be Erysiphe quercicola on the basis of morphology and sequence data analysis. Major genera including Cystotheca, Erysiphe, Microsphaera, and Phyllactinia have been reported to cause powdery mildews on Quercus plants. Until now, 22 Erysiphe species including E. abbreviata, E. alphitoides, E. 
calocladophora, E. gracilis, E. polygoni, and E. quercicola have been reported to cause powdery mildews on Quercus spp. (1). Of these, four Erysiphe species including E. alphitoides, E. gracilis, E. quercicola, and an unidentified Erysiphe sp. have been found on Q. phillyraeoides from Japan (1-3). E. quercicola was reported to occur on five Quercus species: Q. crispula, Q. phillyraeoides, and Q. serrata in Japan, Q. robur in Australia, and Quercus sp. in Australia, Iran, and Thailand (1). To our knowledge, this is the first report of leaf powdery mildew caused by E. quercicola on Q. phillyraeoides in Korea. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved October 7, 2010, from http://nt.ars-grin.gov/fungaldatabases/ , 2010. (2) S. Limkaisang et al. Mycoscience 47:327, 2006. (3) S. Takamatsu et al. Mycol. Res. 111:809, 2007. (4) T. J. White et al. PCR Protocols: A Guide to Methods and Applications. Academic Press, San Diego, 1990.",2011-01-01 +23716643,R3D Align web server for global nucleotide to nucleotide alignments of RNA 3D structures.,"The R3D Align web server provides online access to 'RNA 3D Align' (R3D Align), a method for producing accurate nucleotide-level structural alignments of RNA 3D structures. The web server provides a streamlined and intuitive interface, input data validation and output that is more extensive and easier to read and interpret than related servers. The R3D Align web server offers a unique Gallery of Featured Alignments, providing immediate access to pre-computed alignments of large RNA 3D structures, including all ribosomal RNAs, as well as guidance on effective use of the server and interpretation of the output. 
By accessing the non-redundant lists of RNA 3D structures provided by the Bowling Green State University RNA group, R3D Align connects users to structure files in the same equivalence class and the best-modeled representative structure from each group. The R3D Align web server is freely accessible at http://rna.bgsu.edu/r3dalign/.",2013-05-28 +23110720,Fast and accurate haplotype frequency estimation for large haplotype vectors from pooled DNA data.,"

Background

Typically, the first phase of a genome wide association study (GWAS) includes genotyping across hundreds of individuals and validation of the most significant SNPs. Allelotyping of pooled genomic DNA is a common approach to reduce the overall cost of the study. Knowledge of haplotype structure can provide additional information to single locus analyses. Several methods have been proposed for estimating haplotype frequencies in a population from pooled DNA data.

Results

We introduce a technique for haplotype frequency estimation in a population from pooled DNA samples focusing on datasets containing a small number of individuals per pool (2 or 3 individuals) and a large number of markers. We compare our method with the publicly available state-of-the-art algorithms HIPPO and HAPLOPOOL on datasets of varying number of pools and marker sizes. We demonstrate that our algorithm provides improvements in terms of accuracy and computational time over competing methods for large number of markers while demonstrating comparable performance for smaller marker sizes. Our method is implemented in the ""Tree-Based Deterministic Sampling Pool"" (TDSPool) package which is available for download at http://www.ee.columbia.edu/~anastas/tdspool.

Conclusions

Using a tree-based determinstic sampling technique we present an algorithm for haplotype frequency estimation from pooled data. Our method demonstrates superior performance in datasets with large number of markers and could be the method of choice for haplotype frequency estimation in such datasets.",2012-10-30 +23368516,Expanding the boundaries of local similarity analysis.,"

Background

Pairwise comparison of time series data for both local and time-lagged relationships is a computationally challenging problem relevant to many fields of inquiry. The Local Similarity Analysis (LSA) statistic identifies the existence of local and lagged relationships, but determining significance through a p-value has been algorithmically cumbersome due to an intensive permutation test, shuffling rows and columns and repeatedly calculating the statistic. Furthermore, this p-value is calculated with the assumption of normality -- a statistical luxury dissociated from most real world datasets.

Results

To improve the performance of LSA on big datasets, an asymptotic upper bound on the p-value calculation was derived without the assumption of normality. This change in the bound calculation markedly improved computational speed from O(pm²n) to O(m²n), where p is the number of permutations in a permutation test, m is the number of time series, and n is the length of each time series. The bounding process is implemented as a computationally efficient software package, FASTLSA, written in C and optimized for threading on multi-core computers, improving its practical computation time. We computationally compare our approach to previous implementations of LSA, demonstrate broad applicability by analyzing time series data from public health, microbial ecology, and social media, and visualize resulting networks using the Cytoscape software.

Conclusions

The FASTLSA software package expands the boundaries of LSA allowing analysis on datasets with millions of co-varying time series. Mapping metadata onto force-directed graphs derived from FASTLSA allows investigators to view correlated cliques and explore previously unrecognized network relationships. The software is freely available for download at: http://www.cmde.science.ubc.ca/hallam/fastLSA/.",2013-01-21 +24507755,Systematic characterization of small RNAome during zebrafish early developmental stages.,"

Background

During early vertebrate development, various small non-coding RNAs (sRNAs) such as MicroRNAs (miRNAs) and Piwi-interacting RNAs (piRNAs) are dynamically expressed for orchestrating the maternal-to-zygotic transition (MZT). Systematic analysis of expression profiles of zebrafish small RNAome will be greatly helpful for understanding the sRNA regulation during embryonic development.

Results

We first determined the expression profiles of sRNAs during eight distinct stages of early zebrafish development by sRNA-seq technology. Integrative analyses with a new computational platform of CSZ (characterization of small RNAome for zebrafish) demonstrated an sRNA class transition from piRNAs to miRNAs as development proceeds. We observed that both the abundance and diversity of miRNAs are gradually increased, while the abundance is enhanced more dramatically than the diversity during development. However, although both the abundance and diversity of piRNAs are gradually decreased, the diversity was firstly increased then rapidly decreased. To evaluate the computational accuracy, the expression levels of four known miRNAs were experimentally validated. We also predicted 25 potentially novel miRNAs, whereas two candidates were verified by Northern blots.

Conclusions

Taken together, our analyses revealed the piRNA to miRNA transition as a conserved mechanism in zebrafish, although two different types of sRNAs exhibit distinct expression dynamics in abundance and diversity, respectively. Our study not only generated a better understanding for sRNA regulations in early zebrafish development, but also provided a useful platform for analyzing sRNA-seq data. The CSZ was implemented in Perl and freely downloadable at: http://csz.biocuckoo.org.",2014-02-10 +21409563,CONNJUR spectrum translator: an open source application for reformatting NMR spectral data.,"NMR spectroscopists are hindered by the lack of standardization for spectral data among the file formats for various NMR data processing tools. This lack of standardization is cumbersome as researchers must perform their own file conversion in order to switch between processing tools and also restricts the combination of tools employed if no conversion option is available. The CONNJUR Spectrum Translator introduces a new, extensible architecture for spectrum translation and introduces two key algorithmic improvements. This first is translation of NMR spectral data (time and frequency domain) to a single in-memory data model to allow addition of new file formats with two converter modules, a reader and a writer, instead of writing a separate converter to each existing format. Secondly, the use of layout descriptors allows a single fid data translation engine to be used for all formats. For the end user, sophisticated metadata readers allow conversion of the majority of files with minimum user configuration. 
The open source code is freely available at http://connjur.sourceforge.net for inspection and extension.",2011-03-16 +23095231,BSAC standardized disc susceptibility testing method (version 11).,"This article highlights key amendments incorporated into version 11 of the BSAC standardized disc susceptibility testing method, available as Supplementary data at JAC Online (http://jac.oxfordjournals.org/) and on the BSAC web site (http://bsac.org.uk/susceptibility/guidelines-standardized-disc-susceptibility-testing-method/). The basic disc susceptibility testing method remains unchanged, but there have been a number of alterations to the interpretive criteria for certain organism/drug combinations due to continuing harmonization with the EUCAST MIC breakpoints and constant efforts to improve the reliability and clinical applicability of the guidance.",2012-10-24 +21824973,A biclustering algorithm for extracting bit-patterns from binary datasets.,"

Motivation

Binary datasets represent a compact and simple way to store data about the relationships between a group of objects and their possible properties. In the last few years, different biclustering algorithms have been specially developed to be applied to binary datasets. Several approaches based on matrix factorization, suffix trees or divide-and-conquer techniques have been proposed to extract useful biclusters from binary data, and these approaches provide information about the distribution of patterns and intrinsic correlations.

Results

A novel approach to extracting biclusters from binary datasets, BiBit, is introduced here. The results obtained from different experiments with synthetic data reveal the excellent performance and the robustness of BiBit to density and size of input data. Also, BiBit is applied to a central nervous system embryonic tumor gene expression dataset to test the quality of the results. A novel gene expression preprocessing methodology, based on expression level layers, and the selective search performed by BiBit, based on a very fast bit-pattern processing technique, provide very satisfactory results in quality and computational cost. The power of biclustering in finding genes involved simultaneously in different cancer processes is also shown. Finally, a comparison with Bimax, one of the most cited binary biclustering algorithms, shows that BiBit is faster while providing essentially the same results.

Availability

The source and binary codes, the datasets used in the experiments and the results can be found at: http://www.upo.es/eps/bigs/BiBit.html

Contact

dsrodbae@upo.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-08 +24523864,DiME: a scalable disease module identification algorithm with application to glioma progression.,"Disease module is a group of molecular components that interact intensively in the disease specific biological network. Since the connectivity and activity of disease modules may shed light on the molecular mechanisms of pathogenesis and disease progression, their identification becomes one of the most important challenges in network medicine, an emerging paradigm to study complex human disease. This paper proposes a novel algorithm, DiME (Disease Module Extraction), to identify putative disease modules from biological networks. We have developed novel heuristics to optimise Community Extraction, a module criterion originally proposed for social network analysis, to extract topological core modules from biological networks as putative disease modules. In addition, we have incorporated a statistical significance measure, B-score, to evaluate the quality of extracted modules. As an application to complex diseases, we have employed DiME to investigate the molecular mechanisms that underpin the progression of glioma, the most common type of brain tumour. We have built low (grade II)--and high (GBM)--grade glioma co-expression networks from three independent datasets and then applied DiME to extract potential disease modules from both networks for comparison. Examination of the interconnectivity of the identified modules have revealed changes in topology and module activity (expression) between low- and high- grade tumours, which are characteristic of the major shifts in the constitution and physiology of tumour cells during glioma progression. Our results suggest that transcription factors E2F4, AR and ETS1 are potential key regulators in tumour progression. 
Our DiME compiled software, R/C++ source code, sample data and a tutorial are available at http://www.cs.bham.ac.uk/~szh/DiME.",2014-02-11 +21641563,TAAPP: Tiling Array Analysis Pipeline for Prokaryotes.,"High-density tiling arrays provide closer view of transcription than regular microarrays and can also be used for annotating functional elements in genomes. The identified transcripts usually have a complex overlapping architecture when compared to the existing genome annotation. Therefore, there is a need for customized tiling array data analysis tools. Since most of the initial tiling arrays were conducted in eukaryotes, data analysis methods are well suited for eukaryotic genomes. For using whole-genome tiling arrays to identify previously unknown transcriptional elements like small RNA and antisense RNA in prokaryotes, existing data analysis tools need to be tailored for prokaryotic genome architecture. Furthermore, automation of such custom data analysis workflow is necessary for biologists to apply this powerful platform for knowledge discovery. Here we describe TAAPP, a web-based package that consists of two modules for prokaryotic tiling array data analysis. The transcript generation module works on normalized data to generate transcriptionally active regions (TARs). The feature extraction and annotation module then maps TARs to existing genome annotation. This module further categorizes the transcription profile into potential novel non-coding RNA, antisense RNA, gene expression and operon structures. The implemented workflow is microarray platform independent and is presented as a web-based service. The web interface is freely available for academic use at http://lims.lsbi.mafes.msstate.edu/TAAPP-HTML/.",2011-04-01 +21414991,Customizable views on semantically integrated networks for systems biology.,"

Motivation

The rise of high-throughput technologies in the post-genomic era has led to the production of large amounts of biological data. Many of these datasets are freely available on the Internet. Making optimal use of these data is a significant challenge for bioinformaticians. Various strategies for integrating data have been proposed to address this challenge. One of the most promising approaches is the development of semantically rich integrated datasets. Although well suited to computational manipulation, such integrated datasets are typically too large and complex for easy visualization and interactive exploration.

Results

We have created an integrated dataset for Saccharomyces cerevisiae using the semantic data integration tool Ondex, and have developed a view-based visualization technique that allows for concise graphical representations of the integrated data. The technique was implemented in a plug-in for Cytoscape, called OndexView. We used OndexView to investigate telomere maintenance in S. cerevisiae.

Availability

The Ondex yeast dataset and the OndexView plug-in for Cytoscape are accessible at http://bsu.ncl.ac.uk/ondexview.",2011-03-16 +23217001,Association of MTHFR Ala222Val (rs1801133) polymorphism and breast cancer susceptibility: An update meta-analysis based on 51 research studies.,"

Background

The association between MTHFR Ala222Val polymorphism and breast cancer (BC) risk is inconclusive. To derive a more precise estimation of the relationship, a systematic review and meta-analysis was performed.

Methods

A comprehensive search was conducted by searching the MEDLINE, EMBASE, PubMed, Web of Science, Chinese Biomedical Literature database (CBM) and China National Knowledge Infrastructure (CNKI) databases before August 2012. Crude odds ratios (ORs) with 95% confidence intervals (CIs) were calculated to estimate the strength of the association.

Results

A total of 51 studies including 20,907 cases and 23,905 controls were involved in this meta-analysis. Overall, significant associations were found between MTHFR Ala222Val polymorphism and BC risk when all studies were pooled into the meta-analysis (Ala/Ala vs Val/Val: OR=0.870, 95%CI=0.789-0.958, P=0.005; Ala/Val vs Val/Val: OR=0.895, 95%CI=0.821-0.976, P=0.012; dominant model: OR=0.882, 95%CI=0.808-0.963, P=0.005; and recessive model: OR = 0.944, 95%CI=0.898-0.993, P=0.026; Ala allele vs Val allele: OR = 0.935, 95%CI=0.887-0.986, P=0.013). In the subgroup analysis by ethnicity, the same results were found in Asian populations, while no significant associations were found for all comparison models in other ethnic populations.

Conclusion

In conclusion, our meta-analysis provides evidence that the MTHFR Ala222Val gene polymorphism contributes to breast cancer development.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1966146911851976.",2012-12-07 +21401061,"PeakML/mzMatch: a file format, Java library, R library, and tool-chain for mass spectrometry data analysis.","The recent proliferation of high-resolution mass spectrometers has generated a wealth of new data analysis methods. However, flexible integration of these methods into configurations best suited to the research question is hampered by heterogeneous file formats and monolithic software development. The mzXML, mzData, and mzML file formats have enabled uniform access to unprocessed raw data. In this paper we present our efforts to produce an equally simple and powerful format, PeakML, to uniformly exchange processed intermediary and result data. To demonstrate the versatility of PeakML, we have developed an open source Java toolkit for processing, filtering, and annotating mass spectra in a customizable pipeline (mzMatch), as well as a user-friendly data visualization environment (PeakML Viewer). The PeakML format in particular enables the flexible exchange of processed data between software created by different groups or companies, as we illustrate by providing a PeakML-based integration of the widely used XCMS package with mzMatch data processing tools. As an added advantage, downstream analysis can benefit from direct access to the full mass trace information underlying summarized mass spectrometry results, providing the user with the means to rapidly verify results. The PeakML/mzMatch software is freely available at http://mzmatch.sourceforge.net, with documentation, tutorials, and a community forum.",2011-03-14 +21974739,New resources for functional analysis of omics data for the genus Aspergillus.,"

Background

Detailed and comprehensive genome annotation can be considered a prerequisite for effective analysis and interpretation of omics data. As such, Gene Ontology (GO) annotation has become a well accepted framework for functional annotation. The genus Aspergillus comprises fungal species that are important model organisms, plant and human pathogens as well as industrial workhorses. However, GO annotation based on both computational predictions and extended manual curation has so far only been available for one of its species, namely A. nidulans.

Results

Based on protein homology, we mapped 97% of the 3,498 GO annotated A. nidulans genes to at least one of seven other Aspergillus species: A. niger, A. fumigatus, A. flavus, A. clavatus, A. terreus, A. oryzae and Neosartorya fischeri. GO annotation files compatible with diverse publicly available tools have been generated and deposited online. To further improve their accessibility, we developed a web application for GO enrichment analysis named FetGOat and integrated GO annotations for all Aspergillus species with public genome sequences. Both the annotation files and the web application FetGOat are accessible via the Broad Institute's website (http://www.broadinstitute.org/fetgoat/index.html). To demonstrate the value of those new resources for functional analysis of omics data for the genus Aspergillus, we performed two case studies analyzing microarray data recently published for A. nidulans, A. niger and A. oryzae.

Conclusions

We mapped A. nidulans GO annotation to seven other Aspergilli. By depositing the newly mapped GO annotation online as well as integrating it into the web tool FetGOat, we provide new, valuable and easily accessible resources for omics data analysis and interpretation for the genus Aspergillus. Furthermore, we have given a general example of how a well annotated genome can help improving GO annotation of related species to subsequently facilitate the interpretation of omics data.",2011-10-05 +23219434,"Systematic annotation and analysis of ""virmugens""-virulence factors whose mutants can be used as live attenuated vaccines.","Live attenuated vaccines are usually generated by mutation of genes encoding virulence factors. ""Virmugen"" is coined here to represent a gene that encodes for a virulent factor of a pathogen and has been proven feasible in animal models to make a live attenuated vaccine by knocking out this gene. Not all virulence factors are virmugens. VirmugenDB is a web-based virmugen database (http://www.violinet.org/virmugendb). Currently, VirmugenDB includes 225 virmugens that have been verified to be valuable for vaccine development against 57 bacterial, viral, and protozoan pathogens. Bioinformatics analysis has revealed significant patterns in virmugens. For example, 10 Gram-negative and 1 Gram-positive bacterial aroA genes are virmugens. A sequence analysis has revealed at least 50% of identities in the protein sequences of the 10 Gram-negative bacterial aroA virmugens. As a pathogen case study, Brucella virmugens were analyzed. Out of 15 verified Brucella virmugens, 6 are related to carbohydrate or nucleotide transport and metabolism, and 2 involving cell membrane biogenesis. In addition, 54 virmugens from 24 viruses and 12 virmugens from 4 parasites are also stored in VirmugenDB. Virmugens tend to involve metabolism of nutrients (e.g., amino acids, carbohydrates, and nucleotides) and cell membrane formation. 
Host genes whose expressions were regulated by virmugen mutation vaccines or wild type virulent pathogens have also been annotated and systematically compared. The bioinformatics annotation and analysis of virmugens helps to elucidate enriched virmugen profiles and the mechanisms of protective immunity, and further supports rational vaccine design.",2012-12-06 +24885169,"Histological comparison between preoperative and surgical specimens of non-small cell lung cancer for distinguishing between ""squamous"" and ""non-squamous"" cell carcinoma.","

Background

Non-small cell lung cancers (NSCLCs) are frequently heterogeneous and in approximately 70% of cases, NSCLCs are diagnosed and staged by small biopsies or cytology rather than by examination of surgically resected specimens. Thus, in most patients, the diagnosis is established based on examination of preoperative specimens alone. Recently, classification of NSCLC into pathologic subtypes has been shown to be important for selecting the appropriate systemic therapy, from both the point of view of treatment efficacy and prevention of toxicity.

Methods

We retrospectively reviewed the data of 225 patients to compare the preoperative classification of the NSCLC subtype on biopsy specimens with the postoperative classification based on examination of the resected specimens, in order to compare the accuracy of the two for the diagnosis of various histological subtypes of NSCLC.

Results

In 169 of the 225 (75.1%) patients, the preoperative diagnosis was definite malignancy. Histologically, the final pathologic diagnosis made from the surgical specimens was adenocarcinoma (ADC) in 169 patients, and in 75.5% of these cases, the diagnosis was concordant with the preoperative diagnosis. Among the patients who had squamous cell carcinoma (SQC) in the preoperative specimens, the diagnosis was concordant with the preoperative diagnosis in 65.7% of cases. Misclassified preoperative biopsies included an even number of SQCs and ADCs, with all the misclassified biopsies being ADCs morphologically mimicking SQC due to solid growth. Significantly higher specificity, negative predictive value and accuracy were observed for the diagnosis of SQC.

Conclusions

Our study suggested that the concordance rates for diagnosis of the NSCLC subtypes, especially the ""squamous"" or ""non-squamous"" histologies, between preoperative and surgical specimens were satisfactory, as compared with previous reports. Therefore, pretreatment diagnosis of lung cancer using small samples is reasonable for selecting the optimal treatment. However, in order not to lose the opportunity for selecting an effective treatment, we should be aware that the diagnosis in preoperative small samples might be different from that based on examination of the surgical specimens.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2032698427120488.",2014-05-29 +23559362,Comparative stability of the bioresorbable ferric crosslinked hyaluronic acid adhesion prevention solutions.,"The Intergel® ferric crosslinked hyaluronate (FeHA) adhesion prevention solution (APS) (FDA) is associated with serious post-operative complications (Henley, http://www.lawyersandsettlements.com/features/gynecare-intergel/intergel-timeline.html, 2007; FDA, 2003; Roman et al., Fertil Steril 2005, 83 Suppl 1:1113-1118; Tang et al., Ann Surg 2006;243(4):449-455; Wiseman, Fertil Steril 2006;86(3):771; Wiseman, Fertil Steril 2006;85(4):e7). This prompted us to examine the in situ stability of crosslinked HA materials to hyaluronidase lyase degradation. Variables such as ferric ionic crosslink density, HA concentration, gel geometry, and molecular weight (MW) of HA polymer were studied. Various formulations of the crosslinked ""in house"" [Isayeva et al., J Biomed Mater Res: Part B - Appl Biomater 2010, 95B (1):9-18] FeHA (0.5%, w/v; 30, 50, 90% crosslinked), the Intergel® FeHA (0.5%, w/v; 90%), and the non-crosslinked HA (0.05-0.5%, w/v) were degraded at a fixed activity of hyaluronidase lyase from Streptomyces hyalurolyticus (Hyase) at 37°C over time according to the method [Payan et al., J Chrom B: Biomed Sci Appl 1991;566(1):9-18]. Under our conditions, the data show that the crosslink density affects degradation the most, followed by HA concentration and then gel geometry. We found that MW has no effect. Our results are one possible explanation of the observations that the Intergel® FeHA APS (0.5%, w/v; 90%) material persisted an order of magnitude longer than expected [t1/2 = 500 hrs vs. t1/2 = 50 hrs (FDA; Johns et al., Fertil Steril 1997;68(1):37-42)]. 
These data also demonstrate the sensitivity of the in vitro hyaluronidase assay to predict the in situ stability of crosslinked HA medical products as previously reported [Sall et al., Polym Degrad Stabil 2007;92(5):915-919].",2013-04-04 +23220571,SM2miR: a database of the experimentally validated small molecules' effects on microRNA expression.,"

Unlabelled

The inappropriate expression of microRNAs (miRNAs) is closely related with disease diagnosis, prognosis and therapy response. Recently, many studies have demonstrated that bioactive small molecules (or drugs) can regulate miRNA expression, which indicates that targeting miRNAs with small molecules is a new therapy for human diseases. In this study, we established the SM2miR database, which recorded 2925 relationships between 151 small molecules and 747 miRNAs in 17 species after manual curation from nearly 2000 articles. Each entry contains the detailed information about small molecules, miRNAs and evidences of their relationships, such as species, miRBase Accession number, DrugBank Accession number, PubChem Compound Identifier (CID), expression pattern of miRNA, experimental method, tissues or conditions for detection. SM2miR database has a user-friendly interface to retrieve by miRNA or small molecule. In addition, we offered a submission page. Thus, SM2miR provides a fairly comprehensive repository about the influences of small molecules on miRNA expression, which will promote the development of miRNA therapeutics.

Availability

SM2miR is freely available at http://bioinfo.hrbmu.edu.cn/SM2miR/.",2012-12-05 +23221176,Using binary classification to prioritize and curate articles for the Comparative Toxicogenomics Database.,"We report on the original integration of an automatic text categorization pipeline, so-called ToxiCat (Toxicogenomic Categorizer), that we developed to perform biomedical documents classification and prioritization in order to speed up the curation of the Comparative Toxicogenomics Database (CTD). The task can be basically described as a binary classification task, where a scoring function is used to rank a selected set of articles. Then components of a question-answering system are used to extract CTD-specific annotations from the ranked list of articles. The ranking function is generated using a Support Vector Machine, which combines three main modules: an information retrieval engine for MEDLINE (EAGLi), a gene normalization service (NormaGene) developed for a previous BioCreative campaign and finally, a set of answering components and entity recognizer for diseases and chemicals. The main components of the pipeline are publicly available both as web application and web services. The specific integration performed for the BioCreative competition is available via a web user interface at http://pingu.unige.ch:8080/Toxicat.",2012-12-05 +23109668,Using force to visualize conformational activation of integrins.,"The development of biophysical approaches to analyze integrin-ligand binding allows us to visualize in real time the conformational changes that shift the bond affinity between low- and high-affinity states. In this issue, Chen et al. (2012. J. Cell Biol. 
http://dx.doi.org/jcb.201201091) use these approaches to validate some aspects of the classical integrin regulation model; however, their data suggest that much of the regulation occurs after ligand binding rather than in preparation for ligand binding to occur.",2012-10-01 +24202542,PREDDIMER: a web server for prediction of transmembrane helical dimers.,"

Summary

Here we present PREDDIMER, a web tool for prediction of dimer structure of transmembrane (TM) helices. PREDDIMER allows (i) reconstruction of a number of dimer structures for given sequence(s) of TM protein fragments, (ii) ranking and filtering of predicted structures according to respective values of a scoring function, (iii) visualization of predicted 3D dimer structures and (iv) visualization of surface hydrophobicity of TM helices and their contacting (interface) regions represented as 2D maps.

Results

We implemented online the original PREDDIMER algorithm and benchmarked the server on 11 TM sequences, whose 3D dimer conformations were obtained previously by nuclear magnetic resonance spectroscopy. In most of the tested cases, backbone root-mean-square deviations of closest predicted conformations from the experimental reference are below 3 Å. A randomization test displays good anticorrelation (-0.82) between values of the scoring function and statistical significance of the prediction 'by chance'. Going beyond a single dimer conformation, our web tool predicts an ensemble of possible conformations, which may be useful for explaining the functioning of bitopic membrane proteins, e.g. receptor tyrosine kinases.

Availability and implementation

PREDDIMER can be accessed for free on the web at http://model.nmr.ru/preddimer/

Contact

newant@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-11-07 +23219548,Renal perfusion pump vs cold storage for donation after cardiac death kidneys: a systematic review.,"

Purpose

Static cold storage is generally used to preserve kidney allografts from deceased donors. Hypothermic machine perfusion may improve the outcome after transplantation but few studies with limited power have addressed this issue. We reviewed evidence of the effectiveness of storing kidneys from deceased donors after cardiac death before transplantation using cold static storage solution or pulsatile hypothermic machine perfusion.

Materials and methods

We searched electronic databases in September 2011 for systematic reviews and/or meta-analyses, randomized, controlled trials and studies of other designs that compared delayed graft function and graft survival. Sources included The Cochrane Library, PubMed® and EMBASE®. Studies excluded from review included those that did not discriminate between donation after cardiac death and donation from a neurologically deceased donor. Primary outcomes were delayed graft function and 1-year graft survival. Statistical analysis was done using RevMan (http://ims.cochrane.org/revman).

Results

Nine studies qualified for review. Pulsatile perfusion pumped kidneys from donation after cardiac death donors had decreased delayed graft function compared to kidneys placed in cold storage (OR 0.64, 95% CI 0.43-0.95, p = 0.03). There was a trend toward improved 1-year graft survival in the pulsatile perfusion group but statistical significance was not attained (OR 0.74, 95% CI 0.48-1.13, p = 0.17).

Conclusions

Pulsatile machine perfusion of donation after cardiac death kidneys appears to decrease the delayed graft function rate. We noted no benefit in 1-year graft survival. Due to the great heterogeneity among the trials as well as several confounding factors, the overall impact on allograft function and survival requires more study.",2012-12-03 +23228854,Identifying differentially spliced genes from two groups of RNA-seq samples.,"Recent study revealed that most human genes have alternative splicing and can produce multiple isoforms of transcripts. Differences in the relative abundance of the isoforms of a gene can have significant biological consequences. Identifying genes that are differentially spliced between two groups of RNA-sequencing samples is an important basic task in the study of transcriptomes with next-generation sequencing technology. We use the negative binomial (NB) distribution to model sequencing reads on exons, and propose a NB-statistic to detect differentially spliced genes between two groups of samples by comparing read counts on all exons. The method opens a new exon-based approach instead of isoform-based approach for the task. It does not require information about isoform composition, nor need the estimation of isoform expression. Experiments on simulated data and real RNA-seq data of human kidney and liver samples illustrated the method's good performance and applicability. It can also detect previously unknown alternative splicing events, and highlight exons that are most likely differentially spliced between the compared samples. We developed an NB-statistic method that can detect differentially spliced genes between two groups of samples without using a prior knowledge on the annotation of alternative splicing. It does not need to infer isoform structure or to estimate isoform expression. It is a useful method designed for comparing two groups of RNA-seq samples. 
Besides identifying differentially spliced genes, the method can highlight on the exons that contribute the most to the differential splicing. We developed a software tool called DSGseq for the presented method available at http://bioinfo.au.tsinghua.edu.cn/software/DSGseq.",2012-12-08 +24196532,Do you know the sex of your cells?,"Do you know the sex of your cells? Not a question that is frequently heard around the lab bench, yet thanks to recent research is probably one that should be asked. It is self-evident that cervical epithelial cells would be derived from female tissue and prostate cells from a male subject (exemplified by HeLa and LnCaP, respectively), yet beyond these obvious examples, it would be true to say that the sex of cell lines derived from non-reproductive tissue, such as lung, intestine, kidney, for example, is given minimal if any thought. After all, what possible impact could the presence of a Y chromosome have on the biochemistry and cell biology of tissues such as the exocrine pancreatic acini? Intriguingly, recent evidence has suggested that far from being irrelevant, genes expressed on the sex chromosomes can have a marked impact on the biology of such diverse tissues as neurons and renal cells. It is also policy of AJP-Cell Physiology that the source of all cells utilized (species, sex, etc.) should be clearly indicated when submitting an article for publication, an instruction that is rarely followed (http://www.the-aps.org/mm/Publications/Info-For-Authors/Composition). In this review we discuss recent data arguing that the sex of cells being used in experiments can impact the cell's biology, and we provide a table outlining the sex of cell lines that have appeared in AJP-Cell Physiology over the past decade.",2013-11-06 +22088843,Inferring gene regulatory networks from gene expression data by path consistency algorithm based on conditional mutual information.,"

Motivation

Reconstruction of gene regulatory networks (GRNs), which explicitly represent the causality of developmental or regulatory process, is of utmost interest and has become a challenging computational problem for understanding the complex regulatory mechanisms in cellular systems. However, all existing methods of inferring GRNs from gene expression profiles have their strengths and weaknesses. In particular, many properties of GRNs, such as topology sparseness and non-linear dependence, are generally in regulation mechanism but seldom are taken into account simultaneously in one computational method.

Results

In this work, we present a novel method for inferring GRNs from gene expression data considering the non-linear dependence and topological structure of GRNs by employing path consistency algorithm (PCA) based on conditional mutual information (CMI). In this algorithm, the conditional dependence between a pair of genes is represented by the CMI between them. With the general hypothesis of Gaussian distribution underlying gene expression data, CMI between a pair of genes is computed by a concise formula involving the covariance matrices of the related gene expression profiles. The method is validated on the benchmark GRNs from the DREAM challenge and the widely used SOS DNA repair network in Escherichia coli. The cross-validation results confirmed the effectiveness of our method (PCA-CMI), which outperforms significantly other previous methods. Besides its high accuracy, our method is able to distinguish direct (or causal) interactions from indirect associations.

Availability

All the source data and code are available at: http://csb.shu.edu.cn/subweb/grn.htm.

Contact

lnchen@sibs.ac.cn; zpliu@sibs.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-15 +21576229,ChemMine tools: an online service for analyzing and clustering small molecules.,"ChemMine Tools is an online service for small molecule data analysis. It provides a web interface to a set of cheminformatics and data mining tools that are useful for various analysis routines performed in chemical genomics and drug discovery. The service also offers programmable access options via the R library ChemmineR. The primary functionalities of ChemMine Tools fall into five major application areas: data visualization, structure comparisons, similarity searching, compound clustering and prediction of chemical properties. First, users can upload compound data sets to the online Compound Workbench. Numerous utilities are provided for compound viewing, structure drawing and format interconversion. Second, pairwise structural similarities among compounds can be quantified. Third, interfaces to ultra-fast structure similarity search algorithms are available to efficiently mine the chemical space in the public domain. These include fingerprint and embedding/indexing algorithms. Fourth, the service includes a Clustering Toolbox that integrates cheminformatic algorithms with data mining utilities to enable systematic structure and activity based analyses of custom compound sets. Fifth, physicochemical property descriptors of custom compound sets can be calculated. These descriptors are important for assessing the bioactivity profile of compounds in silico and quantitative structure-activity relationship (QSAR) analyses. ChemMine Tools is available at: http://chemmine.ucr.edu.",2011-05-16 +23216884,Flexible and efficient genome tiling design with penalized uniqueness score.,"

Background

As a powerful tool in whole genome analysis, tiling array has been widely used in the answering of many genomic questions. Now it could also serve as a capture device for the library preparation in the popular high throughput sequencing experiments. Thus, a flexible and efficient tiling array design approach is still needed and could assist in various types and scales of transcriptomic experiment.

Results

In this paper, we address issues and challenges in designing probes suitable for tiling array applications and targeted sequencing. In particular, we define the penalized uniqueness score, which serves as a controlling criterion to eliminate potential cross-hybridization, and a flexible tiling array design pipeline. Unlike BLAST or simple suffix array based methods, computing and using our uniqueness measurement can be more efficient for large scale design and require less memory. The parameters provided could assist in various types of genomic tiling task. In addition, using both commercial array data and experiment data we show, unlike previously claimed, that palindromic sequence exhibiting relatively lower uniqueness.

Conclusions

Our proposed penalized uniqueness score could serve as a better indicator for cross hybridization with higher sensitivity and specificity, giving more control of expected array quality. The flexible tiling design algorithm incorporating the penalized uniqueness score was shown to give higher coverage and resolution. The package to calculate the penalized uniqueness score and the described probe selection algorithm are implemented as a Perl program, which is freely available at http://www1.fbn-dummerstorf.de/en/forschung/fbs/fb3/paper/2012-yang-1/OTAD.v1.1.tar.gz.",2012-12-05 +22399244,Iconicity of simple Chinese characters.,"The iconicity of a Chinese character, or the degree to which it looks like the concept that it represents, has been suggested as affecting the learning and processing of the character. However, previous studies have not provided good empirical information on the iconicity of specific characters. To fill this gap, 40 U.S. adults with no knowledge of Chinese were given an English word or short phrase together with two Chinese characters and were asked which character matched the meaning of the English word. The right and wrong answers had the same number of strokes, and different wrong answers were used for different participants. We examined all 213 simple-structure Chinese characters that occur in textbooks for elementary school children. The overall percentage of correct responses was 53.6%, slightly but significantly higher than would be expected by chance. Using a false discovery rate procedure, we found that 15 of the 213 characters were guessed at a level higher than chance. The proportion of correct responses to each character, which can be taken as an indicator of its degree of iconicity, should be useful to researchers studying Chinese character reading and writing. 
The full database, showing the proportion of correct guesses and other psycholinguistic variables for each character, can be downloaded from http://brm.psychonomic-journals.org/content/supplemental .",2012-12-01 +24642210,'Plug and Play' assembly of a low-temperature plasma ionization mass spectrometry imaging (LTP-MSI) system.,"Mass spectrometry imaging (MSI) is of high and growing interest in life science research, but the investment for necessary equipment is often prohibitive for small research groups. Therefore, we developed a basic MSI system from low cost 'Plug and Play' components, which are connected to the Universal Serial Bus (USB) of a standard computer. Our open source software OpenMZxy (http://www.bioprocess.org/openmzxy) enables automatic and manual sampling, as well as the recording of position data. For ionization we used a low-temperature plasma probe (LTP), coupled to a quadrupole mass analyzer. The current set-up has a practical resolution of 1mm, and a sampling area of 100×100mm, resulting in up to 10,000 sampling points. Our prototype is easy and economical to adopt for different types of mass analyzers. We prove the usability of the LTP-MSI system for macroscopic samples by imaging the distribution of metabolites in the longitudinal cross-cut of a chili (Capsicum annuum, 'Jalapeño pepper') fruit. The localization of capsaicin in the placenta could be confirmed. But additionally, yet unknown low molecular weight compounds were detected in defined areas, which underline the potential of LTP-MSI for the imaging of volatile and semi-volatile metabolites and for the discovery of new natural products. Biological significance Knowledge about the spatial distribution of metabolites, proteins, or lipids in a given tissue often leads to novel findings in medicine and biology. Therefore, mass spectrometry based imaging (MSI) is becoming increasingly popular in life science research. 
However, the investment for necessary equipment is often prohibitive for small research groups. We built a prototype with an ambient ionization source, which is easy and economical to adopt for different types of mass analyzers. Therefore, we hope that our system contributes to a broader use of mass spectrometry imaging for answering biological questions.",2014-03-15 +30727272,First Report of Powdery Mildew Caused by Podosphaera euphorbiae-hirtae on Euphorbia tithymaloides in California.,"Euphorbia tithymaloides (Euphorbiaceae; known as 'Jacob's ladder,' 'Devil's Backbone') is a perennial, succulent spurge, grown primarily as a border plant in ornamental landscapes. In June 2011 and February 2012, the California Department of Food and Agriculture Plant Pest Diagnostics Lab, Sacramento, CA, received an unusual powdery mildew sample on greenhouse-grown E. tithymaloides from a Ventura County, CA nursery. Disease incidence at the nursery was 100%. White mycelial patches were present on the stems and on both sides of the leaves. Over time, heavily infected branches defoliated and brownish, roughened, scabby lesions developed on the stems. Hyphae were thin-walled, up to 8 μm wide and developed nipple-shaped appressoria. Ellipsoid-ovoid conidia measured 21.0 to 32.5 × 13 to 18 μm (avg. 26.4 × 13.9 μm, n = 20) and formed in chains. The rDNA internal transcribed spacer (ITS) region was amplified with primers PFITS-F and PF5.8-R (4). The 387-bp sequence (GenBank JX006103) was 99% similar (346/347 bp) to Podosphaera euphorbia-hirtae (AB040306) from Acalypha australis (Euphorbiaceae) (3). Based on ITS similarity and culture morphology, the fungus was identified as P. euphorbiae-hirtae U. Braun & Somani (1,3). Pathogenicity was confirmed through inoculation by gently pressing diseased leaves from the nursery onto the youngest leaves of three plants each of E. tithymaloides cultivars 'Nano' and 'Variegated.' 
Leaves of an equal number of control plants were pressed with healthy leaves. Plants were incubated in a dew chamber for 48 h after which they were transferred to a 22°C growth chamber with a 12-h photoperiod. The experiment was repeated once. White powdery mildew colonies formed after 7 days on 'Variegated' and 13 days on 'Nano'. Conidia measured 27.5 to 35.0 × 11 to 15 μm (avg. 30.5 × 12.6 μm, n = 30) which was within the range of P. euphorbia-hirtae. No symptoms developed on the control plants. P. euphorbiae-hirtae has been reported in Asia and the UK on E. tithymaloides and in Asia on A. australis (2). An asexual Oidium stage on Euphorbiaceae in Asia, Africa, Australia, Florida, Puerto Rico, Cuba, and the U.S. Virgin Islands may correspond to P. euphorbiae-hirtae (2). To our knowledge, this is the first report of P. euphorbiae-hirtae in California. Following the 2011 and 2012 detections, all E. tithymaloides plants in the Ventura County, CA nursery were destroyed. A regulatory trace back survey found that the plants were shipped from a Florida supplier, which was also shown to have an outbreak of P. euphorbiae-hirtae. The original source of the Florida E. tithymaloides plants was a 2010 shipment from Costa Rica. The host range of P. euphorbiae-hirtae is restricted to three landscape species in the Euphorbiaceae. References: (1) U. Braun. Beih. Nova Hedwigia 89:143, 1987. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/index.cfm May 1, 2012. (3) T. Hirata. et al. Can. J. Bot. 78:1521, 2000. (4) R. Singh et al. Plant Dis. 93:1348, 2009.",2012-12-01 +24497029,Deep conservation of human protein tandem repeats within the eukaryotes.,"Tandem repeats (TRs) are a major element of protein sequences in all domains of life. They are particularly abundant in mammals, where by conservative estimates one in three proteins contain a TR. 
High generation-scale duplication and deletion rates were reported for nucleic TR units. However, it is not known whether protein TR units can also be frequently lost or gained providing a source of variation for rapid adaptation of protein function, or alternatively, tend to have conserved TR unit configurations over long evolutionary times. To obtain a systematic picture, we performed a proteome-wide analysis of the mode of evolution for human protein TRs. For this purpose, we propose a novel method for the detection of orthologous TRs based on circular profile hidden Markov models. For all detected TRs, we reconstructed bispecies TR unit phylogenies across 61 eukaryotes ranging from human to yeast. Moreover, we performed additional analyses to correlate functional and structural annotations of human TRs with their mode of evolution. Surprisingly, we find that the vast majority of human TRs are ancient, with TR unit number and order preserved intact since distant speciation events. For example, ≥ 61% of all human TRs have been strongly conserved at least since the root of all mammals, approximately 300 Ma. Further, we find no human protein TR that shows evidence for strong recent duplications and deletions. The results are in contrast to the high generation-scale mutability of nucleic TRs. Presumably, most protein TRs fold into stable and conserved structures that are indispensable for the function of the TR-containing protein. All of our data and results are available for download from http://www.atgc-montpellier.fr/TRE.",2014-02-03 +24638223,ST-analyzer: a web-based user interface for simulation trajectory analysis.,"Molecular dynamics (MD) simulation has become one of the key tools to obtain deeper insights into biological systems using various levels of descriptions such as all-atom, united-atom, and coarse-grained models. 
Recent advances in computing resources and MD programs have significantly accelerated the simulation time and thus increased the amount of trajectory data. Although many laboratories routinely perform MD simulations, analyzing MD trajectories is still time consuming and often a difficult task. ST-analyzer, http://im.bioinformatics.ku.edu/st-analyzer, is a standalone graphical user interface (GUI) toolset to perform various trajectory analyses. ST-analyzer has several outstanding features compared to other existing analysis tools: (i) handling various formats of trajectory files from MD programs, such as CHARMM, NAMD, GROMACS, and Amber, (ii) intuitive web-based GUI environment--minimizing administrative load and reducing burdens on the user from adapting new software environments, (iii) platform independent design--working with any existing operating system, (iv) easy integration into job queuing systems--providing options of batch processing either on the cluster or in an interactive mode, and (v) providing independence between foreground GUI and background modules--making it easier to add personal modules or to recycle/integrate pre-existing scripts utilizing other analysis tools. The current ST-analyzer contains nine main analysis modules that together contain 18 options, including density profile, lipid deuterium order parameters, surface area per lipid, and membrane hydrophobic thickness. This article introduces ST-analyzer with its design, implementation, and features, and also illustrates practical analysis of lipid bilayer simulations.",2014-03-17 +21863128,On differential gene expression using RNA-Seq data.,"

Motivation

RNA-Seq is a novel technology that provides read counts of RNA fragments in each gene, including the mapped positions of each read within each gene. Besides many other applications it can be used to detect differentially expressed genes. Most published methods collapse the position-level read data into a single gene-specific expression measurement. Statistical inference proceeds by modeling these gene-level expression measurements.

Results

We present a Bayesian method of calling differential expression (BM-DE) that directly models the position-level read counts. We demonstrate the potential advantage of the BM-DE method compared to existing approaches that rely on gene-level aggregate data. An important additional feature of the proposed approach is that BM-DE can be used to analyze RNA-Seq data from experiments without biological replicates. This becomes possible since the approach works with multiple position-level read counts for each gene. We demonstrate the importance of modeling for position-level read counts with a yeast data set and a simulation study.

Availability

A public domain R package is available from http://odin.mdacc.tmc.edu/~ylji/BMDE/.",2011-08-01 +22073121,A multi-sample based method for identifying common CNVs in normal human genomic structure using high-resolution aCGH data.,"

Background

It is difficult to identify copy number variations (CNV) in normal human genomic data due to noise and non-linear relationships between different genomic regions and signal intensity. A high-resolution array comparative genomic hybridization (aCGH) containing 42 million probes, which is very large compared to previous arrays, was recently published. Most existing CNV detection algorithms do not work well because of noise associated with the large amount of input data and because most of the current methods were not designed to analyze normal human samples. Normal human genome analysis often requires a joint approach across multiple samples. However, the majority of existing methods can only identify CNVs from a single sample.

Methodology and principal findings

We developed a multi-sample-based genomic variations detector (MGVD) that uses segmentation to identify common breakpoints across multiple samples and a k-means-based clustering strategy. Unlike previous methods, MGVD simultaneously considers multiple samples with different genomic intensities and identifies CNVs and CNV zones (CNVZs); CNVZ is a more precise measure of the location of a genomic variant than the CNV region (CNVR).

Conclusions and significance

We designed a specialized algorithm to detect common CNVs from extremely high-resolution multi-sample aCGH data. MGVD showed high sensitivity and a low false discovery rate for a simulated data set, and outperformed most current methods when real, high-resolution HapMap datasets were analyzed. MGVD also had the fastest runtime compared to the other algorithms evaluated when actual, high-resolution aCGH data were analyzed. The CNVZs identified by MGVD can be used in association studies for revealing relationships between phenotypes and genomic aberrations. Our algorithm was developed with standard C++ and is available in Linux and MS Windows format in the STL library. It is freely available at: http://embio.yonsei.ac.kr/~Park/mgvd.php.",2011-10-31 +24561350,"Exploring medical diagnostic performance using interactive, multi-parameter sourced receiver operating characteristic scatter plots.","Determining diagnostic criteria for specific disorders is often a tedious task that involves determining optimal diagnostic thresholds for symptoms and biomarkers using receiver-operating characteristic (ROC) statistics. To help this endeavor, we developed softROC, a user-friendly graphic-based tool that lets users visually explore possible ROC tradeoffs. The software requires MATLAB installation and an Excel file containing threshold symptoms/biological measures, with corresponding gold standard diagnoses for a set of patients. The software scans the input file for diagnostic and symptom/biomarkers columns, and populates the graphical-user-interface (GUI). Users select symptoms/biomarkers of interest using Boolean algebra as potential inputs to create diagnostic criteria outputs. The software evaluates subtests across the user-established range of cut-points and compares them to a gold standard in order to generate ROC and quality ROC scatter plots. These plots can be examined interactively to find optimal cut-points of interest for a given application (e.g. 
sensitivity versus specificity needs). Split-set validation can also be used to set up criteria and validate these in independent samples. Bootstrapping is used to produce confidence intervals. Additional statistics and measures are provided, such as the area under the ROC curve (AUC). As a testing set, softROC is used to investigate nocturnal polysomnogram measures as diagnostic features for narcolepsy. All measures can be outputted to a text file for offline analysis. The softROC toolbox, with clinical training data and tutorial instruction manual, is provided as supplementary material and can be obtained online at http://www.stanford.edu/~hyatt4/software/softroc or from the open source repository at http://www.github.com/informaton/softroc.",2014-02-03 +24198250,POGO-DB--a database of pairwise-comparisons of genomes and conserved orthologous genes.,"POGO-DB (http://pogo.ece.drexel.edu/) provides an easy platform for comparative microbial genomics. POGO-DB allows users to compare genomes using pre-computed metrics that were derived from extensive computationally intensive BLAST comparisons of >2000 microbes. These metrics include (i) average protein sequence identity across all orthologs shared by two genomes, (ii) genomic fluidity (a measure of gene content dissimilarity), (iii) number of 'orthologs' shared between two genomes, (iv) pairwise identity of the 16S ribosomal RNA genes and (v) pairwise identity of an additional 73 marker genes present in >90% prokaryotes. Users can visualize these metrics against each other in a 2D plot for exploratory analysis of genome similarity and of how different aspects of genome similarity relate to each other. The results of these comparisons are fully downloadable. In addition, users can download raw BLAST results for all or user-selected comparisons. 
Therefore, we provide users with full flexibility to carry out their own downstream analyses, by creating easy access to data that would normally require heavy computational resources to generate. POGO-DB should prove highly useful for researchers interested in comparative microbiology and benefit the microbiome/metagenomic communities by providing the information needed to select suitable phylogenetic marker genes within particular lineages.",2013-11-05 +23782613,GPViz: dynamic visualization of genomic regions and variants affecting protein domains.,"

Unlabelled

GPViz is a versatile Java-based software for dynamic gene-centered visualization of genomic regions and/or variants. User-defined data can be loaded in common formats as resulting from analysis workflows used in sequencing applications and studied in the context of the gene, the corresponding transcript isoforms, proteins and their domains or other protein features. Both the genomic regions and variants can be also defined interactively. Various gene filter options are provided to enable an intersection of variants, genomic regions and affected protein features. Finally, by using GPViz, we identified differentially expressed exons, which could indicate alternative splicing events, and found somatic variants in different cancer types affecting metabolic proteins. GPViz is freely available at http://icbi.at/gpviz (released under GNU general public license), is based on Java 7 and can be used as a stand-alone or Web Start application.

Availability

http://icbi.at/gpviz",2013-06-19 +23226242,Accurate diagnostics for Bovine tuberculosis based on high-throughput sequencing.,"

Background

Bovine tuberculosis (bTB) is an enduring contagious disease of cattle that has caused substantial losses to the global livestock industry. Despite large-scale eradication efforts, bTB continues to persist. Current bTB tests rely on the measurement of immune responses in vivo (skin tests), and in vitro (bovine interferon-γ release assay). Recent developments are characterized by interrogating the expression of an increasing number of genes that participate in the immune response. Currently used assays have the disadvantages of limited sensitivity and specificity, which may lead to incomplete eradication of bTB. Moreover, bTB that reemerges from wild disease reservoirs requires early and reliable diagnostics to prevent further spread. In this work, we use high-throughput sequencing of the peripheral blood mononuclear cells (PBMCs) transcriptome to identify an extensive panel of genes that participate in the immune response. We also investigate the possibility of developing a reliable bTB classification framework based on RNA-Seq reads.

Methodology/principal findings

Pooled PBMC mRNA samples from unaffected calves as well as from those with disease progression of 1 and 2 months were sequenced using the Illumina Genome Analyzer II. More than 90 million reads were splice-aligned against the reference genome, and deposited to the database for further expression analysis and visualization. Using this database, we identified 2,312 genes that were differentially expressed in response to bTB infection (p<10(-8)). We achieved a bTB infected status classification accuracy of more than 99% with split-sample validation on newly designed and learned mixtures of expression profiles.

Conclusions/significance

We demonstrated that bTB can be accurately diagnosed at the early stages of disease progression based on RNA-Seq high-throughput sequencing. The inclusion of multiple genes in the diagnostic panel, combined with the superior sensitivity and broader dynamic range of RNA-Seq, has the potential to improve the accuracy of bTB diagnostics. The computational pipeline used for the project is available from http://code.google.com/p/bovine-tb-prediction.",2012-11-30 +23203984,MetaMicrobesOnline: phylogenomic analysis of microbial communities.,"The metaMicrobesOnline database (freely available at http://meta.MicrobesOnline.org) offers phylogenetic analysis of genes from microbial genomes and metagenomes. Gene trees are constructed for canonical gene families such as COG and Pfam. Such gene trees allow for rapid homologue analysis and subfamily comparison of genes from multiple metagenomes and comparisons with genes from microbial isolates. Additionally, the genome browser permits genome context comparisons, which may be used to determine the closest sequenced genome or suggest functionally associated genes. Lastly, the domain browser permits rapid comparison of protein domain organization within genes of interest from metagenomes and complete microbial genomes.",2012-11-30 +23203986,Genome3D: a UK collaborative project to annotate genomic sequences with predicted 3D structures based on SCOP and CATH domains.,"Genome3D, available at http://www.genome3d.eu, is a new collaborative project that integrates UK-based structural resources to provide a unique perspective on sequence-structure-function relationships. Leading structure prediction resources (DomSerf, FUGUE, Gene3D, pDomTHREADER, Phyre and SUPERFAMILY) provide annotations for UniProt sequences to indicate the locations of structural domains (structural annotations) and their 3D structures (structural models). Structural annotations and 3D model predictions are currently available for three model genomes (Homo sapiens, E. 
coli and baker's yeast), and the project will extend to other genomes in the near future. As these resources exploit different strategies for predicting structures, the main aim of Genome3D is to enable comparisons between all the resources so that biologists can see where predictions agree and are therefore more trusted. Furthermore, as these methods differ in whether they build their predictions using CATH or SCOP, Genome3D also contains the first official mapping between these two databases. This has identified pairs of similar superfamilies from the two resources at various degrees of consensus (532 bronze pairs, 527 silver pairs and 370 gold pairs).",2012-11-30 +22820204,GenAlEx 6.5: genetic analysis in Excel. Population genetic software for teaching and research--an update.,"

Summary

GenAlEx: Genetic Analysis in Excel is a cross-platform package for population genetic analyses that runs within Microsoft Excel. GenAlEx offers analysis of diploid codominant, haploid and binary genetic loci and DNA sequences. Both frequency-based (F-statistics, heterozygosity, HWE, population assignment, relatedness) and distance-based (AMOVA, PCoA, Mantel tests, multivariate spatial autocorrelation) analyses are provided. New features include calculation of new estimators of population structure: G'(ST), G''(ST), Jost's D(est) and F'(ST) through AMOVA, Shannon Information analysis, linkage disequilibrium analysis for biallelic data and novel heterogeneity tests for spatial autocorrelation analysis. Export to more than 30 other data formats is provided. Teaching tutorials and expanded step-by-step output options are included. The comprehensive guide has been fully revised.

Availability and implementation

GenAlEx is written in VBA and provided as a Microsoft Excel Add-in (compatible with Excel 2003, 2007, 2010 on PC; Excel 2004, 2011 on Macintosh). GenAlEx, and supporting documentation and tutorials are freely available at: http://biology.anu.edu.au/GenAlEx.

Contact

rod.peakall@anu.edu.au.",2012-07-20 +22815924,TAGCNA: a method to identify significant consensus events of copy number alterations in cancer.,"Somatic copy number alteration (CNA) is a common phenomenon in cancer genome. Distinguishing significant consensus events (SCEs) from random background CNAs in a set of subjects has been proven to be a valuable tool to study cancer. In order to identify SCEs with an acceptable type I error rate, better computational approaches should be developed based on reasonable statistics and null distributions. In this article, we propose a new approach named TAGCNA for identifying SCEs in somatic CNAs that may encompass cancer driver genes. TAGCNA employs a peel-off permutation scheme to generate a reasonable null distribution based on a prior step of selecting tag CNA markers from the genome being considered. We demonstrate the statistical power of TAGCNA on simulated ground truth data, and validate its applicability using two publicly available cancer datasets: lung and prostate adenocarcinoma. TAGCNA identifies SCEs that are known to be involved with proto-oncogenes (e.g. EGFR, CDK4) and tumor suppressor genes (e.g. CDKN2A, CDKN2B), and provides many additional SCEs with potential biological relevance in these data. TAGCNA can be used to analyze the significance of CNAs in various cancers. It is implemented in R and is freely available at http://tagcna.sourceforge.net/.",2012-07-18 +23431393,Predicting the binding patterns of hub proteins: a study using yeast protein interaction networks.,"

Background

Protein-protein interactions are critical to elucidating the role played by individual proteins in important biological pathways. Of particular interest are hub proteins that can interact with large numbers of partners and often play essential roles in cellular control. Depending on the number of binding sites, protein hubs can be classified at a structural level as singlish-interface hubs (SIH) with one or two binding sites, or multiple-interface hubs (MIH) with three or more binding sites. In terms of kinetics, hub proteins can be classified as date hubs (i.e., interact with different partners at different times or locations) or party hubs (i.e., simultaneously interact with multiple partners).

Methodology

Our approach works in 3 phases: Phase I classifies if a protein is likely to bind with another protein. Phase II determines if a protein-binding (PB) protein is a hub. Phase III classifies PB proteins as singlish-interface versus multiple-interface hubs and date versus party hubs. At each stage, we use sequence-based predictors trained using several standard machine learning techniques.

Conclusions

Our method is able to predict whether a protein is a protein-binding protein with an accuracy of 94% and a correlation coefficient of 0.87; identify hubs from non-hubs with 100% accuracy for 30% of the data; distinguish date hubs/party hubs with 69% accuracy and area under ROC curve of 0.68; and SIH/MIH with 89% accuracy and area under ROC curve of 0.84. Because our method is based on sequence information alone, it can be used even in settings where reliable protein-protein interaction data or structures of protein-protein complexes are unavailable to obtain useful insights into the functional and evolutionary characteristics of proteins and their interactions.

Availability

We provide a web server for our three-phase approach: http://hybsvm.gdcb.iastate.edu.",2013-02-19 +23209700,PROSPER: an integrated feature-based tool for predicting protease substrate cleavage sites.,"The ability to catalytically cleave protein substrates after synthesis is fundamental for all forms of life. Accordingly, site-specific proteolysis is one of the most important post-translational modifications. The key to understanding the physiological role of a protease is to identify its natural substrate(s). Knowledge of the substrate specificity of a protease can dramatically improve our ability to predict its target protein substrates, but this information must be utilized in an effective manner in order to efficiently identify protein substrates by in silico approaches. To address this problem, we present PROSPER, an integrated feature-based server for in silico identification of protease substrates and their cleavage sites for twenty-four different proteases. PROSPER utilizes established specificity information for these proteases (derived from the MEROPS database) with a machine learning approach to predict protease cleavage sites by using different, but complementary sequence and structure characteristics. Features used by PROSPER include local amino acid sequence profile, predicted secondary structure, solvent accessibility and predicted native disorder. Thus, for proteases with known amino acid specificity, PROSPER provides a convenient, pre-prepared tool for use in identifying protein substrates for the enzymes. Systematic prediction analysis for the twenty-four proteases thus far included in the database revealed that the features we have included in the tool strongly improve performance in terms of cleavage site prediction, as evidenced by their contribution to performance improvement in terms of identifying known cleavage sites in substrates for these enzymes. 
In comparison with two state-of-the-art prediction tools, PoPS and SitePrediction, PROSPER achieves greater accuracy and coverage. To our knowledge, PROSPER is the first comprehensive server capable of predicting cleavage sites of multiple proteases within a single substrate sequence using machine learning techniques. It is freely available at http://lightning.med.monash.edu.au/PROSPER/.",2012-11-29 +22026390,Three-dimensional modeling of chromatin structure from interaction frequency data using Markov chain Monte Carlo sampling.,"

Background

Long-range interactions between regulatory DNA elements such as enhancers, insulators and promoters play an important role in regulating transcription. As chromatin contacts have been found throughout the human genome and in different cell types, spatial transcriptional control is now viewed as a general mechanism of gene expression regulation. Chromosome Conformation Capture Carbon Copy (5C) and its variant Hi-C are techniques used to measure the interaction frequency (IF) between specific regions of the genome. Our goal is to use the IF data generated by these experiments to computationally model and analyze three-dimensional chromatin organization.

Results

We formulate a probabilistic model linking 5C/Hi-C data to physical distances and describe a Markov chain Monte Carlo (MCMC) approach called MCMC5C to generate a representative sample from the posterior distribution over structures from IF data. Structures produced from parallel MCMC runs on the same dataset demonstrate that our MCMC method mixes quickly and is able to sample from the posterior distribution of structures and find subclasses of structures. Structural properties (base looping, condensation, and local density) were defined and their distribution measured across the ensembles of structures generated. We applied these methods to a biological model of human myelomonocyte cellular differentiation and identified distinct chromatin conformation signatures (CCSs) corresponding to each of the cellular states. We also demonstrate the ability of our method to run on Hi-C data and produce a model of human chromosome 14 at 1Mb resolution that is consistent with previously observed structural properties as measured by 3D-FISH.

Conclusions

We believe that tools like MCMC5C are essential for the reliable analysis of data from the 3C-derived techniques such as 5C and Hi-C. By integrating complex, high-dimensional and noisy datasets into an easy to interpret ensemble of three-dimensional conformations, MCMC5C allows researchers to reliably interpret the result of their assay and contrast conformations under different conditions.

Availability

http://Dostielab.biochem.mcgill.ca.",2011-10-25 +23203870,EENdb: a database and knowledge base of ZFNs and TALENs for endonuclease engineering.,"We report here the construction of engineered endonuclease database (EENdb) (http://eendb.zfgenetics.org/), a searchable database and knowledge base for customizable engineered endonucleases (EENs), including zinc finger nucleases (ZFNs) and transcription activator-like effector nucleases (TALENs). EENs are artificial nucleases designed to target and cleave specific DNA sequences. EENs have been shown to be a very useful genetic tool for targeted genome modification and have shown great potentials in the applications in basic research, clinical therapies and agricultural utilities, and they are specifically essential for reverse genetics research in species where no other gene targeting techniques are available. EENdb contains over 700 records of all the reported ZFNs and TALENs and related information, such as their target sequences, the peptide components [zinc finger protein-/transcription activator-like effector (TALE)-binding domains, FokI variants and linker peptide/framework], the efficiency and specificity of their activities. The database also lists EEN engineering tools and resources as well as information about forms and types of EENs, EEN screening and construction methods, detection methods for targeting efficiency and many other utilities. The aim of EENdb is to represent a central hub for EEN information and an integrated solution for EEN engineering. These studies may help to extract in-depth properties and common rules regarding ZFN or TALEN efficiency through comparison of the known ZFNs or TALENs.",2012-11-29 +23196988,NURBS: a database of experimental and predicted nuclear receptor binding sites of mouse.,"

Summary

Nuclear receptors (NRs) are a class of transcription factors playing important roles in various biological processes. An NR often impacts numerous genes and different NRs share overlapped target networks. To fulfil the need for a database incorporating binding sites of different NRs at various conditions for easy comparison and visualization to improve our understanding of NR binding mechanisms, we have developed NURBS, a database for experimental and predicted nuclear receptor binding sites of mouse (NURBS). NURBS currently contains binding sites across the whole-mouse genome of 8 NRs identified in 40 chromatin immunoprecipitation with massively parallel DNA sequencing experiments. All datasets are processed using a widely used procedure and same statistical criteria to ensure the binding sites derived from different datasets are comparable. NURBS also provides predicted binding sites using NR-HMM, a Hidden Markov Model (HMM) model.

Availability

The GBrowse-based user interface of NURBS is freely accessible at http://shark.abl.ku.edu/nurbs/. NR-HMM and all results can be downloaded for free at the website.

Contact

jwfang@ku.edu",2012-11-29 +21794104,IsoformEx: isoform level gene expression estimation using weighted non-negative least squares from mRNA-Seq data.,"

Background

mRNA-Seq technology has revolutionized the field of transcriptomics for identification and quantification of gene transcripts not only at gene level but also at isoform level. Estimating the expression levels of transcript isoforms from mRNA-Seq data is a challenging problem due to the presence of constitutive exons.

Results

We propose a novel algorithm (IsoformEx) that employs weighted non-negative least squares estimation method to estimate the expression levels of transcript isoforms. Validations based on in silico simulation of mRNA-Seq and qRT-PCR experiments with real mRNA-Seq data showed that IsoformEx could accurately estimate transcript expression levels. In comparisons with published methods, the transcript expression levels estimated by IsoformEx showed higher correlation with known transcript expression levels from simulated mRNA-Seq data, and higher agreement with qRT-PCR measurements of specific transcripts for real mRNA-Seq data.

Conclusions

IsoformEx is a fast and accurate algorithm to estimate transcript expression levels and gene expression levels, which takes into account short exons and alternative exons with a weighting scheme. The software is available at http://bioinformatics.wistar.upenn.edu/isoformex.",2011-07-27 +21624158,HyQue: evaluating hypotheses using Semantic Web technologies.,"

Background

Key to the success of e-Science is the ability to computationally evaluate expert-composed hypotheses for validity against experimental data. Researchers face the challenge of collecting, evaluating and integrating large amounts of diverse information to compose and evaluate a hypothesis. Confronted with rapidly accumulating data, researchers currently do not have the software tools to undertake the required information integration tasks.

Results

We present HyQue, a Semantic Web tool for querying scientific knowledge bases with the purpose of evaluating user submitted hypotheses. HyQue features a knowledge model to accommodate diverse hypotheses structured as events and represented using Semantic Web languages (RDF/OWL). Hypothesis validity is evaluated against experimental and literature-sourced evidence through a combination of SPARQL queries and evaluation rules. Inference over OWL ontologies (for type specifications, subclass assertions and parthood relations) and retrieval of facts stored as Bio2RDF linked data provide support for a given hypothesis. We evaluate hypotheses of varying levels of detail about the genetic network controlling galactose metabolism in Saccharomyces cerevisiae to demonstrate the feasibility of deploying such semantic computing tools over a growing body of structured knowledge in Bio2RDF.

Conclusions

HyQue is a query-based hypothesis evaluation system that can currently evaluate hypotheses about the galactose metabolism in S. cerevisiae. Hypotheses as well as the supporting or refuting data are represented in RDF and directly linked to one another allowing scientists to browse from data to hypothesis and vice versa. HyQue hypotheses and data are available at http://semanticscience.org/projects/hyque.",2011-05-17 +23197658,The Disease and Gene Annotations (DGA): an annotation resource for human disease.,"Disease and Gene Annotations database (DGA, http://dga.nubic.northwestern.edu) is a collaborative effort aiming to provide a comprehensive and integrative annotation of the human genes in disease network context by integrating computable controlled vocabulary of the Disease Ontology (DO version 3 revision 2510, which has 8043 inherited, developmental and acquired human diseases), NCBI Gene Reference Into Function (GeneRIF) and molecular interaction network (MIN). DGA integrates these resources together using semantic mappings to build an integrative set of disease-to-gene and gene-to-gene relationships with excellent coverage based on current knowledge. DGA is kept current by periodically reparsing DO, GeneRIF, and MINs. DGA provides a user-friendly and interactive web interface system enabling users to efficiently query, download and visualize the DO tree structure and annotations as a tree, a network graph or a tabular list. To facilitate integrative analysis, DGA provides a web service Application Programming Interface for integration with external analytic tools.",2012-11-28 +23209562,Wiki-pi: a web-server of annotated human protein-protein interactions to aid in discovery of protein function.,"Protein-protein interactions (PPIs) are the basis of biological functions. Knowledge of the interactions of a protein can help understand its molecular function and its association with different biological processes and pathways. 
Several publicly available databases provide comprehensive information about individual proteins, such as their sequence, structure, and function. There also exist databases that are built exclusively to provide PPIs by curating them from published literature. The information provided in these web resources is protein-centric, and not PPI-centric. The PPIs are typically provided as lists of interactions of a given gene with links to interacting partners; they do not present a comprehensive view of the nature of both the proteins involved in the interactions. A web database that allows search and retrieval based on biomedical characteristics of PPIs is lacking, and is needed. We present Wiki-Pi (read Wiki-π), a web-based interface to a database of human PPIs, which allows users to retrieve interactions by their biomedical attributes such as their association to diseases, pathways, drugs and biological functions. Each retrieved PPI is shown with annotations of both of the participant proteins side-by-side, creating a basis to hypothesize the biological function facilitated by the interaction. Conceptually, it is a search engine for PPIs analogous to PubMed for scientific literature. Its usefulness in generating novel scientific hypotheses is demonstrated through the study of IGSF21, a little-known gene that was recently identified to be associated with diabetic retinopathy. Using Wiki-Pi, we infer that its association to diabetic retinopathy may be mediated through its interactions with the genes HSPB1, KRAS, TMSB4X and DGKD, and that it may be involved in cellular response to external stimuli, cytoskeletal organization and regulation of molecular activity. The website also provides a wiki-like capability allowing users to describe or discuss an interaction. 
Wiki-Pi is available publicly and freely at http://severus.dbmi.pitt.edu/wiki-pi/.",2012-11-28 +23197656,TIGRFAMs and Genome Properties in 2013.,"TIGRFAMs, available online at http://www.jcvi.org/tigrfams is a database of protein family definitions. Each entry features a seed alignment of trusted representative sequences, a hidden Markov model (HMM) built from that alignment, cutoff scores that let automated annotation pipelines decide which proteins are members, and annotations for transfer onto member proteins. Most TIGRFAMs models are designated equivalog, meaning they assign a specific name to proteins conserved in function from a common ancestral sequence. Models describing more functionally heterogeneous families are designated subfamily or domain, and assign less specific but more widely applicable annotations. The Genome Properties database, available at http://www.jcvi.org/genome-properties, specifies how computed evidence, including TIGRFAMs HMM results, should be used to judge whether an enzymatic pathway, a protein complex or another type of molecular subsystem is encoded in a genome. TIGRFAMs and Genome Properties content are developed in concert because subsystems reconstruction for large numbers of genomes guides selection of seed alignment sequences and cutoff values during protein family construction. Both databases specialize heavily in bacterial and archaeal subsystems. At present, 4284 models appear in TIGRFAMs, while 628 systems are described by Genome Properties. Content derives both from subsystem discovery work and from biocuration of the scientific literature.",2012-11-28 +23193279,KIDFamMap: a database of kinase-inhibitor-disease family maps for kinase inhibitor selectivity and binding mechanisms.,"Kinases play central roles in signaling pathways and are promising therapeutic targets for many diseases. 
Designing selective kinase inhibitors is an emergent and challenging task, because kinases share an evolutionary conserved ATP-binding site. KIDFamMap (http://gemdock.life.nctu.edu.tw/KIDFamMap/) is the first database to explore kinase-inhibitor families (KIFs) and kinase-inhibitor-disease (KID) relationships for kinase inhibitor selectivity and mechanisms. This database includes 1208 KIFs, 962 KIDs, 55 603 kinase-inhibitor interactions (KIIs), 35 788 kinase inhibitors, 399 human protein kinases, 339 diseases and 638 disease allelic variants. Here, a KIF can be defined as follows: (i) the kinases in the KIF with significant sequence similarity, (ii) the inhibitors in the KIF with significant topology similarity and (iii) the KIIs in the KIF with significant interaction similarity. The KIIs within a KIF are often conserved on some consensus KIDFamMap anchors, which represent conserved interactions between the kinase subsites and consensus moieties of their inhibitors. Our experimental results reveal that the members of a KIF often possess similar inhibition profiles. The KIDFamMap anchors can reflect kinase conformations types, kinase functions and kinase inhibitor selectivity. We believe that KIDFamMap provides biological insights into kinase inhibitor selectivity and binding mechanisms.",2012-11-28 +23197660,EcoGene 3.0.,"EcoGene (http://ecogene.org) is a database and website devoted to continuously improving the structural and functional annotation of Escherichia coli K-12, one of the most well understood model organisms, represented by the MG1655(Seq) genome sequence and annotations. 
Major improvements to EcoGene in the past decade include (i) graphic presentations of genome map features; (ii) ability to design Boolean queries and Venn diagrams from EcoArray, EcoTopics or user-provided GeneSets; (iii) the genome-wide clone and deletion primer design tool, PrimerPairs; (iv) sequence searches using a customized EcoBLAST; (v) a Cross Reference table of synonymous gene and protein identifiers; (vi) proteome-wide indexing with GO terms; (vii) EcoTools access to >2000 complete bacterial genomes in EcoGene-RefSeq; (viii) establishment of a MySql relational database; and (ix) use of web content management systems. The biomedical literature is surveyed daily to provide citation and gene function updates. As of September 2012, the review of 37 397 abstracts and articles led to creation of 98 425 PubMed-Gene links and 5415 PubMed-Topic links. Annotation updates to Genbank U00096 are transmitted from EcoGene to NCBI. Experimental verifications include confirmation of a CTG start codon, pseudogene restoration and quality assurance of the Keio strain collection.",2012-11-28 +23193298,SecReT4: a web-based bacterial type IV secretion system resource.,"SecReT4 (http://db-mml.sjtu.edu.cn/SecReT4/) is an integrated database providing comprehensive information of type IV secretion systems (T4SSs) in bacteria. T4SSs are versatile assemblages that promote genetic exchange and/or effector translocation with consequent impacts on pathogenesis and genome plasticity. T4SSs have been implicated in conjugation, DNA uptake and release and effector translocation. The effectors injected into eukaryotic target cells can lead to alteration of host cellular processes during infection. SecReT4 offers a unique, highly organized, readily exploreable archive of known and putative T4SSs and cognate effectors in bacteria. 
It currently contains details of 10 752 core components mapping to 808 T4SSs and 1884 T4SS effectors found in representatives of 289 bacterial species, as well as a collection of more than 900 directly related references. A broad range of similarity search, sequence alignment, phylogenetic, primer design and other functional analysis tools are readily accessible via SecReT4. We propose that SecReT4 will facilitate efficient investigation of large numbers of these systems, recognition of diverse patterns of sequence-, gene- and/or functional conservation and an improved understanding of the biological roles and significance of these versatile molecular machines. SecReT4 will be regularly updated to ensure its ongoing maximum utility to the research community.",2012-11-28 +23197659,CDD: conserved domains and protein three-dimensional structure.,"CDD, the Conserved Domain Database, is part of NCBI's Entrez query and retrieval system and is also accessible via http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml. CDD provides annotation of protein sequences with the location of conserved domain footprints and functional sites inferred from these footprints. Pre-computed annotation is available via Entrez, and interactive search services accept single protein or nucleotide queries, as well as batch submissions of protein query sequences, utilizing RPS-BLAST to rapidly identify putative matches. CDD incorporates several protein domain and full-length protein model collections, and maintains an active curation effort that aims at providing fine grained classifications for major and well-characterized protein domain families, as supported by available protein three-dimensional (3D) structure and the published literature. 
To this date, the majority of protein 3D structures are represented by models tracked by CDD, and CDD curators are characterizing novel families that emerge from protein structure determination efforts.",2012-11-28 +24125686,Exploring the biologically relevant chemical space for drug discovery.,"Both recent studies and our calculation suggest that the physicochemical properties of launched drugs changed continuously over the past decades. Besides shifting of commonly used properties, the average biological relevance (BR) and similarity to natural products (NPs) of launched drugs decreased, reflecting the fact that current drug discovery deviated away from NPs. To change the current situation characterized by high investment but low productivity in drug discovery, efforts should be made to improve the BR of the screening library and hunt drugs more effectively in the biologically relevant chemical space. Additionally, a multiple dimensional molecular descriptor, named the biologically relevant spectrum (BRS) was proposed for quantitative structure-activity relationships (QSAR) study or screening library preparation. Prediction models for 43 biological activity categories were developed with BRS and support vector machine (SVM). In most cases, the overall prediction accuracies were around 95% and the Matthew's correlation coefficients (MCC) were over 0.8. Thirty-seven out of 48 drug-activity associations were successfully predicted for drugs that launched from 2006 to 2012, which were not included in the training data set. 
A web-server named BioRel ( http://ibi.hzau.edu.cn/biorel ) was developed to provide services including BR, BRS calculation, activity class, and pharmacokinetic property prediction.",2013-11-01 +23193255,H2DB: a heritability database across multiple species by annotating trait-associated genomic loci.,"H2DB (http://tga.nig.ac.jp/h2db/), an annotation database of genetic heritability estimates for humans and other species, has been developed as a knowledge database to connect trait-associated genomic loci. Heritability estimates have been investigated for individual species, particularly in human twin studies and plant/animal breeding studies. However, there appears to be no comprehensive heritability database for both humans and other species. Here, we introduce an annotation database for genetic heritabilities of various species that was annotated by manually curating online public resources in PUBMED abstracts and journal contents. The proposed heritability database contains attribute information for trait descriptions, experimental conditions, trait-associated genomic loci and broad- and narrow-sense heritability specifications. Annotated trait-associated genomic loci, for which most are single-nucleotide polymorphisms derived from genome-wide association studies, may be valuable resources for experimental scientists. In addition, we assigned phenotype ontologies to the annotated traits for the purposes of discussing heritability distributions based on phenotypic classifications.",2012-11-27 +24621074,Activation of P21-activated protein kinase 2 is an independent prognostic predictor for patients with gastric cancer.,"

Objective

p21-activated kinase (PAK) 2, as a member of the PAK family kinases, is involved in a number of hallmark processes including cell proliferation, survival, mitosis, apoptosis, motility and angiogenesis. However, the clinical significance of the activation of PAK2 in human gastric cancer has not been fully elucidated. The aim of this study was to investigate whether PAK2 expression and its phosphorylation status are correlated with tumor progression and prognosis in gastric cancer.

Methods

Expression patterns and subcellular localizations of PAK2 and Ser20-phosphorylated PAK2 (pSer20PAK2) in 82 gastric cancer patients were detected by immunohistochemistry.

Results

Both PAK2 and pSer20PAK2 immunostainings were localized in the cytoplasm of tumor cells of gastric cancer tissues. Compared with the normal gastric mucosa, the expression levels of PAK2 and pSer20PAK2 proteins were both significantly increased (both P < 0.001). Additionally, the patients displaying the over-expression of PAK2 and pSer20PAK2 proteins were dramatically associated with unfavorable clinicopathologic variables including higher tumor depth (P = 0.022 and 0.036, respectively), greater extent of lymph node metastasis (P = 0.022 and 0.036, respectively), positive distant metastasis (P = 0.025 and 0.038, respectively) and advanced tumor stage (P = 0.018 and 0.031, respectively). Moreover, the patients overexpressing PAK2 and pSer20PAK2 proteins have poor overall survival rates relative to those without overexpression of these proteins. Furthermore, Cox multi-factor analysis showed that PAK2 (p = 0.012) and pSer20PAK2 (p = 0.010) were independent prognosis factors for human gastric cancer.

Conclusion

Our data suggest for the first time that PAK2 activation may be associated with advanced tumor progression and poor prognosis of gastric cancer.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1236344107120406.",2014-03-12 +23193276,KEGG OC: a large-scale automatic construction of taxonomy-based ortholog clusters.,"The identification of orthologous genes in an increasing number of fully sequenced genomes is a challenging issue in recent genome science. Here we present KEGG OC (http://www.genome.jp/tools/oc/), a novel database of ortholog clusters (OCs). The current version of KEGG OC contains 1 176 030 OCs, obtained by clustering 8 357 175 genes in 2112 complete genomes (153 eukaryotes, 1830 bacteria and 129 archaea). The OCs were constructed by applying the quasi-clique-based clustering method to all possible protein coding genes in all complete genomes, based on their amino acid sequence similarities. It is computationally efficient to calculate OCs, which enables to regularly update the contents. KEGG OC has the following two features: (i) It consists of all complete genomes of a wide variety of organisms from three domains of life, and the number of organisms is the largest among the existing databases; and (ii) It is compatible with the KEGG database by sharing the same sets of genes and identifiers, which leads to seamless integration of OCs with useful components in KEGG such as biological pathways, pathway modules, functional hierarchy, diseases and drugs. The KEGG OC resources are accessible via OC Viewer that provides an interactive visualization of OCs at different taxonomic levels.",2012-11-27 +23193256,"ESTHER, the database of the α/β-hydrolase fold superfamily of proteins: tools to explore diversity of functions.","The ESTHER database, which is freely available via a web server (http://bioweb.ensam.inra.fr/esther) and is widely used, is dedicated to proteins with an α/β-hydrolase fold, and it currently contains >30 000 manually curated proteins. 
Herein, we report those substantial changes towards improvement that we have made to improve ESTHER during the past 8 years since our 2004 update. In particular, we generated 87 new families and increased the coverage of the UniProt Knowledgebase (UniProtKB). We also renewed the ESTHER website and added new visualization tools, such as the Overall Table and the Family Tree. We also address two topics of particular interest to the ESTHER users. First, we explain how the different enzyme classifications (bacterial lipases, peptidases, carboxylesterases) used by different communities of users are combined in ESTHER. Second, we discuss how variations of core architecture or in predicted active site residues result in a more precise clustering of families, and whether this strategy provides trustable hints to identify enzyme-like proteins with no catalytic activity.",2012-11-27 +23193262,Genomicus: five genome browsers for comparative genomics in eukaryota.,"Genomicus (http://www.dyogen.ens.fr/genomicus/) is a database and an online tool that allows easy comparative genomic visualization in >150 eukaryote genomes. It provides a way to explore spatial information related to gene organization within and between genomes and temporal relationships related to gene and genome evolution. For the specific vertebrate phylum, it also provides access to ancestral gene order reconstructions and conserved non-coding elements information. We extended the Genomicus database originally dedicated to vertebrate to four new clades, including plants, non-vertebrate metazoa, protists and fungi. This visualization tool allows evolutionary phylogenomics analysis and exploration. 
Here, we describe the graphical modules of Genomicus and show how it is capable of revealing differential gene loss and gain, segmental or genome duplications and study the evolution of a locus through homology relationships.",2012-11-27 +25364211,Gene Selection with Sequential Classification and Regression Tree Algorithm.,"

Background

In the typical setting of gene-selection problems from high-dimensional data, e.g., gene expression data from microarray or next-generation sequencing-based technologies, an enormous volume of high-throughput data is generated, and there is often a need for a simple, computationally-inexpensive, non-parametric screening procedure that can quickly and accurately find a low-dimensional variable subset that preserves biological information from the original very high-dimensional data (dimension p > 40,000). This is in contrast to the very sophisticated variable selection methods that are computationally expensive, need pre-processing routines, and often require calibration of priors.

Results

We present a tree-based sequential CART (S-CART) approach to variable selection in the binary classification setting and compare it against the more sophisticated procedures using simulated and real biological data. In simulated data, we analyze S-CART performance versus (i) a random forest (RF), (ii) a fully-parametric Bayesian stochastic search variable selection (SSVS), and (iii) the moderated t-test statistic from the LIMMA package in R. The simulation study is based on a hierarchical Bayesian model, where dataset dimensionality, percentage of significant variables, and substructure via dependency vary. Selection efficacy is measured through false-discovery and missed-discovery rates. In all scenarios, the S-CART method is seen to consistently outperform SSVS and RF in both speed and detection accuracy. We demonstrate the utility of the S-CART technique both on simulated data and in a control-treatment mouse study. We show that the network analysis based on the S-CART-selected gene subset in essence recapitulates the biological findings of the study using only a fraction of the original set of genes considered in the study's analysis.

Conclusions

The relatively simple-minded gene selection algorithms like S-CART may often in practical circumstances be preferred over much more sophisticated ones. The advantage of the ""greedy"" selection methods utilized by S-CART and the like is that they scale well with the problem size and require virtually no tuning or training while remaining efficient in extracting the relevant information from microarray-like datasets containing a large number of redundant or irrelevant variables.

Availability

The MATLAB 7.4b code for the S-CART implementation is available for download from https://neyman.mcg.edu/posts/scart.zip.",2011-08-01 +22638573,DichroMatch: a website for similarity searching of circular dichroism spectra.,"Circular dichroism (CD) spectroscopy is a widely used method for examining the structure, folding and conformational changes of proteins. A new online CD analysis server (DichroMatch) has been developed for identifying proteins with similar spectral characteristics by detecting possible structurally and functionally related proteins and homologues. DichroMatch includes six different methods for determining the spectral nearest neighbours to a query protein spectrum and provides metrics of how similar these spectra are and, if corresponding crystal structures are available for the closest matched proteins, information on their secondary structures and fold classifications. By default, DichroMatch uses all the entries in the Protein Circular Dichroism Data Bank (PCDDB) for its comparison set, providing the broadest range of publicly available protein spectra to match with the unknown protein. Alternatively, users can download or create their own specialized data sets, thereby enabling comparisons between the structures of related proteins such as wild-type versus mutants or homologues or a series of spectra of the same protein under different conditions. The DichroMatch server is freely available at http://dichromatch.cryst.bbk.ac.uk.",2012-05-25 +23185043,DoBISCUIT: a database of secondary metabolite biosynthetic gene clusters.,"This article introduces DoBISCUIT (Database of BIoSynthesis clusters CUrated and InTegrated, http://www.bio.nite.go.jp/pks/), a literature-based, manually curated database of gene clusters for secondary metabolite biosynthesis. Bacterial secondary metabolites often show pharmacologically important activities and can serve as lead compounds and/or candidates for drug development. 
Biosynthesis of each secondary metabolite is catalyzed by a number of enzymes, usually encoded by a gene cluster. Although many scientific papers describe such gene clusters, the gene information is not always described in a comprehensive manner and the related information is rarely integrated. DoBISCUIT integrates the latest literature information and provides standardized gene/module/domain descriptions related to the gene clusters.",2012-11-26 +24605673,[Genotyping of parvovirus B19 isolates circulating in Northwestern Federal District of Russia].,"

Aim

Genotyping and phylogenetic analysis of parvovirus B19 isolates isolated on the territories of Northwestern Federal District (NWFD) of Russia.

Materials and methods

61 blood sera and 30 oropharyngeal lavages obtained from patients with maculopapular rash from various territories of NWFD were studied for the presence of parvovirus B 19 DNA (PVB 19). DNA isolation and amplification was carried out by standard techniques. DNA segment including fragment of non-structural gene NS1 and region of structural gene VP1 (NS1-VP1u, 994 nucleotides) was sequenced, original sequences of oligonucleotide primers were selected for this purpose. Phylogenetic analysis was carried out online on the website http://www.phylogeny.fr. Data for tree construction was obtained from GenBank.

Results

PVB 19 DNA was detected in 45% of samples. PVB19 genome segment was sequenced in 8 samples. All the PVB 19 isolates belong to a single cluster of 1A genotype. Isolate 57.12 from Komi Republic is similar to ISR-G strain isolated from Israel.

Conclusion

Phylogenetic analysis showed a high degree of genetic similarity between PVB 19 isolates circulating on the territories of NWFD, their membership in the most widespread genotype in the world. Local and import cases ofparvovirus infection (PVI) were identified. The authors make a conclusion on the necessity to include PVI into the system of rubella and measles control.",2013-11-01 +22798331,From trajectories to averages: an improved description of the heterogeneity of substitution rates along lineages.,"The accuracy and precision of species divergence date estimation from molecular data strongly depend on the models describing the variation of substitution rates along a phylogeny. These models generally assume that rates randomly fluctuate along branches from one node to the next. However, for mathematical convenience, the stochasticity of such a process is ignored when translating these rate trajectories into branch lengths. This study addresses this shortcoming. A new approach is described that explicitly considers the average substitution rates along branches as random quantities, resulting in a more realistic description of the variations of evolutionary rates along lineages. The proposed method provides more precise estimates of the rate autocorrelation parameter as well as divergence times. Also, simulation results indicate that ignoring the stochastic variation of rates along edges can lead to significant overestimation of specific node ages. Altogether, the new approach introduced in this study is a step forward to designing biologically relevant models of rate evolution that are well suited to data sets with dense taxon sampling which are likely to present rate autocorrelation. 
The computer programme PhyTime, part of the PhyML package and implementing the new approach, is available from http://code.google.com/p/phyml (last accessed 1 August 2012).",2012-07-12 +24736875,NTP Monograph: Developmental Effects and Pregnancy Outcomes Associated With Cancer Chemotherapy Use During Pregnancy.,"The National Toxicology Program (NTP) Office of Health Assessment and Translation (OHAT) conducted an evaluation of the developmental effects and pregnancy outcomes associated with cancer chemotherapy use during pregnancy in humans. The final NTP monograph was completed in May 2013 (available at http:// ntp.niehs.nih.gov/go/36495). The incidence of cancer during pregnancy has been reported to occur from 17 to 100 per 100,000 pregnant women. Chemotherapy is a common treatment for cancer; however, most chemotherapy agents are classified as known or suspected human teratogens. Cancer chemotherapy use during pregnancy was selected for evaluation by the NTP because of the: (1) paucity of comprehensive reviews on the pregnancy outcomes following cancer chemotherapy use during pregnancy in humans, including the integration of the developmental animal toxicology literature with the observational studies in humans, and (2) growing public interest in the developmental effects of chemotherapy on offspring exposed to cancer chemotherapy during gestation due to the expected incidence of cancer diagnosed during pregnancy as women delay pregnancy to later ages. Of the approximately 110 cancer chemotherapeutic agents currently in use, the NTP monograph includes data on 56 agents used during 1,261 pregnancies for which pregnancy outcomes were documented. 
Overall, the NTP evaluation found that treatment with chemotherapy for cancer appeared to be associated with: (1) a higher rate of major malformations following exposure during the first trimester compared to exposure in the second and/or third trimester; (2) an increase the rate of stillbirth following exposure in the second and/ or third trimester; abnormally low levels of amniotic fluid (primarily attributable to Trastuzumab); and (3), also data are insufficient, impaired fetal growth and myelosuppression. Treatment with chemotherapy for cancer during pregnancy did not appear to increase spontaneous preterm birth, or impair normal growth and development of offspring during early life. In addition, the NTP monograph provides background materials on individual cancer chemotherapeutic agents (e.g., evidence for placenta and breast milk transport, developmental toxicity in animals), and a brief review of the prevalence and prognosis of seven frequently diagnosed cancers in women during pregnancy. Finally, the NTP monograph identifies challenges in interpreting the health outcomes from this observational literature base and discussed possible actions to improve the understanding of the developmental effects of chemotherapy treatment for cancer administered during pregnancy.",2013-05-01 +24367574,Switch of sensitivity dynamics revealed with DyGloSA toolbox for dynamical global sensitivity analysis as an early warning for system's critical transition.,"Systems with bifurcations may experience abrupt irreversible and often unwanted shifts in their performance, called critical transitions. For many systems like climate, economy, ecosystems it is highly desirable to identify indicators serving as early warnings of such regime shifts. Several statistical measures were recently proposed as early warnings of critical transitions including increased variance, autocorrelation and skewness of experimental or model-generated data. 
The lack of automatized tool for model-based prediction of critical transitions led to designing DyGloSA - a MATLAB toolbox for dynamical global parameter sensitivity analysis (GPSA) of ordinary differential equations models. We suggest that the switch in dynamics of parameter sensitivities revealed by our toolbox is an early warning that a system is approaching a critical transition. We illustrate the efficiency of our toolbox by analyzing several models with bifurcations and predicting the time periods when systems can still avoid going to a critical transition by manipulating certain parameter values, which is not detectable with the existing SA techniques. DyGloSA is based on the SBToolbox2 and contains functions, which compute dynamically the global sensitivity indices of the system by applying four main GPSA methods: eFAST, Sobol's ANOVA, PRCC and WALS. It includes parallelized versions of the functions enabling significant reduction of the computational time (up to 12 times). DyGloSA is freely available as a set of MATLAB scripts at http://bio.uni.lu/systems_biology/software/dyglosa. It requires installation of MATLAB (versions R2008b or later) and the Systems Biology Toolbox2 available at www.sbtoolbox2.org. DyGloSA can be run on Windows and Linux systems, -32 and -64 bits.",2013-12-18 +21249219,ePlant and the 3D data display initiative: integrative systems biology on the world wide web.,"Visualization tools for biological data are often limited in their ability to interactively integrate data at multiple scales. These computational tools are also typically limited by two-dimensional displays and programmatic implementations that require separate configurations for each of the user's computing devices and recompilation for functional expansion. 
Towards overcoming these limitations we have developed ""ePlant"" (http://bar.utoronto.ca/eplant) - a suite of open-source world wide web-based tools for the visualization of large-scale data sets from the model organism Arabidopsis thaliana. These tools display data spanning multiple biological scales on interactive three-dimensional models. Currently, ePlant consists of the following modules: a sequence conservation explorer that includes homology relationships and single nucleotide polymorphism data, a protein structure model explorer, a molecular interaction network explorer, a gene product subcellular localization explorer, and a gene expression pattern explorer. The ePlant's protein structure explorer module represents experimentally determined and theoretical structures covering >70% of the Arabidopsis proteome. The ePlant framework is accessed entirely through a web browser, and is therefore platform-independent. It can be applied to any model organism. To facilitate the development of three-dimensional displays of biological data on the world wide web we have established the ""3D Data Display Initiative"" (http://3ddi.org).",2011-01-10 +22403539,Combining collation and annotation efforts toward completion of the rat and mouse connectomes in BAMS.,"Many different independently published neuroanatomical parcellation schemes (brain maps, nomenclatures, or atlases) can exist for a particular species, although one scheme (a standard scheme) is typically chosen for mapping neuroanatomical data in a particular study. This is problematic for building connection matrices (connectomes) because the terms used to name structures in different parcellation schemes differ widely and interrelationships are seldom defined. Therefore, data sets cannot be compared across studies that have been mapped on different neuroanatomical atlases without a reliable translation method. 
Because resliceable 3D brain models for relating systematically and topographically different parcellation schemes are still in the first phases of development, it is necessary to rely on qualitative comparisons between regions and tracts that are either inserted directly by neuroanatomists or trained annotators, or are extracted or inferred by collators from the available literature. To address these challenges, we developed a publicly available neuroinformatics system, the Brain Architecture Knowledge Management System (BAMS; http://brancusi.usc.edu/bkms). The structure and functionality of BAMS is briefly reviewed here, as an exemplar for constructing interrelated connectomes at different levels of the mammalian central nervous system organization. Next, the latest version of BAMS rat macroconnectome is presented because it is significantly more populated with the number of inserted connectivity reports exceeding a benchmark value (50,000), and because it is based on a different classification scheme. Finally, we discuss a general methodology and strategy for producing global connection matrices, starting with rigorous mapping of data, then inserting and annotating it, and ending with online generation of large-scale connection matrices.",2012-02-28 +22014236,An integrated workflow for robust alignment and simplified quantitative analysis of NMR spectrometry data.,"

Background

Nuclear magnetic resonance spectroscopy (NMR) is a powerful technique to reveal and compare quantitative metabolic profiles of biological tissues. However, chemical and physical sample variations make the analysis of the data challenging, and typically require the application of a number of preprocessing steps prior to data interpretation. For example, noise reduction, normalization, baseline correction, peak picking, spectrum alignment and statistical analysis are indispensable components in any NMR analysis pipeline.

Results

We introduce a novel suite of informatics tools for the quantitative analysis of NMR metabolomic profile data. The core of the processing cascade is a novel peak alignment algorithm, called hierarchical Cluster-based Peak Alignment (CluPA). The algorithm aligns a target spectrum to the reference spectrum in a top-down fashion by building a hierarchical cluster tree from peak lists of reference and target spectra and then dividing the spectra into smaller segments based on the most distant clusters of the tree. To reduce the computational time to estimate the spectral misalignment, the method makes use of Fast Fourier Transformation (FFT) cross-correlation. Since the method returns a high-quality alignment, we can propose a simple methodology to study the variability of the NMR spectra. For each aligned NMR data point the ratio of the between-group and within-group sum of squares (BW-ratio) is calculated to quantify the difference in variability between and within predefined groups of NMR spectra. This differential analysis is related to the calculation of the F-statistic or a one-way ANOVA, but without distributional assumptions. Statistical inference based on the BW-ratio is achieved by bootstrapping the null distribution from the experimental data.

Conclusions

The workflow performance was evaluated using a previously published dataset. Correlation maps, spectral and grey scale plots show clear improvements in comparison to other methods, and the down-to-earth quantitative analysis works well for the CluPA-aligned spectra. The whole workflow is embedded into a modular and statistically sound framework that is implemented as an R package called ""speaq"" (""spectrum alignment and quantitation""), which is freely available from http://code.google.com/p/speaq/.",2011-10-20 +21991315,The chemical information ontology: provenance and disambiguation for chemical data on the biological semantic web.,"Cheminformatics is the application of informatics techniques to solve chemical problems in silico. There are many areas in biology where cheminformatics plays an important role in computational research, including metabolism, proteomics, and systems biology. One critical aspect in the application of cheminformatics in these fields is the accurate exchange of data, which is increasingly accomplished through the use of ontologies. Ontologies are formal representations of objects and their properties using a logic-based ontology language. Many such ontologies are currently being developed to represent objects across all the domains of science. Ontologies enable the definition, classification, and support for querying objects in a particular domain, enabling intelligent computer applications to be built which support the work of scientists both within the domain of interest and across interrelated neighbouring domains. Modern chemical research relies on computational techniques to filter and organise data to maximise research productivity. The objects which are manipulated in these algorithms and procedures, as well as the algorithms and procedures themselves, enjoy a kind of virtual life within computers. We will call these information entities. 
Here, we describe our work in developing an ontology of chemical information entities, with a primary focus on data-driven research and the integration of calculated properties (descriptors) of chemical entities within a semantic web context. Our ontology distinguishes algorithmic, or procedural information from declarative, or factual information, and renders of particular importance the annotation of provenance to calculated data. The Chemical Information Ontology is being developed as an open collaborative project. More details, together with a downloadable OWL file, are available at http://code.google.com/p/semanticchemistry/ (license: CC-BY-SA).",2011-10-03 +23180794,TFClass: an expandable hierarchical classification of human transcription factors.,"TFClass (http://tfclass.bioinf.med.uni-goettingen.de/) provides a comprehensive classification of human transcription factors based on their DNA-binding domains. Transcription factors constitute a large functional family of proteins directly regulating the activity of genes. Most of them are sequence-specific DNA-binding proteins, thus reading out the information encoded in cis-regulatory DNA elements of promoters, enhancers and other regulatory regions of a genome. TFClass is a database that classifies human transcription factors by a six-level classification schema, four of which are abstractions according to different criteria, while the fifth level represents TF genes and the sixth individual gene products. Altogether, nine superclasses have been identified, comprising 40 classes and 111 families. Counted by genes, 1558 human TFs have been classified so far or >2900 different TFs when including their isoforms generated by alternative splicing or protein processing events. 
With this classification, we hope to provide a basis for deciphering protein-DNA recognition codes; moreover, it can be used for constructing expanded transcriptional networks by inferring additional TF-target gene relations.",2012-11-24 +23180785,PeroxiBase: a database for large-scale evolutionary analysis of peroxidases.,"The PeroxiBase (http://peroxibase.toulouse.inra.fr/) is a specialized database devoted to peroxidases' families, which are major actors of stress responses. In addition to the increasing number of sequences and the complete modification of the Web interface, new analysis tools and functionalities have been developed since the previous publication in the NAR database issue. Nucleotide sequences and graphical representation of the gene structure can now be included for entries containing genomic cross-references. An expert semi-automatic annotation strategy is being developed to generate new entries from genomic sequences and from EST libraries. Plus, new internal and automatic controls have been included to improve the quality of the entries. To compare gene structure organization among families' members, two new tools are available, CIWOG to detect common introns and GECA to visualize gene structure overlaid with sequence conservation. The multicriteria search tool was greatly improved to allow simple and combined queries. After such requests or a BLAST search, different analysis processes are suggested, such as multiple alignments with ClustalW or MAFFT, a platform for phylogenetic analysis and GECA's display in association with a phylogenetic tree. 
Finally, we updated our family specific profiles implemented in the PeroxiScan tool and made new profiles to consider new sub-families.",2012-11-24 +23180797,"Quorumpeps database: chemical space, microbial origin and functionality of quorum sensing peptides.","Quorum-sensing (QS) peptides are biologically attractive molecules, with a wide diversity of structures and prone to modifications altering or presenting new functionalities. Therefore, the Quorumpeps database (http://quorumpeps.ugent.be) is developed to give a structured overview of the QS oligopeptides, describing their microbial origin (species), functionality (method, result and receptor), peptide links and chemical characteristics (3D-structure-derived physicochemical properties). The chemical diversity observed within this group of QS signalling molecules can be used to develop new synthetic bio-active compounds.",2012-11-24 +23180792,PIECE: a database for plant gene structure comparison and evolution.,"Gene families often show degrees of differences in terms of exon-intron structures depending on their distinct evolutionary histories. Comparative analysis of gene structures is important for understanding their evolutionary and functional relationships within plant species. Here, we present a comparative genomics database named PIECE (http://wheat.pw.usda.gov/piece) for Plant Intron and Exon Comparison and Evolution studies. The database contains all the annotated genes extracted from 25 sequenced plant genomes. These genes were classified based on Pfam motifs. Phylogenetic trees were pre-constructed for each gene category. PIECE provides a user-friendly interface for different types of searches and a graphical viewer for displaying a gene structure pattern diagram linked to the resulting bootstrapped dendrogram for each gene family. The gene structure evolution of orthologous gene groups was determined using the GLOOME, Exalign and GECA software programs that can be accessed within the database. 
PIECE also provides a web server version of the software, GSDraw, for drawing schematic diagrams of gene structures. PIECE is a powerful tool for comparing gene sequences and provides valuable insights into the evolution of gene structure in plant genomes.",2012-11-24 +23445519,MixSIH: a mixture model for single individual haplotyping.,"

Background

Haplotype information is useful for various genetic analyses, including genome-wide association studies. Determining haplotypes experimentally is difficult and there are several computational approaches that infer haplotypes from genomic data. Among such approaches, single individual haplotyping or haplotype assembly, which infers two haplotypes of an individual from aligned sequence fragments, has been attracting considerable attention. To avoid incorrect results in downstream analyses, it is important not only to assemble haplotypes as long as possible but also to provide means to extract highly reliable haplotype regions. Although there are several efficient algorithms for solving haplotype assembly, there is no efficient method that allows for extracting the regions assembled with high confidence.

Results

We develop a probabilistic model, called MixSIH, for solving the haplotype assembly problem. The model has two mixture components representing two haplotypes. Based on the optimized model, a quality score is defined, which we call the 'minimum connectivity' (MC) score, for each segment in the haplotype assembly. Because existing accuracy measures for haplotype assembly are designed to compare the efficiency between the algorithms and are not suitable for evaluating the quality of the set of partially assembled haplotype segments, we develop an accuracy measure based on the pairwise consistency and evaluate the accuracy on the simulation and real data. By using the MC scores, our algorithm can extract highly accurate haplotype segments. We also show evidence that an existing experimental dataset contains chimeric read fragments derived from different haplotypes, which significantly degrade the quality of assembled haplotypes.

Conclusions

We develop a novel method for solving the haplotype assembly problem. We also define the quality score which is based on our model and indicates the accuracy of the haplotypes segments. In our evaluation, MixSIH has successfully extracted reliable haplotype segments. The C++ source code of MixSIH is available at https://sites.google.com/site/hmatsu1226/software/mixsih.",2013-02-15 +23189075,"Insights into the Emergent Bacterial Pathogen Cronobacter spp., Generated by Multilocus Sequence Typing and Analysis.","Cronobacter spp. (previously known as Enterobacter sakazakii) is a bacterial pathogen affecting all age groups, with particularly severe clinical complications in neonates and infants. One recognized route of infection being the consumption of contaminated infant formula. As a recently recognized bacterial pathogen of considerable importance and regulatory control, appropriate detection, and identification schemes are required. The application of multilocus sequence typing (MLST) and analysis (MLSA) of the seven alleles atpD, fusA, glnS, gltB, gyrB, infB, and ppsA (concatenated length 3036 base pairs) has led to considerable advances in our understanding of the genus. This approach is supported by both the reliability of DNA sequencing over subjective phenotyping and the establishment of a MLST database which has open access and is also curated; http://www.pubMLST.org/cronobacter. MLST has been used to describe the diversity of the newly recognized genus, instrumental in the formal recognition of new Cronobacter species (C. universalis and C. condimenti) and revealed the high clonality of strains and the association of clonal complex 4 with neonatal meningitis cases. Clearly the MLST approach has considerable benefits over the use of non-DNA sequence based methods of analysis for newly emergent bacterial pathogens. 
The application of MLST and MLSA has dramatically enabled us to better understand this opportunistic bacterium which can cause irreparable damage to a newborn baby's brain, and has contributed to improved control measures to protect neonatal health.",2012-11-22 +22782551,Efficient sampling for Bayesian inference of conjunctive Bayesian networks.,"

Motivation

Cancer development is driven by the accumulation of advantageous mutations and subsequent clonal expansion of cells harbouring these mutations, but the order in which mutations occur remains poorly understood. Advances in genome sequencing and the soon-arriving flood of cancer genome data produced by large cancer sequencing consortia hold the promise to elucidate cancer progression. However, new computational methods are needed to analyse these large datasets.

Results

We present a Bayesian inference scheme for Conjunctive Bayesian Networks, a probabilistic graphical model in which mutations accumulate according to partial order constraints and cancer genotypes are observed subject to measurement noise. We develop an efficient MCMC sampling scheme specifically designed to overcome local optima induced by dependency structures. We demonstrate the performance advantage of our sampler over traditional approaches on simulated data and show the advantages of adopting a Bayesian perspective when reanalyzing cancer datasets and comparing our results to previous maximum-likelihood-based approaches.

Availability

An R package including the sampler and examples is available at http://www.cbg.ethz.ch/software/bayes-cbn.

Contacts

niko.beerenwinkel@bsse.ethz.ch.",2012-07-10 +22144874,Comparative microbial modules resource: generation and visualization of multi-species biclusters.,"The increasing abundance of large-scale, high-throughput datasets for many closely related organisms provides opportunities for comparative analysis via the simultaneous biclustering of datasets from multiple species. These analyses require a reformulation of how to organize multi-species datasets and visualize comparative genomics data analyses results. Recently, we developed a method, multi-species cMonkey, which integrates heterogeneous high-throughput datatypes from multiple species to identify conserved regulatory modules. Here we present an integrated data visualization system, built upon the Gaggle, enabling exploration of our method's results (available at http://meatwad.bio.nyu.edu/cmmr.html). The system can also be used to explore other comparative genomics datasets and outputs from other data analysis procedures - results from other multiple-species clustering programs or from independent clustering of different single-species datasets. We provide an example use of our system for two bacteria, Escherichia coli and Salmonella Typhimurium. We illustrate the use of our system by exploring conserved biclusters involved in nitrogen metabolism, uncovering a putative function for yjjI, a currently uncharacterized gene that we predict to be involved in nitrogen assimilation.",2011-12-01 +24109555,iSNO-AAPair: incorporating amino acid pairwise coupling into PseAAC for predicting cysteine S-nitrosylation sites in proteins.,"As one of the most important and universal posttranslational modifications (PTMs) of proteins, S-nitrosylation (SNO) plays crucial roles in a variety of biological processes, including the regulation of cellular dynamics and many signaling events. Knowledge of SNO sites in proteins is very useful for drug development and basic research as well. 
Unfortunately, it is both time-consuming and costly to determine the SNO sites purely based on biological experiments. Facing the explosive protein sequence data generated in the post-genomic era, we are challenged to develop automated vehicles for timely and effectively determining the SNO sites for uncharacterized proteins. To address the challenge, a new predictor called iSNO-AAPair was developed by taking into account the coupling effects for all the pairs formed by the nearest residues and the pairs by the next nearest residues along protein chains. The cross-validation results on a state-of-the-art benchmark have shown that the new predictor outperformed the existing predictors. The same was true when tested by the independent proteins whose experimental SNO sites were known. A user-friendly web-server for iSNO-AAPair was established at http://app.aporc.org/iSNO-AAPair/, by which users can easily obtain their desired results without the need to follow the mathematical equations involved during its development.",2013-10-03 +24204795,PathogenFinder--distinguishing friend from foe using bacterial whole genome sequence data.,"Although the majority of bacteria are harmless or even beneficial to their host, others are highly virulent and can cause serious diseases, and even death. Due to the constantly decreasing cost of high-throughput sequencing there are now many completely sequenced genomes available from both human pathogenic and innocuous strains. The data can be used to identify gene families that correlate with pathogenicity and to develop tools to predict the pathogenicity of newly sequenced strains, investigations that previously were mainly done by means of more expensive and time consuming experimental approaches. We describe PathogenFinder (http://cge.cbs.dtu.dk/services/PathogenFinder/), a web-server for the prediction of bacterial pathogenicity by analysing the input proteome, genome, or raw reads provided by the user. 
The method relies on groups of proteins, created without regard to their annotated function or known involvement in pathogenicity. The method has been built to work with all taxonomic groups of bacteria and using the entire training-set, achieved an accuracy of 88.6% on an independent test-set, by correctly classifying 398 out of 449 completely sequenced bacteria. The approach here proposed is not biased on sets of genes known to be associated with pathogenicity, thus the approach could aid the discovery of novel pathogenicity factors. Furthermore the pathogenicity prediction web-server could be used to isolate the potential pathogenic features of both known and unknown strains.",2013-10-28 +23754850,FIDEA: a server for the functional interpretation of differential expression analysis.,"The results of differential expression analyses provide scientists with hundreds to thousands of differentially expressed genes that need to be interpreted in light of the biology of the specific system under study. This requires mapping the genes to functional classifications that can be, for example, the KEGG pathways or InterPro families they belong to, their GO Molecular Function, Biological Process or Cellular Component. A statistically significant overrepresentation of one or more category terms in the set of differentially expressed genes is an essential step for the interpretation of the biological significance of the results. Ideally, the analysis should be performed by scientists who are well acquainted with the biological problem, as they have a wealth of knowledge about the system and can, more easily than a bioinformatician, discover less obvious and, therefore, more interesting relationships. To allow experimentalists to explore their data in an easy and at the same time exhaustive fashion within a single tool and to test their hypothesis quickly and effortlessly, we developed FIDEA. 
The FIDEA server is located at http://www.biocomputing.it/fidea; it is free and open to all users, and there is no login requirement.",2013-06-10 +23175614,LncRNADisease: a database for long-non-coding RNA-associated diseases.,"In this article, we describe a long-non-coding RNA (lncRNA) and disease association database (LncRNADisease), which is publicly accessible at http://cmbi.bjmu.edu.cn/lncrnadisease. In recent years, a large number of lncRNAs have been identified and increasing evidence shows that lncRNAs play critical roles in various biological processes. Therefore, the dysfunctions of lncRNAs are associated with a wide range of diseases. It thus becomes important to understand lncRNAs' roles in diseases and to identify candidate lncRNAs for disease diagnosis, treatment and prognosis. For this purpose, a high-quality lncRNA-disease association database would be extremely beneficial. Here, we describe the LncRNADisease database that collected and curated approximately 480 entries of experimentally supported lncRNA-disease associations, including 166 diseases. LncRNADisease also curated 478 entries of lncRNA interacting partners at various molecular levels, including protein, RNA, miRNA and DNA. Moreover, we annotated lncRNA-disease associations with genomic information, sequences, references and species. We normalized the disease name and the type of lncRNA dysfunction and provided a detailed description for each entry. Finally, we developed a bioinformatic method to predict novel lncRNA-disease associations and integrated the method and the predicted associated diseases of 1564 human lncRNAs into the database.",2012-11-21 +24604333,Female users of internet-based screening for rectal STIs: descriptive statistics and correlates of positivity.,"

Background

Internet-based screening for vaginal sexually transmitted infections (STI) has been shown to reach high-risk populations. Published studies of internet-based screening for rectal STIs in women are needed. Our objectives were to describe the female users of a rectal internet-based screening intervention and assess what factors correlated with rectal positivity for STIs.

Methods

The website http://www.iwantthekit.org offers free STI testing via home self-sampling kits. Women could order vaginal and rectal kits, both containing questionnaires. Rectal and vaginal swabs were tested for Chlamydia trachomatis, Neisseria gonorrhoeae and Trichomonas vaginalis using nucleic acid amplification tests. Data were analysed from 205 rectal kits from January 2009 through February 2011. Self-reported characteristics of participants were examined, and correlates of rectal STI positivity were analysed.

Results

Of the 205 rectal samples returned and eligible for testing, 38 (18.5%) were positive for at least one STI. The women were young (mean age 25.8 years), mostly African-American (50.0%), and only 14.0% always used condoms. After adjusting for age and race, Black race (AOR=3.06) and vaginal STI positivity (AOR=40.6) were significantly correlated with rectal STI positivity. Of women testing positive for rectal STIs who also submitted vaginal swabs, 29.4% were negative in the vaginal sample.

Conclusions

Internet-based rectal screening can reach populations that appear to be at high risk for rectal STIs (18.5% prevalence) and led to the diagnosis of STIs in women who would not have been diagnosed vaginally. Black race and vaginal STI positivity were highly correlated with rectal STI positivity.",2014-03-06 +23178820,MicrobPad MD: microbial pathogen diagnostic methods database.,"Medical pathogens induce infections, illnesses and sometimes serious medical conditions in the infected hosts. Diagnosis of these pathogens is important for proper treatment and investigation of pathogenesis processes. Molecular techniques have been developed for facilitating accurate, sensitive and low-cost diagnosis of these pathogens. Based on these techniques, diagnostic devices have been developed for a number of pathogens. More devices are needed for comprehensive coverage of medical pathogens. To facilitate the development of these devices, a database with integrated information about diagnostic methods, targets, and primers/probes for the known bacterial, fungal and viral pathogens is needed. We developed the microbial pathogen diagnostic methods database MicrobPad MD (http://bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp or http://pha-bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp) to provide comprehensive information about the molecular diagnostic techniques, targets, primers/probes, detection procedures and conditions, and tested diagnostic accuracies and limit of diagnosis for 314 bacterial, fungal and viral species from 61 genera. While available, additional information such as pathogen strains and hosts, tissue distribution or habitats, cultivation methods, biochemical characteristics, virulence factors, morphology, diseases, symptoms, treatment and prevention methods are provided. Our Database covers 242 gene targets, 700 primers/probes, 340 virulence factors, and 261 diseases. 
Cross-links to the NCBI genome and SwissProt/UniProt databases are provided.",2012-11-21 +23410089,An ethics curriculum for short-term global health trainees.,"

Background

Interest in short-term global health training and service programs continues to grow, yet they can be associated with a variety of ethical issues that trainees or others with limited global health experience may not be prepared to address. Therefore, there is a clear need for educational interventions concerning these ethical issues.

Methods

We developed and evaluated an introductory curriculum, ""Ethical Challenges in Short-term Global Health Training."" The curriculum was developed through solicitation of actual ethical issues experienced by trainees and program leaders; content drafting; and external content review. It was then evaluated from November 1, 2011, through July 1, 2012, by analyzing web usage data and by conducting user surveys. The survey included basic demographic data; prior experience in global health and global health ethics; and assessment of cases within the curriculum.

Results

The ten case curriculum is freely available at http://ethicsandglobalhealth.org. An average of 238 unique visitors accessed the site each month (standard deviation, 19). Of users who had been abroad before for global health training or service, only 31% reported prior ethics training related to short-term work. Most users (62%) reported accessing the site via personal referral or their training program; however, a significant number (28%) reported finding the site via web search, and 8% discovered it via web links. Users represented different fields: medicine (46%), public health (15%), and nursing (11%) were most common. All cases in the curriculum were evaluated favorably.

Conclusions

The curriculum is meeting a critical need for an introduction to the ethical issues in short-term global health training. Future work will integrate this curriculum within more comprehensive curricula for global health and evaluate specific knowledge and behavioral effects, including at training sites abroad.",2013-02-14 +24040122,Transimulation - protein biosynthesis web service.,"Although translation is the key step during gene expression, it remains poorly characterized at the level of individual genes. For this reason, we developed Transimulation - a web service measuring translational activity of genes in three model organisms: Escherichia coli, Saccharomyces cerevisiae and Homo sapiens. The calculations are based on our previous computational model of translation and experimental data sets. Transimulation quantifies mean translation initiation and elongation time (expressed in SI units), and the number of proteins produced per transcript. It also approximates the number of ribosomes that typically occupy a transcript during translation, and simulates their propagation. The simulation of ribosomes' movement is interactive and allows modifying the coding sequence on the fly. It also enables uploading any coding sequence and simulating its translation in one of three model organisms. In such a case, ribosomes propagate according to mean codon elongation times of the host organism, which may prove useful for heterologous expression. Transimulation was used to examine evolutionary conservation of translational parameters of orthologous genes. Transimulation may be accessed at http://nexus.ibb.waw.pl/Transimulation (requires Java version 1.7 or higher). Its manual and source code, distributed under the GPL-2.0 license, is freely available at the website.",2013-09-05 +24330655,BioSuper: a web tool for the superimposition of biomolecules and assemblies with rotational symmetry.,"

Background

Most of the proteins in the Protein Data Bank (PDB) are oligomeric complexes consisting of two or more subunits that associate by rotational or helical symmetries. Despite the myriad of superimposition tools in the literature, we could not find any able to account for rotational symmetry and display the graphical results in the web browser.

Results

BioSuper is a free web server that superimposes and calculates the root mean square deviation (RMSD) of protein complexes displaying rotational symmetry. To the best of our knowledge, BioSuper is the first tool of its kind that provides immediate interactive visualization of the graphical results in the browser, biomolecule generator capabilities, different levels of atom selection, sequence-dependent and structure-based superimposition types, and is the only web tool that takes into account the equivalence of atoms in side chains displaying symmetry ambiguity. BioSuper uses ICM program functionality as a core for the superimpositions and displays the results as text, HTML tables and 3D interactive molecular objects that can be visualized in the browser or in Android and iOS platforms with a free plugin.

Conclusions

BioSuper is a fast and functional tool that allows for pairwise superimposition of proteins and assemblies displaying rotational symmetry. The web server was created after our own frustration when attempting to superimpose flexible oligomers. We strongly believe that its user-friendly and functional design will be of great interest for structural and computational biologists who need to superimpose oligomeric proteins (or any protein). BioSuper web server is freely available to all users at http://ablab.ucsd.edu/BioSuper.",2013-12-13 +23661695,iFUSE: integrated fusion gene explorer.,"

Unlabelled

We present iFUSE (integrated fusion gene explorer), an online visualization tool that provides a fast and informative view of structural variation data and prioritizes those breaks likely representing fusion genes. This application uses calculated break points to determine fusion genes based on the latest annotation for genomic sequence information, and where relevant the structural variation (SV) events are annotated with predicted RNA and protein sequences. iFUSE takes as input a Complete Genomics (CG) junction file, a FusionMap fusion detection report file or a file already analysed and annotated by the iFUSE application on a previous occasion.

Results

We demonstrate the use of iFUSE with case studies from tumour-normal SV detection derived from Complete Genomics whole-genome sequencing results.

Availability

iFUSE is available as a web service at http://ifuse.erasmusmc.nl.",2013-05-09 +23658420,Twine: display and analysis of cis-regulatory modules.,"

Unlabelled

Many algorithms analyze enhancers for overrepresentation of known and novel motifs, with the goal of identifying binding sites for direct regulators of gene expression. Twine is a Java GUI with multiple graphical representations ('Views') of enhancer alignments that displays motifs, as IUPAC consensus sequences or position frequency matrices, in the context of phylogenetic conservation to facilitate cis-regulatory element discovery. Thresholds of phylogenetic conservation and motif stringency can be altered dynamically to facilitate detailed analysis of enhancer architecture. Views can be exported to vector graphics programs to generate high-quality figures for publication. Twine can be extended via Java plugins to manipulate alignments and analyze sequences.

Availability

Twine is freely available as a compiled Java .jar package or Java source code at http://labs.bio.unc.edu/crews/twine/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-08 +24167157,Fast protein fragment similarity scoring using a Binet-Cauchy kernel.,"

Motivation

Meaningful scores to assess protein structure similarity are essential to decipher protein structure and sequence evolution. The mining of the increasing number of protein structures requires fast and accurate similarity measures with statistical significance. Whereas numerous approaches have been proposed for protein domains as a whole, the focus is progressively moving to a more local level of structure analysis for which similarity measurement still remains without any satisfactory answer.

Results

We introduce a new score based on Binet-Cauchy kernel. It is normalized and bounded between 1 (maximal similarity, which implies exactly the same conformations for protein fragments) and -1 (mirror image conformations), the unrelated conformations having a null mean score. This allows for the search of both similar and mirror conformations. In addition, such score addresses two major issues of the widely used root mean square deviation (RMSD). First, it achieves length independent statistics even for short fragments. Second, it shows better performance in the discrimination of medium range RMSD values. Being simpler and faster to compute than the RMSD, it also provides the means for large-scale mining of protein structures.

Availability and implementation

The computer software implementing the score is available at http://bioserv.rpbs.univ-paris-diderot.fr/BCscore/

Contact

frederic.guyon@univ-paris-diderot.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-10-27 +23060611,Network-based inference from complex proteomic mixtures using SNIPE.,"

Motivation

Proteomics presents the opportunity to provide novel insights about the global biochemical state of a tissue. However, a significant problem with current methods is that shotgun proteomics has limited success at detecting many low abundance proteins, such as transcription factors from complex mixtures of cells and tissues. The ability to assay for these proteins in the context of the entire proteome would be useful in many areas of experimental biology.

Results

We used network-based inference in an approach named SNIPE (Software for Network Inference of Proteomics Experiments) that selectively highlights proteins that are more likely to be active but are otherwise undetectable in a shotgun proteomic sample. SNIPE integrates spectral counts from paired case-control samples over a network neighbourhood and assesses the statistical likelihood of enrichment by a permutation test. As an initial application, SNIPE was able to select several proteins required for early murine tooth development. Multiple lines of additional experimental evidence confirm that SNIPE can uncover previously unreported transcription factors in this system. We conclude that SNIPE can enhance the utility of shotgun proteomics data to facilitate the study of poorly detected proteins in complex mixtures.

Availability and implementation

An implementation for the R statistical computing environment named snipeR has been made freely available at http://genetics.bwh.harvard.edu/snipe/.

Contact

ssunyaev@rics.bwh.harvard.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-11 +21244646,easyDAS: automatic creation of DAS servers.,"

Background

The Distributed Annotation System (DAS) has proven to be a successful way to publish and share biological data. Although there are more than 750 active registered servers from around 50 organizations, setting up a DAS server comprises a fair amount of work, making it difficult for many research groups to share their biological annotations. Given the clear advantage that the generalized sharing of relevant biological data is for the research community it would be desirable to facilitate the sharing process.

Results

Here we present easyDAS, a web-based system enabling anyone to publish biological annotations with just some clicks. The system, available at http://www.ebi.ac.uk/panda-srv/easydas is capable of reading different standard data file formats, processing the data and creating a new publicly available DAS source in a completely automated way. The created sources are hosted on the EBI systems and can take advantage of its high storage capacity and network connection, freeing the data provider from any network management work. easyDAS is an open source project under the GNU LGPL license.

Conclusions

easyDAS is an automated DAS source creation system which can help many researchers in sharing their biological data, potentially increasing the amount of relevant biological data available to the scientific community.",2011-01-18 +23846743,pyRMSD: a Python package for efficient pairwise RMSD matrix calculation and handling.,"

Summary

We introduce pyRMSD, an open source standalone Python package that aims at offering an integrative and efficient way of performing Root Mean Square Deviation (RMSD)-related calculations of large sets of structures. It is specially tuned to do fast collective RMSD calculations, as pairwise RMSD matrices, implementing up to three well-known superposition algorithms. pyRMSD provides its own symmetric distance matrix class that, besides the fact that it can be used as a regular matrix, helps to save memory and increases memory access speed. This last feature can dramatically improve the overall performance of any Python algorithm using it. In addition, its extensibility, testing suites and documentation make it a good choice for those in need of a workbench for developing or testing new algorithms.

Availability

The source code (under MIT license), installer, test suites and benchmarks can be found at https://pele.bsc.es/ under the tools section.

Contact

victor.guallar@bsc.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-10 +23172289,DictyBase 2013: integrating multiple Dictyostelid species.,"dictyBase (http://dictybase.org) is the model organism database for the social amoeba Dictyostelium discoideum. This contribution provides an update on dictyBase that has been previously presented. During the past 3 years, dictyBase has taken significant strides toward becoming a genome portal for the whole Amoebozoa clade. In its latest release, dictyBase has scaled up to host multiple Dictyostelids, including Dictyostelium purpureum [Sucgang, Kuo, Tian, Salerno, Parikh, Feasley, Dalin, Tu, Huang, Barry et al.(2011) (Comparative genomics of the social amoebae Dictyostelium discoideum and Dictyostelium purpureum. Genome Biol., 12, R20)], Dictyostelium fasciculatum and Polysphondylium pallidum [Heidel, Lawal, Felder, Schilde, Helps, Tunggal, Rivero, John, Schleicher, Eichinger et al. (2011) (Phylogeny-wide analysis of social amoeba genomes highlights ancient origins for complex intercellular communication. Genome Res., 21, 1882-1891)]. The new release includes a new Genome Browser with RNAseq expression, interspecies Basic Local Alignment Search Tool alignments and a unified Basic Local Alignment Search Tool search for cross-species comparisons.",2012-11-20 +23172287,PhosPhAt goes kinases--searchable protein kinase target information in the plant phosphorylation site database PhosPhAt.,"Reversible phosphorylation is a key mechanism for regulating protein function. Thus it is of high interest to know which kinase can phosphorylate which proteins. Comprehensive information about phosphorylation sites in Arabidopsis proteins is hosted within the PhosPhAt database (http://phosphat.mpimp-golm.mpg.de). However, our knowledge of the kinases that phosphorylate those sites is dispersed throughout the literature and very difficult to access, particularly for investigators seeking to interpret large scale and high-throughput experiments. 
Therefore, we aimed to compile information on kinase-substrate interactions and kinase-specific regulatory information and make this available via a new functionality embedded in PhosPhAt. Our approach involved systematic surveying of the literature for regulatory information on the members of the major kinase families in Arabidopsis thaliana, such as CDPKs, MPK(KK)s, AGC kinases and SnRKs, as well as individual kinases from other families. To date, we have researched more than 4450 kinase-related publications, which collectively contain information on about 289 kinases. Users can now query the PhosPhAt database not only for experimental and predicted phosphorylation sites of individual proteins, but also for known substrates for a given kinase or kinase family. Further developments include addition of new phosphorylation sites and visualization of clustered phosphorylation events, known as phosphorylation hotspots.",2012-11-20 +24336414,BEAMS: backbone extraction and merge strategy for the global many-to-many alignment of multiple PPI networks.,"

Motivation

Global many-to-many alignment of biological networks has been a central problem in comparative biological network studies. Given a set of biological interaction networks, the informal goal is to group together related nodes. For the case of protein-protein interaction networks, such groups are expected to form clusters of functionally orthologous proteins. Construction of such clusters for networks from different species may prove useful in determining evolutionary relationships, in predicting the functions of proteins with unknown functions and in verifying those with estimated functions.

Results

A central informal objective in constructing clusters of orthologous proteins is to guarantee that each cluster is composed of members with high homological similarity, usually determined via sequence similarities, and that the interactions of the proteins involved in the same cluster are conserved across the input networks. We provide a formal definition of the global many-to-many alignment of multiple protein-protein interaction networks that captures this informal objective. We show the computational intractability of the suggested definition. We provide a heuristic method based on backbone extraction and merge strategy (BEAMS) for the problem. We finally show, through experiments based on biological significance tests, that the proposed BEAMS algorithm performs better than the state-of-the-art approaches. Furthermore, the computational burden of the BEAMS algorithm in terms of execution speed and memory requirements is more reasonable than the competing algorithms.

Availability and implementation

Supplementary material including code implementations in LEDA C++, experimental data and the results are available at http://webprs.khas.edu.tr/~cesim/BEAMS.tar.gz.",2013-12-11 +23047740,Prioritising risk pathways of complex human diseases based on functional profiling.,"Analysis of the biological pathways involved in complex human diseases is an important step in elucidating the pathogenesis and mechanism of diseases. Most pathway analysis approaches identify disease-related biological pathways using overlapping genes between pathways and diseases. However, these approaches ignore the functional biological association between pathways and diseases. In this paper, we designed a novel computational framework for prioritising disease-risk pathways based on functional profiling. The disease gene set and biological pathways were translated into functional profiles in the context of GO annotations. We then implemented a semantic similarity measurement for calculating the concordance score between a functional profile of disease genes and a functional profile of pathways (FPP); the concordance score was then used to prioritise and infer disease-risk pathways. A freely accessible web toolkit, 'Functional Profiling-based Pathway Prioritisation' (FPPP), was developed (http://bioinfo.hrbmu.edu.cn/FPPP). During validation, our method successfully identified known disease-pathway pairs with area under the ROC curve (AUC) values of 96.73 and 95.02% in tests using both pathway randomisation and disease randomisation. A robustness analysis showed that FPPP is reliable even when using data containing noise. A case study based on a dilated cardiomyopathy data set indicated that the high-ranking pathways from FPPP are well known to be linked with this disease. 
Furthermore, we predicted the risk pathways of 413 diseases by using FPPP to build a disease similarity landscape that systematically reveals the global modular organisation of disease associations.",2012-10-10 +24564959,OMACC: an Optical-Map-Assisted Contig Connector for improving de novo genome assembly.,"

Background

Genome sequencing and assembly are essential for revealing the secrets of life hidden in genomes. Because of repeats in most genomes, current programs collate sequencing data into a set of assembled sequences, called contigs, instead of a complete genome. Toward completing a genome, optical mapping is powerful in rendering the relative order of contigs on the genome, which is called scaffolding. However, connecting the neighboring contigs with nucleotide sequences requires further efforts. Nagarajan et al. have recently proposed a software module, FINISH, to close the gaps between contigs with other contig sequences after scaffolding contigs using an optical map. The results, however, are not yet satisfying.

Results

To increase the accuracy of contig connections, we develop OMACC, which carefully takes into account length information in optical maps. Specifically, it rescales the optical map and applies a length constraint for selecting the correct contig sequences for gap closure. In addition, it uses an advanced graph search algorithm to facilitate estimating the number of repeat copies within gaps between contigs. On both simulated and real datasets, OMACC achieves a <10% false gap-closing rate, three times lower than the ~27% false rate by FINISH, while maintaining a similar sensitivity.

Conclusion

As optical mapping is becoming popular and repeats are the bottleneck of assembly, OMACC should benefit various downstream biological studies via accurately connecting contigs into a more complete genome.

Availability

http://140.116.235.124/~tliu/omacc.",2013-12-13 +24004986,MEGADOCK 3.0: a high-performance protein-protein interaction prediction software using hybrid parallel computing for petascale supercomputing environments.,"

Background

Protein-protein interaction (PPI) plays a core role in cellular functions. Massively parallel supercomputing systems have been actively developed over the past few years, which enable large-scale biological problems to be solved, such as PPI network prediction based on tertiary structures.

Results

We have developed a high throughput and ultra-fast PPI prediction system based on rigid docking, ""MEGADOCK"", by employing a hybrid parallelization (MPI/OpenMP) technique assuming usages on massively parallel supercomputing systems. MEGADOCK displays significantly faster processing speed in the rigid-body docking process that leads to full utilization of protein tertiary structural data for large-scale and network-level problems in systems biology. Moreover, the system was scalable as shown by measurements carried out on two supercomputing environments. We then conducted prediction of biological PPI networks using the post-docking analysis.

Conclusions

We present a new protein-protein docking engine aimed at exhaustive docking of mega-order numbers of protein pairs. The system was shown to be scalable by running on thousands of nodes. The software package is available at: http://www.bi.cs.titech.ac.jp/megadock/k/.",2013-09-03 +24560580,Identification and characterization of lysine-methylated sites on histones and non-histone proteins.,"Protein methylation is a kind of post-translational modification (PTM), and typically takes place on lysine and arginine amino acid residues. Protein methylation is involved in many important biological processes, and most recent studies focused on lysine methylation of histones due to its critical roles in regulating transcriptional repression and activation. Histones possess highly conserved sequences and are homologous in most species. However, there is much less sequence conservation among non-histone proteins. Therefore, mechanisms for identifying lysine-methylated sites may greatly differ between histones and non-histone proteins. Nevertheless, this point of view was not considered in previous studies. Here we constructed two support vector machine (SVM) models by using lysine-methylated data from histones and non-histone proteins for predictions of lysine-methylated sites. Numerous features, such as the amino acid composition (AAC) and accessible surface area (ASA), were used in the SVM models, and the predictive performance was evaluated using five-fold cross-validations. For histones, the predictive sensitivity was 85.62% and specificity was 80.32%. For non-histone proteins, the predictive sensitivity was 69.1% and specificity was 88.72%. Results showed that our model significantly improved the predictive accuracy of histones compared to previous approaches. In addition, features of the flanking region of lysine-methylated sites on histones and non-histone proteins were also characterized and are discussed. 
A gene ontology functional analysis of lysine-methylated proteins and correlations of lysine-methylated sites with other PTMs in histones were also analyzed in detail. Finally, a web server, MethK, was constructed to identify lysine-methylated sites. MethK is now available at http://csb.cse.yzu.edu.tw/MethK/.",2014-01-24 +22086963,Ensembl 2012.,"The Ensembl project (http://www.ensembl.org) provides genome resources for chordate genomes with a particular focus on human genome data as well as data for key model organisms such as mouse, rat and zebrafish. Five additional species were added in the last year including gibbon (Nomascus leucogenys) and Tasmanian devil (Sarcophilus harrisii) bringing the total number of supported species to 61 as of Ensembl release 64 (September 2011). Of these, 55 species appear on the main Ensembl website and six species are provided on the Ensembl preview site (Pre!Ensembl; http://pre.ensembl.org) with preliminary support. The past year has also seen improvements across the project.",2011-11-15 +23161695,eProS--a database and toolbox for investigating protein sequence-structure-function relationships through energy profiles.,"Gaining information about structural and functional features of newly identified proteins is often a difficult task. This information is crucial for understanding sequence-structure-function relationships of target proteins and, thus, essential in comprehending the mechanisms and dynamics of the molecular systems of interest. Using protein energy profiles is a novel approach that can contribute in addressing such problems. An energy profile corresponds to the sequence of energy values that are derived from a coarse-grained energy model. Energy profiles can be computed from protein structures or predicted from sequences. As shown, correspondences and dissimilarities in energy profiles can be applied for investigations of protein mechanics and dynamics. 
We developed eProS (energy profile suite, freely available at http://bioservices.hs-mittweida.de/Epros/), a database that provides ∼76 000 pre-calculated energy profiles as well as a toolbox for addressing numerous problems of structure biology. Energy profiles can be browsed, visualized, calculated from an uploaded structure or predicted from sequence. Furthermore, it is possible to align energy profiles of interest or compare them with all entries in the eProS database to identify significantly similar energy profiles and, thus, possibly relevant structural and functional relationships. Additionally, annotations and cross-links from numerous sources provide a broad view of potential biological correspondences.",2012-11-17 +23161685,Atlas of genetics and cytogenetics in oncology and haematology in 2013.,"The Atlas of Genetics and Cytogenetics in Oncology and Haematology (http://AtlasGeneticsOncology.org) is a peer-reviewed internet journal/encyclopaedia/database focused on genes implicated in cancer, cytogenetics and clinical entities in cancer and cancer-prone hereditary diseases. The main goal of the Atlas is to provide review articles that describe complementary topics, namely, genes, genetic abnormalities, histopathology, clinical diagnoses and a large iconography. This description, which was historically based on karyotypic abnormalities and in situ hybridization (fluorescence in situ hybridization) techniques, now benefits from comparative genomic hybridization and massive sequencing, uncovering a tremendous amount of genetic rearrangements. As the Atlas combines different types of information (genes, genetic abnormalities, histopathology, clinical diagnoses and external links), its content is currently unique. The Atlas is a cognitive tool for fundamental and clinical research and has developed into an encyclopaedic work. 
In clinical practice, it contributes to the cytogenetic diagnosis and may guide treatment decision making, particularly regarding rare diseases (because they are numerous and are frequently encountered). Readers as well as the authors of the Atlas are researchers and/or clinicians.",2012-11-17 +23162052,NetCAD: a network analysis tool for coronary artery disease-associated PPI network.,"

Summary

The systematic and unbiased charting of protein-protein interaction (PPI) networks relevant to health or diseases has become an important and burgeoning challenge in systems biology. Further, current reports have supported that good correlation exists between the topological properties and biological function of protein nodes in networks. Coronary artery disease (CAD, also called coronary heart disease) is the most common type of heart disease worldwide. Traditional approaches of studying individual gene or protein have shown their weakness in such complex disease. Here, we provide NetCAD, a web-based tool for systematic investigation of CAD-specific proteins in human PPI network. The features of NetCAD include the following: proposing a novel method combining biological principles and graph theory, quantified topological analysis tools, built-in PPI information database consolidated from major public databases, creating CAD-associated subnetwork and visualizing network graph with good visual effects. NetCAD may provide important biological information for uncovering the molecular mechanisms and potential targets for therapies of CAD, which could not be found merely through molecular biology methods.

Availability and implementation

NetCAD is freely available at: http://www.herbbol.org/netcad/.

Contact

zhliu@implad.ac.cn or zhliu.liulab@foxmail.com",2012-11-17 +23160414,Accelerating literature curation with text-mining tools: a case study of using PubTator to curate genes in PubMed abstracts.,"Today's biomedical research has become heavily dependent on access to the biological knowledge encoded in expert curated biological databases. As the volume of biological literature grows rapidly, it becomes increasingly difficult for biocurators to keep up with the literature because manual curation is an expensive and time-consuming endeavour. Past research has suggested that computer-assisted curation can improve efficiency, but few text-mining systems have been formally evaluated in this regard. Through participation in the interactive text-mining track of the BioCreative 2012 workshop, we developed PubTator, a PubMed-like system that assists with two specific human curation tasks: document triage and bioconcept annotation. On the basis of evaluation results from two external user groups, we find that the accuracy of PubTator-assisted curation is comparable with that of manual curation and that PubTator can significantly increase human curatorial speed. These encouraging findings warrant further investigation with a larger number of publications to be annotated. Database URL: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/PubTator/",2012-11-17 +24590441,Protein fold recognition using geometric kernel data fusion.,"

Motivation

Various approaches based on features extracted from protein sequences and often machine learning methods have been used in the prediction of protein folds. Finding an efficient technique for integrating these different protein features has received increasing attention. In particular, kernel methods are an interesting class of techniques for integrating heterogeneous data. Various methods have been proposed to fuse multiple kernels. Most techniques for multiple kernel learning focus on learning a convex linear combination of base kernels. In addition to the limitation of linear combinations, working with such approaches could cause a loss of potentially useful information.

Results

We design several techniques to combine kernel matrices by taking more involved, geometry inspired means of these matrices instead of convex linear combinations. We consider various sequence-based protein features including information extracted directly from position-specific scoring matrices and local sequence alignment. We evaluate our methods for classification on the SCOP PDB-40D benchmark dataset for protein fold recognition. The best overall accuracy on the protein fold recognition test set obtained by our methods is ∼ 86.7%. This is an improvement over the results of the best existing approach. Moreover, our computational model has been developed by incorporating the functional domain composition of proteins through a hybridization model. It is observed that by using our proposed hybridization model, the protein fold recognition accuracy is further improved to 89.30%. Furthermore, we investigate the performance of our approach on the protein remote homology detection problem by fusing multiple string kernels.

Availability and implementation

The MATLAB code used for our proposed geometric kernel fusion frameworks are publicly available at http://people.cs.kuleuven.be/∼raf.vandebril/homepage/software/geomean.php?menu=5/.",2014-03-03 +23161689,GeneTack database: genes with frameshifts in prokaryotic genomes and eukaryotic mRNA sequences.,"Database annotations of prokaryotic genomes and eukaryotic mRNA sequences pay relatively low attention to frame transitions that disrupt protein-coding genes. Frame transitions (frameshifts) could be caused by sequencing errors or indel mutations inside protein-coding regions. Other observed frameshifts are related to recoding events (that evolved to control expression of some genes). Earlier, we have developed an algorithm and software program GeneTack for ab initio frameshift finding in intronless genes. Here, we describe a database (freely available at http://topaz.gatech.edu/GeneTack/db.html) containing genes with frameshifts (fs-genes) predicted by GeneTack. The database includes 206 991 fs-genes from 1106 complete prokaryotic genomes and 45 295 frameshifts predicted in mRNA sequences from 100 eukaryotic genomes. The whole set of fs-genes was grouped into clusters based on sequence similarity between fs-proteins (conceptually translated fs-genes), conservation of the frameshift position and frameshift direction (-1, +1). The fs-genes can be retrieved by similarity search to a given query sequence via a web interface, by fs-gene cluster browsing, etc. Clusters of fs-genes are characterized with respect to their likely origin, such as pseudogenization, phase variation, etc. The largest clusters contain fs-genes with programed frameshifts (related to recoding events).",2012-11-17 +24593823,Neutrophil depletion in the early inflammatory phase delayed cutaneous wound healing in older rats: improvements due to the use of un-denatured camel whey protein.,"

Background

While it is known that advanced age alters the recruitment of neutrophils during wound healing, thereby delaying the wound healing process, little is known about prolonged wound healing in advanced ages. Thus, we investigated the correlation of neutrophil recruitment with healing events, and the impact of whey protein (WP) on neutrophil activation.

Methods

The animals were allocated into wounded young group, wounded older group and wounded older rats with daily treatment of WP at a dose of 100 mg/kg of body weight.

Results

Our results pointed to a marked deficiency in the number of neutrophils in the wounds of older rats, which was accompanied with impairment of the healing process. In the group of older rats, phagocytic activity, as tested by fluorescence microscopy, declined throughout the first 24 hours after wounding. Both the neutrophil number and the phagocytic activity recovered in older rats which received WP supplementation. Interestingly, WP was found to significantly up-regulate the MIP-1α and CINC-1 mRNA expression in old rats. On the other hand, the wound size in older rats was significantly higher than that in younger ones. Blood angiogenesis was also significantly delayed in the older group as opposed to the young rats. WP, however, was found to return these indices to normal levels in the older rats. Proliferation and epidermal migration of the keratinocytes and the collagen deposition were also returned to the normal rates.

Conclusions

This data confirms the critical role of neutrophil recruitment in the early inflammatory phase of wound healing in older rats. In addition, when WP was used to improve neutrophil function in older rats, healing events returned to a more normal profile.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2100966986117779.",2014-03-04 +21398672,"Histogram-based DNA analysis for the visualization of chromosome, genome and species information.","

Motivation

We describe a novel approach to explore DNA nucleotide sequence data, aiming to produce high-level categorical and structural information about the underlying chromosomes, genomes and species.

Results

The article starts by analyzing chromosomal data through histograms using fixed length DNA sequences. After creating the DNA-related histograms, a correlation between pairs of histograms is computed, producing a global correlation matrix. These data are then used as input to several data processing methods for information extraction and tabular/graphical output generation. A set of 18 species is processed and the extensive results reveal that the proposed method is able to generate significant and diversified outputs, in good accordance with current scientific knowledge in domains such as genomics and phylogenetics.

Availability and implementation

Source code freely available for download at http://www4.dei.isep.ipp.pt/etc/dnapaper2010, implemented in Free Pascal and UNIX scripting tools. Study input data available online for download at University of California at Santa Cruz Genome Bioinformatics, http://hgdownload.cse.ucsc.edu/downloads.html.",2011-03-12 +24711653,Covariate-modulated local false discovery rate for genome-wide association studies.,"

Motivation

Genome-wide association studies (GWAS) have largely failed to identify most of the genetic basis of highly heritable diseases and complex traits. Recent work has suggested this could be because many genetic variants, each with individually small effects, compose their genetic architecture, limiting the power of GWAS, given currently obtainable sample sizes. In this scenario, Bonferroni-derived thresholds are severely underpowered to detect the vast majority of associations. Local false discovery rate (fdr) methods provide more power to detect non-null associations, but implicit assumptions about the exchangeability of single nucleotide polymorphisms (SNPs) limit their ability to discover non-null loci.

Methods

We propose a novel covariate-modulated local false discovery rate (cmfdr) that incorporates prior information about gene element-based functional annotations of SNPs, so that SNPs from categories enriched for non-null associations have a lower fdr for a given value of a test statistic than SNPs in unenriched categories. This readjustment of fdr based on functional annotations is achieved empirically by fitting a covariate-modulated parametric two-group mixture model. The proposed cmfdr methodology is applied to a large Crohn's disease GWAS.

Results

Use of cmfdr dramatically improves power, e.g. increasing the number of loci declared significant at the 0.05 fdr level by a factor of 5.4. We also demonstrate that SNPs were declared significant using cmfdr compared with usual fdr replicate in much higher numbers, while maintaining similar replication rates for a given fdr cutoff in de novo samples, using the eight Crohn's disease substudies as independent training and test datasets. Availability and implementation: https://sites.google.com/site/covmodfdr/

Contact

wes.stat@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-04-07 +24326156,MicroRNA-21 expression is associated with overall survival in patients with glioma.,"

Background

MicroRNA-21 has been proved to be associated with glioma proliferation and invasion; thus, we sought to clarify the clinical value of miR-21 expression in glioma tissues with WHO grade I to IV.

Methods

One hundred and fifty-two pairs of human gliomas and non-neoplastic brain tissues were evaluated using real-time PCR. The association of miR-21 expression with clinicopathological factors or the prognosis of glioma patients was also analyzed. In this study, survival analysis was performed using the Kaplan-Meier method and Cox's proportional hazards model.

Results

MiR-21 was more greatly expressed in glioma tissues compared to the corresponding non-neoplastic brain tissues (P < 0.001). This observed high miR-21 expression was significantly associated with high pathological grades and the Karnofsky performance score of glioma patients. In addition, overall patient survival for those with low miR-21 expression was significantly longer than those patients with high miR-21 expression (P < 0.001). Moreover, multivariate Cox regression analysis indicated that miR-21 might be an independent prognostic marker for glioma patient survival.

Conclusions

Our data show that miR-21 may be a candidate independent marker for gliomas, especially those with high pathological grades, and this could also be a potential therapeutic target for molecular glioma therapy.

Virtual slide

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1445749171109834.",2013-12-10 +23155063,The UCSC Genome Browser database: extensions and updates 2013.,"The University of California Santa Cruz (UCSC) Genome Browser (http://genome.ucsc.edu) offers online public access to a growing database of genomic sequence and annotations for a wide variety of organisms. The Browser is an integrated tool set for visualizing, comparing, analysing and sharing both publicly available and user-generated genomic datasets. As of September 2012, genomic sequence and a basic set of annotation 'tracks' are provided for 63 organisms, including 26 mammals, 13 non-mammal vertebrates, 3 invertebrate deuterostomes, 13 insects, 6 worms, yeast and sea hare. In the past year 19 new genome assemblies have been added, and we anticipate releasing another 28 in early 2013. Further, a large number of annotation tracks have been either added, updated by contributors or remapped to the latest human reference genome. Among these are an updated UCSC Genes track for human and mouse assemblies. We have also introduced several features to improve usability, including new navigation menus. This article provides an update to the UCSC Genome Browser database, which has been previously featured in the Database issue of this journal.",2012-11-15 +24466017,Genome-wide DNA polymorphisms in seven rice cultivars of temperate and tropical japonica groups.,"Elucidation of the rice genome is expected to broaden our understanding of genes related to the agronomic characteristics and the genetic relationship among cultivars. In this study, we conducted whole-genome sequencings of 6 cultivars, including 5 temperate japonica cultivars and 1 tropical japonica cultivar (Moroberekan), by using next-generation sequencing (NGS) with Nipponbare genome as a reference. 
The temperate japonica cultivars contained 2 sake brewing (Yamadanishiki and Gohyakumangoku), 1 landrace (Kameji), and 2 modern cultivars (Koshihikari and Norin 8). Almost >83% of the whole genome sequences of the Nipponbare genome could be covered by sequenced short-reads of each cultivar, including Omachi, which has previously been reported to be a temperate japonica cultivar. Numerous single nucleotide polymorphisms (SNPs), insertions, and deletions were detected among the various cultivars and the Nipponbare genomes. Comparison of SNPs detected in each cultivar suggested that Moroberekan had 5-fold more SNPs than the temperate japonica cultivars. Success of the 2 approaches to improve the efficacy of sequence data by using NGS revealed that sequencing depth was directly related to sequencing coverage of coding DNA sequences: in excess of 30× genome sequencing was required to cover approximately 80% of the genes in the rice genome. Further, the contigs prepared using the assembly of unmapped reads could increase the value of NGS short-reads and, consequently, cover previously unavailable sequences. These approaches facilitated the identification of new genes in coding DNA sequences and the increase of mapping efficiency in different regions. The DNA polymorphism information between the 7 cultivars and Nipponbare are available at NGRC_Rices_Build1.0 (http://www.nodai-genome.org/oryza_sativa_en.html).",2014-01-21 +23165094,IT Future of Medicine: from molecular analysis to clinical diagnosis and improved treatment.,"The IT Future of Medicine (ITFoM, http://www.itfom.eu/) initiative will produce computational models of individuals to enable the prediction of their future health risks, progression of diseases and selection and efficacy of treatments while minimising side effects. 
To be able to move our health care system to treat patients as individuals rather than as members of larger, divergent groups, the ITFoM initiative, proposes to integrate molecular, physiological and anatomical data of every person in 'virtual patient' models. The establishment of such 'virtual patient' models is now possible due to the enormous progress in analytical techniques, particularly in the '-omics' technology areas and in imaging, as well as in sensor technologies, but also due to the immense developments in the ICT field. As one of six Future and Emerging Technologies (FET) Flagship Pilot Projects funded by the European Commission, ITFoM with more than 150 academic and industrial partners from 34 countries, will foster the development in functional genomics and computer technologies to generate 'virtual patient' models to make them available for clinical application. The increase in the capacity of next generation sequencing systems will enable the high-throughput analysis of a large number of individuals generating huge amounts of genome, epigenome and transcriptome data, but making it feasible to apply deep sequencing in the clinic to characterise not only the patient's genome, but also individual samples, for example, from tumours. The genome profile will be integrated with proteome and metabolome information generated via new powerful chromatography, mass spectrometry and nuclear magnetic resonance techniques. The individualised model will not only enable the analysis of the current situation, but will allow the prediction of the response of the patient to different therapy options or intolerance for certain drugs.",2012-11-16 +21858155,OASIS: online application for the survival analysis of lifespan assays performed in aging research.,"

Background

Aging is a fundamental biological process. Characterization of genetic and environmental factors that influence lifespan is a crucial step toward understanding the mechanisms of aging at the organism level. To capture the different effects of genetic and environmental factors on lifespan, appropriate statistical analyses are needed.

Methodology/principal findings

We developed an online application for survival analysis (OASIS) that helps conduct various novel statistical tasks involved in analyzing survival data in a user-friendly manner. OASIS provides standard survival analysis results including Kaplan-Meier estimates and mean/median survival time by taking censored survival data. OASIS also provides various statistical tests including comparison of mean survival time, overall survival curve, and survival rate at specific time point. To visualize survival data, OASIS generates survival and log cumulative hazard plots that enable researchers to easily interpret their experimental results. Furthermore, we provide statistical methods that can analyze variances among survival datasets. In addition, users can analyze proportional effects of risk factors on survival.

Conclusions/significance

OASIS provides a platform that is essential to facilitate efficient statistical analyses of survival data in the field of aging research. Web application and a detailed description of algorithms are accessible from http://sbi.postech.ac.kr/oasis.",2011-08-15 +21381738,Ligand Classifier of Adaptively Boosting Ensemble Decision Stumps (LiCABEDS) and its application on modeling ligand functionality for 5HT-subtype GPCR families.,"Advanced high-throughput screening (HTS) technologies generate great amounts of bioactivity data, and this data needs to be analyzed and interpreted with attention to understand how these small molecules affect biological systems. As such, there is an increasing demand to develop and adapt cheminformatics algorithms and tools in order to predict molecular and pharmacological properties on the basis of these large data sets. In this manuscript, we report a novel machine-learning-based ligand classification algorithm, named Ligand Classifier of Adaptively Boosting Ensemble Decision Stumps (LiCABEDS), for data-mining and modeling of large chemical data sets to predict pharmacological properties in an efficient and accurate manner. The performance of LiCABEDS was evaluated through predicting GPCR ligand functionality (agonist or antagonist) using four different molecular fingerprints, including Maccs, FP2, Unity, and Molprint 2D fingerprints. Our studies showed that LiCABEDS outperformed two other popular techniques, classification tree and Naive Bayes classifier, on all four types of molecular fingerprints. Parameters in LiCABEDS, including the number of boosting iterations, initialization condition, and a ""reject option"" boundary, were thoroughly explored and discussed to demonstrate the capability of handling imbalanced data sets, as well as its robustness and flexibility. In addition, the detailed mathematical concepts and theory are also given to address the principle behind statistical prediction models. 
The LiCABEDS algorithm has been implemented into a user-friendly software package that is accessible online at http://www.cbligand.org/LiCABEDS/ .",2011-03-07 +24229371,Pre-hypertension: another 'pseudodisease'?,"Hypertension is one of the most important and common cardiovascular risk factors. Defining the level at which blood pressure starts causing end-organ damage is challenging, and is not easily answered. The threshold of blood pressure defining hypertension has progressively been reduced over time, from systolic >160 mmHg to >150 mmHg, then to >140 mmHg; and now even blood pressures above 130 to 120 mmHg are labeled as 'pre-hypertension' by some expert committees. Are interest groups creating another 'pseudodisease' or is this trend scientifically justified? A recent meta-analysis published in BMC Medicine by Huang et al. clearly indicates that pre-hypertension (120 to 140/80 to 90 mmHg) is a significant marker of increased cardiovascular risk. This raises the question as to whether we now need to lower the threshold of 'hypertension' (as opposed to 'pre-hypertension') to >120/80 mmHg, redefining a significant proportion of currently healthy people as 'patients' with an established disease. These data need to be interpreted with some caution. It is controversial whether pre-hypertension is an independent risk factor or just a risk marker and even more controversial whether treatment of pre-hypertension will lower cardiovascular risk. Please see related research: http://www.biomedcentral.com/1741-7015/11/177.",2013-09-25 +24334449,Systemic upregulation of PDGF-B in patients with neovascular AMD.,"

Purpose

To determine the plasma levels of platelet-derived growth factor-B (PDGF-B), VEGF, and TNF-α in patients with neovascular AMD and in patients with diabetic macular edema (DME).

Methods

Thirty patients with neovascular AMD, 30 patients with DME, and 12 healthy controls were included in this prospective study. The concentrations of PDGF-B, VEGF, and TNF-α were measured by ELISA.

Results

The PDGF-B concentration in the plasma of controls was (median [25th-75th percentile]) 263.5 (162.0-513.3) pg/mL and in patients with DME 219.0 (122.8-604.8) pg/mL. In patients with neovascular AMD, PDGF-B levels were significantly higher with a median plasma concentration of 783.5 (289.3-1183.5) pg/mL (P = 0.003). The VEGF concentrations in patients with DME 33.0 (21.8-73.0) pg/mL and in patients with neovascular AMD 55.0 (37.0-116.3) pg/mL showed no significant differences (P = 0.159). A positive correlation of PDGF-B and VEGF plasma levels was found in patients with neovascular AMD and in patients with DME (r = 0.683, P < 0.001, and r = 0.612, P < 0.001, respectively). No significant differences of systemic TNF-α levels could be found between the three study groups.

Conclusions

Patients with neovascular AMD have significantly higher plasma PDGF-B levels compared with patients with DME and healthy controls. Our study data indicate that PDGF-B may be involved in the pathogenesis of neovascular AMD. (https://eudract.ema.europa.eu number, EudraCT 2010-024654-11)",2014-01-20 +23602956,An efficient method of wavelength interval selection based on random frog for multivariate spectral calibration.,"Wavelength selection is a critical step for producing better prediction performance when applied to spectral data. Considering the fact that the vibrational and rotational spectra have continuous features of spectral bands, we propose a novel method of wavelength interval selection based on random frog, called interval random frog (iRF). To obtain all the possible continuous intervals, spectra are first divided into intervals by moving window of a fix width over the whole spectra. These overlapping intervals are ranked applying random frog coupled with PLS and the optimal ones are chosen. This method has been applied to two near-infrared spectral datasets displaying higher efficiency in wavelength interval selection than others. The source code of iRF can be freely downloaded for academy research at the website: http://code.google.com/p/multivariate-calibration/downloads/list.",2013-03-28 +24813213,PeakLink: a new peptide peak linking method in LC-MS/MS using wavelet and SVM.,"

Motivation

In liquid chromatography-mass spectrometry/tandem mass spectrometry (LC-MS/MS), it is necessary to link tandem MS-identified peptide peaks so that protein expression changes between the two runs can be tracked. However, only a small number of peptides can be identified and linked by tandem MS in two runs, and it becomes necessary to link peptide peaks with tandem identification in one run to their corresponding ones in another run without identification. In the past, peptide peaks were linked based on similarities in retention time (rt), mass or peak shape after rt alignment, which corrects mean rt shifts between runs. However, the accuracy in linking is still limited especially for complex samples collected from different conditions. Consequently, large-scale proteomics studies that require comparison of protein expression profiles of hundreds of patients cannot be carried out effectively.

Method

In this article, we consider the problem of linking peptides from a pair of LC-MS/MS runs and propose a new method, PeakLink (PL), which uses information in both the time and frequency domain as inputs to a non-linear support vector machine (SVM) classifier. The PL algorithm first uses a threshold on an rt likelihood ratio score to remove candidate corresponding peaks with excessively large elution time shifts, then PL calculates the correlation between a pair of candidate peaks after reducing noise through wavelet transformation. After converting rt and peak shape correlation to statistical scores, an SVM classifier is trained and applied for differentiating corresponding and non-corresponding peptide peaks.

Results

PL is tested in multiple challenging cases, in which LC-MS/MS samples are collected from different disease states, different instruments and different laboratories. Testing results show significant improvement in linking accuracy compared with other algorithms.

Availability and implementation

M files for the PL alignment method are available at http://compgenomics.utsa.edu/zgroup/PeakLink.

Supplementary information

Supplementary data are available at Bioinformatics online.",2014-05-09 +23539305,MCScanX-transposed: detecting transposed gene duplications based on multiple colinearity scans.,"

Summary

Gene duplication occurs via different modes such as segmental and single-gene duplications. Transposed gene duplication, a specific form of single-gene duplication, 'copies' a gene from an ancestral chromosomal location to a novel location. MCScanX is a toolkit for detection and evolutionary analysis of gene colinearity. We have developed MCScanX-transposed, a software package to detect transposed gene duplications that occurred within different epochs, based on execution of MCScanX within and between related genomes. MCScanX-transposed can be also used for integrative analysis of gene duplication modes for a genome and to annotate a gene family of interest with gene duplication modes.

Availability

MCScanX-transposed is freely available at http://chibba.pgml.uga.edu/mcscan2/transposed/

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-03-28 +23140436,A possible strategy against head and neck cancer: in silico investigation of three-in-one inhibitors.,"Overexpression of epidermal growth factor receptor (EGFR), Her2, and uroporphyrinogen decarboxylase (UROD) occurs in a variety of malignant tumor tissues. UROD has potential to modulate tumor response of radiotherapy for head and neck cancer, and EGFR and Her2 are common drug targets for the treatment of head and neck cancer. This study attempts to find a possible lead compound backbone from TCM Database@Taiwan ( http://tcm.cmu.edu.tw/ ) for EGFR, Her2, and UROD proteins against head and neck cancer using computational techniques. Possible traditional Chinese medicine (TCM) lead compounds had potential binding affinities with EGFR, Her2, and UROD proteins. The candidates formed stable interactions with residues Arg803, Thr854 in EGFR, residues Thr862, Asp863 in Her2 protein, and residues Arg37, Arg41 in UROD protein, which are key residues in the binding or catalytic domain of EGFR, Her2, and UROD proteins. Thus, the TCM candidates indicated a possible molecule backbone for evolving potential inhibitors for three drug target proteins against head and neck cancer.",2012-11-12 +24451229,Estimation of viral richness from shotgun metagenomes using a frequency count approach.,"

Background

Viruses are important drivers of ecosystem functions, yet little is known about the vast majority of viruses. Viral shotgun metagenomics enables the investigation of broad ecological questions in phage communities. One ecological characteristic is species richness, which is the number of different species in a community. Viruses do not have a phylogenetic marker analogous to the bacterial 16S rRNA gene with which to estimate richness, and so contig spectra are employed to measure the number of virus taxa in a given community. A contig spectrum is generated from a viral shotgun metagenome by assembling the random sequence reads into groups of sequences that overlap (contigs) and counting the number of sequences that group within each contig. Current tools available to analyze contig spectra to estimate phage richness are limited by relying on rank-abundance data.

Results

We present statistical estimates of virus richness from contig spectra. The program CatchAll (http://www.northeastern.edu/catchall/) was used to analyze contig spectra in terms of frequency count data rather than rank-abundance, thus enabling formal statistical analyses. Also, the influence of potentially spurious low-frequency counts on richness estimates was minimized by two methods, empirical and statistical. The results show greater estimates of viral richness than previous calculations in nearly all environments analyzed, including swine feces and reclaimed fresh water.

Conclusions

CatchAll yielded consistent estimates of richness across viral metagenomes from the same or similar environments. Additionally, analysis of pooled viral metagenomes from different environments via mixed contig spectra resulted in greater richness estimates than those of the component metagenomes. Using CatchAll to analyze contig spectra will improve estimations of richness from viral shotgun metagenomes, particularly from large datasets, by providing statistical measures of richness.",2013-02-04 +23189029,Myosinome: a database of myosins from select eukaryotic genomes to facilitate analysis of sequence-structure-function relationships.,"Myosins are one of the largest protein superfamilies with 24 classes. They have conserved structural features and catalytic domains yet show huge variation at different domains resulting in a variety of functions. Myosins are molecules driving various kinds of cellular processes and motility until the level of organisms. These are ATPases that utilize the chemical energy released by ATP hydrolysis to bring about conformational changes leading to a motor function. Myosins are important as they are involved in almost all cellular activities ranging from cell division to transcriptional regulation. They are crucial due to their involvement in many congenital diseases symptomatized by muscular malfunctions, cardiac diseases, deafness, neural and immunological dysfunction, and so on, many of which lead to death at an early age. We present Myosinome, a database of selected myosin classes (myosin II, V, and VI) from five model organisms. This knowledge base provides the sequences, phylogenetic clustering, domain architectures of myosins and molecular models, structural analyses, and relevant literature of their coiled-coil domains. In the current version of Myosinome, information about 71 myosin sequences belonging to three myosin classes (myosin II, V, and VI) in five model organisms (Homo Sapiens, Mus musculus, D. melanogaster, C. 
elegans and S. cerevisiae) identified using bioinformatics surveys are presented, and several of them are yet to be functionally characterized. As these proteins are involved in congenital diseases, such a database would be useful in short-listing candidates for gene therapy and drug development. The database can be accessed from http://caps.ncbs.res.in/myosinome.",2012-11-12 +22661648,POOL server: machine learning application for functional site prediction in proteins.,"

Summary

We present an automated web server for partial order optimum likelihood (POOL), a machine learning application that combines computed electrostatic and geometric information for high-performance prediction of catalytic residues from 3D structures. Input features consist of THEMATICS electrostatics data and pocket information from ConCavity. THEMATICS measures deviation from typical, sigmoidal titration behavior to identify functionally important residues and ConCavity identifies binding pockets by analyzing the surface geometry of protein structures. Both THEMATICS and ConCavity (structure only) do not require the query protein to have any sequence or structure similarity to other proteins. Hence, POOL is applicable to proteins with novel folds and engineered proteins. As an additional option for cases where sequence homologues are available, users can include evolutionary information from INTREPID for enhanced accuracy in site prediction.

Availability

The web site is free and open to all users with no login requirements at http://www.pool.neu.edu.

Contact

m.ondrechen@neu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-06-01 +23818512,BNFinder2: Faster Bayesian network learning and Bayesian classification.,"

Summary

Bayesian Networks (BNs) are versatile probabilistic models applicable to many different biological phenomena. In biological applications the structure of the network is usually unknown and needs to be inferred from experimental data. BNFinder is a fast software implementation of an exact algorithm for finding the optimal structure of the network given a number of experimental observations. Its second version, presented in this article, represents a major improvement over the previous version. The improvements include (i) a parallelized learning algorithm leading to an order of magnitude speed-ups in BN structure learning time; (ii) inclusion of an additional scoring function based on mutual information criteria; (iii) possibility of choosing the resulting network specificity based on statistical criteria and (iv) a new module for classification by BNs, including cross-validation scheme and classifier quality measurements with receiver operator characteristic scores.

Availability and implementation

BNFinder2 is implemented in python and freely available under the GNU general public license at the project Web site https://launchpad.net/bnfinder, together with a user's manual, introductory tutorial and supplementary methods.",2013-07-01 +23788719,Prostate cancer localization using multiparametric MR imaging: comparison of Prostate Imaging Reporting and Data System (PI-RADS) and Likert scales.,"

Purpose

To compare the recently proposed Prostate Imaging Reporting and Data System (PI-RADS) scale that incorporates fixed criteria and a standard Likert scale based on overall impression in prostate cancer localization using multiparametric magnetic resonance (MR) imaging.

Materials and methods

This retrospective study was HIPAA compliant and institutional review board approved. Seventy patients who underwent 3-T pelvic MR imaging, including T2-weighted imaging, diffusion-weighted imaging, and dynamic contrast material-enhanced imaging, with a pelvic phased-array coil before radical prostatectomy were included. Three radiologists, each with 6 years of experience, independently scored 18 regions (12 peripheral zone [PZ], six transition zone [TZ]) using PI-RADS (range, scores 3-15) and Likert (range, scores 1-5) scales. Logistic regression for correlated data was used to compare scales for detection of tumors larger than 3 mm in maximal diameter at prostatectomy.

Results

Maximal accuracy was achieved with score thresholds of 8 and higher and of 3 and higher for PI-RADS and Likert scales, respectively. At these thresholds, in the PZ, similar accuracy was achieved with the PI-RADS scale and the Likert scale for radiologist 1 (89.0% vs 88.2%, P = .223) and radiologist 3 (88.5% vs 88.2%, P = .739) and greater accuracy was achieved with the PI-RADS scale than the Likert scale for radiologist 2 (89.6% vs 87.1%, P = .008). In the TZ, accuracy was lower with the PI-RADS scale than with the Likert scale for radiologist 1 (70.0% vs 87.1%, P < .001), radiologist 2 (87.6% vs 92.6%, P = .002), and radiologist 3 (82.9% vs 91.2%, P < .001). For tumors with Gleason score of at least 7, sensitivity was higher with the PI-RADS scale than with the Likert scale for radiologist 1 (88.6% vs 82.6%, P = .032), and sensitivity was similar for radiologist 2 (78.0% vs 76.5%, P = .467) and radiologist 3 (77.3% vs 81.1%, P = .125).

Conclusion

Radiologists performed well with both PI-RADS and Likert scales for tumor localization, although, in the TZ, performance was better with the Likert scale than the PI-RADS scale.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.13122233/-/DC1.",2013-06-20 +23842981,Interactive case review of radiologic and pathologic findings from breast biopsy: are they concordant? How do I manage the results?,"The number of imaging-guided percutaneous breast biopsies performed has steadily increased as imaging techniques have improved. Percutaneous biopsy is becoming more commonplace and supplanting excisional biopsy as the preferred diagnostic tool. The radiologist's role in caring for patients who undergo breast biopsy extends beyond imaging to identifying lesions for biopsy and then performing the procedure. Radiologists must also be cognizant of radiologic-pathologic correlation to determine whether biopsy results are concordant with imaging findings and make management recommendations. Management of microcalcifications, masses, and areas of asymmetry begins with recognizing and characterizing the findings with the proper Breast Imaging Reporting and Data System (BI-RADS) lexicon. Determining concordance between imaging findings and histologic results is equally important. The decision to recommend surgical excision or short-term follow-up relies heavily on whether the histologic diagnosis correlates with the imaging findings, a determination that is part of the radiologist's responsibilities if he or she performs the biopsy. Supplemental material available at http://radiographics.rsna.org/lookup/suppl/doi:10.1148/rg.334125123/-/DC1.",2013-07-01 +23467573,Prescription drug cost reduction in Native Hawaiians after laparoscopic Roux-en-y gastric bypass.,"

Objective

Native Hawaiians (NH) represent a unique population where socioeconomic factors have contributed to higher incidence rates of obesity and related comorbidities than in the general population resulting in substantial prescription medication costs. Studies demonstrate that laparoscopic Roux-en-y gastric bypass (LRYGB) surgery results in significant weight loss, improvement of comorbidities, and decreased costs for prescription medications in Caucasians. This study aimed to analyze the effects of LRYGB surgery on Native Hawaiians and their prescription drug costs.

Methods

Demographics, baseline body mass index (BMI), comorbidities, preoperative, and postoperative data were analyzed for NH patients who underwent LRYGB between January 2004 and April 2009. Medication costs were determined using the online pharmacy . Generic drugs were selected when appropriate, while vitamins and nutritional supplements were not included in this study.

Results

Fifty (14 Men, 36 women) NH patients had sufficient data and follow-up for analysis. Average preoperative BMI was 49 kg/m(2), while at one year follow-up it decreased to 33 kg/m(2) (P<.001). This correlates to an average of 61% excess body weight lost (P<.001). The average number of prescription medications decreased from 3.5/patient preoperatively to 1.1/patient at one year (P<.001), equating to a monthly cost savings of US $195.8/patient (P<.001).

Conclusions

LRYGB provided substantial weight loss for morbidly obese NH patients, resulting in significantly less prescription medication use and substantial cost savings. Thus, bariatric surgery for weight management has the potential to improve the overall well-being and lower the financial burden of medical care in socioeconomically disadvantaged communities such as the NH.",2013-02-01 +22302378,The determinants of performance in master swimmers: an analysis of master world records.,"Human performances in sports decline with age in all competitions/disciplines. Since the effects of age are often compounded by disuse, the study of master athletes provides the opportunity to investigate the effects of age per se on the metabolic/biomechanical determinants of performance. For all master age groups, swimming styles and distances, we calculated the metabolic power required to cover the distance (d) in the best performance time as: E' maxR = C · d/BTP = C · vmax; where C is the energy cost of swimming in young elite swimmers, vmax = d/BTP is the record speed over the distance d, and BTP was obtained from ""cross-sectional data"" (http://www.fina.org). To establish a record performance, E' maxR must be equal to the maximal available metabolic power (E'maxA). This was calculated assuming a decrease of 1% per year at 40 - 70 years, 2% at 70 - 80 years and 3% at 80 - 90 years (as indicated in the literature) and compared to the E' maxR values, whereas up to about 55 years of age E' maxR = E' maxA; for older subjects E' maxA > E' maxR; the difference increasing linearly by about 0.30% (backstroke), 1.93% (butterfly), 0.92% (front crawl) and 0.37% (breaststroke) per year (average over the 50, 100 and 200 m distances). These data suggest that the energy cost of swimming increases with age. 
Hence, the decrease in performance in master swimmers is due to both decrease in the metabolic power available (E' maxA) and to an increase in C.",2012-10-01 +21349869,R453Plus1Toolbox: an R/Bioconductor package for analyzing Roche 454 Sequencing data.,"

Unlabelled

The R453Plus1Toolbox is an R/Bioconductor package for the analysis of 454 Sequencing data. Projects generated with Roche's data analysis software can be imported into R allowing advanced and customized analyses within the R/Bioconductor environment for sequencing data. Several methods were implemented extending the current functionality of Roche's software. These extensions include methods for quality assurance and annotation of detected variants. Further, a pipeline for the detection of structural variants, e.g. balanced chromosomal translocations, is provided.

Availability

The R453Plus1Toolbox is implemented in R and available at http://www.bioconductor.org/. A vignette outlining typical workflows is included in the package.

Contact

h.klein@uni-muenster.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-23 +22132069,MetaBinG: using GPUs to accelerate metagenomic sequence classification.,"Metagenomic sequence classification is a procedure to assign sequences to their source genomes. It is one of the important steps for metagenomic sequence data analysis. Although many methods exist, classification of high-throughput metagenomic sequence data in a limited time is still a challenge. We present here an ultra-fast metagenomic sequence classification system (MetaBinG) using graphic processing units (GPUs). The accuracy of MetaBinG is comparable to the best existing systems and it can classify a million of 454 reads within five minutes, which is more than 2 orders of magnitude faster than existing systems. MetaBinG is publicly available at http://cbb.sjtu.edu.cn/~ccwei/pub/software/MetaBinG/MetaBinG.php.",2011-11-23 +24696503,Confetti: a multiprotease map of the HeLa proteome for comprehensive proteomics.,"Bottom-up proteomics largely relies on tryptic peptides for protein identification and quantification. Tryptic digestion often provides limited coverage of protein sequence because of issues such as peptide length, ionization efficiency, and post-translational modification colocalization. Unfortunately, a region of interest in a protein, for example, because of proximity to an active site or the presence of important post-translational modifications, may not be covered by tryptic peptides. Detection limits, quantification accuracy, and isoform differentiation can also be improved with greater sequence coverage. Selected reaction monitoring (SRM) would also greatly benefit from being able to identify additional targetable sequences. In an attempt to improve protein sequence coverage and to target regions of proteins that do not generate useful tryptic peptides, we deployed a multiprotease strategy on the HeLa proteome. 
First, we used seven commercially available enzymes in single, double, and triple enzyme combinations. A total of 48 digests were performed. 5223 proteins were detected by analyzing the unfractionated cell lysate digest directly; with 42% mean sequence coverage. Additional strong-anion exchange fractionation of the most complementary digests permitted identification of over 3000 more proteins, with improved mean sequence coverage. We then constructed a web application (https://proteomics.swmed.edu/confetti) that allows the community to examine a target protein or protein isoform in order to discover the enzyme or combination of enzymes that would yield peptides spanning a certain region of interest in the sequence. Finally, we examined the use of nontryptic digests for SRM. From our strong-anion exchange fractionation data, we were able to identify three or more proteotypic SRM candidates within a single digest for 6056 genes. Surprisingly, in 25% of these cases the digest producing the most observable proteotypic peptides was neither trypsin nor Lys-C. SRM analysis of Asp-N versus tryptic peptides for eight proteins determined that Asp-N yielded higher signal in five of eight cases.",2014-04-02 +23144492,MSDB: a user-friendly program for reporting distribution and building databases of microsatellites from genome sequences.,"Microsatellite Search and Building Database (MSDB) is a new Perl program providing a user-friendly interface for identification and building databases of microsatellites from complete genome sequences. The general aims of MSDB are to use the database to store the information of microsatellites and to facilitate the management, classification, and statistics of microsatellites. A user-friendly interface facilitates the treatment of large datasets. The program is powerful in finding various types of pure, compound, and complex microsatellites from sequences as well as generating a detailed statistical report in worksheet format. 
MSDB also contains other two subprograms: SWR, which is used to export microsatellites from the database to meet user's requirements, and SWP, which is used to automatically invoke R to draw a sliding window plot for displaying the distribution of density or frequency of identified microsatellites. MSDB is freely available under the GNU General Public license for Windows and Linux from the following website: http://msdb.biosv.com/.",2012-11-09 +23143106,EcoCyc: fusing model organism databases with systems biology.,"EcoCyc (http://EcoCyc.org) is a model organism database built on the genome sequence of Escherichia coli K-12 MG1655. Expert manual curation of the functions of individual E. coli gene products in EcoCyc has been based on information found in the experimental literature for E. coli K-12-derived strains. Updates to EcoCyc content continue to improve the comprehensive picture of E. coli biology. The utility of EcoCyc is enhanced by new tools available on the EcoCyc web site, and the development of EcoCyc as a teaching tool is increasing the impact of the knowledge collected in EcoCyc.",2012-11-09 +23152759,Analysis of the clonality of Candida tropicalis strains from a general hospital in Beijing using multilocus sequence typing.,"Multilocus sequence typing (MLST) based on six loci was used to analyze the relationship of 58 Candida tropicalis isolates from individual patients in a general hospital in Beijing, China. A total of 52 diploid sequence types (DSTs) were generated by the MLST, all of which were new to the central database. Unweighted Pair Group Method with Arithmetic Mean (UPGMA) dendrograms were constructed, which showed that the 58 isolates were distributed robustly and 6 main groups were clustered regardless of the specimen source and medical department. The minimum spanning tree (MST) of the 58 isolates (52 DSTs) and all 401 isolates (268 DSTs) in the C. 
tropicalis central database (http://pubmlst.org/ctropicalis/) indicated that the isolates in this study clustered in three relative pure clonal complexes, and 2 clustered with isolates from Taiwan, Belgium, Brazil, and the US. This study presents the first MLST analysis of C. tropicalis isolates from Mainland China, which may be useful for further studies on the similarity, genetic relationship, and molecular epidemiology of C. tropicalis strains worldwide.",2012-11-09 +23145153,CREST--classification resources for environmental sequence tags.,"Sequencing of taxonomic or phylogenetic markers is becoming a fast and efficient method for studying environmental microbial communities. This has resulted in a steadily growing collection of marker sequences, most notably of the small-subunit (SSU) ribosomal RNA gene, and an increased understanding of microbial phylogeny, diversity and community composition patterns. However, to utilize these large datasets together with new sequencing technologies, a reliable and flexible system for taxonomic classification is critical. We developed CREST (Classification Resources for Environmental Sequence Tags), a set of resources and tools for generating and utilizing custom taxonomies and reference datasets for classification of environmental sequences. CREST uses an alignment-based classification method with the lowest common ancestor algorithm. It also uses explicit rank similarity criteria to reduce false positives and identify novel taxa. We implemented this method in a web server, a command line tool and the graphical user interfaced program MEGAN. Further, we provide the SSU rRNA reference database and taxonomy SilvaMod, derived from the publicly available SILVA SSURef, for classification of sequences from bacteria, archaea and eukaryotes. Using cross-validation and environmental datasets, we compared the performance of CREST and SilvaMod to the RDP Classifier. 
We also utilized Greengenes as a reference database, both with CREST and the RDP Classifier. These analyses indicate that CREST performs better than alignment-free methods with higher recall rate (sensitivity) as well as precision, and with the ability to accurately identify most sequences from novel taxa. Classification using SilvaMod performed better than with Greengenes, particularly when applied to environmental sequences. CREST is freely available under a GNU General Public License (v3) from http://apps.cbu.uib.no/crest and http://lcaclassifier.googlecode.com.",2012-11-08 +25621316,Read-mapping using personalized diploid reference genome for RNA sequencing data reduced bias for detecting allele-specific expression.,"Next generation sequencing (NGS) technologies have been applied extensively in many areas of genetics and genomics research. A fundamental problem when comes to analyzing NGS data is mapping short sequencing reads back to the reference genome. Most of existing software packages rely on a single uniform reference genome and do not automatically take into the consideration of genetic variants. On the other hand, large proportions of incorrectly mapped reads affect the correct interpretation of the NGS experimental results. As an example, Degner et al. showed that detecting allele-specific expression from RNA sequencing data was biased toward the reference allele. In this study, we developed a method that utilize DirectX 11 enabled graphics processing unit (GPU)'s parallel computing power to produces a personalized diploid reference genome based on all known genetic variants of that particular individual. We show that using such a personalized diploid reference genome can improve mapping accuracy and significantly reduce the bias toward reference allele in allele-specific expression analysis. Our method can be applied to any individual that has genotype information obtained either from array-based genotyping or resequencing. 
Besides the reference genome, no additional changes to alignment algorithm are needed for performing read mapping therefore one can utilize any of the existing read mapping tools and achieve the improved read mapping result. C++ and GPU compute shader source code of the software program is available at: http://code.google.com/p/diploid-mapping/downloads/list.",2012-10-01 +21919745,Increasing power of groupwise association test with likelihood ratio test.,"Sequencing studies have been discovering a numerous number of rare variants, allowing the identification of the effects of rare variants on disease susceptibility. As a method to increase the statistical power of studies on rare variants, several groupwise association tests that group rare variants in genes and detect associations between genes and diseases have been proposed. One major challenge in these methods is to determine which variants are causal in a group, and to overcome this challenge, previous methods used prior information that specifies how likely each variant is causal. Another source of information that can be used to determine causal variants is the observed data because case individuals are likely to have more causal variants than control individuals. In this article, we introduce a likelihood ratio test (LRT) that uses both data and prior information to infer which variants are causal and uses this finding to determine whether a group of variants is involved in a disease. We demonstrate through simulations that LRT achieves higher power than previous methods. We also evaluate our method on mutation screening data of the susceptibility gene for ataxia telangiectasia, and show that LRT can detect an association in real data. To increase the computational speed of our method, we show how we can decompose the computation of LRT, and propose an efficient permutation test. With this optimization, we can efficiently compute an LRT statistic and its significance at a genome-wide level. 
The software for our method is publicly available at http://genetics.cs.ucla.edu/rarevariants .",2011-09-15 +22821558,Computational identification of new structured cis-regulatory elements in the 3'-untranslated region of human protein coding genes.,"Messenger ribonucleic acids (RNAs) contain a large number of cis-regulatory RNA elements that function in many types of post-transcriptional regulation. These cis-regulatory elements are often characterized by conserved structures and/or sequences. Although some classes are well known, given the wide range of RNA-interacting proteins in eukaryotes, it is likely that many new classes of cis-regulatory elements are yet to be discovered. An approach to this is to use computational methods that have the advantage of analysing genomic data, particularly comparative data on a large scale. In this study, a set of structural discovery algorithms was applied followed by support vector machine (SVM) classification. We trained a new classification model (CisRNA-SVM) on a set of known structured cis-regulatory elements from 3'-untranslated regions (UTRs) and successfully distinguished these and groups of cis-regulatory elements it had not been trained on from control genomic and shuffled sequences. The new method outperformed previous methods in classification of cis-regulatory RNA elements. This model was then used to predict new elements from cross-species conserved regions of human 3'-UTRs. Clustering of these elements identified new classes of potential cis-regulatory elements. The model, training and testing sets and novel human predictions are available at: http://mRNA.otago.ac.nz/CisRNA-SVM.",2012-07-20 +23317704,AMDD: antimicrobial drug database.,"Drug resistance is one of the major concerns for antimicrobial chemotherapy against any particular target. Knowledge of the primary structure of antimicrobial agents and their activities is essential for rational drug design. 
Thus, we developed a comprehensive database, anti microbial drug database (AMDD), of known synthetic antibacterial and antifungal compounds that were extracted from the available literature and other chemical databases, e.g., PubChem, PubChem BioAssay and ZINC, etc. The current version of AMDD contains ~2900 antibacterial and ~1200 antifungal compounds. The molecules are annotated with properties such as description, target, format, bioassay, molecular weight, hydrogen bond donor, hydrogen bond acceptor and rotatable bond. The availability of these antimicrobial agents on common platform not only provides useful information but also facilitate the virtual screening process, thus saving time and overcoming difficulties in selecting specific type of inhibitors for the specific targets. AMDD may provide a more effective and efficient way of accessing antimicrobial compounds based on their properties along with the links to their structure and bioassay. All the compounds are freely available at the advanced web-based search interface http://www.amddatabase.info.",2012-11-07 +23094866,Automated phosphopeptide identification using multiple MS/MS fragmentation modes.,"Phosphopeptide identification is still a challenging task because fragmentation spectra obtained by mass spectrometry do not necessarily contain sufficient fragment ions to establish with certainty the underlying amino acid sequence and the precise phosphosite. To improve upon this, it has been suggested to acquire pairs of spectra from every phosphorylated precursor ion using different fragmentation modes, for example CID, ETD, and/or HCD. The development of automated tools for the interpretation of these paired spectra has however, until now, lagged behind. 
Using phosphopeptide samples analyzed by an LTQ-Orbitrap instrument, we here assess an approach in which, on each selected precursor, a pair of CID spectra, with or without multistage activation (MSA or MS2, respectively), are acquired in the linear ion trap. We applied this approach on phosphopeptide samples of variable proteomic complexity obtained from Arabidopsis thaliana . We present a straightforward computational approach to reconcile sequence and phosphosite identifications provided by the database search engine Mascot on the spectrum pairs, using two simple filtering rules, at the amino acid sequence and phosphosite localization levels. If multiple sequences and/or phosphosites are likely, they are reported in the consensus sequence. Using our program FragMixer, we could assess that on samples of moderate complexity, it was worth combining the two fragmentation schemes on every precursor ion to help efficiently identify amino acid sequences and precisely localize phosphosites. FragMixer can be flexibly configured, independently of the Mascot search parameters, and can be applied to various spectrum pairs, such as MSA/ETD and ETD/HCD, to automatically compare and combine the information provided by these more differing fragmentation modes. The software is openly accessible and can be downloaded from our Web site at http://proteomics.fr/FragMixer.",2012-11-07 +22053077,AMPA: an automated web server for prediction of protein antimicrobial regions.,"

Summary

AMPA is a web application for assessing the antimicrobial domains of proteins, with a focus on the design of new antimicrobial drugs. The application provides fast discovery of antimicrobial patterns in proteins that can be used to develop new peptide-based drugs against pathogens. Results are shown in a user-friendly graphical interface and can be downloaded as raw data for later examination.

Availability

AMPA is freely available on the web at http://tcoffee.crg.cat/apps/ampa. The source code is also available on the web.

Contact

marc.torrent@upf.edu; david.andreu@upf.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-03 +23895738,Measurement of baseline toxicity and QSAR analysis of 50 non-polar and 58 polar narcotic chemicals for the alga Pseudokirchneriella subcapitata.,"In this paper a set of homogenous experimental algal toxicity data was measured for 50 non-polar narcotic chemicals using the alga Pseudokirchneriella subcapitata in a closed test with a growth rate endpoint. Most of the tested compounds are high volume industrial chemicals that so far lacked published REACH-compliant algal growth inhibition values. The test protocol fulfilled the criteria set forth in the OECD guideline 201 and had the same sensitivity as the open test which allowed direct comparison of toxicity values. Baseline QSAR model for non-polar narcotic compounds was established and compared with previous analogous models. Multi-linear QSAR model was derived for the non-polar and 58 previously tested polar (anilines and phenols) narcotic compounds modulating hydrophobicity, molecular size, electronic and molecular stability effects coded in the molecular descriptors. Descriptors in the model were analyzed and applicability domain was assessed providing further guidelines for the in silico prediction purposes in decision support while performing risk assessment. QSAR models in the manuscript are available on-line through QsarDB repository for exploring and prediction services (http://hdl.handle.net/10967/106).",2013-07-26 +23413437,GalaxyGemini: a web server for protein homo-oligomer structure prediction based on similarity.,"

Summary

A large number of proteins function as homo-oligomers; therefore, predicting homo-oligomeric structure of proteins is of primary importance for understanding protein function at the molecular level. Here, we introduce a web server for prediction of protein homo-oligomer structure. The server takes a protein monomer structure as input and predicts its homo-oligomer structure from oligomer templates selected based on sequence and tertiary/quaternary structure similarity. Using protein model structures as input, the server shows clear improvement over the best methods of CASP9 in predicting oligomeric structures from amino acid sequences.

Availability

http://galaxy.seoklab.org/gemini.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-14 +21372084,SC²ATmd: a tool for integration of the figure of merit with cluster analysis for gene expression data.,"

Unlabelled

Standard and Consensus Clustering Analysis Tool for Microarray Data (SC²ATmd) is a MATLAB-implemented application specifically designed for the exploration of microarray gene expression data via clustering. Implementation of two versions of the clustering validation method figure of merit allows for performance comparisons between different clustering algorithms, and tailors the cluster analysis process to the varying characteristics of each dataset. Along with standard clustering algorithms this application also offers a consensus clustering method that can generate reproducible clusters across replicate experiments or different clustering algorithms. This application was designed specifically for the analysis of gene expression data, but may be used with any numerical data as long as it is in the right format.

Availability

SC²ATmd may be freely downloaded from http://www.compbiosci.wfu.edu/tools.htm.",2011-03-03 +22427539,The SMART Platform: early experience enabling substitutable applications for electronic health records.,"

Objective

The Substitutable Medical Applications, Reusable Technologies (SMART) Platforms project seeks to develop a health information technology platform with substitutable applications (apps) constructed around core services. The authors believe this is a promising approach to driving down healthcare costs, supporting standards evolution, accommodating differences in care workflow, fostering competition in the market, and accelerating innovation.

Materials and methods

The Office of the National Coordinator for Health Information Technology, through the Strategic Health IT Advanced Research Projects (SHARP) Program, funds the project. The SMART team has focused on enabling the property of substitutability through an app programming interface leveraging web standards, presenting predictable data payloads, and abstracting away many details of enterprise health information technology systems. Containers--health information technology systems, such as electronic health records (EHR), personally controlled health records, and health information exchanges that use the SMART app programming interface or a portion of it--marshal data sources and present data simply, reliably, and consistently to apps.

Results

The SMART team has completed the first phase of the project (a) defining an app programming interface, (b) developing containers, and (c) producing a set of charter apps that showcase the system capabilities. A focal point of this phase was the SMART Apps Challenge, publicized by the White House, using http://www.challenge.gov website, and generating 15 app submissions with diverse functionality.

Conclusion

Key strategic decisions must be made about the most effective market for further disseminating SMART: existing market-leading EHR vendors, new entrants into the EHR market, or other stakeholders such as health information exchanges.",2012-03-17 +23130999,mGOASVM: Multi-label protein subcellular localization based on gene ontology and support vector machines.,"

Background

Although many computational methods have been developed to predict protein subcellular localization, most of the methods are limited to the prediction of single-location proteins. Multi-location proteins are either not considered or assumed not existing. However, proteins with multiple locations are particularly interesting because they may have special biological functions, which are essential to both basic research and drug discovery.

Results

This paper proposes an efficient multi-label predictor, namely mGOASVM, for predicting the subcellular localization of multi-location proteins. Given a protein, the accession numbers of its homologs are obtained via BLAST search. Then, the original accession number and the homologous accession numbers of the protein are used as keys to search against the Gene Ontology (GO) annotation database to obtain a set of GO terms. Given a set of training proteins, a set of T relevant GO terms is obtained by finding all of the GO terms in the GO annotation database that are relevant to the training proteins. These relevant GO terms then form the basis of a T-dimensional Euclidean space on which the GO vectors lie. A support vector machine (SVM) classifier with a new decision scheme is proposed to classify the multi-label GO vectors. The mGOASVM predictor has the following advantages: (1) it uses the frequency of occurrences of GO terms for feature representation; (2) it selects the relevant GO subspace which can substantially speed up the prediction without compromising performance; and (3) it adopts an efficient multi-label SVM classifier which significantly outperforms other predictors. Briefly, on two recently published virus and plant datasets, mGOASVM achieves an actual accuracy of 88.9% and 87.4%, respectively, which are significantly higher than those achieved by the state-of-the-art predictors such as iLoc-Virus (74.8%) and iLoc-Plant (68.1%).

Conclusions

mGOASVM can efficiently predict the subcellular locations of multi-label proteins. The mGOASVM predictor is available online at http://bioinfo.eie.polyu.edu.hk/mGoaSvmServer/mGOASVM.html.",2012-11-06 +24189227,Molecular replacement: tricks and treats.,"Molecular replacement is the method of choice for X-ray crystallographic structure determination provided that suitable structural homologues are available in the PDB. Presently, there are ~80,000 structures in the PDB (8074 were deposited in the year 2012 alone), of which ~70% have been solved by molecular replacement. For successful molecular replacement the model must cover at least 50% of the total structure and the Cα r.m.s.d. between the core model and the structure to be solved must be less than 2 Å. Here, an approach originally implemented in the CaspR server (http://www.igs.cnrs-mrs.fr/Caspr2/index.cgi) based on homology modelling to search for a molecular-replacement solution is discussed. How the use of as much information as possible from different sources can improve the model(s) is briefly described. The combination of structural information with distantly related sequences is crucial to optimize the multiple alignment that will define the boundaries of the core domains. PDB clusters (sequences with ≥30% identical residues) can also provide information on the eventual changes in conformation and will help to explore the relative orientations assumed by protein subdomains. Normal-mode analysis can also help in generating series of conformational models in the search for a molecular-replacement solution. Of course, finding a correct solution is only the first step and the accuracy of the identified solution is as important as the data quality to proceed through refinement. Here, some possible reasons for failure are discussed and solutions are proposed using a set of successful examples.",2013-10-12 +24678958,Prognostic value of miR-96 in patients with acute myeloid leukemia.,"

Objective

Aberrant expression of miRNA (miR)-96 is associated with tumorigenesis and tumor progression in several solid cancers. However, little is known about the expression and prognostic value of miR-96 in acute myeloid leukemia (AML). Therefore, the aim of this study was to investigate the correlation of miR-96 expression with clinicopathological features and prognosis of AML.

Methods

Real-time quantitative RT-PCR assay was performed to evaluate the expression levels of miR-96 in mononuclear cells from bone marrow or peripheral blood specimens in 86 patients with newly diagnosed AML.

Results

Compared with normal controls, miR-96 expression was significantly downregulated in patients with newly diagnosed AML (P < 0.001). In analysis of 14 diagnosis/CR-paired samples, the expression level of miR-96 was found markedly elevated in patients after treatment than before (P < 0.001). Moreover, lower levels of miR-96 were associated with a higher white blood cell count, bone marrow blast count (P < 0.001 and 0.022, respectively), and lower hemoglobin and platelet count (P = 0.036 and 0.033, respectively). Although the low-expression group seemed to have a lower CR rate (53.85% vs 70.0%), there was no significant difference between the two groups (P = 0.213). The low-expression group had a lower relapse-free survival (RFS) (P = 0.038) and overall survival (OS) (P = 0.022) compared with the high-expression group during a median follow-up of 20 months.

Conclusion

Our data demonstrated that the expression of miR-96 was downregulated in newly diagnosed AML patients and associated with leukemic burden, as well as RFS and OS. This suggests that miR-96 detection might become a potential biomarker of prognosis and monitoring in AML.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1434808553949498.",2014-03-29 +23566548,Reconciliation and local gene tree rearrangement can be of mutual profit.,"

Background

Reconciliation methods compare gene trees and species trees to recover evolutionary events such as duplications, transfers and losses explaining the history and composition of genomes. It is well-known that gene trees inferred from molecular sequences can be partly erroneous due to incorrect sequence alignments as well as phylogenetic reconstruction artifacts such as long branch attraction. In practice, this leads reconciliation methods to overestimate the number of evolutionary events. Several methods have been proposed to circumvent this problem, by collapsing the unsupported edges and then resolving the obtained multifurcating nodes, or by directly rearranging the binary gene trees. Yet these methods have been defined for models of evolution accounting only for duplications and losses, i.e. can not be applied to handle prokaryotic gene families.

Results

We propose a reconciliation method accounting for gene duplications, losses and horizontal transfers, that specifically takes into account the uncertainties in gene trees by rearranging their weakly supported edges. Rearrangements are performed on edges having a low confidence value, and are accepted whenever they improve the reconciliation cost. We prove useful properties on the dynamic programming matrix used to compute reconciliations, which allows to speed-up the tree space exploration when rearrangements are generated by Nearest Neighbor Interchanges (NNI) edit operations. Experiments on synthetic data show that gene trees modified by such NNI rearrangements are closer to the correct simulated trees and lead to better event predictions on average. Experiments on real data demonstrate that the proposed method leads to a decrease in the reconciliation cost and the number of inferred events. Finally on a dataset of 30 k gene families, this reconciliation method shows a ranking of prokaryotic phyla by transfer rates identical to that proposed by a different approach dedicated to transfer detection [BMCBIOINF 11:324, 2010, PNAS 109(13):4962-4967, 2012].

Conclusions

Prokaryotic gene trees can now be reconciled with their species phylogeny while accounting for the uncertainty of the gene tree. More accurate and more precise reconciliations are obtained with respect to previous parsimony algorithms not accounting for such uncertainties [LNCS 6398:93-108, 2010, BIOINF 28(12): i283-i291, 2012]. Software implementing the method is freely available at http://www.atgc-montpellier.fr/Mowgli/.",2013-04-08 +23125372,Non-B DB v2.0: a database of predicted non-B DNA-forming motifs and its associated tools.,"The non-B DB, available at http://nonb.abcc.ncifcrf.gov, catalogs predicted non-B DNA-forming sequence motifs, including Z-DNA, G-quadruplex, A-phased repeats, inverted repeats, mirror repeats, direct repeats and their corresponding subsets: cruciforms, triplexes and slipped structures, in several genomes. Version 2.0 of the database revises and re-implements the motif discovery algorithms to better align with accepted definitions and thresholds for motifs, expands the non-B DNA-forming motifs coverage by including short tandem repeats and adds key visualization tools to compare motif locations relative to other genomic annotations. Non-B DB v2.0 extends the ability for comparative genomics by including re-annotation of the five organisms reported in non-B DB v1.0, human, chimpanzee, dog, macaque and mouse, and adds seven additional organisms: orangutan, rat, cow, pig, horse, platypus and Arabidopsis thaliana. Additionally, the non-B DB v2.0 provides an overall improved graphical user interface and faster query performance.",2012-11-03 +23125362,Rfam 11.0: 10 years of RNA families.,"The Rfam database (available via the website at http://rfam.sanger.ac.uk and through our mirror at http://rfam.janelia.org) is a collection of non-coding RNA families, primarily RNAs with a conserved RNA secondary structure, including both RNA genes and mRNA cis-regulatory elements. 
Each family is represented by a multiple sequence alignment, predicted secondary structure and covariance model. Here we discuss updates to the database in the latest release, Rfam 11.0, including the introduction of genome-based alignments for large families, the introduction of the Rfam Biomart as well as other user interface improvements. Rfam is available under the Creative Commons Zero license.",2012-11-03 +23125365,HBVdb: a knowledge database for Hepatitis B Virus.,"We have developed a specialized database, HBVdb (http://hbvdb.ibcp.fr), allowing the researchers to investigate the genetic variability of Hepatitis B Virus (HBV) and viral resistance to treatment. HBV is a major health problem worldwide with more than 350 million individuals being chronically infected. HBV is an enveloped DNA virus that replicates by reverse transcription of an RNA intermediate. HBV genome is optimized, being circular and encoding four overlapping reading frames. Indeed, each nucleotide of the genome takes part in the coding of at least one protein. However, HBV shows some genome variability leading to at least eight different genotypes and recombinant forms. The main drugs used to treat infected patients are nucleos(t)ides analogs (reverse transcriptase inhibitors). Unfortunately, HBV mutants resistant to these drugs may be selected and be responsible for treatment failure. HBVdb contains a collection of computer-annotated sequences based on manually annotated reference genomes. The database can be accessed through a web interface that allows static and dynamic queries and offers integrated generic sequence analysis tools and specialized analysis tools (e.g. annotation, genotyping, drug resistance profiling).",2012-11-03 +21899761,WebMGA: a customizable web server for fast metagenomic sequence analysis.,"

Background

The new field of metagenomics studies microorganism communities by culture-independent sequencing. With the advances in next-generation sequencing techniques, researchers are facing tremendous challenges in metagenomic data analysis due to the huge quantity and high complexity of sequence data. Analyzing large datasets is extremely time-consuming; also metagenomic annotation involves a wide range of computational tools, which are difficult for typical users to install and maintain. The tools provided by the few available web servers are also limited and have various constraints such as login requirement, long waiting time, inability to configure pipelines etc.

Results

We developed WebMGA, a customizable web server for fast metagenomic analysis. WebMGA includes over 20 commonly used tools such as ORF calling, sequence clustering, quality control of raw reads, removal of sequencing artifacts and contaminations, taxonomic analysis, functional annotation etc. WebMGA provides users with rapid metagenomic data analysis using fast and effective tools, which have been implemented to run in parallel on our local computer cluster. Users can access WebMGA through web browsers or programming scripts to perform individual analysis or to configure and run customized pipelines. WebMGA is freely available at http://weizhongli-lab.org/metagenomic-analysis.

Conclusions

WebMGA offers to researchers many fast and unique tools and great flexibility for complex metagenomic data analysis.",2011-09-07 +22877213,Automated motif discovery from glycan array data.,"Assessing interactions of a glycan-binding protein (GBP) or lectin with glycans on a microarray generates large datasets, making it difficult to identify a glycan structural motif or determinant associated with the highest apparent binding strength of the GBP. We have developed a computational method, termed GlycanMotifMiner, that uses the relative binding of a GBP with glycans within a glycan microarray to automatically reveal the glycan structural motifs recognized by a GBP. We implemented the software with a web-based graphical interface for users to explore and visualize the discovered motifs. The utility of GlycanMotifMiner was determined using five plant lectins, SNA, HPA, PNA, Con A, and UEA-I. Data from the analyses of the lectins at different protein concentrations were processed to rank the glycans based on their relative binding strengths. The motifs, defined as glycan substructures that exist in a large number of the bound glycans and few non-bound glycans, were then discovered by our algorithm and displayed in a web-based graphical user interface ( http://glycanmotifminer.emory.edu ). The information is used in defining the glycan-binding specificity of GBPs. The results were compared to the known glycan specificities of these lectins generated by manual methods. A more complex analysis was also carried out using glycan microarray data obtained for a recombinant form of human galectin-8. Results for all of these lectins show that GlycanMotifMiner identified the major motifs known in the literature along with some unexpected novel binding motifs.",2012-08-09 +30727506,First Report of Tatsoi Downy Mildew Caused by Hyaloperonospora brassicae in Korea.,"Tatsoi (Brassica narinosa L.H. 
Bailey), also called spinach mustard or spoon mustard, is cultivated for edible greens in Asia. In Korea, this plant has recently become popular as a sprout vegetable that is grown to harvestable size in 5 to 6 days. During April 2012, tatsoi seedlings showing typical symptoms of downy mildew were found in plastic greenhouses in Pyeongtaek City of Korea. Infection resulted in chlorotic areas on the leaves with a white mildew developing on the abaxial surface, and finally led to necrosis of the lesions. Affected sprouts were unmarketable and abandoned without harvesting. A sample was deposited in the Korea University herbarium (Accession No. KUS-F26445). Microscopic examination of fresh material was performed under a light microscope. Conidiophores emerging from stomata were hyaline, 270 to 550 × 10 to 25 μm, straight, and monopodially branched in six to eight orders. Ultimate branchlets were mostly in pairs, flexuous, and 15 to 25 μm long. Conidia were hyaline, subglobose, and 20.5 to 26.5 × 19.5 to 24.5 μm with a length/width ratio of 1.05 to 1.20. These characteristics unequivocally indicate the genus Hyaloperonospora (1). Previously H. parasitica (formerly under Peronospora) has been considered a causal agent of downy mildew on tatsoi (2,4), but the present Korean accession is morphologically distinct from the former species by possessing subglobose conidia with a low length/width ratio. To confirm this morphological difference, amplification and sequencing of the internal transcribed spacer (ITS) region of rDNA of the Korean specimen were performed using procedures outlined by Göker et al. (3). The resulting 874-bp sequence of the region was deposited in GenBank (Accession No. JX401551). A comparison with the ITS sequences available in the GenBank database revealed that it was identical to Hyaloperonospora brassicae found on Brassica oleracea var. italica (EU137726), and showed only one base pair substitution compared to pathogens from B. rapa ssp. 
pekinensis (JF975613) and B. napus spp. napus (EU049248), but is significantly different from H. parasitica on Capsella bursa-pastoris (AY210988) with a base-pair dissimilarity of about 13%. Therefore, the pathogen found in Korea was confirmed to be H. brassicae. Pathogenicity was demonstrated by shaking diseased leaves onto the leaves of healthy tatsoi seedlings, incubating the plants in a dew chamber at 20°C for 24 h, and then maintaining them in a greenhouse (22 to 26°C). After 3 days, inoculated plants developed downy mildew symptoms from which identical fungi were observed, thus fulfilling Koch's postulates. Control plants treated with sterile water did not develop any symptoms or signs of downy mildew. This is the first report of a downy mildew on tatsoi in Korea, although it has been found in China (2) and Japan (4). To our knowledge, there is no record of tatsoi downy mildew outside East Asia (2,4). References: (1) O. Constantinescu and J. Fatehi. Nova Hedwigia 74:291, 2002. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ July 18, 2012. (3) M. Göker et al. Mycol. Res. 113:308, 2009. (4) M. Satou et al. Annu. Rep. Soc. Plant Protect. N. Jpn. 50:62, 1999.",2012-11-01 +23114483,Nonoperative management of blunt hepatic injury: an Eastern Association for the Surgery of Trauma practice management guideline.,"

Background

During the last century, the management of blunt force trauma to the liver has changed from observation and expectant management in the early part of the 1900s to mainly operative intervention, to the current practice of selective operative and nonoperative management. These issues were first addressed by the Eastern Association for the Surgery of Trauma in the Practice Management Guidelines for Nonoperative Management of Blunt Injury to the Liver and Spleen published online in 2003. Since that time, a large volume of literature on these topics has been published requiring a reevaluation of the previous Eastern Association for the Surgery of Trauma guideline.

Methods

The National Library of Medicine and the National Institutes of Health MEDLINE database were searched using PubMed (http://www.pubmed.gov). The search was designed to identify English-language citations published after 1996 (the last year included in the previous guideline) using the keywords liver injury and blunt abdominal trauma.

Results

One hundred seventy-six articles were reviewed, of which 94 were used to create the current practice management guideline for the selective nonoperative management of blunt hepatic injury.

Conclusion

Most original hepatic guidelines remained valid and were incorporated into the greatly expanded current guidelines as appropriate. Nonoperative management of blunt hepatic injuries currently is the treatment modality of choice in hemodynamically stable patients, irrespective of the grade of injury or patient age. Nonoperative management of blunt hepatic injuries should only be considered in an environment that provides capabilities for monitoring, serial clinical evaluations, and an operating room available for urgent laparotomy. Patients presenting with hemodynamic instability and peritonitis still warrant emergent operative intervention. Intravenous contrast enhanced computed tomographic scan is the diagnostic modality of choice for evaluating blunt hepatic injuries. Repeated imaging should be guided by a patient's clinical status. Adjunctive therapies like angiography, percutaneous drainage, endoscopy/endoscopic retrograde cholangiopancreatography and laparoscopy remain important adjuncts to nonoperative management of hepatic injuries. Despite the explosion of literature on this topic, many questions regarding nonoperative management of blunt hepatic injuries remain without conclusive answers in the literature.",2012-11-01 +30743425,First Report of Crown Rot on Gypsophila (Gypsophila paniculata) Caused by Fusarium proliferatum in Korea.,"Gypsophilas commonly cultivated are Gypsophila elegans B. and G. paniculata L. In September of 2009 and 2010, a severe wilt symptom due to crown rot was observed on G. paniculata (cv. Bristol Fairy) in greenhouses in Yeosu, South Korea. The area of cultivation (~8 ha) in Yeosu covers 90% of production in the Jeonnam Province. Disease outbreak was 20 to 30% in affected greenhouses. Early symptoms included brown discoloration surrounding basal stems and slight wilting. Late symptoms included a sunken stem rot next to the roots, root rot, severe wilting, and dying plants. 
The causal fungus appeared to invade plants through the basal stem, causing a crown rot that prevented the plant from taking up water and nutrients. Crown rot occurred on young and mature plants. Ten fungal isolates were recovered from basal stems and roots of wilted plants. Microconidia were abundantly produced on potato dextrose agar (PDA), V8 juice agar (VA), carnation leaf agar (CLA), and oatmeal agar (OA). Microconidia were single celled, variable, oval-ellipsoid cylindrical, straight to curved, club-to-kidney shaped or spindle shaped on OA, more slender on VA. Macroconidia were not found on any media used. Microconidia on PDA were 5.9 to 15.1 (9.9) × 2.7 to 4.3 (3.5) μm. Germinated conidia (or false conidia) were often formed on CLA. Conidiophores as phialides were singly formed but often branched. Length of conidiophores was up to 31.1 μm on CLA. Small-sized chlamydospores were rarely found. Fusarium isolates (EML-GYP1, 2, and 3) were selected and identified. From extracted genomic DNA, the internal transcribed spacer (ITS) region including 5.8S rDNA was amplified using ITS1F (5'-CTTGGTCATTTAGAGGAAGT-3') and LR5F (5'-GCTATCCTGAGGGAAAC-3') primers. Sequence analyses by BLAST indicated that the isolates (GenBank HM560019, HM560020, and HM560021) were most similar to F. proliferatum (EF4534150) with sequence identity values of 99.3, 99.4, and 99.1%, respectively. The causal fungus was determined to be F. proliferatum based on morphological data and ITS rDNA sequences. Pathogenicity tests with the three isolates were performed on 10 plants of G. paniculata using the dipping method. Healthy roots and basal stems were soaked in a conidial suspension adjusted to ~1.2 × 10⁶ conidia/ml (distilled water) for 15 min. Plants were potted in sterile soil, kept in a humid chamber for 72 h, and moved to a greenhouse. The experiment was carried out in duplicate and repeated two times. Similar symptoms to those observed in the greenhouses were seen 7 days after inoculation. 
The causal fungus was reisolated from the artificially inoculated basal stems, fulfilling Koch's postulates. Control plants whose basal stems and roots were dipped in sterile water showed no crown rot and wilt symptoms. EML-GYP2 was determined to be the most pathogenic. Ten records of disease caused by three Fusarium species (Fusarium sp., F. oxysporum, and F. udum) have been found on gypsophilas (1), but only F. oxysporum has been reported to cause wilt on G. elegans in Korea (2). To our knowledge, this is the first report of crown rot on gypsophila caused by F. proliferatum in Korea as well as the world. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , October 27, 2010, (2) W.-G. Kim and H.-M. Koo. Page 381 in: List of Plant Diseases in Korea. The Korean Society of Plant Pathology, 2009.",2011-02-01 +30727470,First Report of Colletotrichum chlorophyti Causing Soybean Anthracnose.,"Anthracnose of soybean [Glycine max (L.) Merr.] is caused by several Colletotrichum spp. (4). Petiole samples were collected from Alabama, Mississippi, and Illinois in 2009. Diseased tissues suspected of being caused by Colletotrichum were cut into 1- to 2-cm lengths, surface-disinfested, and placed on water agar. Pure cultures obtained by picking single spores from sporulating acervuli on tissue or hyphal tips on agar were transferred to acidic potato dextrose agar (APDA) at 24 ±1°C under 12-h cool-white fluorescent lighting. Isolates were grouped by morphological characteristics. One group consisting of six isolates (four from IL and one each from AL and MS) did not morphologically match any reported Colletotrichum spp. causing soybean anthracnose but matched the description of C. chlorophyti S. Chandra & Tandon (1,2). On APDA, colonies were initially pink, turning black after several days with smooth margins and no aerial mycelium. 
Conidial masses were light salmon in color. Conidia ranged from 15.5 to 21.3 μm long (mean 18.0 μm) × 2.5 to 4.3 wide (mean 3.3 μm) (n = 200). They were curved with tapered ends and a truncated base, aseptate, and hyaline. Chlamydospores were dark brown, clustered or chained together, and 5 to 12 μm wide (n = 30). Setae were straight, dark brown, and septate. Appressoria and perithecia were absent. Soybean plants (cv. Williams 82) at the V2 to V3 stage were atomized with a suspension of fragmented mycelia (40 mg/ml) using one isolate from IL. Plants were kept moist (>90% relative humidity) for 48 h in the dark, then transferred to normal growing conditions. Three days post-inoculation, younger trifoliolate leaf margins and intra- and interveinal lesions were necrotic surrounded by slight chlorosis. Isolations were obtained from symptomatic leaves and confirmed as C. chlorophyti by morphological characteristics. Further comparisons were completed with one isolate (IL1A or BPI 884117) by PCR and BLAST sequencing analyses of the partial ITS rDNA region, actin, β-tubulin, GAPDH, and histone H3 genes (2) (GenBank Accession Nos. JX126475, JX126476, JX126477, JX126478, and JX126479, respectively). The results showed high identity of all the five sequences to two C. chlorophyti isolates, IMI 103806 and CBS 142.79 (Accession Nos. GU227894/GU227895 in ITS = 100%, GU227992/GU227993 in actin = 99%, GU228188/GU228189 in β-tubulin = 99%, GU228286/GU228287 in GAPDH = 99% and 96%, respectively, and GU228090/GU228091 in histone H3 = 99%). Soybean anthracnose, commonly caused by C. truncatum, has curved and truncated conidia that are longer than C. chlorophyti. In addition, the two are distinguished by chlamydospores and lack of appressoria in C. chlorophyti combined with differences in multigene sequence analysis. Isolates of C. chlorophyti were reported to infect Chlorophytum sp. (Liliaceae) in India and Stylosanthes hamate in Australia (3). 
To our knowledge, there are no previous reports of this species in the United States or of it infecting soybean worldwide (3). This report describes C. chlorophyti as a novel incitant of soybean anthracnose. References: (1) S. Chandra and R. N. Tandon. Curr. Sci. 34:565, 1965. (2) U. Damm et al. Fungal Divers. 39:45, 2009. (3) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , May 21, 2012. (4) G. L. Hartman et al. Compendium of Soybean Diseases, APS Press, St. Paul, MN. pp. 13, 1999.",2012-11-01 +23148880,prfectBLAST: a platform-independent portable front end for the command terminal BLAST+ stand-alone suite.,"prfectBLAST is a multiplatform graphical user interface (GUI) for the stand-alone BLAST+ suite of applications. It allows researchers to do nucleotide or amino acid sequence similarity searches against public (or user-customized) databases that are locally stored. It does not require any dependencies or installation and can be used from a portable flash drive. prfectBLAST is implemented in Java version 6 (SUN) and runs on all platforms that support Java and for which National Center for Biotechnology Information has made available stand-alone BLAST executables, including MS Windows, Mac OS X, and Linux. It is free and open source software, made available under the GNU General Public License version 3 (GPLv3) and can be downloaded at www.cicy.mx/sitios/jramirez or http://code.google.com/p/prfectblast/.",2012-11-01 +21713110,"The connectome viewer toolkit: an open source framework to manage, analyze, and visualize connectomes.","Advanced neuroinformatics tools are required for methods of connectome mapping, analysis, and visualization. The inherent multi-modality of connectome datasets poses new challenges for data organization, integration, and sharing. 
We have designed and implemented the Connectome Viewer Toolkit - a set of free and extensible open source neuroimaging tools written in Python. The key components of the toolkit are as follows: (1) The Connectome File Format is an XML-based container format to standardize multi-modal data integration and structured metadata annotation. (2) The Connectome File Format Library enables management and sharing of connectome files. (3) The Connectome Viewer is an integrated research and development environment for visualization and analysis of multi-modal connectome data. The Connectome Viewer's plugin architecture supports extensions with network analysis packages and an interactive scripting shell, to enable easy development and community contributions. Integration with tools from the scientific Python community allows the leveraging of numerous existing libraries for powerful connectome data mining, exploration, and comparison. We demonstrate the applicability of the Connectome Viewer Toolkit using Diffusion MRI datasets processed by the Connectome Mapper. The Connectome Viewer Toolkit is available from http://www.cmtk.org/",2011-06-06 +21626396,MITK-ToF--range data within MITK.,"

Purpose

The time-of-flight (ToF) technique is an emerging technique for rapidly acquiring distance information and is becoming increasingly popular for intra-operative surface acquisition. Using the ToF technique as an intra-operative imaging modality requires seamless integration into the clinical workflow. We thus aim to integrate ToF support in an existing framework for medical image processing.

Methods

MITK-ToF was implemented as an extension of the open-source C++ Medical Imaging Interaction Toolkit (MITK) and provides the basic functionality needed for rapid prototyping and development of image-guided therapy (IGT) applications that utilize range data for intra-operative surface acquisition. This framework was designed with a module-based architecture separating the hardware-dependent image acquisition task from the processing of the range data.

Results

The first version of MITK-ToF has been released as an open-source toolkit and supports several ToF cameras and basic processing algorithms. The toolkit, a sample application, and a tutorial are available from http://mitk.org.

Conclusions

With the increased popularity of time-of-flight cameras for intra-operative surface acquisition, integration of range data supports into medical image processing toolkits such as MITK is a necessary step. Handling acquisition of range data from different cameras and processing of the data requires the establishment and use of software design principles that emphasize flexibility, extendibility, robustness, performance, and portability. The open-source toolkit MITK-ToF satisfies these requirements for the image-guided therapy community and was already used in several research projects.",2011-05-31 +22084253,Feature-based classifiers for somatic mutation detection in tumour-normal paired sequencing data.,"

Motivation

The study of cancer genomes now routinely involves using next-generation sequencing technology (NGS) to profile tumours for single nucleotide variant (SNV) somatic mutations. However, surprisingly few published bioinformatics methods exist for the specific purpose of identifying somatic mutations from NGS data and existing tools are often inaccurate, yielding intolerably high false prediction rates. As such, the computational problem of accurately inferring somatic mutations from paired tumour/normal NGS data remains an unsolved challenge.

Results

We present the comparison of four standard supervised machine learning algorithms for the purpose of somatic SNV prediction in tumour/normal NGS experiments. To evaluate these approaches (random forest, Bayesian additive regression tree, support vector machine and logistic regression), we constructed 106 features representing 3369 candidate somatic SNVs from 48 breast cancer genomes, originally predicted with naive methods and subsequently revalidated to establish ground truth labels. We trained the classifiers on this data (consisting of 1015 true somatic mutations and 2354 non-somatic mutation positions) and conducted a rigorous evaluation of these methods using a cross-validation framework and hold-out test NGS data from both exome capture and whole genome shotgun platforms. All learning algorithms employing predictive discriminative approaches with feature selection improved the predictive accuracy over standard approaches by statistically significant margins. In addition, using unsupervised clustering of the ground truth 'false positive' predictions, we noted several distinct classes and present evidence suggesting non-overlapping sources of technical artefacts illuminating important directions for future study.

Availability

Software called MutationSeq and datasets are available from http://compbio.bccrc.ca.",2011-11-13 +23275706,An online database for genome information of agricultural plants.,"

Unlabelled

The integration-based genome database provides useful information through a user-friendly web interface that allows analysis of comparative genome for agricultural plants. We have concentrated on the functional bioinformatics of major agricultural resources, such as rice, Chinese cabbage, rice mutant lines, and microorganisms. The major functions are focused on functional genome analysis, including genome projects, gene expression analysis, gene markers with genetic map, analysis tools for comparative genome structure, and genome annotation in agricultural plants.

Availability

The database is available for free at http://nabic.naas.go.kr/",2012-10-31 +21985277,A model-based circular binary segmentation algorithm for the analysis of array CGH data.,"

Background

Circular Binary Segmentation (CBS) is a permutation-based algorithm for array Comparative Genomic Hybridization (aCGH) data analysis. CBS accurately segments data by detecting change-points using a maximal-t test; but extensive computational burden is involved for evaluating the significance of change-points using permutations. A recent implementation utilizing a hybrid method and early stopping rules (hybrid CBS) to improve the performance in speed was subsequently proposed. However, a time analysis revealed that a major portion of computation time of the hybrid CBS was still spent on permutation. In addition, what the hybrid method provides is an approximation of the significance upper bound or lower bound, not an approximation of the significance of change-points itself.

Results

We developed a novel model-based algorithm, extreme-value based CBS (eCBS), which limits permutations and provides robust results without loss of accuracy. Thousands of aCGH data under null hypothesis were simulated in advance based on a variety of non-normal assumptions, and the corresponding maximal-t distribution was modeled by the Generalized Extreme Value (GEV) distribution. The modeling results, which associate characteristics of aCGH data to the GEV parameters, constitute lookup tables (eXtreme model). Using the eXtreme model, the significance of change-points could be evaluated in a constant time complexity through a table lookup process.

Conclusions

A novel algorithm, eCBS, was developed in this study. The current implementation of eCBS consistently outperforms the hybrid CBS 4× to 20× in computation time without loss of accuracy. Source codes, supplementary materials, supplementary figures, and supplementary tables can be found at http://ntumaps.cgm.ntu.edu.tw/eCBSsupplementary.",2011-10-10 +22991088,Accessing data from the International Mouse Phenotyping Consortium: state of the art and future plans.,"The International Mouse Phenotyping Consortium (IMPC) (http://www.mousephenotype.org) will reveal the pleiotropic functions of every gene in the mouse genome and uncover the wider role of genetic loci within diverse biological systems. Comprehensive informatics solutions are vital to ensuring that this vast array of data is captured in a standardised manner and made accessible to the scientific community for interrogation and analysis. Here we review the existing EuroPhenome and WTSI phenotype informatics systems and the IKMC portal, and present plans for extending these systems and lessons learned to the development of a robust IMPC informatics infrastructure.",2012-09-19 +23114100,Mining the literature: new methods to exploit keyword profiles.,"Bibliographic records in the PubMed database of biomedical literature are annotated with Medical Subject Headings (MeSH) by curators, which summarize the content of the articles. Two recent publications explain how to generate profiles of MeSH terms for a set of bibliographic records and to use them to define any given concept by its associated literature. These concepts can then be related by their keyword profiles, and this can be used, for example, to detect new associations between genes and inherited diseases.

See related research articles

http://www.biomedcentral.com/1471-2105/13/249/abstracthttp://genomemedicine.com/content/4/9/75/abstract.",2012-10-30 +23118483,Spliceosome database: a tool for tracking components of the spliceosome.,"The spliceosome is the extremely complex macromolecular machine responsible for pre-mRNA splicing. It assembles from five U-rich small nuclear RNAs (snRNAs) and over 200 proteins in a highly dynamic fashion. One important challenge to studying the spliceosome is simply keeping track of all these proteins, a situation further complicated by the variety of names and identifiers that exist in the literature for them. To facilitate studies of the spliceosome and its components, we created a database of spliceosome-associated proteins and snRNAs, which is available at http://spliceosomedb.ucsc.edu and can be queried through a simple browser interface. In the database, we cataloged the various names, orthologs and gene identifiers of spliceosome proteins to navigate the complex nomenclature of spliceosome proteins. We also provide links to gene and protein records for the spliceosome components in other databases. To navigate spliceosome assembly dynamics, we created tools to compare the association of spliceosome proteins with complexes that form at specific stages of spliceosome assembly based on a compendium of mass spectrometry experiments that identified proteins in purified splicing complexes. Together, the information in the database provides an easy reference for spliceosome components and will support future modeling of spliceosome structure and dynamics.",2012-10-30 +23118484,MODOMICS: a database of RNA modification pathways--2013 update.,"MODOMICS is a database of RNA modifications that provides comprehensive information concerning the chemical structures of modified ribonucleosides, their biosynthetic pathways, RNA-modifying enzymes and location of modified residues in RNA sequences. 
In the current database version, accessible at http://modomics.genesilico.pl, we included new features: a census of human and yeast snoRNAs involved in RNA-guided RNA modification, a new section covering the 5'-end capping process, and a catalogue of 'building blocks' for chemical synthesis of a large variety of modified nucleosides. The MODOMICS collections of RNA modifications, RNA-modifying enzymes and modified RNAs have been also updated. A number of newly identified modified ribonucleosides and more than one hundred functionally and structurally characterized proteins from various organisms have been added. In the RNA sequences section, snRNAs and snoRNAs with experimentally mapped modified nucleosides have been added and the current collection of rRNA and tRNA sequences has been substantially enlarged. To facilitate literature searches, each record in MODOMICS has been cross-referenced to other databases and to selected key publications. New options for database searching and querying have been implemented, including a BLAST search of protein sequences and a PARALIGN search of the collected nucleic acid sequences.",2012-10-30 +21685098,A novel computational framework for simultaneous integration of multiple types of genomic data to identify microRNA-gene regulatory modules.,"

Motivation

It is well known that microRNAs (miRNAs) and genes work cooperatively to form the key part of gene regulatory networks. However, the specific functional roles of most miRNAs and their combinatorial effects in cellular processes are still unclear. The availability of multiple types of functional genomic data provides unprecedented opportunities to study the miRNA-gene regulation. A major challenge is how to integrate the diverse genomic data to identify the regulatory modules of miRNAs and genes.

Results

Here we propose an effective data integration framework to identify the miRNA-gene regulatory comodules. The miRNA and gene expression profiles are jointly analyzed in a multiple non-negative matrix factorization framework, and additional network data are simultaneously integrated in a regularized manner. Meanwhile, we employ the sparsity penalties to the variables to achieve modular solutions. The mathematical formulation can be effectively solved by an iterative multiplicative updating algorithm. We apply the proposed method to integrate a set of heterogeneous data sources including the expression profiles of miRNAs and genes on 385 human ovarian cancer samples, computationally predicted miRNA-gene interactions, and gene-gene interactions. We demonstrate that the miRNAs and genes in 69% of the regulatory comodules are significantly associated. Moreover, the comodules are significantly enriched in known functional sets such as miRNA clusters, GO biological processes and KEGG pathways, respectively. Furthermore, many miRNAs and genes in the comodules are related with various cancers including ovarian cancer. Finally, we show that comodules can stratify patients (samples) into groups with significant clinical characteristics.

Availability

The program and supplementary materials are available at http://zhoulab.usc.edu/SNMNMF/.

Contact

xjzhou@usc.edu; zsh@amss.ac.cn",2011-07-01 +23633579,OpenStructure: an integrated software framework for computational structural biology.,"Research projects in structural biology increasingly rely on combinations of heterogeneous sources of information, e.g. evolutionary information from multiple sequence alignments, experimental evidence in the form of density maps and proximity constraints from proteomics experiments. The OpenStructure software framework, which allows the seamless integration of information of different origin, has previously been introduced. The software consists of C++ libraries which are fully accessible from the Python programming language. Additionally, the framework provides a sophisticated graphics module that interactively displays molecular structures and density maps in three dimensions. In this work, the latest developments in the OpenStructure framework are outlined. The extensive capabilities of the framework will be illustrated using short code examples that show how information from molecular-structure coordinates can be combined with sequence data and/or density maps. The framework has been released under the LGPL version 3 license and is available for download from http://www.openstructure.org.",2013-04-19 +21896509,SpliceTrap: a method to quantify alternative splicing under single cellular conditions.,"

Motivation

Alternative splicing (AS) is a pre-mRNA maturation process leading to the expression of multiple mRNA variants from the same primary transcript. More than 90% of human genes are expressed via AS. Therefore, quantifying the inclusion level of every exon is crucial for generating accurate transcriptomic maps and studying the regulation of AS.

Results

Here we introduce SpliceTrap, a method to quantify exon inclusion levels using paired-end RNA-seq data. Unlike other tools, which focus on full-length transcript isoforms, SpliceTrap approaches the expression-level estimation of each exon as an independent Bayesian inference problem. In addition, SpliceTrap can identify major classes of alternative splicing events under a single cellular condition, without requiring a background set of reads to estimate relative splicing changes. We tested SpliceTrap both by simulation and real data analysis, and compared it to state-of-the-art tools for transcript quantification. SpliceTrap demonstrated improved accuracy, robustness and reliability in quantifying exon-inclusion ratios.

Conclusions

SpliceTrap is a useful tool to study alternative splicing regulation, especially for accurate quantification of local exon-inclusion ratios from RNA-seq data.

Availability and implementation

SpliceTrap can be implemented online through the CSH Galaxy server http://cancan.cshl.edu/splicetrap and is also available for download and installation at http://rulai.cshl.edu/splicetrap/.

Contact

michael.zhang@utdallas.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-06 +23369107,An enhanced computational platform for investigating the roles of regulatory RNA and for identifying functional RNA motifs.,"

Background

Functional RNA molecules participate in numerous biological processes, ranging from gene regulation to protein synthesis. Analysis of functional RNA motifs and elements in RNA sequences can obtain useful information for deciphering RNA regulatory mechanisms. Our previous work, RegRNA, is widely used in the identification of regulatory motifs, and this work extends it by incorporating more comprehensive and updated data sources and analytical approaches into a new platform.

Methods and results

An integrated web-based system, RegRNA 2.0, has been developed for comprehensively identifying the functional RNA motifs and sites in an input RNA sequence. Numerous data sources and analytical approaches are integrated, and several types of functional RNA motifs and sites can be identified by RegRNA 2.0: (i) splicing donor/acceptor sites; (ii) splicing regulatory motifs; (iii) polyadenylation sites; (iv) ribosome binding sites; (v) rho-independent terminator; (vi) motifs in mRNA 5'-untranslated region (5'UTR) and 3'UTR; (vii) AU-rich elements; (viii) C-to-U editing sites; (ix) riboswitches; (x) RNA cis-regulatory elements; (xi) transcriptional regulatory motifs; (xii) user-defined motifs; (xiii) similar functional RNA sequences; (xiv) microRNA target sites; (xv) non-coding RNA hybridization sites; (xvi) long stems; (xvii) open reading frames; (xviii) related information of an RNA sequence. User can submit an RNA sequence and obtain the predictive results through RegRNA 2.0 web page.

Conclusions

RegRNA 2.0 is an easy to use web server for identifying regulatory RNA motifs and functional sites. Through its integrated user-friendly interface, user is capable of using various analytical approaches and observing results with graphical visualization conveniently. RegRNA 2.0 is now available at http://regrna2.mbc.nctu.edu.tw.",2013-01-21 +24273247,DiSWOP: a novel measure for cell-level protein network analysis in localized proteomics image data.,"

Motivation

New bioimaging techniques have recently been proposed to visualize the colocation or interaction of several proteins within individual cells, displaying the heterogeneity of neighbouring cells within the same tissue specimen. Such techniques could hold the key to understanding complex biological systems such as the protein interactions involved in cancer. However, there is a need for new algorithmic approaches that analyze the large amounts of multi-tag bioimage data from cancerous and normal tissue specimens to begin to infer protein networks and unravel the cellular heterogeneity at a molecular level.

Results

The proposed approach analyzes cell phenotypes in normal and cancerous colon tissue imaged using the robotically controlled Toponome Imaging System microscope. It involves segmenting the 4',6-diamidino-2-phenylindole-labelled image into cells and determining the cell phenotypes according to their protein-protein dependence profile. These were analyzed using two new measures, Difference in Sums of Weighted cO-dependence/Anti-co-dependence profiles (DiSWOP and DiSWAP) for overall co-expression and anti-co-expression, respectively. These novel quantities were extracted using 11 Toponome Imaging System image stacks from either cancerous or normal human colorectal specimens. This approach enables one to easily identify protein pairs that have significantly higher/lower co-expression levels in cancerous tissue samples when compared with normal colon tissue.

Availability and implementation

http://www2.warwick.ac.uk/fac/sci/dcs/research/combi/research/bic/diswop.",2013-11-21 +23234249,Breast cancer screening: evidence of benefit depends on the method used.,"In this article, we discuss the most common epidemiological methods used for evaluating the ability of mammography screening to decrease the risk of breast cancer death in general populations (effectiveness). Case-control studies usually find substantial effectiveness. However when breast cancer mortality decreases for reasons unrelated to screening, the case-control design may attribute to screening mortality reductions due to other causes. Studies based on incidence-based mortality have obtained contrasted results compatible with modest to considerable effectiveness, probably because of differences in study design and statistical analysis. In areas where screening has been widespread for a long time, the incidence of advanced breast cancer should be decreasing, which in turn would translate into reduced mortality. However, no or modest declines in the incidence of advanced breast cancer has been observed in these areas. Breast cancer mortality should decrease more rapidly in areas with early introduction of screening than in areas with late introduction of screening. Nonetheless, no difference in breast mortality trends has been observed between areas with early or late screening start. When effectiveness is assessed using incidence-based mortality studies, or the monitoring of advanced cancer incidence, or trends in mortality, the ecological bias is an inherent limitation that is not easy to control. Minimization of this bias requires data over long periods of time, careful selection of populations being compared and availability of data on major confounding factors. 
If case-control studies seem apparently more adequate for evaluating screening effectiveness, this design has its own limitations and results must be viewed with caution.See related Opinion article: http://www.biomedcentral.com/1741-7015/10/106 and Commentary http://www.biomedcentral.com/1741-7015/10/164.",2012-12-12 +24278254,PD5: a general purpose library for primer design software.,"

Background

Complex PCR applications for large genome-scale projects require fast, reliable and often highly sophisticated primer design software applications. Presently, such applications use pipelining methods to utilise many third party applications and this involves file parsing, interfacing and data conversion, which is slow and prone to error. A fully integrated suite of software tools for primer design would considerably improve the development time, the processing speed, and the reliability of bespoke primer design software applications.

Results

The PD5 software library is an open-source collection of classes and utilities, providing a complete collection of software building blocks for primer design and analysis. It is written in object-oriented C(++) with an emphasis on classes suitable for efficient and rapid development of bespoke primer design programs. The modular design of the software library simplifies the development of specific applications and also integration with existing third party software where necessary. We demonstrate several applications created using this software library that have already proved to be effective, but we view the project as a dynamic environment for building primer design software and it is open for future development by the bioinformatics community. Therefore, the PD5 software library is published under the terms of the GNU General Public License, which guarantee access to source-code and allow redistribution and modification.

Conclusions

The PD5 software library is downloadable from Google Code and the accompanying Wiki includes instructions and examples: http://code.google.com/p/primer-design.",2013-11-21 +23028544,Characterization and robust classification of EEG signal from image RSVP events with independent time-frequency features.,"

Unlabelled

This paper considers the problem of automatic characterization and detection of target images in a rapid serial visual presentation (RSVP) task based on EEG data. A novel method that aims to identify single-trial event-related potentials (ERPs) in time-frequency is proposed, and a robust classifier with feature clustering is developed to better utilize the correlated ERP features. The method is applied to EEG recordings of a RSVP experiment with multiple sessions and subjects.The results show that the target image events are mainly characterized by 3 distinct patterns in the time-frequency domain, i.e., a theta band (4.3 Hz) power boosting 300-700 ms after the target image onset, an alpha band (12 Hz) power boosting 500-1000 ms after the stimulus onset, and a delta band (2 Hz) power boosting after 500 ms. The most discriminant time-frequency features are power boosting and are relatively consistent among multiple sessions and subjects.Since the original discriminant time-frequency features are highly correlated, we constructed the uncorrelated features using hierarchical clustering for better classification of target and non-target images. With feature clustering, performance (area under ROC) improved from 0.85 to 0.89 on within-session tests, and from 0.76 to 0.84 on cross-subject tests. The constructed uncorrelated features were more robust than the original discriminant features and corresponded to a number of local regions on the time-frequency plane.

Availability

The data and code are available at: http://compgenomics.cbi.utsa.edu/rsvp/index.html.",2012-09-18 +24657884,Rationale and cross-sectional study design of the Research on Obesity and type 2 Diabetes among African Migrants: the RODAM study.,"

Introduction

Obesity and type 2 diabetes (T2D) are highly prevalent among African migrants compared with European descent populations. The underlying reasons still remain a puzzle. Gene-environmental interaction is now seen as a potential plausible factor contributing to the high prevalence of obesity and T2D, but has not yet been investigated. The overall aim of the Research on Obesity and Diabetes among African Migrants (RODAM) project is to understand the reasons for the high prevalence of obesity and T2D among sub-Saharan Africans in diaspora by (1) studying the complex interplay between environment (eg, lifestyle), healthcare, biochemical and (epi)genetic factors, and their relative contributions to the high prevalence of obesity and T2D; (2) to identify specific risk factors within these broad categories to guide intervention programmes and (3) to provide a basic knowledge for improving diagnosis and treatment.

Methods and analysis

RODAM is a multicentre cross-sectional study among homogenous sub-Saharan African participants (ie, Ghanaians) aged >25 years living in rural and urban Ghana, the Netherlands, Germany and the UK (http://rod-am.eu/). Standardised data on the main outcomes, genetic and non-genetic factors are collected in all locations. The aim is to recruit 6250 individuals comprising five subgroups of 1250 individuals from each site. In Ghana, Kumasi and Obuasi (urban stratum) and villages in the Ashanti region (rural stratum) are served as recruitment sites. In Europe, Ghanaian migrants are selected through the municipality or Ghanaian organisations registers.

Ethics and dissemination

Ethical approval has been obtained in all sites. This paper gives an overview of the rationale, conceptual framework and methods of the study. The differences across locations will allow us to gain insight into genetic and non-genetic factors contributing to the occurrence of obesity and T2D and will inform targeted intervention and prevention programmes, and provide the basis for improving diagnosis and treatment in these populations and beyond.",2014-03-21 +21804150,OPCRIT+: an electronic system for psychiatric diagnosis and data collection in clinical and research settings.,"

Background

The increasingly large sample size requirements of modern adult mental health research suggests the need for a data collection and diagnostic application that can be used across a broad range of clinical and research populations. Aims To develop a data collection and diagnostic application that can be used across a broad range of clinical and research settings.

Method

We expanded and redeveloped the OPCRIT system into a broadly applicable diagnostic and data-collection package and carried out an interrater reliability study of this new tool.

Results

OPCRIT+ performed well in an interrater reliability study with relatively inexperienced clinicians, giving a combined, weighted kappa of 0.70 for diagnostic reliability.

Conclusions

OPCRIT+ showed good overall interrater reliability scores for diagnoses. It is now incorporated in the electronic patient record of the Maudsley and associated hospitals. OPCRIT+ can be downloaded free of charge at http://sgdp.iop.kcl.ac.uk/opcritplus.",2011-08-01 +24096415,"GET_HOMOLOGUES, a versatile software package for scalable and robust microbial pangenome analysis.","GET_HOMOLOGUES is an open-source software package that builds on popular orthology-calling approaches making highly customizable and detailed pangenome analyses of microorganisms accessible to nonbioinformaticians. It can cluster homologous gene families using the bidirectional best-hit, COGtriangles, or OrthoMCL clustering algorithms. Clustering stringency can be adjusted by scanning the domain composition of proteins using the HMMER3 package, by imposing desired pairwise alignment coverage cutoffs, or by selecting only syntenic genes. The resulting homologous gene families can be made even more robust by computing consensus clusters from those generated by any combination of the clustering algorithms and filtering criteria. Auxiliary scripts make the construction, interrogation, and graphical display of core genome and pangenome sets easy to perform. Exponential and binomial mixture models can be fitted to the data to estimate theoretical core genome and pangenome sizes, and high-quality graphics can be generated. Furthermore, pangenome trees can be easily computed and basic comparative genomics performed to identify lineage-specific genes or gene family expansions. The software is designed to take advantage of modern multiprocessor personal computers as well as computer clusters to parallelize time-consuming tasks. To demonstrate some of these capabilities, we survey a set of 50 Streptococcus genomes annotated in the Orthologous Matrix (OMA) browser as a benchmark case. 
The package can be downloaded at http://www.eead.csic.es/compbio/soft/gethoms.php and http://maya.ccg.unam.mx/soft/gethoms.php.",2013-10-04 +21940644,Model averaging and Bayes factor calculation of relaxed molecular clocks in Bayesian phylogenetics.,"We describe a procedure for model averaging of relaxed molecular clock models in Bayesian phylogenetics. Our approach allows us to model the distribution of rates of substitution across branches, averaged over a set of models, rather than conditioned on a single model. We implement this procedure and test it on simulated data to show that our method can accurately recover the true underlying distribution of rates. We applied the method to a set of alignments taken from a data set of 12 mammalian species and uncovered evidence that lognormally distributed rates better describe this data set than do exponentially distributed rates. Additionally, our implementation of model averaging permits accurate calculation of the Bayes factor(s) between two or more relaxed molecular clock models. Finally, we introduce a new computational approach for sampling rates of substitution across branches that improves the convergence of our Markov chain Monte Carlo algorithms in this context. Our methods are implemented under the BEAST 1.6 software package, available at http://beast-mcmc.googlecode.com.",2011-09-22 +22985944,"Reducing multiples: a mathematical formula that accurately predicts rates of singletons, twins, and higher-order multiples in women undergoing in vitro fertilization.","

Objective

To develop a mathematical formula that accurately predicts the probability of a singleton, twin, and higher-order multiple pregnancy according to implantation rate and number of embryos transferred.

Design

A total of 12,003 IVF cycles from a single center resulting in ET were analyzed. Using mathematical modeling we developed a formula, the Combined Formula, and tested for the ability of this formula to accurately predict outcomes.

Setting

Academic hospital.

Patient(s)

Patients undergoing IVF.

Intervention(s)

None.

Main outcome measure(s)

Goodness of fit of data from our center and previously published data to the Combined Formula and three previous mathematical models.

Result(s)

The Combined Formula predicted the probability of singleton, twin, and higher-order pregnancies more accurately than three previous formulas (1.4% vs. 2.88%, 4.02%, and 5%, respectively) and accurately predicted outcomes from five previously published studies from other centers. An online applet is provided (https://secure.ivf.org/ivf-calculator.html).

Conclusion(s)

The probability of pregnancy with singletons, twins, and higher-order multiples according to number of embryos transferred is predictable and not random and can be accurately modeled using the Combined Formula. The embryo itself is the major predictor of pregnancy outcomes, but there is an influence from ""barriers,"" such as the endometrium and collaboration between embryos (embryo-embryo interaction). This model can be used to guide the decision regarding number of embryos to transfer after IVF.",2012-09-15 +23479352,HExpoChem: a systems biology resource to explore human exposure to chemicals.,"

Summary

Humans are exposed to diverse hazardous chemicals daily. Although an exposure to these chemicals is suspected to have adverse effects on human health, mechanistic insights into how they interact with the human body are still limited. Therefore, acquisition of curated data and development of computational biology approaches are needed to assess the health risks of chemical exposure. Here we present HExpoChem, a tool based on environmental chemicals and their bioactivities on human proteins with the objective of aiding the qualitative exploration of human exposure to chemicals. The chemical-protein interactions have been enriched with a quality-scored human protein-protein interaction network, a protein-protein association network and a chemical-chemical interaction network, thus allowing the study of environmental chemicals through formation of protein complexes and phenotypic outcomes enrichment.

Availability

HExpoChem is available at http://www.cbs.dtu.dk/services/HExpoChem-1.0/.",2013-03-11 +24655585,Whole DNA methylome profiling in lung cancer cells before and after epithelial-to-mesenchymal transition.,"

Background

Metastatic lung cancer is one of the leading causes of cancer death. In recent years, epithelial-to-mesenchymal transition (EMT) has been found to contribute to metastasis, as it enables migratory and invasive properties in cancer cells. Previous genome-wide studies found that DNA methylation was unchanged during EMT induced by TGF-β in AML12 cells. In this study, we aimed to discover EMT-related changes in DNA methylation in cancer cells, which are poorly understood.

Methods

We employed a next-generation sequencing-based method, MSCC (methyl-sensitive cut counting), to investigate DNA methylation during EMT in the A549 lung cancer cell line.

Results

We found that methylation levels were highly correlated to gene expression, histone modifications and small RNA expression. However, no differentially methylated regions (DMRs) were found in A549 cells treated with TGF-β for 4 h, 12 h, 24 h and 96 h. Additionally, CpG islands (CGIs) showed no overall change in methylation levels, and at the single-base level, almost all of the CpGs showed conservation of DNA methylation levels. Furthermore, we found that the expression of DNA methyltransferase 1, 3a, 3b (DNMT1, DNMT3a, DNMT3b) and ten-eleven translocation 1 (TET1) was altered after EMT. The level of several histone methylations was also changed.

Conclusions

DNA methylation-related enzymes and histone methylation might have a role in TGF-β-induced EMT without affecting the whole DNA methylome in cancer cells. Our data provide new insights into the global methylation signature of lung cancer cells and the role of DNA methylation in EMT.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1112892497119603.",2014-03-20 +22011310,EUCAST technical note on Amphotericin B.,"The European Committee on Antimicrobial Susceptibility Testing-Subcommittee on Antifungal Susceptibility Testing (EUCAST-AFST) has determined breakpoints for amphotericin B for Candida spp. This Technical Note is based on the EUCAST amphotericin B rationale document (available on the EUCAST website: http://www.eucast.org). Species-specific breakpoints for C. albicans, C. glabrata, C. krusei, C. parapsilosis and C. tropicalis are S: MIC ≤1 mg/L, R: MIC > 1 mg/L. There are insufficient data to set breakpoints for other species. The breakpoints are based upon pharmacokinetic data, epidemiological cut-off values and clinical experience. Breakpoints will be reviewed regularly.",2011-10-19 +23300804,A model-based clustering method for genomic structural variant prediction and genotyping using paired-end sequencing data.,"Structural variation (SV) has been reported to be associated with numerous diseases such as cancer. With the advent of next generation sequencing (NGS) technologies, various types of SV can be potentially identified. We propose a model based clustering approach utilizing a set of features defined for each type of SV events. Our method, termed SVMiner, not only provides a probability score for each candidate, but also predicts the heterozygosity of genomic deletions. Extensive experiments on genome-wide deep sequencing data have demonstrated that SVMiner is robust against the variability of a single cluster feature, and it significantly outperforms several commonly used SV detection programs. 
SVMiner can be downloaded from http://cbc.case.edu/svminer/.",2012-12-27 +25340248,"Head Injury: Triage, Assessment, Investigation and Early Management of Head Injury in Children, Young People and Adults","For the purposes of this guideline, head injury is defined as any trauma to the head other than superficial injuries to the face. Head injury is the commonest cause of death and disability in people aged 1–40 years in the UK. Data for head injury are recorded in the Hospital Episode Statistics (http://www.hscic.gov.uk/hes). Each year, 1.4 million people attend emergency departments in England and Wales with a recent head injury. Between 33% and 50% of these are children aged under 15 years. Annually, about 200,000 people are admitted to hospital with head injury. Of these, one-fifth have features suggesting skull fracture or have evidence of brain damage. Most patients recover without specific or specialist intervention, but others experience long-term disability or even die from the effects of complications that could potentially be minimised or avoided with early detection and appropriate treatment. The incidence of death from head injury is low, with as few as 0.2% of all patients attending emergency departments with a head injury dying as a result of this injury. Ninety five per cent of people who have sustained a head injury present with a normal or minimally impaired conscious level (Glasgow Coma Scale [GCS] greater than 12) but the majority of fatal outcomes are in the moderate (GCS 9–12) or severe (GCS 8 or less) head injury groups, which account for only 5% of attenders. Therefore, emergency departments see a large number of patients with minor or mild head injuries and need to identify the very small number who will go on to have serious acute intracranial complications. It is estimated that 25–30% of children aged under 2 years who are hospitalised with head injury have an abusive head injury. 
This guideline has updated some of the terminology used in relation to safeguarding children and vulnerable adults. The previous head injury guideline produced by NICE in 2003 (NICE clinical guideline 4) and updated in 2007 (NICE clinical guideline 56) resulted in CT scanning replacing skull radiography as the primary imaging modality for assessing head injury. It also led to an increase in the proportion of people with severe head injury having their care managed in specialist centres. This has been associated with a decline in fatality among patients with severe head injury. This update is needed because of the continuing importance of up-to-date evidence-based guidance on the initial assessment and early management of head injury. Appropriate guidance can enable early detection and treatment of life-threatening brain injury, where present, but also early discharge of patients with negligible risk of brain injury. It can therefore save lives while at the same time preventing needless crowding in emergency departments and observation wards. Further key NHS changes have driven the scope of this update. These include the introduction in 2012 of regional trauma networks with major trauma triage tools within NHS England; the extension of indications for anticoagulation therapy; the expanding use of biomarkers to guide emergent clinical management in other conditions, such as chest pain; and the establishment of local safeguarding boards. The last of these addresses the requirement for front-line clinical staff to assess not only the severity of the head injury but also why it occurred.",2014-10-24 +24575054,Anisotropic kernels for coordinate-based meta-analyses of neuroimaging studies.,"Peak-based meta-analyses of neuroimaging studies create, for each study, a brain map of effect size or peak likelihood by convolving a kernel with each reported peak. 
A kernel is a small matrix applied in order that voxels surrounding the peak have a value similar to, but slightly lower than that of the peak. Current kernels are isotropic, i.e., the value of a voxel close to a peak only depends on the Euclidean distance between the voxel and the peak. However, such perfect spheres of effect size or likelihood around the peak are rather implausible: a voxel that correlates with the peak across individuals is more likely to be part of the cluster of significant activation or difference than voxels uncorrelated with the peak. This paper introduces anisotropic kernels, which assign different values to the different neighboring voxels based on the spatial correlation between them. They are specifically developed for effect-size signed differential mapping (ES-SDM), though might be easily implemented in other meta-analysis packages such as activation likelihood estimation (ALE). The paper also describes the creation of the required correlation templates for gray matter/BOLD response, white matter, cerebrospinal fluid, and fractional anisotropy. Finally, the new method is validated by quantifying the accuracy of the recreation of effect size maps from peak information. This empirical validation showed that the optimal degree of anisotropy and full-width at half-maximum (FWHM) might vary largely depending on the specific data meta-analyzed. However, it also showed that the recreation substantially improved and did not depend on the FWHM when full anisotropy was used. Based on these results, we recommend the use of fully anisotropic kernels in ES-SDM and ALE, unless optimal meta-analysis-specific parameters can be estimated based on the recreation of available statistical maps. 
The new method and templates are freely available at http://www.sdmproject.com/.",2014-02-10 +23812847,HERMES: towards an integrated toolbox to characterize functional and effective brain connectivity.,"The analysis of the interdependence between time series has become an important field of research in the last years, mainly as a result of advances in the characterization of dynamical systems from the signals they produce, the introduction of concepts such as generalized and phase synchronization and the application of information theory to time series analysis. In neurophysiology, different analytical tools stemming from these concepts have added to the 'traditional' set of linear methods, which includes the cross-correlation and the coherency function in the time and frequency domain, respectively, or more elaborated tools such as Granger Causality. This increase in the number of approaches to tackle the existence of functional (FC) or effective connectivity (EC) between two (or among many) neural networks, along with the mathematical complexity of the corresponding time series analysis tools, makes it desirable to arrange them into a unified-easy-to-use software package. The goal is to allow neuroscientists, neurophysiologists and researchers from related fields to easily access and make use of these analysis methods from a single integrated toolbox. Here we present HERMES ( http://hermes.ctb.upm.es ), a toolbox for the Matlab® environment (The Mathworks, Inc), which is designed to study functional and effective brain connectivity from neurophysiological data such as multivariate EEG and/or MEG records. It includes also visualization tools and statistical methods to address the problem of multiple comparisons. 
We believe that this toolbox will be very helpful to all the researchers working in the emerging field of brain connectivity analysis.",2013-10-01 +22978681,SPInDel: a multifunctional workbench for species identification using insertion/deletion variants.,"The majority of the available methods for the molecular identification of species use pairwise sequence divergences between the query and reference sequences (DNA barcoding). The presence of multiple insertions and deletions (indels) in the target genomic regions is generally regarded as a problem, as it introduces ambiguities in sequence alignments. However, we have recently shown that a high level of species discrimination is attainable in all taxa of life simply by considering the length of hypervariable regions defined by indel variants. Each species is tagged with a numeric profile of fragment lengths-a true numeric barcode. In this study, we describe a multifunctional computational workbench (named SPInDel for SPecies Identification by Insertions/Deletions) to assist researchers using variable-length DNA sequences, and we demonstrate its applicability in molecular ecology. The SPInDel workbench provides a step-by-step environment for the alignment of target sequences, selection of informative hypervariable regions, design of PCR primers and the statistical validation of the species-identification process. In our test data sets, we were able to discriminate all species from two genera of frogs (Ansonia and Leptobrachium) inhabiting lowland rainforests and mountain regions of South-East Asia and species from the most common genus of coral reef fishes (Apogon). Our method can complement conventional DNA barcoding systems when indels are common (e.g. in rRNA genes) without the required step of DNA sequencing. 
The executable files, source code, documentation and test data sets are freely available at http://www.portugene.com/SPInDel/SPInDel_webworkbench.html.",2012-09-15 +23093601,DoriC 5.0: an updated database of oriC regions in both bacterial and archaeal genomes.,"Replication of chromosomes is one of the central events in the cell cycle. Chromosome replication begins at specific sites, called origins of replication (oriCs), for all three domains of life. However, the origins of replication still remain unknown in a considerably large number of bacterial and archaeal genomes completely sequenced so far. The availability of increasing complete bacterial and archaeal genomes has created challenges and opportunities for identification of their oriCs in silico, as well as in vivo. Based on the Z-curve theory, we have developed a web-based system Ori-Finder to predict oriCs in bacterial genomes with high accuracy and reliability by taking advantage of comparative genomics, and the predicted oriC regions have been organized into an online database DoriC, which is publicly available at http://tubic.tju.edu.cn/doric/ since 2007. Five years after we constructed DoriC, the database has significant advances over the number of bacterial genomes, increasing about 4-fold. Additionally, oriC regions in archaeal genomes identified by in vivo experiments, as well as in silico analyses, have also been added to the database. Consequently, the latest release of DoriC contains oriCs for >1500 bacterial genomes and 81 archaeal genomes, respectively.",2012-10-23 +21408081,"TargetMine, an integrated data warehouse for candidate gene prioritisation and target discovery.","Prioritising candidate genes for further experimental characterisation is a non-trivial challenge in drug discovery and biomedical research in general. An integrated approach that combines results from multiple data types is best suited for optimal target selection. 
We developed TargetMine, a data warehouse for efficient target prioritisation. TargetMine utilises the InterMine framework, with new data models such as protein-DNA interactions integrated in a novel way. It enables complicated searches that are difficult to perform with existing tools and it also offers integration of custom annotations and in-house experimental data. We proposed an objective protocol for target prioritisation using TargetMine and set up a benchmarking procedure to evaluate its performance. The results show that the protocol can identify known disease-associated genes with high precision and coverage. A demonstration version of TargetMine is available at http://targetmine.nibio.go.jp/.",2011-03-08 +21526183,Scoring protein relationships in functional interaction networks predicted from sequence data.,"

Unlabelled

The abundance of diverse biological data from various sources constitutes a rich source of knowledge, which has the power to advance our understanding of organisms. This requires computational methods in order to integrate and exploit these data effectively and elucidate local and genome wide functional connections between protein pairs, thus enabling functional inferences for uncharacterized proteins. These biological data are primarily in the form of sequences, which determine functions, although functional properties of a protein can often be predicted from just the domains it contains. Thus, protein sequences and domains can be used to predict protein pair-wise functional relationships, and thus contribute to the function prediction process of uncharacterized proteins in order to ensure that knowledge is gained from sequencing efforts. In this work, we introduce information-theoretic based approaches to score protein-protein functional interaction pairs predicted from protein sequence similarity and conserved protein signature matches. The proposed schemes are effective for data-driven scoring of connections between protein pairs. We applied these schemes to the Mycobacterium tuberculosis proteome to produce a homology-based functional network of the organism with a high confidence and coverage. We use the network for predicting functions of uncharacterised proteins.

Availability

Protein pair-wise functional relationship scores for Mycobacterium tuberculosis strain CDC1551 sequence data and python scripts to compute these scores are available at http://web.cbio.uct.ac.za/~gmazandu/scoringschemes.",2011-04-19 +23093589,ValidNESs: a database of validated leucine-rich nuclear export signals.,"ValidNESs (http://validness.ym.edu.tw/) is a new database for experimentally validated leucine-rich nuclear export signal (NES)-containing proteins. The therapeutic potential of the chromosomal region maintenance 1 (CRM1)-mediated nuclear export pathway and disease relevance of its cargo proteins has gained recognition in recent years. Unfortunately, only about one-third of known CRM1 cargo proteins are accessible in a single database since the last compilation in 2003. CRM1 cargo proteins are often recognized by a classical NES (leucine-rich NES), but this signal is notoriously difficult to predict from sequence alone. Fortunately, a recently developed prediction method, NESsential, is able to identify good candidates in some cases, enabling valuable hints to be gained by in silico prediction, but until now it has not been available through a web interface. We present ValidNESs, an integrated, up-to-date database holding 221 NES-containing proteins, combined with a web interface to prediction by NESsential.",2012-10-22 +22971057,Highly improved homopolymer aware nucleotide-protein alignments with 454 data.,"

Background

Roche 454 sequencing is the leading sequencing technology for producing long read high throughput sequence data. Unlike most methods where sequencing errors translate to base uncertainties, 454 sequencing inaccuracies create nucleotide gaps. These gaps are particularly troublesome for translated search tools such as BLASTx where they introduce frame-shifts and result in regions of decreased identity and/or terminated alignments, which affect further analysis.

Results

To address this issue, the Homopolymer Aware Cross Alignment Tool (HAXAT) was developed. HAXAT uses a novel dynamic programming algorithm for solving the optimal local alignment between a 454 nucleotide and a protein sequence by allowing frame-shifts, guided by 454 flowpeak values. The algorithm is an efficient minimal extension of the Smith-Waterman-Gotoh algorithm that easily fits into other tools. Experiments using HAXAT demonstrate, through the introduction of 454 specific frame-shift penalties, significantly increased accuracy of alignments spanning homopolymer sequence errors. The full effect of the new parameters introduced with this novel alignment model is explored. Experimental results evaluating homopolymer inaccuracy through alignments show a two to five-fold increase in Matthews Correlation Coefficient over previous algorithms, for 454-derived data.

Conclusions

This increased accuracy provided by HAXAT does not only result in improved homologue estimations, but also provides un-interrupted reading-frames, which greatly facilitate further analysis of protein space, for example phylogenetic analysis. The alignment tool is available at http://bioinfo.ifm.liu.se/454tools/haxat.",2012-09-12 +21596789,"GenPlay, a multipurpose genome analyzer and browser.","

Motivation

Rapidly decreasing sequencing cost due to the emergence and improvement of massively parallel sequencing technologies has resulted in a dramatic increase in the quantity of data that needs to be analyzed. Therefore, software tools to process, visualize, analyze and integrate data produced on multiple platforms and using multiple methods are needed.

Results

GenPlay is a fast, easy to use and stable tool for rapid analysis and data processing. It is written in Java and runs on all major operating systems. GenPlay recognizes a wide variety of common genomic data formats from microarray- or sequencing-based platforms and offers a library of operations (normalization, binning, smoothing) to process raw data into visualizable tracks. GenPlay displays tracks adapted to summarize gene structure, gene expression, repeat families, CPG islands, etc. as well as custom tracks to show the results of RNA-Seq, ChIP-Seq, TimEX-Seq and single nucleotide polymorphism (SNP) analysis. GenPlay can generate statistics (minimum, maximum, SD, correlation, etc.). The tools provided include Gaussian filter, peak finders, signal saturation, island finders. The software also offers graphical features such as scatter plots and bar charts to depict signal repartition. The library of operations is continuously growing based on the emerging needs.

Availability

GenPlay is an open-source project available from http://www.genplay.net. The code source of the software is available at https://genplay.einstein.yu.edu/svn/GenPlay.",2011-05-19 +23221815,Invariant delineation of nuclear architecture in glioblastoma multiforme for clinical and molecular association.,"Automated analysis of whole mount tissue sections can provide insights into tumor subtypes and the underlying molecular basis of neoplasm. However, since tumor sections are collected from different laboratories, inherent technical and biological variations impede analysis for very large datasets such as The Cancer Genome Atlas (TCGA). Our objective is to characterize tumor histopathology, through the delineation of the nuclear regions, from hematoxylin and eosin (H&E) stained tissue sections. Such a representation can then be mined for intrinsic subtypes across a large dataset for prediction and molecular association. Furthermore, nuclear segmentation is formulated within a multi-reference graph framework with geodesic constraints, which enables computation of multidimensional representations, on a cell-by-cell basis, for functional enrichment and bioinformatics analysis. Here, we present a novel method, multi-reference graph cut (MRGC), for nuclear segmentation that overcomes technical variations associated with sample preparation by incorporating prior knowledge from manually annotated reference images and local image features. The proposed approach has been validated on manually annotated samples and then applied to a dataset of 377 Glioblastoma Multiforme (GBM) whole slide images from 146 patients. For the GBM cohort, multidimensional representation of the nuclear features and their organization have identified 1) statistically significant subtypes based on several morphometric indexes, 2) whether each subtype can be predictive or not, and 3) that the molecular correlates of predictive subtypes are consistent with the literature. 
Data and intermediaries for a number of tumor types (GBM, low grade glial, and kidney renal clear carcinoma) are available at: http://tcga.lbl.gov for correlation with TCGA molecular data. The website also provides an interface for panning and zooming of whole mount tissue sections with/without overlaid segmentation results for quality control.",2012-12-04 +23566564,CoreGenes3.5: a webserver for the determination of core genes from sets of viral and small bacterial genomes.,"

Background

CoreGenes3.5 is a webserver that determines sets of core genes from viral and small bacterial genomes as an automated batch process. Previous versions of CoreGenes have been used to classify bacteriophage genomes and mine data from pathogen genomes.

Findings

CoreGenes3.5 accepts as input GenBank accession numbers of genomes and performs iterative BLASTP analyses to output a set of core genes. After completion of the program run, the results can be either displayed in a new window for one pair of reference and query genomes or emailed to the user for multiple pairs of small genomes in tabular format.

Conclusions

With the number of genomes sequenced increasing daily and interest in determining phylogenetic relationships, CoreGenes3.5 provides a user-friendly web interface for wet-bench biologists to process multiple small genomes for core gene determinations. CoreGenes3.5 is available at http://binf.gmu.edu:8080/CoreGenes3.5.",2013-04-08 +22492642,GRASS: a generic algorithm for scaffolding next-generation sequencing assemblies.,"

Motivation

The increasing availability of second-generation high-throughput sequencing (HTS) technologies has sparked a growing interest in de novo genome sequencing. This in turn has fueled the need for reliable means of obtaining high-quality draft genomes from short-read sequencing data. The millions of reads usually involved in HTS experiments are first assembled into longer fragments called contigs, which are then scaffolded, i.e. ordered and oriented using additional information, to produce even longer sequences called scaffolds. Most existing scaffolders of HTS genome assemblies are not suited for using information other than paired reads to perform scaffolding. They use this limited information to construct scaffolds, often preferring scaffold length over accuracy, when faced with the tradeoff.

Results

We present GRASS (GeneRic ASsembly Scaffolder)-a novel algorithm for scaffolding second-generation sequencing assemblies capable of using diverse information sources. GRASS offers a mixed-integer programming formulation of the contig scaffolding problem, which combines contig order, distance and orientation in a single optimization objective. The resulting optimization problem is solved using an expectation-maximization procedure and an unconstrained binary quadratic programming approximation of the original problem. We compared GRASS with existing HTS scaffolders using Illumina paired reads of three bacterial genomes. Our algorithm constructs a comparable number of scaffolds, but makes fewer errors. This result is further improved when additional data, in the form of related genome sequences, are used.

Availability

GRASS source code is freely available from http://code.google.com/p/tud-scaffolding/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-04-06 +21483474,Automated ensemble modeling with modelMaGe: analyzing feedback mechanisms in the Sho1 branch of the HOG pathway.,"In systems biology uncertainty about biological processes translates into alternative mathematical model candidates. Here, the goal is to generate, fit and discriminate several candidate models that represent different hypotheses for feedback mechanisms responsible for downregulating the response of the Sho1 branch of the yeast high osmolarity glycerol (HOG) signaling pathway after initial stimulation. Implementing and testing these candidate models by hand is a tedious and error-prone task. Therefore, we automatically generated a set of candidate models of the Sho1 branch with the tool modelMaGe. These candidate models are automatically documented, can readily be simulated and fitted automatically to data. A ranking of the models with respect to parsimonious data representation is provided, enabling discrimination between candidate models and the biological hypotheses underlying them. We conclude that a previously published model fitted spurious effects in the data. Moreover, the discrimination analysis suggests that the reported data does not support the conclusion that a desensitization mechanism leads to the rapid attenuation of Hog1 signaling in the Sho1 branch of the HOG pathway. The data rather supports a model where an integrator feedback shuts down the pathway. 
This conclusion is also supported by dedicated experiments that can exclusively be predicted by those models including an integrator feedback. modelMaGe is an open source project and is distributed under the Gnu General Public License (GPL) and is available from http://modelmage.org.",2011-03-30 +23094086,miRandola: extracellular circulating microRNAs database.,"MicroRNAs are small noncoding RNAs that play an important role in the regulation of various biological processes through their interaction with cellular messenger RNAs. They are frequently dysregulated in cancer and have shown great potential as tissue-based markers for cancer classification and prognostication. microRNAs are also present in extracellular human body fluids such as serum, plasma, saliva, and urine. Most of circulating microRNAs are present in human plasma and serum cofractionate with the Argonaute2 (Ago2) protein. However, circulating microRNAs have been also found in membrane-bound vesicles such as exosomes. Since microRNAs circulate in the bloodstream in a highly stable, extracellular form, they may be used as blood-based biomarkers for cancer and other diseases. A knowledge base of extracellular circulating miRNAs is a fundamental tool for biomedical research. In this work, we present miRandola, a comprehensive manually curated classification of extracellular circulating miRNAs. miRandola is connected to miRò, the miRNA knowledge base, allowing users to infer the potential biological functions of circulating miRNAs and their connections with phenotypes. The miRandola database contains 2132 entries, with 581 unique mature miRNAs and 21 types of samples. miRNAs are classified into four categories, based on their extracellular form: miRNA-Ago2 (173 entries), miRNA-exosome (856 entries), miRNA-HDL (20 entries) and miRNA-circulating (1083 entries). 
miRandola is available online at: http://atlas.dmi.unict.it/mirandola/index.html.",2012-10-19 +23086211,"Transcription factor family-based reconstruction of singleton regulons and study of the Crp/Fnr, ArsR, and GntR families in Desulfovibrionales genomes.","Accurate detection of transcriptional regulatory elements is essential for high-quality genome annotation, metabolic reconstruction, and modeling of regulatory networks. We developed a computational approach for reconstruction of regulons operated by transcription factors (TFs) from large protein families and applied this novel approach to three TF families in 10 Desulfovibrionales genomes. Phylogenetic analyses of 125 regulators from the ArsR, Crp/Fnr, and GntR families revealed that 65% of these regulators (termed reference TFs) are well conserved in Desulfovibrionales, while the remaining 35% of regulators (termed singleton TFs) are species specific and show a mosaic distribution. For regulon reconstruction in the group of singleton TFs, the standard orthology-based approach was inefficient, and thus, we developed a novel approach based on the simultaneous study of all homologous TFs from the same family in a group of genomes. As a result, we identified binding for 21 singleton TFs and for all reference TFs in all three analyzed families. Within each TF family we observed structural similarities between DNA-binding motifs of different reference and singleton TFs. The collection of reconstructed regulons is available at the RegPrecise database (http://regprecise.lbl.gov/RegPrecise/Desulfovibrionales.jsp).",2012-10-19 +23748960,Inferring the functional effect of gene expression changes in signaling pathways.,"Signaling pathways constitute a valuable source of information that allows interpreting the way in which alterations in gene activities affect to particular cell functionalities. There are web tools available that allow viewing and editing pathways, as well as representing experimental data on them. 
However, few methods aimed to identify the signaling circuits, within a pathway, associated to the biological problem studied exist and none of them provide a convenient graphical web interface. We present PATHiWAYS, a web-based signaling pathway visualization system that infers changes in signaling that affect cell functionality from the measurements of gene expression values in typical expression microarray case-control experiments. A simple probabilistic model of the pathway is used to estimate the probabilities for signal transmission from any receptor to any final effector molecule (taking into account the pathway topology) using for this the individual probabilities of gene product presence/absence inferred from gene expression values. Significant changes in these probabilities allow linking different cell functionalities triggered by the pathway to the biological problem studied. PATHiWAYS is available at: http://pathiways.babelomics.org/.",2013-06-08 +22827163,The Triform algorithm: improved sensitivity and specificity in ChIP-Seq peak finding.,"

Background

Chromatin immunoprecipitation combined with high-throughput sequencing (ChIP-Seq) is the most frequently used method to identify the binding sites of transcription factors. Active binding sites can be seen as peaks in enrichment profiles when the sequencing reads are mapped to a reference genome. However, the profiles are normally noisy, making it challenging to identify all significantly enriched regions in a reliable way and with an acceptable false discovery rate.

Results

We present the Triform algorithm, an improved approach to automatic peak finding in ChIP-Seq enrichment profiles for transcription factors. The method uses model-free statistics to identify peak-like distributions of sequencing reads, taking advantage of improved peak definition in combination with known characteristics of ChIP-Seq data.

Conclusions

Triform outperforms several existing methods in the identification of representative peak profiles in curated benchmark data sets. We also show that Triform in many cases is able to identify peaks that are more consistent with biological function, compared with other methods. Finally, we show that Triform can be used to generate novel information on transcription factor binding in repeat regions, which represents a particular challenge in many ChIP-Seq experiments. The Triform algorithm has been implemented in R, and is available via http://tare.medisin.ntnu.no/triform.",2012-07-24 +22262674,An effective statistical evaluation of ChIPseq dataset similarity.,"

Motivation

ChIPseq is rapidly becoming a common technique for investigating protein-DNA interactions. However, results from individual experiments provide a limited understanding of chromatin structure, as various chromatin factors cooperate in complex ways to orchestrate transcription. In order to quantify chromatin interactions, it is thus necessary to devise a robust similarity metric applicable to ChIPseq data. Unfortunately, moving past simple overlap calculations to give statistically rigorous comparisons of ChIPseq datasets often involves arbitrary choices of distance metrics, with significance being estimated by computationally intensive permutation tests whose statistical power may be sensitive to non-biological experimental and post-processing variation.

Results

We show that it is in fact possible to compare ChIPseq datasets through the efficient computation of exact P-values for proximity. Our method is insensitive to non-biological variation in datasets such as peak width, and can rigorously model peak location biases by evaluating similarity conditioned on a restricted set of genomic regions (such as mappable genome or promoter regions). Applying our method to the well-studied dataset of Chen et al. (2008), we elucidate novel interactions which conform well with our biological understanding. By comparing ChIPseq data in an asymmetric way, we are able to observe clear interaction differences between cofactors such as p300 and factors that bind DNA directly.

Availability

Source code is available for download at http://sonorus.princeton.edu/IntervalStats/IntervalStats.tar.gz.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-19 +24499477,Low copy number of mitochondrial DNA (mtDNA) predicts worse prognosis in early-stage laryngeal cancer patients.,"

Objectives

Alterations in mitochondrial DNA (mtDNA) copy number have been widely reported in various human cancers, and been considered to be an important hallmark of cancers. However, little is known about the value of copy number variations of mtDNA in the prognostic evaluation of laryngeal cancer.

Design and methods

Using real-time quantitative PCR method, we investigated mtDNA copy number in a cohort of laryngeal cancers (n =204) and normal laryngeal tissues (n =40), and explored the association of variable mtDNA copy number with clinical outcomes of laryngeal cancer patients.

Results

Our data showed that the relative mean mtDNA content was higher in the laryngeal cancer patients (11.91 ± 4.35 copies) than the control subjects (4.72 ± 0.70 copies). Moreover, we found that mtDNA content was negatively associated with cigarette smoking (pack-years), tumor invasion, and TNM stage. Notably, variable mtDNA content did not affect overall survival of laryngeal cancer patients. However, when the patients were categorized into early-stage and late-stage tumor groups according to TNM stage, we found that low mtDNA content was strongly associated with poor survival in the former, but not in the latter.

Conclusions

The present study demonstrated that low mtDNA content was strongly correlated with some of clinicopathological characteristics, such as cigarette smoking, tumor invasion and TNM stage. In addition, we found a strong link between low mtDNA content and worse survival of the patients with early-stage tumors. Taken together, low copy number of mtDNA may be a useful poor prognostic factor for early-stage laryngeal cancer patients.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1841771572115955.",2014-02-05 +23023982,HiCNorm: removing biases in Hi-C data via Poisson regression.,"

Summary

We propose a parametric model, HiCNorm, to remove systematic biases in the raw Hi-C contact maps, resulting in a simple, fast, yet accurate normalization procedure. Compared with the existing Hi-C normalization method developed by Yaffe and Tanay, HiCNorm has fewer parameters, runs >1000 times faster and achieves higher reproducibility.

Availability

Freely available on the web at: http://www.people.fas.harvard.edu/~junliu/HiCNorm/.

Contact

jliu@stat.harvard.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-27 +21118201,The Viking viewer for connectomics: scalable multi-user annotation and summarization of large volume data sets.,"Modern microscope automation permits the collection of vast amounts of continuous anatomical imagery in both two and three dimensions. These large data sets present significant challenges for data storage, access, viewing, annotation and analysis. The cost and overhead of collecting and storing the data can be extremely high. Large data sets quickly exceed an individual's capability for timely analysis and present challenges in efficiently applying transforms, if needed. Finally annotated anatomical data sets can represent a significant investment of resources and should be easily accessible to the scientific community. The Viking application was our solution created to view and annotate a 16.5 TB ultrastructural retinal connectome volume and we demonstrate its utility in reconstructing neural networks for a distinctive retinal amacrine cell class. Viking has several key features. (1) It works over the internet using HTTP and supports many concurrent users limited only by hardware. (2) It supports a multi-user, collaborative annotation strategy. (3) It cleanly demarcates viewing and analysis from data collection and hosting. (4) It is capable of applying transformations in real-time. (5) It has an easily extensible user interface, allowing addition of specialized modules without rewriting the viewer.",2011-01-01 +23084601,PESNPdb: a comprehensive database of SNPs studied in association with pre-eclampsia.,"Pre-eclampsia is a pregnancy specific disorder that can be life threatening for mother and child. Multiple studies have been carried out in an attempt to identify SNPs that contribute to the genetic susceptibility of the disease. 
Here we describe PESNPdb (http://bejerano.stanford.edu/pesnpdb), a database aimed at centralizing SNP and study details investigated in association with pre-eclampsia. We also describe a Placenta Disorders ontology that utilizes information from PESNPdb. The main focus of PESNPdb is to help researchers study the genetic complexity of pre-eclampsia through a user-friendly interface that encourages community participation.",2012-10-18 +25590865,"Trichinellosis surveillance--United States, 2008-2012.","

Problem/condition

Trichinellosis is a parasitic disease caused by nematodes in the genus Trichinella, which are among the most widespread zoonotic pathogens globally. Infection occurs following consumption of raw or undercooked meat infected with Trichinella larvae. Clinical manifestations of the disease range from asymptomatic infection to fatal disease; the common signs and symptoms include eosinophilia, fever, periorbital edema, and myalgia. Trichinellosis surveillance has documented a steady decline in the reported incidence of the disease in the United States. In recent years, proportionally fewer cases have been associated with consumption of commercial pork products, and more are associated with meat from wild game such as bear.

Period covered

2008-2012.

Description of system

Trichinellosis has been a nationally notifiable disease in the United States since 1966 and is reportable in 48 states, New York City, and the District of Columbia. The purpose of national surveillance is to estimate incidence of infection, detect outbreaks, and guide prevention efforts. Cases are defined by clinical characteristics and the results of laboratory testing for evidence of Trichinella infection. Food exposure histories are obtained at the local level either at the point of care or through health department interview. States notify CDC of cases electronically through the National Notifiable Disease Surveillance System (available at http://wwwn.cdc.gov/nndss). In addition, states are asked to submit a standardized supplementary case report form that captures the clinical and epidemiologic information needed to meet the surveillance case definition. Reported cases are summarized weekly and annually in MMWR.

Results

During 2008-2012, a total of 90 cases of trichinellosis were reported to CDC from 24 states and the District of Columbia. Six (7%) cases were excluded from analysis because a supplementary case report form was not submitted or the case did not meet the case definition. A total of 84 confirmed trichinellosis cases, including five outbreaks that comprised 40 cases, were analyzed and included in this report. During 2008-2012, the mean annual incidence of trichinellosis in the United States was 0.1 cases per 1 million population, with a median of 15 cases per year. Pork products were associated with 22 (26%) cases, including 10 (45%) that were linked with commercial pork products, six (27%) that were linked with wild boar, and one (5%) that was linked with home-raised swine; five (23%) were unspecified. Meats other than pork were associated with 45 (54%) cases, including 41 (91%) that were linked with bear meat, two (4%) that were linked with deer meat, and two (4%) that were linked with ground beef. The source for 17 (20%) cases was unknown. Of the 51 patients for whom information was reported on the manner in which the meat product was cooked, 24 (47%) reported eating raw or undercooked meat.

Interpretation

The risk for Trichinella infection associated with commercial pork has decreased substantially in the United States since the 1940s, when data collection on trichinellosis cases first began. However, the continued identification of cases related to both pork and nonpork sources indicates that public education about trichinellosis and the dangers of consuming raw or undercooked meat still is needed.

Public health actions

Changes in domestic pork production and public health education regarding the safe preparation of pork have contributed to the reduction in the incidence of trichinellosis in the United States; however, consumption of wild game meat such as bear continues to be an important source of infection. Hunters and consumers of wild game meat should be educated about the risk associated with consumption of raw or undercooked meat.",2015-01-01 +24371153,SigHunt: horizontal gene transfer finder optimized for eukaryotic genomes.,"

Motivation

Genomic islands (GIs) are DNA fragments incorporated into a genome through horizontal gene transfer (also called lateral gene transfer), often with functions novel for a given organism. While methods for their detection are well researched in prokaryotes, the complexity of eukaryotic genomes makes direct utilization of these methods unreliable, and so labour-intensive phylogenetic searches are used instead.

Results

We present a surrogate method that investigates nucleotide base composition of the DNA sequence in a eukaryotic genome and identifies putative GIs. We calculate a genomic signature as a vector of tetranucleotide (4-mer) frequencies using a sliding window approach. Extending the neighbourhood of the sliding window, we establish a local kernel density estimate of the 4-mer frequency. We score the number of 4-mer frequencies in the sliding window that deviate from the credibility interval of their local genomic density using a newly developed discrete interval accumulative score (DIAS). To further improve the effectiveness of DIAS, we select informative 4-mers in a range of organisms using the tetranucleotide quality score developed herein. We show that the SigHunt method is computationally efficient and able to detect GIs in eukaryotic genomes that represent non-ameliorated integration. Thus, it is suited to scanning for change in organisms with different DNA composition.

Availability and implementation

Source code and scripts freely available for download at http://www.iba.muni.cz/index-en.php?pg=research-data-analysis-tools-sighunt are implemented in C and R and are platform-independent.

Contact

376090@mail.muni.cz or martinkova@ivb.cz.",2013-12-25 +23075283,Investigation of silent information regulator 1 (Sirt1) agonists from Traditional Chinese Medicine.,"Silent information regulator 1 (Sirt1), a class III nicotinamide adenine dinucleotide dependent histone deacetylases, is important in cardioprotection, neuroprotection, metabolic disease, calorie restriction, and diseases associated with aging. Traditional Chinese Medicine (TCM) compounds from TCM Database@Taiwan ( http://tcm.cmu.edu.tw/ ) were employed for screening potent Sirt1 agonists, and molecular dynamics (MD) simulation was implemented to simulate ligand optimum docking poses and protein structure under dynamic conditions. TCM compounds such as (S)-tryptophan-betaxanthin, 5-O-feruloylquinic acid, and RosA exhibited good binding affinity across different computational methods, and their drug-like potential were validated by MD simulation. Docking poses indicate that the carboxylic group of the three candidates generated H-bonds with residues in the protein chain from Ser441 to Lys444 and formed H-bond, π-cation interactions, or hydrophobic contacts with Phe297 and key active residue, His363. During MD, stable π-cation interactions with residues Phe273 or Arg274 were formed by (S)-tryptophan-betaxanthin and RosA. All candidates were anchored to His363 by stable π- or H-bonds. Hence, we propose (S)-tryptophan-betaxanthin, 5-O-feruloylquinic acid, and RosA as potential lead compounds that can be further tested in drug development process for diseases associated with aging. An animated interactive 3D complement (I3DC) is available in Proteopedia at http://proteopedia.org/w/Journal:JBSD:28.",2012-10-17 +23175545,"Findings of the UK national audit evaluating image-guided or image-assisted liver biopsy. Part I. Procedural aspects, diagnostic adequacy, and accuracy.","

Purpose

To assess procedural aspects, diagnostic adequacy, and accuracy of liver biopsy across the United Kingdom.

Materials and methods

Institutional review board approval for this type of study is not required in the United Kingdom. All radiology departments with an approved leader for departmental audit registered with the Royal College of Radiologists were invited to participate in this retrospective audit. The first 50 consecutive patients who underwent image-guided or image-assisted liver biopsy in 2008 were included. Audit standards relating to procedural aspects of biopsy, sample adequacy, and accuracy were prepared with reference to published data. Sensitivity, specificity, positive and negative likelihood ratios, and accuracy were calculated. Organizational and clinical variables were investigated for their association with diagnostic specimen quality.

Results

Eighty-seven (41%) of 210 departments supplied data for this study, with a total of 3496 cases (1225 focal disease, 2262 nonfocal disease, nine unspecified). Ultrasonographic (US) guidance was the technique most commonly used for focal lesions and for cases of nonfocal disease (2808 [96.38%] of 3490 cases). The audit standard for sample adequacy (98%) was narrowly missed in practice (3401 [97.96%] of 3472 cases); however, the standard for diagnostic accuracy (90%) was met (3187 [98.55%] of 3234 cases). Poor compliance with postbiopsy documentation was observed.

Conclusion

The majority of liver biopsies in this audit were performed by radiologists using image guidance or assistance, usually in the form of US. Biopsies were performed with a high degree of accuracy. Some postprocedural aspects of biopsy failed to meet required standards and would merit reaudit after practice changes.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12111562/-/DC1.",2012-12-01 +24236176,Purpose in life predicts better emotional recovery from negative stimuli.,"Purpose in life predicts both health and longevity suggesting that the ability to find meaning from life's experiences, especially when confronting life's challenges, may be a mechanism underlying resilience. Having purpose in life may motivate reframing stressful situations to deal with them more productively, thereby facilitating recovery from stress and trauma. In turn, enhanced ability to recover from negative events may allow a person to achieve or maintain a feeling of greater purpose in life over time. In a large sample of adults (aged 36-84 years) from the MIDUS study (Midlife in the U.S., http://www.midus.wisc.edu/), we tested whether purpose in life was associated with better emotional recovery following exposure to negative picture stimuli indexed by the magnitude of the eyeblink startle reflex (EBR), a measure sensitive to emotional state. We differentiated between initial emotional reactivity (during stimulus presentation) and emotional recovery (occurring after stimulus offset). Greater purpose in life, assessed over two years prior, predicted better recovery from negative stimuli indexed by a smaller eyeblink after negative pictures offset, even after controlling for initial reactivity to the stimuli during the picture presentation, gender, age, trait affect, and other well-being dimensions. These data suggest a proximal mechanism by which purpose in life may afford protection from negative events and confer resilience is through enhanced automatic emotion regulation after negative emotional provocation.",2013-11-13 +21531699,PhyleasProg: a user-oriented web server for wide evolutionary analyses.,"Evolutionary analyses of biological data are becoming a prerequisite in many fields of biology. 
At a time of high-throughput data analysis, phylogenetics is often a necessary complementary tool for biologists to understand, compare and identify the functions of sequences. But available bioinformatics tools are frequently not easy for non-specialists to use. We developed PhyleasProg (http://phyleasprog.inra.fr), a user-friendly web server as a turnkey tool dedicated to evolutionary analyses. PhyleasProg can help biologists with little experience in evolutionary methodologies by analysing their data in a simple and robust way, using methods corresponding to robust standards. Via a very intuitive web interface, users only need to enter a list of Ensembl protein IDs and a list of species as inputs. After dynamic computations, users have access to phylogenetic trees, positive/purifying selection data (on site and branch-site models), with a display of these results on the protein sequence and on a 3D structure model, and the synteny environment of related genes. This connection between different domains of phylogenetics opens the way to new biological analyses for the discovery of the function and structure of proteins.",2011-04-29 +23754940,Scrutinizing MHC-I binding peptides and their limits of variation.,"Designed peptides that bind to major histocompatibility protein I (MHC-I) allomorphs bear the promise of representing epitopes that stimulate a desired immune response. A rigorous bioinformatical exploration of sequence patterns hidden in peptides that bind to the mouse MHC-I allomorph H-2K(b) is presented. We exemplify and validate these motif findings by systematically dissecting the epitope SIINFEKL and analyzing the resulting fragments for their binding potential to H-2K(b) in a thermal denaturation assay. The results demonstrate that only fragments exclusively retaining the carboxy- or amino-terminus of the reference peptide exhibit significant binding potential, with the N-terminal pentapeptide SIINF as shortest ligand. 
This study demonstrates that sophisticated machine-learning algorithms excel at extracting fine-grained patterns from peptide sequence data and predicting MHC-I binding peptides, thereby considerably extending existing linear prediction models and providing a fresh view on the computer-based molecular design of future synthetic vaccines. The server for prediction is available at http://modlab-cadd.ethz.ch (SLiDER tool, MHC-I version 2012).",2013-06-06 +22084252,rNA: a fast and accurate short reads numerical aligner.,

Summary

The advent of high-throughput sequencers (HTS) introduced the need of new tools in order to analyse the large amount of data that those machines are able to produce. The mandatory first step for a wide range of analyses is the alignment of the sequences against a reference genome. We present a major update to our rNA (randomized Numerical Aligner) tool. The main feature of rNA is the fact that it achieves an accuracy greater than the majority of other tools in a feasible amount of time. rNA executables and source codes are freely downloadable at http://iga-rna.sourceforge.net/.

Contact

vezzi@appliedgenomics.org; delfabbro@appliedgenomics.org

Supplementary information

Supplementary data are available at Bioinformatics online.,2011-11-13 +22247664,The new pelagic Operational Observatory of the Catalan Sea (OOCS) for the multisensor coordinated measurement of atmospheric and oceanographic conditions.,"The new pelagic Operational Observatory of the Catalan Sea (OOCS) for the coordinated multisensor measurement of atmospheric and oceanographic conditions has been recently installed (2009) in the Catalan Sea (41°39'N, 2°54'E; Western Mediterranean) and continuously operated (with minor maintenance gaps) until today. This multiparametric platform is moored at 192 m depth, 9.3 km off Blanes harbour (Girona, Spain). It is composed of a buoy holding atmospheric sensors and a set of oceanographic sensors measuring the water conditions over the upper 100 m depth. The station is located close to the head of the Blanes submarine canyon where an important multispecies pelagic and demersal fishery gives the station ecological and economic relevance. The OOCS provides important records on atmospheric and oceanographic conditions, the latter through the measurement of hydrological and biogeochemical parameters, at depths with a time resolution never attained before for this area of the Mediterranean. Twenty four moored sensors and probes operating in a coordinated fashion provide important data on Essential Ocean Variables (EOVs; UNESCO) such as temperature, salinity, pressure, dissolved oxygen, chlorophyll fluorescence, and turbidity. In comparison with other pelagic observatories presently operating in other world areas, OOCS also measures photosynthetic available radiation (PAR) from above the sea surface and at different depths in the upper 50 m. Data are recorded each 30 min and transmitted in real-time to a ground station via GPRS. This time series is published and automatically updated at the frequency of data collection on the official OOCS website (http://www.ceab.csic.es/~oceans). 
Under development are embedded automated routines for the in situ data treatment and assimilation into numerical models, in order to provide a reliable local marine processing forecast. In this work, our goal is to detail the OOCS multisensor architecture in relation to the coordinated capability for the remote, continuous and prolonged monitoring of atmospheric and oceanographic conditions, including data communication and storage. Accordingly, time series of measurements for a number of biological parameters will be presented for the summer months of 2011. Marine hindcast outputs from the numerical models implemented for simulating the conditions over the study area are shown. The strong changes of atmospheric conditions recorded in the last years over the area have altered the marine conditions of living organisms, but the dimension of the impact remains unclear. The OOCS multisensor coordinated monitoring has been specifically designed to address this issue, thus contributing to better understand the present environmental fluctuations and to provide a sound basis for a more accurate marine forecast system.",2011-11-28 +22586449,Learning transcriptional regulatory relationships using sparse graphical models.,"Understanding the organization and function of transcriptional regulatory networks by analyzing high-throughput gene expression profiles is a key problem in computational biology. The challenges in this work are 1) the lack of complete knowledge of the regulatory relationship between the regulators and the associated genes, 2) the potential for spurious associations due to confounding factors, and 3) the number of parameters to learn is usually larger than the number of available microarray experiments. We present a sparse (L1 regularized) graphical model to address these challenges. Our model incorporates known transcription factors and introduces hidden variables to represent possible unknown transcription and confounding factors. 
The expression level of a gene is modeled as a linear combination of the expression levels of known transcription factors and hidden factors. Using gene expression data covering 39,296 oligonucleotide probes from 1109 human liver samples, we demonstrate that our model better predicts out-of-sample data than a model with no hidden variables. We also show that some of the gene sets associated with hidden variables are strongly correlated with Gene Ontology categories. The software including source code is available at http://grnl1.codeplex.com.",2012-05-07 +21903627,"A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data.","

Motivation

Most existing methods for DNA sequence analysis rely on accurate sequences or genotypes. However, in applications of the next-generation sequencing (NGS), accurate genotypes may not be easily obtained (e.g. multi-sample low-coverage sequencing or somatic mutation discovery). These applications press for the development of new methods for analyzing sequence data with uncertainty.

Results

We present a statistical framework for calling SNPs, discovering somatic mutations, inferring population genetical parameters and performing association tests directly based on sequencing data without explicit genotyping or linkage-based imputation. On real data, we demonstrate that our method achieves comparable accuracy to alternative methods for estimating site allele count, for inferring allele frequency spectrum and for association mapping. We also highlight the necessity of using symmetric datasets for finding somatic mutations and confirm that for discovering rare events, mismapping is frequently the leading source of errors.

Availability

http://samtools.sourceforge.net.

Contact

hengli@broadinstitute.org.",2011-09-08 +22743226,RSeQC: quality control of RNA-seq experiments.,"

Motivation

RNA-seq has been extensively used for transcriptome study. Quality control (QC) is critical to ensure that RNA-seq data are of high quality and suitable for subsequent analyses. However, QC is a time-consuming and complex task, due to the massive size and versatile nature of RNA-seq data. Therefore, a convenient and comprehensive QC tool to assess RNA-seq quality is sorely needed.

Results

We developed the RSeQC package to comprehensively evaluate different aspects of RNA-seq experiments, such as sequence quality, GC bias, polymerase chain reaction bias, nucleotide composition bias, sequencing depth, strand specificity, coverage uniformity and read distribution over the genome structure. RSeQC takes both SAM and BAM files as input, which can be produced by most RNA-seq mapping tools as well as BED files, which are widely used for gene models. Most modules in RSeQC take advantage of R scripts for visualization, and they are notably efficient in dealing with large BAM/SAM files containing hundreds of millions of alignments.

Availability and implementation

RSeQC is written in Python and C. Source code and a comprehensive user's manual are freely available at: http://code.google.com/p/rseqc/.",2012-06-27 +22661576,"DR_bind: a web server for predicting DNA-binding residues from the protein structure based on electrostatics, evolution and geometry.","DR_bind is a web server that automatically predicts DNA-binding residues, given the respective protein structure based on (i) electrostatics, (ii) evolution and (iii) geometry. In contrast to machine-learning methods, DR_bind does not require a training data set or any parameters. It predicts DNA-binding residues by detecting a cluster of conserved, solvent-accessible residues that are electrostatically stabilized upon mutation to Asp(-)/Glu(-). The server requires as input the DNA-binding protein structure in PDB format and outputs a downloadable text file of the predicted DNA-binding residues, a 3D visualization of the predicted residues highlighted in the given protein structure, and a downloadable PyMol script for visualization of the results. Calibration on 83 and 55 non-redundant DNA-bound and DNA-free protein structures yielded a DNA-binding residue prediction accuracy/precision of 90/47% and 88/42%, respectively. Since DR_bind does not require any training using protein-DNA complex structures, it may predict DNA-binding residues in novel structures of DNA-binding proteins resulting from structural genomics projects with no conservation data. The DR_bind server is freely available with no login requirement at http://dnasite.limlab.ibms.sinica.edu.tw.",2012-05-31 +21636593,A memory-efficient data structure representing exact-match overlap graphs with application for next-generation DNA assembly.,"

Motivation

Exact-match overlap graphs have been broadly used in the context of DNA assembly and the shortest super string problem where the number of strings n ranges from thousands to billions. The length ℓ of the strings is from 25 to 1000, depending on the DNA sequencing technologies. However, many DNA assemblers using overlap graphs suffer from the need for too much time and space in constructing the graphs. It is nearly impossible for these DNA assemblers to handle the huge amount of data produced by the next-generation sequencing technologies where the number n of strings could be several billions. If the overlap graph is explicitly stored, it would require Ω(n^2) memory, which could be prohibitive in practice when n is greater than a hundred million. In this article, we propose a novel data structure using which the overlap graph can be compactly stored. This data structure requires only linear time to construct and linear memory to store.

Results

For a given set of input strings (also called reads), we can informally define an exact-match overlap graph as follows. Each read is represented as a node in the graph and there is an edge between two nodes if the corresponding reads overlap sufficiently. A formal description follows. The maximal exact-match overlap of two strings x and y, denoted by ov(max)(x, y), is the longest string which is a suffix of x and a prefix of y. The exact-match overlap graph of n given strings of length ℓ is an edge-weighted graph in which each vertex is associated with a string and there is an edge (x, y) of weight ω=ℓ-|ov(max)(x, y)| if and only if ω ≤ λ, where |ov(max)(x, y)| is the length of ov(max)(x, y) and λ is a given threshold. In this article, we show that the exact-match overlap graphs can be represented by a compact data structure that can be stored using at most (2λ-1)(2⌈logn⌉+⌈logλ⌉)n bits with a guarantee that the basic operation of accessing an edge takes O(log λ) time. We also propose two algorithms for constructing the data structure for the exact-match overlap graph. The first algorithm runs in O(λℓnlogn) worst-case time and requires O(λ) extra memory. The second one runs in O(λℓn) time and requires O(n) extra memory. Our experimental results on a huge amount of simulated data from sequence assembly show that the data structure can be constructed efficiently in time and memory.

Availability

Our DNA sequence assembler that incorporates the data structure is freely available on the web at http://www.engr.uconn.edu/~htd06001/assembler/leap.zip",2011-06-02 +24618463,A gradient-boosting approach for filtering de novo mutations in parent-offspring trios.,"

Motivation

Whole-genome and -exome sequencing on parent-offspring trios is a powerful approach to identifying disease-associated genes by detecting de novo mutations in patients. Accurate detection of de novo mutations from sequencing data is a critical step in trio-based genetic studies. Existing bioinformatic approaches usually yield high error rates due to sequencing artifacts and alignment issues, which may either miss true de novo mutations or call too many false ones, making downstream validation and analysis difficult. In particular, current approaches have much worse specificity than sensitivity, and developing effective filters to discriminate genuine from spurious de novo mutations remains an unsolved challenge.

Results

In this article, we curated 59 sequence features in whole genome and exome alignment context which are considered to be relevant to discriminating true de novo mutations from artifacts, and then employed a machine-learning approach to classify candidates as true or false de novo mutations. Specifically, we built a classifier, named De Novo Mutation Filter (DNMFilter), using gradient boosting as the classification algorithm. We built the training set using experimentally validated true and false de novo mutations as well as collected false de novo mutations from an in-house large-scale exome-sequencing project. We evaluated DNMFilter's theoretical performance and investigated relative importance of different sequence features on the classification accuracy. Finally, we applied DNMFilter on our in-house whole exome trios and one CEU trio from the 1000 Genomes Project and found that DNMFilter could be coupled with commonly used de novo mutation detection approaches as an effective filtering approach to significantly reduce false discovery rate without sacrificing sensitivity.

Availability

The software DNMFilter implemented using a combination of Java and R is freely available from the website at http://humangenome.duke.edu/software.",2014-03-10 +23929859,A graph-theoretical approach to the selection of the minimum tiling path from a physical map.,"The problem of computing the minimum tiling path (MTP) from a set of clones arranged in a physical map is a cornerstone of hierarchical (clone-by-clone) genome sequencing projects. We formulate this problem in a graph theoretical framework, and then solve by a combination of minimum hitting set and minimum spanning tree algorithms. The tool implementing this strategy, called FMTP, shows improved performance compared to the widely used software FPC. When we execute FMTP and FPC on the same physical map, the MTP produced by FMTP covers a higher portion of the genome, and uses a smaller number of clones. For instance, on the rice genome the MTP produced by our tool would reduce by about 11 percent the cost of a clone-by-clone sequencing project. Source code, benchmark data sets, and documentation of FMTP are freely available at >http://code.google.com/p/fingerprint-based-minimal-tiling-path/ under MIT license.",2013-03-01 +22973536,oPOSSUM-3: advanced analysis of regulatory motif over-representation across genes or ChIP-Seq datasets.,"oPOSSUM-3 is a web-accessible software system for identification of over-represented transcription factor binding sites (TFBS) and TFBS families in either DNA sequences of co-expressed genes or sequences generated from high-throughput methods, such as ChIP-Seq. Validation of the system with known sets of co-regulated genes and published ChIP-Seq data demonstrates the capacity for oPOSSUM-3 to identify mediating transcription factors (TF) for co-regulated genes or co-recovered sequences. 
oPOSSUM-3 is available at http://opossum.cisreg.ca.",2012-09-01 +21960719,mz5: space- and time-efficient storage of mass spectrometry data sets.,"Across a host of MS-driven-omics fields, researchers witness the acquisition of ever increasing amounts of high throughput MS data and face the need for their compact yet efficiently accessible storage. Addressing the need for an open data exchange format, the Proteomics Standards Initiative and the Seattle Proteome Center at the Institute for Systems Biology independently developed the mzData and mzXML formats, respectively. In a subsequent joint effort, they defined an ontology and associated controlled vocabulary that specifies the contents of MS data files, implemented as the newer mzML format. All three formats are based on XML and are thus not particularly efficient in either storage space requirements or read/write speed. This contribution introduces mz5, a complete reimplementation of the mzML ontology that is based on the efficient, industrial strength storage backend HDF5. Compared with the current mzML standard, this strategy yields an average file size reduction to ∼54% and increases linear read and write speeds ∼3-4-fold. The format is implemented as part of the ProteoWizard project and is available under a permissive Apache license. Additional information and download links are available from http://software.steenlab.org/mz5.",2011-09-29 +23399458,A population-level prediction tool for the incidence of first-episode psychosis: translational epidemiology based on cross-sectional data. ,"Specialist early intervention services (EIS) for people aged 14-35 years with first episodes of psychosis (FEP) have been commissioned throughout England since 2001. A single estimate of population need was used everywhere, but true incidence varies enormously according to sociodemographic factors. 
We sought to develop a realistically complex, population-based prediction tool for FEP, based on precise estimates of epidemiological risk. Data from 1037 participants in two cross-sectional population-based FEP studies were fitted to several negative binomial regression models to estimate risk coefficients across combinations of different sociodemographic and socioenvironmental factors. We applied these coefficients to the population at-risk of a third, socioeconomically different region to predict expected caseload over 2.5 years, where the observed rates of ICD-10 F10-39 FEP had been concurrently ascertained via EIS. Empirical population-based epidemiological data from London, Nottingham and Bristol predicted counts in the population at-risk in the East Anglia region of England. Observed counts were compared with predicted counts (with 95% prediction intervals (PI)) at EIS and local authority district (LAD) levels in East Anglia to establish the predictive validity of each model. A model with age, sex, ethnicity and population density performed most strongly, predicting 508 FEP participants in EIS in East Anglia (95% PI 459, 559), compared with 522 observed participants. This model predicted correctly in 5/6 EIS and 19/21 LADs. All models performed better than the current gold standard for EIS commissioning in England (716 cases; 95% PI 664-769). We have developed a prediction tool for the incidence of psychotic disorders in England and Wales, made freely available online (http://www.psymaptic.org), to provide healthcare commissioners with accurate forecasts of FEP based on robust epidemiology and anticipated local population need. The initial assessment of some people who do not require subsequent EIS care means additional service resources, not addressed here, will be required.",2013-02-11 +23601347,LocARNAscan: Incorporating thermodynamic stability in sequence and structure-based RNA homology search.,"

Background

The search for distant homologs has become an important issue in genome annotation. A particular difficulty is posed by divergent homologs that have lost recognizable sequence similarity. This same problem also arises in the recognition of novel members of large classes of RNAs such as snoRNAs or microRNAs that consist of families unrelated by common descent. Current homology search tools for structured RNAs are either based entirely on sequence similarity (such as blast or hmmer) or combine sequence and secondary structure. The most prominent example of the latter class of tools is Infernal. Alternatives are descriptor-based methods. In most practical applications published to-date, however, the information contained in covariance models or manually prescribed search patterns is dominated by sequence information. Here we ask two related questions: (1) Is secondary structure alone informative for homology search and the detection of novel members of RNA classes? (2) To what extent is the thermodynamic propensity of the target sequence to fold into the correct secondary structure helpful for this task?

Results

Sequence-structure alignment can be used as an alternative search strategy. In this scenario, the query consists of a base pairing probability matrix, which can be derived either from a single sequence or from a multiple alignment representing a set of known representatives. Sequence information can be optionally added to the query. The target sequence is pre-processed to obtain local base pairing probabilities. As a search engine we devised a semi-global scanning variant of LocARNA's algorithm for sequence-structure alignment. The LocARNAscan tool is optimized for speed and low memory consumption. In benchmarking experiments on artificial data we observe that the inclusion of thermodynamic stability is helpful, albeit only in a regime of extremely low sequence information in the query. We observe, furthermore, that the sensitivity is bounded in particular by the limited accuracy of the predicted local structures of the target sequence.

Conclusions

Although we demonstrate that a purely structure-based homology search is feasible in principle, it is unlikely to outperform tools such as Infernal in most application scenarios, where a substantial amount of sequence information is typically available. The LocARNAscan approach will profit, however, from high throughput methods to determine RNA secondary structure. In transcriptome-wide applications, such methods will provide accurate structure annotations on the target side.

Availability

Source code of the free software LocARNAscan 1.0 and supplementary data are available at http://www.bioinf.uni-leipzig.de/Software/LocARNAscan.",2013-04-20 +23075261,Conformational dynamics of full-length inducible human Hsp70 derived from microsecond molecular dynamics simulations in explicit solvent.,"Human 70 kDa heat shock protein (hHsp70) is an ATP-dependent chaperone and is currently an important target for developing new drugs in cancer therapy. Knowledge of the conformations of hHsp70 is central to understand the interactions between its nucleotide-binding domain (NBD) and substrate-binding domain (SBD) and is a prerequisite to design inhibitors. The conformations of ADP-bound (or nucleotide-free) hHsp70 and ATP-bound hHsp70 were investigated by using unbiased all-atom molecular dynamics (MD) simulations of homology models of hHsp70 in explicit solvent on a timescale of 0.5 and 2.7 μs, respectively. The conformational heterogeneity of hHsp70 was analyzed by computing effective free-energy landscapes (FELs) and distance distribution between selected pair of residues. These theoretical data were compared with those extracted from single-molecule Förster resonance energy transfer (FRET) experiments and to small-angle X-ray scattering (SAXS) data of Hsp70 homologs. The distance between a pair of residues in FRET is systematically larger than the distance computed in MD which is interpreted as an effect of the size and of the dynamics of the fluorescent probes. The origin of the conformational heterogeneity of hHsp70 in the ATP-bound state is due to different binding modes of the helix B of the SBD onto the NBD. In the ADP-bound (or nucleotide-free) state, it arises from the different closed conformations of the SBD and from the different positions of the SBD relative to the NBD. In each nucleotide-binding state, Hsp70 is better represented by an ensemble of conformations on a μs timescale corresponding to different local minima of the FEL. 
An animated interactive 3D complement (I3DC) is available in Proteopedia at http://proteopedia.org/w/Journal:JBSD:30.",2012-10-17 +22556370,SEQCHIP: a powerful method to integrate sequence and genotype data for the detection of rare variant associations.,"

Motivation

Next-generation sequencing greatly increases the capacity to detect rare-variant complex-trait associations. However, it is still expensive to sequence a large number of samples and therefore often small datasets are used. Given cost constraints, a potentially more powerful two-step strategy is to sequence a subset of the sample to discover variants, and genotype the identified variants in the remaining sample. If only cases are sequenced, directly combining sequence and genotype data will lead to inflated type-I errors in rare-variant association analysis. Although several methods have been developed to correct for the bias, they are either underpowered or theoretically invalid. We proposed a new method SEQCHIP to integrate genotype and sequence data, which can be used with most existing rare-variant tests.

Results

It is demonstrated using both simulated and real datasets that the SEQCHIP method has controlled type-I errors, and is substantially more powerful than all other currently available methods.

Availability

SEQCHIP is implemented in an R-Package and is available at http://linkage.rockefeller.edu/suzanne/seqchip/Seqchip.html.",2012-05-03 +22812424,Ultramarathon is an outstanding model for the study of adaptive responses to extreme load and stress.,"Ultramarathons comprise any sporting event involving running longer than the traditional marathon length of 42.195 km (26.2 miles). Studies on ultramarathon participants can investigate the acute consequences of ultra-endurance exercise on inflammation and cardiovascular or renal consequences, as well as endocrine/energetic aspects, and examine the tissue recovery process over several days of extreme physical load. In a study published in BMC Medicine, Schütz et al. followed 44 ultramarathon runners over 4,487 km from South Italy to North Cape, Norway (the Trans Europe Foot Race 2009) and recorded daily sets of data from magnetic resonance imaging, psychometric, body composition and biological measurements. The findings will allow us to better understand the timecourse of degeneration/regeneration of some lower leg tissues such as knee joint cartilage, to differentiate running-induced from age-induced pathologies (for example, retropatellar arthritis) and finally to assess the interindividual susceptibility to injuries. Moreover, it will also provide new information about the complex interplay between cerebral adaptations/alterations and hormonal influences resulting from endurance exercise and provide data on the dose-response relationship between exercise and brain structure/function. Overall, this study represents a unique attempt to investigate the limits of the adaptive response of human bodies. Please see related article: http://www.biomedcentral.com/1741-7015/10/78.",2012-07-19 +21840877,ChimeraScan: a tool for identifying chimeric transcription in sequencing data.,"

Summary

Next generation sequencing (NGS) technologies have enabled de novo gene fusion discovery that could reveal candidates with therapeutic significance in cancer. Here we present an open-source software package, ChimeraScan, for the discovery of chimeric transcription between two independent transcripts in high-throughput transcriptome sequencing data.

Availability

http://chimerascan.googlecode.com

Contact

cmaher@dom.wustl.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-11 +23087698,Seeds in Chernobyl: the database on proteome response on radioactive environment.,"Two serious nuclear accidents during the last quarter century (Chernobyl, 1986 and Fukushima, 2011) contaminated large agricultural areas with radioactivity. The database ""Seeds in Chernobyl"" (http://www.chernobylproteomics.sav.sk) contains the information about the abundances of hundreds of proteins from on-going investigation of mature and developing seed harvested from plants grown in radioactive Chernobyl area. This database provides a useful source of information concerning the response of the seed proteome to permanently increased level of ionizing radiation in a user-friendly format.",2012-10-10 +21797991,ExpressionPlot: a web-based framework for analysis of RNA-Seq and microarray gene expression data.,"RNA-Seq and microarray platforms have emerged as important tools for detecting changes in gene expression and RNA processing in biological samples. We present ExpressionPlot, a software package consisting of a default back end, which prepares raw sequencing or Affymetrix microarray data, and a web-based front end, which offers a biologically centered interface to browse, visualize, and compare different data sets. Download and installation instructions, a user's manual, discussion group, and a prototype are available at http://expressionplot.com/.",2011-07-28 +22779798,QSAR study on 5-lipoxygenase inhibitors based on support vector machine.,"QSAR study on a data set of 5-lipoxygenase inhibitors (1-phenyl [2H]-tetrahydro-triazine-3-one analogues) was carried out by using Support Vector Regression (SVR) and physicochemical parameters. Wrapper methods were used to select descriptors, while Leave-One-Out Cross Validation (LOOCV) method and independent set test were used to judge the predictive power of different models. 
We found out that the generalization ability of SVR model outperformed multiple linear regression (MLR) and Partial Least Squares (PLS) models in this work. An online web server for activity prediction is available at http://chemdata.shu.edu.cn/qsar5lip.",2012-11-01 +21441235,Global landscape of a co-expressed gene network in barley and its application to gene discovery in Triticeae crops.,"Accumulated transcriptome data can be used to investigate regulatory networks of genes involved in various biological systems. Co-expression analysis data sets generated from comprehensively collected transcriptome data sets now represent efficient resources that are capable of facilitating the discovery of genes with closely correlated expression patterns. In order to construct a co-expression network for barley, we analyzed 45 publicly available experimental series, which are composed of 1,347 sets of GeneChip data for barley. On the basis of a gene-to-gene weighted correlation coefficient, we constructed a global barley co-expression network and classified it into clusters of subnetwork modules. The resulting clusters are candidates for functional regulatory modules in the barley transcriptome. To annotate each of the modules, we performed comparative annotation using genes in Arabidopsis and Brachypodium distachyon. On the basis of a comparative analysis between barley and two model species, we investigated functional properties from the representative distributions of the gene ontology (GO) terms. Modules putatively involved in drought stress response and cellulose biogenesis have been identified. These modules are discussed to demonstrate the effectiveness of the co-expression analysis. Furthermore, we applied the data set of co-expressed genes coupled with comparative analysis in attempts to discover potentially Triticeae-specific network modules. 
These results demonstrate that analysis of the co-expression network of the barley transcriptome together with comparative analysis should promote the process of gene discovery in barley. Furthermore, the insights obtained should be transferable to investigations of Triticeae plants. The associated data set generated in this analysis is publicly accessible at http://coexpression.psc.riken.jp/barley/.",2011-03-24 +24109552,Galaxy tools and workflows for sequence analysis with applications in molecular plant pathology.,"The Galaxy Project offers the popular web browser-based platform Galaxy for running bioinformatics tools and constructing simple workflows. Here, we present a broad collection of additional Galaxy tools for large scale analysis of gene and protein sequences. The motivating research theme is the identification of specific genes of interest in a range of non-model organisms, and our central example is the identification and prediction of ""effector"" proteins produced by plant pathogens in order to manipulate their host plant. This functional annotation of a pathogen's predicted capacity for virulence is a key step in translating sequence data into potential applications in plant pathology. This collection includes novel tools, and widely-used third-party tools such as NCBI BLAST+ wrapped for use within Galaxy. Individual bioinformatics software tools are typically available separately as standalone packages, or in online browser-based form. The Galaxy framework enables the user to combine these and other tools to automate organism scale analyses as workflows, without demanding familiarity with command line tools and scripting. Workflows created using Galaxy can be saved and are reusable, so may be distributed within and between research groups, facilitating the construction of a set of standardised, reusable bioinformatic protocols. 
The Galaxy tools and workflows described in this manuscript are open source and freely available from the Galaxy Tool Shed (http://usegalaxy.org/toolshed or http://toolshed.g2.bx.psu.edu).",2013-09-17 +24363374,Efficient clustering of identity-by-descent between multiple individuals.,"

Motivation

Most existing identity-by-descent (IBD) detection methods only consider haplotype pairs; less attention has been paid to considering multiple haplotypes simultaneously, even though IBD is an equivalence relation on haplotypes that partitions a set of haplotypes into IBD clusters. Multiple-haplotype IBD clusters may have advantages over pairwise IBD in some applications, such as IBD mapping. Existing methods for detecting multiple-haplotype IBD clusters are often computationally expensive and unable to handle large samples with thousands of haplotypes.

Results

We present a clustering method, efficient multiple-IBD, which uses pairwise IBD segments to infer multiple-haplotype IBD clusters. It expands clusters from seed haplotypes by adding qualified neighbors and extends clusters across sliding windows in the genome. Our method is an order of magnitude faster than existing methods and has comparable performance with respect to the quality of clusters it uncovers. We further investigate the potential application of multiple-haplotype IBD clusters in association studies by testing for association between multiple-haplotype IBD clusters and low-density lipoprotein cholesterol in the Northern Finland Birth Cohort. Using our multiple-haplotype IBD cluster approach, we found an association with a genomic interval covering the PCSK9 gene in these data that is missed by standard single-marker association tests. Previously published studies confirm association of PCSK9 with low-density lipoprotein.

Availability and implementation

Source code is available under the GNU Public License http://cs.au.dk/~qianyuxx/EMI/.",2013-12-19 +21967760,ProfileChaser: searching microarray repositories based on genome-wide patterns of differential expression.,"

Summary

We introduce ProfileChaser, a web server that allows for querying the Gene Expression Omnibus based on genome-wide patterns of differential expression. Using a novel, content-based approach, ProfileChaser retrieves expression profiles that match the differentially regulated transcriptional programs in a user-supplied experiment. This analysis identifies statistical links to similar expression experiments from the vast array of publicly available data on diseases, drugs, phenotypes and other experimental conditions.

Availability

http://profilechaser.stanford.edu

Contact

abutte@stanford.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-03 +23632165,QualitySNPng: a user-friendly SNP detection and visualization tool.,"QualitySNPng is a new software tool for the detection and interactive visualization of single-nucleotide polymorphisms (SNPs). It uses a haplotype-based strategy to identify reliable SNPs; it is optimized for the analysis of current RNA-seq data; but it can also be used on genomic DNA sequences derived from next-generation sequencing experiments. QualitySNPng does not require a sequenced reference genome and delivers reliable SNPs for di- as well as polyploid species. The tool features a user-friendly interface, multiple filtering options to handle typical sequencing errors, support for SAM and ACE files and interactive visualization. QualitySNPng produces high-quality SNP information that can be used directly in genotyping by sequencing approaches for application in QTL and genome-wide association mapping as well as to populate SNP arrays. The software can be used as a stand-alone application with a graphical user interface or as part of a pipeline system like Galaxy. Versions for Windows, Mac OS X and Linux, as well as the source code, are available from http://www.bioinformatics.nl/QualitySNPng.",2013-04-30 +22155865,B-SOLANA: an approach for the analysis of two-base encoding bisulfite sequencing data.,"

Summary

Bisulfite sequencing, a combination of bisulfite treatment and high-throughput sequencing, has proved to be a valuable method for measuring DNA methylation at single base resolution. Here, we present B-SOLANA, an approach for the analysis of two-base encoding (colorspace) bisulfite sequencing data on the SOLiD platform of Life Technologies. It includes the alignment of bisulfite sequences and the determination of methylation levels in CpG as well as non-CpG sequence contexts. B-SOLANA enables a fast and accurate analysis of large raw sequence datasets.

Availability and implementation

The source code, released under the GNU GPLv3 licence, is freely available at http://code.google.com/p/bsolana/.

Contact

b.kreck@ikmb.uni-kiel.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-06 +21204332,Drug Discrimination,"The psychoactive effect of a drug usually refers to a chemical agent that exerts an action upon the central nervous system (CNS), alters brain function, and, consequently, produces a temporary change in an individual’s mood, feelings, perception, and/or behavior. Such agents may be prescribed as therapeutic medications or used (or abused) as recreational drugs. In each case, the subjective effects produced by such agents are generally not accessible to independent verification by an observer. However, methods were developed about 50 years ago whereby human subjects could self-rate their experiences on questionnaires after administration of a drug [1]. Generally, these self-inventories require subjects to provide information about themselves and are considered valuable because they venture “below the surface” to glean the effect of a drug on an individual. Also, they are convenient because they (usually) do not require the services of a group of raters or interviewers. Their chief disadvantage may be that individuals might not completely understand the effect of the drug or their drug “experience” and therefore might not always give an accurate report. The drug discrimination (DD) paradigm is an assay of, and relates to, the subjective effect of drugs in nonhuman animals or humans. In a typical DD experiment, there are four basic components: (1) the subject, (2) the dose of drug that exerts an effect on the subject and precedes a response by the subject, (3) an appropriate (or correct) response, and (4) presentation of reinforcement. SUBJECT → DOSE OF DRUG → RESPONSE → REINFORCEMENT The drug effect that “leads to” a behavioral event (i.e., particular response) and signals that reinforcement is available is called the discriminative stimulus. A wide variety of psychoactive drugs can serve as discriminative stimuli (see below). 
In laboratory subjects, discriminative control by (usually) two treatments is established through the use of reinforcement (reward). When subjects receive a dose of a drug, it functions as a signal that prompts a correct behavioral response and results in the presentation of a reward. In other words, the effect of the drug is used as a “help” or “aid” to control appropriate behavioral responding by signaling that reinforcement is (or will be) available. Subjects are usually trained to distinguish administration of a particular dose of a particular drug (i.e., the training dose of a training drug) from administration of saline vehicle (i.e., usually a 0.9% sodium chloride solution that is often used as a solvent for many parenterally administered drugs). In a subject’s course of training sessions, the dose of drug is administered (i.e., drug sessions) and lever presses on the drug-designated lever (for that subject) produce reinforcement. In other training sessions, saline is administered (i.e., vehicle sessions) and responses on the (alternate) saline-designated lever produce reinforcement. The DD procedure can be characterized as a highly sensitive and very specific drug detection method that provides both quantitative and qualitative data on the effect of a training drug in relation to the effect of a test (i.e., challenge) agent. Historically, DD studies are linked by a common requirement that subjects must perform an appropriate (or correct) response that indicates a distinction was made between drug and nondrug conditions. As such, when employed with animals or humans, a subject’s response permits an experimenter to determine if a drug effect has been “perceived.” An excellent source of information on DD studies can be found at the Drug Discrimination Bibliography Web site (http://www.dd-database.org). The Web site, established and maintained by Drs. Ian P. Stolerman and Jonathan B. 
Kamien, is funded by the National Institute on Drug Abuse (NIDA) of the National Institutes of Health (NIH) and contains close to 4000 DD references published between 1951 and the present. The citations include DD abstracts, journal articles, reviews, book chapters, and books. In addition, the Web site can be navigated to selectively retrieve references on particular training drugs, drug classes, test drugs, authors, and DD methodologies.",2011-01-05 +23060616,CLEVER: clique-enumerating variant finder.,"

Motivation

Next-generation sequencing techniques have facilitated a large-scale analysis of human genetic variation. Despite the advances in sequencing speed, the computational discovery of structural variants is not yet standard. It is likely that many variants have remained undiscovered in most sequenced individuals.

Results

Here, we present a novel internal segment size based approach, which organizes all, including concordant, reads into a read alignment graph, where max-cliques represent maximal contradiction-free groups of alignments. A novel algorithm then enumerates all max-cliques and statistically evaluates them for their potential to reflect insertions or deletions. For the first time in the literature, we compare a large range of state-of-the-art approaches using simulated Illumina reads from a fully annotated genome and present relevant performance statistics. We achieve superior performance, in particular, for deletions or insertions (indels) of length 20-100 nt. This has been previously identified as a remaining major challenge in structural variation discovery, in particular, for insert size based approaches. In this size range, we even outperform split-read aligners. We achieve competitive results also on biological data, where our method is the only one to make a substantial amount of correct predictions, which, additionally, are disjoint from those by split-read aligners.

Availability

CLEVER is open source (GPL) and available from http://clever-sv.googlecode.com.

Contact

as@cwi.nl or tm@cwi.nl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-11 +23885363,"Health, United States, 2012: With Special Feature on Emergency Care","Health, United States, 2012 is the 36th report on the health status of the Nation and is submitted by the Secretary of the Department of Health and Human Services to the President and the Congress of the United States in compliance with Section 308 of the Public Health Service Act. This report was compiled by the Centers for Disease Control and Prevention’s (CDC) National Center for Health Statistics (NCHS). The National Committee on Vital and Health Statistics served in a review capacity. The Health, United States series presents an annual overview of national trends in health statistics. The report contains a Chartbook that assesses the Nation’s health by presenting trends and current information on selected measures of morbidity, mortality, health care utilization, health risk factors, prevention, health insurance, and personal health care expenditures. This year’s Chartbook includes a Special Feature on Emergency Care. The report also contains 134 Trend Tables organized around four major subject areas: health status and determinants, health care utilization, health care resources, and health care expenditures. A companion product—Health, United States: In Brief—features information extracted from the full report. The complete report, In Brief, and related data products are available on the Health, United States website at: http://www.cdc.gov/nchs/hus.htm.",2013-07-26 +24912374,Quantification of myocardial fibrosis by digital image analysis and interactive stereology.,"

Background

Cardiac fibrosis disrupts the normal myocardial structure and has a direct impact on heart function and survival. Despite already available digital methods, the pathologist's visual score is still widely considered as ground truth and used as a primary method in histomorphometric evaluations. The aim of this study was to compare the accuracy of digital image analysis tools and the pathologist's visual scoring for evaluating fibrosis in human myocardial biopsies, based on reference data obtained by point counting performed on the same images.

Methods

Endomyocardial biopsy material from 38 patients diagnosed with inflammatory dilated cardiomyopathy was used. The extent of total cardiac fibrosis was assessed by image analysis on Masson's trichrome-stained tissue specimens using automated Colocalization and Genie software, by Stereology grid count and manually by Pathologist's visual score.

Results

A total of 116 slides were analyzed. The mean results obtained by the Colocalization software (13.72 ± 12.24%) were closest to the reference value of stereology (RVS), while the Genie software and Pathologist score gave a slight underestimation. RVS values correlated strongly with values obtained using the Colocalization and Genie (r>0.9, p<0.001) software as well as the pathologist visual score. Differences in fibrosis quantification by Colocalization and RVS were statistically insignificant. However, significant bias was found in the results obtained by using Genie versus RVS and pathologist score versus RVS with mean difference values of: -1.61% and 2.24%. Bland-Altman plots showed a bidirectional bias dependent on the magnitude of the measurement: Colocalization software overestimated the area fraction of fibrosis in the lower end, and underestimated in the higher end of the RVS values. Meanwhile, Genie software as well as the pathologist score showed more uniform results throughout the values, with a slight underestimation in the mid-range for both.

Conclusion

Both applied digital image analysis methods revealed almost perfect correlation with the criterion standard obtained by stereology grid count and, in terms of accuracy, outperformed the pathologist's visual score. Genie algorithm proved to be the method of choice with the only drawback of a slight underestimation bias, which is considered acceptable for both clinical and research evaluations.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/9857909611227193.",2014-06-09 +23803466,Prediction of site-specific interactions in antibody-antigen complexes: the proABC method and server.,"

Motivation

Antibodies or immunoglobulins are proteins of paramount importance in the immune system. They are extremely relevant as diagnostic, biotechnological and therapeutic tools. Their modular structure makes it easy to re-engineer them for specific purposes. Short of undergoing a trial and error process, these experiments, as well as others, need to rely on an understanding of the specific determinants of the antibody binding mode.

Results

In this article, we present a method to identify, on the basis of the antibody sequence alone, which residues of an antibody directly interact with its cognate antigen. The method, based on the random forest automatic learning techniques, reaches a recall and specificity as high as 80% and is implemented as a free and easy-to-use server, named prediction of Antibody Contacts. We believe that it can be of great help in re-design experiments as well as a guide for molecular docking experiments. The results that we obtained also allowed us to dissect which features of the antibody sequence contribute most to the involvement of specific residues in binding to the antigen.

Availability

http://www.biocomputing.it/proABC.

Contact

anna.tramontano@uniroma1.it or paolo.marcatili@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-06-26 +21325201,Focus issue: conquering the data mountain.,"High-throughput technologies have enabled a rapid increase in the acquisition of data regarding cellular regulation, such as protein-protein interactions, gene expression profiling, proteomic analyses of changes in protein abundance, and global analyses of posttranslational modifications. The challenge now is for the community to devise adequate standards for assessing reliability and annotation, facilities for storage, mechanisms for sharing, and tools for visualization and analysis. In conjunction with Science (http://www.sciencemag.org/special/data), this issue of Science Signaling tackles some of the key issues related to the data deluge faced by cell signaling researchers.",2011-02-15 +28517234,SU-E-T-388: Verification of Monitor Units and Dose Distributions in IMRT Plans Using Monte Carlo Algorithms on the E-IMRT Web Platform.,"

Purpose

Currently, quality control (QC) for each IMRT treatment is performed by dose distribution measurements. These techniques are very time-consuming and require long accelerator downtime. QC could be only based in verification of monitor units and dose distributions, if precise control of MLC is carried out. In such a manner, the e-IMRT platform (http://eimrt.cesga.es/) is a remote distributed computing tool, which allows comparison between the dose distributions calculated by a TPS and those calculated by Monte Carlo (MC).

Methods

Previously, our linear accelerator (Oncor Impression, Siemens) was commissioned. For this purpose, comparison of experimental and MC simulated data was carried out. Several IMRT treatments plans were calculated in superposition algorithm (TPS Xio®CMS 4.60.00) and used as input data for the e-IMRT platform. These treatment plans were previously verified employing a 2D array MapCheckTM, Sun Nuclear. The gamma index (3%, 3mm) was used for validating results.

Results

The platform displays calculated doses using MC, also gamma map (in the CT images, not only statistical data) and histogram shown in Figures 1a), b) and d). The gamma map illustrates the differences between the input and calculated doses. According to the legend in Figure 1 d), these differences correspond to less than 1%. Results show good agreement between the doses calculated by TPS and those computed by e-IMRT platform.

Conclusions

If a rigorous quality control is established for MLC and optimisation criteria (number of gantry angles, minimum segment size, levels of intensity for fluency map) are used. Then, QC for IMRT standard treatment plans would be only based on the verification of monitor units and dose distributions using e-IMRT II. This work has been funded by the Xunta de Galicia, Project R&D Grant 09SIN007CT. We would like to thank Centro de Supercomputación de Galicia for the computational resources and support.",2012-06-01 +24588959,Analysis of BRAF(V600E) mutation and DNA methylation improves the diagnostics of thyroid fine needle aspiration biopsies.,"

Background

Thyroid nodules with indeterminate cytological features on fine needle aspiration biopsy specimens (FNABs) have a ~20% risk of thyroid cancer. BRAF(V600E) mutation and DNA methylation are useful markers to distinguish malignant thyroid neoplasm from benign. The aim of this study was to determine whether combined detection of BRAF(V600E) mutation and methylation markers on FNABs could improve the diagnostic accuracy of thyroid cancer.

Methods

Using pyrosequencing and quantitative methylation-specific PCR (Q-MSP) methods, FNABs from 79 and 38 patients with thyroid nodules in training and test groups, respectively, were analyzed for BRAF(V600E) mutation and gene methylation.

Results

BRAF(V600E) mutation was found in 30/42 (71.4%) and 14/20 (70%) FNABs in training and test groups, respectively. All BRAF(V600E)-positive samples were histologically diagnosed as papillary thyroid cancer (PTC) after thyroidectomy. As expected, BRAF mutation was not found in all benign nodules. Moreover, we demonstrated that the five genes, including CALCA, DAPK1, TIMP3, RAR-beta and RASSF1A, were aberrantly methylated in FNABs. Of them, methylation level of DAPK1 in PTCs was significantly higher than that in benign samples (P <0.0001). Conversely, methylation level of RASSF1A in PTCs was significantly lower than that in benign samples (P =0.003). Notably, compared with BRAF mutation testing alone, combined detection of BRAF mutation and methylation markers increased the diagnostic sensitivity and accuracy of PTC with excellent specificity.

Conclusion

Our data have demonstrated that combine analysis of BRAF mutation and DNA methylation markers on FNABs may be a useful strategy to facilitate the diagnosis of malignant thyroid neoplasm, particularly PTC.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/6080878071149177.",2014-03-03 +24811971,Mast cells in the periprosthetic breast capsule.,"

Background

Symptomatic capsular contracture occurs in about 10 % of primary breast augmentations and in more than double that rate in reconstruction after mastectomy, especially in the setting of radiation. Mast cells, traditionally associated with immune response and inflammation, secrete profibrotic mediators and may play a role in capsule formation and contracture. We analyzed the mast cell and fibroblast populations in breast capsule tissue from patients who underwent capsular excision.

Methods

Capsule tissue was collected from patients who underwent exchange of tissue expanders for permanent implants, revision of reconstruction, or revision augmentation. Breast capsule tissues were prepared for histological analyses of mast cells, fibroblasts, and collagen. Mast cells and fibroblasts were isolated from capsule tissue and screened for mediators and receptor expression.

Results

In breast capsule tissue, the average numbers of mast cells and fibroblasts were 9 ± 1/mm(2) and 33 ± 10/mm(2), respectively. There were significantly more mast cells on the posterior side than on the anterior side of the capsule tissue (12 ± 2 vs. 6 ± 1/mm(2), p < 0.01). Baker grade IV capsules had an increased number of fibroblasts compared to Baker grade I capsules (93 ± 9 vs. 40 ± 19/mm(2), p < 0.001). In breast capsule tissue, mast cells contained renin, histamine, and TGF-β, and their respective receptors, AT1R, H1R, and TGF-βRI were expressed by fibroblasts.

Conclusion

These data indicate that within breast capsule tissue mast cells contain mediators that may activate neighboring fibroblasts. Understanding the role of mast cells in pathologic periprosthetic breast capsule formation may lead to novel therapies to prevent and treat capsular contracture.

No level assigned

This journal requires that authors assign a level of evidence to each submission to which Evidence-Based Medicine rankings are applicable. This excludes Review Articles, Book Reviews, and manuscripts that concern Basic Science, Animal Studies, Cadaver Studies, and Experimental Studies. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266.",2014-05-09 +24469313,iRSpot-TNCPseAAC: identify recombination spots with trinucleotide composition and pseudo amino acid components.,"Meiosis and recombination are the two opposite aspects that coexist in a DNA system. As a driving force for evolution by generating natural genetic variations, meiotic recombination plays a very important role in the formation of eggs and sperm. Interestingly, the recombination does not occur randomly across a genome, but with higher probability in some genomic regions called ""hotspots"", while with lower probability in so-called ""coldspots"". With the ever-increasing amount of genome sequence data in the postgenomic era, computational methods for effectively identifying the hotspots and coldspots have become urgent as they can timely provide us with useful insights into the mechanism of meiotic recombination and the process of genome evolution as well. To meet the need, we developed a new predictor called ""iRSpot-TNCPseAAC"", in which a DNA sample was formulated by combining its trinucleotide composition (TNC) and the pseudo amino acid components (PseAAC) of the protein translated from the DNA sample according to its genetic codes. The former was used to incorporate its local or short-rage sequence order information; while the latter, its global and long-range one. 
Compared with the best existing predictor in this area, iRSpot-TNCPseAAC achieved higher rates in accuracy, Mathew's correlation coefficient, and sensitivity, indicating that the new predictor may become a useful tool for identifying the recombination hotspots and coldspots, or, at least, become a complementary tool to the existing methods. It has not escaped our notice that the aforementioned novel approach to incorporate the DNA sequence order information into a discrete model may also be used for many other genome analysis problems. The web-server for iRSpot-TNCPseAAC is available at http://www.jci-bioinfo.cn/iRSpot-TNCPseAAC. Furthermore, for the convenience of the vast majority of experimental scientists, a step-by-step guide is provided on how to use the current web server to obtain their desired result without the need to follow the complicated mathematical equations.",2014-01-24 +24028489,Zebra: a web server for bioinformatic analysis of diverse protein families.,"During evolution of proteins from a common ancestor, one functional property can be preserved while others can vary leading to functional diversity. A systematic study of the corresponding adaptive mutations provides a key to one of the most challenging problems of modern structural biology - understanding the impact of amino acid substitutions on protein function. The subfamily-specific positions (SSPs) are conserved within functional subfamilies but are different between them and, therefore, seem to be responsible for functional diversity in protein superfamilies. Consequently, a corresponding method to perform the bioinformatic analysis of sequence and structural data has to be implemented in the common laboratory practice to study the structure-function relationship in proteins and develop novel protein engineering strategies. This paper describes Zebra web server - a powerful remote platform that implements a novel bioinformatic analysis algorithm to study diverse protein families. 
It is the first application that provides specificity determinants at different levels of functional classification, therefore addressing complex functional diversity of large superfamilies. Statistical analysis is implemented to automatically select a set of highly significant SSPs to be used as hotspots for directed evolution or rational design experiments and analyzed studying the structure-function relationship. Zebra results are provided in two ways - (1) as a single all-in-one parsable text file and (2) as PyMol sessions with structural representation of SSPs. Zebra web server is available at http://biokinet.belozersky.msu.ru/zebra .",2013-09-13 +23619610,PHAISTOS: a framework for Markov chain Monte Carlo simulation and inference of protein structure.,"We present a new software framework for Markov chain Monte Carlo sampling for simulation, prediction, and inference of protein structure. The software package contains implementations of recent advances in Monte Carlo methodology, such as efficient local updates and sampling from probabilistic models of local protein structure. These models form a probabilistic alternative to the widely used fragment and rotamer libraries. Combined with an easily extendible software architecture, this makes PHAISTOS well suited for Bayesian inference of protein structure from sequence and/or experimental data. Currently, two force-fields are available within the framework: PROFASI and OPLS-AA/L, the latter including the generalized Born surface area solvent model. A flexible command-line and configuration-file interface allows users quickly to set up simulations with the desired configuration. PHAISTOS is released under the GNU General Public License v3.0. Source code and documentation are freely available from http://phaistos.sourceforge.net. 
The software is implemented in C++ and has been tested on Linux and OSX platforms.",2013-04-26 +23035717,Determining similarity in histological images using graph-theoretic description and matching methods for content-based image retrieval in medical diagnostics.,"

Background

Computer-based analysis of digitalized histological images has been gaining increasing attention, due to their extensive use in research and routine practice. The article aims to contribute towards the description and retrieval of histological images by employing a structural method using graphs. Due to their expressive ability, graphs are considered as a powerful and versatile representation formalism and have obtained a growing consideration especially by the image processing and computer vision community.

Methods

The article describes a novel method for determining similarity between histological images through graph-theoretic description and matching, for the purpose of content-based retrieval. A higher order (region-based) graph-based representation of breast biopsy images has been attained and a tree-search based inexact graph matching technique has been employed that facilitates the automatic retrieval of images structurally similar to a given image from large databases.

Results

The results obtained and evaluation performed demonstrate the effectiveness and superiority of graph-based image retrieval over a common histogram-based technique. The employed graph matching complexity has been reduced compared to the state-of-the-art optimal inexact matching methods by applying a pre-requisite criterion for matching of nodes and a sophisticated design of the estimation function, especially the prognosis function.

Conclusion

The proposed method is suitable for the retrieval of similar histological images, as suggested by the experimental and evaluation results obtained in the study. It is intended for the use in Content Based Image Retrieval (CBIR)-requiring applications in the areas of medical diagnostics and research, and can also be generalized for retrieval of different types of complex images.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1224798882787923.",2012-10-04 +22646978,Molecular ecological network analyses.,"

Background

Understanding the interaction among different species within a community and their responses to environmental changes is a central goal in ecology. However, defining the network structure in a microbial community is very challenging due to their extremely high diversity and as-yet uncultivated status. Although recent advance of metagenomic technologies, such as high throughout sequencing and functional gene arrays, provide revolutionary tools for analyzing microbial community structure, it is still difficult to examine network interactions in a microbial community based on high-throughput metagenomics data.

Results

Here, we describe a novel mathematical and bioinformatics framework to construct ecological association networks named molecular ecological networks (MENs) through Random Matrix Theory (RMT)-based methods. Compared to other network construction methods, this approach is remarkable in that the network is automatically defined and robust to noise, thus providing excellent solutions to several common issues associated with high-throughput metagenomics data. We applied it to determine the network structure of microbial communities subjected to long-term experimental warming based on pyrosequencing data of 16 S rRNA genes. We showed that the constructed MENs under both warming and unwarming conditions exhibited topological features of scale free, small world and modularity, which were consistent with previously described molecular ecological networks. Eigengene analysis indicated that the eigengenes represented the module profiles relatively well. In consistency with many other studies, several major environmental traits including temperature and soil pH were found to be important in determining network interactions in the microbial communities examined. To facilitate its application by the scientific community, all these methods and statistical tools have been integrated into a comprehensive Molecular Ecological Network Analysis Pipeline (MENAP), which is open-accessible now (http://ieg2.ou.edu/MENA).

Conclusions

The RMT-based molecular ecological network analysis provides powerful tools to elucidate network interactions in microbial communities and their responses to environmental changes, which are fundamentally important for research in microbial ecology and environmental microbiology.",2012-05-30 +23034962,Dietary phylloquinone intake and risk of type 2 diabetes in elderly subjects at high risk of cardiovascular disease.,"

Background

Limited evidence from human and animal studies has suggested that vitamin K has a potentially beneficial role in glucose metabolism and insulin resistance.

Objective

We analyzed the cross-sectional and longitudinal associations between dietary phylloquinone intake and type 2 diabetes in elderly subjects at high cardiovascular risk.

Design

Cross-sectional associations were tested in 1925 men and women in the Prevention with the Mediterranean Diet trial. A longitudinal analysis was conducted on 1069 individuals free of diabetes at baseline (median follow-up: 5.5 y). Biochemical and anthropometric variables were obtained yearly. Dietary intake was collected during each annual visit by using a food-frequency questionnaire, and phylloquinone intake was estimated by using the USDA database. The occurrence of type 2 diabetes during follow-up was assessed by using American Diabetes Association criteria.

Results

Dietary phylloquinone at baseline was significantly lower in subjects who developed type 2 diabetes during the study. After adjustment for potential confounders, risk of incident diabetes was 17% lower for each additional intake of 100 μg phylloquinone/d. Moreover, subjects who increased their dietary intake of vitamin K during the follow-up had a 51% reduced risk of incident diabetes compared with subjects who decreased or did not change the amount of phylloquinone intake.

Conclusion

We conclude that dietary phylloquinone intake is associated with reduced risk of type 2 diabetes. This trial was registered at http://www.controlled-trials.com as ISRCTN35739639.",2012-10-03 +22665286,Analysis and management of gene and allelic diversity in subdivided populations using the software program METAPOP.,"METAPOP (http://webs.uvigo.es/anpefi/metapop/) is a desktop application that provides an analysis of gene and allelic diversity in subdivided populations from molecular genotype or coancestry data as well as a tool for the management of genetic diversity in conservation programs. A partition of gene and allelic diversity is made within and between subpopulations, in order to assess the contribution of each subpopulation to global diversity for descriptive population genetics or conservation purposes. In the context of management of subdivided populations in in situ conservation programs, the software also determines the optimal contributions (i.e., number of offspring) of each individual, the number of migrants, and the particular subpopulations involved in the exchange of individuals in order to maintain the largest level of gene diversity in the whole population with a desired control in the rate of inbreeding. The partition of gene and allelic diversity within and between subpopulations is illustrated with microsatellite and SNP data from human populations.",2012-01-01 +24447576,Expression of serum amyloid A in uterine cervical cancer.,"

Background

As an acute-phase protein, serum amyloid A (SAA) is expressed primarily in the liver. However, its expression in extrahepatic tissues, especially in tumor tissues, was also demonstrated recently. In our study, we investigated the expression of SAA in uterine cervical carcinomas, and our results suggested its potential as a serum biomarker.

Methods

Quantitative real-time polymerase chain reaction (RT-PCR), immunohistochemistry (IHC) and enzyme-linked immunosorbent assay (ELISA) were used to evaluate the SAA gene and protein expression levels in the tissues and sera of patients with non-neoplastic lesions (NNLs), cervical intraepithelial neoplasia (CIN) and cervical carcinoma (CC).

Results

Compared with NNLs, the SAA gene (SAA1 and SAA4) expression levels were significantly higher in uterine CC (mean copy numbers: 138.7 vs. 5.01, P < 0.000; and 1.8 vs. 0.079, P = 0.001, respectively) by real-time PCR. IHC revealed cytoplasmic SAA protein staining in tissues from adenocarcinoma and squamous cell carcinoma of the cervix. The median serum concentrations (μg/ml) of SAA were 6.02 in patients with NNLs and 10.98 in patients with CIN (P = 0.31). In contrast, the median serum SAA concentration was 23.7 μg/ml in uterine CC patients, which was significantly higher than the SAA concentrations of the NNL group (P = 0.002) and the CIN group (P = 0.024).

Conclusions

Our data suggested that SAA might be a uterine CC cell product. High SAA concentrations in the serum of CC patients may have a role in monitoring disease occurrence and could have therapeutic applications.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1433263219102962.",2014-01-21 +24069234,Age-specific average head template for typically developing 6-month-old infants.,"Due to the rapid anatomical changes that occur within the brain structure in early human development and the significant differences between infant brains and the widely used standard adult templates, it becomes increasingly important to utilize appropriate age- and population-specific average templates when analyzing infant neuroimaging data. In this study we created a new and highly detailed age-specific unbiased average head template in a standard MNI152-like infant coordinate system for healthy, typically developing 6-month-old infants by performing linear normalization, diffeomorphic normalization and iterative averaging processing on 60 subjects' structural images. The resulting age-specific average templates in a standard MNI152-like infant coordinate system demonstrate sharper anatomical detail and clarity compared to existing infant average templates and successfully retains the average head size of the 6-month-old infant. An example usage of the average infant templates transforms magnetoencephalography (MEG) estimated activity locations from MEG's subject-specific head coordinate space to the standard MNI152-like infant coordinate space. We also created a new atlas that reflects the true 6-month-old infant brain anatomy. Average templates and atlas are publicly available on our website (http://ilabs.washington.edu/6-m-templates-atlas).",2013-09-12 +23025271,Identification of structural motifs in the E2 glycoprotein of Chikungunya involved in virus-host interaction.,"Chikungunya fever is one of the reemerging vector-borne diseases. It has become a major global health problem especially in the developing countries. There are no vaccines or specific antiviral drugs available to date. 
This study reports small molecule inhibitors of envelope glycoprotein 2 (E2 glycoprotein) which are predicted based on Chikungunya virus-host interactions. E2 glycoprotein of Chikungunya virus interacts at 216 residue of the host receptor protein which plays a vital role in initiating infection. Understanding the structural aspects of E2 glycoprotein is crucial to develop specific inhibitors to prevent the virus binding from host receptors. In silico method was adopted to predict the sequence motifs of envelope protein, as the method like yeast two hybrid system is laborious, time consuming, and costly. The E2 glycoprotein structure of the Indian isolate was modeled using two templates (2XFC and 3JOC) and then validated. The class III PDZ domain binding motif was found to be identified at 213-216 amino acids. The corresponding peptide structures which recognize the PDZ domain binding motif were identified by the literature search and were used for generating five point pharmacophore model (ADDDR) containing acceptor, donor and aromatic ring features. Databases such as Asinex, TosLab and Maybridge were searched for the matches for the predicted pharmacophore model. Two compounds were identified as lead molecules as their glide score is > 5 kcal/mol. Since the pharmacophore model is developed based on Chikungunya virus-host interaction, it can be used for designing promising antiviral lead compounds for the treatment of Chikungunya fever.An animated Interactive 3D Complement (I3DC) is available in Proteopedia at http://proteopedia.org/w/Journal:JBSD:21.",2012-10-02 +21859476,Cistrome: an integrative platform for transcriptional regulation studies.,"The increasing volume of ChIP-chip and ChIP-seq data being generated creates a challenge for standard, integrative and reproducible bioinformatics data analysis platforms. We developed a web-based application called Cistrome, based on the Galaxy open source framework. 
In addition to the standard Galaxy functions, Cistrome has 29 ChIP-chip- and ChIP-seq-specific tools in three major categories, from preliminary peak calling and correlation analyses to downstream genome feature association, gene expression analyses, and motif discovery. Cistrome is available at http://cistrome.org/ap/.",2011-08-22 +23709495,miRTCat: a comprehensive map of human and mouse microRNA target sites including non-canonical nucleation bulges.,"

Summary

MicroRNAs (miRNAs) regulate various biological functions by binding hundreds of transcripts to impart post-transcriptional repression. Recently, by applying a transcriptome-wide experimental method for identifying miRNA target sites (Ago HITS-CLIP), a novel non-canonical target site, named 'nucleation bulge', was discovered as widespread, functional and evolutionally conserved. Although such non-canonical nucleation bulges have been proven to be predictive by using 'pivot pairing rule' and sequence conservation, this approach has not been applied yet. To facilitate the functional studies of non-canonical miRNA targets, we implement miRTCat: a comprehensive searchable map of miRNA target sites, including non-canonical nucleation bulges, not only mapped in experimentally verified miRNA-bound regions but also predicted in all 3'-untranslated regions (3'-UTRs) derived from human and mouse (∼15.6% as expected false-positive results).

Availability

http://ion.skku.edu/mirtcat.

Contact

swchi@skku.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-24 +30727336,First Report of Leaf Spot of Sweet Basil Caused by Cercospora guatemalensis in Korea.,"Sweet basil, Ocimum basilicum L., is a fragrant herb belonging to the family Lamiaceae. Originated in India 5,000 years ago, sweet basil plays a significant role in diverse cuisines across the world, especially in Asian and Italian cooking. In October 2008, hundreds of plants showing symptoms of leaf spot with nearly 100% incidence were found in polyethylene tunnels at an organic farm in Icheon, Korea. Leaf spots were circular to subcircular, water-soaked, dark brown with grayish center, and reached 10 mm or more in diameter. Diseased leaves defoliated prematurely. The damage purportedly due to this disease has reappeared every year with confirmation of the causal agent made again in 2011. A cercosporoid fungus was consistently associated with disease symptoms. Stromata were brown, consisting of brown cells, and 10 to 40 μm in width. Conidiophores were fasciculate (n = 2 to 10), olivaceous brown, paler upwards, straight to mildly curved, not geniculate in shorter ones or one to two times geniculate in longer ones, 40 to 200 μm long, occasionally reaching up to 350 μm long, 3.5 to 6 μm wide, and two- to six-septate. Conidia were hyaline, acicular to cylindric, straight in shorter ones, flexuous to curved in longer ones, truncate to obconically truncate at the base, three- to 16-septate, and 50 to 300 × 3.5 to 4.5 μm. Morphological characteristics of the fungus were consistent with the previous reports of Cercospora guatemalensis A.S. Mull. & Chupp (1,3). Voucher specimens were housed at Korea University herbarium (KUS). An isolate from KUS-F23757 was deposited in the Korean Agricultural Culture Collection (Accession No. KACC43980). Fungal DNA was extracted with DNeasy Plant Mini DNA Extraction Kits (Qiagen Inc., Valencia, CA). 
The complete internal transcribed spacer (ITS) region of rDNA was amplified with the primers ITS1/ITS4 and sequenced. The resulting sequence of 548 bp was deposited in GenBank (Accession No. JQ995781). This showed >99% similarity with sequences of many Cercospora species, indicating their close phylogenetic relationship. Isolate of KACC43980 was used in the pathogenicity tests. Hyphal suspensions were prepared by grinding 3-week-old colonies grown on PDA with distilled water using a mortar and pestle. Five plants were inoculated with hyphal suspensions and five plants were sprayed with sterile distilled water. The plants were covered with plastic bags to maintain a relative humidity of 100% for 24 h and then transferred to a 25 ± 2°C greenhouse with a 12-h photoperiod. Typical symptoms of necrotic spots appeared on the inoculated leaves 6 days after inoculation, and were identical to the ones observed in the field. C. guatemalensis was reisolated from symptomatic leaf tissues, confirming Koch's postulates. No symptoms were observed on control plants. Previously, the disease was reported in Malawi, India, China, and Japan (2,3), but not in Korea. To our knowledge, this is the first report of C. guatemalensis on sweet basil in Korea. Since farming of sweet basil has recently started on a commercial scale in Korea, the disease poses a serious threat to safe production of this herb, especially in organic farming. References: (1) C. Chupp. A Monograph of the Fungus Genus Cercospora. Ithaca, NY, 1953. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology & Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , May 5, 2012. (3) J. Nishikawa et al. J. Gen. Plant Pathol. 68:46, 2002.",2012-10-01 +22906111,A possible mechanism for Inv22-related F8 large deletions in severe hemophilia A patients with high responding factor VIII inhibitors.,"

Background

Intron 22 inversion (Inv22) of the coagulation factor (F)VIII gene (F8) is a frequent cause of severe hemophilia A. In addition to Inv22, a variety of F8 mutations (1492 unique mutations) causing hemophilia A have been reported, of which 171 involve deletions of over 50 bp (HAMSTeRs database; http://hadb.org.uk/). However, only 10% of these large deletions have been fully characterized at the nucleotide level.

Patients and methods

We investigated gene abnormalities in three unrelated severe hemophilia A patients with high titer FVIII inhibitors. They had previously been shown to carry large deletions of the F8, but the precise gene abnormalities remain to be elucidated.

Results

Inverse shifting-PCR (IS-PCR) Inv22 diagnostic tests revealed that these patients carried either type I or II Inv22. However, they showed a wild-type (WT) pattern in the IS-PCR Inv22 complementary tests. We further analyzed their X chromosomes to account for the puzzling results, and found that they had different centromeric breakpoints in the Inv22 X chromosomes, adjacent to the palindromic regions containing int22h-2 or -3, and their spacer region, respectively. The connections appeared to be shifted towards the telomere of the WT F8 Xq28, resulting in a new telomere with an additional intact int22h copy.

Conclusions

These gene rearrangements might result from double-strand breaks in the most distal regions of the long arms of the Inv22 X chromosomes, followed by DNA restorations using the WT F8 Xq28 by non-homologous end joining or break-induced replication; thus leading to large F8 deletions in severe hemophilia A patients.",2012-10-01 +30727303,"Canker on Bark of Populus spp. Caused by Cytospora tritici, a New Disease in China.","Species of Cytospora Ehrenb. and associated teleomorphs cause dieback and canker on over 85 species of angiosperm and gymnosperm plants throughout the world (2). Cytospora tritici Punith. was first observed on Triticum aestivum in Germany in 1980 but may also affect many hardwoods (3). During a survey of landscape trees in 2007, Populus spp. with cankers were found in Fushun, Baoxing, and Luding counties and Chengdu city in Sichuan Province. In these trees, bark canker pathogens discolored the sapwood. During damp weather, conidia were pushed out and formed orange spore horns. Conidiomatal stromata were immersed in bark, prominent, and 1.53 ± 0.33 mm in diameter (n = 10). Discs were white to grey, circular, oval, and 0.59 ± 0.14 mm in diameter (n = 10), with one ostiole per disc. Ostioles were dark grey. Locules were multi-chambered, chambers irregular. Conidia were elongate-allantoid shaped, hyaline, aseptate, 5.04 ± 0.65 μm long (n = 50), and 1.22 ± 0.13 μm wide (n = 50). Fragments (5 × 5 mm2) of the junction of diseased and healthy tissues were surface sterilized with 1% NaOCl for 30 s and then rinsed twice in sterile distilled water. The pieces were placed on potato dextrose agar (PDA) plates and incubated at 25°C for 7 days. The obtained isolates were cultured on PDA at 25°C in diffuse fluorescent light for 30 days. Upon isolation, the mycelium grew at a rate of 3 to 5 mm per day at 25°C, forming pale white-to-pure white flat colonies. Conidiomata never formed on PDA. 
ITS1-5.8S-ITS2 sequences were amplified via PCR from genomic DNA obtained from mycelia using universal primers ITS1 and ITS4 (4). The amplification products showed 100% sequence homology with C. tritici isolate DQ243812 from the GenBank database. The ITS sequences were submitted to GenBank (Accession No. JQ277333 to JQ277336). Pathogenicity was confirmed by inoculating 20 disinfected (70% ethanol) Populus tomentosa cuttings. Cuttings were incubated at 25°C for 30 days. Another two cuttings were treated with water agar as controls. In 18 of the 20 cuttings, the cambium developed a brown color and appeared water soaked 15 days later, whereas controls did not develop any symptoms. C. tritici was reisolated from symptomatic tissues. To our knowledge, this is the first report of C. tritici in China causing canker on Populus spp. Cytospora canker is common in practically all countries where poplar are grown. Canker expansion increases when tree defenses are compromised, usually by seasonal dormancy but also by drought, cold injury of wood, sun scald of bark, flooding of root, hail, freezing, or other stress (1). Future spread of C. tritici to western China is considered highly likely. References: (1) G. C. Adams et al. Stud. Mycol. 52:1, 2005. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ March 25, 2012. (3) E. Punithalingam. Nova Hedwigia 32:585, 1980. (4) T. J. White et al. Page 315 in: PCR Protocols: A Guide to Methods and Applications. M. A. Innis et al., eds. Academic Press, San Diego, 1990.",2012-10-01 +23049702,A statistical framework for accurate taxonomic assignment of metagenomic sequencing reads.,"The advent of next-generation sequencing technologies has greatly promoted the field of metagenomics which studies genetic material recovered directly from an environment. 
Characterization of genomic composition of a metagenomic sample is essential for understanding the structure of the microbial community. Multiple genomes contained in a metagenomic sample can be identified and quantitated through homology searches of sequence reads with known sequences catalogued in reference databases. Traditionally, reads with multiple genomic hits are assigned to non-specific or high ranks of the taxonomy tree, thereby impacting on accurate estimates of relative abundance of multiple genomes present in a sample. Instead of assigning reads one by one to the taxonomy tree as many existing methods do, we propose a statistical framework to model the identified candidate genomes to which sequence reads have hits. After obtaining the estimated proportion of reads generated by each genome, sequence reads are assigned to the candidate genomes and the taxonomy tree based on the estimated probability by taking into account both sequence alignment scores and estimated genome abundance. The proposed method is comprehensively tested on both simulated datasets and two real datasets. It assigns reads to the low taxonomic ranks very accurately. Our statistical approach of taxonomic assignment of metagenomic reads, TAMER, is implemented in R and available at http://faculty.wcas.northwestern.edu/hji403/MetaR.htm.",2012-10-01 +22368245,A Bayesian approach to targeted experiment design.,"

Motivation

Systems biology employs mathematical modelling to further our understanding of biochemical pathways. Since the amount of experimental data on which the models are parameterized is often limited, these models exhibit large uncertainty in both parameters and predictions. Statistical methods can be used to select experiments that will reduce such uncertainty in an optimal manner. However, existing methods for optimal experiment design (OED) rely on assumptions that are inappropriate when data are scarce considering model complexity.

Results

We have developed a novel method to perform OED for models that cope with large parameter uncertainty. We employ a Bayesian approach involving importance sampling of the posterior predictive distribution to predict the efficacy of a new measurement at reducing the uncertainty of a selected prediction. We demonstrate the method by applying it to a case where we show that specific combinations of experiments result in more precise predictions.

Availability and implementation

Source code is available at: http://bmi.bmt.tue.nl/sysbio/software/pua.html.",2012-02-24 +23034089,EpiExplorer: live exploration and global analysis of large epigenomic datasets.,"Epigenome mapping consortia are generating resources of tremendous value for studying epigenetic regulation. To maximize their utility and impact, new tools are needed that facilitate interactive analysis of epigenome datasets. Here we describe EpiExplorer, a web tool for exploring genome and epigenome data on a genomic scale. We demonstrate EpiExplorer's utility by describing a hypothesis-generating analysis of DNA hydroxymethylation in relation to public reference maps of the human epigenome. All EpiExplorer analyses are performed dynamically within seconds, using an efficient and versatile text indexing scheme that we introduce to bioinformatics. EpiExplorer is available at http://epiexplorer.mpi-inf.mpg.de.",2012-10-03 +23106040,Regulation and 3 dimensional culture of tertiary follicle growth.,"It has been revealed that multiple cohorts of tertiary follicles develop during some animal estrous cycle and the human menstrual cycle. To reach developmental competence, oocytes need the support of somatic cells. During embryogenesis, the primordial germ cells appear, travel to the gonadal rudiments, and form follicles. The female germ cells develop within the somatic cells of the ovary, granulosa cells, and theca cells. How the oocyte and follicle cells support each other has been seriously studied. The latest technologies in genes and proteins and genetic engineering have allowed us to collect a great deal of information about folliculogenesis. For example, a few web pages (http://www.ncbi.nlm.nih.gov; http://mrg.genetics.washington.edu) provide access to databases of genomes, sequences of transcriptomes, and various tools for analyzing and discovering genes important in ovarian development. 
Formation of the antrum (tertiary follicle) is the final phase of folliculogenesis and the transition from intraovarian to extraovarian regulation. This final step coordinates with the hypothalamic-pituitary-ovarian axis. On the other hand, currently, follicle physiology is under intense investigation, as little is known about how to overcome women's ovarian problems or how to develop competent oocytes from in vitro follicle culture or transplantation. In this review, some of the known roles of hormones and some of the genes involved in tertiary follicle growth and the general characteristics of tertiary follicles are summarized. In addition, in vitro culture of tertiary follicles is also discussed as a study model and an assisted reproductive technology model.",2012-09-30 +23613488,CellAging: a tool to study segregation and partitioning in division in cell lineages of Escherichia coli.,"

Motivation

Cell division in Escherichia coli is morphologically symmetric. However, unwanted protein aggregates are segregated to the cell poles and, after divisions, accumulate at older poles, generating asymmetries in sister cells' vitality. Novel single-molecule detection techniques allow observing aging-related processes in vivo, over multiple generations, informing on the underlying mechanisms.

Results

CellAging is a tool to automatically extract information on polar segregation and partitioning in division of aggregates in E.coli, and on cellular vitality. From time-lapse, parallel brightfield and fluorescence microscopy images, it performs cell segmentation, alignment of brightfield and fluorescence images, lineage construction and pole age determination, and it computes aging-related features. We exemplify its use by analyzing spatial distributions of fluorescent protein aggregates from images of cells across generations.

Availability

CellAging, instructions and an example are available at http://www.cs.tut.fi/%7esanchesr/cellaging/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-23 +23424620,Improving microbial genome annotations in an integrated database context.,"Effective comparative analysis of microbial genomes requires a consistent and complete view of biological data. Consistency regards the biological coherence of annotations, while completeness regards the extent and coverage of functional characterization for genomes. We have developed tools that allow scientists to assess and improve the consistency and completeness of microbial genome annotations in the context of the Integrated Microbial Genomes (IMG) family of systems. All publicly available microbial genomes are characterized in IMG using different functional annotation and pathway resources, thus providing a comprehensive framework for identifying and resolving annotation discrepancies. A rule based system for predicting phenotypes in IMG provides a powerful mechanism for validating functional annotations, whereby the phenotypic traits of an organism are inferred based on the presence of certain metabolic reactions and pathways and compared to experimentally observed phenotypes. The IMG family of systems are available at http://img.jgi.doe.gov/.",2013-02-12 +23280520,"SMILES-based QSAR models for the calcium channel-antagonistic effect of 1,4-dihydropyridines.","The activity of 72 1,4-dihydropyridines as calcium channel antagonists was examined. The simplified molecular input-line entry system (SMILES) was used as representation of the molecular structure of the calcium channel antagonists. Quantitative structure-activity relationships (QSARs) were developed using CORAL software (http://www.insilico.eu/CORAL) for four random splits of the data into the training and test sets. Using the Monte Carlo method, the CORAL software generated the optimal descriptors for one-variable models. The reproducibility of each model was tested performing three runs of the Monte Carlo optimization. 
The obtained results reveal good predictive potential of the applied approach: The correlation coefficients (r(2) ) for the test sets of the four random splits are 0.9571, 0.9644, 0.9836, and 0.9444.",2012-12-20 +22586178,HCS-Analyzer: open source software for high-content screening data correction and analysis.,"

Motivation

High-throughput screening is a powerful technology principally used by pharmaceutical industries allowing the identification of molecules of interest within large libraries. Originally target based, cellular assays provide a way to test compounds (or other biological material such as small interfering RNA) in a more physiologically realistic in vitro environment. High-content screening (HCS) platforms are now available at lower cost, giving the opportunity for universities or research institutes to access those technologies for research purposes. However, the amount of information extracted from each experiment is multiplexed and hence difficult to handle. In such context, there is an important need for an easy-to-use, but still powerful software able to manage multidimensional screening data by performing adapted quality control and classification. HCS-analyzer includes: a user-friendly interface specifically dedicated to HCS readouts, an automated approach to identify systematic errors potentially occurring during screening and a set of tools to classify, cluster and identify phenotypes of interest among large and multivariate data.

Availability

The application, the C# .Net source code, as well as detailed documentation, are freely available at the following URL: http://hcs-analyzer.ip-korea.org.",2012-05-13 +23698863,Assembling the 20 Gb white spruce (Picea glauca) genome from whole-genome shotgun sequencing data.,"

Unlabelled

White spruce (Picea glauca) is a dominant conifer of the boreal forests of North America, and providing genomics resources for this commercially valuable tree will help improve forest management and conservation efforts. Sequencing and assembling the large and highly repetitive spruce genome though pushes the boundaries of the current technology. Here, we describe a whole-genome shotgun sequencing strategy using two Illumina sequencing platforms and an assembly approach using the ABySS software. We report a 20.8 giga base pairs draft genome in 4.9 million scaffolds, with a scaffold N50 of 20,356 bp. We demonstrate how recent improvements in the sequencing technology, especially increasing read lengths and paired end reads from longer fragments have a major impact on the assembly contiguity. We also note that scalable bioinformatics tools are instrumental in providing rapid draft assemblies.

Availability

The Picea glauca genome sequencing and assembly data are available through NCBI (Accession#: ALWZ0100000000 PID: PRJNA83435). http://www.ncbi.nlm.nih.gov/bioproject/83435.",2013-05-22 +23505299,PeptideLocator: prediction of bioactive peptides in protein sequences.,"

Motivation

Peptides play important roles in signalling, regulation and immunity within an organism. Many have successfully been used as therapeutic products often mimicking naturally occurring peptides. Here we present PeptideLocator for the automated prediction of functional peptides in a protein sequence.

Results

We have trained a machine learning algorithm to predict bioactive peptides within protein sequences. PeptideLocator performs well on training data achieving an area under the curve of 0.92 when tested in 5-fold cross-validation on a set of 2202 redundancy reduced peptide containing protein sequences. It has predictive power when applied to antimicrobial peptides, cytokines, growth factors, peptide hormones, toxins, venoms and other peptides. It can be applied to refine the choice of experimental investigations in functional studies of proteins.

Availability and implementation

PeptideLocator is freely available for academic users at http://bioware.ucd.ie/.",2013-03-16 +23019219,"METscout: a pathfinder exploring the landscape of metabolites, enzymes and transporters.","METscout (http://metscout.mpg.de) brings together metabolism and gene expression landscapes. It is a MySQL relational database linking biochemical pathway information with 3D patterns of gene expression determined by robotic in situ hybridization in the E14.5 mouse embryo. The sites of expression of ∼1500 metabolic enzymes and of ∼350 solute carriers (SLCs) were included and are accessible as single cell resolution images and in the form of semi-quantitative image abstractions. METscout provides several graphical web-interfaces allowing navigation through complex anatomical and metabolic information. Specifically, the database shows where in the organism each of the many metabolic reactions take place and where SLCs transport metabolites. To link enzymatic reactions and transport, the KEGG metabolic reaction network was extended to include metabolite transport. This network in conjunction with spatial expression pattern of the network genes allows for a tracing of metabolic reactions and transport processes across the entire body of the embryo.",2012-09-27 +23023028,Impact of DNA polymorphisms in key DNA base excision repair proteins on cancer risk.,"Genetic variation in DNA repair genes can modulate DNA repair capacity and may be related to cancer risk. However, study findings have been inconsistent. Inheritance of variant DNA repair genes is believed to influence individual susceptibility to the development of environmental cancer. Reliable knowledge on which the base excision repair (BER) sequence variants are associated with cancer risk would help elucidate the mechanism of cancer. Given that most of the previous studies had inadequate statistical power, we have conducted a systematic review on sequence variants in three important BER proteins. 
Here, we review published studies on the association between polymorphism in candidate BER genes and cancer risk. We focused on three key BER genes: 8-oxoguanine DNA glycosylase (OGG1), apurinic/apyrimidinic endonuclease (APE1/APEX1) and x-ray repair cross-complementing group 1 (XRCC1). These specific DNA repair genes were selected because of their critical role in maintaining genome integrity and, based on previous studies, suggesting that single-nucleotide polymorphisms (SNPs) in these genes have protective or deleterious effects on cancer risk. A total of 136 articles in the December 13, 2010 MEDLINE database (National Center for Biotechnology Information, http://www.ncbi.nlm.nih.gov/pubmed/) reporting polymorphism in OGG1, XRCC1 or APE1 genes were analyzed. Many of the reported SNPs had diverse association with specific human cancers. For example, there was a positive association between the OGG1 Ser326Cys variant and gastric and lung cancer, while the XRCC1 Arg399Gln variant was associated with reduced cancer risk. Gene-environment interactions have been noted and may be important for colorectal and lung cancer risk and possibly other human cancers.",2012-09-27 +21460451,Recent advances in the CRANK software suite for experimental phasing.,"For its first release in 2004, CRANK was shown to effectively detect and phase anomalous scatterers from single-wavelength anomalous diffraction data. Since then, CRANK has been significantly improved and many more structures can be built automatically with single- or multiple-wavelength anomalous diffraction or single isomorphous replacement with anomalous scattering data. Here, the new algorithms that have been developed that have led to these substantial improvements are discussed and CRANK's performance on over 100 real data sets is shown. 
The latest version of CRANK is freely available for download at http://www.bfsc.leidenuniv.nl/software/crank/ and from CCP4 (http://www.ccp4.ac.uk/).",2011-03-18 +23009059,Separating metagenomic short reads into genomes via clustering.,"

Unlabelled

Background

The metagenomics approach allows the simultaneous sequencing of all genomes in an environmental sample. This results in high complexity datasets, where in addition to repeats and sequencing errors, the number of genomes and their abundance ratios are unknown. Recently developed next-generation sequencing (NGS) technologies significantly improve the sequencing efficiency and cost. On the other hand, they result in shorter reads, which makes the separation of reads from different species harder. Among the existing computational tools for metagenomic analysis, there are similarity-based methods that use reference databases to align reads and composition-based methods that use composition patterns (i.e., frequencies of short words or l-mers) to cluster reads. Similarity-based methods are unable to classify reads from unknown species without close references (which constitute the majority of reads). Since composition patterns are preserved only in significantly large fragments, composition-based tools cannot be used for very short reads, which becomes a significant limitation with the development of NGS. A recently proposed algorithm, AbundanceBin, introduced another method that bins reads based on predicted abundances of the genomes sequenced. However, it does not separate reads from genomes of similar abundance levels.

Results

In this work, we present a two-phase heuristic algorithm for separating short paired-end reads from different genomes in a metagenomic dataset. We use the observation that most of the l-mers belong to unique genomes when l is sufficiently large. The first phase of the algorithm results in clusters of l-mers each of which belongs to one genome. During the second phase, clusters are merged based on l-mer repeat information. These final clusters are used to assign reads. The algorithm could handle very short reads and sequencing errors. It is initially designed for genomes with similar abundance levels and then extended to handle arbitrary abundance ratios. The software can be downloaded for free at http://www.cs.ucr.edu/~tanaseio/toss.htm.

Conclusions

Our tests on a large number of simulated metagenomic datasets concerning species at various phylogenetic distances demonstrate that genomes can be separated if the number of common repeats is smaller than the number of genome-specific repeats. For such genomes, our method can separate NGS reads with a high precision and sensitivity.",2012-09-26 +23264345,Perfusion CT in acute stroke: a comprehensive analysis of infarct and penumbra.,"

Purpose

To perform a large-scale systematic comparison of the accuracy of all commonly used perfusion computed tomography (CT) data postprocessing methods in the definition of infarct core and penumbra in acute stroke.

Materials and methods

The collection of data for this study was approved by the institutional ethics committee, and all patients gave informed consent. Three hundred fourteen patients with hemispheric ischemia underwent perfusion CT within 6 hours of stroke symptom onset and magnetic resonance (MR) imaging at 24 hours. CT perfusion maps were generated by using six different postprocessing methods. Pixel-based analysis was used to calculate sensitivity and specificity of different perfusion CT thresholds for the penumbra and infarct core with each postprocessing method, and receiver operator characteristic (ROC) curves were plotted. Area under the ROC curve (AUC) analysis was used to define the optimum threshold.

Results

Delay-corrected singular value deconvolution (SVD) with a delay time of more than 2 seconds most accurately defined the penumbra (AUC = 0.86, P = .046, mean volume difference between acute perfusion CT and 24-hour diffusion-weighted MR imaging = 1.7 mL). A double core threshold with a delay time of more than 2 seconds and cerebral blood flow less than 40% provided the most accurate definition of the infarct core (AUC = 0.86, P = .038). The other SVD measures (block circulant, nondelay corrected) were more accurate than non-SVD methods.

Conclusion

This study has shown that there is marked variability in penumbra and infarct prediction among various deconvolution techniques and highlights the need for standardization of perfusion CT in stroke.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12120971/-/DC1.",2012-12-21 +24008418,INSECT: IN-silico SEarch for Co-occurring Transcription factors.,"

Motivation

Transcriptional regulation occurs through the concerted actions of multiple transcription factors (TFs) that bind cooperatively to cis-regulatory modules (CRMs) of genes. These CRMs usually contain a variable number of transcription factor-binding sites (TFBSs) involved in related cellular and physiological processes. Chromatin immunoprecipitation followed by sequencing (ChIP-seq) has been effective in detecting TFBSs and nucleosome location to identify potential CRMs in genome-wide studies. Although several attempts were previously reported to predict the potential binding of TFs at TFBSs within CRMs by comparing different ChIP-seq data, these have been hampered by excessive background, usually emerging as a consequence of experimental conditions. To understand these complex regulatory circuits, it would be helpful to have reliable and updated user-friendly tools to assist in the identification of TFBSs and CRMs for gene(s) of interest.

Results

Here we present INSECT (IN-silico SEarch for Co-occurring Transcription factors), a novel web server for identifying potential TFBSs and CRMs in gene sequences. By combining several strategies, INSECT provides flexible analysis of multiple co-occurring TFBSs, by applying differing search schemes and restriction parameters. availability and implementation: INSECT is freely available as a web server at http://bioinformatics.ibioba-mpsp-conicet.gov.ar/INSECT .",2013-09-04 +23144830,The limits of de novo DNA motif discovery.,"A major challenge in molecular biology is reverse-engineering the cis-regulatory logic that plays a major role in the control of gene expression. This program includes searching through DNA sequences to identify ""motifs"" that serve as the binding sites for transcription factors or, more generally, are predictive of gene expression across cellular conditions. Several approaches have been proposed for de novo motif discovery-searching sequences without prior knowledge of binding sites or nucleotide patterns. However, unbiased validation is not straightforward. We consider two approaches to unbiased validation of discovered motifs: testing the statistical significance of a motif using a DNA ""background"" sequence model to represent the null hypothesis and measuring performance in predicting membership in gene clusters. We demonstrate that the background models typically used are ""too null,"" resulting in overly optimistic assessments of significance, and argue that performance in predicting TF binding or expression patterns from DNA motifs should be assessed by held-out data, as in predictive learning. Applying this criterion to common motif discovery methods resulted in universally poor performance, although there is a marked improvement when motifs are statistically significant against real background sequences. 
Moreover, on synthetic data where ""ground truth"" is known, discriminative performance of all algorithms is far below the theoretical upper bound, with pronounced ""over-fitting"" in training. A key conclusion from this work is that the failure of de novo discovery approaches to accurately identify motifs is basically due to statistical intractability resulting from the fixed size of co-regulated gene clusters, and thus such failures do not necessarily provide evidence that unfound motifs are not active biologically. Consequently, the use of prior knowledge to enhance motif discovery is not just advantageous but necessary. An implementation of the LR and ALR algorithms is available at http://code.google.com/p/likelihood-ratio-motifs/.",2012-11-07 +27234245,OGRO: The Overview of functionally characterized Genes in Rice online database.,"

Background

The high-quality sequence information and rich bioinformatics tools available for rice have contributed to remarkable advances in functional genomics. To facilitate the application of gene function information to the study of natural variation in rice, we comprehensively searched for articles related to rice functional genomics and extracted information on functionally characterized genes.

Results

As of 31 March 2012, 702 functionally characterized genes were annotated. This number represents about 1.6% of the predicted loci in the Rice Annotation Project Database. The compiled gene information is organized to facilitate direct comparisons with quantitative trait locus (QTL) information in the Q-TARO database. Comparison of genomic locations between functionally characterized genes and the QTLs revealed that QTL clusters were often co-localized with high-density gene regions, and that the genes associated with the QTLs in these clusters were different genes, suggesting that these QTL clusters are likely to be explained by tightly linked but distinct genes. Information on the functionally characterized genes compiled during this study is now available in the Overview of Functionally Characterized Genes in Rice Online database (OGRO) on the Q-TARO website ( http://qtaro.abr.affrc.go.jp/ogro ). The database has two interfaces: a table containing gene information, and a genome viewer that allows users to compare the locations of QTLs and functionally characterized genes.

Conclusions

OGRO on Q-TARO will facilitate a candidate-gene approach to identifying the genes responsible for QTLs. Because the QTL descriptions in Q-TARO contain information on agronomic traits, such comparisons will also facilitate the annotation of functionally characterized genes in terms of their effects on traits important for rice breeding. The increasing amount of information on rice gene function being generated from mutant panels and other types of studies will make the OGRO database even more valuable in the future.",2012-09-24 +23396122,Phylogenomic clustering for selecting non-redundant genomes for comparative genomics.,"

Motivation

Analyses in comparative genomics often require non-redundant genome datasets. Eliminating redundancy is not as simple as keeping one strain for each named species because genomes might be redundant at a higher taxonomic level than that of species for some analyses; some strains with different species names can be as similar as most strains sharing a species name, whereas some strains sharing a species name can be so different that they should be put into different groups; and some genomes lack a species name.

Results

We have implemented a method and Web server that clusters a genome dataset into groups of redundant genomes at different thresholds based on a few phylogenomic distance measures.

Availability

The Web interface, similarity and distance data and R-scripts can be accessed at http://microbiome.wlu.ca/research/redundancy/.",2013-02-08 +24423161,GIANT: pattern analysis of molecular interactions in 3D structures of protein-small ligand complexes.,"

Background

Interpretation of binding modes of protein-small ligand complexes from 3D structure data is essential for understanding selective ligand recognition by proteins. It is often performed by visual inspection and sometimes largely depends on a priori knowledge about typical interactions such as hydrogen bonds and π-π stacking. Because it can introduce some biases due to scientists' subjective perspectives, more objective viewpoints considering a wide range of interactions are required.

Description

In this paper, we present a web server for analyzing protein-small ligand interactions on the basis of patterns of atomic contacts, or ""interaction patterns"" obtained from the statistical analyses of 3D structures of protein-ligand complexes in our previous study. This server can guide visual inspection by providing information about interaction patterns for each atomic contact in 3D structures. Users can visually investigate what atomic contacts in user-specified 3D structures of protein-small ligand complexes are statistically overrepresented. This server consists of two main components: ""Complex Analyzer"", and ""Pattern Viewer"". The former provides a 3D structure viewer with annotations of interacting amino acid residues, ligand atoms, and interacting pairs of these. In the annotations of interacting pairs, assignment to an interaction pattern of each contact and statistical preferences of the patterns are presented. The ""Pattern Viewer"" provides details of each interaction pattern. Users can see visual representations of probability density functions of interactions, and a list of protein-ligand complexes showing similar interactions.

Conclusions

Users can interactively analyze protein-small ligand binding modes with statistically determined interaction patterns rather than relying on a priori knowledge of the users, by using our new web server named GIANT that is freely available at http://giant.hgc.jp/.",2014-01-14 +23281802,BM-Map: an efficient software package for accurately allocating multireads of RNA-sequencing data.,"

Background

RNA sequencing (RNA-seq) has become a major tool for biomedical research. A key step in analyzing RNA-seq data is to infer the origin of short reads in the source genome, and for this purpose, many read alignment/mapping software programs have been developed. Usually, the majority of mappable reads can be mapped to one unambiguous genomic location, and these reads are called unique reads. However, a considerable proportion of mappable reads can be aligned to more than one genomic location with the same or similar fidelities, and they are called ""multireads"". Allocating these multireads is challenging but critical for interpreting RNA-seq data. We recently developed a Bayesian stochastic model that allocates multireads more accurately than alternative methods (Ji et al. Biometrics 2011).

Results

In order to serve a greater biological community, we have implemented this method in a stand-alone, efficient, and user-friendly software package, BM-Map. BM-Map takes SAM (Sequence Alignment/Map), the most popular read alignment format, as the standard input; then based on the Bayesian model, it calculates mapping probabilities of multireads for competing genomic loci; and BM-Map generates the output by adding mapping probabilities to the original SAM file so that users can easily perform downstream analyses. The program is available in three common operating systems, Linux, Mac and PC. Moreover, we have built a dedicated website, http://bioinformatics.mdanderson.org/main/BM-Map, which includes free downloads, detailed tutorials and illustration examples.

Conclusions

We have developed a stand-alone, efficient, and user-friendly software package for accurately allocating multireads, which is an important addition to our previous methodology paper. We believe that this bioinformatics tool will greatly help RNA-seq and related applications reach their full potential in life science research.",2012-12-17 +23391497,Coalescent simulation in continuous space.,"

Unlabelled

Coalescent simulation has become an indispensable tool in population genetics, and many complex evolutionary scenarios have been incorporated into the basic algorithm. Despite many years of intense interest in spatial structure, however, there are no available methods to simulate the ancestry of a sample of genes that occupy a spatial continuum. This is mainly due to the severe technical problems encountered by the classical model of isolation by distance. A recently introduced model solves these technical problems and provides a solid theoretical basis for the study of populations evolving in continuous space. We present a detailed algorithm to simulate the coalescent process in this model, and provide an efficient implementation of a generalized version of this algorithm as a freely available Python module.

Availability

Package available at http://pypi.python.org/pypi/ercs.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-07 +23625170,MiRAuto: an automated user-friendly microRNA prediction tool utilizing plant small RNA sequencing data.,"MicroRNAs (miRNAs) are a class of small RNAs that post-transcriptionally regulate gene expression in animals and plants. The recent rapid advancement in miRNA biology, including high-throughput sequencing of small RNA libraries, inspired the development of a bioinformatics software, miRAuto, which predicts putative miRNAs in model plant genomes computationally. Furthermore, miRAuto enables users to identify miRNAs in non-model plant species whose genomes have yet to be fully sequenced. miRAuto analyzes the expression of the 5'-end position of mapped small RNAs in reference sequences to prevent the possibility of mRNA fragments being included as candidate miRNAs. We validated the utility of miRAuto on a small RNA dataset, and the results were compared to other publicly available miRNA prediction programs. In conclusion, miRAuto is a fully automated user-friendly tool for predicting miRNAs from small RNA sequencing data in both model and non-model plant species. miRAuto is available at http://nature.snu.ac.kr/software/miRAuto.htm.",2013-04-15 +23389766,AGP: a multimethods web server for alignment-free genome phylogeny.,"Phylogenetic analysis based on alignment method meets huge challenges when dealing with whole-genome sequences, for example, recombination, shuffling, and rearrangement of sequences. Thus, various alignment-free methods for phylogeny construction have been proposed. However, most of these methods have not been implemented as tools or web servers. Researchers cannot use these methods easily with their data sets. To facilitate the usage of various alignment-free methods, we implemented most of the popular alignment-free methods and constructed a user-friendly web server for alignment-free genome phylogeny (AGP). 
AGP integrated the phylogenetic tree construction, visualization, and comparison functions together. Both AGP and all source code of the methods are available at http://www.herbbol.org:8000/agp (last accessed February 26, 2013). AGP will facilitate research in the field of whole-genome phylogeny and comparison.",2013-02-06 +25214827,BioPortal as a Dataset of Linked Biomedical Ontologies and Terminologies in RDF.,"BioPortal is a repository of biomedical ontologies-the largest such repository, with more than 300 ontologies to date. This set includes ontologies that were developed in OWL, OBO and other formats, as well as a large number of medical terminologies that the US National Library of Medicine distributes in its own proprietary format. We have published the RDF version of all these ontologies at http://sparql.bioontology.org. This dataset contains 190M triples, representing both metadata and content for the 300 ontologies. We use the metadata that the ontology authors provide and simple RDFS reasoning in order to provide dataset users with uniform access to key properties of the ontologies, such as lexical properties for the class names and provenance data. The dataset also contains 9.8M cross-ontology mappings of different types, generated both manually and automatically, which come with their own metadata.",2013-01-01 +22669906,NMSim web server: integrated approach for normal mode-based geometric simulations of biologically relevant conformational transitions in proteins.,"The NMSim web server implements a three-step approach for multiscale modeling of protein conformational changes. First, the protein structure is coarse-grained using the FIRST software. Second, a rigid cluster normal-mode analysis provides low-frequency normal modes. Third, these modes are used to extend the recently introduced idea of constrained geometric simulations by biasing backbone motions of the protein, whereas side chain motions are biased toward favorable rotamer states (NMSim). 
The generated structures are iteratively corrected regarding steric clashes and stereochemical constraint violations. The approach allows performing three simulation types: unbiased exploration of conformational space; pathway generation by a targeted simulation; and radius of gyration-guided simulation. On a data set of proteins with experimentally observed conformational changes, the NMSim approach has been shown to be a computationally efficient alternative to molecular dynamics simulations for conformational sampling of proteins. The generated conformations and pathways of conformational transitions can serve as input to docking approaches or more sophisticated sampling techniques. The web server output is a trajectory of generated conformations, Jmol representations of the coarse-graining and a subset of the trajectory and data plots of structural analyses. The NMSim webserver, accessible at http://www.nmsim.de, is free and open to all users with no login requirement.",2012-06-04 +23681124,ACCUSA2: multi-purpose SNV calling enhanced by probabilistic integration of quality scores.,"

Summary

Direct comparisons of assembled short-read stacks are one way to identify single-nucleotide variants. Single-nucleotide variant detection is especially challenging across samples with different read depths (e.g. RNA-Seq) and high-background levels (e.g. selection experiments). We present ACCUSA2 to identify variant positions where nucleotide frequency spectra differ between two samples. To this end, ACCUSA2 integrates quality scores for base calling and read mapping into a common framework. Our benchmarks demonstrate that ACCUSA2 is superior to a state-of-the-art SNV caller in situations of diverging read depths and reliably detects subtle differences among sample nucleotide frequency spectra. Additionally, we show that ACCUSA2 is fast and robust against base quality score deviations.

Availability

ACCUSA2 is available free of charge to academic users and may be obtained from https://bbc.mdc-berlin.de/software.

Contact

christoph.dieterich@mdc-berlin.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-16 +22495746,A de novo metagenomic assembly program for shotgun DNA reads.,"

Motivation

A high-quality assembly of reads generated from shotgun sequencing is a substantial step in metagenome projects. Although traditional assemblers have been employed in initial analysis of metagenomes, they cannot surmount the challenges created by the features of metagenomic data.

Result

We present a de novo assembly approach and its implementation named MAP (metagenomic assembly program). Based on an improved overlap/layout/consensus (OLC) strategy incorporated with several special algorithms, MAP uses the mate pair information, resulting in being more applicable to shotgun DNA reads (recommended as >200 bp) currently widely used in metagenome projects. Results of extensive tests on simulated data show that MAP can be superior to both Celera and Phrap for typical longer reads by Sanger sequencing, as well as has an evident advantage over Celera, Newbler and the newest Genovo, for typical shorter reads by 454 sequencing.

Availability and implementation

The source code of MAP is distributed as open source under the GNU GPL license, the MAP program and all simulated datasets can be freely available at http://bioinfo.ctb.pku.edu.cn/MAP/",2012-04-11 +21923779,EUCAST technical note on posaconazole.,"The European Committee on Antimicrobial Susceptibility Testing-Subcommittee on Antifungal Susceptibility Testing (EUCAST-AFST) has determined breakpoints for posaconazole for Candida spp. This Technical Note is based on the EUCAST posaconazole rationale document (available on the EUCAST website: http://www.eucast.org). Species-specific breakpoints for C. albicans, C. parapsilosis and C. tropicalis are S: MIC ≤0.06 mg/L, R: MIC >0.06 mg/L. There are insufficient data to set breakpoints for C. glabrata and C. krusei as well as non-species-related breakpoints. The breakpoints are based upon pharmacokinetic data, epidemiological cut-off values and clinical experience. Breakpoints will be reviewed regularly.",2011-09-16 +23771137,Secondary structure and domain architecture of the 23S and 5S rRNAs.,"We present a de novo re-determination of the secondary (2°) structure and domain architecture of the 23S and 5S rRNAs, using 3D structures, determined by X-ray diffraction, as input. In the traditional 2° structure, the center of the 23S rRNA is an extended single strand, which in 3D is seen to be compact and double helical. Accurately assigning nucleotides to helices compels a revision of the 23S rRNA 2° structure. Unlike the traditional 2° structure, the revised 2° structure of the 23S rRNA shows architectural similarity with the 16S rRNA. The revised 2° structure also reveals a clear relationship with the 3D structure and is generalizable to rRNAs of other species from all three domains of life. The 2° structure revision required us to reconsider the domain architecture. We partitioned the 23S rRNA into domains through analysis of molecular interactions, calculations of 2D folding propensities and compactness. 
The best domain model for the 23S rRNA contains seven domains, not six as previously ascribed. Domain 0 forms the core of the 23S rRNA, to which the other six domains are rooted. Editable 2° structures mapped with various data are provided (http://apollo.chemistry.gatech.edu/RibosomeGallery).",2013-06-14 +21881406,mirExplorer: detecting microRNAs from genome and next generation sequencing data using the AdaBoost method with transition probability matrix and combined features.,"microRNAs (miRNAs) represent an abundant group of small regulatory non-coding RNAs in eukaryotes. The emergence of Next-generation sequencing (NGS) technologies has allowed the systematic detection of small RNAs (sRNAs) and de novo sequencing of genomes quickly and with low cost. As a result, there is an increased need to develop fast miRNA prediction tools to annotate miRNAs from various organisms with a high level of accuracy, using the genome sequence or the NGS data. Several miRNA predictors have been proposed to achieve this purpose. However, the accuracy and fitness for multiple species of existing predictors needed to be improved. Here, we present a novel prediction tool called mirExplorer, which is based on an integrated adaptive boosting method and contains two modules. The first module named mirExplorer-genome was designed to de novo predict pre-miRNAs from genome, and the second module named mirExplorer-NGS was used to discover miRNAs from NGS data. A set of novel features of pre-miRNA secondary structure and miRNA biogenesis has been extracted to distinguish real pre-miRNAs from pseudo ones. We used outer-ten-fold cross-validation to verify the mirExplorer-genome computation, which obtained a specificity of 95.03% and a sensitivity of 93.71% on human data. This computation was made on test data from 16 species, and it achieved an overall accuracy of 95.53%. 
Systematic outer-ten-fold cross-validation of the mirExplorer-NGS model achieved a specificity of 98.3% and a sensitivity of 97.72%. We found that the good performance of the mirExplorer-NGS model was upheld across species from vertebrates to plants in test datasets. The mirExplorer is available as both web server and software package at http://biocenter.sysu.edu.cn/mir/.",2011-09-01 +23761447,ResponseNet2.0: Revealing signaling and regulatory pathways connecting your proteins and genes--now with human data.,"Genome sequencing and transcriptomic profiling are two widely used approaches for the identification of human disease pathways. However, each approach typically provides a limited view of disease pathways: Genome sequencing can identify disease-related mutations but rarely reveals their mode-of-action, while transcriptomic assays do not reveal the series of events that lead to the transcriptomic change. ResponseNet is an integrative network-optimization approach that we developed to fill these gaps by highlighting major signaling and regulatory molecular interaction paths that connect disease-related mutations and genes. The ResponseNet web-server provides a user-friendly interface to ResponseNet. Specifically, users can upload weighted lists of proteins and genes and obtain a sparse, weighted, molecular interaction subnetwork connecting them, that is biased toward regulatory and signaling pathways. ResponseNet2.0 enhances the functionality of the ResponseNet web-server in two important ways. First, it supports analysis of human data by offering a human interactome composed of proteins, genes and micro-RNAs. Second, it offers a new informative view of the output, including a randomization analysis, to help users assess the biological relevance of the output subnetwork. 
ResponseNet2.0 is available at http://netbio.bgu.ac.il/respnet .",2013-06-12 +23959063,Role of obesity on all-cause mortality in whites with type 2 diabetes from Italy.,"Mortality rate of diabetic patients is twice as much that of non-diabetic individuals. The role of obesity on mortality risk in patients with type 2 diabetes is controversial. Aim of our study was to address the relationship between obesity and all-cause mortality in a real-life set of white patients with type 2 diabetes from central-southern Italy from the Gargano Mortality Study (GMS). In addition, we used genetic data from genome-wide association studies (GWAs)-derived single nucleotide polymorphisms (SNPs) firmly associated with body mass index (BMI), in order to investigate the intrinsic nature of reduced mortality rate we, in fact, observed in obese patients. Study subjects with type 2 diabetes (n = 764) are part of the GMS, which is aimed at unraveling predictors of incident all-cause mortality. Time-to-death analyses were performed by Cox regression. Association between genotype risk score and obesity was tested by logistic regression. Of the 32 SNPs firmly associated with BMI, we investigated those with BMI β value ≥0.10 kg/m(2) and allele frequency ≥10 %. Genotyping was performed by KBioscience (http://www.lgcgenomics.com/). In GMS, obesity predicted a 45 % reduction in all-cause mortality. Individuals with high ""obesity genetic load"" (i.e., those carrying >9 risk alleles) were 60 % more likely to be obese as compared to individuals with low ""obesity genetic load."" Most importantly, mortality rate was not different in individuals with high and low ""obesity genetic load,"" thus indicating no role of obesity genes on all-cause mortality and speaking against a cause-effect relationship underlying the association between obesity and reduced mortality rate.",2013-12-01 +23006764,PathNet: a tool for pathway analysis using topological information.,"

Unlabelled

Background

Identification of canonical pathways through enrichment of differentially expressed genes in a given pathway is a widely used method for interpreting gene lists generated from high-throughput experimental studies. However, most algorithms treat pathways as sets of genes, disregarding any inter- and intra-pathway connectivity information, and do not provide insights beyond identifying lists of pathways.

Results

We developed an algorithm (PathNet) that utilizes the connectivity information in canonical pathway descriptions to help identify study-relevant pathways and characterize non-obvious dependencies and connections among pathways using gene expression data. PathNet considers both the differential expression of genes and their pathway neighbors to strengthen the evidence that a pathway is implicated in the biological conditions characterizing the experiment. As an adjunct to this analysis, PathNet uses the connectivity of the differentially expressed genes among all pathways to score pathway contextual associations and statistically identify biological relations among pathways. In this study, we used PathNet to identify biologically relevant results in two Alzheimer's disease microarray datasets, and compared its performance with existing methods. Importantly, PathNet identified de-regulation of the ubiquitin-mediated proteolysis pathway as an important component in Alzheimer's disease progression, despite the absence of this pathway in the standard enrichment analyses.

Conclusions

PathNet is a novel method for identifying enrichment and association between canonical pathways in the context of gene expression data. It takes into account topological information present in pathways to reveal biological information. PathNet is available as an R workspace image from http://www.bhsai.org/downloads/pathnet/.",2012-09-24 +21873641,Extending KNIME for next-generation sequencing data analysis.,"

Summary

KNIME (Konstanz Information Miner) is a user-friendly and comprehensive open-source data integration, processing, analysis and exploration platform. We present here new functionality and workflows that open the door to performing next-generation sequencing analysis using the KNIME framework.

Availability

All sources and compiled code are available via the KNIME update mechanism. Example workflows and descriptions are available through http://tech.knime.org/community/next-generation-sequencing.

Contact

bernd.jagla@pasteur.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-27 +24135164,Genetic polymorphisms in plasminogen activator inhibitor-1 predict susceptibility to steroid-induced osteonecrosis of the femoral head in Chinese population.,"

Background

Steroid usage has been considered as a leading cause of non-traumatic osteonecrosis of the femoral head (ONFH), which is involved in hypo-fibrinolysis and blood supply interruption. Genetic polymorphisms in plasminogen activator inhibitor-1 (PAI-1) have been demonstrated to be associated with ONFH risk in several populations. However, this relationship has not been established in Chinese population. The aim of this study was to investigate the association of PAI-1 gene polymorphisms with steroid-induced ONFH in a large cohort of Chinese population.

Methods

A case-control study was conducted, which included 94 and 106 unrelated patients after steroid administration recruited from 14 provinces in China, respectively. Two SNPs (rs11178 and rs2227631) within PAI-1 were genotyped using Sequenom MassARRAY system.

Results

rs2227631 SNP was significantly associated with steroid-induced ONFH group in codominant (P = 0.04) and recessive (P = 0.02) models. However, there were no differences found in genotype frequencies of rs11178 SNP between controls and patients with steroid-induced ONFH (all P > 0.05).

Conclusions

Our data offer the convincing evidence for the first time that rs2227631 SNP of PAI-1 may be associated with the risk of steroid-induced ONFH, suggesting that the genetic variations of this gene may play an important role in the disease development.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1569909986109783.",2013-10-17 +27011267,HOW MANY SPECIES OF ALGAE ARE THERE?,"Algae have been estimated to include anything from 30,000 to more than 1 million species. An attempt is made here to arrive at a more accurate estimate using species numbers in phyla and classes included in the on-line taxonomic database AlgaeBase (http://www.algaebase.org). Despite uncertainties regarding what organisms should be included as algae and what a species is in the context of the various algal phyla and classes, a conservative approach results in an estimate of 72,500 algal species, names for 44,000 of which have probably been published, and 33,248 names have been processed by AlgaeBase to date (June 2012). Some published estimates of diatom numbers are of over 200,000 species, which would result in four to five diatom species for every other algal species. Concern is expressed at the decline and potential extinction of taxonomists worldwide capable of improving and completing the necessary systematic studies.",2012-09-20 +23675420,Dispec: a novel peptide scoring algorithm based on peptide matching discriminability.,"Identifying peptides from the fragmentation spectra is a fundamental step in mass spectrometry (MS) data processing. The significance (discriminability) of every peak varies, providing additional information for potentially enhancing the identification sensitivity and the correct match rate. However this important information was not considered in previous algorithms. Here we presented a novel method based on Peptide Matching Discriminability (PMD), in which the PMD information of every peak reflects the discriminability of candidate peptides. In addition, we developed a novel peptide scoring algorithm Dispec based on PMD, by taking three aspects of discriminability into consideration: PMD, intensity discriminability and m/z error discriminability. 
Compared with Mascot and Sequest, Dispec identified remarkably more peptides from three experimental datasets with the same confidence at 1% PSM-level FDR. Dispec is also robust and versatile for various datasets obtained on different instruments. The concept of discriminability enhances the peptide identification and thus may contribute largely to the proteome studies. As an open-source program, Dispec is freely available at http://bioinformatics.jnu.edu.cn/software/dispec/.",2013-05-13 +23810591,Assets of imputation to ultra-high density for productive and functional traits.,"The aim of this study was to evaluate different-density genotyping panels for genotype imputation and genomic prediction. Genotypes from customized Golden Gate Bovine3K BeadChip [LD3K; low-density (LD) 3,000-marker (3K); Illumina Inc., San Diego, CA] and BovineLD BeadChip [LD6K; 6,000-marker (6K); Illumina Inc.] panels were imputed to the BovineSNP50v2 BeadChip [50K; 50,000-marker; Illumina Inc.]. In addition, LD3K, LD6K, and 50K genotypes were imputed to a BovineHD BeadChip [HD; high-density 800,000-marker (800K) panel], and with predictive ability evaluated and compared subsequently. Comparisons of prediction accuracy were carried out using Random boosting and genomic BLUP. Four traits under selection in the Spanish Holstein population were used: milk yield, fat percentage (FP), somatic cell count, and days open (DO). Training sets at 50K density for imputation and prediction included 1,632 genotypes. Testing sets for imputation from LD to 50K contained 834 genotypes and testing sets for genomic evaluation included 383 bulls. The reference population genotyped at HD included 192 bulls. Imputation using BEAGLE software (http://faculty.washington.edu/browning/beagle/beagle.html) was effective for reconstruction of dense 50K and HD genotypes, even when a small reference population was used, with 98.3% of SNP correctly imputed. 
Random boosting outperformed genomic BLUP in terms of prediction reliability, mean squared error, and selection effectiveness of top animals in the case of FP. For other traits, however, no clear differences existed between methods. No differences were found between imputed LD and 50K genotypes, whereas evaluation of genotypes imputed to HD was on average across data set, method, and trait, 4% more accurate than 50K prediction, and showed smaller (2%) mean squared error of predictions. Similar bias in regression coefficients was found across data sets but regressions were 0.32 units closer to unity for DO when genotypes were imputed to HD density. Imputation to HD genotypes might produce higher stability in the genomic proofs of young candidates. Regarding selection effectiveness of top animals, more (2%) top bulls were classified correctly with imputed LD6K genotypes than with LD3K. When the original 50K genotypes were used, correct classification of top bulls increased by 1%, and when those genotypes were imputed to HD, 3% more top bulls were detected. Selection effectiveness could be slightly enhanced for certain traits such as FP, somatic cell count, or DO when genotypes are imputed to HD. Genetic evaluation units may consider a trait-dependent strategy in terms of method and genotype density for use in the genome-enhanced evaluations.",2013-06-28 +23991743,Fundamentals of pyrosequencing.,"

Context

DNA sequencing is critical to identifying many human genetic disorders caused by DNA mutations, including cancer. Pyrosequencing is less complex, involves fewer steps, and has a superior limit of detection compared with Sanger sequencing. The fundamental basis of pyrosequencing is that pyrophosphate is released when a deoxyribonucleotide triphosphate is added to the end of a nascent strand of DNA. Because deoxyribonucleotide triphosphates are sequentially added to the reaction and because the pyrophosphate concentration is continuously monitored, the DNA sequence can be determined.

Objective

To demonstrate the fundamental principles of pyrosequencing.

Data sources

Salient features of pyrosequencing are demonstrated using the free software program Pyromaker ( http://pyromaker.pathology.jhmi.edu ), through which users can input DNA sequences and other pyrosequencing parameters to generate the expected pyrosequencing results.

Conclusions

We demonstrate how mutant and wild-type DNA sequences result in different pyrograms. Using pyrograms of established mutations in tumors, we explain how to analyze the pyrogram peaks generated by different dispensation sequences. Further, we demonstrate some limitations of pyrosequencing, including how some complex mutations can be indistinguishable from single base mutations. Pyrosequencing is the basis of the Roche 454 next-generation sequencer and many of the same principles also apply to the Ion Torrent hydrogen ion-based next-generation sequencers.",2013-09-01 +23990415,"A-clustering: a novel method for the detection of co-regulated methylation regions, and regions associated with exposure.","

Motivation

DNA methylation is a heritable modifiable chemical process that affects gene transcription and is associated with other molecular markers (e.g. gene expression) and biomarkers (e.g. cancer or other diseases). Current technology measures methylation in hundreds of thousands, or millions of CpG sites throughout the genome. It is evident that neighboring CpG sites are often highly correlated with each other, and current literature suggests that clusters of adjacent CpG sites are co-regulated.

Results

We develop the Adjacent Site Clustering (A-clustering) algorithm to detect sets of neighboring CpG sites that are correlated with each other. To detect methylation regions associated with exposure, we propose an analysis pipeline for high-dimensional methylation data in which CpG sites within regions identified by A-clustering are modeled as multivariate responses to environmental exposure using a generalized estimating equation approach that assumes exposure equally affects all sites in the cluster. We develop a correlation preserving simulation scheme, and study the proposed methodology via simulations. We study the clusters detected by the algorithm on high dimensional dataset of peripheral blood methylation of pesticide applicators.

Availability

We provide the R package Aclust that efficiently implements the A-clustering and the analysis pipeline, and produces analysis reports. The package is found on http://www.hsph.harvard.edu/tamar-sofer/packages/

Contact

tsofer@hsph.harvard.edu",2013-08-29 +22492638,Detection of differentially expressed segments in tiling array data.,"

Motivation

Tiling arrays have been a mainstay of unbiased genome-wide transcriptomics over the last decade. Currently available approaches to identify expressed or differentially expressed segments in tiling array data are limited in the recovery of the underlying gene structures and require several parameters that are intensity-related or partly dataset-specific.

Results

We have developed TileShuffle, a statistical approach that identifies transcribed and differentially expressed segments as significant differences from the background distribution while considering sequence-specific affinity biases and cross-hybridization. It avoids dataset-specific parameters in order to provide better comparability of different tiling array datasets, based on different technologies or array designs. TileShuffle detects highly and differentially expressed segments in biological data with significantly lower false discovery rates under equal sensitivities than commonly used methods. Also, it is clearly superior in the recovery of exon-intron structures. It further provides window z-scores as a normalized and robust measure for visual inspection.

Availability

The R package including documentation and examples is freely available at http://www.bioinf.uni-leipzig.de/Software/TileShuffle/",2012-04-06 +23028497,Sequencing and characterization of striped venus transcriptome expand resources for clam fishery genetics.,"

Background

The striped venus Chamelea gallina clam fishery is among the oldest and the largest in the Mediterranean Sea, particularly in the inshore waters of northern Adriatic Sea. The high fishing pressure has led to a strong stock abundance decline, enhanced by several irregular mortality events. The nearly complete lack of molecular characterization limits the available genetic resources for C. gallina. We achieved the first transcriptome of this species with the aim of identifying an informative set of expressed genes, potential markers to assess genetic structure of natural populations and molecular resources for pathogenic contamination detection.

Methodology/principal findings

The 454-pyrosequencing of a normalized cDNA library of a pool of C. gallina adult individuals yielded 298,494 raw reads. Different steps of reads assembly and filtering produced 36,422 contigs of high quality, one half of which (18,196) were annotated by similarity. A total of 111 microsatellites and 20,377 putative SNPs were identified. A panel of 13 polymorphic transcript-linked microsatellites was developed and their variability assessed in 12 individuals. Remarkably, a scan to search for contamination sequences of infectious origin indicated the presence of several Vibrionales species reported to be among the most frequent clam pathogen's species. Results reported in this study were included in a dedicated database available at http://compgen.bio.unipd.it/chameleabase.

Conclusions/significance

This study represents the first attempt to sequence and de novo annotate the transcriptome of the clam C. gallina. The availability of this transcriptome opens new perspectives in the study of biochemical and physiological role of gene products and their responses to large and small-scale environmental stress in C. gallina, with high throughput experiments such as custom microarray or targeted re-sequencing. Molecular markers, such as the already optimized EST-linked microsatellites and the discovered SNPs will be useful to estimate effects of demographic processes and to detect minute levels of population structuring.",2012-09-18 +21478487,Comrad: detection of expressed rearrangements by integrated analysis of RNA-Seq and low coverage genome sequence data.,"

Motivation

Comrad is a novel algorithmic framework for the integrated analysis of RNA-Seq and whole genome shotgun sequencing (WGSS) data for the purposes of discovering genomic rearrangements and aberrant transcripts. The Comrad framework leverages the advantages of both RNA-Seq and WGSS data, providing accurate classification of rearrangements as expressed or not expressed and accurate classification of the genomic or non-genomic origin of aberrant transcripts. A major benefit of Comrad is its ability to accurately identify aberrant transcripts and associated rearrangements using low coverage genome data. As a result, a Comrad analysis can be performed at a cost comparable to that of two RNA-Seq experiments, significantly lower than an analysis requiring high coverage genome data.

Results

We have applied Comrad to the discovery of gene fusions and read-throughs in prostate cancer cell line C4-2, a derivative of the LNCaP cell line with androgen-independent characteristics. As a proof of concept, we have rediscovered in the C4-2 data 4 of the 6 fusions previously identified in LNCaP. We also identified six novel fusion transcripts and associated genomic breakpoints, and verified their existence in LNCaP, suggesting that Comrad may be more sensitive than previous methods that have been applied to fusion discovery in LNCaP. We show that many of the gene fusions discovered using Comrad would be difficult to identify using currently available techniques.

Availability

A C++ and Perl implementation of the method demonstrated in this article is available at http://compbio.cs.sfu.ca/.",2011-04-09 +22902564,SIMPLE: Software for ab initio reconstruction of heterogeneous single-particles.,"The open source software suite SIMPLE: Single-particle IMage Processing Linux Engine provides data analysis methods for single-particle cryo-electron microscopy (cryo-EM). SIMPLE addresses the problem of obtaining 3D reconstructions from 2D projections only, without using an input reference volume for approximating orientations. The SIMPLE reconstruction algorithm is tailored to asymmetrical and structurally heterogeneous single-particles. Its basis is global optimization with the use of Fourier common lines. The advance that enables ab initio reconstruction and heterogeneity analysis is the separation of the tasks of in-plane alignment and projection direction determination via bijective orientation search - a new concept in common lines-based strategies. Bijective orientation search divides the configuration space into two groups of paired parameters that are optimized separately. The first group consists of the rotations and shifts in the plane of the projection; the second group consists of the projection directions and state assignments. In SIMPLE, ab initio reconstruction is feasible because the 3D in-plane alignment is approximated using reference-free 2D rotational alignment. The subsequent common lines-based search hence searches projection directions and states only. Thousands of class averages are analyzed simultaneously in a matter of hours. Novice SIMPLE users get a head start via the well documented front-end. The structured, object-oriented back-end invites advanced users to develop new alignment and reconstruction algorithms. An overview of the package is presented together with benchmarks on simulated data. 
Executable binaries, source code, and documentation are available at http://simple.stanford.edu.",2012-08-10 +24649978,A new in situ hybridization and immunohistochemistry with a novel antibody to detect small T-antigen expressions of Merkel cell polyomavirus (MCPyV).,"

Background

Approximately 80% of Merkel cell carcinomas (MCCs) harbor Merkel cell polyomavirus (MCPyV) which monoclonally integrates into the genome and has prognostic significance. The presence or absence of MCPyV is usually diagnosed using CM2B4 immunohistochemistry (IHC) for MCPyV-large T antigen (LT) protein. However, this method poses a risk of misdiagnosis.

Methods

In this study, we determined MCPyV infection in MCCs using real-time PCR for MCPyV-LT DNA and prepared 16 cases of MCPyV-DNA-positive and -negative groups. Diagnostic sensitivity and specificity of conventional PCR for MCPyV-small T antigen (MCPyV-ST), IHC using a newly developed polyclonal antibody (ST-1) for MCPyV-ST protein (MCPyV-ST) (aa: 164-177), and in situ hybridization (ISH) as well as real-time PCR for MCPyV-ST mRNA were compared against CM2B4-IHC for sensitivity (0.94, 15/16) and specificity (0.94, 15/16).

Results

The following are the respective sensitivity and specificity results from examinations for MCPyV-ST gene: conventional PCR for the MCPyV-ST (0.94, 1.0), ST-1-IHC (0.69, 1.0), real-time PCR for ST mRNA (1.0, no data), ST mRNA ISH (0.94, 1.0). Each of the MCPyV-pseudonegative (1/16) and -pseudopositive (1/16) diagnoses evaluated using CM2B4-IHC were accurately corrected by examinations for MCPyV-ST or its expression as well as real-time PCR for MCPyV-LT. Sensitivity of CM2B4-IHC (0.94) was superior to that of ST-1-IHC (0.69) but equal to that of ST mRNA-ISH (0.94). Specificities of ST-1-IHC (1.0) and ST mRNA-ISH (1.0) were superior to that of CM2B4-IHC (0.94).

Conclusions

Therefore, combined application of ST mRNA-ISH and ST-IHC as well as CM2B4-IHC is recommended and will contribute to the diagnostic accuracy for MCPyV infection in MCCs.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/9966295741144834.",2014-03-20 +22108294,Constructing circular phylogenetic networks from weighted quartets using simulated annealing.,"In this paper, we present a heuristic algorithm based on the simulated annealing, SAQ-Net, as a method for constructing phylogenetic networks from weighted quartets. Similar to QNet algorithm, SAQ-Net constructs a collection of circular weighted splits of the taxa set. This collection is represented by a split network. In order to show that SAQ-Net performs better than QNet, we apply these algorithm to both the simulated and actual data sets containing salmonella, Bees, Primates and Rubber data sets. Then we draw phylogenetic networks corresponding to outputs of these algorithms using SplitsTree4 and compare the results. We find that SAQ-Net produces a better circular ordering and phylogenetic networks than QNet in most cases. SAQ-Net has been implemented in Matlab and is available for download at http://bioinf.cs.ipm.ac.ir/softwares/saq.net.",2011-11-13 +22492192,A survey of error-correction methods for next-generation sequencing.,"

Unlabelled

Error Correction is important for most next-generation sequencing applications because highly accurate sequenced reads will likely lead to higher quality results. Many techniques for error correction of sequencing data from next-gen platforms have been developed in the recent years. However, compared with the fast development of sequencing technologies, there is a lack of standardized evaluation procedure for different error-correction methods, making it difficult to assess their relative merits and demerits. In this article, we provide a comprehensive review of many error-correction methods, and establish a common set of benchmark data and evaluation criteria to provide a comparative assessment. We present experimental results on quality, run-time, memory usage and scalability of several error-correction methods. Apart from providing explicit recommendations useful to practitioners, the review serves to identify the current state of the art and promising directions for future research.

Availability

All error-correction programs used in this article are downloaded from hosting websites. The evaluation tool kit is publicly available at: http://aluru-sun.ece.iastate.edu/doku.php?id=ecr.",2012-04-06 +23890284,"Excerpt from PHS guideline for reducing HIV, HBV and HCV transmission through organ transplantation.","The intent of the PHS guideline is to improve organ transplant recipient outcomes by reducing the risk of unexpected HIV, HBV and HCV transmission, while preserving the availability of high-quality organs. An evidence-based approach was used to identify the most relevant studies and reports on which to formulate the recommendations. This excerpt from the guideline comprises (1) the executive summary; (2) 12 criteria for assessment of risk factors for recent HIV, HBV and HCV infection; (3) 34 recommendations on risk assessment (screening) of living and deceased donors; testing of living and deceased donors; informed consent discussion with transplant candidates; testing of recipients pre- and posttransplant; collection and/or storage of donor and recipient specimens; and tracking and reporting of HIV, HBV and HCV; and (4) 20 recommendations for further study. For the PHS guideline in its entirety, including the background, methodology and primary evidence underlying the recommendations, refer to the source document in Public Health Reports, accessible at http://www.publichealthreports.org/issuecontents.cfm?Volume=128&Issue=4. For more in-depth information on the evidence base, including tables of all study-level data, refer to Solid Organ Transplantation and the Probability of Transmitting HIV, HBV or HCV: A Systematic Review to Support an Evidence-Based Guideline, accessible at http://stacks.cdc.gov/view/cdc/12164/.",2013-08-01 +22047014,Fast MCMC sampling for hidden Markov Models to determine copy number variations.,"

Background

Hidden Markov Models (HMM) are often used for analyzing Comparative Genomic Hybridization (CGH) data to identify chromosomal aberrations or copy number variations by segmenting observation sequences. For efficiency reasons the parameters of a HMM are often estimated with maximum likelihood and a segmentation is obtained with the Viterbi algorithm. This introduces considerable uncertainty in the segmentation, which can be avoided with Bayesian approaches integrating out parameters using Markov Chain Monte Carlo (MCMC) sampling. While the advantages of Bayesian approaches have been clearly demonstrated, the likelihood based approaches are still preferred in practice for their lower running times; datasets coming from high-density arrays and next generation sequencing amplify these problems.

Results

We propose an approximate sampling technique, inspired by compression of discrete sequences in HMM computations and by kd-trees to leverage spatial relations between data points in typical data sets, to speed up the MCMC sampling.

Conclusions

We test our approximate sampling method on simulated and biological ArrayCGH datasets and high-density SNP arrays, and demonstrate a speed-up of 10 to 60 respectively 90 while achieving competitive results with the state-of-the-art Bayesian approaches.

Availability

An implementation of our method will be made available as part of the open source GHMM library from http://ghmm.org.",2011-11-02 +22084254,Gene set analysis in the cloud.,"

Unlabelled

Cloud computing offers low cost and highly flexible opportunities in bioinformatics. Its potential has already been demonstrated in high-throughput sequence data analysis. Pathway-based or gene set analysis of expression data has received relatively less attention. We developed a gene set analysis algorithm for biomarker identification in the cloud. The resulting tool, YunBe, is ready to use on Amazon Web Services. Moreover, here we compare its performance to those obtained with desktop and computing cluster solutions.

Availability and implementation

YunBe is open-source and freely accessible within the Amazon Elastic MapReduce service at s3n://lrcv-crp-sante/app/yunbe.jar. Source code and user's guidelines can be downloaded from http://tinyurl.com/yunbedownload.",2011-11-13 +21743061,A Lasso regression model for the construction of microRNA-target regulatory networks.,"

Motivation

MicroRNAs have recently emerged as a major class of regulatory molecules involved in a broad range of biological processes and complex diseases. Construction of miRNA-target regulatory networks can provide useful information for the study and diagnosis of complex diseases. Many sequence-based and evolutionary information-based methods have been developed to identify miRNA-mRNA targeting relationships. However, as the amount of available miRNA and gene expression data grows, a more statistical and systematic method combining sequence-based binding predictions and expression-based correlation data becomes necessary for the accurate identification of miRNA-mRNA pairs.

Results

We propose a Lasso regression model for the identification of miRNA-mRNA targeting relationships that combines sequence-based prediction information, miRNA co-regulation, RISC availability and miRNA/mRNA abundance data. By comparing this modelling approach with two other known methods applied to three different datasets, we found that the Lasso regression model has considerable advantages in both sensitivity and specificity. The regression coefficients in the model can be used to determine the true regulatory efficacies in tissues and was demonstrated using the miRNA target site type data. Finally, by constructing the miRNA regulatory networks in two stages of prostate cancer (PCa), we found the several significant miRNA-hubbed network modules associated with PCa metastasis. In conclusion, the Lasso regression model is a robust and informative tool for constructing the miRNA regulatory networks for diagnosis and treatment of complex diseases.

Availability

The R program for predicting miRNA-mRNA targeting relationships using the Lasso regression model is freely available, along with the described datasets and resulting regulatory network, at http://biocompute.bmi.ac.cn/CZlab/alarmnet/. The source code is open for modification and application to other miRNA/mRNA expression datasets.

Contact

zhangcg@bmi.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-08 +23742238,iXora: exact haplotype inferencing and trait association.,"

Background

We address the task of extracting accurate haplotypes from genotype data of individuals of large F1 populations for mapping studies. While methods for inferring parental haplotype assignments on large F1 populations exist in theory, these approaches do not work in practice at high levels of accuracy.

Results

We have designed iXora (Identifying crossovers and recombining alleles), a robust method for extracting reliable haplotypes of a mapping population, as well as parental haplotypes, that runs in linear time. Each allele in the progeny is assigned not just to a parent, but more precisely to a haplotype inherited from the parent. iXora shows an improvement of at least 15% in accuracy over similar systems in literature. Furthermore, iXora provides an easy-to-use, comprehensive environment for association studies and hypothesis checking in populations of related individuals.

Conclusions

iXora provides detailed resolution in parental inheritance, along with the capability of handling very large populations, which allows for accurate haplotype extraction and trait association. iXora is available for non-commercial use from http://researcher.ibm.com/project/3430.",2013-06-06 +21789178,Prediction of drought-resistant genes in Arabidopsis thaliana using SVM-RFE.,"

Background

Identifying genes with essential roles in resisting environmental stress rates high in agronomic importance. Although massive DNA microarray gene expression data have been generated for plants, current computational approaches underutilize these data for studying genotype-trait relationships. Some advanced gene identification methods have been explored for human diseases, but typically these methods have not been converted into publicly available software tools and cannot be applied to plants for identifying genes with agronomic traits.

Methodology

In this study, we used 22 sets of Arabidopsis thaliana gene expression data from GEO to predict the key genes involved in water tolerance. We applied an SVM-RFE (Support Vector Machine-Recursive Feature Elimination) feature selection method for the prediction. To address small sample sizes, we developed a modified approach for SVM-RFE by using bootstrapping and leave-one-out cross-validation. We also expanded our study to predict genes involved in water susceptibility.

Conclusions

We analyzed the top 10 genes predicted to be involved in water tolerance. Seven of them are connected to known biological processes in drought resistance. We also analyzed the top 100 genes in terms of their biological functions. Our study shows that the SVM-RFE method is a highly promising method in analyzing plant microarray data for studying genotype-phenotype relationships. The software is freely available with source code at http://ccst.jlu.edu.cn/JCSB/RFET/.",2011-07-15 +21342552,Motif-All: discovering all phosphorylation motifs.,"

Background

Phosphorylation motifs represent common patterns around the phosphorylation site. The discovery of such kinds of motifs reveals the underlying regulation mechanism and facilitates the prediction of unknown phosphorylation event. To date, people have gathered large amounts of phosphorylation data, making it possible to perform substrate-driven motif discovery using data mining techniques.

Results

We describe an algorithm called Motif-All that is able to efficiently identify all statistically significant motifs. The proposed method explores a support constraint to reduce search space and avoid generating random artifacts. As the number of phosphorylated peptides are far less than that of unphosphorylated ones, we divide the mining process into two stages: The first step generates candidates from the set of phosphorylated sequences using only support constraint and the second step tests the statistical significance of each candidate using the odds ratio derived from the whole data set. Experimental results on real data show that Motif-All outperforms current algorithms in terms of both effectiveness and efficiency.

Conclusions

Motif-All is a useful tool for discovering statistically significant phosphorylation motifs. Source codes and data sets are available at: http://bioinformatics.ust.hk/MotifAll.rar.",2011-02-15 +24157696,Provisional CDC guidelines for the use and safety monitoring of bedaquiline fumarate (Sirturo) for the treatment of multidrug-resistant tuberculosis.,"Multidrug-resistant tuberculosis (MDR TB) is caused by Mycobacterium tuberculosis that is resistant to at least isoniazid and rifampin, the two most effective of the four first-line TB drugs (the other two drugs being ethambutol and pyrazinamide). MDR TB includes the subcategory of extensively drug-resistant TB (XDR TB), which is MDR TB with additional resistance to any fluoroquinolone and to at least one of three injectable anti-TB drugs (i.e., kanamycin, capreomycin, or amikacin). MDR TB is difficult to cure, requiring 18-24 months of treatment after sputum culture conversion with a regimen that consists of four to six medications with toxic side effects, and carries a mortality risk greater than that of drug-susceptible TB. Bedaquiline fumarate (Sirturo or bedaquiline) is an oral diarylquinoline. On December 28, 2012, on the basis of data from two Phase IIb trials (i.e., well-controlled trials to evaluate the efficacy and safety of drugs in patients with a disease or condition to be treated, diagnosed, or prevented), the Food and Drug Administration (FDA) approved use of bedaquiline under the provisions of the accelerated approval regulations for ""serious or life-threatening illnesses"" (21CFR314.500) (Cox EM. FDA accelerated approval letter to Janssen Research and Development. Available at http://www.accessdata.fda.gov/drugsatfda_docs/appletter/2012/204384Orig1s000ltr.pdf). 
This report provides provisional CDC guidelines for FDA-approved and unapproved, or off-label, uses of bedaquiline in certain populations, such as children, pregnant women, or persons with extrapulmonary MDR TB who were not included in the clinical trials for the drug. CDC's Division of TB Elimination developed these guidelines on the basis of expert opinion informed by data from systematic reviews and literature searches. This approach is different from the statutory standards that FDA uses when approving drugs and drug labeling. These guidelines are intended for health-care professionals who might use bedaquiline for the treatment of MDR TB for indicated and off-label uses. Aspects of these guidelines are not identical to current FDA-approved labeling for bedaquiline. Bedaquiline should be used with clinical expert consultation as part of combination therapy (minimum four-drug treatment regimen) and administered by direct observation to adults aged ≥18 years with a diagnosis of pulmonary MDR TB (Food and Drug Administration. SIRTURO [bedaquiline] tablets label. Available at http://www.accessdata.fda.gov/drugsatfda_docs/label/2012/204384s000lbl.pdf). Use of the drug also can be considered for individual patients in other categories (e.g., persons with extrapulmonary TB, children, pregnant women, or persons with HIV or other comorbid conditions) when treatment options are limited. However, further study is required before routine use of bedaquiline can be recommended in these populations. A registry for persons treated with bedaquiline is being implemented by Janssen Therapeutics to track patient outcomes, adverse reactions, laboratory testing results (e.g., diagnosis, drug susceptibility, and development of drug resistance), use of concomitant medications, and presence of other comorbid conditions. 
Suspected adverse reactions (i.e., any adverse event for which there is a reasonable possibility that the drug caused the adverse event) and serious adverse events (i.e., any adverse event that results in an outcome such as death, hospitalization, permanent disability, or a life-threatening situation) should be reported to Janssen Therapeutics at telephone 1-800-526-7736, to FDA at telephone 1-800-332-1088 or at http://www.fda.gov/medwatch, and to CDC's Emergency Operations Center at telephone 1-770-488-7100.",2013-10-01 +22467912,Reno: regularized non-parametric analysis of protein lysate array data.,"

Motivation

The reverse-phase protein lysate arrays have been used to quantify the relative expression levels of a protein in a number of cellular samples simultaneously. To avoid quantification bias due to mis-specification of commonly used parametric models, a nonparametric approach based on monotone response curves may be used. The existing methods, however, aggregate the protein concentration levels of replicates of each sample, and therefore fail to account for within-sample variability.

Results

We propose a method of regularization on protein concentration estimation at the level of individual dilution series to account for within-sample or within-group variability. We use an efficient algorithm to optimize an approximate objective function, with a data-adaptive approach to choose the level of shrinkage. Simulation results show that the proposed method quantifies protein concentration levels well. We show through the analysis of protein lysate array data from cell lines of different cancer groups that accounting for within-sample variability leads to better statistical analysis.

Availability

Code written in statistical programming language R is available at: http://odin.mdacc.tmc.edu/~jhhu/Reno",2012-03-30 +21696594,BioGraph: unsupervised biomedical knowledge discovery via automated hypothesis generation.,"We present BioGraph, a data integration and data mining platform for the exploration and discovery of biomedical information. The platform offers prioritizations of putative disease genes, supported by functional hypotheses. We show that BioGraph can retrospectively confirm recently discovered disease genes and identify potential susceptibility genes, outperforming existing technologies, without requiring prior domain knowledge. Additionally, BioGraph allows for generic biomedical applications beyond gene discovery. BioGraph is accessible at http://www.biograph.be.",2011-06-22 +23139595,CIBMAN: Database exploring Citrus biodiversity of Manipur.,"

Unlabelled

The rich wealth of Citrus genetic resources makes India to enjoy a remarkable position in the ""Citrus belt of the world"". We have developed CIBMAN, a unique database on Citrus biodiversity of Manipur which comprises 33 accessions collected through extensive survey for more than three years. CIBMAN provides integrated access to Citrus species through sophisticated web interface which has following capabilities a) morphological details, b) socio-economic details, c) taxonomic details and d) geographical distribution. Morphological variability among Citrus accessions is due to variance in their genome which contributes to diverse agronomical traits and diverse bioactive compounds of high value. This diverse gene pool can be potential source for genetic improvement of existing cultivars and rootstocks. Systematic collection, characterization and conservation of the underutilized or lesser exploited varieties is required for incorporating in breeding program and conserve the germplasm from ever going on genetic erosion. This database will be useful for scientific validations and updating of traditional wisdom in bioprospecting aspects especially industrialization of Citrus found in the state. Further, the features will be suited for detailed investigation on potential medicinal and edible Citrus that make CIBMAN a powerful tool for sustainable management.

Availability

http://ibsd.gov.in/cibman.",2012-09-11 +22039209,Correcting for cancer genome size and tumour cell content enables better estimation of copy number alterations from next-generation sequence data.,"

Motivation

Comparison of read depths from next-generation sequencing between cancer and normal cells makes the estimation of copy number alteration (CNA) possible, even at very low coverage. However, estimating CNA from patients' tumour samples poses considerable challenges due to infiltration with normal cells and aneuploid cancer genomes. Here we provide a method that corrects contamination with normal cells and adjusts for genomes of different sizes so that the actual copy number of each region can be estimated.

Results

The procedure consists of several steps. First, we identify the multi-modality of the distribution of smoothed ratios. Then we use the estimates of the mean (modes) to identify underlying ploidy and the contamination level, and finally we perform the correction. The results indicate that the method works properly to estimate genomic regions with gains and losses in a range of simulated data as well as in two datasets from lung cancer patients. It also proves a powerful tool when analysing publicly available data from two cell lines (HCC1143 and COLO829).

Availability

An R package, called CNAnorm, is available at http://www.precancer.leeds.ac.uk/cnanorm or from Bioconductor.

Contact

a.gusnanto@leeds.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-28 +23732274,AuthorReward: increasing community curation in biological knowledge wikis through automated authorship quantification.,"

Summary

Community curation—harnessing community intelligence in knowledge curation—bears great promise in dealing with the flood of biological knowledge. To exploit the full potential of the scientific community for knowledge curation, multiple biological wikis (bio-wikis) have been built to date. However, none of them have achieved a substantial impact on knowledge curation. One of the major limitations in bio-wikis is insufficient community participation, which is intrinsically because of lack of explicit authorship and thus no credit for community curation. To increase community curation in bio-wikis, here we develop AuthorReward, an extension to MediaWiki, to reward community-curated efforts in knowledge curation. AuthorReward quantifies researchers' contributions by properly factoring both edit quantity and quality and yields automated explicit authorship according to their quantitative contributions. AuthorReward provides bio-wikis with an authorship metric, helpful to increase community participation in bio-wikis and to achieve community curation of massive biological knowledge.

Availability

http://cbb.big.ac.cn/software.

Contact

zhangzhang@big.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-06-03 +25344707,Management of External Hemorrhage in Tactical Combat Casualty Care: Chitosan-Based Hemostatic Gauze Dressings--TCCC Guidelines-Change 13-05.,"Hemorrhage remains the leading cause of combat death and a major cause of death from potentially survivable injuries. Great strides have been made in controlling extremity hemorrhage with tourniquets, but not all injuries are amenable to tourniquet application. Topical hemostatic agents and dressings have also contributed to success in controlling extremity and compressible junctional hemorrhage, and their efficacy continues to increase as enhanced products are developed. Since the addition of Combat Gauze™ (Z-Medica Corporation, Wallingford, CT, USA; http://www.z-medica.com/) in April 2008 to the Tactical Combat Casualty Care (TCCC) Guidelines, there are consistent data from animal studies of severe hemorrhage that chitosan-based hemostatic gauze dressings developed for battlefield application are, at least, equally efficacious as Combat Gauze. Successful outcomes are also reported using newer chitosan-based dressings in civilian hospital-based surgical case reports and prehospital (battlefield) case reports and series. Additionally, there have been no noted complications or safety concerns in these cases or across many years of chitosan-based hemostatic dressing use in both the military and civilian prehospital sectors. Consequently, after a decade of clinical use, there is added benefit and a good safety record for using chitosan-based gauze dressings. For these reasons, many specific US military Special Operations Forces, NATO militaries, and emergency medical services (EMS) and law enforcement agencies have already implemented the widespread use of these new recommended chitosan-based hemostatic dressings. 
Based on the past battlefield success, this report proposes to keep Combat Gauze as the hemostatic dressing of choice along with the new addition of Celox™ Gauze (Medtrade Products Ltd., Crewe, UK; http://www.celoxmedical.com/usa/products /celox-gauze/) and ChitoGauze® (HemCon Medical Technologies, Portland, OR, USA; http://www.hemcon.com/) to the TCCC Guidelines.",2014-01-01 +24334393,GeneOnEarth: fitting genetic PC plots on the globe.,"Principal component (PC) plots have become widely used to summarize genetic variation of individuals in a sample. The similarity between genetic distance in PC plots and geographical distance has shown to be quite impressive. However, in most situations, individual ancestral origins are not precisely known or they are heterogeneously distributed; hence, they are hardly linked to a geographical area. We have developed GeneOnEarth, a user-friendly web-based tool to help geneticists to understand whether a linear isolation-by-distance model may apply to a genetic data set; thus, genetic distances among a set of individuals resemble geographical distances among their origins. Its main goal is to allow users to first apply a by-view Procrustes method to visually learn whether this model holds. To do that, the user can choose the exact geographical area from an on line 2D or 3D world map by using, respectively, Google Maps or Google Earth, and rotate, flip, and resize the images. GeneOnEarth can also compute the optimal rotation angle using Procrustes analysis and assess statistical evidence of similarity when a different rotation angle has been chosen by the user. An online version of GeneOnEarth is available for testing and using purposes at http://bios.ugr.es/GeneOnEarth.",2013-07-01 +22965118,Solitary restriction endonucleases in prokaryotic genomes.,"Prokaryotic restriction-modification (R-M) systems defend the host cell from the invasion of a foreign DNA. 
They comprise two enzymatic activities: specific DNA cleavage activity and DNA methylation activity preventing cleavage. Typically, these activities are provided by two separate enzymes: a DNA methyltransferase (MTase) and a restriction endonuclease (RE). In the absence of a corresponding MTase, an RE of Type II R-M system is highly toxic for the cell. Genes of the R-M system are linked in the genome in the vast majority of annotated cases. There are only a few reported cases in which the genes of MTase and RE from one R-M system are not linked. Nevertheless, a few hundreds solitary RE genes are present in the Restriction Enzyme Database (http://rebase.neb.com) annotations. Using the comparative genomic approach, we analysed 272 solitary RE genes. For 57 solitary RE genes we predicted corresponding MTase genes located distantly in a genome. Of the 272 solitary RE genes, 99 are likely to be fragments of RE genes. Various explanations for the existence of the remaining 116 solitary RE genes are also discussed.",2012-09-10 +21923778,EUCAST technical note on anidulafungin.,"The European Committee on Antimicrobial Susceptibility Testing-Subcommittee on Antifungal Susceptibility Testing has determined breakpoints for anidulafungin for Candida spp. This Technical Note is based on the EUCAST anidulafungin rationale document (available at: http://www.eucast.org). Species-specific breakpoints for C. albicans are S ≤0.03 mg/L and R >0.03 mg/L and for C. glabrata, C. tropicalis and C. krusei S ≤0.06 mg/L and R >0.06 mg/L. C. parapsilosis was not regarded a good target for anidulafungin. There are insufficient data to set breakpoints for other species. The breakpoints are based upon pharmacokinetic data, epidemiological cut-off values and clinical experience. Breakpoints will be reviewed regularly.",2011-09-16 +25231309,"Comparative genome analysis of four elephant endotheliotropic herpesviruses, EEHV3, EEHV4, EEHV5, and EEHV6, from cases of hemorrhagic disease or viremia.","

Unlabelled

The genomes of three types of novel endotheliotropic herpesviruses (elephant endotheliotropic herpesvirus 1A [EEHV1A], EEHV1B, and EEHV2) associated with lethal hemorrhagic disease in Asian elephants have been previously well characterized and assigned to a new Proboscivirus genus. Here we have generated 112 kb of DNA sequence data from segments of four more types of EEHV by direct targeted PCR from blood samples or necropsy tissue samples from six viremic elephants. Comparative phylogenetic analysis of nearly 30 protein-encoding genes of EEHV5 and EEHV6 show that they diverge uniformly by nearly 20% from their closest relatives, EEHV2 and EEHV1A, respectively, and are likely to have similar overall gene content and genome organization. In contrast, seven EEHV3 and EEHV4 genes analyzed differ from those of all other EEHVs by 37% and have a G+C content of 63% compared to just 42% for the others. Three strains of EEHV5 analyzed clustered into two partially chimeric subgroups EEHV5A and EEHV5B that diverge by 19% within three small noncontiguous segments totaling 6.2 kb. We conclude that all six EEHV types should be designated as independent species within a proposed new fourth Deltaherpesvirinae subfamily of mammalian herpesviruses. These virus types likely initially diverged close to 100 million years ago when the ancestors of modern elephants split from all other placental mammals and then evolved into two major branches with high- or low-G+C content about 35 million years ago. Later additional branching events subsequently generated three paired sister taxon lineages of which EEHV1 plus EEHV6, EEHV5 plus EEHV2, and EEHV4 plus EEHV3 may represent Asian and African elephant versions, respectively.

Importance

One of the factors threatening the long-term survival of endangered Asian elephants in both wild range countries and in captive breeding populations in zoos is a highly lethal hemorrhagic herpesvirus disease that has killed at least 70 young Asian elephants worldwide. The genomes of the first three types of EEHVs (or probosciviruses) identified have been partially characterized in the preceding accompanying paper (L. K. Richman, J.-C. Zong, E. M. Latimer, J. Lock, R. C. Fleischer, S. Y. Heaggans, and G. S. Hayward, J. Virol. 88:13523-13546, 2014, http://dx.doi.org/10.1128/JVI.01673-14). Here we have used PCR DNA sequence analysis from multiple segments of DNA amplified directly from blood or necropsy tissue samples of six more selected cases of hemorrhagic disease to partially characterize four other types of EEHVs from either Asian or African elephants. We propose that all six types and two chimeric subtypes of EEHV belong to multiple lineages of both AT-rich and GC-rich branches within a new subfamily to be named the Deltaherpesvirinae, which evolved separately from all other mammalian herpesviruses about 100 million years ago.",2014-09-17 +24273239,Coarse-grained versus atomistic simulations: realistic interaction free energies for real proteins.,"

Motivation

To assess whether two proteins will interact under physiological conditions, information on the interaction free energy is needed. Statistical learning techniques and docking methods for predicting protein-protein interactions cannot quantitatively estimate binding free energies. Full atomistic molecular simulation methods do have this potential, but are completely unfeasible for large-scale applications in terms of computational cost required. Here we investigate whether applying coarse-grained (CG) molecular dynamics simulations is a viable alternative for complexes of known structure.

Results

We calculate the free energy barrier with respect to the bound state based on molecular dynamics simulations using both a full atomistic and a CG force field for the TCR-pMHC complex and the MP1-p14 scaffolding complex. We find that the free energy barriers from the CG simulations are of similar accuracy as those from the full atomistic ones, while achieving a speedup of >500-fold. We also observe that extensive sampling is extremely important to obtain accurate free energy barriers, which is only within reach for the CG models. Finally, we show that the CG model preserves biological relevance of the interactions: (i) we observe a strong correlation between evolutionary likelihood of mutations and the impact on the free energy barrier with respect to the bound state; and (ii) we confirm the dominant role of the interface core in these interactions. Therefore, our results suggest that CG molecular simulations can realistically be used for the accurate prediction of protein-protein interaction strength.

Availability and implementation

The python analysis framework and data files are available for download at http://www.ibi.vu.nl/downloads/bioinformatics-2013-btt675.tgz.",2013-11-22 +24334392,Designing template-free predictor for targeting protein-ligand binding sites with classifier ensemble and spatial clustering.,"Accurately identifying the protein-ligand binding sites or pockets is of significant importance for both protein function analysis and drug design. Although much progress has been made, challenges remain, especially when the 3D structures of target proteins are not available or no homology templates can be found in the library, where the template-based methods are hard to be applied. In this paper, we report a new ligand-specific template-free predictor called TargetS for targeting protein-ligand binding sites from primary sequences. TargetS first predicts the binding residues along the sequence with ligand-specific strategy and then further identifies the binding sites from the predicted binding residues through a recursive spatial clustering algorithm. Protein evolutionary information, predicted protein secondary structure, and ligand-specific binding propensities of residues are combined to construct discriminative features; an improved AdaBoost classifier ensemble scheme based on random undersampling is proposed to deal with the serious imbalance problem between positive (binding) and negative (nonbinding) samples. Experimental results demonstrate that TargetS achieves high performances and outperforms many existing predictors. TargetS web server and data sets are freely available at: http://www.csbio.sjtu.edu.cn/bioinf/TargetS/ for academic use.",2013-07-01 +22984448,"Using object oriented bayesian networks to model linkage, linkage disequilibrium and mutations between STR markers.","In a number of applications there is a need to determine the most likely pedigree for a group of persons based on genetic markers. Adequate models are needed to reach this goal. 
The markers used to perform the statistical calculations can be linked and there may also be linkage disequilibrium (LD) in the population. The purpose of this paper is to present a graphical Bayesian Network framework to deal with such data. Potential LD is normally ignored and it is important to verify that the resulting calculations are not biased. Even if linkage does not influence results for regular paternity cases, it may have substantial impact on likelihood ratios involving other, more extended pedigrees. Models for LD influence likelihoods for all pedigrees to some degree and an initial estimate of the impact of ignoring LD and/or linkage is desirable, going beyond mere rules of thumb based on marker distance. Furthermore, we show how one can readily include a mutation model in the Bayesian Network; extending other programs or formulas to include such models may require considerable amounts of work and will in many case not be practical. As an example, we consider the two STR markers vWa and D12S391. We estimate probabilities for population haplotypes to account for LD using a method based on data from trios, while an estimate for the degree of linkage is taken from the literature. The results show that accounting for haplotype frequencies is unnecessary in most cases for this specific pair of markers. When doing calculations on regular paternity cases, the markers can be considered statistically independent. In more complex cases of disputed relatedness, for instance cases involving siblings or so-called deficient cases, or when small differences in the LR matter, independence should not be assumed. 
(The networks are freely available at http://arken.umb.no/~dakl/BayesianNetworks.).",2012-09-11 +23089793,Shack-Hartmann centroid detection using the spiral phase transform.,"We present a Shack-Hartmann (SH) centroid detection algorithm capable to measure in presence of strong noise, background illumination and spot modulating signals, which are typical limiting factors of traditional centroid detection algorithms. The proposed method is based on performing a normalization of the SH pattern using the spiral phase transform method and Fourier filtering. The spot centroids are then obtained using global thresholding and weighted average methods. We have tested the algorithm with simulations and experimental data obtaining satisfactory results. A complete MATLAB package that can reproduce all the results can be downloaded from [http://goo.gl/o2JhD].",2012-10-01 +21515453,[Design and realization of a microarray data analysis platform].,"

Objective

To design a platform for microarray data analysis and processing in the browser/server mode running in Linux operating system.

Methods

Based on the Apache HTTP server, the platform, programmed with Perl language, integrated R language and Bioconductor packages for processing and analysis of the input data of oligonucleotide arrays and two-color spotted arrays. Users were allowed to submit data and parameter configurations to the platform via the web page, and the results of analysis were also returned via the web page.

Results

With an easy operation and high performance, the platform fulfilled the functions of processing, quality assessment, biological annotation and statistical analysis of the data from oligonucleotide arrays and two-color spotted arrays. Using the platform, we analyzed the gene expression profiles in Mtb-stimulated macrophages of three clinical phenotypes, namely latent TB (LTB), pulmonary (PTB) and meningeal (TBM), and obtained valuable clues for identifying tuberculosis susceptibility genes. We also analyzed the effect of INH treatment on Mycobacterium tuberculosis gene expression in various dormancy models, such as hypoxia and KatG mutant, and found that a set of genes responded to INH treatment during exponential growth but not in dormancy, and that the overall number of differentially regulated genes was reduced in the cells in low metabolic state.

Conclusion

The platform we have constructed integrates comprehensive resources, and with a user-friendly interface, allows direct result visualization to facilitate microarray data analysis.",2011-04-01 +23969135,DNorm: disease name normalization with pairwise learning to rank.,"

Motivation

Despite the central role of diseases in biomedical research, there have been much fewer attempts to automatically determine which diseases are mentioned in a text—the task of disease name normalization (DNorm)—compared with other normalization tasks in biomedical text mining research.

Methods

In this article we introduce the first machine learning approach for DNorm, using the NCBI disease corpus and the MEDIC vocabulary, which combines MeSH® and OMIM. Our method is a high-performing and mathematically principled framework for learning similarities between mentions and concept names directly from training data. The technique is based on pairwise learning to rank, which has not previously been applied to the normalization task but has proven successful in large optimization problems for information retrieval.

Results

We compare our method with several techniques based on lexical normalization and matching, MetaMap and Lucene. Our algorithm achieves 0.782 micro-averaged F-measure and 0.809 macro-averaged F-measure, an increase over the highest performing baseline method of 0.121 and 0.098, respectively.

Availability

The source code for DNorm is available at http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/DNorm, along with a web-based demonstration and links to the NCBI disease corpus. Results on PubMed abstracts are available in PubTator: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/PubTator .",2013-08-21 +22689641,PredyFlexy: flexibility and local structure prediction from sequence.,"Protein structures are necessary for understanding protein function at a molecular level. Dynamics and flexibility of protein structures are also key elements of protein function. So, we have proposed to look at protein flexibility using novel methods: (i) using a structural alphabet and (ii) combining classical X-ray B-factor data and molecular dynamics simulations. First, we established a library composed of structural prototypes (LSPs) to describe protein structure by a limited set of recurring local structures. We developed a prediction method that proposes structural candidates in terms of LSPs and predict protein flexibility along a given sequence. Second, we examine flexibility according to two different descriptors: X-ray B-factors considered as good indicators of flexibility and the root mean square fluctuations, based on molecular dynamics simulations. We then define three flexibility classes and propose a method based on the LSP prediction method for predicting flexibility along the sequence. This method does not resort to sophisticate learning of flexibility but predicts flexibility from average flexibility of predicted local structures. The method is implemented in PredyFlexy web server. Results are similar to those obtained with the most recent, cutting-edge methods based on direct learning of flexibility data conducted with sophisticated algorithms. PredyFlexy can be accessed at http://www.dsimb.inserm.fr/dsimb_tools/predyflexy/.",2012-06-11 +21685085,Systematic exploration of error sources in pyrosequencing flowgram data.,"

Motivation

454 pyrosequencing, by Roche Diagnostics, has emerged as an alternative to Sanger sequencing when it comes to read lengths, performance and cost, but shows higher per-base error rates. Although there are several tools available for noise removal, targeting different application fields, data interpretation would benefit from a better understanding of the different error types.

Results

By exploring 454 raw data, we quantify to what extent different factors account for sequencing errors. In addition to the well-known homopolymer length inaccuracies, we have identified errors likely to originate from other stages of the sequencing process. We use our findings to extend the flowsim pipeline with functionalities to simulate these errors, and thus enable a more realistic simulation of 454 pyrosequencing data with flowsim.

Availability

The flowsim pipeline is freely available under the General Public License from http://biohaskell.org/Applications/FlowSim.

Contact

susanne.balzer@imr.no.",2011-07-01 +23297336,Reduced-dose low-voltage chest CT angiography with Sinogram-affirmed iterative reconstruction versus standard-dose filtered back projection.,"

Purpose

To evaluate image quality of low-voltage chest computed tomographic (CT) angiography with raw data-based iterative reconstruction (sinogram-affirmed iterative reconstruction) in comparison with image quality of standard-dose standard-voltage filtered back projection (FBP) CT.

Materials and methods

This prospective study was approved by the institutional review board, and the informed consent requirement was waived. Eighty consecutive patients who were referred for follow-up chest CT angiography underwent reduced-dose CT (hereafter, T2 examination) under technical conditions similar to those of the initial examination (hereafter, T1 examination), except the voltage selection was reduced by 20 kV with adaptation of the tube current to ensure a 50% reduction in CT dose index, and regular FBP was replaced by iterative reconstruction with sinogram-affirmed iterative reconstruction. The two techniques were compared by using paired tests (Student t test, Wilcoxon test, or McNemar test, according to the nature of variables).

Results

When compared with standard-dose T1 studies, reduced-dose T2 images showed: (a) significantly less objective noise at the level of the trachea on mediastinal and lung parenchymal images (P < .001) and no significant difference in objective noise at the level of the aorta on mediastinal images (P = .507); (b) significantly higher signal-to-noise and contrast-to-noise (P < .001) ratios; (c) similar visual perception of noise on mediastinal (P = .132) and lung (P = .366) images, mainly rated as moderate; and (d) similar overall subjective image quality (P = .405).

Conclusion

Raw data-based iterative reconstruction yielded equivalent subjective and improved objective image quality of low-voltage half-dose CT angiograms compared with standard-dose FBP CT images for an average dose-length product of less than 80 mGy · cm in this population.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12120414/-/DC1.",2013-01-07 +22955849,The collagen prolyl hydroxylases are novel transcriptionally silenced genes in lymphoma.,"

Background

Prolyl hydroxylation is a post-translational modification that affects the structure, stability and function of proteins including collagen by catalysing hydroxylation of proline to hydroxyproline through action of collagen prolyl hydroxylases 3 (C-P3H) and 4 (C-P4H). Three C-P3Hs (nomenclature was amended according to approval by the HGNC symbols and names at http://www.genenames.org/ and Entrez database at http://www.ncbi.nlm.nih.gov/gene) leucine proline-enriched proteoglycan (leprecan) 1 (Lepre1), leprecan-like 1 (Leprel1), leprecan-like 2 (Leprel2) and two paralogs Cartilage-Related Protein (CRTAP) and leprecan-like 4 (Leprel4) are found in humans. The C-P4Hs are tetrameric proteins comprising a variable α subunit, encoded by the P4HA1, P4HA2 and P4HA3 genes and a constant β subunit encoded by P4HB.

Methods

We used RT-PCR, qPCR, pyrosequencing, methylation-specific PCR, western blotting and immunohistochemistry to investigate expression and regulation of the C-P3H and C-P4H genes in B lymphomas and normal bone marrow.

Results

C-P3H and C-P4H are downregulated in lymphoma. Down-regulation is associated with methylation in the CpG islands and is detected in almost all common types of B-cell lymphoma, but the CpG islands are unmethylated or methylated at lower levels in DNA isolated from normal bone marrow and lymphoblastoid cell lines. Methylation of multiple C-P3H and C-P4H genes is present in some lymphomas, particularly Burkitt's lymphoma.

Conclusions

Methylation of C-P3H and C-P4H is common in B lymphomas and may have utility in differentiating disease subtypes.",2012-09-06 +21592079,3dswap-pred: prediction of 3D domain swapping from protein sequence using Random Forest approach.,"3D domain swapping is a protein structural phenomenon that mediates the formation of the higher order oligomers in a variety of proteins with different structural and functional properties. 3D domain swapping is associated with a variety of biological functions ranging from oligomerization to pathological conformational diseases. 3D domain swapping is realised subsequent to structure determination where the protein is observed in the swapped conformation in the oligomeric state. This is a limiting step to understand this important structural phenomenon in a large scale from the growing sequence data. A new machine learning approach, 3dswap-pred, has been developed for the prediction of 3D domain swapping in protein structures from mere sequence data using the Random Forest approach. 3Dswap-pred is implemented using a positive sequence dataset derived from literature based structural curation of 297 structures. A negative sequence dataset is obtained from 462 SCOP domains using a new sequence data mining approach and a set of 126 sequencederived features. Statistical validation using an independent dataset of 68 positive sequences and 313 negative sequences revealed that 3dswap-pred achieved an accuracy of 63.8%. A webserver is also implemented using the 3dswap-pred Random Forest model. The server is available from the URL: http://caps.ncbs.res.in/3dswap-pred.",2011-10-01 +23083219,dsPIG: a tool to predict imprinted genes from the deep sequencing of whole transcriptomes.,"

Background

Dysregulation of imprinted genes, which are expressed in a parent-of-origin-specific manner, plays an important role in various human diseases, such as cancer and behavioral disorder. To date, however, fewer than 100 imprinted genes have been identified in the human genome. The recent availability of high-throughput technology makes it possible to have large-scale prediction of imprinted genes. Here we propose a Bayesian model (dsPIG) to predict imprinted genes on the basis of allelic expression observed in mRNA-Seq data of independent human tissues.

Results

Our model (dsPIG) was capable of identifying imprinted genes with high sensitivity and specificity and a low false discovery rate when the number of sequenced tissue samples was fairly large, according to simulations. By applying dsPIG to the mRNA-Seq data, we predicted 94 imprinted genes in 20 cerebellum samples and 57 imprinted genes in 9 diverse tissue samples with expected low false discovery rates. We also assessed dsPIG using previously validated imprinted and non-imprinted genes. With simulations, we further analyzed how imbalanced allelic expression of non-imprinted genes or different minor allele frequencies affected the predictions of dsPIG. Interestingly, we found that, among biallelically expressed genes, at least 18 genes expressed significantly more transcripts from one allele than the other among different individuals and tissues.

Conclusion

With the prevalence of the mRNA-Seq technology, dsPIG has become a useful tool for analysis of allelic expression and large-scale prediction of imprinted genes. For ease of use, we have set up a web service and also provided an R package for dsPIG at http://www.shoudanliang.com/dsPIG/.",2012-10-19 +22973286,Characterization and Prediction of Protein Phosphorylation Hotspots in Arabidopsis thaliana.,"The regulation of protein function by modulating the surface charge status via sequence-locally enriched phosphorylation sites (P-sites) in so called phosphorylation ""hotspots"" has gained increased attention in recent years. We set out to identify P-hotspots in the model plant Arabidopsis thaliana. We analyzed the spacing of experimentally detected P-sites within peptide-covered regions along Arabidopsis protein sequences as available from the PhosPhAt database. Confirming earlier reports (Schweiger and Linial, 2010), we found that, indeed, P-sites tend to cluster and that distributions between serine and threonine P-sites to their respected closest next P-site differ significantly from those for tyrosine P-sites. The ability to predict P-hotspots by applying available computational P-site prediction programs that focus on identifying single P-sites was observed to be severely compromised by the inevitable interference of nearby P-sites. We devised a new approach, named HotSPotter, for the prediction of phosphorylation hotspots. HotSPotter is based primarily on local amino acid compositional preferences rather than sequence position-specific motifs and uses support vector machines as the underlying classification engine. HotSPotter correctly identified experimentally determined phosphorylation hotspots in A. thaliana with high accuracy. Applied to the Arabidopsis proteome, HotSPotter-predicted 13,677 candidate P-hotspots in 9,599 proteins corresponding to 7,847 unique genes. 
Hotspot containing proteins are involved predominantly in signaling processes confirming the surmised modulating role of hotspots in signaling and interaction events. Our study provides new bioinformatics means to identify phosphorylation hotspots and lays the basis for further investigating novel candidate P-hotspots. All phosphorylation hotspot annotations and predictions have been made available as part of the PhosPhAt database at http://phosphat.mpimp-golm.mpg.de.",2012-09-05 +21672185,Efficient alignment of pyrosequencing reads for re-sequencing applications.,"

Background

Over the past few years, new massively parallel DNA sequencing technologies have emerged. These platforms generate massive amounts of data per run, greatly reducing the cost of DNA sequencing. However, these techniques also raise important computational difficulties mostly due to the huge volume of data produced, but also because of some of their specific characteristics such as read length and sequencing errors. Among the most critical problems is that of efficiently and accurately mapping reads to a reference genome in the context of re-sequencing projects.

Results

We present an efficient method for the local alignment of pyrosequencing reads produced by the GS FLX (454) system against a reference sequence. Our approach explores the characteristics of the data in these re-sequencing applications and uses state of the art indexing techniques combined with a flexible seed-based approach, leading to a fast and accurate algorithm which needs very little user parameterization. An evaluation performed using real and simulated data shows that our proposed method outperforms a number of mainstream tools on the quantity and quality of successful alignments, as well as on the execution time.

Conclusions

The proposed methodology was implemented in a software tool called TAPyR--Tool for the Alignment of Pyrosequencing Reads--which is publicly available from http://www.tapyr.net.",2011-05-16 +23435070,Improvements on bicriteria pairwise sequence alignment: algorithms and applications.,"

Motivation

In this article, we consider the bicriteria pairwise sequence alignment problem and propose extensions of dynamic programming algorithms for several problem variants with a novel pruning technique that efficiently reduces the number of states to be processed. Moreover, we present a method for the construction of phylogenetic trees based on this bicriteria framework. Two exemplary cases are discussed.

Results

Numerical results on a real dataset show that this approach is very fast in practice. The pruning technique saves up to 90% in memory usage and 80% in CPU time. Based on this method, phylogenetic trees are constructed from real-life data. In addition to providing complementary information, some of these trees match those obtained by the Maximum Likelihood method.

Availability and implementation

Source code is freely available for download at URL http://eden.dei.uc.pt/paquete/MOSAL, implemented in C and supported on Linux, MAC OS and MS Windows.",2013-02-23 +22537047,High-resolution genetic mapping with pooled sequencing.,"

Background

Modern genetics has been transformed by high-throughput sequencing. New experimental designs in model organisms involve analyzing many individuals, pooled and sequenced in groups for increased efficiency. However, the uncertainty from pooling and the challenge of noisy sequencing data demand advanced computational methods.

Results

We present MULTIPOOL, a computational method for genetic mapping in model organism crosses that are analyzed by pooled genotyping. Unlike other methods for the analysis of pooled sequence data, we simultaneously consider information from all linked chromosomal markers when estimating the location of a causal variant. Our use of informative sequencing reads is formulated as a discrete dynamic Bayesian network, which we extend with a continuous approximation that allows for rapid inference without a dependence on the pool size. MULTIPOOL generalizes to include biological replicates and case-only or case-control designs for binary and quantitative traits.

Conclusions

Our increased information sharing and principled inclusion of relevant error sources improve resolution and accuracy when compared to existing methods, localizing associations to single genes in several cases. MULTIPOOL is freely available at http://cgs.csail.mit.edu/multipool/.",2012-04-19 +22951932,Comparative genome analysis of three eukaryotic parasites with differing abilities to transform leukocytes reveals key mediators of Theileria-induced leukocyte transformation.,"We sequenced the genome of Theileria orientalis, a tick-borne apicomplexan protozoan parasite of cattle. The focus of this study was a comparative genome analysis of T. orientalis relative to other highly pathogenic Theileria species, T. parva and T. annulata. T. parva and T. annulata induce transformation of infected cells of lymphocyte or macrophage/monocyte lineages; in contrast, T. orientalis does not induce uncontrolled proliferation of infected leukocytes and multiplies predominantly within infected erythrocytes. While synteny across homologous chromosomes of the three Theileria species was found to be well conserved overall, subtelomeric structures were found to differ substantially, as T. orientalis lacks the large tandemly arrayed subtelomere-encoded variable secreted protein-encoding gene family. Moreover, expansion of particular gene families by gene duplication was found in the genomes of the two transforming Theileria species, most notably, the TashAT/TpHN and Tar/Tpr gene families. Gene families that are present only in T. parva and T. annulata and not in T. orientalis, Babesia bovis, or Plasmodium were also identified. Identification of differences between the genome sequences of Theileria species with different abilities to transform and immortalize bovine leukocytes will provide insight into proteins and mechanisms that have evolved to induce and regulate this process. The T. 
orientalis genome database is available at http://totdb.czc.hokudai.ac.jp/.",2012-09-04 +21389147,MicroRNA transfection and AGO-bound CLIP-seq data sets reveal distinct determinants of miRNA action.,"Microarray expression analyses following miRNA transfection/inhibition and, more recently, Argonaute cross-linked immunoprecipitation (CLIP)-seq assays have been used to detect miRNA target sites. CLIP and expression approaches measure differing stages of miRNA functioning-initial binding of the miRNP complex and subsequent message repression. We use nonparametric predictive models to characterize a large number of known target and flanking features, utilizing miRNA transfection, HITS-CLIP, and PAR-CLIP data. In particular, we utilize the precise spatial information provided by CLIP-seq to analyze the predictive effect of target flanking features. We observe distinct target determinants between expression-based and CLIP-based data. Target flanking features such as flanking region conservation are an important AGO-binding determinant-we hypothesize that CLIP experiments have a preference for strongly bound miRNP-target interactions involving adjacent RNA-binding proteins that increase the strength of cross-linking. In contrast, seed-related features are major determinants in expression-based studies, but less so for CLIP-seq studies, and increased miRNA concentrations typical of transfection studies contribute to this difference. While there is a good overlap between miRNA targets detected by miRNA transfection and CLIP-seq, the detection of CLIP-seq targets is largely independent of the level of subsequent mRNA degradation. Also, models built using CLIP-seq data show strong predictive power between independent CLIP-seq data sets, but are not strongly predictive for expression change. Similarly, models built from expression data are not strongly predictive for CLIP-seq data sets, supporting the finding that the determinants of miRNA binding and mRNA degradation differ. 
Predictive models and results are available at http://servers.binf.ku.dk/antar/.",2011-03-09 +22765348,Effect of thiazole orange doubly labeled thymidine on DNA duplex formation.,"Nucleic acid oligonucleotides are widely used in hybridization experiments for specific detection of complementary nucleic acid sequences. For design and application of oligonucleotides, an understanding of their thermodynamic properties is essential. Recently, exciton-controlled hybridization-sensitive fluorescent oligonucleotides (ECHOs) were developed as uniquely labeled DNA oligomers containing commonly one thymidine having two covalently linked thiazole orange dye moieties. The fluorescent signal of an ECHO is strictly hybridization-controlled, where the dye moieties have to intercalate into double-stranded DNA for signal generation. Here we analyzed the hybridization thermodynamics of ECHO/DNA duplexes, and thermodynamic parameters were obtained from melting curves of 64 ECHO/DNA duplexes measured by ultraviolet absorbance and fluorescence. Both methods demonstrated a substantial increase in duplex stability (ΔΔG°(37) ~ -2.6 ± 0.7 kcal mol(-1)) compared to that of DNA/DNA duplexes of the same sequence. With the exception of T·G mismatches, this increased stability was mostly unaffected by other mismatches in the position opposite the labeled nucleotide. A nearest neighbor model was constructed for predicting thermodynamic parameters for duplex stability. Evaluation of the nearest neighbor parameters by cross validation tests showed higher predictive reliability for the fluorescence-based than the absorbance-based parameters. Using our experimental data, a tool for predicting the thermodynamics of formation of ECHO/DNA duplexes was developed that is freely available at http://genome.gsc.riken.jp/echo/thermodynamics/. 
It provides reliable thermodynamic data for using the unique features of ECHOs in fluorescence-based experiments.",2012-07-25 +22945787,Updating annotations with the distributed annotation system and the automated sequence annotation pipeline.,"

Summary

The integration between BioDAS ProServer and Automated Sequence Annotation Pipeline (ASAP) provides an interface for querying diverse annotation sources, chaining and linking results, and standardizing the output using the Distributed Annotation System (DAS) protocol. This interface allows pipeline plans in ASAP to be integrated into any system using HTTP and also allows the information returned by ASAP to be included in the DAS registry for use in any DAS-aware system. Three example implementations have been developed: the first accesses TRANSFAC information to automatically create gene sets for the Coordinated Gene Activity in Pattern Sets (CoGAPS) algorithm; the second integrates annotations from multiple array platforms and provides unified annotations in an R environment; and the third wraps the UniProt database for integration with the SPICE DAS client.

Availability

Source code for ASAP 2.7 and the DAS 1.6 interface is available under the GNU public license. Proserver 2.20 is free software available from SourceForge. Scripts for installation and configuration on Linux are provided at our website: http://www.rits.onc.jhmi.edu/dbb/custom/A6/",2012-09-03 +24298272,Models of somatic hypermutation targeting and substitution based on synonymous mutations from high-throughput immunoglobulin sequencing data.,"Analyses of somatic hypermutation (SHM) patterns in B cell immunoglobulin (Ig) sequences contribute to our basic understanding of adaptive immunity, and have broad applications not only for understanding the immune response to pathogens, but also to determining the role of SHM in autoimmunity and B cell cancers. Although stochastic, SHM displays intrinsic biases that can confound statistical analysis, especially when combined with the particular codon usage and base composition in Ig sequences. Analysis of B cell clonal expansion, diversification, and selection processes thus critically depends on an accurate background model for SHM micro-sequence targeting (i.e., hot/cold-spots) and nucleotide substitution. Existing models are based on small numbers of sequences/mutations, in part because they depend on data from non-coding regions or non-functional sequences to remove the confounding influences of selection. Here, we combine high-throughput Ig sequencing with new computational analysis methods to produce improved models of SHM targeting and substitution that are based only on synonymous mutations, and are thus independent of selection. The resulting ""S5F"" models are based on 806,860 Synonymous mutations in 5-mer motifs from 1,145,182 Functional sequences and account for dependencies on the adjacent four nucleotides (two bases upstream and downstream of the mutation). 
The estimated profiles can explain almost half of the variance in observed mutation patterns, and clearly show that both mutation targeting and substitution are significantly influenced by neighboring bases. While mutability and substitution profiles were highly conserved across individuals, the variability across motifs was found to be much larger than previously estimated. The model and method source code are made available at http://clip.med.yale.edu/SHM.",2013-11-15 +22646700,Transcriptome of the adult female malaria mosquito vector Anopheles albimanus.,"

Background

Human Malaria is transmitted by mosquitoes of the genus Anopheles. Transmission is a complex phenomenon involving biological and environmental factors of humans, parasites and mosquitoes. Among more than 500 anopheline species, only a few species from different branches of the mosquito evolutionary tree transmit malaria, suggesting that their vectorial capacity has evolved independently. Anopheles albimanus (subgenus Nyssorhynchus) is an important malaria vector in the Americas. The divergence time between Anopheles gambiae, the main malaria vector in Africa, and the Neotropical vectors has been estimated to be 100 My. To better understand the biological basis of malaria transmission and to develop novel and effective means of vector control, there is a need to explore the mosquito biology beyond the An. gambiae complex.

Results

We sequenced the transcriptome of the An. albimanus adult female. By combining Sanger, 454 and Illumina sequences from cDNA libraries derived from the midgut, cuticular fat body, dorsal vessel, salivary gland and whole body, we generated a single, high-quality assembly containing 16,669 transcripts, 92% of which mapped to the An. darlingi genome and covered 90% of the core eukaryotic genome. Bidirectional comparisons between the An. gambiae, An. darlingi and An. albimanus predicted proteomes allowed the identification of 3,772 putative orthologs. More than half of the transcripts had a match to proteins in other insect vectors and had an InterPro annotation. We identified several protein families that may be relevant to the study of Plasmodium-mosquito interaction. An open source transcript annotation browser called GDAV (Genome-Delinked Annotation Viewer) was developed to facilitate public access to the data generated by this and future transcriptome projects.

Conclusions

We have explored the adult female transcriptome of one important New World malaria vector, An. albimanus. We identified protein-coding transcripts involved in biological processes that may be relevant to the Plasmodium lifecycle and can serve as the starting point for searching targets for novel control strategies. Our data increase the available genomic information regarding An. albimanus several hundred-fold, and will facilitate molecular research in medical entomology, evolutionary biology, genomics and proteomics of anopheline mosquito vectors. The data reported in this manuscript is accessible to the community via the VectorBase website (http://www.vectorbase.org/Other/AdditionalOrganisms/).",2012-05-30 +23977285,Geptop: a gene essentiality prediction tool for sequenced bacterial genomes based on orthology and phylogeny.,"Integrative genomics predictors, which score highly in predicting bacterial essential genes, would be unfeasible in most species because the data sources are limited. We developed a universal approach and tool designated Geptop, based on orthology and phylogeny, to offer gene essentiality annotations. In a series of tests, our Geptop method yielded higher area under curve (AUC) scores in the receiver operating curves than the integrative approaches. In the ten-fold cross-validations among randomly upset samples, Geptop yielded an AUC of 0.918, and in the cross-organism predictions for 19 organisms Geptop yielded AUC scores between 0.569 and 0.959. A test applied to the very recently determined essential gene dataset from the Porphyromonas gingivalis, which belongs to a phylum different with all of the above 19 bacterial genomes, gave an AUC of 0.77. Therefore, Geptop can be applied to any bacterial species whose genome has been sequenced. 
Compared with the essential genes uniquely identified by the lethal screening, the essential genes predicted only by Gepop are associated with more protein-protein interactions, especially in the three bacteria with lower AUC scores (<0.7). This may further illustrate the reliability and feasibility of our method in some sense. The web server and standalone version of Geptop are available at http://cefg.uestc.edu.cn/geptop/ free of charge. The tool has been run on 968 bacterial genomes and the results are accessible at the website.",2013-08-15 +22848493,"qPMS7: a fast algorithm for finding (ℓ, d)-motifs in DNA and protein sequences.","Detection of rare events happening in a set of DNA/protein sequences could lead to new biological discoveries. One kind of such rare events is the presence of patterns called motifs in DNA/protein sequences. Finding motifs is a challenging problem since the general version of motif search has been proven to be intractable. Motifs discovery is an important problem in biology. For example, it is useful in the detection of transcription factor binding sites and transcriptional regulatory elements that are very crucial in understanding gene function, human disease, drug design, etc. Many versions of the motif search problem have been proposed in the literature. One such is the (ℓ, d)-motif search (or Planted Motif Search (PMS)). A generalized version of the PMS problem, namely, Quorum Planted Motif Search (qPMS), is shown to accurately model motifs in real data. However, solving the qPMS problem is an extremely difficult task because a special case of it, the PMS Problem, is already NP-hard, which means that any algorithm solving it can be expected to take exponential time in the worse case scenario. In this paper, we propose a novel algorithm named qPMS7 that tackles the qPMS problem on real data as well as challenging instances. 
Experimental results show that our Algorithm qPMS7 is on an average 5 times faster than the state-of-art algorithm. The executable program of Algorithm qPMS7 is freely available on the web at http://pms.engr.uconn.edu/downloads/qPMS7.zip. Our online motif discovery tools that use Algorithm qPMS7 are freely available at http://pms.engr.uconn.edu or http://motifsearch.com.",2012-07-24 +23873892,Testing for presence of known and unknown molecules in imaging mass spectrometry.,"

Motivation

Imaging mass spectrometry has emerged in the past decade as a label-free, spatially resolved and multi-purpose bioanalytical technique for direct analysis of biological samples. However, solving two everyday data analysis problems still requires expert judgment: (i) the detection of unknown molecules and (ii) the testing for presence of known molecules.

Results

We developed a measure of spatial chaos of a molecular image corresponding to a mass-to-charge value, which is a proxy for the molecular presence, and developed methods solving considered problems. The statistical evaluation was performed on a dataset from a rat brain section with test sets of molecular images selected by an expert. The measure of spatial chaos has shown high agreement with expert judges. The method for detection of unknown molecules allowed us to find structured molecular images corresponding to spectral peaks of any low intensity. The test for presence applied to a list of endogenous peptides ranked them according to the proposed measure of their presence in the sample.

Availability

The source code and test sets of mass-to-charge images are available at http://www.math.uni-bremen.de/~theodore.

Supplementary information

Supplementary materials are available at Bioinformatics online.

Contact

theodore@uni-bremen.de.",2013-07-19 +22993350,A review of dose-dense temozolomide alone and in combination with bevacizumab in patients with first relapse of glioblastoma.,"Treatment of patients with glioblastoma improved dramatically when concomitant and adjuvant temozolomide was added to external radiation therapy. The addition of this new treatment schedule as well as the improvements in individually-tailored radiation treatment, has resulted in a larger proportion of patients being fit for further treatment after first relapse. One of the most interesting combinations that have started to become part of the therapeutic arsenal in the daily clinic is dose-dense temozolomide in combination with bevacizumab. We reviewed and compiled the literature concerning the present topic based on a search of the PubMed database (http://www.ncbi.nlm.nih.gov/pubmed/) for the years between 1995 and 2011. The clinical studies that have been performed are small and divergent, making it difficult to grade the scientific evidence for the combinatorial treatment of dose-dense temozolomide and bevacizumab. However, the available studies and the experience we have at our departments suggest that this combination is of interest for glioblastoma patients experiencing first relapse. More randomized clinical trials are needed in order to establish the standard of treatment at first relapse in patients with glioblastoma.",2012-09-01 +30727187,First Report of Colletotrichum acutatum sensu lato Causing Leaf Curling and Petiole Anthracnose on Celery (Apium graveolens) in Michigan.,"In September 2010, celery plants with leaf cupping and petiole twisting were observed in commercial production fields located in Barry, Kent, Newago, and Van Buren Counties in Michigan. Long, elliptical lesions were observed on petioles but signs (mycelia, conidia, or acervuli) were not readily observed. Celery petioles were incubated in humid chambers (acrylic boxes with wet paper towels). 
After 24 h, conidia corresponding to the genus Colletotrichum were observed. Isolations were performed by excising pieces of celery tissue from the lesion margin and placing them on potato dextrose agar (PDA) amended with 30 ppm of rifampicin and 100 ppm of ampicillin. Plates were incubated at 21 ± 2°C under fluorescent light for 5 days. Fungal colony morphology was gray with salmon-colored masses of spores when viewed from above, and carmine when viewed from below. Isolates were single-spored and placed on 30% glycerol in -20°C, and cryoconservation media (20% glycerol, 0.04% yeast extract, 0.1% malt extract, 0.04% glucose, 0.02% K2HPO4) at -80°C. Conidia were 8.5 to 12.0 × 2.8 to 4.0 μm and straight fusiform in shape. Three isolates were confirmed as C. acutatum sensu lato based on sequences of the internal transcribed spacer (ITS) region of the nuclear ribosomal RNA and the 1-kb intron of the glutamine synthase gene (3), both with 100% similarity with Glomerella acutata sequences. Sequences were submitted to GenBank (Accession Nos. JQ951599 and JQ951600 for ITS and GS, respectively). Additionally, C. acutatum specific primer CaIntg was used in combination with the primer ITS4 on 54 isolates from symptomatic celery plants, obtaining the expected 490-pb fragment (1). Koch's postulates were completed by inoculating 4-week-old celery seedlings of cultivars Sabroso, Green Bay, and Dutchess using three plants per cultivar. Prior to inoculation, seedlings were incubated for 16 h in high relative humidity (≥95%) by enclosing the plants in humid chambers. Seven-day-old C. acutatum s. l. colonies were used to prepare the inoculum. Seedlings were spray-inoculated with a C. acutatum s. l. conidial suspension of 1 × 106 conidia/ml in double-distilled water plus Tween 0.01%. Two control seedlings per cultivar were sprayed with sterile, double-distilled water plus 0.01% Tween. 
Plants were enclosed in bags for 96 h post inoculation and incubated in a greenhouse at 27°C by day/20°C by night with a 16-h photoperiod. Leaf curling was observed on all inoculated plants of the three cultivars 4 days after inoculation (DAI). Petiole lesions were observed 14 to 21 DAI. Conidia were observed in lesions after incubation in high humidity at 21 ± 2°C for 24 to 72 h. Symptomatic tissue was excised and cultured onto PDA and resulted in C. acutatum colonies. Control plants remained symptomless. C. acutatum (4) and C. orbiculare (2) were reported to cause celery leaf curl in Australia in 1966 (2,4). To our knowledge, this is the first report of C. acutatum s. l. infecting celery in Michigan. References: (1) A. E. Brown et al. Phytopathology 86:523, 1996. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Syst. Mycol. Microbiol. Lab., USDA-ARS. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 10 September 2010. (3) J. C. Guerber et al. Mycologia 95:872, 2003. (4) D. G. Wright and J. B. Heaton. Austral. Plant Pathol. 20:155, 1991.",2012-09-01 +30727180,First Report of Leaf Spot on Tibouchina semidecandra Caused by Beltrania rhombica in China.,"Tibouchina semidecandra Cogn. is a popular ornamental plant in tropical and subtropical areas (1). In August 2011, a leaf spot was observed on approximately 70% of 5,000 potted plants of T. semidecandra in a nursery in Zhongshan, Guangdong Province, China. Each leaf spot was round with a brown center surrounded by a reddish brown border, and ranged from 8 to 10 mm in diameter. A fungus was isolated consistently from the lesions by surface-sterilizing symptomatic leaf sections (each 3 cm2) with 75% alcohol for 8 s, washing the sections with sterile water, soaking the sections in 3% NaOCl for 15 s, rinsing the sections with sterile water three times, and then placing the sections on potato dextrose agar (PDA) at 28°C. 
Each of three single-spore isolates on PDA produced gray, floccose colonies that reached 70 mm in diameter after 5 days at 28°C. Setae were dark brown, straight, erect, distantly and inconspicuously septate, and 125 to 193 × 3.0 to 4.5 μm. Conidiophores were light brown, cylindrical, simple or sometimes branched at the base, and 105 to 202 × 3 to 5 μm. Separating cells were hyaline, oval, and 12 to 13 × 4 to 5 μm. Conidia were unequally biconic, unicellular, dark brown with a pale brown or subhyaline band just above the widest part, and 26 to 31 × 8.5 to 12 μm (mean 27.3 × 10.6 μm) with a conspicuous appendage at the apex that was 6 to 14 × 1 to 1.8 μm. These characteristics were consistent with the description of Beltrania rhombica Penz. (3). The internal transcribed spacer (ITS) region of the ribosomal DNA (rDNA) of one isolate (GenBank Accession No. JN853777) was amplified using primers ITS4 and ITS5 (4) and sequenced. A BLAST search in GenBank revealed 97% similarity to the ITS sequence of an isolate of B. rhombica (GU797390.1). To confirm pathogenicity of the isolate, ten detached leaves from 3-month-old plants of T. semidecandra 'Purple Glorybush' were inoculated in vitro with 5-mm diameter, colonized mycelial plugs from the periphery of 5-day-old cultures of the isolated fungus. The agar plugs were put on the leaf surface and secured with sterile, moist cotton. Sterile PDA plugs were similarly used as the control treatment on ten detached leaves. Leaves were placed in petri dishes and incubated in a growth chamber with 12 h of light/day at 28°C. Necrotic lesions appeared on leaves after 2 to 3 days of incubation, whereas control leaves inoculated with sterile PDA plugs remained asymptomatic. B. rhombica was consistently reisolated from the lesions using the same method described above, but was not reisolated from the control leaves. Although there are approximately 77 reported hosts of B. rhombica (2), to our knowledge, this is the first report of B. 
rhombica causing a leaf spot on T. semidecandra. Because the disease caused foliar damage and reduced the ornamental value of the nursery plants, control measures may need to be implemented for this species in nurseries. References: (1) M. Faravani and B. H. Bakar. J. Food Agric. Env. Pap. 5:234, 2007. (4) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 30 Mar. 2012. (2) K. A. Pirozyski and S. D. Patil. Can. J. Bot. Pap. 48:567, 1970. (3) T. J. White et al. Page 315 in: PCR Protocols: A Guide to Methods and Applications. M. A. Innis et al., eds. Academic Press, San Diego, CA, 1990.",2012-09-01 +22881376,Low-density lipoprotein receptor gene familial hypercholesterolemia variant database: update and pathological assessment.,"Familial hypercholesterolemia (FH) is caused predominately by variants in the low-density lipoprotein receptor gene (LDLR). We report here an update of the UCL LDLR variant database to include variants reported in the literature and in-house between 2008 and 2010, transfer of the database to LOVDv.2.0 platform (https://grenada.lumc.nl/LOVD2/UCL-Heart/home.php?select_db=LDLR) and pathogenicity analysis. The database now contains over 1288 different variants reported in FH patients: 55% exonic substitutions, 22% exonic small rearrangements (<100 bp), 11% large rearrangements (>100 bp), 2% promoter variants, 10% intronic variants and 1 variant in the 3' untranslated sequence. The distribution and type of newly reported variants closely matches that of the 2008 database, and we have used these variants (n= 223) as a representative sample to assess the utility of standard open access software (PolyPhen, SIFT, refined SIFT, Neural Network Splice Site Prediction Tool, SplicePort and NetGene2) and additional analyses (Single Amino Acid Polymorphism database, analysis of conservation and structure and Mutation Taster) for pathogenicity prediction. 
In combination, these techniques have enabled us to assign with confidence pathogenic predictions to 8/8 in-frame small rearrangements and 8/9 missense substitutions with previously discordant results from PolyPhen and SIFT analysis. Overall, we conclude that 79% of the reported variants are likely to be disease causing.",2012-09-01 +23934896,An automated approach to network features of protein structure ensembles.,"Network theory applied to protein structures provides insights into numerous problems of biological relevance. The explosion in structural data available from PDB and simulations establishes a need to introduce a standalone-efficient program that assembles network concepts/parameters under one hood in an automated manner. Herein, we discuss the development/application of an exhaustive, user-friendly, standalone program package named PSN-Ensemble, which can handle structural ensembles generated through molecular dynamics (MD) simulation/NMR studies or from multiple X-ray structures. The novelty in network construction lies in the explicit consideration of side-chain interactions among amino acids. The program evaluates network parameters dealing with topological organization and long-range allosteric communication. The introduction of a flexible weighing scheme in terms of residue pairwise cross-correlation/interaction energy in PSN-Ensemble brings in dynamical/chemical knowledge into the network representation. Also, the results are mapped on a graphical display of the structure, allowing an easy access of network analysis to a general biological community. The potential of PSN-Ensemble toward examining structural ensemble is exemplified using MD trajectories of an ubiquitin-conjugating enzyme (UbcH5b). 
Furthermore, insights derived from network parameters evaluated using PSN-Ensemble for single-static structures of active/inactive states of β2-adrenergic receptor and the ternary tRNA complexes of tyrosyl tRNA synthetases (from organisms across kingdoms) are discussed. PSN-Ensemble is freely available from http://vishgraph.mbu.iisc.ernet.in/PSN-Ensemble/psn_index.html.",2013-10-01 +30727193,First Report of Leaf Spot and Necrotic Roots on Switchgrass Caused by Curvularia lunata var. aeria in the United States.,"Curvularia lunata infects many grass species; however, switchgrass (Panicum virgatum L.) has not been reported as a host (2). In June 2009, small brown leaf spots and necrotic roots were observed on stunted 2-year-old 'Alamo' switchgrass on the University of Tennessee, Knoxville campus. Symptomatic leaf and root tissues were surface-sterilized in 95% ethanol for 1 min, 20% bleach for 3 min, and 95% ethanol for 1 min, and then air dried and placed on water agar amended with 10 mg/liter rifampicin (Sigma-Aldrich, St. Louis, MO) and 7.5 μl/liter Danitol (Valent Chemical, Walnut Creek, CA). Cultures were incubated at 25°C for 3 days. Hyphal tips were transferred to potato dextrose agar (PDA) and incubated at 25°C. Dark brown-to-black fungal colonies with black stromata formed. Conidiophores were dark brown, unbranched, septate, polytretic, sympodial, and geniculate at the apical region with rachis conidial ontogeny. Conidia were dark brown and cymbiform with three to four septations, with one or two central cells larger than the terminal cells. Spore size ranged from 17.5 to 30.0 × 8.8 to 12.5 μm (mean 21.6 × 10.8 μm). Morphological traits matched the description of C. lunata var. aeria (1). To test pathogenicity, fungal sporulation was optimized on PDA with pieces of sterile, moistened index card placed on each plate; cultures were incubated at 25°C with a 12-h photoperiod (3). 
After 14 days, conidia were dislodged in sterile water and the spore concentration adjusted to 8 × 104 conidia/ml. Ten pots, with about 15 plants per pot, of 6-week-old 'Alamo' switchgrass grown from surface-sterilized seed were inoculated with the spore suspension applied to the plant crown and surrounding soil with an aerosol sprayer. Prior to inoculation, roots were wounded with a sterile scalpel. Noninoculated plants (two pots), with roots also wounded, served as controls. To maintain high humidity, each pot was covered with a plastic bag and maintained in a growth chamber at 30°C with a 16-h photoperiod. Bags were removed after 3 days; plants were maintained as described for 6 weeks. Brown leaf spots and brown-to-black necrotic roots that matched symptoms on the naturally infected plants were observed in all inoculated plants; there were no symptoms of Curvularia infection on the controls. The fungus was reisolated from inoculated plants as described above. Genomic DNA was extracted from the original isolate and the reisolate from the pathogenicity test. PCR amplification of the internal transcribed spacer (ITS) regions from ribosomal DNA was performed with primers ITS4 and ITS5. PCR products of 503 bp were sequenced. There was 100% nucleotide identity for sequences of the original isolate and the re-isolate. The sequence was submitted to GenBank (Accession No. HQ130484.1). BLAST analysis of the fungal sequence resulted in 100% nucleotide sequence identity to the ITS sequences of isolates of C. affinis, C. geniculata, and C. lunata. On the basis of the smaller spore size and abundant stromata on PDA, the isolate was identified as C. lunata var. aeria. As switchgrass is developed as a biofuels crop, identification of new pathogens may warrant development of disease management strategies. References: (1) M. B. Ellis. Mycological Papers No. 106, CMI, Surrey, 1966. (2) D. F. Farr and A. Y. Rossman, Fungal Databases. 
Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , August 2011. (3) R. G. Pratt. Mycopathologia 162:133, 2006.",2012-09-01 +22941663,Exploring functional variant discovery in non-coding regions with SInBaD.,"The thousand genomes project and many similar ongoing large-scale sequencing efforts require new methods to predict functional variants in both coding and non-coding regions in order to understand phenotype and genotype relationships. We report the design of a new model SInBaD (Sequence-Information-Based-Decision-model) which relies on nucleotide conservation information to evaluate any annotated human variant in all known exons, introns, splice junctions and promoter regions. SInBaD builds separate mathematical models for promoters, exons and introns, using the human disease mutations annotated in human gene mutation database as the training dataset for functional variants. The ten-fold cross validation shows high prediction accuracy. Validations on test datasets, demonstrate that variants predicted as functional have a significantly higher occurrence in cancer patients. We also applied our model to variants found in four different individual human genomes to identify a set of functional variants, which might be of interest for further studies. Scores for any possible variants for all annotated genes are available under http://tingchenlab.cmb.usc.edu/sinbad/. SInBaD supports the current standard format of genotyping, the variant call files (VCF 4.0), making it easy to integrate it into any existing next-generation sequencing pipeline. The accuracy of SNP detection poses the only limitation to the use of SInBaD.",2012-08-31 +21685100,Piecewise linear approximation of protein structures using the principle of minimum message length.,"

Unlabelled

Simple and concise representations of protein-folding patterns provide powerful abstractions for visualizations, comparisons, classifications, searching and aligning structural data. Structures are often abstracted by replacing standard secondary structural features-that is, helices and strands of sheet-by vectors or linear segments. Relying solely on standard secondary structure may result in a significant loss of structural information. Further, traditional methods of simplification crucially depend on the consistency and accuracy of external methods to assign secondary structures to protein coordinate data. Although many methods exist automatically to identify secondary structure, the impreciseness of definitions, along with errors and inconsistencies in experimental structure data, drastically limit their applicability to generate reliable simplified representations, especially for structural comparison. This article introduces a mathematically rigorous algorithm to delineate protein structure using the elegant statistical and inductive inference framework of minimum message length (MML). Our method generates consistent and statistically robust piecewise linear explanations of protein coordinate data, resulting in a powerful and concise representation of the structure. The delineation is completely independent of the approaches of using hydrogen-bonding patterns or inspecting local substructural geometry that the current methods use. Indeed, as is common with applications of the MML criterion, this method is free of parameters and thresholds, in striking contrast to the existing programs which are often beset by them. The analysis of results over a large number of proteins suggests that the method produces consistent delineation of structures that encompasses, among others, the segments corresponding to standard secondary structure.

Availability

http://www.csse.monash.edu.au/~karun/pmml.",2011-07-01 +22940741,"4-Liter split-dose polyethylene glycol is superior to other bowel preparations, based on systematic review and meta-analysis.","

Background & aims

Adequate bowel cleansing is an important determinant of the efficacy of screening colonoscopy. Polyethylene glycol (PEG)-based solutions are used commonly in bowel preparation, but their poor palatability and large volumes (4 L) influence compliance. Adjunct therapies, such as bisacodyl, split-dose regimens, and lower-volume regimens have been tested. We performed a meta-analysis to determine whether a 4-L split dose of PEG is better than others for bowel cleansing before colonoscopy.

Methods

We searched MEDLINE, the Cochrane Central Register of Controlled Trials and Database, recent abstracts from major conference proceedings, references from selected reviews and randomized trials (http://clinicaltrials.gov), and Google Scholar, through September 2011, for high-quality, randomized trials that compared 4-L split-dose PEG without adjunct therapy with other bowel preparation methods. Nine of 2477 trials considered were used in the analysis. We calculated pooled estimates of bowel preparation quality (primary outcome: excellent or good), preparation compliance, favorable overall experiences, willingness to repeat same preparation, and side effects. We calculated pooled estimates of odds ratios by fixed- and random-effects models. We also assessed heterogeneity among studies and publication bias.

Results

The overall pooled odds ratio for excellent or good bowel preparation quality for 4-L split-dose PEG was 3.46, compared with other methods (95% confidence interval, 2.45-4.89; P < .01). Although there was significant heterogeneity in results among studies, 7 of 9 reported a significant benefit from the 4-L split-dose PEG preparation. There were no significant differences between PEG and others in preparation compliance, favorable overall experience, willingness to repeat the same preparation, abdominal cramping, nausea, or sleep disturbance. There was no significant publication bias based on funnel plot.

Conclusions

A meta-analysis showed that 4-L split-dose PEG is better than other bowel preparation methods for colonoscopy. Significant heterogeneity among studies might result from differences in patient demographics and protocols. A 4-L split dose of PEG should be considered the standard with which new bowel preparation methods are compared.",2012-08-30 +24602369,Detection of Helicobacter spp. DNA in the colonic biopsies of stray dogs: molecular and histopathological investigations.,"

Background

In dogs, the gastric Helicobacter spp. have been well studied, but there is little information regarding the other parts of the alimentary system. The incidence of Helicobacter spp. infection in dogs is largely unknown and to our knowledge there are no data about their potential pathogenic role. In light of these considerations, the aims of this study were (i) to assess the prevalence of Helicobacter spp. in colonic biopsies of healthy and symptomatic stray dogs also (ii) we isolate and characterize helicobacters in canine colonic biopsies to compare the commonly used tests for the identification of Helicobacter spp. and to determine the occurrence of these species in dogs.

Methods

Tissues from fifteen stray dogs (8 males and 7 females, age 6 months -10 years) were used in this study. From each stray dog, multiple colonic biopsies were taken for PCR. Biopsies for PCR of cecum and colon were immediately frozen and stored at -20°C until DNA extraction. Samples for histological examination were fixed in 10% neutral buffered formalin and embedded in paraffin wax.

Results

In the cecum and colon, Helicobacter spp. DNA was detected in all dogs. H.canis, H.bizzozeronii, H. bilis, H.felis, H.salomonis and H.pylori Identified by specific polymerase chain reaction. Histopathology demonstrated that Helicobacter organisms were localized within the surface mucus and the intestinal crypts. Dogs with heavy Helicobacter spp. colonization were significantly in younger as well as had a higher level of mucosal fibrosis/atrophy than dogs with uncolonized or poorly colonized biopsies (p<0.05).

Conclusions

We have indicated that the crypts of the cecum and colon of healthy and symptomatic dogs are heavily colonized by Helicobacter spp.. Combined molecular and histological approaches demonstrated that enterohepatic Helicobacter spp. infection is rather common in colonic biopsies of healthy and symptomatic stray dogs, with Helicobacter spp. specialy H. canis, H.bizzozeroni, H.billis, H.felis and H. salomonis identified as the most common species.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1957989294118782.",2014-03-06 +22962443,Context-specific transcriptional regulatory network inference from global gene expression maps using double two-way t-tests.,"

Motivation

Transcriptional regulatory network inference methods have been studied for years. Most of them rely on complex mathematical and algorithmic concepts, making them hard to adapt, re-implement or integrate with other methods. To address this problem, we introduce a novel method based on a minimal statistical model for observing transcriptional regulatory interactions in noisy expression data, which is conceptually simple, easy to implement and integrate in any statistical software environment and equally well performing as existing methods.

Results

We developed a method to infer regulatory interactions based on a model where transcription factors (TFs) and their targets are both differentially expressed in a gene-specific, critical sample contrast, as measured by repeated two-way t-tests. Benchmarking on standard Escherichia coli and yeast reference datasets showed that this method performs equally well as the best existing methods. Analysis of the predicted interactions suggested that it works best to infer context-specific TF-target interactions which only co-express locally. We confirmed this hypothesis on a dataset of >1000 normal human tissue samples, where we found that our method predicts highly tissue-specific and functionally relevant interactions, whereas a global co-expression method only associates general TFs to non-specific biological processes.

Availability

A software tool called TwixTrix is available from http://twixtrix.googlecode.com.

Supplementary information

Supplementary Material is available from http://www.roslin.ed.ac.uk/tom-michoel/supplementary-data.

Contact

tom.michoel@roslin.ed.ac.uk.",2012-09-01 +24593867,Tuberous-sclerosis complex-related cell signaling in the pathogenesis of lung cancer.,"

Background

Hamartin (TSC1) and tuberin (TSC2), encoded by the tuberous sclerosis complex (TSC) genes, form a tumor-suppressor heterodimer which is implicated in PI3K-Akt signaling and acts as a functional inhibitor of the mammalian target of rapamycin (mTOR). Dysregulation of mTOR has been assigned to carcinogenesis and thus may be involved in cancer development. We have addressed the role of hamartin, phospho-tuberin (p-TSC2) and phospho-mTOR (p-mTOR) in a series of non-small cell lung cancer (NSCLC) and small cell lung cancer (SCLC) samples.

Methods

We collected 166 NSCLC and SCLC samples for immunohistochemical studies and performed western blot analyses in NSCLC and SCLC cell lines as well as comparative analyses with EGFR phosphorylation and downstream effectors.

Results

In cell lines we found an inverse correlation between hamartin and p-mTOR expression. In surgical specimens cytoplasmic hamartin expression was observed in more than 50% of adenocarcinoma (AC) and squamous cell carcinoma (SCC) compared to 14% of SCLC. P-mTOR and p-TSC2 staining was found in a minority of cases.There was a significant correlation between p-EGFR Tyr-1068, p-EGFR Tyr-992 and hamartin, and also between p-mTOR and p-EGFR Tyr-1173 in AC. In SCC an inverse correlation between hamartin and p-EGFR Tyr-992 was detected. Phosphorylation of TSC2 was associated with expression of MAP-Kinase. Hamartin, p-TSC2 and p-mTOR expression was not dependant of the EGFR mutation status. Hamartin expression is associated with poorer survival in SCC and SCLC.

Conclusions

Our findings confirm the inhibitory role of the tuberous sclerosis complex for mTOR activation in lung cancer cell lines. These results reveal hamartin expression in a substantial subset of NSCLC and SCLC specimens, which may be due to EGFR signaling but is not dependant on EGFR mutations. Our data provide evidence for a functional role of the tuberous sclerosis complex in lung cancer.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/9274845161175223.",2014-03-04 +22689760,SEQuel: improving the accuracy of genome assemblies.,"

Motivation

Assemblies of next-generation sequencing (NGS) data, although accurate, still contain a substantial number of errors that need to be corrected after the assembly process. We develop SEQuel, a tool that corrects errors (i.e. insertions, deletions and substitution errors) in the assembled contigs. Fundamental to the algorithm behind SEQuel is the positional de Bruijn graph, a graph structure that models k-mers within reads while incorporating the approximate positions of reads into the model.

Results

SEQuel reduced the number of small insertions and deletions in the assemblies of standard multi-cell Escherichia coli data by almost half, and corrected between 30% and 94% of the substitution errors. Further, we show SEQuel is imperative to improving single-cell assembly, which is inherently more challenging due to higher error rates and non-uniform coverage; over half of the small indels, and substitution errors in the single-cell assemblies were corrected. We apply SEQuel to the recently assembled Deltaproteobacterium SAR324 genome, which is the first bacterial genome with a comprehensive single-cell genome assembly, and make over 800 changes (insertions, deletions and substitutions) to refine this assembly.

Availability

SEQuel can be used as a post-processing step in combination with any NGS assembler and is freely available at http://bix.ucsd.edu/SEQuel/.",2012-06-01 +23825697,Accelerating the Original Profile Kernel.,"One of the most accurate multi-class protein classification systems continues to be the profile-based SVM kernel introduced by the Leslie group. Unfortunately, its CPU requirements render it too slow for practical applications of large-scale classification tasks. Here, we introduce several software improvements that enable significant acceleration. Using various non-redundant data sets, we demonstrate that our new implementation reaches a maximal speed-up as high as 14-fold for calculating the same kernel matrix. Some predictions are over 200 times faster and render the kernel as possibly the top contender in a low ratio of speed/performance. Additionally, we explain how to parallelize various computations and provide an integrative program that reduces creating a production-quality classifier to a single program call. The new implementation is available as a Debian package under a free academic license and does not depend on commercial software. For non-Debian based distributions, the source package ships with a traditional Makefile-based installer. Download and installation instructions can be found at https://rostlab.org/owiki/index.php/Fast_Profile_Kernel. Bugs and other issues may be reported at https://rostlab.org/bugzilla3/enter_bug.cgi?product=fastprofkernel.",2013-06-18 +24468032,Support vector machine (SVM) based multiclass prediction with basic statistical analysis of plasminogen activators.,"

Background

Plasminogen (Pg), the precursor of the proteolytic and fibrinolytic enzyme of blood, is converted to the active enzyme plasmin (Pm) by different plasminogen activators (tissue plasminogen activators and urokinase), including the bacterial activators streptokinase and staphylokinase, which activate Pg to Pm and thus are used clinically for thrombolysis. The identification of Pg-activators is therefore an important step in understanding their functional mechanism and derives new therapies.

Methods

In this study, different computational methods for predicting plasminogen activator peptide sequences with high accuracy were investigated, including support vector machines (SVM) based on amino acid (AC), dipeptide composition (DC), PSSM profile and Hybrid methods used to predict different Pg-activators from both prokaryotic and eukaryotic origins.

Results

Overall maximum accuracy, evaluated using the five-fold cross validation technique, was 88.37%, 84.32%, 87.61%, 85.63% in 0.87, 0.83,0.86 and 0.85 MCC with amino (AC) or dipeptide composition (DC), PSSM profile and Hybrid methods respectively. Through this study, we have found that the different subfamilies of Pg-activators are quite closely correlated in terms of amino, dipeptide, PSSM and Hybrid compositions. Therefore, our prediction results show that plasminogen activators are predictable with a high accuracy from their primary sequence. Prediction performance was also cross-checked by confusion matrix and ROC (Receiver operating characteristics) analysis. A web server to facilitate the prediction of Pg-activators from primary sequence data was implemented.

Conclusion

The results show that dipeptide, PSSM profile, and Hybrid based methods perform better than single amino acid composition (AC). Furthermore, we also have developed a web server, which predicts the Pg-activators and their classification (available online at http://mamsap.it.deakin.edu.au/plas_pred/home.html). Our experimental results show that our approaches are faster and achieve generally a good prediction performance.",2014-01-27 +22952791,Modeling formamide denaturation of probe-target hybrids for improved microarray probe design in microbial diagnostics.,"Application of high-density microarrays to the diagnostic analysis of microbial communities is challenged by the optimization of oligonucleotide probe sensitivity and specificity, as it is generally unfeasible to experimentally test thousands of probes. This study investigated the adjustment of hybridization stringency using formamide with the idea that sensitivity and specificity can be optimized during probe design if the hybridization efficiency of oligonucleotides with target and non-target molecules can be predicted as a function of formamide concentration. Sigmoidal denaturation profiles were obtained using fluorescently labeled and fragmented 16S rRNA gene amplicon of Escherichia coli as the target with increasing concentrations of formamide in the hybridization buffer. A linear free energy model (LFEM) was developed and microarray-specific nearest neighbor rules were derived. The model simulated formamide melting with a denaturant m-value that increased hybridization free energy (ΔG°) by 0.173 kcal/mol per percent of formamide added (v/v). Using the LFEM and specific probe sets, free energy rules were systematically established to predict the stability of single and double mismatches, including bulged and tandem mismatches. 
The absolute error in predicting the position of experimental denaturation profiles was less than 5% formamide for more than 90 percent of probes, enabling a practical level of accuracy in probe design. The potential of the modeling approach for probe design and optimization is demonstrated using a dataset including the 16S rRNA gene of Rhodobacter sphaeroides as an additional target molecule. The LFEM and thermodynamic databases were incorporated into a computational tool (ProbeMelt) that is freely available at http://DECIPHER.cee.wisc.edu.",2012-08-27 +21393653,Peakbin selection in mass spectrometry data using a consensus approach with estimation of distribution algorithms.,"Progress is continuously being made in the quest for stable biomarkers linked to complex diseases. Mass spectrometers are one of the devices for tackling this problem. The data profiles they produce are noisy and unstable. In these profiles, biomarkers are detected as signal regions (peaks), where control and disease samples behave differently. Mass spectrometry (MS) data generally contain a limited number of samples described by a high number of features. In this work, we present a novel class of evolutionary algorithms, estimation of distribution algorithms (EDA), as an efficient peak selector in this MS domain. There is a trade-of f between the reliability of the detected biomarkers and the low number of samples for analysis. For this reason, we introduce a consensus approach, built upon the classical EDA scheme, that improves stability and robustness of the final set of relevant peaks. An entire data workflow is designed to yield unbiased results. Four publicly available MS data sets (two MALDI-TOF and another two SELDI-TOF) are analyzed. The results are compared to the original works, and a new plot (peak frequential plot) for graphically inspecting the relevant peaks is introduced. 
A complete online supplementary page, which can be found at http://www.sc.ehu.es/ccwbayes/members/ruben/ms, includes extended info and results, in addition to Matlab scripts and references.",2011-05-01 +21821665,Tree-structured algorithm for long weak motif discovery.,"

Motivation

Motifs in DNA sequences often appear in degenerate form, so there has been an increased interest in computational algorithms for weak motif discovery. Probabilistic algorithms are unable to detect weak motifs while exact methods have been able to detect only short weak motifs. This article proposes an exact tree-based motif detection (TreeMotif) algorithm capable of discovering longer and weaker motifs than by the existing methods.

Results

TreeMotif converts the graphical representation of motifs into a tree-structured representation in which a tree that branches with nodes from every sequence represents motif instances. The method of tree construction is novel to motif discovery based on graphical representation. TreeMotif is more efficient and scalable in handling longer and weaker motifs than the existing algorithms in terms of accuracy and execution time. The performances of TreeMotif were demonstrated on synthetic data as well as on real biological data.

Availability

https://sites.google.com/site/shqssw/treemotif

Contact

sunh0013@e.ntu.edu.sg

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-05 +24214729,Gene network and proteomic analyses of cardiac responses to pathological and physiological stress.,"

Background

The molecular mechanisms underlying similarities and differences between physiological and pathological left ventricular hypertrophy (LVH) are of intense interest. Most previous work involved targeted analysis of individual signaling pathways or screening of transcriptomic profiles. We developed a network biology approach using genomic and proteomic data to study the molecular patterns that distinguish pathological and physiological LVH.

Methods and results

A network-based analysis using graph theory methods was undertaken on 127 genome-wide expression arrays of in vivo murine LVH. This revealed phenotype-specific pathological and physiological gene coexpression networks. Despite >1650 common genes in the 2 networks, network structure is significantly different. This is largely because of rewiring of genes that are differentially coexpressed in the 2 networks; this novel concept of differential wiring was further validated experimentally. Functional analysis of the rewired network revealed several distinct cellular pathways and gene sets. Deeper exploration was undertaken by targeted proteomic analysis of mitochondrial, myofilament, and extracellular subproteomes in pathological LVH. A notable finding was that mRNA-protein correlation was greater at the cellular pathway level than for individual loci.

Conclusions

This first combined gene network and proteomic analysis of LVH reveals novel insights into the integrated pathomechanisms that distinguish pathological versus physiological phenotypes. In particular, we identify differential gene wiring as a major distinguishing feature of these phenotypes. This approach provides a platform for the investigation of potentially novel pathways in LVH and offers a freely accessible protocol (http://sites.google.com/site/cardionetworks) for similar analyses in other cardiovascular diseases.",2013-11-08 +22652832,Identification of functional CNV region networks using a CNV-gene mapping algorithm in a genome-wide scale.,"

Motivation

Identifying functional relation of copy number variation regions (CNVRs) and gene is an essential process in understanding the impact of genotypic variations on phenotype. There have been many related works, but only a few attempts were made to normal populations.

Results

To analyze the functions of genome-wide CNVRs, we applied a novel correlation measure called Correlation based on Sample Set (CSS) to paired Whole Genome TilePath array and messenger RNA (mRNA) microarray data from 210 HapMap individuals with normal phenotypes and calculated the confident CNVR-gene relationships. Two CNVR nodes form an edge if they regulate a common set of genes, allowing the construction of a global CNVR network. We performed functional enrichment on the common genes that were trans-regulated from CNVRs clustered together in our CNVR network. As a result, we observed that most of CNVR clusters in our CNVR network were reported to be involved in some biological processes or cellular functions, while most CNVR clusters from randomly constructed CNVR networks showed no evidence of functional enrichment. Those results imply that CSS is capable of finding related CNVR-gene pairs and CNVR networks that have functional significance.

Availability

http://embio.yonsei.ac.kr/~Park/cnv_net.php.

Contact

sanghyun@cs.yonsei.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-05-30 +21533266,Data integration workflow for search of disease driving genes and genetic variants.,"Comprehensive characterization of a gene's impact on phenotypes requires knowledge of the context of the gene. To address this issue we introduce a systematic data integration method Candidate Genes and SNPs (CANGES) that links SNP and linkage disequilibrium data to pathway- and protein-protein interaction information. It can be used as a knowledge discovery tool for the search of disease associated causative variants from genome-wide studies as well as to generate new hypotheses on synergistically functioning genes. We demonstrate the utility of CANGES by integrating pathway and protein-protein interaction data to identify putative functional variants for (i) the p53 gene and (ii) three glioblastoma multiforme (GBM) associated risk genes. For the GBM case, we further integrate the CANGES results with clinical and genome-wide data for 209 GBM patients and identify genes having effects on GBM patient survival. Our results show that selecting a focused set of genes can result in information beyond the traditional genome-wide association approaches. Taken together, holistic approach to identify possible interacting genes and SNPs with CANGES provides a means to rapidly identify networks for any set of genes and generate novel hypotheses. CANGES is available in http://csbi.ltdk.helsinki.fi/CANGES/",2011-04-12 +22940442,miRNA_Targets: a database for miRNA target predictions in coding and non-coding regions of mRNAs.,"MicroRNAs (miRNAs) are small non-coding RNAs that play a role in post-transcriptional regulation of gene expression in most eukaryotes. They help in fine-tuning gene expression by targeting messenger RNAs (mRNA). 
The interactions of miRNAs and mRNAs are sequence specific and computational tools have been developed to predict miRNA target sites on mRNAs, but miRNA research has been mainly focused on target sites within 3' untranslated regions (UTRs) of genes. There is a need for an easily accessible repository of genome wide full length mRNA - miRNA target predictions with versatile search capabilities and visualization tools. We have created a web accessible database of miRNA target predictions for human, mouse, cow, chicken, Zebra fish, fruit fly and Caenorhabditis elegans using two different target prediction algorithms. The database has target predictions for miRNA's on 5' UTRs, coding region and 3' UTRs of all mRNAs. This database can be freely accessed at http://mamsap.it.deakin.edu.au/mirna_targets/.",2012-08-25 +23874612,RMOD: a tool for regulatory motif detection in signaling network.,"Regulatory motifs are patterns of activation and inhibition that appear repeatedly in various signaling networks and that show specific regulatory properties. However, the network structures of regulatory motifs are highly diverse and complex, rendering their identification difficult. Here, we present RMOD, a web-based system for the identification of regulatory motifs and their properties in signaling networks. RMOD finds various network structures of regulatory motifs by compressing the signaling network and detecting the compressed forms of regulatory motifs. To apply it into a large-scale signaling network, it adopts a new subgraph search algorithm using a novel data structure called path-tree, which is a tree structure composed of isomorphic graphs of query regulatory motifs. This algorithm was evaluated using various sizes of signaling networks generated from the integration of various human signaling pathways and it showed that the speed and scalability of this algorithm outperforms those of other algorithms. 
RMOD includes interactive analysis and auxiliary tools that make it possible to manipulate the whole processes from building signaling network and query regulatory motifs to analyzing regulatory motifs with graphical illustration and summarized descriptions. As a result, RMOD provides an integrated view of the regulatory motifs and mechanism underlying their regulatory motif activities within the signaling network. RMOD is freely accessible online at the following URL: http://pks.kaist.ac.kr/rmod.",2013-07-12 +22640820,Scaffolder - software for manual genome scaffolding.,"

Background

The assembly of next-generation short-read sequencing data can result in a fragmented non-contiguous set of genomic sequences. Therefore a common step in a genome project is to join neighbouring sequence regions together and fill gaps. This scaffolding step is non-trivial and requires manually editing large blocks of nucleotide sequence. Joining these sequences together also hides the source of each region in the final genome sequence. Taken together these considerations may make reproducing or editing an existing genome scaffold difficult.

Methods

The software outlined here, ""Scaffolder,"" is implemented in the Ruby programming language and can be installed via the RubyGems software management system. Genome scaffolds are defined using YAML - a data format which is both human and machine-readable. Command line binaries and extensive documentation are available.

Results

This software allows a genome build to be defined in terms of the constituent sequences using a relatively simple syntax. This syntax further allows unknown regions to be specified and additional sequence to be used to fill known gaps in the scaffold. Defining the genome construction in a file makes the scaffolding process reproducible and easier to edit compared with large FASTA nucleotide sequences.

Conclusions

Scaffolder is easy-to-use genome scaffolding software which promotes reproducibility and continuous development in a genome project. Scaffolder can be found at http://next.gs.",2012-05-28 +24215022,Rapid detection of expanded short tandem repeats in personal genomics using hybrid sequencing.,"

Motivation

Long expansions of short tandem repeats (STRs), i.e. DNA repeats of 2-6 nt, are associated with some genetic diseases. Cost-efficient high-throughput sequencing can quickly produce billions of short reads that would be useful for uncovering disease-associated STRs. However, enumerating STRs in short reads remains largely unexplored because of the difficulty in elucidating STRs much longer than 100 bp, the typical length of short reads.

Results

We propose ab initio procedures for sensing and locating long STRs promptly by using the frequency distribution of all STRs and paired-end read information. We validated the reproducibility of this method using biological replicates and used it to locate an STR associated with a brain disease (SCA31). Subsequently, we sequenced this STR site in 11 SCA31 samples using SMRT(TM) sequencing (Pacific Biosciences), determined 2.3-3.1 kb sequences at nucleotide resolution and revealed that (TGGAA)- and (TAAAATAGAA)-repeat expansions determined the instability of the repeat expansions associated with SCA31. Our method could also identify common STRs, (AAAG)- and (AAAAG)-repeat expansions, which are remarkably expanded at four positions in an SCA31 sample. This is the first proposed method for rapidly finding disease-associated long STRs in personal genomes using hybrid sequencing of short and long reads.

Availability and implementation

Our TRhist software is available at http://trhist.gi.k.u-tokyo.ac.jp/.

Contact

moris@cb.k.u-tokyo.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-11-08 +21994222,SVseq: an approach for detecting exact breakpoints of deletions with low-coverage sequence data.,"

Motivation

Structural variation (SV), such as deletion, is an important type of genetic variation and may be associated with diseases. While there are many existing methods for detecting SVs, finding deletions is still challenging with low-coverage short sequence reads. Existing deletion finding methods for sequence reads either use the so-called split reads mapping for detecting deletions with exact breakpoints, or rely on discordant insert sizes to estimate approximate positions of deletions. Neither is completely satisfactory with low-coverage sequence reads.

Results

We present SVseq, an efficient two-stage approach, which combines the split reads mapping and discordant insert size analysis. The first stage is split reads mapping based on the Burrows-Wheeler transform (BWT), which finds candidate deletions. Our split reads mapping method allows mismatches and small indels, thus deletions near other small variations can be discovered and reads with sequencing errors can be utilized. The second stage filters the false positives by analyzing discordant insert sizes. SVseq is more accurate than an alternative approach when applying on simulated data and empirical data, and is also much faster.

Availability

The program SVseq can be downloaded at http://www.engr.uconn.edu/~jiz08001/

Contact

jinzhang@engr.uconn.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-12 +21987088,PlantRGS: a web server for the identification of most suitable candidate reference genes for quantitative gene expression studies in plants.,"Normalization of quantitative gene expression data with a suitable reference gene is essential for accurate and reliable results. However, the availability and choice of most suitable reference gene(s) showing uniform expression across all the experimental conditions remain a drawback. We have developed a web server, PlantRGS (http://www.nipgr.res.in/PlantRGS), for the identification of most suitable candidate reference gene(s) at the whole-genome level using microarray data for quantitative gene expression studies in plants. Microarray data from more than 11 000 tissue samples for nine plant species have been included in the PlantRGS for meta-analysis. The web server provides a user-friendly graphical user interface-based analysis tool for the identification of most suitable reference genes in the selected plant species under user-defined experimental conditions. Various parameter options and output formats will help users to investigate desired number of most suitable reference genes with wide range of expression levels. Validation of results revealed that novel reference genes identified by the PlantRGS outperforms the traditionally used reference genes in terms of expression stability. 
We anticipate that the PlantRGS will provide a platform for the identification of most suitable reference gene(s) under given experimental conditions and facilitate quantitative gene expression studies in plants.",2011-10-10 +24139720,Development of a single multi-locus sequence typing scheme for Taylorella equigenitalis and Taylorella asinigenitalis.,"We describe here the development of a multilocus sequence typing (MLST) scheme for Taylorella equigenitalis, the causative agent of contagious equine metritis (CEM), and Taylorella asinigenitalis, a nonpathogenic bacterium. MLST was performed on a set of 163 strains collected in several countries over 35 years (1977-2012). The MLST data were analyzed using START2, MEGA 5.05 and eBURST, and can be accessed at http://pubmlst.org/taylorella/. Our results revealed a clonal population with 39 sequence types (ST) and no common ST between the two Taylorella species. The eBURST analysis grouped the 27 T. equigenitalis STs into four clonal complexes (CC1-4) and five unlinked STs. The 12 T. asinigenitalis STs were grouped into three clonal complexes (CC5-7) and five unlinked STs, among which CC1 (68.1% of the 113 T. equigenitalis) and CC5 (58.0% of the 50 T. asinigenitalis) were dominants. The CC1, still in circulation in France, contains isolates from the first CEM outbreaks that simultaneously emerged in several countries in the late 1970s. The emergence in different countries (e.g. France, Japan, and United Arab Emirates) of STs without any genetic relationship to CC1 suggests the existence of a natural worldwide reservoir that remains to be identified. T. asinigenitalis appears to behave same way since the American, Swedish and French isolates have unrelated STs. This first Taylorella sp. MLST is a powerful tool for further epidemiological investigations and population biology studies of the Taylorella genus.",2013-09-24 +22480150,TINA manual landmarking tool: software for the precise digitization of 3D landmarks.,"

Background

Interest in the placing of landmarks and subsequent morphometric analyses of shape for 3D data has increased with the increasing accessibility of computed tomography (CT) scanners. However, current computer programs for this task suffer from various practical drawbacks. We present here a free software tool that overcomes many of these problems.

Results

The TINA Manual Landmarking Tool was developed for the digitization of 3D data sets. It enables the generation of a modifiable 3D volume rendering display plus matching orthogonal 2D cross-sections from DICOM files. The object can be rotated and axes defined and fixed. Predefined lists of landmarks can be loaded and the landmarks identified within any of the representations. Output files are stored in various established formats, depending on the preferred evaluation software.

Conclusions

The software tool presented here provides several options facilitating the placing of landmarks on 3D objects, including volume rendering from DICOM files, definition and fixation of meaningful axes, easy import, placement, control, and export of landmarks, and handling of large datasets. The TINA Manual Landmark Tool runs under Linux and can be obtained for free from http://www.tina-vision.net/tarballs/.",2012-04-05 +22806945,Characterizing the morphology of protein binding patches.,"Let the patch of a partner in a protein complex be the collection of atoms accounting for the interaction. To improve our understanding of the structure-function relationship, we present a patch model decoupling the topological and geometric properties. While the geometry is classically encoded by the atomic positions, the topology is recorded in a graph encoding the relative position of concentric shells partitioning the interface atoms. The topological-geometric duality provides the basis of a generic dynamic programming-based algorithm comparing patches at the shell level, which may favor topological or geometric features. On the biological side, we address four questions, using 249 cocrystallized heterodimers organized in biological families. First, we dissect the morphology of binding patches and show that Nature enjoyed the topological and geometric degrees of freedom independently while retaining a finite set of qualitatively distinct topological signatures. Second, we argue that our shell-based comparison is effective to perform atomic-level comparisons and show that topological similarity is a less stringent than geometric similarity. We also use the topological versus geometric duality to exhibit topo-rigid patches, whose topology (but not geometry) remains stable upon docking. Third, we use our comparison algorithms to infer specificity-related information amidst a database of complexes. 
Finally, we exhibit a descriptor outperforming its contenders to predict the binding affinities of the affinity benchmark. The softwares developed with this article are available from http://team.inria.fr/abs/vorpatch_compatch/.",2012-08-22 +23772985,"The Gray Institute 'open' high-content, fluorescence lifetime microscopes.","We describe a microscopy design methodology and details of microscopes built to this 'open' design approach. These demonstrate the first implementation of time-domain fluorescence microscopy in a flexible automated platform with the ability to ease the transition of this and other advanced microscopy techniques from development to use in routine biology applications. This approach allows easy expansion and modification of the platform capabilities, as it moves away from the use of a commercial, monolithic, microscope body to small, commercial off-the-shelf and custom made modular components. Drawings and diagrams of our microscopes have been made available under an open license for noncommercial use at http://users.ox.ac.uk/~atdgroup. Several automated high-content fluorescence microscope implementations have been constructed with this design framework and optimized for specific applications with multiwell plates and tissue microarrays. In particular, three platforms incorporate time-domain FLIM via time-correlated single photon counting in an automated fashion. We also present data from experiments performed on these platforms highlighting their automated wide-field and laser scanning capabilities designed for high-content microscopy. Devices using these designs also form radiation-beam 'end-stations' at Oxford and Surrey Universities, showing the versatility and extendibility of this approach.",2013-06-12 +22635606,Gowinda: unbiased analysis of gene set enrichment for genome-wide association studies.,"

Summary

An analysis of gene set [e.g. Gene Ontology (GO)] enrichment assumes that all genes are sampled independently from each other with the same probability. These assumptions are violated in genome-wide association (GWA) studies since (i) longer genes typically have more single-nucleotide polymorphisms resulting in a higher probability of being sampled and (ii) overlapping genes are sampled in clusters. Herein, we introduce Gowinda, a software specifically designed to test for enrichment of gene sets in GWA studies. We show that GO tests on GWA data could result in a substantial number of false-positive GO terms. Permutation tests implemented in Gowinda eliminate these biases, but maintain sufficient power to detect enrichment of GO terms. Since sufficient resolution for large datasets requires millions of permutations, we use multi-threading to keep computation times reasonable.

Availability and implementation

Gowinda is implemented in Java (v1.6) and freely available on http://code.google.com/p/gowinda/

Contact

christian.schloetterer@vetmeduni.ac.at

Supplementary information

Manual: http://code.google.com/p/gowinda/wiki/Manual. Test data and tutorial: http://code.google.com/p/gowinda/wiki/Tutorial.

Validation

http://code.google.com/p/gowinda/wiki/VALIDATION.",2012-05-26 +22437511,MorePower 6.0 for ANOVA with relational confidence intervals and Bayesian analysis.,"MorePower 6.0 is a flexible freeware statistical calculator that computes sample size, effect size, and power statistics for factorial ANOVA designs. It also calculates relational confidence intervals for ANOVA effects based on formulas from Jarmasz and Hollands (Canadian Journal of Experimental Psychology 63:124-138, 2009), as well as Bayesian posterior probabilities for the null and alternative hypotheses based on formulas in Masson (Behavior Research Methods 43:679-690, 2011). The program is unique in affording direct comparison of these three approaches to the interpretation of ANOVA tests. Its high numerical precision and ability to work with complex ANOVA designs could facilitate researchers' attention to issues of statistical power, Bayesian analysis, and the use of confidence intervals for data interpretation. MorePower 6.0 is available at https://wiki.usask.ca/pages/viewpageattachments.action?pageId=420413544 .",2012-12-01 +21521499,ShoRAH: estimating the genetic diversity of a mixed sample from next-generation sequencing data.,"

Background

With next-generation sequencing technologies, experiments that were considered prohibitive only a few years ago are now possible. However, while these technologies have the ability to produce enormous volumes of data, the sequence reads are prone to error. This poses fundamental hurdles when genetic diversity is investigated.

Results

We developed ShoRAH, a computational method for quantifying genetic diversity in a mixed sample and for identifying the individual clones in the population, while accounting for sequencing errors. The software was run on simulated data and on real data obtained in wet lab experiments to assess its reliability.

Conclusions

ShoRAH is implemented in C++, Python, and Perl and has been tested under Linux and Mac OS X. Source code is available under the GNU General Public License at http://www.cbg.ethz.ch/software/shorah.",2011-04-26 +22704203,Novel application of the CORAL software to model cytotoxicity of metal oxide nanoparticles to bacteria Escherichia coli.,Convenient to apply and available on the Internet software CORAL (http://www.insilico.eu/CORAL) has been used to build up quantitative structure-activity relationships (QSAR) for prediction of cytotoxicity of metal oxide nanoparticles to bacteria Escherichia coli (minus logarithm of concentration for 50% effect pEC50). In this study six random splits of the data into the training and test set were examined. It has been shown that the CORAL provides a reliable tool that could be used to build up a QSAR of the pEC50.,2012-06-15 +22094860,Random Addition Concatenation Analysis: a novel approach to the exploration of phylogenomic signal reveals strong agreement between core and shell genomic partitions in the cyanobacteria.,"Recent whole-genome approaches to microbial phylogeny have emphasized partitioning genes into functional classes, often focusing on differences between a stable core of genes and a variable shell. To rigorously address the effects of partitioning and combining genes in genome-level analyses, we developed a novel technique called Random Addition Concatenation Analysis (RADICAL). RADICAL operates by sequentially concatenating randomly chosen gene partitions starting with a single-gene partition and ending with the entire genomic data set. A phylogenetic tree is built for every successive addition, and the entire process is repeated creating multiple random concatenation paths. The result is a library of trees representing a large variety of differently sized random gene partitions. This library can then be mined to identify unique topologies, assess overall agreement, and measure support for different trees. 
To evaluate RADICAL, we used 682 orthologous genes across 13 cyanobacterial genomes. Despite previous assertions of substantial differences between a core and a shell set of genes for this data set, RADICAL reveals the two partitions contain congruent phylogenetic signal. Substantial disagreement within the data set is limited to a few nodes and genes involved in metabolism, a functional group that is distributed evenly between the core and the shell partitions. We highlight numerous examples where RADICAL reveals aspects of phylogenetic behavior not evident by examining individual gene trees or a ""'total evidence"" tree. Our method also demonstrates that most emergent phylogenetic signal appears early in the concatenation process. The software is freely available at http://desalle.amnh.org.",2011-11-16 +22319561,Dirichlet multinomial mixtures: generative models for microbial metagenomics.,"We introduce Dirichlet multinomial mixtures (DMM) for the probabilistic modelling of microbial metagenomics data. This data can be represented as a frequency matrix giving the number of times each taxa is observed in each sample. The samples have different size, and the matrix is sparse, as communities are diverse and skewed to rare taxa. Most methods used previously to classify or cluster samples have ignored these features. We describe each community by a vector of taxa probabilities. These vectors are generated from one of a finite number of Dirichlet mixture components each with different hyperparameters. Observed samples are generated through multinomial sampling. The mixture components cluster communities into distinct 'metacommunities', and, hence, determine envirotypes or enterotypes, groups of communities with a similar composition. The model can also deduce the impact of a treatment and be used for classification. We wrote software for the fitting of DMM models using the 'evidence framework' (http://code.google.com/p/microbedmm/). 
This includes the Laplace approximation of the model evidence. We applied the DMM model to human gut microbe genera frequencies from Obese and Lean twins. From the model evidence four clusters fit this data best. Two clusters were dominated by Bacteroides and were homogenous; two had a more variable community composition. We could not find a significant impact of body mass on community structure. However, Obese twins were more likely to derive from the high variance clusters. We propose that obesity is not associated with a distinct microbiota but increases the chance that an individual derives from a disturbed enterotype. This is an example of the 'Anna Karenina principle (AKP)' applied to microbial communities: disturbed states having many more configurations than undisturbed. We verify this by showing that in a study of inflammatory bowel disease (IBD) phenotypes, ileal Crohn's disease (ICD) is associated with a more variable community.",2012-02-03 +23564964,MH2c: Characterization of major histocompatibility α-helices - an information criterion approach.,"Major histocompatibility proteins share a common overall structure or peptide binding groove. Two binding groove domains, on the same chain for major histocompatibility class I or on two different chains for major histocompatibility class II, contribute to that structure that consists of two α-helices (""wall"") and a sheet of eight anti-parallel beta strands (""floor""). Apart from the peptide presented in the groove, the major histocompatibility α-helices play a central role for the interaction with the T cell receptor. This study presents a generalized mathematical approach for the characterization of these helices. We employed polynomials of degree 1 to 7 and splines with 1 to 2 nodes based on polynomials of degree 1 to 7 on the α-helices projected on their principal components. 
We evaluated all models with a corrected Akaike Information Criterion to determine which model represents the α-helices in the best way without overfitting the data. This method is applicable for both the stationary and the dynamic characterization of α-helices. By deriving differential geometric parameters from these models one obtains a reliable method to characterize and compare α-helices for a broad range of applications.

Program summary

Program title: MH2c (MH helix curves) Catalogue identifier: AELX_v1_0 Program summary URL:http://cpc.cs.qub.ac.uk/summaries/AELX_v1_0.html Program obtainable from: CPC Program Library, Queen's University, Belfast, N. Ireland Licensing provisions: Standard CPC licence, http://cpc.cs.qub.ac.uk/licence/licence.html No. of lines in distributed program, including test data, etc.: 327 565 No. of bytes in distributed program, including test data, etc.: 17 433 656 Distribution format: tar.gz Programming language: Matlab Computer: Personal computer architectures Operating system: Windows, Linux, Mac (all systems on which Matlab can be installed) RAM: Depends on the trajectory size, min. 1 GB (Matlab) Classification: 2.1, 4.9, 4.14 External routines: Curve Fitting Toolbox and Statistic Toolbox of Matlab Nature of problem: Major histocompatibility (MH) proteins share a similar overall structure. However, identical MH alleles which present different peptides differ by subtle conformational alterations. One hypothesis is that such conformational differences could be another level of T cell regulation. By this software package we present a reliable and systematic way to compare different MH structures to each other. Solution method: We tested several fitting approaches on all available experimental crystal structures of MH to obtain an overall picture of how to describe MH helices. For this purpose we transformed all complexes into the same space and applied splines and polynomials of several degrees to them. To draw a general conclusion which method fits them best we employed the ""corrected Akaike Information Criterion"". The software is applicable for all kinds of helices of biomolecules. 
Running time: Depends on the data, for a single stationary structure the runtime should not exceed a few seconds.",2012-07-01 +23203873,New functional families (FunFams) in CATH to improve the mapping of conserved functional sites to 3D structures.,"CATH version 3.5 (Class, Architecture, Topology, Homology, available at http://www.cathdb.info/) contains 173 536 domains, 2626 homologous superfamilies and 1313 fold groups. When focusing on structural genomics (SG) structures, we observe that the number of new folds for CATH v3.5 is slightly less than for previous releases, and this observation suggests that we may now know the majority of folds that are easily accessible to structure determination. We have improved the accuracy of our functional family (FunFams) sub-classification method and the CATH sequence domain search facility has been extended to provide FunFam annotations for each domain. The CATH website has been redesigned. We have improved the display of functional data and of conserved sequence features associated with FunFams within each CATH superfamily.",2012-11-29 +23492433,CRISPRTarget: bioinformatic prediction and analysis of crRNA targets.,"The bacterial and archaeal CRISPR/Cas adaptive immune system targets specific protospacer nucleotide sequences in invading organisms. This requires base pairing between processed CRISPR RNA and the target protospacer. For type I and II CRISPR/Cas systems, protospacer adjacent motifs (PAM) are essential for target recognition, and for type III, mismatches in the flanking sequences are important in the antiviral response. In this study, we examine the properties of each class of CRISPR. We use this information to provide a tool (CRISPRTarget) that predicts the most likely targets of CRISPR RNAs (http://bioanalysis.otago.ac.nz/CRISPRTarget). This can be used to discover targets in newly sequenced genomic or metagenomic data. 
To test its utility, we discover features and targets of well-characterized Streptococcus thermophilus and Sulfolobus solfataricus type II and III CRISPR/Cas systems. Finally, in Pectobacterium species, we identify new CRISPR targets and propose a model of temperate phage exposure and subsequent inhibition by the type I CRISPR/Cas systems.",2013-03-14 +23828247,An information-theoretic classification of amino acids for the assessment of interfaces in protein-protein docking.,"Docking represents a versatile and powerful method to predict the geometry of protein-protein complexes. However, despite significant methodical advances, the identification of good docking solutions among a large number of false solutions still remains a difficult task. We have previously demonstrated that the formalism of mutual information (MI) from information theory can be adapted to protein docking, and we have now extended this approach to enhance its robustness and applicability. A large dataset consisting of 22,934 docking decoys derived from 203 different protein-protein complexes was used for an MI-based optimization of reduced amino acid alphabets representing the protein-protein interfaces. This optimization relied on a clustering analysis that allows one to estimate the mutual information of whole amino acid alphabets by considering all structural features simultaneously, rather than by treating them individually. This clustering approach is fast and can be applied in a similar fashion to the generation of reduced alphabets for other biological problems like fold recognition, sequence data mining, or secondary structure prediction. The reduced alphabets derived from the present work were converted into a scoring function for the evaluation of docking solutions, which is available for public use via the web service score-MI: http://score-MI.biochem.uni-erlangen.de.",2013-07-05 +21388547,The dChip survival analysis module for microarray data.,"

Background

Genome-wide expression signatures are emerging as potential markers for overall survival and disease recurrence risk as evidenced by recent commercialization of gene expression based biomarkers in breast cancer. Similar predictions have recently been carried out using genome-wide copy number alterations and microRNAs. Existing software packages for microarray data analysis provide functions to define expression-based survival gene signatures. However, there is no software that can perform survival analysis using SNP array data or draw survival curves interactively for expression-based sample clusters.

Results

We have developed the survival analysis module in the dChip software that performs survival analysis across the genome for gene expression and copy number microarray data. Built on the current dChip software's microarray analysis functions such as chromosome display and clustering, the new survival functions include interactive exploring of Kaplan-Meier (K-M) plots using expression or copy number data, computing survival p-values from the log-rank test and Cox models, and using permutation to identify significant chromosome regions associated with survival.

Conclusions

The dChip survival module provides user-friendly way to perform survival analysis and visualize the results in the context of genes and cytobands. It requires no coding expertise and only minimal learning curve for thousands of existing dChip users. The implementation in Visual C++ also enables fast computation. The software and demonstration data are freely available at http://dchip-surv.chenglilab.org.",2011-03-09 +22978639,ReactionPredictor: prediction of complex chemical reactions at the mechanistic level using machine learning.,"Proposing reasonable mechanisms and predicting the course of chemical reactions is important to the practice of organic chemistry. Approaches to reaction prediction have historically used obfuscating representations and manually encoded patterns or rules. Here we present ReactionPredictor, a machine learning approach to reaction prediction that models elementary, mechanistic reactions as interactions between approximate molecular orbitals (MOs). A training data set of productive reactions known to occur at reasonable rates and yields and verified by inclusion in the literature or textbooks is derived from an existing rule-based system and expanded upon with manual curation from graduate level textbooks. Using this training data set of complex polar, hypervalent, radical, and pericyclic reactions, a two-stage machine learning prediction framework is trained and validated. In the first stage, filtering models trained at the level of individual MOs are used to reduce the space of possible reactions to consider. In the second stage, ranking models over the filtered space of possible reactions are used to order the reactions such that the productive reactions are the top ranked. The resulting model, ReactionPredictor, perfectly ranks polar reactions 78.1% of the time and recovers all productive reactions 95.7% of the time when allowing for small numbers of errors. 
Pericyclic and radical reactions are perfectly ranked 85.8% and 77.0% of the time, respectively, rising to >93% recovery for both reaction types with a small number of allowed errors. Decisions about which of the polar, pericyclic, or radical reaction type ranking models to use can be made with >99% accuracy. Finally, for multistep reaction pathways, we implement the first mechanistic pathway predictor using constrained tree-search to discover a set of reasonable mechanistic steps from given reactants to given products. Webserver implementations of both the single step and pathway versions of ReactionPredictor are available via the chemoinformatics portal http://cdb.ics.uci.edu/.",2012-10-01 +24170080,[Personalised pharmacogenetics. Evidence-based guidelines and clinical application of pharmacogenetic diagnostics].,"The broad clinical application of pharmacogenetic diagnostics for individualised drug treatment is still limited. With the exception of oncological therapies where molecular tumor makers are frequently used to decide upon individual drug therapies, pharmacogenetic testing is not generally offered in clinical laboratory diagnostics, because the costs are not covered by general health insurance and it is not evident what consequences the results of a genotyping test may have for the individual drug treatment. Especially in the context of pharmacokinetics, bioequivalence-based concepts have been developed that allow the individual drug dosage or therapy to be adjusted to genetic polymorphisms in drug metabolism, drug transport that affect drug absorption, metabolism and elimination. Pharmacogenetic aspects are increasingly included in the product information (e.g., on its website the FDA lists more than 60 drug labels that include pharmacogenetic information). However, most pharmacogenetic information on drug labels does not give recommendations for clinical decisions to be made based on individual genotypes. 
This gap is currently being closed by the development of international consortia aiming to base clinical recommendations on the best available evidence by systematic review of the existing data. The Clinical Pharmacogenetics Implementation Consortium of the Pharmacogenomics Research Network (CPIC) is an international community-driven organisation that is developing peer-reviewed, freely available gene/drug guidelines that are published in full at PharmGKB (http://www.pharmgkb.org). The aim of these guidelines is to give therapeutic recommendations such as dose adjustments or suggestions for the choice of an alternative drug in the case of specific genotypes (phenotypes) that predict slow metabolism or transport of drugs or safety risks or risks of therapeutic failure. These guidelines are not mandatory but serve to facilitate the translation of pharmacogenetic knowledge from bench to bedside.",2013-11-01 +23288102,Importance of histological analysis of seroma fluid.,"

Unlabelled

The recent observation of anaplastic large cell lymphoma (ALCL) in association with breast implants has initiated a large amount of literature recently, particularly in light of the issues with Poly Implant Prosthese implants. There are now approximately 35-50 reports of this lymphoma associated with breast implants. One of the presenting signs with this lymphoma is a late peri-implant seroma. Given Kim et al.'s recommendations for seroma fluid to be analysed, we suggest that all late seromas should be considered for analysis for the possible presence of a causative ALCL pathology, and add to the data currently available on this association.

Level of evidence v

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266 .",2013-01-04 +21653516,HAPGEN2: simulation of multiple disease SNPs.,"

Motivation

Performing experiments with simulated data is an inexpensive approach to evaluating competing experimental designs and analysis methods in genome-wide association studies. Simulation based on resampling known haplotypes is fast and efficient and can produce samples with patterns of linkage disequilibrium (LD), which mimic those in real data. However, the inability of current methods to simulate multiple nearby disease SNPs on the same chromosome can limit their application.

Results

We introduce a new simulation algorithm based on a successful resampling method, HAPGEN, that can simulate multiple nearby disease SNPs on the same chromosome. The new method, HAPGEN2, retains many advantages of resampling methods and expands the range of disease models that current simulators offer.

Availability

HAPGEN2 is freely available from http://www.stats.ox.ac.uk/~marchini/software/gwas/gwas.html.

Contact

zhan@well.ox.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-08 +24885964,Modulation of insulin/IGFs pathways by sirtuin-7 inhibition in drug-induced chemoreistance.,"

Background

Insulin and insulin-like growth factors (IGFs) are key regulators of metabolism and growth. Recent evidence suggests a key role of these pathways in non-classical tissues, but the metabolic pathways by which these hormones exert their effects in neoplasia are unclear.

Aims

To study insulin/IGFs pathways in drug sensitive and resistant cancer cells representing breast cancer (MCF-7), osteosarcoma (SaOS-2), and ovarian cancer (A2780) and to examine the effect of Sirtuin-7 (Sirt7) inhibition on insulin/IGFs pathways in MCF-7 cell line.

Methods

Drug resistant cells were generated by continuous incubation of parental cell lines with stepwise increases in Doxorubicin or Cisplatin over a period of 3 to 6 months. MCF-7 cells were transfected with cloned hairpin siRNA template for Sirt7 using the Amaxa GmbH transfection system. mRNA expression of Sirt7, INSR, IRS-1, IRS-2, IRS-4, IGF-1, IGF-2, MDR-1, MRP-1, BCRP was measured by qPCR and Sirt7 by standard Western blotting. FITC-insulin uptake was imaged with Leica Confocal Microscope.

Results

Insulin receptor (INSR) and insulin receptor substrate-1 (IRS-1) were inhibited in drug-induced resistance, whereas IRS-2 was significantly induced in all the chemoresistant cells tested when compared to their parental counterparts. IGF-1 and IGF-2 were also upregulated in all the drug resistant cells tested. Sirt7 was significantly reduced in all chemoresistant cells tested. Knockdown of Sirt7 expression in human breast MCF-7 cell line by siRNA induced premature senescence-like phenotype and multi-drug resistance, suggesting that this gene may play an active role in regulating cancer cell response to stress. Suppression of Sirt7 selectively inhibited INSR and IRS-1, whereas it had minimal effect on that of IRS-2. Sirt7 suppression in MCF-7 also inhibited insulin uptake. Additionally, Sirt7 inhibition upregulated IGF-1, IGF-2 and IGFR expression.

Conclusion

Our data demonstrate that stress-induced Sirt7 inhibition significantly increases stress resistance and modulates insulin/IGF-1 signaling pathways. More importantly, this study links Sir2 family proteins to insulin/IGF signaling in drug-induced stress resistance in neoplasia.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1135426681234493.",2014-05-22 +23900188,Indel and Carryforward Correction (ICC): a new analysis approach for processing 454 pyrosequencing data.,"

Motivation

Pyrosequencing technology provides an important new approach to more extensively characterize diverse sequence populations and detect low frequency variants. However, the promise of this technology has been difficult to realize, as careful correction of sequencing errors is crucial to distinguish rare variants (∼1%) in an infected host with high sensitivity and specificity.

Results

We developed a new approach, referred to as Indel and Carryforward Correction (ICC), to cluster sequences without substitutions and locally correct only indel and carryforward sequencing errors within clusters to ensure that no rare variants are lost. ICC performs sequence clustering in the order of (i) homopolymer indel patterns only, (ii) indel patterns only and (iii) carryforward errors only, without the requirement of a distance cutoff value. Overall, ICC removed 93-95% of sequencing errors found in control datasets. On pyrosequencing data from a PCR fragment derived from 15 HIV-1 plasmid clones mixed at various frequencies as low as 0.1%, ICC achieved the highest sensitivity and similar specificity compared with other commonly used error correction and variant calling algorithms.

Availability and implementation

Source code is freely available for download at http://indra.mullins.microbiol.washington.edu/ICC. It is implemented in Perl and supported on Linux, Mac OS X and MS Windows.",2013-07-29 +23056181,Integrating constitutive gene expression and chemoactivity: mining the NCI60 anticancer screen.,"Studies into the genetic origins of tumor cell chemoactivity pose significant challenges to bioinformatic mining efforts. Connections between measures of gene expression and chemoactivity have the potential to identify clinical biomarkers of compound response, cellular pathways important to efficacy and potential toxicities; all vital to anticancer drug development. An investigation has been conducted that jointly explores tumor-cell constitutive NCI60 gene expression profiles and small-molecule NCI60 growth inhibition chemoactivity profiles, viewed from novel applications of self-organizing maps (SOMs) and pathway-centric analyses of gene expressions, to identify subsets of over- and under-expressed pathway genes that discriminate chemo-sensitive and chemo-insensitive tumor cell types. Linear Discriminant Analysis (LDA) is used to quantify the accuracy of discriminating genes to predict tumor cell chemoactivity. LDA results find 15% higher prediction accuracies, using ∼30% fewer genes, for pathway-derived discriminating genes when compared to genes derived using conventional gene expression-chemoactivity correlations. The proposed pathway-centric data mining procedure was used to derive discriminating genes for ten well-known compounds. Discriminating genes were further evaluated using gene set enrichment analysis (GSEA) to reveal a cellular genetic landscape, comprised of small numbers of key over and under expressed on- and off-target pathway genes, as important for a compound's tumor cell chemoactivity. Literature-based validations are provided as support for chemo-important pathways derived from this procedure. 
Qualitatively similar results are found when using gene expression measurements derived from different microarray platforms. The data used in this analysis is available at http://pubchem.ncbi.nlm.nih.gov/andhttp://www.ncbi.nlm.nih.gov/projects/geo (GPL96, GSE32474).",2012-10-02 +21647782,LipidomeDB data calculation environment: online processing of direct-infusion mass spectral data for lipid profiles.,"LipidomeDB Data Calculation Environment (DCE) is a web application to quantify complex lipids by processing data acquired after direct infusion of a lipid-containing biological extract, to which a cocktail of internal standards has been added, into an electrospray source of a triple quadrupole mass spectrometer. LipidomeDB DCE is located on the public Internet at http://lipidome.bcf.ku.edu:9000/Lipidomics . LipidomeDB DCE supports targeted analyses; analyte information can be entered, or pre-formulated lists of typical plant or animal polar lipid analytes can be selected. LipidomeDB DCE performs isotopic deconvolution and quantification in comparison to internal standard spectral peaks. Multiple precursor or neutral loss spectra from up to 35 samples may be processed simultaneously with data input as Excel files and output as tables viewable on the web and exportable in Excel. The pre-formulated compound lists and web access, used with direct-infusion mass spectrometry, provide a simple approach to lipidomic analysis, particularly for new users.",2011-06-07 +22406225,TB-Lineage: an online tool for classification and analysis of strains of Mycobacterium tuberculosis complex.,"This paper formulates a set of rules to classify genotypes of the Mycobacterium tuberculosis complex (MTBC) into major lineages using spoligotypes and MIRU-VNTR results. The rules synthesize prior literature that characterizes lineages by spacer deletions and variations in the number of repeats seen at locus MIRU24 (alias VNTR2687). 
A tool that efficiently and accurately implements this rule base is now freely available at http://tbinsight.cs.rpi.edu/run_tb_lineage.html. When MIRU24 data is not available, the system utilizes predictions made by a Naïve Bayes classifier based on spoligotype data. This website also provides a tool to generate spoligoforests in order to visualize the genetic diversity and relatedness of genotypes and their associated lineages. A detailed analysis of the application of these tools on a dataset collected by the CDC consisting of 3198 distinct spoligotypes and 5430 distinct MIRU-VNTR types from 37,066 clinical isolates is presented. The tools were also tested on four other independent datasets. The accuracy of automated classification using both spoligotypes and MIRU24 is >99%, and using spoligotypes alone is >95%. This online rule-based classification technique in conjunction with genotype visualization provides a practical tool that supports surveillance of TB transmission trends and molecular epidemiological studies.",2012-03-03 +21372081,Mixtures of common t-factor analyzers for clustering high-dimensional microarray data.,"

Motivation

Mixtures of factor analyzers enable model-based clustering to be undertaken for high-dimensional microarray data, where the number of observations n is small relative to the number of genes p. Moreover, when the number of clusters is not small, for example, where there are several different types of cancer, there may be the need to reduce further the number of parameters in the specification of the component-covariance matrices. A further reduction can be achieved by using mixtures of factor analyzers with common component-factor loadings (MCFA), which is a more parsimonious model. However, this approach is sensitive to both non-normality and outliers, which are commonly observed in microarray experiments. This sensitivity of the MCFA approach is due to its being based on a mixture model in which the multivariate normal family of distributions is assumed for the component-error and factor distributions.

Results

An extension to mixtures of t-factor analyzers with common component-factor loadings is considered, whereby the multivariate t-family is adopted for the component-error and factor distributions. An EM algorithm is developed for the fitting of mixtures of common t-factor analyzers. The model can handle data with tails longer than that of the normal distribution, is robust against outliers and allows the data to be displayed in low-dimensional plots. It is applied here to both synthetic data and some microarray gene expression data for clustering and shows its better performance over several existing methods.

Availability

The algorithms were implemented in Matlab. The Matlab code is available at http://blog.naver.com/aggie100.",2011-03-03 +23823934,Interaction of PICK1 with C-terminus of growth hormone-releasing hormone receptor (GHRHR) modulates trafficking and signal transduction of human GHRHR.,"Release of growth hormone (GH) from the somatotroph is regulated by binding GH-releasing hormone (GHRH) to its cognate receptor (GHRHR), one of the members of the G protein-coupled receptor (GPCR) superfamily. Proteins bound to the carboxy (C)-terminus of GPCR have been reported to regulate intracellular trafficking and function of the receptor; however, no functionally significant protein associated with GHRHR has been reported. We have identified a protein interacting with C-kinase 1 (PICK1) as a binding partner of GHRHR. In vitro binding assay revealed the PDZ-domain of PICK1 and the last four amino acid residues of GHRHR were prerequisite for the interaction. Further, in vivo association of these proteins was confirmed. Immunostaining data of a stable cell line expressing GHRHR with or without PICK1 suggested the C-terminus of GHRHR promoted cell surface expression of GHRHR and PICK1 affected the kinetics of the cell surface expression of GHRHR. Furthermore, cAMP production assay showed the C-terminus of GHRHR is involved in the regulation of receptor activation, and the interaction of GHRHR with PICK1 may influence intensities of the signal response after ligand stimulation. Thus, the interaction of the C-terminus of GHRHR with PICK1 has a profound role in regulating the trafficking and the signaling of GHRHR. 
[Supplementary Figure: available only at http://dx.doi.org/10.1254/jphs.12287FP].",2013-07-02 +21621752,"Complete (1)H and (13)C NMR chemical shift assignments of mono-, di-, and trisaccharides as basis for NMR chemical shift predictions of polysaccharides using the computer program casper.","The computer program casper uses (1)H and (13)C NMR chemical shift data of mono- to trisaccharides for the prediction of chemical shifts of oligo- and polysaccharides. In order to improve the quality of these predictions the (1)H and (13)C, as well as (31)P when applicable, NMR chemical shifts of 30 mono-, di-, and trisaccharides were assigned. The reducing sugars gave two distinct sets of NMR resonances due to the α- and β-anomeric forms. In total 35 (1)H and (13)C NMR chemical shift data sets were obtained from the oligosaccharides. One- and two-dimensional NMR experiments were used for the chemical shift assignments and special techniques were employed in some cases such as 2D (1)H,(13)C-HSQC Hadamard Transform methodology which was acquired approximately 45 times faster than a regular t(1) incremented (1)H,(13)C-HSQC experiment and a 1D (1)H,(1)H-CSSF-TOCSY experiment which was able to distinguish spin-systems in which the target protons were only 3.3Hz apart. The (1)H NMR chemical shifts were subsequently refined using total line-shape analysis with the PERCH NMR software. The acquired NMR data were then utilized in the casper program (http://www.casper.organ.su.se/casper/) for NMR chemical shift predictions of the O-antigen polysaccharides from Klebsiella O5, Shigella flexneri serotype X, and Salmonella arizonae O62. 
The data were compared to experimental data of the polysaccharides from the two former strains and the lipopolysaccharide of the latter strain showing excellent agreement between predicted and experimental (1)H and (13)C NMR chemical shifts.",2011-05-04 +22950075,First Korean case of Robinsoniella peoriensis bacteremia in a patient with aspiration pneumonia.,"Robinsoniella peoriensis has recently been identified as a Gram-positive, spore-forming, anaerobic rod originally recovered from swine manure storage pits. To date, 6 cases of R. peoriensis infection have been reported, including 2 cases of bacteremia, 1 of abdominal fluid collection, and 3 of wound infection. In the present study, we report a 76-yr-old man with R. peoriensis bacteremia who developed aspiration pneumonia. Gram staining of a purified colony revealed Gram-positive, rod-shaped bacteria. Biochemical identification using API 20 A (bioMérieux, France) indicated presence of Clostridium spp. We performed both 500-bp and full-gene sequencing of 16S rRNA of the isolate. The sequence was analyzed with MicroSeq ID 16S rRNA Library v2.0 (Applied Biosystems, USA), GenBank Basic Local Alignment Search Tool (BLAST) (http://www.ncbi.nlm.nih.gov/genbank), and EzTaxon database v2.1 (http://www.eztaxon.org). The 500-bp 16S rRNA sequence of the blood culture isolate showed 99.16-99.79% similarity with R. peoriensis and the full-gene 16S rRNA sequence showed 98.87-99.50% similarity with R. peoriensis. The organism was confirmed as R. peoriensis by using all of the mentioned databases except for MicroSeq, which did not include the RNA sequence of this bacterium. This case suggests that identification of R. peoriensis might be challenging in clinical laboratories with no access to molecular methods, as certain commercial identification systems may not identify, or may misidentify, this organism. To the best of our knowledge, this is the first report of the isolation of R. 
peoriensis in Korea.",2012-08-13 +22917656,dbDiarrhea: the database of pathogen proteins and vaccine antigens from diarrheal pathogens.,"Diarrhea occurs world-wide and is most commonly caused by gastrointestinal infections which kill around 2.2 million people globally each year, mostly children in developing countries. We describe here dbDiarrhea, which is currently the most comprehensive catalog of proteins implicated in the pathogenesis of diarrhea caused by major bacterial, viral and parasitic species. The current release of the database houses 820 proteins gleaned through an extensive and critical survey of research articles from PubMed. The major contributors to this compendium of proteins are Escherichia coli and Salmonella enterica. These proteins are classified into different categories such as Type III secretion system effectors, Type III secretion system components, and Pathogen proteins. There is another complementary module called 'Host proteins'. dbDiarrhea also serves as a repository of the research articles describing (1) trials of subunit and whole organism vaccines (2) high-throughput screening of Type III secretion system inhibitors and (3) diagnostic assays, for various diarrheal pathogens. The database is web accessible through an intuitive user interface that allows querying proteins and research articles for different organism, keywords and accession number. Besides providing the search facility through browsing, the database supports sequence similarity search with the BLAST tool. With the rapidly burgeoning global burden of the diarrhea, we anticipate that this database would serve as a source of useful information for furthering research on diarrhea. The database can be freely accessed at http://www.juit.ac.in/attachments/dbdiarrhea/diarrhea_home.html.",2012-08-13 +22743227,PurityEst: estimating purity of human tumor samples using next-generation sequencing data.,"

Unlabelled

We developed a novel algorithm, PurityEst, to infer the tumor purity level from the allelic differential representation of heterozygous loci with somatic mutations in a human tumor sample with a matched normal tissue using next-generation sequencing data. We applied our tool to whole cancer genome sequencing datasets and demonstrated the accuracy of PurityEst compared with DNA copy number-based estimation.

Availability

PurityEst has been implemented in PERL and is available at http://odin.mdacc.tmc.edu/~xsu1/PurityEst.html.",2012-06-28 +21861884,Biological interaction networks are conserved at the module level.,"

Background

Orthologous genes are highly conserved between closely related species and biological systems often utilize the same genes across different organisms. However, while sequence similarity often implies functional similarity, interaction data is not well conserved even for proteins with high sequence similarity. Several recent studies comparing high throughput data including expression, protein-protein, protein-DNA, and genetic interactions between close species show conservation at a much lower rate than expected.

Results

In this work we collected comprehensive high-throughput interaction datasets for four model organisms (S. cerevisiae, S. pombe, C. elegans, and D. melanogaster) and carried out systematic analyses in order to explain the apparent lower conservation of interaction data when compared to the conservation of sequence data. We first showed that several previously proposed hypotheses only provide a limited explanation for such lower conservation rates. We combined all interaction evidences into an integrated network for each species and identified functional modules from these integrated networks. We then demonstrate that interactions that are part of functional modules are conserved at much higher rates than previous reports in the literature, while interactions that connect between distinct functional modules are conserved at lower rates.

Conclusions

We show that conservation is maintained between species, but mainly at the module level. Our results indicate that interactions within modules are much more likely to be conserved than interactions between proteins in different modules. This provides a network based explanation to the observed conservation rates that can also help explain why so many biological processes are well conserved despite the lower levels of conservation for the interactions of proteins participating in these processes.Accompanying website: http://www.sb.cs.cmu.edu/CrossSP.",2011-08-23 +22530078,A decade in gastric cancer curative surgery: Evidence of progress (1999-2009).,"To investigate the progress in evidence-based surgical treatment of non-metastatic gastric cancer, we reviewed the last ten years' literature. The data used in this review were identified by searches made on MEDLINE, Current Contents, PubMed, and other references taken from relevant original articles (on prospective and retrospective studies) concerning gastric cancer surgery. Only papers published in English between January 1999 and December 2009 were selected. Data from ongoing studies were obtained in December 2009, from the trials registry of the United States National Institutes of Health (http://www.clinicaltrial.gov). The citations list was presented according to evidence based relevance (i.e., randomized controlled trials, prospective studies, retrospective series). In the last ten years, many challenges have been faced relating to the extension of gastric resection and nodal dissection as well as surgical timing, but we found only limited evidence, regardless of latitude of study. The ongoing phase-III trials may provide answers that will be valid for the coming decades, and which may bring definitive answers for the currently unresolved questions.",2012-03-01 +21330288,Sensitive gene fusion detection using ambiguously mapping RNA-Seq read pairs.,"

Motivation

Paired-end whole transcriptome sequencing provides evidence for fusion transcripts. However, due to the repetitiveness of the transcriptome, many reads have multiple high-quality mappings. Previous methods to find gene fusions either ignored these reads or required additional longer single reads. This can obscure up to 30% of fusions and unnecessarily discards much of the data.

Results

We present a method for using paired-end reads to find fusion transcripts without requiring unique mappings or additional single read sequencing. Using simulated data and data from tumors and cell lines, we show that our method can find fusions with ambiguously mapping read pairs without generating numerous spurious fusions from the many mapping locations.

Availability

A C++ and Python implementation of the method demonstrated in this article is available at http://exon.ucsd.edu/ShortFuse.

Contact

mckinsel@ucsd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-16 +21278189,Conveyor: a workflow engine for bioinformatic analyses.,"

Motivation

The rapidly increasing amounts of data available from new high-throughput methods have made data processing without automated pipelines infeasible. As was pointed out in several publications, integration of data and analytic resources into workflow systems provides a solution to this problem, simplifying the task of data analysis. Various applications for defining and running workflows in the field of bioinformatics have been proposed and published, e.g. Galaxy, Mobyle, Taverna, Pegasus or Kepler. One of the main aims of such workflow systems is to enable scientists to focus on analysing their datasets instead of taking care for data management, job management or monitoring the execution of computational tasks. The currently available workflow systems achieve this goal, but fundamentally differ in their way of executing workflows.

Results

We have developed the Conveyor software library, a multitiered generic workflow engine for composition, execution and monitoring of complex workflows. It features an open, extensible system architecture and concurrent program execution to exploit resources available on modern multicore CPU hardware. It offers the ability to build complex workflows with branches, loops and other control structures. Two example use cases illustrate the application of the versatile Conveyor engine to common bioinformatics problems.

Availability

The Conveyor application including client and server are available at http://conveyor.cebitec.uni-bielefeld.de.",2011-01-28 +22537044,KISSPLICE: de-novo calling alternative splicing events from RNA-seq data.,"

Background

In this paper, we address the problem of identifying and quantifying polymorphisms in RNA-seq data when no reference genome is available, without assembling the full transcripts. Based on the fundamental idea that each polymorphism corresponds to a recognisable pattern in a De Bruijn graph constructed from the RNA-seq reads, we propose a general model for all polymorphisms in such graphs. We then introduce an exact algorithm, called KISSPLICE, to extract alternative splicing events.

Results

We show that KISSPLICE enables to identify more correct events than general purpose transcriptome assemblers. Additionally, on a 71 M reads dataset from human brain and liver tissues, KISSPLICE identified 3497 alternative splicing events, out of which 56% are not present in the annotations, which confirms recent estimates showing that the complexity of alternative splicing has been largely underestimated so far.

Conclusions

We propose new models and algorithms for the detection of polymorphism in RNA-seq data. This opens the way to a new kind of studies on large HTS RNA-seq datasets, where the focus is not the global reconstruction of full-length transcripts, but local assembly of polymorphic regions. KISSPLICE is available for download at http://alcovna.genouest.org/kissplice/.",2012-04-19 +21330289,Defining an informativeness metric for clustering gene expression data.,"

Motivation

Unsupervised 'cluster' analysis is an invaluable tool for exploratory microarray data analysis, as it organizes the data into groups of genes or samples in which the elements share common patterns. Once the data are clustered, finding the optimal number of informative subgroups within a dataset is a problem that, while important for understanding the underlying phenotypes, is one for which there is no robust, widely accepted solution.

Results

To address this problem we developed an 'informativeness metric' based on a simple analysis of variance statistic that identifies the number of clusters which best separate phenotypic groups. The performance of the informativeness metric has been tested on both experimental and simulated datasets, and we contrast these results with those obtained using alternative methods such as the gap statistic.

Availability

The method has been implemented in the Bioconductor R package attract; it is also freely available from http://compbio.dfci.harvard.edu/pubs/attract_1.0.1.zip.

Contact

jess@jimmy.harvard.edu; johnq@jimmy.harvard.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-16 +22912585,Dissecting the gene network of dietary restriction to identify evolutionarily conserved pathways and new functional genes.,"Dietary restriction (DR), limiting nutrient intake from diet without causing malnutrition, delays the aging process and extends lifespan in multiple organisms. The conserved life-extending effect of DR suggests the involvement of fundamental mechanisms, although these remain a subject of debate. To help decipher the life-extending mechanisms of DR, we first compiled a list of genes that if genetically altered disrupt or prevent the life-extending effects of DR. We called these DR-essential genes and identified more than 100 in model organisms such as yeast, worms, flies, and mice. In order for other researchers to benefit from this first curated list of genes essential for DR, we established an online database called GenDR (http://genomics.senescence.info/diet/). To dissect the interactions of DR-essential genes and discover the underlying lifespan-extending mechanisms, we then used a variety of network and systems biology approaches to analyze the gene network of DR. We show that DR-essential genes are more conserved at the molecular level and have more molecular interactions than expected by chance. Furthermore, we employed a guilt-by-association method to predict novel DR-essential genes. In budding yeast, we predicted nine genes related to vacuolar functions; we show experimentally that mutations deleting eight of those genes prevent the life-extending effects of DR. Three of these mutants (OPT2, FRE6, and RCR2) had extended lifespan under ad libitum, indicating that the lack of further longevity under DR is not caused by a general compromise of fitness. These results demonstrate how network analyses of DR using GenDR can be used to make phenotypically relevant predictions. 
Moreover, gene-regulatory circuits reveal that the DR-induced transcriptional signature in yeast involves nutrient-sensing, stress responses and meiotic transcription factors. Finally, comparing the influence of gene expression changes during DR on the interactomes of multiple organisms led us to suggest that DR commonly suppresses translation, while stimulating an ancient reproduction-related process.",2012-08-09 +21700673,A rank-based statistical test for measuring synergistic effects between two gene sets.,"

Motivation

Due to recent advances in high-throughput technologies, data on various types of genomic annotation have accumulated. These data will be crucially helpful for elucidating the combinatorial logic of transcription. Although several approaches have been proposed for inferring cooperativity among multiple factors, most approaches are haunted by the issues of normalization and threshold values.

Results

In this article, we propose a rank-based non-parametric statistical test for measuring the effects between two gene sets. This method is free from the issues of normalization and threshold value determination for gene expression values. Furthermore, we have proposed an efficient Markov chain Monte Carlo method for calculating an approximate significance value of synergy. We have applied this approach for detecting synergistic combinations of transcription factor binding motifs and histone modifications.

Availability

C implementation of the method is available from http://www.hgc.jp/~yshira/software/rankSynergy.zip.

Contact

yshira@hgc.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-23 +24641834,Levels of HOXB7 and miR-337 in pancreatic ductal adenocarcinoma patients.,"

Background

Many studies have revealed that homeobox-B7 (HOXB7) and miR-337 play important roles in different types of human cancers. However, the relationship of HOXB7 and miR-337 in PDAC with clinicopathological factors has not yet been examined and their biological roles remain to be explored.

Methods

Using quantitative real-time RT-PCR and immunohistochemical staining, the expression of HOXB7 mRNA, miR-337, and HOXB7 protein in 44 PDAC samples was detected. Survival curves were made using follow-up data. The relationship between clinical or pathological characteristics and the prognosis was analyzed.

Results

The expression levels of HOXB7 mRNA and HOXB7 protein were significantly elevated in PDAC samples than that in non-malignant adjacent tissues. There were obvious differences in HOXB7 mRNA and proteins between tumors of different diameters, differentiation, TNM stage, and lymph node status. The level of miR-337 was markedly lower in tumor samples than in non-malignant adjacent tissues. The expression of miR-337 was related to TNM stage and lymph node status. There were significant differences in survival curves between patients with tumors <4 cm in diameter and patients with tumors ≥4 cm, among groups of well, moderately, and poorly differentiated tumors, between groups with TNM stages I, II and III or IV, between groups with metastatic lymph nodes and non-metastatic lymph nodes, among groups of HOXB7 protein expression negative (or weak) and positive, between groups with low levels of miR-337 expression and with high levels of miR-337 expression. The levels of HOXB7 mRNA, HOXB7 protein, and miR-337 were found to be associated with longer survival.

Conclusion

The present study showed that HOXB7 was over-expressed and miR-337 was minimally expressed in PDAC tissues, and their levels were related to TNM stage and lymph node status. The levels of HOXB7 mRNA, HOXB7 protein, and miR-337 were associated with survival in PDAC patients. Results suggested that HOXB7 and miR-337 could be used as determinants of PDAC patient prognosis.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1509730773118658.",2014-03-18 +24152881,Increased expression of α-methylacyl-coenzyme A racemase (AMACR; p504s) and p16 in distal hyperplastic polyps.,"

Background

Hyperplastic polyps (HP) and sessile serrated adenomas (SSA) share morphological similarities. In this immunohistochemical study we chose a panel of potential relevant and promising biomarkers including α-methylacyl-coenzyme A racemase (AMACR; p504s), which is involved in the degradation of branched chained fatty acids derivates, and analysed a cohort of HPs and SSAs in order to identify different immunophenotypes in relation to lesion localisation.

Methods

154 specimens were carefully selected and a micro tissue array (TMA) was constructed. Immunohistochemistry of p16Ink4a, Ki67, α-methylacyl-coenzyme A racemase (AMACR; p504s), BRAF, CK 20, MLH1 and β-catenin was performed and immunoexpression was compared among proximal and distal HPs as well as SSAs.

Results

None of the markers revealed a differential expression among HPs and SSAs. However, the study demonstrates a significant overexpression of AMACR (p = 0.004) and p16Ink4a (p = 0.028) in distal HPs compared to proximal HPs. In addition AMACR overexpression was associated with increased p16Ink4a immunoexpression (p < 0.001).

Conclusions

In this study we describe differential AMACR and p16Ink4a in HPs in relation to their localisation. Distal HPs were characterized by AMACR and p16Ink4a overexpression in contrast to proximal HPs, although morphologically identical. Thus AMACR overexpression points towards a pathobiological relevance of the protein in distal HPs. In context of recently published data this suggests distal HPs as potential precursor lesions of certain adenoma subtypes. However, at this point of time this finding remains speculative and needs to be confirmed by further studies.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1836116001066768.",2013-10-23 +23626661,AAPL: Assessing Association between P-value Lists.,"Joint analyses of high-throughput datasets generate the need to assess the association between two long lists of p-values. In such p-value lists, the vast majority of the features are insignificant. Ideally contributions of features that are null in both tests should be minimized. However, by random chance their p-values are uniformly distributed between zero and one, and weak correlations of the p-values may exist due to inherent biases in the high-throughput technology used to generate the multiple datasets. Rank-based agreement test may capture such unwanted effects. Testing contingency tables generated using hard cutoffs may be sensitive to arbitrary threshold choice. We develop a novel method based on feature-level concordance using local false discovery rate. The association score enjoys straight-forward interpretation. The method shows higher statistical power to detect association between p-value lists in simulation. We demonstrate its utility using real data analysis. The R implementation of the method is available at http://userwww.service.emory.edu/~tyu8/AAPL/.",2013-04-01 +23140350,PanelComposer: a web-based panel construction tool for multivariate analysis of disease biomarker candidates.,"Measuring and evaluating diagnostic efficiency is important in biomarker discovery and validation. The receiver operating characteristic (ROC) curve is a graphical plot for assessing the performance of a classifier or predictor that can be used to test the sensitivity and specificity of diagnostic biomarkers. 
In this study, we describe PanelComposer, a Web-based software tool that uses statistical results from proteomic expression data and validates biomarker candidates based on ROC curves and the area under the ROC curve (AUC) values using a logistic regression model and provides an ordered list that includes ROC graphs and AUC values for proteins (individually or in combination). This tool allows users to easily compare and assess the effectiveness and diagnostic efficiency of single or multiprotein biomarker candidates. PanelComposer is available publicly at http://panelcomposer.proteomix.org/ and is compatible with major Web browsers.",2012-11-16 +24636669,Histopathological features of bone regeneration in a canine segmental ulnar defect model.,"

Background

Today, finding an ideal biomaterial to treat the large bone defects, delayed unions and non-unions remains a challenge for orthopaedic surgeons and researchers. Several studies have been carried out on the subject of bone regeneration, each having its own advantages. The present study has been designed in vivo to evaluate the effects of cellular auto-transplantation of tail vertebrae on healing of experimental critical bone defect in a dog model.

Methods

Six indigenous breeds of dog with 32 ± 3.6 kg average weight from both sexes (5 males and 1 female) received bilateral critical-sized ulnar segmental defects. After determining the health condition, divided to 2 groups: The Group I were kept as control I (n = 1) while in Group II (experimental group; n = 5) bioactive bone implants were inserted. The defects were implanted with either autogeneic coccygeal bone grafts in dogs with 3-4 cm diaphyseal defects in the ulna. Defects were stabilized with internal plate fixation, and the control defects were not stabilized. Animals were euthanized at 16 weeks and analyzed by histopathology.

Results

Histological evaluation of this new bone at sixteen weeks postoperatively revealed primarily lamellar bone, with the formation of new cortices and normal-appearing marrow elements. And also reformation cortical compartment and reconstitution of marrow space were observed at the graft-host interface together with graft resorption and necrosis responses. Finally, our data were consistent with the osteoconducting function of the tail autograft.

Conclusions

Our results suggested that the tail vertebrae autograft seemed to be a new source of autogenous cortical bone in order to support segmental long bone defects in dogs. Furthermore, cellular autotransplantation was found to be a successful replacement for the tail vertebrae allograft bone at 3-4 cm segmental defects in the canine mid- ulna. Clinical application using graft expanders or bone autotransplantation should be used carefully and requires further investigation.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2028232688119271.",2014-03-17 +22871885,Nomogram prediction for overall survival of patients diagnosed with cervical cancer.,"

Background

Nomograms are predictive tools that are widely used for estimating cancer prognosis. The aim of this study was to develop a nomogram for the prediction of overall survival (OS) in patients diagnosed with cervical cancer.

Methods

Cervical cancer databases of two large institutions were analysed. Overall survival was defined as the clinical endpoint and OS probabilities were estimated using the Kaplan-Meier method. Based on the results of survival analyses and previous studies, relevant covariates were identified, a nomogram was constructed and validated using bootstrap cross-validation. Discrimination of the nomogram was quantified with the concordance probability.

Results

In total, 528 consecutive patients with invasive cervical cancer, who had all nomogram variables available, were identified. Mean 5-year OS rates for patients with International Federation of Gynecologists and Obstetricians (FIGO) stage IA, IB, II, III, and IV were 99.0%, 88.6%, 65.8%, 58.7%, and 41.5%, respectively. Seventy-six cancer-related deaths were observed during the follow-up period. FIGO stage, tumour size, age, histologic subtype, lymph node ratio, and parametrial involvement were selected as nomogram covariates. The prognostic performance of the model exceeded that of FIGO stage alone and the model's estimated optimism-corrected concordance probability was 0.723, indicating accurate prediction of OS. We present the prediction model as nomogram and provide a web-based risk calculator (http://www.ccc.ac.at/gcu).

Conclusion

Based on six easily available parameters, a novel statistical model to predict OS of patients diagnosed with cervical cancer was constructed and validated. The model was implemented in a nomogram and provides accurate prediction of individual patients' prognosis useful for patient counselling and deciding on follow-up strategies.",2012-08-07 +22866914,"Cancer support services--are they appropriate and accessible for Indigenous cancer patients in Queensland, Australia?","

Introduction

In Queensland, Australia, the incidence of cancer (all cancers combined) is 21% lower for Indigenous people compared with non-Indigenous people but mortality is 36% higher. Support services play an important role in helping cancer patients through their cancer journey. Indigenous cancer patients are likely to face greater unmet supportive care needs and more barriers to accessing cancer care and support. Other barriers include the higher proportion of Indigenous people who live remotely and in regional areas, a known difficulty for access to health services. This study describes the availability of cancer support services in Queensland for Indigenous patients and relevant location.

Methods

Using a set criteria 121 services were selected from a pre-existing database (n = 344) of cancer services. These services were invited to complete an online questionnaire. ArcGIS (http://www.esri.com/software/arcgis/index.html) was used to map the services' location (using postcode) against Indigenous population by local government area. Services were classified as an 'Indigenous' or 'Indigenous friendly' service using set criteria.

Results

Eighty-three services (73.6%) completed the questionnaire. Mapping revealed services are located where there are relatively low percentages of Indigenous people compared with the whole population. No 'Indigenous-specific' services were identified; however, 11 services (13%) were classed 'Indigenous-friendly'. The primary support offered by these services was 'information'. Fewer referrals were received from Indigenous liaison officers compared with other health professionals. Only 8.6% of services reported frequently having contact with an Indigenous organisation; however, 44.6% of services reported that their staff participated in cultural training. Services also identified barriers to access which may exist for Indigenous clientele, including no Indigenous staff and the costs involved in accessing the service, but were unable to address these issues due to restricted staff and funding capacity.

Conclusion

Further research into the best models for providing culturally appropriate cancer support services to Indigenous people is essential to ensure Indigenous patients are well supported throughout their cancer journey. Emphasis should be placed on providing support services where a high Indigenous population percentage resides to ensure support is maintained in rural and remote settings. Further efforts should be placed on relationships with Indigenous organisations and mainstream support services and encouraging referral from Indigenous liaison officers.",2012-08-07 +22871314,"Analysis of Babesia bovis infection-induced gene expression changes in larvae from the cattle tick, Rhipicephalus (Boophilus) microplus.","

Background

Cattle babesiosis is a tick-borne disease of cattle that has severe economic impact on cattle producers throughout the world's tropical and subtropical countries. The most severe form of the disease is caused by the apicomplexan, Babesia bovis, and transmitted to cattle through the bite of infected cattle ticks of the genus Rhipicephalus, with the most prevalent species being Rhipicephalus (Boophilus) microplus. We studied the reaction of the R. microplus larval transcriptome in response to infection by B. bovis.

Methods

Total RNA was isolated for both uninfected and Babesia bovis-infected larval samples. Subtracted libraries were prepared by subtracting the B. bovis-infected material with the uninfected material, thus enriching for expressed genes in the B. bovis-infected sample. Expressed sequence tags from the subtracted library were generated, assembled, and sequenced. To complement the subtracted library method, differential transcript expression between samples was also measured using custom high-density microarrays. The microarray probes were fabricated using oligonucleotides derived from the Bmi Gene Index database (Version 2). Array results were verified for three target genes by real-time PCR.

Results

Ticks were allowed to feed on a B. bovis-infected splenectomized calf and on an uninfected control calf. RNA was purified in duplicate from whole larvae and subtracted cDNA libraries were synthesized from Babesia-infected larval RNA, subtracting with the corresponding uninfected larval RNA. One thousand ESTs were sequenced from the larval library and the transcripts were annotated. We used a R. microplus microarray designed from a R. microplus gene index, BmiGI Version 2, to look for changes in gene expression that were associated with infection of R. microplus larvae. We found 24 transcripts were expressed at a statistically significant higher level in ticks feeding upon a B. bovis-infected calf contrasted to ticks feeding on an uninfected calf. Six transcripts were expressed at a statistically significant lower level in ticks feeding upon a B. bovis-infected calf contrasted to ticks feeding on an uninfected calf.

Conclusion

Our experimental approaches yielded specific differential gene expression associated with the infection of R. microplus by B. bovis. Overall, an unexpectedly low number of transcripts were found to be differentially expressed in response to B. bovis infection. Although the BmiGI Version 2 gene index (http://compbio.dfci.harvard.edu/tgi/cgi-bin/tgi/gimain.pl?gudb=b_microplus) was a useful database to help assign putative function to some transcripts, a majority of the differentially expressed transcripts did not have annotation that was useful for assignment of function and specialized bioinformatic approaches were necessary to increase the information from these transcriptome experiments.",2012-08-07 +24144008,Variable copy number of mitochondrial DNA (mtDNA) predicts worse prognosis in advanced gastric cancer patients.,"

Background

Change of mitochondrial DNA (mtDNA) copy number is widely reported in various human cancers, including gastric cancer, and is considered to be an important hallmark of cancers. However, there is remarkably little consensus on the value of variable mtDNA content in the prognostic evaluation of this cancer.

Methods

Using real-time quantitative PCR approach, we examined mtDNA copy number in a cohort of gastric cancers and normal gastric tissues, and explored the association of variable mtDNA content with clinical outcomes of gastric cancer patients.

Results

Our data showed that the majority of gastric cancer patients had low mtDNA content as compared to control subjects although the relative mean mtDNA content was higher in the former than the latter. Moreover, we found that variable mtDNA content was strongly associated with lymph node metastasis and cancer-related death of the patients with late-stage tumors. Notably, variable mtDNA content did not affect overall survival of gastric cancer patients, however, we found that increased mtDNA content was associated with poor survival in the patients with late-stage tumors.

Conclusion

In this study, we demonstrated that variable mtDNA content markedly increased the risk of lymph node metastasis and high mortality of the patients with late-stage tumors. Additionally, we found a strong link between increased mtDNA content and worse survival of the patients with late-stage tumors. Taken together, variable mtDNA content may be a valuable poor prognostic factor for advanced gastric cancer patients.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1344721463103353.",2013-10-21 +21551139,MPEA--metabolite pathway enrichment analysis.,"

Unlabelled

We present metabolite pathway enrichment analysis (MPEA) for the visualization and biological interpretation of metabolite data at the system level. Our tool follows the concept of gene set enrichment analysis (GSEA) and tests whether metabolites involved in some predefined pathway occur towards the top (or bottom) of a ranked query compound list. In particular, MPEA is designed to handle many-to-many relationships that may occur between the query compounds and metabolite annotations. For a demonstration, we analysed metabolite profiles of 14 twin pairs with differing body weights. MPEA found significant pathways from data that had no significant individual query compounds, its results were congruent with those discovered from transcriptomics data and it detected more pathways than the competing metabolic pathway method did.

Availability

The web server and source code of MPEA are available at http://ekhidna.biocenter.helsinki.fi/poxo/mpea/.",2011-05-05 +22753090,"Associations among genotype, clinical phenotype, and intracellular localization of trafficking proteins in ARC syndrome.","Arthrogryposis-renal dysfunction-cholestasis (ARC) syndrome is a rare autosomal recessive multisystem disorder caused by mutations in vacuolar protein sorting 33 homologue B (VPS33B) and VPS33B interacting protein, apical-basolateral polarity regulator (VIPAR). Cardinal features of ARC include congenital joint contractures, renal tubular dysfunction, cholestasis, severe failure to thrive, ichthyosis, and a defect in platelet alpha-granule biogenesis. Most patients with ARC do not survive past the first year of life. We report two patients presenting with a mild ARC phenotype, now 5.5 and 3.5 years old. Both patients were compound heterozygotes with the novel VPS33B donor splice-site mutation c.1225+5G>C in common. Immunoblotting and complementary DNA analysis suggest expression of a shorter VPS33B transcript, and cell-based assays show that c.1225+5G>C VPS33B mutant retains some ability to interact with VIPAR (and thus partial wild-type function). This study provides the first evidence of genotype-phenotype correlation in ARC and suggests that VPS33B c.1225+5G>C mutation predicts a mild ARC phenotype. We have established an interactive online database for ARC (https://grenada.lumc.nl/LOVD2/ARC) comprising all known variants in VPS33B and VIPAR. Also included in the database are 15 novel pathogenic variants in VPS33B and five in VIPAR.",2012-08-06 +22949831,High throughput mining and characterization of microsatellites from common carp genome.,"In order to supply sufficient microsatellite loci for high-density linkage mapping, whole genome shotgun (WGS) sequences of the common carp (Cyprinus carpio) were assembled and surveyed for microsatellite identification. 
A total of 79,014 microsatellites were collected which were harbored in 68,827 distinct contig sequences. These microsatellites were characterized in the common carp genome. Information of all microsatellites, including previously published BAC-based microsatellites, was then stored in a MySQL database, and a web-based database interface (http://genomics.cafs.ac.cn/ssrdb) was built for public access and download. A total of 3,110 microsatellites, including 1,845 from WGS and 1,265 from BAC end sequences (BES), were tested and genotyped on a mapping family with 192 individuals. A total of 963 microsatellites markers were validated with polymorphism in the mapping family. They will soon be used for high-density linkage mapping with a vast number of polymorphic SNP markers.",2012-08-06 +21208985,MBRole: enrichment analysis of metabolomic data.,"

Unlabelled

While many tools exist for performing enrichment analysis of transcriptomic and proteomic data in order to interpret them in biological terms, almost no equivalent tools exist for metabolomic data. We present Metabolite Biological Role (MBRole), a web server for carrying out over-representation analysis of biological and chemical annotations in arbitrary sets of metabolites (small chemical compounds) coming from metabolomic data of any organism or sample.

Availability and implementation

The web server is freely available at http://csbg.cnb.csic.es/mbrole. It was tested in the main web browsers.",2011-01-05 +23473666,All-atom ensemble modeling to analyze small-angle x-ray scattering of glycosylated proteins.,"The flexible and heterogeneous nature of carbohydrate chains often renders glycoproteins refractory to traditional structure determination methods. Small-angle X-ray scattering (SAXS) can be a useful tool for obtaining structural information of these systems. All-atom modeling of glycoproteins with flexible glycan chains was applied to interpret the solution SAXS data for a set of glycoproteins. For simpler systems (single glycan, with a well-defined protein structure), all-atom modeling generates models in excellent agreement with the scattering pattern and reveals the approximate spatial occupancy of the glycan chain in solution. For more complex systems (several glycan chains, or unknown protein substructure), the approach can still provide insightful models, though the orientations of glycans become poorly determined. Ab initio shape reconstructions appear to capture the global morphology of glycoproteins but in most cases offer little information about glycan spatial occupancy. The all-atom modeling methodology is available as a web server at http://salilab.org/allosmod-foxs.",2013-03-01 +24398113,Cumulative lead exposure and age at menopause in the Nurses' Health Study cohort.,"

Background

Early menopause has been associated with many adverse health outcomes, including increased risk of cardiovascular disease morbidity and mortality. Lead has been found to be adversely associated with female reproductive function, but whether exposures experienced by the general population are associated with altered age at menopause has not been explored.

Objective

Our goal was to assess the association between cumulative lead exposure and age at natural menopause.

Methods

Self-reported menopausal status and bone lead concentration measured with K-shell X-ray fluorescence-a biomarker of cumulative lead exposure-were obtained from 434 women participants in the Nurses' Health Study.

Results

The mean (± SD) age at natural menopause was 50.8 ± 3.6 years. Higher tibia lead level was associated with younger age at menopause. In adjusted analyses, the average age of menopause for women in the highest tertile of tibia lead was 1.21 years younger (95% CI: -2.08, -0.35) than for women in the lowest tertile (p-trend = 0.006). Although the number of cases was small (n = 23), the odds ratio for early menopause (< 45 years of age) was 5.30 (95% CI: 1.42, 19.78) for women in the highest tertile of tibia lead compared with those in the lowest tertile (p-trend = 0.006). There was no association between patella or blood lead and age at menopause.

Conclusions

Our results support an association between low-level cumulative lead exposure and an earlier age at menopause. These data suggest that low-level lead exposure may contribute to menopause-related health outcomes in older women through effects on age at menopause.

Citation

Eum KD, Weisskopf MG, Nie LH, Hu H, Korrick SA. 2014. Cumulative lead exposure and age at menopause in the Nurses' Health Study Cohort. Environ Health Perspect 122:229–234; http://dx.doi.org/10.1289/ehp.1206399",2014-01-07 +21636597,Robust biclustering by sparse singular value decomposition incorporating stability selection.,"

Motivation

Over the past decade, several biclustering approaches have been published in the field of gene expression data analysis. Despite the huge diversity regarding the mathematical concepts of the different biclustering methods, many of them can be related to the singular value decomposition (SVD). Recently, a sparse SVD approach (SSVD) has been proposed to reveal biclusters in gene expression data. In this article, we propose to incorporate stability selection to improve this method. Stability selection is a subsampling-based variable selection that allows to control Type I error rates. The here proposed S4VD algorithm incorporates this subsampling approach to find stable biclusters, and to estimate the selection probabilities of genes and samples to belong to the biclusters.

Results

So far, the S4VD method is the first biclustering approach that takes the cluster stability regarding perturbations of the data into account. Application of the S4VD algorithm to a lung cancer microarray dataset revealed biclusters that correspond to coregulated genes associated with cancer subtypes. Marker genes for different lung cancer subtypes showed high selection probabilities to belong to the corresponding biclusters. Moreover, the genes associated with the biclusters belong to significantly enriched cancer-related Gene Ontology categories. In a simulation study, the S4VD algorithm outperformed the SSVD algorithm and two other SVD-related biclustering methods in recovering artificial biclusters and in being robust to noisy data.

Availability

R-Code of the S4VD algorithm as well as a documentation can be found at http://s4vd.r-forge.r-project.org/.",2011-06-02 +21675294,What is the role of the school nurse in sexual health education?,"Sexual health information must be readily available to teens and delivered using both formal and informal means. By forming partnerships and sharing resources with health education teachers, social workers, guidance counselors, administrators, students, families, and the community, school nurses can improve access to information and resources to mitigate the negative consequences of early, unprotected, or forced sexual intercourse. Open communication with teens will allow them to obtain the information they need to make responsible decisions and access care when needed. For information on YRBS data for individual states, visit the CDC Youth Online website at http://apps.nccd.cdc.gov/ youthonline/App/Default.aspx. YRBS data are collected on six categories: unintentional injuries and violence, tobacco use, alcohol and other drug use, sexual behaviors, dietary behaviors, and physical inactivity. The site includes data from 1991 to 2009 and allows tables and graphs related to various health topics to be created.",2011-05-01 +22155947,M(3): an improved SNP calling algorithm for Illumina BeadArray data.,"

Summary

Genotype calling from high-throughput platforms such as Illumina and Affymetrix is a critical step in data processing, so that accurate information on genetic variants can be obtained for phenotype-genotype association studies. A number of algorithms have been developed to infer genotypes from data generated through the Illumina BeadStation platform, including GenCall, GenoSNP, Illuminus and CRLMM. Most of these algorithms are built on population-based statistical models to genotype every SNP in turn, such as GenCall with the GenTrain clustering algorithm, and require a large reference population to perform well. These approaches may not work well for rare variants where only a small proportion of the individuals carry the variant. A fundamentally different approach, implemented in GenoSNP, adopts a single nucleotide polymorphism (SNP)-based model to infer genotypes of all the SNPs in one individual, making it an appealing alternative to call rare variants. However, compared to the population-based strategies, more SNPs in GenoSNP may fail the Hardy-Weinberg Equilibrium test. To take advantage of both strategies, we propose a two-stage SNP calling procedure, named the modified mixture model (M(3)), to improve call accuracy for both common and rare variants. The effectiveness of our approach is demonstrated through applications to genotype calling on a set of HapMap samples used for quality control purpose in a large case-control study of cocaine dependence. The increase in power with M(3) is greater for rare variants than for common variants depending on the model.

Availability

M(3) algorithm: http://bioinformatics.med.yale.edu/group.

Contact

name@bio.com; hongyu.zhao@yale.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-08 +23399351,Duration of lithium treatment is a risk factor for reduced glomerular function: a cross-sectional study.,"

Background

The adverse renal effects of lithium have long been known, but glomerular insufficiency had been considered an unlikely event until recently, when new studies have raised concern regarding very long-term treatment. In this cross-sectional study, we examined glomerular function in a cohort of patients treated with lithium for up to 33 years and a control group of lithium-naïve patients treated with other mood-stabilizers.

Methods

Patients with a diagnosis of recurrent or persistent affective disorders, examined between 1 October 2007 and 31 December 2009, were screened. Demographic and clinical data were extracted from clinical charts regarding two study groups: one for patients treated with lithium for at least 12 months and the other for patients never exposed to lithium. Multivariate regression analysis was applied: the dependent variable was the estimated glomerular filtration rate (eGFR) calculated from the last available serum creatinine value using the Modification of Diet in Renal Disease Study Group equation; the following independent variables, potentially associated with renal dysfunction, were included: gender, current age, duration of lithium treatment, cigarette smoking, hypertension, diabetes and dyslipidemia.

Results

eGFRs lower than 60 ml/min were significantly more frequent in the group treated with lithium (38/139 = 27.3%) compared to lithium-naïve patients (4/70 = 5.7%) (P = 0.0002; Fisher's test). Regression analysis showed a significant effect on eGFR of age, gender and duration of lithium treatment but no effect of cigarette smoking, hypertension, diabetes or dyslipidemia. eGFR was estimated to decrease by 0.64 ml/min (95% confidence interval = 0.38 to 0.90; P = 0.00) for each year of lithium treatment.

Conclusions

The duration of lithium treatment is a risk factor for glomerular failure, in addition to advancing age. For example, all patients aged 60 years or older may be estimated to undergo Stage 3 or more severe chronic kidney disease (namely an eGFR less than 60 ml/min) if treated with lithium for 30 years. These data may be added to the current debate on the balance between the protective effects of lithium on recurrent affective disorders and suicide and the risk of renal disease.See related commentary article here http://www.biomedcentral.com/1741-7015/11/34.",2013-02-11 +22879875,Computational selection of transcriptomics experiments improves Guilt-by-Association analyses.,"The Guilt-by-Association (GBA) principle, according to which genes with similar expression profiles are functionally associated, is widely applied for functional analyses using large heterogeneous collections of transcriptomics data. However, the use of such large collections could hamper GBA functional analysis for genes whose expression is condition specific. In these cases a smaller set of condition related experiments should instead be used, but identifying such functionally relevant experiments from large collections based on literature knowledge alone is an impractical task. We begin this paper by analyzing, both from a mathematical and a biological point of view, why only condition specific experiments should be used in GBA functional analysis. We are able to show that this phenomenon is independent of the functional categorization scheme and of the organisms being analyzed. We then present a semi-supervised algorithm that can select functionally relevant experiments from large collections of transcriptomics experiments. Our algorithm is able to select experiments relevant to a given GO term, MIPS FunCat term or even KEGG pathways. We extensively test our algorithm on large dataset collections for yeast and Arabidopsis. 
We demonstrate that: using the selected experiments there is a statistically significant improvement in correlation between genes in the functional category of interest; the selected experiments improve GBA-based gene function prediction; the effectiveness of the selected experiments increases with annotation specificity; our algorithm can be successfully applied to GBA-based pathway reconstruction. Importantly, the set of experiments selected by the algorithm reflects the existing literature knowledge about the experiments. [A MATLAB implementation of the algorithm and all the data used in this paper can be downloaded from the paper website: http://www.paccanarolab.org/papers/CorrGene/].",2012-08-07 +22563071,BreakFusion: targeted assembly-based identification of gene fusions in whole transcriptome paired-end sequencing data.,"

Unlabelled

Despite recent progress, computational tools that identify gene fusions from next-generation whole transcriptome sequencing data are often limited in accuracy and scalability. Here, we present a software package, BreakFusion that combines the strength of reference alignment followed by read-pair analysis and de novo assembly to achieve a good balance in sensitivity, specificity and computational efficiency.

Availability

http://bioinformatics.mdanderson.org/main/BreakFusion",2012-05-04 +23055620,Competing endogenous RNA database.,"

Unlabelled

A given mRNA can be regulated by interactions with miRNAs and in turn the availability of these miRNAs can be regulated by their interactions with alternate mRNAs. The concept of regulation of a given mRNA by alternate mRNA (competing endogenous mRNA) by virtue of interactions with miRNAs through shared miRNA response elements is poised to become a fundamental genetic regulatory mechanism. The molecular basis of the mRNA-mRNA cross talks is via miRNA response elements, which can be predicted based on both molecular interaction and evolutionary conservation. By examining the co-occurrence of miRNA response elements in the mRNAs on a genome-wide basis we predict competing endogenous RNA for specific mRNAs targeted by miRNAs. Comparison of the mRNAs predicted to regulate PTEN with recently published work, indicate that the results presented within the competing endogenous RNA database (ceRDB) have biological relevance.

Availability

http://www.oncomir.umn.edu/cefinder/",2012-08-03 +22720726,miREvo: an integrative microRNA evolutionary analysis platform for next-generation sequencing experiments.,"

Background

MicroRNAs (miRNAs) are small (~19-24nt) non-coding RNAs that play important roles in various biological processes. To date, the next-generation sequencing (NGS) technology has been widely used to discover miRNAs in plants and animals. Although evolutionary analysis is important to reveal the functional dynamics of miRNAs, few computational tools have been developed to analyze the evolution of miRNA sequence and expression across species, especially the newly emerged ones.

Results

We developed miREvo, an integrated software platform with a graphical user interface (GUI), to process deep-sequencing data of small RNAs and to analyze miRNA sequence and expression evolution based on the multiple-species whole genome alignments (WGAs). Three major features are provided by miREvo: (i) to identify novel miRNAs in both plants and animals, based on a modified miRDeep algorithm, (ii) to detect miRNA homologs and measure their pairwise evolutionary distances among multiple species based on a WGA, and (iii) to profile miRNA expression abundances and analyze expression divergence across multiple species (small RNA libraries). Moreover, we demonstrated the utility of miREvo with Illumina data sets from Drosophila melanogaster and Arabidopsis, respectively.

Conclusion

This work presents an integrated pipeline, miREvo, for exploring the expressional and evolutionary dynamics of miRNAs across multiple species. MiREvo is standalone, modular, and freely available at http://evolution.sysu.edu.cn/software/mirevo.htm under the GNU/GPL license.",2012-06-21 +24142950,PEAR: a fast and accurate Illumina Paired-End reAd mergeR.,"

Motivation

The Illumina paired-end sequencing technology can generate reads from both ends of target DNA fragments, which can subsequently be merged to increase the overall read length. There already exist tools for merging these paired-end reads when the target fragments are equally long. However, when fragment lengths vary and, in particular, when either the fragment size is shorter than a single-end read, or longer than twice the size of a single-end read, most state-of-the-art mergers fail to generate reliable results. Therefore, a robust tool is needed to merge paired-end reads that exhibit varying overlap lengths because of varying target fragment lengths.

Results

We present the PEAR software for merging raw Illumina paired-end reads from target fragments of varying length. The program evaluates all possible paired-end read overlaps and does not require the target fragment size as input. It also implements a statistical test for minimizing false-positive results. Tests on simulated and empirical data show that PEAR consistently generates highly accurate merged paired-end reads. A highly optimized implementation allows for merging millions of paired-end reads within a few minutes on a standard desktop computer. On multi-core architectures, the parallel version of PEAR shows linear speedups compared with the sequential version of PEAR.

Availability and implementation

PEAR is implemented in C and uses POSIX threads. It is freely available at http://www.exelixis-lab.org/web/software/pear.",2013-10-18 +23034086,methylKit: a comprehensive R package for the analysis of genome-wide DNA methylation profiles.,"DNA methylation is a chemical modification of cytosine bases that is pivotal for gene regulation, cellular specification and cancer development. Here, we describe an R package, methylKit, that rapidly analyzes genome-wide cytosine epigenetic profiles from high-throughput methylation and hydroxymethylation sequencing experiments. methylKit includes functions for clustering, sample quality visualization, differential methylation analysis and annotation features, thus automating and simplifying many of the steps for discerning statistically significant bases or regions of DNA methylation. Finally, we demonstrate methylKit on breast cancer data, in which we find statistically significant regions of differential methylation and stratify tumor subtypes. methylKit is available at http://code.google.com/p/methylkit.",2012-10-03 +23104885,"XiP: a computational environment to create, extend and share workflows.","

Unlabelled

XiP (eXtensible integrative Pipeline) is a flexible, editable and modular environment with a user-friendly interface that does not require previous advanced programming skills to run, construct and edit workflows. XiP allows the construction of workflows by linking components written in both R and Java, the analysis of high-throughput data in grid engine systems and also the development of customized pipelines that can be encapsulated in a package and distributed. XiP already comes with several ready-to-use pipeline flows for the most common genomic and transcriptomic analysis and ∼300 computational components.

Availability

XiP is open source, freely available under the Lesser General Public License (LGPL) and can be downloaded from http://xip.hgc.jp.",2012-10-25 +22253280,BamView: visualizing and interpretation of next-generation sequencing read alignments.,"

Unlabelled

So-called next-generation sequencing (NGS) has provided the ability to sequence on a massive scale at low cost, enabling biologists to perform powerful experiments and gain insight into biological processes. BamView has been developed to visualize and analyse sequence reads from NGS platforms, which have been aligned to a reference sequence. It is a desktop application for browsing the aligned or mapped reads [Ruffalo, M, LaFramboise, T, Koyutürk, M. Comparative analysis of algorithms for next-generation sequencing read alignment. Bioinformatics 2011;27:2790-6] at different levels of magnification, from nucleotide level, where the base qualities can be seen, to genome or chromosome level where overall coverage is shown. To enable in-depth investigation of NGS data, various views are provided that can be configured to highlight interesting aspects of the data. Multiple read alignment files can be overlaid to compare results from different experiments, and filters can be applied to facilitate the interpretation of the aligned reads. As well as being a standalone application it can be used as an integrated part of the Artemis genome browser, BamView allows the user to study NGS data in the context of the sequence and annotation of the reference genome. Single nucleotide polymorphism (SNP) density and candidate SNP sites can be highlighted and investigated, and read-pair information can be used to discover large structural insertions and deletions. The application will also calculate simple analyses of the read mapping, including reporting the read counts and reads per kilobase per million mapped reads (RPKM) for genes selected by the user.

Availability

BamView and Artemis are freely available software. These can be downloaded from their home pages: http://bamview.sourceforge.net/; http://www.sanger.ac.uk/resources/software/artemis/. Requirements: Java 1.6 or higher.",2012-01-16 +22853808,A UK and Irish survey of enteral nutrition practices in paediatric intensive care units.,"The aim of the present study was to describe the present knowledge of healthcare professionals and the practices surrounding enteral feeding in the UK and Irish paediatric intensive care unit (PICU) and propose recommendations for practice and research. A cross-sectional (thirty-four item) survey was sent to all PICU listed in the Paediatric Intensive Care Audit Network (PICANET) database (http://www.picanet.org.uk) in November 2010. The overall PICU response rate was 90 % (27/30 PICU; 108 individual responses in total). The overall breakdown of the professional groups was 59 % nursing staff (most were children's nurses), 27 % medical staff, 13 % dietitians and 1 % physician assistants. Most units (96 %) had some written guidance (although brief and generic) on enteral nutrition (EN); 85 % of staff, across all professional groups (P= 0.672), thought that guidelines helped to improve energy delivery in the PICU. Factors contributing to reduced energy delivery included: fluid-restrictive policies (60 %), the child just being 'too ill' to feed (17 %), surgical post-operative orders (16 %), nursing staff being too slow in starting feeds (7 %), frequent procedures requiring fasting (7 %) and haemodynamic instability (7 %). What constituted an 'acceptable' level of gastric residual volume (GRV) varied markedly across respondents, but GRV featured prominently in the decision to both stop EN and to determine feed tolerance and was similar for all professional groups. There was considerable variation across respondents about which procedures required fasting and the duration of this fasting. 
The present survey has highlighted the variability of the present enteral feeding practices across the UK and Ireland, particularly with regard to the use of GRV and fasting for procedures. The present study highlights a number of recommendations for both practice and research.",2012-08-01 +22949412,Proliferative and nonproliferative lesions of the rat and mouse male reproductive system.,"The INHAND Project (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) is a joint initiative of the Societies of Toxicologic Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP), and North America (STP) to develop an internationally accepted nomenclature for proliferative and nonproliferative lesions in laboratory animals. The purpose of this publication is to provide a standardized nomenclature and differential diagnosis for classifying microscopic lesions observed in the male reproductive system of laboratory rats and mice, with color microphotographs illustrating examples of some lesions. The standardized nomenclature presented in this document is also available for society members electronically on the Internet (http://goreni.org). Sources of material included histopathology databases from government, academia, and industrial laboratories throughout the world. Content includes spontaneous and aging lesions as well as lesions induced by exposure to test materials. A widely accepted and utilized international harmonization of nomenclature for lesions of the male reproductive system in laboratory animals will decrease confusion among regulatory and scientific research organizations in different countries and provide a common language to increase and enrich international exchanges of information among toxicologists and pathologists.",2012-08-01 +22870325,miRSystem: an integrated system for characterizing enriched functions and pathways of microRNA targets.,"

Background

Many prediction tools for microRNA (miRNA) targets have been developed, but inconsistent predictions were observed across multiple algorithms, which can make further analysis difficult. Moreover, the nomenclature of human miRNAs changes rapidly. To address these issues, we developed a web-based system, miRSystem, for converting queried miRNAs to the latest annotation and predicting the function of miRNA by integrating miRNA target gene prediction and function/pathway analyses.

Results

First, queried miRNA IDs were converted to the latest annotated version to prevent potential conflicts resulting from multiple aliases. Next, by combining seven algorithms and two validated databases, potential gene targets of miRNAs and their functions were predicted based on the consistency across independent algorithms and observed/expected ratios. Lastly, five pathway databases were included to characterize the enriched pathways of target genes through bootstrap approaches. Based on the enriched pathways of target genes, the functions of queried miRNAs could be predicted.

Conclusions

MiRSystem is a user-friendly tool for predicting the target genes and their associated pathways for many miRNAs simultaneously. The web server and the documentation are freely available at http://mirsystem.cgm.ntu.edu.tw/.",2012-08-01 +22849520,Improved annotation of a plant pathogen genome Xanthomonas oryzae pv. oryzae PXO99A.,"Many bacterial genomes have been sequenced and stored in public databases now, of which Reference Sequence (RefSeq) is the most widely used one. However, the annotation in RefSeq is still unsatisfactory. The present analysis is focused on the re-annotation of an important plant pathogen genome Xanthomonas oryzae pv. oryzae PXO99A (Xoo PXO99A), which is the causal agent of bacterial blight on rice. Based on the parameters of 28 nucleotide frequencies and support vector machine algorithm, 41 originally annotated hypothetical genes were recognized as noncoding sequences, which were further supported by principal component analysis and other evidence. Ten of them were tested with reverse transcription-polymerase chain reaction experiments (RT-PCR), and all of them were confirmed to be noncoding sequences. Furthermore, 197 potential new genes not annotated in RefSeq were both recognized by two ab initio gene finding programs. Most of them only have sequence similarities with part of the known genes in other species, so they are unlikely to be protein-coding genes. Twelve potential new genes have high full-length sequence similarities with function-known genes, which are very likely to be true protein-coding genes. All the 12 potential genes were tested with RT-PCR, and 11 of them (92%) were successfully amplified in cDNA template. The RT-PCR experiments confirm that our theoretical prediction has high accuracy. The improvement of Xoo PXO99A annotation is helpful for the research of lifestyle, metabolism, and pathogenicity of this important plant pathogen. 
The improved annotation can be obtained from http://211.69.128.148/Xoo .",2012-08-01 +21824972,Metannogen: annotation of biological reaction networks.,"

Motivation

Semantic annotations of the biochemical entities constituting a biological reaction network are indispensable to create biologically meaningful networks. They further heighten efficient exchange, reuse and merging of existing models which concern present-day systems biology research more often. Two types of tools for the reconstruction of biological networks currently exist: (i) several sophisticated programs support graphical network editing and visualization. (ii) Data management systems permit reconstruction and curation of huge networks in a team of scientists including data integration, annotation and cross-referencing. We sought ways to combine the advantages of both approaches.

Results

Metannogen, which was previously developed for network reconstruction, has been considerably improved. From now on, Metannogen provides sbml import and annotation of networks created elsewhere. This permits users of other network reconstruction platforms or modeling software to annotate their networks using Metannogen's advanced information management. We implemented word-autocompletion, multipattern highlighting, spell check, brace-expansion and publication management, and improved annotation, cross-referencing and team work requirements. Unspecific enzymes and transporters acting on a spectrum of different substrates are efficiently handled. The network can be exported in sbml format where the annotations are embedded in line with the miriam standard. For more comfort, Metannogen may be tightly coupled with the network editor such that Metannogen becomes an additional view for the focused reaction in the network editor. Finally, Metannogen provides local single user, shared password protected multiuser or public access to the annotation data.

Availability

Metannogen is available free of charge at: http://www.bioinformatics.org/strap/metannogen/ or http://3d-alignment.eu/metannogen/.

Contact

christoph.gille@charite.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-08 +23863837,CRISPRmap: an automated classification of repeat conservation in prokaryotic adaptive immune systems.,"Central to Clustered Regularly Interspaced Short Palindromic Repeat (CRISPR)-Cas systems are repeated RNA sequences that serve as Cas-protein-binding templates. Classification is based on the architectural composition of associated Cas proteins, considering repeat evolution is essential to complete the picture. We compiled the largest data set of CRISPRs to date, performed comprehensive, independent clustering analyses and identified a novel set of 40 conserved sequence families and 33 potential structure motifs for Cas-endoribonucleases with some distinct conservation patterns. Evolutionary relationships are presented as a hierarchical map of sequence and structure similarities for both a quick and detailed insight into the diversity of CRISPR-Cas systems. In a comparison with Cas-subtypes, I-C, I-E, I-F and type II were strongly coupled and the remaining type I and type III subtypes were loosely coupled to repeat and Cas1 evolution, respectively. Subtypes with a strong link to CRISPR evolution were almost exclusive to bacteria; nevertheless, we identified rare examples of potential horizontal transfer of I-C and I-E systems into archaeal organisms. Our easy-to-use web server provides an automated assignment of newly sequenced CRISPRs to our classification system and enables more informed choices on future hypotheses in CRISPR-Cas research: http://rna.informatik.uni-freiburg.de/CRISPRmap.",2013-07-17 +23860716,Assessing association between protein truncating variants and quantitative traits.,"

Motivation

In sequencing studies of common diseases and quantitative traits, power to test rare and low frequency variants individually is weak. To improve power, a common approach is to combine statistical evidence from several genetic variants in a region. Major challenges are how to do the combining and which statistical framework to use. General approaches for testing association between rare variants and quantitative traits include aggregating genotypes and trait values, referred to as 'collapsing', or using a score-based variance component test. However, little attention has been paid to alternative models tailored for protein truncating variants. Recent studies have highlighted the important role that protein truncating variants, commonly referred to as 'loss of function' variants, may have on disease susceptibility and quantitative levels of biomarkers. We propose a Bayesian modelling framework for the analysis of protein truncating variants and quantitative traits.

Results

Our simulation results show that our models have an advantage over the commonly used methods. We apply our models to sequence and exome-array data and discover strong evidence of association between low plasma triglyceride levels and protein truncating variants at APOC3 (Apolipoprotein C3).

Availability

Software is available from http://www.well.ox.ac.uk/~rivas/mamba",2013-07-16 +24648783,An investigation into the factors that encourage learner participation in a large group medical classroom.,"

Background

Effective lectures often incorporate activities that encourage learner participation. A challenge for educators is how to facilitate this in the large group lecture setting. This study investigates the individual student characteristics involved in encouraging (or dissuading) learners to interact, ask questions, and make comments in class.

Methods

Students enrolled in a Doctor of Veterinary Medicine program at Ross University School of Veterinary Medicine, St Kitts, were invited to complete a questionnaire canvassing their participation in the large group classroom. Data from the questionnaire were analyzed using Excel (Microsoft, Redmond, WA, USA) and the R software environment (http://www.r-project.org/).

Results

One hundred and ninety-two students completed the questionnaire (response rate, 85.7%). The results showed statistically significant differences between male and female students when asked to self-report their level of participation (P=0.011) and their confidence to participate (P<0.001) in class. No statistically significant difference was identified between different age groups of students (P=0.594). Student responses reflected that an ""aversion to public speaking"" acted as the main deterrent to participating during a lecture. Female participants were 3.56 times more likely to report a fear of public speaking than male participants (odds ratio 3.56, 95% confidence interval 1.28-12.33, P=0.01). Students also reported ""smaller sizes of class and small group activities"" and ""other students participating"" as factors that made it easier for them to participate during a lecture.

Conclusion

In this study, sex likely played a role in learner participation in the large group veterinary classroom. Male students were more likely to participate in class and reported feeling more confident to participate than female students. Female students in this study commonly identified aversion to public speaking as a factor which held them back from participating in the large group lecture setting. These are important findings for veterinary and medical educators aiming to improve learner participation in the classroom. Potential ways of addressing this challenge include addition of small group activities and audience response systems during lectures, and inclusion of training interventions in public speaking at an early stage of veterinary and medical curricula.",2014-03-11 +23620356,Structural RNA alignment by multi-objective optimization.,"

Motivation

The calculation of reliable alignments for structured RNA is still considered as an open problem. One approach is the incorporation of secondary structure information into the optimization criteria by using a weighted sum of sequence and structure components as an objective function. As it is not clear how to choose the weighting parameters, we use multi-objective optimization to calculate a set of Pareto-optimal RNA sequence-structure alignments. The solutions in this set then represent all possible trade-offs between the different objectives, independent of any previous weighting.

Results

We present a practical multi-objective dynamic programming algorithm, which is a new method for the calculation of the set of Pareto-optimal solutions to the pairwise RNA sequence-structure alignment problem. In selected examples, we show the usefulness of this approach, and its advantages over state-of-the-art single-objective algorithms.

Availability and implementation

The source code of our software (ISO C++11) is freely available at http://sysbio.uni-ulm.de/?Software and is licensed under the GNU GPLv3.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-24 +24564427,PLW: Probabilistic Local Walks for detecting protein complexes from protein interaction networks.,"

Background

Many biological processes are carried out by proteins interacting with each other in the form of protein complexes. However, large-scale detection of protein complexes has remained constrained by experimental limitations. As such, computational detection of protein complexes by applying clustering algorithms on the abundantly available protein-protein interaction (PPI) networks is an important alternative. However, many current algorithms have overlooked the importance of selecting seeds for expansion into clusters without excluding important proteins and including many noisy ones, while ensuring a high degree of functional homogeneity amongst the proteins detected for the complexes.

Results

We designed a novel method called Probabilistic Local Walks (PLW) which clusters regions in a PPI network with high functional similarity to find protein complex cores with high precision and efficiency in O (|V| log |V| + |E|) time. A seed selection strategy, which prioritises seeds with dense neighbourhoods, was devised. We defined a topological measure, called common neighbour similarity, to estimate the functional similarity of two proteins given the number of their common neighbours.

Conclusions

Our proposed PLW algorithm achieved the highest F-measure (recall and precision) when compared to 11 state-of-the-art methods on yeast protein interaction data, with an improvement of 16.7% over the next highest score. Our experiments also demonstrated that our seed selection strategy is able to increase algorithm precision when applied to three previous protein complex mining techniques.

Availability

The software, datasets and predicted complexes are available at http://wonglkd.github.io/PLW.",2013-10-16 +23609309,Identifying key factors in homeowner's adoption of water quality best management practices.,"The recognition of the significance of the residential environment in contributing to non-point source (NPS) pollution and the inherently dispersed nature of NPS pollution itself that presents significant challenges to effective regulation has led to the creation and dissemination of best management practices (BMPs) that can reduce the impacts of NPS pollution (Environmental Protection Agency US, Protecting water quality from urban runoff, http://www.epa.gov/npdes/pubs/nps_urban-facts_final.pdf , 2003). However, very few studies have examined the factors that influence the adoption of BMPs by residential homeowners, despite the fact that residential environments have been identified as one of the most significant contributors to NPS pollution. Given this need, the purpose of this project was to explore how demographic and knowledge-based factors predict adoption of residential BMPs in an urbanizing watershed in Northern Illinois using statistical analyses of survey data collected as part of a watershed planning process. The findings indicate that broad knowledge of BMPs is the strongest predictor of use for a specific BMP. Knowledge of BMPs is strongly correlated with their use, which reinforces the need for educational programs, even among those assumed to be knowledgeable about BMPs.",2013-04-23 +23995392,PyroHMMvar: a sensitive and accurate method to call short indels and SNPs for Ion Torrent and 454 data.,"

Motivation

The identification of short insertions and deletions (indels) and single nucleotide polymorphisms (SNPs) from Ion Torrent and 454 reads is a challenging problem, essentially because these techniques are prone to sequence erroneously at homopolymers and can, therefore, raise indels in reads. Most of the existing mapping programs do not model homopolymer errors when aligning reads against the reference. The resulting alignments will then contain various kinds of mismatches and indels that confound the accurate determination of variant loci and alleles.

Results

To address these challenges, we realign reads against the reference using our previously proposed hidden Markov model that models homopolymer errors and then merges these pairwise alignments into a weighted alignment graph. Based on our weighted alignment graph and hidden Markov model, we develop a method called PyroHMMvar, which can simultaneously detect short indels and SNPs, as demonstrated in human resequencing data. Specifically, by applying our methods to simulated diploid datasets, we demonstrate that PyroHMMvar produces more accurate results than state-of-the-art methods, such as Samtools and GATK, and is less sensitive to mapping parameter settings than the other methods. We also apply PyroHMMvar to analyze one human whole genome resequencing dataset, and the results confirm that PyroHMMvar predicts SNPs and indels accurately.

Availability and implementation

Source code freely available at the following URL: https://code.google.com/p/pyrohmmvar/, implemented in C++ and supported on Linux. .",2013-08-31 +22932300,"Increases in quitline calls and smoking cessation website visitors during a national tobacco education campaign--March 19-June 10, 2012.","Mass media campaigns and telephone quitlines are effective in increasing cessation rates among cigarette smokers. During March 19-June 10, 2012, CDC aired Tips from Former Smokers (TIPS), the first federally funded, nationwide, paid-media tobacco education campaign in the United States. The TIPS campaign featured former smokers talking about their experiences living with diseases caused by smoking. The campaign was primarily intended to encourage adult smokers aged 18-54 years to quit by making them aware of the health damage caused by smoking and letting them know that they could call the telephone quitline portal 1-800-QUIT-NOW or visit the National Cancer Institute (NCI) smoking cessation website (http://www.smokefree.gov) if they needed free help to quit. The campaign included advertising on national and local cable television, local radio, online media, and billboards, and in movie theaters, transit venues, and print media. To determine the effects of the TIPS campaign on weekly quitline call volume and weekly unique visitors to the cessation website, CDC analyzed call and visitor data immediately before, during, and immediately after the campaign period and compared them with data from the corresponding weeks in 2011. This report summarizes the results of that analysis, which found that the number of weekly calls to the quitline from the 50 states, the District of Columbia, Guam, and Puerto Rico increased 132% (207,519 additional calls) during the TIPS campaign, and the number of unique visitors to the cessation website increased 428% (510,571 additional unique visitors). 
These results indicate that many smokers are interested in quitting and learning more about cessation assistance, and will respond to motivational messages that include an offer of help.",2012-08-01 +23421496,The complete compositional epistasis detection in genome-wide association studies.,"

Background

The detection of epistasis among genetic markers is of great interest in genome-wide association studies (GWAS). In recent years, much research has been devoted to finding disease-associated epistasis in GWAS. However, due to the high computational cost involved, most methods focus on specific epistasis models, risking a loss of power when the underlying epistasis models are not examined in these analyses.

Results

In this work, we propose a computational efficient approach based on complete enumeration of two-locus epistasis models. This approach uses a two-stage (screening and testing) search strategy and guarantees the enumeration of all epistasis patterns. The implementation is done on graphic processing units (GPU), which can finish the analysis on a GWAS data (with around 5,000 subjects and around 350,000 markers) within two hours. Source code is available at http://bioinformatics.ust.hk/BOOST.html#GBOOST.

Conclusions

This work demonstrates that the complete compositional epistasis detection is computationally feasible in GWAS.",2013-02-19 +22419782,PINALOG: a novel approach to align protein interaction networks--implications for complex detection and function prediction.,"

Motivation

Analysis of protein-protein interaction networks (PPINs) at the system level has become increasingly important in understanding biological processes. Comparison of the interactomes of different species not only provides a better understanding of species evolution but also helps with detecting conserved functional components and in function prediction.

Method and results

Here we report a PPIN alignment method, called PINALOG, which combines information from protein sequence, function and network topology. Alignment of human and yeast PPINs reveals several conserved subnetworks between them that participate in similar biological processes, notably the proteasome and transcription related processes. PINALOG has been tested for its power in protein complex prediction as well as function prediction. Comparison with PSI-BLAST in predicting protein function in the twilight zone also shows that PINALOG is valuable in predicting protein function.

Availability and implementation

The PINALOG web-server is freely available from http://www.sbg.bio.ic.ac.uk/~pinalog. The PINALOG program and associated data are available from the Download section of the web-server.

Contact

m.sternberg@imperial.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-03-13 +24391954,Effective identification of Gram-negative bacterial type III secreted effectors using position-specific residue conservation profiles.,"

Background

Type III secretion systems (T3SSs) are central to the pathogenesis and specifically deliver their secreted substrates (type III secreted proteins, T3SPs) into host cells. Since T3SPs play a crucial role in pathogen-host interactions, identifying them is crucial to our understanding of the pathogenic mechanisms of T3SSs. This study reports a novel and effective method for identifying the distinctive residues which are conserved different from other SPs for T3SPs prediction. Moreover, the importance of several sequence features was evaluated and further, a promising prediction model was constructed.

Results

Based on the conservation profiles constructed by a position-specific scoring matrix (PSSM), 52 distinctive residues were identified. To our knowledge, this is the first attempt to identify the distinct residues of T3SPs. Of the 52 distinct residues, the first 30 amino acid residues are all included, which is consistent with previous studies reporting that the secretion signal generally occurs within the first 30 residue positions. However, the remaining 22 positions spanning residues 30-100 were also proven by our method to contain important signal information for T3SP secretion because the translocation of many effectors also depends on the chaperone-binding residues that follow the secretion signal. For further feature optimisation and compression, permutation importance analysis was conducted to select 62 optimal sequence features. A prediction model across 16 species was developed using random forest to classify T3SPs and non-T3 SPs, with high receiver operating curve of 0.93 in the 10-fold cross validation and an accuracy of 94.29% for the test set. Moreover, when performing on a common independent dataset, the results demonstrate that our method outperforms all the others published to date. Finally, the novel, experimentally confirmed T3 effectors were used to further demonstrate the model's correct application. The model and all data used in this paper are freely available at http://cic.scu.edu.cn/bioinformatics/T3SPs.zip.",2013-12-31 +23238647,Ocular complication after trichloroacetic acid peeling: a case report.,"

Unlabelled

Chemical peeling is a noninvasive technique currently used more frequently as a cosmetic procedure. Trichloroacetic acid (TCA) is one of the most popular chemical agents used for this purpose (Stuzin et al., Clin Plast Surg 20:9-25, 1993). Although this application commonly is used for the whole face, including the eyelids, the data in the literature referring to ocular complications if TCA leaks into the eye and the injury treatment thereafter are too sparse. The authors therefore report the treatment procedure and follow-up evaluation for a patient who sustained a chemical injury to the eye during rhytidectomy combined with TCA peeling.

Level of evidence v

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266 .",2012-12-13 +23837734,NURD: an implementation of a new method to estimate isoform expression from non-uniform RNA-seq data.,"

Background

RNA-Seq technology has been used widely in transcriptome study, and one of the most important applications is to estimate the expression level of genes and their alternative splicing isoforms. There have been several algorithms published to estimate the expression based on different models. Recently Wu et al. published a method that can accurately estimate isoform level expression by considering position-related sequencing biases using nonparametric models. The method has advantages in handling different read distributions, but there hasn't been an efficient program to implement this algorithm.

Results

We developed an efficient implementation of the algorithm in the program NURD. It uses a binary interval search algorithm. The program can correct both the global tendency of sequencing bias in the data and local sequencing bias specific to each gene. The correction makes the isoform expression estimation more reliable under various read distributions. And the implementation is computationally efficient in both the memory cost and running time and can be readily scaled up for huge datasets.

Conclusion

NURD is an efficient and reliable tool for estimating the isoform expression level. Given the reads mapping result and gene annotation file, NURD will output the expression estimation result. The package is freely available for academic use at http://bioinfo.au.tsinghua.edu.cn/software/NURD/.",2013-07-10 +22962342,GREVE: Genomic Recurrent Event ViEwer to assist the identification of patterns across individual cancer samples.,"

Summary

GREVE has been developed to assist with the identification of recurrent genomic aberrations across cancer samples. The exact characterization of such aberrations remains a challenge despite the availability of increasing amount of data, from SNParray to next-generation sequencing. Furthermore, genomic aberrations in cancer are especially difficult to handle because they are, by nature, unique to the patients. However, their recurrence in specific regions of the genome has been shown to reflect their relevance in the development of tumors. GREVE makes use of previously characterized events to identify such regions and focus any further analysis.

Availability

GREVE is available through a web interface and open-source application (http://www.well.ox.ac.uk/GREVE).",2012-09-08 +22836136,A complete workflow for the analysis of full-size ChIP-seq (and similar) data sets using peak-motifs.,"This protocol explains how to use the online integrated pipeline 'peak-motifs' (http://rsat.ulb.ac.be/rsat/) to predict motifs and binding sites in full-size peak sets obtained by chromatin immunoprecipitation-sequencing (ChIP-seq) or related technologies. The workflow combines four time- and memory-efficient motif discovery algorithms to extract significant motifs from the sequences. Discovered motifs are compared with databases of known motifs to identify potentially bound transcription factors. Sequences are scanned to predict transcription factor binding sites and analyze their enrichment and positional distribution relative to peak centers. Peaks and binding sites are exported as BED tracks that can be uploaded into the University of California Santa Cruz (UCSC) genome browser for visualization in the genomic context. This protocol is illustrated with the analysis of a set of 6,000 peaks (8 Mb in total) bound by the Drosophila transcription factor Krüppel. The complete workflow is achieved in about 25 min of computational time on the Regulatory Sequence Analysis Tools (RSAT) Web server. This protocol can be followed in about 1 h.",2012-07-26 +23846744,An integrated toolkit for accurate prediction and analysis of cis-regulatory motifs at a genome scale.,"

Motivation

We present an integrated toolkit, BoBro2.0, for prediction and analysis of cis-regulatory motifs. This toolkit can (i) reliably identify statistically significant cis-regulatory motifs at a genome scale; (ii) accurately scan for all motif instances of a query motif in specified genomic regions using a novel method for P-value estimation; (iii) provide highly reliable comparisons and clustering of identified motifs, which takes into consideration the weak signals from the flanking regions of the motifs; and (iv) analyze co-occurring motifs in the regulatory regions.

Results

We have carried out systematic comparisons between motif predictions using BoBro2.0 and the MEME package. The comparison results on Escherichia coli K12 genome and the human genome show that BoBro2.0 can identify the statistically significant motifs at a genome scale more efficiently, identify motif instances more accurately and get more reliable motif clusters than MEME. In addition, BoBro2.0 provides correlational analyses among the identified motifs to facilitate the inference of joint regulation relationships of transcription factors.

Availability

The source code of the program is freely available for noncommercial uses at http://code.google.com/p/bobro/.

Contact

xyn@bmb.uga.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-10 +23982307,APSLAP: an adaptive boosting technique for predicting subcellular localization of apoptosis protein.,"Apoptotic proteins play key roles in understanding the mechanism of programmed cell death. Knowledge about the subcellular localization of apoptotic protein is constructive in understanding the mechanism of programmed cell death, determining the functional characterization of the protein, screening candidates in drug design, and selecting protein for relevant studies. It is also proclaimed that the information required for determining the subcellular localization of protein resides in their corresponding amino acid sequence. In this work, a new biological feature, class pattern frequency of physiochemical descriptor, was effectively used in accordance with the amino acid composition, protein similarity measure, CTD (composition, translation, and distribution) of physiochemical descriptors, and sequence similarity to predict the subcellular localization of apoptosis protein. AdaBoost with the weak learner as Random-Forest was designed for the five modules and prediction is made based on the weighted voting system. Bench mark dataset of 317 apoptosis proteins were subjected to prediction by our system and the accuracy was found to be 100.0 and 92.4 %, and 90.1 % for self-consistency test, jack-knife test, and tenfold cross validation test respectively, which is 0.9 % higher than that of other existing methods. Beside this, the independent data (N151 and ZW98) set prediction resulted in the accuracy of 90.7 and 87.7 %, respectively. These results show that the protein feature represented by a combined feature vector along with AdaBoost algorithm holds well in effective prediction of subcellular localization of apoptosis proteins. 
The user friendly web interface ""APSLAP"" has been constructed, which is freely available at http://apslap.bicpu.edu.in and it is anticipated that this tool will play a significant role in determining the specific role of apoptosis proteins with reliability.",2013-08-28 +22847933,An integrated approach to reduce the impact of minor allele frequency and linkage disequilibrium on variable importance measures for genome-wide data.,"

Motivation

There is growing momentum to develop statistical learning (SL) methods as an alternative to conventional genome-wide association studies (GWAS). Methods such as random forests (RF) and gradient boosting machine (GBM) result in variable importance measures that indicate how well each single-nucleotide polymorphism (SNP) predicts the phenotype. For RF, it has been shown that variable importance measures are systematically affected by minor allele frequency (MAF) and linkage disequilibrium (LD). To establish RF and GBM as viable alternatives for analyzing genome-wide data, it is necessary to address this potential bias and show that SL methods do not significantly under-perform conventional GWAS methods.

Results

Both LD and MAF have a significant impact on the variable importance measures commonly used in RF and GBM. Dividing SNPs into overlapping subsets with approximate linkage equilibrium and applying SL methods to each subset successfully reduces the impact of LD. A welcome side effect of this approach is a dramatic reduction in parallel computing time, increasing the feasibility of applying SL methods to large datasets. The created subsets also facilitate a potential correction for the effect of MAF using pseudocovariates. Simulations using simulated SNPs embedded in empirical data-assessing varying effect sizes, minor allele frequencies and LD patterns-suggest that the sensitivity to detect effects is often improved by subsetting and does not significantly under-perform the Armitage trend test, even under ideal conditions for the trend test.

Availability

Code for the LD subsetting algorithm and pseudocovariate correction is available at http://www.nd.edu/~glubke/code.html.",2012-07-30 +22544707,SNPnexus: a web server for functional annotation of novel and publicly known genetic variants (2012 update).,"Broader functional annotation of single nucleotide variations is a valuable mean for prioritizing targets in further disease studies and large-scale genotyping projects. We originally developed SNPnexus to assess the potential significance of known and novel SNPs on the major transcriptome, proteome, regulatory and structural variation models in order to identify the phenotypically important variants. Being committed to providing continuous support to the scientific community, we have substantially improved SNPnexus over time by incorporating a broader range of variations such as insertions/deletions, block substitutions, IUPAC codes submission and region-based analysis, expanding the query size limit, and most importantly including additional categories for the assessment of functional impact. SNPnexus provides a comprehensive set of annotations for genomic variation data by characterizing related functional consequences at the transcriptome/proteome levels of seven major annotation systems with in-depth analysis of potential deleterious effects, inferring physical and cytogenetic mapping, reporting information on HapMap genotype/allele data, finding overlaps with potential regulatory elements, structural variations and conserved elements, and retrieving links with previously reported genetic disease studies. SNPnexus has a user-friendly web interface with an improved query structure, enhanced functional annotation categories and flexible output presentation making it practically useful for biologists. SNPnexus is freely available at http://www.snp-nexus.org.",2012-04-28 +21903631,Reconstructing transcription factor activities in hierarchical transcription network motifs.,"

Motivation

A knowledge of the dynamics of transcription factors is fundamental to understand the transcriptional regulation mechanism. Nowadays, an experimental measure of transcription factor activities in vivo represents a challenge. Several methods have been developed to infer these activities from easily measurable quantities such as mRNA expression of target genes. A limitation of these methods is represented by the fact that they rely on very simple single-layer structures, typically consisting of one or more transcription factors regulating a number of target genes.

Results

We present a novel statistical inference methodology to reverse engineer the dynamics of transcription factors in hierarchical network motifs such as feed-forward loops. The approach we present is based on a continuous time representation of the system where the high-level master transcription factor is represented as a two state Markov jump process driving a system of differential equations. We solve the inference problem using an efficient variational approach and demonstrate our method on simulated data and two real datasets. The results on real data show that the predictions of our approach can capture biological behaviours in a more effective way than single-layer models of transcription, and can lead to novel biological insights.

Availability

http://homepages.inf.ed.ac.uk/gsanguin/software.html

Contact

g.sanguinetti@ed.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-07 +23685432,DRIMust: a web server for discovering rank imbalanced motifs using suffix trees.,"Cellular regulation mechanisms that involve proteins and other active molecules interacting with specific targets often involve the recognition of sequence patterns. Short sequence elements on DNA, RNA and proteins play a central role in mediating such molecular recognition events. Studies that focus on measuring and investigating sequence-based recognition processes make use of statistical and computational tools that support the identification and understanding of sequence motifs. We present a new web application, named DRIMust, freely accessible through the website http://drimust.technion.ac.il for de novo motif discovery services. The DRIMust algorithm is based on the minimum hypergeometric statistical framework and uses suffix trees for an efficient enumeration of motif candidates. DRIMust takes as input ranked lists of sequences in FASTA format and returns motifs that are over-represented at the top of the list, where the determination of the threshold that defines top is data driven. The resulting motifs are presented individually with an accurate P-value indication and as a Position Specific Scoring Matrix. Comparing DRIMust with other state-of-the-art tools demonstrated significant advantage to DRIMust, both in result accuracy and in short running times. Overall, DRIMust is unique in combining efficient search on large ranked lists with rigorous P-value assessment for the detected motifs.",2013-05-17 +22888263,"B-Pred, a structure based B-cell epitopes prediction server.","The ability to predict immunogenic regions in selected proteins by in-silico methods has broad implications, such as allowing a quick selection of potential reagents to be used as diagnostics, vaccines, immunotherapeutics, or research tools in several branches of biological and biotechnological research. 
However, the prediction of antibody target sites in proteins using computational methodologies has proven to be a highly challenging task, which is likely due to the somewhat elusive nature of B-cell epitopes. This paper proposes a web-based platform for scoring potential immunological reagents based on the structures or 3D models of the proteins of interest. The method scores a protein's peptides set, which is derived from a sliding window, based on the average solvent exposure, with a filter on the average local model quality for each peptide. The platform was validated on a custom-assembled database of 1336 experimentally determined epitopes from 106 proteins for which a reliable 3D model could be obtained through standard modeling techniques. Despite showing poor sensitivity, this method can achieve a specificity of 0.70 and a positive predictive value of 0.29 by combining these two simple parameters. These values are slightly higher than those obtained with other established sequence-based or structure-based methods that have been evaluated using the same epitopes dataset. This method is implemented in a web server called B-Pred, which is accessible at http://immuno.bio.uniroma2.it/bpred. The server contains a number of original features that allow users to perform personalized reagent searches by manipulating the sliding window's width and sliding step, changing the exposure and model quality thresholds, and running sequential queries with different parameters. The B-Pred server should assist experimentalists in the rational selection of epitope antigens for a wide range of applications.",2012-07-25 +21596785,mirAct: a web tool for evaluating microRNA activity based on gene expression data.,"MicroRNAs (miRNAs) are critical regulators in the complex cellular networks. 
The mirAct web server (http://sysbio.ustc.edu.cn/software/mirAct) is a tool designed to investigate miRNA activity based on gene-expression data by using the negative regulation relationship between miRNAs and their target genes. mirAct supports multiple-class data and enables clustering analysis based on computationally determined miRNA activity. Here, we describe the framework of mirAct, demonstrate its performance by comparing with other similar programs and exemplify its applications using case studies.",2011-05-19 +24245968,"Clinicopathologic significance of claudin-6, occludin, and matrix metalloproteinases -2 expression in ovarian carcinoma.","

Background

Tight junctions (TJs) are mainly composed of claudins, occludin, and tight junction adhesion molecules (JAM). The invasive and metastatic phenotype of highly invasive cancer cells has been related to abnormal structure and function of TJs, and with expression of activated matrix metalloproteinases (MMPs). The relevance of these mechanisms responsible for the invasion and metastasis of ovarian carcinoma is unclear. Similarly, it is not known if the expression of claudin-6, occludin and MMP2 is related with the clinical properties of these tumors.

Methods

Expression of claudin-6, occludin, and MMP2 was detected in samples of human ovarian cancer tissues by immunohistochemistry and correlated with the clinical properties of the tumors.

Results

The positive expression rates of claudin-6 and MMP-2 were higher in ovarian papillary serous carcinomas than in ovarian serous adenomas (P < 0.05). There were no differences in the expression of occludin (P > 0.05). The expression of claudin-6 and occludin in ovarian cancer was not correlated with patient age, pathological grade, clinical stage, and metastasis (P > 0.05). MMP-2 expression was enhanced with increased clinical stage and metastasis (P < 0.05), but was unrelated to patient age or tumor grade (P > 0.05). There were no apparent correlations between expression of claudin-6, occludin and MMP-2 in ovarian cancer tissue (P > 0.05).

Conclusions

Our data suggest, for the first time, that claudin-6 and MMP-2 are up-regulated in ovarian papillary serous carcinomas, and that MMP-2 expression is enhanced with increased clinical stage and metastasis. Claudin-6 and MMP-2 may play a positive role in the invasion and metastasis of ovarian cancer.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1775628454106511.",2013-11-19 +23090401,Error-estimation-guided rebuilding of de novo models increases the success rate of ab initio phasing.,"Recent advancements in computational methods for protein-structure prediction have made it possible to generate the high-quality de novo models required for ab initio phasing of crystallographic diffraction data using molecular replacement. Despite those encouraging achievements in ab initio phasing using de novo models, its success is limited only to those targets for which high-quality de novo models can be generated. In order to increase the scope of targets to which ab initio phasing with de novo models can be successfully applied, it is necessary to reduce the errors in the de novo models that are used as templates for molecular replacement. Here, an approach is introduced that can identify and rebuild the residues with larger errors, which subsequently reduces the overall C(α) root-mean-square deviation (CA-RMSD) from the native protein structure. The error in a predicted model is estimated from the average pairwise geometric distance per residue computed among selected lowest energy coarse-grained models. This score is subsequently employed to guide a rebuilding process that focuses on more error-prone residues in the coarse-grained models. This rebuilding methodology has been tested on ten protein targets that were unsuccessful using previous methods. The average CA-RMSD of the coarse-grained models was improved from 4.93 to 4.06 Å. For those models with CA-RMSD less than 3.0 Å, the average CA-RMSD was improved from 3.38 to 2.60 Å. These rebuilt coarse-grained models were then converted into all-atom models and refined to produce improved de novo models for molecular replacement. 
Seven diffraction data sets were successfully phased using rebuilt de novo models, indicating the improved quality of these rebuilt de novo models and the effectiveness of the rebuilding process. Software implementing this method, called MORPHEUS, can be downloaded from http://www.riken.jp/zhangiru/software.html.",2012-10-18 +23761451,PGMRA: a web server for (phenotype x genotype) many-to-many relation analysis in GWAS.,"It has been proposed that single nucleotide polymorphisms (SNPs) discovered by genome-wide association studies (GWAS) account for only a small fraction of the genetic variation of complex traits in human population. The remaining unexplained variance or missing heritability is thought to be due to marginal effects of many loci with small effects and has eluded attempts to identify its sources. Combination of different studies appears to resolve in part this problem. However, neither individual GWAS nor meta-analytic combinations thereof are helpful for disclosing which genetic variants contribute to explain a particular phenotype. Here, we propose that most of the missing heritability is latent in the GWAS data, which conceals intermediate phenotypes. To uncover such latent information, we propose the PGMRA server that introduces phenomics--the full set of phenotype features of an individual--to identify SNP-set structures in a broader sense, i.e. causally cohesive genotype-phenotype relations. These relations are agnostically identified (without considering disease status of the subjects) and organized in an interpretable fashion. Then, by incorporating a posteriori the subject status within each relation, we can establish the risk surface of a disease in an unbiased mode. This approach complements-instead of replaces-current analysis methods. 
The server is publically available at http://phop.ugr.es/fenogeno.",2013-06-12 +22826268,CNVD: text mining-based copy number variation in disease database.,"Copy number variation (CNV) is a kind of chromosomal structural reorganization that has been detected, in this decade, mainly by high-throughput biological technology. Researchers have found that CNVs are ubiquitous in many species and accumulating evidence indicates that CNVs are closely related with complex diseases. The investigation of chromosomal structural alterations has begun to reveal some important clues to the pathologic causes of diseases and to the disease process. However, many of the published studies have focused on a single disease and, so far, the experimental results have not been systematically collected or organized. Manual text mining from 6301 published papers was used to build the Copy Number Variation in Disease database (CNVD). CNVD contains CNV information for 792 diseases in 22 species from diverse types of experiments, thus, ensuring high confidence and comprehensive representation of the relationship between the CNVs and the diseases. In addition, multiple query modes and visualized results are provided in the CNVD database. With its user-friendly interface and the integrated CNV information for different diseases, CNVD will offer a truly comprehensive platform for disease research based on chromosomal structural variations. The CNVD interface is accessible at http://bioinfo.hrbmu.edu.cn/CNVD.",2012-07-23 +22559794,Functional magnetic resonance imaging phase synchronization as a measure of dynamic functional connectivity.,"Functional brain activity and connectivity have been studied by calculating intersubject and seed-based correlations of hemodynamic data acquired with functional magnetic resonance imaging (fMRI). 
To inspect temporal dynamics, these correlation measures have been calculated over sliding time windows with necessary restrictions on the length of the temporal window that compromises the temporal resolution. Here, we show that it is possible to increase temporal resolution by using instantaneous phase synchronization (PS) as a measure of dynamic (time-varying) functional connectivity. We applied PS on an fMRI dataset obtained while 12 healthy volunteers watched a feature film. Narrow frequency band (0.04-0.07 Hz) was used in the PS analysis to avoid artifactual results. We defined three metrics for computing time-varying functional connectivity and time-varying intersubject reliability based on estimation of instantaneous PS across the subjects: (1) seed-based PS, (2) intersubject PS, and (3) intersubject seed-based PS. Our findings show that these PS-based metrics yield results consistent with both seed-based correlation and intersubject correlation methods when inspected over the whole time series, but provide an important advantage of maximal single-TR temporal resolution. These metrics can be applied both in studies with complex naturalistic stimuli (e.g., watching a movie or listening to music in the MRI scanner) and more controlled (e.g., event-related or blocked design) paradigms. A MATLAB toolbox FUNPSY ( http://becs.aalto.fi/bml/software.html ) is openly available for using these metrics in fMRI data analysis.",2012-06-11 +24033658,Attitudes towards and knowledge of nutrition support amongst health care professionals on London intensive care units.,"

Background

Nutrition support on intensive care units (ICUs) has gained a higher profile ever since the development of published guidelines (Clin. Nutr. 2006, 25, 210; J. Parenter. Enteral Nutr. 2009, 33, 277; http://www.nice.org.uk/Guidance/CG32; Clin. Nutr. 2009, 28, 387). However, there are limited data available on knowledge and attitudes towards nutrition support specific to ICU.

Methods

An online survey was sent to all healthcare professionals working on ICUs across London via an e-mail link. The aim of the study was to assess the knowledge base of and attitudes of staff towards nutrition support, within an ICU setting, and to understand their educational needs. The results were analysed using descriptive statistics.

Results

Attitudes were in line with the evidence in current nutrition guidelines. The proportion of healthcare professionals who were regarded as demonstrating sufficient understanding of the evidence set out in the nutrition support guidelines were 44% of clinicians, 26% of nurses, 76% of dietitians and 67% of other staff. In total, 59% of staff wanted more education on a number of aspects related to nutrition support on ICU.

Conclusions

The present study highlights the need for more prominent dissemination of the current guidelines and illustrates the preferred mode. Specific gaps in knowledge regarding energy intake and the use of parenteral feeding are highlighted. It is hoped that the present survey will help to guide education in this area.",2013-08-24 +21971516,Inference of gain and loss events from phyletic patterns using stochastic mapping and maximum parsimony--a simulation study.,"Bacterial evolution is characterized by frequent gain and loss events of gene families. These events can be inferred from phyletic pattern data-a compact representation of gene family repertoire across multiple genomes. The maximum parsimony paradigm is a classical and prevalent approach for the detection of gene family gains and losses mapped on specific branches. We and others have previously developed probabilistic models that aim to account for the gain and loss stochastic dynamics. These models are a critical component of a methodology termed stochastic mapping, in which probabilities and expectations of gain and loss events are estimated for each branch of an underlying phylogenetic tree. In this work, we present a phyletic pattern simulator in which the gain and loss dynamics are assumed to follow a continuous-time Markov chain along the tree. Various models and options are implemented to make the simulation software useful for a large number of studies in which binary (presence/absence) data are analyzed. Using this simulation software, we compared the ability of the maximum parsimony and the stochastic mapping approaches to accurately detect gain and loss events along the tree. Our simulations cover a large array of evolutionary scenarios in terms of the propensities for gene family gains and losses and the variability of these propensities among gene families. 
Although in all simulation schemes, both methods obtain relatively low levels of false positive rates, stochastic mapping outperforms maximum parsimony in terms of true positive rates. We further studied the factors that influence the performance of both methods. We find, for example, that the accuracy of maximum parsimony inference is substantially reduced when the goal is to map gain and loss events along internal branches of the phylogenetic tree. Furthermore, the accuracy of stochastic mapping is reduced with smaller data sets (limited number of gene families) due to unreliable estimation of branch lengths. Our simulator and simulation results are additionally relevant for the analysis of other types of binary-coded data, such as the existence of homologues restriction sites, gaps, and introns, to name a few. Both the simulation software and the inference methodology are freely available at a user-friendly server: http://gloome.tau.ac.il/.",2011-10-04 +23417115,Carotid wall volume quantification from magnetic resonance images using deformable model fitting and learning-based correction of systematic errors.,"We present a method for carotid vessel wall volume quantification from magnetic resonance imaging (MRI). The method combines lumen and outer wall segmentation based on deformable model fitting with a learning-based segmentation correction step. After selecting two initialization points, the vessel wall volume in a region around the bifurcation is automatically determined. The method was trained on eight datasets (16 carotids) from a population-based study in the elderly for which one observer manually annotated both the lumen and outer wall. An evaluation was carried out on a separate set of 19 datasets (38 carotids) from the same study for which two observers made annotations. Wall volume and normalized wall index measurements resulting from the manual annotations were compared to the automatic measurements. 
Our experiments show that the automatic method performs comparably to the manual measurements. All image data and annotations used in this study together with the measurements are made available through the website http://ergocar.bigr.nl.",2013-02-15 +23975762,Protein-ligand binding site recognition using complementary binding-specific substructure comparison and sequence profile alignment.,"

Motivation

Identification of protein-ligand binding sites is critical to protein function annotation and drug discovery. However, there is no method that could generate optimal binding site prediction for different protein types. Combination of complementary predictions is probably the most reliable solution to the problem.

Results

We develop two new methods, one based on binding-specific substructure comparison (TM-SITE) and another on sequence profile alignment (S-SITE), for complementary binding site predictions. The methods are tested on a set of 500 non-redundant proteins harboring 814 natural, drug-like and metal ion molecules. Starting from low-resolution protein structure predictions, the methods successfully recognize >51% of binding residues with average Matthews correlation coefficient (MCC) significantly higher (with P-value <10(-9) in student t-test) than other state-of-the-art methods, including COFACTOR, FINDSITE and ConCavity. When combining TM-SITE and S-SITE with other structure-based programs, a consensus approach (COACH) can increase MCC by 15% over the best individual predictions. COACH was examined in the recent community-wide COMEO experiment and consistently ranked as the best method in last 22 individual datasets with the Area Under the Curve score 22.5% higher than the second best method. These data demonstrate a new robust approach to protein-ligand binding site recognition, which is ready for genome-wide structure-based function annotations.

Availability

http://zhanglab.ccmb.med.umich.edu/COACH/",2013-08-23 +23677611,WebScipio: Reconstructing alternative splice variants of eukaryotic proteins.,"Accurate exon-intron structures are essential prerequisites in genomics, proteomics and for many protein family and single gene studies. We originally developed Scipio and the corresponding web service WebScipio for the reconstruction of gene structures based on protein sequences and available genome assemblies. WebScipio also allows predicting mutually exclusive spliced exons and tandemly arrayed gene duplicates. The obtained gene structures are illustrated in graphical schemes and can be analysed down to the nucleotide level. The set of eukaryotic genomes available at the WebScipio server is updated on a daily basis. The current version of the web server provides access to ∼3400 genome assembly files of >1100 sequenced eukaryotic species. Here, we have also extended the functionality by adding a module with which expressed sequence tag (EST) and cDNA data can be mapped to the reconstructed gene structure for the identification of all types of alternative splice variants. WebScipio has a user-friendly web interface, and we believe that the improved web server will provide better service to biologists interested in the gene structure corresponding to their protein of interest, including all types of alternative splice forms and tandem gene duplicates. WebScipio is freely available at http://www.webscipio.org.",2013-05-15 +23230398,The INIA19 Template and NeuroMaps Atlas for Primate Brain Image Parcellation and Spatial Normalization.,"The INIA19 is a new, high-quality template for imaging-based studies of non-human primate brains, created from high-resolution, T(1)-weighted magnetic resonance (MR) images of 19 rhesus macaque (Macaca mulatta) animals. 
Combined with the comprehensive cortical and sub-cortical label map of the NeuroMaps atlas, the INIA19 is equally suitable for studies requiring both spatial normalization and atlas label propagation. Population-averaged template images are provided for both the brain and the whole head, to allow alignment of the atlas with both skull-stripped and unstripped data, and thus to facilitate its use for skull stripping of new images. This article describes the construction of the template using freely available software tools, as well as the template itself, which is being made available to the scientific community (http://nitrc.org/projects/inia19/).",2012-12-06 +21082436,Analysis of high-throughput ELISA microarray data.,"Our research group develops analytical methods and software for the high-throughput analysis of quantitative enzyme-linked immunosorbent assay (ELISA) microarrays. ELISA microarrays differ from DNA microarrays in several fundamental aspects and most algorithms for analysis of DNA microarray data are not applicable to ELISA microarrays. In this review, we provide an overview of the steps involved in ELISA microarray data analysis and how the statistically sound algorithms we have developed provide an integrated software suite to address the needs of each data-processing step. The algorithms discussed are available in a set of open-source software tools (http://www.pnl.gov/statistics/ProMAT).",2011-01-01 +22678091,Resolution-by-proxy: a simple measure for assessing and comparing the overall quality of NMR protein structures.,"In protein X-ray crystallography, resolution is often used as a good indicator of structural quality. Diffraction resolution of protein crystals correlates well with the number of X-ray observables that are used in structure generation and, therefore, with protein coordinate errors. In protein NMR, there is no parameter identical to X-ray resolution. Instead, resolution is often used as a synonym of NMR model quality. 
Resolution of NMR structures is often deduced from ensemble precision, torsion angle normality and number of distance restraints per residue. The lack of common techniques to assess the resolution of X-ray and NMR structures complicates the comparison of structures solved by these two methods. This problem is sometimes approached by calculating ""equivalent resolution"" from structure quality metrics. However, existing protocols do not offer a comprehensive assessment of protein structure as they calculate equivalent resolution from a relatively small number (<5) of protein parameters. Here, we report a development of a protocol that calculates equivalent resolution from 25 measurable protein features. This new method offers better performance (correlation coefficient of 0.92, mean absolute error of 0.28 Å) than existing predictors of equivalent resolution. Because the method uses coordinate data as a proxy for X-ray diffraction data, we call this measure ""Resolution-by-Proxy"" or ResProx. We demonstrate that ResProx can be used to identify under-restrained, poorly refined or inaccurate NMR structures, and can discover structural defects that the other equivalent resolution methods cannot detect. The ResProx web server is available at http://www.resprox.ca.",2012-06-08 +22532533,The genome sequence of bluetongue virus type 2 from India: evidence for reassortment between eastern and western topotype field strains.,"Bluetongue virus type 2, isolated in India in 1982 (IND1982/01), was obtained from the Orbivirus Reference Collection at IAH Pirbright (http://www.reoviridae.org/dsRNA_virus_proteins/ReoID/btv-2.htm#IND1982/01). Full genome sequencing and phylogenetic analyses show that IND1982/01 is a reassortant virus containing genome segments derived from both eastern and western topotypes. 
These data will help to identify further reassortment events involving this or other virus lineages in the subcontinent.",2012-05-01 +23671331,EvoDesign: De novo protein design based on structural and evolutionary profiles.,"Protein design aims to identify new protein sequences of desirable structure and biological function. Most current de novo protein design methods rely on physics-based force fields to search for low free-energy states following Anfinsen's thermodynamic hypothesis. A major obstacle of such approaches is the inaccuracy of the force field design, which cannot accurately describe the atomic interactions or distinguish correct folds. We developed a new web server, EvoDesign, to design optimal protein sequences of given scaffolds along with multiple sequence and structure-based features to assess the foldability and goodness of the designs. EvoDesign uses an evolution-profile-based Monte Carlo search with the profiles constructed from homologous structure families in the Protein Data Bank. A set of local structure features, including secondary structure, torsion angle and solvation, are predicted by single-sequence neural-network training and used to smooth the sequence motif and accommodate the physicochemical packing. The EvoDesign algorithm has been extensively tested in large-scale protein design experiments, which demonstrate enhanced foldability and structural stability of designed sequences compared with the physics-based designing methods. The EvoDesign server is freely available at http://zhanglab.ccmb.med.umich.edu/EvoDesign.",2013-05-13 +22815355,Metabolite identification and molecular fingerprint prediction through machine learning.,"

Motivation

Metabolite identification from tandem mass spectra is an important problem in metabolomics, underpinning subsequent metabolic modelling and network analysis. Yet, currently this task requires matching the observed spectrum against a database of reference spectra originating from similar equipment and closely matching operating parameters, a condition that is rarely satisfied in public repositories. Furthermore, the computational support for identification of molecules not present in reference databases is lacking. Recent efforts in assembling large public mass spectral databases such as MassBank have opened the door for the development of a new genre of metabolite identification methods.

Results

We introduce a novel framework for prediction of molecular characteristics and identification of metabolites from tandem mass spectra using machine learning with the support vector machine. Our approach is to first predict a large set of molecular properties of the unknown metabolite from salient tandem mass spectral signals, and in the second step to use the predicted properties for matching against large molecule databases, such as PubChem. We demonstrate that several molecular properties can be predicted to high accuracy and that they are useful in de novo metabolite identification, where the reference database does not contain any spectra of the same molecule.

Availability

A Matlab/Python package of the FingerID tool is freely available on the web at http://www.sourceforge.net/p/fingerid.

Contact

markus.heinonen@cs.helsinki.fi.",2012-07-18 +24106478,Integrative EEG biomarkers predict progression to Alzheimer's disease at the MCI stage.,"Alzheimer's disease (AD) is a devastating disorder of increasing prevalence in modern society. Mild cognitive impairment (MCI) is considered a transitional stage between normal aging and AD; however, not all subjects with MCI progress to AD. Prediction of conversion to AD at an early stage would enable an earlier, and potentially more effective, treatment of AD. Electroencephalography (EEG) biomarkers would provide a non-invasive and relatively cheap screening tool to predict conversion to AD; however, traditional EEG biomarkers have not been considered accurate enough to be useful in clinical practice. Here, we aim to combine the information from multiple EEG biomarkers into a diagnostic classification index in order to improve the accuracy of predicting conversion from MCI to AD within a 2-year period. We followed 86 patients initially diagnosed with MCI for 2 years during which 25 patients converted to AD. We show that multiple EEG biomarkers mainly related to activity in the beta-frequency range (13-30 Hz) can predict conversion from MCI to AD. Importantly, by integrating six EEG biomarkers into a diagnostic index using logistic regression the prediction improved compared with the classification using the individual biomarkers, with a sensitivity of 88% and specificity of 82%, compared with a sensitivity of 64% and specificity of 62% of the best individual biomarker in this index. In order to identify this diagnostic index we developed a data mining approach implemented in the Neurophysiological Biomarker Toolbox (http://www.nbtwiki.net/). We suggest that this approach can be used to identify optimal combinations of biomarkers (integrative biomarkers) also in other modalities. 
Potentially, these integrative biomarkers could be more sensitive to disease progression and response to therapeutic intervention.",2013-10-03 +24766722,Clinical and computed tomographic predictors of chronic bronchitis in COPD: a cross sectional analysis of the COPDGene study.,"

Background

Chronic bronchitis (CB) has been related to poor outcomes in Chronic Obstructive Pulmonary Disease (COPD). From a clinical standpoint, we have shown that subjects with CB in a group with moderate to severe airflow obstruction were younger, more likely to be current smokers, male, Caucasian, had worse health related quality of life, more dyspnea, and increased exacerbation history compared to those without CB. We sought to further refine our clinical characterization of chronic bronchitics in a larger cohort and analyze the CT correlates of CB in COPD subjects. We hypothesized that COPD patients with CB would have thicker airways and a greater history of smoking, acute bronchitis, allergic rhinitis, and occupational exposures compared to those without CB.

Methods

We divided 2703 GOLD 1-4 subjects in the Genetic Epidemiology of COPD (COPDGene®) Study into two groups based on symptoms: chronic bronchitis (CB+, n = 663, 24.5%) and no chronic bronchitis (CB-, n = 2040, 75.5%). Subjects underwent extensive clinical characterization, and quantitative CT analysis to calculate mean wall area percent (WA%) of 6 segmental airways was performed using VIDA PW2 (http://www.vidadiagnostics.com). Square roots of the wall areas of bronchi with internal perimeters 10 mm and 15 mm (Pi10 and Pi15, respectively), % emphysema, and % gas trapping were calculated using 3D Slicer (http://www.slicer.org).

Results

There were no differences in % emphysema (11.4 ± 12.0 vs. 12.0 ± 12.6%, p = 0.347) or % gas trapping (35.3 ± 21.2 vs. 36.3 ± 20.6%, p = 0.272) between groups. Mean segmental WA% (63.0 ± 3.2 vs. 62.0 ± 3.1%, p < 0.0001), Pi10 (3.72 ± 0.15 vs. 3.69 ± 0.14 mm, p < 0.0001), and Pi15 (5.24 ± 0.22 vs. 5.17 ± 0.20, p < 0.0001) were greater in the CB + group. Greater percentages of gastroesophageal reflux, allergic rhinitis, histories of asthma and acute bronchitis, exposures to dusts and occupational exposures, and current smokers were seen in the CB + group. In multivariate binomial logistic regression, male gender, Caucasian race, a lower FEV1%, allergic rhinitis, history of acute bronchitis, current smoking, and increased airway wall thickness increased odds for having CB.

Conclusions

Histories of asthma, allergic rhinitis, acute bronchitis, current smoking, a lower FEV1%, Caucasian race, male gender, and increased airway wall thickness are associated with CB. These data provide clinical and radiologic correlations to the clinical phenotype of CB.",2014-04-27 +22319139,Efficient selection of branch-specific models of sequence evolution.,"The analysis of extant sequences shows that molecular evolution has been heterogeneous through time and among lineages. However, for a given sequence alignment, it is often difficult to uncover what factors caused this heterogeneity. In fact, identifying and characterizing heterogeneous patterns of molecular evolution along a phylogenetic tree is very challenging, for lack of appropriate methods. Users either have to a priori define groups of branches along which they believe molecular evolution has been similar or have to allow each branch to have its own pattern of molecular evolution. The first approach assumes prior knowledge that is seldom available, and the second requires estimating an unreasonably large number of parameters. Here we propose a convenient and reliable approach where branches get clustered by their pattern of molecular evolution alone, with no need for prior knowledge about the data set under study. Model selection is achieved in a statistical framework and therefore avoids overparameterization. We rely on substitution mapping for efficiency and present two clustering approaches, depending on whether or not we expect neighbouring branches to share more similar patterns of sequence evolution than distant branches. We validate our method on simulations and test it on four previously published data sets. We find that our method correctly groups branches sharing similar equilibrium GC contents in a data set of ribosomal RNAs and recovers expected footprints of selection through dN/dS. 
Importantly, it also uncovers a new pattern of relaxed selection in a phylogeny of Mantellid frogs, which we are able to correlate to life-history traits. This shows that our programs should be very useful to study patterns of molecular evolution and reveal new correlations between sequence and species evolution. Our programs can run on DNA, RNA, codon, or amino acid sequences with a large set of possible models of substitutions and are available at http://biopp.univ-montp2.fr/forge/testnh.",2012-02-02 +22805605,The dental trauma internet calculator.,"

Background/aim

Prediction tools are increasingly used to inform patients about the future dental health outcome. Advanced statistical methods are required to arrive at unbiased predictions based on follow-up studies.

Materials and methods

The Internet risk calculator at the Dental Trauma Guide provides prognoses for teeth with traumatic injuries based on the Copenhagen trauma database: http://www.dentaltraumaguide.org The database includes 2191 traumatized permanent teeth from 1282 patients that were treated at the dental trauma unit at the University Hospital in Copenhagen (Denmark) in the period between 1972 and 1991. Subgroup analyses and estimates of event probabilities were based on the Kaplan-Meier and the Aalen-Johansen method.

Results

The Internet risk calculator shows individualized prognoses for the short- and long-term healing outcome of traumatized teeth with the following injuries: concussion, subluxation, extrusion, lateral luxation, intrusion, avulsion, crown fractures without luxation, root fractures and alveolar fractures. The prognoses for pulp necrosis, pulp canal obliteration, infection-related root resorption, ankylosis, surface resorption, marginal bone loss, and tooth loss were based on the tooth's root development stage and other risk factors at the time of the injury.

Conclusions

This article explains the database, the functionality and the statistical approach of the Internet risk calculator.",2012-07-16 +22800758,"'MicroRNA Targets', a new AthaMap web-tool for genome-wide identification of miRNA targets in Arabidopsis thaliana.","

Background

The AthaMap database generates a genome-wide map for putative transcription factor binding sites for A. thaliana. When analyzing transcriptional regulation using AthaMap it may be important to learn which genes are also post-transcriptionally regulated by inhibitory RNAs. Therefore, a unified database for transcriptional and post-transcriptional regulation will be highly useful for the analysis of gene expression regulation.

Methods

To identify putative microRNA target sites in the genome of A. thaliana, processed mature miRNAs from 243 annotated miRNA genes were used for screening with the psRNATarget web server. Positional information, target genes and the psRNATarget score for each target site were annotated to the AthaMap database. Furthermore, putative target sites for small RNAs from seven small RNA transcriptome datasets were used to determine small RNA target sites within the A. thaliana genome.

Results

Putative 41,965 genome wide miRNA target sites and 10,442 miRNA target genes were identified in the A. thaliana genome. Taken together with genes targeted by small RNAs from small RNA transcriptome datasets, a total of 16,600 A. thaliana genes are putatively regulated by inhibitory RNAs. A novel web-tool, 'MicroRNA Targets', was integrated into AthaMap which permits the identification of genes predicted to be regulated by selected miRNAs. The predicted target genes are displayed with positional information and the psRNATarget score of the target site. Furthermore, putative target sites of small RNAs from selected tissue datasets can be identified with the new 'Small RNA Targets' web-tool.

Conclusions

The integration of predicted miRNA and small RNA target sites with transcription factor binding sites will be useful for AthaMap-assisted gene expression analysis. URL: http://www.athamap.de/",2012-07-16 +21765096,Stage prediction of embryonic stem cell differentiation from genome-wide expression data.,"

Motivation

The developmental stage of a cell can be determined by cellular morphology or various other observable indicators. Such classical markers could be complemented with modern surrogates, like whole-genome transcription profiles, that can encode the state of the entire organism and provide increased quantitative resolution. Recent findings suggest that such profiles provide sufficient information to reliably predict the cell's developmental stage.

Results

We use whole-genome transcription data and several data projection methods to infer differentiation stage prediction models for embryonic cells. Given a transcription profile of an uncharacterized cell, these models can then predict its developmental stage. In a series of experiments comprising 14 datasets from the Gene Expression Omnibus, we demonstrate that the approach is robust and has excellent prediction ability both within a specific cell line and across different cell lines.

Availability

Model inference and computational evaluation procedures in the form of Python scripts and accompanying datasets are available at http://www.biolab.si/supp/stagerank.

Contact

blaz.zupan@fri.uni-lj.si

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-15 +22936822,The Olympic Regeneration in East London (ORiEL) study: protocol for a prospective controlled quasi-experiment to evaluate the impact of urban regeneration on young people and their families. ,"Recent systematic reviews suggest that there is a dearth of evidence on the effectiveness of large-scale urban regeneration programmes in improving health and well-being and alleviating health inequalities. The development of the Olympic Park in Stratford for the London 2012 Olympic and Paralympic Games provides the opportunity to take advantage of a natural experiment to examine the impact of large-scale urban regeneration on the health and well-being of young people and their families. A prospective school-based survey of adolescents (11-12 years) with parent data collected through face-to-face interviews at home. Adolescents will be recruited from six randomly selected schools in an area receiving large-scale urban regeneration (London Borough of Newham) and compared with adolescents in 18 schools in three comparison areas with no equivalent regeneration (London Boroughs of Tower Hamlets, Hackney and Barking & Dagenham). Baseline data will be completed prior to the start of the London Olympics (July 2012) with follow-up at 6 and 18 months postintervention. Primary outcomes are: pre-post change in adolescent and parent mental health and well-being, physical activity and parental employment status. Secondary outcomes include: pre-post change in social cohesion, smoking, alcohol use, diet and body mass index. The study will account for individual and environmental contextual effects in evaluating changes to identified outcomes. A nested longitudinal qualitative study will explore families' experiences of regeneration in order to unpack the process by which regeneration impacts on health and well-being. 
The study has approval from Queen Mary University of London Ethics Committee (QMREC2011/40), the Association of Directors of Children's Services (RGE110927) and the London Boroughs Research Governance Framework (CERGF113). Fieldworkers have had advanced Criminal Records Bureau clearance. Findings will be disseminated through peer-reviewed publications, national and international conferences, through participating schools and the study website (http://www.orielproject.co.uk).",2012-08-29 +23812981,Efficient network-guided multi-locus association mapping with graph cuts.,"

Motivation

As an increasing number of genome-wide association studies reveal the limitations of the attempt to explain phenotypic heritability by single genetic loci, there is a recent focus on associating complex phenotypes with sets of genetic loci. Although several methods for multi-locus mapping have been proposed, it is often unclear how to relate the detected loci to the growing knowledge about gene pathways and networks. The few methods that take biological pathways or networks into account are either restricted to investigating a limited number of predetermined sets of loci or do not scale to genome-wide settings.

Results

We present SConES, a new efficient method to discover sets of genetic loci that are maximally associated with a phenotype while being connected in an underlying network. Our approach is based on a minimum cut reformulation of the problem of selecting features under sparsity and connectivity constraints, which can be solved exactly and rapidly. SConES outperforms state-of-the-art competitors in terms of runtime, scales to hundreds of thousands of genetic loci and exhibits higher power in detecting causal SNPs in simulation studies than other methods. On flowering time phenotypes and genotypes from Arabidopsis thaliana, SConES detects loci that enable accurate phenotype prediction and that are supported by the literature.

Availability

Code is available at http://webdav.tuebingen.mpg.de/u/karsten/Forschung/scones/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +21494330,Directed partial correlation: inferring large-scale gene regulatory network through induced topology disruptions.,"Inferring regulatory relationships among many genes based on their temporal variation in transcript abundance has been a popular research topic. Due to the nature of microarray experiments, classical tools for time series analysis lose power since the number of variables far exceeds the number of the samples. In this paper, we describe some of the existing multivariate inference techniques that are applicable to hundreds of variables and show the potential challenges for small-sample, large-scale data. We propose a directed partial correlation (DPC) method as an efficient and effective solution to regulatory network inference using these data. Specifically for genomic data, the proposed method is designed to deal with large-scale datasets. It combines the efficiency of partial correlation for setting up network topology by testing conditional independence, and the concept of Granger causality to assess topology change with induced interruptions. The idea is that when a transcription factor is induced artificially within a gene network, the disruption of the network by the induction signifies a genes role in transcriptional regulation. The benchmarking results using GeneNetWeaver, the simulator for the DREAM challenges, provide strong evidence of the outstanding performance of the proposed DPC method. When applied to real biological data, the inferred starch metabolism network in Arabidopsis reveals many biologically meaningful network modules worthy of further investigation. These results collectively suggest DPC is a versatile tool for genomics research. 
The R package DPC is available for download (http://code.google.com/p/dpcnet/).",2011-04-06 +21546551,iPath2.0: interactive pathway explorer.,"iPath2.0 is a web-based tool (http://pathways.embl.de) for the visualization and analysis of cellular pathways. Its primary map summarizes the metabolism in biological systems as annotated to date. Nodes in the map correspond to various chemical compounds and edges represent series of enzymatic reactions. In two other maps, iPath2.0 provides an overview of secondary metabolite biosynthesis and a hand-picked selection of important regulatory pathways and other functional modules, allowing a more general overview of protein functions in a genome or metagenome. iPath2.0's main interface is an interactive Flash-based viewer, which allows users to easily navigate and explore the complex pathway maps. In addition to the default pre-computed overview maps, iPath offers several data mapping tools. Users can upload various types of data and completely customize all nodes and edges of iPath2.0's maps. These customized maps give users an intuitive overview of their own data, guiding the analysis of various genomics and metagenomics projects.",2011-05-05 +22798079,Jatropha curcas hemagglutinin is similar to a 2S albumin allergen from the same source and has unique sugar affinities.,"We have previously reported the purification and preliminary X-ray characterization of a hemagglutinin from the seeds of Jatropha curcas and, with the detailed sequencing information available now, we find that it is similar to a 2S albumin allergen isolated from the same source. Through a search of Jatropha genome database (http://www.kazusa.or.jp/jatropha/), we map it to the sequence id JcCA0234191 (now referred to as Jcr4S00619.70 in the new version, release 4.5) which has a conserved alpha amylase inhibitor/seed storage protein domain found in the 2S albumin allergens. 
The putative sequence of the small and large chains of the protein is assigned and the total mass of the two subunits matches with the intact mass 10 kDa determined through MALDI. The protein retains hemagglutination activity between pH 6-9 and up to 60 °C on heat treatment and its hemagglutination activity is inhibited by sialic acid and fetuin. Bioinformatics studies show that the isolated protein sequence clusters in close association with a 2S albumin from Ricinus communis in phylogeny analysis and has a conservation of the characteristic four disulfide linkage pattern. Hemagglutinins and lectins are known to have allergenic effects through their interaction with immunoglobulin E and histamine release and earlier studies have shown that this interaction can be inhibited by lectin-specific sugars. We hope this report bridges the plant allergens and hemagglutinins further for exploring possible mediation of allergenic activity through sialic acid and complex sugar interactions and generates further interest in the area.",2012-07-15 +21471018,Multi-platform segmentation for joint detection of copy number variants.,"

Motivation

With the expansion of whole-genome studies, there is rapid evolution of genotyping platforms. This leads to practical issues such as upgrading of genotyping equipment which often results in research groups having data from different platforms for the same samples. While having more data can potentially yield more accurate copy-number estimates, combining such data is not straightforward as different platforms show different degrees of attenuation of the true copy-number or different noise characteristics and marker panels. Currently, there is still a relative lack of procedures for combining information from different platforms.

Results

We develop a method, called MPSS, based on a correlated random-effect model for the unobserved patterns and extend the robust smooth segmentation approach to the multiple-platform scenario. We also propose an objective criterion for discrete segmentation required for downstream analyses. For each identified segment, the software reports a P-value to indicate the likelihood of the segment being a true CNV. From the analyses of real and simulated data, we show that MPSS has better operating characteristics when compared to single-platform methods, and have substantially higher sensitivity compared to an existing multiplatform method.

Availability

The methods are implemented in an R package MPSS, and the source is available from http://www.meb.ki.se/~yudpaw.",2011-04-05 +21344013,NeuroMap: A Spline-Based Interactive Open-Source Software for Spatiotemporal Mapping of 2D and 3D MEA Data.,"A major characteristic of neural networks is the complexity of their organization at various spatial scales, from microscopic local circuits to macroscopic brain-scale areas. Understanding how neural information is processed thus entails the ability to study them at multiple scales simultaneously. This is made possible using microelectrodes array (MEA) technology. Indeed, high-density MEAs provide large-scale coverage (several square millimeters) of whole neural structures combined with microscopic resolution (about 50 μm) of unit activity. Yet, current options for spatiotemporal representation of MEA-collected data remain limited. Here we present NeuroMap, a new interactive Matlab-based software for spatiotemporal mapping of MEA data. NeuroMap uses thin plate spline interpolation, which provides several assets with respect to conventional mapping methods used currently. First, any MEA design can be considered, including 2D or 3D, regular or irregular, arrangements of electrodes. Second, spline interpolation allows the estimation of activity across the tissue with local extrema not necessarily at recording sites. Finally, this interpolation approach provides a straightforward analytical estimation of the spatial Laplacian for better current sources localization. In this software, coregistration of 2D MEA data on the anatomy of the neural tissue is made possible by fine matching of anatomical data with electrode positions using rigid-deformation-based correction of anatomical pictures. Overall, NeuroMap provides substantial material for detailed spatiotemporal analysis of MEA data. 
The package is distributed under GNU General Public License and available at http://sites.google.com/site/neuromapsoftware.",2011-01-31 +22798082,AtlasCBS: a web server to map and explore chemico-biological space.,"New approaches are needed that can help decrease the unsustainable failure in small-molecule drug discovery. Ligand Efficiency Indices (LEI) are making a great impact on early-stage compound selection and prioritization. Given a target-ligand database with chemical structures and associated biological affinities/activities for a target, the AtlasCBS server generates two-dimensional, dynamical representations of its contents in terms of LEI. These variables allow an effective decoupling of the chemical (angular) and biological (radial) components. BindingDB, PDBBind and ChEMBL databases are currently implemented. Proprietary datasets can also be uploaded and compared. The utility of this atlas-like representation in the future of drug design is highlighted with some examples. The web server can be accessed at http://ub.cbm.uam.es/atlascbs and https://www.ebi.ac.uk/chembl/atlascbs.",2012-07-14 +22340440,GADS software for parametric linkage analysis of quantitative traits distributed as a point-mass mixture.,"Often the quantitative data coming from proteomics and metabolomics studies have irregular distribution with a spike. None of the wide used methods for human QTL mapping are applicable to such traits. Researchers have to reduce the sample, excluding the spike, and analyze only continuous measurements. In this study, we propose a method for the parametric linkage analysis of traits with a spike in the distribution, and a software GADS, which implements this method. Our software includes not only the programs for parametric linkage analysis, but also the program for complex segregation analysis, which allows the estimation of the model parameters used in linkage. 
We tested our method on the real data about vertical cup-to-disc ratio, the important characteristic of the optic disc associated with glaucoma, in a large pedigree from a Dutch isolated population. Significant linkage signal was identified on chromosome 6 with the help of GADS, whereas the analysis of the normal distributed part of the sample demonstrated only a suggestive linkage peak on this chromosome. The software GADS is freely available at http://mga.bionet.nsc.ru/soft/index.html.",2011-12-07 +23740749,An image-based multi-label human protein subcellular localization predictor (iLocator) reveals protein mislocalizations in cancer tissues.,"

Motivation

Human cells are organized into compartments of different biochemical cellular processes. Having proteins appear at the right time to the correct locations in the cellular compartments is required to conduct their functions in normal cells, whereas mislocalization of proteins can result in pathological diseases, including cancer.

Results

To reveal the cancer-related protein mislocalizations, we developed an image-based multi-label subcellular location predictor, iLocator, which covers seven cellular localizations. The iLocator incorporates both global and local image descriptors and generates predictions by using an ensemble multi-label classifier. The algorithm has the ability to treat both single- and multiple-location proteins. We first trained and tested iLocator on 3240 normal human tissue images that have known subcellular location information from the human protein atlas. The iLocator was then used to generate protein localization predictions for 3696 protein images from seven cancer tissues that have no location annotations in the human protein atlas. By comparing the output data from normal and cancer tissues, we detected eight potential cancer biomarker proteins that have significant localization differences with P-value < 0.01.

Availability

http://www.csbio.sjtu.edu.cn/bioinf/iLocator/",2013-06-04 +23384279,DNA conformation and energy in nucleosome core: a theoretical approach.,"DNA conformation in complex with proteins is far from its canonical B-form. The affinity of complex formation and structure of DNA depend on its attachment configuration and sequence. In this article, we develop a mechanical model to address the problem of DNA structure and energy under deformation. DNA in nucleosome core particle is described as an example. The structure and energy of nucleosomal DNA is calculated based on its sequence and positioning state. The inferred structure has remarkable similarity with X-ray data. Although there is no sequence-specific interaction of bases and the histone core, we found considerable sequence dependency for the nucleosomal DNA positioning. The affinity of nucleosome formation for several sequences is examined and the differences are compatible with observations. We argue that structural energy determines the natural state of nucleosomal DNA and is the main reason for affinity differences in vitro. This theory can be utilized for the DNA structure and energy determination in protein-DNA complexes in general. An animated Interactive 3D Complement (I3DC) is available in Proteopedia at http://proteopedia.org/w/Journal:JBSD:17.",2013-02-05 +23778980,A daily-updated tree of (sequenced) life as a reference for genome research.,"We report a daily-updated sequenced/species Tree Of Life (sTOL) as a reference for the increasing number of cellular organisms with their genomes sequenced. The sTOL builds on a likelihood-based weight calibration algorithm to consolidate NCBI taxonomy information in concert with unbiased sampling of molecular characters from whole genomes of all sequenced organisms. 
Via quantifying the extent of agreement between taxonomic and molecular data, we observe there are many potential improvements that can be made to the status quo classification, particularly in the Fungi kingdom; we also see that the current state of many animal genomes is rather poor. To augment the use of sTOL in providing evolutionary contexts, we integrate an ontology infrastructure and demonstrate its utility for evolutionary understanding on: nuclear receptors, stem cells and eukaryotic genomes. The sTOL (http://supfam.org/SUPERFAMILY/sTOL) provides a binary tree of (sequenced) life, and contributes to an analytical platform linking genome evolution, function and phenotype.",2013-01-01 +21851598,Development and application of a modified dynamic time warping algorithm (DTW-S) to analyses of primate brain expression time series.,"

Background

Comparing biological time series data across different conditions, or different specimens, is a common but still challenging task. Algorithms aligning two time series represent a valuable tool for such comparisons. While many powerful computation tools for time series alignment have been developed, they do not provide significance estimates for time shift measurements.

Results

Here, we present an extended version of the original DTW algorithm that allows us to determine the significance of time shift estimates in time series alignments, the DTW-Significance (DTW-S) algorithm. The DTW-S combines important properties of the original algorithm and other published time series alignment tools: DTW-S calculates the optimal alignment for each time point of each gene, it uses interpolated time points for time shift estimation, and it does not require alignment of the time-series end points. As a new feature, we implement a simulation procedure based on parameters estimated from real time series data, on a series-by-series basis, allowing us to determine the false positive rate (FPR) and the significance of the estimated time shift values. We assess the performance of our method using simulation data and real expression time series from two published primate brain expression datasets. Our results show that this method can provide accurate and robust time shift estimates for each time point on a gene-by-gene basis. Using these estimates, we are able to uncover novel features of the biological processes underlying human brain development and maturation.

Conclusions

The DTW-S provides a convenient tool for calculating accurate and robust time shift estimates at each time point for each gene, based on time series data. The estimates can be used to uncover novel biological features of the system being studied. The DTW-S is freely available as an R package TimeShift at http://www.picb.ac.cn/Comparative/data.html.",2011-08-18 +24215030,Parallel content-based sub-image retrieval using hierarchical searching.,"

Motivation

The capacity to systematically search through large image collections and ensembles and detect regions exhibiting similar morphological characteristics is central to pathology diagnosis. Unfortunately, the primary methods used to search digitized, whole-slide histopathology specimens are slow and prone to inter- and intra-observer variability. The central objective of this research was to design, develop, and evaluate a content-based image retrieval system to assist doctors for quick and reliable content-based comparative search of similar prostate image patches.

Method

Given a representative image patch (sub-image), the algorithm will return a ranked ensemble of image patches throughout the entire whole-slide histology section which exhibits the most similar morphologic characteristics. This is accomplished by first performing hierarchical searching based on a newly developed hierarchical annular histogram (HAH). The set of candidates is then further refined in the second stage of processing by computing a color histogram from eight equally divided segments within each square annular bin defined in the original HAH. A demand-driven master-worker parallelization approach is employed to speed up the searching procedure. Using this strategy, the query patch is broadcasted to all worker processes. Each worker process is dynamically assigned an image by the master process to search for and return a ranked list of similar patches in the image.

Results

The algorithm was tested using digitized hematoxylin and eosin (H&E) stained prostate cancer specimens. We have achieved an excellent image retrieval performance. The recall rate within the first 40 rank retrieved image patches is ∼90%.

Availability and implementation

Both the testing data and source code can be downloaded from http://pleiad.umdnj.edu/CBII/Bioinformatics/.",2013-11-09 +22613562,Flexible-meccano: a tool for the generation of explicit ensemble descriptions of intrinsically disordered proteins and their associated experimental observables.,"

Motivation

Intrinsically disordered proteins (IDPs) represent a significant fraction of the human proteome. The classical structure function paradigm that has successfully underpinned our understanding of molecular biology breaks down when considering proteins that have no stable tertiary structure in their functional form. One convenient approach is to describe the protein in terms of an equilibrium of rapidly inter-converting conformers. Currently, tools to generate such ensemble descriptions are extremely rare, and poorly adapted to the prediction of experimental data.

Results

We present flexible-meccano-a highly efficient algorithm that generates ensembles of molecules, on the basis of amino acid-specific conformational potentials and volume exclusion. Conformational sampling depends uniquely on the primary sequence, with the possibility of introducing additional local or long-range conformational propensities at an amino acid-specific resolution. The algorithm can also be used to calculate expected values of experimental parameters measured at atomic or molecular resolution, such as nuclear magnetic resonance (NMR) and small angle scattering, respectively. We envisage that flexible-meccano will be useful for researchers who wish to compare experimental data with those expected from a fully disordered protein, researchers who see experimental evidence of deviation from 'random coil' behaviour in their protein, or researchers who are interested in working with a broad ensemble of conformers representing the flexibility of the IDP of interest.

Availability

A fully documented multi-platform executable is provided, with examples, at http://www.ibs.fr/science-213/scientific-output/software/flexible-meccano/

Contact

martin.blackledge@ibs.fr.",2012-06-01 +22849369,Metagenomic taxonomic classification using extreme learning machines.,"Next-generation sequencing technologies have allowed researchers to determine the collective genomes of microbial communities co-existing within diverse ecological environments. Varying species abundance, length and complexities within different communities, coupled with discovery of new species makes the problem of taxonomic assignment to short DNA sequence reads extremely challenging. We have developed a new sequence composition-based taxonomic classifier using extreme learning machines referred to as TAC-ELM for metagenomic analysis. TAC-ELM uses the framework of extreme learning machines to quickly and accurately learn the weights for a neural network model. The input features consist of GC content and oligonucleotides. TAC-ELM is evaluated on two metagenomic benchmarks with sequence read lengths reflecting the traditional and current sequencing technologies. Our empirical results indicate the strength of the developed approach, which outperforms state-of-the-art taxonomic classifiers in terms of accuracy and implementation complexity. We also perform experiments that evaluate the pervasive case within metagenome analysis, where a species may not have been previously sequenced or discovered and will not exist in the reference genome databases. TAC-ELM was also combined with BLAST to show improved classification results. Code and Supplementary Results: http://www.cs.gmu.edu/~mlbio/TAC-ELM (BSD License).",2012-07-11 +22084256,Discovering transcription factor regulatory targets using gene expression and binding data.,"

Motivation

Identifying the target genes regulated by transcription factors (TFs) is the most basic step in understanding gene regulation. Recent advances in high-throughput sequencing technology, together with chromatin immunoprecipitation (ChIP), enable mapping TF binding sites genome wide, but it is not possible to infer function from binding alone. This is especially true in mammalian systems, where regulation often occurs through long-range enhancers in gene-rich neighborhoods, rather than proximal promoters, preventing straightforward assignment of a binding site to a target gene.

Results

We present EMBER (Expectation Maximization of Binding and Expression pRofiles), a method that integrates high-throughput binding data (e.g. ChIP-chip or ChIP-seq) with gene expression data (e.g. DNA microarray) via an unsupervised machine learning algorithm for inferring the gene targets of sets of TF binding sites. Genes selected are those that match overrepresented expression patterns, which can be used to provide information about multiple TF regulatory modes. We apply the method to genome-wide human breast cancer data and demonstrate that EMBER confirms a role for the TFs estrogen receptor alpha, retinoic acid receptors alpha and gamma in breast cancer development, whereas the conventional approach of assigning regulatory targets based on proximity does not. Additionally, we compare several predicted target genes from EMBER to interactions inferred previously, examine combinatorial effects of TFs on gene regulation and illustrate the ability of EMBER to discover multiple modes of regulation.

Availability

All code used for this work is available at http://dinner-group.uchicago.edu/downloads.html.",2011-11-13 +24485712,The ADHF/NT-proBNP risk score to predict 1-year mortality in hospitalized patients with advanced decompensated heart failure.,"

Background

The acute decompensated heart failure/N-terminal pro-B-type natriuretic peptide (ADHF/NT-proBNP) score is a validated risk scoring system that predicts mortality in hospitalized heart failure patients with a wide range of left ventricular ejection fractions (LVEFs). We sought to assess discrimination and calibration of the score when applied to patients with advanced decompensated heart failure (AHF).

Methods

We studied 445 patients hospitalized for AHF, defined by the presence of severe symptoms of worsening HF at admission, severely depressed LVEF, and the need for intravenous diuretic and/or inotropic drugs. The primary outcome was cumulative (in-hospital and post-discharge) mortality and post-discharge 1-year mortality. Separate analyses were performed for patients aged ≤ 70 years. A Seattle Heart Failure Score (SHFS) was calculated for each patient discharged alive.

Results

During follow-up, 144 patients (32.4%) died, and 69 (15.5%) underwent heart transplantation (HT) or ventricular assist device (VAD) implantation. After accounting for the competing events (VAD/HT), the ADHF/NT-proBNP score's C-statistic for cumulative mortality was 0.738 in the overall cohort and 0.771 in patients aged ≤ 70 years. The C-statistic for post-discharge mortality was 0.741 and 0.751, respectively. Adding prior (≤6 months) hospitalizations for HF to the score increased the C-statistic for post-discharge mortality to 0.759 in the overall cohort and to 0.774 in patients aged ≤ 70 years. Predicted and observed mortality rates by quartiles of score were highly correlated. The SHFS demonstrated adequate discrimination but underestimated the risk. The ADHF/NT-proBNP risk calculator is available at http://www.fsm.it/fsm/file/NTproBNPscore.zip.

Conclusions

Our data suggest that the ADHF/NT-proBNP score may efficiently predict mortality in patients hospitalized with AHF.",2013-12-16 +22689767,NORMAL: accurate nucleosome positioning using a modified Gaussian mixture model.,"

Motivation

Nucleosomes are the basic elements of chromatin structure. They control the packaging of DNA and play a critical role in gene regulation by allowing physical access to transcription factors. The advent of second-generation sequencing has enabled landmark genome-wide studies of nucleosome positions for several model organisms. Current methods to determine nucleosome positioning first compute an occupancy coverage profile by mapping nucleosome-enriched sequenced reads to a reference genome; then, nucleosomes are placed according to the peaks of the coverage profile. These methods are quite accurate on placing isolated nucleosomes, but they do not properly handle more complex configurations. Also, they can only provide the positions of nucleosomes and their occupancy level, whereas it is very beneficial to supply molecular biologists additional information about nucleosomes like the probability of placement, the size of DNA fragments enriched for nucleosomes and/or whether nucleosomes are well positioned or 'fuzzy' in the sequenced cell sample.

Results

We address these issues by providing a novel method based on a parametric probabilistic model. An expectation maximization algorithm is used to infer the parameters of the mixture of distributions. We compare the performance of our method on two real datasets against Template Filtering, which is considered the current state-of-the-art. On synthetic data, we show that our method can resolve more accurately complex configurations of nucleosomes, and it is more robust to user-defined parameters. On real data, we show that our method detects a significantly higher number of nucleosomes.

Availability

Visit http://www.cs.ucr.edu/~polishka.",2012-06-01 +22786784,Reference databases for taxonomic assignment in metagenomics.,"Metagenomics is providing an unprecedented access to the environmental microbial diversity. The amplicon-based metagenomics approach involves the PCR-targeted sequencing of a genetic locus fitting different features. Namely, it must be ubiquitous in the taxonomic range of interest, variable enough to discriminate between different species but flanked by highly conserved sequences, and of suitable size to be sequenced through next-generation platforms. The internal transcribed spacers 1 and 2 (ITS1 and ITS2) of the ribosomal DNA operon and one or more hyper-variable regions of 16S ribosomal RNA gene are typically used to identify fungal and bacterial species, respectively. In this context, reliable reference databases and taxonomies are crucial to assign amplicon sequence reads to the correct phylogenetic ranks. Several resources provide consistent phylogenetic classification of publicly available 16S ribosomal DNA sequences, whereas the state of ribosomal internal transcribed spacers reference databases is notably less advanced. In this review, we aim to give an overview of existing reference resources for both types of markers, highlighting strengths and possible shortcomings of their use for metagenomics purposes. Moreover, we present a new database, ITSoneDB, of well annotated and phylogenetically classified ITS1 sequences to be used as a reference collection in metagenomic studies of environmental fungal communities. ITSoneDB is available for download and browsing at http://itsonedb.ba.itb.cnr.it/.",2012-07-10 +22646023,"OLSVis: an animated, interactive visual browser for bio-ontologies.","

Background

More than one million terms from biomedical ontologies and controlled vocabularies are available through the Ontology Lookup Service (OLS). Although OLS provides ample possibility for querying and browsing terms, the visualization of parts of the ontology graphs is rather limited and inflexible.

Results

We created the OLSVis web application, a visualiser for browsing all ontologies available in the OLS database. OLSVis shows customisable subgraphs of the OLS ontologies. Subgraphs are animated via a real-time force-based layout algorithm which is fully interactive: each time the user makes a change, e.g. browsing to a new term, hiding, adding, or dragging terms, the algorithm performs smooth and only essential reorganisations of the graph. This assures an optimal viewing experience, because subsequent screen layouts are not grossly altered, and users can easily navigate through the graph. URL: http://ols.wordvis.com

Conclusions

The OLSVis web application provides a user-friendly tool to visualise ontologies from the OLS repository. It broadens the possibilities to investigate and select ontology subgraphs through a smooth visualisation method.",2012-07-10 +21317139,eGOB: eukaryotic Gene Order Browser.,"

Unlabelled

A large number of genomes have been sequenced, allowing a range of comparative studies. Here, we present the eukaryotic Gene Order Browser with information on the order of protein and non-coding RNA (ncRNA) genes of 74 different eukaryotic species. The browser is able to display a gene of interest together with its genomic context in all species where that gene is present. Thereby, questions related to the evolution of gene organization and non-random gene order may be examined. The browser also provides access to data collected on pairs of adjacent genes that are evolutionarily conserved.

Availability

eGOB as well as underlying data are freely available at http://egob.biomedicine.gu.se

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

tore.samuelsson@medkem.gu.se.",2011-02-10 +22130594,PGAP: pan-genomes analysis pipeline.,"

Summary

With the rapid development of DNA sequencing technology, increasing bacteria genome data enable the biologists to dig the evolutionary and genetic information of prokaryotic species from pan-genome sight. Therefore, the high-efficiency pipelines for pan-genome analysis are mostly needed. We have developed a new pan-genome analysis pipeline (PGAP), which can perform five analytic functions with only one command, including cluster analysis of functional genes, pan-genome profile analysis, genetic variation analysis of functional genes, species evolution analysis and function enrichment analysis of gene clusters. PGAP's performance has been evaluated on 11 Streptococcus pyogenes strains.

Availability

PGAP is developed with Perl script on the Linux Platform and the package is freely available from http://pgap.sf.net.

Contact

junyu@big.ac.cn; xiaojingfa@big.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-29 +23153116,Unipept: tryptic peptide-based biodiversity analysis of metaproteome samples.,"The Unipept web application (http://unipept.ugent.be) supports biodiversity analysis of large and complex metaproteome samples using tryptic peptide information obtained from shotgun MS/MS experiments. Its underlying index structure is designed to quickly retrieve all occurrences of a tryptic peptide in UniProtKB records. Taxon-specificity of the tryptic peptide is successively derived from these occurrences using a novel lowest common ancestor approach that is robust against taxonomic misarrangements, misidentifications, and inaccuracies. Not taking into account this identification noise would otherwise result in drastic loss of information. Dynamic treemaps visualize the biodiversity of metaproteome samples, which eases the exploration of samples with highly complex compositions. The potential of Unipept to gain novel insights into the biodiversity of a sample is evaluated by reanalyzing publicly available metaproteome data sets taken from the bacterial phyllosphere and the human gut.",2012-11-26 +24198854,The 2011 PHARMINE report on pharmacy and pharmacy education in the European Union.,"The PHARMINE consortium consists of 50 universities from European Union member states or other European countries that are members of the European Association of Faculties of Pharmacy (EAFP). EU partner associations representing community (PGEU), hospital (EAHP) and industrial pharmacy (EIPG), together with the European Pharmacy Students' Association (EPSA) are also part of the consortium. THE CONSORTIUM SURVEYED PHARMACIES AND PHARMACISTS IN DIFFERENT SETTINGS: community, hospital, industry and other sectors. The consortium also looked at how European Union higher education institutions and courses are organised. 
The PHARMINE survey of pharmacy and pharmacy education in Europe produced country profiles with extensive information for EU member states and several other European countries. These data are available at: http://www.pharmine.org/losse_paginas/Country_Profiles/. This 2011 PHARMINE report presents the project and data, and some preliminary analysis on the basic question of how pharmacy education is adapted to pharmacy practice in the EU.",2011-10-01 +22776072,"Assessment of the structural and functional impact of in-frame mutations of the DMD gene, using the tools included in the eDystrophin online database.","

Background

Dystrophin is a large essential protein of skeletal and heart muscle. It is a filamentous scaffolding protein with numerous binding domains. Mutations in the DMD gene, which encodes dystrophin, mostly result in the deletion of one or several exons and cause Duchenne (DMD) and Becker (BMD) muscular dystrophies. The most common DMD mutations are frameshift mutations resulting in an absence of dystrophin from tissues. In-frame DMD mutations are less frequent and result in a protein with partial wild-type dystrophin function. The aim of this study was to highlight structural and functional modifications of dystrophin caused by in-frame mutations.

Methods and results

We developed a dedicated database for dystrophin, the eDystrophin database. It contains 209 different non frame-shifting mutations found in 945 patients from a French cohort and previous studies. Bioinformatics tools provide models of the three-dimensional structure of the protein at deletion sites, making it possible to determine whether the mutated protein retains the typical filamentous structure of dystrophin. An analysis of the structure of mutated dystrophin molecules showed that hybrid repeats were reconstituted at the deletion site in some cases. These hybrid repeats harbored the typical triple coiled-coil structure of native repeats, which may be correlated with better function in muscle cells.

Conclusion

This new database focuses on the dystrophin protein and its modification due to in-frame deletions in BMD patients. The observation of hybrid repeat reconstitution in some cases provides insight into phenotype-genotype correlations in dystrophin diseases and possible strategies for gene therapy. The eDystrophin database is freely available: http://edystrophin.genouest.org/.",2012-07-09 +24072004,THRIVE score predicts ischemic stroke outcomes and thrombolytic hemorrhage risk in VISTA.,"

Background and purpose

In previous studies, the Totaled Health Risks in Vascular Events (THRIVE) score has shown broad utility, allowing prediction of clinical outcome, death, and risk of hemorrhage after tissue-type plasminogen activator (tPA) treatment, irrespective of the type of acute stroke therapy applied to the patient.

Methods

We used data from the Virtual International Stroke Trials Archive to further validate the THRIVE score in a large cohort of patients receiving tPA or no acute treatment, to confirm the relationship between THRIVE and hemorrhage after tPA, and to compare the THRIVE score with several other available outcome prediction scores.

Results

The THRIVE score strongly predicts clinical outcome (odds ratio, 0.55 for good outcome [95% CI, 0.53-0.57]; P<0.001), mortality (odds ratio, 1.57 [95% confidence interval, 1.50-1.64]; P<0.001), and risk of intracerebral hemorrhage after tPA (odds ratio, 1.34 [95% confidence interval, 1.22-1.46]; P<0.001). The relationship between THRIVE score and outcome is not influenced by the independent relationship of tPA administration and outcome. In receiver operator characteristic curve analysis, the THRIVE score was superior to several other available outcome prediction scores in the prediction of clinical outcome and mortality.

Conclusions

The THRIVE score is a simple-to-use tool to predict clinical outcome, mortality, and risk of hemorrhage after thrombolysis in patients with ischemic stroke. Despite its simplicity, the THRIVE score performs better than several other outcome prediction tools. A free Web calculator for the THRIVE score is available at http://www.thrivescore.org.",2013-09-26 +24559086,Computing the probability of RNA hairpin and multiloop formation.,"We describe four novel algorithms, RNAhairpin, RNAmloopNum, RNAmloopOrder, and RNAmloopHP, which compute the Boltzmann partition function for global structural constraints-respectively for the number of hairpins, the number of multiloops, maximum order (or depth) of multiloops, and the simultaneous number of hairpins and multiloops. Given an RNA sequence of length n and a user-specified integer 0 ≤ K ≤ n, RNAhairpin (resp. RNAmloopNum and RNAmloopOrder) computes the partition functions Z(k) for each 0 ≤ k ≤ K in time O(K(2)n(3)) and space O(Kn(2)), while RNAmloopHP computes the partition functions Z(m, h) for 0 ≤ mm ≤ M multiloops and 0 ≤ h ≤ H hairpins, with run time O(M(2)H(2)n(3)) and space O(MHn(2)). In addition, programs such as RNAhairpin (resp. RNAmloopHP) sample from the low-energy ensemble of structures having h hairpins (resp. m multiloops and h hairpins), for given h, m. Moreover, by using the fast Fourier transform (FFT), RNAhairpin and RNAmloopNum have been improved to run in time O(n(4)) and space O(n(2)), although this improvement is not possible for RNAmloopOrder. We present two applications of the novel algorithms. First, we show that for many Rfam families of RNA, structures sampled from RNAmloopHP are more accurate than the minimum free-energy structure; for instance, sensitivity improves by almost 24% for transfer RNA, while for certain ribozyme families, there is an improvement of around 5%. Second, we show that the probabilities p(k)=Z(k)/Z of forming k hairpins (resp. 
multiloops) provide discriminating novel features for a support vector machine or relevance vector machine binary classifier for Rfam families of RNA. Our data suggests that multiloop order does not provide any significant discriminatory power over that of hairpin and multiloop number, and since these probabilities can be efficiently computed using the FFT, hairpin and multiloop formation probabilities could be added to other features in existent noncoding RNA gene finders. Our programs, written in C/C++, are publicly available online at: http://bioinformatics.bc.edu/clotelab/RNAparametric .",2014-02-21 +24072733,MATE-CLEVER: Mendelian-inheritance-aware discovery and genotyping of midsize and long indels.,"

Motivation

Accurately predicting and genotyping indels longer than 30 bp has remained a central challenge in next-generation sequencing (NGS) studies. While indels of up to 30 bp are reliably processed by standard read aligners and the Genome Analysis Toolkit (GATK), longer indels have still resisted proper treatment. Also, discovering and genotyping longer indels has become particularly relevant owing to the increasing attention in globally concerted projects.

Results

We present MATE-CLEVER (Mendelian-inheritance-AtTEntive CLique-Enumerating Variant findER) as an approach that accurately discovers and genotypes indels longer than 30 bp from contemporary NGS reads with a special focus on family data. For enhanced quality of indel calls in family trios or quartets, MATE-CLEVER integrates statistics that reflect the laws of Mendelian inheritance. MATE-CLEVER's performance rates for indels longer than 30 bp are on a par with those of the GATK for indels shorter than 30 bp, achieving up to 90% precision overall, with >80% of calls correctly typed. In predicting de novo indels longer than 30 bp in family contexts, MATE-CLEVER even raises the standards of the GATK. MATE-CLEVER achieves precision and recall of ∼63% on indels of 30 bp and longer versus 55% in both categories for the GATK on indels of 10-29 bp. A special version of MATE-CLEVER has contributed to indel discovery, in particular for indels of 30-100 bp, the 'NGS twilight zone of indels', in the Genome of the Netherlands Project.

Availability and implementation

 http://clever-sv.googlecode.com/",2013-09-25 +21852304,Improved quality control processing of peptide-centric LC-MS proteomics data.,"

Motivation

In the analysis of differential peptide peak intensities (i.e. abundance measures), LC-MS analyses with poor quality peptide abundance data can bias downstream statistical analyses and hence the biological interpretation for an otherwise high-quality dataset. Although considerable effort has been placed on assuring the quality of the peptide identification with respect to spectral processing, to date quality assessment of the subsequent peptide abundance data matrix has been limited to a subjective visual inspection of run-by-run correlation or individual peptide components. Identifying statistical outliers is a critical step in the processing of proteomics data as many of the downstream statistical analyses [e.g. analysis of variance (ANOVA)] rely upon accurate estimates of sample variance, and their results are influenced by extreme values.

Results

We describe a novel multivariate statistical strategy for the identification of LC-MS runs with extreme peptide abundance distributions. Comparison with current method (run-by-run correlation) demonstrates a significantly better rate of identification of outlier runs by the multivariate strategy. Simulation studies also suggest that this strategy significantly outperforms correlation alone in the identification of statistically extreme liquid chromatography-mass spectrometry (LC-MS) runs.

Availability

https://www.biopilot.org/docs/Software/RMD.php

Contact

bj@pnl.gov

Supplementary information

Supplementary material is available at Bioinformatics online.",2011-08-18 +21851618,Test-retest of computerized health status questionnaires frequently used in the monitoring of knee osteoarthritis: a randomized crossover trial.,"

Background

To compare data based on touch screen to data based on traditional paper versions of questionnaires frequently used to examine patient reported outcomes in knee osteoarthritis patients and to examine the impact of patient characteristics on this comparison

Methods

Participants were recruited from an ongoing trial (http://ClinicalTrials.Gov Identifier: NCT00655941). 20 female participants, mean age 67 (SD 7), completed KOOS, VAS pain, function and patient global, SF-36, Physical Activity Scale, painDETECT, and the ADL Taxonomy. Patients were randomly assigned to one of two subgroups, completing either the paper or touch screen version first. Mean, mean differences (95% CI), median, median differences and Intraclass Correlation Coefficients (ICCs) were calculated for all questionnaires.

Results

ICCs between data based on computerized and paper versions ranged from 0.86 to 0.99. Analysis revealed a statistically significant difference between versions of the ADL Taxonomy, but not for the remaining questionnaires. Age, computer experience or education-level had no significant impact on the results. The computerized questionnaires were reported to be easier to use.

Conclusion

The computerized questionnaires gave comparable results to answers given on paper. Patient characteristics did not influence results and implementation was feasible.",2011-08-18 +24188461,Upregulation of SOX9 in osteosarcoma and its association with tumor progression and patients' prognosis.,"

Objective

SOX9 plays an important role in bone formation and tumorigenesis. However, its involvement in osteosarcoma is still unclear. The aim of this study was to investigate the expression pattern and the clinical significance of SOX9 in human osteosarcoma.

Methods

SOX9 mRNA and protein expression levels were detected by RT-PCR and Western blot assays, respectively, using 30 pairs of osteosarcoma and noncancerous bone tissues. Then, immunohistochemistry was performed to analyze the association of SOX9 expression in 166 osteosarcoma tissues with clinicopathological factors or survival of patients.

Results

SOX9 expression at mRNA and protein levels were both significantly higher in osteosarcoma tissues than those in corresponding noncancerous bone tissues (both P < 0.001). Immunohistochemical staining indicated that SOX9 localized to the nucleus and high SOX9 expression was observed in 120 of 166 (72.3%) osteosarcoma specimens. In addition, high SOX9 expression was more frequently occurred in osteosarcoma tissues with advanced clinical stage (P = 0.02), positive distant metastasis (P = 0.008) and poor response to chemotherapy (P = 0.02). Osteosarcoma patients with high SOX9 expression had shorter overall survival and disease-free survival (both P < 0.001). Furthermore, the multivariate analysis confirmed that upregulation of SOX9 was an independent and significant prognostic factor to predict poor overall survival and disease-free survival (both P = 0.006).

Conclusions

Our data show for the first time that SOX9 is upregulated in aggressive osteosarcoma tissues indicating that SOX9 may participate in the osteosarcoma progression. More importantly, SOX9 status is a useful prognostic factor for predicting the prognosis of osteosarcoma, suggesting that SOX9 may contribute to the optimization of clinical treatments for osteosarcoma patients.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1318085636110837.",2013-11-04 +22788920,Identification of BIRC6 as a novel intervention target for neuroblastoma therapy.,"

Background

Neuroblastoma are pediatric tumors of the sympathetic nervous system with a poor prognosis. Apoptosis is often deregulated in cancer cells, but only a few defects in apoptotic routes have been identified in neuroblastoma.

Methods

Here we investigated genomic aberrations affecting genes of the intrinsic apoptotic pathway in neuroblastoma. We analyzed DNA profiling data (CGH and SNP arrays) and mRNA expression data of 31 genes of the intrinsic apoptotic pathway in a dataset of 88 neuroblastoma tumors using the R2 bioinformatic platform ( http://r2.amc.nl). BIRC6 was selected for further analysis as a tumor driving gene. Knockdown experiments were performed using BIRC6 lentiviral shRNA and phenotype responses were analyzed by Western blot and MTT-assays. In addition, DIABLO levels and interactions were investigated with immunofluorescence and co-immunoprecipitation.

Results

We observed frequent gain of the BIRC6 gene on chromosome 2, which resulted in increased mRNA expression. BIRC6 is an inhibitor of apoptosis protein (IAP), that can bind and degrade the cytoplasmic fraction of the pro-apoptotic protein DIABLO. DIABLO mRNA expression was exceptionally high in neuroblastoma but the protein was only detected in the mitochondria. Upon silencing of BIRC6 by shRNA, DIABLO protein levels increased and cells went into apoptosis. Co-immunoprecipitation confirmed direct interaction between DIABLO and BIRC6 in neuroblastoma cell lines.

Conclusion

Our findings indicate that BIRC6 may have a potential oncogenic role in neuroblastoma by inactivating cytoplasmic DIABLO. BIRC6 inhibition may therefore provide a means for therapeutic intervention in neuroblastoma.",2012-07-12 +22829744,shRNAPred (version 1.0): An open source and standalone software for short hairpin RNA (shRNA) prediction.,"

Unlabelled

The small hairpin RNAs (shRNA) are useful in many ways like identification of trait specific molecular markers, gene silencing and characterization of a species. In public domain, hardly there exists any standalone software for shRNA prediction. Hence, a software shRNAPred (1.0) is proposed here to offer a user-friendly Command-line User Interface (CUI) to predict 'shRNA-like' regions from a large set of nucleotide sequences. The software is developed using PERL Version 5.12.5 taking into account the parameters such as stem and loop length combinations, specific loop sequence, GC content, melting temperature, position specific nucleotides, low complexity filter, etc. Each of the parameters is assigned with a specific score and based on which the software ranks the predicted shRNAs. The high scored shRNAs obtained from the software are depicted as potential shRNAs and provided to the user in the form of a text file. The proposed software also allows the user to customize certain parameters while predicting specific shRNAs of his interest. The shRNAPred (1.0) is open access software available for academic users. It can be downloaded freely along with user manual, example dataset and output for easy understanding and implementation.

Availability

The database is available for free at http://bioinformatics.iasri.res.in/EDA/downloads/shRNAPred_v1.0.exe.",2012-07-06 +22778062,Systematic curation and analysis of genomic variations and their potential functional consequences in snoRNA loci.,"Small nucleolar RNAs (snoRNAs) are a class of noncoding functional RNAs which are involved in RNA modifications, like methylation and pseudouridylation of other RNAs. The snoRNA species of RNAs are characterized by conserved structural motifs they harbor which are also intricately related to their functionality. Though there have been reports of the involvement of snoRNAs in disease processes and anecdotal reports of genomic variations in snoRNA loci and their effects in modulating snoRNA function, there has been no systematic collection and analysis of variations in snoRNA loci. In this manuscript, we present the most comprehensive curation of genomic single nucleotide variations in human snoRNA loci, and their systematic computational analysis to reveal potential single nucleotide variations which could have functional effects. We show six single nucleotide variations in snoRNA loci could significantly alter snoRNA structure and could have potential implications in their functions. The compilation is available at the snoRNA locus specific variation database: http://genome.igib.res.in/snolovd conforming to the HGVS standards for nomenclature of genomic variants.",2012-07-06 +22829746,"BioParishodhana: A novel graphical interface integrating BLAST, ClustalW, primer3 and restriction digestion tools.","

Unlabelled

Bioinformatics has emerged as an integral part of life sciences and biomedical research. The bioinformatics tools developed so far exist individually and do not cross talk leading biologists to spend more time in formatting the output from one tool as input for another tool. This leads to huge loss of time and cost. We herein have made platform which integrates the tools in a way that the output of one program can be directly used as input of another and does not need any modifications. Tools for similarity search, primer designing, and restriction enzyme digestion are required in almost all biological research; therefore we initially tried to integrate these tools. BioParisodhana platform optimizes the time spend in browsing and downloading applications and is an interactive, effective and user friendly.

Availability

The database is available for free at http://resource.ibab.ac.in/bioparishodhana.html.",2012-07-06 +22792192,Transcript profile of the response of two soybean genotypes to potassium deficiency.,"The macronutrient potassium (K) is essential to plant growth and development. Crop yield potential is often affected by lack of soluble K. The molecular regulation mechanism of physiological and biochemical responses to K starvation in soybean roots and shoots is not fully understood. In the present study, two soybean varieties were subjected to low-K stress conditions: a low-K-tolerant variety (You06-71) and a low-K-sensitive variety (HengChun04-11). Eight libraries were generated for analysis: 2 genotypes ×2 tissues (roots and shoots) ×2 time periods [short term (0.5 to 12 h) and long term (3 to 12 d)]. RNA derived from the roots and shoots of these two varieties across two periods (short term and long term) were sequenced and the transcriptomes were compared using high-throughput tag-sequencing. To this end, a large number of clean tags (tags used for analysis after removal of dirty tags) corresponding to distinct tags (all types of clean tags) were identified in eight libraries (L1, You06-71-root short term; L2, HengChun04-11-root short term; L3, You06-71-shoot short term; L4, HengChun04-11-shoot short term; L5, You06-71-root long term; L6, HengChun04-11-root long term; L7, You06-71-shoot long term; L8, HengChun04-11-shoot long term). All clean tags were mapped to the available soybean (Glycine max) transcript database (http://www.soybase.org). Many genes showed substantial differences in expression across the libraries. In total, 5,440 transcripts involved in 118 KEGG pathways were either up- or down-regulated. Fifteen genes were randomly selected and their expression levels were confirmed using quantitative RT-PCR. 
Our results provide preliminary information on the molecular mechanism of potassium absorption and transport under low-K stress conditions in different soybean tissues.",2012-07-05 +23020263,"G-NEST: a gene neighborhood scoring tool to identify co-conserved, co-expressed genes.","

Background

In previous studies, gene neighborhoods-spatial clusters of co-expressed genes in the genome-have been defined using arbitrary rules such as requiring adjacency, a minimum number of genes, a fixed window size, or a minimum expression level. In the current study, we developed a Gene Neighborhood Scoring Tool (G-NEST) which combines genomic location, gene expression, and evolutionary sequence conservation data to score putative gene neighborhoods across all possible window sizes simultaneously.

Results

Using G-NEST on atlases of mouse and human tissue expression data, we found that large neighborhoods of ten or more genes are extremely rare in mammalian genomes. When they do occur, neighborhoods are typically composed of families of related genes. Both the highest scoring and the largest neighborhoods in mammalian genomes are formed by tandem gene duplication. Mammalian gene neighborhoods contain highly and variably expressed genes. Co-localized noisy gene pairs exhibit lower evolutionary conservation of their adjacent genome locations, suggesting that their shared transcriptional background may be disadvantageous. Genes that are essential to mammalian survival and reproduction are less likely to occur in neighborhoods, although neighborhoods are enriched with genes that function in mitosis. We also found that gene orientation and protein-protein interactions are partially responsible for maintenance of gene neighborhoods.

Conclusions

Our experiments using G-NEST confirm that tandem gene duplication is the primary driver of non-random gene order in mammalian genomes. Non-essentiality, co-functionality, gene orientation, and protein-protein interactions are additional forces that maintain gene neighborhoods, especially those formed by tandem duplicates. We expect G-NEST to be useful for other applications such as the identification of core regulatory modules, common transcriptional backgrounds, and chromatin domains. The software is available at http://docpollard.org/software.html.",2012-09-28 +21715382,ncFANs: a web server for functional annotation of long non-coding RNAs.,"Recent interest in the non-coding transcriptome has resulted in the identification of large numbers of long non-coding RNAs (lncRNAs) in mammalian genomes, most of which have not been functionally characterized. Computational exploration of the potential functions of these lncRNAs will therefore facilitate further work in this field of research. We have developed a practical and user-friendly web interface called ncFANs (non-coding RNA Function ANnotation server), which is the first web service for functional annotation of human and mouse lncRNAs. On the basis of the re-annotated Affymetrix microarray data, ncFANs provides two alternative strategies for lncRNA functional annotation: one utilizing three aspects of a coding-non-coding gene co-expression (CNC) network, the other identifying condition-related differentially expressed lncRNAs. ncFANs introduces a highly efficient way of re-using the abundant pre-existing microarray data. The present version of ncFANs includes re-annotated CDF files for 10 human and mouse Affymetrix microarrays, and the server will be continuously updated with more re-annotated microarray platforms and lncRNA data. 
ncFANs is freely accessible at http://www.ebiomed.org/ncFANs/ or http://www.noncode.org/ncFANs/.",2011-07-01 +22906555,mtDNAoffice: a software to assign human mtDNA macro haplogroups through automated analysis of the protein coding region.,"We describe a fast, automated process to determine distances between mtDNA sequences allowing their subsequent clustering and haplogroup assignment that may increase the speed of data analysis and avoid human errors. In order to avoid complexities/ambiguities resulting from recurrence and insertion/deletion phenomena and thus improving evolutionary signal-to-noise ratio, protein coding sequences were compared using a vectorial representation method, and the corresponding genetic distance matrix was used for the construction of a neighbor-joining/UPGMA tree or an MDS graphic, which generally agrees with the consensus mtDNA phylogeny. mtDNAoffice software, detailed instructions and example files are freely available on the web at http://www.portugene.com/SupMat/setupmtDNAoffice.rar.",2012-08-18 +22025480,PoPoolation2: identifying differentiation between populations using sequencing of pooled DNA samples (Pool-Seq).,"

Summary

Sequencing pooled DNA samples (Pool-Seq) is the most cost-effective approach for the genome-wide comparison of population samples. Here, we introduce PoPoolation2, the first software tool specifically designed for the comparison of populations with Pool-Seq data. PoPoolation2 implements a range of commonly used measures of differentiation (F(ST), Fisher's exact test and Cochran-Mantel-Haenszel test) that can be applied on different scales (windows, genes, exons, SNPs). The result may be visualized with the widely used Integrated Genomics Viewer.

Availability and implementation

PoPoolation2 is implemented in Perl and R. It is freely available on http://code.google.com/p/popoolation2/

Contact

christian.schloetterer@vetmeduni.ac.at

Supplementary information

Manual: http://code.google.com/p/popoolation2/wiki/Manual Test data and tutorial: http://code.google.com/p/popoolation2/wiki/Tutorial Validation: http://code.google.com/p/popoolation2/wiki/Validation.",2011-10-23 +21502391,"Optimizing analysis, visualization, and navigation of large image data sets: one 5000-section CT scan can ruin your whole day.","

Unlabelled

The technology revolution in image acquisition, instrumentation, and methods has resulted in vast data sets that far outstrip the human observers' ability to view, digest, and interpret modern medical images by using traditional methods. This may require a paradigm shift in the radiologic interpretation process. As human observers, radiologists must search for, detect, and interpret targets. Potential interventions should be based on an understanding of human perceptual and attentional abilities and limitations. New technologies and tools already in use in other fields can be adapted to the health care environment to improve medical image analysis, visualization, and navigation through large data sets. This historical psychophysical and technical review touches on a broad range of disciplines but focuses mainly on the analysis, visualization, and navigation of image data performed during the interpretive process. Advanced postprocessing, including three-dimensional image display, multimodality image fusion, quantitative measures, and incorporation of innovative human-machine interfaces, will likely be the future. Successful new paradigms will integrate image and nonimage data, incorporate workflow considerations, and be informed by evidence-based practices. This overview is meant to heighten the awareness of the complexities and limitations of how radiologists interact with images, particularly the large image sets generated today. Also addressed is how human-machine interface and informatics technologies could combine to transform the interpretation process in the future to achieve safer and better quality care for patients and a more efficient and effective work environment for radiologists.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11091276/-/DC1.",2011-05-01 +23919729,"The distinct expression patterns of claudin-2, -6, and -11 between human gastric neoplasms and adjacent non-neoplastic tissues.","

Background

Cancers have a multifactorial etiology a part of which is genetic. Recent data indicate that expression of the tight junction claudin proteins is involved in the etiology and progression of cancer.

Methods

To explore the correlations of the tight junction proteins claudin-2,-6, and -11 in the pathogenesis and clinical behavior of gastric cancer, 40 gastric cancer tissues and 28 samples of non-neoplastic tissues adjacent to the tumors were examined for expression of claudin-2,-6, and -11 by streptavidin-perosidase immunohistochemical staining method.

Results

The positive expression rates of claudin-2 in gastric cancer tissues and adjacent tissues were 25% and 68% respectively (P<0.001). The positive expression rates of claudin-6 in gastric cancer tissues and adjacent tissues were 55% and 79% respectively (P=0.045<0.05). In contrast, the positive expression rates of claudin-11 in gastric cancer tissues and gastric cancer adjacent tissues were 80% and 46% (P=0.004<0.01). Thus in our study, the expression of claudin-2, and claudin-6 was down regulated in gastric cancer tissue while the expression of claudin-11 was up regulated. Correlations between claudin expression and clinical behavior were not observed.

Conclusion

Our study provides the first evidence that claudin-2,-6, and -11 protein expression varies between human gastric cancers and adjacent non-neoplastic tissues.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/5470513569630744.",2013-08-06 +23782641,"Phaseoloideside E, a novel natural triterpenoid saponin identified from Entada phaseoloides, induces apoptosis in Ec-109 esophageal cancer cells through reactive oxygen species generation.","Phaseoloideside E (PE), a new oleanane-type triterpene saponin, was isolated from the seed kernels of Entada phaseoloides (Linn.) Merr. PE had strong cytotoxic activity against an array of malignant cells. Typical morphological and biochemical features of apoptosis were observed in PE-treated Ec-109 cells. PE induced a dose-dependent increase in the sub-G1 fraction of the cell cycle and DNA fragmentation. Decreases in the mitochondrial membrane potential, SOD activity, and GSH content were also observed. Further investigations revealed that PE reduced the ratio of Bcl-2 to Bax and increased the activities of caspase-3 and -9, but this was prevented by Z-VAD-fmk. PE also induced a decrease of the sub-G1 fraction. Furthermore, PE-induced apoptosis was mediated by up-regulating cellular ROS, which was suppressed by cotreating the cells with N-acetylcysteine (NAC). NAC also attenuated the ratio of sub-G1, the generation of DNA fragmentation and the expression of Bcl-2, Bax, caspase-3, and caspase-9. Interestingly, PE did not up-regulate ROS or induce cell death in untransformed cells. These data showed that PE induces cell death through up-regulation of cellular ROS production. Our investigation provides the scientific basis for the traditional application of this herb and suggests the possibility that PE may be used for a treatment of esophageal carcinoma. [Supplementary materials: available only at http://dx.doi.org/10.1254/jphs.12193FP].",2013-06-20 +21252075,Dalliance: interactive genome viewing on the web.,"

Summary

Dalliance is a new genome viewer which offers a high level of interactivity while running within a web browser. All data is fetched using the established distributed annotation system (DAS) protocol, making it easy to customize the browser and add extra data.

Availability and implementation

Dalliance runs entirely within your web browser, and relies on existing DAS server infrastructure. Browsers for several mammalian genomes are available at http://www.biodalliance.org/, and the use of DAS means you can add your own data to these browsers. In addition, the source code (Javascript) is available under the BSD license, and is straightforward to install on your own web server and embed within other documents.",2011-01-19 +23172865,High-throughput microbial population genomics using the Cortex variation assembler.,"

Summary

We have developed a software package, Cortex, designed for the analysis of genetic variation by de novo assembly of multiple samples. This allows direct comparison of samples without using a reference genome as intermediate and incorporates discovery and genotyping of single-nucleotide polymorphisms, indels and larger events in a single framework. We introduce pipelines which simplify the analysis of microbial samples and increase discovery power; these also enable the construction of a graph of known sequence and variation in a species, against which new samples can be compared rapidly. We demonstrate the ease-of-use and power by reproducing the results of studies using both long and short reads.

Availability

http://cortexassembler.sourceforge.net (GPLv3 license).

Contact

zam@well.ox.ac.uk, mcvean@well.ox.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-19 +23162053,PASS-bis: a bisulfite aligner suitable for whole methylome analysis of Illumina and SOLiD reads.,"

Summary

The sequencing of bisulfite-treated DNA (Bi-Seq) is becoming a gold standard for methylation studies. The mapping of Bi-Seq reads is complex and requires special alignment algorithms. This problem is particularly relevant for SOLiD color space, where the bisulfite conversion C/T changes two adjacent colors into 16 possible combinations. Here, we present an algorithm that efficiently aligns Bi-Seq reads obtained either from SOLiD or Illumina. An accompanying methylation-caller program creates a genomic view of methylated and unmethylated Cs on both DNA strands.

Availability and implementation

The algorithm has been implemented as an option of the program PASS, freely available at http://pass.cribi.unipd.it.

Contact

pass@cribi.unipd.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-17 +22766416,FunGene-DB: a web-based tool for Polyporales strains authentication.,"Polyporales are extensively studied wood-decaying fungi with applications in white and green biotechnologies and in medicinal chemistry. We developed an open-access, user-friendly, bioinformatics tool named FunGene-DB (http://www.fungene-db.org). The goal was to facilitate the molecular authentication of Polyporales strains and fruit-bodies, otherwise subjected to morphological studies. This tool includes a curated database that contains ITS1-5.8S-ITS2 rDNA genes screened through a semi-automated pipeline from the International Nucleotide Sequence Database (INSD), and the similarity search BLASTn program. Today, the web-accessible database compiles 2379 accepted sequences, among which 386 were selected as reference sequences (most often fully identified ITS sequences for which a voucher, strain or specimen, has been deposited in a public-access collection). The restriction of the database to one reference sequence per species (or per clade for species complex) allowed most often unequivocal analysis. We conclude that FunGene-DB is a promising tool for molecular authentication of Polyporales. It should be especially useful for scientists who are not expert mycologists but who need to check the identity of strains (e.g. for culture collections, for applied microbiology).",2012-07-02 +22748121,Genes2FANs: connecting genes through functional association networks.,"

Background

Protein-protein, cell signaling, metabolic, and transcriptional interaction networks are useful for identifying connections between lists of experimentally identified genes/proteins. However, besides physical or co-expression interactions there are many ways in which pairs of genes, or their protein products, can be associated. By systematically incorporating knowledge on shared properties of genes from diverse sources to build functional association networks (FANs), researchers may be able to identify additional functional interactions between groups of genes that are not readily apparent.

Results

Genes2FANs is a web based tool and a database that utilizes 14 carefully constructed FANs and a large-scale protein-protein interaction (PPI) network to build subnetworks that connect lists of human and mouse genes. The FANs are created from mammalian gene set libraries where mouse genes are converted to their human orthologs. The tool takes as input a list of human or mouse Entrez gene symbols to produce a subnetwork and a ranked list of intermediate genes that are used to connect the query input list. In addition, users can enter any PubMed search term and then the system automatically converts the returned results to gene lists using GeneRIF. This gene list is then used as input to generate a subnetwork from the user's PubMed query. As a case study, we applied Genes2FANs to connect disease genes from 90 well-studied disorders. We find an inverse correlation between the counts of links connecting disease genes through PPI and links connecting diseases genes through FANs, separating diseases into two categories.

Conclusions

Genes2FANs is a useful tool for interpreting the relationships between gene/protein lists in the context of their various functions and networks. Combining functional association interactions with physical PPIs can be useful for revealing new biology and help form hypotheses for further experimentation. Our finding that disease genes in many cancers are mostly connected through PPIs whereas other complex diseases, such as autism and type-2 diabetes, are mostly connected through FANs without PPIs, can guide better strategies for disease gene discovery. Genes2FANs is available at: http://actin.pharm.mssm.edu/genes2FANs.",2012-07-02 +24058058,Exploring variation-aware contig graphs for (comparative) metagenomics using MaryGold.,"

Motivation

Although many tools are available to study variation and its impact in single genomes, there is a lack of algorithms for finding such variation in metagenomes. This hampers the interpretation of metagenomics sequencing datasets, which are increasingly acquired in research on the (human) microbiome, in environmental studies and in the study of processes in the production of foods and beverages. Existing algorithms often depend on the use of reference genomes, which pose a problem when a metagenome of a priori unknown strain composition is studied. In this article, we develop a method to perform reference-free detection and visual exploration of genomic variation, both within a single metagenome and between metagenomes.

Results

We present the MaryGold algorithm and its implementation, which efficiently detects bubble structures in contig graphs using graph decomposition. These bubbles represent variable genomic regions in closely related strains in metagenomic samples. The variation found is presented in a condensed Circos-based visualization, which allows for easy exploration and interpretation of the found variation. We validated the algorithm on two simulated datasets containing three respectively seven Escherichia coli genomes and showed that finding allelic variation in these genomes improves assemblies. Additionally, we applied MaryGold to publicly available real metagenomic datasets, enabling us to find within-sample genomic variation in the metagenomes of a kimchi fermentation process, the microbiome of a premature infant and in microbial communities living on acid mine drainage. Moreover, we used MaryGold for between-sample variation detection and exploration by comparing sequencing data sampled at different time points for both of these datasets.

Availability

MaryGold has been written in C++ and Python and can be downloaded from http://bioinformatics.tudelft.nl/software",2013-09-20 +21226895,Shape-based peak identification for ChIP-Seq.,"

Background

The identification of binding targets for proteins using ChIP-Seq has gained popularity as an alternative to ChIP-chip. Sequencing can, in principle, eliminate artifacts associated with microarrays, and cheap sequencing offers the ability to sequence deeply and obtain a comprehensive survey of binding. A number of algorithms have been developed to call ""peaks"" representing bound regions from mapped reads. Most current algorithms incorporate multiple heuristics, and despite much work it remains difficult to accurately determine individual peaks corresponding to distinct binding events.

Results

Our method for identifying statistically significant peaks from read coverage is inspired by the notion of persistence in topological data analysis and provides a non-parametric approach that is statistically sound and robust to noise in experiments. Specifically, our method reduces the peak calling problem to the study of tree-based statistics derived from the data. We validate our approach using previously published data and show that it can discover previously missed regions.

Conclusions

The difficulty in accurately calling peaks for ChIP-Seq data is partly due to the difficulty in defining peaks, and we demonstrate a novel method that improves on the accuracy of previous methods in resolving peaks. Our introduction of a robust statistical test based on ideas from topological data analysis is also novel. Our methods are implemented in a program called T-PIC (Tree shape Peak Identification for ChIP-Seq) is available at http://bio.math.berkeley.edu/tpic/.",2011-01-12 +23823315,"CluGene: A Bioinformatics Framework for the Identification of Co-Localized, Co-Expressed and Co-Regulated Genes Aimed at the Investigation of Transcriptional Regulatory Networks from High-Throughput Expression Data.","The full understanding of the mechanisms underlying transcriptional regulatory networks requires unravelling of complex causal relationships. Genome high-throughput technologies produce a huge amount of information pertaining gene expression and regulation; however, the complexity of the available data is often overwhelming and tools are needed to extract and organize the relevant information. This work starts from the assumption that the observation of co-occurrent events (in particular co-localization, co-expression and co-regulation) may provide a powerful starting point to begin unravelling transcriptional regulatory networks. Co-expressed genes often imply shared functional pathways; co-expressed and functionally related genes are often co-localized, too; moreover, co-expressed and co-localized genes are also potential targets for co-regulation; finally, co-regulation seems more frequent for genes mapped to proximal chromosome regions. Despite the recognized importance of analysing co-occurrent events, no bioinformatics solution allowing the simultaneous analysis of co-expression, co-localization and co-regulation is currently available. 
Our work resulted in developing and valuating CluGene, a software providing tools to analyze multiple types of co-occurrences within a single interactive environment allowing the interactive investigation of combined co-expression, co-localization and co-regulation of genes. The use of CluGene will enhance the power of testing hypothesis and experimental approaches aimed at unravelling transcriptional regulatory networks. The software is freely available at http://bioinfolab.unipg.it/.",2013-06-18 +21945114,An alternative methodology for interpretation and reporting of hand hygiene compliance data.,"

Background

Since 2009, all hospitals in Ontario have been mandated to publicly report health care provider compliance with hand hygiene opportunities (http://www.health.gov.on.ca/patient_safety/index.html). Hand hygiene compliance (HHC) is reported for 2 of the 4 moments during the health care provider-patient encounter. This study analyzes the HHC data by using an alternative methodology for interpretation and reporting.

Methods

Annualized HHC data were available for fiscal years 2009 and 2010 for each of the 5 hospital corporations (6 sites) in the North Simcoe Muskoka Local Health Integration Network. The weighted average for HHC was used to estimate the overall observed rate for HHC for each hospital and reporting period. Using Bayes' probability theorem, this estimate was used to predict the probability that any patient would experience HHC for at least 75% of hand hygiene moments. This probability was categorized as excellent (≥75%), above average (50%-74%), below average (25%-49%), or poor (<25%). The results were reported using a balanced scorecard display.

Results

The overall observed rates for HHC ranged from 50% to 87% (mean, 75% ± 11%, P = .079). Using the alternative methodology for reporting, 6 of the 12 reporting periods would be categorized as excellent, 1 as above average, 2 as below average, and 3 as poor.

Conclusion

Population-level HHC data can be converted to patient-level risk information. Reporting this information to the public may increase the value and understandability of this patient safety indicator.",2011-09-25 +22580308,"Analysis of expressed sequence tags from the antarctic psychrophilic green algae, Pyramimonas gelidicola.","Expressed sequence tags (ESTs) from the Antarctic green algae Pyramimonas gelidicola were analyzed to obtain molecular information on cold acclimation of psychrophilic microorganisms. A total of 2,112 EST clones were sequenced, generating 222 contigs and 219 singletons, and 200 contigs and 391 singletons from control (4 degrees C) and cold-shock conditions (-2 degrees C), respectively. The complete EST sequences were deposited to the DDBJ EST database (http:// www.ddbj.nig.ac.jp/index-e.html) and the nucleotide sequences reported in this study are available in the DDBJ/EMBL/ GenBank. These EST databases of Antarctic green algae can be used in a wide range of studies on psychrophilic genes expressed by polar microorganisms.",2012-07-01 +21840973,Evidence-based annotation of transcripts and proteins in the sulfate-reducing bacterium Desulfovibrio vulgaris Hildenborough.,"We used high-resolution tiling microarrays and 5' RNA sequencing to identify transcripts in Desulfovibrio vulgaris Hildenborough, a model sulfate-reducing bacterium. We identified the first nucleotide position for 1,124 transcripts, including 54 proteins with leaderless transcripts and another 72 genes for which a major transcript initiates within the upstream protein-coding gene, which confounds measurements of the upstream gene's expression. Sequence analysis of these promoters showed that D. vulgaris prefers -10 and -35 boxes different from those preferred by Escherichia coli. A total of 549 transcripts ended at intrinsic (rho-independent) terminators, but most of the other transcripts seemed to have variable ends. 
We found low-level antisense expression of most genes, and the 5' ends of these transcripts mapped to promoter-like sequences. Because antisense expression was reduced for highly expressed genes, we suspect that elongation of nonspecific antisense transcripts is suppressed by transcription of the sense strand. Finally, we combined the transcript results with comparative analysis and proteomics data to make 505 revisions to the original annotation of 3,531 proteins: we removed 255 (7.5%) proteins, changed 123 (3.6%) start codons, and added 127 (3.7%) proteins that had been missed. Tiling data had higher coverage than shotgun proteomics and hence led to most of the corrections, but many errors probably remain. Our data are available at http://genomics.lbl.gov/supplemental/DvHtranscripts2011/.",2011-08-12 +23670815,Identification of efflux proteins using efficient radial basis function networks with position-specific scoring matrices and biochemical properties.,"Efflux proteins are membrane proteins, which are involved in the transportation of multidrugs. The annotation of efflux proteins in genomic sequences would aid to understand the function. Although the percentage of membrane proteins in genomes is estimated to be 25-30%, there is no information about the content of efflux proteins. For annotating such class of proteins it is necessary to develop a reliable method to identify efflux proteins from amino acid sequence information. In this work, we have developed a method based on radial basis function networks using position specific scoring matrices (PSSM) and amino acid properties. We noticed that the C-terminal domain of efflux proteins contain vital information for discrimination. Our method showed an accuracy of 78 and 92% in discriminating efflux proteins from transporters and membrane proteins, respectively using fivefold cross-validation. We utilized our method for annotating the genomes E. coli and P. 
aeruginosa and it predicted 8.7 and 9.2% of proteins as efflux proteins in these genomes, respectively. The predicted efflux proteins have been compared with available experimental data and we observed a very good agreement between them. Further, we developed a web server for classifying efflux proteins and it is freely available at http://rbf.bioinfo.tw/∼sachen/EFFLUXpredict/Efflux-RBF.php. We suggest that our method could be an effective tool for annotating efflux proteins in genomic sequences.",2013-06-17 +21725060,Visualization and biochemical analyses of the emerging mammalian 14-3-3-phosphoproteome.,"Hundreds of candidate 14-3-3-binding (phospho)proteins have been reported in publications that describe one interaction at a time, as well as high-throughput 14-3-3-affinity and mass spectrometry-based studies. Here, we transcribed these data into a common format, deposited the collated data from low-throughput studies in MINT (http://mint.bio.uniroma2.it/mint), and compared the low- and high-throughput data in VisANT graphs that are easy to analyze and extend. Exploring the graphs prompted questions about technical and biological specificity, which were addressed experimentally, resulting in identification of phosphorylated 14-3-3-binding sites in the mitochondrial import sequence of the iron-sulfur cluster assembly enzyme (ISCU), cytoplasmic domains of the mitochondrial fission factor (MFF), and endoplasmic reticulum-tethered receptor expression-enhancing protein 4 (REEP4), RNA regulator SMAUG2, and cytoskeletal regulatory proteins, namely debrin-like protein (DBNL) and kinesin light chain (KLC) isoforms. Therefore, 14-3-3s undergo physiological interactions with proteins that are destined for diverse subcellular locations. 
Graphing and validating interactions underpins efforts to use 14-3-3-phosphoproteomics to identify mechanisms and biomarkers for signaling pathways in health and disease.",2011-07-01 +21714929,SeqGene: a comprehensive software solution for mining exome- and transcriptome- sequencing data.,"

Background

The popularity of massively parallel exome and transcriptome sequencing projects demands new data mining tools with a comprehensive set of features to support a wide range of analysis tasks.

Results

SeqGene, a new data mining tool, supports mutation detection and annotation, dbSNP and 1000 Genome data integration, RNA-Seq expression quantification, mutation and coverage visualization, allele specific expression (ASE), differentially expressed genes (DEGs) identification, copy number variation (CNV) analysis, and gene expression quantitative trait loci (eQTLs) detection. We also developed novel methods for testing the association between SNP and expression and identifying genotype-controlled DEGs. We showed that the results generated from SeqGene compares favourably to other existing methods in our case studies.

Conclusion

SeqGene is designed as a general-purpose software package. It supports both paired-end reads and single reads generated on most sequencing platforms; it runs on all major types of computers; it supports arbitrary genome assemblies for arbitrary organisms; and it scales well to support both large and small scale sequencing projects. The software homepage is http://seqgene.sourceforge.net.",2011-06-29 +22750101,LSHGD: a database for human leprosy susceptible genes.,"Studies aiming to explore the involvement of host genetic factors to determine susceptibility to develop disease and individual's response to the infection with Mycobacterium leprae have increased in recent years. To address this issue, we have developed a Leprosy Susceptible Human Gene Database (LSHGD) to integrate leprosy and human associated 45 genes by profound literature search. This will serve as a user-friendly and interactive platform to understand the involvement of human polymorphisms (SNPs) in leprosy, independent genetic control over both susceptibility to leprosy and its association with multi-drug resistance of M. leprae. As the first human genetic database in leprosy it aims to provide information about the associated genes, corresponding protein sequences, available three dimensional structures and polymorphism related to leprosy. In conclusion, this will serve as a multifunctional valuable tool and convenient information platform which is freely available at http://www.vit.ac.in/leprosy/leprosy.htm and enables the user to retrieve information of their interest.",2012-06-30 +24521294,Prediction and classification of ncRNAs using structural information.,"

Background

Evidence is accumulating that non-coding transcripts, previously thought to be functionally inert, play important roles in various cellular activities. High throughput techniques like next generation sequencing have resulted in the generation of vast amounts of sequence data. It is therefore desirable, not only to discriminate coding and non-coding transcripts, but also to assign the noncoding RNA (ncRNA) transcripts into respective classes (families). Although there are several algorithms available for this task, their classification performance remains a major concern. Acknowledging the crucial role that non-coding transcripts play in cellular processes, it is required to develop algorithms that are able to precisely classify ncRNA transcripts.

Results

In this study, we initially develop prediction tools to discriminate coding or non-coding transcripts and thereafter classify ncRNAs into respective classes. In comparison to the existing methods that employed multiple features, our SVM-based method by using a single feature (tri-nucleotide composition), achieved MCC of 0.98. Knowing that the structure of a ncRNA transcript could provide insights into its biological function, we use graph properties of predicted ncRNA structures to classify the transcripts into 18 different non-coding RNA classes. We developed classification models using a variety of algorithms (BayeNet, NaiveBayes, MultilayerPerceptron, IBk, libSVM, SMO and RandomForest) and observed that model based on RandomForest performed better than other models. As compared to the GraPPLE study, the sensitivity (of 13 classes) and specificity (of 14 classes) was higher. Moreover, the overall sensitivity of 0.43 outperforms the sensitivity of GraPPLE (0.33) whereas the overall MCC measure of 0.40 (in contrast to MCC of 0.29 of GraPPLE) was significantly higher for our method. This clearly demonstrates that our models are more accurate than existing models.

Conclusions

This work conclusively demonstrates that a simple feature, tri-nucleotide composition, is sufficient to discriminate between coding and non-coding RNA sequences. Similarly, graph properties based feature set along with RandomForest algorithm are most suitable to classify different ncRNA classes. We have also developed an online and standalone tool-- RNAcon ( http://crdd.osdd.net/raghava/rnacon).",2014-02-13 +22031814,Automated detection and segmentation of synaptic contacts in nearly isotropic serial electron microscopy images.,"We describe a protocol for fully automated detection and segmentation of asymmetric, presumed excitatory, synapses in serial electron microscopy images of the adult mammalian cerebral cortex, taken with the focused ion beam, scanning electron microscope (FIB/SEM). The procedure is based on interactive machine learning and only requires a few labeled synapses for training. The statistical learning is performed on geometrical features of 3D neighborhoods of each voxel and can fully exploit the high z-resolution of the data. On a quantitative validation dataset of 111 synapses in 409 images of 1948×1342 pixels with manual annotations by three independent experts the error rate of the algorithm was found to be comparable to that of the experts (0.92 recall at 0.89 precision). Our software offers a convenient interface for labeling the training data and the possibility to visualize and proofread the results in 3D. The source code, the test dataset and the ground truth annotation are freely available on the website http://www.ilastik.org/synapse-detection.",2011-10-21 +22888955,L1 cell adhesion molecule overexpression in hepatocellular carcinoma associates with advanced tumor progression and poor patient survival.,"

Objective

L1 cell adhesion molecule (L1CAM), as a member of the immunoglobulin superfamily, has recently been observed in a variety of human malignancies. However, no data of L1CAM are available for hepatocellular carcinoma (HCC). The aim of this study was to investigate the expression of L1CAM in HCC and determine its correlation with tumor progression and prognosis.

Methods

One-hundred and thirty HCC patients who had undergone curative liver resection were selected and immunohistochemistry, Western blotting, and quantitative real time polymerase chain reaction (Q-PCR) were performed to analyze L1CAM expression in the respective tumors.

Results

Immunohistochemistry, Western blotting, and Q-PCR consistently confirmed the overexpression of L1CAM in HCC tissues compared with their adjacent nonneoplastic tissues at both protein and gene level (both P <0.01). Additionally, the high expression of L1CAM was significantly associated with advanced tumor stage (P = 0.02) and advanced tumor grade (P = 0.03), respectively. Moreover, HCC patients with high L1CAM expression were significantly associated with lower 5-year overall survival (P <0.01) and lower 5-year disease-free survival (P <0.01), respectively. The Cox proportional hazards model further showed that L1CAM over-expression was an independent poor prognostic factor for both 5-year disease-free survival (P = 0.02) and 5-year overall survival (P = 0.008) in HCC.

Conclusion

Our data suggest for the first time that L1CAM expression in HCC was significantly correlated with the advanced tumor progression and was an independent poor prognostic factor for both overall survival and disease-free survival in patients with HCC.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1970024872761542.",2012-08-13 +22829734,TPX: Biomedical literature search made easy.,"

Unlabelled

TPX is a web-based PubMed search enhancement tool that enables faster article searching using analysis and exploration features. These features include identification of relevant biomedical concepts from search results with linkouts to source databases, concept based article categorization, concept assisted search and filtering, query refinement. A distinguishing feature here is the ability to add user-defined concept names and/or concept types for named entity recognition. The tool allows contextual exploration of knowledge sources by providing concept association maps derived from the MEDLINE repository. It also has a full-text search mode that can be configured on request to access local text repositories, incorporating entity co-occurrence search at sentence/paragraph levels. Local text files can also be analyzed on-the-fly.

Availability

http://tpx.atc.tcs.com",2012-06-28 +22829735,RAmiRNA: Software suite for generation of SVMbased prediction models of mature miRNAs.,"

Unlabelled

MicroRNAs (miRNAs) are short endogenous non-coding RNA molecules that regulate protein coding gene expression in animals, plants, fungi, algae and viruses through the RNA interference pathway. By virtue of their base complementarity, mature miRNAs stop the process of translation, thus acting as one of the important molecules in vivo. Attempts to predict precursor-miRNAs and mature miRNAs have been achieved in a significant number of model organisms but development of prediction models aiming at relatively less studied organisms are rare. In this work, we provide a suite of standalone softwares called RAmiRNA (RAdicalmiRNA detector), to solve the problem of custom development of prediction models for mature miRNAs using support vector machine (SVM) learning. RAmiRNA could be used to develop SVM based model for prediction of mature miRNAs in an organism or a group of organisms in a UNIX based local machine. Additionally RAmiRNA generates training accuracy for a quick estimation of prediction ability of generated model.

Availability

The database is available for free at http://ircb.iiita.ac.in.",2012-06-28 +22759499,Facilitating knowledge transfer: decision support tools in environment and health.,"The HENVINET Health and Environment Network aimed to enhance the use of scientific knowledge in environmental health for policy making. One of the goals was to identify and evaluate Decision Support Tools (DST) in current use. Special attention was paid to four ""priority"" health issues: asthma and allergies, cancer, neurodevelopment disorders, and endocrine disruptors. We identified a variety of tools that are used for decision making at various levels and by various stakeholders. We developed a common framework for information acquisition about DSTs, translated this to a database structure and collected the information in an online Metadata Base (MDB). The primary product is an open access web-based MDB currently filled with 67 DSTs, accessible through the HENVINET networking portal http://www.henvinet.eu and http://henvinet.nilu.no. Quality assurance and control of the entries and evaluation of requirements to use the DSTs were also a focus of the work. The HENVINET DST MDB is an open product that enables the public to get basic information about the DSTs, and to search the DSTs using pre-designed attributes or free text. Registered users are able to 1) review and comment on existing DSTs; 2) evaluate each DST's functionalities, and 3) add new DSTs, or change the entry for their own DSTs. Assessment of the available 67 DSTs showed: 1) more than 25% of the DSTs address only one pollution source; 2) 25% of the DSTs address only one environmental stressor; 3) almost 50% of the DSTs are only applied to one disease; 4) 41% of the DSTs can only be applied to one decision making area; 5) 60% of the DSTs' results are used only by national authority and/or municipality/urban level administration; 6) almost half of the DSTs are used only by environmental professionals and researchers. 
This indicates that there is a need to develop DSTs covering an increasing number of pollution sources, environmental stressors and health end points, and considering links to other 'Driving forces-Pressures-State-Exposure-Effects-Actions' (DPSEEA) elements. Of interest to both researchers and decision makers should be the standardization of the way DSTs are described for easier access to the knowledge, and the identification of coverage gaps.",2012-06-28 +22761559,ProteinHistorian: tools for the comparative analysis of eukaryote protein origin.,"The evolutionary history of a protein reflects the functional history of its ancestors. Recent phylogenetic studies identified distinct evolutionary signatures that characterize proteins involved in cancer, Mendelian disease, and different ontogenic stages. Despite the potential to yield insight into the cellular functions and interactions of proteins, such comparative phylogenetic analyses are rarely performed, because they require custom algorithms. We developed ProteinHistorian to make tools for performing analyses of protein origins widely available. Given a list of proteins of interest, ProteinHistorian estimates the phylogenetic age of each protein, quantifies enrichment for proteins of specific ages, and compares variation in protein age with other protein attributes. ProteinHistorian allows flexibility in the definition of protein age by including several algorithms for estimating ages from different databases of evolutionary relationships. We illustrate the use of ProteinHistorian with three example analyses. First, we demonstrate that proteins with high expression in human, compared to chimpanzee and rhesus macaque, are significantly younger than those with human-specific low expression. Next, we show that human proteins with annotated regulatory functions are significantly younger than proteins with catalytic functions. 
Finally, we compare protein length and age in many eukaryotic species and, as expected from previous studies, find a positive, though often weak, correlation between protein age and length. ProteinHistorian is available through a web server with an intuitive interface and as a set of command line tools; this allows biologists and bioinformaticians alike to integrate these approaches into their analysis pipelines. ProteinHistorian's modular, extensible design facilitates the integration of new datasets and algorithms. The ProteinHistorian web server, source code, and pre-computed ages for 32 eukaryotic genomes are freely available under the GNU public license at http://lighthouse.ucsf.edu/ProteinHistorian/.",2012-06-28 +22829736,Universal fingerprinting chip server.,"

Unlabelled

The Virtual Hybridization approach predicts the most probable hybridization sites across a target nucleic acid of known sequence, including both perfect and mismatched pairings. Potential hybridization sites, having a user-defined minimum number of bases that are paired with the oligonucleotide probe, are first identified. Then free energy values are evaluated for each potential hybridization site, and if it has a calculated free energy of equal or higher negative value than a user-defined free energy cut-off value, it is considered as a site of high probability of hybridization. The Universal Fingerprinting Chip Applications Server contains the software for visualizing predicted hybridization patterns, which yields a simulated hybridization fingerprint that can be compared with experimentally derived fingerprints or with a virtual fingerprint arising from a different sample.

Availability

The database is available for free at http://bioinformatica.homelinux.org/UFCVH/",2012-06-28 +22347386,Segmentation of multi-isotope imaging mass spectrometry data for semi-automatic detection of regions of interest.,"Multi-isotope imaging mass spectrometry (MIMS) associates secondary ion mass spectrometry (SIMS) with detection of several atomic masses, the use of stable isotopes as labels, and affiliated quantitative image-analysis software. By associating image and measure, MIMS allows one to obtain quantitative information about biological processes in sub-cellular domains. MIMS can be applied to a wide range of biomedical problems, in particular metabolism and cell fate [1], [2], [3]. In order to obtain morphologically pertinent data from MIMS images, we have to define regions of interest (ROIs). ROIs are drawn by hand, a tedious and time-consuming process. We have developed and successfully applied a support vector machine (SVM) for segmentation of MIMS images that allows fast, semi-automatic boundary detection of regions of interests. Using the SVM, high-quality ROIs (as compared to an expert's manual delineation) were obtained for 2 types of images derived from unrelated data sets. This automation simplifies, accelerates and improves the post-processing analysis of MIMS images. This approach has been integrated into ""Open MIMS,"" an ImageJ-plugin for comprehensive analysis of MIMS images that is available online at http://www.nrims.hms.harvard.edu/NRIMS_ImageJ.php.",2012-02-09 +21722407,Gene set analysis for longitudinal gene expression data.,"

Background

Gene set analysis (GSA) has become a successful tool to interpret gene expression profiles in terms of biological functions, molecular pathways, or genomic locations. GSA performs statistical tests for independent microarray samples at the level of gene sets rather than individual genes. Nowadays, an increasing number of microarray studies are conducted to explore the dynamic changes of gene expression in a variety of species and biological scenarios. In these longitudinal studies, gene expression is repeatedly measured over time such that a GSA needs to take into account the within-gene correlations in addition to possible between-gene correlations.

Results

We provide a robust nonparametric approach to compare the expressions of longitudinally measured sets of genes under multiple treatments or experimental conditions. The limiting distributions of our statistics are derived when the number of genes goes to infinity while the number of replications can be small. When the number of genes in a gene set is small, we recommend permutation tests based on our nonparametric test statistics to achieve reliable type I error and better power while incorporating unknown correlations between and within-genes. Simulation results demonstrate that the proposed method has a greater power than other methods for various data distributions and heteroscedastic correlation structures. This method was used for an IL-2 stimulation study and significantly altered gene sets were identified.

Conclusions

The simulation study and the real data application showed that the proposed gene set analysis provides a promising tool for longitudinal microarray analysis. R scripts for simulating longitudinal data and calculating the nonparametric statistics are posted on the North Dakota INBRE website http://ndinbre.org/programs/bioinformatics.php. Raw microarray data is available in Gene Expression Omnibus (National Center for Biotechnology Information) with accession number GSE6085.",2011-07-03 +21724591,Tissue-specific prediction of directly regulated genes.,"

Unlabelled

Direct binding by a transcription factor (TF) to the proximal promoter of a gene is strong evidence that the TF regulates the gene. Assaying the genome-wide binding of every TF in every cell type and condition is currently impractical. Histone modifications correlate with tissue/cell/condition-specific ('tissue specific') TF binding, so histone ChIP-seq data can be combined with traditional position weight matrix (PWM) methods to make tissue-specific predictions of TF-promoter interactions.

Results

We use supervised learning to train a naïve Bayes predictor of TF-promoter binding. The predictor's features are the histone modification levels and a PWM-based score for the promoter. Training and testing uses sets of promoters labeled using TF ChIP-seq data, and we use cross-validation on 23 such datasets to measure the accuracy. A PWM+histone naïve Bayes predictor using a single histone modification (H3K4me3) is substantially more accurate than a PWM score or a conservation-based score (phylogenetic motif model). The naïve Bayes predictor is more accurate (on average) at all sensitivity levels, and makes only half as many false positive predictions at sensitivity levels from 10% to 80%. On average, it correctly predicts 80% of bound promoters at a false positive rate of 20%. Accuracy does not diminish when we test the predictor in a different cell type (and species) from training. Accuracy is barely diminished even when we train the predictor without using TF ChIP-seq data.

Availability

Our tissue-specific predictor of promoters bound by a TF is called Dr Gene and is available at http://bioinformatics.org.au/drgene.

Contact

t.bailey@imb.uq.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-30 +22238263,MetExtract: a new software tool for the automated comprehensive extraction of metabolite-derived LC/MS signals in metabolomics research.,"

Motivation

Liquid chromatography-mass spectrometry (LC/MS) is a key technique in metabolomics. Since the efficient assignment of MS signals to true biological metabolites becomes feasible in combination with in vivo stable isotopic labelling, our aim was to provide a new software tool for this purpose.

Results

An algorithm and a program (MetExtract) have been developed to search for metabolites in in vivo labelled biological samples. The algorithm makes use of the chromatographic characteristics of the LC/MS data and detects MS peaks fulfilling the criteria of stable isotopic labelling. As a result of all calculations, the algorithm specifies a list of m/z values, the corresponding number of atoms of the labelling element (e.g. carbon) together with retention time and extracted adduct-, fragment- and polymer ions. Its function was evaluated using native (12)C- and uniformly (13)C-labelled standard substances.

Availability

MetExtract is available free of charge and warranty at http://code.google.com/p/metextract/. Precompiled executables are available for Windows operating systems.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-11 +22743224,GO-Elite: a flexible solution for pathway and ontology over-representation.,"

Unlabelled

We introduce GO-Elite, a flexible and powerful pathway analysis tool for a wide array of species, identifiers (IDs), pathways, ontologies and gene sets. In addition to the Gene Ontology (GO), GO-Elite allows the user to perform over-representation analysis on any structured ontology annotations, pathway database or biological IDs (e.g. gene, protein or metabolite). GO-Elite exploits the structured nature of biological ontologies to report a minimal set of non-overlapping terms. The results can be visualized on WikiPathways or as networks. Built-in support is provided for over 60 species and 50 ID systems, covering gene, disease and phenotype ontologies, multiple pathway databases, biomarkers, and transcription factor and microRNA targets. GO-Elite is available as a web interface, GenMAPP-CS plugin and as a cross-platform application.

Availability

http://www.genmapp.org/go_elite",2012-06-27 +23044545,fRMA ST: frozen robust multiarray analysis for Affymetrix Exon and Gene ST arrays.,"

Summary

Frozen robust multiarray analysis (fRMA) is a single-array preprocessing algorithm that retains the advantages of multiarray algorithms and removes certain batch effects by downweighting probes that have high between-batch residual variance. Here, we extend the fRMA algorithm to two new microarray platforms--Affymetrix Human Exon and Gene 1.0 ST--by modifying the fRMA probe-level model and extending the frma package to work with oligo ExonFeatureSet and GeneFeatureSet objects.

Availability and implementation

All packages are implemented in R. Source code and binaries are freely available through the Bioconductor project. Convenient links to all software and data packages can be found at http://mnmccall.com/software

Contact

mccallm@gmail.com.",2012-10-07 +23297334,Primary colorectal cancer: use of kinetic modeling of dynamic contrast-enhanced CT data to predict clinical outcome.,"

Purpose

To compare four different tracer kinetic models for the analysis of dynamic contrast material-enhanced computed tomographic (CT) data with respect to the prediction of 5-year overall survival in primary colorectal cancer.

Materials and methods

This study was approved by the ethical review board. Archival dynamic contrast-enhanced CT data from 46 patients with colorectal cancer, obtained as part of a research study, were analyzed retrospectively by using the distributed parameter, conventional compartmental, adiabatic tissue homogeneity, and generalized kinetic models. Blood flow, blood volume, mean transit time (MTT), permeability-surface area product, extraction fraction, extravascular extracellular volume (v(e)), and volume transfer constant (K(trans)) were compared by using the Friedman test, with statistical significance at 5%. Following receiver operating characteristic analysis, parameters of the different kinetic models and tumor stage were compared with respect to overall survival discrimination, with use of Kaplan Meier analysis and a univariate Cox proportional hazard model, with additional cross-validation and permutation testing.

Results

Blood flow was lower with the distributed parameter model than with the conventional compartmental and adiabatic tissue homogeneity models (P < .0001), and blood flow values determined with the conventional compartmental and adiabatic tissue homogeneity models were similar. Conversely, MTT was longer with the distributed parameter model than with the conventional compartmental and adiabatic tissue homogeneity models (P < .0001). Blood volume, permeability-surface area product, and v(e) were higher with the conventional compartmental model than with the adiabatic tissue homogeneity, distributed parameter, or generalized kinetic models (P < .0001). The extraction fraction was higher with the distributed parameter model than with the adiabatic tissue homogeneity model. With respect to 5-year overall survival, only the distributed parameter model-derived v(e) was predictive of 5-year overall survival with a threshold value of 15.48 mL/100 mL after cross-validation and permutation testing.

Conclusion

Parameter values differ significantly between models. Of the models investigated, the distributed parameter model was the best predictor of 5-year overall survival.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12120186/-/DC1.",2013-01-07 +22439608,Arena3D: visualizing time-driven phenotypic differences in biological systems.,"

Background

Elucidating the genotype-phenotype connection is one of the big challenges of modern molecular biology. To fully understand this connection, it is necessary to consider the underlying networks and the time factor. In this context of data deluge and heterogeneous information, visualization plays an essential role in interpreting complex and dynamic topologies. Thus, software that is able to bring the network, phenotypic and temporal information together is needed. Arena3D has been previously introduced as a tool that facilitates link discovery between processes. It uses a layered display to separate different levels of information while emphasizing the connections between them. We present novel developments of the tool for the visualization and analysis of dynamic genotype-phenotype landscapes.

Results

Version 2.0 introduces novel features that allow handling time course data in a phenotypic context. Gene expression levels or other measures can be loaded and visualized at different time points and phenotypic comparison is facilitated through clustering and correlation display or highlighting of impacting changes through time. Similarity scoring allows the identification of global patterns in dynamic heterogeneous data. In this paper we demonstrate the utility of the tool on two distinct biological problems of different scales. First, we analyze a medium scale dataset that looks at perturbation effects of the pluripotency regulator Nanog in murine embryonic stem cells. Dynamic cluster analysis suggests alternative indirect links between Nanog and other proteins in the core stem cell network. Moreover, recurrent correlations from the epigenetic to the translational level are identified. Second, we investigate a large scale dataset consisting of genome-wide knockdown screens for human genes essential in the mitotic process. Here, a potential new role for the gene lsm14a in cytokinesis is suggested. We also show how phenotypic patterning allows for extensive comparison and identification of high impact knockdown targets.

Conclusions

We present a new visualization approach for perturbation screens with multiple phenotypic outcomes. The novel functionality implemented in Arena3D enables effective understanding and comparison of temporal patterns within morphological layers, to help with the system-wide analysis of dynamic processes. Arena3D is available free of charge for academics as a downloadable standalone application from: http://arena3d.org/.",2012-03-22 +22039211,Cascade detection for the extraction of localized sequence features; specificity results for HIV-1 protease and structure-function results for the Schellman loop.,"

Motivation

The extraction of the set of features most relevant to function from classified biological sequence sets is still a challenging problem. A central issue is the determination of expected counts for higher order features so that artifact features may be screened.

Results

Cascade detection (CD), a new algorithm for the extraction of localized features from sequence sets, is introduced. CD is a natural extension of the proportional modeling techniques used in contingency table analysis into the domain of feature detection. The algorithm is successfully tested on synthetic data and then applied to feature detection problems from two different domains to demonstrate its broad utility. An analysis of HIV-1 protease specificity reveals patterns of strong first-order features that group hydrophobic residues by side chain geometry and exhibit substantial symmetry about the cleavage site. Higher order results suggest that favorable cooperativity is weak by comparison and broadly distributed, but indicate possible synergies between negative charge and hydrophobicity in the substrate. Structure-function results for the Schellman loop, a helix-capping motif in proteins, contain strong first-order features and also show statistically significant cooperativities that provide new insights into the design of the motif. These include a new 'hydrophobic staple' and multiple amphipathic and electrostatic pair features. CD should prove useful not only for sequence analysis, but also for the detection of multifactor synergies in cross-classified data from clinical studies or other sources.

Availability

Windows XP/7 application and data files available at: https://sites.google.com/site/cascadedetect/home.

Contact

nacnewell@comcast.net

Supplementary information

Supplementary information is available at Bioinformatics online.",2011-10-28 +21500218,MDAnalysis: a toolkit for the analysis of molecular dynamics simulations.,"MDAnalysis is an object-oriented library for structural and temporal analysis of molecular dynamics (MD) simulation trajectories and individual protein structures. It is written in the Python language with some performance-critical code in C. It uses the powerful NumPy package to expose trajectory data as fast and efficient NumPy arrays. It has been tested on systems of millions of particles. Many common file formats of simulation packages including CHARMM, Gromacs, Amber, and NAMD and the Protein Data Bank format can be read and written. Atoms can be selected with a syntax similar to CHARMM's powerful selection commands. MDAnalysis enables both novice and experienced programmers to rapidly write their own analytical tools and access data stored in trajectories in an easily accessible manner that facilitates interactive explorative analysis. MDAnalysis has been tested on and works for most Unix-based platforms such as Linux and Mac OS X. It is freely available under the GNU General Public License from http://mdanalysis.googlecode.com.",2011-04-15 +22731403,"Three-in-one agonists for PPAR-α, PPAR-γ, and PPAR-δ from traditional Chinese medicine.","Nowadays, the occurrence of metabolic syndrome, which is characterized by obesity and clinical disorders, has been increasing rapidly over the world. It induces several serious chronic diseases such as cardiovascular disease, dyslipidemia, gall bladder disease, hypertension, osteoarthritis, sleep apnea, stroke, and type 2 diabetes mellitus. Peroxisome proliferator-activated receptors (PPARs), which have three isoforms: PPAR-α, PPAR-γ, and PPAR-δ, are key regulators of adipogenesis, lipid and carbohydrate metabolism, and are potential drug targets for treating metabolic syndrome. 
The traditional Chinese medicine (TCM) compounds from TCM Database@Taiwan ( http://tcm.cmu.edu.tw/ ) were employed to virtually screen for potential PPAR agonists, and structure-based pharmacophore models were generated to identify the key interactions for each PPAR protein. In addition, molecular dynamics (MD) simulation was performed to evaluate the stability of the PPAR-ligand complexes in a dynamic state. (S)-Tryptophan-betaxanthin and berberrubine, which have higher Dock Score than controls, form stable interactions during MD, and are further supported by the structure-based pharmacophore models in each PPAR protein. Key features include stable H-bonds with Thr279 and Ala333 of PPAR-α, with Thr252, Thr253 and Lys331 of PPAR-δ, and with Arg316 and Glu371 of PPAR-γ. Hence, we propose the top two TCM candidates as potential lead compounds in developing agonists targeting PPARs protein for treating metabolic syndrome.",2012-06-26 +23275734,Sequence Maneuverer: tool for sequence extraction from genomes.,"

Unlabelled

The availability of genomic sequences of many organisms has opened new challenges in many aspects particularly in terms of genome analysis. Sequence extraction is a vital step and many tools have been developed to solve this issue. These tools are available publically but have limitations with reference to the sequence extraction, length of the sequence to be extracted, organism specificity and lack of user friendly interface. We have developed a java based software package having three modules which can be used independently or sequentially. The tool efficiently extracts sequences from large datasets with few simple steps. It can efficiently extract multiple sequences of any desired length from a genome of any organism. The results are crosschecked by published data.

Availability

URL 1: http://ww3.comsats.edu.pk/bio/ResearchProjects.aspx URL 2: http://ww3.comsats.edu.pk/bio/SequenceManeuverer.aspx.",2012-12-19 +22759425,Computing the protein binding sites.,"

Background

Identifying the location of binding sites on proteins is of fundamental importance for a wide range of applications including molecular docking, de novo drug design, structure identification and comparison of functional sites. Structural genomic projects are beginning to produce protein structures with unknown functions. Therefore, efficient methods are required if all these structures are to be properly annotated. Lots of methods for finding binding sites involve 3D structure comparison. Here we design a method to find protein binding sites by direct comparison of protein 3D structures.

Results

We have developed an efficient heuristic approach for finding similar binding sites from the surface of given proteins. Our approach consists of three steps: local sequence alignment, protein surface detection, and 3D structures comparison. We implement the algorithm and produce a software package that works well in practice. When comparing a complete protein with all complete protein structures in the PDB database, experiments show that the average recall value of our approach is 82% and the average precision value of our approach is also significantly better than the existing approaches.

Conclusions

Our program has much higher recall values than those existing programs. Experiments show that all the existing approaches have recall values less than 50%. This implies that more than 50% of real binding sites cannot be reported by those existing approaches. The software package is available at http://sites.google.com/site/guofeics/bsfinder.",2012-06-25 +21724593,TotalReCaller: improved accuracy and performance via integrated alignment and base-calling.,"

Motivation

Currently, re-sequencing approaches use multiple modules serially to interpret raw sequencing data from next-generation sequencing platforms, while remaining oblivious to the genomic information until the final alignment step. Such approaches fail to exploit the full information from both raw sequencing data and the reference genome that can yield better quality sequence reads, SNP-calls, variant detection, as well as an alignment at the best possible location in the reference genome. Thus, there is a need for novel reference-guided bioinformatics algorithms for interpreting analog signals representing sequences of the bases ({A, C, G, T}), while simultaneously aligning possible sequence reads to a source reference genome whenever available.

Results

Here, we propose a new base-calling algorithm, TotalReCaller, to achieve improved performance. A linear error model for the raw intensity data and Burrows-Wheeler transform (BWT) based alignment are combined utilizing a Bayesian score function, which is then globally optimized over all possible genomic locations using an efficient branch-and-bound approach. The algorithm has been implemented in soft- and hardware [field-programmable gate array (FPGA)] to achieve real-time performance. Empirical results on real high-throughput Illumina data were used to evaluate TotalReCaller's performance relative to its peers-Bustard, BayesCall, Ibis and Rolexa-based on several criteria, particularly those important in clinical and scientific applications. Namely, it was evaluated for (i) its base-calling speed and throughput, (ii) its read accuracy and (iii) its specificity and sensitivity in variant calling.

Availability

A software implementation of TotalReCaller as well as additional information, is available at: http://bioinformatics.nyu.edu/wordpress/projects/totalrecaller/

Contact

fabian.menges@nyu.edu.",2011-06-30 +21347314,De-novo discovery of differentially abundant transcription factor binding sites including their positional preference.,"Transcription factors are a main component of gene regulation as they activate or repress gene expression by binding to specific binding sites in promoters. The de-novo discovery of transcription factor binding sites in target regions obtained by wet-lab experiments is a challenging problem in computational biology, which has not been fully solved yet. Here, we present a de-novo motif discovery tool called Dispom for finding differentially abundant transcription factor binding sites that models existing positional preferences of binding sites and adjusts the length of the motif in the learning process. Evaluating Dispom, we find that its prediction performance is superior to existing tools for de-novo motif discovery for 18 benchmark data sets with planted binding sites, and for a metazoan compendium based on experimental data from micro-array, ChIP-chip, ChIP-DSL, and DamID as well as Gene Ontology data. Finally, we apply Dispom to find binding sites differentially abundant in promoters of auxin-responsive genes extracted from Arabidopsis thaliana microarray data, and we find a motif that can be interpreted as a refined auxin responsive element predominately positioned in the 250-bp region upstream of the transcription start site. Using an independent data set of auxin-responsive genes, we find in genome-wide predictions that the refined motif is more specific for auxin-responsive genes than the canonical auxin-responsive element. In general, Dispom can be used to find differentially abundant motifs in sequences of any origin. However, the positional distribution learned by Dispom is especially beneficial if all sequences are aligned to some anchor point like the transcription start site in case of promoter sequences. 
We demonstrate that the combination of searching for differentially abundant motifs and inferring a position distribution from the data is beneficial for de-novo motif discovery. Hence, we make the tool freely available as a component of the open-source Java framework Jstacs and as a stand-alone application at http://www.jstacs.de/index.php/Dispom.",2011-02-10 +21810250,GSV: a web-based genome synteny viewer for customized data.,"

Background

The analysis of genome synteny is a common practice in comparative genomics. With the advent of DNA sequencing technologies, individual biologists can rapidly produce their genomic sequences of interest. Although web-based synteny visualization tools are convenient for biologists to use, none of the existing ones allow biologists to upload their own data for analysis.

Results

We have developed the web-based Genome Synteny Viewer (GSV) that allows users to upload two data files for synteny visualization, the mandatory synteny file for specifying genomic positions of conserved regions and the optional genome annotation file. GSV presents two selected genomes in a single integrated view while still retaining the browsing flexibility necessary for exploring individual genomes. Users can browse and filter for genomic regions of interest, change the color or shape of each annotation track as well as re-order, hide or show the tracks dynamically. Additional features include downloadable images, immediate email notification and tracking of usage history. The entire GSV package is also light-weighted which enables easy local installation.

Conclusions

GSV provides a unique option for biologists to analyze genome synteny by uploading their own data set to a web-based comparative genome browser. A web server hosting GSV is provided at http://cas-bioinfo.cas.unt.edu/gsv, and the software is also freely available for local installations.",2011-08-02 +21751374,MassChroQ: a versatile tool for mass spectrometry quantification.,"Recently, many software tools have been developed to perform quantification in LC-MS analyses. However, most of them are specific to either a quantification strategy (e.g. label-free or isotopic labelling) or a mass-spectrometry system (e.g. high or low resolution). In this context, we have developed MassChroQ (Mass Chromatogram Quantification), a versatile software that performs LC-MS data alignment and peptide quantification by peak area integration on extracted ion chromatograms. MassChroQ is suitable for quantification with or without labelling and is not limited to high-resolution systems. Peptides of interest (for example all the identified peptides) can be determined automatically, or manually by providing targeted m/z and retention time values. It can handle large experiments that include protein or peptide fractionation (as SDS-PAGE, 2-D LC). It is fully configurable. Every processing step is traceable, the produced data are in open standard formats and its modularity allows easy integration into proteomic pipelines. The output results are ready for use in statistical analyses. Evaluation of MassChroQ on complex label-free data obtained from low and high-resolution mass spectrometers showed low CVs for technical reproducibility (1.4%) and high coefficients of correlation to protein quantity (0.98). MassChroQ is freely available under the GNU General Public Licence v3.0 at http://pappso.inra.fr/bioinfo/masschroq/.",2011-08-04 +21385349,PIMS sequencing extension: a laboratory information management system for DNA sequencing facilities.,"

Background

Facilities that provide a service for DNA sequencing typically support large numbers of users and experiment types. The cost of services is often reduced by the use of liquid handling robots but the efficiency of such facilities is hampered because the software for such robots does not usually integrate well with the systems that run the sequencing machines. Accordingly, there is a need for software systems capable of integrating different robotic systems and managing sample information for DNA sequencing services. In this paper, we describe an extension to the Protein Information Management System (PIMS) that is designed for DNA sequencing facilities. The new version of PIMS has a user-friendly web interface and integrates all aspects of the sequencing process, including sample submission, handling and tracking, together with capture and management of the data.

Results

The PIMS sequencing extension has been in production since July 2009 at the University of Leeds DNA Sequencing Facility. It has completely replaced manual data handling and simplified the tasks of data management and user communication. Samples from 45 groups have been processed with an average throughput of 10000 samples per month. The current version of the PIMS sequencing extension works with Applied Biosystems 3130XL 96-well plate sequencer and MWG 4204 or Aviso Theonyx liquid handling robots, but is readily adaptable for use with other combinations of robots.

Conclusions

PIMS has been extended to provide a user-friendly and integrated data management solution for DNA sequencing facilities that is accessed through a normal web browser and allows simultaneous access by multiple users as well as facility managers. The system integrates sequencing and liquid handling robots, manages the data flow, and provides remote access to the sequencing results. The software is freely available, for academic users, from http://www.pims-lims.org/.",2011-03-07 +22726705,QuartetS-DB: a large-scale orthology database for prokaryotes and eukaryotes inferred by evolutionary evidence.,"

Background

The concept of orthology is key to decoding evolutionary relationships among genes across different species using comparative genomics. QuartetS is a recently reported algorithm for large-scale orthology detection. Based on the well-established evolutionary principle that gene duplication events discriminate paralogous from orthologous genes, QuartetS has been shown to improve orthology detection accuracy while maintaining computational efficiency.

Description

QuartetS-DB is a new orthology database constructed using the QuartetS algorithm. The database provides orthology predictions among 1621 complete genomes (1365 bacterial, 92 archaeal, and 164 eukaryotic), covering more than seven million proteins and four million pairwise orthologs. It is a major source of orthologous groups, containing more than 300,000 groups of orthologous proteins and 236,000 corresponding gene trees. The database also provides over 500,000 groups of inparalogs. In addition to its size, a distinguishing feature of QuartetS-DB is the ability to allow users to select a cutoff value that modulates the balance between prediction accuracy and coverage of the retrieved pairwise orthologs. The database is accessible at https://applications.bioanalysis.org/quartetsdb.

Conclusions

QuartetS-DB is one of the largest orthology resources available to date. Because its orthology predictions are underpinned by evolutionary evidence obtained from sequenced genomes, we expect its accuracy to continue to increase in future releases as the genomes of additional species are sequenced.",2012-06-22 +22726260,Developing the anemone Aiptasia as a tractable model for cnidarian-dinoflagellate symbiosis: the transcriptome of aposymbiotic A. pallida.,"

Background

Coral reefs are hotspots of oceanic biodiversity, forming the foundation of ecosystems that are important both ecologically and for their direct practical impacts on humans. Corals are declining globally due to a number of stressors, including rising sea-surface temperatures and pollution; such stresses can lead to a breakdown of the essential symbiotic relationship between the coral host and its endosymbiotic dinoflagellates, a process known as coral bleaching. Although the environmental stresses causing this breakdown are largely known, the cellular mechanisms of symbiosis establishment, maintenance, and breakdown are still largely obscure. Investigating the symbiosis using an experimentally tractable model organism, such as the small sea anemone Aiptasia, should improve our understanding of exactly how the environmental stressors affect coral survival and growth.

Results

We assembled the transcriptome of a clonal population of adult, aposymbiotic (dinoflagellate-free) Aiptasia pallida from ~208 million reads, yielding 58,018 contigs. We demonstrated that many of these contigs represent full-length or near-full-length transcripts that encode proteins similar to those from a diverse array of pathways in other organisms, including various metabolic enzymes, cytoskeletal proteins, and neuropeptide precursors. The contigs were annotated by sequence similarity, assigned GO terms, and scanned for conserved protein domains. We analyzed the frequency and types of single-nucleotide variants and estimated the size of the Aiptasia genome to be ~421 Mb. The contigs and annotations are available through NCBI (Transcription Shotgun Assembly database, accession numbers JV077153-JV134524) and at http://pringlelab.stanford.edu/projects.html.

Conclusions

The availability of an extensive transcriptome assembly for A. pallida will facilitate analyses of gene-expression changes, identification of proteins of interest, and other studies in this important emerging model system.",2012-06-22 +22570409,GPSy: a cross-species gene prioritization system for conserved biological processes--application in male gamete development.,"We present gene prioritization system (GPSy), a cross-species gene prioritization system that facilitates the arduous but critical task of prioritizing genes for follow-up functional analyses. GPSy's modular design with regard to species, data sets and scoring strategies enables users to formulate queries in a highly flexible manner. Currently, the system encompasses 20 topics related to conserved biological processes including male gamete development discussed in this article. The web server-based tool is freely available at http://gpsy.genouest.org.",2012-05-08 +24149053,Inferring the soybean (Glycine max) microRNA functional network based on target gene network.,"

Motivation

The rapid accumulation of microRNAs (miRNAs) and experimental evidence for miRNA interactions has ushered in a new area of miRNA research that focuses on network more than individual miRNA interaction, which provides a systematic view of the whole microRNome. So it is a challenge to infer miRNA functional interactions on a system-wide level and further draw a miRNA functional network (miRFN). A few studies have focused on the well-studied human species; however, these methods can neither be extended to other non-model organisms nor take fully into account the information embedded in miRNA-target and target-target interactions. Thus, it is important to develop appropriate methods for inferring the miRNA network of non-model species, such as soybean (Glycine max), without such extensive miRNA-phenotype associated data as miRNA-disease associations in human.

Results

Here we propose a new method to measure the functional similarity of miRNAs considering both the site accessibility and the interactive context of target genes in functional gene networks. We further construct the miRFNs of soybean, which is the first study on soybean miRNAs on the network level and the core methods can be easily extended to other species. We found that miRFNs of soybean exhibit a scale-free, small world and modular architecture, with their degrees fit best to power-law and exponential distribution. We also showed that miRNA with high degree tends to interact with those of low degree, which reveals the disassortativity and modularity of miRFNs. Our efforts in this study will be useful to further reveal the soybean miRNA-miRNA and miRNA-gene interactive mechanism on a systematic level.

Availability and implementation

A web tool for information retrieval and analysis of soybean miRFNs and the relevant target functional gene networks can be accessed at SoymiRNet: http://nclab.hit.edu.cn/SoymiRNet.",2013-10-22 +22759430,Efficient error correction for next-generation sequencing of viral amplicons.,"

Background

Next-generation sequencing allows the analysis of an unprecedented number of viral sequence variants from infected patients, presenting a novel opportunity for understanding virus evolution, drug resistance and immune escape. However, sequencing in bulk is error prone. Thus, the generated data require error identification and correction. Most error-correction methods to date are not optimized for amplicon analysis and assume that the error rate is randomly distributed. Recent quality assessment of amplicon sequences obtained using 454-sequencing showed that the error rate is strongly linked to the presence and size of homopolymers, position in the sequence and length of the amplicon. All these parameters are strongly sequence specific and should be incorporated into the calibration of error-correction algorithms designed for amplicon sequencing.

Results

In this paper, we present two new efficient error correction algorithms optimized for viral amplicons: (i) k-mer-based error correction (KEC) and (ii) empirical frequency threshold (ET). Both were compared to a previously published clustering algorithm (SHORAH), in order to evaluate their relative performance on 24 experimental datasets obtained by 454-sequencing of amplicons with known sequences. All three algorithms show similar accuracy in finding true haplotypes. However, KEC and ET were significantly more efficient than SHORAH in removing false haplotypes and estimating the frequency of true ones.

Conclusions

Both algorithms, KEC and ET, are highly suitable for rapid recovery of error-free haplotypes obtained by 454-sequencing of amplicons from heterogeneous viruses.The implementations of the algorithms and data sets used for their testing are available at: http://alan.cs.gsu.edu/NGS/?q=content/pyrosequencing-error-correction-algorithm.",2012-06-25 +21993538,Wavelet images and Chou's pseudo amino acid composition for protein classification.,"The last decade has seen an explosion in the collection of protein data. To actualize the potential offered by this wealth of data, it is important to develop machine systems capable of classifying and extracting features from proteins. Reliable machine systems for protein classification offer many benefits, including the promise of finding novel drugs and vaccines. In developing our system, we analyze and compare several feature extraction methods used in protein classification that are based on the calculation of texture descriptors starting from a wavelet representation of the protein. We then feed these texture-based representations of the protein into an Adaboost ensemble of neural network or a support vector machine classifier. In addition, we perform experiments that combine our feature extraction methods with a standard method that is based on the Chou's pseudo amino acid composition. Using several datasets, we show that our best approach outperforms standard methods. The Matlab code of the proposed protein descriptors is available at http://bias.csr.unibo.it/nanni/wave.rar .",2011-10-13 +22718529,Parallel-ProBiS: fast parallel algorithm for local structural comparison of protein structures and binding sites.,"The ProBiS algorithm performs a local structural comparison of the query protein surface against the nonredundant database of protein structures. It finds proteins that have binding sites in common with the query protein. 
Here, we present a new parallelized algorithm, Parallel-ProBiS, for detecting similar binding sites on clusters of computers. The obtained speedups of the parallel ProBiS scale almost ideally with the number of computing cores up to about 64 computing cores. Scaling is better for larger than for smaller query proteins. For a protein with almost 600 amino acids, the maximum speedup of 180 was achieved on two interconnected clusters with 248 computing cores. Source code of Parallel-ProBiS is available for download free for academic users at http://probis.cmm.ki.si/download.",2012-06-20 +21737438,Simulating systems genetics data with SysGenSIM.,"

Summary

SysGenSIM is a software package to simulate Systems Genetics (SG) experiments in model organisms, for the purpose of evaluating and comparing statistical and computational methods and their implementations for analyses of SG data [e.g. methods for expression quantitative trait loci (eQTL) mapping and network inference]. SysGenSIM allows the user to select a variety of network topologies, genetic and kinetic parameters to simulate SG data ( genotyping, gene expression and phenotyping) with large gene networks with thousands of nodes. The software is encoded in MATLAB, and a user-friendly graphical user interface is provided.

Availability

The open-source software code and user manual can be downloaded at: http://sysgensim.sourceforge.net/

Contact

alf@crs4.it.",2011-07-06 +23418443,Prediction of S-glutathionylation sites based on protein sequences.,"S-glutathionylation, the reversible formation of mixed disulfides between glutathione(GSH) and cysteine residues in proteins, is a specific form of post-translational modification that plays important roles in various biological processes, including signal transduction, redox homeostasis, and metabolism inside cells. Experimentally identifying S-glutathionylation sites is labor-intensive and time consuming, whereas bioinformatics methods provide an alternative way to this problem by predicting S-glutathionylation sites in silico. The bioinformatics approaches give not only candidate sites for further experimental verification but also bio-chemical insights into the mechanism of S-glutathionylation. In this paper, we firstly collect experimentally determined S-glutathionylated proteins and their corresponding modification sites from the literature, and then propose a new method for predicting S-glutathionylation sites by employing machine learning methods based on protein sequence data. Promising results are obtained by our method with an AUC (area under ROC curve) score of 0.879 in 5-fold cross-validation, which demonstrates the predictive power of our proposed method. The datasets used in this work are available at http://csb.shu.edu.cn/SGDB.",2013-02-13 +22434832,Community annotation and bioinformatics workforce development in concert--Little Skate Genome Annotation Workshops and Jamborees.,"Recent advances in high-throughput DNA sequencing technologies have equipped biologists with a powerful new set of tools for advancing research goals. The resulting flood of sequence data has made it critically important to train the next generation of scientists to handle the inherent bioinformatic challenges. 
The North East Bioinformatics Collaborative (NEBC) is undertaking the genome sequencing and annotation of the little skate (Leucoraja erinacea) to promote advancement of bioinformatics infrastructure in our region, with an emphasis on practical education to create a critical mass of informatically savvy life scientists. In support of the Little Skate Genome Project, the NEBC members have developed several annotation workshops and jamborees to provide training in genome sequencing, annotation and analysis. Acting as a nexus for both curation activities and dissemination of project data, a project web portal, SkateBase (http://skatebase.org) has been developed. As a case study to illustrate effective coupling of community annotation with workforce development, we report the results of the Mitochondrial Genome Annotation Jamborees organized to annotate the first completely assembled element of the Little Skate Genome Project, as a culminating experience for participants from our three prior annotation workshops. We are applying the physical/virtual infrastructure and lessons learned from these activities to enhance and streamline the genome annotation workflow, as we look toward our continuing efforts for larger-scale functional and structural community annotation of the L. erinacea genome.",2012-03-20 +23884480,OncodriveCLUST: exploiting the positional clustering of somatic mutations to identify cancer genes.,"

Motivation

Gain-of-function mutations often cluster in specific protein regions, a signal that those mutations provide an adaptive advantage to cancer cells and consequently are positively selected during clonal evolution of tumours. We sought to determine the overall extent of this feature in cancer and the possibility to use this feature to identify drivers.

Results

We have developed OncodriveCLUST, a method to identify genes with a significant bias towards mutation clustering within the protein sequence. This method constructs the background model by assessing coding-silent mutations, which are assumed not to be under positive selection and thus may reflect the baseline tendency of somatic mutations to be clustered. OncodriveCLUST analysis of the Catalogue of Somatic Mutations in Cancer retrieved a list of genes enriched by the Cancer Gene Census, prioritizing those with dominant phenotypes but also highlighting some recessive cancer genes, which showed wider but still delimited mutation clusters. Assessment of datasets from The Cancer Genome Atlas demonstrated that OncodriveCLUST selected cancer genes that were nevertheless missed by methods based on frequency and functional impact criteria. This stressed the benefit of combining approaches based on complementary principles to identify driver mutations. We propose OncodriveCLUST as an effective tool for that purpose.

Availability

OncodriveCLUST has been implemented as a Python script and is freely available from http://bg.upf.edu/oncodriveclust

Contact

nuria.lopez@upf.edu or abel.gonzalez@upf.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-24 +22708584,Primer-BLAST: a tool to design target-specific primers for polymerase chain reaction.,"

Background

Choosing appropriate primers is probably the single most important factor affecting the polymerase chain reaction (PCR). Specific amplification of the intended target requires that primers do not have matches to other targets in certain orientations and within certain distances that allow undesired amplification. The process of designing specific primers typically involves two stages. First, the primers flanking regions of interest are generated either manually or using software tools; then they are searched against an appropriate nucleotide sequence database using tools such as BLAST to examine the potential targets. However, the latter is not an easy process as one needs to examine many details between primers and targets, such as the number and the positions of matched bases, the primer orientations and distance between forward and reverse primers. The complexity of such analysis usually makes this a time-consuming and very difficult task for users, especially when the primers have a large number of hits. Furthermore, although the BLAST program has been widely used for primer target detection, it is in fact not an ideal tool for this purpose as BLAST is a local alignment algorithm and does not necessarily return complete match information over the entire primer range.

Results

We present a new software tool called Primer-BLAST to alleviate the difficulty in designing target-specific primers. This tool combines BLAST with a global alignment algorithm to ensure a full primer-target alignment and is sensitive enough to detect targets that have a significant number of mismatches to primers. Primer-BLAST allows users to design new target-specific primers in one step as well as to check the specificity of pre-existing primers. Primer-BLAST also supports placing primers based on exon/intron locations and excluding single nucleotide polymorphism (SNP) sites in primers.

Conclusions

We describe a robust and fully implemented general purpose primer design tool that designs target-specific PCR primers. Primer-BLAST offers flexible options to adjust the specificity threshold and other primer properties. This tool is publicly available at http://www.ncbi.nlm.nih.gov/tools/primer-blast.",2012-06-18 +22723914,A global characterization and identification of multifunctional enzymes.,"Multi-functional enzymes are enzymes that perform multiple physiological functions. Characterization and identification of multi-functional enzymes are critical for communication and cooperation between different functions and pathways within a complex cellular system or between cells. In present study, we collected literature-reported 6,799 multi-functional enzymes and systematically characterized them in structural, functional, and evolutionary aspects. It was found that four physiochemical properties, that is, charge, polarizability, hydrophobicity, and solvent accessibility, are important for characterization of multi-functional enzymes. Accordingly, a combinational model of support vector machine and random forest model was constructed, based on which 6,956 potential novel multi-functional enzymes were successfully identified from the ENZYME database. Moreover, it was observed that multi-functional enzymes are non-evenly distributed in species, and that Bacteria have relatively more multi-functional enzymes than Archaebacteria and Eukaryota. Comparative analysis indicated that the multi-functional enzymes experienced a fluctuation of gene gain and loss during the evolution from S. cerevisiae to H. sapiens. Further pathway analyses indicated that a majority of multi-functional enzymes were well preserved in catalyzing several essential cellular processes, for example, metabolisms of carbohydrates, nucleotides, and amino acids. 
What's more, a database of known multi-functional enzymes and a server for novel multi-functional enzyme prediction were also constructed for free access at http://bioinf.xmu.edu.cn/databases/MFEs/index.htm.",2012-06-18 +22448159,Spike sorting of heterogeneous neuron types by multimodality-weighted PCA and explicit robust variational Bayes.,"This study introduces a new spike sorting method that classifies spike waveforms from multiunit recordings into spike trains of individual neurons. In particular, we develop a method to sort a spike mixture generated by a heterogeneous neural population. Such a spike sorting has a significant practical value, but was previously difficult. The method combines a feature extraction method, which we may term ""multimodality-weighted principal component analysis"" (mPCA), and a clustering method by variational Bayes for Student's t mixture model (SVB). The performance of the proposed method was compared with that of other conventional methods for simulated and experimental data sets. We found that the mPCA efficiently extracts highly informative features as clusters clearly separable in a relatively low-dimensional feature space. The SVB was implemented explicitly without relying on Maximum-A-Posterior (MAP) inference for the ""degree of freedom"" parameters. The explicit SVB is faster than the conventional SVB derived with MAP inference and works more reliably over various data sets that include spiking patterns difficult to sort. For instance, spikes of a single bursting neuron may be separated incorrectly into multiple clusters, whereas those of a sparsely firing neuron tend to be merged into clusters for other neurons. Our method showed significantly improved performance in spike sorting of these ""difficult"" neurons. 
A parallelized implementation of the proposed algorithm (EToS version 3) is available as open-source code at http://etos.sourceforge.net/.",2012-03-19 +23304401,An interface terminology for medical imaging ordering purposes.,"The need for structured data in electronic health records has not been fully addressed by reference terminologies (RT) due to difficulties of use for end-users. Interface terminologies (IT), built for specific usage and users, and linked to RT, may solve this issue. We propose an IT for medical imaging prescription, based on the French nomenclature for procedure (CCAM), and its qualitative evaluation. The creation and evaluation processes were adapted from published guidelines. Prescription IT is available on the web (http://pts.chu-rouen.fr). It contains 290 orderable terms linked to 249 CCAM codes. The synonymy of prescription IT is significantly richer than the CCAM one and labels are significantly shorter. The main problem came from the CCAM, which is dedicated to billing purposes. We are planning to map prescription IT to other international RT such as RadLex or SNOMED. Prescription IT might quicken the adoption of computerized ordering processes in France.",2012-11-03 +22782546,CytoSPADE: high-performance analysis and visualization of high-dimensional cytometry data.,"

Motivation

Recent advances in flow cytometry enable simultaneous single-cell measurement of 30+ surface and intracellular proteins. CytoSPADE is a high-performance implementation of an interface for the Spanning-tree Progression Analysis of Density-normalized Events algorithm for tree-based analysis and visualization of this high-dimensional cytometry data.

Availability

Source code and binaries are freely available at http://cytospade.org and via Bioconductor version 2.10 onwards for Linux, OSX and Windows. CytoSPADE is implemented in R, C++ and Java.

Contact

michael.linderman@mssm.edu

Supplementary information

Additional documentation available at http://cytospade.org.",2012-07-10 +23335826,Global maps of science based on the new Web-of-Science categories.,"In August 2011, Thomson Reuters launched version 5 of the Science and Social Science Citation Index in the Web of Science (WoS). Among other things, the 222 ISI Subject Categories (SCs) for these two databases in version 4 of WoS were renamed and extended to 225 WoS Categories (WCs). A new set of 151 Subject Areas was added, but at a higher level of aggregation. Perhaps confusingly, these Subject Areas are now abbreviated ""SC"" in the download, whereas ""WC"" is used for WoS Categories. Since we previously used the ISI SCs as the baseline for a global map in Pajek (Pajek is freely available at http://vlado.fmf.uni-lj.si/pub/networks/pajek/) (Rafols et al., Journal of the American Society for Information Science and Technology 61:1871-1887, 2010) and brought this facility online (at http://www.leydesdorff.net/overlaytoolkit), we recalibrated this map for the new WC categories using the Journal Citation Reports 2010. In the new installation, the base maps can also be made using VOSviewer (VOSviewer is freely available at http://www.VOSviewer.com/) (Van Eck and Waltman, Scientometrics 84:523-538, 2010).",2012-06-17 +23736532,Identification of deleterious synonymous variants in human genomes.,"

Motivation

The prioritization and identification of disease-causing mutations is one of the most significant challenges in medical genomics. Currently available methods address this problem for non-synonymous single nucleotide variants (SNVs) and variation in promoters/enhancers; however, recent research has implicated synonymous (silent) exonic mutations in a number of disorders.

Results

We have curated 33 such variants from literature and developed the Silent Variant Analyzer (SilVA), a machine-learning approach to separate these from among a large set of rare polymorphisms. We evaluate SilVA's performance on in silico 'infection' experiments, in which we implant known disease-causing mutations into a human genome, and show that for 15 of 33 disorders, we rank the implanted mutation among the top five most deleterious ones. Furthermore, we apply the SilVA method to two additional datasets: synonymous variants associated with Meckel syndrome, and a collection of silent variants clinically observed and stratified by a molecular diagnostics laboratory, and show that SilVA is able to accurately predict the harmfulness of silent variants in these datasets.

Availability

SilVA is open source and is freely available from the project website: http://compbio.cs.toronto.edu/silva

Contact

silva-snv@cs.toronto.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-06-04 +21775304,Making whole genome multiple alignments usable for biologists.,

Summary

Here we describe a set of tools implemented within the Galaxy platform designed to make analysis of multiple genome alignments truly accessible for biologists. These tools are available through both a web-based graphical user interface and a command-line interface.

Availability and implementation

This open-source toolset was implemented in Python and has been integrated into the online data analysis platform Galaxy (public web access: http://usegalaxy.org; download: http://getgalaxy.org). Additional help is available as a live supplement from http://usegalaxy.org/u/dan/p/maf.

Contact

james.taylor@emory.edu; anton@bx.psu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.,2011-07-19 +22700939,RNAiAtlas: a database for RNAi (siRNA) libraries and their specificity.,"Large-scale RNA interference (RNAi) experiments, especially the ones based on short-interfering RNA (siRNA) technology became increasingly popular over the past years. For such knock-down/screening purposes, different companies offer sets of oligos/reagents targeting the whole genome or a subset of it for various organisms. Obviously, the sequence (and structure) of the corresponding oligos is a key factor in obtaining reliable results in these large-scale studies and the companies use a variety of (often not fully public) algorithms to design them. Nevertheless, as the genome annotations are still continuously changing, oligos may become obsolete, so siRNA reagents should be periodically re-annotated according to the latest version of the sequence database (which of course has serious consequences also on the interpretation of the screening results). In our article, we would like to introduce a new software/database tool, the RNAiAtlas. It has been created for exploration, analysis and distribution of large scale RNAi libraries (currently limited to the human genome) with their latest annotation (including former history) but in addition it contains also specific on-target analysis results (design quality, side effects, off-targets). Database URL: http://www.rnaiatlas.ethz.ch.",2012-06-14 +22719847,Multi-label multi-kernel transfer learning for human protein subcellular localization.,"Recent years have witnessed much progress in computational modelling for protein subcellular localization. However, the existing sequence-based predictive models demonstrate moderate or unsatisfactory performance, and the gene ontology (GO) based models may take the risk of performance overestimation for novel proteins. 
Furthermore, many human proteins have multiple subcellular locations, which renders the computational modelling more complicated. Up to the present, there are far few researches specialized for predicting the subcellular localization of human proteins that may reside in multiple cellular compartments. In this paper, we propose a multi-label multi-kernel transfer learning model for human protein subcellular localization (MLMK-TLM). MLMK-TLM proposes a multi-label confusion matrix, formally formulates three multi-labelling performance measures and adapts one-against-all multi-class probabilistic outputs to multi-label learning scenario, based on which to further extends our published work GO-TLM (gene ontology based transfer learning model for protein subcellular localization) and MK-TLM (multi-kernel transfer learning based on Chou's PseAAC formulation for protein submitochondria localization) for multiplex human protein subcellular localization. With the advantages of proper homolog knowledge transfer, comprehensive survey of model performance for novel protein and multi-labelling capability, MLMK-TLM will gain more practical applicability. The experiments on human protein benchmark dataset show that MLMK-TLM significantly outperforms the baseline model and demonstrates good multi-labelling ability for novel human proteins. Some findings (predictions) are validated by the latest Swiss-Prot database. The software can be freely downloaded at http://soft.synu.edu.cn/upload/msy.rar.",2012-06-13 +23396119,aCSM: noise-free graph-based signatures to large-scale receptor-based ligand prediction.,"

Motivation

Receptor-ligand interactions are a central phenomenon in most biological systems. They are characterized by molecular recognition, a complex process mainly driven by physicochemical and structural properties of both receptor and ligand. Understanding and predicting these interactions are major steps towards protein ligand prediction, target identification, lead discovery and drug design.

Results

We propose a novel graph-based binding pocket signature called aCSM, which proved to be efficient and effective in handling large-scale protein ligand prediction tasks. We compare our results with those described in the literature and demonstrate that our algorithm outperforms competing techniques. Finally, we predict novel ligands for proteins from Trypanosoma cruzi, the parasite responsible for Chagas disease, and validate them in silico via a docking protocol, showing the applicability of the method in suggesting ligands for pockets in a real-world scenario.

Availability and implementation

Datasets and the source code are available at http://www.dcc.ufmg.br/~dpires/acsm.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-08 +22106333,Identification of context-specific gene regulatory networks with GEMULA--gene expression modeling using LAsso.,"

Motivation

Gene regulatory networks, in which edges between nodes describe interactions between transcriptional regulators and their target genes, determine the coordinated spatiotemporal expression of genes. Especially in higher organisms, context-specific combinatorial regulation by transcription factors (TFs) is believed to determine cellular states and fates. TF-target gene interactions can be studied using high-throughput techniques such as ChIP-chip or ChIP-Seq. These experiments are time and cost intensive, and further limited by, for instance, availability of high affinity TF antibodies. Hence, there is a practical need for methods that can predict TF-TF and TF-target gene interactions in silico, i.e. from gene expression and DNA sequence data alone. We propose GEMULA, a novel approach based on linear models to predict TF-gene expression associations and TF-TF interactions from experimental data. GEMULA is based on linear models, fast and considers a wide range of biologically plausible models that describe gene expression data as a function of predicted TF binding to gene promoters.

Results

We show that models inferred with GEMULA are able to explain roughly 70% of the observed variation in gene expression in the yeast heat shock response. The functional relevance of the inferred TF-TF interactions in these models is validated by different sources of independent experimental evidence. We have also applied GEMULA to an in vitro model of neuronal outgrowth. Our findings confirm existing knowledge on gene regulatory interactions underlying neuronal outgrowth, but importantly also generate new insights into the temporal dynamics of this gene regulatory network that can now be addressed experimentally.

Availability

The GEMULA R-package is available from http://www.few.vu.nl/~degunst/gemula_1.0.tar.gz.",2011-11-21 +21623595,Current status of the Korean venous thromboembolism registry.,"The Korean venous thromboembolism (VTE) registry, which was initiated by the Working Parties of Korean Society on Thrombosis and Hemostasis, and the Korean Society of Hematology, is a web-based multicenter registry (http://kdvt.chamc.co.kr) for recruiting consecutive VTE patients. The aim of the registry is to prospectively collect data on the epidemiology and clinical outcomes of VTE from a large, unselected cohort of patients, and to provide data on the true incidence and management of VTE in the real-world. By the end of 2007, the starting year of the registry, 840 patients were registered. By the end of 2008, 1,121 were registered, with 1,289 by the end of 2009, and 1,463 by April 2010 from 11 hospitals. The first report on the epidemiologic characteristics of 596 consecutive VTE patients was released in October 2007.",2011-07-01 +24261984,Exploration of the gene fusion landscape of glioblastoma using transcriptome sequencing and copy number data.,"

Background

RNA-seq has spurred important gene fusion discoveries in a number of different cancers, including lung, prostate, breast, brain, thyroid and bladder carcinomas. Gene fusion discovery can potentially lead to the development of novel treatments that target the underlying genetic abnormalities.

Results

In this study, we provide a comprehensive view of the gene fusion landscape in 185 glioblastoma multiforme patients from two independent cohorts. Fusions occur in approximately 30-50% of GBM patient samples. In the Ivy Center cohort of 24 patients, 33% of samples harbored fusions that were validated by qPCR and Sanger sequencing. We were able to identify high-confidence gene fusions from RNA-seq data in 53% of the samples in a TCGA cohort of 161 patients. We identified 13 cases (8%) with fusions retaining a tyrosine kinase domain in the TCGA cohort and one case in the Ivy Center cohort. Ours is the first study to describe recurrent fusions involving non-coding genes. Genomic locations 7p11 and 12q14-15 harbor the majority of the fusions. Fusions on 7p11 are formed in the focally amplified EGFR locus whereas 12q14-15 fusions are formed by complex genomic rearrangements. All the fusions detected in this study can be further visualized and analyzed using our website: http://ivygap.swedish.org/fusions.

Conclusions

Our study highlights the prevalence of gene fusions as one of the major genomic abnormalities in GBM. The majority of the fusions are private fusions, and a minority of these recur with low frequency. A small subset of patients with fusions of receptor tyrosine kinases can benefit from existing FDA approved drugs and drugs available in various clinical trials. Due to the low frequency and rarity of clinically relevant fusions, RNA-seq of GBM patient samples will be a vital tool for the identification of patient-specific fusions that can drive personalized therapy.",2013-11-22 +22693219,GeneView: a comprehensive semantic search engine for PubMed.,"Research results are primarily published in scientific literature and curation efforts cannot keep up with the rapid growth of published literature. The plethora of knowledge remains hidden in large text repositories like MEDLINE. Consequently, life scientists have to spend a great amount of time searching for specific information. The enormous ambiguity among most names of biomedical objects such as genes, chemicals and diseases often produces too large and unspecific search results. We present GeneView, a semantic search engine for biomedical knowledge. GeneView is built upon a comprehensively annotated version of PubMed abstracts and openly available PubMed Central full texts. This semi-structured representation of biomedical texts enables a number of features extending classical search engines. For instance, users may search for entities using unique database identifiers or they may rank documents by the number of specific mentions they contain. Annotation is performed by a multitude of state-of-the-art text-mining tools for recognizing mentions from 10 entity classes and for identifying protein-protein interactions. GeneView currently contains annotations for >194 million entities from 10 classes for ∼21 million citations with 271,000 full text bodies. 
GeneView can be searched at http://bc3.informatik.hu-berlin.de/.",2012-06-12 +22694277,Traditional Chinese medicine as dual guardians against hypertension and cancer?,"This study utilizes the comprehensive traditional Chinese medicine database TCM Database@Taiwan ( http://tcm.cmu.edu.tw/ ) in conjunction with structure-based and ligand-based drug design to identify multi-function Src inhibitors. The three potential TCM candidates identified as having suitable docking conformations and bioactivity profiles were Angeliferulate, (3R)-2'-hydroxy-3',4'-dimethoxyisoflavan-7-O-beta-D-glucoside (HMID), and 3-[2',6-dihydroxy-5'-(2-propenyl)[1,1'-biphenyl]3-yl]-(E)-2-propenoic acid (3PA). Molecular dynamics simulation demonstrated that the TCM candidates have more stable interactions with the cleft and in complex with Src kinase compared to Saracatinib. Angeliferulate and HMID, both originated from Angelica sinensis, not only interact with Lys298 and amino acids from different loops in the cleft, but also with Asp407 located on the activation loop. These interactions are important to reduce the opening of the activation loop due to phosphorylation, hence stabilize the Src kinase cleft structure and inhibit activation. The TCM candidates also exhibited high affinity to other cancer-related target proteins (EGFR, HER2, and HSP90). Our observations suggest that the TCM candidates might have multi-targeting effects in hypertension and cancer.",2012-06-12 +22268698,AFLPMax: a user-friendly application for computing the optimal number of amplified fragment length polymorphism markers needed in phylogenetic reconstruction.,"Amplified fragment length polymorphisms (AFLPs) are widely used for phylogenetic inference especially in non-model species. Frequently, trees obtained with other nuclear or mitochondrial markers or with morphological information need additional resolution, increased branch support, or independent data sources (i.e. unlinked loci). 
In such cases, the use of AFLPs is a quick and cheap option. Computer simulation has shown that dominant AFLP markers lead to less accurate tree topologies than bi-allelic codominant markers such as SNPs, but this difference becomes negligible for shallow trees when using AFLP data sets that include a sufficiently large number of characters. Thus, determining how many AFLP characters are required to recover a given phylogeny is a key issue regarding the appropriateness of AFLPs for phylogenetic reconstruction. Here, we present a user-friendly, java-based graphical interface, AFLPMax, which executes an automatic pipeline of different programs providing the user with the optimal number of AFLP characters needed to recover a given phylogeny with high accuracy and support. Executables for Windows, linux and MacOS X operating systems, source code and user manual are available from: http://webs.uvigo.es/acraaj/AFLPMax.htm.",2012-01-23 +23723249,"GWAS3D: Detecting human regulatory variants by integrative analysis of genome-wide associations, chromosome interactions and histone modifications.","Interpreting the genetic variants located in the regulatory regions, such as enhancers and promoters, is an indispensable step to understand molecular mechanism of complex traits. Recent studies show that genetic variants detected by genome-wide association study (GWAS) are significantly enriched in the regulatory regions. Therefore, detecting, annotating and prioritizing of genetic variants affecting gene regulation are critical to our understanding of genotype-phenotype relationships. Here, we developed a web server GWAS3D to systematically analyze the genetic variants that could affect regulatory elements, by integrating annotations from cell type-specific chromatin states, epigenetic modifications, sequence motifs and cross-species conservation. 
The regulatory elements are inferred from the genome-wide chromosome interaction data, chromatin marks in 16 different cell types and 73 regulatory factors motifs from the Encyclopedia of DNA Element project. Furthermore, we used these function elements, as well as risk haplotype, binding affinity, conservation and P-values reported from the original GWAS to reprioritize the genetic variants. Using studies from low-density lipoprotein cholesterol, we demonstrated that our reprioritizing approach was effective and cell type specific. In conclusion, GWAS3D provides a comprehensive annotation and visualization tool to help users interpreting their results. The web server is freely available at http://jjwanglab.org/gwas3d.",2013-05-30 +23206520,"Usual interstitial pneumonia coexisted with nonspecific interstitial pneumonia, What's the diagnosis?","The differential diagnosis between idiopathic nonspecific interstitial pneumonia(INSIP) and idiopathic pulmonary fibrosis(IPF)/usual interstitial pneumonia(UIP)is tough in both clinicians and pathologists. In this study, we analyzed the lesions of right lung removed from a 58-year-old patient by gross and microscopy. The results showed that the pathological appearance of nonspecific interstitial pneumonia (NSIP) and UIP coexisted in his upper lobe. Besides, because of severe fibrosis in middle and lower lobes, it was hard to distinguish the lesions of NSIP fibrotic pattern (NSIP-F) or UIP. Based on clinic-radiologic-pathological data, the diagnosis of INSIP-F was made for this patient finally. Our study suggests that UIP is not always an accurate diagnosis when the NSIP and UIP coexist, and NSIP can have regions of UIP.

Virtual slide

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2573531681608730.",2012-12-03 +21998155,Protein stability: a single recorded mutation aids in predicting the effects of other mutations in the same amino acid site.,"

Motivation

Accurate prediction of protein stability is important for understanding the molecular underpinnings of diseases and for the design of new proteins. We introduce a novel approach for the prediction of changes in protein stability that arise from a single-site amino acid substitution; the approach uses available data on mutations occurring in the same position and in other positions. Our algorithm, named Pro-Maya (Protein Mutant stAbilitY Analyzer), combines a collaborative filtering baseline model, Random Forests regression and a diverse set of features. Pro-Maya predicts the stability free energy difference of mutant versus wild type, denoted as ΔΔG.

Results

We evaluated our algorithm extensively using cross-validation on two previously utilized datasets of single amino acid mutations and a (third) validation set. The results indicate that using known ΔΔG values of mutations at the query position improves the accuracy of ΔΔG predictions for other mutations in that position. The accuracy of our predictions in such cases significantly surpasses that of similar methods, achieving, e.g. a Pearson's correlation coefficient of 0.79 and a root mean square error of 0.96 on the validation set. Because Pro-Maya uses a diverse set of features, including predictions using two other methods, it also performs slightly better than other methods in the absence of additional experimental data on the query positions.

Availability

Pro-Maya is freely available via web server at http://bental.tau.ac.il/ProMaya.

Contact

nirb@tauex.tau.ac.il; wolf@cs.tau.ac.il

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-13 +22759612,Genome-wide search for miRNA-target interactions in Arabidopsis thaliana with an integrated approach.,"

Background

MiRNAs are small noncoding RNAs, about 22 nt long, that post-transcriptionally regulate gene expression in animals, plants and protozoa. Confident identification of MiRNA-Target Interactions (MTI) is vital to understanding their function. Currently, several integrated computational programs and databases are available for animal miRNAs, whose mechanisms differ significantly from those of plant miRNAs.

Methods

Here we present an integrated MTI prediction and analysis toolkit (imiRTP) for Arabidopsis thaliana. It features two important functions: (i) combination of several effective plant miRNA target prediction methods provides a sufficiently large MTI candidate set, and (ii) different filters allow for an efficient selection of potential targets. The modularity of imiRTP enables the prediction of high quality targets on genome-wide scale. Moreover, predicted MTIs can be presented in various ways, which allows for browsing through the putative target sites as well as conducting simple and advanced analyses.

Results

Results show that imiRTP could always find high quality candidates compared with single method by choosing appropriate filter and parameter. And we also reveal that a portion of plant miRNA could bind target genes out of coding region. Based on our results, imiRTP could facilitate the further study of Arabidopsis miRNAs in real use. All materials of imiRTP are freely available under a GNU license at (http://admis.fudan.edu.cn/projects/imiRTP.htm).",2012-06-11 +22689645,PBSword: a web server for searching similar protein-protein binding sites.,"PBSword is a web server designed for efficient and accurate comparisons and searches of geometrically similar protein-protein binding sites from a large-scale database. The basic idea of PBSword is that each protein binding site is first represented by a high-dimensional vector of 'visual words', which characterizes both the global and local shape features of the binding site. It then uses a scalable indexing technique to search for those binding sites whose visual words representations are similar to that of the query binding site. Our system is able to return ranked results of binding sites in short time from a database of 194 322 domain-domain binding sites. PBSword supports query by protein ID and by new structures uploaded by users. PBSword is a useful tool to investigate functional connections among proteins based on the local structures of binding site and has potential applications to protein-protein docking and drug discovery. The system is hosted at http://pbs.rnet.missouri.edu.",2012-06-11 +22693211,"VARIANT: Command Line, Web service and Web interface for fast and accurate functional characterization of variants found by Next-Generation Sequencing.","The massive use of Next-Generation Sequencing (NGS) technologies is uncovering an unexpected amount of variability. 
The functional characterization of such variability, particularly in the most common form of variation found, the Single Nucleotide Variants (SNVs), has become a priority that needs to be addressed in a systematic way. VARIANT (VARIant ANalyis Tool) reports information on the variants found that include consequence type and annotations taken from different databases and repositories (SNPs and variants from dbSNP and 1000 genomes, and disease-related variants from the Genome-Wide Association Study (GWAS) catalog, Online Mendelian Inheritance in Man (OMIM), Catalog of Somatic Mutations in Cancer (COSMIC) mutations, etc). VARIANT also produces a rich variety of annotations that include information on the regulatory (transcription factor or miRNA-binding sites, etc.) or structural roles, or on the selective pressures on the sites affected by the variation. This information allows extending the conventional reports beyond the coding regions and expands the knowledge on the contribution of non-coding or synonymous variants to the phenotype studied. Contrarily to other tools, VARIANT uses a remote database and operates through efficient RESTful Web Services that optimize search and transaction operations. In this way, local problems of installation, update or disk size limitations are overcome without the need of sacrifice speed (thousands of variants are processed per minute). VARIANT is available at: http://variant.bioinfo.cipf.es.",2012-06-11 +22689643,MoNetFamily: a web server to infer homologous modules and module-module interaction networks in vertebrates.,"A module is a fundamental unit forming with highly connected proteins and performs a certain kind of biological functions. Modules and module-module interaction (MMI) network are essential for understanding cellular processes and functions. The MoNetFamily web server can identify the modules, homologous modules (called module family) and MMI networks across multiple species for the query protein(s). 
This server first finds module candidates of the query by using BLASTP to search the module template database (1785 experimental and 1252 structural templates). MoNetFamily then infers the homologous modules of the selected module candidate using protein-protein interaction (PPI) families. According to homologous modules and PPIs, we statistically calculated MMIs and MMI networks across multiple species. For each module candidate, MoNetFamily identifies its neighboring modules and their MMIs in module networks of Homo sapiens, Mus musculus and Danio rerio. Finally, MoNetFamily shows the conserved proteins, PPI profiles and functional annotations of the module family. Our results indicate that the server can be useful for MMI network (e.g. 1818 modules and 9678 MMIs in H. sapiens) visualizations and query annotations using module families and neighboring modules. We believe that the server is able to provide valuable insights to determine homologous modules and MMI networks across multiple species for studying module evolution and cellular processes. The MoNetFamily server is available at http://monetfamily.life.nctu.edu.tw.",2012-06-11 +22607453,Sustained glycolytic oscillations in individual isolated yeast cells.,"

Unlabelled

Yeast glycolytic oscillations have been studied since the 1950s in cell-free extracts and intact cells. For intact cells, sustained oscillations have so far only been observed at the population level, i.e. for synchronized cultures at high biomass concentrations. Using optical tweezers to position yeast cells in a microfluidic chamber, we were able to observe sustained oscillations in individual isolated cells. Using a detailed kinetic model for the cellular reactions, we simulated the heterogeneity in the response of the individual cells, assuming small differences in a single internal parameter. This is the first time that sustained limit-cycle oscillations have been demonstrated in isolated yeast cells.

Database

The mathematical model described here has been submitted to the JWS Online Cellular Systems Modelling Database and can be accessed at http://jjj.biochem.sun.ac.za/database/gustavsson/index.html free of charge.",2012-06-11 +23163318,The tower of Babel: survey on concepts and terminology in electrical status epilepticus in sleep and continuous spikes and waves during sleep in North America.,"

Purpose

The terms ""electrical status epilepticus during sleep (ESES)"" and ""continuous spikes and waves during sleep (CSWS)"" have been used interchangeably when referring to related but different concepts. In addition, the quantification of epileptiform activity has not been standardized, and different approaches to quantification have been used. The aim of this study was to evaluate the extent to which pediatric neurologists and epileptologists use a homogeneous terminology and conceptualization in CSWS and ESES and to characterize the current understanding of these conditions.

Methods

A survey addressing the use of terminology in ""ESES"" and ""CSWS"" and the understanding of related concepts was distributed online to all members of the Child Neurology Society and the American Epilepsy Society mailing lists. Surveys were self-administered and collected using an online survey website (http://www.surveymonkey.com).

Key findings

  Two hundred nineteen surveys were completed, 137 from the Child Neurology Society mailing list and 82 from the American Epilepsy Society mailing list. ESES and CSWS were considered synonymous by 117 respondents, not synonymous by 61, 21 respondents did not know, and 20 did not respond. Most respondents (63.1%) considered CSWS as a devastating epileptic encephalopathy with severe sequelae even if treated correctly, but 25.1% of respondents indicated that it does not leave sequelae if epilepsy was treated early and another 11.8% noted that cognitive difficulties resolved with age. Cognitive and/or language regression were considered mandatory for the diagnosis of CSWS by only 27% of the respondents. The diagnosis of CSWS was based on electroencephalography (EEG) assessment alone by 31% of respondents. Respondents used different methods for calculation of the epileptiform activity, different EEG samples for calculation, and considered differently the lateralized epileptiform activity. The cut-off values for percentage of the sleep record occupied by spike-waves were variable depending on the respondent. There was no agreement on whether these cutoff values were mandatory for the diagnosis of ESES and CSWS.

Significance

  Our data show that the professionals caring for children with ESES and CSWS in North America use the terms, concepts, and defining features heterogeneously. The lack of a common language may complicate communication among clinicians and jeopardize research in this field. We anticipate that our data will fuel the development of much needed common terminology and conceptualization of ESES and CSWS.",2012-11-16 +23715893,IS-Dom: a dataset of independent structural domains automatically delineated from protein structures.,"Protein domains that can fold in isolation are significant targets in diverse areas of proteomics research as they are often readily analyzed by high-throughput methods. Here, we report IS-Dom, a dataset of Independent Structural Domains (ISDs) that are most likely to fold in isolation. IS-Dom was constructed by filtering domains from SCOP, CATH, and DomainParser using quantitative structural measures, which were calculated by estimating inter-domain hydrophobic clusters and hydrogen bonds from the full length protein's atomic coordinates. The ISD detection protocol is fully automated, and all of the computed interactions are stored in the server which enables rapid update of IS-Dom. We also prepared a standard IS-Dom using parameters optimized by maximizing the Youden's index. The standard IS-Dom contained 54,860 ISDs, of which 25.5 % had high sequence identity and termini overlap with a Protein Data Bank (PDB) cataloged sequence and are thus experimentally shown to fold in isolation [coined autonomously folded domain (AFDs)]. Furthermore, our ISD detection protocol missed less than 10 % of the AFDs, which corroborated our protocol's ability to define structural domains that are able to fold independently. 
IS-Dom is available through the web server ( http://domserv.lab.tuat.ac.jp/IS-Dom.html ), and users can either download the standard IS-Dom dataset, construct their own IS-Dom by interactively varying the parameters, or assess the structural independence of newly defined putative domains.",2013-05-29 +23842521,The experience of young adult cancer patients described through online narratives.,"

Background

Young adults are often faced with challenges related to relationships, employment, housing, and emotional development. Experiencing cancer during this time complicates the developmental processes and creates a need for communicating concerns and discussing issues.

Objective

The purpose of this study was to explore the experiences and gain a better understanding of young adults affected by cancer (YAACs) by examining their online narratives (also known as Web logs or blogs).

Interventions/methods

Inclusion and exclusion criteria were used to identify eligible Web sites. The Web site that most corresponded to the selection criteria was chosen for analysis (Planet Cancer, http://www.planetcancer.org/). The blog content generated in July 2011 was collected. The blogs were written by a total of 34 female and 12 male writers and included 136 (by female) and 28 (by male) blog entries. Researchers conducted a descriptive qualitative examination of blogs to explore YAACs' experiences during/after cancer.

Results

At the end of the data analysis process, 10 main themes were identified: physical burdens, future prospects, isolation (physical and psychological), guilt, mortality, images of cancer, creating a positive attitude, healthcare, online social interaction, and cancer survivorship.

Conclusions

The Internet provides young cancer patients with a space in which to express themselves and to share experiences with those who are of similar age and in similar situations.

Implications for practice

Blogs can be particularly helpful when patients are isolated or physically unable to interact with other people because of treatment requirements or physical deterioration. Future studies should investigate other types of Internet cancer communities and how they can benefit the development of Internet-based support networks for YAACs.",2013-09-01 +22990907,Extraction of quantitative characteristics describing wheat leaf pubescence with a novel image-processing technique.,"Leaf pubescence (hairiness) in wheat plays an important biological role in adaptation to the environment. However, this trait has always been methodologically difficult to phenotype. An important step forward has been taken with the use of computer technologies. Computer analysis of a photomicrograph of a transverse fold line of a leaf is proposed for quantitative evaluation of wheat leaf pubescence. The image-processing algorithm is implemented in the LHDetect2 software program accessible as a Web service at http://wheatdb.org/lhdetect2 . The results demonstrate that the proposed method is rapid, adequately assesses leaf pubescence density and the length distribution of trichomes and the data obtained using this method are significantly correlated with the density of trichomes on the leaf surface. Thus, the proposed method is efficient for high-throughput analysis of leaf pubescence morphology in cereal genetic collections and mapping populations.",2012-09-19 +23436767,ESPRESSO: a system for estimating protein expression and solubility in protein expression systems.,"Recombinant protein technology is essential for conducting protein science and using proteins as materials in pharmaceutical or industrial applications. Although obtaining soluble proteins is still a major experimental obstacle, knowledge about protein expression/solubility under standard conditions may increase the efficiency and reduce the cost of proteomics studies. 
In this study, we present a computational approach to estimate the probability of protein expression and solubility for two different protein expression systems: in vivo Escherichia coli and wheat germ cell-free, from only the sequence information. It implements two kinds of methods: a sequence/predicted structural property-based method that uses both the sequence and predicted structural features, and a sequence pattern-based method that utilizes the occurrence frequencies of sequence patterns. In the benchmark test, the proposed methods obtained F-scores of around 70%, and outperformed publicly available servers. Applying the proposed methods to genomic data revealed that proteins associated with translation or transcription have a strong tendency to be expressed as soluble proteins by the in vivo E. coli expression system. The sequence pattern-based method also has the potential to indicate a candidate region for modification, to increase protein solubility. All methods are available for free at the ESPRESSO server (http://mbs.cbrc.jp/ESPRESSO).",2013-05-01 +22267129,Stoffenmanager Nano version 1.0: a web-based tool for risk prioritization of airborne manufactured nano objects.,"Stoffenmanager Nano (version 1.0) is a risk-banding tool developed for employers and employees to prioritize health risks occurring as a result of exposure to manufactured nano objects (MNOs) for a broad range of worker scenarios and to assist implementation of control measures to reduce exposure levels. In order to prioritize the health risks, the Stoffenmanager Nano combines the available hazard information of a substance with a qualitative estimate of potential for inhalation exposure. The development of the Stoffenmanager Nano started with a review of the available literature on control banding. Input parameters for the hazard assessment of MNOs were selected based on the availability of these parameters in, for instance, Safety Data Sheets or product information sheets. 
The conceptual exposure model described by Schneider et al. (2011) was used as the starting point for exposure banding. During the development of the Stoffenmanager Nano tool, the precautionary principle was applied to deal with the uncertainty regarding hazard and exposure assessment of MNOs. Subsequently, the model was converted into an online tool (http://nano.stoffenmanager.nl), tested, and reviewed by a number of companies. In this paper, we describe the Stoffenmanager Nano. This tool offers a practical approach for risk prioritization in exposure situations where quantitative risk assessment is currently not possible. Updates of this first version are anticipated as more data become available in the future.",2012-01-20 +24109772,EpiDiff: entropy-based quantitative identification of differential epigenetic modification regions from epigenomes.,"Genome-wide epigenetic modification dynamics, including DNA methylation and chromatin modification, are involved in biological processes such as development, aging, and disease. Quantitative identification of differential epigenetic modification regions (DEMRs) from various temporal and spatial epigenomes is a crucial step towards investigating the relationship between epigenotype and phenotype. Here, we describe EpiDiff (http://bioinfo.hrbmu.edu.cn/epidiff/), an integrated software platform that supports quantification of epigenetic difference and identification of DEMRs by Shannon entropy. Two main modules, quantitative differential chromatin modification region (QDCMR) and quantitative differentially methylated region (QDMR) are provided for bioinformatic analysis of chromatin modifications and DNA methylation data, respectively. The third module, quantitative differential expressed gene (QDEG), can be used to identify differentially expressed genes. The platform-free and species-free nature of EpiDiff makes it potentially applicable to a wide variety of epigenomes at an unprecedented scale and resolution. 
The graphical user interface provides biologists with a practicable and reliable way to analyze and visualize epigenetic difference.",2013-01-01 +23856064,Lymphoepithelial-like carcinoma of the parotid gland: a case report and a brief review of the western literature.,"

Background

Primary lymphoepithelial-like carcinoma of the parotid gland is a rare tumour with an increased incidence among Eskimos and Orientals. In these populations, it is usually associated with Epstein-Barr virus. In Western countries, salivary gland lymphoepithelial-like carcinomas are uncommon and only 14 cases have been described so far; among these, only five cases showed Epstein-Barr virus positivity.

Case report

A 45-year-old woman was admitted to Siena Hospital for evaluation of a pre-existent (2 years) painless and tender submandibular mass, rapidly enlarging for two months. On physical examination, a 2.5-cm mass was found in the right parotid. It was firm, mobile and non-tender. Laboratory data were within reference range. Nuclear magnetic resonance detected a 2.5×1.5×1-cm well-circumscribed mass in the deep lobe of the right parotid. A total right parotidectomy with dissection of a satellite lymph node was performed. On the basis of morphological, immunohistochemical and molecular biology findings, a diagnosis of stage II (according to TNM7) Epstein-Barr virus-positive, undifferentiated lymphoepithelial-like carcinoma of the parotid gland was made. Twenty months after surgery the patient was free of disease.

Conclusions

Further studies seem to be necessary to completely elucidate the oncogenic role of Epstein-Barr virus in these tumors, which have identical morphology but different prognosis and variable presence of the virus.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1260381551000616.",2013-07-15 +24127838,"Penile vibratory stimulation in the recovery of urinary continence and erectile function after nerve-sparing radical prostatectomy: a randomized, controlled trial.","

Objective

To examine the effect of penile vibratory stimulation (PVS) in the preservation and restoration of erectile function and urinary continence in conjunction with nerve-sparing radical prostatectomy (RP).

Patients and methods

The present study was conducted between July 2010 and March 2013 as a randomized prospective trial at two university hospitals. Eligible participants were continent men with an International Index of Erectile Function-5 (IIEF-5) score of at least 18, scheduled to undergo nerve-sparing RP. Patients were randomized to a PVS group or a control group. Patients in the PVS group were instructed in using a PVS device (FERTI CARE(®) vibrator). Stimulation was performed at the frenulum once daily by the patients in their own homes for at least 1 week before surgery. After catheter removal, daily PVS was re-initiated for a period of 6 weeks. Participants were evaluated at 3, 6 and 12 months after surgery with the IIEF-5 questionnaire and questions regarding urinary bother. Patients using up to one pad daily for security reasons only were considered continent. The study was registered at http://clinicaltrials.gov/ (NCT01067261).

Results

Data from 68 patients were available for analyses (30 patients randomized to PVS and 38 patients randomized to the control group). The IIEF-5 score was highest in the PVS group at all time points after surgery with a median score of 18 vs 7.5 in the control group at 12 months (P = 0.09), but the difference only reached borderline significance. At 12 months, 16/30 (53%) patients in the PVS group had reached an IIEF-5 score of at least 18, while this was the case for 12/38 (32%) patients in the control group (P = 0.07). There were no significant differences in the proportions of continent patients between groups at 3, 6 or 12 months. At 12 months 90% of the PVS patients were continent, while 94.7% of the control patients were continent (P = 0.46).

Conclusion

The present study did not document a significant effect of PVS. However, the method proved to be acceptable for most patients and there was a trend towards better erectile function with PVS. More studies are needed to explore this possible effect further.",2014-01-22 +23990413,DIST: direct imputation of summary statistics for unmeasured SNPs.,"

Motivation

Genotype imputation methods are used to enhance the resolution of genome-wide association studies, and thus increase the detection rate for genetic signals. Although most studies report all univariate summary statistics, many of them limit the access to subject-level genotypes. Because such an access is required by all genotype imputation methods, it is helpful to develop methods that impute summary statistics without going through the interim step of imputing genotypes. Even when subject-level genotypes are available, due to the substantial computational cost of the typical genotype imputation, there is a need for faster imputation methods.

Results

Direct Imputation of summary STatistics (DIST) imputes the summary statistics of untyped variants without first imputing their subject-level genotypes. This is achieved by (i) using the conditional expectation formula for multivariate normal variates and (ii) using the correlation structure from a relevant reference population. When compared with genotype imputation methods, DIST (i) requires only a fraction of their computational resources, (ii) has comparable imputation accuracy for independent subjects and (iii) is readily applicable to the imputation of association statistics coming from large pedigree data. Thus, the proposed application is useful for a fast imputation of summary results for (i) studies of unrelated subjects, which (a) do not provide subject-level genotypes or (b) have a large size and (ii) family association studies.

Availability and implementation

Pre-compiled executables built under commonly used operating systems are publicly available at http://code.google.com/p/dist/.

Contact

dlee4@vcu.edu .",2013-08-28 +24365754,"[Report of the editors, 2013].","The editors of Revista Clínica Española (Rev Clin Esp) inform on their editorial activity during the last 12 months: (a) objectives and attainments, (b) editorial activity, and (c) objectives for 2014. In 2013 the most relevant modification concerning the editorial activity has been the translation into English of the 5 manuscripts with abstract contained in each issue (http://www.revclinesp.es/). From the first January to the 30th September 2013 we received 458 manuscripts (50.9 manuscripts per month), a similar figure to that obtained in 2012 (51.1 manuscripts per month). The acceptance rate of the 443 manuscripts whose editorial process has been concluded was 23.7% (originals, 11.8%). We asked for 253 revisions to 186 reviewers and we received 74.4% revisions in less than 2 weeks (10.9 days). The mean time to adopt an editorial decision for all manuscripts («accepted»/«rejected») has been 20.3 days (half that in 2009). For «originals» this figure has dropped from 56.6 days in 2009 to 22.5 days in 2013. The mean time elapsed from manuscript reception to its on-line publication was 94.8 days in 2013 (110.5 in 2012 and 155.8 in 2011). In 2013 the collaboration with the working groups from the Internal Medicine Spanish Foundation has reported 17 published manuscripts. In 2013 we were informed that the Journal Citation Reports excluded Rev Clin Esp from its impact factor journal list due to its elevated self-citations. We have taken a number of actions to reduce self-citations and we expect to be a minority in 2014. Some other data concerning the editorial policy are encouraging. In this sense, manuscript citation to Rev Clin Esp published articles has seen a substantial increase from 19% in 2008 to 29% in 2012. We work to achieve the digitalization of Rev Clin Esp from 1940 to 1999 (the journal is already digitalized since 2000). 
The continuous renewal of the journal sections and the working groups collaboration are necessary elements to make our journal, each day, better.",2013-12-22 +22600734,Rtips: fast and accurate tools for RNA 2D structure prediction using integer programming.,"We present a web-based tool set Rtips for fast and accurate prediction of RNA 2D complex structures. Rtips comprises two computational tools based on integer programming, IPknot for predicting RNA secondary structures with pseudoknots and RactIP for predicting RNA-RNA interactions with kissing hairpins. Both servers can run much faster than existing services with the same purpose on large data sets as well as being at least comparable in prediction accuracy. The Rtips web server along with the stand-alone programs is freely accessible at http://rna.naist.jp/.",2012-05-16 +23350948,BRAIN: a universal tool for high-throughput calculations of the isotopic distribution for mass spectrometry.,"This Letter presents the R-package implementation of the recently introduced polynomial method for calculating the aggregated isotopic distribution called BRAIN (Baffling Recursive Algorithm for Isotopic distributioN calculations). The algorithm is simple, easy to understand, highly accurate, fast, and memory-efficient. The method is based on the application of the Newton-Girard theorem and Viète's formulae to the polynomial coding of different aggregated isotopic variants. As a result, an elegant recursive equation is obtained for computing the occurrence probabilities of consecutive aggregated isotopic peaks. Additionally, the algorithm also allows calculating the center-masses of the aggregated isotopic variants. We propose an implementation which is suitable for high-throughput processing and easily customizable for application in different areas of mass spectral data analyses. 
A case study demonstrates how the R-package can be applied in the context of protein research, but the software can be also used for calculating the isotopic distribution in the context of lipidomics, metabolomics, glycoscience, or even space exploration. More materials, i.e., reference manual, vignette, and the package itself are available at Bioconductor online (http://www.bioconductor.org/packages/release/bioc/html/BRAIN.html).",2013-01-31 +23986565,Identifying differentially expressed proteins in two-dimensional electrophoresis experiments: inputs from transcriptomics statistical tools.,"

Background

Two-dimensional electrophoresis is a crucial method in proteomics that allows the characterization of proteins' function and expression. This usually implies the identification of proteins that are differentially expressed between two contrasting conditions, for example, healthy versus diseased in human proteomics biomarker discovery and stressful conditions versus control in animal experimentation. The statistical procedures that lead to such identifications are critical steps in the 2-DE analysis workflow. They include a normalization step and a test and probability correction for multiple testing. Statistical issues caused by the high dimensionality of the data and large-scale multiple testing have been a more active topic in transcriptomics than proteomics, especially in microarray analysis. We thus propose to adapt innovative statistical tools developed for microarray analysis and incorporate them in the 2-DE analysis pipeline.

Results

In this article, we evaluate the performance of different normalization procedures, different statistical tests and false discovery rate calculation methods with both real and simulated datasets. We demonstrate that the use of statistical procedures adapted from microarrays leads to a notable increase in power as well as a minimization of false-positive discovery rate. More specifically, we obtained the best results in terms of reliability and sensibility when using the 'moderate t-test' from Smyth in association with classic false discovery rate from Benjamini and Hochberg.

Availability

The methods discussed are freely available in the 'prot2D' open source R-package from Bioconductor (http://www.bioconductor.org//) under the terms of the GNU General Public License (version 2 or later).

Contact

sebastien.artigaud@univ-brest.fr or sebastien.artigaud@gmx.com.",2013-08-27 +21297945,G =  MAT: linking transcription factor expression and DNA binding data.,"Transcription factors are proteins that bind to motifs on the DNA and thus affect gene expression regulation. The qualitative description of the corresponding processes is therefore important for a better understanding of essential biological mechanisms. However, wet lab experiments targeted at the discovery of the regulatory interplay between transcription factors and binding sites are expensive. We propose a new, purely computational method for finding putative associations between transcription factors and motifs. This method is based on a linear model that combines sequence information with expression data. We present various methods for model parameter estimation and show, via experiments on simulated data, that these methods are reliable. Finally, we examine the performance of this model on biological data and conclude that it can indeed be used to discover meaningful associations. The developed software is available as a web tool and Scilab source code at http://biit.cs.ut.ee/gmat/.",2011-01-31 +22815359,Quantifying uniformity of mapped reads.,"

Unlabelled

We describe a tool for quantifying the uniformity of mapped reads in high-throughput sequencing experiments. Our statistic directly measures the uniformity of both read position and fragment length, and we explain how to compute a P-value that can be used to quantify biases arising from experimental protocols and mapping procedures. Our method is useful for comparing different protocols in experiments such as RNA-Seq.

Availability and implementation

We provide a freely available and open source python script that can be used to analyze raw read data or reads mapped to transcripts in BAM format at http://www.math.miami.edu/~vhower/ReadSpy.html.",2012-07-18 +24453363,Ribavirin-resistant variants of foot-and-mouth disease virus: the effect of restricted quasispecies diversity on viral virulence.,"

Unlabelled

Mutagenic nucleoside analogues can be used to isolate RNA virus high-fidelity RNA-dependent RNA polymerase (RdRp) variants, the majority of which are attenuated in vivo. However, attenuated foot-and-mouth disease virus (FMDV) high-fidelity RdRp variants have not been isolated, and the correlations between RdRp fidelity and virulence remain unclear. Here, the mutagen ribavirin was used to select a ribavirin-resistant population of FMDV, and 4 amino acid substitutions (D5N, A38V, M194I, and M296V) were identified in the RdRp-coding region of the population. Through single or combined mutagenesis using a reverse genetics system, we generated direct experimental evidence that the rescued D5N, A38V, and DAMM mutants but not the M194I and M296V mutants are high-fidelity RdRp variants. Mutagen resistance assays revealed that the higher replication fidelity was associated with higher-level resistance to ribavirin. In addition, significantly attenuated fitness and virulence phenotypes were observed for the D5N, A38V, and DAMM mutants. Based on a systematic quantitative analysis of fidelity and virulence, we concluded that higher replication fidelity is associated with a more attenuated virus. These data suggest that the resulting restricted quasispecies diversity compromises the adaptability and virulence of an RNA virus population. The modulation of replication fidelity to attenuate virulence may represent a general strategy for the rational design of new types of live, attenuated vaccine strains.

Importance

The ribavirin-isolated poliovirus (PV) RdRp G64S variant, the polymerases of which were of high replication fidelity, was attenuated in vivo. It has been proposed (M. Vignuzzi, E. Wendt, and R. Andino, Nat. Med. 14:154-161, http://dx.doi.org/10.1038/nm1726) that modulation of replication fidelity is a promising approach for engineering attenuated virus vaccines. The subsequently mutagen-isolated RdRp variants also expressed the high-fidelity polymerase, but not all of them were attenuated. Few studies have shown the exact correlation between fidelity and virulence. The present study investigates the effect of restricted quasispecies diversity on viral virulence via several attenuated FMDV high-fidelity RdRp variants. Our findings may aid in the rational design of a new type of vaccine strain.",2014-01-22 +21653522,The variant call format and VCFtools.,"

Summary

The variant call format (VCF) is a generic format for storing DNA polymorphism data such as SNPs, insertions, deletions and structural variants, together with rich annotations. VCF is usually stored in a compressed manner and can be indexed for fast data retrieval of variants from a range of positions on the reference genome. The format was developed for the 1000 Genomes Project, and has also been adopted by other projects such as UK10K, dbSNP and the NHLBI Exome Project. VCFtools is a software suite that implements various utilities for processing VCF files, including validation, merging, comparing and also provides a general Perl API.

Availability

http://vcftools.sourceforge.net",2011-06-07 +22663945,Yeast 5 - an expanded reconstruction of the Saccharomyces cerevisiae metabolic network.,"

Background

Efforts to improve the computational reconstruction of the Saccharomyces cerevisiae biochemical reaction network and to refine the stoichiometrically constrained metabolic models that can be derived from such a reconstruction have continued since the first stoichiometrically constrained yeast genome scale metabolic model was published in 2003. Continuing this ongoing process, we have constructed an update to the Yeast Consensus Reconstruction, Yeast 5. The Yeast Consensus Reconstruction is a product of efforts to forge a community-based reconstruction emphasizing standards compliance and biochemical accuracy via evidence-based selection of reactions. It draws upon models published by a variety of independent research groups as well as information obtained from biochemical databases and primary literature.

Results

Yeast 5 refines the biochemical reactions included in the reconstruction, particularly reactions involved in sphingolipid metabolism; updates gene-reaction annotations; and emphasizes the distinction between reconstruction and stoichiometrically constrained model. Although it was not a primary goal, this update also improves the accuracy of model prediction of viability and auxotrophy phenotypes and increases the number of epistatic interactions. This update maintains an emphasis on standards compliance, unambiguous metabolite naming, and computer-readable annotations available through a structured document format. Additionally, we have developed MATLAB scripts to evaluate the model's predictive accuracy and to demonstrate basic model applications such as simulating aerobic and anaerobic growth. These scripts, which provide an independent tool for evaluating the performance of various stoichiometrically constrained yeast metabolic models using flux balance analysis, are included as Additional files 1, 2 and 3.

Conclusions

Yeast 5 expands and refines the computational reconstruction of yeast metabolism and improves the predictive accuracy of a stoichiometrically constrained yeast metabolic model. It differs from previous reconstructions and models by emphasizing the distinction between the yeast metabolic reconstruction and the stoichiometrically constrained model, and makes both available as Additional file 4 and Additional file 5 and at http://yeast.sf.net/ as separate systems biology markup language (SBML) files. Through this separation, we intend to make the modeling process more accessible, explicit, transparent, and reproducible.",2012-06-04 +22669905,PSC: protein surface classification.,"We recently proposed to classify proteins by their functional surfaces. Using the structural attributes of functional surfaces, we inferred the pairwise relationships of proteins and constructed an expandable database of protein surface classification (PSC). As the functional surface(s) of a protein is the local region where the protein performs its function, our classification may reflect the functional relationships among proteins. Currently, PSC contains a library of 1974 surface types that include 25,857 functional surfaces identified from 24,170 bound structures. The search tool in PSC empowers users to explore related surfaces that share similar local structures and core functions. Each functional surface is characterized by structural attributes, which are geometric, physicochemical or evolutionary features. The attributes have been normalized as descriptors and integrated to produce a profile for each functional surface in PSC. In addition, binding ligands are recorded for comparisons among homologs. PSC allows users to exploit related binding surfaces to reveal the changes in functionally important residues on homologs that have led to functional divergence during evolution. The substitutions at the key residues of a spatial pattern may determine the functional evolution of a protein. 
In PSC (http://pocket.uchicago.edu/psc/), a pool of changes in residues on similar functional surfaces is provided.",2012-06-04 +22612593,A Bayesian approach to in silico blood-brain barrier penetration modeling.,"The human blood-brain barrier (BBB) is a membrane that protects the central nervous system (CNS) by restricting the passage of solutes. The development of any new drug must take into account its existence whether for designing new molecules that target components of the CNS or, on the other hand, to find new substances that should not penetrate the barrier. Several studies in the literature have attempted to predict BBB penetration, so far with limited success and few, if any, application to real world drug discovery and development programs. Part of the reason is due to the fact that only about 2% of small molecules can cross the BBB, and the available data sets are not representative of that reality, being generally biased with an over-representation of molecules that show an ability to permeate the BBB (BBB positives). To circumvent this limitation, the current study aims to devise and use a new approach based on Bayesian statistics, coupled with state-of-the-art machine learning methods to produce a robust model capable of being applied in real-world drug research scenarios. The data set used, gathered from the literature, totals 1970 curated molecules, one of the largest for similar studies. Random Forests and Support Vector Machines were tested in various configurations against several chemical descriptor set combinations. Models were tested in a 5-fold cross-validation process, and the best one tested over an independent validation set. The best fitted model produced an overall accuracy of 95%, with a mean square contingency coefficient (ϕ) of 0.74, and showing an overall capacity for predicting BBB positives of 83% and 96% for determining BBB negatives. 
This model was adapted into a Web based tool made available for the whole community at http://b3pp.lasige.di.fc.ul.pt.",2012-06-06 +24555830,Factors associated with the survival of prostate cancer patients with rectal involvement.,"

Background

Prostate cancer patients with rectal involvement are rare, and the factors associated with the survival of these patients are yet to be elucidated.

Patients and methods

We collected data on patients who were admitted to our hospital for prostate cancer in the last thirteen years and of those in studies in the literature. The associations of clinical characteristics with survival were evaluated using Cox regression models.

Results

This study included 94 patients (5 admitted to our hospital and 89 from studies in the literature) of prostate cancer with rectal involvement. 11 patients in the group of synchronous rectal involvement at first cancer diagnosis (n = 58) and 23 patients in the group of metachronous diagnosis of rectal involvement (n = 29) died at the latest follow up. The estimated overall survival rates (% ± SE) at 1, 3, and 5 years were 68.3 ± 5.3%, 54.4 ± 7.2%, and 38.1 ± 11.1%, respectively. In the Cox univariate analysis, Asian prostate cancer (p = 0.001) was associated with better survival, while rectal bleeding (p = 0.043), metachronous presentation of development of rectal involvement (p = 0.000), prior hormonal therapy (p = 0.000) and extrarectal metastases (p = 0.054) were associated with poor survival. In multivariate analysis, prior hormone therapy (HR = 14.540, p = 0.000) and rectal bleeding (HR = 2.195, p = 0.041) retained independent poor prognostic values. Thirteen patients survived for more than 3 years; the longest survival time was 96 months. Total pelvic exenteration (TPE) combined with hormonal therapy in 12 hormone-untreated prostate cancer patients gave us six of thirteen long-term survivors for more than 3 years in this series.

Conclusions

Our findings suggest that rectal involvement does not necessarily predict a worse outcome when presenting as a previously hormone-untreated disease and that the prognosis was worse when presenting as a hormone relapsed disease. Prior hormone therapy and rectal bleeding were associated independently with a significantly poor overall survival in prostate cancer patients with rectal involvement. TPE combined with hormonal therapy appears to confer better overall survival in hormonally untreated patients.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1604504118106105.",2014-02-20 +21784796,phyloMeta: a program for phylogenetic comparative analyses with meta-analysis.,"

Summary

phyloMeta is an easy to use console program for integrating phylogenetic information into meta-analysis. It is designed to help ecologists, evolutionary biologists and conservation biologists analyze effect size data extracted from published studies in a comparative phylogenetic context. This software estimates phylogenetic versions of all the traditional meta-analytical statistics used for: pooling effect sizes with weighted regressions; evaluating the homogeneity of these effect sizes; performing moderator tests akin to ANOVA style analyses; and analyzing data with fixed- and random-effects models. phyloMeta is developed in C/C++ and can be used via command line in MS Windows environments.

Availability

phyloMeta can be obtained freely as an executable on the web at http://lajeunesse.myweb.usf.edu/publications

Contact

lajeunesse@usf.edu.",2011-07-22 +23560036,t-LSE: a novel robust geometric approach for modeling protein-protein interaction networks.,"Protein-protein interaction (PPI) networks provide insights into understanding of biological processes, function and the underlying complex evolutionary mechanisms of the cell. Modeling PPI network is an important and fundamental problem in system biology, where it is still of major concern to find a better fitting model that requires less structural assumptions and is more robust against the large fraction of noisy PPIs. In this paper, we propose a new approach called t-logistic semantic embedding (t-LSE) to model PPI networks. t-LSE tries to adaptively learn a metric embedding under the simple geometric assumption of PPI networks, and a non-convex cost function was adopted to deal with the noise in PPI networks. The experimental results show the superiority of the fit of t-LSE over other network models to PPI data. Furthermore, the robust loss function adopted here leads to big improvements for dealing with the noise in PPI network. The proposed model could thus facilitate further graph-based studies of PPIs and may help infer the hidden underlying biological knowledge. The Matlab code implementing the proposed method is freely available from the web site: http://home.ustc.edu.cn/~yzh33108/PPIModel.htm.",2013-04-01 +21901085,Biomedical cloud computing with Amazon Web Services.,"In this overview to biomedical computing in the cloud, we discussed two primary ways to use the cloud (a single instance or cluster), provided a detailed example using NGS mapping, and highlighted the associated costs. While many users new to the cloud may assume that entry is as straightforward as uploading an application and selecting an instance type and storage options, we illustrated that there is substantial up-front effort required before an application can make full use of the cloud's vast resources. 
Our intention was to provide a set of best practices and to illustrate how those apply to a typical application pipeline for biomedical informatics, but also general enough for extrapolation to other types of computational problems. Our mapping example was intended to illustrate how to develop a scalable project and not to compare and contrast alignment algorithms for read mapping and genome assembly. Indeed, with a newer aligner such as Bowtie, it is possible to map the entire African genome using one m2.2xlarge instance in 48 hours for a total cost of approximately $48 in computation time. In our example, we were not concerned with data transfer rates, which are heavily influenced by the amount of available bandwidth, connection latency, and network availability. When transferring large amounts of data to the cloud, bandwidth limitations can be a major bottleneck, and in some cases it is more efficient to simply mail a storage device containing the data to AWS (http://aws.amazon.com/importexport/). More information about cloud computing, detailed cost analysis, and security can be found in references.",2011-08-25 +21765897,"Conditional random fields for fast, large-scale genome-wide association studies.","Understanding the role of genetic variation in human diseases remains an important problem to be solved in genomics. An important component of such variation consist of variations at single sites in DNA, or single nucleotide polymorphisms (SNPs). Typically, the problem of associating particular SNPs to phenotypes has been confounded by hidden factors such as the presence of population structure, family structure or cryptic relatedness in the sample of individuals being analyzed. Such confounding factors lead to a large number of spurious associations and missed associations. 
Various statistical methods have been proposed to account for such confounding factors such as linear mixed-effect models (LMMs) or methods that adjust data based on a principal components analysis (PCA), but these methods either suffer from low power or cease to be tractable for larger numbers of individuals in the sample. Here we present a statistical model for conducting genome-wide association studies (GWAS) that accounts for such confounding factors. Our method scales in runtime quadratic in the number of individuals being studied with only a modest loss in statistical power as compared to LMM-based and PCA-based methods when testing on synthetic data that was generated from a generalized LMM. Applying our method to both real and synthetic human genotype/phenotype data, we demonstrate the ability of our model to correct for confounding factors while requiring significantly less runtime relative to LMMs. We have implemented methods for fitting these models, which are available at http://www.microsoft.com/science.",2011-07-12 +22804825,HuPho: the human phosphatase portal.,"Phosphatases and kinases contribute to the regulation of protein phosphorylation homeostasis in the cell. Phosphorylation is a key post-translational modification underlying the regulation of many cellular processes. Thus, a comprehensive picture of phosphatase function and the identification of their target substrates would aid a systematic approach to a mechanistic description of cell signalling. Here we present a website designed to facilitate the retrieval of information about human protein phosphatases. To this end we developed a search engine to recover and integrate information annotated in several publicly available web resources. In addition we present a text-mining-assisted annotation effort aimed at extracting phosphatase related data reported in the scientific literature. 
The HuPho (human phosphatases) website can be accessed at http://hupho.uniroma2.it.",2012-08-24 +30727401,First Report of Leaf Spot on Gerbera jamesonii Caused by Corynespora cassiicola in China.,"Gerbera (Gerbera jamesonii Bolus ex. Hook f.) is a popular cut flower and flowering potted plant. In August 2011, a new leaf spot disease was observed on double-type Gerbera growing in outdoor ground beds in Guangzhou, Guangdong Province, China. Approximately 30% of about 20,000 Gerbera plants in the Guangzhou ground beds were affected. Leaf spots were round or irregular with grayish centers surrounded by dark brown borders and ranged from 5 to 15 mm in diameter. Leaves with multiple lesions became blighted. A fungus was isolated from the lesions and single-spore isolates plated on potato dextrose agar (PDA) produced gray, floccose colonies, which reached 65 mm on PDA after 7 days at 28°C. Conidiophores were brown or olivaceous, cylindrical, straight and unbranched, two to seven septations, and 25 to 83 × 4 to 7 μm. Conidiogenous cells were olivaceous or brown, cylindrical, and 11 to 21 × 4 to 6 μm. Conidia were borne singly or in chains of two to five, brown, cylindrical, straight to slightly curved, two to eight pseudosepta, and 30 to 90 × 5.5 to 11.5 μm (mean 70.4 × 7.3 μm), with a conspicuous hilum. These characteristics were consistent with the description of Corynespora cassiicola (Berk. & M.A. Curtis.) C.T. Wei (1). The internal transcribed spacer region (ITS) of one isolate (GenBank Accession No. JN853778) was amplified using primers ITS4 and ITS5 (3) and sequenced. A BLAST search in GenBank revealed highest similarity (99%) to sequences of C. cassiicola (AY238606.1 and FJ852715.1). Pathogenicity tests were conducted on 10 potted double-type Gerbera plants. Five wounded and five unwounded leaves on each plant were inoculated with 5-mm mycelial plugs from the periphery of 5-day-old cultures of the isolated fungus. 
The plugs were put on the leaf surface and secured with sterile wet cotton. Sterile PDA plugs were used as the control treatment on different leaves of the same plants that were inoculated. Plants were covered with plastic bags and incubated in a growth chamber with 12 h of light at 28°C. Necrotic lesions appeared on wounded leaves after 2 to 3 days of incubation and on unwounded leaves 5 to 7 days after incubation. Symptoms on wounded and unwounded leaves were similar to those observed in the field, whereas control leaves inoculated with sterile PDA plugs remained symptomless. C. cassiicola was consistently reisolated from these lesions. Although there are approximately 644 reported hosts of C. cassiicola (2), to our knowledge, this is the first report of C. cassiicola leaf spot on G. jamesonii. Because the disease caused damage to the foliage and affected the flowering of the plants, control measures may need to be implemented for the production of Gerbera in cut flower nurseries. References: (1) M. B. Ellis. CMI Mycol. Pap. 65:15, 1957. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 21 November 2011. (3) T. J. White et al. Page 315 in: PCR Protocols: A Guide to Methods and Applications. M. A. Innis et al., eds. Academic Press, San Diego, 1990.",2012-06-01 +30727391,First Report of Brown Spot Needle Blight on Pinus thunbergii Caused by Lecanosticta acicola in Korea.,"Pinus thunbergii Parl., known as black pine, is a pine native to coastal areas of Japan and Korea. Because of its resistance to pollution and salt, it is planted as windbreakers along the coast. In March 2010, needle blight symptoms were found on several trees of black pine in Naju, southern Korea. Further surveys in 2010 and 2011 showed that these symptoms are rather common but disease incidence is less than 1%. Small, circular grayish green spots first appeared on the needles. 
The spots developed into brown bands reaching 1 to 2 mm long, sometimes with yellow margins. Dark olivaceous to dark grayish stromata were erumpent and conspicuous on the brown lesions in the later stage of disease development. Conidiophores were simple or occasionally branched, 1- to 2-septate, pale brown to olivaceous brown, and smooth walled. Conidia (n = 30) were olivaceous brown to grayish brown, verrucose, thick-walled, mildly curved, allantoid to fusiform, one- to five-septate (mostly three-septate), and 20 to 45 × 3.5 to 5 μm. Morphological characteristics of the fungus were consistent with those of Lecanosticta acicola (Thüm.) Syd. (anamorph of Mycosphaerella dearnessii M.E. Barr), previously known as the causal agent of brown spot needle blight of pines (2,4). The teleomorph was not observed. On potato dextrose agar, single-spore cultures of three isolates were obtained from conidia sporulating on needles. An isolate was preserved at the Korean Agricultural Culture Collection (Accession No. KACC44982). Genomic DNA was extracted using the DNeasy Plant Mini DNA Extraction Kit (Qiagen Inc., Valencia, CA) and the complete internal transcribed spacer (ITS) region of rDNA was amplified and sequenced with the primers ITS1/ITS4. The resulting ITS sequence of 543 bp was deposited in GenBank (Accession No. JQ245448). A GenBank BLAST search produced an exact match for the sequences of M. dearnessii (= L. acicola) on P. mugo Tura from Lithuania (HM367708) and P. radiata D. Don from France (GU214663), with 100% sequence similarity. To conduct a pathogenicity test, a conidial suspension (approx. 2 × 10⁵ conidia/ml) was prepared by harvesting conidia from 5-week-old cultures of KACC44982 and sprayed onto the needles of five 3-year-old healthy seedlings. Five noninoculated seedlings of the same age served as controls. Inoculated and noninoculated plants were kept in humid chambers for 48 h in a glasshouse. 
After 28 days, typical leaf spot symptoms started to develop on the needles of inoculated plants. The fungus, L. acicola, was reisolated from those lesions, confirming Koch's postulates. No symptoms were observed on control plants. The disease has been previously reported on several species of Pinus in the Americas (1) and recently in China (3), Japan (4), and Europe (2). To our knowledge, this is the first report of the Lecanosticta-Pinus association in Korea. Occurrence of the disease in Korea is a new threat to the health of black pine, especially in nursery plots. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.arsgrin.gov/fungaldatabases/ December 2011. (2) L. Jankovsky et al. Plant Protect. Sci. 45:16, 2009. (3) C. Li et al. J. Nanjing Inst. For. 1986:11, 1986. (4) Y. Suto and D. Ougi. Mycoscience 39:319, 1998.",2012-06-01 +30727371,First Report of Powdery Mildew Caused by Erysiphe heraclei on Dill in Korea.,"Dill (Anethum graveolens L.) is a scented herb belonging to the family Apiaceae. The plant has a long and ancient history in many countries as a culinary and medicinal herb. In October 2008, plants showing typical symptoms of powdery mildew disease were found in polythene tunnels in Icheon, Korea. Symptoms first appeared as thin white colonies, which subsequently showed abundant growth on the leaves and stems. Most diseased plantings were unmarketable and shriveled without being harvested. The damage caused by powdery mildew infections on dill has reappeared every year, with confirmation of the causal agent made again in 2011. Voucher specimens were deposited in the Korea University Herbarium (KUS). Hyphae were septate, branched, and 4 to 7 μm wide. Appressoria on the mycelium were multilobed or moderately lobed. 
Conidiophores were unbranched, cylindrical, 80 to 140 × 8 to 10 μm, straight or slightly flexuous in foot cells, and produced conidia singly, followed by two to three cells. Conidia were oblong elliptical to oblong, 28 to 50 × 14 to 18 μm, lacked fibrosin bodies, and produced germ tubes on the subterminal position, with angular/rectangular wrinkling of the outer walls. Primary conidia were apically conical, basally subtruncate, and generally smaller than the secondary conidia. No chasmothecia were found but the above characteristics are consistent with Erysiphe heraclei DC. (1). To confirm the identity of the causal fungus, the complete internal transcribed spacer (ITS) region of rDNA from isolate KUS-F26425 was amplified with primers ITS5 and P3 as described by Takamatsu et al. (3) and directly sequenced. The resulting 630-bp sequence was deposited in GenBank (Accession No. JQ517297). Comparison with the sequences available in the GenBank database revealed that the isolate showed >99% sequence similarity with those of E. heraclei from Pleurospermum camtschaticum (GU173850) and Daucus carota (EU371725). Pathogenicity was confirmed through inoculation by gently pressing diseased leaves onto leaves of five healthy potted dill plants. Five noninoculated plants served as controls. Plants were maintained in a greenhouse at 22 ± 2°C. Inoculated plants developed signs and symptoms after 7 days, whereas the control plants remained healthy. The fungus present on the inoculated plants was morphologically identical to that originally observed on diseased plants. Powdery mildew caused by E. heraclei on dill has been known worldwide where the plant is cultivated (2). In East Asia, however, dill powdery mildew was known only from Taiwan (4). To our knowledge, this is the first report of powdery mildew infections by E. heraclei on dill in Korea. 
Since cultivation of dill was only recently started on a commercial scale in Korea, powdery mildew infections pose a serious threat to safe production of this herb, especially in organic farming where chemical control would be prohibited. References: (1) U. Braun. Beih. Nova Hedw. 89:1, 1987. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ January 28, 2012. (3) S. Takamatsu et al. Mycol. Res. 113:117, 2009. (4) J. G. Tsay. Trans. Mycol. Soc. Repub. China 5:1, 1990.",2012-06-01 +22637735,Proliferative and nonproliferative lesions of the rat and mouse urinary system.,"The INHAND Project (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) is a joint initiative of the Societies of Toxicologic Pathology from Europe (ESTP), Great Britain (BSTP), Japan (JSTP), and North America (STP) to develop an internationally accepted nomenclature for proliferative and nonproliferative lesions in laboratory animals. The purpose of this publication is to provide a standardized nomenclature for classifying lesions observed in the urinary tract of rats and mice. The standardized nomenclature of urinary tract lesions presented in this document is also available electronically on the Internet (http://www.goreni.org/). Sources of material included histopathology databases from government, academia, and industrial laboratories throughout the world. Content includes spontaneous developmental and aging lesions as well as those induced by exposure to test materials. 
A widely accepted and utilized international harmonization of nomenclature for urinary tract lesions in laboratory animals will decrease confusion among regulatory and scientific research organizations in different countries and provide a common language to increase and enrich international exchanges of information among toxicologists and pathologists.",2012-06-01 +30727372,First Report of Zonate Leaf Spot of Glycine max Caused by Cristulariella moricola in Korea.,"Soybean (Glycine max (L.) Merr.) is native to East Asia including Korea and is widely grown and consumed as an edible seed. In August 2011, following a prolonged period of cool and moist weather, zonate leaf spots were observed in local soybean (cultivar unknown) planted in a mountainous area of Goseong, central Korea. A voucher specimen was collected and entered at the Korea University herbarium (KUS-F26049). Initial symptoms included grayish green-to-grayish brown spots without border lines. As the lesions enlarged, they coalesced, leading to leaf blight and premature defoliation. Sporophores on the leaf lesions were dominantly hypophyllous, rarely epiphyllous, solitary, erect, easily detachable, and as long as 750 μm. The upper portion of the sporophores consisted of a pyramidal head that was ventricose, 275 to 500 μm long, and 80 to 160 μm wide. The fungus was isolated from leaf lesions and maintained on potato dextrose agar (PDA). Sclerotia were produced on PDA after 4 to 5 weeks at 18°C without light, but conidia were not observed in culture. The morphological and cultural characteristics were consistent with those of Cristulariella moricola (Hino) Redhead (2,3). An isolate was preserved in the Korean Agricultural Culture Collection (KACC46401). Genomic DNA was extracted with the DNeasy Plant Mini DNA Extraction Kit (Qiagen Inc., Valencia, CA). The complete internal transcribed spacer (ITS) region of rDNA was amplified with the primers ITS1/ITS4 and sequenced. 
The resulting sequence of 453 bp was deposited in GenBank (Accession No. JQ036182). A BLAST search in GenBank revealed that the sequence showed an exact match with that of C. moricola from Acer negundo (JQ036181) and >99% similarity with that of Grovesinia pyramidalis, teleomorph of C. moricola from Juglans sp. (Z81433). To determine the pathogenicity of the fungus, sporophores with the pyramidal head were carefully detached from a lesion on the naturally infected leaflet with fine needles. Each sporophore was transferred individually onto four places of six detached healthy soybean leaflets. The leaflets were placed in humid chambers at 100% relative humidity and incubated at 16 to 20°C (4). Symptoms were observed after 2 days on all inoculated leaflets (one to four lesions/leaflet). The lesions enlarged rapidly and reached ~20 mm diameter in a week. A number of sporulating structures and immature sclerotia were formed on the abaxial surface of the leaf 2 weeks after inoculation. The pathogen was reisolated from lesions on the inoculated leaflets, confirming Koch's postulates. No symptoms were observed on the control leaflets kept in humid chambers for 2 weeks. C. moricola was known to cause zonate leaf spots and defoliation on a wide range of woody and annual plants (1), but not on G. max. To our knowledge, this is the first report of Cristulariella infection in cultivated soybeans. Since the infections may be limited to the mountainous area with low night temperature and high humidity, economic losses seem to be negligible. However, the disease could be a potential threat to the safe production of soybeans in areas with prolonged periods of cool and moist weather. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.arsgrin.gov/fungaldatabases/ , January 7, 2012. (2) H. B. Lee and C. J. Kim. Plant Dis. 86:440, 2002. (3) S. A. Redhead. Can. J. Bot. 53:700, 1975. (4) H. 
J. Su and S. C. Leu. Plant Dis. 67:915, 1983.",2012-06-01 +22689763,Towards 3D structure prediction of large RNA molecules: an integer programming framework to insert local 3D motifs in RNA secondary structure.,"

Motivation

The prediction of RNA 3D structures from its sequence only is a milestone to RNA function analysis and prediction. In recent years, many methods addressed this challenge, ranging from cycle decomposition and fragment assembly to molecular dynamics simulations. However, their predictions remain fragile and limited to small RNAs. To expand the range and accuracy of these techniques, we need to develop algorithms that will enable to use all the structural information available. In particular, the energetic contribution of secondary structure interactions is now well documented, but the quantification of non-canonical interactions-those shaping the tertiary structure-is poorly understood. Nonetheless, even if a complete RNA tertiary structure energy model is currently unavailable, we now have catalogues of local 3D structural motifs including non-canonical base pairings. A practical objective is thus to develop techniques enabling us to use this knowledge for robust RNA tertiary structure predictors.

Results

In this work, we introduce RNA-MoIP, a program that benefits from the progresses made over the last 30 years in the field of RNA secondary structure prediction and expands these methods to incorporate the novel local motif information available in databases. Using an integer programming framework, our method refines predicted secondary structures (i.e. removes incorrect canonical base pairs) to accommodate the insertion of RNA 3D motifs (i.e. hairpins, internal loops and k-way junctions). Then, we use predictions as templates to generate complete 3D structures with the MC-Sym program. We benchmarked RNA-MoIP on a set of 9 RNAs with sizes varying from 53 to 128 nucleotides. We show that our approach (i) improves the accuracy of canonical base pair predictions; (ii) identifies the best secondary structures in a pool of suboptimal structures; and (iii) predicts accurate 3D structures of large RNA molecules.

Availability

RNA-MoIP is publicly available at: http://csb.cs.mcgill.ca/RNAMoIP.",2012-06-01 +22198331,Prevalidation study of the BALB/c 3T3 cell transformation assay for assessment of carcinogenic potential of chemicals.,"The cell transformation assays (CTAs) have attracted attention within the field of alternative methods due to their potential to reduce the number of animal experiments in the field of carcinogenicity. The CTA using BALB/c 3T3 cells has proved to be able to respond to chemical carcinogens by inducing morphologically transformed foci. Although a considerable amount of data on the performance of the assay has been collected, a formal evaluation focusing particularly on reproducibility, and a standardised protocol were considered important. Therefore the European Centre for the Validation of Alternative Methods (ECVAM) decided to coordinate a prevalidation study of the BALB/c 3T3 CTA. Three different laboratories from Japan and Europe participated. In the study the following modules were assessed stepwise: test definition (Module 1) consisted of the standardisation of the protocol, the selection of the cell lineage, and the preparation of a photo catalogue on the transformed foci. The within-laboratory reproducibility (Module 2) and the transferability (Module 3) were assessed using non-coded and coded 3-methylcholanthrene. Then, five coded chemicals were tested for the assessment of between-laboratory reproducibility (Module 4). All three laboratories obtained positive results with benzo[a]pyrene, phenanthrene and o-toluidine HCl. 2-Acetylaminofluorene was positive in two laboratories and equivocal in one laboratory. Anthracene was negative in all three laboratories. The chemicals except phenanthrene, which is classified by IARC (http://monographs.iarc.fr) as group 3 ""not classifiable as to its carcinogenicity to human"", were correctly predicted as carcinogens. Further studies on phenanthrene will clarify this discrepancy. 
Thus, although only a few chemicals were tested, it can be seen that the predictive capacity of the BALB/c 3T3 CTA is satisfactory. On the basis of the outcome of this study, an improved protocol, incorporating some changes related to data interpretation, has been developed. It is recommended that this protocol be used in the future to provide more data that may confirm the robustness of this protocol and the performance of the assay itself. During the study it became clear that selecting the most appropriate concentrations for the transformation assay is crucial.",2011-12-17 +22661580,ExPASy: SIB bioinformatics resource portal.,"ExPASy (http://www.expasy.org) has worldwide reputation as one of the main bioinformatics resources for proteomics. It has now evolved, becoming an extensible and integrative portal accessing many scientific resources, databases and software tools in different areas of life sciences. Scientists can henceforth access seamlessly a wide range of resources in many different domains, such as proteomics, genomics, phylogeny/evolution, systems biology, population genetics, transcriptomics, etc. The individual resources (databases, web-based and downloadable software tools) are hosted in a 'decentralized' way by different groups of the SIB Swiss Institute of Bioinformatics and partner institutions. Specifically, a single web portal provides a common entry point to a wide range of resources developed and operated by different SIB groups and external institutions. The portal features a search function across 'selected' resources. Additionally, the availability and usage of resources are monitored. The portal is aimed for both expert users and people who are not familiar with a specific domain in life sciences. 
The new web interface provides, in particular, visual guidance for newcomers to ExPASy.",2012-05-31 +24663182,Dietary plant extracts modulate gene expression profiles in ileal mucosa of weaned pigs after an Escherichia coli infection.,"This study was conducted to characterize the effects of infection with a pathogenic F-18 Escherichia coli and 3 different plant extracts on gene expression of ileal mucosa in weaned pigs. Weaned pigs (total = 64, 6.3 ± 0.2 kg BW, and 21-d old) were housed in individual pens for 15 d, 4 d before and 11 d after the first inoculation (d 0). Treatments were in a 2 × 4 factorial arrangement: with or without an F-18 E. coli challenge and 4 diets (a nursery basal, control diet [CON], 10 ppm of capsicum oleoresin [CAP], garlic botanical [GAR], or turmeric oleoresin [TUR]). Results reported elsewhere showed that the plant extracts reduced diarrhea in challenged pigs. Total RNA (4 pigs/treatment) was extracted from ileal mucosa of pigs at d 5 post inoculation. Double-stranded cDNA was amplified, labeled, and further hybridized to the microarray, and data were analyzed in R. Differential gene expression was tested by fitting a mixed linear model in a 2 × 4 factorial ANOVA. Bioinformatics analysis was conducted by DAVID Bioinformatics Resources 6.7 (DAVID; National Institute of Allergy and Infectious Diseases [NIAID, NIH], http://david.abcc.ncifcrf.gov). The E. coli infection altered (P < 0.05) the expression of 240 genes in pigs fed the CON (148 up- and 92 down-regulated). Compared with the infected CON, feeding CAP, GAR, or TUR altered (P < 0.05) the expression of 52 genes (18 up, 34 down), 117 genes (34 up- and 83 down-regulated), or 84 genes (16 up- and 68 down-regulated), respectively, often counteracting the effects of E. coli. The E. 
coli infection up-regulated (P < 0.05) the expression of genes related to the activation of immune response and complement and coagulation cascades, but down-regulated (P < 0.05) the expression of genes involved in protein synthesis and accumulation. Compared with the CON, feeding CAP and GAR increased (P < 0.05) the expression of genes related to integrity of membranes in infected pigs, indicating enhanced gut mucosa health. Moreover, feeding all 3 plant extracts reduced (P < 0.05) the expression of genes associated with antigen presentation or other biological processes of immune responses, indicating they attenuated overstimulation of immune responses caused by E. coli. These findings may explain why diarrhea was reduced and clinical immune responses were ameliorated in infected pigs fed plant extracts. In conclusion, plant extracts altered the expression of genes in ileal mucosa of E. coli-infected pigs, perhaps leading to the reduction in diarrhea reported previously.",2014-03-18 +22659539,Expression of a recombinant Phoneutria toxin active in calcium channels.,"PnTx3-4 is a toxin isolated from the venom of the spider Phoneutria nigriventer that blocks N-, P/Q-, and R-type voltage-gated calcium channels and has great potential for clinical applications. In this report we used the SUMO system to express large amounts of recombinant PnTx3-4 peptide, which was found in both soluble and insoluble fractions of bacterial extracts. We purified the recombinant toxin from both fractions and showed that the recombinant peptide showed biological activity similar to the native PnTx3-4. In silico analysis of the primary sequence of PnTx3-4 indicated that the peptide conforms to all the criteria of a knottin scaffold. 
Additionally, circular dichroism spectrum analysis of the recombinant PnTx3-4 predicted that the toxin structure is composed of approximately 53% turns/unordered, 31% α-helix and 16% β-strand, which is consistent with predicted model of the PnTx3-4 knottin scaffold available at the knottin database (http://knottin.cbs.cnrs.fr). These studies provide the basis for future large scale production and structure-function investigation of PnTx3-4.",2012-05-31 +24949398,Endoscopic ultrasound practice survey in latin america.,"

Objective

Endoscopic ultrasound (EUS) has become an important imaging modality for the diagnosis, staging and treatment of gastrointestinal disorders. However, no official data exists regarding clinical EUS practice in Latin America (LA). This study assessed current EUS practice and training.

Patients and methods

A direct mail survey questionnaire was sent to 268 Capítulo Latino Americano de Ultrasonido Endoscópico members between August 2012 and January 2013. The questionnaire was sent out in English, Spanish and Portuguese languages and was available through the following site: http://www.cleus-encuesta.com. Responses were requested only from physicians who perform EUS.

Results

A total of 70 LA physicians answered the questionnaire until January 2013. Most of the participants were under 42 years of age (53%) and 80% were men. Most participants (45.7%) perform EUS in Brazil, 53% work in a private hospital. The majority (70%) also perform endoscopic retrograde cholangiopancreatography. A total of 42% had performed EUS for 2 years or less and 22.7% for 11 years or more. Only 10% performed more than 5000 EUS. The most common indication was an evaluation of pancreatic-biliary-ampullary lesions. Regarding training, 48.6% had more than 6 months of dedicated hands-on EUS and 37% think that at least 6 months of formal training is necessary to acquire competence. Furthermore, 64% think that more than 50 procedures for pancreatic-biliary lesions are necessary.

Conclusion

This survey provides insight into the status of EUS in LA. EUS is performed mostly by young endoscopists in LA. Diagnostic upper EUS is the most common EUS procedure. Most endosonographers believe that formal training is necessary to acquire competence.",2013-10-01 +22536968,"BEAT: Bioinformatics Exon Array Tool to store, analyze and visualize Affymetrix GeneChip Human Exon Array data from disease experiments.","

Background

It is known from recent studies that more than 90% of human multi-exon genes are subject to Alternative Splicing (AS), a key molecular mechanism in which multiple transcripts may be generated from a single gene. It is widely recognized that a breakdown in AS mechanisms plays an important role in cellular differentiation and pathologies. Polymerase Chain Reactions, microarrays and sequencing technologies have been applied to the study of transcript diversity arising from alternative expression. Last generation Affymetrix GeneChip Human Exon 1.0 ST Arrays offer a more detailed view of the gene expression profile providing information on the AS patterns. The exon array technology, with more than five million data points, can detect approximately one million exons, and it allows performing analyses at both gene and exon level. In this paper we describe BEAT, an integrated user-friendly bioinformatics framework to store, analyze and visualize exon arrays datasets. It combines a data warehouse approach with some rigorous statistical methods for assessing the AS of genes involved in diseases. Meta statistics are proposed as a novel approach to explore the analysis results. BEAT is available at http://beat.ba.itb.cnr.it.

Results

BEAT is a web tool which allows uploading and analyzing exon array datasets using standard statistical methods and an easy-to-use graphical web front-end. BEAT has been tested on a dataset with 173 samples and tuned using new datasets of exon array experiments from 28 colorectal cancer and 26 renal cell cancer samples produced at the Medical Genetics Unit of IRCCS Casa Sollievo della Sofferenza. To highlight all possible AS events, alternative names, accession Ids, Gene Ontology terms and biochemical pathways annotations are integrated with exon and gene level expression plots. The user can customize the results choosing custom thresholds for the statistical parameters and exploiting the available clinical data of the samples for a multivariate AS analysis.

Conclusions

Despite exon array chips being widely used for transcriptomics studies, there is a lack of analysis tools offering advanced statistical features and requiring no programming knowledge. BEAT provides a user-friendly platform for a comprehensive study of AS events in human diseases, displaying the analysis results with easily interpretable and interactive tables and graphics.",2012-03-28 +23685613,Prediction of clustered RNA-binding protein motif sites in the mammalian genome.,"Sequence-specific interactions of RNA-binding proteins (RBPs) with their target transcripts are essential for post-transcriptional gene expression regulation in mammals. However, accurate prediction of RBP motif sites has been difficult because many RBPs recognize short and degenerate sequences. Here we describe a hidden Markov model (HMM)-based algorithm mCarts to predict clustered functional RBP-binding sites by effectively integrating the number and spacing of individual motif sites, their accessibility in local RNA secondary structures and cross-species conservation. This algorithm learns and quantifies rules of these features, taking advantage of a large number of in vivo RBP-binding sites obtained from cross-linking and immunoprecipitation data. We applied this algorithm to study two representative RBP families, Nova and Mbnl, which regulate tissue-specific alternative splicing through interacting with clustered YCAY and YGCY elements, respectively, and predicted their binding sites in the mouse transcriptome. Despite the low information content in individual motif elements, our algorithm made specific predictions for successful experimental validation. Analysis of predicted sites also revealed cases of extensive and distal RBP-binding sites important for splicing regulation. This algorithm can be readily applied to other RBPs to infer their RNA-regulatory networks. 
The software is freely available at http://zhanglab.c2b2.columbia.edu/index.php/MCarts.",2013-05-18 +22647208,AlzPathway: a comprehensive map of signaling pathways of Alzheimer's disease.,"

Background

Alzheimer's disease (AD) is the most common cause of dementia among the elderly. To clarify pathogenesis of AD, thousands of reports have been accumulating. However, knowledge of signaling pathways in the field of AD has not been compiled as a database before.

Description

Here, we have constructed a publicly available pathway map called ""AlzPathway"" that comprehensively catalogs signaling pathways in the field of AD. We have collected and manually curated over 100 review articles related to AD, and have built an AD pathway map using CellDesigner. AlzPathway is currently composed of 1347 molecules and 1070 reactions in neuron, brain blood barrier, presynaptic, postsynaptic, astrocyte, and microglial cells and their cellular localizations. AlzPathway is available as both the SBML (Systems Biology Markup Language) map for CellDesigner and the high resolution image map. AlzPathway is also available as a web service (online map) based on Payao system, a community-based, collaborative web service platform for pathway model curation, enabling continuous updates by AD researchers.

Conclusions

AlzPathway is the first comprehensive map of intra, inter and extra cellular AD signaling pathways which can enable mechanistic deciphering of AD pathogenesis. The AlzPathway map is accessible at http://alzpathway.org/.",2012-05-30 +23609546,LISE: a server using ligand-interacting and site-enriched protein triangles for prediction of ligand-binding sites.,"LISE is a web server for a novel method for predicting small molecule binding sites on proteins. It differs from a number of servers currently available for such predictions in two aspects. First, rather than relying on knowledge of similar protein structures, identification of surface cavities or estimation of binding energy, LISE computes a score by counting geometric motifs extracted from sub-structures of interaction networks connecting protein and ligand atoms. These network motifs take into account spatial and physicochemical properties of ligand-interacting protein surface atoms. Second, LISE has now been more thoroughly tested, as, in addition to the evaluation we previously reported using two commonly used small benchmark test sets and targets of two community-based experiments on ligand-binding site predictions, we now report an evaluation using a large non-redundant data set containing >2000 protein-ligand complexes. This unprecedented test, the largest ever reported to our knowledge, demonstrates LISE's overall accuracy and robustness. Furthermore, we have identified some hard to predict protein classes and provided an estimate of the performance that can be expected from a state-of-the-art binding site prediction server, such as LISE, on a proteome scale. 
The server is freely available at http://lise.ibms.sinica.edu.tw.",2013-04-22 +22644586,Update of the pompe disease mutation database with 60 novel GAA sequence variants and additional studies on the functional effect of 34 previously reported variants.,"Pompe disease is an autosomal recessive lysosomal glycogen storage disorder, characterized by progressive muscle weakness. Deficiency of acid α-glucosidase (EC; 3.2.1.20/3) can be caused by numerous pathogenic variants in the GAA gene. The Pompe Disease Mutation Database at http://www.pompecenter.nl aims to list all variants and their effect. This update reports on 94 variants. We examined 35 novel and 34 known mutations by site-directed mutagenesis and transient expression in COS-7 cells or HEK293T cells. Each of these mutations was given a severity rating using a previously published system, based on the level of acid α-glucosidase activity in medium and transfected cells and on the quantity and quality of the different molecular mass species in the posttranslational modification and transport of acid α-glucosidase. This approach enabled to classify 55 missense mutations as pathogenic and 13 as likely nonpathogenic. Based on their nature and the use of in silico analysis (Alamut® software), 12 of the additional 25 novel mutations were predicted to be pathogenic including 4 splicing mutations, 6 mutations leading to frameshift, and 2 point mutations causing stop codons. Seven of the additional mutations were considered nonpathogenic (4 silent and 3 occurring in intron regions), and 6 are still under investigation.",2012-05-29 +23962615,fmcsR: mismatch tolerant maximum common substructure searching in R.,"

Motivation

The ability to accurately measure structural similarities among small molecules is important for many analysis routines in drug discovery and chemical genomics. Algorithms used for this purpose include fragment-based fingerprint and graph-based maximum common substructure (MCS) methods. MCS approaches provide one of the most accurate similarity measures. However, their rigid matching policies limit them to the identification of perfect MCSs. To eliminate this restriction, we introduce a new mismatch tolerant search method for identifying flexible MCSs (FMCSs) containing a user-definable number of atom and/or bond mismatches.

Results

The fmcsR package provides an R interface, with the time-consuming steps of the FMCS algorithm implemented in C++. It includes utilities for pairwise compound comparisons, structure similarity searching, clustering and visualization of MCSs. In comparison with an existing MCS tool, fmcsR shows better time performance over a wide range of compound sizes. When mismatching of atoms or bonds is turned on, the compute times increase as expected, and the resulting FMCSs are often substantially larger than their strict MCS counterparts. Based on extensive virtual screening (VS) tests, the flexible matching feature enhances the enrichment of active structures at the top of MCS-based similarity search results. With respect to overall and early enrichment performance, FMCS outperforms most of the seven other VS methods considered in these tests.

Availability

fmcsR is freely available for all common operating systems from the Bioconductor site (http://www.bioconductor.org/packages/devel/bioc/html/fmcsR.html).

Contact

thomas.girke@ucr.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-08-20 +22689755,DELISHUS: an efficient and exact algorithm for genome-wide detection of deletion polymorphism in autism.,"

Motivation

The understanding of the genetic determinants of complex disease is undergoing a paradigm shift. Genetic heterogeneity of rare mutations with deleterious effects is more commonly being viewed as a major component of disease. Autism is an excellent example where research is active in identifying matches between the phenotypic and genomic heterogeneities. A considerable portion of autism appears to be correlated with copy number variation, which is not directly probed by single nucleotide polymorphism (SNP) array or sequencing technologies. Identifying the genetic heterogeneity of small deletions remains a major unresolved computational problem partly due to the inability of algorithms to detect them.

Results

In this article, we present an algorithmic framework, which we term DELISHUS, that implements three exact algorithms for inferring regions of hemizygosity containing genomic deletions of all sizes and frequencies in SNP genotype data. We implement an efficient backtracking algorithm — that processes a 1 billion entry genome-wide association study SNP matrix in a few minutes — to compute all inherited deletions in a dataset. We further extend our model to give an efficient algorithm for detecting de novo deletions. Finally, given a set of called deletions, we also give a polynomial time algorithm for computing the critical regions of recurrent deletions. DELISHUS achieves significantly lower false-positive rates and higher power than previously published algorithms partly because it considers all individuals in the sample simultaneously. DELISHUS may be applied to SNP array or sequencing data to identify the deletion spectrum for family-based association studies.

Availability

DELISHUS is available at http://www.brown.edu/Research/Istrail_Lab/.",2012-06-01 +21715059,[Statistical Analysis of Rates and Trends (SART): a web-based tool for statistical calculation of population indicators].,"We propose a web-based tool (SART: http://regstattools.net/sart.html) that automates calculations to obtain various population indicators that can be used for the control of diseases or health events. SART has four modules: a) a descriptive module that allows calculation of the number of cases and their percentage, the crude rate, the adjusted rate, the truncated rate and the cumulative rate; b) the estimated annual percentage change of rates; c) calculation of expected cases; and d) the standardized incidence of mortality ratio. SART requests a base file and input parameters from the user before processing the data. The data and the results obtained are processed and then sent by email to the user. The results are provided by sex and for each of the study variables (diseases, ethnic groups, geographic areas...) introduced into the base file.",2011-06-28 +23977154,DTW4Omics: comparing patterns in biological time series.,"When studying time courses of biological measurements and comparing these to other measurements eg. gene expression and phenotypic endpoints, the analysis is complicated by the fact that although the associated elements may show the same patterns of behaviour, the changes do not occur simultaneously. In these cases standard correlation-based measures of similarity will fail to find significant associations. Dynamic time warping (DTW) is a technique which can be used in these situations to find the optimal match between two time courses, which may then be assessed for its significance. We implement DTW4Omics, a tool for performing DTW in R. This tool extends existing R scripts for DTW making them applicable for ""omics"" datasets where thousands entities may need to be compared with a range of markers and endpoints. 
It includes facilities to estimate the significance of the matches between the supplied data, and provides a set of plots to enable the user to easily visualise the output. We illustrate the utility of this approach using a dataset linking the exposure of the colon carcinoma Caco-2 cell line to oxidative stress by hydrogen peroxide (H2O2) and menadione across 9 timepoints and show that on average 85% of the genes found are not obtained from a standard correlation analysis between the genes and the measured phenotypic endpoints. We then show that when we analyse the genes identified by DTW4Omics as significantly associated with a marker for oxidative DNA damage (8-oxodG), through over-representation, an Oxidative Stress pathway is identified as the most over-represented pathway demonstrating that the genes found by DTW4Omics are biologically relevant. In contrast, when the positively correlated genes were similarly analysed, no pathways were found. The tool is implemented as an R Package and is available, along with a user guide from http://web.tgx.unimaas.nl/svn/public/dtw/.",2013-08-20 +21317138,Automated analysis of biological oscillator models using mode decomposition.,"

Motivation

Oscillating signals produced by biological systems have shapes, described by their Fourier spectra, that can potentially reveal the mechanisms that generate them. Extracting this information from measured signals is interesting for the validation of theoretical models, discovery and classification of interaction types, and for optimal experiment design.

Results

An automated workflow is described for the analysis of oscillating signals. A software package is developed to match signal shapes to hundreds of a priori viable model structures defined by a class of first-order differential equations. The package computes parameter values for each model by exploiting the mode decomposition of oscillating signals and formulating the matching problem in terms of systems of simultaneous polynomial equations. On the basis of the computed parameter values, the software returns a list of models consistent with the data. In validation tests with synthetic datasets, it not only shortlists those model structures used to generate the data but also shows that excellent fits can sometimes be achieved with alternative equations. The listing of all consistent equations is indicative of how further invalidation might be achieved with additional information. When applied to data from a microarray experiment on mice, the procedure finds several candidate model structures to describe interactions related to the circadian rhythm. This shows that experimental data on oscillators is indeed rich in information about gene regulation mechanisms.

Availability

The software package is available at http://babylone.ulb.ac.be/autoosc/.",2011-02-10 +22351181,Data (pre-)processing of nominal and accurate mass LC-MS or GC-MS data using MetAlign.,"This paper gives a step-by-step account of how to install, set up, and run MetAlign software, which can be downloaded freely ( http://www.metalign.wur.nl/UK/Download+and+publications ). The software is used for accurate mass and nominal mass data coming from different kinds of GC-MS and LC-MS platforms. The algorithms are beyond the scope of this paper and were published separately.",2012-01-01 +21998153,Fast scaffolding with small independent mixed integer programs.,"

Motivation

Assembling genomes from short read data has become increasingly popular, but the problem remains computationally challenging especially for larger genomes. We study the scaffolding phase of sequence assembly where preassembled contigs are ordered based on mate pair data.

Results

We present MIP Scaffolder that divides the scaffolding problem into smaller subproblems and solves these with mixed integer programming. The scaffolding problem can be represented as a graph and the biconnected components of this graph can be solved independently. We present a technique for restricting the size of these subproblems so that they can be solved accurately with mixed integer programming. We compare MIP Scaffolder to two state of the art methods, SOPRA and SSPACE. MIP Scaffolder is fast and produces better or as good scaffolds as its competitors on large genomes.

Availability

The source code of MIP Scaffolder is freely available at http://www.cs.helsinki.fi/u/lmsalmel/mip-scaffolder/.

Contact

leena.salmela@cs.helsinki.fi.",2011-10-13 +22426492,Unsupervised pattern discovery in human chromatin structure through genomic segmentation.,"We trained Segway, a dynamic Bayesian network method, simultaneously on chromatin data from multiple experiments, including positions of histone modifications, transcription-factor binding and open chromatin, all derived from a human chronic myeloid leukemia cell line. In an unsupervised fashion, we identified patterns associated with transcription start sites, gene ends, enhancers, transcriptional regulator CTCF-binding regions and repressed regions. Software and genome browser tracks are at http://noble.gs.washington.edu/proj/segway/.",2012-03-18 +24325792,"The distinct expression patterns of claudin-10, -14, -17 and E-cadherin between adjacent non-neoplastic tissues and gastric cancer tissues.","

Background

Recent data indicate that the cell adhesion proteins are abnormally regulated in several human cancers and the expression of the cell adhesion proteins E-cadherin and claudin proteins is involved in the etiology and progression of cancer. It is clear that these proteins represent promising targets for cancer detection, diagnosis, and therapy.

Methods

To explore the expression distinction of the cell adhesion proteins claudin-10,-14,-17 and E-cadherin in the adjacent non-neoplastic tissues and gastric cancer tissues, 50 gastric cancer tissues and 50 samples of adjacent non-neoplastic tissues adjacent to the tumors were examined for expression of claudin-10,-14,-17 and E-cadherin by streptavidin-peroxidase immunohistochemical staining method.

Results

The positive expression rates of E-cadherin in gastric cancer tissues and adjacent non-neoplastic tissues were 32% and 74% respectively (P < 0.01). The positive expression rates of claudin-10 in gastric cancer tissues and adjacent non-neoplastic tissues were 24% and 72% respectively (P < 0.01). The positive expression rates of claudin-17 in gastric cancer tissues and adjacent non-neoplastic tissues were 18% and 70% (P < 0.01). In contrast, the positive expression rates of claudin-14 in gastric cancer tissues and adjacent non-neoplastic tissues were 58% and 24% respectively (P = 0.015 < 0.05). Thus in our study, the expression of E-cadherin, claudin-10, and claudin-17 was down-regulated in gastric cancer tissue while the expression of claudin-14 was up-regulated. Correlations between claudins and E-cadherin expression with lymphatic metastasis were observed.

Conclusion

Our study reveals that the expression of E-cadherin, claudin-10, and claudin-17 was down-regulated in gastric cancer tissue while the expression of claudin-14 was up-regulated, and correlations between claudins and E-cadherin expression with lymphatic metastasis were observed.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1475928069111326.",2013-12-10 +22954630,SAPIN: a framework for the structural analysis of protein interaction networks.,"

Summary

Protein interaction networks are widely used to depict the relationships between proteins. These networks often lack the information on physical binary interactions, and they do not inform whether there is incompatibility of structure between binding partners. Here, we introduce SAPIN, a framework dedicated to the structural analysis of protein interaction networks. SAPIN first identifies the protein parts that could be involved in the interaction and provides template structures. Next, SAPIN performs structural superimpositions to identify compatible and mutually exclusive interactions. Finally, the results are displayed using Cytoscape Web.

Availability

The SAPIN server is available at http://sapin.crg.es.

Contact

jae-seong.yang@crg.eu or christina.kiel@crg.eu.

Supplementary information

Supplementary data are available at Bioinformatics Online.",2012-09-06 +23422339,QUAST: quality assessment tool for genome assemblies.,"

Summary

Limitations of genome sequencing techniques have led to dozens of assembly algorithms, none of which is perfect. A number of methods for comparing assemblers have been developed, but none is yet a recognized benchmark. Further, most existing methods for comparing assemblies are only applicable to new assemblies of finished genomes; the problem of evaluating assemblies of previously unsequenced species has not been adequately considered. Here, we present QUAST — a quality assessment tool for evaluating and comparing genome assemblies. This tool improves on leading assembly comparison software with new ideas and quality metrics. QUAST can evaluate assemblies both with a reference genome, as well as without a reference. QUAST produces many reports, summary tables and plots to help scientists in their research and in their publications. In this study, we used QUAST to compare several genome assemblers on three datasets. QUAST tables and plots for all of them are available in the Supplementary Material, and interactive versions of these reports are on the QUAST website.

Availability

http://bioinf.spbau.ru/quast .

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-19 +24078704,Optimized atomic statistical potentials: assessment of protein interfaces and loops.,"

Motivation

Statistical potentials have been widely used for modeling whole proteins and their parts (e.g. sidechains and loops) as well as interactions between proteins, nucleic acids and small molecules. Here, we formulate the statistical potentials entirely within a statistical framework, avoiding questionable statistical mechanical assumptions and approximations, including a definition of the reference state.

Results

We derive a general Bayesian framework for inferring statistically optimized atomic potentials (SOAP) in which the reference state is replaced with data-driven 'recovery' functions. Moreover, we restrain the relative orientation between two covalent bonds instead of a simple distance between two atoms, in an effort to capture orientation-dependent interactions such as hydrogen bonds. To demonstrate this general approach, we computed statistical potentials for protein-protein docking (SOAP-PP) and loop modeling (SOAP-Loop). For docking, a near-native model is within the top 10 scoring models in 40% of the PatchDock benchmark cases, compared with 23 and 27% for the state-of-the-art ZDOCK and FireDock scoring functions, respectively. Similarly, for modeling 12-residue loops in the PLOP benchmark, the average main-chain root mean square deviation of the best scored conformations by SOAP-Loop is 1.5 Å, close to the average root mean square deviation of the best sampled conformations (1.2 Å) and significantly better than that selected by Rosetta (2.1 Å), DFIRE (2.3 Å), DOPE (2.5 Å) and PLOP scoring functions (3.0 Å). Our Bayesian framework may also result in more accurate statistical potentials for additional modeling applications, thus affording better leverage of the experimentally determined protein structures.

Availability and implementation

SOAP-PP and SOAP-Loop are available as part of MODELLER (http://salilab.org/modeller).",2013-09-27 +21979549,Community Environmental Monitoring Program: a case study of public education and involvement in radiological monitoring.,"The public's trust in the source of information about radiation is a key element of its acceptance. The public tends to trust two groups where risk communication is concerned: (1) scientists with expertise who are viewed as acting independently; and (2) friends, family, and other close associates who are viewed as sharing the same interests and concern, even if they have less knowledge of the subject. The Community Environmental Monitoring Program (CEMP) bridges both of these groups by having members of the public help operate and communicate results of a network of 29 radiation monitoring stations around the Nevada National Security Site (NNSS), formerly known as the Nevada Test Site (NTS), the principal continental location where the United States conducted nuclear tests. The CEMP stations, spread across a 160,000 km area, help provide evidence to the public that no releases of radiation of health concern are occurring from the NNSS to public receptors. The stations provide continuous measurements of gamma radiation and collect air particulate samples that are analyzed for radioactivity and meteorological measurements that aid in interpreting variations in background radiation. A public website (http://cemp.dri.edu) provides data for most instruments. Twenty-three of the 29 stations upload their data in near-real time to a public website as well as to digital readout displays at the stations, both of which are key elements in the CEMP's transparency. The remaining six stations upload their data hourly. Public stakeholders who are direct participants provide the most significant element of the CEMP. 
The ""Community Environmental Monitors,"" who are residents of towns where the stations are located, are part of the chain-of-custody for the air samples, perform minor station maintenance, and most significantly in terms of trust, serve as lay experts on issues concerning the NNSS and on ionizing radiation and nuclear technologies in general. The CEMP meets nearly all of the principles for stakeholder engagement identified by the International Radiation Protection Association.",2011-11-01 +23813003,Using state machines to model the Ion Torrent sequencing process and to improve read error rates.,"

Motivation

The importance of fast and affordable DNA sequencing methods for current day life sciences, medicine and biotechnology is hard to overstate. A major player is Ion Torrent, a pyrosequencing-like technology which produces flowgrams--sequences of incorporation values--which are converted into nucleotide sequences by a base-calling algorithm. Because of its exploitation of ubiquitous semiconductor technology and innovation in chemistry, Ion Torrent has been gaining popularity since its debut in 2011. Despite the advantages, however, Ion Torrent read accuracy remains a significant concern.

Results

We present FlowgramFixer, a new algorithm for converting flowgrams into reads. Our key observation is that the incorporation signals of neighboring flows, even after normalization and phase correction, carry considerable mutual information and are important in making the correct base-call. We therefore propose that base-calling of flowgrams should be done on a read-wide level, rather than one flow at a time. We show that this can be done in linear-time by combining a state machine with a Viterbi algorithm to find the nucleotide sequence that maximizes the likelihood of the observed flowgram. FlowgramFixer is applicable to any flowgram-based sequencing platform. We demonstrate FlowgramFixer's superior performance on Ion Torrent Escherichia coli data, with a 4.8% improvement in the number of high-quality mapped reads and a 7.1% improvement in the number of uniquely mappable reads.

Availability

Binaries and source code of FlowgramFixer are freely available at: http://www.cs.tau.ac.il/~davidgo5/flowgramfixer.html.",2013-07-01 +21685107,Meta-IDBA: a de Novo assembler for metagenomic data.,"

Motivation

Next-generation sequencing techniques allow us to generate reads from a microbial environment in order to analyze the microbial community. However, assembling of a set of mixed reads from different species to form contigs is a bottleneck of metagenomic research. Although there are many assemblers for assembling reads from a single genome, there are no assemblers for assembling reads in metagenomic data without reference genome sequences. Moreover, the performances of these assemblers on metagenomic data are far from satisfactory, because of the existence of common regions in the genomes of subspecies and species, which make the assembly problem much more complicated.

Results

We introduce the Meta-IDBA algorithm for assembling reads in metagenomic data, which contain multiple genomes from different species. There are two core steps in Meta-IDBA. It first tries to partition the de Bruijn graph into isolated components of different species based on an important observation. Then, for each component, it captures the slight variants of the genomes of subspecies from the same species by multiple alignments and represents the genome of one species, using a consensus sequence. Comparison of the performances of Meta-IDBA and existing assemblers, such as Velvet and Abyss for different metagenomic datasets shows that Meta-IDBA can reconstruct longer contigs with similar accuracy.

Availability

Meta-IDBA toolkit is available at our website http://www.cs.hku.hk/~alse/metaidba.

Contact

chin@cs.hku.hk.",2011-07-01 +22223445,Phylogenetic inference via sequential Monte Carlo.,"Bayesian inference provides an appealing general framework for phylogenetic analysis, able to incorporate a wide variety of modeling assumptions and to provide a coherent treatment of uncertainty. Existing computational approaches to bayesian inference based on Markov chain Monte Carlo (MCMC) have not, however, kept pace with the scale of the data analysis problems in phylogenetics, and this has hindered the adoption of bayesian methods. In this paper, we present an alternative to MCMC based on Sequential Monte Carlo (SMC). We develop an extension of classical SMC based on partially ordered sets and show how to apply this framework--which we refer to as PosetSMC--to phylogenetic analysis. We provide a theoretical treatment of PosetSMC and also present experimental evaluation of PosetSMC on both synthetic and real data. The empirical results demonstrate that PosetSMC is a very promising alternative to MCMC, providing up to two orders of magnitude faster convergence. We discuss other factors favorable to the adoption of PosetSMC in phylogenetics, including its ability to estimate marginal likelihoods, its ready implementability on parallel and distributed computing platforms, and the possibility of combining with MCMC in hybrid MCMC-SMC schemes. Software for PosetSMC is available at http://www.stat.ubc.ca/~bouchard/PosetSMC.",2012-01-04 +21317188,Ranking causal variants and associated regions in genome-wide association studies by the support vector machine and random forest.,"We study the number of causal variants and associated regions identified by top SNPs in rankings given by the popular 1 df chi-squared statistic, support vector machine (SVM) and the random forest (RF) on simulated and real data. 
If we apply the SVM and RF to the top 2r chi-square-ranked SNPs, where r is the number of SNPs with P-values within the Bonferroni correction, we find that both improve the ranks of causal variants and associated regions and achieve higher power on simulated data. These improvements, however, as well as stability of the SVM and RF rankings, progressively decrease as the cutoff increases to 5r and 10r. As applications we compare the ranks of previously replicated SNPs in real data, associated regions in type 1 diabetes, as provided by the Type 1 Diabetes Consortium, and disease risk prediction accuracies as given by top ranked SNPs by the three methods. Software and webserver are available at http://svmsnps.njit.edu.",2011-02-11 +22283880,"P.Re.Val.E.: outcome research program for the evaluation of health care quality in Lazio, Italy.","

Background

P.Re.Val.E. is the most comprehensive comparative evaluation program of healthcare outcomes in Lazio, an Italian region, and the first Italian study to make health provider performance data available to the public. The aim of this study is to describe the P.Re.Val.E. and the impact of releasing performance data to the public.

Methods

P.Re.Val.E. included 54 outcome/process indicators encompassing many different clinical areas. Crude and adjusted rates were estimated for the 2006-2009 period. Multivariate regression models and direct standardization procedures were used to control for potential confounding due to individual characteristics. Variable life-adjusted display charts were developed, and 2008-2009 results were compared with those from 2006-2007.

Results

Results of 54 outcome indicators were published online at http://www.epidemiologia.lazio.it/prevale10/index.php. Public disclosure of the indicators' results caused mixed reactions but finally promoted discussion and refinement of some indicators. Based on the P.Re.Val.E. experience, the Italian National Agency for Regional Health Services has launched a National Outcome Program aimed at systematically comparing outcomes in hospitals and local health units in Italy.

Conclusions

P.Re.Val.E. highlighted aspects of patient care that merit further investigation and monitoring to improve healthcare services and equity.",2012-01-27 +23062237,CORAL: binary classifications (active/inactive) for Liver-Related Adverse Effects of Drugs.,"Classification data related to the Liver-Related Adverse Effects of Drugs have been studied with the CORAL software (http://www.insilico.eu/coral). Two datasets which contain compounds with two serum enzyme markers of liver toxicity: alanine aminotransferase (ALT, n=187) and aspartate aminotransferase (AST, n=209) are analyzed. Statistical quality of the prediction for ALT activity is n=35, Sensitivity = 0.5556, Specificity = 0.8077, and Accuracy = 0.7429. In the case of AST activity the prediction is characterized by n=42, Sensitivity = 0.6875, Specificity = 0.7692, and Accuracy = 0.7381. A number of structural alerts which can be related to the studied activities are revealed. It is the first attempt to build up the classification QSAR model by means of the Monte Carlo technique based on representation of the molecular structure by SMILES using the CORAL software.",2012-09-01 +23589650,Shrinkage estimation of dispersion in Negative Binomial models for RNA-seq experiments with small sample size.,"

Motivation

RNA-seq experiments produce digital counts of reads that are affected by both biological and technical variation. To distinguish the systematic changes in expression between conditions from noise, the counts are frequently modeled by the Negative Binomial distribution. However, in experiments with small sample size, the per-gene estimates of the dispersion parameter are unreliable.

Method

We propose a simple and effective approach for estimating the dispersions. First, we obtain the initial estimates for each gene using the method of moments. Second, the estimates are regularized, i.e. shrunk towards a common value that minimizes the average squared difference between the initial estimates and the shrinkage estimates. The approach does not require extra modeling assumptions, is easy to compute and is compatible with the exact test of differential expression.

Results

We evaluated the proposed approach using 10 simulated and experimental datasets and compared its performance with that of currently popular packages edgeR, DESeq, baySeq, BBSeq and SAMseq. For these datasets, sSeq performed favorably for experiments with small sample size in sensitivity, specificity and computational time.

Availability

http://www.stat.purdue.edu/~ovitek/Software.html and Bioconductor.

Contact

ovitek@purdue.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-14 +22618877,TaxMan: a server to trim rRNA reference databases and inspect taxonomic coverage.,"Amplicon sequencing of the hypervariable regions of the small subunit ribosomal RNA gene is a widely accepted method for identifying the members of complex bacterial communities. Several rRNA gene sequence reference databases can be used to assign taxonomic names to the sequencing reads using BLAST, USEARCH, GAST or the RDP classifier. Next-generation sequencing methods produce ample reads, but they are short, currently ∼100-450 nt (depending on the technology), as compared to the full rRNA gene of ∼1550 nt. It is important, therefore, to select the right rRNA gene region for sequencing. The primers should amplify the species of interest and the hypervariable regions should differentiate their taxonomy. Here, we introduce TaxMan: a web-based tool that trims reference sequences based on user-selected primer pairs and returns an assessment of the primer specificity by taxa. It allows interactive plotting of taxa, both amplified and missed in silico by the primers used. Additionally, using the trimmed sequences improves the speed of sequence matching algorithms. The smaller database greatly improves run times (up to 98%) and memory usage, not only of similarity searching (BLAST), but also of chimera checking (UCHIME) and of clustering the reads (UCLUST). TaxMan is available at http://www.ibi.vu.nl/programs/taxmanwww/.",2012-05-22 +21388526,QiSampler: evaluation of scoring schemes for high-throughput datasets using a repetitive sampling strategy on gold standards.,"

Background

High-throughput biological experiments can produce a large amount of data showing little overlap with current knowledge. This may be a problem when evaluating alternative scoring mechanisms for such data according to a gold standard dataset because standard statistical tests may not be appropriate.

Findings

To address this problem we have implemented the QiSampler tool that uses a repetitive sampling strategy to evaluate several scoring schemes or experimental parameters for any type of high-throughput data given a gold standard. We provide two example applications of the tool: selection of the best scoring scheme for a high-throughput protein-protein interaction dataset by comparison to a dataset derived from the literature, and evaluation of functional enrichment in a set of tumour-related differentially expressed genes from a thyroid microarray dataset.

Conclusions

QiSampler is implemented as an open source R script and a web server, which can be accessed at http://cbdm.mdc-berlin.de/tools/sampler/.",2011-03-09 +24320447,CONRAD--a software framework for cone-beam imaging in radiology.,"

Purpose

In the community of x-ray imaging, there is a multitude of tools and applications that are used in scientific practice. Many of these tools are proprietary and can only be used within a certain lab. Often the same algorithm is implemented multiple times by different groups in order to enable comparison. In an effort to tackle this problem, the authors created CONRAD, a software framework that provides many of the tools that are required to simulate basic processes in x-ray imaging and perform image reconstruction with consideration of nonlinear physical effects.

Methods

CONRAD is a Java-based state-of-the-art software platform with extensive documentation. It is based on platform-independent technologies. Special libraries offer access to hardware acceleration such as OpenCL. There is an easy-to-use interface for parallel processing. The software package includes different simulation tools that are able to generate up to 4D projection and volume data and respective vector motion fields. Well known reconstruction algorithms such as FBP, DBP, and ART are included. All algorithms in the package are referenced to a scientific source.

Results

A total of 13 different phantoms and 30 processing steps have already been integrated into the platform at the time of writing. The platform comprises 74.000 nonblank lines of code out of which 19% are used for documentation. The software package is available for download at http://conrad.stanford.edu. To demonstrate the use of the package, the authors reconstructed images from two different scanners, a table top system and a clinical C-arm system. Runtimes were evaluated using the RabbitCT platform and demonstrate state-of-the-art runtimes with 2.5 s for the 256³ problem size and 12.4 s for the 512³ problem size.

Conclusions

As a common software framework, CONRAD enables the medical physics community to share algorithms and develop new ideas. In particular this offers new opportunities for scientific collaboration and quantitative performance comparison between the methods of different groups.",2013-11-01 +24398116,Development of a microarray for two rice subspecies: characterization and validation of gene expression in rice tissues.,"

Background

Rice is one of the major crop species in the world helping to sustain approximately half of the global population's diet especially in Asia. However, due to the impact of extreme climate change and global warming, rice crop production and yields may be adversely affected resulting in a world food crisis. Researchers have been keen to understand the effects of drought, temperature and other environmental stress factors on rice plant growth and development. Gene expression microarray technology represents a key strategy for the identification of genes and their associated expression patterns in response to stress. Here, we report on the development of the rice OneArray® microarray platform which is suitable for two major rice subspecies, japonica and indica.

Results

The rice OneArray® 60-mer, oligonucleotide microarray consists of a total of 21,179 probes covering 20,806 genes of japonica and 13,683 genes of indica. Through a validation study, total RNA isolated from rice shoots and roots were used for comparison of gene expression profiles via microarray examination. The results were submitted to NCBI's Gene Expression Omnibus (GEO). Data can be found under the GEO accession number GSE50844 (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE50844). A list of significantly differentially expressed genes was generated; 438 shoot-specific genes were identified among 3,138 up-regulated genes, and 463 root-specific genes were found among 3,845 down-regulated genes. GO enrichment analysis demonstrates these results are in agreement with the known physiological processes of the different organs/tissues. Furthermore, qRT-PCR validation was performed on 66 genes, and found to significantly correlate with the microarray results (R = 0.95, p < 0.001***).

Conclusion

The rice OneArray® 22 K microarray, the first rice microarray, covering both japonica and indica subspecies was designed and validated in a comprehensive study of gene expression in rice tissues. The rice OneArray® microarray platform revealed high specificity and sensitivity. Additional information for the rice OneArray® microarray can be found at http://www.phalanx.com.tw/index.php.",2014-01-08 +22101143,BSMac: a MATLAB toolbox implementing a Bayesian spatial model for brain activation and connectivity.,"We present a statistical and graphical visualization MATLAB toolbox for the analysis of functional magnetic resonance imaging (fMRI) data, called the Bayesian Spatial Model for activation and connectivity (BSMac). BSMac simultaneously performs whole-brain activation analyses at the voxel and region of interest (ROI) levels as well as task-related functional connectivity (FC) analyses using a flexible Bayesian modeling framework (Bowman et al., 2008). BSMac allows for inputting data in either Analyze or Nifti file formats. The user provides information pertaining to subgroup memberships, scanning sessions, and experimental tasks (stimuli), from which the design matrix is constructed. BSMac then performs parameter estimation based on Markov Chain Monte Carlo (MCMC) methods and generates plots for activation and FC, such as interactive 2D maps of voxel and region-level task-related changes in neural activity and animated 3D graphics of the FC results. The toolbox can be downloaded from http://www.sph.emory.edu/bios/CBIS/. We illustrate the BSMac toolbox through an application to an fMRI study of working memory in patients with schizophrenia.",2011-11-10 +22962459,"Fractionation, rearrangement and subgenome dominance.","

Motivation

Fractionation is arguably the greatest cause of gene order disruption following whole genome duplication, causing severe biases in chromosome rearrangement-based estimates of evolutionary divergence.

Results

We show how to correct for this bias almost entirely by means of a 'consolidation' algorithm for detecting and suitably transforming identifiable regions of fractionation. We characterize the process of fractionation and the performance of the algorithm through realistic simulations. We apply our method to a number of core eudicot genomes and, by studying the fractionation regions detected, are able to address topical issues in polyploid evolution.

Availability and implementation

Code for the consolidation algorithm, and sample data, is available at: http://137.122.149.195/Software/Fractionation/fractionation.html

Contact

sankoff@uottawa.ca.",2012-09-01 +22833229,Including shared peptides for estimating protein abundances: a significant improvement for quantitative proteomics.,"Inferring protein abundances from peptide intensities is the key step in quantitative proteomics. The inference is necessarily more accurate when many peptides are taken into account for a given protein. Yet, the information brought by the peptides shared by different proteins is commonly discarded. We propose a statistical framework based on a hierarchical modeling to include that information. Our methodology, based on a simultaneous analysis of all the quantified peptides, handles the biological and technical errors as well as the peptide effect. In addition, we propose a practical implementation suitable for analyzing large data sets. Compared to a method based on the analysis of one protein at a time (that does not include shared peptides), our methodology proved to be far more reliable for estimating protein abundances and testing abundance changes. The source codes are available at http://pappso.inra.fr/bioinfo/all_p/.",2012-09-01 +21936916,Hierarchical parallelization of gene differential association analysis.,"

Background

Microarray gene differential expression analysis is a widely used technique that deals with high dimensional data and is computationally intensive for permutation-based procedures. Microarray gene differential association analysis is even more computationally demanding and must take advantage of multicore computing technology, which is the driving force behind increasing compute power in recent years. In this paper, we present a two-layer hierarchical parallel implementation of gene differential association analysis. It takes advantage of both fine- and coarse-grain (with granularity defined by the frequency of communication) parallelism in order to effectively leverage the non-uniform nature of parallel processing available in the cutting-edge systems of today.

Results

Our results show that this hierarchical strategy matches data sharing behavior to the properties of the underlying hardware, thereby reducing the memory and bandwidth needs of the application. The resulting improved efficiency reduces computation time and allows the gene differential association analysis code to scale its execution with the number of processors. The code and biological data used in this study are downloadable from http://www.urmc.rochester.edu/biostat/people/faculty/hu.cfm.

Conclusions

The performance sweet spot occurs when using a number of threads per MPI process that allows the working sets of the corresponding MPI processes running on the multicore to fit within the machine cache. Hence, we suggest that practitioners follow this principle in selecting the appropriate number of MPI processes and threads within each MPI process for their cluster configurations. We believe that the principles of this hierarchical approach to parallelization can be utilized in the parallelization of other computationally demanding kernels.",2011-09-21 +24281688,An extended genovo metagenomic assembler by incorporating paired-end information.,"Metagenomes present assembly challenges, when assembling multiple genomes from mixed reads of multiple species. An assembler for single genomes can't adapt well when applied in this case. A metagenomic assembler, Genovo, is a de novo assembler for metagenomes under a generative probabilistic model. Genovo assembles all reads without discarding any reads in a preprocessing step, and is therefore able to extract more information from metagenomic data and, in principle, generate better assembly results. Paired end sequencing is currently widely-used yet Genovo was designed for 454 single end reads. In this research, we attempted to extend Genovo by incorporating paired-end information, named Xgenovo, so that it generates higher quality assemblies with paired end reads. First, we extended Genovo by adding a bonus parameter in the Chinese Restaurant Process used to get prior accounts for the unknown number of genomes in the sample. This bonus parameter intends for a pair of reads to be in the same contig and as an effort to solve chimera contig case. Second, we modified the sampling process of the location of a read in a contig. We used relative distance for the number of trials in the symmetric geometric distribution instead of using distance between the offset and the center of contig used in Genovo. 
Using this relative distance, a read sampled in the appropriate location has higher probability. Therefore a read will be mapped in the correct location. Results of extensive experiments on simulated metagenomic datasets from simple to complex with species coverage setting following uniform and lognormal distribution showed that Xgenovo can be superior to the original Genovo and the recently proposed metagenome assembler for 454 reads, MAP. Xgenovo successfully generated longer N50 than Genovo and MAP while maintaining the assembly quality even for very complex metagenomic datasets consisting of 115 species. Xgenovo also demonstrated the potential to decrease the computational cost. This means that our strategy worked well. The software and all simulated datasets are publicly available online at http://xgenovo.dna.bio.keio.ac.jp.",2013-10-31 +22613085,HAltORF: a database of predicted out-of-frame alternative open reading frames in human.,"Human alternative open reading frames (HAltORF) is a publicly available and searchable online database referencing putative products of out-of-frame alternative translation initiation (ATI) in human mRNAs. Out-of-frame ATI is a process by which a single mRNA encodes independent proteins, when distinct initiation codons located in different reading frames are recognized by a ribosome to initiate translation. This mechanism is largely used in viruses to increase the coding potential of small viral genomes. There is increasing evidence that out-of-frame ATI is also used in eukaryotes, including human, and may contribute to the diversity of the human proteome. HAltORF is the first web-based searchable database that allows thorough investigation in the human transcriptome of out-of-frame alternative open reading frames with a start codon located in a strong Kozak context, and are thus the more likely to be expressed. 
It is also the first large scale study on the human transcriptome to successfully predict the expression of out-of-frame ATI protein products that were previously discovered experimentally. HAltORF will be a useful tool for the identification of human genes with multiple coding sequences, and will help to better define and understand the complexity of the human proteome. Database URL: http://haltorf.roucoulab.com/.",2012-05-20 +22607234,Multiple structure alignment with msTALI.,"

Background

Multiple structure alignments have received increasing attention in recent years as an alternative to multiple sequence alignments. Although multiple structure alignment algorithms can potentially be applied to a number of problems, they have primarily been used for protein core identification. A method that is capable of solving a variety of problems using structure comparison is still absent. Here we introduce a program msTALI for aligning multiple protein structures. Our algorithm uses several informative features to guide its alignments: torsion angles, backbone Cα atom positions, secondary structure, residue type, surface accessibility, and properties of nearby atoms. The algorithm allows the user to weight the types of information used to generate the alignment, which expands its utility to a wide variety of problems.

Results

msTALI exhibits competitive results on 824 families from the Homstrad and SABmark databases when compared to Matt and Mustang. We also demonstrate success at building a database of protein cores using 341 randomly selected CATH domains and highlight the contribution of msTALI compared to the CATH classifications. Finally, we present an example applying msTALI to the problem of detecting hinges in a protein undergoing rigid-body motion.

Conclusions

msTALI is an effective algorithm for multiple structure alignment. In addition to its performance on standard comparison databases, it utilizes clear, informative features, allowing further customization for domain-specific applications. The C++ source code for msTALI is available for Linux on the web at http://ifestos.cse.sc.edu/mstali.",2012-05-20 +23505297,TfReg: calculating DNA and RNA melting temperatures and opening profiles with mesoscopic models.,"

Summary

The mesoscopic statistical physics models, known generically as Peyrard-Bishop (PB) models, have found many applications for the study of oligonucleotide properties. Unfortunately, PB models have not reached a wider non-specialized audience for the lack of freely available software implementations. Here we present an extensible C++ implementation of four variants of the PB model, which allows the user to calculate melting temperatures from tested model parameters. Even for a non-specialist, it should be straightforward to change these parameters to reflect different experimental environments or different types of oligonucleotides. For users with some proficiency in C++ programming, it should be feasible to extend the code to other PB models owing to the generic programming implementation adopted for TfReg. Pre-calculated parameters are included that allow the immediate calculation of melting temperatures and thermal equivalence indexes for DNA and RNA.

Availability

C++ source code and compiled binaries for several Linux distributions are available from https://sites.google.com/site/geraldweberufmg/tfreg and from OpenSuse build service at http://build.opensuse.org.

Contact

gweberbh@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-03-16 +22873568,Modeling RNA polymerase interaction in mitochondria of chordates.,"

Background

In previous work, we introduced a concept, a mathematical model and its computer realization that describe the interaction between bacterial and phage type RNA polymerases, protein factors, DNA and RNA secondary structures during transcription, including transcription initiation and termination. The model accurately reproduces changes of gene transcription level observed in polymerase sigma-subunit knockout and heat shock experiments in plant plastids. The corresponding computer program and a user guide are available at http://lab6.iitp.ru/en/rivals. Here we apply the model to the analysis of transcription and (partially) translation processes in the mitochondria of frog, rat and human. Notably, mitochondria possess only phage-type polymerases. We consider the entire mitochondrial genome so that our model allows RNA polymerases to complete more than one circle on the DNA strand.

Results

Our model of RNA polymerase interaction during transcription initiation and elongation accurately reproduces experimental data obtained for plastids. Moreover, it also reproduces evidence on bulk RNA concentrations and RNA half-lives in the mitochondria of frog, human with or without the MELAS mutation, and rat with normal (euthyroid) or hyposecretion of thyroid hormone (hypothyroid). The transcription characteristics predicted by the model include: (i) the fraction of polymerases terminating at a protein-dependent terminator in both directions (the terminator polarization), (ii) the binding intensities of the regulatory protein factor (mTERF) with the termination site and, (iii) the transcription initiation intensities (initiation frequencies) of all promoters in all five conditions (frog, healthy human, human with MELAS syndrome, healthy rat, and hypothyroid rat with aberrant mtDNA methylation). Using the model, absolute levels of all gene transcription can be inferred from an arbitrary array of the three transcription characteristics, whereas, for selected genes only relative RNA concentrations have been experimentally determined. Conversely, these characteristics and absolute transcription levels can be obtained using relative RNA concentrations and RNA half-lives known from various experimental studies. In this case, the ""inverse problem"" is solved with multi-objective optimization.

Conclusions

In this study, we demonstrate that our model accurately reproduces all relevant experimental data available for plant plastids, as well as the mitochondria of chordates. Using experimental data, the model is applied to estimate binding intensities of phage-type RNA polymerases to their promoters as well as predicting terminator characteristics, including polarization. In addition, one can predict characteristics of phage-type RNA polymerases and the transcription process that are difficult to measure directly, e.g., the association between the promoter's nucleotide composition and the intensity of polymerase binding. To illustrate the application of our model in functional predictions, we propose a possible mechanism for MELAS syndrome development in human involving a decrease of Phe-tRNA, Val-tRNA and rRNA concentrations in the cell. In addition, we describe how changes in methylation patterns of the mTERF binding site and three promoters in hypothyroid rat correlate with changes in intensities of the mTERF binding and transcription initiations. Finally, we introduce an auxiliary model to describe the interaction between polysomal mRNA and ribonucleases.",2012-08-09 +21742635,DivBayes and SubT: exploring species diversification using Bayesian statistics.,"

Summary

DivBayes is a program to estimate diversification rates from species richness and ages of a set of clades. SubT estimates diversification rates from node heights within a clade. Both programs implement Bayesian statistics and provide the ability to account for uncertainty in the ages of taxa in the underlying data, an improvement over more commonly used maximum likelihood methods.

Availability

DivBayes and SubT are released as C++ source code under the GNU GPL v. 3 software license in Supplementary information 1 and 2, respectively, and at http://web.utk.edu/~kryberg/. They have been successfully compiled on various Linux, MacOS X and Windows systems.

Contact

kryberg@utk.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-07 +23585274,Accurate detection of differential RNA processing.,"Deep transcriptome sequencing (RNA-Seq) has become a vital tool for studying the state of cells in the context of varying environments, genotypes and other factors. RNA-Seq profiling data enable identification of novel isoforms, quantification of known isoforms and detection of changes in transcriptional or RNA-processing activity. Existing approaches to detect differential isoform abundance between samples either require a complete isoform annotation or fall short in providing statistically robust and calibrated significance estimates. Here, we propose a suite of statistical tests to address these open needs: a parametric test that uses known isoform annotations to detect changes in relative isoform abundance and a non-parametric test that detects differential read coverages and can be applied when isoform annotations are not available. Both methods account for the discrete nature of read counts and the inherent biological variability. We demonstrate that these tests compare favorably to previous methods, both in terms of accuracy and statistical calibrations. We use these techniques to analyze RNA-Seq libraries from Arabidopsis thaliana and Drosophila melanogaster. The identified differential RNA processing events were consistent with RT-qPCR measurements and previous studies. The proposed toolkit is available from http://bioweb.me/rdiff and enables in-depth analyses of transcriptomes, with or without available isoform annotation.",2013-04-12 +22611129,Efficient RNA pairwise structure comparison by SETTER method.,"

Motivation

Understanding the architecture and function of RNA molecules requires methods for comparing and analyzing their 3D structures. Although a structural alignment of short RNAs is achievable in a reasonable amount of time, large structures represent a much bigger challenge. However, the growth of the number of large RNAs deposited in the PDB database calls for the development of fast and accurate methods for analyzing their structures, as well as for rapid similarity searches in databases.

Results

In this article a novel algorithm for an RNA structural comparison SETTER (SEcondary sTructure-based TERtiary Structure Similarity Algorithm) is introduced. SETTER uses a pairwise comparison method based on 3D similarity of the so-called generalized secondary structure units. For each pair of structures, SETTER produces a distance score and an indication of its statistical significance. SETTER can be used both for the structural alignments of structures that are already known to be homologous, as well as for 3D structure similarity searches and functional annotation. The algorithm presented is both accurate and fast and does not impose limits on the size of aligned RNA structures.

Availability

The SETTER program, as well as all datasets, is freely available from http://siret.cz/hoksza/projects/setter/.",2012-05-18 +23652426,InterEvScore: a novel coarse-grained interface scoring function using a multi-body statistical potential coupled to evolution.,"

Motivation

Structural prediction of protein interactions currently remains a challenging but fundamental goal. In particular, progress in scoring functions is critical for the efficient discrimination of near-native interfaces among large sets of decoys. Many functions have been developed using knowledge-based potentials, but few make use of multi-body interactions or evolutionary information, although multi-residue interactions are crucial for protein-protein binding and protein interfaces undergo significant selection pressure to maintain their interactions.

Results

This article presents InterEvScore, a novel scoring function using a coarse-grained statistical potential including two- and three-body interactions, which provides each residue with the opportunity to contribute in its most favorable local structural environment. Combination of this potential with evolutionary information considerably improves scoring results on the 54 test cases from the widely used protein docking benchmark for which evolutionary information can be collected. We analyze how our way to include evolutionary information gradually increases the discriminative power of InterEvScore. Comparison with several previously published scoring functions (ZDOCK, ZRANK and SPIDER) shows the significant progress brought by InterEvScore.

Availability

http://biodev.cea.fr/interevol/interevscore

Contact

guerois@cea.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-06 +24176797,"The effect of a glucagon-like peptide-1 receptor agonist on glucose tolerance in women with previous gestational diabetes mellitus: protocol for an investigator-initiated, randomised, placebo-controlled, double-blinded, parallel intervention trial.","

Introduction

Pregnancy is associated with decreased insulin sensitivity, which is usually overcome by a compensatory increase in insulin secretion. Some pregnant women are not able to increase their insulin secretion sufficiently, and consequently develop gestational diabetes mellitus (GDM). The disease normally disappears after delivery. Nevertheless, women with previous GDM have a high risk of developing type 2 diabetes (T2D) later in life. We aim to investigate the early development of T2D in women with previous GDM and to evaluate whether treatment with the glucagon-like peptide-1 receptor (GLP-1R) agonist, liraglutide, may modify their risk of developing T2D.

Methods and analyses

100 women with previous GDM will be randomised to either liraglutide or placebo treatment for 1 year (blinded) with an open-label extension for another 4 years. Additionally, 15 women without previous GDM will constitute a baseline control group. Women will be tested with an oral glucose tolerance test (primary endpoint: area under the curve for plasma glucose) and an isoglycaemic intravenous glucose infusion at baseline, after 1 year and after 5 years. Additional evaluations include a glucagon test, dual-energy X-ray absorptiometry, imaging of the liver (ultrasound elastography and fibroscanning), an ad libitum meal for food intake evaluation and questionnaires related to appetite, quality of life and alcohol consumption habits.

Ethics and dissemination

The protocol has been approved by the Danish Medicines Agency, the Scientific-Ethical Committee of the Capital Region of Denmark, and the Danish Data Protection Agency and will be carried out under the surveillance and guidance of the GCP unit at Copenhagen University Hospital Bispebjerg in compliance with the ICH-GCP guidelines and in accordance with the Helsinki Declaration. Positive, negative and inconclusive results will be published at scientific conferences and as one or more scientific manuscripts in peer-reviewed journals.

Registrations

The trial is registered at https://eudract.ema.europa.eu (2012-001371-37) and http://www.clinicaltrials.gov (NCT01795248).",2013-10-30 +21592310,"Kakusan4 and Aminosan: two programs for comparing nonpartitioned, proportional and separate models for combined molecular phylogenetic analyses of multilocus sequence data.","Proportional and separate models able to apply different combination of substitution rate matrix (SRM) and among-site rate variation model (ASRVM) to each locus are frequently used in phylogenetic studies of multilocus data. A proportional model assumes that branch lengths are proportional among partitions and a separate model assumes that each partition has an independent set of branch lengths. However, the selection from among nonpartitioned (i.e., a common combination of models is applied to all-loci concatenated sequences), proportional and separate models is usually based on the researcher's preference rather than on any information criteria. This study describes two programs, 'Kakusan4' (for DNA sequences) and 'Aminosan' (for amino-acid sequences), which allow the selection of evolutionary models based on several types of information criteria. The programs can handle both multilocus and single-locus data, in addition to providing an easy-to-use wizard interface and a noninteractive command line interface. In the case of multilocus data, SRMs and ASRVMs are compared at each locus and at all-loci concatenated sequences, after which nonpartitioned, proportional and separate models are compared based on information criteria. The programs also provide model configuration files for mrbayes, paup*, phyml, raxml and Treefinder to support further phylogenetic analysis using a selected model. When likelihoods are optimized by Treefinder, the best-fit models were found to differ depending on the data set. 
Furthermore, differences in the information criteria among nonpartitioned, proportional and separate models were much larger than those among the nonpartitioned models. These findings suggest that selecting from nonpartitioned, proportional and separate models results in a better phylogenetic tree. Kakusan4 and Aminosan are available at http://www.fifthdimension.jp/. They are licensed under gnugpl Ver.2, and are able to run on Windows, MacOS X and Linux.",2011-05-19 +21269452,Pre-processing and differential expression analysis of Agilent microRNA arrays using the AgiMicroRna Bioconductor library.,"

Background

The main research tool for identifying microRNAs involved in specific cellular processes is gene expression profiling using microarray technology. Agilent is one of the major producers of microRNA arrays, and microarray data are commonly analyzed by using R and the functions and packages collected in the Bioconductor project. However, an analytical package that integrates the specific characteristics of microRNA Agilent arrays has been lacking.

Results

This report presents the new bioinformatic tool AgiMicroRna for the pre-processing and differential expression analysis of Agilent microRNA array data. The software is implemented in the open-source statistical scripting language R and is integrated in the Bioconductor project (http://www.bioconductor.org) under the GPL license. For the pre-processing of the microRNA signal, AgiMicroRna incorporates the robust multiarray average algorithm, a method that produces a summary measure of the microRNA expression using a linear model that takes into account the probe affinity effect. To obtain a normalized microRNA signal useful for the statistical analysis, AgiMicroRna offers the possibility of employing either the processed signal estimated by the robust multiarray average algorithm or the processed signal produced by the Agilent image analysis software. The AgiMicroRna package also incorporates different graphical utilities to assess the quality of the data. AgiMicroRna uses the linear model features implemented in the limma package to assess the differential expression between different experimental conditions and provides links to the miRBase for those microRNAs that have been declared as significant in the statistical analysis.

Conclusions

AgiMicroRna is a rational collection of Bioconductor functions that have been wrapped into specific functions in order to ease and systematize the pre-processing and statistical analysis of Agilent microRNA data. The development of this package contributes to the Bioconductor project filling the gap in microRNA array data analysis.",2011-01-26 +22600737,ProBiS-2012: web server and web services for detection of structurally similar binding sites in proteins.,"The ProBiS web server is a web server for detection of structurally similar binding sites in the PDB and for local pairwise alignment of protein structures. In this article, we present a new version of the ProBiS web server that is 10 times faster than earlier versions, due to the efficient parallelization of the ProBiS algorithm, which now allows significantly faster comparison of a protein query against the PDB and reduces the calculation time for scanning the entire PDB from hours to minutes. It also features new web services, and an improved user interface. In addition, the new web server is united with the ProBiS-Database and thus provides instant access to pre-calculated protein similarity profiles for over 29 000 non-redundant protein structures. The ProBiS web server is particularly adept at detection of secondary binding sites in proteins. It is freely available at http://probis.cmm.ki.si/old-version, and the new ProBiS web server is at http://probis.cmm.ki.si.",2012-05-16 +22138362,Integrated annotation and analysis of genetic variants from next-generation sequencing studies with variant tools.,"

Motivation

Storing, annotating and analyzing variants from next-generation sequencing projects can be difficult due to the availability of a wide array of data formats, tools and annotation sources, as well as the sheer size of the data files. Useful tools, including the GATK, ANNOVAR and BEDTools can be integrated into custom pipelines for annotating and analyzing sequence variants. However, building flexible pipelines that support the tracking of variants alongside their samples, while enabling updated annotation and reanalyses, is not a simple task.

Results

We have developed variant tools, a flexible annotation and analysis toolset that greatly simplifies the storage, annotation and filtering of variants and the analysis of the underlying samples. variant tools can be used to manage and analyze genetic variants obtained from sequence alignments, and the command-line driven toolset could be used as a foundation for building more sophisticated analytical methods.

Availability and implementation

variant tools consists of two command-line driven programs vtools and vtools_report. It is freely available at http://varianttools.sourceforge.net, distributed under a GPL license.

Contact

bpeng@mdanderson.org.",2011-12-02 +22135301,BacMap: an up-to-date electronic atlas of annotated bacterial genomes.,"Originally released in 2005, BacMap is an electronic, interactive atlas of fully sequenced bacterial genomes. It contains fully labeled, zoomable and searchable chromosome maps for essentially all sequenced prokaryotic (archaebacterial and eubacterial) species. Each map can be zoomed to the level of individual genes and each gene is hyperlinked to a richly annotated gene card. The latest release of BacMap (http://bacmap.wishartlab.com/) now contains data for more than 1700 bacterial species (~10× more than the 2005 release), corresponding to more than 2800 chromosome and plasmid maps. All bacterial genome maps are now supplemented with separate prophage genome maps as well as separate tRNA and rRNA maps. Each bacterial chromosome entry in BacMap also contains graphs and tables on a variety of gene and protein statistics. Likewise, every bacterial species entry contains a bacterial 'biography' card, with taxonomic details, phenotypic details, textual descriptions and images (when available). Improved data browsing and searching tools have also been added to allow more facile filtering, sorting and display of the chromosome maps and their contents.",2011-12-01 +22645600,RiceRBP: A Resource for Experimentally Identified RNA Binding Proteins in Oryza sativa.,"RNA binding proteins (RBPs) play an important role not only in nuclear gene expression, but also in cytosolic events, including RNA transport, localization, translation, and stability. Although over 200 RBPs are predicted from the Arabidopsis genome alone, relatively little is known about these proteins in plants as many exhibit no homology to known RBPs in other eukaryotes. Furthermore, RBPs likely have low expression levels making them difficult to identify and study. 
As part of our continuing efforts to understand plant cytosolic gene expression and the factors involved, we employed a combination of affinity chromatography and proteomic techniques to enrich for low abundance RBPs in developing rice seed. Our results have been compiled into RiceRBP (http://www.bioinformatics2.wsu.edu/RiceRBP), a database that contains 257 experimentally identified proteins, many of which have not previously been predicted to be RBPs. For each of the identified proteins, RiceRBP provides information on transcript and protein sequence, predicted protein domains, details of the experimental identification, and whether antibodies have been generated for public use. In addition, tools are available to analyze expression patterns for the identified genes, view phylogenetic relationships and search for orthologous proteins. RiceRBP is a valuable tool for the community in the study of plant RBPs.",2012-05-14 +21427755,"Modeling of environmental and genetic interactions with AMBROSIA, an information-theoretic model synthesis method.","To develop a model synthesis method for parsimoniously modeling gene-environmental interactions (GEI) associated with clinical outcomes and phenotypes. The AMBROSIA model synthesis approach utilizes the k-way interaction information (KWII), an information-theoretic metric capable of identifying variable combinations associated with GEI. For model synthesis, AMBROSIA considers relevance of combinations to the phenotype, it precludes entry of combinations with redundant information, and penalizes for unjustifiable complexity; each step is KWII based. The performance and power of AMBROSIA were evaluated with simulations and Genetic Association Workshop 15 (GAW15) data sets of rheumatoid arthritis (RA). AMBROSIA identified parsimonious models in data sets containing multiple interactions with linkage disequilibrium present. 
For the GAW15 data set containing 9187 single-nucleotide polymorphisms, the parsimonious AMBROSIA model identified nine RA-associated combinations with power >90%. AMBROSIA was compared with multifactor dimensionality reduction across several diverse models and had satisfactory power. Software source code is available from http://www.cse.buffalo.edu/DBGROUP/bioinformatics/resources.html. AMBROSIA is a promising method for GEI model synthesis.",2011-03-23 +21697125,GeneNetWeaver: in silico benchmark generation and performance profiling of network inference methods.,"

Motivation

Over the last decade, numerous methods have been developed for inference of regulatory networks from gene expression data. However, accurate and systematic evaluation of these methods is hampered by the difficulty of constructing adequate benchmarks and the lack of tools for a differentiated analysis of network predictions on such benchmarks.

Results

Here, we describe a novel and comprehensive method for in silico benchmark generation and performance profiling of network inference methods available to the community as an open-source software called GeneNetWeaver (GNW). In addition to the generation of detailed dynamical models of gene regulatory networks to be used as benchmarks, GNW provides a network motif analysis that reveals systematic prediction errors, thereby indicating potential ways of improving inference methods. The accuracy of network inference methods is evaluated using standard metrics such as precision-recall and receiver operating characteristic curves. We show how GNW can be used to assess the performance and identify the strengths and weaknesses of six inference methods. Furthermore, we used GNW to provide the international Dialogue for Reverse Engineering Assessments and Methods (DREAM) competition with three network inference challenges (DREAM3, DREAM4 and DREAM5).

Availability

GNW is available at http://gnw.sourceforge.net along with its Java source code, user manual and supporting data.

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

dario.floreano@epfl.ch.",2011-06-22 +23796450,OLYMPUS: an automated hybrid clustering method in time series gene expression. Case study: host response after Influenza A (H1N1) infection.,"The increasing flow of short time series microarray experiments for the study of dynamic cellular processes poses the need for efficient clustering tools. These tools must deal with three primary issues: first, to consider the multi-functionality of genes; second, to evaluate the similarity of the relative change of amplitude in the time domain rather than the absolute values; third, to cope with the constraints of conventional clustering algorithms such as the assignment of the appropriate cluster number. To address these, we propose OLYMPUS, a novel unsupervised clustering algorithm that integrates Differential Evolution (DE) method into Fuzzy Short Time Series (FSTS) algorithm with the scope to utilize efficiently the information of population of the first and enhance the performance of the latter. Our hybrid approach provides sets of genes that enable the deciphering of distinct phases in dynamic cellular processes. We proved the efficiency of OLYMPUS on synthetic as well as on experimental data. The discriminative power of OLYMPUS provided clusters, which refined the so far perspective of the dynamics of host response mechanisms to Influenza A (H1N1). Our kinetic model sets a timeline for several pathways and cell populations, implicated to participate in host response; yet no timeline was assigned to them (e.g. cell cycle, homeostasis). Regarding the activity of B cells, our approach revealed that some antibody-related mechanisms remain activated until day 60 post infection. 
The Matlab codes for implementing OLYMPUS, as well as example datasets, are freely accessible via the Web (http://biosignal.med.upatras.gr/wordpress/biosignal/).",2013-06-22 +21253599,PoPoolation: a toolbox for population genetic analysis of next generation sequencing data from pooled individuals.,"Recent statistical analyses suggest that sequencing of pooled samples provides a cost effective approach to determine genome-wide population genetic parameters. Here we introduce PoPoolation, a toolbox specifically designed for the population genetic analysis of sequence data from pooled individuals. PoPoolation calculates estimates of θ(Watterson), θ(π), and Tajima's D that account for the bias introduced by pooling and sequencing errors, as well as divergence between species. Results of genome-wide analyses can be graphically displayed in a sliding window plot. PoPoolation is written in Perl and R and it builds on commonly used data formats. Its source code can be downloaded from http://code.google.com/p/popoolation/. Furthermore, we evaluate the influence of mapping algorithms, sequencing errors, and read coverage on the accuracy of population genetic parameter estimates from pooled data.",2011-01-06 +23396123,"MODexplorer: an integrated tool for exploring protein sequence, structure and function relationships.","

Summary

MODexplorer is an integrated tool aimed at exploring the sequence, structural and functional diversity in protein families useful in homology modeling and in analyzing protein families in general. It takes as input either the sequence or the structure of a protein and provides alignments with its homologs along with a variety of structural and functional annotations through an interactive interface. The annotations include sequence conservation, similarity scores, ligand-, DNA- and RNA-binding sites, secondary structure, disorder, crystallographic structure resolution and quality scores of models implied by the alignments to the homologs of known structure. MODexplorer can be used to analyze sequence and structural conservation among the structures of similar proteins, to find structures of homologs solved in different conformational state or with different ligands and to transfer functional annotations. Furthermore, if the structure of the query is not known, MODexplorer can be used to select the modeling templates taking all this information into account and to build a comparative model.

Availability and implementation

Freely available on the web at http://modorama.biocomputing.it/modexplorer. Website implemented in HTML and JavaScript with all major browsers supported.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-08 +24247528,Progress on understanding spatial and temporal variability of PM(2.5) and its components in the Detroit Exposure and Aerosol Research Study (DEARS).,"The Detroit Exposure and Aerosol Research Study (DEARS) measured personal exposures, ambient, residential indoor and residential outdoor concentrations of select PM2.5 aerosol components (SO4, NO3, Fe, Si, Ca, K, Mn, Pb, Zn, EC and OC) over a three year period (2004-2007). These events represented approximately 190 calendar days of monitoring which was performed in seven residential neighborhoods throughout Wayne County, MI. The selection of neighborhoods and participants for study inclusion was based upon an a priori hypothesis that each neighborhood represented a potentially distinct air quality scenario being influenced by both regional as well as local pollution sources. Daily (24 h integrated) measurement data were used to evaluate the spatial and temporal PM2.5 compositional variability of the personal, indoor and outdoor spatial settings as they related to a central ambient monitoring site (Allen Park). Many of the PM2.5 components were observed to have spatially different outdoor mass concentrations in matched neighborhood by neighborhood comparisons, with sulfate, OC, and NO3 being noted exceptions. Coefficient of divergence (COD) comparisons involving outdoor measures for Ca, Si, Fe, Zn, Pb, and EC revealed significant spatial variability. While concentrations of most components were lower indoors as compared to outdoor measures, K and Si indoor concentrations often reflected aerosol enrichment (indoor/outdoor ratios ≥ 1.2). 
Even when personal exposures were adjusted for day to day changes in ambient concentrations, certain components (Ca, Fe, Mn, Zn, among others) revealed a high degree of location-specific spatial variability suggesting the influences of personal activities and/or local source influences on total personal PM2.5 exposures. As a whole, findings indicate that reliance on a central ambient monitor as a surrogate for total personal and potentially even residential outdoor estimates of PM2.5 aerosol composition may provide an undesirable degree of exposure uncertainty for health-based risk estimates. The focus of this paper is on the spatial variability and uncertainty in using a central monitoring site to estimate exposures. Additional information concerning the DEARS can be found at http://www.epa.gov/DEARS/.",2014-01-01 +22584068,CACG: a database for comparative analysis of conjoined genes.,"A conjoined gene is defined as one formed at the time of transcription by combining at least part of one exon from each of two or more distinct genes that lie on the same chromosome, in the same or opposite orientation, which translate independently into different proteins. We comparatively studied the extent of conjoined genes in thirteen genomes by analyzing the public databases of expressed sequence tags and mRNA sequences using a set of computational tools designed to identify conjoined genes on the same DNA strand or opposite DNA strands of the same genomic locus. 
The CACG database, available at http://cgc.kribb.re.kr/map/, includes a number of conjoined genes (7131-human, 2-chimpanzee, 5-orangutan, 57-chicken, 4-rhesus monkey, 651-cow, 27-dog, 2512-mouse, 263-rat, 1482-zebrafish, 5-horse, 29-sheep, and 8-medaka) and is very effective and easy to use to analyze the evolutionary process of conjoined genes when comparing different species.",2012-05-11 +21622957,DARIO: a ncRNA detection and analysis tool for next-generation sequencing experiments.,"Small non-coding RNAs (ncRNAs) such as microRNAs, snoRNAs and tRNAs are a diverse collection of molecules with several important biological functions. Current methods for high-throughput sequencing for the first time offer the opportunity to investigate the entire ncRNAome in an essentially unbiased way. However, there is a substantial need for methods that allow a convenient analysis of these overwhelmingly large data sets. Here, we present DARIO, a free web service that allows to study short read data from small RNA-seq experiments. It provides a wide range of analysis features, including quality control, read normalization, ncRNA quantification and prediction of putative ncRNA candidates. The DARIO web site can be accessed at http://dario.bioinf.uni-leipzig.de/.",2011-05-27 +21551144,Pathway analysis of high-throughput biological data within a Bayesian network framework.,"

Motivation

Most current approaches to high-throughput biological data (HTBD) analysis either perform individual gene/protein analysis or, gene/protein set enrichment analysis for a list of biologically relevant molecules. Bayesian Networks (BNs) capture linear and non-linear interactions, handle stochastic events accounting for noise, and focus on local interactions, which can be related to causal inference. Here, we describe for the first time an algorithm that models biological pathways as BNs and identifies pathways that best explain given HTBD by scoring fitness of each network.

Results

Proposed method takes into account the connectivity and relatedness between nodes of the pathway through factoring pathway topology in its model. Our simulations using synthetic data demonstrated robustness of our approach. We tested proposed method, Bayesian Pathway Analysis (BPA), on human microarray data regarding renal cell carcinoma (RCC) and compared our results with gene set enrichment analysis. BPA was able to find broader and more specific pathways related to RCC.

Availability

Accompanying BPA software (BPAS) package is freely available for academic use at http://bumil.boun.edu.tr/bpa.",2011-05-05 +23023983,Adding unaligned sequences into an existing alignment using MAFFT and LAST.,"

Unlabelled

Two methods to add unaligned sequences into an existing multiple sequence alignment have been implemented as the '--add' and '--addfragments' options in the MAFFT package. The former option is a basic one and applicable only to full-length sequences, whereas the latter option is applicable even when the unaligned sequences are short and fragmentary. These methods internally infer the phylogenetic relationship among the sequences in the existing alignment and the phylogenetic positions of unaligned sequences. Benchmarks based on two independent simulations consistently suggest that the ""--addfragments"" option outperforms recent methods, PaPaRa and PAGAN, in accuracy for difficult problems and that these three methods appropriately handle easy problems.

Availability

http://mafft.cbrc.jp/alignment/software/

Contact

katoh@ifrec.osaka-u.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-27 +22647710,"Summary of notifiable diseases--United States, 2010.","The Summary of Notifiable Diseases United States, 2010 contains the official statistics, in tabular and graphic form, for the reported occurrence of nationally notifiable infectious diseases in the United States for 2010. Unless otherwise noted, the data are final totals for 2010, reported as of June 30, 2011. These statistics are collected and compiled from reports sent by state health departments and territories to the National Notifiable Diseases Surveillance System (NNDSS), which is operated by CDC in collaboration with the Council of State and Territorial Epidemiologists (CSTE). The Summary is available at http://www.cdc.gov/mmwr/mmwr_su/mmwr_nd/. This site also includes Summary publications from previous years.",2012-06-01 +22581177,Exhaustive database searching for amino acid mutations in proteomes.,"

Motivation

Amino acid mutations in proteins can be found by searching tandem mass spectra acquired in shotgun proteomics experiments against protein sequences predicted from genomes. Traditionally, unconstrained searches for amino acid mutations have been accomplished by using a sequence tagging approach that combines de novo sequencing with database searching. However, this approach is limited by the performance of de novo sequencing.

Results

The Sipros algorithm v2.0 was developed to perform unconstrained database searching using high-resolution tandem mass spectra by exhaustively enumerating all single non-isobaric mutations for every residue in a protein database. The performance of Sipros for amino acid mutation identification exceeded that of an established sequence tagging algorithm, Inspect, based on benchmarking results from a Rhodopseudomonas palustris proteomics dataset. To demonstrate the viability of the algorithm for meta-proteomics, Sipros was used to identify amino acid mutations in a natural microbial community in acid mine drainage.

Availability

The Sipros algorithm is freely available at http://code.google.com/p/sipros.",2012-05-10 +23463026,A global map for dissecting phenotypic variants in human lincRNAs.,"Large intergenic noncoding RNAs (lincRNAs) are emerging as key factors of multiple cellular processes. Cumulative evidence has linked lincRNA polymorphisms to diverse diseases. However, the global properties of lincRNA polymorphisms and their implications for human disease remain largely unknown. Here we performed a systematic analysis of naturally occurring variants in human lincRNAs, with a particular focus on lincRNA polymorphism as novel risk factor of disease etiology. We found that lincRNAs exhibited a relatively low level of polymorphisms, and low single-nucleotide polymorphism (SNP) density lincRNAs might have a broad range of functions. We also found that some polymorphisms in evolutionarily conserved regions of lincRNAs had significant effects on predicted RNA secondary structures, indicating their potential contribution to diseases. We mapped currently available phenotype-associated SNPs to lincRNAs and found that lincRNAs were associated with a wide range of human diseases. Some lincRNAs could be responsible for particular diseases. Our results provided not only a global perspective on genetic variants in human lincRNAs but also novel insights into the function and etiology of lincRNA. All the data in this study can be accessed and retrieved freely via a web server at http://bioinfo.hrbmu.edu.cn/lincPoly.",2013-03-06 +24564522,Incorporating substrate sequence motifs and spatial amino acid composition to identify kinase-specific phosphorylation sites on protein three-dimensional structures.,"

Background

Protein phosphorylation catalyzed by kinases plays crucial regulatory roles in cellular processes. Given the high-throughput mass spectrometry-based experiments, the desire to annotate the catalytic kinases for in vivo phosphorylation sites has motivated the development of a variety of computational methods for performing large-scale prediction of kinase-specific phosphorylation sites. However, most of the proposed methods solely rely on the local amino acid sequences surrounding the phosphorylation sites. An increasing number of three-dimensional structures make it possible to physically investigate the structural environment of phosphorylation sites.

Results

In this work, all of the experimental phosphorylation sites are mapped to the protein entries of Protein Data Bank by sequence identity. It resulted in a total of 4508 phosphorylation sites containing the protein three-dimensional (3D) structures. To identify phosphorylation sites on protein 3D structures, this work incorporates support vector machines (SVMs) with the information of linear motifs and spatial amino acid composition, which is determined for each kinase group by calculating the relative frequencies of 20 amino acid types within a specific radial distance from central phosphorylated amino acid residue. After the cross-validation evaluation, most of the kinase-specific models trained with the consideration of structural information outperform the models considering only the sequence information. Furthermore, the independent testing set which is not included in training set has demonstrated that the proposed method could provide a comparable performance to other popular tools.

Conclusion

The proposed method is shown to be capable of predicting kinase-specific phosphorylation sites on 3D structures and has been implemented as a web server which is freely accessible at http://csb.cse.yzu.edu.tw/PhosK3D/. Due to the difficulty of identifying the kinase-specific phosphorylation sites with similar sequenced motifs, this work also integrates the 3D structural information to improve the cross classifying specificity.",2013-10-22 +23471300,freeIbis: an efficient basecaller with calibrated quality scores for Illumina sequencers.,"

Motivation

The conversion of the raw intensities obtained from next-generation sequencing platforms into nucleotide sequences with well-calibrated quality scores is a critical step in the generation of good sequence data. While recent model-based approaches can yield highly accurate calls, they require a substantial amount of processing time and/or computational resources. We previously introduced Ibis, a fast and accurate basecaller for the Illumina platform. We have continued active development of Ibis to take into account developments in the Illumina technology, as well as to make Ibis fully open source.

Results

We introduce here freeIbis, which offers significant improvements in sequence accuracy owing to the use of a novel multiclass support vector machine (SVM) algorithm. Sequence quality scores are now calibrated based on empirically observed scores, thus providing a high correlation to their respective error rates. These improvements result in downstream advantages including improved genotyping accuracy.

Availability and implementation

FreeIbis is freely available for use under the GPL (http://bioinf.eva.mpg.de/freeibis/). It requires a Python interpreter and a C++ compiler. Tailored versions of LIBOCAS and LIBLINEAR are distributed along with the package.",2013-03-06 +22570419,LAHEDES: the LAGLIDADG homing endonuclease database and engineering server.,"LAGLIDADG homing endonucleases (LHEs) are DNA cleaving enzymes, also termed 'meganucleases' that are employed as gene-targeting reagents. This use of LHEs requires that their DNA specificity be altered to match sequences in genomic targets. The choice of the most appropriate LHE to target a particular gene is facilitated by the growing number of such enzymes with well-characterized activities and structures. 'LAHEDES' (The LAGLIDADG Homing Endonuclease Database and Engineering Server) provides both an online archive of LHEs with validated DNA cleavage specificities and DNA-binding interactions, as well as a tool for the identification of DNA sequences that might be targeted by various LHEs. Searches can be performed using four separate scoring algorithms and user-defined choices of LHE scaffolds. The webserver subsequently provides information regarding clusters of amino acids that should be interrogated during engineering and selection experiments. The webserver is fully open access and can be found at http://homingendonuclease.net.",2012-05-08 +22434533,Sann: solvent accessibility prediction of proteins by nearest neighbor method.,"We present a method to predict the solvent accessibility of proteins which is based on a nearest neighbor method applied to the sequence profiles. Using the method, continuous real-value prediction as well as two-state and three-state discrete predictions can be obtained. The method utilizes the z-score value of the distance measure in the feature vector space to estimate the relative contribution among the k-nearest neighbors for prediction of the discrete and continuous solvent accessibility. 
The Solvent accessibility database is constructed from 5717 proteins extracted from PISCES culling server with the cutoff of 25% sequence identities. Using optimal parameters, the prediction accuracies (for discrete predictions) of 78.38% (two-state prediction with the threshold of 25%), 65.1% (three-state prediction with the thresholds of 9 and 36%), and the Pearson correlation coefficient (between the predicted and true RSA's for continuous prediction) of 0.676 are achieved. An independent benchmark test was performed with the CASP8 targets where we find that the proposed method outperforms existing methods. The prediction accuracies are 80.89% (for two state prediction with the threshold of 25%), 67.58% (three-state prediction), and the Pearson correlation coefficient of 0.727 (for continuous prediction) with mean absolute error of 0.148. We have also investigated the effect of increasing database sizes on the prediction accuracy, where additional improvement in the accuracy is observed as the database size increases. The SANN web server is available at http://lee.kias.re.kr/~newton/sann/.",2012-05-08 +23388428,Association study of integrins beta 1 and beta 2 gene polymorphism and papillary thyroid cancer.,"

Background

We investigated whether single nucleotide polymorphisms (SNPs) of integrin beta 1 (ITGB1) and integrin beta 2 (ITGB2) contribute to the development of papillary thyroid cancer (PTC).

Methods

Two synonymous SNPs (rs2230396 and rs2298141) of ITGB1 and 1 synonymous SNP (rs2352326), 1 5' UTR-region SNP (rs2070947), and 1 promoter SNP (rs2070946) of ITGB2 were genotyped using direct sequencing in 94 patients with PTC and 213 healthy controls. Genetic data were analyzed using SNPStats (http://bioinfo.iconcologia.net/SNPstats), Helix Tree (Golden Helix Inc, Bozeman, MT), and SNPAnalyzer (ISTECH Corp, Goyang City, Republic of Korea).

Results

The promoter SNP (rs2070946) of ITGB2 was significantly associated with the development of PTC (dominant model, log-additive model). The G allele frequencies of the promoter SNP (rs2070946) of ITGB2 in patients with PTC (19.9%) were increased by about 2-fold compared with controls (10.2%).

Conclusions

Our results suggest that a promoter SNP (rs2070946) of ITGB2 might be associated with a risk of PTC.",2013-02-04 +22452996,"Hypoxia-inducible factor-2a is associated with ABCG2 expression, histology-grade and Ki67 expression in breast invasive ductal carcinoma.","

Background

Breast cancer is the most common cancer and the leading cause of cancer mortality in women worldwide. Hypoxia is an important factor involved in the progression of solid tumors and has been associated with various indicators of tumor metabolism, angiogenesis and metastasis. But little is known about the contribution of Hypoxia-Inducible Factor-2a (HIF-2a) to the drug resistance and the clinicopathological characteristics in breast cancer.

Methods

Immunohistochemistry was employed on the tissue microarray paraffin sections of surgically removed samples from 196 invasive breast cancer patients with clinicopathological data. The correlations between the expression of HIF-2a and ABCG2 as well as other patients' clinicopathological data were investigated.

Results

The results showed that HIF-2a was expressed in different intensities and distributions in the tumor cells of the breast invasive ductal carcinoma. A positive staining for HIF-2a was defined as a brown staining observed mainly in the nucleus. A statistically significant correlation was demonstrated between HIF-2a expression and ABCG2 expression (p = 0.001), histology-grade (p = 0.029), and Ki67 (p = 0.043), respectively.

Conclusion

HIF-2a was correlated with ABCG2 expression, histology-grade and Ki67 expression in breast invasive ductal carcinoma. HIF-2a could regulate ABCG2 in breast cancer cells, and could be a novel potential bio-marker to predict chemotherapy effectiveness. The hypoxia/HIF-2a/ABCG2 pathway could be a new mechanism of breast cancer multidrug-resistance.

Virtual slides

http://www.diagnosticpathology.diagnomx.eu/vs/2965948166714795.",2012-03-27 +23936240,TrOn: an anatomical ontology for the beetle Tribolium castaneum.,"In a morphological ontology the expert's knowledge is represented in terms, which describe morphological structures and how these structures relate to each other. With the assistance of ontologies this expert knowledge is made processable by machines, through a formal and standardized representation of terms and their relations to each other. The red flour beetle Tribolium castaneum, a representative of the most species rich animal taxon on earth (the Coleoptera), is an emerging model organism for development, evolution, physiology, and pest control. In order to foster Tribolium research, we have initiated the Tribolium Ontology (TrOn), which describes the morphology of the red flour beetle. The content of this ontology comprises so far most external morphological structures as well as some internal ones. All modeled structures are consistently annotated for the developmental stages larva, pupa and adult. In TrOn all terms are grouped into three categories: Generic terms represent morphological structures, which are independent of a developmental stage. In contrast, downstream of such terms are concrete terms which stand for a dissectible structure of a beetle at a specific life stage. Finally, there are mixed terms describing structures that are only found at one developmental stage. These terms combine the characteristics of generic and concrete terms with features of both. These annotation principles take into account the changing morphology of the beetle during development and provide generic terms to be used in applications or for cross linking with other ontologies and data resources. We use the ontology for implementing an intuitive search function at the electronic iBeetle-Base, which stores morphological defects found in a genome wide RNA interference (RNAi) screen. 
The ontology is available for download at http://ibeetle-base.uni-goettingen.de.",2013-07-30 +23110968,Dragon TIS Spotter: an Arabidopsis-derived predictor of translation initiation sites in plants.,"

Summary

In higher eukaryotes, the identification of translation initiation sites (TISs) has been focused on finding these signals in cDNA or mRNA sequences. Using Arabidopsis thaliana (A.t.) information, we developed a prediction tool for signals within genomic sequences of plants that correspond to TISs. Our tool requires only genome sequence, not expressed sequences. Its sensitivity/specificity is for A.t. (90.75%/92.2%), for Vitis vinifera (66.8%/94.4%) and for Populus trichocarpa (81.6%/94.4%), which suggests that our tool can be used in annotation of different plant genomes. We provide a list of features used in our model. Further study of these features may improve our understanding of mechanisms of the translation initiation.

Availability and implementation

Our tool is implemented as an artificial neural network. It is available as a web-based tool and, together with the source code, the list of features, and data used for model development, is accessible at http://cbrc.kaust.edu.sa/dts.",2012-10-30 +22443449,"Mapsembler, targeted and micro assembly of large NGS datasets on a desktop computer.","

Background

The analysis of next-generation sequencing data from large genomes is a timely research topic. Sequencers are producing billions of short sequence fragments from newly sequenced organisms. Computational methods for reconstructing whole genomes/transcriptomes (de novo assemblers) are typically employed to process such data. However, these methods require large memory resources and computation time. Many basic biological questions could be answered targeting specific information in the reads, thus avoiding complete assembly.

Results

We present Mapsembler, an iterative micro and targeted assembler which processes large datasets of reads on commodity hardware. Mapsembler checks for the presence of given regions of interest that can be constructed from reads and builds a short assembly around it, either as a plain sequence or as a graph, showing contextual structure. We introduce new algorithms to retrieve approximate occurrences of a sequence from reads and construct an extension graph. Among other results presented in this paper, Mapsembler enabled the retrieval of previously described human breast cancer candidate fusion genes and the detection of new ones not previously known.

Conclusions

Mapsembler is the first software that enables de novo discovery around a region of interest of repeats, SNPs, exon skipping, gene fusion, as well as other structural events, directly from raw sequencing reads. As indexing is localized, the memory footprint of Mapsembler is negligible. Mapsembler is released under the CeCILL license and can be freely downloaded from http://alcovna.genouest.org/mapsembler/.",2012-03-23 +23902675,Subsequent intra-abdominal fibromatosis mimicking recurrent gastrointestinal stromal tumor.,"Intra-abdominal fibromatosis (IAF) commonly develops in patients who had abdominal surgery. In rare instances, it occurs subsequent to gastrointestinal stromal tumor (GIST). This special situation has clinical significance in imatinib era. About 1000 patients with GIST in our institution from 1993 to 2010 were re-evaluated based on their clinical and pathological data, the treatment strategies and the follow-up information. We identified 2 patients who developed IAF after GIST resection. Patient 1 was a 54 year-old male and had 5 cm × 4.5 cm × 3.5 cm jejunal GIST excised on February 22, 1994. Three years later, an abdominal mass with 7 cm × 6 cm × 3 cm was identified. He was diagnosed as recurrent GIST from clinical point of view. After excision, the second tumor was confirmed to be IAF. Patient 2 was a 45-year-old male and had 6 cm × 4 cm × 3 cm duodenal GIST excised on August 19, 2008. One year later, a 4 cm mass was found at the original surgical site. The patient refused to take imatinib until the tumor increased to 8 cm six months later. The tumor continued to increase after 6 months' imatinib therapy, decision of surgical resection was made by multidisciplinary team. The second tumor was confirmed to be IAF with size of 17 cm × 13 cm × 11 cm. Although IAF subsequent to GIST is very rare, it is of clinical significance in imatinib era as an influencing factor for making clinical decision.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1076715989961803.",2013-07-31 +21258062,HTSanalyzeR: an R/Bioconductor package for integrated network analysis of high-throughput screens.,"

Motivation

High-throughput screens (HTS) by RNAi or small molecules are among the most promising tools in functional genomics. They enable researchers to observe detailed reactions to experimental perturbations on a genome-wide scale. While there is a core set of computational approaches used in many publications to analyze these data, a specialized software combining them and making them easily accessible has so far been missing.

Results

Here we describe HTSanalyzeR, a flexible software to build integrated analysis pipelines for HTS data that contains over-representation analysis, gene set enrichment analysis, comparative gene set analysis and rich sub-network identification. HTSanalyzeR interfaces with commonly used pre-processing packages for HTS data and presents its results as HTML pages and network plots.

Availability

Our software is written in the R language and freely available via the Bioconductor project at http://www.bioconductor.org.",2011-01-22 +23196989,PAIR: paired allelic log-intensity-ratio-based normalization method for SNP-CGH arrays.,"

Motivation

Normalization is critical in DNA copy number analysis. We propose a new method to correctly identify two-copy probes from the genome to obtain representative references for normalization in single nucleotide polymorphism arrays. The method is based on a two-state Hidden Markov Model. Unlike most currently available methods in the literature, the proposed method does not need to assume that the percentage of two-copy state probes is dominant in the genome, as long as there do exist two-copy probes.

Results

The real data analysis and simulation study show that the proposed algorithm is successful in that (i) it performs as well as the current methods (e.g. CGHnormaliter and popLowess) for samples with dominant two-copy states and outperforms these methods for samples with less dominant two-copy states; (ii) it can identify the copy-neutral loss of heterozygosity; and (iii) it is efficient in terms of the computational time used.

Availability

R scripts are available at http://publichealth.lsuhsc.edu/PAIR.html.",2012-11-29 +22556368,SINA: accurate high-throughput multiple sequence alignment of ribosomal RNA genes.,"

Motivation

In the analysis of homologous sequences, computation of multiple sequence alignments (MSAs) has become a bottleneck. This is especially troublesome for marker genes like the ribosomal RNA (rRNA) where already millions of sequences are publicly available and individual studies can easily produce hundreds of thousands of new sequences. Methods have been developed to cope with such numbers, but further improvements are needed to meet accuracy requirements.

Results

In this study, we present the SILVA Incremental Aligner (SINA) used to align the rRNA gene databases provided by the SILVA ribosomal RNA project. SINA uses a combination of k-mer searching and partial order alignment (POA) to maintain very high alignment accuracy while satisfying high throughput performance demands. SINA was evaluated in comparison with the commonly used high throughput MSA programs PyNAST and mothur. The three BRAliBase III benchmark MSAs could be reproduced with 99.3, 97.6 and 96.1% accuracy. A larger benchmark MSA comprising 38 772 sequences could be reproduced with 98.9 and 99.3% accuracy using reference MSAs comprising 1000 and 5000 sequences. SINA was able to achieve higher accuracy than PyNAST and mothur in all performed benchmarks.

Availability

Alignment of up to 500 sequences using the latest SILVA SSU/LSU Ref datasets as reference MSA is offered at http://www.arb-silva.de/aligner. This page also links to Linux binaries, user manual and tutorial. SINA is made available under a personal use license.",2012-05-03 +25501916,"Re: Madsen et al. ""Unnecessary work tasks and mental health: a prospective analysis of Danish human service workers"".","Madsen et al (1) recently published a secondary analysis on data provided by the Project on Burnout, Motivation and Job Satisfaction (PUMA). The aim of their study, published in the Scandinavian Journal of Work, Environment & Health was to examine the associations between unnecessary work tasks and a decreased level of mental health. Though the topic was quite novel, reading this work proved disturbing and raised issues. Based on the results of this study, the authors stated that there is an association between unnecessary work tasks (assessed by a single question) and a decreased level of mental health, idem [assessed by the Mental Health Inventory (MHI-5)], in the specific population included in this PUMA survey. The authors point out a limitation of the study, namely that unnecessary work tasks were evaluated using one single question: ""Do you sometimes have to do things in your job which appear to be unnecessary?"". Semmer defines unnecessary work task as ""tasks that should not be carried out at all because they do not make sense or because they could have been avoided, or could be carried out with less effort if things were organized more efficiently"" (2). De facto, qualifying what an unnecessary task is requires stating or explaining whether the task makes sense. Making sense or not is not an objective notion. It is very difficult for either a manager or an employee to say if a task is necessary or not. Most important is that it makes sense from the worker's point of view. Making sense and being necessary are not synonyms. 
Some tasks do not make sense but are economically necessary (eg, when, as physicians, we are reporting our activity using ICD-10 on computers instead of being at patients' bedsides or reading this journal). Thus, there is a wide gap between Semmer's definition and the question used by the authors to evaluate his concept. A secondary analysis based on a single question is not adequate to evaluate unnecessary tasks. Nowadays, the general trend is to reduce the size of questionnaires because they are too long and cannot be used in a routine practice. But an analysis performed on a single question is quite risky: in psychometrics, redundancy is used to confirm a measurement. We lose precision on what exactly we are testing by asking a single question. Madsen et al's results show that among workers saying they are always or often performing unnecessary tasks, the MHI mean score was 74.00 versus 78.20 for people who never or almost never perform unnecessary tasks (P=0.0038). Even though it is a statistically significant result, its clinical relevance is never questioned. What is the impact of losing 4.20 points at MHI test instead of losing 20 points for instance? Statistical difference does not mean clinical relevance. These results show a statistical association, not a causality relationship. The authors did not show that performing unnecessary tasks lowers the level of mental health. It may be the exact opposite. Maybe having poorer mental health (eg, depression, with anhedonia) may make the workers think that what they're doing is useless. In their conclusion, Madsen et al suggest that the elimination of unnecessary work tasks may be beneficial for employees' mental health. To our mind, on the contrary, it may increase psychic suffering. If we suggest to fight unnecessary tasks in workplaces, this may encourage reduction of the margin of manoeuvre (3). The principle of removing unnecessary tasks is part of a Taylorized organization. 
Some tasks may seem unnecessary or bothersome, but may correspond to work periods that allow for temporary rest. Concretely, in the workplace, managers rather than the employee will be the ones to decide whether a task is useless or not. To improve well-being in the workplace, a global vision of work organization is required. From our point of view, the conclusion drawn from this study should not be that we must eliminate unnecessary tasks, but that we should focus on what makes sense for the worker, with a global view on his work and - as usual - the aim of carrying out further studies on this subject. Conflicts of interest The authors declare no conflict of interest. References 1. Madsen IEH, Tripathi M, Borritz M, Rugulies R, Unnecessary work tasks and mental health: a prospective analysis of Danish human service workers, Scand J Work Environ Health. 2014;40(6):631-8. http://dx.doi.org/10.5271/sjweh.3453.  2. Semmer NK, Tschan F, Meier LL, Facchin S, Jacobshagen N, Illegitimate tasks and counterproductive work behavior, Appl Psychol. 2010;59:70-96. http://dx.doi.org/10.1111/j.1464-0597.2009.00416.x.  3. Durand MJ, Vézina N, Baril R, Loisel P, Richard MC, Ngomo S, Margin of manoeuvre indicators in the workplace during the rehabilitation process: a qualitative analysis, J Occup Rehab. 2009;19:194-202. http://dx.doi.org/10.1007/s10926-009-9173-4.",2014-12-11 +24005581,PRIMO: a graphical environment for the Monte Carlo simulation of Varian and Elekta linacs.,"

Background

The accurate Monte Carlo simulation of a linac requires a detailed description of its geometry and the application of elaborate variance-reduction techniques for radiation transport. Both tasks entail a substantial coding effort and demand advanced knowledge of the intricacies of the Monte Carlo system being used.

Methods

PRIMO, a new Monte Carlo system that allows the effortless simulation of most Varian and Elekta linacs, including their multileaf collimators and electron applicators, is introduced. PRIMO combines (1) accurate physics from the PENELOPE code, (2) dedicated variance-reduction techniques that significantly reduce the computation time, and (3) a user-friendly graphical interface with tools for the analysis of the generated data. PRIMO can tally dose distributions in phantoms and computerized tomographies, handle phase-space files in IAEA format, and import structures (planning target volumes, organs at risk) in the DICOM RT-STRUCT standard.

Results

A prostate treatment, conformed with a high definition Millenium multileaf collimator (MLC 120HD) from a Varian Clinac 2100 C/D, is presented as an example. The computation of the dose distribution in 1.86×3.00×1.86 mm³ voxels with an average 2% standard statistical uncertainty, performed on an eight-core Intel Xeon at 2.67 GHz, took 1.8 h, excluding the patient-independent part of the linac, which required 3.8 h but is simulated only once.

Conclusion

PRIMO is a self-contained user-friendly system that facilitates the Monte Carlo simulation of dose distributions produced by most currently available linacs. This opens the door for routine use of Monte Carlo in clinical research and quality assurance purposes. It is free software that can be downloaded from http://www.primoproject.net.",2013-09-06 +22553363,ZINCPharmer: pharmacophore search of the ZINC database.,"ZINCPharmer (http://zincpharmer.csb.pitt.edu) is an online interface for searching the purchasable compounds of the ZINC database using the Pharmer pharmacophore search technology. A pharmacophore describes the spatial arrangement of the essential features of an interaction. Compounds that match a well-defined pharmacophore serve as potential lead compounds for drug discovery. ZINCPharmer provides tools for constructing and refining pharmacophore hypotheses directly from molecular structure. A search of 176 million conformers of 18.3 million compounds typically takes less than a minute. The results can be immediately viewed, or the aligned structures may be downloaded for off-line analysis. ZINCPharmer enables the rapid and interactive search of purchasable chemical space.",2012-05-02 +22087761,Improving gene expression data interpretation by finding latent factors that co-regulate gene modules with clinical factors.,"

Background

In the analysis of high-throughput data with a clinical outcome, researchers mostly focus on genes/proteins that show first-order relations with the clinical outcome. While this approach yields biomarkers and biological mechanisms that are easily interpretable, it may miss information that is important to the understanding of disease mechanism and/or treatment response. Here we test the hypothesis that unobserved factors can be mobilized by the living system to coordinate the response to the clinical factors.

Results

We developed a computational method named Guided Latent Factor Discovery (GLFD) to identify hidden factors that act in combination with the observed clinical factors to control gene modules. In simulation studies, the method recovered masked factors effectively. Using real microarray data, we demonstrate that the method identifies latent factors that are biologically relevant, and extracts more information than analyzing only the first-order response to the clinical outcome.

Conclusions

Finding latent factors using GLFD brings extra insight into the mechanisms of the disease/drug response. The R code of the method is available at http://userwww.service.emory.edu/~tyu8/GLFD.",2011-11-16 +24450739,"The freedom to explore: examining the influence of independent mobility on weekday, weekend and after-school physical activity behaviour in children living in urban and inner-suburban neighbourhoods of varying socioeconomic status.","

Background

Children's independent mobility (CIM) is critical to healthy development in childhood. The physical layout and social characteristics of neighbourhoods can impact opportunities for CIM. While global evidence is mounting on CIM, to the authors' knowledge, Canadian data on CIM and related health outcomes (i.e., physical activity (PA) behaviour) are missing. The purpose of this study was to examine if CIM is related to multiple characteristics of accelerometry-measured PA behaviour (total PA, light PA, moderate-to-vigorous PA, time spent sedentary) and whether associations between CIM and PA behaviour systematically vary by place of residence, stratifying by gender and type of day/period (weekdays, after-school, weekend).

Methods

Participants were recruited through Project BEAT (Built Environment and Active Transport; http://www.beat.utoronto.ca). Children (n = 856) were stratified into four neighbourhood classifications based on the period of neighbourhood development (urban built environment (BE) (old BE) versus inner-suburban BE (new BE)) and socioeconomic status (SES; low SES and high SES). Physical activity was measured via accelerometry (ActiGraph GT1M). CIM was assessed via parental report and two categories were created (low CIM, n = 332; high CIM, n = 524). A series of two-factor ANOVAs were used to determine gender-specific differences in PA for weekdays, weekend days and the after-school period, according to level of CIM, across four neighbourhood classifications.

Results

Children who were granted at least some independent mobility (high CIM) had more positive PA profiles across the school week, during the after-school period, and over the weekend; they were also less sedentary. The influence of CIM on PA behaviour was particularly salient during the after-school period. Associations of CIM with PA varied by gender, and also by neighbourhood classification. CIM seemed to matter more in urban neighbourhoods for boys and suburban neighbourhoods for girls.

Conclusion

Our findings highlight the importance of independent mobility to multiple characteristics of children's PA behaviour across the week. Furthermore, they emphasize that independent mobility-activity relationships need to be considered by gender and the type of neighbourhood independent mobility is offered in. Future work will focus on developing a predictive model of CIM that could be used to inform decision-making around alleviating barriers to CIM.",2014-01-22 +30727541,First Report of the Soybean Frogeye Leaf Spot Fungus (Cercospora sojina) Resistant to Quinone Outside Inhibitor Fungicides in North America.,"Quinone outside inhibitor (QoI; also known as strobilurin) fungicides sometimes are applied to soybean (Glycine max) fields to help manage frogeye leaf spot of soybean (caused by Cercospora sojina) in the United States. In August 2010, soybean leaflets exhibiting severe frogeye leaf spot symptoms were collected from a field in Lauderdale County, TN that had been treated twice with pyraclostrobin during that growing season. The field had been planted into soybean annually since at least 2008, and a QoI fungicide had been applied to the field in each of those years. Fifteen single-spore isolates of C. sojina were recovered from the affected soybean leaflets. These isolates were identified as C. sojina based on the observed symptoms on the soybean leaflets and the morphology and size of conidiophores and conidia (3). In addition, DNA was extracted from the cultures, PCR amplification of the small subunit rDNA and internal transcribed spacer (ITS) region was conducted (2), and the resulting PCR product was sequenced at the Keck Biotechnology Center at the University of Illinois, Urbana. The resulting nucleotide sequences were compared with sequences deposited in the nucleotide database ( http://www.ncbi.nlm.nih.gov ) and showed highest homology to sequences of C. sojina. 
The isolates were tested for their sensitivity to technical-grade formulations of the QoI fungicides azoxystrobin, pyraclostrobin, and trifloxystrobin with an in vitro conidial germination assay with fungicide + salicylhydroxamic acid (SHAM)-amended potato dextrose agar as described by Bradley and Pedersen (1). The effective concentration at which 50% conidial germination was inhibited (EC50) was determined for all 15 C. sojina isolates, with mean values of 3.1644 (2.7826 to 4.5409), 0.3297 (0.2818 to 0.6404), and 0.8573 (0.3665 to 2.5119) μg/ml for azoxystrobin, pyraclostrobin, and trifloxystrobin, respectively. When compared with previously established mean EC50 values of C. sojina baseline isolates (4), EC50 values of the C. sojina isolates collected from the Lauderdale County, TN soybean field were approximately 249- to 7,144-fold greater than the EC50 values of the baseline isolates. These results indicate that all isolates recovered from the Lauderdale County, TN soybean field were highly resistant to QoI fungicides. To our knowledge, this is the first report of QoI fungicide resistance occurring in C. sojina, and surveys for additional QoI fungicide-resistant C. sojina isolates are needed to determine their prevalence and geographic distribution. In light of these findings, soybean growers in Tennessee and adjacent states should consider utilizing alternative frogeye leaf spot management practices such as planting resistant cultivars, rotating to nonhost crops, and tilling affected soybean residue (3). References: (1) C. A. Bradley and D. K. Pedersen. Plant Dis. 95:189, 2011. (2) N. S. Lord et al. FEMS Microbiol. Ecol. 42:327, 2002. (3) D. V. Phillips. Page 20 in: Compendium of Soybean Diseases. 4th ed. G. L. Hartman et al., eds. The American Phytopathological Society, St. Paul, MN, 1999. (4) G. Zhang et al. Phytopathology (Abstr.) 
100(suppl.):S145, 2010.",2012-05-01 +30727536,First Report of Leaf Spot Caused by Alternaria alternata on Switchgrass in Tennessee.,"Field-grown seedlings of 'Alamo' switchgrass (Panicum virgatum L.) from Vonore, TN exhibited light brown-to-dark brown leaf spots and general chlorosis in June 2009. Symptomatic leaf tissue was surface sterilized (95% ethanol for 1 min, 20% commercial bleach for 3 min, and 95% ethanol for 1 min), air dried on sterile filter paper, and plated on 2% water agar amended with 10 mg/liter rifampicin (Sigma-Aldrich, St. Louis, MO) and 5 μl/liter miticide (2.4 EC Danitol, Valent Chemical, Walnut Creek, CA). Plates were incubated at 26°C for 4 days in darkness. An asexual, dematiaceous mitosporic fungus was isolated and transferred to potato dextrose agar. Cultures were transferred to Alternaria sporulation medium (3) to induce conidial production. Club-shaped conidia were produced in chains with branching of chains present. Conidia were 27 to 50 × 10 to 15 μm, with an average of 42.5 × 12.5 μm. Morphological features and growth on dichloran rose bengal yeast extract sucrose agar were consistent with characteristics described previously for Alternaria alternata (1). Pathogenicity studies were conducted with 5-week-old 'Alamo' switchgrass plants grown from surface-sterilized seed. Nine pots with approximately 20 plants each were prepared. Plants were wounded by trimming the tops. Eight replicate pots were sprayed with a conidial spore suspension of 5.0 × 10^6 spores/ml sterile water and subjected to high humidity by enclosure in a plastic bag for 7 days. One pot was sprayed with sterile water and subjected to the same conditions to serve as a control. Plants were maintained in a growth chamber at 25/20°C with a 12-h photoperiod. Foliar leaf spot symptoms appeared 5 to 10 days postinoculation for all replicate pots inoculated with A. alternata. Symptoms of A. alternata infection were not observed on the control. 
Lesions were excised, surface sterilized, plated on water agar, and identified in the same manner as previously described. The internal transcribed spacer (ITS) region of ribosomal DNA and the mitochondrial small sub-unit region (SSU) from the original isolate and the reisolate recovered from the pathogenicity assay were amplified with PCR, with primer pairs ITS4 and ITS5 and NMS1 and NMS2, respectively. Resultant DNA fragments were sequenced and submitted to GenBank (Accession Nos. HQ130485.1 and HQ130486.1). A BLAST search (BLASTn, NCBI) was run against GenBank isolates. The ITS region sequences were 537 bp and matched 100% max identity with eight A. alternata isolates, including GenBank Accession No. AB470838. The SSU sequences were 551 bp and matched 100% max identity with seven A. alternata isolates, including GenBank Accession No. AF229648. A. alternata has been reported from switchgrass in Iowa and Oklahoma (2); however, this is the first report of A. alternata causing leaf spot on switchgrass in Tennessee. Switchgrass is being studied in several countries as a potentially important biofuel source, but understanding of the scope of its key diseases is limited. References: (1) B. Andersen et al. Mycol. Res. 105:291, 2001. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , September 22, 2011. (3) E. A. Shahin and J. F. Shepard. Phytopathology 69:618, 1979.",2012-05-01 +22579257,Knowledge-based potential for positioning membrane-associated structures and assessing residue-specific energetic contributions.,"The complex hydrophobic and hydrophilic milieus of membrane-associated proteins pose experimental and theoretical challenges to their understanding. Here, we produce a nonredundant database to compute knowledge-based asymmetric cross-membrane potentials from the per-residue distributions of C(β), C(γ) and functional group atoms. 
We predict transmembrane and peripherally associated regions from genomic sequence and position peptides and protein structures relative to the bilayer (available at http://www.degradolab.org/ez). The pseudo-energy topological landscapes underscore positional stability and functional mechanisms demonstrated here for antimicrobial peptides, transmembrane proteins, and viral fusion proteins. Moreover, experimental effects of point mutations on the relative ratio changes of dual-topology proteins are quantitatively reproduced. The functional group potential and the membrane-exposed residues display the largest energetic changes enabling to detect native-like structures from decoys. Hence, focusing on the uniqueness of membrane-associated proteins and peptides, we quantitatively parameterize their cross-membrane propensity, thus facilitating structural refinement, characterization, prediction, and design.",2012-05-01 +21932104,Assessment tools and classification systems used for the upper extremity in children with cerebral palsy.,"

Background

Clinicians interested in assessment and outcome measurement of upper extremity (UE) function and performance in children with cerebral palsy (CP) must choose from a wide range of tools.

Questions/purposes

We systematically reviewed the literature for UE assessment and classification tools for children with CP to compare instrument content, methodology, and clinical use.

Methods

We searched Health and Psychosocial Instruments (HaPI), US National Library of Medicine (PubMed), and Cumulative Index to Nursing and Allied Health Literature (CINAHL Plus) databases (1937 to the present) to identify UE assessment and outcomes tools. We identified 21 tools for further analysis and searched HaPI, PubMed, CINAHL Plus, and Google Scholar ( http://scholar.google.com/schhp?tab=ws ) databases to identify all validity and reliability studies, systematic reviews, and original references for each of the 21 tools.

Results

The tools identified covered ages birth to adulthood. International Classification of Functioning, Disability and Health domains addressed by these tools included body function, body structure, activities and participation, and environmental factors. Eleven of the tools were patient or family report, seven were clinician-based observations, and three tools could be used in either fashion. All of the tools had published evidence of validity. Nine of the tools were specifically designed for use in subjects with CP. Two of the tools required formal certification before use. Ten of the tools were provided free of charge by the investigators or institution who developed them.

Conclusions

Familiarity with the psychometric and clinometric properties of assessment and classification tools for the UE in children with CP greatly enhances a clinician's ability to select and use these tools in daily clinical practice for both clinical decision-making and assessment of outcome.",2012-05-01 +22289055,Pharmacology and functions of receptors for vasoactive intestinal peptide and pituitary adenylate cyclase-activating polypeptide: IUPHAR review 1.,"Vasoactive intestinal peptide (VIP) and pituitary adenylate cyclase-activating polypeptide (PACAP) are members of a superfamily of structurally related peptide hormones that includes glucagon, glucagon-like peptides, secretin, gastric inhibitory peptide (GIP) and growth hormone-releasing hormone (GHRH). VIP and PACAP exert their actions through three GPCRs - PAC(1) , VPAC(1) and VPAC(2) - belonging to class B (also referred to as class II, or secretin receptor-like GPCRs). This family comprises receptors for all peptides structurally related to VIP and PACAP, and also receptors for parathyroid hormone, corticotropin-releasing factor, calcitonin and related peptides. PAC(1) receptors are selective for PACAP, whereas VPAC(1) and VPAC(2) respond to both VIP and PACAP with high affinity. VIP and PACAP play diverse and important roles in the CNS, with functions in the control of circadian rhythms, learning and memory, anxiety and responses to stress and brain injury. Recent genetic studies also implicate the VPAC(2) receptor in susceptibility to schizophrenia and the PAC(1) receptor in post-traumatic stress disorder. In the periphery, VIP and PACAP play important roles in the control of immunity and inflammation, the control of pancreatic insulin secretion, the release of catecholamines from the adrenal medulla and as co-transmitters in autonomic and sensory neurons. 
This article, written by members of the International Union of Basic and Clinical Pharmacology Committee on Receptor Nomenclature and Drug Classification (NC-IUPHAR) subcommittee on receptors for VIP and PACAP, confirms the existing nomenclature for these receptors and reviews our current understanding of their structure, pharmacology and functions and their likely physiological roles in health and disease. More detailed information has been incorporated into newly revised pages in the IUPHAR database (http://www.iuphar-db.org/DATABASE/FamilyMenuForward?familyId=67).",2012-05-01 +22549903,Template-based structure prediction and classification of transcription factors in Arabidopsis thaliana.,"Transcription factors (TFs) play important roles in plants. However, there is no systematic study of their structures and functions of most TFs in plants. Here, we performed template-based structure prediction for all TFs in Arabidopsis thaliana, with their full-length sequences as well as C-terminal and N-terminal regions. A total of 2918 model structures were obtained with a high confidence score. We find that TF families employ only a smaller number of templates for DNA-binding domains (DBD) but a diverse number of templates for transcription regulatory domains (TRD). Although TF families are classified according to DBD, their sizes have a significant correlation with the number of unique non-DNA-binding templates employed in the family (Pearson correlation coefficient of 0.74). That is, the size of TF family is related to its functional diversity. Network analysis reveals new connections between TF families based on shared TRD or DBD templates; 81% TF families share DBD and 67% share TRD templates. Two large fully connected family clusters in this network are observed along with 69 island families. In addition, 25 genes with unknown functions are found to be DNA-binding and/or TF factors according to predicted structures. 
This work provides a global view of the classification of TFs based on their DBD or TRD templates, and hence, a deeper understanding of DNA-binding and regulatory functions from a structural perspective. All structural models of TFs are deposited in the online database for public usage at http://sysbio.unl.edu/AthTF.",2012-05-01 +30727550,First Report of Colletotrichum coccodes Causing Leaf and Neck Anthracnose on Onions (Allium cepa) in Michigan and the United States.,"In July of 2010, dry, oval lesions, each with a salmon-colored center and bleached overall appearance, were observed on the leaves and neck of onion plants growing in production fields of Newaygo, Ottawa, Kent, and Ionia counties, Michigan. Acervuli and setae that are characteristic of Colletotrichum spp. were observed with a dissecting microscope, and elliptical conidia (8 to 23 × 3 to 12 μm) with attenuated ends were observed with a compound microscope. Symptomatic tissues were excised and cultured onto potato dextrose agar amended with 30 and 100 ppm of rifampicin and ampicillin, respectively. The cultures produced pale salmon-colored sporulation after growing for 5 days at 22 ± 2°C and black microsclerotia after 2 weeks. Six isolates were confirmed as C. coccodes based on sequence analysis of the internal transcribed spacer (ITS) region of the ribosomal DNA and a 1-kb intron of the glutamine synthase gene (GS) (2). Sequences were submitted to GenBank (Accession Nos. JQ682644 and JQ682645 for ITS and GS, respectively). Pathogenicity tests were conducted on two- to three-leaved 'Stanley' and 'Cortland' onion seedlings. Prior to inoculation, seedlings were enclosed in clear plastic bags overnight to provide high relative humidity. The bags were removed, and seedlings were spray inoculated with a C. coccodes conidial suspension (5 × 10^5 conidia/ml and 25 ml/plant) in sterile double-distilled water. Control seedlings were sprayed with sterile double-distilled water. 
Tween (0.01%) was added to the conidial suspension and the water. Plants were enclosed in bags for 72 h postinoculation and incubated in growth chambers at 28°C day/23°C night with a 12-h photoperiod. Sunken, oval lesions were observed on the foliage of the onion seedlings inoculated with C. coccodes 4 days postinoculation. Lesions coalesced and foliage collapsed 7 days postinoculation. Control plants remained asymptomatic. When five leaf samples per replication were detached and incubated in a moist chamber for 3 days at 21 ± 2°C, abundant acervuli and setae were observed on the symptomatic tissue but not on control tissue. C. coccodes was consistently recovered from the onion seedling lesions. Six different Colletotrichum spp. have been reported to cause diseases on onions worldwide (1,4). C. circinans, which causes smudge, is an occasional onion pathogen in Michigan, while C. gloeosporioides has only been reported to be infecting onions in Georgia (3). To our knowledge, this is the first report of C. coccodes infecting and causing disease in onions plants. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , August 6, 2010. (2) J. C. Guerber et al. Mycologia 95:872. 2003. (3) C. Nischwitz et al. Plant Dis. 92:974. 2008. (4) H. F. Schwartz, and K. S. Mohan. Compendium of Onion and Garlic Diseases and Pests, 2nd ed. The American Phytopathological Society, St. Paul, MN. 1995.",2012-05-01 +22589098,Prediction and comparison of Salmonella-human and Salmonella-Arabidopsis interactomes.,"Salmonellosis caused by Salmonella bacteria is a food-borne disease and a worldwide health threat causing millions of infections and thousands of deaths every year. This pathogen infects an unusually broad range of host organisms including human and plants. 
A better understanding of the mechanisms of communication between Salmonella and its hosts requires identifying the interactions between Salmonella and host proteins. Protein-protein interactions (PPIs) are the fundamental building blocks of communication. Here, we utilize the prediction platform BIANA to obtain the putative Salmonella-human and Salmonella-Arabidopsis interactomes based on sequence and domain similarity to known PPIs. A gold standard list of Salmonella-host PPIs served to validate the quality of the human model. 24,726 and 10,926 PPIs comprising interactions between 38 and 33 Salmonella effectors and virulence factors with 9,740 human and 4,676 Arabidopsis proteins, respectively, were predicted. Putative hub proteins could be identified, and parallels between the two interactomes were discovered. This approach can provide insight into possible biological functions of so far uncharacterized proteins. The predicted interactions are available via a web interface which allows filtering of the database according to parameters provided by the user to narrow down the list of suspected interactions. The interactions are available via a web interface at http://sbi.imim.es/web/SHIPREC.php.",2012-05-01 +22548871,Fitting hidden Markov models of protein domains to a target species: application to Plasmodium falciparum.,"

Background

Hidden Markov Models (HMMs) are a powerful tool for protein domain identification. The Pfam database notably provides a large collection of HMMs which are widely used for the annotation of proteins in new sequenced organisms. In Pfam, each domain family is represented by a curated multiple sequence alignment from which a profile HMM is built. In spite of their high specificity, HMMs may lack sensitivity when searching for domains in divergent organisms. This is particularly the case for species with a biased amino-acid composition, such as P. falciparum, the main causal agent of human malaria. In this context, fitting HMMs to the specificities of the target proteome can help identify additional domains.

Results

Using P. falciparum as an example, we compare approaches that have been proposed for this problem, and present two alternative methods. Because previous attempts strongly rely on known domain occurrences in the target species or its close relatives, they mainly improve the detection of domains which belong to already identified families. Our methods learn global correction rules that adjust amino-acid distributions associated with the match states of HMMs. These rules are applied to all match states of the whole HMM library, thus enabling the detection of domains from previously absent families. Additionally, we propose a procedure to estimate the proportion of false positives among the newly discovered domains. Starting with the Pfam standard library, we build several new libraries with the different HMM-fitting approaches. These libraries are first used to detect new domain occurrences with low E-values. Second, by applying the Co-Occurrence Domain Discovery (CODD) procedure we have recently proposed, the libraries are further used to identify likely occurrences among potential domains with higher E-values.

Conclusion

We show that the new approaches allow identification of several domain families previously absent in the P. falciparum proteome and the Apicomplexa phylum, and identify many domains that are not detected by previous approaches. In terms of the number of new discovered domains, the new approaches outperform the previous ones when no close species are available or when they are used to identify likely occurrences among potential domains with high E-values. All predictions on P. falciparum have been integrated into a dedicated website which pools all known/new annotations of protein domains and functions for this organism. A software implementing the two proposed approaches is available at the same address: http://www.lirmm.fr/~terrapon/HMMfit/",2012-05-01 +22562758,Fourier transform based scalable image quality measure.,"We present a new image quality assessment (IQA) algorithm based on the phase and magnitude of the 2D (two-dimensional) Discrete Fourier Transform (DFT). The basic idea is to compare the phase and magnitude of the reference and distorted images to compute the quality score. However, it is well known that the Human Visual System's (HVS's) sensitivity to different frequency components is not the same. We accommodate this fact via a simple yet effective strategy of nonuniform binning of the frequency components. This process also leads to reduced space representation of the image thereby enabling the reduced-reference (RR) prospects of the proposed scheme. We employ linear regression to integrate the effects of the changes in phase and magnitude. In this way, the required weights are determined via proper training and hence more convincing and effective. Lastly, using the fact that phase usually conveys more information than magnitude, we use only the phase for RR quality assessment. This provides the crucial advantage of further reduction in the required amount of reference image information. 
The proposed method is therefore further scalable for RR scenarios. We report extensive experimental results using a total of 9 publicly available databases: 7 image (with a total of 3832 distorted images with diverse distortions) and 2 video databases (228 distorted videos in total). These show that the proposed method is overall better than several of the existing full-reference (FR) algorithms and two RR algorithms. Additionally, there is a graceful degradation in prediction performance as the amount of reference image information is reduced thereby confirming its scalability prospects. To enable comparisons and future study, a Matlab implementation of the proposed algorithm is available at http://www.ntu.edu.sg/home/wslin/reduced_phase.rar.",2012-05-01 +23276146,"The ectopic expression of BRCA1 is associated with genesis, progression, and prognosis of breast cancer in young patients.","

Objective

The study is to explore the histopathological features and the molecular marker expression of young women with breast cancers.

Methods

The pathological data of 367 cases of female breast cancer patients were retrospectively analyzed, focusing on the analysis of young breast cancer incidence trends and the clinical and pathological features.

Results

Compared with elderly breast cancer patients, young women with breast cancers had larger tumor sizes, higher histological grades, and lymph node metastasis rates. The majority of patients were in the PTNM III stage, with the clinical and pathological features of strong invasiveness. The positive expression rate of the BRCA1 protein in the young group was higher than that in the old group. BRCA1 expression was positively correlated with the PTNM stage and axillary lymph node metastasis (P < 0.05).

Conclusions

The ectopic expression of BRCA1 is associated with the genesis, progression, and prognosis of young breast cancer patients.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1628000054838044.",2012-12-31 +24565134,cGRNB: a web server for building combinatorial gene regulatory networks through integrated engineering of seed-matching sequence information and gene expression datasets.,"

Background

We are witnessing rapid progress in the development of methodologies for building the combinatorial gene regulatory networks involving both TFs (Transcription Factors) and miRNAs (microRNAs). There are a few tools available to do these jobs but most of them are not easy to use and not accessible online. A web server is especially needed in order to allow users to upload experimental expression datasets and build combinatorial regulatory networks corresponding to their particular contexts.

Methods

In this work, we compiled putative TF-gene, miRNA-gene and TF-miRNA regulatory relationships from forward-engineering pipelines and curated them as built-in data libraries. We streamlined the R codes of our two separate forward-and-reverse engineering algorithms for combinatorial gene regulatory network construction and formalized them as two major functional modules. As a result, we released the cGRNB (combinatorial Gene Regulatory Networks Builder): a web server for constructing combinatorial gene regulatory networks through integrated engineering of seed-matching sequence information and gene expression datasets. The cGRNB enables two major network-building modules, one for MPGE (miRNA-perturbed gene expression) datasets and the other for parallel miRNA/mRNA expression datasets. A miRNA-centered two-layer combinatorial regulatory cascade is the output of the first module and a comprehensive genome-wide network involving all three types of combinatorial regulations (TF-gene, TF-miRNA, and miRNA-gene) are the output of the second module.

Conclusions

In this article we propose cGRNB, a web server for building combinatorial gene regulatory networks through integrated engineering of seed-matching sequence information and gene expression datasets. Since parallel miRNA/mRNA expression datasets are rapidly accumulated by the advance of next-generation sequencing techniques, cGRNB will be very useful tool for researchers to build combinatorial gene regulatory networks based on expression datasets. The cGRNB web-server is free and available online at http://www.scbit.org/cgrnb.",2013-10-14 +22829626,Folding RNA/DNA hybrid duplexes.,"

Motivation

While there are numerous programs that can predict RNA or DNA secondary structures, a program that predicts RNA/DNA hetero-dimers is still missing. The lack of easy to use tools for predicting their structure may be in part responsible for the small number of reports of biologically relevant RNA/DNA hetero-dimers.

Results

We present here an extension to the widely used ViennaRNA Package (Lorenz et al., 2011) for the prediction of the structure of RNA/DNA hetero-dimers.

Availability

http://www.tbi.univie.ac.at/~ronny/RNA/vrna2.html

Contact

ronny@tbi.univie.ac.at, berni@bioinf.uni-leipzig.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-24 +22547615,Systematic analysis of metallo-β-lactamases using an automated database.,"Metallo-β-lactamases (MBLs) are enzymes that hydrolyze β-lactam antibiotics, resulting in bacterial resistance to these drugs. These proteins have caused concerns due to their facile transference, broad substrate spectra, and the absence of clinically useful inhibitors. To facilitate the classification, nomenclature, and analysis of MBLs, an automated database system was developed, the Metallo-β-Lactamase Engineering Database (MBLED) (http://www.mbled.uni-stuttgart.de). It contains information on MBLs retrieved from the NCBI peptide database while strictly following the nomenclature by Jacoby and Bush (http://www.lahey.org/Studies/) and the generally accepted class B β-lactamase (BBL) standard numbering scheme for MBLs. The database comprises 597 MBL protein sequences and enables systematic analyses of these sequences. A systematic analysis employing the database resulted in the generation of mutation profiles of assigned IMP- and VIM-type MBLs, the identification of five MBL protein entries from the NCBI peptide database that were inconsistent with the Jacoby and Bush nomenclature, and the identification of 15 new IMP candidates and 9 new VIM candidates. Furthermore, the database was used to identify residues with high mutation frequencies and variability (mutation hot spots) that were unexpectedly distant from the active site located in the ββ sandwich: positions 208 and 266 in the IMP family and positions 215 and 258 in the VIM family. 
We expect that the MBLED will be a valuable tool for systematically cataloguing and analyzing the increasing number of MBLs being reported.",2012-04-30 +21586587,ChIP-Array: combinatory analysis of ChIP-seq/chip and microarray gene expression data to discover direct/indirect targets of a transcription factor.,"Chromatin immunoprecipitation (ChIP) coupled with high-throughput techniques (ChIP-X), such as next generation sequencing (ChIP-Seq) and microarray (ChIP-chip), has been successfully used to map active transcription factor binding sites (TFBS) of a transcription factor (TF). The targeted genes can be activated or suppressed by the TF, or are unresponsive to the TF. Microarray technology has been used to measure the actual expression changes of thousands of genes under the perturbation of a TF, but is unable to determine if the affected genes are direct or indirect targets of the TF. Furthermore, both ChIP-X and microarray methods produce a large number of false positives. Combining microarray expression profiling and ChIP-X data allows more effective TFBS analysis for studying the function of a TF. However, current web servers only provide tools to analyze either ChIP-X or expression data, but not both. Here, we present ChIP-Array, a web server that integrates ChIP-X and expression data from human, mouse, yeast, fruit fly and Arabidopsis. This server will assist biologists to detect direct and indirect target genes regulated by a TF of interest and to aid in the functional characterization of the TF. ChIP-Array is available at http://jjwanglab.hku.hk/ChIP-Array, with free access to academic users.",2011-05-17 +23603332,APP2: automatic tracing of 3D neuron morphology based on hierarchical pruning of a gray-weighted image distance-tree.,"

Motivation

Tracing of neuron morphology is an essential technique in computational neuroscience. However, despite a number of existing methods, few open-source techniques are completely or sufficiently automated and at the same time are able to generate robust results for real 3D microscopy images.

Results

We developed all-path-pruning 2.0 (APP2) for 3D neuron tracing. The most important idea is to prune an initial reconstruction tree of a neuron's morphology using a long-segment-first hierarchical procedure instead of the original termini-first-search process in APP. To further enhance the robustness of APP2, we compute the distance transform of all image voxels directly for a gray-scale image, without the need to binarize the image before invoking the conventional distance transform. We also design a fast-marching algorithm-based method to compute the initial reconstruction trees without pre-computing a large graph. This method allows us to trace large images. We bench-tested APP2 on ~700 3D microscopic images and found that APP2 can generate more satisfactory results in most cases than several previous methods.

Availability

The software has been implemented as an open-source Vaa3D plugin. The source code is available in the Vaa3D code repository http://vaa3d.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-19 +21769991,A whole brain fMRI atlas generated via spatially constrained spectral clustering.,"Connectivity analyses and computational modeling of human brain function from fMRI data frequently require the specification of regions of interests (ROIs). Several analyses have relied on atlases derived from anatomical or cyto-architectonic boundaries to specify these ROIs, yet the suitability of atlases for resting state functional connectivity (FC) studies has yet to be established. This article introduces a data-driven method for generating an ROI atlas by parcellating whole brain resting-state fMRI data into spatially coherent regions of homogeneous FC. Several clustering statistics are used to compare methodological trade-offs as well as determine an adequate number of clusters. Additionally, we evaluate the suitability of the parcellation atlas against four ROI atlases (Talairach and Tournoux, Harvard-Oxford, Eickoff-Zilles, and Automatic Anatomical Labeling) and a random parcellation approach. The evaluated anatomical atlases exhibit poor ROI homogeneity and do not accurately reproduce FC patterns present at the voxel scale. In general, the proposed functional and random parcellations perform equivalently for most of the metrics evaluated. ROI size and hence the number of ROIs in a parcellation had the greatest impact on their suitability for FC analysis. With 200 or fewer ROIs, the resulting parcellations consist of ROIs with anatomic homology, and thus offer increased interpretability. Parcellation results containing higher numbers of ROIs (600 or 1,000) most accurately represent FC patterns present at the voxel scale and are preferable when interpretability can be sacrificed for accuracy. 
The resulting atlases and clustering software have been made publicly available at: http://www.nitrc.org/projects/cluster_roi/.",2011-07-18 +22558086,"Enchytraeus albidus microarray: enrichment, design, annotation and database (EnchyBASE).","Enchytraeus albidus (Oligochaeta) is an ecologically relevant species used as standard test organisms for risk assessment. Effects of stressors in this species are commonly determined at the population level using reproduction and survival as endpoints. The assessment of transcriptomic responses can be very useful e.g. to understand underlying mechanisms of toxicity with gene expression fingerprinting. In the present paper the following is being addressed: 1) development of suppressive subtractive hybridization (SSH) libraries enriched for differentially expressed genes after metal and pesticide exposures; 2) sequencing and characterization of all generated cDNA inserts; 3) development of a publicly available genomic database on E. albidus. A total of 2100 Expressed Sequence Tags (ESTs) were isolated, sequenced and assembled into 1124 clusters (947 singletons and 177 contigs). From these sequences, 41% matched known proteins in GenBank (BLASTX, e-value ≤ 10(-5)) and 37% had at least one Gene Ontology (GO) term assigned. In total, 5.5% of the sequences were assigned to a metabolic pathway, based on KEGG. With this new sequencing information, an Agilent custom oligonucleotide microarray was designed, representing a potential tool for transcriptomic studies. EnchyBASE (http://bioinformatics.ua.pt/enchybase/) was developed as a web freely available database containing genomic information on E. albidus and will be further extended in the near future for other enchytraeid species. The database so far includes all ESTs generated for E. albidus from three cDNA libraries. 
This information can be downloaded and applied in functional genomics and transcription studies.",2012-04-27 +22544604,Improvements in the Protein Identifier Cross-Reference service.,"The Protein Identifier Cross-Reference (PICR) service is a tool that allows users to map protein identifiers, protein sequences and gene identifiers across over 100 different source databases. PICR takes input through an interactive website as well as Representational State Transfer (REST) and Simple Object Access Protocol (SOAP) services. It returns the results as HTML pages, XLS and CSV files. It has been in production since 2007 and has been recently enhanced to add new functionality and increase the number of databases it covers. Protein subsequences can be Basic Local Alignment Search Tool (BLAST) against the UniProt Knowledgebase (UniProtKB) to provide an entry point to the standard PICR mapping algorithm. In addition, gene identifiers from UniProtKB and Ensembl can now be submitted as input or mapped to as output from PICR. We have also implemented a 'best-guess' mapping algorithm for UniProt. In this article, we describe the usefulness of PICR, how these changes have been implemented, and the corresponding additions to the web services. Finally, we explain that the number of source databases covered by PICR has increased from the initial 73 to the current 102. New resources include several new species-specific Ensembl databases as well as the Ensembl Genome ones. 
PICR can be accessed at http://www.ebi.ac.uk/Tools/picr/.",2012-04-27 +22543972,"Small rare recurrent deletions and reciprocal duplications in 2q21.1, including brain-specific ARHGEF4 and GPR148.","We have identified a rare small (~450 kb unique sequence) recurrent deletion in a previously linked attention-deficit hyperactivity disorder (ADHD) locus at 2q21.1 in five unrelated families with developmental delay (DD)/intellectual disability (ID), ADHD, epilepsy and other neurobehavioral abnormalities from 17 035 samples referred for clinical chromosomal microarray analysis. Additionally, a DECIPHER (http://decipher.sanger.ac.uk) patient 2311 was found to have the same deletion and presented with aggressive behavior. The deletion was not found in either six control groups consisting of 13 999 healthy individuals or in the DGV database. We have also identified reciprocal duplications in five unrelated families with autism, developmental delay (DD), seizures and ADHD. This genomic region is flanked by large, complex low-copy repeats (LCRs) with directly oriented subunits of ~109 kb in size that have 97.7% DNA sequence identity. We sequenced the deletion breakpoints within the directly oriented paralogous subunits of the flanking LCR clusters, demonstrating non-allelic homologous recombination as a mechanism of formation. The rearranged segment harbors five genes: GPR148, FAM123C, ARHGEF4, FAM168B and PLEKHB2. Expression of ARHGEF4 (Rho guanine nucleotide exchange factor 4) is restricted to the brain and may regulate the actin cytoskeletal network, cell morphology and migration, and neuronal function. GPR148 encodes a G-protein-coupled receptor protein expressed in the brain and testes. 
We suggest that small rare recurrent deletion of 2q21.1 is pathogenic for DD/ID, ADHD, epilepsy and other neurobehavioral abnormalities and, because of its small size, low frequency and more severe phenotype might have been missed in other previous genome-wide screening studies using single-nucleotide polymorphism analyses.",2012-04-27 +22778902,Disintegrins from hematophagous sources.,"Bloodsucking arthropods are a rich source of salivary molecules (sialogenins) which inhibit platelet aggregation, neutrophil function and angiogenesis. Here we review the literature on salivary disintegrins and their targets. Disintegrins were first discovered in snake venoms, and were instrumental in our understanding of integrin function and also for the development of anti-thrombotic drugs. In hematophagous animals, most disintegrins described so far have been discovered in the salivary gland of ticks and leeches. A limited number have also been found in hookworms and horseflies, and none identified in mosquitoes or sand flies. The vast majority of salivary disintegrins reported display a RGD motif and were described as platelet aggregation inhibitors, and few others as negative modulator of neutrophil or endothelial cell functions. This notably low number of reported disintegrins is certainly an underestimation of the actual complexity of this family of proteins in hematophagous secretions. Therefore an algorithm was created in order to identify the tripeptide motifs RGD, KGD, VGD, MLD, KTS, RTS, WGD, or RED (flanked by cysteines) in sialogenins deposited in GenBank database. The search included sequences from various blood-sucking animals such as ticks (e.g., Ixodes sp., Argas sp., Rhipicephalus sp., Amblyommasp.), tabanids (e.g., Tabanus sp.), bugs (e.g., Triatoma sp., Rhodnius prolixus), mosquitoes (e.g., Anopheles sp., Aedes sp., Culex sp.), sand flies (e.g., Lutzomyia sp., Phlebotomus sp.), leeches (e.g., Macrobdella sp., Placobdella sp.) and worms (e.g., Ancylostoma sp.). 
This approach allowed the identification of a remarkably high number of novel putative sialogenins with tripeptide motifs typical of disintegrins (>450 sequences) whose biological activity remains to be verified. This database is accessible online as a hyperlinked worksheet and displays biochemical, taxonomic, and gene ontology aspects for each putative disintegrin. It is also freely available for download (right click with the mouse) at links http://exon.niaid.nih.gov/transcriptome/RGD/RGD-Peps-WEB.xlsx (web version) and http://exon.niaid.nih.gov/transcriptome/RGD/RGD-sialogenins.zip (stand alone version).",2012-04-26 +22539671,DecoyFinder: an easy-to-use python GUI application for building target-specific decoy sets.,"

Unlabelled

Decoys are molecules that are presumed to be inactive against a target (i.e. will not likely bind to the target) and are used to validate the performance of molecular docking or a virtual screening workflow. The Directory of Useful Decoys database (http://dud.docking.org/) provides a free directory of decoys for use in virtual screening, though it only contains a limited set of decoys for 40 targets. To overcome this limitation, we have developed an application called DecoyFinder that selects, for a given collection of active ligands of a target, a set of decoys from a database of compounds. Decoys are selected if they are similar to active ligands according to five physical descriptors (molecular weight, number of rotational bonds, total hydrogen bond donors, total hydrogen bond acceptors and the octanol-water partition coefficient) without being chemically similar to any of the active ligands used as an input (according to the Tanimoto coefficient between MACCS fingerprints). To the best of our knowledge, DecoyFinder is the first application designed to build target-specific decoy sets.

Availability

A complete description of the software is included on the application home page. A validation of DecoyFinder on 10 DUD targets is provided as Supplementary Table S1. DecoyFinder is freely available at http://URVnutrigenomica-CTNS.github.com/DecoyFinder.",2012-04-26 +22539672,e-Drug3D: 3D structure collections dedicated to drug repurposing and fragment-based drug design.,"

Motivation

In the drug discovery field, new uses for old drugs, selective optimization of side activities and fragment-based drug design (FBDD) have proved to be successful alternatives to high-throughput screening. e-Drug3D is a database of 3D chemical structures of drugs that provides several collections of ready-to-screen SD files of drugs and commercial drug fragments. They are natural inputs in studies dedicated to drug repurposing and FBDD.

Availability

e-Drug3D collections are freely available at http://chemoinfo.ipmc.cnrs.fr/e-drug3d.html either for download or for direct in silico web-based screenings.",2012-04-26 +21757467,Elemental composition determination based on MS(n).,"

Motivation

Identification of metabolites is essential for their use as biomarkers, for research in systems biology and for drug discovery. The first step before a structure can be elucidated is to determine its elemental composition. High-resolution mass spectrometry, which provides the exact mass, together with common constraint rules, for rejecting false proposed elemental compositions, cannot always provide one unique elemental composition solution.

Results

The Multistage Elemental Formula (MEF) tool is presented in this article to enable the correct assignment of elemental composition to compounds, their fragment ions and neutral losses that originate from the molecular ion by using multistage mass spectrometry (MS(n)). The method provided by MEF reduces the list of predicted elemental compositions for each ion by analyzing the elemental compositions of its parent (precursor ion) and descendants (fragments). MS(n) data of several metabolites were processed using the MEF tool to assign the correct elemental composition and validate the efficacy of the method. Especially, the link between the mass accuracy needed to generate one unique elemental composition and the topology of the MS(n) tree (the width and the depth of the tree) was addressed. This method makes an important step toward semi-automatic de novo identification of metabolites using MS(n) data.

Availability

Software available at: http://abs.lacdr.gorlaeus.net/people/rojas-cherto

Contact

m.rojas@lacdr.leidenuniv.nl; t.reijmers@lacdr.leidenuniv.nl

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-14 +22693214,DBD2BS: connecting a DNA-binding protein with its binding sites.,"By binding to short and highly conserved DNA sequences in genomes, DNA-binding proteins initiate, enhance or repress biological processes. Accurately identifying such binding sites, often represented by position weight matrices (PWMs), is an important step in understanding the control mechanisms of cells. When given coordinates of a DNA-binding domain (DBD) bound with DNA, a potential function can be used to estimate the change of binding affinity after base substitutions, where the changes can be summarized as a PWM. This technique provides an effective alternative when the chromatin immunoprecipitation data are unavailable for PWM inference. To facilitate the procedure of predicting PWMs based on protein-DNA complexes or even structures of the unbound state, the web server, DBD2BS, is presented in this study. The DBD2BS uses an atom-level knowledge-based potential function to predict PWMs characterizing the sequences to which the query DBD structure can bind. For unbound queries, a list of 1066 DBD-DNA complexes (including 1813 protein chains) is compiled for use as templates for synthesizing bound structures. The DBD2BS provides users with an easy-to-use interface for visualizing the PWMs predicted based on different templates and the spatial relationships of the query protein, the DBDs and the DNAs. The DBD2BS is the first attempt to predict PWMs of DBDs from unbound structures rather than from bound ones. This approach increases the number of existing protein structures that can be exploited when analyzing protein-DNA interactions. In a recent study, the authors showed that the kernel adopted by the DBD2BS can generate PWMs consistent with those obtained from the experimental data. The use of DBD2BS to predict PWMs can be incorporated with sequence-based methods to discover binding sites in genome-wide studies. 
Available at: http://dbd2bs.csie.ntu.edu.tw/, http://dbd2bs.csbb.ntu.edu.tw/, and http://dbd2bs.ee.ncku.edu.tw.",2012-06-11 +24232543,Analysis of factors affecting containment with extracted partial enclosures using computational fluid dynamics.,"The Health and Safety Executive's (HSE's) COSHH Essentials (HSE, 2002, COSHH Essentials: easy steps to control chemicals HSG193. 2nd edn. ISBN 0 71762737 3. Available at http://www.coshh-essentials.org.uk. Accessed 30 October 2013) provides guidance on identifying the approaches required to control exposure to chemicals in the workplace. The control strategies proposed in COSHH Essentials are grouped into four control approaches: general ventilation, engineering control, containment, or to seek specialist advice. We report the use of experimental measurements and computational fluid dynamics (CFD) modelling to examine the performance of an engineering control approach and a containment control approach. The engineering control approach simulated was an extracted partial enclosure, based on the COSHH Essentials G200, for which simulations were compared with data from experiments. The containment approach simulated was of drum filling (in an extracted partial enclosure), based on the COSHH Essentials G305. The influence of the following factors on containment was examined: face velocity, size and location of face opening, and movement and ventilation flows. CFD predictions of the engineering control approach agreed well with the majority of the experimental measurements demonstrating confidence in the modelling approach used. The results show that the velocity distribution at the face of the enclosure is not uniform and the location and size of the opening are significant factors affecting the flow field and hence the containment performance. The simulations of drum filling show the effect on containment of the movement of a drum through the face of an enclosure. 
Analysis of containment performance, using a tracer, showed that containment was affected by the interaction between the ventilation flow direction and drum movement and spacing. Validated CFD simulations are shown to be a useful tool for gaining insight into the flows in control strategies for exposure control and to aid the interpretation of experimental measurements. The results support the assumption in COSHH Essentials that the use of 'containment' as a control approach is capable of achieving a 100-fold reduction in potential exposure. Novel CFD modelling techniques have been used to create controlled containment scenarios, improve understanding of the flow behaviour in the scenarios, and provide information that may aid future containment design.",2013-11-14 +23749957,Differential network analysis for the identification of condition-specific pathway activity and regulation.,"

Motivation

Identification of differentially expressed genes has led to countless new discoveries. However, differentially expressed genes are only a proxy for finding dysregulated pathways. The problem is to identify how the network of regulatory and physical interactions rewires in different conditions or in disease.

Results

We developed a procedure named DINA (DIfferential Network Analysis), which is able to identify sets of genes whose co-regulation is condition-specific, starting from a collection of condition-specific gene expression profiles. DINA is also able to predict which transcription factors (TFs) may be responsible for the pathway condition-specific co-regulation. We derived 30 tissue-specific gene networks in human and identified several metabolic pathways as the most differentially regulated across the tissues. We correctly identified TFs such as Nuclear Receptors as their main regulators and demonstrated that a gene with unknown function (YEATS2) acts as a negative regulator of hepatocyte metabolism. Finally, we showed that DINA can be used to make hypotheses on dysregulated pathways during disease progression. By analyzing gene expression profiles across primary and transformed hepatocytes, DINA identified hepatocarcinoma-specific metabolic and transcriptional pathway dysregulation.

Availability

We implemented an on-line web-tool http://dina.tigem.it enabling the user to apply DINA to identify tissue-specific pathways or gene signatures.

Contact

dibernardo@tigem.it

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-06-06 +23348723,Prediction of mutant mRNA splice isoforms by information theory-based exon definition.,"Mutations that affect mRNA splicing often produce multiple mRNA isoforms, resulting in complex molecular phenotypes. Definition of an exon and its inclusion in mature mRNA relies on joint recognition of both acceptor and donor splice sites. This study predicts cryptic and exon-skipping isoforms in mRNA produced by splicing mutations from the combined information contents (R(i), which measures binding-site strength, in bits) and distribution of the splice sites defining these exons. The total information content of an exon (R(i),total) is the sum of the R(i) values of its acceptor and donor splice sites, adjusted for the self-information of the distance separating these sites, that is, the gap surprisal. Differences between total information contents of an exon (ΔR(i,total)) are predictive of the relative abundance of these exons in distinct processed mRNAs. Constraints on splice site and exon selection are used to eliminate nonconforming and poorly expressed isoforms. Molecular phenotypes are computed by the Automated Splice Site and Exon Definition Analysis (http://splice.uwo.ca) server. Predictions of splicing mutations were highly concordant (85.2%; n = 61) with published expression data. In silico exon definition analysis will contribute to streamlining assessment of abnormal and normal splice isoforms resulting from mutations.",2013-02-21 +22345616,CytoSaddleSum: a functional enrichment analysis plugin for Cytoscape based on sum-of-weights scores.,"CytoSaddleSum provides Cytoscape users with access to the functionality of SaddleSum, a functional enrichment tool based on sum-of-weight scores. It operates by querying SaddleSum locally (using the standalone version) or remotely (through an HTTP request to a web server). 
The functional enrichment results are shown as a term relationship network, where nodes represent terms and edges show term relationships. Furthermore, query results are written as Cytoscape attributes allowing easy saving, retrieval and integration into network-based data analysis workflows.",2012-02-15 +23164167,Pulmonary haptoglobin (pHp) is part of the surfactant system in the human lung.,"Since the existence of pHp was demonstrated, it has been shown that this molecule and its receptor CD163 are regulated by different stimuli. Furthermore, a comparably fast secretion of pHp was described as well as the immuno-stimulatory effects. The intention of this study was to elucidate the role of pHp in the human lungs further. Here we show, by means of confocal microscopy and immune-electron-microscopy, a clear co-localization of pHp with surfactant protein-B in lamellar bodies of alveolar epithelial cells type II. These results are underlined by immunohistochemical stainings in differently fixed human lung tissues, which show pHp in vesicular and released form. The images of the released form resemble the intended position of surfactant in the human alveolus. pHp is secreted by Alveolar epithelial cells type II as previously shown. Moreover, pHp is co-localized with Surfactant protein-B. We conclude that the presented data shows that pHp is a native part of the surfactant system in the human lung.

Virtual slides

http://www.diagnosticpathology.diagnomx.eu/vs/2563584738239912.",2012-11-20 +22531216,Tachyon search speeds up retrieval of similar sequences by several orders of magnitude.,"

Unlabelled

The usage of current sequence search tools becomes increasingly slower as databases of protein sequences continue to grow exponentially. Tachyon, a new algorithm that identifies closely related protein sequences ~200 times faster than standard BLAST, circumvents this limitation with a reduced database and oligopeptide matching heuristic.

Availability and implementation

The tool is publicly accessible as a webserver at http://tachyon.bii.a-star.edu.sg and can also be accessed programmatically through SOAP.",2012-04-23 +21978489,Medusa: A tool for exploring and clustering biological networks.,"

Background

Biological processes such as metabolic pathways, gene regulation or protein-protein interactions are often represented as graphs in systems biology. The understanding of such networks, their analysis, and their visualization are today important challenges in life sciences. While a great variety of visualization tools that try to address most of these challenges already exists, only a few of them succeed in bridging the gap between visualization and network analysis.

Findings

Medusa is a powerful tool for visualization and clustering analysis of large-scale biological networks. It is highly interactive and it supports weighted and unweighted multi-edged directed and undirected graphs. It combines a variety of layouts and clustering methods for comprehensive views and advanced data analysis. Its main purpose is to integrate visualization and analysis of heterogeneous data from different sources into a single network.

Conclusions

Medusa provides a concise visual tool, which is helpful for network analysis and interpretation. Medusa is offered both as a standalone application and as an applet written in Java. It can be found at: https://sites.google.com/site/medusa3visualization.",2011-10-06 +21630449,Four-dimensional visualisation and analysis of protein-protein interaction networks.,"Protein-protein interaction networks are typically built with interactions collated from many experiments. These networks are thus composite and show all interactions that are currently known to occur in a cell. However, these representations are static and ignore the constant changes in protein-protein interactions. Here we present software for the generation and analysis of dynamic, four-dimensional (4-D) protein interaction networks. In this, time-course-derived abundance data are mapped onto three-dimensional networks to generate network movies. These networks can be navigated, manipulated and queried in real time. Two types of dynamic networks can be generated: a 4-D network that maps expression data onto protein nodes and one that employs 'real-time rendering' by which protein nodes and their interactions appear and disappear in association with temporal changes in expression data. We illustrate the utility of this software by the analysis of singlish interface date hub interactions during the yeast cell cycle. In this, we show that proteins MLC1 and YPT52 show strict temporal control of when their interaction partners are expressed. Since these proteins have one and two interaction interfaces, respectively, it suggests that temporal control of gene expression may be used to limit competition at the interaction interfaces of some hub proteins. 
The software and movies of the 4-D networks are available at http://www.systemsbiology.org.au/downloads_geomi.html.",2011-06-01 +21121054,Visual integration of results from a large DNA biobank (BioVU) using synthesis-view.,"In this paper, we describe using Synthesis-View, a new method of presenting complex genetic data, to revisit results of a study from the BioVU Vanderbilt DNA databank. BioVU is a biorepository of DNA samples coupled with de-identified electronic medical records (EMR). In the Ritchie et al. study ~10,000 BioVU samples were genotyped for 21 SNPs that were previously associated with 5 diseases: atrial fibrillation, Crohn Disease, multiple sclerosis, rheumatoid arthritis, and type 2 diabetes. In the proof-of-concept study, the 21 tests of association replicated previous findings where sample size provided adequate power. The majority of the BioVU results were originally presented in tabular form. Herein we have revisited the results of this study using Synthesis-View. The Synthesis-View software tool visually synthesizes the results of complex, multi-layered studies that aim to characterize associations between small numbers of single-nucleotide polymorphisms (SNPs) and diseases and/or phenotypes, such as the results of replication and meta-analysis studies. Using Synthesis-View with the data of the Ritchie et al. study and presenting these data in this integrated visual format demonstrates new ways to investigate and interpret these kinds of data. Synthesis-View is freely available for non-commercial research institutions, for full details see https://chgr.mc.vanderbilt.edu/synthesisview.",2011-01-01 +23434047,MeRIP-PF: an easy-to-use pipeline for high-resolution peak-finding in MeRIP-Seq data.,"RNA modifications, especially methylation of the N(6) position of adenosine (A)-m(6)A, represent an emerging research frontier in RNA biology. 
With the rapid development of high-throughput sequencing technology, in-depth study of m(6)A distribution and function relevance becomes feasible. However, a robust method to effectively identify m(6)A-modified regions has not been available yet. Here, we present a novel high-efficiency and user-friendly analysis pipeline called MeRIP-PF for the signal identification of MeRIP-Seq data in reference to controls. MeRIP-PF provides a statistical P-value for each identified m(6)A region based on the difference of read distribution when compared to the controls and also calculates false discovery rate (FDR) as a cut off to differentiate reliable m(6)A regions from the background. Furthermore, MeRIP-PF also achieves gene annotation of m(6)A signals or peaks and produce outputs in both XLS and graphical format, which are useful for further study. MeRIP-PF is implemented in Perl and is freely available at http://software.big.ac.cn/MeRIP-PF.html.",2013-01-20 +21445301,GOBO: gene expression-based outcome for breast cancer online.,"Microarray-based gene expression analysis holds promise of improving prognostication and treatment decisions for breast cancer patients. However, the heterogeneity of breast cancer emphasizes the need for validation of prognostic gene signatures in larger sample sets stratified into relevant subgroups. Here, we describe a multifunctional user-friendly online tool, GOBO (http://co.bmc.lu.se/gobo), allowing a range of different analyses to be performed in an 1881-sample breast tumor data set, and a 51-sample breast cancer cell line set, both generated on Affymetrix U133A microarrays. 
GOBO supports a wide range of applications including: 1) rapid assessment of gene expression levels in subgroups of breast tumors and cell lines, 2) identification of co-expressed genes for creation of potential metagenes, 3) association with outcome for gene expression levels of single genes, sets of genes, or gene signatures in multiple subgroups of the 1881-sample breast cancer data set. The design and implementation of GOBO facilitate easy incorporation of additional query functions and applications, as well as additional data sets irrespective of tumor type and array platform.",2011-03-21 +22546559,Identification of mirtrons in rice using MirtronPred: a tool for predicting plant mirtrons.,"Studies from flies and insects have reported the existence of a special class of miRNA, called mirtrons that are produced from spliced-out introns in a DROSHA-independent manner. The spliced-out lariat is debranched and refolded into a stem-loop structure resembling the pre-miRNA, which can then be processed by DICER into mature ~21 nt species. The mirtrons have not been reported from plants. In this study, we present MirtronPred, a web based server to predict mirtrons from intronic sequences. We have used the server to predict 70 mirtrons in rice introns that were put through a stringent selection filter to shortlist 16 best sequences. The prediction accuracy was subsequently validated by northern analysis and RT-PCR of a predicted Os-mirtron-109. The target sequences for this mirtron were also found in the rice degradome database. The possible role of the mirtron in rice regulon is discussed. The MirtronPred web server is available at http://bioinfo.icgeb.res.in/mirtronPred.",2012-04-21 +22053078,Detecting genome-wide epistases based on the clustering of relatively frequent items.,"

Motivation

In genome-wide association studies (GWAS), up to millions of single nucleotide polymorphisms (SNPs) are genotyped for thousands of individuals. However, conventional single locus-based approaches are usually unable to detect gene-gene interactions underlying complex diseases. Due to the huge search space for complicated high order interactions, many existing multi-locus approaches are slow and may suffer from low detection power for GWAS.

Results

In this article, we develop a simple, fast and effective algorithm to detect genome-wide multi-locus epistatic interactions based on the clustering of relatively frequent items. Extensive experiments on simulated data show that our algorithm is fast and more powerful in general than some recently proposed methods. On a real genome-wide case-control dataset for age-related macular degeneration (AMD), the algorithm has identified genotype combinations that are significantly enriched in the cases.

Availability

http://www.cs.ucr.edu/~minzhux/EDCF.zip

Contact

minzhux@cs.ucr.edu; jingli@cwru.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-03 +22522686,Multilocus sequence typing scheme for the characterization of 936-like phages infecting Lactococcus lactis.,"Lactococcus lactis phage infections are costly for the dairy industry because they can slow down the fermentation process and adversely impact product safety and quality. Although many strategies have been developed to better control phage populations, new virulent phages continue to emerge. Thus, it is beneficial to develop an efficient method for the routine identification of new phages within a dairy plant to rapidly adapt antiphage tactics. Here, we present a multilocus sequence typing (MLST) scheme for the characterization of the 936-like phages, the most prevalent phage group infecting L. lactis strains worldwide. The proposed MLST system targets the internal portion of five highly conserved genomic sequences belonging to the packaging, morphogenesis, and lysis modules. Our MLST scheme was used to analyze 100 phages with different restriction fragment length polymorphism (RFLP) patterns isolated from 11 different countries between 1971 and 2010. PCR products were obtained for all the phages analyzed, and sequence analysis highlighted the high discriminatory power of the MLST system, detecting 93 different sequence types. A conserved locus within the lys gene (coding for endolysin) was the most discriminative, with 65 distinct alleles. The locus within the mcp gene (major capsid protein) was the most conserved (54 distinct alleles). Phylogenetic analyses of the concatenated sequences exhibited a strong concordance of the clusters with the phage host range, indicating the clonal evolution of these phages. 
A public database has been set up for the proposed MLST system, and it can be accessed at http://pubmlst.org/bacteriophages/.",2012-04-20 +22459672,"Meeting report: the Schizophrenia International Research Society (SIRS) South America Conference (August 5-7, 2011).","On August 5-7, 2011, São Paulo was home to the first regional meeting of the Schizophrenia International Research Society (SIRS). Over 400 people from many countries attended the activities and contributed with around 200 submissions for oral and poster presentations. This article summarizes the data presented during the meeting, with an emphasis on the plenary talks and sessions for short oral presentations. For information on the poster presentations, readers are referred to the special issue of Revista de Psiquiatria Clínica (Brazil) dedicated to the conference (available at: http://www.hcnet.usp.br/ipq/revista/vol38/s1/).",2012-03-27 +22672126,"Trends in midwife-attended births in the United States, 1989-2009.","

Introduction

Data on attendance at birth by midwives in the United States have been available on the national level since 1989. Rates of certified nurse-midwife (CNM)-attended births more than doubled between 1989 (3.3% of all births) and 2002 (7.7%) and have remained steady since. This article examines trends in midwife-attended births from 1989 to 2009.

Methods

The data in this report are based on records gathered as part of the US National Standard Certificate of Live Birth from a public use Web site, Vital Stats (http://www.cdc.gov/nchs/VitalStats.htm), that allows users to create and download specialized tables.

Results

Between 2007 and 2009, the proportion of all births attended by CNMs increased by 4% from 7.3% of all births to 7.6% and a total of 313,516. This represents a decline in total births attended by CNMs from 2008 but a higher proportion of all births because total US births dropped at a faster rate. The proportion of vaginal births attended by CNMs reached an all-time high of 11.4% in 2009. There were strong regional patterns to the distribution of CNM-attended births. Births attended by ""other midwives"" rose to 21,787 or 0.5% of all US births, and the total proportion of all births attended by midwives reached an all-time high of 8.1%. The race/ethnicity of mothers attended by CNMs has shifted over the years. In 1990, CNMs attended a disproportionately high number of births to non-white mothers, whereas in 2009, the profile of CNM births mirrors the national distribution in race/ethnicity.

Discussion

Midwife-attended births in the United States are increasing. The geographic patterns in the distribution of midwife-attended births warrant further study.",2012-06-04 +22847934,A MATLAB toolbox for structural kinetic modeling.,

Summary

Structural kinetic modeling (SKM) enables the analysis of dynamical properties of metabolic networks solely based on topological information and experimental data. Current SKM-based experiments are hampered by the time-intensive process of assigning model parameters and choosing appropriate sampling intervals for Monte-Carlo experiments. We introduce a toolbox for the automatic and efficient construction and evaluation of structural kinetic models (SK models). Quantitative and qualitative analyses of network stability properties are performed in an automated manner. We illustrate the model building and analysis process in detailed example scripts that provide toolbox implementations of previously published literature models.

Availability

The source code is freely available for download at http://bioinformatics.uni-potsdam.de/projects/skm.

Contact

girbig@mpimp-golm.mpg.de.,2012-07-30 +23650583,"GeneSetDB: A comprehensive meta-database, statistical and visualisation framework for gene set analysis.","Most ""omics"" experiments require comprehensive interpretation of the biological meaning of gene lists. To address this requirement, a number of gene set analysis (GSA) tools have been developed. Although the biological value of GSA is strictly limited by the breadth of the gene sets used, very few methods exist for simultaneously analysing multiple publically available gene set databases. Therefore, we constructed GeneSetDB (http://genesetdb.auckland.ac.nz/haeremai.html), a comprehensive meta-database, which integrates 26 public databases containing diverse biological information with a particular focus on human disease and pharmacology. GeneSetDB enables users to search for gene sets containing a gene identifier or keyword, generate their own gene sets, or statistically test for enrichment of an uploaded gene list across all gene sets, and visualise gene set enrichment and overlap using a clustered heat map.",2012-04-17 +22510480,Domain enhanced lookup time accelerated BLAST.,"

Background

BLAST is a commonly-used software package for comparing a query sequence to a database of known sequences; in this study, we focus on protein sequences. Position-specific-iterated BLAST (PSI-BLAST) iteratively searches a protein sequence database, using the matches in round i to construct a position-specific score matrix (PSSM) for searching the database in round i + 1. Biegert and Söding developed Context-sensitive BLAST (CS-BLAST), which combines information from searching the sequence database with information derived from a library of short protein profiles to achieve better homology detection than PSI-BLAST, which builds its PSSMs from scratch.

Results

We describe a new method, called domain enhanced lookup time accelerated BLAST (DELTA-BLAST), which searches a database of pre-constructed PSSMs before searching a protein-sequence database, to yield better homology detection. For its PSSMs, DELTA-BLAST employs a subset of NCBI's Conserved Domain Database (CDD). On a test set derived from ASTRAL, with one round of searching, DELTA-BLAST achieves a ROC5000 of 0.270 vs. 0.116 for CS-BLAST. The performance advantage diminishes in iterated searches, but DELTA-BLAST continues to achieve better ROC scores than CS-BLAST.

Conclusions

DELTA-BLAST is a useful program for the detection of remote protein homologs. It is available under the ""Protein BLAST"" link at http://blast.ncbi.nlm.nih.gov.",2012-04-17 +24088394,The shaping and functional consequences of the dosage effect landscape in multiple myeloma.,"

Background

Multiple myeloma (MM) is a malignant proliferation of plasma B cells. Based on recurrent aneuploidy such as copy number alterations (CNAs), myeloma is divided into two subtypes with different CNA patterns and patient survival outcomes. How aneuploidy events arise, and whether they contribute to cancer cell evolution are actively studied. The large amount of transcriptomic changes resultant of CNAs (dosage effect) pose big challenges for identifying functional consequences of CNAs in myeloma in terms of specific driver genes and pathways. In this study, we hypothesize that gene-wise dosage effect varies as a result from complex regulatory networks that translate the impact of CNAs to gene expression, and studying this variation can provide insights into functional effects of CNAs.

Results

We propose gene-wise dosage effect score and genome-wide karyotype plot as tools to measure and visualize concordant copy number and expression changes across cancer samples. We find that dosage effect in myeloma is widespread yet variable, and it is correlated with gene expression level and CNA frequencies in different chromosomes. Our analysis suggests that despite the enrichment of differentially expressed genes between hyperdiploid MM and non-hyperdiploid MM in the trisomy chromosomes, the chromosomal proportion of dosage sensitive genes is higher in the non-trisomy chromosomes. Dosage-sensitive genes are enriched by genes with protein translation and localization functions, and dosage resistant genes are enriched by apoptosis genes. These results point to future studies on differential dosage sensitivity and resistance of pro- and anti-proliferation pathways and their variation across patients as therapeutic targets and prognosis markers.

Conclusions

Our findings support the hypothesis that recurrent CNAs in myeloma are selected by their functional consequences. The novel dosage effect score defined in this work will facilitate integration of copy number and expression data for identifying driver genes in cancer genomics studies. The accompanying R code is available at http://www.canevolve.org/dosageEffect/.",2013-10-02 +22689776,Protein subcellular location pattern classification in cellular images using latent discriminative models.,"

Motivation

Knowledge of the subcellular location of a protein is crucial for understanding its functions. The subcellular pattern of a protein is typically represented as the set of cellular components in which it is located, and an important task is to determine this set from microscope images. In this article, we address this classification problem using confocal immunofluorescence images from the Human Protein Atlas (HPA) project. The HPA contains images of cells stained for many proteins; each is also stained for three reference components, but there are many other components that are invisible. Given one such cell, the task is to classify the pattern type of the stained protein. We first randomly select local image regions within the cells, and then extract various carefully designed features from these regions. This region-based approach enables us to explicitly study the relationship between proteins and different cell components, as well as the interactions between these components. To achieve these two goals, we propose two discriminative models that extend logistic regression with structured latent variables. The first model allows the same protein pattern class to be expressed differently according to the underlying components in different regions. The second model further captures the spatial dependencies between the components within the same cell so that we can better infer these components. To learn these models, we propose a fast approximate algorithm for inference, and then use gradient-based methods to maximize the data likelihood.

Results

In the experiments, we show that the proposed models help improve the classification accuracies on synthetic data and real cellular images. The best overall accuracy we report in this article for classifying 942 proteins into 13 classes of patterns is about 84.6%, which to our knowledge is the best so far. In addition, the dependencies learned are consistent with prior knowledge of cell organization.

Availability

http://murphylab.web.cmu.edu/software/.",2012-06-01 +23251644,Cutoff Finder: a comprehensive and straightforward Web application enabling rapid biomarker cutoff optimization.,"Gene or protein expression data are usually represented by metric or at least ordinal variables. In order to translate a continuous variable into a clinical decision, it is necessary to determine a cutoff point and to stratify patients into two groups each requiring a different kind of treatment. Currently, there is no standard method or standard software for biomarker cutoff determination. Therefore, we developed Cutoff Finder, a bundle of optimization and visualization methods for cutoff determination that is accessible online. While one of the methods for cutoff optimization is based solely on the distribution of the marker under investigation, other methods optimize the correlation of the dichotomization with respect to an outcome or survival variable. We illustrate the functionality of Cutoff Finder by the analysis of the gene expression of estrogen receptor (ER) and progesterone receptor (PgR) in breast cancer tissues. This distribution of these important markers is analyzed and correlated with immunohistologically determined ER status and distant metastasis free survival. Cutoff Finder is expected to fill a relevant gap in the available biometric software repertoire and will enable faster optimization of new diagnostic biomarkers. The tool can be accessed at http://molpath.charite.de/cutoff.",2012-12-14 +22672248,Methotrexate vs. fumaric acid esters in moderate-to-severe chronic plaque psoriasis: data registry report on the efficacy under daily life conditions.,"

Objective

To compare the clinical efficacy of methotrexate (MTX) vs. fumaric acid esters (FAE) in psoriasis treated under daily life conditions.

Methods

Data were extracted from a registry (http://www.psoriasisregistry.at) of 272 adult patients with moderate-to-severe chronic plaque psoriasis treated primarily with MTX (n = 72) or FAE (n = 200) between 2004 and 2011. Data from all patients, including those who did not complete at least 3 months of monotherapy, were included in an intention-to-treat (ITT) worst-case analysis.

Results

Thirty of 72 (41.7%) patients treated with MTX and 85 of 200 (42.5%) patients treated with FAE discontinued early, mainly due to side-effects or lack of response. Among patients who completed at least 3 months of treatment, the response to primary treatment with MTX vs. FAE did not differ significantly at any time point. In the ITT worst-case analysis at month 3, complete remission rate, PASI90, PASI75 and PASI50 rates were 6%, 7%, 24% and 39% in MTX-treated patients vs. 1%, 5%, 27% and 44% in FAE-treated patients. Overall mean PASI reduction score improved significantly in response to primary MTX and FAE treatment (by 10.6% and 12.6%, respectively) between 3 and 6 months (P = 0.0005; exact Wilcoxon test), but not between 6 and 12 months (P = 0.16). A subset of 32 patients who did not respond satisfactorily to primary treatment with FAE responded better to subsequent MTX therapy (P < 0.0001; paired Wilcoxon test).

Conclusions

As shown by retrospective analysis, the primary efficacy of FAE was similar to that of MTX under daily life conditions.",2012-06-02 +22523575,TumorHoPe: a database of tumor homing peptides.,"

Background

Cancer is responsible for millions of immature deaths every year and is an economical burden on developing countries. One of the major challenges in the present era is to design drugs that can specifically target tumor cells not normal cells. In this context, tumor homing peptides have drawn much attention. These peptides are playing a vital role in delivering drugs in tumor tissues with high specificity. In order to provide service to scientific community, we have developed a database of tumor homing peptides called TumorHoPe.

Description

TumorHoPe is a manually curated database of experimentally validated tumor homing peptides that specifically recognize tumor cells and tumor associated microenvironment, i.e., angiogenesis. These peptides were collected and compiled from published papers, patents and databases. Current release of TumorHoPe contains 744 peptides. Each entry provides comprehensive information of a peptide that includes its sequence, target tumor, target cell, techniques of identification, peptide receptor, etc. In addition, we have derived various types of information from these peptide sequences that include secondary/tertiary structure, amino acid composition, and physicochemical properties of peptides. Peptides in this database have been found to target different types of tumors that include breast, lung, prostate, melanoma, colon, etc. These peptides have some common motifs including RGD (Arg-Gly-Asp) and NGR (Asn-Gly-Arg) motifs, which specifically recognize tumor angiogenic markers. TumorHoPe has been integrated with many web-based tools like simple/complex search, database browsing and peptide mapping. These tools allow a user to search tumor homing peptides based on their amino acid composition, charge, polarity, hydrophobicity, etc.

Conclusion

TumorHoPe is a unique database of its kind, which provides comprehensive information about experimentally validated tumor homing peptides and their target cells. This database will be very useful in designing peptide-based drugs and drug-delivery system. It is freely available at http://crdd.osdd.net/raghava/tumorhope/.",2012-04-16 +24085567,Functional module identification in protein interaction networks by interaction patterns.,"

Motivation

Identifying functional modules in protein-protein interaction (PPI) networks may shed light on cellular functional organization and thereafter underlying cellular mechanisms. Many existing module identification algorithms aim to detect densely connected groups of proteins as potential modules. However, based on this simple topological criterion of 'higher than expected connectivity', those algorithms may miss biologically meaningful modules of functional significance, in which proteins have similar interaction patterns to other proteins in networks but may not be densely connected to each other. A few blockmodel module identification algorithms have been proposed to address the problem but the lack of global optimum guarantee and the prohibitive computational complexity have been the bottleneck of their applications in real-world large-scale PPI networks.

Results

In this article, we propose a novel optimization formulation LCP(2) (low two-hop conductance sets) using the concept of Markov random walk on graphs, which enables simultaneous identification of both dense and sparse modules based on protein interaction patterns in given networks through searching for LCP(2) by random walk. A spectral approximate algorithm SLCP(2) is derived to identify non-overlapping functional modules. Based on a bottom-up greedy strategy, we further extend LCP(2) to a new algorithm (greedy algorithm for LCP(2)) GLCP(2) to identify overlapping functional modules. We compare SLCP(2) and GLCP(2) with a range of state-of-the-art algorithms on synthetic networks and real-world PPI networks. The performance evaluation based on several criteria with respect to protein complex prediction, high level Gene Ontology term prediction and especially sparse module detection, has demonstrated that our algorithms based on searching for LCP(2) outperform all other compared algorithms.

Availability and implementation

All data and code are available at http://www.cse.usf.edu/~xqian/fmi/slcp2hop/.",2013-10-01 +23493256,Quartet-net: a quartet-based method to reconstruct phylogenetic networks.,"Phylogenetic networks can model reticulate evolutionary events such as hybridization, recombination, and horizontal gene transfer. However, reconstructing such networks is not trivial. Popular character-based methods are computationally inefficient, whereas distance-based methods cannot guarantee reconstruction accuracy because pairwise genetic distances only reflect partial information about a reticulate phylogeny. To balance accuracy and computational efficiency, here we introduce a quartet-based method to construct a phylogenetic network from a multiple sequence alignment. Unlike distances that only reflect the relationship between a pair of taxa, quartets contain information on the relationships among four taxa; these quartets provide adequate capacity to infer a more accurate phylogenetic network. In applications to simulated and biological data sets, we demonstrate that this novel method is robust and effective in reconstructing reticulate evolutionary events and it has the potential to infer more accurate phylogenetic distances than other conventional phylogenetic network construction methods such as Neighbor-Joining, Neighbor-Net, and Split Decomposition. This method can be used in constructing phylogenetic networks from simple evolutionary events involving a few reticulate events to complex evolutionary histories involving a large number of reticulate events. A software called ""Quartet-Net"" is implemented and available at http://sysbio.cvm.msstate.edu/QuartetNet/.",2013-03-14 +23047561,Predicting protein residue-residue contacts using deep networks and boosting.,"

Motivation

Protein residue-residue contacts continue to play a larger and larger role in protein tertiary structure modeling and evaluation. Yet, while the importance of contact information increases, the performance of sequence-based contact predictors has improved slowly. New approaches and methods are needed to spur further development and progress in the field.

Results

Here we present DNCON, a new sequence-based residue-residue contact predictor using deep networks and boosting techniques. Making use of graphical processing units and CUDA parallel computing technology, we are able to train large boosted ensembles of residue-residue contact predictors achieving state-of-the-art performance.

Availability

The web server of the prediction method (DNCON) is available at http://iris.rnet.missouri.edu/dncon/.

Contact

chengji@missouri.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-09 +24506079,Randomized placebo-controlled D-cycloserine with cognitive behavior therapy for pediatric posttraumatic stress.,"

Unlabelled

Abstract Objective: Research on D-cycloserine (DCS), a partial N-methyl-d-aspartic acid (NMDA) agonist, has suggested that it may enhance exposure-based therapies for anxiety disorders. RESULTS with DCS in adult posttraumatic stress disorder (PTSD) have been conflicting; however, no data have been reported on children with PTSD. Although many individuals with PTSD respond to exposure-based cognitive behavioral therapy (CBT), there are subgroups of individuals who are nonresponders, and many responders still have substantial residual symptoms. This randomized, triple-blind, placebo-controlled study tested DCS as an adjunct to CBT to improve and speed treatment response for PTSD in youth.

Methods

Seven to 18 year-old youth with exposure to trauma and PTSD were offered a 12 session, manualized CBT treatment. Those who remained in treatment at the fifth session were randomly allocated (n=57) to either CBT and DCS or CBT and placebo.

Results

Youth in the CBT and DCS group had significant reductions in symptoms, but these reductions were not greater than those in the CBT and placebo group. There was a trend toward DCS speeding PTSD symptom recovery during the exposure-based sessions, and evidence that the CBT and DCS group better maintained stability of gains on inattention ratings from posttreatment to the 3 month follow-up.

Conclusions

This initial study of CBT and DCS to treat pediatric PTSD provided suggestive and preliminary evidence for more rapid symptom recovery and beneficial effects on attention, but did not show an overall greater effect for reducing PTSD symptoms. It appears that augmentation with DCS represents unique challenges in PTSD. Because PTSD involves complex, life-threatening trauma memories, as opposed to the imagined dreadful outcomes of other anxiety disorders, the use of DCS may require greater attention to how its use is coupled with exposure-based techniques. DCS may have inadvertently enhanced reconsolidation of trauma memories rather than more positive and adaptive memories. In addition, the results suggest that future research could focus on the longer-term benefits of DCS on attention and ways to capitalize on attention-enhancing therapies. ClinicalTrials.gov registry: Effect of D-cycloserine on Treatment of Posttraumatic Stress Disorder (PTSD) in Youth, #NCT01157416, http://clinicaltrials.gov/ct2/results?term=NCT01157416&Search=Search , and D-cycloserine Adjunctive Treatment for Posttraumatic Stress Disorder (PTSD) in Adolescents, #NCT01157429, http://clinicaltrials.gov/ct2/results?term=NCT01157429&Search=Search .",2014-02-07 +21569557,A hidden two-locus disease association pattern in genome-wide association studies.,"

Background

Recent association analyses in genome-wide association studies (GWAS) mainly focus on single-locus association tests (marginal tests) and two-locus interaction detections. These analysis methods have provided strong evidence of associations between genetics variances and complex diseases. However, there exists a type of association pattern, which often occurs within local regions in the genome and is unlikely to be detected by either marginal tests or interaction tests. This association pattern involves a group of correlated single-nucleotide polymorphisms (SNPs). The correlation among SNPs can lead to weak marginal effects and the interaction does not play a role in this association pattern. This phenomenon is due to the existence of unfaithfulness: the marginal effects of correlated SNPs do not express their significant joint effects faithfully due to the correlation cancelation.

Results

In this paper, we develop a computational method to detect this association pattern masked by unfaithfulness. We have applied our method to analyze seven data sets from the Wellcome Trust Case Control Consortium (WTCCC). The analysis for each data set takes about one week to finish the examination of all pairs of SNPs. Based on the empirical result of these real data, we show that this type of association masked by unfaithfulness widely exists in GWAS.

Conclusions

These newly identified associations enrich the discoveries of GWAS, which may provide new insights both in the analysis of tagSNPs and in the experiment design of GWAS. Since these associations may be easily missed by existing analysis tools, we can only connect some of them to publicly available findings from other association studies. As independent data set is limited at this moment, we also have difficulties to replicate these findings. More biological implications need further investigation.

Availability

The software is freely available at http://bioinformatics.ust.hk/hidden_pattern_finder.zip.",2011-05-14 +22553393,VMD DisRg: New User-Friendly Implement for calculation distance and radius of gyration in VMD program.,"

Unlabelled

Molecular dynamic simulation is a practical and powerful technique for analysis of protein structure. Several programs have been developed to facilitate the mentioned investigation, under them the visual molecular dynamic or VMD is the most frequently used programs. One of the beneficial properties of the VMD is its ability to be extendable by designing new plug-in. We introduce here a new facility of the VMD for distance analysis and radius of gyration of biopolymers such as protein and DNA.

Availability

The database is available for free at http://trc.ajums.ac.ir/HomePage.aspx/?TabID/=12618/&Site/=trc.ajums.ac/&Lang/=fa-IR.",2012-04-13 +23006766,The LO-BaFL method and ALS microarray expression analysis.,"

Background

Sporadic Amyotrophic Lateral Sclerosis (sALS) is a devastating, complex disease of unknown etiology. We studied this disease with microarray technology to capture as much biological complexity as possible. The Affymetrix-focused BaFL pipeline takes into account problems with probes that arise from physical and biological properties, so we adapted it to handle the long-oligonucleotide probes on our arrays (hence LO-BaFL). The revised method was tested against a validated array experiment and then used in a meta-analysis of peripheral white blood cells from healthy control samples in two experiments. We predicted differentially expressed (DE) genes in our sALS data, combining the results obtained using the TM4 suite of tools with those from the LO-BaFL method. Those predictions were tested using qRT-PCR assays.

Results

LO-BaFL filtering and DE testing accurately predicted previously validated DE genes in a published experiment on coronary artery disease (CAD). Filtering healthy control data from the sALS and CAD studies with LO-BaFL resulted in highly correlated expression levels across many genes. After bioinformatics analysis, twelve genes from the sALS DE gene list were selected for independent testing using qRT-PCR assays. High-quality RNA from six healthy Control and six sALS samples yielded the predicted differential expression for 7 genes: TARDBP, SKIV2L2, C12orf35, DYNLT1, ACTG1, B2M, and ILKAP. Four of the seven have been previously described in sALS studies, while ACTG1, B2M and ILKAP appear in the context of this disease for the first time. Supplementary material can be accessed at: http://webpages.uncc.edu/~cbaciu/LO-BaFL/supplementary_data.html.

Conclusion

LO-BaFL predicts DE results that are broadly similar to those of other methods. The small healthy control cohort in the sALS study is a reasonable foundation for predicting DE genes. Modifying the BaFL pipeline allowed us to remove noise and systematic errors, improving the power of this study, which had a small sample size. Each bioinformatics approach revealed DE genes not predicted by the other; subsequent PCR assays confirmed seven of twelve candidates, a relatively high success rate.",2012-09-24 +22113082,GenomicTools: a computational platform for developing high-throughput analytics in genomics.,"

Motivation

Recent advances in sequencing technology have resulted in the dramatic increase of sequencing data, which, in turn, requires efficient management of computational resources, such as computing time, memory requirements as well as prototyping of computational pipelines.

Results

We present GenomicTools, a flexible computational platform, comprising both a command-line set of tools and a C++ API, for the analysis and manipulation of high-throughput sequencing data such as DNA-seq, RNA-seq, ChIP-seq and MethylC-seq. GenomicTools implements a variety of mathematical operations between sets of genomic regions thereby enabling the prototyping of computational pipelines that can address a wide spectrum of tasks ranging from pre-processing and quality control to meta-analyses. Additionally, the GenomicTools platform is designed to analyze large datasets of any size by minimizing memory requirements. In practical applications, where comparable, GenomicTools outperforms existing tools in terms of both time and memory usage.

Availability

The GenomicTools platform (version 2.0.0) was implemented in C++. The source code, documentation, user manual, example datasets and scripts are available online at http://code.google.com/p/ibm-cbc-genomic-tools.",2011-11-22 +23571819,Administration of Bifidobacterium animalis subsp. lactis BB-12 in early childhood: a post-trial effect on caries occurrence at four years of age.,"Probiotic bifidobacteria are widely used in the prevention of childhood diseases. These bacteria are also associated with caries occurrence. The present secondary analysis in a low-caries population evaluated the effect of early administration of Bifidobacterium animalis subsp. lactis BB-12 (BB-12) on caries occurrence and identified markers of dental decay in early childhood. In the original randomized, double-blind, placebo-controlled study (NCT00638677, http://www.clinicaltrials.gov), infants (n = 106) received BB-12, xylitol or sorbitol tablets from the age of 1-2 months to 2 years with a slow-release pacifier or a spoon (daily dose of BB-12 10(10) colony-forming units, polyol 200-600 mg). The present data were collected using clinical examinations and questionnaires at the age of 4 years. The occurrence of dental caries was assessed using the International Caries Detection and Assessment System. Oral hygiene status and mutans streptococci (MS) levels were also determined. No differences were detected between the study groups in the occurrence of enamel caries (p = 0.268) or obvious dentinal caries (p = 0.201). The occurrence of caries was associated with daily consumption of sweet drinks (p = 0.028), visible plaque observed (p = 0.002) and MS detected in the dental plaque (p = 0.002). 
Administration of BB-12 in infancy does not seem to increase or decrease the occurrence of caries by 4 years of age in a low-caries population.",2013-04-05 +23563150,Characterization of genome-reduced fission yeast strains.,"The Schizosaccharomyces pombe genome is one of the smallest among the free-living eukaryotes. We further reduced the S. pombe gene number by large-scale gene deletion to identify a minimal gene set required for growth under laboratory conditions. The genome-reduced strain has four deletion regions: 168.4 kb in the left arm of chromosome I, 155.4 kb in the right arm of chromosome I, 211.7 kb in the left arm of chromosome II and 121.6 kb in the right arm of chromosome II. The deletions corresponded to a loss of 223 genes of the original ~5100. The quadruple-deletion strain, with a total deletion size of 657.3 kb, showed a decreased ability to uptake glucose and some amino acids in comparison with the parental strain. The strain also showed increased gene expression of the mating pheromone M-factor precursor and the nicotinamide adenine dinucleotide phosphate -specific glutamate dehydrogenase. There was also a 2.7-fold increase in the concentration of cellular adenosine triphosphate, and levels of the heterologous proteins, enhanced green fluorescent protein and secreted human growth hormone were increased by 1.7- and 1.8-fold, respectively. The transcriptome data from this study have been submitted to the Gene Expression Omnibus (GEO: http://www.ncbi.nlm.nih.gov/geo/) under the accession number GSE38620 (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?token=vjkxjewuywgcovc&acc=GSE38620).",2013-04-05 +21622656,CoMet--a web server for comparative functional profiling of metagenomes.,"Analyzing the functional potential of newly sequenced genomes and metagenomes has become a common task in biomedical and biological research. 
With the advent of high-throughput sequencing technologies comparative metagenomics opens the way to elucidate the genetically determined similarities and differences of complex microbial communities. We developed the web server 'CoMet' (http://comet.gobics.de), which provides an easy-to-use comparative metagenomics platform that is well-suitable for the analysis of large collections of metagenomic short read data. CoMet combines the ORF finding and subsequent assignment of protein sequences to Pfam domain families with a comparative statistical analysis. Besides comprehensive tabular data files, the CoMet server also provides visually interpretable output in terms of hierarchical clustering and multi-dimensional scaling plots and thus allows a quick overview of a given set of metagenomic samples.",2011-05-26 +24034795,[New anti-angiogenic strategies in the management of kidney cancer].,"

Introduction

The aim of this study was to clarify the current role of adjuvant and neoadjuvant anti-angiogenic therapy in the treatment of kidney cancer.

Materials and methods

The data were explored in Medline (http://www.ncbi.nlm.nih.gov) using the following MeSH terms or combinations of these keywords: ""cancer"", ""rein"", ""kidney"", ""adjuvant"", ""neoadjuvant"", ""antiangiogenique"", ""antiangiogenic"" and selecting the items produced in their methodology, their relevance to the theme explored and their date of publication.

Results

Thirty-two English and French items published between 2001 and 2011 were selected: five studies of evidence level 1, nine level 2 studies, nine level 4 studies, five studies at level 5 and four literature reviews. The cytoreductive nephrectomy as first-line treatment of locally advanced or metastatic kidney cancer is now controversial with the advent of new targeted anti-angiogenic therapies. In neoadjuvant setting, these treatments showed a moderate decrease in tumor volume and rarely improved resectability. In adjuvant setting, their place has yet to be specified and several trials are currently underway.

Conclusion

Recent years have seen the anti-angiogenic therapeutic strategies upset in locally advanced and metastatic renal cancer. The development of clinical trials and research protocols will allow us to determine in the near future the optimal therapeutic sequences.",2013-03-07 +22495752,"GeneclusterViz: a tool for conserved gene cluster visualization, exploration and analysis.","

Motivation

Gene clusters are arrangements of functionally related genes on a chromosome. In bacteria, it is expected that evolutionary pressures would conserve these arrangements due to the functional advantages they provide. Visualization of conserved gene clusters across multiple genomes provides key insights into their evolutionary histories. Therefore, a software tool that enables visualization and functional analyses of gene clusters would be a great asset to the biological research community.

Results

We have developed GeneclusterViz, a Java-based tool that allows for the visualization, exploration and downstream analyses of conserved gene clusters across multiple genomes. GeneclusterViz combines an easy-to-use exploration interface for gene clusters with a host of other analysis features such as multiple sequence alignments, phylogenetic analyses and integration with the KEGG pathway database.

Availability

http://biohealth.snu.ac.kr/GeneclusterViz/; http://microbial.informatics.indiana.edu/GeneclusterViz/",2012-04-11 +22238260,"PHACTS, a computational approach to classifying the lifestyle of phages.","

Motivation

Bacteriophages have two distinct lifestyles: virulent and temperate. The virulent lifestyle has many implications for phage therapy, genomics and microbiology. Determining which lifestyle a newly sequenced phage falls into is currently determined using standard culturing techniques. Such laboratory work is not only costly and time consuming, but also cannot be used on phage genomes constructed from environmental sequencing. Therefore, a computational method that utilizes the sequence data of phage genomes is needed.

Results

Phage Classification Tool Set (PHACTS) utilizes a novel similarity algorithm and a supervised Random Forest classifier to make a prediction whether the lifestyle of a phage, described by its proteome, is virulent or temperate. The similarity algorithm creates a training set from phages with known lifestyles and along with the lifestyle annotation, trains a Random Forest to classify the lifestyle of a phage. PHACTS predictions are shown to have a 99% precision rate.

Availability and implementation

PHACTS was implemented in the PERL programming language and utilizes the FASTA program (Pearson and Lipman, 1988) and the R programming language library 'Random Forest' (Liaw and Weiner, 2010). The PHACTS software is open source and is available as downloadable stand-alone version or can be accessed online as a user-friendly web interface. The source code, help files and online version are available at http://www.phantome.org/PHACTS/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-11 +23577055,Genome-scale screening of drug-target associations relevant to Ki using a chemogenomics approach.,"The identification of interactions between drugs and target proteins plays a key role in genomic drug discovery. In the present study, the quantitative binding affinities of drug-target pairs are differentiated as a measurement to define whether a drug interacts with a protein or not, and then a chemogenomics framework using an unbiased set of general integrated features and random forest (RF) is employed to construct a predictive model which can accurately classify drug-target pairs. The predictability of the model is further investigated and validated by several independent validation sets. The built model is used to predict drug-target associations, some of which were confirmed by comparing experimental data from public biological resources. A drug-target interaction network with high confidence drug-target pairs was also reconstructed. This network provides further insight for the action of drugs and targets. Finally, a web-based server called PreDPI-Ki was developed to predict drug-target interactions for drug discovery. In addition to providing a high-confidence list of drug-target associations for subsequent experimental investigation guidance, these results also contribute to the understanding of drug-target interactions. We can also see that quantitative information of drug-target associations could greatly promote the development of more accurate models. The PreDPI-Ki server is freely available via: http://sdd.whu.edu.cn/dpiki.",2013-04-05 +23696650,NETAL: a new graph-based method for global alignment of protein-protein interaction networks.,"

Motivation

The interactions among proteins and the resulting networks of such interactions have a central role in cell biology. Aligning these networks gives us important information, such as conserved complexes and evolutionary relationships. Although there have been several publications on the global alignment of protein networks, none of the proposed methods is able to produce a highly conserved and meaningful alignment. Moreover, the time complexity of current algorithms makes them impossible to use for multiple alignment of several large networks together.

Results

We present a novel algorithm for the global alignment of protein-protein interaction networks. It uses a greedy method, based on the alignment scoring matrix, which is derived from both biological and topological information of input networks to find the best global network alignment. NETAL outperforms other global alignment methods in terms of several measurements, such as Edge Correctness, Largest Common Connected Subgraphs and the number of common Gene Ontology terms between aligned proteins. As the running time of NETAL is much less than that of other available methods, NETAL can be easily extended to a multiple alignment algorithm. Furthermore, NETAL outperforms all other existing algorithms in terms of performance, and its short running time allowed us to implement it as the first server for global alignment of protein-protein interaction networks.

Availability

Binaries supported on linux are freely available for download at http://www.bioinf.cs.ipm.ir/software/netal.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-05-21 +21478196,Performance assessment of copy number microarray platforms using a spike-in experiment.,"

Motivation

Changes in the copy number of chromosomal DNA segments [copy number variants (CNVs)] have been implicated in human variation, heritable diseases and cancers. Microarray-based platforms are the current established technology of choice for studies reporting these discoveries and constitute the benchmark against which emergent sequence-based approaches will be evaluated. Research that depends on CNV analysis is rapidly increasing, and systematic platform assessments that distinguish strengths and weaknesses are needed to guide informed choice.

Results

We evaluated the sensitivity and specificity of six platforms, provided by four leading vendors, using a spike-in experiment. NimbleGen and Agilent platforms outperformed Illumina and Affymetrix in accuracy and precision of copy number dosage estimates. However, Illumina and Affymetrix algorithms that leverage single nucleotide polymorphism (SNP) information make up for this disadvantage and perform well at variant detection. Overall, the NimbleGen 2.1M platform outperformed others, but only with the use of an alternative data analysis pipeline to the one offered by the manufacturer.

Availability

The data is available from http://rafalab.jhsph.edu/cnvcomp/.

Contact

pevsner@jhmi.edu; fspencer@jhmi.edu; rafa@jhu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-04-01 +22695796,"EvolView, an online tool for visualizing, annotating and managing phylogenetic trees.","EvolView is a web application for visualizing, annotating and managing phylogenetic trees. First, EvolView is a phylogenetic tree viewer and customization tool; it visualizes trees in various formats, customizes them through built-in functions that can link information from external datasets, and exports the customized results to publication-ready figures. Second, EvolView is a tree and dataset management tool: users can easily organize related trees into distinct projects, add new datasets to trees and edit and manage existing trees and datasets. To make EvolView easy to use, it is equipped with an intuitive user interface. With a free account, users can save data and manipulations on the EvolView server. EvolView is freely available at: http://www.evolgenius.info/evolview.html.",2012-06-13 +22496884,Computational comparative study of tuberculosis proteomes using a model learned from signal peptide structures.,"Secretome analysis is important in pathogen studies. A fundamental and convenient way to identify secreted proteins is to first predict signal peptides, which are essential for protein secretion. However, signal peptides are highly complex functional sequences that are easily confused with transmembrane domains. Such confusion would obviously affect the discovery of secreted proteins. Transmembrane proteins are important drug targets, but very few transmembrane protein structures have been determined experimentally; hence, prediction of the structures is essential. 
In the field of structure prediction, researchers do not make assumptions about organisms, so there is a need for a general signal peptide predictor. To improve signal peptide prediction without prior knowledge of the associated organisms, we present a machine-learning method, called SVMSignal, which uses biochemical properties as features, as well as features acquired from a novel encoding, to capture biochemical profile patterns for learning the structures of signal peptides directly. We tested SVMSignal and five popular methods on two benchmark datasets from the SPdb and UniProt/Swiss-Prot databases, respectively. Although SVMSignal was trained on an old dataset, it performed well, and the results demonstrate that learning the structures of signal peptides directly is a promising approach. We also utilized SVMSignal to analyze proteomes in the entire HAMAP microbial database. Finally, we conducted a comparative study of secretome analysis on seven tuberculosis-related strains selected from the HAMAP database. We identified ten potential secreted proteins, two of which are drug resistant and four are potential transmembrane proteins. SVMSignal is publicly available at http://bio-cluster.iis.sinica.edu.tw/SVMSignal. It provides user-friendly interfaces and visualizations, and the prediction results are available for download.",2012-04-09 +21208983,Wave-spec: a preprocessing package for mass spectrometry data.,"

Unlabelled

Wave-spec is a pre-processing package for mass spectrometry (MS) data. The package includes several novel algorithms that overcome conventional difficulties with the pre-processing of such data. In this application note, we demonstrate step-by-step use of this package on a real-world MALDI dataset.

Availability

The package can be downloaded at http://www.vicc.org/biostatistics/supp.php. A shared mailbox (wave-spec@vanderbilt.edu) also is available for questions regarding application of the package.",2011-01-05 +25210533,Current perspectives in transfusion-transmitted infectious diseases: emerging and re-emerging infections.,"

Background

In August 2009, a group from the AABB (Stramer et al., Transfusion 2009;99:1S-29S, Emerging Infectious Disease Agents and their Potential Threat to Transfusion Safety; http://www.aabb.org/resources/bct/eid/Pages/default.aspx) published a Supplement to Transfusion that reviewed emerging infectious disease (EID) agents that pose a real or theoretical threat to transfusion safety, but for which an existing effective intervention is lacking. The necessary attributes for transfusion transmission were outlined including: presence of the agent in blood during the donor's asymptomatic phase, the agent's survival/persistence in blood during processing/storage, and lastly that the agent must be recognized as responsible for a clinically apparent outcome in at least a proportion of recipients who become infected. Without these attributes, agents are not considered as a transfusion-transmission threat and were excluded. Sixty-eight such agents were identified with enough evidence/likelihood of transfusion transmission (e.g., blood phase) and potential for clinical disease to warrant further consideration. In the Supplement, Fact Sheets (FS) were published providing information on: agent classification; disease agent's importance; clinical syndromes/diseases caused; transmission modes (including vectors/reservoirs); likelihood of transfusion transmission, and if proven to be transfusion-transmitted, information on known cases; the feasibility/predicted success of interventions for donor screening (questioning) and tests available for diagnostics/ adapted for donor screening; and finally, the efficacy, if known, of inactivation methods for plasma-derived products. The Supplement included a separate section on pathogen reduction using published data. Agents were prioritized relative to their scientific/epidemiologic threat and their perceived threat to the community including concerns expressed by the regulators of blood. 
Agents given the highest priority due to a known transfusion-transmission threat and severe/fatal disease in recipients were the vCJD prion, dengue viruses and the obligate red-cell parasite that causes babesiosis (B. microti and related Babesia). Although the focus of the Supplement was towards the United States and Canada, many of the agents (and the process) are applicable worldwide.

Next steps

Since the publication of the Supplement, six new FSs (yellow fever viruses-including vaccine breakthrough infections, miscellaneous arboviruses, XMRV, human parvoviruses/bocaviruses other than B19, and most recently the Middle East respiratory syndrome coronavirus, MERS-CoV) were added and 14 existing FSs updated (Anaplasma, Babesia, Bartonella, Ehrlichia, chronic wasting disease-CWD, human prions other than vCJD, vCJD, Coxiella burnetii-the agent of Q fever, dengue viruses, HAV, HEV, Japanese encephalitis-JE complex, tick-borne encephalitis viruses-TBEV, and human parvovirus B19). Also, tables were released outlining pathogen reduction clinical trials/results (published) and availability/commercial routine use of such technologies by country. Of necessity, the list of EID agents is not, and can never be, complete due to the nature of emergence. We recognized that a system of assessing the risk/threat of EIDs for their potential impact on blood safety and availability must include processes for monitoring, identifying, evaluating, estimating severity, assessing risk and developing interventions. Thus, a 'toolkit' containing the necessary 'tools' from EID monitoring (horizon scanning) to validation/effectiveness evaluations of interventions is being developed. The goal is to develop a systematic approach to risk assessment and intervention development for the impact of emerging infectious diseases upon blood safety, intended to educate and provide advice about risks/interventions in a timely/accurate fashion.

Conclusions

The process and final product (toolkit) including methods to monitor EID agent emergence, identification/recognition of a transfusion-transmission threat, methods for quantitative risk assessments, and the appropriate management of such threats should be considered for implementation by all blood systems.",2014-07-28 +21576238,ResponseNet: revealing signaling and regulatory networks linking genetic and transcriptomic screening data.,"Cellular response to stimuli is typically complex and involves both regulatory and metabolic processes. Large-scale experimental efforts to identify components of these processes often comprise of genetic screening and transcriptomic profiling assays. We previously established that in yeast genetic screens tend to identify response regulators, while transcriptomic profiling assays tend to identify components of metabolic processes. ResponseNet is a network-optimization approach that integrates the results from these assays with data of known molecular interactions. Specifically, ResponseNet identifies a high-probability sub-network, composed of signaling and regulatory molecular interaction paths, through which putative response regulators may lead to the measured transcriptomic changes. Computationally, this is achieved by formulating a minimum-cost flow optimization problem and solving it efficiently using linear programming tools. The ResponseNet web server offers a simple interface for applying ResponseNet. Users can upload weighted lists of proteins and genes and obtain a sparse, weighted, molecular interaction sub-network connecting their data. The predicted sub-network and its gene ontology enrichment analysis are presented graphically or as text. Consequently, the ResponseNet web server enables researchers that were previously limited to separate analysis of their distinct, large-scale experiments, to meaningfully integrate their data and substantially expand their understanding of the underlying cellular response. 
ResponseNet is available at http://bioinfo.bgu.ac.il/respnet.",2011-05-16 +22689266,The Ornstein-Uhlenbeck third-order Gaussian process (OUGP) applied directly to the un-resampled heart rate variability (HRV) tachogram for detrending and low-pass filtering.,"The heart rate variability signal derived from the ECG is a beat-to-beat record of RR-intervals and is, as a time series, irregularly sampled. It is common engineering practice to resample this record, typically at 4 Hz, onto a regular time axis for conventional analysis using IIR and FIR filters, and power spectral estimators, in the time and frequency domain, respectively. However, such interpolative resampling introduces noise into the signal and the information quality is compromised. Here, the Ornstein-Uhlenbeck third-order band-pass filter is presented which operates on data sampled at arbitrary time and preserves fidelity. The algorithm is available as open source code for MATLAB(®) (MathWorks™ Inc.) and supported by an interactive website at http://clinengnhs.liv.ac.uk/OUGP.htm.",2012-06-12 +23967014,MATCHCLIP: locate precise breakpoints for copy number variation using CIGAR string by matching soft clipped reads.,"Copy number variations (CNVs) are associated with many complex diseases. Next generation sequencing data enable one to identify precise CNV breakpoints to better understand the underlying molecular mechanisms and to design more efficient assays. Using the CIGAR strings of the reads, we develop a method that can identify the exact CNV breakpoints, and in cases when the breakpoints are in a repeated region, the method reports a range where the breakpoints can slide. Our method identifies the breakpoints of a CNV using both the positions and CIGAR strings of the reads that cover breakpoints of a CNV. 
A read with a long soft clipped part (denoted as S in CIGAR) at its 3'(right) end can be used to identify the 5'(left)-side of the breakpoints, and a read with a long S part at the 5' end can be used to identify the breakpoint at the 3'-side. To ensure both types of reads cover the same CNV, we require the overlapped common string to include both of the soft clipped parts. When a CNV starts and ends in the same repeated regions, its breakpoints are not unique, in which case our method reports the left most positions for the breakpoints and a range within which the breakpoints can be incremented without changing the variant sequence. We have implemented the methods in a C++ package intended for the current Illumina Miseq and Hiseq platforms for both whole genome and exon-sequencing. Our simulation studies have shown that our method compares favorably with other similar methods in terms of true discovery rate, false positive rate and breakpoint accuracy. Our results from a real application have shown that the detected CNVs are consistent with zygosity and read depth information. The software package is available at http://statgene.med.upenn.edu/softprog.html.",2013-08-16 +22693215,3DTF: a web server for predicting transcription factor PWMs using 3D structure-based energy calculations.,"We present the webserver 3D transcription factor (3DTF) to compute position-specific weight matrices (PWMs) of transcription factors using a knowledge-based statistical potential derived from crystallographic data on protein-DNA complexes. Analysis of available structures that can be used to construct PWMs shows that there are hundreds of 3D structures from which PWMs could be derived, as well as thousands of proteins homologous to these. Therefore, we created 3DTF, which delivers binding matrices given the experimental or modeled protein-DNA complex. 
The webserver can be used by biologists to derive novel PWMs for transcription factors lacking known binding sites and is freely accessible at http://www.gene-regulation.com/pub/programs/3dtf/.",2012-06-11 +22808931,Small rural maternity units without caesarean delivery capabilities: is it safe and sustainable in the eyes of health professionals in Tasmania?,"

Introduction

In Australia, over 50% of small rural maternity units have been closed in the past two decades. Workforce shortages, safety and quality concerns and cost considerations are the three interrelated reasons that have led to these closures. Women and families face many challenges when these critical services are absent from their local communities. In an effort to continue to provide maternity services in rural areas, small maternity units without caesarean delivery capabilities have been established in a few rural communities in Tasmania. However, they have divided the opinions of Tasmanian health professionals. This article is part of a larger study which focused on maternity services for rural women and reports the views of the health professionals on this model of care.

Methods

A qualitative study using semi-structured interviews was conducted with 20 maternity health providers across Tasmania to explore their experiences and views on the model of offering small rural maternity units without obstetric services. The data were analysed in NVivo v9 (www.qsrinternational.com) using grounded theory.

Results

Three main themes emerged from the interview data: (1) women's difficulties in rural areas; (2) women's expectations; and (3) maternity units without caesarean delivery capabilities. The results reveal that low-intervention style birthing services in rural areas could reduce women's difficulties that include access issues, disruption, anxiety and travel related issues, and address women's expectations in terms of access to local services. However, this model is less likely to meet women's safety expectations, especially in emergency situations.

Conclusion

The findings of this study offer insights for policy-makers and state government with regard to the future planning of this model of care. It is recommended that safety and sustainability issues should be considered when this model of care is to be implemented in other rural communities.",2012-07-03 +21795323,RseqFlow: workflows for RNA-Seq data analysis.,"

Summary

We have developed an RNA-Seq analysis workflow for single-ended Illumina reads, termed RseqFlow. This workflow includes a set of analytic functions, such as quality control for sequencing data, signal tracks of mapped reads, calculation of expression levels, identification of differentially expressed genes and coding SNPs calling. This workflow is formalized and managed by the Pegasus Workflow Management System, which maps the analysis modules onto available computational resources, automatically executes the steps in the appropriate order and supervises the whole running process. RseqFlow is available as a Virtual Machine with all the necessary software, which eliminates any complex configuration and installation steps.

Availability and implementation

http://genomics.isi.edu/rnaseq

Contact

wangying@xmu.edu.cn; knowles@med.usc.edu; deelman@isi.edu; tingchen@usc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-27 +22493694,Context-specific protein network miner--an online system for exploring context-specific protein interaction networks from the literature.,"

Background

Protein interaction networks (PINs) specific within a particular context contain crucial information regarding many cellular biological processes. For example, PINs may include information on the type and directionality of interaction (e.g. phosphorylation), location of interaction (i.e. tissues, cells), and related diseases. Currently, very few tools are capable of deriving context-specific PINs for conducting exploratory analysis.

Results

We developed a literature-based online system, Context-specific Protein Network Miner (CPNM), which derives context-specific PINs in real-time from the PubMed database based on a set of user-input keywords and enhanced PubMed query system. CPNM reports enriched information on protein interactions (with type and directionality), their network topology with summary statistics (e.g. most densely connected proteins in the network; most densely connected protein-pairs; and proteins connected by most inbound/outbound links) that can be explored via a user-friendly interface. Some of the novel features of the CPNM system include PIN generation, ontology-based PubMed query enhancement, real-time, user-queried, up-to-date PubMed document processing, and prediction of PIN directionality.

Conclusions

CPNM provides a tool for biologists to explore PINs. It is freely accessible at http://www.biotextminer.com/CPNM/.",2012-04-06 +22080300,Immunogenetic Management Software: a new tool for visualization and analysis of complex immunogenetic datasets.,"Here we describe the Immunogenetic Management Software (IMS) system, a novel web-based application that permits multiplexed analysis of complex immunogenetic traits that are necessary for the accurate planning and execution of experiments involving large animal models, including nonhuman primates. IMS is capable of housing complex pedigree relationships, microsatellite-based MHC typing data, as well as MHC pyrosequencing expression analysis of class I alleles. It includes a novel, automated MHC haplotype naming algorithm and has accomplished an innovative visualization protocol that allows users to view multiple familial and MHC haplotype relationships through a single, interactive graphical interface. Detailed DNA and RNA-based data can also be queried and analyzed in a highly accessible fashion, and flexible search capabilities allow experimental choices to be made based on multiple, individualized and expandable immunogenetic factors. This web application is implemented in Java, MySQL, Tomcat, and Apache, with supported browsers including Internet Explorer and Firefox on Windows and Safari on Mac OS. The software is freely available for distribution to noncommercial users by contacting Leslie.kean@emory.edu. 
A demonstration site for the software is available at http://typing.emory.edu/typing_demo , user name: imsdemo7@gmail.com and password: imsdemo.",2011-11-15 +22607158,Intra-aortic balloon counterpulsation in the treatment of infarction-related cardiogenic shock--review of the current evidence.,"The European ST-elevated myocardial infarction (STEMI) guideline suggested the intra-aortic balloon pump (IABP) with a recommendation level I and a level of evidence C as an effective measure in combination with balloon angioplasty in patients with cardiogenic shock (CS), stent implantation, and inotropic and vasopressor support. Similarly, upon mechanical complication due to myocardial infarction (MI), the guideline suggests that in patients with a ventricular septal defect or in most patients with acute mitral regurgitation, preoperative IABP implantation is indicated for circulatory support. The American College of Cardiology/American Heart Association STEMI guideline recommends the use of the IABP with a recommendation level I and a level of evidence B if CS does not respond rapidly to pharmacological treatment. The guideline notes that the IABP is a stabilizing measure for angiography and early revascularization. Even in MI complications, the use of preoperative IABP is recommended before surgery. Within this overview, we summarize the current evidence on IABP use in patients with CS complicated by MI. From our Cochrane data analysis, we conclude that in CS due to acute MI (AMI) treated with adjuvant systemic fibrinolysis, the IABP should be implanted. In patients with CS following AMI, treated with primary percutaneous coronary intervention (PCI), the IABP can be implanted, although data are not distinctive (i.e., indicating positive and negative effects). In the future, randomized controlled trials are needed to determine the use of IABP in CS patients treated with PCI. 
When patients with CS are transferred to a PCI center with or without thrombolysis, patients should receive mechanical support with an IABP. To treat mechanical MI complications-in particular ventricular septal defect-patients should be treated with an IABP to stabilize their hemodynamic situation prior to cardiac surgery. Similar recommendations are given in the German Austrian guidelines on treatment of infarction-related CS patients (http://www.awmf.org/leitlinien/detail/ll/019-013.html).",2012-05-21 +23129300,MetaGeneTack: ab initio detection of frameshifts in metagenomic sequences.,"

Summary

Frameshift (FS) prediction is important for analysis and biological interpretation of metagenomic sequences. Since a genomic context of a short metagenomic sequence is rarely known, there is not enough data available to estimate parameters of species-specific statistical models of protein-coding and non-coding regions. The challenge of ab initio FS detection is, therefore, twofold: (i) to find a way to infer necessary model parameters and (ii) to identify positions of frameshifts (if any). Here we describe a new tool, MetaGeneTack, which uses a heuristic method to estimate parameters of sequence models used in the FS detection algorithm. It is shown on multiple test sets that the MetaGeneTack FS detection performance is comparable to or better than that of the earlier developed program FragGeneScan.

Availability and implementation

MetaGeneTack is available as a web server at http://exon.gatech.edu/GeneTack/cgi/metagenetack.cgi. Academic users can download a standalone version of the program from http://exon.gatech.edu/license_download.cgi.",2012-11-04 +23813008,A method for integrating and ranking the evidence for biochemical pathways by mining reactions from text.,"

Motivation

To create, verify and maintain pathway models, curators must discover and assess knowledge distributed over the vast body of biological literature. Methods supporting these tasks must understand both the pathway model representations and the natural language in the literature. These methods should identify and order documents by relevance to any given pathway reaction. No existing system has addressed all aspects of this challenge.

Method

We present novel methods for associating pathway model reactions with relevant publications. Our approach extracts the reactions directly from the models and then turns them into queries for three text mining-based MEDLINE literature search systems. These queries are executed, and the resulting documents are combined and ranked according to their relevance to the reactions of interest. We manually annotate document-reaction pairs with the relevance of the document to the reaction and use this annotation to study several ranking methods, using various heuristic and machine-learning approaches.

Results

Our evaluation shows that the annotated document-reaction pairs can be used to create a rule-based document ranking system, and that machine learning can be used to rank documents by their relevance to pathway reactions. We find that a Support Vector Machine-based system outperforms several baselines and matches the performance of the rule-based system. The success of the query extraction and ranking methods are used to update our existing pathway search system, PathText.

Availability

An online demonstration of PathText 2 and the annotated corpus are available for research purposes at http://www.nactem.ac.uk/pathtext2/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +22476559,"Identification and molecular analysis of pathogenic yeasts in droppings of domestic pigeons in Beijing, China.","Feral pigeons are known as reservoirs of pathogenic yeasts that cause opportunistic infections in humans. In the outskirts of Beijing, China, pigeons are more frequently raised at homes than are encountered in public areas. Many studies have focused on the presence of pathogenic yeasts in the excreta (fresh or withered) of a variety of birds, pigeon crop and cloacae. One hundred and forty-three samples of fresh droppings were collected from three suburban pigeon-raising homes in an area of northern Beijing, China. The internal transcribed sequences (ITS) of all strains (except for 8 strains of Rhodotorula sp. ) were sequenced and compared with those of the databases of the National Center for Biotechnology Information website ( http://www.ncbi.nlm.nih.gov ) using the Basic Local Alignment Search Tool (BLAST). Yeasts representing 8 genera, Cryptococcus, Filobasidium, Rhodotorula, Candida, Debaryomyces, Saccharomyces, Trichosporon and Sporidiobolus, were identified from 120 isolates. Cryptococcus was the most prolific genus, represented by eight species. The populations of yeast species isolated from fresh pigeon droppings were different among homes. Although it is well established that Cryptococcus neoformans exists mainly in old pigeon guano, several C. neoformans strains were still isolated from fresh pigeon excreta, providing a clue that live cryptococcal cells could move through the gastrointestinal tract of the pigeons. Eight genera identified from fresh droppings of domestic pigeons further confirm that pigeons serve as reservoirs, carriers and even spreaders of Cryptococcus species and other medically significant yeasts. 
The proportion of pathogenic yeasts in all isolates is more than 90 %.",2012-04-03 +22491796,MSV3d: database of human MisSense Variants mapped to 3D protein structure.,"The elucidation of the complex relationships linking genotypic and phenotypic variations to protein structure is a major challenge in the post-genomic era. We present MSV3d (Database of human MisSense Variants mapped to 3D protein structure), a new database that contains detailed annotation of missense variants of all human proteins (20 199 proteins). The multi-level characterization includes details of the physico-chemical changes induced by amino acid modification, as well as information related to the conservation of the mutated residue and its position relative to functional features in the available or predicted 3D model. Major releases of the database are automatically generated and updated regularly in line with the dbSNP (database of Single Nucleotide Polymorphism) and SwissVar releases, by exploiting the extensive Décrypthon computational grid resources. The database (http://decrypthon.igbmc.fr/msv3d) is easily accessible through a simple web interface coupled to a powerful query engine and a standard web service. The content is completely or partially downloadable in XML or flat file formats. Database URL: http://decrypthon.igbmc.fr/msv3d.",2012-04-03 +21768146,Ten years of software sustainability at the Infrared Processing and Analysis Center.,"This paper presents a case study of an approach to sustainable software architecture that has been successfully applied over a period of 10 years to astronomy software services at the NASA Infrared Processing and Analysis Center (IPAC), Caltech (http://www.ipac.caltech.edu). The approach was developed in response to the need to build and maintain the NASA Infrared Science Archive (http://irsa.ipac.caltech.edu), NASA's archive node for infrared astronomy datasets. 
When the archive opened for business in 1999 serving only two datasets, it was understood that the holdings would grow rapidly in size and diversity, and consequently in the number of queries and volume of data download. It was also understood that platforms and browsers would be modernized, that user interfaces would need to be replaced and that new functionality outside of the scope of the original specifications would be needed. The changes in scientific functionality over time are largely driven by the archive user community, whose interests are represented by a formal user panel. The approach has been extended to support four more major astronomy archives, which today host data from more than 40 missions and projects, to support a complete modernization of a powerful and unique legacy astronomy application for co-adding survey data, and to support deployment of Montage, a powerful image mosaic engine for astronomy. The approach involves using a component-based architecture, designed from the outset to support sustainability, extensibility and portability. Although successful, the approach demands careful assessment of new and emerging technologies before adopting them, and attention to a disciplined approach to software engineering and maintenance. The paper concludes with a list of best practices for software sustainability that are based on 10 years of experience at IPAC.",2011-08-01 +22894160,metaPIS: a sequence-based meta-server for protein interaction site prediction.,"The identification of interfaces in protein complexes is effective for the elucidation of protein function and helps us to understand their roles in biological processes. With the exponentially growing amount of protein sequence data, an exploration of new methods that predict protein interaction sites based solely on sequence information is becoming increasingly urgent. 
Because a combination of different methods could produce better results than a single method, interaction site prediction can be improved through the utilization of different methods. This paper describes a new method that predicts interaction sites based on protein sequences by integrating five different algorithms employing meta-method, Majority Vote and SVMhmm Regression techniques. The 'metaPIS' web-server was implemented for meta-prediction. An evaluation of the meta-methods using independent datasets revealed that Majority Vote achieved the highest average Matthews correlation coefficient (0.181) among all the methods assessed. SVMhmm Regression achieved a lower score but provided a more stable result. The metaPIS server allows experimental biologists to speculate regarding protein function by identifying potential interaction sites based on protein sequence. As a web server, metaPIS is freely accessible to the public at http://202.116.74.5:84/metapis.",2013-02-01 +22815356,UniMoG--a unifying framework for genomic distance calculation and sorting based on DCJ.,"

Summary

UniMoG is a software combining five genome rearrangement models: double cut and join (DCJ), restricted DCJ, Hannenhalli and Pevzner (HP), inversion and translocation. It can compute the pairwise genomic distances and a corresponding optimal sorting scenario for an arbitrary number of genomes. All five models can be unified through the DCJ model, thus the implementation is based on DCJ and, where reasonable, uses the most efficient existing algorithms for each distance and sorting problem. Both textual and graphical output is possible for visualizing the operations.

Availability and implementation

The software is available through the Bielefeld University Bioinformatics Web Server at http://bibiserv.techfak.uni-bielefeld.de/dcj with instructions and example data.

Contact

rhilker@cebitec.uni-bielefeld.de.",2012-07-18 +22618535,High-throughput analysis of epistasis in genome-wide association studies with BiForce.,"

Motivation

Gene-gene interactions (epistasis) are thought to be important in shaping complex traits, but they have been under-explored in genome-wide association studies (GWAS) due to the computational challenge of enumerating billions of single nucleotide polymorphism (SNP) combinations. Fast screening tools are needed to make epistasis analysis routinely available in GWAS.

Results

We present BiForce to support high-throughput analysis of epistasis in GWAS for either quantitative or binary disease (case-control) traits. BiForce achieves great computational efficiency by using memory efficient data structures, Boolean bitwise operations and multithreaded parallelization. It performs a full pair-wise genome scan to detect interactions involving SNPs with or without significant marginal effects using appropriate Bonferroni-corrected significance thresholds. We show that BiForce is more powerful and significantly faster than published tools for both binary and quantitative traits in a series of performance tests on simulated and real datasets. We demonstrate BiForce in analysing eight metabolic traits in a GWAS cohort (323 697 SNPs, >4500 individuals) and two disease traits in another (>340 000 SNPs, >1750 cases and 1500 controls) on a 32-node computing cluster. BiForce completed analyses of the eight metabolic traits within 1 day, identified nine epistatic pairs of SNPs in five metabolic traits and 18 SNP pairs in two disease traits. BiForce can make the analysis of epistasis a routine exercise in GWAS and thus improve our understanding of the role of epistasis in the genetic regulation of complex traits.

Availability and implementation

The software is free and can be downloaded from http://bioinfo.utu.fi/BiForce/.

Contact

wenhua.wei@igmm.ed.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-05-21 +21903628,An empirical comparison of several recent epistatic interaction detection methods.,"

Motivation

Many new methods have recently been proposed for detecting epistatic interactions in GWAS data. There is, however, no in-depth independent comparison of these methods yet.

Results

Five recent methods-TEAM, BOOST, SNPHarvester, SNPRuler and Screen and Clean (SC)-are evaluated here in terms of power, type-1 error rate, scalability and completeness. In terms of power, TEAM performs best on data with main effect and BOOST performs best on data without main effect. In terms of type-1 error rate, TEAM and BOOST have higher type-1 error rates than SNPRuler and SNPHarvester. SC does not control type-1 error rate well. In terms of scalability, we tested the five methods using a dataset with 100 000 SNPs on a 64 bit Ubuntu system, with Intel (R) Xeon(R) CPU 2.66 GHz, 16 GB memory. TEAM takes ~36 days to finish and SNPRuler reports heap allocation problems. BOOST scales up to 100 000 SNPs and the cost is much lower than that of TEAM. SC and SNPHarvester are the most scalable. In terms of completeness, we study how frequently the pruning techniques employed by these methods incorrectly prune away the most significant epistatic interactions. We find that, on average, 20% of datasets without main effect and 60% of datasets with main effect are pruned incorrectly by BOOST, SNPRuler and SNPHarvester.

Availability

The software for the five methods tested is available from the URLs below. TEAM: http://csbio.unc.edu/epistasis/download.php BOOST: http://ihome.ust.hk/~eeyang/papers.html. SNPHarvester: http://bioinformatics.ust.hk/SNPHarvester.html. SNPRuler: http://bioinformatics.ust.hk/SNPRuler.zip. Screen and Clean: http://wpicr.wpic.pitt.edu/WPICCompGen/.

Contact

wangyue@nus.edu.sg.",2011-09-07 +21697122,Identification of novel transcripts in annotated genomes using RNA-Seq.,"

Summary

We describe a new 'reference annotation based transcript assembly' problem for RNA-Seq data that involves assembling novel transcripts in the context of an existing annotation. This problem arises in the analysis of expression in model organisms, where it is desirable to leverage existing annotations for discovering novel transcripts. We present an algorithm for reference annotation-based transcript assembly and show how it can be used to rapidly investigate novel transcripts revealed by RNA-Seq in comparison with a reference annotation.

Availability

The methods described in this article are implemented in the Cufflinks suite of software for RNA-Seq, freely available from http://bio.math.berkeley.edu/cufflinks. The software is released under the BOOST license.

Contact

cole@broadinstitute.org; lpachter@math.berkeley.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-21 +22506283,DSM-IV versus DSM-5: implementation of proposed DSM-5 criteria in a large naturalistic database.,"

Objective

Problems with the current DSM-IV eating disorder (ED) section have resulted in proposed changes toward the upcoming DSM-5 (http://www.dsm5.org/ProposedRevisions/Pages/EatingDisorders.aspx). We investigated consequences of these by implementing the proposal in a large naturalistic database.

Method

Patients were 2,584 children/adolescents and adults enrolled at specialized ED clinics in Sweden. DSM-IV diagnoses anorexia nervosa, bulimia nervosa, and ""not otherwise specified"" examples were compared with DSM-5 anorexia, bulimia, and binge ED, as well as atypical anorexia, subthreshold bulimia, and binge eating, purging disorder, and the residual unspecified category. Assessment methods included a semistructured diagnostic interview and self-ratings of ED and psychiatric symptoms.

Results

We studied age-separated diagnostic distributions and explained variance in clinical variables associated with the two systems. Results showed some improvement of diagnostic specification as well as a slight increase in explained variance.

Discussion

Remaining problems with the proposed changes were also highlighted, and possible further refinement is discussed.",2012-04-01 +30727427,First Report of Powdery Mildew on Flamboyant Tree Caused by Erysiphe quercicola in Brazil.,"Flamboyant (Delonix regia) is an ornamental tree that is native to Madagascar and frequently used in gardens and parks worldwide. Powdery mildew was observed on flamboyant plants in the cities of Piracicaba and São Carlos (State of São Paulo, Brazil) during the springs of 2010 and 2011. All sampled plants (~15 plants) were affected by the disease. Affected plants had abundant, white powdery masses of conidia and mycelium on floral buds that is typical of powdery mildew, but these structures were not observed on leaves and petioles. Diseased buds were observed at all developmental stages. The fungus was identified as Erysiphe quercicola on the basis of scanning electron microscopy, light microscopy, and sequence analysis of the internal transcribed spacer (ITS) region. Conidia were produced in short chains of four to five spores on erect conidiophores. Conidiophores were unbranched, cylindrical, 50 to 80 μm long (mean 68.8 ± 10.8 μm), composed of a cylindrical foot cell 25 to 40 μm long (mean 32.2 ± 4.9 μm), and one to two shorter cells. Conidia were ellipsoid-ovoid to subcylindrical, 22 to 37 μm long (mean 30.9 ± 4.4 μm), and 10 to 18 μm wide (mean 15.1 ± 2.8 μm). Germ tubes were produced apically and ended in a lobed appressorium. Colonizing hyphae also had a well-developed lobed appressorium. Chasmothecia were not observed on buds. DNA was extracted from conidia, conidiophores, and mycelium and used to amplify the ITS (ITS1-5.8s-ITS2) region using the ITS1 and ITS4 primers (2) and its sequence (612 nt) was deposited under Accession No. JQ034229 in the GenBank. Searches with the BLASTn algorithm revealed 100% similarity with E. quercicola from oak (Accession Nos. AB292693.1, AB292691.1, and AB292690.1) (1). 
To fulfill Koch's postulates, 10 detached young floral buds, 0.4 to 0.8 cm in diameter, were inoculated with five to eight conidia collected on floral buds using an eyelash brush. Inoculated buds were placed on moistened filter paper in petri dishes. The negative control consisted of noninoculated young floral buds. Inoculated and noninoculated buds were incubated in a growth chamber at 25°C and a 12-h photoperiod. Powdery mildew structures were observed 6 to 8 days after inoculation. To our knowledge, E. quercicola has not been reported previously as a pathogen of flamboyant tree since there is no record in the Erysiphales database ( http://erysiphales.wsu.edu/ ). Although the economic impact of the disease is limited, its incidence might induce the abortion of floral buds and accelerate the senescence of flowers, thus reducing the aesthetic value of the trees. References: (1) S. Takamatsu et al. Mycol Res. 111:809, 2007. (2) T. J. White et al. PCR Protocols: A Guide to Methods and Applications. Academic Press, San Diego, 1990.",2012-04-01 +30727417,First Report of Corynespora Leaf Spot on Ailanthus altissima Caused by Corynespora cassiicola in Korea.,"Ailanthus altissima (Mill.) Swingle, known as tree-of-heaven, is a deciduous tree belonging to the family Simaroubaceae, which is native to both northeast and central China and Taiwan. The trees often have the ability to replace indigenous plants and disrupt native ecosystems (3). In August 2010, a leaf spot disease was observed on young trees in Yangpyeong, Korea. Field observation in 2010 and 2011 showed that infections are common on 1- or 2-year-old trees. Adult trees were rarely infected. Symptoms usually started at the margin of leaves and expanded into irregular, dark brown leaf spots, eventually causing significant premature defoliation. Representative samples were deposited in the herbarium of Korea University (KUS-F25174 and -F25304). 
Conidiophores of fungi observed microscopically on the leaf spots were erect, brown to dark brown, single or occasionally in clusters, 80 to 550 × 5 to 8 μm, and mostly arose on the abaxial surface of symptomatic leaves. Conidia were borne singly or in short chains of two to four, ranging from cylindrical to broadest at the base and tapering apically, straight to slightly curved, pale olivaceous brown, 3 to 18 pseudoseptate, 70 to 450 × 8 to 22 μm, each with a conspicuous thickened hilum. On potato dextrose agar, single-spore cultures of five isolates were identified as Corynespora cassiicola (Berk. & M.A. Curtis) C.T. Wei on the basis of morphological and cultural characteristics (1,4). A monoconidial isolate was preserved at the Korean Agricultural Culture Collection (Accession No. KACC45510). Genomic DNA was extracted with the DNeasy Plant Mini DNA Extraction Kit (Qiagen Inc., Valencia, CA). The complete internal transcribed spacer (ITS) region of rDNA was amplified with the primers ITS1/ITS4 and sequenced with an ABI Prism 337 automatic DNA sequencer (Applied Biosystems, Foster, CA). The resulting sequence of 548 bp was deposited in GenBank (Accession No. JN974462). The sequence showed >99% similarity (1-bp substitution) with a sequence of C. cassiicola from Ipomoea batatas (GenBank Accession No. FJ852716). To conduct a pathogenicity test, a conidial suspension (~2 × 10^4 conidia/ml) was prepared by harvesting conidia from 2-week-old cultures of KACC45510 and the suspension sprayed onto the leaves of three healthy seedlings. Three noninoculated seedlings served as control plants. Inoculated and noninoculated plants were kept in humid chambers for 48 h in a glasshouse. After 5 days, typical leaf spot symptoms started to develop on the leaves of all three inoculated plants. C. cassiicola was reisolated from the lesions, confirming Koch's postulates. No symptoms were observed on control plants. C. cassiicola is cosmopolitan with a very wide host range (2). 
To our knowledge, C. cassiicola has not been reported on A. altissima anywhere in the world. According to field observations in Korea, Corynespora leaf spot was most severe in August and September, especially following a prolonged period of moist weather. C. cassiicola may be a potential biocontrol agent for this highly invasive tree species. References: (1) M. B. Ellis. Dematiaceous Hyphomycetes. Commonwealth Mycological Institute: Kew, Surrey, England, 1971. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA, Retrieved from http://nt.ars-grin.gov/fungaldatabes/ , October 28, 2011. (3) L. B. Knapp and C. D. Canham. J. Torrey Bot. Soc. 127:307, 2000. (4) J. H. Kwon et al. Plant Pathol. J. 17:180, 2001.",2012-04-01 +22533413,A bioinformatics analysis of Lamin-A regulatory network: a perspective on epigenetic involvement in Hutchinson-Gilford progeria syndrome.,"Hutchinson-Gilford progeria syndrome (HGPS) is a rare human genetic disease that leads to premature aging. HGPS is caused by mutation in the Lamin-A (LMNA) gene that leads, in affected young individuals, to the accumulation of the progerin protein, usually present only in aging differentiated cells. Bioinformatics analyses of the network of interactions of the LMNA gene and transcripts are presented. The LMNA gene network has been analyzed using the BioGRID database (http://thebiogrid.org/) and related analysis tools such as Osprey (http://biodata.mshri.on.ca/osprey/servlet/Index) and GeneMANIA ( http://genemania.org/). The network of interaction of LMNA transcripts has been further analyzed following the competing endogenous (ceRNA) hypotheses (RNA cross-talk via microRNAs [miRNAs]) and using the miRWalk database and tools (www.ma.uni-heidelberg.de/apps/zmf/mirwalk/). 
These analyses suggest particular relevance of epigenetic modifiers (via acetylase complexes and specifically HTATIP histone acetylase) and adenosine triphosphate (ATP)-dependent chromatin remodelers (via pBAF, BAF, and SWI/SNF complexes).",2012-04-01 +30727439,First Report of Bacterial Spot of Peony Caused by a Xanthomonas sp. in the United States.,"In early May 2008 and 2009, peony samples (Paeonia spp.) with symptoms of leaf spot and blight were submitted to the Virginia Tech Plant Disease Clinic. The 2008 peony was an unknown cultivar from a northern Virginia landscape. The three cultivars (Dr. Alexander Fleming, Felix Crousse, and Karl Rosenfield) submitted in 2009 were from a commercial nursery in southwestern Virginia that was reporting leaf spot progressing to severe blight, which rendered plants unsalable, on 75% of a 1,219 m^2 block during a 10-day period of heavy rainfall. Bacterial streaming from spots was observed. On the basis of phenotypic and biochemical tests, the isolates were determined to be xanthomonads. Two isolates (one recovered from the 2008 sample and one from the 2009 sample) were used in the following work. Isolates were characterized by multilocus sequencing (MLST) (4). PCR reactions were prepared and cycled using 2X ImmoMix (Bioline, Taunton, MA) according to manufacturer's recommendations with an annealing temperature of 58°C. Template DNA was added by touching a single colony with a 20-μl pipette tip and placing the tip into the reaction mix for 1 min. Four bands of the expected size were visualized on an electrophoresis gel and cleaned products were sequenced in forward and reverse directions at the University of Chicago, Cancer Research Center DNA Sequencing Facility. Corresponding gene fragments of each isolate were identical. A consensus sequence (PAMDB Isolate ID No. 
936) for each of the four gene fragments was constructed and compared with sequences in NCBI ( http://www.ncbi.nlm.nih.gov/nuccore/ ) and PAMDB ( http://genome.ppws.vt.edu/cgi-bin/MLST/home.pl ) (1) databases using Blastn (2). No perfect match was found. Genetic distances between the peony isolates and all strains in PAMDB were determined by MegAlign (Lasergene; DNAStar, Madison, WI). The Xanthomonas strain most similar to the isolates recovered from the peony samples was Xanthomonas hortorum pv. hederae ICMP 1661 with a genetic distance of 0.023; this strongly suggests that the peony isolates belong to X. hortorum. For Koch's postulates, six surface-disinfested young leaflets from Paeonia lactiflora 'Karl Rosenfield' were inoculated by forcefully spraying a phosphate-buffered saline suspension of each bacterial isolate (~4.3 × 10^9 CFU/ml) into the underside of the leaf until leaf tissue appeared water soaked. Controls were inoculated similarly with phosphate-buffered saline solution. Moist chambers with inoculated leaves were incubated at ambient temperature under two 48W fluorescent grow lights with 12 h of light and dark. Circular spots were observed on leaves inoculated with the 2009 and 2008 isolates in 18 and 20 days, respectively. No symptoms were observed on controls. Bacterial streaming from leaf spots was observed by phase-contrast microscopy; bacteria were isolated and confirmed to be identical to the original isolates by the methods described above. To our knowledge, this is the first report of a Xanthomonas sp. causing leaf spot and blight on peony. Although bacterial blight of peony has been attributed to a xanthomonad in recent years, the pathogen had not been further characterized (3). References: (1) N. F. Almeida et al. Phytopathology 100:208, 2010. (2) D. J. Altschul et al. J. Mol. Biol. 215:403, 1990. (3) M. L. Gleason et al. Diseases of Herbaceous Perennials. The American Phytopathological Society, St. Paul, MN. 2009. (4) J. M. Young et al. 
Syst. Appl. Microbiol. 31:366, 2008.",2012-04-01 +22319162,Phylo-MCOA: a fast and efficient method to detect outlier genes and species in phylogenomics using multiple co-inertia analysis.,"Full genome data sets are currently being explored on a regular basis to infer phylogenetic trees, but there are often discordances among the trees produced by different genes. An important goal in phylogenomics is to identify which individual gene and species produce the same phylogenetic tree and are thus likely to share the same evolutionary history. On the other hand, it is also essential to identify which genes and species produce discordant topologies and therefore evolve in a different way or represent noise in the data. The latter are outlier genes or species and they can provide a wealth of information on potentially interesting biological processes, such as incomplete lineage sorting, hybridization, and horizontal gene transfers. Here, we propose a new method to explore the genomic tree space and detect outlier genes and species based on multiple co-inertia analysis (MCOA), which efficiently captures and compares the similarities in the phylogenetic topologies produced by individual genes. Our method allows the rapid identification of outlier genes and species by extracting the similarities and discrepancies, in terms of the pairwise distances, between all the species in all the trees, simultaneously. This is achieved by using MCOA, which finds successive decomposition axes from individual ordinations (i.e., derived from distance matrices) that maximize a covariance function. The method is freely available as a set of R functions. The source code and tutorial can be found online at http://phylomcoa.cgenomics.org.",2012-01-03 +23826371,Arctic Small Rodents Have Diverse Diets and Flexible Food Selection.,"

Unlabelled

The ecology of small rodent food selection is poorly understood, as mammalian herbivore food selection theory has mainly been developed by studying ungulates. Especially, the effect of food availability on food selection in natural habitats where a range of food items are available is unknown. We studied diets and selectivity of grey-sided voles (Myodes rufocanus) and tundra voles (Microtus oeconomus), key herbivores in European tundra ecosystems, using DNA metabarcoding, a novel method enabling taxonomically detailed diet studies. In order to cover the range of food availabilities present in the wild, we employed a large-scale study design for sampling data on food availability and vole diets. Both vole species had ingested a range of plant species and selected particularly forbs and grasses. Grey-sided voles also selected ericoid shrubs and tundra voles willows. Availability of a food item rarely affected its utilization directly, although seasonal changes of diets and selection suggest that these are positively correlated with availability. Moreover, diets and selectivity were affected by availability of alternative food items. These results show that the focal sub-arctic voles have diverse diets and flexible food preferences and rarely compensate low availability of a food item with increased searching effort. Diet diversity itself is likely to be an important trait and has previously been underrated owing to methodological constraints. We suggest that the roles of alternative food item availability and search time limitations for small rodent feeding ecology should be investigated.

Nomenclature

Annotated Checklist of the Panarctic Flora (PAF), Vascular plants. Available at: http://nhm2.uio.no/paf/, accessed 15.6.2012.",2013-06-27 +23007888,Lattice Microbes: high-performance stochastic simulation method for the reaction-diffusion master equation.,"Spatial stochastic simulation is a valuable technique for studying reactions in biological systems. With the availability of high-performance computing (HPC), the method is poised to allow integration of data from structural, single-molecule and biochemical studies into coherent computational models of cells. Here, we introduce the Lattice Microbes software package for simulating such cell models on HPC systems. The software performs either well-stirred or spatially resolved stochastic simulations with approximated cytoplasmic crowding in a fast and efficient manner. Our new algorithm efficiently samples the reaction-diffusion master equation using NVIDIA graphics processing units and is shown to be two orders of magnitude faster than exact sampling for large systems while maintaining an accuracy of !0.1%. Display of cell models and animation of reaction trajectories involving millions of molecules is facilitated using a plug-in to the popular VMD visualization platform. The Lattice Microbes software is open source and available for download at http://www.scs.illinois.edu/schulten/lm",2012-09-25 +21994224,Pyicos: a versatile toolkit for the analysis of high-throughput sequencing data.,"

Motivation

High-throughput sequencing (HTS) has revolutionized gene regulation studies and is now fundamental for the detection of protein-DNA and protein-RNA binding, as well as for measuring RNA expression. With increasing variety and sequencing depth of HTS datasets, the need for more flexible and memory-efficient tools to analyse them is growing.

Results

We describe Pyicos, a powerful toolkit for the analysis of mapped reads from diverse HTS experiments: ChIP-Seq, either punctuated or broad signals, CLIP-Seq and RNA-Seq. We prove the effectiveness of Pyicos to select for significant signals and show that its accuracy is comparable and sometimes superior to that of methods specifically designed for each particular type of experiment. Pyicos facilitates the analysis of a variety of HTS datatypes through its flexibility and memory efficiency, providing a useful framework for data integration into models of regulatory genomics.

Availability

Open-source software, with tutorials and protocol files, is available at http://regulatorygenomics.upf.edu/pyicos or as a Galaxy server at http://regulatorygenomics.upf.edu/galaxy

Contact

eduardo.eyras@upf.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-12 +22467915,DeOri: a database of eukaryotic DNA replication origins.,"

Summary

DNA replication, a central event for cell proliferation, is the basis of biological inheritance. The identification of replication origins helps to reveal the mechanism of the regulation of DNA replication. However, only few eukaryotic replication origins were characterized not long ago; nevertheless, recent genome-wide approaches have boosted the number of mapped replication origins. To gain a comprehensive understanding of the nature of eukaryotic replication origins, we have constructed a Database of Eukaryotic ORIs (DeOri), which contains all the eukaryotic ones identified by genome-wide analyses currently available. A total of 16 145 eukaryotic replication origins have been collected from 6 eukaryotic organisms in which genome-wide studies have been performed, the replication-origin numbers being 433, 7489, 1543, 148, 348 and 6184 for humans, mice, Arabidopsis thaliana, Kluyveromyces lactis, Schizosaccharomyces pombe and Drosophila melanogaster, respectively.

Availability

Database of Eukaryotic ORIs (DeOri) can be accessed from http://tubic.tju.edu.cn/deori/",2012-03-30 +22467916,Large-scale analysis of conserved rare codon clusters suggests an involvement in co-translational molecular recognition events.,"

Motivation

An increasing amount of evidence from experimental and computational analysis suggests that rare codon clusters are functionally important for protein activity. Most of the studies on rare codon clusters were performed on a limited number of proteins or protein families. In the present study, we present the Sherlocc program and how it can be used for large scale protein family analysis of evolutionarily conserved rare codon clusters and their relation to protein function and structure. This large-scale analysis was performed using the whole Pfam database covering over 70% of the known protein sequence universe. Our program Sherlocc, detects statistically relevant conserved rare codon clusters and produces a user-friendly HTML output.

Results

Statistically significant rare codon clusters were detected in a multitude of Pfam protein families. The most statistically significant rare codon clusters were predominantly identified in N-terminal Pfam families. Many of the longest rare codon clusters are found in membrane-related proteins which are required to interact with other proteins as part of their function, for example in targeting or insertion. We identified some cases where rare codon clusters can play a regulating role in the folding of catalytically important domains. Our results support the existence of a widespread functional role for rare codon clusters across species. Finally, we developed an online filter-based search interface that provides access to Sherlocc results for all Pfam families.

Availability

The Sherlocc program and search interface are open access and are available at http://bcb.med.usherbrooke.ca",2012-03-30 +22463110,Multilocus sequence typing reveals high genetic diversity and epidemic population structure for the fish pathogen Yersinia ruckeri.,"Yersinia ruckeri is the causative agent of enteric redmouth in fish and one of the major bacterial pathogens causing losses in salmonid aquaculture. Previously typing methods, including restriction enzyme analysis, pulsed-field gel electrophoresis and multilocus enzyme electrophoresis (MLEE) have indicated a clonal population structure. In this work, we describe a multilocus sequence typing (MLST) scheme for Y.ruckeri based on the internal fragment sequence of six housekeeping genes. This MLST scheme was applied to 103 Y.ruckeri strains from diverse geographic areas and hosts as well as environmental sources. Sequences obtained from this work were deposited and are available in a public database (http://publmst.org/yruckeri/). Thirty different sequence types (ST) were identified, 21 of which were represented by a single isolate, evidencing high genetic diversity. ST2 comprised more than one-third of the isolates and was most frequently observed among isolates from trout. Two major clonal complexes (CC) were identified by eBURST analysis showing a common evolutionary origin for 94 isolates forming 21 STs into CC1 and for 6 isolates of 6 STs in the CC2. It was also possible to associate some unique ST with isolates from recent outbreaks in vaccinated salmonid fish.",2012-03-30 +22796956,SQID-XLink: implementation of an intensity-incorporated algorithm for cross-linked peptide identification.,"

Summary

Peptide identification algorithm is a major bottleneck for mass spectrometry based chemical cross-linking experiments. Our lab recently developed an intensity-incorporated peptide identification algorithm, and here we implemented this scheme for cross-linked peptide discovery. Our program, SQID-XLink, searches all regular, dead-end, intra and inter cross-linked peptides simultaneously, and its effectiveness is validated by testing a published dataset. This new algorithm provides an alternative approach for high confidence cross-linking identification.

Availability

SQID-XLink program is freely available for download from http://quiz2.chem.arizona.edu/wysocki/bioinformatics.htm

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

vwysocki@email.arizona.edu.",2012-07-12 +22536960,Argot2: a large scale function prediction tool relying on semantic similarity of weighted Gene Ontology terms.,"

Background

Predicting protein function has become increasingly demanding in the era of next generation sequencing technology. The task to assign a curator-reviewed function to every single sequence is impracticable. Bioinformatics tools, easy to use and able to provide automatic and reliable annotations at a genomic scale, are necessary and urgent. In this scenario, the Gene Ontology has provided the means to standardize the annotation classification with a structured vocabulary which can be easily exploited by computational methods.

Results

Argot2 is a web-based function prediction tool able to annotate nucleic or protein sequences from small datasets up to entire genomes. It accepts as input a list of sequences in FASTA format, which are processed using BLAST and HMMER searches vs UniProtKB and Pfam databases respectively; these sequences are then annotated with GO terms retrieved from the UniProtKB-GOA database and the terms are weighted using the e-values from BLAST and HMMER. The weighted GO terms are processed according to both their semantic similarity relations described by the Gene Ontology and their associated score. The algorithm is based on the original idea developed in a previous tool called Argot. The entire engine has been completely rewritten to improve both accuracy and computational efficiency, thus allowing for the annotation of complete genomes.

Conclusions

The revised algorithm has been already employed and successfully tested during in-house genome projects of grape and apple, and has proven to have a high precision and recall in all our benchmark conditions. It has also been successfully compared with Blast2GO, one of the methods most commonly employed for sequence annotation. The server is freely accessible at http://www.medcomp.medicina.unipd.it/Argot2.",2012-03-28 +22536955,Accurate multiple sequence alignment of transmembrane proteins with PSI-Coffee.,"

Background

Transmembrane proteins (TMPs) constitute about 20~30% of all protein coding genes. The relative lack of experimental structure has so far made it hard to develop specific alignment methods and the current state of the art (PRALINE™) only manages to recapitulate 50% of the positions in the reference alignments available from the BAliBASE2-ref7.

Methods

We show how homology extension can be adapted and combined with a consistency based approach in order to significantly improve the multiple sequence alignment of alpha-helical TMPs. TM-Coffee is a special mode of PSI-Coffee able to efficiently align TMPs, while using a reduced reference database for homology extension.

Results

Our benchmarking on BAliBASE2-ref7 alpha-helical TMPs shows a significant improvement over the most accurate methods such as MSAProbs, Kalign, PROMALS, MAFFT, ProbCons and PRALINE™. We also estimated the influence of the database used for homology extension and show that highly non-redundant UniRef databases can be used to obtain similar results at a significantly reduced computational cost over full protein databases. TM-Coffee is part of the T-Coffee package, a web server is also available from http://tcoffee.crg.cat/tmcoffee and a freeware open source code can be downloaded from http://www.tcoffee.org/Packages/Stable/Latest.",2012-03-28 +22796953,RIBFIND: a web server for identifying rigid bodies in protein structures and to aid flexible fitting into cryo EM maps.,"

Motivation

To better analyze low-resolution cryo electron microscopy maps of macromolecular assemblies, component atomic structures frequently have to be flexibly fitted into them. Reaching an optimal fit and preventing the fitting process from getting trapped in local minima can be significantly improved by identifying appropriate rigid bodies (RBs) in the fitted component.

Results

Here we present the RIBFIND server, a tool for identifying RBs in protein structures. The server identifies RBs in proteins by calculating spatial proximity between their secondary structural elements.

Availability

The RIBFIND web server and its standalone program are available at http://ribfind.ismb.lon.ac.uk.

Contact

a.pandurangan@mail.cryst.bbk.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-12 +24267948,"PAV ontology: provenance, authoring and versioning.","

Background

Provenance is a critical ingredient for establishing trust of published scientific content. This is true whether we are considering a data set, a computational workflow, a peer-reviewed publication or a simple scientific claim with supportive evidence. Existing vocabularies such as Dublin Core Terms (DC Terms) and the W3C Provenance Ontology (PROV-O) are domain-independent and general-purpose and they allow and encourage for extensions to cover more specific needs. In particular, to track authoring and versioning information of web resources, PROV-O provides a basic methodology but not any specific classes and properties for identifying or distinguishing between the various roles assumed by agents manipulating digital artifacts, such as author, contributor and curator.

Results

We present the Provenance, Authoring and Versioning ontology (PAV, namespace http://purl.org/pav/): a lightweight ontology for capturing ""just enough"" descriptions essential for tracking the provenance, authoring and versioning of web resources. We argue that such descriptions are essential for digital scientific content. PAV distinguishes between contributors, authors and curators of content and creators of representations in addition to the provenance of originating resources that have been accessed, transformed and consumed. We explore five projects (and communities) that have adopted PAV illustrating their usage through concrete examples. Moreover, we present mappings that show how PAV extends the W3C PROV-O ontology to support broader interoperability.

Method

The initial design of the PAV ontology was driven by requirements from the AlzSWAN project with further requirements incorporated later from other projects detailed in this paper. The authors strived to keep PAV lightweight and compact by including only those terms that have been demonstrated to be pragmatically useful in existing applications, and by recommending terms from existing ontologies when plausible.

Discussion

We analyze and compare PAV with related approaches, namely Provenance Vocabulary (PRV), DC Terms and BIBFRAME. We identify similarities and analyze differences between those vocabularies and PAV, outlining strengths and weaknesses of our proposed model. We specify SKOS mappings that align PAV with DC Terms. We conclude the paper with general remarks on the applicability of PAV.",2013-11-22 +22759586,IDDI: integrated domain-domain interaction and protein interaction analysis system.,"

Background

Deciphering protein-protein interaction (PPI) at the domain level provides valuable information about the binding mechanism and functional role of interacting proteins. The 3D structures of complex proteins are a reliable source of domain-domain interaction (DDI) but the number of proven structures is very limited. Several resources for the computationally predicted DDI have been generated but they are scattered in various places and their predictions show erratic performance. A well-organized PPI and DDI analysis system integrating these data with a fair scoring system is necessary.

Method

We integrated three structure-based DDI datasets and twenty computationally predicted DDI datasets and constructed an interaction analysis system, named IDDI, which enables to browse protein and domain interactions with their relationships. To integrate heterogeneous DDI information, a novel scoring scheme is introduced to determine the reliability of DDI by considering the prediction scores of each DDI and the confidence levels of each prediction method in the datasets, and independencies between predicted datasets. In addition, we connected this DDI information to the comprehensive PPI information and developed a unified interface for the interaction analysis exploring interaction networks at both protein and domain level.

Result

IDDI provides 204,705 DDIs among total 7,351 Pfam domains in the current version. The result presents that total number of DDIs is increased eight times more than that of previous studies. Due to the increment of data, 50.4% of PPIs could be correlated with DDIs which is more than twice of previous resources. Newly designed scoring scheme outperformed the previous system in its accuracy too. User interface of IDDI system provides interactive investigation of proteins and domains in interactions with interconnected way. A specific example is presented to show the efficiency of the systems to acquire the comprehensive information of target protein with PPI and DDI relationships. IDDI is freely available at http://pcode.kaist.ac.kr/iddi/.",2012-06-21 +21470960,Interactive Tree Of Life v2: online annotation and display of phylogenetic trees made easy.,"Interactive Tree Of Life (http://itol.embl.de) is a web-based tool for the display, manipulation and annotation of phylogenetic trees. It is freely available and open to everyone. In addition to classical tree viewer functions, iTOL offers many novel ways of annotating trees with various additional data. Current version introduces numerous new features and greatly expands the number of supported data set types. Trees can be interactively manipulated and edited. A free personal account system is available, providing management and sharing of trees in user defined workspaces and projects. Export to various bitmap and vector graphics formats is supported. Batch access interface is available for programmatic access or inclusion of interactive trees into other web services.",2011-04-05 +22057160,Large-scale motif discovery using DNA Gray code and equiprobable oligomers.,"

Motivation

How to find motifs from genome-scale functional sequences, such as all the promoters in a genome, is a challenging problem. Word-based methods count the occurrences of oligomers to detect excessively represented ones. This approach is known to be fast and accurate compared with other methods. However, two problems have hampered the application of such methods to large-scale data. One is the computational cost necessary for clustering similar oligomers, and the other is the bias in the frequency of fixed-length oligomers, which complicates the detection of significant words.

Results

We introduce a method that uses a DNA Gray code and equiprobable oligomers, which solve the clustering problem and the oligomer bias, respectively. Our method can analyze 18 000 sequences of ~1 kbp long in 30 s. We also show that the accuracy of our method is superior to that of a leading method, especially for large-scale data and small fractions of motif-containing sequences.

Availability

The online and stand-alone versions of the application, named Hegma, are available at our website: http://www.genome.ist.i.kyoto-u.ac.jp/~ichinose/hegma/

Contact

ichinose@i.kyoto-u.ac.jp; o.gotoh@i.kyoto-u.ac.jp",2011-11-03 +23665455,Capturing native/native like structures with a physico-chemical metric (pcSM) in protein folding.,"Specification of the three dimensional structure of a protein from its amino acid sequence, also called a ""Grand Challenge"" problem, has eluded a solution for over six decades. A modestly successful strategy has evolved over the last couple of decades based on development of scoring functions (e.g. mimicking free energy) that can capture native or native-like structures from an ensemble of decoys generated as plausible candidates for the native structure. A scoring function must be fast enough in discriminating the native from unfolded/misfolded structures, and requires validation on a large data set(s) to generate sufficient confidence in the score. Here we develop a scoring function called pcSM that detects true native structure in the top 5 with 93% accuracy from an ensemble of candidate structures. If we eliminate the native from ensemble of decoys then pcSM is able to capture near native structure (RMSD<=5Ǻ) in top 10 with 86% accuracy. The parameters considered in pcSM are a C-alpha Euclidean metric, secondary structural propensity, surface areas and an intramolecular energy function. pcSM has been tested on 415 systems consisting 142,698 decoys (public and CASP-largest reported hitherto in literature). The average rank for the native is 2.38, a significant improvement over that existing in literature. In-silico protein structure prediction requires robust scoring technique(s). Therefore, pcSM is easily amenable to integration into a successful protein structure prediction strategy. 
The tool is freely available at http://www.scfbio-iitd.res.in/software/pcsm.jsp.",2013-05-07 +23665300,TRAK ontology: defining standard care for the rehabilitation of knee conditions.,"In this paper we discuss the design and development of TRAK (Taxonomy for RehAbilitation of Knee conditions), an ontology that formally models information relevant for the rehabilitation of knee conditions. TRAK provides the framework that can be used to collect coded data in sufficient detail to support epidemiologic studies so that the most effective treatment components can be identified, new interventions developed and the quality of future randomized control trials improved to incorporate a control intervention that is well defined and reflects clinical practice. TRAK follows design principles recommended by the Open Biomedical Ontologies (OBO) Foundry. TRAK uses the Basic Formal Ontology (BFO) as the upper-level ontology and refers to other relevant ontologies such as Information Artifact Ontology (IAO), Ontology for General Medical Science (OGMS) and Phenotype And Trait Ontology (PATO). TRAK is orthogonal to other bio-ontologies and represents domain-specific knowledge about treatments and modalities used in rehabilitation of knee conditions. Definitions of typical exercises used as treatment modalities are supported with appropriate illustrations, which can be viewed in the OBO-Edit ontology editor. The vast majority of other classes in TRAK are cross-referenced to the Unified Medical Language System (UMLS) to facilitate future integration with other terminological sources. TRAK is implemented in OBO, a format widely used by the OBO community. TRAK is available for download from http://www.cs.cf.ac.uk/trak. 
In addition, its public release can be accessed through BioPortal, where it can be browsed, searched and visualized.",2013-05-07 +24398050,"Relationships of polychlorinated biphenyls and dichlorodiphenyldichloroethylene (p,p'-DDE) with testosterone levels in adolescent males.","

Background

Concern persists over endocrine-disrupting effects of persistent organic pollutants (POPs) on human growth and sexual maturation. Potential effects of toxicant exposures on testosterone levels during puberty are not well characterized.

Objectives

In this study we evaluated the relationship between toxicants [polychlorinated biphenyls (PCBs), dichlorodiphenyldichloroethylene (p,p´-DDE), hexachlorobenzene (HCB), and lead] and testosterone levels among 127 Akwesasne Mohawk males 10 to < 17 years of age with documented toxicant exposures.

Methods

Data were collected between February 1996 and January 2000. Fasting blood specimens were collected before breakfast by trained Akwesasne Mohawk staff. Multivariable regression models were used to estimate associations between toxicants and serum testosterone, adjusted for other toxicants, Tanner stage, and potential confounders.

Results

The sum of 16 PCB congeners (Σ16PCBs) that were detected in ≥ 50% of the population was significantly and negatively associated with serum testosterone levels, such that a 10% change in exposure was associated with a 5.6% decrease in testosterone (95% CI: -10.8, -0.5%). Of the 16 congeners, the more persistent ones (Σ8PerPCBs) were related to testosterone, whereas the less persistent ones, possibly reflecting more recent exposure, were not. When PCB congeners were subgrouped, the association was significant for the sum of eight more persistent PCBs (5.7% decrease; 95% CI: -11, -0.4%), and stronger than the sum of six less persistent congeners (3.1% decrease; 95% CI: -7.2, 0.9%). p,p´-DDE was positively but not significantly associated with serum testosterone (5.2% increase with a 10% increase in exposure; 95% CI: -0.5, 10.9%). Neither lead nor HCB was significantly associated with testosterone levels.

Conclusions

Exposure to PCBs, particularly the more highly persistent congeners, may negatively influence testosterone levels among adolescent males. The positive relationship between p,p´-DDE and testosterone indicates that not all POPs act similarly.

Citation

Schell LM, Gallo MV, Deane GD, Nelder KR, DeCaprio AP, Jacobs A; Akwesasne Task Force on the Environment. 2014. Relationships of polychlorinated biphenyls and dichlorodiphenyldichloroethylene (p,p´-DDE) with testosterone levels in adolescent males. Environ Health Perspect 122:304-309; http://dx.doi.org/10.1289/ehp.1205984.",2013-12-20 +23915145,Overexpression of CARM1 in breast cancer is correlated with poorly characterized clinicopathologic parameters and molecular subtypes.,"

Background

Coactivator-associated arginine methyltransferase 1 (CARM1) belongs to the protein arginine methyltransferase family. CARM1 has been reported to be associated with high grade tumors in breast cancer. The expression pattern of CARM1 in breast cancer and its relationships with clinicopathological characteristics and molecular subtypes remain unknown.

Methods

Two hundred forty-seven invasive breast cancer cases were collected and prepared for tissue array. There were thirty-seven tumors with benign glandular epithelium adjacent to the tumors among these cases. Molecular subtype and CARM1 expression were investigated using immunohistochemistry.

Results

Cell staining was observed in the cytoplasm and/or nucleus. Staining for CARM1 was significantly stronger in adenocarcinoma compared with adjacent benign epithelium. There is a significant correlation of CARM1 overexpression with young age, high grade, estrogen receptor (ER) and progesterone receptor (PR) negative status, increased p53 expression, and high Ki-67 index. Our study demonstrated CARM1 overexpression was associated with an increase in the protein expression of HER2. Furthermore, our data indicated CARM1-overexpression rates were remarkably higher in HER2 subtype (69.6%), luminal B subtype (59.6%) and TN subtype (57.1%) compared with luminal A subtype (41.3%).

Conclusions

CARM1 expression was increased in invasive breast cancer. CARM1 overexpression was associated with poorly characterized clinicopathologic parameters and HER2 overexpression. There were significant differences between different molecular subtypes in their relationship to CARM1 overexpression. Our results support the value of using CARM1 in prognostic stratification of breast cancer patients and its potential therapeutic implications in targeting treatment.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/4116338491022965.",2013-08-02 +23297320,Comparison of nonenhanced MR angiographic subtraction techniques for infragenual arteries at 1.5 T: a preliminary study.,"

Purpose

To evaluate diagnostic performance of three nonenhanced methods: variable-refocusing-flip angle (FA) fast spin-echo (SE)-based magnetic resonance (MR) angiography (variable FA MR) and constant-refocusing-FA fast SE-based MR angiography (constant-FA MR) and flow-sensitive dephasing (FSD)-prepared steady-state free precession MR angiography (FSD MR) for calf arteries, with dual-injection three-station contrast material-enhanced MR angiography (gadolinium-enhanced MR) as reference.

Materials and methods

This prospective study was institutional review board approved and HIPAA compliant, with informed consent. Twenty-one patients (13 men, eight women; mean age, 62.6 years) underwent calf-station variable-FA MR, constant-FA MR, and FSD MR at 1.5 T, with gadolinium-enhanced MR as reference. Image quality and stenosis severity were assessed in 13 segments per leg by two radiologists blinded to clinical data. Combined constant-FA MR and FSD MR reading was also performed. Methods were compared (logistic regression for correlated data) for diagnostic accuracy.

Results

Of 546 arterial segments, 148 (27.1%) had a hemodynamically significant (≥ 50%) stenosis. Image quality was satisfactory for all nonenhanced MR sequences. FSD MR was significantly superior to both other sequences (P < .0001), with 5-cm smaller field of view; 9.6% variable-FA MR, 9.6% constant-FA MR, and 0% FSD MR segmental evaluations had nondiagnostic image quality scores, mainly from high diastolic flow (variable-FA MR) and motion artifact (constant-FA MR). Stenosis sensitivity and specificity were highest for FSD MR (80.3% and 81.7%, respectively), compared with those for constant-FA MR (72.3%, P = .086; and 81.8%, P = .96) and variable-FA MR (75.9%, P = .54; and 75.6%, P = .22). Combined constant-FA MR and FSD MR had superior sensitivity (81.8%) and specificity (88.3%) compared with constant-FA MR (P = .0076), variable-FA MR (P = .0044), and FSD MR (P = .0013). All sequences had an excellent negative predictive value (NPV): 93.2%, constant-FA MR; 94.7%, variable-FA MR; 91.7%, FSD MR; and 92.9%, combined constant-FA MR and FSD MR.

Conclusion

At 1.5 T, all evaluated nonenhanced MR angiographic methods demonstrated satisfactory image quality and excellent NPV for hemodynamically significant stenosis.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12120859/-/DC1.",2013-01-07 +22442313,Concurrent serotyping and genotyping of pneumococci by use of PCR and electrospray ionization mass spectrometry.,"A pneumococcal serotyping/genotyping system (PSGS) was developed based upon targeted PCR, followed by electrospray ionization mass spectrometry and amplicon base composition analysis. Eight multiplex PCRs, 32 targeting serotype-determining capsular biosynthetic loci, and 8 targeting multilocus sequence typing (MLST) loci were employed for each of 229 highly diverse Streptococcus pneumoniae isolates. The most powerful aspect of the PSGS system was the identification of capsular serotypes accounting for the majority of invasive and carried pneumococcal strains. Altogether, 45 different serotypes or serogroups were correctly predicted among the 196 resolvable isolates, with only 2 unexpected negative results. All 33 isolates that represented 23 serotypes not included in the PSGS yielded negative serotyping results. A genotyping database was constructed using the base compositions of 65- to 100-bp sections of MLST alleles compiled within http://www.mlst.net. From this database, one or more MLST sequence types (STs) that comprised a PSGS genotype were identified. The end result of more PSGS genotypes (163) than conventional STs actually tested (155) was primarily due to amplification failures of 1 to 3 targets. In many instances, the PSGS genotype could provide resolution of single- and double-locus variants. This molecular serotyping/genotyping scheme is well suited to rapid characterization of large sets of pneumococcal isolates.",2012-03-21 +22437851,MDWeb and MDMoby: an integrated web-based platform for molecular dynamics simulations.,"

Summary

MDWeb and MDMoby constitute a web-based platform to help access to molecular dynamics (MD) in the standard and high-throughput regime. The platform provides tools to prepare systems from PDB structures mimicking the procedures followed by human experts. It provides inputs and can send simulations for three of the most popular MD packages (Amber, NAMD and Gromacs). Tools for analysis of trajectories, either provided by the user or retrieved from our MoDEL database (http://mmb.pcb.ub.es/MoDEL) are also incorporated. The platform has two ways of access, a set of web-services based on the BioMoby framework (MDMoby), programmatically accessible and a web portal (MDWeb).

Availability

http://mmb.irbbarcelona.org/MDWeb; additional information and methodology details can be found at the web site ( http://mmb.irbbarcelona.org/MDWeb/help.php)",2012-03-21 +22875362,TOFwave: reproducibility in biomarker discovery from time-of-flight mass spectrometry data.,"Many are the sources of variability that can affect reproducibility of disease biomarkers from time-of-flight (TOF) Mass Spectrometry (MS) data. Here we present TOFwave, a complete software pipeline for TOF-MS biomarker identification, that limits the impact of parameter tuning along the whole chain of preprocessing and model selection modules. Peak profiles are obtained by a preprocessing based on Continuous Wavelet Transform (CWT), coupled with a machine learning protocol aimed at avoiding selection bias effects. Only two parameters (minimum peak width and a signal to noise cutoff) have to be explicitly set. The TOFwave pipeline is built on top of the mlpy Python package. Examples on Matrix-Assisted Laser Desorption and Ionization (MALDI) TOF datasets are presented. Software prototype, datasets and details to replicate results in this paper can be found at http://mlpy.sf.net/tofwave/.",2012-08-09 +24345350,"Associations between extreme precipitation and gastrointestinal-related hospital admissions in Chennai, India.","

Background

Understanding the potential links between extreme weather events and human health in India is important in the context of vulnerability and adaptation to climate change. Research exploring such linkages in India is sparse.

Objectives

We evaluated the association between extreme precipitation and gastrointestinal (GI) illness-related hospital admissions in Chennai, India, from 2004 to 2007.

Methods

Daily hospital admissions were extracted from two government hospitals in Chennai, India, and meteorological data were retrieved from the Chennai International Airport. We evaluated the association between extreme precipitation (≥ 90th percentile) and hospital admissions using generalized additive models. Both single-day and distributed lag models were explored over a 15-day period, controlling for apparent temperature, day of week, and long-term time trends. We used a stratified analysis to explore the association across age and season.

Results

Extreme precipitation was consistently associated with GI-related hospital admissions. The cumulative summary of risk ratios estimated for a 15-day period corresponding to an extreme event (relative to no precipitation) was 1.60 (95% CI: 1.29, 1.98) among all ages, 2.72 (95% CI: 1.25, 5.92) among the young (≤ 5 years of age), and 1.62 (95% CI: 0.97, 2.70) among the old (≥ 65 years of age). The association was stronger during the pre-monsoon season (March-May), with a cumulative risk ratio of 6.50 (95% CI: 2.22, 19.04) for all ages combined compared with other seasons.

Conclusions

Hospital admissions related to GI illness were positively associated with extreme precipitation in Chennai, India, with positive cumulative risk ratios for a 15-day period following an extreme event in all age groups. Projected changes in precipitation and extreme weather events suggest that climate change will have important implications for human health in India, where health disparities already exist.

Citation

Bush KF, O'Neill MS, Li S, Mukherjee B, Hu H, Ghosh S, Balakrishnan K. 2014. Associations between extreme precipitation and gastrointestinal-related hospital admissions in Chennai, India. Environ Health Perspect 122:249-254; http://dx.doi.org/10.1289/ehp.1306807.",2013-12-17 +22121228,ProtoNet 6.0: organizing 10 million protein sequences in a compact hierarchical family tree.,"ProtoNet 6.0 (http://www.protonet.cs.huji.ac.il) is a data structure of protein families that cover the protein sequence space. These families are generated through an unsupervised bottom-up clustering algorithm. This algorithm organizes large sets of proteins in a hierarchical tree that yields high-quality protein families. The 2012 ProtoNet (Version 6.0) tree includes over 9 million proteins of which 5.5% come from UniProtKB/SwissProt and the rest from UniProtKB/TrEMBL. The hierarchical tree structure is based on an all-against-all comparison of 2.5 million representatives of UniRef50. Rigorous annotation-based quality tests prune the tree to most informative 162,088 clusters. Every high-quality cluster is assigned a ProtoName that reflects the most significant annotations of its proteins. These annotations are dominated by GO terms, UniProt/Swiss-Prot keywords and InterPro. ProtoNet 6.0 operates in a default mode. When used in the advanced mode, this data structure offers the user a view of the family tree at any desired level of resolution. Systematic comparisons with previous versions of ProtoNet are carried out. They show how our view of protein families evolves, as larger parts of the sequence space become known. ProtoNet 6.0 provides numerous tools to navigate the hierarchy of clusters.",2011-11-25 +23259535,Evidence from neuroimaging to explore brain plasticity in humans during an ultra-endurance burden.,"Physical activity, likely through induction of neuroplasticity, is a promising intervention to promote brain health. 
In athletes it is clear that training can and does, by physiological adaptations, extend the frontiers of performance capacity. The limits of our endurance capacity lie deeply in the human brain, determined by various personal factors yet to be explored. The human brain, with its vast neural connections and its potential for seemingly endless behaviors, constitutes one of the final frontiers of medicine. In a recent study published in BMC Medicine, the TransEurope FootRace Project followed 10 ultra-endurance runners over around 4,500 km across Europe and recorded a large data collection of brain imaging scans. This study indicates that the cerebral atrophy amounting to a reduction of approximately 6% throughout the two months of the race is reversed upon follow-up. While this study will contribute to advances in the limits of human performance on the neurophysiological processes in sports scientists, it will also bring important understanding to clinicians about cerebral atrophy in people who are vulnerable to physical and psychological stress long term.See related research article http://www.biomedcentral.com/1741-7015/10/170.",2012-12-21 +22434831,Disease model curation improvements at Mouse Genome Informatics.,"Optimal curation of human diseases requires an ontology or structured vocabulary that contains terms familiar to end users, is robust enough to support multiple levels of annotation granularity, is limited to disease terms and is stable enough to avoid extensive reannotation following updates. At Mouse Genome Informatics (MGI), we currently use disease terms from Online Mendelian Inheritance in Man (OMIM) to curate mouse models of human disease. While OMIM provides highly detailed disease records that are familiar to many in the medical community, it lacks structure to support multilevel annotation. 
To improve disease annotation at MGI, we evaluated the merged Medical Subject Headings (MeSH) and OMIM disease vocabulary created by the Comparative Toxicogenomics Database (CTD) project. Overlaying MeSH onto OMIM provides hierarchical access to broad disease terms, a feature missing from the OMIM. We created an extended version of the vocabulary to meet the genetic disease-specific curation needs at MGI. Here we describe our evaluation of the CTD application, the extensions made by MGI and discuss the strengths and weaknesses of this approach. DATABASE URL: http://www.informatics.jax.org/",2012-03-20 +22689647,SIFT web server: predicting effects of amino acid substitutions on proteins.,"The Sorting Intolerant from Tolerant (SIFT) algorithm predicts the effect of coding variants on protein function. It was first introduced in 2001, with a corresponding website that provides users with predictions on their variants. Since its release, SIFT has become one of the standard tools for characterizing missense variation. We have updated SIFT's genome-wide prediction tool since our last publication in 2009, and added new features to the insertion/deletion (indel) tool. We also show accuracy metrics on independent data sets. The original developers have hosted the SIFT web server at FHCRC, JCVI and the web server is currently located at BII. The URL is http://sift-dna.org (24 May 2012, date last accessed).",2012-06-11 +21349862,PrimerProspector: de novo design and taxonomic analysis of barcoded polymerase chain reaction primers.,"

Motivation

PCR amplification of DNA is a key preliminary step in many applications of high-throughput sequencing technologies, yet design of novel barcoded primers and taxonomic analysis of novel or existing primers remains a challenging task.

Results

PrimerProspector is an open-source software package that allows researchers to develop new primers from collections of sequences and to evaluate existing primers in the context of taxonomic data.

Availability

PrimerProspector is open-source software available at http://pprospector.sourceforge.net

Contact

rob.knight@colorado.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-23 +22493526,IntDb: A comprehensive database for classified introns of saccharomyces & human.,"

Unlabelled

Introns (intra-genic) are non-coding regions of several eukaryotic genes. However, their role in regulation of transcription, embryonic development, stimulate gene (HEG) is apparent in recent years. Thus current research focuses on mutation in introns and their influence in causing various diseases. Though many available intron databases like YIDB, IDB, ExInt, GISSD, FUGOID, etc. discusses on various aspects of introns but none of them have classified the introns where identification of start intron is found to be important which mainly regulates the various activities of protein at gene level. This lead to an idea for development of ""Intdb""; a database meant for classifying introns as start, middle and stop on the basis of position of specific consensus site. Information provided in IntDb is useful for gene prediction, determination of splicing sites and identification of diseases. In addition, the main focus is on violation of consensus rule and frequency of other deviations observed in classified introns. Further, GC content, length variations according to the biased residues and occurrence of consensus pattern to discover potential role of introns is also emphasized in IntDb.

Availability

The database is available for free at http://introndb.bicpu.edu.in/",2012-03-17 +22493529,Gene Locater: Genetic linkage analysis software using three-point testcross.,"

Unlabelled

Locating genes on a chromosome is important for understanding the gene function and its linkage and recombination. Knowledge of gene positions on chromosomes is necessary for annotation. The study is essential for disease genetics and genomics, among other aspects. Currently available software's for calculating recombination frequency is mostly limited to the range and flexibility of this type of analysis. GENE LOCATER is a fully customizable program for calculating recombination frequency, written in JAVA. Through an easy-to-use interface, GENE LOCATOR allows users a high degree of flexibility in calculating genetic linkage and displaying linkage group. Among other features, this software enables user to identify linkage groups with output visualized graphically. The program calculates interference and coefficient of coincidence with elevated accuracy in sample datasets.

Availability

The database is available for free at http://www.moperandib.com.",2012-03-17 +24004467,Expression of CD44v6 and integrin-β1 for the prognosis evaluation of pancreatic cancer patients after cryosurgery.,"

Background

Many previous studies demonstrated that cell adhesion molecules CD44v6 and integrin-β1 had been extensively investigated as potential prognostic markers of various cancers. However, data in PC are scarce.

Methods

We now investigate CD44v6 and integrin-β1 mRNA expression in PBMC by a triplex real-time RT-PCR assay and protein expression in plasma by ELISA. All specimens were collected from 54 PC patients who received the treatment of cryosurgery as well as 20 healthy individuals (control).

Results

The mRNA and protein expression levels of CD44v6 and integrin-β1 in patients were significantly increased compared with control group (P<0.05). The high CD44v6 mRNA and protein expression were significantly correlated with clinical stage, tumor differentiation, LNM, liver metastasis and decreased median DFS (P<0.05), while the high integrin-β1 mRNA and protein expression were significantly correlated with clinical stage, LNM, liver metastasis and decreased median DFS (P<0.05). Clinical stage, LNM, liver metastasis, CD44v6 mRNA and protein expression were the independent predictors of survival in PC patients (P<0.05). Moreover, CD44v6 and integrin-β1 mRNA and protein expression levels were significantly decreased in patients in 3 months after cryosurgery (P<0.05). No significant difference was found in CD44v6 mRNA and protein expression between patients in 3 months after cryosurgery and control group (P>0.05).

Conclusion

CD44v6 and integrin-β1 mRNA and protein expression in blood may serve as biomarkers for the development and metastasis of PC, and as prognostic indicators for PC. They may become useful predictors in assessing outcome of PC patients after cryosurgery.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/4035308681009006.",2013-09-02 +21813478,GlobalMIT: learning globally optimal dynamic bayesian network with the mutual information test criterion.,"

Motivation

Dynamic Bayesian networks (DBN) are widely applied in modeling various biological networks including the gene regulatory network (GRN). Due to the NP-hard nature of learning static Bayesian network structure, most methods for learning DBN also employ either local search such as hill climbing, or a meta stochastic global optimization framework such as genetic algorithm or simulated annealing.

Results

This article presents GlobalMIT, a toolbox for learning the globally optimal DBN structure from gene expression data. We propose using a recently introduced information theoretic-based scoring metric named mutual information test (MIT). With MIT, the task of learning the globally optimal DBN is efficiently achieved in polynomial time.

Availability

The toolbox, implemented in Matlab and C++, is available at http://code.google.com/p/globalmit.

Contact

vinh.nguyen@monash.edu; madhu.chetty@monash.edu

Supplementary information

Supplementary data is available at Bioinformatics online.",2011-08-03 +24215399,"Coreceptor tropism determined by genotypic assay in HIV-1 circulating in Thailand, where CRF01_AE predominates.","

Objectives

Chemokine (C-C motif) receptor 5 (CCR5) inhibitors are a novel class of antiretroviral agents that are promising for treatment of patients who harbour the HIV-1 R5 strain. Data on coreceptor tropism in non-B HIV-1 subtypes are limited. We studied coreceptor tropism in HIV-1 circulating in Thailand, where CRF01_AE predominates, using a genotypic assay.

Methods

We compiled V3 sequences of HIV-1 strains circulating in Thailand during 2010-2012. Coreceptor tropism was predicted based on V3 sequences using geno2pheno version 2.5 (http://coreceptor.bioinf.mpi-inf.mpg.de).

Results

One hundred and fifty-five HIV-1-infected patients were enrolled in this study. Ninety-nine patients (63.9%) were antiretroviral-naïve, and the remainder had virological failure. The median (interquartile range) CD4 cell count and HIV-1 RNA were 220 (74-379) cells/μL and 75,374 (14,127-226,686) HIV-1 RNA copies/mL, respectively. Of the sequences obtained from these patients, 119 (76.8%) were CRF01_AE and 22 (14.2%) were subtype B. At a false positive rate of < 5%, 61 (39.4%) HIV-1-infected individuals were predicted to harbour the X4 phenotype. X4 viruses were detected more frequently in the treatment-failure group compared with the treatment-naïve group (30.3 vs. 55.4%, respectively; P = 0.002). Those with CRF01_AE had a higher proportion of X4 viruses compared with non-AE subtypes (47.9 vs. 11.1%, respectively; P < 0.001). By multivariate logistic regression, CRF01_AE and treatment failure were independently associated with predicted X4 phenotype [odds ratio (OR) 7.93; 95% confidence interval (CI) 2.57-24.50; P < 0.001, and OR 3.10; 95% CI 1.50-6.42; P = 0.002, respectively].

Conclusions

CRF01_AE and treatment failure are associated with the predicted X4 phenotype. In regions where CRF01_AE predominates, use of CCR5 inhibitors must be considered with caution. The phenotypic assay and its correlation with genotypes should be further investigated in CRF01_AE.",2013-11-11 +25540463,CrasyDSE: A framework for solving Dyson-Schwinger equations.,"Dyson-Schwinger equations are important tools for non-perturbative analyses of quantum field theories. For example, they are very useful for investigations in quantum chromodynamics and related theories. However, sometimes progress is impeded by the complexity of the equations. Thus automating parts of the calculations will certainly be helpful in future investigations. In this article we present a framework for such an automation based on a C++ code that can deal with a large number of Green functions. Since also the creation of the expressions for the integrals of the Dyson-Schwinger equations needs to be automated, we defer this task to a Mathematica notebook. We illustrate the complete workflow with an example from Yang-Mills theory coupled to a fundamental scalar field that has been investigated recently. As a second example we calculate the propagators of pure Yang-Mills theory. Our code can serve as a basis for many further investigations where the equations are too complicated to tackle by hand. It also can easily be combined with DoFun, a program for the derivation of Dyson-Schwinger equations.

Program summary

Program title: CrasyDSE Catalogue identifier: AEMY _v1_0 Program summary URL: http://cpc.cs.qub.ac.uk/summaries/AEMY_v1_0.html Program obtainable from: CPC Program Library, Queen's University, Belfast, N. Ireland Licensing provisions: Standard CPC licence, http://cpc.cs.qub.ac.uk/licence/licence.html No. of lines in distributed program, including test data, etc.: 49030 No. of bytes in distributed program, including test data, etc.: 303958 Distribution format: tar.gz Programming language: Mathematica 8 and higher, C++. Computer: All on which Mathematica and C++ are available. Operating system: All on which Mathematica and C++ are available (Windows, Unix, Mac OS). Classification: 11.1, 11.4, 11.5, 11.6. Nature of problem: Solve (large) systems of Dyson-Schwinger equations numerically. Solution method: Create C++ functions in Mathematica to be used for the numeric code in C++. This code uses structures to handle large numbers of Green functions. Unusual features: Provides a tool to convert Mathematica expressions into C++ expressions including conversion of function names. Running time: Depending on the complexity of the investigated system solving the equations numerically can take seconds on a desktop PC to hours on a cluster.",2012-11-01 +21685062,Error correction of high-throughput sequencing datasets with non-uniform coverage.,"

Motivation

The continuing improvements to high-throughput sequencing (HTS) platforms have begun to unfold a myriad of new applications. As a result, error correction of sequencing reads remains an important problem. Though several tools do an excellent job of correcting datasets where the reads are sampled close to uniformly, the problem of correcting reads coming from drastically non-uniform datasets, such as those from single-cell sequencing, remains open.

Results

In this article, we develop the method Hammer for error correction without any uniformity assumptions. Hammer is based on a combination of a Hamming graph and a simple probabilistic model for sequencing errors. It is a simple and adaptable algorithm that improves on other tools on non-uniform single-cell data, while achieving comparable results on normal multi-cell data.

Availability

http://www.cs.toronto.edu/~pashadag.

Contact

pmedvedev@cs.ucsd.edu.",2011-07-01 +22416975,Histological spectrum of pulmonary manifestations in kidney transplant recipients on sirolimus inclusive immunosuppressive regimens.,"

Background

After the introduction of novel effective immunosuppressive therapies, kidney transplantation became the treatment of choice for end stage renal disease. While these new therapies lead to better graft survival, they can also cause a variety of complications. Only small series or case reports describe pulmonary pathology in renal allograft recipients on mTOR inhibitor inclusive therapies. The goal of this study was to provide a systematic review of thoracic biopsies in kidney transplant recipients for possible association between a type of immunosuppressive regimen and pulmonary complications.

Methods

A laboratory database search revealed 28 of 2140 renal allograft recipients (18 males and 10 females, 25 to 77 years old, mean age 53 years) who required a biopsy for respiratory symptoms. The histological features were correlated with clinical findings including immunosuppressive medications.

Results

The incidence of neoplasia on lung biopsy was 0.4% (9 cases), which included 3 squamous cell carcinomas, 2 adenocarcinomas, 1 diffuse large B-cell lymphoma, 1 lymphomatoid granulomatosis, and 2 post transplant B-cell lymphoproliferative disorders. Diffuse parenchymal lung disease was identified in 0.4% (9 cases), and included 5 cases of pulmonary hemorrhage, 3 cases of organizing pneumonia and 1 case of pulmonary alveolar proteinosis. Five (0.2%) cases showed histological features indicative of a localized infectious process. Patients on sirolimus had neoplasia less frequently than patients on other immunosuppressive combinations (12.5% vs. 58.3%, p = 0.03). Lung biopsies in 4 of 5 patients with clinically suspected sirolimus toxicity revealed pulmonary hemorrhage as the sole histological finding or in combination with other patterns.

Conclusions

Our study documents a spectrum of neoplastic and non-neoplastic lesions in renal allograft recipients on current immunosuppressive therapies. Sirolimus inclusive regimens are associated with increased risk of pulmonary toxicity but may be beneficial in cases of posttransplant neoplasia.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/3320012126569395.",2012-03-14 +23151252,Commentary on the clinical management of metabolic syndrome: why a healthy lifestyle is important.,"Metabolic syndrome (MS) is associated with an increased risk of type 2 diabetes mellitus and cardiovascular diseases. There is no recognized method to manage MS. Many physicians treat the individual characteristics of MS (high blood pressure, high triglycerides, and so on) instead of the syndrome as a whole, placing particular emphasis on those components that are easily amenable to drug treatment. However, regular physical exercise and a healthy diet have been demonstrated to improve the health of a number of populations, but few studies have assessed their effects in patients with MS. A meta-analysis by Yamaoka and Tango in BMC Medicine found that a lifestyle change program (dietary counseling and encouragement to exercise) resulted in improvements in components of MS and in reducing the proportion of patients with MS. The effects may not be impressive in absolute terms, but the data should be interpreted with the heterogeneity of the included studies in mind. Because of the many adverse side effects of the drugs used to correct individual aspects of MS, this meta-analysis provides strong evidence that lifestyle changes must be the first-line approach to manage MS.See related article http://www.biomedcentral.com/1741-7015/10/138/abstract.",2012-11-14 +22334387,The androgen receptor gene mutations database: 2012 update.,"The current version of the androgen receptor gene (AR) mutations database is described. A major change to the database is that the nomenclature and numbering scheme now conforms to all Human Genome Variation Society norms. The total number of reported mutations has risen from 605 to 1,029 since 2004. 
The database now contains a number of mutations that are associated with prostate cancer (CaP) treatment regimens, while the number of AR mutations found in CaP tissues has more than doubled from 76 to 159. In addition, in a number of androgen insensitivity syndrome (AIS) and CaP cases, multiple mutations have been found within the same tissue samples. For the first time, we report on a disconnect within the AIS phenotype-genotype relationship among our own patient database, in that over 40% of our patients with a classic complete AIS or partial AIS phenotypes did not appear to have a mutation in their AR gene. The implications of this phenomenon on future locus-specific mutation database (LSDB) development are discussed, together with the concept that mutations can be associated with both loss- and gain-of-function, and the effect of multiple AR mutations within individuals. The database is available on the internet (http://androgendb.mcgill.ca), and a web-based LSDB with the variants using the Leiden Open Variation Database platform is available at http://www.lovd.nl/AR.",2012-03-13 +23063363,A resource for manipulating gene expression and analyzing cis-regulatory modules in the Drosophila CNS.,"Here, we describe the embryonic central nervous system expression of 5,000 GAL4 lines made using molecularly defined cis-regulatory DNA inserted into a single attP genomic location. We document and annotate the patterns in early embryos when neurogenesis is at its peak, and in older embryos where there is maximal neuronal diversity and the first neural circuits are established. We note expression in other tissues, such as the lateral body wall (muscle, sensory neurons, and trachea) and viscera. Companion papers report on the adult brain and larval imaginal discs, and the integrated data sets are available online (http://www.janelia.org/gal4-gen1). This collection of embryonically expressed GAL4 lines will be valuable for determining neuronal morphology and function. 
The 1,862 lines expressed in small subsets of neurons (<20/segment) will be especially valuable for characterizing interneuronal diversity and function, because although interneurons comprise the majority of all central nervous system neurons, their gene expression profile and function remain virtually unexplored.",2012-10-11 +21342590,Multi-resolution independent component analysis for high-performance tumor classification and biomarker discovery.,"

Background

Although high-throughput microarray based molecular diagnostic technologies show a great promise in cancer diagnosis, it is still far from a clinical application due to its low and instable sensitivities and specificities in cancer molecular pattern recognition. In fact, high-dimensional and heterogeneous tumor profiles challenge current machine learning methodologies for its small number of samples and large or even huge number of variables (genes). This naturally calls for the use of an effective feature selection in microarray data classification.

Methods

We propose a novel feature selection method: multi-resolution independent component analysis (MICA) for large-scale gene expression data. This method overcomes the weak points of the widely used transform-based feature selection methods such as principal component analysis (PCA), independent component analysis (ICA), and nonnegative matrix factorization (NMF) by avoiding their global feature-selection mechanism. In addition to demonstrating the effectiveness of the multi-resolution independent component analysis in meaningful biomarker discovery, we present a multi-resolution independent component analysis based support vector machines (MICA-SVM) and linear discriminant analysis (MICA-LDA) to attain high-performance classifications in low-dimensional spaces.

Results

We have demonstrated the superiority and stability of our algorithms by performing comprehensive experimental comparisons with nine state-of-the-art algorithms on six high-dimensional heterogeneous profiles under cross validations. Our classification algorithms, especially, MICA-SVM, not only accomplish clinical or near-clinical level sensitivities and specificities, but also show strong performance stability over its peers in classification. Software that implements the major algorithm and data sets on which this paper focuses are freely available at https://sites.google.com/site/heyaumapbc2011/.

Conclusions

This work suggests a new direction to accelerate microarray technologies into a clinical routine through building a high-performance classifier to attain clinical-level sensitivities and specificities by treating an input profile as a 'profile-biomarker'. The multi-resolution data analysis based redundant global feature suppressing and effective local feature extraction also have a positive impact on large scale 'omics' data mining.",2011-02-15 +23049970,The National NeuroAIDS Tissue Consortium brain gene array: two types of HIV-associated neurocognitive impairment.,"

Background

The National NeuroAIDS Tissue Consortium (NNTC) performed a brain gene expression array to elucidate pathophysiologies of Human Immunodeficiency Virus type 1 (HIV-1)-associated neurocognitive disorders.

Methods

Twenty-four human subjects in four groups were examined A) Uninfected controls; B) HIV-1 infected subjects with no substantial neurocognitive impairment (NCI); C) Infected with substantial NCI without HIV encephalitis (HIVE); D) Infected with substantial NCI and HIVE. RNA from neocortex, white matter, and neostriatum was processed with the Affymetrix® array platform.

Results

With HIVE the HIV-1 RNA load in brain tissue was three log(10) units higher than other groups and over 1,900 gene probes were regulated. Interferon response genes (IFRGs), antigen presentation, complement components and CD163 antigen were strongly upregulated. In frontal neocortex downregulated neuronal pathways strongly dominated in HIVE, including GABA receptors, glutamate signaling, synaptic potentiation, axon guidance, clathrin-mediated endocytosis and 14-3-3 protein. Expression was completely different in neuropsychologically impaired subjects without HIVE. They had low brain HIV-1 loads, weak brain immune responses, lacked neuronally expressed changes in neocortex and exhibited upregulation of endothelial cell type transcripts. HIV-1-infected subjects with normal neuropsychological test results had upregulation of neuronal transcripts involved in synaptic transmission of neostriatal circuits.

Interpretation

Two patterns of brain gene expression suggest that more than one pathophysiological process occurs in HIV-1-associated neurocognitive impairment. Expression in HIVE suggests that lowering brain HIV-1 replication might improve NCI, whereas NCI without HIVE may not respond in kind; array results suggest that modulation of transvascular signaling is a potentially promising approach. Striking brain regional differences highlighted the likely importance of circuit level disturbances in HIV/AIDS. In subjects without impairment regulation of genes that drive neostriatal synaptic plasticity reflects adaptation. The array provides an infusion of public resources including brain samples, clinicopathological data and correlative gene expression data for further exploration (http://www.nntc.org/gene-array-project).",2012-09-26 +23462799,A cross-sectional multicenter study of cognitive and behavioural features in multiple system atrophy patients of the parkinsonian and cerebellar type.,"Imaging and neuropathology studies have demonstrated significant abnormalities not only in subcortical, but also in cortical regions of patients with multiple system atrophy (MSA). This raises the possibility that cognitive dysfunction may contribute to the clinical spectrum of this disorder to a greater extent than is currently appreciated. In this cross-sectional multicenter study from the European multiple system atrophy study group ( http://www.emsa-sg.org ), we applied an extensive neuropsychological test battery in a series of 61 clinically diagnosed probable MSA patients. The results demonstrated that general cognitive decline as assessed by MMSE was uncommon (2 out of 61 patients <24). In contrast, frontal lobe-related functions (as measured by FAB) were impaired in 41 % of patients, with abstract reasoning and sustained attention less compromised. 
This pattern was similar to our control group of 20 patients with Parkinson's disease (matched for disease duration and age at onset). There was no difference in cognitive performance between MSA patients with the parkinsonian versus the cerebellar variant. Behaviourally, MSA patients had greater depression than PD and in the case of MSA of the cerebellar variant significantly lower anxiety. Our data show that cognitive abnormalities are relatively frequent in multiple system atrophy and this involves primarily frontal-executive functions. Their contribution to clinical disability and disease progression needs to be addressed in larger prospective studies.",2013-03-06 +21712247,MetATT: a web-based metabolomics tool for analyzing time-series and two-factor datasets.,"

Summary

Time-series and multifactor studies have become increasingly common in metabolomic studies. Common tasks for analyzing data from these relatively complex experiments include identification of major variations associated with each experimental factor, comparison of temporal profiles across different biological conditions, as well as detection and validation of the presence of interactions. Here we introduce MetATT, a web-based tool for time-series and two-factor metabolomic data analysis. MetATT offers a number of complementary approaches including 3D interactive principal component analysis, two-way heatmap visualization, two-way ANOVA, ANOVA-simultaneous component analysis and multivariate empirical Bayes time-series analysis. These procedures are presented through an intuitive web interface. At the end of each session, a detailed analysis report is generated to facilitate understanding of the results.

Availability

Freely available at http://metatt.metabolomics.ca

Contact

jianguox@ualberta.ca.",2011-06-27 +23988671,Teambuilding across healthcare professions: the ELDER project.,"

Background

The key to ensuring quality care for older adults is a nursing workforce that collaborates across professions and provider levels (Wright M.C., Phillips-Bute, B.G., Petrusa, E.R., Griffin, K.L., Hobbs, G.W., & Taekman, J.M. (2008). Assessing teamwork in medical education and practice: Relating behavioural teamwork ratings and clinical performance. Med Teach, 29, 1-9).

Purpose

To improve communication and teamwork among interprofessional health care providers (HCPs) by using innovative teambuilding activities over three years.

Methods

Participants

97 multi-disciplinary HCPs from five long term or home care agencies in an underserved region of New England. PARTICIPANTS attended six interactive sessions focused on teambuilding skills through the use of role play, case studies, games, exercises and teambuilding strategies. The J. A. Hartford Foundation's (John A. Hartford Foundation. (2001). The John A. Hartford Foundation Geriatric Interdisciplinary Team Training (GITT) Program. Available at: http://www.nygec.org/index.cfm?section_id=26&sub_section_id=18&page_id=98) Geriatric Interdisciplinary Team Training (GITT) instrument and Interdisciplinary Teamwork IQ test were used to measure changes in knowledge and attitudes.

Results

T tests performed on matched pre/post GITT instruments (n=26) revealed no significant change, although scores improved slightly from pre: (71%) to post test (73.3%) (p=.39). Teamwork IQ scores also improved slightly though not significantly. Qualitative data gathered suggest that teambuilding exercises were helpful in practice and allowed for better understanding of other provider roles.

Conclusions

Rarely is a variety of health care disciplines invited to participate in educational opportunities together. The interprofessional small group methodology used is a replicable model with potential to overcome barriers in communication and teamwork skills.",2013-08-27 +22398859,Restriction endonucleases: natural and directed evolution.,"Type II restriction endonucleases (REs) are highly sequence-specific compared with other classes of nucleases. PD-(D/E)XK nucleases, initially represented by only type II REs, now comprise a large and extremely diverse superfamily of proteins and, although sharing a structurally conserved core, typically display little or no detectable sequence similarity except for the active site motifs. Sequence similarity can only be observed in methylases and few isoschizomers. As a consequence, REs are classified according to combinations of functional properties rather than on the basis of genetic relatedness. New alignment matrices and classification systems based on structural core connectivity and cleavage mechanisms have been developed to characterize new REs and related proteins. REs recognizing more than 300 distinct specificities have been identified in RE database (REBASE: http://rebase.neb.com/cgi-bin/statlist ) but still the need for newer specificities is increasing due to the advancement in molecular biology and applications. The enzymes have undergone constant evolution through structural changes in protein scaffolds which include random mutations, homologous recombinations, insertions, and deletions of coding DNA sequences but rational mutagenesis or directed evolution delivers protein variants with new functions in accordance with defined biochemical or environmental pressures. 
Redesigning through random mutation, addition or deletion of amino acids, methylation-based selection, synthetic molecules, combining recognition and cleavage domains from different enzymes, or combination with domains of additional functions change the cleavage specificity or substrate preference and stability. There is a growing number of patents awarded for the creation of engineered REs with new and enhanced properties.",2012-03-08 +22268718,"SoMART: a web server for plant miRNA, tasiRNA and target gene analysis.","Plant microRNAs (miRNAs) and trans-acting small interfering RNAs (tasiRNAs) play important roles in a variety of biological processes. Bioinformatics prediction and small RNA (sRNA) cloning are the most important approaches for identification of miRNAs and tasiRNAs and their targets. However, these approaches are not readily accessible to every researcher. Here we present SoMART, a web server for miRNA/tasiRNA analysis resources and tools, which is designed for researchers who are interested in identifying miRNAs or tasiRNAs that potentially regulate genes of interest. The server includes four sets of tools: 'Slicer detector' for detecting sRNAs targeting input genes, 'dRNA mapper' for detecting degradome (d)RNA products derived from input genes, 'PreMIR detector' for identifying miRNA precursors (MIRs) or tasiRNA precursor (TASs) of input sRNAs, and 'sRNA mapper' for mapping sRNAs onto input genes. We also developed a dRNA-seq protocol to achieve longer dRNA reads for better characterization of miRNA precursors by dRNA mapper. To validate the server function and robustness, we installed sRNA, dRNA and collected genomic DNA or transcriptome databases from Arabidopsis and solanaceous plants, and characterized miR172-mediated regulation of the APETALA2 gene in potato (Solanum tuberosum) and demonstrated conservation of MIR390-triggered TAS3 in tomato (Solanum lycopersicum). 
More importantly, we predicted the existence of MIR482-triggered TAS5 in tomato. We further tested and confirmed the efficiency and accuracy of the server by analyses of 21 validated miRNA targets and 115 miRNA precursors in Arabidopsis thaliana. SoMART is available at http://somart.ist.berkeley.edu.",2012-03-08 +23815292,A subgraph isomorphism algorithm and its application to biochemical data.,"

Background

Graphs can represent biological networks at the molecular, protein, or species level. An important query is to find all matches of a pattern graph to a target graph. Accomplishing this is inherently difficult (NP-complete) and the efficiency of heuristic algorithms for the problem may depend upon the input graphs. The common aim of existing algorithms is to eliminate unsuccessful mappings as early as and as inexpensively as possible.

Results

We propose a new subgraph isomorphism algorithm which applies a search strategy to significantly reduce the search space without using any complex pruning rules or domain reduction procedures. We compare our method with the most recent and efficient subgraph isomorphism algorithms (VFlib, LAD, and our C++ implementation of FocusSearch which was originally distributed in Modula2) on synthetic, molecules, and interaction networks data. We show a significant reduction in the running time of our approach compared with these other excellent methods and show that our algorithm scales well as memory demands increase.

Conclusions

Subgraph isomorphism algorithms are intensively used by biochemical tools. Our analysis gives a comprehensive comparison of different software approaches to subgraph isomorphism highlighting their weaknesses and strengths. This will help researchers make a rational choice among methods depending on their application. We also distribute an open-source package including our system and our own C++ implementation of FocusSearch together with all the used datasets (http://ferrolab.dmi.unict.it/ri.html). In future work, our findings may be extended to approximate subgraph isomorphism algorithms.",2013-04-22 +21837777,Can the prostate risk calculator based on Western population be applied to Asian population?,"

Background

We developed a Korean prostate cancer risk calculator (KPCRC) for predicting the probability of a positive initial prostate biopsy using clinical and laboratory data from a Korean male population (http://pcrc.korea.ac.kr). We compared its performance to prostate-specific antigen (PSA) testing and the Prostate Risk Calculator 3 (PRC 3) based on data from the Dutch part of the European Randomized Study of Screening for Prostate Cancer (ERSPC), which predicts biopsy results for previously unscreened men.

Methods

Data were collected from 602 Korean men who were previously unscreened and underwent initial ten-core prostate biopsies. Multiple logistic regression analysis was performed to determine the significant predictors. Area under the receiver operating characteristic curve (AUC) and calibration plots of both calculators were evaluated.

Results

Prostate cancer (PCa) was detected in 172 (28.6%) men. Independent predictors of a positive biopsy included advanced age, elevated PSA levels, reduced volume of the transition zone, and abnormal digital rectal examination findings. The AUC of the KPCRC was higher than the PRC 3 and PSA alone on internal and external validation. Calibration plots of the KPCRC showed better performance than the other models on internal and external validation. Applying a cut-off of 10% of KPCRC implied that 251 of the 602 men (42%) would not have been biopsied and that 12 of the 172 PCa cases (7%) would not have been diagnosed.

Conclusions

The KPCRC improves the performance of the PRC 3 and PSA testing in predicting Korean population's risk of PCa. It implies that Asian populations need their own risk calculators for PCa.",2011-08-11 +21824971,FDM: a graph-based statistical method to detect differential transcription using RNA-seq data.,"

Motivation

In eukaryotic cells, alternative splicing expands the diversity of RNA transcripts and plays an important role in tissue-specific differentiation, and can be misregulated in disease. To understand these processes, there is a great need for methods to detect differential transcription between samples. Our focus is on samples observed using short-read RNA sequencing (RNA-seq).

Methods

We characterize differential transcription between two samples as the difference in the relative abundance of the transcript isoforms present in the samples. The magnitude of differential transcription of a gene between two samples can be measured by the square root of the Jensen Shannon Divergence (JSD*) between the gene's transcript abundance vectors in each sample. We define a weighted splice-graph representation of RNA-seq data, summarizing in compact form the alignment of RNA-seq reads to a reference genome. The flow difference metric (FDM) identifies regions of differential RNA transcript expression between pairs of splice graphs, without need for an underlying gene model or catalog of transcripts. We present a novel non-parametric statistical test between splice graphs to assess the significance of differential transcription, and extend it to group-wise comparison incorporating sample replicates.

Results

Using simulated RNA-seq data consisting of four technical replicates of two samples with varying transcription between genes, we show that (i) the FDM is highly correlated with JSD* (r=0.82) when average RNA-seq coverage of the transcripts is sufficiently deep; and (ii) the FDM is able to identify 90% of genes with differential transcription when JSD* >0.28 and coverage >7. This represents higher sensitivity than Cufflinks (without annotations) and rDiff (MMD), which respectively identified 69 and 49% of the genes in this region as differentially transcribed. Using annotations identifying the transcripts, Cufflinks was able to identify 86% of the genes in this region as differentially transcribed. Using experimental data consisting of four replicates each for two cancer cell lines (MCF7 and SUM102), FDM identified 1425 genes as significantly different in transcription. Subsequent study of the samples using quantitative real time polymerase chain reaction (qRT-PCR) of several differential transcription sites identified by FDM confirmed significant differences at these sites.

Availability

http://csbio-linux001.cs.unc.edu/nextgen/software/FDM CONTACT: darshan@email.unc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-08 +23601859,P2RP: a Web-based framework for the identification and analysis of regulatory proteins in prokaryotic genomes.,"

Background

Regulatory proteins (RPs) such as transcription factors (TFs) and two-component system (TCS) proteins control how prokaryotic cells respond to changes in their external and/or internal state. Identification and annotation of TFs and TCSs is non-trivial, and between-genome comparisons are often confounded by different standards in annotation. There is a need for user-friendly, fast and convenient tools to allow researchers to overcome the inherent variability in annotation between genome sequences.

Results

We have developed the web-server P2RP (Predicted Prokaryotic Regulatory Proteins), which enables users to identify and annotate TFs and TCS proteins within their sequences of interest. Users can input amino acid or genomic DNA sequences, and predicted proteins therein are scanned for the possession of DNA-binding domains and/or TCS domains. RPs identified in this manner are categorised into families, unambiguously annotated, and a detailed description of their features generated, using an integrated software pipeline. P2RP results can then be outputted in user-specified formats.

Conclusion

Biologists have an increasing need for fast and intuitively usable tools, which is why P2RP has been developed as an interactive system. As well as assisting experimental biologists to interrogate novel sequence data, it is hoped that P2RP will be built into genome annotation pipelines and re-annotation processes, to increase the consistency of RP annotation in public genomic sequences. P2RP is the first publicly available tool for predicting and analysing RP proteins in users' sequences. The server is freely available and can be accessed along with documentation at http://www.p2rp.org.",2013-04-20 +23055156,CHull: a generic convex-hull-based model selection method.,"When analyzing data, researchers are often confronted with a model selection problem (e.g., determining the number of components/factors in principal components analysis [PCA]/factor analysis or identifying the most important predictors in a regression analysis). To tackle such a problem, researchers may apply some objective procedure, like parallel analysis in PCA/factor analysis or stepwise selection methods in regression analysis. A drawback of these procedures is that they can only be applied to the model selection problem at hand. An interesting alternative is the CHull model selection procedure, which was originally developed for multiway analysis (e.g., multimode partitioning). However, the key idea behind the CHull procedure--identifying a model that optimally balances model goodness of fit/misfit and model complexity--is quite generic. Therefore, the procedure may also be used when applying many other analysis techniques. The aim of this article is twofold. First, we demonstrate the wide applicability of the CHull method by showing how it can be used to solve various model selection problems in the context of PCA, reduced K-means, best-subset regression, and partial least squares regression. Moreover, a comparison of CHull with standard model selection methods for these problems is performed. 
Second, we present the CHULL software, which may be downloaded from http://ppw.kuleuven.be/okp/software/CHULL/, to assist the user in applying the CHull procedure.",2013-03-01 +22391676,Nature and nurture in stuttering: a systematic review on the case of Moses.,"Stuttering is a disturbance of normal fluency of speech whose pathophysiology is still not well understood. We investigated one of the most ancient speech disorders in the biblical person Moses who lived in approximately 1300 BC. To get the most complete medical and non-medical information on Moses, we did systematic searches in the Holy Bible using the Bible-Discovery v2.3© software ( http://www.bible-discovery.com ) looking for verses containing the terms ""Moses"", ""Stuttering"" and ""Stutter""; and in PubMed/Medline database for manuscripts having the terms ""Moses"", ""Bible"" and ""Stuttering"". From the Bible search, 742 verses were found, of which 23 were relevant; three additional verses were found by hand search. Six papers discussing Moses's pathology were found in the PubMed search. The analysis of ancient descriptions in the light of current research suggests that stuttering is the most likely pathology Moses had, with clear evidence for both genetic origin and environmental triggers. Further, it was found that Moses practiced some ""sensory tricks"" that could be used to relieve his speech disorder which are, to our knowledge, the first ""tricks"" that successfully modulated a movement disorder described in the medical literature.",2012-03-06 +22166077,Processing methods for signal suppression of FTMS data.,"

Background

Fourier Transform Mass Spectrometry coupled with Liquid Chromatography (LC-FTMS) has been widely used in proteomics. Past investigation has revealed that there exists an intensity dependent random suppression in peptide elution profiles in LC-FTMS data. The suppression is homogenous for the same peptide but non-homogenous for different peptides. The correction of suppressed profiles and an estimate of the range of suppression are necessary for accurate and reliable quantification using FTMS data.

Results

A software package, Gcorr, is presented. The software corrects peptide profiles that satisfy correction conditions, and it can predict fold change null distributions at different intensity levels. Subsequently, the significance P-values of measured fold changes can be estimated based on the predicted null distributions. We have used a 1:1 LC-FTMS label-free dataset pair collected based on the same sample to verify that our predicted null distributions conform to the observed null distribution.

Conclusions

This software is able to provide suppression correction for peptide profiles, suppression distribution analysis and peptide differential expression analysis in terms of its fold change significance. The software is freely available at http://compgenomics.utsa.edu/Suppression_Study.html.",2011-10-14 +24380957,Traffic-related air pollution and congenital anomalies in Barcelona.,"

Background

A recent meta-analysis suggested evidence for an effect of exposure to ambient air pollutants on risk of certain congenital heart defects. However, few studies have investigated the effects of traffic-related air pollutants with sufficient spatial accuracy.

Objectives

We estimated associations between congenital anomalies and exposure to traffic-related air pollution in Barcelona, Spain.

Method

Cases with nonchromosomal anomalies (n = 2,247) and controls (n = 2,991) were selected from the Barcelona congenital anomaly register during 1994-2006. Land use regression models from the European Study of Cohorts for Air Pollution Effects (ESCAPE), were applied to residential addresses at birth to estimate spatial exposure to nitrogen oxides and dioxide (NOx, NO2), particulate matter with diameter ≤ 10 μm (PM10), 10-2.5 μm (PMcoarse), ≤ 2.5 μm (PM2.5), and PM2.5 absorbance. Spatial estimates were adjusted for temporal trends using data from routine monitoring stations for weeks 3-8 of each pregnancy. Logistic regression models were used to calculate odds ratios (ORs) for 18 congenital anomaly groups associated with an interquartile-range (IQR) increase in exposure estimates.

Results

In spatial and spatiotemporal exposure models, we estimated statistically significant associations between an IQR increase in NO2 (12.2 μg/m3) and coarctation of the aorta (ORspatiotemporal = 1.15; 95% CI: 1.01, 1.31) and digestive system defects (ORspatiotemporal = 1.11; 95% CI: 1.00, 1.23), and between an IQR increase in PMcoarse (3.6 μg/m3) and abdominal wall defects (ORspatiotemporal = 1.93; 95% CI: 1.37, 2.73). Other statistically significant increased and decreased ORs were estimated based on the spatial model only or the spatiotemporal model only, but not both.

Conclusions

Our results overall do not indicate an association between traffic-related air pollution and most groups of congenital anomalies. Findings for coarctation of the aorta are consistent with those of the previous meta-analysis.

Citation

Schembari A, Nieuwenhuijsen MJ, Salvador J, de Nazelle A, Cirach M, Dadvand P, Beelen R, Hoek G, Basagaña X, Vrijheid M. 2014. Traffic-related air pollution and congenital anomalies in Barcelona. Environ Health Perspect 122:317-323; http://dx.doi.org/10.1289/ehp.1306802.",2014-01-03 +22523085,PocketQuery: protein-protein interaction inhibitor starting points from protein-protein interaction structure.,PocketQuery (http://pocketquery.csb.pitt.edu) is a web interface for exploring the properties of protein-protein interaction (PPI) interfaces with a focus on the discovery of promising starting points for small-molecule design. PocketQuery rapidly focuses attention on the key interacting residues of an interaction using a 'druggability' score that provides an estimate of how likely the chemical mimicry of a cluster of interface residues would result in a small-molecule inhibitor of an interaction. These residue clusters are chemical starting points that can be seamlessly exported to a pharmacophore-based drug discovery workflow. PocketQuery is updated on a weekly basis to contain all applicable PPI structures deposited in the Protein Data Bank and allows users to upload their own custom structures for analysis.,2012-04-20 +22711791,Bluues server: electrostatic properties of wild-type and mutated protein structures.,"

Motivation

Electrostatic calculations are an important tool for deciphering many functional mechanisms in proteins. Generalized Born (GB) models offer a fast and convenient computational approximation over other implicit solvent-based electrostatic models. Here we present a novel GB-based web server, using the program Bluues, to calculate numerous electrostatic features including pKa-values and surface potentials. The output is organized to allow both experts and beginners to rapidly sift the data. A novel feature of the Bluues server is that it explicitly allows users to find electrostatic differences between wild-type and mutant structures.

Availability

The Bluues server, examples and extensive help files are available for non-commercial use at URL: http://protein.bio.unipd.it/bluues/.",2012-06-17 +23455476,RIPSeeker: a statistical package for identifying protein-associated transcripts from RIP-seq experiments.,"RIP-seq has recently been developed to discover genome-wide RNA transcripts that interact with a protein or protein complex. RIP-seq is similar to both RNA-seq and ChIP-seq, but presents unique properties and challenges. Currently, no statistical tool is dedicated to RIP-seq analysis. We developed RIPSeeker (http://www.bioconductor.org/packages/2.12/bioc/html/RIPSeeker.html), a free open-source Bioconductor/R package for de novo RIP peak predictions based on HMM. To demonstrate the utility of the software package, we applied RIPSeeker and six other published programs to three independent RIP-seq datasets and two PAR-CLIP datasets corresponding to six distinct RNA-binding proteins. Based on receiver operating curves, RIPSeeker demonstrates superior sensitivity and specificity in discriminating high-confidence peaks that are consistently agreed on among a majority of the comparison methods, and dominated 9 of the 12 evaluations, averaging 80% area under the curve. The peaks from RIPSeeker are further confirmed based on their significant enrichment for biologically meaningful genomic elements, published sequence motifs and association with canonical transcripts known to interact with the proteins examined. While RIPSeeker is specifically tailored for RIP-seq data analysis, it also provides a suite of bioinformatics tools integrated within a self-contained software package comprehensively addressing issues ranging from post-alignments' processing to visualization and annotation.",2013-02-28 +21998154,Sequential Monte Carlo multiple testing.,"

Motivation

In molecular biology, as in many other scientific fields, the scale of analyses is ever increasing. Often, complex Monte Carlo simulation is required, sometimes within a large-scale multiple testing setting. The resulting computational costs may be prohibitively high.

Results

We here present MCFDR, a simple, novel algorithm for false discovery rate (FDR) modulated sequential Monte Carlo (MC) multiple hypothesis testing. The algorithm iterates between adding MC samples across tests and calculating intermediate FDR values for the collection of tests. MC sampling is stopped either by sequential MC or based on a threshold on FDR. An essential property of the algorithm is that it limits the total number of MC samples whatever the number of true null hypotheses. We show on both real and simulated data that the proposed algorithm provides large gains in computational efficiency.

Availability

MCFDR is implemented in the Genomic HyperBrowser (http://hyperbrowser.uio.no/mcfdr), a web-based system for genome analysis. All input data and results are available and can be reproduced through a Galaxy Pages document at: http://hyperbrowser.uio.no/mcfdr/u/sandve/p/mcfdr.

Contact

geirksa@ifi.uio.no.",2011-10-13 +23236535,A global comparison of the human and T. brucei degradomes gives insights about possible parasite drug targets.,"We performed a genome-level computational study of sequence and structure similarity, the latter using crystal structures and models, of the proteases of Homo sapiens and the human parasite Trypanosoma brucei. Using sequence and structure similarity networks to summarize the results, we constructed global views that show visually the relative abundance and variety of proteases in the degradome landscapes of these two species, and provide insights into evolutionary relationships between proteases. The results also indicate how broadly these sequence sets are covered by three-dimensional structures. These views facilitate cross-species comparisons and offer clues for drug design from knowledge about the sequences and structures of potential drug targets and their homologs. Two protease groups (""M32"" and ""C51"") that are very different in sequence from human proteases are examined in structural detail, illustrating the application of this global approach in mining new pathogen genomes for potential drug targets. Based on our analyses, a human ACE2 inhibitor was selected for experimental testing on one of these parasite proteases, TbM32, and was shown to inhibit it. These sequence and structure data, along with interactive versions of the protein similarity networks generated in this study, are available at http://babbittlab.ucsf.edu/resources.html.",2012-12-06 +23599503,A powerful and efficient set test for genetic markers that handles confounders.,"

Motivation

Approaches for testing sets of variants, such as a set of rare or common variants within a gene or pathway, for association with complex traits are important. In particular, set tests allow for aggregation of weak signal within a set, can capture interplay among variants and reduce the burden of multiple hypothesis testing. Until now, these approaches did not address confounding by family relatedness and population structure, a problem that is becoming more important as larger datasets are used to increase power.

Results

We introduce a new approach for set tests that handles confounders. Our model is based on the linear mixed model and uses two random effects-one to capture the set association signal and one to capture confounders. We also introduce a computational speedup for two random-effects models that makes this approach feasible even for extremely large cohorts. Using this model with both the likelihood ratio test and score test, we find that the former yields more power while controlling type I error. Application of our approach to richly structured Genetic Analysis Workshop 14 data demonstrates that our method successfully corrects for population structure and family relatedness, whereas application of our method to a 15 000 individual Crohn's disease case-control cohort demonstrates that it additionally recovers genes not recoverable by univariate analysis.

Availability

A Python-based library implementing our approach is available at http://mscompbio.codeplex.com.",2013-04-18 +21685084,A generalized model for multi-marker analysis of cell cycle progression in synchrony experiments.,"

Motivation

To advance understanding of eukaryotic cell division, it is important to observe the process precisely. To this end, researchers monitor changes in dividing cells as they traverse the cell cycle, with the presence or absence of morphological or genetic markers indicating a cell's position in a particular interval of the cell cycle. A wide variety of marker data is available, including information-rich cellular imaging data. However, few formal statistical methods have been developed to use these valuable data sources in estimating how a population of cells progresses through the cell cycle. Furthermore, existing methods are designed to handle only a single binary marker of cell cycle progression at a time. Consequently, they cannot facilitate comparison of experiments involving different sets of markers.

Results

Here, we develop a new sampling model to accommodate an arbitrary number of different binary markers that characterize the progression of a population of dividing cells along a branching process. We engineer a strain of Saccharomyces cerevisiae with fluorescently labeled markers of cell cycle progression, and apply our new model to two image datasets we collected from the strain, as well as an independent dataset of different markers. We use our model to estimate the duration of post-cytokinetic attachment between a S.cerevisiae mother and daughter cell. The Java implementation is fast and extensible, and includes a graphical user interface. Our model provides a powerful and flexible cell cycle analysis tool, suitable to any type or combination of binary markers.

Availability

The software is available from: http://www.cs.duke.edu/~amink/software/cloccs/.

Contact

michael.mayhew@duke.edu; amink@cs.duke.edu.",2011-07-01 +21999284,Learning cellular sorting pathways using protein interactions and sequence motifs.,"Proper subcellular localization is critical for proteins to perform their roles in cellular functions. Proteins are transported by different cellular sorting pathways, some of which take a protein through several intermediate locations until reaching its final destination. The pathway a protein is transported through is determined by carrier proteins that bind to specific sequence motifs. In this article, we present a new method that integrates protein interaction and sequence motif data to model how proteins are sorted through these sorting pathways. We use a hidden Markov model (HMM) to represent protein sorting pathways. The model is able to determine intermediate sorting states and to assign carrier proteins and motifs to the sorting pathways. In simulation studies, we show that the method can accurately recover an underlying sorting model. Using data for yeast, we show that our model leads to accurate prediction of subcellular localization. We also show that the pathways learned by our model recover many known sorting pathways and correctly assign proteins to the path they utilize. The learned model identified new pathways and their putative carriers and motifs and these may represent novel protein sorting mechanisms. Supplementary results and software implementation are available from http://murphylab.web.cmu.edu/software/2010_RECOMB_pathways/.",2011-10-14 +23445933,Neural connectivity abnormalities in autism: insights from the Tuberous Sclerosis model.,"Autism Spectrum Disorder (ASD) is a behavioral syndrome caused by complex genetic and non-genetic risk factors. It has been proposed that these risk factors lead to alterations in the development and 'wiring' of brain circuits and hence, the emergence of ASD. 
Although several lines of research lend support to this theory, etiological and clinical heterogeneity, methodological issues and inconsistent findings have led to significant doubts. One of the best established, albeit rare, causes of ASD is the genetic condition Tuberous Sclerosis Complex (TSC), where 40% of individuals develop ASD. A recent study by Peters and Taquet et al. analyzed electroencephalography (EEG) data using graph theory to model neural 'connectivity' in individuals with TSC with and without ASD and cases with 'idiopathic' ASD. TSC cases exhibited global under-connectivity and abnormal network topology, whereas individuals with TSC + ASD demonstrated similar connectivity patterns to those seen in individuals with idiopathic ASD: decreased long- over short-range connectivity. The similarity in connectivity abnormalities in TSC + ASD and ASD suggest a common final pathway and provide further support for 'mis-wired' neural circuitry in ASD. The origins of the connectivity changes, and their role in mediating between the neural and the cognitive/behavioral manifestations, will require further study. Please see related research article here http://www.biomedcentral.com/1741-7015/11/54.",2013-02-27 +23091307,Prediction of microbial infection of cultured cells using DNA microarray gene-expression profiles of host responses.,"Infection by microorganisms may cause fatally erroneous interpretations in the biologic researches based on cell culture. The contamination by microorganism in the cell culture is quite frequent (5% to 35%). However, current approaches to identify the presence of contamination have many limitations such as high cost of time and labor, and difficulty in interpreting the result. In this paper, we propose a model to predict cell infection, using a microarray technique which gives an overview of the whole genome profile. 
By analysis of 62 microarray expression profiles under various experimental conditions altering cell type, source of infection and collection time, we discovered 5 marker genes, NM_005298, NM_016408, NM_014588, S76389, and NM_001853. In addition, we discovered two of these genes, S76389, and NM_001853, are involved in a Mycoplasma-specific infection process. We also suggest models to predict the source of infection, cell type or time after infection. We implemented a web based prediction tool in microarray data, named Prediction of Microbial Infection (http://www.snubi.org/software/PMI).",2012-10-02 +22395997,Nomograms for predicting the risk of arm lymphedema after axillary dissection in breast cancer.,"

Background

Lymphedema (LE) after axillary lymph node dissection (ALND) is a multifactorial, chronic, and disabling condition that currently affects an estimated 4 million people worldwide. Although several risk factors have been described, it is difficult to estimate the risk in individual patients. We therefore developed nomograms based on a large data set.

Methods

Clinicopathologic features were collected from a prospective cohort comprising 1,054 women with unilateral breast cancer undergoing ALND as part of their surgical treatment from August 2001 to November 2002. LE was defined as a volume difference of at least 200 ml between arms at 6 months or more after surgery. The cumulative incidence of LE was ascertained by the Kaplan-Meier method, and Cox proportional hazard models were used to predict the risk of developing LE on the basis of the available data at each time point: model 1, preoperatively; model 2, within 6 months from surgery; and model 3, at 6 months or later after surgery.

Results

The 5 year cumulative incidence of LE was 30.3%. Independent risk factors for LE were age, body mass index, ipsilateral arm chemotherapy infusions, level of ALND, location of radiotherapy field, development of postoperative seroma, infection, and early edema. When applied to the validation set, the concordance indices were 0.706, 0.729, and 0.736 for models 1, 2, and 3, respectively.

Conclusions

The proposed nomograms can help physicians and patients predict the 5 year probability of LE after ALND for breast cancer. Free online versions of the nomograms are available at http://www.lymphedemarisk.com/ .",2012-03-07 +22428748,ZeBase: an open-source relational database for zebrafish laboratories.,"Abstract ZeBase is an open-source relational database for zebrafish inventory. It is designed for the recording of genetic, breeding, and survival information of fish lines maintained in a single- or multi-laboratory environment. Users can easily access ZeBase through standard web-browsers anywhere on a network. Convenient search and reporting functions are available to facilitate routine inventory work; such functions can also be automated by simple scripting. Optional barcode generation and scanning are also built-in for easy access to the information related to any fish. Further information of the database and an example implementation can be found at http://zebase.bio.purdue.edu.",2012-03-01 +22390938,WHIDE--a web tool for visual data mining colocation patterns in multivariate bioimages.,"

Motivation

Bioimaging techniques rapidly develop toward higher resolution and dimension. The increase in dimension is achieved by different techniques such as multitag fluorescence imaging, Matrix Assisted Laser Desorption / Ionization (MALDI) imaging or Raman imaging, which record for each pixel an N-dimensional intensity array, representing local abundances of molecules, residues or interaction patterns. The analysis of such multivariate bioimages (MBIs) calls for new approaches to support users in the analysis of both feature domains: space (i.e. sample morphology) and molecular colocation or interaction. In this article, we present our approach WHIDE (Web-based Hyperbolic Image Data Explorer) that combines principles from computational learning, dimension reduction and visualization in a free web application.

Results

We applied WHIDE to a set of MBI recorded using the multitag fluorescence imaging Toponome Imaging System. The MBI show field of view in tissue sections from a colon cancer study and we compare tissue from normal/healthy colon with tissue classified as tumor. Our results show, that WHIDE efficiently reduces the complexity of the data by mapping each of the pixels to a cluster, referred to as Molecular Co-Expression Phenotypes and provides a structural basis for a sophisticated multimodal visualization, which combines topology preserving pseudocoloring with information visualization. The wide range of WHIDE's applicability is demonstrated with examples from toponome imaging, high content screens and MALDI imaging (shown in the Supplementary Material).

Availability and implementation

The WHIDE tool can be accessed via the BioIMAX website http://ani.cebitec.uni-bielefeld.de/BioIMAX/; Login: whidetestuser; Password: whidetest.",2012-03-05 +24168782,Randomized controlled trials in pediatric critical care: a scoping review.,"

Introduction

Evidence from randomized controlled trials (RCTs) is required to guide treatment of critically ill children, but the number of RCTs available is limited and the publications are often difficult to find. The objectives of this review were to systematically identify RCTs in pediatric critical care and describe their methods and reporting.

Methods

We searched MEDLINE, EMBASE, LILACS and CENTRAL (from inception to April 16, 2013) and reference lists of included RCTs and relevant systematic reviews. We included published RCTs administering any intervention to children in a pediatric ICU. We excluded trials conducted in neonatal ICUs, those enrolling exclusively preterm infants, and individual patient crossover trials. Pairs of reviewers independently screened studies for eligibility, assessed risk of bias, and abstracted data. Discrepancies were resolved by consensus.

Results

We included 248 RCTs: 45 (18%) were multicentered and 14 (6%) were multinational. Trials most frequently enrolled both medical and surgical patients (43%) but postoperative cardiac surgery was the single largest population studied (19%). The most frequently evaluated types of intervention were medications (63%), devices (11%) and nutrition (8%). Laboratory or physiological measurements were the most frequent type of primary outcomes (18%). Half of these trials (50%) reported blinding. Of the 107 (43%) trials that reported an a priori sample size, 34 (32%) were stopped early. The median number of children randomized per trial was 49 and ranged from 6 to 4,947. The frequency of RCT publications increased at a mean rate of 0.7 RCTs per year (P<0.001) from 1 to 20 trials per year.

Conclusions

This scoping review identified the available RCTs in pediatric critical care and made them accessible to clinicians and researchers (http://epicc.mcmaster.ca). Most focused on medications and intermediate or surrogate outcomes, were single-centered and were conducted in North America and Western Europe. The results of this review underscore the need for trials with rigorous methodology, appropriate outcome measures, and improved quality of reporting to ensure that high quality evidence exists to support clinical decision-making in this vulnerable population.",2013-10-29 +22130591,SCPC: a method to structurally compare protein complexes.,"

Motivation

Protein-protein interactions play vital functional roles in various biological phenomena. Physical contacts between proteins have been revealed using experimental approaches that have solved the structures of protein complexes at atomic resolution. To examine the huge number of protein complexes available in the Protein Data Bank, an efficient automated method that compares protein complexes is required.

Results

We have developed Structural Comparison of Protein Complexes (SCPC), a novel method to structurally compare protein complexes. SCPC compares the spatial arrangements of subunits in a complex with those in another complex using secondary structure elements. Similar substructures are detected in two protein complexes and the similarity is scored. SCPC was applied to dimers, homo-oligomers and haemoglobins. SCPC properly estimated structural similarities between the dimers examined as well as an existing method, MM-align. Conserved substructures were detected in a homo-tetramer and a homo-hexamer composed of homologous proteins. Classification of quaternary structures of haemoglobins using SCPC was consistent with the conventional classification. The results demonstrate that SCPC is a valuable tool to investigate the structures of protein complexes.

Availability

SCPC is available at http://idp1.force.cs.is.nagoya-u.ac.jp/scpc/.

Contact

rkoike@is.nagoya-u.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-29 +23307009,TG13 indications and techniques for gallbladder drainage in acute cholecystitis (with videos).,"Percutaneous transhepatic gallbladder drainage (PTGBD) is considered a safe alternative to early cholecystectomy, especially in surgically high-risk patients with acute cholecystitis. Although randomized prospective controlled trials are lacking, data from most retrospective studies demonstrate that PTGBD is the most common gallbladder drainage method. There are several alternatives to PTGBD. Percutaneous transhepatic gallbladder aspiration is a simple alternative drainage method with fewer complications; however, its clinical usefulness has been shown only by case-series studies. Endoscopic naso-gallbladder drainage and gallbladder stenting via a transpapillary endoscopic approach are also alternative methods in acute cholecystitis, but both of them have technical difficulties resulting in lower success rates than that of PTGBD. Recently, endoscopic ultrasonography-guided transmural gallbladder drainage has been reported as a special technique for gallbladder drainage. However, it is not yet an established technique. Therefore, it should be performed in high-volume institutes by skilled endoscopists. Further prospective evaluations of the feasibility, safety, and efficacy of these various approaches are needed. This article describes indications and techniques of drainage for acute cholecystitis. Free full-text articles and a mobile application of TG13 are available via http://www.jshbps.jp/en/guideline/tg13.html.",2013-01-01 +21708002,A mutation degree model for the identification of transcriptional regulatory elements.,"

Background

Current approaches for identifying transcriptional regulatory elements are mainly via the combination of two properties, the evolutionary conservation and the overrepresentation of functional elements in the promoters of co-regulated genes. Despite the development of many motif detection algorithms, the discovery of conserved motifs in a wide range of phylogenetically related promoters is still a challenge, especially for the short motifs embedded in distantly related gene promoters or very closely related promoters, or in the situation that there are not enough orthologous genes available.

Results

A mutation degree model is proposed and a new word counting method is developed for the identification of transcriptional regulatory elements from a set of co-expressed genes. The new method comprises two parts: 1) identifying overrepresented oligo-nucleotides in promoters of co-expressed genes, 2) estimating the conservation of the oligo-nucleotides in promoters of phylogenetically related genes by the mutation degree model. Compared with the performance of other algorithms, our method shows the advantages of low false positive rate and higher specificity, especially the robustness to noisy data. Applying the method to co-expressed gene sets from Arabidopsis, most of known cis-elements were successfully detected. The tool and example are available at http://mcube.nju.edu.cn/jwang/lab/soft/ocw/OCW.html.

Conclusions

The mutation degree model proposed in this paper is adapted to phylogenetic data of different qualities, and to a wide range of evolutionary distances. The new word-counting method based on this model has the advantage of better performance in detecting short sequence of cis-elements from co-expressed genes of eukaryotes and is robust to less complete phylogenetic data.",2011-06-27 +23432846,Correlation between chemosensitivity to anticancer drugs and telomerase reverse transcriptase mRNA expression in gastric cancer.,"

Background

The determination of sensitive chemotherapy drugs for gastric cancer (GC) is one of the greatest challenges of adjuvant therapy. Here we evaluated the chemosensitivity of GC to anticancer drugs and the telomerase reverse transcriptase (hTERT) mRNA expression, and investigated the relationship of them.

Methods

The GC cells which were collected from 68 patients with primary GC were primary cultured. The chemosensitivity of GC cells to anticancer drugs was evaluated successfully using the MTT assay for 60 cases of GC cells, and the hTERT mRNA expression was examined in 60 cases of GC tissues and corresponding normal gastric mucosa and 6 cases of chronic superficial gastritis mucosa by in situ hybridization.

Results

Taxol, cisplatin and 5-fluorouracil were in general more effective than adriamycin and mitomycin for GC cells, and the chemosensitivity to anticancer drugs was associated with tumor histological types and a worse tumor grade. Compared to normal gastric mucosa tissues, hTERT mRNA expression was significantly increased in GC (P<0.05), which was related with a worse differentiation and drug-resistance to 5-fluorouracil or adriamycin in GC.

Conclusions

These data demonstrate for the first time that examinations of hTERT mRNA expression as an important factor could be used to select the chemotherapeutic drugs for GC patients.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1793217009875483.",2013-02-22 +21685047,"AMIGO, a toolbox for advanced model identification in systems biology using global optimization.","

Motivation

Mathematical models of complex biological systems usually consist of sets of differential equations which depend on several parameters which are not accessible to experimentation. These parameters must be estimated by fitting the model to experimental data. This estimation problem is very challenging due to the non-linear character of the dynamics, the large number of parameters and the frequently poor information content of the experimental data (poor practical identifiability). The design of optimal (more informative) experiments is an associated problem of the highest interest.

Results

This work presents AMIGO, a toolbox which facilitates parametric identification by means of advanced numerical techniques which cover the full iterative identification procedure putting especial emphasis on robust methods for parameter estimation and practical identifiability analyses, plus flexible capabilities for optimal experimental design.

Availability

The toolbox and the corresponding documentation may be downloaded from: http://www.iim.csic.es/~amigo

Contact

ebalsa@iim.csic.es.",2011-06-17 +22389711,Identification and differentiation of the twenty six bluetongue virus serotypes by RT-PCR amplification of the serotype-specific genome segment 2.,"Bluetongue (BT) is an arthropod-borne viral disease, which primarily affects ruminants in tropical and temperate regions of the world. Twenty six bluetongue virus (BTV) serotypes have been recognised worldwide, including nine from Europe and fifteen in the United States. Identification of BTV serotype is important for vaccination programmes and for BTV epidemiology studies. Traditional typing methods (virus isolation and serum or virus neutralisation tests (SNT or VNT)) are slow (taking weeks, depend on availability of reference virus-strains or antisera) and can be inconclusive. Nucleotide sequence analyses and phylogenetic comparisons of genome segment 2 (Seg-2) encoding BTV outer-capsid protein VP2 (the primary determinant of virus serotype) were completed for reference strains of BTV-1 to 26, as well as multiple additional isolates from different geographic and temporal origins. The resulting Seg-2 database has been used to develop rapid (within 24 h) and reliable RT-PCR-based typing assays for each BTV type. Multiple primer-pairs (at least three designed for each serotype) were widely tested, providing an initial identification of serotype by amplification of a cDNA product of the expected size. Serotype was confirmed by sequencing of the cDNA amplicons and phylogenetic comparisons to previously characterised reference strains. The results from RT-PCR and sequencing were in perfect agreement with VNT for reference strains of all 26 BTV serotypes, as well as the field isolates tested. The serotype-specific primers showed no cross-amplification with reference strains of the remaining 25 serotypes, or multiple other isolates of the more closely related heterologous BTV types. 
The primers and RT-PCR assays developed in this study provide a rapid, sensitive and reliable method for the identification and differentiation of the twenty-six BTV serotypes, and will be updated periodically to maintain their relevance to current BTV distribution and epidemiology (http://www.reoviridae.org/dsRNA_virus_proteins/ReoID/rt-pcr-primers.htm).",2012-02-28 +22419842,GIST: Genomic island suite of tools for predicting genomic islands in genomic sequences.,"

Unlabelled

Genomic Islands (GIs) are genomic regions that are originally from other organisms, through a process known as Horizontal Gene Transfer (HGT). Detection of GIs plays a significant role in biomedical research since such alien genomic regions usually contain important features, such as pathogenic genes. We have developed a user-friendly graphical user interface, Genomic Island Suite of Tools (GIST), which is a platform for scientific users to predict GIs. This software package includes five commonly used tools, AlienHunter, IslandPath, Colombo SIGI-HMM, INDeGenIUS and Pai-Ida. It also includes an optimization program EGID that ensembles the result of existing tools for more accurate prediction. The tools in GIST can be used either separately or sequentially. GIST also includes a downloadable feature that facilitates collecting the input genomes automatically from the FTP server of the National Center for Biotechnology Information (NCBI). GIST was implemented in Java, and was compiled and executed on Linux/Unix operating systems.

Availability

The database is available for free at http://www5.esu.edu/cpsc/bioinfo/software/GIST.",2012-02-28 +23589652,UPDtool: a tool for detection of iso- and heterodisomy in parent-child trios using SNP microarrays.,"

Unlabelled

UPDtool is a computational tool for detection and classification of uniparental disomy (UPD) in trio SNP-microarray experiments. UPDs are rare events of chromosomal malsegregation and describe the condition of two homologous chromosomes or homologous chromosomal segments that were inherited from one parent. The occurrence of UPD can be of major clinical relevance. Though high-throughput molecular screening techniques are widely used, detection of UPDs and especially the subclassification remains complex. We developed UPDtool to detect and classify UPDs from SNP microarray data of parent-child trios. The algorithm was tested using five positive controls including both iso- and heterodisomic segmental UPDs and 30 trios from the HapMap project as negative controls. With UPDtool, we were able to correctly identify all occurrences of non-mosaic UPD within our positive controls, whereas no occurrence of UPD was found within our negative controls. In addition, the chromosomal breakage points could be determined more precisely than by microsatellite analysis. Our results were compared with both the gold standard, microsatellite analysis and SNPtrio, another program available for UPD detection. UPDtool is platform independent, light weight and flexible. Because of its simple input format, UPDtool may also be used with other high-throughput technologies (e.g., next-generation sequencing).

Availability and implementation

UPDtool executables, documentation and examples can be downloaded from http://www.uni-tuebingen.de/uni/thk/de/f-genomik-software.html.",2013-04-14 +22789124,Early life programming of cardiometabolic disease in the Western Australian pregnancy cohort (Raine) study.,"The Raine study (http://www.rainestudy.org.au, accessed 18 June 2012) is a longitudinal Australian birth cohort that has serially assessed the offspring of 2900 pregnant women from 18 weeks gestation in utero to 17 years of age. The Raine study data have shown that low birth weight is a surrogate for poor in utero growth from 18 weeks gestation. A U-shaped relationship between birth size and cardiometabolic risk exists in this Western Australian cohort, implying that both low and high birth weight are associated with increased risk. High birth weight is a risk factor for cardiometabolic risk, particularly for females. Lifetime adiposity trajectories are better at predicting metabolic risk of the offspring than birth size or current body mass index. Therefore, early life programming is an ongoing process, starting in utero and undergoing at least some level of modification in parallel with changes in adiposity during early childhood. Maternal smoking during pregnancy, maternal obesity, hypertension and diabetes increase the risk for metabolic risk in the offspring. Breast feeding is protective for cardiometabolic risk in this Australian cohort.",2012-11-01 +22692220,mcaGUI: microbial community analysis R-Graphical User Interface (GUI).,"

Unlabelled

Microbial communities have an important role in natural ecosystems and have an impact on animal and human health. Intuitive graphic and analytical tools that can facilitate the study of these communities are in short supply. This article introduces Microbial Community Analysis GUI, a graphical user interface (GUI) for the R-programming language (R Development Core Team, 2010). With this application, researchers can input aligned and clustered sequence data to create custom abundance tables and perform analyses specific to their needs. This GUI provides a flexible modular platform, expandable to include other statistical tools for microbial community analysis in the future.

Availability

The mcaGUI package and source are freely available as part of Bioconductor at http://www.bioconductor.org/packages/release/bioc/html/mcaGUI.html",2012-06-12 +21383924,The Antimicrobial Index: a comprehensive literature-based antimicrobial database and reference work.,"Although the ever-growing usage of antimicrobials in the fields of medicine, pharmacology, and microbiology has undoubtedly allowed for unprecedented advances in the scientific world, these advances are nevertheless accompanied by unprecedented challenges. Sharp increases in antibiotic usages have led to inefficient and wasteful usage practices. Bacterial resistances have dramatically increased and therefore hindered the effectiveness of traditional antibiotics, thus forcing many life-science professionals to turn to plant extracts and synthetic chemicals [1]. The Antimicrobial Index (TAMI) seeks to alleviate some of these mounting difficulties through the collection and centralization of relevant antimicrobial susceptibility data from journals. Data compiled for antimicrobials include: method of action, physical properties, resistance genes, side effects, and minimal inhibitory concentrations (MIC50, MIC90 and/or ranges). TAMI currently contains data on 960 antimicrobials and over 24,000 microorganisms (3,500 unique strains) which were collected from over 400 pieces of published literature. Volume and scope of the index have been and will continue to increase and it is hoped that such an index will further foster international cooperation and communication of antimicrobial-related knowledge. TAMI can be accessed at: http://antibiotics.toku-e.com/.",2011-01-22 +22766367,Perioperative mortality after pancreatectomy: a risk score to aid decision-making.,"

Background

Undergoing a pancreatectomy obligates the patient to risks and benefits. For complex operations such as pancreatectomy, the objective assessment of baseline risks may be useful in decision-making. We developed an integer-based risk score estimating in-hospital mortality after pancreatectomy, incorporating institution-specific mortality rates to enhance its use.

Methods

Pancreatic resections were identified from the Nationwide Inpatient Sample (1998-2006), and categorized as proximal, distal, or nonspecified by the International Classification of Diseases, 9th edition. Logistic regression and bootstrap methods were used to estimate in-hospital mortality using demographics, diagnosis, comorbidities (Charlson index), procedure, and hospital volume; 80% of this cohort was selected randomly to create the score and 20% was used for validation. Score assignments were subsequently individually fitted to risk distributions around specific mortality rates.

Results

Sixteen thousand one hundred sixteen patient discharges were identified. Nationwide in-hospital mortality was 5.3%. Integers were assigned to predictors (age group, Charlson index, sex, diagnosis, pancreatectomy type, and hospital volume) and applied to an additive score. Three score groups were defined to stratify in-hospital mortality (national mortality, 1.3%, 4.9%, and 14.3%; P < .0001), with sufficient discrimination of derivation and validation sets (C statistics, 0.72 and 0.74). Score groups were shifted algorithmically to calculate risk based on institutional data (eg, with institutional mortality of 2.0%, low-, medium-, and high-risk patient groups had 0.5%, 1.9%, and 5.4% mortality, respectively). A web-based tool was developed and is available online (http://www.umassmed.edu/surgery/panc_mortality_custom.aspx).

Conclusion

To maximize patient benefit, objective assessment of risk for major procedures is necessary. We developed a Surgical Outcomes Analysis and Research risk score predicting pancreatectomy mortality that combines national and institution-specific data to enhance decision-making. This type of risk stratification tool may identify opportunities to improve care for patients undergoing specific operative procedures.",2012-07-03 +21464513,Smolign: a spatial motifs-based protein multiple structural alignment method.,"Availability of an effective tool for protein multiple structural alignment (MSTA) is essential for discovery and analysis of biologically significant structural motifs that can help solve functional annotation and drug design problems. Existing MSTA methods collect residue correspondences mostly through pairwise comparison of consecutive fragments, which can lead to suboptimal alignments, especially when the similarity among the proteins is low. We introduce a novel strategy based on: building a contact-window based motif library from the protein structural data, discovery and extension of common alignment seeds from this library, and optimal superimposition of multiple structures according to these alignment seeds by an enhanced partial order curve comparison method. The ability of our strategy to detect multiple correspondences simultaneously, to catch alignments globally, and to support flexible alignments, endorse a sensitive and robust automated algorithm that can expose similarities among protein structures even under low similarity conditions. Our method yields better alignment results compared to other popular MSTA methods, on several protein structure data sets that span various structural folds and represent different protein similarity levels. A web-based alignment tool, a downloadable executable, and detailed alignment results for the data sets used here are available at
http://sacan.biomed.drexel.edu/Smolign and http://bio.cse.ohio-state.edu/Smolign.",2011-03-30 +21569426,Fast network centrality analysis using GPUs.,"

Background

With the exploding volume of data generated by continuously evolving high-throughput technologies, biological network analysis problems are growing larger in scale and craving for more computational power. General Purpose computation on Graphics Processing Units (GPGPU) provides a cost-effective technology for the study of large-scale biological networks. Designing algorithms that maximize data parallelism is the key in leveraging the power of GPUs.

Results

We proposed an efficient data parallel formulation of the All-Pairs Shortest Path problem, which is the key component for shortest path-based centrality computation. A betweenness centrality algorithm built upon this formulation was developed and benchmarked against the most recent GPU-based algorithm. Speedup between 11 to 19% was observed in various simulated scale-free networks. We further designed three algorithms based on this core component to compute closeness centrality, eccentricity centrality and stress centrality. To make all these algorithms available to the research community, we developed a software package gpu-fan (GPU-based Fast Analysis of Networks) for CUDA enabled GPUs. Speedup of 10-50× compared with CPU implementations was observed for simulated scale-free networks and real world biological networks.

Conclusions

gpu-fan provides a significant performance improvement for centrality computation in large-scale networks. Source code is available under the GNU Public License (GPL) at http://bioinfo.vanderbilt.edu/gpu-fan/.",2011-05-12 +22238268,Predicting folding free energy changes upon single point mutations.,"

Motivation

The folding free energy is an important characteristic of proteins stability and is directly related to protein's wild-type function. The changes of protein's stability due to naturally occurring mutations, missense mutations, are typically causing diseases. Single point mutations made in vitro are frequently used to assess the contribution of given amino acid to the stability of the protein. In both cases, it is desirable to predict the change of the folding free energy upon single point mutations in order to either provide insights of the molecular mechanism of the change or to design new experimental studies.

Results

We report an approach that predicts the free energy change upon single point mutation by utilizing the 3D structure of the wild-type protein. It is based on variation of the molecular mechanics Generalized Born (MMGB) method, scaled with optimized parameters (sMMGB) and utilizing specific model of unfolded state. The corresponding mutations are built in silico and the predictions are tested against large dataset of 1109 mutations with experimentally measured changes of the folding free energy. Benchmarking resulted in root mean square deviation = 1.78 kcal/mol and slope of the linear regression fit between the experimental data and the calculations was 1.04. The sMMGB is compared with other leading methods of predicting folding free energy changes upon single mutations and results discussed with respect to various parameters.

Availability

All the pdb files we used in this article can be downloaded from http://compbio.clemson.edu/downloadDir/mentaldisorders/sMMGB_pdb.rar.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-11 +21478488,Reliability-oriented bioinformatic networks visualization.,"

Summary

We present our protein-protein interaction (PPI) network visualization system RobinViz (reliability-oriented bioinformatic networks visualization). Clustering the PPI network based on gene ontology (GO) annotations or biclustered gene expression data, providing a clustered visualization model based on a central/peripheral duality, computing layouts with algorithms specialized for interaction reliabilities represented as weights, completely automated data acquisition, processing are notable features of the system.

Availability

RobinViz is a free, open-source software protected under GPL. It is written in C++ and Python, and consists of almost 30 000 lines of code, excluding the employed libraries. Source code, user manual and other Supplementary Material are available for download at http://code.google.com/p/robinviz/.",2011-04-09 +23192052,Hierarchical shrinkage priors and model fitting for high-dimensional generalized linear models. ,"Abstract Genetic and other scientific studies routinely generate very many predictor variables, which can be naturally grouped, with predictors in the same groups being highly correlated. It is desirable to incorporate the hierarchical structure of the predictor variables into generalized linear models for simultaneous variable selection and coefficient estimation. We propose two prior distributions: hierarchical Cauchy and double-exponential distributions, on coefficients in generalized linear models. The hierarchical priors include both variable-specific and group-specific tuning parameters, thereby not only adopting different shrinkage for different coefficients and different groups but also providing a way to pool the information within groups. We fit generalized linear models with the proposed hierarchical priors by incorporating flexible expectation-maximization (EM) algorithms into the standard iteratively weighted least squares as implemented in the general statistical package R. The methods are illustrated with data from an experiment to identify genetic polymorphisms for survival of mice following infection with Listeria monocytogenes. The performance of the proposed procedures is further assessed via simulation studies. 
The methods are implemented in a freely available R package BhGLM (http://www.ssg.uab.edu/bhglm/).",2012-11-26 +23517329,"Constraint Network Analysis (CNA): a Python software package for efficiently linking biomacromolecular structure, flexibility, (thermo-)stability, and function.","For deriving maximal advantage from information on biomacromolecular flexibility and rigidity, results from rigidity analyses must be linked to biologically relevant characteristics of a structure. Here, we describe the Python-based software package Constraint Network Analysis (CNA) developed for this task. CNA functions as a front- and backend to the graph-based rigidity analysis software FIRST. CNA goes beyond the mere identification of flexible and rigid regions in a biomacromolecule in that it (I) provides a refined modeling of thermal unfolding simulations that also considers the temperature-dependence of hydrophobic tethers, (II) allows performing rigidity analyses on ensembles of network topologies, either generated from structural ensembles or by using the concept of fuzzy noncovalent constraints, and (III) computes a set of global and local indices for quantifying biomacromolecular stability. This leads to more robust results from rigidity analyses and extends the application domain of rigidity analyses in that phase transition points (""melting points"") and unfolding nuclei (""structural weak spots"") are determined automatically. Furthermore, CNA robustly handles small-molecule ligands in general. Such advancements are important for applying rigidity analysis to data-driven protein engineering and for estimating the influence of ligand molecules on biomacromolecular stability. CNA maintains the efficiency of FIRST such that the analysis of a single protein structure takes a few seconds for systems of several hundred residues on a single core. These features make CNA an interesting tool for linking biomacromolecular structure, flexibility, (thermo-)stability, and function. 
CNA is available from http://cpclab.uni-duesseldorf.de/software for nonprofit organizations.",2013-04-08 +22492640,PaGeFinder: quantitative identification of spatiotemporal pattern genes.,"

Unlabelled

Pattern Gene Finder (PaGeFinder) is a web-based server for on-line detection of gene expression patterns from serial transcriptomic data generated by high-throughput technologies like microarray or next-generation sequencing. Three particular parameters, the specificity measure, the dispersion measure and the contribution measure, were introduced and implemented in PaGeFinder to help quantitative and interactive identification of pattern genes like housekeeping genes, specific (selective) genes and repressed genes. Besides the on-line computation service, the PaGeFinder also provides downloadable Java programs for local detection of gene expression patterns.

Availability

http://bioinf.xmu.edu.cn:8080/PaGeFinder/index.jsp",2012-04-06 +22796960,GAPIT: genome association and prediction integrated tool.,"

Summary

Software programs that conduct genome-wide association studies and genomic prediction and selection need to use methodologies that maximize statistical power, provide high prediction accuracy and run in a computationally efficient manner. We developed an R package called Genome Association and Prediction Integrated Tool (GAPIT) that implements advanced statistical methods including the compressed mixed linear model (CMLM) and CMLM-based genomic prediction and selection. The GAPIT package can handle large datasets in excess of 10 000 individuals and 1 million single-nucleotide polymorphisms with minimal computational time, while providing user-friendly access and concise tables and graphs to interpret results.

Availability

http://www.maizegenetics.net/GAPIT.

Contact

zhiwu.zhang@cornell.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-13 +23445440,New enumeration algorithm for protein structure comparison and classification.,"

Background

Protein structure comparison and classification is an effective method for exploring protein structure-function relations. This problem is computationally challenging. Many different computational approaches for protein structure comparison apply the secondary structure elements (SSEs) representation of protein structures.

Results

We study the complexity of the protein structure comparison problem based on a mixed-graph model with respect to different computational frameworks. We develop an effective approach for protein structure comparison based on a novel independent set enumeration algorithm. Our approach (named: ePC, efficient enumeration-based Protein structure Comparison) is tested for general purpose protein structure comparison as well as for specific protein examples. Compared with other graph-based approaches for protein structure comparison, the theoretical running-time O(1.47^{rn} n^2) of our approach ePC is significantly better, where n is the smaller number of SSEs of the two proteins and r is a parameter of small value.

Conclusion

Through the enumeration algorithm, our approach can identify different substructures from a list of high-scoring solutions of biological interest. Our approach is flexible to conduct protein structure comparison with the SSEs in sequential and non-sequential order as well. Supplementary data of additional testing and the source of ePC will be available at http://bioinformatics.astate.edu/.",2013-02-15 +23418397,Ultrafast approximation for phylogenetic bootstrap.,"Nonparametric bootstrap has been a widely used tool in phylogenetic analysis to assess the clade support of phylogenetic trees. However, with the rapidly growing amount of data, this task remains a computational bottleneck. Recently, approximation methods such as the RAxML rapid bootstrap (RBS) and the Shimodaira-Hasegawa-like approximate likelihood ratio test have been introduced to speed up the bootstrap. Here, we suggest an ultrafast bootstrap approximation approach (UFBoot) to compute the support of phylogenetic groups in maximum likelihood (ML) based trees. To achieve this, we combine the resampling estimated log-likelihood method with a simple but effective collection scheme of candidate trees. We also propose a stopping rule that assesses the convergence of branch support values to automatically determine when to stop collecting candidate trees. UFBoot achieves a median speed up of 3.1 (range: 0.66-33.3) to 10.2 (range: 1.32-41.4) compared with RAxML RBS for real DNA and amino acid alignments, respectively. Moreover, our extensive simulations show that UFBoot is robust against moderate model violations and the support values obtained appear to be relatively unbiased compared with the conservative standard bootstrap. This provides a more direct interpretation of the bootstrap support. 
We offer an efficient and easy-to-use software (available at http://www.cibiv.at/software/iqtree) to perform the UFBoot analysis with ML tree inference.",2013-02-15 +23564843,LibSBMLSim: a reference implementation of fully functional SBML simulator.,"

Motivation

The Systems Biology Markup Language (SBML) is currently supported by >230 software tools, among which 160 support numerical integration of ordinary differential equation (ODE) models. Although SBML is a widely accepted standard within this field, there is still no language-neutral library that supports all features of SBML for simulating ODE models. Therefore, a demand exists for a simple portable implementation of a numerical integrator that supports SBML to enhance the development of a computational platform for systems biology.

Results

We implemented a library called libSBMLSim, which supports all the features of SBML and confirmed that the library passes all tests in the SBML test suite including those for SBML Events, AlgebraicRules, 'fast' attribute on Reactions and Delay. LibSBMLSim is implemented in the C programming language and does not depend on any third-party library except libSBML, which is a library to handle SBML documents. For the numerical integrator, both explicit and implicit methods are written from scratch to support all the functionality of SBML features in a straightforward implementation. We succeeded in implementing libSBMLSim as a platform-independent library that can run on most common operating systems (Windows, MacOSX and Linux) and also provides several language bindings (Java, C#, Python and Ruby).

Availability

The source code of libSBMLSim is available from http://fun.bio.keio.ac.jp/software/libsbmlsim/. LibSBMLSim is distributed under the terms of LGPL.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-05 +22342116,Hepatitis B virus variant with the a194t substitution within reverse transcriptase before and under adefovir and tenofovir therapy.,"Retrospective analysis of our local HBV reverse transcriptase (rt) sequence database including 973 sequences recovered from 616 patients identified one unique HBV DNA sequence harbouring amino acid (aa) substitution rtA194T, which has been suspected to confer reduced susceptibility to tenofovir but whose implication in antiviral resistance has not been confirmed. This sequence has been recovered from the blood sample of a 35-year-old man presenting with chronic hepatitis B and cirrhosis, at time of initiation of HBV therapy with adefovir (ADV). Retrospective analysis showed that viruses were rt194A eleven months earlier. Nonetheless, rt sequences recovered from the two sequential serum samples showed 98% nucleotide identity and were clustered in phylogenetic reconstruction. Clonal sequencing was performed retrospectively, which showed that rt194A HBV sequences were the only detected in the earliest sample and rt194T HBV sequences were the only detected in the later sample. HBV rtA194T mutants were still the majority quasi-species 17 months after being identified for the first time. HBV genotype determined by means of population sequencing then phylogeny reconstruction was E. HBV harboring a rt194T can be naturally observed, although very rarely, in absence of any prior therapy. Indeed, they represent six (0.2%) of 3110 sequences recovered from drug naive patients in the Stanford HBV sequence database (http://hivdb.stanford.edu/HBV/DB/cgi-bin/MutPrevByGenotypeRxHBV.cgi). 
In the present observation, we cannot interpret the virological response under anti-HBV therapy due to short follow-up and nonoptimal drug compliance, as indicated by patient's interview and TDF plasma Ctrough determination.",2012-02-18 +22366368,ITS2 database IV: interactive taxon sampling for internal transcribed spacer 2 based phylogenies.,"The first step of any molecular phylogenetic analysis is the selection of the species and sequences to be included, the taxon sampling. Already here different pitfalls exist. Sequences can contain errors, annotations in databases can be inaccurate and even the taxonomic classification of a species can be wrong. Usually, these artefacts become evident only after calculation of the phylogenetic tree. Following, the taxon sampling has to be corrected iteratively. This can become tedious and time consuming, as in most cases the taxon sampling is de-coupled from the further steps of the phylogenetic analysis. Here, we present the ITS2 Workbench (http://its2.bioapps.biozentrum.uni-wuerzburg.de/), which eliminates this problem by a tight integration of taxon sampling, secondary structure prediction, multiple alignment and phylogenetic tree calculation. The ITS2 Workbench has access to more than 280,000 ITS2 sequences and their structures provided by the ITS2 database enabling sequence-structure based alignment and tree reconstruction. This allows the interactive improvement of the taxon sampling throughout the whole phylogenetic tree reconstruction process. Thus, the ITS2 Workbench enables a fast, interactive and iterative taxon sampling leading to more accurate ITS2 based phylogenies.",2012-02-18 +23812990,ThreaDom: extracting protein domain boundary information from multiple threading alignments.,"

Motivation

Protein domains are subunits that can fold and evolve independently. Identification of domain boundary locations is often the first step in protein folding and function annotations. Most of the current methods deduce domain boundaries by sequence-based analysis, which has low accuracy. There is no efficient method for predicting discontinuous domains that consist of segments from separated sequence regions. As template-based methods are most efficient for protein 3D structure modeling, combining multiple threading alignment information should increase the accuracy and reliability of computational domain predictions.

Result

We developed a new protein domain predictor, ThreaDom, which deduces domain boundary locations based on multiple threading alignments. The core of the method development is the derivation of a domain conservation score that combines information from template domain structures and terminal and internal alignment gaps. Tested on 630 non-redundant sequences, without using homologous templates, ThreaDom generates correct single- and multi-domain classifications in 81% of cases, where 78% have the domain linker assigned within ±20 residues. In a second test on 486 proteins with discontinuous domains, ThreaDom achieves an average precision 84% and recall 65% in domain boundary prediction. Finally, ThreaDom was examined on 56 targets from CASP8 and had a domain overlap rate 73, 87 and 85% with the target for Free Modeling, Hard multiple-domain and discontinuous domain proteins, respectively, which are significantly higher than most domain predictors in the CASP8. Similar results were achieved on the targets from the most recently CASP9 and CASP10 experiments.

Availability

http://zhanglab.ccmb.med.umich.edu/ThreaDom/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +23325618,DSK: k-mer counting with very low memory usage.,"

Summary

Counting all the k-mers (substrings of length k) in DNA/RNA sequencing reads is the preliminary step of many bioinformatics applications. However, state of the art k-mer counting methods require that a large data structure resides in memory. Such structure typically grows with the number of distinct k-mers to count. We present a new streaming algorithm for k-mer counting, called DSK (disk streaming of k-mers), which only requires a fixed user-defined amount of memory and disk space. This approach realizes a memory, time and disk trade-off. The multi-set of all k-mers present in the reads is partitioned, and partitions are saved to disk. Then, each partition is separately loaded in memory in a temporary hash table. The k-mer counts are returned by traversing each hash table. Low-abundance k-mers are optionally filtered. DSK is the first approach that is able to count all the 27-mers of a human genome dataset using only 4.0 GB of memory and moderate disk space (160 GB), in 17.9 h. DSK can replace a popular k-mer counting software (Jellyfish) on small-memory servers.

Availability

http://minia.genouest.org/dsk",2013-01-16 +21970648,Ortho2ExpressMatrix--a web server that interprets cross-species gene expression data by gene family information.,"

Background

The study of gene families is pivotal for the understanding of gene evolution across different organisms and such phylogenetic background is often used to infer biochemical functions of genes. Modern high-throughput experiments offer the possibility to analyze the entire transcriptome of an organism; however, it is often difficult to deduce functional information from that data.

Results

To improve functional interpretation of gene expression we introduce Ortho2ExpressMatrix, a novel tool that integrates complex gene family information, computed from sequence similarity, with comparative gene expression profiles of two pre-selected biological objects: gene families are displayed with two-dimensional matrices. Parameters of the tool are object type (two organisms, two individuals, two tissues, etc.), type of computational gene family inference, experimental meta-data, microarray platform, gene annotation level and genome build. Family information in Ortho2ExpressMatrix bases on computationally different protein family approaches such as EnsemblCompara, InParanoid, SYSTERS and Ensembl Family. Currently, respective all-against-all associations are available for five species: human, mouse, worm, fruit fly and yeast. Additionally, microRNA expression can be examined with respect to miRBase or TargetScan families. The visualization, which is typical for Ortho2ExpressMatrix, is performed as matrix view that displays functional traits of genes (differential expression) as well as sequence similarity of protein family members (BLAST e-values) in colour codes. Such translations are intended to facilitate the user's perception of the research object.

Conclusions

Ortho2ExpressMatrix integrates gene family information with genome-wide expression data in order to enhance functional interpretation of high-throughput analyses on diseases, environmental factors, or genetic modification or compound treatment experiments. The tool explores differential gene expression in the light of orthology, paralogy and structure of gene families up to the point of ambiguity analyses. Results can be used for filtering and prioritization in functional genomic, biomedical and systems biology applications. The web server is freely accessible at http://bioinf-data.charite.de/o2em/cgi-bin/o2em.pl.",2011-10-04 +22121222,NTRFinder: a software tool to find nested tandem repeats.,"We introduce the software tool NTRFinder to search for a complex repetitive structure in DNA we call a nested tandem repeat (NTR). An NTR is a recurrence of two or more distinct tandem motifs interspersed with each other. We propose that NTRs can be used as phylogenetic and population markers. We have tested our algorithm on both real and simulated data, and present some real NTRs of interest. NTRFinder can be downloaded from http://www.maths.otago.ac.nz/~aamatroud/.",2011-11-25 +22345424,The plant proteome folding project: structure and positive selection in plant protein families.,"Despite its importance, relatively little is known about the relationship between the structure, function, and evolution of proteins, particularly in land plant species. We have developed a database with predicted protein domains for five plant proteomes (http://pfp.bio.nyu.edu) and used both protein structural fold recognition and de novo Rosetta-based protein structure prediction to predict protein structure for Arabidopsis and rice proteins. Based on sequence similarity, we have identified ~15,000 orthologous/paralogous protein family clusters among these species and used codon-based models to predict positive selection in protein evolution within 175 of these sequence clusters. 
Our results show that codons that display positive selection appear to be less frequent in helical and strand regions and are overrepresented in amino acid residues that are associated with a change in protein secondary structure. Like in other organisms, disordered protein regions also appear to have more selected sites. Structural information provides new functional insights into specific plant proteins and allows us to map positively selected amino acid sites onto protein structures and view these sites in a structural and functional context.",2012-02-16 +21041879,Efficient sparse voxel octrees.,"In this paper, we examine the possibilities of using voxel representations as a generic way for expressing complex and feature-rich geometry on current and future GPUs. We present in detail a compact data structure for storing voxels and an efficient algorithm for performing ray casts using this structure. We augment the voxel data with novel contour information that increases geometric resolution, allows more compact encoding of smooth surfaces, and accelerates ray casts. We also employ a novel normal compression format for storing high-precision object-space normals. Finally, we present a variable-radius postprocess filtering technique for smoothing out blockiness caused by discrete sampling of shading attributes. Based on benchmark results, we show that our voxel representation is competitive with triangle-based representations in terms of ray casting performance, while allowing tremendously greater geometric detail and unique shading information for every voxel. Our voxel codebase is open sourced and available at http://code.google.com/p/efficient-sparse-voxel-octrees/.",2011-08-01 +22336888,"Database of the clinical phenotypes, genotypes and mutant arylsulfatase B structures in mucopolysaccharidosis type VI.","Mucopolysaccharidosis type VI (MPS VI) is a genetic disorder caused by a deficiency of arylsulfatase B (ARSB). 
In our previous study, we investigated the structural changes in ARSB caused by amino acid substitutions associated with MPS VI, and revealed that such structural changes in ARSB were correlated with the clinical phenotypes. To the best of our knowledge, there is no database containing the structures of mutant ARSBs. Here, we built a database of clinical phenotypes, genotypes and structures of mutant ARSBs (http://mps6-database.org). This database can be accessed via the Internet, and is user friendly being equipped with powerful computational tools. This database will be useful for a better understanding of MPS VI.",2012-02-16 +22345617,DOMIRE: a web server for identifying structural domains and their neighbors in proteins.,"

Summary

The DOMIRE web server implements a novel, automatic, protein structural domain assignment procedure based on 3D substructures of the query protein which are also found within structures of a non-redundant protein database. These common 3D substructures are transformed into a co-occurrence matrix that offers a global view of the protein domain organization. Three different algorithms are employed to define structural domain boundaries from this co-occurrence matrix. For each query, a list of structural neighbors and their alignments are provided. DOMIRE, by displaying the protein structural domain organization, can be a useful tool for defining protein common cores and for unravelling the evolutionary relationship between different proteins.

Availability

http://genome.jouy.inra.fr/domire

Contact

jean.garnier@jouy.inra.fr.",2012-02-15 +22348130,HIPPIE: Integrating protein interaction networks with experiment based quality scores.,"Protein function is often modulated by protein-protein interactions (PPIs) and therefore defining the partners of a protein helps to understand its activity. PPIs can be detected through different experimental approaches and are collected in several expert curated databases. These databases are used by researchers interested in examining detailed information on particular proteins. In many analyses the reliability of the characterization of the interactions becomes important and it might be necessary to select sets of PPIs of different confidence levels. To this goal, we generated HIPPIE (Human Integrated Protein-Protein Interaction rEference), a human PPI dataset with a normalized scoring scheme that integrates multiple experimental PPI datasets. HIPPIE's scoring scheme has been optimized by human experts and a computer algorithm to reflect the amount and quality of evidence for a given PPI and we show that these scores correlate to the quality of the experimental characterization. The HIPPIE web tool (available at http://cbdm.mdc-berlin.de/tools/hippie) allows researchers to do network analyses focused on likely true PPI sets by generating subnetworks around proteins of interest at a specified confidence level.",2012-02-14 +22348024,Quantification of miRNA-mRNA interactions.,"miRNAs are small RNA molecules (~22 nt) that interact with their corresponding target mRNAs inhibiting the translation of the mRNA into proteins and cleaving the target mRNA. This second effect diminishes the overall expression of the target mRNA. Several miRNA-mRNA relationship databases have been deployed, most of them based on sequence complementarities. However, the number of false positives in these databases is large and they do not overlap completely. 
Recently, it has been proposed to combine expression measurement from both miRNA and mRNA and sequence based predictions to achieve more accurate relationships. In our work, we use LASSO regression with non-positive constraints to integrate both sources of information. LASSO enforces the sparseness of the solution and the non-positive constraints restrict the search of miRNA targets to those with down-regulation effects on the mRNA expression. We named this method TaLasso (miRNA-Target LASSO). We used TaLasso on two public datasets that have paired expression levels of human miRNAs and mRNAs. The top ranked interactions recovered by TaLasso are especially enriched (more than using any other algorithm) in experimentally validated targets. The functions of the genes with mRNA transcripts in the top-ranked interactions are meaningful. This is not the case using other algorithms. TaLasso is available as Matlab or R code. There is also a web-based tool for human miRNAs at http://talasso.cnb.csic.es/.",2012-02-14 +24427527,On-lattice agent-based simulation of populations of cells within the open-source Chaste framework.,"Over the years, agent-based models have been developed that combine cell division and reinforced random walks of cells on a regular lattice, reaction-diffusion equations for nutrients and growth factors; and ordinary differential equations for the subcellular networks regulating the cell cycle. When linked to a vascular layer, this multiple scale model framework has been applied to tumour growth and therapy. Here, we report on the creation of an agent-based multi-scale environment amalgamating the characteristics of these models within a Virtual Physiological Human (VPH) Exemplar Project. This project enables reuse, integration, expansion and sharing of the model and relevant data. 
The agent-based and reaction-diffusion parts of the multi-scale model have been implemented and are available for download as part of the latest public release of Chaste (Cancer, Heart and Soft Tissue Environment; http://www.cs.ox.ac.uk/chaste/), part of the VPH Toolkit (http://toolkit.vph-noe.eu/). The environment functionalities are verified against the original models, in addition to extra validation of all aspects of the code. In this work, we present the details of the implementation of the agent-based environment, including the system description, the conceptual model, the development of the simulation model and the processes of verification and validation of the simulation results. We explore the potential use of the environment by presenting exemplar applications of the 'what if' scenarios that can easily be studied in the environment. These examples relate to tumour growth, cellular competition for resources and tumour responses to hypoxia (low oxygen levels). We conclude our work by summarizing the future steps for the expansion of the current system.",2013-04-01 +22006916,Enrich: software for analysis of protein function by enrichment and depletion of variants.,"

Summary

Measuring the consequences of mutation in proteins is critical to understanding their function. These measurements are essential in such applications as protein engineering, drug development, protein design and genome sequence analysis. Recently, high-throughput sequencing has been coupled to assays of protein activity, enabling the analysis of large numbers of mutations in parallel. We present Enrich, a tool for analyzing such deep mutational scanning data. Enrich identifies all unique variants (mutants) of a protein in high-throughput sequencing datasets and can correct for sequencing errors using overlapping paired-end reads. Enrich uses the frequency of each variant before and after selection to calculate an enrichment ratio, which is used to estimate fitness. Enrich provides an interactive interface to guide users. It generates user-accessible output for downstream analyses as well as several visualizations of the effects of mutation on function, thereby allowing the user to rapidly quantify and comprehend sequence-function relationships.

Availability and implementation

Enrich is implemented in Python and is available under a FreeBSD license at http://depts.washington.edu/sfields/software/enrich/. Enrich includes detailed documentation as well as a small example dataset.

Contact

dfowler@uw.edu; fields@uw.edu

Supplementary information

Supplementary data is available at Bioinformatics online.",2011-10-17 +22333245,A method of finding optimal weight factors for compound identification in gas chromatography-mass spectrometry.,"

Motivation

The compound identification in gas chromatography-mass spectrometry (GC-MS) is achieved by matching the experimental mass spectrum to the mass spectra in a spectral library. It is known that the intensities with higher m/z value in the GC-MS mass spectrum are the most diagnostic. Therefore, to increase the relative significance of peak intensities of higher m/z value, the intensities and m/z values are usually transformed with a set of weight factors. A poor quality of weight factors can significantly decrease the accuracy of compound identification. With the significant enrichment of the mass spectral database and the broad application of GC-MS, it is important to re-visit the methods of discovering the optimal weight factors for high confident compound identification.

Results

We developed a novel approach to finding the optimal weight factors only through a reference library for high accuracy compound identification. The developed approach first calculates the ratio of skewness to kurtosis of the mass spectral similarity scores among spectra (compounds) in a reference library and then considers a weight factor with the maximum ratio as the optimal weight factor. We examined our approach by comparing the accuracy of compound identification using the mass spectral library maintained by the National Institute of Standards and Technology. The results demonstrate that the optimal weight factors for fragment ion peak intensity and m/z value found by the developed approach outperform the current weight factors for compound identification.

Availability

The results and R package are available at http://stage.louisville.edu/faculty/x0zhan17/software/software-development.",2012-02-13 +23161352,The role of microbial communities in parturition: is there evidence of association with preterm birth and perinatal morbidity and mortality?,"In 2005, the World Health Organization estimated that 9.6% or 12.9 million births worldwide were born preterm at <37 weeks of gestation and were accompanied by a mortality rate as high as 42% (http://www.who.int/bulletin/volumes/88/1/08-062554). Significant data suggesting that intrauterine infection is an important modifier for the risk of preterm birth have emerged over the past four decades. However, causative microbial culprits have yet to be identified, and interventional trials with antimicrobials have uniformly failed to demonstrate a significant benefit. To the contrary, treatment for clinically asymptomatic, commonly associated polymicrobial communities (i.e., bacterial vaginosis) has resulted in an increase in the rate of preterm birth. This article discusses the importance of vaginal microbiome and the variance in its composition during normal pregnancy. We will expand this discussion to include possible mechanisms that might trigger preterm birth in at-risk subjects. Finally, we will review why preterm birth may be an ideal forum with which to apply our rapidly expanding metagenomic sequencing and analytic pipelines to discern the role of host and microbe in the relative continuum of health and disease.",2012-11-16 +21775302,Comparative analysis of RNA-Seq alignment algorithms and the RNA-Seq unified mapper (RUM).,"

Motivation

A critical task in high-throughput sequencing is aligning millions of short reads to a reference genome. Alignment is especially complicated for RNA sequencing (RNA-Seq) because of RNA splicing. A number of RNA-Seq algorithms are available, and claim to align reads with high accuracy and efficiency while detecting splice junctions. RNA-Seq data are discrete in nature; therefore, with reasonable gene models and comparative metrics RNA-Seq data can be simulated to sufficient accuracy to enable meaningful benchmarking of alignment algorithms. The exercise to rigorously compare all viable published RNA-Seq algorithms has not been performed previously.

Results

We developed an RNA-Seq simulator that models the main impediments to RNA alignment, including alternative splicing, insertions, deletions, substitutions, sequencing errors and intron signal. We used this simulator to measure the accuracy and robustness of available algorithms at the base and junction levels. Additionally, we used reverse transcription-polymerase chain reaction (RT-PCR) and Sanger sequencing to validate the ability of the algorithms to detect novel transcript features such as novel exons and alternative splicing in RNA-Seq data from mouse retina. A pipeline based on BLAT was developed to explore the performance of established tools for this problem, and to compare it to the recently developed methods. This pipeline, the RNA-Seq Unified Mapper (RUM), performs comparably to the best current aligners and provides an advantageous combination of accuracy, speed and usability.

Availability

The RUM pipeline is distributed via the Amazon Cloud and for computing clusters using the Sun Grid Engine (http://cbil.upenn.edu/RUM).

Contact

ggrant@pcbi.upenn.edu; epierce@mail.med.upenn.edu

Supplementary information

The RNA-Seq sequence reads described in the article are deposited at GEO, accession GSE26248.",2011-07-19 +21349866,A high-throughput processing service for retention time alignment of complex proteomics and metabolomics LC-MS data.,"

Unlabelled

Warp2D is a novel time alignment approach, which uses the overlapping peak volume of the reference and sample peak lists to correct misleading peak shifts. Here, we present an easy-to-use web interface for high-throughput Warp2D batch processing time alignment service using the Dutch Life Science Grid, reducing processing time from days to hours. This service provides the warping function, the sample chromatogram peak list with adjusted retention times and normalized quality scores based on the sum of overlapping peak volume of all peaks. Heat maps before and after time alignment are created from the arithmetic mean of the sum of overlapping peak area rearranged with hierarchical clustering, allowing the quality control of the time alignment procedure. Taverna workflow and command line tool are provided for remote processing of local user data.

Availability

online data processing service is available at http://www.nbpp.nl/warp2d.html. Taverna workflow is available at myExperiment with title '2D Time Alignment-Webservice and Workflow' at http://www.myexperiment.org/workflows/1283.html. Command line tool is available at http://www.nbpp.nl/Warp2D_commandline.zip.

Contact

p.l.horvatovich@rug.nl

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-23 +22253532,ColorPhylo: A Color Code to Accurately Display Taxonomic Classifications.,"Color may be very useful to visualise complex data. As far as taxonomy is concerned, color may help observing various species' characteristics in correlation with classification. However, choosing the number of subclasses to display is often a complex task: on the one hand, assigning a limited number of colors to taxa of interest hides the structure imbedded in the subtrees of the taxonomy; on the other hand, differentiating a high number of taxa by giving them specific colors, without considering the underlying taxonomy, may lead to unreadable results since relationships between displayed taxa would not be supported by the color code. In the present paper, an automatic color coding scheme is proposed to visualise the levels of taxonomic relationships displayed as overlay on any kind of data plot. To achieve this goal, a dimensionality reduction method allows displaying taxonomic ""distances"" onto a Euclidean two-dimensional space. The resulting map is projected onto a 2D color space (the Hue, Saturation, Brightness colorimetric space with brightness set to 1). Proximity in the taxonomic classification corresponds to proximity on the map and is therefore materialised by color proximity. As a result, each species is related to a color code showing its position in the taxonomic tree. The so called ColorPhylo displays taxonomic relationships intuitively and can be combined with any biological result. A Matlab version of ColorPhylo is available at http://sy.lespi.free.fr/ColorPhylo-homepage.html. Meanwhile, an ad-hoc distance in case of taxonomy with unknown edge lengths is proposed.",2011-11-13 +22080467,Expression2Kinases: mRNA profiling linked to multiple upstream regulatory layers.,"

Motivation

Genome-wide mRNA profiling provides a snapshot of the global state of cells under different conditions. However, mRNA levels do not provide direct understanding of upstream regulatory mechanisms. Here, we present a new approach called Expression2Kinases (X2K) to identify upstream regulators likely responsible for observed patterns in genome-wide gene expression. By integrating chromatin immuno-precipitation (ChIP)-seq/chip and position weight matrices (PWMs) data, protein-protein interactions and kinase-substrate phosphorylation reactions, we can better identify regulatory mechanisms upstream of genome-wide differences in gene expression. We validated X2K by applying it to recover drug targets of food and drug administration (FDA)-approved drugs from drug perturbations followed by mRNA expression profiling; to map the regulatory landscape of 44 stem cells and their differentiating progeny; to profile upstream regulatory mechanisms of 327 breast cancer tumors; and to detect pathways from profiled hepatic stellate cells and hippocampal neurons. The X2K approach can advance our understanding of cell signaling and unravel drugs mechanisms of action.

Availability

The software and source code are freely available at: http://www.maayanlab.net/X2K.

Contact

avi.maayan@mssm.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-10 +21884570,Leveraging a clinical research information system to assist biospecimen data and workflow management: a hybrid approach.,"

Background

Large multi-center clinical studies often involve the collection and analysis of biological samples. It is necessary to ensure timely, complete and accurate recording of analytical results and associated phenotypic and clinical information. The TRIBE-AKI Consortium http://www.yale.edu/tribeaki supports a network of multiple related studies and sample biorepository, thus allowing researchers to take advantage of a larger specimen collection than they might have at an individual institution.

Description

We describe a biospecimen data management system (BDMS) that supports TRIBE-AKI and is intended for multi-center collaborative clinical studies that involve shipment of biospecimens between sites. This system works in conjunction with a clinical research information system (CRIS) that stores the clinical data associated with the biospecimens, along with other patient-related parameters. Inter-operation between the two systems is mediated by an interactively invoked suite of Web Services, as well as by batch code. We discuss various challenges involved in integration.

Conclusions

Our experience indicates that an approach that emphasizes inter-operability is reasonably optimal in allowing each system to be utilized for the tasks for which it is best suited.",2011-08-25 +22328782,Spliceman--a computational web server that predicts sequence variations in pre-mRNA splicing.,"

Summary

It was previously demonstrated that splicing elements are positional dependent. We exploited this relationship between location and function by comparing positional distributions between all possible 4096 hexamers around a database of human splice sites. The distance measure used in this study found point mutations that produced higher distances disrupted splicing, whereas point mutations with smaller distances generally had no effect on splicing. Reasoning the idea that functional splicing elements have signature positional distributions around constitutively spliced exons, we introduce Spliceman-an online tool that predicts how likely distant mutations around annotated splice sites were to disrupt splicing. Spliceman takes a set of DNA sequences with point mutations and returns a ranked list to predict the effects of point mutations on pre-mRNA splicing. The current implementation included the analyses of 11 genomes: human, chimp, rhesus, mouse, rat, dog, cat, chicken, guinea pig, frog and zebrafish.

Availability

Freely available on the web at http://fairbrother.biomed.brown.edu/spliceman/

Contact

fairbrother@brown.edu.",2012-02-10 +22454131,Revealing mammalian evolutionary relationships by comparative analysis of gene clusters.,"Many software tools for comparative analysis of genomic sequence data have been released in recent decades. Despite this, it remains challenging to determine evolutionary relationships in gene clusters due to their complex histories involving duplications, deletions, inversions, and conversions. One concept describing these relationships is orthology. Orthologs derive from a common ancestor by speciation, in contrast to paralogs, which derive from duplication. Discriminating orthologs from paralogs is a necessary step in most multispecies sequence analyses, but doing so accurately is impeded by the occurrence of gene conversion events. We propose a refined method of orthology assignment based on two paradigms for interpreting its definition: by genomic context or by sequence content. X-orthology (based on context) traces orthology resulting from speciation and duplication only, while N-orthology (based on content) includes the influence of conversion events. We developed a computational method for automatically mapping both types of orthology on a per-nucleotide basis in gene cluster regions studied by comparative sequencing, and we make this mapping accessible by visualizing the output. All of these steps are incorporated into our newly extended CHAP 2 package. We evaluate our method using both simulated data and real gene clusters (including the well-characterized α-globin and β-globin clusters). We also illustrate use of CHAP 2 by analyzing four more loci: CCL (chemokine ligand), IFN (interferon), CYP2abf (part of cytochrome P450 family 2), and KIR (killer cell immunoglobulin-like receptors). These new methods facilitate and extend our understanding of evolution at these and other loci by adding automated accurate evolutionary inference to the biologist's toolkit. 
The CHAP 2 package is freely available from http://www.bx.psu.edu/miller_lab.",2012-03-27 +21773767,RecurrenceOnline: an online analysis tool to determine breast cancer recurrence and hormone receptor status using microarray data.,"In the last decades, several gene expression-based predictors of clinical behavior were developed for breast cancer. A common feature of these is the use of multiple genes to predict hormone receptor status and the probability of tumor recurrence, survival or response to chemotherapy. We developed an online analysis tool to compute ER and HER2 status, Oncotype DX 21-gene recurrence score and an independent recurrence risk classification using gene expression data obtained by interrogation of Affymetrix microarray profiles. We implemented rigorous quality control algorithms to promptly exclude any biases related to sample processing, hybridization and scanning. After uploading the raw microarray data, the system performs the complete evaluation automatically and provides a report summarizing the results. The system is accessible online at http://www.recurrenceonline.com. We validated the system using data from 2,472 publicly available microarrays. The validation of the prediction of the 21-gene recurrence score was significant in lymph node negative patients (Cox-Mantel, P = 5.6E-16, HR = 0.4, CI = 0.32-0.5). A correct classification was obtained for 88.5% of ER- and 90.5% of ER + tumors (n = 1,894). The prediction of recurrence risk in all patients by using the mean of the independent six strongest genes (P < 1E-16, HR = 2.9, CI = 2.5-3.3), of the four strongest genes in lymph node negative ER positive patients (P < 1E-16, HR = 2.8, CI = 2.2-3.5) and of the three genes in lymph node positive patients (P = 3.2E-9, HR = 2.5, CI = 1.8-3.4) was highly significant. 
In summary, we integrated available knowledge in one platform to validate currently used predictors and to provide a global tool for the online determination of different prognostic parameters simultaneously using genome-wide microarrays.",2011-07-16 +22325123,YADAMP: yet another database of antimicrobial peptides.,"This work presents an antimicrobial peptide database (YADAMP) based on an extensive literature search. This database is focused primarily on bacteria, with detailed information for 2133 peptides active against bacteria. YADAMP was created to facilitate access to critical information on antimicrobial peptides (AMPs). The main difference between YADAMP and other web databases of AMPs is the explicit presence of antimicrobial activity against the most common bacterial strains. YADAMP allows complex queries, easily accessible through a web interface. Peptide information can be retrieved based on peptide name, number of amino acids, net charge, hydrophobic percentage, sequence motif, structure and activity against bacteria. YADAMP is suitable for reviewing information on AMPs and for structure-function analyses of peptides. The database can be accessed via a web-based browser at http://www.yadamp.unisa.it.",2012-02-09 +22318347,On the trail of EHEC/EAEC--unraveling the gene regulatory networks of human pathogenic Escherichia coli bacteria.,"Pathogenic Escherichia coli, such as Enterohemorrhagic E. coli (EHEC) and Enteroaggregative E. coli (EAEC), are globally widespread bacteria. Some may cause the hemolytic uremic syndrome (HUS). Varying strains cause epidemics all over the world. Recently, we observed an epidemic outbreak of a multi-resistant EHEC strain in Western Europe, mainly in Germany. The Robert Koch Institute reports >4300 infections and >50 deaths (July, 2011). Farmers lost several million EUR since the origin of infection was unclear. 
Here, we contribute to the currently ongoing research with a computer-aided study of EHEC transcriptional regulatory interactions, a network of genetic switches that control, for instance, pathogenicity, survival and reproduction of bacterial cells. Our strategy is to utilize knowledge of gene regulatory networks from the evolutionary relative E. coli K-12, a harmless strain mainly used for wet lab studies. In order to provide high-potential candidates for human pathogenic E. coli bacteria, such as EHEC, we developed the integrated online database and an analysis platform EhecRegNet. We utilize 3489 known regulations from E. coli K-12 for predictions of yet unknown gene regulatory interactions in 16 human pathogens. For these strains we predict 40,913 regulatory interactions. EhecRegNet is based on the identification of evolutionarily conserved regulatory sites within the DNA of the harmless E. coli K-12 and the pathogens. Identifying and characterizing EHEC's genetic control mechanism network on a large scale will allow for a better understanding of its survival and infection strategies. This will support the development of urgently needed new treatments. EhecRegNet is online via http://www.ehecregnet.de.",2012-02-09 +22392968,Binding site and affinity prediction of general anesthetics to protein targets using docking.,"

Background

The protein targets for general anesthetics remain unclear. A tool to predict anesthetic binding for potential binding targets is needed. In this study, we explored whether a computational method, AutoDock, could serve as such a tool.

Methods

High-resolution crystal data of water-soluble proteins (cytochrome C, apoferritin, and human serum albumin), and a membrane protein (a pentameric ligand-gated ion channel from Gloeobacter violaceus [GLIC]) were used. Isothermal titration calorimetry (ITC) experiments were performed to determine anesthetic affinity in solution conditions for apoferritin. Docking calculations were performed using DockingServer with the Lamarckian genetic algorithm and the Solis and Wets local search method (http://www.dockingserver.com/web). Twenty general anesthetics were docked into apoferritin. The predicted binding constants were compared with those obtained from ITC experiments for potential correlations. In the case of apoferritin, details of the binding site and their interactions were compared with recent cocrystallization data. Docking calculations for 6 general anesthetics currently used in clinical settings (isoflurane, sevoflurane, desflurane, halothane, propofol, and etomidate) with known 50% effective concentration (EC(50)) values were also performed in all tested proteins. The binding constants derived from docking experiments were compared with known EC(50) values and octanol/water partition coefficients for the 6 general anesthetics.

Results

All 20 general anesthetics docked unambiguously into the anesthetic binding site identified in the crystal structure of apoferritin. The binding constants for 20 anesthetics obtained from the docking calculations correlate significantly with those obtained from ITC experiments (P = 0.04). In the case of GLIC, the identified anesthetic binding sites in the crystal structure are among the docking predicted binding sites, but not the top ranked site. Docking calculations suggest a most probable binding site located in the extracellular domain of GLIC. The predicted affinities correlated significantly with the known EC(50) values for the 6 frequently used anesthetics in GLIC for the site identified in the experimental crystal data (P = 0.006). However, predicted affinities in apoferritin, human serum albumin, and cytochrome C did not correlate with these 6 anesthetics' known experimental EC(50) values. A weak correlation between the predicted affinities and the octanol/water partition coefficients was observed for the sites in GLIC.

Conclusion

We demonstrated that anesthetic binding sites and relative affinities can be predicted using docking calculations in an automatic docking server (AutoDock) for both water-soluble and membrane proteins. Correlation of predicted affinity and EC(50) for 6 frequently used general anesthetics was only observed in GLIC, a member of a protein family relevant to anesthetic mechanism.",2012-03-05 +23237707,"LMP1 and LMP2A are potential prognostic markers of extranodal NK/T-cell lymphoma, nasal type (ENKTL).","

Background

Latent membrane protein (LMP) 1 and LMP2A encoded by Epstein-Barr virus (EBV) are associated with the development of malignancies, but their expression in extranodal NK/T-cell lymphoma, nasal type (ENKTL) and the relationship with clinical characteristics of this disease remain poorly understood. In the present study, we examined the expression of LMP1 and LMP2A in ENKTL, and investigated the correlations between LMP1 and LMP2A expression with clinicopathological characteristics of ENKTL patients.

Methods

Paraffin sections of surgically removed samples from 16 ENKTL patients were analyzed by immunohistochemistry and the related clinicopathological data were collected and analyzed.

Results

Elevated expression (immunohistochemistry score ≥ 4) of LMP1 and LMP2A was detected in the tumor cells of ENKTL. High LMP1 expression was associated with positive B symptoms (p = 0.012), while high LMP2A expression was related to gender (p = 0.029). The expression of both LMP1 and LMP2A showed significant correlations with patients' overall survival (p = 0.049, p = 0.036).

Conclusion

LMP1 and LMP2A may be prognostic indicators of survival in patients with ENKTL.

Virtual slides

http://www.diagnosticpathology.diagnomx.eu/vs/2443352538545899.",2012-12-13 +22332235,Positional correlation analysis improves reconstruction of full-length transcripts and alternative isoforms from noisy array signals or short reads.,"

Motivation

A reconstruction of full-length transcripts observed by next-generation sequencer or tiling arrays is an essential technique to know all phenomena of transcriptomes. Several techniques of the reconstruction have been developed. However, problems of high-level noises and biases still remain and interrupt the reconstruction. A method is required that is robust against noise and bias and correctly reconstructs transcripts regardless of equipment used.

Results

We propose a completely new statistical method that reconstructs full-length transcripts and can be applied on both next-generation sequencers and tiling arrays. The method called ARTADE2 analyzes 'positional correlation', meaning correlations of expression values for every combination on genomic positions of multiple transcriptional data. ARTADE2 then reconstructs full-length transcripts using a logistic model based on the positional correlation and the Markov model. ARTADE2 elucidated 17 591 full-length transcripts from 55 transcriptome datasets and showed notable performance compared with other recent prediction methods. Moreover, 1489 novel transcripts were discovered. We experimentally tested 16 novel transcripts, among which 14 were confirmed by reverse transcription-polymerase chain reaction and sequence mapping. The method also showed notable performance for reconstructing of mRNA observed by a next-generation sequencer. Moreover, the positional correlation and factor analysis embedded in ARTADE2 successfully detected regions at which alternative isoforms may exist, and thus are expected to be applied for discovering transcript biomarkers for a wide range of disciplines including preemptive medicine.

Availability

http://matome.base.riken.jp

Contact

toyoda@base.riken.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-02-13 +22309450,Medicago truncatula transporter database: a comprehensive database resource for M. truncatula transporters.,"

Background

Medicago truncatula has been chosen as a model species for genomic studies. It is closely related to an important legume, alfalfa. Transporters are a large group of membrane-spanning proteins. They deliver essential nutrients, eject waste products, and assist the cell in sensing environmental conditions by forming a complex system of pumps and channels. Although studies have effectively characterized individual M. truncatula transporters in several databases, until now there has been no available systematic database that includes all transporters in M. truncatula.

Description

The M. truncatula transporter database (MTDB) contains comprehensive information on the transporters in M. truncatula. Based on the TransportTP method, we have presented a novel prediction pipeline. A total of 3,665 putative transporters have been annotated based on International Medicago Genome Annotated Group (IMGAG) V3.5 V3 and the M. truncatula Gene Index (MTGI) V10.0 releases and assigned to 162 families according to the transporter classification system. These families were further classified into seven types according to their transport mode and energy coupling mechanism. Extensive annotations referring to each protein were generated, including basic protein function, expressed sequence tag (EST) mapping, genome locus, three-dimensional template prediction, transmembrane segment, and domain annotation. A chromosome distribution map and text-based Basic Local Alignment Search Tools were also created. In addition, we have provided a way to explore the expression of putative M. truncatula transporter genes under stress treatments.

Conclusions

In summary, the MTDB enables the exploration and comparative analysis of putative transporters in M. truncatula. A user-friendly web interface and regular updates make MTDB valuable to researchers in related fields. The MTDB is freely available now to all users at http://bioinformatics.cau.edu.cn/MtTransporter/.",2012-02-06 +23044552,Predicting pseudoknotted structures across two RNA sequences.,"

Motivation

Laboratory RNA structure determination is demanding and costly, and thus computational structure prediction is an important task. Single sequence methods for RNA secondary structure prediction are limited by the accuracy of the underlying folding model; if a structure is supported by a family of evolutionarily related sequences, one can be more confident that the prediction is accurate. RNA pseudoknots are functional elements, which have highly conserved structures. However, few comparative structure prediction methods can handle pseudoknots due to the computational complexity.

Results

A comparative pseudoknot prediction method called DotKnot-PW is introduced based on structural comparison of secondary structure elements and H-type pseudoknot candidates. DotKnot-PW outperforms other methods from the literature on a hand-curated test set of RNA structures with experimental support.

Availability

DotKnot-PW and the RNA structure test set are available at the web site http://dotknot.csse.uwa.edu.au/pw.

Contact

janaspe@csse.uwa.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-08 +21856737,Comparative analysis of algorithms for next-generation sequencing read alignment.,"

Motivation

The advent of next-generation sequencing (NGS) techniques presents many novel opportunities for many applications in life sciences. The vast number of short reads produced by these techniques, however, poses significant computational challenges. The first step in many types of genomic analysis is the mapping of short reads to a reference genome, and several groups have developed dedicated algorithms and software packages to perform this function. As the developers of these packages optimize their algorithms with respect to various considerations, the relative merits of different software packages remain unclear. However, for scientists who generate and use NGS data for their specific research projects, an important consideration is choosing the software that is most suitable for their application.

Results

With a view to comparing existing short read alignment software, we develop a simulation and evaluation suite, Seal, which simulates NGS runs for different configurations of various factors, including sequencing error, indels and coverage. We also develop criteria to compare the performances of software with disparate output structure (e.g. some packages return a single alignment while some return multiple possible alignments). Using these criteria, we comprehensively evaluate the performances of Bowtie, BWA, mr- and mrsFAST, Novoalign, SHRiMP and SOAPv2, with regard to accuracy and runtime.

Conclusion

We expect that the results presented here will be useful to investigators in choosing the alignment software that is most suitable for their specific research aims. Our results also provide insights into the factors that should be considered to use alignment results effectively. Seal can also be used to evaluate the performance of algorithms that use deep sequencing data for various purposes (e.g. identification of genomic variants).

Availability

Seal is available as open source at http://compbio.case.edu/seal/.

Contact

matthew.ruffalo@case.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-19 +23788715,Iterative reconstruction algorithm for CT: can radiation dose be decreased while low-contrast detectability is preserved?,"

Purpose

To compare the low-contrast detectability and image quality of computed tomography (CT) at different radiation dose levels reconstructed with iterative reconstruction (IR) and filtered back projection (FBP).

Materials and methods

A custom liver phantom with 12 simulated hypoattenuating tumors (diameters of 5, 10, 15, and 20 mm; tumor-to-liver contrast values of -10, -20, and -40 HU) was designed. The phantom was scanned with a standard abdominal CT protocol with a volume CT dose index of 21.6 mGy (equivalent 100% dose) and four low-dose protocols (20%, 40%, 60%, and 80% of the standard protocol dose). CT data sets were reconstructed with IR and FBP. Image noise was measured, and the tumors' contrast-to-noise ratios (CNRs) were calculated. Tumor detection was independently assessed by three radiologists who were blinded to the CT technique used. A total of 840 simulated tumors were presented to the radiologists. Statistical analyses included analysis of variance.

Results

IR yielded an image noise reduction of 43.9%-63.9% and a CNR increase of 74.1%-180% compared with FBP at the same dose level (P < .001). The overall sensitivity for tumor detection was 64.7%-85.3% for IR and 66.3%-85.7% for FBP at the 20%-100% doses, respectively. There was no significant difference in the sensitivity for tumor detection between IR and FBP at the same dose level (P = .99). The sensitivity of the protocol at the 20% dose with FBP and IR was significantly lower than that of the protocol at the 100% dose with FBP and IR (P = .019).

Conclusion

As the radiation dose at CT decreases, the IR algorithm does not preserve the low-contrast detectability.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.13122349/-/DC1.",2013-06-20 +23659667,Clinico- pathological profile of patients with breast diseases.,"

Background

To evaluate the spectrum of breast diseases and their association with presenting complaints of patients.

Methodology

It was a cross sectional study conducted from 1st January 2010 - 30th December 2012. A total of 254 breast specimens of patients, who were admitted in Civil Hospital Karachi with breast complaints, were included. Specimens were collected either from mastectomy, lumpectomy or needle biopsy from the admitted patients. Informed written consent was taken from all the patients. All patients with primary breast diseases were included. Patients undergoing chemotherapy or with secondary breast disease and slides with insufficient specimen were excluded. All data was entered and analyzed through SPSS 19.

Result

There were 254 breast lesions histologically diagnosed in the 3-year review period. The overall mean age of patients with breast lesions was 25.18 years (SD ± 11.73), with a wide age range of 12-74 years. The most common cases identified were benign, 191 (75.3%), followed by inflammatory, 30 (11.8%), and malignant lesions, 30 (11.8%). Most patients presenting with the complaint of pain had a diagnosis of fibroadenoma, 24 (63.2%), while fibroadenoma was also the most common diagnosis among patients with the complaint of a lump, 147 (72.8%).

Conclusions

The study shows that in Pakistani females the most commonly encountered breast lesion was fibroadenoma. Due to lack of awareness, breast diseases present late. Awareness must be created among women to reduce the mortality and morbidity associated with breast lesions.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1037059088969395.",2013-05-09 +22311032,Haplotype analysis of the polymorphic 17 YSTR markers in Kerala nontribal populations.,"The origin of the Kerala non tribal population has been a matter of contention for centuries. While some claim that Negritos were the first inhabitants, some historians suggest a Dravidian origin for all Keralites. The aim of our study has been to provide sufficient scientific evidence based on Y chromosome short tandem repeat (Y STR) analysis for tracing the paternal lineage and also to create a database of the Y STR haplotype of the male population for future forensic analysis. Whole blood samples (n = 168) were collected from unrelated healthy men of the Kerala non-tribal population over a period of 2 years from October 2009. Genomic DNA was extracted by salting out method. All samples were genotyped for the 17 Y STR loci by the AmpFLSTR Y-filer PCR Amplification Kit. The haplotype and allele frequencies were determined by direct counting and analyzed using Arlequin 3.1 software, and molecular variance was calculated with the Y chromosome haplotype reference database online analysis tool, www.yhrd.org . Haplotype diversity was calculated using HaPYDive ( http://portugene.com/hapydive.html ). The majority of haplotypes were unique (149/168). The variant allele 17.1 was observed in DYS 385 loci in three samples. Fifteen samples (8.93%) showed the presence of alleles that are not within the established marker range denoted as outside marker range (OMR). The allele frequency of Kerala non tribal population ranged from 0.00003 to 0.5809. The most polymorphic single locus marker was DYS 458. The haplotype diversity value for Kerala non tribal population was 0.9978. The pairwise difference value ranged from 0.0531 to 0.0854 on comparison of the haplotypes of the Kerala non tribals with other Indian populations. 
The multi dimensional scaling plot depicted the proximity of Kerala non tribal population with Vasterbotten population (Swedish) and Paiwan, Patyal population of Taiwan, Thailand, and Zhuang population of China. The results of the study indicate towards a European paternal lineage in the non tribal Kerala population.",2012-02-05 +23228079,"Passive smoking, invasive meningococcal disease and preventive measures: a commentary.","Active smoking is a recognized risk factor of various infectious diseases. In a systematic review published in BMC Public Health, Murray et al. demonstrated that exposure to passive smoking significantly increased the risk of meningococcal disease among children. Their review especially highlights that the risk remains high even if the exposure occurs during pregnancy or after birth, although the authors could not disentangle the independent effects of smoking during pregnancy from those in the postnatal period. How passive smoking increases the risk of childhood meningococcal disease is not precisely known. Both exposure to 'smoke', or 'smokers' (who are highly susceptible to pharyngeal carriage of meningococci) are postulated mechanisms, but unfortunately very few studies have examined the risk of exposure by considering these two variables separately, and this therefore remains a research priority. Quitting may well be the mainstay of preventing tobacco-related hazards but the available global data suggest that most smokers are reluctant to quit. Among other interventions, immunizing children with a meningococcal conjugate vaccine could, theoretically, reduce the risk of meningococcal disease among children and their smoker household contacts through herd immunity. 
See related article http://www.biomedcentral.com/1471-2458/12/1062",2012-12-10 +21917853,Predicting coaxial helical stacking in RNA junctions.,"RNA junctions are important structural elements that form when three or more helices come together in space in the tertiary structures of RNA molecules. Determining their structural configuration is important for predicting RNA 3D structure. We introduce a computational method to predict, at the secondary structure level, the coaxial helical stacking arrangement in junctions, as well as classify the junction topology. Our approach uses a data mining approach known as random forests, which relies on a set of decision trees trained using length, sequence and other variables specified for any given junction. The resulting protocol predicts coaxial stacking within three- and four-way junctions with an accuracy of 81% and 77%, respectively; the accuracy increases to 83% and 87%, respectively, when knowledge from the junction family type is included. Coaxial stacking predictions for the five to ten-way junctions are less accurate (60%) due to sparse data available for training. Additionally, our application predicts the junction family with an accuracy of 85% for three-way junctions and 74% for four-way junctions. Comparisons with other methods, as well applications to unsolved RNAs, are also presented. The web server Junction-Explorer to predict junction topologies is freely available at: http://bioinformatics.njit.edu/junction.",2011-09-14 +22122996,Estimates of array and pool-construction variance for planning efficient DNA-pooling genome wide association studies.,"

Background

Until recently, genome-wide association studies (GWAS) have been restricted to research groups with the budget necessary to genotype hundreds, if not thousands, of samples. Replacing individual genotyping with genotyping of DNA pools in Phase I of a GWAS has proven successful, and dramatically altered the financial feasibility of this approach. When conducting a pool-based GWAS, how well SNP allele frequency is estimated from a DNA pool will influence a study's power to detect associations. Here we address how to control the variance in allele frequency estimation when DNAs are pooled, and how to plan and conduct the most efficient well-powered pool-based GWAS.

Methods

By examining the variation in allele frequency estimation on SNP arrays between and within DNA pools we determine how array variance [var(e(array))] and pool-construction variance [var(e(construction))] contribute to the total variance of allele frequency estimation. This information is useful in deciding whether replicate arrays or replicate pools are most useful in reducing variance. Our analysis is based on 27 DNA pools ranging in size from 74 to 446 individual samples, genotyped on a collective total of 128 Illumina beadarrays: 24 1M-Single, 32 1M-Duo, and 72 660-Quad.

Results

For all three Illumina SNP array types our estimates of var(e(array)) were similar, between 3-4 × 10^-4 for normalized data. Var(e(construction)) accounted for between 20-40% of pooling variance across 27 pools in normalized data.

Conclusions

We conclude that relative to var(e(array)), var(e(construction)) is of less importance in reducing the variance in allele frequency estimation from DNA pools; however, our data suggests that on average it may be more important than previously thought. We have prepared a simple online tool, PoolingPlanner (available at http://www.kchew.ca/PoolingPlanner/), which calculates the effective sample size (ESS) of a DNA pool given a range of replicate array values. ESS can be used in a power calculator to perform pool-adjusted calculations. This allows one to quickly calculate the loss of power associated with a pooling experiment to make an informed decision on whether a pool-based GWAS is worth pursuing.",2011-11-28 +23046539,Toward unbiased assessment of treatment and prevention: modeling household transmission of pandemic influenza.,"Providing valid and reliable estimates of the transmissibility and severity of pandemic influenza in real time is key to guide public health policymaking. In particular, early estimates of the transmissibility are indispensable for determining the type and intensity of interventions. A recent study by House and colleagues in BMC Medicine devised a stochastic transmission model to estimate the unbiased risk of transmission within households, applying the method to datasets of the 2009 A/H1N1 influenza pandemic. Here, we discuss future challenges in household transmission studies and underscore the need to systematically collect epidemiological data to decipher the household transmission dynamics. 
We emphasize the need to consider three critical issues for future improvements: (i) capturing age-dependent heterogeneity within households calls for intensive modeling efforts, (ii) the timeline of observation during the course of an epidemic and the length of follow-up should be aligned with study objectives, and (iii) the use of laboratory methods, especially molecular techniques, is encouraged to distinguish household transmissions from those arising in the community. See related article: http://www.biomedcentral.com/1741-7015/10/117.",2012-10-09 +23522935,Reliability of delirium rating scale (DRS) and delirium rating scale-revised-98 (DRS-R98) using variance-based multivariate modelling.,"Delirium's characteristic fluctuation in symptom severity complicates the assessment of test-retest reliability of scales using classical analyses, but application of modelling to longitudinal data offers a new approach. We evaluated test-retest reliability of the delirium rating scale (DRS) and delirium rating scale-revised-98 (DRS-R98), two widely used instruments with high validity and inter-rater reliability. Two existing longitudinal datasets for each scale included DSM-IV criteria for delirium diagnosis and repeated measurements using the DRS or DRS-R98. To estimate the reliability coefficients RT and RΛ for each scale we used macros provided by Dr. Laenen at http://www.ibiostat.be/software/measurement.asp. For each dataset a linear mixed-effects model was fitted to estimate the variance-covariance parameters. A total of 531 cases with between 4 and 9 measurement points across studies were included, comprising both delirious and non-delirious patients. Comorbid dementia in the datasets varied from 27% to 55%. Overall RT for the DRS were 0.71 and 0.50 and for DRS-R98 0.75 and 0.84. RΛ values for DRS were 0.99 and 0.98 and for DRS-R98 were 0.92 and 0.96. Individual RT measures for DRS-R98 and DRS across visits within studies showed more range than overall values. 
Our models found high overall reliability for both scales. Multiple factors impact a scale's reliability values including sample size, repeated measurements, patient population, etc in addition to rater variability.",2013-03-21 +21339535,Recipe for uncovering predictive genes using support vector machines based on model population analysis.,"Selecting a small number of informative genes for microarray-based tumor classification is central to cancer prediction and treatment. Based on model population analysis, here we present a new approach, called Margin Influence Analysis (MIA), designed to work with support vector machines (SVM) for selecting informative genes. The rationale for performing margin influence analysis lies in the fact that the margin of support vector machines is an important factor which underlies the generalization performance of SVM models. Briefly, MIA could reveal genes which have statistically significant influence on the margin by using Mann-Whitney U test. The reason for using the Mann-Whitney U test rather than two-sample t test is that Mann-Whitney U test is a nonparametric test method without any distribution-related assumptions and is also a robust method. Using two publicly available cancerous microarray data sets, it is demonstrated that MIA could typically select a small number of margin-influencing genes and further achieves comparable classification accuracy compared to those reported in the literature. The distinguished features and outstanding performance may make MIA a good alternative for gene selection of high dimensional microarray data. 
(The source code in MATLAB with GNU General Public License Version 2.0 is freely available at http://code.google.com/p/mia2009/).",2011-11-01 +23362108,Epigenetic regulation of the X-linked tumour suppressors BEX1 and LDOC1 in oral squamous cell carcinoma.,"The strong associations between oral squamous cell carcinoma (OSCC) and dietary habits such as alcohol consumption (A), betel quid chewing (B) and cigarette smoking (C) and its predominance in men have been well documented; however, systemic analysis of OSCC is limited. Our study applied high-throughput screening methods to identify causative epigenetic targets in a cohort of men with ABC-associated OSCC. We identified BEX1 and LDOC1 as two epigenetically silenced X-linked tumour suppressors and demonstrated a functional link between the transcription of BEX1 and LDOC1 and promoter hypermethylation. Methylation of the BEX1 and LDOC1 promoters was associated significantly (p < 0.0001) with OSCC and were detected in 75% (42/56) and 89% (50/56) of the samples, respectively. We observed concordant increases in the methylation of both genes in 71% (40/56) of the tumours, and potent in vitro and in vivo growth inhibitory effects in OSCC cells ectopically expressing BEX1 and/or LDOC1. Restored expression of BEX1 and LDOC1 suppressed the nuclear factor-κB (NF-κB) signalling pathway, which is the most frequently hyperactivated signalling pathway in OSCC. This suppression might result from decreased p50 and p65 expression. These findings suggest that silencing of BEX1 and LDOC1 by promoter hypermethylation might represent a critical event in the molecular pathogenesis of OSCC and account for the oncogenic effects of ABC exposure and the male predominance of OSCC occurrence. 
Microarray data are available in the Gene Expression Omnibus (GEO; http://www.ncbi.nlm.nih.gov/geo/)",2013-03-21 +25937701,HIGH-PRECISION BIOLOGICAL EVENT EXTRACTION: EFFECTS OF SYSTEM AND OF DATA.,"We approached the problems of event detection, argument identification, and negation and speculation detection in the BioNLP'09 information extraction challenge through concept recognition and analysis. Our methodology involved using the OpenDMAP semantic parser with manually written rules. The original OpenDMAP system was updated for this challenge with a broad ontology defined for the events of interest, new linguistic patterns for those events, and specialized coordination handling. We achieved state-of-the-art precision for two of the three tasks, scoring the highest of 24 teams at precision of 71.81 on Task 1 and the highest of 6 teams at precision of 70.97 on Task 2. We provide a detailed analysis of the training data and show that a number of trigger words were ambiguous as to event type, even when their arguments are constrained by semantic class. The data is also shown to have a number of missing annotations. Analysis of a sampling of the comparatively small number of false positives returned by our system shows that major causes of this type of error were failing to recognize second themes in two-theme events, failing to recognize events when they were the arguments to other events, failure to recognize nontheme arguments, and sentence segmentation errors. We show that specifically handling coordination had a small but important impact on the overall performance of the system. The OpenDMAP system and the rule set are available at http://bionlp.sourceforge.net.",2011-11-01 +23555214,Analysis of physicochemical and structural properties determining HIV-1 coreceptor usage.,"The relationship of HIV tropism with disease progression and the recent development of CCR5-blocking drugs underscore the importance of monitoring virus coreceptor usage. 
As an alternative to costly phenotypic assays, computational methods aim at predicting virus tropism based on the sequence and structure of the V3 loop of the virus gp120 protein. Here we present a numerical descriptor of the V3 loop encoding its physicochemical and structural properties. The descriptor allows for structure-based prediction of HIV tropism and identification of properties of the V3 loop that are crucial for coreceptor usage. Use of the proposed descriptor for prediction results in a statistically significant improvement over the prediction based solely on V3 sequence with 3 percentage points improvement in AUC and 7 percentage points in sensitivity at the specificity of the 11/25 rule (95%). We additionally assessed the predictive power of the new method on clinically derived 'bulk' sequence data and obtained a statistically significant improvement in AUC of 3 percentage points over sequence-based prediction. Furthermore, we demonstrated the capacity of our method to predict therapy outcome by applying it to 53 samples from patients undergoing Maraviroc therapy. The analysis of structural features of the loop informative of tropism indicates the importance of two loop regions and their physicochemical properties. The regions are located on opposite strands of the loop stem and the respective features are predominantly charge-, hydrophobicity- and structure-related. These regions are in close proximity in the bound conformation of the loop potentially forming a site determinant for the coreceptor binding. The method is available via server under http://structure.bioinf.mpi-inf.mpg.de/.",2013-03-21 +22293517,SpliceGrapher: detecting patterns of alternative splicing from RNA-Seq data in the context of gene models and EST data.,"We propose a method for predicting splice graphs that enhances curated gene models using evidence from RNA-Seq and EST alignments. 
Results obtained using RNA-Seq experiments in Arabidopsis thaliana show that predictions made by our SpliceGrapher method are more consistent with current gene models than predictions made by TAU and Cufflinks. Furthermore, analysis of plant and human data indicates that the machine learning approach used by SpliceGrapher is useful for discriminating between real and spurious splice sites, and can improve the reliability of detection of alternative splicing. SpliceGrapher is available for download at http://SpliceGrapher.sf.net.",2012-01-31 +30731814,First Report of Leaf Blight and Stem Canker of Pachysandra terminalis Caused by Pseudonectria pachysandricola in Korea.,"Pachysandra terminalis Siebold & Zucc., known as Japanese pachysandra, is a creeping evergreen perennial belonging to the family Buxaceae. In April 2011, hundreds of plants showing symptoms of leaf blight and stem canker with nearly 100% incidence were found in a private garden in Suwon, Korea. Plants with the same symptoms were found in Seoul in May and Hongcheon in August. Affected leaves contained tan-to-yellow brown blotches. Stem and stolon cankers first appeared as water soaked and developed into necrotic lesions. Sporodochia were solitary, erumpent, circular, 50 to 150 μm in diameter, salmon-colored, pink-orange when wet, and with or without setae. Setae were hyaline, acicular, 60 to 100 μm long, and had a base that was 4 to 6 μm wide. Conidiophores were in a dense fascicle, not branched, hyaline, aseptate or uniseptate, and 8 to 20 × 2 to 3.5 μm. Conidia were long, ellipsoid to cylindric, fusiform, rounded at the apex, subtruncate at the base, straight to slightly bent, guttulate, hyaline, aseptate, 11 to 26 × 2.5 to 4.0 μm. A single-conidial isolate formed cream-colored colonies that turned into salmon-colored colonies on potato dextrose agar (PDA). Morphological and cultural characteristics of the fungus were consistent with previous reports of Pseudonectria pachysandricola B.O. 
Dodge (1,3,4). Voucher specimens were housed at Korea University (KUS). Two isolates, KACC46110 (ex KUS-F25663) and KACC46111 (ex KUS-F25683), were accessioned in the Korean Agricultural Culture Collection. Fungal DNA was extracted with DNeasy Plant Mini DNA Extraction Kits (Qiagen Inc., Valencia, CA). The complete internal transcribed spacer (ITS) region of rDNA was amplified with the primers ITS1/ITS4 and sequenced using ABI Prism 337 automatic DNA sequencer (Applied Biosystems, Foster, CA). The resulting sequence of 487 bp was deposited in GenBank (Accession No. JN797821). This showed 100% similarity with a sequence of P. pachysandricola from the United States (HQ897807). Isolate KACC46110 was used in pathogenicity tests. Inoculum was prepared by harvesting conidia from 2-week-old cultures on PDA. Ten young leaves wounded with needles were sprayed with conidial suspensions (~1 × 10^6 conidia/ml). Ten young leaves that served as the control were treated with sterile distilled water. Plants were covered with plastic bags to maintain a relative humidity of 100% at 25 ± 2°C for 24 h. Typical symptoms of brown spots appeared on the inoculated leaves 4 days after inoculation and were identical to the ones observed in the field. P. pachysandricola was reisolated from 10 symptomatic leaf tissues, confirming Koch's postulates. No symptoms were observed on control plants. Previously, the disease was reported in the United States, Britain, Japan, and the Czech Republic (2,3), but not in Korea. To our knowledge, this is the first report of P. pachysandricola on Pachysandra terminalis in Korea. Since this plant is popular and widely planted in Korea, this disease could cause significant damage to nurseries and the landscape. References: (1) B. O. Dodge. Mycologia 36:532, 1944. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , September 24, 2011. (3) I. 
Safrankova. Plant Prot. Sci. 43:10, 2007. (4) W. A. Sinclair and H. H. Lyon. Disease of Trees and Shrubs. 2nd ed. Cornell University Press, Ithaca, NY, 2005.",2012-02-01 +23057249,[Written and pictorial content in magazines and their possible relationship to eating disorders].,"

Unlabelled

In the current study we reviewed the literature on studies exploring the magazine reading frequency, written and pictorial contents appearing in magazines and their connection to eating disorders. Reading different fashion and fitness magazines has effect on readers through several indirect and direct factors and through trustable and false information. They affect readers' body satisfaction, self-esteem, eating habits and more generally their health behavior. Different theories have been explained to account for these associations and several other studies examined empirically the connection between the frequency of magazine reading and eating disorders, as well as the symptoms leading to eating disorders.

Methods

We analyzed and summarized articles between 1975 and 2009 from online databases. We used the following sources: Science Direct (http://www.sciencedirect.com/), Springer-Verlag GmbH (http://www.springerlink.com/) and SAGE Publications Ltd (http://online.sagepub.com/).

Results

The pictorial and written magazine contents were associated with the development and maintenance of eating disorders or with symptoms that might lead to eating disorders. The publications compared to previous years featured an increased number of advertisements for unhealthy foods, for unhealthy radical diet plans and exercise programs. Furthermore the magazines contained conflicting messages about nutrition, body functions and eating disorders.

Conclusion

Written and pictorial magazine contents, messages might increase the risk for development of eating disorders, especially in vulnerable individuals.",2012-02-01 +24511461,Post-translational Modifications of Natural Antimicrobial Peptides and Strategies for Peptide Engineering.,"Natural antimicrobial peptides (AMPs) are gene-coded defense molecules discovered in all the three life domains: Eubacteria, Archaea, and Eukarya. The latter covers protists, fungi, plants, and animals. It is now recognized that amino acid composition, peptide sequence, and post-translational modifications determine to a large extent the structure and function of AMPs. This article systematically describes post-translational modifications of natural AMPs annotated in the antimicrobial peptide database (http://aps.unmc.edu/AP). Currently, 1147 out of 1755 AMPs in the database are modified and classified into more than 17 types. Through chemical modifications, the peptides fold into a variety of structural scaffolds that target bacterial surfaces or molecules within cells. Chemical modifications also confer desired functions to a particular peptide. Meanwhile, these modifications modulate other peptide properties such as stability. Elucidation of the relationship between AMP property and chemical modification inspires peptide engineering. Depending on the objective of our design, peptides may be modified in various ways so that the desired features can be enhanced whereas unwanted properties can be minimized. Therefore, peptide design plays an essential role in developing natural AMPs into a new generation of therapeutic molecules.",2012-02-01 +21576219,SNPsyn: detection and exploration of SNP-SNP interactions.,"SNPsyn (http://snpsyn.biolab.si) is an interactive software tool for the discovery of synergistic pairs of single nucleotide polymorphisms (SNPs) from large genome-wide case-control association studies (GWAS) data on complex diseases. 
Synergy among SNPs is estimated using an information-theoretic approach called interaction analysis. SNPsyn is both a stand-alone C++/Flash application and a web server. The computationally intensive part is implemented in C++ and can run in parallel on a dedicated cluster or grid. The graphical user interface is written in Adobe Flash Builder 4 and can run in most web browsers or as a stand-alone application. The SNPsyn web server hosts the Flash application, receives GWAS data submissions, invokes the interaction analysis and serves result files. The user can explore details on identified synergistic pairs of SNPs, perform gene set enrichment analysis and interact with the constructed SNP synergy network.",2011-05-16 +23884597,Comparison of efficacy of 95-gene and 21-gene classifier (Oncotype DX) for prediction of recurrence in ER-positive and node-negative breast cancer patients.,"We recently developed a 95-gene classifier (95(GC)) for the prognostic prediction for ER-positive and node-negative breast cancer patients treated with only adjuvant hormonal therapy. The aim of this study was to validate the efficacy of 95(GC) and compare it with that of 21(GC) (Oncotype DX) as well as to evaluate the combination of 95(GC) and 21(GC). DNA microarray data (gene expression) of ER-positive and node-negative breast cancer patients (n = 459) treated with adjuvant hormone therapy alone as well as those of ER-positive breast cancer patients treated with neoadjuvant chemotherapy (n = 359) were classified with 95(GC) and 21(GC) (Recurrence Online at http://www.recurrenceonline.com/ ). 95(GC) classified the 459 patients into low-risk (n = 285; 10 year relapse-free survival: 88.8 %) and high-risk groups (n = 174; 70.6 %) (P = 5.5e-10), and 21(GC) into low-risk group (n = 286; 89.3 %), intermediate-risk (n = 81; 75.7 %), and high-risk (n = 92; 64.7 %) groups (P = 2.9e-10). 
The combination of 95(GC) and 21(GC) classified them into low-risk (n = 324; 88.9 %) and high-risk (n = 135; 65.0 %) groups (P = 5.9e-14), and also showed that pathological complete response rates were significantly (P = 2.5e-6) higher for the high-risk (17.9 %) than the low-risk group (3.6 %). In addition, we demonstrated that 95(GC) was calculated on a single-sample basis if the reference robust multi-array average workflow was used for normalization. The prognostic prediction capability of 95(GC) appears to be comparable to that of 21(GC). Moreover, their combination seems to result in the identification of more low-risk patients who do not need chemotherapy than either classification alone. The patients in the high-risk group were found to be more chemo-sensitive so that they can benefit more from adjuvant chemotherapy.",2013-07-25 +23556014,A cell-surface phylome for African trypanosomes.,"The cell surface of Trypanosoma brucei, like many protistan blood parasites, is crucial for mediating host-parasite interactions and is instrumental to the initiation, maintenance and severity of infection. Previous comparisons with the related trypanosomatid parasites T. cruzi and Leishmania major suggest that the cell-surface proteome of T. brucei is largely taxon-specific. Here we compare genes predicted to encode cell surface proteins of T. brucei with those from two related African trypanosomes, T. congolense and T. vivax. We created a cell surface phylome (CSP) by estimating phylogenies for 79 gene families with putative surface functions to understand the more recent evolution of African trypanosome surface architecture. Our findings demonstrate that the transferrin receptor genes essential for bloodstream survival in T. brucei are conserved in T. congolense but absent from T. vivax and include an expanded gene family of insect stage-specific surface glycoproteins that includes many currently uncharacterized genes. 
We also identify species-specific features and innovations and confirm that these include most expression site-associated genes (ESAGs) in T. brucei, which are absent from T. congolense and T. vivax. The CSP presents the first global picture of the origins and dynamics of cell surface architecture in African trypanosomes, representing the principal differences in genomic repertoire between African trypanosome species and provides a basis from which to explore the developmental and pathological differences in surface architectures. All data can be accessed at: http://www.genedb.org/Page/trypanosoma_surface_phylome.",2013-03-21 +22293603,NBC update: The addition of viral and fungal databases to the Naïve Bayes classification tool.,"

Background

Classifying the fungal and viral content of a sample is an important component of analyzing microbial communities in environmental media. Therefore, a method to classify any fragment from these organisms' DNA should be implemented.

Results

We update the naïve Bayes classification (NBC) tool to classify reads originating from viral and fungal organisms. NBC classifies a fungal dataset similarly to Basic Local Alignment Search Tool (BLAST) and the Ribosomal Database Project (RDP) classifier. We also show NBC's similarities and differences to RDP on a fungal large subunit (LSU) ribosomal DNA dataset. For viruses in the training database, strain classification accuracy is 98%, while for those reads originating from sequences not in the database, the order-level accuracy is 78%, where order indicates the taxonomic level in the tree of life.

Conclusions

In addition to being competitive to other classifiers available, NBC has the potential to handle reads originating from any location in the genome. We recommend using the Bacteria/Archaea, Fungal, and Virus databases separately due to algorithmic biases towards long genomes. The tool is publicly available at: http://nbc.ece.drexel.edu.",2012-01-31 +21605094,Predicting melanoma risk for the Australian population.,"

Background

As melanoma incidence in Australia continues to rise, targeting high-risk individuals for early detection is of paramount importance.

Objectives

We aimed to design a population-specific risk assessment tool to improve on the use of intuition alone for assignment of surveillance strategies for high-risk individuals and help communicate risk more accurately and effectively to patients.

Methods

Methods used in the development of breast cancer risk models were adopted. Data from a large meta-analysis was used to determine risk estimates. Attributable risk was calculated for each risk factor using data from the Victorian Melanoma Service. Local prevalence data from state cancer registries was incorporated to estimate 5-year risk of melanoma.

Results

Independent risk factors identified were common naevi, atypical naevi, hair colour, freckles, family history of melanoma and personal history of non-melanoma skin cancer. Personal history of melanoma was the strongest risk factor for developing another (relative risk 7.28, 7.24). Absolute risk for individuals varies greatly with age, risk factor profiles and proximity to the equator.

Conclusion

We have developed a melanoma risk assessment tool based on the best available information (http://www.victorianmelanomaservice.org/calculator). The tool is easily modified as new information becomes available.",2011-03-01 +23964054,SFAs do not impair endothelial function and arterial stiffness.,"

Background

It is uncertain whether saturated fatty acids (SFAs) impair endothelial function and contribute to arterial stiffening.

Objective

We tested the effects of replacing SFAs with monounsaturated fatty acids (MUFAs) or carbohydrates on endothelial function and arterial stiffness.

Design

With the use of a parallel-designed randomized controlled trial in 121 insulin-resistant men and women, we measured vascular function after 1 mo of consumption of a high-SFA (HS) diet and after 24 wk after random assignment to the HS diet or diets that contained <10% SFAs and were high in either MUFAs or carbohydrates. The primary outcome was a change in flow-mediated dilation (FMD), and secondary outcomes were changes in carotid to femoral pulse wave velocity (PWV) and plasma 8-isoprostane F2α-III concentrations.

Results

For 112 participants with data available for analysis on the specified outcomes, no significant differences were shown. FMD with the HS reference diet was 6.7 ± 2.2%, and changes (95% CIs) after 6 mo of intervention were +0.3 (-0.4, 1.1), -0.2 (-0.8, 0.5), and -0.1 (-0.6, 0.7) with HS, high-MUFA (HM), and high-carbohydrate (HC) diets, respectively. After consumption of the HS reference diet, the geometric mean (±SD) PWV was 7.67 ± 1.62 m/s, and mean percentages of changes (95% CIs) were -1.0 (-6.2, 4.3) with the HS diet, 2.7 (-1.4, 6.9) with the HM diet, and -1.0 (-5.5, 3.4) with the HC diet. With the HS reference diet, the geometric mean (±SD) plasma 8-isoprostane F2α-III concentration was 176 ± 85 pmol/L, and mean percentage of changes (95% CIs) were 1 (-12, 14) with the HS diet, 6 (-5, 16) with the HM diet, and 4 (-7, 16) with the HC diet.

Conclusion

The replacement of SFAs with MUFAs or carbohydrates in healthy subjects does not affect vascular function. This trial was registered at Current Controlled Trials (http://www.controlled-trials.com/ISRCTN) as ISRCTN 29111298.",2013-09-01 +22293552,"Uberon, an integrative multi-species anatomy ontology.","We present Uberon, an integrated cross-species ontology consisting of over 6,500 classes representing a variety of anatomical entities, organized according to traditional anatomical classification criteria. The ontology represents structures in a species-neutral way and includes extensive associations to existing species-centric anatomical ontologies, allowing integration of model organism and human data. Uberon provides a necessary bridge between anatomical structures in different taxa for cross-species inference. It uses novel methods for representing taxonomic variation, and has proved to be essential for translational phenotype analyses. Uberon is available at http://uberon.org.",2012-01-31 +23524031,Acute diagnostic biomarkers for spinal cord injury: review of the literature and preliminary research report.,"

Objective

Many efforts have been made to create new diagnostic technologies for use in the diagnosis of central nervous system injury. However, there is still no consensus for the use of biomarkers in clinical acute spinal cord injury (SCI). The aims of this review are (1) to evaluate the current status of neurochemical biomarkers and (2) to discuss their potential acute diagnostic role in SCI by reviewing the literature.

Methods

PubMed (http://www.ncbi.nlm.nih.gov/pubmed) was searched up to 2012 to identify publications concerning diagnostic biomarkers in SCI. To support more knowledge, we also checked secondary references in the primarily retrieved literature.

Results

Neurofilaments, cleaved-Tau, microtubule-associated protein 2, myelin basic protein, neuron-specific enolase, S100β, and glial fibrillary acidic protein were identified as structural protein biomarkers in SCI by this review process. We could not find reports relating ubiquitin C-terminal hydrolase-L1 and α-II spectrin breakdown products, which are widely researched in other central nervous system injuries. Therefore, we present our preliminary data relating to these two biomarkers. Some of biomarkers showed promising results for SCI diagnosis and outcome prediction; however, there were unresolved issues relating to accuracy and their accessibility.

Conclusion

Currently, there still are not many reports focused on diagnostic biomarkers in SCI. This fact warranted the need for greater efforts to innovate sensitive and reliable biomarkers for SCI.",2013-03-19 +22674022,Diffusive oxygen shunting between vessels in the preglomerular renal vasculature: anatomic observations and computational modeling.,"To understand how geometric factors affect arterial-to-venous (AV) oxygen shunting, a mathematical model of diffusive oxygen transport in the renal cortex was developed. Preglomerular vascular geometry was investigated using light microscopy (providing vein shape, AV separation, and capillary density near arteries) and published micro-computed tomography (CT) data (providing vessel size and AV separation; Nordsletten DA, Blackett S, Bentley MD, Ritman EL, Smith NP. IUPS Physiome Project. http://www.physiome.org.nz/publications/nordsletten_blackett_ritman_bentley_smith_2005/folder_contents). A ""U-shaped"" relationship was observed between the arterial radius and the distance between the arterial and venous lumens. Veins were found to partially wrap around the artery more consistently for larger rather than smaller arteries. Intrarenal arteries were surrounded by an area of fibrous tissue, lacking capillaries, the thickness of which increased from ∼5 μm for the smallest arteries (<16-μm diameter) to ∼20 μm for the largest arteries (>200-μm diameter). Capillary density was greater near smaller arteries than larger arteries. No capillaries were observed between wrapped AV vessel pairs. The computational model comprised a single AV pair in cross section. Geometric parameters critical in renal oxygen transport were altered according to variations observed by CT and light microscopy. Lumen separation and wrapping of the vein around the artery were found to be the critical geometric factors determining the amount of oxygen shunted between AV pairs. 
AV oxygen shunting increases both as lumen separation decreases and as the degree of wrapping increases. The model also predicts that capillaries not only deliver oxygen, but can also remove oxygen from the cortical parenchyma close to an AV pair. Thus the presence of oxygen sinks (capillaries or tubules) near arteries would reduce the effectiveness of AV oxygen shunting. Collectively, these data suggest that AV oxygen shunting would be favored in larger vessels common to the cortical and medullary circulations (i.e., arcuate and proximal interlobular arteries) rather than the smaller vessels specific to the cortical circulation (distal interlobular arteries and afferent arterioles).",2012-06-06 +21585228,Colorectal cancer screening in rural and remote areas: analysis of the National Bowel Cancer Screening Program data for South Australia.,"

Introduction

In Australia, colorectal cancer is the second most commonly diagnosed cancer and cause of death from malignant diseases, and its incidence is rising. The aim of this article was to present an analysis of National Bowel Cancer Screening Program (NBCSP) data for rural and remote South Australia (SA), in order to identify geographical areas and population groups that may benefit from targeted approaches to increase participation rates in colorectal cancer screening.

Methods

De-identified data from the NBCSP (February 2007 to July 2008) were provided by Medicare Australia. Mapping and analysis of the NBCSP data was performed using ESRI ArcGIS (http://www.esri.com/software/arcgis/index.html) and MapInfo (http://slp.pbinsight.com/info/mipro-sem-au). Data were aggregated to postcode and Accessibility/Remoteness Index of Australia (ARIA) and participation was then mapped according to overall participation rates, sex, age, Indigenous status and Socio-Economic Indexes for Areas (SEIFA)-Index of Relative Socio-Economic Disadvantage (IRSD). The participants were South Australians who turned 55 and 65 years between 2007 and 2008 who returned the completed NBCSP test sent to them by Medicare Australia.

Results

The overall participation rate was 46.1% in rural and remote SA, although this was statistically significantly different (p<.001) according to sex (46.7% for males and 53.3% for females), age (45.2% for those 55 years, and 52% for those 65 years), socio-economic status (from 43% in 'most deprived' quintile to 50% in 'most affluent' quintile) and remoteness (45.6% for metropolitan, 46% for remote and 48.6% for rural areas). Indigenous participation was 0.5%.

Conclusions

The findings of this study suggest lower NBCSP participation rates for people from metropolitan and remote areas, compared with those from rural areas. The uptake of cancer screening is lower for older rural and remote residents, men, Indigenous people, lower socioeconomic groups and those living in the Far North subdivision of SA.",2011-04-01 +22201703,Attrition in web-based treatment for problem drinkers.,"

Background

Web-based interventions for problem drinking are effective but characterized by high rates of attrition. There is a need to better understand attrition rates in order to improve the completion rates and the success of Web-based treatment programs.

Objective

The objectives of our study were to (1) examine attrition prevalence and pretreatment predictors of attrition in a sample of open-access users of a Web-based program for problem drinkers, and (2) to further explore attrition data from our randomized controlled trial (RCT) of the Web-based program.

Methods

Attrition data from two groups of Dutch-speaking problem drinkers were collected: (1) open-access participants enrolled in the program in 2009 (n = 885), and (2) RCT participants (n = 156). Participants were classified as noncompleters if they did not complete all 12 treatment sessions (9 assignments and 3 assessments). In both samples we assessed prevalence of attrition and pretreatment predictors of treatment completion. Logistic regression analysis was used to explore predictors of treatment completion. In the RCT sample, we additionally measured reasons for noncompletion and participants' suggestions to enhance treatment adherence. The qualitative data were analyzed using thematic analysis.

Results

The open-access and RCT group differed significantly in the percentage of treatment completers (273/780, 35.0% vs 65/144, 45%, χ(2) (1) = 5.4, P = .02). Logistic regression analysis revealed a significant contribution of treatment readiness, gender, education level, age, baseline alcohol consumption, and readiness to change to predict treatment completion. The key reasons for noncompletion were personal reasons, dissatisfaction with the intervention, and satisfaction with their own improvement. The main suggestions for boosting strategies involved email notification and more flexibility in the intervention.

Conclusions

The challenge of Web-based alcohol treatment programs no longer seems to be their effectiveness but keeping participants involved until the end of the treatment program. Further research should investigate whether the suggested strategies to improve adherence decrease attrition rates in Web-based interventions. If we can succeed in improving attrition rates, the success of Web-based alcohol interventions will also improve and, as a consequence, their public health impact will increase.

Trial

International Standard Randomized Controlled Trial Number (ISRCTN): 39104853; http://www.controlled-trials.com/ISRCTN39104853 (Archived by WebCite at http://www.webcitation.org/63IKDul1T).",2011-12-27 +22310480,Charged single alpha-helices in proteomes revealed by a consensus prediction approach.,"Charged single α-helices (CSAHs) constitute a recently recognized protein structural motif. Its presence and role is characterized in only a few proteins. To explore its general features, a comprehensive study is necessary. We have set up a consensus prediction method available as a web service (at http://csahserver.chem.elte.hu) and downloadable scripts capable of predicting CSAHs from protein sequences. Using our method, we have performed a comprehensive search on the UniProt database. We found that the motif is very rare but seems abundant in proteins involved in symbiosis and RNA binding/processing. Although there are related proteins with CSAH segments, the motif shows no deep conservation in protein families. We conclude that CSAH-containing proteins, although rare, are involved in many key biological processes. Their conservation pattern and prevalence in symbiosis-associated proteins suggest that they might be subjects of relatively rapid molecular evolution and thus can contribute to the emergence of novel functions.",2012-01-28 +24150820,[Appropriate off-label prescription in practice].,"

Background

More than 2,000 different types of disease entities are treated in dermatology. Even for some of the more commonly occurring diseases there is no explicitly approved medication. Further limitations in the approval status can be found for skin diseases in children and adolescents, in pregnancy and with multiple comorbidities. Therefore, for medical and ethical reasons in many dermatological treatment situations prescription of medications off label is necessary. Against the background of the difficult formal and legal framework conditions for off-label prescription, knowledge of the regulations on off-label use is essential for dermatologists.

Methods

The presented data were taken from the essential sources of the social security statutes V (SGB V), pharmaceutical guidelines and legal texts on jurisprudence of off-label use.

Results

There are no standardized regulations on off-label use for dermatological diseases in Germany. Only a few indications and pharmaceuticals have as yet been included in the processing procedure by the Federal Joint Committee. The large proportion of the necessary treatment in off-label use refers to the jurisprudence, in particular the Federal Social Court. According to this an off-label use can be justified in exceptional cases, and from the sociolegal perspective can even be demanded by patients, if the existing indications represent a serious life-threatening disease or one which permanently reduces the quality of life, a suitable therapy under approved conditions is not available and there is a well-founded prospect of successful treatment. For appropriate prescription in off-label use it is necessary to carry out an appropriate and thorough evaluation and documentation of previous treatment and of the degree of suffering on the side of the patient and to have knowledge of the international literature on studies involving the selected substance. Off-label use is involved in approximately 5-15% of necessary pharmaceutical prescriptions in dermatology and affects some 30 drugs and several hundred indications in routine treatment. Currently the CVderm is constructing a databank for dermatology called E-skin for simplification of off-label use in dermatology (http://www.arzneimittelleitfaden.de).

Conclusion

Prescriptions for off-label use are an integral component of qualified dermatological treatment. Despite regulatory and legal hurdles off-label use can be implemented with knowledge of the juridical and formal conditions.",2013-10-01 +21498399,Normalizing bead-based microRNA expression data: a measurement error model-based approach.,"

Motivation

Compared with complementary DNA (cDNA) or messenger RNA (mRNA) microarray data, microRNA (miRNA) microarray data are harder to normalize due to the facts that the total number of miRNAs is small, and that the majority of miRNAs usually have low expression levels. In bead-based microarrays, the hybridization is completed in several pools. As a result, the number of miRNAs tested in each pool is even smaller, which poses extra difficulty to intrasample normalization and ultimately affects the quality of the final profiles assembled from various pools. In this article, we consider a measurement error model-based method for bead-based microarray intrasample normalization.

Results

In this study, results from quantitative real-time PCR (qRT-PCR) assays are used as 'gold standards' for validation. The performance of the proposed measurement error model-based method is evaluated via a simulation study and real bead-based miRNA expression data. Simulation results show that the new method performs well to assemble complete profiles from subprofiles from various pools. Compared with two intrasample normalization methods recommended by the manufacturer, the proposed approach produces more robust final complete profiles and results in better agreement with the qRT-PCR results in identifying differentially expressed miRNAs, and hence improves the reproducibility between the two microarray platforms. Meaningful results are obtained by the proposed intrasample normalization method, together with quantile normalization as a subsequent complemental intersample normalization method.

Availability

Datasets and R package are available at http://gauss.usouthal.edu/publ/beadsme/.",2011-04-15 +22356929,Molecular dynamics simulation and quantum mechanical calculations on α-D-N-acetylneuraminic acid.,"N-Acetylneuraminic acid is a sugar molecule of biological significance due to its pivotal role in molecular recognition processes. The three dimensional structure and conformation of α-Neu5Ac in biological environments can be clearly observed by molecular dynamics (MD) simulation and quantum mechanical (QM) calculations. A 10ns MD simulation on α-Neu5Ac yields two conformational models which are stabilized by water mediated hydrogen bond between O-8/O-9 hydroxyl oxygen and carbonyl of carboxylate group. The average life time of the conformers and the residual time of water which mediates the hydrogen bonding interactions are computed. Based on the amphiprotic nature of water, water mediation of each conformer is divided into two different modes, one donor-one acceptor mode and two donor modes. According to the analysis of simulation trajectories, the preferred mode of water mediation for conformers is the one donor-one acceptor mode. The energy and geometry of the MD derived conformational models of α-Neu5Ac are optimized using HF/6-31G(∗) basis set of Gaussian03. QM calculations also resulted that α-Neu5Ac is preferentially stabilized by water mediated hydrogen bonding between O-8 hydroxyl and the carboxylate group where the mediation is one donor-one acceptor type. The optimized geometry of α-Neu5Ac which is in good agreement with the crystal structure of α-D-N-acetyl-1-O-methylneuraminic acid methyl ester is deposited in the public domain database 3DSDSCAR (http://3dsdscar.org). This optimized structure can be used by biotechnologists, biophysicists and glycobiologists for modelling the sialylglycans and also to design drugs using sialic acid analog inhibitors.",2012-01-28 +22285560,"vHOG, a multispecies vertebrate ontology of homologous organs groups.","

Motivation

Most anatomical ontologies are species-specific, whereas a framework for comparative studies is needed. We describe the vertebrate Homologous Organs Groups ontology, vHOG, used to compare expression patterns between species.

Results

vHOG is a multispecies anatomical ontology for the vertebrate lineage. It is based on the HOGs used in the Bgee database of gene expression evolution. vHOG version 1.4 includes 1184 terms, follows OBO principles and is based on the Common Anatomy Reference Ontology (CARO). vHOG only describes structures with historical homology relations between model vertebrate species. The mapping to species-specific anatomical ontologies is provided as a separate file, so that no homology hypothesis is stated within the ontology itself. Each mapping has been manually reviewed, and we provide support codes and references when available.

Availability and implementation

vHOG is available from the Bgee download site (http://bgee.unil.ch/), as well as from the OBO Foundry and the NCBO Bioportal websites.

Contact

bgee@isb-sib.ch; frederic.bastian@unil.ch.",2012-01-27 +23304658,An integrated nano-scale approach to profile miRNAs in limited clinical samples.,"Profiling miRNA expression in cells that directly contribute to human disease pathogenesis is likely to aid the discovery of novel drug targets and biomarkers. However, tissue heterogeneity and the limited amount of human diseased tissue available for research purposes present fundamental difficulties that often constrain the scope and potential of such studies. We established a flow cytometry-based method for isolating pure populations of pathogenic T cells from bronchial biopsy samples of asthma patients, and optimized a high-throughput nano-scale qRT-PCR method capable of accurately measuring 96 miRNAs in as little as 100 cells. Comparison of circulating and airway T cells from healthy and asthmatic subjects revealed asthma-associated and tissue-specific miRNA expression patterns. These results establish the feasibility and utility of investigating miRNA expression in small populations of cells involved in asthma pathogenesis, and set a precedent for application of our nano-scale approach in other human diseases. The microarray data from this study (Figure 7) has been submitted to the NCBI Gene Expression Omnibus (GEO; http://ncbi.nlm.nih.gov/geo) under accession no. GSE31030.",2012-11-01 +21493650,An optimal peak alignment for comprehensive two-dimensional gas chromatography mass spectrometry using mixture similarity measure.,"

Motivation

Comprehensive two-dimensional gas chromatography mass spectrometry (GC × GC-MS) brings much increased separation capacity, chemical selectivity and sensitivity for metabolomics and provides more accurate information about metabolite retention times and mass spectra. However, there is always a shift of retention times in the two columns that makes it difficult to compare metabolic profiles obtained from multiple samples exposed to different experimental conditions.

Results

The existing peak alignment algorithms for GC × GC-MS data use the peak distance and the spectra similarity sequentially and require predefined either distance-based window and/or spectral similarity-based window. To overcome the limitations of the current alignment methods, we developed an optimal peak alignment using a novel mixture similarity by employing the peak distance and the spectral similarity measures simultaneously without any variation windows. In addition, we examined the effect of the four different distance measures such as Euclidean, Maximum, Manhattan and Canberra distances on the peak alignment. The performance of our proposed peak alignment algorithm was compared with the existing alignment methods on the two sets of GC × GC-MS data. Our analysis showed that Canberra distance performed better than other distances and the proposed mixture similarity peak alignment algorithm prevailed against all literature reported methods.

Availability

The data and software mSPA are available at http://stage.louisville.edu/faculty/x0zhan17/software/software-development.",2011-04-14 +22718787,miRcode: a map of putative microRNA target sites in the long non-coding transcriptome.,"

Summary

Although small non-coding RNAs, such as microRNAs, have well-established functions in the cell, long non-coding RNAs (lncRNAs) have only recently started to emerge as abundant regulators of cell physiology, and their functions may be diverse. A small number of studies describe interactions between small and lncRNAs, with lncRNAs acting either as inhibitory decoys or as regulatory targets of microRNAs, but such interactions are still poorly explored. To facilitate the study of microRNA-lncRNA interactions, we implemented miRcode: a comprehensive searchable map of putative microRNA target sites across the complete GENCODE annotated transcriptome, including 10 419 lncRNA genes in the current version.

Availability

http://www.mircode.org

Contact

erik.larsson@gu.se

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-06-19 +24120711,"Prospective randomized, double-blind, placebo controlled trial to evaluate infection prevention in adult patients after tension-free inguinal hernia repair.","

Objective

Infection is one of possible complications after prosthetic material hernia repair surgery. Antibiotic prophylaxis is applied routinely in China, but its effect is still controversial. The present study aims to offer direct clinical evidence on prevention of infection after tension-free inguinal hernia repair.

Methods

A total of 1,200 cases with primary inguinal hernia treated in 6 hospitals in Shaanxi Province were enrolled in this study. They were randomly divided into three groups (n = 400 per group): placebo control group, Cefazolin group and Levofloxacin group after tension-free inguinal hernia repair using polypropylene mesh. Hernia type, age, gender, weight and complications were recorded. The surgical-site infection was diagnosed according to APIC, CDC criteria (http://www.apic.org). Infections were evaluated every other day in the first week, and then at 14 days, 21 days and 30 days following surgery.

Results

Two cases from the placebo group, 3 from the Cefazolin group and 3 from the Levofloxacin group failed to follow-up. Six patients (2 non-following the protocol, 2 severe depression, and 2 laparoscopic surgery) from the placebo group, 14 (8 nonreceiving trial medication, 5 laparoscopic surgery, and 1 failure to tolerance) from the Cefazolin group, and 12 (2 combination of antibiotic usage, 5 laparoscopic surgery and 5 failure to tolerance) from the Levofloxacin group were excluded. The data of the 1,160 cases were statistically analyzed in the incidence rates of surgical-site infection and complications after inguinal hernia repair. Surgical-site infection including wound infection, cellulitis or mesh-related infection was found in 20 cases (5.1%) of the control group, 15 (3.92%) of the Cefazolin group and 17 (4.42%) of the Levofloxacin group, and the difference among the three groups was not statistically significant (χ2 = 0.438, p = 0.803). There was also no significant difference in post-surgery complications including seroma (p = 0.6366), urinary retention (p = 0.8136), fat liquefaction (p = 0.8061), pulmonary infection (p = 0.1911), and urinary tract infection (p = 0.8144) among the three groups.

Conclusions

Prophylactic use of Cefazolin or Levofloxacin did not significantly decrease the risk of wound infection in these patients undergoing inguinal hernia repair. The present results do not support the administration of antibiotic prophylaxis for tension-free inguinal hernia repair. *The authors contributed equally to this work.",2013-12-01 +22292078,SPPS: a sequence-based method for predicting probability of protein-protein interaction partners.,"

Background

The molecular network sustained by different types of interactions among proteins is widely manifested as the fundamental driving force of cellular operations. Many biological functions are determined by the crosstalk between proteins rather than by the characteristics of their individual components. Thus, the searches for protein partners in global networks are imperative when attempting to address the principles of biology.

Results

We have developed a web-based tool ""Sequence-based Protein Partners Search"" (SPPS) to explore interacting partners of proteins, by searching over a large repertoire of proteins across many species. SPPS provides a database containing more than 60,000 protein sequences with annotations and a protein-partner search engine in two modes (Single Query and Multiple Query). Two interacting proteins of human FBXO6 protein have been found using the service in the study. In addition, users can refine potential protein partner hits by using annotations and possible interactive network in the SPPS web server.

Conclusions

SPPS provides a new type of tool to facilitate the identification of direct or indirect protein partners which may guide scientists on the investigation of new signaling pathways. The SPPS server is available to the public at http://mdl.shsmu.edu.cn/SPPS/.",2012-01-26 +21565797,BiQ Analyzer HT: locus-specific analysis of DNA methylation by high-throughput bisulfite sequencing.,"Bisulfite sequencing is a widely used method for measuring DNA methylation in eukaryotic genomes. The assay provides single-base pair resolution and, given sufficient sequencing depth, its quantitative accuracy is excellent. High-throughput sequencing of bisulfite-converted DNA can be applied either genome wide or targeted to a defined set of genomic loci (e.g. using locus-specific PCR primers or DNA capture probes). Here, we describe BiQ Analyzer HT (http://biq-analyzer-ht.bioinf.mpi-inf.mpg.de/), a user-friendly software tool that supports locus-specific analysis and visualization of high-throughput bisulfite sequencing data. The software facilitates the shift from time-consuming clonal bisulfite sequencing to the more quantitative and cost-efficient use of high-throughput sequencing for studying locus-specific DNA methylation patterns. In addition, it is useful for locus-specific visualization of genome-wide bisulfite sequencing data.",2011-05-11 +23493321,Modeling risk stratification in human cancer.,"

Motivation

Despite huge prognostic promises, gene expression-based survival assessment is rarely used in clinical routine. Main reasons include difficulties in performing and reporting analyses and restriction in most methods to one high-risk group with the vast majority of patients being unassessed. The present study aims at limiting these difficulties by (i) mathematically defining the number of risk groups without any a priori assumption; (ii) computing the risk of an independent cohort by considering each patient as a new patient incorporated to the validation cohort and (iii) providing an open-access Web site to freely compute risk for every new patient.

Results

Using the gene expression profiles of 551 patients with multiple myeloma, 602 with breast-cancer and 460 with glioma, we developed a model combining running log-rank tests under controlled chi-square conditions and multiple testing corrections to build a risk score and a classification algorithm using simultaneous global and between-group log-rank chi-square maximization. For each cancer entity, we provide a statistically significant three-group risk prediction model, which is corroborated with publicly available validation cohorts.

Conclusion

In constraining between-group significances, the risk score compares favorably with previous risk classifications.

Availability

Risk assessment is freely available on the Web at https://gliserv.montp.inserm.fr/PrognoWeb/ for personal or test data files. Web site implementation in Perl, R and Apache.",2013-03-14 +22298711,Rapid and robust resampling-based multiple-testing correction with application in a genome-wide expression quantitative trait loci study.,"Genome-wide expression quantitative trait loci (eQTL) studies have emerged as a powerful tool to understand the genetic basis of gene expression and complex traits. In a typical eQTL study, the huge number of genetic markers and expression traits and their complicated correlations present a challenging multiple-testing correction problem. The resampling-based test using permutation or bootstrap procedures is a standard approach to address the multiple-testing problem in eQTL studies. A brute force application of the resampling-based test to large-scale eQTL data sets is often computationally infeasible. Several computationally efficient methods have been proposed to calculate approximate resampling-based P-values. However, these methods rely on certain assumptions about the correlation structure of the genetic markers, which may not be valid for certain studies. We propose a novel algorithm, rapid and exact multiple testing correction by resampling (REM), to address this challenge. REM calculates the exact resampling-based P-values in a computationally efficient manner. The computational advantage of REM lies in its strategy of pruning the search space by skipping genetic markers whose upper bounds on test statistics are small. REM does not rely on any assumption about the correlation structure of the genetic markers. It can be applied to a variety of resampling-based multiple-testing correction methods including permutation and bootstrap methods. 
We evaluate REM on three eQTL data sets (yeast, inbred mouse, and human rare variants) and show that it achieves accurate resampling-based P-value estimation with much less computational cost than existing methods. The software is available at http://csbio.unc.edu/eQTL.",2012-01-31 +22829571,ADOPS--Automatic Detection Of Positively Selected Sites.,"Maximum-likelihood methods based on models of codon substitution have been widely used to infer positively selected amino acid sites that are responsible for adaptive changes. Nevertheless, in order to use such an approach, software applications are required to align protein and DNA sequences, infer a phylogenetic tree and run the maximum-likelihood models. Therefore, a significant effort is made in order to prepare input files for the different software applications and in the analysis of the output of every analysis. In this paper we present the ADOPS (Automatic Detection Of Positively Selected Sites) software. It was developed with the goal of providing an automatic and flexible tool for detecting positively selected sites given a set of unaligned nucleotide sequence data. An example of the usefulness of such a pipeline is given by showing, under different conditions, positively selected amino acid sites in a set of 54 Coffea putative S-RNase sequences. ADOPS software is freely available and can be downloaded from http://sing.ei.uvigo.es/ADOPS.",2012-07-24 +28517089,SU-E-T-570: Improvement to the Histogram Analysis in Radiation Therapy (HART): An Open Source Software System for the Multi-Dimensional Dose- Volume Histogram Analysis in Digital Image Communication in Medicine - Radiation Therapy (DICOM-RT) Treatment Plans.,"

Purpose

Histogram Analysis in Radiation Therapy (HART) is an efficient and accurate dose-volume histogram (DVH) computational tool in radiotherapy research. Several applications of the program have been presented previously (J Appl Clin Med Phys 11(1): 3013, 2010; Med Phys 38(6), p.3678, 2011) for the Radiation Therapy Oncology Group (RTOG) users. The program has been further developed to incorporate various types of DVH analysis features to support the research using DICOM-RT plans. The main objective of this work was to present the improvement and compatibility of the program for the DICOM-RT plans.

Methods and materials

MATLAB based codes were primarily designed to read and write a simpler HART format from the standard DICOM-RT data objects exported from the Xio treatment planning system (CMS Inc., St. Louis, MO). This format employed an optimal polynomial fitting technique to interpolate the co-ordinates of the contours in the regions-of-interest. The format was efficient for the (a) precise extraction of the cumulative DVH (cDVH) and spatial DVH (sDVH; x-,y-, and z-DVHs respectively) data- statistics, (b) universal-plan indices evaluation, (c) biological modeling based outcome analyses (BMOA), (d) radiobiological dose-response modeling, and (e) physical parameterization modules. The fundamental DVH statistics were validated using the DVH statistics extracted from the Computational Environment for Radiotherapy Research program.

Results

HART offers various types of DVH computational functionalities, several plan evaluation and radiobiological outcome analysis modules in a user- friendly software package for the RTOG and DICOM-RT planners. The cDVH and BMOA modules were found to be the most applicable features for the global researchers.

Conclusions

HART is a novel and universal multi-dimensional DVH analysis tool for the radiation therapy research. We further expect to develop HART for the space-time DVH analysis and proton therapy applications. The software is available online (http://www2.uic.edu/∼apyaku1) for the radiotherapy research. This work was partially supported by NIH-NIDCD grant.",2012-06-01 +23194656,Potential use of inflammation and early immunological event biomarkers in assessing vaccine safety.,"Highly effective vaccines have traditionally been designed in a rather empirical way, often with incomplete understanding of their mode of action. Full assessment of efficacy and reactogenicity takes time and, as a result, vaccine introduction to the market is usually slow and expensive. In addition, in rare cases, unacceptable reactogenicity may only become apparent after years of development or even widespread use. However, recent advances in cell biology and immunology offer a range of new technologies and systems for identifying biological responses or ""biomarkers"" that could possibly be used to evaluate and predict efficacy and safety during vaccine development and post-marketing surveillance. This report reflects the conclusions of a group of scientists from academia, regulatory agencies and industry who attended a conference on the potential use of biomarkers to assess vaccine safety which was held in Baltimore, Maryland, USA, from 10 to 11 May 2012 and organized by the International Association for Biologicals (IABS). The conference focused particularly on determining which biomarkers might relate to vaccine efficacy and reactogenicity and whether our knowledge base was sufficiently robust at this time for the data to be used for decision-making. 
More information on the conference output can be found on the IABS website, http://www.iabs.org/.",2012-11-27 +22528350,Establishment of hESC lines from the inner cell mass of blastocyst-stage embryos and single blastomeres of 4-cell stage embryos.,"More than 600 human embryonic stem cell (hESC) lines have been reported today at the human European Embryonic Stem Cell Registry ( http://www.hescreg.eu/ ). Despite these high numbers, there are currently no general protocols for derivation, culture, and characterization of hESC. Moreover, data on the culture of the embryo used for the derivation (medium, day of ICM isolation) are usually not available but can have an impact on the derivation rate. We present here the protocols for derivation, culture and characterization as we applied them for the 22 hESC lines (named VUB-hESC) in our laboratory.",2012-01-01 +21349867,"Interactive, multiscale navigation of large and complicated biological networks.","

Motivation

Many types of omics data are compiled as lists of connections between elements and visualized as networks or graphs where the nodes and edges correspond to the elements and the connections, respectively. However, these networks often appear as 'hair-balls'-with a large number of extremely tangled edges-and cannot be visually interpreted.

Results

We present an interactive, multiscale navigation method for biological networks. Our approach can automatically and rapidly abstract any portion of a large network of interest to an immediately interpretable extent. The method is based on an ultrafast graph clustering technique that abstracts networks of about 100 000 nodes in a second by iteratively grouping densely connected portions and a biological-property-based clustering technique that takes advantage of biological information often provided for biological entities (e.g. Gene Ontology terms). It was confirmed to be effective by applying it to real yeast protein network data, and would greatly help modern biologists faced with large, complicated networks in a similar manner to how Web mapping services enable interactive multiscale navigation of geographical maps (e.g. Google Maps).

Availability

Java implementation of our method, named NaviCluster, is available at http://navicluster.cb.k.u-tokyo.ac.jp/.

Contact

thanet@cb.k.u-tokyo.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-23 +22111688,MMSAT: automated quantification of metabolites in selected reaction monitoring experiments.,"Selected reaction monitoring (SRM) is a mass spectrometry-based approach commonly used to increase analytical sensitivity and selectivity for specific compounds in complex metabolomic samples. While the goal of well-designed SRM methods is to monitor for unique precursor-product ion pairs, in practice this is not always possible due to the diversity of the metabolome and the resolution limits of mass spectrometers that are capable of SRM. Isobaric or near-isobaric precursor ions with different chromatographic properties but identical product ions often arise in complex samples. Without analytical standards, such metabolites will go undetected by conventional data analysis methods. Furthermore, a single SRM method may include simultaneous monitoring of tens to hundreds of different metabolites across multiple samples making quantification of all detected ions a challenging task. To facilitate the analysis of SRM data from complex metabolomic samples, we have developed the Metabolite Mass Spectrometry Analysis Tool (MMSAT). MMSAT is a web-based tool that objectively quantifies every metabolite peak detected in a set of samples and aligns peaks across multiple samples to enable quantitative comparison of each metabolite between samples. The analysis incorporates quantification of multiple peaks/ions that have different chromatographic retention times but are detected within a single SRM transition. We compare the performance of MMSAT against existing tools using a human glioblastoma tissue extract and illustrate its ability to automatically quantify multiple precursors within each of three different transitions. The Web-interface and source code are available at http://www.cancerresearch.unsw.edu.au/crcweb.nsf/page/MMSAT .",2011-12-13 +22171334,Detection of microRNAs in color space.,"

Motivation

Deep sequencing provides inexpensive opportunities to characterize the transcriptional diversity of known genomes. The AB SOLiD technology generates millions of short sequencing reads in color-space; that is, the raw data is a sequence of colors, where each color represents 2 nt and each nucleotide is represented by two consecutive colors. This strategy is purported to have several advantages, including increased ability to distinguish sequencing errors from polymorphisms. Several programs have been developed to map short reads to genomes in color space. However, a number of previously unexplored technical issues arise when using SOLiD technology to characterize microRNAs.

Results

Here we explore these technical difficulties. First, since the sequenced reads are longer than the biological sequences, every read is expected to contain linker fragments. The color-calling error rate increases toward the 3' end of the read such that recognizing the linker sequence for removal becomes problematic. Second, mapping in color space may lead to the loss of the first nucleotide of each read. We propose a sequential trimming and mapping approach to map small RNAs. Using our strategy, we reanalyze three published insect small RNA deep sequencing datasets and characterize 22 new microRNAs.

Availability and implementation

A bash shell script to perform the sequential trimming and mapping procedure, called SeqTrimMap, is available at: http://www.mirbase.org/tools/seqtrimmap/

Contact

antonio.marco@manchester.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-09 +23497426,A gene expression profile test to resolve head & neck squamous versus lung squamous cancers.,"

Background

The differential diagnosis between metastatic head & neck squamous cell carcinomas (HNSCC) and lung squamous cell carcinomas (lung SCC) is often unresolved because the histologic appearance of these two tumor types is similar. We have developed and validated a gene expression profile test (GEP-HN-LS) that distinguishes HNSCC and lung SCC in formalin-fixed, paraffin-embedded (FFPE) specimens using a 2160-gene classification model.

Methods

The test was validated in a blinded study using a pre-specified algorithm and microarray data files for 76 metastatic or poorly-differentiated primary tumors with a known HNSCC or lung SCC diagnosis.

Results

The study met the primary Bayesian statistical endpoint for acceptance. Measures of test performance include overall agreement with the known diagnosis of 82.9% (95% CI, 72.5% to 90.6%), an area under the ROC curve (AUC) of 0.91 and a diagnostics odds ratio (DOR) of 23.6. HNSCC (N = 38) gave an agreement with the known diagnosis of 81.6% and lung SCC (N = 38) gave an agreement of 84.2%. Reproducibility in test results between three laboratories had a concordance of 91.7%.

Conclusion

GEP-HN-LS can aid in resolving the important differential diagnosis between HNSCC and lung SCC tumors.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1753227817890930.",2013-03-11 +22923298,MEGA-CC: computing core of molecular evolutionary genetics analysis program for automated and iterative data analysis.,"

Unlabelled

There is a growing need in the research community to apply the molecular evolutionary genetics analysis (MEGA) software tool for batch processing a large number of datasets and to integrate it into analysis workflows. Therefore, we now make available the computing core of the MEGA software as a stand-alone executable (MEGA-CC), along with an analysis prototyper (MEGA-Proto). MEGA-CC provides users with access to all the computational analyses available through MEGA's graphical user interface version. This includes methods for multiple sequence alignment, substitution model selection, evolutionary distance estimation, phylogeny inference, substitution rate and pattern estimation, tests of natural selection and ancestral sequence inference. Additionally, we have upgraded the source code for phylogenetic analysis using the maximum likelihood methods for parallel execution on multiple processors and cores. Here, we describe MEGA-CC and outline the steps for using MEGA-CC in tandem with MEGA-Proto for iterative and automated data analysis.

Availability

http://www.megasoftware.net/.",2012-08-24 +21857117,Eu-Detect: an algorithm for detecting eukaryotic sequences in metagenomic data sets.,"Physical partitioning techniques are routinely employed (during sample preparation stage) for segregating the prokaryotic and eukaryotic fractions of metagenomic samples. In spite of these efforts, several metagenomic studies focusing on bacterial and archaeal populations have reported the presence of contaminating eukaryotic sequences in metagenomic data sets. Contaminating sequences originate not only from genomes of micro-eukaryotic species but also from genomes of (higher) eukaryotic host cells. The latter scenario usually occurs in the case of host-associated metagenomes. Identification and removal of contaminating sequences is important, since these sequences not only impact estimates of microbial diversity but also affect the accuracy of several downstream analyses. Currently, the computational techniques used for identifying contaminating eukaryotic sequences, being alignment based, are slow, inefficient, and require huge computing resources. In this article, we present Eu-Detect, an alignment-free algorithm that can rapidly identify eukaryotic sequences contaminating metagenomic data sets. Validation results indicate that on a desktop with modest hardware specifications, the Eu-Detect algorithm is able to rapidly segregate DNA sequence fragments of prokaryotic and eukaryotic origin, with high sensitivity. A Web server for the Eu-Detect algorithm is available at http://metagenomics.atc.tcs.com/Eu-Detect/.",2011-09-01 +21575225,NSMAP: a method for spliced isoforms identification and quantification from RNA-Seq.,"

Background

The development of techniques for sequencing the messenger RNA (RNA-Seq) makes it possible to study biological mechanisms such as alternative splicing and gene expression regulation more deeply and accurately. Most existing methods employ RNA-Seq to quantify the expression levels of already annotated isoforms from the reference genome. However, the current reference genome is very incomplete due to the complexity of the transcriptome which hinders the comprehensive investigation of the transcriptome using RNA-Seq. Novel study on isoform inference and estimation purely from RNA-Seq without annotation information is desirable.

Results

A Nonnegativity and Sparsity constrained Maximum APosteriori (NSMAP) model has been proposed to estimate the expression levels of isoforms from RNA-Seq data without the annotation information. In contrast to previous methods, NSMAP performs identification of the structures of expressed isoforms and estimation of the expression levels of those expressed isoforms simultaneously, which enables better identification of isoforms. In the simulations parameterized by two real RNA-Seq data sets, more than 77% expressed isoforms are correctly identified and quantified. Then, we apply NSMAP on two RNA-Seq data sets of myelodysplastic syndromes (MDS) samples and one normal sample in order to identify differentially expressed known and novel isoforms in MDS disease.

Conclusions

NSMAP provides a good strategy to identify and quantify novel isoforms without the knowledge of annotated reference genome which can further realize the potential of RNA-Seq technique in transcriptome analysis. NSMAP package is freely available at https://sites.google.com/site/nsmapforrnaseq.",2011-05-16 +22359444,Ectomychorrizal DB: a symbiotic association database.,"

Unlabelled

Ectomycorrhizal (ECM) fungal species, a ""Symbiotic"" relationship between trees and fungi in forests, have great ecological and economic importance. Here is an attempt to describe a database named ""EctomycorrhizalDB"", addressing ECM diversity of Central Himalaya (Kumaun region), with special emphasis on their characterization, physical properties and morphological features along with specifications. This database would help the scientific community to draw a better understanding of the environmental factors that affect species diversity.

Availability

The database is available for free at http://www.kubic.nic.in/ectomychorhiza.",2012-01-20 +24063302,Comparison of RefSeq protein-coding regions in human and vertebrate genomes.,"

Background

Advances in high-throughput sequencing technology have yielded a large number of publicly available vertebrate genomes, many of which are selected for inclusion in NCBI's RefSeq project and subsequently processed by NCBI's eukaryotic annotation pipeline. Genome annotation results are affected by differences in available support evidence and may be impacted by annotation pipeline software changes over time. The RefSeq project has not previously assessed annotation trends across organisms or over time. To address this deficiency, we have developed a comparative protocol which integrates analysis of annotated protein-coding regions across a data set of vertebrate orthologs in genomic sequence coordinates, protein sequences, and protein features.

Results

We assessed an ortholog dataset that includes 34 annotated vertebrate RefSeq genomes including human. We confirm that RefSeq protein-coding gene annotations in mammals exhibit considerable similarity. Over 50% of the orthologous protein-coding genes in 20 organisms are supported at the level of splicing conservation with at least three selected reference genomes. Approximately 7,500 ortholog sets include at least half of the analyzed organisms, show highly similar sequence and conserved splicing, and may serve as a minimal set of mammalian ""core proteins"" for initial assessment of new mammalian genomes. Additionally, 80% of the proteins analyzed pass a suite of tests to detect proteins that lack splicing conservation and have unusual sequence or domain annotation. We use these tests to define an annotation quality metric that is based directly on the annotated proteins thus operates independently of other quality metrics such as availability of transcripts or assembly quality measures. Results are available on the RefSeq FTP site [http://ftp.ncbi.nlm.nih.gov/refseq/supplemental/ProtCore/SM1.txt].

Conclusions

Our multi-factored analysis demonstrates a high level of consistency in RefSeq protein representation among vertebrates. We find that the majority of the RefSeq vertebrate proteins for which we have calculated orthology are good as measured by these metrics. The process flow described provides specific information on the scope and degree of conservation for the analyzed protein sequences and annotations and will be used to enrich the quality of RefSeq records by identifying targets for further improvement in the computational annotation pipeline, and by flagging specific genes for manual curation.",2013-09-25 +21682143,Development and validation of a robust QSAR model for prediction of carcinogenicity of drugs.,"Carcinogenicity is one of the toxicological endpoints causing the highest concern. Also, the standard bioassays in rodents used to assess the carcinogenic potential of chemicals and drugs are extremely long, costly and require the sacrifice of large numbers of animals. For these reasons, we have attempted development of a global quantitative structure-activity relationship (QSAR) model using a data set of 1464 compounds (the Galvez data set available from http://www.uv.es/-galvez/tablevi.pdf), including many marketed drugs for their carcinogenesis potential. Though experimental toxicity testing using animal models is unavoidable for new drug candidates at an advanced stage of drug development, yet the developed global QSAR model can in silico predict the carcinogenicity of new drug compounds to provide a tool for initial screening of new drug candidate molecules with reduced number of animal testing, money and time. Considering large number of data points with diverse structural features used for model development (n(training) = 732) and model validation (n(test) = 732), the model developed in this study has an encouraging statistical quality (leave-one-out Q2 = 0.731, R2pred = 0.716). 
Our developed model suggests that higher lipophilicity values and conjugated ring systems, thioketo and nitro groups contribute positively towards drug carcinogenicity. On the contrary, tertiary and secondary nitrogens, phenolic, enolic and carboxylic OH fragments and presence of three-membered rings reduce the carcinogenicity. Branching, size and shape are found to be crucial factors for drug-induced carcinogenicity. One may consider all these points to reduce carcinogenic potential of the molecules.",2011-04-01 +21342549,Score regularization for peptide identification.,"

Background

Peptide identification from tandem mass spectrometry (MS/MS) data is one of the most important problems in computational proteomics. This technique relies heavily on the accurate assessment of the quality of peptide-spectrum matches (PSMs). However, current MS technology and PSM scoring algorithm are far from perfect, leading to the generation of incorrect peptide-spectrum pairs. Thus, it is critical to develop new post-processing techniques that can distinguish true identifications from false identifications effectively.

Results

In this paper, we present a consistency-based PSM re-ranking method to improve the initial identification results. This method uses one additional assumption that two peptides belonging to the same protein should be correlated to each other. We formulate an optimization problem that embraces two objectives through regularization: the smoothing consistency among scores of correlated peptides and the fitting consistency between new scores and initial scores. This optimization problem can be solved analytically. The experimental study on several real MS/MS data sets shows that this re-ranking method improves the identification performance.

Conclusions

The score regularization method can be used as a general post-processing step for improving peptide identifications. Source codes and data sets are available at: http://bioinformatics.ust.hk/SRPI.rar.",2011-02-15 +22403431,"Automating HIV drug resistance genotyping with RECall, a freely accessible sequence analysis tool.","Genotypic HIV drug resistance testing is routinely used to guide clinical decisions. While genotyping methods can be standardized, a slow, labor-intensive, and subjective manual sequence interpretation step is required. We therefore performed external validation of our custom software RECall, a fully automated sequence analysis pipeline. HIV-1 drug resistance genotyping was performed on 981 clinical samples at the Stanford Diagnostic Virology Laboratory. Sequencing trace files were first interpreted manually by a laboratory technician and subsequently reanalyzed by RECall, without intervention. The relative performances of the two methods were assessed by determination of the concordance of nucleotide base calls, identification of key resistance-associated substitutions, and HIV drug resistance susceptibility scoring by the Stanford Sierra algorithm. RECall is freely available at http://pssm.cfenet.ubc.ca. In total, 875 of 981 sequences were analyzed by both human and RECall interpretation. RECall analysis required minimal hands-on time and resulted in a 25-fold improvement in processing speed (∼150 technician-hours versus ∼6 computation-hours). Excellent concordance was obtained between human and automated RECall interpretation (99.7% agreement for >1,000,000 bases compared). Nearly all discordances (99.4%) were due to nucleotide mixtures being called by one method but not the other. Similarly, 98.6% of key antiretroviral resistance-associated mutations observed were identified by both methods, resulting in 98.5% concordance of resistance susceptibility interpretations. 
This automated sequence analysis tool provides both standardization of analysis and a significant improvement in data workflow. The time-consuming, error-prone, and dreadfully boring manual sequence analysis step is replaced with a fully automated system without compromising the accuracy of reported HIV drug resistance data.",2012-03-07 +21460061,"Shotgun proteomics aids discovery of novel protein-coding genes, alternative splicing, and ""resurrected"" pseudogenes in the mouse genome.","Recent advances in proteomic mass spectrometry (MS) offer the chance to marry high-throughput peptide sequencing to transcript models, allowing the validation, refinement, and identification of new protein-coding loci. We present a novel pipeline that integrates highly sensitive and statistically robust peptide spectrum matching with genome-wide protein-coding predictions to perform large-scale gene validation and discovery in the mouse genome for the first time. In searching an excess of 10 million spectra, we have been able to validate 32%, 17%, and 7% of all protein-coding genes, exons, and splice boundaries, respectively. Moreover, we present strong evidence for the identification of multiple alternatively spliced translations from 53 genes and have uncovered 10 entirely novel protein-coding genes, which are not covered in any mouse annotation data sources. One such novel protein-coding gene is a fusion protein that spans the Ins2 and Igf2 loci to produce a transcript encoding the insulin II and the insulin-like growth factor 2-derived peptides. We also report nine processed pseudogenes that have unique peptide hits, demonstrating, for the first time, that they are not just transcribed but are translated and are therefore resurrected into new coding loci. This work not only highlights an important utility for MS data in genome annotation but also provides unique insights into the gene structure and propagation in the mouse genome. 
All these data have been subsequently used to improve the publicly available mouse annotation available in both the Vega and Ensembl genome browsers (http://vega.sanger.ac.uk).",2011-04-01 +21345871,BiC: a web server for calculating bimodality of coexpression between gene and protein networks.,"

Unlabelled

Bimodal patterns of expression have recently been shown to be useful not only in prioritizing genes that distinguish phenotypes, but also in prioritizing network models that correlate with proteomic evidence. In particular, subgroups of strongly coexpressed gene pairs result in an increased variance of the correlation distribution. This variance, a measure of association between sets of genes (or proteins), can be summarized as the bimodality of coexpression (BiC). We developed an online tool to calculate the BiC for user-defined gene lists and associated mRNA expression data. BiC is a comprehensive application that provides researchers with the ability to analyze both publicly available and user-collected array data.

Availability

The freely available web service and the documentation can be accessed at http://gurkan.case.edu/software.

Contact

gurkan@case.edu.",2011-02-23 +22554833,VEGF and angiopoietins in diabetic glomerulopathy: how far for a new treatment?,"Diabetic nephropathy (DN) is the major cause of end-stage renal disease in Western countries and its prevalence continues to increase (United States Renal Data System 2010, http://www.usrds.org/). Treatments currently utilised for DN provide only partial renoprotection, hence the need to identify new targets for therapeutic intervention. Metabolic and haemodynamic abnormalities have been implicated in the pathogenesis of DN, triggering the activation of intracellular signaling molecules that lead to the dysregulation of vascular growth factors and cytokines, such as vascular endothelial growth factor (VEGF) and angiopoietins, important players in the functional and structural regulation of the glomerular filtration barrier. This review focuses on the importance of VEGF-A and angiopoietins in kidney physiology and in the diabetic kidney, exploring their potential therapeutic role in the prevention and delay of diabetic glomerulopathy.",2012-05-01 +23162086,Visualization and Phospholipid Identification (VaLID): online integrated search engine capable of identifying and visualizing glycerophospholipids with given mass.,"

Motivation

Establishing phospholipid identities in large lipidomic datasets is a labour-intensive process. Where genomics and proteomics capitalize on sequence-based signatures, glycerophospholipids lack easily definable molecular fingerprints. Carbon chain length, degree of unsaturation, linkage, and polar head group identity must be calculated from mass to charge (m/z) ratios under defined mass spectrometry (MS) conditions. Given increasing MS sensitivity, many m/z values are not represented in existing prediction engines. To address this need, Visualization and Phospholipid Identification is a web-based application that returns all theoretically possible phospholipids for any m/z value and MS condition. Visualization algorithms produce multiple chemical structure files for each species. Curated lipids detected by the Canadian Institutes of Health Research Training Program in Neurodegenerative Lipidomics are provided as high-resolution structures.

Availability

VaLID is available through the Canadian Institutes of Health Research Training Program in Neurodegenerative Lipidomics resources web site at https://www.med.uottawa.ca/lipidomics/resources.html.

Contacts

lipawrd@uottawa.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-18 +22247275,Roundup 2.0: enabling comparative genomics for over 1800 genomes.,"

Unlabelled

Roundup is an online database of gene orthologs for over 1800 genomes, including 226 Eukaryota, 1447 Bacteria, 113 Archaea and 21 Viruses. Orthologs are inferred using the Reciprocal Smallest Distance algorithm. Users may query Roundup for single-linkage clusters of orthologous genes based on any group of genomes. Annotated query results may be viewed in a variety of ways including as clusters of orthologs and as phylogenetic profiles. Genomic results may be downloaded in formats suitable for functional as well as phylogenetic analysis, including the recent OrthoXML standard. In addition, gene IDs can be retrieved using FASTA sequence search. All source code and orthologs are freely available.

Availability

http://roundup.hms.harvard.edu.",2012-01-13 +23410359,Improved contact prediction in proteins: using pseudolikelihoods to infer Potts models.,"Spatially proximate amino acids in a protein tend to coevolve. A protein's three-dimensional (3D) structure hence leaves an echo of correlations in the evolutionary record. Reverse engineering 3D structures from such correlations is an open problem in structural biology, pursued with increasing vigor as more and more protein sequences continue to fill the data banks. Within this task lies a statistical inference problem, rooted in the following: correlation between two sites in a protein sequence can arise from firsthand interaction but can also be network-propagated via intermediate sites; observed correlation is not enough to guarantee proximity. To separate direct from indirect interactions is an instance of the general problem of inverse statistical mechanics, where the task is to learn model parameters (fields, couplings) from observables (magnetizations, correlations, samples) in large systems. In the context of protein sequences, the approach has been referred to as direct-coupling analysis. Here we show that the pseudolikelihood method, applied to 21-state Potts models describing the statistical properties of families of evolutionarily related proteins, significantly outperforms existing approaches to the direct-coupling analysis, the latter being based on standard mean-field techniques. This improved performance also relies on a modified score for the coupling strength. The results are verified using known crystal structures of specific sequence instances of various protein families. Code implementing the new method can be found at http://plmdca.csc.kth.se/.",2013-01-11 +21551140,Application of the Bayesian MMSE estimator for classification error to gene expression microarray data.,"

Motivation

With the development of high-throughput genomic and proteomic technologies, coupled with the inherent difficulties in obtaining large samples, biomedicine faces difficult small-sample classification issues, in particular, error estimation. Most popular error estimation methods are motivated by intuition rather than mathematical inference. A recently proposed error estimator based on Bayesian minimum mean square error estimation places error estimation in an optimal filtering framework. In this work, we examine the application of this error estimator to gene expression microarray data, including the suitability of the Gaussian model with normal-inverse-Wishart priors and how to find prior probabilities.

Results

We provide an implementation for non-linear classification, where closed form solutions are not available. We propose a methodology for calibrating normal-inverse-Wishart priors based on discarded microarray data and examine the performance on synthetic high-dimensional data and a real dataset from a breast cancer study. The calibrated Bayesian error estimator has superior root mean square performance, especially with moderate to high expected true errors and small feature sizes.

Availability

We have implemented in C code the Bayesian error estimator for Gaussian distributions and normal-inverse-Wishart priors for both linear classifiers, with exact closed-form representations, and arbitrary classifiers, where we use a Monte Carlo approximation. Our code for the Bayesian error estimator and a toolbox of related utilities are available at http://gsp.tamu.edu/Publications/supplementary/dalton11a. Several supporting simulations are also included.

Contact

ldalton@tamu.edu",2011-05-05 +22238261,"SSuMMo: rapid analysis, comparison and visualization of microbial communities.","

Motivation

Next-generation sequencing methods are generating increasingly massive datasets, yet still do not fully capture genetic diversity in the richest environments. To understand such complicated and elusive systems, effective tools are needed to assist with delineating the differences found in and between community datasets.

Results

The Small Subunit Markov Modeler (SSuMMo) was developed to probabilistically assign SSU rRNA gene fragments from any sequence dataset to recognized taxonomic clades, producing consistent, comparable cladograms. Accuracy tests predicted >90% of genera correctly for sequences downloaded from public reference databases. Sequences from a next-generation sequence dataset, sampled from lean, overweight and obese individuals, were analysed to demonstrate parallel visualization of comparable datasets. SSuMMo shows potential as a valuable curatorial tool, as numerous incorrect and outdated taxonomic entries and annotations were identified in public databases.

Availability and implementation

SSuMMo is GPLv3 open source Python software, available at http://code.google.com/p/ssummo/. Taxonomy and HMM databases can be downloaded from http://bioltfws1.york.ac.uk/ssummo/.

Supplementary information

Supplemental materials are available at Bioinformatics Online.",2012-01-11 +21551143,A multiple network learning approach to capture system-wide condition-specific responses.,"

Motivation

Condition-specific networks capture system-wide behavior under varying conditions such as environmental stresses, cell types or tissues. These networks frequently comprise parts that are unique to each condition, and parts that are shared among related conditions. Existing approaches for learning condition-specific networks typically identify either only differences or only similarities across conditions. Most of these approaches first learn networks per condition independently, and then identify similarities and differences in a post-learning step. Such approaches do not exploit the shared information across conditions during network learning.

Results

We describe an approach for learning condition-specific networks that identifies the shared and unique subgraphs during network learning simultaneously, rather than as a post-processing step. Our approach learns networks across condition sets, shares data from different conditions and produces high-quality networks that capture biologically meaningful information. On simulated data, our approach outperformed an existing approach that learns networks independently for each condition, especially for small training datasets. On microarray data of hundreds of deletion mutants in two yeast stationary-phase cell populations, the inferred network structure identified several common and population-specific effects of these deletion mutants and several high-confidence cases of double-deletion pairs, which can be experimentally tested. Our results are consistent with and extend the existing knowledge base of differentiated cell populations in yeast stationary phase.

Availability and implementation

C++ code can be accessed from http://www.broadinstitute.org/~sroy/condspec/ .",2011-05-05 +23243273,Ontogeny of erythroid gene expression.,"Erythroid ontogeny is characterized by overlapping waves of primitive and definitive erythroid lineages that share many morphologic features during terminal maturation but have marked differences in cell size and globin expression. In the present study, we compared global gene expression in primitive, fetal definitive, and adult definitive erythroid cells at morphologically equivalent stages of maturation purified from embryonic, fetal, and adult mice. Surprisingly, most transcriptional complexity in erythroid precursors is already present by the proerythroblast stage. Transcript levels are markedly modulated during terminal erythroid maturation, but housekeeping genes are not preferentially lost. Although primitive and definitive erythroid lineages share a large set of nonhousekeeping genes, annotation of lineage-restricted genes shows that alternate gene usage occurs within shared functional categories, as exemplified by the selective expression of aquaporins 3 and 8 in primitive erythroblasts and aquaporins 1 and 9 in adult definitive erythroblasts. Consistent with the known functions of Aqp3 and Aqp8 as H2O2 transporters, primitive, but not definitive, erythroblasts preferentially accumulate reactive oxygen species after exogenous H2O2 exposure. We have created a user-friendly Web site (http://www.cbil.upenn.edu/ErythronDB) to make these global expression data readily accessible and amenable to complex search strategies by the scientific community.",2012-12-12 +22591474,Virus-ECC-mPLoc: a multi-label predictor for predicting the subcellular localization of virus proteins with both single and multiple sites based on a general form of Chou's pseudo amino acid composition.,"Protein subcellular localization aims at predicting the location of a protein within a cell using computational methods. 
Knowledge of subcellular localization of viral proteins in a host cell or virus-infected cell is important because it is closely related to their destructive tendencies and consequences. Prediction of viral protein subcellular localization is an important but challenging problem, particularly when proteins may simultaneously exist at, or move between, two or more different subcellular location sites. Most of the existing protein subcellular localization methods specialized for viral proteins are only used to deal with the single-location proteins. To better reflect the characteristics of multiplex proteins, a new predictor, called Virus-ECC-mPLoc, has been developed that can be used to deal with the systems containing both singleplex and multiplex proteins by introducing a powerful multi-label learning approach which exploits correlations between subcellular locations and by hybridizing the gene ontology information with the dipeptide composition information. It can be utilized to identify viral proteins among the following six locations: (1) viral capsid, (2) host cell membrane, (3) host endoplasmic reticulum, (4) host cytoplasm, (5) host nucleus, and (6) secreted. Experimental results show that the overall success rates thus obtained by Virus-ECC-mPLoc are 86.9% for jackknife test and 87.2% for independent data set test, which are significantly higher than that by any of the existing predictors. As a user-friendly web-server, Virus-ECCmPLoc is freely accessible to the public at the web-site http://levis.tongji.edu.cn:8080/bioinfo/Virus-ECC-mPLoc/.",2013-03-01 +22967795,Improved variation calling via an iterative backbone remapping and local assembly method for bacterial genomes.,"Sequencing data analysis remains limiting and problematic, especially for low complexity repeat sequences and transposon elements due to inherent sequencing errors and short sequence read lengths. 
We have developed a program, ReviSeq, which uses a hybrid method composed of iterative remapping and local assembly upon a bacterial sequence backbone. Application of this method to six Brucella suis field isolates compared to the newly revised B. suis 1330 reference genome identified on average 13, 15, 19 and 9 more variants per sample than STAMPY/SAMtools, BWA/SAMtools, iCORN and BWA/PINDEL pipelines, and excluded on average 4, 2, 3 and 19 variants per sample, respectively. In total, using this iterative approach, we identified on average 87 variants including SNVs, short INDELs and long INDELs per strain when compared to the reference. Our program outperforms other methods especially for long INDEL calling. The program is available at http://reviseq.sourceforge.net.",2012-08-10 +22617482,Proceedings of the 2012 Rheumatology Winter Clinical Symposia.,"Recent years have witnessed important developments in rheumatology. Novel diagnostic methods, stratification approaches, and treatment paradigms have been brought into the clinic for a number of rheumatologic and autoimmune diseases. In addition, there have been developments in related medical disciplines that are relevant to the care of patients with rheumatic diseases. Keeping pace with these many developments is a challenge, and clinical rheumatologists have used various methods to educate themselves about these advances. In January 2012, the 5th annual Rheumatology Winter Clinical Symposium was held. At this meeting, faculty and participants held discussions and exchanged knowledge about new scientific data and how it may impact the care of rheumatology patients. Excerpts from some of the lectures from the Rheumatology Winter Clinical Symposium 2012 are included in this review. These and other presentations can be viewed in their entirety at http://www.r-w-c-s.com.",2012-06-01 +21325302,Proteomics to go: Proteomatic enables the user-friendly creation of versatile MS/MS data evaluation workflows.,"

Unlabelled

We present Proteomatic, an operating system independent and user-friendly platform that enables the construction and execution of MS/MS data evaluation pipelines using free and commercial software. Required external programs such as for peptide identification are downloaded automatically in the case of free software. Due to a strict separation of functionality and presentation, and support for multiple scripting languages, new processing steps can be added easily.

Availability and implementation

Proteomatic is implemented in C++/Qt, scripts are implemented in Ruby, Python and PHP. All source code is released under the LGPL. Source code and installers for Windows, Mac OS X, and Linux are freely available at http://www.proteomatic.org.

Contact

michael.specht@uni-muenster.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-16 +23468606,Spartan: a comprehensive tool for understanding uncertainty in simulations of biological systems.,"Integrating computer simulation with conventional wet-lab research has proven to have much potential in furthering the understanding of biological systems. Success requires the relationship between simulation and the real-world system to be established: substantial aspects of the biological system are typically unknown, and the abstract nature of simulation can complicate interpretation of in silico results in terms of the biology. Here we present spartan (Simulation Parameter Analysis RToolkit ApplicatioN), a package of statistical techniques specifically designed to help researchers understand this relationship and provide novel biological insight. The tools comprising spartan help identify which simulation results can be attributed to the dynamics of the modelled biological system, rather than artefacts of biological uncertainty or parametrisation, or simulation stochasticity. Statistical analyses reveal the influence that pathways and components have on simulation behaviour, offering valuable biological insight into aspects of the system under study. We demonstrate the power of spartan in providing critical insight into aspects of lymphoid tissue development in the small intestine through simulation. Spartan is released under a GPLv2 license, implemented within the open source R statistical environment, and freely available from both the Comprehensive R Archive Network (CRAN) and http://www.cs.york.ac.uk/spartan. The techniques within the package can be applied to traditional ordinary or partial differential equation simulations as well as agent-based implementations. 
Manuals, comprehensive tutorials, and example simulation data upon which spartan can be applied are available from the website.",2013-02-28 +23586520,Predicting substrates of the human breast cancer resistance protein using a support vector machine method.,"

Background

Human breast cancer resistance protein (BCRP) is an ATP-binding cassette (ABC) efflux transporter that confers multidrug resistance in cancers and also plays an important role in the absorption, distribution and elimination of drugs. Prediction as to if drugs or new molecular entities are BCRP substrates should afford a cost-effective means that can help evaluate the pharmacokinetic properties, efficacy, and safety of these drugs or drug candidates. At present, limited studies have been done to develop in silico prediction models for BCRP substrates. In this study, we developed support vector machine (SVM) models to predict wild-type BCRP substrates based on a total of 263 known BCRP substrates and non-substrates collected from literature. The final SVM model was integrated to a free web server.

Results

We showed that the final SVM model had an overall prediction accuracy of ~73% for an independent external validation data set of 40 compounds. The prediction accuracy for wild-type BCRP substrates was ~76%, which is higher than that for non-substrates. The free web server (http://bcrp.althotas.com) allows the users to predict whether a query compound is a wild-type BCRP substrate and calculate its physicochemical properties such as molecular weight, logP value, and polarizability.

Conclusions

We have developed an SVM prediction model for wild-type BCRP substrates based on a relatively large number of known wild-type BCRP substrates and non-substrates. This model may prove valuable for screening substrates and non-substrates of BCRP, a clinically important ABC efflux drug transporter.",2013-04-15 +22166371,Towards cross-lingual alerting for bursty epidemic events.,"

Background

Online news reports are increasingly becoming a source for event-based early warning systems that detect natural disasters. Harnessing the massive volume of information available from multilingual newswire presents as many challenges as opportunities due to the patterns of reporting complex spatio-temporal events.

Results

In this article we study the problem of utilising correlated event reports across languages. We track the evolution of 16 disease outbreaks using 5 temporal aberration detection algorithms on text-mined events classified according to disease and outbreak country. Using ProMED reports as a silver standard, comparative analysis of news data for 13 languages over a 129 day trial period showed improved sensitivity, F1 and timeliness across most models using cross-lingual events. We report a detailed case study analysis for Cholera in Angola 2010 which highlights the challenges faced in correlating news events with the silver standard.

Conclusions

The results show that automated health surveillance using multilingual text mining has the potential to turn low value news into high value alerts if informed choices are used to govern the selection of models and data sources. An implementation of the C2 alerting algorithm using multilingual news is available at the BioCaster portal http://born.nii.ac.jp/?page=globalroundup.",2011-10-06 +23184517,Fast algorithm for population-based protein structural model analysis.,"De novo protein structure prediction often generates a large population of candidates (models), and then selects near-native models through clustering. Existing structural model clustering methods are time consuming due to pairwise distance calculation between models. In this paper, we present a novel method for fast model clustering without losing the clustering accuracy. Instead of the commonly used pairwise root mean square deviation and TM-score values, we propose two new distance measures, Dscore1 and Dscore2, based on the comparison of the protein distance matrices for describing the difference and the similarity among models, respectively. The analysis indicates that both the correlation between Dscore1 and root mean square deviation and the correlation between Dscore2 and TM-score are high. Compared to the existing methods with calculation time quadratic to the number of models, our Dscore1-based clustering achieves a linearly time complexity while obtaining almost the same accuracy for near-native model selection. By using Dscore2 to select representatives of clusters, we can further improve the quality of the representatives with little increase in computing time. In addition, for large size (~500 k) models, we can give a fast data visualization based on the Dscore distribution in seconds to minutes. 
Our method has been implemented in a package named MUFOLD-CL, available at http://mufold.org/clustering.php.",2013-01-03 +22219203,PASSion: a pattern growth algorithm-based pipeline for splice junction detection in paired-end RNA-Seq data.,"

Motivation

RNA-seq is a powerful technology for the study of transcriptome profiles that uses deep-sequencing technologies. Moreover, it may be used for cellular phenotyping and help establishing the etiology of diseases characterized by abnormal splicing patterns. In RNA-Seq, the exact nature of splicing events is buried in the reads that span exon-exon boundaries. The accurate and efficient mapping of these reads to the reference genome is a major challenge.

Results

We developed PASSion, a pattern growth algorithm-based pipeline for splice site detection in paired-end RNA-Seq reads. Comparing the performance of PASSion to three existing RNA-Seq analysis pipelines, TopHat, MapSplice and HMMSplicer, revealed that PASSion is competitive with these packages. Moreover, the performance of PASSion is not affected by read length and coverage. It performs better than the other three approaches when detecting junctions in highly abundant transcripts. PASSion has the ability to detect junctions that do not have known splicing motifs, which cannot be found by the other tools. Of the two public RNA-Seq datasets, PASSion predicted ≈ 137,000 and 173,000 splicing events, of which on average 82% are known junctions annotated in the Ensembl transcript database and 18% are novel. In addition, our package can discover differential and shared splicing patterns among multiple samples.

Availability

The code and utilities can be freely downloaded from https://trac.nbic.nl/passion and ftp://ftp.sanger.ac.uk/pub/zn1/passion.",2012-01-04 +23734783,Bellerophon: a hybrid method for detecting interchromosomal rearrangements at base pair resolution using next-generation sequencing data.,"

Background

Somatically-acquired translocations may serve as important markers for assessing the cause and nature of diseases like cancer. Algorithms to locate translocations may use next-generation sequencing (NGS) platform data. However, paired-end strategies do not accurately predict precise translocation breakpoints, and ""split-read"" methods may lose sensitivity if a translocation boundary is not captured by many sequenced reads. To address these challenges, we have developed ""Bellerophon"", a method that uses discordant read pairs to identify potential translocations, and subsequently uses ""soft-clipped"" reads to predict the location of the precise breakpoints. Furthermore, for each chimeric breakpoint, our method attempts to classify it as a participant in an unbalanced translocation, balanced translocation, or interchromosomal insertion.

Results

We compared Bellerophon to four previously published algorithms for detecting structural variation (SV). Using two simulated datasets and two prostate cancer datasets, Bellerophon had overall better performance than the other methods. Furthermore, our method accurately predicted the presence of the interchromosomal insertions placed in our simulated dataset, which is an ability that the other SV prediction programs lack.

Conclusions

The combined use of paired reads and soft-clipped reads allows Bellerophon to detect interchromosomal breakpoints with high sensitivity, while also mitigating losses in specificity. This trend is seen across all datasets examined. Because it does not perform assembly on soft-clipped subreads, Bellerophon may be limited in experiments where sequence read lengths are short.

Availability

The program can be downloaded from http://cbc.case.edu/Bellerophon.",2013-04-10 +22578220,Genomic characterisation of acral melanoma cell lines.,"Acral melanoma is a rare melanoma subtype with distinct epidemiological, clinical and genetic features. To determine if acral melanoma cell lines are representative of this melanoma subtype, six lines were analysed by whole-exome sequencing and array comparative genomic hybridisation. We demonstrate that the cell lines display a mutation rate that is comparable to that of published primary and metastatic acral melanomas and observe a mutational signature suggestive of UV-induced mutagenesis in two of the cell lines. Mutations were identified in oncogenes and tumour suppressors previously linked to melanoma including BRAF, NRAS, KIT, PTEN and TP53, in cancer genes not previously linked to melanoma and in genes linked to DNA repair such as BRCA1 and BRCA2. Our findings provide strong circumstantial evidence to suggest that acral melanoma cell lines and acral tumours share genetic features in common and that these cells are therefore valuable tools to investigate the biology of this aggressive melanoma subtype. Data are available at: http://rock.icr.ac.uk/collaborations/Furney_et_al_2012/.",2012-07-01 +22230096,ProFASTA: a pipeline web server for fungal protein scanning with integration of cell surface prediction software.,"Surface proteins, such as those located in the cell wall of fungi, play an important role in the interaction with the surrounding environment. For instance, they mediate primary host-pathogen interactions and are crucial to the establishment of biofilms and fungal infections. Surface localization of proteins is determined by specific sequence features and can be predicted by combining different freely available web servers. However, user-friendly tools that allow rapid analysis of large datasets (whole proteomes or larger) in subsequent analyses were not yet available. 
Here, we present the web tool ProFASTA, which integrates multiple tools for rapid scanning of protein sequence properties in large datasets and returns sequences in FASTA format. ProFASTA also allows for pipeline filtering of proteins with cell surface characteristics by analysis of the output created with SignalP, TMHMM and big-PI. In addition, it provides keyword, iso-electric point, composition and pattern scanning. Furthermore, ProFASTA contains all fungal protein sequences present in the NCBI Protein database. As the full fungal NCBI Taxonomy is included, sequence subsets can be selected by supplying a taxon name. The usefulness of ProFASTA is demonstrated here with a few examples; in the recent past, ProFASTA has already been applied successfully to the annotation of covalently-bound fungal wall proteins as part of community-wide genome annotation programs. ProFASTA is available at: http://www.bioinformatics.nl/tools/profasta/.",2012-01-03 +24334390,Characterizing the topology of probabilistic biological networks.,"

Unlabelled

Biological interactions are often uncertain events, that may or may not take place with some probability. This uncertainty leads to a massive number of alternative interaction topologies for each such network. The existing studies analyze the degree distribution of biological networks by assuming that all the given interactions take place under all circumstances. This strong and often incorrect assumption can lead to misleading results. In this paper, we address this problem and develop a sound mathematical basis to characterize networks in the presence of uncertain interactions. Using our mathematical representation, we develop a method that can accurately describe the degree distribution of such networks. We also take one more step and extend our method to accurately compute the joint-degree distributions of node pairs connected by edges. The number of possible network topologies grows exponentially with the number of uncertain interactions. However, the mathematical model we develop allows us to compute these degree distributions in polynomial time in the number of interactions. Our method works quickly even for entire protein-protein interaction (PPI) networks. It also helps us find an adequate mathematical model using MLE. We perform a comparative study of node-degree and joint-degree distributions in two types of biological networks: the classical deterministic networks and the more flexible probabilistic networks. Our results confirm that power-law and log-normal models best describe degree distributions for both probabilistic and deterministic networks. Moreover, the inverse correlation of degrees of neighboring nodes shows that, in probabilistic networks, nodes with large number of interactions prefer to interact with those with small number of interactions more frequently than expected. We also show that probabilistic networks are more robust for node-degree distribution computation than the deterministic ones.

Availability

all the data sets used, the software implemented and the alignments found in this paper are available at http://bioinformatics.cise.ufl.edu/projects/probNet/.",2013-07-01 +22350309,Triple-anchoring sub-SMAS face-lift.,"BACKGROUND: This article presents the senior author's (ARB) 10-year experience with a rhytidectomy technique that incorporates concepts of modern facial shaping and contour using a sub-SMAS repositioning of volumetric units with partial sub-SMAS elevation (tunnel dissection), periosteal anchoring of SMAS and volumetric units, and limited skin undermining. METHODS: The cases of triple-anchoring sub-SMAS face-lift performed from January 2000 to January 2010 were analyzed retrospectively. Primary and secondary cases were included. All case data and photography were analyzed. Data regarding patient age, sex, and operative time were gathered. Complication rates were also calculated. RESULTS: A total of 626 patients had a face-lift using the author's technique in this period. A total of 484 patients had a primary face-lift and 142 had secondary surgery. Experience with 626 rhytidectomies performed over a 10-year period indicates that this operation can be completed safely with satisfactory results and a low incidence of complications. To alter effectively the facial contour, a system that reliably elevates the facial volumetric units is needed. After adequate release of the retaining ligaments, the thick SMAS is used as a load-bearing layer. All the tension is transferred from the SMAS to a fixed periosteal anchor point. Each volumetric unit of the face (mala, lateral, and cervical) should be independently mobilized and fixed. CONCLUSION: The author's technique was shown to be effective in repositioning facial volumes. It was a predictable and reliable technique with few complications over the years. LEVEL OF EVIDENCE IV: This journal requires that authors assign a level of evidence to each article. 
For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors at http://www.springer.com/00266.",2012-02-21 +23428641,EBSeq: an empirical Bayes hierarchical model for inference in RNA-seq experiments.,"

Motivation

Messenger RNA expression is important in normal development and differentiation, as well as in manifestation of disease. RNA-seq experiments allow for the identification of differentially expressed (DE) genes and their corresponding isoforms on a genome-wide scale. However, statistical methods are required to ensure that accurate identifications are made. A number of methods exist for identifying DE genes, but far fewer are available for identifying DE isoforms. When isoform DE is of interest, investigators often apply gene-level (count-based) methods directly to estimates of isoform counts. Doing so is not recommended. In short, estimating isoform expression is relatively straightforward for some groups of isoforms, but more challenging for others. This results in estimation uncertainty that varies across isoform groups. Count-based methods were not designed to accommodate this varying uncertainty, and consequently, application of them for isoform inference results in reduced power for some classes of isoforms and increased false discoveries for others.

Results

Taking advantage of the merits of empirical Bayesian methods, we have developed EBSeq for identifying DE isoforms in an RNA-seq experiment comparing two or more biological conditions. Results demonstrate substantially improved power and performance of EBSeq for identifying DE isoforms. EBSeq also proves to be a robust approach for identifying DE genes.

Availability and implementation

An R package containing examples and sample datasets is available at http://www.biostat.wisc.edu/kendzior/EBSEQ/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-21 +24247797,Cholesterol level in human serum: seasonal variations and differences in 14 distant regions.,"

Introduction

To investigate differences between regions, a whole year's serum cholesterol results (304,156 data points) from 14 distant Polish laboratories (4,311 - 67,167 results) were used. The JEG, an original statistical method (http://ibib.waw.pl/JEGen.html) gives three levels of the Gaussian part of any analyzed dataset (Gaussian Reference Intervals, GRIs): low (GRImin), modal or middle (GRIopt), and high (GRImax). The quality was proven with reproducibility of 52 groups of week's sample collections in any laboratory; CV% was a mean of 5.73 and not worse than 8.29. The same groups were used to state the variation profiles for all three levels. The modal (GRIopt) serum cholesterol values were 4.73-5.72 mMol/L for women and 4.55-5.508 mMol/L for men. Sex and age dependency of the results were analyzed on 36-38 sets of 5-year age classes and showed, that there were more female results in childhood and in the older age classes, but in the youth age classes males dominated. The annual seasonal variation profile curves appeared as very variable, from M-letter forms through flat ones to W-letter forms. The shape of the variation curve was not stable in any particular laboratory and could vary from year to year to the opposite form. All results demonstrate the variability of cholesterol level in any analyzed subpopulation.",2013-01-01 +26834391,Natural antimicrobial peptides as promising anti-HIV candidates.,"Human immunodeficiency virus type 1 (HIV-1) infection remains to be one of the major global health problems. It is thus necessary to identify novel therapeutic molecules to combat HIV-1. Natural antimicrobial peptides (AMPs) have been recognized as promising templates for developing topical microbicides. This review systematically discusses over 80 anti-HIV peptides annotated in the antimicrobial peptide database (http://aps.unmc.edu/AP). Such peptides have been discovered from bacteria, plants, and animals. 
Examples include gramicidin and bacteriocins from bacteria, cyclotides from plants, melittins and cecropins from insects, piscidins from fish, ascaphins, caerins, dermaseptins, esculentins, and maximins from amphibians, and cathelicidins and defensins from vertebrates. These peptides appear to work by different mechanisms and could block viral entry in multiple ways. As additional advantages, such anti-HIV peptides may possess other desired features such as antibacterial, antiparasital, spermicidal, and anticancer activity. With continued optimization of peptide stability, production, formulation and delivery methods, it is anticipated that some of these compounds may eventually become new anti-HIV drugs.",2012-01-01 +22961451,Identifying cancer highly-expressed membrane receptors for targeted drug delivery.,"Currently, the accompanying side effects of anti-cancer drugs owing to incorrect delivery to normal tissues should be reduced. We present a database (MRTDD) with identified cancer highly-expressed membrane receptors (CHMRs) which can be used in targeted drug delivery. To evaluate the probability of occurrence of incorrect delivery, we calculate tissue index for each CHMR and expect to identify good candidates. The information provided includes: (1) genomic annotations; (2) gene expression profiles of membrane receptors in cancer tissue vs. corresponding normal tissue, normal tissues of body and cancer cell-lines; (3) available antibody services of manufacturers. MRTDD is available at http://mrtdd.mbc.nctu.edu.tw/.",2012-01-01 +23446782,A review of pharmacogenetics of adverse drug reactions in elderly people.,"Older adults are more susceptible to the prevalence of therapeutic failure and adverse drug reactions (ADRs). 
Recent advances in genomic research have shed light on the crucial role of genetic variants, mainly involving genes encoding drug-metabolizing enzymes, drug transporters and genes responsible for a compound's mechanism of action, in driving different treatment responses among individuals, in terms of therapeutic efficacy and safety. The interindividual variations of these genes may account for the differences observed in drug efficacy and the appearance of ADRs in elderly people. The advent of whole genome mapping techniques has allowed researchers to begin to characterize the genetic components underlying serious ADRs. The identification and validation of these genetic markers will enable the screening of patients at risk of serious ADRs and to establish personalized treatment regimens. The aim of this review was to provide an update on the recent developments in geriatric pharmacogenetics in clinical practice by reviewing the available evidence in the PubMed database to September 2012. A PubMed search was performed (years 1999-2012) using the following two search strategies: ('pharmacogenomic' OR 'pharmacogenetic ') AND ('geriatric' or 'elderly ') AND 'adverse drug reactions'; [gene name] AND ('geriatric' or 'elderly ') AND 'adverse drug reactions', in which the gene names were those contained in the Table of Pharmacogenomic Biomarkers in Drug Labels published online by the US Food and Drug Administration ( http://www.fda.gov/drugs/scienceresearch/researchareas/pharmacogenetics/ucm083378.htm ). Reference lists of included original articles and relevant review articles were also screened. The search was limited to studies published in the English language.",2012-01-01 +22486148,Design and implementation of a web directory for medical education (WDME): a tool to facilitate research in medical education.,"

Background

Access to the medical resources on the web is one of current challenges for researchers and medical science educators. The purpose of current project was to design and implement a comprehensive and specific subject/web directory of medical education.

Methods

First, the categories to be incorporated in the directory were defined through reviewing related directories and obtaining medical education experts' opinions in a focus group. Then, number of sources such as (Meta) search engines, subject directories, databases and library catalogs searched/browsed for selecting and collecting high quality resources. Finally, the website was designed and the resources were entered into the directory.

Results

The main categories incorporating WDME resources are: Journals, Organizations, Best Evidence in Medical Education, and Textbooks. Each category is divided into sub-categories and related resources of each category are described shortly within it. The resources in this directory could be accessed both by browsing and keyword searching. WDME is accessible on http://medirectory.org.

Conclusions

The innovative Web Directory for Medical Education (WDME) presented in this paper, is more comprehensive than other existing directories, and expandable through user suggestions. It may help medical educators to find their desirable resources more quickly and easily; hence have more informed decisions in education.",2012-01-01 +30731874,First Report of Plasmopara obducens Causing Downy Mildew on Impatiens walleriana in Hungary.,"In May 2011, young impatiens plants (Impatiens walleriana Hook.f.) showing downy mildew symptoms were collected from a greenhouse in Kecskemét, Hungary. The disease was later reported from different parts of the country from a number of different cultivars. The upper surface of affected leaves turned yellow and white fungal-like growth was observed on the underside. Diseased plants wilted and rapidly collapsed, resulting in losses of nearly 100%. Appearance of the disease caused a loss of approximately 2,000 euros for the growers in Kecskemét. In Hungary, losses for growers and consumers could have reached half a million euros. Downy mildew of impatiens can be caused by two pathogens, Plasmopara obducens or Bremiella sphaerosperma (1). P. obducens differs from B. sphaerosperma by monopodial sporangiophores with straight branches and the lack of apical thickening on the branchlets. Sporangia of B. sphaerosperma are spherical, while those of P. obducens are subspherical. Sporangiophores of the fungus were colorless with straight trunks and monopodially branched four to seven times. Sporangiophores ended with three apical branchlets at right angles to the main axis, measuring 4.6 to 16.4 μm (average 9.6 μm). The ovoid and hyaline sporangia measured 11.04 (7.7 to 13.8) × 13.9 (9.9 to 17.4) μm. For molecular identification, total DNA was extracted from the pathogen scraped from the leaves using a cetyltrimethylammoniumbromide (CTAB) extraction method. 
The 5'-end of the large subunit of ribosomal RNA gene was amplified by PCR using NL1 and NL4 primers (3) and cloned and sequenced. The nucleotide sequence of IWPO-H1 Hungarian isolate (GenBank Accession No. HE577169) showed highest identity with Accession Nos. AY587558 and HQ246451 from the United Kingdom and Serbia (2), respectively, with 99.72% identity. On the basis of the symptoms and molecular and morphological characters, the pathogen was identified as P. obducens. Pathogenicity was confirmed by spraying young I. walleriana plants with a sporangial suspension (1 × 104 sporangiospores ml-1). Control plants were sprayed with sterile water. Plants were incubated at room temperature under glass vessels at approximately 90% relative humidity. Symptoms of downy mildew occurred 12 to 16 days after inoculation, while control plants remained healthy. Presence of P. obducens was reported from Austria, Australia, Bulgaria, China, the Czech Republic, Denmark, Germany, Finland, Korea, Lithuania, Norway, Pakistan, Poland, Romania, Russia, the United Kingdom, the United States, and recently from Serbia (2). To our knowledge, this is the first report of downy mildew of I. walleriana caused by P. obducens in Hungary. The appearance of P. obducens in Hungary seriously endangers the production of I. walleriana, which is an important and popular ornamental in gardens and city parks. References: (1) O. Constantinescu. Mycologia 83:473, 1991. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , September 1, 2011. (3) K. L. O'Donnell. The Fungal Holomorph: Mitotic, Meiotic and Pleomorphic Speciation in Fungal Systematics. 
CAB International, Wallingford, UK, 1993.",2012-01-01 +25165759,The Polish collection of parasitic helminths (a report on realization of works concerning fusion of parasitic collections dispersed among different scientific institutions).,"The paper presents the results of works on preparation of a proper museum collection of parasitic helminths gathered by the Polish scientists and dispersed in various scientific institutions. The collection composed of 11 author's collections and a special collection of available typical series of species described by the Polish authors has been deposited in the Museum of Natural History of Wroclaw University. It includes almost 16.000 slides with 90 taxons of Monogenea, 251 Trematoda, 144 Cestoda, 43 Nematoda, 11 Acanthocephala, in total 486 nominal species and 53 taxons of genus level. Among them 12 species appear new for fauna of Poland; for 57 species new hosts in Polish territory have been recorded; two new species of Cestoda have been described in separate publications. A special paragraph contains a checklist of all species described by the Polish scientists with information whether they are available in any other collection. Detailed information about every deposited slide is given in the Database available online at http://www.helminths.eu .",2012-01-01 +22799759,Age-specific MRI templates for pediatric neuroimaging.,"This study created a database of pediatric age-specific magnetic resonance imaging (MRI) brain templates for normalization and segmentation. Participants included children from 4.5 through 19.5 years, totaling 823 scans from 494 subjects. Open-source processing programs (FMRIB Software Library, Statistical Parametric Mapping, Advanced Normalization Tools [ANTS]) constructed head, brain, and segmentation templates in 6-month intervals. The tissue classification (white matter [WM], gray matter [GM], cerebrospinal fluid) showed changes over age similar to previous reports. 
A volumetric analysis of age-related changes in WM and GM based on these templates showed expected increase/decrease pattern in GM and an increase in WM over the sampled ages. This database is available for use for neuroimaging studies (http://jerlab.psych.sc.edu/neurodevelopmentalmridatabase).",2012-01-01 +22638586,Super: a web server to rapidly screen superposable oligopeptide fragments from the protein data bank.,"Searching for well-fitting 3D oligopeptide fragments within a large collection of protein structures is an important task central to many analyses involving protein structures. This article reports a new web server, Super, dedicated to the task of rapidly screening the protein data bank (PDB) to identify all fragments that superpose with a query under a prespecified threshold of root-mean-square deviation (RMSD). Super relies on efficiently computing a mathematical bound on the commonly used structural similarity measure, RMSD of superposition. This allows the server to filter out a large proportion of fragments that are unrelated to the query; >99% of the total number of fragments in some cases. For a typical query, Super scans the current PDB containing over 80,500 structures (with ∼40 million potential oligopeptide fragments to match) in under a minute. Super web server is freely accessible from: http://lcb.infotech.monash.edu.au/super.",2012-05-25 +30731883,First Report of Mango Dieback Caused by Pseudofusicoccum stromaticum in Brazil.,"From September to December 2010, mango (Mangifera indica L.) stems showing dieback symptoms were collected during a survey conducted in São Francisco Valley, northeastern Brazil. Small pieces (4 to 5 mm) of necrotic tissues were surface sterilized for 1 min in 1.5% NaOCl, washed twice with sterile distilled water, and plated onto potato dextrose agar (PDA) amended with 0.5 g liter-1 streptomycin sulfate. 
Plates were incubated at 25°C in the dark for 14 to 21 days and colonies that were morphologically similar to species of Botryosphaeriaceae were transferred to PDA. Colonies developed a compact mycelium that was initially white, but becoming gray dark after 4 to 6 days of incubation at 25°C in darkness. Identification was made using morphological characteristics and DNA based molecular techniques. Pycnidia were obtained on 2% water agar with sterilized pine needles as substratum after 3 weeks of incubation at 25°C under near-UV light. Pycnidia were large, multilocular, eustromatic, covered with hyphae; locule totally embedded without ostioles, locule walls consisting of a dark brown textura angularis, becoming thinner and hyaline toward the conidiogenous region. Conidia were hyaline, thin to slightly thickened walled, aseptate, with granular contents, bacilliform, straight to slightly curved, apex and base both bluntly rounded or just blunt, 15.6 to 25.0 (20.8) μm long, and 2.7 to 7.9 (5.2) μm wide, length/width = 4.00. According to these morphological characteristics, three isolates (CMM1364, CMM1365, and CMM1450) were identified as Pseudofusicoccum stromaticum (1,3,4). PCR amplification by universal primers (ITS4/ITS5) and DNA sequencing of the internal transcribed spacer (ITS1-5.8S-ITS2 rRNA gene cluster) were conducted to confirm the identifications through BLAST searches in GenBank. The isolates were 100% homologous with P. stromaticum (3) (GenBank Accession Nos. AY693974 and DQ436935). Representative sequences of the isolates were deposited in GenBank (Accession Nos. JF896432, JF966392, and JF966393). Pathogenicity tests were conducted with the P. stromaticum strains on 5-month-old mango seedlings (cv. Tommy Atkins). Mycelial plugs taken from the margin of actively growing colonies (PDA) of each isolate were applied in shallow wounds (0.4 cm in diameter) on the stem (center) of each plant. Inoculation wounds were wrapped with Parafilm. 
Control seedlings received sterile PDA plugs. Inoculated and control seedlings (five each) were kept in a greenhouse at 25 to 30°C. After 5 weeks, all inoculated seedlings showed leaf wilting, drying out of the branches, and necrotic lesions in the stems. No symptoms were observed in the control plants. P. stromaticum was successfully reisolated from symptomatic plants to fulfill Koch's postulates. P. stromaticum was described from Acacia, Eucalyptus, and Pinus trees in Venezuela (3,4), and there are no reports of this fungus in other hosts (2). To our knowledge, this is the first report of P. stromaticum causing mango dieback in Brazil and worldwide. References: (1) P. W. Crous et al. Stud. Mycol. 55:235, 2006. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 18 May 2011. (3) S. Mohali et al. Mycol. Res. 110:405, 2006. (4) S. R. Mohali et al. Fungal Divers. 25:103, 2007.",2012-01-01 +22990488,YgiW homologous gene from Pseudomonas aeruginosa 25W is responsible for tributyltin resistance.,"A tributyltin (TBT) resistance gene was isolated from the TBT-resistant marine origin bacterium Pseudomonas aeruginosa 25W. This gene was identical to PA0320 deposited in the P. aeruginosa PAO1 database (http://www.pseudomonas.com). The deduced amino acid sequence of PA0320 appears to be homologous to the YgiW proteins of Escherichia coli and Salmonella enterica. The deletion mutant of PA0320 showed a reduction of growth rate in the presence of TBT. A susceptibility test to cadmium, mercury, hydrogen peroxide and acidic pH in the deletion mutant showed an increasing susceptibility to them. 
PA0320 plays a certain role in stress tolerance against TBT as well as in stressors producing reactive oxygen species.",2012-01-01 +22907343,Use of IMGT(®) databases and tools for antibody engineering and humanization.,"IMGT(®), the international ImMunoGeneTics information system(®) (http://www.imgt.org), was created in 1989 to manage the huge diversity of the antigen receptors, immunoglobulins (IG) or antibodies, and T cell receptors (TR). Standardized sequence and structure analysis of antibody using IMGT(®) databases and tools allows one to bridge, for the first time, the gap between antibody sequences and three-dimensional (3D) structures. This is achieved through the IMGT Scientific chart rules, based on the IMGT-ONTOLOGY concepts of classification (IMGT gene and allele nomenclature), description (IMGT standardized labels), and numerotation (IMGT unique numbering and IMGT Colliers de Perles). IMGT(®) is the international reference for immunogenetics and immunoinformatics and its standards are particularly useful for antibody humanization and evaluation of immunogenicity. IMGT(®) databases for antibody nucleotide sequences and genes include IMGT/LIGM-DB and IMGT/GENE-DB, respectively, whereas nucleotide sequence analysis is performed by the IMGT/V-QUEST, IMGT/HighV-QUEST, and IMGT/JunctionAnalysis tools. 
In this chapter, we focus on IMGT(®) databases and tools for amino acid sequences, two-dimensional (2D) and three-dimensional (3D) structures: the IMGT/DomainGapAlign and IMGT/Collier-de-Perles tools, the IMGT/2Dstructure-DB database for amino acid sequences of monoclonal antibodies (mAb, suffix -mab) and fusion proteins for immune applications (FPIA, suffix -cept) of the World Health Organization/International Nonproprietary Name (WHO/INN) programme and other proteins of interest, and the IMGT/3Dstructure-DB database for crystallized antibodies and its associated tools (IMGT/StructuralQuery, IMGT/DomainSuperimpose).",2012-01-01 +24107548,Analysis of MAT3 gene expression in NSCLC.,"

Background

Many studies have suggested different roles of Metastasis-associated protein 3 (MAT3) in different types of human cancers. However, expression of MAT3 in primary lung cancer and its relationship with clinicopathological factors have not been examined and the biological roles of MTA3 in lung cancer cells are still unclear.

Methods

The expression of MAT3 mRNA and protein were detected with quantitative real-time RT-PCR and immunohistochemical methods in 118 NSCLC samples and corresponding non-neoplastic samples. Survival curves were made with follow-up data. The relations of the prognosis with clinical and pathological characteristics were analyzed.

Results

The expression level of MAT3 mRNA and the positive rate of MAT3 protein were significantly higher in NSCLC samples than that in non-neoplastic samples, and in NSCLC samples with lymph node metastasis than that in NSCLC samples without lymph node metastasis (P < 0.01). MAT3 mRNA expression level was a risk factor of lymph node metastasis in patients with NSCLC (P = 0.006). There were significant differences in survival curves between lymph node metastatic group and non-metastatic group (P = 0.000), among groups of MAT3 positive and negative (P = 0.000), among groups of TNM stage I, II and III (P = 0.000) and among groups of tumor status T1, T2 and T3T4 (P = 0.000); but no statistical significance between male patients and female patients (P = 0.516), between ≥ 60 years old patients and <60 years old patients (P = 0.133), between histology types adenocarcinoma and squamous cell carcinoma (P = 0.865) and between well differentiation and moderate-poor differentiation (P = 0.134). The level of MAT3 mRNA (P = 0.000) and protein (P = 0.000) were risk factors of survival.

Conclusion

Our study showed that MAT3 over-expression in NSCLC tissue, and MAT3 mRNA level is a risk factor of lymph node metastasis. The level of MAT3 mRNA and protein were risk factors of survival in patients with NSCLC. It suggested that this antigen could be used as a simple and efficient parameter with which to identify high-risk patients.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/5585901065503943.",2013-10-09 +23467571,"""PULS."" - a blog-based online-magazine for students of medicine of the Goethe University Frankfurt.","In the context of nationwide protests 2009 also students of the faculty of medicine/dentistry at Goethe-University in Frankfurt demanded more transparency and communication. To satisfy these demands, a web 2.0-tool offered an innovative solution: A blog-based online-magazine for students and other faculty-members. The online-magazine ""PULS."" is realized with the share-ware blog-software (wordpress version 3.1.3) and is conceived and written by an online-journalist. ""PULS."" is available from https://newsmagazin.puls.med.uni-frankfurt.de/wp/. The articles are generated from own investigations and from ideas of different groups of the faculty- deanship, students and lecturers. A user-analysis is conducted with the open-source software Piwik and considers the data security. Additionally, every year an anonymous online-user-survey (Survey Monkey) is conducted. ""PULS."" is continuously online since 14.02.2010 and has published 806 articles (state: 27.11.2012) and has about 2400 readers monthly. The content focuses on the needs of Frankfurt medical students. The close cooperation with different groups of the faculty - deanship, students and lecturers - furthermore guarantees themes relevant to the academic faculty. ""PULS."" flanks complex projects and decisions with background-information and communicates them understandable. The user-evaluation shows a growing number of readers and a high acceptance for the online-magazine, its themes and its style. The web 2.0-tool ""Blog"" and the web-specific language comply with media habits of the main target group, the students of the faculty medicine/dentistry. Thus, ""PULS."" has proven as a suitable and strategic instrument. 
It pushes towards a higher transparency, more communication and a stronger identification of the students with their faculty.",2013-02-21 +23902469,Reconstruction of viral population structure from next-generation sequencing data using multicommodity flows.,"

Background

Highly mutable RNA viruses exist in infected hosts as heterogeneous populations of genetically close variants known as quasispecies. Next-generation sequencing (NGS) allows for analysing a large number of viral sequences from infected patients, presenting a novel opportunity for studying the structure of a viral population and understanding virus evolution, drug resistance and immune escape. Accurate reconstruction of genetic composition of intra-host viral populations involves assembling the NGS short reads into whole-genome sequences and estimating frequencies of individual viral variants. Although a few approaches were developed for this task, accurate reconstruction of quasispecies populations remains greatly unresolved.

Results

Two new methods, AmpMCF and ShotMCF, for reconstruction of the whole-genome intra-host viral variants and estimation of their frequencies were developed, based on Multicommodity Flows (MCFs). AmpMCF was designed for NGS reads obtained from individual PCR amplicons and ShotMCF for NGS shotgun reads. While AmpMCF, based on covering formulation, identifies a minimal set of quasispecies explaining all observed reads, ShotMCF, based on packing formulation, engages the maximal number of reads to generate the most probable set of quasispecies. Both methods were evaluated on simulated data in comparison to Maximum Bandwidth and ViSpA, previously developed state-of-the-art algorithms for estimating quasispecies spectra from the NGS amplicon and shotgun reads, respectively. Both algorithms were accurate in estimation of quasispecies frequencies, especially from large datasets.

Conclusions

The problem of viral population reconstruction from amplicon or shotgun NGS reads was solved using the MCF formulation. The two methods, ShotMCF and AmpMCF, developed here afford accurate reconstruction of the structure of intra-host viral population from NGS reads. The implementations of the algorithms are available at http://alan.cs.gsu.edu/vira.html (AmpMCF) and http://alan.cs.gsu.edu/NGS/?q=content/shotmcf (ShotMCF).",2013-06-28 +23647538,The pH-triggered conversion of the PrP(c) to PrP(sc.).,"Transmissible spongiform encephalopathies (TSEs) are prion protein misfolding diseases that involve the accumulation of an abnormal β-sheet-rich prion protein aggregated form (PrP(sc)) of the normal α- helix-rich prion protein (PrP(c)) within the central nervous system (CNS) and other organs. On account of its large size and insolubility properties, characterization of PrP(c) is quite difficult. A soluble intermediate, called PrP(β) or β(o), exhibiting many of the same features as PrP(sc), can be generated using a combination of low pH and/or mild denaturing conditions. Here, we review the current knowledge on the following five issues relevant to the conversion mechanisms of PrP(c) to PrP(sc) : (1) How is the Stability of the Helical Structures in the Native PrP(c) Related to the Primary Structure of the PrP(c) (2) Why the Low pH Solution System is a Ideal Trigger of PrP(c) to PrP(sc) Conversion (3) How are the Structural and Dynamical Characteristics of the α-helixrich Intermediates Determined using NMR Data (4) How are the Premolten (PrP(α4) and PrP(αβ)) and β-Oligomer (PrP(β)) Intermediates Detected and Assayed, and (5) Can the Disordered N-terminal Domain be folded into the Structural Segment? Particularly, Chou's wenxiang diagram (http://en.wikipedia.org/wiki/Wenxiang_diagram) was introduced for providing an intuitive picture. 
This review may help to further understand the prion protein misfolding mechanism.",2013-01-01 +23419122,Immunohistochemical detection of mutations in the epidermal growth factor receptor gene in lung adenocarcinomas using mutation-specific antibodies.,"

Background

The recent development of antibodies specific for the major hotspot mutations in the epidermal growth factor receptor (EGFR), L858R and E746_A750del, may provide an opportunity to use immunohistochemistry (IHC) as a screening test for EGFR gene mutations. This study was designed to optimize the IHC protocol and the criteria for interpretation of the results using DNA sequencing as the gold-standard.

Methods

Tumor sections from fifty lung adenocarcinoma specimens from Chinese patients were immunostained using L858R and E746_A750del-specific antibodies using three different antigen retrieval solutions, and the results were evaluated using three different sets of criteria. The same specimens were used for DNA purification and analysis of EGFR gene mutations.

Results

In this study the optimal buffer for antigen retrieval was EDTA (pH 8.0), and the optimal scoring method was to call positive results when there was moderate to strong staining of membrane and/or cytoplasm in >10% of the tumor cells. Using the optimized protocol, L858R-specific IHC showed a sensitivity of 81% and a specificity of 97%, and E746_A750del-specific IHC showed a sensitivity of 59% and a specificity of 100%, both compared with direct DNA analysis. Additionally, the mutant proteins as assessed by IHC showed a more homogeneous than heterogeneous pattern of expression.

Conclusions

Our data demonstrate that mutation-specific IHC, using optimized procedures, is a reliable prescreening test for detecting EGFR mutations in lung adenocarcinoma.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2059012601872392.",2013-02-18 +23317703,TrFAST: a tool to predict signaling pathway-specific transcription factor binding sites.,"Recent advances in the development of high-throughput tools have significantly revolutionized our understanding of molecular mechanisms underlying normal and dysfunctional biological processes. Here we present a novel computational tool, transcription factor search and analysis tool (TrFAST), which was developed for the in silico analysis of transcription factor binding sites (TFBSs) of signaling pathway-specific TFs. TrFAST facilitates searching as well as comparative analysis of regulatory motifs through an exact pattern matching algorithm followed by the graphical representation of matched binding sites in multiple sequences up to 50kb in length. TrFAST is proficient in reducing the number of comparisons by the exact pattern matching strategy. In contrast to the pre-existing tools that find TFBS in a single sequence, TrFAST seeks out the desired pattern in multiple sequences simultaneously. It counts the GC content within the given multiple sequence data set and assembles the combinational details of consensus sequence(s) located at these regions, thereby generating a visual display based on the abundance of unique pattern. Comparative regulatory region analysis of multiple orthologous sequences simultaneously enhances the features of TrFAST and provides a significant insight into study of conservation of non-coding cis-regulatory elements. TrFAST is freely available at http://www.fi-pk.com/trfast.html.",2012-12-02 +22210871,SNPdbe: constructing an nsSNP functional impacts database.,"

Unlabelled

Many existing databases annotate experimentally characterized single nucleotide polymorphisms (SNPs). Each non-synonymous SNP (nsSNP) changes one amino acid in the gene product (single amino acid substitution;SAAS). This change can either affect protein function or be neutral in that respect. Most polymorphisms lack experimental annotation of their functional impact. Here, we introduce SNPdbe-SNP database of effects, with predictions of computationally annotated functional impacts of SNPs. Database entries represent nsSNPs in dbSNP and 1000 Genomes collection, as well as variants from UniProt and PMD. SAASs come from >2600 organisms; 'human' being the most prevalent. The impact of each SAAS on protein function is predicted using the SNAP and SIFT algorithms and augmented with experimentally derived function/structure information and disease associations from PMD, OMIM and UniProt. SNPdbe is consistently updated and easily augmented with new sources of information. The database is available as an MySQL dump and via a web front end that allows searches with any combination of organism names, sequences and mutation IDs.

Availability

http://www.rostlab.org/services/snpdbe.",2011-12-30 +22646788,"The thermostable β-1,3-1,4-glucanase from Clostridium thermocellum improves the nutritive value of highly viscous barley-based diets for broilers.","1. Microbial β-1,3-1,4-glucanases improve the nutritive value of barley-based diets for poultry by effectively decreasing the degree of polymerisation of the anti-nutritive soluble β-glucans. Glycoside hydrolases (GHs) acting on recalcitrant polysaccharides display a modular architecture comprising a catalytic domain linked to one or more non-catalytic Carbohydrate-Binding Modules (CBMs). 2. GHs and CBMs have been classified in different families based on primary structure similarity (see CAZy webpage at http://www.cazy.org ). The role of CBMs is to anchor the appended GHs into their target substrates, therefore eliciting the efficient hydrolysis of structural carbohydrates. 3. Here we describe the biochemical properties of the family 16 GH from Clostridium thermocellum, termed CtGlc16A. CtGlc16A is a thermostable enzyme that specifically acts on β-1,3-1,4-glucans with a remarkable catalytic activity (38000 U/mg protein). 4. CtGlc16A, individually or fused to the family 11 β-glucan-binding domain of cellulase CtLic26A-Cel5E of C. thermocellum, was used to supplement a highly viscous barley-based diet for broilers. 5. The data showed that birds fed on diets supplemented with the recombinant enzymes displayed an improved performance when compared with birds given diets not supplemented with exogenous enzymes. However, inclusion of the non-catalytic CBMs had no influence on the capacity of CtGlc16A to reduce the anti-nutritive effects of soluble β-1,3-1,4-glucans. 6. 
The data suggest that at elevated dosage rates, CBMs might be unable to potentiate the catalytic activity of appended catalytic domains; this effect may only be revealed when feed enzymes are incorporated at low levels.",2012-01-01 +23687176,Acute stroke: automatic perfusion lesion outlining using level sets.,"

Purpose

To develop a user-independent algorithm for the delineation of hypoperfused tissue on perfusion-weighted images and evaluate its performance relative to a standard threshold method in simulated data, as well as in acute stroke patients.

Materials and methods

The study was approved by the local ethics committee, and patients gave written informed consent prior to their inclusion in the study. The algorithm identifies hypoperfused tissue in mean transit time maps by simultaneously minimizing the mean square error between individual and mean perfusion values inside and outside a smooth boundary. In 14 acute stroke patients, volumetric agreement between automated outlines and manual outlines determined in consensus among four neuroradiologists was assessed with Bland-Altman analysis, while spatial agreement was quantified by using lesion overlap relative to mean lesion volume (Dice coefficient). Performance improvement relative to a standard threshold approach was tested with the Wilcoxon signed rank test.

Results

The mean difference in lesion volume between automated outlines and manual outlines was -9.0 mL ± 44.5 (standard deviation). The lowest mean volume difference for the threshold approach was -25.8 mL ± 88.2. A significantly higher Dice coefficient was observed with the algorithm (0.71; interquartile range [IQR], 0.42-0.75) compared with the threshold approach (0.50; IQR, 0.27-0.57; P < .001). The corresponding agreement among experts was 0.79 (IQR, 0.69-0.83).

Conclusion

The perfusion lesions outlined by the automated algorithm agreed well with those defined manually in consensus by four experts and were superior to those obtained by using the standard threshold approach. This user-independent algorithm may improve the assessment of perfusion images as part of acute stroke treatment.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.13121622/-/DC1.",2013-05-17 +21208982,Tabix: fast retrieval of sequence features from generic TAB-delimited files.,"

Unlabelled

Tabix is the first generic tool that indexes position sorted files in TAB-delimited formats such as GFF, BED, PSL, SAM and SQL export, and quickly retrieves features overlapping specified regions. Tabix features include few seek function calls per query, data compression with gzip compatibility and direct FTP/HTTP access. Tabix is implemented as a free command-line tool as well as a library in C, Java, Perl and Python. It is particularly useful for manually examining local genomic features on the command line and enables genome viewers to support huge data files and remote custom tracks over networks.

Availability and implementation

http://samtools.sourceforge.net.",2011-01-05 +23564938,DAMBE5: a comprehensive software package for data analysis in molecular biology and evolution.,"Since its first release in 2001 as mainly a software package for phylogenetic analysis, data analysis for molecular biology and evolution (DAMBE) has gained many new functions that may be classified into six categories: 1) sequence retrieval, editing, manipulation, and conversion among more than 20 standard sequence formats including MEGA, NEXUS, PHYLIP, GenBank, and the new NeXML format for interoperability, 2) motif characterization and discovery functions such as position weight matrix and Gibbs sampler, 3) descriptive genomic analysis tools with improved versions of codon adaptation index, effective number of codons, protein isoelectric point profiling, RNA and protein secondary structure prediction and calculation of minimum folding energy, and genomic skew plots with optimized window size, 4) molecular phylogenetics including sequence alignment, testing substitution saturation, distance-based, maximum parsimony, and maximum-likelihood methods for tree reconstructions, testing the molecular clock hypothesis with either a phylogeny or with relative-rate tests, dating gene duplication and speciation events, choosing the best-fit substitution models, and estimating rate heterogeneity over sites, 5) phylogeny-based comparative methods for continuous and discrete variables, and 6) graphic functions including secondary structure display, optimized skew plot, hydrophobicity plot, and many other plots of amino acid properties along a protein sequence, tree display and drawing by dragging nodes to each other, and visual searching of the maximum parsimony tree. 
DAMBE features a graphic, user-friendly, and intuitive interface and is freely available from http://dambe.bio.uottawa.ca (last accessed April 16, 2013).",2013-04-05 +21253556,Relationships between gene expression and brain wiring in the adult rodent brain.,"We studied the global relationship between gene expression and neuroanatomical connectivity in the adult rodent brain. We utilized a large data set of the rat brain ""connectome"" from the Brain Architecture Management System (942 brain regions and over 5000 connections) and used statistical approaches to relate the data to the gene expression signatures of 17,530 genes in 142 anatomical regions from the Allen Brain Atlas. Our analysis shows that adult gene expression signatures have a statistically significant relationship to connectivity. In particular, brain regions that have similar expression profiles tend to have similar connectivity profiles, and this effect is not entirely attributable to spatial correlations. In addition, brain regions which are connected have more similar expression patterns. Using a simple optimization approach, we identified a set of genes most correlated with neuroanatomical connectivity, and find that this set is enriched for genes involved in neuronal development and axon guidance. A number of the genes have been implicated in neurodevelopmental disorders such as autistic spectrum disorder. Our results have the potential to shed light on the role of gene expression patterns in influencing neuronal activity and connectivity, with potential applications to our understanding of brain disorders. Supplementary data are available at http://www.chibi.ubc.ca/ABAMS.",2011-01-06 +23419377,Accounting for non-genetic factors by low-rank representation and sparse regression for eQTL mapping.,"

Motivation

Expression quantitative trait loci (eQTL) studies investigate how gene expression levels are affected by DNA variants. A major challenge in inferring eQTL is that a number of factors, such as unobserved covariates, experimental artifacts and unknown environmental perturbations, may confound the observed expression levels. This may both mask real associations and lead to spurious association findings.

Results

In this article, we introduce a LOw-Rank representation to account for confounding factors and make use of Sparse regression for eQTL mapping (LORS). We integrate the low-rank representation and sparse regression into a unified framework, in which single-nucleotide polymorphisms and gene probes can be jointly analyzed. Given the two model parameters, our formulation is a convex optimization problem. We have developed an efficient algorithm to solve this problem and its convergence is guaranteed. We demonstrate its ability to account for non-genetic effects using simulation, and then apply it to two independent real datasets. Our results indicate that LORS is an effective tool to account for non-genetic effects. First, our detected associations show higher consistency between studies than recently proposed methods. Second, we have identified some new hotspots that can not be identified without accounting for non-genetic effects.

Availability

The software is available at: http://bioinformatics.med.yale.edu/software.aspx.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-17 +22213543,"Camps 2.0: exploring the sequence and structure space of prokaryotic, eukaryotic, and viral membrane proteins.","Structural bioinformatics of membrane proteins is still in its infancy, and the picture of their fold space is only beginning to emerge. Because only a handful of three-dimensional structures are available, sequence comparison and structure prediction remain the main tools for investigating sequence-structure relationships in membrane protein families. Here we present a comprehensive analysis of the structural families corresponding to α-helical membrane proteins with at least three transmembrane helices. The new version of our CAMPS database (CAMPS 2.0) covers nearly 1300 eukaryotic, prokaryotic, and viral genomes. Using an advanced classification procedure, which is based on high-order hidden Markov models and considers both sequence similarity as well as the number of transmembrane helices and loop lengths, we identified 1353 structurally homogeneous clusters roughly corresponding to membrane protein folds. Only 53 clusters are associated with experimentally determined three-dimensional structures, and for these clusters CAMPS is in reasonable agreement with structure-based classification approaches such as SCOP and CATH. We therefore estimate that ∼1300 structures would need to be determined to provide a sufficient structural coverage of polytopic membrane proteins. CAMPS 2.0 is available at http://webclu.bio.wzw.tum.de/CAMPS2.0/.",2011-12-28 +21519813,The DIADEM metric: comparing multiple reconstructions of the same neuron.,"Digital reconstructions of neuronal morphology are used to study neuron function, development, and responses to various conditions. 
Although many measures exist to analyze differences between neurons, none is particularly suitable to compare the same arborizing structure over time (morphological change) or reconstructed by different people and/or software (morphological error). The metric introduced for the DIADEM (DIgital reconstruction of Axonal and DEndritic Morphology) Challenge quantifies the similarity between two reconstructions of the same neuron by matching the locations of bifurcations and terminations as well as their topology between the two reconstructed arbors. The DIADEM metric was specifically designed to capture the most critical aspects in automating neuronal reconstructions, and can function in feedback loops during algorithm development. During the Challenge, the metric scored the automated reconstructions of best-performing algorithms against manually traced gold standards over a representative data set collection. The metric was compared with direct quality assessments by neuronal reconstruction experts and with clocked human tracing time saved by automation. The results indicate that relevant morphological features were properly quantified in spite of subjectivity in the underlying image data and varying research goals. The DIADEM metric is freely released open source ( http://diademchallenge.org ) as a flexible instrument to measure morphological error or change in high-throughput reconstruction projects.",2011-09-01 +22851530,"PRISM: pair-read informed split-read mapping for base-pair level detection of insertion, deletion and structural variants.","

Motivation

The development of high-throughput sequencing technologies has enabled novel methods for detecting structural variants (SVs). Current methods are typically based on depth of coverage or pair-end mapping clusters. However, most of these only report an approximate location for each SV, rather than exact breakpoints.

Results

We have developed pair-read informed split mapping (PRISM), a method that identifies SVs and their precise breakpoints from whole-genome resequencing data. PRISM uses a split-alignment approach informed by the mapping of paired-end reads, hence enabling breakpoint identification of multiple SV types, including arbitrary-sized inversions, deletions and tandem duplications. Comparisons to previous datasets and simulation experiments illustrate PRISM's high sensitivity, while PCR validations of PRISM results, including previously uncharacterized variants, indicate an overall precision of ~90%.

Availability

PRISM is freely available at http://compbio.cs.toronto.edu/prism.",2012-07-31 +22198341,HHblits: lightning-fast iterative protein sequence searching by HMM-HMM alignment.,"Sequence-based protein function and structure prediction depends crucially on sequence-search sensitivity and accuracy of the resulting sequence alignments. We present an open-source, general-purpose tool that represents both query and database sequences by profile hidden Markov models (HMMs): 'HMM-HMM-based lightning-fast iterative sequence search' (HHblits; http://toolkit.genzentrum.lmu.de/hhblits/). Compared to the sequence-search tool PSI-BLAST, HHblits is faster owing to its discretized-profile prefilter, has 50-100% higher sensitivity and generates more accurate alignments.",2011-12-25 +26105921,PP068. Evaluation of cardiovascular health in previously preeclamptic women.,"

Introduction

Women with a history of preeclampsia have an increased risk of developing cardiovascular disease (CVD) later in life. Classical risk scores are not suitable as risk estimates of CVD in this young population. Recent recommendations from the American Heart Association are aimed to improve cardiovascular health (CVH).

Objectives

Examining CVH by Health Life Check (HLC) (http://mylifecheck.heart.org/) in previously severe preeclamptic women is part of our cardiovascular risk follow-up program. Final score is a scale from 1 to 10, where 10 represents ideal CVH.

Results

Since 2011 HLC is offered to all women in this program. So far, 213 women were included, 148 (70%) underwent a CVH assessment by performing HLC between three months and one year after delivery. The overall HLC score was 7.4 (median; range: 0.8-10.0) at 3.6 months after the delivery. Only 2 out of 148 women (1.4%) had an ideal score. HLC score was 7.1 (median; range: 0.8-10.0) for 48 women who had a HLC score within 6 months after delivery versus 8.2 (median; range: 2.6-9.8) in the second half of the first year after delivery.

Conclusion

These are the first data on CVH in women after severe preeclampsia. Only 1.4% of these women had an ideal score. Active counselling of these women could be the reason of the improved score over time. We showed that CVH as assessed by HLC is an excellent tool for cardiovascular risk management in this specific group of women.",2013-04-01 +21780740,Integrated online system for a pyrosequencing-based microbial source tracking method that targets Bacteroidetes 16S rDNA.,"Genotypic microbial source tracking (MST) methods are now routinely used to determine sources of fecal contamination impacting waterways. We previously reported the development of a pyrosequencing-based MST method that assigns contamination sources based on shared operational taxonomic units (OTUs) between fecal and environmental bacterial communities. Despite decreasing sequencing costs, pyrosequencing-based MST approaches are not used in routine water quality monitoring studies due in large part to difficulties in handling massive data sets and difficulties in determining sources of fecal contamination. In the studies presented here we describe the development of an online MST tool, PyroMiST ( http://env1.gist.ac.kr/∼aeml/MST.html) that uses total bacterial or Bacteroidetes 16S rDNA pyrosequencing reads to determine fecal contamination of waterways. The program cd-hit was used for OTU assignment and a Perl script was used to calculate the number of shared OTUs. The analyses require only a small number of pyrosequencing reads from environmental samples. Our results indicate that PyroMiST provides a user-friendly web interface for pyrosequence data that significantly reduces analysis time required to determine potential sources of fecal contamination in the environment.",2011-08-02 +30708525,First Report of Impatiens Downy Mildew Outbreaks Caused by Plasmopara obducens Throughout the Hawai'ian Islands.,"Downy mildew of impatiens (Impatiens walleriana Hook.f.) 
was first reported from the continental United States in 2004. In 2011 to 2012, severe and widespread outbreaks were documented across the United States mainland, resulting in considerable economic losses. On May 5, 2013, downy mildew disease symptoms were observed from I. walleriana 'Super Elfin' at a retail nursery in Mililani, on the Hawai'ian island of Oahu. Throughout May and June 2013, additional sightings of the disease were documented from the islands of Oahu, Kauai, Maui, and Hawai'i from nurseries, home gardens, and botanical park and landscape plantings. Symptoms of infected plants initially showed downward leaf curl, followed by a stippled chlorotic appearance on the adaxial leaf surfaces. Abaxial leaf surfaces were covered with a layer of white mycelia. Affected plants exhibited defoliation, flower drop, and stem rot as the disease progressed. Based on morphological and molecular data, the organism was identified as Plasmopara obducens (J. Schröt.) J. Schröt. Microscopic observation disclosed coenocytic mycelium and hyaline, thin-walled, tree-like (monopodial branches), straight, 94.0 to 300.0 × 3.2 to 10.8 μm sporangiophores. Ovoid, hyaline sporangia measuring 11.0 to 14.6 × 12.2 to 16.2 (average 13.2 × 14.7) μm were borne on sterigma tips of rigid branchlets (8.0 to 15.0 μm) at right angle to the main axis of the sporangiophores (1,3). Molecular identification of the pathogen was conducted by removing hyphae from the surface of three heavily infected leaves using sterile tweezers, then extracting DNA using the QIAGEN Plant DNA kit (QIAGEN, Gaithersburg, MD). The nuclear rDNA internal transcribed spacer was sequenced from each of the three samples bidirectionally from Illustra EXOStar (GE Healthcare, Piscataway, NJ) purified amplicon generated from primers ITS1-O and LR-0R (4). Resultant sequences (GenBank KF366378 to 80) shared 99 to 100% nucleotide identity with P. obducens accession DQ665666 (4). 
A voucher specimen (BPI892676) was deposited in the U.S. National Fungus Collections, Beltsville, MD. Pathogenicity tests were performed by spraying 6-week-old impatiens plants (I. walleriana var. Super Elfin) grown singly in 4-inch pots with a suspension of 1 × 10^4 P. obducens sporangia/ml until runoff using a handheld atomizer. Control plants were sprayed with distilled water. The plants were kept in high humidity by covering with black plastic bags for 48 h at 20°C, and then maintained in the greenhouse (night/day temperature of 20/24°C). The first symptoms (downward curling and chlorotic stippling of leaves) and sporulation of the pathogen on under-leaf surfaces of the inoculated plants appeared at 10 days and 21 days after inoculation, respectively. Control plants remained healthy. Morphological features and measurements matched those of the original inoculum, thus fulfilling Koch's postulates. To our knowledge, this is the first report of downy mildew on I. walleriana in Hawai'i (2). The disease appears to be widespread throughout the islands and is likely to cause considerable losses in Hawai'ian landscapes and production settings. References: (1) O. Constantinescu. Mycologia 83:473, 1991. (2) D. F. Farr and A. Y. Rossman. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ July 16, 2013. (3) P. A. Saccardo. Sylloge Fungorum 7:242, 1888. (4) M. Thines. Fungal Genet Biol 44:199, 2007.",2014-05-01 +24238148,"Time-to-diagnosis and symptoms of myeloma, lymphomas and leukaemias: a report from the Haematological Malignancy Research Network.","

Background

Prior to diagnosis, patients with haematological cancers often have multiple primary care consultations, resulting in diagnostic delay. They are less likely to be referred urgently to hospital and often present as emergencies. We examined patient perspectives of time to help-seeking and diagnosis, as well as associated symptoms and experiences.

Methods

The UK's Haematological Malignancy Research Network (http://www.hmrn.org) routinely collects data on all patients newly diagnosed with myeloma, lymphoma and leukaemia (>2000 annually; population 3.6 million). With clinical agreement, patients are also invited to participate in an on-going survey about the circumstances leading to their diagnosis (presence/absence of symptoms; type of symptom(s) and date(s) of onset; date medical advice first sought (help-seeking); summary of important experiences in the time before diagnosis). From 2004-2011, 8858 patients were approached and 5038 agreed they could be contacted for research purposes; 3329 requested and returned a completed questionnaire. The duration of the total interval (symptom onset to diagnosis), patient interval (symptom onset to help-seeking) and diagnostic interval (help-seeking to diagnosis) was examined by patient characteristics and diagnosis. Type and frequency of symptoms were examined collectively, by diagnosis and compared to UK Referral Guidelines.

Results

Around one-third of patients were asymptomatic at diagnosis. In those with symptoms, the median patient interval tended to be shorter than the diagnostic interval across most diseases. Intervals varied markedly by diagnosis: acute myeloid leukaemia being 41 days (Interquartile range (IQR) 17-85), diffuse large B-cell lymphoma 98 days (IQR 53-192) and myeloma 163 days (IQR 84-306). Many symptoms corresponded to those cited in UK Referral Guidelines, but some were rarely reported (e.g. pain on drinking alcohol). By contrast others, absent from the guidance, were more frequent (e.g. stomach and bowel problems). Symptoms such as tiredness and pain were common across all diseases, although some specificity was evident by sub-type, such as lymphadenopathy in lymphoma and bleeding and bruising in acute leukaemia.

Conclusions

Pathways to diagnosis are varied and can be unacceptably prolonged, particularly for myeloma and some lymphomas. More evidence is needed, along with interventions to reduce time-to-diagnosis, such as public education campaigns and GP decision-making aids, as well as refinement of existing Referral Guidelines.",2013-10-31 +22788675,A graph-based approach for designing extensible pipelines.,"

Background

In bioinformatics, it is important to build extensible and low-maintenance systems that are able to deal with the new tools and data formats that are constantly being developed. The traditional and simplest implementation of pipelines involves hardcoding the execution steps into programs or scripts. This approach can lead to problems when a pipeline is expanding because the incorporation of new tools is often error prone and time consuming. Current approaches to pipeline development such as workflow management systems focus on analysis tasks that are systematically repeated without significant changes in their course of execution, such as genome annotation. However, more dynamism on the pipeline composition is necessary when each execution requires a different combination of steps.

Results

We propose a graph-based approach to implement extensible and low-maintenance pipelines that is suitable for pipeline applications with multiple functionalities that require different combinations of steps in each execution. Here pipelines are composed automatically by compiling a specialised set of tools on demand, depending on the functionality required, instead of specifying every sequence of tools in advance. We represent the connectivity of pipeline components with a directed graph in which components are the graph edges, their inputs and outputs are the graph nodes, and the paths through the graph are pipelines. To that end, we developed special data structures and a pipeline system algorithm. We demonstrate the applicability of our approach by implementing a format conversion pipeline for the fields of population genetics and genetic epidemiology, but our approach is also helpful in other fields where the use of multiple software is necessary to perform comprehensive analyses, such as gene expression and proteomics analyses. The project code, documentation and the Java executables are available under an open source license at http://code.google.com/p/dynamic-pipeline. The system has been tested on Linux and Windows platforms.

Conclusions

Our graph-based approach enables the automatic creation of pipelines by compiling a specialised set of tools on demand, depending on the functionality required. It also allows the implementation of extensible and low-maintenance pipelines and contributes towards consolidating openness and collaboration in bioinformatics systems. It is targeted at pipeline developers and is suited for implementing applications with sequential execution steps and combined functionalities. In the format conversion application, the automatic combination of conversion tools increased both the number of possible conversions available to the user and the extensibility of the system to allow for future updates with new file formats.",2012-07-12 +23180791,"OrthoDB: a hierarchical catalog of animal, fungal and bacterial orthologs.","The concept of orthology provides a foundation for formulating hypotheses on gene and genome evolution, and thus forms the cornerstone of comparative genomics, phylogenomics and metagenomics. We present the update of OrthoDB-the hierarchical catalog of orthologs (http://www.orthodb.org). From its conception, OrthoDB promoted delineation of orthologs at varying resolution by explicitly referring to the hierarchy of species radiations, now also adopted by other resources. The current release provides comprehensive coverage of animals and fungi representing 252 eukaryotic species, and is now extended to prokaryotes with the inclusion of 1115 bacteria. Functional annotations of orthologous groups are provided through mapping to InterPro, GO, OMIM and model organism phenotypes, with cross-references to major resources including UniProt, NCBI and FlyBase. Uniquely, OrthoDB provides computed evolutionary traits of orthologs, such as gene duplicability and loss profiles, divergence rates, sibling groups, and now extended with exon-intron architectures, syntenic orthologs and parent-child trees. 
The interactive web interface allows navigation along the species phylogenies, complex queries with various identifiers, annotation keywords and phrases, as well as with gene copy-number profiles and sequence homology searches. With the explosive growth of available data, OrthoDB also provides mapping of newly sequenced genomes and transcriptomes to the current orthologous groups.",2012-11-24 +22199382,Modeling mechanistic biological networks: an advanced Boolean approach.,"

Motivation

The understanding of the molecular sources for diseases like cancer can be significantly improved by computational models. Recently, Boolean networks have become very popular for modeling signaling and regulatory networks. However, such models rely on a set of Boolean functions that are in general not known. Unfortunately, while detailed information on the molecular interactions becomes available in large scale through electronic databases, the information on the Boolean functions does not become available simultaneously and has to be included manually into the models, if at all known.

Results

We propose a new Boolean approach which can directly utilize the mechanistic network information available through modern databases. The Boolean function is implicitly defined by the reaction mechanisms. Special care has been taken for the treatment of kinetic features like inhibition. The method has been applied to a signaling model combining the Wnt and MAPK pathway.

Availability

A sample C++ implementation of the proposed method is available for Linux and compatible systems through http://code.google.com/p/libscopes/wiki/Paper2011.",2011-12-22 +22199390,PIE the search: searching PubMed literature for protein interaction information.,"

Motivation

Finding protein-protein interaction (PPI) information from literature is challenging but an important issue. However, keyword search in PubMed(®) is often time consuming because it requires a series of actions that refine keywords and browse search results until it reaches a goal. Due to the rapid growth of biomedical literature, it has become more difficult for biologists and curators to locate PPI information quickly. Therefore, a tool for prioritizing PPI informative articles can be a useful assistant for finding this PPI-relevant information.

Results

PIE (Protein Interaction information Extraction) the search is a web service implementing a competition-winning approach utilizing word and syntactic analyses by machine learning techniques. For easy user access, PIE the search provides a PubMed-like search environment, but the output is the list of articles prioritized by PPI confidence scores. By obtaining PPI-related articles at high rank, researchers can more easily find the up-to-date PPI information, which cannot be found in manually curated PPI databases.

Availability

http://www.ncbi.nlm.nih.gov/IRET/PIE/.",2011-12-22 +22929026,WikiGWA: an open platform for collecting and using genome-wide association results.,"The number of discovered genetic variants from genome-wide association (GWA) studies (GWAS) has been growing rapidly. Centralized efforts such as the National Human Genome Research Institute's GWAS catalog provide regular updates and a convenient interface for quick lookup. However, the catalog entries are manually curated and rely on data from published articles. Other tools such as SNPedia (http://www.snpedia.com) collect published results regarding functional consequences of genetic variations. Here, we propose an approach that allows individual investigators to share their GWA results through an open platform. Unlike GWAS catalog or SNPedia, wikiGWA collects first-hand GWAS results and in a much larger scale. Investigators are not only able to post a much larger amount of results, but also post results from unpublished studies, which could alleviate publication bias and facilitate identification of weak signals. Our interface allows for flexible and fast queries, and the query results are formatted to work seamlessly with the LocusZoom program for visualization and annotation. We here describe wikiGWA, made publically available at http://www.wikiGWA.org.",2012-08-29 +21864382,Deep sequencing reveals as-yet-undiscovered small RNAs in Escherichia coli.,"

Background

In Escherichia coli, approximately 100 regulatory small RNAs (sRNAs) have been identified experimentally and many more have been predicted by various methods. To provide a comprehensive overview of sRNAs, we analysed the low-molecular-weight RNAs (< 200 nt) of E. coli with deep sequencing, because the regulatory RNAs in bacteria are usually 50-200 nt in length.

Results

We discovered 229 novel candidate sRNAs (≥ 50 nt) with computational or experimental evidence of transcription initiation. Among them, the expression of seven intergenic sRNAs and three cis-antisense sRNAs was detected by northern blot analysis. Interestingly, five novel sRNAs are expressed from prophage regions and we note that these sRNAs have several specific characteristics. Furthermore, we conducted an evolutionary conservation analysis of the candidate sRNAs and summarised the data among closely related bacterial strains.

Conclusions

This comprehensive screen for E. coli sRNAs using a deep sequencing approach has shown that many as-yet-undiscovered sRNAs are potentially encoded in the E. coli genome. We constructed the Escherichia coli Small RNA Browser (ECSBrowser; http://rna.iab.keio.ac.jp/), which integrates the data for previously identified sRNAs and the novel sRNAs found in this study.",2011-08-24 +21232146,SAMMate: a GUI tool for processing short read alignments in SAM/BAM format.,"

Background

Next Generation Sequencing (NGS) technology generates tens of millions of short reads for each DNA/RNA sample. A key step in NGS data analysis is the short read alignment of the generated sequences to a reference genome. Although storing alignment information in the Sequence Alignment/Map (SAM) or Binary SAM (BAM) format is now standard, biomedical researchers still have difficulty accessing this information.

Results

We have developed a Graphical User Interface (GUI) software tool named SAMMate. SAMMate allows biomedical researchers to quickly process SAM/BAM files and is compatible with both single-end and paired-end sequencing technologies. SAMMate also automates some standard procedures in DNA-seq and RNA-seq data analysis. Using either standard or customized annotation files, SAMMate allows users to accurately calculate the short read coverage of genomic intervals. In particular, for RNA-seq data SAMMate can accurately calculate the gene expression abundance scores for customized genomic intervals using short reads originating from both exons and exon-exon junctions. Furthermore, SAMMate can quickly calculate a whole-genome signal map at base-wise resolution allowing researchers to solve an array of bioinformatics problems. Finally, SAMMate can export both a wiggle file for alignment visualization in the UCSC genome browser and an alignment statistics report. The biological impact of these features is demonstrated via several case studies that predict miRNA targets using short read alignment information files.

Conclusions

With just a few mouse clicks, SAMMate will provide biomedical researchers easy access to important alignment information stored in SAM/BAM files. Our software is constantly updated and will greatly facilitate the downstream analysis of NGS data. Both the source code and the GUI executable are freely available under the GNU General Public License at http://sammate.sourceforge.net.",2011-01-13 +23824634,Validating a Coarse-Grained Potential Energy Function through Protein Loop Modelling.,"Coarse-grained (CG) methods for sampling protein conformational space have the potential to increase computational efficiency by reducing the degrees of freedom. The gain in computational efficiency of CG methods often comes at the expense of non-protein like local conformational features. This could cause problems when transitioning to full atom models in a hierarchical framework. Here, a CG potential energy function was validated by applying it to the problem of loop prediction. A novel method to sample the conformational space of backbone atoms was benchmarked using a standard test set consisting of 351 distinct loops. This method used a sequence-independent CG potential energy function representing the protein using [Formula: see text]-carbon positions only and sampling conformations with a Monte Carlo simulated annealing based protocol. Backbone atoms were added using a method previously described and then gradient minimised in the Rosetta force field. Despite the CG potential energy function being sequence-independent, the method performed similarly to methods that explicitly use either fragments of known protein backbones with similar sequences or residue-specific [Formula: see text]/[Formula: see text]-maps to restrict the search space. The method was also able to predict with sub-Angstrom accuracy two out of seven loops from recently solved crystal structures of proteins with low sequence and structure similarity to previously deposited structures in the PDB. 
The ability to sample realistic loop conformations directly from a potential energy function enables the incorporation of additional geometric restraints and the use of more advanced sampling methods in a way that is not possible to do easily with fragment replacement methods and also enable multi-scale simulations for protein design and protein structure prediction. These restraints could be derived from experimental data or could be design restraints in the case of computational protein design. C++ source code is available for download from http://www.sbg.bio.ic.ac.uk/phyre2/PD2/.",2013-06-18 +23013469,The simulation of meiosis in diploid and tetraploid organisms using various genetic models.,"

Background

While the genetics of diploid inheritance are well studied and software for linkage mapping, haplotyping and QTL analysis are available, for tetraploids the available tools are limited. In order to develop such tools it would be helpful if simulated populations based on a variety of models of the tetraploid meiosis would be available.

Results

Here we present PedigreeSim, a software package that simulates meiosis in both diploid and tetraploid species and uses this to simulate pedigrees and cross populations. For tetraploids a variety of models can be used, including both bivalent and quadrivalent formation, varying degrees of preferential pairing of hom(oe)ologous chromosomes, different quadrivalent configurations and more. Simulation of quadrivalent meiosis results as expected in double reduction and recombination between more than two hom(oe)ologous chromosomes. The results are shown to match theoretical predictions.

Conclusions

This is the first simulation software that implements all features of meiosis in tetraploids. It allows users to generate data for tetraploid and diploid populations, and to investigate different models of tetraploid meiosis. The software and manual are available from http://www.plantbreeding.nl/UK/software_pedigreeSim.html and as Additional files 1, 2, 3 and 4 with this publication.",2012-09-26 +22185599,Which clustering algorithm is better for predicting protein complexes?,"

Background

Protein-Protein interactions (PPI) play a key role in determining the outcome of most cellular processes. The correct identification and characterization of protein interactions and the networks, which they comprise, is critical for understanding the molecular mechanisms within the cell. Large-scale techniques such as pull down assays and tandem affinity purification are used in order to detect protein interactions in an organism. Today, relatively new high-throughput methods like yeast two hybrid, mass spectrometry, microarrays, and phage display are also used to reveal protein interaction networks.

Results

In this paper we evaluated four different clustering algorithms using six different interaction datasets. We parameterized the MCL, Spectral, RNSC and Affinity Propagation algorithms and applied them to six PPI datasets produced experimentally by Yeast 2 Hybrid (Y2H) and Tandem Affinity Purification (TAP) methods. The predicted clusters, so called protein complexes, were then compared and benchmarked with already known complexes stored in published databases.

Conclusions

While results may differ upon parameterization, the MCL and RNSC algorithms seem to be more promising and more accurate at predicting PPI complexes. Moreover, they predict more complexes than other reviewed algorithms in absolute numbers. On the other hand the spectral clustering algorithm achieves the highest valid prediction rate in our experiments. However, it is nearly always outperformed by both RNSC and MCL in terms of the geometrical accuracy while it generates the fewest valid clusters than any other reviewed algorithm. This article demonstrates various metrics to evaluate the accuracy of such predictions as they are presented in the text below. Supplementary material can be found at: http://www.bioacademy.gr/bioinformatics/projects/ppireview.htm.",2011-12-20 +23012263,Noncoder: a web interface for exon array-based detection of long non-coding RNAs.,"Due to recent technical developments, a high number of long non-coding RNAs (lncRNAs) have been discovered in mammals. Although it has been shown that lncRNAs are regulated differently among tissues and disease statuses, functions of these transcripts are still unknown in most cases. GeneChip Exon 1.0 ST Arrays (exon arrays) from Affymetrix, Inc. have been used widely to profile genome-wide expression changes and alternative splicing of protein-coding genes. Here, we demonstrate that re-annotation of exon array probes can be used to profile expressions of tens of thousands of lncRNAs. With this annotation, a detailed inspection of lncRNAs and their isoforms is possible. To allow for a general usage to the research community, we developed a user-friendly web interface called 'noncoder'. By uploading CEL files from exon arrays and with a few mouse clicks and parameter settings, exon array data will be normalized and analysed to identify differentially expressed lncRNAs. 
Noncoder provides the detailed annotation information of lncRNAs and is equipped with unique features to allow for an efficient search for interesting lncRNAs to be studied further. The web interface is available at http://noncoder.mpi-bn.mpg.de.",2012-09-24 +21367871,Connectedness of PPI network neighborhoods identifies regulatory hub proteins.,"

Motivation

With the growing availability of high-throughput protein-protein interaction (PPI) data, it has become possible to consider how a protein's local or global network characteristics predict its function.

Results

We introduce a graph-theoretic approach that identifies key regulatory proteins in an organism by analyzing proteins' local PPI network structure. We apply the method to the yeast genome and describe several properties of the resulting set of regulatory hubs. Finally, we demonstrate how the identified hubs and putative target gene sets can be used to identify causative, functional regulators of differential gene expression linked to human disease.

Availability

Code is available at http://bcb.cs.tufts.edu/hubcomps.

Contact

fox.andrew.d@gmail.com; slonim@cs.tufts.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-03-02 +23536993,Estimation of optic nerve sheath diameter on an initial brain computed tomography scan can contribute prognostic information in traumatic brain injury patients.,"

Introduction

The aim of this study was to evaluate the prognostic value of optic nerve sheath diameter (ONSD) measured on the initial brain computed tomography (CT) scan for intensive care unit (ICU) mortality in severe traumatic brain injury (TBI) patients.

Methods

A prospective observational study of all severe TBI patients admitted to a neurosurgical ICU (over a 10-month period). Demographic and clinical data and brain CT scan results were recorded. ONSD for each eye was measured on the initial CT scan. The group of ICU survivors was compared to non-survivors. Glasgow Outcome Scale (GOS) was evaluated six months after ICU discharge.

Results

Seventy-seven patients were included (age: 43±18; 81% males; mean Injury Severity Score: 35±15; ICU mortality: 28.5% (n=22)). Mean ONSD on the initial brain CT scan was 7.8±0.1 mm in non-survivors vs. 6.8±0.1 mm in survivors (P<0.001). The operative value of ONSD was a good predictor of mortality (area under the curve: 0.805). An ONSD cutoff≥7.3 had a sensitivity of 86.4% and a specificity of 74.6% and was independently associated with mortality in this population (adjusted odds ratio 95% confidence interval: 22.7 (3.2 to 159.6), P=0.002). There was a relationship between initial ONSD values and six-month GOS (P=0.03).

Conclusions

ONSD measured on the initial brain CT scan is independently associated with ICU mortality rate (when ≥7.3 mm) in severe TBI patients. See related commentary by Masquère et al., http://ccforum.com/content/17/3/151.",2013-03-27 +23281872,A flexible ancestral genome reconstruction method based on gapped adjacencies.,"

Background

The ""small phylogeny"" problem consists in inferring ancestral genomes associated with each internal node of a phylogenetic tree of a set of extant species. Existing methods can be grouped into two main categories: the distance-based methods aiming at minimizing a total branch length, and the synteny-based (or mapping) methods that first predict a collection of relations between ancestral markers in term of ""synteny"", and then assemble this collection into a set of Contiguous Ancestral Regions (CARs). The predicted CARs are likely to be more reliable as they are more directly deduced from observed conservations in extant species. However the challenge is to end up with a completely assembled genome.

Results

We develop a new synteny-based method that is flexible enough to handle a model of evolution involving whole genome duplication events, in addition to rearrangements, gene insertions, and losses. Ancestral relationships between markers are defined in terms of Gapped Adjacencies, i.e. pairs of markers separated by up to a given number of markers. It improves on a previous method restricted to direct adjacencies, which revealed a high accuracy for adjacency prediction, but with the drawback of being overly conservative, i.e. of generating a large number of CARs. Applying our algorithm on various simulated data sets reveals good performance as we usually end up with a completely assembled genome, while keeping a low error rate.

Availability

All source code is available at http://www.iro.umontreal.ca/~mabrouk.",2012-12-19 +23527569,Probing the structure of Mycobacterium tuberculosis MbtA: model validation using molecular dynamics simulations and docking studies.,"Multidrug resistance capacity of Mycobacterium tuberculosis demands urgent need for developing new antitubercular drugs. The present work is on M. tuberculosis-MbtA, an enzyme involved in the biosynthesis of siderophores, having a critical role in bacterial growth and virulence. The molecular models of both holo and apo forms of M. tuberculosis-MbtA have been constructed and validated. A docking study with a series of 42 5'-O-[N-(salicyl) sulfamoyl] adenosine derivatives, using GOLD software, revealed significant correlation (R(2) = 0.8611) between Goldscore and the reported binding affinity data. Further, binding energies of the docked poses were calculated and compared with the observed binding affinities (R(2) = 0.901). All-atom molecular dynamics simulation was performed for apo form, holo form without ligand and holo form with ligands. The holo form without ligand on molecular dynamics simulation for 20 ns converged to the apo form and the apo form upon induced fit docking of the natural substrate, 2,3-dihydroxybenzoic acid-adenylate, yielded the holo structure. The molecular dynamics simulation of the holo form with ligands across the time period of 20 ns provided with the insights into ligand-receptor interactions for inhibition of the enzyme. A thorough study involving interaction energy calculation between the ligands and the active site residues of MbtA model identified the key residues implicated in ligand binding. The holo model was capable to differentiate active compounds from decoys. In the absence of experimental structure of MbtA, the homology models together with the insights gained from this study will promote the rational design of potent and selective MbtA inhibitors as antitubercular therapeutics. 
An animated interactive 3D complement (I3DC) is available in Proteopedia at http://proteopedia.org/w/Journal:JBSD:33.",2013-03-25 +22815362,Cell-Dock: high-performance protein-protein docking.,"

Summary

The application of docking to large-scale experiments or the explicit treatment of protein flexibility are part of the new challenges in structural bioinformatics that will require large computer resources and more efficient algorithms. Highly optimized fast Fourier transform (FFT) approaches are broadly used in docking programs but their optimal code implementation leaves hardware acceleration as the only option to significantly reduce the computational cost of these tools. In this work we present Cell-Dock, an FFT-based docking algorithm adapted to the Cell BE processor. We show that Cell-Dock runs faster than FTDock with maximum speedups of above 200×, while achieving results of similar quality.

Availability and implementation

The source code is released under GNU General Public License version 2 and can be downloaded from http://mmb.pcb.ub.es/~cpons/Cell-Dock.

Contact

djimenez@ac.upc.edu or juanf@bsc.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-19 +22289516,Improving bioinformatic pipelines for exome variant calling.,"Exome sequencing analysis is a cost-effective approach for identifying variants in coding regions. However, recognizing the relevant single nucleotide variants, small insertions and deletions remains a challenge for many researchers and diagnostic laboratories typically do not have access to the bioinformatic analysis pipelines necessary for clinical application. The Atlas2 suite, recently released by Baylor Genome Center, is designed to be widely accessible, runs on desktop computers but is scalable to computational clusters, and performs comparably with other popular variant callers. Atlas2 may be an accessible alternative for data processing when a rapid solution for variant calling is required. See research article http://www.biomedcentral.com/1471-2105/13/8.",2012-01-30 +23383626,"Para-(benzoyl)-phenylalanine as a potential inhibitor against LpxC of Leptospira spp.: homology modeling, docking, and molecular dynamics study.","Leptospira interrogans, a Gram-negative bacterial pathogen is the main cause of human leptospirosis. Lipid A is a highly immunoreactive endotoxic center of lipopolysaccharide (LPS) that anchors LPS into the outer membrane of Leptospira. Discovery of compounds inhibiting lipid-A biosynthetic pathway would be promising for dissolving the structural integrity of membrane leading to cell lysis and death of Leptospira. LpxC, a unique enzyme of lipid-A biosynthetic pathway was identified as common drug target of Leptospira. Herein, homology modeling, docking, and molecular dynamics (MD) simulations were employed to discover potential inhibitors of LpxC. A reliable tertiary structure of LpxC in complex with inhibitor BB-78485 was constructed in Modeller 9v8. 
A data-set of BB-78485 structural analogs were docked with LpxC in Maestro v9.2 virtual screening workflow, which implements three stage Glide docking protocol. Twelve lead molecules with better XP Gscore compared to BB-78485 were proposed as potential inhibitors of LpxC. Para-(benzoyl)-phenylalanine - that showed lowest XP Gscore (-10.35 kcal/mol) - was predicted to have best binding affinity towards LpxC. MD simulations were performed for LpxC and para-(benzoyl)-phenylalanine docking complex in Desmond v3.0. Trajectory analysis showed the docking complex and inter-molecular interactions was stable throughout the entire production part of MD simulations. The results indicate para-(benzoyl)-phenylalanine as a potent drug molecule against leptospirosis. An animated Interactive 3D Complement (I3DC) is available in Proteopedia at http://proteopedia.org/w/Journal:JBSD:10.",2013-02-05 +21949701,Functional analysis beyond enrichment: non-redundant reciprocal linkage of genes and biological terms.,"Functional analysis of large sets of genes and proteins is becoming more and more necessary with the increase of experimental biomolecular data at omic-scale. Enrichment analysis is by far the most popular available methodology to derive functional implications of sets of cooperating genes. The problem with these techniques relies in the redundancy of resulting information, that in most cases generate lots of trivial results with high risk to mask the reality of key biological events. We present and describe a computational method, called GeneTerm Linker, that filters and links enriched output data identifying sets of associated genes and terms, producing metagroups of coherent biological significance. The method uses fuzzy reciprocal linkage between genes and terms to unravel their functional convergence and associations. 
The algorithm is tested with a small set of well known interacting proteins from yeast and with a large collection of reference sets from three heterogeneous resources: multiprotein complexes (CORUM), cellular pathways (SGD) and human diseases (OMIM). Statistical Precision, Recall and balanced F-score are calculated showing robust results, even when different levels of random noise are included in the test sets. Although we could not find an equivalent method, we present a comparative analysis with a widely used method that combines enrichment and functional annotation clustering. A web application to use the method here proposed is provided at http://gtlinker.cnb.csic.es.",2011-09-16 +23878286,Recurrent glioblastoma: optimum area under the curve method derived from dynamic contrast-enhanced T1-weighted perfusion MR imaging.,"

Purpose

To determine whether the ratio of the initial area under the time-signal intensity curve (AUC) (IAUC) to the final AUC--or AUCR--derived from dynamic contrast material-enhanced magnetic resonance (MR) imaging can be an imaging biomarker for distinguishing recurrent glioblastoma multiforme (GBM) from radiation necrosis and to compare the diagnostic accuracy of the AUCR with commonly used model-free dynamic contrast-enhanced MR imaging parameters.

Materials and methods

The institutional review board approved this retrospective study and waived the informed consent requirement. Fifty-seven consecutive patients with pathologically confirmed recurrent GBM (n = 32) or radiation necrosis (n = 25) underwent dynamic contrast-enhanced MR imaging. Histogram parameters of the IAUC at 30, 60, and 120 seconds and the AUCR, which included the mean value at the higher curve of the bimodal histogram (mAUCR(H)), as well as 90th percentile cumulative histogram cutoffs, were calculated and were correlated with final pathologic findings. The best predictor for differentiating recurrent GBM from radiation necrosis was determined by means of receiver operating characteristic (ROC) curve analysis.

Results

The demographic data were not significantly different between the two patient groups. There were statistically significant differences in all of the IAUC and AUCR parameters between the recurrent GBM and the radiation necrosis patient groups (P < .05 for each). ROC curve analyses showed mAUCR(H) to be the best single predictor of recurrent GBM (mAUCR(H) for recurrent GBM = 0.35 ± 0.11 [standard deviation], vs 0.19 ± 0.17 for radiation necrosis; P < .0001; optimum cutoff, 0.23), with a sensitivity of 93.8% and a specificity of 88.0%.

Conclusion

A bimodal histogram analysis of AUCR derived from dynamic contrast-enhanced MR imaging can be a potential noninvasive imaging biomarker for differentiating recurrent GBM from radiation necrosis.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.13130016/-/DC1.",2013-07-22 +22168213,MeInfoText 2.0: gene methylation and cancer relation extraction from biomedical literature.,"

Background

DNA methylation is regarded as a potential biomarker in the diagnosis and treatment of cancer. The relations between aberrant gene methylation and cancer development have been identified by a number of recent scientific studies. In a previous work, we used co-occurrences to mine those associations and compiled the MeInfoText 1.0 database. To reduce the amount of manual curation and improve the accuracy of relation extraction, we have now developed MeInfoText 2.0, which uses a machine learning-based approach to extract gene methylation-cancer relations.

Description

Two maximum entropy models are trained to predict if aberrant gene methylation is related to any type of cancer mentioned in the literature. After evaluation based on 10-fold cross-validation, the average precision/recall rates of the two models are 94.7/90.1 and 91.8/90% respectively. MeInfoText 2.0 provides the gene methylation profiles of different types of human cancer. The extracted relations with maximum probability, evidence sentences, and specific gene information are also retrievable. The database is available at http://bws.iis.sinica.edu.tw:8081/MeInfoText2/.

Conclusion

The previous version, MeInfoText, was developed by using association rules, whereas MeInfoText 2.0 is based on a new framework that combines machine learning, dictionary lookup and pattern matching for epigenetics information extraction. The results of experiments show that MeInfoText 2.0 outperforms existing tools in many respects. To the best of our knowledge, this is the first study that uses a hybrid approach to extract gene methylation-cancer relations. It is also the first attempt to develop a gene methylation and cancer relation corpus.",2011-12-14 +23153072,Cooperative stabilization of Zn(2+):DNA complexes through netropsin binding in the minor groove of FdU-substituted DNA.,"The simultaneous binding of netropsin in the minor groove and Zn(2+) in the major groove of a DNA hairpin that includes 10 consecutive FdU nucleotides at the 3'-terminus (3'FdU) was demonstrated based upon NMR spectroscopy, circular dichroism (CD), and computational modeling studies. The resulting Zn(2+)/netropsin: 3'FdU complex had very high thermal stability with aspects of the complex intact at 85 °C, conditions that result in complete dissociation of Mg(2+) complexes. CD and (19)F NMR spectroscopy were consistent with Zn(2+) binding in the major groove of the DNA duplex and utilizing F5 and O4 of consecutive FdU nucleotides as ligands with FdU nucleotides hemi-deprotonated in the complex. Netropsin is bound in the minor groove of the DNA duplex based upon 2D NOESY data demonstrating contacts between AH2 (1)H and netropsin (1)H resonances. The Zn(2+)/netropsin: 3'FdU complex displayed increased cytotoxicity towards PC3 prostate cancer (PCa) cells relative to the constituent components or separate complexes (e.g. Zn(2+):3'FdU) indicating that this new structural motif may be therapeutically useful for PCa treatment. 
An animated interactive 3D complement (I3DC) is available in Proteopedia at http://proteopedia.org/w/Journal:JBSD:32.",2012-11-16 +24063607,A modular framework for biomedical concept recognition.,"

Background

Concept recognition is an essential task in biomedical information extraction, presenting several complex and unsolved challenges. The development of such solutions is typically performed in an ad-hoc manner or using general information extraction frameworks, which are not optimized for the biomedical domain and normally require the integration of complex external libraries and/or the development of custom tools.

Results

This article presents Neji, an open source framework optimized for biomedical concept recognition built around four key characteristics: modularity, scalability, speed, and usability. It integrates modules for biomedical natural language processing, such as sentence splitting, tokenization, lemmatization, part-of-speech tagging, chunking and dependency parsing. Concept recognition is provided through dictionary matching and machine learning with normalization methods. Neji also integrates an innovative concept tree implementation, supporting overlapped concept names and respective disambiguation techniques. The most popular input and output formats, namely Pubmed XML, IeXML, CoNLL and A1, are also supported. On top of the built-in functionalities, developers and researchers can implement new processing modules or pipelines, or use the provided command-line interface tool to build their own solutions, applying the most appropriate techniques to identify heterogeneous biomedical concepts. Neji was evaluated against three gold standard corpora with heterogeneous biomedical concepts (CRAFT, AnEM and NCBI disease corpus), achieving high performance results on named entity recognition (F1-measure for overlap matching: species 95%, cell 92%, cellular components 83%, gene and proteins 76%, chemicals 65%, biological processes and molecular functions 63%, disorders 85%, and anatomical entities 82%) and on entity normalization (F1-measure for overlap name matching and correct identifier included in the returned list of identifiers: species 88%, cell 71%, cellular components 72%, gene and proteins 64%, chemicals 53%, and biological processes and molecular functions 40%). Neji provides fast and multi-threaded data processing, annotating up to 1200 sentences/second when using dictionary-based concept identification.

Conclusions

Considering the provided features and underlying characteristics, we believe that Neji is an important contribution to the biomedical community, streamlining the development of complex concept recognition solutions. Neji is freely available at http://bioinformatics.ua.pt/neji.",2013-09-24 +21775305,CalcTav--integration of a spreadsheet and Taverna workbench.,"

Motivation

Taverna workbench is an environment for construction, visualization and execution of bioinformatic workflows that integrates specialized tools available on the Internet. It already supports major bioinformatics services and is constantly gaining popularity. However, its user interface requires considerable effort to learn, and sometimes requires programming or scripting experience from its users. We have integrated Taverna with OpenOffice Calc, making the functions of the scientific workflow system available in the spreadsheet. In CalcTav, one can define workflows using the spreadsheet interface and analyze the results using the spreadsheet toolset.

Results

Technically, CalcTav is a plugin for OpenOffice Calc, which provides the functionality of Taverna available in the form of spreadsheet functions. Even basic familiarity with spreadsheets already suffices to define and use spreadsheet workflows with Taverna services. The data processed by the Taverna components is automatically transferred to and from spreadsheet cells, so all the visualization and data analysis tools of OpenOffice Calc are available to the workflow creator within one, consistent user interface.

Availability

CalcTav is available under GPLv2 from http://code.google.com/p/calctav/

Contact

sroka@mimuw.edu.pl.",2011-07-19 +21771335,FAAST: Flow-space Assisted Alignment Search Tool.,"

Background

High throughput pyrosequencing (454 sequencing) is the major sequencing platform for producing long read high throughput data. While most other sequencing techniques produce reading errors mainly comparable with substitutions, pyrosequencing produces errors mainly comparable with gaps. These errors are less efficiently detected by most conventional alignment programs and may produce inaccurate alignments.

Results

We suggest a novel algorithm for calculating the optimal local alignment which utilises flowpeak information in order to improve alignment accuracy. Flowpeak information can be retained from a 454 sequencing run through interpretation of the binary SFF-file format. This novel algorithm has been implemented in a program named FAAST (Flow-space Assisted Alignment Search Tool).

Conclusions

We present and discuss the results of simulations that show that FAAST, through the use of the novel algorithm, can gain several percentage points of accuracy compared to Smith-Waterman-Gotoh alignments, depending on the 454 data quality. Furthermore, through an efficient multi-thread aware implementation, FAAST is able to perform these high quality alignments at high speed. The tool is available at http://www.ifm.liu.se/bioinfo/",2011-07-19 +22311862,Medical image segmentation by combining graph cuts and oriented active appearance models.,"In this paper, we propose a novel method based on a strategic combination of the active appearance model (AAM), live wire (LW), and graph cuts (GCs) for abdominal 3-D organ segmentation. The proposed method consists of three main parts: model building, object recognition, and delineation. In the model building part, we construct the AAM and train the LW cost function and GC parameters. In the recognition part, a novel algorithm is proposed for improving the conventional AAM matching method, which effectively combines the AAM and LW methods, resulting in the oriented AAM (OAAM). A multiobject strategy is utilized to help in object initialization. We employ a pseudo-3-D initialization strategy and segment the organs slice by slice via a multiobject OAAM method. For the object delineation part, a 3-D shape-constrained GC method is proposed. The object shape generated from the initialization step is integrated into the GC cost computation, and an iterative GC-OAAM method is used for object delineation. The proposed method was tested in segmenting the liver, kidneys, and spleen on a clinical CT data set and also on the MICCAI 2007 Grand Challenge liver data set. 
The results show the following: 1) The overall segmentation accuracy of true positive volume fraction TPVF > 94.3% and false positive volume fraction can be achieved; 2) the initialization performance can be improved by combining the AAM and LW; 3) the multiobject strategy greatly facilitates initialization; 4) compared with the traditional 3-D AAM method, the pseudo-3-D OAAM method achieves comparable performance while running 12 times faster; and 5) the performance of the proposed method is comparable to state-of-the-art liver segmentation algorithm. The executable version of the 3-D shape-constrained GC method with a user interface can be downloaded from http://xinjianchen.wordpress.com/research/.",2012-01-31 +22098204,Stereochemically consistent reaction mapping and identification of multiple reaction mechanisms through integer linear optimization.,"Reaction mappings are of fundamental importance to researchers studying the mechanisms of chemical reactions and analyzing biochemical pathways. We have developed an automated method based on integer linear optimization, ILP, to identify optimal reaction mappings that minimize the number of bond changes. An alternate objective function is also proposed that minimizes the number of bond order changes. In contrast to previous approaches, our method produces mappings that respect stereochemistry. We also show how to locate multiple reaction mappings efficiently and determine which of those mappings correspond to distinct reaction mechanisms by automatically detecting molecular symmetries. We demonstrate our techniques through a number of computational studies on the GRI-Mech, KEGG LIGAND, and BioPath databases. The computational studies indicate that 99% of the 8078 reactions tested can be addressed within 1 CPU hour. 
The proposed framework has been incorporated into the Web tool DREAM ( http://selene.princeton.edu/dream/ ), which is freely available to the scientific community.",2011-12-13 +22267903,LCGbase: A Comprehensive Database for Lineage-Based Co-regulated Genes.,"Animal genes of different lineages, such as vertebrates and arthropods, are well-organized and blended into dynamic chromosomal structures that represent a primary regulatory mechanism for body development and cellular differentiation. The majority of genes in a genome are actually clustered, which are evolutionarily stable to different extents and biologically meaningful when evaluated among genomes within and across lineages. Until now, many questions concerning gene organization, such as what is the minimal number of genes in a cluster and what is the driving force leading to gene co-regulation, remain to be addressed. Here, we provide a user-friendly database-LCGbase (a comprehensive database for lineage-based co-regulated genes)-hosting information on evolutionary dynamics of gene clustering and ordering within animal kingdoms in two different lineages: vertebrates and arthropods. The database is constructed on a web-based Linux-Apache-MySQL-PHP framework and effective interactive user-inquiry service. Compared to other gene annotation databases with similar purposes, our database has three comprehensible advantages. First, our database is inclusive, including all high-quality genome assemblies of vertebrates and representative arthropod species. Second, it is human-centric since we map all gene clusters from other genomes in an order of lineage-ranks (such as primates, mammals, warm-blooded, and reptiles) onto human genome and start the database from well-defined gene pairs (a minimal cluster where the two adjacent genes are oriented as co-directional, convergent, and divergent pairs) to large gene clusters. Furthermore, users can search for any adjacent genes and their detailed annotations. 
Third, the database provides flexible parameter definitions, such as the distance of transcription start sites between two adjacent genes, which is extendable to genes that flanking the cluster across species. We also provide useful tools for sequence alignment, gene ontology (GO) annotation, promoter identification, gene expression (co-expression), and evolutionary analysis. This database not only provides a way to define lineage-specific and species-specific gene clusters but also facilitates future studies on gene co-regulation, epigenetic control of gene expression (DNA methylation and histone marks), and chromosomal structures in a context of gene clusters and species evolution. LCGbase is freely available at http://lcgbase.big.ac.cn/LCGbase.",2011-12-13 +21995661,Scedosporium and Pseudallescheria low molecular weight metabolites revealed by database search.,"The potential of mMass software search tool with new compound libraries was demonstrated on metabolomics of Scedosporium prolificans, S. apiospermum and Pseudallescheria boydii sensu stricto. Cyclic peptides pseudacyclins, small molecular weight tyroscherin analogues and various lipids were annotated by public software tool (http://www.mmass.org) utilising accurate matrix-assisted laser desorption/ionisation mass spectral data of intact fungal spores. Electrospray ionisation combined with tandem mass spectrometry was used for monohexosylceramide characterisation in fungal extracts.",2011-10-01 +22150118,A curated database of genetic markers from the angiogenesis/VEGF pathway and their relation to clinical outcome in human cancers.,"

Introduction

Angiogenesis causes local growth, aggressiveness and metastasis in solid tumors, and thus, is almost always associated with poor prognosis and survival in cancer patients. Because of this clinical importance, several chemotherapeutic agents targeting angiogenesis have also been developed. Genes and genetic variations in angiogenesis/VEGF pathway thus may be correlated with clinical outcome in cancer patients.

Material and methods

Here, we describe a manually curated public database, dbANGIO, which posts the results of studies testing the possible correlation of genetic variations (polymorphisms and mutations) from the angiogenesis/VEGF pathway with demographic features, clinicopathological features, treatment response and toxicity, and prognosis and survival-related endpoints in human cancers. The scientific findings are retrieved from PUBMED and posted in the dbANGIO website in a summarized form.

Results and conclusion

As of September 2011, dbANGIO includes 362 entries from 83 research articles encompassing 154 unique genetic variations from 39 genes investigated in several solid and hematological cancers. By curating the literature findings and making them freely available to researchers, dbANGIO will expedite the research on genetic factors from the angiogenesis pathway and will assist in their utility in clinical management of cancer patients. dbANGIO is freely available for non-profit institutions at http://www.med.mun.ca/angio.",2011-12-12 +22285830,A co-module approach for elucidating drug-disease associations and revealing their molecular basis.,"

Motivation

Understanding how drugs and diseases are associated at the molecular level is of critical importance to unveil disease mechanisms and treatments. Until recently, few studies attempted to discover important gene modules shared by both drugs and diseases.

Results

Here, we propose a novel presentation of drug-gene-disease relationship, a 'co-module', which is characterized by closely related drugs, diseases and genes. We first define a network-based gene closeness profile to relate drug to disease. Then, we develop a Bayesian partition method to identify drug-gene-disease co-modules underlying the gene closeness data. Genes share similar notable patterns with respect not only to the drugs but also the diseases within a co-module. Simulations show that our method, comCIPHER, achieves a better performance compared with a popular co-module detection method, PPA. We apply comCIPHER to a set consisting of 723 drugs, 275 diseases and 1442 genes and demonstrate that our co-module approach is able to identify new drug-disease associations and highlight their molecular basis. Disease co-morbidity emerges as well. Three co-modules are further illustrated in which new drug applications, including the anti-cancer metastasis activity of an anti-asthma drug Pranlukast, and a cardiovascular stress-testing agent Arbutamine for obesity, as well as potential side-effects, e.g. hypotension for Triamterene, are computationally identified.

Availability

The compiled version of comCIPHER can be found at http://bioinfo.au.tsinghua.edu.cn/comCIPHER/. The 86 co-modules can be downloaded from http://bioinfo.au.tsinghua.edu.cn/comCIPHER/Co_Module_Results.zip.

Contact

shaoli@mail.tsinghua.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-28 +22355238,Gene-chromosome locations of neuropsychiatric diseases.,

Unlabelled

A number of genes are involved in various neuropsychiatric disorders. A comprehensive compilation of these genes is important for a better understanding of these diseases. We report an online file that lists genes by chromosome number and location. This is useful for the rapid examination of chromosome bands for genes involved in these diseases. This is not an exhaustive list and does not include single nucleotide polymorphism (SNP) results for genes that are currently being examined by genome wide association studies (GWAS) and other molecular methodologies.

Availability

The database is available for free at http://www.bioinformation.net/007/paul.xls.,2011-12-10 +23759396,Methodology for quantifying fasting glucose homeostasis in type 2 diabetes: observed variability and lability.,"

Background

Increased glycemic variability is associated with an increased risk of adverse clinical outcomes in diabetes. Central to the understanding of diabetes is glucose homeostasis. ""Good"" homeostasis is equated to low glycemic variability, and ""poor"" homeostasis is linked to greater glycemic variability. We have, therefore, developed a method with the aim to objectively quantify the domain of glucose-insulin homeostasis. We have termed this method as Observed Variability And Lability (OVAL).

Method

Blood samples for the measurement of glucose and insulin concentrations were acquired every 2 min for 120 min from 12 patients with type 2 diabetes mellitus [T2DM; median (range) age 35 (25-47) years and duration of diabetes 7 (2-9) years receiving oral hypoglycemic treatment] and 27 controls [aged 38(30-53) years] with an equal split of genders and equal distribution of body mass indexes. The insulin-glucose time variant data form the boundaries of OVAL, defined as the ellipse enclosing the 95% confidence intervals of the insulin and glucose concentrations plotted on an x-y scatter graph and normalized to ensure equal weighting of insulin and glucose.

Results

Less precise OVAL homeostasis was observed in subjects with T2DM, by a factor of 4, in comparison with controls [OVAL, T2DM 7.8(3.8) versus controls 1.9(1.0); p = .0003]. The assessment remained statistically robust (p < .001) with increased sampling intervals up to 8 min.

Conclusion

The OVAL model is a robust method for measuring glucose-insulin homeostasis in controls and T2DM subjects (available online at http://www.oval-calc.co.uk). Deranged glucose-insulin homeostasis is the hallmark of diabetes and OVAL has the capacity to quantify in the fasting state.",2013-05-01 +23630177,Intervention in gene regulatory networks with maximal phenotype alteration.,"

Motivation

A basic issue for translational genomics is to model gene interaction via gene regulatory networks (GRNs) and thereby provide an informatics environment to study the effects of intervention (say, via drugs) and to derive effective intervention strategies. Taking the view that the phenotype is characterized by the long-run behavior (steady-state distribution) of the network, we desire interventions to optimally move the probability mass from undesirable to desirable states. Heretofore, two external control approaches have been taken to shift the steady-state mass of a GRN: (i) use a user-defined cost function for which desirable shift of the steady-state mass is a by-product and (ii) use heuristics to design a greedy algorithm. Neither approach provides an optimal control policy relative to long-run behavior.

Results

We use a linear programming approach to optimally shift the steady-state mass from undesirable to desirable states, i.e. optimization is directly based on the amount of shift and therefore must outperform previously proposed methods. Moreover, the same basic linear programming structure is used for both unconstrained and constrained optimization, where in the latter case, constraints on the optimization limit the amount of mass that may be shifted to 'ambiguous' states, these being states that are not directly undesirable relative to the pathology of interest but which bear some perceived risk. We apply the method to probabilistic Boolean networks, but the theory applies to any Markovian GRN.

Availability

Supplementary materials, including the simulation results, MATLAB source code and description of suboptimal methods are available at http://gsp.tamu.edu/Publications/supplementary/yousefi13b.

Contact

edward@ece.tamu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-29 +22412877,OneG: a computational tool for predicting cryptic intermediates in the unfolding kinetics of proteins under native conditions.,"Understanding the relationships between conformations of proteins and their stabilities is one key to address the protein folding paradigm. The free energy change (ΔG) of unfolding reactions of proteins is measured by traditional denaturation methods and native hydrogen-deuterium (H/D) exchange methods. However, the free energy of unfolding (ΔG(U)) and the free energy of exchange (ΔG(HX)) of proteins are not in good agreement, though the experimental conditions of both methods are well matching to each other. The anomaly is due to any one or combinations of the following reasons: (i) effects of cis-trans proline isomerisation under equilibrium unfolding reactions of proteins (ii) inappropriateness in accounting the baselines of melting curves (iii) presence of cryptic intermediates, which may elude the melting curve analysis and (iv) existence of higher energy metastable states in the H/D exchange reactions of proteins. Herein, we have developed a novel computational tool, OneG, which accounts the discrepancy between ΔG(U) and ΔG(HX) of proteins by systematically accounting all the four factors mentioned above. The program is fully automated and requires four inputs: three-dimensional structures of proteins, ΔG(U), ΔG(U)(*) and residue-specific ΔG(HX) determined under EX2-exchange conditions in the absence of denaturants. The robustness of the program has been validated using experimental data available for proteins such as cytochrome c and apocytochrome b(562) and the data analyses revealed that cryptic intermediates of the proteins detected by the experimental methods and the cryptic intermediates predicted by the OneG for those proteins were in good agreement. 
Furthermore, using OneG, we have shown possible existence of cryptic intermediates and metastable states in the unfolding pathways of cardiotoxin III and cobrotoxin, respectively, which are homologous proteins. The unique application of the program to map the unfolding pathways of proteins under native conditions have been brought into fore and the program is publicly available at http://sblab.sastra.edu/oneg.html.",2012-03-07 +21999478,The Enzyme Function Initiative.,"The Enzyme Function Initiative (EFI) was recently established to address the challenge of assigning reliable functions to enzymes discovered in bacterial genome projects; in this Current Topic, we review the structure and operations of the EFI. The EFI includes the Superfamily/Genome, Protein, Structure, Computation, and Data/Dissemination Cores that provide the infrastructure for reliably predicting the in vitro functions of unknown enzymes. The initial targets for functional assignment are selected from five functionally diverse superfamilies (amidohydrolase, enolase, glutathione transferase, haloalkanoic acid dehalogenase, and isoprenoid synthase), with five superfamily specific Bridging Projects experimentally testing the predicted in vitro enzymatic activities. The EFI also includes the Microbiology Core that evaluates the in vivo context of in vitro enzymatic functions and confirms the functional predictions of the EFI. 
The deliverables of the EFI to the scientific community include (1) development of a large-scale, multidisciplinary sequence/structure-based strategy for functional assignment of unknown enzymes discovered in genome projects (target selection, protein production, structure determination, computation, experimental enzymology, microbiology, and structure-based annotation), (2) dissemination of the strategy to the community via publications, collaborations, workshops, and symposia, (3) computational and bioinformatic tools for using the strategy, (4) provision of experimental protocols and/or reagents for enzyme production and characterization, and (5) dissemination of data via the EFI's Website, http://enzymefunction.org. The realization of multidisciplinary strategies for functional assignment will begin to define the full metabolic diversity that exists in nature and will impact basic biochemical and evolutionary understanding, as well as a wide range of applications of central importance to industrial, medicinal, and pharmaceutical efforts.",2011-10-26 +22160653,vHoT: a database for predicting interspecies interactions between viral microRNA and host genomes.,"Some viruses have been reported to transcribe microRNAs, implying complex relationships between the host and the pathogen at the post-transcriptional level through microRNAs in virus-infected cells. Although many computational algorithms have been developed for microRNA target prediction, few have been designed exclusively to find cellular or viral mRNA targets of viral microRNAs in a user-friendly manner. To address this, we introduce the viral microRNA host target (vHoT) database for predicting interspecies interactions between viral microRNA and host genomes. vHoT supports target prediction of 271 viral microRNAs from human, mouse, rat, rhesus monkey, cow, and virus genomes. 
vHoT is freely available at http://dna.korea.ac.kr/vhot.",2011-12-08 +22796954,Gene and translation initiation site prediction in metagenomic sequences.,"

Motivation

Gene prediction in metagenomic sequences remains a difficult problem. Current sequencing technologies do not achieve sufficient coverage to assemble the individual genomes in a typical sample; consequently, sequencing runs produce a large number of short sequences whose exact origin is unknown. Since these sequences are usually smaller than the average length of a gene, algorithms must make predictions based on very little data.

Results

We present MetaProdigal, a metagenomic version of the gene prediction program Prodigal, that can identify genes in short, anonymous coding sequences with a high degree of accuracy. The novel value of the method consists of enhanced translation initiation site identification, ability to identify sequences that use alternate genetic codes and confidence values for each gene call. We compare the results of MetaProdigal with other methods and conclude with a discussion of future improvements.

Availability

The Prodigal software is freely available under the General Public License from http://code.google.com/p/prodigal/.",2012-07-12 +22291602,Inference of population structure using dense haplotype data.,"The advent of genome-wide dense variation data provides an opportunity to investigate ancestry in unprecedented detail, but presents new statistical challenges. We propose a novel inference framework that aims to efficiently capture information on population structure provided by patterns of haplotype similarity. Each individual in a sample is considered in turn as a recipient, whose chromosomes are reconstructed using chunks of DNA donated by the other individuals. Results of this ""chromosome painting"" can be summarized as a ""coancestry matrix,"" which directly reveals key information about ancestral relationships among individuals. If markers are viewed as independent, we show that this matrix almost completely captures the information used by both standard Principal Components Analysis (PCA) and model-based approaches such as STRUCTURE in a unified manner. Furthermore, when markers are in linkage disequilibrium, the matrix combines information across successive markers to increase the ability to discern fine-scale population structure using PCA. In parallel, we have developed an efficient model-based approach to identify discrete populations using this matrix, which offers advantages over PCA in terms of interpretability and over existing clustering algorithms in terms of speed, number of separable populations, and sensitivity to subtle population structure. We analyse Human Genome Diversity Panel data for 938 individuals and 641,000 markers, and we identify 226 populations reflecting differences on continental, regional, local, and family scales. 
We present multiple lines of evidence that, while many methods capture similar information among strongly differentiated groups, more subtle population structure in human populations is consistently present at a much finer level than currently available geographic labels and is only captured by the haplotype-based approach. The software used for this article, ChromoPainter and fineSTRUCTURE, is available from http://www.paintmychromosomes.com/.",2012-01-26 +22844732,[Prevention and treatment of atherosclerosis and cardiovascular diseases].,"Atherosclerosis is one of the main causes of morbidity and mortality world-wide and specifically in Israel. These guidelines update the previous guidelines of the Israeli Society for Research, Prevention and Treatment of Atherosclerosis, published in 2005. The need for an update is based on new scientific data published in recent years necessitating changes in the recommendations for preventing and treating atherosclerosis. These guidelines were written in collaboration between all the societies outlined here and the content of this statement was approved by the delegates of these societies. The recommendations were written taking into consideration guidelines published by other international medical societies and also the specific needs of the Israeli medical system. Due to limitations of space, in the current paper we present: assessment of cardiovascular risk, smoking cessation and the treatment of dyslipidemia. 
Other sections including: recommendations to the general population, nutritional and physical activity recommendations, treatment of hypertension, prevention of ischemic stroke and the metabolic syndrome are available at http://www.ima.org.il/harefuah.",2012-05-01 +22155303,Functional and structural characterization of a new serine protease with thrombin-like activity TLBan from Bothrops andianus (Andean Lancehead) snake venom.,"A new serine protease with thrombin-like activity (TLBan) from Bothrops andianus (Andean Lancehead) was isolated in two chromatographic steps in LC molecular exclusion and reverse phase-HPLC. TLBan is a glycoprotein that contains both N-linked carbohydrates and sialic acid in its structure, with Mr ∼29 kDa under reducing conditions and non-reducing ∼25 kDa conditions and confirmed by MALDI-TOF mass spectrometry (25,835.65 Da) and exhibited high specificity for BAρNA, Michaelis-Menten behavior with Km 5.4 × 10(-1) M and the V(max) 7.9 × 10(-1) nmoles ρ-NA/L/min for this substrate and high stability when was analyzed at different temperatures (25 to 60 °C), pHs (4.0 to 8.0), was inhibited by soybean trypsin inhibitor, EDTA and phenylmethylsulfonyl fluoride (PMSF). The total amino acid sequence was obtained through sequencing of selected tryptic peptides and by inference obtained using SwissProt database http://br.expasy.org/ with the search restricted to serine proteases from Crotalinae snakes and show high amino acid sequence identity with other serine proteases from snake venom. TLBan showed the presence of His(44), Asp(91) residues and Ser was deduced (187) position, in the corresponding positions to the catalytic triad established in the serine proteases and Ser(187) are inhibited by phenylmethylsulfonyl fluoride (PMSF). In this work, we investigated the ability of TLBan to degrade fibrinogen and we observed that it is able to cause α- and β-chain cleavage. 
Enzymatic activities as well as the platelet aggregation were strongly inhibited when were incubated with PMSF, a specific inhibitor of serine protease. TLBan showed a potential medical-scientific interest to understand the pathophysiological mechanism of the snake venom action and identification of new blood coagulation cascade acting enzymes of natural sources.",2011-12-06 +23236419,Drug cocktail optimization in chemotherapy of cancer.,"

Background

In general, drug metabolism has to be considered to avoid adverse effects and ineffective therapy. In particular, chemotherapeutic drug cocktails strain drug metabolizing enzymes especially the cytochrome P450 family (CYP). Furthermore, a number of important chemotherapeutic drugs such as cyclophosphamide, ifosfamide, tamoxifen or procarbazine are administered as prodrugs and have to be activated by CYP. Therefore, the genetic variability of these enzymes should be taken into account to design appropriate therapeutic regimens to avoid inadequate drug administration, toxicity and inefficiency.

Objective

The aim of this work was to find drug interactions and to avoid side effects or ineffective therapy in chemotherapy.

Data sources and methods

Information on drug administration in the therapy of leukemia and their drug metabolism was collected from scientific literature and various web resources. We carried out an automated textmining approach. Abstracts of PubMed were filtered for relevant articles using specific keywords. Abstracts were automatically screened for antineoplastic drugs and their synonyms in combination with a set of human CYPs in title or abstract.

Results

We present a comprehensive analysis of over 100 common cancer treatment regimens regarding drug-drug interactions and present alternatives avoiding CYP overload. Typical concomitant medication, e.g. antiemetics or antibiotics is a preferred subject to improvement. A webtool, which allows drug cocktail optimization was developed and is publicly available on http://bioinformatics.charite.de/chemotherapy.",2012-12-07 +22144685,The 2012 Nucleic Acids Research Database Issue and the online Molecular Biology Database Collection.,"The 19th annual Database Issue of Nucleic Acids Research features descriptions of 92 new online databases covering various areas of molecular biology and 100 papers describing recent updates to the databases previously described in NAR and other journals. The highlights of this issue include, among others, a description of neXtProt, a knowledgebase on human proteins; a detailed explanation of the principles behind the NCBI Taxonomy Database; NCBI and EBI papers on the recently launched BioSample databases that store sample information for a variety of database resources; descriptions of the recent developments in the Gene Ontology and UniProt Gene Ontology Annotation projects; updates on Pfam, SMART and InterPro domain databases; update papers on KEGG and TAIR, two universally acclaimed databases that face an uncertain future; and a separate section with 10 wiki-based databases, introduced in an accompanying editorial. The NAR online Molecular Biology Database Collection, available at http://www.oxfordjournals.org/nar/database/a/, has been updated and now lists 1380 databases. Brief machine-readable descriptions of the databases featured in this issue, according to the BioDBcore standards, will be provided at the http://biosharing.org/biodbcore web site. 
The full content of the Database Issue is freely available online on the Nucleic Acids Research web site (http://nar.oxfordjournals.org/).",2011-12-05 +22668790,Bayesian model-based clustering of temporal gene expression using autoregressive panel data approach.,"

Motivation

In a microarray time series analysis, due to the large number of genes evaluated, the first step toward understanding the complex time network is the clustering of genes that share similar expression patterns over time. Up until now, the proposed methods do not point simultaneously to the temporal autocorrelation of the gene expression and the model-based clustering. We present a Bayesian method that considers jointly the fit of autoregressive panel data models and hierarchical gene clustering.

Results

The proposed methodology was able to cluster genes that share similar expression over time, which was determined jointly by the estimates of autoregression parameters, by the average level of expression and by the quality of the fitted model.

Availability and implementation

The R codes for implementation of the proposed clustering method and for simulation study, as well as the real and simulated datasets, are freely accessible on the Web http://www.det.ufv.br/~moyses/links.php.

Contact

moysesnascim@ufv.br.",2012-06-04 +21733130,Estimating the location and shape of hybrid zones.,We propose a new model to make use of georeferenced genetic data for inferring the location and shape of a hybrid zone. The model output includes the posterior distribution of a parameter that quantifies the width of the hybrid zone. The model proposed is implemented in the GUI and command-line versions of the Geneland program versions ≥ 3.3.0. Information about the program can be found on http://www2.imm.dtu.dk/gigu/Geneland/.,2011-07-07 +23228330,Impact of mutations on the allosteric conformational equilibrium.,"Allostery in a protein involves effector binding at an allosteric site that changes the structure and/or dynamics at a distant, functional site. In addition to the chemical equilibrium of ligand binding, allostery involves a conformational equilibrium between one protein substate that binds the effector and a second substate that less strongly binds the effector. We run molecular dynamics simulations using simple, smooth energy landscapes to sample specific ligand-induced conformational transitions, as defined by the effector-bound and effector-unbound protein structures. These simulations can be performed using our web server (http://salilab.org/allosmod/). We then develop a set of features to analyze the simulations and capture the relevant thermodynamic properties of the allosteric conformational equilibrium. These features are based on molecular mechanics energy functions, stereochemical effects, and structural/dynamic coupling between sites. Using a machine-learning algorithm on a data set of 10 proteins and 179 mutations, we predict both the magnitude and the sign of the allosteric conformational equilibrium shift by the mutation; the impact of a large identifiable fraction of the mutations can be predicted with an average unsigned error of 1k(B)T. 
With similar accuracy, we predict the mutation effects for an 11th protein that was omitted from the initial training and testing of the machine-learning algorithm. We also assess which calculated thermodynamic properties contribute most to the accuracy of the prediction.",2012-12-07 +22377270,All Your Base: a fast and accurate probabilistic approach to base calling.,"The accuracy of base calls produced by Illumina sequencers is adversely affected by several processes, with laser cross-talk and cluster phasing being prominent. We introduce an explicit statistical model of the sequencing process that generalizes current models of phasing and cross-talk and forms the basis of a base calling method which improves on the best existing base callers, especially when comparing the number of error-free reads. The novel algorithms implemented in All Your Base (AYB) are comparable in speed to other competitive base-calling methods, do not require training data and are designed to be robust to gross errors, producing sensible results where other techniques struggle. AYB is available at http://www.ebi.ac.uk/goldman-srv/AYB/.",2012-02-29 +22809383,P.R.E.S.S.--an R-package for exploring residual-level protein structural statistics.,"P.R.E.S.S. is an R-package developed to allow researchers to get access to and manipulate a large set of statistical data on protein residue-level structural properties such as residue-level virtual bond lengths, virtual bond angles, and virtual torsion angles. A large set of high-resolution protein structures is downloaded and surveyed. Their residue-level structural properties are calculated and documented. The statistical distributions and correlations of these properties can be queried and displayed. Tools are also provided for modeling and analyzing a given structure in terms of its residue-level structural properties. 
In particular, new tools for computing residue-level statistical potentials and displaying residue-level Ramachandran-like plots are developed for structural analysis and refinement. P.R.E.S.S. has been released in R as an open source software package, with a user-friendly GUI, accessible and executable by a public user in any R environment. P.R.E.S.S. can also be downloaded directly at http://www.math.iastate.edu/press/.",2012-06-01 +23058674,PAAQD: Predicting immunogenicity of MHC class I binding peptides using amino acid pairwise contact potentials and quantum topological molecular similarity descriptors.,"Prediction of peptide immunogenicity is a promising approach for novel vaccine discovery. Conventionally, epitope prediction methods have been developed to accelerate the process of vaccine production by searching for candidate peptides from pathogenic proteins. However, recent studies revealed that peptides with high binding affinity to major histocompatibility complex molecules (MHCs) do not always result in high immunogenicity. Therefore, it is promising to predict the peptide immunogenicity rather than epitopes in order to discover new vaccines more effectively. To this end, we developed a novel T-cell reactivity predictor which we call PAAQD. Nonapeptides were encoded numerically, using combining information of amino acid pairwise contact potentials (AAPPs) and quantum topological molecular similarity (QTMS) descriptors. Encoded data were used in the construction of our classification model. Our numerical experiments suggested that the predictive performance of PAAQD is at least comparable with POPISK, one of the pioneering techniques for T-cell reactivity prediction. Also, our experiment suggested that the first and eighth positions of nonapeptides are the most important for immunogenicity and most of the anchor residues in epitope prediction were not important in T-cell reactivity prediction. 
The R implementation of PAAQD is available at http://pirun.ku.ac.th/~fsciiok/PAAQD.rar.",2012-10-09 +21880702,PepCrawler: a fast RRT-based algorithm for high-resolution refinement and binding affinity estimation of peptide inhibitors.,"

Motivation

Design of protein-protein interaction (PPI) inhibitors is a key challenge in structural bioinformatics and computer-aided drug design. Peptides, which partially mimic the interface area of one of the interacting proteins, are natural candidates to form protein-peptide complexes competing with the original PPI. The prediction of such complexes is especially challenging due to the high flexibility of peptide conformations.

Results

In this article, we present PepCrawler, a new tool for deriving binding peptides from protein-protein complexes and prediction of peptide-protein complexes, by performing high-resolution docking refinement and estimation of binding affinity. By using a fast path planning approach, PepCrawler rapidly generates large amounts of flexible peptide conformations, allowing backbone and side chain flexibility. A newly introduced binding energy funnel 'steepness score' was applied for the evaluation of the protein-peptide complexes binding affinity. PepCrawler simulations predicted high binding affinity for native protein-peptide complexes benchmark and low affinity for low-energy decoy complexes. In three cases, where wet lab data are available, the PepCrawler predictions were consistent with the data. Comparing to other state of the art flexible peptide-protein structure prediction algorithms, our algorithm is very fast, and takes only minutes to run on a single PC.

Availability

http://bioinfo3d.cs.tau.ac.il/PepCrawler/

Contact

eladdons@tau.ac.il; wolfson@tau.ac.il.",2011-08-31 +20623657,Accurate estimation of solvation free energy using polynomial fitting techniques.,"This report details an approach to improve the accuracy of free energy difference estimates using thermodynamic integration data (slope of the free energy with respect to the switching variable λ) and its application to calculating solvation free energy. The central idea is to utilize polynomial fitting schemes to approximate the thermodynamic integration data to improve the accuracy of the free energy difference estimates. Previously, we introduced the use of polynomial regression technique to fit thermodynamic integration data (Shyu and Ytreberg, J Comput Chem, 2009, 30, 2297). In this report we introduce polynomial and spline interpolation techniques. Two systems with analytically solvable relative free energies are used to test the accuracy of the interpolation approach. We also use both interpolation and regression methods to determine a small molecule solvation free energy. Our simulations show that, using such polynomial techniques and nonequidistant λ values, the solvation free energy can be estimated with high accuracy without using soft-core scaling and separate simulations for Lennard-Jones and partial charges. The results from our study suggest that these polynomial techniques, especially with use of nonequidistant λ values, improve the accuracy for ΔF estimates without demanding additional simulations. We also provide general guidelines for use of polynomial fitting to estimate free energy. To allow researchers to immediately utilize these methods, free software and documentation is provided via http://www.phys.uidaho.edu/ytreberg/software.",2011-01-01 +21774475,Integrated decision support for assessing chemical liabilities.,"Chemical liabilities, such as adverse effects and toxicity, have a major impact on today's drug discovery process. 
In silico prediction of chemical liabilities is an important approach which can reduce costs and animal testing by complementing or replacing in vitro and in vivo liability models. There is a lack of integrated, extensible decision support systems for chemical liability assessment which run quickly and have easily interpretable results. Here we present a method which integrates similarity searches, structural alerts, and QSAR models which all are available from the Bioclipse workbench. Emphasis has been placed on interpretation of results, and substructures which are important for predictions are highlighted in the original chemical structures. This allows for interactively changing chemical structures with instant visual feedback and can be used for hypothesis testing of single chemical structures as well as compound collections. The system has a clear separation between methods and data, and the extensible architecture enables straightforward extension via addition of more plugins (such as new data sets and computational models). We demonstrate our method on three important safety end points: mutagenicity, carcinogenicity, and aryl hydrocarbon receptor (AhR) activation. Bioclipse and the decision support implementation are free, open source, and available from http://www.bioclipse.net/decision-support .",2011-08-05 +23945724,"Efficacy and safety of new complementary feeding guidelines with an emphasis on red meat consumption: a randomized trial in Bogota, Colombia.","

Background

Iron deficiency and poor linear growth are common in infants from deprived socioeconomic backgrounds and may be associated with inadequate complementary feeding (CF) practices.

Objective

We tested the hypothesis that new CF guidelines emphasizing meat as a source of iron and zinc would improve linear growth, iron, and zinc status in infants living in poor socioeconomic circumstances in Bogota, Colombia.

Design

A total of 85 term infants who were exclusively breastfed for ≥4 mo were randomly assigned at 6 mo of age to a control group [CG (n = 43); current advice] or intervention group (new guidelines group [NGG (n = 42); with counseling to 1) continue breastfeeding, 2) offer red meat ≥3 d/wk, and 3) offer fruit and vegetables daily]). Main outcomes were 1) linear growth from 6 to 12 mo of age; 2) hemoglobin, hematocrit, iron [serum ferritin (SF)], and zinc status at 12 mo of age; and 3) meat intake at 12 mo of age (by using a food-frequency questionnaire).

Results

A total of 38 infants/group provided data at 12 mo of age. NGG infants had significantly higher red meat intake (mean ± SD: 5.4 ± 1.8 compared with 3.5 ± 1.7 d/wk at 12 mo of age; P < 0.001), higher hemoglobin and hematocrit at 12 mo of age, and a significantly greater increase in hemoglobin (mean ± SD change: 0.41 ± 0.8 compared with -0.13 ± 1.0; P = 0.01) and hematocrit (1.04 ± 2.2 compared with -0.15 ± 2.4; P = 0.03) from 6 to 12 mo of age than those in CG infants. There were no significant differences in linear growth from 6 to 12 mo of age or in SF or zinc.

Conclusions

The new guidelines showed efficacy with higher red meat intake and positive effects on hemoglobin and hematocrit. The intervention was acceptable and affordable for most mothers. These preliminary results suggest that the intervention merits investigation in a larger cohort with longer-term follow-up. This trial was registered at http://isrctn.org as ISRCTN57733004.",2013-08-14 +23044547,Robust design of microbial strains.,"

Motivation

Metabolic engineering algorithms provide means to optimize a biological process leading to the improvement of a biotechnological interesting molecule. Therefore, it is important to understand how to act in a metabolic pathway in order to have the best results in terms of productions. In this work, we present a computational framework that searches for optimal and robust microbial strains that are able to produce target molecules. Our framework performs three tasks: it evaluates the parameter sensitivity of the microbial model, searches for the optimal genetic or fluxes design and finally calculates the robustness of the microbial strains. We are capable to combine the exploration of species, reactions, pathways and knockout parameter spaces with the Pareto-optimality principle.

Results

Our framework provides also theoretical and practical guidelines for design automation. The statistical cross comparison of our new optimization procedures, performed with respect to currently widely used algorithms for bacteria (e.g. Escherichia coli) over different multiple functions, reveals good performances over a variety of biotechnological products.

Availability

http://www.dmi.unict.it/nicosia/pathDesign.html.

Contact

nicosia@dmi.unict.it or pl219@cam.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-07 +30731981,First Report of Gray Mold Caused by Botrytis cinerea on Yellow Cosmos (Bidens sulphurea) in Brazil.,"Bidens sulphurea (synonym Cosmos sulphureus) (Asteraceae), commonly known as yellow cosmos, is a native herbaceous species from Mexico that is widely used as an ornamental. It has been introduced in Brazil and has escaped from gardens, becoming a minor weed in ruderal, crop and pasture areas (2). In June 2010, groups of B. sulphurea individuals were found in a garden at the locality of Piúna, municipality of Viçosa (state of Minas Gerais, Brazil), that were severely attacked by gray mold. The disease led to flower rot with dieback of infected peduncles and stems. Plant tissues became brown to grayish brown and were covered by extensive fungal sporulation; in addition, seeds were colonized and destroyed by the fungus. A hyphomycete was regularly found associated with the diseased flowers, which was readily recognized as having a morphology typical of Botrytis cinerea: conidiophores solitary, cylindrical, terminally branched, 15 to 20 μm wide, grayish to olivaceous gray, and smooth; conidiogenous cells polyblastic, subcylindrical to ampulliform, and 120 to 230 × (13-) 14 to 16 (-19) μm; conidia ellipsoid to obovoid, 8 to 12 × 6.5 to 8 (-9) μm, with a discrete hilum at the base, 1 to 2 μm, aseptate, and hyaline. The fungus was isolated in pure culture and inoculation of one isolate on healthy B. sulphurea individuals was carried out with a 2.14 × 10(6) conidia/ml suspension, which was sprayed to runoff onto three plants bearing four to six inflorescences. All plants were left in a moist chamber for 48 h and later transferred to a bench in a greenhouse at 21 ± 3°C. Gray mold symptoms appeared after 10 days that led to rapid and complete necrosis of flowers and peduncles. 
Infection first appeared on the flowers but progressed downward, leading to top dieback and finally plant death (not seen in the field). Only Botrytis cinerea was obtained in isolations from diseased flowers, demonstrating the pathogenicity of the fungus. A representative sample was deposited in the UFV herbarium (VIC 31602). The only other record of Botrytis cinerea causing gray mold of B. sulphurea is from China (1,3). To our knowledge, this is the first record of Botrytis cinerea causing gray mold on B. sulphurea in Brazil. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Botany and Mycology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 2011. (2) H. Lorenzi and H. M. Souza. Plantas Ornamentais no Brasil. Plantarum, Nova Odessa, Brazil. 1995. (3) Z. Zhang. Flora Fungorum Sinicorum. Vol. 26. Botrytis, Ramularia. Science Press, Beijing, China. 2006.",2011-12-01 +30732007,First Report of Powdery Mildew Caused by Golovinomyces biocellatus on Monarda didyma in Korea.,"Bergamot (Monarda didyma L.), which is native to eastern North America, is an aromatic herb in the family Lamiaceae. It is widely planted in gardens and parks for ornamental purposes and also grown indoors as a fragrant houseplant in Korea. In October 2007, several dozen bergamots planted outdoors in Bonghwa, Korea were found to be heavily infected with a powdery mildew. Symptoms first appeared as thin, white colonies, which subsequently developed into abundant growth on both sides of the leaves. Severe infections often caused leaf distortions and premature senescence. The same symptoms have also been found in bergamot plots in Osan, Suwon, Incheon, and Seoul from 2007 to 2011. Voucher specimens were deposited at Korea University, Seoul, Korea. Hyphae were septate, branched, and 4 to 8 μm wide. Appressoria on the mycelium were nipple shaped. 
Conidiophores arose from the lateral part of the hyphae, measured 100 to 180 × 10 to 12 μm, were simple, and produced two to four immature conidia in chains, followed by two to three cells. Conidia were hyaline, ellipsoid to barrel shaped, measured 28 to 40 × 16 to 20 μm (length/width ratio = 1.4 to 2.2), lacked distinct fibrosin bodies, and produced germ tubes on the subterminal position, with reticulate wrinkling of the outer walls. No chasmothecia were observed. The structures described above were typical of the Oidium subgenus Reticuloidium anamorph of the genus Golovinomyces, and the fungus measurements were compatible with those of Golovinomyces biocellatus (Ehrenb.) V.P. Heluta as described previously (1,4). The only other powdery mildew known on Monarda spp. is Neoerysiphe galeopsidis (1), which is clearly distinguished by its lobed hyphal appressoria and fine striations on conidial surfaces. To confirm the tentative identification based on morphological characteristics, internal transcribed spacer (ITS) rDNA sequences from two representative isolates (KUS-F23070 and F23117) were obtained using primers ITS5 and P3 as described by Takamatsu et al. (3). The resulting sequences of 523 bp were deposited in GenBank (Accession Nos. JN228358 and JN228359). A GenBank BLAST search produced an exact match for the sequences of G. biocellatus on several plants belonging to the Lamiaceae, with a 100% sequence similarity. Pathogenicity was confirmed through inoculation by gently pressing diseased leaves onto leaves of five healthy potted bergamot plants. Five noninoculated plants served as controls. Plants were maintained in a greenhouse at 25 ± 2°C. Inoculated plants developed signs and symptoms after 6 days, whereas the control plants remained healthy. The fungus present on the inoculated plants was morphologically identical to that originally observed on diseased plants. The powdery mildew infections of bergamot plants associated with G. 
biocellatus have been known in Europe (2) and Japan (4). The current work confirmed the occurrence of G. biocellatus infecting M. didyma in Korea. References: (1) U. Braun. Beih. Nova Hedw. 89:1, 1987. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory. ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , July 12, 2011, (3) S. Takamatsu et al. Mycol. Res. 113:117, 2009. (4) S. Tanda. J. Agric. Sci. Tokyo Agric. Univ. 47:274, 1997.",2011-12-01 +22135290,PoSSuM: a database of similar protein-ligand binding and putative pockets.,"Numerous potential ligand-binding sites are available today, along with hundreds of thousands of known binding sites observed in the PDB. Exhaustive similarity search for such vastly numerous binding site pairs is useful to predict protein functions and to enable rapid screening of target proteins for drug design. Existing databases of ligand-binding sites offer databases of limited scale. For example, SitesBase covers only ~33,000 known binding sites. Inferring protein function and drug discovery purposes, however, demands a much more comprehensive database including known and putative-binding sites. Using a novel algorithm, we conducted a large-scale all-pairs similarity search for 1.8 million known and potential binding sites in the PDB, and discovered over 14 million similar pairs of binding sites. Here, we present the results as a relational database Pocket Similarity Search using Multiple-sketches (PoSSuM) including all the discovered pairs with annotations of various types. PoSSuM enables rapid exploration of similar binding sites among structures with different global folds as well as similar ones. Moreover, PoSSuM is useful for predicting the binding ligand for unbound structures, which provides important clues for characterizing protein structures with unclear functions. 
The PoSSuM database is freely available at http://possum.cbrc.jp/PoSSuM/.",2011-12-01 +22135305,DOMMINO: a database of macromolecular interactions.,"With the growing number of experimentally resolved structures of macromolecular complexes, it becomes clear that the interactions that involve protein structures are mediated not only by the protein domains, but also by various non-structured regions, such as interdomain linkers, or terminal sequences. Here, we present DOMMINO (http://dommino.org), a comprehensive database of macromolecular interactions that includes the interactions between protein domains, interdomain linkers, N- and C-terminal regions and protein peptides. The database complements SCOP domain annotations with domain predictions by SUPERFAMILY and is automatically updated every week. The database interface is designed to provide the user with a three-stage pipeline to study macromolecular interactions: (i) a flexible search that can include a PDB ID, type of interaction, SCOP family of interacting proteins, organism name, interaction keyword and a minimal threshold on the number of contact pairs; (ii) visualization of subunit interaction network, where the user can investigate the types of interactions within a macromolecular assembly; and (iii) visualization of an interface structure between any pair of the interacting subunits, where the user can highlight several different types of residues within the interfaces as well as study the structure of the corresponding binary complex of subunits.",2011-12-01 +30731994,Outbreaks of Smut Caused by Tilletia maclaganii on Switchgrass in New York and Pennsylvania.,"Switchgrass (Panicum virgatum L.) is a native perennial grass with potential as a biofuel crop. The smut fungus, Tilletia maclaganii (Berk.) 
Clint., is associated with significant biomass reduction in switchgrass in the Midwest (4), but has not been reported in the northeast United States in more than 60 years (New York in 1890 and Pennsylvania in 1946) (2,3). From 2007 to 2010, smutted panicles were observed on the majority of plants in stands of several switchgrass cultivars at the USDA-NRCS Plant Materials Center in Big Flats (Chemung County), NY; in production fields of several switchgrass cultivars near Meadville (Crawford County), PA; and in an ornamental bed of switchgrass in Ithaca (Tompkins County), NY. Smutted panicles emerged 3 to 4 weeks prior to healthy panicles, had a compact, club-shaped appearance, and enlarged florets with swollen ovaries that readily released a powdery mass of odorless, rusty orange-to-dark brown teliospores when pinched. The entire caryopsis of every floret within a panicle was smutted and the infected plants appeared stunted, indicative of systemic infection. The fungus from each location was identified as T. maclaganii based on host, habit, and teliospore morphology (3). Teliospores were pale yellowish brown to reddish brown, varied from globose to slightly irregular in shape, and averaged 21 μm (18 to 25 μm) in diameter. The exospore was thick (2 to 3 μm), finely verrucose, and no sheath was present. True sterile cells, pale yellow and 10 to 18 μm in diameter, were sparsely present. Teliospores germinated and formed large (40 to 60 × 3 to 6 μm), nonconjugating basidiospores within 20 h on 2% water agar (WA). Occasionally, we also found the floret-infecting species T. pulcherrima (1) on switchgrass at very low incidence in Big Flats, NY, but it was easily distinguished from T. maclaganii. Stratified seeds (3 g) of 'Shelter', washed and found to be free of teliospores, were dusted with 0.04 g of teliospores of T. maclaganii isolate Tm001NY09 (Cornell Plant Pathology Herbarium Accession CUP-67931) harvested from infected 'Shelter' in Big Flats, NY in 2009. 
Inoculated and noninoculated seeds were sown in seedling trays, transplanted, and evaluated at panicle emergence. There were no symptoms on plants from noninoculated seeds. Symptoms on inoculated plants were consistent with field observations and teliospores were reisolated from infected panicles and cultured on 2% WA. Teliospores harvested from a single panicle infected with Tm001NY09 were used for culturing and DNA extraction. The fully annotated sequence of the rDNA internal transcribed spacer and 5.8S regions of this isolate were deposited in GenBank (Accession No. JF745116). Smut outbreaks in New York and Pennsylvania suggest that T. maclaganii must be managed effectively if switchgrass production is to be sustainable in the Northeast. References: (1) L. M. Carris et al. Plant Dis. 92:1707, 2008. (2) R. Durán and G. W. Fischer. The Genus Tilletia. Washington State University, Pullman, WA, 1961. (3) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , May 3, 2011 (4) P. M. Thomsen et al. Online publication. doi:10.1094/PHP-2008-0317-01-RS. Plant Health Progress, 2008.",2011-12-01 +30731979,First Report of Gray Mold of Blackberry Caused by Botrytis cinerea in South Carolina.,"Botrytis cinerea Pers.: Fr. is a causal agent of gray mold of blackberry but may also affect grapevine, tomato, bulb flowers, and ornamental crops (2). In August 2010, blackberries (Rubus fruticosus and other species) showing gray mold symptoms were found in Longcreek, Six Mile, and Cheddar, SC. Symptomatic blackberry fruit exhibited patterns of brown-to-gray mycelia and conidiophores. Upon isolation, the mycelium grew at a rate of 12.3 mm per day at 22°C on potato dextrose agar, forming pale white-to-gray colonies with concentric rings and conidiophores (less than 12 h of fluorescent light per day). Some isolates formed dark brown sclerotia in the dark after 18 days. 
The lemon-shaped spores averaged 12 × 9 μm and were consistent with descriptions of B. cinerea. (1) The ribosomal internal transcribed spacer (ITS) ITS1-5.8S-ITS2 region was amplified via PCR from genomic DNA obtained from mycelia using primers ITS1 and ITS4. A BLAST search in GenBank revealed highest similarity (99 to 100%) to sequences from various Botrytis spp. collected in China, Canada, and Spain (GenBank Accession Nos. FJ169666.1, GU934505.1, and EF207414.1). The ITS sequence amplified from the blackberry isolate was submitted to GenBank (Accession No. JN164269). The pathogen was further identified to the species level as B. cinerea using glyceraldehyde-3-phosphate dehydrogenase, heat-shock protein 60 (HSP60), and DNA-dependent RNA polymerase subunit II (RPB2) gene sequences (2) (GenBank Accession Nos. JN164270, JN164271, JN164272). Pathogenicity was confirmed by inoculating three surface-sterilized (soaked in 5% bleach for 15 min), mature blackberry fruit (R. fruticosus) with a conidial suspension (105 spores/ml) of the blackberry isolate. A 20-μl droplet was placed on the fruit; control fruit received sterile water without conidia. After 5 days of incubation at room temperature in an air-tight Magenta box, the inoculated fruit developed typical signs and symptoms of gray mold. The developing spores on inoculated fruit were confirmed to be B. cinerea. All control fruit remained healthy. To our knowledge, this is the first report of B. cinerea on blackberry in South Carolina. The disease must be managed with fungicides to obtain high quality fruit with market-requested shelf life. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , June 17, 2011. (2) M. Staats et al. Mol. Biol. Evol. 
22:333, 2005.",2011-12-01 +22139910,The NCBI Taxonomy database.,"The NCBI Taxonomy database (http://www.ncbi.nlm.nih.gov/taxonomy) is the standard nomenclature and classification repository for the International Nucleotide Sequence Database Collaboration (INSDC), comprising the GenBank, ENA (EMBL) and DDBJ databases. It includes organism names and taxonomic lineages for each of the sequences represented in the INSDC's nucleotide and protein sequence databases. The taxonomy database is manually curated by a small group of scientists at the NCBI who use the current taxonomic literature to maintain a phylogenetic taxonomy for the source organisms represented in the sequence databases. The taxonomy database is a central organizing hub for many of the resources at the NCBI, and provides a means for clustering elements within other domains of NCBI web site, for internal linking between domains of the Entrez system and for linking out to taxon-specific external resources on the web. Our primary purpose is to index the domain of sequences as conveniently as possible for our user community.",2011-12-01 +22659403,Determining pair distance distribution function from SAXS data using parametric functionals.,"Small angle X-ray scattering (SAXS) experiments are widely applied in structural biology. The SAXS experiments yield one-dimensional profile that needs further analysis to reveal structural information. The pair distance distribution function (PDDF), P(r), can provide molecular structures more intuitively, and it can be used to guide ab initio model reconstructions, making it a critical step to derive P(r) from experimental SAXS profiles. To calculate the P(r) curves, a new method based on a specially designed parametric functional form is developed, and implemented in pregxs. This method is tested against both synthetic and experimental data, the estimated P(r) functions are in good agreement with correct or known P(r). The method can also predict the molecular size. 
In summary, the pregxs method is robust and accurate in P(r) determination from SAXS profiles. The pregxs source code and an online server are available at http://www.sastbx.als.lbl.gov.",2012-06-01 +22139934,Cube-DB: detection of functional divergence in human protein families.,"Cube-DB is a database of pre-evaluated results for detection of functional divergence in human/vertebrate protein families. The analysis is organized around the nomenclature associated with the human proteins, but based on all currently available vertebrate genomes. Using full genomes enables us, through a mutual-best-hit strategy, to construct comparable taxonomical samples for all paralogues under consideration. Functional specialization is scored on the residue level according to two models of behavior after divergence: heterotachy and homotachy. In the first case, the positions on the protein sequence are scored highly if they are conserved in the reference group of orthologs, and overlap poorly with the residue type choice in the paralogs groups (such positions will also be termed functional determinants). The second model additionally requires conservation within each group of paralogs (functional discriminants). The scoring functions are phylogeny independent, but sensitive to the residue type similarity. The results are presented as a table of per-residue scores, and mapped onto related structure (when available) via browser-embedded visualization tool. They can also be downloaded as a spreadsheet table, and sessions for two additional molecular visualization tools. The database interface is available at http://epsf.bmad.bii.a-star.edu.sg/cube/db/html/home.html.",2011-12-01 +22066534,Lose weight with traditional chinese medicine? Potential suppression of fat mass and obesity-associated protein.,"Overweight and obesity are common health problems in modern society, particularly in developed countries. 
Excessive body mass has been linked to numerous diseases, such as cardiovascular diseases, diabetes, and cancer. Fat mass and obesity-associated protein (FTO) activity have direct impact on food intake and results in obesity. Inhibition of FTO activity may cause weight loss and reduce obese-linked health risks. We investigated the potential weight loss effects of traditional Chinese medicine (TCM), particularly by inhibiting FTO functions. Molecular docking was performed to screen TCM compounds from TCM Database@Taiwan (http://tcm.cmu.edu.tw). Three candidates were identified that contained either a tetrahydropyridine group or potent electronegative phenol group in the structure scaffold. Molecular dynamics simulation analysis of the docking poses of each complex indicated stabilizing trends in the protein-ligand complex movements. In addition, the number of hydrogen bonds increased throughout the 20 ns simulation. These results suggest that these TCM candidates could be potential FTO inhibitors through competitive inhibition.",2011-12-01 +21664781,Realignment strategies for awake-monkey fMRI data.,"Functional magnetic resonance imaging (fMRI) experiments with awake nonhuman primates (NHPs) have recently seen a surge of applications. However, the standard fMRI analysis tools designed for human experiments are not optimal for NHP data collected at high fields. One major difference is the experimental setup. Although real head movement is impossible for NHPs, MRI image series often contain visible motion artifacts. Animal body movement results in image position changes and geometric distortions. Since conventional realignment methods are not appropriate to address such differences, algorithms tailored specifically for animal scanning become essential. We have implemented a series of high-field NHP specific methods in a software toolbox, fMRI Sandbox (http://kyb.tuebingen.mpg.de/~stoewer/), which allows us to use different realignment strategies. 
Here we demonstrate the effect of different realignment strategies on the analysis of awake-monkey fMRI data acquired at high field (7 T). We show that the advantage of using a nonstandard realignment algorithm depends on the amount of distortion in the dataset. While the benefits for less distorted datasets are minor, the improvement of statistical maps for heavily distorted datasets is significant.",2011-06-12 +23221645,miRDeep*: an integrated application tool for miRNA identification from RNA sequencing data.,"miRDeep and its varieties are widely used to quantify known and novel micro RNA (miRNA) from small RNA sequencing (RNAseq). This article describes miRDeep*, our integrated miRNA identification tool, which is modeled off miRDeep, but the precision of detecting novel miRNAs is improved by introducing new strategies to identify precursor miRNAs. miRDeep* has a user-friendly graphic interface and accepts raw data in FastQ and Sequence Alignment Map (SAM) or the binary equivalent (BAM) format. Known and novel miRNA expression levels, as measured by the number of reads, are displayed in an interface, which shows each RNAseq read relative to the pre-miRNA hairpin. The secondary pre-miRNA structure and read locations for each predicted miRNA are shown and kept in a separate figure file. Moreover, the target genes of known and novel miRNAs are predicted using the TargetScan algorithm, and the targets are ranked according to the confidence score. miRDeep* is an integrated standalone application where sequence alignment, pre-miRNA secondary structure calculation and graphical display are purely Java coded. This application tool can be executed using a normal personal computer with 1.5 GB of memory. Further, we show that miRDeep* outperformed existing miRNA prediction tools using our LNCaP and other small RNAseq datasets. 
miRDeep* is freely available online at http://www.australianprostatecentre.org/research/software/mirdeep-star.",2012-12-04 +22369494,UASIS: Universal Automatic SNP Identification System.,"

Background

SNP (Single Nucleotide Polymorphism), the most common type of genetic variation between human beings, is believed to be a promising way towards personalized medicine. As more and more research on SNPs is being conducted, non-standard nomenclatures may generate potential problems. The most serious issue is that researchers cannot perform cross referencing among different SNP databases. This will result in more resources and time required to track SNPs. It could be detrimental to the entire academic community.

Results

UASIS (Universal Automated SNP Identification System) is a web-based server for SNP nomenclature standardization and translation at the DNA level. Three utilities are available. They are UASIS Aligner, Universal SNP Name Generator and SNP Name Mapper. UASIS maps SNPs from different databases, including dbSNP, GWAS, HapMap and JSNP etc., into a uniform view efficiently using a proposed universal nomenclature and state-of-the-art alignment algorithms. UASIS is freely available at http://www.uasis.tk with no requirement of log-in.

Conclusions

UASIS is a helpful platform for SNP cross referencing and tracking. By providing an informative, unique and unambiguous nomenclature, which utilizes unique position of a SNP, we aim to resolve the ambiguity of SNP nomenclatures currently practised. Our universal nomenclature is a good complement to mainstream SNP notations such as rs# and HGVS guidelines. UASIS acts as a bridge to connect heterogeneous representations of SNPs.",2011-11-30 +22369658,DetoxiProt: an integrated database for detoxification proteins.,"

Background

Detoxification proteins are a class of proteins for degradation and/or elimination of endogenous and exogenous toxins or medicines, as well as reactive oxygen species (ROS) produced by these materials. Most of these proteins are generated as a response to the stimulation of toxins or medicines. They are essential for the clearance of harmful substances and for maintenance of physiological balance in organisms. Thus, it is important to collect and integrate information on detoxification proteins.

Results

To store, retrieve and analyze the information related to their features and functions, we developed the DetoxiProt, a comprehensive database for annotation of these proteins. This database provides detailed introductions about different classes of the detoxification proteins. Extensive annotations of these proteins, including sequences, structures, features, inducers, inhibitors, substrates, chromosomal location, functional domains as well as physiological-biochemical properties were generated. Furthermore, pre-computed BLAST results, multiple sequence alignments and evolutionary trees for detoxification proteins are also provided for evolutionary study of conserved function and pathways. The current version of DetoxiProt contains 5956 protein entries distributed in 628 organisms. An easy to use web interface was designed, so that annotations about each detoxification protein can be retrieved by browsing with a specific method or by searching with different criteria.

Conclusions

DetoxiProt provides an effective and efficient way of accessing the detoxification protein sequences and other high-quality information. This database would be a valuable source for toxicologists, pharmacologists and medicinal chemists. DetoxiProt database is freely available at http://lifecenter.sgst.cn/detoxiprot/.",2011-11-30 +23721921,A novel tool for reliable and accurate prediction of renal complications in patients undergoing percutaneous coronary intervention.,"

Objectives

The aim of the study was to develop and validate a tool for predicting risk of contrast-induced nephropathy (CIN) in patients undergoing contemporary percutaneous coronary intervention (PCI).

Background

CIN is a common complication of PCI and is associated with adverse short- and long-term outcomes. Previously described risk scores for predicting CIN either have modest discrimination or include procedural variables and thus cannot be applied for pre-procedural risk stratification.

Methods

Random forest models were developed using 46 pre-procedural clinical and laboratory variables to estimate the risk of CIN in patients undergoing PCI. The 15 most influential variables were selected for inclusion in a reduced model. Model performance estimating risk of CIN and new requirement for dialysis (NRD) was evaluated in an independent validation data set using area under the receiver-operating characteristic curve (AUC), with net reclassification improvement used to compare full and reduced model CIN prediction after grouping in low-, intermediate-, and high-risk categories.

Results

Our study cohort comprised 68,573 PCI procedures performed at 46 hospitals between January 2010 and June 2012 in Michigan, of which 48,001 (70%) were randomly selected for training the models and 20,572 (30%) for validation. The models demonstrated excellent calibration and discrimination for both endpoints (CIN AUC for full model 0.85 and for reduced model 0.84, p for difference <0.01; NRD AUC for both models 0.88, p for difference = 0.82; net reclassification improvement for CIN 2.92%, p = 0.06).

Conclusions

The risk of CIN and NRD among patients undergoing PCI can be reliably calculated using a novel easy-to-use computational tool (https://bmc2.org/calculators/cin). This risk prediction algorithm may prove useful for both bedside clinical decision making and risk adjustment for assessment of quality.",2013-06-01 +22127860,IndelFR: a database of indels in protein structures and their flanking regions.,"Insertion/deletion (indel) is one of the most common methods of protein sequence variation. Recent studies showed that indels could affect their flanking regions and they are important for protein function and evolution. Here, we describe the Indel Flanking Region Database (IndelFR, http://indel.bioinfo.sdu.edu.cn), which provides sequence and structure information about indels and their flanking regions in known protein domains. The indels were obtained through the pairwise alignment of homologous structures in SCOP superfamilies. The IndelFR database contains 2,925,017 indels with flanking regions extracted from 373,402 structural alignment pairs of 12,573 non-redundant domains from 1053 superfamilies. IndelFR provides access to information about indels and their flanking regions, including amino acid sequences, lengths, locations, secondary structure constitutions, hydrophilicity/hydrophobicity, domain information, 3D structures and so on. IndelFR has already been used for molecular evolution studies and may help to promote future functional studies of indels and their flanking regions.",2011-11-29 +22127870,The Pfam protein families database.,"Pfam is a widely used database of protein families, currently containing more than 13,000 manually curated protein families as of release 26.0. Pfam is available via servers in the UK (http://pfam.sanger.ac.uk/), the USA (http://pfam.janelia.org/) and Sweden (http://pfam.sbc.su.se/). Here, we report on changes that have occurred since our 2010 NAR paper (release 24.0). 
Over the last 2 years, we have generated 1840 new families and increased coverage of the UniProt Knowledgebase (UniProtKB) to nearly 80%. Notably, we have taken the step of opening up the annotation of our families to the Wikipedia community, by linking Pfam families to relevant Wikipedia pages and encouraging the Pfam and Wikipedia communities to improve and expand those pages. We continue to improve the Pfam website and add new visualizations, such as the 'sunburst' representation of taxonomic distribution of families. In this work we additionally address two topics that will be of particular interest to the Pfam community. First, we explain the definition and use of family-specific, manually curated gathering thresholds. Second, we discuss some of the features of domains of unknown function (also known as DUFs), which constitute a rapidly growing class of families within Pfam.",2011-11-29 +22326420,Automated prediction of three-way junction topological families in RNA secondary structures.,"We present an algorithm for automatically predicting the topological family of any RNA three-way junction, given only the information from the secondary structure: the sequence and the Watson-Crick pairings. The parameters of the algorithm have been determined on a data set of 33 three-way junctions whose 3D conformation is known. We applied the algorithm on 53 other junctions and compared the predictions to the real shape of those junctions. We show that the correct answer is selected out of nine possible configurations 64% of the time. Additionally, these results are noticeably improved if homology information is used. 
The resulting software, Cartaj, is available online and downloadable (with source) at: http://cartaj.lri.fr.",2012-01-11 +23203876,OrtholugeDB: a bacterial and archaeal orthology resource for improved comparative genomic analysis.,"Prediction of orthologs (homologous genes that diverged because of speciation) is an integral component of many comparative genomics methods. Although orthologs are more likely to have similar function versus paralogs (genes that diverged because of duplication), recent studies have shown that their degree of functional conservation is variable. Also, there are inherent problems with several large-scale ortholog prediction approaches. To address these issues, we previously developed Ortholuge, which uses phylogenetic distance ratios to provide more precise ortholog assessments for a set of predicted orthologs. However, the original version of Ortholuge required manual intervention and was not easily accessible; therefore, we now report the development of OrtholugeDB, available online at http://www.pathogenomics.sfu.ca/ortholugedb. OrtholugeDB provides ortholog predictions for completely sequenced bacterial and archaeal genomes from NCBI based on reciprocal best Basic Local Alignment Search Tool hits, supplemented with further evaluation by the more precise Ortholuge method. The OrtholugeDB web interface facilitates user-friendly and flexible ortholog analysis, from single genes to genomes, plus flexible data download options. We compare Ortholuge with similar methods, showing how it may more consistently identify orthologs with conserved features across a wide range of taxonomic distances. 
OrtholugeDB facilitates rapid, and more accurate, bacterial and archaeal comparative genomic analysis and large-scale ortholog predictions.",2012-11-29 +22123743,PASS2 version 4: an update to the database of structure-based sequence alignments of structural domain superfamilies.,"Accurate structure-based sequence alignments of distantly related proteins are crucial in gaining insight about protein domains that belong to a superfamily. The PASS2 database provides alignments of proteins related at the superfamily level and are characterized by low sequence identity. We thus report an automated, updated version of the superfamily alignment database known as PASS2.4, consisting of 1961 superfamilies and 10,569 protein domains, which is in direct correspondence with SCOP (1.75) database. Database organization, improved methods for efficient structure-based sequence alignments and the analysis of extreme distantly related proteins within superfamilies formed the focus of this update. Alignment of family-specific functional residues can be realized using such alignments and is shown using one superfamily as an example. The database of alignments and other related features can be accessed at http://caps.ncbs.res.in/pass2/.",2011-11-28 +23221084,A tabu search approach for the NMR protein structure-based assignment problem.,"Spectroscopy is an experimental technique which exploits the magnetic properties of specific nuclei and enables the study of proteins in solution. The key bottleneck of NMR studies is to map the NMR peaks to corresponding nuclei, also known as the assignment problem. Structure-Based Assignment (SBA) is an approach to solve this computationally challenging problem by using prior information about the protein obtained from a homologous structure. NVR-BIP used the Nuclear Vector Replacement (NVR) framework to model SBA as a binary integer programming problem. 
In this paper, we prove that this problem is NP-hard and propose a tabu search (TS) algorithm (NVR-TS) equipped with a guided perturbation mechanism to efficiently solve it. NVR-TS uses a quadratic penalty relaxation of NVR-BIP where the violations in the Nuclear Overhauser Effect constraints are penalized in the objective function. Experimental results indicate that our algorithm finds the optimal solution on NVRBIP’s data set which consists of seven proteins with 25 templates (31 to 126 residues). Furthermore, it achieves relatively high assignment accuracies on two additional large proteins, MBP and EIN (348 and 243 residues, respectively), which NVR-BIP failed to solve. The executable and the input files are available for download at http://people.sabanciuniv.edu/catay/NVR-TS/NVR-TS.html.",2012-11-01 +21255416,Modeling RNA polymerase competition: the effect of σ-subunit knockout and heat shock on gene transcription level.,"

Background

Modeling of a complex biological process can explain the results of experimental studies and help predict its characteristics. Among such processes is transcription in the presence of competing RNA polymerases. This process involves RNA polymerase collisions followed by transcription termination.

Results

A mathematical and computer simulation model is developed to describe the competition of RNA polymerases during gene transcription on complementary DNA strands. For example, in the barley Hordeum vulgare the polymerase competition occurs in the locus containing plastome genes psbA, rpl23, rpl2 and four bacterial type promoters. In heat shock experiments on isolated chloroplasts, a twofold decrease of psbA transcripts and an even larger increase of rpl23-rpl2 transcripts were observed, which is well reproduced in the model. The model predictions are in good agreement with virtually all relevant experimental data (knockout, heat shock, chromatogram data, etc.). The model allows one to hypothesize a mechanism of cell response to knockout and heat shock, as well as a mechanism of gene expression regulation in the presence of RNA polymerase competition. The model is implemented for multiprocessor platforms with MPI and supported on Linux and MS Windows. The source code written in C++ is available under the GNU General Public License from the laboratory website. A user-friendly GUI version is also provided at http://lab6.iitp.ru/en/rivals.

Conclusions

The developed model is in good agreement with virtually all relevant experimental data. The model can be applied to estimate intensities of binding of the holoenzyme and phage type RNA polymerase to their promoters using data on gene transcription levels, as well as to predict characteristics of RNA polymerases and the transcription process that are difficult to measure directly, e.g., the intensity (frequency) of holoenzyme binding to the promoter in correlation to its nucleotide composition and the type of σ-subunit, the amount of transcription initiation aborts, etc. The model can be used to make functional predictions, e.g., heat shock response in isolated chloroplasts and changes of gene transcription levels under knockout of different σ-subunits or RNA polymerases or due to gene expression regulation.",2011-01-21 +22674449,[Renal replacement therapy for refractory heart failure].,"After broad cardiological and nephrological evaluation and consideration of optimal conservative options according to national and international guidelines, renal replacement therapy might be helpful in patients with refractory heart failure even if they are not dialysis-dependent. This is even more important as renal failure is a strong predictor for mortality in patients with severe congestive heart failure (CHF) and CHF is one of the fastest growing morbidities in western countries. Although peritoneal dialysis (PD) is frequently used in patients with CHF its role remains unclear. Acute chronic volume overload in refractory CHF is still an unresolved clinical problem. In patients with acute heart and renal failure with need of management in an intensive care unit, extracorporeal ultrafiltration or a dialysis modality should be preferred. In patients with chronic refractory CHF, volume overload and renal failure, peritoneal dialysis should be the therapy of choice. 
Due to the limited data available, treatment and outcome parameters should be recorded in the registry of the German Society of Nephrology (http://www.herz-niere.de).",2012-07-01 +22121227,Quantitative model of R-loop forming structures reveals a novel level of RNA-DNA interactome complexity.,"R-loop is the structure co-transcriptionally formed between nascent RNA transcript and DNA template, leaving the non-transcribed DNA strand unpaired. This structure can be involved in the hyper-mutation and dsDNA breaks in mammalian immunoglobulin (Ig) genes, oncogenes and neurodegenerative disease related genes. R-loops have not been studied at the genome scale yet. To identify the R-loops, we developed a computational algorithm and mapped R-loop forming sequences (RLFS) onto 66,803 sequences defined by UCSC as 'known' genes. We found that ∼59% of these transcribed sequences contain at least one RLFS. We created R-loopDB (http://rloop.bii.a-star.edu.sg/), the database that collects all RLFS identified within over half of the human genes and links to the UCSC Genome Browser for information integration and visualisation across a variety of bioinformatics sources. We found that many oncogenes and tumour suppressors (e.g. Tp53, BRCA1, BRCA2, Kras and Ptprd) and neurodegenerative diseases related genes (e.g. ATM, Park2, Ptprd and GLDC) could be prone to significant R-loop formation. Our findings suggest that R-loops provide a novel level of RNA-DNA interactome complexity, playing key roles in gene expression controls, mutagenesis, recombination process, chromosomal rearrangement, alternative splicing, DNA-editing and epigenetic modifications. 
RLFSs could be used as a novel source of prospective therapeutic targets.",2011-11-25 +22002877,The pKa Cooperative: a collaborative effort to advance structure-based calculations of pKa values and electrostatic effects in proteins.,"The pK(a) Cooperative (http://www.pkacoop.org) was organized to advance development of accurate and useful computational methods for structure-based calculation of pK(a) values and electrostatic energies in proteins. The Cooperative brings together laboratories with expertise and interest in theoretical, computational, and experimental studies of protein electrostatics. To improve structure-based energy calculations, it is necessary to better understand the physical character and molecular determinants of electrostatic effects. Thus, the Cooperative intends to foment experimental research into fundamental aspects of proteins that depend on electrostatic interactions. It will maintain a depository for experimental data useful for critical assessment of methods for structure-based electrostatics calculations. To help guide the development of computational methods, the Cooperative will organize blind prediction exercises. As a first step, computational laboratories were invited to reproduce an unpublished set of experimental pK(a) values of acidic and basic residues introduced in the interior of staphylococcal nuclease by site-directed mutagenesis. The pK(a) values of these groups are unique and challenging to simulate owing to the large magnitude of their shifts relative to normal pK(a) values in water. Many computational methods were tested in this first Blind Prediction Challenge and critical assessment exercise. A workshop was organized in the Telluride Science Research Center to objectively assess the performance of many computational methods tested on this one extensive data set. 
This volume of Proteins: Structure, Function, and Bioinformatics introduces the pK(a) Cooperative, presents reports submitted by participants in the Blind Prediction Challenge, and highlights some of the problems in structure-based calculations identified during this exercise.",2011-10-15 +22140171,Introducing EzTaxon-e: a prokaryotic 16S rRNA gene sequence database with phylotypes that represent uncultured species.,"Despite recent advances in commercially optimized identification systems, bacterial identification remains a challenging task in many routine microbiological laboratories, especially in situations where taxonomically novel isolates are involved. The 16S rRNA gene has been used extensively for this task when coupled with a well-curated database, such as EzTaxon, containing sequences of type strains of prokaryotic species with validly published names. Although the EzTaxon database has been widely used for routine identification of prokaryotic isolates, sequences from uncultured prokaryotes have not been considered. Here, the next generation database, named EzTaxon-e, is formally introduced. This new database covers not only species within the formal nomenclatural system but also phylotypes that may represent species in nature. In addition to an identification function based on Basic Local Alignment Search Tool (blast) searches and pairwise global sequence alignments, a new objective method of assessing the degree of completeness in sequencing is proposed. All sequences that are held in the EzTaxon-e database have been subjected to phylogenetic analysis and this has resulted in a complete hierarchical classification system. It is concluded that the EzTaxon-e database provides a useful taxonomic backbone for the identification of cultured and uncultured prokaryotes and offers a valuable means of communication among microbiologists who routinely encounter taxonomically novel isolates. 
The database and its analytical functions can be found at http://eztaxon-e.ezbiocloud.net/.",2011-11-25 +23323762,"Mojo Hand, a TALEN design tool for genome editing applications.","

Background

Recent studies of transcription activator-like (TAL) effector domains fused to nucleases (TALENs) demonstrate enormous potential for genome editing. Effective design of TALENs requires a combination of selecting appropriate genetic features, finding pairs of binding sites based on a consensus sequence, and, in some cases, identifying endogenous restriction sites for downstream molecular genetic applications.

Results

We present the web-based program Mojo Hand for designing TAL and TALEN constructs for genome editing applications (http://www.talendesign.org). We describe the algorithm and its implementation. The features of Mojo Hand include (1) automatic download of genomic data from the National Center for Biotechnology Information, (2) analysis of any DNA sequence to reveal pairs of binding sites based on a user-defined template, (3) selection of restriction-enzyme recognition sites in the spacer between the TAL monomer binding sites including options for the selection of restriction enzyme suppliers, and (4) output files designed for subsequent TALEN construction using the Golden Gate assembly method.

Conclusions

Mojo Hand enables the rapid identification of TAL binding sites for use in TALEN design. The assembly of TALEN constructs is also simplified by using the TAL-site prediction program in conjunction with a spreadsheet management aid of reagent concentrations and TALEN formulation. Mojo Hand enables scientists to more rapidly deploy TALENs for genome editing applications.",2013-01-16 +21803804,Bayesian prediction of tissue-regulated splicing using RNA sequence and cellular context.,"

Motivation

Alternative splicing is a major contributor to cellular diversity in mammalian tissues and relates to many human diseases. An important goal in understanding this phenomenon is to infer a 'splicing code' that predicts how splicing is regulated in different cell types by features derived from RNA, DNA and epigenetic modifiers.

Methods

We formulate the assembly of a splicing code as a problem of statistical inference and introduce a Bayesian method that uses an adaptively selected number of hidden variables to combine subgroups of features into a network, allows different tissues to share feature subgroups and uses a Gibbs sampler to hedge predictions and ascertain the statistical significance of identified features.

Results

Using data for 3665 cassette exons, 1014 RNA features and 4 tissue types derived from 27 mouse tissues (http://genes.toronto.edu/wasp), we benchmarked several methods. Our method outperforms all others, and achieves relative improvements of 52% in splicing code quality and up to 22% in classification error, compared with the state of the art. Novel combinations of regulatory features and novel combinations of tissues that share feature subgroups were identified using our method.

Contact

frey@psi.toronto.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-29 +22962484,Event extraction across multiple levels of biological organization.,"

Motivation

Event extraction using expressive structured representations has been a significant focus of recent efforts in biomedical information extraction. However, event extraction resources and methods have so far focused almost exclusively on molecular-level entities and processes, limiting their applicability.

Results

We extend the event extraction approach to biomedical information extraction to encompass all levels of biological organization from the molecular to the whole organism. We present the ontological foundations, target types and guidelines for entity and event annotation and introduce the new multi-level event extraction (MLEE) corpus, manually annotated using a structured representation for event extraction. We further adapt and evaluate named entity and event extraction methods for the new task, demonstrating that both can be achieved with performance broadly comparable with that for established molecular entity and event extraction tasks.

Availability

The resources and methods introduced in this study are available from http://nactem.ac.uk/MLEE/.

Contact

pyysalos@cs.man.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-01 +23812985,Minimum curvilinearity to enhance topological prediction of protein interactions by network embedding.,"

Motivation

Most functions within the cell emerge thanks to protein-protein interactions (PPIs), yet experimental determination of PPIs is both expensive and time-consuming. PPI networks present significant levels of noise and incompleteness. Predicting interactions using only PPI-network topology (topological prediction) is difficult but essential when prior biological knowledge is absent or unreliable.

Methods

Network embedding emphasizes the relations between network proteins embedded in a low-dimensional space, in which protein pairs that are closer to each other represent good candidate interactions. To achieve network denoising, which boosts prediction performance, we first applied minimum curvilinear embedding (MCE), and then adopted shortest path (SP) in the reduced space to assign likelihood scores to candidate interactions. Furthermore, we introduce (i) a new valid variation of MCE, named non-centred MCE (ncMCE); (ii) two automatic strategies for selecting the appropriate embedding dimension; and (iii) two new randomized procedures for evaluating predictions.

Results

We compared our method against several unsupervised and supervisedly tuned embedding approaches and node neighbourhood techniques. Despite its computational simplicity, ncMCE-SP was the overall leader, outperforming the current methods in topological link prediction.

Conclusion

Minimum curvilinearity is a valuable non-linear framework that we successfully applied to the embedding of protein networks for the unsupervised prediction of novel PPIs. The rationale for our approach is that biological and evolutionary information is imprinted in the non-linear patterns hidden behind the protein network topology, and can be exploited for predicting new protein links. The predicted PPIs represent good candidates for testing in high-throughput experiments or for exploitation in systems biology tools such as those used for network-based inference and prediction of disease-related functional modules.

Availability

https://sites.google.com/site/carlovittoriocannistraci/home.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +23587371,Reliability of rapid diagnostic test for diagnosing peripheral and placental malaria in an area of unstable malaria transmission in Eastern Sudan.,"

Background

Diagnosing Plasmodium falciparum malaria during pregnancy is a great challenge for clinicians because of the low density of parasites in the peripheral blood and parasite sequestration in the placenta. Nevertheless, few data on the use of malaria rapid diagnostic test (RDT) during pregnancy have been published.

Methods

P. falciparum infections were assessed in 156 febrile pregnant women by microscopic examination of their blood smears and by RDT and polymerase chain reactions (PCR). In addition, 150 women were assessed at the time of delivery by microscopy, RDT, PCR and placental histology investigations. The study was conducted at the Gadarif Hospital, Eastern Sudan. The SD Bioline P. f / P. v (Bio Standard Diagnostics, Gurgaon, Korea) RDT kit was evaluated in this study.

Results

Among the febrile pregnant women, 17 (11.0%), 26 (16.7%) and 18 (11.5%) positive cases of P. falciparum were detected by microscopy, RDT, and PCR, respectively. The sensitivity and specificity of the microscopy was 94.4% and 100%, respectively. The corresponding values for RDT evaluation were 83.3% and 92.0%, as compared with PCR as the gold standard. While there were no detected cases of malaria by microscopic examination of blood smears, 27 (18.0%), 21 (14.0%) and 46 (30.7%) out of the 150 placentae investigated had P. falciparum as determined by RDT, PCR, and histology, respectively. The sensitivity and specificity for RDT was 17.4% and 81.7%, respectively. The corresponding values for PCR were 6.5% and 82.7%, where histology was used as the gold standard.

Conclusions

The RDT kit used in this study has poor performance for peripheral and placental P. falciparum malaria detection in this setting.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1092363465928479.",2013-04-15 +22045651,Novel comprehensive diagnostic strategy in Pitt-Hopkins syndrome: clinical score and further delineation of the TCF4 mutational spectrum.,"Pitt-Hopkins syndrome (PTHS), characterized by severe intellectual disability and typical facial gestalt, is part of the clinical spectrum of Rett-like syndromes. TCF4, encoding a basic helix-loop-helix (bHLH) transcription factor, was identified as the disease-causing gene with de novo molecular defects. While PTHS appears to be a recognizable clinical entity, it seems to remain underdiagnosed, especially when facial gestalt is less typical. With the aim to facilitate the diagnosis of PTHS and to increase its rate and specificity, we have investigated 33 novel patients and defined a Clinical Diagnosis Score. Analysis of 112 individuals (79 previously reported and 33 novel patients) allowed us to delineate the TCF4 mutational spectrum, with 40% point mutations, 30% small deletions/insertions, and 30% deletions. Most of these were private mutations and generated premature stop codons. Missense mutations were localized in the bHLH domain, which is a mutational hotspot. No obvious difference was observed between patients harboring truncating, missense mutations, or deletions, further supporting TCF4 haploinsufficiency as the molecular mechanism underlying PTHS. In this study, we have summarized the current knowledge of TCF4 molecular pathology, reported all the mutations in the TCF4 database (http://www.LOVD.nl/TCF4), and present a novel and comprehensive diagnostic strategy for PTHS.",2011-11-23 +21995939,Genes2WordCloud: a quick way to identify biological themes from gene lists and free text.,"

Background

Word-clouds recently emerged on the web as a solution for quickly summarizing text by maximizing the display of most relevant terms about a specific topic in the minimum amount of space. As biologists are faced with the daunting amount of new research data commonly presented in textual formats, word-clouds can be used to summarize and represent biological and/or biomedical content for various applications.

Results

Genes2WordCloud is a web application that enables users to quickly identify biological themes from gene lists and research relevant text by constructing and displaying word-clouds. It provides users with several different options and ideas for the sources that can be used to generate a word-cloud. Different options for rendering and coloring the word-clouds give users the flexibility to quickly generate customized word-clouds of their choice.

Methods

Genes2WordCloud is a word-cloud generator and a word-cloud viewer that is based on WordCram implemented using Java, Processing, AJAX, mySQL, and PHP. Text is fetched from several sources and then processed to extract the most relevant terms with their computed weights based on word frequencies. Genes2WordCloud is freely available for use online; it is open source software and is available for installation on any web-site along with supporting documentation at http://www.maayanlab.net/G2W.

Conclusions

Genes2WordCloud provides a useful way to summarize and visualize large amounts of textual biological data or to find biological themes from several different sources. The open source availability of the software enables users to implement customized word-clouds on their own web-sites and desktop applications.",2011-10-13 +22116063,Xanthusbase after five years expands to become Openmods.,"Xanthusbase (http://www.xanthusbase.org), a model organism database for the bacterium Myxococcus xanthus, functions as a collaborative information repository based on Wikipedia principles. It was created more than 5 years ago to serve as a cost-effective reference database for M. xanthus researchers, an education tool for undergraduate students to learn about genome annotation, and a means for the community of researchers to collaboratively improve their organism's annotation. We have achieved several goals and are seeking creative solutions to ongoing challenges. Along the way we have made several important improvements to Xanthusbase related to stability, security and usability. Most importantly, we have designed and implemented an installer that enables other microbial model organism communities to use it as a MOD. This version, called Openmods, has already been used to create Xenorhabdusbase (http://xenorhabdusbase.bact.wisc.edu), Caulobacterbase (http://caulobacterbase.bsd.uchicago.edu) and soon Bdellovibriobase.",2011-11-23 +22014078,"Inputs to quality: supervision, management, and community involvement in health facilities in Egypt in 2004.","

Background

As low- and middle-income countries experience economic development, ensuring quality of health care delivery is a central component of health reform. Nevertheless, health reforms in low- and middle-income countries have focused more on access to services rather than the quality of these services, and reporting on quality has been limited. In the present study, we sought to examine the prevalence and regional variation in key management practices in Egyptian health facilities within three domains: supervision of the facility from the Ministry of Health and Population (MOHP), managerial processes, and patient and community involvement in care.

Methods

We conducted a cross-sectional analysis of data from 559 facilities surveyed with the Egyptian Service Provision Assessment (ESPA) survey in 2004, the most recent such survey in Egypt. We registered on the Measure Demographic and Health Survey (DHS) website http://legacy.measuredhs.com/login.cfm to gain access to the survey data. From the ESPA sampled 559 MOHP facilities, we excluded a total of 79 facilities because they did not offer facility-based 24-hour care or have at least one physician working in the facility, resulting in a final sample of 480 facilities. The final sample included 76 general service hospitals, 307 rural health units, and 97 maternal and child health and urban health units (MCH/urban units). We used standard frequency analyses to describe facility characteristics and tested the statistical significance of regional differences using chi-square statistics.

Results

Nearly all facilities reported having external supervision within the 6 months preceding the interview. In contrast, key facility-level managerial processes, such as having routine and documented management meetings and applying quality assurance approaches, were uncommon. Involvement of communities and patients was also reported in a minority of facilities. Hospitals and health units located in Urban Egypt compared with more rural parts of Egypt were significantly more likely to have management committees that met at least monthly, to keep official records of the meetings, and to have an approach for reviewing quality assurance activities.

Conclusions

Although the data precede the recent reform efforts of the MOHP, they provide a baseline against which future progress can be measured. Targeted efforts to improve facility-level management are critical to supporting quality improvement initiatives directed at improving the quality of health care throughout the country.",2011-10-20 +23496949,"GenePainter: a fast tool for aligning gene structures of eukaryotic protein families, visualizing the alignments and mapping gene structures onto protein structures.","

Background

All sequenced eukaryotic genomes have been shown to possess at least a few introns. This includes those unicellular organisms, which were previously suspected to be intron-less. Therefore, gene splicing must have been present at least in the last common ancestor of the eukaryotes. To explain the evolution of introns, basically two mutually exclusive concepts have been developed. The introns-early hypothesis says that already the very first protein-coding genes contained introns while the introns-late concept asserts that eukaryotic genes gained introns only after the emergence of the eukaryotic lineage. A very important aspect in this respect is the conservation of intron positions within homologous genes of different taxa.

Results

GenePainter is a standalone application for mapping gene structure information onto protein multiple sequence alignments. Based on the multiple sequence alignments the gene structures are aligned down to single nucleotides. GenePainter accounts for variable lengths in exons and introns, respects split codons at intron junctions and is able to handle sequencing and assembly errors, which are possible reasons for frame-shifts in exons and gaps in genome assemblies. Thus, even gene structures of considerably divergent proteins can properly be compared, as it is needed in phylogenetic analyses. Conserved intron positions can also be mapped to user-provided protein structures. For their visualization GenePainter provides scripts for the molecular graphics system PyMol.

Conclusions

GenePainter is a tool to analyse gene structure conservation providing various visualization options. A stable version of GenePainter for all operating systems as well as documentation and example data are available at http://www.motorprotein.de/genepainter.html.",2013-03-04 +23915178,How do medical doctors use a web-based oncology protocol system? A comparison of Australian doctors at different levels of medical training using logfile analysis and an online survey.,"

Background

Electronic decision support is commonplace in medical practice. However, its adoption at the point-of-care is dependent on a range of organisational, patient and clinician-related factors. In particular, level of clinical experience is an important driver of electronic decision support uptake. Our objective was to examine the way in which Australian doctors at different stages of medical training use a web-based oncology system (http://www.eviq.org.au).

Methods

We used logfiles to examine the characteristics of eviQ registrants (2009-2012) and patterns of eviQ use in 2012, according to level of medical training. We also used a web-based survey to evaluate the way doctors at different levels of medical training use the online system and to elicit perceptions of the system's utility in oncology care.

Results

Our study cohort comprised 2,549 eviQ registrants who were hospital-based medical doctors across all levels of training. 65% of the cohort used eviQ in 2012, with 25% of interns/residents, 61% of advanced oncology trainees and 47% of speciality-qualified oncologists accessing eviQ in the last 3 months of 2012. The cohort accounted for 445,492 webhits in 2012. On average, advanced trainees used eviQ up to five-times more than other doctors (42.6 webhits/month compared to 22.8 for specialty-qualified doctors and 7.4 webhits/month for interns/residents). Of the 52 survey respondents, 89% accessed eviQ's chemotherapy protocols on a daily or weekly basis in the month prior to the survey. 79% of respondents used eviQ at least weekly to initiate therapy and to support monitoring (29%), altering (35%) or ceasing therapy (19%). Consistent with the logfile analysis, advanced oncology trainees report more frequent eviQ use than doctors at other stages of medical training.

Conclusions

The majority of the Australian oncology workforce are registered on eviQ. The frequency of use directly mirrors the clinical role of doctors and attitudes about the utility of eviQ in decision-making. Evaluations of this kind generate important data for system developers and medical educators to drive improvements in electronic decision support to better meet the needs of clinicians. This end-user focus will optimise the uptake of systems which will translate into improvements in processes of care and patient outcomes.",2013-08-04 +21415247,Colorectal cancer: CT colonography and colonoscopy for detection--systematic review and meta-analysis.,"

Purpose

To perform a systematic review and meta-analysis of published studies assessing the sensitivity of both computed tomographic (CT) colonography and optical colonoscopy (OC) for colorectal cancer detection.

Materials and methods

Analysis followed Preferred Reporting Items for Systematic Reviews and Meta-Analyses recommendations. The primary data source was the results of a detailed PubMed search from 1994 to 2009. Diagnostic studies evaluating CT colonography detection of colorectal cancer were assessed by using predefined inclusion and exclusion criteria, in particular requiring both OC and histologic confirmation of disease. Studies that also included a mechanism to assess true-positive versus false-negative diagnoses at OC (eg, segmental unblinding) were used to calculate OC sensitivity. Assessment and data extraction were performed independently by two authors. Potential bias was ascertained by using Quality Assessment of Diagnostic Accuracy Studies guidelines. Specific CT colonography techniques were cataloged. Forest plots of per-patient sensitivity were produced on the basis of random-effect models. Potential bias across primary studies was assessed by using the I(2) statistic. Original study authors were contacted for data clarification when necessary.

Results

Forty-nine studies provided data on 11,151 patients with a cumulative colorectal cancer prevalence of 3.6% (414 cancers). The sensitivity of CT colonography for colorectal cancer was 96.1% (398 of 414; 95% confidence interval [CI]: 93.8%, 97.7%). No heterogeneity (I(2) = 0%) was detected. No cancers were missed at CT colonography when both cathartic and tagging agents were combined in the bowel preparation. The sensitivity of OC for colorectal cancer, derived from a subset of 25 studies including 9223 patients, was 94.7% (178 of 188; 95% CI: 90.4%, 97.2%). A moderate degree of heterogeneity (I(2) = 50%) was present.

Conclusion

CT colonography is highly sensitive for colorectal cancer, especially when both cathartic and tagging agents are combined in the bowel preparation. Given the relatively low prevalence of colorectal cancer, primary CT colonography may be more suitable than OC for initial investigation of suspected colorectal cancer, assuming reasonable specificity.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11101887/-/DC1.",2011-03-17 +21235461,Drug utilization 75% (DU75%) in 17 European hospitals (2000-2005): results from the ESAC-2 Hospital Care Sub Project.,"The study aimed to assess 75% of drug utilization (DU75%) in participating hospitals and identify quality indicators which should be used to monitor performance within the hospitals. In the European Surveillance of Antimicrobial Consumption (ESAC; http://www.esac.ua.ac.be) project anatomic therapeutic chemical (ATC), defined daily dose (DDD) and route of administration (RoA) were used for drug categorization. Data were collected for: antibacterials for systemic use; intestinal antibiotics; rifampicin; and nitroimidazole derivatives. Each hospital's annual data were analyzed separately (hospital-year) adding up to a total of 97 hospital-year data-sets. The drug most persistently present within DU75% was ciprofloxacin (84/97 hospital-years). Co-amoxiclav was the drug which most frequently ranked first (28 times). The number of drugs constituting the DU75% by substance ranged from 7-15 (median 12) and 8-19 (median 15) by RoA which identified oral amoxicillin most frequently ranking first (17 times). In many hospitals the oral route accounted for most of the DU75%. Therefore, the extent of oral use was identified as a quality indicator which could be monitored using DU75% methodology. Since substantial variation both in extent and distribution of antibiotic use was observed, DU75% methodology is best adapted for intra-hospital consumption trend analyses or for hospitals with comparable characteristics and formularies. The number of drugs within DU75% was identified as another quality indicator. Thus, aspiring to decrease the consumption of overused drug classes should be set by the hospitals as a quality indicator on prescribing patterns.",2011-02-01 +22355227,IGIPT - Integrated genomic island prediction tool.,"

Unlabelled

IGIPT is a web-based integrated platform for the identification of genomic islands (GIs). It incorporates thirteen parametric measures based on anomalous nucleotide composition on a single platform, thus improving the predictive power of a horizontally acquired region, since it is known that no single measure can absolutely predict a horizontally transferred region. The tool filters putative GIs based on standard deviation from genomic average and also provides raw output in MS excel format for further analysis. To facilitate the identification of various structural features, viz., tRNA integration sites, repeats, etc. in the vicinity of GIs, the tool provides an option to extract the predicted regions and their flanking regions.

Availability

The database is available for free at http://bioinf.iiit.ac.in/IGIPT/",2011-11-20 +21624642,Changes in water quality of the River Frome (UK) from 1965 to 2009: is phosphorus mitigation finally working?,"The water quality of the River Frome, Dorset, southern England, was monitored at weekly intervals from 1965 until 2009. Determinands included phosphorus, nitrogen, silicon, potassium, calcium, sodium, magnesium, pH, alkalinity and temperature. Nitrate-N concentrations increased from an annual average of 2.4 mg l⁻¹ in the mid to late 1960s to 6.0 mg l⁻¹ in 2008-2009, but the rate of increase was beginning to slow. Annual soluble reactive phosphorus (SRP) concentrations increased from 101 μg l⁻¹ in the mid 1960s to a maximum of 190 μg l⁻¹ in 1989. In 2002, there was a step reduction in SRP concentration (average=88 μg l⁻¹ in 2002-2005), with further improvement in 2007-2009 (average=49 μg l⁻¹), due to the introduction of phosphorus stripping at sewage treatment works. Phosphorus and nitrate concentrations showed clear annual cycles, related to the timing of inputs from the catchment, and within-stream bioaccumulation and release. Annual depressions in silicon concentration each spring (due to diatom proliferation) reached a maximum between 1980 and 1991, (the period of maximum SRP concentration) indicating that algal biomass had increased within the river. The timing of these silicon depressions was closely related to temperature. Excess carbon dioxide partial pressures (EpCO₂) of 60 times atmospheric CO₂ were also observed through the winter periods from 1980 to 1992, when phosphorus concentration was greatest, indicating very high respiration rates due to microbial decomposition of this enhanced biomass. Declining phosphorus concentrations since 2002 reduced productivity and algal biomass in the summer, and EpCO₂ through the winter, indicating that sewage treatment improvements had improved riverine ecology. 
Algal blooms were limited by phosphorus, rather than silicon concentration. The value of long-term water quality data sets is discussed. The data from this monitoring programme are made freely available to the wider science community through the CEH data portal (http://gateway.ceh.ac.uk/).",2011-05-31 +21510904,Detection of recurrent rearrangement breakpoints from copy number data.,"

Background

Copy number variants (CNVs), including deletions, amplifications, and other rearrangements, are common in human and cancer genomes. Copy number data from array comparative genome hybridization (aCGH) and next-generation DNA sequencing is widely used to measure copy number variants. Comparison of copy number data from multiple individuals reveals recurrent variants. Typically, the interior of a recurrent CNV is examined for genes or other loci associated with a phenotype. However, in some cases, such as gene truncations and fusion genes, the target of the variant lies at the boundary of the variant.

Results

We introduce Neighborhood Breakpoint Conservation (NBC), an algorithm for identifying rearrangement breakpoints that are highly conserved at the same locus in multiple individuals. NBC detects recurrent breakpoints at varying levels of resolution, including breakpoints whose location is exactly conserved and breakpoints whose location varies within a gene. NBC also identifies pairs of recurrent breakpoints such as those that result from fusion genes. We apply NBC to aCGH data from 36 primary prostate tumors and identify 12 novel rearrangements, one of which is the well-known TMPRSS2-ERG fusion gene. We also apply NBC to 227 glioblastoma tumors and predict 93 novel rearrangements which we further classify as gene truncations, germline structural variants, and fusion genes. A number of these variants involve the protein phosphatase PTPN12 suggesting that deregulation of PTPN12, via a variety of rearrangements, is common in glioblastoma.

Conclusions

We demonstrate that NBC is useful for detection of recurrent breakpoints resulting from copy number variants or other structural variants, and in particular identifies recurrent breakpoints that result in gene truncations or fusion genes. Software is available at http://cs.brown.edu/people/braphael/software.html.",2011-04-21 +21840874,Conserved and differential gene interactions in dynamical biological systems.,"

Motivation

While biological systems operated from a common genome can be conserved in various ways, they can also manifest highly diverse dynamics and functions. This is because the same set of genes can interact differentially across specific molecular contexts. For example, differential gene interactions give rise to various stages of morphogenesis during cerebellar development. However, after over a decade of efforts toward reverse engineering biological networks from high-throughput omic data, gene networks of most organisms remain sketchy. This hindrance has motivated us to develop comparative modeling to highlight conserved and differential gene interactions across experimental conditions, without reconstructing complete gene networks first.

Results

We established a comparative dynamical system modeling (CDSM) approach to identify conserved and differential interactions across molecular contexts. In CDSM, interactions are represented by ordinary differential equations and compared across conditions through statistical heterogeneity and homogeneity tests. CDSM demonstrated a consistent superiority over differential correlation and reconstruct-then-compare in simulation studies. We exploited CDSM to elucidate gene interactions important for cellular processes poorly understood during mouse cerebellar development. We generated hypotheses on 66 differential genetic interactions involved in expansion of the external granule layer. These interactions are implicated in cell cycle, differentiation, apoptosis and morphogenesis. Additional 1639 differential interactions among gene clusters were also identified when we compared gene interactions during the presence of Rhombic lip versus the presence of distinct internal granule layer. Moreover, compared with differential correlation and reconstruct-then-compare, CDSM makes fewer assumptions on data and thus is applicable to a wider range of biological assays.

Availability

Source code in C++ and R is available for non-commercial organizations upon request from the corresponding author. The cerebellum gene expression dataset used in this article is available upon request from the Goldowitz lab (dang@cmmt.ubc.ca, http://grits.dglab.org/).

Contact

joemsong@cs.nmsu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-11 +22730432,Inferring epigenetic and transcriptional regulation during blood cell development with a mixture of sparse linear models.,"

Motivation

Blood cell development is thought to be controlled by a circuit of transcription factors (TFs) and chromatin modifications that determine the cell fate through activating cell type-specific expression programs. To shed light on the interplay between histone marks and TFs during blood cell development, we model gene expression from regulatory signals by means of combinations of sparse linear regression models.

Results

The mixture of sparse linear regression models was able to improve the gene expression prediction in relation to the use of a single linear model. Moreover, it performed an efficient selection of regulatory signals even when analyzing all TFs with known motifs (>600). The method identified interesting roles for histone modifications and a selection of TFs related to blood development and chromatin remodelling.

Availability

The method and datasets are available from http://www.cin.ufpe.br/~igcf/SparseMix.

Contact

igcf@cin.ufpe.br

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-06-23 +21712251,Paired-end RAD-seq for de novo assembly and marker design without available reference.,"

Motivation

Next-generation sequencing technologies have facilitated the study of organisms on a genome-wide scale. A recent method called restriction site associated DNA sequencing (RAD-seq) allows to sample sequence information at reduced complexity across a target genome using the Illumina platform. Single-end RAD-seq has proven to provide a large number of informative genetic markers in reference as well as non-reference organisms.

Results

Here, we present a method for de novo assembly of paired-end RAD-seq data in order to produce extended contigs flanking a restriction site. We were able to reconstruct one-tenth of the guppy genome represented by 200-500 bp contigs associated to EcoRI recognition sites. In addition, these contigs were used as reference allowing the detection of thousands of new polymorphic markers that are informative for mapping and population genetic studies in the guppy.

Availability

A perl and C++ implementation of the method demonstrated in this article is available under http://guppy.weigelworld.org/weigeldatabases/radMarkers/ as package RApiD.

Contact

christine.dreyer@tuebingen.mpg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-27 +21622953,ICSNPathway: identify candidate causal SNPs and pathways from genome-wide association study by one analytical framework.,"Genome-wide association study (GWAS) is widely utilized to identify genes involved in human complex disease or some other trait. One key challenge for GWAS data interpretation is to identify causal SNPs and provide profound evidence on how they affect the trait. Currently, researches are focusing on identification of candidate causal variants from the most significant SNPs of GWAS, while there is lack of support on biological mechanisms as represented by pathways. Although pathway-based analysis (PBA) has been designed to identify disease-related pathways by analyzing the full list of SNPs from GWAS, it does not emphasize on interpreting causal SNPs. To our knowledge, so far there is no web server available to solve the challenge for GWAS data interpretation within one analytical framework. ICSNPathway is developed to identify candidate causal SNPs and their corresponding candidate causal pathways from GWAS by integrating linkage disequilibrium (LD) analysis, functional SNP annotation and PBA. ICSNPathway provides a feasible solution to bridge the gap between GWAS and disease mechanism study by generating hypothesis of SNP → gene → pathway(s). The ICSNPathway server is freely available at http://icsnpathway.psych.ac.cn/.",2011-05-27 +22102578,COLT-Cancer: functional genetic screening resource for essential genes in human cancer cell lines.,"Genome-wide pooled shRNA screens enable global identification of the genes essential for cancer cell survival and proliferation and provide a 'functional genetic' map of human cancer to complement genomic studies. Using a lentiviral shRNA library targeting approximately 16,000 human genes and a newly developed scoring approach, we identified essential gene profiles in more than 70 breast, pancreatic and ovarian cancer cell lines. 
We developed a web-accessible database system for capturing information from each step in our standardized screening pipeline and a gene-centric search tool for exploring shRNA activities within a given cell line or across multiple cell lines. The database consists of a laboratory information and management system for tracking each step of a pooled shRNA screen as well as a web interface for querying and visualization of shRNA and gene-level performance across multiple cancer cell lines. COLT-Cancer Version 1.0 is currently accessible at http://colt.ccbr.utoronto.ca/cancer.",2011-11-18 +22102573,DBETH: a Database of Bacterial Exotoxins for Human.,"Pathogenic bacteria produce protein toxins to survive in the hostile environments defined by the host's defense systems and immune response. Recent progresses in high-throughput genome sequencing and structure determination techniques have contributed to a better understanding of mechanisms of action of the bacterial toxins at the cellular and molecular levels leading to pathogenicity. It is fair to assume that with time more and more unknown toxins will emerge not only by the discovery of newer species but also due to the genetic rearrangement of existing bacterial genomes. Hence, it is crucial to organize a systematic compilation and subsequent analyses of the inherent features of known bacterial toxins. We developed a Database for Bacterial ExoToxins (DBETH, http://www.hpppi.iicb.res.in/btox/), which contains sequence, structure, interaction network and analytical results for 229 toxins categorized within 24 mechanistic and activity types from 26 bacterial genuses. The main objective of this database is to provide a comprehensive knowledgebase for human pathogenic bacterial toxins where various important sequence, structure and physico-chemical property based analyses are provided. 
Further, we have developed a prediction server attached to this database which aims to identify bacterial toxin like sequences either by establishing homology with known toxin sequences/domains or by classifying bacterial toxin specific features using a support vector based machine learning techniques.",2011-11-18 +22390853,FamLink--a user friendly software for linkage calculations in family genetics.,"The present number of STR loci adopted in relationship testing is chiefly limited to unlinked markers, in most cases residing on different chromosomes. In order to solve more complex cases of relatedness, e.g. deficient paternities and disputed sibships, the number of core loci can be extended. The inclusion of multiple loci on the same chromosome will, however, increase the risk of possible linkage between markers. We present a new software, FamLink, freely available from http://www.FamLink.se, that can perform statistical calculations based on pedigree structures and account for linkage between pairs of markers. In addition, FamLink can simulate genotype data in order to study the effect of accounting for linkage or not. We demonstrate the importance of taking linkage properly into account using examples and real cases.",2012-03-04 +22102581,ProRepeat: an integrated repository for studying amino acid tandem repeats in proteins.,"ProRepeat (http://prorepeat.bioinformatics.nl/) is an integrated curated repository and analysis platform for in-depth research on the biological characteristics of amino acid tandem repeats. ProRepeat collects repeats from all proteins included in the UniProt knowledgebase, together with 85 completely sequenced eukaryotic proteomes contained within the RefSeq collection. It contains non-redundant perfect tandem repeats, approximate tandem repeats and simple, low-complexity sequences, covering the majority of the amino acid tandem repeat patterns found in proteins. 
The ProRepeat web interface allows querying the repeat database using repeat characteristics like repeat unit and length, number of repetitions of the repeat unit and position of the repeat in the protein. Users can also search for repeats by the characteristics of repeat containing proteins, such as entry ID, protein description, sequence length, gene name and taxon. ProRepeat offers powerful analysis tools for finding biological interesting properties of repeats, such as the strong position bias of leucine repeats in the N-terminus of eukaryotic protein sequences, the differences of repeat abundance among proteomes, the functional classification of repeat containing proteins and GC content constrains of repeats' corresponding codons.",2011-11-18 +22102590,Reorganizing the protein space at the Universal Protein Resource (UniProt).,"The mission of UniProt is to support biological research by providing a freely accessible, stable, comprehensive, fully classified, richly and accurately annotated protein sequence knowledgebase, with extensive cross-references and querying interfaces. UniProt is comprised of four major components, each optimized for different uses: the UniProt Archive, the UniProt Knowledgebase, the UniProt Reference Clusters and the UniProt Metagenomic and Environmental Sequence Database. A key development at UniProt is the provision of complete, reference and representative proteomes. UniProt is updated and distributed every 4 weeks and can be accessed online for searches or download at http://www.uniprot.org.",2011-11-18 +22101057,"DECIPHER, a search-based approach to chimera identification for 16S rRNA sequences.","DECIPHER is a new method for finding 16S rRNA chimeric sequences by the use of a search-based approach. The method is based upon detecting short fragments that are uncommon in the phylogenetic group where a query sequence is classified but frequently found in another phylogenetic group. 
The algorithm was calibrated for full sequences (fs_DECIPHER) and short sequences (ss_DECIPHER) and benchmarked against WigeoN (Pintail), ChimeraSlayer, and Uchime using artificially generated chimeras. Overall, ss_DECIPHER and Uchime provided the highest chimera detection for sequences 100 to 600 nucleotides long (79% and 81%, respectively), but Uchime's performance deteriorated for longer sequences, while ss_DECIPHER maintained a high detection rate (89%). Both methods had low false-positive rates (1.3% and 1.6%). The more conservative fs_DECIPHER, benchmarked only for sequences longer than 600 nucleotides, had an overall detection rate lower than that of ss_DECIPHER (75%) but higher than those of the other programs. In addition, fs_DECIPHER had the lowest false-positive rate among all the benchmarked programs (<0.20%). DECIPHER was outperformed only by ChimeraSlayer and Uchime when chimeras were formed from closely related parents (less than 10% divergence). Given the differences in the programs, it was possible to detect over 89% of all chimeras with just the combination of ss_DECIPHER and Uchime. Using fs_DECIPHER, we detected between 1% and 2% additional chimeras in the RDP, SILVA, and Greengenes databases from which chimeras had already been removed with Pintail or Bellerophon. DECIPHER was implemented in the R programming language and is directly accessible through a webpage or by downloading the program as an R package (http://DECIPHER.cee.wisc.edu).",2011-11-18 +22099701,Structural alphabet motif discovery and a structural motif database.,"This study proposes a general framework for structural motif discovery. The framework is based on a modular design in which the system components can be modified or replaced independently to increase its applicability to various studies. 
It is a two-stage approach that first converts protein 3D structures into structural alphabet sequences, and then applies a sequence motif-finding tool to these sequences to detect conserved motifs. We named the structural motif database we built the SA-Motifbase, which provides the structural information conserved at different hierarchical levels in SCOP. For each motif, SA-Motifbase presents its 3D view; alphabet letter preference; alphabet letter frequency distribution; and the significance. SA-Motifbase is available at http://bioinfo.cis.nctu.edu.tw/samotifbase/.",2011-11-17 +22096233,MINAS--a database of Metal Ions in Nucleic AcidS.,"Correctly folded into the respective native 3D structure, RNA and DNA are responsible for uncountable key functions in any viable organism. In order to exert their function, metal ion cofactors are closely involved in folding, structure formation and, e.g. in ribozymes, also the catalytic mechanism. The database MINAS, Metal Ions in Nucleic AcidS (http://www.minas.uzh.ch), compiles the detailed information on innersphere, outersphere and larger coordination environment of >70,000 metal ions of 36 elements found in >2000 structures of nucleic acids contained today in the PDB and NDB. MINAS is updated monthly with new structures and offers a multitude of search functions, e.g. the kind of metal ion, metal-ligand distance, innersphere and outersphere ligands defined by element or functional group, residue, experimental method, as well as PDB entry-related information. The results of each search can be saved individually for later use with so-called miniPDB files containing the respective metal ion together with the coordination environment within a 15 Å radius. 
MINAS thus offers a unique way to explore the coordination geometries and ligands of metal ions together with the respective binding pockets in nucleic acids.",2011-11-16 +22096229,InterPro in 2011: new developments in the family and domain prediction database.,"InterPro (http://www.ebi.ac.uk/interpro/) is a database that integrates diverse information about protein families, domains and functional sites, and makes it freely available to the public via Web-based interfaces and services. Central to the database are diagnostic models, known as signatures, against which protein sequences can be searched to determine their potential function. InterPro has utility in the large-scale analysis of whole genomes and meta-genomes, as well as in characterizing individual protein sequences. Herein we give an overview of new developments in the database and its associated software since 2009, including updates to database content, curation processes and Web and programmatic interfaces.",2011-11-16 +22096230,WikiPathways: building research communities on biological pathways.,"Here, we describe the development of WikiPathways (http://www.wikipathways.org), a public wiki for pathway curation, since it was first published in 2008. New features are discussed, as well as developments in the community of contributors. New features include a zoomable pathway viewer, support for pathway ontology annotations, the ability to mark pathways as private for a limited time and the availability of stable hyperlinks to pathways and the elements therein. WikiPathways content is freely available in a variety of formats such as the BioPAX standard, and the content is increasingly adopted by external databases and tools, including Wikipedia. A recent development is the use of WikiPathways as a staging ground for centrally curated databases such as Reactome. WikiPathways is seeing steady growth in the number of users, page views and edits for each pathway. 
To assess whether the community curation experiment can be considered successful, here we analyze the relation between use and contribution, which gives results in line with other wiki projects. The novel use of pathway pages as supplementary material to publications, as well as the addition of tailored content for research domains, is expected to stimulate growth further.",2011-11-16 +22096231,eggNOG v3.0: orthologous groups covering 1133 organisms at 41 different taxonomic ranges.,"Orthologous relationships form the basis of most comparative genomic and metagenomic studies and are essential for proper phylogenetic and functional analyses. The third version of the eggNOG database (http://eggnog.embl.de) contains non-supervised orthologous groups constructed from 1133 organisms, doubling the number of genes with orthology assignment compared to eggNOG v2. The new release is the result of a number of improvements and expansions: (i) the underlying homology searches are now based on the SIMAP database; (ii) the orthologous groups have been extended to 41 levels of selected taxonomic ranges enabling much more fine-grained orthology assignments; and (iii) the newly designed web page is considerably faster with more functionality. In total, eggNOG v3 contains 721,801 orthologous groups, encompassing a total of 4,396,591 genes. Additionally, we updated 4873 and 4850 original COGs and KOGs, respectively, to include all 1133 organisms. At the universal level, covering all three domains of life, 101,208 orthologous groups are available, while the others are applicable at 40 more limited taxonomic ranges. Each group is amended by multiple sequence alignments and maximum-likelihood trees and broad functional descriptions are provided for 450,904 orthologous groups (62.5%).",2011-11-16 +21775309,coMOTIF: a mixture framework for identifying transcription factor and a coregulator motif in ChIP-seq data.,"

Motivation

ChIP-seq data are enriched in binding sites for the protein immunoprecipitated. Some sequences may also contain binding sites for a coregulator. Biologists are interested in knowing which coregulatory factor motifs may be present in the sequences bound by the protein ChIP'ed.

Results

We present a finite mixture framework with an expectation-maximization algorithm that considers two motifs jointly and simultaneously determines which sequences contain both motifs, either one or neither of them. Tested on 10 simulated ChIP-seq datasets, our method performed better than repeated application of MEME in predicting sequences containing both motifs. When applied to a mouse liver Foxa2 ChIP-seq dataset involving ~ 12 000 400-bp sequences, coMOTIF identified co-occurrence of Foxa2 with Hnf4a, Cebpa, E-box, Ap1/Maf or Sp1 motifs in ~6-33% of these sequences. These motifs are either known as liver-specific transcription factors or have an important role in liver function.

Availability

Freely available at http://www.niehs.nih.gov/research/resources/software/comotif/.

Contact

li3@niehs.nih.gov

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-19 +21204564,SQID: an intensity-incorporated protein identification algorithm for tandem mass spectrometry.,"To interpret LC-MS/MS data in proteomics, most popular protein identification algorithms primarily use predicted fragment m/z values to assign peptide sequences to fragmentation spectra. The intensity information is often undervalued, because it is not as easy to predict and incorporate into algorithms. Nevertheless, the use of intensity to assist peptide identification is an attractive prospect and can potentially improve the confidence of matches and generate more identifications. On the basis of our previously reported study of fragmentation intensity patterns, we developed a protein identification algorithm, SeQuence IDentfication (SQID), that makes use of the coarse intensity from a statistical analysis. The scoring scheme was validated by comparing with Sequest and X!Tandem using three data sets, and the results indicate an improvement in the number of identified peptides, including unique peptides that are not identified by Sequest or X!Tandem. The software and source code are available under the GNU GPL license at http://quiz2.chem.arizona.edu/wysocki/bioinformatics.htm.",2011-02-23 +22086950,"MEROPS: the database of proteolytic enzymes, their substrates and inhibitors.","Peptidases, their substrates and inhibitors are of great relevance to biology, medicine and biotechnology. The MEROPS database (http://merops.sanger.ac.uk) aims to fulfil the need for an integrated source of information about these. The database has hierarchical classifications in which homologous sets of peptidases and protein inhibitors are grouped into protein species, which are grouped into families, which are in turn grouped into clans. The database has been expanded to include proteolytic enzymes other than peptidases. 
Special identifiers for peptidases from a variety of model organisms have been established so that orthologues can be detected in other species. A table of predicted active-site residue and metal ligand positions and the residue ranges of the peptidase domains in orthologues has been added to each peptidase summary. New displays of tertiary structures, which can be rotated or have the surfaces displayed, have been added to the structure pages. New indexes for gene names and peptidase substrates have been made available. Among the enhancements to existing features are the inclusion of small-molecule inhibitors in the tables of peptidase-inhibitor interactions, a table of known cleavage sites for each protein substrate, and tables showing the substrate-binding preferences of peptidases derived from combinatorial peptide substrate libraries.",2011-11-15 +23159758,"Muricauda zhangzhouensis sp. nov., isolated from mangrove sediment.","A Gram-staining-negative, rod-shaped and non-motile bacterial strain, designated 12C25(T), was isolated from the crude-oil-degrading bacterial consortium enriched from mangrove sediment collected in Fujian Province, China. Optimal growth was observed at 25-28 °C, at pH 7.0 and in the presence of 2% (w/v) NaCl + 2% (w/v) KCl. Comparative 16S rRNA gene sequence analysis demonstrated that strain 12C25(T) shared the highest sequence similarity with members of the genus Muricauda (97.7-93.9%), exhibiting 97.7% sequence similarity and 33.7 ± 4% DNA-DNA relatedness to Muricauda aquimarina SW-63(T). The DNA G+C content of strain 12C25(T) was 39.9 mol%. The dominant fatty acids were iso-C15:1 G, iso-C17:0 3-OH, iso-C15:0, C18:0 and iso-C15:0 3-OH, and menaquinone with six isoprene units (MK-6) was the only respiratory quinone. 
On the basis of phenotypic data and phylogenetic inference, the novel strain belongs to the genus Muricauda, but can readily be distinguished from known species of this genus http://dx.doi.org/10.1601/nm.8170 and thus represents a novel species of the genus Muricauda. The name Muricauda zhangzhouensis sp. nov. is proposed and the type strain is 12C25(T) (=CGMCC 1.11028(T)=MCCC 1F01096(T)=DSM 25030(T)).",2012-11-16 +21756356,Statistical mutation calling from sequenced overlapping DNA pools in TILLING experiments.,"

Background

TILLING (Targeting induced local lesions IN genomes) is an efficient reverse genetics approach for detecting induced mutations in pools of individuals. Combined with the high-throughput of next-generation sequencing technologies, and the resolving power of overlapping pool design, TILLING provides an efficient and economical platform for functional genomics across thousands of organisms.

Results

We propose a probabilistic method for calling TILLING-induced mutations, and their carriers, from high throughput sequencing data of overlapping population pools, where each individual occurs in two pools. We assign a probability score to each sequence position by applying Bayes' Theorem to a simplified binomial model of sequencing error and expected mutations, taking into account the coverage level. We test the performance of our method on variable quality, high-throughput sequences from wheat and rice mutagenized populations.

Conclusions

We show that our method effectively discovers mutations in large populations with sensitivity of 92.5% and specificity of 99.8%. It also outperforms existing SNP detection methods in detecting real mutations, especially at higher levels of coverage variability across sequenced pools, and in lower quality short reads sequence data. The implementation of our method is available from: http://www.cs.ucdavis.edu/filkov/CAMBa/.",2011-07-14 +22080550,BYKdb: the Bacterial protein tYrosine Kinase database.,"Bacterial tyrosine-kinases share no resemblance with their eukaryotic counterparts and they have been unified in a new protein family named BY-kinases. These enzymes have been shown to control several biological functions in the bacterial cells. In recent years biochemical studies, sequence analyses and structure resolutions allowed the deciphering of a common signature. However, BY-kinase sequence annotations in primary databases remain incomplete. This prompted us to develop a specialized database of computer-annotated BY-kinase sequences: the Bacterial protein tyrosine-kinase database (BYKdb). BY-kinase sequences are first identified, thanks to a workflow developed in a previous work. A second workflow annotates the UniProtKB entries in order to provide the BYKdb entries. The database can be accessed through a web interface that allows static and dynamic queries and offers integrated sequence analysis tools. BYKdb can be found at http://bykdb.ibcp.fr.",2011-11-12 +22080558,HotRegion: a database of predicted hot spot clusters.,"Hot spots are energetically important residues at protein interfaces and they are not randomly distributed across the interface but rather clustered. These clustered hot spots form hot regions. Hot regions are important for the stability of protein complexes, as well as providing specificity to binding sites. 
We propose a database called HotRegion, which provides the hot region information of the interfaces by using predicted hot spot residues, and structural properties of these interface residues such as pair potentials of interface residues, accessible surface area (ASA) and relative ASA values of interface residues of both monomer and complex forms of proteins. Also, the 3D visualization of the interface and interactions among hot spot residues are provided. HotRegion is accessible at http://prism.ccbb.ku.edu.tr/hotregion.",2011-11-12 +23428638,The prediction of organelle-targeting peptides in eukaryotic proteins with Grammatical-Restrained Hidden Conditional Random Fields.,"

Motivation

Targeting peptides are the most important signal controlling the import of nuclear encoded proteins into mitochondria and plastids. In the lack of experimental information, their prediction is an essential step when proteomes are annotated for inferring both the localization and the sequence of mature proteins.

Results

We developed TPpred a new predictor of organelle-targeting peptides based on Grammatical-Restrained Hidden Conditional Random Fields. TPpred is trained on a non-redundant dataset of proteins where the presence of a target peptide was experimentally validated, comprising 297 sequences. When tested on the 297 positive and some other 8010 negative examples, TPpred outperformed available methods in both accuracy and Matthews correlation index (96% and 0.58, respectively). Given its very low-false-positive rate (3.0%), TPpred is, therefore, well suited for large-scale analyses at the proteome level. We predicted that from ∼4 to 9% of the sequences of human, Arabidopsis thaliana and yeast proteomes contain targeting peptides and are, therefore, likely to be localized in mitochondria and plastids. TPpred predictions correlate to a good extent with the experimental annotation of the subcellular localization, when available. TPpred was also trained and tested to predict the cleavage site of the organelle-targeting peptide: on this task, the average error of TPpred on mitochondrial and plastidic proteins is 7 and 15 residues, respectively. This value is lower than the error reported by other methods currently available.

Availability

The TPpred datasets are available at http://biocomp.unibo.it/valentina/TPpred/. TPpred is available on request from the authors.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-02-21 +23564844,Active learning-based information structure analysis of full scientific articles and two applications for biomedical literature review.,"

Motivation

Techniques that are capable of automatically analyzing the information structure of scientific articles could be highly useful for improving information access to biomedical literature. However, most existing approaches rely on supervised machine learning (ML) and substantial labeled data that are expensive to develop and apply to different sub-fields of biomedicine. Recent research shows that minimal supervision is sufficient for fairly accurate information structure analysis of biomedical abstracts. However, is it realistic for full articles given their high linguistic and informational complexity? We introduce and release a novel corpus of 50 biomedical articles annotated according to the Argumentative Zoning (AZ) scheme, and investigate active learning with one of the most widely used ML models-Support Vector Machines (SVM)-on this corpus. Additionally, we introduce two novel applications that use AZ to support real-life literature review in biomedicine via question answering and summarization.

Results

We show that active learning with SVM trained on 500 labeled sentences (6% of the corpus) performs surprisingly well with the accuracy of 82%, just 2% lower than fully supervised learning. In our question answering task, biomedical researchers find relevant information significantly faster from AZ-annotated than unannotated articles. In the summarization task, sentences extracted from particular zones are significantly more similar to gold standard summaries than those extracted from particular sections of full articles. These results demonstrate that active learning of full articles' information structure is indeed realistic and the accuracy is high enough to support real-life literature review in biomedicine.

Availability

The annotated corpus, our AZ classifier and the two novel applications are available at http://www.cl.cam.ac.uk/yg244/12bioinfo.html",2013-04-05 +21752801,DECOD: fast and accurate discriminative DNA motif finding.,"

Motivation

Motif discovery is now routinely used in high-throughput studies including large-scale sequencing and proteomics. These datasets present new challenges. The first is speed. Many motif discovery methods do not scale well to large datasets. Another issue is identifying discriminative rather than generative motifs. Such discriminative motifs are important for identifying co-factors and for explaining changes in behavior between different conditions.

Results

To address these issues we developed a method for DECOnvolved Discriminative motif discovery (DECOD). DECOD uses a k-mer count table and so its running time is independent of the size of the input set. By deconvolving the k-mers DECOD considers context information without using the sequences directly. DECOD outperforms previous methods both in speed and in accuracy when using simulated and real biological benchmark data. We performed new binding experiments for p53 mutants and used DECOD to identify p53 co-factors, suggesting new mechanisms for p53 activation.

Availability

The source code and binaries for DECOD are available at http://www.sb.cs.cmu.edu/DECOD CONTACT: zivbj@cs.cmu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-12 +23592958,A computational model predicting disruption of blood vessel development.,"Vascular development is a complex process regulated by dynamic biological networks that vary in topology and state across different tissues and developmental stages. Signals regulating de novo blood vessel formation (vasculogenesis) and remodeling (angiogenesis) come from a variety of biological pathways linked to endothelial cell (EC) behavior, extracellular matrix (ECM) remodeling and the local generation of chemokines and growth factors. Simulating these interactions at a systems level requires sufficient biological detail about the relevant molecular pathways and associated cellular behaviors, and tractable computational models that offset mathematical and biological complexity. Here, we describe a novel multicellular agent-based model of vasculogenesis using the CompuCell3D (http://www.compucell3d.org/) modeling environment supplemented with semi-automatic knowledgebase creation. The model incorporates vascular endothelial growth factor signals, pro- and anti-angiogenic inflammatory chemokine signals, and the plasminogen activating system of enzymes and proteases linked to ECM interactions, to simulate nascent EC organization, growth and remodeling. The model was shown to recapitulate stereotypical capillary plexus formation and structural emergence of non-coded cellular behaviors, such as a heterologous bridging phenomenon linking endothelial tip cells together during formation of polygonal endothelial cords. Molecular targets in the computational model were mapped to signatures of vascular disruption derived from in vitro chemical profiling using the EPA's ToxCast high-throughput screening (HTS) dataset. 
Simulating the HTS data with the cell-agent based model of vascular development predicted adverse effects of a reference anti-angiogenic thalidomide analog, 5HPP-33, on in vitro angiogenesis with respect to both concentration-response and morphological consequences. These findings support the utility of cell agent-based models for simulating a morphogenetic series of events and for the first time demonstrate the applicability of these models for predictive toxicology.",2013-04-04 +23559640,Detecting regulatory gene-environment interactions with unmeasured environmental factors.,"

Motivation

Genomic studies have revealed a substantial heritable component of the transcriptional state of the cell. To fully understand the genetic regulation of gene expression variability, it is important to study the effect of genotype in the context of external factors such as alternative environmental conditions. In model systems, explicit environmental perturbations have been considered for this purpose, allowing to directly test for environment-specific genetic effects. However, such experiments are limited to species that can be profiled in controlled environments, hampering their use in important systems such as human. Moreover, even in seemingly tightly regulated experimental conditions, subtle environmental perturbations cannot be ruled out, and hence unknown environmental influences are frequent. Here, we propose a model-based approach to simultaneously infer unmeasured environmental factors from gene expression profiles and use them in genetic analyses, identifying environment-specific associations between polymorphic loci and individual gene expression traits.

Results

In extensive simulation studies, we show that our method is able to accurately reconstruct environmental factors and their interactions with genotype in a variety of settings. We further illustrate the use of our model in a real-world dataset in which one environmental factor has been explicitly experimentally controlled. Our method is able to accurately reconstruct the true underlying environmental factor even if it is not given as an input, allowing to detect genuine genotype-environment interactions. In addition to the known environmental factor, we find unmeasured factors involved in novel genotype-environment interactions. Our results suggest that interactions with both known and unknown environmental factors significantly contribute to gene expression variability.

Availability and implementation

Software available at http://pmbio.github.io/envGPLVM/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-04-04 +22253290,IMA: an R package for high-throughput analysis of Illumina's 450K Infinium methylation data.,

Unlabelled

The Illumina Infinium HumanMethylation450 BeadChip is a newly designed high-density microarray for quantifying the methylation level of over 450 000 CpG sites within human genome. Illumina Methylation Analyzer (IMA) is a computational package designed to automate the pipeline for exploratory analysis and summarization of site-level and region-level methylation changes in epigenetic studies utilizing the 450K DNA methylation microarray. The pipeline loads the data from Illumina platform and provides user-customized functions commonly required to perform exploratory methylation analysis for individual sites as well as annotated regions.

Availability

IMA is implemented in the R language and is freely available from http://www.rforge.net/IMA.,2012-01-16 +21602262,Inferring transcription factor complexes from ChIP-seq data.,"Chromatin immunoprecipitation followed by high-throughput sequencing (ChIP-seq) allows researchers to determine the genome-wide binding locations of individual transcription factors (TFs) at high resolution. This information can be interrogated to study various aspects of TF behaviour, including the mechanisms that control TF binding. Physical interaction between TFs comprises one important aspect of TF binding in eukaryotes, mediating tissue-specific gene expression. We have developed an algorithm, spaced motif analysis (SpaMo), which is able to infer physical interactions between the given TF and TFs bound at neighbouring sites at the DNA interface. The algorithm predicts TF interactions in half of the ChIP-seq data sets we test, with the majority of these predictions supported by direct evidence from the literature or evidence of homodimerization. High resolution motif spacing information obtained by this method can facilitate an improved understanding of individual TF complex structures. SpaMo can assist researchers in extracting maximum information relating to binding mechanisms from their TF ChIP-seq data. SpaMo is available for download and interactive use as part of the MEME Suite (http://meme.nbcr.net).",2011-05-20 +22075996,SNPeffect 4.0: on-line prediction of molecular and structural effects of protein-coding variants.,"Single nucleotide variants (SNVs) are, together with copy number variation, the primary source of variation in the human genome and are associated with phenotypic variation such as altered response to drug treatment and susceptibility to disease. Linking structural effects of non-synonymous SNVs to functional outcomes is a major issue in structural bioinformatics. 
The SNPeffect database (http://snpeffect.switchlab.org) uses sequence- and structure-based bioinformatics tools to predict the effect of protein-coding SNVs on the structural phenotype of proteins. It integrates aggregation prediction (TANGO), amyloid prediction (WALTZ), chaperone-binding prediction (LIMBO) and protein stability analysis (FoldX) for structural phenotyping. Additionally, SNPeffect holds information on affected catalytic sites and a number of post-translational modifications. The database contains all known human protein variants from UniProt, but users can now also submit custom protein variants for a SNPeffect analysis, including automated structure modeling. The new meta-analysis application allows plotting correlations between phenotypic features for a user-selected set of variants.",2011-11-10 +22080514,PolymiRTS Database 2.0: linking polymorphisms in microRNA target sites with human diseases and complex traits.,"The polymorphism in microRNA target site (PolymiRTS) database aims to identify single-nucleotide polymorphisms (SNPs) that affect miRNA targeting in human and mouse. These polymorphisms can disrupt the regulation of gene expression by miRNAs and are candidate genetic variants responsible for transcriptional and phenotypic variation. The database is therefore organized to provide links between SNPs in miRNA target sites, cis-acting expression quantitative trait loci (eQTLs), and the results of genome-wide association studies (GWAS) of human diseases. Here, we describe new features that have been integrated in the PolymiRTS database, including: (i) polymiRTSs in genes associated with human diseases and traits in GWAS, (ii) polymorphisms in target sites that have been supported by a variety of experimental methods and (iii) polymorphisms in miRNA seed regions. A large number of newly identified microRNAs and SNPs, recently published mouse phenotypes, and human and mouse eQTLs have also been integrated into the database. 
The PolymiRTS database is available at http://compbio.uthsc.edu/miRSNP/.",2011-11-10 +22075997,STITCH 3: zooming in on protein-chemical interactions.,"To facilitate the study of interactions between proteins and chemicals, we have created STITCH, an aggregated database of interactions connecting over 300,000 chemicals and 2.6 million proteins from 1133 organisms. Compared to the previous version, the number of chemicals with interactions and the number of high-confidence interactions both increase 4-fold. The database can be accessed interactively through a web interface, displaying interactions in an integrated network view. It is also available for computational studies through downloadable files and an API. As an extension in the current version, we offer the option to switch between two levels of detail, namely whether stereoisomers of a given compound are shown as a merged entity or as separate entities. Separate display of stereoisomers is necessary, for example, for carbohydrates and chiral drugs. Combining the isomers increases the coverage, as interaction databases and publications found through text mining will often refer to compounds without specifying the stereoisomer. The database is accessible at http://stitch.embl.de/.",2011-11-09 +22705213,How predictable is the position of third molars over time?,"

Purpose

The purpose of this study was to review contemporaneous longitudinal studies focused on changes in the position of third molars.

Materials and methods

A systematic search of the National Library of Medicine (PubMed, http://www.pubmed.gov) and the Cochrane Central Register of Controlled Trials (http://www.mrw.interscience.wiley.com/cochrane) was conducted to identify eligible articles. The inclusion criteria were 1) longitudinal assessment (retrospective or prospective); 2) published in English; and 3) full text available online or at the University of North Carolina Health Sciences Library.

Results

Five studies met the inclusion criteria. The status of third molars with respect to eruption/angulation was operationalized in multiple ways, making any comparison of the frequency of changes in position difficult. The major findings of each study are reviewed.

Conclusions

Few longitudinal data exist on the changes over time of impacted third molars. Impacted teeth that remain static, with no changes in position or angulation over time, are rare.",2012-06-16 +23308060,Phyletic profiling with cliques of orthologs is enhanced by signatures of paralogy relationships.,"New microbial genomes are sequenced at a high pace, allowing insight into the genetics of not only cultured microbes, but a wide range of metagenomic collections such as the human microbiome. To understand the deluge of genomic data we face, computational approaches for gene functional annotation are invaluable. We introduce a novel model for computational annotation that refines two established concepts: annotation based on homology and annotation based on phyletic profiling. The phyletic profiling-based model that includes both inferred orthologs and paralogs-homologs separated by a speciation and a duplication event, respectively-provides more annotations at the same average Precision than the model that includes only inferred orthologs. For experimental validation, we selected 38 poorly annotated Escherichia coli genes for which the model assigned one of three GO terms with high confidence: involvement in DNA repair, protein translation, or cell wall synthesis. Results of antibiotic stress survival assays on E. coli knockout mutants showed high agreement with our model's estimates of accuracy: out of 38 predictions obtained at the reported Precision of 60%, we confirmed 25 predictions, indicating that our confidence estimates can be used to make informed decisions on experimental validation. Our work will contribute to making experimental validation of computational predictions more approachable, both in cost and time. Our predictions for 998 prokaryotic genomes include ~400000 specific annotations with the estimated Precision of 90%, ~19000 of which are highly specific-e.g. 
""penicillin binding,"" ""tRNA aminoacylation for protein translation,"" or ""pathogenesis""-and are freely available at http://gorbi.irb.hr/.",2013-01-03 +22067448,VFDB 2012 update: toward the genetic diversity and molecular evolution of bacterial virulence factors.,"The virulence factor database (VFDB, http://www.mgc.ac.cn/VFs/) has served as a comprehensive repository of bacterial virulence factors (VFs) for >7 years. Bacterial virulence is an exciting and dynamic field, due to the availability of complete sequences of bacterial genomes and increasing sophisticated technologies for manipulating bacteria and bacterial genomes. The intricacy of virulence mechanisms offers a challenge, and there exists a clear need to decipher the 'language' used by VFs more effectively. In this article, we present the recent major updates of VFDB in an attempt to summarize some of the most important virulence mechanisms by comparing different compositions and organizations of VFs from various bacterial pathogens, identifying core components and phylogenetic clades and shedding new light on the forces that shape the evolutionary history of bacterial pathogenesis. In addition, the 2012 release of VFDB provides an improved user interface.",2011-11-08 +22070882,Crystallography Open Database (COD): an open-access collection of crystal structures and platform for world-wide collaboration.,"Using an open-access distribution model, the Crystallography Open Database (COD, http://www.crystallography.net) collects all known 'small molecule / small to medium sized unit cell' crystal structures and makes them available freely on the Internet. As of today, the COD has aggregated ~150,000 structures, offering basic search capabilities and the possibility to download the whole database, or parts thereof using a variety of standard open communication protocols. 
A newly developed website provides capabilities for all registered users to deposit published and so far unpublished structures as personal communications or pre-publication depositions. Such a setup enables extension of the COD database by many users simultaneously. This increases the possibilities for growth of the COD database, and is the first step towards establishing a world wide Internet-based collaborative platform dedicated to the collection and curation of structural knowledge.",2011-11-08 +22067456,FlyRNAi.org--the database of the Drosophila RNAi screening center: 2012 update.,"FlyRNAi (http://www.flyrnai.org), the database and website of the Drosophila RNAi Screening Center (DRSC) at Harvard Medical School, serves a dual role, tracking both production of reagents for RNA interference (RNAi) screening in Drosophila cells and RNAi screen results. The database and website is used as a platform for community availability of protocols, tools, and other resources useful to researchers planning, conducting, analyzing or interpreting the results of Drosophila RNAi screens. Based on our own experience and user feedback, we have made several changes. Specifically, we have restructured the database to accommodate new types of reagents; added information about new RNAi libraries and other reagents; updated the user interface and website; and added new tools of use to the Drosophila community and others. Overall, the result is a more useful, flexible and comprehensive website and database.",2011-11-08 +22073040,ClubSub-P: Cluster-Based Subcellular Localization Prediction for Gram-Negative Bacteria and Archaea.,"The subcellular localization (SCL) of proteins provides important clues to their function in a cell. In our efforts to predict useful vaccine targets against Gram-negative bacteria, we noticed that misannotated start codons frequently lead to wrongly assigned SCLs. 
This and other problems in SCL prediction, such as the relatively high false-positive and false-negative rates of some tools, can be avoided by applying multiple prediction tools to groups of homologous proteins. Here we present ClubSub-P, an online database that combines existing SCL prediction tools into a consensus pipeline from more than 600 proteomes of fully sequenced microorganisms. On top of the consensus prediction at the level of single sequences, the tool uses clusters of homologous proteins from Gram-negative bacteria and from Archaea to eliminate false-positive and false-negative predictions. ClubSub-P can assign the SCL of proteins from Gram-negative bacteria and Archaea with high precision. The database is searchable, and can easily be expanded using either new bacterial genomes or new prediction tools as they become available. This will further improve the performance of the SCL prediction, as well as the detection of misannotated start codons and other annotation errors. ClubSub-P is available online at http://toolkit.tuebingen.mpg.de/clubsubp/",2011-11-08 +22067445,"SCRIPDB: a portal for easy access to syntheses, chemicals and reactions in patents.","The patent literature is a rich catalog of biologically relevant chemicals; many public and commercial molecular databases contain the structures disclosed in patent claims. However, patents are an equally rich source of metadata about bioactive molecules, including mechanism of action, disease class, homologous experimental series, structural alternatives, or the synthetic pathways used to produce molecules of interest. Unfortunately, this metadata is discarded when chemical structures are deposited separately in databases. SCRIPDB is a chemical structure database designed to make this metadata accessible. SCRIPDB provides the full original patent text, reactions and relationships described within any individual patent, in addition to the molecular files common to structural databases. 
We discuss how such information is valuable in medical text mining, chemical image analysis, reaction extraction and in silico pharmaceutical lead optimization. SCRIPDB may be searched by exact chemical structure, substructure or molecular similarity and the results may be restricted to patents describing synthetic routes. SCRIPDB is available at http://dcv.uhnres.utoronto.ca/SCRIPDB.",2011-11-08 +22067444,zfishbook: connecting you to a world of zebrafish revertible mutants.,"zfishbook is an internet-based openly accessible database of revertible protein trap gene-breaking transposon (GBT) insertional mutants in the zebrafish, Danio rerio. In these lines, a monomeric red fluorescent protein (mRFP) is encoded by an artificial 3' exon, resulting in a translational fusion to endogenous loci. The natural transparency of the zebrafish embryo and larvae greatly facilitates the expression annotation of tagged loci using new capillary-based SCORE imaging methods. Molecular annotation of each line is facilitated by cloning methods such as 5'-Rapid Amplification of cDNA Ends (RACE) and inverse polymerase chain reaction (PCR). zfishbook (http://zfishbook.org) represents a central hub for molecular, expression and mutational information about GBT lines from the International Zebrafish Protein Trap Consortium (IZPTC) that includes researchers from around the globe. zfishbook is open to community-wide contributions including expression and functional annotation. zfishbook also represents a central location for information on how to obtain these lines from diverse members of the IZPTC and integration within other zebrafish community databases including Zebrafish Information Network (ZFIN), Ensembl and National Center for Biotechnology Information.",2011-11-08 +22102890,Sequence-based classification using discriminatory motif feature selection.,"Most existing methods for sequence-based classification use exhaustive feature generation, employing, for example, all k-mer patterns. 
The motivation behind such (enumerative) approaches is to minimize the potential for overlooking important features. However, there are shortcomings to this strategy. First, practical constraints limit the scope of exhaustive feature generation to patterns of length ≤ k, such that potentially important, longer (> k) predictors are not considered. Second, features so generated exhibit strong dependencies, which can complicate understanding of derived classification rules. Third, and most importantly, numerous irrelevant features are created. These concerns can compromise prediction and interpretation. While remedies have been proposed, they tend to be problem-specific and not broadly applicable. Here, we develop a generally applicable methodology, and an attendant software pipeline, that is predicated on discriminatory motif finding. In addition to the traditional training and validation partitions, our framework entails a third level of data partitioning, a discovery partition. A discriminatory motif finder is used on sequences and associated class labels in the discovery partition to yield a (small) set of features. These features are then used as inputs to a classifier in the training partition. Finally, performance assessment occurs on the validation partition. Important attributes of our approach are its modularity (any discriminatory motif finder and any classifier can be deployed) and its universality (all data, including sequences that are unaligned and/or of unequal length, can be accommodated). We illustrate our approach on two nucleosome occupancy datasets and a protein solubility dataset, previously analyzed using enumerative feature generation. Our method achieves excellent performance results, with and without optimization of classifier tuning parameters. 
A Python pipeline implementing the approach is available at http://www.epibiostat.ucsf.edu/biostat/sen/dmfs/.",2011-11-10 +22890635,"Clinical, transcranial Doppler ultrasound, radiological features and, prognostic significance of delayed cerebral ischemia.","

Objective

We aimed to investigate the profiles and prognostic values of delayed cerebral ischemia (DCI) and delayed cerebral infarction.

Methods

IMASH (Intravenous Magnesium Sulphate for Aneurysmal Subarachnoid Hemorrhage) was registered at http://www.strokecenter.org/trials , and http://www.ClinicalTrials.gov (NCT00124150). Data of 327 patients were retrieved for logistic regression analyses.

Results

Seventy-one (22%) patients developed DCI, and 35 (11%) patients developed delayed cerebral infarction. Only 18 (25%) patients with DCI and 7/35 (20%) patients with delayed cerebral infarction had mean middle cerebral artery velocities (transcranial Doppler ultrasound) over 120 cm/s. Regarding the prognostic significance of the components of DCI, delayed cerebral infarction predicted unfavorable outcome in terms of Extended Glasgow Outcome Scale (odds ratio [OR] 3.1, 95% confidence interval [CI] 1.3-7.8), poor outcome in terms of modified Rankin Scale (OR 3.0, 95% CI 1.2-7.7), and dependent activity of daily living in terms of Barthel Index (OR 3.6, 95% CI 1.4-9.2) at 6 months, after adjustments for other prognostic factors. On the other hand, clinical deterioration predicted inpatient mortality (OR 8.8, 95% CI 1.6-48.8) after adjustments for other prognostic factors.

Conclusions

Delayed cerebral ischemia and delayed cerebral infarction carried different prognostic values in aneurysmal subarachnoid hemorrhage.",2013-01-01 +22064851,"HaploReg: a resource for exploring chromatin states, conservation, and regulatory motif alterations within sets of genetically linked variants.","The resolution of genome-wide association studies (GWAS) is limited by the linkage disequilibrium (LD) structure of the population being studied. Selecting the most likely causal variants within an LD block is relatively straightforward within coding sequence, but is more difficult when all variants are intergenic. Predicting functional non-coding sequence has been recently facilitated by the availability of conservation and epigenomic information. We present HaploReg, a tool for exploring annotations of the non-coding genome among the results of published GWAS or novel sets of variants. Using LD information from the 1000 Genomes Project, linked SNPs and small indels can be visualized along with their predicted chromatin state in nine cell types, conservation across mammals and their effect on regulatory motifs. Sets of SNPs, such as those resulting from GWAS, are analyzed for an enrichment of cell type-specific enhancers. HaploReg will be useful to researchers developing mechanistic hypotheses of the impact of non-coding variants on clinical phenotypes and normal variation. The HaploReg database is available at http://compbio.mit.edu/HaploReg.",2011-11-07 +22065541,A novel structural position-specific scoring matrix for the prediction of protein secondary structures.,"

Motivation

The precise prediction of protein secondary structure is of key importance for the prediction of 3D structure and biological function. Although the development of many excellent methods over the last few decades has allowed the achievement of prediction accuracies of up to 80%, progress seems to have reached a bottleneck, and further improvements in accuracy have proven difficult.

Results

We propose for the first time a structural position-specific scoring matrix (SPSSM), and establish an unprecedented database of 9 million sequences and their SPSSMs. This database, when combined with a purpose-designed BLAST tool, provides a novel prediction tool: SPSSMPred. When the SPSSMPred was validated on a large dataset (10,814 entries), the Q3 accuracy of the protein secondary structure prediction was 93.4%. Our approach was tested on the two latest EVA sets; accuracies of 82.7 and 82.0% were achieved, far higher than can be achieved using other predictors. For further evaluation, we tested our approach on newly determined sequences (141 entries), and obtained an accuracy of 89.6%. For a set of low-homology proteins (40 entries), the SPSSMPred still achieved a Q3 value of 84.6%.

Availability

The SPSSMPred server is available at http://cal.tongji.edu.cn/SPSSMPred/

Contact

lith@tongji.edu.cn",2011-11-07 +24683959,Tree-space statistics and approximations for large-scale analysis of anatomical trees.,"Statistical analysis of anatomical trees is hard to perform due to differences in the topological structure of the trees. In this paper we define statistical properties of leaf-labeled anatomical trees with geometric edge attributes by considering the anatomical trees as points in the geometric space of leaf-labeled trees. This tree-space is a geodesic metric space where any two trees are connected by a unique shortest path, which corresponds to a tree deformation. However, tree-space is not a manifold, and the usual strategy of performing statistical analysis in a tangent space and projecting onto tree-space is not available. Using tree-space and its shortest paths, a variety of statistical properties, such as mean, principal component, hypothesis testing and linear discriminant analysis can be defined. For some of these properties it is still an open problem how to compute them; others (like the mean) can be computed, but efficient alternatives are helpful in speeding up algorithms that use means iteratively, like hypothesis testing. In this paper, we take advantage of a very large dataset (N = 8016) to obtain computable approximations, under the assumption that the data trees parametrize the relevant parts of tree-space well. Using the developed approximate statistics, we illustrate how the structure and geometry of airway trees vary across a population and show that airway trees with Chronic Obstructive Pulmonary Disease come from a different distribution in tree-space than healthy ones. Software is available from http://image.diku.dk/aasa/software.php.",2013-01-01 +22796961,Bayesian sampling of evolutionarily conserved RNA secondary structures with pseudoknots.,"

Motivation

Today many non-coding RNAs are known to play an active role in various important biological processes. Since RNA's functionality is correlated with specific structural motifs that are often conserved in phylogenetically related molecules, computational prediction of RNA structure should ideally be based on a set of homologous primary structures. But many available RNA secondary structure prediction programs that use sequence alignments do not consider pseudoknots or their estimations consist of a single structure without information on uncertainty.

Results

In this article we present a method that takes advantage of the evolutionary history of a group of aligned RNA sequences for sampling consensus secondary structures, including pseudoknots, according to their approximate posterior probability. We investigate the benefit of using evolutionary history and demonstrate the competitiveness of our method compared with similar methods based on RNase P RNA sequences and simulated data.

Availability

PhyloQFold, a C++ implementation of our method, is freely available from http://evol.bio.lmu.de/_statgen/software/phyloqfold/.",2012-07-13 +23543664,GrowthHormone Research Society workshop summary: consensus guidelines for recombinant human growth hormone therapy in Prader-Willi syndrome.,"

Context

Recombinant human GH (rhGH) therapy in Prader-Willi syndrome (PWS) has been used by the medical community and advocated by parental support groups since its approval in the United States in 2000 and in Europe in 2001. Its use in PWS represents a unique therapeutic challenge that includes treating individuals with cognitive disability, varied therapeutic goals that are not focused exclusively on increased height, and concerns about potential life-threatening adverse events.

Objective

The aim of the study was to formulate recommendations for the use of rhGH in children and adult patients with PWS.

Evidence

We performed a systematic review of the clinical evidence in the pediatric population, including randomized controlled trials, comparative observational studies, and long-term studies (>3.5 y). Adult studies included randomized controlled trials of rhGH treatment for ≥ 6 months and uncontrolled trials. Safety data were obtained from case reports, clinical trials, and pharmaceutical registries.

Methodology

Forty-three international experts and stakeholders followed clinical practice guideline development recommendations outlined by the AGREE Collaboration (www.agreetrust.org). Evidence was synthesized and graded using a comprehensive multicriteria methodology (EVIDEM) (http://bit.ly.PWGHIN).

Conclusions

Following a multidisciplinary evaluation, preferably by experts, rhGH treatment should be considered for patients with genetically confirmed PWS in conjunction with dietary, environmental, and lifestyle interventions. Cognitive impairment should not be a barrier to treatment, and informed consent/assent should include benefit/risk information. Exclusion criteria should include severe obesity, uncontrolled diabetes mellitus, untreated severe obstructive sleep apnea, active cancer, or psychosis. Clinical outcome priorities should vary depending upon age and the presence of physical, mental, and social disability, and treatment should be continued for as long as demonstrated benefits outweigh the risks.",2013-03-29 +22796270,Reduced white matter integrity in primary open-angle glaucoma: a DTI study using tract-based spatial statistics.,"

Background and purpose

The present study was designed to map the alteration of white matter in primary open-angle glaucoma (POAG) by applying tract-based spatial statistics (TBSS) analysis.

Methods

Diffusion tensor imaging (DTI) data from MRI brain scans were collected from 15 POAG patients and 15 gender- and age-matched non-glaucoma controls using a SIEMENS Trio 3.0-Tesla scanner. For the white-matter analysis, DTI images were processed using FSL software (http://www.fmrib.ox.ac.uk/fsl/index.html). Fractional anisotropy (FA) between the POAG and control groups was compared by TBSS analysis corrected for multiple comparisons using threshold-free cluster enhancement (TFCE).

Results

Compared with non-glaucoma subjects, the occipital white matter in POAG patients had significantly lower FA values (p<0.05, corrected).

Conclusion

The change in white-matter FA may indicate atrophy of the visual cortex that may be important in the diagnosis and treatment of POAG patients.",2012-07-12 +22058133,"ConoServer: updated content, knowledge, and discovery tools in the conopeptide database.","ConoServer (http://www.conoserver.org) is a database specializing in the sequences and structures of conopeptides, which are toxins expressed by marine cone snails. Cone snails are carnivorous gastropods, which hunt their prey using a cocktail of toxins that potently subvert nervous system function. The ability of these toxins to specifically target receptors, channels and transporters of the nervous system has attracted considerable interest for their use in physiological research and as drug leads. Since the founding publication on ConoServer in 2008, the number of entries in the database has nearly doubled, the interface has been redesigned and new annotations have been added, including a more detailed description of cone snail species, biological activity measurements and information regarding the identification of each sequence. Automatically updated statistics on classification schemes, three-dimensional structures, conopeptide-bearing species and endoplasmic reticulum signal sequence conservation trends, provide a convenient overview of current knowledge on conopeptides. 
Transcriptomics and proteomics have begun generating massive numbers of new conopeptide sequences, and two dedicated tools have been recently implemented in ConoServer to standardize the analysis of conopeptide precursor sequences and to help in the identification by mass spectrometry of toxins whose sequences were predicted at the nucleic acid level.",2011-11-03 +21486749,cn.FARMS: a latent variable model to detect copy number variations in microarray data with a low false discovery rate.,"Cost-effective oligonucleotide genotyping arrays like the Affymetrix SNP 6.0 are still the predominant technique to measure DNA copy number variations (CNVs). However, CNV detection methods for microarrays overestimate both the number and the size of CNV regions and, consequently, suffer from a high false discovery rate (FDR). A high FDR means that many CNVs are wrongly detected and therefore not associated with a disease in a clinical study, though correction for multiple testing takes them into account and thereby decreases the study's discovery power. For controlling the FDR, we propose a probabilistic latent variable model, 'cn.FARMS', which is optimized by a Bayesian maximum a posteriori approach. cn.FARMS controls the FDR through the information gain of the posterior over the prior. The prior represents the null hypothesis of copy number 2 for all samples from which the posterior can only deviate by strong and consistent signals in the data. On HapMap data, cn.FARMS clearly outperformed the two most prevalent methods with respect to sensitivity and FDR. The software cn.FARMS is publicly available as an R package at http://www.bioinf.jku.at/software/cnfarms/cnfarms.html.",2011-04-12 +22182631,PhosTryp: a phosphorylation site predictor specific for parasitic protozoa of the family trypanosomatidae.,"

Background

Protein phosphorylation modulates protein function in organisms at all levels of complexity. Parasites of the Leishmania genus undergo various developmental transitions in their life cycle triggered by changes in the environment. The molecular mechanisms that these organisms use to process and integrate these external cues are largely unknown. However Leishmania lacks transcription factors, therefore most regulatory processes may occur at a post-translational level and phosphorylation has recently been demonstrated to be an important player in this process. Experimental identification of phosphorylation sites is a time-consuming task. Moreover some sites could be missed due to the highly dynamic nature of this process or to difficulties in phospho-peptide enrichment.

Results

Here we present PhosTryp, a phosphorylation site predictor specific for trypanosomatids. This method uses an SVM-based approach and has been trained with recent Leishmania phosphoproteomics data. PhosTryp achieved a 17% improvement in prediction performance compared with Netphos, a non organism-specific predictor. The analysis of the peptides correctly predicted by our method but missed by Netphos demonstrates that PhosTryp captures Leishmania-specific phosphorylation features. More specifically our results show that Leishmania kinases have sequence specificities which are different from their counterparts in higher eukaryotes. Consequently we were able to propose two possible Leishmania-specific phosphorylation motifs. We further demonstrate that this improvement in performance extends to the related trypanosomatids Trypanosoma brucei and Trypanosoma cruzi. Finally, in order to maximize the usefulness of PhosTryp, we trained a predictor combining all the peptides from L. infantum, T. brucei and T. cruzi.

Conclusions

Our work demonstrates that training on organism-specific data results in an improvement that extends to related species. PhosTryp is freely available at http://phostryp.bio.uniroma2.it.",2011-12-19 +30731764,First Report of Powdery Mildew Caused by Golovinomyces ambrosiae on Ambrosia trifida in Korea.,"Ambrosia trifida L., commonly known as giant ragweed, is native to North America and was introduced to Korea in the 1970s (3). It is now widely naturalized, and since 1999, has been designated as one of 11 'harmful nonindigenous plants' by the Korean Ministry of Environment because of its adverse effects on native plants. Various strategies to eradicate this noxious weed have been tried without any success (3). In September 2009, powdery mildew infections of giant ragweed were found for the first time in Dongducheon, Korea, and specimens were isolated and deposited in the Korea University Herbarium (KUS-F24683). White mycelial and conidial growth was present mostly on adaxial leaf surfaces with sparse growth on abaxial leaf sides. Severely infected leaves were malformed. Slight purplish discoloration occurred on the leaves contiguous with colony growth. Mycelial colonies were conspicuous, amphigenous, and epiphytic with indistinct to nipple-shaped appressoria. Conidiophores were 80 to 180 μm long and produced two to five immature conidia in chains. Conidia were ellipsoid or doliiform, 28 to 38 × 16 to 24 μm, and lacked distinct fibrosin bodies. Chasmothecia were amphigenous, scattered or partly clustered, dark brown, spherical, 95 to 130 μm in diameter, and contained 6 to 16 asci. Appendages were mycelioid, numbering 10 to 24 per chasmothecium, 0.5 to 2.5 times as long as the chasmothecial diameter, 1 to 4 septate, and were brown at the base and becoming paler toward the tip. Asci were short stalked, 50 to 75 × 32 to 42 μm and contained two spores. Ascospores were ellipsoid-ovoid with a dimension of 22 to 30 × 15 to 18 μm. 
On the basis of these morphological characteristics, this fungus was identified as Golovinomyces ambrosiae (Schwein.) U. Braun & R.T.A. Cook (= G. cichoracearum var. latisporus (U. Braun) U. Braun) (1). To confirm the identification, the complete internal transcribed spacer (ITS) region of rDNA from KUS-F24683 was amplified with the primers ITS5 and P3 and sequenced (4). The resulting sequence of 508 bp was deposited in GenBank (Accession No. JF907589) and was identical to the ITS sequences of G. ambrosiae on A. artemisiifolia var. elatior from Japan (AB077631) and Korea (JF919680) as well as on A. trifida from the United States (AF011292). Therefore, the sequence analysis verified the pathogen to be G. ambrosiae. To our knowledge, this is the first record of powdery mildew infections on giant ragweed outside of North America (2). Although the disease incidence is still low, the disease could be a limiting factor to suppress the expansion of this noxious weed in Korea. References: (1) U. Braun and R. T. A. Cook. Mycol. Res. 113:616, 2009. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , May 5, 2011. (3) S. M. Oh et al. Impacts of Invasive Alien Weeds and Control Strategies of Noxious Weeds in Korea. National Institute of Agricultural Science and Technology, Suwon, Korea, 2007. (4) S. Takamatsu et al. Mycol. Res. 111:117, 2009.",2011-11-01 +30731759,First Report of Sclerotinia Blight Caused by Sclerotinia sclerotiorum on Fan Columbine (Aquilegia flabellata) in Italy.,"Fan columbine is a perennial garden species belonging to the family Ranunculaceae. During the spring of 2011, extensive wilting was observed on 5-month-old potted plants of Aquilegia flabellata grown in an experimental glasshouse belonging to the Center AGROINNOVA at Grugliasco (northern Italy). First symptoms included stem necrosis and darkening and withering of leaves. 
Plant wilt occurred a few days after the appearance of the first symptoms. Infected plants were characterized by the presence of soft, watery tissues. In the presence of high relative humidity, lesions became covered with a whitish mycelium and irregular, dark gray sclerotia (1.5 to 4.0 × 1.0 to 2.8, average 2.8 × 2.1 mm) were produced on the mycelium. Diseased tissue was surface sterilized for 1 min in 1% NaOCl and plated on potato dextrose agar (PDA) amended with 100 mg/l of streptomycin sulfate. Sclerotinia sclerotiorum (Lib.) de Bary (2) was consistently recovered from infected stem pieces. Sclerotia produced on PDA measured 2.0 to 7.0 × 2.0 to 5.0 (average 4.2 × 2.9) mm. The internal transcribed spacer (ITS) region of rDNA was amplified using the primers ITS1F/ITS4 and sequenced. BLAST analysis (1) of the 575-bp segment showed a 100% homology with the sequence of S. sclerotiorum (EF091809). The nucleotide sequence has been assigned the GenBank Accession No. JN013184. Pathogenicity of one isolate obtained from sclerotia of infected plants was confirmed by inoculating three 6-month-old plants transplanted in 16-cm-diameter pots in a glasshouse in a sphagnum peat/pomix/pine bark/clay (50:20:20:10) mix. Inoculum that consisted of 3 g/l of substrate of sterile wheat kernels infested with mycelium and sclerotia was placed in the soil and around the base of each plant. Three noninoculated plants served as controls. Plants were maintained in a growth chamber at 21 ± 1°C and relative humidity >90%. The inoculation trial was carried out twice. All inoculated plants developed leaf yellowing within 15 days of soil infestation. White, cottony mycelium and dark sclerotia developed on stems and at the base of all inoculated plants. Eventually, infected plants wilted. Control plants remained symptomless. S. sclerotiorum was reisolated from the stems of inoculated plants. To our knowledge, this is the first report of S. sclerotiorum on A. flabellata in Italy. 
The disease has been previously reported on A. vulgaris in the United States (3) and A. glandulosa in Russia (4). The economic importance of this disease in Italy is currently limited. References: (1) S. F. Altschul et al. Nucleic Acids Res. 25:3389, 1997. (2) N. F. Buchwald. Kgl. Veterisk Landb. Aarssk. 75, 1949. (3) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , July 7, 2011. (4) P. M. Zhiboedov et al. Mikol. Fitopatol, 36:48, 2002.",2011-11-01 +22069465,Segtor: rapid annotation of genomic coordinates and single nucleotide variations using segment trees.,"Various research projects often involve determining the relative position of genomic coordinates, intervals, single nucleotide variations (SNVs), insertions, deletions and translocations with respect to genes and their potential impact on protein translation. Due to the tremendous increase in throughput brought by the use of next-generation sequencing, investigators are routinely faced with the need to annotate very large datasets. We present Segtor, a tool to annotate large sets of genomic coordinates, intervals, SNVs, indels and translocations. Our tool uses segment trees built using the start and end coordinates of the genomic features the user wishes to use instead of storing them in a database management system. The software also produces annotation statistics to allow users to visualize how many coordinates were found within various portions of genes. Our system currently can be made to work with any species available on the UCSC Genome Browser. Segtor is a suitable tool for groups, especially those with limited access to programmers or with interest to analyze large amounts of individual genomes, who wish to determine the relative position of very large sets of mapped reads and subsequently annotate observed mutations between the reads and the reference. 
Segtor (http://lbbc.inca.gov.br/segtor/) is an open-source tool that can be freely downloaded for non-profit use. We also provide a web interface for testing purposes.",2011-11-01 +21339531,CUDA-BLASTP: accelerating BLASTP on CUDA-enabled graphics hardware.,"Scanning protein sequence database is an often repeated task in computational biology and bioinformatics. However, scanning large protein databases, such as GenBank, with popular tools such as BLASTP requires long runtimes on sequential architectures. Due to the continuing rapid growth of sequence databases, there is a high demand to accelerate this task. In this paper, we demonstrate how GPUs, powered by the Compute Unified Device Architecture (CUDA), can be used as an efficient computational platform to accelerate the BLASTP algorithm. In order to exploit the GPU’s capabilities for accelerating BLASTP, we have used a compressed deterministic finite state automaton for hit detection as well as a hybrid parallelization scheme. Our implementation achieves speedups up to 10.0 on an NVIDIA GeForce GTX 295 GPU compared to the sequential NCBI BLASTP 2.2.22. CUDA-BLASTP source code is available at https://sites.google.com/site/liuweiguohome/software.",2011-11-01 +23731338,Do not hesitate to use Tversky-and other hints for successful active analogue searches with feature count descriptors.,"This study is an exhaustive analysis of the neighborhood behavior over a large coherent data set (ChEMBL target/ligand pairs of known Ki, for 165 targets with >50 associated ligands each). It focuses on similarity-based virtual screening (SVS) success defined by the ascertained optimality index. This is a weighted compromise between purity and retrieval rate of active hits in the neighborhood of an active query. One key issue addressed here is the impact of Tversky asymmetric weighing of query vs candidate features (represented as integer-value ISIDA colored fragment/pharmacophore triplet count descriptor vectors). 
The nearly 3/4 million independent SVS runs showed that Tversky scores with a strong bias in favor of query-specific features are, by far, the most successful and the least failure-prone out of a set of nine other dissimilarity scores. These include classical Tanimoto, which failed to defend its privileged status in practical SVS applications. Tversky performance is not significantly conditioned by tuning of its bias parameter α. Both initial ""guesses"" of α = 0.9 and 0.7 were more successful than Tanimoto (in turn, better than Euclid). Tversky was eventually tested in exhaustive similarity searching within the library of 1.6 M commercial + bioactive molecules at http://infochim.u-strasbg.fr/webserv/VSEngine.html , comparing favorably to Tanimoto in terms of ""scaffold hopping"" propensity. Therefore, it should be used at least as often as, perhaps in parallel to Tanimoto in SVS. Analysis with respect to query subclasses highlighted relationships of query complexity (simply expressed in terms of pharmacophore pattern counts) and/or target nature vs SVS success likelihood. SVS using more complex queries are more robust with respect to the choice of their operational premises (descriptors, metric). Yet, they are best handled by ""pro-query"" Tversky scores at α > 0.5. Among simpler queries, one may distinguish between ""growable"" (allowing for active analogs with additional features), and a few ""conservative"" queries not allowing any growth. These (typically bioactive amine transporter ligands) form the specific application domain of ""pro-candidate"" biased Tversky scores at α < 0.5.",2013-06-13 +22553364,DSP: a protein shape string and its profile prediction server.,"Many studies have demonstrated that shape string is an extremely important structure representation, since it is more complete than the classical secondary structure. The shape string provides detailed information also in the regions denoted random coil. 
But few services are provided for systematic analysis of protein shape string. To fill this gap, we have developed an accurate shape string predictor based on two innovative technologies: a knowledge-driven sequence alignment and a sequence shape string profile method. The performance on blind test data demonstrates that the proposed method can be used for accurate prediction of protein shape string. The DSP server provides both predicted shape string and sequence shape string profile for each query sequence. Using this information, the users can compare protein structure or display protein evolution in shape string space. The DSP server is available at both http://cheminfo.tongji.edu.cn/dsp/ and its main mirror http://chemcenter.tongji.edu.cn/dsp/.",2012-05-02 +22125388,HGT-Gen: a tool for generating a phylogenetic tree with horizontal gene transfer.,"

Unlabelled

Horizontal gene transfer (HGT) is a common event in prokaryotic evolution. Therefore, it is very important to consider HGT in the study of molecular evolution of prokaryotes. This is true also for conducting computer simulations of their molecular phylogeny because HGT is known to be a serious disturbing factor for estimating their correct phylogeny. To the best of our knowledge, no existing computer program has generated a phylogenetic tree with HGT from an original phylogenetic tree. We developed a program called HGT-Gen that generates a phylogenetic tree with HGT on the basis of an original phylogenetic tree of a protein or gene. HGT-Gen converts an operational taxonomic unit or a clade from one place to another in a given phylogenetic tree. We have also devised an algorithm to compute the average length between any pair of branches in the tree. It defines and computes the relative evolutionary time to normalize evolutionary time for each lineage. The algorithm can generate an HGT between a pair of donor and acceptor lineages at the same evolutionary time. HGT-Gen is used with a sequence-generating program to evaluate the influence of HGT on the molecular phylogeny of prokaryotes in a computer simulation study.

Availability

The database is available for free at http://www.grl.shizuoka.ac.jp/~thoriike/HGT-Gen.html.",2011-10-31 +23267171,SRmapper: a fast and sensitive genome-hashing alignment tool.,"

Unlabelled

Modern sequencing instruments have the capability to produce millions of short reads every day. The large number of reads produced in conjunction with variations between reads and reference genomic sequences caused both by legitimate differences, such as single-nucleotide polymorphisms and insertions/deletions (indels), and by sequencer errors make alignment a difficult and computationally expensive task, and many reads cannot be aligned. Here, we introduce a new alignment tool, SRmapper, which in tests using real data can align 10s of billions of base pairs from short reads to the human genome per computer processor day. SRmapper tolerates a higher number of mismatches than current programs based on Burrows-Wheeler transform and finds about the same number of alignments in 2-8× less time depending on read length (with higher performance gain for longer read length). The current version of SRmapper aligns both single and pair-end reads in base space fastq format and outputs alignments in Sequence Alignment/Map format. SRmapper uses a probabilistic approach to set a default number of mismatches allowed and determines alignment quality. SRmapper's memory footprint (∼2.5 GB) is small enough that it can be run on a computer with 4 GB of random access memory for a genome the size of a human. Finally, SRmapper is designed so that its function can be extended to finding small indels as well as long deletions and chromosomal translocations in future versions.

Availability

http://www.umsl.edu/~wongch/software.html.",2012-12-24 +23078046,Quantitative prediction of antitarget interaction profiles for chemical compounds.,"The evaluation of possible interactions between chemical compounds and antitarget proteins is an important task of the research and development process. Here, we describe the development and validation of QSAR models for the prediction of antitarget end-points, created on the basis of multilevel and quantitative neighborhoods of atom descriptors and self-consistent regression. Data on 4000 chemical compounds interacting with 18 antitarget proteins (13 receptors, 2 enzymes, and 3 transporters) were used to model 32 sets of end-points (IC(50), K(i), and K(act)). Each set was randomly divided into training and test sets in a ratio of 80% to 20%, respectively. The test sets were used for external validation of QSAR models created on the basis of the training sets. The coverage of prediction for all test sets exceeded 95%, and for half of the test sets, it was 100%. The accuracy of prediction for 29 of the end-points, based on the external test sets, was typically in the range of R(2)(test) = 0.6-0.9; three tests sets had lower R(2)(test) values, specifically 0.55-0.6. The proposed approach showed a reasonable accuracy of prediction for 91% of the antitarget end-points and high coverage for all external test sets. On the basis of the created models, we have developed a freely available online service for in silico prediction of 32 antitarget end-points: http://www.pharmaexpert.ru/GUSAR/antitargets.html.",2012-11-02 +22037400,Comprehensive transcriptome analysis of the periodontopathogenic bacterium Porphyromonas gingivalis W83.,"High-density tiling microarray and RNA sequencing technologies were used to analyze the transcriptome of the periodontopathogenic bacterium Porphyromonas gingivalis. The compiled P. 
gingivalis transcriptome profiles were based on total RNA samples isolated from three different laboratory culturing conditions, and the strand-specific transcription profiles generated covered the entire genome, including both protein coding and noncoding regions. The transcription profiles revealed various operon structures, 5'- and 3'-end untranslated regions (UTRs), differential expression patterns, and many novel, not-yet-annotated transcripts within intergenic and antisense regions. Further transcriptome analysis identified the majority of the genes as being expressed within operons and most 5' and 3' ends to be protruding UTRs, of which several 3' UTRs were extended to overlap genes carried on the opposite/antisense strand. Extensive antisense RNAs were detected opposite most insertion sequence (IS) elements. Pairwise comparative analyses were also performed among transcriptome profiles of the three culture conditions, and differentially expressed genes and metabolic pathways were identified. With the growing realization that noncoding RNAs play important biological functions, the discovery of novel RNAs and the comprehensive transcriptome profiles compiled in this study may provide a foundation to further understand the gene regulation and virulence mechanisms in P. gingivalis. The transcriptome profiles can be viewed at and downloaded from the Microbial Transcriptome Database website, http://bioinformatics.forsyth.org/mtd.",2011-10-28 +22465017,The UniProtKB/Swiss-Prot Tox-Prot program: A central hub of integrated venom protein data.,"Animal toxins are of interest to a wide range of scientists, due to their numerous applications in pharmacology, neurology, hematology, medicine, and drug research. This, and to a lesser extent the development of new performing tools in transcriptomics and proteomics, has led to an increase in toxin discovery. In this context, providing publicly available data on animal toxins has become essential. 
The UniProtKB/Swiss-Prot Tox-Prot program (http://www.uniprot.org/program/Toxins) plays a crucial role by providing such an access to venom protein sequences and functions from all venomous species. This program has up to now curated more than 5000 venom proteins to the high-quality standards of UniProtKB/Swiss-Prot (release 2012_02). Proteins targeted by these toxins are also available in the knowledgebase. This paper describes in detail the type of information provided by UniProtKB/Swiss-Prot for toxins, as well as the structured format of the knowledgebase.",2012-03-23 +22856375,EnigmaVis: online interactive visualization of genome-wide association studies of the Enhancing NeuroImaging Genetics through Meta-Analysis (ENIGMA) consortium.,"In an attempt to increase power to detect genetic associations with brain phenotypes derived from human neuroimaging data, we recently conducted a large-scale, genome-wide association meta-analysis of hippocampal, brain, and intracranial volume through the Enhancing NeuroImaging Genetics through Meta-Analysis (ENIGMA) consortium. Here, we present a freely available online interactive tool, EnigmaVis, which makes it easy to visualize the association results generated by the consortium alongside allele frequency, genes, and functional annotations. EnigmaVis runs natively within the web browser, and generates plots that show the level of association between brain phenotypes at user-specified genomic positions. Uniquely, EnigmaVis is dynamic; users can interact with elements on the plot in real time. This software will be useful when exploring the effect on brain structure of particular genetic variants influencing neuropsychiatric illness and cognitive function. Future projects of the consortium and updates to EnigmaVis will also be displayed on the site. 
EnigmaVis is freely available online at http://enigma.loni.ucla.edu/enigma-vis/",2012-06-01 +21542935,A library of protein surface patches discriminates between native structures and decoys generated by structure prediction servers.,"

Background

Protein surfaces serve as an interface with the molecular environment and are thus tightly bound to protein function. On the surface, geometric and chemical complementarity to other molecules provides interaction specificity for ligand binding, docking of bio-macromolecules, and enzymatic catalysis. As of today, there is no accepted general scheme to represent protein surfaces. Furthermore, most of the research on protein surface focuses on regions of specific interest such as interaction, ligand binding, and docking sites. We present a first step toward a general purpose representation of protein surfaces: a novel surface patch library that represents most surface patches (~98%) in a data set regardless of their functional roles.

Results

Surface patches, in this work, are small fractions of the protein surface. Using a measure of inter-patch distance, we clustered patches extracted from a data set of high quality, non-redundant, proteins. The surface patch library is the collection of all the cluster centroids; thus, each of the data set patches is close to one of the elements in the library. We demonstrate the biological significance of our method through the ability of the library to capture surface characteristics of native protein structures as opposed to those of decoy sets generated by state-of-the-art protein structure prediction methods. The patches of the decoys are significantly less compatible with the library than their corresponding native structures, allowing us to reliably distinguish native models from models generated by servers. This trend, however, does not extend to the decoys themselves, as their similarity to the native structures does not correlate with compatibility with the library.

Conclusions

We expect that this high-quality, generic surface patch library will add a new perspective to the description of protein structures and improve our ability to predict them. In particular, we expect that it will help improve the prediction of surface features that are apparently neglected by current techniques. The surface patch libraries are publicly available at http://www.cs.bgu.ac.il/~keasar/patchLibrary.",2011-05-04 +21729866,A statistical framework for biomarker discovery in metabolomic time course data.,"

Motivation

Metabolomics is the study of the complement of small molecule metabolites in cells, biofluids and tissues. Many metabolomic experiments are designed to compare changes observed over time under two experimental conditions or groups (e.g. a control and drug-treated group) with the goal of identifying discriminatory metabolites or biomarkers that characterize each condition. A common study design consists of repeated measurements taken on each experimental unit thus producing time courses of all metabolites. We describe a statistical framework for estimating time-varying metabolic profiles and their within-group variability and for detecting between-group differences. Specifically, we propose (i) a smoothing splines mixed effects (SME) model that treats each longitudinal measurement as a smooth function of time and (ii) an associated functional test statistic. Statistical significance is assessed by a non-parametric bootstrap procedure.

Results

The methodology has been extensively evaluated using simulated data and has been applied to real nuclear magnetic resonance spectroscopy data collected in a preclinical toxicology study as part of a larger project led by the COMET (Consortium for Metabonomic Toxicology). Our findings are compatible with the previously published studies.

Availability

An R script is freely available for download at http://www2.imperial.ac.uk/~gmontana/sme.htm.",2011-07-01 +22689754,Incorporating prior information into association studies.,"

Unlabelled

Recent technological developments in measuring genetic variation have ushered in an era of genome-wide association studies which have discovered many genes involved in human disease. Current methods to perform association studies collect genetic information and compare the frequency of variants in individuals with and without the disease. Standard approaches do not take into account any information on whether or not a given variant is likely to have an effect on the disease. We propose a novel method for computing an association statistic which takes into account prior information. Our method improves both power and resolution by 8% and 27%, respectively, over traditional methods for performing association studies when applied to simulations using the HapMap data. Advantages of our method are that it is as simple to apply to association studies as standard methods, the results of the method are interpretable as the method reports p-values, and the method is optimal in its use of prior information in regards to statistical power.

Availability

The method presented herein is available at http://masa.cs.ucla.edu.",2012-06-01 +23034802,MiRmap: comprehensive prediction of microRNA target repression strength.,"MicroRNAs, or miRNAs, post-transcriptionally repress the expression of protein-coding genes. The human genome encodes over 1000 miRNA genes that collectively target the majority of messenger RNAs (mRNAs). Base pairing of the so-called miRNA 'seed' region with mRNAs identifies many thousands of putative targets. Evaluating the strength of the resulting mRNA repression remains challenging, but is essential for a biologically informative ranking of potential miRNA targets. To address these challenges, predictors may use thermodynamic, evolutionary, probabilistic or sequence-based features. We developed an open-source software library, miRmap, which for the first time comprehensively covers all four approaches using 11 predictor features, 3 of which are novel. This allowed us to examine feature correlations and to compare their predictive power in an unbiased way using high-throughput experimental data from immunopurification, transcriptomics, proteomics and polysome fractionation experiments. Overall, target site accessibility appears to be the most predictive feature. Our novel feature based on PhyloP, which evaluates the significance of negative selection, is the best performing predictor in the evolutionary category. We combined all the features into an integrated model that almost doubles the predictive power of TargetScan. miRmap is freely available from http://cegg.unige.ch/mirmap.",2012-10-02 +22056846,"Tripping over emerging pathogens around the world: a phylogeographical approach for determining the epidemiology of Porcine circovirus-2 (PCV-2), considering global trading.","Porcine circovirus-2 (PCV-2) is an emerging virus associated with a number of different syndromes in pigs known as Porcine Circovirus Associated Diseases (PCVAD). 
Since its identification and characterization in the early 1990s, PCV-2 has achieved a worldwide distribution, becoming endemic in most pig-producing countries, and is currently considered as the main cause of losses on pig farms. In this study, we analyzed the main routes of the spread of PCV-2 between pig-producing countries using phylogenetic and phylogeographical approaches. A search for PCV-2 genome sequences in GenBank was performed, and the 420 PCV-2 sequences obtained were grouped into haplotypes (group of sequences that showed 100% identity), based on the infinite sites model of genome evolution. A phylogenetic hypothesis was inferred by Bayesian Inference for the classification of viral strains and a haplotype network was constructed by Median Joining to predict the geographical distribution of and genealogical relationships between haplotypes. In order to establish an epidemiological and economic context in these analyses, we considered all information about PCV-2 sequences available in GenBank, including papers published on viral isolation, and live pig trading statistics available on the UN Comtrade database (http://comtrade.un.org/). In these analyses, we identified a strong correlation between the means of PCV-2 dispersal predicted by the haplotype network and the statistics on the international trading of live pigs. This correlation provides a new perspective on the epidemiology of PCV-2, highlighting the importance of the movement of animals around the world in the emergence of new pathogens, and showing the need for effective sanitary barriers when trading live animals.",2011-10-26 +23113980,A novel algorithm for simultaneous SNP selection in high-dimensional genome-wide association studies.,"

Background

Identification of causal SNPs in most genome wide association studies relies on approaches that consider each SNP individually. However, there is a strong correlation structure among SNPs that needs to be taken into account. Hence, increasingly modern computationally expensive regression methods are employed for SNP selection that consider all markers simultaneously and thus incorporate dependencies among SNPs.

Results

We develop a novel multivariate algorithm for large scale SNP selection using CAR score regression, a promising new approach for prioritizing biomarkers. Specifically, we propose a computationally efficient procedure for shrinkage estimation of CAR scores from high-dimensional data. Subsequently, we conduct a comprehensive comparison study including five advanced regression approaches (boosting, lasso, NEG, MCP, and CAR score) and a univariate approach (marginal correlation) to determine the effectiveness in finding true causal SNPs.

Conclusions

Simultaneous SNP selection is a challenging task. We demonstrate that our CAR score-based algorithm consistently outperforms all competing approaches, both uni- and multivariate, in terms of correctly recovered causal SNPs and SNP ranking. An R package implementing the approach as well as R code to reproduce the complete study presented here is available from http://strimmerlab.org/software/care/.",2012-10-31 +23259794,"RNAmap2D - calculation, visualization and analysis of contact and distance maps for RNA and protein-RNA complex structures.","

Background

The structures of biological macromolecules provide a framework for studying their biological functions. Three-dimensional structures of proteins, nucleic acids, or their complexes, are difficult to visualize in detail on flat surfaces, and algorithms for their spatial superposition and comparison are computationally costly. Molecular structures, however, can be represented as 2D maps of interactions between the individual residues, which are easier to visualize and compare, and which can be reconverted to 3D structures with reasonable precision. There are many visualization tools for maps of protein structures, but few for nucleic acids.

Results

We developed RNAmap2D, a platform-independent software tool for calculation, visualization and analysis of contact and distance maps for nucleic acid molecules and their complexes with proteins or ligands. The program addresses the problem of paucity of bioinformatics tools dedicated to analyzing RNA 2D maps, given the growing number of experimentally solved RNA structures in the Protein Data Bank (PDB) repository, as well as the growing number of tools for RNA 2D and 3D structure prediction. RNAmap2D allows for calculation and analysis of contacts and distances between various classes of atoms in nucleic acid, protein, and small ligand molecules. It also discriminates between different types of base pairing and stacking.

Conclusions

RNAmap2D is an easy to use method to visualize, analyze and compare structures of nucleic acid molecules and their complexes with other molecules, such as proteins or ligands and metal ions. Its special features make it a very useful tool for analysis of tertiary structures of RNAs. RNAmap2D for Windows/Linux/MacOSX is freely available for academic users at http://iimcb.genesilico.pl/rnamap2d.html.",2012-12-21 +21791535,ReplacementMatrix: a web server for maximum-likelihood estimation of amino acid replacement rate matrices.,"

Summary

Amino acid replacement rate matrices are an essential basis of protein studies (e.g. in phylogenetics and alignment). A number of general purpose matrices have been proposed (e.g. JTT, WAG, LG) since the seminal work of Margaret Dayhoff and co-workers. However, it has been shown that matrices specific to certain protein groups (e.g. mitochondrial) or life domains (e.g. viruses) differ significantly from general average matrices, and thus perform better when applied to the data to which they are dedicated. This Web server implements the maximum-likelihood estimation procedure that was used to estimate LG, and provides a number of tools and facilities. Users upload a set of multiple protein alignments from their domain of interest and receive the resulting matrix by email, along with statistics and comparisons with other matrices. A non-parametric bootstrap is performed optionally to assess the variability of replacement rate estimates. Maximum-likelihood trees, inferred using the estimated rate matrix, are also computed optionally for each input alignment. Finely tuned procedures and up-to-date ML software (PhyML 3.0, XRATE) are combined to perform all these heavy calculations on our clusters.

Availability

http://www.atgc-montpellier.fr/ReplacementMatrix/

Contact

olivier.gascuel@lirmm.fr

Supplementary information

Supplementary data are available at http://www.atgc-montpellier.fr/ReplacementMatrix/",2011-07-26 +21546398,PLIO: an ontology for formal description of protein-ligand interactions.,"

Motivation

Biomedical ontologies have proved to be valuable tools for data analysis and data interoperability. Protein-ligand interactions are key players in drug discovery and development; however, existing public ontologies that describe the knowledge space of biomolecular interactions do not cover all aspects relevant to pharmaceutical modelling and simulation.

Results

The protein-ligand interaction ontology (PLIO) was developed around three main concepts, namely target, ligand and interaction, and was enriched by adding synonyms, useful annotations and references. The quality of the ontology was assessed based on structural, functional and usability features. Validation of the lexicalized ontology by means of natural language processing (NLP)-based methods showed a satisfactory performance (F-score = 81%). Through integration into our information retrieval environment we can demonstrate that PLIO supports lexical search in PubMed abstracts. The usefulness of PLIO is demonstrated by two use-case scenarios and it is shown that PLIO is able to capture both confirmatory and new knowledge from simulation and empirical studies.

Availability

The PLIO ontology is made freely available to the public at http://www.scai.fraunhofer.de/bioinformatics/downloads.html.",2011-05-05 +22716043,"Directory of useful decoys, enhanced (DUD-E): better ligands and decoys for better benchmarking.","A key metric to assess molecular docking remains ligand enrichment against challenging decoys. Whereas the directory of useful decoys (DUD) has been widely used, clear areas for optimization have emerged. Here we describe an improved benchmarking set that includes more diverse targets such as GPCRs and ion channels, totaling 102 proteins with 22886 clustered ligands drawn from ChEMBL, each with 50 property-matched decoys drawn from ZINC. To ensure chemotype diversity, we cluster each target's ligands by their Bemis-Murcko atomic frameworks. We add net charge to the matched physicochemical properties and include only the most dissimilar decoys, by topology, from the ligands. An online automated tool (http://decoys.docking.org) generates these improved matched decoys for user-supplied ligands. We test this data set by docking all 102 targets, using the results to improve the balance between ligand desolvation and electrostatics in DOCK 3.6. The complete DUD-E benchmarking set is freely available at http://dude.docking.org.",2012-07-05 +22040741,A proteome reference map and virulence factors analysis of Yersinia pestis 91001.,"In this report, we carried out the in-depth proteomic analysis of Yersinia pestis strain 91001 under in vitro flea-simulated condition using three technique routes, SDS-PAGE combined with LTQ-FT, two-dimensional liquid chromatography peptide (2D-LC peptide) separation combined with LTQ-FT and intact protein separation followed by 2D-LC peptide separation combined with LTQ-FT. Totally, 1926 proteins (13082 peptides) were identified, covering 46.50% (1926/4142) of the predicted proteome. Transcriptome analysis based on a whole genome DNA microarray of Y. 
pestis defined 1655 genes with the coincidence of 56.65% to the proteomic results. Through analyzing the identifications of virulence factors involving in the life cycle of Y. pestis, it was found that Hms system and murine toxin, which are virulence factors involved in Y. pestis maintenance in flea, were highly expressed in our analysis. Moreover, some virulence factors also appeared with different extents, such as plasminogen activator, PhoP/PhoQ two-component system, type III secretion system, iron acquisition systems (Ybt, Yfe and Yfu) and ferric uptake regulator. These results indicated that Y. pestis may prepare itself with various strategies in advance for its survival when it evades the hosts. The protein identifications can be accessed through PRIDE database (http://www.ebi.ac.uk/pride) with accession number 18578-18605",2011-10-25 +24035024,Evaluation of rumen-protected lysine supplementation to lactating dairy cows consuming increasing amounts of distillers dried grains with solubles.,"Twenty multiparous Holstein cows were used in four 5 × 5 Latin squares to determine the effects of feeding increasing amounts of distillers dried grains with solubles (DDGS) in diets with or without the supplementation (60 g/d) of a rumen-protected Lys (RPL) product (AminoShure-L, 38% l-Lys; Balchem Encapsulates, New Hampton, NY) on milk yield and composition and plasma concentration of AA. Dietary treatments were (1) control (CON; no DDGS), (2) 10% DDGS (10DG), (3) 20% DDGS (20DG), (4) 10% DDGS plus RPL (10DGRPL), and (5) 20% DDGS plus RPL (20DGRPL). Diets were formulated using the Cornell-Penn-Miner Dairy model (CPM v3.0; http://cahpwww.vet.upenn.edu/node/77) to provide a predicted decreasing supply of Lys (117, 99, and 91% of requirements) for the CON, 10DG, and 20DG diets, respectively. Addition of RPL to the 10DG and 20DG diets (unsupplemented diets) resulted in 2 additional treatments, 10DGRPL and 20DGRPL diets, respectively. 
The 10DGRPL and 20DGRPL diets met 110 and 100% of the Lys requirements, respectively. Periods lasted 21d, with the last 3d for data collection. Compared with cows fed the CON diet, cows fed diets with DDGS had a similar dry matter intake (DMI; 25.4 ± 0.88 kg/d), milk yield (30.7 ± 1.67 kg/d), and composition, except for protein percentage, which was higher (3.15 vs. 3.21 ± 0.05%) and resulted in higher (0.94 vs. 1.00 ± 0.05 kg/d) protein yield by cows fed diets containing 20% DDGS. Unexpectedly, despite diets being formulated based on predicted DMI of 23.3 kg/d and milk yield of 38.5 kg/d, cows had a greater DMI and lower milk yield across all treatments, which resulted in diets that were predicted by CPM Dairy to supply sufficient amounts of Lys (140, 118, and 104% of requirement for the CON, 10 DG, and 20 DG diet, respectively) and consequently, supplementation with RPL did not have an effect on milk production or composition. Plasma concentration of Lys decreased (11.8%) as DDGS inclusion increased. For other essential AA, plasma concentrations of cows fed diets with DDGS were lower for Arg, His, and Val and greater for Leu and Met compared with cows fed the CON diet. Supplementation with RPL failed to decrease the plasma concentration of other essential AA, which provides support that Lys was not limiting.",2013-09-12 +24091406,Reformulated Kemeny optimal aggregation with application in consensus ranking of microRNA targets.,"MicroRNAs are very recently discovered small noncoding RNAs, responsible for negative regulation of gene expression. Members of this endogenous family of small RNA molecules have been found implicated in many genetic disorders. Each microRNA targets tens to hundreds of genes. Experimental validation of target genes is a time- and cost-intensive procedure. Therefore, prediction of microRNA targets is a very important problem in computational biology. 
Though, dozens of target prediction algorithms have been reported in the past decade, they disagree significantly in terms of target gene ranking (based on predicted scores). Rank aggregation is often used to combine multiple target orderings suggested by different algorithms. This technique has been used in diverse fields including social choice theory, meta search in web, and most recently, in bioinformatics. Kemeny optimal aggregation (KOA) is considered the more profound objective for rank aggregation. The consensus ordering obtained through Kemeny optimal aggregation incurs minimum pairwise disagreement with the input orderings. Because of its computational intractability, heuristics are often formulated to obtain a near optimal consensus ranking. Unlike its real time use in meta search, there are a number of scenarios in bioinformatics (e.g., combining microRNA target rankings, combining disease-related gene rankings obtained from microarray experiments) where evolutionary approaches can be afforded with the ambition of better optimization. We conjecture that an ideal consensus ordering should have its total disagreement shared, as equally as possible, with the input orderings. This is also important to refrain the evolutionary processes from getting stuck to local extremes. In the current work, we reformulate Kemeny optimal aggregation while introducing a trade-off between the total pairwise disagreement and its distribution. A simulated annealing-based implementation of the proposed objective has been found effective in context of microRNA target ranking. Supplementary data and source code link are available at: >http://www.isical.ac.in/bioinfo_miu/ieee_tcbb_kemeny.rar.",2013-05-01 +22724294,StruLocPred: structure-based protein subcellular localisation prediction using multi-class support vector machine.,"Knowledge of protein subcellular locations can help decipher a protein's biological function. 
This work proposes new features: sequence-based: Hybrid Amino Acid Pair (HAAP) and two structure-based: Secondary Structural Element Composition (SSEC) and solvent accessibility state frequency. A multi-class Support Vector Machine is developed to predict the locations. Testing on two established data sets yields better prediction accuracies than the best available systems. Comparisons with existing methods show comparable results to ESLPred2. When StruLocPred is applied to the entire Arabidopsis proteome, over 77% of proteins with known locations match the prediction results. An implementation of this system is at http://wgzhou.ece.iastate.edu/StruLocPred/.",2012-01-01 +22321700,Mytoe: automatic analysis of mitochondrial dynamics.,"

Summary

We present Mytoe, a tool for analyzing mitochondrial morphology and dynamics from fluorescence microscope images. The tool provides automated quantitative analysis of mitochondrial motion by optical flow estimation and of morphology by segmentation of individual branches of the network-like structure of the organelles. Mytoe quantifies several features of individual branches, such as length, tortuosity and speed, and of the macroscopic structure, such as mitochondrial area and degree of clustering. We validate the methods and apply them to the analysis of sequences of images of U2OS human cells with fluorescently labeled mitochondria.

Availability

Source code, Windows software and Manual available at http://www.cs.tut.fi/%7Esanchesr/mito

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

eero.lihavainen@tut.fi; andre.ribeiro@tut.fi.",2012-02-09 +22962458,Genomic context analysis reveals dense interaction network between vertebrate ultraconserved non-coding elements.,"

Motivation

Genomic context analysis, also known as phylogenetic profiling, is widely used to infer functional interactions between proteins but rarely applied to non-coding cis-regulatory DNA elements. We were wondering whether this approach could provide insights about ultraconserved non-coding elements (UCNEs). These elements are organized as large clusters, so-called gene regulatory blocks (GRBs) around key developmental genes. Their molecular functions and the reasons for their high degree of conservation remain enigmatic.

Results

In a special setting of genomic context analysis, we analyzed the fate of GRBs after a whole-genome duplication event in five fish genomes. We found that in most cases all UCNEs were retained together as a single block, whereas the corresponding target genes were often retained in two copies, one completely devoid of UCNEs. This 'winner-takes-all' pattern suggests that UCNEs of a GRB function in a highly cooperative manner. We propose that the multitude of interactions between UCNEs is the reason for their extreme sequence conservation.

Supplementary information

Supplementary data are available at Bioinformatics online and at http://ccg.vital-it.ch/ucne/",2012-09-01 +21357752,RNAcode: robust discrimination of coding and noncoding regions in comparative sequence data.,"With the availability of genome-wide transcription data and massive comparative sequencing, the discrimination of coding from noncoding RNAs and the assessment of coding potential in evolutionarily conserved regions arose as a core analysis task. Here we present RNAcode, a program to detect coding regions in multiple sequence alignments that is optimized for emerging applications not covered by current protein gene-finding software. Our algorithm combines information from nucleotide substitution and gap patterns in a unified framework and also deals with real-life issues such as alignment and sequencing errors. It uses an explicit statistical model with no machine learning component and can therefore be applied ""out of the box,"" without any training, to data from all domains of life. We describe the RNAcode method and apply it in combination with mass spectrometry experiments to predict and confirm seven novel short peptides in Escherichia coli and to analyze the coding potential of RNAs previously annotated as ""noncoding."" RNAcode is open source software and available for all major platforms at http://wash.github.com/rnacode.",2011-02-28 +22847936,EuGene: maximizing synthetic gene design for heterologous expression.,"

Unlabelled

Numerous software applications exist to deal with synthetic gene design, granting the field of heterologous expression a significant support. However, their dispersion requires the access to different tools and online services in order to complete one single project. Analyzing codon usage, calculating codon adaptation index (CAI), aligning orthologs and optimizing genes are just a few examples. A software application, EuGene, was developed for the optimization of multiple gene synthetic design algorithms. In a seamless automatic form, EuGene calculates or retrieves genome data on codon usage (relative synonymous codon usage and CAI), codon context (CPS and codon pair bias), GC content, hidden stop codons, repetitions, deleterious sites, protein primary, secondary and tertiary structures, gene orthologs, species housekeeping genes, performs alignments and identifies genes and genomes. The main function of EuGene is analyzing and redesigning gene sequences using multi-objective optimization techniques that maximize the coding features of the resulting sequence.

Availability

EuGene is freely available for non-commercial use, at http://bioinformatics.ua.pt/eugene.",2012-07-30 +21901790,Novel LOVD databases for hereditary breast cancer and colorectal cancer genes in the Chinese population.,"The Human Variome Project (HVP) is an international consortium of clinicians, geneticists, and researchers from over 30 countries, aiming to facilitate the establishment and maintenance of standards, systems, and infrastructure for the worldwide collection and sharing of all genetic variations effecting human disease. The HVP-China Node will build new and supplement existing databases of genetic diseases. As the first effort, we have created a novel variant database of BRCA1 and BRCA2, mismatch repair genes (MMR), and APC genes for breast cancer, Lynch syndrome, and familial adenomatous polyposis (FAP), respectively, in the Chinese population using the Leiden Open Variation Database (LOVD) format. We searched PubMed and some Chinese search engines to collect all the variants of these genes in the Chinese population that have already been detected and reported. There are some differences in the gene variants between the Chinese population and that of other ethnicities. The database is available online at http://www.genomed.org/LOVD/. Our database will appear to users who survey other LOVD databases (e.g., by Google search, or by NCBI GeneTests search). Remote submissions are accepted, and the information is updated monthly.",2011-10-20 +22013895,The oomycete broad-host-range pathogen Phytophthora capsici.,"

Unlabelled

Phytophthora capsici is a highly dynamic and destructive pathogen of vegetables. It attacks all cucurbits, pepper, tomato and eggplant, and, more recently, snap and lima beans. The disease incidence and severity have increased significantly in recent decades and the molecular resources to study this pathogen are growing and now include a reference genome. At the population level, the epidemiology varies according to the geographical location, with populations in South America dominated by clonal reproduction, and populations in the USA and South Africa composed of many unique genotypes in which sexual reproduction is common. Just as the impact of crop loss as a result of P. capsici has increased in recent decades, there has been a similar increase in the development of new tools and resources to study this devastating pathogen. Phytophthora capsici presents an attractive model for understanding broad-host-range oomycetes, the impact of sexual recombination in field populations and the basic mechanisms of Phytophthora virulence.

Taxonomy

Kingdom Chromista; Phylum Oomycota; Class Oomycetes; Order Peronosporales; Family Peronosporaceae; Genus Phytophthora; Species capsici.

Disease symptoms

Symptoms vary considerably according to the host, plant part infected and environmental conditions. For example, in dry areas (e.g. southwestern USA and southern France), infection on tomato and bell or chilli pepper is generally on the roots and crown, and the infected plants have a distinctive black/brown lesion visible at the soil line (Fig. 1). In areas in which rainfall is more common (e.g. eastern USA), all parts of the plant are infected, including the roots, crown, foliage and fruit (Fig. 1). Root infections cause damping off in seedlings, whereas, in older plants, it is common to see stunted growth, wilting and, eventually, death. For tomatoes, it is common to see significant adventitious root growth just above an infected tap root, and the stunted plants, although severely compromised, may not die. For many cucurbit fruit, the expanding lesions produce fresh sporangia over days (or even weeks depending on the size of the fruit) and the fruit often look as if they have been dipped in white powdered confectioner's sugar (Fig. 1). Generally, hyphae do not emerge from infected plants or fruit (common with Pythium infections) and all that is visible on the surface of an infected plant is sporangia.

Importance

Phytophthora capsici presents an oomycete worst-case scenario to growers as it has a broad host range, often produces long-lived dormant sexual spores, has extensive genotypic diversity and has an explosive asexual disease cycle. It is becoming increasingly apparent that novel control strategies are needed to safeguard food production from P. capsici and other oomycetes. Considering that P. capsici is easy to grow, mate and manipulate in the laboratory and infects many plant species, this pathogen is a robust model for investigations, particularly those related to sexual reproduction, host range and virulence.

Useful websites

Phytophthora capsici genome database: http://genome.jgi-psf.org/Phyca11/Phyca11.home.html. Molecular tools to identify Phytophthora isolates: http://phytophthora-id.org.",2011-10-20 +22084255,PubChem promiscuity: a web resource for gathering compound promiscuity data from PubChem.,"

Summary

Promiscuity counts allow for a better understanding of a compound's assay activity profile and drug potential. Although PubChem contains a vast amount of compound and assay data, it currently does not have a convenient or efficient method to obtain in-depth promiscuity counts for compounds. PubChem promiscuity fills this gap. It is a Java servlet that uses NCBI Entrez (eUtils) web services to interact with PubChem and provide promiscuity counts in a variety of categories along with compound descriptors, including PAINS-based functional group detection.

Availability

http://chemutils.florida.scripps.edu/pcpromiscuity

Contact

southern@scripps.edu",2011-11-13 +23023980,Advanced complex trait analysis.,"

Motivation

The Genome-wide Complex Trait Analysis (GCTA) software package can quantify the contribution of genetic variation to phenotypic variation for complex traits. However, as those datasets of interest continue to increase in size, GCTA becomes increasingly computationally prohibitive. We present an adapted version, Advanced Complex Trait Analysis (ACTA), demonstrating dramatically improved performance.

Results

We restructure the genetic relationship matrix (GRM) estimation phase of the code and introduce the highly optimized parallel Basic Linear Algebra Subprograms (BLAS) library combined with manual parallelization and optimization. We introduce the Linear Algebra PACKage (LAPACK) library into the restricted maximum likelihood (REML) analysis stage. For a test case with 8999 individuals and 279,435 single nucleotide polymorphisms (SNPs), we reduce the total runtime, using a compute node with two multi-core Intel Nehalem CPUs, from ∼17 h to ∼11 min.

Availability and implementation

The source code is fully available under the GNU Public License, along with Linux binaries. For more information see http://www.epcc.ed.ac.uk/software-products/acta.

Contact

a.gray@ed.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-27 +22016395,New families of carboxyl peptidases: serine-carboxyl peptidases and glutamic peptidases.,"Peptidases or proteinases are now classified into seven families based on the nature of the catalytic residues [MEROPS-the peptidase database (http://merops.sanger.ac.uk/)]. They are aspartic- (first described in 1993), cysteine- (1993), serine- (1993) metallo- (1993), threonine- (1997), glutamic- (2004) and asparagine-peptidase (2010). By using an S-PI (pepstatin Ac) as a probe, a new subfamily of serine peptidase, serine-carboxyl peptidase (sedolisin) was discovered in 2001. In addition, the sixth family of peptidase, glutamic peptidase (eqolisin) was also discovered in 2004. The former peptidase is widely distributed in nature from archea to mammals, including humans. One of these enzymes is related to a human fatal hereditable disease, Batten disease. In contrast, the distribution of the latter peptidases is limited, with most of them found in human or plant pathogenic fungi. One such enzyme was isolated from a fungal infection in an HIV-infected patient. In this review, the background of the findings, and crystal structures, catalytic mechanisms, substrates specificities and distribution of the new peptidase families are described.",2011-10-19 +22419785,SAMSCOPE: an OpenGL-based real-time interactive scale-free SAM viewer.,"

Summary

Existing SAM visualization tools like 'samtools tview' (Li et al., 2009) are limited to a small region of the genome, and tools like Tablet (Milne et al., 2010) are limited to a relatively small number of reads and may fail outright on large datasets. We need to visualize complex ChIP-Seq and RNA-Seq features such as polarity as well as coverage across whole 3 Gbp genomes such as Human. We have addressed these problems in a lightweight visualization system called SAMSCOPE accelerated by OpenGL. The extensive pre-processing and fast OpenGL interface of SAMSCOPE provides instantaneous and intuitive browsing of complex data at all levels of detail across multiple experiments.

Availability and implementation

The SAMSCOPE software, implemented in C++ for Linux, with source code, binary packages and documentation are freely available from http://samscope.dna.bio.keio.ac.jp.",2012-03-13 +22383964,TEPITOPEpan: extending TEPITOPE for peptide binding prediction covering over 700 HLA-DR molecules.,"

Motivation

Accurate identification of peptides binding to specific Major Histocompatibility Complex Class II (MHC-II) molecules is of great importance for elucidating the underlying mechanism of immune recognition, as well as for developing effective epitope-based vaccines and promising immunotherapies for many severe diseases. Due to extreme polymorphism of MHC-II alleles and the high cost of biochemical experiments, the development of computational methods for accurate prediction of binding peptides of MHC-II molecules, particularly for the ones with few or no experimental data, has become a topic of increasing interest. TEPITOPE is a well-used computational approach because of its good interpretability and relatively high performance. However, TEPITOPE can be applied to only 51 out of over 700 known HLA DR molecules.

Method

We have developed a new method, called TEPITOPEpan, by extrapolating from the binding specificities of HLA DR molecules characterized by TEPITOPE to those uncharacterized. First, each HLA-DR binding pocket is represented by amino acid residues that have close contact with the corresponding peptide binding core residues. Then the pocket similarity between two HLA-DR molecules is calculated as the sequence similarity of the residues. Finally, for an uncharacterized HLA-DR molecule, the binding specificity of each pocket is computed as a weighted average in pocket binding specificities over HLA-DR molecules characterized by TEPITOPE.

Result

The performance of TEPITOPEpan has been extensively evaluated using various data sets from different viewpoints: predicting MHC binding peptides, identifying HLA ligands and T-cell epitopes and recognizing binding cores. Among the four state-of-the-art competing pan-specific methods, for predicting binding specificities of unknown HLA-DR molecules, TEPITOPEpan was roughly the second best method next to NETMHCIIpan-2.0. Additionally, TEPITOPEpan achieved the best performance in recognizing binding cores. We further analyzed the motifs detected by TEPITOPEpan, examining the corresponding literature of immunology. Its online server and PSSMs therein are available at http://www.biokdd.fudan.edu.cn/Service/TEPITOPEpan/.",2012-02-23 +22155863,BPDA2d--a 2D global optimization-based Bayesian peptide detection algorithm for liquid chromatograph-mass spectrometry.,"

Motivation

Peptide detection is a crucial step in mass spectrometry (MS) based proteomics. Most existing algorithms are based upon greedy isotope template matching and thus may be prone to error propagation and ineffective to detect overlapping peptides. In addition, existing algorithms usually work at different charge states separately, isolating useful information that can be drawn from other charge states, which may lead to poor detection of low abundance peptides.

Results

BPDA2d models spectra as a mixture of candidate peptide signals and systematically evaluates all possible combinations of possible peptide candidates to interpret the given spectra. For each candidate, BPDA2d takes into account its elution profile, charge state distribution and isotope pattern, and it combines all evidence to infer the candidate's signal and existence probability. By piecing all evidence together--especially by deriving information across charge states--low abundance peptides can be better identified and peptide detection rates can be improved. Instead of local template matching, BPDA2d performs global optimization for all candidates and systematically optimizes their signals. Since BPDA2d looks for the optimal among all possible interpretations of the given spectra, it has the capability in handling complex spectra where features overlap. BPDA2d estimates the posterior existence probability of detected peptides, which can be directly used for probability-based evaluation in subsequent processing steps. Our experiments indicate that BPDA2d outperforms state-of-the-art detection methods on both simulated data and real liquid chromatography-mass spectrometry data, according to sensitivity and detection accuracy.

Availability

The BPDA2d software package is available at http://gsp.tamu.edu/Publications/supplementary/sun11a/.",2011-12-06 +22267916,Alpha-Tocopherol Alters Transcription Activities that Modulates Tumor Necrosis Factor Alpha (TNF-α) Induced Inflammatory Response in Bovine Cells.,"To further investigate the potential role of α-tocopherol in maintaining immuno-homeostasis in bovine cells (Madin-Darby bovine kidney epithelial cell line), we undertook in vitro experiments using recombinant TNF-α as an immuno-stimulant to simulate inflammation response in cells with or without α-tocopherol pre-treatment. Using microarray global-profiling and IPA (Ingenuity Pathways Analysis, Ingenuity(®) Systems, http://www.ingenuity.com) data analysis on TNF-α-induced gene perturbation in those cells, we focused on determining whether α-tocopherol treatment of normal bovine cells in a standard cell culture condition can modify cell's immune response induced by TNF-α challenge. When three datasets were filtered and compared using IPA, there were a total of 1750 genes in all three datasets for comparison, 97 genes were common in all three sets; 615 genes were common in at least two datasets; there were 261 genes unique in TNF-α challenge, 399 genes were unique in α-tocopherol treatment, and 378 genes were unique in the α-tocopherol plus TNF-α treatment. TNF-α challenge induced significant change in gene expression. Many of those genes induced by TNF-α are related to the cells immune and inflammatory responses. The results of IPA data analysis showed that α-tocopherol-pretreatment of cells modulated cell's response to TNF-α challenge. In most of the canonical pathways, α-tocopherol pretreatment showed the antagonistic effect against the TNF-α-induced pro-inflammatory responses. 
We concluded that α-tocopherol pre-treatment has a significant antagonistic effect that modulates the cell's response to the TNF-α challenge by altering the gene expression activities of some important signaling molecules.",2011-12-05 +23813000,Poly(A) motif prediction using spectral latent features from human DNA sequences.,"

Motivation

Polyadenylation is the addition of a poly(A) tail to an RNA molecule. Identifying DNA sequence motifs that signal the addition of poly(A) tails is essential to improved genome annotation and better understanding of the regulatory mechanisms and stability of mRNA. Existing poly(A) motif predictors demonstrate that information extracted from the surrounding nucleotide sequences of candidate poly(A) motifs can differentiate true motifs from the false ones to a great extent. A variety of sophisticated features has been explored, including sequential, structural, statistical, thermodynamic and evolutionary properties. However, most of these methods involve extensive manual feature engineering, which can be time-consuming and can require in-depth domain knowledge.

Results

We propose a novel machine-learning method for poly(A) motif prediction by marrying generative learning (hidden Markov models) and discriminative learning (support vector machines). Generative learning provides a rich palette on which the uncertainty and diversity of sequence information can be handled, while discriminative learning allows the performance of the classification task to be directly optimized. Here, we used hidden Markov models for fitting the DNA sequence dynamics, and developed an efficient spectral algorithm for extracting latent variable information from these models. These spectral latent features were then fed into support vector machines to fine-tune the classification performance. We evaluated our proposed method on a comprehensive human poly(A) dataset that consists of 14 740 samples from 12 of the most abundant variants of human poly(A) motifs. Compared with one of the previous state-of-the-art methods in the literature (the random forest model with expert-crafted features), our method reduces the average error rate, false-negative rate and false-positive rate by 26, 15 and 35%, respectively. Meanwhile, our method makes ~30% fewer error predictions relative to the other string kernels. Furthermore, our method can be used to visualize the importance of oligomers and positions in predicting poly(A) motifs, from which we can observe a number of characteristics in the surrounding regions of true and false motifs that have not been reported before.

Availability

http://sfb.kaust.edu.sa/Pages/Software.aspx.

Supplementary information

Supplementary data are available at Bioinformatics online.",2013-07-01 +22826541,RILogo: visualizing RNA-RNA interactions.,"

Summary

With the increasing amount of newly discovered non-coding RNAs, the interactions between RNA molecules become an increasingly important aspect for characterizing their functionality. Many computational tools have been developed to predict the formation of duplexes between two RNAs, either based on single sequences or alignments of homologous sequences. Here, we present RILogo, a program to visualize inter- and intramolecular base pairing between two RNA molecules. The input for RILogo is a pair of structure-annotated sequences or alignments. In the latter case, RILogo displays the alignments in the form of sequence logos, including the mutual information of base paired columns. We also introduce two novel mutual information based measures that weigh the covariance information by the evolutionary distances of the aligned sequences. We show that the new measures have an increased accuracy compared with previous mutual information measures.

Availability and implementation

RILogo is freely available as a stand-alone program and is accessible via a web server at http://rth.dk/resources/rilogo.

Contact

pmenzel@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-23 +22406831,Novel insight into the non-coding repertoire through deep sequencing analysis.,"Non-coding RNAs (ncRNA) account for a large portion of the transcribed genomic output. This diverse family of untranslated RNA molecules play a crucial role in cellular function. The use of 'deep sequencing' technology (also known as 'next generation sequencing') to infer transcript expression levels in general, and ncRNA specifically, is becoming increasingly common in molecular and clinical laboratories. We developed a software termed 'RandA' (which stands for ncRNA Read-and-Analyze) that performs comprehensive ncRNA profiling and differential expression analysis on deep sequencing generated data through a graphical user interface running on a local personal computer. Using RandA, we reveal the complexity of the ncRNA repertoire in a given cell population. We further demonstrate the relevance of such an extensive ncRNA analysis by elucidating a multitude of characterizing features in pathogen infected mammalian cells. RandA is available for download at http://ibis.tau.ac.il/RandA.",2012-03-09 +21554767,MetaPath: identifying differentially abundant metabolic pathways in metagenomic datasets.,"

Background

Enabled by rapid advances in sequencing technology, metagenomic studies aim to characterize entire communities of microbes bypassing the need for culturing individual bacterial members. One major goal of metagenomic studies is to identify specific functional adaptations of microbial communities to their habitats. The functional profile and the abundances for a sample can be estimated by mapping metagenomic sequences to the global metabolic network consisting of thousands of molecular reactions. Here we describe a powerful analytical method (MetaPath) that can identify differentially abundant pathways in metagenomic datasets, relying on a combination of metagenomic sequence data and prior metabolic pathway knowledge.

Methods

First, we introduce a scoring function for an arbitrary subnetwork and find the max-weight subnetwork in the global network by a greedy search algorithm. Then we compute two p values (pabund and pstruct) using nonparametric approaches to answer two different statistical questions: (1) is this subnetwork differentially abundant? (2) What is the probability of finding such good subnetworks by chance given the data and network structure? Finally, significant metabolic subnetworks are discovered based on these two p values.

Results

In order to validate our methods, we have designed a simulated metabolic pathways dataset and show that MetaPath outperforms other commonly used approaches. We also demonstrate the power of our methods in analyzing two publicly available metagenomic datasets, and show that the subnetworks identified by MetaPath provide valuable insights into the biological activities of the microbiome.

Conclusions

We have introduced a statistical method for finding significant metabolic subnetworks from metagenomic datasets. Compared with previous methods, results from MetaPath are more robust against noise in the data, and have significantly higher sensitivity and specificity (when tested on simulated datasets). When applied to two publicly available metagenomic datasets, the output of MetaPath is consistent with previous observations and also provides several new insights into the metabolic activity of the gut microbiome. The software is freely available at http://metapath.cbcb.umd.edu.",2011-05-28 +22484411,A generalized form of context-dependent psychophysiological interactions (gPPI): a comparison to standard approaches.,"Functional MRI (fMRI) allows one to study task-related regional responses and task-dependent connectivity analysis using psychophysiological interaction (PPI) methods. The latter affords the additional opportunity to understand how brain regions interact in a task-dependent manner. The current implementation of PPI in Statistical Parametric Mapping (SPM8) is configured primarily to assess connectivity differences between two task conditions, when in practice fMRI tasks frequently employ more than two conditions. Here we evaluate how a generalized form of context-dependent PPI (gPPI; http://www.nitrc.org/projects/gppi), which is configured to automatically accommodate more than two task conditions in the same PPI model by spanning the entire experimental space, compares to the standard implementation in SPM8. These comparisons are made using both simulations and an empirical dataset. In the simulated dataset, we compare the interaction beta estimates to their expected values and model fit using the Akaike information criterion (AIC). We found that interaction beta estimates in gPPI were robust to different simulated data models, were not different from the expected beta value, and had better model fits than when using standard PPI (sPPI) methods. 
In the empirical dataset, we compare the model fit of the gPPI approach to sPPI. We found that the gPPI approach improved model fit compared to sPPI. There were several regions that became non-significant with gPPI. These regions all showed significantly better model fits with gPPI. Also, there were several regions where task-dependent connectivity was only detected using gPPI methods, also with improved model fit. Regions that were detected with all methods had more similar model fits. These results suggest that gPPI may have greater sensitivity and specificity than standard implementation in SPM. This notion is tempered slightly as there is no gold standard; however, data simulations with a known outcome support our conclusions about gPPI. In sum, the generalized form of context-dependent PPI approach has increased flexibility of statistical modeling, and potentially improves model fit, specificity to true negative findings, and sensitivity to true positive findings.",2012-03-30 +22122757,Tuberculosis in UK cities: workload and effectiveness of tuberculosis control programmes.,"

Background

Tuberculosis (TB) has increased within the UK and, in response, targets for TB control have been set and interventions recommended. The question was whether these had been implemented and, if so, had they been effective in reducing TB cases.

Methods

Epidemiological data were obtained from enhanced surveillance and clinics. Primary care trusts or TB clinics with an average of > 100 TB cases per year were identified and provided reflections on the reasons for any change in their local incidence, which was compared to an audit against the national TB plan.

Results

Access to data for planning varied (0-22 months). Sputum smear status was usually well recorded within the clinics. All cities had TB networks, a key worker for each case, free treatment and arrangements to treat HIV co-infection. Achievement of targets in the national plan correlated well with change in workload figures for the commissioning organizations (Spearman's rank correlation R = 0.8, P < 0.01) but not with clinic numbers. Four cities had not achieved the target of one nurse per 40 notifications (Birmingham, Bradford, Manchester and Sheffield). Compared to other cities, their loss to follow-up during treatment was usually > 6% (χ2 = 4.2, P < 0.05), there was less TB detected by screening and less outreach. Manchester was most poorly resourced and showed the highest rate of increase of TB. Direct referral from radiology, sputum from primary care and outreach workers were cited as important in TB control.

Conclusion

TB control programmes depend on adequate numbers of specialist TB nurses for early detection and case-holding. Please see related article: http://www.biomedcentral.com/1741-7015/9/127.",2011-11-28 +23077130,Knowledge-based biomedical word sense disambiguation: an evaluation and application to clinical document classification.,"

Background

Word sense disambiguation (WSD) methods automatically assign an unambiguous concept to an ambiguous term based on context, and are important to many text-processing tasks. In this study we developed and evaluated a knowledge-based WSD method that uses semantic similarity measures derived from the Unified Medical Language System (UMLS) and evaluated the contribution of WSD to clinical text classification.

Methods

We evaluated our system on biomedical WSD datasets and determined the contribution of our WSD system to clinical document classification on the 2007 Computational Medicine Challenge corpus.

Results

Our system compared favorably with other knowledge-based methods. Machine learning classifiers trained on disambiguated concepts significantly outperformed those trained using all concepts.

Conclusions

We developed a WSD system that achieves high disambiguation accuracy on standard biomedical WSD datasets and showed that our WSD system improves clinical document classification.

Data sharing

We integrated our WSD system with MetaMap and the clinical Text Analysis and Knowledge Extraction System, two popular biomedical natural language processing systems. All codes required to reproduce our results and all tools developed as part of this study are released as open source, available under http://code.google.com/p/ytex.",2012-10-16 +21800894,AMASS: algorithm for MSI analysis by semi-supervised segmentation.,"Mass Spectrometric Imaging (MSI) is a molecular imaging technique that allows the generation of 2D ion density maps for a large complement of the active molecules present in cells and sectioned tissues. Automatic segmentation of such maps according to patterns of co-expression of individual molecules can be used for discovery of novel molecular signatures (molecules that are specifically expressed in particular spatial regions). However, current segmentation techniques are biased toward the discovery of higher abundance molecules and large segments; they allow limited opportunity for user interaction, and validation is usually performed by similarity to known anatomical features. We describe here a novel method, AMASS (Algorithm for MSI Analysis by Semi-supervised Segmentation). AMASS relies on the discriminating power of a molecular signal instead of its intensity as a key feature, uses an internal consistency measure for validation, and allows significant user interaction and supervision as options. An automated segmentation of entire leech embryo data images resulted in segmentation domains congruent with many known organs, including heart, CNS ganglia, nephridia, nephridiopores, and lateral and ventral regions, each with a distinct molecular signature. Likewise, segmentation of a rat brain MSI slice data set yielded known brain features and provided interesting examples of co-expression between distinct brain regions. AMASS represents a new approach for the discovery of peptide masses with distinct spatial features of expression. 
Software source code and installation and usage guide are available at http://bix.ucsd.edu/AMASS/ .",2011-08-25 +21984475,NAPP: the Nucleic Acid Phylogenetic Profile Database.,"Nucleic acid phylogenetic profiling (NAPP) classifies coding and non-coding sequences in a genome according to their pattern of conservation across other genomes. This procedure efficiently distinguishes clusters of functional non-coding elements in bacteria, particularly small RNAs and cis-regulatory RNAs, from other conserved sequences. In contrast to other non-coding RNA detection pipelines, NAPP does not require the presence of conserved RNA secondary structure and therefore is likely to identify previously undetected RNA genes or elements. Furthermore, as NAPP clusters contain both coding and non-coding sequences with similar occurrence profiles, they can be analyzed under a functional perspective. We recently improved the NAPP pipeline and applied it to a collection of 949 bacterial and 68 archaeal species. The database and web interface available at http://napp.u-psud.fr/ enable detailed analysis of NAPP clusters enriched in non-coding RNAs, graphical display of phylogenetic profiles, visualization of predicted RNAs in their genome context and extraction of predicted RNAs for use with genome browsers or other software.",2011-10-08 +23427988,Dispom: a discriminative de-novo motif discovery tool based on the jstacs library.,"DNA-binding proteins are a main component of gene regulation as they activate or repress gene expression by binding to specific binding sites in target regions of genomic DNA. However, de-novo discovery of these binding sites in target regions obtained by wet-lab experiments is a challenging problem in computational biology, which has not yet been solved satisfactorily. 
Here, we present a detailed description and analysis of the de-novo motif discovery tool Dispom, which has been developed for finding binding sites of DNA-binding proteins that are differentially abundant in a set of target regions compared to a set of control regions. Two additional features of Dispom are its capability of modeling positional preferences of binding sites and adjusting the length of the motif in the learning process. Dispom yields an increased prediction accuracy compared to existing tools for de-novo motif discovery, suggesting that the combination of searching for differentially abundant motifs, inferring their positional distributions, and adjusting the motif lengths is beneficial for de-novo motif discovery. When applying Dispom to promoters of auxin-responsive genes and those of ABI3 target genes from Arabidopsis thaliana, we identify relevant binding motifs with pronounced positional distributions. These results suggest that learning motifs, their positional distributions, and their lengths by a discriminative learning principle may aid motif discovery from ChIP-chip and gene expression data. We make Dispom freely available as part of Jstacs, an open-source Java library that is tailored to statistical sequence analysis. To facilitate extensions of Dispom, we describe its implementation using Jstacs in this manuscript. In addition, we provide a stand-alone application of Dispom at http://www.jstacs.de/index.php/Dispom for instant use.",2013-01-21 +22539429,jmzIdentML API: A Java interface to the mzIdentML standard for peptide and protein identification data.,"We present a Java application programming interface (API), jmzIdentML, for the Human Proteome Organisation (HUPO) Proteomics Standards Initiative (PSI) mzIdentML standard for peptide and protein identification data. 
The API combines the power of Java Architecture of XML Binding (JAXB) and an XPath-based random-access indexer to allow a fast and efficient mapping of extensible markup language (XML) elements to Java objects. The internal references in the mzIdentML files are resolved in an on-demand manner, where the whole file is accessed as a random-access swap file, and only the relevant piece of XML is selected for mapping to its corresponding Java object. The API is highly efficient in its memory usage and can handle files of arbitrary sizes. The API follows the official release of the mzIdentML (version 1.1) specifications and is available in the public domain under a permissive licence at http://www.code.google.com/p/jmzidentml/.",2012-03-01 +23497112,HuntMi: an efficient and taxon-specific approach in pre-miRNA identification.,"

Background

Machine learning techniques are known to be a powerful way of distinguishing microRNA hairpins from pseudo hairpins and have been applied in a number of recognised miRNA search tools. However, many current methods based on machine learning suffer from some drawbacks, including not addressing the class imbalance problem properly. It may lead to overlearning the majority class and/or incorrect assessment of classification performance. Moreover, those tools are effective for a narrow range of species, usually the model ones. This study aims at improving performance of miRNA classification procedure, extending its usability and reducing computational time.

Results

We present HuntMi, a stand-alone machine learning miRNA classification tool. We developed a novel method of dealing with the class imbalance problem called ROC-select, which is based on thresholding score function produced by traditional classifiers. We also introduced new features to the data representation. Several classification algorithms in combination with ROC-select were tested and random forest was selected for the best balance between sensitivity and specificity. Reliable assessment of classification performance is guaranteed by using large, strongly imbalanced, and taxon-specific datasets in 10-fold cross-validation procedure. As a result, HuntMi achieves a considerably better performance than any other miRNA classification tool and can be applied in miRNA search experiments in a wide range of species.

Conclusions

Our results indicate that HuntMi represents an effective and flexible tool for identification of new microRNAs in animals, plants and viruses. ROC-select strategy proves to be superior to other methods of dealing with class imbalance problem and can possibly be used in other machine learning classification tasks. The HuntMi software as well as datasets used in the research are freely available at http://lemur.amu.edu.pl/share/HuntMi/.",2013-03-05 +23587030,Correlation of hK6 expression with tumor recurrence and prognosis in advanced gastric cancer.,"

Background

Human kallikrein gene 6 (KLK6) is a member of the human kallikrein gene family (Kallikreins, KLKs). Human kallikrein-related peptidase 6 (hK6) is a trypsin-like serine protease encoded by the KLK6, has been reported to be highly expressed in several cancers including gastric cancer. In this study, we investigated the correlation of hK6 expression with clinicopathological characteristics, tumor recurrence and prognosis in advanced gastric carcinoma after curative resection.

Methods

We retrospectively analyzed the clinical data of 129 cases of advanced gastric cancer after curative gastrectomy. The expression of hK6 in advanced gastric cancer tissues compared to adjacent noncancerous tissues were examined, and the relationship between hK6 expression and clinicopathological characteristics was evaluated. In addition, these patients were followed up to investigate the relationship between hK6 expression and the survival time.

Results

The positive rate of hK6 expression was significantly higher in advanced gastric cancer tissue, than that in adjacent noncancerous and gastric ulcer tissues (36.5%, 33.3%, respectively, P < 0.001). There was a close relationship between hK6 expression and TNM stage (P = 0.005), vascular invasion (P = 0.037) and perineural invasion (P = 0.035). Furthermore, patients with hK6 positive showed significantly higher recurrence and poorer prognosis than those with hK6 negative. Multivariate analysis showed that hK6 expression was a significant independent factor for tumor recurrence and overall survival.

Conclusion

hK6 is overexpressed in advanced gastric cancer tissues. Its clinical utility may be used as an unfavorable indicator in predicting tumor recurrence and prognosis for advanced gastric cancer after operation. This study also suggests that hK6 might be a potential therapeutic target for gastric cancer.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/8558403578787206.",2013-04-15 +21975133,Biblio-MetReS: a bibliometric network reconstruction application and server.,"

Background

Reconstruction of genes and/or protein networks from automated analysis of the literature is one of the current targets of text mining in biomedical research. Some user-friendly tools already perform this analysis on precompiled databases of abstracts of scientific papers. Other tools allow expert users to elaborate and analyze the full content of a corpus of scientific documents. However, to our knowledge, no user friendly tool that simultaneously analyzes the latest set of scientific documents available on line and reconstructs the set of genes referenced in those documents is available.

Results

This article presents such a tool, Biblio-MetReS, and compares its functioning and results to those of other user-friendly applications (iHOP, STRING) that are widely used. Under similar conditions, Biblio-MetReS creates networks that are comparable to those of other user friendly tools. Furthermore, analysis of full text documents provides more complete reconstructions than those that result from using only the abstract of the document.

Conclusions

Literature-based automated network reconstruction is still far from providing complete reconstructions of molecular networks. However, its value as an auxiliary tool is high and it will increase as standards for reporting biological entities and relationships become more widely accepted and enforced. Biblio-MetReS is an application that can be downloaded from http://metres.udl.cat/. It provides an easy to use environment for researchers to reconstruct their networks of interest from an always up to date set of scientific documents.",2011-10-05 +23341908,Image classification of human carcinoma cells using complex wavelet-based covariance descriptors.,"Cancer cell lines are widely used for research purposes in laboratories all over the world. Computer-assisted classification of cancer cells can alleviate the burden of manual labeling and help cancer research. In this paper, we present a novel computerized method for cancer cell line image classification. The aim is to automatically classify 14 different classes of cell lines including 7 classes of breast and 7 classes of liver cancer cells. Microscopic images containing irregular carcinoma cell patterns are represented by subwindows which correspond to foreground pixels. For each subwindow, a covariance descriptor utilizing the dual-tree complex wavelet transform (DT-[Formula: see text]WT) coefficients and several morphological attributes are computed. Directionally selective DT-[Formula: see text]WT feature parameters are preferred primarily because of their ability to characterize edges at multiple orientations which is the characteristic feature of carcinoma cell line images. A Support Vector Machine (SVM) classifier with radial basis function (RBF) kernel is employed for final classification. Over a dataset of 840 images, we achieve an accuracy above 98%, which outperforms the classical covariance-based methods. The proposed system can be used as a reliable decision maker for laboratory studies. 
Our tool provides an automated, time- and cost-efficient analysis of cancer cell morphology to classify different cancer cell lines using image-processing techniques, which can be used as an alternative to the costly short tandem repeat (STR) analysis. The data set used in this manuscript is available as supplementary material through http://signal.ee.bilkent.edu.tr/cancerCellLineClassificationSampleImages.html.",2013-01-16 +21228048,"CNAmet: an R package for integrating copy number, methylation and expression data.","

Summary

Gene copy number and DNA methylation alterations are key regulators of gene expression in cancer. Accordingly, genes that show simultaneous methylation, copy number and expression alterations are likely to have a key role in tumor progression. We have implemented a novel software package (CNAmet) for integrative analysis of high-throughput copy number, DNA methylation and gene expression data. To demonstrate the utility of CNAmet, we use copy number, DNA methylation and gene expression data from 50 glioblastoma multiforme and 188 ovarian cancer primary tumor samples. Our results reveal a synergistic effect of DNA methylation and copy number alterations on gene expression for several known oncogenes as well as novel candidate oncogenes.

Availability

CNAmet R-package and user guide are freely available under GNU General Public License at http://csbi.ltdk.helsinki.fi/CNAmet.",2011-01-12 +21963895,Yeast two-hybrid screening of proteins interacting with plasmin receptor subunit: C-terminal fragment of annexin A2.,"

Aim

To identify proteins that interact with the C-terminal fragment of annexin A2 (A2IC), generated by plasmin cleavage of the plasmin receptor, a heterotetramer (AA2t) containing annexin A2.

Methods

The gene that encodes the A2IC fragment was obtained from PCR-amplified cDNA isolated from human monocytes, and was ligated into the pBTM116 vector using a DNA ligation kit. The resultant plasmid (pBTM116-A2IC) was sequenced with an ABI PRISM 310 Genetic Analyzer. The expression of an A2IC bait protein fused with a LexA-DNA binding domain (BD) was determined using Western blot analysis. The identification of proteins that interact with A2IC and are encoded in a human monocyte cDNA library was performed using yeast two-hybrid screening. The DNA sequences of the relevant cDNAs were determined using an ABI PRISM BigDye terminator cycle sequencing ready reaction kit. Nucleotide sequence databases were searched for homologous sequences using BLAST search analysis (http://www.ncbi.nlm.nih.gov). Confirmation of the interaction between the protein LexA-A2IC and each of cathepsin S and SNX17 was conducted using a small-scale yeast transformation and X-gal assay.

Results

The yeast transformed with plasmids encoding the bait proteins were screened with a human monocyte cDNA library by reconstituting full-length transcription factors containing the GAL4-active domain (GAL4-AD) as the prey in a yeast two-hybrid approach. After screening 1×10(7) clones, 23 independent β-Gal-positive clones were identified. Sequence analysis and a database search revealed that 15 of these positive clones matched eight different proteins (SNX17, ProCathepsin S, RPS2, ZBTB4, OGDH, CCDC32, PAPD4, and actin which was already known to interact with annexin A2).

Conclusion

A2IC interacts with various proteins to form protein complexes, which may contribute to the molecular mechanism of monocyte activation induced by plasmin. The yeast two-hybrid system is an efficient approach for investigating protein interactions.",2011-10-03 +22935142,The apelin receptor inhibits the angiotensin II type 1 receptor via allosteric trans-inhibition.,"

Background and purpose

The apelin receptor (APJ) is often co-expressed with the angiotensin II type-1 receptor (AT1) and acts as an endogenous counter-regulator. Apelin antagonizes Ang II signalling, but the precise molecular mechanism has not been elucidated. Understanding this interaction may lead to new therapies for the treatment of cardiovascular disease.

Experimental approach

The physical interaction of APJ and AT1 receptors was detected by co-immunoprecipitation and bioluminescence resonance energy transfer (BRET). Functional and pharmacological interactions were measured by G-protein-dependent signalling and recruitment of β-arrestin. Allosterism and cooperativity between APJ and AT1 were measured by radioligand binding assays.

Key results

Apelin, but not Ang II, induced APJ : AT1 heterodimerization forced AT1 into a low-affinity state, reducing Ang II binding. Likewise, apelin mediated a concentration-dependent depression in the maximal production of inositol phosphate (IP(1) ) and β-arrestin recruitment to AT1 in response to Ang II. The signal depression approached a limit, the magnitude of which was governed by the cooperativity indicative of a negative allosteric interaction. Fitting the data to an operational model of allosterism revealed that apelin-mediated heterodimerization significantly reduces Ang II signalling efficacy. These effects were not observed in the absence of apelin.

Conclusions and implications

Apelin-dependent heterodimerization between APJ and AT1 causes negative allosteric regulation of AT1 function. As AT1 is significant in the pathogenesis of cardiovascular disease, these findings suggest that impaired apelin and APJ function may be a common underlying aetiology.

Linked article

This article is commented on by Goupil et al., pp. 1101-1103 of this issue. To view this commentary visit http://dx.doi.org/10.1111/bph.12040.",2013-03-01 +23788717,Myocardium: dynamic versus single-shot CT perfusion imaging.,"

Purpose

To determine the diagnostic accuracy of dynamic computed tomographic (CT) perfusion imaging of the myocardium for the detection of hemodynamically relevant coronary artery stenosis compared with the accuracy of coronary angiography and fractional flow reserve (FFR) measurement.

Materials and methods

This study was approved by the institutional review board and the Federal Radiation Safety Council (Bundesamt für Strahlenschutz). All patients provided written informed consent. Thirty-two consecutive patients in adenosine stress conditions underwent dynamic CT perfusion imaging (14 consecutive data sets) performed by using a 256-section scanner with an 8-cm detector and without table movement. Time to peak, area under the curve, upslope, and peak enhancement were determined after calculation of time-attenuation curves. In addition, myocardial blood flow (MBF) was determined quantitatively. Results were compared with those of coronary angiography and FFR measurement by using a receiver operating characteristic (ROC) analysis. In addition, threshold values based on the Youden index and sensitivity and specificity were calculated.

Results

Area under the ROC curve, sensitivity, and specificity, respectively, were 0.67, 41.4% (95% confidence interval [CI]: 23.5%, 61.1%), and 86.6% (95% CI: 76.0%, 93.7%) for time to peak; 0.74, 58.6% (95% CI: 38.9%, 76.5%), and 83.6% (95% CI: 72.5%, 91.5%) for area under the curve; 0.87, 82.8% (95% CI: 64.2%, 94.1%), and 88.1% (95% CI: 77.8%, 94.7%) for upslope; 0.83, 82.8% (95% CI: 64.2%, 94.1%), and 89.6% (95% CI: 79.6%, 95.7%) for peak enhancement; and 0.86, 75.9% (95% CI: 56.5%, 89.7%), and 100% (95% CI: 94.6%, 100%) for MBF. The thresholds determined by using the Youden index were 148.5 HU · sec for area under the curve, 12 seconds for time to peak, 2.5 HU/sec for upslope, 34 HU for peak enhancement, and 1.64 mL/g/min for MBF.

Conclusion

The semiquantitative parameters upslope and peak enhancement and the quantitative parameter MBF showed similar high diagnostic accuracy.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.13121441/-/DC1.",2013-06-20 +30731678,First Report of Charcoal Rot Caused by Macrophomina phaseolina on Sunflower in Illinois.,"In September 2009, sunflower (Helianthus annuus L.) plants (cv. Mycogen 8C451) from a University of Illinois field research trial in Fayette County, Illinois exhibited silvery gray girdling lesions on the lower stems and premature death. When lower stems and roots were split open, the pith tissue was compressed into layers. Black microsclerotia (90 to 180 μm) were present on the outside of the lower stem tissue and in the stem vascular tissue. Five pieces (approximately 1 cm long) of symptomatic stem tissue from five different affected plants (25 pieces total) were soaked in a 0.5% solution of NaOCl for 30 s, rinsed with sterile distilled water, and placed on potato dextrose agar (PDA; Becton, Dickinson, and Company, Franklin Lakes, NJ). Gray hyphae grew from all of the stem pieces, which subsequently turned black and formed black microsclerotia (75 to 175 μm). On the basis of plant symptoms and size and color of the microsclerotia, the disease was diagnosed as charcoal rot caused by Macrophomina phaseolina (Tassi) Goid (2). To confirm that the isolated fungus was M. phaseolina, DNA was extracted from the pure culture, and PCR amplification of a subunit rDNA and internal transcribed spacer (ITS) region with primers EF3RCNL and ITS4 was performed (3). The Keck Biotechnology Center at the University of Illinois, Urbana sequenced the PCR product. The resulting nucleotide sequence shared the highest homology (99%) with sequences of M. phaseolina when compared with the subunit rDNA and ITS sequences in the nucleotide database ( http://www.ncbi.nlm.nih.gov ). A greenhouse experiment was conducted to confirm pathogenicity; the greenhouse temperature was approximately 27°C and sunflower plants (cv. 
Cargill 270) were grown in pots and watered daily to maintain adequate soil moisture for growth. Sterile toothpicks were infested with M. phaseolina and placed through the stems (10 cm above the soil surface) of five 40-day-old sunflower plants that were approximately at growth stage R4 (1,4). Five sterile, noninfested toothpicks were similarly placed through sunflower plants to act as controls. Parafilm was used to hold the toothpick in the stem and seal the stem injury. Thirty-five days after inoculation, the mean lesion length on stems inoculated with M. phaseolina was 595 mm and no lesions developed on the control plants. M. phaseolina-inoculated plants also began to wilt and die. Cultures identical to the original M. phaseolina isolate were reisolated from stem lesions of the M. phaseolina-inoculated plants. This is the first report of charcoal rot on sunflower in Illinois to our knowledge. Sunflower is currently not a major crop grown in Illinois, but on-going research is focused on evaluating sunflower as a potential late-planted crop to follow winter wheat. If sunflower production increases in Illinois, growers may need to take precautions to manage charcoal rot. References: (1) L. K. Edmunds. Phytopathology 54:514, 1964. (2) T. Gulya et al. Page 263 in: Sunflower Technology and Production. American Society of Agronomy, Madison, WI, 1997. (3) N. S. Lord et al. FEMS Microbiol. Ecol. 42:327, 2002. (4) A. A. Schneiter and J. F. Miller. Crop Sci. 21:901, 1981.",2011-10-01 +22155609,In-silico human genomics with GeneCards.,"Since 1998, the bioinformatics, systems biology, genomics and medical communities have enjoyed a synergistic relationship with the GeneCards database of human genes (http://www.genecards.org). This human gene compendium was created to help to introduce order into the increasing chaos of information flow. 
As a consequence of viewing details and deep links related to specific genes, users have often requested enhanced capabilities, such that, over time, GeneCards has blossomed into a suite of tools (including GeneDecks, GeneALaCart, GeneLoc, GeneNote and GeneAnnot) for a variety of analyses of both single human genes and sets thereof. In this paper, we focus on inhouse and external research activities which have been enabled, enhanced, complemented and, in some cases, motivated by GeneCards. In turn, such interactions have often inspired and propelled improvements in GeneCards. We describe here the evolution and architecture of this project, including examples of synergistic applications in diverse areas such as synthetic lethality in cancer, the annotation of genetic variations in disease, omics integration in a systems biology approach to kidney disease, and bioinformatics tools.",2011-10-01 +21546390,Multiple-rule bias in the comparison of classification rules.,"

Motivation

There is growing discussion in the bioinformatics community concerning overoptimism of reported results. Two approaches contributing to overoptimism in classification are (i) the reporting of results on datasets for which a proposed classification rule performs well and (ii) the comparison of multiple classification rules on a single dataset that purports to show the advantage of a certain rule.

Results

This article provides a careful probabilistic analysis of the second issue and the 'multiple-rule bias', resulting from choosing a classification rule having minimum estimated error on the dataset. It quantifies this bias corresponding to estimating the expected true error of the classification rule possessing minimum estimated error and it characterizes the bias from estimating the true comparative advantage of the chosen classification rule relative to the others by the estimated comparative advantage on the dataset. The analysis is applied to both synthetic and real data using a number of classification rules and error estimators.

Availability

We have implemented in C code the synthetic data distribution model, classification rules, feature selection routines and error estimation methods. The code for multiple-rule analysis is implemented in MATLAB. The source code is available at http://gsp.tamu.edu/Publications/supplementary/yousefi11a/. Supplementary simulation results are also included.",2011-05-05 +22101402,DemQSAR: predicting human volume of distribution and clearance of drugs.,"In silico methods characterizing molecular compounds with respect to pharmacologically relevant properties can accelerate the identification of new drugs and reduce their development costs. Quantitative structure-activity/-property relationship (QSAR/QSPR) correlate structure and physico-chemical properties of molecular compounds with a specific functional activity/property under study. Typically a large number of molecular features are generated for the compounds. In many cases the number of generated features exceeds the number of molecular compounds with known property values that are available for learning. Machine learning methods tend to overfit the training data in such situations, i.e. the method adjusts to very specific features of the training data, which are not characteristic for the considered property. This problem can be alleviated by diminishing the influence of unimportant, redundant or even misleading features. A better strategy is to eliminate such features completely. Ideally, a molecular property can be described by a small number of features that are chemically interpretable. The purpose of the present contribution is to provide a predictive modeling approach, which combines feature generation, feature selection, model building and control of overtraining into a single application called DemQSAR. DemQSAR is used to predict human volume of distribution (VD(ss)) and human clearance (CL). To control overtraining, quadratic and linear regularization terms were employed. 
A recursive feature selection approach is used to reduce the number of descriptors. The prediction performance is as good as the best predictions reported in the recent literature. The example presented here demonstrates that DemQSAR can generate a model that uses very few features while maintaining high predictive power. A standalone DemQSAR Java application for model building of any user defined property as well as a web interface for the prediction of human VD(ss) and CL is available on the webpage of DemPRED: http://agknapp.chemie.fu-berlin.de/dempred/ .",2011-11-20 +22577179,A review and update on the current status of stem cell therapy and the retina.,"

Introduction or background

Many diseases of the retina result in irreversible visual loss. Stem cell (SC) therapy is a rapidly developing field and represents a novel approach to replace non-functioning neuro-retinal cells.

Sources of data

A systematic computerized literature search was conducted on PubMed (http://www.ncbi.nlm.nih.gov/pubmed/).

Areas of agreement

The use of stem cells (SCs) in animal models of retinal diseases has resulted in improvement in visual function and performance. SC therapy represents an exciting prospect in restoring vision. Areas of controversy: The use of human embryonic SCs raises ethical concerns.

Growing points

Human trials using SCs in retinal diseases have recently been approved.

Areas timely for developing research

The success of SCs in retinal therapy depends not only on implanted cell survival, but also on how well SCs migrate, integrate and form synapses. Further research will be needed to overcome these hurdles.",2012-05-09 +22962312,"DECIPHER: web-based, community resource for clinical interpretation of rare variants in developmental disorders.","Patients with developmental disorders often harbour sub-microscopic deletions or duplications that lead to a disruption of normal gene expression or perturbation in the copy number of dosage-sensitive genes. Clinical interpretation for such patients in isolation is hindered by the rarity and novelty of such disorders. The DECIPHER project (https://decipher.sanger.ac.uk) was established in 2004 as an accessible online repository of genomic and associated phenotypic data with the primary goal of aiding the clinical interpretation of rare copy-number variants (CNVs). DECIPHER integrates information from a variety of bioinformatics resources and uses visualization tools to identify potential disease genes within a CNV. A two-tier access system permits clinicians and clinical scientists to maintain confidential linked anonymous records of phenotypes and CNVs for their patients that, with informed consent, can subsequently be shared with the wider clinical genetics and research communities. Advances in next-generation sequencing technologies are making it practical and affordable to sequence the whole exome/genome of patients who display features suggestive of a genetic disorder. This approach enables the identification of smaller intragenic mutations including single-nucleotide variants that are not accessible even with high-resolution genomic array analysis. 
This article briefly summarizes the current status and achievements of the DECIPHER project and looks ahead to the opportunities and challenges of jointly analysing structural and sequence variation in the human genome.",2012-09-08 +21991360,Predicting P-glycoprotein-mediated drug transport based on support vector machine and three-dimensional crystal structure of P-glycoprotein.,"Human P-glycoprotein (P-gp) is an ATP-binding cassette multidrug transporter that confers resistance to a wide range of chemotherapeutic agents in cancer cells by active efflux of the drugs from cells. P-gp also plays a key role in limiting oral absorption and brain penetration and in facilitating biliary and renal elimination of structurally diverse drugs. Thus, identification of drugs or new molecular entities to be P-gp substrates is of vital importance for predicting the pharmacokinetics, efficacy, safety, or tissue levels of drugs or drug candidates. At present, publicly available, reliable in silico models predicting P-gp substrates are scarce. In this study, a support vector machine (SVM) method was developed to predict P-gp substrates and P-gp-substrate interactions, based on a training data set of 197 known P-gp substrates and non-substrates collected from the literature. We showed that the SVM method had a prediction accuracy of approximately 80% on an independent external validation data set of 32 compounds. A homology model of human P-gp based on the X-ray structure of mouse P-gp as a template has been constructed. We showed that molecular docking to the P-gp structures successfully predicted the geometry of P-gp-ligand complexes. Our SVM prediction and the molecular docking methods have been integrated into a free web server (http://pgp.althotas.com), which allows the users to predict whether a given compound is a P-gp substrate and how it binds to and interacts with P-gp. 
Utilization of such a web server may prove valuable for both rational drug design and screening.",2011-10-04 +21729030,Evidence-based (S3) guideline on topical corticosteroids in pregnancy.,"Women with skin conditions may need topical corticosteroids during pregnancy. However, little is known about the effects of topical corticosteroids on the fetus. A guideline subcommittee of the European Dermatology Forum was organized to develop an evidence-based guideline on the use of topical corticosteroids in pregnancy (http://www.euroderm.org/edf/images/stories/guidelines/EDF-Guideline-on-Steroids-in-Pregnancy.pdf). The evidence from a Cochrane Review suggested that the major possible adverse effects on the fetus of topical corticosteroids were orofacial clefts when used preconceptionally and in the first trimester of pregnancy, and fetal growth restriction when very potent topical corticosteroids were used during pregnancy. To obtain robust evidence, a large population-based cohort study (on 84,133 pregnant women from the U.K. General Practice Research Database) was performed, which found a significant association of fetal growth restriction with maternal exposure to potent/very potent topical corticosteroids, but not with mild/moderate topical corticosteroids. No associations of maternal exposure to topical corticosteroids of any potency with orofacial cleft, preterm delivery and fetal death were found. Moreover, another recent Danish cohort study did not support a causal association between topical corticosteroid and orofacial cleft. 
The current best evidence suggests that mild/moderate topical corticosteroids are preferred to potent/very potent ones in pregnancy, because of the associated risk of fetal growth restriction with the latter.",2011-09-29 +21956165,NHLBI-AbDesigner: an online tool for design of peptide-directed antibodies.,"Investigation of physiological mechanisms at a cellular level often requires production of high-quality antibodies, frequently using synthetic peptides as immunogens. Here we describe a new, web-based software tool called NHLBI-AbDesigner that allows the user to visualize the information needed to choose optimal peptide sequences for peptide-directed antibody production (http://helixweb.nih.gov/AbDesigner/). The choice of an immunizing peptide is generally based on a need to optimize immunogenicity, antibody specificity, multispecies conservation, and robustness in the face of posttranslational modifications (PTMs). AbDesigner displays information relevant to these criteria as follows: 1) ""Immunogenicity Score,"" based on hydropathy and secondary structure prediction; 2) ""Uniqueness Score,"" a predictor of specificity of an antibody against all proteins expressed in the same species; 3) ""Conservation Score,"" a predictor of ability of the antibody to recognize orthologs in other animal species; and 4) ""Protein Features"" that show structural domains, variable regions, and annotated PTMs that may affect antibody performance. AbDesigner displays the information online in an interactive graphical user interface, which allows the user to recognize the trade-offs that exist for alternative synthetic peptide choices and to choose the one that is best for a proposed application. Several examples of the use of AbDesigner for the display of such trade-offs are presented, including production of a new antibody to Slc9a3. 
We also used the program in large-scale mode to create a database listing the 15-amino acid peptides with the highest Immunogenicity Scores for all known proteins in five animal species, one plant species (Arabidopsis thaliana), and Saccharomyces cerevisiae.",2011-09-28 +21961884,Interactive metagenomic visualization in a Web browser.,"

Background

A critical output of metagenomic studies is the estimation of abundances of taxonomical or functional groups. The inherent uncertainty in assignments to these groups makes it important to consider both their hierarchical contexts and their prediction confidence. The current tools for visualizing metagenomic data, however, omit or distort quantitative hierarchical relationships and lack the facility for displaying secondary variables.

Results

Here we present Krona, a new visualization tool that allows intuitive exploration of relative abundances and confidences within the complex hierarchies of metagenomic classifications. Krona combines a variant of radial, space-filling displays with parametric coloring and interactive polar-coordinate zooming. The HTML5 and JavaScript implementation enables fully interactive charts that can be explored with any modern Web browser, without the need for installed software or plug-ins. This Web-based architecture also allows each chart to be an independent document, making them easy to share via e-mail or post to a standard Web server. To illustrate Krona's utility, we describe its application to various metagenomic data sets and its compatibility with popular metagenomic analysis tools.

Conclusions

Krona is both a powerful metagenomic visualization tool and a demonstration of the potential of HTML5 for highly accessible bioinformatic visualizations. Its rich and interactive displays facilitate more informed interpretations of metagenomic analyses, while its implementation as a browser-based application makes it extremely portable and easily adopted into existing analysis packages. Both the Krona rendering code and conversion tools are freely available under a BSD open-source license, and available from: http://krona.sourceforge.net.",2011-09-30 +21702951,AMDORAP: non-targeted metabolic profiling based on high-resolution LC-MS.,"

Background

Liquid chromatography-mass spectrometry (LC-MS) utilizing the high-resolution power of an orbitrap is an important analytical technique for both metabolomics and proteomics. The most important feature of the orbitrap is its excellent mass accuracy. Thus, it is necessary to convert raw data to accurate and reliable m/z values for metabolic fingerprinting by high-resolution LC-MS.

Results

In the present study, we developed a novel, easy-to-use and straightforward m/z detection method, AMDORAP. For assessing the performance, we used real biological samples, Bacillus subtilis strains 168 and MGB874, in the positive mode by LC-orbitrap. For 14 identified compounds by measuring the authentic compounds, we compared obtained m/z values with other LC-MS processing tools. The errors by AMDORAP were distributed within ±3 ppm and showed the best performance in m/z value accuracy.

Conclusions

Our method can detect m/z values of biological samples much more accurately than other LC-MS analysis tools. AMDORAP allows us to address the relationships between biological effects and cellular metabolites based on accurate m/z values. Obtaining the accurate m/z values from raw data should be indispensable as a starting point for comparative LC-orbitrap analysis. AMDORAP is freely available under an open-source license at http://amdorap.sourceforge.net/.",2011-06-24 +21968957,Subcellular localization prediction through boosting association rules.,"Computational methods for predicting protein subcellular localization have used various types of features, including N-terminal sorting signals, amino acid compositions, and text annotations from protein databases. Our approach does not use biological knowledge such as the sorting signals or homologues, but use just protein sequence information. The method divides a protein sequence into short $k$-mer sequence fragments which can be mapped to word features in document classification. A large number of class association rules are mined from the protein sequence examples that range from the N-terminus to the C-terminus. Then, a boosting algorithm is applied to those rules to build up a final classifier. Experimental results using benchmark datasets show our method is excellent in terms of both the classification performance and the test coverage. The result also implies that the $k$-mer sequence features which determine subcellular locations do not necessarily exist in specific positions of a protein sequence. Online prediction service implementing our method is available at http://isoft.postech.ac.kr/research/BCAR/subcell.",2011-09-27 +21627809,A color spectrographic phonocardiography (CSP) applied to the detection and characterization of heart murmurs: preliminary results.,"

Background

Although cardiac auscultation remains important to detect abnormal sounds and murmurs indicative of cardiac pathology, the application of electronic methods remains seldom used in everyday clinical practice. In this report we provide preliminary data showing how the phonocardiogram can be analyzed using color spectrographic techniques and discuss how such information may be of future value for noninvasive cardiac monitoring.

Methods

We digitally recorded the phonocardiogram using a high-speed USB interface and the program Gold Wave http://www.goldwave.com in 55 infants and adults with cardiac structural disease as well as from normal individuals and individuals with innocent murmurs. Color spectrographic analysis of the signal was performed using Spectrogram (Version 16) as well as custom MATLAB code.

Results

Our preliminary data is presented as a series of seven cases.

Conclusions

We expect the application of spectrographic techniques to phonocardiography to grow substantially as ongoing research demonstrates its utility in various clinical settings. Our evaluation of a simple, low-cost phonocardiographic recording and analysis system to assist in determining the characteristic features of heart murmurs shows promise in helping distinguish innocent systolic murmurs from pathological murmurs in children and is expected to be useful in other clinical settings as well.",2011-05-31 +21943350,GOmotif: A web server for investigating the biological role of protein sequence motifs.,"

Background

Many proteins contain conserved sequence patterns (motifs) that contribute to their functionality. The process of experimentally identifying and validating novel protein motifs can be difficult, expensive, and time consuming. A means for helping to identify in advance the possible function of a novel motif is important to test hypotheses concerning the biological relevance of these motifs, thus reducing experimental trial-and-error.

Results

GOmotif accepts PROSITE and regular expression formatted motifs as input and searches a Gene Ontology annotated protein database using motif search tools. The search returns the set of proteins containing matching motifs and their associated Gene Ontology terms. These results are presented as: 1) a hierarchical, navigable tree separated into the three Gene Ontology biological domains - biological process, cellular component, and molecular function; 2) corresponding pie charts indicating raw and statistically adjusted distributions of the results, and 3) an interactive graphical network view depicting the location of the results in the Gene Ontology.

Conclusions

GOmotif is a web-based tool designed to assist researchers in investigating the biological role of novel protein motifs. GOmotif can be freely accessed at http://www.gomotif.ca.",2011-09-26 +21552256,Combining quantitative proteomics data processing workflows for greater sensitivity.,"We here describe a normalization method to combine quantitative proteomics data. By merging the output of two popular quantification software packages, we obtained a 20% increase (on average) in the number of quantified human proteins without suffering from a loss of quality. Our integrative workflow is freely available through our user-friendly, open-source Rover software (http://compomics-rover.googlecode.com/).",2011-05-08 +23298369,Comprehensively designed consensus of standalone secondary structure predictors improves Q3 by over 3%.,"Protein fold is defined by a spatial arrangement of three types of secondary structures (SSs) including helices, sheets, and coils/loops. Current methods that predict SS from sequences rely on complex machine learning-derived models and provide the three-state accuracy (Q3) at about 82%. Further improvements in predictive quality could be obtained with a consensus-based approach, which so far received limited attention. We perform first-of-its-kind comprehensive design of a SS consensus predictor (SScon), in which we consider 12 modern standalone SS predictors and utilize Support Vector Machine (SVM) to combine their predictions. Using a large benchmark data-set with 10 random training-test splits, we show that a simple, voting-based consensus of carefully selected base methods improves Q3 by 1.9% when compared to the best single predictor. Use of SVM provides additional 1.4% improvement with the overall Q3 at 85.6% and segment overlap (SOV3) at 83.7%, when compared to 82.3 and 80.9%, respectively, obtained by the best individual methods. 
We also show strong improvements when the consensus is based on ab-initio methods, with Q3 = 82.3% and SOV3 = 80.7% that match the results from the best template-based approaches. Our consensus reduces the number of significant errors where helix is confused with a strand, provides particularly good results for short helices and strands, and gives the most accurate estimates of the content of individual SSs in the chain. Case studies are used to visualize the improvements offered by the consensus at the residue level. A web-server and a standalone implementation of SScon are available at http://biomine.ece.ualberta.ca/SSCon/ .",2013-01-09 +22591628,European Association of Urology guidelines on Male Infertility: the 2012 update.,"

Context

New data regarding the diagnosis and treatment of male infertility have emerged and led to an update of the European Association of Urology (EAU) guidelines for Male Infertility.

Objective

To review the new EAU guidelines for Male Infertility.

Evidence acquisition

A comprehensive work-up of the literature obtained from Medline, the Cochrane Central Register of Systematic Reviews, and reference lists in publications and review articles was developed and screened by a group of urologists and andrologists appointed by the EAU Guidelines Committee. Previous recommendations based on the older literature on this subject were taken into account. Levels of evidence and grade of guideline recommendations were added, modified from the Oxford Centre for Evidence-based Medicine Levels of Evidence.

Evidence summary

These EAU guidelines are a short comprehensive overview of the updated guidelines of male infertility as recently published by the EAU (http://www.uroweb.org/guidelines/online-guidelines/), and they are also available in the National Guideline Clearinghouse (http://www.guideline.gov/).",2012-05-03 +21963610,BEAGLE: an application programming interface and high-performance computing library for statistical phylogenetics.,"Phylogenetic inference is fundamental to our understanding of most aspects of the origin and evolution of life, and in recent years, there has been a concentration of interest in statistical approaches such as Bayesian inference and maximum likelihood estimation. Yet, for large data sets and realistic or interesting models of evolution, these approaches remain computationally demanding. High-throughput sequencing can yield data for thousands of taxa, but scaling to such problems using serial computing often necessitates the use of nonstatistical or approximate approaches. The recent emergence of graphics processing units (GPUs) provides an opportunity to leverage their excellent floating-point computational performance to accelerate statistical phylogenetic inference. A specialized library for phylogenetic calculation would allow existing software packages to make more effective use of available computer hardware, including GPUs. Adoption of a common library would also make it easier for other emerging computing architectures, such as field programmable gate arrays, to be used in the future. We present BEAGLE, an application programming interface (API) and library for high-performance statistical phylogenetic inference. The API provides a uniform interface for performing phylogenetic likelihood calculations on a variety of compute hardware platforms. 
The library includes a set of efficient implementations and can currently exploit hardware including GPUs using NVIDIA CUDA, central processing units (CPUs) with Streaming SIMD Extensions and related processor supplementary instruction sets, and multicore CPUs via OpenMP. To demonstrate the advantages of a common API, we have incorporated the library into several popular phylogenetic software packages. The BEAGLE library is free open source software licensed under the Lesser GPL and available from http://beagle-lib.googlecode.com. An example client program is available as public domain software.",2011-10-01 +23297802,Molecular mechanism of HIV-1 gp120 mutations that reduce CD4 binding affinity.,"The interaction of the HIV-1 fusion protein gp120 with its cellular receptor CD4 represents a crucial step of the viral infection process, thus rendering gp120 a promising target for the intervention with anti-HIV drugs. Naturally occurring mutations of gp120, however, can decrease its affinity for anti-infective ligands like therapeutic antibodies or soluble CD4. To understand this phenomenon on a structural level, we performed molecular dynamics simulations of two gp120 variants (termed gp1203-2 and gp1202-1), which exhibit a significantly decreased binding of soluble CD4. In both variants, the exchange of a nonpolar residue by glutamate was identified as an important determinant for reduced binding. However, those glutamates are located at different sequence positions and affect different steps of the recognition process: E471 in gp1203-2 predominantly affects the CD4-bound conformation, whereas E372 in gp1202-1 mainly modulates the conformational sampling of free gp120. 
Despite these differences, there exists an interesting similarity between the two variants: both glutamates exert their function by modulating the conformation and interactions of glycine-rich motifs (G366-G367, G471-G473) resulting in an accumulation of binding incompetent gp120 conformations or a loss of intermolecular gp120-CD4 hydrogen bonds. Thus, the present data suggests that interference with the structure and dynamics of glycine-rich stretches might represent a more widespread mechanism, by which gp120 mutations reduce binding affinity. This knowledge should be helpful to predict the resistance of novel gp120 mutations or to design gp120-ligands with improved binding properties. An animated interactive 3D complement (I3DC) is available in Proteopedia at http://proteopedia.org/w/Journal:JBSD:41.",2013-01-09 +21496265,SNP-based pathway enrichment analysis for genome-wide association studies.,"

Background

Recently we have witnessed a surge of interest in using genome-wide association studies (GWAS) to discover the genetic basis of complex diseases. Many genetic variations, mostly in the form of single nucleotide polymorphisms (SNPs), have been identified in a wide spectrum of diseases, including diabetes, cancer, and psychiatric diseases. A common theme arising from these studies is that the genetic variations discovered by GWAS can only explain a small fraction of the genetic risks associated with the complex diseases. New strategies and statistical approaches are needed to address this lack of explanation. One such approach is the pathway analysis, which considers the genetic variations underlying a biological pathway, rather than separately as in the traditional GWAS studies. A critical challenge in the pathway analysis is how to combine evidences of association over multiple SNPs within a gene and multiple genes within a pathway. Most current methods choose the most significant SNP from each gene as a representative, ignoring the joint action of multiple SNPs within a gene. This approach leads to preferential identification of genes with a greater number of SNPs.

Results

We describe a SNP-based pathway enrichment method for GWAS studies. The method consists of the following two main steps: 1) for a given pathway, using an adaptive truncated product statistic to identify all representative (potentially more than one) SNPs of each gene, calculating the average number of representative SNPs for the genes, then re-selecting the representative SNPs of genes in the pathway based on this number; and 2) ranking all selected SNPs by the significance of their statistical association with a trait of interest, and testing if the set of SNPs from a particular pathway is significantly enriched with high ranks using a weighted Kolmogorov-Smirnov test. We applied our method to two large genetically distinct GWAS data sets of schizophrenia, one from European-American (EA) and the other from African-American (AA). In the EA data set, we found 22 pathways with nominal P-value less than or equal to 0.001 and corresponding false discovery rate (FDR) less than 5%. In the AA data set, we found 11 pathways by controlling the same nominal P-value and FDR threshold. Interestingly, 8 of these pathways overlap with those found in the EA sample. We have implemented our method in a JAVA software package, called SNP Set Enrichment Analysis (SSEA), which contains a user-friendly interface and is freely available at http://cbcl.ics.uci.edu/SSEA.

Conclusions

The SNP-based pathway enrichment method described here offers a new alternative approach for analysing GWAS data. By applying it to schizophrenia GWAS studies, we show that our method is able to identify statistically significant pathways, and importantly, pathways that can be replicated in large genetically distinct samples.",2011-04-15 +23308169,Prediction and analysis of antibody amyloidogenesis from sequences.,"Antibody amyloidogenesis is the aggregation of soluble proteins into amyloid fibrils that is one of the major causes of the failures of humanized antibodies. The prediction and prevention of antibody amyloidogenesis are helpful for restoring and enhancing therapeutic effects. Due to a large number of possible germlines, the existing method is not practical to predict sequences of novel germlines, which establishes individual models for each known germline. This study proposes a first automatic and across-germline prediction method (named AbAmyloid) capable of predicting antibody amyloidogenesis from sequences. Since the amyloidogenesis is determined by a whole sequence of an antibody rather than germline-dependent properties such as mutated residues, this study assesses three types of germline-independent sequence features (amino acid composition, dipeptide composition and physicochemical properties). AbAmyloid using a Random Forests classifier with dipeptide composition performs well on a data set of 12 germlines. The within- and across-germline prediction accuracies are 83.10% and 83.33% using Jackknife tests, respectively, and the novel-germline prediction accuracy using a leave-one-germline-out test is 72.22%. A thorough analysis of sequence features is conducted to identify informative properties for further providing insights to antibody amyloidogenesis. 
Some identified informative physicochemical properties are amphiphilicity, hydrophobicity, reverse turn, helical structure, isoelectric point, net charge, mutability, coil, turn, linker, nuclear protein, etc. Additionally, the numbers of ubiquitylation sites in amyloidogenic and non-amyloidogenic antibodies are found to be significantly different. It reveals that antibodies less likely to be ubiquitylated tend to be amyloidogenic. The method AbAmyloid capable of automatically predicting antibody amyloidogenesis of novel germlines is implemented as a publicly available web server at http://iclab.life.nctu.edu.tw/abamyloid.",2013-01-07 +21943292,EnzymeDetector: an integrated enzyme function prediction tool and database.,"

Background

The ability to accurately predict enzymatic functions is an essential prerequisite for the interpretation of cellular functions, and the reconstruction and analysis of metabolic models. Several biological databases exist that provide such information. However, in many cases these databases provide partly different and inconsistent genome annotations.

Description

We analysed nine prokaryotic genomes and found about 70% inconsistencies in the enzyme predictions of the main annotation resources. Therefore, we implemented the annotation pipeline EnzymeDetector. This tool automatically compares and evaluates the assigned enzyme functions from the main annotation databases and supplements them with its own function prediction. This is based on a sequence similarity analysis, on manually created organism-specific enzyme information from BRENDA (Braunschweig Enzyme Database), and on sequence pattern searches.

Conclusions

EnzymeDetector provides a fast and comprehensive overview of the available enzyme function annotations for a genome of interest. The web interface allows the user to work with customisable weighting schemes and cut-offs for the different prediction methods. These customised quality criteria can easily be applied, and the resulting annotation can be downloaded. The summarised view of all used annotation sources provides up-to-date information. Annotation errors that occur in only one of the databases can be recognised (because of their low relevance score). The results are stored in a database and can be accessed at http://enzymedetector.tu-bs.de.",2011-09-23 +23186127,Enhanced anti-tumour effects of Vinca alkaloids given separately from cytostatic therapies.,"

Background and purpose

In polychemotherapy protocols, that is for treatment of neuroblastoma and Ewing sarcoma, Vinca alkaloids and cell cycle-arresting drugs are usually administered on the same day. Here we studied whether this combination enables the optimal antitumour effects of Vinca alkaloids to be manifested.

Experimental approach

Vinca alkaloids were tested in a preclinical mouse model in vivo and in vitro in combination with cell cycle-arresting drugs. Signalling pathways were characterized using RNA interference.

Key results

In vitro, knockdown of cyclins significantly inhibited vincristine-induced cell death indicating, in accordance with previous findings, Vinca alkaloids require active cell cycling and M-phase transition for induction of cell death. In contrast, anthracyclines, irradiation and dexamethasone arrested the cell cycle and acted like cytostatic drugs. The combination of Vinca alkaloids with cytostatic therapeutics resulted in diminished cell death in 31 of 36 (86%) tumour cell lines. In a preclinical tumour model, anthracyclines significantly inhibited the antitumour effect of Vinca alkaloids in vivo. Antitumour effects of Vinca alkaloids in the presence of cytostatic drugs were restored by caffeine, which maintained active cell cycling, or by knockdown of p53, which prevented drug-induced cell cycle arrest. Therapeutically most important, optimal antitumour effects were obtained in vivo upon separating the application of Vinca alkaloids from cytostatic therapeutics.

Conclusion and implications

Clinical trials are required to prove whether Vinca alkaloids act more efficiently in cancer patients if they are applied uncoupled from cytostatic therapies. On a conceptual level, our data suggest the implementation of polychemotherapy protocols based on molecular mechanisms of drug-drug interactions.

Linked article

This article is commented on by Solary, pp 1555-1557 of this issue. To view this commentary visit http://dx.doi.org/10.1111/bph.12101.",2013-04-01 +21940398,BitterDB: a database of bitter compounds.,"Basic taste qualities like sour, salty, sweet, bitter and umami serve specific functions in identifying food components found in the diet of humans and animals, and are recognized by proteins in the oral cavity. Recognition of bitter taste and aversion to it are thought to protect the organism against the ingestion of poisonous food compounds, which are often bitter. Interestingly, bitter taste receptors are expressed not only in the mouth but also in extraoral tissues, such as the gastrointestinal tract, indicating that they may play a role in digestive and metabolic processes. BitterDB database, available at http://bitterdb.agri.huji.ac.il/bitterdb/, includes over 550 compounds that were reported to taste bitter to humans. The compounds can be searched by name, chemical structure, similarity to other bitter compounds, association with a particular human bitter taste receptor, and so on. The database also contains information on mutations in bitter taste receptors that were shown to influence receptor activation by bitter compounds. The aim of BitterDB is to facilitate studying the chemical features associated with bitterness. These studies may contribute to predicting bitterness of unknown compounds, predicting ligands for bitter receptors from different species and rational design of bitterness modulators.",2011-09-22 +21963950,A new antigen scanning strategy for monitoring HIV-1 specific T-cell immune responses.,"Delineation of the immune correlates of protection in natural infection or after vaccination is a mandatory step for vaccine development. Although the most recent techniques allow a sensitive and specific detection of the cellular immune response, a consensus on the best strategy to assess their magnitude and breadth is yet to be reached. 
Within the AIDS Vaccine Integrated Project (AVIP http://www.avip-eu.org) we developed an antigen scanning strategy combining the empirical-based approach of overlapping peptides with a vast array of database information. This new system, termed Variable Overlapping Peptide Scanning Design (VOPSD), was used for preparing two peptide sets encompassing the candidate HIV-1 vaccine antigens Tat and Nef. Validation of the VOPSD strategy was obtained by direct comparison with 15mer or 20mer peptide sets in a trial involving six laboratories of the AVIP consortium. Cross-reactive background responses were measured in 80 HIV seronegative donors (HIV-), while sensitivity and magnitude of Tat and Nef-specific T-cell responses were assessed on 90 HIV+ individuals. In HIV-, VOPSD peptides generated background responses comparable with those of the standard sets. In HIV-1+ individuals the VOPSD pools showed a higher sensitivity in detecting individual responses (Tat VOPSD vs. Tat 15mers or 20mers: p≤0.01) as well as in generating stronger responses (Nef VOPSD vs. Nef 20mers: p<0.001) than standard sets, enhancing both CD4 and CD8 T-cell responses. Moreover, this peptide design allowed a marked reduction of the peptides number, representing a powerful tool for investigating novel HIV-1 candidate vaccine antigens in cohorts of HIV-seronegative and seropositive individuals.",2011-09-22 +21966371,Hydrophilic aromatic residue and in silico structure for carbohydrate binding module.,"Carbohydrate binding modules (CBMs) are found in polysaccharide-targeting enzymes and increase catalytic efficiency. Because only a relatively small number of CBM structures have been solved, computational modeling represents an alternative approach in conjunction with experimental assessment of CBM functionality and ligand-binding properties. An accurate target-template sequence alignment is the crucial step during homology modeling. 
However, low sequence identities between target/template sequences can be a major bottleneck. We therefore incorporated the predicted hydrophilic aromatic residues (HARs) and secondary structure elements into our feature-incorporated alignment (FIA) algorithm to increase CBM alignment accuracy. An alignment performance comparison for FIA and six others was made, and the greatest average sequence identities and similarities were achieved by FIA. In addition, structure models were built for 817 representative CBMs. Our models possessed the smallest average surface-potential z scores. Besides, a large true positive value for ligand-binding aromatic residue prediction was obtained by HAR identification. Finally, the pre-simulated CBM structures have been deposited in the Database of Simulated CBM structures (DS-CBMs). The web service is publicly available at http://dscbm.life.nthu.edu.tw/ and http://dscbm.cs.ntou.edu.tw/.",2011-09-22 +23024010,Site identification in high-throughput RNA-protein interaction data.,"

Motivation

Post-transcriptional and co-transcriptional regulation is a crucial link between genotype and phenotype. The central players are the RNA-binding proteins, and experimental technologies [such as cross-linking with immunoprecipitation- (CLIP-) and RIP-seq] for probing their activities have advanced rapidly over the course of the past decade. Statistically robust, flexible computational methods for binding site identification from high-throughput immunoprecipitation assays are largely lacking however.

Results

We introduce a method for site identification which provides four key advantages over previous methods: (i) it can be applied on all variations of CLIP and RIP-seq technologies, (ii) it accurately models the underlying read-count distributions, (iii) it allows external covariates, such as transcript abundance (which we demonstrate is highly correlated with read count) to inform the site identification process and (iv) it allows for direct comparison of site usage across cell types or conditions.

Availability and implementation

We have implemented our method in a software tool called Piranha. Source code and binaries, licensed under the GNU General Public License (version 3) are freely available for download from http://smithlab.usc.edu.

Contact

andrewds@usc.edu

Supplementary information

Supplementary data available at Bioinformatics online.",2012-09-28 +23426037,Effects of green tea catechins with or without caffeine on glycemic control in adults: a meta-analysis of randomized controlled trials.,"

Background

The effect of green tea catechins (GTCs) with or without caffeine on glycemic control is controversial.

Objective

We aimed to identify and quantify the effects of GTCs or GTC-caffeine mixtures on glucose metabolism in adults.

Design

A comprehensive literature search was conducted to identify relevant trials of GTCs with or without caffeine on markers of glycemic control [fasting blood glucose (FBG), fasting blood insulin (FBI), glycated hemoglobin (Hb A1c), and homeostatic model assessment of insulin resistance (HOMA-IR)]. Weighted mean differences were calculated for net changes by using fixed-effects models. Prespecified subgroup analyses were performed to explore the influence of covariates on net changes in FBG and FBI concentrations.

Results

Twenty-two eligible randomized controlled trials with 1584 subjects were identified. Pooled analyses showed that FBG (-1.48 mg/dL; 95% CI: -2.57, -0.40 mg/dL) decreased significantly with GTCs with or without caffeine, whereas FBI (0.04 μU/mL; 95% CI: -0.36, 0.45 μU/mL), Hb A1c (-0.04%; 95% CI: -0.15, 0.08%), and HOMA-IR (-0.05; 95% CI: -0.37, 0.26) did not. Subgroup analyses indicated that the glucose-lowering effect was apparent when the duration of follow-up was over a median of 12 wk. Overall, no significant heterogeneity was detected for FBG, FBI, Hb A1c, or HOMA-IR.

Conclusions

The meta-analysis showed that the administration of GTCs with or without caffeine resulted in a significant reduction in FBG. The limited data available on GTCs did not support a positive effect on FBI, Hb A1c, or HOMA-IR. Thus, more large and well-designed trials are needed in the future. This trial was registered at http://www.crd.york.ac.uk/prospero as CRD42012002139.",2013-02-20 +21945336,Normalized global alignment for protein sequences.,"Global alignment is used to compare proteins in different fields, for example in phylogenetic research. In order to reduce the length and composition dependence of global alignment scores, Z-score is computed with a Monte-Carlo algorithm. This technique requires a great number of sequence alignments on shuffled sequences, leading to a high computational cost. In this work, a normalized global alignment score is introduced in order to correct the length dependence of global alignments. This score is defined as the best ratio between the score of an alignment and its length, and an algorithm to compute it based on fractional programming is implemented. The properties and effectiveness of normalized global alignment applied to protein comparison are analyzed. Experiments with proteins selected from the SCOP ASTRAL database were run to study relationship of normalized global alignment with Z-score and performance in homologous detection. Results show that normalized global alignment has a computational cost equivalent to 2.5 Needleman-Wunsch runs and a linear relationship with Z-score. This linearity allows us to use normalized global alignment as a cheap substitute to a computationally expensive Z-score. Experiments show that normalized global alignment improves the ability to identify homologous proteins. Software used to compute normalized global alignments is available from http://www3.uji.es/~peris/nga.",2011-09-21 +23419168,CD44/CD24 immunophenotypes on clinicopathologic features of salivary glands malignant neoplasms.,"

Background

Salivary glands malignant neoplasms (SGMNs) account for 3-6% of head and neck cancers and 0.3% of all cancers. Tumor cells that express CD44 and CD24 exhibit a stem-cell-like behavior. CD44 is the binding site for hyaluronic acid, and CD24 is a receptor that interacts with P-selectin to induce metastasis and tumor progression. The present study aims to evaluate the expression of CD44 and CD24 on SGMNs and correlate these data with several clinicopathologic features.

Methods

Immunohistochemical stains for CD44 and CD24 were performed on tissue microarrays containing SGMN samples from 69 patients. The CD44, CD24 and CD44/CD24 expression phenotypes were correlated to patient clinicopathologic features and outcome.

Results

CD44 expression was associated with the primary site of neoplasm (p = 0.046). CD24 was associated with clinical stage III/IV (p = 0.008), T stage (p = 0.27) and lymph node (p = 0.001). The CD44/CD24 profiles were associated with the primary site of injury (p = 0.005), lymph node (p = 0.011) and T stage (p = 0.023). Univariate analysis showed a significant relationship between clinical staging and disease-free survival (p = 0.009), and the overall survival presents relation with male gender (p = 0.011) and metastasis (p = 0.027).

Conclusion

In summary, our investigation confirms that the clinical stage, in accordance with the literature, is the main prognostic factor for SGMN. Additionally, we have presented some evidence that the analysis of isolated CD44 and CD24 immunoexpression or the two combined markers could give prognostic information associated to clinicopathologic features in SGMN.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1284611098470676.",2013-02-18 +22185628,An expression map for Anopheles gambiae.,"

Background

Quantitative transcriptome data for the malaria-transmitting mosquito Anopheles gambiae covers a broad range of biological and experimental conditions, including development, blood feeding and infection. Web-based summaries of differential expression for individual genes with respect to these conditions are a useful tool for the biologist, but they lack the context that a visualisation of all genes with respect to all conditions would give. For most organisms, including A. gambiae, such a systems-level view of gene expression is not yet available.

Results

We have clustered microarray-based gene-averaged expression values, available from VectorBase, for 10194 genes over 93 experimental conditions using a self-organizing map. Map regions corresponding to known biological events, such as egg production, are revealed. Many individual gene clusters (nodes) on the map are highly enriched in biological and molecular functions, such as protein synthesis, protein degradation and DNA replication. Gene families, such as odorant binding proteins, can be classified into distinct functional groups based on their expression and evolutionary history. Immunity-related genes are non-randomly distributed in several distinct regions on the map, and are generally distant from genes with house-keeping roles. Each immunity-rich region appears to represent a distinct biological context for pathogen recognition and clearance (e.g. the humoral and gut epithelial responses). Several immunity gene families, such as peptidoglycan recognition proteins (PGRPs) and defensins, appear to be specialised for these distinct roles, while three genes with physically interacting protein products (LRIM1/APL1C/TEP1) are found in close proximity.

Conclusions

The map provides the first genome-scale, multi-experiment overview of gene expression in A. gambiae and should also be useful at the gene-level for investigating potential interactions. A web interface is available through the VectorBase website http://www.vectorbase.org/. It is regularly updated as new experimental data becomes available.",2011-12-20 +22649057,idTarget: a web server for identifying protein targets of small chemical molecules with robust scoring functions and a divide-and-conquer docking approach.,"Identification of possible protein targets of small chemical molecules is an important step for unravelling their underlying causes of actions at the molecular level. To this end, we construct a web server, idTarget, which can predict possible binding targets of a small chemical molecule via a divide-and-conquer docking approach, in combination with our recently developed scoring functions based on robust regression analysis and quantum chemical charge models. Affinity profiles of the protein targets are used to provide the confidence levels of prediction. The divide-and-conquer docking approach uses adaptively constructed small overlapping grids to constrain the searching space, thereby achieving better docking efficiency. Unlike previous approaches that screen against a specific class of targets or a limited number of targets, idTarget screens against nearly all protein structures deposited in the Protein Data Bank (PDB). We show that idTarget is able to reproduce known off-targets of drugs or drug-like compounds, and the suggested new targets could be prioritized for further investigation. 
idTarget is freely available as a web-based server at http://idtarget.rcas.sinica.edu.tw.",2012-05-30 +22927229,3Drefine: consistent protein structure refinement by optimizing hydrogen bonding network and atomic-level energy minimization.,"One of the major limitations of computational protein structure prediction is the deviation of predicted models from their experimentally derived true, native structures. The limitations often hinder the possibility of applying computational protein structure prediction methods in biochemical assignment and drug design that are very sensitive to structural details. Refinement of these low-resolution predicted models to high-resolution structures close to the native state, however, has proven to be extremely challenging. Thus, protein structure refinement remains a largely unsolved problem. Critical assessment of techniques for protein structure prediction (CASP) specifically indicated that most predictors participating in the refinement category still did not consistently improve model quality. Here, we propose a two-step refinement protocol, called 3Drefine, to consistently bring the initial model closer to the native structure. The first step is based on optimization of hydrogen bonding (HB) network and the second step applies atomic-level energy minimization on the optimized model using a composite physics and knowledge-based force fields. The approach has been evaluated on the CASP benchmark data and it exhibits consistent improvement over the initial structure in both global and local structural quality measures. 3Drefine method is also computationally inexpensive, consuming only few minutes of CPU time to refine a protein of typical length (300 residues). 3Drefine web server is freely available at http://sysbio.rnet.missouri.edu/3Drefine/.",2012-09-26 +22053076,deepBlockAlign: a tool for aligning RNA-seq profiles of read block patterns.,"

Motivation

High-throughput sequencing methods allow whole transcriptomes to be sequenced fast and cost-effectively. Short RNA sequencing provides not only quantitative expression data but also an opportunity to identify novel coding and non-coding RNAs. Many long transcripts undergo post-transcriptional processing that generates short RNA sequence fragments. Mapped back to a reference genome, they form distinctive patterns that convey information on both the structure of the parent transcript and the modalities of its processing. The miR-miR* pattern from microRNA precursors is the best-known, but by no means singular, example.

Results

deepBlockAlign introduces a two-step approach to align RNA-seq read patterns with the aim of quickly identifying RNAs that share similar processing footprints. Overlapping mapped reads are first merged to blocks and then closely spaced blocks are combined to block groups, each representing a locus of expression. In order to compare block groups, the constituent blocks are first compared using a modified sequence alignment algorithm to determine similarity scores for pairs of blocks. In the second stage, block patterns are compared by means of a modified Sankoff algorithm that takes both block similarities and similarities of pattern of distances within the block groups into account. Hierarchical clustering of block groups clearly separates most miRNA and tRNA, and also identifies about a dozen tRNAs clustering together with miRNA. Most of these putative Dicer-processed tRNAs, including eight cases reported to generate products with miRNA-like features in literature, exhibit read blocks distinguished by precise start position of reads.

Availability

The program deepBlockAlign is available as source code from http://rth.dk/resources/dba/.

Contact

gorodkin@rth.dk; studla@bioinf.uni-leipzig.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-03 +21949676,MACSE: Multiple Alignment of Coding SEquences accounting for frameshifts and stop codons.,"Until now the most efficient solution to align nucleotide sequences containing open reading frames was to use indirect procedures that align amino acid translation before reporting the inferred gap positions at the codon level. There are two important pitfalls with this approach. Firstly, any premature stop codon impedes using such a strategy. Secondly, each sequence is translated with the same reading frame from beginning to end, so that the presence of a single additional nucleotide leads to both aberrant translation and alignment.We present an algorithm that has the same space and time complexity as the classical Needleman-Wunsch algorithm while accommodating sequencing errors and other biological deviations from the coding frame. The resulting pairwise coding sequence alignment method was extended to a multiple sequence alignment (MSA) algorithm implemented in a program called MACSE (Multiple Alignment of Coding SEquences accounting for frameshifts and stop codons). MACSE is the first automatic solution to align protein-coding gene datasets containing non-functional sequences (pseudogenes) without disrupting the underlying codon structure. It has also proved useful in detecting undocumented frameshifts in public database sequences and in aligning next-generation sequencing reads/contigs against a reference coding sequence.MACSE is distributed as an open-source java file executable with freely available source code and can be used via a web interface at: http://mbb.univ-montp2.fr/macse.",2011-09-16 +21920017,Balancing donor and recipient risk factors in liver transplantation: the value of D-MELD with particular reference to HCV recipients.,"Donor-recipient match is a matter of debate in liver transplantation. 
D-MELD (donor age × recipient biochemical model for end-stage liver disease [MELD]) and other factors were analyzed on a national Italian database recording 5946 liver transplants. Primary endpoint was to determine factors predictive of 3-year patient survival. D-MELD cutoff predictive of 5-year patient survival <50% (5yrsPS<50%) was investigated. A prognosis calculator was implemented (http://www.D-MELD.com). Differences among D-MELD deciles allowed their regrouping into three D-MELD classes (A < 338, B 338-1628, C >1628). At 3 years, the odds ratio (OR) for death was 2.03 (95% confidence interval [CI], 1.44-2.85) in D-MELD class C versus B. The OR was 0.40 (95% CI, 0.24-0.66) in class A versus class B. Other predictors were hepatitis C virus (HCV; OR = 1.42; 95% CI, 1.11-1.81), hepatitis B virus (HBV; OR = 0.69; 95% CI, 0.51-0.93), retransplant (OR = 1.82; 95% CI, 1.16-2.87) and low-volume center (OR = 1.48; 95% CI, 1.11-1.99). Cox regressions up to 90 months confirmed results. The hazard ratio was 1.97 (95% CI, 1.59-2.43) for D-MELD class C versus class B and 0.42 (95% CI, 0.29-0.60) for D-MELD class A versus class B. Recipient age, HCV, HBV and retransplant were also significant. The 5yrsPS<50% cutoff was identified only in HCV patients (D-MELD ≥ 1750). The innovative approach offered by D-MELD and covariates is helpful in predicting outcome after liver transplantation, especially in HCV recipients.",2011-09-15 +21793105,DYT6 dystonia: review of the literature and creation of the UMD Locus-Specific Database (LSDB) for mutations in the THAP1 gene.,"By family-based screening, first Fuchs and then many other authors showed that mutations in THAP1 (THAP [thanatos-associated protein] domain-containing, apoptosis-associated protein 1) account for a substantial proportion of familial, early-onset, nonfocal, primary dystonia cases (DYT6 dystonia). 
THAP1 is the first transcriptional factor involved in primary dystonia and the hypothesis of a transcriptional deregulation, which was primarily proposed for the X-linked dystonia-parkinsonism (DYT3 dystonia), provided thus a new way to investigate the possible mechanism underlying the development of dystonic movements. Currently, 56 families present with a THAP1 mutation; however, no genotype/phenotype relationship has been found. Therefore, we carried out a systematic review of the literature on the THAP1 gene to colligate all reported patients with a specific THAP1 mutation and the associated clinical signs in order to describe the broad phenotypic continuum of this disorder. To facilitate the comparison of the identified mutations, we created a Locus-Specific Database (UMD-THAP1 LSDB) available at http://www.umd.be/THAP1/. Currently, the database lists 56 probands and 43 relatives with the associated clinical phenotype when available. The identification of a larger number of THAP1 mutations and collection of high-quality clinical information for each described mutation through international collaborative effort will help investigating the structure-function and genotype-phenotype correlations in DYT6 dystonia.",2011-09-15 +21303543,An EST-based analysis identifies new genes and reveals distinctive gene expression features of Coffea arabica and Coffea canephora.,"

Background

Coffee is one of the world's most important crops; it is consumed worldwide and plays a significant role in the economy of producing countries. Coffea arabica and C. canephora are responsible for 70 and 30% of commercial production, respectively. C. arabica is an allotetraploid from a recent hybridization of the diploid species, C. canephora and C. eugenioides. C. arabica has lower genetic diversity and results in a higher quality beverage than C. canephora. Research initiatives have been launched to produce genomic and transcriptomic data about Coffea spp. as a strategy to improve breeding efficiency.

Results

Assembling the expressed sequence tags (ESTs) of C. arabica and C. canephora produced by the Brazilian Coffee Genome Project and the Nestlé-Cornell Consortium revealed 32,007 clusters of C. arabica and 16,665 clusters of C. canephora. We detected different GC3 profiles between these species that are related to their genome structure and mating system. BLAST analysis revealed similarities between coffee and grape (Vitis vinifera) genes. Using KA/KS analysis, we identified coffee genes under purifying and positive selection. Protein domain and gene ontology analyses suggested differences between Coffea spp. data, mainly in relation to complex sugar synthases and nucleotide binding proteins. OrthoMCL was used to identify specific and prevalent coffee protein families when compared to five other plant species. Among the interesting families annotated are new cystatins, glycine-rich proteins and RALF-like peptides. Hierarchical clustering was used to independently group C. arabica and C. canephora expression clusters according to expression data extracted from EST libraries, resulting in the identification of differentially expressed genes. Based on these results, we emphasize gene annotation and discuss plant defenses, abiotic stress and cup quality-related functional categories.

Conclusion

We present the first comprehensive genome-wide transcript profile study of C. arabica and C. canephora, which can be freely assessed by the scientific community at http://www.lge.ibi.unicamp.br/coffea. Our data reveal the presence of species-specific/prevalent genes in coffee that may help to explain particular characteristics of these two crops. The identification of differentially expressed transcripts offers a starting point for the correlation between gene expression profiles and Coffea spp. developmental traits, providing valuable insights for coffee breeding and biotechnology, especially concerning sugar metabolism and stress tolerance.",2011-02-08 +21575263,CASCADE_SCAN: mining signal transduction network from high-throughput data based on steepest descent method.,"

Background

Signal transduction is an essential biological process involved in cell response to environment changes, by which extracellular signaling initiates intracellular signaling. Many computational methods have been generated in mining signal transduction networks with the increasing of high-throughput genomic and proteomic data. However, more effective means are still needed to understand the complex mechanisms of signaling pathways.

Results

We propose a new approach, namely CASCADE_SCAN, for mining signal transduction networks from high-throughput data based on the steepest descent method using indirect protein-protein interactions (PPIs). This method is useful for actual biological application since the given proteins utilized are no longer confined to membrane receptors or transcription factors as in existing methods. The precision and recall values of CASCADE_SCAN are comparable with those of other existing methods. Moreover, functional enrichment analysis of the network components supported the reliability of the results.

Conclusions

CASCADE_SCAN is a more suitable method than existing methods for detecting underlying signaling pathways where the membrane receptors or transcription factors are unknown, providing significant insight into the mechanism of cellular signaling in growth, development and cancer. A new tool based on this method is freely available at http://www.genomescience.com.cn/CASCADE_SCAN/.",2011-05-17 +23276152,Relationship of red splenic arteriolar hyaline with rapid death: a clinicopathological study of 82 autopsy cases.,"

Background

Little is known about the relationship between splenic arteriolar hyaline and cause of death. The purpose of this retrospective study was to evaluate the clinicopathological significance of splenic arteriolar hyaline in autopsy cases and estimate the applicability of hyaline for diagnosing the cause and rapidity of death.

Methods

Archival data and histological slides from 82 cases were reviewed retrospectively. One section of each spleen was evaluated microscopically. The tinctorial pattern of splenic arteriolar hyaline was examined with Heidenhain's Azan trichrome stain, and the relationships between this pattern and age, cause of death, and rapidity of death were investigated.

Results

Fifty-four cases demonstrated hyaline change, with 3 different tinctorial patterns: red, blue, and a combination of red and blue. The 3 patterns coexisted in various proportions in each tissue section. Frequency of the blue pattern increased with age (P < 0.01) and was unrelated to cause of death. By contrast, the red pattern was unrelated to age and appeared with different frequency according to cause of death. The red pattern appeared with significantly higher frequency in the circulatory disease group and the drowning and asphyxia group (both P < 0.01). Moreover, the presence of the red pattern had high specificity for the detection of rapidly fatal cases. The combination of the 2 colors was excluded from clinicopathological analyses due to its admixed nature.

Conclusions

Estimation of splenic arteriolar hyaline with Heidenhain's Azan trichrome stain is useful for assessment of the cause and rapidity of death.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1132441651796836.",2012-12-31 +21917859,GETPrime: a gene- or transcript-specific primer database for quantitative real-time PCR.,"The vast majority of genes in humans and other organisms undergo alternative splicing, yet the biological function of splice variants is still very poorly understood in large part because of the lack of simple tools that can map the expression profiles and patterns of these variants with high sensitivity. High-throughput quantitative real-time polymerase chain reaction (qPCR) is an ideal technique to accurately quantify nucleic acid sequences including splice variants. However, currently available primer design programs do not distinguish between splice variants and also differ substantially in overall quality, functionality or throughput mode. Here, we present GETPrime, a primer database supported by a novel platform that uniquely combines and automates several features critical for optimal qPCR primer design. These include the consideration of all gene splice variants to enable either gene-specific (covering the majority of splice variants) or transcript-specific (covering one splice variant) expression profiling, primer specificity validation, automated best primer pair selection according to strict criteria and graphical visualization of the latter primer pairs within their genomic context. GETPrime primers have been extensively validated experimentally, demonstrating high transcript specificity in complex samples. Thus, the free-access, user-friendly GETPrime database allows fast primer retrieval and visualization for genes or groups of genes of most common model organisms, and is available at http://updepla1srv1.epfl.ch/getprime/. 
Database URL: http://deplanckelab.epfl.ch.",2011-09-14 +23630338,Quantitative properties and receptor reserve of the DAG and PKC branch of G(q)-coupled receptor signaling.,"Gq protein-coupled receptors (GqPCRs) of the plasma membrane activate the phospholipase C (PLC) signaling cascade. PLC cleaves the membrane lipid phosphatidylinositol 4,5-bisphosphate (PIP2) into the second messengers diacylgycerol (DAG) and inositol 1,4,5-trisphosphate (IP3), leading to calcium release, protein kinase C (PKC) activation, and in some cases, PIP2 depletion. We determine the kinetics of each of these downstream endpoints and also ask which is responsible for the inhibition of KCNQ2/3 (KV7.2/7.3) potassium channels in single living tsA-201 cells. We measure DAG production and PKC activity by Förster resonance energy transfer-based sensors, and PIP2 by KCNQ2/3 channels. Fully activating endogenous purinergic receptors by uridine 5'triphosphate (UTP) leads to calcium release, DAG production, and PKC activation, but no net PIP2 depletion. Fully activating high-density transfected muscarinic receptors (M1Rs) by oxotremorine-M (Oxo-M) leads to similar calcium, DAG, and PKC signals, but PIP2 is depleted. KCNQ2/3 channels are inhibited by the Oxo-M treatment (85%) and not by UTP (<1%), indicating that depletion of PIP2 is required to inhibit KCNQ2/3 in response to receptor activation. Overexpression of A kinase-anchoring protein (AKAP)79 or calmodulin (CaM) does not increase KCNQ2/3 inhibition by UTP. From these results and measurements of IP3 and calcium presented in our companion paper (Dickson et al. 2013. J. Gen. Physiol. http://dx.doi.org/10.1085/jgp.201210886), we extend our kinetic model for signaling from M1Rs to DAG/PKC and IP3/calcium signaling. We conclude that calcium/CaM and PKC-mediated phosphorylation do not underlie dynamic KCNQ2/3 channel inhibition during GqPCR activation in tsA-201 cells. 
Finally, our experimental data provide indirect evidence for cleavage of PI(4)P by PLC in living cells, and our modeling revisits/explains the concept of receptor reserve with measurements from all steps of GqPCR signaling.",2013-05-01 +23631806,Upregulation of microRNA-224 is associated with aggressive progression and poor prognosis in human cervical cancer.,"

Objective

Accumulating evidence for differential expression of microRNA-224 (miR-224) in various types of human cancer suggests that it may be play a crucial role in tumor biology. The previous microarray detection also shown that miR-224 was one of miRNAs with significant upregulation in cervical cancer tissues relative to adjacent normal tissues. However, little is known about the function of miR-224 in human cervical cancer. The aim of this study was to investigate the clinical significance of miR-224 expression in cervical cancer.

Methods

MiR-224 expression in 126 pairs of fresh human cervical cancer and adjacent normal tissues was measured by real-time quantitative RT-PCR assay.

Results

miR-224 expression was significantly upregulated in cervical cancer tissues when compared with corresponding adjacent normal tissues (P<0.001). It was also significantly higher in the cancerous tissues of patients with advanced FIGO stage cervical cancer than those with early FIGO stage (P=0.02). In addition, miR-224 was expressed at significantly higher levels in lymph node metastasis-positive patients than in lymph node metastasis-negative patients (P=0.008). Moreover, we found that lesser differentiated tumors expressed higher miR-224 (P=0.03). Finally, there were sufficient evidence to confirm its value in the status of vascular invasion (P=0.01) and human papillomavirus (HPV) infection (P=0.02) in cervical cancer. More importantly, Kaplan-Meier analysis showed that cervical cancer patients with high miR-224 expression tend to have shorter overall survival. In multivariate analysis stratified for known prognostic variables, miR-224 was identified as an independent prognostic marker.

Conclusion

Our data indicated that miR-224 upregulation was associated with aggressive progression and poor prognosis in cervical cancer. MiR-224 was identified for the first time as an independent marker for predicting the clinical outcome of cervical cancer patients.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2170449349527493.",2013-04-30 +21952778,ExMS: data analysis for HX-MS experiments.,"A previous paper considered the problems that presently limit the hydrogen exchange-mass spectrometry (HX-MS) method for studying the biophysical and functional properties of proteins. Many of these problems can be overcome by obtaining and analyzing hundreds of sequentially overlapping peptide fragments that cover the protein many times over (Mayne et al. J. Am. Soc. Mass Spectrom. 2011: 10.1007/s13361-011-0235-4). This paper describes a computer program called ExMS that furthers this advance by making it possible to efficiently process crowded mass spectra and definitively identify and characterize these many peptide fragments. ExMS automatically scans through high resolution MS data to find the individual isotopic peaks and isotopic envelopes of a list of peptides previously identified by MS/MS. It performs a number of tests to ensure correct identification in spite of peptide overlap in both chromatographic and mass spectrometric dimensions and possible multi-modal envelopes due to static or dynamic structural heterogeneity or HX EX1 behavior. The program can automatically process data from many sequential HX time points with no operator intervention at the rate of ~2 sec per peptide per HX time point using desktop computer equipment, but it also provides for rapid manual checking and decision when ambiguity exists. Additional subroutines can provide a step by step report of performance at each test along the way and parameter adjustment, deconvolute isotopic envelopes, and plot the time course of single and multi-modal H-D exchange. The program will be available on an open source basis at: http://HX2.med.upenn.edu/download.html.",2011-09-15 +21325299,Identifying dispersed epigenomic domains from ChIP-Seq data.,"

Motivation

Post-translational modifications to histones have several well known associations with regulation of gene expression. While some modifications appear concentrated narrowly, covering promoters or enhancers, others are dispersed as epigenomic domains. These domains mark contiguous regions sharing an epigenomic property, such as actively transcribed or poised genes, or heterochromatically silenced regions. While high-throughput methods like ChIP-Seq have led to a flood of high-quality data about these epigenomic domains, there remain important analysis problems that are not adequately solved by current analysis tools.

Results

We present the RSEG method for identifying epigenomic domains from ChIP-Seq data for histone modifications. In contrast with other methods emphasizing the locations of 'peaks' in read density profiles, our method identifies the boundaries of domains. RSEG is also able to incorporate a control sample and find genomic regions with differential histone modifications between two samples.

Availability

RSEG, including source code and documentation, is freely available at http://smithlab.cmb.usc.edu/histone/rseg/.",2011-02-16 +22515686,SNPpath: characterizing cattle SNPs by enriched pathway terms.,"High-density single nucleotide polymorphism (SNP) microarrays have made large-scale genome-wide association studies (GWAS) and genomic selection (GS) feasible. Valuable insight into the genetic basis underlying complex polygenic traits will likely be gained by considering functionally related sets of genes simultaneously. SNPpath, a suite of computer-generated imagery-based web servers has been developed to automatically annotate and characterize cattle SNPs by enriched KEGG (Kyoto Encyclopedia of Genes and Genomes) pathway terms. The SNPpath allows users to navigate and analysis large SNP sets and is the only web server currently providing pathway annotations of cattle SNPs in National Center for Biotechnology Information's dbSNP database and three commercial platforms. Hence, we describe SNPpath and provide details of the query options, as well as biological examples of use. The SNPpath may be favorable for the analysis of combining SNP association analysis with pathway-driven gene set enrichment analysis and is freely available at http://klab.sjtu.edu.cn/SNPpath.",2011-09-12 +22621612,C-GATE - catalogue of genes affected by transposable elements.,"

Background

Functional regulatory sequences are present in many transposable element (TE) copies, resulting in TEs being frequently exapted by host genes. Today, many examples of TEs impacting host gene expression can be found in the literature and we believe a new catalogue of such exaptations would be useful for the field.

Findings

We have established the catalogue of genes affected by transposable elements (C-GATE), which can be found at https://sites.google.com/site/tecatalog/. To date, it holds 221 cases of biologically verified TE exaptations and more than 10,000 in silico TE-gene partnerships. C-GATE is interactive and allows users to include missed or new TE exaptation data. C-GATE provides a graphic representation of the entire library, which may be used for future statistical analysis of TE impact on host gene expression.

Conclusions

We hope C-GATE will be valuable for the TE community but also for others who have realized the role that TEs may have in their research.",2012-05-23 +23555212,Network-based survival analysis reveals subnetwork signatures for predicting outcomes of ovarian cancer treatment.,"Cox regression is commonly used to predict the outcome by the time to an event of interest and in addition, identify relevant features for survival analysis in cancer genomics. Due to the high-dimensionality of high-throughput genomic data, existing Cox models trained on any particular dataset usually generalize poorly to other independent datasets. In this paper, we propose a network-based Cox regression model called Net-Cox and applied Net-Cox for a large-scale survival analysis across multiple ovarian cancer datasets. Net-Cox integrates gene network information into the Cox's proportional hazard model to explore the co-expression or functional relation among high-dimensional gene expression features in the gene network. Net-Cox was applied to analyze three independent gene expression datasets including the TCGA ovarian cancer dataset and two other public ovarian cancer datasets. Net-Cox with the network information from gene co-expression or functional relations identified highly consistent signature genes across the three datasets, and because of the better generalization across the datasets, Net-Cox also consistently improved the accuracy of survival prediction over the Cox models regularized by L(2) or L(1). This study focused on analyzing the death and recurrence outcomes in the treatment of ovarian carcinoma to identify signature genes that can more reliably predict the events. The signature genes comprise dense protein-protein interaction subnetworks, enriched by extracellular matrix receptors and modulators or by nuclear signaling components downstream of extracellular signal-regulated kinases. 
In the laboratory validation of the signature genes, a tumor array experiment by protein staining on an independent patient cohort from Mayo Clinic showed that the protein expression of the signature gene FBN1 is a biomarker significantly associated with the early recurrence after 12 months of the treatment in the ovarian cancer patients who are initially sensitive to chemotherapy. Net-Cox toolbox is available at http://compbio.cs.umn.edu/Net-Cox/.",2013-03-21 +22419781,IsoCor: correcting MS data in isotope labeling experiments.,"

Unlabelled

Mass spectrometry (MS) is widely used for isotopic labeling studies of metabolism and other biological processes. Quantitative applications-e.g. metabolic flux analysis-require tools to correct the raw MS data for the contribution of all naturally abundant isotopes. IsoCor is a software that allows such correction to be applied to any chemical species. Hence it can be used to exploit any isotopic tracer, from well-known ((13)C, (15)N, (18)O, etc) to unusual ((57)Fe, (77)Se, etc) isotopes. It also provides new features-e.g. correction for the isotopic purity of the tracer-to improve the accuracy of quantitative isotopic studies, and implements an efficient algorithm to process large datasets. Its user-friendly interface makes isotope labeling experiments more accessible to a wider biological community.

Availability

IsoCor is distributed under OpenSource license at http://metasys.insa-toulouse.fr/software/isocor/",2012-03-13 +21325300,Automated workflows for accurate mass-based putative metabolite identification in LC/MS-derived metabolomic datasets.,"

Motivation

The study of metabolites (metabolomics) is increasingly being applied to investigate microbial, plant, environmental and mammalian systems. One of the limiting factors is that of chemically identifying metabolites from mass spectrometric signals present in complex datasets.

Results

Three workflows have been developed to allow for the rapid, automated and high-throughput annotation and putative metabolite identification of electrospray LC-MS-derived metabolomic datasets. The collection of workflows are defined as PUTMEDID_LCMS and perform feature annotation, matching of accurate m/z to the accurate mass of neutral molecules and associated molecular formula and matching of the molecular formulae to a reference file of metabolites. The software is independent of the instrument and data pre-processing applied. The number of false positives is reduced by eliminating the inaccurate matching of many artifact, isotope, multiply charged and complex adduct peaks through complex interrogation of experimental data.

Availability

The workflows, standard operating procedure and further information are publicly available at http://www.mcisb.org/resources/putmedid.html.

Contact

warwick.dunn@manchester.ac.uk.",2011-02-16 +21938213,DDTRP: Database of Drug Targets for Resistant Pathogens.,"Emergence of drug resistance is a major threat to public health. Many pathogens have developed resistance to most of the existing antibiotics, and multidrug-resistant and extensively drug resistant strains are extremely difficult to treat. This has resulted in an urgent need for novel drugs. We describe a database called 'Database of Drug Targets for Resistant Pathogens' (DDTRP). The database contains information on drugs with reported resistance, their respective targets, metabolic pathways involving these targets, and a list of potential alternate targets for seven pathogens. The database can be accessed freely at http://bmi.icmr.org.in/DDTRP.",2011-09-06 +21938212,RiDs db: Repeats in diseases database.,"

Unlabelled

The non-coding fraction of the human genome, which is approximately 98%, is mainly constituted by repeats. Transpositions, expansions and deletions of these repeat elements contribute to a number of diseases. None of the available databases consolidates information on both tandem and interspersed repeats with the flexibility of FASTA based homology search with reference to disease genes. Repeats in diseases database (RiDs db) is a web accessible relational database, which aids analysis of repeats associated with Mendelian disorders. It is a repository of disease genes, which can be searched by FASTA program or by limitedor free- text keywords. Unlike other databases, RiDs db contains the sequences of these genes with access to corresponding information on both interspersed and tandem repeats contained within them, on a unified platform. Comparative analysis of novel or patient sequences with the reference sequences in RiDs db using FASTA search will indicate change in structure of repeats, if any, with a particular disorder. This database also provides links to orthologs in model organisms such as zebrafish, mouse and Drosophila.

Availability

The database is available for free at http://115.111.90.196/ridsdb/index.php.",2011-09-06 +22948723,Parameter estimation and quantitative parametric linkage analysis with GENEHUNTER-QMOD.,"

Objective

We present a parametric method for linkage analysis of quantitative phenotypes. The method provides a test for linkage as well as an estimate of different phenotype parameters. We have implemented our new method in the program GENEHUNTER-QMOD and evaluated its properties by performing simulations.

Methods

The phenotype is modeled as a normally distributed variable, with a separate distribution for each genotype. Parameter estimates are obtained by maximizing the LOD score over the normal distribution parameters with a gradient-based optimization called PGRAD method.

Results

The PGRAD method has lower power to detect linkage than the variance components analysis (VCA) in case of a normal distribution and small pedigrees. However, it outperforms the VCA and Haseman-Elston regression for extended pedigrees, nonrandomly ascertained data and non-normally distributed phenotypes. Here, the higher power even goes along with conservativeness, while the VCA has an inflated type I error. Parameter estimation tends to underestimate residual variances but performs better for expectation values of the phenotype distributions.

Conclusion

With GENEHUNTER-QMOD, a powerful new tool is provided to explicitly model quantitative phenotypes in the context of linkage analysis. It is freely available at http://www.helmholtz-muenchen.de/genepi/downloads.",2012-08-19 +22551170,Wrapper-based selection of genetic features in genome-wide association studies through fast matrix operations.,"

Background

Through the wealth of information contained within them, genome-wide association studies (GWAS) have the potential to provide researchers with a systematic means of associating genetic variants with a wide variety of disease phenotypes. Due to the limitations of approaches that have analyzed single variants one at a time, it has been proposed that the genetic basis of these disorders could be determined through detailed analysis of the genetic variants themselves and in conjunction with one another. The construction of models that account for these subsets of variants requires methodologies that generate predictions based on the total risk of a particular group of polymorphisms. However, due to the excessive number of variants, constructing these types of models has so far been computationally infeasible.

Results

We have implemented an algorithm, known as greedy RLS, that we use to perform the first known wrapper-based feature selection on the genome-wide level. The running time of greedy RLS grows linearly in the number of training examples, the number of features in the original data set, and the number of selected features. This speed is achieved through computational short-cuts based on matrix calculus. Since the memory consumption in present-day computers can form an even tighter bottleneck than running time, we also developed a space efficient variation of greedy RLS which trades running time for memory. These approaches are then compared to traditional wrapper-based feature selection implementations based on support vector machines (SVM) to reveal the relative speed-up and to assess the feasibility of the new algorithm. As a proof of concept, we apply greedy RLS to the Hypertension - UK National Blood Service WTCCC dataset and select the most predictive variants using 3-fold external cross-validation in less than 26 minutes on a high-end desktop. On this dataset, we also show that greedy RLS has a better classification performance on independent test data than a classifier trained using features selected by a statistical p-value-based filter, which is currently the most popular approach for constructing predictive models in GWAS.

Conclusions

Greedy RLS is the first known implementation of a machine learning based method with the capability to conduct a wrapper-based feature selection on an entire GWAS containing several thousand examples and over 400,000 variants. In our experiments, greedy RLS selected a highly predictive subset of genetic variants in a fraction of the time spent by wrapper-based selection methods used together with SVM classifiers. The proposed algorithms are freely available as part of the RLScore software library at http://users.utu.fi/aatapa/RLScore/.",2012-05-02 +24451270,CloVR-ITS: Automated internal transcribed spacer amplicon sequence analysis pipeline for the characterization of fungal microbiota.,"

Background

Besides the development of comprehensive tools for high-throughput 16S ribosomal RNA amplicon sequence analysis, there exists a growing need for protocols emphasizing alternative phylogenetic markers such as those representing eukaryotic organisms.

Results

Here we introduce CloVR-ITS, an automated pipeline for comparative analysis of internal transcribed spacer (ITS) pyrosequences amplified from metagenomic DNA isolates and representing fungal species. This pipeline performs a variety of steps similar to those commonly used for 16S rRNA amplicon sequence analysis, including preprocessing for quality, chimera detection, clustering of sequences into operational taxonomic units (OTUs), taxonomic assignment (at class, order, family, genus, and species levels) and statistical analysis of sample groups of interest based on user-provided information. Using ITS amplicon pyrosequencing data from a previous human gastric fluid study, we demonstrate the utility of CloVR-ITS for fungal microbiota analysis and provide runtime and cost examples, including analysis of extremely large datasets on the cloud. We show that the largest fractions of reads from the stomach fluid samples were assigned to Dothideomycetes, Saccharomycetes, Agaricomycetes and Sordariomycetes but that all samples were dominated by sequences that could not be taxonomically classified. Representatives of the Candida genus were identified in all samples, most notably C. quercitrusa, while sequence reads assigned to the Aspergillus genus were only identified in a subset of samples. CloVR-ITS is made available as a pre-installed, automated, and portable software pipeline for cloud-friendly execution as part of the CloVR virtual machine package (http://clovr.org).

Conclusion

The CloVR-ITS pipeline provides fungal microbiota analysis that can be complementary to bacterial 16S rRNA and total metagenome sequence analysis allowing for more comprehensive studies of environmental and host-associated microbial communities.",2013-02-04 +21920474,"A novel polyclonal antibody library for expression profiling of poorly characterized, membrane and secreted human proteins.","The YOMICS™ antibody library (http://www.yomics.com/) presented in this article is a new collection of 1559 murine polyclonal antibodies specific for 1287 distinct human proteins. This antibody library is designed to target marginally characterized membrane-associated and secreted proteins. It was generated against human proteins annotated as transmembrane or secreted in GenBank, EnsEMBL, Vega and Uniprot databases, described in no or very few dedicated PubMed-linked publications. The selected proteins/protein regions were expressed in E. coli, purified and used to raise antibodies in the mouse. The capability of YOMICS™ antibodies to specifically recognize their target proteins either as recombinant form or as expressed in cells and tissues was confirmed through several experimental approaches, including Western blot, confocal microscopy and immunohistochemistry (IHC). Moreover, to show the applicability of the library for biomarker investigation by IHC, five antibodies against proteins either known to be expressed in some cancers or homologous to tumor-associated proteins were tested on tissue microarrays carrying tumor and normal tissues from breast, colon, lung, ovary and prostate. A consistent differential expression in cancer was observed. 
Our results indicate that the YOMICS™ antibody library is a tool for systematic protein expression profile analysis that nicely complements the already available commercial antibody collections.",2011-09-05 +22522224,Genome structure determination via 3C-based data integration by the Integrative Modeling Platform.,"The three-dimensional (3D) architecture of a genome determines the spatial localization of regulatory elements and the genes they regulate. Thus, elucidating the 3D structure of a genome may result in significant insights about how genes are regulated. The current state-of-the art in experimental methods, including light microscopy and cell/molecular biology, are now able to provide detailed information on the position of genes and their interacting partners. However, such methods by themselves are not able to determine the high-resolution 3D structure of genomes or genomic domains. Here we describe a computational module of the Integrative Modeling Platform (IMP, http://www.integrativemodeling.org) that uses chromosome conformation capture data to determine the 3D architecture of genomic domains and entire genomes at unprecedented resolutions. This approach, through the visualization of looping interactions between distal regulatory elements, allows characterizing global chromatin features and their relation to gene expression. We illustrate our work by outlining the determination of the 3D architecture of the α-globin domain in the human genome.",2012-04-13 +22965123,FadE: whole genome methylation analysis for multiple sequencing platforms.,"DNA methylation plays a central role in genomic regulation and disease. Sodium bisulfite treatment (SBT) causes unmethylated cytosines to be sequenced as thymine, which allows methylation levels to reflected in the number of 'C'-'C' alignments covering reference cytosines. 
Di-base color reads produced by lifetech's SOLiD sequencer provide unreliable results when translated to bases because single sequencing errors affect the downstream sequence. We describe FadE, an algorithm to accurately determine genome-wide methylation rates directly in color or nucleotide space. FadE uses SBT unmethylated and untreated data to determine background error rates and incorporate them into a model which uses Newton-Raphson optimization to estimate the methylation rate and provide a credible interval describing its distribution at every reference cytosine. We sequenced two slides of human fibroblast cell-line bisulfite-converted fragment library with the SOLiD sequencer to investigate genome-wide methylation levels. FadE reported widespread differences in methylation levels across CpG islands and a large number of differentially methylated regions adjacent to genes which compares favorably to the results of an investigation on the same cell-line using nucleotide-space reads at higher coverage levels, suggesting that FadE is an accurate method to estimate genome-wide methylation with color or nucleotide reads. http://code.google.com/p/fade/.",2012-09-10 +21266083,The maternal and early embryonic transcriptome of the milkweed bug Oncopeltus fasciatus.,"

Background

Most evolutionary developmental biology (""evo-devo"") studies of emerging model organisms focus on small numbers of candidate genes cloned individually using degenerate PCR. However, newly available sequencing technologies such as 454 pyrosequencing have recently begun to allow for massive gene discovery in animals without sequenced genomes. Within insects, although large volumes of sequence data are available for holometabolous insects, developmental studies of basally branching hemimetabolous insects typically suffer from low rates of gene discovery.

Results

We used 454 pyrosequencing to sequence over 500 million bases of cDNA from the ovaries and embryos of the milkweed bug Oncopeltus fasciatus, which lacks a sequenced genome. This indirectly developing insect occupies an important phylogenetic position, branching basal to Diptera (including fruit flies) and Hymenoptera (including honeybees), and is an experimentally tractable model for short-germ development. 2,087,410 reads from both normalized and non-normalized cDNA assembled into 21,097 sequences (isotigs) and 112,531 singletons. The assembled sequences fell into 16,617 unique gene models, and included predictions of splicing isoforms, which we examined experimentally. Discovery of new genes plateaued after assembly of ~1.5 million reads, suggesting that we have sequenced nearly all transcripts present in the cDNA sampled. Many transcripts have been assembled at close to full length, and there is a net gain of sequence data for over half of the pre-existing O. fasciatus accessions for developmental genes in GenBank. We identified 10,775 unique genes, including members of all major conserved metazoan signaling pathways and genes involved in several major categories of early developmental processes. We also specifically address the effects of cDNA normalization on gene discovery in de novo transcriptome analyses.

Conclusions

Our sequencing, assembly and annotation framework provide a simple and effective way to achieve high-throughput gene discovery for organisms lacking a sequenced genome. These data will have applications to the study of the evolution of arthropod genes and genetic pathways, and to the wider evolution, development and genomics communities working with emerging model organisms.[The sequence data from this study have been submitted to GenBank under study accession number SRP002610 (http://www.ncbi.nlm.nih.gov/sra?term=SRP002610). Custom scripts generated are available at http://www.extavourlab.com/protocols/index.html. Seven Additional files are available.].",2011-01-25 +22894735,Histopathological analysis of vesicular and bullous lesions in Kaposi sarcoma.,"

Background

In this study, the clinical and morphological features of vesiculobullous lesions observed in Kaposi sarcoma are analyzed, and the features of bullous Kaposi sarcoma cases are emphasized.

Methods

A total of 178 biopsy materials of 75 cases diagnosed as classic-type cutaneous Kaposi sarcoma were reviewed. Twenty-five cases showing vesiculobullous features were included in the study. Tumor, epidermis, dermis, and clinical data regarding these cases were evaluated.

Results

Vesicular changes were observed in 21 (12%) out of 178 lesions of the 75 cases, while bullous changes were present in only 4 (2%). In all cases where vesicular and bullous changes were detected, tumor, epidermis, and dermis changes were similar. All cases were nodular stage KS lesions, whereas hyperkeratosis and serum exudation in the epidermis, marked edema in the dermis, and enlarged lymphatic vessels and chronic inflammatory response were observed.

Conclusions

Our findings suggest that changes in vascular resistance occurring during tumor progression are the most important factors comprising vesiculobullous morphology.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1646397188748474.",2012-08-15 +23088694,Timing is everything: when to consult palliative care.,"

Purpose

Consults promote additional perspectives and help with complex patient management. As the population ages and healthcare demands increase, providers are consulting palliative care (PC). Nurse practitioners (NPs) should understand when to consult PC.

Data sources

Information was obtained from an extensive search of the scientific literature to include Pallimed (http://www.pallimed.org/) and the author's clinical experience.

Conclusions

Based on the 2009 Clinical Practice Guidelines for Quality PC developed from the Hospice and PC Coalition, PC should be consulted at diagnosis. These findings have also been validated in a landmark randomized controlled trial by Temel et al. (2010). The goals of PC are to alleviate suffering and promote quality of life for people with illnesses. PC accepts and incorporates hospice philosophies, but is distinct.

Implications for practice

Many professional organizations are incorporating PC into their specialties and guidelines. In addition to incorporating PC at diagnosis, PC access needs to be improved. New and experienced NPs may appreciate collaborating with PC specialists. Such conversations and relationships will likely offer practical and supportive guidance to both patients and NPs. Overall, the future for PC is promising.",2012-06-15 +22008641,MrBac: a web server for draft metabolic network reconstructions for bacteria.,"Genome-scale metabolic network reconstruction can be used for simulating cellular behaviors by simultaneously monitoring thousands of biochemical reactions, and is therefore important for systems biology studies in microbes. However, the labor-intensive and time-consuming reconstruction process has hindered the progress of this important field. Here we present a web server, MrBac (Metabolic network Reconstructions for Bacteria), to streamline the network reconstruction process for draft genome-scale metabolic networks and to provide annotation information from multiple databases for further curation of the draft reconstructions. MrBac integrates comparative genomics, retrieval of genome annotations, and generation of standard systems biology file format ready for network analyses. We also used MrBac to automatically generate a draft metabolic model of Salmonella enteric serovar Typhimurium LT2. The high similarity between this automatic model and the experimentally validated models further supports the usefulness and accuracy of MrBac. The high efficiency and accuracy of MrBac may accelerate the advances of systems biology studies on microbiology. MrBac is freely available at http://sb.nhri.org.tw/MrBac.",2011-09-01 +30732027,First Report of Leaf Spot Caused by Bipolaris spicifera on Switchgrass in the United States.,"Light-to-dark brown leaf spots and general chlorosis were observed on 'Alamo' switchgrass (Panicum virgatum L.) 
grown in ornamental plantings on the campus of the University of Tennessee in Knoxville in December 2007. Disease distribution was patchy, infecting ~10% of plants. Patches had mild to severely infected plants with stunting in areas of severe infection. Symptomatic leaf tissue was surface sterilized, air dried on sterile filter paper, and plated on 2% water agar amended with 10 mg/liter of rifampicin (Sigma-Aldrich, St. Louis, MO) and 10 μl/liter of 2.4 EC Danitol miticide (Valent Chemical, Walnut Creek, CA). Plates were incubated at 26°C in darkness for 5 days. A sporulating, dematiaceous mitosporic fungus was observed and transferred to potato dextrose agar (PDA). Conidiophores were single, light brown, multiseptate, mostly straight, polytretic, geniculate, and sympodial. Conidia were 17.5 × 12 (22) to 30 × 14 (12.5) μm, oval, light brown, and distoseptate, with one to three septa and a flattened hilum on the basal cell. Conidia germinated from both poles. The causal agent was identified as Bipolaris spicifera (Bainier) Subram. Morphological features were as described for B. spicifera (2). Pathogenicity studies were conducted with 5-week-old 'Alamo' switchgrass plants grown from surface-sterilized seed in 9 × 9-cm pots containing 50% ProMix Potting and Seeding Mix (Premier Tech Horticulture, Rivière-du-Loup, Québec, Canada) and 50% Turface ProLeague (Profile Products, Buffalo Grove, IL) (vol/vol). Ten replicate pots with ~20 plants each were sprayed with a spore suspension of 4.5 × 106 spores/ml of sterile water prepared from 6-day-old cultures grown on PDA. Plants were subjected to high humidity for 45 h then incubated at 25/20°C with a 12-h photoperiod in a growth chamber. Leaf spot symptoms similar to the original disease appeared on plants in each of the 10 replicate pots 6 days postinoculation. Lesions were excised from leaves, surface sterilized, plated on water agar, and the resulting cultures were again identified as B. spicifera. 
The internal transcribed spacer (ITS) region of ribosomal DNA from the original isolate used for inoculation and the reisolated culture recovered from plants in the pathogenicity studies were amplified with PCR using primers ITS4 and ITS5 (3). PCR amplicons of ~560 bp were obtained from both isolates and sequenced. Amplicon sequences were identical and the sequence was submitted to GenBank (Accession No. HQ015445). The DNA sequence had 100% homology to the ITS sequence of B. spicifera strain NRRL 47508 (GenBank Accession No. GU183125.1) that had been isolated from sorghum seed. To our knowledge, leaf spot caused by B. spicifera has not been described on switchgrass (1). B. spicifera can be seedborne and has been reported on turfgrass seed exported from the United States to Korea (2). As switchgrass is transitioned from a prairie grass to a biofuels crop planted in large acreages, disease incidences and severities will likely increase, necessitating rapid disease identification and cost effective management strategies. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 4 August 2010. (2) H.-M. Koo et al. Plant Pathol. J. 19:133, 2003. (3) T. J. White et al. Page 315 in: PCR Protocols: A Guide to Methods and Applications. M. A. Innis et al., eds, Academic Press, San Diego, 1990.",2011-09-01 +30732049,First Report of Spot Blotch and Common Root Rot Caused by Bipolaris sorokiniana on Switchgrass in Tennessee.,"Light-to-dark brown, irregular-shaped leaf spots, chlorosis, necrotic roots, and severe stunting were observed on 'Alamo' switchgrass (Panicum virgatum L.) grown on the campus of the University of Tennessee in December 2007. Symptomatic leaf and root samples were surface sterilized, air dried on sterile filter paper, and plated on 2% water agar amended with 10 mg/liter of rifampicin (Sigma-Aldrich, St. 
Louis, MO) and 10 μl/liter of 2,4 EC Danitol miticide (Valent Chemical, Walnut Creek, CA). Plates were incubated at 25°C in darkness for 4 days. A sporulating, dematiaceous mitosporic fungus was noted and transferred to potato dextrose agar (PDA). Conidia were ovate, oblong, mostly straight, and olive to brown with three to nine septa. Conidial dimensions were 12.5 × 27.5 (17.5) to 20 × 77.5 (57) μm. Conidia were produced on single, light brown, multiseptate conidiophores that were polytretic, geniculate, and sympodial. Morphological features were as described for Bipolaris sorokiniana (Sacc.) Shoemaker (teleomorph = Cochliobolus sativus) (2,3). Disease assays were conducted with 5-week-old 'Alamo' switchgrass grown from surface-sterilized seed. Ten 9 × 9-cm2 with ~20 switchgrass seedlings were sprayed with 2.4 × 105 spores/ml of sterile water. Plants were subjected to high humidity created by enclosure in a plastic bag for 45 h. The bag was removed and plants were incubated at 25/20°C with 50 to 60% relative humidity. During the incubation, plants were maintained in growth chamber with a 12-h photoperiod of fluorescent and incandescent lighting. Foliar leaf spot symptoms appeared 6 to 10 days postinoculation for plants in all 10 replicates and necrotic lesions were observed on roots. Foliar lesions and diseased roots were surface sterilized, plated on water agar, and resultant fungal colonies were identified as B. sorokiniana. The internal transcribed spacer (ITS) and mitochondrial small subunit (SSU) regions of ribosomal DNA from the original isolate, and the isolate recovered from plants in the pathogenicity assay, were amplified with PCR, with primer pairs ITS4 and ITS5 and NMS1 and NMS2. PCR amplicons of ~551 and 571 bp were obtained with the two primer pairs, respectively. Both amplicons were obtained from both isolates and sequenced. 
Amplicon sequences from the original isolate and re-isolate were identical and the sequences were submitted to GenBank (Accession Nos. HQ611957 and HQ611958). The ITS sequences had 98% homology to 23 B. sorokiniana isolates, including B. sorokiniana strain DSM 62608 (GenBank Accession No. EF187908); SSU sequences had 99% homology to Cochliobolus sativus isolate AFTOL-ID 271 (GenBank Accession No. FJ190589). Spot blotch caused by B. sorokiniana has been reported on switchgrass in Iowa, Nebraska, Pennsylvania, and Virginia (1). To our knowledge, this is the first report of B. sorokiniana causing spot blotch or common root rot of switchgrass in Tennessee, which extends the current known distribution of these diseases. More recently, we isolated B. sorokiniana from switchgrass seed received from commercial sources in the United States, indicating a seedborne transmission. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 15 November 2010. (2) R. F. Nyvall and J. A. Percich. Plant Dis. 83:936, 1999. (3) A. Sivanesan and P. Holliday. CMI Descr. Pathog. Fungi bact. 71:701, 1981.",2011-09-01 +23748438,NIH consensus development conference: diagnosing gestational diabetes mellitus.,"

Objective

To provide healthcare providers, patients, and the general public with a responsible assessment of currently available data on diagnosing gestational diabetes mellitus (GDM).

Participants

A non-U.S. Department of Health and Human Services, nonadvocate 15-member panel representing the fields of obstetrics and gynecology, maternal-fetal medicine, pediatrics, diabetic research, biostatistics, women's health issues, health services research, decision analysis, health management and policy, health economics, epidemiology, and community engagement. In addition, 16 experts from pertinent fields presented data to the panel and conference audience.

Evidence

Presentations by experts and a systematic review of the literature prepared by the University of Alberta Evidence-based Practice Centre, through the Agency for Healthcare Research and Quality (AHRQ). Scientific evidence was given precedence over anecdotal experience.

Conference process

The panel drafted its statement based on scientific evidence presented in open forum and on published scientific literature. The draft statement was posted at http://prevention.nih.gov/ for public comment and the panel released a final statement approximately 10 weeks later. The final statement is an independent report of the panel and is not a policy statement of the NIH or the Federal Government.

Conclusions

At present, GDM is commonly diagnosed in the United States using a 1-hour screening test with a 50-gram glucose load followed by a 3-hour 100-gram glucose tolerance test (a two-step approach) for those found to be abnormal on the screen. This approach identifies approximately 5% to 6% of the population as having GDM. In contrast, newly proposed diagnostic strategies rely on the administration of a 2-hour glucose tolerance test (a one-step approach) with a fasting component and a 75-gram glucose load. These strategies differ on whether a 1-hour sample is included, whether two abnormal values are required, and the diagnostic cutoffs that are used. The International Association of Diabetes and Pregnancy Study Groups (IADPSG) has proposed diagnostic thresholds based on demonstrated associations between glycemic levels and an increased risk of obstetric and perinatal morbidities. The panel considered whether a one-step approach to the diagnosis of GDM should be adopted in place of the two-step approach. The one-step approach offers certain operational advantages. The current two-step approach is used only during pregnancy and is largely restricted to the United States. There would be value in a consistent, international diagnostic standard across one's lifespan. This unification would allow better standardization of best practices in patient care and comparability of research outcomes. The one-step approach also holds potential advantages for women and their health care providers, as it would allow a diagnosis to be achieved within the context of one visit as opposed to two. However, the one-step approach, as proposed by the IADPSG, is anticipated to increase the frequency of the diagnosis of GDM by twofold to threefold, to a prevalence of approximately 15% to 20%. There are several concerns regarding the diagnosis of GDM in these additional women. 
It is not well understood whether the additional women identified by this approach will benefit from treatment, and if so, to what extent. Moreover, the care of these women will generate additional direct and indirect health care costs. There is also evidence that the labeling of these women may have unintended consequences, such as an increase in cesarean delivery and more intensive newborn assessments. In addition, increased patient costs, life disruptions, and psychosocial burdens have been identified. Available studies do not provide clear evidence that a one-step approach is cost-effective in comparison with the current two-step approach. After much deliberation, the panel believes that there are clear benefits to international standardization with regard to the one-step approach. Nevertheless, at present, the panel believes that there is not sufficient evidence to adopt a one-step approach. The panel is particularly concerned about the adoption of new criteria that would increase the prevalence of GDM, and the corresponding costs and interventions, without clear demonstration of improvements in the most clinically important health and patient-centered outcomes. Thus, the panel recommends that the two-step approach be continued. However, given the potential benefits of a one-step approach, resolution of the uncertainties associated with its use would warrant revision of this conclusion.",2013-03-06 +30732028,First Report of Fusarium torulosum Causing Dry Rot of Seed Potato Tubers in the United States.,"Fusarium dry rot of potato (Solanum tuberosum L.) is a postharvest disease caused by several Fusarium species and is of worldwide importance. Thirteen species of Fusarium have been implicated in fungal dry rots of potatoes worldwide. Among them, eight species have been reported in the northern United States (2). In Michigan potato production, F. 
sambucinum was the predominant species reported to be affecting seed potato in storage and causing seed piece decay after planting (3). Some previous identifications of F. sambucinum as dry rot may have been F. torulosum since F. torulosum was previously classified within F. sambucinum (4). To further investigate this, dry rot symptomatic tubers were collected from Michigan seed lots in the summers of 2009 and 2010. Small sections from the margins of necrotic regions were cut with a scalpel, surface sterilized in 0.5% sodium hypochlorite for 10 s, rinsed twice in sterile distilled water, and blotted with sterile filter paper. The tissue pieces were plated on half-strength potato dextrose agar (PDA) amended with 0.5 g/liter of streptomycin sulfate and incubated at 23°C for 5 to 7 days. Cultures resembling Fusarium species were transferred onto water agar, and single hyphal tips from actively growing isolates were removed and plated either on carnation leaf agar (CLA) or on half-strength PDA to generate pure cultures. Among the Fusarium isolates obtained, five isolates were identified as F. torulosum (GenBank Accessions Nos. JF803658-JF803660). Identification was based on colony and conidial morphology on PDA and CLA, respectively. These features included slow growth (2.8 ± 0.2 cm in 5 days), white mycelium that became pigmented with age, narrow concentric rings, red or white pigmentation on agar, macroconidia (32.4 ± 0.4 μm average length) with five septa, a pointed apical cell, and a foot-shaped basal cell (4). The identity was confirmed through DNA extraction followed by amplification and sequencing of the translation elongation factor (EF-1α) gene region (1). The Fusarium-ID.v (1) and the NCBI database were used to obtain the closest match (99%) to previously sequenced materials (GenBank Accession No. AJ543611). Pathogenicity testing was done on disease-free potato tubers cv. Red Norland. 
Tubers were surface sterilized for 10 min in 0.5% sodium hypochlorite and rinsed twice in distilled water. Three tubers per isolate were injected with 20 μl of a conidial suspension (10^6 conidia/ml) made from F. torulosum cultures grown on PDA for 7 to 10 days. Control tubers were injected with 20 μl of sterile distilled water. All tubers inoculated with F. torulosum developed typical potato dry rot symptoms consisting of a brown and dry decay. There was no disease incidence on the control tubers. F. torulosum was reisolated from the symptomatic tubers. To our knowledge, this is the first report of F. torulosum causing potato dry rot in the United States. References: (1) D. Geiser et al. Eur. J. Plant Pathol. 110:473, 2004. (2) L. E. Hanson et al. Phytopathology 86:378, 1996. (3) M. L. Lacy and R. Hammerschmidt. Fusarium dry rot. Extension Bulletin. Retrieved from http://web1.msue.msu.edu/msue/iac/onlinepubs/pubs/E/E2448POT , 23 May 2010. (4) J. F. Leslie and B. A. Summerell. The Fusarium Laboratory Manual. Wiley-Blackwell, Hoboken, NJ, 2006.",2011-09-01 +24062097,Binding-sites Prediction Assisting Protein-protein Docking.,"Most biological actions of proteins, including their ability to interact with one another, involve some specific parts of their three-dimensional structure, called binding sites. These have evolved for their ability to bind other molecules effectively and are often conserved in different proteins. Identifying protein-protein binding sites in a protein that is known to interact with other proteins can provide important clues to the function of the protein and can also be used in protein-protein docking studies to reduce the search space explored by docking algorithms. We have developed an algorithm for structural similarity search in a database of non-redundant protein structures to find conserved binding regions on proteins involved in protein-protein interactions. We have used this algorithm to find conserved regions on a protein surface. 
The structurally conserved residues found were labeled as a protein-protein binding site, which allowed us to tune the AutoDock docking algorithm to predict the native protein complex structure from unbound protein structures. The conservation of protein structures that correctly predicted protein-protein binding site was used in AutoDock program to improve protein-protein docking. A web application based on our method is available at http://probis.cmm.ki.si.",2011-09-01 +24627776,Nigerian secondary school adolescents' perspective on abstinence-only sexual education as an effective tool for promotion of sexual health.,"The success of any type of sexual education programme depends on the knowledge and preparedness for practice by adolescents. A recent study has found that an 'abstinence-only' sexual education programme is effective in reducing sexual activity among adolescents. Knowledge of abstinence-only sexual education and preparedness for practice as an effective tool for promotion of sexual health among Nigerian secondary school adolescents was studied. An analytic descriptive survey design was used for the study. The research population comprised of all public secondary schools in three southern geopolitical zones of the Niger Delta Region of Nigeria. A multistage sampling technique was used to select 2020 senior secondary school (SS1-SS3) students as sample for the study. A partially self-designed and partially adapted questionnaire from an 'abstinence-only versus comprehensive sex education' debate, from debatepedia (http://wiki.idebate.org/), entitled 'Questionnaire on Nigerian Secondary School Adolescents' Perspective on Abstinence-Only Sexual Education (QNSSAPAOSE)' was used in eliciting information from respondents. Hypotheses were formulated and tested. Frequency counts, percentage and Pearson Product Moment Correlation were used in analysing data. A greater proportion of secondary school adolescents in this study lacked knowledge of sexual education. 
About 80% of the respondents could not define sexual education. The general perspective on abstinence-only sexual education was negative, as revealed by the larger number of respondents who demonstrated unwillingness to practice abstinence-only sexual education. Specifically, of those who responded in favour of abstinence-only sexual education, the youngest group of adolescents (11-13 years) and the male respondents were more likely to accept this type of education than the other groups. Poor knowledge of sexual education could be responsible for unwillingness to practice abstinence-only sexual education. Sexual education should, therefore, be introduced into the secondary school curriculum and taught by well-prepared teachers to enable an informed decision on practice.",2013-03-13 +22158337,Determination of molecular structures of HIV envelope glycoproteins using cryo-electron tomography and automated sub-tomogram averaging. ,"Since its discovery nearly 30 years ago, more than 60 million people have been infected with the human immunodeficiency virus (HIV) (www.usaid.gov). The virus infects and destroys CD4+ T-cells thereby crippling the immune system, and causing an acquired immunodeficiency syndrome (AIDS) (2). Infection begins when the HIV Envelope glycoprotein ""spike"" makes contact with the CD4 receptor on the surface of the CD4+ T-cell. This interaction induces a conformational change in the spike, which promotes interaction with a second cell surface co-receptor (5,9). The significance of these protein interactions in the HIV infection pathway makes them of profound importance in fundamental HIV research, and in the pursuit of an HIV vaccine. The need to better understand the molecular-scale interactions of HIV cell contact and neutralization motivated the development of a technique to determine the structures of the HIV spike interacting with cell surface receptor proteins and molecules that block infection. 
Using cryo-electron tomography and 3D image processing, we recently demonstrated the ability to determine such structures on the surface of native virus, at ~20 Å resolution (9,14). This approach is not limited to resolving HIV Envelope structures, and can be extended to other viral membrane proteins and proteins reconstituted on a liposome. In this protocol, we describe how to obtain structures of HIV envelope glycoproteins starting from purified HIV virions and proceeding stepwise through preparing vitrified samples, collecting cryo-electron microscopy data, reconstituting and processing 3D data volumes, averaging and classifying 3D protein subvolumes, and interpreting results to produce a protein model. The computational aspects of our approach were adapted into modules that can be accessed and executed remotely using the Biowulf GNU/Linux parallel processing cluster at the NIH (http://biowulf.nih.gov). This remote access, combined with low-cost computer hardware and high-speed network access, has made possible the involvement of researchers and students working from school or home.",2011-12-01 +21880310,Improving protein secondary structure prediction using a multi-modal BP method.,"Methods for predicting protein secondary structures provide information that is useful both in ab initio structure prediction and as additional restraints for fold recognition algorithms. Secondary structure predictions may also be used to guide the design of site directed mutagenesis studies, and to locate potential functionally important residues. In this article, we propose a multi-modal back propagation neural network (MMBP) method for predicting protein secondary structures. 
Using a Knowledge Discovery Theory based on Inner Cognitive Mechanism (KDTICM) method, we have constructed a compound pyramid model (CPM), which is composed of three layers of intelligent interface that integrate multi-modal back propagation neural network (MMBP), mixed-modal SVM (MMS), modified Knowledge Discovery in Databases (KDD(⁎)) process and so on. The CPM method is both an integrated web server and a standalone application that exploits recent advancements in knowledge discovery and machine learning to perform very accurate protein secondary structure predictions. Using a non-redundant test dataset of 256 proteins from RCASP256, the CPM method achieves an average Q(3) score of 86.13% (SOV99=84.66%). Extensive testing indicates that this is significantly better than any other method currently available. Assessments using RS126 and CB513 datasets indicate that the CPM method can achieve average Q(3) score approaching 83.99% (SOV99=80.25%) and 85.58% (SOV99=81.15%). By using both sequence and structure databases and by exploiting the latest techniques in machine learning it is possible to routinely predict protein secondary structure with an accuracy well above 80%. A program and web server, called CPM, which performs these secondary structure predictions, is accessible at http://kdd.ustb.edu.cn/protein_Web/.",2011-08-30 +23282203,CTF: a CRF-based transcription factor binding sites finding system.,"

Background

Identifying the location of transcription factor bindings is crucial to understand transcriptional regulation. Currently, Chromatin Immunoprecipitation followed with high-throughput Sequencing (ChIP-seq) is able to locate the transcription factor binding sites (TFBSs) accurately in high throughput and it has become the gold-standard method for TFBS finding experimentally. However, due to its high cost, it is impractical to apply the method in a very large scale. Considering the large number of transcription factors, numerous cell types and various conditions, computational methods are still very valuable to accurate TFBS identification.

Results

In this paper, we proposed a novel integrated TFBS prediction system, CTF, based on Conditional Random Fields (CRFs). Integrating information from different sources, CTF was able to capture patterns of TFBSs contained in different features (sequence, chromatin, etc.) and predicted the TFBS locations with a high accuracy. We compared CTF with several existing tools as well as the PWM baseline method on a dataset generated by ChIP-seq experiments (TFBSs of 13 transcription factors in mouse genome). Results showed that CTF performed significantly better than existing methods tested.

Conclusions

CTF is a powerful tool to predict TFBSs by integrating high throughput data and different features. It can be a useful complement to ChIP-seq and other experimental methods for TFBS identification and thus improve our ability to investigate functional elements in post-genomic era.

Availability

CTF is freely available to academic users at: http://cbb.sjtu.edu.cn/~ccwei/pub/software/CTF/CTF.php.",2012-12-17 +22398396,An algorithmic method for functionally defining regions of interest in the ventral visual pathway.,"In a widely used functional magnetic resonance imaging (fMRI) data analysis method, functional regions of interest (fROIs) are handpicked in each participant using macroanatomic landmarks as guides, and the response of these regions to new conditions is then measured. A key limitation of this standard handpicked fROI method is the subjectivity of decisions about which clusters of activated voxels should be treated as the particular fROI in question in each subject. Here we apply the Group-Constrained Subject-Specific (GSS) method for defining fROIs, recently developed for identifying language fROIs (Fedorenko et al., 2010), to algorithmically identify fourteen well-studied category-selective regions of the ventral visual pathway (Kanwisher, 2010). We show that this method retains the benefit of defining fROIs in individual subjects without the subjectivity inherent in the traditional handpicked fROI approach. The tools necessary for using this method are available on our website (http://web.mit.edu/bcs/nklab/GSS.shtml).",2012-03-03 +22689646,SPEER-SERVER: a web server for prediction of protein specificity determining sites.,"Sites that show specific conservation patterns within subsets of proteins in a protein family are likely to be involved in the development of functional specificity. These sites, generally termed specificity determining sites (SDS), might play a crucial role in binding to a specific substrate or proteins. Identification of SDS through experimental techniques is a slow, difficult and tedious job. Hence, it is very important to develop efficient computational methods that can more expediently identify SDS. 
Herein, we present Specificity prediction using amino acids' Properties, Entropy and Evolution Rate (SPEER)-SERVER, a web server that predicts SDS by analyzing quantitative measures of the conservation patterns of protein sites based on their physico-chemical properties and the heterogeneity of evolutionary changes between and within the protein subfamilies. This web server provides an improved representation of results, adds useful input and output options and integrates a wide range of analysis and data visualization tools when compared with the original standalone version of the SPEER algorithm. Extensive benchmarking finds that SPEER-SERVER exhibits sensitivity and precision performance that, on average, meets or exceeds that of other currently available methods. SPEER-SERVER is available at http://www.hpppi.iicb.res.in/ss/.",2012-06-11 +22689389,BatMis: a fast algorithm for k-mismatch mapping.,"

Motivation

Second-generation sequencing (SGS) generates millions of reads that need to be aligned to a reference genome allowing errors. Although current aligners can efficiently map reads allowing a small number of mismatches, they are not well suited for handling a large number of mismatches. The efficiency of aligners can be improved using various heuristics, but the sensitivity and accuracy of the alignments are sacrificed. In this article, we introduce Basic Alignment tool for Mismatches (BatMis)--an efficient method to align short reads to a reference allowing k mismatches. BatMis is a Burrows-Wheeler transformation based aligner that uses a seed and extend approach, and it is an exact method.

Results

Benchmark tests show that BatMis performs better than competing aligners in solving the k-mismatch problem. Furthermore, it can compete favorably even when compared with the heuristic modes of the other aligners. BatMis is a useful alternative for applications where fast k-mismatch mappings, unique mappings or multiple mappings of SGS data are required.

Availability and implementation

BatMis is written in C/C++ and is freely available from http://code.google.com/p/batmis/",2012-06-10 +21967198,"jTraML: an open source Java API for TraML, the PSI standard for sharing SRM transitions.","We here present jTraML, a Java API for the Proteomics Standards Initiative TraML data standard. The library provides fully functional classes for all elements specified in the TraML XSD document, as well as convenient methods to construct controlled vocabulary-based instances required to define SRM transitions. The use of jTraML is demonstrated via a two-way conversion tool between TraML documents and vendor specific files, facilitating the adoption process of this new community standard. The library is released as open source under the permissive Apache2 license and can be downloaded from http://jtraml.googlecode.com . TraML files can also be converted online at http://iomics.ugent.be/jtraml .",2011-10-13 +21368279,An optimal weighted aggregated association test for identification of rare variants involved in common diseases.,"The advent of next generation sequencing technologies allows one to discover nearly all rare variants in a genomic region of interest. This technological development increases the need for an effective statistical method for testing the aggregated effect of rare variants in a gene on disease susceptibility. The idea behind this approach is that if a certain gene is involved in a disease, many rare variants within the gene will disrupt the function of the gene and are associated with the disease. In this article, we present the rare variant weighted aggregate statistic (RWAS), a method that groups rare variants and computes a weighted sum of differences between case and control mutation counts. We show that our method outperforms the groupwise association test of Madsen and Browning in the disease-risk model that assumes that each variant makes an equally small contribution to disease risk. 
In addition, we can incorporate prior information into our method of which variants are likely causal. By using simulated data and real mutation screening data of the susceptibility gene for ataxia telangiectasia, we demonstrate that prior information has a substantial influence on the statistical power of association studies. Our method is publicly available at http://genetics.cs.ucla.edu/rarevariants.",2011-03-02 +21450717,A qualitative continuous model of cellular auxin and brassinosteroid signaling and their crosstalk.,"

Motivation

Hormone pathway interactions are crucial in shaping plant development, such as synergism between the auxin and brassinosteroid pathways in cell elongation. Both hormone pathways have been characterized in detail, revealing several feedback loops. The complexity of this network, combined with a shortage of kinetic data, renders its quantitative analysis virtually impossible at present.

Results

As a first step towards overcoming these obstacles, we analyzed the network using a Boolean logic approach to build models of auxin and brassinosteroid signaling, and their interaction. To compare these discrete dynamic models across conditions, we transformed them into qualitative continuous systems, which predict network component states more accurately and can accommodate kinetic data as they become available. To this end, we developed an extension for the SQUAD software, allowing semi-quantitative analysis of network states. Contrasting the developmental output depending on cell type-specific modulators enabled us to identify a most parsimonious model, which explains initially paradoxical mutant phenotypes and revealed a novel physiological feature.

Availability

The package SQUADD is freely available via the Bioconductor repository at http://www.bioconductor.org/help/bioc-views/release/bioc/html/SQUADD.html.",2011-03-30 +22676320,Quartet decomposition server: a platform for analyzing phylogenetic trees.,"

Background

The frequent exchange of genetic material among prokaryotes means that extracting a majority or plurality phylogenetic signal from many gene families, and the identification of gene families that are in significant conflict with the plurality signal is a frequent task in comparative genomics, and especially in phylogenomic analyses. Decomposition of gene trees into embedded quartets (unrooted trees each with four taxa) is a convenient and statistically powerful technique to address this challenging problem. This approach was shown to be useful in several studies of completely sequenced microbial genomes.

Results

We present here a web server that takes a collection of gene phylogenies, decomposes them into quartets, generates a Quartet Spectrum, and draws a split network. Users are also provided with various data download options for further analyses. Each gene phylogeny is to be represented by an assessment of phylogenetic information content, such as sets of trees reconstructed from bootstrap replicates or sampled from a posterior distribution. The Quartet Decomposition server is accessible at http://quartets.uga.edu.

Conclusions

The Quartet Decomposition server presented here provides a convenient means to perform Quartet Decomposition analyses and will empower users to find statistically supported phylogenetic conflicts.",2012-06-07 +23587428,The Th17/Treg balance and the expression of related cytokines in Uygur cervical cancer patients.,"

Background

The fine balance of Th17/Treg is crucial for maintenance of immune homeostasis. The objective of this study was to investigate the balance of Th17/Treg and the expression of related cytokines in Uighur cervical cancer patients.

Methods

Peripheral blood was collected from 65 cases of cervical cancer patients, 42 cases of cervical CIN patients and 40 healthy people. Flow cytometry was used to detect the percentages of T cell subsets, including CD3+ T cells, CD4+ T cells, CD8+ T cells, Treg cells and Th17 cells. ELISA assay was conducted to detect expression levels of TGF-β, IL-6, IL-10, IL-17, IL-23 and IFN-γ.

Results

There was no significant difference in the levels of CD3+ T cells, CD4+ T cells, CD8+ T cells, and the ratio of CD4+/CD8+ among the cervical cancer group, the CIN group and the healthy control group. However, compared with the healthy control group, the percentages of CD4+ CD25+ Treg, CD4+CD25+CD127- Treg, CD4+IL17+ Th17, CD4+CD25+Foxp3+, CD4+CD25- Foxp3+, CD8+CD25+CD127-Treg and CD8+CD25+Foxp3 were significantly higher in the cervical cancer group and the CIN group. Similar results were also found in the Th17/Treg ratio and the related cytokines. There was no significant difference between the cervical cancer group and the CIN group. Additionally, Th17 cell levels were positively correlated with IL-6, IL-23 and IL-17. Also, Treg cell levels were positively correlated with TGF-β, IL-10 and IL-6. Contrarily, Treg cell levels and IFN-γ were negatively correlated.

Conclusions

Our data indicated that the Th17/Treg balance was broken in peripheral blood of cervical cancer patients. Analysis of Th17/Treg balance may have a significant implication in diagnosing cervical cancer.

Virtual slides

The virtual slide for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1813823795931511.",2013-04-15 +21569406,Human papillomavirus vaccine introduction in low-income and middle-income countries: guidance on the use of cost-effectiveness models.,"

Background

The World Health Organization (WHO) recommends that the cost effectiveness of introducing human papillomavirus (HPV) vaccination is considered before such a strategy is implemented. However, developing countries often lack the technical capacity to perform and interpret results of economic appraisals of vaccines. To provide information about the feasibility of using such models in a developing country setting, we evaluated models of HPV vaccination in terms of their capacity, requirements, limitations and comparability.

Methods

A literature review identified six HPV vaccination models suitable for low-income and middle-income country use and representative of the literature in terms of provenance and model structure. Each model was adapted by its developers using standardised data sets representative of two hypothetical developing countries (a low-income country with no screening and a middle-income country with limited screening). Model predictions before and after vaccination of adolescent girls were compared in terms of HPV prevalence and cervical cancer incidence, as was the incremental cost-effectiveness ratio of vaccination under different scenarios.

Results

None of the models perfectly reproduced the standardised data set provided to the model developers. However, they agreed that large decreases in type 16/18 HPV prevalence and cervical cancer incidence are likely to occur following vaccination. Apart from the Thai model (in which vaccine and non-vaccine HPV types were combined), vaccine-type HPV prevalence dropped by 75% to 100%, and vaccine-type cervical cancer incidence dropped by 80% to 100% across the models (averaging over age groups). The most influential factors affecting cost effectiveness were the discount rate, duration of vaccine protection, vaccine price and HPV prevalence. Demographic change, access to treatment and data resolution were found to be key issues to consider for models in developing countries.

Conclusions

The results indicated the usefulness of considering results from several models and sets of modelling assumptions in decision making. Modelling groups were prepared to share their models and expertise to work with stakeholders in developing countries. Please see related article: http://www.biomedcentral.com/1741-7007/9/55.",2011-05-12 +22962475,Random sampling of elementary flux modes in large-scale metabolic networks.,"

Motivation

The description of a metabolic network in terms of elementary (flux) modes (EMs) provides an important framework for metabolic pathway analysis. However, their application to large networks has been hampered by the combinatorial explosion in the number of modes. In this work, we develop a method for generating random samples of EMs without computing the whole set.

Results

Our algorithm is an adaptation of the canonical basis approach, where we add an additional filtering step which, at each iteration, selects a random subset of the new combinations of modes. In order to obtain an unbiased sample, all candidates are assigned the same probability of getting selected. This approach avoids the exponential growth of the number of modes during computation, thus generating a random sample of the complete set of EMs within reasonable time. We generated samples of different sizes for a metabolic network of Escherichia coli, and observed that they preserve several properties of the full EM set. It is also shown that EM sampling can be used for rational strain design. A well distributed sample, that is representative of the complete set of EMs, should be suitable to most EM-based methods for analysis and optimization of metabolic networks.

Availability

Source code for a cross-platform implementation in Python is freely available at http://code.google.com/p/emsampler.

Contact

dmachado@deb.uminho.pt

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-01 +22962460,"Inferring duplications, losses, transfers and incomplete lineage sorting with nonbinary species trees.","

Motivation

Gene duplication (D), transfer (T), loss (L) and incomplete lineage sorting (I) are crucial to the evolution of gene families and the emergence of novel functions. The history of these events can be inferred via comparison of gene and species trees, a process called reconciliation, yet current reconciliation algorithms model only a subset of these evolutionary processes.

Results

We present an algorithm to reconcile a binary gene tree with a nonbinary species tree under a DTLI parsimony criterion. This is the first reconciliation algorithm to capture all four evolutionary processes driving tree incongruence and the first to reconcile non-binary species trees with a transfer model. Our algorithm infers all optimal solutions and reports complete, temporally feasible event histories, giving the gene and species lineages in which each event occurred. It is fixed-parameter tractable, with polytime complexity when the maximum species outdegree is fixed. Application of our algorithms to prokaryotic and eukaryotic data show that use of an incomplete event model has substantial impact on the events inferred and resulting biological conclusions.

Availability

Our algorithms have been implemented in Notung, a freely available phylogenetic reconciliation software package, available at http://www.cs.cmu.edu/~durand/Notung.

Contact

mstolzer@andrew.cmu.edu.",2012-09-01 +22367748,Bayesian phylogenetics with BEAUti and the BEAST 1.7.,"Computational evolutionary biology, statistical phylogenetics and coalescent-based population genetics are becoming increasingly central to the analysis and understanding of molecular sequence data. We present the Bayesian Evolutionary Analysis by Sampling Trees (BEAST) software package version 1.7, which implements a family of Markov chain Monte Carlo (MCMC) algorithms for Bayesian phylogenetic inference, divergence time dating, coalescent analysis, phylogeography and related molecular evolutionary analyses. This package includes an enhanced graphical user interface program called Bayesian Evolutionary Analysis Utility (BEAUti) that enables access to advanced models for molecular sequence and phenotypic trait evolution that were previously available to developers only. The package also provides new tools for visualizing and summarizing multispecies coalescent and phylogeographic analyses. BEAUti and BEAST 1.7 are open source under the GNU lesser general public license and available at http://beast-mcmc.googlecode.com and http://beast.bio.ed.ac.uk.",2012-02-25 +21864299,An analysis of scholarly productivity in United States academic anaesthesiologists by citation bibliometrics.,"The h-index is used to evaluate scholarly productivity in academic medicine, but has not been extensively used in anaesthesia. We analysed the publications, citations, citations per publication and h-index from 1996 to date using the Scopus(®) database for 1630 (1120 men, 510 women) faculty members from 24 randomly selected US academic anaesthesiology departments. The median (interquartile range [range]) h-index of US academic anaesthesiologists was 1 [0-5 (0-44)] with 3 [0-18 (0-398)] total publications, 24 [0-187 (0-8515)] total citations, and 5 [0-14 (0-252)] citations per publication. 
Faculty members in departments with National Institutes of Health funding were more productive than colleagues in departments with little or no government funding. The h-index increased significantly between successive academic ranks concomitant with increases in the number of publications and total citations. Men had higher median h-index than women concomitant with more publications and citations, but the number of citations per publication was similar between groups. Our results suggest that h-index is a reasonable indicator of scholarly productivity in anaesthesia. The results may help comparisons of academic productivity across countries and may be used to assess whether new initiatives designed to reverse recent declines in academic anaesthetic are working. You can respond to this article at http://www.anaesthesiacorrespondence.com.",2011-08-22 +23864102,Sorafenib alone versus sorafenib combined with transarterial chemoembolization for advanced-stage hepatocellular carcinoma: results of propensity score analyses.,"

Purpose

To compare the time to progression (TTP) and overall survival (OS) in patients with advanced-stage hepatocellular carcinoma (HCC) who are undergoing sorafenib treatment combined with transarterial chemoembolization (TACE) versus sorafenib monotherapy.

Materials and methods

The retrospective analysis of the data was approved by the institutional review board, and the requirement to obtain informed consent was waived. Of 355 patients with advanced-stage HCC (Barcelona Clinic Liver Cancer stage C) who were undergoing sorafenib therapy for at least 5 weeks between April 2007 and July 2011, 164 (46.2%) underwent repeat TACE (or chemolipiodolization if indicated) along with sorafenib therapy (combined group); the remaining 191 patients (53.8%) received sorafenib alone (monotherapy group). The median patient age was 53 years (range, 22-84 years). The median age was 53 years (range, 26-84 years) for men and 56 years (range, 22-75 years) for women. Propensity score-based methods were used to minimize bias when evaluating TTP on the basis of modified Response Evaluation Criteria in Solid Tumors and OS. Statistical analysis was performed with the Kaplan-Meier method by using the log-rank test and Cox regression models.

Results

In the combined and monotherapy groups, respectively, 64.6% and 49.2% of patients had vascular invasion, 87.8% and 91.1% had extrahepatic metastasis, and 54.3% and 47.1% had both. During follow-up (median duration, 5.5 months), the median TTP and OS in the combined group were longer than those in the monotherapy group (TTP: 2.5 months vs 2.1 months, respectively, P = .008; OS: 8.9 months vs 5.9 months, P = .009). At univariate and subsequent multivariate analyses, additional TACE was an independent predictor of favorable TTP and OS (adjusted hazard ratio: 0.74 and 0.57, respectively; P < .05 for both), consistent with the outcomes of inverse probability of treatment weighting. In the propensity score-matched cohort (96 pairs), the median TTP in the combined group was significantly longer than that in the monotherapy group (2.7 months vs 2.1 months, respectively; P = .011), but median OS was not (9.1 months vs 6.7 months, P = .21).

Conclusion

In this retrospective study, TACE plus sorafenib was superior to sorafenib alone with respect to TTP in patients with advanced-stage HCC, although it may or may not improve OS.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.13130150/-/DC1.",2013-07-17 +21505036,A reference dataset for the analyses of membrane protein secondary structures and transmembrane residues using circular dichroism spectroscopy.,"

Motivation

Empirical analyses of protein secondary structures based on circular dichroism (CD) and synchrotron radiation circular dichroism (SRCD) spectroscopic data rely on the availability of reference datasets comprised of spectra of relevant proteins, whose crystal structures have been determined. Datasets comprised of only soluble proteins have not proven suitable for analysing the spectra of membrane proteins.

Results

A new reference dataset, MP180, has been created containing the spectra of 30 membrane proteins encompassing the secondary structure and fold space covered by all known membrane protein structures. In addition a mixed soluble and membrane protein dataset, SMP180, has been created, which includes 98 soluble protein spectra (SP) plus the MP180 spectra. Calculations of both membrane and soluble protein secondary structures using SMP180 are significantly improved with respect to those produced, using soluble protein-only datasets. The SMP180 dataset also enables determination of the percentage of transmembrane residues, thus enhancing the information previously obtainable from CD spectroscopy.

Availability and implementation

Reference dataset online at the DichroWeb analysis server (http://dichroweb.cryst.bbk.ac.uk); individual protein spectra in the Protein Circular Dichroism Data Bank (http://pcddb.cryst.bbk.ac.uk).",2011-04-19 +21904438,Antagomirbase- a putative antagomir database.,"

Unlabelled

The accurate prediction of a comprehensive set of messenger putative antagomirs against microRNAs (miRNAs) remains an open problem. In particular, a set of putative antagomirs against human miRNA is predicted in this current version of database. We have developed Antagomir database, based on putative antagomirs-miRNA heterodimers. In this work, the human miRNA dataset was used as template to design putative antagomirs, using GC content and secondary structures as parameters. The algorithm used predicted the free energy of unbound antagomirs. Although in its infancy the development of antagomirs, that can target cell specific genes or families of genes, may pave the way forward for the generation of a new class of therapeutics, to treat complex inflammatory diseases. Future versions need to incorporate further sequences from other mammalian homologues for designing of antagomirs for aid in research.

Availability

The database is available for free at http://bioinfopresidencycollegekolkata.edu.in/antagomirs.html.",2011-08-20 +21904439,DEB: A web interface for RNA-seq digital gene expression analysis.,"

Unlabelled

Digital expression (DE) is an important application of RNA-seq technology to quantify the transcriptome. The number of mapped reads to each transcript or gene varies under different conditions and replicates. Currently, three different statistical algorithms (edgeR, DESeq and bayseq) are available as R packages, to compare the reads to identify significantly expressed transcripts or genes. So far, users have to manually install and run each R package separately. It is also of users' interest to compare the results of different approaches. Here, we present a pipeline DEB which automates all the steps in file preparation, computation and result comparison.

Availability

The database is available for free at http://www.ijbcb.org/DEB/php/onlinetool.php.",2011-08-20 +22155872,SomaticSniper: identification of somatic point mutations in whole genome sequencing data.,"

Motivation

The sequencing of tumors and their matched normals is frequently used to study the genetic composition of cancer. Despite this fact, there remains a dearth of available software tools designed to compare sequences in pairs of samples and identify sites that are likely to be unique to one sample.

Results

In this article, we describe the mathematical basis of our SomaticSniper software for comparing tumor and normal pairs. We estimate its sensitivity and precision, and present several common sources of error resulting in miscalls.

Availability and implementation

Binaries are freely available for download at http://gmt.genome.wustl.edu/somatic-sniper/current/, implemented in C and supported on Linux and Mac OS X.

Contact

delarson@wustl.edu; lding@wustl.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-06 +22155862,Computing graphlet signatures of network nodes and motifs in Cytoscape with GraphletCounter.,"

Unlabelled

Biological network analysis can be enhanced by examining the connections between nodes and the rest of the network. For this purpose we have developed GraphletCounter, an open-source software tool for computing graphlet degree signatures that can operate on its own or as a plug-in to the network analysis environment Cytoscape. A unique characteristic of GraphletCounter is its ability to compute the graphlet signatures of network motifs, which can be specified by files generated by the motif-finding tool mfinder. GraphletCounter displays graphlet signatures for visual inspection within Cytoscape, and can output graphlet data for integration with larger workflows.

Availability and implementation

GraphletCounter is implemented in Java. It can be downloaded from the Cytoscape plugin repository, and is also available at http://sonmezsysbio.org/software/graphletcounter.",2011-12-06 +21904437,ValFold: Program for the aptamer truncation process.,"

Unlabelled

DNA or RNA aptamers have gained attention as the next generation antibody-like molecules for medical or diagnostic use. Conventional secondary structure prediction tools for nucleic acids play an important role to truncate or minimize sequence, or introduce limited chemical modifications without compromising or changing its binding affinity to targets in the design of improved aptamers selected by Systematic Evolution of Ligands by EXponential enrichment (SELEX). We describe a novel software package, ValFold, capable of predicting secondary structures with improved accuracy based on unique aptamer characteristics. ValFold predicts not only the canonical Watson-Crick pairs but also G-G pairs derived from G-quadruplex (known structure for many aptamers) using the stem candidate selection algorithm.

Availability

The database is available for free at http://code.google.com/p/valfold/",2011-08-20 +21871708,Trends in hypospadias surgery: results of a worldwide survey.,"

Background

Hypospadias is a challenging field of urogenital reconstructive surgery, with different techniques currently being used.

Objective

Evaluate international trends in hypospadias surgery.

Design, setting, and participants

Paediatric urologists, paediatric surgeons, urologists, and plastic surgeons worldwide were invited to participate in an anonymous online questionnaire (http://www.hypospadias-center.info).

Measurements

General epidemiologic data, preferred technique in the correction of hypospadias, and preferred technique in the correction of penile curvature were gathered.

Results and limitations

Three hundred seventy-seven participants from 68 countries returned completed questionnaires. In distal hypospadias (subcoronal to midshaft), the tubularised incised plate (TIP) repair is preferred by 52.9-71.0% of the participants. Meatal advancement and glanuloplasty (MAGPI) is still a preferred method in glandular hypospadias. In the repair of proximal hypospadias, the two-stage repair is preferred by 43.3-76.6%. TIP repair in proximal hypospadias is used by 0.9-16.7%. Onlay flaps and tubes are used by 11.3-29.5% of the study group. Simple plication and Nesbit's procedure are the techniques of choice in curvature up to 30°; urethral division and ventral incision of the tunica albuginea with grafting is performed by about 20% of the participants in severe chordee. The frequency of hypospadias repairs does not influence the choice of technique.

Conclusions

In this study, we identified current international trends in the management of hypospadias. In distal hypospadias, the TIP repair is the preferred technique. In proximal hypospadias, the two-stage repair is most commonly used. A variety of techniques are used for chordee correction. This study contains data on the basis of personal experience. However, future research must focus on prospective controlled trials.",2011-08-22 +22201070,Exploiting intrastructure information for secondary structure prediction with multifaceted pipelines.,"Predicting the secondary structure of proteins is still a typical step in several bioinformatic tasks, in particular, for tertiary structure prediction. Notwithstanding the impressive results obtained so far, mostly due to the advent of sequence encoding schemes based on multiple alignment, in our view the problem should be studied from a novel perspective, in which understanding how available information sources are dealt with plays a central role. After revisiting a well-known secondary structure predictor viewed from this perspective (with the goal of identifying which sources of information have been considered and which have not), we propose a generic software architecture designed to account for all relevant information sources. To demonstrate the validity of the approach, a predictor compliant with the proposed generic architecture has been implemented and compared with several state-of-the-art secondary structure predictors. Experiments have been carried out on standard data sets, and the corresponding results confirm the validity of the approach. The predictor is available at http://iasc.diee.unica.it/ssp2/ through the corresponding web application or as downloadable stand-alone portable unpack-and-run bundle.",2012-05-01 +22519544,Involvement of redox-signalling in endogenous hydrogen sulfide production.,"

Unlabelled

Recently, cystathionine-γ-lyase (CSE) was found to provide the major physiological pathway for H2S, the third member of the gasotransmitter family. In various pathophysiological conditions, H2S exerted protective effects based on its antioxidant, anti-inflammatory, anti-hypertensive and other regulatory functions. Interestingly, CSE expression had been only poorly studied and only in relation with inflammatory processes. Therefore, the study by Hassan et al. in this issue of the BJP, provides a considerable advance by furnishing direct experimental evidence for the involvement of redox signalling in the regulation of CSE gene expression. They found that PDGF up-regulated CSE expression and activity that was abolished by antioxidants and by deletion of the transcription factor nuclear erythroid-2-related factor-2 (Nrf2). Furthermore, PDGF induced Nrf2 binding to its consensus sequence that was again reversed by antioxidants. As Nrf2 also governs CO biosynthesis, and PDGF inversely affects H2S and NO production, these data could indicate a concerted regulation of the three gasotransmitters by redox signalling.

Linked article

This article is a commentary on Hassan et al., pp. 2231-2242 of this issue. To view this paper visit http://dx.doi.org/10.1111/j.1476-5381.2012.01949.x.",2012-08-01 +22160795,SWIFT MODELLER v2.0: a platform-independent GUI for homology modeling.,"SWIFT MODELLER v2.0 is a platform-independent Java-based graphical user interface to MODELLER. It provides an interactive homology modeling solution by automating the formatting, scripting, and data extraction processes, meaning that the user only needs to paste in the protein target sequence as input. SWIFT MODELLER v2.0 takes a step-by-step approach where the flow of the software screens depicts steps in the homology modeling protocol. Ramachandran plots and DOPE profile graphs are sketched and displayed for in-depth model analysis, along with an embedded Jmol viewer for 3D visualization of the constructed model. SWIFT MODELLER v2.0 is functional on all Linux-based and Microsoft Windows operating systems for which MODELLER has been developed. The software is available as freeware at http://www.bitmesra.ac.in/swift-modeller/swift.htm .",2011-12-09 +22321698,Automatic recognition of conceptualization zones in scientific articles and two life science applications.,"

Motivation

Scholarly biomedical publications report on the findings of a research investigation. Scientists use a well-established discourse structure to relate their work to the state of the art, express their own motivation and hypotheses and report on their methods, results and conclusions. In previous work, we have proposed ways to explicitly annotate the structure of scientific investigations in scholarly publications. Here we present the means to facilitate automatic access to the scientific discourse of articles by automating the recognition of 11 categories at the sentence level, which we call Core Scientific Concepts (CoreSCs). These include: Hypothesis, Motivation, Goal, Object, Background, Method, Experiment, Model, Observation, Result and Conclusion. CoreSCs provide the structure and context to all statements and relations within an article and their automatic recognition can greatly facilitate biomedical information extraction by characterizing the different types of facts, hypotheses and evidence available in a scientific publication.

Results

We have trained and compared machine learning classifiers (support vector machines and conditional random fields) on a corpus of 265 full articles in biochemistry and chemistry to automatically recognize CoreSCs. We have evaluated our automatic classifications against a manually annotated gold standard, and have achieved promising accuracies with 'Experiment', 'Background' and 'Model' being the categories with the highest F1-scores (76%, 62% and 53%, respectively). We have analysed the task of CoreSC annotation both from a sentence classification as well as sequence labelling perspective and we present a detailed feature evaluation. The most discriminative features are local sentence features such as unigrams, bigrams and grammatical dependencies while features encoding the document structure, such as section headings, also play an important role for some of the categories. We discuss the usefulness of automatically generated CoreSCs in two biomedical applications as well as work in progress.

Availability

A web-based tool for the automatic annotation of articles with CoreSCs and corresponding documentation is available online at http://www.sapientaproject.com/software. http://www.sapientaproject.com also contains detailed information pertaining to CoreSC annotation and links to annotation guidelines as well as a corpus of manually annotated articles, which served as our training data.

Contact

liakata@ebi.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-02-08 +21862568,Integration of SNP genotyping confidence scores in IBD inference.,"

Motivation

High-throughput single nucleotide polymorphism (SNP) arrays have become the standard platform for linkage and association analyses. The high SNP density of these platforms allows high-resolution identification of ancestral recombination events even for distant relatives many generations apart. However, such inference is sensitive to marker mistyping and current error detection methods rely on the genotyping of additional close relatives. Genotyping algorithms provide a confidence score for each marker call that is currently not integrated in existing methods. There is a need for a model that incorporates this prior information within the standard identical by descent (IBD) and association analyses.

Results

We propose a novel model that incorporates marker confidence scores within IBD methods based on the Lander-Green Hidden Markov Model. The novel parameter of this model is the joint distribution of confidence scores and error status per array. We estimate this probability distribution by applying a modified expectation-maximization (EM) procedure on data from nuclear families genotyped with Affymetrix 250K SNP arrays. The converged tables from two different genotyping algorithms are shown for a wide range of error rates. We demonstrate the efficacy of our method in refining the detection of IBD signals using nuclear pedigrees and distant relatives.

Availability

Plinke, a new version of Plink with an extended pairwise IBD inference model allowing per marker error probabilities is freely available at: http://bioinfo.bgu.ac.il/bsu/software/plinke.

Contact

obirk@bgu.ac.il; markusb@bgu.ac.il

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-23 +22708542,In search for an explanation to the upsurge in infant mortality in Kenya during the 1988-2003 period.,"

Background

In Kenya, the infant mortality rate increased from 59 deaths per 1000 live births in 1988 to 78 deaths per 1000 live births by 2003. This was an increase of about 32 percent in 15 years. The reasons behind this upturn are poorly understood. This paper investigates the probable factors behind the upsurge in infant mortality in Kenya during the 1988-2003 period. Understanding the causes behind the upsurge is critical in designing high impact public health strategies for the acceleration of national and international public health goals such as the Millennium Development Goals (MDGs). The reversal in early child mortality is also regarded as one of the most important topics in contemporary demography.

Methods

A merged dataset drawn from the Kenya Demographic and Health Surveys of 1993, 1998 and 2003 was used. The merged KDHS included a total of 5265 singletons. Permission to use the KDHS data was obtained from ICF international on the following website: http://www.measuredhs.com. Stata version 11.0 was used for data analysis. The paper used regression decomposition techniques as the main method for analysing the contribution of the selected covariates on the upsurge in infant mortality.

Results

The duration of breastfeeding; maternal education, regional HIV prevalence and malaria endemicity were the factors that appeared to have contributed much to the observed rise in infant mortality in Kenya over the period. If all the live births that occurred in the 1996/03 period had the same mean values of all explanatory variables as those of live births that occurred in the 1988/95 period, then infant mortality would have increased by a massive 14 deaths per 1000 live births. However, if the live births that occurred in the 1988/95 period had the same mean values of all explanatory variables as those that occurred in the 1996/03 period, the upsurge in infant mortality would have been negligible. While the role of HIV in the upturn in infant mortality in Kenya and other sub Saharan African countries is indisputable, this study demonstrates that it is the duration of breastfeeding and Malaria endemicity that played a more significant role in Kenya's upsurge in infant mortality during the 1988-2003 period.

Conclusions

Efforts aimed at controlling and preventing malaria and HIV should be stepped up to avert an upsurge in infant mortality. There is need to step up alternative baby feeding practices among mothers who are HIV positive especially after the first six months of breastfeeding. Owing to the widely known inverse relationship between maternal education and infant mortality, there is need for concerted efforts to promote girl child education. Owing to the important role played by the short preceding birth interval to the upsurge in infant mortality, there is need to promote family planning methods in Kenya.",2012-06-18 +22355081,An integrated strategy for prediction uncertainty analysis.,"

Motivation

To further our understanding of the mechanisms underlying biochemical pathways mathematical modelling is used. Since many parameter values are unknown they need to be estimated using experimental observations. The complexity of models necessary to describe biological pathways in combination with the limited amount of quantitative data results in large parameter uncertainty which propagates into model predictions. Therefore prediction uncertainty analysis is an important topic that needs to be addressed in Systems Biology modelling.

Results

We propose a strategy for model prediction uncertainty analysis by integrating profile likelihood analysis with Bayesian estimation. Our method is illustrated with an application to a model of the JAK-STAT signalling pathway. The analysis identified predictions on unobserved variables that could be made with a high level of confidence, despite that some parameters were non-identifiable.

Availability and implementation

Source code is available at: http://bmi.bmt.tue.nl/sysbio/software/pua.html.",2012-02-21 +21860421,Oncogenic activation of FOXR1 by 11q23 intrachromosomal deletion-fusions in neuroblastoma.,"Neuroblastoma tumors frequently show loss of heterozygosity of chromosome 11q with a shortest region of overlap in the 11q23 region. These deletions are thought to cause inactivation of tumor suppressor genes leading to haploinsufficiency. Alternatively, micro-deletions could lead to gene fusion products that are tumor driving. To identify such events we analyzed a series of neuroblastomas by comparative genomic hybridization and single-nucleotide polymorphism arrays and integrated these data with Affymetrix mRNA profiling data with the bioinformatic tool R2 (http://r2.amc.nl). We identified three neuroblastoma samples with small interstitial deletions at 11q23, upstream of the forkhead-box R1 transcription factor (FOXR1). Genes at the proximal side of the deletion were fused to FOXR1, resulting in fusion transcripts of MLL-FOXR1 and PAFAH1B2-FOXR1. FOXR1 expression has only been detected in early embryogenesis. Affymetrix microarray analysis showed high FOXR1 mRNA expression exclusively in the neuroblastomas with micro-deletions and rare cases of other tumor types, including osteosarcoma cell line HOS. RNAi silencing of FOXR1 strongly inhibited proliferation of HOS cells and triggered apoptosis. Expression profiling of these cells and reporter assays suggested that FOXR1 is a negative regulator of fork-head box factor-mediated transcription. The neural crest stem cell line JoMa1 proliferates in culture conditional to activity of a MYC-ER transgene. Over-expression of the wild-type FOXR1 could functionally replace MYC and drive proliferation of JoMa1. We conclude that FOXR1 is recurrently activated in neuroblastoma by intrachromosomal deletion/fusion events, resulting in overexpression of fusion transcripts. 
Forkhead-box transcription factors have not been previously implicated in neuroblastoma pathogenesis. Furthermore, this is the first identification of intrachromosomal fusion genes in neuroblastoma.",2011-08-22 +23220383,Comprehensive gene expression changes associated with mouse postnatal kidney development.,"

Purpose

To provide a portrait of the molecular alterations in renal growth that occur in mice postnatally, we performed gene expression profiling at discrete time points during the first 5 weeks of life.

Materials and methods

Kidneys were harvested from C57BL/6 mice at embryonic day 19.5, and postnatal days 1, 3, 5, 7, 10, 14, 21, 28 and 35. Total RNA was extracted and gene expression profiling was done using microarrays (Agilent Technologies, Santa Clara, California). Transcripts whose expression levels changed during the study course were identified using StepMiner software (http://chicory.stanford.edu/sahoo/public/StepMiner/). Biological functions of the modulated genes were identified using IPA® software.

Results

Postnatal kidney growth and development are associated with widespread changes in gene expression with 6,949 transcripts significantly up-regulated and 6,696 down-regulated during the first 5 weeks of life. Pathway analysis showed progressive down-regulation of pathways associated with cell growth and embryonic development (postnatal days 5 to 7). This was followed by increased expression of transcripts associated with lipid/energy metabolism and molecular transport (postnatal days 10 to 14), and down-regulation of genes related to DNA replication, cell cycle, tissue development, protein trafficking and cell morphology (postnatal days 14 to 21).

Conclusions

To our knowledge we report the most comprehensive temporal survey of postnatal kidney development to date. This data set provides a framework for interpreting nephropathy, such as that induced by congenital obstruction.",2012-12-05 +21849047,A novel method to compare protein structures using local descriptors.,"

Background

Protein structure comparison is one of the most widely performed tasks in bioinformatics. However, currently used methods have problems with the so-called ""difficult similarities"", including considerable shifts and distortions of structure, sequential swaps and circular permutations. There is a demand for efficient and automated systems capable of overcoming these difficulties, which may lead to the discovery of previously unknown structural relationships.

Results

We present a novel method for protein structure comparison based on the formalism of local descriptors of protein structure - DEscriptor Defined Alignment (DEDAL). Local similarities identified by pairs of similar descriptors are extended into global structural alignments. We demonstrate the method's capability by aligning structures in difficult benchmark sets: curated alignments in the SISYPHUS database, as well as SISY and RIPC sets, including non-sequential and non-rigid-body alignments. On the most difficult RIPC set of sequence alignment pairs the method achieves an accuracy of 77% (the second best method tested achieves 60% accuracy).

Conclusions

DEDAL is fast enough to be used in whole proteome applications, and by lowering the threshold of detectable structure similarity it may shed additional light on molecular evolution processes. It is well suited to improving automatic classification of structure domains, helping analyze protein fold space, or to improving protein classification schemes. DEDAL is available online at http://bioexploratorium.pl/EP/DEDAL.",2011-08-17 +22247278,nEASE: a method for gene ontology subclassification of high-throughput gene expression data.,"

Unlabelled

High-throughput technologies can identify genes whose expression profiles correlate with specific phenotypes; however, placing these genes into a biological context remains challenging. To help address this issue, we developed nested Expression Analysis Systematic Explorer (nEASE). nEASE complements traditional gene ontology enrichment approaches by determining statistically enriched gene ontology subterms within a list of genes based on co-annotation. Here, we overview an open-source software version of the nEASE algorithm. nEASE can be used either stand-alone or as part of a pathway discovery pipeline.

Availability

nEASE is implemented within the Multiple Experiment Viewer software package available at http://www.tm4.org/mev.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-13 +21871588,Noise induced changes in the expression of p38/MAPK signaling proteins in the sensory epithelium of the inner ear.,"Noise exposure is a major cause of hearing loss. Classical methods of studying protein involvement have provided a basis for understanding signaling pathways that mediate hearing loss and damage repair but do not lend themselves to studying large networks of proteins that are likely to increase or decrease during noise trauma. To address this issue, antibody microarrays were used to quantify the very early changes in protein expression in three distinct regions of the chinchilla cochlea 2h after exposure to a 0.5-8 kHz band of noise for 2h at 112 dB SPL. The noise exposure caused significant functional impairment 2h post-exposure which only partially recovered. Distortion product otoacoustic emissions were abolished 2h after the exposure, but at 4 weeks post-exposure, otoacoustic emissions were present, but still greatly depressed. Cochleograms obtained 4 weeks post-exposure demonstrated significant loss of outer hair cells in the basal 60% of the cochlea corresponding to frequencies in the noise spectrum. A comparative analysis of the very early (2h post-exposure) noise-induced proteomic changes indicated that the sensory epithelium, lateral wall and modiolus differ in their biological response to noise. Bioinformatic analysis of the cochlear protein profile using ""The Database for Annotation, Visualization and Integrated Discovery 2008"" (DAVID - http://david.abcc.ncifcrf.gov) revealed the initiation of the cell death process in sensory epithelium and modiolus. An increase in Fas and phosphorylation of FAK and p38/MAPK in the sensory epithelium suggest that noise-induced stress signals at the cell membrane are transmitted to the nucleus by Fas and focal adhesion signaling through the p38/MAPK signaling pathway. 
Up-regulation of downstream nuclear proteins E2F3 and WSTF in immunoblots and microarrays along with their immunolocalization in the outer hair cells supported the pivotal role of p38/MAPK signaling in the mechanism underlying noise-induced hearing loss.",2011-08-16 +22937107,FunSAV: predicting the functional effect of single amino acid variants using a two-stage random forest model.,"Single amino acid variants (SAVs) are the most abundant form of known genetic variations associated with human disease. Successful prediction of the functional impact of SAVs from sequences can thus lead to an improved understanding of the underlying mechanisms of why a SAV may be associated with certain disease. In this work, we constructed a high-quality structural dataset that contained 679 high-quality protein structures with 2,048 SAVs by collecting the human genetic variant data from multiple resources and dividing them into two categories, i.e., disease-associated and neutral variants. We built a two-stage random forest (RF) model, termed as FunSAV, to predict the functional effect of SAVs by combining sequence, structure and residue-contact network features with other additional features that were not explored in previous studies. Importantly, a two-step feature selection procedure was proposed to select the most important and informative features that contribute to the prediction of disease association of SAVs. In cross-validation experiments on the benchmark dataset, FunSAV achieved a good prediction performance with the area under the curve (AUC) of 0.882, which is competitive with and in some cases better than other existing tools including SIFT, SNAP, Polyphen2, PANTHER, nsSNPAnalyzer and PhD-SNP. The sourcecodes of FunSAV and the datasets can be downloaded at http://sunflower.kuicr.kyoto-u.ac.jp/sjn/FunSAV.",2012-08-24 +23485231,Cancer incidence and mortality patterns in Europe: estimates for 40 countries in 2012.,"

Introduction

Cancer incidence and mortality estimates for 25 cancers are presented for the 40 countries in the four United Nations-defined areas of Europe and for the European Union (EU-27) for 2012.

Methods

We used statistical models to estimate national incidence and mortality rates in 2012 from recently-published data, predicting incidence and mortality rates for the year 2012 from recent trends, wherever possible. The estimated rates in 2012 were applied to the corresponding population estimates to obtain the estimated numbers of new cancer cases and deaths in Europe in 2012.

Results

There were an estimated 3.45 million new cases of cancer (excluding non-melanoma skin cancer) and 1.75 million deaths from cancer in Europe in 2012. The most common cancer sites were cancers of the female breast (464,000 cases), followed by colorectal (447,000), prostate (417,000) and lung (410,000). These four cancers represent half of the overall burden of cancer in Europe. The most common causes of death from cancer were cancers of the lung (353,000 deaths), colorectal (215,000), breast (131,000) and stomach (107,000). In the European Union, the estimated numbers of new cases of cancer were approximately 1.4 million in males and 1.2 million in females, and around 707,000 men and 555,000 women died from cancer in the same year.

Conclusion

These up-to-date estimates of the cancer burden in Europe alongside the description of the varying distribution of common cancers at both the regional and country level provide a basis for establishing priorities to cancer control actions in Europe. The important role of cancer registries in disease surveillance and in planning and evaluating national cancer plans is becoming increasingly recognised, but needs to be further advocated. The estimates and software tools for further analysis (EUCAN 2012) are available online as part of the European Cancer Observatory (ECO) (http://eco.iarc.fr).",2013-02-26 +21712249,"CytoscapeRPC: a plugin to create, modify and query Cytoscape networks from scripting languages.","

Summary

CytoscapeRPC is a plugin for Cytoscape which allows users to create, query and modify Cytoscape networks from any programming language which supports XML-RPC. This enables them to access Cytoscape functionality and visualize their data interactively without leaving the programming environment with which they are familiar.

Availability

Install through the Cytoscape plugin manager or visit the web page: http://wiki.nbic.nl/index.php/CytoscapeRPC for the user tutorial and download.

Contact

j.j.bot@tudelft.nl; j.j.bot@tudelft.nl.",2011-06-27 +23323973,The complex transcriptional landscape of the anucleate human platelet.,"

Background

Human blood platelets are essential to maintaining normal hemostasis, and platelet dysfunction often causes bleeding or thrombosis. Estimates of genome-wide platelet RNA expression using microarrays have provided insights to the platelet transcriptome but were limited by the number of known transcripts. The goal of this effort was to deep-sequence RNA from leukocyte-depleted platelets to capture the complex profile of all expressed transcripts.

Results

From each of four healthy individuals we generated long RNA (≥40 nucleotides) profiles from total and ribosomal-RNA depleted RNA preparations, as well as short RNA (<40 nucleotides) profiles. Analysis of ~1 billion reads revealed that coding and non-coding platelet transcripts span a very wide dynamic range (≥16 PCR cycles beyond β-actin), a result we validated through qRT-PCR on many dozens of platelet messenger RNAs. Surprisingly, ribosomal-RNA depletion significantly and adversely affected estimates of the relative abundance of transcripts. Of the known protein-coding loci, ~9,500 are present in human platelets. We observed a strong correlation between mRNAs identified by RNA-seq and microarray for well-expressed mRNAs, but RNASeq identified many more transcripts of lower abundance and permitted discovery of novel transcripts.

Conclusions

Our analyses revealed diverse classes of non-coding RNAs, including: pervasive antisense transcripts to protein-coding loci; numerous, previously unreported and abundant microRNAs; retrotransposons; and thousands of novel un-annotated long and short intronic transcripts, an intriguing finding considering the anucleate nature of platelets. The data are available through a local mirror of the UCSC genome browser and can be accessed at: http://cm.jefferson.edu/platelets_2012/.",2013-01-16 +24564762,Identifying conserved protein complexes between species by constructing interolog networks.,"

Background

Protein complexes conserved across species indicate processes that are core to cellular machinery (e.g. cell-cycle or DNA damage-repair complexes conserved across human and yeast). While numerous computational methods have been devised to identify complexes from the protein interaction (PPI) networks of individual species, these are severely limited by noise and errors (false positives) in currently available datasets. Our analysis using human and yeast PPI networks revealed that these methods missed several important complexes including those conserved between the two species (e.g. the MLH1-MSH2-PMS2-PCNA mismatch-repair complex). Here, we note that much of the functionalities of yeast complexes have been conserved in human complexes not only through sequence conservation of proteins but also of critical functional domains. Therefore, integrating information of domain conservation might throw further light on conservation patterns between yeast and human complexes.

Results

We identify conserved complexes by constructing an interolog network (IN) leveraging on the functional conservation of proteins between species through domain conservation (from Ensembl) in addition to sequence similarity. We employ 'state-of-the-art' methods to cluster the interolog network, and map these clusters back to the original PPI networks to identify complexes conserved between the species. Evaluation of our IN-based approach (called COCIN) on human and yeast interaction data identifies several additional complexes (76% recall) compared to direct complex detection from the original PINs (54% recall). Our analysis revealed that the IN-construction removes several non-conserved interactions many of which are false positives, thereby improving complex prediction. In fact removing non-conserved interactions from the original PINs also resulted in higher number of conserved complexes, thereby validating our IN-based approach. These complexes included the mismatch repair complex, MLH1-MSH2-PMS2-PCNA, and other important ones namely, RNA polymerase-II, EIF3 and MCM complexes, all of which constitute core cellular processes known to be conserved across the two species.

Conclusions

Our method based on integrating domain conservation and sequence similarity to construct interolog networks helps to identify considerably more conserved complexes between the PPI networks from two species compared to direct complex prediction from the PPI networks. We observe from our experiments that protein complexes are not conserved from yeast to human in a straightforward way, that is, it is not the case that a yeast complex is a (proper) sub-set of a human complex with a few additional proteins present in the human complex. Instead complexes have evolved multifold with considerable re-organization of proteins and re-distribution of their functions across complexes. This finding can have significant implications on attempts to extrapolate other kinds of relationships such as synthetic lethality from yeast to human, for example in the identification of novel cancer targets.

Availability

http://www.comp.nus.edu.sg/~leonghw/COCIN/.",2013-10-22 +21890392,The spectrum of phenylketonuria genotypes in the Armenian population: identification of three novel mutant PAH alleles.,"We present the spectrum of phenylalanine hydroxylase (PAH) gene mutations upon investigating 35 index patients identified with hyperphenylalaninemia in Armenia. One patient was diagnosed with dihydropteridine reductase (DHPR) deficiency, whereas all other 34 and their 6 affected siblings presented with mild or classical phenylketonuria (PKU). By analyzing all 13 exons plus exon-intron boundaries of the PAH gene, we identified two mutant alleles in 23 PKU patients, three mutations in 1, only one mutation in 5, and no mutation in 5 PKU patients. The most prevalent mutation was the well defined splicing error in intron 10, c.1066-11G>A (17/68 alleles). The three alterations, c.836C>T (p.Pro279Leu) in exon 7, c.1129T>G (p.Tyr377Asp) in exon 11, and c.1244A>T (p.Asp415Val) in exon 12, have not been reported in the PAH locus database (http://www.pahdb.mcgill.ca) and, thus, might be specific for the culturally homogenous Armenian population.",2011-08-12 +21858037,Quantitative expression profile of distinct functional regions in the adult mouse brain.,"The adult mammalian brain is composed of distinct regions with specialized roles including regulation of circadian clocks, feeding, sleep/awake, and seasonal rhythms. To find quantitative differences of expression among such various brain regions, we conducted the BrainStars (B*) project, in which we profiled the genome-wide expression of ∼50 small brain regions, including sensory centers, and centers for motion, time, memory, fear, and feeding. To avoid confounds from temporal differences in gene expression, we sampled each region every 4 hours for 24 hours, and pooled the samples for DNA-microarray assays. Therefore, we focused on spatial differences in gene expression. 
We used informatics to identify candidate genes with expression changes showing high or low expression in specific regions. We also identified candidate genes with stable expression across brain regions that can be used as new internal control genes, and ligand-receptor interactions of neurohormones and neurotransmitters. Through these analyses, we found 8,159 multi-state genes, 2,212 regional marker gene candidates for 44 small brain regions, 915 internal control gene candidates, and 23,864 inferred ligand-receptor interactions. We also found that these sets include well-known genes as well as novel candidate genes that might be related to specific functions in brain regions. We used our findings to develop an integrated database (http://brainstars.org/) for exploring genome-wide expression in the adult mouse brain, and have made this database openly accessible. These new resources will help accelerate the functional analysis of the mammalian brain and the elucidation of its regulatory network systems.",2011-08-12 +22829624,YAHA: fast and flexible long-read alignment with optimal breakpoint detection.,"

Motivation

With improved short-read assembly algorithms and the recent development of long-read sequencers, split mapping will soon be the preferred method for structural variant (SV) detection. Yet, current alignment tools are not well suited for this.

Results

We present YAHA, a fast and flexible hash-based aligner. YAHA is as fast and accurate as BWA-SW at finding the single best alignment per query and is dramatically faster and more sensitive than both SSAHA2 and MegaBLAST at finding all possible alignments. Unlike other aligners that report all, or one, alignment per query, or that use simple heuristics to select alignments, YAHA uses a directed acyclic graph to find the optimal set of alignments that cover a query using a biologically relevant breakpoint penalty. YAHA can also report multiple mappings per defined segment of the query. We show that YAHA detects more breakpoints in less time than BWA-SW across all SV classes, and especially excels at complex SVs comprising multiple breakpoints.

Availability

YAHA is currently supported on 64-bit Linux systems. Binaries and sample data are freely available for download from http://faculty.virginia.edu/irahall/YAHA.

Contact

imh4y@virginia.edu.",2012-07-24 +21342586,Linkage disequilibrium based genotype calling from low-coverage shotgun sequencing reads.,"

Background

Recent technology advances have enabled sequencing of individual genomes, promising to revolutionize biomedical research. However, deep sequencing remains more expensive than microarrays for performing whole-genome SNP genotyping.

Results

In this paper we introduce a new multi-locus statistical model and computationally efficient genotype calling algorithms that integrate shotgun sequencing data with linkage disequilibrium (LD) information extracted from reference population panels such as Hapmap or the 1000 genomes project. Experiments on publicly available 454, Illumina, and ABI SOLiD sequencing datasets suggest that integration of LD information results in genotype calling accuracy comparable to that of microarray platforms from sequencing data of low-coverage. A software package implementing our algorithm, released under the GNU General Public License, is available at http://dna.engr.uconn.edu/software/GeneSeq/.

Conclusions

Integration of LD information leads to significant improvements in genotype calling accuracy compared to prior LD-oblivious methods, rendering low-coverage sequencing as a viable alternative to microarrays for conducting large-scale genome-wide association studies.",2011-02-15 +22130590,An infrastructure for ontology-based information systems in biomedicine: RICORDO case study.,"

Summary

The article presents an infrastructure for supporting the semantic interoperability of biomedical resources based on the management (storing and inference-based querying) of their ontology-based annotations. This infrastructure consists of: (i) a repository to store and query ontology-based annotations; (ii) a knowledge base server with an inference engine to support the storage of and reasoning over ontologies used in the annotation of resources; (iii) a set of applications and services allowing interaction with the integrated repository and knowledge base. The infrastructure is being prototyped and developed and evaluated by the RICORDO project in support of the knowledge management of biomedical resources, including physiology and pharmacology models and associated clinical data.

Availability and implementation

The RICORDO toolkit and its source code are freely available from http://ricordo.eu/relevant-resources.

Contact

sarala@ebi.ac.uk.",2011-11-29 +23432998,SHIFT: server for hidden stops analysis in frame-shifted translation.,"

Background

Frameshift is one of the three classes of recoding. Frame-shifts lead to waste of energy, resources and activity of the biosynthetic machinery. In addition, some peptides synthesized after frame-shifts are probably cytotoxic which serve as plausible cause for innumerable number of diseases and disorders such as muscular dystrophies, lysosomal storage disorders, and cancer. Hidden stop codons occur naturally in coding sequences among all organisms. These codons are associated with the early termination of translation for incorrect reading frame selection and help to reduce the metabolic cost related to the frameshift events. Researchers have identified several consequences of hidden stop codons and their association with myriad disorders. However the wealth of information available is speckled and not effortlessly acquiescent to data-mining. To reduce this gap, this work describes an algorithmic web based tool to study hidden stops in frameshifted translation for all the lineages through respective genetic code systems.

Findings

This paper describes SHIFT, an algorithmic web application tool that provides a user-friendly interface for identifying and analyzing hidden stops in frameshifted translation of genomic sequences for all available genetic code systems. We have calculated the correlation between codon usage frequencies and the plausible contribution of codons towards hidden stops in an off-frame context. Markovian chains of various order have been used to model hidden stops in frameshifted peptides and their evolutionary association with naturally occurring hidden stops. In order to obtain reliable and persuasive estimates for the naturally occurring and predicted hidden stops statistical measures have been implemented.

Conclusions

This paper presented SHIFT, an algorithmic tool that allows user-friendly exploration, analysis, and visualization of hidden stop codons in frameshifted translations. It is expected that this web based tool would serve as a useful complement for analyzing hidden stop codons in all available genetic code systems. SHIFT is freely available for academic and research purpose at http://www.nuccore.org/shift/.",2013-02-23 +21835016,"Determination of circulating Mycobacterium tuberculosis strains and transmission patterns among pulmonary TB patients in Kawempe municipality, Uganda, using MIRU-VNTR.","

Background

Mycobacterial interspersed repetitive units - variable number of tandem repeats (MIRU-VNTR) genotyping is a powerful tool for unraveling clonally complex Mycobacterium tuberculosis (MTB) strains and detection of transmission patterns. Using MIRU-VNTR, MTB genotypes and their transmission patterns among patients with new and active pulmonary tuberculosis (PTB) in Kawempe municipality in Kampala, Uganda, were determined.

Results

MIRU-VNTR genotyping was performed by PCR-amplification of 15 MTB-MIRU loci from 113 cultured specimens from 113 PTB patients (one culture sample per patient). To determine lineages, the genotypes were entered into the MIRU-VNTRplus database [http://www.miru-vntrplus.org/] as numerical codes corresponding to the number of alleles at each locus. Ten different lineages were obtained: Uganda II (40% of specimens), Uganda I (14%), LAM (6%), Delhi/CAS (3%), Haarlem (3%), Beijing (3%), Cameroon (3%), EAI (2%), TUR (2%) and S (1%). Uganda I and Uganda II were the most predominant genotypes. Genotypes for 29 isolates (26%) did not match any strain in the database and were considered unique. There was high diversity of MIRU-VNTR genotypes, with a total of 94 distinct patterns. Thirty four isolates grouped into 15 distinct clusters each with two to four isolates. Eight households had similar MTB strains for both index and contact cases, indicating possible transmission.

Conclusion

MIRU-VNTR genotyping revealed high MTB strain diversity with low clustering in Kawempe municipality. The technique has a high discriminatory power for genotyping MTB strains in Uganda.",2011-08-11 +21969735,HFOLD - A program package for calculating two-body MSSM Higgs decays at full one-loop level.,"HFOLD (Higgs Full One Loop Decays) is a Fortran program package for calculating all MSSM Higgs two-body decay widths and the corresponding branching ratios at full one-loop level. The package is done in the SUSY Parameter Analysis convention and supports the SUSY Les Houches Accord input and output format. PROGRAM SUMMARY: Program title: HFOLD Catalogue identifier: AEJG_v1_0 Program summary URL:http://cpc.cs.qub.ac.uk/summaries/AEJG_v1_0.html Program obtainable from: CPC Program Library, Queen's University, Belfast, N. Ireland Licensing provisions: Standard CPC licence, http://cpc.cs.qub.ac.uk/licence/licence.html No. of lines in distributed program, including test data, etc.: 340 621 No. of bytes in distributed program, including test data, etc.: 1 760 051 Distribution format: tar.gz Programming language: Fortran 77 Computer: Workstation, PC Operating system: Linux RAM: 524 288 000 Bytes Classification: 11.1 External routines: LoopTools 2.2 (http://www.feynarts.de/looptools/), SLHALib 2.2 (http://www.feynarts.de/slha/). The LoopTools code is included in the distribution package. Nature of problem: A future high-energy e+e- linear collider will be the best environment for the precise measurements of masses, cross sections, branching ratios, etc. Experimental accuracies are expected at the per-cent down to the per-mile level. These must be matched from the theoretical side. Therefore higher order calculations are mandatory. Solution method: This program package calculates all MSSM Higgs two-body decay widths and the corresponding branching ratios at full one-loop level. The renormalization is done in the DR scheme following the SUSY Parameter Analysis convention. 
The program supports the SUSY Les Houches Accord input and output format. Running time: The example provided takes only a few seconds to run.",2011-10-01 +21846735,Reconstructing the architecture of the ancestral amniote genome.,"

Motivation

The ancestor of birds and mammals lived approximately 300 million years ago. Inferring its genome organization is key to understanding the differentiated evolution of these two lineages. However, detecting traces of its chromosomal organization in its extant descendants is difficult due to the accumulation of molecular evolution since birds and mammals lineages diverged.

Results

We address several methodological issues for the detection and assembly of ancestral genomic features of ancient vertebrate genomes, which encompass adjacencies, contiguous segments, syntenies and double syntenies in the context of a whole genome duplication. Using generic, but stringent, methods for all these problems, some of them new, we analyze 15 vertebrate genomes, including 12 amniotes and 3 teleost fishes, and infer a high-resolution genome organization of the amniote ancestral genome, composed of 39 ancestral linkage groups at a resolution of 100 kb. We extensively discuss the validity and robustness of the method to variations of data and parameters. We introduce a support value for each of the groups, and show that 36 out of 39 have maximum support.

Conclusions

A single methodological principle cannot currently be used to infer the organization of the amniote ancestral genome, and we demonstrate that it is possible to gather several principles into a computational paleogenomics pipeline. This strategy offers a solid methodological base for the reconstruction of ancient vertebrate genomes.

Availability

Source code, in C++ and Python, is available at http://www.cecm.sfu.ca/~cchauve/SUPP/AMNIOTE2010/

Contact

cedric.chauve@sfu.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-16 +21955929,A physical map of Brassica oleracea shows complexity of chromosomal changes following recursive paleopolyploidizations.,"

Background

Evolution of the Brassica species has been recursively affected by polyploidy events, and comparison to their relative, Arabidopsis thaliana, provides means to explore their genomic complexity.

Results

A genome-wide physical map of a rapid-cycling strain of B. oleracea was constructed by integrating high-information-content fingerprinting (HICF) of Bacterial Artificial Chromosome (BAC) clones with hybridization to sequence-tagged probes. Using 2907 contigs of two or more BACs, we performed several lines of comparative genomic analysis. Interspecific DNA synteny is much better preserved in euchromatin than heterochromatin, showing the qualitative difference in evolution of these respective genomic domains. About 67% of contigs can be aligned to the Arabidopsis genome, with 96.5% corresponding to euchromatic regions, and 3.5% (shown to contain repetitive sequences) to pericentromeric regions. Overgo probe hybridization data showed that contigs aligned to Arabidopsis euchromatin contain ~80% of low-copy-number genes, while genes with high copy number are much more frequently associated with pericentromeric regions. We identified 39 interchromosomal breakpoints during the diversification of B. oleracea and Arabidopsis thaliana, a relatively high level of genomic change since their divergence. Comparison of the B. oleracea physical map with Arabidopsis and other available eudicot genomes showed appreciable 'shadowing' produced by more ancient polyploidies, resulting in a web of relatedness among contigs which increased genomic complexity.

Conclusions

A high-resolution genetically-anchored physical map sheds light on Brassica genome organization and advances positional cloning of specific genes, and may help to validate genome sequence assembly and alignment to chromosomes. All the physical mapping data is freely shared at a WebFPC site (http://lulu.pgml.uga.edu/fpc/WebAGCoL/brassica/WebFPC/; temporarily password-protected: account: pgml; password: 123qwe123).",2011-09-28 +21943313,Analysing the operative experience of basic surgical trainees in Ireland using a web-based logbook.,"

Background

There is concern about the adequacy of operative exposure in surgical training programmes, in the context of changing work practices. We aimed to quantify the operative exposure of all trainees on the National Basic Surgical Training (BST) programme in Ireland and compare the results with arbitrary training targets.

Methods

Retrospective analysis of data obtained from a web-based logbook (http://www.elogbook.org) for all general surgery and orthopaedic training posts between July 2007 and June 2009.

Results

104 trainees recorded 23,918 operations between two 6-month general surgery posts. The most common general surgery operation performed was simple skin excision with trainees performing an average of 19.7 (± 9.9) over the 2-year training programme. Trainees most frequently assisted with cholecystectomy with an average of 16.0 (± 11.0) per trainee. Comparison of trainee operative experience to arbitrary training targets found that 2-38% of trainees achieved the targets for 9 emergency index operations and 24-90% of trainees achieved the targets for 8 index elective operations. 72 trainees also completed a 6-month post in orthopaedics and recorded 7,551 operations. The most common orthopaedic operation that trainees performed was removal of metal, with an average of 2.90 (± 3.27) per trainee. The most common orthopaedic operation that trainees assisted with was total hip replacement, with an average of 10.46 (± 6.21) per trainee.

Conclusions

A centralised web-based logbook provides valuable data to analyse training programme performance. Analysis of logbooks raises concerns about operative experience at junior trainee level. The provision of adequate operative exposure for trainees should be a key performance indicator for training programmes.",2011-09-25 +23052038,Navigating the unexplored seascape of pre-miRNA candidates in single-genome approaches.,"

Motivation

The computational search for novel microRNA (miRNA) precursors often involves some sort of structural analysis with the aim of identifying which type of structures are prone to being recognized and processed by the cellular miRNA-maturation machinery. A natural way to tackle this problem is to perform clustering over the candidate structures along with known miRNA precursor structures. Mixed clusters allow then the identification of candidates that are similar to known precursors. Given the large number of pre-miRNA candidates that can be identified in single-genome approaches, even after applying several filters for precursor robustness and stability, a conventional structural clustering approach is unfeasible.

Results

We propose a method to represent candidate structures in a feature space, which summarizes key sequence/structure characteristics of each candidate. We demonstrate that proximity in this feature space is related to sequence/structure similarity, and we select candidates that have a high similarity to known precursors. Additional filtering steps are then applied to further reduce the number of candidates to those with greater transcriptional potential. Our method is compared with another single-genome method (TripletSVM) in two datasets, showing better performance in one and comparable performance in the other, for larger training sets. Additionally, we show that our approach allows for a better interpretation of the results.

Availability and implementation

The MinDist method is implemented using Perl scripts and is freely available at http://www.cravela.org/?mindist=1.

Contact

backofen@informatik.uni-freiburg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-10-10 +21949271,Pybedtools: a flexible Python library for manipulating genomic datasets and annotations.,"

Summary

pybedtools is a flexible Python software library for manipulating and exploring genomic datasets in many common formats. It provides an intuitive Python interface that extends upon the popular BEDTools genome arithmetic tools. The library is well documented and efficient, and allows researchers to quickly develop simple, yet powerful scripts that enable complex genomic analyses.

Availability

pybedtools is maintained under the GPL license. Stable versions of pybedtools as well as documentation are available on the Python Package Index at http://pypi.python.org/pypi/pybedtools.

Contact

dalerr@niddk.nih.gov; arq5x@virginia.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-23 +23298385,Correlation of microRNA-372 upregulation with poor prognosis in human glioma.,"MicroRNA-372 (miR-372) acts as either an oncogenic miRNA or an anti-oncomiR in various human malignancies. However, its roles in gliomas have not been elucidated. To address this problem, we here detected miR-372 expression in human gliomas and non-neoplastic brain tissues by real-time quantitative RT-PCR assay. The association of miR-372 expression with clinicopathological factors or prognosis of glioma patients was also statistically analyzed. As the results, miR-372 expression levels were significantly upregulated in glioma tissues compared to the corresponding non-neoplastic brain tissues (P<0.001). In addition, the high miR-372 expression was significantly associated with the advanced pathological grade (P=0.008) and the low Karnofsky performance score (KPS) of glioma patients (P=0.01). Moreover, the overall survival of patients with high miR-372 expression was dramatically shorter than those with low miR-372 expression (P<0.001). Furthermore, multivariate Cox regression analysis indicated that miR-372 expression was an independent prognostic factor for glioma patients (P=0.008). More importantly, subgroup analyses according to tumor pathological grade revealed that the cumulative overall survival of glioma patients with advanced pathological grades was significantly worse for high miR-372 expression group than for low miR-372 expression group (P<0.001), but no significant difference was found for patients with low pathological grades (P=0.08). Taken together, these data offer the convincing evidence for the first time that miR-372 may act as an oncogenic miRNA in gliomas and represent a potential regulator of aggressive development and a candidate prognostic marker for this malignancy, especially for advanced tumors with high pathological grades.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1707761328850011.",2013-01-08 +23303794,iRSpot-PseDNC: identify recombination spots with pseudo dinucleotide composition.,"Meiotic recombination is an important biological process. As a main driving force of evolution, recombination provides natural new combinations of genetic variations. Rather than randomly occurring across a genome, meiotic recombination takes place in some genomic regions (the so-called 'hotspots') with higher frequencies, and in the other regions (the so-called 'coldspots') with lower frequencies. Therefore, the information of the hotspots and coldspots would provide useful insights for in-depth studying of the mechanism of recombination and the genome evolution process as well. So far, the recombination regions have been mainly determined by experiments, which are both expensive and time-consuming. With the avalanche of genome sequences generated in the postgenomic age, it is highly desired to develop automated methods for rapidly and effectively identifying the recombination regions. In this study, a predictor, called 'iRSpot-PseDNC', was developed for identifying the recombination hotspots and coldspots. In the new predictor, the samples of DNA sequences are formulated by a novel feature vector, the so-called 'pseudo dinucleotide composition' (PseDNC), into which six local DNA structural properties, i.e. three angular parameters (twist, tilt and roll) and three translational parameters (shift, slide and rise), are incorporated. It was observed by the rigorous jackknife test that the overall success rate achieved by iRSpot-PseDNC was >82% in identifying recombination spots in Saccharomyces cerevisiae, indicating the new predictor is promising or at least may become a complementary tool to the existing methods in this area. Although the benchmark data set used to train and test the current method was from S. 
cerevisiae, the basic approaches can also be extended to deal with all the other genomes. Particularly, it has not escaped our notice that the PseDNC approach can be also used to study many other DNA-related problems. As a user-friendly web-server, iRSpot-PseDNC is freely accessible at http://lin.uestc.edu.cn/server/iRSpot-PseDNC.",2013-01-08 +22812021,"Health, United States, 2011: With Special Feature on Socioeconomic Status and Health","Health, United States, 2011 is the 35th report on the health status of the Nation and is submitted by the Secretary of the Department of Health and Human Services to the President and the Congress of the United States in compliance with Section 308 of the Public Health Service Act. This report was compiled by the Centers for Disease Control and Prevention’s (CDC) National Center for Health Statistics (NCHS). The National Committee on Vital and Health Statistics served in a review capacity. The Health, United States series presents an annual look at national trends in health statistics. The report contains a Chartbook that assesses the Nation’s health by presenting trends and current information on selected measures of morbidity, mortality, health care utilization, health risk factors, prevention, health insurance, and personal health care expenditures. This year’s Chartbook includes a Special Feature on Socioeconomic Status and Health. The report also contains 151 Trend Tables organized around four major subject areas: health status and determinants, health care utilization, health care resources, and health care expenditures. A companion product to Health, United States—Health, United States: In Brief—features information extracted from the full report. The complete report, In Brief, and related data products are available on the Health, United States website at: http://www.cdc.gov/nchs/hus.htm.",2012-07-20 +23531395,Analysis of HAX-1 gene expression in esophageal squamous cell carcinoma.,"

Objective

To explore the expression of HAX-1 mRNA and protein in esophageal squamous cell carcinoma (ESCC) and its relation with the prognosis of patients with ESCC.

Methods

The expression of HAX-1 mRNA and protein were detected with quantitative real-time RT-PCR and immunohistochemical method in 112 ESCC samples and 112 corresponding non-neoplastic samples. Survival curves were made with follow-up data. The relations of the prognosis with clinical and pathological characteristics were analyzed.

Results

The expression level of HAX-1 mRNA and the strong positive rate of HAX-1 protein were significantly higher in ESCC samples (0.527 ± 0.060 and 45.54%) than that in non-neoplastic samples (0.121 ± 0.017 and 0.00%), and in ESCC samples with lymph node metastasis (0.554 ± 0.054 and 71.11%) than that in ESCC samples without lymph node metastasis (0.509 ± 0.058 and 28.36%) (all P < 0.01). HAX-1 mRNA expression level was a risk factor of lymph node metastasis in patients with ESCC (P = 0.000). There were significant differences in survival curves between lymph node metastatic group and non-metastatic group (P = 0.000), and among groups of HAX-1 protein expression +, ++ and +++ (P = 0.000); but no statistical significance between male patients and female patients (P = 0.119), and between ≥60 years old patients and <60 years old patients (P = 0.705). The level of HAX-1 mRNA (P = 0.000) and protein (P = 0.005) were risk factors of survival, but lymph node metastasis (P = 0.477) was not.

Conclusion

There is HAX-1 over-expression in ESCC tissue and HAX-1 mRNA level is a risk factor of lymph node metastasis. The level of HAX-1 mRNA and protein were risk factors of survival in patients with ESCC. HAX-1 may be a novel therapeutic target for ESCC treatment.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/5130393079296037.",2013-03-25 +21462344,Dynamics of post-translational modifications and protein stability in the stroma of Chlamydomonas reinhardtii chloroplasts.,"The proteome of any system is a dynamic entity dependent on the intracellular concentration of the entire set of expressed proteins. In turn, this whole protein concentration will be reliant on the stability/turnover of each protein as dictated by their relative rates of synthesis and degradation. In this study, we have investigated the dynamics of the stromal proteome in the model organism Chlamydomonas reinhardtii by characterizing the half-life of the whole set of proteins. 2-DE stromal proteins profiling was set up and coupled with MS analyses. These identifications featuring an average of 26% sequence coverage and eight non-redundant peptides per protein have been obtained for 600 independent samples related to 253 distinct spots. An interactive map of the global stromal proteome, of 274 distinct protein variants is now available on-line at http://www.isv.cnrs-gif.fr/gel2dv2/. N-α-terminal-Acetylation (NTA) was noticed to be the most frequently detectable post-translational modification, and new experimental data related to the chloroplastic transit peptide cleavage site was obtained. Using this data set supplemented with series of pulse-chase experiments, elements directing the relationship between half-life and N-termini were analyzed. Positive correlation between NTA and protein half-life suggests that NTA could contribute to protein stabilization in the stroma.",2011-04-04 +22889043,Multifocal Langerhans cell sarcoma involving epidermis: a case report and review.,"

Objective

To study the clinico-pathological characteristics of Langerhans cell sarcoma (LCS) involving the epidermis.

Methods

A case of primary multifocal LCS was analyzed in histopathology and immunophenotype.

Results

A 41-year-old man with multifocal cutaneous LCS involving the inguina and waist was reported. Clinical and pathology data were available. Neoplastic cells with markedly malignant cytological features were observed. Tumor cells exhibited irregular shape with abundant and eosinophilic red staining cytoplasm; large, irregular-shaped, showing lobulated or dented nucleus and some cells with a longitudinal nuclear groove and prominent nucleoli. The tumor cells expressed CD1a, Langerin (CD207), S-100 protein, CD68 and vimentin, and did not express pan-T or B cell markers and epithelial markers. The patient died less than 1 year after diagnosis due to local recurrence and metastasis to the lung, despite the administration of local radiation and chemotherapy.

Conclusions

LCS is a tumor with markedly malignant cytological features that originates from Langerhans cells. Primary multifocal neoplasms involving the epidermis are even rarer. Accurate diagnosis is based on the histopathological and immunohistochemical features of the tumor cells.

Virtual slide

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1182345104754765.",2012-08-14 +21676910,Comparative transcriptome and network biology analyses demonstrate antiproliferative and hyperapoptotic phenotypes in human keratoconus corneas.,"

Purpose

To decipher the biological pathways involved in keratoconus pathophysiology by determining the patterns of differential gene expression between keratoconus and control corneas.

Methods

RNA was extracted from surgically removed corneas of 10 keratoconus patients and from normal corneas of 10 control patients who had undergone enucleation of an eye for ocular melanoma. Several hundred thousand RNA transcripts were assessed using exon microarrays. Statistical comparison and identification of differentially regulated and differentially spliced RNA transcripts was performed by comparing keratoconus cases and controls. In addition, relevant biological pathways were identified by information extraction using network biology.

Results

Eighty-seven genes showed significant differences in expression levels. Among these, 69 were downregulated in keratoconus patients, particularly partners of the transcription factor AP-1. The 18 overexpressed genes include mucins, keratins, and genes involved in fibroblast proliferation. In addition, 36 genes were shown to be differentially spliced, including 9 among those that were differentially expressed. Network biology and analysis using Gene Ontology descriptors suggest that many members of both groups belong to pathways of apoptosis and regulation of the balance between cellular differentiation and proliferation.

Conclusions

This work constitutes the first genome-wide transcriptome analysis of keratoconus patient corneas that include all currently known genes and exons. Differential expression suggests that mechanisms of cell loss resulting from antiproliferative and hyperapoptotic phenotypes may be responsible for the pathogenesis of keratoconus. Array information, experimental design, raw intensities, and processed log(2) ratios were deposited at the European Bioinformatic Institute's ArrayExpress database (http://www.ebi.ac.uk/arrayexpress/). The accession number is E-MEXP-2777.",2011-08-03 +23175758,A Lasso multi-marker mixed model for association mapping with population structure correction.,"

Motivation

Exploring the genetic basis of heritable traits remains one of the central challenges in biomedical research. In traits with simple Mendelian architectures, single polymorphic loci explain a significant fraction of the phenotypic variability. However, many traits of interest seem to be subject to multifactorial control by groups of genetic loci. Accurate detection of such multivariate associations is non-trivial and often compromised by limited statistical power. At the same time, confounding influences, such as population structure, cause spurious association signals that result in false-positive findings.

Results

We propose linear mixed models LMM-Lasso, a mixed model that allows for both multi-locus mapping and correction for confounding effects. Our approach is simple and free of tuning parameters; it effectively controls for population structure and scales to genome-wide datasets. LMM-Lasso simultaneously discovers likely causal variants and allows for multi-marker-based phenotype prediction from genotype. We demonstrate the practical use of LMM-Lasso in genome-wide association studies in Arabidopsis thaliana and linkage mapping in mouse, where our method achieves significantly more accurate phenotype prediction for 91% of the considered phenotypes. At the same time, our model dissects the phenotypic variability into components that result from individual single nucleotide polymorphism effects and population structure. Enrichment of known candidate genes suggests that the individual associations retrieved by LMM-Lasso are likely to be genuine.

Availability

Code available under http://webdav.tuebingen.mpg.de/u/karsten/Forschung/research.html.

Contact

rakitsch@tuebingen.mpg.de, lippert@microsoft.com or stegle@ebi.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-11-22 +21976864,VSDK: Virtual screening of small molecules using AutoDock Vina on Windows platform.,"

Unlabelled

Screening of ligand molecules to target proteins using computer-aided docking is a critical step in rational drug discovery. Based on this circumstance, we attempted to develop a virtual screening application system, named VSDK Virtual Screening by Docking, which can function under the Windows platform. This is a user-friendly, flexible, and versatile tool which can be used by users who are familiar with Windows OS. The virtual screening performance was tested for an arbitrarily-selected receptor, FGFR tyrosine kinase (pdb code: 1agw), by using ligands downloaded from ZINC database with its grid size of x,y,z = 30,30,30 and run number of 10. It took 90 minutes for 100 molecules for this virtual screening. VSDK is freely available at the designated URL, and a simplified manual can be downloaded from VSDK home page. This tool will have a more challenging scope and achievement as the computer speed and accuracy are increased and secured in the future.

Availability

The database is available for free at http://www.pharm.kobegakuin.ac.jp/~akaho/english_top.html.",2011-08-02 +21904429,Mycobacteriophage genome database.,

Unlabelled

Mycobacteriophage genome database (MGDB) is an exclusive repository of the 64 completely sequenced mycobacteriophages with annotated information. It is a comprehensive compilation of the various gene parameters captured from several databases pooled together to empower mycobacteriophage researchers. The MGDB (Version No.1.0) comprises of 6086 genes from 64 mycobacteriophages classified into 72 families based on ACLAME database. Manual curation was aided by information available from public databases which was enriched further by analysis. Its web interface allows browsing as well as querying the classification. The main objective is to collect and organize the complexity inherent to mycobacteriophage protein classification in a rational way. The other objective is to browse the existing and new genomes and describe their functional annotation.

Availability

The database is available for free at http://mpgdb.ibioinformatics.org/mpgdb.php.,2011-08-02 +21904427,RegStatGel: proteomic software for identifying differentially expressed proteins based on 2D gel images.,"

Unlabelled

Image analysis of two-dimensional gel electrophoresis is a key step in proteomic workflow for identifying proteins that change under different experimental conditions. Since there are usually large amount of proteins and variations shown in the gel images, the use of software for analysis of 2D gel images is inevitable. We developed open-source software with graphical user interface for differential analysis of 2D gel images. The user-friendly software, RegStatGel, contains fully automated as well as interactive procedures. It was developed and has been tested under Matlab 7.01.

Availability

The database is available for free at http://www.mediafire.com/FengLi/2DGelsoftware.",2011-08-02 +21372087,GBOOST: a GPU-based tool for detecting gene-gene interactions in genome-wide case control studies.,"

Motivation

Collecting millions of genetic variations is feasible with the advanced genotyping technology. With a huge amount of genetic variations data in hand, developing efficient algorithms to carry out the gene-gene interaction analysis in a timely manner has become one of the key problems in genome-wide association studies (GWAS). Boolean operation-based screening and testing (BOOST), a recent work in GWAS, completes gene-gene interaction analysis in 2.5 days on a desktop computer. Compared with central processing units (CPUs), graphic processing units (GPUs) are highly parallel hardware and provide massive computing resources. We are, therefore, motivated to use GPUs to further speed up the analysis of gene-gene interactions.

Results

We implement the BOOST method based on a GPU framework and name it GBOOST. GBOOST achieves a 40-fold speedup compared with BOOST. It completes the analysis of Wellcome Trust Case Control Consortium Type 2 Diabetes (WTCCC T2D) genome data within 1.34 h on a desktop computer equipped with Nvidia GeForce GTX 285 display card.

Availability

GBOOST code is available at http://bioinformatics.ust.hk/BOOST.html#GBOOST.",2011-03-03 +30732092,First Report of Myrothecium Leaf Spot of Hemionitis arifolia Caused by Myrothecium roridum in China.,"Hemionitis arifolia (Burm.) Moore. was grown commercially as an ornamental plant in China. In January 2010, a new foliar disease with typical leaf spot symptoms was observed on H. arifolia in Dongguan City, Guangdong Province. Approximately 10% of the plants in the Dongguan nursery were affected. Leaf spots were circular to subcircular, dark brown, with distinct concentric zones, and ranged from 10 to 20 mm in diameter. Lesions developed mostly on the lower leaves and black sporodochia with white mycelial tufts were produced mostly in older lesions under high humidity. Single-spore isolates from lesions plated on potato dextrose agar (PDA) produced white, floccose colonies and dark green-to-black sporodochia. Colonies reached 60 mm on PDA at 25°C after 14 days. Conidiophores branched repeatedly. Conidiogenous cells in whorls of two to six on ultimate branches were hyaline, cylindrical, and 13 to 20 × 1.2 to 1.8 μm. Conidia were hyaline, cylindrical, mostly with both rounded ends, occasionally one blunt end, and mean size was 6.1 (4.5 to 7.0) × 2.3 (1.8 to 3.0) μm. These characteristics were consistent with the descriptions of Myrothecium roridum Tode ex Fr. (2-4). The internal transcribed spacer regions including ITS1, ITS2, and the 5.8S rRNA of one isolate were PCR amplified and sequenced. A BLAST search in GenBank revealed highest similarity (99%) to M. roridum sequences from isolates collected from Germany (Accession Nos. AJ302001.1 and AJ301995.1). The M. roridum sequence from the Chinese isolate was submitted to GenBank (Accession No. JF343832). To confirm pathogenicity, five leaves on five H. arifolia plants were inoculated with 5-mm mycelial plugs from the periphery of 7-day-old cultures; sterile PDA plugs were used as the control treatment. 
Plants were covered with plastic bags and incubated in a growth chamber at 28°C. Necrotic lesions appeared 2 to 3 days after inoculation and the symptoms described above were observed after 7 days on all inoculated leaves, whereas sterile PDA plugs did not produce any leaf lesion. The pathogen was reisolated from the inoculated leaves and confirmed to be M. roridum on the basis of morphological characteristics. There are approximately 271 hosts of M. roridum (1), including many ornamental plants such as salvia (2) and garden hydrangea (3). To our knowledge, this is the first report of Myrothecium leaf spot on H. arifolia. Because the disease caused damage to the foliage and reduced the ornamental value of H. arifolia plants, control measures may need to be implemented for production of this species in ornamental nurseries. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrived from http://nt.ars-grin.gov/fungaldatabases/ , 6 February 2011. (2) J. A. Mangandi et al. Plant Dis. 91:772, 2007. (3) M. T. Mmbaga et al. Plant Dis. 94:1266, 2010. (4) M. Tulloch. Mycol. Pap. No. 130. CMI, Wallingford, UK, 1972.",2011-08-01 +21887657,rRNASelector: a computer program for selecting ribosomal RNA encoding sequences from metagenomic and metatranscriptomic shotgun libraries.,"Metagenomic and metatranscriptomic shotgun sequencing techniques are gaining popularity as more cost-effective next-generation sequencing technologies become commercially available. The initial stage of bioinfor-matic analysis generally involves the identification of phylogenetic markers such as ribosomal RNA genes. The sequencing reads that do not code for rRNA can then be used for protein-based analysis. Hidden Markov model is a well-known method for pattern recognition. 
Hidden Markov models that are trained on well-curated rRNA sequence databases have been successfully used to identify DNA sequence coding for rRNAs in pro-karyotes. Here, we introduce rRNASelector, which is a computer program for selecting rRNA genes from massive metagenomic and metatranscriptomic sequences using hidden Markov models. The program successfully identified prokaryotic 5S, 26S, and 23S rRNA genes from Roche 454 FLX Titanium-based metagenomic and metatranscriptomic libraries. The rRNASelector program is available at http://sw.ezbiocloud.net/rrnaselector .",2011-08-01 +30732075,First Report of Anthracnose Caused by Colletotrichum navitas on Switchgrass in New York.,"Switchgrass (Panicum virgatum L.) is a perennial grass with significant potential as a biofuel crop. From 2007 to 2010, foliar lesions were observed in new and mature stands of switchgrass in various locations in New York. Lesions were elliptical with purple margins and white necrotic centers, generally <3 cm long, ~1 mm wide, often coalesced, and containing black setae. Upon incubation, symptomatic leaf tissue developed acervuli with masses of salmon-colored spores. The fungus was identified as Colletotrichum nativas Crouch on the basis of typical cultural characteristics and conidial morphology (1). Conidia were one-celled, hyaline, fusiform, and generally falcate. Conidial length averaged 40 μm (22 to 47 μm) and width averaged 5 μm (4 to 7 μm). Compared with other graminicolous species of Colletotrichum, the conidia were larger and varied from straight to irregularly bent. Sequences of the rDNA internal transcribed spacer (ITS) regions of three isolates (Cornell accession and corresponding GenBank Nos.: Cn071NY08 (from a >20-year-old naturalized stand of switchgrass in Steuben County), JF437053; Cn080NY08 (from 'Pathfinder' in Chemung County), JF437054; and Cn101NY09 (from 'Blackwell' in Chemung County), JF437055) exhibited 100% nucleotide identity to the type isolate of C. 
nativas (GenBank No. GQ919068) collected from switchgrass selection 'Brooklyn' in New Jersey (1). Pathogenicity of the sequenced isolates along with seven other isolates (Cn105NY09 from 'Sunburst' in Tompkins County; Cn107NY09 from 'Trailblazer' in Tompkins County; Cn109NY09 from 'Forestburg' in Tompkins County; Cn111NY09 and Cn112NY09 from 'Shelter' in Tompkins County; and Cn122NY09 and Cn123NY09 from 'Cave-in-Rock' in Genesee County) was evaluated in greenhouse experiments. Seven- to eight-week-old switchgrass plants were inoculated with conidial suspensions (1 × 106 conidia/ml) of C. nativas. Inoculum or sterilized water was sprayed until runoff. Three plants of each of 'Cave-in-Rock' and 'Kanlow' were sprayed per treatment and the experiment was repeated for 3 of the 10 isolates. Inoculated plants were placed in a mist chamber for 48 h before they were returned to the greenhouse and observed for disease development, which occurred within 1 week of inoculation for both cultivars. No symptoms developed on the control plants. Foliar lesions closely resembled those observed in the field. C. nativas was consistently reisolated from symptomatic tissue collected from greenhouse experiments. Switchgrass anthracnose associated with C. graminicola sensu lata has been reported in many U.S. states (2). On the basis of molecular phylogenetics and distinguishing morphological characters, Crouch et al. erected C. navitas as a novel species distinct from C. graminicola sensu stricto, a taxon restricted to the corn anthracnose pathogen (1). C. nativas was first documented on switchgrass in New Jersey (1) and appears to be the same pathogen causing anthracnose of switchgrass in the adjoining state of Pennsylvania (1,3). To our knowledge, this is the first report of C. nativas causing anthracnose of switchgrass in New York. References: (1) J. A. Crouch et al. Mycol. Res. 113:1411, 2009. (2) D. F. Farr and A. Y. Rossman. 
Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , May 5, 2011. (3) M. A. Sanderson et al. Agron. J. 100:510, 2008.",2011-08-01 +21520341,dbNSFP: a lightweight database of human nonsynonymous SNPs and their functional predictions.,"With the advance of sequencing technologies, whole exome sequencing has increasingly been used to identify mutations that cause human diseases, especially rare Mendelian diseases. Among the analysis steps, functional prediction (of being deleterious) plays an important role in filtering or prioritizing nonsynonymous SNP (NS) for further analysis. Unfortunately, different prediction algorithms use different information and each has its own strength and weakness. It has been suggested that investigators should use predictions from multiple algorithms instead of relying on a single one. However, querying predictions from different databases/Web-servers for different algorithms is both tedious and time consuming, especially when dealing with a huge number of NSs identified by exome sequencing. To facilitate the process, we developed dbNSFP (database for nonsynonymous SNPs' functional predictions). It compiles prediction scores from four new and popular algorithms (SIFT, Polyphen2, LRT, and MutationTaster), along with a conservation score (PhyloP) and other related information, for every potential NS in the human genome (a total of 75,931,005). It is the first integrated database of functional predictions from multiple algorithms for the comprehensive collection of human NSs. dbNSFP is freely available for download at http://sites.google.com/site/jpopgen/dbNSFP.",2011-08-01 +21774864,Novel sequence types (STs) of Staphylococcus aureus isolates causing clinical and subclinical mastitis in flocks of sheep in the northeast of Brazil.,"Staphylococcus aureus is one of the most important infectious mastitis causative agents in small ruminants. 
In order to know the distribution of Staph. aureus strains associated with infectious mastitis in flocks of sheep in the northeast of Brazil and establish whether these clones are related to the strains distributed internationally, this study analysed the genetic diversity of Staph. aureus isolates from cases of clinical and subclinical mastitis in ewes by pulsed-field gel electrophoresis (PFGE) and multilocus sequence typing (MLST). In this research, 135 ewes with mastitis from 31 sheep flocks distributed in 15 districts were examined. Staph. aureus was isolated from sheep milk in 9 (29%) out of 31 herds located in 47% of the districts surveyed. MLST analysis allowed the identification of four STs (ST750, ST1728, ST1729 and ST1730). The last three with their respective novel alleles (glp-220; pta-182 and yqil-180) were recently reported in the Staph. aureus MLST database (http://www.mlst.net). Each novel allele showed only a nucleotide different from those already described. The occurrence of CC133 (ST750 and ST1729) in this study is in agreement with other reports that only a few clones of Staph. aureus seem to be responsible for most cases of mastitis in dairy farms and that some of these clones may have broad geographic distribution. However, the prevalence of CC5 (ST1728 and ST1730)--an important group related to cases of colonization or infection in humans--differs from previous studies by its widespread occurrence and may suggest human contamination followed by selective pressures of the allelic diversifications presented for these STs.",2011-08-01 +21447175,A 44K microarray dataset of the changing transcriptome in developing Atlantic salmon (Salmo salar L.).,"

Background

Atlantic salmon (Salmo salar L.) is an environmentally and economically important organism and its gene content is reasonably well characterized. From a transcriptional standpoint, it is important to characterize the changes in gene expression over the course of unperturbed early development, from fertilization through to the parr stage.

Findings

S. salar samples were taken at 17 time points from 2 to 89 days post fertilization. Total RNA was extracted and cRNA was synthesized and hybridized to a newly developed 44K oligo salmonid microarray platform. Quantified results were subjected to preliminary data analysis and submitted to NCBI's Gene Expression Omnibus (GEO). Data can be found under the GEO accession number GSE25938. http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE25938

Conclusions

Throughout the entire period of development, several thousand genes were found to be differentially regulated. This work represents the transcriptional characterization of a very large geneset that will be extremely valuable in further examination of the transcriptional changes in Atlantic salmon during the first few months of development. The expression profiles can help to annotate salmon genes in addition to being used as references against any number of experimental variables to which developing salmonids might be subjected.",2011-03-29 +22796635,Nutrilyzer: a tool for deciphering atomic stoichiometry of differentially expressed paralogous proteins.,"Organisms try to maintain homeostasis by balanced uptake of nutrients from their environment. From an atomic perspective this means that, for example, carbon:nitrogen:sulfur ratios are kept within given limits. Upon limitation of, for example, sulfur, its acquisition is triggered. For yeast it was shown that transporters and enzymes involved in sulfur uptake are encoded as paralogous genes that express different isoforms. Sulfur deprivation leads to up-regulation of isoforms that are poor in sulfur-containing amino acids, that is, methionine and cysteine. Accordingly, sulfur-rich isoforms are down-regulated. We developed a web-based software, doped Nutrilyzer, that extracts paralogous protein coding sequences from an annotated genome sequence and evaluates their atomic composition. When fed with gene-expression data for nutrient limited and normal conditions, Nutrilyzer provides a list of genes that are significantly differently expressed and simultaneously contain significantly different amounts of the limited nutrient in their atomic composition. Its intended use is in the field of ecological stoichiometry. Nutrilyzer is available at http://nutrilyzer.hs-mittweida.de. Here we describe the work flow and results with an example from a whole-genome Arabidopsis thaliana gene-expression analysis upon oxygen deprivation. 
43 paralogs distributed over 37 homology clusters were found to be significantly differently expressed while containing significantly different amounts of oxygen.",2012-07-16 +21685048,A tree-based approach for motif discovery and sequence classification.,"

Motivation

Pattern discovery algorithms are widely used for the analysis of DNA and protein sequences. Most algorithms have been designed to find overrepresented motifs in sparse datasets of long sequences, and ignore most positional information. We introduce an algorithm optimized to exploit spatial information in sparse-but-populous datasets.

Results

Our algorithm Tree-based Weighted-Position Pattern Discovery and Classification (T-WPPDC) supports both unsupervised pattern discovery and supervised sequence classification. It identifies positionally enriched patterns using the Kullback-Leibler distance between foreground and background sequences at each position. This spatial information is used to discover positionally important patterns. T-WPPDC then uses a scoring function to discriminate different biological classes. We validated T-WPPDC on an important biological problem: prediction of single nucleotide polymorphisms (SNPs) from flanking sequence. We evaluated 672 separate experiments on 120 datasets derived from multiple species. T-WPPDC outperformed other pattern discovery methods and was comparable to the supervised machine learning algorithms. The algorithm is computationally efficient and largely insensitive to dataset size. It allows arbitrary parameterization and is embarrassingly parallelizable.

Conclusions

T-WPPDC is a minimally parameterized algorithm for both pattern discovery and sequence classification that directly incorporates positional information. We use it to confirm the predictability of SNPs from flanking sequence, and show that positional information is a key to this biological problem.

Availability

The algorithm, code and data are available at: http://www.cs.utoronto.ca/~juris/data/TWPPDC",2011-06-17 +22500001,"optiCall: a robust genotype-calling algorithm for rare, low-frequency and common variants.","

Motivation

Existing microarray genotype-calling algorithms adopt either SNP-by-SNP (SNP-wise) or sample-by-sample (sample-wise) approaches to calling. We have developed a novel genotype-calling algorithm for the Illumina platform, optiCall, that uses both SNP-wise and sample-wise calling to more accurately ascertain genotypes at rare, low-frequency and common variants.

Results

Using data from 4537 individuals from the 1958 British Birth Cohort genotyped on the Immunochip, we estimate the proportion of SNPs lost to downstream analysis due to false quality control failures, and rare variants misclassified as monomorphic, is only 1.38% with optiCall, in comparison to 3.87, 7.85 and 4.09% for Illuminus, GenoSNP and GenCall, respectively. We show that optiCall accurately captures rare variants and can correctly account for SNPs where probe intensity clouds are shifted from their expected positions.

Availability and implementation

optiCall is implemented in C++ for use on UNIX operating systems and is available for download at http://www.sanger.ac.uk/resources/software/opticall/.",2012-04-12 +23162085,Bridging the scales: semantic integration of quantitative SBML in graphical multi-cellular models and simulations with EPISIM and COPASI.,"

Motivation

Biological reality can in silico only be comprehensively represented in multi-scaled models. To this end, cell behavioural models addressing the multi-cellular level have to be semantically linked with mechanistic molecular models. These requirements have to be met by flexible software workflows solving the issues of different time scales, inter-model variable referencing and flexible sub-model embedding.

Results

We developed a novel software workflow (EPISIM) for the semantic integration of Systems Biology Markup Language (SBML)-based quantitative models in multi-scaled tissue models and simulations. This workflow allows to import and access SBML-based models. SBML model species, reactions and parameters are semantically integrated in cell behavioural models (CBM) represented by graphical process diagrams. By this, cellular states like proliferation and differentiation can be flexibly linked to gene-regulatory or biochemical reaction networks. For a multi-scale agent-based tissue simulation executable code is automatically generated where different time scales of imported SBML models and CBM have been mapped. We demonstrate the capabilities of the novel software workflow by integrating Tyson's cell cycle model in our model of human epidermal tissue homeostasis. Finally, we show the semantic interplay of the different biological scales during tissue simulation.

Availability

The EPISIM platform is available as binary executables for Windows, Linux and Mac OS X at http://www.tiga.uni-hd.de. Supplementary data are available at http://www.tiga.uni-hd.de/supplements/SemSBMLIntegration.html.

Contact

niels.grabe@bioquant.uni-heidelberg.de.",2012-11-18 +22695797,Monte Carlo simulations of peptide-membrane interactions with the MCPep web server.,"The MCPep server (http://bental.tau.ac.il/MCPep/) is designed for non-experts wishing to perform Monte Carlo (MC) simulations of helical peptides in association with lipid membranes. MCPep is a web implementation of a previously developed MC simulation model. The model has been tested on a variety of peptides and protein fragments. The simulations successfully reproduced available empirical data and provided new molecular insights, such as the preferred locations of peptides in the membrane and the contribution of individual amino acids to membrane association. MCPep simulates the peptide in the aqueous phase and membrane environments, both described implicitly. In the former, the peptide is subjected solely to internal conformational changes, and in the latter, each MC cycle includes additional external rigid body rotational and translational motions to allow the peptide to change its location in the membrane. The server can explore the interaction of helical peptides of any amino-acid composition with membranes of various lipid compositions. Given the peptide's sequence or structure and the natural width and surface charge of the membrane, MCPep reports the main determinants of peptide-membrane interactions, e.g. average location and orientation in the membrane, free energy of membrane association and the peptide's helical content. Snapshots of example simulations are also provided.",2012-06-13 +21989112,"FluReF, an automated flu virus reassortment finder based on phylogenetic trees.","

Background

Reassortments are events in the evolution of the genome of influenza (flu), whereby segments of the genome are exchanged between different strains. As reassortments have been implicated in major human pandemics of the last century, their identification has become a health priority. While such identification can be done ""by hand"" on a small dataset, researchers and health authorities are building up enormous databases of genomic sequences for every flu strain, so that it is imperative to develop automated identification methods. However, current methods are limited to pairwise segment comparisons.

Results

We present FluReF, a fully automated flu virus reassortment finder. FluReF is inspired by the visual approach to reassortment identification and uses the reconstructed phylogenetic trees of the individual segments and of the full genome. We also present a simple flu evolution simulator, based on the current, source-sink, hypothesis for flu cycles. On synthetic datasets produced by our simulator, FluReF, tuned for a 0% false positive rate, yielded false negative rates of less than 10%. FluReF corroborated two new reassortments identified by visual analysis of 75 Human H3N2 New York flu strains from 2005-2008 and gave partial verification of reassortments found using another bioinformatics method.

Methods

FluReF finds reassortments by a bottom-up search of the full-genome and segment-based phylogenetic trees for candidate clades--groups of one or more sampled viruses that are separated from the other variants from the same season. Candidate clades in each tree are tested to guarantee confidence values, using the lengths of key edges as well as other tree parameters; clades with reassortments must have validated incongruencies among segment trees.

Conclusions

FluReF demonstrates robustness of prediction for geographically and temporally expanded datasets, and is not limited to finding reassortments with previously collected sequences. The complete source code is available from http://lcbb.epfl.ch/software.html.",2011-07-27 +22199392,ART: a next-generation sequencing read simulator.,"

Unlabelled

ART is a set of simulation tools that generate synthetic next-generation sequencing reads. This functionality is essential for testing and benchmarking tools for next-generation sequencing data analysis including read alignment, de novo assembly and genetic variation discovery. ART generates simulated sequencing reads by emulating the sequencing process with built-in, technology-specific read error models and base quality value profiles parameterized empirically in large sequencing datasets. We currently support all three major commercial next-generation sequencing platforms: Roche's 454, Illumina's Solexa and Applied Biosystems' SOLiD. ART also allows the flexibility to use customized read error model parameters and quality profiles.

Availability

Both source and binary software packages are available at http://www.niehs.nih.gov/research/resources/software/art.",2011-12-23 +23014630,integIRTy: a method to identify genes altered in cancer by accounting for multiple mechanisms of regulation using item response theory.,"

Motivation

Identifying genes altered in cancer plays a crucial role in both understanding the mechanism of carcinogenesis and developing novel therapeutics. It is known that there are various mechanisms of regulation that can lead to gene dysfunction, including copy number change, methylation, abnormal expression, mutation and so on. Nowadays, all these types of alterations can be simultaneously interrogated by different types of assays. Although many methods have been proposed to identify altered genes from a single assay, there is no method that can deal with multiple assays accounting for different alteration types systematically.

Results

In this article, we propose a novel method, integration using item response theory (integIRTy), to identify altered genes by using item response theory that allows integrated analysis of multiple high-throughput assays. When applied to a single assay, the proposed method is more robust and reliable than conventional methods such as Student's t-test or the Wilcoxon rank-sum test. When used to integrate multiple assays, integIRTy can identify novel-altered genes that cannot be found by looking at individual assay separately. We applied integIRTy to three public cancer datasets (ovarian carcinoma, breast cancer, glioblastoma) for cross-assay type integration which all show encouraging results.

Availability and implementation

The R package integIRTy is available at the web site http://bioinformatics.mdanderson.org/main/OOMPA:Overview.

Contact

kcoombes@mdanderson.org.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-26 +22370884,Improved method for predicting protein fold patterns with ensemble classifiers.,"Protein folding is recognized as a critical problem in the field of biophysics in the 21st century. Predicting protein-folding patterns is challenging due to the complex structure of proteins. In an attempt to solve this problem, we employed ensemble classifiers to improve prediction accuracy. In our experiments, 188-dimensional features were extracted based on the composition and physical-chemical property of proteins and 20-dimensional features were selected using a coupled position-specific scoring matrix. Compared with traditional prediction methods, these methods were superior in terms of prediction accuracy. The 188-dimensional feature-based method achieved 71.2% accuracy in five cross-validations. The accuracy rose to 77% when we used a 20-dimensional feature vector. These methods were used on recent data, with 54.2% accuracy. Source codes and dataset, together with web server and software tools for prediction, are available at: http://datamining.xmu.edu.cn/main/~cwc/ProteinPredict.html.",2012-01-27 +21386048,Adaptive statistical iterative reconstruction technique for radiation dose reduction in chest CT: a pilot study.,"

Purpose

To compare lesion detection and image quality of chest computed tomographic (CT) images acquired at various tube current-time products (40-150 mAs) and reconstructed with adaptive statistical iterative reconstruction (ASIR) or filtered back projection (FBP).

Materials and methods

In this Institutional Review Board-approved HIPAA-compliant study, CT data from 23 patients (mean age, 63 years ± 7.3 [standard deviation]; 10 men, 13 women) were acquired at varying tube current-time products (40, 75, 110, and 150 mAs) on a 64-row multidetector CT scanner with 10-cm scan length. All patients gave informed consent. Data sets were reconstructed at 30%, 50%, and 70% ASIR-FBP blending. Two thoracic radiologists assessed image noise, visibility of small structures, lesion conspicuity, and diagnostic confidence. Objective noise and CT number were measured in the thoracic aorta. CT dose index volume, dose-length product, weight, and transverse diameter were recorded. Data were analyzed by using analysis of variance and the Wilcoxon signed rank test.

Results

FBP had unacceptable noise at 40 and 75 mAs in 17 and five patients, respectively, whereas ASIR had acceptable noise at 40-150 mAs. Objective noise with 30%, 50%, and 70% ASIR blending (11.8 ± 3.8, 9.6 ± 3.1, and 7.5 ± 2.6, respectively) was lower than that with FBP (15.8 ± 4.8) (P < .0001). No lesions were missed on FBP or ASIR images. Lesion conspicuity was graded as well seen on both FBP and ASIR images (P < .05). Mild pixelated blotchy texture was noticed with 70% blended ASIR images.

Conclusion

Acceptable image quality can be obtained for chest CT images acquired at 40 mAs by using ASIR without any substantial artifacts affecting diagnostic confidence.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11101450/-/DC1.",2011-03-08 +22378710,StochSens--Matlab package for sensitivity analysis of stochastic chemical systems.,"

Motivation

The growing interest in the role of stochasticity in biochemical systems drives the demand for tools to analyse stochastic dynamical models of chemical reactions. One powerful tool to elucidate performance of dynamical systems is sensitivity analysis. Traditionally, however, the concept of sensitivity has mainly been applied to deterministic systems, and the difficulty to generalize these concepts for stochastic systems results from necessity of extensive Monte Carlo simulations.

Results

Here we present a Matlab package, StochSens, that implements sensitivity analysis for stochastic chemical systems using the concept of the Fisher Information Matrix (FIM). It uses the linear noise approximation to represent the FIM in terms of solutions of ordinary differential equations. This is the first computational tool that allows for quick computation of the Information Matrix for stochastic systems without the need for Monte Carlo simulations.

Availability

http://www.theosysbio.bio.ic.ac.uk/resources/stns

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-03-01 +23033330,Asynchronous BCI based on motor imagery with automated calibration and neurofeedback training.,"A new multiclass brain-computer interface (BCI) based on the modulation of sensorimotor oscillations by imagining movements is described. By the application of advanced signal processing tools, statistics and machine learning, this BCI system offers: 1) asynchronous mode of operation, 2) automatic selection of user-dependent parameters based on an initial calibration, 3) incremental update of the classifier parameters from feedback data. The signal classification uses spatially filtered signals and is based on spectral power estimation computed in individualized frequency bands, which are automatically identified by a specially tailored AR-based model. Relevant features are chosen by a criterion based on Mutual Information. Final recognition of motor imagery is effectuated by a multinomial logistic regression classifier. This BCI system was evaluated in two studies. In the first study, five participants trained the ability to imagine movements of the right hand, left hand and feet in response to visual cues. The accuracy of the classifier was evaluated across four training sessions with feedback. The second study assessed the information transfer rate (ITR) of the BCI in an asynchronous application. The subjects' task was to navigate a cursor along a computer rendered 2-D maze. A peak information transfer rate of 8.0 bit/min was achieved. Five subjects performed with a mean ITR of 4.5 bit/min and an accuracy of 74.84%. These results demonstrate that the use of automated interfaces to reduce complexity for the intended operator (outside the laboratory) is indeed possible. 
The signal processing and classifier source code embedded in BCI2000 is available from https://www.brain-project.org/downloads.html.",2012-09-24 +21646520,Reference-guided assembly of four diverse Arabidopsis thaliana genomes.,"We present whole-genome assemblies of four divergent Arabidopsis thaliana strains that complement the 125-Mb reference genome sequence released a decade ago. Using a newly developed reference-guided approach, we assembled large contigs from 9 to 42 Gb of Illumina short-read data from the Landsberg erecta (Ler-1), C24, Bur-0, and Kro-0 strains, which have been sequenced as part of the 1,001 Genomes Project for this species. Using alignments against the reference sequence, we first reduced the complexity of the de novo assembly and later integrated reads without similarity to the reference sequence. As an example, half of the noncentromeric C24 genome was covered by scaffolds that are longer than 260 kb, with a maximum of 2.2 Mb. Moreover, over 96% of the reference genome was covered by the reference-guided assembly, compared with only 87% with a complete de novo assembly. Comparisons with 2 Mb of dideoxy sequence reveal that the per-base error rate of the reference-guided assemblies was below 1 in 10,000. Our assemblies provide a detailed, genomewide picture of large-scale differences between A. thaliana individuals, most of which are difficult to access with alignment-consensus methods only. We demonstrate their practical relevance in studying the expression differences of polymorphic genes and show how the analysis of sRNA sequencing data can lead to erroneous conclusions if aligned against the reference genome alone. Genome assemblies, raw reads, and further information are accessible through http://1001genomes.org/projects/assemblies.html.",2011-06-06 +21788211,RNAG: a new Gibbs sampler for predicting RNA secondary structure for unaligned sequences.,"

Motivation

RNA secondary structure plays an important role in the function of many RNAs, and structural features are often key to their interaction with other cellular components. Thus, there has been considerable interest in the prediction of secondary structures for RNA families. In this article, we present a new global structural alignment algorithm, RNAG, to predict consensus secondary structures for unaligned sequences. It uses a blocked Gibbs sampling algorithm, which has a theoretical advantage in convergence time. This algorithm iteratively samples from the conditional probability distributions P(Structure | Alignment) and P(Alignment | Structure). Not surprisingly, there is considerable uncertainty in the high-dimensional space of this difficult problem, which has so far received limited attention in this field. We show how the samples drawn from this algorithm can be used to more fully characterize the posterior space and to assess the uncertainty of predictions.

Results

Our analysis of three publicly available datasets showed a substantial improvement in RNA structure prediction by RNAG over extant prediction methods. Additionally, our analysis of 17 RNA families showed that the RNAG sampled structures were generally compact around their ensemble centroids, and at least 11 families had at least two well-separated clusters of predicted structures. In general, the distance between a reference structure and our predicted structure was large relative to the variation among structures within an ensemble.

Availability

The Perl implementation of the RNAG algorithm and the data necessary to reproduce the results described in Sections 3.1 and 3.2 are available at http://ccmbweb.ccv.brown.edu/rnag.html

Contact

charles_lawrence@brown.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-24 +22479706,Network-based functional enrichment.,"

Background

Many methods have been developed to infer and reason about molecular interaction networks. These approaches often yield networks with hundreds or thousands of nodes and up to an order of magnitude more edges. It is often desirable to summarize the biological information in such networks. A very common approach is to use gene function enrichment analysis for this task. A major drawback of this method is that it ignores information about the edges in the network being analyzed, i.e., it treats the network simply as a set of genes. In this paper, we introduce a novel method for functional enrichment that explicitly takes network interactions into account.

Results

Our approach naturally generalizes Fisher's exact test, a gene set-based technique. Given a function of interest, we compute the subgraph of the network induced by genes annotated to this function. We use the sequence of sizes of the connected components of this sub-network to estimate its connectivity. We estimate the statistical significance of the connectivity empirically by a permutation test. We present three applications of our method: i) determine which functions are enriched in a given network, ii) given a network and an interesting subnetwork of genes within that network, determine which functions are enriched in the sub-network, and iii) given two networks, determine the functions for which the connectivity improves when we merge the second network into the first. Through these applications, we show that our approach is a natural alternative to network clustering algorithms.

Conclusions

We presented a novel approach to functional enrichment that takes into account the pairwise relationships among genes annotated by a particular function. Each of the three applications discovers highly relevant functions. We used our methods to study biological data from three different organisms. Our results demonstrate the wide applicability of our methods. Our algorithms are implemented in C++ and are freely available under the GNU General Public License at our supplementary website. Additionally, all our input data and results are available at http://bioinformatics.cs.vt.edu/~murali/supplements/2011-incob-nbe/.",2011-11-30 +23372469,Use of a supplementary internet based education program improves sleep literacy in college psychology students.,"

Introduction

Knowledge regarding the importance of sleep in health and performance and good sleep hygiene practices is low, especially among adolescents and young adults. It is important to improve sleep literacy. Introductory psychology is one of the most highly enrolled courses at colleges and universities. This study tested the impact of an Internet-based learning module on improving sleep literacy in this venue.

Methods

An Internet-based supplementary learning module containing sleep physiology and hygiene information was developed using content from the Harvard Medical School sleep educational website http://www.understandingsleep.org. Access to the module was provided as an extra credit activity for 2 of 4 sections (Supplemental Sleep, SS, N = 889) of an introductory college psychology course during their standard instruction on sleep and dreaming. The remaining 2 sections (Standard Instruction, SI, N = 878) only were encouraged to visit the website without further direction. Level of knowledge was assessed before and after availability to the module/website and at the end of the semester. Students were asked to complete a survey at the end of the semester inquiring whether they made any changes in their sleep behaviors.

Results

Two hundred fifty students participated in the extra credit activity and had data available at all testing points. Students in the SS Group had a significant improvement in sleep knowledge test scores after interacting with the website in comparison to the SI group (19.41 ± 3.15 vs. 17.94 ± 3.08, p < 0.001). This difference persisted, although at a lower level, at the end of the semester. In addition, 55.9% of the SS group versus 45.1% of the SI group indicated that they made changes in their sleep habits after participation in the extra credit sleep activity (p < 0.01). The most common change was a more consistent wake time.

Conclusion

Use of a supplementary internet-based sleep learning module has the potential to enhance sleep literacy and change behavior among students enrolled in an introductory college psychology course.",2013-02-01 +22551396,Stem cells and other innovative intra-articular therapies for osteoarthritis: what does the future hold?,"Osteoarthritis (OA), the most common type of arthritis in the world, is associated with suffering due to pain, productivity loss, decreased mobility and quality of life. Systemic therapies available for OA are mostly symptom modifying and have potential gastrointestinal, renal, hepatic, and cardiac side effects. BMC Musculoskeletal Disorders recently published a study showing evidence of reparative effects demonstrated by homing of intra-articularly injected autologous bone marrow stem cells in damaged cartilage in an animal model of OA, along with clinical and radiographic benefit. This finding adds to the growing literature showing the potential benefit of intra-articular (IA) bone marrow stem cells. Other emerging potential IA therapies include IL-1 receptor antagonists, conditioned autologous serum, botulinum toxin, and bone morphogenetic protein-7. For each of these therapies, trial data in humans have been published, but more studies are needed to establish that they are safe and effective. Several additional promising new OA treatments are on the horizon, but challenges remain to finding safe and effective local and systemic therapies for OA. Please see related article: http://www.biomedcentral.com/1471-2474/12/259.",2012-05-02 +23782612,Exact algorithms for haplotype assembly from whole-genome sequence data.,"

Motivation

Haplotypes play a crucial role in genetic analysis and have many applications such as gene disease diagnoses, association studies, ancestry inference and so forth. The development of DNA sequencing technologies makes it possible to obtain haplotypes from a set of aligned reads originated from both copies of a chromosome of a single individual. This approach is often known as haplotype assembly. Exact algorithms that can give optimal solutions to the haplotype assembly problem are highly demanded. Unfortunately, previous algorithms for this problem either fail to output optimal solutions or take too long time even executed on a PC cluster.

Results

We develop an approach to finding optimal solutions for the haplotype assembly problem under the minimum-error-correction (MEC) model. Most of the previous approaches assume that the columns in the input matrix correspond to (putative) heterozygous sites. This all-heterozygous assumption is correct for most columns, but it may be incorrect for a small number of columns. In this article, we consider the MEC model with or without the all-heterozygous assumption. In our approach, we first use new methods to decompose the input read matrix into small independent blocks and then model the problem for each block as an integer linear programming problem, which is then solved by an integer linear programming solver. We have tested our program on a single PC [a Linux (x64) desktop PC with i7-3960X CPU], using the filtered HuRef and the NA 12878 datasets (after applying some variant calling methods). With the all-heterozygous assumption, our approach can optimally solve the whole HuRef data set within a total time of 31 h (26 h for the most difficult block of the 15th chromosome and only 5 h for the other blocks). To our knowledge, this is the first time that MEC optimal solutions are completely obtained for the filtered HuRef dataset. Moreover, in the general case (without the all-heterozygous assumption), for the HuRef dataset our approach can optimally solve all the chromosomes except the most difficult block in chromosome 15 within a total time of 12 days. For both of the HuRef and NA12878 datasets, the optimal costs in the general case are sometimes much smaller than those in the all-heterozygous case. This implies that some columns in the input matrix (after applying certain variant calling methods) still correspond to false-heterozygous sites.

Availability

Our program and the optimal solutions found for the HuRef dataset are available at http://rnc.r.dendai.ac.jp/hapAssembly.html.",2013-06-18 +21784873,Genome-scale phylogenetic function annotation of large and diverse protein families.,"The Statistical Inference of Function Through Evolutionary Relationships (SIFTER) framework uses a statistical graphical model that applies phylogenetic principles to automate precise protein function prediction. Here we present a revised approach (SIFTER version 2.0) that enables annotations on a genomic scale. SIFTER 2.0 produces equivalently precise predictions compared to the earlier version on a carefully studied family and on a collection of 100 protein families. We have added an approximation method to SIFTER 2.0 and show a 500-fold improvement in speed with minimal impact on prediction results in the functionally diverse sulfotransferase protein family. On the Nudix protein family, previously inaccessible to the SIFTER framework because of the 66 possible molecular functions, SIFTER achieved 47.4% accuracy on experimental data (where BLAST achieved 34.0%). Finally, we used SIFTER to annotate all of the Schizosaccharomyces pombe proteins with experimental functional characterizations, based on annotations from proteins in 46 fungal genomes. SIFTER precisely predicted molecular function for 45.5% of the characterized proteins in this genome, as compared with four current function prediction methods that precisely predicted function for 62.6%, 30.6%, 6.0%, and 5.7% of these proteins. We use both precision-recall curves and ROC analyses to compare these genome-scale predictions across the different methods and to assess performance on different types of applications. SIFTER 2.0 is capable of predicting protein molecular function for large and functionally diverse protein families using an approximate statistical model, enabling phylogenetics-based protein function prediction for genome-wide analyses. 
The code for SIFTER and protein family data are available at http://sifter.berkeley.edu.",2011-07-22 +22540359,Intra-word inconsistency in apraxic Hebrew-speaking children.,"Intra-word inconsistency in a child is perceived as an indicator of speech impairment. Because the speech of typically developing children is highly variable, the extent and nature of the inconsistency must be defined when used as a diagnostic marker of speech impairment (McLeod, S., & Hewett, S. R. (2008). Variability in the production of words containing consonant clusters by typical 2- and 3-year-old children. Folia Phoniatrica et Logopaedica, 60(4), 163-172). In this paper, we study inconsistency with reference to the prosodic hierarchy (McCarthy, J. J., & Prince, A. S. (1996). Prosodic morphology 1986. Amherst, MA: University of Massachusetts. Retrieved April 15, 2010, from http://ruccs.rutgers.edu/pub/papers/pm86all.pdf), suggesting a new way to describe this phenomenon in childhood apraxia of speech (CAS). The prosodic hierarchy has been used in recent years to demonstrate the phonological development of typical and atypical populations. Sixteen children diagnosed with CAS (average age 3;11) participated in the study. The data, collected from each child in the course of eight weekly meetings, are drawn from naming single words. The results indicate that inconsistency is dominant for two prosodic levels, the segmental and the syllabic, while the prosodic word level was largely preserved.",2012-06-01 +22424533,Immunohistochemistry profiles of breast ductal carcinoma: factor analysis of digital image analysis data.,"

Background

Molecular studies of breast cancer revealed biological heterogeneity of the disease and opened new perspectives for personalized therapy. While multiple gene expression-based systems have been developed, current clinical practice is largely based upon conventional clinical and pathologic criteria. This gap may be filled by development of combined multi-IHC indices to characterize biological and clinical behaviour of the tumours. Digital image analysis (DA) with multivariate statistics of the data opens new opportunities in this field.

Methods

Tissue microarrays of 109 patients with breast ductal carcinoma were stained for a set of 10 IHC markers (ER, PR, HER2, Ki67, AR, BCL2, HIF-1α, SATB1, p53, and p16). Aperio imaging platform with the Genie, Nuclear and Membrane algorithms were used for the DA. Factor analysis of the DA data was performed in the whole group and hormone receptor (HR) positive subgroup of the patients (n = 85).

Results

Major factor potentially reflecting aggressive disease behaviour (i-Grade) was extracted, characterized by opposite loadings of ER/PR/AR/BCL2 and Ki67/HIF-1α. The i-Grade factor scores revealed bimodal distribution and were strongly associated with higher Nottingham histological grade (G) and more aggressive intrinsic subtypes. In HR-positive tumours, the aggressiveness of the tumour was best defined by positive Ki67 and negative ER loadings. High Ki67/ER factor scores were strongly associated with the higher G and Luminal B types, but also were detected in a set of G1 and Luminal A cases, potentially indicating high risk patients in these categories. Inverse relation between HER2 and PR expression was found in the HR-positive tumours pointing at differential information conveyed by the ER and PR expression. SATB1 along with HIF-1α reflected the second major factor of variation in our patients; in the HR-positive group they were inversely associated with the HR and BCL2 expression and represented the major factor of variation. Finally, we confirmed high expression levels of p16 in Triple-negative tumours.

Conclusion

Factor analysis of multiple IHC biomarkers measured by automated DA is an efficient exploratory tool clarifying complex interdependencies in the breast ductal carcinoma IHC profiles and informative value of single IHC markers. Integrated IHC indices may provide additional risk stratifications for the currently used grading systems and prove to be useful in clinical outcome studies.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1512077125668949.",2012-03-16 +21763173,Europe's neglected infections of poverty.,"

Objectives

To review the prevalence, incidence, and geographic distribution of the major neglected infections of poverty in Europe as a basis for future policy recommendations.

Methods

We reviewed the literature from 1999 to 2010 for neglected tropical diseases listed by PLoS Neglected Tropical Diseases (http://www.plosntds.org/static/scope.action) and the geographic regions and countries of (continental) Europe. Reference lists of identified articles and reviews were also hand searched, as were World Health Organization databases.

Results

In Eastern Europe, the soil-transmitted helminth infections (especially ascariasis, trichuriasis, and toxocariasis), giardiasis, and toxoplasmosis remain endemic. High incidence rates of selected food-borne helminthiases including trichinellosis, opisthorchiasis, taeniasis, and echinococcosis also occur, while brucellosis and leptospirosis represent important bacterial zoonoses. Turmoil and economic collapse following the war in the Balkans, the fall of Communism, and Europe's recent recession have helped to promote their high prevalence and incidence rates. In Southern Europe, vector-borne zoonoses have emerged, including leishmaniasis and Chagas disease, and key arboviral infections. Additional vulnerable populations include the Roma, orphans destined for international adoption, and some immigrant groups.

Conclusions

Among the policy recommendations are increased efforts to determine the prevalence, incidence, and geographic distribution of Europe's neglected infections, epidemiological studies to understand the ecology and mechanisms of disease transmission, and research and development for new control tools.",2011-07-16 +22993389,Clinical spectrum and severity of hemolytic anemia in glucose 6-phosphate dehydrogenase-deficient children receiving dapsone.,"Drug-induced acute hemolytic anemia led to the discovery of G6PD deficiency. However, most clinical data are from isolated case reports. In 2 clinical trials of antimalarial preparations containing dapsone (4,4'-diaminodiphenylsulfone; 2.5 mg/kg once daily for 3 days), 95 G6PD-deficient hemizygous boys, 24 G6PD-deficient homozygous girls, and 200 girls heterozygous for G6PD deficiency received this agent. In the first 2 groups, there was a maximum decrease in hemoglobin averaging -2.64 g/dL (range -6.70 to +0.30 g/dL), which was significantly greater than for the comparator group receiving artemether-lumefantrine (adjusted difference -1.46 g/dL; 95% confidence interval -1.76, -1.15). Hemoglobin concentrations were decreased by ≥ 40% versus pretreatment in 24/119 (20.2%) of the G6PD-deficient children; 13/119 (10.9%) required blood transfusion. In the heterozygous girls, the mean maximum decrease in hemoglobin was -1.83 g/dL (range +0.90 to -5.20 g/dL); 1 in 200 (0.5%) required blood transfusion. All children eventually recovered. All the G6PD-deficient children had the G6PD A- variant, ie, mutations V68M and N126D. Drug-induced acute hemolytic anemia in G6PD A- subjects can be life-threatening, depending on the nature and dosage of the drug trigger. Therefore, contrary to current perception, in clinical terms the A- type of G6PD deficiency cannot be regarded as mild. 
This study is registered at http://www.clinicaltrials.gov as NCT00344006 and NCT00371735.",2012-09-19 +22548923,A new analysis approach of epidermal growth factor receptor pathway activation patterns provides insights into cetuximab resistance mechanisms in head and neck cancer.,"The pathways downstream of the epidermal growth factor receptor (EGFR) have often been implicated to play crucial roles in the development and progression of various cancer types. Different authors have proposed models in cell lines in which they study the modes of pathway activities after perturbation experiments. It is prudent to believe that a better understanding of these pathway activation patterns might lead to novel treatment concepts for cancer patients or at least allow a better stratification of patient collectives into different risk groups or into groups that might respond to different treatments. Traditionally, such analyses focused on the individual players of the pathways. More recently in the field of systems biology, a plethora of approaches that take a more holistic view on the signaling pathways and their downstream transcriptional targets has been developed. Fertig et al. have recently developed a new method to identify patterns and biological process activity from transcriptomics data, and they demonstrate the utility of this methodology to analyze gene expression activity downstream of the EGFR in head and neck squamous cell carcinoma to study cetuximab resistance. Please see related article: http://www.biomedcentral.com/1471-2164/13/160.",2012-05-01 +22661579,FastML: a web server for probabilistic reconstruction of ancestral sequences.,"Ancestral sequence reconstruction is essential to a variety of evolutionary studies. Here, we present the FastML web server, a user-friendly tool for the reconstruction of ancestral sequences. 
FastML implements various novel features that differentiate it from existing tools: (i) FastML uses an indel-coding method, in which each gap, possibly spanning multiples sites, is coded as binary data. FastML then reconstructs ancestral indel states assuming a continuous time Markov process. FastML provides the most likely ancestral sequences, integrating both indels and characters; (ii) FastML accounts for uncertainty in ancestral states: it provides not only the posterior probabilities for each character and indel at each sequence position, but also a sample of ancestral sequences from this posterior distribution, and a list of the k-most likely ancestral sequences; (iii) FastML implements a large array of evolutionary models, which makes it generic and applicable for nucleotide, protein and codon sequences; and (iv) a graphical representation of the results is provided, including, for example, a graphical logo of the inferred ancestral sequences. The utility of FastML is demonstrated by reconstructing ancestral sequences of the Env protein from various HIV-1 subtypes. FastML is freely available for all academic users and is available online at http://fastml.tau.ac.il/.",2012-05-31 +21775308,Discovering novel subsystems using comparative genomics.,"

Motivation

Key problems for computational genomics include discovering novel pathways in genome data, and discovering functional interaction partners for genes to define new members of partially elucidated pathways.

Results

We propose a novel method for the discovery of subsystems from annotated genomes. For each gene pair, a score measuring the likelihood that the two genes belong to a same subsystem is computed using genome context methods. Genes are then grouped based on these scores, and the resulting groups are filtered to keep only high-confidence groups. Since the method is based on genome context analysis, it relies solely on structural annotation of the genomes. The method can be used to discover new pathways, find missing genes from a known pathway, find new protein complexes or other kinds of functional groups and assign function to genes. We tested the accuracy of our method in Escherichia coli K-12. In one configuration of the system, we find that 31.6% of the candidate groups generated by our method match a known pathway or protein complex closely, and that we rediscover 31.2% of all known pathways and protein complexes of at least 4 genes. We believe that a significant proportion of the candidates that do not match any known group in E.coli K-12 corresponds to novel subsystems that may represent promising leads for future laboratory research. We discuss in-depth examples of these findings.

Availability

Predicted subsystems are available at http://brg.ai.sri.com/pwy-discovery/journal.html.

Contact

lferrer@ai.sri.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-19 +21765097,PGAT: a multistrain analysis resource for microbial genomes.,"

Motivation

The Prokaryotic-genome Analysis Tool (PGAT) is a web-based database application for comparing gene content and sequence across multiple microbial genomes facilitating the discovery of genetic differences that may explain observed phenotypes. PGAT supports database queries to identify genes that are present or absent in user-selected genomes, comparison of sequence polymorphisms in sets of orthologous genes, multigenome display of regions surrounding a query gene, comparison of the distribution of genes in metabolic pathways and manual community annotation.

Availability and implementation

The PGAT website may be accessed at http://nwrce.org/pgat.

Contact

mbrittna@uw.edu.",2011-07-15 +21762488,EuroPineDB: a high-coverage web database for maritime pine transcriptome.,"

Background

Pinus pinaster is an economically and ecologically important species that is becoming a woody gymnosperm model. Its enormous genome size makes whole-genome sequencing approaches hard to apply. Therefore, the expressed portion of the genome has to be characterised and the results and annotations have to be stored in dedicated databases.

Description

EuroPineDB is the largest sequence collection available for a single pine species, Pinus pinaster (maritime pine), since it comprises 951 641 raw sequence reads obtained from non-normalised cDNA libraries and high-throughput sequencing from adult (xylem, phloem, roots, stem, needles, cones, strobili) and embryonic (germinated embryos, buds, callus) maritime pine tissues. Using open-source tools, sequences were optimally pre-processed, assembled, and extensively annotated (GO, EC and KEGG terms, descriptions, SNPs, SSRs, ORFs and InterPro codes). As a result, a 10.5× P. pinaster genome was covered and assembled in 55 322 UniGenes. A total of 32 919 (59.5%) of P. pinaster UniGenes were annotated with at least one description, revealing at least 18 466 different genes. The complete database, which is designed to be scalable, maintainable, and expandable, is freely available at: http://www.scbi.uma.es/pindb/. It can be retrieved by gene libraries, pine species, annotations, UniGenes and microarrays (i.e., the sequences are distributed in two-colour microarrays; this is the only conifer database that provides this information) and will be periodically updated. Small assemblies can be viewed using a dedicated visualisation tool that connects them with SNPs. Any sequence or annotation set shown on-screen can be downloaded. Retrieval mechanisms for sequences and gene annotations are provided.

Conclusions

The EuroPineDB with its integrated information can be used to reveal new knowledge, offers an easy-to-use collection of information to directly support experimental work (including microarray hybridisation), and provides deeper knowledge on the maritime pine transcriptome.",2011-07-15 +22848736,Analyzing multi-locus plant barcoding datasets with a composition vector method based on adjustable weighted distance.,"

Background

The composition vector (CV) method has been proved to be a reliable and fast alignment-free method to analyze large COI barcoding data. In this study, we modify this method for analyzing multi-gene datasets for plant DNA barcoding. The modified method includes an adjustable-weighted algorithm for the vector distance according to the ratio in sequence length of the candidate genes for each pair of taxa.

Methodology/principal findings

Three datasets, matK+rbcL dataset with 2,083 sequences, matK+rbcL dataset with 397 sequences and matK+rbcL+trnH-psbA dataset with 397 sequences, were tested. We showed that the success rates of grouping sequences at the genus/species level based on this modified CV approach are always higher than those based on the traditional K2P/NJ method. For the matK+rbcL datasets, the modified CV approach outperformed the K2P-NJ approach by 7.9% in both the 2,083-sequence and 397-sequence datasets, and for the matK+rbcL+trnH-psbA dataset, the CV approach outperformed the traditional approach by 16.7%.

Conclusions

We conclude that the modified CV approach is an efficient method for analyzing large multi-gene datasets for plant DNA barcoding. Source code, implemented in C++ and supported on MS Windows, is freely available for download at http://math.xtu.edu.cn/myphp/math/research/source/Barcode_source_codes.zip.",2012-07-27 +23360738,"Overinterpretation and misreporting of diagnostic accuracy studies: evidence of ""spin"".","

Purpose

To estimate the frequency of distorted presentation and overinterpretation of results in diagnostic accuracy studies.

Materials and methods

MEDLINE was searched for diagnostic accuracy studies published between January and June 2010 in journals with an impact factor of 4 or higher. Articles included were primary studies of the accuracy of one or more tests in which the results were compared with a clinical reference standard. Two authors scored each article independently by using a pretested data-extraction form to identify actual overinterpretation and practices that facilitate overinterpretation, such as incomplete reporting of study methods or the use of inappropriate methods (potential overinterpretation). The frequency of overinterpretation was estimated in all studies and in a subgroup of imaging studies.

Results

Of the 126 articles, 39 (31%; 95% confidence interval [CI]: 23, 39) contained a form of actual overinterpretation, including 29 (23%; 95% CI: 16, 30) with an overly optimistic abstract, 10 (8%; 95% CI: 3%, 13%) with a discrepancy between the study aim and conclusion, and eight with conclusions based on selected subgroups. In our analysis of potential overinterpretation, authors of 89% (95% CI: 83%, 94%) of the studies did not include a sample size calculation, 88% (95% CI: 82%, 94%) did not state a test hypothesis, and 57% (95% CI: 48%, 66%) did not report CIs of accuracy measurements. In 43% (95% CI: 34%, 52%) of studies, authors were unclear about the intended role of the test, and in 3% (95% CI: 0%, 6%) they used inappropriate statistical tests. A subgroup analysis of imaging studies showed 16 (30%; 95% CI: 17%, 43%) and 53 (100%; 95% CI: 92%, 100%) contained forms of actual and potential overinterpretation, respectively.

Conclusion

Overinterpretation and misreporting of results in diagnostic accuracy studies is frequent in journals with high impact factors.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12120527/-/DC1.",2013-01-29 +21752876,"International Union of Basic and Clinical Pharmacology. LXXXIII: classification of prostanoid receptors, updating 15 years of progress.","It is now more than 15 years since the molecular structures of the major prostanoid receptors were elucidated. Since then, substantial progress has been achieved with respect to distribution and function, signal transduction mechanisms, and the design of agonists and antagonists (http://www.iuphar-db.org/DATABASE/FamilyIntroductionForward?familyId=58). This review systematically details these advances. More recent developments in prostanoid receptor research are included. The DP(2) receptor, also termed CRTH2, has little structural resemblance to DP(1) and other receptors described in the original prostanoid receptor classification. DP(2) receptors are more closely related to chemoattractant receptors. Prostanoid receptors have also been found to heterodimerize with other prostanoid receptor subtypes and nonprostanoids. This may extend signal transduction pathways and create new ligand recognition sites: prostacyclin/thromboxane A(2) heterodimeric receptors for 8-epi-prostaglandin E(2), wild-type/alternative (alt4) heterodimers for the prostaglandin FP receptor for bimatoprost and the prostamides. It is anticipated that the 15 years of research progress described herein will lead to novel therapeutic entities.",2011-07-13 +22844098,Discovering the hidden sub-network component in a ranked list of genes or proteins derived from genomic experiments.,"Genomic experiments (e.g. differential gene expression, single-nucleotide polymorphism association) typically produce ranked list of genes. We present a simple but powerful approach which uses protein-protein interaction data to detect sub-networks within such ranked lists of genes or proteins. 
We performed an exhaustive study of network parameters that allowed us to conclude that the average number of components and the average number of nodes per component are the parameters that best discriminate between real and random networks. A novel aspect that increases the efficiency of this strategy in finding sub-networks is that, in addition to direct connections, also connections mediated by intermediate nodes are considered to build up the sub-networks. The possibility of using such intermediate nodes makes this approach more robust to noise. It also overcomes some limitations intrinsic to experimental designs based on differential expression, in which some nodes are invariant across conditions. The proposed approach can also be used for candidate disease-gene prioritization. Here, we demonstrate the usefulness of the approach by means of several case examples that include a differential expression analysis in Fanconi Anemia, a genome-wide association study of bipolar disorder and a genome-scale study of essentiality in cancer genes. An efficient and easy-to-use web interface (available at http://www.babelomics.org) based on HTML5 technologies is also provided to run the algorithm and represent the network.",2012-07-27 +23483883,Protein-protein docking with F(2)Dock 2.0 and GB-rerank.,"

Motivation

Computational simulation of protein-protein docking can expedite the process of molecular modeling and drug discovery. This paper reports on our new F(2) Dock protocol which improves the state of the art in initial stage rigid body exhaustive docking search, scoring and ranking by introducing improvements in the shape-complementarity and electrostatics affinity functions, a new knowledge-based interface propensity term with FFT formulation, a set of novel knowledge-based filters and finally a solvation energy (GBSA) based reranking technique. Our algorithms are based on highly efficient data structures including the dynamic packing grids and octrees which significantly speed up the computations and also provide guaranteed bounds on approximation error.

Results

The improved affinity functions show superior performance compared to their traditional counterparts in finding correct docking poses at higher ranks. We found that the new filters and the GBSA based reranking individually and in combination significantly improve the accuracy of docking predictions with only minor increase in computation time. We compared F(2) Dock 2.0 with ZDock 3.0.2 and found improvements over it, specifically among 176 complexes in ZLab Benchmark 4.0, F(2) Dock 2.0 finds a near-native solution as the top prediction for 22 complexes; where ZDock 3.0.2 does so for 13 complexes. F(2) Dock 2.0 finds a near-native solution within the top 1000 predictions for 106 complexes as opposed to 104 complexes for ZDock 3.0.2. However, there are 17 and 15 complexes where F(2) Dock 2.0 finds a solution but ZDock 3.0.2 does not and vice versa; which indicates that the two docking protocols can also complement each other.

Availability

The docking protocol has been implemented as a server with a graphical client (TexMol) which allows the user to manage multiple docking jobs, and visualize the docked poses and interfaces. Both the server and client are available for download. Server: http://www.cs.utexas.edu/~bajaj/cvc/software/f2dock.shtml. Client: http://www.cs.utexas.edu/~bajaj/cvc/software/f2dockclient.shtml.",2013-03-06 +21752802,"RxnFinder: biochemical reaction search engines using molecular structures, molecular fragments and reaction similarity.","

Summary

Biochemical reactions play a key role to help sustain life and allow cells to grow. RxnFinder was developed to search biochemical reactions from KEGG reaction database using three search criteria: molecular structures, molecular fragments and reaction similarity. RxnFinder is helpful to get reference reactions for biosynthesis and xenobiotics metabolism.

Availability

RxnFinder is freely available via: http://sdd.whu.edu.cn/rxnfinder.

Contact

qnhu@whu.edu.cn.",2011-07-12 +22833524,Contact map prediction using a large-scale ensemble of rule sets and the fusion of multiple predicted structural features.,"

Motivation

The prediction of a protein's contact map has become, in recent years, a crucial stepping stone for the prediction of the complete 3D structure of a protein. In this article, we describe a methodology for this problem that was shown to be successful in CASP8 and CASP9. The methodology is based on (i) the fusion of the prediction of a variety of structural aspects of protein residues, (ii) an ensemble strategy used to facilitate the training process and (iii) a rule-based machine learning system from which we can extract human-readable explanations of the predictor and derive useful information about the contact map representation.

Results

The main part of the evaluation is the comparison against the sequence-based contact prediction methods from CASP9, where our method presented the best rank in five out of the six evaluated metrics. We also assess the impact of the size of the ensemble used in our predictor to show the trade-off between performance and training time of our method. Finally, we also study the rule sets generated by our machine learning system. From this analysis, we are able to estimate the contribution of the attributes in our representation and how these interact to derive contact predictions.

Availability

http://icos.cs.nott.ac.uk/servers/psp.html.

Contact

natalio.krasnogor@nottingham.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-25 +21984770,CAMBerVis: visualization software to support comparative analysis of multiple bacterial strains.,"

Motivation

A number of inconsistencies in genome annotations are documented among bacterial strains. Visualization of the differences may help biologists to make correct decisions in spurious cases.

Results

We have developed a visualization tool, CAMBerVis, to support comparative analysis of multiple bacterial strains. The software manages simultaneous visualization of multiple bacterial genomes, enabling visual analysis focused on genome structure annotations.

Availability

The CAMBerVis software is freely available at the project website: http://bioputer.mimuw.edu.pl/camber. Input datasets for Mycobacterium tuberculosis and Staphylococcus aureus are integrated with the software as examples.

Contact

m.wozniak@mimuw.edu.pl

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-08 +21752249,Split-based computation of majority-rule supertrees.,"

Background

Supertree methods combine overlapping input trees into a larger supertree. Here, I consider split-based supertree methods that first extract the split information of the input trees and subsequently combine this split information into a phylogeny. Well known split-based supertree methods are matrix representation with parsimony and matrix representation with compatibility. Combining input trees on the same taxon set, as in the consensus setting, is a well-studied task and it is thus desirable to generalize consensus methods to supertree methods.

Results

Here, three variants of majority-rule (MR) supertrees that generalize majority-rule consensus trees are investigated. I provide simple formulas for computing the respective score for bifurcating input- and supertrees. These score computations, together with a heuristic tree search minimizing the scores, were implemented in the python program PluMiST (Plus- and Minus SuperTrees) available from http://www.cibiv.at/software/plumist. The different MR methods were tested by simulation and on real data sets. The search heuristic was successful in combining compatible input trees. When combining incompatible input trees, especially one variant, MR(-) supertrees, performed well.

Conclusions

The presented framework allows for an efficient score computation of three majority-rule supertree variants and input trees. I combined the score computation with a heuristic search over the supertree space. The implementation was tested by simulation and on real data sets and showed promising results. Especially the MR(-) variant seems to be a reasonable score for supertree reconstruction. Generalizing these computations to multifurcating trees is an open problem, which may be tackled using this framework.",2011-07-13 +22936054,Systematic analysis of human lysine acetylation proteins and accurate prediction of human lysine acetylation through bi-relative adapted binomial score Bayes feature representation.,"Lysine acetylation is a reversible post-translational modification (PTM) which has been linked to many biological and pathological implications. Hence, localization of lysine acetylation is essential for deciphering the mechanism of such implications. Whereas many acetylated lysines in human proteins have been localized through experimental approaches in wet lab, it still fails to reach completion. In the present study, we proposed a novel feature extraction approach, bi-relative adapted binomial score Bayes (BRABSB), combined with support vector machines (SVMs) to construct a human-specific lysine acetylation predictor, which yields, on average, a sensitivity of 83.91%, a specificity of 87.25% and an accuracy of 85.58%, in the case of 5-fold cross validation experiments. Results obtained through the validation on independent data sets show that the proposed approach here outperforms other existing lysine acetylation predictors. 
Furthermore, due to the fact that global analysis of human lysine acetylproteins, which would ultimately facilitate the systematic investigation of the biological and pathological consequences associated with lysine acetylation events, remains to be resolved, we made an attempt to systematically analyze human lysine acetylproteins, demonstrating their diversity with respect to subcellular localization as well as biological process and predominance by ""binding"" in terms of molecular function. Our analysis also revealed that human lysine acetylproteins are significantly enriched in neurodegenerative disorders and cancer pathways. Remarkably, lysine acetylproteins in mitochondria are significantly related to neurodegenerative disorders and those in the nucleus are instead significantly involved in pathways in cancers, all of which might ultimately provide novel global insights into such pathological processes for the therapeutic purpose. The web server is deployed at http://www.bioinfo.bio.cuhk.edu.hk/bpbphka.",2012-11-01 +21743062,CSO validator: improving manual curation workflow for biological pathways.,"

Summary

Manual curation and validation of large-scale biological pathways are required to obtain high-quality pathway databases. In a typical curation process, model validation and model update based on appropriate feedback are repeated and require considerable cooperation of scientists. We have developed a CSO (Cell System Ontology) validator to reduce the repetition and time during the curation process. This tool assists in quickly obtaining agreement among curators and domain experts and in providing a consistent and accurate pathway database.

Availability

The tool is available on http://csovalidator.csml.org.

Contact

masao@hgc.jp.",2011-07-08 +23238606,Transfer from paediatric rheumatology to the adult rheumatology setting: experiences and expectations of young adults with juvenile idiopathic arthritis.,"Adolescents with juvenile idiopathic arthritis (JIA) are transferred from paediatrics to adult-oriented healthcare when they reach early adulthood. Research on the extent to which patients' expectations about the adult healthcare setting match their actual experience after transfer, may promote successful transfer from paediatrics to adult care. As part of the 'Don't Retard' project ( http://www.kuleuven.be/switch2/rheuma.html ), experiences and expectations of young adults regarding their transfer from paediatric rheumatology to adult rheumatology were explored. A qualitative study was conducted using semi-structured, in-depth interviews of 11 patients with JIA, aged 18 to 30. Data were analysed using procedures inherent to the content analysis approach. For both concepts, experiences and expectations, three main themes emerged: 'preparation', 'parental involvement' and an 'adapted setting for the late-adolescent or early adult'. The need for a gradual process covered the themes 'preparation' and 'parental involvement'. Young people with JIA prefer to have a say in the moment of transfer and in the reduction of parental involvement. The majority of the participants like their parents' presence at the first consultation at the adult rheumatology department. They expect a healthcare setting adapted to their needs and the possibility to meet peers in this setting. Sudden confrontation with older patients with severe rheumatoid arthritis at adult rheumatology was an unsettling experience for some of the young patients and they declared that better preparation is needed. This study enabled us to define three main themes important in transfer. 
These themes can facilitate healthcare professionals in developing specific interventions to prepare the young people to transfer, to regulate parental involvement and to arrange an adapted setting for them. Since we included patients who were in follow-up at one tertiary care centre, in which both paediatric and adult rheumatology care are located, the results of the study cannot be generalised to the entire population of patients with JIA.",2012-12-14 +21735248,ORchestra: an online reference database of OR/MS literature in health care.,"We introduce the categorized reference database ORchestra, which is available online at http://www.utwente.nl/choir/orchestra/.",2011-07-07 +22147662,Haploscope: a tool for the graphical display of haplotype structure in populations.,"Patterns of linkage disequilibrium are often depicted pictorially by using tools that rely on visualizations of raw data or pairwise correlations among individual markers. Such approaches can fail to highlight some of the more interesting and complex features of haplotype structure. To enable natural visual comparisons of haplotype structure across subgroups of a population (e.g. isolated subpopulations or cases and controls), we propose an alternative visualization that provides a novel graphical representation of haplotype frequencies. We introduce Haploscope, a tool for visualizing the haplotype cluster frequencies that are produced by statistical models for population haplotype variation. We demonstrate the utility of our technique by examining haplotypes around the LCT gene, an example of recent positive selection, in samples from the Human Genome Diversity Panel. Haploscope, which has flexible options for annotation and inspection of haplotypes, is available for download at http://scheet.org/software.",2011-12-06 +23922562,"Sequence and structural analysis of 3' untranslated region of hepatitis C virus, genotype 3a, from pakistani isolates.","

Background

Hepatitis C virus (HCV) is the cause of high morbidity and mortality worldwide, inflicting around one million people in Pakistan alone. The HCV genomic RNA harbors conserved structural elements that are indispensable for its replication. The 3' untranslated region (UTR) contains several of these elements essentially involved in regulating the major steps of the viral life cycle.

Objectives

Differences in regulatory elements of HCV may contribute towards differential infectivity of local isolates. The present study explicates sequence analysis and secondary structure prediction of HCV 3'UTR region of subtype 3a from Pakistan to characterize this particular region.

Patients and methods

HCV 3'UTR region was amplified, cloned and sequenced from five different patients. Sequence and structural analysis was performed and phylogenetic analysis was carried out using the 3'UTR sequence reported in NCBI nucleotide data base (http://www.ncbi.nlm.nih.gov/nuccore) by other studies.

Results

Sequence analysis of the amplified fragment from five patients indicated that the 3'UTR is composed of 214-235 nts. Its sequence contains a type-specific variable region followed by a poly U/UC region and a highly conserved X-tail of 98 nts. The variable region reported here has 26 nts and one stem loop at the secondary structure that differentiate it from HCV genotype 1a (GT1a) 3'UTR which contains an additional 14 nts and two stem loops. The poly U/UC region varied in length (100-79 nts) and nucleotide sequence within the Pakistani isolates, and among different genotypes. Some substitutions found in the X-tail do not affect secondary structure of this element suggesting that this region might play an important role in replication, stabilization and packaging of HCV genome. Additionally, U residues are not present at the end of the X-tail in Pakistani 3a isolates as otherwise reported for the variants of genotype 1b.

Conclusions

Sequence and structural diversity of the 3'UTR variable region and Poly U/UC region found in the local isolates indicate specificity in the regulating elements of 3'UTR that might be associated with differential replication efficacy of the HCV Pakistani isolates. The study necessitates functional characterization of these regulating elements to elucidate variable viral efficiency and pathogenicity associated with inter-geographical isolates.",2013-05-09 +21992029,Genome-wide prediction of splice-modifying SNPs in human genes using a new analysis pipeline called AASsites.,"

Background

Some single nucleotide polymorphisms (SNPs) are known to modify the risk of developing certain diseases or the reaction to drugs. Due to next generation sequencing methods the number of known human SNPs has grown. Not all SNPs lead to a modified protein, which may be the origin of a disease. Therefore, the recognition of functional SNPs is needed. Because most SNP annotation tools look for SNPs which lead to an amino acid exchange or a premature stop, we designed a new tool called AASsites which searches for SNPs which modify splicing.

Results

AASsites uses several gene prediction programs and open reading frame prediction to compare the wild type (wt) and the variant gene sequence. The results of the comparison are combined by a handmade rule system to classify a change in splicing as ""likely, probable, unlikely"". Having received good results from tests with SNPs known for changing the splicing pattern we checked 80,000 SNPs from the human genome which are located near splice sites for their ability to change the splicing pattern of the gene and hereby result in a different protein. We identified 301 ""likely"" and 985 ""probable"" classified SNPs with such characteristics. Within this set 33 SNPs are described in the ssSNP Target database to cause modified splicing.

Conclusions

With AASsites single SNPs can be checked for those causing splice modifications. Screening 80,000 known human SNPs we detected about 1,200 SNPs which probably modify splicing. AASsites is available at http://genius.embnet.dkfz-heidelberg.de/menu/biounit/open-husar using any web browser.",2011-07-05 +21992066,Challenges in the association of human single nucleotide polymorphism mentions with unique database identifiers.,"

Background

Most information on genomic variations and their associations with phenotypes are covered exclusively in scientific publications rather than in structured databases. These texts commonly describe variations using natural language; database identifiers are seldom mentioned. This complicates the retrieval of variations, associated articles, as well as information extraction, e. g. the search for biological implications. To overcome these challenges, procedures to map textual mentions of variations to database identifiers need to be developed.

Results

This article describes a workflow for normalization of variation mentions, i.e. the association of them to unique database identifiers. Common pitfalls in the interpretation of single nucleotide polymorphism (SNP) mentions are highlighted and discussed. The developed normalization procedure achieves a precision of 98.1 % and a recall of 67.5% for unambiguous association of variation mentions with dbSNP identifiers on a text corpus based on 296 MEDLINE abstracts containing 527 mentions of SNPs. The annotated corpus is freely available at http://www.scai.fraunhofer.de/snp-normalization-corpus.html.

Conclusions

Comparable approaches usually focus on variations mentioned on the protein sequence and neglect problems for other SNP mentions. The results presented here indicate that normalizing SNPs described on DNA level is more difficult than the normalization of SNPs described on protein level. The challenges associated with normalization are exemplified with ambiguities and errors, which occur in this corpus.",2011-07-05 +21897815,"Nipype: a flexible, lightweight and extensible neuroimaging data processing framework in python.","Current neuroimaging software offer users an incredible opportunity to analyze their data in different ways, with different underlying assumptions. Several sophisticated software packages (e.g., AFNI, BrainVoyager, FSL, FreeSurfer, Nipy, R, SPM) are used to process and analyze large and often diverse (highly multi-dimensional) data. However, this heterogeneous collection of specialized applications creates several issues that hinder replicable, efficient, and optimal use of neuroimaging analysis approaches: (1) No uniform access to neuroimaging analysis software and usage information; (2) No framework for comparative algorithm development and dissemination; (3) Personnel turnover in laboratories often limits methodological continuity and training new personnel takes time; (4) Neuroimaging software packages do not address computational efficiency; and (5) Methods sections in journal articles are inadequate for reproducing results. To address these issues, we present Nipype (Neuroimaging in Python: Pipelines and Interfaces; http://nipy.org/nipype), an open-source, community-developed, software package, and scriptable library. Nipype solves the issues by providing Interfaces to existing neuroimaging software with uniform usage semantics and by facilitating interaction between these packages using Workflows. 
Nipype provides an environment that encourages interactive exploration of algorithms, eases the design of Workflows within and between packages, allows rapid comparative development of algorithms and reduces the learning curve necessary to use different packages. Nipype supports both local and remote execution on multi-core machines and clusters, without additional scripting. Nipype is Berkeley Software Distribution licensed, allowing anyone unrestricted usage. An open, community-driven development philosophy allows the software to quickly adapt and address the varied needs of the evolving neuroimaging community, especially in the context of increasing demand for reproducible research.",2011-08-22 +22618880,MAGIA²: from miRNA and genes expression data integrative analysis to microRNA-transcription factor mixed regulatory circuits (2012 update).,"MAGIA(2) (http://gencomp.bio.unipd.it/magia2) is an update, extension and evolution of the MAGIA web tool. It is dedicated to the integrated analysis of in silico target prediction, microRNA (miRNA) and gene expression data for the reconstruction of post-transcriptional regulatory networks. miRNAs are fundamental post-transcriptional regulators of several key biological and pathological processes. As miRNAs act prevalently through target degradation, their expression profiles are expected to be inversely correlated to those of the target genes. Low specificity of target prediction algorithms makes integration approaches an interesting solution for target prediction refinement. MAGIA(2) performs this integrative approach supporting different association measures, multiple organisms and almost all target predictions algorithms. Nevertheless, miRNAs activity should be viewed as part of a more complex scenario where regulatory elements and their interactors generate a highly connected network and where gene expression profiles are the result of different levels of regulation. 
The updated MAGIA(2) tries to dissect this complexity by reconstructing mixed regulatory circuits involving either miRNA or transcription factor (TF) as regulators. Two types of circuits are identified: (i) a TF that regulates both a miRNA and its target and (ii) a miRNA that regulates both a TF and its target.",2012-05-21 +23731648,International aspirations for speech-language pathologists' practice with multilingual children with speech sound disorders: development of a position paper.,"

Unlabelled

A major challenge for the speech-language pathology profession in many cultures is to address the mismatch between the ""linguistic homogeneity of the speech-language pathology profession and the linguistic diversity of its clientele"" (Caesar & Kohler, 2007, p. 198). This paper outlines the development of the Multilingual Children with Speech Sound Disorders: Position Paper created to guide speech-language pathologists' (SLPs') facilitation of multilingual children's speech. An international expert panel was assembled comprising 57 researchers (SLPs, linguists, phoneticians, and speech scientists) with knowledge about multilingual children's speech, or children with speech sound disorders. Combined, they had worked in 33 countries and used 26 languages in professional practice. Fourteen panel members met for a one-day workshop to identify key points for inclusion in the position paper. Subsequently, 42 additional panel members participated online to contribute to drafts of the position paper. A thematic analysis was undertaken of the major areas of discussion using two data sources: (a) face-to-face workshop transcript (133 pages) and (b) online discussion artifacts (104 pages). Finally, a moderator with international expertise in working with children with speech sound disorders facilitated the incorporation of the panel's recommendations. The following themes were identified: definitions, scope, framework, evidence, challenges, practices, and consideration of a multilingual audience. The resulting position paper contains guidelines for providing services to multilingual children with speech sound disorders (http://www.csu.edu.au/research/multilingual-speech/position-paper). 
The paper is structured using the International Classification of Functioning, Disability and Health: Children and Youth Version (World Health Organization, 2007) and incorporates recommendations for (a) children and families, (b) SLPs' assessment and intervention, (c) SLPs' professional practice, and (d) SLPs' collaboration with other professionals.

Learning outcomes

Readers will 1. recognize that multilingual children with speech sound disorders have both similar and different needs to monolingual children when working with speech-language pathologists. 2. Describe the challenges for speech-language pathologists who work with multilingual children. 3. Recall the importance of cultural competence for speech-language pathologists. 4. Identify methods for international collaboration and consultation. 5. Recognize the importance of engaging with families and people within their local communities for supporting multilingual children in context.",2013-05-07 +22807216,Using maximal segmental score in genome-wide association studies.,"Genome-wide association studies (GWAS) have become the method of choice for identifying disease susceptibility genes in common disease genetics research. Despite successes in these studies, much of the heritability remains unexplained due to lack of power and low resolution. High-density genotyping arrays can now screen more than 5 million genetic markers. As a result, multiple comparison has become an important issue especially in the era of next-generation sequencing. We propose to use a two-stage maximal segmental score procedure (MSS) which uses region-specific empirical P-values to identify genomic segments most likely harboring the disease gene. We develop scoring systems based on Fisher's P-value combining method to convert locus-specific significance levels into region-specific scores. Through simulations, our result indicated that MSS increased the power to detect genetic association as compared with conventional methods provided type I error was at 5%. We demonstrated the application of MSS on a publicly available case-control dataset of Parkinson's disease and replicated the findings in the literature. MSS provides an efficient exploratory tool for high-density association data in the current era of next-generation sequencing. 
R source codes to implement the MSS procedure are freely available at http://www.csjfann.ibms.sinica.edu.tw/EAG/program/programlist.htm.",2012-07-16 +23446898,Price discounts significantly enhance fruit and vegetable purchases when combined with nutrition education: a randomized controlled supermarket trial.,"

Background

Reducing fruit and vegetable (F&V) prices is a frequently considered policy to improve dietary habits in the context of health promotion. However, evidence on the effectiveness of this intervention is limited.

Objective

The objective was to examine the effects of a 50% price discount on F&Vs or nutrition education or a combination of both on supermarket purchases.

Design

A 6-mo randomized controlled trial within Dutch supermarkets was conducted. Regular supermarket shoppers were randomly assigned to 1 of 4 conditions: 50% price discounts on F&Vs, nutrition education, 50% price discounts plus nutrition education, or no intervention. A total of 199 participants provided baseline data; 151 (76%) were included in the final analysis. F&V purchases were measured by using supermarket register receipts at baseline, at 1 mo after the start of the intervention, at 3 mo, at 6 mo (end of the intervention period), and 3 mo after the intervention ended (9 mo).

Results

Adjusted multilevel models showed significantly higher F&V purchases (per household/2 wk) as a result of the price discount (+3.9 kg; 95% CI: 1.5, 6.3 kg) and the discount plus education intervention (+5.6 kg; 95% CI: 3.2, 7.9 kg) at 6 mo compared with control. Moreover, the percentage of participants who consumed recommended amounts of F&Vs (≥400 g/d) increased from 42.5% at baseline to 61.3% at 6 mo in both discount groups (P = 0.03). Education alone had no significant effect.

Conclusions

Discounting F&Vs is a promising intervention strategy because it resulted in substantially higher F&V purchases, and no adverse effects were observed. Therefore, pricing strategies form an important focus for future interventions or policy. However, the long-term effects and the ultimate health outcomes require further investigation. This trial was registered at the ISRCTN Trial Register as number ISRCTN56596945 and at the Dutch Trial Register (http://www.trialregister.nl/trialreg/index.asp) as number NL22568.029.08.",2013-02-27 +21602269,CENTDIST: discovery of co-associated factors by motif distribution.,"Transcription factors (TFs) do not function alone but work together with other TFs (called co-TFs) in a combinatorial fashion to precisely control the transcription of target genes. Mining co-TFs is thus important to understand the mechanism of transcriptional regulation. Although existing methods can identify co-TFs, their accuracy depends heavily on the chosen background model and other parameters such as the enrichment window size and the PWM score cut-off. In this study, we have developed a novel web-based co-motif scanning program called CENTDIST (http://compbio.ddns.comp.nus.edu.sg/~chipseq/centdist/). In comparison to current co-motif scanning programs, CENTDIST does not require the input of any user-specific parameters and background information. Instead, CENTDIST automatically determines the best set of parameters and ranks co-TF motifs based on their distribution around ChIP-seq peaks. We tested CENTDIST on 14 ChIP-seq data sets and found CENTDIST is more accurate than existing methods. In particular, we applied CENTDIST on an Androgen Receptor (AR) ChIP-seq data set from a prostate cancer cell line and correctly predicted all known co-TFs (eight TFs) of AR in the top 20 hits as well as discovering AP4 as a novel co-TF of AR (which was missed by existing methods). 
Taken together, CENTDIST, which exploits the imbalanced nature of co-TF binding, is a user-friendly, parameter-less and powerful predictive web-based program for understanding the mechanism of transcriptional co-regulation.",2011-05-20 +21659320,The LabelHash server and tools for substructure-based functional annotation.,"

Summary

The LabelHash server and tools are designed for large-scale substructure comparison. The main use is to predict the function of unknown proteins. Given a set of (putative) functional residues, LabelHash finds all occurrences of matching substructures in the entire Protein Data Bank, along with a statistical significance estimate and known functional annotations for each match. The results can be downloaded for further analysis in any molecular viewer. For Chimera, there is a plugin to facilitate this process.

Availability

The web site is free and open to all users with no login requirements at http://labelhash.kavrakilab.org",2011-06-08 +21305028,ReadDepth: a parallel R package for detecting copy number alterations from short sequencing reads.,"Copy number alterations are important contributors to many genetic diseases, including cancer. We present the readDepth package for R, which can detect these aberrations by measuring the depth of coverage obtained by massively parallel sequencing of the genome. In addition to achieving higher accuracy than existing packages, our tool runs much faster by utilizing multi-core architectures to parallelize the processing of these large data sets. In contrast to other published methods, readDepth does not require the sequencing of a reference sample, and uses a robust statistical model that accounts for overdispersed data. It includes a method for effectively increasing the resolution obtained from low-coverage experiments by utilizing breakpoint information from paired end sequencing to do positional refinement. We also demonstrate a method for inferring copy number using reads generated by whole-genome bisulfite sequencing, thus enabling integrative study of epigenomic and copy number alterations. Finally, we apply this tool to two genomes, showing that it performs well on genomes sequenced to both low and high coverage. The readDepth package runs on Linux and MacOSX, is released under the Apache 2.0 license, and is available at http://code.google.com/p/readdepth/.",2011-01-31 +23104886,STAR: ultrafast universal RNA-seq aligner.,"

Motivation

Accurate alignment of high-throughput RNA-seq data is a challenging and yet unsolved problem because of the non-contiguous transcript structure, relatively short read lengths and constantly increasing throughput of the sequencing technologies. Currently available RNA-seq aligners suffer from high mapping error rates, low mapping speed, read length limitation and mapping biases.

Results

To align our large (>80 billon reads) ENCODE Transcriptome RNA-seq dataset, we developed the Spliced Transcripts Alignment to a Reference (STAR) software based on a previously undescribed RNA-seq alignment algorithm that uses sequential maximum mappable seed search in uncompressed suffix arrays followed by seed clustering and stitching procedure. STAR outperforms other aligners by a factor of >50 in mapping speed, aligning to the human genome 550 million 2 × 76 bp paired-end reads per hour on a modest 12-core server, while at the same time improving alignment sensitivity and precision. In addition to unbiased de novo detection of canonical junctions, STAR can discover non-canonical splices and chimeric (fusion) transcripts, and is also capable of mapping full-length RNA sequences. Using Roche 454 sequencing of reverse transcription polymerase chain reaction amplicons, we experimentally validated 1960 novel intergenic splice junctions with an 80-90% success rate, corroborating the high precision of the STAR mapping strategy.

Availability and implementation

STAR is implemented as a standalone C++ code. STAR is free open source software distributed under GPLv3 license and can be downloaded from http://code.google.com/p/rna-star/.",2012-10-25 +22199386,Optimal structural inference of signaling pathways from unordered and overlapping gene sets.,"

Motivation

A plethora of bioinformatics analysis has led to the discovery of numerous gene sets, which can be interpreted as discrete measurements emitted from latent signaling pathways. Their potential to infer signaling pathway structures, however, has not been sufficiently exploited. Existing methods accommodating discrete data do not explicitly consider signal cascading mechanisms that characterize a signaling pathway. Novel computational methods are thus needed to fully utilize gene sets and broaden the scope from focusing only on pairwise interactions to the more general cascading events in the inference of signaling pathway structures.

Results

We propose a gene set based simulated annealing (SA) algorithm for the reconstruction of signaling pathway structures. A signaling pathway structure is a directed graph containing up to a few hundred nodes and many overlapping signal cascades, where each cascade represents a chain of molecular interactions from the cell surface to the nucleus. Gene sets in our context refer to discrete sets of genes participating in signal cascades, the basic building blocks of a signaling pathway, with no prior information about gene orderings in the cascades. From a compendium of gene sets related to a pathway, SA aims to search for signal cascades that characterize the optimal signaling pathway structure. In the search process, the extent of overlap among signal cascades is used to measure the optimality of a structure. Throughout, we treat gene sets as random samples from a first-order Markov chain model. We evaluated the performance of SA in three case studies. In the first study conducted on 83 KEGG pathways, SA demonstrated a significantly better performance than Bayesian network methods. Since both SA and Bayesian network methods accommodate discrete data, use a 'search and score' network learning strategy and output a directed network, they can be compared in terms of performance and computational time. In the second study, we compared SA and Bayesian network methods using four benchmark datasets from DREAM. In our final study, we showcased two context-specific signaling pathways activated in breast cancer.

Availability

Source codes are available from http://dl.dropbox.com/u/16000775/sa_sc.zip.",2011-12-22 +22078224,Consistent Differential Expression Pattern (CDEP) on microarray to identify genes related to metastatic behavior.,"

Background

To utilize the large volume of gene expression information generated from different microarray experiments, several meta-analysis techniques have been developed. Despite these efforts, there remain significant challenges to effectively increasing the statistical power and decreasing the Type I error rate while pooling the heterogeneous datasets from public resources. The objective of this study is to develop a novel meta-analysis approach, Consistent Differential Expression Pattern (CDEP), to identify genes with common differential expression patterns across different datasets.

Results

We combined False Discovery Rate (FDR) estimation and the non-parametric RankProd approach to estimate the Type I error rate in each microarray dataset of the meta-analysis. These Type I error rates from all datasets were then used to identify genes with common differential expression patterns. Our simulation study showed that CDEP achieved higher statistical power and maintained low Type I error rate when compared with two recently proposed meta-analysis approaches. We applied CDEP to analyze microarray data from different laboratories that compared transcription profiles between metastatic and primary cancer of different types. Many genes identified as differentially expressed consistently across different cancer types are in pathways related to metastatic behavior, such as ECM-receptor interaction, focal adhesion, and blood vessel development. We also identified novel genes such as AMIGO2, Gem, and CXCL11 that have not been shown to associate with, but may play roles in, metastasis.

Conclusions

CDEP is a flexible approach that borrows information from each dataset in a meta-analysis in order to identify genes being differentially expressed consistently. We have shown that CDEP can gain higher statistical power than other existing approaches under a variety of settings considered in the simulation study, suggesting its robustness and insensitivity to data variation commonly associated with microarray experiments.

Availability

CDEP is implemented in R and freely available at: http://genomebioinfo.musc.edu/CDEP/.

Contact

zhengw@musc.edu.",2011-11-11 +22711792,Bellerophontes: an RNA-Seq data analysis framework for chimeric transcripts discovery based on accurate fusion model.,"

Motivation

Next-generation sequencing technology allows the detection of genomic structural variations, novel genes and transcript isoforms from the analysis of high-throughput data. In this work, we propose a new framework for the detection of fusion transcripts through short paired-end reads which integrates splicing-driven alignment and abundance estimation analysis, producing a more accurate set of reads supporting the junction discovery and taking into account also not annotated transcripts. Bellerophontes performs a selection of putative junctions on the basis of a match to an accurate gene fusion model.

Results

We report the fusion genes discovered by the proposed framework on experimentally validated biological samples of chronic myelogenous leukemia (CML) and on public NCBI datasets, for which Bellerophontes is able to detect the exact junction sequence. With respect to state-of-art approaches, Bellerophontes detects the same experimentally validated fusions, however, it is more selective on the total number of detected fusions and provides a more accurate set of spanning reads supporting the junctions. We finally report the fusions involving non-annotated transcripts found in CML samples.

Availability and implementation

Bellerophontes JAVA/Perl/Bash software implementation is free and available at http://eda.polito.it/bellerophontes/.",2012-06-17 +22954625,Performance reproducibility index for classification.,"

Motivation

A common practice in biomarker discovery is to decide whether a large laboratory experiment should be carried out based on the results of a preliminary study on a small set of specimens. Consideration of the efficacy of this approach motivates the introduction of a probabilistic measure, for whether a classifier showing promising results in a small-sample preliminary study will perform similarly on a large independent sample. Given the error estimate from the preliminary study, if the probability of reproducible error is low, then there is really no purpose in substantially allocating more resources to a large follow-on study. Indeed, if the probability of the preliminary study providing likely reproducible results is small, then why even perform the preliminary study?

Results

This article introduces a reproducibility index for classification, measuring the probability that a sufficiently small error estimate on a small sample will motivate a large follow-on study. We provide a simulation study based on synthetic distribution models that possess known intrinsic classification difficulties and emulate real-world scenarios. We also set up similar simulations on four real datasets to show the consistency of results. The reproducibility indices for different distributional models, real datasets and classification schemes are empirically calculated. The effects of reporting and multiple-rule biases on the reproducibility index are also analyzed.

Availability

We have implemented in C code the synthetic data distribution model, classification rules, feature selection routine and error estimation methods. The source code is available at http://gsp.tamu.edu/Publications/supplementary/yousefi12a/.",2012-09-06 +30731706,First Report of a Leaf Spot on Basella alba Caused by a Bipolaris sp. in Florida.,"Malabar spinach (Basella alba L.) is a fast-growing, perennial vegetable crop grown largely in the tropics of Asia and Africa. This crop is widely used in the cuisine of different regions for its thick, semisucculent leaves, mild flavor, and mucilaginous texture. Leaf spots were observed on both surfaces of symptomatic leaf samples received from a home garden in Homestead, FL in November 2009. The necrotic lesions (up to 2 mm in diameter) were round, semicircular, or irregular-shaped with grayish centers surrounded by dark brown borders. A fungus was consistently isolated from symptomatic tissues on clarified V8 (CV8) agar. Fungal colonies on CV8 agar were black and velvet-like with minimal mycelial growth and conidiophores were dark brown, simple, borne singly or in groups upon the substrate. Conidia were straight, pale to medium golden brown, rounded at the ends with three to six septa, and on average measured 75 × 15 μm (48 to 97 × 9 to 18 μm). Cultural and conidial characteristics of the isolates were closely similar to those of a Bipolaris sp. (1). The internal transcribed spacer (ITS) region (~570 bp) of rDNA was amplified using the primers ITS1/ITS4 and sequenced bidirectionally (GenBank Accession No. JF506092). Subsequent database searches by the BLASTN program indicated that the resulting sequence had a 95% identity over 531 bp with the corresponding gene sequence of Bipolaris portulacae (GenBank Accession No. AY004778.1), a fungal pathogen reported to cause leaf spot on purslane (Portulaca oleracea) (2,3). However, our isolate has consistently smaller conidia and does not match descriptions of B. 
portulacae (BPI 871173, U.S. National Fungus Collections). The pathogenicity was confirmed through inoculation of healthy Malabar spinach plants with conidia of the isolate reproduced on CV8. Six Malabar spinach plants were inoculated with a suspension containing 1 × 106 conidia per ml and sprayed until runoff (approximately 15 ml per plant) with a handheld pressurized canister. Another six noninoculated plants served as a control. Immediately after inoculation, plants were covered with plastic bags for 24 h to maintain high relative humidity and maintained in a greenhouse under ambient conditions. Ten days after inoculation, the symptoms described above were observed on leaves of all inoculated plants, whereas symptoms did not develop on the control plants. A Bipolaris sp. was reisolated and identified by the above methods, fulfilling Koch's postulates. This pathogenicity test was carried out three times. To our knowledge, this is the first report of a Bipolaris sp. affecting Malabar spinach in Florida. Further work should be conducted to confirm identity of these isolates. Because of limited plantings of Malabar spinach, the economic importance of this disease in Florida is currently not known. Nevertheless, this pathogen poses a threat to the growing market of continuously produced oriental vegetables in Florida. References: (1) J. L. Alcorn. Mycotaxon 39:361, 1990. (2) S. A. Alfieri, Jr. et al. Bull. 14. Index of Plant Diseases in Florida (Revised). Florida Dep. Agric. Consumer Serv., Div. Plant Ind., 1984. (3) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory. ARS, USDA. 
Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 25 January 2010.",2011-07-01 +22016855,GyDB mobilomics: LTR retroelements and integrase-related transposons of the pea aphid Acyrthosiphon pisum genome.,"The Gypsy Database concerning Mobile Genetic Elements (release 2.0) is a wiki-style project devoted to the phylogenetic classification of LTR retroelements and their viral and host gene relatives characterized from distinct organisms. Furthermore, GyDB 2.0 is concerned with studying mobile elements within genomes. Therefore, an in-progress repository was created for databases with annotations of mobile genetic elements from particular genomes. This repository is called Mobilomics and the first uploaded database contains 549 LTR retroelements and related transposases which have been annotated from the genome of the Pea aphid Acyrthosiphon pisum. Mobilomics is accessible from the GyDB 2.0 project using the URL: http://gydb.org/index.php/Mobilomics.",2011-07-01 +21715386,KOBAS 2.0: a web server for annotation and identification of enriched pathways and diseases.,"High-throughput experimental technologies often identify dozens to hundreds of genes related to, or changed in, a biological or pathological process. From these genes one wants to identify biological pathways that may be involved and diseases that may be implicated. Here, we report a web server, KOBAS 2.0, which annotates an input set of genes with putative pathways and disease relationships based on mapping to genes with known annotations. It allows for both ID mapping and cross-species sequence similarity mapping. It then performs statistical tests to identify statistically significantly enriched pathways and diseases. KOBAS 2.0 incorporates knowledge across 1327 species from 5 pathway databases (KEGG PATHWAY, PID, BioCyc, Reactome and Panther) and 5 human disease databases (OMIM, KEGG DISEASE, FunDO, GAD and NHGRI GWAS Catalog). 
KOBAS 2.0 can be accessed at http://kobas.cbi.pku.edu.cn.",2011-07-01 +21527381,Evaluation of a computer-aided diagnosis system for diabetic retinopathy screening on public data.,"

Purpose

To evaluate the performance of a comprehensive computer-aided diagnosis (CAD) system for diabetic retinopathy (DR) screening, using a publicly available database of retinal images, and to compare its performance with that of human experts.

Methods

A previously developed, comprehensive DR CAD system was applied to 1200 digital color fundus photographs (nonmydriatic camera, single field) of 1200 eyes in the publicly available Messidor dataset (Methods to Evaluate Segmentation and Indexing Techniques in the Field of Retinal Ophthalmology; http://messidor.crihan.fr). The ability of the system to distinguish normal images from those with DR was determined by using receiver operator characteristic (ROC) analysis. Two experts also determined the presence of DR in each of the images.

Results

The system achieved an area under the ROC curve of 0.876 for successfully distinguishing normal images from those with DR with a sensitivity of 92.2% at a specificity of 50%. These compare favorably with the two experts, who achieved sensitivities of 94.5% and 91.2% at a specificity of 50%.

Conclusions

This study shows, for the first time, the performance of a comprehensive DR screening system on an independent, publicly available dataset. The performance of the system on this dataset is comparable with that of human experts.",2011-07-01 +30731727,First Report of Pseudoperonospora cubensis on Cucurbita moschata in the Czech Republic.,"Pseudoperonospora cubensis (Berk. & M.A. Curtis) Rostovzev, the causal agent of cucurbit downy mildew, was observed for the first time on Cucurbita moschata Duchesne in the Czech Republic (CR) in August 2009 and repeatedly in September 2010. Recently, C. moschata has not been an economically important crop in the CR; however, related crops C. pepo and C. maxima have increased in importance. Infected plants with P. cubensis were found in two locations: in a hobby garden in north Moravia (Nový Jičín - Kojetín [49°33'48.088″N, 17°59'16.632″E], 2009 and 2010) and in a commercial field in central Moravia (Olomouc-Holice [49°34'31.95″N, 17°17'35.462″E], 2010). The pathogen caused small, angular, yellowish or pale green lesions on the upper leaf surfaces and produced sporangiophores and sporangia on the lower leaf surfaces. The lesions were delimited by leaf veins and later turned necrotic. Sporangiophores were hyaline, branched, and emerged in groups from stomata. Olive brown-to-dark brown sporangia were ellipsoidal to oblong. Our morphological observations confirmed that the pathogen was P. cubensis (2). No previous reports are available of P. cubensis on C. moschata in CR or anywhere in Central Europe. However, P. cubensis is common on C. moschata in some parts of Asia and the United States (1,2). P. cubensis exhibiting clear host specialization has been reported in different countries and geographic areas (2). A C. 
moschata isolate (PC 88/2009) originating from the naturally infected plants was inoculated (1 × 10^5 spores per ml and incubation temperature of 18/15°C during light/dark cycles) according to the methodology described by Lebeda and Urban (3) onto the abaxial surface of leaf discs of all genotypes of a differential set of cucurbits for P. cubensis pathotype determination (4). C. moschata (line Novo5, Nohel-Garden, CR) was added to this set. The isolate PC 88/2009 was highly pathogenic to all screened Cucurbita spp. genotypes (C. pepo, C. maxima, and C. moschata). However, no infection was detected on most of the Cucumis accessions; only Cucumis melo subsp. agrestis var. conomon was susceptible. Also, no infection was observed on other differentials (Citrullus, Benincasa, Luffa, and Lagenaria). The pathotype was classified as Pc 4/15/0. This pathotype had not been previously detected in CR. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ . December 16, 2010. (2) A. Lebeda and Y. Cohen. Eur. J. Plant Pathol. 129:157, 2011. (3) A. Lebeda and J. Urban. Page 285 in: Mass Screening Techniques for Selecting Crops Resistant to Disease. M. M. Spencer and A. Lebeda, eds. International Atomic Energy Agency (IAEA), Vienna, Austria, 2010. (4) A. Lebeda and M. P. Widrlechner. J. Plant Dis. Protect. 110:337, 2003.",2011-07-01 +21715387,FFAS server: novel features and applications.,"The Fold and Function Assignment System (FFAS) server [Jaroszewski et al. (2005) FFAS03: a server for profile-profile sequence alignments. Nucleic Acids Research, 33, W284-W288] implements the algorithm for protein profile-profile alignment introduced originally in [Rychlewski et al. (2000) Comparison of sequence profiles. Strategies for structural predictions using sequence information. Protein Science: a Publication of the Protein Society, 9, 232-241]. 
Here, we present updates, changes and novel functionality added to the server since 2005 and discuss its new applications. The sequence database used to calculate sequence profiles was enriched by adding sets of publicly available metagenomic sequences. The profile of a user's protein can now be compared with ∼20 additional profile databases, including several complete proteomes, human proteins involved in genetic diseases and a database of microbial virulence factors. A newly developed interface uses a system of tabs, allowing the user to navigate multiple results pages, and also includes novel functionality, such as a dotplot graph viewer, modeling tools, an improved 3D alignment viewer and links to the database of structural similarities. The FFAS server was also optimized for speed: running times were reduced by an order of magnitude. The FFAS server, http://ffas.godziklab.org, has no log-in requirement, albeit there is an option to register and store results in individual, password-protected directories. Source code and Linux executables for the FFAS program are available for download from the FFAS server.",2011-07-01 +21346658,"Subclinical abnormal gyration pattern, a potential anatomic marker of epileptogenic zone in patients with magnetic resonance imaging-negative frontal lobe epilepsy.","

Background

Epilepsy surgery for magnetic resonance imaging (MRI)-negative patients has a less favorable outcome.

Objective

Detection of subclinical abnormal gyration (SAG) patterns and their potential contribution to assessment of the topography of the epileptogenic zone (EZ) is addressed in MRI-negative patients with frontal lobe epilepsy.

Methods

Between September 1998 and July 2005, 12 MRI-negative frontal lobe epilepsy patients underwent stereoelectroencephalography with postcorticectomy follow-up of longer than 1 year (average, 3.3 years). Original software (BrainVISA/Anatomist, http://brainvisa.info) trained on a database of normal volunteers was used to determine which sulci had morphology out of the normal range (SAG). Topography of the EZ, SAG pattern, corticectomy, postoperative seizure control, and histopathology were analyzed.

Results

At last follow-up, 8 of 12 patients (66.7%) were Engel class I (7 IA and 1 IB), 2 class II, and 2 class IV. Small focal cortical dysplasia was histologically diagnosed in 9 of the 12 patients (75%), including 7 of 8 seizure-free patients (87.5%). A SAG pattern was found to be in the EZ area in 9 patients (75%), in the ipsilateral frontal lobe out of the EZ in 2, and limited to the contralateral hemisphere in 1.

Conclusion

SAG patterns appear to be associated with the topography of the EZ in MRI-negative frontal lobe epilepsy and may have a useful role in preoperative assessment. Small focal cortical dysplasia not detected with MRI is often found on histopathological examination, particularly in the depth of the posterior part of the superior frontal sulcus and intermediate frontal sulcus, suggesting a specific developmental critical zone in these locations.",2011-07-01 +21715389,RSAT 2011: regulatory sequence analysis tools.,"RSAT (Regulatory Sequence Analysis Tools) comprises a wide collection of modular tools for the detection of cis-regulatory elements in genome sequences. Thirteen new programs have been added to the 30 described in the 2008 NAR Web Software Issue, including an automated sequence retrieval from EnsEMBL (retrieve-ensembl-seq), two novel motif discovery algorithms (oligo-diff and info-gibbs), a 100-times faster version of matrix-scan enabling the scanning of genome-scale sequence sets, and a series of facilities for random model generation and statistical evaluation (random-genome-fragments, random-motifs, random-sites, implant-sites, sequence-probability, permute-matrix). Our most recent work also focused on motif comparison (compare-matrices) and evaluation of motif quality (matrix-quality) by combining theoretical and empirical measures to assess the predictive capability of position-specific scoring matrices. To process large collections of peak sequences obtained from ChIP-seq or related technologies, RSAT provides a new program (peak-motifs) that combines several efficient motif discovery algorithms to predict transcription factor binding motifs, match them against motif databases and predict their binding sites. 
Availability (web site, stand-alone programs and SOAP/WSDL (Simple Object Access Protocol/Web Services Description Language) web services): http://rsat.ulb.ac.be/rsat/.",2011-07-01 +30731713,First Report of Powdery Mildew of Hexinia polydichotoma Caused by Leveillula lactucae-serriolae in China.,"Hexinia polydichotoma (Ostenf) H.L. Yang (synonym Chondrilla polydichotoma Ostenf.) is an indigenous sand-binding plant that is widely distributed only in the desert regions of Northwest China. During the summer of 2007, severe outbreaks of a previously unknown powdery mildew were observed in the Taklimakan Desert in Xinjiang, China. Almost 95% of the plants surveyed were affected in this area. The upper surfaces of the stem were covered with white mycelia and the corresponding abaxial surfaces of infected leaves were chlorotic. Affected young, green stems also showed extended chlorosis. As the disease progressed, the infected stems turned yellow and necrotic. Heavy infection resulted in death of the plants. The primary conidia of the fungus were lanceolate with apical pointed, rarely cylindrical or subcylindrical with attenuated apex. They measured 53 to 73 × 15 to 21 μm and had a surface with a net of irregular rides and warts. Subcylindrical or subclavate secondary conidia with rounded ends measuring 50 to 77 × 13 to 20 μm were observed. The ascomata are subgregarious to scattered, globose, and 165 to 200 μm in diameter that are immersed in the dense mycelial tomentum. Numerous and well-developed appendages on the lower half of the ascomata are irregularly branched and can be as long as up to the ascomata diameter. The appendages measure 79 to 106 × 5 to 10 μm and are aseptate, thin walled, and smooth. Asci are numerous (usually more than 20 per ascoma), stalked, clavate-ovoid to nearly cylindrical, and contain two spores (rarely one or three). Ascospores are ellipsoid, hyaline, and measure 25 to 35 × 14 to 20 μm. 
On the basis of these characteristics, the fungus was identified as Leveillula lactucae-serriolae (2). A voucher specimen was deposited in the Herbarium of Martin Luther University, Halle, Germany (Accession No. HAL 2439F). To confirm the identification, the internal transcribed spacer (ITS) rDNA was amplified and sequenced, and deposited in GenBank (Accession No. HQ821500). Comparison with sequences available in the GenBank database revealed that the ITS sequence shares 99% similarity with that of L. lactucae-serriolae on Lactuca serriola from Iran (Accession No. AB044375.1) (1). Thus, the pathogen was identified as L. lactucae-serriolae based on the host plant species, anamorph morphology, and ITS sequence. Pathogenicity was confirmed through inoculation by gently pressing a diseased stem onto the stem of healthy H. polydichotoma plants. Five inoculated plants were kept under a plastic humid chamber, whereas the same number of noninoculated plants served as the control. The plants were placed under natural conditions (25 to 28°C) with 80 to 90% humidity. At 15 days after inoculation, typical symptoms of powdery mildew developed on the inoculated plants. No symptoms were seen on the control plants. To our knowledge, this is the first report of L. lactucae-serriolae in China and the first record of L. lactucae-serriolae on H. polydichotoma in the world ( http://nt.ars-grin.gov/fungaldatabases/index.cfm ). Because the plant is becoming widely cultivated in the Taklimakan Desert for use in sand-binding, the powdery mildew poses a serious threat to desertification control. References: (1) S. A. Khodaparast et al. Mycol Res. 105:909. 2001. (2) S. A. Khodaparast et al. 
Mycoscience 43:459, 2002.",2011-07-01 +21702733,Keeping up with genetic discoveries in amyotrophic lateral sclerosis: the ALSoD and ALSGene databases.,"Amyotrophic lateral sclerosis (ALS) is a genetically heterogeneous disorder that shows a characteristic dichotomy of familial forms typically displaying Mendelian inheritance patterns, and sporadic ALS showing no or less obvious familial aggregation. While the former is caused by rare, highly penetrant, and pathogenic mutations, risk for sporadic ALS is probably the result of the combined effects of common polymorphisms with minor to moderate effect sizes. Owing to recent advances in high-throughput genotyping and sequencing technologies, genetic research in both fields is evolving at a rapidly increasing pace making it more and more difficult to follow and evaluate the most significant progress in the field. To alleviate this problem, our groups have created dedicated and freely available online databases, ALSoD ( http://alsod.iop.kcl.ac.uk/ ) and ALSGene ( http://www.alsgene.org ), which provide systematic and in-depth qualitative and quantitative overviews of genetic research in both familial and sporadic ALS. This review briefly introduces the background and main features of both databases and provides an overview of the currently most compelling genetic findings in ALS derived from analyses using these resources.",2011-07-01 +21685102,Multi-view methods for protein structure comparison using latent dirichlet allocation.,"

Motivation

With rapidly expanding protein structure databases, efficiently retrieving structures similar to a given protein is an important problem. It involves two major issues: (i) effective protein structure representation that captures inherent relationship between fragments and facilitates efficient comparison between the structures and (ii) effective framework to address different retrieval requirements. Recently, researchers proposed vector space model of proteins using bag of fragments representation (FragBag), which corresponds to the basic information retrieval model.

Results

In this article, we propose an improved representation of protein structures using latent dirichlet allocation topic model. Another important requirement is to retrieve proteins, whether they are either close or remote homologs. In order to meet diverse objectives, we propose multi-viewpoint based framework that combines multiple representations and retrieval techniques. We compare the proposed representation and retrieval framework on the benchmark dataset developed by Kolodny and co-workers. The results indicate that the proposed techniques outperform state-of-the-art methods.

Availability

http://www.cse.iitm.ac.in/~ashishvt/research/protein-lda/.

Contact

ashishvt@cse.iitm.ac.in.",2011-07-01 +21740962,The protein interaction network mediated by human SH3 domains.,"Families of conserved protein domains, specialized in mediating interactions with short linear peptide motifs, are responsible for the formation of a variety of dynamic complexes in the cell. An important subclass of these motifs are characterized by a high proline content and play a pivotal role in biological processes requiring the coordinated assembly of multi-protein complexes. This is achieved via interaction of proteins containing modules such as Src Homology-3 (SH3) or WW domains and specific proline rich patterns. Here we make available via a publicly accessible database a synopsis of our current understanding of the interaction landscape of the human SH3 protein family. This is achieved by integrating an information extraction strategy with a new experimental approach. In a first approach we have used a text mining strategy to capture a large number of manuscripts reporting interactions between SH3 domains and target peptides. Relevant information was annotated in the MINT database. In a second experimental approach we have used a variant of the WISE (Whole Interactome Scanning Experiment) strategy to probe a large number of naturally occurring and chemically-synthesized peptides arrayed at high density on a glass surface. By this method we have tested 60 human SH3 domains for their ability to bind a collection of 9192 poly-proline containing peptides immobilized on a glass chip. To evaluate the quality of the resulting interaction dataset, we retested some of the interactions on a smaller scale and performed a series of pull down experiments on native proteins. Peptide chips, pull down assays, SPOT synthesis and phage display experiments have allowed us to further characterize the specificity and promiscuity of proline-rich binding domains and to map their interaction network. 
Both the information captured from the literature and the interactions inferred from the peptide chip experiments were collected and stored in the PepspotDB (http://mint.bio.uniroma2.it/PepspotDB/).",2011-06-29 +21707958,The Littorina sequence database (LSD)--an online resource for genomic data.,"We present an interactive, searchable expressed sequence tag database for the periwinkle snail Littorina saxatilis, an upcoming model species in evolutionary biology. The database is the result of a hybrid assembly between Sanger and 454 sequences, 1290 and 147,491 sequences respectively. Normalized and non-normalized cDNA was obtained from different ecotypes of L. saxatilis collected in the UK and Sweden. The Littorina sequence database (LSD) contains 26,537 different contigs, of which 2453 showed similarity with annotated proteins in UniProt. Querying the LSD permits the selection of the taxonomic origin of blast hits for each contig, and the search can be restricted to particular taxonomic groups. The database allows access to UniProt annotations, blast output, protein family domains (PFAM) and Gene Ontology. The database will allow users to search for genetic markers and identifying candidate genes or genes for expression analyses. It is open for additional deposition of sequence information for L. saxatilis and other species of the genus Littorina. The LSD is available at http://mbio-serv2.mbioekol.lu.se/Littorina/.",2011-06-28 +21685083,Mixed-model coexpression: calculating gene coexpression while accounting for expression heterogeneity.,"

Motivation

The analysis of gene coexpression is at the core of many types of genetic analysis. The coexpression between two genes can be calculated by using a traditional Pearson's correlation coefficient. However, unobserved confounding effects may cause inflation of the Pearson's correlation so that uncorrelated genes appear correlated. Many general methods have been suggested, which aim to remove the effects of confounding from gene expression data. However, the residual confounding which is not accounted for by these generic correction procedures has the potential to induce correlation between genes. Therefore, a method that specifically aims to calculate gene coexpression between gene expression arrays, while accounting for confounding effects, is desirable.

Results

In this article, we present a statistical model for calculating gene coexpression called mixed model coexpression (MMC), which models coexpression within a mixed model framework. Confounding effects are expected to be encoded in the matrix representing the correlation between arrays, the inter-sample correlation matrix. By conditioning on the information in the inter-sample correlation matrix, MMC is able to produce gene coexpressions that are not influenced by global confounding effects and thus significantly reduce the number of spurious coexpressions observed. We applied MMC to both human and yeast datasets and show it is better able to effectively prioritize strong coexpressions when compared to a traditional Pearson's correlation and a Pearson's correlation applied to data corrected with surrogate variable analysis (SVA).

Availability

The method is implemented in the R programming language and may be found at http://genetics.cs.ucla.edu/mmc.

Contact

nfurlott@cs.ucla.edu; eeskin@cs.ucla.edu.",2011-07-01 +23432962,Genome sequence-based species delimitation with confidence intervals and improved distance functions.,"

Background

For the last 25 years species delimitation in prokaryotes (Archaea and Bacteria) was to a large extent based on DNA-DNA hybridization (DDH), a tedious lab procedure designed in the early 1970s that served its purpose astonishingly well in the absence of deciphered genome sequences. With the rapid progress in genome sequencing time has come to directly use the now available and easy to generate genome sequences for delimitation of species. GBDP (Genome Blast Distance Phylogeny) infers genome-to-genome distances between pairs of entirely or partially sequenced genomes, a digital, highly reliable estimator for the relatedness of genomes. Its application as an in-silico replacement for DDH was recently introduced. The main challenge in the implementation of such an application is to produce digital DDH values that must mimic the wet-lab DDH values as close as possible to ensure consistency in the Prokaryotic species concept.

Results

Correlation and regression analyses were used to determine the best-performing methods and the most influential parameters. GBDP was further enriched with a set of new features such as confidence intervals for intergenomic distances obtained via resampling or via the statistical models for DDH prediction and an additional family of distance functions. As in previous analyses, GBDP obtained the highest agreement with wet-lab DDH among all tested methods, but improved models led to a further increase in the accuracy of DDH prediction. Confidence intervals yielded stable results when inferred from the statistical models, whereas those obtained via resampling showed marked differences between the underlying distance functions.

Conclusions

Despite the high accuracy of GBDP-based DDH prediction, inferences from limited empirical data are always associated with a certain degree of uncertainty. It is thus crucial to enrich in-silico DDH replacements with confidence-interval estimation, enabling the user to statistically evaluate the outcomes. Such methodological advancements, easily accessible through the web service at http://ggdc.dsmz.de, are crucial steps towards a consistent and truly genome sequence-based classification of microorganisms.",2013-02-21 +23531523,Swine influenza and vaccines: an alternative approach for decision making about pandemic prevention.,"

Background

During the global pandemic of A/H1N1/California/07/2009 (A/H1N1/Cal) influenza, many governments signed contracts with vaccine producers for a universal influenza immunization program and bought hundreds of millions of vaccine doses. We argue that, as Health Ministers assumed the occurrence of the worst possible scenario (generalized pandemic influenza) and followed the strong version of the Precautionary Principle, they undervalued the possibility of a mild or weak pandemic wave.

Methodology

An alternative decision rule, based on the non-extensive entropy principle, is introduced, and a different Precautionary Principle characterization is applied. This approach values extreme negative results (catastrophic events) in a different way and predicts more plausible and mild events. It introduces less pessimistic forecasts in the case of uncertain influenza pandemic outbreaks. A simplified application is presented using seasonal data on the morbidity and severity of influenza-like illness among Italian children for the period 2003-10.

Principal findings

Established literature results predict an average attack rate of not less than 15% for the next pandemic influenza [Meltzer M, Cox N, Fukuda K. The economic impact of pandemic influenza in the United States: implications for setting priorities for interventions. Emerg Infect Dis 1999;5:659-71; Meltzer M, Cox N, Fukuda K. Modeling the Economic Impact of Pandemic Influenza in the United States: Implications for Setting Priorities for Intervention. Background paper. Atlanta, GA: CDC, 1999. Available at: http://www.cdc.gov/ncidod/eid/vol5no5/melt_back.htm (7 January 2011, date last accessed)]. The strong version of the Precautionary Principle would suggest using this prediction for vaccination campaigns. On the contrary, the non-extensive maximum entropy principle predicts a lower attack rate, which induces a 20% saving in public funding for vaccine doses.

Conclusions

The need for an effective influenza pandemic prevention program, coupled with an efficient use of public funding, calls for a rethinking of the Precautionary Principle. The non-extensive maximum entropy principle, which incorporates vague and incomplete information available to decision makers, produces a more coherent forecast of possible influenza pandemic and a conservative spending in public funding.",2013-03-26 +21604800,Pharmer: efficient and exact pharmacophore search.,"Pharmacophore search is a key component of many drug discovery efforts. Pharmer is a new computational approach to pharmacophore search that scales with the breadth and complexity of the query, not the size of the compound library being screened. Two novel methods for organizing pharmacophore data, the Pharmer KDB-tree and Bloom fingerprints, enable Pharmer to perform an exact pharmacophore search of almost two million structures in less than a minute. In general, Pharmer is more than an order of magnitude faster than existing technologies. The complete source code is available under an open-source license at http://pharmer.sourceforge.net .",2011-06-02 +22113083,A novel and versatile computational tool to model translation.,"

Motivation

Much is now known about the mechanistic details of gene translation. There are also rapid advances in high-throughput technologies to determine quantitative aspects of the system. As a consequence, realistic and system-wide simulation models of translation are now feasible. Such models are also needed as devices to integrate a large volume of highly fragmented data known about translation. Software: In this application note, we present a novel, highly efficient software tool to model translation. The tool represents the main aspects of translation. Features include a representation of exhaustible tRNA pools, ribosome-ribosome interactions and differential initiation rates for different mRNA species. The tool is written in Java, and is hence portable and can be parameterized for any organism.

Availability

The model can be obtained from the authors or directly downloaded from the authors' home-page (http://goo.gl/JUWvI).",2011-11-22 +21654312,The utilization of oncology web-based resources in Spanish-speaking Internet users.,"

Objectives

There currently are few web-based resources written in Spanish providing oncology-specific information. This study examines utilization of Spanish-language oncology web-based resources and evaluates oncology-related Internet browsing practices of Spanish-speaking patients.

Methods

OncoLink (http://www.oncolink.org) is the oldest and among the largest Internet-based cancer information resources. In September 2005, OncoLink pioneered OncoLink en español (OEE) (http://es.oncolink.org), a Spanish translation of OncoLink. Internet utilization data on these sites for 2006 to 2007 were compared.

Results

Visits to OncoLink rose from 4,440,843 in 2006 to 5,125,952 in 2007. OEE had 204,578 unique visitors and 240,442 visits in 2006, and 351,228 visitors and 412,153 visits in 2007. Although there was no time predilection for viewing OncoLink, less relative browsing on OEE was conducted during weekends and early morning hours. Although OncoLink readers searched for information on the most common cancers in the United States, OEE readers most often search for gastric, vaginal, osteosarcoma, leukemia, penile, cervical, and testicular malignancies. Average visit duration on OEE was shorter, and fewer readers surveyed OEE more than 15 minutes (4.5% vs. 14.9%, P < 0.001).

Conclusions

Spanish-speaking users of web-based oncology resources are increasingly using the Internet to supplement their cancer knowledge. Limited available resources written in Spanish contribute to disparities in information access and disease outcomes. Spanish-speaking oncology readers differ from English-speaking readers in day and time of Internet browsing, visit duration, Internet search patterns, and types of cancers searched. By acknowledging these differences, content of web-based oncology resources can be developed to best target the needs of Spanish-speaking viewers.",2012-12-01 +22226708,"Detection, annotation and visualization of alternative splicing from RNA-Seq data with SplicingViewer.","Alternative splicing is a crucial mechanism by which diverse gene products can be generated from a limited number of genes, and is thought to be involved in complex orchestration of eukaryotic gene expression. Next-generation sequencing technologies, with reduced time and cost, provide unprecedented opportunities for deep interrogation of alternative splicing at the genome-wide scale. In this study, an integrated software SplicingViewer has been developed for unambiguous detection, annotation and visualization of splice junctions and alternative splicing events from RNA-Seq data. Specifically, it allows easy identification and characterization of splice junctions, and holds a versatile computational pipeline for in-depth annotation and classification of alternative splicing with different patterns. Moreover, it provides a user-friendly environment in which an alternative splicing landscape can be displayed in a straightforward and flexible manner. In conclusion, SplicingViewer can be widely used for studying alternative splicing easily and efficiently. 
SplicingViewer can be freely accessed at http://bioinformatics.zj.cn/splicingviewer.",2011-12-28 +22767354,GPR-Analyzer: a simple tool for quantitative analysis of hierarchical multispecies microarrays.,"Monitoring of marine microalgae is important to predict and manage harmful algae blooms. It currently relies mainly on light-microscopic identification and enumeration of algal cells, yet several molecular tools are currently being developed to complement traditional methods. MIcroarray Detection of Toxic ALgae (MIDTAL) is an FP7-funded EU project aiming to establish a hierarchical multispecies microarray as one of these tools. Prototype arrays are currently being tested with field samples, yet the analysis of the large quantities of data generated by these arrays presents a challenge as suitable analysis tools or protocols are scarce. This paper proposes a two-part protocol for the analysis of the MIDTAL and other hierarchical multispecies arrays: Signal-to-noise ratios can be used to determine the presence or absence of signals and to identify potential false-positives considering parallel and hierarchical probes. In addition, normalized total signal intensities are recommended for comparisons between microarrays and in order to relate signals for specific probes to cell concentrations using external calibration curves. Hybridization- and probe-specific detection limits can be calculated to help evaluate negative results. The suggested analyses were implemented in ""GPR-Analyzer"", a platform-independent and graphical user interface-based application, enabling non-specialist users to quickly and quantitatively analyze hierarchical multispecies microarrays. It is available online at http://folk.uio.no/edvardse/gpranalyzer .",2012-07-06 +22958544,Sharing knowledge to advance healthcare policies in Europe for people living with dementia and their carers: the ALCOVE project.,"

Unlabelled

Background

Alzheimer's disease and other related dementias are public health priorities in the European Union due to their prevalence, cost and profound impact on society. Because of these pressing implications, the European Union decided to create a Joint Action to share knowledge about dementia and health policy in order to preserve the health, quality of life, autonomy and dignity of people living with dementia and their carers in Europe.

Methods

ALCOVE is a European Community-funded Joint Action coordinated by the HAS (French National Authority for Health) with a 24-month duration. The project's life cycle has been divided into the following four steps: (1) collection of existing information, (2) analysis of existing information and making comparisons across Member States, (3) identifying Evidence, Needs, and Priorities, (4) drafting recommendations and disseminating them.

Results

19 countries are participating in the ALCOVE initiative. The project will publish its final findings in 2013. The project's objectives, participants, method, on-going procedures and work plans are already available on the ALCOVE website: http://www.alcove-project.eu/. Preliminary results show that recommendations will need to focus on clinical and epidemiological data collection, diagnostic system assessment, outstanding approaches for treating behavioural disorders, limiting antipsychotic use, and competence assessment in this vulnerable population.

Conclusions

The European Member States involved are mobilized to share best health policy practices in order to tackle the challenge of dementia's threat on European health and social systems and to improve the quality of life and care for individuals and their family carers.",2012-08-28 +21414208,pROC: an open-source package for R and S+ to analyze and compare ROC curves.,"

Background

Receiver operating characteristic (ROC) curves are useful tools to evaluate classifiers in biomedical and bioinformatics applications. However, conclusions are often reached through inconsistent use or insufficient statistical analysis. To support researchers in their ROC curves analysis we developed pROC, a package for R and S+ that contains a set of tools displaying, analyzing, smoothing and comparing ROC curves in a user-friendly, object-oriented and flexible interface.

Results

With data previously imported into the R or S+ environment, the pROC package builds ROC curves and includes functions for computing confidence intervals, statistical tests for comparing total or partial area under the curve or the operating points of different classifiers, and methods for smoothing ROC curves. Intermediary and final results are visualised in user-friendly interfaces. A case study based on published clinical and biomarker data shows how to perform a typical ROC analysis with pROC.

Conclusions

pROC is a package for R and S+ specifically dedicated to ROC analysis. It proposes multiple statistical tests to compare ROC curves, and in particular partial areas under the curve, allowing proper ROC interpretation. pROC is available in two versions: in the R programming language or with a graphical user interface in the S+ statistical software. It is accessible at http://expasy.org/tools/pROC/ under the GNU General Public License. It is also distributed through the CRAN and CSAN public repositories, facilitating its installation.",2011-03-17 +21478195,CyClus3D: a Cytoscape plugin for clustering network motifs in integrated networks.,"

Summary

Network motifs in integrated molecular networks represent functional relationships between distinct data types. They aggregate to form dense topological structures corresponding to functional modules which cannot be detected by traditional graph clustering algorithms. We developed CyClus3D, a Cytoscape plugin for clustering composite three-node network motifs using a 3D spectral clustering algorithm.

Availability

Via the Cytoscape plugin manager or http://bioinformatics.psb.ugent.be/software/details/CyClus3D.",2011-04-08 +22925655,Apoptosis signal-regulating kinase 1 is associated with the effect of claudin-6 in breast cancer.,"

Background

Previous studies have demonstrated that claudin-6 functions as a cancer suppressor in human MCF-7 breast cancer cells. The growth inhibitory effect could be attributed to inhibition of cell proliferation and induction of apoptosis. The purpose of the current study was to examine the involvement of apoptosis signal-regulating kinase 1 (ASK1) in the anticancer effect of claudin-6.

Methods

Immunohistochemical analysis was performed to evaluate the ASK1 protein expression and the correlation between ASK1, claudin-6 and clinicopathological features in 85 samples of breast invasive ductal carcinomas (IDC). Western blotting and RT-PCR was carried out to examine the expression of ASK1 and claudin-6 in MCF-7 cell clones transfected with claudin-6.

Results

Immunohistochemical analysis showed that ASK1 expression was significantly related with that of claudin-6 in breast invasive ductal carcinomas (P < 0.05). In addition, a positive correlation between ASK1 and C-erb B 2 protein expression was identified (P < 0.05). Western blotting and RT-PCR consistently revealed that the level of ASK1 protein and mRNA was upregulated in MCF-7 cell clones transfected with claudin-6.

Conclusions

Our data suggests, for the first time, that the ASK1 signal may play a positive role in the inhibitory effect of claudin-6 in breast cancer.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1200314318763661.",2012-08-27 +21994227,BioTextQuest: a web-based biomedical text mining suite for concept discovery.,"

Summary

BioTextQuest combines automated discovery of significant terms in article clusters with structured knowledge annotation, via Named Entity Recognition services, offering interactive user-friendly visualization. A tag-cloud-based illustration of terms labeling each document cluster are semantically annotated according to the biological entity, and a list of document titles enable users to simultaneously compare terms and documents of each cluster, facilitating concept association and hypothesis generation. BioTextQuest allows customization of analysis parameters, e.g. clustering/stemming algorithms, exclusion of documents/significant terms, to better match the biological question addressed.

Availability

http://biotextquest.biol.ucy.ac.cy

Contact

vprobon@ucy.ac.cy; iliopj@med.uoc.gr

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-12 +21662242,PocketAlign a novel algorithm for aligning binding sites in protein structures.,"A fundamental task in bioinformatics involves a transfer of knowledge from one protein molecule onto another by way of recognizing similarities. Such similarities are obtained at different levels, that of sequence, whole fold, or important substructures. Comparison of binding sites is important to understand functional similarities among the proteins and also to understand drug cross-reactivities. Current methods in literature have their own merits and demerits, warranting exploration of newer concepts and algorithms, especially for large-scale comparisons and for obtaining accurate residue-wise mappings. Here, we report the development of a new algorithm, PocketAlign, for obtaining structural superpositions of binding sites. The software is available as a web-service at http://proline.physics.iisc.ernet.in/pocketalign/. The algorithm encodes shape descriptors in the form of geometric perspectives, supplemented by chemical group classification. The shape descriptor considers several perspectives with each residue as the focus and captures relative distribution of residues around it in a given site. Residue-wise pairings are computed by comparing the set of perspectives of the first site with that of the second, followed by a greedy approach that incrementally combines residue pairings into a mapping. The mappings in different frames are then evaluated by different metrics encoding the extent of alignment of individual geometric perspectives. Different initial seed alignments are computed, each subsequently extended by detecting consequential atomic alignments in a three-dimensional grid, and the best 500 stored in a database. Alignments are then ranked, and the top scoring alignments reported, which are then streamed into Pymol for visualization and analyses. 
The method is validated for accuracy and sensitivity and benchmarked against existing methods. An advantage of PocketAlign, as compared to some of the existing tools available for binding site comparison in literature, is that it explores different schemes for identifying an alignment thus has a better potential to capture similarities in ligand recognition abilities. PocketAlign, by finding a detailed alignment of a pair of sites, provides insights as to why two sites are similar and which set of residues and atoms contribute to the similarity.",2011-06-21 +21538686,A comprehensive review of reported heritable noggin-associated syndromes and proposed clinical utility of one broadly inclusive diagnostic term: NOG-related-symphalangism spectrum disorder (NOG-SSD).,"The NOG gene encodes noggin, a secreted polypeptide that is important for regulating multiple signaling pathways during human development, particularly in cartilage and bone. The hallmark of NOG-related syndromes is proximal symphalangism, defined by abnormal fusion of the proximal interphalangeal joints of the hands and feet. Many additional features secondary to NOG mutations are commonly but inconsistently observed, including a characteristic facies with a hemicylindrical nose, congenital conductive hearing loss due to stapes fixation, and hyperopia. The variable clinical presentations led to the designation of five different autosomal dominant syndromes, all subsequently found to have resulted from NOG mutations. These include (1) proximal symphalangism; (2) multiple synostoses syndrome 1; (3) stapes ankylosis with broad thumbs and toes; (4) tarsal-carpal coalition syndrome; and (5) brachydactyly type B2. Herein, we review the phenotypic features associated with mutations in the NOG gene, demonstrating the overlapping characteristics of these syndromes. 
Due to the variable phenotypic spectrum within families and among families with the same mutation, we propose a unifying term, NOG-related symphalangism spectrum disorder (NOG-SSD), to aid in the clinical recognition and evaluation of all affected individuals with these phenotypes. These NOG gene variants are available in a new locus-specific database (https://NOG.lovd.nl).",2011-06-21 +21693021,Determinants of antigenicity and specificity in immune response for protein sequences.,"

Background

Target specific antibodies are pivotal for the design of vaccines, immunodiagnostic tests, studies on proteomics for cancer biomarker discovery, identification of protein-DNA and other interactions, and small and large biochemical assays. Therefore, it is important to understand the properties of protein sequences that are important for antigenicity and to identify small peptide epitopes and large regions in the linear sequence of the proteins whose utilization result in specific antibodies.

Results

Our analysis using protein properties suggested that sequence composition combined with evolutionary information and predicted secondary structure, as well as solvent accessibility is sufficient to predict successful peptide epitopes. The antigenicity and the specificity in immune response were also found to depend on the epitope length. We trained the B-Cell Epitope Oracle (BEOracle), a support vector machine (SVM) classifier, for the identification of continuous B-Cell epitopes with these protein properties as learning features. The BEOracle achieved an F1-measure of 81.37% on a large validation set. The BEOracle classifier outperformed the classical methods based on propensity and sophisticated methods like BCPred and Bepipred for B-Cell epitope prediction. The BEOracle classifier also identified peptides for the ChIP-grade antibodies from the modENCODE/ENCODE projects with 96.88% accuracy. High BEOracle score for peptides showed some correlation with the antibody intensity on Immunofluorescence studies done on fly embryos. Finally, a second SVM classifier, the B-Cell Region Oracle (BROracle) was trained with the BEOracle scores as features to predict the performance of antibodies generated with large protein regions with high accuracy. The BROracle classifier achieved accuracies of 75.26-63.88% on a validation set with immunofluorescence, immunohistochemistry, protein arrays and western blot results from Protein Atlas database.

Conclusions

Together our results suggest that antigenicity is a local property of the protein sequences and that protein sequence properties of composition, secondary structure, solvent accessibility and evolutionary conservation are the determinants of antigenicity and specificity in immune response. Moreover, specificity in immune response could also be accurately predicted for large protein regions without the knowledge of the protein tertiary structure or the presence of discontinuous epitopes. The dataset prepared in this work and the classifier models are available for download at https://sites.google.com/site/oracleclassifiers/.",2011-06-21 +23617269,Ensemble-based prediction of RNA secondary structures.,"

Background

Accurate structure prediction methods play an important role for the understanding of RNA function. Energy-based, pseudoknot-free secondary structure prediction is one of the most widely used and versatile approaches, and improved methods for this task have received much attention over the past five years. Despite the impressive progress that as been achieved in this area, existing evaluations of the prediction accuracy achieved by various algorithms do not provide a comprehensive, statistically sound assessment. Furthermore, while there is increasing evidence that no prediction algorithm consistently outperforms all others, no work has been done to exploit the complementary strengths of multiple approaches.

Results

In this work, we present two contributions to the area of RNA secondary structure prediction. Firstly, we use state-of-the-art, resampling-based statistical methods together with a previously published and increasingly widely used dataset of high-quality RNA structures to conduct a comprehensive evaluation of existing RNA secondary structure prediction procedures. The results from this evaluation clarify the performance relationship between ten well-known existing energy-based pseudoknot-free RNA secondary structure prediction methods and clearly demonstrate the progress that has been achieved in recent years. Secondly, we introduce AveRNA, a generic and powerful method for combining a set of existing secondary structure prediction procedures into an ensemble-based method that achieves significantly higher prediction accuracies than obtained from any of its component procedures.

Conclusions

Our new, ensemble-based method, AveRNA, improves the state of the art for energy-based, pseudoknot-free RNA secondary structure prediction by exploiting the complementary strengths of multiple existing prediction procedures, as demonstrated using a state-of-the-art statistical resampling approach. In addition, AveRNA allows an intuitive and effective control of the trade-off between false negative and false positive base pair predictions. Finally, AveRNA can make use of arbitrary sets of secondary structure prediction procedures and can therefore be used to leverage improvements in prediction accuracy offered by algorithms and energy models developed in the future. Our data, MATLAB software and a web-based version of AveRNA are publicly available at http://www.cs.ubc.ca/labs/beta/Software/AveRNA.",2013-04-24 +22682510,Visually integrating and exploring high throughput Phenome-Wide Association Study (PheWAS) results using PheWAS-View.,"

Background

Phenome-Wide Association Studies (PheWAS) can be used to investigate the association between single nucleotide polymorphisms (SNPs) and a wide spectrum of phenotypes. This is a complementary approach to Genome Wide Association studies (GWAS) that calculate the association between hundreds of thousands of SNPs and one or a limited range of phenotypes. The extensive exploration of the association between phenotypic structure and genotypic variation through PheWAS produces a set of complex and comprehensive results. Integral to fully inspecting, analysing, and interpreting PheWAS results is visualization of the data.

Results

We have developed the software PheWAS-View for visually integrating PheWAS results, including information about the SNPs, relevant genes, phenotypes, and the interrelationships between phenotypes, that exist in PheWAS. As a result both the fine grain detail as well as the larger trends that exist within PheWAS results can be elucidated.

Conclusions

PheWAS can be used to discover novel relationships between SNPs, phenotypes, and networks of interrelated phenotypes; identify pleiotropy; provide novel mechanistic insights; and foster hypothesis generation - and these results can be both explored and presented with PheWAS-View. PheWAS-View is freely available for non-commercial research institutions, for full details see http://ritchielab.psu.edu/ritchielab/software.",2012-06-08 +22675073,KOMODO: a web tool for detecting and visualizing biased distribution of groups of homologous genes in monophyletic taxa.,"The enrichment analysis is a standard procedure to interpret 'omics' experiments that generate large gene lists as outputs, such as transcriptomics and protemics. However, despite the huge success of enrichment analysis in these classes of experiments, there is a surprising lack of application of this methodology to survey other categories of large-scale biological data available. Here, we report Kegg Orthology enrichMent-Online DetectiOn (KOMODO), a web tool to systematically investigate groups of monophyletic genomes in order to detect significantly enriched groups of homologous genes in one taxon when compared with another. The results are displayed in their proper biochemical roles in a visual, explorative way, allowing users to easily formulate and investigate biological hypotheses regarding the taxonomical distribution of genomic elements. We validated KOMODO by analyzing portions of central carbon metabolism in two taxa extensively studied regarding their carbon metabolism profile (Enterobacteriaceae family and Lactobacillales order). 
Most enzymatic activities significantly biased were related to known key metabolic traits in these taxa, such as the distinct fates of pyruvate (the known tendency of lactate production in Lactobacillales and its complete oxidation in Enterobacteriaceae), demonstrating that KOMODO could detect biologically meaningful differences in the frequencies of shared genomic elements among taxa. KOMODO is freely available at http://komodotool.org.",2012-06-06 +22360891,GoBean: a Java GUI application for visual exploration of GO term enrichments.,"We have developed a biologist-friendly, Java GUI application (GoBean) for GO term enrichment analysis. It was designed to be a comprehensive and flexible GUI tool for GO term enrichment analysis, combining the merits of other programs and incorporating extensive graphic exploration of enrichment results. An intuitive user interface with multiple panels allows for extensive visual scrutiny of analysis results. The program includes many essential and useful features, such as enrichment analysis algorithms, multiple test correction methods, and versatile filtering of enriched GO terms for more focused analyses. A unique graphic interface reflecting the GO tree structure was devised to facilitate comparisons of multiple GO analysis results, which can provide valuable insights for biological interpretation. Additional features to enhance user convenience include built in ID conversion, evidence code-based gene-GO association filtering, set operations of gene lists and enriched GO terms, and user -provided data files. 
It is available at http://neon.gachon.ac.kr/GoBean/.",2012-02-01 +22199379,Hawkeye and AMOS: visualizing and assessing the quality of genome assemblies.,"Since its launch in 2004, the open-source AMOS project has released several innovative DNA sequence analysis applications including: Hawkeye, a visual analytics tool for inspecting the structure of genome assemblies; the Assembly Forensics and FRCurve pipelines for systematically evaluating the quality of a genome assembly; and AMOScmp, the first comparative genome assembler. These applications have been used to assemble and analyze dozens of genomes ranging in complexity from simple microbial species through mammalian genomes. Recent efforts have been focused on enhancing support for new data characteristics brought on by second- and now third-generation sequencing. This review describes the major components of AMOS in light of these challenges, with an emphasis on methods for assessing assembly quality and the visual analytics capabilities of Hawkeye. These interactive graphical aspects are essential for navigating and understanding the complexities of a genome assembly, from the overall genome structure down to individual bases. Hawkeye and AMOS are available open source at http://amos.sourceforge.net.",2011-12-23 +23308147,Automatic peak selection by a Benjamini-Hochberg-based algorithm.,"A common issue in bioinformatics is that computational methods often generate a large number of predictions sorted according to certain confidence scores. A key problem is then determining how many predictions must be selected to include most of the true predictions while maintaining reasonably high precision. 
In nuclear magnetic resonance (NMR)-based protein structure determination, for instance, computational peak picking methods are becoming more and more common, although expert-knowledge remains the method of choice to determine how many peaks among thousands of candidate peaks should be taken into consideration to capture the true peaks. Here, we propose a Benjamini-Hochberg (B-H)-based approach that automatically selects the number of peaks. We formulate the peak selection problem as a multiple testing problem. Given a candidate peak list sorted by either volumes or intensities, we first convert the peaks into [Formula: see text]-values and then apply the B-H-based algorithm to automatically select the number of peaks. The proposed approach is tested on the state-of-the-art peak picking methods, including WaVPeak [1] and PICKY [2]. Compared with the traditional fixed number-based approach, our approach returns significantly more true peaks. For instance, by combining WaVPeak or PICKY with the proposed method, the missing peak rates are on average reduced by 20% and 26%, respectively, in a benchmark set of 32 spectra extracted from eight proteins. The consensus of the B-H-selected peaks from both WaVPeak and PICKY achieves 88% recall and 83% precision, which significantly outperforms each individual method and the consensus method without using the B-H algorithm. The proposed method can be used as a standard procedure for any peak picking method and straightforwardly applied to some other prediction selection problems in bioinformatics. The source code, documentation and example data of the proposed method is available at http://sfb.kaust.edu.sa/pages/software.aspx.",2013-01-07 +22669902,HomozygosityMapper2012--bridging the gap between homozygosity mapping and deep sequencing.,"Homozygosity mapping is a common method to map recessive traits in consanguineous families. 
To facilitate these analyses, we have developed HomozygosityMapper, a web-based approach to homozygosity mapping. HomozygosityMapper allows researchers to directly upload the genotype files produced by the major genotyping platforms as well as deep sequencing data. It detects stretches of homozygosity shared by the affected individuals and displays them graphically. Users can interactively inspect the underlying genotypes, manually refine these regions and eventually submit them to our candidate gene search engine GeneDistiller to identify the most promising candidate genes. Here, we present the new version of HomozygosityMapper. The most striking new feature is the support of Next Generation Sequencing *.vcf files as input. Upon users' requests, we have implemented the analysis of common experimental rodents as well as of important farm animals. Furthermore, we have extended the options for single families and loss of heterozygosity studies. Another new feature is the export of *.bed files for targeted enrichment of the potential disease regions for deep sequencing strategies. HomozygosityMapper also generates files for conventional linkage analyses which are already restricted to the possible disease regions, hence superseding CPU-intensive genome-wide analyses. HomozygosityMapper is freely available at http://www.homozygositymapper.org/.",2012-06-04 +21464096,Guidelines for the management of inflammatory bowel disease in adults.,"The management of inflammatory bowel disease represents a key component of clinical practice for members of the British Society of Gastroenterology (BSG). There has been considerable progress in management strategies affecting all aspects of clinical care since the publication of previous BSG guidelines in 2004, necessitating the present revision. 
Key components of the present document worthy of attention as having been subject to re-assessment, and revision, and having direct impact on practice include: The data generated by the nationwide audits of inflammatory bowel disease (IBD) management in the UK in 2006, and 2008. The publication of 'Quality Care: service standards for the healthcare of people with IBD' in 2009. The introduction of the Montreal classification for Crohn's disease and ulcerative colitis. The revision of recommendations for the use of immunosuppressive therapy. The detailed analysis, guidelines and recommendations for the safe and appropriate use of biological therapies in Crohn's disease and ulcerative colitis. The reassessment of the role of surgery in disease management, with emphasis on the importance of multi-disciplinary decision-making in complex cases. The availablity of new data on the role of reconstructive surgery in ulcerative colitis. The cross-referencing to revised guidelines for colonoscopic surveillance, for the management of metabolic bone disease, and for the care of children with inflammatory bowel disease. Use of the BSG discussion forum available on the BSG website to enable ongoing feedback on the published document http://www.bsg.org.uk/forum (accessed Oct 2010). The present document is intended primarily for the use of clinicians in the United Kingdom, and serves to replace the previous BSG guidelines in IBD, while complementing recent consensus statements published by the European Crohn's and Colitis Organisation (ECCO) https://www.ecco-ibd.eu/index.php (accessed Oct 2010).",2011-05-01 +21698189,Predicting protein folds with fold-specific PSSM libraries.,"Accurately assigning folds for divergent protein sequences is a major obstacle to structural studies. Herein, we outline an effective method for fold recognition using sets of PSSMs, each of which is constructed for different protein folds. 
Our analyses demonstrate that FSL (Fold-specific Position Specific Scoring Matrix Libraries) can predict/relate structures given only their amino acid sequences of highly divergent proteins. This ability to detect distant relationships is dependent on low-identity sequence alignments obtained from FSL. Results from our experiments demonstrate that FSL perform well in recognizing folds from the ""twilight-zone"" SABmark dataset. Further, this method is capable of accurate fold prediction in newly determined structures. We suggest that by building complete PSSM libraries for all unique folds within the Protein Database (PDB), FSL can be used to rapidly and reliably annotate a large subset of protein folds at proteomic level. The related programs and fold-specific PSSMs for our FSL are publicly available at: http://ccp.psu.edu/download/FSLv1.0/.",2011-06-16 +21695124,"Gene-disease network analysis reveals functional modules in mendelian, complex and environmental diseases.","

Background

Scientists have been trying to understand the molecular mechanisms of diseases to design preventive and therapeutic strategies for a long time. For some diseases, it has become evident that it is not enough to obtain a catalogue of the disease-related genes but to uncover how disruptions of molecular networks in the cell give rise to disease phenotypes. Moreover, with the unprecedented wealth of information available, even obtaining such catalogue is extremely difficult.

Principal findings

We developed a comprehensive gene-disease association database by integrating associations from several sources that cover different biomedical aspects of diseases. In particular, we focus on the current knowledge of human genetic diseases including mendelian, complex and environmental diseases. To assess the concept of modularity of human diseases, we performed a systematic study of the emergent properties of human gene-disease networks by means of network topology and functional annotation analysis. The results indicate a highly shared genetic origin of human diseases and show that for most diseases, including mendelian, complex and environmental diseases, functional modules exist. Moreover, a core set of biological pathways is found to be associated with most human diseases. We obtained similar results when studying clusters of diseases, suggesting that related diseases might arise due to dysfunction of common biological processes in the cell.

Conclusions

For the first time, we include mendelian, complex and environmental diseases in an integrated gene-disease association database and show that the concept of modularity applies for all of them. We furthermore provide a functional analysis of disease-related modules providing important new biological insights, which might not be discovered when considering each of the gene-disease association repositories independently. Hence, we present a suitable framework for the study of how genetic and environmental factors, such as drugs, contribute to diseases.

Availability

The gene-disease networks used in this study and part of the analysis are available at http://ibi.imim.es/DisGeNET/DisGeNETweb.html#Download.",2011-06-14 +21672959,firestar--advances in the prediction of functionally important residues.,"firestar is a server for predicting catalytic and ligand-binding residues in protein sequences. Here, we present the important developments since the first release of firestar. Previous versions of the server required human interpretation of the results; the server is now fully automatized. firestar has been implemented as a web service and can now be run in high-throughput mode. Prediction coverage has been greatly improved with the extension of the FireDB database and the addition of alignments generated by HHsearch. Ligands in FireDB are now classified for biological relevance. Many of the changes have been motivated by the critical assessment of techniques for protein structure prediction (CASP) ligand-binding prediction experiment, which provided us with a framework to test the performance of firestar. URL: http://firedb.bioinfo.cnio.es/Php/FireStar.php.",2011-06-14 +21672958,"antiSMASH: rapid identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genome sequences.","Bacterial and fungal secondary metabolism is a rich source of novel bioactive compounds with potential pharmaceutical applications as antibiotics, anti-tumor drugs or cholesterol-lowering drugs. To find new drug candidates, microbiologists are increasingly relying on sequencing genomes of a wide variety of microbes. However, rapidly and reliably pinpointing all the potential gene clusters for secondary metabolites in dozens of newly sequenced genomes has been extremely challenging, due to their biochemical heterogeneity, the presence of unknown enzymes and the dispersed nature of the necessary specialized bioinformatics tools and resources. 
Here, we present antiSMASH (antibiotics & Secondary Metabolite Analysis Shell), the first comprehensive pipeline capable of identifying biosynthetic loci covering the whole range of known secondary metabolite compound classes (polyketides, non-ribosomal peptides, terpenes, aminoglycosides, aminocoumarins, indolocarbazoles, lantibiotics, bacteriocins, nucleosides, beta-lactams, butyrolactones, siderophores, melanins and others). It aligns the identified regions at the gene cluster level to their nearest relatives from a database containing all other known gene clusters, and integrates or cross-links all previously available secondary-metabolite specific gene analysis methods in one interactive view. antiSMASH is available at http://antismash.secondarymetabolites.org.",2011-06-14 +29387165,Effects of harvesting of increasing intensities on genetic diversity and population structure of white spruce.,"Forest harvesting of increasing intensities is expected to have intensifying impacts on the genetic diversity and population structure of postharvest naturally regenerated stands by affecting the magnitude of evolutionary processes, such as genetic drift, gene flow, mating system, and selection. We have tested this hypothesis for the first time by employing widely distributed boreal white spruce (Picea glauca) as a model and controlled, replicated experimental harvesting and regeneration experiment at the EMEND project site (http://www.emendproject.org). We used two approaches. First, genetic diversity and population structure of postharvest natural regeneration after five harvesting treatments (green tree retention of 75%, 50%, 20%, and 10%, and clearcut) were assessed and compared with those of the unharvested control (pristine preharvest old-growth) in two replicates each of conifer-dominated (CD) and mixed-wood (MW) forest, using 10 (six EST (expressed sequence tag) and four genomic) microsatellite markers. 
Second, genetic diversity and population structure of preharvest old-growth were compared with those of postharvest natural regeneration after five harvesting treatments in the same treatment blocks in one replicate each of CD and MW forests. Contrary to our expectations, genetic diversity, inbreeding levels, and population genetic structure were similar between unharvested control or preharvest old-growth and postharvest natural regeneration after five harvesting treatments, with clearcut showing no negative genetic impacts. The potential effects of genetic drift and inbreeding resulting from harvesting bottlenecks were counterbalanced by predominantly outcrossing mating system and high gene flow from the residual and/or surrounding white spruce. CD and MW forests responded similarly to harvesting of increasing intensities. Simulated data for 10, 50, and 100 microsatellite markers showed the same results as obtained empirically from 10 microsatellite markers. Similar patterns of genetic diversity and population structure were observed for EST and genomic microsatellites. In conclusion, harvesting of increasing intensities did not show any significant negative impact on genetic diversity, population structure, and evolutionary potential of white spruce in CD and MW forests. Our first of its kind of study addresses the broad central forest management question how forest harvesting and regeneration practices can best maintain genetic biodiversity and ecosystem integrity.",2013-04-18 +21684350,MeSHy: Mining unanticipated PubMed information using frequencies of occurrences and concurrences of MeSH terms.,"

Motivation

PubMed is the most widely used database of biomedical literature. To the detriment of the user though, the ranking of the documents retrieved for a query is not content-based, and important semantic information in the form of assigned Medical Subject Headings (MeSH) terms is not readily presented or productively utilized. The motivation behind this work was the discovery of unanticipated information through the appropriate ranking of MeSH term pairs and, indirectly, documents. Such information can be useful in guiding novel research and following promising trends.

Methods

A web-based tool, called MeSHy, was developed implementing a mainly statistical algorithm. The algorithm takes into account the frequencies of occurrences, concurrences, and the semantic similarities of MeSH terms in retrieved PubMed documents to create MeSH term pairs. These are then scored and ranked, focusing on their unexpectedly frequent or infrequent occurrences.

Results

MeSHy presents results through an online interactive interface facilitating further manipulation through filtering and sorting. The results themselves include the MeSH term pairs, along with MeSH categories, the score, and document IDs, all of which are hyperlinked for convenience. To highlight the applicability of the tool, we report the findings of an expert in the pharmacology field on querying the molecularly-targeted drug imatinib and nutrition-related flavonoids. To the best of our knowledge, MeSHy is the first publicly available tool able to directly provide such a different perspective on the complex nature of published work.

Implementation and availability

Implemented in Perl and served by Apache2 at http://bat.ina.certh.gr/tools/meshy/ with all major browsers supported.",2011-06-13 +23393060,What is new in the management of wet age-related macular degeneration?,"

Introduction or background

The hallmark of wet age-related macular degeneration (AMD) is choroidal neovascularization (CNV). The key cytokine involved in the pathogenesis of CNV is vascular endothelial growth factor (VEGF). Since 2005, antiVEGF therapy has revolutionized the management of this condition.

Sources of data

A systematic computerized literature search was conducted on PubMed (http://www.ncbi.nlm.nih.gov/pubmed/).

Areas of agreement

AntiVEGF therapy has resulted in improvement in visual function and performance. Currently, practitioners are spoilt for choice of these agents.

Areas of controversy

Bevacizumab is unlicensed for intraocular use but has a better market share than ranibizumab in the treatment of wet AMD as it is approximately 40 times cheaper than ranibizumab, if aliquoted into smaller doses for intraocular use. This has stirred up questions on indemnity, safety, dosing, treatment regimen and quality control, despite the fact that well-designed clinical trials have shown that both drugs are equally effective. Another dilemma for the physicians is the choice of treatment regimens with antiVEGF agents that include fixed dosing, optical coherence tomography (OCT)-guided re-treatment, treat and extend or a combination of proactive and reactive dosing. Real-life outcomes of physician-dependent OCT-guided re-treatment with these agents are inferior to outcomes reported in clinical trials.

Growing points

A recently food and drug administration-approved antiVEGF agent, aflibercept, is rapidly becoming a popular choice as well-designed randomized clinical trials indicate that eight weekly fixed dosing of aflibercept is non-inferior to monthly ranibizumab.

Areas timely for developing research

Options for reducing the frequency of repeated intravitreal injections are being explored. Combination therapy with photodynamic therapy and epimacular brachytherapy seem scientifically plausible due to their synergistic effects. However, so far the results on these combinations have not shown any superior visual outcomes to antiVEGF monotherapy, and the practicalities of delivering these therapies are formidable. So, research into other novel therapeutic approaches such as pigment epithelium-derived factor and designed ankyrin repeat proteins are gaining momentum.",2013-02-07 +24131861,"Focal psychodynamic therapy, cognitive behaviour therapy, and optimised treatment as usual in outpatients with anorexia nervosa (ANTOP study): randomised controlled trial.","

Background

Psychotherapy is the treatment of choice for patients with anorexia nervosa, although evidence of efficacy is weak. The Anorexia Nervosa Treatment of OutPatients (ANTOP) study aimed to assess the efficacy and safety of two manual-based outpatient treatments for anorexia nervosa--focal psychodynamic therapy and enhanced cognitive behaviour therapy--versus optimised treatment as usual.

Methods

The ANTOP study is a multicentre, randomised controlled efficacy trial in adults with anorexia nervosa. We recruited patients from ten university hospitals in Germany. Participants were randomly allocated to 10 months of treatment with either focal psychodynamic therapy, enhanced cognitive behaviour therapy, or optimised treatment as usual (including outpatient psychotherapy and structured care from a family doctor). The primary outcome was weight gain, measured as increased body-mass index (BMI) at the end of treatment. A key secondary outcome was rate of recovery (based on a combination of weight gain and eating disorder-specific psychopathology). Analysis was by intention to treat. This trial is registered at http://isrctn.org, number ISRCTN72809357.

Findings

Of 727 adults screened for inclusion, 242 underwent randomisation: 80 to focal psychodynamic therapy, 80 to enhanced cognitive behaviour therapy, and 82 to optimised treatment as usual. At the end of treatment, 54 patients (22%) were lost to follow-up, and at 12-month follow-up a total of 73 (30%) had dropped out. At the end of treatment, BMI had increased in all study groups (focal psychodynamic therapy 0·73 kg/m(2), enhanced cognitive behaviour therapy 0·93 kg/m(2), optimised treatment as usual 0·69 kg/m(2)); no differences were noted between groups (mean difference between focal psychodynamic therapy and enhanced cognitive behaviour therapy -0·45, 95% CI -0·96 to 0·07; focal psychodynamic therapy vs optimised treatment as usual -0·14, -0·68 to 0·39; enhanced cognitive behaviour therapy vs optimised treatment as usual -0·30, -0·22 to 0·83). At 12-month follow-up, the mean gain in BMI had risen further (1·64 kg/m(2), 1·30 kg/m(2), and 1·22 kg/m(2), respectively), but no differences between groups were recorded (0·10, -0·56 to 0·76; 0·25, -0·45 to 0·95; 0·15, -0·54 to 0·83, respectively). No serious adverse events attributable to weight loss or trial participation were recorded.

Interpretation

Optimised treatment as usual, combining psychotherapy and structured care from a family doctor, should be regarded as solid baseline treatment for adult outpatients with anorexia nervosa. Focal psychodynamic therapy proved advantageous in terms of recovery at 12-month follow-up, and enhanced cognitive behaviour therapy was more effective with respect to speed of weight gain and improvements in eating disorder psychopathology. Long-term outcome data will be helpful to further adapt and improve these novel manual-based treatment approaches.

Funding

German Federal Ministry of Education and Research (Bundesministerium für Bildung und Forschung, BMBF), German Eating Disorders Diagnostic and Treatment Network (EDNET).",2013-10-14 +21798964,"Dasty3, a WEB framework for DAS.",

Motivation

Dasty3 is a highly interactive and extensible Web-based framework. It provides a rich Application Programming Interface upon which it is possible to develop specialized clients capable of retrieving information from DAS sources as well as from data providers not using the DAS protocol. Dasty3 provides significant improvements on previous Web-based frameworks and is implemented using the 1.6 DAS specification.

Availability

Dasty3 is an open-source tool freely available at http://www.ebi.ac.uk/dasty/ under the terms of the GNU General public license. Source and documentation can be found at http://code.google.com/p/dasty/.

Contact

hhe@ebi.ac.uk.,2011-07-28 +21666259,PAComplex: a web server to infer peptide antigen families and binding models from TCR-pMHC complexes.,"One of the most adaptive immune responses is triggered by specific T-cell receptors (TCR) binding to peptide-major histocompatibility complexes (pMHC). Despite the availability of many prediction servers to identify peptides binding to MHC, these servers are often lacking in peptide-TCR interactions and detailed atomic interacting models. PAComplex is the first web server investigating both pMHC and peptide-TCR interfaces to infer peptide antigens and homologous peptide antigens of a query. This server first identifies significantly similar TCR-pMHC templates (joint Z-value ≥ 4.0) of the query by using antibody-antigen and protein-protein interacting scoring matrices for peptide-TCR and pMHC interfaces, respectively. PAComplex then identifies the homologous peptide antigens of these hit templates from complete pathogen genome databases (≥10^8 peptide candidates from 864,628 protein sequences of 389 pathogens) and experimental peptide databases (80,057 peptides in 2287 species). Finally, the server outputs peptide antigens and homologous peptide antigens of the query and displays detailed interacting models (e.g. hydrogen bonds and steric interactions in two interfaces) of hit TCR-pMHC templates. Experimental results demonstrate that the proposed server can achieve high prediction accuracy and offer potential peptide antigens across pathogens. We believe that the server is able to provide valuable insights for the peptide vaccine and MHC restriction. The PAComplex server is available at http://PAcomplex.life.nctu.edu.tw.",2011-06-11 +22371019,CORAL: QSAR models for acute toxicity in fathead minnow (Pimephales promelas).,"CORrelation And Logic (CORAL) is a software that generates quantitative structure activity relationships (QSAR) for different endpoints. 
This study is dedicated to the QSAR analysis of acute toxicity in Fathead minnow (Pimephales promelas). Statistical quality for the external test set is a complex function of the split (into training and test subsets), the number of epochs of the Monte Carlo optimization, and the threshold that is a criterion for dividing the correlation weights into two classes rare (blocked) and not rare (active). Computational experiments with three random splits (data on 568 compounds) indicated that this approach can satisfactorily predict the desired endpoint (the negative decimal logarithm of the 50% lethal concentration, in mmol/L, pLC50). The average correlation coefficients (r2) are 0.675 ± 0.0053, 0.824 ± 0.0242, 0.787 ± 0.0101 for subtraining, calibration, and test set, respectively. The average standard errors of estimation (s) are 0.837 ± 0.021, 0.555 ± 0.047, 0.606 ± 0.049 for subtraining, calibration, and test set, respectively. The CORAL software together with three random splits into subtraining, calibration, and test sets can be downloaded on the Internet (http://www.insilico.eu/coral/).",2012-02-27 +23590940,Unrooted unordered homeomorphic subtree alignment of RNA trees.,": We generalize some current approaches for RNA tree alignment, which are traditionally confined to ordered rooted mappings, to also consider unordered unrooted mappings. We define the Homeomorphic Subtree Alignment problem (HSA), and present a new algorithm which applies to several modes, combining global or local, ordered or unordered, and rooted or unrooted tree alignments. Our algorithm generalizes previous algorithms that either solved the problem in an asymmetric manner, or were restricted to the rooted and/or ordered cases. 
Focusing here on the most general unrooted unordered case, we show that for input trees T and S, our algorithm has an O(n_T n_S + min(d_T, d_S) L_T L_S) time complexity, where n_T, L_T and d_T are the number of nodes, the number of leaves, and the maximum node degree in T, respectively (satisfying d_T ≤ L_T ≤ n_T), and similarly for n_S, L_S and d_S with respect to the tree S. This improves the time complexity of previous algorithms for less general variants of the problem. In order to obtain this time bound for HSA, we developed new algorithms for a generalized variant of the Min-Cost Bipartite Matching problem (MCM), as well as to two derivatives of this problem, entitled All-Cavity-MCM and All-Pairs-Cavity-MCM. For two input sets of size n and m, where n ≤ m, MCM and both its cavity derivatives are solved in O(n^3 + nm) time, without the usage of priority queues (e.g. Fibonacci heaps) or other complex data structures. This gives the first cubic time algorithm for All-Pairs-Cavity-MCM, and improves the running times of MCM and All-Cavity-MCM problems in the unbalanced case where n ≪ m. We implemented the algorithm (in all modes mentioned above) as a graphical software tool which computes and displays similarities between secondary structures of RNA given as input, and employed it to a preliminary experiment in which we ran all-against-all inter-family pairwise alignments of RNAse P and Hammerhead RNA family members, exposing new similarities which could not be detected by the traditional rooted ordered alignment approaches. The results demonstrate that our approach can be used to expose structural similarity between some RNAs with higher sensitivity than the traditional rooted ordered alignment approaches. Source code and web-interface for our tool can be found in http://www.cs.bgu.ac.il/~negevcb/FRUUT.",2013-04-16 +21653513,FACIL: Fast and Accurate Genetic Code Inference and Logo.,"

Motivation

The intensification of DNA sequencing will increasingly unveil uncharacterized species with potential alternative genetic codes. A total of 0.65% of the DNA sequences currently in Genbank encode their proteins with a variant genetic code, and these exceptions occur in many unrelated taxa.

Results

We introduce FACIL (Fast and Accurate genetic Code Inference and Logo), a fast and reliable tool to evaluate nucleic acid sequences for their genetic code that detects alternative codes even in species distantly related to known organisms. To illustrate this, we apply FACIL to a set of mitochondrial genomic contigs of Globobulimina pseudospinescens. This foraminifer does not have any sequenced close relative in the databases, yet we infer its alternative genetic code with high confidence values. Results are intuitively visualized in a Genetic Code Logo.

Availability and implementation

FACIL is available as a web-based service at http://www.cmbi.ru.nl/FACIL/ and as a stand-alone program.",2011-06-08 +22730436,RESQUE: network reduction using semi-Markov random walk scores for efficient querying of biological networks.,"

Motivation

Recent technological advances in measuring molecular interactions have resulted in an increasing number of large-scale biological networks. Translation of these enormous network data into meaningful biological insights requires efficient computational techniques that can unearth the biological information that is encoded in the networks. One such example is network querying, which aims to identify subnetwork regions in a large target network that are similar to a given query network. Network querying tools can be used to identify novel biological pathways that are homologous to known pathways, thereby enabling knowledge transfer across different organisms.

Results

In this article, we introduce an efficient algorithm for querying large-scale biological networks, called RESQUE. The proposed algorithm adopts a semi-Markov random walk (SMRW) model to probabilistically estimate the correspondence scores between nodes that belong to different networks. The target network is iteratively reduced based on the estimated correspondence scores, which are also iteratively re-estimated to improve accuracy until the best matching subnetwork emerges. We demonstrate that the proposed network querying scheme is computationally efficient, can handle any network query with an arbitrary topology and yields accurate querying results.

Availability

The source code of RESQUE is freely available at http://www.ece.tamu.edu/~bjyoon/RESQUE/",2012-06-23 +21647737,iScreen: world's first cloud-computing web server for virtual screening and de novo drug design based on TCM database@Taiwan.,"The rapidly advancing researches on traditional Chinese medicine (TCM) have greatly intrigued pharmaceutical industries worldwide. To take initiative in the next generation of drug development, we constructed a cloud-computing system for TCM intelligent screening system (iScreen) based on TCM Database@Taiwan. iScreen is compacted web server for TCM docking and followed by customized de novo drug design. We further implemented a protein preparation tool that both extract protein of interest from a raw input file and estimate the size of ligand bind site. In addition, iScreen is designed in user-friendly graphic interface for users who have less experience with the command line systems. For customized docking, multiple docking services, including standard, in-water, pH environment, and flexible docking modes are implemented. Users can download first 200 TCM compounds of best docking results. For TCM de novo drug design, iScreen provides multiple molecular descriptors for a user's interest. iScreen is the world's first web server that employs world's largest TCM database for virtual screening and de novo drug design. We believe our web server can lead TCM research to a new era of drug development. The TCM docking and screening server is available at http://iScreen.cmu.edu.tw/.",2011-06-07 +21887016,miRTour: Plant miRNA and target prediction tool.,"

Unlabelled

MicroRNAs (miRNAs) are important negative regulators of gene expression in plants and animals, which are endogenously produced from their own genes. Computational comparative approach based on evolutionary conservation of mature miRNAs has revealed a number of orthologs of known miRNAs in different plant species. The homology-based plant miRNA discovery, followed by target prediction, comprises several steps, which have been done so far manually. Here, we present the bioinformatics pipeline miRTour which automates all the steps of miRNA similarity search, miRNA precursor selection, target prediction and annotation, each of them performed with the same set of input sequences.

Availability

The database is available for free at http://bio2server.bioinfo.uni-plovdiv.bg/miRTour/",2011-06-06 +22276185,Fast computation and applications of genome mappability.,"We present a fast mapping-based algorithm to compute the mappability of each region of a reference genome up to a specified number of mismatches. Knowing the mappability of a genome is crucial for the interpretation of massively parallel sequencing experiments. We investigate the properties of the mappability of eukaryotic DNA/RNA both as a whole and at the level of the gene family, providing for various organisms tracks which allow the mappability information to be visually explored. In addition, we show that mappability varies greatly between species and gene classes. Finally, we suggest several practical applications where mappability can be used to refine the analysis of high-throughput sequencing data (SNP calling, gene expression quantification and paired-end experiments). This work highlights mappability as an important concept which deserves to be taken into full account, in particular when massively parallel sequencing technologies are employed. The GEM mappability program belongs to the GEM (GEnome Multitool) suite of programs, which can be freely downloaded for any use from its website (http://gemlibrary.sourceforge.net).",2012-01-19 +22638583,"Seq2Logo: a method for construction and visualization of amino acid binding motifs and sequence profiles including sequence weighting, pseudo counts and two-sided representation of amino acid enrichment and depletion.","Seq2Logo is a web-based sequence logo generator. Sequence logos are a graphical representation of the information content stored in a multiple sequence alignment (MSA) and provide a compact and highly intuitive representation of the position-specific amino acid composition of binding motifs, active sites, etc. in biological sequences. 
Accurate generation of sequence logos is often compromised by sequence redundancy and low number of observations. Moreover, most methods available for sequence logo generation focus on displaying the position-specific enrichment of amino acids, discarding the equally valuable information related to amino acid depletion. Seq2logo aims at resolving these issues allowing the user to include sequence weighting to correct for data redundancy, pseudo counts to correct for low number of observations and different logotype representations each capturing different aspects related to amino acid enrichment and depletion. Besides allowing input in the format of peptides and MSA, Seq2Logo accepts input as Blast sequence profiles, providing easy access for non-expert end-users to characterize and identify functionally conserved/variable amino acids in any given protein of interest. The output from the server is a sequence logo and a PSSM. Seq2Logo is available at http://www.cbs.dtu.dk/biotools/Seq2Logo (14 May 2012, date last accessed).",2012-05-25 +23151179,"High-resolution genome-wide scan of genes, gene-networks and cellular systems impacting the yeast ionome.","

Background

To balance the demand for uptake of essential elements with their potential toxicity living cells have complex regulatory mechanisms. Here, we describe a genome-wide screen to identify genes that impact the elemental composition ('ionome') of yeast Saccharomyces cerevisiae. Using inductively coupled plasma - mass spectrometry (ICP-MS) we quantify Ca, Cd, Co, Cu, Fe, K, Mg, Mn, Mo, Na, Ni, P, S and Zn in 11890 mutant strains, including 4940 haploid and 1127 diploid deletion strains, and 5798 over expression strains.

Results

We identified 1065 strains with an altered ionome, including 584 haploid and 35 diploid deletion strains, and 446 over expression strains. Disruption of protein metabolism or trafficking has the highest likelihood of causing large ionomic changes, with gene dosage also being important. Gene over expression produced more extreme ionomic changes, but over expression and loss of function phenotypes are generally not related. Ionomic clustering revealed the existence of only a small number of possible ionomic profiles suggesting fitness tradeoffs that constrain the ionome. Clustering also identified important roles for the mitochondria, vacuole and ESCRT pathway in regulation of the ionome. Network analysis identified hub genes such as PMR1 in Mn homeostasis, novel members of ionomic networks such as SMF3 in vacuolar retrieval of Mn, and cross-talk between the mitochondria and the vacuole. All yeast ionomic data can be searched and downloaded at http://www.ionomicshub.org.

Conclusions

Here, we demonstrate the power of high-throughput ICP-MS analysis to functionally dissect the ionome on a genome-wide scale. The information this reveals has the potential to benefit both human health and agriculture.",2012-11-14 +23431106,Novel CDH1 germline mutations identified in Chinese gastric cancer patients.,"

Aim

To give a comprehensive report of E-cadherin gene (CDH1) variations in a population at a high risk for gastric cancer (GC).

Methods

The samples consisted of 178 men and 58 women with a mean age of 62.3 ± 9.4 years and an age range of 30-84 years. A total of 240 cancer-free controls were recruited (mean age of 61.8 ± 10.1 years, age range of 26-82 years). Samples were screened for CDH1 germline mutations by high-resolution melting analysis or directly sequencing. Luciferase reporter assay, RNA splicing assay and bioinformatic analysis were used to evaluate the effect of mutations.

Results

Four novel CDH1 sequence alterations were identified in GC patients including a G>T transition 49 bp before the start codon; a three-nucleotide deletion, c.44_46del TGC; one missense mutation, c.604G>A (V202I); and one variation in the intron, c.1320+7A>G. In addition, polymorphism frequencies were observed for CDH1-164delT, -161C>A, -73A>C, c.48+6C>T, c.48+62_48+63delinsCGTGCCCCAGCCC, c.894C>T (A298A), c.1224G>A (A408A), c.1888C>G (L630V), c.2076T>C (A692A), and c.2253C>T (N751N) which is similar to the data reported in http://www.ncbi.nlm.nih.gov/projects/SNP/. RNA splicing analysis suggested that the c.1320+7A>G and c.1224G>A variations did not affect exon splicing ability. Luciferase reporter assay demonstrated that the c.-49T variation might be helpful for E-cadherin transcription, though the increase in transcription activity is limited (only 33%). SIFT score and PolyPhen analysis both demonstrated that the L630V missense mutation probably damages protein function, while the V202I variant does not.

Conclusion

This study reveals novel mutations in sporadic GC patients which had been poorly investigated for susceptibility genes.",2013-02-01 +21642403,"Determination of microbial diversity of Aeromonas strains on the basis of multilocus sequence typing, phenotype, and presence of putative virulence genes.","The genus Aeromonas has been described as comprising several species associated with the aquatic environment, which represents their principal reservoir. Aeromonas spp. are commonly isolated from diseased and healthy fish, but the involvement of such bacteria in human infection and gastroenteritis has frequently been reported. The primary challenge in establishing an unequivocal link between the Aeromonas genus and pathogenesis in humans is the extremely complicated taxonomy. With the aim of clarifying taxonomic relationships among the strains and phenotypes, a multilocus sequencing approach was developed and applied to characterize 23 type and reference strains of Aeromonas spp. and a collection of 77 field strains isolated from fish, crustaceans, and mollusks. All strains were also screened for putative determinants of virulence by PCR (ast, ahh1, act, asa1, eno, ascV, and aexT) and the production of acylated homoserine lactones (AHLs). In addition, the phenotypic fingerprinting obtained from 29 biochemical tests was submitted to the nonparametric combination (NPC) test methodology to define the statistical differences among the identified genetic clusters. Multilocus sequence typing (MLST) achieved precise strain genotyping, and the phylogenetic analysis of concatenated sequences delineated the relationship among the taxa belonging to the genus Aeromonas, providing a powerful tool for outbreak traceability, host range diffusion, and ecological studies. The NPC test showed the feasibility of phenotypic differentiation among the majority of the MLST clusters by using a selection of tests or the entire biochemical fingerprinting. 
A Web-based MLST sequence database (http://pubmlst.org/aeromonas) specific for the Aeromonas genus was developed and implemented with all the results.",2011-06-03 +23513071,Environmental health resilience.,"The capacity of the Earth's environment to support increasing and expanding human populations has been questioned at least for hundreds of years, but never more than in the mid to late 20th Century and early 21st Century. Global human population now exceeds seven billion and continues to increase at an unprecedented rate. Estimates of future (2050) human populations on Earth range from a low of about 7.4 billion to a high of 10.6 billion (""United Nations World Population to 2300"", 2004 accessed at http://www.un.org/esa/population/publications/longrange2/WorldPop2300final.pdf). Current human populations already place an extreme burden on global environmental resources, including air, water and food quality as well as increasing challenges related to human waste management and disease prevention, control and treatment. In fact, some have proposed that humans have entered the ""anthropocene"", an age in which the global environment is dominated by human activities (http://www.sciencedaily.com/releases/2012/11/121101131609.htm). Climate change and expanding human populations contribute to increased risk of transmission of infectious and non-infectious disease. Developing nations with huge human populations such as China and India are benefitting from increased economic globalization, allowing for increased availability of personal luxuries such as automobiles, which in turn results in increased pollution and further depletion of natural resources such as global oil reserves. Increasing availability to global resources also may contribute to global conflict over environmental resources such as oil, water and food. In the United States, 2013 was the hottest year on record. Average global temperatures are also on the rise, with Australia being another prime example. 
Globally, 2012 was the tenth hottest year on record since data collection began in 1880 (http://www.ncdc.noaa.gov/sotc/global/2012/13). Many people are now starting to question the ability of human populations to continue to grow, and perhaps even for humans to continue to exist on the planet without significant changes in the way that we interact with our global environment. Others point out that dire predictions of the fragility of humanity have been made for thousands of years and that humans have continued to survive and even grow in spite of these challenges.",2013-03-07 +21636590,Identification of cavities on protein surface using multiple computational approaches for drug binding site prediction.,"

Motivation

Protein-ligand binding sites are the active sites on protein surface that perform protein functions. Thus, the identification of those binding sites is often the first step to study protein functions and structure-based drug design. There are many computational algorithms and tools developed in recent decades, such as LIGSITE(cs/c), PASS, Q-SiteFinder, SURFNET, and so on. In our previous work, MetaPocket, we have proved that it is possible to combine the results of many methods together to improve the prediction result.

Results

Here, we continue our previous work by adding four more methods Fpocket, GHECOM, ConCavity and POCASA to further improve the prediction success rate. The new method MetaPocket 2.0 and the individual approaches are all tested on two datasets of 48 unbound/bound and 210 bound structures as used before. The results show that the average success rate has been raised 5% at the top 1 prediction compared with previous work. Moreover, we construct a non-redundant dataset of drug-target complexes with known structure from DrugBank, DrugPort and PDB database and apply MetaPocket 2.0 to this dataset to predict drug binding sites. As a result, >74% drug binding sites on protein target are correctly identified at the top 3 prediction, and it is 12% better than the best individual approach.

Availability

The web service of MetaPocket 2.0 and all the test datasets are freely available at http://projects.biotec.tu-dresden.de/metapocket/ and http://sysbio.zju.edu.cn/metapocket.",2011-06-02 +21636591,PathVisio-MIM: PathVisio plugin for creating and editing Molecular Interaction Maps (MIMs).,"

Motivation

A plugin for the Java-based PathVisio pathway editor has been developed to help users draw diagrams of bioregulatory networks according to the Molecular Interaction Map (MIM) notation. Together with the core PathVisio application, this plugin presents a simple to use and cross-platform application for the construction of complex MIM diagrams with the ability to annotate diagram elements with comments, literature references and links to external databases. This tool extends the capabilities of the PathVisio pathway editor by providing both MIM-specific glyphs and support for a MIM-specific markup language file format for exchange with other MIM-compatible tools and diagram validation.

Availability

The PathVisio-MIM plugin is freely available and works with versions of PathVisio 2.0.11 and later on Windows, Mac OS X and Linux. Information about MIM notation and the MIMML format is available at http://discover.nci.nih.gov/mim. The plugin, along with diagram examples, instructions and Java source code, may be downloaded at http://discover.nci.nih.gov/mim/mim_pathvisio.html.",2011-06-02 +22151470,Accelerated large-scale multiple sequence alignment.,"

Background

Multiple sequence alignment (MSA) is a fundamental analysis method used in bioinformatics and many comparative genomic applications. Prior MSA acceleration attempts with reconfigurable computing have only addressed the first stage of progressive alignment and consequently exhibit performance limitations according to Amdahl's Law. This work is the first known to accelerate the third stage of progressive alignment on reconfigurable hardware.

Results

We reduce subgroups of aligned sequences into discrete profiles before they are pairwise aligned on the accelerator. Using an FPGA accelerator, an overall speedup of up to 150 has been demonstrated on a large data set when compared to a 2.4 GHz Core2 processor.

Conclusions

Our parallel algorithm and architecture accelerates large-scale MSA with reconfigurable computing and allows researchers to solve the larger problems that confront biologists today. Program source is available from http://dna.cs.byu.edu/msa/.",2011-12-07 +22706384,DEFOG: discrete enrichment of functionally organized genes.,"High-throughput biological experiments commonly result in a list of genes or proteins of interest. In order to understand the observed changes of the genes and to generate new hypotheses, one needs to understand the functions and roles of the genes and how those functions relate to the experimental conditions. Typically, statistical tests are performed in order to detect enriched Gene Ontology categories or pathways, i.e. the categories are observed in the genes of interest more often than is expected by chance. Depending on the number of genes and the complexity and quantity of functions in which they are involved, such an analysis can easily result in hundreds of enriched terms. To this end we developed DEFOG, a web-based application that facilitates the functional analysis of gene sets by hierarchically organizing the genes into functionally related modules. Our computational pipeline utilizes three powerful tools to achieve this goal: (1) GeneMANIA creates a functional consensus network of the genes of interest based on gene-list-specific data fusion of hundreds of genomic networks from publicly available sources; (2) Transitivity Clustering organizes those genes into a clear hierarchy of functionally related groups, and (3) Ontologizer performs a Gene Ontology enrichment analysis on the resulting gene clusters. DEFOG integrates this computational pipeline within an easy-to-use web interface, thus allowing for a novel visual analysis of gene sets that aids in the discovery of potentially important biological mechanisms and facilitates the creation of new hypotheses. 
DEFOG is available at http://www.mooneygroup.org/defog.",2012-06-18 +21639954,Combining DNA-microarray data in systemic lupus erythematosus.,"Systemic lupus erythematosus is a systemic, heterogeneous autoimmune disease. Understanding of its molecular complexity is incomplete and there is a need to identify new therapeutic targets and to optimize criteria for its diagnosis, assessment and prognosis. Recently, Arasappan and colleagues have described a new meta-analysis method that enables data analysis across different DNA-microarray datasets to identify genes and processes relevant to systemic lupus erythematosus. Their study provides a simple and valuable meta-analysis method for the selection of biomarkers and pathways in disease.See related research by Arasappan et al.: http://www.biomedcentral.com/1741-7015/9/65.",2011-05-31 +30731940,Fusarium Crown and Root Rot of Tarragon in California Caused by Fusarium solani.,"Tarragon, also known as estragon or dragon's-wort (Artemisia dracunculus), is a perennial plant in the Asteraceae. Tarragon is grown for use in cooking as a fresh and dried herb. In May 2010, commercial tarragon grown in a field on California's central coast was affected by a previously undescribed disease. Initial symptoms consisted of chlorosis of leaves and wilting of shoot tips. As the disease progressed, entire shoots and branches turned brown and died. The plant crown epidermis and cortex and the upper cortex of the main roots turned brown with occasional black streaking. Diseased plants died several weeks after the onset of wilting. A Fusarium species was consistently isolated from symptomatic crown and root tissues. On carnation leaf agar (CLA) incubated under lights, the isolates produced stout, slightly curved macroconidia having blunt apical cells. One- and two-celled oval to cylindrical microconidia were abundant and born in false heads on extremely long monophialides. Chlamydospores were present in 1-month-old cultures. 
On potato dextrose agar incubated under lights, the isolates produced abundant white aerial mycelium with bluish coloration of the culture surface. The isolates were identified as Fusarium solani (2). Pathogenicity tests were conducted using six isolates, with inoculum produced on CLA. For each isolate, 250 ml of a spore suspension (1 × 10^6 conidia/ml) were poured onto the roots of 10-cm potted tarragon plants. Ten plants were inoculated for each of the six isolates. A control set of tarragon was treated with 250 ml of water. All plants were maintained in a greenhouse set at 24 to 25°C. After 8 weeks, plants inoculated with the spore suspensions began to show wilting and browning of leaves. Crown epidermis and cortex and root cortex tissues were brown; Fusarium solani was reisolated from the crowns and roots. The experiment was repeated and the results were the same. To my knowledge, this is the first report of F. solani causing a crown and root rot disease of tarragon. The disease caused significant damage with approximately 50% of the commercial field affected. The other Fusarium species previously reported on tarragon is an uncharacterized F. oxysporum isolated from roots of plants grown in California (1). References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , March 8, 2011, (2) P. E. Nelson et al. Fusarium Species: An Illustrated Manual for Identification. Pennsylvania State University Press, University Park, PA, 1983.",2011-06-01 +30731924,First Report of Blueberry Leaf Rust Caused by Thekopsora minima on Vaccinium corymbosum in Michigan.,"Leaf rust symptoms have been noticed sporadically on northern highbush blueberry plants (Vaccinium corymbosum L.) in Michigan for the past 8 years. In 2009, leaf rust was seen in several cultivated blueberry fields and on greenhouse-grown blueberry plants in southwest Michigan. 
In 2010, leaf rust was widespread throughout western Michigan and particularly evident in the fall, sometimes resulting in premature defoliation. Cultivars Rubel, Jersey, Elliott, Liberty, and Brigitta were most commonly affected. Both the 2009 and 2010 growing seasons were characterized by above-average precipitation in early to mid-summer. Early symptoms on the adaxial leaf surface consisted of roughly circular yellow spots that later developed brown, necrotic centers. Older lesions were more angular and sometimes surrounded by a purplish border. In the fall, a ""green island"" effect was sometimes apparent around the lesions. On the abaxial side, numerous yellow-to-orange rust pustules (uredinia) were visible. Uredinia were dome shaped, erumpent, 100 to 400 μm in diameter, clustered, and sometimes coalescing. Urediniospores were broadly obovate with dark yellowish content and measured 19 to 25 × 16 to 20 μm (average 22 × 18 μm, n = 30). Spore walls were hyaline, echinulate, and 1.0 to 1.5 μm thick with obscure germ pores. Uredinia were examined with light and scanning electron microscopy for the presence of conspicuous ostiolar cells characteristic of Naohidemyces vaccinii (Wint.) Sato, Katsuya et Y. Hiratsuka, but none were observed. No telia or teliospores were observed. On the basis of morphology, the pathogen was identified as Thekopsora minima P. Syd. & Syd. (3,4) and a sample was deposited in the U.S. National Fungus Collection (BPI 881107). Genomic DNA was extracted from urediniospores of rust isolates from six different locations, and a 267-bp fragment of the ITS2 region was amplified and sequenced using the primers ITS3 and ITS4 (GenBank Accession No. HQ661383). All sequences were identical to each other and shared 99% identity (232 of 234 bp) with a T. minima isolate from South Africa (GenBank Accession No. GU355675). The alternate host, hemlock (mostly Tsuga canadensis L.) is a common and valuable conifer in the Michigan landscape. 
Hemlock trees were not examined for the presence of aecia but are assumed to play a role in the epidemiology of the disease in Michigan because leaf rust tends to be more severe near hemlock trees. Pucciniastrum vaccinii (G. Wint.) Jorst. was considered the causal agent of blueberry leaf rust until Sato et al. (1,4) identified three unique species. While T. minima has been reported on black huckleberry (Gaylussacia baccata [Wangenh.] K. Koch) in Michigan (4), to our knowledge, this is the first report of T. minima on highbush blueberry in the state. T. minima has been reported on highbush blueberry in Delaware and New York (4), Japan (2), and South Africa (3). The severity of the outbreak in 2010 warrants further research into economic losses, epidemiology, and management of the disease. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Botany and Mycology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 2010. (2) T. Kobayashi. Page 1227 in: Index of Fungi Inhabiting Woody Plants in Japan. Host, Distribution and Literature. Zenkoku-Noson-Kyoiku Kyokai Publishing Co., Tokyo, 2007. (3) L. Mostert et al. Plant Dis. 94:478, 2010. (4) S. Sato et al. Trans. Mycol. Soc. Jpn. 34:47, 1993.",2011-06-01 +21631914,Faster Smith-Waterman database searches with inter-sequence SIMD parallelisation.,"

Background

The Smith-Waterman algorithm for local sequence alignment is more sensitive than heuristic methods for database searching, but also more time-consuming. The fastest approach to parallelisation with SIMD technology has previously been described by Farrar in 2007. The aim of this study was to explore whether further speed could be gained by other approaches to parallelisation.

Results

A faster approach and implementation is described and benchmarked. In the new tool SWIPE, residues from sixteen different database sequences are compared in parallel to one query residue. Using a 375 residue query sequence a speed of 106 billion cell updates per second (GCUPS) was achieved on a dual Intel Xeon X5650 six-core processor system, which is over six times more rapid than software based on Farrar's 'striped' approach. SWIPE was about 2.5 times faster when the programs used only a single thread. For shorter queries, the increase in speed was larger. SWIPE was about twice as fast as BLAST when using the BLOSUM50 score matrix, while BLAST was about twice as fast as SWIPE for the BLOSUM62 matrix. The software is designed for 64 bit Linux on processors with SSSE3. Source code is available from http://dna.uio.no/swipe/ under the GNU Affero General Public License.

Conclusions

Efficient parallelisation using SIMD on standard hardware makes it possible to run Smith-Waterman database searches more than six times faster than before. The approach described here could significantly widen the potential application of Smith-Waterman searches. Other applications that require optimal local alignment scores could also benefit from improved performance.",2011-06-01 +21697124,"MetaABC--an integrated metagenomics platform for data adjustment, binning and clustering.","

Summary

MetaABC is a metagenomic platform that integrates several binning tools coupled with methods for removing artifacts, analyzing unassigned reads and controlling sampling biases. It allows users to arrive at a better interpretation via series of distinct combinations of analysis tools. After execution, MetaABC provides outputs in various visual formats such as tables, pie and bar charts as well as clustering result diagrams.

Availability

MetaABC source code and documentation are available at http://bits2.iis.sinica.edu.tw/MetaABC/ CONTACT: dywang@gate.sinica.edu.tw; hktsai@iis.sinica.edu.tw

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-22 +21624888,"SwissDock, a protein-small molecule docking web service based on EADock DSS.","Most life science processes involve, at the atomic scale, recognition between two molecules. The prediction of such interactions at the molecular level, by so-called docking software, is a non-trivial task. Docking programs have a wide range of applications ranging from protein engineering to drug design. This article presents SwissDock, a web server dedicated to the docking of small molecules on target proteins. It is based on the EADock DSS engine, combined with setup scripts for curating common problems and for preparing both the target protein and the ligand input files. An efficient Ajax/HTML interface was designed and implemented so that scientists can easily submit dockings and retrieve the predicted complexes. For automated docking tasks, a programmatic SOAP interface has been set up and template programs can be downloaded in Perl, Python and PHP. The web site also provides an access to a database of manually curated complexes, based on the Ligand Protein Database. A wiki and a forum are available to the community to promote interactions between users. The SwissDock web site is available online at http://www.swissdock.ch. We believe it constitutes a step toward generalizing the use of docking tools beyond the traditional molecular modeling community.",2011-05-29 +21619696,Conotoxin protein classification using free scores of words and support vector machines.,"

Background

Conotoxin has been proven to be effective in drug design and could be used to treat various disorders such as schizophrenia, neuromuscular disorders and chronic pain. With the rapidly growing interest in conotoxin, accurate conotoxin superfamily classification tools are desirable to systematize the increasing number of newly discovered sequences and structures. However, despite the significance and extensive experimental investigations on conotoxin, those tools have not been intensively explored.

Results

In this paper, we propose to consider suboptimal alignments of words with restricted length. We developed a scoring system based on local alignment partition functions, called free score. The scoring system plays the key role in the feature extraction step of support vector machine classification. In the classification of conotoxin proteins, our method, SVM-Freescore, features an improved sensitivity and specificity by approximately 5.864% and 3.76%, respectively, over previously reported methods. For the generalization purpose, SVM-Freescore was also applied to classify superfamilies from curated and high quality database such as ConoServer. The average computed sensitivity and specificity for the superfamily classification were found to be 0.9742 and 0.9917, respectively.

Conclusions

The SVM-Freescore method is shown to be a useful sequence-based analysis tool for functional and structural characterization of conotoxin proteins. The datasets and the software are available at http://faculty.uaeu.ac.ae/nzaki/SVM-Freescore.htm.",2011-05-29 +21765095,IRiS: construction of ARG networks at genomic scales.,"

Summary

Given a set of extant haplotypes IRiS first detects high confidence recombination events in their shared genealogy. Next using the local sequence topology defined by each detected event, it integrates these recombinations into an ancestral recombination graph. While the current system has been calibrated for human population data, it is easily extendible to other species as well.

Availability

IRiS (Identification of Recombinations in Sequences) binary files are available for non-commercial use in both Linux and Microsoft Windows, 32 and 64 bit environments from https://researcher.ibm.com/researcher/view_project.php?id=2303

Contact

parida@us.ibm.com.",2011-07-15 +22691297,Pharmacokinetic/pharmacodynamic model of the testosterone effects of triptorelin administered in sustained release formulations in patients with prostate cancer.,"The objectives of the current work were to develop a predictive population pharmacokinetic (PK)/pharmacodynamic (PD) model for the testosterone (TST) effects of triptorelin (TRP) administered in sustained-release (SR) formulations to patients with prostate cancer and determine the minimal required triptorelin serum concentration (C(TRP_min)) to keep the testosterone levels of the patients below or equal to the level of castration (TST ≤ 0.5 ng/ml). A total of eight healthy male volunteers and 74 patients with prostate cancer received one or two doses of triptorelin injected subcutaneously or intramuscularly. Five different triptorelin formulations were tested. Pharmacokinetic (serum concentration of triptorelin) and pharmacodynamic (TST levels in serum) data were analyzed by using the population approach with NONMEM software (http://www.iconplc.com/technology/products/nonmem/). The PK/PD model was constructed by assembling the agonist nature of triptorelin with the competitive reversible receptor binding interaction with the endogenous agonist, a process responsible for the initial and transient TST flare-up, and triggering down-regulation mechanisms described as a decrease in receptor synthesis. The typical population values of K(D), the receptor equilibrium dissociation constant of triptorelin, and C(TRP_min) to keep 95% of the patients castrated were 0.931 and 0.0609 ng/ml, respectively. 
The semimechanistic nature of the model renders the predictions of the effect of triptorelin on TST possible regardless the type of SR formulation administered, while exploring different designs during the development of new delivery systems.",2012-06-12 +21315797,An application programming interface for CellNetAnalyzer.,"CellNetAnalyzer (CNA) is a MATLAB toolbox providing computational methods for studying structure and function of metabolic and cellular signaling networks. In order to allow non-experts to use these methods easily, CNA provides GUI-based interactive network maps as a means of parameter input and result visualization. However, with the availability of high-throughput data, there is a need to make CNA's functionality also accessible in batch mode for automatic data processing. Furthermore, as some algorithms of CNA are of general relevance for network analysis it would be desirable if they could be called as sub-routines by other applications. For this purpose, we developed an API (application programming interface) for CNA allowing users (i) to access the content of network models in CNA, (ii) to use CNA's network analysis capabilities independent of the GUI, and (iii) to interact with the GUI to facilitate the development of graphical plugins. Here we describe the organization of network projects in CNA and the application of the new API functions to these projects. This includes the creation of network projects from scratch, loading and saving of projects and scenarios, and the application of the actual analysis methods. Furthermore, API functions for the import/export of metabolic models in SBML format and for accessing the GUI are described. Lastly, two example applications demonstrate the use and versatile applicability of CNA's API. 
CNA is freely available for academic use and can be downloaded from http://www.mpi-magdeburg.mpg.de/projects/cna/cna.html.",2011-02-21 +21622954,Swimming into peptidomimetic chemical space using pepMMsMIMIC.,"pepMMsMIMIC is a novel web-oriented peptidomimetic compound virtual screening tool based on a multi-conformers three-dimensional (3D)-similarity search strategy. Key to the development of pepMMsMIMIC has been the creation of a library of 17 million conformers calculated from 3.9 million commercially available chemicals collected in the MMsINC® database. Using as input the 3D structure of a peptide bound to a protein, pepMMsMIMIC suggests which chemical structures are able to mimic the protein-protein recognition of this natural peptide using both pharmacophore and shape similarity techniques. We hope that the accessibility of pepMMsMIMIC (freely available at http://mms.dsfarm.unipd.it/pepMMsMIMIC) will encourage medicinal chemists to de-peptidize protein-protein recognition processes of biological interest, thus increasing the potential of in silico peptidomimetic compound screening of known small molecules to expedite drug development.",2011-05-27 +21619643,An efficient algorithm for systematic analysis of nucleotide strings suitable for siRNA design.,"

Background

The ""off-target"" silencing effect hinders the development of siRNA-based therapeutic and research applications. Existing solutions for finding possible locations of siRNA seats within a large database of genes are either too slow, miss a portion of the targets, or are simply not designed to handle a very large number of queries. We propose a new approach that reduces the computational time as compared to existing techniques.

Findings

The proposed method employs tree-based storage in a form of a modified truncated suffix tree to sort all possible short string substrings within given set of strings (i.e. transcriptome). Using the new algorithm, we pre-computed a list of the best siRNA locations within each human gene (""siRNA seats""). siRNAs designed to reside within siRNA seats are less likely to hybridize off-target. These siRNA seats could be used as an input for the traditional ""set-of-rules"" type of siRNA designing software. The list of siRNA seats is available through a publicly available database located at http://web.cos.gmu.edu/~gmanyam/siRNA_db/search.php

Conclusions

In an attempt to perform top-down prediction of the human siRNA with minimized off-target hybridization, we developed an efficient algorithm that employs suffix tree based storage of the substrings. Applications of this approach are not limited to optimal siRNA design, but can also be useful for other tasks involving selection of the characteristic strings specific to individual genes. These strings could then be used as siRNA seats, as specific probes for gene expression studies by oligonucleotide-based microarrays, for the design of molecular beacon probes for Real-Time PCR and, generally, any type of PCR primers.",2011-05-27 +22446067,Paint4Net: COBRA Toolbox extension for visualization of stoichiometric models of metabolism.,"A visual analysis of reconstructions and large stoichiometric models with elastic change of the visualization scope and representation methods becomes increasingly important due to the rapidly growing size and number of available reconstructions. The Paint4Net is a novel COBRA Toolbox extension for automatic generation of a hypergraph layout of defined scope with the steady state rates of reaction fluxes of stoichiometric models. Directionalities and fluxes of reactions are constantly represented in the visualization while detailed information about reaction (ID, name and synonyms, and formula) and metabolite (ID, name and synonyms, and charged formula) appears placing the cursor on the item of interest. Additionally Paint4Net functionality can be used to: (1) get lists of involved metabolites and dead end metabolites of the visualized part of the network, (2) exclude (filter) particular metabolites from representation, (3) find isolated parts of a network and (4) find running cycles when all the substrates are cut down. Layout pictures can be saved in various formats and easily distributed. The Paint4Net is open source software under the GPL v3 license. 
Relevant documentation and sample data is available at http://www.biosystems.lv/paint4net. The Paint4Net works on MATLAB starting from version of 2009.",2012-03-15 +21622642,Curation of characterized glycoside hydrolases of fungal origin.,"Fungi produce a wide range of extracellular enzymes to break down plant cell walls, which are composed mainly of cellulose, lignin and hemicellulose. Among them are the glycoside hydrolases (GH), the largest and most diverse family of enzymes active on these substrates. To facilitate research and development of enzymes for the conversion of cell-wall polysaccharides into fermentable sugars, we have manually curated a comprehensive set of characterized fungal glycoside hydrolases. Characterized glycoside hydrolases were retrieved from protein and enzyme databases, as well as literature repositories. A total of 453 characterized glycoside hydrolases have been cataloged. They come from 131 different fungal species, most of which belong to the phylum Ascomycota. These enzymes represent 46 different GH activities and cover 44 of the 115 CAZy GH families. In addition to enzyme source and enzyme family, available biochemical properties such as temperature and pH optima, specific activity, kinetic parameters and substrate specificities were recorded. To simplify comparative studies, enzyme and species abbreviations have been standardized, Gene Ontology terms assigned and reference to supporting evidence provided. The annotated genes have been organized in a searchable, online database called mycoCLAP (Characterized Lignocellulose-Active Proteins of fungal origin). It is anticipated that this manually curated collection of biochemically characterized fungal proteins will be used to enhance functional annotation of novel GH genes. 
Database URL: http://mycoCLAP.fungalgenomics.ca/.",2011-05-26 +21512872,Generating nonsymbolic number stimuli.,"Studies investigating nonsymbolic numbers (e.g., dot arrays) are confronted with the problem that changes in numerosity are always accompanied by changes in the visual properties of the stimulus. It is therefore debated whether the visual properties of the stimulus rather than number can explain the results obtained in studies investigating nonsymbolic number processing. In this report, we present a program (available at http://titiagebuis.eu/Materials.html ; note that the program is designed to work with the Psychophysics Toolbox in MATLAB) that exports information about the visual properties of stimuli that co-vary with number (area extended, item size, total surface, density, and circumference). Consequently, insight into the relation between the visual properties of the stimulus and numerical distance can be achieved, and post hoc analyses can be conducted to directly reveal whether numerical distance or (some combinations of) the visual properties of a stimulus could be the most likely candidate underlying the results. Here, we report data that demonstrate the program's usefulness for research on nonsymbolic number stimuli.",2011-12-01 +22505320,Expanding molecular modeling and design tools to non-natural sidechains.,"Protein-protein interactions encode the wiring diagram of cellular signaling pathways and their deregulations underlie a variety of diseases, such as cancer. Inhibiting protein-protein interactions with peptide derivatives is a promising way to develop new biological and therapeutic tools. Here, we develop a general framework to computationally handle hundreds of non-natural amino acid sidechains and predict the effect of inserting them into peptides or proteins. We first generate all structural files (pdb and mol2), as well as parameters and topologies for standard molecular mechanics software (CHARMM and Gromacs). 
Accurate predictions of rotamer probabilities are provided using a novel combined knowledge and physics based strategy. Non-natural sidechains are useful to increase peptide ligand binding affinity. Our results obtained on non-natural mutants of a BCL9 peptide targeting beta-catenin show very good correlation between predicted and experimental binding free-energies, indicating that such predictions can be used to design new inhibitors. Data generated in this work, as well as PyMOL and UCSF Chimera plug-ins for user-friendly visualization of non-natural sidechains, are all available at http://www.swisssidechain.ch. Our results enable researchers to rapidly and efficiently work with hundreds of non-natural sidechains.",2012-04-14 +21911332,Metavir: a web server dedicated to virome analysis.,"

Summary

Metavir is a web server dedicated to the analysis of viral metagenomes (viromes). In addition to classical approaches for analyzing metagenomes (general sequence characteristics, taxonomic composition), new tools developed specifically for viral sequence analysis make it possible to: (i) explore viral diversity through automatically constructed phylogenies for selected marker genes, (ii) estimate gene richness through rarefaction curves and (iii) perform cross-comparison against other viromes using sequence similarities. Metavir is thus unique as a platform that allows a comprehensive virome analysis.

Availability

Metavir is freely available online at: http://metavir-meb.univ-bpclermont.fr.

Contact

simon.roux@univ-bpclermont.fr.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-11 +21747092,"Nomograms for predicting local recurrence, distant metastases, and overall survival for patients with locally advanced rectal cancer on the basis of European randomized clinical trials.","

Purpose

The purpose of this study was to develop accurate models and nomograms to predict local recurrence, distant metastases, and survival for patients with locally advanced rectal cancer treated with long-course chemoradiotherapy (CRT) followed by surgery and to allow for a selection of patients who may benefit most from postoperative adjuvant chemotherapy and close follow-up.

Patients and methods

All data (N = 2,795) from five major European clinical trials for rectal cancer were pooled and used to perform an extensive survival analysis and to develop multivariate nomograms based on Cox regression. Data from one trial was used as an external validation set. The variables used in the analysis were sex, age, clinical tumor stage, tumor location, radiotherapy dose, concurrent and adjuvant chemotherapy, surgery procedure, and pTNM stage. Model performance was evaluated by the concordance index (c-index). Risk group stratification was proposed for the nomograms.

Results

The nomograms are able to predict events with a c-index for external validation of local recurrence (LR; 0.68), distant metastases (DM; 0.73), and overall survival (OS; 0.70). Pathologic staging is essential for accurate prediction of long-term outcome. Both preoperative CRT and adjuvant chemotherapy have an added value when predicting LR, DM, and OS rates. The stratification in risk groups allows significant distinction between Kaplan-Meier curves for outcome.

Conclusion

The easy-to-use nomograms can predict LR, DM, and OS over a 5-year period after surgery. They may be used as decision support tools in future trials by using the three defined risk groups to select patients for postoperative chemotherapy and close follow-up (http://www.predictcancer.org).",2011-07-11 +21609960,The PETfold and PETcofold web servers for intra- and intermolecular structures of multiple RNA sequences.,"The function of non-coding RNA genes largely depends on their secondary structure and the interaction with other molecules. Thus, an accurate prediction of secondary structure and RNA-RNA interaction is essential for the understanding of biological roles and pathways associated with a specific RNA gene. We present web servers to analyze multiple RNA sequences for common RNA structure and for RNA interaction sites. The web servers are based on the recent PET (Probabilistic Evolutionary and Thermodynamic) models PETfold and PETcofold, but add user friendly features ranging from a graphical layer to interactive usage of the predictors. Additionally, the web servers provide direct access to annotated RNA alignments, such as the Rfam 10.0 database and multiple alignments of 16 vertebrate genomes with human. The web servers are freely available at: http://rth.dk/resources/petfold/",2011-05-23 +21246630,XANNpred: neural nets that predict the propensity of a protein to yield diffraction-quality crystals.,"Production of diffracting crystals is a critical step in determining the three-dimensional structure of a protein by X-ray crystallography. Computational techniques to rank proteins by their propensity to yield diffraction-quality crystals can improve efficiency in obtaining structural data by guiding both protein selection and construct design. XANNpred comprises a pair of artificial neural networks that each predict the propensity of a selected protein sequence to produce diffraction-quality crystals by current structural biology techniques. 
Blind tests show XANNpred has accuracy and Matthews correlation values ranging from 75% to 81% and 0.50 to 0.63 respectively; values of area under the receiver operator characteristic (ROC) curve range from 0.81 to 0.88. On blind test data XANNpred outperforms the other available algorithms XtalPred, PXS, OB-Score, and ParCrys. XANNpred also guides construct design by presenting graphs of predicted propensity for diffraction-quality crystals against residue sequence position. The XANNpred-SG algorithm is likely to be most useful to target selection in structural genomics consortia, while the XANNpred-PDB algorithm is more suited to the general structural biology community. XANNpred predictions that include sliding window graphs are freely available from http://www.compbio.dundee.ac.uk/xannpred",2011-01-18 +22847931,Deep architectures for protein contact map prediction.,"

Motivation

Residue-residue contact prediction is important for protein structure prediction and other applications. However, the accuracy of current contact predictors often barely exceeds 20% on long-range contacts, falling short of the level required for ab initio structure prediction.

Results

Here, we develop a novel machine learning approach for contact map prediction using three steps of increasing resolution. First, we use 2D recursive neural networks to predict coarse contacts and orientations between secondary structure elements. Second, we use an energy-based method to align secondary structure elements and predict contact probabilities between residues in contacting alpha-helices or strands. Third, we use a deep neural network architecture to organize and progressively refine the prediction of contacts, integrating information over both space and time. We train the architecture on a large set of non-redundant proteins and test it on a large set of non-homologous domains, as well as on the set of protein domains used for contact prediction in the two most recent CASP8 and CASP9 experiments. For long-range contacts, the accuracy of the new CMAPpro predictor is close to 30%, a significant increase over existing approaches.

Availability

CMAPpro is available as part of the SCRATCH suite at http://scratch.proteomics.ics.uci.edu/.

Contact

pfbaldi@uci.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-07-30 +22992715,Health disparities in later life: a simultaneous equations analysis of utilization.,"

Objective

This article examines health disparities between older Blacks/Whites by recognizing the importance of health services utilization. Although previous studies have examined health and utilization independently, this is among the first to (a) model its endogenous relation with utilization, and (b) use a continuous measure for health.

Data

Household Component files from Medical Expenditures Panel Survey (MEPS) from 2004 and 2005, with 1,369 observations (1,169 for White and 200 for Black) between the ages of 61-69.

Methods

The methods employed are two-equation modeling where Medicare eligibility functions as the identification criterion and also as an exogenous shock.

Results

The results show older Blacks continue to remain in poorer health despite access to care and insurance status. The author shows underutilization accounts for some of this observed disparity and offers novel approaches to overcome this issue.

Conclusion

With the baby-boom cohort approaching retirement, this area of research is timely. This work is also practical because the Department of Health and Human Services has launched various projects examining health care issues for Americans. One major project, Healthy People 2010, provides a ""framework for prevention for the Nation [and is] designed to identify the most significant preventable threats to health and to establish national goals to reduce these threats"" (http://www.healthypeople.gov/About/). Of the 28 areas, this article complements objectives relating to (a) disability and secondary conditions, and (b) access to quality health services. This article also supports Healthy People 2020, which sets a high priority on access to health care (one of 12 topic areas) and categorizes health disparities as part of the Leading Health Indicators Framework.",2012-09-19 +22869843,Management of thyroid dysfunction during pregnancy and postpartum: an Endocrine Society clinical practice guideline.,"

Objective

The aim was to update the guidelines for the management of thyroid dysfunction during pregnancy and postpartum published previously in 2007. A summary of changes between the 2007 and 2012 version is identified in the Supplemental Data (published on The Endocrine Society's Journals Online web site at http://jcem.endojournals.org).

Evidence

This evidence-based guideline was developed according to the U.S. Preventive Service Task Force, grading items level A, B, C, D, or I, on the basis of the strength of evidence and magnitude of net benefit (benefits minus harms) as well as the Grading of Recommendations, Assessment, Development, and Evaluation (GRADE) system to describe both the strength of recommendations and the quality of evidence.

Consensus process

The guideline was developed through a series of e-mails, conference calls, and one face-to-face meeting. An initial draft was prepared by the Task Force, with the help of a medical writer, and reviewed and commented on by members of The Endocrine Society, Asia and Oceania Thyroid Association, and the Latin American Thyroid Society. A second draft was reviewed and approved by The Endocrine Society Council. At each stage of review, the Task Force received written comments and incorporated substantive changes.

Conclusions

Practice guidelines are presented for diagnosis and treatment of patients with thyroid-related medical issues just before and during pregnancy and in the postpartum interval. These include evidence-based approaches to assessing the cause of the condition, treating it, and managing hypothyroidism, hyperthyroidism, gestational hyperthyroidism, thyroid autoimmunity, thyroid tumors, iodine nutrition, postpartum thyroiditis, and screening for thyroid disease. Indications and side effects of therapeutic agents used in treatment are also presented.",2012-08-01 +22515319,SprayQc: a real-time LC-MS/MS quality monitoring system to maximize uptime using off the shelf components.,"With the advent of high-throughput mass spectrometry (MS)-based proteomics, the magnitude and complexity of the performed experiments has increased dramatically. Likewise, investments in chromatographic and MS instrumentation are a large proportion of the budget of proteomics laboratories. Guarding measurement quality and maximizing uptime of the LC-MS/MS systems therefore requires constant care despite automated workflows. We describe a real-time surveillance system, called SprayQc, that continuously monitors the status of the peripheral equipment to ensure that operational parameters are within an acceptable range. SprayQc is composed of multiple plug-in software components that use computer vision to analyze electrospray conditions, monitor the chromatographic device for stable backpressure, interact with a column oven to control pressure by temperature, and ensure that the mass spectrometer is still acquiring data. Action is taken when a failure condition has been detected, such as stopping the column oven and the LC flow, as well as automatically notifying the appropriate operator. Additionally, all defined metrics can be recorded synchronized on retention time with the MS acquisition file, allowing for later inspection and providing valuable information for optimization. 
SprayQc has been extensively tested in our laboratory, supports third-party plug-in development, and is freely available for download from http://sourceforge.org/projects/sprayqc .",2012-05-11 +21467568,Increasing power of genome-wide association studies by collecting additional single-nucleotide polymorphisms.,"Genome-wide association studies (GWASs) have been effectively identifying the genomic regions associated with a disease trait. In a typical GWAS, an informative subset of the single-nucleotide polymorphisms (SNPs), called tag SNPs, is genotyped in case/control individuals. Once the tag SNP statistics are computed, the genomic regions that are in linkage disequilibrium (LD) with the most significantly associated tag SNPs are believed to contain the causal polymorphisms. However, such LD regions are often large and contain many additional polymorphisms. Following up all the SNPs included in these regions is costly and infeasible for biological validation. In this article we address how to characterize these regions cost effectively with the goal of providing investigators a clear direction for biological validation. We introduce a follow-up study approach for identifying all untyped associated SNPs by selecting additional SNPs, called follow-up SNPs, from the associated regions and genotyping them in the original case/control individuals. We introduce a novel SNP selection method with the goal of maximizing the number of associated SNPs among the chosen follow-up SNPs. We show how the observed statistics of the original tag SNPs and human genetic variation reference data such as the HapMap Project can be utilized to identify the follow-up SNPs. We use simulated and real association studies based on the HapMap data and the Wellcome Trust Case Control Consortium to demonstrate that our method shows superior performance to the correlation- and distance-based traditional follow-up SNP selection approaches. 
Our method is publicly available at http://genetics.cs.ucla.edu/followupSNPs.",2011-04-05 +22578385,[Ageing: research in Spain and Europe].,"Researchers, stakeholders and policy makers agree about the importance of the population ageing in modern societies, so a broad analysis of current research strategies is in progress, such as FUTURAGE, a network for drawing a map for future research on ageing. This document presents the Spanish contribution to this map following FUTURAGE guidelines, drawn from the debates held in the 'Ageing. Research in Spain and Europe' Workshop. The first part consists of general ideas seeking to define future challenges on research using a multidisciplinary approach, in which the theoretical and methodological debate, the comparative and multilevel perspective, the transfer of knowledge and involvement of the older people would be essential to consider. Some of the main issues according to FUTURAGE structure are, the bio-gerontology of ageing, healthy and active ageing, and the socioeconomic and environmental resources of ageing. The interaction between these contents is pivotal to understand the research on ageing. Finally, the document provides some methodological and instrumental ideas to reinforce the need for cross-sectional research initiatives, integrating different data and combining methods in order to develop assessment and intervention strategies. Other aspects look into the mechanisms to coordinate research within a European context. 
The map on ageing research has been published after the consultation process in Europe (http://futurage.group.shef.ac.uk/road-map.html) and is now ready to be considered for integration into future European and Spanish research programs.",2012-05-09 +21602264,miRvestigator: web application to identify miRNAs responsible for co-regulated gene expression patterns discovered through transcriptome profiling.,"Transcriptome profiling studies have produced staggering numbers of gene co-expression signatures for a variety of biological systems. A significant fraction of these signatures will be partially or fully explained by miRNA-mediated targeted transcript degradation. miRvestigator takes as input lists of co-expressed genes from Caenorhabditis elegans, Drosophila melanogaster, G. gallus, Homo sapiens, Mus musculus or Rattus norvegicus and identifies the specific miRNAs that are likely to bind to 3' un-translated region (UTR) sequences to mediate the observed co-regulation. The novelty of our approach is the miRvestigator hidden Markov model (HMM) algorithm which systematically computes a similarity P-value for each unique miRNA seed sequence from the miRNA database miRBase to an overrepresented sequence motif identified within the 3'-UTR of the query genes. We have made this miRNA discovery tool accessible to the community by integrating our HMM algorithm with a proven algorithm for de novo discovery of miRNA seed sequences and wrapping these algorithms into a user-friendly interface. Additionally, the miRvestigator web server also produces a list of putative miRNA binding sites within 3'-UTRs of the query transcripts to facilitate the design of validation experiments. 
The miRvestigator is freely available at http://mirvestigator.systemsbiology.net.",2011-05-20 +21598906,A comparison of different QSAR approaches to modeling CYP450 1A2 inhibition.,"Prediction of CYP450 inhibition activity of small molecules poses an important task due to high risk of drug-drug interactions. CYP1A2 is an important member of CYP450 superfamily and accounts for 15% of total CYP450 presence in human liver. This article compares 80 in-silico QSAR models that were created by following the same procedure with different combinations of descriptors and machine learning methods. The training and test sets consist of 3745 and 3741 inhibitors and noninhibitors from PubChem BioAssay database. A heterogeneous external test set of 160 inhibitors was collected from literature. The studied descriptor sets involve E-state, Dragon and ISIDA SMF descriptors. Machine learning methods involve Associative Neural Networks (ASNN), K Nearest Neighbors (kNN), Random Tree (RT), C4.5 Tree (J48), and Support Vector Machines (SVM). The influence of descriptor selection on model accuracy was studied. The benefits of ""bagging"" modeling approach were shown. Applicability domain approach was successfully applied in this study and ways of increasing model accuracy through use of applicability domain measures were demonstrated as well as fragment-based model interpretation was performed. The most accurate models in this study achieved values of 83% and 68% correctly classified instances on the internal and external test sets, respectively. The applicability domain approach allowed increasing the prediction accuracy to 90% for 78% of the internal and 17% of the external test sets, respectively. The most accurate models are available online at http://ochem.eu/models/Q5747 .",2011-05-20 +22668791,Borrowing strength: a likelihood ratio test for related sparse signals.,"

Motivation

Cancer biology is a field where the complexity of the phenomena battles against the availability of data. Often only a few observations per signal source, i.e. genes, are available. Such scenarios are becoming increasingly more relevant as modern sensing technologies generally have no trouble in measuring lots of channels, but where the number of subjects, such as patients or samples, is limited. In statistics, this problem falls under the heading 'large p, small n'. Moreover, in such situations the use of asymptotic analytical results should generally be mistrusted.

Results

We consider two cancer datasets, with the aim to mine the activity of functional groups of genes. We propose a hierarchical model with two layers in which the individual signals share a common variance component. A likelihood ratio test is defined for the difference between two collections of corresponding signals. The small number of observations requires a careful consideration of the bias of the statistic, which is corrected through an explicit Bartlett correction. The test is validated on Monte Carlo simulations, which show improved detection of differences compared with other methods. In a leukaemia study and a cancerous fibroblast cell line, we find that the method also works better in practice, i.e. it gives a richer picture of the underlying biology.

Availability

The MATLAB code is available from the authors or on http://www.math.rug.nl/stat/Software.

Contact

e.c.wit@rug.nl d.bakewell@liv.ac.uk.",2012-06-04 +22210604,Discovery and mapping of a new expressed sequence tag-single nucleotide polymorphism and simple sequence repeat panel for large-scale genetic studies and breeding of Theobroma cacao L.,"Theobroma cacao is an economically important tree of several tropical countries. Its genetic improvement is essential to provide protection against major diseases and improve chocolate quality. We discovered and mapped new expressed sequence tag-single nucleotide polymorphism (EST-SNP) and simple sequence repeat (SSR) markers and constructed a high-density genetic map. By screening 149 650 ESTs, 5246 SNPs were detected in silico, of which 1536 corresponded to genes with a putative function, while 851 had a clear polymorphic pattern across a collection of genetic resources. In addition, 409 new SSR markers were detected on the Criollo genome. Lastly, 681 new EST-SNPs and 163 new SSRs were added to the pre-existing 418 co-dominant markers to construct a large consensus genetic map. This high-density map and the set of new genetic markers identified in this study are a milestone in cocoa genomics and for marker-assisted breeding. The data are available at http://tropgenedb.cirad.fr.",2011-12-30 +23622232,Plant cell wall profiling by fast maximum likelihood reconstruction (FMLR) and region-of-interest (ROI) segmentation of solution-state 2D 1H-13C NMR spectra.,"

Background

Interest in the detailed lignin and polysaccharide composition of plant cell walls has surged within the past decade partly as a result of biotechnology research aimed at converting biomass to biofuels. High-resolution, solution-state 2D 1H-13C HSQC NMR spectroscopy has proven to be an effective tool for rapid and reproducible fingerprinting of the numerous polysaccharides and lignin components in unfractionated plant cell wall materials, and is therefore a powerful tool for cell wall profiling based on our ability to simultaneously identify and comparatively quantify numerous components within spectra generated in a relatively short time. However, assigning peaks in new spectra, integrating them to provide relative component distributions, and producing color-assigned spectra, are all current bottlenecks to the routine use of such NMR profiling methods.

Results

We have assembled a high-throughput software platform for plant cell wall profiling that uses spectral deconvolution by Fast Maximum Likelihood Reconstruction (FMLR) to construct a mathematical model of the signals present in a set of related NMR spectra. Combined with a simple region of interest (ROI) table that maps spectral regions to NMR chemical shift assignments of chemical entities, the reconstructions can provide rapid and reproducible fingerprinting of numerous polysaccharide and lignin components in unfractionated cell wall material, including derivation of lignin monomer unit (S:G:H) ratios or the so-called SGH profile. Evidence is presented that ROI-based amplitudes derived from FMLR provide a robust feature set for subsequent multivariate analysis. The utility of this approach is demonstrated on a large transgenic study of Arabidopsis requiring concerted analysis of 91 ROIs (including both assigned and unassigned regions) in the lignin and polysaccharide regions of almost 100 related 2D 1H-13C HSQC spectra.

Conclusions

We show that when a suitable number of replicates are obtained per sample group, the correlated patterns of enriched and depleted cell wall components can be reliably and objectively detected even prior to multivariate analysis. The analysis methodology has been implemented in a publicly-available, cross-platform (Windows/Mac/Linux), web-enabled software application that enables researchers to view and publish detailed annotated spectra in addition to summary reports in simple spreadsheet data formats. The analysis methodology is not limited to studies of plant cell walls but is amenable to any NMR study where ROI segmentation techniques generate meaningful results.Please see Research Article: http://www.biotechnologyforbiofuels.com/content/6/1/46/.",2013-04-26 +21593126,HMMER web server: interactive sequence similarity searching.,"HMMER is a software suite for protein sequence similarity searches using probabilistic methods. Previously, HMMER has mainly been available only as a computationally intensive UNIX command-line tool, restricting its use. Recent advances in the software, HMMER3, have resulted in a 100-fold speed gain relative to previous versions. It is now feasible to make efficient profile hidden Markov model (profile HMM) searches via the web. A HMMER web server (http://hmmer.janelia.org) has been designed and implemented such that most protein database searches return within a few seconds. Methods are available for searching either a single protein sequence, multiple protein sequence alignment or profile HMM against a target sequence database, and for searching a protein sequence against Pfam. The web server is designed to cater to a range of different user expertise and accepts batch uploading of multiple queries at once. All search methods are also available as RESTful web services, thereby allowing them to be readily integrated as remotely executed tasks in locally scripted workflows. 
We have focused on minimizing search times and the ability to rapidly display tabular results, regardless of the number of matches found, developing graphical summaries of the search results to provide quick, intuitive appraisement of them.",2011-05-18 +22668788,DiNuP: a systematic approach to identify regions of differential nucleosome positioning.,"

Motivation

With the rapid development of high-throughput sequencing technologies, the genome-wide profiling of nucleosome positioning has become increasingly affordable. Many future studies will investigate the dynamic behaviour of nucleosome positioning in cells that have different states or that are exposed to different conditions. However, a robust method to effectively identify the regions of differential nucleosome positioning (RDNPs) has not been previously available.

Results

We describe a novel computational approach, DiNuP, that compares nucleosome profiles generated by high-throughput sequencing under various conditions. DiNuP provides a statistical P-value for each identified RDNP based on the difference of read distributions. DiNuP also empirically estimates the false discovery rate as a cutoff when two samples have different sequencing depths and differentiates reliable RDNPs from the background noise. Evaluation of DiNuP showed it to be both sensitive and specific for the detection of changes in nucleosome location, occupancy and fuzziness. RDNPs that were identified using publicly available datasets revealed that nucleosome positioning dynamics are closely related to the epigenetic regulation of transcription.

Availability and implementation

DiNuP is implemented in Python and is freely available at http://www.tongji.edu.cn/~zhanglab/DiNuP.

Contact

yzhang@tongji.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-06-04 +22208765,A new strategy for better genome assembly from very short reads.,"

Background

With the rapid development of the next generation sequencing (NGS) technology, large quantities of genome sequencing data have been generated. Because of repetitive regions of genomes and some other factors, assembly of very short reads is still a challenging issue.

Results

A novel strategy for improving genome assembly from very short reads is proposed. It can increase accuracies of assemblies by integrating de novo contigs, and produce comparative contigs by allowing multiple references without limiting to genomes of closely related strains. Comparative contigs are used to scaffold de novo contigs. Using simulated and real datasets, it is shown that our strategy can effectively improve qualities of assemblies of isolated microbial genomes and metagenomes.

Conclusions

With more and more reference genomes available, our strategy will be useful to improve qualities of genome assemblies from very short reads. Some scripts are provided to make our strategy applicable at http://code.google.com/p/cd-hybrid/.",2011-12-30 +21310746,FISH Finder: a high-throughput tool for analyzing FISH images.,"

Motivation

Fluorescence in situ hybridization (FISH) is used to study the organization and the positioning of specific DNA sequences within the cell nucleus. Analyzing the data from FISH images is a tedious process that invokes an element of subjectivity. Automated FISH image analysis offers savings in time as well as gaining the benefit of objective data analysis. While several FISH image analysis software tools have been developed, they often use a threshold-based segmentation algorithm for nucleus segmentation. As fluorescence signal intensities can vary significantly from experiment to experiment, from cell to cell, and within a cell, threshold-based segmentation is inflexible and often insufficient for automatic image analysis, leading to additional manual segmentation and potential subjective bias. To overcome these problems, we developed a graphical software tool called FISH Finder to automatically analyze FISH images that vary significantly. By posing the nucleus segmentation as a classification problem, compound Bayesian classifier is employed so that contextual information is utilized, resulting in reliable classification and boundary extraction. This makes it possible to analyze FISH images efficiently and objectively without adjustment of input parameters. Additionally, FISH Finder was designed to analyze the distances between differentially stained FISH probes.

Availability

FISH Finder is a standalone MATLAB application and platform independent software. The program is freely available from: http://code.google.com/p/fishfinder/downloads/list.",2011-02-09 +21586583,Taxonomic classification of metagenomic shotgun sequences with CARMA3.,"The vast majority of microbes are unculturable and thus cannot be sequenced by means of traditional methods. High-throughput sequencing techniques like 454 or Solexa-Illumina make it possible to explore those microbes by studying whole natural microbial communities and analysing their biological diversity as well as the underlying metabolic pathways. Over the past few years, different methods have been developed for the taxonomic and functional characterization of metagenomic shotgun sequences. However, the taxonomic classification of metagenomic sequences from novel species without close homologue in the biological sequence databases poses a challenge due to the high number of wrong taxonomic predictions on lower taxonomic ranks. Here we present CARMA3, a new method for the taxonomic classification of assembled and unassembled metagenomic sequences that has been adapted to work with both BLAST and HMMER3 homology searches. We show that our method makes fewer wrong taxonomic predictions (at the same sensitivity) than other BLAST-based methods. CARMA3 is freely accessible via the web application WebCARMA from http://webcarma.cebitec.uni-bielefeld.de.",2011-05-17 +21586727,Network-based tools for the identification of novel drug targets.,"In the past few years, network-based tools have become increasingly important in the identification of novel molecular targets for drug development. Systems-based approaches to predict signal transduction-related drug targets have developed into an especially promising field. Here, we summarize our studies, which indicate that modular bridges and overlaps of protein-protein interaction and signaling networks may be of key importance in future drug design. 
Intermodular nodes are very efficient in mediating the transmission of perturbations between signaling modules and are important in network cooperation. The analysis of stress-induced rearrangements of the yeast interactome by the ModuLand modularization algorithm indicated that components of modular overlap are key players in cellular adaptation to stress. Signaling crosstalk was much more pronounced in humans than in Caenorhabditis elegans or Drosophila melanogaster in the SignaLink (http://www.SignaLink.org) database, a uniformly curated database of eight major signaling pathways. We also showed that signaling proteins that participate in multiple pathways included multiple established drug targets and drug target candidates. Lastly, we caution that the pervasive overlap of cellular network modules implies that wider use of multitarget drugs to partially inhibit multiple individual proteins will be necessary to modify specific cellular functions, because targeting single proteins for complete disruption usually affects multiple cellular functions with little specificity for a particular process. Tools for analyzing network topology and especially network dynamics have great potential to identify alternative sets of targets for developing multitarget drugs.",2011-05-17 +21586582,iPBA: a tool for protein structure comparison using sequence alignment strategies.,"With the immense growth in the number of available protein structures, fast and accurate structure comparison has been essential. We propose an efficient method for structure comparison, based on a structural alphabet. Protein Blocks (PBs) is a widely used structural alphabet with 16 pentapeptide conformations that can fairly approximate a complete protein chain. Thus a 3D structure can be translated into a 1D sequence of PBs. With a simple Needleman-Wunsch approach and a raw PB substitution matrix, PB-based structural alignments were better than many popular methods. 
iPBA web server presents an improved alignment approach using (i) specialized PB Substitution Matrices (SM) and (ii) anchor-based alignment methodology. With these developments, the quality of ∼88% of alignments was improved. iPBA alignments were also better than DALI, MUSTANG and GANGSTA(+) in >80% of the cases. The webserver is designed for both pairwise comparisons and database searches. Outputs are given as sequence alignment and superposed 3D structures displayed using PyMol and Jmol. A local alignment option for detecting sub-structural similarity is also embedded. As a fast and efficient 'sequence-based' structure comparison tool, we believe that it will be quite useful to the scientific community. iPBA can be accessed at http://www.dsimb.inserm.fr/dsimb_tools/ipba/.",2011-05-17 +21372085,Comparative visualization of genetic and physical maps with Strudel.,"

Unlabelled

Data visualization can play a key role in comparative genomics, for example, underpinning the investigation of conserved synteny patterns. Strudel is a desktop application that allows users to easily compare both genetic and physical maps interactively and efficiently. It can handle large datasets from several genomes simultaneously, and allows all-by-all comparisons between these.

Availability and implementation

Installers for Strudel are available for Windows, Linux, Solaris and Mac OS X at http://bioinf.scri.ac.uk/strudel/.",2011-03-03 +21575179,SORGOdb: Superoxide Reductase Gene Ontology curated DataBase.,"

Background

Superoxide reductases (SOR) catalyse the reduction of superoxide anions to hydrogen peroxide and are involved in the oxidative stress defences of anaerobic and facultative anaerobic organisms. Genes encoding SOR were discovered recently and suffer from annotation problems. These genes, named sor, are short and the transfer of annotations from previously characterized neelaredoxin, desulfoferrodoxin, superoxide reductase and rubredoxin oxidase has been heterogeneous. Consequently, many sor remain anonymous or mis-annotated.

Description

SORGOdb is an exhaustive database of SOR that proposes a new classification based on domain architecture. SORGOdb supplies a simple user-friendly web-based database for retrieving and exploring relevant information about the proposed SOR families. The database can be queried using an organism name, a locus tag or phylogenetic criteria, and also offers sequence similarity searches using BlastP. Genes encoding SOR have been re-annotated in all available genome sequences (prokaryotic and eukaryotic (complete and in draft) genomes, updated in May 2010).

Conclusions

SORGOdb contains 325 non-redundant and curated SOR, from 274 organisms. It proposes a new classification of SOR into seven different classes and allows biologists to explore and analyze sor in order to establish correlations between the class of SOR and organism phenotypes. SORGOdb is freely available at http://sorgo.genouest.org/index.php.",2011-05-16 +21576220,The FALC-Loop web server for protein loop modeling.,"The FALC-Loop web server provides an online interface for protein loop modeling by employing an ab initio loop modeling method called FALC (fragment assembly and analytical loop closure). The server may be used to construct loop regions in homology modeling, to refine unreliable loop regions in experimental structures or to model segments of designed sequences. The FALC method is computationally less expensive than typical ab initio methods because the conformational search space is effectively reduced by the use of fragments derived from a structure database. The analytical loop closure algorithm allows efficient search for loop conformations that fit into the protein framework starting from the fragment-assembled structures. The FALC method shows prediction accuracy comparable to other state-of-the-art loop modeling methods. Top-ranked model structures can be visualized on the web server, and an ensemble of loop structures can be downloaded for further analysis. The web server can be freely accessed at http://falc-loop.seoklab.org/.",2011-05-16 +21593112,Cardiovascular disease in diabetes: where does glucose fit in?,"

Context

Recent prospective clinical trials have failed to confirm a unique benefit from normalization of glycemia on cardiovascular disease outcomes, despite evidence from basic vascular biology, epidemiological, and cohort studies.

Evidence acquisition

The literature was searched using the http://www.ncbi.nlm.nih.gov search engine including over 20 million citations on MEDLINE (1970 to present). Keyword searches included: atherosclerosis, cardiovascular, and glucose. Epidemiological, cohort, and interventional data on cardiovascular disease outcomes and glycemic control were reviewed along with analysis of recent reviews on this topic.

Evidence synthesis

High glucose activates a proatherogenic phenotype in all cell types in the vessel wall including endothelial cells, vascular smooth muscle cells, inflammatory cells, fibroblasts, and platelets, leading to a feedforward atherogenic response. EPIDEMIOLOGICAL AND COHORT STUDIES: Epidemiological and cohort evidence indicates a clear and consistent correlation of glycemia with cardiovascular disease. A recent report of over 25,000 subjects with diabetes in the Swedish National Diabetes Registry verifies this relationship in contemporary practice. Interventional Studies: Prospective randomized interventions targeting a hemoglobin A1c of 6-6.5% for cardiovascular disease prevention failed to consistently decrease cardiovascular events or all-cause mortality.

Conclusions

Basic vascular biology data plus epidemiological and cohort evidence would predict that glucose control should impact cardiovascular events. Prospective clinical trials demonstrate that current strategies that improve blood glucose do not achieve this goal but suggest that a period of optimal control may confer long-term cardiovascular disease benefit. Clinicians should target a hemoglobin A1c of 7% for the prevention of microvascular complications, individualized to avoid hypoglycemia.",2011-05-18 +22204606,Asymptomatic neurocognitive disorders in patients infected by HIV: fact or fiction?,"Neurocognitive disorders are emerging as a possible complication in patients infected with HIV. Even if asymptomatic, neurocognitive abnormalities are frequently detected using a battery of tests. This supported the creation of asymptomatic neurocognitive impairment (ANI) as a new entity. In a recent article published in BMC Infectious Diseases, Magnus Gisslén and colleagues applied a statistical approach, concluding that there is an overestimation of the actual problem. In fact, about 20% of patients are classified as neurocognitively impaired without a clear impact on daily activities. In the present commentary, we discuss the clinical implications of their findings. Although a cautious approach would indicate a stricter follow-up of patients affected by this disorder, it is premature to consider it as a proper disease. Based on a review of the data in the current literature we conclude that it is urgent to conduct more studies to estimate the overall risk of progression of the asymptomatic neurocognitive impairment. Moreover, it is important to understand whether new biomarkers or neuroimaging tools can help to identify better the most at risk population. Please see related article: http://www.biomedcentral.com/1471-2334/11/356.",2011-12-28 +22967011,AbsIDconvert: an absolute approach for converting genetic identifiers at different granularities.,"

Background

High-throughput molecular biology techniques yield vast amounts of data, often by detecting small portions of ribonucleotides corresponding to specific identifiers. Existing bioinformatic methodologies categorize and compare these elements using inferred descriptive annotation given this sequence information irrespective of the fact that it may not be representative of the identifier as a whole.

Results

All annotations, no matter the granularity, can be aligned to genomic sequences and therefore annotated by genomic intervals. We have developed AbsIDconvert, a methodology for converting between genomic identifiers by first mapping them onto a common universal coordinate system using an interval tree which is subsequently queried for overlapping identifiers. AbsIDconvert has many potential uses, including gene identifier conversion, identification of features within a genomic region, and cross-species comparisons. The utility is demonstrated in three case studies: 1) comparative genomic study mapping plasmodium gene sequences to corresponding human and mosquito transcriptional regions; 2) cross-species study of Incyte clone sequences; and 3) analysis of human Ensembl transcripts mapped by Affymetrix® and Agilent microarray probes. AbsIDconvert currently supports ID conversion of 53 species for a given list of input identifiers, genomic sequence, or genome intervals.

Conclusion

AbsIDconvert provides an efficient and reliable mechanism for conversion between identifier domains of interest. The flexibility of this tool allows for custom definition identifier domains contingent upon the availability and determination of a genomic mapping interval. As the genomes and the sequences for genetic elements are further refined, this tool will become increasingly useful and accurate. AbsIDconvert is freely available as a web application or downloadable as a virtual machine at: http://bioinformatics.louisville.edu/abid/.",2012-09-12 +21619932,Practical performance evaluation of a 10k × 10k CCD for electron cryo-microscopy.,"Electron cryo-microscopy (cryo-EM) images are commonly collected using either charge-coupled devices (CCD) or photographic film. Both film and the current generation of 16 megapixel (4k × 4k) CCD cameras have yielded high-resolution structures. Yet, despite the many advantages of CCD cameras, more than two times as many structures of biological macromolecules have been published in recent years using photographic film. The continued preference to film, especially for subnanometer-resolution structures, may be partially influenced by the finer sampling and larger effective specimen imaging area offered by film. Large format digital cameras may finally allow them to overtake film as the preferred detector for cryo-EM. We have evaluated a 111-megapixel (10k × 10k) CCD camera with a 9 μm pixel size. The spectral signal-to-noise ratios of low dose images of carbon film indicate that this detector is capable of providing signal up to at least 2/5 Nyquist frequency potentially retrievable for 3D reconstructions of biological specimens, resulting in more than double the effective specimen imaging area of existing 4k × 4k CCD cameras. 
We verified our estimates using frozen-hydrated ε15 bacteriophage as a biological test specimen with previously determined structure, yielding a ∼7 Å resolution single particle reconstruction from only 80 CCD frames. Finally, we explored the limits of current CCD technology by comparing the performance of this detector to various CCD cameras used for recording data yielding subnanometer resolution cryo-EM structures submitted to the electron microscopy data bank (http://www.emdatabank.org/).",2011-05-17 +21586517,Hidden conformations in protein structures.,"

Motivation

Prediction of interactions between protein residues (contact map prediction) can facilitate various aspects of 3D structure modeling. However, the accuracy of ab initio contact prediction is still limited. As structural genomics initiatives move ahead, solved structures of homologous proteins can be used as multiple templates to improve contact prediction of the major conformation of an unsolved target protein. Furthermore, multiple templates may provide a wider view of the protein's conformational space. However, successful usage of multiple structural templates is not straightforward, due to their variable relevance to the target protein, and because of data redundancy issues.

Results

We present here an algorithm that addresses these two limitations in the use of multiple structure templates. First, the algorithm unites contact maps extracted from templates sharing high sequence similarity with each other in a fashion that acknowledges the possibility of multiple conformations. Next, it weights the resulting united maps in inverse proportion to their evolutionary distance from the target protein. Testing this algorithm against CASP8 targets resulted in high precision contact maps. Remarkably, based solely on structural data of remote homologues, our algorithm identified residue-residue interactions that account for all the known conformations of calmodulin, a multifaceted protein. Therefore, employing multiple templates, which improves prediction of contact maps, can also be used to reveal novel conformations. As multiple templates will soon be available for most proteins, our scheme suggests an effective procedure for their optimal consideration.

Availability

A Perl script implementing the WMC algorithm described in this article is freely available for academic use at http://tau.ac.il/~haimash/WMC.",2011-05-17 +23782947,High frequency of the X-chromosome inactivation in young female patients with high-grade glioma.,"

Background

Gliomas are common tumors and high-grade ones account for 62% of primary malignant brain tumors. Though current evidence has suggested that inherited risks play a role in glioma susceptibility, it was conveyed that glioma was such a complex disease, and the direct genetic contribution to glioma risk factors and its relation to other factors should be discussed more deeply. X-chromosome inactivation (XCI) is the mechanism by which gene dosage equivalence is achieved between female mammals with two X chromosomes and male mammals with a single X chromosome. As skewed XCI has been linked to development of some solid tumors, including ovarian, breast, and pulmonary and esophageal carcinomas, it is challenging to elucidate the relation of skewed XCI to high-grade gliomas development.

Objective

The present study aimed to determine the general concordance between XCI pattern in blood cells and brain tissues, and SXCI frequencies in female patients with high-grade glioma compared to healthy controls.

Methods

1,103 Chinese females without a detectable tumor and 173 female high-grade glioma patients, were detected in the study. Normal brain tissues surrounding the lesions in gliomas were obtained from 49 patients among the 173 ones, with the microdissection using a laser microdissection microscope. Genomic DNA was extracted from the peripheral blood cells and the normal brain tissues from the subjects. Exon 1 of androgen receptor (AR) gene was amplified, and its products of different alleles were resolved on denaturing polyacrylamide gels and visualized after silver staining. The corrected ratios (CR) of the products before and after HpaII digestion were calculated.

Results

Occurrence of SXCI was detected in both the patients and controls at similar frequencies. However, the phenomenon, defined as CR ≥ 3, was more frequent in the patients aged ≤ 40 (23.6%) compared to the corresponding reference group (5.1%, P <0.0001). When CR ≥ 10 was adopted, the frequencies were 5.5% and 1.6%, respectively. Their difference did not attain statistical significance (P=0.10). When detected, both blood cells and brain tissue were compared after determination of a high concordance of XCI between blood cells and brain tissue collected from the same individuals (n=48, r =0.57, P <0.01).

Conclusions

The data from the current study demonstrated that SXCI may be a predisposing factor for development of high-grade glioma in young female patients and further study will verify its suitability as a biomarker to assess susceptibility of young female patients to high-grade glioma.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1935066233982578.",2013-06-19 +21835007,TopHat-Fusion: an algorithm for discovery of novel fusion transcripts.,"TopHat-Fusion is an algorithm designed to discover transcripts representing fusion gene products, which result from the breakage and re-joining of two different chromosomes, or from rearrangements within a chromosome. TopHat-Fusion is an enhanced version of TopHat, an efficient program that aligns RNA-seq reads without relying on existing annotation. Because it is independent of gene annotation, TopHat-Fusion can discover fusion products deriving from known genes, unknown genes and unannotated splice variants of known genes. Using RNA-seq data from breast and prostate cancer cell lines, we detected both previously reported and novel fusions with solid supporting evidence. TopHat-Fusion is available at http://tophat-fusion.sourceforge.net/.",2011-08-11 +22385986,CpGPAP: CpG island predictor analysis platform.,"

Background

Genomic islands play an important role in medical, methylation and biological studies. To explore the region, we propose a CpG islands prediction analysis platform for genome sequence exploration (CpGPAP).

Results

CpGPAP is a web-based application that provides a user-friendly interface for predicting CpG islands in genome sequences or in user input sequences. The prediction algorithms supported in CpGPAP include complementary particle swarm optimization (CPSO), a complementary genetic algorithm (CGA) and other methods (CpGPlot, CpGProD and CpGIS) found in the literature. The CpGPAP platform is easy to use and has three main features (1) selection of the prediction algorithm; (2) graphic visualization of results; and (3) application of related tools and dataset downloads. These features allow the user to easily view CpG island results and download the relevant island data. CpGPAP is freely available at http://bio.kuas.edu.tw/CpGPAP/.

Conclusions

The platform's supported algorithms (CPSO and CGA) provide a higher sensitivity and a higher correlation coefficient when compared to CpGPlot, CpGProD, CpGIS, and CpGcluster over an entire chromosome.",2012-03-02 +21296750,SIMA: simultaneous multiple alignment of LC/MS peak lists.,"

Motivation

Alignment of multiple liquid chromatography/mass spectrometry (LC/MS) experiments is a necessity today, which arises from the need for biological and technical repeats. Due to limits in sampling frequency and poor reproducibility of retention times, current LC systems suffer from missing observations and non-linear distortions of the retention times across runs. Existing approaches for peak correspondence estimation focus almost exclusively on solving the pairwise alignment problem, yielding straightforward but suboptimal results for multiple alignment problems.

Results

We propose SIMA, a novel automated procedure for alignment of peak lists from multiple LC/MS runs. SIMA combines hierarchical pairwise correspondence estimation with simultaneous alignment and global retention time correction. It employs a tailored multidimensional kernel function and a procedure based on maximum likelihood estimation to find the retention time distortion function that best fits the observed data. SIMA does not require a dedicated reference spectrum, is robust with regard to outliers, needs only two intuitive parameters and naturally incorporates incomplete correspondence information. In a comparison with seven alternative methods on four different datasets, we show that SIMA yields competitive and superior performance on real-world data.

Availability

A C++ implementation of the SIMA algorithm is available from http://hci.iwr.uni-heidelberg.de/MIP/Software.",2011-02-03 +21566017,Method for milk oligosaccharide profiling by 2-aminobenzamide labeling and hydrophilic interaction chromatography.,"Although the properties of milk oligosaccharides have been of scientific interest for many years, their structural diversity presents a challenging analytical task. In the quest for a simple and robust technology to characterize the different oligosaccharides present in milk, we developed an analytical scheme based on their fluorescent labeling, pre-fractionation by weak anionic exchange chromatography and separation by hydrophilic interaction liquid chromatography (HILIC)-high performance liquid chromatography (HPLC). HILIC relies on the hydrophilic potential of the molecule, which accounts for differences in properties such as molecular volume, lipophilic surface area, charge, composition, structure, linkage and oligosaccharide branching. The robustness of the methodology has been demonstrated using bovine colostrum oligosaccharides as a case study. Structural assignments for 37 free glycans, including 20 sialylated species, were obtained by a combination of HILIC-HPLC, exoglycosidase digestion and offline negative-ion mode mass spectrometry (MS)/MS. Parameters obtained for each glycan, including linkages, enzymatic digestion products and glucose unit values, will be added to GlycoBase, a public access database (http://glycobase.nibrt.ie/glycobase.html). This approach provides a basis for the analysis of free milk oligosaccharides in a fast and sensitive manner and could be adapted for an automated technology platform amenable to diverse environments. 
Indeed, our approach, in conjunction with bacterial-binding assays, can provide a better understanding of the structural elements required for biological activity of free milk oligosaccharides and could serve as a scientific basis for the selection of such bioactives from various food sources.",2011-05-12 +21828085,NetDS: a Cytoscape plugin to analyze the robustness of dynamics and feedforward/feedback loop structures of biological networks.,"

Summary

NetDS is a novel Cytoscape plugin that conveniently simulates dynamics related to robustness, and examines structural properties with respect to feedforward/feedback loops. It can evaluate how robustly a network sustains a stable state against mutations by employing a Boolean network model. In addition, the plugin can examine all feedforward/feedback loops appearing in a network and determine whether or not a pair of loops is coupled. Random networks can also be generated to evaluate whether or not an interesting finding in real biological networks is significantly random.

Availability

NetDS is freely available for non-commercial purposes at http://netds.sourceforge.net/.

Contact

kwonyk@ulsan.ac.kr

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-09 +22283809,Development of novel CXCR4-based therapeutics.,"

Introduction

During embryogenesis, CXCR4, a chemokine receptor, and its ligand, stromal cell-derived factor-1 (SDF-1/CXCL12), are critically involved in the development of the hematopoietic, nerve and endothelial tissues by regulating tissue progenitor cell migration, homing and survival. In adult life, the CXCR4 axis serves as the key factor for stem and immune cell trafficking. More importantly, CXCR4-CXCL12 axis plays a critical role in HIV, stem cell mobilization, autoimmune diseases, cancer and tissue regeneration. Targeting the CXCR4-CXCL12 axis, therefore, is an attractive therapeutic approach in various diseases.

Areas covered

In this review, we update current knowledge about CXCR4-CXCL12 biology, therapeutic approaches and therapeutic agents. The data presented was collected from http://www.ncbi.nlm.nih.gov/pubmed , http://clinicaltrials.gov/ , http://bloodjournal.hematologylibrary.org/ .

Expert opinion

Development of CXCR4 antagonists with increased affinity, extended pharmacokinetics and/or pharmacodynamics and with the capacity to differentially target CXCR4 may lead to a development of novel therapeutics for HIV, cancer, tissue regeneration and stem cell collection.",2012-01-28 +21534005,Executive function on the Psychology Experiment Building Language tests.,"The measurement of executive function has a long history in clinical and experimental neuropsychology. The goal of the present report was to determine the profile of behavior across the lifespan on four computerized measures of executive function contained in the recently developed Psychology Experiment Building Language (PEBL) test battery http://pebl.sourceforge.net/ and evaluate whether this pattern is comparable to data previously obtained with the non-PEBL versions of these tests. Participants (N = 1,223; ages, 5-89 years) completed the PEBL Trail Making Test (pTMT), the Wisconsin Card Sort Test (pWCST; Berg, Journal of General Psychology, 39, 15-22, 1948; Grant & Berg, Journal of Experimental Psychology, 38, 404-411, 1948), the Tower of London (pToL), or a time estimation task (Time-Wall). Age-related effects were found over all four tests, especially as age increased from young childhood through adulthood. For several tests and measures (including pToL and pTMT), age-related slowing was found as age increased in adulthood. Together, these findings indicate that the PEBL tests provide valid and versatile new research tools for measuring executive functions.",2012-03-01 +21572887,Database and interaction network of genes involved in oral cancer: Version II.,"The oral cancer gene database has been compiled to enable fast retrieval of updated information and role of the genes implicated in oral cancer. The first version of the database with 242 genes was published in Online Journal of Bioinformatics 8(1), 41-44, 2007. 
In the second version, the database has been enlarged to include 374 genes by adding 132 gene entries. The architecture and format of the database is similar to the earlier version, and includes updated information and external hyperlinks for all the genes. The functional gene interaction network for important biological processes and molecular functions has been rebuilt based on 374 genes using 'String 8.3'. The database is freely available at http://www.actrec.gov.in/OCDB/index.htm and provides the scientist information and external links for the genes involved in oral cancer, interactions between them, and their role in the biology of oral cancer along with clinical relevance.",2011-05-07 +22088841,NoRSE: noise reduction and state evaluator for high-frequency single event traces.,"

Unlabelled

NoRSE was developed to analyze high-frequency datasets collected from multistate, dynamic experiments, such as molecular adsorption and desorption onto carbon nanotubes. As technology improves sampling frequency, these stochastic datasets become increasingly large with faster dynamic events. More efficient algorithms are needed to accurately locate the unique states in each time trace. NoRSE adapts and optimizes a previously published noise reduction algorithm and uses a custom peak flagging routine to rapidly identify unique event states. The algorithm is explained using experimental data from our lab and its fitting accuracy and efficiency are then shown with a generalized model of stochastic datasets. The algorithm is compared to another recently published state finding algorithm and is found to be 27 times faster and more accurate over 55% of the generalized experimental space. NoRSE is written as an M-file for Matlab.

Availability

http://web.mit.edu/stranogroup/NoRSE.txt.",2011-11-15 +21689413,Sniper: improved SNP discovery by multiply mapping deep sequenced reads.,"SNP (single nucleotide polymorphism) discovery using next-generation sequencing data remains difficult primarily because of redundant genomic regions, such as interspersed repetitive elements and paralogous genes, present in all eukaryotic genomes. To address this problem, we developed Sniper, a novel multi-locus Bayesian probabilistic model and a computationally efficient algorithm that explicitly incorporates sequence reads that map to multiple genomic loci. Our model fully accounts for sequencing error, template bias, and multi-locus SNP combinations, maintaining high sensitivity and specificity under a broad range of conditions. An implementation of Sniper is freely available at http://kim.bio.upenn.edu/software/sniper.shtml.",2011-06-20 +21426944,TMBHMM: a frequency profile based HMM for predicting the topology of transmembrane beta barrel proteins and the exposure status of transmembrane residues.,"Transmembrane beta barrel (TMB) proteins are found in the outer membranes of bacteria, mitochondria and chloroplasts. TMBs are involved in a variety of functions such as mediating flux of metabolites and active transport of siderophores, enzymes and structural proteins, and in the translocation across or insertion into membranes. We present here TMBHMM, a computational method based on a hidden Markov model for predicting the structural topology of putative TMBs from sequence. In addition to predicting transmembrane strands, TMBHMM also predicts the exposure status (i.e., exposed to the membrane or hidden in the protein structure) of the residues in the transmembrane region, which is a novel feature of the TMBHMM method. Furthermore, TMBHMM can also predict the membrane residues that are not part of beta barrel forming strands. The training of the TMBHMM was performed on a non-redundant data set of 19 TMBs. 
The self-consistency test yielded Q(2) accuracy of 0.87, Q(3) accuracy of 0.83, Matthews correlation coefficient of 0.74 and SOV for beta strand of 0.95. In this self-consistency test the method predicted 83% of transmembrane residues with correct exposure status. On an unseen, non-redundant test data set of 10 proteins, the 2-state and 3-state TMBHMM prediction accuracies are around 73% and 72%, respectively, and are comparable to other methods from the literature. The TMBHMM web server takes an amino acid sequence or a multiple sequence alignment as an input and predicts the exposure status and the structural topology as output. The TMBHMM web server is available under the tmbhmm tab at: http://service.bioinformatik.uni-saarland.de/tmx-site/.",2011-03-21 +23046521,A novel neural response algorithm for protein function prediction.,"

Background

Large amounts of data are being generated by high-throughput genome sequencing methods. But the rate of the experimental functional characterization falls far behind. To fill the gap between the number of sequences and their annotations, fast and accurate automated annotation methods are required. Many methods, such as GOblet, GOFigure, and Gotcha, are designed based on the BLAST search. Unfortunately, the sequence coverage of these methods is low as they cannot detect the remote homologues. Adding to this, the lack of annotation specificity advocates the need to improve automated protein function prediction.

Results

We designed a novel automated protein functional assignment method based on the neural response algorithm, which simulates the neuronal behavior of the visual cortex in the human brain. Firstly, we predict the most similar target protein for a given query protein and thereby assign its GO term to the query sequence. When assessed on test set, our method ranked the actual leaf GO term among the top 5 probable GO terms with accuracy of 86.93%.

Conclusions

The proposed algorithm is the first instance of neural response algorithm being used in the biological domain. The use of HMM profiles along with the secondary structure information to define the neural response gives our method an edge over other available methods on annotation accuracy. Results of the 5-fold cross validation and the comparison with PFP and FFPred servers indicate the prominent performance by our method. The program, the dataset, and help files are available at http://www.jjwanglab.org/NRProF/.",2012-07-16 +22962490,From phenotype to genotype: an association study of longitudinal phenotypic markers to Alzheimer's disease relevant SNPs.,"

Motivation

Imaging genetic studies typically focus on identifying single-nucleotide polymorphism (SNP) markers associated with imaging phenotypes. Few studies perform regression of SNP values on phenotypic measures for examining how the SNP values change when phenotypic measures are varied. This alternative approach may have a potential to help us discover important imaging genetic associations from a different perspective. In addition, the imaging markers are often measured over time, and this longitudinal profile may provide increased power for differentiating genotype groups. How to identify the longitudinal phenotypic markers associated to disease sensitive SNPs is an important and challenging research topic.

Results

Taking into account the temporal structure of the longitudinal imaging data and the interrelatedness among the SNPs, we propose a novel 'task-correlated longitudinal sparse regression' model to study the association between the phenotypic imaging markers and the genotypes encoded by SNPs. In our new association model, we extend the widely used ℓ(2,1)-norm for matrices to tensors to jointly select imaging markers that have common effects across all the regression tasks and time points, and meanwhile impose the trace-norm regularization onto the unfolded coefficient tensor to achieve low rank such that the interrelationship among SNPs can be addressed. The effectiveness of our method is demonstrated by both clearly improved prediction performance in empirical evaluations and a compact set of selected imaging predictors relevant to disease sensitive SNPs.

Availability

Software is publicly available at: http://ranger.uta.edu/%7eheng/Longitudinal/

Contact

heng@uta.edu or shenli@inpui.edu.",2012-09-01 +22962448,Indel-tolerant read mapping with trinucleotide frequencies using cache-oblivious kd-trees.,"

Motivation

Mapping billions of reads from next generation sequencing experiments to reference genomes is a crucial task, which can require hundreds of hours of running time on a single CPU even for the fastest known implementations. Traditional approaches have difficulties dealing with matches of large edit distance, particularly in the presence of frequent or large insertions and deletions (indels). This is a serious obstacle both in determining the spectrum and abundance of genetic variations and in personal genomics.

Results

For the first time, we adopt the approximate string matching paradigm of geometric embedding to read mapping, thus rephrasing it to nearest neighbor queries in a q-gram frequency vector space. Using the L(1) distance between frequency vectors has the benefit of providing lower bounds for an edit distance with affine gap costs. Using a cache-oblivious kd-tree, we realize running times, which match the state-of-the-art. Additionally, running time and memory requirements are about constant for read lengths between 100 and 1000 bp. We provide a first proof-of-concept that geometric embedding is a promising paradigm for read mapping and that L(1) distance might serve to detect structural variations. TreQ, our initial implementation of that concept, performs more accurate than many popular read mappers over a wide range of structural variants.

Availability and implementation

TreQ will be released under the GNU Public License (GPL), and precomputed genome indices will be provided for download at http://treq.sf.net.

Contact

pavelm@cs.rutgers.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-09-01 +22276099,Networks of emotion concepts.,"The aim of this work was to study the similarity network and hierarchical clustering of Finnish emotion concepts. Native speakers of Finnish evaluated similarity between the 50 most frequently used Finnish words describing emotional experiences. We hypothesized that methods developed within network theory, such as identifying clusters and specific local network structures, can reveal structures that would be difficult to discover using traditional methods such as multidimensional scaling (MDS) and ordinary cluster analysis. The concepts divided into three main clusters, which can be described as negative, positive, and surprise. Negative and positive clusters divided further into meaningful sub-clusters, corresponding to those found in previous studies. Importantly, this method allowed the same concept to be a member in more than one cluster. Our results suggest that studying particular network structures that do not fit into a low-dimensional description can shed additional light on why subjects evaluate certain concepts as similar. To encourage the use of network methods in analyzing similarity data, we provide the analysis software for free use (http://www.becs.tkk.fi/similaritynets/).",2012-01-20 +23516507,The landscape of host transcriptional response programs commonly perturbed by bacterial pathogens: towards host-oriented broad-spectrum drug targets.,"

Background

The emergence of drug-resistant pathogen strains and new infectious agents pose major challenges to public health. A promising approach to combat these problems is to target the host's genes or proteins, especially to discover targets that are effective against multiple pathogens, i.e., host-oriented broad-spectrum (HOBS) drug targets. An important first step in the discovery of such drug targets is the identification of host responses that are commonly perturbed by multiple pathogens.

Results

In this paper, we present a methodology to identify common host responses elicited by multiple pathogens. First, we identified host responses perturbed by each pathogen using a gene set enrichment analysis of publicly available genome-wide transcriptional datasets. Then, we used biclustering to identify groups of host pathways and biological processes that were perturbed only by a subset of the analyzed pathogens. Finally, we tested the enrichment of each bicluster in human genes that are known drug targets, on the basis of which we elicited putative HOBS targets for specific groups of bacterial pathogens. We identified 84 up-regulated and three down-regulated statistically significant biclusters. Each bicluster contained a group of pathogens that commonly dysregulated a group of biological processes. We validated our approach by checking whether these biclusters correspond to known hallmarks of bacterial infection. Indeed, these biclusters contained biological process such as inflammation, activation of dendritic cells, pro- and anti- apoptotic responses and other innate immune responses. Next, we identified biclusters containing pathogens that infected the same tissue. After a literature-based analysis of the drug targets contained in these biclusters, we suggested new uses of the drugs Anakinra, Etanercept, and Infliximab for gastrointestinal pathogens Yersinia enterocolitica, Helicobacter pylori kx2 strain, and enterohemorrhagic Escherichia coli and the drug Simvastatin for hematopoietic pathogen Ehrlichia chaffeensis.

Conclusions

Using a combination of automated analysis of host-response gene expression data and manual study of the literature, we have been able to suggest host-oriented treatments for specific bacterial infections. The analyses and suggestions made in this study may be utilized to generate concrete hypothesis on which gene sets to probe further in the quest for HOBS drug targets for bacterial infections. All our results are available at the following supplementary website: http://bioinformatics.cs.vt.edu/ murali/supplements/2013-kidane-plos-one.",2013-03-13 +21566560,"Summary of notifiable diseases: United States, 2009.","The Summary of Notifiable Diseases--- United States, 2009 contains the official statistics, in tabular and graphic form, for the reported occurrence of nationally notifiable infectious diseases in the United States for 2009. Unless otherwise noted, the data are final totals for 2009 reported as of June 30, 2010. These statistics are collected and compiled from reports sent by state health departments and territories to the National Notifiable Diseases Surveillance System (NNDSS), which is operated by CDC in collaboration with the Council of State and Territorial Epidemiologists (CSTE). The Summary is available at http://www.cdc.gov/mmwr/summary.html. This site also includes Summary publications from previous years.",2011-05-01 +30731961,First Report of Gliocephalotrichum bulbilium Causing Cranberry Fruit Rot in New Jersey and Massachusetts.,"Cranberry (Vaccinium macrocarpon) fruit were collected as part of a fruit rot survey conducted in September 2010 on farms in New Jersey and Massachusetts. There are more than 20 fungal species reported as causing fruit rot (2) and symptoms are generally not diagnostic. The rotted fruit were surface sterilized in a 10% bleach solution for 5 min, sliced in half, and plated on V8 agar (nonclarified). 
A novel, fast-growing fungus that produced sporulating orange-brown colonies emerged from 5% of the fruit collected on three of the farms included in the survey. The fungus was notable as the only species present in the rotted fruit, suggesting it may be pathogenic. The conidia were produced as gloeoid masses on phialidic conidiogenous cells arranged in a polyverticillate penicillus. The conidiogenous cells were subtended at variable distances by zero to four sterile appendages that formed on the lightly pigmented conidiophore. On the basis of these characteristics, the fungus was identified as a species of Gliocephalotrichum (3). Further investigation of the growth medium revealed the presence of clustered, red-brown chlamydospores that were produced abundantly in all isolates. These structures, also known as bulbils, are restricted to two species in the genus, G. bulbilium and G. longibrachium (1). On average, the bulbils were 42.0 × 48.3 μm and conidia were 5.75 × 2.5 μm. On the basis of size and shape of conidia and presence of bulbils, the isolates were identified as G. bulbilium (1). To confirm the identity of the fungus, genomic DNA was extracted and ITS1-5.8S-ITS2 and the 5' end of the β-tubulin gene were amplified and sequenced (1). The sequences (GenBank Accession Nos. HQ828060 and HQ828061) were compared with published sequences of Gliocephalotrichum isolates (1) and results confirmed the cranberry isolates were G. bulbilium. The isolates were tested for pathogenicity on harvested cranberry fruit. Fifty ripe cranberry fruit (cv. Stevens) were inoculated by injecting approximately 20 μl (using a 26G 9.5-mm needle) of conidia (1 × 105 ml-1) into the side of each berry. As a comparison, isolates of two common cranberry fruit rot pathogens, Colletotrichum acutatum and C. gloeosporioides, were inoculated on to fruit using the same technique. A water-only inoculation was used as the control. Fruit rot developed on all inoculated fruit except the water control. 
In the case of G. bulbilium, all fruit rotted within 2 days, whereas the other two species developed symptoms within 4 to 7 days. G. bulbilium and both species of Colletotrichum were consistently reisolated from all of the respectively inoculated fruit. To our knowledge, this is the first report of G. bulbilium causing fruit rot on cranberry. The species has been reported as an important postharvest fruit rot (4) on rambutan (Nephelium lappaceum) in Thailand, rambutan and guava (Psidium guajava) in Hawaii, and durian (Durio spp.) in Brunei Darussalam. This report of G. bulbilium extends the range within the United States to include Louisiana, Hawaii, Wisconsin, West Virginia, New Jersey, and Massachusetts (2). References: (1) C. Decock et al. Mycologia 98:488, 2006. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , 16 December 2010. (3) A. Rossman et al. Mycologia, 85:685, 1993. (4) A. Sivapalan et al. Australas. Plant Pathol. 27:274, 1998.",2011-05-01 +30731956,First Report of Seiridium unicorne Causing Bark Cankers on a Monterey Cypress in California.,"In June 2009, dieback of distal branches and resin exudation associated with bark lesions were observed in an adult Cupressus macrocarpa tree in Sonoma County, California (Glenn Ellen; 38°21'N, 122°31'W, elevation 233 m). The fungal pathogen, Seiridium unicorne (Cooke and Ellis) Sutton, was obtained by plating fragments of necrotic bark from the margins of branch cankers on potato dextrose agar (PDA). Identification was based on cultural, morphological, and molecular traits (2,3). Colonies on PDA were dense, cottony, off-white at first and then turning pale gray-green, and 2.3 and 4.3 cm in diameter after 1 and 2 weeks of growth at 20°C, respectively. Colonies of the fungus showed a faster radial growth at 20°C than at 25°C. 
Acervuli were abundantly produced on water agar amended with autoclaved cypress seeds after 2 to 3 weeks at 18°C under a mixture of fluorescent and near UV light. Conidia were six celled (five euseptate), fusiform, 20.9 to 35.2 × 7.11 to 10.57 μm, straight or slightly curved, with four, brown median cells, and with end cells bearing unbranched appendages 2 to 5 μm long. The DNA sequence of a portion of the β-tubulin locus (GenBank Accession No. HQ678171) revealed a 100% homology with sequences of S. unicorne isolates from Portugal and South Africa, while being clearly distinct from sequences of S. cupressi and S. cardinale isolates (2). Greenhouse stem inoculations were performed by underbark placement of a 3-mm plug taken from the margins of a colony of the fungus grown on PDA. Inoculations were repeated twice in the spring and fall of 2010 on 10 C. macrocarpa saplings grown in pots for 3 years. Three months postinoculation, the pathogen could be successfully reisolated from the edges of 15 to 30 mm long elliptical lesions, present on each one of the inoculated saplings. The observed S. unicorne isolate is atypical because of its shorter appendages compared with the form reported in the literature (2,3). Because of its shorter conidial appendages and in vitro temperature optimum of 18 to 20°C, the fungus described here is similar to an unnamed Coryneum sp. observed by Wagener on C. macrocarpa (4). S. unicorne is a pathogen of many Cupressaceae in Africa, New Zealand, Japan, and some U.S. states (Georgia, South Carolina, Kansas, and Texas) (3), and although it was mentioned in a USDA Plant Quarantine Division report from 1963 as found on cypress in San Francisco (1), it has never been officially reported from California. Since similar disease symptoms were observed on many Cupressaceae in the course of an extensive survey performed in 2009 in California, it may be important to evaluate the relative incidence of S. unicorne compared with that of S. 
cardinale, a pathogen more commonly reported in association with the disease (4). References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/fungushost/fungushost.cfm , 1/19/2011. (2) P. Krokene et al. Mycologia 96:1352, 2004. (3) N. A. Tisserat. Plant Dis. 75:138, 1991. (4) W. W. Wagener. J. Agric. Res. 58:1, 1939.",2011-05-01 +30731967,First Report of Flyspeck Caused by Zygophiala wisconsinensis on Sweet Persimmon Fruit in Korea.,"Sweet persimmon (Diospyros kaki L.), a fruit tree in the Ebenaceae, is cultivated widely in Korea and Japan, the leading producers worldwide (2). Sweet persimmon fruit with flyspeck symptoms were collected from orchards in the Jinju area of Korea in November 2010. The fruit had fungal clusters of black, round to ovoid, sclerotium-like fungal bodies with no visible evidence of a mycelial mat. Orchard inspections revealed that disease incidence ranged from 10 to 20% in the surveyed area (approximately 10 ha) in 2010. Flyspeck symptoms were observed on immature and mature fruit. Sweet persimmon fruit peels with flyspeck symptoms were removed, dried, and individual speck lesions transferred to potato dextrose agar (PDA) and cultured at 22°C in the dark. Fungal isolates were obtained from flyspeck colonies on 10 sweet persimmon fruit harvested from each of three orchards. Fungal isolates that grew from the lesions were identified based on a previous description (1). To confirm identity of the causal fungus, the complete internal transcribed spacer (ITS) rDNA sequence of a representative isolate was amplified and sequenced using primers ITS1 and ITS4 (4). The resulting 552-bp sequence was deposited in GenBank (Accession No. HQ698923). Comparison with ITS rDNA sequences showed 100% similarity with a sequence of Zygophiala wisconsinensis Batzer & Crous (GenBank Accession No. AY598855), which infects apple. 
To fulfill Koch's postulates, mature, intact sweet persimmon fruit were surface sterilized with 70% ethanol and dried. Three fungal isolates from this study were grown on PDA for 1 month. A colonized agar disc (5 mm in diameter) of each isolate was cut from the advancing margin of a colony with a sterilized cork borer, transferred to a 1.5-ml Eppendorf tube, and ground into a suspension of mycelial fragments and conidia in a blender with 1 ml of sterile, distilled water. The inoculum of each isolate was applied by swabbing a sweet persimmon fruit with the suspension. Three sweet persimmon fruit were inoculated per isolate. Three fruit were inoculated similarly with sterile, distilled water as the control treatment. After 1 month of incubation in a moist chamber at 22°C, the same fungal fruiting symptoms were reproduced as observed in the orchards, and the fungus was reisolated from these symptoms, but not from the control fruit, which were asymptomatic. On the basis of morphological characteristics of the fungal colonies, ITS sequence, and pathogenicity to persimmon fruit, the fungus was identified as Z. wisconsinensis (1). Flyspeck is readily isolated from sweet persimmon fruit in Korea and other sweet persimmon growing regions (3). The exposure of fruit to unusual weather conditions in Korea in recent years, including drought, and low-temperature and low-light situations in late spring, which are favorable for flyspeck, might be associated with an increase in occurrence of flyspeck on sweet persimmon fruit in Korea. To our knowledge, this is the first report of Z. wisconsinensis causing flyspeck on sweet persimmon in Korea. References: (1) J. C. Batzer et al. Mycologia 100:246, 2008. (2) FAOSTAT Database. Retrieved from http://faostat.fao.org/ , 2008. (3) H. Nasu and H. Kunoh. Plant Dis. 71:361, 1987. (4) T. J. White et al. Page 315 in: PCR Protocols: A Guide to Methods and Applications. M. A. Innis et al., eds. 
Academic Press, Inc., New York, 1990.",2011-05-01 +21569736,[Screening program on novel drug resistance mutations of subtype B' in human immunodeficiency virus type 1 in China].,"

Objective

To screen the level of novel drug resistance mutations in subtype B' in China.

Methods

451 pol sequences collected from the previous study, which including 354 AIDS patients who had received antiretroviral treatment (ART) and 97 the untreated patients. Entire protease gene (codons 1 - 99) and full-length reverse transcriptase gene (codons 1 - 560) were included. Variation of mutations between the treated and the untreated patients with consensus/ancestral sequences were compared and the mutations with higher frequencies in the treated patients than in the untreated patients were screened before submitting the mutations to the Stanford HIV Drug Resistance Database (SHDB) (http: //hivdb.stanford.edu/). Relation between the mutations and resistance preliminarily was then analyzed, according to the information including SHDB.

Results

Frequencies of 7 mutations at 6 positions, D123E, V292I, K366R, T369A, T369V, A371V and I375V, 2 at DNA polymerase domain and 5 at connection domain of reverse transcriptase (RT) were higher in the treated patients than in the untreated patients. The information of 7 mutations including the SHDB showed that 7 mutations were major variants at corresponding positions, and theirs frequencies were higher in the treated patients using some drugs, than in the untreated patients.

Conclusion

7 mutations being screened from the China subtype B were possibly associated with the resistance, which called for the construction of mutated viruses by site-directed mutagenesis to identify their effects on the susceptivity of different drugs.",2011-05-01 +22536903,Predicting folding pathways between RNA conformational structures guided by RNA stacks.,"

Background

Accurately predicting low energy barrier folding pathways between conformational secondary structures of an RNA molecule can provide valuable information for understanding its catalytic and regulatory functions. Most existing heuristic algorithms guide the construction of folding pathways by free energies of intermediate structures in the next move during the folding. However due to the size and ruggedness of RNA energy landscape, energy-guided search can become trapped in local optima.

Results

In this paper, we propose an algorithm that guides the construction of folding pathways through the formation and destruction of RNA stacks. Guiding the construction of folding pathways by coarse grained movements of RNA stacks can help reduce the search space and make it easier to jump out of local optima. RNAEAPath is able to find lower energy barrier folding pathways between secondary structures of conformational switches and outperforms the existing heuristic algorithms in most test cases.

Conclusions

RNAEAPath provides an alternate approach for predicting low-barrier folding pathways between RNA conformational secondary structures. The source code of RNAEAPath and the test data sets are available at http://genome.ucf.edu/RNAEAPath.",2012-03-21 +22784445,Relapsing catastrophic antiphospholipid syndrome potential role of microangiopathic hemolytic anemia in disease relapses.,"

Objective

To analyze the clinical and laboratory characteristics of patients with catastrophic antiphospholipid syndrome (APS) who suffer relapses.

Methods

We analyzed the Web site--based international registry of patients with catastrophic APS (""CAPS Registry"") http://infmed.fcrb.es/es/web/caps and selected those cases that relapsed.

Results

Relapses were reported in 9 of 282 (3.2%) patients with catastrophic APS. A total of 35 episodes of catastrophic APS were found: 6 patients presented 2 relapses, 2 patients suffered 3 relapses, and 1 patient developed 17 relapses. However, the last patient was not included in the statistical analysis because his clinical and immunologic characteristics were not fully described. Therefore, a total of 18 episodes were analyzed. In 9 (50%) episodes, a precipitating factor was identified. The most frequent precipitating factor, found in 5 (28%) episodes, was infection. Brain, kidney, heart, and lung were the most common organs involved. Laboratory features of microangiopathic hemolytic anemia (MHA) were present in 13 of 18 (72%) episodes (definitive in 9, corresponding to 4 patients, and probable in 4, corresponding to 2 patients). Three relapses did not present with features of MHA and in the remaining 2 these data were not reported. The mortality rate was 38%.

Conclusions

Although relapses are rare in patients with catastrophic APS, these results support the hypothesis that an association between MHA and relapsing of catastrophic APS could be present.",2012-07-10 +22780965,ngLOC: software and web server for predicting protein subcellular localization in prokaryotes and eukaryotes.,"

Background

Understanding protein subcellular localization is a necessary component toward understanding the overall function of a protein. Numerous computational methods have been published over the past decade, with varying degrees of success. Despite the large number of published methods in this area, only a small fraction of them are available for researchers to use in their own studies. Of those that are available, many are limited by predicting only a small number of organelles in the cell. Additionally, the majority of methods predict only a single location for a sequence, even though it is known that a large fraction of the proteins in eukaryotic species shuttle between locations to carry out their function.

Findings

We present a software package and a web server for predicting the subcellular localization of protein sequences based on the ngLOC method. ngLOC is an n-gram-based Bayesian classifier that predicts subcellular localization of proteins both in prokaryotes and eukaryotes. The overall prediction accuracy varies from 89.8% to 91.4% across species. This program can predict 11 distinct locations each in plant and animal species. ngLOC also predicts 4 and 5 distinct locations on gram-positive and gram-negative bacterial datasets, respectively.

Conclusions

ngLOC is a generic method that can be trained by data from a variety of species or classes for predicting protein subcellular localization. The standalone software is freely available for academic use under GNU GPL, and the ngLOC web server is also accessible at http://ngloc.unmc.edu.",2012-07-10 +21438504,"Densities and apparent molar volumes of atmospherically important electrolyte solutions. 1. The solutes H2SO4, HNO3, HCl, Na2SO4, NaNO3, NaCl, (NH4)2SO4, NH4NO3, and NH4Cl from 0 to 50 °C, including extrapolations to very low temperature and to the pure liquid state, and NaHSO4, NaOH, and NH3 at 25 °C.","Calculations of the size and density of atmospheric aerosols are complicated by the fact that they can exist at concentrations highly supersaturated with respect to dissolved salts and supercooled with respect to ice. Densities and apparent molar volumes of solutes in aqueous solutions containing the solutes H(2)SO(4), HNO(3), HCl, Na(2)SO(4), NaNO(3), NaCl, (NH(4))(2)SO(4), NH(4)NO(3), and NH(4)Cl have been critically evaluated and represented using fitted equations from 0 to 50 °C or greater and from infinite dilution to concentrations saturated or supersaturated with respect to the dissolved salts. Using extrapolated densities of high-temperature solutions and melts, the relationship between density and concentration is extended to the hypothetical pure liquid solutes. Above a given reference concentration of a few mol kg(-1), it is observed that density increases almost linearly with decreasing temperature, and comparisons with available data below 0 °C suggest that the fitted equations for density can be extrapolated to very low temperatures. As concentration is decreased below the reference concentration, the variation of density with temperature tends to that of water (which decreases as temperature is reduced below 3.98 °C). 
In this region below the reference concentration, and below 0 °C, densities are calculated using extrapolated apparent molar volumes which are constrained to agree at the reference concentrations with an equation for the directly fitted density. Calculated volume properties agree well with available data at low temperatures, for both concentrated and dilute solutions. Comparisons are made with literature data for temperatures of maximum density. Apparent molar volumes at infinite dilution are consistent, on a single ion basis, to better than ±0.1 cm(3) mol(-1) from 0 to 50 °C. Volume properties of aqueous NaHSO(4), NaOH, and NH(3) have also been evaluated, at 25 °C only. In part 2 of this work (ref 1 ) an ion interaction (Pitzer) model has been used to calculate apparent molar volumes of H(2)SO(4) in 0-3 mol kg(-1) aqueous solutions of the pure acid and to represent directly the effect of the HSO(4)(-) ↔ H(+) + SO(4)(2-) reaction. The results are incorporated into the treatment of aqueous H(2)SO(4) density described here. Densities and apparent molar volumes from -20 to 50 °C, and from 0 to 100 wt % of solute, are tabulated for the electrolytes listed in the title and have also been incorporated into the extended aerosol inorganics model (E-AIM, http://www.aim.env.uea.ac.uk/aim/aim.php) together with densities of the solid salts and hydrates.",2011-03-25 +21851591,PARalyzer: definition of RNA binding sites from PAR-CLIP short-read sequence data.,"Crosslinking and immunoprecipitation (CLIP) protocols have made it possible to identify transcriptome-wide RNA-protein interaction sites. In particular, PAR-CLIP utilizes a photoactivatable nucleoside for more efficient crosslinking. We present an approach, centered on the novel PARalyzer tool, for mapping high-confidence sites from PAR-CLIP deep-sequencing data. We show that PARalyzer delineates sites with a high signal-to-noise ratio. 
Motif finding identifies the sequence preferences of RNA-binding proteins, as well as seed-matches for highly expressed microRNAs when profiling Argonaute proteins. Our study describes tailored analytical methods and provides guidelines for future efforts to utilize high-throughput sequencing in RNA biology. PARalyzer is available at http://www.genome.duke.edu/labs/ohler/research/PARalyzer/.",2011-08-18 +21528353,Genetic variants of 6q25 and breast cancer susceptibility: a two-stage fine mapping study in a Chinese population.,"A recent genome-wide association study identified a novel single nucleotide polymorphism (SNP), rs2046210, in the 6q25 region as a breast cancer susceptibility locus in Chinese and subsequently replicated in a multicenter study. Further fine-mapping of this region may help identify the potential causative SNPs of breast cancer. We employed a block-based fine mapping analysis to investigate the tagging SNPs in a 41 kb block with the marker-SNP rs2046210 in the 6q25 region, and also extended our study by including two potentially functional SNPs (rs2234693 and rs1801132) within the ESR1 gene by a two-stage case-control study with 1,792 breast cancer cases and 1,867 controls (878 cases and 900 controls in the testing set and 914 cases and 967 controls in the validation set). Significant associations with breast cancer risk were observed for rs1038304, rs6929137, rs2046210, and rs10484919 in the 41 kb block of the 6q25 region in the testing set after controlling multiple testing. Together with the validation set samples, these four SNPs were all significantly associated with increased risk of breast cancer (additive OR from 1.25 to 1.34, additive P from 4.84 × 10(-6) to 7.17 × 10(-9)). 
After conditional regression and linkage disequilibrium analyses, rs6929137 and rs10484919 tend to be susceptible markers of breast cancer in this region and both of them were located at sites of histone modification according to the UCSC (http://genome.ucsc.edu/) genome database. Our results support that the 6q25 region is an important susceptibility region for breast cancer in Chinese women, and rs6929137 and rs10484919 are causative or marker SNPs for this region.",2011-04-28 +22057159,Gene Ontology-driven inference of protein-protein interactions using inducers.,"

Motivation

Protein-protein interactions (PPIs) are pivotal for many biological processes and similarity in Gene Ontology (GO) annotation has been found to be one of the strongest indicators for PPI. Most GO-driven algorithms for PPI inference combine machine learning and semantic similarity techniques. We introduce the concept of inducers as a method to integrate both approaches more effectively, leading to superior prediction accuracies.

Results

An inducer (ULCA) in combination with a Random Forest classifier compares favorably to several sequence-based methods, semantic similarity measures and multi-kernel approaches. On a newly created set of high-quality interaction data, the proposed method achieves high cross-species prediction accuracies (Area under the ROC curve ≤ 0.88), rendering it a valuable companion to sequence-based methods.

Availability

Software and datasets are available at http://bioinformatics.org.au/go2ppi/

Contact

m.ragan@uq.edu.au.",2011-11-04 +22057161,Detecting differential binding of transcription factors with ChIP-seq.,"

Summary

Increasing number of ChIP-seq experiments are investigating transcription factor binding under multiple experimental conditions, for example, various treatment conditions, several distinct time points and different treatment dosage levels. Hence, identifying differential binding sites across multiple conditions is of practical importance in biological and medical research. To this end, we have developed a powerful and flexible program, called DBChIP, to detect differentially bound sharp binding sites across multiple conditions, with or without matching control samples. By assigning uncertainty measure to the putative differential binding sites, DBChIP facilitates downstream analysis. DBChIP is implemented in R programming language and can work with a wide range of sequencing file formats.

Availability

R package DBChIP is available at http://pages.cs.wisc.edu/~kliang/DBChIP/ CONTACT: kliang@stat.wisc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-03 +21453324,V-REVCOMP: automated high-throughput detection of reverse complementary 16S rRNA gene sequences in large environmental and taxonomic datasets.,"Reverse complementary DNA sequences - sequences that are inadvertently given backwards with all purines and pyrimidines transposed - can affect sequence analysis detrimentally unless taken into account. We present an open-source, high-throughput software tool -v-revcomp (http://www.cmde.science.ubc.ca/mohn/software.html) - to detect and reorient reverse complementary entries of the small-subunit rRNA (16S) gene from sequencing datasets, particularly from environmental sources. The software supports sequence lengths ranging from full length down to the short reads that are characteristic of next-generation sequencing technologies. We evaluated the reliability of v-revcomp by screening all 406 781 16S sequences deposited in release 102 of the curated SILVA database and demonstrated that the tool has a detection accuracy of virtually 100%. We subsequently used v-revcomp to analyse 1 171 646 16S sequences deposited in the International Nucleotide Sequence Databases and found that about 1% of these user-submitted sequences were reverse complementary. In addition, a nontrivial proportion of the entries were otherwise anomalous, including reverse complementary chimeras, sequences associated with wrong taxa, nonribosomal genes, sequences of poor quality or otherwise erroneous sequences without a reasonable match to any other entry in the database. 
Thus, v-revcomp is highly efficient in detecting and reorienting reverse complementary 16S sequences of almost any length and can be used to detect various sequence anomalies.",2011-04-27 +21521667,Blind image quality assessment: from natural scene statistics to perceptual quality.,"Our approach to blind image quality assessment (IQA) is based on the hypothesis that natural scenes possess certain statistical properties which are altered in the presence of distortion, rendering them un-natural; and that by characterizing this un-naturalness using scene statistics, one can identify the distortion afflicting the image and perform no-reference (NR) IQA. Based on this theory, we propose an (NR)/blind algorithm-the Distortion Identification-based Image Verity and INtegrity Evaluation (DIIVINE) index-that assesses the quality of a distorted image without need for a reference image. DIIVINE is based on a 2-stage framework involving distortion identification followed by distortion-specific quality assessment. DIIVINE is capable of assessing the quality of a distorted image across multiple distortion categories, as against most NR IQA algorithms that are distortion-specific in nature. DIIVINE is based on natural scene statistics which govern the behavior of natural images. In this paper, we detail the principles underlying DIIVINE, the statistical features extracted and their relevance to perception and thoroughly evaluate the algorithm on the popular LIVE IQA database. Further, we compare the performance of DIIVINE against leading full-reference (FR) IQA algorithms and demonstrate that DIIVINE is statistically superior to the often used measure of peak signal-to-noise ratio (PSNR) and statistically equivalent to the popular structural similarity index (SSIM). 
A software release of DIIVINE has been made available online: http://live.ece.utexas.edu/research/quality/DIIVINE_release.zip for public use and evaluation.",2011-04-25 +22332239,SBAL: a practical tool to generate and edit structure-based amino acid sequence alignments.,"

Summary

Both alignment generation and visualization are important processes for producing biologically meaningful sequence alignments. Computational tools that combine reliable, automated and semi-automated approaches to produce secondary structure-based alignments with an appropriate visualization of the results are rare. We have developed SBAL, a tool to generate and edit secondary structure-based sequence alignments. It is easy to install and provides a user-friendly interface. Sequence alignments are displayed, with secondary structure assignments mapped to their corresponding regions in the sequence by using a simple colour scheme. The algorithm implemented for automated and semi-automated secondary structure-based alignment calculations shows a comparable performance to existing software.

Availability and implementation

SBAL has been implemented in Java to provide cross-platform compatibility. SBAL is freely available to academic users at http://www.structuralchemistry.org/pcsb/. Users will be asked for their name, institution and email address. A manual can also be downloaded from this site. The software, manual and test sets are also available as supplementary material.

Contact

conan.wang@griffith.edu.au

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-02-12 +21526128,vProtein: identifying optimal amino acid complements from plant-based foods.,"

Background

Indispensible amino acids (IAAs) are used by the body in different proportions. Most animal-based foods provide these IAAs in roughly the needed proportions, but many plant-based foods provide different proportions of IAAs. To explore how these plant-based foods can be better used in human nutrition, we have created the computational tool vProtein to identify optimal food complements to satisfy human protein needs.

Methods

vProtein uses 1251 plant-based foods listed in the United States Department of Agriculture standard release 22 database to determine the quantity of each food or pair of foods required to satisfy human IAA needs as determined by the 2005 daily recommended intake. The quantity of food in a pair is found using a linear programming approach that minimizes total calories, total excess IAAs, or the total weight of the combination.

Results

For single foods, vProtein identifies foods with particularly balanced IAA patterns such as wheat germ, quinoa, and cauliflower. vProtein also identifies foods with particularly unbalanced IAA patterns such as macadamia nuts, degermed corn products, and wakame seaweed. Although less useful alone, some unbalanced foods provide unusually good complements, such as Brazil nuts to legumes. Interestingly, vProtein finds no statistically significant bias toward grain/legume pairings for protein complementation. These analyses suggest that pairings of plant-based foods should be based on the individual foods themselves instead of based on broader food group-food group pairings. Overall, the most efficient pairings include sweet corn/tomatoes, apple/coconut, and sweet corn/cherry. The top pairings also highlight the utility of less common protein sources such as the seaweeds laver and spirulina, pumpkin leaves, and lambsquarters. From a public health perspective, many of the food pairings represent novel, low cost food sources to combat malnutrition. Full analysis results are available online at http://www.foodwiki.com/vprotein.",2011-04-22 +22576365,QGRS-H Predictor: a web server for predicting homologous quadruplex forming G-rich sequence motifs in nucleotide sequences.,"Naturally occurring G-quadruplex structural motifs, formed by guanine-rich nucleic acids, have been reported in telomeric, promoter and transcribed regions of mammalian genomes. G-quadruplex structures have received significant attention because of growing evidence for their role in important biological processes, human disease and as therapeutic targets. Lately, there has been much interest in the potential roles of RNA G-quadruplexes as cis-regulatory elements of post-transcriptional gene expression. Large-scale computational genomics studies on G-quadruplexes have difficulty validating their predictions without laborious testing in 'wet' labs. 
We have developed a bioinformatics tool, QGRS-H Predictor that can map and analyze conserved putative Quadruplex forming 'G'-Rich Sequences (QGRS) in mRNAs, ncRNAs and other nucleotide sequences, e.g. promoter, telomeric and gene flanking regions. Identifying conserved regulatory motifs helps validate computations and enhances accuracy of predictions. The QGRS-H Predictor is particularly useful for mapping homologous G-quadruplex forming sequences as cis-regulatory elements in the context of 5'- and 3'-untranslated regions, and CDS sections of aligned mRNA sequences. QGRS-H Predictor features highly interactive graphic representation of the data. It is a unique and user-friendly application that provides many options for defining and studying G-quadruplexes. The QGRS-H Predictor can be freely accessed at: http://quadruplex.ramapo.edu/qgrs/app/start.",2012-05-10 +22316088,Can racial disparities in optimal gout treatment be reduced? Evidence from a randomized trial.,"There is a disproportionate burden of gout in African-Americans in the U.S. due to a higher disease prevalence and lower likelihood of receiving urate-lowering therapy (ULT), compared to Caucasians. There is an absence of strong data as to whether the response to ULT differs by race/ethnicity. BMC Musculoskeletal Disorders recently published a secondary analyses of the CONFIRMS trial, a large randomized controlled, double-blind trial of 2,269 gout patients. The authors reported that the likelihood of achieving the primary study efficacy end-point of achieving serum urate<6 mg/dl was similar between African-Americans and Caucasians, for all three treatment arms (Febuxostat 40 mg and 80 mg and allopurinol 300/200 mg). More importantly, rates were similar in subgroups of patients with mild or moderate renal insufficiency. Adverse event rates were similar, as were the rates of gout flares. These findings constitute a convincing evidence to pursue aggressive ULT in gout patients, regardless of race/ethnicity. 
This approach will likely help to narrow the documented racial disparities in gout care. Please see related article: http://www.biomedcentral.com/1471-2474/13/15.",2012-02-09 +23166753,An integrative computational framework based on a two-step random forest algorithm improves prediction of zinc-binding sites in proteins.,"Zinc-binding proteins are the most abundant metalloproteins in the Protein Data Bank where the zinc ions usually have catalytic, regulatory or structural roles critical for the function of the protein. Accurate prediction of zinc-binding sites is not only useful for the inference of protein function but also important for the prediction of 3D structure. Here, we present a new integrative framework that combines multiple sequence and structural properties and graph-theoretic network features, followed by an efficient feature selection to improve prediction of zinc-binding sites. We investigate what information can be retrieved from the sequence, structure and network levels that is relevant to zinc-binding site prediction. We perform a two-step feature selection using random forest to remove redundant features and quantify the relative importance of the retrieved features. Benchmarking on a high-quality structural dataset containing 1,103 protein chains and 484 zinc-binding residues, our method achieved >80% recall at a precision of 75% for the zinc-binding residues Cys, His, Glu and Asp on 5-fold cross-validation tests, which is a 10%-28% higher recall at the 75% equal precision compared to SitePredict and zincfinder at residue level using the same dataset. The independent test also indicates that our method has achieved recall of 0.790 and 0.759 at residue and protein levels, respectively, which is a performance better than the other two methods. Moreover, AUC (the Area Under the Curve) and AURPC (the Area Under the Recall-Precision Curve) by our method are also respectively better than those of the other two methods. 
Our method can not only be applied to large-scale identification of zinc-binding sites when structural information of the target is available, but also give valuable insights into important features arising from different levels that collectively characterize the zinc-binding sites. The scripts and datasets are available at http://protein.cau.edu.cn/zincidentifier/.",2012-11-14 +21533164,Evidence for reductive genome evolution and lateral acquisition of virulence functions in two Corynebacterium pseudotuberculosis strains.,"

Background

Corynebacterium pseudotuberculosis, a gram-positive, facultative intracellular pathogen, is the etiologic agent of the disease known as caseous lymphadenitis (CL). CL mainly affects small ruminants, such as goats and sheep; it also causes infections in humans, though rarely. This species is distributed worldwide, but it has the most serious economic impact in Oceania, Africa and South America. Although C. pseudotuberculosis causes major health and productivity problems for livestock, little is known about the molecular basis of its pathogenicity.

Methodology and findings

We characterized two C. pseudotuberculosis genomes (Cp1002, isolated from goats; and CpC231, isolated from sheep). Analysis of the predicted genomes showed high similarity in genomic architecture, gene content and genetic order. When C. pseudotuberculosis was compared with other Corynebacterium species, it became evident that this pathogenic species has lost numerous genes, resulting in one of the smallest genomes in the genus. Other differences that could be part of the adaptation to pathogenicity include a lower GC content, of about 52%, and a reduced gene repertoire. The C. pseudotuberculosis genome also includes seven putative pathogenicity islands, which contain several classical virulence factors, including genes for fimbrial subunits, adhesion factors, iron uptake and secreted toxins. Additionally, all of the virulence factors in the islands have characteristics that indicate horizontal transfer.

Conclusions

These particular genome characteristics of C. pseudotuberculosis, as well as its acquired virulence factors in pathogenicity islands, provide evidence of its lifestyle and of the pathogenicity pathways used by this pathogen in the infection process. All genomes cited in this study are available in the NCBI Genbank database (http://www.ncbi.nlm.nih.gov/genbank/) under accession numbers CP001809 and CP001829.",2011-04-18 +21789182,REVIGO summarizes and visualizes long lists of gene ontology terms.,"Outcomes of high-throughput biological experiments are typically interpreted by statistical testing for enriched gene functional categories defined by the Gene Ontology (GO). The resulting lists of GO terms may be large and highly redundant, and thus difficult to interpret.REVIGO is a Web server that summarizes long, unintelligible lists of GO terms by finding a representative subset of the terms using a simple clustering algorithm that relies on semantic similarity measures. Furthermore, REVIGO visualizes this non-redundant GO term set in multiple ways to assist in interpretation: multidimensional scaling and graph-based visualizations accurately render the subdivisions and the semantic relationships in the data, while treemaps and tag clouds are also offered as alternative views. REVIGO is freely available at http://revigo.irb.hr/.",2011-07-18 +21819139,Learning to predict chemical reactions.,"Being able to predict the course of arbitrary chemical reactions is essential to the theory and applications of organic chemistry. Approaches to the reaction prediction problems can be organized around three poles corresponding to: (1) physical laws; (2) rule-based expert systems; and (3) inductive machine learning. Previous approaches at these poles, respectively, are not high throughput, are not generalizable or scalable, and lack sufficient data and structure to be implemented. We propose a new approach to reaction prediction utilizing elements from each pole. 
Using a physically inspired conceptualization, we describe single mechanistic reactions as interactions between coarse approximations of molecular orbitals (MOs) and use topological and physicochemical attributes as descriptors. Using an existing rule-based system (Reaction Explorer), we derive a restricted chemistry data set consisting of 1630 full multistep reactions with 2358 distinct starting materials and intermediates, associated with 2989 productive mechanistic steps and 6.14 million unproductive mechanistic steps. And from machine learning, we pose identifying productive mechanistic steps as a statistical ranking, information retrieval problem: given a set of reactants and a description of conditions, learn a ranking model over potential filled-to-unfilled MO interactions such that the top-ranked mechanistic steps yield the major products. The machine learning implementation follows a two-stage approach, in which we first train atom level reactivity filters to prune 94.00% of nonproductive reactions with a 0.01% error rate. Then, we train an ensemble of ranking models on pairs of interacting MOs to learn a relative productivity function over mechanistic steps in a given system. Without the use of explicit transformation patterns, the ensemble perfectly ranks the productive mechanism at the top 89.05% of the time, rising to 99.86% of the time when the top four are considered. Furthermore, the system is generalizable, making reasonable predictions over reactants and conditions which the rule-based expert does not handle. A web interface to the machine learning based mechanistic reaction predictor is accessible through our chemoinformatics portal ( http://cdb.ics.uci.edu) under the Toolkits section.",2011-09-02 +22574113,DNA barcode goes two-dimensions: DNA QR code web server.,"The DNA barcoding technology uses a standard region of DNA sequence for species identification and discovery. 
At present, ""DNA barcode"" actually refers to DNA sequences, which are not amenable to information storage, recognition, and retrieval. Our aim is to identify the best symbology that can represent DNA barcode sequences in practical applications. A comprehensive set of sequences for five DNA barcode markers ITS2, rbcL, matK, psbA-trnH, and CO1 was used as the test data. Fifty-three different types of one-dimensional and ten two-dimensional barcode symbologies were compared based on different criteria, such as coding capacity, compression efficiency, and error detection ability. The quick response (QR) code was found to have the largest coding capacity and relatively high compression ratio. To facilitate the further usage of QR code-based DNA barcodes, a web server was developed and is accessible at http://qrfordna.dnsalias.org. The web server allows users to retrieve the QR code for a species of interests, convert a DNA sequence to and from a QR code, and perform species identification based on local and global sequence similarities. In summary, the first comprehensive evaluation of various barcode symbologies has been carried out. The QR code has been found to be the most appropriate symbology for DNA barcode sequences. A web server has also been constructed to allow biologists to utilize QR codes in practical DNA barcoding applications.",2012-05-04 +22903279,Different roles of GNAS and cAMP signaling during early and late stages of osteogenic differentiation.,"Progressive osseous heteroplasia (POH) and fibrous dysplasia (FD) are genetic diseases of bone formation at opposite ends of the osteogenic spectrum: imperfect osteogenesis of the skeleton occurs in FD, while heterotopic ossification in skin, subcutaneous fat, and skeletal muscle forms in POH. POH is caused by heterozygous inactivating germline mutations in GNAS, which encodes G-protein subunits regulating the cAMP pathway, while FD is caused by GNAS somatic activating mutations. 
We used pluripotent mouse ES cells to examine the effects of Gnas dysregulation on osteoblast differentiation. At the earliest stages of osteogenesis, Gnas transcripts Gsα, XLαs and 1A are expressed at low levels and cAMP levels are also low. Inhibition of cAMP signaling (as in POH) by 2',5'-dideoxyadenosine enhanced osteoblast differentiation while conversely, increased cAMP signaling (as in FD), induced by forskolin, inhibited osteoblast differentiation. Notably, increased cAMP was inhibitory for osteogenesis only at early stages after osteogenic induction. Expression of osteogenic and adipogenic markers showed that increased cAMP enhanced adipogenesis and impaired osteoblast differentiation even in the presence of osteogenic factors, supporting cAMP as a critical regulator of osteoblast and adipocyte lineage commitment. Furthermore, increased cAMP signaling decreased BMP pathway signaling, indicating that G protein-cAMP pathway activation (as in FD) inhibits osteoblast differentiation, at least in part by blocking the BMP-Smad pathway, and suggesting that GNAS inactivation as occurs in POH enhances osteoblast differentiation, at least in part by stimulating BMP signaling. These data support that differences in cAMP levels during early stages of cell differentiation regulate cell fate decisions. Supporting information available online at http://www.thieme-connect.de/ejournals/toc/hmr.",2012-08-17 +23249606,Mining biomarker information in biomedical literature.,"

Background

For selection and evaluation of potential biomarkers, inclusion of already published information is of utmost importance. In spite of significant advancements in text- and data-mining techniques, the vast knowledge space of biomarkers in biomedical text has remained unexplored. Existing named entity recognition approaches are not sufficiently selective for the retrieval of biomarker information from the literature. The purpose of this study was to identify textual features that enhance the effectiveness of biomarker information retrieval for different indication areas and diverse end user perspectives.

Methods

A biomarker terminology was created and further organized into six concept classes. Performance of this terminology was optimized towards balanced selectivity and specificity. The information retrieval performance using the biomarker terminology was evaluated based on various combinations of the terminology's six classes. Further validation of these results was performed on two independent corpora representing two different neurodegenerative diseases.

Results

The current state of the biomarker terminology contains 119 entity classes supported by 1890 different synonyms. The result of information retrieval shows improved retrieval rate of informative abstracts, which is achieved by including clinical management terms and evidence of gene/protein alterations (e.g. gene/protein expression status or certain polymorphisms) in combination with disease and gene name recognition. When additional filtering through other classes (e.g. diagnostic or prognostic methods) is applied, the typical high number of unspecific search results is significantly reduced. The evaluation results suggest that this approach enables the automated identification of biomarker information in the literature. A demo version of the search engine SCAIView, including the biomarker retrieval, is made available to the public through http://www.scaiview.com/scaiview-academia.html.

Conclusions

The approach presented in this paper demonstrates that using a dedicated biomarker terminology for automated analysis of the scientific literature maybe helpful as an aid to finding biomarker information in text. Successful extraction of candidate biomarkers information from published resources can be considered as the first step towards developing novel hypotheses. These hypotheses will be valuable for the early decision-making in the drug discovery and development process.",2012-12-18 +21498547,The Laccase Engineering Database: a classification and analysis system for laccases and related multicopper oxidases.,"Laccases and their homologues form the protein superfamily of multicopper oxidases (MCO). They catalyze the oxidation of many, particularly phenolic substances, and, besides playing an important role in many cellular activities, are of interest in biotechnological applications. The Laccase Engineering Database (LccED, http://www.lcced.uni-stuttgart.de) was designed to serve as a tool for a systematic sequence-based classification and analysis of the diverse multicopper oxidase protein family. More than 2200 proteins were classified into 11 superfamilies and 56 homologous families. For each family, the LccED provides multiple sequence alignments, phylogenetic trees and family-specific HMM profiles. The integration of structures for 14 different proteins allows a comprehensive comparison of sequences and structures to derive biochemical properties. Among the families, the distribution of the proteins regarding different kingdoms was investigated. The database was applied to perform a comprehensive analysis by MCO- and laccase-specific patterns. The LccED combines information of sequences and structures of MCOs. It serves as a classification tool to assign new proteins to a homologous family and can be applied to investigate sequence-structure-function relationship and to guide protein engineering. 
Database URL: http://www.lcced.uni-stuttgart.de.",2011-04-15 +21495663,TIN-a combinatorial compound collection of synthetically feasible multicomponent synthesis products.,"The synthetic feasibility of any compound library used for virtual screening is critical to the drug discovery process. TIN, a recursive acronym for 'TIN Is Not commercial', is a virtual combinatorial database enumeration of diversity-orientated multicomponent syntheses (MCR). Using a 'one-pot' synthetic technique, 12 unique small molecule scaffolds were developed, predominantly styrylisoxazoles and bis-acetylenic ketones, with extensive derivatization potential. Importantly, the scaffolds were accessible in a single operation from commercially available sources containing R-groups which were then linked combinatorially. This resulted in a combinatorial database of over 28 million product structures, each of which is synthetically feasible. These structures can be accessed through a free Web-based 2D structure search engine or downloaded in SMILES, MOL2, and SDF formats. Subsets include a 10% diversity subset, a drug-like subset, and a lead-like subset that are also freely available for download and virtual screening ( http://mmg.rcsi.ie:8080/tin ).",2011-04-15 +21264334,PoolHap: inferring haplotype frequencies from pooled samples by next generation sequencing.,"With the advance of next-generation sequencing (NGS) technologies, increasingly ambitious applications are becoming feasible. A particularly powerful one is the sequencing of polymorphic, pooled samples. The pool can be naturally occurring, as in the case of multiple pathogen strains in a blood sample, multiple types of cells in a cancerous tissue sample, or multiple isoforms of mRNA in a cell. In these cases, it's difficult or impossible to partition the subtypes experimentally before sequencing, and those subtype frequencies must hence be inferred. 
In addition, investigators may occasionally want to artificially pool the sample of a large number of individuals for reasons of cost-efficiency, e.g., when carrying out genetic mapping using bulked segregant analysis. Here we describe PoolHap, a computational tool for inferring haplotype frequencies from pooled samples when haplotypes are known. The key insight into why PoolHap works is that the large number of SNPs that come with genome-wide coverage can compensate for the uneven coverage across the genome. The performance of PoolHap is illustrated and discussed using simulated and real data. We show that PoolHap is able to accurately estimate the proportions of haplotypes with less than 2% error for 34-strain mixtures with 2X total coverage Arabidopsis thaliana whole genome polymorphism data. This method should facilitate greater biological insight into heterogeneous samples that are difficult or impossible to isolate experimentally. Software and users manual are freely available at http://arabidopsis.gmi.oeaw.ac.at/quan/poolhap/.",2011-01-05 +21498403,PathScan: a tool for discerning mutational significance in groups of putative cancer genes.,"

Motivation

The expansion of cancer genome sequencing continues to stimulate development of analytical tools for inferring relationships between somatic changes and tumor development. Pathway associations are especially consequential, but existing algorithms are demonstrably inadequate.

Methods

Here, we propose the PathScan significance test for the scenario where pathway mutations collectively contribute to tumor development. Its design addresses two aspects that established methods neglect. First, we account for variations in gene length and the consequent differences in their mutation probabilities under the standard null hypothesis of random mutation. The associated spike in computational effort is mitigated by accurate convolution-based approximation. Second, we combine individual probabilities into a multiple-sample value using Fisher-Lancaster theory, thereby improving differentiation between a few highly mutated genes and many genes having only a few mutations apiece. We investigate accuracy, computational effort and power, reporting acceptable performance for each.

Results

As an example calculation, we re-analyze KEGG-based lung adenocarcinoma pathway mutations from the Tumor Sequencing Project. Our test recapitulates the most significant pathways and finds that others for which the original test battery was inconclusive are not actually significant. It also identifies the focal adhesion pathway as being significantly mutated, a finding consistent with earlier studies. We also expand this analysis to other databases: Reactome, BioCarta, Pfam, PID and SMART, finding additional hits in ErbB and EPHA signaling pathways and regulation of telomerase. All have implications and plausible mechanistic roles in cancer. Finally, we discuss aspects of extending the method to integrate gene-specific background rates and other types of genetic anomalies.

Availability

PathScan is implemented in Perl and is available from the Genome Institute at: http://genome.wustl.edu/software/pathscan.",2011-04-14 +22207818,Cambrian archaeocyathan metazoans: revision of morphological characters and standardization of genus descriptions to establish an online identification tool.,"Archaeocyatha represent the oldest calcified sponges and the first metazoans to build bioconstructions in association with calcimicrobes. They are a key group in biology, evolutionary studies, biostratigraphy, paleoecology and paleogeography of the early Cambrian times. The establishing of a new standardized terminology for archaeocyathans description has permitted the creation of the first knowledge base in English including descriptions of all archaeocyathan genera. This base, using the XPER² software package, is an integral part of the -Archaeocyatha- a knowledge base website, freely available at url http://www.infosyslab.fr/archaeocyatha. The website is composed of common information about Archaeocyatha, general remarks about the knowledge base, the description of the 307 genera recognized with images of type-specimens of type-species for each genus, as well as additional morphological data, an interactive free access key and its user guide.The automatic analysis and comparison of the digitized descriptions have identified some genera with highly similar morphology. These results are a great help for future taxonomic revisions and suggest a number of possible synonymies that require further study.",2011-11-28 +21563225,TMKink: a method to predict transmembrane helix kinks.,"A hallmark of membrane protein structure is the large number of distorted transmembrane helices. Because of the prevalence of bends, it is important to not only understand how they are generated but also to learn how to predict their occurrence. 
Here, we find that there are local sequence preferences in kinked helices, most notably a higher abundance of proline, which can be exploited to identify bends from local sequence information. A neural network predictor identifies over two-thirds of all bends (sensitivity 0.70) with high reliability (specificity 0.89). It is likely that more structural data will allow for better helix distortion predictors with increased coverage in the future. The kink predictor, TMKink, is available at http://tmkinkpredictor.mbi.ucla.edu/.",2011-06-02 +23143026,Findings of the UK national audit evaluating image-guided or image-assisted liver biopsy. Part II. Minor and major complications and procedure-related mortality.,"

Purpose

To determine the frequency of complications and death following image-guided and/or image-assisted liver biopsy and to identify significant variables associated with an increased risk of complications or death.

Materials and methods

Institutional review board approval for this type of study is not required in the United Kingdom. United Kingdom radiology departments with a department leader for audit registered with the Royal College of Radiologists were invited to participate. The first 50 consecutive patients who underwent liver biopsy in 2008 were included. Audit standards were developed for minor pain (<30%), severe pain (<3%), vasovagal hypotension (<3%), significant hemorrhage (<0.5%), hemobilia (<0.1%), puncture of another organ (<0.1%), and death (<0.1%). Organizational, clinical, and coagulation variables were investigated statistically for their association with complications and/or death.

Results

Data were obtained from 87 of 210 departments (41%). Audit standards were met for pain, hypotension, hemorrhage, hemobilia, and puncture of another organ. There were four hemorrhage-related deaths, and this target was narrowly missed (rate achieved in practice, 0.11% [four of 3486 patients]). Fifteen additional patients experienced at least one major complication. The international normalized ratio (INR) was absent in 3% of cases (97 of 2951 patients), the platelet count was absent in 1% (32 of 2986 patients), the INR was more than 1 week old in 8% (229 of 2888 patients), and the platelet count was more than 1 week old in 10% (291 of 2955 patients).

Conclusion

Results of this audit confirm that image-guided and image-assisted biopsy is performed safely in United Kingdom radiology departments, with complication rates within expected parameters. Preprocedural clotting assessment was inadequate in some cases and would merit repeat audit.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12120224/-/DC1.",2012-11-09 +22496776,Fast and accurate taxonomic assignments of metagenomic sequences using MetaBin.,"Taxonomic assignment of sequence reads is a challenging task in metagenomic data analysis, for which the present methods mainly use either composition- or homology-based approaches. Though the homology-based methods are more sensitive and accurate, they suffer primarily due to the time needed to generate the Blast alignments. We developed the MetaBin program and web server for better homology-based taxonomic assignments using an ORF-based approach. By implementing Blat as the faster alignment method in place of Blastx, the analysis time has been reduced by severalfold. It is benchmarked using both simulated and real metagenomic datasets, and can be used for both single and paired-end sequence reads of varying lengths (≥45 bp). To our knowledge, MetaBin is the only available program that can be used for the taxonomic binning of short reads (<100 bp) with high accuracy and high sensitivity using a homology-based approach. The MetaBin web server can be used to carry out the taxonomic analysis, by either submitting reads or Blastx output. It provides several options including construction of taxonomic trees, creation of a composition chart, functional analysis using COGs, and comparative analysis of multiple metagenomic datasets. MetaBin web server and a standalone version for high-throughput analysis are available freely at http://metabin.riken.jp/.",2012-04-04 +21919860,RSARF: prediction of residue solvent accessibility from protein sequence using random forest method.,"Prediction of protein structure from its amino acid sequence is still a challenging problem. The complete physicochemical understanding of protein folding is essential for the accurate structure prediction. 
Knowledge of residue solvent accessibility gives useful insights into protein structure prediction and function prediction. In this work, we propose a random forest method, RSARF, to predict residue accessible surface area from protein sequence information. The training and testing was performed using 120 proteins containing 22006 residues. For each residue, buried and exposed state was computed using five thresholds (0%, 5%, 10%, 25%, and 50%). The prediction accuracy for 0%, 5%, 10%, 25%, and 50% thresholds are 72.9%, 78.25%, 78.12%, 77.57% and 72.07% respectively. Further, comparison of RSARF with other methods using a benchmark dataset containing 20 proteins shows that our approach is useful for prediction of residue solvent accessibility from protein sequence without using structural information. The RSARF program, datasets and supplementary data are available at http://caps.ncbs.res.in/download/pugal/RSARF/.",2012-01-01 +21491495,Structure-based identification of catalytic residues.,"The identification of catalytic residues is an essential step in functional characterization of enzymes. We present a purely structural approach to this problem, which is motivated by the difficulty of evolution-based methods to annotate structural genomics targets that have few or no homologs in the databases. Our approach combines a state-of-the-art support vector machine (SVM) classifier with novel structural features that augment structural clues by spatial averaging and Z scoring. Special attention is paid to the class imbalance problem that stems from the overwhelming number of non-catalytic residues in enzymes compared to catalytic residues. 
This problem is tackled by: (1) optimizing the classifier to maximize a performance criterion that considers both Type I and Type II errors in the classification of catalytic and non-catalytic residues; (2) under-sampling non-catalytic residues before SVM training; and (3) during SVM training, penalizing errors in learning catalytic residues more than errors in learning non-catalytic residues. Tested on four enzyme datasets, one specifically designed by us to mimic the structural genomics scenario and three previously evaluated datasets, our structure-based classifier is never inferior to similar structure-based classifiers and comparable to classifiers that use both structural and evolutionary features. In addition to the evaluation of the performance of catalytic residue identification, we also present detailed case studies on three proteins. This analysis suggests that many false positive predictions may correspond to binding sites and other functional residues. A web server that implements the method, our own-designed database, and the source code of the programs are publicly available at http://www.cs.bgu.ac.il/∼meshi/functionPrediction.",2011-04-12 +21491493,Integrating the intrinsic conformational preferences of noncoded α-amino acids modified at the peptide bond into the noncoded amino acids database.,"Recently, we reported a database (Noncoded Amino acids Database; http://recerca.upc.edu/imem/index.htm) that was built to compile information about the intrinsic conformational preferences of nonproteinogenic residues determined by quantum mechanical calculations, as well as bibliographic information about their synthesis, physical and spectroscopic characterization, the experimentally established conformational propensities, and applications (Revilla-López et al., J Phys Chem B 2010;114:7413-7422). The database initially contained the information available for α-tetrasubstituted α-amino acids. 
In this work, we extend NCAD to three families of compounds, which can be used to engineer peptides and proteins incorporating modifications at the--NHCO--peptide bond. Such families are: N-substituted α-amino acids, thio-α-amino acids, and diamines and diacids used to build retropeptides. The conformational preferences of these compounds have been analyzed and described based on the information captured in the database. In addition, we provide an example of the utility of the database and of the compounds it compiles in protein and peptide engineering. Specifically, the symmetry of a sequence engineered to stabilize the 3(10)-helix with respect to the α-helix has been broken without perturbing significantly the secondary structure through targeted replacements using the information contained in the database.",2011-04-12 +21486937,Cobweb: a Java applet for network exploration and visualisation.,"

Summary

Cobweb is a Java applet for real-time network visualization; its strength lies in enabling the interactive exploration of networks. Therefore, it allows new nodes to be interactively added to a network by querying a database on a server. The network constantly rearranges to provide the most meaningful topological view.

Availability

Cobweb is available under the GPLv3 and may be freely downloaded at http://bioinformatics.charite.de/cobweb.",2011-04-12 +23021410,An approach for diagnosing plasma cell myeloma by three-color flow cytometry based on kappa/lambda ratios of CD38-gated CD138(+) cells.,"

Background

World Health Organization (WHO) criteria are commonly used to diagnose plasma cell myeloma (PCM); however, these criteria are complex and require several laboratory parameters. For differentiating reactive plasmacytosis from clonal plasma cell (PC) neoplasms such as PCM, it is important to accurately determine the expression of cytoplasmic immunoglobulin light chains.

Methods

We retrospectively analyzed the records of 27 selected patients with PCM who underwent bone biopsies for confirmative diagnosis according to WHO criteria. Twenty-three controls were also investigated. In the present study, all the samples were analyzed using flow cytometry (FC) in the side scatter vs. CD38 histogram mode, and the CD38-gated PC population was identified. Bivariate histograms of CD138/kappa and CD138/lambda were assessed, and the ratios of dual-positive cells to the CD138(+) PC population were calculated. The kappa/lambda ratio was defined as the ratio of CD138/kappa to CD138/lambda.

Results

PCM cells were distinguished from normal PCs using cutoff levels between 0.76 and 1.5, at a sensitivity of 96.3% and specificity of 95.7%.

Conclusions

Three-color FC analysis is simple to perform and inexpensive, with clinically relevant data obtained soon after the completion of FC measurements. The detection of the cytoplasmic kappa/lambda ratio of CD38-gated CD138(+) PCs may be a useful tool in the diagnosis of PCM. To the best of our knowledge, this report represents the first diagnostic assessment of the cytoplasmic kappa/lambda ratio in CD38-gated CD138+ PCs using FC analysis. This method may help in more simple, efficient, rapid, and accurate diagnosis of PCM.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1568085959771735.",2012-09-28 +22068921,MEG-SIM: a web portal for testing MEG analysis methods using realistic simulated and empirical data.,"MEG and EEG measure electrophysiological activity in the brain with exquisite temporal resolution. Because of this unique strength relative to noninvasive hemodynamic-based measures (fMRI, PET), the complementary nature of hemodynamic and electrophysiological techniques is becoming more widely recognized (e.g., Human Connectome Project). However, the available analysis methods for solving the inverse problem for MEG and EEG have not been compared and standardized to the extent that they have for fMRI/PET. A number of factors, including the non-uniqueness of the solution to the inverse problem for MEG/EEG, have led to multiple analysis techniques which have not been tested on consistent datasets, making direct comparisons of techniques challenging (or impossible). Since each of the methods is known to have their own set of strengths and weaknesses, it would be beneficial to quantify them. Toward this end, we are announcing the establishment of a website containing an extensive series of realistic simulated data for testing purposes ( http://cobre.mrn.org/megsim/ ). 
Here, we present: 1) a brief overview of the basic types of inverse procedures; 2) the rationale and description of the testbed created; and 3) cases emphasizing functional connectivity (e.g., oscillatory activity) suitable for a wide assortment of analyses including independent component analysis (ICA), Granger Causality/Directed transfer function, and single-trial analysis.",2012-04-01 +23003214,Bulk superconductivity in bismuth oxysulfide Bi4O4S3.,"A very recent report on the observation of superconductivity in Bi(4)O(4)S(3) [Mizuguchi, Y.; http://arxiv.org/abs/1207.3145] could potentially reignite the search for superconductivity in a broad range of layered sulfides. We report here the synthesis of Bi(4)O(4)S(3) at 500 °C by a vacuum encapsulation technique and its basic characterizations. The as-synthesized Bi(4)O(4)S(3) was contaminated with small amounts of Bi(2)S(3) and Bi impurities. The majority phase was found to be tetragonal (space group I4/mmm) with lattice parameters a = 3.9697(2) Å and c = 41.3520(1) Å. Both AC and DC magnetization measurements confirmed that Bi(4)O(4)S(3) is a bulk superconductor with a superconducting transition temperature (T(c)) of 4.4 K. Isothermal magnetization (M-H) measurements indicated closed loops with clear signatures of flux pinning and irreversible behavior. The lower critical field (H(c1)) at 2 K for the new superconductor was found to be ~15 Oe. Magnetotransport measurements showed a broadening of the resistivity (ρ) and a decrease in T(c) (ρ = 0) with increasing magnetic field. The extrapolated upper critical field H(c2)(0) was ~31 kOe with a corresponding Ginzburg-Landau coherence length of ~100 Å . In the normal state, the ρ ~ T(2) dependence was not indicated. Hall resistivity data showed a nonlinear magnetic field dependence. Our magnetization and electrical transport measurements substantiate the appearance of bulk superconductivity in as-synthesized Bi(4)O(4)S(3). 
On the other hand, Bi heat-treated at the same temperature is not superconducting, thus excluding the possibility of impurity-driven superconductivity in the newly discovered superconductor Bi(4)O(4)S(3).",2012-09-27 +21992572,Characteristics of Finnish and Swedish intensive care nursing narratives: a comparative analysis to support the development of clinical language technologies.,"

Background

Free text is helpful for entering information into electronic health records, but reusing it is a challenge. The need for language technology for processing Finnish and Swedish healthcare text is therefore evident; however, Finnish and Swedish are linguistically very dissimilar. In this paper we present a comparison of characteristics in Finnish and Swedish free-text nursing narratives from intensive care. This creates a framework for characterising and comparing clinical text and lays the groundwork for developing clinical language technologies.

Methods

Our material included daily nursing narratives from one intensive care unit in Finland and one in Sweden. Inclusion criteria for patients were an inpatient period of at least five days and an age of at least 16 years. We performed a comparative analysis as part of a collaborative effort between Finnish- and Swedish-speaking healthcare and language technology professionals that included both qualitative and quantitative aspects. The qualitative analysis addressed the content and structure of three average-sized health records from each country. In the quantitative analysis 514 Finnish and 379 Swedish health records were studied using various language technology tools.

Results

Although the two languages are not closely related, nursing narratives in Finland and Sweden had many properties in common. Both made use of specialised jargon and their content was very similar. However, many of these characteristics were challenging regarding development of language technology to support producing and using clinical documentation.

Conclusions

The way Finnish and Swedish intensive care nursing was documented was not country or language dependent, but shared a common context, principles and structural features and even similar vocabulary elements. Technology solutions are therefore likely to be applicable to a wider range of natural languages, but they need linguistic tailoring.

Availability

The Finnish and Swedish data can be found at: http://www.dsv.su.se/hexanord/data/.",2011-07-14 +22679863,An insight into the sialotranscriptome of Triatoma rubida (Hemiptera: Heteroptera).,"The kissing bug Triatoma rubida (Uhler, 1894) is found in southwestern United States and parts of Mexico where it is found infected with Trypanosoma cruzi, invades human dwellings and causes allergies from their bites. Although the protein salivary composition of several triatomine species is known, not a single salivary protein sequence is known from T. rubida. Furthermore, the salivary diversity of related hematophagous arthropods is very large probably because of the immune pressure from their hosts. Here we report the sialotranscriptome analysis of T. rubida based on the assembly of 1,820 high-quality expressed sequence tags, 51% of which code for putative secreted peptides, including lipocalins, members of the antigen five family, apyrase, hemolysin, and trialysin families. Interestingly, T. rubida lipocalins are at best 40% identical in primary sequence to those of T. protracta, a kissing bug that overlaps its range with T. rubida, indicating the diversity of the salivary lipocalins among species of the same hematophagous genus. We additionally found several expressed sequence tags coding for proteins of clear Trypanosoma spp. origin. This work contributes to the future development of markers of human and pet exposure to T. rubida and to the possible development of desensitization therapies. Supp. Data 1 and 2 (online only) of the transcriptome and deducted protein sequences can be obtained from http://exon.niaid.nih.gov/transcriptome/Trubida/Triru-S1-web.xlsx and http://exon.niaid.nih.gov/transcriptome/Trubida/Triru-S2-web.xlsx.",2012-05-01 +22210866,influx_s: increasing numerical stability and precision for metabolic flux analysis in isotope labelling experiments.,"

Motivation

The problem of stationary metabolic flux analysis based on isotope labelling experiments first appeared in the early 1950s and was basically solved in early 2000s. Several algorithms and software packages are available for this problem. However, the generic stochastic algorithms (simulated annealing or evolution algorithms) currently used in these software require a lot of time to achieve acceptable precision. For deterministic algorithms, a common drawback is the lack of convergence stability for ill-conditioned systems or when started from a random point.

Results

In this article, we present a new deterministic algorithm with significantly increased numerical stability and accuracy of flux estimation compared with commonly used algorithms. It requires relatively short CPU time (from several seconds to several minutes with a standard PC architecture) to estimate fluxes in the central carbon metabolism network of Escherichia coli.

Availability

The software package influx_s implementing this algorithm is distributed under an OpenSource licence at http://metasys.insa-toulouse.fr/software/influx/.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-12-30 +22723110,Identifying protein kinase target preferences using mass spectrometry.,"A general question in molecular physiology is how to identify candidate protein kinases corresponding to a known or hypothetical phosphorylation site in a protein of interest. It is generally recognized that the amino acid sequence surrounding the phosphorylation site provides information that is relevant to identification of the cognate protein kinase. Here, we present a mass spectrometry-based method for profiling the target specificity of a given protein kinase as well as a computational tool for the calculation and visualization of the target preferences. The mass spectrometry-based method identifies sites phosphorylated in response to in vitro incubation of protein mixtures with active recombinant protein kinases followed by standard phosphoproteomic methodologies. The computational tool, called ""PhosphoLogo,"" uses an information-theoretic algorithm to calculate position-specific amino acid preferences and anti-preferences from the mass-spectrometry data (http://helixweb.nih.gov/PhosphoLogo/). The method was tested using protein kinase A (catalytic subunit α), revealing the well-known preference for basic amino acids in positions -2 and -3 relative to the phosphorylated amino acid. It also provides evidence for a preference for amino acids with a branched aliphatic side chain in position +1, a finding compatible with known crystal structures of protein kinase A. The method was also employed to profile target preferences and anti-preferences for 15 additional protein kinases with potential roles in regulation of epithelial transport: CK2, p38, AKT1, SGK1, PKCδ, CaMK2δ, DAPK1, MAPKAPK2, PKD3, PIM1, OSR1, STK39/SPAK, GSK3β, Wnk1, and Wnk4.",2012-06-20 +22528414,Simulation of the effects of complex- formation equilibria in electrophoresis: I. 
mathematical model.,"Simul 5 Complex is a one-dimensional dynamic simulation software designed for electrophoresis, and it is based on a numerical solution of the governing equations, which include electromigration, diffusion and acid-base equilibria. A new mathematical model has been derived and implemented that extends the simulation capabilities of the program by complexation equilibria. The simulation can be set up with any number of constituents (analytes), which are complexed by one complex-forming agent (ligand). The complexation stoichiometry is 1:1, which is typical for systems containing cyclodextrins as the ligand. Both the analytes and the ligand can have multiple dissociation states. Simul 5 Complex with the complexation mode runs under Windows and can be freely downloaded from our web page http://natur.cuni.cz/gas. The article has two separate parts. Here, the mathematical model is derived and tested by simulating the published results obtained by several methods used for the determination of complexation equilibrium constants: affinity capillary electrophoresis, vacancy affinity capillary electrophoresis, Hummel-Dreyer method, vacancy peak method, frontal analysis, and frontal analysis continuous capillary electrophoresis. In the second part of the paper, the agreement of the simulated and the experimental data is shown and discussed.",2012-03-01 +23327593,"An analysis of cyclin D1, cytokeratin 5/6 and cytokeratin 8/18 expression in breast papillomas and papillary carcinomas.","

Background

To evaluate the expression levels of cyclin D1 in breast papillomas and papillary carcinomas, and to analyze the types of cells that co-express cyclin D1 with cytokeratin 5/6 (CK 5/6) or with cytokeratin 8/18(CK 8/18).

Methods

Fifty-nine cases of papillary lesions including 36 papillomas and 23 intracystic papillary carcinomas were examined. Cyclin D1, CK 5/6 and CK 8/18 expression levels were evaluated by double immunostaining.

Results

Cyclin D1 is highly expressed in papillary carcinomas (27.54% ± 15.43%) compared with papillomas (8.81% ± 8.41%, p < 0.01). Cyclin D1 is predominantly expressed in cytokeratin 8/18- expressing cells, rather than in cytokeratin 5/6-expressing cells, regardless of the type of lesion. In papillomas, cyclin D1 exhibited a mean 11.42% (11.42% ± 10.17%) co-expression rate with cytokeratin 8/18 compared with a mean 2.50% (2.50% ± 3.24%) co-expression rate with cytokeratin 5/6 (p < 0.01). In papillary carcinomas, cyclin D1 exhibited a mean 34.74% (34.74% ± 16.32%) co-expression rate with cytokeratin 8/18 compared with a co-expression rate of 0.70% (0.70% ± 0.93%) with cytokeratin 5/6 (p < 0.01).

Conclusions

The increase in cyclin D1 suggests an association of cyclin D1 staining with papillary carcinomas. Although cyclin D1 is an effective marker for the differential diagnosis of other papillary lesions, it cannot be used to distinguish between papilloma and papillary carcinoma lesions because its expression occurs in both lesions. Our results show that cyclin D1 and CK 5/6 staining could be used in concert to distinguish between the diagnosis of papilloma (cyclin D1 < 4.20%, CK 5/6 positive) or papillary carcinoma (cyclin D1 > 37.00%, CK 5/6 negative). In addition, our data suggest that cyclin D1 is expressed only in the cancer stem or progenitor cells that co-immunostained with CK 8/18 in papillary carcinomas, and predominantly with CK 8/18 in the papillomas.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/7299340558756848.",2013-01-18 +21745130,Long-term evaluation of postmastectomy breast reconstruction.,"

Background

Reconstructing a breast mound constitutes the basis of breast reconstruction. The breast can be reconstructed using autologous tissue, implants or a combination thereof. The number of women wishing a breast reconstruction has increased, but evaluation of the results is lacking. The current study examined the long-term results from three methods of breast reconstruction to assess the subjective and the objective outcome.

Patients and methods

Patients undergoing first-time post mastectomy reconstruction, selected from the cohort of Danish women in the Central and North Region of Denmark, were evaluated. We included 363 women, reconstructed in 1990-2005. Data was collected from patient charts, a study specific questionnaire (to be found online at http://www.informahealthcare.com/doi/abs/10.3109/0284186X.2011.584554 ) and a clinical follow-up visit. The questionnaire included questions regarding demographic background and evaluation of the reconstructed breast and donor site. The clinical follow-up visit included an examination of the overall result and donor site.

Results

The questionnaire was answered by 263 women, of whom 137 had an implant, 26 had a latissimus dorsi musculocutaneus flap and 100 had a pedicled transverse rectus abdominis musculocutaneus flap. Women reconstructed with autologous tissue were significantly more pleased with the result of the breast reconstruction than women reconstructed with an implant. After a median of seven years, neither the patient's age nor the length of time since the reconstruction significantly affected the patients' opinion of the overall result. There was no difference in the incidences of minor complications among the different reconstructive methods. BMI, smoking and radiation therapy influenced the risk of complications. Objective evaluation of the 180 women participating in the follow-up visit was in agreement with data from the questionnaire.

Conclusion

The type of reconstruction had a significant long-term influence on patient satisfaction and the objective result. Women reconstructed with autologous tissue were significantly more pleased, and the objective outcome was assessed as superior.",2011-07-11 +21478194,Prediction of metabolic reactions based on atomic and molecular properties of small-molecule compounds.,"

Motivation

Our knowledge of the metabolites in cells and their reactions is far from complete as revealed by metabolomic measurements that detect many more small molecules than are documented in metabolic databases. Here, we develop an approach for predicting the reactivity of small-molecule metabolites in enzyme-catalyzed reactions that combines expert knowledge, computational chemistry and machine learning.

Results

We classified 4843 reactions documented in the KEGG database, from all six Enzyme Commission classes (EC 1-6), into 80 reaction classes, each of which is marked by a characteristic functional group transformation. Reaction centers and surrounding local structures in substrates and products of these reactions were represented using SMARTS. We found that each of the SMARTS-defined chemical substructures is widely distributed among metabolites, but only a fraction of the functional groups in these substructures are reactive. Using atomic properties of atoms in a putative reaction center and molecular properties as features, we trained support vector machine (SVM) classifiers to discriminate between functional groups that are reactive and non-reactive. Classifier accuracy was assessed by cross-validation analysis. A typical sensitivity [TP/(TP+FN)] or specificity [TN/(TN+FP)] is ≈0.8. Our results suggest that metabolic reactivity of small-molecule compounds can be predicted with reasonable accuracy based on the presence of a potentially reactive functional group and the chemical features of its local environment.

Availability

The classifiers presented here can be used to predict reactions via a web site (http://cellsignaling.lanl.gov/Reactivity/). The web site is freely available.",2011-04-08 +21961827,Evaluating complex interventions and health technologies using normalization process theory: development of a simplified approach and web-enabled toolkit.,"

Background

Normalization Process Theory (NPT) can be used to explain implementation processes in health care relating to new technologies and complex interventions. This paper describes the processes by which we developed a simplified version of NPT for use by clinicians, managers, and policy makers, and which could be embedded in a web-enabled toolkit and on-line users manual.

Methods

Between 2006 and 2010 we undertook four tasks. (i) We presented NPT to potential and actual users in multiple workshops, seminars, and presentations. (ii) Using what we discovered from these meetings, we decided to create a simplified set of statements and explanations expressing core constructs of the theory. (iii) We circulated these statements to a criterion sample of 60 researchers, clinicians and others, using SurveyMonkey to collect qualitative textual data about their criticisms of the statements. (iv) We then reconstructed the statements and explanations to meet users' criticisms, embedded them in a web-enabled toolkit, and beta tested this 'in the wild'.

Results

On-line data collection was effective: over a four week period 50/60 participants responded using SurveyMonkey (40/60) or direct phone and email contact (10/60). An additional nine responses were received from people who had been sent the SurveyMonkey form by other respondents. Beta testing of the web enabled toolkit produced 13 responses, from 327 visits to http://www.normalizationprocess.org. Qualitative analysis of both sets of responses showed a high level of support for the statements but also showed that some statements poorly expressed their underlying constructs or overlapped with others. These were rewritten to take account of users' criticisms and then embedded in a web-enabled toolkit. As a result we were able to translate the core constructs into a simplified set of statements that could be utilized by non-experts.

Conclusion

Normalization Process Theory has been developed through transparent procedures at each stage of its life. The theory has been shown to be sufficiently robust to merit formal testing. This project has provided a user friendly version of NPT that can be embedded in a web-enabled toolkit and used as a heuristic device to think through implementation and integration problems.",2011-09-30 +22022543,Graph constrained discriminant analysis: a new method for the integration of a graph into a classification process.,"Integrating gene regulatory networks (GRNs) into the classification process of DNA microarrays is an important issue in bioinformatics, both because this information has a true biological interest and because it helps in the interpretation of the final classifier. We present a method called graph-constrained discriminant analysis (gCDA), which aims to integrate the information contained in one or several GRNs into a classification procedure. We show that when the integrated graph includes erroneous information, gCDA's performance is only slightly worse, thus showing robustness to misspecifications in the given GRNs. The gCDA framework also allows the classification process to take into account as many a priori graphs as there are classes in the dataset. The gCDA procedure was applied to simulated data and to three publicly available microarray datasets. gCDA shows very interesting performance when compared to state-of-the-art classification methods. The software package gcda, along with the real datasets that were used in this study, are available online: http://biodev.cea.fr/gcda/.",2011-10-14 +22994653,High abundance of Serine/Threonine-rich regions predicted to be hyper-O-glycosylated in the secretory proteins coded by eight fungal genomes.,"

Background

O-glycosylation of secretory proteins has been found to be an important factor in fungal biology and virulence. It consists in the addition of short glycosidic chains to Ser or Thr residues in the protein backbone via O-glycosidic bonds. Secretory proteins in fungi frequently display Ser/Thr rich regions that could be sites of extensive O-glycosylation. We have analyzed in silico the complete sets of putatively secretory proteins coded by eight fungal genomes (Botrytis cinerea, Magnaporthe grisea, Sclerotinia sclerotiorum, Ustilago maydis, Aspergillus nidulans, Neurospora crassa, Trichoderma reesei, and Saccharomyces cerevisiae) in search of Ser/Thr-rich regions as well as regions predicted to be highly O-glycosylated by NetOGlyc (http://www.cbs.dtu.dk).

Results

By comparison with experimental data, NetOGlyc was found to overestimate the number of O-glycosylation sites in fungi by a factor of 1.5, but to be quite reliable in the prediction of highly O-glycosylated regions. About half of secretory proteins have at least one Ser/Thr-rich region, with a Ser/Thr content of at least 40% over an average length of 40 amino acids. Most secretory proteins in filamentous fungi were predicted to be O-glycosylated, sometimes in dozens or even hundreds of sites. Residues predicted to be O-glycosylated have a tendency to be grouped together forming hyper-O-glycosylated regions of varying length.

Conclusions

About one fourth of secretory fungal proteins were predicted to have at least one hyper-O-glycosylated region, which consists of 45 amino acids on average and displays at least one O-glycosylated Ser or Thr every four residues. These putative highly O-glycosylated regions can be found anywhere along the proteins but have a slight tendency to be at either one of the two ends.",2012-09-20 +21257608,Graphics processing unit implementations of relative expression analysis algorithms enable dramatic computational speedup.,"

Summary

The top-scoring pair (TSP) and top-scoring triplet (TST) algorithms are powerful methods for classification from expression data, but analysis of all combinations across thousands of human transcriptome samples is computationally intensive, and has not yet been achieved for TST. Implementation of these algorithms for the graphics processing unit results in dramatic speedup of two orders of magnitude, greatly increasing the searchable combinations and accelerating the pace of discovery.

Availability

http://www.igb.illinois.edu/labs/price/downloads/.",2011-01-20 +22996744,Magnetic particle imaging: visualization of instruments for cardiovascular intervention.,"

Purpose

To evaluate the feasibility of different approaches of instrument visualization for cardiovascular interventions guided by using magnetic particle imaging (MPI).

Materials and methods

Two balloon (percutaneous transluminal angioplasty) catheters were used. The balloon was filled either with diluted superparamagnetic iron oxide (SPIO) ferucarbotran (25 mmol of iron per liter) or with sodium chloride. Both catheters were inserted into a vessel phantom that was filled oppositional to the balloon content with sodium chloride or diluted SPIO (25 mmol of iron per liter). In addition, the administration of a 1.4-mL bolus of pure SPIO (500 mmol of iron per liter) followed by 5 mL of sodium chloride through a SPIO-labeled balloon catheter into the sodium chloride-filled vessel phantom was recorded. Images were recorded by using a preclinical MPI demonstrator. All images were acquired by using a field of view of 3.6 × 3.6 × 2.0 cm.

Results

By using MPI, both balloon catheters could be visualized with high temporal (21.54 msec per image) and sufficient spatial (≤ 3 mm) resolution without any motion artifacts. The movement through the field of view, the inflation and deflation of the balloon, and the application of the SPIO bolus were visualized at a rate of 46 three-dimensional data sets per second.

Conclusion

Visualization of SPIO-labeled instruments for cardiovascular intervention at high temporal resolution as well as monitoring the application of a SPIO-based tracer by using labeled instruments is feasible. Further work is necessary to evaluate different labeling approaches for diagnostic catheters and guidewires and to demonstrate their navigation in the vascular system after administration of contrast material.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12120424/-/DC1.",2012-09-20 +21998157,BDTcomparator: a program for comparing binary classifiers.,"

Summary

The BDTcomparator facilitates the selection of the best performing binary classification model or binary diagnostic procedure from the many possible alternatives by comparing their predictions with a known output, measured with the use of a system recognized as the gold standard. The program calculates the estimates of accuracy, sensitivity, specificity, predictive values and diagnostic likelihood ratios along with appropriate confidence intervals. Furthermore, all pairwise comparisons with respect to the above-mentioned measures are calculated. The formatted results can be exported to a text-file.

Availability and implementation

BDTcomparator is distributed under the GNU GPLv3 license and is freely available for download from http://www.tox-portal.net. We provide programs for both Linux and Windows operating systems. The source code of the program is provided in our companion website http://code.google.com/p/bdtcomparator/.

Contact

kamil.fijorek@uek.krakow.pl

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-13 +21085053,Database of genetic studies of bipolar disorder.,"This study describes the construction and preliminary analysis of a database of summary level genetic findings for bipolar disorder from the literature. The database is available for noncommercial use at http://bioprogramming.bsd.uchicago.edu/BDStudies/. This may be the first complete collection of published gene-specific linkage and association findings on bipolar disorder, including genome-wide association studies. Both the positive and negative findings have been incorporated so that the statistical and contextual significance of each finding may be compared semi-quantitatively and qualitatively across studies of mixed technologies. The database is appropriate for searching a literature populated by mainly underpowered studies, and if 'hits' are viewed as tentative knowledge for future hypothesis generation. It can serve as the basis for a mega-analysis of candidate genes. Herein, we discuss the most robust and best replicated gene findings to date in a contextual manner.",2011-04-01 +21641561,Computational analysis of drought stress-associated miRNAs and miRNA co-regulation network in Physcomitrella patens.,"miRNAs are non-coding small RNAs that involve diverse biological processes. Until now, little is known about their roles in plant drought resistance. Physcomitrella patens is highly tolerant to drought; however, it is not clear about the basic biology of the traits that contribute P. patens this important character. In this work, we discovered 16 drought stress-associated miRNA (DsAmR) families in P. patens through computational analysis. Due to the possible discrepancy of expression periods and tissue distributions between potential DsAmRs and their targeting genes, and the existence of false positive results in computational identification, the prediction results should be examined with further experimental validation. 
We also constructed an miRNA co-regulation network, and identified two network hubs, miR902a-5p and miR414, which may play important roles in regulating drought-resistance traits. We distributed our results through an online database named ppt-miRBase, which can be accessed at http://bioinfor.cnu.edu.cn/ppt_miRBase/index.php. Our methods in finding DsAmR and miRNA co-regulation network showed a new direction for identifying miRNA functions.",2011-04-01 +23217202,Protein Nano-Object Integrator (ProNOI) for generating atomic style objects for molecular modeling.,"

Background

With the progress of nanotechnology, one frequently has to model biological macromolecules simultaneously with nano-objects. However, the atomic structures of the nano objects are typically not available or they are solid state entities. Because of that, the researchers have to investigate such nano systems by generating models of the nano objects in a manner that the existing software be able to carry the simulations. In addition, it should allow generating composite objects with complex shape by combining basic geometrical figures and embedding biological macromolecules within the system.

Results

Here we report the Protein Nano-Object Integrator (ProNOI) which allows for generating atomic-style geometrical objects with user desired shape and dimensions. Unlimited number of objects can be created and combined with biological macromolecules in Protein Data Bank (PDB) format file. Once the objects are generated, the users can use sliders to manipulate their shape, dimension and absolute position. In addition, the software offers the option to charge the objects with either specified surface or volumetric charge density and to model them with user-desired dielectric constants. According to the user preference, the biological macromolecule atoms can be assigned charges and radii according to four different force fields: Amber, Charmm, OPLS and PARSE. The biological macromolecules and the atomic-style objects are exported as a position, charge and radius (PQR) file, or if a default dielectric constant distribution is not selected, it is exported as a position, charge, radius and epsilon (PQRE) file. As illustration of the capabilities of the ProNOI, we created a composite object in a shape of a robot, aptly named the Clemson Robot, whose parts are charged with various volumetric charge densities and holds the barnase-barstar protein complex in its hand.

Conclusions

The Protein Nano-Object Integrator (ProNOI) is a convenient tool for generating atomic-style nano shapes in conjunction with biological macromolecule(s). Charges and radii on the macromolecule atoms and the atoms in the shapes are assigned according to the user's preferences allowing various scenarios of modeling. The default output file is in PQR (PQRE) format which is readable by almost any software available in biophysical field. It can be downloaded from: http://compbio.clemson.edu/downloadDir/ProNO_integrator.tar.gz.",2012-12-05 +21516242,Linking NCBI to Wikipedia: a wiki-based approach.,"The NCBI Taxonomy underpins many bioinformatics and phyloinformatics databases, but by itself provides limited information on the taxa it contains. One readily available source of information on many taxa is Wikipedia. This paper describes iPhylo Linkout, a Semantic wiki that maps taxa in NCBI's taxonomy database onto corresponding pages in Wikipedia. Storing the mapping in a wiki makes it easy to edit, correct, or otherwise annotate the links between NCBI and Wikipedia. The mapping currently comprises some 53,000 taxa, and is available at http://iphylo.org/linkout. The links between NCBI and Wikipedia are also made available to NCBI users through the NCBI LinkOut service.",2011-03-31 +21590677,Evaluation of the positional difference between two common geocoding methods.,"Geocoding, the process of matching addresses to geographic coordinates, is a necessary first step when using geographical information systems (GIS) technology. However, different geocoding methodologies can result in different geographic coordinates. The objective of this study was to compare the positional (i.e. longitude/latitude) difference between two common geocoding methods, i.e. ArcGIS (Environmental System Research Institute, Redlands, CA, USA) and Batchgeo (freely available online at http://www.batchgeo.com). 
Address data came from the YMCA-Harvard After School Food and Fitness Project, an obesity prevention intervention involving children aged 5-11 years and their families participating in YMCA-administered, after-school programmes located in four geographically diverse metropolitan areas in the USA. Our analyses include baseline addresses (n = 748) collected from the parents of the children in the after school sites. Addresses were first geocoded to the street level and assigned longitude and latitude coordinates with ArcGIS, version 9.3, then the same addresses were geocoded with Batchgeo. For this analysis, the ArcGIS minimum match score was 80. The resulting geocodes were projected into state plane coordinates, and the difference in longitude and latitude coordinates were calculated in meters between the two methods for all data points in each of the four metropolitan areas. We also quantified the descriptions of the geocoding accuracy provided by Batchgeo with the match scores from ArcGIS. We found a 94% match rate (n = 705), 2% (n = 18) were tied and 3% (n = 25) were unmatched using ArcGIS. Forty-eight addresses (6.4%) were not matched in ArcGIS with a match score ≥80 (therefore only 700 addresses were included in our positional difference analysis). Six hundred thirteen (87.6%) of these addresses had a match score of 100. Batchgeo yielded a 100% match rate for the addresses that ArcGIS geocoded. The median for longitude and latitude coordinates for all the data was just over 25 m. Overall, the range for longitude was 0.04-12,911.8 m, and the range for latitude was 0.02-37,766.6 m. Comparisons show minimal differences in the median and minimum values, while there were slightly larger differences in the maximum values. The majority (>75%) of the geographic differences were within 50 m of each other; mostly <25 m from each other (about 49%). Only about 4% overall were ≥400 m apart. 
We also found geographic differences in the proportion of addresses that fell within certain meter ranges. The match-score range associated with the Batchgeo accuracy level ""approximate"" (least accurate) was 84-100 (mean = 92), while the ""rooftop"" Batchgeo accuracy level (most accurate) delivered a mean of 98.9 but the range was the same. Although future research should compare the positional difference of Batchgeo to criterion measures of longitude/latitude (e.g. with global positioning system measurement), this study suggests that Batchgeo is a good, free-of-charge option to geocode addresses.",2011-05-01 +22855211,Measures of phylogenetic differentiation provide robust and complementary insights into microbial communities.,"High-throughput sequencing techniques have made large-scale spatial and temporal surveys of microbial communities routine. Gaining insight into microbial diversity requires methods for effectively analyzing and visualizing these extensive data sets. Phylogenetic β-diversity measures address this challenge by allowing the relationship between large numbers of environmental samples to be explored using standard multivariate analysis techniques. Despite the success and widespread use of phylogenetic β-diversity measures, an extensive comparative analysis of these measures has not been performed. Here, we compare 39 measures of phylogenetic β diversity in order to establish the relative similarity of these measures along with key properties and performance characteristics. While many measures are highly correlated, those commonly used within microbial ecology were found to be distinct from those popular within classical ecology, and from the recently recommended Gower and Canberra measures. Many of the measures are surprisingly robust to different rootings of the gene tree, the choice of similarity threshold used to define operational taxonomic units, and the presence of outlying basal lineages. 
Measures differ considerably in their sensitivity to rare organisms, and the effectiveness of measures can vary substantially under alternative models of differentiation. Consequently, the depth of sequencing required to reveal underlying patterns of relationships between environmental samples depends on the selected measure. Our results demonstrate that using complementary measures of phylogenetic β diversity can further our understanding of how communities are phylogenetically differentiated. Open-source software implementing the phylogenetic β-diversity measures evaluated in this manuscript is available at http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity.",2012-08-02 +21448736,[Estimation of substitution volume after burn trauma. Systematic review of published formulae].,"

Background

Fluid resuscitation after severe burns remains a challenging task particularly in the preclinical and early clinical phases. To facilitate volume substitution after burn trauma several formulae have been published and evaluated, nevertheless, the optimal formula has not yet been identified.

Methods

A systematic PubMed search was performed to identify published formulae for fluid resuscitation after severe burns. The search terms ""burn"", ""thermal"", ""treatment"", ""therapy"" or ""resuscitation"", ""fluid"", ""formula"" and ""adult"", ""pediatric"" or ""paediatric"" were used in various combinations. Analysis was limited to the period from 01.01.1950 to 30.06.2010 and database entries in PubMed (http://www.pubmed.com). Additionally, references cited in the papers were analyzed and relevant publications were also included. Publications and formulae were assessed and classified by two independent investigators.

Results

Within the specified time frame eight publications (five original contributions and three book chapters) were identified of which three formulae recommended colloid solutions, four recommended electrolyte solutions and one suggested hypertonic solutions within the first 24 h for fluid resuscitation. Only one formula specifically dealt with fluid resuscitation in infants.

Conclusion

The identified formulae led to sometimes strikingly diverse calculations of resuscitation fluid volumes. Therefore their use should be monitored closely and clinical values included. Urine output is a well established individual parameter. Use of colloid and hypertonic solutions leads to a reduced total fluid volume but is still controversially discussed.",2011-03-31 +21327471,Improved web-based calculators for predicting breast carcinoma outcomes.,"We describe a set of web-based calculators, available at http://www.CancerMath.net , which estimate the risk of breast carcinoma death, the reduction in life expectancy, and the impact of various adjuvant treatment choices. The published SNAP method of the binary biological model of cancer metastasis uses information on tumor size, nodal status, and other prognostic factors to accurately estimate breast cancer lethality at 15 years after diagnosis. By combining these 15-year lethality estimates with data on the breast cancer hazard function, breast cancer lethality can be estimated at each of the 15 years after diagnosis. A web-based calculator was then created to visualize the estimated lethality with and without a range of adjuvant therapy options at any of the 15 years after diagnosis, and enable conditional survival calculations. NIH population data was used to estimate non-breast-cancer chance of death. The accuracy of the calculators was tested against two large breast carcinoma datasets: 7,907 patients seen at two academic hospitals and 362,491 patients from the SEER national dataset. The calculators were found to be highly accurate and specific, as seen by their capacity for stratifying patients into groups differing by as little as a 2% risk of death, and accurately accounting for nodal status, histology, grade, age, and hormone receptor status. 
Our breast carcinoma calculators provide accurate and useful estimates of the risk of death, which can aid in analysis of the various adjuvant therapy options available to each patient.",2011-02-15 +21450960,Web-accessible database of hsp65 sequences from Mycobacterium reference strains.,"Mycobacteria include a large number of pathogens. Identification to species level is important for diagnoses and treatments. Here, we report the development of a Web-accessible database of the hsp65 locus sequences (http://msis.mycobacteria.info) from 149 out of 150 Mycobacterium species/subspecies. This database can serve as a reference for identifying Mycobacterium species.",2011-03-30 +34875773,SequenceMatrix: concatenation software for the fast assembly of multi-gene datasets with character set and codon information.,"We present SequenceMatrix, software that is designed to facilitate the assembly and analysis of multi-gene datasets. Genes are concatenated by dragging and dropping FASTA, NEXUS, or TNT files with aligned sequences into the program window. A multi-gene dataset is concatenated and displayed in a spreadsheet; each sequence is represented by a cell that provides information on sequence length, number of indels, the number of ambiguous bases (""Ns""), and the availability of codon information. Alternatively, GenBank numbers for the sequences can be displayed and exported. Matrices with hundreds of genes and taxa can be concatenated within minutes and exported in TNT, NEXUS, or PHYLIP formats, preserving both character set and codon information for TNT and NEXUS files. SequenceMatrix also creates taxon sets listing taxa with a minimum number of characters or gene fragments, which helps assess preliminary datasets. Entire taxa, whole gene fragments, or individual sequences for a particular gene and species can be excluded from export. Data matrices can be re-split into their component genes and the gene fragments can be exported as individual gene files. 
SequenceMatrix also includes two tools that help to identify sequences that may have been compromised through laboratory contamination or data management error. One tool lists identical or near-identical sequences within genes, while the other compares the pairwise distance pattern of one gene against the pattern for all remaining genes combined. SequenceMatrix is Java-based and compatible with the Microsoft Windows, Apple MacOS X and Linux operating systems. The software is freely available from http://code.google.com/p/sequencematrix/. © The Willi Hennig Society 2010.",2011-04-01 +22261220,A review of thresholding strategies applied to human chromosome segmentation.,"Karyotype analysis is a widespread procedure in cytogenetics to assess the presence of genetic defects by the visualization of the structure of chromosomes. The procedure is lengthy and repetitive and an effective automatic analysis would greatly help the cytogeneticist routine work. Still, automatic segmentation and the full disentangling of chromosomes are open issues. The first step in every automatic procedure is the thresholding step, which detect blobs that represent either single chromosomes or clusters of chromosomes. The better the thresholding step, the easier is the subsequent disentanglement of chromosome clusters into single entities. We implemented eleven thresholding methods, i.e. the ones that appear in the literature as the best performers, and compared their performance in segmenting chromosomes and chromosome clusters in cytogenetic Q-band images. The images are affected by the presence of hyper- or hypo-fluorescent regions and by a contrast variability between the stained chromosomes and the background. A thorough analysis of the results highlights that, although every single algorithm shows peculiar strong/weak points, Adaptive Threshold and Region Based Level Set have the overall best performance. 
In order to provide the scientific community with a public dataset, the data and manual segmentation used in this paper are available for public download at http://bioimlab.dei.unipd.it.",2012-01-18 +21685090,A method for probing the mutational landscape of amyloid structure.,"

Motivation

Proteins of all kinds can self-assemble into highly ordered β-sheet aggregates known as amyloid fibrils, important both biologically and clinically. However, the specific molecular structure of a fibril can vary dramatically depending on sequence and environmental conditions, and mutations can drastically alter amyloid function and pathogenicity. Experimental structure determination has proven extremely difficult with only a handful of NMR-based models proposed, suggesting a need for computational methods.

Results

We present AmyloidMutants, a statistical mechanics approach for de novo prediction and analysis of wild-type and mutant amyloid structures. Based on the premise of protein mutational landscapes, AmyloidMutants energetically quantifies the effects of sequence mutation on fibril conformation and stability. Tested on non-mutant, full-length amyloid structures with known chemical shift data, AmyloidMutants offers roughly 2-fold improvement in prediction accuracy over existing tools. Moreover, AmyloidMutants is the only method to predict complete super-secondary structures, enabling accurate discrimination of topologically dissimilar amyloid conformations that correspond to the same sequence locations. Applied to mutant prediction, AmyloidMutants identifies a global conformational switch between Aβ and its highly-toxic 'Iowa' mutant in agreement with a recent experimental model based on partial chemical shift data. Predictions on mutant, yeast-toxic strains of HET-s suggest similar alternate folds. When applied to HET-s and a HET-s mutant with core asparagines replaced by glutamines (both highly amyloidogenic chemically similar residues abundant in many amyloids), AmyloidMutants surprisingly predicts a greatly reduced capacity of the glutamine mutant to form amyloid. We confirm this finding by conducting mutagenesis experiments.

Availability

Our tool is publicly available on the web at http://amyloid.csail.mit.edu/.

Contact

lindquist_admin@wi.mit.edu; bab@csail.mit.edu.",2011-07-01 +22861649,A re-evaluation of 9-HODE activity at TRPV1 channels in comparison with anandamide: enantioselectivity and effects at other TRP channels and in sensory neurons.,"

Background and purpose

Two oxidation products of linoleic acid, 9- and 13-hydroxy-octadecadienoic acids (HODEs), have recently been suggested to act as endovanilloids, that is, endogenous agonists of transient receptor potential vanilloid-1 (TRPV1) channels, thereby contributing to inflammatory hyperalgesia in rats. However, HODE activity at rat TRPV1 in comparison with the best established endovanilloid, anandamide, and its enantioselectivity and selectivity towards other TRP channels that are also abundant in sensory neurons have never been investigated.

Experimental approach

We studied the effect of 9(R)-HODE, 9(S)-HODE, (+/-)13-HODE, 15(S)-hydroxyanandamide and anandamide on [Ca(2+) ](i) in HEK-293 cells stably expressing the rat or human recombinant TRPV1, or rat recombinant TRPV2, TRPA1 or TRPM8, and also the effect of 9(S)-HODE in rat dorsal root ganglion (DRG) neurons by calcium imaging.

Key results

Anandamide and 15(S)-hydroxyanandamide were the most potent endovanilloids at human TRPV1, whereas 9(S)-HODE was approximately threefold less efficacious and 75- and 3-fold less potent, respectively, and did not perform much better at rat TRPV1. The 9(R)-HODE and (+/-)13-HODE were almost inactive at TRPV1. Unlike anandamide and 15(S)-hydroxyanandamide, all HODEs were very weak at desensitizing TRPV1 to the action of capsaicin, but activated rat TRPV2 [only (+/-)13-HODE] and rat TRPA1, and antagonized rat TRPM8, at concentrations higher than those required to activate TRPV1. Finally, 9(S)-HODE elevated [Ca(2+) ](i) in DRG neurons almost exclusively in capsaicin-sensitive cells but only at concentrations between 25 and 100 μM.

Conclusions and implications

The present data suggest that HODEs are less important endovanilloids than anandamide.

Linked articles

This article is part of a themed section on Cannabinoids. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2012.167.issue-8.",2012-12-01 +21693559,KINARI-Web: a server for protein rigidity analysis.,"KINARI-Web is an interactive web server for performing rigidity analysis and visually exploring rigidity properties of proteins. It also provides tools for pre-processing the input data, such as selecting relevant chains from PDB files, adding hydrogen atoms and identifying stabilizing interactions. KINARI-Web offers a quick-start option for beginners, and highly customizable features for the experienced user. Chains, residues or atoms, as well as stabilizing constraints can be selected, removed or added, and the user can designate how different chemical interactions should be modeled during rigidity analysis. The enhanced Jmol-based visualizer allows for zooming in, highlighting or investigating different calculated rigidity properties of a molecular structure. KINARI-Web is freely available at http://kinari.cs.umass.edu.",2011-06-21 +22251421,Integrative role of neuropeptides and cytokines in cancer anorexia-cachexia syndrome.,"BACKGROUND: The cachexia anorexia syndrome is a complex metabolic syndrome associated with cancer and some other palliative conditions characterized by involuntary weight loss involving fat and muscle, weight loss, anorexia, early satiety, fatigue, weakness due to shifts in metabolism caused by tumour by-products and cytokines. Various neuropeptides like Leptin, neuropeptide Y, melanocortin, agouti-related peptides have been known to regulate appetite and body weight. METHOD: A comprehensive literature search was carried out on the websites of Pubmed Central (http://www.pubmedcentral.nih.gov/), National Library of Medicine (http://www.ncbl.nlm.nih.gov) and various other net resources. 
RESULT: Data from observational studies shows that various cytokines (TNF-α, IL-6 and IL-1) are associated with metabolic changes resulting in cachexia in cancer patients. These cytokines may mimic the action of various neuropeptides resulting in anorexia, various metabolic effects resulting from enhanced catabolic state and weight loss. CONCLUSION: There is a need to understand and explore the role of various neuropeptides and cytokines in the pathophysiology of cancer-anorexia syndrome so that therapeutic measures may be designed for effective palliative care.",2012-01-11 +21864300,Sonographic assessment of arterial frequency and distribution within the brachial plexus: a comparison with the cadaveric record.,"We investigated the number and distribution of arteries within the brachial plexus territory using a portable ultrasound device, and compared these findings with known cadaveric data. We recruited 200 volunteers and carried out 400 brachial plexus examinations in a prospective observational study design. We identified arteries within the brachial plexus in more than 90% of subjects. Most of these were located in the upper and middle zones of the plexus and therefore lie within the possible path of a block needle. These findings correlate well with previous cadaveric studies, suggesting that arteries within the brachial plexus territory can be reliably identified with a portable ultrasound device. The presence of these vessels may impact upon the safety and efficacy of brachial plexus blockade. Routine pre-procedural sonographic assessment may offer improved safety and efficacy. You can respond to this article at http://www.anaesthesiacorrespondence.com.",2011-08-22 +21442443,"PPI_SVM: prediction of protein-protein interactions using machine learning, domain-domain affinities and frequency tables.","Protein-protein interactions (PPI) control most of the biological processes in a living cell. 
In order to fully understand protein functions, a knowledge of protein-protein interactions is necessary. Prediction of PPI is challenging, especially when the three-dimensional structure of interacting partners is not known. Recently, a novel prediction method was proposed by exploiting physical interactions of constituent domains. We propose here a novel knowledge-based prediction method, namely PPI_SVM, which predicts interactions between two protein sequences by exploiting their domain information. We trained a two-class support vector machine on the benchmarking set of pairs of interacting proteins extracted from the Database of Interacting Proteins (DIP). The method considers all possible combinations of constituent domains between two protein sequences, unlike most of the existing approaches. Moreover, it deals with both single-domain proteins and multi domain proteins; therefore it can be applied to the whole proteome in high-throughput studies. Our machine learning classifier, following a brainstorming approach, achieves accuracy of 86%, with specificity of 95%, and sensitivity of 75%, which are better results than most previous methods that sacrifice recall values in order to boost the overall precision. Our method has on average better sensitivity combined with good selectivity on the benchmarking dataset. The PPI_SVM source code, train/test datasets and supplementary files are available freely in the public domain at: http://code.google.com/p/cmater-bioinfo/.",2011-03-20 +22820203,Discriminative local subspaces in gene expression data for effective gene function prediction.,"

Motivation

Massive amounts of genome-wide gene expression data have become available, motivating the development of computational approaches that leverage this information to predict gene function. Among successful approaches, supervised machine learning methods, such as Support Vector Machines (SVMs), have shown superior prediction accuracy. However, these methods lack the simple biological intuition provided by co-expression networks (CNs), limiting their practical usefulness.

Results

In this work, we present Discriminative Local Subspaces (DLS), a novel method that combines supervised machine learning and co-expression techniques with the goal of systematically predicting genes involved in specific biological processes of interest. Unlike traditional CNs, DLS uses the knowledge available in Gene Ontology (GO) to generate informative training sets that guide the discovery of expression signatures: expression patterns that are discriminative for genes involved in the biological process of interest. By linking genes co-expressed with these signatures, DLS is able to construct a discriminative CN that links both known and previously uncharacterized genes for the selected biological process. This article focuses on the algorithm behind DLS and shows its predictive power using an Arabidopsis thaliana dataset and a representative set of 101 GO terms from the Biological Process Ontology. Our results show that DLS has a higher average accuracy than both SVMs and CNs. Thus, DLS is able to provide the prediction accuracy of supervised learning methods while maintaining the intuitive understanding of CNs.

Availability

A MATLAB® implementation of DLS is available at http://virtualplant.bio.puc.cl/cgi-bin/Lab/tools.cgi.",2012-07-20 +27466777,QSAR Modelling of Rat Acute Toxicity on the Basis of PASS Prediction.,"The method for QSAR modelling of rat acute toxicity based on the combination of QNA (Quantitative Neighbourhoods of Atoms) descriptors, PASS (Prediction of Activity Spectra for Substances) predictions and self-consistent regression (SCR) is presented. PASS predicted biological activity profiles are used as independent input variables for QSAR modelling with SCR. QSAR models were developed using LD50 values for compounds tested on rats with four types of administration (oral, intravenous, intraperitoneal, subcutaneous). The proposed method was evaluated on the set of compounds tested for acute rat toxicity with oral administration (7286 compounds) used for testing the known QSAR methods in T.E.S.T. 3.0 program (U.S. EPA). The several other sets of compounds tested for acute rat toxicity by different routes of administration selected from SYMYX MDL Toxicity Database were used too. The method was compared with the results of prediction of acute rodent toxicity for noncongeneric sets obtained by ACD/Labs Inc. The test sets were predicted with regards to the applicability domain. Comparison of accuracy for QSAR models obtained separately using QNA descriptors, PASS predictions, nearest neighbours' assessment with consensus models clearly demonstrated the benefits of consensus prediction. Free available web-service for prediction of LD50 values of rat acute toxicity was developed: http://www.pharmaexpert.ru/GUSAR/AcuToxPredict/.",2011-03-18 +21410407,GRIPDB - G protein coupled Receptor Interaction Partners DataBase.,"The G protein Coupled Receptor (GPCR) superfamily is one of the most important pharmaceutical targets. Studies of GPCRs have long been performed under the assumption that GPCRs function as monomers. 
However, recent studies have revealed that many GPCRs function as homo- and/or hetero-dimers or higher-order oligomeric molecular complexes. As a result, information about GPCR oligomerization is rapidly accumulating, although the molecular mechanisms of oligomerization are not fully understood. A comprehensive collection of information about oligomerization would accelerate investigations of the molecular mechanisms of GPCRs' oligomerization and involvement in signaling. Hence, we have developed a database, G protein coupled Receptor Interaction Partners DataBase (GRIPDB), which provides information about GPCR oligomerization. The entries in the database are divided into two sections: (I) Experiment Information section and (II) Prediction Information section. The Experiment Information section contains (I-i) experimentally identified GPCR oligomers and their annotations, and (I-ii) experimentally suggested interfaces for the oligomerization. Since the number of experimentally suggested interfaces is limited, the entries in the Prediction Information section have been introduced to provide information about the oligomerization interfaces predicted by our computational method. The experimentally suggested or computationally predicted interfaces are displayed by 3D graphics, using GPCRs with available coordinates. The information in the GRIPDB, especially that about the interfaces, is useful to investigate the molecular mechanisms of signal transduction via GPCR oligomerization. The GRIPDB is available on the web at the following URL: http://grip.cbrc.jp/GDB/index.html .",2011-03-17 +22689783,Recognition models to predict DNA-binding specificities of homeodomain proteins.,"

Motivation

Recognition models for protein-DNA interactions, which allow the prediction of specificity for a DNA-binding domain based only on its sequence or the alteration of specificity through rational design, have long been a goal of computational biology. There has been some progress in constructing useful models, especially for C(2)H(2) zinc finger proteins, but it remains a challenging problem with ample room for improvement. For most families of transcription factors the best available methods utilize k-nearest neighbor (KNN) algorithms to make specificity predictions based on the average of the specificities of the k most similar proteins with defined specificities. Homeodomain (HD) proteins are the second most abundant family of transcription factors, after zinc fingers, in most metazoan genomes, and as a consequence an effective recognition model for this family would facilitate predictive models of many transcriptional regulatory networks within these genomes.

Results

Using extensive experimental data, we have tested several machine learning approaches and find that both support vector machines and random forests (RFs) can produce recognition models for HD proteins that are significant improvements over KNN-based methods. Cross-validation analyses show that the resulting models are capable of predicting specificities with high accuracy. We have produced a web-based prediction tool, PreMoTF (Predicted Motifs for Transcription Factors) (http://stormo.wustl.edu/PreMoTF), for predicting position frequency matrices from protein sequence using a RF-based model.",2012-06-01 +21421552,SplamiR--prediction of spliced miRNAs in plants.,"

Motivation

MicroRNAs (miRNAs) are important regulators of biological processes in plants and animals. Recently, miRNA genes have been discovered, whose primary transcripts are spliced and which cannot be predicted directly from genomic sequence. Hence, more sophisticated programs for the detection of spliced miRNAs are required.

Results

Here, we present the first method for the prediction of spliced miRNAs in plants. For a given genomic sequence, SplamiR creates a database of complementary sequence pairs, which might encode for RNAs folding into stem-loop structures. Next, in silico splice variants of database sequences with complementarity to an mRNA of interest are classified as to whether they could represent miRNAs targeting this mRNA. Our method identifies all known cases of spliced miRNAs in rice, and a previously undiscovered miRNA in maize which is supported by an expressed sequence tag (EST). SplamiR permits identification of spliced miRNAs for a given target mRNA in many plant genomes.

Availability

The program is freely available at http://www.uni-jena.de/SplamiR.html.",2011-03-17 +21414988,Compounds In Literature (CIL): screening for compounds and relatives in PubMed.,"

Summary

Searching for certain compounds in literature can be an elaborate task, with many compounds having several different synonyms. Often, only the structure is known but not its name. Furthermore, rarely investigated compounds may not be described in the available literature at all. In such cases, preceding searches for described similar compounds facilitate literature mining. Highlighted names of proteins in selected texts may further accelerate the time-consuming process of literary research. Compounds In Literature (CIL) provides a web interface to automatically find names, structures, and similar structures in over 28 million compounds of PubChem and more than 18 million citations provided by the PubMed service. CIL's pre-calculated database contains more than 56 million parent compound-abstract relations. Found compounds, relatives and abstracts are related to proteins in a concise 'heat map'-like overview. Compounds and proteins are highlighted in their respective abstracts, and are provided with links to PubChem and UniProt.

Availability

An easy-to-use web interface with detailed descriptions, help and statistics is available from http://cil.pharmaceutical-bioinformatics.de.

Contact

stefan.guenther@pharmazie.uni-freiburg.de.",2011-03-16 +23173870,Reduced expression of microRNA-100 confers unfavorable prognosis in patients with bladder cancer.,"

Objective

MicroRNA-100 (miR-100) has been demonstrated to be downregulated in bladder cancer tissues, and enforced expression of this miRNA may inhibit cell growth and colony formation of human bladder cancer 5637 cells in vitro. However, the clinical significance of miR-100 in human bladder cancer has not yet been elucidated. Thus, the aim of this study was to investigate the diagnostic and prognostic values of miR-100 in this disease.

Methods

Expression levels of miR-100 in 126 pairs of bladder cancer and adjacent normal tissues were detected by TaqMan real-time quantitative RT-PCR assay. In order to determine its prognostic value, overall survival (OS) and progression-free survival (PFS) were evaluated using the Kaplan-Meier method, and multivariate analysis was performed using the Cox proportional hazard analysis.

Results

Expression levels of miR-100 in bladder cancer tissues were significantly lower than those in adjacent normal tissues (mean expression level: 2.6 ± 1.2 vs. 3.9 ± 1.5, P < 0.001). When categorized into low vs. high expression, low miR-100 expression was negatively associated with the stage (P = 0.01), the recurrence (P = 0.008), the progression (P = 0.01), and the death (P < 0.001) of patients with bladder cancer. Moreover, low miR-100 expression clearly predicted poorer PFS (P = 0.001) and OS (P < 0.001). In the multivariate analysis, low miR-100 expression was an independent prognostic factor for both PFS (P = 0.01) and OS (P = 0.008).

Conclusion

Our data offer the convincing evidence that miR-100 may play an important role in the progression of bladder cancer and that the reduced expression of this miRNA may be independently associated with shorter PFS and OS of patients, suggesting that miR-100 might be a potential marker for further risk stratification in the treatment of this cancer.

Virtual slides

The virtual slides for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1105483419841671.",2012-11-22 +22810503,RadRAT: a radiation risk assessment tool for lifetime cancer risk projection.,"Risk projection methods allow for timely assessment of the potential magnitude of radiation-related cancer risks following low-dose radiation exposures. The estimation of such risks directly through observational studies would generally require infeasibly large studies and long-term follow-up to achieve reasonable statistical power. We developed an online radiation risk assessment tool (RadRAT) which can be used to estimate the lifetime risk of radiation-related cancer with uncertainty intervals following a user-specified exposure history (https://irep.nci.nih.gov/radrat). The uncertainty intervals constitute a key component of the program because of the various assumptions that are involved in such calculations. The risk models used in RadRAT are broadly based on those developed by the BEIR VII committee for estimating lifetime risk following low-dose radiation exposure of the US population for eleven site-specific cancers. We developed new risk models for seven additional cancer sites, oral, oesophagus, gallbladder, pancreas, rectum, kidney and brain/central nervous system (CNS) cancers, using data from Japanese atomic bomb survivors. The lifetime risk estimates are slightly higher for RadRAT than for BEIR VII across all exposure ages mostly because the weighting of the excess relative risk and excess absolute risk models was conducted on an arithmetic rather than a logarithmic scale. The calculator can be used to estimate lifetime cancer risk from both uniform and non-uniform doses that are acute or chronic. 
It is most appropriate for low-LET radiation doses < 1 Gy, and for individuals with life-expectancy and cancer rates similar to the general population in the US.",2012-07-19 +22303342,SOAPsplice: Genome-Wide ab initio Detection of Splice Junctions from RNA-Seq Data.,"RNA-Seq, a method using next generation sequencing technologies to sequence the transcriptome, facilitates genome-wide analysis of splice junction sites. In this paper, we introduce SOAPsplice, a robust tool to detect splice junctions using RNA-Seq data without using any information of known splice junctions. SOAPsplice uses a novel two-step approach consisting of first identifying as many reasonable splice junction candidates as possible, and then, filtering the false positives with two effective filtering strategies. In both simulated and real datasets, SOAPsplice is able to detect many reliable splice junctions with low false positive rate. The improvement gained by SOAPsplice, when compared to other existing tools, becomes more obvious when the depth of sequencing is low. SOAPsplice is freely available at http://soap.genomics.org.cn/soapsplice.html.",2011-07-07 +21411447,Using computational predictions to improve literature-based Gene Ontology annotations: a feasibility study.,"Annotation using Gene Ontology (GO) terms is one of the most important ways in which biological information about specific gene products can be expressed in a searchable, computable form that may be compared across genomes and organisms. Because literature-based GO annotations are often used to propagate functional predictions between related proteins, their accuracy is critically important. We present a strategy that employs a comparison of literature-based annotations with computational predictions to identify and prioritize genes whose annotations need review. 
Using this method, we show that comparison of manually assigned 'unknown' annotations in the Saccharomyces Genome Database (SGD) with InterPro-based predictions can identify annotations that need to be updated. A survey of literature-based annotations and computational predictions made by the Gene Ontology Annotation (GOA) project at the European Bioinformatics Institute (EBI) across several other databases shows that this comparison strategy could be used to maintain and improve the quality of GO annotations for other organisms besides yeast. The survey also shows that although GOA-assigned predictions are the most comprehensive source of functional information for many genomes, a large proportion of genes in a variety of different organisms entirely lack these predictions but do have manual annotations. This underscores the critical need for manually performed, literature-based curation to provide functional information about genes that are outside the scope of widely used computational methods. Thus, the combination of manual and computational methods is essential to provide the most accurate and complete functional annotation of a genome. Database URL: http://www.yeastgenome.org.",2011-03-15 +22647057,Detection and correction of probe-level artefacts on microarrays.,"

Background

A recent large-scale analysis of Gene Expression Omnibus (GEO) data found frequent evidence for spatial defects in a substantial fraction of Affymetrix microarrays in the GEO. Nevertheless, in contrast to quality assessment, artefact detection is not widely used in standard gene expression analysis pipelines. Furthermore, although approaches have been proposed to detect diverse types of spatial noise on arrays, the correction of these artefacts is mostly left to either summarization methods or the corresponding arrays are completely discarded.

Results

We show that state-of-the-art robust summarization procedures are vulnerable to artefacts on arrays and cannot appropriately correct for these. To address this problem, we present a simple approach to detect artefacts with high recall and precision, which we further improve by taking into account the spatial layout of arrays. Finally, we propose two correction methods for these artefacts that either substitute values of defective probes using probeset information or filter corrupted probes. We show that our approach can identify and correct defective probe measurements appropriately and outperforms existing tools.

Conclusions

While summarization is insufficient to correct for defective probes, this problem can be addressed in a straightforward way by the methods we present for identification and correction of defective probes. As these methods output CEL files with corrected probe values that serve as input to standard normalization and summarization procedures, they can be easily integrated into existing microarray analysis pipelines as an additional pre-processing step. An R package is freely available from http://www.bio.ifi.lmu.de/artefact-correction.",2012-05-30 +21266443,An efficient hierarchical generalized linear mixed model for pathway analysis of genome-wide association studies.,"

Motivation

In genome-wide association studies (GWAS) of complex diseases, genetic variants having real but weak associations often fail to be detected at the stringent genome-wide significance level. Pathway analysis, which tests disease association with combined association signals from a group of variants in the same pathway, has become increasingly popular. However, because of the complexities in genetic data and the large sample sizes in typical GWAS, pathway analysis remains challenging. We propose a new statistical model for pathway analysis of GWAS. This model includes a fixed effects component that models mean disease association for a group of genes, and a random effects component that models how each gene's association with disease varies about the gene group mean, and thus belongs to the class of mixed effects models.

Results

The proposed model is computationally efficient and uses only summary statistics. In addition, it corrects for the presence of overlapping genes and linkage disequilibrium (LD). Via simulated and real GWAS data, we showed our model improved power over currently available pathway analysis methods while preserving type I error rate. Furthermore, using the WTCCC Type 1 Diabetes (T1D) dataset, we demonstrated mixed model analysis identified meaningful biological processes that agreed well with previous reports on T1D. Therefore, the proposed methodology provides an efficient statistical modeling framework for systems analysis of GWAS.

Availability

The software code for mixed models analysis is freely available at http://biostat.mc.vanderbilt.edu/LilyWang.",2011-01-25 +22084009,Using binding profiles to predict binding sites of target RNAs.,"Prediction of RNA-RNA interaction is a key to elucidating possible functions of small non-coding RNAs, and a number of computational methods have been proposed to analyze interacting RNA secondary structures. In this article, we focus on predicting binding sites of target RNAs that are expected to interact with regulatory antisense RNAs in a general form of interaction. For this purpose, we propose bistaRNA, a novel method for predicting multiple binding sites of target RNAs. bistaRNA employs binding profiles that represent scores for hybridized structures, leading to reducing the computational cost for interaction prediction. bistaRNA considers an ensemble of equilibrium interacting structures and seeks to maximize expected accuracy using dynamic programming. Experimental results on real interaction data validate good accuracy and fast computation time of bistaRNA as compared with several competitive methods. Moreover, we aim to find new targets given specific antisense RNAs, which provides interesting insights into antisense RNA regulation. bistaRNA is implemented in C++. The program and Supplementary Material are available at http://rna.naist.jp/program/bistarna/.",2011-12-01 +21398670,Identification and quantification of metabolites in (1)H NMR spectra by Bayesian model selection.,"

Motivation

Nuclear magnetic resonance (NMR) spectroscopy is widely used for high-throughput characterization of metabolites in complex biological mixtures. However, accurate interpretation of the spectra in terms of identities and abundances of metabolites can be challenging, in particular in crowded regions with heavy peak overlap. Although a number of computational approaches for this task have recently been proposed, they are not entirely satisfactory in either accuracy or extent of automation.

Results

We introduce a probabilistic approach, Bayesian Quantification (BQuant), for fully automated database-based identification and quantification of metabolites in local regions of (1)H NMR spectra. The approach represents the spectra as mixtures of reference profiles from a database, and infers the identities and the abundances of metabolites by Bayesian model selection. We show using a simulated dataset, a spike-in experiment and a metabolomic investigation of plasma samples that BQuant outperforms the available automated alternatives in accuracy for both identification and quantification.

Availability

The R package BQuant is available at: http://www.stat.purdue.edu/~ovitek/BQuant-Web/.",2011-03-12 +22642116,[A fast algorithm to build a supertree with a set of gene trees].,"Important desired properties of an algorithm to construct a supertree (species tree) by reconciling input trees are its low complexity and applicability to large biological data. In its common statement the problem is proved to be NP-hard, i.e. to have an exponential complexity in practice. We propose a reformulation of the supertree building problem that allows a computationally effective solution. We introduce a biologically natural requirement that the supertree is sought for such that it does not contain clades incompatible with those existing in the input trees. The algorithm was tested with simulated and biological trees and was shown to possess a nearly quadratic complexity even if horizontal transfers are allowed. If HGTs are not assumed, the algorithm is mathematically correct and possesses the longest running time of $n^3 \times |V_0|^3$, where $n$ is the number of input trees and $|V_0|$ is the total number of species. The authors are unaware of analogous solutions in published evidence. The corresponding inferring program, its usage examples and manual are freely available at http://lab6.iitp.ru/en/super3gl. The available program does not implement HGTs. The generalized case is described in the publication ""A tree nearest in average to a set of trees"" (Information Transmission Problems, 2011).",2012-01-01 +21389154,"Automated identification of medically important bacteria by 16S rRNA gene sequencing using a novel comprehensive database, 16SpathDB.","Despite the increasing use of 16S rRNA gene sequencing, interpretation of 16S rRNA gene sequence results is one of the most difficult problems faced by clinical microbiologists and technicians. 
To overcome the problems we encountered in the existing databases during 16S rRNA gene sequence interpretation, we built a comprehensive database, 16SpathDB (http://147.8.74.24/16SpathDB) based on the 16S rRNA gene sequences of all medically important bacteria listed in the Manual of Clinical Microbiology and evaluated its use for automated identification of these bacteria. Among 91 nonduplicated bacterial isolates collected in our clinical microbiology laboratory, 71 (78%) were reported by 16SpathDB as a single bacterial species having >98.0% nucleotide identity with the query sequence, 19 (20.9%) were reported as more than one bacterial species having >98.0% nucleotide identity with the query sequence, and 1 (1.1%) was reported as no match. For the 71 bacterial isolates reported as a single bacterial species, all results were identical to their true identities as determined by a polyphasic approach. For the 19 bacterial isolates reported as more than one bacterial species, all results contained their true identities as determined by a polyphasic approach and all of them had their true identities as the ""best match in 16SpathDB."" For the isolate (Gordonibacter pamelaeae) reported as no match, the bacterium has never been reported to be associated with human disease and was not included in the Manual of Clinical Microbiology. 16SpathDB is an automated, user-friendly, efficient, accurate, and regularly updated database for 16S rRNA gene sequence interpretation in clinical microbiology laboratories.",2011-03-09 +23056405,Predicting the functional effect of amino acid substitutions and indels.,"As next-generation sequencing projects generate massive genome-wide sequence variation data, bioinformatics tools are being developed to provide computational predictions on the functional effects of sequence variations and narrow down the search of causal variants for disease phenotypes. 
Different classes of sequence variations at the nucleotide level are involved in human diseases, including substitutions, insertions, deletions, frameshifts, and non-sense mutations. Frameshifts and non-sense mutations are likely to cause a negative effect on protein function. Existing prediction tools primarily focus on studying the deleterious effects of single amino acid substitutions through examining amino acid conservation at the position of interest among related sequences, an approach that is not directly applicable to insertions or deletions. Here, we introduce a versatile alignment-based score as a new metric to predict the damaging effects of variations not limited to single amino acid substitutions but also in-frame insertions, deletions, and multiple amino acid substitutions. This alignment-based score measures the change in sequence similarity of a query sequence to a protein sequence homolog before and after the introduction of an amino acid variation to the query sequence. Our results showed that the scoring scheme performs well in separating disease-associated variants (n = 21,662) from common polymorphisms (n = 37,022) for UniProt human protein variations, and also in separating deleterious variants (n = 15,179) from neutral variants (n = 17,891) for UniProt non-human protein variations. In our approach, the area under the receiver operating characteristic curve (AUC) for the human and non-human protein variation datasets is ∼0.85. We also observed that the alignment-based score correlates with the deleteriousness of a sequence variation. In summary, we have developed a new algorithm, PROVEAN (Protein Variation Effect Analyzer), which provides a generalized approach to predict the functional effects of protein sequence variations including single or multiple amino acid substitutions, and in-frame insertions and deletions. 
The PROVEAN tool is available online at http://provean.jcvi.org.",2012-10-08 +21320865,Improving SNP discovery by base alignment quality.,"

Unlabelled

I propose a new application of profile Hidden Markov Models in the area of SNP discovery from resequencing data, to greatly reduce false SNP calls caused by misalignments around insertions and deletions (indels). The central concept is per-Base Alignment Quality, which accurately measures the probability of a read base being wrongly aligned. The effectiveness of BAQ has been positively confirmed on large datasets by the 1000 Genomes Project analysis subgroup.

Availability

http://samtools.sourceforge.net

Contact

hengli@broadinstitute.org.",2011-02-13 +21309043,Prediction of single-nucleotide substitutions that result in exon skipping: identification of a splicing silencer in BRCA1 exon 6.,"Missense, nonsense, and translationally silent mutations can inactivate genes by altering the inclusion of mutant exons in mRNA, but their overall frequency among disease-causing exonic substitutions is unknown. Here, we have tested missense and silent mutations deposited in the BRCA1 mutation databases of unclassified variants for their effects on exon inclusion. Analysis of 21 BRCA1 variants using minigene assays revealed a single exon-skipping mutation c.231G>T. Comprehensive mutagenesis of an adjacent 12-nt segment showed that this silent mutation resulted in a higher level of exon skipping than the 35 other single-nucleotide substitutions. Exon inclusion levels of mutant constructs correlated significantly with predicted splicing enhancers/silencers, prompting the development of two online utilities freely available at http://www.dbass.org.uk. EX-SKIP quickly estimates which allele is more susceptible to exon skipping, whereas HOT-SKIP examines all possible mutations at each exon position and identifies candidate exon-skipping positions/substitutions. We demonstrate that the distribution of exon-skipping and disease-associated substitutions previously identified in coding regions was biased toward top-ranking HOT-SKIP mutations. Finally, we show that proteins 9G8, SC35, SF2/ASF, Tra2, and hnRNP A1 were associated with significant alterations of BRCA1 exon 6 inclusion in the mRNA. Together, these results facilitate prediction of exonic substitutions that reduce exon inclusion in mature transcripts.",2011-03-08 +22296788,WegoLoc: accurate prediction of protein subcellular localization using weighted Gene Ontology terms.,"

Summary

We present an accurate and fast web server, WegoLoc for predicting subcellular localization of proteins based on sequence similarity and weighted Gene Ontology (GO) information. A term weighting method in the text categorization process is applied to GO terms for a support vector machine classifier. As a result, WegoLoc surpasses the state-of-the-art methods for previously used test datasets. WegoLoc supports three eukaryotic kingdoms (animals, fungi and plants) and provides human-specific analysis, and covers several sets of cellular locations. In addition, WegoLoc provides (i) multiple possible localizations of input protein(s) as well as their corresponding probability scores, (ii) weights of GO terms representing the contribution of each GO term in the prediction, and (iii) a BLAST E-value for the best hit with GO terms. If the similarity score does not meet a given threshold, an amino acid composition-based prediction is applied as a backup method.

Availability

WegoLoc and User's guide are freely available at the website http://www.btool.org/WegoLoc

Contact

smchiks@ks.ac.kr; dougnam@unist.ac.kr

Supplementary information

Supplementary data is available at http://www.btool.org/WegoLoc.",2012-01-31 +23244338,Searching for transcription factor binding sites in vector spaces.,"

Background

Computational approaches to transcription factor binding site identification have been actively researched in the past decade. Learning from known binding sites, new binding sites of a transcription factor in unannotated sequences can be identified. A number of search methods have been introduced over the years. However, one can rarely find one single method that performs the best on all the transcription factors. Instead, to identify the best method for a particular transcription factor, one usually has to compare a handful of methods. Hence, it is highly desirable for a method to perform automatic optimization for individual transcription factors.

Results

We proposed to search for transcription factor binding sites in vector spaces. This framework allows us to identify the best method for each individual transcription factor. We further introduced two novel methods, the negative-to-positive vector (NPV) and optimal discriminating vector (ODV) methods, to construct query vectors to search for binding sites in vector spaces. Extensive cross-validation experiments showed that the proposed methods significantly outperformed the ungapped likelihood under positional background method, a state-of-the-art method, and the widely-used position-specific scoring matrix method. We further demonstrated that motif subtypes of a TF can be readily identified in this framework and two variants called the k NPV and k ODV methods benefited significantly from motif subtype identification. Finally, independent validation on ChIP-seq data showed that the ODV and NPV methods significantly outperformed the other compared methods.

Conclusions

We conclude that the proposed framework is highly flexible. It enables the two novel methods to automatically identify a TF-specific subspace to search for binding sites. Implementations are available as source code at: http://biogrid.engr.uconn.edu/tfbs_search/.",2012-08-27 +22654892,IMGT-ONTOLOGY 2012.,"Immunogenetics is the science that studies the genetics of the immune system and immune responses. Owing to the complexity and diversity of the immune repertoire, immunogenetics represents one of the greatest challenges for data interpretation: a large biological expertise, a considerable effort of standardization and the elaboration of an efficient system for the management of the related knowledge were required. IMGT®, the international ImMunoGeneTics information system® (http://www.imgt.org) has reached that goal through the building of a unique ontology, IMGT-ONTOLOGY, which represents the first ontology for the formal representation of knowledge in immunogenetics and immunoinformatics. IMGT-ONTOLOGY manages the immunogenetics knowledge through diverse facets that rely on the seven axioms of the Formal IMGT-ONTOLOGY or IMGT-Kaleidoscope: ""IDENTIFICATION,"" ""DESCRIPTION,"" ""CLASSIFICATION,"" ""NUMEROTATION,"" ""LOCALIZATION,"" ""ORIENTATION,"" and ""OBTENTION."" The concepts of identification, description, classification, and numerotation generated from the axioms led to the elaboration of the IMGT(®) standards that constitute the IMGT Scientific chart: IMGT®standardized keywords (concepts of identification), IMGT® standardized labels (concepts of description), IMGT® standardized gene and allele nomenclature (concepts of classification) and IMGT unique numbering and IMGT Collier de Perles (concepts of numerotation). 
IMGT-ONTOLOGY has become the global reference in immunogenetics and immunoinformatics for the knowledge representation of immunoglobulins (IG) or antibodies, T cell receptors (TR), and major histocompatibility (MH) proteins of humans and other vertebrates, proteins of the immunoglobulin superfamily (IgSF) and MH superfamily (MhSF), related proteins of the immune system (RPI) of vertebrates and invertebrates, therapeutic monoclonal antibodies (mAbs), fusion proteins for immune applications (FPIA), and composite proteins for clinical applications (CPCA).",2012-05-23 +21251949,Activity profiles of 309 ToxCast™ chemicals evaluated across 292 biochemical targets.,"Understanding the potential health risks posed by environmental chemicals is a significant challenge elevated by the large number of diverse chemicals with generally uncharacterized exposures, mechanisms, and toxicities. The present study is a performance evaluation and critical analysis of assay results for an array of 292 high-throughput cell-free assays aimed at preliminary toxicity evaluation of 320 environmental chemicals in EPA's ToxCast™ project (Phase I). The chemicals (309 unique, 11 replicates) were mainly precursors or the active agent of commercial pesticides, for which a wealth of in vivo toxicity data is available. Biochemical HTS (high-throughput screening) profiled cell and tissue extracts using semi-automated biochemical and pharmacological methodologies to evaluate a subset of G-protein coupled receptors (GPCRs), CYP450 enzymes (CYPs), kinases, phosphatases, proteases, HDACs, nuclear receptors, ion channels, and transporters. The primary screen tested all chemicals at a relatively high concentration of 25 μM (or 10 μM for CYP assays), and a secondary screen re-tested 9132 chemical-assay pairs in 8-point concentration series from 0.023 to 50 μM (or 0.009-20 μM for CYPs). 
Mapping relationships across 93,440 chemical-assay pairs based on half-maximal activity concentration (AC50) revealed both known and novel targets in signaling and metabolic pathways. The primary dataset, summary data and details on quality control checks are available for download at http://www.epa.gov/ncct/toxcast/.",2011-01-18 +21497713,"Overexpression of OsRDCP1, a rice RING domain-containing E3 ubiquitin ligase, increased tolerance to drought stress in rice (Oryza sativa L.).","CaRma1H1 was previously identified as a hot pepper drought-induced RING E3 Ub ligase. We have identified five putative proteins that display a significant sequence identity with CaRma1H1 in the rice genome database (http://signal.salk.edu/cgi-bin/RiceGE). These five rice paralogs possess a single RING motif in their N-terminal regions, consistent with the notion that RING proteins are encoded by a multi-gene family. Therefore, these proteins were named OsRDCPs (Oryza sativa RING domain-containing proteins). Among these paralogs, OsRDCP1 was induced by drought stress, whereas the other OsRDCP members were constitutively expressed, with OsRDCP4 transcripts expressed at the highest level in rice seedlings. osrdcp1 loss-of-function knockout mutant and OsRDCP1-overexpressing transgenic rice plants were developed. Phenotypic analysis showed that wild-type plants and the homozygous osrdcp1 G2 mutant line displayed similar phenotypes under normal growth conditions and in response to drought stress. This may be due to complementation by other OsRDCP paralogs. In contrast, 35S:OsRDCP1 T2 transgenic rice plants exhibited improved tolerance to severe water deficits. Although the physiological function of OsRDCP1 remains unclear, there are several possible mechanisms for its involvement in a subset of physiological responses to counteract dehydration stress in rice plants.",2011-03-04 +22923300,RIsearch: fast RNA-RNA interaction search using a simplified nearest-neighbor energy model.,"

Motivation

Regulatory, non-coding RNAs often function by forming a duplex with other RNAs. It is therefore of interest to predict putative RNA-RNA duplexes in silico on a genome-wide scale. Current computational methods for predicting these interactions range from fast complementary-based searches to those that take intramolecular binding into account. Together these methods constitute a trade-off between speed and accuracy, while leaving room for improvement within the context of genome-wide screens. A fast pre-filtering of putative duplexes would therefore be desirable.

Results

We present RIsearch, an implementation of a simplified Turner energy model for fast computation of hybridization, which significantly reduces runtime while maintaining accuracy. Its time complexity for sequences of lengths m and n is $O(m \cdot n)$ with a much smaller pre-factor than other tools. We show that this energy model is an accurate approximation of the full energy model for near-complementary RNA-RNA duplexes. RIsearch uses a Smith-Waterman-like algorithm using a dinucleotide scoring matrix which approximates the Turner nearest-neighbor energies. We show in benchmarks that we achieve a speed improvement of at least 2.4× compared with RNAplex, the currently fastest method for searching near-complementary regions. RIsearch shows a prediction accuracy similar to RNAplex on two datasets of known bacterial short RNA (sRNA)-messenger RNA (mRNA) and eukaryotic microRNA (miRNA)-mRNA interactions. Using RIsearch as a pre-filter in genome-wide screens reduces the number of binding site candidates reported by miRNA target prediction programs, such as TargetScanS and miRanda, by up to 70%. Likewise, substantial filtering was performed on bacterial RNA-RNA interaction data.

Availability

The source code for RIsearch is available at: http://rth.dk/resources/risearch.",2012-08-24 +21252076,Length bias correction for RNA-seq data in gene set analyses.,"

Motivation

Next-generation sequencing technologies are being rapidly applied to quantifying transcripts (RNA-seq). However, due to the unique properties of the RNA-seq data, the differential expression of longer transcripts is more likely to be identified than that of shorter transcripts with the same effect size. This bias complicates the downstream gene set analysis (GSA) because the methods for GSA previously developed for microarray data are based on the assumption that genes with same effect size have equal probability (power) to be identified as significantly differentially expressed. Since transcript length is not related to gene expression, adjusting for such length dependency in GSA becomes necessary.

Results

In this article, we proposed two approaches for transcript-length adjustment for analyses based on Poisson models: (i) At individual gene level, we adjusted each gene's test statistic using the square root of transcript length followed by testing for gene set using the Wilcoxon rank-sum test. (ii) At gene set level, we adjusted the null distribution for the Fisher's exact test by weighting the identification probability of each gene using the square root of its transcript length. We evaluated these two approaches using simulations and a real dataset, and showed that these methods can effectively reduce the transcript-length biases. The top-ranked GO terms obtained from the proposed adjustments show more overlaps with the microarray results.

Availability

R scripts are at http://www.soph.uab.edu/Statgenetics/People/XCui/r-codes/.",2011-01-19 +21366926,Bioinformatics analysis of disordered proteins in prokaryotes.,"

Background

A significant number of proteins have been shown to be intrinsically disordered, meaning that they lack a fixed 3D structure or contain regions that do not possess a well defined 3D structure. It has also been proven that a protein's disorder content is related to its function. We have performed an exhaustive analysis and comparison of the disorder content of proteins from prokaryotic organisms (i.e., superkingdoms Archaea and Bacteria) with respect to functional categories they belong to, i.e., Clusters of Orthologous Groups of proteins (COGs) and groups of COGs-Cellular processes (Cp), Information storage and processing (Isp), Metabolism (Me) and Poorly characterized (Pc). We also analyzed the disorder content of proteins with respect to various genomic, metabolic and ecological characteristics of the organism they belong to. We used correlations and association rule mining in order to identify the most confident associations between specific modalities of the characteristics considered and disorder content.

Results

Bacteria are shown to have a somewhat higher level of protein disorder than archaea, except for proteins in the Me functional group. It is demonstrated that the Isp and Cp functional groups in particular (L-repair function and N-cell motility and secretion COGs of proteins in specific) possess the highest disorder content, while Me proteins, in general, possess the lowest. Disorder fractions have been confirmed to have the lowest level for the so-called order-promoting amino acids and the highest level for the so-called disorder promoters. For each pair of organism characteristics, specific modalities are identified with the maximum disorder proteins in the corresponding organisms, e.g., high genome size-high GC content organisms, facultative anaerobic-low GC content organisms, aerobic-high genome size organisms, etc. Maximum disorder in archaea is observed for high GC content-low genome size organisms, high GC content-facultative anaerobic or aquatic or mesophilic organisms, etc. Maximum disorder in bacteria is observed for high GC content-high genome size organisms, high genome size-aerobic organisms, etc. Some of the most reliable association rules mined establish relationships between high GC content and high protein disorder, medium GC content and both medium and low protein disorder, anaerobic organisms and medium protein disorder, Gammaproteobacteria and low protein disorder, etc. A web site Prokaryote Disorder Database has been designed and implemented at the address http://bioinfo.matf.bg.ac.rs/disorder, which contains complete results of the analysis of protein disorder performed for 296 prokaryotic completely sequenced genomes.

Conclusions

Exhaustive disorder analysis has been performed by functional classes of proteins, for a larger dataset of prokaryotic organisms than previously done. Results obtained are well correlated to those previously published, with some extension in the range of disorder level and clear distinction between functional classes of proteins. Wide correlation and association analysis between protein disorder and genomic and ecological characteristics has been performed for the first time. The results obtained give insight into multi-relationships among the characteristics and protein disorder. Such analysis provides for better understanding of the evolutionary process and may be useful for taxon determination. The main drawback of the approach is the fact that the disorder considered has been predicted and not experimentally established.",2011-03-02 +21465914,High altitude pulmonary oedema.,"High altitude pulmonary oedema (HAPE) is an important and preventable cause of death at high altitudes. However, little is known about the global incidence of HAPE, in part because most cases occur in remote environments where no records are kept. Furthermore, despite international efforts to achieve consensus, there is wide disparity in the diagnostic criteria in clinical and research use. We have reviewed the literature on the incidence and epidemiology of HAPE. There is broad agreement between studies that HAPE incidence at 2500m is around 0.01%, and increases to 1.9% at 3600m and 2.5-5% at 4300m. Risk factors for HAPE include rate of ascent, intensity of exercise and absolute altitude attained, although an individual pre-disposition to developing the condition is also well described and suggests an underlying genetic susceptibility. It is increasingly recognised that clinically-detectable HAPE is an extreme of a continuous spectrum of excess pulmonary fluid accumulation, which has been demonstrated in asymptomatic individuals. 
There is a continued need to ensure awareness of the diagnosis and treatment of HAPE among visitors to high altitude. It is likely that HAPE is preventable in all cases by progressive acclimatisation, and we advocate a pragmatic ""golden rules"" approach. Our understanding of the epidemiology and underlying genetic susceptibility to HAPE may be advanced if susceptible individuals register with the International HAPE Database: http://www.altitude.org/hape.php. HAPE has direct relevance to military training and operations and is likely to be the leading cause of death at high altitude.",2011-03-01 +30743530,First Report of Fusarium Wilt Caused by Fusarium oxysporum f. sp. canariensis on Canary Island Date Palm in Texas and South Carolina.,"Canary Island date palm (Phoenix canariensis) is native to the Canary Islands and widely grown throughout the world as an ornamental. At a home site in Austin, TX in May 2008 and a commercial site near Charleston, SC in December 2009, declining Canary Island date palms were observed. Symptoms included individual leaves with chlorotic or necrotic leaflets on one side of the leaf blade (one-sided wilt or death) and a distinct reddish brown stripe along the petiole and rachis. Cross-sections through the petiole or rachis exhibited discoloration of internal tissue. Fusarium oxysporum was isolated from the internal petiole or rachis tissue of each palm sample onto one-quarter-strength potato dextrose agar (PDA). Typical macroconidia in pale orange sporodochia, microconidia in false heads on short monophialides, and chlamydospores were observed (2). Macroconidia were mostly 3-septate, slightly curved, and ranged from 3.8 to 4.2 × 42.9 to 46.5 μm. Microconidia were single cell, oval to reniform, and ranged from 2.5 to 2.9 × 7.2 to 7.8 μm. Single-spore isolates grown on full-strength PDA (12-h light and 26°C) produced abundant white-to-pale lavender mycelia with a purple pigment in the agar. 
One isolate from each location (PLM-385B from Texas and PLM-511A from South Carolina) was selected for pathogenicity tests and molecular characterization. The translation elongation factor 1-α gene (EF-1α) was amplified in each isolate by PCR using the ef1 and ef2 primers (1). Products were sequenced and queried for similarity against the NCBI database and the FUSARIUM-ID database ( http://isolate.fusariumdb.org/index.php ) (1) using the BLAST search tool. In both databases, both isolates matched F. oxysporum f. sp. canariensis strain NRRL 26035 (GenBank Accession No. AF008485; FD_01211) at 100% sequence similarity. Sequences for PLM-385B and PLM-511A have been deposited in the NCBI database (GenBank Accession Nos. HM591537 and HM591538, respectively). Pathogenicity of these two isolates was tested on three-leaf Canary Island date palm seedlings. There were five replicate palms per isolate and control treatment. All potting mix was shaken from the roots and three groups of five seedlings were placed in small buckets. Twenty-five milliliters of a $10^{6}$ conidia ml$^{-1}$ suspension was pipetted down among the leaf bases and the excess drained onto the roots. Control palms received sterile water. Seedlings were covered with plastic for 48 h and then transplanted into separate growing containers. Ten weeks after inoculation, initial symptoms of a leaf wilt (off-color and folded over) were observed on some of the inoculated palms. After 4 months, all palms inoculated with PLM-511A were dead and three of the five palms inoculated with PLM-385B were dead. The pathogen was reisolated from diseased palms. All five control palms remained healthy. While the symptomatic palm in Texas had been in the home site approximately 2 years, which implied the palm could have been already infected when transplanted, the palm in South Carolina had been planted in 1990. To our knowledge, this is the first report of Fusarium wilt of Canary Island date palm in Texas and South Carolina. 
Previously in the United States, the disease had only been noted in California, Florida, and Nevada. References: (1) D. M. Geiser et al. Eur. J. Plant Pathol. 110:473, 2004. (2) J. F. Leslie and B. A. Summerell. The Fusarium Laboratory Manual. Blackwell Publishing, Ames, IA, 2006.",2011-03-01 +30743515,First Report of False Rust Caused by Synchytrium minutum on Kudzu in Korea.,"Kudzu (Pueraria montana var. lobata (Willd.) Maesen & S. Almeida) is a weedy, fabaceous vine that is native to and widely distributed in Asia where it is used for various medicinal purposes such as treating convulsions and fever (2). In the United States, especially the southeastern states, kudzu has become a problematic invasive species that overgrows nearly every substrate on which it occurs. Thus, biological control strategies for controlling this vine are of great interest (4). From October to November 2004, a disease of kudzu was observed in Gwangju and Pyeongtaek in Gyeonggi Province, Korea. The disease appeared on leaves and stems as numerous, discrete, small galls, which enlarged, becoming yellowish orange and eventually erupting into orange, pulverulent sori. Galls were scattered or gregarious, amphigenous, predominately hypophyllous, and sometimes formed along veins as well as on petioles and stems. Sori that formed from galls were solitary but sometimes became confluent, 0.1 to 1 mm in diameter, globose to subglobose, and orange to dark orange; walls were hyaline and thin. Sporangia were copious in sori, typically polyhedral due to compression or globose, 16 to 32 μm in diameter, with smooth, hyaline walls and orange contents. Zoospores were not observed during several failed attempts to germinate sporangia. On the basis of morphological descriptions and keys (3), the fungus was identified as Synchytrium minutum (Pat.) Gäum. (Chytridiomycota), the only species of Synchytrium known to occur on Pueraria (1,3). 
Comparison with specimens from China and New Guinea (BPI 794733 and BPI 1109528) confirmed this identification. Portions of the nLSU and nSSU rDNA from one of the two Korean specimens deposited as voucher material in the U.S. National Fungus Collections (BPI 880898 and BPI 880899) were sequenced (GenBank Accession Nos. HQ324138 and HQ324139), and a subsequent BLAST search against GenBank confirmed placement in the genus Synchytrium with 95% similarity to S. decipiens. S. minutum is widespread in Asia and Oceania and also has been reported from California (1,3). To our knowledge, this is the first report of S. minutum in Korea (1) and is noteworthy to those interested in biological control of kudzu because S. minutum may have potential in this regard. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , September, 2010. (2) H. S. Jung. M.S. thesis. Seoul National University, Seoul, Korea, 1997. (3) J. S. Karling. Synchytrium. Academic Press Inc., New York, NY, 1964. (4) M. A. Weaver et al. Biol. Control 50:150, 2009.",2011-03-01 +30743514,First Report of Puccinia thaliae Rust on Canna Lily in Louisiana.,"Canna lily is a monocot, herbaceous perennial ornamental plant in the Cannaceae that is native to tropical South America and cultivated throughout the southern United States. Canna lily is a popular garden and landscaping plant and a large horticultural industry depends on this plant. In September 2008 and again in November 2009, two species of Canna lily (Canna × generalis L.H. Bailey and C. indica L.) were found to be severely infected with rust disease in three garden locations in southern Louisiana (East Baton Rouge Parish, Lafayette Parish, and Orleans Parish). 
Diseased samples from both host species and all locations exhibited similar symptoms of numerous, yellowish brown, subepidermal, erumpent, and irregular-shaped uredinia on both leaf surfaces. Initially, sori were scattered, later covering the entire leaf with coalescing pustules. Urediniospores were subglobose to ovoid or pyriform, echinulate, and measured 25.74 to 37.18 (-38.61) × 17.16 to 27.17 (-28.6) μm, with thickened apical walls, 1.3 to 1.6 μm, and one to two equatorial germ pores. Telia and teliospores were not observed on any of the collected samples. Pathogen identity was confirmed as Puccinia thaliae Dietel by nuclear ribosomal large subunit (28S) DNA sequencing with rust-specific primers (1). The sequence (deposited in GenBank as No. HQ434482), when blasted, was found to match sequence No. EU851154 of P. thaliae from C. indica with 98% identity (719 of 730 bp), the differences being attributed to a single insertion at bp 423 to 436 of sequence No. EU851154. The sequences of P. thaliae obtained from two different samples from Louisiana were identical and did not match any other sequence in GenBank. In North America P. thaliae is reported to cause rust on C. indica L. in Florida and C. × generalis in Texas, as well as on two members of the Marantaceae (Maranta arundinacea L. and Thalia geniculata L.) in Florida and M. arundinaceae in Mexico (2). To our knowledge, this is the first report of P. thaliae in Louisiana on Canna lily. Voucher materials (C. × generalis = LSU00123378 and C. indica = LSU00123384) have been deposited in the Bernard Lowy Mycological Herbarium (LSUM). References: (1) M. C. Aime. Mycoscience 47:112, 2006. (2) D. F. Farr and A.Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved 12 February from http://nt.ars-grin.gov/fungaldatabases/ , 2010.",2011-03-01 +21385030,SubMAP: aligning metabolic pathways with subnetwork mappings.,"We consider the problem of aligning two metabolic pathways. 
Unlike traditional approaches, we do not restrict the alignment to one-to-one mappings between the molecules (nodes) of the input pathways (graphs). We follow the observation that, in nature, different organisms can perform the same or similar functions through different sets of reactions and molecules. The number and the topology of the molecules in these alternative sets often vary from one organism to another. With the motivation that an accurate biological alignment should be able to reveal these functionally similar molecule sets across different species, we develop an algorithm that first measures the similarities between different nodes using a mixture of homology and topological similarity. We combine the two metrics by employing an eigenvalue formulation. We then search for an alignment between the two input pathways that maximizes a similarity score, evaluated as the sum of the similarities of the mapped subnetworks of size at most a given integer k, and also does not contain any conflicting mappings. Here we prove that this maximization is NP-hard by a reduction from the maximum weight independent set (MWIS) problem. We then convert our problem to an instance of MWIS and use an efficient vertex-selection strategy to extract the mappings that constitute our alignment. We name our algorithm SubMAP (Subnetwork Mappings in Alignment of Pathways). We evaluate its accuracy and performance on real datasets. Our empirical results demonstrate that SubMAP can identify biologically relevant mappings that are missed by traditional alignment methods. Furthermore, we observe that SubMAP is scalable for metabolic pathways of arbitrary topology, including searching for a query pathway of size 70 against the complete KEGG database of 1,842 pathways. 
Implementation in C++ is available at http://bioinformatics.cise.ufl.edu/SubMAP.html.",2011-03-01 +20955172,PredCSF: an integrated feature-based approach for predicting conotoxin superfamily.,"Conotoxins are small disulfide-rich peptides that are invaluable channel-targeted peptides and target neuronal receptors. They show prospects for being potent pharmaceuticals in the treatment of Alzheimer's disease, Parkinson's disease, and epilepsy. Accurate and fast prediction of conotoxin superfamily is very helpful towards the understanding of its biological and pharmacological functions especially in the post-genomic era. In the present study, we have developed a novel approach called PredCSF for predicting the conotoxin superfamily from the amino acid sequence directly based on fusing different kinds of sequential features by using modified one-versus-rest SVMs. The input features to the PredCSF classifiers are composed of physicochemical properties, evolutionary information, predicted secondary structure and amino acid composition, where the most important features are further screened by random forest feature selection to improve the prediction performance. The prediction results show that PredCSF can obtain an overall accuracy of 90.65% based on a benchmark dataset constructed from the most recent database, which consists of 4 main conotoxin superfamilies and 1 class of non-conotoxin class. Systematic experiments also show that combining different features is helpful for enhancing the prediction power when dealing with complex biological problems. PredCSF is expected to be a powerful tool for in silico identification of novel conotoxins and is freely available for academic use at http://www.csbio.sjtu.edu.cn/bioinf/PredCSF.",2011-03-01 +21908542,HIV Therapy Simulator: a graphical user interface for comparing the effectiveness of novel therapy regimens.,"

Unlabelled

Computer simulation models can be useful in exploring the efficacy of HIV therapy regimens in preventing the evolution of drug-resistant viruses. Current modeling programs, however, were designed by researchers with expertise in computational biology, limiting their accessibility to those who might lack such a background. We have developed a user-friendly graphical program, HIV Therapy Simulator (HIVSIM), that is accessible to non-technical users. The program allows clinicians and researchers to explore the effectiveness of various therapeutic strategies, such as structured treatment interruptions, booster therapies and induction-maintenance therapies. We anticipate that HIVSIM will be useful for evaluating novel drug-based treatment concepts in clinical research, and as an educational tool.

Availability

HIV Therapy Simulator is freely available for Mac OS and Windows at http://sites.google.com/site/hivsimulator/.

Contact

jmittler@uw.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-09 +21361385,ReverseScreen3D: a structure-based ligand matching method to identify protein targets.,"Ligand promiscuity, which is now recognized as an extremely common phenomenon, is a major underlying cause of drug toxicity. We have developed a new reverse virtual screening (VS) method called ReverseScreen3D, which can be used to predict the potential protein targets of a query compound of interest. The method uses a 2D fingerprint-based method to select a ligand template from each unique binding site of each protein within a target database. The target database contains only the structurally determined bioactive conformations of known ligands. The 2D comparison is followed by a 3D structural comparison to the selected query ligand using a geometric matching method, in order to prioritize each target binding site in the database. We have evaluated the performance of the ReverseScreen2D and 3D methods using a diverse set of small molecule protein inhibitors known to have multiple targets, and have shown that they are able to provide a highly significant enrichment of true targets in the database. Furthermore, we have shown that the 3D structural comparison improves early enrichment when compared with the 2D method alone, and that the 3D method performs well even in the absence of 2D similarity to the template ligands. By carrying out further experimental screening on the prioritized list of targets, it may be possible to determine the potential targets of a new compound or determine the off-targets of an existing drug. 
The ReverseScreen3D method has been incorporated into a Web server, which is freely available at http://www.modelling.leeds.ac.uk/ReverseScreen3D .",2011-02-28 +22998891,Patterns of medication initiation in newly diagnosed diabetes mellitus: quality and cost implications.,"Currently, 25 million Americans are known to have diabetes, with an additional 7 million cases believed to be undiagnosed. It is estimated that direct and indirect costs of diabetes top $200 billion. Due to the significant health and financial burdens associated with diabetes, it is imperative that this disease be treated quickly and aggressively. In 2009, the American Diabetes Association and the European Association for the Study of Diabetes developed a consensus statement regarding the treatment of type 2 diabetes, citing lifestyle modification and metformin as the preferred first line therapies. In this study, the authors looked at prescription claims data for adults who were newly initiated on oral hypoglycemic monotherapy between January 1, 2006, and December 31, 2008, to determine if initiation patterns changed over time, to evaluate how well the treatment guidelines were being followed, and to assess the economic consequences of prescribing patterns by drug class for both patients and insurers. The results showed that over the course of the study period the proportion of patients initially treated with metformin increased, whereas those receiving sulfonylureas as first-line therapy decreased. Thiazolidinediones experienced the greatest decrease, falling from 20% to 8%, while prescriptions for dipeptidyl peptidase-4 inhibitors increase from 0-7%. Over a 6-month period, patients taking metformin or sulfonylureas paid approximately $38 to $40 in co-pays while insurance paid about $77. Patients taking other agents paid approximately $130 in co-pays and insurance paid over $500. 
The authors concluded that based on its cost and safety profile, metformin should be the first line drug therapy for patients with newly diagnosed type 2 diabetes. This CME multimedia activity, which is part of a 2-part multimedia activity on the management and treatment of diabetes, contains a video presentation and is available through the website of The American Journal of Medicine at http://amjmed.com/content/multimedia. Click on ""Patterns of Medication Initiation in Newly Diagnosed Diabetes Mellitus: Quality and Cost Implications"" to access this part of the multimedia program.",2012-10-01 +21493659,SMETHILLIUM: spatial normalization method for Illumina infinium HumanMethylation BeadChip.,

Summary

DNA methylation is a major epigenetic modification in human cells. Illumina HumanMethylation27 BeadChip makes it possible to quantify the methylation state of 27 578 loci spanning 14 495 genes. We developed a non-parametric normalization method to correct the spatial background noise in order to improve the signal-to-noise ratio. The prediction performance of the proposed method was assessed on three fully methylated samples and three fully unmethylated DNA samples. We demonstrate that the spatial normalization outperforms BeadStudio to predict the methylation state of a given locus.

Availability and implementation

An R script and the data are available at the following address: http://bioinfo.curie.fr/projects/smethillium.,2011-04-14 +21296754,"SpaCEM3: a software for biological module detection when data is incomplete, high dimensional and dependent.","

Summary

Among classical methods for module detection, SpaCEM(3) provides ad hoc algorithms that were shown to be particularly well adapted to specific features of biological data: high-dimensionality, interactions between components (genes) and integrated treatment of missingness in observations. The software, currently in its version 2.0, is developed in C++ and can be used either via command line or with the GUI under Linux and Windows environments.

Availability

The SpaCEM(3) software, a documentation and datasets are available from http://spacem3.gforge.inria.fr/.",2011-02-03 +22095399,The SuBliMinaL Toolbox: automating steps in the reconstruction of metabolic networks.,"The generation and use of metabolic network reconstructions has increased over recent years. The development of such reconstructions has typically involved a time-consuming, manual process. Recent work has shown that steps undertaken in reconstructing such metabolic networks are amenable to automation. The SuBliMinaL Toolbox (http://www.mcisb.org/subliminal/) facilitates the reconstruction process by providing a number of independent modules to perform common tasks, such as generating draft reconstructions, determining metabolite protonation state, mass and charge balancing reactions, suggesting intracellular compartmentalisation, adding transport reactions and a biomass function, and formatting the reconstruction to be used in third-party analysis packages. The individual modules manipulate reconstructions encoded in Systems Biology Markup Language (SBML), and can be chained to generate a reconstruction pipeline, or used individually during a manual curation process. This work describes the individual modules themselves, and a study in which the modules were used to develop a metabolic reconstruction of Saccharomyces cerevisiae from the existing data resources KEGG and MetaCyc. The automatically generated reconstruction is analysed for blocked reactions, and suggestions for future improvements to the toolbox are discussed.",2011-11-18 +23016917,Can p63 serve as a biomarker for giant cell tumor of bone? A Moroccan experience.,"

Background

Multinucleated giant cell-containing tumors and pseudotumors of bone represent a heterogeneous group of benign and malignant lesions. Differential diagnosis can be challenging, particularly in instances of limited sampling. The purpose of this study was to evaluate the contribution of the P63 in the positive and differential diagnosis of giant cell tumor of bone.

Methods

This study includes 48 giant cell-containing tumors and pseudotumors of bone. P63 expression was evaluated by immunohistochemistry. Data analysis was performed using Epi-info software and SPSS software package (version 17).

Results

Immunohistochemical analysis showed P63 nuclear expression in all giant cell tumors of bone, in 50% of osteoid osteomas, 40% of aneurysmal bone cysts, 37.5% of osteoblastomas, 33.3% of chondromyxoid fibromas, 25% of non-ossifying fibromas and 8.3% of osteosarcomas. Only one case of chondroblastoma was included in this series and expressed P63. No P63 immunoreactivity was detected in any of the cases of central giant cell granulomas or Langerhans cell histiocytosis. The sensitivity and negative predictive value (NPV) of P63 immunohistochemistry for the diagnosis of giant cell tumor of bone were 100%. The specificity and positive predictive value (PPV) were 74.42% and 59.26%, respectively.

Conclusions

This study found not only that GCTOB expresses the P63 but it also shows that this protein may serve as a biomarker for the differential diagnosis between two morphologically similar lesions particularly in instances of limited sampling. Indeed, P63 expression seems to differentiate between giant cell tumor of bone and central giant cell granuloma since the latter does not express P63. Other benign and malignant giant cell-containing lesions express P63, decreasing its specificity as a diagnostic marker, but a strong staining was seen, except a case of chondroblastoma, only in giant cell tumor of bone. Clinical and radiological confrontation remains essential for an accurate diagnosis.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1838562590777252.",2012-09-27 +21278185,Quality control and preprocessing of metagenomic datasets.,"

Summary

Here, we present PRINSEQ for easy and rapid quality control and data preprocessing of genomic and metagenomic datasets. Summary statistics of FASTA (and QUAL) or FASTQ files are generated in tabular and graphical form and sequences can be filtered, reformatted and trimmed by a variety of options to improve downstream analysis.

Availability and implementation

This open-source application was implemented in Perl and can be used as a stand alone version or accessed online through a user-friendly web interface. The source code, user help and additional information are available at http://prinseq.sourceforge.net/.",2011-01-28 +32313364,The Supramap project: linking pathogen genomes with geography to fight emergent infectious diseases.,"Novel pathogens have the potential to become critical issues of national security, public health and economic welfare. As demonstrated by the response to Severe Acute Respiratory Syndrome (SARS) and influenza, genomic sequencing has become an important method for diagnosing agents of infectious disease. Despite the value of genomic sequences in characterizing novel pathogens, raw data on their own do not provide the information needed by public health officials and researchers. One must integrate knowledge of the genomes of pathogens with host biology and geography to understand the etiology of epidemics. To these ends, we have created an application called Supramap (http://supramap.osu.edu) to put information on the spread of pathogens and key mutations across time, space and various hosts into a geographic information system (GIS). To build this application, we created a web service for integrated sequence alignment and phylogenetic analysis as well as methods to describe the tree, mutations, and host shifts in Keyhole Markup Language (KML). We apply the application to 239 sequences of the polymerase basic 2 (PB2) gene of recent isolates of avian influenza (H5N1). We map a mutation, glutamic acid to lysine at position 627 in the PB2 protein (E627K), in H5N1 influenza that allows for increased replication of the virus in mammals. We use a statistical test to support the hypothesis of a correlation of E627K mutations with avian-mammalian host shifts but reject the hypothesis that lineages with E627K are moving westward. 
Data, instructions for use, and visualizations are included as supplemental materials at: http://supramap.osu.edu/sm/supramap/publications. © The Willi Hennig Society 2010.",2011-01-04 +22080468,BadiRate: estimating family turnover rates by likelihood-based methods.,"

Motivation

The comparative analysis of gene gain and loss rates is critical for understanding the role of natural selection and adaptation in shaping gene family sizes. Studying complete genome data from closely related species allows accurate estimation of gene family turnover rates. Current methods and software tools, however, are not well designed for dealing with certain kinds of functional elements, such as microRNAs or transcription factor binding sites.

Results

Here, we describe BadiRate, a new software tool to estimate family turnover rates, as well as the number of elements in internal phylogenetic nodes, by likelihood-based methods and parsimony. It implements two stochastic population models, which provide the appropriate statistical framework for testing hypotheses, such as lineage-specific gene family expansions or contractions. We have assessed the accuracy of BadiRate by computer simulations, and have also illustrated its functionality by analyzing a representative empirical dataset.

Availability

BadiRate software and documentation is available from http://www.ub.edu/softevol/badirate.",2011-11-10 +21726364,Pseudomonas syringae pv. phaseolicola: from 'has bean' to supermodel.,"Pseudomonas syringae pv. phaseolicola causes halo blight of the common bean, Phaseolus vulgaris, worldwide and remains difficult to control. Races of the pathogen cause either disease symptoms or a resistant hypersensitive response on a series of differentially reacting bean cultivars. The molecular genetics of the interaction between P. syringae pv. phaseolicola and bean, and the evolution of bacterial virulence, have been investigated in depth and this research has led to important discoveries in the field of plant-microbe interactions. In this review, we discuss several of the areas of study that chart the rise of P. syringae pv. phaseolicola from a common pathogen of bean plants to a molecular plant-pathogen supermodel bacterium.

Taxonomy

Bacteria; Proteobacteria, gamma subdivision; order Pseudomonadales; family Pseudomonadaceae; genus Pseudomonas; species Pseudomonas syringae; Genomospecies 2; pathogenic variety phaseolicola.

Microbiological properties

Gram-negative, aerobic, motile, rod-shaped, 1.5 µm long, 0.7-1.2 µm in diameter, at least one polar flagellum, optimal temperatures for growth of 25-30°C, oxidase negative, arginine dihydrolase negative, levan positive and elicits the hypersensitive response on tobacco.

Host range

Major bacterial disease of common bean (Phaseolus vulgaris) in temperate regions and above medium altitudes in the tropics. Natural infections have been recorded on several other legume species, including all members of the tribe Phaseoleae with the exception of Desmodium spp. and Pisum sativum.

Disease symptoms

Water-soaked lesions on leaves, pods, stems or petioles, that quickly develop greenish-yellow haloes on leaves at temperatures of less than 23°C. Infected seeds may be symptomless, or have wrinkled or buttery-yellow patches on the seed coat. Seedling infection is recognized by general chlorosis, stunting and distortion of growth.

Epidemiology

Seed borne and disseminated from exudation by water-splash and wind occurring during rainfall. Bacteria invade through wounds and natural openings (notably stomata). Weedy and cultivated alternative hosts may also harbour the bacterium.

Disease control

Some measure of control is achieved with copper formulations and streptomycin. Pathogen-free seed and resistant cultivars are recommended.

Useful websites

Pseudomonas-plant interaction http://www.pseudomonas-syringae.org/; PseudoDB http://xbase.bham.ac.uk/pseudodb/; Plant Associated and Environmental Microbes Database (PAMDB) http://genome.ppws.vt.edu/cgi-bin/MLST/home.pl; PseudoMLSA Database http://www.uib.es/microbiologiaBD/Welcome.html.",2011-02-17 +22784580,"miR2Gene: pattern discovery of single gene, multiple genes, and pathways by enrichment analysis of their microRNA regulators.","

Background

In recent years, a number of tools have been developed to explore microRNAs (miRNAs) by analyzing their target genes. However, a reverse problem, that is, inferring patterns of protein-coding genes through their miRNA regulators, has not been explored. As various miRNA annotation data become available, exploring gene patterns by analyzing the prior knowledge of their miRNA regulators is becoming more feasible.

Results

In this study, we developed a tool, miR2Gene, for this purpose. Various sets of miRNAs, according to prior rules such as function, associated disease, tissue specificity, family, and cluster, were integrated with miR2Gene. For given genes, miR2Gene evaluates the enrichment of the predicted miRNAs that regulate them in each miRNA set. This tool can be used for single genes, multiple genes, and KEGG pathways. For the KEGG pathway, genes with enriched miRNA sets are highlighted according to various rules. We confirmed the usefulness of miR2Gene through case studies.

Conclusions

miR2Gene represents a novel and useful tool that integrates miRNA knowledge for protein-coding gene analysis. miR2Gene is freely available at http://cmbi.hsc.pku.edu.cn/mir2gene.",2011-12-14 +21330290,FIMO: scanning for occurrences of a given motif.,"

Unlabelled

A motif is a short DNA or protein sequence that contributes to the biological function of the sequence in which it resides. Over the past several decades, many computational methods have been described for identifying, characterizing and searching with sequence motifs. Critical to nearly any motif-based sequence analysis pipeline is the ability to scan a sequence database for occurrences of a given motif described by a position-specific frequency matrix.

Results

We describe Find Individual Motif Occurrences (FIMO), a software tool for scanning DNA or protein sequences with motifs described as position-specific scoring matrices. The program computes a log-likelihood ratio score for each position in a given sequence database, uses established dynamic programming methods to convert this score to a P-value and then applies false discovery rate analysis to estimate a q-value for each position in the given sequence. FIMO provides output in a variety of formats, including HTML, XML and several Santa Cruz Genome Browser formats. The program is efficient, allowing for the scanning of DNA sequences at a rate of 3.5 Mb/s on a single CPU.

Availability and implementation

FIMO is part of the MEME Suite software toolkit. A web server and source code are available at http://meme.sdsc.edu.",2011-02-16 +22556366,Analysis of case-control association studies with known risk variants.,"

Motivation

The question of how to best use information from known associated variants when conducting disease association studies has yet to be answered. Some studies compute a marginal P-value for each single nucleotide polymorphism (SNP) independently, ignoring previously discovered variants. Other studies include known variants as covariates in logistic regression, but a weakness of this standard conditioning strategy is that it does not account for disease prevalence and non-random ascertainment, which can induce a correlation structure between candidate variants and known associated variants even if the variants lie on different chromosomes. Here, we propose a new conditioning approach, which is based in part on the classical technique of liability threshold modeling. Roughly, this method estimates model parameters for each known variant while accounting for the published disease prevalence from the epidemiological literature.

Results

We show via simulation and application to empirical datasets that our approach outperforms both the no conditioning strategy and the standard conditioning strategy, with a properly controlled false-positive rate. Furthermore, in multiple data sets involving diseases of low prevalence, standard conditioning produces a severe drop in test statistics whereas our approach generally performs as well or better than no conditioning. Our approach may substantially improve disease gene discovery for diseases with many known risk variants.

Availability

LTSOFT software is available online http://www.hsph.harvard.edu/faculty/alkes-price/software/.",2012-05-03 +21646339,PileLineGUI: a desktop environment for handling genome position files in next-generation sequencing studies.,"Next-generation sequencing (NGS) technologies are making sequence data available on an unprecedented scale. In this context, new catalogs of Single Nucleotide Polymorphism and mutations generated by resequencing studies are usually stored in genome position files (e.g. Variant Call Format, SAMTools pileup, BED, GFF) comprising of large lists of genomic positions, which are difficult to handle by researchers. Here, we present PileLineGUI, a novel desktop application primarily designed for manipulating, browsing and analysing genome position files (GPF), with specific support to somatic mutation finding studies. The developed tool also integrates a new genome browser module specially designed for inspecting GPFs. PileLineGUI is free, multiplatform and designed to be intuitively used by biomedical researchers. PileLineGUI is available at: http://sing.ei.uvigo.es/pileline/pilelinegui.html.",2011-06-06 +21943898,Systems biology of recombinant protein production using Bacillus megaterium.,"The Gram-negative bacterium Escherichia coli is the most widely used production host for recombinant proteins in both academia and industry. The Gram-positive bacterium Bacillus megaterium represents an increasingly used alternative for high yield intra- and extracellular protein synthesis. During the past two decades, multiple tools including gene expression plasmids and production strains have been developed. Introduction of free replicating and integrative plasmids into B. megaterium is possible via protoplasts transformation or transconjugation. Using His(6)- and StrepII affinity tags, the intra- or extracellular produced proteins can easily be purified in one-step procedures. 
Different gene expression systems based on the xylose controlled promoter P(xylA) and various phage RNA polymerase (T7, SP6, K1E) driven systems enable B. megaterium to produce up to 1.25g of recombinant protein per liter. Biomass concentrations of up to 80g/l can be achieved by high cell density cultivations in bioreactors. Gene knockouts and gene replacements in B. megaterium are possible via an optimized gene disruption system. For a safe application in industry, sporulation and protease-deficient as well as UV-sensitive mutants are available. With the help of the recently published B. megaterium genome sequence, it is possible to characterize bottle necks in the protein production process via systems biology approaches based on transcriptome, proteome, metabolome, and fluxome data. The bioinformatical platform (Megabac, http://www.megabac.tu-bs.de) integrates obtained theoretical and experimental data.",2011-01-01 +21873640,Fast and accurate prediction of protein side-chain conformations.,"

Summary

We developed a fast and accurate side-chain modeling program [Optimized Side Chain Atomic eneRgy (OSCAR)-star] based on orientation-dependent energy functions and a rigid rotamer model. The average computing time was 18 s per protein for 218 test proteins with higher prediction accuracy (1.1% increase for χ(1) and 0.8% increase for χ(1+2)) than the best performing program developed by other groups. We show that the energy functions, which were calibrated to tolerate the discrete errors of rigid rotamers, are appropriate for protein loop selection, especially for decoys without extensive structural refinement.

Availability

OSCAR-star and the 218 test proteins are available for download at http://sysimm.ifrec.osaka-u.ac.jp/OSCAR CONTACT: standley@ifrec.osaka-u.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-27 +21682142,MAPS: an interactive web server for membrane annotation of transmembrane protein structures.,"The exact positioning of the membrane in transmembrane (TM) proteins plays important functional roles. Yet, the structures of TM proteins in protein data bank (pdb) have no information about the explicit position of the membrane. Using a simple hydrophobic lipid-protein mismatch energy function and a flexible lipid/water boundary, the position of lipid bilayer for representative TM proteins in pdb have been annotated. A web server called MAPS (Membrane Annotation of Protein Structures; available at: http://www.boseinst.ernet.in/gautam/maps) has been set up that allows the user to interactively analyze membrane-protein orientations of any uploaded pdb structure with user-defined membrane flexibility parameters.",2011-04-01 +22427840,β-sheet topology prediction with high precision and recall for β and mixed α/β proteins.,"The prediction of the correct β-sheet topology for pure β and mixed α/β proteins is a critical intermediate step toward the three dimensional protein structure prediction. The predicted beta sheet topology provides distance constraints between sequentially separated residues, which reduces the three dimensional search space for a protein structure prediction algorithm. Here, we present a novel mixed integer linear optimization based framework for the prediction of β-sheet topology in β and mixed α/β proteins. The objective is to maximize the total strand-to-strand contact potential of the protein. A large number of physical constraints are applied to provide biologically meaningful topology results. The formulation permits the creation of a rank-ordered list of preferred β-sheet arrangements. Finally, the generated topologies are re-ranked using a fully atomistic approach involving torsion angle dynamics and clustering. 
For a large, non-redundant data set of 2102 β and mixed α/β proteins with at least 3 strands taken from the PDB, the proposed approach provides the top 5 solutions with average precision and recall greater than 78%. Consistent results are obtained in the β-sheet topology prediction for blind targets provided during the CASP8 and CASP9 experiments, as well as for actual and predicted secondary structures. The β-sheet topology prediction algorithm, BeST, is available to the scientific community at http://selene.princeton.edu/BeST/.",2012-03-09 +23175547,Iliac vein compression as risk factor for left- versus right-sided deep venous thrombosis: case-control study.,"

Purpose

To determine if compression of the left common iliac vein (LCIV) by the right common iliac artery is associated with left-sided deep venous thrombosis (DVT).

Materials and methods

This institutional review board-approved case-control study was performed in a cohort of 230 consecutive patients (94 men, 136 women; mean age, 57.5 years; range, 10-94 years) at one institution who had undergone contrast material-enhanced computed tomography of the pelvis prior to a diagnosis of unilateral DVT. Demographic data and information on risk factors were collected. Two board-certified radiologists determined iliac vein compression by using quantitative measures of percentage compression {[1 minus (LCIV diameter at point of maximal compression/distal right common iliac vein diameter)] times 100%}, as well as qualitative measures (none, mild, moderate, severe), with estimates of measurement variability. Logistic regression analysis was performed (independent variable, left vs right DVT; dependent variable, iliac vein compression). Cutpoints of relevant compression were evaluated by using splines. Means (with 95% confidence intervals [CIs]) and odds ratios (ORs) (and 95% CIs) of left DVT per 1% increase in percentage compression were calculated.

Results

Patients with right DVT were more likely than those with left DVT to have a history of pulmonary embolism. Overall, in all study patients, mean percentage compression was 36.6%, 66 (29.7%) of 222 had greater than 50% compression, and 16 (7.2%) had greater than 70% compression. At most levels of compression, increasing compression was not associated with left DVT (adjusted ORs, 1.00, 0.99, 1.02) but above 70%, LCIV compression may be associated with left DVT (adjusted ORs, 3.03, 0.91, 10.15).

Conclusion

Increasing levels of percentage compression were not associated with left-sided DVT up to 70%; however, greater than 70% compression may be associated with left DVT.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12111580/-/DC1.",2012-12-01 +30722246,First Report of Garlic common latent virus Infecting Garlic in Sudan.,"Garlic (Allium sativum L.) is one of the most important vegetable field crops in Sudan, cultivated on an area of more than 6,000 ha with a total yield of 27,000 t in 2010 (faostat.fao.org). As part of a project which started in 2010 to improve the garlic production in Sudan, samples from local varieties showing severe mosaic and/or mottling were collected in winter 2011 from the main production areas in River Nile State, Northern State, and Darfur State. The plant material used for garlic production came from Sudan and was not imported. Because no reliable data were available on which viruses occur in garlic in Sudan, specific tests were initially omitted. In order to get an overview of the viruses present, dsRNA was prepared of a mixed leaf sample (12 leaves of different samples). This resulted in a high molecular weight dsRNA of approximately 9 kbp that served as template for a random RT-PCR followed by cloning and sequencing (3). Three identical clones originating from one PCR product covering the C-terminal part of the coat protein to the N-terminal part of the nucleic acid binding protein showed the highest sequence similarity to Garlic common latent virus (GarCLV). The nucleotide sequence identities of the 554-bp insert range from 85% to an isolate from India (Accession No. FJ154841) up to 97% to a GarCLV isolate from The Netherlands (AB004804), identifying the virus as a Sudanese isolate of GarCLV, one of the most common garlic infecting viruses. GarCLV belongs to the genus Carlavirus (1) and has previously been reported from Asia, Europe, and South America ( http://sdb.im.ac.cn/vide/descr352.htm ). 
In order to confirm these results, a double antibody sandwich (DAS)-ELISA was performed with six individual garlic samples in which five samples showed a clear reaction with a GarCLV specific antiserum (AS-0230, DSMZ, Germany). The occurrence of GarCLV could be further confirmed for the ELISA positive samples by a specific RT-PCR using the primers published by Majumder and Baranwal (2). Fragments of the expected size were obtained for all five samples. In addition, one of the positive samples was examined by electron microscopy (Dr. K. Richert-Pöggeler, JKI Braunschweig); filamentous flexuous particles typical for carlaviruses could be observed. The random RT-PCR sequence obtained in this study has been submitted to GenBank (KC013030). To our knowledge, this is the first report of GarCLV in garlic in Sudan and Africa. The impact of GarCLV on garlic production in Sudan needs to be evaluated, but the awareness of the occurrence of the virus and the availability of a reliable diagnostic tool will help to select virus-free propagation material. This will form the basis for sustainable garlic production. References: (1) A. M. Q. King et al. Virus Taxonomy 924, 2012. (2) S. Majumder and V. K. Baranwal. Plant Dis. 93:106, 2009. (3) W. Menzel et al. Arch. Virol. 154:1343, 2009.",2013-04-01 +22142146,Comparison of methods for calculating conditional expectations of sufficient statistics for continuous time Markov chains.,"

Background

Continuous time Markov chains (CTMCs) are a widely used model for describing the evolution of DNA sequences on the nucleotide, amino acid or codon level. The sufficient statistics for CTMCs are the time spent in a state and the number of changes between any two states. In applications, past evolutionary events (exact times and types of changes) are inaccessible and the past must be inferred from DNA sequence data observed in the present.

Results

We describe and implement three algorithms for computing linear combinations of expected values of the sufficient statistics, conditioned on the end-points of the chain, and compare their performance with respect to accuracy and running time. The first algorithm is based on an eigenvalue decomposition of the rate matrix (EVD), the second on uniformization (UNI), and the third on integrals of matrix exponentials (EXPM). The implementation in R of the algorithms is available at http://www.birc.au.dk/~paula/.

Conclusions

We use two different models to analyze the accuracy and eight experiments to investigate the speed of the three algorithms. We find that they have similar accuracy and that EXPM is the slowest method. Furthermore we find that UNI is usually faster than EVD.",2011-12-05 +21539309,LocaPep: localization of epitopes on protein surfaces using peptides from phage display libraries.,"The use of peptides from a phage display library selected by binding to a given antibody is a widespread technique to probe epitopes of antigenic proteins. However, the identification of interaction sites mimicked by these peptides on the antigen surface is a difficult task. LocaPep is a computer program developed to localize epitopes using a new clusters algorithm that focuses on protein surface properties. The program is constructed with the aim of providing a flexible computational tool for predicting the location of epitopes on protein structures. As a first set of testing results, the localization of epitope regions in eight different antigenic proteins for which experimental data on their antibody interactions exist is correctly identified by LocaPep. These results represent a disparate sample of biologically different systems. The program is freely available at http://atenea.montes.upm.es.",2011-05-11 +21303863,GeneTUKit: a software for document-level gene normalization.,"

Motivation

Linking gene mentions in an article to entries of biological databases can facilitate indexing and querying biological literature greatly. Due to the high ambiguity of gene names, this task is particularly challenging. Manual annotation for this task is expensive, time consuming and labor intensive. Therefore, providing assistive tools to facilitate the task is of high value.

Results

We developed GeneTUKit, a document-level gene normalization software for full-text articles. This software employs both local context surrounding gene mentions and global context from the whole full-text document. It can normalize genes of different species simultaneously. When participating in BioCreAtIvE III, the system obtained good results among 37 runs: the system was ranked first, fourth and seventh in terms of TAP-20, TAP-10 and TAP-5, respectively on the 507 full-text test articles.

Availability and implementation

The software is available at http://www.qanswers.net/GeneTUKit/.",2011-02-08 +21541042,"FurinDB: A database of 20-residue furin cleavage site motifs, substrates and their associated drugs.","FurinDB (freely available online at http://www.nuolan.net/substrates.html) is a database of furin substrates. This database includes experimentally verified furin cleavage sites, substrates, species, experimental methods, original publications of experiments and associated drugs targeting furin substrates. The current database release contains 126 furin cleavage sites from three species: mammals, bacteria and viruses. A main feature of this database is that all furin cleavage sites are recorded as a 20-residue motif, including one core region (eight amino acids, P6-P2') and two flanking solvent accessible regions (eight amino acids, P7-P14, and four amino acids, P3'-P6'), that represent our current understanding of the molecular biology of furin cleavage. This database is important for understanding the molecular evolution and relationships between sequence motifs, 3D structures, cellular functions and physical properties required by furin for cleavage, and for elucidating the molecular mechanisms and the progression of furin cleavage associated human diseases, including pathogenic infections, neurological disorders, tumorigenesis, tumor invasion, angiogenesis, and metastasis. FurinDB database will be a solid addition to the publicly available infrastructure for scientists in the field of molecular biology.",2011-02-08 +22241974,Joint modelling of confounding factors and prominent genetic regulators provides increased accuracy in genetical genomics studies.,"Expression quantitative trait loci (eQTL) studies are an integral tool to investigate the genetic component of gene expression variation. A major challenge in the analysis of such studies are hidden confounding factors, such as unobserved covariates or unknown subtle environmental perturbations. 
These factors can induce a pronounced artifactual correlation structure in the expression profiles, which may create spurious false associations or mask real genetic association signals. Here, we report PANAMA (Probabilistic ANAlysis of genoMic dAta), a novel probabilistic model to account for confounding factors within an eQTL analysis. In contrast to previous methods, PANAMA learns hidden factors jointly with the effect of prominent genetic regulators. As a result, this new model can more accurately distinguish true genetic association signals from confounding variation. We applied our model and compared it to existing methods on different datasets and biological systems. PANAMA consistently performs better than alternative methods, and finds in particular substantially more trans regulators. Importantly, our approach not only identifies a greater number of associations, but also yields hits that are biologically more plausible and can be better reproduced between independent studies. A software implementation of PANAMA is freely available online at http://ml.sheffield.ac.uk/qtl/.",2012-01-05 +21795811,A conformation-dependent stereochemical library improves crystallographic refinement even at atomic resolution.,"To utilize a new conformation-dependent backbone-geometry library (CDL) in protein refinements at atomic resolution, a script was written that creates a restraint file for the SHELXL refinement program. It was found that the use of this library allows models to be created that have a substantially better fit to main-chain bond angles and lengths without degrading their fit to the X-ray data even at resolutions near 1 Å. For models at much higher resolution (∼0.7 Å), the refined model for parts adopting single well occupied positions is largely independent of the restraints used, but these structures still showed much smaller r.m.s.d. residuals when assessed with the CDL. 
Examination of the refinement tests across a wide resolution range from 2.4 to 0.65 Å revealed consistent behavior supporting the use of the CDL as a next-generation restraint library to improve refinement. CDL restraints can be generated using the service at http://pgd.science.oregonstate.edu/cdl_shelxl/.",2011-07-12 +22558382,Linking proteins to signaling pathways for experiment design and evaluation.,"Biomedical experimental work often focuses on altering the functions of selected proteins. These changes can hit signaling pathways, and can therefore unexpectedly and non-specifically affect cellular processes. We propose PathwayLinker, an online tool that can provide a first estimate of the possible signaling effects of such changes, e.g., drug or microRNA treatments. PathwayLinker minimizes the users' efforts by integrating protein-protein interaction and signaling pathway data from several sources with statistical significance tests and clear visualization. We demonstrate through three case studies that the developed tool can point out unexpected signaling bias in normal laboratory experiments and identify likely novel signaling proteins among the interactors of known drug targets. In our first case study we show that knockdown of the Caenorhabditis elegans gene cdc-25.1 (meant to avoid progeny) may globally affect the signaling system and unexpectedly bias experiments. In the second case study we evaluate the loss-of-function phenotypes of a less known C. elegans gene to predict its function. In the third case study we analyze GJA1, an anti-cancer drug target protein in human, and predict for this protein novel signaling pathway memberships, which may be sources of side effects. Compared to similar services, a major advantage of PathwayLinker is that it drastically reduces the necessary amount of manual literature searches and can be used without a computational background. PathwayLinker is available at http://PathwayLinker.org. 
Detailed documentation and source code are available at the website.",2012-04-27 +23352299,[Greenlight(®) photoselective vaporisation for benign prostatic hyperplasia: a systematic review].,"

Introduction

Transurethral resection of the prostate (TURP) is the most common surgical procedure in urology and remains the gold standard treatment of complicated benign prostatic hyperplasia or refractory to medical treatment. Routinely used since the 2000s, prostate photoselective vaporization (PVP) with Greenlight(®) laser has been developed to improve the safety of hemostasis in elderly patients and/or with high surgical risk. The purpose of this study was to review the results of PVP from the international literature.

Material and methods

[corrected] A systematic review of the literature on the research base Pubmed (http://www.ncbi.nlm.nih.gov/) was performed using the keywords benign prostatic hyperplasia; greenlight; photovaporisation; Laser; IPSS score; endoscopic surgery; morbidity; complication. Prospective and retrospective studies in English and French were selected from its first use in 1998. Finally, we looked for studies that reported at least one of the following items: surgical technique; operative data; complications; anatomical and functional results and/or direct comparison between PVP and TURP.

Results

Regardless of the PVP technique used to treat adenoma and identify the limits of the prostatic capsule, some parameters are well defined (sweep speed, angle and distance of the fiber with the tissue) but others are still debated (number of joules per volume, when do we have to stop the PVP) and are reported in a heterogeneous manner due to the different generators. Versus TURP, PVP would offer the same functional results in the medium term but with a lower risk of per- and postoperative bleeding. The study of the risk of erectile dysfunction (ED) after PVP is made difficult due to the heterogeneity of ED assessment and study populations. However, PVP does not seem associated with an increased risk of ED versus TURP. The lack of histological material should lead to preoperative individual screening of prostate. The economy generated by PVP regarding the decrease in average length of stay has been clearly identified in Australia, Canada, Switzerland and USA. Studies will be published soon on the French economic model.

Conclusion

PVP with Greenlight(®) laser appears to be a safe and effective technique. With the new generator XPS, the PVP technique reaches maturity. Its development will certainly lead to a long-term evaluation with high levels of evidence based.",2012-11-26 +22719993,Improving disease gene prioritization by comparing the semantic similarity of phenotypes in mice with those of human diseases.,"Despite considerable progress in understanding the molecular origins of hereditary human diseases, the molecular basis of several thousand genetic diseases still remains unknown. High-throughput phenotype studies are underway to systematically assess the phenotype outcome of targeted mutations in model organisms. Thus, comparing the similarity between experimentally identified phenotypes and the phenotypes associated with human diseases can be used to suggest causal genes underlying a disease. In this manuscript, we present a method for disease gene prioritization based on comparing phenotypes of mouse models with those of human diseases. For this purpose, either human disease phenotypes are ""translated"" into a mouse-based representation (using the Mammalian Phenotype Ontology), or mouse phenotypes are ""translated"" into a human-based representation (using the Human Phenotype Ontology). We apply a measure of semantic similarity and rank experimentally identified phenotypes in mice with respect to their phenotypic similarity to human diseases. Our method is evaluated on manually curated and experimentally verified gene-disease associations for human and for mouse. We evaluate our approach using a Receiver Operating Characteristic (ROC) analysis and obtain an area under the ROC curve of up to . Furthermore, we are able to confirm previous results that the Vax1 gene is involved in Septo-Optic Dysplasia and suggest Gdf6 and Marcks as further potential candidates. Our method significantly outperforms previous phenotype-based approaches of prioritizing gene-disease associations. 
To enable the adaption of our method to the analysis of other phenotype data, our software and prioritization results are freely available under a BSD licence at http://code.google.com/p/phenomeblast/wiki/CAMP. Furthermore, our method has been integrated in PhenomeNET and the results can be explored using the PhenomeBrowser at http://phenomebrowser.net.",2012-06-14 +22297131,Caipirini: using gene sets to rank literature.,"

Background

Keeping up-to-date with bioscience literature is becoming increasingly challenging. Several recent methods help meet this challenge by allowing literature search to be launched based on lists of abstracts that the user judges to be 'interesting'. Some methods go further by allowing the user to provide a second input set of 'uninteresting' abstracts; these two input sets are then used to search and rank literature by relevance. In this work we present the service 'Caipirini' (http://caipirini.org) that also allows two input sets, but takes the novel approach of allowing ranking of literature based on one or more sets of genes.

Results

To evaluate the usefulness of Caipirini, we used two test cases, one related to the human cell cycle, and a second related to disease defense mechanisms in Arabidopsis thaliana. In both cases, the new method achieved high precision in finding literature related to the biological mechanisms underlying the input data sets.

Conclusions

To our knowledge Caipirini is the first service enabling literature search directly based on biological relevance to gene sets; thus, Caipirini gives the research community a new way to unlock hidden knowledge from gene sets derived via high-throughput experiments.",2012-02-01 +22316280,Allometric or lean body mass scaling of propofol pharmacokinetics: towards simplifying parameter sets for target-controlled infusions.,"Uncertainty exists as to the most suitable pharmacokinetic parameter sets for propofol target-controlled infusions (TCI). The pharmacokinetic parameter sets currently employed are clearly not universally applicable, particularly when patient attributes differ from those of the subjects who participated in the original research from which the models were derived. Increasing evidence indicates that the pharmacokinetic parameters of propofol can be scaled allometrically as well as in direct proportion to lean body mass (LBM). Appraisal of hitherto published studies suggests that an allometrically scaled pharmacokinetic parameter set may be applicable to a wide range of patients ranging from children to obese adults. On the other hand, there is evidence that propofol pharmacokinetic parameters, scaled linearly to LBM, provide improved dosing in normal and obese adults. The 'Schnider' pharmacokinetic parameter set that has been programmed into commercially available TCI pumps cannot be employed at present for morbidly obese patients (body mass index >40 kg/m2), because of anomalous behaviour of the equation used to calculate LBM, resulting in administration of excessive amounts of propofol. Simulations of TCI using improved equations to calculate LBM indicate that the Schnider model delivers similar amounts of propofol to morbidly obese patients as do the allometrically scaled pharmacokinetic parameter sets. These hypotheses deserve further investigation. 
To facilitate further investigation, researchers are encouraged to make their data freely available to the WorldSIVA Open TCI Initiative (http://opentci.org).",2012-03-01 +23531149,Striking reduction of amyloid plaque burden in an Alzheimer's mouse model after chronic administration of carmustine.,"

Background

Currently available therapies for Alzheimer's disease (AD) do not treat the underlying cause of AD. Anecdotal observations in nursing homes from multiple studies strongly suggest an inverse relationship between cancer and AD. Therefore, we reasoned that oncology drugs may be effective against AD.

Methods

We screened a library of all the FDA-approved oncology drugs and identified bis-chloroethylnitrosourea (BCNU or carmustine) as an effective amyloid beta (Aβ) reducing compound. To quantify Aβ levels, Chinese hamster ovary (CHO) cells stably expressing amyloid precursor protein 751WT (APP751WT) called 7WD10 cells were exposed to different concentrations of BCNU for 48 hours and the conditioned media were collected. To detect Aβ the conditioned media were immunoprecipitated with Ab9 antibody and subjected to immunoblot detection. Amyloid plaques were quantified in the brains of a mouse model of AD after chronic exposure to BCNU by thioflavin S staining.

Results

BCNU decreased normalized levels of Aβ starting from 5 μM by 39% (P < 0.05), 10 μM by 51% (P < 0.01) and 20 μM by 63% (P < 0.01) in CHO cells compared to a control group treated with butyl amine, a structural derivative of BCNU. Interestingly, soluble amyloid precursor protein α (sAPPα) levels were increased to 167% (P < 0.01) at 0.5 μM, 186% (P < 0.05) at 1 μM, 204% (P < 0.01) at 5 μM and 152% (P < 0.05) at 10 μM compared to untreated cells. We also tested the effects of 12 structural derivatives of BCNU on Aβ levels, but none of them were as potent as BCNU. BCNU treatment at 5 μM led to an accumulation of immature APP at the cell surface resulting in an increased ratio of surface to total APP by 184% for immature APP, but no change in mature APP. It is also remarkable that BCNU reduced Aβ generation independent of secretases which were not altered up to 40 μM. Interestingly, levels of transforming growth factor beta (TGFβ) were increased at 5 μM (43%, P < 0.05), 10 μM (73%, P < 0.01) and 20 μM (92%, P < 0.001). Most significantly, cell culture results were confirmed in vivo after chronic administration of BCNU at 0.5 mg/kg which led to the reduction of Aβ40 by 75% and amyloid plaque burden by 81%. Conversely, the levels of sAPPα were increased by 45%.

Conclusions

BCNU reduces Aβ generation and plaque burden at non-toxic concentrations possibly through altered intracellular trafficking and processing of APP. Taken together these data provided unequivocal evidence that BCNU is a potent secretase-sparing anti-Aβ drug. See related commentary article here http://www.biomedcentral.com/1741-7015/11/82.",2013-03-26 +30743441,First Report of Nigrospora sphaerica Causing Leaf Spots on Chinese Wisteria: A New Host of the Pathogen.,"Chinese wisteria, Wisteria sinensis (Sims) DC., is a woody, twining vine and is commonly cultivated as an ornamental for its foliage and striking, drooping racemes of white, pink, or lavender sweet pea-like flower. Distinct leaf spots were observed in several gardens, retail nurseries, and parks located in Hatay Province since May 2009. The primary infection zones are frequently observed on the leaf margins and apices, brown, up to 2 mm in diameter, and often surrounded by a yellow zone. Foliar symptoms are characterized by grayish, round, semicircular or irregular-shaped, numerous spots (up to 9 mm in diameter) with dark brown borders and the appearance of black, granular structure within the dead leaf tissues. A fungus was consistently isolated from symptomatic tissues on potato dextrose agar (PDA). Fungal colonies were initially white, becoming light to dark gray with the onset of sporulation with black, spherical to subspherical single-celled conidia (15 to 18 × 12 to 15 μm), which were borne on a hyaline vesicle at the tip of the conidiophore. These characteristics agree with published descriptions of Nigrospora sphaerica (Sacc.) E.W. Mason 1927 (1,3). To fulfill Koch's postulates, a conidial suspension (106 conidia per ml) collected from PDA cultures was used to spray inoculate leaves of potted 3-year-old Chinese wisteria plants. Inoculated plants were kept for 48 h in polyethylene bags and maintained in a controlled environment chamber at 20°C with a 12-h photoperiod. 
The bags were removed after 3 days. In addition, five 3-year-old plants were sprayed with sterile water to serve as controls. After 14 to 20 days, inoculated leaves showed infection symptoms similar to those observed on naturally infected leaves with N. sphaerica. The pathogen was reisolated from the margins of necrotic tissues, but not from the controls. Although N. sphaerica is frequently encountered as a secondary invader or as a saprophyte on many plant species, this fungal agent is also known as a leaf pathogen on several hosts worldwide (2,4). To our knowledge, this is the first report of N. sphaerica as a leaf pathogen of Chinese wisteria in Turkey or worldwide. References: (1) M. B. Ellis. Dematiaceous Hyphomycetes. CMI, Kew, Surrey, UK, 1971. (2) D. F. Farr and A. Y. Rossman. Fungal Databases. Systematic Mycology and Microbiology Laboratory. Online publication. ARS, USDA. Retrieved 28 October from http://nt.ars-grin.gov/fungaldatabases/ , 2010. (3) P. M. Kirk. IMI Descr. Fungi Bact. 106:1056, 1991. (4) E. R. Wright et al. Plant Dis. 92:171, 2008.",2011-02-01 +22182607,Culture independent survey of the microbiota of the glassy-winged sharpshooter (Homalodisca vitripennis) using 454 pyrosequencing.,"The glassy-winged sharpshooter, Homalodisca vitripennis (Germar), is an invasive pest that has spread across the southern and western United States. H. vitripennis is highly polyphagous and voracious, feeding on at least 100 plant species and consuming up to 100 times its weight in xylem fluid daily. The insect is a vector of the phytopathogen Xylella fastidiosa (Wells), which is the causative agent of Pierce's disease in grapevines. To evaluate the microbial flora associated with H. 
vitripennis, total DNA extracts from hemolymph, alimentary canal excretions, and whole insect bodies were subjected to 16S rDNA pyrosequencing using the bTEFAP methodology and the resulting sequences (370-520 bp in length) were compared with a curated high quality 16S database derived from GenBank http://www.ncbi.nlm.nih.gov. Species from the genera Wolbachia, Delftia (formerly Pseudomonas), Pectobacterium, Moraxella, Serratia, Bacillus, and many others were detected and a comprehensive picture of the microbiome associated with H. vitripennis was established. Some of the bacteria identified in this report are initial discoveries; providing a breadth of knowledge to the microbial flora of this insect pest can serve as a reservoir of information for developing biological control strategies.",2011-02-01 +21366659,Reference Value Advisor: a new freeware set of macroinstructions to calculate reference intervals with Microsoft Excel.,"International recommendations for determination of reference intervals have been recently updated, especially for small reference sample groups, and use of the robust method and Box-Cox transformation is now recommended. Unfortunately, these methods are not included in most software programs used for data analysis by clinical laboratories. We have created a set of macroinstructions, named Reference Value Advisor, for use in Microsoft Excel to calculate reference limits applying different methods. 
For any series of data, Reference Value Advisor calculates reference limits (with 90% confidence intervals [CI]) using a nonparametric method when n≥40 and by parametric and robust methods from native and Box-Cox transformed values; tests normality of distributions using the Anderson-Darling test and outliers using Tukey and Dixon-Reed tests; displays the distribution of values in dot plots and histograms and constructs Q-Q plots for visual inspection of normality; and provides minimal guidelines in the form of comments based on international recommendations. The critical steps in determination of reference intervals are correct selection of as many reference individuals as possible and analysis of specimens in controlled preanalytical and analytical conditions. Computing tools cannot compensate for flaws in selection and size of the reference sample group and handling and analysis of samples. However, if those steps are performed properly, Reference Value Advisor, available as freeware at http://www.biostat.envt.fr/spip/spip.php?article63, permits rapid assessment and comparison of results calculated using different methods, including currently unavailable methods. This allows for selection of the most appropriate method, especially as the program provides the CI of limits. It should be useful in veterinary clinical pathology when only small reference sample groups are available.",2011-02-07 +22028466,ANAT: a tool for constructing and analyzing functional protein networks.,"Genome-scale screening studies are gradually accumulating a wealth of data on the putative involvement of hundreds of genes in various cellular responses or functions. A fundamental challenge is to chart the molecular pathways that underlie these systems. ANAT is an interactive software tool, implemented as a Cytoscape plug-in, for elucidating functional networks of proteins. 
It encompasses a number of network inference algorithms and provides access to networks of physical associations in several organisms. In contrast to existing software tools, ANAT can be used to infer subnetworks that connect hundreds of proteins to each other or to a given set of ""anchor"" proteins, a fundamental step in reconstructing cellular subnetworks. The interactive component of ANAT provides an array of tools for evaluating and exploring the resulting subnetwork models and for iteratively refining them. We demonstrate the utility of ANAT by studying the crosstalk between the autophagic and apoptotic cell death modules in humans, using a network of physical interactions. Relative to published software tools, ANAT is more accurate and provides more features for comprehensive network analysis. The latest version of the software is available at http://www.cs.tau.ac.il/~bnet/ANAT_SI.",2011-10-25 +21682852,Querying large read collections in main memory: a versatile data structure.,"

Background

High Throughput Sequencing (HTS) is now heavily exploited for genome (re-) sequencing, metagenomics, epigenomics, and transcriptomics and requires different, but computer intensive bioinformatic analyses. When a reference genome is available, mapping reads on it is the first step of this analysis. Read mapping programs owe their efficiency to the use of involved genome indexing data structures, like the Burrows-Wheeler transform. Recent solutions index both the genome, and the k-mers of the reads using hash-tables to further increase efficiency and accuracy. In various contexts (e.g. assembly or transcriptome analysis), read processing requires determining the sub-collection of reads that are related to a given sequence, which is done by searching for some k-mers in the reads. Currently, many developments have focused on genome indexing structures for read mapping, but the question of read indexing remains broadly unexplored. However, the increase in sequence throughput calls for new algorithmic solutions to query large read collections efficiently.

Results

Here, we present a solution, named Gk arrays, to index large collections of reads, an algorithm to build the structure, and procedures to query it. Once constructed, the index structure is kept in main memory and is repeatedly accessed to answer queries like ""given a k-mer, get the reads containing this k-mer (once/at least once)"". We compared our structure to other solutions that adapt uncompressed indexing structures designed for long texts and show that it processes queries fast, while requiring much less memory. Our structure can thus handle larger read collections. We provide examples where such queries are adapted to different types of read analysis (SNP detection, assembly, RNA-Seq).

Conclusions

Gk arrays constitute a versatile data structure that enables fast and more accurate read analysis in various contexts. The Gk arrays provide a flexible brick to design innovative programs that mine efficiently genomics, epigenomics, metagenomics, or transcriptomics reads. The Gk arrays library is available under Cecill (GPL compliant) license from http://www.atgc-montpellier.fr/ngs/.",2011-06-17 +21551136,BINOCh: binding inference from nucleosome occupancy changes.,"

Unlabelled

Transcription factor binding events are frequently associated with a pattern of nucleosome occupancy changes in which nucleosomes flanking the binding site increase in occupancy, while those in the vicinity of the binding site itself are displaced. Genome-wide information on enhancer proximal nucleosome occupancy can be readily acquired using ChIP-seq targeting enhancer-related histone modifications such as H3K4me2. Here, we present a software package, BINOCh that allows biologists to use such data to infer the identity of key transcription factors that regulate the response of a cell to a stimulus or determine a program of differentiation.

Availability

The BINOCh open source Python package is freely available at http://liulab.dfci.harvard.edu/BINOCh under the FreeBSD license.",2011-05-05 +22848443,Prediction and analysis of the protein interactome in Pseudomonas aeruginosa to enable network-based drug target selection.,"Pseudomonas aeruginosa (PA) is a ubiquitous opportunistic pathogen that is capable of causing highly problematic, chronic infections in cystic fibrosis and chronic obstructive pulmonary disease patients. With the increased prevalence of multi-drug resistant PA, the conventional ""one gene, one drug, one disease"" paradigm is losing effectiveness. Network pharmacology, on the other hand, may hold the promise of discovering new drug targets to treat a variety of PA infections. However, given the urgent need for novel drug target discovery, a PA protein-protein interaction (PPI) network of high accuracy and coverage, has not yet been constructed. In this study, we predicted a genome-scale PPI network of PA by integrating various genomic features of PA proteins/genes by a machine learning-based approach. A total of 54,107 interactions covering 4,181 proteins in PA were predicted. A high-confidence network combining predicted high-confidence interactions, a reference set and verified interactions that consist of 3,343 proteins and 19,416 potential interactions was further assembled and analyzed. The predicted interactome network from this study is the first large-scale PPI network in PA with significant coverage and high accuracy. Subsequent analysis, including validations based on existing small-scale PPI data and the network structure comparison with other model organisms, shows the validity of the predicted PPI network. Potential drug targets were identified and prioritized based on their essentiality and topological importance in the high-confidence network. Host-pathogen protein interactions between human and PA were further extracted and analyzed. 
In addition, case studies were performed on protein interactions regarding anti-sigma factor MucA, negative periplasmic alginate regulator MucB, and the transcriptional regulator RhlR. A web server to access the predicted PPI dataset is available at http://research.cchmc.org/PPIdatabase/.",2012-07-24 +22522775,The frailty index in Europeans: association with age and mortality.,"

Background

the frailty index (FI) is an approach to the operationalisation of frailty based on accumulation of deficits. It has been less studied in Europeans.

Objective

to construct sex-specific FIs from a large sample of Europeans and study their associations with age and mortality.

Design

longitudinal population-based survey.

Setting

the Survey of Health, Ageing and Retirement in Europe (SHARE, http://share-dev.mpisoc.mpg.de/).

Subjects

a total of 16,217 females and 13,688 males aged ≥50 from wave 1 (2004-05). Mortality data were collected between 2005 and 2006 (mean follow-up: 2.4 years).

Methods

regression curve estimations between age and an FI constructed as per the standard procedure. Logistic regressions were used to assess the relative effects of age and the FI towards mortality.

Results

in both sexes, there was a significant non-linear association between age and the FI (females: quadratic R(2) = 0.20, P < 0.001; males: quadratic R(2) = 0.14, P < 0.001). Overall, the FI was a much stronger predictor of mortality than age, even after adjusting for the latter (females: age-adjusted OR 100.5, 95% confidence interval (CI): 46.3-218.2, P < 0.001; males: age-adjusted OR 221.1, 95% CI: 106.7-458.4, P < 0.001).

Conclusion

the FI had the expected properties in this large sample of Europeans.",2012-04-19 +21276248,ZFNGenome: a comprehensive resource for locating zinc finger nuclease target sites in model organisms.,"

Background

Zinc Finger Nucleases (ZFNs) have tremendous potential as tools to facilitate genomic modifications, such as precise gene knockouts or gene replacements by homologous recombination. ZFNs can be used to advance both basic research and clinical applications, including gene therapy. Recently, the ability to engineer ZFNs that target any desired genomic DNA sequence with high fidelity has improved significantly with the introduction of rapid, robust, and publicly available techniques for ZFN design such as the Oligomerized Pool ENgineering (OPEN) method. The motivation for this study is to make resources for genome modifications using OPEN-generated ZFNs more accessible to researchers by creating a user-friendly interface that identifies and provides quality scores for all potential ZFN target sites in the complete genomes of several model organisms.

Description

ZFNGenome is a GBrowse-based tool for identifying and visualizing potential target sites for OPEN-generated ZFNs. ZFNGenome currently includes a total of more than 11.6 million potential ZFN target sites, mapped within the fully sequenced genomes of seven model organisms; S. cerevisiae, C. reinhardtii, A. thaliana, D. melanogaster, D. rerio, C. elegans, and H. sapiens and can be visualized within the flexible GBrowse environment. Additional model organisms will be included in future updates. ZFNGenome provides information about each potential ZFN target site, including its chromosomal location and position relative to transcription initiation site(s). Users can query ZFNGenome using several different criteria (e.g., gene ID, transcript ID, target site sequence). Tracks in ZFNGenome also provide ""uniqueness"" and ZiFOpT (Zinc Finger OPEN Targeter) ""confidence"" scores that estimate the likelihood that a chosen ZFN target site will function in vivo. ZFNGenome is dynamically linked to ZiFDB, allowing users access to all available information about zinc finger reagents, such as the effectiveness of a given ZFN in creating double-stranded breaks.

Conclusions

ZFNGenome provides a user-friendly interface that allows researchers to access resources and information regarding genomic target sites for engineered ZFNs in seven model organisms. This genome-wide database of potential ZFN target sites should greatly facilitate the utilization of ZFNs in both basic and clinical research. ZFNGenome is freely available at: http://bindr.gdcb.iastate.edu/ZFNGenome or at the Zinc Finger Consortium website: http://www.zincfingers.org/.",2011-01-28 +21975766,Oral lactoferrin for the treatment of sepsis and necrotizing enterocolitis in neonates.,"

Background

Neonatal sepsis and necrotizing enterocolitis (NEC) cause significant neonatal mortality and morbidity in spite of appropriate antibiotic therapy. Enhancing host defence and modulating inflammation by using lactoferrin as an adjunct to antibiotics in the treatment of sepsis and/or NEC may improve clinical outcomes.

Objectives

The primary objective is to assess safety and efficacy of oral lactoferrin as an adjunct to antibiotics in the treatment of neonates with suspected or confirmed sepsis and/or NEC.

Search strategy

Relevant trials in any language were searched in July 2011 in the Cochrane Central Register of Controlled Trials (CENTRAL, The Cochrane Library), MEDLINE, PREMEDLINE, EMBASE, CINAHL, web sites: www.clinicaltrials.gov and www.controlled-trials.com, abstracts from the annual meeting of Pediatric Academic Societies (1990 to July 2011), by contacting authors who have published in this field, from the reference lists of identified clinical trials and in the reviewer's personal files.

Selection criteria

Randomized or quasi-randomized controlled trials evaluating oral lactoferrin (at any dose or duration) used as an adjunct to antibiotic therapy compared with antibiotic therapy alone (with or without placebo) or other adjuncts to antibiotic therapy to treat neonates at any gestational age up to 44 weeks postmenstrual age with confirmed or suspected sepsis or necrotizing enterocolitis (Bell's Stage II or III).

Data collection and analysis

We used the standardized methods of the Cochrane Neonatal Review Group (CNRG) for conducting a systematic review and for assessing the methodological quality of the studies (http://neonatal.cochrane.org/en/index.html). The titles and the abstracts of studies identified by the search strategy were independently assessed by the two review authors and full text version was obtained for assessment if necessary. Forms were designed for trial inclusion/exclusion and data extraction.

Main results

We did not identify any eligible neonatal trial evaluating lactoferrin for treatment of neonatal sepsis or NEC.

Authors' conclusions

Implications for practice

Currently there is no evidence to recommend or refute the use of lactoferrin for the treatment of neonatal sepsis or necrotizing enterocolitis as an adjunct to antibiotic therapy.

Implications for research

The safety and efficacy of different preparations and doses of lactoferrin need to be established in neonates. Well designed adequately powered randomized multicenter trials are needed to address the efficacy and safety of lactoferrin in the treatment of neonatal sepsis and necrotizing enterocolitis. These trials should evaluate long-term neurodevelopmental and pulmonary outcomes in addition to short-term outcomes.",2011-10-05 +23339658,Reassessment of the Listeria monocytogenes pan-genome reveals dynamic integration hotspots and mobile genetic elements as major components of the accessory genome.,"

Background

Listeria monocytogenes is an important food-borne pathogen and model organism for host-pathogen interaction, thus representing an invaluable target considering research on the forces governing the evolution of such microbes. The diversity of this species has not been exhaustively explored yet, as previous efforts have focused on analyses of serotypes primarily implicated in human listeriosis. We conducted complete genome sequencing of 11 strains employing 454 GS FLX technology, thereby achieving full coverage of all serotypes including the first complete strains of serotypes 1/2b, 3c, 3b, 4c, 4d, and 4e. These were comparatively analyzed in conjunction with publicly available data and assessed for pathogenicity in the Galleria mellonella insect model.

Results

The species pan-genome of L. monocytogenes is highly stable but open, suggesting an ability to adapt to new niches by generating or including new genetic information. The majority of gene-scale differences represented by the accessory genome resulted from nine hyper variable hotspots, a similar number of different prophages, three transposons (Tn916, Tn554, IS3-like), and two mobilizable islands. Only a subset of strains showed CRISPR/Cas bacteriophage resistance systems of different subtypes, suggesting a supplementary function in maintenance of chromosomal stability. Multiple phylogenetic branches of the genus Listeria imply long common histories of strains of each lineage as revealed by a SNP-based core genome tree highlighting the impact of small mutations for the evolution of species L. monocytogenes. Frequent loss or truncation of genes described to be vital for virulence or pathogenicity was confirmed as a recurring pattern, especially for strains belonging to lineages III and II. New candidate genes implicated in virulence function were predicted based on functional domains and phylogenetic distribution. A comparative analysis of small regulatory RNA candidates supports observations of a differential distribution of trans-encoded RNA, hinting at a diverse range of adaptations and regulatory impact.

Conclusions

This study determined commonly occurring hyper variable hotspots and mobile elements as primary effectors of quantitative gene-scale evolution of species L. monocytogenes, while gene decay and SNPs seem to represent major factors influencing long-term evolution. The discovery of common and disparately distributed genes considering lineages, serogroups, serotypes and strains of species L. monocytogenes will assist in diagnostic, phylogenetic and functional research, supported by the comparative genomic GECO-LisDB analysis server (http://bioinfo.mikrobio.med.uni-giessen.de/geco2lisdb).",2013-01-22 +22350305,Periauricular contouring suspension in secondary face-lift.,"BACKGROUND: The management of revision face-lift surgery is always challenging and controversial. We introduce the periauricular contouring suspension which combines a concentric SMAS suspension with a linear platysmal suspension for optimal facial rejuvenation. METHODS: Complete clinical examination was a prerequisite to any secondary face-lift. We included every patient who had undergone a secondary face-lift between 2007 and 2010 in this study. The periauricular contouring suspension technique was used. It is inspired by Stocchero's round-block SMAS treatment procedure and also features a linear platysmal suspension associated with liposuction. Data gathered for every patient included sex, age, delay between primary and secondary procedures, and follow-up. RESULTS: Sixty-two patients (50 women and 12 men) underwent the procedure and were included in this study. The average age was 66 years old. The average delay between primary and secondary surgery was 11 years. Average follow-up was 29 months. CONCLUSIONS: Periauricular contouring suspension is a reliable technique for a secondary face-lift. It is a minimally SMAS-invasive technique in which postoperative outcomes are excellent and has become our preferred technique for revision face-lifting. 
LEVEL OF EVIDENCE IV: This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors at http://www.springer.com/00266.",2012-02-21 +21269480,Searching the protein structure database for ligand-binding site similarities using CPASS v.2.,"

Background

A recent analysis of protein sequences deposited in the NCBI RefSeq database indicates that ~8.5 million protein sequences are encoded in prokaryotic and eukaryotic genomes, where ~30% are explicitly annotated as ""hypothetical"" or ""uncharacterized"" protein. Our Comparison of Protein Active-Site Structures (CPASS v.2) database and software compares the sequence and structural characteristics of experimentally determined ligand binding sites to infer a functional relationship in the absence of global sequence or structure similarity. CPASS is an important component of our Functional Annotation Screening Technology by NMR (FAST-NMR) protocol and has been successfully applied to aid the annotation of a number of proteins of unknown function.

Findings

We report a major upgrade to our CPASS software and database that significantly improves its broad utility. CPASS v.2 is designed with a layered architecture to increase flexibility and portability that also enables job distribution over the Open Science Grid (OSG) to increase speed. Similarly, the CPASS interface was enhanced to provide more user flexibility in submitting a CPASS query. CPASS v.2 now allows for both automatic and manual definition of ligand-binding sites and permits pair-wise, one versus all, one versus list, or list versus list comparisons. Solvent accessible surface area, ligand root-mean square difference, and Cβ distances have been incorporated into the CPASS similarity function to improve the quality of the results. The CPASS database has also been updated.

Conclusions

CPASS v.2 is more than an order of magnitude faster than the original implementation, and allows for multiple simultaneous job submissions. Similarly, the CPASS database of ligand-defined binding sites has increased in size by ~ 38%, dramatically increasing the likelihood of a positive search result. The modification to the CPASS similarity function is effective in reducing CPASS similarity scores for false positives by ~30%, while leaving true positives unaffected. Importantly, receiver operating characteristics (ROC) curves demonstrate the high correlation between CPASS similarity scores and an accurate functional assignment. As indicated by distribution curves, scores ≥ 30% infer a functional similarity. Software URL: http://cpass.unl.edu.",2011-01-26 +22353813,A web-based prognostic tool for extremity and trunk wall soft tissue sarcomas and its external validation.,"

Background

We developed a web-based, prognostic tool for extremity and trunk wall soft tissue sarcoma to predict 10-year sarcoma-specific survival. External validation was performed.

Methods

Patients referred during 1987-2002 to Helsinki University Central Hospital are included. External validation was obtained from the Lund University Hospital register. Cox proportional hazards models were fitted with the Helsinki data. The previously described model (SIN) includes size, necrosis, and vascular invasion. The extended model (SAM) includes the SIN factors and in addition depth, location, grade, and size on a continuous scale. Models were statistically compared according to accuracy (area under the ROC curve=AUC) of 10-year sarcoma-specific survival prediction.

Results

The AUC of the SAM model in 10-year survival prediction in the Helsinki patient series was 0.81 as compared with 0.74 for the SIN model (P=0.0007). The corresponding AUCs in the external validation series were 0.77 for the SAM model and 0.73 for the SIN model (P=0.03). A web-based calculator for the SAM model is available at http://www.prognomics.org/sam.

Conclusion

Addition of grade, depth, and location as well as tumour size on a continuous scale significantly improved the accuracy of the prognostic model when compared with a model that includes only size, necrosis, and vascular invasion.",2012-02-21 +22196229,Investigating the relationship between in vitro-in vivo genotoxicity: derivation of mechanistic QSAR models for in vivo liver genotoxicity and in vivo bone marrow micronucleus formation which encompass metabolism.,"Strategic testing as part of an integrated testing strategy (ITS) to maximize information and avoid the use of animals where possible is fast becoming the norm with the advent of new legislation such as REACH. Genotoxicity is an area where regulatory testing is clearly defined as part of ITS schemes. Under REACH, the specific information requirements depend on the tonnage manufactured or imported. Two types of test systems exist to meet these information requirements, in vivo genotoxicity assays, which take into account the whole animal, and in vitro assays, which are conducted outside the living mammalian organism using microbial or mammalian cells under appropriate culturing conditions. Clearly, with these different broad experimental categories, results for a given chemical can often differ, which presents challenges in the interpretation as well as in attempting to model the results in silico. This study attempted to compare the differences between in vitro and in vivo genotoxicity results, to rationalize these differences with plausible hypothesis in concert with available data. Two proof of concept (Q)SAR models were developed, one for in vivo genotoxicity effects in liver and a second for in vivo micronucleus formation in bone marrow. 
These ""mechanistic models"" will be of practical value in testing strategies, and both have been implemented into the TIMES software platform ( http://oasis-lmc.org ) to help predict the genotoxicity outcome of new untested chemicals.",2012-01-24 +21666844,A standard set of American-English voiced stop-consonant stimuli from morphed natural speech.,"Linear predictive coding (LPC) analysis was used to create morphed natural tokens of English voiced stop consonants ranging from /b/ to /d/ and /d/ to /g/ in four vowel contexts (/i/, /æ/, /a/, /u/). Both vowel-consonant-vowel (VCV) and consonant-vowel (CV) stimuli were created. A total of 320 natural-sounding acoustic speech stimuli were created, comprising 16 stimulus series. A behavioral experiment demonstrated that the stimuli varied perceptually from /b/ to /d/ to /g/, and provided useful reference data for the ambiguity of each token. Acoustic analyses indicated that the stimuli compared favorably to standard characteristics of naturally-produced consonants, and that the LPC morphing procedure successfully modulated multiple acoustic parameters associated with place of articulation. The entire set of stimuli is freely available on the Internet (http://www.psy.cmu.edu/~lholt/php/StephensHoltStimuli.php) for use in research applications.",2011-07-01 +23047839,Brain dysfunction primarily related to previous overt hepatic encephalopathy compared with minimal hepatic encephalopathy: resting-state functional MR imaging demonstration.,"

Purpose

To investigate whether resting-state brain functional connectivity (FC) differed among cirrhotic patients without overt hepatic encephalopathy (HE) (OHE), those who currently had minimal HE (MHE), or those who had recovered from previous OHE and to investigate whether previous bouts of OHE rather than current MHE predominantly contributed to brain dysfunction in patients without current OHE.

Materials and methods

This study was approved by the institutional ethics committee, and informed consent was obtained. Resting-state functional magnetic resonance (MR) data were compared between healthy controls and the following groups of cirrhotic patients: (a) patients without MHE and without previous OHE, (b) patients with current MHE and without previous OHE, and (c) patients with previous OHE. Independent component analysis was applied to identify the best-fit component for the default-mode network (DMN). One-way analysis of variance was performed to detect different FC among groups. Pearson correlation analyses were conducted to determine the relationships between FC and neurocognitive performance.

Results

Two important regions within the DMN, including the precuneus and posterior cingulate cortex and left medial frontal gyrus, showed significantly different FC among the four groups. A trend of gradually reduced FC in two regions was observed from controls, to patients without HE, and to patients with current MHE, while patients with previous OHE showed remarkably reduced FC in these two regions. Significant correlations were found between FC and neurocognitive performance in cirrhotic patients.

Conclusion

The reduced resting-state FC within DMN was associated with neurocognitive impairments in MHE and after clinical resolution of OHE. Previous OHE rather than current MHE might be primarily related to brain dysfunction in patients with latent OHE.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12120026/-/DC1.",2012-10-09 +22689756,Joint stage recognition and anatomical annotation of Drosophila gene expression patterns.,"

Motivation

Staining the mRNA of a gene via in situ hybridization (ISH) during the development of a Drosophila melanogaster embryo delivers the detailed spatio-temporal patterns of the gene expression. Many related biological problems such as the detection of co-expressed genes, co-regulated genes and transcription factor binding motifs rely heavily on the analysis of these image patterns. To provide the text-based pattern searching for facilitating related biological studies, the images in the Berkeley Drosophila Genome Project (BDGP) study are annotated with developmental stage term and anatomical ontology terms manually by domain experts. Due to the rapid increase in the number of such images and the inevitable bias annotations by human curators, it is necessary to develop an automatic method to recognize the developmental stage and annotate anatomical terms.

Results

In this article, we propose a novel computational model for joint stage classification and anatomical term annotation of Drosophila gene expression patterns. We propose a novel Tri-Relational Graph (TG) model that comprises the data graph, anatomical term graph, developmental stage term graph, and connects them by two additional graphs induced from stage or annotation label assignments. Upon the TG model, we introduce a Preferential Random Walk (PRW) method to jointly recognize developmental stage and annotate anatomical terms by utilizing the interrelations between two tasks. The experimental results on two refined BDGP datasets demonstrate that our joint learning method can achieve better prediction results on both tasks than the state-of-the-art methods.

Availability

http://ranger.uta.edu/%7eheng/Drosophila/.",2012-06-01 +23043671,Evaluating health worker performance in Benin using the simulated client method with real children.,"

Background

The simulated client (SC) method for evaluating health worker performance utilizes surveyors who pose as patients to make surreptitious observations during consultations. Compared to conspicuous observation (CO) by surveyors, which is commonly done in developing countries, SC data better reflect usual health worker practices. This information is important because CO can cause performance to be better than usual. Despite this advantage of SCs, the method's full potential has not been realized for evaluating performance for pediatric illnesses because real children have not been utilized as SCs. Previous SC studies used scenarios of ill children that were not actually brought to health workers. During a trial that evaluated a quality improvement intervention in Benin (the Integrated Management of Childhood Illness [IMCI] strategy), we conducted an SC survey with adult caretakers as surveyors and real children to evaluate the feasibility of this approach and used the results to assess the validity of CO.

Methods

We conducted an SC survey and a CO survey (one right after the other) of health workers in the same 55 health facilities. A detailed description of the SC survey process was produced. Results of the two surveys were compared for 27 performance indicators using logistic regression modeling.

Results

SC and CO surveyors observed 54 and 185 consultations, respectively. No serious problems occurred during the SC survey. Performance levels measured by CO were moderately higher than those measured by SCs (median CO - SC difference = 16.4 percentage-points). Survey differences were sometimes much greater for IMCI-trained health workers (median difference = 29.7 percentage-points) than for workers without IMCI training (median difference = 3.1 percentage-points).

Conclusion

SC surveys can be done safely with real children if appropriate precautions are taken. CO can introduce moderately large positive biases, and these biases might be greater for health workers exposed to quality improvement interventions.

Trial number

http://clinicaltrials.gov Identifier NCT00510679.",2012-10-08 +21419689,Evaluation framework for carotid bifurcation lumen segmentation and stenosis grading.,"This paper describes an evaluation framework that allows a standardized and objective quantitative comparison of carotid artery lumen segmentation and stenosis grading algorithms. We describe the data repository comprising 56 multi-center, multi-vendor CTA datasets, their acquisition, the creation of the reference standard and the evaluation measures. This framework has been introduced at the MICCAI 2009 workshop 3D Segmentation in the Clinic: A Grand Challenge III, and we compare the results of eight teams that participated. These results show that automated segmentation of the vessel lumen is possible with a precision that is comparable to manual annotation. The framework is open for new submissions through the website http://cls2009.bigr.nl.",2011-02-17 +22658349,Individual risk prediction model for incident cardiovascular disease: a Bayesian clinical reasoning approach.,"

Background

A Bayesian clinical reasoning model was developed to predict an individual risk for cardiovascular disease (CVD) for desk-top reference.

Methods

Three Bayesian models were constructed to estimate the CVD risk by sequentially incorporating demographic features (basic), six metabolic syndrome components (metabolic score) and conventional risk factors (enhanced model). By considering clinical weights (regression coefficients) of each model as normal distribution, individual risk can be predicted making allowance for uncertainty of clinical weights. A community-based cohort that enrolled 64,489 participants free of CVD at baseline and followed up over five years to ascertain newly diagnosed CVD cases during the period through 2000 to 2004 was used for the illustration of the three proposed models (full empirical data are available from website http://homepage.ntu.edu.tw/~chenlin/CVD_prediction_data.rar).

Results

The proposed models can be applied to predicting the CVD risk with any combination of risk factors. For a 47-year-old man, the five-year risk for CVD with the basic model was 11.2% (95% CI: 7.8%-15.6%). His metabolic syndrome score, leading to 1.488 of likelihood ratio, enhanced the risk for CVD up to 15.8% (95% CI: 11.0%-21.5%) and put him in highest deciles. As with the habit of smoking over 2 packs per-day and family history of CVD, yielding the likelihood ratios of 1.62 and 1.47, respectively, the risk was further raised to 30.9% (95% CI: 20.7%-39.8%).

Conclusions

We demonstrate how to make individual risk prediction for CVD by incorporating routine information with a sequential Bayesian clinical reasoning approach.",2012-06-02 +21423723,Seaweed metabolite database (SWMD): A database of natural compounds from marine algae.,"

Unlabelled

The cataloguing of marine chemicals is a fundamental aspect for bioprospecting. This has applications in the development of drugs from marine sources. A publicly accessible database that provides comprehensive information about these compounds is therefore helpful. The Seaweed Metabolite Database (SWMD) is designed to provide information about the known compounds and their biological activity described in the literature. Geographical origin of the seaweed, extraction method and the chemical descriptors of each of the compounds are recorded to enable effective chemo-informatics analysis. Crosslinks to other databases are also introduced to facilitate the access of information about 3D Structure by X-ray and NMR activity, drug properties and related literature for each compound. This database currently contains entries for 517 compounds encompassing 25 descriptive fields mostly from the Red algae of the genus Laurencia (Ceramiales, Rhodomelaceae). The customized search engine of this database will enable wildcard querying, which includes Accession Number, Compound type, Seaweed Binomial name, IUPAC name, SMILES notation or InChI.

Availability

The database is available for free at http://www.swmd.co.in.",2011-01-22 +21258063,Automated validation of genetic variants from large databases: ensuring that variant references refer to the same genomic locations.,"

Summary

Accurate annotations of genomic variants are necessary to achieve full-genome clinical interpretations that are scientifically sound and medically relevant. Many disease associations, especially those reported before the completion of the HGP, are limited in applicability because of potential inconsistencies with our current standards for genomic coordinates, nomenclature and gene structure. In an effort to validate and link variants from the medical genetics literature to an unambiguous reference for each variant, we developed a software pipeline and reviewed 68 641 single amino acid mutations from Online Mendelian Inheritance in Man (OMIM), Human Gene Mutation Database (HGMD) and dbSNP. The frequency of unresolved mutation annotations varied widely among the databases, ranging from 4 to 23%. A taxonomy of primary causes for unresolved mutations was produced.

Availability

This program is freely available from the web site (http://safegene.hms.harvard.edu/aa2nt/).",2011-01-22 +21257609,OrthoNets: simultaneous visual analysis of orthologs and their interaction neighborhoods across different organisms.,"

Motivation

Protein interaction networks contain a wealth of biological information, but their large size often hinders cross-organism comparisons. We present OrthoNets, a Cytoscape plugin that displays protein-protein interaction (PPI) networks from two organisms simultaneously, highlighting orthology relationships and aggregating several types of biomedical annotations. OrthoNets also allows PPI networks derived from experiments to be overlaid on networks extracted from public databases, supporting the identification and verification of new interactors. Any newly identified PPIs can be validated by checking whether their orthologs interact in another organism.

Availability

OrthoNets is freely available at http://wodaklab.org/orthonets/.",2011-01-20 +21398673,PRIMe: a method for characterization and evaluation of pleiotropic regions from multiple genome-wide association studies.,"

Motivation

The concept of pleiotropy was proposed a century ago, though up to now there have been insufficient efforts to design robust statistics and software aimed at visualizing and evaluating pleiotropy at a regional level. The Pleiotropic Region Identification Method (PRIMe) was developed to evaluate potentially pleiotropic loci based upon data from multiple genome-wide association studies (GWAS).

Methods

We first provide a software tool to systematically identify and characterize genomic regions where low association P-values are observed with multiple traits. We use the term Pleiotropy Index to denote the number of traits with low association P-values at a particular genomic region. For GWAS assumed to be uncorrelated, we adopted the binomial distribution to approximate the statistical significance of the Pleiotropy Index. For GWAS conducted on traits with known correlation coefficients, simulations are performed to derive the statistical distribution of the Pleiotropy Index under the null hypothesis of no genotype-phenotype association. For six hematologic and three blood pressure traits where full GWAS results were available from the Cohorts for Heart and Aging Research in Genomic Epidemiology (CHARGE) Consortium, we estimated the trait correlations and applied the simulation approach to examine genomic regions with statistical evidence of pleiotropy. We then applied the approximation approach to explore GWAS summarized in the National Human Genome Research Institute (NHGRI) GWAS Catalog.

Results

By simulation, we identified pleiotropic regions including SH2B3 and BRAP (12q24.12) for hematologic and blood pressure traits. By approximation, we confirmed the genome-wide significant pleiotropy of these two regions based on the GWAS Catalog data, together with an exploration on other regions which highlights the FTO, GCKR and ABO regions.

Availability and implementation

The Perl and R scripts are available at http://www.framinghamheartstudy.org/research/gwas_pleiotropictool.html.",2011-03-12 +21700676,ODORactor: a web server for deciphering olfactory coding.,"

Summary

ODORactor is an open access web server aimed at providing a platform for identifying odorant receptors (ORs) for small molecules and for browsing existing OR-ligand pairs. It enables the prediction of ORs from the molecular structures of arbitrary chemicals by integrating two individual functionalities: odorant verification and OR recognition. The prediction of the ORs for several odorants was experimentally validated in the study. In addition, ODORactor features a comprehensive repertoire of olfactory information that has been manually curated from literature. Therefore, ODORactor may provide an effective way to decipher olfactory coding and could be a useful server tool for both basic olfaction research in academia and for odorant discovery in industry.

Availability

Freely available at http://mdl.shsmu.edu.cn/ODORactor

Contact

jian.zhang@sjtu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-23 +21700671,eResponseNet: a package prioritizing candidate disease genes through cellular pathways.,"

Motivation

Although genome-wide association studies (GWAS) have found many common genetic variants associated with human diseases, it remains a challenge to elucidate the functional links between associated variants and complex traits.

Results

We developed a package called eResponseNet by implementing and extending the existing ResponseNet algorithm for prioritizing candidate disease genes through cellular pathways. Using type II diabetes (T2D) as a study case, we demonstrate that eResponseNet outperforms currently available approaches in prioritizing candidate disease genes. More importantly, the package is instrumental in revealing cellular pathways underlying disease-associated genetic variations.

Availability

The eResponseNet package is freely downloadable at http://hanlab.genetics.ac.cn/eResponseNet.

Contact

jdhan@picb.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-23 +21310640,BINANA: a novel algorithm for ligand-binding characterization.,"Computational chemists and structural biologists are often interested in characterizing ligand-receptor complexes for hydrogen-bond, hydrophobic, salt-bridge, van der Waals, and other interactions in order to assess ligand binding. When done by hand, this characterization can become tedious, especially when many complexes need be analyzed. In order to facilitate the characterization of ligand binding, we here present a novel Python-implemented computer algorithm called BINANA (BINding ANAlyzer), which is freely available for download at http://www.nbcr.net/binana/. To demonstrate the utility of the new algorithm, we use BINANA to confirm that the number of hydrophobic contacts between a ligand and its protein receptor is positively correlated with ligand potency. Additionally, we show how BINANA can be used to search through a large ligand-receptor database to identify those complexes that are remarkable for selected binding features, and to identify lead candidates from a virtual screen with specific, desirable binding characteristics. We are hopeful that BINANA will be useful to computational chemists and structural biologists who wish to automatically characterize many ligand-receptor complexes for key binding characteristics.",2011-01-19 +21255607,"Aromatic-Aromatic Interactions Database, A(2)ID: an analysis of aromatic π-networks in proteins.","The geometrical arrangement of the aromatic rings of phenylalanine, tyrosine, tryptophan and histidine has been analyzed at a database level using the X-ray crystal structure of proteins from PDB in order to find out the aromatic-aromatic (π-π) networks in proteins and to understand how these aromatic rings are connected with each-other in a specific π-π network. 
A stringent examination of the 7848 proteins indicates that close to 89% of the proteins have occurrence of at least a network of 2π or a higher π-π network. The occurrence of π-π networks in various protein superfamilies based on SCOP, CATH and EC classifiers has also been probed in the present work. In general, we find that multidomain and membrane proteins as well as lyases show a more number of these networks. Analysis of the distribution of angle between planes of two proximal aromatic rings (ϕ) distribution indicates that at a larger cutoff distance (between centroid of two aromatic rings), above 5Å, C-H⋯π interactions (T-shaped orientation) are more prevalent, while π-π interactions (stacked orientation) are more prevalent at a smaller cutoff distance. The connectivity patterns of π-π networks propose strong propensity of finding arrangement of aromatic residues as clusters rather than linear arrangement. We have also made a public domain database ""Aromatic-Aromatic Interactions Database"" (A(2)ID) comprising of all types of π-π networks and their connectivity pattern present in proteins. It can be accessed by url http://203.199.182.73/gnsmmg/databases/aidb/aidb.html.",2011-01-19 +22355308,Independent EEG sources are dipolar.,"Independent component analysis (ICA) and blind source separation (BSS) methods are increasingly used to separate individual brain and non-brain source signals mixed by volume conduction in electroencephalographic (EEG) and other electrophysiological recordings. We compared results of decomposing thirteen 71-channel human scalp EEG datasets by 22 ICA and BSS algorithms, assessing the pairwise mutual information (PMI) in scalp channel pairs, the remaining PMI in component pairs, the overall mutual information reduction (MIR) effected by each decomposition, and decomposition 'dipolarity' defined as the number of component scalp maps matching the projection of a single equivalent dipole with less than a given residual variance. 
The least well-performing algorithm was principal component analysis (PCA); best performing were AMICA and other likelihood/mutual information based ICA methods. Though these and other commonly-used decomposition methods returned many similar components, across 18 ICA/BSS algorithms mean dipolarity varied linearly with both MIR and with PMI remaining between the resulting component time courses, a result compatible with an interpretation of many maximally independent EEG components as being volume-conducted projections of partially-synchronous local cortical field activity within single compact cortical domains. To encourage further method comparisons, the data and software used to prepare the results have been made available (http://sccn.ucsd.edu/wiki/BSSComparison).",2012-02-15 +21400228,Predicting sub-cellular localization of tRNA synthetases from their primary structures.,"Since endo-symbiotic events occur, all genes of mitochondrial aminoacyl tRNA synthetase (AARS) were lost or transferred from ancestral mitochondrial genome into the nucleus. The canonical pattern is that both cytosolic and mitochondrial AARSs coexist in the nuclear genome. In the present scenario all mitochondrial AARSs are nucleus-encoded, synthesized on cytosolic ribosomes and post-translationally imported from the cytosol into the mitochondria in eukaryotic cell. The site-based discrimination between similar types of enzymes is very challenging because they have almost same physico-chemical properties. It is very important to predict the sub-cellular location of AARSs, to understand the mitochondrial protein synthesis. We have analyzed and optimized the distinguishable patterns between cytosolic and mitochondrial AARSs. Firstly, support vector machines (SVM)-based modules have been developed using amino acid and dipeptide compositions and achieved Mathews correlation coefficient (MCC) of 0.82 and 0.73, respectively. 
Secondly, we have developed SVM modules using position-specific scoring matrix and achieved the maximum MCC of 0.78. Thirdly, we developed SVM modules using N-terminal, intermediate residues, C-terminal and split amino acid composition (SAAC) and achieved MCC of 0.82, 0.70, 0.39 and 0.86, respectively. Finally, a SVM module was developed using selected attributes of split amino acid composition (SA-SAAC) approach and achieved MCC of 0.92 with an accuracy of 96.00%. All modules were trained and tested on a non-redundant data set and evaluated using fivefold cross-validation technique. On the independent data sets, SA-SAAC based prediction model achieved MCC of 0.95 with an accuracy of 97.77%. The web-server 'MARSpred' based on above study is available at http://www.imtech.res.in/raghava/marspred/.",2011-03-13 +22088842,Dragon PolyA Spotter: predictor of poly(A) motifs within human genomic DNA sequences.,"

Motivation

Recognition of poly(A) signals in mRNA is relatively straightforward due to the presence of easily recognizable polyadenylic acid tail. However, the task of identifying poly(A) motifs in the primary genomic DNA sequence that correspond to poly(A) signals in mRNA is a far more challenging problem. Recognition of poly(A) signals is important for better gene annotation and understanding of the gene regulation mechanisms. In this work, we present one such poly(A) motif prediction method based on properties of human genomic DNA sequence surrounding a poly(A) motif. These properties include thermodynamic, physico-chemical and statistical characteristics. For predictions, we developed Artificial Neural Network and Random Forest models. These models are trained to recognize 12 most common poly(A) motifs in human DNA. Our predictors are available as a free web-based tool accessible at http://cbrc.kaust.edu.sa/dps. Compared with other reported predictors, our models achieve higher sensitivity and specificity and furthermore provide a consistent level of accuracy for 12 poly(A) motif variants.

Contact

vladimir.bajic@kaust.edu.sa

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-15 +21994225,Pico-inplace-inversions between human and chimpanzee.,"

Motivation

There have been several studies on the micro-inversions between human and chimpanzee, but there are large discrepancies among their results. Furthermore, all of them rely on alignment procedures or existing alignment results to identify inversions. However, the core alignment procedures do not take very small inversions into consideration. Therefore, their analyses cannot find inversions that are too small to be detected by a classic aligner. We call such inversions pico-inversions.

Results

We re-analyzed human-chimpanzee alignment from the UCSC Genome Browser for micro-inplace-inversions and screened for pico-inplace-inversions using a likelihood ratio test. We report that the quantity of inplace-inversions between human and chimpanzee is substantially greater than what had previously been discovered. We also present the software tool PicoInversionMiner to detect pico-inplace-inversions between closely related species.

Availability

Software tools, scripts and result data are available at http://faculty.cs.niu.edu/~hou/PicoInversion.html.

Contact

mhou@cs.niu.edu.",2011-10-12 +23392076,NIH State-of-the-Science Conference Statement: Role of active surveillance in the management of men with localized prostate cancer.,"

Objective

To provide healthcare providers, patients, and the general public with a responsible assessment of currently available data on the use of active surveillance and other observational management strategies for low-grade, localized prostate cancer.

Participants

A non-U.S. Department of Health and Human Services, nonadvocate 14-member panel representing the fields of cancer prevention and control, urology, pathology, epidemiology, genetics, transplantation, bioethics, economics, health services research, shared decisionmaking, health communication, and community engagement. In addition, 22 experts from pertinent fields presented data to the panel and conference audience.

Evidence

Presentations by experts and a systematic review of the literature prepared by the Tufts Evidence-based Practice Center, through the Agency for Healthcare Research and Quality (AHRQ). Scientific evidence was given precedence over anecdotal experience.

Conference process

The panel drafted its statement based on scientific evidence presented in open forum and on published scientific literature. The draft statement was presented on the final day of the conference and circulated to the audience for comment. The panel released a revised statement later that day at http://consensus.nih.gov. This statement is an independent report of the panel and is not a policy statement of the NIH or the Federal Government.

Conclusions

Prostate cancer screening with prostate-specific antigen (PSA) testing has identified many men with low-risk disease. Because of the very favorable prognosis of low-risk prostate cancer, strong consideration should be given to modifying the anxiety-provoking term ""cancer"" for this condition. Treatment of low-risk prostate cancer patients with radical prostatectomy or radiation therapy leads to side effects such as impotence and incontinence in a substantial number. Active surveillance has emerged as a viable option that should be offered to patients with low-risk prostate cancer. More than 100,000 men a year diagnosed with prostate cancer in the United States are candidates for this approach. However, there are many unanswered questions about active surveillance strategies and prostate cancer that require further research and clarification. These include: • Improvements in the accuracy and consistency of pathologic diagnosis of prostate cancer • Consensus on which men are the most appropriate candidates for active surveillance • The optimal protocol for active surveillance and the potential for individualizing the approach based on clinical and patient factors • Optimal ways to communicate the option of active surveillance to patients • Methods to assist patient decisionmaking • Reasons for acceptance or rejection of active surveillance as a treatment strategy • Short- and long-term outcomes of active surveillance. Well-designed studies to address these questions and others raised in this statement represent an important health research priority. Qualitative, observational, and interventional research designs are needed. Due to the paucity of evidence about this important public health problem, all patients being considered for active surveillance should be offered participation in multicenter research studies that incorporate community settings and partners.",2011-12-01 +21245051,Automated bond order assignment as an optimization problem.,"

Motivation

Numerous applications in Computational Biology process molecular structures and hence strongly rely not only on correct atomic coordinates but also on correct bond order information. For proteins and nucleic acids, bond orders can be easily deduced but this does not hold for other types of molecules like ligands. For ligands, bond order information is not always provided in molecular databases and thus a variety of approaches tackling this problem have been developed. In this work, we extend an ansatz proposed by Wang et al. that assigns connectivity-based penalty scores and tries to heuristically approximate its optimum. In this work, we present three efficient and exact solvers for the problem replacing the heuristic approximation scheme of the original approach: an A*, an ILP and a fixed-parameter (FPT) approach.

Results

We implemented and evaluated the original implementation, our A*, ILP and FPT formulation on the MMFF94 validation suite and the KEGG Drug database. We show the benefit of computing exact solutions of the penalty minimization problem and the additional gain when computing all optimal (or even suboptimal) solutions. We close with a detailed comparison of our methods.

Availability

The A* and ILP solutions are integrated into the open-source C++ LGPL library BALL and the molecular visualization and modelling tool BALLView and can be downloaded from our homepage www.ball-project.org. The FPT implementation can be downloaded from http://bio.informatik.uni-jena.de/software/.",2011-01-17 +21505033,Rgtsp: a generalized top scoring pairs package for class prediction.,"

Summary

A top scoring pair (TSP) classifier consists of a pair of variables whose relative ordering can be used for accurately predicting the class label of a sample. This classification rule has the advantage of being easily interpretable and more robust against technical variations in data, such as those due to different microarray platforms. Here we describe a parallel implementation of this classifier which significantly reduces the training time, and a number of extensions, including a multi-class approach, which has the potential of improving the classification performance.

Availability and implementation

Full C++ source code and R package Rgtsp are freely available from http://lausanne.isb-sib.ch/~vpopovic/research/. The implementation relies on existing OpenMP libraries.",2011-04-19 +22328084,Dual transcriptional activator and repressor roles of TBX20 regulate adult cardiac structure and function.,"The ongoing requirement in adult heart for transcription factors with key roles in cardiac development is not well understood. We recently demonstrated that TBX20, a transcriptional regulator required for cardiac development, has key roles in the maintenance of functional and structural phenotypes in adult mouse heart. Conditional ablation of Tbx20 in adult cardiomyocytes leads to a rapid onset and progression of heart failure, with prominent conduction and contractility phenotypes that lead to death. Here we describe a more comprehensive molecular characterization of the functions of TBX20 in adult mouse heart. Coupling genome-wide chromatin immunoprecipitation and transcriptome analyses (RNA-Seq), we identified a subset of genes that change expression in Tbx20 adult cardiomyocyte-specific knockout hearts which are direct downstream targets of TBX20. This analysis revealed a dual role for TBX20 as both a transcriptional activator and a repressor, and that each of these functions regulates genes with very specialized and distinct molecular roles. We also show how TBX20 binds to its targets genome-wide in a context-dependent manner, using various cohorts of co-factors to either promote or repress distinct genetic programs within adult heart. Our integrative approach has uncovered several novel aspects of TBX20 and T-box protein function within adult heart. Sequencing data accession number (http://www.ncbi.nlm.nih.gov/geo): GSE30943.",2012-02-10 +22321699,A novel significance score for gene selection and ranking.,"

Motivation

When identifying differentially expressed (DE) genes from high-throughput gene expression measurements, we would like to take both statistical significance (such as P-value) and biological relevance (such as fold change) into consideration. In gene set enrichment analysis (GSEA), a score that can combine fold change and P-value together is needed for better gene ranking.

Results

We defined a gene significance score π-value by combining expression fold change and statistical significance (P-value), and explored its statistical properties. When compared to various existing methods, the π-value based approach is more robust in selecting DE genes, with the largest area under curve in its receiver operating characteristic curve. We applied π-value to GSEA and found it comparable to P-value and t-statistic based methods, with added protection against false discovery in certain situations. Finally, in a gene functional study of breast cancer profiles, we showed that using π-value helps elucidate otherwise overlooked important biological functions.

Availability

http://gccri.uthscsa.edu/Pi_Value_Supplementary.asp

Contact

xy@ieee.org, cheny8@uthscsa.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-02-09 +22312592,Alignment-free detection of horizontal gene transfer between closely related bacterial genomes.,"Bacterial epidemics are often caused by strains that have acquired their increased virulence through horizontal gene transfer. Due to this association with disease, the detection of horizontal gene transfer continues to receive attention from microbiologists and bioinformaticians alike. Most software for detecting transfer events is based on alignments of sets of genes or of entire genomes. But despite great advances in the design of algorithms and computer programs, genome alignment remains computationally challenging. We have therefore developed an alignment-free algorithm for rapidly detecting horizontal gene transfer between closely related bacterial genomes. Our implementation of this algorithm is called alfy for ""ALignment Free local homologY"" and is freely available from http://guanine.evolbio.mpg.de/alfy/. In this comment we demonstrate the application of alfy to the genomes of Staphylococcus aureus. We also argue that-contrary to popular belief and in spite of increasing computer speed-algorithmic optimization is becoming more, not less, important if genome data continues to accumulate at the present rate.",2011-09-01 +22247277,Mining and evaluation of molecular relationships in literature.,"

Motivation

Specific information on newly discovered proteins is often difficult to find in literature. Particularly if only sequences and no common names of proteins or genes are available, preceding sequence similarity searches can be crucial for the process of information collection. In drug research, it is important to know whether a small molecule targets only one specific protein or whether similar or homologous proteins are also influenced that may account for possible side effects.

Results

prolific (protein-literature investigation for interacting compounds) provides a one-step solution to investigate available information on given protein names, sequences, similar proteins or sequences on the gene level. Co-occurrences of UniProtKB/Swiss-Prot proteins and PubChem compounds in all PubMed abstracts are retrievable. Concise 'heat-maps' and tables display frequencies of co-occurrences. They provide links to processed literature with highlighted found protein and compound synonyms. Evaluation with manually curated drug-protein relationships showed that up to 69% could be discovered by automatic text-processing. Examples are presented to demonstrate the capabilities of prolific.

Availability

The web-application is available at http://prolific.pharmaceutical-bioinformatics.de and a web service at http://www.pharmaceutical-bioinformatics.de/prolific/soap/prolific.wsdl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-13 +22080466,Multifunctional proteins revealed by overlapping clustering in protein interaction network.,"

Motivation

Multifunctional proteins perform several functions. They are expected to interact specifically with distinct sets of partners, simultaneously or not, depending on the function performed. Current graph clustering methods usually allow a protein to belong to only one cluster, therefore impeding a realistic assignment of multifunctional proteins to clusters.

Results

Here, we present Overlapping Cluster Generator (OCG), a novel clustering method which decomposes a network into overlapping clusters and which is, therefore, capable of correct assignment of multifunctional proteins. The principle of OCG is to cover the graph with initial overlapping classes that are iteratively fused into a hierarchy according to an extension of Newman's modularity function. By applying OCG to a human protein-protein interaction network, we show that multifunctional proteins are revealed at the intersection of clusters and demonstrate that the method outperforms other existing methods on simulated graphs and PPI networks.

Availability

This software can be downloaded from http://tagc.univ-mrs.fr/welcome/spip.php?rubrique197

Contact

brun@tagc.univ-mrs.fr

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-10 +22492313,Approximate probabilistic analysis of biopathway dynamics.,"

Motivation

Biopathways are often modeled as systems of ordinary differential equations (ODEs). Such systems will usually have many unknown parameters and hence will be difficult to calibrate. Since the data available for calibration will have limited precision, an approximate representation of the ODEs dynamics should suffice. One must, however, be able to efficiently construct such approximations for large models and perform model calibration and subsequent analysis.

Results

We present a graphical processing unit (GPU) based scheme by which a system of ODEs is approximated as a dynamic Bayesian network (DBN). We then construct a model checking procedure for DBNs based on a simple probabilistic linear time temporal logic. The GPU implementation considerably extends the reach of our previous PC-cluster-based implementation (Liu et al., 2011b). Further, the key components of our algorithm can serve as the GPU kernel for other Monte Carlo simulations-based analysis of biopathway dynamics. Similarly, our model checking framework is a generic one and can be applied in other systems biology settings. We have tested our methods on three ODE models of bio-pathways: the epidermal growth factor-nerve growth factor pathway, the segmentation clock network and the MLC-phosphorylation pathway models. The GPU implementation shows significant gains in performance and scalability whereas the model checking framework turns out to be convenient and efficient for specifying and verifying interesting pathways properties.

Availability

The source code is freely available at http://www.comp.nus.edu.sg/~rpsysbio/pada-gpu/",2012-04-05 +21791533,VISTA Region Viewer (RViewer)--a computational system for prioritizing genomic intervals for biomedical studies.,"

Summary

Current genome browsers are designed for linear browsing of individual genomic regions, but the high-throughput nature of experiments aiming to elucidate the genetic component of human disease makes it very important to develop user-friendly tools for comparing several genomic regions in parallel and prioritizing them based on their functional content. We introduce VISTA Region Viewer (RViewer), an interactive online tool that allows for efficient screening and prioritization of regions of the human genome for follow-up studies. The tool takes as input genetic variation data from different biomedical studies, determines a number of various functional parameters for both coding and non-coding sequences in each region and allows for sorting and searching the results of the analysis in multiple ways.

Availability and implementation

The tool is implemented as a web application and is freely accessible on the Web at http://rviewer.lbl.gov

Contact

rviewer@lbl.gov; ildubchak@lbl.gov.",2011-07-26 +21227933,ARTADE2DB: improved statistical inferences for Arabidopsis gene functions and structure predictions by dynamic structure-based dynamic expression (DSDE) analyses.,"Recent advances in technologies for observing high-resolution genomic activities, such as whole-genome tiling arrays and high-throughput sequencers, provide detailed information for understanding genome functions. However, the functions of 50% of known Arabidopsis thaliana genes remain unknown or are annotated only on the basis of static analyses such as protein motifs or similarities. In this paper, we describe dynamic structure-based dynamic expression (DSDE) analysis, which sequentially predicts both structural and functional features of transcripts. We show that DSDE analysis inferred gene functions 12% more precisely than static structure-based dynamic expression (SSDE) analysis or conventional co-expression analysis based on previously determined gene structures of A. thaliana. This result suggests that more precise structural information than the fixed conventional annotated structures is crucial for co-expression analysis in systems biology of transcriptional regulation and dynamics. Our DSDE method, ARabidopsis Tiling-Array-based Detection of Exons version 2 and over-representation analysis (ARTADE2-ORA), precisely predicts each gene structure by combining two statistical analyses: a probe-wise co-expression analysis of multiple transcriptome measurements and a Markov model analysis of genome sequences. ARTADE2-ORA successfully identified the true functions of about 90% of functionally annotated genes, inferred the functions of 98% of functionally unknown genes and predicted 1,489 new gene structures and functions. 
We developed a database ARTADE2DB that integrates not only the information predicted by ARTADE2-ORA but also annotations and other functional information, such as phenotypes and literature citations, and is expected to contribute to the study of the functional genomics of A. thaliana. URL: http://artade.org.",2011-01-12 +21296267,Efficacy of Clinacanthus nutans extracts in patients with herpes infection: systematic review and meta-analysis of randomised clinical trials.,"

Objective

To examine the efficacy of Clinacanthus nutans extracts in treatment of Herpes genitalis and Herpes zoster from randomised clinical trials (RCTs).

Methods

Bibliographic databases, including MEDLINE, EMBASE, CINAHL, Cochrane CENTRAL, AMED, WHO trial registry, http://www.clinicaltrial.gov, Thai Index Medicus, and Index Medicus Siriraj library, were searched from their inception dates to February 2010 without language restrictions. Methodological quality of included trials was assessed using Jadad's quality scale and Cochrane's risk of bias.

Results

Four RCTs (n=286) met our inclusion criteria which include two studies on H. genitalis and the other two on H. zoster; in these studies, a total of 151 patients were assigned to the C. nutans group of H. genitalis trials, a pooled relative risk of C. nutans preparations against placebo for a 3 day-full crusting was 6.62 (95% C.I. 3.83-11.47) and of a 7-day complete healing was 3.77 (95% C.I. 2.46-5.78). In H. zoster, the relative risk for a 3 day-full crusting was 3.21 (IQR 0.97-10.58).

Conclusions

This meta-analysis and systematic review suggests some beneficial effects of C. nutans preparations on treatments of H. genitalis and H. zoster. However, more robustly designed trials are needed to substantiate the benefit of these plants, specifically on their active purified compounds, and their potencies and benefits on treatment outcome of H. genitalis and H. zoster.",2011-01-12 +21282865,SLIDER: a generic metaheuristic for the discovery of correlated motifs in protein-protein interaction networks.,"Correlated motif mining (cmm) is the problem of finding overrepresented pairs of patterns, called motifs, in sequences of interacting proteins. Algorithmic solutions for cmm thereby provide a computational method for predicting binding sites for protein interaction. In this paper, we adopt a motif-driven approach where the support of candidate motif pairs is evaluated in the network. We experimentally establish the superiority of the Chi-square-based support measure over other support measures. Furthermore, we obtain that cmm is an np-hard problem for a large class of support measures (including Chi-square) and reformulate the search for correlated motifs as a combinatorial optimization problem. We then present the generic metaheuristic slider which uses steepest ascent with a neighborhood function based on sliding motifs and employs the Chi-square-based support measure. We show that slider outperforms existing motif-driven cmm methods and scales to large protein-protein interaction networks. The slider-implementation and the data used in the experiments are available on http://bioinformatics.uhasselt.be.",2011-09-01 +22897152,Integration of 198 ChIP-seq datasets reveals human cis-regulatory regions.,"We analyzed 198 datasets of chromatin immunoprecipitation followed by high throughput sequencing (ChIP-seq) and developed a methodology for identification of high-confidence enhancer and promoter regions from transcription factor ChIP-seq data alone. 
We identify 32,467 genomic regions marked with ChIP-seq binding peaks in 15 or more experiments as high-confidence cis-regulatory regions. Although the selected regions mark only ~0.67% of the genome, 70.5% of our predicted binding regions fall within independently identified, strongly expression-correlated and histone-marked enhancer regions, which cover ~8% of the genome (Ernst et al., Nature 2011 , 473, 43-49). Even more remarkably, 85.6% of our selected regions overlap transcription factor (TF) binding regions identified in evolutionarily conserved DNase1 hypersensitivity cluster regions, which cover 0.75% of the genome (Boyle et al., Genome Research 2011 , 21, 456-464). P-values for these overlaps are effectively zero (Z-scores of 328 and 715 respectively). Furthermore, 62% of our selected regions overlap the intersection of the evolutionarily conserved DNase1 hypersensitivity-identified TF-binding regions of Boyle et al. (2011) with the histone-marked enhancers found to be strongly associated with transcriptional activity by Ernst et al. (2011). Two hundred thirty of our candidate cis-regulatory regions overlap cancer-associated variants reported in the Catalogue of Somatic Mutations in Cancer ( http://www.sanger.ac.uk/genetics/CGP/cosmic/ ). We also identify 1,252 potential proximal promoters for the 7,561 disjoint lincRNA regions currently in the Human lincRNA Catalog (www.broadinstitute.org/genome_bio/human_lincrnas/). Our investigation used approximately half of all currently available ENCODE ChIP-seq datasets, suggesting further gains are likely from analysis of all datasets currently available.",2012-08-16 +21498400,BACOM: in silico detection of genomic deletion types and correction of normal cell contamination in copy number data.,"

Motivation

Identification of somatic DNA copy number alterations (CNAs) and significant consensus events (SCEs) in cancer genomes is a main task in discovering potential cancer-driving genes such as oncogenes and tumor suppressors. The recent development of SNP array technology has facilitated studies on copy number changes at a genome-wide scale with high resolution. However, existing copy number analysis methods are oblivious to normal cell contamination and cannot distinguish between contributions of cancerous and normal cells to the measured copy number signals. This contamination could significantly confound downstream analysis of CNAs and affect the power to detect SCEs in clinical samples.

Results

We report here a statistically principled in silico approach, Bayesian Analysis of COpy number Mixtures (BACOM), to accurately estimate genomic deletion type and normal tissue contamination, and accordingly recover the true copy number profile in cancer cells. We tested the proposed method on two simulated datasets, two prostate cancer datasets and The Cancer Genome Atlas high-grade ovarian dataset, and obtained very promising results supported by the ground truth and biological plausibility. Moreover, based on a large number of comparative simulation studies, the proposed method gives significantly improved power to detect SCEs after in silico correction of normal tissue contamination. We develop a cross-platform open-source Java application that implements the whole pipeline of copy number analysis of heterogeneous cancer tissues including relevant processing steps. We also provide an R interface, bacomR, for running BACOM within the R environment, making it straightforward to include in existing data pipelines.

Availability

The cross-platform, stand-alone Java application, BACOM, the R interface, bacomR, all source code and the simulation data used in this article are freely available at authors' web site: http://www.cbil.ece.vt.edu/software.htm.",2011-04-15 +21663689,FARO server: Meta-analysis of gene expression by matching gene expression signatures to a compendium of public gene expression data.,"

Background

Although systematic analysis of gene annotation is a powerful tool for interpreting gene expression data, it is sometimes blurred by incomplete gene annotation, missing expression response of key genes and secondary gene expression responses. These shortcomings may be partially circumvented by instead matching gene expression signatures to signatures of other experiments.

Findings

To facilitate this we present the Functional Association Response by Overlap (FARO) server, which matches input signatures to a compendium of 242 gene expression signatures, extracted from more than 1700 Arabidopsis microarray experiments.

Conclusions

Hereby we present a publicly available tool for robust characterization of Arabidopsis gene expression experiments which can point to similar experimental factors in other experiments. The server is available at http://www.cbs.dtu.dk/services/faro/.",2011-06-11 +21216786,LPS-annotate: complete annotation of compositionally biased regions in the protein knowledgebase.,"Compositional bias (i.e. a skew in the composition of a biological sequence towards a subset of residue types) can occur at a wide variety of scales, from compositional biases of whole genomes, down to short regions in individual protein and gene-DNA sequences that are compositionally biased (CB regions). Such CB regions are made from a subset of residue types that are strewn along the length of the region in an irregular way. Here, we have developed the database server LPS-annotate, for the analysis of such CB regions, and protein disorder in protein sequences. The algorithm defines compositional bias through a thorough search for lowest-probability subsequences (LPSs) (i.e., the least likely sequence regions in terms of composition). Users can (i) initially annotate CB regions in input protein or nucleotide sequences of interest, and then (ii) query a database of greater than 1,500,000 pre-calculated protein-CB regions, for investigation of further functional hypotheses and inferences, about the specific CB regions that were discovered, and their protein disorder propensities. We demonstrate how a user can search for CB regions of similar compositional bias and protein disorder, with a worked example. We show that our annotations substantially augment the CB-region annotations that already exist in the UniProt database, with more comprehensive annotation of more complex CB regions. 
Our analysis indicates tens of thousands of CB regions that do not comprise globular domains or transmembrane domains, and that do not have a propensity to protein disorder, indicating a large cohort of protein-CB regions of biophysically uncharacterized types. This server and database is a conceptually novel addition to the workbench of tools now available to molecular biologists to generate hypotheses and inferences about the proteins that they are investigating. It can be accessed at http://libaio.biol.mcgill.ca/lps-annotate.html. Database URL: http://libaio.biol.mcgill.ca/lps-annotate.html.",2011-01-06 +21216773,A method for identifying haplotypes carrying the causative allele in positive natural selection and genome-wide association studies.,"

Motivation

Methods for detecting positive selection relied on finding evidence of long haplotypes to identify candidate regions under selection. However, these methods generally do not identify the length and form of the selected haplotype.

Results

We present HapFinder, a method which can find the common longest haplotype under three different settings from a database, which is relevant in the analysis of positive selection in population genetics and also in medical genetics for finding the likely haplotype form carrying the causal allele at the functional polymorphism.

Availability

A java program, implementing the methods described in HapFinder, together with R scripts and datasets for producing the figures presented in this article are publicly available at http://www.nus-cme.org.sg/sgvp/software/hapfinder.html. The site also hosts an online browser for finding haplotypes from the International HapMap Project and the Singapore Genome Variation Project.",2011-01-06 +21216777,HiSpOD: probe design for functional DNA microarrays.,"

Motivation

The use of DNA microarrays allows the monitoring of the extreme microbial diversity encountered in complex samples like environmental ones as well as that of their functional capacities. However, no probe design software currently available is adapted to easily design efficient and explorative probes for functional gene arrays.

Results

We present a new efficient functional microarray probe design algorithm called HiSpOD (High Specific Oligo Design). This uses individual nucleic sequences or consensus sequences produced by multiple alignments to design highly specific probes. Indeed, to bypass the crucial problem of cross-hybridization, probe specificity is assessed by similarity search against a large formatted database dedicated to microbial communities containing about 10 million coding sequences (CDS). For experimental validation, a microarray targeting genes encoding enzymes involved in chlorinated solvent biodegradation was built. The results obtained from a contaminated environmental sample proved the specificity and the sensitivity of probes designed with the HiSpOD program.

Availability

http://fc.isima.fr/~g2im/hispod/.",2011-01-06 +22988806,[The architectonics of microbe ecology in the purulent surgery department of municipal clinical hospital].,"S. aureus, S. pyogenes, P. aeruginosa, E. coli, P. mirabilis, A. baumannii, S. epidermidis, K. pneumoniae, E. faecalis, E. cloacae are the priority pathogens of various forms of pyoinflammatory diseases. They form the architectonics of microbe ecology of the purulent surgery department of multi-type hospital of regional level with S.aureus subsp.aureus playing dominating role. In case of unchanged specter of priority pathogens of pyoinflammatory and pyoseptic diseases the number of gram-positive coccuses decreases at the expense of decrease of number of streptococcuses and staphylococcuses. At the same time, the number of enterobacteria and gram-negative non-fermentative bacteria increases. The resistance of gram-positive coccuses increases regarding erythromycin, clindamycin and ciprofloxacin. The resistance of gram-negative bacilli increases regarding ciprofloxacin, cephalosporins of III-IV generations, amikacin. The resistance is the highest among clinical isolates MRSA, K. pneumoniae, A. baumannii. The vancomycin is active regarding all gram-positive pathogens. The carbapenems are active regarding all enterobacteriae. The carbapenems, cefoperazone/tazobactam, cefepime are most active regarding non-fermentative glucose oxidizing bacteria. The netimicin is active regarding A.baumannii. The polymyxine is active regarding P. aeruginosa. The circulation of S. aureus hospital strain of particular genotype is established confirming the propagation of epidemic S. aureus strains in Moscow multi-type medical institutions. The strains are genetically affined to epidemic strains in European and other countries according the international data base (http://SpaServer.ridom.de). The genetic typing of S. aureus ssp. aureus out-hospital hemocultures detected their considerable genetic variety. 
The epidemic relationship between isolates from different patients is not established. The algorithms of rationale antibacterial chemotherapy of pyoinflammatory and pyoseptic diseases are developed to be implemented in the purulent surgery department of municipal clinical hospital of Moscow.",2012-07-01 +21291922,Internet-based atlas of the primate spinal cord.,"In 2009, we reported an online brain atlas of the common marmoset (Callithrix jacchus) at http://marmoset-brain.org:2008. Here we report new digital images of the primate spinal cord sections added to the website. We prepared histological sections of every segment of the spinal cord of the common marmoset, rhesus monkey and Japanese monkey with various staining techniques. The sections were scanned with Carl Zeiss MIRAX SCAN at light microscopic resolution. Obtained digital data were processed and converted into multi-resolutionary images with Adobe Photoshop and Zoomify Design. These images of the primate spinal cords are now available on the web via the Internet.",2011-02-01 +22449401,Gene2DGE: a Perl package for gene model renewal with digital gene expression data.,"For transcriptome analysis, it is critical to precisely define all the transcripts across the whole genome. More and more digital gene expression (DGE) scannings have indicated the presence of huge amount of novel transcripts in addition to the known gene models. However, almost all these studies still depend crucially on existing annotation. Here, we present Gene2DGE, a Perl software package for gene model renewal with DGE data. We applied Gene2DGE to the mouse blastomere transcriptome, and defined 98,532 read-enriched regions (RERs) by read clustering supported by more than four reads for each base pair. Taking advantage of this ab initio method, we refined 2,104 exonic regions (4% of a total of 48,501 annotated transcribed regions) with remarkable extension into un-annotated regions (>50 bp). 
For 5% of uniquely mapped reads falling within intron regions, we identified 13,291 additional possible exons. As a result, we renewed 4,788 gene models, which account for 39% of a total of 12,277 transcribed genes. Furthermore, we identified 12,613 intergenic RERs, suggesting the possible presence of novel genes outside the existing gene models. In this study, therefore, we have developed a suitable tool for renewal of known gene models by ab initio prediction in transcriptome dissection. The Gene2DGE package is freely available at http://bighapmap.big.ac.cn/.",2012-02-01 +22450757,LocARNA-P: accurate boundary prediction and improved detection of structural RNAs.,"Current genomic screens for noncoding RNAs (ncRNAs) predict a large number of genomic regions containing potential structural ncRNAs. The analysis of these data requires highly accurate prediction of ncRNA boundaries and discrimination of promising candidate ncRNAs from weak predictions. Existing methods struggle with these goals because they rely on sequence-based multiple sequence alignments, which regularly misalign RNA structure and therefore do not support identification of structural similarities. To overcome this limitation, we compute columnwise and global reliabilities of alignments based on sequence and structure similarity; we refer to these structure-based alignment reliabilities as STARs. The columnwise STARs of alignments, or STAR profiles, provide a versatile tool for the manual and automatic analysis of ncRNAs. In particular, we improve the boundary prediction of the widely used ncRNA gene finder RNAz by a factor of 3 from a median deviation of 47 to 13 nt. Post-processing RNAz predictions, LocARNA-P's STAR score allows much stronger discrimination between true- and false-positive predictions than RNAz's own evaluation. The improved accuracy, in this scenario increased from AUC 0.71 to AUC 0.87, significantly reduces the cost of successive analysis steps. 
The ready-to-use software tool LocARNA-P produces structure-based multiple RNA alignments with associated columnwise STARs and predicts ncRNA boundaries. We provide additional results, a web server for LocARNA/LocARNA-P, and the software package, including documentation and a pipeline for refining screens for structural ncRNA, at http://www.bioinf.uni-freiburg.de/Supplements/LocARNA-P/.",2012-03-26 +30743662,First Report of Eggplant mottled dwarf virus in Pittosporum tobira in Spain.,"In 2009, Pittosporum tobira (Thunb.) Ait. plants showing virus-like symptoms were observed in two ornamental greenhouses in two regions of the eastern coast of Spain (Tarragona and Valencia). Affected plants showed veinal yellowing and interveinal yellow mottling on the leaves. In addition, surveys conducted in 2010 in three public gardens in Valencia revealed 4% of P. tobira plants grown as hedges showed similar, but less severe symptoms. Five symptomatic and five asymptomatic P. tobira leaves were collected and analyzed by double antibody sandwich-ELISA using polyclonal antisera for Alfalfa mosaic virus (AMV) (SEDIAG S.A.S., Longvic, France) and Eggplant mottled dwarf virus (EMDV) (Deutsche Sammlung von Mikroorganismen und Zellkulturen Gmbh [DSMZ], Braunschweig, Germany). Samples were considered positive only if the mean absorbance value of duplicate wells was more than three times the mean absorbance of healthy control leaf samples. Only the five symptomatic samples tested positive for EMDV in the serological analyses. To confirm the results, a pair of EMDV-specific primers was designed using the published sequence of a fragment of the EMDV polymerase gene available in GenBank (Accession No. AM922322): EMDV-D (5' TATGCGAGAATTGGGAGTGGGTAGT 3') and EMDV-R (5' CATTGTTATCCCGGGAAGTATTT 3') targeting a 400-bp fragment. 
Total RNA was extracted from the symptomatic leaves and tested by reverse transcription (RT)-PCR assay with specific primers for AMV (4) and the primer pair designed for EMDV. The type isolate (EMDV-PV-0031, DSMZ) was used as a positive control sample in the serological and molecular analyses. None of the samples tested positive for AMV. The same five symptomatic samples that tested positive in the serological assays also tested positive for EMDV in the RT-PCR assay. Two RT-PCR products amplified from RNA of symptomatic P. tobira leaves and one from the type isolate were purified and directly sequenced. BLAST analyses of two sequences from infected P. tobira leaves (Accession Nos. HM636918 and HM636919) revealed 90% nucleotide identity to both the EMDV-Egg isolate (Accession No. AM922322) and the type isolate (EMDV-PV-0031, DSMZ), and 98% similarity among the P. tobira isolates. EMDV was first reported in the Canary Islands, Spain (3), and later was detected in the northeastern peninsular Spain on cucumber and eggplant (1). Although EMDV has been described as affecting P. tobira in countries such as Italy, Libya, and the former Yugoslavia (3), to our knowledge, this is the first report of EMDV infecting P. tobira in Spain. EMDV is generally considered of minor importance. However, P. tobira infection might have epidemiological consequences for susceptible cultivated crops such as eggplant or cucumber. Moreover, where P. tobira is used as a vegetatively propagated ornamental plant, EMDV could be transmitted from infected plants by the leafhopper vector (2). References: (1) J. Aramburu et al. Plant Pathol. 55:565, 2006. (2) G. H. Babaie and K. Izadpanah. J. Phytopathol. 151:679, 2003. (3) A. A. Brunt et al. Plant Viruses Online: Descriptions and Lists from the VIDE Database. Version: 20. Retrieved from http://biology.anu.edu.au/Groups/MES/vide/ , August, 1996. (4) L. Martínez-Priego et al. Plant Dis. 
88:908, 2004.",2011-01-01 +21296743,Exploring the potential relevance of human-specific genes to complex disease.,"Although human disease genes generally tend to be evolutionarily more ancient than non-disease genes, complex disease genes appear to be represented more frequently than Mendelian disease genes among genes of more recent evolutionary origin. It is therefore proposed that the analysis of human-specific genes might provide new insights into the genetics of complex disease. Cross-comparison with the Human Gene Mutation Database (http://www.hgmd.org) revealed a number of examples of disease-causing and disease-associated mutations in putatively human-specific genes. A sizeable proportion of these were missense polymorphisms associated with complex disease. Since both human-specific genes and genes associated with complex disease have often experienced particularly rapid rates of evolutionary change, either due to weaker purifying selection or positive selection, it is proposed that a significant number of human-specific genes may play a role in complex disease.",2011-01-01 +21598707,[Locus HS.633957 expression in human gastrointestinal tract and tumors].,"Human locus HS.633957 corresponds to its namesake cluster in the UniGene database http://www.ncbi.nlm.nih.gov/unigene. It is located on chromosome 7 and is 3.7 tpn in size. It does not seem to encode proteins nor has its function been identified. According to bioinformation evidence, its expression is tumor-specific. PCR assay on kDNA samples from different intact human tissues detected its slight expression in liver, heart, embryonal brain and kidney as well as in a wide spectrum of tumors. 
This work features locus Hs.633957 expression in different parts of human gastrointestinal tract and tumors.",2011-01-01 +21713630,Refolding your protein with a little help from REFOLD.,"The expression and harvesting of proteins from insoluble inclusion bodies by solubilization and refolding is a technique commonly used in the production of recombinant proteins. Despite the importance of refolding, publications in the literature are essentially ad hoc reports consisting of a dazzling array of experimental protocols and a diverse collection of buffer cocktails. For the protein scientists, using this information to refold their protein of interest presents enormous challenges. Here, we describe some of the practical considerations in refolding and present several standard protocols. Further, we describe how refolding procedures can be designed and modified using the information in the REFOLD database (http://refold.med.monash.edu.au), a freely available, open repository for protocols describing the refolding and purification of recombinant proteins.",2011-01-01 +21296742,Naming 'junk': human non-protein coding RNA (ncRNA) gene nomenclature.,"Previously, the majority of the human genome was thought to be 'junk' DNA with no functional purpose. Over the past decade, the field of RNA research has rapidly expanded, with a concomitant increase in the number of non-protein coding RNA (ncRNA) genes identified in this 'junk'. Many of the encoded ncRNAs have already been shown to be essential for a variety of vital functions, and this wealth of annotated human ncRNAs requires standardised naming in order to aid effective communication. The HUGO Gene Nomenclature Committee (HGNC) is the only organisation authorised to assign standardised nomenclature to human genes. 
Of the 30,000 approved gene symbols currently listed in the HGNC database (http://www.genenames.org/search), the majority represent protein-coding genes; however, they also include pseudogenes, phenotypic loci and some genomic features. In recent years the list has also increased to include almost 3,000 named human ncRNA genes. HGNC is actively engaging with the RNA research community in order to provide unique symbols and names for each sequence that encodes an ncRNA. Most of the classical small ncRNA genes have now been provided with a unique nomenclature, and work on naming the long (>200 nucleotides) non-coding RNAs (lncRNAs) is ongoing.",2011-01-01 +21624339,[A new resource for the bibliography research: project experience ILISI® Index of Italian Literature on Nursing Sciences].,"Since July 2008 the ILISI (Index of Italian Literature on Nursing Sciences) elaborated by the IPASVI nursing college of Rome has been available on-line at the page http://www.ipasvi.roma.it/ita/ILISI/ . The aim of this is to make Italian nursing literature more available and to favor bibliographic research. About 3000 articles and 30 nursing journals are available : the necessary software is open source (free) and has been adapted to allow searches by author, topic or word content. Indexation has been carried out by a group of volunteer nurses using a Thesaurus created by the project group. This article describes the aims of the project , how it has been created , the resources employed and the potential of the database. Use of the latter is on the increase: in fact, during the first 12 months of availability , the number of consultations reached 9000.",2011-01-01 +22876807,Coupling SIMD and SIMT architectures to boost performance of a phylogeny-aware alignment kernel.,"

Background

Aligning short DNA reads to a reference sequence alignment is a prerequisite for detecting their biological origin and analyzing them in a phylogenetic context. With the PaPaRa tool we introduced a dedicated dynamic programming algorithm for simultaneously aligning short reads to reference alignments and corresponding evolutionary reference trees. The algorithm aligns short reads to phylogenetic profiles that correspond to the branches of such a reference tree. The algorithm needs to perform an immense number of pairwise alignments. Therefore, we explore vector intrinsics and GPUs to accelerate the PaPaRa alignment kernel.

Results

We optimized and parallelized PaPaRa on CPUs and GPUs. Via SSE 4.1 SIMD (Single Instruction, Multiple Data) intrinsics for x86 SIMD architectures and multi-threading, we obtained a 9-fold acceleration on a single core as well as linear speedups with respect to the number of cores. The peak CPU performance amounts to 18.1 GCUPS (Giga Cell Updates per Second) using all four physical cores on an Intel i7 2600 CPU running at 3.4 GHz. The average CPU performance (averaged over all test runs) is 12.33 GCUPS. We also used OpenCL to execute PaPaRa on a GPU SIMT (Single Instruction, Multiple Threads) architecture. A NVIDIA GeForce 560 GPU delivered peak and average performance of 22.1 and 18.4 GCUPS respectively. Finally, we combined the SIMD and SIMT implementations into a hybrid CPU-GPU system that achieved an accumulated peak performance of 33.8 GCUPS.

Conclusions

This accelerated version of PaPaRa (available at http://www.exelixis-lab.org/software.html) provides a significant performance improvement that allows for analyzing larger datasets in less time. We observe that state-of-the-art SIMD and SIMT architectures deliver comparable performance for this dynamic programming kernel when the ""competing programmer approach"" is deployed. Finally, we show that overall performance can be substantially increased by designing a hybrid CPU-GPU system with appropriate load distribution mechanisms.",2012-08-09 +22035331,Pedigree reconstruction using identity by descent.,"Can we find the family trees, or pedigrees, that relate the haplotypes of a group of individuals? Collecting the genealogical information for how individuals are related is a very time-consuming and expensive process. Methods for automating the construction of pedigrees could stream-line this process. While constructing single-generation families is relatively easy given whole genome data, reconstructing multi-generational, possibly inbred, pedigrees is much more challenging. This article addresses the important question of reconstructing monogamous, regular pedigrees, where pedigrees are regular when individuals mate only with other individuals at the same generation. This article introduces two multi-generational pedigree reconstruction methods: one for inbreeding relationships and one for outbreeding relationships. In contrast to previous methods that focused on the independent estimation of relationship distances between every pair of typed individuals, here we present methods that aim at the reconstruction of the entire pedigree. We show that both our methods out-perform the state-of-the-art and that the outbreeding method is capable of reconstructing pedigrees at least six generations back in time with high accuracy. 
The two programs are available at http://cop.icsi.berkeley.edu/cop/.",2011-10-28 +22219718,Recovering protein-protein and domain-domain interactions from aggregation of IP-MS proteomics of coregulator complexes.,"Coregulator proteins (CoRegs) are part of multi-protein complexes that transiently assemble with transcription factors and chromatin modifiers to regulate gene expression. In this study we analyzed data from 3,290 immuno-precipitations (IP) followed by mass spectrometry (MS) applied to human cell lines aimed at identifying CoRegs complexes. Using the semi-quantitative spectral counts, we scored binary protein-protein and domain-domain associations with several equations. Unlike previous applications, our methods scored prey-prey protein-protein interactions regardless of the baits used. We also predicted domain-domain interactions underlying predicted protein-protein interactions. The quality of predicted protein-protein and domain-domain interactions was evaluated using known binary interactions from the literature, whereas one protein-protein interaction, between STRN and CTTNBP2NL, was validated experimentally; and one domain-domain interaction, between the HEAT domain of PPP2R1A and the Pkinase domain of STK25, was validated using molecular docking simulations. The scoring schemes presented here recovered known, and predicted many new, complexes, protein-protein, and domain-domain interactions. The networks that resulted from the predictions are provided as a web-based interactive application at http://maayanlab.net/HT-IP-MS-2-PPI-DDI/.",2011-12-29 +22046350,Predicting residue-residue contacts and helix-helix interactions in transmembrane proteins using an integrative feature-based random forest approach.,"Integral membrane proteins constitute 25-30% of genomes and play crucial roles in many biological processes. However, less than 1% of membrane protein structures are in the Protein Data Bank. 
In this context, it is important to develop reliable computational methods for predicting the structures of membrane proteins. Here, we present the first application of random forest (RF) for residue-residue contact prediction in transmembrane proteins, which we term as TMhhcp. Rigorous cross-validation tests indicate that the built RF models provide a more favorable prediction performance compared with two state-of-the-art methods, i.e., TMHcon and MEMPACK. Using a strict leave-one-protein-out jackknifing procedure, they were capable of reaching the top L/5 prediction accuracies of 49.5% and 48.8% for two different residue contact definitions, respectively. The predicted residue contacts were further employed to predict interacting helical pairs and achieved the Matthew's correlation coefficients of 0.430 and 0.424, according to two different residue contact definitions, respectively. To facilitate the academic community, the TMhhcp server has been made freely accessible at http://protein.cau.edu.cn/tmhhcp.",2011-10-28 +22905152,CDA: combinatorial drug discovery using transcriptional response modules.,"

Background

Anticancer therapies that target single signal transduction pathways often fail to prevent proliferation of cancer cells because of overlapping functions and cross-talk between different signaling pathways. Recent research has identified that balanced multi-component therapies might be more efficacious than highly specific single component therapies in certain cases. Ideally, synergistic combinations can provide 1) increased efficacy of the therapeutic effect 2) reduced toxicity as a result of decreased dosage providing equivalent or increased efficacy 3) the avoidance or delayed onset of drug resistance. Therefore, the interest in combinatorial drug discovery based on systems-oriented approaches has been increasing steadily in recent years.

Methodology

Here we describe the development of Combinatorial Drug Assembler (CDA), a genomics and bioinformatics system, whereby using gene expression profiling, multiple signaling pathways are targeted for combinatorial drug discovery. CDA performs expression pattern matching of signaling pathway components to compare genes expressed in an input cell line (or patient sample data), with expression patterns in cell lines treated with different small molecules. Then it detects best pattern matching combinatorial drug pairs across the input gene set-related signaling pathways to detect where gene expression patterns overlap and those predicted drug pairs could likely be applied as combination therapy. We carried out in vitro validations on non-small cell lung cancer cells and triple-negative breast cancer (TNBC) cells. We found two combinatorial drug pairs that showed synergistic effect on lung cancer cells. Furthermore, we also observed that halofantrine and vinblastine were synergistic on TNBC cells.

Conclusions

CDA provides a new way for rational drug combination. Together with phExplorer, CDA also provides functional insights into combinatorial drugs. CDA is freely available at http://cda.i-pharm.org.",2012-08-08 +22917185,CDS: a fold-change based statistical test for concomitant identification of distinctness and similarity in gene expression analysis.,"The problem of identifying differential activity such as in gene expression is a major defeat in biostatistics and bioinformatics. Equally important, however much less frequently studied, is the question of similar activity from one biological condition to another. The fold-change, or ratio, is usually considered a relevant criterion for stating difference and similarity between measurements. Importantly, no statistical method for concomitant evaluation of similarity and distinctness currently exists for biological applications. Modern microarray, digital PCR (dPCR), and Next-Generation Sequencing (NGS) technologies frequently provide a means of coefficient of variation estimation for individual measurements. Using fold-change, and by making the assumption that measurements are normally distributed with known variances, we designed a novel statistical test that allows us to detect concomitantly, thus using the same formalism, differentially and similarly expressed genes (http://cds.ihes.fr). Given two sets of gene measurements in different biological conditions, the probabilities of making type I and type II errors in stating that a gene is differentially or similarly expressed from one condition to the other can be calculated. Furthermore, a confidence interval for the fold-change can be delineated. Finally, we demonstrate that the assumption of normality can be relaxed to consider arbitrary distributions numerically. The Concomitant evaluation of Distinctness and Similarity (CDS) statistical test correctly estimates similarities and differences between measurements of gene expression. 
The implementation, being time and memory efficient, allows the use of the CDS test in high-throughput data analysis such as microarray, dPCR, and NGS experiments. Importantly, the CDS test can be applied to the comparison of single measurements (N=1) provided the variance (or coefficient of variation) of the signals is known, making CDS a valuable tool also in biomedical analysis where typically a single measurement per subject is available.",2012-06-25 +21298571,Exploratory regression analysis: a tool for selecting models and determining predictor importance.,"Linear regression analysis is one of the most important tools in a researcher's toolbox for creating and testing predictive models. Although linear regression analysis indicates how strongly a set of predictor variables, taken together, will predict a relevant criterion (i.e., the multiple R), the analysis cannot indicate which predictors are the most important. Although there is no definitive or unambiguous method for establishing predictor variable importance, there are several accepted methods. This article reviews those methods for establishing predictor importance and provides a program (in Excel) for implementing them (available for direct download at http://dl.dropbox.com/u/2480715/ERA.xlsm?dl=1) . The program investigates all 2(p) - 1 submodels and produces several indices of predictor importance. This exploratory approach to linear regression, similar to other exploratory data analysis techniques, has the potential to yield both theoretical and practical benefits.",2011-06-01 +21595984,STAT6 expression in glioblastoma promotes invasive growth.,"

Background

Glioblastoma (GBM) is a highly aggressive malignant primary brain tumor, characterized by rapid growth, diffuse infiltration of cells into both adjacent and remote brain regions, and a generalized resistance to currently available treatment modalities. Recent reports in the literature suggest that Signal Transducers and Activators of Transcription (STATs) play important roles in the regulation of GBM pathophysiology.

Methods

STAT6 protein expression was analyzed by Western blotting in GBM cell lines and by immunohistochemistry in a tissue microarray (TMA) of glioma patient tissues. We utilized shRNA against STAT6 to investigate the effects of prolonged STAT6 depletion on the growth and invasion of two STAT6-positive GBM cell lines. Cell proliferation was assessed by measuring (3)H-Thymidine uptake over time. Invasion was measured using an in vitro transwell assay in which cells invade through a type IV collagen matrix toward a chemoattractant (Fetal Bovine Serum). Cells were then stained and counted. Kaplan-Meier survival curves were generated to show the correlation between STAT6 gene expression and patient survival in 343 glioma patients and in a subset of patients with only GBM. Gene expression microarray and clinical data were acquired from the Rembrandt 1 public data depository (https://caintegrator.nci.nih.gov/rembrandt/). Lastly, a genome-wide expression microarray analysis was performed to compare gene expression in wild-type GBM cells to expression in stable STAT6 knockdown clones.

Results

STAT6 was expressed in 2 GBM cell lines, U-1242MG and U-87MG, and in normal astrocytes (NHA) but not in the U-251MG GBM cell line. In our TMA study, STAT6 immunostaining was visible in the majority of astrocytomas of all grades (I-IV) but not in normal brain tissue. In positive cells, STAT6 was localized exclusively in the nuclei over 95% of the time. STAT6-deficient GBM cells showed a reduction in (3)H-Thymidine uptake compared to the wild-type. There was some variation among the different shRNA-silenced clones, but all had a reduction in (3)H-Thymidine uptake ranging from 35%-70% in U-1242MG and 40-50% in U-87MG cells. Additionally, STAT6-depleted cells were less invasive than controls in our in vitro transmembrane invasion assay. Invasiveness was decreased by 25-40% and 30-75% in U-1242MG and U-87MG cells, respectively. The microarray analysis identified matrix metalloproteinase 1 (MMP-1) and urokinase Plasminogen activator (uPA) as potential STAT6 target genes involved in the promotion of GBM cell invasion. In a Kaplan-Meier survival curve based on Rembrandt 1 gene expression microarray and clinical data, there was a significant difference in survival (P < 0.05) between glioma patients with up- and down-regulated STAT6. Decreased STAT6 expression correlated with longer survival times. In two subsets of patients with either grade IV tumors (GBM) or Grade II/III astrocytomas, there was a similar trend that however did not reach statistical significance.

Conclusions

Taken together, these findings suggest a role for STAT6 in enhancing cell proliferation and invasion in GBM, which may explain why up-regulation of STAT6 correlates with shorter survival times in glioma patients. This report thus identifies STAT6 as a new and potentially promising therapeutic target.",2011-05-20 +22199391,Measuring the distance between multiple sequence alignments.,"

Motivation

Multiple sequence alignment (MSA) is a core method in bioinformatics. The accuracy of such alignments may influence the success of downstream analyses such as phylogenetic inference, protein structure prediction, and functional prediction. The importance of MSA has led to the proliferation of MSA methods, with different objective functions and heuristics to search for the optimal MSA. Different methods of inferring MSAs produce different results in all but the most trivial cases. By measuring the differences between inferred alignments, we may be able to develop an understanding of how these differences (i) relate to the objective functions and heuristics used in MSA methods, and (ii) affect downstream analyses.

Results

We introduce four metrics to compare MSAs, which include the position in a sequence where a gap occurs or the location on a phylogenetic tree where an insertion or deletion (indel) event occurs. We use both real and synthetic data to explore the information given by these metrics and demonstrate how the different metrics in combination can yield more information about MSA methods and the differences between them.

Availability

MetAl is a free software implementation of these metrics in Haskell. Source and binaries for Windows, Linux and Mac OS X are available from http://kumiho.smith.man.ac.uk/whelan/software/metal/.",2011-12-23 +21926179,Assemblathon 1: a competitive assessment of de novo short read assembly methods.,"Low-cost short read sequencing technology has revolutionized genomics, though it is only just becoming practical for the high-quality de novo assembly of a novel large genome. We describe the Assemblathon 1 competition, which aimed to comprehensively assess the state of the art in de novo assembly methods when applied to current sequencing technologies. In a collaborative effort, teams were asked to assemble a simulated Illumina HiSeq data set of an unknown, simulated diploid genome. A total of 41 assemblies from 17 different groups were received. Novel haplotype aware assessments of coverage, contiguity, structure, base calling, and copy number were made. We establish that within this benchmark: (1) It is possible to assemble the genome to a high level of coverage and accuracy, and that (2) large differences exist between the assemblies, suggesting room for further improvements in current methods. The simulated benchmark, including the correct answer, the assemblies, and the code that was used to evaluate the assemblies is now public and freely available from http://www.assemblathon.org/.",2011-09-16 +22554261,"Comparative analysis of grapevine whole-genome gene predictions, functional annotation, categorization and integration of the predicted gene sequences.","

Background

The first draft assembly and gene prediction of the grapevine genome (8X base coverage) was made available to the scientific community in 2007, and functional annotation was developed on this gene prediction. Since then additional Sanger sequences were added to the 8X sequences pool and a new version of the genomic sequence with superior base coverage (12X) was produced.

Results

In order to more efficiently annotate the function of the genes predicted in the new assembly, it is important to build on as much of the previous work as possible, by transferring 8X annotation of the genome to the 12X version. The 8X and 12X assemblies and gene predictions of the grapevine genome were compared to answer the question, ""Can we uniquely map 8X predicted genes to 12X predicted genes?"" The results show that while the assemblies and gene structure predictions are too different to make a complete mapping between them, most genes (18,725) showed a one-to-one relationship between 8X predicted genes and the last version of 12X predicted genes. In addition, reshuffled genomic sequence structures appeared. These highlight regions of the genome where the gene predictions need to be taken with caution. Based on the new grapevine gene functional annotation and in-depth functional categorization, twenty eight new molecular networks have been created for VitisNet while the existing networks were updated.

Conclusions

The outcomes of this study provide a functional annotation of the 12X genes, an update of VitisNet, the system of the grapevine molecular networks, and a new functional categorization of genes. Data are available at the VitisNet website (http://www.sdstate.edu/ps/research/vitis/pathways.cfm).",2012-05-03 +22379410,Dynamic modelling under uncertainty: the case of Trypanosoma brucei energy metabolism.,"Kinetic models of metabolism require detailed knowledge of kinetic parameters. However, due to measurement errors or lack of data this knowledge is often uncertain. The model of glycolysis in the parasitic protozoan Trypanosoma brucei is a particularly well analysed example of a quantitative metabolic model, but so far it has been studied with a fixed set of parameters only. Here we evaluate the effect of parameter uncertainty. In order to define probability distributions for each parameter, information about the experimental sources and confidence intervals for all parameters were collected. We created a wiki-based website dedicated to the detailed documentation of this information: the SilicoTryp wiki (http://silicotryp.ibls.gla.ac.uk/wiki/Glycolysis). Using information collected in the wiki, we then assigned probability distributions to all parameters of the model. This allowed us to sample sets of alternative models, accurately representing our degree of uncertainty. Some properties of the model, such as the repartition of the glycolytic flux between the glycerol and pyruvate producing branches, are robust to these uncertainties. However, our analysis also allowed us to identify fragilities of the model leading to the accumulation of 3-phosphoglycerate and/or pyruvate. The analysis of the control coefficients revealed the importance of taking into account the uncertainties about the parameters, as the ranking of the reactions can be greatly affected. 
This work will now form the basis for a comprehensive Bayesian analysis and extension of the model considering alternative topologies.",2012-01-19 +21914630,High-dimensional bolstered error estimation.,"

Motivation

In small-sample settings, bolstered error estimation has been shown to perform better than cross-validation and competitively with bootstrap with regard to various criteria. The key issue for bolstering performance is the variance setting for the bolstering kernel. Heretofore, this variance has been determined in a non-parametric manner from the data. Although bolstering based on this variance setting works well for small feature sets, results can deteriorate for high-dimensional feature spaces.

Results

This article computes an optimal kernel variance depending on the classification rule, sample size, model and feature space, both the original number and the number remaining after feature selection. A key point is that the optimal variance is robust relative to the model. This allows us to develop a method for selecting a suitable variance to use in real-world applications where the model is not known, but the other factors in determining the optimal kernel are known.

Availability

Companion website at http://compbio.tgen.org/paper_supp/high_dim_bolstering.

Contact

edward@mail.ece.tamu.edu.",2011-09-13 +21448269,Differential expression of salivary proteins between susceptible and insecticide-resistant mosquitoes of Culex quinquefasciatus.,"

Background

The Culex quinquefasciatus mosquito, a major pest and vector of filariasis and arboviruses in the tropics, has developed multiple resistance mechanisms to the main insecticide classes currently available in public health. Among them, the insensitive acetylcholinesterase (ace-1(R) allele) is widespread worldwide and confers cross-resistance to organophosphates and carbamates. Fortunately, in an insecticide-free environment, this mutation is associated with a severe genetic cost that can affect various life history traits. Salivary proteins are directly involved in human-vector contact during biting and therefore play a key role in pathogen transmission.

Methods and results

An original proteomic approach combining 2D-electrophoresis and mass spectrometry was adopted to compare the salivary expression profiles of two strains of C. quinquefasciatus with the same genetic background but carrying either the ace-1(R) resistance allele or not (wild type). Four salivary proteins were differentially expressed (>2 fold, P<0.05) in susceptible (SLAB) and resistant (SR) mosquito strains. Protein identification indicated that the D7 long form, a major salivary protein involved in blood feeding success, presented lower expression in the resistant strain than the susceptible strain. In contrast, three other proteins, including metabolic enzymes (endoplasmin, triosephosphate isomerase) were significantly over-expressed in the salivary gland of ace-1(R) resistant mosquitoes. A catalogue of 67 salivary proteins of C. quinquefasciatus sialotranscriptome was also identified and described.

Conclusion

The ""resistance""-dependent expression of salivary proteins in mosquitoes may have considerable impact on biting behaviour and hence on the capacity to transmit parasites/viruses to humans. The behaviour of susceptible and insecticide-resistant mosquitoes in the presence of vertebrate hosts and its impact on pathogen transmission urgently requires further investigation.

Data deposition

All proteomic data will be deposited at PRIDE (http://www.ebi.ac.uk/pride/).",2011-03-23 +30722561,First Report of Colletotrichum fructicola Causing Bitter Rot of Pear (Pyrus bretschneideri) in China.,"Pyrus bretschneideri cv. Dangshansuli is the most important commercial Asiatic pear cultivar worldwide. In recent years, a fruit rot disease of unknown etiology have caused considerable fresh market losses in the 'Dangshansuli' production operations in Dangshan county, Anhui Province, China. Fresh market losses typically range from 60 to 90% and in 2008 were estimated at US$150 million. Symptomatic mature 'Dangshansuli' pears were collected from an orchard in Dangshan County in February 2008. A thin section (about 1 mm3) of symptomatic tissue was sterilized in a bleach and placed on potato dextrose agar (PDA) medium for isolation. From all fruit, a single fungus was recovered displaying gray-white dense aerial mycelium. Identical fungi were isolated from six additional symptomatic 'Dangshansuli' pears collected from other orchards in the county. Pathogenicity tests using one isolate (DS-0) were conducted in triplicate by placing 4 mm diameter discs from 7-day-old PDA plates onto the mature 'Dangshansuli' pear fruit that were incubated in an incubator at 25°C with a 12-h photoperiod for 30 days. An equal number of noncolonized PDA inoculations were included as a control. Isolate DS-0 caused symptoms similar to those in the field within 7 days and complete collapse of cortical tissues within 30 days. No symptoms were observed on control fruit. Round brownish lesions with a diameter of about 3 cm on inoculated fruit was populated by sunken, rotiform acervuli on which numerous, colorless, oblong single cell shape conidia with width/length of 6 × 20 μm were produced. A comparison of morphology and sequence analysis of the ribosomal internal transcribed spacer (ITS) regions in pre- and post-inoculation cultures from inoculated fruit confirmed the presence DS-0. 
To further characterize DS-0, aliquots of extracted genomic DNA from the fungus were subjected to PCR amplification and sequencing of seven gene regions from the ITS, actin (ACT), β-tubulin 2 (TUB2), glyceraldehyde-3-phosphate dehydrogenase (GAPDH), manganese-superoxide dismutase (SOD2), chitin synthase (CHS-1), and calmodulin (CAL), using the primers listed by Weir et al (4), except for the primer pair of ITS1 (5'-TCCGTAGGTGAACCTGCGG-3') and ITS4 (5'-TCCTCCGCTTATTGATATGC-3') for ITS amplification, and SODglo2-R (5'-TAGTACGCGTGCTCGGACAT-3') and SODglo2-R (5'-TAGTACGCGTGCTCGGACAT-3') for TBU2 amplification. Two or three clones of PCR products of each gene were sequenced and compared (GenBank Accession Nos. KC410780 to KC410786) to published data at http://www.cbs.knaw.nl/colletotrichum . The result indicated that DS-0 shared the highest similarity of 99.91% with Colletotrichum fructicola, corroborating numerous reports of Colletotrichum spp. causing bitter rot of pear on P. pyrifolia (1,2,3,4). C. fructicola was only recently reported as causing bitter rot of P. pyrifolia (4) and to our knowledge, this is the first report of C. fructicola causing bitter rot of P. bretschneideri, which will help producers select the best management practices for this devastating disease. References: (1) P. F. Cannon et al. Stud. Mycol. 73:181, 2012. (2) N. Tashiro et al. J. Gen. Plant Pathol. 78:221, 2012. (3) G. K. Wan et al. Mycobiology 35:238, 2007. (4) B. S. Weir et al. Stud. Mycol. 73:115, 2012.",2013-07-01 +21300701,HLA*IMP--an integrated framework for imputing classical HLA alleles from SNP genotypes.,"

Motivation

Genetic variation at classical HLA alleles influences many phenotypes, including susceptibility to autoimmune disease, resistance to pathogens and the risk of adverse drug reactions. However, classical HLA typing methods are often prohibitively expensive for large-scale studies. We previously described a method for imputing classical alleles from linked SNP genotype data. Here, we present a modification of the original algorithm implemented in a freely available software suite that combines local data preparation and QC with probabilistic imputation through a remote server.

Results

We introduce two modifications to the original algorithm. First, we present a novel SNP selection function that leads to pronounced increases (up by 40% in some scenarios) in call rate. Second, we develop a parallelized model building algorithm that allows us to process a reference set of over 2500 individuals. In a validation experiment, we show that our framework produces highly accurate HLA type imputations at class I and class II loci for independent datasets: at call rates of 95-99%, imputation accuracy is between 92% and 98% at the four-digit level and over 97% at the two-digit level. We demonstrate utility of the method through analysis of a genome-wide association study for psoriasis where there is a known classical HLA risk allele (HLA-C*06:02). We show that the imputed allele shows stronger association with disease than any single SNP within the region. The imputation framework, HLA*IMP, provides a powerful tool for dissecting the architecture of genetic risk within the HLA.

Availability

HLA*IMP, implemented in C++ and Perl, is available from http://oxfordhla.well.ox.ac.uk and is free for academic use.",2011-02-07 +21908864,Dissection of human MiRNA regulatory influence to subpathway.,"The global insight into the relationships between miRNAs and their regulatory influences remains poorly understood. And most of complex diseases may be attributed to certain local areas of pathway (subpathway) instead of the entire pathway. Here, we reviewed the studies on miRNA regulations to pathways and constructed a bipartite miRNAs and subpathways network for systematic analyzing the miRNA regulatory influences to subpathways. We found that a small fraction of miRNAs were global regulators, environmental information processing pathways were preferentially regulated by miRNAs, and miRNAs had synergistic effect on regulating group of subpathways with similar function. Integrating the disease states of miRNAs, we also found that disease miRNAs regulated more subpathways than nondisease miRNAs, and for all miRNAs, the number of regulated subpathways was not in proportion to the number of the related diseases. Therefore, the study not only provided a global view on the relationships among disease, miRNA and subpathway, but also uncovered the function aspects of miRNA regulations and potential pathogenesis of complex diseases. A web server to query, visualize and download for all the data can be freely accessed at http://bioinfo.hrbmu.edu.cn/miR2Subpath.",2011-09-10 +23151829,Assessment of primary colorectal cancer heterogeneity by using whole-tumor texture analysis: contrast-enhanced CT texture as a biomarker of 5-year survival.,"

Purpose

To determine if computed tomographic (CT) texture features of primary colorectal cancer are related to 5-year overall survival rate.

Materials and methods

Institutional review board waiver was obtained for this retrospective analysis. Texture features of the entire primary tumor were assessed with contrast material-enhanced staging CT studies obtained in 57 patients as part of an ethically approved study and by using proprietary software. Entropy, uniformity, kurtosis, skewness, and standard deviation of the pixel distribution histogram were derived from CT images without filtration and with filter values corresponding to fine (1.0), medium (1.5, 2.0), and coarse (2.5) textures. Patients were followed up until death and were censored at 5 years if they were still alive. Kaplan-Meier analysis was performed to determine the relationship, if any, between CT texture and 5-year overall survival rate. The Cox proportional hazards model was used to assess independence of texture parameters from stage.

Results

Follow-up data were available for 55 of 57 patients. There were eight stage I, 19 stage II, 17 stage III, and 11 stage IV cancers. Fine-texture feature Kaplan-Meier survival plots for entropy, uniformity, kurtosis, skewness, and standard deviation of the pixel distribution histogram were significantly different for tumors above and below each respective threshold receiver operating characteristic (ROC) curve optimal cutoff value (P = .001, P = .018, P = .032, P = .008, and P = .001, respectively), with poorer prognosis for ROC optimal values (a) less than 7.89 for entropy, (b) at least 0.01 for uniformity, (c) less than 2.48 for kurtosis, (d) at least -0.38 for skewness, and (e) less than 61.83 for standard deviation. Multivariate Cox proportional hazards regression analysis showed that each parameter was independent from the stage predictor of overall survival rate (P = .001, P = .009, P = .006, P = .02, and P = .001, respectively).

Conclusion

Fine-texture features are associated with poorer 5-year overall survival rate in patients with primary colorectal cancer.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12120254/-/DC1.",2012-11-14 +22719234,Metabolic reconstruction for metagenomic data and its application to the human microbiome.,"Microbial communities carry out the majority of the biochemical activity on the planet, and they play integral roles in processes including metabolism and immune homeostasis in the human microbiome. Shotgun sequencing of such communities' metagenomes provides information complementary to organismal abundances from taxonomic markers, but the resulting data typically comprise short reads from hundreds of different organisms and are at best challenging to assemble comparably to single-organism genomes. Here, we describe an alternative approach to infer the functional and metabolic potential of a microbial community metagenome. We determined the gene families and pathways present or absent within a community, as well as their relative abundances, directly from short sequence reads. We validated this methodology using a collection of synthetic metagenomes, recovering the presence and abundance both of large pathways and of small functional modules with high accuracy. We subsequently applied this method, HUMAnN, to the microbial communities of 649 metagenomes drawn from seven primary body sites on 102 individuals as part of the Human Microbiome Project (HMP). This provided a means to compare functional diversity and organismal ecology in the human microbiome, and we determined a core of 24 ubiquitously present modules. Core pathways were often implemented by different enzyme families within different body sites, and 168 functional modules and 196 metabolic pathways varied in metagenomic abundance specifically to one or more niches within the microbiome. These included glycosaminoglycan degradation in the gut, as well as phosphate and amino acid transport linked to host phenotype (vaginal pH) in the posterior fornix. 
An implementation of our methodology is available at http://huttenhower.sph.harvard.edu/humann. This provides a means to accurately and efficiently characterize microbial metabolic pathways and functional modules directly from high-throughput sequencing reads, enabling the determination of community roles in the HMP cohort and in future metagenomic studies.",2012-06-13 +23043260,Frnakenstein: multiple target inverse RNA folding.,"

Background

RNA secondary structure prediction, or folding, is a classic problem in bioinformatics: given a sequence of nucleotides, the aim is to predict the base pairs formed in its three dimensional conformation. The inverse problem of designing a sequence folding into a particular target structure has only more recently received notable interest. With a growing appreciation and understanding of the functional and structural properties of RNA motifs, and a growing interest in utilising biomolecules in nano-scale designs, the interest in the inverse RNA folding problem is bound to increase. However, whereas the RNA folding problem from an algorithmic viewpoint has an elegant and efficient solution, the inverse RNA folding problem appears to be hard.

Results

In this paper we present a genetic algorithm approach to solve the inverse folding problem. The main aim of the development was to address the hitherto mostly ignored extension of solving the inverse folding problem, the multi-target inverse folding problem, while simultaneously designing a method with superior performance when measured on the quality of designed sequences. The genetic algorithm has been implemented as a Python program called Frnakenstein. It was benchmarked against four existing methods and several data sets totalling 769 real and predicted single structure targets, and on 292 two structure targets. It performed as well as or better at finding sequences which folded in silico into the target structure than all existing methods, without the heavy bias towards CG base pairs that was observed for all other top performing methods. On the two structure targets it also performed well, generating a perfect design for about 80% of the targets.

Conclusions

Our method illustrates that successful designs for the inverse RNA folding problem do not necessarily have to rely on heavy biases in base pair and unpaired base distributions. The design problem seems to become more difficult on larger structures when the target structures are real structures, while no deterioration was observed for predicted structures. Design for two structure targets is considerably more difficult, but far from impossible, demonstrating the feasibility of automated design of artificial riboswitches. The Python implementation is available at http://www.stats.ox.ac.uk/research/genome/software/frnakenstein.",2012-10-09 +22460772,Epidemiology of sepsis in pediatric intensive care units: first Colombian multicenter study.,"

Objectives

In 2002, the Surviving Sepsis Campaign pointed out the need to recognize sepsis as an important cause of death and high economic and social costs. There are few epidemiologic studies of this disease in pediatrics and none in Colombia. The objective of this study was to describe the sociodemographic and clinical characteristics of patients with sepsis who were admitted at participating pediatric intensive care units.

Design

Prospective study.

Setting and patients

A Web site, http://www.sepsisencolombia.com, was created, in which 19 pediatric intensive care units from the ten principal cities in the country reported epidemiologic data about patients with sepsis between March 1, 2009, and February 28, 2010.

Interventions

None.

Measurements and main results

There were 1,051 patients. Of these, 55% were male. Fifty-six percent came from urban areas. Fifty-six percent were <2 yrs of age. Seventy-six percent belonged to a low socioeconomic stratum and 44% received government-subsidized health insurance. Forty-eight percent of patients had septic shock, 25% severe sepsis, and 27% sepsis. Forty-three percent were diagnosed with multiple organ dysfunction syndrome. In 54%, the infection was of respiratory origin followed by the abdomen as the site of origin in 18% of the patients. In almost 50%, the etiological agent was detected with Gram-negative bacteria being the most frequent and of highest mortality. Fifty percent had some type of relevant pathologic antecedent. Eleven percent had an invasive device on admission. Sixty-eight percent of the patients required mechanical ventilation. Mortality rate was 18%. The most important risk factors for mortality were age under 2 yrs, presence of shock or multiple organ dysfunction syndrome, and presence of Gram-negative bacteria.

Conclusions

Sepsis is common in Colombian pediatric intensive care units. Clear risk factors for getting sick and dying from this disease were identified. Mortality resulting from this disease is considerable for a developing society like ours.",2012-09-01 +21698456,Accurate prediction of protein structural class using auto covariance transformation of PSI-BLAST profiles.,"Computational prediction of protein structural class based solely on sequence data remains a challenging problem in protein science. Existing methods differ in the protein sequence representation models and prediction engines adopted. In this study, a powerful feature extraction method, which combines position-specific score matrix (PSSM) with auto covariance (AC) transformation, is introduced. Thus, a sample protein is represented by a series of discrete components, which could partially incorporate the long-range sequence order information and evolutionary information reflected from the PSI-BLAST profile. To verify the performance of our method, jackknife cross-validation tests are performed on four widely used benchmark datasets. Comparison of our results with existing methods shows that our method provides the state-of-the-art performance for structural class prediction. A Web server that implements the proposed method is freely available at http://202.194.133.5/xinxi/AAC_PSSM_AC/index.htm.",2011-06-23 +22925442,Tamoxifen for the management of breast events induced by non-steroidal antiandrogens in patients with prostate cancer: a systematic review.,"

Background

Tamoxifen has emerged as a potential management option for gynecomastia and breast pain due to non-steroidal antiandrogens, and it is considered an alternative to surgery or radiotherapy. The objective of this systematic review was to assess the benefits and harms of tamoxifen, in comparison to other treatment options, for either the prophylaxis or treatment of breast events induced by non-steroidal antiandrogens in prostate cancer patients.

Methods

We searched CENTRAL, MEDLINE, EMBASE, reference lists, the abstracts of three major conferences and three trial registers to identify ongoing randomized controlled trials (RCTs). Two authors independently screened the articles identified, assessed the trial quality and extracted data. The protocol was prospectively registered (CRD42011001320; http://www.crd.york.ac.uk/PROSPERO).

Results

Four studies were identified. Tamoxifen significantly reduced the risk of suffering from gynecomastia (risk ratio (RR) 0.10, 95% CI 0.05 to 0.22) or breast pain (RR 0.06, 95% CI 0.02 to 0.17) at six months compared to untreated controls. Tamoxifen also showed a significant benefit for the prevention of gynecomastia (RR 0.22, 95% CI 0.08 to 0.58) and breast pain (RR 0.25, 95% CI 0.10 to 0.64) when compared to anastrozole after a median of 12 months. One study showed a significant benefit of tamoxifen for the prevention of gynecomastia (RR 0.24, 95% CI 0.09 to 0.65) and breast pain (RR 0.20, 95% CI 0.06 to 0.65) when compared with radiotherapy at six months. Radiotherapy increased the risk of suffering from nipple erythema and skin irritation, but there were no significant differences for any other adverse events (all P>0.05).

Conclusions

The currently available evidence suggests good efficacy of tamoxifen for the prevention and treatment of breast events induced by non-steroidal antiandrogens. The impact of tamoxifen therapy on long-term adverse events, disease progression and survival remains unclear. Further large, well-designed RCTs, including long-term follow-ups, are warranted. Also, the optimal dose needs to be clarified.",2012-08-28 +21697123,In-depth annotation of SNPs arising from resequencing projects using NGS-SNP.,"

Summary

NGS-SNP is a collection of command-line scripts for providing rich annotations for SNPs identified by the sequencing of whole genomes from any organism with reference sequences in Ensembl. Included among the annotations, several of which are not available from any existing SNP annotation tools, are the results of detailed comparisons with orthologous sequences. These comparisons can, for example, identify SNPs that affect conserved residues, or alter residues or genes linked to phenotypes in another species.

Availability

NGS-SNP is available both as a set of scripts and as a virtual machine. The virtual machine consists of a Linux operating system with all the NGS-SNP dependencies pre-installed. The source code and virtual machine are freely available for download at http://stothard.afns.ualberta.ca/downloads/NGS-SNP/.

Contact

stothard@ualberta.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-22 +21564068,Efficacy of psoralen plus ultraviolet A therapy vs. biologics in moderate to severe chronic plaque psoriasis: retrospective data analysis of a patient registry.,"

Background

Few studies have directly compared the clinical efficacy of psoralen plus ultraviolet A (PUVA) vs. biologics in the treatment of psoriasis.

Objectives

To compare the clinical efficacy of PUVA and biologic therapies for psoriasis under daily life conditions.

Methods

Data from a psoriasis registry (http://www.psoriasis-therapieregister.at) of 172 adult patients with moderate to severe chronic plaque psoriasis treated between 2003 and 2010 were analysed retrospectively. These patients had received oral PUVA [118 treatment courses including 5-methoxypsoralen (5-MOP; n = 32) and 8-methoxypsoralen (8-MOP; n = 86)] and/or biologic agents [130 treatment courses including adalimumab (n = 18), alefacept (n = 32), efalizumab (n = 17), etanercept (n = 38), infliximab (n = 7) and ustekinumab (n = 18)]. Treatment responses were analysed in terms of Psoriasis Area and Severity Index (PASI) improvement, including complete remission (CR) and reduction of PASI by at least 90% (PASI 90) or 75% (PASI 75), at treatment completion for PUVA (median time 10·3 and 9·2 weeks, for 8-MOP and 5-MOP, respectively) and at week 12 for biologics.

Results

Intention-to-treat-as observed CR, PASI 90 and PASI 75 rate was 22%, 69% and 86% for PUVA compared with 6%, 22% and 56% for adalimumab (P = 0·0034 by adapted Wilcoxon test), 3%, 3% and 25% for alefacept (P = 0·000000002), 6%, 6% and 59% for efalizumab (P = 0·000053), 6%, 29% and 39% for etanercept (P = 0·0000086), 29%, 71% and 100% for infliximab (P = 0·36) and 6%, 39% and 67% for ustekinumab (P = 0·028). When applying a more conservative post-hoc modified worst-case scenario analysis, with CR of 15%, PASI 90 of 58% and PASI 75 of 69%, PUVA was superior only to alefacept (P = 0·000013), efalizumab (P = 0·015) and etanercept (P = 0·0037). There were no statistically significant differences in PASI reduction rates between PUVA and infliximab.

Conclusions

Retrospective analysis of registry data revealed that the primary efficacy of PUVA was superior to that of certain biologics. Prospective head-to-head studies of PUVA and biologics are warranted to confirm these observations.",2011-07-11 +23505398,Multi-Scale Continuum Modeling of Biological Processes: From Molecular Electro-Diffusion to Sub-Cellular Signaling Transduction. ,"This article provides a brief review of multi-scale modeling at the molecular to cellular scale, with new results for heart muscle cells. A finite element-based simulation package (SMOL) was used to investigate the signaling transduction at molecular and sub-cellular scales (http://mccammon.ucsd.edu/smol/, http://FETK.org) by numerical solution of time-dependent Smoluchowski equations and a reaction-diffusion system. At the molecular scale, SMOL has yielded experimentally-validated estimates of the diffusion-limited association rates for the binding of acetylcholine to mouse acetylcholinesterase using crystallographic structural data. The predicted rate constants exhibit increasingly delayed steady-state times with increasing ionic strength and demonstrate the role of an enzyme's electrostatic potential in influencing ligand binding. At the sub-cellular scale, an extension of SMOL solves a non-linear, reaction-diffusion system describing Ca2+ ligand buffering and diffusion in experimentally-derived rodent ventricular myocyte geometries. Results reveal the important role for mobile and stationary Ca2+ buffers, including Ca2+ indicator dye. We found that the alterations in Ca2+-binding and dissociation rates of troponin C (TnC) and total TnC concentration modulate subcellular Ca2+ signals. Model predicts that reduced off-rate in whole troponin complex (TnC, TnI, TnT) versus reconstructed thin filaments (Tn, Tm, actin) alters cytosolic Ca2+ dynamics under control conditions or in disease-linked TnC mutations. 
The ultimate goal of these studies is to develop scalable methods and theories for integration of molecular-scale information into simulations of cellular-scale systems.",2012-03-01 +22435961,Healthcare continuity from hospital to territory in Lombardy: TELEMACO project.,"

Objectives

To verify implementation and use of TELEMACO (TELEMedicina Ai piccoli COmuni lombardi; http://www.telemaco.regione.lombardia.it/), which provides specialized continuity of care with innovative healthcare services in remote areas of the Lombardy region of Italy; to design a network in the territory for sharing of continuity-of-care programs; and to allow the relevant health authorities to collect cost data to establish a model for sustainable pricing for implementing these services.

Methods

TELEMACO provides home-based telemanagement services for patients with chronic heart failure and chronic obstructive pulmonary disease (COPD), as well as second-opinion teleconsultations in cardiology, dermatology, diabetology, and pulmonology for general practitioners and second-opinion teleconsultations on digital images in cases of traumatic brain injury and stroke. A total of 2 service centers, 10 cardiology and pneumology departments, 30 specialists, 176 general practitioners, 40 nurses, 2 emergency departments, and 2 consultant hospitals were involved.

Results

A total of 166 patients with chronic heart failure and 474 patients with COPD were enrolled. There were 4830, 51, and 44 second-opinion teleconsultations for cardiologic, dermatologic, and diabetic conditions, respectively. There were 147 second-opinion teleconsultations on digital images, 68 for stroke, and 79 for traumatic brain injury. Implementation of TELEMACO introduced innovations in working methods and provided evidence to the health authorities for allocating funds for such services.

Conclusions

TELEMACO provided evidence that there is a growing need for home management of patients using telemedicine, a common and efficacious approach that can ensure care continuity, especially in chronic diseases.",2012-03-01 +22895984,Image-guided versus blind glucocorticoid injection for shoulder pain.,"

Background

Traditionally, glucocorticoid injection for the treatment of shoulder pain has been performed guided by anatomical landmarks alone. With the advent of readily available imaging tools such as ultrasound, image-guided injections have increasingly become accepted into routine care. While there is some evidence that the use of imaging improves accuracy, it is unclear from current evidence whether or not it improves patient-relevant outcomes.

Objectives

The aim of this review was to assess whether image-guided glucocorticoid injections improve patient-relevant outcomes compared to landmark-guided or systemic intramuscular injections in patients with shoulder pain.

Search methods

We searched the Cochrane Central Register of Controlled Trials (CENTRAL, via The Cochrane Library), MEDLINE (Ovid), and EMBASE (Ovid) to June 2011. We also searched the World Health Organisation International Clinical Trials Registry Platform (http://www.who.int/trialsearch/Default.aspx) to identify ongoing trials and screened reference lists of retrieved review articles and trials to identify potentially relevant studies.

Selection criteria

We included randomised controlled trials (RCTs) and quasi-randomised controlled clinical trials that compared image-guided glucocorticoid injection to landmark-guided or systemic intramuscular injection. Outcomes of interest included pain, function, range of motion, proportion of participants with overall improvement and adverse events. There were no restrictions on language or date of publication.

Data collection and analysis

Two review authors independently selected the studies for inclusion, extracted the data and performed a risk of bias assessment. Disagreement about inclusion or exclusion of individual studies and risk of bias was resolved by a third review author.

Main results

Five studies (290 participants) were included in the review. The image-guided groups in all trials used ultrasound to guide injection. Four studies included participants with rotator cuff disease; in three the comparator was local landmarks to direct injection into the subacromial bursa and in the fourth the comparator was systemic intramuscular injection into the upper gluteal muscles in the buttock region. One study included participants with adhesive capsulitis and injection was directed into the glenohumeral joint by either ultrasound or anatomical landmark guidance.No significant differences between groups were observed with respect to reduction in pain at one to two weeks (two trials, 146 participants, standardized mean difference (SMD) -1.44, 95% CI -4.14 to 1.26), or function at one to two weeks (two trials, 146 participants, SMD 0.95, 95% confidence interval (CI) -1.29 to 3.20; back-translated to mean difference (MD) 4 points, 95% CI -5 to 13, on a 0 to 100 point scale, higher score means better function) or six weeks (three trials, 207 participants, SMD 0.63, 95% CI -0.06 to 1.33; back-translated to MD -3 points, 95% CI -11 to 5, on a 0 to 100 point scale) and the sensitivity analyses did not alter these results. While there was a significant difference between groups with respect to reduction in pain at six weeks favouring image guidance (three trials, 207 participants, SMD -0.80, 95% CI -1.46 to -0.14), there was considerable statistical heterogeneity and after removing trials with inadequate allocation concealment and inadequate blinding in a sensitivity analysis, the difference was no longer significant (one trial, 106 participants, MD -0.60 points, 95% CI -1.44 to 0.24 points on a 9-point scale).No statistical difference in adverse events between groups was identified (10/104 image-guided group versus 16/103 comparator; risk ratio (RR) 0.55, 95% CI 0.17 to 1.85). 
Minor adverse events reported included transient post-injection pain, facial redness and warmth.

Authors' conclusions

Based upon moderate evidence from five trials, our review was unable to establish any advantage in terms of pain, function, shoulder range of motion or safety, of ultrasound-guided glucocorticoid injection for shoulder disorders over either landmark-guided or intramuscular injection. The lack of any added benefit of ultrasound guided subacromial bursal injection over glucocorticoid injection administered into the upper gluteal muscles of the buttock suggests that the benefits of glucocorticoid may arise through systemic rather than local effects. Therefore, although ultrasound guidance may improve the accuracy of injection to the putative site of pathology in the shoulder, it is not clear that this improves its efficacy to justify the significant added cost.",2012-08-15 +22659506,"Derxia lacustris sp. nov., a nitrogen-fixing bacterium isolated from a freshwater lake.","A novel nitrogen-fixing strain, designated HL-12(T), was isolated from a freshwater lake in Taiwan. Cells of strain HL-12(T) were aerobic, Gram-negative, motile rods that were surrounded by a thick capsule, contained poly-β-hydroxybutyrate granules, and formed light-yellow to brownish-red colonies. Growth occurred at 15-40 °C (optimum 25-35 °C), at pH 6.0-7.0 (optimum pH 6.0) and with 0-4 % NaCl (optimum 0-1 %). Phylogenetic analysis based on 16S rRNA gene sequences showed that strain HL-12(T) belonged to the genus Derxia and exhibited 99.1 and 98.8 % 16S rRNA gene sequence similarity, respectively, with Derxia gummosa IAM 14990 and D. gummosa IAM 13946(T). The major fatty acids (>10 %) of strain HL-12(T) were summed feature 3 (comprising C16 : 1ω7c and/or C16 : 1ω6c), C16 : 0 and C18 : 1ω7c. The cellular hydroxy fatty acids were C12 : 0 3-OH, C14 : 0 2-OH and C14 : 0 3-OH. The isoprenoid quinone was Q-8 and the DNA G+C content was 72.0 mol%. 
The polar lipid profile contained phosphatidylethanolamine, phosphatidylglycerol, diphosphatidylglycerol and several unknown aminophospholipids and phospholipids. DNA-DNA relatedness between strain HL-12(T) and http://dx.doi.org/10.1601/nm.1758 LMG 3975 and http://dx.doi.org/10.1601/nm.1758 LMG 3977(T) was <70 %. On the basis of the genotypic and phenotypic data, strain HL-12(T) represents a novel species in the genus Derxia, for which the name Derxia lacustris sp. nov. is proposed. The type strain is HL-12(T) ( = BCRC 80208(T)  = KCTC 23311(T)).",2012-06-01 +22144906,Hierarchical generalized linear models for multiple groups of rare and common variants: jointly estimating group and individual-variant effects.,"Complex diseases and traits are likely influenced by many common and rare genetic variants and environmental factors. Detecting disease susceptibility variants is a challenging task, especially when their frequencies are low and/or their effects are small or moderate. We propose here a comprehensive hierarchical generalized linear model framework for simultaneously analyzing multiple groups of rare and common variants and relevant covariates. The proposed hierarchical generalized linear models introduce a group effect and a genetic score (i.e., a linear combination of main-effect predictors for genetic variants) for each group of variants, and jointly they estimate the group effects and the weights of the genetic scores. This framework includes various previous methods as special cases, and it can effectively deal with both risk and protective variants in a group and can simultaneously estimate the cumulative contribution of multiple variants and their relative importance. Our computational strategy is based on extending the standard procedure for fitting generalized linear models in the statistical software R to the proposed hierarchical models, leading to the development of stable and flexible tools. 
The methods are illustrated with sequence data in gene ANGPTL4 from the Dallas Heart Study. The performance of the proposed procedures is further assessed via simulation studies. The methods are implemented in a freely available R package BhGLM (http://www.ssg.uab.edu/bhglm/).",2011-12-01 +21385032,Hierarchical generative biclustering for microRNA expression analysis.,"Clustering methods are a useful and common first step in gene expression studies, but the results may be hard to interpret. We bring in explicitly an indicator of which genes tie each cluster, changing the setup to biclustering. Furthermore, we make the indicators hierarchical, resulting in a hierarchy of progressively more specific biclusters. A non-parametric Bayesian formulation makes the model rigorous yet flexible and computations feasible. The model can additionally be used in information retrieval for relating relevant samples. We show that the model outperforms four other biclustering procedures on a large miRNA data set. We also demonstrate the model's added interpretability and information retrieval capability in a case study. Software is publicly available at http://research.ics.tkk.fi/mi/software/treebic/.",2011-03-01 +23190601,IMP3 can predict aggressive behaviour of lung adenocarcinoma.,"

Background

Lung cancer most often presents as an inoperable tumour and the diagnosis is usually performed on a small biopsy/cytology specimen. In the group of non small cell lung cancer - not otherwise specified, adenocarcinoma phenotype can be determined immunohistochemically using TTF-1 and Napsin A. Expression of oncofetal protein IMP3 in human cancer is associated with poor differentiation and aggressive behaviour. In the present study expression of IMP3 was correlated with expression of TTF-1 and Napsin A, histological subtype and clinical stage of lung adenocarcinoma. We were interested whether distant metastases are associated with IMP3 overexpression, regardless of the histologic subtype of adenocarcinoma.

Methods

In retrospective study, consecutive series of 105 patients with advanced lung adenocarcinoma diagnosed from 2006 to 2009 in Clinical Hospital Center Split, Croatia, were analysed. Clinical data were collected from the Pulmology Department and time of death from the Mortality Registry. Paraffin blocks of bronchoscopic biopsies were collected from the Institute of Pathology and 15 cases excluded from the analysis due to insufficient material. Expression of IMP3, Napsin A and TTF-1 were analysed by indirect enzyme immunohistochemistry. Statistical analysis was performed and P values less than 0.05 considered significant.

Results

Of 90 patients, 71 (78%) were males and 19 (22%) females. Median age for males was 61.5 years (min-max 43-83) and for females 61 years (min-max 44-86). Pleural effusion was found in 15 (16.6%) and distant metastases in 45 (50%) cases. According to histological subtypes, there were 34 acinar, 2 lepidic, 2 papillary and 52 solid subtypes. IMP3 overexpression was found in 63 cases (70%) and was correlated with solid subtype (P = 0.002) and negative/weak Napsin A expression (P = 0.004). Strong Napsin A expression correlated with TTF-1 expression (P = 0.003) and lower histological grades (P = 0.031). Patients with IMP3 overexpression more often had distant metastases than patients with negative IMP3, 55.5% versus 33.3% (P = 0.033). Non solid subtypes with IMP3 overexpression developed distant metastasis more commonly than non solid subtypes with negative IMP3, 72% versus 35% (P = 0.028).

Conclusions

Expression of IMP3 correlates with solid subtype and with distant metastases regardless of histological subtype of lung adenocarcinoma.

Virtual slides

http://www.diagnosticpathology.diagnomx.eu/vs/1966211581795258",2012-11-28 +22637376,Detailed protocol for the lifestyle intervention in the BeWEL randomised controlled trial of weight loss in adults who have had a colorectal adenoma. ,"The BeWEL study is aimed at assessing the impact of a personalised lifestyle programme on body weight in people at risk of developing colorectal adenomas. The study is a two-arm multicentre randomised controlled trial comparing the BeWEL lifestyle programme against usual care. Over 12 months, 316 people who have had a colorectal adenoma removed through the national screening programme will be randomised to provide 80% power to detect a weight loss (primary outcome) of 7% over 12 months. The 12-month intervention will be delivered by lifestyle counsellors via three face-to-face visits followed by nine monthly telephone support calls. Consultant endorsement for the study will be stressed. An individualised caloric prescription based on estimates for weight maintenance -600 kcal will be calculated. Motivational interviewing techniques will be used to identify personal motivations for weight change and ways to improve perceived self-efficacy. The programme will utilise personalised diet and physical activity data from baseline measures to set behavioural goals. A range of behavioural strategies will be employed to support lifestyle change including goal setting, identifying specific implementation intentions, self-monitoring and feedback. Emphasis will be placed on self-monitoring body weight, and weighing scales will be provided. Programme acceptability will be explored postintervention with indepth interviews. Compliance and impact will be assessed by baseline and follow-up measures of diet by self-report, activity by accelerometry and anthropometry. Ethical approval has been obtained from the Tayside Committee on Medical Research Ethics. 
Dissemination of results will focus on publications in peer-reviewed journals, presentations at national/international cancer meetings and NHS groups. In addition, the work will be communicated to the public through forums such as The Scottish Cancer Prevention Network (http://www.cancerpreventionscotland.co.uk/). The trial is registered with Current Controlled Trials (International Standard Randomised Controlled Trials No: ISRCTN53033856).",2012-05-25 +21653523,"The Biological Connection Markup Language: a SBGN-compliant format for visualization, filtering and analysis of biological pathways.","

Motivation

Many models and analysis of signaling pathways have been proposed. However, neither of them takes into account that a biological pathway is not a fixed system, but instead it depends on the organism, tissue and cell type as well as on physiological, pathological and experimental conditions.

Results

The Biological Connection Markup Language (BCML) is a format to describe, annotate and visualize pathways. BCML is able to store multiple information, permitting a selective view of the pathway as it exists and/or behave in specific organisms, tissues and cells. Furthermore, BCML can be automatically converted into data formats suitable for analysis and into a fully SBGN-compliant graphical representation, making it an important tool that can be used by both computational biologists and 'wet lab' scientists.

Availability and implementation

The XML schema and the BCML software suite are freely available under the LGPL for download at http://bcml.dc-atlas.net. They are implemented in Java and supported on MS Windows, Linux and OS X.",2011-06-07 +22333244,ShapePheno: unsupervised extraction of shape phenotypes from biological image collections.,"

Motivation

Accurate large-scale phenotyping has recently gained considerable importance in biology. For example, in genome-wide association studies technological advances have rendered genotyping cheap, leaving phenotype acquisition as the major bottleneck. Automatic image analysis is one major strategy to phenotype individuals in large numbers. Current approaches for visual phenotyping focus predominantly on summarizing statistics and geometric measures, such as height and width of an individual, or color histograms and patterns. However, more subtle, but biologically informative phenotypes, such as the local deformation of the shape of an individual with respect to the population mean cannot be automatically extracted and quantified by current techniques.

Results

We propose a probabilistic machine learning model that allows for the extraction of deformation phenotypes from biological images, making them available as quantitative traits for downstream analysis. Our approach jointly models a collection of images using a learned common template that is mapped onto each image through a deformable smooth transformation. In a case study, we analyze the shape deformations of 388 guppy fish (Poecilia reticulata). We find that the flexible shape phenotypes our model extracts are complementary to basic geometric measures. Moreover, these quantitative traits assort the observations into distinct groups and can be mapped to polymorphic genetic loci of the sample set.

Availability

Code is available under: http://bioweb.me/GEBI CONTACT: theofanis.karaletsos@tuebingen.mpg.de; oliver.stegle@tuebingen.mpg.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-02-13 +21624157,How orthogonal are the OBO Foundry ontologies?,"

Background

Ontologies in biomedicine facilitate information integration, data exchange, search and query of biomedical data, and other critical knowledge-intensive tasks. The OBO Foundry is a collaborative effort to establish a set of principles for ontology development with the eventual goal of creating a set of interoperable reference ontologies in the domain of biomedicine. One of the key requirements to achieve this goal is to ensure that ontology developers reuse term definitions that others have already created rather than create their own definitions, thereby making the ontologies orthogonal.

Methods

We used a simple lexical algorithm to analyze the extent to which the set of OBO Foundry candidate ontologies identified from September 2009 to September 2010 conforms to this vision. Specifically, we analyzed (1) the level of explicit term reuse in this set of ontologies, (2) the level of overlap, where two ontologies define similar terms independently, and (3) how the levels of reuse and overlap changed during the course of this year.

Results

We found that 30% of the ontologies reuse terms from other Foundry candidates and 96% of the candidate ontologies contain terms that overlap with terms from the other ontologies. We found that while term reuse increased among the ontologies between September 2009 and September 2010, the level of overlap among the ontologies remained relatively constant. Additionally, we analyzed the six ontologies announced as OBO Foundry members on March 5, 2010, and identified that the level of overlap was extremely low, but, notably, so was the level of term reuse.

Conclusions

We have created a prototype web application that allows OBO Foundry ontology developers to see which classes from their ontologies overlap with classes from other ontologies in the OBO Foundry (http://obomap.bioontology.org). From our analysis, we conclude that while the OBO Foundry has made significant progress toward orthogonality during the period of this study through increased adoption of explicit term reuse, a large amount of overlap remains among these ontologies. Furthermore, the characteristics of the identified overlap, such as the terms it comprises and its distribution among the ontologies, indicate that the achieving orthogonality will be exceptionally difficult, if not impossible.",2011-05-17 +22807942,Development of novel breast cancer recurrence prediction model using support vector machine.,"

Purpose

The prediction of breast cancer recurrence is a crucial factor for successful treatment and follow-up planning. The principal objective of this study was to construct a novel prognostic model based on support vector machine (SVM) for the prediction of breast cancer recurrence within 5 years after breast cancer surgery in the Korean population, and to compare the predictive performance of the model with the previously established models.

Methods

Data on 679 patients, who underwent breast cancer surgery between 1994 and 2002, were collected retrospectively from a Korean tertiary teaching hospital. The following variables were selected as independent variables for the prognostic model, by using the established medical knowledge and univariate analysis: histological grade, tumor size, number of metastatic lymph node, estrogen receptor, lymphovascular invasion, local invasion of tumor, and number of tumors. Three prediction algorithms, with each using SVM, artificial neural network and Cox-proportional hazard regression model, were constructed and compared with one another. The resultant and most effective model based on SVM was compared with previously established prognostic models, which included Adjuvant! Online, Nottingham prognostic index (NPI), and St. Gallen guidelines.

Results

The SVM-based prediction model, named 'breast cancer recurrence prediction based on SVM (BCRSVM),' proposed herein outperformed other prognostic models (area under the curve=0.85, 0.71, 0.70, respectively for the BCRSVM, Adjuvant! Online, and NPI). The BCRSVM evidenced substantially high sensitivity (0.89), specificity (0.73), positive predictive values (0.75), and negative predictive values (0.89).

Conclusion

As the selected prognostic factors can be easily obtained in clinical practice, the proposed model might prove useful in the prediction of breast cancer recurrence. The prediction model is freely available in the website (http://ami.ajou.ac.kr/bcr/).",2012-06-28 +21886100,Predicting protein-protein interactions on a proteome scale by matching evolutionary and structural similarities at interfaces using PRISM.,"Prediction of protein-protein interactions at the structural level on the proteome scale is important because it allows prediction of protein function, helps drug discovery and takes steps toward genome-wide structural systems biology. We provide a protocol (termed PRISM, protein interactions by structural matching) for large-scale prediction of protein-protein interactions and assembly of protein complex structures. The method consists of two components: rigid-body structural comparisons of target proteins to known template protein-protein interfaces and flexible refinement using a docking energy function. The PRISM rationale follows our observation that globally different protein structures can interact via similar architectural motifs. PRISM predicts binding residues by using structural similarity and evolutionary conservation of putative binding residue 'hot spots'. Ultimately, PRISM could help to construct cellular pathways and functional, proteome-scale annotation. PRISM is implemented in Python and runs in a UNIX environment. The program accepts Protein Data Bank-formatted protein structures and is available at http://prism.ccbb.ku.edu.tr/prism_protocol/.",2011-08-11 +22584875,Are you Baby-Friendly? Knowledge deficit among US maternity staff.,"

Background

The Baby-Friendly Hospital Initiative began in 1991. In 2010, approximately 3% of United States (US) hospitals were Baby-Friendly certified. When collecting data for related studies, we noted that many maternity staff erroneously claimed their hospital was Baby-Friendly.™

Objective

To determine whether maternity staff in US hospitals could accurately describe their institution's status with regard to Baby-Friendly certification.

Methods

In 2010-2011, we called all maternity hospitals in the US and asked to be connected to the maternity service. We then asked the person answering the maternity service phone: ""Is your hospital a Baby-Friendly hospital?"" and recorded the position of the respondent.

Results

We called 2974 hospitals, and received answers on Baby-Friendly status from 2851. According to the Baby-Friendly USA Website (http://www.babyfriendlyusa.org), 3% (75/2851) of these hospitals were Baby-Friendly. However, staff at 62% (1780/2851) stated their hospital was Baby-Friendly. Staff at 15% (424/2851) did not know what the caller meant by ""Baby-Friendly hospital."" Accuracy of knowledge varied dependent on the respondent's job title (P < .001). International Board Certified Lactation Consultants were most likely to be accurate, with 89% answering correctly. There was a strong positive correlation between the proportion of Baby-Friendly hospitals and the proportion of correct responses by state (r = 0.62, P < .001).

Conclusion

Although the Baby-Friendly Hospital Initiative was established over 20 years ago, most US maternity staff responding to a telephone survey either incorrectly believed their hospital to be Baby-Friendly certified or were unaware of the meaning of ""Baby-Friendly hospital.""",2012-05-14 +22479614,GPS-ARM: computational analysis of the APC/C recognition motif by predicting D-boxes and KEN-boxes.,"Anaphase-promoting complex/cyclosome (APC/C), an E3 ubiquitin ligase incorporated with Cdh1 and/or Cdc20 recognizes and interacts with specific substrates, and faithfully orchestrates the proper cell cycle events by targeting proteins for proteasomal degradation. Experimental identification of APC/C substrates is largely dependent on the discovery of APC/C recognition motifs, e.g., the D-box and KEN-box. Although a number of either stringent or loosely defined motifs proposed, these motif patterns are only of limited use due to their insufficient powers of prediction. We report the development of a novel GPS-ARM software package which is useful for the prediction of D-boxes and KEN-boxes in proteins. Using experimentally identified D-boxes and KEN-boxes as the training data sets, a previously developed GPS (Group-based Prediction System) algorithm was adopted. By extensive evaluation and comparison, the GPS-ARM performance was found to be much better than the one using simple motifs. With this powerful tool, we predicted 4,841 potential D-boxes in 3,832 proteins and 1,632 potential KEN-boxes in 1,403 proteins from H. sapiens, while further statistical analysis suggested that both the D-box and KEN-box proteins are involved in a broad spectrum of biological processes beyond the cell cycle. In addition, with the co-localization information, we predicted hundreds of mitosis-specific APC/C substrates with high confidence. 
As the first computational tool for the prediction of APC/C-mediated degradation, GPS-ARM is a useful tool for information to be used in further experimental investigations. The GPS-ARM is freely accessible for academic researchers at: http://arm.biocuckoo.org.",2012-03-29 +21615972,MixtureTree: a program for constructing phylogeny.,"

Background

MixtureTree v1.0 is a Linux based program (written in C++) which implements an algorithm based on mixture models for reconstructing phylogeny from binary sequence data, such as single-nucleotide polymorphisms (SNPs). In addition to the mixture algorithm with three different optimization options, the program also implements a bootstrap procedure with majority-rule consensus.

Results

The MixtureTree program written in C++ is a Linux based package. The User's Guide and source codes will be available at http://math.asu.edu/~scchen/MixtureTree.html

Conclusions

The efficiency of the mixture algorithm is relatively higher than some classical methods, such as Neighbor-Joining method, Maximum Parsimony method and Maximum Likelihood method. The shortcoming of the mixture tree algorithms, for example timing consuming, can be improved by implementing other revised Expectation-Maximization(EM) algorithms instead of the traditional EM algorithm.",2011-04-21 +21505035,"FR-HIT, a very fast program to recruit metagenomic reads to homologous reference genomes.","

Summary

Fragment recruitment, a process of aligning sequencing reads to reference genomes, is a crucial step in metagenomic data analysis. The available sequence alignment programs are either slow or insufficient for recruiting metagenomic reads. We implemented an efficient algorithm, FR-HIT, for fragment recruitment. We applied FR-HIT and several other tools including BLASTN, MegaBLAST, BLAT, LAST, SSAHA2, SOAP2, BWA and BWA-SW to recruit four metagenomic datasets from different type of sequencers. On average, FR-HIT and BLASTN recruited significantly more reads than other programs, while FR-HIT is about two orders of magnitude faster than BLASTN. FR-HIT is slower than the fastest SOAP2, BWA and BWA-SW, but it recruited 1-5 times more reads.

Availability

http://weizhongli-lab.org/frhit.",2011-04-19 +21615913,A novel and well-defined benchmarking method for second generation read mapping.,"

Background

Second generation sequencing technologies yield DNA sequence data at ultra high-throughput. Common to most biological applications is a mapping of the reads to an almost identical or highly similar reference genome. The assessment of the quality of read mapping results is not straightforward and has not been formalized so far. Hence, it has not been easy to compare different read mapping approaches in a unified way and to determine which program is the best for what task.

Results

We present a new benchmark method, called Rabema (Read Alignment BEnchMArk), for read mappers. It consists of a strict definition of the read mapping problem and of tools to evaluate the result of arbitrary read mappers supporting the SAM output format.

Conclusions

We show the usefulness of the benchmark program by performing a comparison of popular read mappers. The tools supporting the benchmark are licensed under the GPL and available from http://www.seqan.de/projects/rabema.html.",2011-05-26 +21827286,ModuleSearch: finding functional modules in a protein-protein interaction network.,"Many biological processes are performed by a group of proteins rather than by individual proteins. Proteins involved in the same biological process often form a densely connected sub-graph in a protein-protein interaction network. Therefore, finding a dense sub-graph provides useful information to predict the function or protein complex of uncharacterised proteins in the sub-graph. We developed a heuristic algorithm that finds functional modules in a protein-protein interaction network and visualises the modules. The algorithm has been implemented in a platform-independent, standalone program called ModuleSearch. In an interaction network of yeast proteins, ModuleSearch found 366 overlapping modules. Of the modules, 71% have a function shared by more than half the proteins in the module and 58% have a function shared by all proteins in the module. Comparison of ModuleSearch with other programs shows that ModuleSearch finds more sub-graphs than most other programs, yet a higher proportion of the sub-graphs correspond to known functional modules. ModuleSearch and sample data are freely available to academics at http://bclab.inha.ac.kr/ModuleSearch.",2011-08-09 +21827640,GONe: software for estimating effective population size in species with generational overlap.,"GONe is a user-friendly, Windows-based program for estimating effective size (N(e) ) in populations with overlapping generations. It uses the Jorde-Ryman modification to the temporal method to account for age structure in populations. This method requires estimates of age-specific survival and birth rate and allele frequencies measured in two or more consecutive cohorts. 
Allele frequencies are acquired by reading in genotypic data from files formatted for either GENEPOP or TEMPOFS. For each interval between consecutive cohorts, N(e) is estimated at each locus and over all loci. Furthermore, N(e) estimates are output for three different genetic drift estimators (F(s) , F(c) and F(k) ). Confidence intervals are derived from a chi-square distribution with degrees of freedom equal to the number of independent alleles. GONe has been validated over a wide range of N(e) values, and for scenarios where survival and birth rates differ between sexes, sex ratios are unequal and reproductive variances differ. GONe is freely available for download at https://bcrc.bio.umass.edu/pedigreesoftware/.",2011-08-09 +21715388,inGAP-sv: a novel scheme to identify and visualize structural variation from paired end mapping data.,"Mining genetic variation from personal genomes is a crucial step towards investigating the relationship between genotype and phenotype. However, compared to the detection of SNPs and small indels, characterizing large and particularly complex structural variation is much more difficult and less intuitive. In this article, we present a new scheme (inGAP-sv) to detect and visualize structural variation from paired-end mapping data. Under this scheme, abnormally mapped read pairs are clustered based on the location of a gap signature. Several important features, including local depth of coverage, mapping quality and associated tandem repeat, are used to evaluate the quality of predicted structural variation. Compared with other approaches, it can detect many more large insertions and complex variants with lower false discovery rate. Moreover, inGAP-sv, written in Java programming language, provides a user-friendly interface and can be performed in multiple operating systems. It can be freely accessed at http://ingap.sourceforge.net/.",2011-07-01 +21037493,Discus: investigating subjective judgment of optic disc damage.,"

Purpose

To describe a software package (Discus) for investigating clinicians' subjective assessment of optic disc damage [diagnostic accuracy in detecting visual field (VF) damage, decision criteria, and agreement with a panel of experts] and to provide reference data from a group of expert observers.

Methods

Optic disc images were selected from patients with manifest or suspected glaucoma or ocular hypertension who attended the Manchester Royal Eye Hospital. Eighty images came from eyes without evidence of VF loss in at least four consecutive tests (VF negatives), and 20 images from eyes with repeatable VF loss (VF positives). Software was written to display these images in randomized order, for up to 60 s. Expert observers (n = 12) rated optic disc damage on a 5-point scale (definitely healthy, probably healthy, not sure, probably damaged, and definitely damaged).

Results

Optic disc damage as determined by the expert observers predicted VF loss with less than perfect accuracy (mean area under receiver-operating characteristic curve, 0.78; range, 0.72 to 0.85). When the responses were combined across the panel of experts, the area under receiver-operating characteristic curve reached 0.87, corresponding to a sensitivity of ∼60% at 90% specificity. Although the observers' performances were similar, there were large differences between the criteria they adopted (p < 0.001), even though all observers had been given identical instructions.

Conclusions

Discus provides a simple and rapid means for assessing important aspects of optic disc interpretation. The data from the panel of expert observers provide a reference against which students, trainees, and clinicians may compare themselves. The program and the analyses described in this article are freely accessible from http://www.discusproject.blogspot.com/.",2011-01-01 +22492479,Effect of alendronate for reducing fracture by FRAX score and femoral neck bone mineral density: the Fracture Intervention Trial.,"The WHO Fracture Risk Assessment Tool (FRAX; http://www.shef.ac.uk/FRAX) estimates the 10-year probability of major osteoporotic fracture. Clodronate and bazedoxifene reduced nonvertebral and clinical fracture more effectively on a relative scale in women with higher FRAX scores. We used data from the Fracture Intervention Trial (FIT) to evaluate the interaction between FRAX score and treatment with alendronate. We combined the Clinical Fracture (CF) arm and Vertebral Fracture (VF) arm of FIT. The CF and VF arm of FIT randomized 4432 and 2027 women, respectively, to placebo or alendronate for 4 and 3 years, respectively. FRAX risk factors were assessed at baseline. FRAX scores were calculated by WHO. We used Poisson regression models to assess the interaction between alendronate and FRAX score on the risk of nonvertebral, clinical, major osteoporotic, and radiographic vertebral fractures. Overall, alendronate significantly reduced the risk of nonvertebral fracture (incidence rate ratio [IRR] 0.86; 95% confidence interval [CI], 0.75-0.99), but the effect was greater for femoral neck (FN) bone mineral density (BMD) T-score ≤ -2.5 (IRR 0.76; 95% CI, 0.62-0.93) than for FN T-score > -2.5 (IRR 0.96; 95% CI, 0.80-1.16) (p = 0.02, interaction between alendronate and FN BMD). However, there was no evidence of an interaction between alendronate and FRAX score with FN BMD for risk of nonvertebral fracture (interaction p = 0.61). 
The absolute benefit of alendronate was greatest among women with highest FRAX scores. Results were similar for clinical fractures, major osteoporotic fractures, and radiographic vertebral fractures and whether or not FRAX scores included FN BMD. Among this cohort of women with low bone mass there was no significant interaction between FRAX score and alendronate for nonvertebral, clinical or major osteoporotic fractures, or radiographic vertebral fractures. These results suggest that the effect of alendronate on a relative scale does not vary by FRAX score. A randomized controlled trial testing the effect of antifracture agents among women with high FRAX score but without osteoporosis is warranted.",2012-08-01 +22859988,"An expanded multilocus sequence typing scheme for propionibacterium acnes: investigation of 'pathogenic', 'commensal' and antibiotic resistant strains.","The Gram-positive bacterium Propionibacterium acnes is a member of the normal human skin microbiota and is associated with various infections and clinical conditions. There is tentative evidence to suggest that certain lineages may be associated with disease and others with health. We recently described a multilocus sequence typing scheme (MLST) for P. acnes based on seven housekeeping genes (http://pubmlst.org/pacnes). We now describe an expanded eight gene version based on six housekeeping genes and two 'putative virulence' genes (eMLST) that provides improved high resolution typing (91eSTs from 285 isolates), and generates phylogenies congruent with those based on whole genome analysis. When compared with the nine gene MLST scheme developed at the University of Bath, UK, and utilised by researchers at Aarhus University, Denmark, the eMLST method offers greater resolution. Using the scheme, we examined 208 isolates from disparate clinical sources, and 77 isolates from healthy skin. 
Acne was predominately associated with type IA(1) clonal complexes CC1, CC3 and CC4; with eST1 and eST3 lineages being highly represented. In contrast, type IA(2) strains were recovered at a rate similar to type IB and II organisms. Ophthalmic infections were predominately associated with type IA(1) and IA(2) strains, while type IB and II were more frequently recovered from soft tissue and retrieved medical devices. Strains with rRNA mutations conferring resistance to antibiotics used in acne treatment were dominated by eST3, with some evidence for intercontinental spread. In contrast, despite its high association with acne, only a small number of resistant CC1 eSTs were identified. A number of eSTs were only recovered from healthy skin, particularly eSTs representing CC72 (type II) and CC77 (type III). Collectively our data lends support to the view that pathogenic versus truly commensal lineages of P. acnes may exist. This is likely to have important therapeutic and diagnostic implications.",2012-07-30 +22160766,ALF--a simulation framework for genome evolution.,"In computational evolutionary biology, verification and benchmarking is a challenging task because the evolutionary history of studied biological entities is usually not known. Computer programs for simulating sequence evolution in silico have shown to be viable test beds for the verification of newly developed methods and to compare different algorithms. However, current simulation packages tend to focus either on gene-level aspects of genome evolution such as character substitutions and insertions and deletions (indels) or on genome-level aspects such as genome rearrangement and speciation events. 
Here, we introduce Artificial Life Framework (ALF), which aims at simulating the entire range of evolutionary forces that act on genomes: nucleotide, codon, or amino acid substitution (under simple or mixture models), indels, GC-content amelioration, gene duplication, gene loss, gene fusion, gene fission, genome rearrangement, lateral gene transfer (LGT), or speciation. The other distinctive feature of ALF is its user-friendly yet powerful web interface. We illustrate the utility of ALF with two possible applications: 1) we reanalyze data from a study of selection after globin gene duplication and test the statistical significance of the original conclusions and 2) we demonstrate that LGT can dramatically decrease the accuracy of two well-established orthology inference methods. ALF is available as a stand-alone application or via a web interface at http://www.cbrg.ethz.ch/alf.",2011-12-08 +21464987,CPORT: a consensus interface predictor and its performance in prediction-driven docking with HADDOCK.,"

Background

Macromolecular complexes are the molecular machines of the cell. Knowledge at the atomic level is essential to understand and influence their function. However, their number is huge and a significant fraction is extremely difficult to study using classical structural methods such as NMR and X-ray crystallography. Therefore, the importance of large-scale computational approaches in structural biology is evident. This study combines two of these computational approaches, interface prediction and docking, to obtain atomic-level structures of protein-protein complexes, starting from their unbound components.

Methodology/principal findings

Here we combine six interface prediction web servers into a consensus method called CPORT (Consensus Prediction Of interface Residues in Transient complexes). We show that CPORT gives more stable and reliable predictions than each of the individual predictors on its own. A protocol was developed to integrate CPORT predictions into our data-driven docking program HADDOCK. For cases where experimental information is limited, this prediction-driven docking protocol presents an alternative to ab initio docking, the docking of complexes without the use of any information. Prediction-driven docking was performed on a large and diverse set of protein-protein complexes in a blind manner. Our results indicate that the performance of the HADDOCK-CPORT combination is competitive with ZDOCK-ZRANK, a state-of-the-art ab initio docking/scoring combination. Finally, the original interface predictions could be further improved by interface post-prediction (contact analysis of the docking solutions).

Conclusions/significance

The current study shows that blind, prediction-driven docking using CPORT and HADDOCK is competitive with ab initio docking methods. This is encouraging since prediction-driven docking represents the absolute bottom line for data-driven docking: any additional biological knowledge will greatly improve the results obtained by prediction-driven docking alone. Finally, the fact that original interface predictions could be further improved by interface post-prediction suggests that prediction-driven docking has not yet been pushed to the limit. A web server for CPORT is freely available at http://haddock.chem.uu.nl/services/CPORT.",2011-03-25 +21586519,3FunMap: full-sib family functional mapping of dynamic traits.,"

Motivation

Functional mapping that embeds the developmental mechanisms of complex traits shows great power to study the dynamic pattern of genetic effects triggered by individual quantitative trait loci (QTLs). A full-sib family, produced by crossing two heterozygous parents, is characteristic of uncertainties about cross-type at a locus and linkage phase between different loci. Integrating functional mapping into a full-sib family requires a model selection procedure capable of addressing these uncertainties. 3FunMap, written in VC++ 6.0, provides a flexible and extensible platform to perform full-sib functional mapping of dynamic traits. Functions in the package encompass linkage phase determination, marker map construction and the pattern identification of QTL segregation, dynamic tests of QTL effects, permutation tests and numerical simulation. We demonstrate the features of 3FunMap through real data analysis and computer simulation.

Availability

http://statgen.psu.edu/software.",2011-05-17 +22151882,STELLAR: fast and exact local alignments.,"

Background

Large-scale comparison of genomic sequences requires reliable tools for the search of local alignments. Practical local aligners are in general fast, but heuristic, and hence sometimes miss significant matches.

Results

We present here the local pairwise aligner STELLAR that has full sensitivity for ε-alignments, i.e. guarantees to report all local alignments of a given minimal length and maximal error rate. The aligner is composed of two steps, filtering and verification. We apply the SWIFT algorithm for lossless filtering, and have developed a new verification strategy that we prove to be exact. Our results on simulated and real genomic data confirm and quantify the conjecture that heuristic tools like BLAST or BLAT miss a large percentage of significant local alignments.

Conclusions

STELLAR is very practical and fast on very long sequences which makes it a suitable new tool for finding local alignments between genomic sequences under the edit distance model. Binaries are freely available for Linux, Windows, and Mac OS X at http://www.seqan.de/projects/stellar. The source code is freely distributed with the SeqAn C++ library version 1.3 and later at http://www.seqan.de.",2011-10-05 +22373355,HabiSign: a novel approach for comparison of metagenomes and rapid identification of habitat-specific sequences.,"

Background

One of the primary goals of comparative metagenomic projects is to study the differences in the microbial communities residing in diverse environments. Besides providing valuable insights into the inherent structure of the microbial populations, these studies have potential applications in several important areas of medical research like disease diagnostics, detection of pathogenic contamination and identification of hitherto unknown pathogens. Here we present a novel and rapid, alignment-free method called HabiSign, which utilizes patterns of tetra-nucleotide usage in microbial genomes to bring out the differences in the composition of both diverse and related microbial communities.

Results

Validation results show that the metagenomic signatures obtained using the HabiSign method are able to accurately cluster metagenomes at biome, phenotypic and species levels, as compared to an average tetranucleotide frequency based approach and the recently published dinucleotide relative abundance based approach. More importantly, the method is able to identify subsets of sequences that are specific to a particular habitat. Apart from this, being alignment-free, the method can rapidly compare and group multiple metagenomic data sets in a short span of time.

Conclusions

The proposed method is expected to have immense applicability in diverse areas of metagenomic research ranging from disease diagnostics and pathogen detection to bio-prospecting. A web-server for the HabiSign algorithm is available at http://metagenomics.atc.tcs.com/HabiSign/.",2011-11-30 +22824307,The treatable intellectual disability APP www.treatable-id.org: a digital tool to enhance diagnosis & care for rare diseases.,"

Background

Intellectual disability (ID) is a devastating and frequent condition, affecting 2-3% of the population worldwide. Early recognition of treatable underlying conditions drastically improves health outcomes and decreases burdens to patients, families and society. Our systematic literature review identified 81 such inborn errors of metabolism, which present with ID as a prominent feature and are amenable to causal therapy. The WebAPP translates this knowledge of rare diseases into a diagnostic tool and information portal.

Methods & results

Freely available as a WebAPP via http://www.treatable-id.org and end 2012 via the APP store, this diagnostic tool is designed for all specialists evaluating children with global delay / ID and laboratory scientists. Information on the 81 diseases is presented in different ways with search functions: 15 biochemical categories, neurologic and non-neurologic signs & symptoms, diagnostic investigations (metabolic screening tests in blood and urine identify 65% of all IEM), therapies & effects on primary (IQ/developmental quotient) and secondary outcomes, and available evidence For each rare condition a 'disease page' serves as an information portal with online access to specific genetics, biochemistry, phenotype, diagnostic tests and therapeutic options. As new knowledge and evidence is gained from expert input and PubMed searches this tool will be continually updated. The WebAPP is an integral part of a protocol prioritizing treatability in the work-up of every child with global delay / ID. A 3-year funded study will enable an evaluation of its effectiveness.

Conclusions

For rare diseases, a field for which financial and scientific resources are particularly scarce, knowledge translation challenges are abundant. With this WebAPP technology is capitalized to raise awareness for rare treatable diseases and their common presenting clinical feature of ID, with the potential to improve health outcomes. This innovative digital tool is designed to motivate health care providers to search actively for treatable causes of ID, and support an evidence-based approach to rare metabolic diseases. In our current -omics world with continuous information flow, the effective synthesis of data into accessible, clinical knowledge has become ever more essential to bridge the gap between research and care.",2012-07-23 +21349861,"Improved structure, function and compatibility for CellProfiler: modular high-throughput image analysis software.","

Unlabelled

There is a strong and growing need in the biology research community for accurate, automated image analysis. Here, we describe CellProfiler 2.0, which has been engineered to meet the needs of its growing user base. It is more robust and user friendly, with new algorithms and features to facilitate high-throughput work. ImageJ plugins can now be run within a CellProfiler pipeline.

Availability and implementation

CellProfiler 2.0 is free and open source, available at http://www.cellprofiler.org under the GPL v. 2 license. It is available as a packaged application for Macintosh OS X and Microsoft Windows and can be compiled for Linux.

Contact

anne@broadinstitute.org

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-23 +22558141,Linking the epigenome to the genome: correlation of different features to DNA methylation of CpG islands.,"DNA methylation of CpG islands plays a crucial role in the regulation of gene expression. More than half of all human promoters contain CpG islands with a tissue-specific methylation pattern in differentiated cells. Still today, the whole process of how DNA methyltransferases determine which region should be methylated is not completely revealed. There are many hypotheses of which genomic features are correlated to the epigenome that have not yet been evaluated. Furthermore, many explorative approaches of measuring DNA methylation are limited to a subset of the genome and thus, cannot be employed, e.g., for genome-wide biomarker prediction methods. In this study, we evaluated the correlation of genetic, epigenetic and hypothesis-driven features to DNA methylation of CpG islands. To this end, various binary classifiers were trained and evaluated by cross-validation on a dataset comprising DNA methylation data for 190 CpG islands in HEPG2, HEK293, fibroblasts and leukocytes. We achieved an accuracy of up to 91% with an MCC of 0.8 using ten-fold cross-validation and ten repetitions. With these models, we extended the existing dataset to the whole genome and thus, predicted the methylation landscape for the given cell types. The method used for these predictions is also validated on another external whole-genome dataset. Our results reveal features correlated to DNA methylation and confirm or disprove various hypotheses of DNA methylation related features. This study confirms correlations between DNA methylation and histone modifications, DNA structure, DNA sequence, genomic attributes and CpG island properties. Furthermore, the method has been validated on a genome-wide dataset from the ENCODE consortium. 
The developed software, as well as the predicted datasets and a web-service to compare methylation states of CpG islands are available at http://www.cogsys.cs.uni-tuebingen.de/software/dna-methylation/.",2012-04-30 +21471012,ProDy: protein dynamics inferred from theory and experiments.,"

Summary

We developed a Python package, ProDy, for structure-based analysis of protein dynamics. ProDy allows for quantitative characterization of structural variations in heterogeneous datasets of structures experimentally resolved for a given biomolecular system, and for comparison of these variations with the theoretically predicted equilibrium dynamics. Datasets include structural ensembles for a given family or subfamily of proteins, their mutants and sequence homologues, in the presence/absence of their substrates, ligands or inhibitors. Numerous helper functions enable comparative analysis of experimental and theoretical data, and visualization of the principal changes in conformations that are accessible in different functional states. ProDy application programming interface (API) has been designed so that users can easily extend the software and implement new methods.

Availability

ProDy is open source and freely available under GNU General Public License from http://www.csb.pitt.edu/ProDy/.",2011-04-05 +21972962,Analysis of potential genomic confounding in genetic association studies and an online genomic confounding browser (GCB).,"Genome-wide association studies have transformed genetic studies of disease susceptibility, identifying many variants that may tag functional polymorphism nearby. Variants are often ascribed to a physically close gene exhibiting plausible functionality for a causal pathway. However, more physically remote genes may be at a lesser linkage or linkage disequilibrium (LD) distance from the tested SNP and could therefore contain the functional variant tagged. This analysis aims to identify instances where research may be misled by misassociation of a variant with a gene and develop tools to analyse genomic confounding. A catalogue of reported associations was systematically analysed for unreported genes which may represent the true functionality ascribed to a reported variant, calculating physical and genetic distances for all genes within 1 cM of the tagging polymorphism. Results revealed 55 SNPs where recombination was lower between the identified SNP and a physically more remote gene than initially reported, and 374 where an alternative gene was genetically and physically closer than the reported gene. Analyses show potential for genomic confounding through false inferences of variant association to a gene. An online visualization tool (http://gcb.genes.org.uk/) was developed to plot genes by physical and genetic distance relative to a variant, along with LD data.",2011-11-01 +22130595,Prediction and analysis of nucleotide-binding residues using sequence and sequence-derived structural descriptors.,"

Motivation

Nucleotides are multifunctional molecules that are essential for numerous biological processes. They serve as sources for chemical energy, participate in the cellular signaling and they are involved in the enzymatic reactions. The knowledge of the nucleotide-protein interactions helps with annotation of protein functions and finds applications in drug design.

Results

We propose a novel ensemble of accurate high-throughput predictors of binding residues from the protein sequence for ATP, ADP, AMP, GTP and GDP. Empirical tests show that our NsitePred method significantly outperforms existing predictors and approaches based on sequence alignment and residue conservation scoring. The NsitePred accurately finds more binding residues and binding sites and it performs particularly well for the sites with residues that are clustered close together in the sequence. The high predictive quality stems from the usage of novel, comprehensive and custom-designed inputs that utilize information extracted from the sequence, evolutionary profiles, several sequence-predicted structural descriptors and sequence alignment. Analysis of the predictive model reveals several sequence-derived hallmarks of nucleotide-binding residues; they are usually conserved and flanked by less conserved residues, and they are associated with certain arrangements of secondary structures and amino acid pairs in the specific neighboring positions in the sequence.

Availability

http://biomine.ece.ualberta.ca/nSITEpred/

Contact

lkurgan@ece.ualberta.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-29 +21460026,Rigid substructure search.,"

Motivation

Identifying the location of binding sites on proteins is of fundamental importance for a wide range of applications, including molecular docking, de novo drug design, structure identification and comparison of functional sites. Here we present Erebus, a web server that searches the entire Protein Data Bank for a given substructure defined by a set of atoms of interest, such as the binding scaffolds for small molecules. The identified substructure contains atoms having the same names, belonging to same amino acids and separated by the same distances (within a given tolerance) as the atoms of the query structure. The accuracy of a match is measured by the root-mean-square deviation or by the normal weight with a given variance. Tests show that our approach can reliably locate rigid binding scaffolds of drugs and metal ions.

Availability and implementation

We provide this service through a web server at http://erebus.dokhlab.org.",2011-04-01 +21775303,miRDeep-P: a computational tool for analyzing the microRNA transcriptome in plants.,"

Motivation

Ultra-deep sampling of small RNA libraries by next-generation sequencing has provided rich information on the microRNA (miRNA) transcriptome of various plant species. However, few computational tools have been developed to effectively deconvolute the complex information.

Results

We sought to employ the signature distribution of small RNA reads along the miRNA precursor as a model in plants to profile expression of known miRNA genes and to identify novel ones. A freely available package, miRDeep-P, was developed by modifying miRDeep, which is based on a probabilistic model of miRNA biogenesis in animals, with a plant-specific scoring system and filtering criteria. We have tested miRDeep-P on eight small RNA libraries derived from three plants. Our results demonstrate miRDeep-P as an effective and easy-to-use tool for characterizing the miRNA transcriptome in plants.

Availability

http://faculty.virginia.edu/lilab/miRDP/ CONTACT: ll4jn@virginia.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-19 +22535207,A computational tool to detect and avoid redundancy in selected reaction monitoring.,"Selected reaction monitoring (SRM), also called multiple reaction monitoring, has become an invaluable tool for targeted quantitative proteomic analyses, but its application can be compromised by nonoptimal selection of transitions. In particular, complex backgrounds may cause ambiguities in SRM measurement results because peptides with interfering transitions similar to those of the target peptide may be present in the sample. Here, we developed a computer program, the SRMCollider, that calculates nonredundant theoretical SRM assays, also known as unique ion signatures (UIS), for a given proteomic background. We show theoretically that UIS of three transitions suffice to conclusively identify 90% of all yeast peptides and 85% of all human peptides. Using predicted retention times, the SRMCollider also simulates time-scheduled SRM acquisition, which reduces the number of interferences to consider and leads to fewer transitions necessary to construct an assay. By integrating experimental fragment ion intensities from large scale proteome synthesis efforts (SRMAtlas) with the information content-based UIS, we combine two orthogonal approaches to create high quality SRM assays ready to be deployed. We provide a user friendly, open source implementation of an algorithm to calculate UIS of any order that can be accessed online at http://www.srmcollider.org to find interfering transitions. Finally, our tool can also simulate the specificity of novel data-independent MS acquisition methods in Q1-Q3 space. This allows us to predict parameters for these methods that deliver a specificity comparable with that of SRM. Using SRM interference information in addition to other sources of information can increase the confidence in an SRM measurement. 
We expect that the consideration of information content will become a standard step in SRM assay design and analysis, facilitated by the SRMCollider.",2012-04-24 +22031444,Temporal dynamics and genetic control of transcription in the human prefrontal cortex.,"Previous investigations have combined transcriptional and genetic analyses in human cell lines, but few have applied these techniques to human neural tissue. To gain a global molecular perspective on the role of the human genome in cortical development, function and ageing, we explore the temporal dynamics and genetic control of transcription in human prefrontal cortex in an extensive series of post-mortem brains from fetal development through ageing. We discover a wave of gene expression changes occurring during fetal development which are reversed in early postnatal life. One half-century later in life, this pattern of reversals is mirrored in ageing and in neurodegeneration. Although we identify thousands of robust associations of individual genetic polymorphisms with gene expression, we also demonstrate that there is no association between the total extent of genetic differences between subjects and the global similarity of their transcriptional profiles. Hence, the human genome produces a consistent molecular architecture in the prefrontal cortex, despite millions of genetic differences across individuals and races. To enable further discovery, this entire data set is freely available (from Gene Expression Omnibus: accession GSE30272; and dbGaP: accession phs000417.v1.p1) and can also be interrogated via a biologist-friendly stand-alone application (http://www.libd.org/braincloud).",2011-10-26 +21551147,The role of indirect connections in gene networks in predicting function.,"

Motivation

Gene networks have been used widely in gene function prediction algorithms, many based on complex extensions of the 'guilt by association' principle. We sought to provide a unified explanation for the performance of gene function prediction algorithms in exploiting network structure and thereby simplify future analysis.

Results

We use co-expression networks to show that most exploited network structure simply reconstructs the original correlation matrices from which the co-expression network was obtained. We show the same principle works in predicting gene function in protein interaction networks and that these methods perform comparably to much more sophisticated gene function prediction algorithms.

Availability and implementation

Data and algorithm implementation are fully described and available at http://www.chibi.ubc.ca/extended. Programs are provided in Matlab m-code.

Contact

paul@chibi.ubc.ca",2011-05-06 +22408192,SMURFLite: combining simplified Markov random fields with simulated evolution improves remote homology detection for beta-structural proteins into the twilight zone.,"

Motivation

One of the most successful methods to date for recognizing protein sequences that are evolutionarily related has been profile hidden Markov models (HMMs). However, these models do not capture pairwise statistical preferences of residues that are hydrogen bonded in beta sheets. These dependencies have been partially captured in the HMM setting by simulated evolution in the training phase and can be fully captured by Markov random fields (MRFs). However, the MRFs can be computationally prohibitive when beta strands are interleaved in complex topologies. We introduce SMURFLite, a method that combines both simplified MRFs and simulated evolution to substantially improve remote homology detection for beta structures. Unlike previous MRF-based methods, SMURFLite is computationally feasible on any beta-structural motif.

Results

We test SMURFLite on all propeller and barrel folds in the mainly-beta class of the SCOP hierarchy in stringent cross-validation experiments. We show a mean 26% (median 16%) improvement in area under curve (AUC) for beta-structural motif recognition as compared with HMMER (a well-known HMM method) and a mean 33% (median 19%) improvement as compared with RAPTOR (a well-known threading method) and even a mean 18% (median 10%) improvement in AUC over HHPred (a profile-profile HMM method), despite HHpred's use of extensive additional training data. We demonstrate SMURFLite's ability to scale to whole genomes by running a SMURFLite library of 207 beta-structural SCOP superfamilies against the entire genome of Thermotoga maritima, and make over a 100 new fold predictions. Availability and implementaion: A webserver that runs SMURFLite is available at: http://smurf.cs.tufts.edu/smurflite/",2012-03-09 +21665924,SA-Mot: a web server for the identification of motifs of interest extracted from protein loops.,"The detection of functional motifs is an important step for the determination of protein functions. We present here a new web server SA-Mot (Structural Alphabet Motif) for the extraction and location of structural motifs of interest from protein loops. Contrary to other methods, SA-Mot does not focus only on functional motifs, but it extracts recurrent and conserved structural motifs involved in structural redundancy of loops. SA-Mot uses the structural word notion to extract all structural motifs from uni-dimensional sequences corresponding to loop structures. Then, SA-Mot provides a description of these structural motifs using statistics computed in the loop data set and in SCOP superfamily, sequence and structural parameters. SA-Mot results correspond to an interactive table listing all structural motifs extracted from a target structure and their associated descriptors. 
Using this information, the users can easily locate loop regions that are important for the protein folding and function. The SA-Mot web server is available at http://sa-mot.mti.univ-paris-diderot.fr.",2011-06-10 +21258065,Computational refinement of post-translational modifications predicted from tandem mass spectrometry.,"

Motivation

A post-translational modification (PTM) is a chemical modification of a protein that occurs naturally. Many of these modifications, such as phosphorylation, are known to play pivotal roles in the regulation of protein function. Henceforth, PTM perturbations have been linked to diverse diseases like Parkinson's, Alzheimer's, diabetes and cancer. To discover PTMs on a genome-wide scale, there is a recent surge of interest in analyzing tandem mass spectrometry data, and several unrestrictive (so-called 'blind') PTM search methods have been reported. However, these approaches are subject to noise in mass measurements and in the predicted modification site (amino acid position) within peptides, which can result in false PTM assignments.

Results

To address these issues, we devised a machine learning algorithm, PTMClust, that can be applied to the output of blind PTM search methods to improve prediction quality, by suppressing noise in the data and clustering peptides with the same underlying modification to form PTM groups. We show that our technique outperforms two standard clustering algorithms on a simulated dataset. Additionally, we show that our algorithm significantly improves sensitivity and specificity when applied to the output of three different blind PTM search engines, SIMS, InsPecT and MODmap. Additionally, PTMClust markedly outperforms another PTM refinement algorithm, PTMFinder. We demonstrate that our technique is able to reduce false PTM assignments, improve overall detection coverage and facilitate novel PTM discovery, including terminus modifications. We applied our technique to a large-scale yeast MS/MS proteome profiling dataset and found numerous known and novel PTMs. Accurately identifying modifications in protein sequences is a critical first step for PTM profiling, and thus our approach may benefit routine proteomic analysis.

Availability

Our algorithm is implemented in Matlab and is freely available for academic use. The software is available online from http://genes.toronto.edu.",2011-01-22 +28517378,SU-D-217BCD-01: Corrupted DICOM Image Recovering: A Clinical Experience.,"

Purpose

Colored DICOM secondary capture images generated from CT perfusion studies were corrupted if they were sent directly from a Siemens acquisition workstation to a GE viewing workstation. However, those images were properly displayed in the GE viewing workstation if they were transferred through a GE PACS first. The purpose of this work is to investigate the cause of image corruption and determine why passing through PACS corrected it.

Methods

DICOM headers of corrupted and non-corrupted (sent through the PACS) images were compared with a free DICOM software tool (http://DVTK.org); the differences were highlighted. Certain header tags were found in non-corrupted images, but not in corrupted images. These tags were sequentially removed until the non- corrupted image became corrupted. Once a candidate tag was found, fresh corrupt images were modified by adding a 'repair' tag and tested.

Results

It was found that the absence of Planar Configuration (0028, 0006) is the cause of image corruption. This attribute is used in the DICOM color image to specify whether the color pixel data are sent color-by-plane or color-by- pixel and should be present if the Sample per Pixel (0028, 0002) tag has a value greater than 1. In our DICOM color images, the values of (0028, 0002) and Photometric Interpretation (0028, 0004) are 3 and RGB, respectively. Thus (0028, 0006) should equal 0 (color-by-pixel), which is used for uncompressed or lossless compressed transfer syntaxes. Adding this tag and setting the value to zero manually repaired corrupt images.

Conclusions

Using open source DICOM tools and following the described process can be a valuable ally in the search for causes of image corruption. Comparing the headers and finding the handful of different tags rapidly led to an explanation that could be used by the vendor for a permanent fix.",2012-06-01 +22014212,pcrEfficiency: a Web tool for PCR amplification efficiency prediction.,"

Background

Relative calculation of differential gene expression in quantitative PCR reactions requires comparison between amplification experiments that include reference genes and genes under study. Ignoring the differences between their efficiencies may lead to miscalculation of gene expression even with the same starting amount of template. Although there are several tools performing PCR primer design, there is no tool available that predicts PCR efficiency for a given amplicon and primer pair.

Results

We have used a statistical approach based on 90 primer pair combinations amplifying templates from bacteria, yeast, plants and humans, ranging in size between 74 and 907 bp to identify the parameters that affect PCR efficiency. We developed a generalized additive model fitting the data and constructed an open source Web interface that allows the obtention of oligonucleotides optimized for PCR with predicted amplification efficiencies starting from a given sequence.

Conclusions

pcrEfficiency provides an easy-to-use web interface allowing the prediction of PCR efficiencies prior to web lab experiments thus easing quantitative real-time PCR set-up. A web-based service as well the source code are provided freely at http://srvgen.upct.es/efficiency.html under the GPL v2 license.",2011-10-20 +22510499,Clinicopathological significance of SOX4 expression in primary gallbladder carcinoma.,"

Aim

SOX4, as a member of the SRY-related HMG-box (SOX) transcription factor family, has been demonstrated to be involved in tumorigenesis of many human malignancies; however, its role in primary gallbladder carcinoma (PGC) is still largely unknown. The aim of this study was to investigate SOX4 expression in PGC and its prognostic significance.

Methods

From 1997 to 2006, 136 patients underwent resection for PGC. The median follow-up was 12.8 months. Immunostainings for SOX4 were performed on these archival tissues. The correlation of SOX4 expression with clinicopathological features including survival was analyzed.

Results

SOX4 was expressed in 75.0% (102/136) of PGC but not in the normal epithelium of the gallbladder. In addition, the over-expression of SOX4 was significantly associated with low histologic grade (P = 0.02), low pathologic T stage (P = 0.02), and early clinical stage (P = 0.03). The levels of SOX4 immunostainings in PGC tissues with positive nodal metastasis were also significantly lower than those without (P = 0.01). Moreover, Kaplan-Meier curves showed that SOX4 over-expression was significantly related to better overall (P = 0.008) and disease-free survival (P = 0.01). Furthermore, multivariate analyses showed that SOX4 expression was an independent risk factor for both overall (P = 0.03, hazard ratio, 3.682) and disease-free survival (P = 0.04, hazard ratio, 2.215).

Conclusion

Our data indicate for the first time that the over-expression of SOX4 in PGC was significantly correlated with favorable clinicopathologic features and was an independent prognostic factor for better overall and disease-free survival in patients. Therefore, SOX4 might be an auxiliary parameter for predicting malignant behavior for PGC.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1534825818694957.",2012-04-17 +22399642,"No consistent relationship of glioblastoma incidence and cytomegalovirus seropositivity in whites, blacks, and Hispanics.","

Unlabelled

Glioblastoma multiforme is the most common and most aggressive type of primary brain tumor, accounting for 52% of all primary brain tumor cases and 20% of all intracranial tumors. Recently, evidence for a viral cause has been postulated, possibly cytomegalovirus (CMV). In one report, 80% of patients with newly diagnosed glioblastoma multiforme had detectable cytomegalovirus DNA in their peripheral blood, while sero-positive normal donors and other surgical patients did not exhibit detectable virus. However, another study reported that five glioblastoma patients showed no circulating CMV detected either with RT-PCR or blood culture.

Materials and methods

We utilized Cytomegalovirus Seroprevalence in the United States data from the National Health and Nutrition Examination Surveys, 1988-2004. Glioblastoma Incidence Rates 2004-2008 by race and gender are from Cancer of the Brain and Other Nervous System - SEER Stat Fact Sheets (http://seer.cancer.gov/statfacts/html/brain.html). Statistical significance was determined from published 95% confidence intervals.

Results

CMV seroprevalence rates are not consistently related to glioblastoma incidence rates. CMV seroprevalence is significantly lower in whites than in blacks or Hispanics (Mexican Americans), while glioblastoma incidence is higher. However, both CMV seroprevalence and glioblastoma incidence are higher in Hispanics than in blacks. CMV seroprevalence rates are significantly higher in women, 55.5% (53.3-57.7, mean ± 95% CI) than men, 45.2% (42.4-48.0), although glioblastoma is more common in men.

Conclusion

A possible CMV-glioblastoma association cannot be readily substantiated with CMV seropositivity rates.",2012-03-01 +22088847,Protein subcellular localization of fluorescence imagery using spatial and transform domain features.,"

Motivation

Subcellular localization of proteins is one of the most significant characteristics of living cells. Prediction of protein subcellular locations is crucial to the understanding of various protein functions. Therefore, an accurate, computationally efficient and reliable prediction system is required.

Results

In this article, the predictions of various Support Vector Machine (SVM) models have been combined through majority voting. The proposed ensemble SVM-SubLoc has achieved the highest success rates of 99.7% using hybrid features of Haralick textures and local binary patterns (HarLBP), 99.4% using hybrid features of Haralick textures and Local Ternary Patterns (HarLTP). In addition, SVM-SubLoc has yielded 99.0% accuracy using only local ternary patterns (LTPs) based features. The dimensionality of HarLBP feature vector is 581 compared with 78 and 52 for HarLTP and LTPs, respectively. Hence, SVM-SubLoc in conjunction with LTPs is fast, sufficiently accurate and simple predictive system. The proposed SVM-SubLoc approach thus provides superior prediction performance using the reduced feature space compared with existing approaches.

Availability

A web server accompanying the proposed prediction scheme is available at http://111.68.99.218/ SVM-SubLoc

Contact

asif@pieas.edu.pk; khan.asifullah@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-11-15 +21296749,LSPR: an integrated periodicity detection algorithm for unevenly sampled temporal microarray data.,"

Unlabelled

We propose a three-step periodicity detection algorithm named LSPR. Our method first preprocesses the raw time-series by removing the linear trend and filtering noise. In the second step, LSPR employs a Lomb-Scargle periodogram to estimate the periodicity in the time-series. Finally, harmonic regression is applied to model the cyclic components. Inferred periodic transcripts are selected by a false discovery rate procedure. We have applied LSPR to unevenly sampled synthetic data and two Arabidopsis diurnal expression datasets, and compared its performance with the existing well-established algorithms. Results show that LSPR is capable of identifying periodic transcripts more accurately than existing algorithms.

Availability

LSPR algorithm is implemented as MATLAB software and is available at http://bioinformatics.cau.edu.cn/LSPR.",2011-02-03 +22889003,"Optical endomicroscopy and the road to real-time, in vivo pathology: present and future.","Epithelial cancers account for substantial mortality and are an important public health concern. With the need for earlier detection and treatment of these malignancies, the ability to accurately detect precancerous lesions has an increasingly important role in controlling cancer incidence and mortality. New optical technologies are capable of identifying early pathology in tissues or organs in which cancer is known to develop through stages of dysplasia, including the esophagus, colon, pancreas, liver, bladder, and cervix. These diagnostic imaging advances, together as a field known as optical endomicroscopy, are based on confocal microscopy, spectroscopy-based imaging, and optical coherence tomography (OCT), and function as ""optical biopsies,"" enabling tissue pathology to be imaged in situ and in real time without the need to excise and process specimens as in conventional biopsy and histopathology. Optical biopsy techniques can acquire high-resolution, cross-sectional images of tissue structure on the micron scale through the use of endoscopes, catheters, laparoscopes, and needles. Since the inception of these technologies, dramatic technological advances in accuracy, speed, and functionality have been realized. The current paradigm of optical biopsy, or single-area, point-based images, is slowly shifting to more comprehensive microscopy of larger tracts of mucosa. With the development of Fourier-domain OCT, also known as optical frequency domain imaging or, more recently, volumetric laser endomicroscopy, comprehensive surveillance of the entire distal esophagus is now achievable at speeds that were not possible with conventional OCT technologies. 
Optical diagnostic technologies are emerging as clinically useful tools with the potential to set a new standard for real-time diagnosis. New imaging techniques enable visualization of high-resolution, cross-sectional images and offer the opportunity to guide biopsy, allowing maximal diagnostic yields and appropriate staging without the limitations and risks inherent with current random biopsy protocols. However, the ability of these techniques to achieve widespread adoption in clinical practice depends on future research designed to improve accuracy and allow real-time data transmission and storage, thereby linking pathology to the treating physician. These imaging advances are expected to eventually offer a see-and-treat paradigm, leading to improved patient care and potential cost reduction.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/5372548637202968.",2012-08-13 +21802968,OligoPred: a web-server for predicting homo-oligomeric proteins by incorporating discrete wavelet transform into Chou's pseudo amino acid composition.,"In vivo, some proteins exist as monomers (single polypeptide chains) and others as oligomers. Not like monomers, oligomers are composed of two or more chains (subunits) that are associated with each other through non-covalent interactions and, occasionally, through disulfide bonds. These proteins are the structural components of various biological functions, including cooperative effects, allosteric mechanisms and ion-channel gating. However, with the dramatic increase in the number of protein sequences submitted to the public data bank, it is important for both basic research and drug discovery research to acquire the possible knowledge about homo-oligomeric attributes of their interested proteins in a timely manner. In this paper, a high-throughput method, combined support vector machines with discrete wavelet transform, has been developed to predict the protein homo-oligomers. The total accuracy obtained by the re-substitution test, jackknife test and independent dataset test are 99.94%, 96.17% and 96.18%, respectively, showing that the proposed method of extracting feature from the protein sequences is effective and feasible for predicting homo-oligomers. The online service is available at http://bioinfo.ncu.edu.cn/Services.aspx.",2011-07-07 +21357521,Evaluation of two iterative techniques for reducing metal artifacts in computed tomography.,"

Purpose

To evaluate two methods for reducing metal artifacts in computed tomography (CT)--the metal deletion technique (MDT) and the selective algebraic reconstruction technique (SART)--and compare these methods with filtered back projection (FBP) and linear interpolation (LI).

Materials and methods

The institutional review board approved this retrospective HIPAA-compliant study; informed patient consent was waived. Simulated projection data were calculated for a phantom that contained water, soft tissue, bone, and iron. Clinical projection data were obtained retrospectively from 11 consecutively identified CT scans with metal streak artifacts, with a total of 178 sections containing metal. Each scan was reconstructed using FBP, LI, SART, and MDT. The simulated scans were evaluated quantitatively by calculating the average error in Hounsfield units for each pixel compared with the original phantom. Two radiologists who were blinded to the reconstruction algorithms used qualitatively evaluated the clinical scans, ranking the overall severity of artifacts for each algorithm. P values for comparisons of the image quality ranks were calculated from the binomial distribution.

Results

The simulations showed that MDT reduces artifacts due to photon starvation, beam hardening, and motion and does not introduce new streaks between metal and bone. MDT had the lowest average error (76% less than FBP, 42% less than LI, 17% less than SART). Blinded comparison of the clinical scans revealed that MDT had the best image quality 100% of the time (95% confidence interval: 72%, 100%). LI had the second best image quality, and SART and FBP had the worst image quality. On images from two CT scans, as compared with images generated by the scanner, MDT revealed information of potential clinical importance.

Conclusion

For a wide range of scans, MDT yields reduced metal streak artifacts and better-quality images than does FBP, LI, or SART.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11101782/-/DC1.",2011-02-25 +21624890,ADGO 2.0: interpreting microarray data and list of genes using composite annotations.,"ADGO 2.0 is a web-based tool that provides composite interpretations for microarray data comparing two sample groups as well as lists of genes from diverse sources of biological information. Some other tools also incorporate composite annotations solely for interpreting lists of genes but usually provide highly redundant information. This new version has the following additional features: first, it provides multiple gene set analysis methods for microarray inputs as well as enrichment analyses for lists of genes. Second, it screens redundant composite annotations when generating and prioritizing them. Third, it incorporates union and subtracted sets as well as intersection sets. Lastly, users can upload their own gene sets (e.g. predicted miRNA targets) to generate and analyze new composite sets. The first two features are unique to ADGO 2.0. Using our tool, we demonstrate analyses of a microarray dataset and a list of genes for T-cell differentiation. The new ADGO is available at http://www.btool.org/ADGO2.",2011-05-29 +21539947,Clustering of MS spectra for improved protein identification rate and screening for protein variants and modifications by MALDI-MS/MS.,"It is an established fact that allelic variation and post-translational modifications create different variants of proteins, which are observed as isoelectric and size subspecies in two-dimensional gel based proteomics. Here we explore the stromal proteome of spinach and Arabidopsis chloroplast and show that clustering of mass spectra is a useful tool for investigating such variants and detecting modified peptides with amino acid substitutions or post-translational modifications. 
This study employs data mining by hierarchical clustering of MALDI-MS spectra, using the web version of the SPECLUST program (http://bioinfo.thep.lu.se/speclust.html). The tool can also be used to remove peaks of contaminating proteins and to improve protein identification, especially for species without a fully sequenced genome. Mutually exclusive peptide peaks within a cluster provide a good starting point for MS/MS investigation of modified peptides, here exemplified by the identification of an A to E substitution that accounts for the isoelectric heterogeneity in protein isoforms.",2011-04-22 +22537006,PIntron: a fast method for detecting the gene structure due to alternative splicing via maximal pairings of a pattern and a text.,"

Background

A challenging issue in designing computational methods for predicting the gene structure into exons and introns from a cluster of transcript (EST, mRNA) sequences, is guaranteeing accuracy as well as efficiency in time and space, when large clusters of more than 20,000 ESTs and genes longer than 1 Mb are processed. Traditionally, the problem has been faced by combining different tools, not specifically designed for this task.

Results

We propose a fast method based on ad hoc procedures for solving the problem. Our method combines two ideas: a novel algorithm of proved small time complexity for computing spliced alignments of a transcript against a genome, and an efficient algorithm that exploits the inherent redundancy of information in a cluster of transcripts to select, among all possible factorizations of EST sequences, those allowing to infer splice site junctions that are largely confirmed by the input data. The EST alignment procedure is based on the construction of maximal embeddings, that are sequences obtained from paths of a graph structure, called embedding graph, whose vertices are the maximal pairings of a genomic sequence T and an EST P. The procedure runs in time linear in the length of P and T and in the size of the output. The method was implemented into the PIntron package. PIntron requires as input a genomic sequence or region and a set of EST and/or mRNA sequences. Besides the prediction of the full-length transcript isoforms potentially expressed by the gene, the PIntron package includes a module for the CDS annotation of the predicted transcripts.

Conclusions

PIntron, the software tool implementing our methodology, is available at http://www.algolab.eu/PIntron under GNU AGPL. PIntron has been shown to outperform state-of-the-art methods, and to quickly process some critical genes. At the same time, PIntron exhibits high accuracy (sensitivity and specificity) when benchmarked with ENCODE annotations.",2012-04-12 +22662130,Using prior information from the medical literature in GWAS of oral cancer identifies novel susceptibility variant on chromosome 4--the AdAPT method.,"

Background

Genome-wide association studies (GWAS) require large sample sizes to obtain adequate statistical power, but it may be possible to increase the power by incorporating complementary data. In this study we investigated the feasibility of automatically retrieving information from the medical literature and leveraging this information in GWAS.

Methods

We developed a method that searches through PubMed abstracts for pre-assigned keywords and key concepts, and uses this information to assign prior probabilities of association for each single nucleotide polymorphism (SNP) with the phenotype of interest--the Adjusting Association Priors with Text (AdAPT) method. Association results from a GWAS can subsequently be ranked in the context of these priors using the Bayes False Discovery Probability (BFDP) framework. We initially tested AdAPT by comparing rankings of known susceptibility alleles in a previous lung cancer GWAS, and subsequently applied it in a two-phase GWAS of oral cancer.

Results

Known lung cancer susceptibility SNPs were consistently ranked higher by AdAPT BFDPs than by p-values. In the oral cancer GWAS, we sought to replicate the top five SNPs as ranked by AdAPT BFDPs, of which rs991316, located in the ADH gene region of 4q23, displayed a statistically significant association with oral cancer risk in the replication phase (per-rare-allele log additive p-value [p(trend)] = 2.5×10(-3)). The combined OR for having one additional rare allele was 0.83 (95% CI: 0.76-0.90), and this association was independent of previously identified susceptibility SNPs that are associated with overall UADT cancer in this gene region. We also investigated if rs991316 was associated with other cancers of the upper aerodigestive tract (UADT), but no additional association signal was found.

Conclusion

This study highlights the potential utility of systematically incorporating prior knowledge from the medical literature in genome-wide analyses using the AdAPT methodology. AdAPT is available online (url: http://services.gate.ac.uk/lld/gwas/service/config).",2012-05-25 +22038678,Compound toxicity screening and structure-activity relationship modeling in Escherichia coli.,"Synthetic biology and metabolic engineering are used to develop new strategies for producing valuable compounds ranging from therapeutics to biofuels in engineered microorganisms. When developing methods for high-titer production cells, toxicity is an important element to consider. Indeed the production rate can be limited due to toxic intermediates or accumulation of byproducts of the heterologous biosynthetic pathway of interest. Conversely, highly toxic molecules are desired when designing antimicrobials. Compound toxicity in bacteria plays a major role in metabolic engineering as well as in the development of new antibacterial agents. Here, we screened a diversified chemical library of 166 compounds for toxicity in Escherichia coli. The dataset was built using a clustering algorithm maximizing the chemical diversity in the library. The resulting assay data was used to develop a toxicity predictor that we used to assess the toxicity of metabolites throughout the metabolome. This new tool for predicting toxicity can thus be used for fine-tuning heterologous expression and can be integrated in a computational-framework for metabolic pathway design. Many structure-activity relationship tools have been developed for toxicology studies in eukaryotes [Valerio (2009), Toxicol Appl Pharmacol, 241(3): 356-370], however, to the best of our knowledge we present here the first E. 
coli toxicity prediction web server based on QSAR models (EcoliTox server: http://www.issb.genopole.fr/~faulon/EcoliTox.php).",2011-11-09 +22871125,Automatic online spike sorting with singular value decomposition and fuzzy C-mean clustering.,"

Background

Understanding how neurons contribute to perception, motor functions and cognition requires the reliable detection of spiking activity of individual neurons during a number of different experimental conditions. An important problem in computational neuroscience is thus to develop algorithms to automatically detect and sort the spiking activity of individual neurons from extracellular recordings. While many algorithms for spike sorting exist, the problem of accurate and fast online sorting still remains a challenging issue.

Results

Here we present a novel software tool, called FSPS (Fuzzy SPike Sorting), which is designed to optimize: (i) fast and accurate detection, (ii) offline sorting and (iii) online classification of neuronal spikes with very limited or null human intervention. The method is based on a combination of Singular Value Decomposition for fast and highly accurate pre-processing of spike shapes, unsupervised Fuzzy C-mean, high-resolution alignment of extracted spike waveforms, optimal selection of the number of features to retain, automatic identification the number of clusters, and quantitative quality assessment of resulting clusters independent on their size. After being trained on a short testing data stream, the method can reliably perform supervised online classification and monitoring of single neuron activity. The generalized procedure has been implemented in our FSPS spike sorting software (available free for non-commercial academic applications at the address: http://www.spikesorting.com) using LabVIEW (National Instruments, USA). We evaluated the performance of our algorithm both on benchmark simulated datasets with different levels of background noise and on real extracellular recordings from premotor cortex of Macaque monkeys. The results of these tests showed an excellent accuracy in discriminating low-amplitude and overlapping spikes under strong background noise. The performance of our method is competitive with respect to other robust spike sorting algorithms.

Conclusions

This new software provides neuroscience laboratories with a new tool for fast and robust online classification of single neuron activity. This feature could become crucial in situations when online spike detection from multiple electrodes is paramount, such as in human clinical recordings or in brain-computer interfaces.",2012-08-08 +22545997,Structural dynamics of full-length retroviral integrase: a molecular dynamics analysis.,"HIV integrase catalyzes the integration between host and viral DNA and is considered as an interesting target for treating HIV. Knowledge of the complete structure of integrase is inevitable to describe the communicative inter-domain interactions affecting the HIV integration and disintegration process and hence the study on full-length integrase turns out to be an essential task. In this investigation, a structure of full-length integrase is designed to analyze the global dynamics of integrase dimer and monomers (with and without the C-terminal, 270-288 amino acids) for a period of 20 ns. The molecular dynamics analysis and the subsequent DynDom analysis reveal (i) a stable dynamics of dimeric CCD and NTD domains and (ii) CCD-α11-mediated rotational-cum-translational CTD motion as the functional dynamics of IN dimer. This observation supports that (i) aggregation enhances the integrase activity and (ii) flexible CTD for its cis and trans coordination with CCD. The role of C-loop over the dynamics of integrase is also explored, which unveils that the spatial arrangement of integrase domains is changed during dynamics in the absence of C-loop. In essence, here we report a C-loop-dependent structural dynamics of integrase and the active dynamics of integrase in dimer. Further studies on C-loop sensing mechanism and the multimerization of integrase would provide insight into HIV integration and disintegration processes. Supplementary material. 
Movies generated from molecular dynamics trajectory showing the CTD dynamics of IN structures (monomers with & without C-loop and dimer) are linked online to this article. The remaining supplementary data can be downloaded from the author's server at the URL http://ramutha.bicpu.edu.in .",2012-01-01 +22210858,Efficient large-scale protein sequence comparison and gene matching to identify orthologs and co-orthologs.,"Broadly, computational approaches for ortholog assignment is a three steps process: (i) identify all putative homologs between the genomes, (ii) identify gene anchors and (iii) link anchors to identify best gene matches given their order and context. In this article, we engineer two methods to improve two important aspects of this pipeline [specifically steps (ii) and (iii)]. First, computing sequence similarity data [step (i)] is a computationally intensive task for large sequence sets, creating a bottleneck in the ortholog assignment pipeline. We have designed a fast and highly scalable sort-join method (afree) based on k-mer counts to rapidly compare all pairs of sequences in a large protein sequence set to identify putative homologs. Second, availability of complex genomes containing large gene families with prevalence of complex evolutionary events, such as duplications, has made the task of assigning orthologs and co-orthologs difficult. Here, we have developed an iterative graph matching strategy where at each iteration the best gene assignments are identified resulting in a set of orthologs and co-orthologs. We find that the afree algorithm is faster than existing methods and maintains high accuracy in identifying similar genes. The iterative graph matching strategy also showed high accuracy in identifying complex gene relationships. Standalone afree available from http://vbc.med.monash.edu.au/∼kmahmood/afree. 
EGM2, complete ortholog assignment pipeline (including afree and the iterative graph matching method) available from http://vbc.med.monash.edu.au/∼kmahmood/EGM2.",2011-12-30 +22117812,Guidance document for structured reporting of diuresis renography.,"This Guidance Document for structured reporting of diuresis renography in adults was developed by the International Scientific Committee of Radionuclides in Nephro-urology (ISCORN; http://www.iscorn.org). ISCORN chose diuresis renography for its first structured report Guidance Document because suspected obstruction is the most common reason for referral, most radionuclide renal studies are conducted at institutions that perform fewer than 3 studies per week, and a large percentage of studies are interpreted by physicians with limited training in nuclear medicine. Ten panelists were asked to categorize specific reporting elements as essential, recommended, optional (without sufficient data to support a higher ranking), and unnecessary (does not contribute to scan interpretation or quality assurance). The final document was developed through an iterative series of comments and questionnaires with a majority vote required to place an element in a specific category. The Guidance Document recommends a reporting structure organized into indications, clinical history, study procedure, findings and impression and specifies the elements considered essential or recommended in each category. 
The Guidance Document is not intended to be restrictive but, rather, to provide a basic structure and rationale so that the diuresis renography report will: (1) communicate the results to the referring physician in a clear and concise manner designed to optimize patient care; (2) contain the essential elements required to evaluate and interpret the study; (3) clearly document the technical components of the study necessary for accountability, quality assurance and reimbursement; and (4) encourage clinical research by facilitating better comparison and extrapolation of results between institutions.",2012-01-01 +21967761,MDpocket: open-source cavity detection and characterization on molecular dynamics trajectories.,"

Motivation

A variety of pocket detection algorithms are now freely or commercially available to the scientific community for the analysis of static protein structures. However, since proteins are dynamic entities, enhancing the capabilities of these programs for the straightforward detection and characterization of cavities taking into account protein conformational ensembles should be valuable for capturing the plasticity of pockets, and therefore allow gaining insight into structure-function relationships.

Results

This article describes a new method, called MDpocket, providing a fast, free and open-source tool for tracking small molecule binding sites and gas migration pathways on molecular dynamics (MDs) trajectories or other conformational ensembles. MDpocket is based on the fpocket cavity detection algorithm and a valuable contribution to existing analysis tools. The capabilities of MDpocket are illustrated for three relevant cases: (i) the detection of transient subpockets using an ensemble of crystal structures of HSP90; (ii) the detection of known xenon binding sites and migration pathways in myoglobin; and (iii) the identification of suitable pockets for molecular docking in P38 Map kinase.

Availability

MDpocket is free and open-source software and can be downloaded at http://fpocket.sourceforge.net.

Contact

pschmidtke@ub.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-03 +22084189,Novel oncologic drugs: what they do and how they affect images.,"Targeted therapies are designed to interfere with specific aberrant biologic pathways involved in tumor development. The main classes of novel oncologic drugs include antiangiogenic drugs, antivascular agents, drugs interfering with EGFR-HER2 or KIT receptors, inhibitors of the PI3K/Akt/mTOR pathway, and hormonal therapies. Cancer cells usurp normal signal transduction pathways used by growth factors to stimulate proliferation and sustain viability. The interaction of growth factors with their receptors activates different intracellular pathways affecting key tumor biologic processes such as neoangiogenesis, tumor metabolism, and tumor proliferation. The response of tumors to anticancer therapy can be evaluated with anatomic response assessment, qualitative response assessment, and response assessment with functional and molecular imaging. Angiogenesis can be measured by means of perfusion imaging with computed tomography and magnetic resonance (MR) imaging. Diffusion-weighted MR imaging allows imaging evaluation of tumor cellularity. The main imaging techniques for studying tumor metabolism in vivo are positron emission tomography and MR spectroscopy. Familiarity with imaging findings secondary to tumor response to targeted therapies may help the radiologist better assist the clinician in accurate evaluation of tumor response to these anticancer treatments. Functional and molecular imaging techniques may provide valuable data and augment conventional assessment of tumor response to targeted therapies. Supplemental material available at http://radiographics.rsna.org/lookup/suppl/doi:10.1148/rg.317115108/-/DC1.",2011-11-01 +21199577,MTML-msBayes: approximate Bayesian comparative phylogeographic inference from multiple taxa and multiple loci with rate heterogeneity.,"

Background

MTML-msBayes uses hierarchical approximate Bayesian computation (HABC) under a coalescent model to infer temporal patterns of divergence and gene flow across codistributed taxon-pairs. Under a model of multiple codistributed taxa that diverge into taxon-pairs with subsequent gene flow or isolation, one can estimate hyper-parameters that quantify the mean and variability in divergence times or test models of migration and isolation. The software uses multi-locus DNA sequence data collected from multiple taxon-pairs and allows variation across taxa in demographic parameters as well as heterogeneity in DNA mutation rates across loci. The method also allows a flexible sampling scheme: different numbers of loci of varying length can be sampled from different taxon-pairs.

Results

Simulation tests reveal increasing power with increasing numbers of loci when attempting to distinguish temporal congruence from incongruence in divergence times across taxon-pairs. These results are robust to DNA mutation rate heterogeneity. Estimating mean divergence times and testing simultaneous divergence was less accurate with migration, but improved if one specified the correct migration model. Simulation validation tests demonstrated that one can detect the correct migration or isolation model with high probability, and that this HABC model testing procedure was greatly improved by incorporating a summary statistic originally developed for this task (Wakeley's ΨW). The method is applied to an empirical data set of three Australian avian taxon-pairs and a result of simultaneous divergence with some subsequent gene flow is inferred.

Conclusions

To retain flexibility and compatibility with existing bioinformatics tools, MTML-msBayes is a pipeline software package consisting of Perl, C and R programs that are executed via the command line. Source code and binaries are available for download at http://msbayes.sourceforge.net/ under an open source license (GNU Public License).",2011-01-03 +22520469,Simulating a base population in honey bee for molecular genetic studies.,"

Background

Over the past years, reports have indicated that honey bee populations are declining and that infestation by an ecto-parasitic mite (Varroa destructor) is one of the main causes. Selective breeding of resistant bees can help to prevent losses due to the parasite, but it requires that a robust breeding program and genetic evaluation are implemented. Genomic selection has emerged as an important tool in animal breeding programs and simulation studies have shown that it yields more accurate breeding value estimates, higher genetic gain and low rates of inbreeding. Since genomic selection relies on marker data, simulations conducted on a genomic dataset are a pre-requisite before selection can be implemented. Although genomic datasets have been simulated in other species undergoing genetic evaluation, simulation of a genomic dataset specific to the honey bee is required since this species has a distinct genetic and reproductive biology. Our software program was aimed at constructing a base population by simulating a random mating honey bee population. A forward-time population simulation approach was applied since it allows modeling of genetic characteristics and reproductive behavior specific to the honey bee.

Results

Our software program yielded a genomic dataset for a base population in linkage disequilibrium. In addition, information was obtained on (1) the position of markers on each chromosome, (2) allele frequency, (3) χ² statistics for Hardy-Weinberg equilibrium, (4) a sorted list of markers with a minor allele frequency less than or equal to the input value, (5) average r² values of linkage disequilibrium between all simulated marker loci pair for all generations and (6) average r² value of linkage disequilibrium in the last generation for selected markers with the highest minor allele frequency.

Conclusion

We developed a software program that takes into account the genetic and reproductive biology specific to the honey bee and that can be used to constitute a genomic dataset compatible with the simulation studies necessary to optimize breeding programs. The source code together with an instruction file is freely accessible at http://msproteomics.org/Research/Misc/honeybeepopulationsimulator.html.",2012-06-27 +21863443,Chloroplast phenomics: systematic phenotypic screening of chloroplast protein mutants in Arabidopsis.,"As part of a project to analyze chloroplast functional networks systematically, we have subjected mutants in >3,200 nuclear genes predicted to encode chloroplast-targeted proteins in Arabidopsis thaliana (http://www.plastid.msu.edu) to parallel phenotypic assays. Detailed methods are presented for the various assays being used in this project to study chloroplast biology. These include morphological analysis of plants, chloroplasts, and seeds using controlled vocabulary. Metabolites synthesized in the chloroplast such as starch, amino acids, and fatty acids are analyzed in groups according to their chemical properties. As an indicator for the relative composition of seed storage oil and proteins, the carbon and nitrogen contents are determined by an elemental analyzer. The methods in this chapter describe how the assays are configured to run in relatively high throughput, maximizing data quality.",2011-01-01 +21177990,ASSIMILATOR: a new tool to inform selection of associated genetic variants for functional studies.,"

Motivation

Fine-mapping experiments from genome-wide association studies (GWAS) are underway for many complex diseases. These are likely to identify a number of putative causal variants, which cannot be separated further in terms of strength of genetic association due to linkage disequilibrium. The challenge will be selecting which variant to prioritize for subsequent expensive functional studies. A wealth of functional information generated from wet lab experiments now exists but cannot be easily interrogated by the user. Here, we describe a program designed to quickly assimilate this data called ASSIMILATOR and validate the method by interrogating two regions to show its effectiveness.

Availability

http://www.medicine.manchester.ac.uk/musculoskeletal/research/arc/genetics/bioinformatics/assimilator/.",2011-01-01 +21991593,RAD in the realm of next-generation sequencing technologies.,"The first North American RAD Sequencing and Genomics Symposium, sponsored by Floragenex (http://www.floragenex.com/radmeeting/), took place in Portland, Oregon (USA) on 19 April 2011. This symposium was convened to promote and discuss the use of restriction-site-associated DNA (RAD) sequencing technologies. RAD sequencing is one of several strategies recently developed to increase the power of data generated via short-read sequencing technologies by reducing their complexity (Baird et al. 2008; Huang et al. 2009; Andolfatto et al. 2011; Elshire et al. 2011). RAD sequencing, as a form of genotyping by sequencing, has been effectively applied in genetic mapping and quantitative trait loci (QTL) analyses in a range of organisms including nonmodel, genetically highly heterogeneous organisms (Table 1; Baird et al. 2008; Baxter et al. 2011; Chutimanitsakun et al. 2011; Pfender et al. 2011). RAD sequencing has recently found applications in phylogeography (Emerson et al. 2010) and population genomics (Hohenlohe et al. 2010). Considering the diversity of talks presented during this meeting, more developments are to be expected in the very near future.",2011-09-01 +21705106,TimeLapseAnalyzer: multi-target analysis for live-cell imaging and time-lapse microscopy.,"The direct observation of cells over time using time-lapse microscopy can provide deep insights into many important biological processes. Reliable analyses of motility, proliferation, invasive potential or mortality of cells are essential to many studies involving live cell imaging and can aid in biomarker discovery and diagnostic decisions. Given the vast amount of image- and time-series data produced by modern microscopes, automated analysis is a key feature to capitalize the potential of time-lapse imaging devices. 
To provide fast and reproducible analyses of multiple aspects of cell behaviour, we developed TimeLapseAnalyzer. Apart from general purpose image enhancements and segmentation procedures, this extensible, self-contained, modular cross-platform package provides dedicated modalities for fast and reliable analysis of multi-target cell tracking, scratch wound healing analysis, cell counting and tube formation analysis in high throughput screening of live-cell experiments. TimeLapseAnalyzer is freely available (MATLAB, Open Source) at http://www.informatik.uni-ulm.de/ni/mitarbeiter/HKestler/tla.",2011-06-25 +21159117,Parasitological efficacy of antimalarials in the treatment and prevention of falciparum malaria in pregnancy 1998 to 2009: a systematic review.,"

Background

Pregnant women are at increased risk from malaria. Resistance to all classes of antimalarials has affected the treatment and prevention of malaria in pregnancy.

Objectives

To review the therapeutic efficacy of antimalarials used for treatment and intermittent preventive treatment (IPT) in pregnancy.

Search strategy

We searched MEDLINE and the Cochrane Library between January 1998 and December 2009 for publications using the medical subject headings: efficacy, antimalarials, malaria, pregnancy, pharmacokinetics, treatment, IPT and placenta positive. In May 2010 we searched the register of clinical trials (http://clinicaltrials.gov/) and of WHO (http://apps.who.int/trialsearch/) using 'malaria', and 'pregnancy' and 'treatment'.

Selection criteria

We identified 233 abstracts, reviewed 83 full text articles and included 60 studies.

Data collection and analysis

Two authors entered extracted data to an excel spreadsheet.

Main results

Parasitological failure rates, placenta positivity rates (assessed by microscopy) or both were reported in 44% (21/48), 46% (22/48) and 10% (5/48) of articles, respectively. Most pharmacokinetic studies (9/12) suggested dose optimisation. In 23 treatment studies 17 different antimalarial drugs were delivered in 53 study arms; 43.4% (23/53) reported a failure rate of < 5%; 83.3% of sulphadoxine-pyrimethamine (SP) arms and 9% of artemisinin combination therapy (ACT) arms had failure rates ≥ 10%. Placenta-positive rates (mostly reported in the context of IPT in pregnancy) were > 10% in 68% (23/34) of SP trial arms and > 15% in all seven chloroquine arms. The ACT provided lower parasitological failure and gametocyte carriage rates.

Author's conclusions

Drugs used in pregnancy should aim for 95% efficacy but many currently deployed regimens are associated with much lower cure rates.",2011-01-01 +21989326,A hidden Markov model for copy number variant prediction from whole genome resequencing data.,"

Motivation

Copy Number Variants (CNVs) are important genetic factors for studying human diseases. While high-throughput whole genome re-sequencing provides multiple lines of evidence for detecting CNVs, computational algorithms need to be tailored for different type or size of CNVs under different experimental designs.

Results

To achieve optimal power and resolution of detecting CNVs at low depth of coverage, we implemented a Hidden Markov Model that integrates both depth of coverage and mate-pair relationship. The novelty of our algorithm is that we infer the likelihood of carrying a deletion jointly from multiple mate pairs in a region without the requirement of a single mate pairs being obvious outliers. By integrating all useful information in a comprehensive model, our method is able to detect medium-size deletions (200-2000bp) at low depth (<10× per sample). We applied the method to simulated data and demonstrate the power of detecting medium-size deletions is close to theoretical values.

Availability

A program implemented in Java, Zinfandel, is available at http://www.cs.columbia.edu/~itsik/zinfandel/",2011-07-28 +23218662,"Intraoperative ultrasound guidance for palpable breast cancer excision (COBALT trial): a multicentre, randomised controlled trial.","

Background

Breast-conserving surgery for palpable breast cancer is associated with tumour-involved margins in up to 41% of cases and excessively large excision volumes. Ultrasound-guided surgery has the potential to resolve both of these problems, thereby improving surgical accuracy for palpable breast cancer. We aimed to compare ultrasound-guided surgery with the standard for palpable breast cancer—palpation-guided surgery—with respect to margin status and extent of healthy breast tissue resection.

Methods

In this randomised controlled trial, patients with palpable T1-T2 invasive breast cancer were recruited from six medical centres in the Netherlands between October, 2010, and March, 2012. Eligible participants were randomly assigned to either ultrasound-guided surgery or palpation-guided surgery in a 1:1 ratio via a computer-generated random sequence and were stratified by study centre. Patients and investigators were aware of treatment assignments. Primary outcomes were surgical margin involvement, need for additional treatment, and excess healthy tissue resection (defined with a calculated resection ratio derived from excision volume and tumour diameter). Data were analysed by intention to treat. This trial is registered at http://www.TrialRegister.nl, number NTR2579.

Findings

134 patients were eligible for random allocation. Two (3%) of 65 patients allocated ultrasound-guided surgery had tumour-involved margins compared with 12 (17%) of 69 who were assigned palpation-guided surgery (difference 14%, 95% CI 4-25; p=0·0093). Seven (11%) patients who received ultrasound-guided surgery and 19 (28%) of those who received palpation-guided surgery required additional treatment (17%, 3-30; p=0·015). Ultrasound-guided surgery also resulted in smaller excision volumes (38 [SD 26] vs 57 [41] cm(3); difference 19 cm(3), 95% CI 7-31; p=0·002) and a reduced calculated resection ratio (1·0 [SD 0·5] vs 1·7 [1·2]; difference 0·7, 95% CI 0·4-1·0; p=0·0001) compared with palpation-guided surgery.

Interpretation

Compared with palpation-guided surgery, ultrasound-guided surgery can significantly lower the proportion of tumour-involved resection margins, thus reducing the need for re-excision, mastectomy, and radiotherapy boost. By achieving optimum resection volumes, ultrasound-guided surgery reduces unnecessary resection of healthy breast tissue and could contribute to improved cosmetic results and quality of life.

Funding

Dutch Pink Ribbon Foundation, Osinga-Kluis Foundation, Toshiba Medical Systems.",2012-12-04 +22331654,Resting state cortical electroencephalographic rhythms are related to gray matter volume in subjects with mild cognitive impairment and Alzheimer's disease.,"Cortical gray matter volume and resting state cortical electroencephalographic rhythms are typically abnormal in subjects with amnesic mild cognitive impairment (MCI) and Alzheimer's disease (AD). Here we tested the hypothesis that in amnesic MCI and AD subjects, abnormalities of EEG rhythms are a functional reflection of cortical atrophy across the disease. Eyes-closed resting state EEG data were recorded in 57 healthy elderly (Nold), 102 amnesic MCI, and 108 AD patients. Cortical gray matter volume was indexed by magnetic resonance imaging recorded in the MCI and AD subjects according to Alzheimer's disease neuroimaging initiative project (http://www.adni-info.org/). EEG rhythms of interest were delta (2-4 Hz), theta (4-8 Hz), alpha1 (8-10.5 Hz), alpha2 (10.5-13 Hz), beta1 (13-20 Hz), beta2 (20-30 Hz), and gamma (30-40 Hz). These rhythms were indexed by LORETA. Compared with the Nold, the MCI showed a decrease in amplitude of alpha 1 sources. With respect to the Nold and MCI, the AD showed an amplitude increase of delta sources, along with a strong amplitude reduction of alpha 1 sources. In the MCI and AD subjects as a whole group, the lower the cortical gray matter volume, the higher the delta sources, the lower the alpha 1 sources. The better the score to cognitive tests the higher the gray matter volume, the lower the pathological delta sources, and the higher the alpha sources. 
These results suggest that in amnesic MCI and AD subjects, abnormalities of resting state cortical EEG rhythms are not epiphenomena but are strictly related to neurodegeneration (atrophy of cortical gray matter) and cognition.",2012-02-14 +22199254,Hobbes: optimized gram-based methods for efficient read alignment.,"Recent advances in sequencing technology have enabled the rapid generation of billions of bases at relatively low cost. A crucial first step in many sequencing applications is to map those reads to a reference genome. However, when the reference genome is large, finding accurate mappings poses a significant computational challenge due to the sheer amount of reads, and because many reads map to the reference sequence approximately but not exactly. We introduce Hobbes, a new gram-based program for aligning short reads, supporting Hamming and edit distance. Hobbes implements two novel techniques, which yield substantial performance improvements: an optimized gram-selection procedure for reads, and a cache-efficient filter for pruning candidate mappings. We systematically tested the performance of Hobbes on both real and simulated data with read lengths varying from 35 to 100 bp, and compared its performance with several state-of-the-art read-mapping programs, including Bowtie, BWA, mrsFast and RazerS. Hobbes is faster than all other read mapping programs we have tested while maintaining high mapping quality. Hobbes is about five times faster than Bowtie and about 2-10 times faster than BWA, depending on read length and error rate, when asked to find all mapping locations of a read in the human genome within a given Hamming or edit distance, respectively. Hobbes supports the SAM output format and is publicly available at http://hobbes.ics.uci.edu.",2011-12-22 +22990455,Pediatric Acute Lung Injury Epidemiology and Natural History study: Incidence and outcome of the acute respiratory distress syndrome in children.,"

Objectives

The incidence and outcome of the acute respiratory distress syndrome in children are not well-known, especially under current ventilatory practices. The goal of this study was to determine the incidence, etiology, and outcome of acute respiratory distress syndrome in the pediatric population in the setting of lung protective ventilation.

Design

A 1-yr, prospective, multicenter, observational study in 12 geographical areas of Spain (serving a population of 3.77 million ≤ 15 yrs of age) covered by 21 pediatric intensive care units.

Subjects

All consecutive pediatric patients receiving invasive mechanical ventilation and meeting American-European Consensus Criteria for acute respiratory distress syndrome.

Interventions

None.

Measurements and main results

Data on ventilatory management, gas exchange, hemodynamics, and organ dysfunction were collected. A total of 146 mechanically ventilated patients fulfilled the acute respiratory distress syndrome definition, representing an incidence of 3.9/100,000 population ≤ 15 yrs of age/yr. Pneumonia and sepsis were the most common causes of acute respiratory distress syndrome. At the time of meeting acute respiratory distress syndrome criteria, mean PaO2/FIO2 was 99 mm Hg ± 41 mm Hg, mean tidal volume was 7.6 mL/kg ± 1.8 mL/kg predicted body weight, mean plateau pressure was 27 cm H2O ± 6 cm H2O, and mean positive end-expiratory pressure was 8.9 cm ± 2.9 cm H2O. Overall pediatric intensive care unit and hospital mortality were 26% (95% confidence interval 19.6-33.7) and 27.4% (95% confidence interval 20.8-35.1), respectively. At 24 hrs, after the assessment of oxygenation under standard ventilatory settings, 118 (80.8%) patients continued to meet acute respiratory distress syndrome criteria (PaO2/FIO2 104 mm Hg ± 36 mm Hg; pediatric intensive care units mortality 30.5%), whereas 28 patients (19.2%) had a PaO2/FIO2 >200 mm Hg (pediatric intensive care units mortality 7.1%) (p = .014).

Conclusions

This is the largest study to estimate prospectively the pediatric population-based acute respiratory distress syndrome incidence and the first incidence study performed during the routine application of lung protective ventilation in children. Our findings support a lower acute respiratory distress syndrome incidence and mortality than those reported for adults. PaO2/FIO2 ratios at acute respiratory distress syndrome onset and at 24 hrs after onset were helpful in defining groups at greater risk of dying (clinical trials registered with http://www.clinicaltrials.gov; NCT 01142544).",2012-12-01 +22194717,Cognitive consilience: primate non-primary neuroanatomical circuits underlying cognition.,"Interactions between the cerebral cortex, thalamus, and basal ganglia form the basis of cognitive information processing in the mammalian brain. Understanding the principles of neuroanatomical organization in these structures is critical to understanding the functions they perform and ultimately how the human brain works. We have manually distilled and synthesized hundreds of primate neuroanatomy facts into a single interactive visualization. The resulting picture represents the fundamental neuroanatomical blueprint upon which cognitive functions must be implemented. Within this framework we hypothesize and detail 7 functional circuits corresponding to psychological perspectives on the brain: consolidated long-term declarative memory, short-term declarative memory, working memory/information processing, behavioral memory selection, behavioral memory output, cognitive control, and cortical information flow regulation. Each circuit is described in terms of distinguishable neuronal groups including the cerebral isocortex (9 pyramidal neuronal groups), parahippocampal gyrus and hippocampus, thalamus (4 neuronal groups), basal ganglia (7 neuronal groups), metencephalon, basal forebrain, and other subcortical nuclei. 
We focus on neuroanatomy related to primate non-primary cortical systems to elucidate the basis underlying the distinct homotypical cognitive architecture. To display the breadth of this review, we introduce a novel method of integrating and presenting data in multiple independent visualizations: an interactive website (http://www.frontiersin.org/files/cognitiveconsilience/index.html) and standalone iPhone and iPad applications. With these tools we present a unique, annotated view of neuroanatomical consilience (integration of knowledge).",2011-12-20 +21883128,A quality assurance programme for cell salvage in cardiac surgery.,"At the same time as cell salvage was introduced into our institution for all patients undergoing cardiac surgery with cardiopulmonary bypass, we established a supporting programme of quality assurance to reassure clinicians regarding safety and efficacy. Data collected in patients operated on between 2001 and 2007 included pre- and post-wash heparin concentration, haemoglobin concentration and free haemoglobin concentration. Cell salvage was used in 6826 out of a total of 7243 patients (94%). Post-wash heparin concentration was consistently low (always < 0.4 IU.ml(-1)). There was a significant decrease in post-wash haemoglobin concentration in 2003 compared to 2001, from a median (IQR [range]) of 19.6 (16.7-22.2 [12.9-25.5]) g.dl(-1) to 17.5 (13.6-20.8 [12.6-23.7]) g.dl(-1) (p < 0.015). In addition, there was a significant increase in free plasma haemoglobin in 2006 compared to 2001, from 0.5 (0.3-0.8 [0.1-2.6]) g.l(-1) to 0.8 (0.3-1.4 [0.3-5.2]) g.l(-1) (p < 0.001). This programme led to the detection of a change in operator behaviour in 2003 and progressive machine deterioration resulting in appropriate fleet replacement in 2006. You can respond to this article at http://www.anaesthesiacorrespondence.com.",2011-08-24 +21471014,Correcting errors in short reads by multiple alignments.,"

Motivation

Current sequencing technologies produce a large number of erroneous reads. The sequencing errors present a major challenge in utilizing the data in de novo sequencing projects as assemblers have difficulties in dealing with errors.

Results

We present Coral which corrects sequencing errors by forming multiple alignments. Unlike previous tools for error correction, Coral can utilize also bases distant from the error in the correction process because the whole read is present in the alignment. Coral is easily adjustable to reads produced by different sequencing technologies like Illumina Genome Analyzer and Roche/454 Life Sciences sequencing platforms because the sequencing error model can be defined by the user. We show that our method is able to reduce the error rate of reads more than previous methods.

Availability

The source code of Coral is freely available at http://www.cs.helsinki.fi/u/lmsalmel/coral/.",2011-04-05 +21681421,Selection of 29 highly informative InDel markers for human identification and paternity analysis in Chinese Han population by the SNPlex genotyping system.,"The interest of forensic researchers in single nucleotide polymorphism (SNP) has been attracted because of its potential advantages, such as low mutation rates, amenable to high-throughput automated platform and the improved application in the analysis of degraded samples. In this paper, 29 highly informative insertion/deletion (InDel, a special kind of SNP) markers were selected from the dbSNP ( http://www.ncbi.nlm.nih.gov/SNP/ ) according to the given criteria. 109 unrelated Chinese Han subjects were genotyped for the 29 InDels with SNPlex genotyping system. The allele frequency data revealed that the combined power of discrimination for the 29 InDel markers was 0.999999999990867 and the combined probability of paternity exclusion (PE) was 0.9930. Sensitivity studies were performed to evaluate the flexibility of the SNPlex genotyping system on the set of 29 InDels. Highly reproducible results could be obtained with 40-100 ng genomic DNA and the proportion of total allele drop-in was significantly increased when the amount of DNA added to PCR was lower than 35 ng. These results suggested that the set of 29 InDels was useful in paternity analysis or human identification in the future.",2011-06-18 +22935204,Molecular correlates and prognostic significance of SATB1 expression in colorectal cancer.,"

Background

Special AT-rich sequence-binding protein 1 (SATB1) is a global gene regulator that has been reported to confer malignant behavior and associate with poor prognosis in several cancer forms. SATB1 expression has been demonstrated to correlate with unfavourable tumour characteristics in rectal cancer, but its association with clinical outcome in colorectal cancer (CRC) remains unclear. In this study, we examined the prognostic impact of SATB1 expression in CRC, and its association with important molecular characteristics; i.e. beta-catenin overexpression, microsatellite instability (MSI) screening status, and SATB2 expression.

Methods

Immunohistochemical expression of SATB1 and beta-catenin was assessed in tissue microarrays with tumours from 529 incident CRC cases in the prospective population-based Malmö Diet and Cancer Study, previously analysed for SATB2 expression and MSI screening status. Spearman's Rho and Chi-Square tests were used to explore correlations between SATB1 expression, clinicopathological and investigative parameters. Kaplan-Meier analysis and Cox proportional hazards modelling were used to explore the impact of SATB1 expression on cancer specific survival (CSS) and overall survival (OS).

Results

SATB1 was expressed in 222 (42%) CRC cases and negative, or sparsely expressed, in adjacent colorectal mucosa (n = 16). SATB1 expression was significantly associated with microsatellite stable tumours (p < 0.001), beta-catenin overexpression (p < 0.001) and SATB2 expression (p < 0.001). While not prognostic in the full cohort, SATB1 expression was significantly associated with poor prognosis in SATB2 negative tumours (HR = 2.63; 95% CI 1.46-4.71; p(interaction) = 0.011 for CSS and HR = 2.31; 95% CI 1.32-4.04; p(interaction) = 0.015 for OS), remaining significant in multivariable analysis.

Conclusions

The results of this study demonstrate that SATB1 expression in CRC is significantly associated with beta-catenin overexpression, microsatellite stability and SATB2 expression. Furthermore, SATB1 expression is a factor of poor prognosis in SATB2 negative tumours. Altogether, these data indicate an important role for SATB1 in colorectal carcinogenesis and suggest prognostically antagonistic effects of SATB1 and SATB2. The mechanistic basis for these observations warrants further study.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1922643082772076.",2012-08-30 +21784792,GenSSI: a software toolbox for structural identifiability analysis of biological models.,"

Summary

Mathematical modeling has a key role in systems biology. Model building is often regarded as an iterative loop involving several tasks, among which the estimation of unknown parameters of the model from a certain set of experimental data is of central importance. This problem of parameter estimation has many possible pitfalls, and modelers should be very careful to avoid them. Many of such difficulties arise from a fundamental (yet often overlooked) property: the so-called structural (or a priori) identifiability, which considers the uniqueness of the estimated parameters. Obviously, the structural identifiability of any tentative model should be checked at the beginning of the model building loop. However, checking this property for arbitrary non-linear dynamic models is not an easy task. Here we present a software toolbox, GenSSI (Generating Series for testing Structural Identifiability), which enables non-expert users to carry out such analysis. The toolbox runs under the popular MATLAB environment and is accompanied by detailed documentation and relevant examples.

Availability

The GenSSI toolbox and the related documentation are available at http://www.iim.csic.es/%7Egenssi.

Contact

ebalsa@iim.csic.es.",2011-07-22 +21672956,BioPortal: enhanced functionality via new Web services from the National Center for Biomedical Ontology to access and use ontologies in software applications.,"The National Center for Biomedical Ontology (NCBO) is one of the National Centers for Biomedical Computing funded under the NIH Roadmap Initiative. Contributing to the national computing infrastructure, NCBO has developed BioPortal, a web portal that provides access to a library of biomedical ontologies and terminologies (http://bioportal.bioontology.org) via the NCBO Web services. BioPortal enables community participation in the evaluation and evolution of ontology content by providing features to add mappings between terms, to add comments linked to specific ontology terms and to provide ontology reviews. The NCBO Web services (http://www.bioontology.org/wiki/index.php/NCBO_REST_services) enable this functionality and provide a uniform mechanism to access ontologies from a variety of knowledge representation formats, such as Web Ontology Language (OWL) and Open Biological and Biomedical Ontologies (OBO) format. The Web services provide multi-layered access to the ontology content, from getting all terms in an ontology to retrieving metadata about a term. Users can easily incorporate the NCBO Web services into software applications to generate semantically aware applications and to facilitate structured data collection.",2011-06-14 +22016406,Predicting residue-residue contacts using random forest models.,"

Motivation

Protein residue-residue contact prediction can be useful in predicting protein 3D structures. Current algorithms for such a purpose leave room for improvement.

Results

We develop ProC_S3, a set of Random Forest algorithm-based models, for predicting residue-residue contact maps. The models are constructed based on a collection of 1490 non-redundant, high-resolution protein structures using >1280 sequence-based features. A new amino acid residue contact propensity matrix and a new set of seven amino acid groups based on contact preference are developed and used in ProC_S3. ProC_S3 delivers a 3-fold cross-validated accuracy of 26.9% with coverage of 4.7% for top L/5 predictions (L is the number of residues in a protein) of long-range contacts (sequence separation ≥24). Further benchmark tests deliver an accuracy of 29.7% and coverage of 5.6% for an independent set of 329 proteins. In the recently completed Ninth Community Wide Experiment on the Critical Assessment of Techniques for Protein Structure Prediction (CASP9), ProC_S3 is ranked as No. 1, No. 3, and No. 2 accuracies in the top L/5, L/10 and best 5 predictions of long-range contacts, respectively, among 18 automatic prediction servers.

Availability

http://www.abl.ku.edu/proc/proc_s3.html.

Contact

jwfang@ku.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-20 +22270496,"Interactive, computer-based pediatric chest atlas.","

Objectives

The authors created a computer-based, interactive atlas of pediatric chest radiographs to facilitate (1) understanding of normal variants and (2) interpretation of pediatric chest radiographs in the acute setting.

Methods

Seventy-three normal pediatric chest radiographs were selected for inclusion in the atlas by consensus after review by 3 pediatric radiologists. Sixteen abnormal pediatric chest radiographs showing a variety of abnormalities (infection, malignancy, congenital abnormalities, foreign body, and acquired disease), as well as 4 normal adult chest radiographs, were also included in the atlas. Images for the atlas were derived from Digital Imaging and Communications in Medicine-compliant data. The atlas software was written in C# and offers features of a picture archiving and communication system viewer. In addition, the atlas offers annotated series that describe particular radiographic features of normal variants and disease.

Results

The digital, interactive pediatric chest atlas displays normal chest radiographs of children aged 7 days to 17.8 years, as well as 4 normal adult chest radiographs and 16 abnormal pediatric chest radiographs. The digital interactive format of the atlas allows for (1) easy manipulation of atlas cases and (2) direct comparison between normal atlas cases and provided abnormal cases, as well as (3) the potential for direct comparison with images displayed on an institution's picture archiving and communication system. The atlas is available for free download at http://www.seattlechildrens.org/radiologyeducation/pediatric-chest.

Conclusions

Improved interpretation of pediatric chest radiographs in the acute setting may be facilitated by a comprehensive, computer-based, pediatric chest atlas.",2012-02-01 +22559942,Tavaxy: integrating Taverna and Galaxy workflows with cloud computing support.,"

Background

Over the past decade the workflow system paradigm has evolved as an efficient and user-friendly approach for developing complex bioinformatics applications. Two popular workflow systems that have gained acceptance by the bioinformatics community are Taverna and Galaxy. Each system has a large user-base and supports an ever-growing repository of application workflows. However, workflows developed for one system cannot be imported and executed easily on the other. The lack of interoperability is due to differences in the models of computation, workflow languages, and architectures of both systems. This lack of interoperability limits sharing of workflows between the user communities and leads to duplication of development efforts.

Results

In this paper, we present Tavaxy, a stand-alone system for creating and executing workflows based on using an extensible set of re-usable workflow patterns. Tavaxy offers a set of new features that simplify and enhance the development of sequence analysis applications: It allows the integration of existing Taverna and Galaxy workflows in a single environment, and supports the use of cloud computing capabilities. The integration of existing Taverna and Galaxy workflows is supported seamlessly at both run-time and design-time levels, based on the concepts of hierarchical workflows and workflow patterns. The use of cloud computing in Tavaxy is flexible, where the users can either instantiate the whole system on the cloud, or delegate the execution of certain sub-workflows to the cloud infrastructure.

Conclusions

Tavaxy reduces the workflow development cycle by introducing the use of workflow patterns to simplify workflow creation. It enables the re-use and integration of existing (sub-) workflows from Taverna and Galaxy, and allows the creation of hybrid workflows. Its additional features exploit recent advances in high performance cloud computing to cope with the increasing data size and complexity of analysis. The system can be accessed either through a cloud-enabled web-interface or downloaded and installed to run within the user's local environment. All resources related to Tavaxy are available at http://www.tavaxy.org.",2012-05-04 +21216778,X-MATE: a flexible system for mapping short read data.,"

Summary

Accurate and complete mapping of short-read sequencing to a reference genome greatly enhances the discovery of biological results and improves statistical predictions. We recently presented RNA-MATE, a pipeline for the recursive mapping of RNA-Seq datasets. With the rapid increase in genome re-sequencing projects, progression of available mapping software and the evolution of file formats, we now present X-MATE, an updated version of RNA-MATE, capable of mapping both RNA-Seq and DNA datasets and with improved performance, output file formats, configuration files, and flexibility in core mapping software.

Availability

Executables, source code, junction libraries, test data and results and the user manual are available from http://grimmond.imb.uq.edu.au/X-MATE/.",2011-01-06 +21930509,ecoPrimers: inference of new DNA barcode markers from whole genome sequence analysis.,"Using non-conventional markers, DNA metabarcoding allows biodiversity assessment from complex substrates. In this article, we present ecoPrimers, a software for identifying new barcode markers and their associated PCR primers. ecoPrimers scans whole genomes to find such markers without a priori knowledge. ecoPrimers optimizes two quality indices measuring taxonomical range and discrimination to select the most efficient markers from a set of reference sequences, according to specific experimental constraints such as marker length or specifically targeted taxa. The key step of the algorithm is the identification of conserved regions among reference sequences for anchoring primers. We propose an efficient algorithm based on data mining, that allows the analysis of huge sets of sequences. We evaluate the efficiency of ecoPrimers by running it on three different sequence sets: mitochondrial, chloroplast and bacterial genomes. Identified barcode markers correspond either to barcode regions already in use for plants or animals, or to new potential barcodes. Results from empirical experiments carried out on a promising new barcode for analyzing vertebrate diversity fully agree with expectations based on bioinformatics analysis. These tests demonstrate the efficiency of ecoPrimers for inferring new barcodes fitting with diverse experimental contexts. ecoPrimers is available as an open source project at: http://www.grenoble.prabi.fr/trac/ecoPrimers.",2011-09-19 +22292898,Genome-wide landscape of liver X receptor chromatin binding and gene regulation in human macrophages.,"

Background

The liver X receptors (LXRs) are oxysterol sensing nuclear receptors with multiple effects on metabolism and immune cells. However, the complete genome-wide cistrome of LXR in cells of human origin has not yet been provided.

Results

We performed ChIP-seq in phorbol myristate acetate-differentiated THP-1 cells (macrophage-type) after stimulation with the potent synthetic LXR ligand T0901317 (T09). Microarray gene expression analysis was performed in the same cellular model. We identified 1357 genome-wide LXR locations (FDR < 1%), of which 526 were observed after T09 treatment. De novo analysis of LXR binding sequences identified a DR4-type element as the major motif. On mRNA level T09 up-regulated 1258 genes and repressed 455 genes. Our results show that LXR actions are focused on 112 genomic regions that contain up to 11 T09 target genes per region under the control of highly stringent LXR binding sites with individual constellations for each region. We could confirm that LXR controls lipid metabolism and transport and observed a strong association with apoptosis-related functions.

Conclusions

This first report on genome-wide binding of LXR in a human cell line provides new insights into the transcriptional network of LXR and its target genes with their link to physiological processes, such as apoptosis. The gene expression microarray and sequence data have been submitted collectively to the NCBI Gene Expression Omnibus http://www.ncbi.nlm.nih.gov/geo under accession number GSE28319.",2012-01-31 +22307104,"Evaluating the travel, physical activity and carbon impacts of a 'natural experiment' in the provision of new walking and cycling infrastructure: methods for the core module of the iConnect study.","

Introduction

Improving infrastructure to support walking and cycling is often regarded as fundamental to encouraging their widespread uptake. However, there is little evidence that specific provision of this kind has led to a significant increase in walking or cycling in practice, let alone wider impacts such as changes in overall physical activity or carbon emissions. Connect2 is a major new project that aims to promote walking and cycling in the UK by improving local pedestrian and cycle routes. It therefore provides a useful opportunity to contribute new evidence in this field by means of a natural experimental study.

Methods and analysis

iConnect is an independent study that aims to integrate the perspectives of public health and transport research on the measurement and evaluation of the travel, physical activity and carbon impacts of the Connect2 programme. In this paper, the authors report the study design and methods for the iConnect core module. This comprised a cohort study of residents living within 5 km of three case study Connect2 projects in Cardiff, Kenilworth and Southampton, supported by a programme of qualitative interviews with key informants about the projects. Participants were asked to complete postal questionnaires, repeated before and after the opening of the new infrastructure, which collected data on demographic and socioeconomic characteristics, travel, car fuel purchasing and physical activity, and potential psychosocial and environmental correlates and mediators of those behaviours. In the absence of suitable no-intervention control groups, the study design drew on heterogeneity in exposure both within and between case study samples to provide for a counterfactual.

Ethics and dissemination

The study was approved by the University of Southampton Research Ethics Committee. The findings will be disseminated through academic presentations, peer-reviewed publications and the study website (http://www.iconnect.ac.uk) and by means of a national seminar at the end of the study.",2012-02-02 +22647358,Cross talk between microRNA and coding cancer genes.,"MicroRNAs (miRNAs) are a class of noncoding RNAs (ncRNAs) and posttranscriptional gene regulators shown to be involved in pathogenesis of all types of human cancers. Their aberrant expression as tumor suppressors can lead to cancerogenesis by inhibiting malignant potential, or when acting as oncogenes, by activating malignant potential. Differential expression of miRNA genes in tumorous tissues can occur owing to several factors including positional effects when mapping to cancer-associated genomic regions, epigenetic mechanisms, and malfunctioning of the miRNA processing machinery, all of which can contribute to a complex miRNA-mediated gene network misregulation. They may increase or decrease expression of protein-coding genes, can target 3'-UTR or other genic regions (5'-UTR, promoter, coding sequences), and can function in various subcellular compartments, developmental, and metabolic processes. Because expanding research on miRNA-cancer associations has already produced large amounts of data, our main objective here was to summarize main findings and critically examine the intricate network connecting the miRNAs and coding genes in regulatory mechanisms and their function and phenotypic consequences for cancer. By examining such interactions, we aimed to gain insights for the development of new diagnostic markers as well as identification of potential venues for more selective tumor therapy. To enable efficient examination of the main past and current miRNA discoveries, we developed a Web-based miRNA timeline tool that will be regularly updated (http://www.integratomics-time.com/miRNA_timeline). 
Further development of this tool will be directed at providing additional analyses to clarify complex network interactions between miRNAs, other classes of ncRNAs, and protein-coding genes and their involvement in development of diseases including cancer. This tool therefore provides curated relevant information about the miRNA basic research and therapeutic application all at hand on one site to help researchers and clinicians in making informed decision about their miRNA cancer-related research or clinical practice.",2012-05-01 +21757466,NASP: a parallel program for identifying evolutionarily conserved nucleic acid secondary structures from nucleotide sequence alignments.,"

Summary

Many natural nucleic acid sequences have evolutionarily conserved secondary structures with diverse biological functions. A reliable computational tool for identifying such structures would be very useful in guiding experimental analyses of their biological functions. NASP (Nucleic Acid Structure Predictor) is a program that takes into account thermodynamic stability, Boltzmann base pair probabilities, alignment uncertainty, covarying sites and evolutionary conservation to identify biologically relevant secondary structures within multiple sequence alignments. Unique to NASP is the consideration of all this information together with a recursive permutation-based approach to progressively identify and list the most conserved probable secondary structures that are likely to have the greatest biological relevance. By focusing on identifying only evolutionarily conserved structures, NASP forgoes the prediction of complete nucleotide folds but outperforms various other secondary structure prediction methods in its ability to selectively identify actual base pairings.

Availability

Downloadable and web-based versions of NASP are freely available at http://web.cbio.uct.ac.za/~yves/nasp_portal.php

Contact

yves@cbio.uct.ac.za

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-14 +21208987,flowPhyto: enabling automated analysis of microscopic algae from continuous flow cytometric data.,"

Motivation

Flow cytometry is a widely used technique among biologists to study the abundances of populations of microscopic algae living in aquatic environments. A new generation of high-frequency flow cytometers collects up to several hundred samples per day and can run continuously for several weeks. Automated computational methods are needed to analyze the different phytoplankton populations present in each sample. Software packages in the programming environment R provide powerful tools for conducting such analyses.

Results

We introduce flowPhyto, an R package that performs aggregate statistics on virtually unlimited collections of raw flow cytometry files and provides a memory efficient, parallelized solution for analyzing high-throughput flow cytometric data.

Availability

Freely accessible at http://www.bioconductor.org.",2011-01-05 +21275939,Late-onset inversa recessive dystrophic epidermolysis bullosa caused by glycine substitutions in collagen type VII.,"Dystrophic epidermolysis bullosa (DEB) is a rare hereditary skin disorder caused by mutations in COL7A1, encoding collagen type VII.1 Clinical manifestations of COL7A1 mutations range from generalized skin blistering to mild localized blistering or nail dystrophy.2 The investigation of the molecular basis of DEB has revealed more than 540 different mutations that cannot entirely explain phenotypic variations (HGMD Professional 2010.3, https://portal.biobase-international. com/hgmd/). Inversa recessive DEB (RDEB-I) is a subtype characterized by generalized blistering in the neonatal period. The condition improves with age, and in adults blistering is restricted to intertriginous areas, and severe lesions of the oral and genital mucosa and nail changes occur in the majority of described patients.2 Recent data suggested that amino-acid substitutions affecting arginines or glycines at borders of collagenic subdomains might cause this phenotype.3 We report a German patient with an unusually mild RDEB-I harbouring compound heterozygous mutations in COL7A1.",2011-05-01 +22295750,Detection and 2-dimensional display of short tandem repeats based on signal decomposition.,This paper presents a wavelet-based Empirical Mode Decomposition (EMD) to detect short tandem repeats in DNA sequences. A wavelet subspace algorithm combined with EMD is introduced as a pre-processor and a Cross-Correlation Analysis (CCA) is applied as a post-processor to create subspaced Intrinsic Mode Functions (IMFs). The new proposed method is called the Empirical Mode and Wavelet Decomposition (EMWD). The algorithms can display the power spectral density in the two-dimensional frequency-time (f-t) plane efficiently for both very long signals and short signals. 
Simulations are applied on the real human DNA sequences from public data source Genbank (http://www.ncbi.nlm.nih.gov/Genbank/). Application of the EMWD algorithms to the short tandem repeat detection has achieved an averaged accuracy of 98.5%.,2011-01-01 +21877713,"AADS--an automated active site identification, docking, and scoring protocol for protein targets based on physicochemical descriptors.","We report here a robust automated active site detection, docking, and scoring (AADS) protocol for proteins with known structures. The active site finder identifies all cavities in a protein and scores them based on the physicochemical properties of functional groups lining the cavities in the protein. The accuracy realized on 620 proteins with sizes ranging from 100 to 600 amino acids with known drug active sites is 100% when the top ten cavity points are considered. These top ten cavity points identified are then submitted for an automated docking of an input ligand/candidate molecule. The docking protocol uses an all atom energy based Monte Carlo method. Eight low energy docked structures corresponding to different locations and orientations of the candidate molecule are stored at each cavity point giving 80 docked structures overall which are then ranked using an effective free energy function and top five structures are selected. The predicted structure and energetics of the complexes agree quite well with experiment when tested on a data set of 170 protein-ligand complexes with known structures and binding affinities. The AADS methodology is implemented on an 80 processor cluster and presented as a freely accessible, easy to use tool at http://www.scfbio-iitd.res.in/dock/ActiveSite_new.jsp .",2011-09-15 +22425540,LINGO1 and risk for essential tremor: results of a meta-analysis of rs9652490 and rs11856808.,"

Background/objectives

Recently, a genome-wide association study revealed a significant statistical association between LINGO1 rs9652490 and rs11856808 polymorphisms and the risk of developing essential tremor (ET) in Icelandic people. Because the results of further association studies were controversial, we conducted a meta-analysis including all the studies published on the risk of ET related with these polymorphisms.

Methods

The meta-analysis included 11 association studies between LINGO1 rs9652490 (3972 ET patients, 20,714 controls) and 7 association studies between LINGO1 rs11856808, and risk for ET (2076 ET patients, 18,792 controls), and was carried out by using the software Meta-Disc 1.1.1 (http://www.hrc.es/investigacion/metadisc.html; Unit of Clinical Statistics, Hospital Ramón y Cajal, Madrid, Spain). Heterogeneity between studies in terms of degree of association was tested using the Q-statistic.

Results

Global diagnostic odds-ratios (ORs) and 95% confidence intervals (CI) for rs9652490 and rs11856808 of the total series were, respectively, 1.17 (1.00-1.36) (p=0.069) and 1.20 (1.05-1.36) (p=0.016). After excluding data on Icelandic people of the discovery series (that was responsible of a high degree of heterogeneity for rs9652490 polymorphism), the ORs and CI were 1.10 (0.97-1.26) (p=0.063) and 1.12 (0.99-1.27) (p=0.034). Global ORs and 95% CI for rs9652490 and rs11856808 of familial ET patients were, respectively, 1.27 (1.03-1.57) (p=0.014) and 1.21 (1.10-1.44) (p=0.031).

Conclusions

The results of the meta-analysis suggest a relationship between LINGO1 rs11856808 polymorphism and the risk for ET and for familial ET, while rs9652490 polymorphism was only related with the risk for familial ET.",2012-03-17 +21311847,Developmental neurotoxicity testing: recommendations for developing alternative methods for the screening and prioritization of chemicals.,Developmental neurotoxicity testing (DNT) is perceived by many stakeholders to be an area in critical need of alternative methods to current animal testing protocols and guidelines. An immediate goal is to develop test methods that are capable of screening large numbers of chemicals. This document provides recommendations for developing alternative DNT approaches that will generate the type of data required for evaluating and comparing predictive capacity and efficiency across test methods and laboratories. These recommendations were originally drafted to stimulate and focus discussions of alternative testing methods and models for DNT at the TestSmart DNT II meeting (http://caat.jhsph.edu/programs/workshops/dnt2.html) and this document reflects critical feedback from all stakeholders that participated in this meeting. The intent of this document is to serve as a catalyst for engaging the research community in the development of DNT alternatives and it is expected that these recommendations will continue to evolve with the science.,2011-01-01 +21994220,FlyExpress: visual mining of spatiotemporal patterns for genes and publications in Drosophila embryogenesis.,"

Summary

Images containing spatial expression patterns illuminate the roles of different genes during embryogenesis. In order to generate initial clues to regulatory interactions, biologists frequently need to know the set of genes expressed at the same time at specific locations in a developing embryo, as well as related research publications. However, text-based mining of image annotations and research articles cannot produce all relevant results, because the primary data are images that exist as graphical objects. We have developed a unique knowledge base (FlyExpress) to facilitate visual mining of images from Drosophila melanogaster embryogenesis. By clicking on specific locations in pictures of fly embryos from different stages of development and different visual projections, users can produce a list of genes and publications instantly. In FlyExpress, each queryable embryo picture is a heat-map that captures the expression patterns of more than 4500 genes and more than 2600 published articles. In addition, one can view spatial patterns for particular genes over time as well as find other genes with similar expression patterns at a given developmental stage. Therefore, FlyExpress is a unique tool for mining spatiotemporal expression patterns in a format readily accessible to the scientific community.

Availability

http://www.flyexpress.net

Contact

s.kumar@asu.edu.",2011-10-12 +21986959,Predicting protein sumoylation sites from sequence features.,"Protein sumoylation is a post-translational modification that plays an important role in a wide range of cellular processes. Small ubiquitin-related modifier (SUMO) can be covalently and reversibly conjugated to the sumoylation sites of target proteins, many of which are implicated in various human genetic disorders. The accurate prediction of protein sumoylation sites may help biomedical researchers to design their experiments and understand the molecular mechanism of protein sumoylation. In this study, a new machine learning approach has been developed for predicting sumoylation sites from protein sequence information. Random forests (RFs) and support vector machines (SVMs) were trained with the data collected from the literature. Domain-specific knowledge in terms of relevant biological features was used for input vector encoding. It was shown that RF classifier performance was affected by the sequence context of sumoylation sites, and 20 residues with the core motif ΨKXE in the middle appeared to provide enough context information for sumoylation site prediction. The RF classifiers were also found to outperform SVM models for predicting protein sumoylation sites from sequence features. The results suggest that the machine learning approach gives rise to more accurate prediction of protein sumoylation sites than the other existing methods. The accurate classifiers have been used to develop a new web server, called seeSUMO (http://bioinfo.ggc.org/seesumo/), for sequence-based prediction of protein sumoylation sites.",2011-10-07 +21827963,Realistic retrospective dose assessments to members of the public around Spanish nuclear facilities.,"In the frame of an epidemiological study carried out in the influence areas around the Spanish nuclear facilities (ISCIII-CSN, 2009. 
Epidemiological Study of The Possible Effect of Ionizing Radiations Deriving from The Operation of Spanish Nuclear Fuel Cycle Facilities on The Health of The Population Living in Their Vicinity. Final report December 2009. Ministerio de Ciencia e Innovación, Instituto de Salud Carlos III, Consejo de Seguridad Nuclear. Madrid. Available from: http://www.csn.es/images/stories/actualidad_datos/especiales/epidemiologico/epidemiological_study.pdf), annual effective doses to public have been assessed by the Spanish Nuclear Safety Council (CSN) for over 45 years using a retrospective realistic-dose methodology. These values are compared with data from natural radiation exposure. For the affected population, natural radiation effective doses are in average 2300 times higher than effective doses due to the operation of nuclear installations (nuclear power stations and fuel cycle facilities). When considering the impact on the whole Spanish population, effective doses attributable to nuclear facilities represent in average 3.5×10(-5)mSv/y, in contrast to 1.6mSv/y from natural radiation or 1.3mSv/y from medical exposures.",2011-08-07 +21685097,An integrative clustering and modeling algorithm for dynamical gene expression data.,"

Motivation

The precise dynamics of gene expression is often crucial for proper response to stimuli. Time-course gene-expression profiles can provide insights about the dynamics of many cellular responses, but are often noisy and measured at arbitrary intervals, posing a major analysis challenge.

Results

We developed an algorithm that interleaves clustering time-course gene-expression data with estimation of dynamic models of their response by biologically meaningful parameters. In combining these two tasks we overcome obstacles posed in each one. Moreover, our approach provides an easy way to compare between responses to different stimuli at the dynamical level. We use our approach to analyze the dynamical transcriptional responses to inflammation and anti-viral stimuli in mice primary dendritic cells, and extract a concise representation of the different dynamical response types. We analyze the similarities and differences between the two stimuli and identify potential regulators of this complex transcriptional response.

Availability

The code to our method is freely available http://www.compbio.cs.huji.ac.il/DynaMiteC.

Contact

nir@cs.huji.ac.il.",2011-07-01 +21410983,Confab - Systematic generation of diverse low-energy conformers.,"

Background

Many computational chemistry analyses require the generation of conformers, either on-the-fly, or in advance. We present Confab, an open source command-line application for the systematic generation of low-energy conformers according to a diversity criterion.

Results

Confab generates conformations using the 'torsion driving approach' which involves iterating systematically through a set of allowed torsion angles for each rotatable bond. Energy is assessed using the MMFF94 forcefield. Diversity is measured using the heavy-atom root-mean-square deviation (RMSD) relative to conformers already stored. We investigated the recovery of crystal structures for a dataset of 1000 ligands from the Protein Data Bank with fewer than 1 million conformations. Confab can recover 97% of the molecules to within 1.5 Å at a diversity level of 1.5 Å and an energy cutoff of 50 kcal/mol.

Conclusions

Confab is available from http://confab.googlecode.com.",2011-03-16 +21569402,Cost effectiveness of pediatric pneumococcal conjugate vaccines: a comparative assessment of decision-making tools.,"

Background

Several decision support tools have been developed to aid policymaking regarding the adoption of pneumococcal conjugate vaccine (PCV) into national pediatric immunization programs. The lack of critical appraisal of these tools makes it difficult for decision makers to understand and choose between them. With the aim to guide policymakers on their optimal use, we compared publicly available decision-making tools in relation to their methods, influential parameters and results.

Methods

The World Health Organization (WHO) requested access to several publicly available cost-effectiveness (CE) tools for PCV from both public and private provenance. All tools were critically assessed according to the WHO's guide for economic evaluations of immunization programs. Key attributes and characteristics were compared and a series of sensitivity analyses was performed to determine the main drivers of the results. The results were compared based on a standardized set of input parameters and assumptions.

Results

Three cost-effectiveness modeling tools were provided, including two cohort-based (Pan-American Health Organization (PAHO) ProVac Initiative TriVac, and PneumoADIP) and one population-based model (GlaxoSmithKline's SUPREMES). They all compared the introduction of PCV into national pediatric immunization program with no PCV use. The models were different in terms of model attributes, structure, and data requirement, but captured a similar range of diseases. Herd effects were estimated using different approaches in each model. The main driving parameters were vaccine efficacy against pneumococcal pneumonia, vaccine price, vaccine coverage, serotype coverage and disease burden. With a standardized set of input parameters developed for cohort modeling, TriVac and PneumoADIP produced similar incremental costs and health outcomes, and incremental cost-effectiveness ratios.

Conclusions

Vaccine cost (dose price and number of doses), vaccine efficacy and epidemiology of critical endpoint (for example, incidence of pneumonia, distribution of serotypes causing pneumonia) were influential parameters in the models we compared. Understanding the differences and similarities of such CE tools through regular comparisons could render decision-making processes in different countries more efficient, as well as providing guiding information for further clinical and epidemiological research. A tool comparison exercise using standardized data sets can help model developers to be more transparent about their model structure and assumptions and provide analysts and decision makers with a more in-depth view behind the disease dynamics. Adherence to the WHO guide of economic evaluations of immunization programs may also facilitate this process. Please see related article: http://www.biomedcentral.com/1741-7007/9/55.",2011-05-12 +21373993,An open source multivariate framework for n-tissue segmentation with evaluation on public data.,"We introduce Atropos, an ITK-based multivariate n-class open source segmentation algorithm distributed with ANTs ( http://www.picsl.upenn.edu/ANTs). The Bayesian formulation of the segmentation problem is solved using the Expectation Maximization (EM) algorithm with the modeling of the class intensities based on either parametric or non-parametric finite mixtures. Atropos is capable of incorporating spatial prior probability maps (sparse), prior label maps and/or Markov Random Field (MRF) modeling. Atropos has also been efficiently implemented to handle large quantities of possible labelings (in the experimental section, we use up to 69 classes) with a minimal memory footprint. This work describes the technical and implementation aspects of Atropos and evaluates its performance on two different ground-truth datasets. 
First, we use the BrainWeb dataset from Montreal Neurological Institute to evaluate three-tissue segmentation performance via (1) K-means segmentation without use of template data; (2) MRF segmentation with initialization by prior probability maps derived from a group template; (3) Prior-based segmentation with use of spatial prior probability maps derived from a group template. We also evaluate Atropos performance by using spatial priors to drive a 69-class EM segmentation problem derived from the Hammers atlas from University College London. These evaluation studies, combined with illustrative examples that exercise Atropos options, demonstrate both performance and wide applicability of this new platform-independent open source segmentation tool.",2011-12-01 +22515642,Expression features of SOX9 associate with tumor progression and poor prognosis of hepatocellular carcinoma.,"

Background

SOX9 as a member of the SOX (SRY [sex determining region Y] box) gene superfamily has been previously demonstrated to be a proto-oncogene in a variety of malignancies. However, the clinical significance of SOX9 expression in hepatocellular carcinoma (HCC) remains unclear. The aim of this study was to investigate the expression of SOX9 in HCC and determine its correlation with tumor progression and prognosis.

Methods

One-hundred and thirty HCC patients who had undergone curative liver resection were selected and immunohistochemistry, Western blotting, and quantitative real time polymerase chain reaction (Q-PCR) were performed to analyze SOX9 expression in the respective tumors.

Results

Immunohistochemistry, Western blotting, and Q-PCR consistently confirmed SOX9 overexpression in HCC tissues compared with their adjacent nonneoplastic tissues (P < 0.01). Additionally, immunostaining showed more SOX9 positive cells in the higher tumor stage (T3 ~ 4) and tumor grade (G3) than in the lower tumor stage (T1 ~ 2, P = 0.03) and tumor grade (G1 ~ 2, P = 0.01), respectively. Moreover, HCC patients with high SOX9 expression were significantly associated with lower 5-year overall survival (P < 0.01) and lower 5-year disease-free survival (P < 0.01), respectively. The Cox proportional hazards model further showed that SOX9 over-expression was an independent poor prognostic factor for both 5-year disease-free survival (hazards ratio [HR] = 2.621, 95% confidence interval [CI] = 1.548-5.829, P = 0.01) and 5-year overall survival (HR = 3.825, CI = 1.638-7.612, P = 0.003) in HCC.

Conclusion

Our data suggest for the first time that the overexpression of SOX9 protein in HCC tissues is of predictive value on tumor progression and poor prognosis.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/9029740396926377.",2012-04-19 +21685089,Reconstruction of genealogical relationships with applications to Phase III of HapMap.,"

Motivation

Accurate inference of genealogical relationships between pairs of individuals is paramount in association studies, forensics and evolutionary analyses of wildlife populations. Current methods for relationship inference consider only a small set of close relationships and have limited to no power to distinguish between relationships with the same number of meioses separating the individuals under consideration (e.g. aunt-niece versus niece-aunt or first cousins versus great aunt-niece).

Results

We present CARROT (ClAssification of Relationships with ROTations), a novel framework for relationship inference that leverages linkage information to differentiate between rotated relationships, that is, between relationships with the same number of common ancestors and the same number of meioses separating the individuals under consideration. We demonstrate that CARROT clearly outperforms existing methods on simulated data. We also applied CARROT on four populations from Phase III of the HapMap Project and detected previously unreported pairs of third- and fourth-degree relatives.

Availability

Source code for CARROT is freely available at http://carrot.stanford.edu.

Contact

sofiakp@stanford.edu.",2011-07-01 +20855923,Predicting MHC-II binding affinity using multiple instance regression.,"Reliably predicting the ability of antigen peptides to bind to major histocompatibility complex class II (MHC-II) molecules is an essential step in developing new vaccines. Uncovering the amino acid sequence correlates of the binding affinity of MHC-II binding peptides is important for understanding pathogenesis and immune response. The task of predicting MHC-II binding peptides is complicated by the significant variability in their length. Most existing computational methods for predicting MHC-II binding peptides focus on identifying a nine amino acids core region in each binding peptide. We formulate the problems of qualitatively and quantitatively predicting flexible length MHC-II peptides as multiple instance learning and multiple instance regression problems, respectively. Based on this formulation, we introduce MHCMIR, a novel method for predicting MHC-II binding affinity using multiple instance regression. We present results of experiments using several benchmark data sets that show that MHCMIR is competitive with the state-of-the-art methods for predicting MHC-II binding peptides. An online web server that implements the MHCMIR method for MHC-II binding affinity prediction is freely accessible at http://ailab.cs.iastate.edu/mhcmir.",2011-07-01 +21436879,Unifying gene expression measures from multiple platforms using factor analysis.,"In the Cancer Genome Atlas (TCGA) project, gene expression of the same set of samples is measured multiple times on different microarray platforms. There are two main advantages to combining these measurements. First, we have the opportunity to obtain a more precise and accurate estimate of expression levels than using the individual platforms alone. 
Second, the combined measure simplifies downstream analysis by eliminating the need to work with three sets of expression measures and to consolidate results from the three platforms. We propose to use factor analysis (FA) to obtain a unified gene expression measure (UE) from multiple platforms. The UE is a weighted average of the three platforms, and is shown to perform well in terms of accuracy and precision. In addition, the FA model produces parameter estimates that allow the assessment of the model fit. The R code is provided in File S2. Gene-level FA measurements for the TCGA data sets are available from http://tcga-data.nci.nih.gov/docs/publications/unified_expression/.",2011-03-11 +21505032,Mcheza: a workbench to detect selection using dominant markers.,"

Motivation

Dominant markers (DArTs and AFLPs) are commonly used for genetic analysis in the fields of evolutionary genetics, ecology and conservation of genetic resources. The recent prominence of these markers has coincided with renewed interest in detecting the effects of local selection and adaptation at the level of the genome.

Results

We present Mcheza, an application for detecting loci under selection based on a well-evaluated F(ST)-outlier method. The application allows robust estimates to be made of model parameters (e.g. genome-wide average, neutral F(ST)), provides data import and export functions, iterative contour smoothing and generation of graphics in an easy to use graphical user interface with a computation engine that supports multicore processors for enhanced performance. Mcheza also provides functionality to mitigate common analytical errors when scanning for loci under selection.

Availability

Mcheza is freely available under GPL version 3 from http://popgen.eu/soft/mcheza.",2011-04-19 +21798034,Conversion events in gene clusters.,"

Background

Gene clusters containing multiple similar genomic regions in close proximity are of great interest for biomedical studies because of their associations with inherited diseases. However, such regions are difficult to analyze due to their structural complexity and their complicated evolutionary histories, reflecting a variety of large-scale mutational events. In particular, conversion events can mislead inferences about the relationships among these regions, as traced by traditional methods such as construction of phylogenetic trees or multi-species alignments.

Results

To correct the distorted information generated by such methods, we have developed an automated pipeline called CHAP (Cluster History Analysis Package) for detecting conversion events. We used this pipeline to analyze the conversion events that affected two well-studied gene clusters (α-globin and β-globin) and three gene clusters for which comparative sequence data were generated from seven primate species: CCL (chemokine ligand), IFN (interferon), and CYP2abf (part of cytochrome P450 family 2). CHAP is freely available at http://www.bx.psu.edu/miller_lab.

Conclusions

These studies reveal the value of characterizing conversion events in the context of studying gene clusters in complex genomes.",2011-07-28 +22247280,Estimation of pairwise sequence similarity of mammalian enhancers with word neighbourhood counts.,"

Motivation

The identity of cells and tissues is to a large degree governed by transcriptional regulation. A major part is accomplished by the combinatorial binding of transcription factors at regulatory sequences, such as enhancers. Even though binding of transcription factors is sequence-specific, estimating the sequence similarity of two functionally similar enhancers is very difficult. However, a similarity measure for regulatory sequences is crucial to detect and understand functional similarities between two enhancers and will facilitate large-scale analyses like clustering, prediction and classification of genome-wide datasets.

Results

We present the standardized alignment-free sequence similarity measure N2, a flexible framework that is defined for word neighbourhoods. We explore the usefulness of adding reverse complement words as well as words including mismatches into the neighbourhood. On simulated enhancer sequences as well as functional enhancers in mouse development, N2 is shown to outperform previous alignment-free measures. N2 is flexible, faster than competing methods and less susceptible to single sequence noise and the occurrence of repetitive sequences. Experiments on the mouse enhancers reveal that enhancers active in different tissues can be separated by pairwise comparison using N2.

Conclusion

N2 represents an improvement over previous alignment-free similarity measures without compromising speed, which makes it a good candidate for large-scale sequence comparison of regulatory sequences.

Availability

The software is part of the open-source C++ library SeqAn (www.seqan.de) and a compiled version can be downloaded at http://www.seqan.de/projects/alf.html.

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-01-12 +21492447,Using framework-based synthesis for conducting reviews of qualitative studies.,"Framework analysis is a technique used for data analysis in primary qualitative research. Recent years have seen its being adapted to conduct syntheses of qualitative studies. Framework-based synthesis shows considerable promise in addressing applied policy questions. An innovation in the approach, known as 'best fit' framework synthesis, has been published in BMC Medical Research Methodology this month. It involves reviewers in choosing a conceptual model likely to be suitable for the question of the review, and using it as the basis of their initial coding framework. This framework is then modified in response to the evidence reported in the studies in the reviews, so that the final product is a revised framework that may include both modified factors and new factors that were not anticipated in the original model. 'Best fit' framework-based synthesis may be especially suitable in addressing urgent policy questions where the need for a more fully developed synthesis is balanced by the need for a quick answer. Please see related article: http://www.biomedcentral.com/1471-2288/11/29.",2011-04-14 +21948794,Bayesian multiple-instance motif discovery with BAMBI: inference of recombinase and transcription factor binding sites.,"Finding conserved motifs in genomic sequences represents one of essential bioinformatic problems. However, achieving high discovery performance without imposing substantial auxiliary constraints on possible motif features remains a key algorithmic challenge. 
This work describes BAMBI-a sequential Monte Carlo motif-identification algorithm, which is based on a position weight matrix model that does not require additional constraints and is able to estimate such motif properties as length, logo, number of instances and their locations solely on the basis of primary nucleotide sequence data. Furthermore, should biologically meaningful information about motif attributes be available, BAMBI takes advantage of this knowledge to further refine the discovery results. In practical applications, we show that the proposed approach can be used to find sites of such diverse DNA-binding molecules as the cAMP receptor protein (CRP) and Din-family site-specific serine recombinases. Results obtained by BAMBI in these and other settings demonstrate better statistical performance than any of the four widely-used profile-based motif discovery methods: MEME, BioProspector with BioOptimizer, SeSiMCMC and Motif Sampler as measured by the nucleotide-level correlation coefficient. Additionally, in the case of Din-family recombinase target site discovery, the BAMBI-inferred motif is found to be the only one functionally accurate from the underlying biochemical mechanism standpoint. C++ and Matlab code is available at http://www.ee.columbia.edu/~guido/BAMBI or http://genomics.lbl.gov/BAMBI/.",2011-09-24 +21527030,STSE: Spatio-Temporal Simulation Environment Dedicated to Biology.,"

Background

Recently, the availability of high-resolution microscopy together with the advancements in the development of biomarkers as reporters of biomolecular interactions increased the importance of imaging methods in molecular cell biology. These techniques enable the investigation of cellular characteristics like volume, size and geometry as well as volume and geometry of intracellular compartments, and the amount of existing proteins in a spatially resolved manner. Such detailed investigations opened up many new areas of research in the study of spatial, complex and dynamic cellular systems. One of the crucial challenges for the study of such systems is the design of a well-structured and optimized workflow to provide a systematic and efficient hypothesis verification. Computer Science can efficiently address this task by providing software that facilitates handling, analysis, and evaluation of biological data to the benefit of experimenters and modelers.

Results

The Spatio-Temporal Simulation Environment (STSE) is a set of open-source tools provided to conduct spatio-temporal simulations in discrete structures based on microscopy images. The framework contains modules to digitize, represent, analyze, and mathematically model spatial distributions of biochemical species. Graphical user interface (GUI) tools provided with the software enable meshing of the simulation space based on the Voronoi concept. In addition, it supports to automatically acquire spatial information to the mesh from the images based on pixel luminosity (e.g. corresponding to molecular levels from microscopy images). STSE is freely available either as a stand-alone version or included in the linux live distribution Systems Biology Operational Software (SB.OS) and can be downloaded from http://www.stse-software.org/. The Python source code as well as a comprehensive user manual and video tutorials are also offered to the research community. We discuss main concepts of the STSE design and workflow. We demonstrate its usefulness using the example of a signaling cascade leading to formation of a morphological gradient of Fus3 within the cytoplasm of the mating yeast cell Saccharomyces cerevisiae.

Conclusions

STSE is an efficient and powerful novel platform, designed for computational handling and evaluation of microscopic images. It allows for an uninterrupted workflow including digitization, representation, analysis, and mathematical modeling. By providing the means to relate the simulation to the image data it allows for systematic, image driven model validation or rejection. STSE can be scripted and extended using the Python language. STSE should be considered rather as an API together with workflow guidelines and a collection of GUI tools than a stand alone application. The priority of the project is to provide an easy and intuitive way of extending and customizing software using the Python language.",2011-04-28 +22727185,URDME: a modular framework for stochastic simulation of reaction-transport processes in complex geometries.,"

Background

Experiments in silico using stochastic reaction-diffusion models have emerged as an important tool in molecular systems biology. Designing computational software for such applications poses several challenges. Firstly, realistic lattice-based modeling for biological applications requires a consistent way of handling complex geometries, including curved inner- and outer boundaries. Secondly, spatiotemporal stochastic simulations are computationally expensive due to the fast time scales of individual reaction- and diffusion events when compared to the biological phenomena of actual interest. We therefore argue that simulation software needs to be both computationally efficient, employing sophisticated algorithms, yet in the same time flexible in order to meet present and future needs of increasingly complex biological modeling.

Results

We have developed URDME, a flexible software framework for general stochastic reaction-transport modeling and simulation. URDME uses Unstructured triangular and tetrahedral meshes to resolve general geometries, and relies on the Reaction-Diffusion Master Equation formalism to model the processes under study. An interface to a mature geometry and mesh handling external software (Comsol Multiphysics) provides for a stable and interactive environment for model construction. The core simulation routines are logically separated from the model building interface and written in a low-level language for computational efficiency. The connection to the geometry handling software is realized via a Matlab interface which facilitates script computing, data management, and post-processing. For practitioners, the software therefore behaves much as an interactive Matlab toolbox. At the same time, it is possible to modify and extend URDME with newly developed simulation routines. Since the overall design effectively hides the complexity of managing the geometry and meshes, this means that newly developed methods may be tested in a realistic setting already at an early stage of development.

Conclusions

In this paper we demonstrate, in a series of examples with high relevance to the molecular systems biology community, that the proposed software framework is a useful tool for both practitioners and developers of spatial stochastic simulation algorithms. Through the combined efforts of algorithm development and improved modeling accuracy, increasingly complex biological models become feasible to study through computational methods. URDME is freely available at http://www.urdme.org.",2012-06-22 +22935476,"Supporting teachers and children in schools: the effectiveness and cost-effectiveness of the Incredible Years teacher classroom management programme in primary school children: a cluster randomised controlled trial, with parallel economic and process evaluations.","

Background

Childhood antisocial behaviour has high immediate and long-term costs for society and the individual, particularly in relation to mental health and behaviours that jeopardise health. Managing challenging behaviour is a commonly reported source of stress and burn out among teachers, ultimately resulting in a substantial number leaving the profession. Interventions to improve parenting do not transfer easily to classroom-based problems and the most vulnerable parents may not be easily able to access them. Honing teachers' skills in proactive behaviour management and the promotion of socio-emotional regulation, therefore, has the potential to improve both child and teacher mental health and well-being and the advantage that it might potentially benefit all the children subsequently taught by any teacher that accesses the training.

Methods/design

Cluster randomised controlled trial (RCT) of the Incredible Years teacher classroom management (TCM) course with combined economic and process evaluations. One teacher of children aged 4-9 years, from 80 schools in the South West Peninsula will be randomised to attend the TCM (intervention arm) or to ""teach as normal"" (control arm). The primary outcome measure will be the total difficulties score from the Strengths and Difficulties Questionnaire (SDQ) completed by the current class teachers prior to randomisation, and at 9, 18 and 30 months follow-up, supplemented by parent SDQs. Secondary measures include academic attainment (teacher report supplemented by direct measurement in a sub-sample), children's enjoyment of school, and teacher reports of their professional self-efficacy, and levels of burn out and stress, supplemented by structured observations of teachers' classroom management skills in a subsample. Cost data for the economic evaluation will be based on parental reports of services accessed. Cost-effectiveness, using the SDQ as the measure of effect, will be examined over the period of the RCT and over the longer term using decision analytic modelling. The process evaluation will use quantitative and qualitative approaches to assess fidelity to model, as well as explore Head teacher and teachers' experiences of TCM and investigate school factors that influence the translation of skills learnt to practice.

Discussion

This study will provide important information about whether the Teacher Classroom Management course influences child and teacher mental health and well-being in both the short and long term. It will also provide valuable insights into factors that may facilitate or impede any impact. The trial has been registered with ISRCTN (Controlled Trials Ltd) and assigned an ISRCTN number ISRCTN84130388. (http://www.controlled-trials.com/isrctn/search.html?srch=ISRCTN84130388&sort=3&dir=desc&max=10).",2012-08-30 +21729295,Comparative analysis of methods for detecting interacting loci.,"

Background

Interactions among genetic loci are believed to play an important role in disease risk. While many methods have been proposed for detecting such interactions, their relative performance remains largely unclear, mainly because different data sources, detection performance criteria, and experimental protocols were used in the papers introducing these methods and in subsequent studies. Moreover, there have been very few studies strictly focused on comparison of existing methods. Given the importance of detecting gene-gene and gene-environment interactions, a rigorous, comprehensive comparison of performance and limitations of available interaction detection methods is warranted.

Results

We report a comparison of eight representative methods, of which seven were specifically designed to detect interactions among single nucleotide polymorphisms (SNPs), with the last a popular main-effect testing method used as a baseline for performance evaluation. The selected methods, multifactor dimensionality reduction (MDR), full interaction model (FIM), information gain (IG), Bayesian epistasis association mapping (BEAM), SNP harvester (SH), maximum entropy conditional probability modeling (MECPM), logistic regression with an interaction term (LRIT), and logistic regression (LR) were compared on a large number of simulated data sets, each, consistent with complex disease models, embedding multiple sets of interacting SNPs, under different interaction models. The assessment criteria included several relevant detection power measures, family-wise type I error rate, and computational complexity. There are several important results from this study. First, while some SNPs in interactions with strong effects are successfully detected, most of the methods miss many interacting SNPs at an acceptable rate of false positives. In this study, the best-performing method was MECPM. Second, the statistical significance assessment criteria, used by some of the methods to control the type I error rate, are quite conservative, thereby limiting their power and making it difficult to fairly compare them. Third, as expected, power varies for different models and as a function of penetrance, minor allele frequency, linkage disequilibrium and marginal effects. Fourth, the analytical relationships between power and these factors are derived, aiding in the interpretation of the study results. Fifth, for these methods the magnitude of the main effect influences the power of the tests. Sixth, most methods can detect some ground-truth SNPs but have modest power to detect the whole set of interacting SNPs.

Conclusion

This comparison study provides new insights into the strengths and limitations of current methods for detecting interacting loci. This study, along with freely available simulation tools we provide, should help support development of improved methods. The simulation tools are available at: http://code.google.com/p/simulation-tool-bmc-ms9169818735220977/downloads/list.",2011-07-05 +22665256,"IMGT(®) tools for the nucleotide analysis of immunoglobulin (IG) and T cell receptor (TR) V-(D)-J repertoires, polymorphisms, and IG mutations: IMGT/V-QUEST and IMGT/HighV-QUEST for NGS.","IMGT/V-QUEST is the highly customized and integrated online IMGT(®) tool for the standardized analysis of the immunoglobulin (IG) or antibody and T cell receptor (TR) rearranged nucleotide sequences. The analysis of these antigen receptors represents a crucial challenge for the study of the adaptive immune response in normal and disease-related situations. The expressed IG and TR repertoires represent a potential of 10(12) IG and 10(12) TR per individual. This huge diversity results from mechanisms that occur at the DNA level during the IG and TR molecular synthesis. These mechanisms include the combinatorial rearrangements of the variable (V), diversity (D) and joining (J) genes, the N-diversity (deletion and addition at random of nucleotides during the V-(D)-J rearrangement) and, for IG, somatic hypermutations. IMGT/V-QUEST identifies the V, D, J genes and alleles by alignment with the germline IG and TR gene and allele sequences of the IMGT reference directory. The tool describes the V-REGION mutations and identifies the hot spot positions in the closest germline V gene. IMGT/V-QUEST integrates IMGT/JunctionAnalysis for a detailed analysis of the V-J and V-D-J junctions and IMGT/Automat for a complete annotation of the sequences and also provides IMGT Collier de Perles. 
IMGT/HighV-QUEST, the high-throughput version of IMGT/V-QUEST, implemented to answer the needs of deep sequencing data analysis from Next Generation Sequencing (NGS), allows the analysis of thousands of IG and TR sequences in a single run. IMGT/V-QUEST and IMGT/HighV-QUEST are available at the IMGT(®) Home page, http://www.imgt.org.",2012-01-01 +21649637,Cannabinoids and bone: endocannabinoids modulate human osteoclast function in vitro.,"

Background and purpose

Both CB(1) and CB(2) cannabinoid receptors have been shown to play a role in bone metabolism. Crucially, previous studies have focussed on the effects of cannabinoid ligands in murine bone cells. This study aimed to investigate the effects of cannabinoids on human bone cells in vitro.

Experimental approach

Quantitative RT-PCR was used to determine expression of cannabinoid receptors and liquid chromatography-electrospray ionization tandem mass spectrometry was used to determine the presence of endocannabinoids in human bone cells. The effect of cannabinoids on human osteoclast formation, polarization and resorption was determined by assessing the number of cells expressing α(v) β(3) or with F-actin rings, or measurement of resorption area.

Key results

Human osteoclasts express both CB(1) and CB(2) receptors. CB(2) expression was significantly higher in human monocytes compared to differentiated osteoclasts. Furthermore, the differentiation of human osteoclasts from monocytes was associated with a reduction in 2-AG levels and an increase in anandamide (AEA) levels. Treatment of osteoclasts with LPS significantly increased levels of AEA. Nanomolar concentrations of AEA and the synthetic agonists CP 55 940 and JWH015 stimulated human osteoclast polarization and resorption; these effects were attenuated in the presence of CB(1) and/or CB(2) antagonists.

Conclusions and implications

Low concentrations of cannabinoids activate human osteoclasts in vitro. There is a dynamic regulation of the expression of the CB(2) receptor and the production of the endocannabinoids during the differentiation of human bone cells. These data suggest that small molecules modulating the endocannabinoid system could be important therapeutics in human bone disease.

Linked articles

This article is part of a themed section on Cannabinoids in Biology and Medicine. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2012.165.issue-8. To view Part I of Cannabinoids in Biology and Medicine visit http://dx.doi.org/10.1111/bph.2011.163.issue-7.",2012-04-01 +21874762,[Is the rate of medical publication from Israel similar to other countries? A comparative study of three medical specialties].,"

Unlabelled

Academic careers of individual doctors are commonly evaluated by examining the number and quality of authored publications. Similarly, the extent and quality of medical research may be assessed nationwide by measuring the number of publications originating from the country of interest over time. This, in turn, may indicate the quality of medicine practiced. To evaluate the extent and quality of Israeli publications we measured the rate and quality of medical publications originating from Israel for two decades in the fields of urology, cardiology and orthopedics, and compared the data to those of other countries.

Methods

Leading journals in urology, cardiology, and orthopedics were selected. A Medline search (http://www.ncbi.nlm.nih.gov/sites/entrez) was conducted for all the publications originating in Israel between the years 1990-2009 in the selected journals. Data from Israel was compared to those from Italy, France, Germany, Egypt and Turkey. The change in rate of publications was tested using linear regression. The quality of publications was calculated by multiplying the number of publications by the relevant impact factor.

Results

While the urology publications rate in Israel increased by 32.7% in the second study decade as compared with the first, the urology publication rates during the same time period from Italy, France, Germany, Egypt and Turkey were 199%, 115%, 184%, 180% and 227% respectively. The regression coefficient for the urology publication rate was 0.51 for Israel, and 0.78, 0.95, 0.78, 0.87 and 0.97 for the other countries, respectively. The regression coefficient for the change in the quality of publications from Israel was 0.31 and 0.81, 0.75, 0.92, 0.73, and 0.92 for the other countries, respectively. In cardiology, the Israeli publication rate increased by 26% during the second study decade, whereas in the other countries the increments were 46%, 35%, 76%, 80% and 309% respectively. The regression coefficient for Israeli publication rate was 0.45, and 0.78, 0.54, 0.62, 0.13 and 0.75 for the other countries, respectively. The regression coefficient of the quality of publications in Israel was 0.3 as opposed to 0.47, 0.36, 0.48, 0.01, and 0.78 respectively. The Israeli publications in orthopedics increased by 9.3% during the second decade compared with the first. At the same time, other countries increased the publication rate in orthopedics by 69%, 121%, 173%, 140% and 296% respectively. The regression coefficient for the publication rate in orthopedics was 0.02 for Israel, and 0.62, 0.64, 0.78, 0.34 and 0.71 for the other countries, respectively. The regression coefficient of the quality of publications in Israel was 0.05 as opposed to 0.67, 0.62, 0.75, 0.31, and 0.66 in the other countries, respectively.

Conclusions

Israel lags behind Italy, France, Germany, Egypt and Turkey with regard to the increase of both the number and the quality of medical publications in urology and orthopedics. While the rate and quality of Israeli publications in cardiology surpasses those from Egypt, they lag in the number of publications in this medical field behind those of all the rest of the countries examined. In a world of rapid progress and expansion of medical research, Israel has been stagnant in publications in 3 medical specialties, rendering it inferior to other nations.",2011-07-01 +22399321,Molecular recognition by the EWS transcriptional activation domain.,"Interactions between Intrinsically Disordered Protein Regions (IDRs) and their targets commonly exhibit localised contacts via target-induced disorder to order transitions. Other more complex IDR target interactions have been termed ""fuzzy"" because the IDR does not form a well-defined induced structure. In some remarkable cases of fuzziness IDR function is apparently sequence independent and conferred by amino acid composition. Such cases have been referred to as ""random fuzziness"" but the molecular features involved are poorly characterised. The transcriptional activation domain (EAD) of oncogenic Ewing's Sarcoma Fusion Proteins (EFPs) is an ≈280 residue IDR with a biased composition restricted to Ala, Gly, Gln, Pro, Ser, Thr and Tyr. Multiple aromatic side chains (exclusively from Tyr residues) and the particular EAD composition are crucial for molecular recognition but there appears to be no other major geometrically constrained requirement. Computational analysis of the EAD using PONDR (Molecular Kinetics, Inc. http://www.pondr.com) complements the functional data and shows, accordingly, that propensity for structural order within the EAD is conferred by Tyr residues. 
To conclude, molecular recognition by the EAD is extraordinarily malleable and involves multiple aromatic contacts facilitated by a flexible peptide backbone and, most likely, a limited number of weaker contributions from amenable side chains. I propose to refer to this mode of fuzzy recognition as ""polyaromatic"", noting that it shares some fundamental features with the ""polyelectrostatic"" (phosphorylation-dependent) interaction of the Sic1 Cdk inhibitor and Cdc4. I will also speculate on more detailed models for molecular recognition by the EAD and their relationship to native (non-oncogenic) EAD function.",2012-01-01 +21914205,GOMMA: a component-based infrastructure for managing and analyzing life science ontologies and their evolution.,"

Background

Ontologies are increasingly used to structure and semantically describe entities of domains, such as genes and proteins in life sciences. Their increasing size and the high frequency of updates resulting in a large set of ontology versions necessitates efficient management and analysis of this data.

Results

We present GOMMA, a generic infrastructure for managing and analyzing life science ontologies and their evolution. GOMMA utilizes a generic repository to uniformly and efficiently manage ontology versions and different kinds of mappings. Furthermore, it provides components for ontology matching, and determining evolutionary ontology changes. These components are used by analysis tools, such as the Ontology Evolution Explorer (OnEX) and the detection of unstable ontology regions. We introduce the component-based infrastructure and show analysis results for selected components and life science applications. GOMMA is available at http://dbs.uni-leipzig.de/GOMMA.

Conclusions

GOMMA provides a comprehensive and scalable infrastructure to manage large life science ontologies and analyze their evolution. Key functions include a generic storage of ontology versions and mappings, support for ontology matching and determining ontology changes. The supported features for analyzing ontology changes are helpful to assess their impact on ontology-dependent applications such as for term enrichment. GOMMA complements OnEX by providing functionalities to manage various versions of mappings between two ontologies and allows combining different match approaches.",2011-09-13 +22463804,Validation of diagnostic accuracy using digital slides in routine histopathology.,"

Background

Robust hardware and software tools have been developed in digital microscopy during the past years for pathologists. Reports have advocated the reliability of digital slides in routine diagnostics. We have designed a retrospective, comparative study to evaluate the scanning properties and digital slide based diagnostic accuracy.

Methods

8 pathologists reevaluated 306 randomly selected cases from our archives. The slides were scanned with a 20× Plan-Apochromat objective, using a 3-chip Hitachi camera, resulting in 0.465 μm/pixel resolution. Slide management was supported with dedicated Data Base and Viewer software tools. Pathologists used their office PCs for evaluation and reached the digital slides via intranet connection. The diagnostic coherency and uncertainty related to digital slides and scanning quality were analyzed.

Results

Good to excellent image quality of slides was recorded in 96%. In half of the critical 61 digital slides, poor image quality was related to section folds or floatings. In 88.2% of the studied cases the digital diagnoses were in full agreement with the consensus. Out of the overall 36 incoherent cases, 7 (2.3%) were graded relevant without any recorded uncertainty by the pathologist. Excluding the non-field specific cases from each pathologist's record this ratio was 1.76% of all cases.

Conclusions

Our results revealed that: 1) digital slide based histopathological diagnoses can be highly coherent with those using optical microscopy; 2) the competency of pathologists is a factor more important than the quality of digital slide; 3) poor digital slide quality does not endanger patient safety as these errors are recognizable by the pathologist and further actions for correction could be taken.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1913324336747310.",2012-03-31 +21551149,An MCMC algorithm for detecting short adjacent repeats shared by multiple sequences.,"

Motivation

Repeats detection problems are traditionally formulated as string matching or signal processing problems. They cannot readily handle gaps between repeat units and are incapable of detecting repeat patterns shared by multiple sequences. This study detects short adjacent repeats with interunit insertions from multiple sequences. For biological sequences, such studies can shed light on molecular structure, biological function and evolution.

Results

The task of detecting short adjacent repeats is formulated as a statistical inference problem by using a probabilistic generative model. A Markov chain Monte Carlo algorithm is proposed to infer the parameters in a de novo fashion. Its applications on synthetic and real biological data show that the new method not only has a competitive edge over existing methods, but also can provide a way to study the structure and the evolution of repeat-containing genes.

Availability

The related C++ source code and datasets are available at http://ihome.cuhk.edu.hk/%7Eb118998/share/BASARD.zip.

Contact

xfan@sta.cuhk.edu.hk",2011-05-06 +22568892,Primary central nervous system plasmablastic lymphoma presenting in human immunodeficiency virus-negative but Epstein-Barr virus-positive patient: a case report.,"We report a 32-year-old Outer Mongolian man, with plasmablastic lymphoma (PBL) primarily occured in the central nervous system and diagnosed by surgical resection. This patient appeared headache and magnetic resonance imaging (MRI) showed multiple lesions in the right cerebral hemisphere including the right frontal-parietal lobe and right basal ganglia and the left cerebellum, he was diagnosed as lymphoma by stereotactic biopsy in January 2009 in local hospital, and was given radiotherapy 33 times after the biopsy. The patient was admitted to The Military General Hospital of Beijing PLA., Beijing, P.R. China on March 9th, 2011, with chief complaints of right limbs convulsioned suddenly, then fell down and lose of his consciousness, then awoke after 4 to 5 minutes, with symptoms of angulus oris numbness and the right upper limb powerless ten days ago.MRI of the brain revealed a well-defined hyperdense and enhancing mass in the left frontal-parietal lobe, the meninges are closely related, there was extensive peritumoural edema noted with pressure effects, as evident by effacement of the left lateral ventricles and a 0.5 cm shift of the midline to the right side.Surgical resection showed markedly atypical, large singly dispersed or cohesive proliferation of plasmacytoid cells with frequent abnormal mitoses and binucleation, some neoplastic cells were large with round or oval nuclei and showed coarse chromatin and smaller or unapparent nucleoli, some neoplastic cells with prominent nucleoli, apoptosis and necrosis were often presented. Immunohistochemistry staining and gene rearrangement together with other supportive investigation confirmed the diagnosis of primary central nervous system plasmablastic lymphoma. 
A month later, he was started on chemotherapy with R-CHOP (rituximab, cyclophosphamide, doxorubicin, leurocristine and prednisone) for a week. Other supportive treatment was provided for symptomatic epilepsy. The patient regained muscle strength in both upper limbs and right lower limb and the symptomatic epilepsy was controlled after two weeks. Then the patient was discharged. Follow-up data shows the patient to be alive eleven months after discharge.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1649317674697046.",2012-05-08 +23354762,"Muscle-splitting, subglandular, and partial submuscular augmentation mammoplasties: a 12-year retrospective analysis of 2026 primary cases.","

Background

Augmentation mammoplasty is a commonly performed procedure with a very high satisfaction rate. Various techniques have been described since the report of the first augmentation mammoplasty in 1963. Muscle-splitting augmentation mammoplasty, a technique first published in 2007, has been used by the author for primary and secondary augmentation mammoplasties and for mastopexy with augmentation.

Methods

A retrospective analysis of data prospectively collected using the Excel spreadsheet was performed. The patients were divided into three groups. The mammoplasty for group A used the subglandular pocket. In group B, the partial submuscular pocket was used for mammoplasties. Both of these groups had their mammoplasties performed between 1999 and 2005. Group C, the third group, included patients who had muscle-splitting mammoplasties between 2005 and 2011.

Results

Group A involved 793 patients who had their augmentation mammoplasties in the subglandular pocket. Of these 793 patients, 751 had the same size implants and were included in the analysis. The mean age of the patients in group A was 30.9±7.98 years (range 18-59 years), and their mean implant size was 317.5 cc±2.05 (range 200-555). In group A, 45.1% (n=339) of the patients were smokers, and 62.2% (n=467) had drains. The majority of the patients (78%) had an overnight stay in the clinic. Hematoma was seen in 2.7% (n=20) of the group A patients. Revision was performed for 6% (n=45). Periprosthetic infection was seen in 0.4% (n=3) and minor wound healing problems in 1.3% (n=10). Group B comprised 110 patients who had mammoplasties performed in partial submuscular pockets. All the patients had the same size implants. The mean age of the group B patients was 33±8.26 years (range 20-58 years), and their mean implant size was 300.6 cc±35.92 (range 205-395). Of these 110 patients, 51.8% (n=57) were smokers, and 94.5% (n=104) had drains. Hematoma was seen in 1.8% (n=2), and revision was performed for 7.3% (n=8) in the submuscular subgroup. Infection was seen in 3.6% (n=4) and minor wound healing problems in 4.5% (n=5). Group C consisted of 1,123 patients who had breast augmentation in the muscle-splitting biplane. Of these 1,123 patients, 914 had the same size implants. The mean age of the patients was 30.0±8.78 years (range 18-67 years), and their mean implant size was 338.2 cc±58.01 (range 170-655). In group C, 33.6% of the patients were smokers, and 8 % had drains. The majority of the patients (93.4%) were treated as day cases. Hematoma was seen in 0.7%, and 1.2% of the patients had revision surgery. Infection was seen in 1.6% (n=15) and minor wound healing in 4% (n=45).

Conclusion

Muscle-splitting mammoplasty is a technique that can be performed as a day case without drains. The overall complications in the group were significantly lower than with the other two techniques performed by the author.

Level of evidence iv

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors http://www.springer.com/00266.",2013-01-26 +21554019,Motif finding in DNA sequences based on skipping nonconserved positions in background Markov chains.,"One strategy to identify transcription factor binding sites is through motif finding in upstream DNA sequences of potentially co-regulated genes. Despite extensive efforts, none of the existing algorithms perform very well. We consider a string representation that allows arbitrary ignored positions within the nonconserved portion of single motifs, and use O(2(l)) Markov chains to model the background distributions of motifs of length l while skipping these positions within each Markov chain. By focusing initially on positions that have fixed nucleotides to define core occurrences, we develop an algorithm to identify motifs of moderate lengths. We compare the performance of our algorithm to other motif finding algorithms on a few benchmark data sets, and show that significant improvement in accuracy can be obtained when the sites are sufficiently conserved within a given sample, while comparable performance is obtained when the site conservation rate is low. A software program (PosMotif) and detailed results are available online at http://faculty.cse.tamu.edu/shsze/posmotif.",2011-05-01 +21903632,Protein-protein binding affinity prediction on a diverse set of structures.,"

Motivation

Accurate binding free energy functions for protein-protein interactions are imperative for a wide range of purposes. Their construction is predicated upon ascertaining the factors that influence binding and their relative importance. A recent benchmark of binding affinities has allowed, for the first time, the evaluation and construction of binding free energy models using a diverse set of complexes, and a systematic assessment of our ability to model the energetics of conformational changes.

Results

We construct a large set of molecular descriptors using commonly available tools, introducing the use of energetic factors associated with conformational changes and disorder to order transitions, as well as features calculated on structural ensembles. The descriptors are used to train and test a binding free energy model using a consensus of four machine learning algorithms, whose performance constitutes a significant improvement over the other state of the art empirical free energy functions tested. The internal workings of the learners show how the descriptors are used, illuminating the determinants of protein-protein binding.

Availability

The molecular descriptor set and descriptor values for all complexes are available in the Supplementary Material. A web server for the learners and coordinates for the bound and unbound structures can be accessed from the website: http://bmm.cancerresearchuk.org/~Affinity.

Contact

paul.bates@cancer.org.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-07 +21979275,"Using the T-Coffee package to build multiple sequence alignments of protein, RNA, DNA sequences and 3D structures.","T-Coffee (Tree-based consistency objective function for alignment evaluation) is a versatile multiple sequence alignment (MSA) method suitable for aligning most types of biological sequences. The main strength of T-Coffee is its ability to combine third party aligners and to integrate structural (or homology) information when building MSAs. The series of protocols presented here show how the package can be used to multiply align proteins, RNA and DNA sequences. The protein section shows how users can select the most suitable T-Coffee mode for their data set. Detailed protocols include T-Coffee, the default mode, M-Coffee, a meta version able to combine several third party aligners into one, PSI (position-specific iterated)-Coffee, the homology extended mode suitable for remote homologs and Expresso, the structure-based multiple aligner. We then also show how the T-RMSD (tree based on root mean square deviation) option can be used to produce a functionally informative structure-based clustering. RNA alignment procedures are described for using R-Coffee, a mode able to use predicted RNA secondary structures when aligning RNA sequences. DNA alignments are illustrated with Pro-Coffee, a multiple aligner specific of promoter regions. We also present some of the many reformatting utilities bundled with T-Coffee. The package is an open-source freeware available from http://www.tcoffee.org/.",2011-11-01 +22316129,"Evolution of the eukaryotic ARP2/3 activators of the WASP family: WASP, WAVE, WASH, and WHAMM, and the proposed new family members WAWH and WAML.","

Background

WASP family proteins stimulate the actin-nucleating activity of the ARP2/3 complex. They include members of the well-known WASP and WAVE/Scar proteins, and the recently identified WASH and WHAMM proteins. WASP family proteins contain family specific N-terminal domains followed by proline-rich regions and C-terminal VCA domains that harbour the ARP2/3-activating regions.

Results

To reveal the evolution of ARP2/3 activation by WASP family proteins we performed a ""holistic"" analysis by manually assembling and annotating all homologs in most of the eukaryotic genomes available. We have identified two new families: the WAML proteins (WASP and MIM like), which combine the membrane-deforming and actin bundling functions of the IMD domains with the ARP2/3-activating VCA regions, and the WAWH protein (WASP without WH1 domain) that have been identified in amoebae, Apusozoa, and the anole lizard. Surprisingly, with one exception we did not identify any alternative splice forms for WASP family proteins, which is in strong contrast to other actin-binding proteins like Ena/VASP, MIM, or NHS proteins that share domains with WASP proteins.

Conclusions

Our analysis showed that the last common ancestor of the eukaryotes must have contained a homolog of WASP, WAVE, and WASH. Specific families have subsequently been lost in many taxa like the WASPs in plants, algae, Stramenopiles, and Euglenozoa, and the WASH proteins in fungi. The WHAMM proteins are metazoa specific and have most probably been invented by the Eumetazoa. The diversity of WASP family proteins has strongly been increased by many species- and taxon-specific gene duplications and multimerisations. All data is freely accessible via http://www.cymobase.org.",2012-02-08 +21429113,HapStar: automated haplotype network layout and visualization.,"Haplotype networks are commonly used for representing associations between sequences, yet there is currently no straightforward way to create optimal layouts. Automated optimal layouts are particularly useful not only because of the time-saving element but also because they avoid both human error and human-induced biases in the presentation of figures. HapStar directly uses the network connection output data generated from Arlequin (or a simple user-generated input file) and uses a force-directed algorithm to automatically lay out the network for easy visualization. In addition, this program is able to use the alternative connections generated by Arlequin to create a minimum spanning tree. HapStar provides a straightforward user-friendly interface, and publication-ready figures can be exported simply. HapStar is freely available (under a GPLv3 licence) for download for MacOSX, UNIX and Windows, at http://fo.am/hapstar.",2011-01-01 +21743942,[The role of recombinant activated factor VII in neuro- surgical and neurocritical patients].,"Central nervous system haemorrhage is a severe pathology, as a small amount of bleeding inside the brain can result in devastating consequences. 
Haemostatic agents might decrease the consequences of intracranial bleeding, whether of spontaneous, traumatic, or anticoagulation treatment etiology. Procoagulant recombinant activated factor VII (rFVIIa) has been given after central nervous system bleeding, with an off-label indication. In this update, we go over the drug mechanism of action, its role in the treatment of central nervous system haemorrhage and the published evidence regarding this subject. We carried out a literature review concerning the treatment with rFVIIa in central nervous system haemorrhage, neurocritical pathologies and neurosurgical procedures, searching in MEDLINE and in clinical trials registry: http://clinicaltrials.gov (last review September 2010), as well as performing a manual analysis of collected articles, looking for additional references. The results of randomized clinical trials do not support the systematic administration of rFVIIa for spontaneous intracranial cerebral haemorrhage. In other central nervous system related haemorrhages, the currently available data consist of retrospective studies, expert opinion or isolated case reports.",2011-06-01 +34875812,Spatial analysis of vicariance: a method for using direct geographical information in historical biogeography.,"Based on Hovenkamp's ideas on historical biogeography, we present a method for analysis of taxon history, spatial analysis of vicariance, which uses observed distributions as data, thus requiring neither predefined areas nor assumptions of hierarchical relations between areas. The method is based on identifying sister nodes with disjunct (allopatric/vicariant) distributions. To do this across the tree, internal nodes are assigned distributions (as the sum of the distributions of the descendant nodes). When distributions are less than ideal, ignoring the distribution of the problematic node(s) when assigning a distribution to their ancestors may allow us to consider additional sister nodes (i.e. 
those resulting from splits basal to the problematic node) as having disjunct distributions. The optimality criterion seeks to find the best (possibly weighted) compromise between the maximum possible number of disjunct sister nodes and the minimum number of eliminated distributions. The method can also take overlap into account. The methodology presented is implemented in VIP, a computer program available at http://www.zmuc.dk/public/phylogeny/vip. © The Willi Hennig Society 2011.",2011-04-26 +22546238,Introduction of virtual microscopy in routine surgical pathology--a hypothesis and personal view from Europe.,"The technology of whole image acquisition from histological glass slides (Virtual slides, (VS)) and its associated software such as image storage, viewers, and virtual microscopy (VM), has matured in the recent years. There is an ongoing discussion whether to introduce VM into routine diagnostic surgical pathology (tissue-based diagnosis) or not, and if these are to be introduced how best to do this. The discussion also centres around how to substantially define the mandatory standards and working conditions related to introducing VM. This article briefly describes some hypotheses alongside our perspective and that of several of our European colleagues who have experienced VS and VM either in research or routine praxis. After consideration of the different opinions and published data the following statements can be derived: 1. Experiences from static and remote telepathology as well as from daily routine diagnoses, confirm that VM is a diagnostic tool that can be handled with the same diagnostic accuracy as conventional microscopy; at least no statistically significant differences (p > 0.05) exist. 2. VM possesses several practical advantages in comparison to conventional microscopy; such as digital image storage and retrieval and contemporary display of multiple images (acquired from different stains, and/or different cases). 3. 
VM enables fast and efficient feedback between the pathologist and the laboratory in terms of ordered additional stains, automated access to the latest research for references, and fast consultation with outstanding telepathology experts. 4. Industry has already invested ""big money"" into this technology which certainly will be of influence in its future development. The main constraints against VM include the questionable reimbursement of the initial investment, the missing direct and short term financial benefit, and the loss of potential biological identity between the patient and the examined tissue. This article tries to analyze and evaluate the factors that influence the implementation of VM into routine tissue-based diagnosis, for example in combination with predictive diagnosis. It focuses on describing the advantages of modern and innovative electronically based communication technology.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1245603103708547.",2012-04-30 +22184725,Innate transcriptional networks activated in bladder in response to uropathogenic Escherichia coli drive diverse biological pathways and rapid synthesis of IL-10 for defense against bacterial urinary tract infection.,"Early transcriptional activation events that occur in bladder immediately following bacterial urinary tract infection (UTI) are not well defined. In this study, we describe the whole bladder transcriptome of uropathogenic Escherichia coli (UPEC) cystitis in mice using genome-wide expression profiling to define the transcriptome of innate immune activation stemming from UPEC colonization of the bladder. Bladder RNA from female C57BL/6 mice, analyzed using 1.0 ST-Affymetrix microarrays, revealed extensive activation of diverse sets of innate immune response genes, including those that encode multiple IL-family members, receptors, metabolic regulators, MAPK activators, and lymphocyte signaling molecules. These were among 1564 genes differentially regulated at 2 h postinfection, highlighting a rapid and broad innate immune response to bladder colonization. Integrative systems-level analyses using InnateDB (http://www.innatedb.com) bioinformatics and ingenuity pathway analysis identified multiple distinct biological pathways in the bladder transcriptome with extensive involvement of lymphocyte signaling, cell cycle alterations, cytoskeletal, and metabolic changes. A key regulator of IL activity identified in the transcriptome was IL-10, which was analyzed functionally to reveal marked exacerbation of cystitis in IL-10-deficient mice. Studies of clinical UTI revealed significantly elevated urinary IL-10 in patients with UPEC cystitis, indicating a role for IL-10 in the innate response to human UTI. 
The whole bladder transcriptome presented in this work provides new insight into the diversity of innate factors that determine UTI on a genome-wide scale and will be valuable for further data mining. Identification of protective roles for other elements in the transcriptome will provide critical new insight into the complex cascade of events that underpin UTI.",2011-12-19 +21685068,Physical Module Networks: an integrative approach for reconstructing transcription regulation.,"

Motivation

Deciphering the complex mechanisms by which regulatory networks control gene expression remains a major challenge. While some studies infer regulation from dependencies between the expression levels of putative regulators and their targets, others focus on measured physical interactions.

Results

Here, we present Physical Module Networks, a unified framework that combines a Bayesian model describing modules of co-expressed genes and their shared regulation programs, and a physical interaction graph, describing the protein-protein interactions and protein-DNA binding events that coherently underlie this regulation. Using synthetic data, we demonstrate that a Physical Module Network model has similar recall and improved precision compared to a simple Module Network, as it omits many false positive regulators. Finally, we show the power of Physical Module Networks to reconstruct meaningful regulatory pathways in the genetically perturbed yeast and during the yeast cell cycle, as well as during the response of primary epithelial human cells to infection with H1N1 influenza.

Availability

The PMN software is available, free for academic use at http://www.compbio.cs.huji.ac.il/PMN/.

Contact

aregev@broad.mit.edu; nirf@cs.huji.ac.il.",2011-07-01 +22308149,Discriminating response groups in metabolic and regulatory pathway networks.,"

Motivation

Analysis of omics experiments generates lists of entities (genes, metabolites, etc.) selected based on specific behavior, such as changes in response to stress or other signals. Functional interpretation of these lists often uses category enrichment tests using functional annotations like Gene Ontology terms and pathway membership. This approach does not consider the connected structure of biochemical pathways or the causal directionality of events.

Results

The Omics Response Group (ORG) method, described in this work, interprets omics lists in the context of metabolic pathway and regulatory networks using a statistical model for flow within the networks. Statistical results for all response groups are visualized in a novel Pathway Flow plot. The statistical tests are based on the Erlang distribution model under the assumption of independent and identically Exponential-distributed random walk flows through pathways. As a proof of concept, we applied our method to an Escherichia coli transcriptomics dataset where we confirmed common knowledge of the E.coli transcriptional response to Lipid A deprivation. The main response is related to osmotic stress, and we were also able to detect novel responses that are supported by the literature. We also applied our method to an Arabidopsis thaliana expression dataset from an abscisic acid study. In both cases, conventional pathway enrichment tests detected nothing, while our approach discovered biological processes beyond the original studies.

Availability

We created a prototype for an interactive ORG web tool at http://ecoserver.vrac.iastate.edu/pathwayflow (source code is available from https://subversion.vrac.iastate.edu/Subversion/jlv/public/jlv/pathwayflow). The prototype is described along with additional figures and tables in Supplementary Material.

Contact

julied@iastate.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2012-02-04 +21701948,Mining twitter: a source for psychological wisdom of the crowds.,"Over the last few years, microblogging has gained prominence as a form of personal broadcasting media where information and opinion are mixed together without an established order, usually tightly linked with current reality. Location awareness and promptness provide researchers using the Internet with the opportunity to create ""psychological landscapes""--that is, to detect differences and changes in voiced (twittered) emotions, cognitions, and behaviors. In our article, we present iScience Maps, a free Web service for researchers, available from http://maps.iscience.deusto.es/ and http://tweetminer.eu/ . Technologically, the service is based on Twitter's streaming and search application programming interfaces (APIs), accessed through several PHP libraries, and a JavaScript frontend. This service allows researchers to assess via Twitter the effect of specific events in different places as they are happening and to make comparisons between cities, regions, or countries regarding psychological states and their evolution in the course of an event. In a step-by-step example, it is shown how to replicate a study on affective and personality characteristics inferred from first names (Mehrabian & Piercy, Personality and Social Psychology Bulletin, 19, 755-758, 1993) by mining Twitter data with iScience Maps. Results from the original study are replicated in both world regions we tested (the western U.S. 
and the U.K./Ireland); we also discover base rate of names to be a confound that needs to be controlled for in future research.",2011-09-01 +21912585,"Combining independent, weighted P-values: achieving computational stability by a systematic expansion with controllable accuracy.","Given the expanding availability of scientific data and tools to analyze them, combining different assessments of the same piece of information has become increasingly important for social, biological, and even physical sciences. This task demands, to begin with, a method-independent standard, such as the P-value, that can be used to assess the reliability of a piece of information. Good's formula and Fisher's method combine independent P-values with respectively unequal and equal weights. Both approaches may be regarded as limiting instances of a general case of combining P-values from m groups; P-values within each group are weighted equally, while weight varies by group. When some of the weights become nearly degenerate, as cautioned by Good, numeric instability occurs in computation of the combined P-values. We deal explicitly with this difficulty by deriving a controlled expansion, in powers of differences in inverse weights, that provides both accurate statistics and stable numerics. We illustrate the utility of this systematic approach with a few examples. In addition, we also provide here an alternative derivation for the probability distribution function of the general case and show how the analytic formula obtained reduces to both Good's and Fisher's methods as special cases. A C++ program, which computes the combined P-values with equal numerical stability regardless of whether weights are (nearly) degenerate or not, is available for download at our group website http://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads/CoinedPValues.html.",2011-08-31 +22883224,Markers of hypercoagulability in CAD patients. Effects of single aspirin and clopidogrel treatment.,"

Unlabelled

Background

Cardiovascular disease with disturbances in the haemostatic system, might lead to thrombotic complications with clinical manifestations like acute myocardial infarction (AMI) and stroke. Activation of the coagulation cascade with subsequent increased thrombin generation, characterizes a prothrombotic phenotype. In the present study we investigated whether prothrombotic markers were associated with risk factors and clinical subgroups in a cohort of patients with angiographically verified coronary artery disease (CAD). The patients were randomized to long-term treatment with the antiplatelet drugs aspirin or clopidogrel, and we further investigated the effect on hypercoagulability of such treatment for 1 year, of which limited data exists.

Methods

Venous blood samples were collected in fasting condition between 08:00 and 10:30 am, at baseline when all patients were on aspirin therapy (n = 1001) and in 276 patients after 1 year follow-up on aspirin or clopidogrel. In vivo thrombin generation was assessed by prothrombin fragment 1 + 2 (F1+2) and D-dimer, and the endogenous thrombin potential (ETP) in the calibrated automated thrombogram (CAT) assay, representing ex vivo thrombin generation. In addition, soluble tissue factor (sTF) and free- and total tissue factor pathway inhibitor (TFPI) were measured.

Results

We found age to be significantly associated with F1+2 and D-dimer (β = 0.229 and β =0.417 respectively, p <0.001, both). Otherwise, only weak associations were found. F1+2 and D-dimer were higher in women compared to men (p <0.001 and p = 0.033, respectively). Smokers had elevated levels of ETP compared to non-smokers (p = 0.014). Additionally, patients on renin-angiotensin system (RAS) inhibition showed significantly higher levels of F1+2, compared to non-users (p = 0.013). Both aspirin and clopidogrel reduced levels of ETP after 12 months intervention (p = 0.003 and p <0.001, respectively) and the levels of F1+2 were significantly more reduced on aspirin compared to clopidogrel (p = 0.023).

Conclusions

In the present population of stable CAD, we could demonstrate a more hypercoagulable profile among women, smokers and patients on RAS medication, assessed by the prothrombotic markers F1+2, D-dimer and ETP. Long-term antiplatelet treatment with aspirin alone seems to attenuate thrombin generation to a greater extent than with clopidogrel alone. The study is registered at http://www.clinicaltrials.gov: NCT00222261.",2012-08-10 +21714868,phenosim--A software to simulate phenotypes for testing in genome-wide association studies.,"

Background

There is a great interest in understanding the genetic architecture of complex traits in natural populations. Genome-wide association studies (GWAS) are becoming routine in human, animal and plant genetics to understand the connection between naturally occurring genotypic and phenotypic variation. Coalescent simulations are commonly used in population genetics to simulate genotypes under different parameters and demographic models.

Results

Here, we present phenosim, a software to add a phenotype to genotypes generated in time-efficient coalescent simulations. Both qualitative and quantitative phenotypes can be generated and it is possible to partition phenotypic variation between additive effects and epistatic interactions between causal variants. The output formats of phenosim are directly usable as input for different GWAS tools. The applicability of phenosim is shown by simulating a genome-wide association study in Arabidopsis thaliana.

Conclusions

By using the coalescent approach to generate genotypes and phenosim to add phenotypes, the data sets can be used to assess the influence of various factors such as demography, genetic architecture or selection on the statistical power of association methods to detect causal genetic variants under a wide variety of population genetic scenarios. phenosim is freely available from the authors' website http://evoplant.uni-hohenheim.de.",2011-06-29 +22038416,Carboxylator: incorporating solvent-accessible surface area for identifying protein carboxylation sites.,"In proteins, glutamate (Glu) residues are transformed into γ-carboxyglutamate (Gla) residues in a process called carboxylation. The process of protein carboxylation catalyzed by γ-glutamyl carboxylase is deemed to be important due to its involvement in biological processes such as blood clotting cascade and bone growth. There is an increasing interest within the scientific community to identify protein carboxylation sites. However, experimental identification of carboxylation sites via mass spectrometry-based methods is observed to be expensive, time-consuming, and labor-intensive. Thus, we were motivated to design a computational method for identifying protein carboxylation sites. This work aims to investigate the protein carboxylation by considering the composition of amino acids that surround modification sites. With the implication of a modified residue prefers to be accessible on the surface of a protein, the solvent-accessible surface area (ASA) around carboxylation sites is also investigated. Radial basis function network is then employed to build a predictive model using various features for identifying carboxylation sites. Based on a five-fold cross-validation evaluation, a predictive model trained using the combined features of amino acid sequence (AA20D), amino acid composition, and ASA, yields the highest accuracy at 0.874. 
Furthermore, an independent test done involving data not included in the cross-validation process indicates that in silico identification is a feasible means of preliminary analysis. Additionally, the predictive method presented in this work is implemented as Carboxylator ( http://csb.cse.yzu.edu.tw/Carboxylator/ ), a web-based tool for identifying carboxylated proteins with modification sites in order to help users in investigating γ-glutamyl carboxylation.",2011-10-22 +21873288,Predominance of sequence type 1 group with serotype VI among group B streptococci with reduced penicillin susceptibility identified in Japan.,"

Background

Although group B Streptococcus (GBS; i.e. Streptococcus agalactiae) has been considered to be uniformly susceptible to β-lactams, GBS isolates with reduced penicillin susceptibility (PRGBS) have been reported from Japan and North America. In this study, PRGBS from Japan were characterized by multilocus sequence typing (MLST) and the results compared with data on PRGBS reported from the USA.

Methods

Twenty-eight clinical isolates of PRGBS recovered in Japan (including 22 isolates previously analysed by PFGE) were analysed by MLST and eBURST (http://eburst.mlst.net/).

Results

Twenty-three isolates were found to belong to the sequence type 1 (ST1) group (11 ST458, 7 ST1, 3 ST297, 1 ST358 and 1 ST4), while the remaining 5 isolates formed the ST23 group. Among 11 ST458 and 7 ST1 isolates, 9 and 4 were serotype VI, respectively, indicating a probable correlation between the ST1 group and serotype VI for PRGBS in Japan.

Conclusions

PRGBS in Japan could be classified into at least two ST groups, ST1 and ST23, which are genetically different from the ST19 PRGBS isolated in the USA, though five allele variations were seen between ST1 and ST19, implying a slight genetic relatedness.",2011-08-26 +22373375,User centered and ontology based information retrieval system for life sciences.,"

Background

Because of the increasing number of electronic resources, designing efficient tools to retrieve and exploit them is a major challenge. Some improvements have been offered by semantic Web technologies and applications based on domain ontologies. In life science, for instance, the Gene Ontology is widely exploited in genomic applications and the Medical Subject Headings is the basis of biomedical publications indexation and information retrieval process proposed by PubMed. However current search engines suffer from two main drawbacks: there is limited user interaction with the list of retrieved resources and no explanation for their adequacy to the query is provided. Users may thus be confused by the selection and have no idea on how to adapt their queries so that the results match their expectations.

Results

This paper describes an information retrieval system that relies on domain ontology to widen the set of relevant documents that is retrieved and that uses a graphical rendering of query results to favor user interactions. Semantic proximities between ontology concepts and aggregating models are used to assess documents adequacy with respect to a query. The selection of documents is displayed in a semantic map to provide graphical indications that make explicit to what extent they match the user's query; this man/machine interface favors a more interactive and iterative exploration of data corpus, by facilitating query concepts weighting and visual explanation. We illustrate the benefit of using this information retrieval system on two case studies one of which aiming at collecting human genes related to transcription factors involved in hemopoiesis pathway.

Conclusions

The ontology based information retrieval system described in this paper (OBIRS) is freely available at: http://www.ontotoolkit.mines-ales.fr/ObirsClient/. This environment is a first step towards a user centred application in which the system enlightens relevant information to provide decision help.",2012-01-25 +23033500,Nonmalignant breast lesions: ADCs of benign and high-risk subtypes assessed as false-positive at dynamic enhanced MR imaging.,"

Purpose

To evaluate the diffusion-weighted (DW) imaging characteristics of nonmalignant lesion subtypes assessed as false-positive findings at conventional breast magnetic resonance (MR) imaging.

Materials and methods

This HIPAA-compliant retrospective study had institutional review board approval, and the need for informed patient consent was waived. Lesions assessed as Breast Imaging Reporting and Data System category 4 or 5 at clinical dynamic contrast material-enhanced MR imaging that subsequently proved nonmalignant at biopsy were retrospectively reviewed. One hundred seventy-five nonmalignant breast lesions in 165 women were evaluated. Apparent diffusion coefficients (ADCs) from DW imaging (b = 0, 600 sec/mm(2)) were calculated for each lesion and were compared between subtypes and with an ADC threshold of 1.81 × 10(-3) mm(2)/sec (determined in a prior study to achieve 100% sensitivity).

Results

Eighty-one (46%) lesions exhibited ADCs greater than the predetermined threshold. The most prevalent lesion subtypes with mean ADCs above the threshold were fibroadenoma ([1.94 ± 0.38 {standard deviation}] × 10(-3) mm(2)/sec; n = 30), focal fibrosis ([1.84 ± 0.48] × 10(-3) mm(2)/sec; n = 19), normal tissue ([1.81 ± 0.47] × 10(-3) mm(2)/sec; n = 13), apocrine metaplasia ([2.01 ± 0.38] × 10(-3) mm(2)/sec; n = 13), usual ductal hyperplasia ([1.83 ± 0.49] × 10(-3) mm(2)/sec; n = 12), and inflammation ([1.95 ± 0.46] × 10(-3) mm(2)/sec; n = 10). Atypical ductal hyperplasia ([1.48 ± 0.36] × 10(-3) mm(2)/sec; n = 23) was the most common lesion subtype with ADC below the threshold. Lymph nodes exhibited the lowest mean ADC of all nonmalignant lesions ([1.28 ± 0.23] × 10(-3) mm(2)/sec; n = 4). High-risk lesions (atypical ductal hyperplasia and lobular neoplasia) showed significantly lower ADCs than other benign lesions (P < .0001) and were the most common lesions with ADCs below the threshold.

Conclusion

Assessing ADC along with dynamic contrast-enhanced MR imaging features may decrease the number of avoidable false-positive findings at breast MR imaging and reduce the number of preventable biopsies. The ability of DW imaging to help differentiate high-risk lesions requiring additional work-up from other nonmalignant subtypes may further improve patient care.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.12112672/-/DC1.",2012-10-02 +21784795,Estimating classification probabilities in high-dimensional diagnostic studies.,"

Motivation

Classification algorithms for high-dimensional biological data like gene expression profiles or metabolomic fingerprints are typically evaluated by the number of misclassifications across a test dataset. However, to judge the classification of a single case in the context of clinical diagnosis, we need to assess the uncertainties associated with that individual case rather than the average accuracy across many cases. Reliability of individual classifications can be expressed in terms of class probabilities. While classification algorithms are a well-developed area of research, the estimation of class probabilities is considerably less progressed in biology, with only a few classification algorithms that provide estimated class probabilities.

Results

We compared several probability estimators in the context of classification of metabolomics profiles. Evaluation criteria included sparseness biases, calibration of the estimator, the variance of the estimator and its performance in identifying highly reliable classifications. We observed that several of them display artifacts that compromise their use in practice. Classification probabilities based on a combination of local cross-validation error rates and monotone regression prove superior in metabolomic profiling.

Availability

The source code written in R is freely available at http://compdiag.uni-regensburg.de/software/probEstimation.shtml.

Contact

inka.appel@klinik.uni-regensburg.de.",2011-07-22 +21518053,Next-generation mapping of Arabidopsis genes.,"Next-generation genomic sequencing technologies have made it possible to directly map mutations responsible for phenotypes of interest via direct sequencing. However, most mapping strategies proposed to date require some prior genetic analysis, which can be very time-consuming even in genetically tractable organisms. Here we present a de novo method for rapidly and robustly mapping the physical location of EMS mutations by sequencing a small pooled F₂ population. This method, called Next Generation Mapping (NGM), uses a chastity statistic to quantify the relative contribution of the parental mutant and mapping lines to each SNP in the pooled F₂ population. It then uses this information to objectively localize the candidate mutation based on its exclusive segregation with the mutant parental line. A user-friendly, web-based tool for performing NGM analysis is available at http://bar.utoronto.ca/NGM. We used NGM to identify three genes involved in cell-wall biology in Arabidopsis thaliana, and, in a power analysis, demonstrate success in test mappings using as few as ten F₂ lines and a single channel of Illumina Genome Analyzer data. This strategy can easily be applied to other model organisms, and we expect that it will also have utility in crops and any other eukaryote with a completed genome sequence.",2011-07-18 +21342541,An efficient algorithmic approach for mass spectrometry-based disulfide connectivity determination using multi-ion analysis.,"

Background

Determining the disulfide (S-S) bond pattern in a protein is often crucial for understanding its structure and function. In recent research, mass spectrometry (MS) based analysis has been applied to this problem following protein digestion under both partial reduction and non-reduction conditions. However, this paradigm still awaits solutions to certain algorithmic problems fundamental amongst which is the efficient matching of an exponentially growing set of putative S-S bonded structural alternatives to the large amounts of experimental spectrometric data. Current methods circumvent this challenge primarily through simplifications, such as by assuming only the occurrence of certain ion-types (b-ions and y-ions) that predominate in the more popular dissociation methods, such as collision-induced dissociation (CID). Unfortunately, this can adversely impact the quality of results.

Method

We present an algorithmic approach to this problem that can, with high computational efficiency, analyze multiple ions types (a, b, bo, b*, c, x, y, yo, y*, and z) and deal with complex bonding topologies, such as inter/intra bonding involving more than two peptides. The proposed approach combines an approximation algorithm-based search formulation with data driven parameter estimation. This formulation considers only those regions of the search space where the correct solution resides with a high likelihood. Putative disulfide bonds thus obtained are finally combined in a globally consistent pattern to yield the overall disulfide bonding topology of the molecule. Additionally, each bond is associated with a confidence score, which aids in interpretation and assimilation of the results.

Results

The method was tested on nine different eukaryotic Glycosyltransferases possessing disulfide bonding topologies of varying complexity. Its performance was found to be characterized by high efficiency (in terms of time and the fraction of search space considered), sensitivity, specificity, and accuracy. The method was also compared with other techniques at the state-of-the-art. It was found to perform as well as or better than the competing techniques. An implementation is available at: http://tintin.sfsu.edu/~whemurad/disulfidebond.

Conclusions

This research addresses some of the significant challenges in MS-based disulfide bond determination. To the best of our knowledge, this is the first algorithmic work that can consider multiple ion types in this problem setting while simultaneously ensuring polynomial time complexity and high accuracy of results.",2011-02-15 +21994223,An automatic method for CASP9 free modeling structure prediction assessment.,"

Motivation

Manual inspection has been applied to and is well accepted for assessing critical assessment of protein structure prediction (CASP) free modeling (FM) category predictions over the years. Such manual assessment requires expertise and significant time investment, yet has the problems of being subjective and unable to differentiate models of similar quality. It is beneficial to incorporate the ideas behind manual inspection to an automatic score system, which could provide objective and reproducible assessment of structure models.

Results

Inspired by our experience in CASP9 FM category assessment, we developed an automatic superimposition independent method named Quality Control Score (QCS) for structure prediction assessment. QCS captures both global and local structural features, with emphasis on global topology. We applied this method to all FM targets from CASP9, and overall the results showed the best agreement with Manual Inspection Scores among automatic prediction assessment methods previously applied in CASPs, such as Global Distance Test Total Score (GDT_TS) and Contact Score (CS). As one of the important components to guide our assessment of CASP9 FM category predictions, this method correlates well with other scoring methods and yet is able to reveal good-quality models that are missed by GDT_TS.

Availability

The script for QCS calculation is available at http://prodata.swmed.edu/QCS/.

Contact

grishin@chop.swmed.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-10-12 +21367869,MemLoci: predicting subcellular localization of membrane proteins in eukaryotes.,"

Motivation

Subcellular localization is a key feature in the process of functional annotation of both globular and membrane proteins. In the absence of experimental data, protein localization is inferred on the basis of annotation transfer upon sequence similarity search. However, predictive tools are necessary when the localization of homologs is not known. This is so particularly for membrane proteins. Furthermore, most of the available predictors of subcellular localization are specifically trained on globular proteins and poorly perform on membrane proteins.

Results

Here we develop MemLoci, a new support vector machine-based tool that discriminates three membrane protein localizations: plasma, internal and organelle membrane. When tested on an independent set, MemLoci outperforms existing methods, reaching an overall accuracy of 70% on predicting the location in the three membrane types, with a generalized correlation coefficient as high as 0.50.

Availability

The MemLoci server is freely available on the web at: http://mu2py.biocomp.unibo.it/memloci. Datasets described in the article can be downloaded at the same site.",2011-03-02 +21779367,High-resolution quantification of focal adhesion spatiotemporal dynamics in living cells.,"Focal adhesions (FAs) are macromolecular complexes that provide a linkage between the cell and its external environment. In a motile cell, focal adhesions change size and position to govern cell migration, through the dynamic processes of assembly and disassembly. To better understand the dynamic regulation of focal adhesions, we have developed an analysis system for the automated detection, tracking, and data extraction of these structures in living cells. This analysis system was used to quantify the dynamics of fluorescently tagged Paxillin and FAK in NIH 3T3 fibroblasts followed via Total Internal Reflection Fluorescence Microscopy (TIRF). High content time series included the size, shape, intensity, and position of every adhesion present in a living cell. These properties were followed over time, revealing adhesion lifetime and turnover rates, and segregation of properties into distinct zones. As a proof-of-concept, we show how a single point mutation in Paxillin at the Jun-kinase phosphorylation site Serine 178 changes FA size, distribution, and rate of assembly. This study provides a detailed, quantitative picture of FA spatiotemporal dynamics as well as a set of tools and methodologies for advancing our understanding of how focal adhesions are dynamically regulated in living cells. A full, open-source software implementation of this pipeline is provided at http://gomezlab.bme.unc.edu/tools.",2011-07-14 +21558154,Model building and intelligent acquisition with application to protein subcellular location classification.,"

Motivation

We present a framework and algorithms to intelligently acquire movies of protein subcellular location patterns by learning their models as they are being acquired, and simultaneously determining how many cells to acquire as well as how many frames to acquire per cell. This is motivated by the desire to minimize acquisition time and photobleaching, given the need to build such models for all proteins, in all cell types, under all conditions. Our key innovation is to build models during acquisition rather than as a post-processing step, thus allowing us to intelligently and automatically adapt the acquisition process given the model acquired.

Results

We validate our framework on protein subcellular location classification, and show that the combination of model building and intelligent acquisition results in time and storage savings without loss of classification accuracy, or alternatively, higher classification accuracy for the same total acquisition time.

Availability and implementation

The data and software used for this study will be made available upon publication at http://murphylab.web.cmu.edu/software and http://www.andrew.cmu.edu/user/jelenak/Software.

Contact

jelenak@cmu.edu.",2011-05-09 +22395766,Enabling high grayscale resolution displays and accurate response time measurements on conventional computers. ,"Display systems based on conventional computer graphics cards are capable of generating images with 8-bit gray level resolution. However, most experiments in vision research require displays with more than 12 bits of luminance resolution. Several solutions are available. Bit++ (1) and DataPixx (2) use the Digital Visual Interface (DVI) output from graphics cards and high resolution (14 or 16-bit) digital-to-analog converters to drive analog display devices. The VideoSwitcher (3) described here combines analog video signals from the red and blue channels of graphics cards with different weights using a passive resistor network (4) and an active circuit to deliver identical video signals to the three channels of color monitors. The method provides an inexpensive way to enable high-resolution monochromatic displays using conventional graphics cards and analog monitors. It can also provide trigger signals that can be used to mark stimulus onsets, making it easy to synchronize visual displays with physiological recordings or response time measurements. Although computer keyboards and mice are frequently used in measuring response times (RT), the accuracy of these measurements is quite low. The RTbox is a specialized hardware and software solution for accurate RT measurements. Connected to the host computer through a USB connection, the driver of the RTbox is compatible with all conventional operating systems. It uses a microprocessor and high-resolution clock to record the identities and timing of button events, which are buffered until the host computer retrieves them. The recorded button events are not affected by potential timing uncertainties or biases associated with data transmission and processing in the host computer. The asynchronous storage greatly simplifies the design of user programs. 
Several methods are available to synchronize the clocks of the RTbox and the host computer. The RTbox can also receive external triggers and be used to measure RT with respect to external events. Both VideoSwitcher and RTbox are available for users to purchase. The relevant information and many demonstration programs can be found at http://lobes.usc.edu/.",2012-02-29 +21546392,Boulder ALignment Editor (ALE): a web-based RNA alignment tool.,"

Summary

The explosion of interest in non-coding RNAs, together with improvements in RNA X-ray crystallography, has led to a rapid increase in RNA structures at atomic resolution from 847 in 2005 to 1900 in 2010. The success of whole-genome sequencing has led to an explosive growth of unaligned homologous sequences. Consequently, there is a compelling and urgent need for user-friendly tools for producing structure-informed RNA alignments. Most alignment software considers the primary sequence alone; some specialized alignment software can also include Watson-Crick base pairs, but none adequately addresses the needs introduced by the rapid influx of both sequence and structural data. Therefore, we have developed the Boulder ALignment Editor (ALE), which is a web-based RNA alignment editor, designed for editing and assessing alignments using structural information. Some features of BoulderALE include the annotation and evaluation of an alignment based on isostericity of Watson-Crick and non-Watson-Crick base pairs, along with the collapsing (horizontally and vertically) of the alignment, while maintaining the ability to edit the alignment.

Availability

http://www.microbio.me/boulderale.",2011-05-05 +21349863,ACT: aggregation and correlation toolbox for analyses of genome tracks.,"

Unlabelled

We have implemented aggregation and correlation toolbox (ACT), an efficient, multifaceted toolbox for analyzing continuous signal and discrete region tracks from high-throughput genomic experiments, such as RNA-seq or ChIP-chip signal profiles from the ENCODE and modENCODE projects, or lists of single nucleotide polymorphisms from the 1000 genomes project. It is able to generate aggregate profiles of a given track around a set of specified anchor points, such as transcription start sites. It is also able to correlate related tracks and analyze them for saturation--i.e. how much of a certain feature is covered with each new succeeding experiment. The ACT site contains downloadable code in a variety of formats, interactive web servers (for use on small quantities of data), example datasets, documentation and a gallery of outputs. Here, we explain the components of the toolbox in more detail and apply them in various contexts.

Availability

ACT is available at http://act.gersteinlab.org

Contact

pi@gersteinlab.org.",2011-02-23 +21546400,Mixture models for analysis of the taxonomic composition of metagenomes.,"

Motivation

Inferring the taxonomic profile of a microbial community from a large collection of anonymous DNA sequencing reads is a challenging task in metagenomics. Because existing methods for taxonomic profiling of metagenomes are all based on the assignment of fragmentary sequences to phylogenetic categories, the accuracy of results largely depends on fragment length. This dependence complicates comparative analysis of data originating from different sequencing platforms or resulting from different preprocessing pipelines.

Results

We here introduce a new method for taxonomic profiling based on mixture modeling of the overall oligonucleotide distribution of a sample. Our results indicate that the mixture-based profiles compare well with taxonomic profiles obtained with other methods. However, in contrast to the existing methods, our approach shows a nearly constant profiling accuracy across all kinds of read lengths and it operates at an unrivaled speed.

Availability

A platform-independent implementation of the mixture modeling approach is available in terms of a MATLAB/Octave toolbox at http://gobics.de/peter/taxy. In addition, a prototypical implementation within an easy-to-use interactive tool for Windows can be downloaded.",2011-05-05 +22016062,"Multiplex sequencing of seven ocular herpes simplex virus type-1 genomes: phylogeny, sequence variability, and SNP distribution.","

Purpose

Little is known about the role of sequence variation in the pathology of HSV-1 keratitis virus. The goal was to show that a multiplex, high-throughput genome-sequencing approach is feasible for simultaneously sequencing seven HSV-1 ocular strains.

Methods

A genome sequencer was used to sequence the HSV-1 ocular isolates TFT401, 134, CJ311, CJ360, CJ394, CJ970, and OD4, in a single lane. Reads were mapped to the HSV-1 strain 17 reference genome by high-speed sequencing. ClustalW was used for alignment, and the Mega 4 package was used for phylogenetic analysis (www.megasoftware.net). Simplot was used to compare genetic variability and high-speed sequencing was used to identify SNPs (developed by Stuart Ray, Johns Hopkins University School of Medicine, Baltimore, MD, http://sray.med.som.jhml.edu/SCRoftware/simplot).

Results

Approximately 95% to 99% of the seven genomes were sequenced in a single lane with average coverage ranging from 224 to 1345. Phylogenetic analysis of the sequenced genome regions revealed at least three clades. Each strain had approximately 200 coding SNPs compared to strain 17, and these were evenly spaced along the genomes. Four genes were highly conserved, and six were more variable. Reduced coverage was obtained in the highly GC-rich terminal repeat regions.

Conclusions

Multiplex sequencing is a cost-effective way to obtain the genomic sequences of ocular HSV-1 isolates with sufficient coverage of the unique regions for genomic analysis. The number of SNPs and their distribution will be useful for analyzing the genetics of virulence, and the sequence data will be useful for studying HSV-1 evolution and for the design of structure-function studies.",2011-11-25 +21646342,"CSpritz: accurate prediction of protein disorder segments with annotation for homology, secondary structure and linear motifs.","CSpritz is a web server for the prediction of intrinsic protein disorder. It is a combination of previous Spritz with two novel orthogonal systems developed by our group (Punch and ESpritz). Punch is based on sequence and structural templates trained with support vector machines. ESpritz is an efficient single sequence method based on bidirectional recursive neural networks. Spritz was extended to filter predictions based on structural homologues. After extensive testing, predictions are combined by averaging their probabilities. The CSpritz website can elaborate single or multiple predictions for either short or long disorder. The server provides a global output page, for download and simultaneous statistics of all predictions. Links are provided to each individual protein where the amino acid sequence and disorder prediction are displayed along with statistics for the individual protein. As a novel feature, CSpritz provides information about structural homologues as well as secondary structure and short functional linear motifs in each disordered segment. Benchmarking was performed on the very recent CASP9 data, where CSpritz would have ranked consistently well with a Sw measure of 49.27 and AUC of 0.828. 
The server, together with help and methods pages including examples, is freely available at URL: http://protein.bio.unipd.it/cspritz/.",2011-06-06 +21982653,The National Registry of Genetically Triggered Thoracic Aortic Aneurysms and Cardiovascular Conditions (GenTAC): results from phase I and scientific opportunities in phase II.,"

Background

Genetically triggered thoracic aortic conditions (GenTACs) represent an important problem for patients and their families. Accordingly, the National Heart, Lung, and Blood Institute established the first phase of its national GenTAC Registry in 2006.

Enrollment and diagnoses

Between 2007 and 2010, 6 enrolling centers established the GenTAC I Registry consisting of 2,046 patients (Marfan syndrome 576 [28.2%], bicuspid aortic valve disease 504 [24.6%], aneurysm or dissection age <50 years 369 [18%], and others). Biologic samples for DNA analyses (white blood cells or saliva) are available in 97%, and stored plasma is available in 60% of enrollees.

Results

Initial scientific inquiry using the GenTAC Registry has included validation studies of genetic causes for aortic syndromes, potential usefulness of transforming growth factor beta (TGFB) blood levels in Marfan subjects, and current surgical approaches to ascending aortic conditions.

Future opportunity

The second phase of GenTAC will allow biannual follow-up of GenTAC I enrollees for up to 9 years, enrollment of an additional 1,500 subjects, further integration of imaging findings with clinical and genetic data through utilization of an imaging core laboratory, important validation of phenotype-genotype correlations through a phenotyping core laboratory, and integration of a scientific advisory committee to help define the full range and depth of the Registry's scientific capabilities. The registry resources are available to the external scientific community through an application process accessible at https://gentac.rti.org.",2011-10-01 +21634072,"Health, United States, 2010: With Special Feature on Death and Dying","Health, United States, 2010 is the 34th report on the health status of the Nation and is submitted by the Secretary of the Department of Health and Human Services to the President and the Congress of the United States in compliance with Section 308 of the Public Health Service Act. This report was compiled by the Centers for Disease Control and Prevention’s (CDC) National Center for Health Statistics (NCHS). The National Committee on Vital and Health Statistics served in a review capacity. The Health, United States series presents national trends in health statistics. The report contains a Chartbook that assesses the Nation’s health by presenting trends and current information on selected measures of morbidity, mortality, health care utilization, health risk factors, prevention, health insurance, and personal health care expenditures. This year’s Chartbook includes a special feature on death and dying. The report also contains 148 trend tables organized around four major subject areas: health status and determinants, health care utilization, health care resources, and health care expenditures. A companion product to Health, United States—Health, United States: In Brief—features information extracted from the full report. 
The complete report, In Brief, and related data products are available on the Health, United States website at: http://www.cdc.gov/nchs/hus.htm.",2011-06-03 +22357569,"Fall in peptic ulcer mortality associated with increased consultant input, prompt surgery and use of high dependency care identified through peer-review audit.","

Objectives

Patients with peptic ulceration continue to present to surgeons with complications of bleeding or perforation and to die under surgical care. This study sought to examine whether improved consultant input, timely interventions and perioperative care could reduce mortality from peptic ulcer.

Design

Prospective collection of peer-review mortality data using Scottish Audit of Surgical Mortality methodologies (http://www.SASM.org) and analysed using SPSS.

Setting

Secondary care; all hospitals in Scotland, UK, admitting surgical patients over 13 years (1994-2006).

Participants

42 736 patients admitted (38 782 operative and 3954 non-operative) with peptic ulcer disease; 1952 patients died (1338 operative and 614 non-operative deaths) with a diagnosis of peptic ulcer.

Primary and secondary outcome measures

Adverse events; consultant presence at operation, operations performed within 2 h and high dependency/intensive therapy unit (HDU/ITU) use.

Results

Annual mortality fell from 251 in 1994 to 83 in 2006, proportionately greater than the reduction in hospital admissions with peptic ulcer. Adverse events declined over time and were rare for non-operative patients. Consultant surgeon presence at operation rose from 40.0% in 1994 to 73.4% in 2006, operations performed within 2 h of admission from 10.3% in 1994 to 28.1% in 2006 and HDU/ITU use from 52.7% in 1994 to 84.4% in 2006. Consultant involvement (p=0.005) and HDU/ITU care (p=0.026) were significantly associated with a reduction in operative deaths.

Conclusion

Patients with complications of peptic ulceration admitted under surgical care should be offered consultant surgeon input, timely surgery and HDU/ITU care.",2012-02-22 +22359049,RNA-PAIRS: RNA probabilistic assignment of imino resonance shifts.,"The significant biological role of RNA has further highlighted the need for improving the accuracy, efficiency and the reach of methods for investigating RNA structure and function. Nuclear magnetic resonance (NMR) spectroscopy is vital to furthering the goals of RNA structural biology because of its distinctive capabilities. However, the dispersion pattern in the NMR spectra of RNA makes automated resonance assignment, a key step in NMR investigation of biomolecules, remarkably challenging. Herein we present RNA Probabilistic Assignment of Imino Resonance Shifts (RNA-PAIRS), a method for the automated assignment of RNA imino resonances with synchronized verification and correction of predicted secondary structure. RNA-PAIRS represents an advance in modeling the assignment paradigm because it seeds the probabilistic network for assignment with experimental NMR data, and predicted RNA secondary structure, simultaneously and from the start. Subsequently, RNA-PAIRS sets in motion a dynamic network that reverberates between predictions and experimental evidence in order to reconcile and rectify resonance assignments and secondary structure information. The procedure is halted when assignments and base-pairings are deemed to be most consistent with observed crosspeaks. The current implementation of RNA-PAIRS uses an initial peak list derived from proton-nitrogen heteronuclear multiple quantum correlation ((1)H-(15)N 2D HMQC) and proton-proton nuclear Overhauser enhancement spectroscopy ((1)H-(1)H 2D NOESY) experiments. We have evaluated the performance of RNA-PAIRS by using it to analyze NMR datasets from 26 previously studied RNAs, including a 111-nucleotide complex. 
For moderately sized RNA molecules, and over a range of comparatively complex structural motifs, the average assignment accuracy exceeds 90%, while the average base pair prediction accuracy exceeds 93%. RNA-PAIRS yielded accurate assignments and base pairings consistent with imino resonances for a majority of the NMR resonances, even when the initial predictions are only modestly accurate. RNA-PAIRS is available as a public web-server at http://pine.nmrfam.wisc.edu/RNA/.",2012-02-23 +22812450,"The TransEurope FootRace Project: longitudinal data acquisition in a cluster randomized mobile MRI observational cohort study on 44 endurance runners at a 64-stage 4,486 km transcontinental ultramarathon.","

Background

The TransEurope FootRace 2009 (TEFR09) was one of the longest transcontinental ultramarathons with an extreme endurance physical load of running nearly 4,500 km in 64 days. The aim of this study was to assess the wide spectrum of adaptive responses in humans regarding the different tissues, organs and functional systems being exposed to such chronic physical endurance load with limited time for regeneration and resulting negative energy balance. A detailed description of the TEFR project and its implemented measuring methods in relation to the hypotheses are presented.

Methods

The most important research tool was a 1.5 Tesla magnetic resonance imaging (MRI) scanner mounted on a mobile unit following the ultra runners from stage to stage each day. Forty-four study volunteers (67% of the participants) were cluster randomized into two groups for MRI measurements (22 subjects each) according to the project protocol with its different research modules: musculoskeletal system, brain and pain perception, cardiovascular system, body composition, and oxidative stress and inflammation. Complementary to the diverse daily mobile MR-measurements on different topics (muscle and joint MRI, T2*-mapping of cartilage, MR-spectroscopy of muscles, functional MRI of the brain, cardiac and vascular cine MRI, whole body MRI) other methods were also used: ice-water pain test, psychometric questionnaires, bioelectrical impedance analysis (BIA), skinfold thickness and limb circumference measurements, daily urine samples, periodic blood samples and electrocardiograms (ECG).

Results

Thirty volunteers (68%) reached the finish line at North Cape. The mean total race speed was 8.35 km/hour. Finishers invested 552 hours in total. The completion rate for planned MRI investigations was more than 95%: 741 MR-examinations with 2,637 MRI sequences (more than 200,000 picture data), 5,720 urine samples, 244 blood samples, 205 ECG, 1,018 BIA, 539 anthropological measurements and 150 psychological questionnaires.

Conclusions

This study demonstrates the feasibility of conducting a trial based centrally on mobile MR-measurements which were performed during ten weeks while crossing an entire continent. This article is the reference for contemporary result reports on the different scientific topics of the TEFR project, which may reveal additional new knowledge on the physiological and pathological processes of the functional systems on the organ, cellular and sub-cellular level at the limits of stress and strain of the human body. Please see related articles: http://www.biomedcentral.com/1741-7015/10/76 and http://www.biomedcentral.com/1741-7015/10/77.",2012-07-19 +21627843,Transcriptome map of plant mitochondria reveals islands of unexpected transcribed regions.,"

Background

Plant mitochondria contain a relatively large amount of genetic information, suggesting that their functional regulation may not be as straightforward as that of metazoans. We used a genomic tiling array to draw a transcriptomic atlas of Oryza sativa japonica (rice) mitochondria, which was predicted to be approximately 490-kb long.

Results

Whereas statistical analysis verified the transcription of all previously known functional genes such as the ones related to oxidative phosphorylation, a similar extent of RNA expression was frequently observed in the inter-genic regions where none of the previously annotated genes are located. The newly identified open reading frames (ORFs) predicted in these transcribed inter-genic regions were generally not conserved among flowering plant species, suggesting that these ORFs did not play a role in mitochondrial principal functions. We also identified two partial fragments of retrotransposon sequences as being transcribed in rice mitochondria.

Conclusion

The present study indicated the previously unexpected complexity of plant mitochondrial RNA metabolism. Our transcriptomic data (Oryza sativa Mitochondrial rna Expression Server: OsMES) is publicly accessible at [http://bioinf.mind.meiji.ac.jp/cgi-bin/gbrowse/OsMes/#search].",2011-06-01 +21564082,The endocannabinoid system in the rat dorsolateral periaqueductal grey mediates fear-conditioned analgesia and controls fear expression in the presence of nociceptive tone.,"

Background and purpose

Endocannabinoids in the midbrain periaqueductal grey (PAG) modulate nociception and unconditioned stress-induced analgesia; however, their role in fear-conditioned analgesia (FCA) has not been examined. The present study examined the role of the endocannabinoid system in the dorsolateral (dl) PAG in formalin-evoked nociceptive behaviour, conditioned fear and FCA in rats.

Experimental approach

Rats received intra-dlPAG administration of the CB1 receptor antagonist/inverse agonist rimonabant, or vehicle, before re-exposure to a context paired 24 h previously with foot shock. Formalin-evoked nociceptive behaviour and fear-related behaviours (freezing and 22 kHz ultrasonic vocalization) were assessed. In a separate cohort, levels of endocannabinoids [2-arachidonoyl glycerol (2-AG) and N-arachidonoyl ethanolamide (anandamide; AEA)] and the related N-acylethanolamines (NAEs) [N-palmitoyl ethanolamide (PEA) and N-oleoyl ethanolamide (OEA)] were measured in dlPAG tissue following re-exposure to conditioned context in the presence or absence of formalin-evoked nociceptive tone.

Key results

Re-exposure of rats to the context previously associated with foot shock resulted in FCA. Intra-dlPAG administration of rimonabant significantly attenuated FCA and fear-related behaviours expressed in the presence of nociceptive tone. Conditioned fear without formalin-evoked nociceptive tone was associated with increased levels of 2-AG, AEA, PEA and OEA in the dlPAG. FCA was specifically associated with an increase in AEA levels in the dlPAG.

Conclusions and implications

Conditioned fear to context mobilises endocannabinoids and NAEs in the dlPAG. These data support a role for endocannabinoids in the dlPAG in mediating the potent suppression of pain responding which occurs during exposure to conditioned aversive contexts.

Linked articles

This article is part of a themed section on Cannabinoids in Biology and Medicine. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2012.165.issue-8. To view Part I of Cannabinoids in Biology and Medicine visit http://dx.doi.org/10.1111/bph.2011.163.issue-7.",2012-04-01 +21718301,The GPCR-associated sorting protein 1 regulates ligand-induced down-regulation of GPR55.,"

Background and purpose

Many GPCRs, including the CB1 cannabinoid receptor, are down-regulated following prolonged agonist exposure by interacting with the GPCR-associated sorting protein-1 (GASP-1). The CB1 receptor antagonist rimonabant has also recently been described to be an agonist at GPR55, a cannabinoid-related receptor. Here we investigated the post-endocytic properties of GPR55 after agonist exposure and tested whether GASP-1 is involved in this process.

Experimental approach

We evaluated the direct protein-protein interaction of GPR55 with GASP-1 using (i) GST-binding assays and (ii) co-immunoprecipitation assays in GPR55-HEK293 cells with endogenous GASP-1 expression. We further tested the internalization, recycling and degradation of GPR55 using confocal fluorescence microscopy and biotinylation assays in the presence and absence of GASP-1 (lentiviral small hairpin RNA knockdown of GASP-1) under prolonged agonist [rimonabant (RIM), lysophosphatidylinositol (LPI)] stimulation.

Key results

We showed that the prolonged activation of GPR55 with rimonabant or LPI down-regulates GPR55 via GASP-1. GASP-1 binds to GPR55 in vitro, and this interaction was required for targeting GPR55 for degradation. Disrupting the GPR55-GASP-1 interaction prevented post-endocytic receptor degradation, and thereby allowed receptor recycling.

Conclusion and implications

These data implicate GASP-1 as an important regulator of ligand-mediated down-regulation of GPR55. By identifying GASP-1 as a key regulator of the trafficking and, by extension, functional expression of GPR55, we may be one step closer to gaining a better understanding of this receptor in response to cannabinoid drugs.

Linked articles

This article is part of a themed section on Cannabinoids in Biology and Medicine. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2012.165.issue-8. To view Part I of Cannabinoids in Biology and Medicine visit http://dx.doi.org/10.1111/bph.2011.163.issue-7.",2012-04-01 +24159459,"Estimation of HIV Seroprevalence in Colorectal Hospitals by Questionnaire Survey in Korea, 2002-2007.","

Objectives

The incidence of anal disease is higher among persons with human immunodeficiency virus (HIV) infection than among the general population. We surveyed the status of seroprevalence in colorectal hospitals in Korea.

Methods

The survey was conducted in colorectal hospitals in Korea from November to December 2008. The questionnaire was comprised of six topics about the status of HIV testing in colorectal hospitals. We gathered the data by website (http://hivqa.nih.go.kr/risk) or fax.

Results

Among 774 colorectal hospitals contacted, 109 (14%) hospitals participated in the survey. Among these, 48 hospitals (44%) performed HIV tests in their own hospitals and 11 (23%) took HIV testing by rapid method. The main reason for recommending an HIV test was surgical operation (54%) followed by endoscope (11%) and health checkup (9%). The annual number of HIV tests increased from 58,647 (at 21 hospitals) in 2002 to 246,709 (at 58 hospitals) in 2007. HIV seroprevalence was >3.0 per 10,000 individuals during 2002-2005, decreased to 2.2 per 10,000 individuals in 2006 and rose to 2.8 per 10,000 individuals in 2007.

Conclusions

HIV seroprevalence of colorectal hospitals was more than twice that of general hospitals in Korea. HIV surveillance systems based on colorectal hospitals for HIV/AIDS transmission prevention by early HIV diagnosis are needed.",2011-08-04 +21494193,"The efficacy of systematic active conservative treatment for patients with severe sciatica: a single-blind, randomized, clinical, controlled trial.","

Study design

Prospective single-blind, randomized, clinical, controlled trial.

Objective

To evaluate the efficacy of active conservative treatment and to compare 2 active conservative treatment programs for patients with severe sciatica.

Summary of background data

Reviews have demonstrated little or no efficacy for passive conservative treatment modalities in patients suffering from sciatica. The results for surgery are conflicting. Cohort studies have shown excellent results for active treatment modalities in patients with sciatica.

Methods

One hundred eighty-one consecutive patients with radicular pain below the knee were examined at the baseline, at 8 weeks, and at 1 year after the treatment. Participants were randomized into 2 groups: (1) symptom-guided exercises + information + advice to stay active and (2) sham exercises + information + advice to stay active. Symptom-guided exercises consisted of a variety of back-related exercises given in accordance with a written algorithm in which symptoms or response to exercises determined the exercises given (http://www.sygehuslillebaelt.dk/wm345075, click exercises). Sham exercises were optional, designed to increase general blood circulation, and had no targeted effect on the back. The information was comprehensive and included anatomy, pathogenesis, and how discs heal without surgery. The advice included encouragement to stay as active as possible but to reduce activity if leg pain increased. The use of medication was optional, but only paracetamol and nonsteroidal anti-inflammatory drugs were recommended.

Results

A mean of 4.8 treatment sessions were provided. All patients experienced statistically significant and clinically important improvements in global assessment, functional status, pain, vocational status, and clinical findings. The symptom-guided exercise group improved significantly more than the sham exercise group in most outcomes.

Conclusion

Active conservative treatment was effective for patients who had symptoms and clinical findings that would normally qualify them for surgery. Although participating patients had greater faith in the sham exercises before treatment, the symptom-guided exercises were superior for most outcomes.",2012-04-01 +22333114,CodingMotif: exact determination of overrepresented nucleotide motifs in coding sequences.,"

Background

It has been increasingly appreciated that coding sequences harbor regulatory sequence motifs in addition to encoding for protein. These sequence motifs are expected to be overrepresented in nucleotide sequences bound by a common protein or small RNA. However, detecting overrepresented motifs has been difficult because of interference by constraints at the protein level. Sampling-based approaches to solve this problem based on codon-shuffling have been limited to exploring only an infinitesimal fraction of the sequence space and by their use of parametric approximations.

Results

We present a novel O(N(log N)^2)-time algorithm, CodingMotif, to identify nucleotide-level motifs of unusual copy number in protein-coding regions. Using a new dynamic programming algorithm we are able to exhaustively calculate the distribution of the number of occurrences of a motif over all possible coding sequences that encode the same amino acid sequence, given a background model for codon usage and dinucleotide biases. Our method takes advantage of the sparseness of loci where a given motif can occur, greatly speeding up the required convolution calculations. Knowledge of the distribution allows one to assess the exact non-parametric p-value of whether a given motif is over- or under-represented. We demonstrate that our method identifies known functional motifs more accurately than sampling and parametric-based approaches in a variety of coding datasets of various size, including ChIP-seq data for the transcription factors NRSF and GABP.

Conclusions

CodingMotif provides a theoretically and empirically-demonstrated advance for the detection of motifs overrepresented in coding sequences. We expect CodingMotif to be useful for identifying motifs in functional genomic datasets such as DNA-protein binding, RNA-protein binding, or microRNA-RNA binding within coding regions. A software implementation is available at http://bioinformatics.bc.edu/chuanglab/codingmotif.tar.",2012-02-14 +22082126,Correlated mutations via regularized multinomial regression.,"

Background

In addition to sequence conservation, protein multiple sequence alignments contain evolutionary signal in the form of correlated variation among amino acid positions. This signal indicates positions in the sequence that influence each other, and can be applied for the prediction of intra- or intermolecular contacts. Although various approaches exist for the detection of such correlated mutations, in general these methods utilize only pairwise correlations. Hence, they tend to conflate direct and indirect dependencies.

Results

We propose RMRCM, a method for Regularized Multinomial Regression in order to obtain Correlated Mutations from protein multiple sequence alignments. Importantly, our method is not restricted to pairwise (column-column) comparisons only, but takes into account the network nature of relationships between protein residues in order to predict residue-residue contacts. The use of regularization ensures that the number of predicted links between columns in the multiple sequence alignment remains limited, preventing overprediction. Using simulated datasets we analyzed the performance of our approach in predicting residue-residue contacts, and studied how it is influenced by various types of noise. For various biological datasets, validation with protein structure data indicates a good performance of the proposed algorithm for the prediction of residue-residue contacts, in comparison to previous results. RMRCM can also be applied to predict interactions (in addition to only predicting interaction sites or contact sites), as demonstrated by predicting PDZ-peptide interactions.

Conclusions

A novel method is presented, which uses regularized multinomial regression in order to obtain correlated mutations from protein multiple sequence alignments.

Availability

R-code of our implementation is available via http://www.ab.wur.nl/rmrcm.",2011-11-14 +21791534,A dynamic programming algorithm for identification of triplex-forming sequences.,"

Motivation

Current methods for identification of potential triplex-forming sequences in genomes and similar sequence sets rely primarily on detecting homopurine and homopyrimidine tracts. Procedures capable of detecting sequences supporting imperfect, but structurally feasible intramolecular triplex structures are needed for better sequence analysis.

Results

We modified an algorithm for detection of approximate palindromes, so as to account for the special nature of triplex DNA structures. From available literature, we conclude that approximate triplexes tolerate two classes of errors. One, analogical to mismatches in duplex DNA, involves nucleotides in triplets that do not readily form Hoogsteen bonds. The other class involves geometrically incompatible neighboring triplets hindering proper alignment of strands for optimal hydrogen bonding and stacking. We tested the statistical properties of the algorithm, as well as its correctness when confronted with known triplex sequences. The proposed algorithm satisfactorily detects sequences with intramolecular triplex-forming potential. Its complexity is directly comparable to palindrome searching.

Availability

Our implementation of the algorithm is available at http://www.fi.muni.cz/lexa/triplex as source code and a web-based search tool. The source code compiles into a library providing searching capability to other programs, as well as into a stand-alone command-line application based on this library.

Contact

lexa@fi.muni.cz

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-26 +21431536,Two dimensional gel electrophoresis analysis of mesenchymal stem cells.,"Proteomic analysis is a powerful tool to follow physiological modifications and phenotypes of mesenchymal stem cells (MSC). This approach generates informative data on expression and post-translational modifications of proteins which are of interest to assess the true potential of MSC in regenerative medicine. No matter the technologies used, proteomic analysis is always a challenge as the proteome is extremely diverse (in terms of constituents and concentrations), is changing with time, and is highly sensitive to pre-analytical conditions. In the framework of a European project (GENOSTEM http://www.genostem.org/), we have set up a multisite two dimensional gel electrophoresis (2DE) proteomic comparison of MSC. The goal is to compare cells from different origins, to follow their differentiation and to ultimately define a specific MSC proteomic signature. One important initial task is the optimization of 2DE protocols such that they are robust enough to be used in a multisite project. In this chapter, we detail these protocols which can be used not only for MSC but also for other cells in culture.",2011-01-01 +21791532,"Alignment of distantly related protein structures: algorithm, bound and implications to homology modeling.","

Motivation

Building an accurate alignment of a large set of distantly related protein structures is still very challenging.

Results

This article presents a novel method 3DCOMB that can generate a multiple structure alignment (MSA) with not only as many conserved cores as possible, but also high-quality pairwise alignments. 3DCOMB is unique in that it makes use of both local and global structure environments, combined by a statistical learning method, to accurately identify highly similar fragment blocks (HSFBs) among all proteins to be aligned. By extending the alignments of these HSFBs, 3DCOMB can quickly generate an accurate MSA without using progressive alignment. 3DCOMB significantly excels others in aligning distantly related proteins. 3DCOMB can also generate correct alignments for functionally similar regions among proteins of very different structures while many other MSA tools fail. 3DCOMB is useful for many real-world applications. In particular, it enables us to find out that there is still large improvement room for multiple template homology modeling while several other MSA tools fail to do so.

Availability

3DCOMB is available at http://ttic.uchicago.edu/~jinbo/software.htm.

Contact

jinboxu@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-26 +21622961,CoPub update: CoPub 5.0 a text mining system to answer biological questions.,"In this article, we present CoPub 5.0, a publicly available text mining system, which uses Medline abstracts to calculate robust statistics for keyword co-occurrences. CoPub was initially developed for the analysis of microarray data, but we broadened the scope by implementing new technology and new thesauri. In CoPub 5.0, we integrated existing CoPub technology with new features, and provided a new advanced interface, which can be used to answer a variety of biological questions. CoPub 5.0 allows searching for keywords of interest and its relations to curated thesauri and provides highlighting and sorting mechanisms, using its statistics, to retrieve the most important abstracts in which the terms co-occur. It also provides a way to search for indirect relations between genes, drugs, pathways and diseases, following an ABC principle, in which A and C have no direct connection but are connected via shared B intermediates. With CoPub 5.0, it is possible to create, annotate and analyze networks using the layout and highlight options of Cytoscape web, allowing for literature based systems biology. Finally, operations of the CoPub 5.0 Web service enable to implement the CoPub technology in bioinformatics workflows. CoPub 5.0 can be accessed through the CoPub portal http://www.copub.org.",2011-05-27 +21609440,ParaHaplo 3.0: A program package for imputation and a haplotype-based whole-genome association study using hybrid parallel computing.,"

Background

Use of missing genotype imputations and haplotype reconstructions are valuable in genome-wide association studies (GWASs). By modeling the patterns of linkage disequilibrium in a reference panel, genotypes not directly measured in the study samples can be imputed and used for GWASs. Since millions of single nucleotide polymorphisms need to be imputed in a GWAS, faster methods for genotype imputation and haplotype reconstruction are required.

Results

We developed a program package for parallel computation of genotype imputation and haplotype reconstruction. Our program package, ParaHaplo 3.0, is intended for use in workstation clusters using the Intel Message Passing Interface. We compared the performance of ParaHaplo 3.0 on the Japanese in Tokyo, Japan and Han Chinese in Beijing, and Chinese in the HapMap dataset. A parallel version of ParaHaplo 3.0 can conduct genotype imputation 20 times faster than a non-parallel version of ParaHaplo.

Conclusions

ParaHaplo 3.0 is an invaluable tool for conducting haplotype-based GWASs. The need for faster genotype imputation and haplotype reconstruction using parallel computing will become increasingly important as the data sizes of such projects continue to increase. ParaHaplo executable binaries and program sources are available at http://en.sourceforge.jp/projects/parallelgwas/releases/.",2011-05-24 +21790786,Invasive species research to meet the needs of resource management and planning.,"As zebra mussels (Dreissena polymorpha) continue to spread among inland lakes of the United States and Canada, there is growing interest from professionals, citizens, and other stakeholders to know which lakes are likely to be colonized by zebra mussels. Thus, we developed a classification of lake suitability for zebra mussels on the basis of measured or estimated concentrations of dissolved calcium in lake water and applied the classification to >11,500 lakes in Wisconsin and the Upper Peninsula of Michigan. The majority of lakes (58%) were classified as unsuitable (<10 mg/L Ca) for survival and reproduction of zebra mussels, 27% were identified as suitable (≥21 mg/L Ca), and 15% were classified as borderline suitable (≥10 and <21 mg/L Ca). Of the 77 inland lakes with confirmed zebra mussel records for which data on dissolved calcium were available, our method classified 74 as suitable and 3 as borderline suitable. To communicate this lake-specific suitability information and to help prioritize regional efforts to monitor and prevent the expansion of zebra mussels and other invasive species, we developed a web-based interface (available from http://www.aissmartprevention.wisc.edu/). 
Although we are still uncertain of how access to suitability information ultimately affects decision making, we believe this is a useful case study of building communication channels among researchers, practitioners, and the public.",2011-07-25 +21370081,Statistical analysis principles for Omics data.,"In Omics experiments, typically thousands of hypotheses are tested simultaneously, each based on very few independent replicates. Traditional tests like the t-test were shown to perform poorly with this new type of data. Furthermore, simultaneous consideration of many hypotheses, each prone to a decision error, requires powerful adjustments for this multiple testing situation. After a general introduction to statistical testing, we present the moderated t-statistic, the SAM statistic, and the RankProduct statistic which have been developed to evaluate hypotheses in typical Omics experiments. We also provide an introduction to the multiple testing problem and discuss some state-of-the-art procedures to address this issue. The presented test statistics are subjected to a comparative analysis of a microarray experiment comparing tissue samples of two groups of tumors. All calculations can be done using the freely available statistical software R. Accompanying, commented code is available at: http://www.meduniwien.ac.at/msi/biometrie/MIMB.",2011-01-01 +21689482,MetaDBSite: a meta approach to improve protein DNA-binding sites prediction.,"

Background

Protein-DNA interactions play an important role in many fundamental biological activities such as DNA replication, transcription and repair. Identification of amino acid residues involved in DNA binding site is critical for understanding of the mechanism of gene regulations. In the last decade, there have been a number of computational approaches developed to predict protein-DNA binding sites based on protein sequence and/or structural information.

Results

In this article, we present metaDBSite, a meta web server to predict DNA-binding residues for DNA-binding proteins. MetaDBSite integrates the prediction results from six available online web servers: DISIS, DNABindR, BindN, BindN-rf, DP-Bind and DBS-PRED and it solely uses sequence information of proteins. A large dataset of DNA-binding proteins is constructed from the Protein Data Bank and it serves as a gold-standard benchmark to evaluate the metaDBSite approach and the other six predictors.

Conclusions

The comparison results show that metaDBSite outperforms each individual approach. We believe that metaDBSite will become a useful and integrative tool for protein DNA-binding residues prediction. The MetaDBSite web-server is freely available at http://projects.biotec.tu-dresden.de/metadbsite/ and http://sysbio.zju.edu.cn/metadbsite.",2011-06-20 +21592312,Field guide to next-generation DNA sequencers.,"The diversity of available 2nd and 3rd generation DNA sequencing platforms is increasing rapidly. Costs for these systems range from < $100,000 to more than $1,000,000, with instrument run times ranging from minutes to weeks. Extensive trade-offs exist among these platforms. I summarize the major characteristics of each commercially available platform to enable direct comparisons. In terms of cost per megabase (Mb) of sequence, the Illumina and SOLiD platforms are clearly superior (≤ $0.10/Mb vs. > $10/Mb for 454 and some Ion Torrent chips). In terms of cost per nonmultiplexed sample and instrument run time, the Pacific Biosciences and Ion Torrent platforms excel, with the 454 GS Junior and Illumina MiSeq also notable in this regard. All platforms allow multiplexing of samples, but details of library preparation, experimental design and data analysis can constrain the options. The wide range of characteristics among available platforms provides opportunities both to conduct groundbreaking studies and to waste money on scales that were previously infeasible. Thus, careful thought about the desired characteristics of these systems is warranted before purchasing or using any of them. Updated information from this guide will be maintained at: http://dna.uga.edu/ and http://tomato.biol.trinity.edu/blog/.",2011-05-19 +22054122,AIGO: towards a unified framework for the analysis and the inter-comparison of GO functional annotations.,"

Background

In response to the rapid growth of available genome sequences, efforts have been made to develop automatic inference methods to functionally characterize them. Pipelines that infer functional annotation are now routinely used to produce new annotations at a genome scale and for a broad variety of species. These pipelines differ widely in their inference algorithms, confidence thresholds and data sources for reasoning. This heterogeneity makes a comparison of the relative merits of each approach extremely complex. The evaluation of the quality of the resultant annotations is also challenging given there is often no existing gold-standard against which to evaluate precision and recall.

Results

In this paper, we present a pragmatic approach to the study of functional annotations. An ensemble of 12 metrics, describing various aspects of functional annotations, is defined and implemented in a unified framework, which facilitates their systematic analysis and inter-comparison. The use of this framework is demonstrated on three illustrative examples: analysing the outputs of state-of-the-art inference pipelines, comparing electronic versus manual annotation methods, and monitoring the evolution of publicly available functional annotations. The framework is part of the AIGO library (http://code.google.com/p/aigo) for the Analysis and the Inter-comparison of the products of Gene Ontology (GO) annotation pipelines. The AIGO library also provides functionalities to easily load, analyse, manipulate and compare functional annotations and also to plot and export the results of the analysis in various formats.

Conclusions

This work is a step toward developing a unified framework for the systematic study of GO functional annotations. This framework has been designed so that new metrics on GO functional annotations can be added in a very straightforward way.",2011-11-03 +21332983,"RSpred, a set of Hidden Markov Models to detect and classify the RIFIN and STEVOR proteins of Plasmodium falciparum.","

Background

Many parasites use multicopy protein families to avoid their host's immune system through a strategy called antigenic variation. RIFIN and STEVOR proteins are variable surface antigens uniquely found in the malaria parasites Plasmodium falciparum and P. reichenowi. Although these two protein families are different, they have more similarity to each other than to any other proteins described to date. As a result, they have been grouped together in one Pfam domain. However, a recent study has described the sub-division of the RIFIN protein family into several functionally distinct groups. These sub-groups require phylogenetic analysis to sort out, which is not practical for large-scale projects, such as the sequencing of patient isolates and meta-genomic analysis.

Results

We have manually curated the rif and stevor gene repertoires of two Plasmodium falciparum genomes, isolates DD2 and HB3. We have identified 25% of mis-annotated and ~30 missing rif and stevor genes. Using these data sets, as well as sequences from the well curated reference genome (isolate 3D7) and field isolate data from Uniprot, we have developed a tool named RSpred. The tool, based on a set of hidden Markov models and an evaluation program, automatically identifies STEVOR and RIFIN sequences as well as the sub-groups: A-RIFIN, B-RIFIN, B1-RIFIN and B2-RIFIN. In addition to these groups, we distinguish a small subset of STEVOR proteins that we named STEVOR-like, as they either differ remarkably from typical STEVOR proteins or are too fragmented to reach a high enough score. When compared to Pfam and TIGRFAMs, RSpred proves to be a more robust and more sensitive method. We have applied RSpred to the proteomes of several P. falciparum strains, P. reichenowi, P. vivax, P. knowlesi and the rodent malaria species. All groups were found in the P. falciparum strains, and also in the P. reichenowi parasite, whereas none were predicted in the other species.

Conclusions

We have generated a tool for the sorting of RIFIN and STEVOR proteins, large antigenic variant protein groups, into homogeneous sub-families. Assigning functions to such protein families requires their subdivision into meaningful groups such as we have shown for the RIFIN protein family. RSpred removes the need for complicated and time consuming phylogenetic analysis methods. It will benefit both research groups sequencing whole genomes as well as others working with field isolates. RSpred is freely accessible via http://www.ifm.liu.se/bioinfo/.",2011-02-18 +22190692,ESpritz: accurate and fast prediction of protein disorder.,"

Motivation

Intrinsically disordered regions are key for the function of numerous proteins, and the scant available experimental annotations suggest the existence of different disorder flavors. While efficient predictions are required to annotate entire genomes, most existing methods require sequence profiles for disorder prediction, making them cumbersome for high-throughput applications.

Results

In this work, we present an ensemble of protein disorder predictors called ESpritz. These are based on bidirectional recursive neural networks and trained on three different flavors of disorder, including a novel NMR flexibility predictor. ESpritz can produce fast and accurate sequence-only predictions, annotating entire genomes in the order of hours on a single processor core. Alternatively, a slower but slightly more accurate ESpritz variant using sequence profiles can be used for applications requiring maximum performance. Two levels of prediction confidence allow either to maximize reasonable disorder detection or to limit expected false positives to 5%. ESpritz performs consistently well on the recent CASP9 data, reaching an S_w measure of 54.82 and area under the receiver operator curve of 0.856. The fast predictor is four orders of magnitude faster and remains better than most publicly available CASP9 methods, making it ideal for genomic scale predictions.

Conclusions

ESpritz predicts three flavors of disorder at two distinct false positive rates, either with a fast or slower and slightly more accurate approach. Given its state-of-the-art performance, it can be especially useful for high-throughput applications.

Availability

Both a web server for high-throughput analysis and a Linux executable version of ESpritz are available from: http://protein.bio.unipd.it/espritz/.",2011-12-20 +21908540,Revisiting the negative example sampling problem for predicting protein-protein interactions.,"

Motivation

A number of computational methods have been proposed that predict protein-protein interactions (PPIs) based on protein sequence features. Since the number of potential non-interacting protein pairs (negative PPIs) is very high both in absolute terms and in comparison to that of interacting protein pairs (positive PPIs), computational prediction methods rely upon subsets of negative PPIs for training and validation. Hence, the need arises for subset sampling for negative PPIs.

Results

We clarify that there are two fundamentally different types of subset sampling for negative PPIs. One is subset sampling for cross-validated testing, where one desires unbiased subsets so that predictive performance estimated with them can be safely assumed to generalize to the population level. The other is subset sampling for training, where one desires the subsets that best train predictive algorithms, even if these subsets are biased. We show that confusion between these two fundamentally different types of subset sampling led one study recently published in Bioinformatics to the erroneous conclusion that predictive algorithms based on protein sequence features are hardly better than random in predicting PPIs. Rather, both protein sequence features and the 'hubbiness' of interacting proteins contribute to effective prediction of PPIs. We provide guidance for appropriate use of random versus balanced sampling.

Availability

The datasets used for this study are available at http://www.marcottelab.org/PPINegativeDataSampling.

Contact

yungki@mail.utexas.edu; marcotte@icmb.utexas.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-09 +21372083,A probabilistic model of nuclear import of proteins.,"

Motivation

Nucleo-cytoplasmic trafficking of proteins is a core regulatory process that sustains the integrity of the nuclear space of eukaryotic cells via an interplay between numerous factors. Despite progress on experimentally characterizing a number of nuclear localization signals, their presence alone remains an unreliable indicator of actual translocation.

Results

This article introduces a probabilistic model that explicitly recognizes a variety of nuclear localization signals, and integrates relevant amino acid sequence and interaction data for any candidate nuclear protein. In particular, we develop and incorporate scoring functions based on distinct classes of classical nuclear localization signals. Our empirical results show that the model accurately predicts whether a protein is imported into the nucleus, surpassing the classification accuracy of similar predictors when evaluated on the mouse and yeast proteomes (area under the receiver operator characteristic curve of 0.84 and 0.80, respectively). The model also predicts the sequence position of a nuclear localization signal and whether it interacts with importin-α.

Availability

http://pprowler.itee.uq.edu.au/NucImport",2011-03-03 +21893518,SiTaR: a novel tool for transcription factor binding site prediction.,"

Motivation

Prediction of transcription factor binding sites (TFBSs) is crucial for promoter modeling and network inference. Quality of the predictions is spoiled by numerous false positives, which persist as the main problem for all presently available TFBS search methods.

Results

We suggest a novel approach, which is an alternative to widely used position weight matrices (PWMs) and Hidden Markov Models. Each motif of the input set is used as a search template to scan a query sequence. Found motifs are assigned scores depending on the non-randomness of the motif's occurrence, the number of matching searching motifs and the number of mismatches. The non-randomness is estimated by comparison of observed numbers of matching motifs with those predicted to occur by chance. The latter can be calculated given the base compositions of the motif and the query sequence. The method does not require preliminary alignment of the input motifs, hence avoiding uncertainties introduced by the alignment procedure. In comparison with PWM-based tools, our method demonstrates higher precision at the same sensitivity and specificity. It also tends to outperform methods combining pattern and PWM search. Most important, it allows reducing the number of false positive predictions significantly.

Availability

The method is implemented in a tool called SiTaR (Site Tracking and Recognition) and is available at http://sbi.hki-jena.de/sitar/index.php.

Contact

ekaterina.shelest@hki-jena.de

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-04 +21896508,Correlated evolution of transcription factors and their binding sites.,"

Motivation

The interaction between transcription factor (TF) and transcription factor binding site (TFBS) is essential for gene regulation. Mutation in either the TF or the TFBS may weaken their interaction and thus result in abnormalities. To maintain such vital interaction, a mutation in one of the interacting partners might be compensated by a corresponding mutation in its binding partner during the course of evolution. Confirming this co-evolutionary relationship will guide us in designing protein sequences to target a specific DNA sequence or in predicting TFBS for poorly studied proteins, or even correcting and rescuing disease mutations in clinical applications.

Results

Based on six, publicly available, experimentally validated TF-TFBS binding datasets for the basic Helix-Loop-Helix (bHLH) family, Homeo family, High-Mobility Group (HMG) family and Transient Receptor Potential channels (TRP) family, we showed that the evolutions of the TFs and their TFBSs are significantly correlated across eukaryotes. We further developed a mutual information-based method to identify co-evolved protein residues and DNA bases. This research sheds light on the dynamic relationship between TF and TFBS during their evolution. The same principle and strategy can be applied to co-evolutionary studies on protein-DNA interactions in other protein families.

Availability

All the datasets, scripts and other related files have been made freely available at: http://jjwanglab.org/co-evo.

Contact

junwen@uw.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-06 +21356002,Sequencing and assembly of low copy and genic regions of isolated Triticum aestivum chromosome arm 7DS.,"The genome of bread wheat (Triticum aestivum) is predicted to be greater than 16 Gbp in size and consist predominantly of repetitive elements, making the sequencing and assembly of this genome a major challenge. We have reduced genome sequence complexity by isolating chromosome arm 7DS and applied second-generation technology and appropriate algorithmic analysis to sequence and assemble low copy and genic regions of this chromosome arm. The assembly represents approximately 40% of the chromosome arm and all known 7DS genes. Comparison of the 7DS assembly with the sequenced genomes of rice (Oryza sativa) and Brachypodium distachyon identified large regions of conservation. The syntenic relationship between wheat, B. distachyon and O. sativa, along with available genetic mapping data, has been used to produce an annotated draft 7DS syntenic build, which is publicly available at http://www.wheatgenome.info. Our results suggest that the sequencing of isolated chromosome arms can provide valuable information of the gene content of wheat and is a step towards whole-genome sequencing and variation discovery in this important crop.",2011-02-28 +22760212,Detection of rare genomic variants from pooled sequencing using SPLINTER. ,"As DNA sequencing technology has markedly advanced in recent years(2), it has become increasingly evident that the amount of genetic variation between any two individuals is greater than previously thought(3). In contrast, array-based genotyping has failed to identify a significant contribution of common sequence variants to the phenotypic variability of common disease(4,5). 
Taken together, these observations have led to the evolution of the Common Disease / Rare Variant hypothesis suggesting that the majority of the ""missing heritability"" in common and complex phenotypes is instead due to an individual's personal profile of rare or private DNA variants(6-8). However, characterizing how rare variation impacts complex phenotypes requires the analysis of many affected individuals at many genomic loci, and is ideally compared to a similar survey in an unaffected cohort. Despite the sequencing power offered by today's platforms, a population-based survey of many genomic loci and the subsequent computational analysis required remains prohibitive for many investigators. To address this need, we have developed a pooled sequencing approach(1,9) and a novel software package(1) for highly accurate rare variant detection from the resulting data. The ability to pool genomes from entire populations of affected individuals and survey the degree of genetic variation at multiple targeted regions in a single sequencing library provides excellent cost and time savings to traditional single-sample sequencing methodology. With a mean sequencing coverage per allele of 25-fold, our custom algorithm, SPLINTER, uses an internal variant calling control strategy to call insertions, deletions and substitutions up to four base pairs in length with high sensitivity and specificity from pools of up to 1 mutant allele in 500 individuals. Here we describe the method for preparing the pooled sequencing library followed by step-by-step instructions on how to use the SPLINTER package for pooled sequencing analysis (http://www.ibridgenetwork.org/wustl/splinter). We show a comparison between pooled sequencing of 947 individuals, all of whom also underwent genome-wide array, at over 20kb of sequencing per person. Concordance between genotyping of tagged and novel variants called in the pooled sample were excellent. 
This method can be easily scaled up to any number of genomic loci and any number of individuals. By incorporating the internal positive and negative amplicon controls at ratios that mimic the population under study, the algorithm can be calibrated for optimal performance. This strategy can also be modified for use with hybridization capture or individual-specific barcodes and can be applied to the sequencing of naturally heterogeneous samples, such as tumor DNA.",2012-06-23 +22721760,MicroRNA expression profiles of trophoblastic cells.,"

Background

MicroRNAs (miRNAs) are small single-stranded RNA molecules working as post-transcriptional modulators of gene expression. Trophoblast cells are a heterogenous group of fetal cells forming the feto-maternal interface and displaying a wide spectrum of functions. The regulation of their behavior may partly be under the control of miRNAs. Therefore, we aimed to compare the miRNA profile of primary first and third trimester trophoblast cells with that of different trophoblastic cell lines.

Material and methods

Total RNA was obtained from isolated cytotrophoblast cells from healthy term and first trimester placentae and the cell lines HTR-8/SVneo (immortalized trophoblast cells), JEG-3 (choriocarcinoma), ACH-3P and AC1-M59, which are choriocarcinoma cells fused with first and third trimester trophoblast cells, respectively. The expression level of 762 different miRNAs was quantitatively analyzed by using a TaqMan Human MicroRNA Array. For testing the reproducibility of the array technique, the expression of 9 selected miRNAs has been re-analyzed by individual qPCR.

Results

The analyzed cell types share many similar patterns of miRNAs, but are significantly distinct in the expression of three miRNA clusters: chromosome 19 miRNA cluster (C19MC; containing 54 different miRNAs), C14MC (34 miRNAs) and a minor cluster (miRNA-371 to miRNA-373 cluster), also located on chromosome 19. Expression of miRNAs within C19MC increases significantly from first to third trimester trophoblast while that of C14MC members decreases. MiRNAs within the miR-371-3 cluster augment slightly. C19MC and the miR-371-3 cluster are not expressed by HTR-8/SVneo cells whilst C14MC is almost not detectable in the choriocarcinoma-derived cell lines (complete array data available at NCBI Gene Expression Omnibus accession number GSE32346: http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE32346). Besides the miRNAs within the mentioned clusters, a further 27 miRNAs are differentially expressed (>100 fold) between term and first trimester trophoblast cells. The placenta-specific miRNAs miR-141 and miR-21 as well as let-7g are expressed in all tested cells with the highest expression in primary trophoblast cells.

Conclusion

Primary first trimester and term trophoblast cells and trophoblastic cell lines display major differences in their miRNA fingerprints which may be involved in their different behavior and characteristics.",2012-06-19 +21800425,Dietary phytoestrogen supplementation induces sex differences in the myocardial protein pattern of mice: a comparative proteomics study.,"Elevated cardiovascular risk in postmenopausal women and beneficial actions of estrogen replacement in animal models have been related to protective effects of estrogens. However, randomized trials of hormone replacement therapy with synthetic estrogens in humans failed confirmation and phytoestrogens, natural plant hormones with agonistic properties for estrogen receptors, could represent potential alternatives. The aim of the present study is to characterize an animal model for alternative hormone replacement with genistein as a natural estrogenic compound. We performed a 2-DE/ESI-LC-MS approach in order to identify protein species varying with genistein receipt and sex in their relative abundance in the healthy murine heart (http://www.mpiib-berlin.mpg.de/2D-PAGE). Oral genistein treatment revealed a substantial effect on the relative abundance of both estrogen receptors. Several enzymes of the fatty acid metabolism and their transcriptional regulators varied differentially in male and in female animals, at the transcript and/or the protein species level. Increased levels of enzyme species involved in the oxidative phosphorylation and generation of ROS were accompanied by decreased amounts of antioxidants in male mice receiving genistein compared with control males, which have been previously associated with various pathological conditions. Exposure of female animals to genistein provoked an increased abundance of two species of LIM domain-binding protein and one species of desmin. 
These proteins have been associated with cardiac hypertrophy and our data warrant caution for the use of them as molecular markers, since the animals did not exhibit any histological signs of cardiac hypertrophy.",2011-08-30 +22420020,"Guidance on the use of handheld survey meters for radiological triage: time-dependent detector count rates corresponding to 50, 250, and 500 mSV effective dose for adult males and adult females.","In June 2006, the Radiation Studies Branch of the Centers for Disease Control and Prevention held a workshop to explore rapid methods of facilitating radiological triage of large numbers of potentially contaminated individuals following detonation of a radiological dispersal device. Two options were discussed. The first was the use of traditional gamma cameras in nuclear medicine departments operated as makeshift wholebody counters. Guidance on this approach is currently available from the CDC. This approach would be feasible if a manageable number of individuals were involved, transportation to the relevant hospitals was quickly provided, and the medical staff at each facility had been previously trained in this non-traditional use of their radiopharmaceutical imaging devices. If, however,substantially larger numbers of individuals (100’s to 1,000’s) needed radiological screening, other options must be given to first responders, first receivers, and health physicists providing medical management. In this study, the second option of the workshop was investigated--the use of commercially available portable survey meters (either NaI or GM based) for assessing potential ranges of effective dose (< 50, 50-250, 250-500,and >500 mSv). Two hybrid computational phantoms were used to model an adult male and an adult female subject internally contaminated with 241Am, 60Cs, 137Cs, 131I, or 192Ir following an acute inhalation or ingestion intake. 
As a function of time following the exposure, the net count rates corresponding to committed effective doses of 50, 250, and 500 mSv were estimated via Monte Carlo radiation transport simulation for each of four different detector types, positions, and screening distances. Measured net count rates can be compared to these values, and an assignment of one of four possible effective dose ranges could be made. The method implicitly assumes that all external contamination has been removed prior to screening and that the measurements be conducted in a low background, and possibly mobile, facility positioned at the triage location. Net count rate data are provided in both tabular and graphical format within a series of eight handbooks available at the CDC website (http://www.bt.cdc.gov/radiation/clinicians/evaluation).",2012-03-01 +21685065,Environment specific substitution tables improve membrane protein alignment.,"

Motivation

Membrane proteins are both abundant and important in cells, but the small number of solved structures restricts our understanding of them. Here we consider whether membrane proteins undergo different substitutions from their soluble counterparts and whether these can be used to improve membrane protein alignments, and therefore improve prediction of their structure.

Results

We construct substitution tables for different environments within membrane proteins. As data is scarce, we develop a general metric to assess the quality of these asymmetric tables. Membrane proteins show markedly different substitution preferences from soluble proteins. For example, substitution preferences in lipid tail-contacting parts of membrane proteins are found to be distinct from all environments in soluble proteins, including buried residues. A principal component analysis of the tables identifies the greatest variation in substitution preferences to be due to changes in hydrophobicity; the second largest variation relates to secondary structure. We demonstrate the use of our tables in pairwise sequence-to-structure alignments (also known as 'threading') of membrane proteins using the FUGUE alignment program. On average, in the 10-25% sequence identity range, alignments are improved by 28 correctly aligned residues compared with alignments made using FUGUE's default substitution tables. Our alignments also lead to improved structural models.

Availability

Substitution tables are available at: http://www.stats.ox.ac.uk/proteins/resources.",2011-07-01 +21873639,SCLpred: protein subcellular localization prediction by N-to-1 neural networks.,"

Summary

Knowledge of the subcellular location of a protein provides valuable information about its function and possible interaction with other proteins. In the post-genomic era, fast and accurate predictors of subcellular location are required if this abundance of sequence data is to be fully exploited. We have developed a subcellular localization predictor (SCLpred), which predicts the location of a protein into four classes for animals and fungi and five classes for plants (secreted, cytoplasm, nucleus, mitochondrion and chloroplast) using machine learning models trained on large non-redundant sets of protein sequences. The algorithm powering SCLpred is a novel Neural Network (N-to-1 Neural Network, or N1-NN) we have developed, which is capable of mapping whole sequences into single properties (a functional class, in this work) without resorting to predefined transformations, but rather by adaptively compressing the sequence into a hidden feature vector. We benchmark SCLpred against other publicly available predictors using two benchmarks including a new subset of Swiss-Prot Release 2010_06. We show that SCLpred surpasses the state of the art. The N1-NN algorithm is fully general and may be applied to a host of problems of similar shape, that is, in which a whole sequence needs to be mapped into a fixed-size array of properties, and the adaptive compression it operates may shed light on the space of protein sequences.

Availability

The predictive systems described in this article are publicly available as a web server at http://distill.ucd.ie/distill/.

Contact

gianluca.pollastri@ucd.ie.",2011-08-27 +21718396,Correlation of rRNA gene amplicon pyrosequencing and bacterial culture for microbial compositional analysis of faecal samples from elderly Irish subjects.,"

Aims

The aim of this investigation was to establish the degree of correlation between measurements from culture-dependent microbiological techniques and from next generation sequencing technologies.

Methods and results

Data generated by both techniques were collected from faecal samples from 185 elderly Irish people involved in the ongoing ELDERMET study (http://eldermet.ucc.ie). The results for three groups of intestinal bacteria were compared. Bifidobacterium sp., Lactobacillus sp. and Enterobacteriaceae were enumerated on selective media through culture-dependent techniques, whereas proportions of these bacteria were determined through sequencing technology against the background of other bacteria. The Spearman's rank correlation coefficient determined a good correlation between results from culture-dependent microbiology and culture-independent techniques for all three bacterial groups assessed (correlation coefficients for Bifidobacterium sp., Lactobacillus sp. and Enterobacteriaceae were 0·380, 0·366 and 0·437, respectively).

Conclusion

Correlation between the two methods implies that a single method is capable of profiling intestinal Bifidobacterium, Lactobacillus and Enterobacteriaceae populations. However, both methods have advantages that justify their use in tandem.

Significance and impact of the study

This is the first extensive study to compare bacterial counts from culture-dependent microbiological techniques and from next generation sequencing technologies.",2011-07-01 +21883851,Trends in urological stone disease.,"

Objective

To summarize the changes in prevalence and treatment of upper urinary tract stone disease in the UK over the last 10 years.

Methods

Data from the Hospital Episode Statistics (HES) website (http://www.hesonline.nhs.uk) were extracted, summarized and presented.

Results

The number of upper urinary tract stone hospital episodes increased by 63% to 83,050 in the 10-year period. The use of shock wave lithotripsy (SWL) for treating all upper tract stones increased from 14,491 cases in 2000-2001 to 22,402 cases in 2010 (a 55% increase) with a 69% increase in lithotripsy for renal stones. There was a 127% increase in the number of ureteroscopic stone treatments from 6,283 to 14,242 cases over the 10-year period with a 49% increase from 2007/2008 to 2009/2010. There was a decline in open surgery for upper tract stones from 278 cases in 2000/2001 to 47 cases in 2009/2010 (an 83% reduction). Treatment for stone disease has increased substantially in comparison with other urological activity. In 2009/2010, SWL was performed almost as frequently as transurethral resection of the prostate or transurethral resection of bladder tumour, ureteroscopy for stones was performed more frequently than nephrectomy, radical prostatectomy and cystectomy combined, and percutaneous nephrolithotomy was performed more frequently than cystectomy.

Conclusions

The present study highlights the increase in prevalence and treatment of stone disease in the UK over the last 10 years. If this trend continues it has important implications for workforce planning, training, service delivery and research in the field of urolithiasis.",2011-08-26 +22134096,Epilepsy in the Twitter era: a need to re-tweet the way we think about seizures.,"Seizures have long been associated with misconceptions and stigma. Exponential growth in Internet use has seen the rapid expansion of social media, such as Twitter, for health promotion. In view of the popularity of Twitter, we sought to explore how seizures are being portrayed on this social networking website and to consider its potential for information dissemination. A 48-hour Twitter search was used as a preliminary data set to determine an appropriate classification scheme of ""seizure""-related posts (""tweets""). Analysis was then conducted using ""seizure"" tweets from a 7-consecutive day sample period. Tweets were analyzed and coded by two independent reviewers. Predominant categories were Metaphorical (32%), Personal Accounts (31%), Informative (12%), and Ridicule/Joke (9%). This study supports the notion that stigmatization associated with seizures continues to flourish, as 41% of ""seizure"" tweets were derogatory in nature. Although Twitter could be used to disseminate accurate information on seizures and epilepsy, this study suggests that it is currently propagating negative attitudes toward seizures with potential for fueling stigma. In recent years there have been significant advancements in technology offering many new methods of sharing information. Social networking sites allow real-time communication while providing the opportunity for exchange of information and opinions. Twitter, a website launched in 2006, allows users to communicate through ""tweets"" limited to 140 characters. 
Twitter's popularity has drastically increased since its inception, with approximately 110 million tweets per day from 200 million users worldwide, as of January 2011 (http://blogs.forbes.com/oliverchiang/2011/01/19/twitter-hits-nearly-200m-users-110m-tweets-per-day-focuses-on-global-expansion/). Such social media facilitate communication about an array of health-related topics including seizures and epilepsy.",2011-11-30 +21278116,"Predicting breed composition using breed frequencies of 50,000 markers from the US Meat Animal Research Center 2,000 Bull Project.","Knowledge of breed composition can be useful in multiple aspects of cattle production, and can be critical for analyzing the results of whole genome-wide association studies currently being conducted around the world. We examine the feasibility and accuracy of using genotype data from the most prevalent bovine genome-wide association studies platform, the Illumina BovineSNP50 array (Illumina Inc., San Diego, CA), to estimate breed composition for individual breeds of cattle. First, allele frequencies (of Illumina-defined allele B) of SNP on the array for each of 16 beef cattle breeds were defined by genotyping a large set of more than 2,000 bulls selected in cooperation with the respective breed associations to be representative of their breed. With these breed-specific allele frequencies, the breed compositions of approximately 2,000 two-, three-, and four-way cross (of 8 breeds) cattle produced at the US Meat Animal Research Center were predicted by using a simple multiple regression technique or Mendel (http://www.genetics.ucla.edu/software/mendel) and their genotypes from the Illumina BovineSNP50 array, and were then compared with pedigree-based estimates of breed composition. 
The accuracy of marker-based breed composition estimates was 89% when using either estimation method for all breeds except Angus and Red Angus (averaged 79%), based on comparing estimates with pedigree-based average breed composition. Accuracy increased to approximately 88% when these 2 breeds were combined into an aggregate Angus group. Additionally, we used a subset of these markers, approximately 3,000 that populate the Illumina Bovine3K (Illumina Inc.), to see whether breed composition could be estimated with similar accuracy when using this reduced panel of SNP markers. When breed composition was estimated using only SNP in common with the Bovine 3K array, accuracy was slightly reduced to 83%. These results suggest that SNP data from these arrays could be used to estimate breed composition in most US beef cattle in situations where pedigree is not known (e.g., multiple-sire natural service matings, non-source-verified animals in feedlots or at slaughter). This approach can aid analyses that depend on knowledge of breed composition, including identification and adjustment of breed-based population stratification, when performing genome-wide association studies on populations with incomplete pedigrees. In addition, SNP-based breed composition estimates may facilitate fitting cow germplasm to the environment, managing cattle in the feedlot, and tracing disease cases back to the geographic region or farm of origin.",2011-01-28 +21729705,Correlation between the flexibility and periodic dinucleotide patterns in yeast nucleosomal DNA sequences.,"Nucleosome formation and positioning, which play important roles in a number of biological processes, are thought to be related to the distinctive periodic dinucleotide patterns observed in the DNA sequence wrapped around the protein octamer. Previous research shows that flexibility is a key structural property of a nucleosomal DNA sequence. 
However, the relationship between the flexibility and the periodic dinucleotide patterns has received little attention in research in the past. In this study, we propose the use of three different models to measure the flexibility of yeast DNA sequences. Although the three models involve different parameters, they deliver consistent results showing that yeast nucleosomal DNA sequences are more flexible than non-nucleosomal ones. In contrast to random flexibility values along non-nucleosomal DNA sequences, the flexibility of nucleosomal DNA sequences shows a clear periodicity of 10.14 base pairs, which is consistent with the periodicity of dinucleotide distributions. We also demonstrate that there is a strong relationship between the peak positions of the flexibility and the dinucleotide frequencies. Correlation between the flexibility and the dinucleotide patterns of CA/TG, CG, GC, GG/CC, AG/CT, AC/GT and GA/TC are positive with an average value of 0.5946. The highest correlation is shown by CA/TG with a value of 0.7438 and the lowest correlation is shown by AA/TT with a value of -0.7424. The source codes and data sets are available for downloading on http://www.hy8.com/bioinformatics.htm.",2011-06-25 +21700677,Enriching targeted sequencing experiments for rare disease alleles.,"

Motivation

Next-generation targeted resequencing of genome-wide association study (GWAS)-associated genomic regions is a common approach for follow-up of indirect association of common alleles. However, it is prohibitively expensive to sequence all the samples from a well-powered GWAS study with sufficient depth of coverage to accurately call rare genotypes. As a result, many studies may use next-generation sequencing for single nucleotide polymorphism (SNP) discovery in a smaller number of samples, with the intent to genotype candidate SNPs with rare alleles captured by resequencing. This approach is reasonable, but may be inefficient for rare alleles if samples are not carefully selected for the resequencing experiment.

Results

We have developed a probability-based approach, SampleSeq, to select samples for a targeted resequencing experiment that increases the yield of rare disease alleles substantially over random sampling of cases or controls or sampling based on genotypes at associated SNPs from GWAS data. This technique allows for smaller sample sizes for resequencing experiments, or allows the capture of rarer risk alleles. When following up multiple regions, SampleSeq selects subjects with an even representation of all the regions. SampleSeq also can be used to calculate the sample size needed for the resequencing to increase the chance of successful capture of rare alleles of desired frequencies.

Software

http://biostat.mc.vanderbilt.edu/SampleSeq",2011-06-23 +21700672,Gaia: automated quality assessment of protein structure models.,"

Motivation

Increasing use of structural modeling for understanding structure-function relationships in proteins has led to the need to ensure that the protein models being used are of acceptable quality. Quality of a given protein structure can be assessed by comparing various intrinsic structural properties of the protein to those observed in high-resolution protein structures.

Results

In this study, we present tools to compare a given structure to high-resolution crystal structures. We assess packing by calculating the total void volume, the percentage of unsatisfied hydrogen bonds, the number of steric clashes and the scaling of the accessible surface area. We assess covalent geometry by determining bond lengths, angles, dihedrals and rotamers. The statistical parameters for the above measures, obtained from high-resolution crystal structures enable us to provide a quality-score that points to specific areas where a given protein structural model needs improvement.

Availability and implementation

We provide these tools that appraise protein structures in the form of a web server Gaia (http://chiron.dokhlab.org). Gaia evaluates the packing and covalent geometry of a given protein structure and provides quantitative comparison of the given structure to high-resolution crystal structures.

Contact

dokh@unc.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-23 +21696592,Structural influence of gene networks on their inference: analysis of C3NET.,"

Background

The availability of large-scale high-throughput data poses considerable challenges toward their functional analysis. For this reason gene network inference methods gained considerable interest. However, our current knowledge, especially about the influence of the structure of a gene network on its inference, is limited.

Results

In this paper we present a comprehensive investigation of the structural influence of gene networks on the inferential characteristics of C3NET - a recently introduced gene network inference algorithm. We employ local as well as global performance metrics in combination with an ensemble approach. The results from our numerical study for various biological and synthetic network structures and simulation conditions, also comparing C3NET with other inference algorithms, lead to a multitude of theoretical and practical insights into the working behavior of C3NET. In addition, in order to facilitate the practical usage of C3NET we provide a user-friendly R package, called c3net, and describe its functionality. It is available from https://r-forge.r-project.org/projects/c3net and from the CRAN package repository.

Conclusions

The availability of gene network inference algorithms with known inferential properties opens a new era of large-scale screening experiments that could be equally beneficial for basic biological and biomedical research with auspicious prospects. The availability of our easy to use software package c3net may contribute to the popularization of such methods.",2011-06-22 +21695537,SOMPNN: an efficient non-parametric model for predicting transmembrane helices.,"Accurately predicting the transmembrane helices (TMH) in a helical membrane protein is an important but challenging task. Recent researches have demonstrated that statistics-based methods are promising routes to improve the TMH prediction accuracy. However, most of existing TMH predictors are parametric models and they have to make assumptions of several or even hundreds of adjustable parameters based on the underlying probability distribution, which is difficult when no a priori knowledge is available. Besides the performances of these parametric predictors significantly depend on the estimated parameters, some of them need to exploit the entire training dataset in the prediction stage, which will lead to low prediction efficiency and this problem will become even worse when dealing with large-scale dataset. In this paper, we propose a novel SOMPNN model for prediction of TMH that features by minimal parameter assumptions requirement and high computational efficiency. In the SOMPNN model, a self-organizing map (SOM) is used to adaptively learn the helices distribution knowledge hidden in the training data, and then a probabilistic neural network (PNN) is adopted to predict TMH segments based on the knowledge learned by SOM. Experimental results on two benchmark datasets show that the proposed SOMPNN outperforms most existing popular TMH predictors and is promising to be extended to deal with other complicated biological problems. 
The datasets and the source codes of SOMPNN are available at http://www.csbio.sjtu.edu.cn/bioinf/SOMPNN/.",2011-06-22 +22676423,Nanomaterial toxicity testing in the 21st century: use of a predictive toxicological approach and high-throughput screening.,"The production of engineered nanomaterials (ENMs) is a scientific breakthrough in material design and the development of new consumer products. While the successful implementation of nanotechnology is important for the growth of the global economy, we also need to consider the possible environmental health and safety (EHS) impact as a result of the novel physicochemical properties that could generate hazardous biological outcomes. In order to assess ENM hazard, reliable and reproducible screening approaches are needed to test the basic materials as well as nanoenabled products. A platform is required to investigate the potentially endless number of biophysicochemical interactions at the nano/bio interface, in response to which we have developed a predictive toxicological approach. We define a predictive toxicological approach as the use of mechanisms-based high-throughput screening in vitro to make predictions about the physicochemical properties of ENMs that may lead to the generation of pathology or disease outcomes in vivo. The in vivo results are used to validate and improve the in vitro high-throughput screening (HTS) and to establish structure-activity relationships (SARs) that allow hazard ranking and modeling by an appropriate combination of in vitro and in vivo testing. 
This notion is in agreement with the landmark 2007 report from the US National Academy of Sciences, ""Toxicity Testing in the 21st Century: A Vision and a Strategy"" (http://www.nap.edu/catalog.php?record_id=11970), which advocates increased efficiency of toxicity testing by transitioning from qualitative, descriptive animal testing to quantitative, mechanistic, and pathway-based toxicity testing in human cells or cell lines using high-throughput approaches. Accordingly, we have implemented HTS approaches to screen compositional and combinatorial ENM libraries to develop hazard ranking and structure-activity relationships that can be used for predicting in vivo injury outcomes. This predictive approach allows the bulk of the screening analysis and high-volume data generation to be carried out in vitro, following which limited, but critical, validation studies are carried out in animals or whole organisms. Risk reduction in the exposed human or environmental populations can then focus on limiting or avoiding exposures that trigger these toxicological responses as well as implementing safer design of potentially hazardous ENMs. In this Account, we review the tools required for establishing predictive toxicology paradigms to assess inhalation and environmental toxicological scenarios through the use of compositional and combinatorial ENM libraries, mechanism-based HTS assays, hazard ranking, and development of nano-SARs. We will discuss the major injury paradigms that have emerged based on specific ENM properties, as well as describing the safer design of ZnO nanoparticles based on characterization of dissolution chemistry as a major predictor of toxicity.",2012-06-07 +21976109,Association of obesity-related genetic variants with endometrial cancer risk: a report from the Shanghai Endometrial Cancer Genetics Study.,"Obesity is a well-established risk factor for endometrial cancer, the most common gynecologic malignancy. 
Recent genome-wide association studies (GWAS) have identified multiple genetic markers for obesity. The authors evaluated the association of obesity-related single nucleotide polymorphisms (SNPs) with endometrial cancer using GWAS data from their recently completed study, the Shanghai Endometrial Cancer Genetics Study, which comprised 832 endometrial cancer cases and 2,049 controls (1996-2005). Thirty-five SNPs previously associated with obesity or body mass index (BMI; weight (kg)/height (m)(2)) at a minimum significance level of ≤5 × 10(-7) in the US National Human Genome Research Institute's GWAS catalog (http://genome.gov/gwastudies) and representing 26 unique loci were evaluated by either direct genotyping or imputation. The authors found that for 22 of the 26 unique loci tested (84.6%), the BMI-associated risk variants were present at a higher frequency in cases than in population controls (P = 0.0003). Multiple regression analysis showed that 9 of 35 BMI-associated variants, representing 7 loci, were significantly associated (P ≤ 0.05) with the risk of endometrial cancer; for all but 1 SNP, the direction of association was consistent with that found for BMI. For consistent SNPs, the allelic odds ratios ranged from 1.15 to 1.29. These 7 loci are in the SEC16B/RASAL, TMEM18, MSRA, SOX6, MTCH2, FTO, and MC4R genes. The associations persisted after adjustment for BMI, suggesting that genetic markers of obesity provide value in addition to BMI in predicting endometrial cancer risk.",2011-10-05 +21388232,A simple protocol for the comparative analysis of the structure and occurrence of biochemical pathways across superkingdoms.,"A biochemical pathway can be viewed as a series of chemical reactions occurring within a cell, each of which is carried out by one or more biological macromolecules (protein, RNA, or complexes thereof). 
Computational methods can be applied to assess whether one organism is able to perform a biochemical process of interest by checking whether its genome encodes all the components that are known to be necessary for the task. Here we present a simple strategy for collecting the above data that is based on, but not limited to, our experience on processes involving metal ions and metal-binding cofactors. The strategy is fully implemented in a bioinformatics package, Retrieval of Domains and Genome Browsing (RDGB), which is available from http://www.cerm.unifi.it/home/research/genomebrowsing.html . The use of RDGB allows users to perform all the operations that are needed to implement the aforementioned strategy with minimal intervention and to gather all results in an ordered manner, with a tabular summary. This minimizes the (bio)informatics needed, thus facilitating its use by nonexperts. As examples, we analyzed the pathways for the degradation of organic compounds containing one or two aromatic rings as well as the distribution of some proteins involved in Cu(A) assembly in more than a thousand prokaryotes.",2011-03-10 +22174280,SEPP: SATé-enabled phylogenetic placement.,"We address the problem of Phylogenetic Placement, in which the objective is to insert short molecular sequences (called query sequences) into an existing phylogenetic tree and alignment on full-length sequences for the same gene. Phylogenetic placement has the potential to provide information beyond pure ""species identification"" (i.e., the association of metagenomic reads to existing species), because it can also give information about the evolutionary relationships between these query sequences and to known species. 
Approaches for phylogenetic placement have been developed that operate in two steps: first, an alignment is estimated for each query sequence to the alignment of the full-length sequences, and then that alignment is used to find the optimal location in the phylogenetic tree for the query sequence. Recent methods of this type include HMMALIGN+EPA, HMMALIGN+pplacer, and PaPaRa+EPA. We report on a study evaluating phylogenetic placement methods on biological and simulated data. This study shows that these methods have extremely good accuracy and computational tractability under conditions where the input contains a highly accurate alignment and tree for the full-length sequences, and the set of full-length sequences is sufficiently small and not too evolutionarily diverse; however, we also show that under other conditions accuracy declines and the computational requirements for memory and time exceed acceptable limits. We present SEPP, a general ""boosting"" technique to improve the accuracy and/or speed of phylogenetic placement techniques. The key algorithmic aspect of this booster is a dataset decomposition technique in SATé, a method that utilizes an iterative divide-and-conquer technique to co-estimate alignments and trees on large molecular sequence datasets. We show that SATé-boosting improves HMMALIGN+pplacer, placing short sequences more accurately when the set of input sequences has a large evolutionary diameter and produces placements of comparable accuracy in a fraction of the time for easier cases. SEPP software and the datasets used in this study are all available for free at http://www.cs.utexas.edu/users/phylo/software/sepp/submission.",2012-01-01 +22328784,WaVPeak: picking NMR peaks through wavelet-based smoothing and volume-based filtering.,"

Motivation

Nuclear magnetic resonance (NMR) has been widely used as a powerful tool to determine the 3D structures of proteins in vivo. However, the post-spectra processing stage of NMR structure determination usually involves a tremendous amount of time and expert knowledge, which includes peak picking, chemical shift assignment and structure calculation steps. Detecting accurate peaks from the NMR spectra is a prerequisite for all following steps, and thus remains a key problem in automatic NMR structure determination.

Results

We introduce WaVPeak, a fully automatic peak detection method. WaVPeak first smoothes the given NMR spectrum by wavelets. The peaks are then identified as the local maxima. The false positive peaks are filtered out efficiently by considering the volume of the peaks. WaVPeak has two major advantages over the state-of-the-art peak-picking methods. First, through wavelet-based smoothing, WaVPeak does not eliminate any data point in the spectra. Therefore, WaVPeak is able to detect weak peaks that are embedded in the noise level. NMR spectroscopists need the most help isolating these weak peaks. Second, WaVPeak estimates the volume of the peaks to filter the false positives. This is more reliable than intensity-based filters that are widely used in existing methods. We evaluate the performance of WaVPeak on the benchmark set proposed by PICKY (Alipanahi et al., 2009), one of the most accurate methods in the literature. The dataset comprises 32 2D and 3D spectra from eight different proteins. Experimental results demonstrate that WaVPeak achieves an average of 96%, 91%, 88%, 76% and 85% recall on (15)N-HSQC, HNCO, HNCA, HNCACB and CBCA(CO)NH, respectively. When the same number of peaks are considered, WaVPeak significantly outperforms PICKY.

Availability

WaVPeak is an open source program. The source code and two test spectra of WaVPeak are available at http://faculty.kaust.edu.sa/sites/xingao/Pages/Publications.aspx. The online server is under construction.

Contact

statliuzhi@xmu.edu.cn; ahmed.abbas@kaust.edu.sa; majing@ust.hk; xin.gao@kaust.edu.sa.",2012-02-10 +22347389,Identification of amino acid propensities that are strong determinants of linear B-cell epitope using neural networks.,"

Background

Identification of amino acid propensities that are strong determinants of linear B-cell epitope is very important to enrich our knowledge about epitopes. This can also help to obtain better epitope prediction. Typical linear B-cell epitope prediction methods combine various propensities in different ways to improve prediction accuracies. However, fewer but better features may yield better prediction. Moreover, for a propensity, when the sequence length is k, there will be k values, which should be treated as a single unit for feature selection and hence usual feature selection method will not work. Here we use a novel Group Feature Selecting Multilayered Perceptron, GFSMLP, which treats a group of related information as a single entity and selects useful propensities related to linear B-cell epitopes, and uses them to predict epitopes.

Methodology/ principal findings

We use eight widely known propensities and four data sets. We use GFSMLP to rank propensities by the frequency with which they are selected. We find that Chou's beta-turn and Ponnuswamy's polarity are better features for prediction of linear B-cell epitope. We examine the individual and combined discriminating power of the selected propensities and analyze the correlation between paired propensities. Our results show that the selected propensities are indeed good features, which also cooperate with other propensities to enhance the discriminating power for predicting epitopes. We find that individually polarity is not the best predictor, but it collaborates with others to yield good prediction. Usual feature selection methods cannot provide such information.

Conclusions/ significance

Our results confirm the effectiveness of active (group) feature selection by GFSMLP over the traditional passive approaches of evaluating various combinations of propensities. The GFSMLP-based feature selection can be extended to more than 500 remaining propensities to enhance our biological knowledge about epitopes and to obtain better prediction. A graphical-user-interface version of GFSMLP is available at: http://bio.classcloud.org/GFSMLP/.",2012-02-08 +21653519,MixupMapper: correcting sample mix-ups in genome-wide datasets increases power to detect small genetic effects.,"

Motivation

Sample mix-ups can arise during sample collection, handling, genotyping or data management. It is unclear how often sample mix-ups occur in genome-wide studies, as there currently are no post hoc methods that can identify these mix-ups in unrelated samples. We have therefore developed an algorithm (MixupMapper) that can both detect and correct sample mix-ups in genome-wide studies that study gene expression levels.

Results

We applied MixupMapper to five publicly available human genetical genomics datasets. On average, 3% of all analyzed samples had been assigned incorrect expression phenotypes: in one of the datasets 23% of the samples had incorrect expression phenotypes. The consequences of sample mix-ups are substantial: when we corrected these sample mix-ups, we identified on average 15% more significant cis-expression quantitative trait loci (cis-eQTLs). In one dataset, we identified three times as many significant cis-eQTLs after correction. Furthermore, we show through simulations that sample mix-ups can lead to an underestimation of the explained heritability of complex traits in genome-wide association datasets.

Availability and implementation

MixupMapper is freely available at http://www.genenetwork.nl/mixupmapper/",2011-06-07 +21807647,EliXR: an approach to eligibility criteria extraction and representation.,"

Objective

To develop a semantic representation for clinical research eligibility criteria to automate semistructured information extraction from eligibility criteria text.

Materials and methods

An analysis pipeline called eligibility criteria extraction and representation (EliXR) was developed that integrates syntactic parsing and tree pattern mining to discover common semantic patterns in 1000 eligibility criteria randomly selected from http://ClinicalTrials.gov. The semantic patterns were aggregated and enriched with unified medical language systems semantic knowledge to form a semantic representation for clinical research eligibility criteria.

Results

The authors arrived at 175 semantic patterns, which form 12 semantic role labels connected by their frequent semantic relations in a semantic network.

Evaluation

Three raters independently annotated all the sentence segments (N=396) for 79 test eligibility criteria using the 12 top-level semantic role labels. Eighty-six per cent (339) of the sentence segments were unanimously labelled correctly and 13.8% (55) were correctly labelled by two raters. The Fleiss' κ was 0.88, indicating a nearly perfect interrater agreement.

Conclusion

This study presents a semi-automated data-driven approach to developing a semantic network that aligns well with the top-level information structure in clinical research eligibility criteria text and demonstrates the feasibility of using the resulting semantic role labels to generate semistructured eligibility criteria with nearly perfect interrater reliability.",2011-07-31 +22192089,Myogenic progenitors contribute to open but not closed fracture repair.,"

Background

Bone repair is dependent on the presence of osteocompetent progenitors that are able to differentiate and generate new bone. Muscle is found in close association with orthopaedic injury, however its capacity to make a cellular contribution to bone repair remains ambiguous. We hypothesized that myogenic cells of the MyoD-lineage are able to contribute to bone repair.

Methods

We employed a MyoD-Cre+:Z/AP+ conditional reporter mouse in which all cells of the MyoD-lineage are permanently labeled with a human alkaline phosphatase (hAP) reporter. We tracked the contribution of MyoD-lineage cells in mouse models of tibial bone healing.

Results

In the absence of musculoskeletal trauma, MyoD-expressing cells are limited to skeletal muscle and the presence of reporter-positive cells in non-muscle tissues is negligible. In a closed tibial fracture model, there was no significant contribution of hAP+ cells to the healing callus. In contrast, open tibial fractures featuring periosteal stripping and muscle fenestration had up to 50% of hAP+ cells detected in the open fracture callus. At early stages of repair, many hAP+ cells exhibited a chondrocyte morphology, with lesser numbers of osteoblast-like hAP+ cells present at the later stages. Serial sections stained for hAP and type II and type I collagen showed that MyoD-lineage cells were surrounded by cartilaginous or bony matrix, suggestive of a functional role in the repair process. To exclude the prospect that osteoprogenitors spontaneously express MyoD during bone repair, we created a metaphyseal drill hole defect in the tibia. No hAP+ staining was observed in this model suggesting that the expression of MyoD is not a normal event for endogenous osteoprogenitors.

Conclusions

These data document for the first time that muscle cells can play a significant secondary role in bone repair and this knowledge may lead to important translational applications in orthopaedic surgery. Please see related article: http://www.biomedcentral.com/1741-7015/9/136.",2011-12-22 +22185298,Digital Subtraction Phonocardiography (DSP) applied to the detection and characterization of heart murmurs.,"

Background

During the cardiac cycle, the heart normally produces repeatable physiological sounds. However, under pathologic conditions, such as with heart valve stenosis or a ventricular septal defect, blood flow turbulence leads to the production of additional sounds, called murmurs. Murmurs are random in nature, while the underlying heart sounds are not (being deterministic).

Innovation

We show that a new analytical technique, which we call Digital Subtraction Phonocardiography (DSP), can be used to separate the random murmur component of the phonocardiogram from the underlying deterministic heart sounds.

Methods

We digitally recorded the phonocardiogram from the anterior chest wall in 60 infants and adults using a high-speed USB interface and the program Gold Wave http://www.goldwave.com. The recordings included individuals with cardiac structural disease as well as recordings from normal individuals and from individuals with innocent heart murmurs. Digital Subtraction Analysis of the signal was performed using a custom computer program called Murmurgram. In essence, this program subtracts the recorded sound from two adjacent cardiac cycles to produce a difference signal, herein called a ""murmurgram"". Other software used included Spectrogram (Version 16), GoldWave (Version 5.55) as well as custom MATLAB code.

Results

Our preliminary data is presented as a series of eight cases. These cases show how advanced signal processing techniques can be used to separate heart sounds from murmurs. Note that these results are preliminary in that normal ranges for obtained test results have not yet been established.

Conclusions

Cardiac murmurs can be separated from underlying deterministic heart sounds using DSP. DSP has the potential to become a reliable and economical new diagnostic approach to screening for structural heart disease. However, DSP must be further evaluated in a large series of patients with well-characterized pathology to determine its clinical potential.",2011-12-20 +21821014,"Red cell distribution width, C-reactive protein, the complete blood count, and mortality in patients with coronary disease and a normal comparison population.","

Background

Red cell distribution width (RDW) is associated with morbidity and mortality in coronary artery disease (CAD), but the connection of RDW with chronic inflammation is equivocal.

Methods

In 1,489 patients with CAD and 8.4-15.2 years of follow-up all-cause mortality and RDW were studied using Cox regression. RDW and its associations with inflammation, liver function, renal function, and body mass were assessed. A population of 449 normal (No-CAD) patients also was evaluated.

Results

RDW predicted all-cause mortality in a step-wise manner (HR=1.37 per quintile; 95% CI=1.29, 1.46; p-trend<0.001). A significant but meaningless correlation between RDW and high-sensitivity C-reactive protein (hsCRP) was identified (r=0.181; p<0.001). With full adjustment, RDW remained significant (p-trend<0.001) and the strongest predictor of mortality among all factors included in the model. RDW also strongly predicted all-cause mortality in the normal control population (HR=1.33 per quintile, CI=1.15, 1.55; p-trend<0.001), but hsCRP did not predict mortality among normal controls.

Conclusions

RDW was associated with mortality in patients with CAD and may provide clinically useful prognostication. Although RDW was correlated with hsCRP, they were independent predictors of mortality. RDW has been incorporated into a risk prediction tool using data from basic chemistries available at: http://intermountainhealthcare.org/IMRS.",2011-07-27 +21798041,GO Trimming: Systematically reducing redundancy in large Gene Ontology datasets.,"

Background

The increased accessibility of gene expression tools has enabled a wide variety of experiments utilizing transcriptomic analyses. As these tools increase in prevalence, the need for improved standardization in processing and presentation of data increases, as does the need to guard against interpretation bias. Gene Ontology (GO) analysis is a powerful method of interpreting and summarizing biological functions. However, while there are many tools available to investigate GO enrichment, there remains a need for methods that directly remove redundant terms from enriched GO lists that often provide little, if any, additional information.

Findings

Here we present a simple yet novel method called GO Trimming that utilizes an algorithm designed to reduce redundancy in lists of enriched GO categories. Depending on the needs of the user, this method can be performed with variable stringency. In the example presented here, an initial list of 90 terms was reduced to 54, eliminating 36 largely redundant terms. We also compare this method to existing methods and find that GO Trimming, while simple, performs well to eliminate redundant terms in a large dataset throughout the depth of the GO hierarchy.

Conclusions

The GO Trimming method provides an alternative to other procedures, some of which involve removing large numbers of terms prior to enrichment analysis. This method should free up the researcher from analyzing overly large, redundant lists, and instead enable the concise presentation of manageable, informative GO lists. The implementation of this tool is freely available at: http://lucy.ceh.uvic.ca/go_trimming/cbr_go_trimming.py.",2011-07-28 +21950680,Pediatric allergy and immunology in Spain.,"The data of the ISAAC project in Spain show a prevalence of childhood asthma ranging from 7.1% to 15.3%, with regional differences; a higher prevalence, 22.6% to 35.8%, is described for rhinitis, and atopic dermatitis is found in 4.1% to 7.6% of children. The prevalence of food allergy is 3%. All children in Spain have the right to be visited in the National Health System. The medical care at the primary level is provided by pediatricians, who have obtained their titles through a 4-yr medical residency training program. The education on pediatric allergy during that period is not compulsory and thus very variable. There are currently 112 certified European pediatric allergists in Spain, who have obtained the accreditation of the European Union of Medical Specialist for proven skills and experience in pediatric allergy. Future specialists in pediatric allergy should obtain their titles through a specific education program to be developed in one of the four accredited training units on pediatric allergy, after obtaining the title on pediatrics. The Spanish Society of Pediatric Allergy and Clinical Immunology (SEICAP) gathers over 350 pediatric allergists and pediatricians working in this field. SEICAP has a growing activity including yearly congresses, continued education courses, elaboration of technical clinical documents and protocols, education of patients, and collaboration with other scientific societies and associations of patients. 
The official journal of SEICAP is Allergologia et Immunopathologia, published every 2 months since 1972. The web site of SEICAP, http://www.seicap.es, open since 2004, offers information for professionals and extensive information on pediatric allergic and immunologic disorders for the lay public; the web site is receiving 750 daily visits during 2011. The pediatric allergy units are very active in clinical work, procedures as immunotherapy or induction of oral tolerance in food allergy, contribution to scientific literature, and collaboration in international projects.",2011-11-01 +21622959,A novel bioinformatics pipeline for identification and characterization of fusion transcripts in breast cancer and normal cell lines.,"SnowShoes-FTD, developed for fusion transcript detection in paired-end mRNA-Seq data, employs multiple steps of false positive filtering to nominate fusion transcripts with near 100% confidence. Unique features include: (i) identification of multiple fusion isoforms from two gene partners; (ii) prediction of genomic rearrangements; (iii) identification of exon fusion boundaries; (iv) generation of a 5'-3' fusion spanning sequence for PCR validation; and (v) prediction of the protein sequences, including frame shift and amino acid insertions. We applied SnowShoes-FTD to identify 50 fusion candidates in 22 breast cancer and 9 non-transformed cell lines. Five additional fusion candidates with two isoforms were confirmed. In all, 30 of 55 fusion candidates had in-frame protein products. No fusion transcripts were detected in non-transformed cells. Consideration of the possible functions of a subset of predicted fusion proteins suggests several potentially important functions in transformation, including a possible new mechanism for overexpression of ERBB2 in a HER-positive cell line. 
The source code of SnowShoes-FTD is provided in two formats: one configured to run on the Sun Grid Engine for parallelization, and the other formatted to run on a single LINUX node. Executables in PERL are available for download from our web site: http://mayoresearch.mayo.edu/mayo/research/biostat/stand-alone-packages.cfm.",2011-05-27 +21233169,Clustering 16S rRNA for OTU prediction: a method of unsupervised Bayesian clustering.,"

Motivation

With the advancements of next-generation sequencing technology, it is now possible to study samples directly obtained from the environment. Particularly, 16S rRNA gene sequences have been frequently used to profile the diversity of organisms in a sample. However, such studies are still taxed to determine both the number of operational taxonomic units (OTUs) and their relative abundance in a sample.

Results

To address these challenges, we propose an unsupervised Bayesian clustering method termed Clustering 16S rRNA for OTU Prediction (CROP). CROP can find clusters based on the natural organization of data without setting a hard cut-off threshold (3%/5%) as required by hierarchical clustering methods. By applying our method to several datasets, we demonstrate that CROP is robust against sequencing errors and that it produces more accurate results than conventional hierarchical clustering methods.

Availability and implementation

Source code freely available at the following URL: http://code.google.com/p/crop-tingchenlab/, implemented in C++ and supported on Linux and MS Windows.",2011-01-13 +22578009,"The effects of ambient temperature, humidity and season of year on urine composition in patients with nephrolithiasis.","

Unlabelled

Study Type--Prognosis (cohort series) Level of Evidence 2b. What's known on the subject? and What does the study add? Epidemiologic studies have shown that warmer climates are associated with increased incidence of nephrolithiasis. Many hypothesize that this is due to dehydration and lower urine volumes. The current study of stone formers reports that greater temperatures are associated with significant increases in urine calcium which may shed light on the mechanism underlying the increased stone incidence associated with increased ambient temperature.

Objective

• To understand the effects of temperature, humidity and season of year on 24-h urine composition in patients with nephrolithiasis.

Patients and method

• A retrospective review was performed of patients evaluated at four metabolic stone clinics. • Multivariate linear regression models examined the relationship between mean temperature, average humidity, season of year and 24-h urine composition. • Multivariate models adjusted for known risk factors for stone disease. • Mean temperature and average humidity data were obtained from http://www.weatherunderground.com based on patient-provided addresses.

Results

• A total of 599 patients were included in the study, comprising 239 women and 360 men with a mean age of 53.6 years (sd 15.0). • Mean temperature was 16.9 °C (sd 4.8, range -21.1 to 38.3 °C) and average humidity was 58.1% (sd 23.5, range 11-100%). • On multivariate linear regression, increasing temperature was associated with increasing urine calcium (β = 11.3, 95% CI 2.2-20.0), super-saturation of calcium oxalate (β = 0.6, 95% CI 0.2-0.9), super-saturation of calcium phosphate (β = 0.14, 95% CI 0.03-0.2), and decreasing urine sodium (β = -5.2, 95% CI -10.3 to -0.1). • As seasons become warmer (i.e. from winter to autumn to spring to summer), changes were increased urine volume (β = 0.09, 95% CI 0.01-0.2) and decreased super-saturation of calcium phosphate (β = -0.2, 95% CI -0.3 to -0.03). • There were no associations between quintile of humidity and any 24-h urine constituents.

Conclusions

• Increasing temperature may increase stone risk by increasing urine excretion of calcium, and the super-saturation of calcium oxalate and calcium phosphate. • These findings were independent of humidity and of season of year. • This appears to be related to a physiological impact of temperature itself, rather than to geographic location.",2012-05-11 +21611721,Fish under exercise.,"Improved knowledge on the swimming physiology of fish and its application to fisheries science and aquaculture (i.e., farming a fitter fish) is currently needed in the face of global environmental changes, high fishing pressures, increased aquaculture production as well as increased concern on fish well-being. Here, we review existing data on teleost fish that indicate that sustained exercise at optimal speeds enhances muscle growth and has consequences for flesh quality. Potential added benefits of sustained exercise may be delay of ovarian development and stimulation of immune status. Exercise could represent a natural, noninvasive, and economical approach to improve growth, flesh quality as well as welfare of aquacultured fish: a FitFish for a healthy consumer. All these issues are important for setting directions for policy decisions and future studies in this area. For this purpose, the FitFish workshop on the Swimming Physiology of Fish ( http://www.ub.edu/fitfish2010 ) was organized to bring together a multidisciplinary group of scientists using exercise models, industrial partners, and policy makers. Sixteen international experts from Europe, North America, and Japan were invited to present their work and view on migration of fishes in their natural environment, beneficial effects of exercise, and applications for sustainable aquaculture. Eighty-eight participants from 19 different countries contributed through a poster session and round table discussion. 
Eight papers from invited speakers at the workshop have been contributed to this special issue on The Swimming Physiology of Fish.",2011-05-25 +22577869,Cytokine/chemokine patterns connect host and viral characteristics with clinics during chronic hepatitis C.,"

Background

In chronic hepatitis C virus (HCV) infection, liver tissue pathology and HCV genotype are important determinants of clinical and/or treatment-related outcome. Although consistent epidemiological and/or molecular-biological clues derived from different studies on single virus-host interactions are meanwhile published, the in vivo transcriptional responses and cellular pathways affected in >1 key aspects of the disease or treatment process are far from being understood.

Methods

Microarray analysis was performed in peripheral whole blood (PB) samples from 36 therapy-naïve HCV-infected patients with known liver histology. Linear regression analysis identified gene expression profiles significantly correlating (P < 0.015) with ≥1 out of 7 variables: sustained viral response (SVR), viral non-response (NR), end of treatment viral response (ETR), viral breakthrough (VB), HCV genotype (Gt. 1 vs. Gt. 2/3), stage of hepatic fibrosis [St. 0/1 vs. St. 2/3/4] and grade of hepatic inflammation (Gr. 0/1 vs. Gr. 2/3/4). Correlation values across all seven contrasts were considered for hierarchical clustering (HCL).

Results

A total of 1,697 genes showed ≥1 significant correlation results and genes involved in cell differentiation (183), immune response (53), and apoptosis (170) were leading fractions. HCL grouped the genes into six major clusters. Functional annotation analysis using DAVID (http://david.abcc.ncifcrf.gov) revealed that expression profiles that best linked these variables were highly enriched in cytokine/chemokine activity (Fisher-exact P < 0.0001) and specific biological module-centric algorithms finally led our focus on four out of fifty-three immune response genes: SMAD family member 3 (SMAD3), interleukin 1 receptor accessory protein (IL1RAP), tumor necrosis factor receptor superfamily member 1A (TNFRSF1A), and chemokine 'C-C motif' receptor 5 (CCR5). Of those, TNFRSF1A and CCR5 showed significant correlation with two out of seven variables based on microarray and/or quantitative real-time polymerase chain reaction (qRT-PCR) data.

Conclusion

We identified molecular targets of the innate and adaptive immune system and validated their transcriptional specificity in vivo suggesting significant involvement in two unique outcomes during HCV treatment.",2012-05-11 +21335320,Topology and prediction of RNA pseudoknots.,"

Motivation

Several dynamic programming algorithms for predicting RNA structures with pseudoknots have been proposed that differ dramatically from one another in the classes of structures considered.

Results

Here, we use the natural topological classification of RNA structures in terms of irreducible components that are embeddable in the surfaces of fixed genus. We add to the conventional secondary structures four building blocks of genus one in order to construct certain structures of arbitrarily high genus. A corresponding unambiguous multiple context-free grammar provides an efficient dynamic programming approach for energy minimization, partition function and stochastic sampling. It admits a topology-dependent parametrization of pseudoknot penalties that increases the sensitivity and positive predictive value of predicted base pairs by 10-20% compared with earlier approaches. More general models based on building blocks of higher genus are also discussed.

Availability

The source code of gfold is freely available at http://www.combinatorics.cn/cbpc/gfold.tar.gz.

Contact

duck@santafe.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-17 +21515634,"WebFR3D--a server for finding, aligning and analyzing recurrent RNA 3D motifs.","WebFR3D is the on-line version of 'Find RNA 3D' (FR3D), a program for annotating atomic-resolution RNA 3D structure files and searching them efficiently to locate and compare RNA 3D structural motifs. WebFR3D provides on-line access to the central features of FR3D, including geometric and symbolic search modes, without need for installing programs or downloading and maintaining 3D structure data locally. In geometric search mode, WebFR3D finds all motifs similar to a user-specified query structure. In symbolic search mode, WebFR3D finds all sets of nucleotides making user-specified interactions. In both modes, users can specify sequence, sequence-continuity, base pairing, base-stacking and other constraints on nucleotides and their interactions. WebFR3D can be used to locate hairpin, internal or junction loops, list all base pairs or other interactions, or find instances of recurrent RNA 3D motifs (such as sarcin-ricin and kink-turn internal loops or T- and GNRA hairpin loops) in any PDB file or across a whole set of 3D structure files. The output page provides facilities for comparing the instances returned by the search by superposition of the 3D structures and the alignment of their sequences annotated with pairwise interactions. WebFR3D is available at http://rna.bgsu.edu/webfr3d.",2011-04-22 +21671271,Analysis of protein function and its prediction from amino acid sequence.,"Understanding protein function is one of the keys to understanding life at the molecular level. It is also important in the context of human disease because many conditions arise as a consequence of alterations of protein function. The recent availability of relatively inexpensive sequencing technology has resulted in thousands of complete or partially sequenced genomes with millions of functionally uncharacterized proteins. 
Such a large volume of data, combined with the lack of high-throughput experimental assays to functionally annotate proteins, contributes to the growing importance of automated function prediction. Here, we study proteins annotated by Gene Ontology (GO) terms and estimate the accuracy of functional transfer from protein sequence only. We find that the transfer of GO terms by pairwise sequence alignments is only moderately accurate, showing a surprisingly small influence of sequence identity (SID) in a broad range (30-100%). We developed and evaluated a new predictor of protein function, functional annotator (FANN), from amino acid sequence. The predictor exploits a multioutput neural network framework which is well suited to simultaneously modeling dependencies between functional terms. Experiments provide evidence that FANN-GO (predictor of GO terms; available from http://www.informatics.indiana.edu/predrag) outperforms standard methods such as transfer by global or local SID as well as GOtcha, a method that incorporates the structure of GO.",2011-04-19 +21504602,Estimation of alternative splicing isoform frequencies from RNA-Seq data.,"

Background

Massively parallel whole transcriptome sequencing, commonly referred to as RNA-Seq, is quickly becoming the technology of choice for gene expression profiling. However, due to the short read length delivered by current sequencing technologies, estimation of expression levels for alternative splicing gene isoforms remains challenging.

Results

In this paper we present a novel expectation-maximization algorithm for inference of isoform- and gene-specific expression levels from RNA-Seq data. Our algorithm, referred to as IsoEM, is based on disambiguating information provided by the distribution of insert sizes generated during sequencing library preparation, and takes advantage of base quality scores, strand and read pairing information when available. The open source Java implementation of IsoEM is freely available at http://dna.engr.uconn.edu/software/IsoEM/.

Conclusions

Empirical experiments on both synthetic and real RNA-Seq datasets show that IsoEM has scalable running time and outperforms existing methods of isoform and gene expression level estimation. Simulation experiments confirm previous findings that, for a fixed sequencing cost, using reads longer than 25-36 bases does not necessarily lead to better accuracy for estimating expression levels of annotated isoforms and genes.",2011-04-19 +21903624,Finding stable local optimal RNA secondary structures.,"

Motivation

Many RNAs, such as riboswitches, can fold into multiple alternate structures and perform different biological functions. These biologically functional structures usually have low free energies in their local energy landscapes and are very stable such that they cannot easily jump out of the current states and fold into other stable conformations. The conformational space of feasible RNA secondary structures is prohibitively large, and accurate prediction of functional structure conformations is challenging. Because the stability of an RNA secondary structure is determined predominantly by energetically favorable helical regions (stacks), we propose to use configurations of putative stacks to represent RNA secondary structures. By considering a reduced conformational space of local optimal stack configurations instead of all feasible RNA structures, we first present an algorithm for enumerating all possible local optimal stack configurations. In addition, we present a fast heuristic algorithm for approximating energy barriers encountered during folding pathways between each pair of local optimal stack configurations and finding all the stable local optimal structures.

Results

Benchmark tests have been conducted on several RNA riboswitches, whose alternate secondary structures have been experimentally verified. The benchmark results show that our method can successfully predict the native 'on' and 'off' secondary structures, and better rank them compared with other state-of-the-art approaches.

Availability

The software is freely available and can be downloaded at http://genome.ucf.edu/RNASLOpt.

Contact

shzhang@eecs.ucf.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-08 +21593135,Evaluation of drug-human serum albumin binding interactions with support vector machine aided online automated docking.,"

Motivation

Human serum albumin (HSA), the most abundant plasma protein is well known for its extraordinary binding capacity for both endogenous and exogenous substances, including a wide range of drugs. Interaction with the two principal binding sites of HSA in subdomain IIA (site 1) and in subdomain IIIA (site 2) controls the free, active concentration of a drug, provides a reservoir for a long duration of action and ultimately affects the ADME (absorption, distribution, metabolism, and excretion) profile. Due to the continuous demand to investigate HSA binding properties of novel drugs, drug candidates and drug-like compounds, a support vector machine (SVM) model was developed that efficiently predicts albumin binding. Our SVM model was integrated into a free, web-based prediction platform (http://albumin.althotas.com). Automated molecular docking calculations for prediction of complex geometry are also integrated into the web service. The platform enables the users (i) to predict if albumin binds the query ligand, (ii) to determine the probable ligand binding site (site 1 or site 2), (iii) to select the albumin X-ray structure which is complexed with the most similar ligand and (iv) to calculate complex geometry using molecular docking calculations. Our SVM model and the potential offered by the combined use of in silico calculation methods and experimental binding data are illustrated.",2011-05-18 +21893517,Gaussian interaction profile kernels for predicting drug-target interaction.,"

Motivation

The in silico prediction of potential interactions between drugs and target proteins is of core importance for the identification of new drugs or novel targets for existing drugs. However, only a tiny portion of all drug-target pairs in current datasets are experimentally validated interactions. This motivates the need for developing computational methods that predict true interaction pairs with high accuracy.

Results

We show that a simple machine learning method that uses the drug-target network as the only source of information is capable of predicting true interaction pairs with high accuracy. Specifically, we introduce interaction profiles of drugs (and of targets) in a network, which are binary vectors specifying the presence or absence of interaction with every target (drug) in that network. We define a kernel on these profiles, called the Gaussian Interaction Profile (GIP) kernel, and use a simple classifier, (kernel) Regularized Least Squares (RLS), for predicting drug-target interactions. We test comparatively the effectiveness of RLS with the GIP kernel on four drug-target interaction networks used in previous studies. The proposed algorithm achieves area under the precision-recall curve (AUPR) up to 92.7, significantly improving over results of state-of-the-art methods. Moreover, we show that using also kernels based on chemical and genomic information further increases accuracy, with a neat improvement on small datasets. These results substantiate the relevance of the network topology (in the form of interaction profiles) as source of information for predicting drug-target interactions.

Availability

Software and Supplementary Material are available at http://cs.ru.nl/~tvanlaarhoven/drugtarget2011/.

Contact

tvanlaarhoven@cs.ru.nl; elenam@cs.ru.nl.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-09-04 +21757463,Sufficient statistics and expectation maximization algorithms in phylogenetic tree models.,"

Motivation

Measuring evolutionary conservation is a routine step in the identification of functional elements in genome sequences. Although a number of studies have proposed methods that use the continuous time Markov models (CTMMs) to find evolutionarily constrained elements, their probabilistic structures have been less frequently investigated.

Results

In this article, we investigate a sufficient statistic for CTMMs. The statistic is composed of the fractional duration of nucleotide characters over evolutionary time, F(d), and the number of substitutions occurring in phylogenetic trees, N(s). We first derive basic properties of the sufficient statistic. Then, we derive an expectation maximization (EM) algorithm for estimating the parameters of a phylogenetic model, which iteratively computes the expectation values of the sufficient statistic. We show that the EM algorithm exhibits much faster convergence than other optimization methods that use numerical gradient descent algorithms. Finally, we investigate the genome-wide distribution of fractional duration time F(d) which, unlike the number of substitutions N(s), has rarely been investigated. We show that F(d) has evolutionary information that is distinct from that in N(s), which may be useful for detecting novel types of evolutionary constraints existing in the human genome.

Availability

The C++ source code of the 'Fdur' software is available at http://www.ncrna.org/software/fdur/

Contact

kiryu-h@k.u-tokyo.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-14 +22230938,Sign: large-scale gene network estimation environment for high performance computing.,"Our research group is currently developing software for estimating large-scale gene networks from gene expression data. The software, called SiGN, is specifically designed for the Japanese flagship supercomputer ""K computer"" which is planned to achieve 10 petaflops in 2012, and other high performance computing environments including Human Genome Center (HGC) supercomputer system. SiGN is a collection of gene network estimation software with three different sub-programs: SiGN-BN, SiGN-SSM and SiGN-L1. In these three programs, five different models are available: static and dynamic nonparametric Bayesian networks, state space models, graphical Gaussian models, and vector autoregressive models. All these models require a huge amount of computational resources for estimating large-scale gene networks and therefore are designed to be able to exploit the speed of 10 petaflops. The software will be available freely for ""K computer"" and HGC supercomputer system users. The estimated networks can be viewed and analyzed by Cell Illustrator Online and SBiP (Systems Biology integrative Pipeline). The software project web site is available at http://sign.hgc.jp/ .",2011-01-01 +21190133,Structure determination of genomic domains by satisfaction of spatial restraints.,"The three-dimensional (3D) architecture of a genome is non-random and known to facilitate the spatial colocalization of regulatory elements with the genes they regulate. Determining the 3D structure of a genome may therefore probe an essential step in characterizing how genes are regulated. 
Currently, there are several experimental and theoretical approaches that aim at determining the 3D structure of genomes and genomic domains; however, approaches integrating experiments and computation to identify the most likely 3D folding of a genome at medium to high resolutions have not been widely explored. Here, we review existing methodologies and propose that the integrative modeling platform (http://www.integrativemodeling.org), a computational package developed for structurally characterizing protein assemblies, could be used for integrating diverse experimental data towards the determination of the 3D architecture of genomic domains and entire genomes at unprecedented resolution. Our approach, through the visualization of looping interactions between distal regulatory elements, will allow for the characterization of global chromatin features and their relation to gene expression. We illustrate our work by outlining the recent determination of the 3D architecture of the α-globin domain in the human genome.",2011-01-01 +21844254,Quantitative trait loci associated with the humoral innate immune response in chickens were confirmed in a cross between Green-Legged Partridgelike and White Leghorn.,"Natural antibodies (NA) create a crucial barrier at the initial steps of the innate humoral immune response. The main role of NA in the defense system is to bind the pathogens at early stages of infection. Different pathogens are recognized by the presence of highly conserved antigen determinant [e.g., lipopolysaccharide (LPS) in gram-negative bacteria or lipoteichoic acid (LTA) in gram-positive bacteria]. In chickens, a different genetic background of NA binds LPS and LTA antigens, encoded by different QTL. The main objective of this work was to confirm known QTL associated with LPS and LTA NA. 
For this purpose a chicken reference population was created by crossing 2 breeds: a commercial layer, White Leghorn, and a Polish indigenous chicken, Green-Legged Partridgelike. The chromosomal regions analyzed harbored to GGA3, GGA5, GGA6, GGA8, GGA9, GGA10, GGA14, GGA15, GGA18, and GGAZ. The data collected consisted of the NA titers binding LPS and LTA (determined by ELISA at 12 wk of age) as well as the genotypes (30 short tandem repeat markers; average of 3 markers/chromosome, collected for generations F(0), F(1), and F(2)). The analyses were performed with 3 statistical models (paternal and maternal half-sib, line cross, and linkage analysis and linkage disequilibrium) implemented in GridQTL software (http://www.gridqtl.org.uk/). The QTL study of humoral innate immune response traits resulted in the confirmation of 3 QTL associated with NA titers binding LPS (located on GGA9, GGA18, and GGAZ) and 2 QTL associated with NA titers binding LTA (located on GGA5 and GGA14). A set of candidate genes within the regions of the validated QTL has been proposed.",2011-09-01 +22783839,An appraisal of the current and potential value of web 2.0 contributions to continuing education in oral implantology.,"

Objective

To systematically assess the informational value, quality, intention, source and bias of web 2.0 footage whose aim is peer-to-peer education about oral implantology.

Methods

YouTube (http://www.youtube.com) was scanned on 15 October 2010 for oral implantology-related videos using an adequately pre-defined search query. Search results were filtered with the system-generated category 'education' and the additional criterion 'most viewed'. Only those videos with at least 1000 views were included (total 124, of which 27 were excluded because they were not related to implantology). Filtered videos were discussed and rated with particular regard to the educational needs of potential groups of addressees [(i) undergraduates and laymen, (ii) dentists without or currently undergoing a specialisation in oral implantology and (iii) dentists who have completed a specialisation in the field of oral implantology] by a jury consisting of (i) an accredited post-graduate university instructor with 22 years of professional teaching experience in the field of implantology, (ii) a university lecturer in dentistry/orthodontics with 10 years teaching experience and (iii) a university haematologist/oncologist. They were required to fill out a questionnaire for each video. The data were statistically analysed using non-parametric ANOVA (α = 5%) and a sign test (α = 0.05/3 = 0.017).

Results

The YouTube scan produced 1710 results in the category 'EDU'. The analysis revealed that there is a wide range of instructional footage on this topic, but with highly variable quality and informational value. For a large proportion of the footage (47.4%), the intention was a mixture of education and advertisement. Its usefulness differed significantly for the three groups of addressees, offering greater novelty to undergraduates and post-graduates.

Conclusion

YouTube and similar social media websites may have a potential capacity and value in complementing continuing education in the technique of oral implantology. As a means of achieving an acceptable level of knowledge about the topic when used alone, it should not be considered to be suitable at this point in time.",2012-01-12 +21873327,Finding recurrent copy number alterations preserving within-sample homogeneity.,"

Motivation

Copy number alterations (CNAs) represent an important component of genetic variation and play a significant role in many human diseases. Development of array comparative genomic hybridization (aCGH) technology has made it possible to identify CNAs. Identification of recurrent CNAs represents the first fundamental step to provide a list of genomic regions which form the basis for further biological investigations. The main problem in recurrent CNAs discovery is related to the need to distinguish between functional changes and random events without pathological relevance. Within-sample homogeneity represents a common feature of copy number profile in cancer, so it can be used as additional source of information to increase the accuracy of the results. Although several algorithms aimed at the identification of recurrent CNAs have been proposed, no attempt of a comprehensive comparison of different approaches has yet been published.

Results

We propose a new approach, called Genomic Analysis of Important Alterations (GAIA), to find recurrent CNAs where a statistical hypothesis framework is extended to take into account within-sample homogeneity. Statistical significance and within-sample homogeneity are combined into an iterative procedure to extract the regions that likely are involved in functional changes. Results show that GAIA represents a valid alternative to other proposed approaches. In addition, we perform an accurate comparison by using two real aCGH datasets and a carefully planned simulation study.

Availability

GAIA has been implemented as R/Bioconductor package. It can be downloaded from the following page http://bioinformatics.biogem.it/download/gaia.

Contact

ceccarelli@unisannio.it; morganella@unisannio.it.

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-25 +21573116,ALG: automated genotype calling of Luminex assays.,"Single nucleotide polymorphisms (SNPs) are the most commonly used polymorphic markers in genetics studies. Among the different platforms for SNP genotyping, Luminex is one of the less exploited mainly due to the lack of a robust (semi-automated and replicable) freely available genotype calling software. Here we describe a clustering algorithm that provides automated SNP calls for Luminex genotyping assays. We genotyped 3 SNPs in a cohort of 330 childhood leukemia patients, 200 parents of patient and 325 healthy individuals and used the Automated Luminex Genotyping (ALG) algorithm for SNP calling. ALG genotypes were called twice to test for reproducibility and were compared to sequencing data to test for accuracy. Globally, this analysis demonstrates the accuracy (99.6%) of the method, its reproducibility (99.8%) and the low level of no genotyping calls (3.4%). The high efficiency of the method proves that ALG is a suitable alternative to the current commercial software. ALG is semi-automated, and provides numerical measures of confidence for each SNP called, as well as an effective graphical plot. Moreover ALG can be used either through a graphical user interface, requiring no specific informatics knowledge, or through command line with access to the open source code. The ALG software has been implemented in R and is freely available for non-commercial use either at http://alg.sourceforge.net or by request to mathieu.bourgey@umontreal.ca.",2011-05-06 +21551151,Integrative gene network construction for predicting a set of complementary prostate cancer genes.,"

Motivation

Diagnosis and prognosis of cancer and understanding oncogenesis within the context of biological pathways is one of the most important research areas in bioinformatics. Recently, there have been several attempts to integrate interactome and transcriptome data to identify subnetworks that provide limited interpretations of known and candidate cancer genes, as well as increase classification accuracy. However, these studies provide little information about the detailed roles of identified cancer genes.

Results

To provide more information to the network, we constructed the network by incorporating genetic interactions and manually curated gene regulations to the protein interaction network. To make our newly constructed network cancer specific, we identified edges where two genes show different expression patterns between cancer and normal phenotypes. We showed that the integration of various datasets increased classification accuracy, which suggests that our network is more complete than a network based solely on protein interactions. We also showed that our network contains significantly more known cancer-related genes than other feature selection algorithms. Through observations of some examples of cancer-specific subnetworks, we were able to predict more detailed and interpretable roles of oncogenes and other cancer candidate genes in the prostate cancer cells.

Availability

http://embio.yonsei.ac.kr/~Ahn/tc.php.

Contact

sanghyun@cs.yonsei.ac.kr",2011-05-06 +21685106,IPknot: fast and accurate prediction of RNA secondary structures with pseudoknots using integer programming.,"

Motivation

Pseudoknots found in secondary structures of a number of functional RNAs play various roles in biological processes. Recent methods for predicting RNA secondary structures cover certain classes of pseudoknotted structures, but only a few of them achieve satisfying predictions in terms of both speed and accuracy.

Results

We propose IPknot, a novel computational method for predicting RNA secondary structures with pseudoknots based on maximizing expected accuracy of a predicted structure. IPknot decomposes a pseudoknotted structure into a set of pseudoknot-free substructures and approximates a base-pairing probability distribution that considers pseudoknots, leading to the capability of modeling a wide class of pseudoknots and running quite fast. In addition, we propose a heuristic algorithm for refining base-pairing probabilities to improve the prediction accuracy of IPknot. The problem of maximizing expected accuracy is solved by using integer programming with threshold cut. We also extend IPknot so that it can predict the consensus secondary structure with pseudoknots when a multiple sequence alignment is given. IPknot is validated through extensive experiments on various datasets, showing that IPknot achieves better prediction accuracy and faster running time as compared with several competitive prediction methods.

Availability

The program of IPknot is available at http://www.ncrna.org/software/ipknot/. IPknot is also available as a web server at http://rna.naist.jp/ipknot/.

Contact

satoken@k.u-tokyo.ac.jp; ykato@is.naist.jp

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-01 +28083171,A Prototype Interactive Mapping Tool to Target Low Health Literacy in Missouri.,"An estimated 36 percent of American adults have health literacy levels rated at ""basic or below,"" indicating that they have difficulty obtaining, processing, and understanding basic health information and services. To help healthcare decisionmakers in Missouri identify neighborhood-level ""hotspots"" of suboptimal health or healthcare that may be due to low health literacy, RAND developed a prototype interactive web-based mapping tool. This builds on earlier RAND work to develop a predictive model of health literacy and estimate levels of health literacy in small geographic areas (e.g., census tracts). The interactive mapping tool allows stakeholders to select the level of geography (e.g., census tract, county), obtain information for and map specific regions of interest, select the characteristics to be mapped (i.e., estimates of community-level health literacy, health outcomes and care quality, neighborhood sociodemographic characteristics, and neighborhood health services data), and generate tables and reports on the regions and characteristics of interest. Housed on a dedicated RAND website (http://www.rand.org/health/projects/missouri-health-literacy.html), the mapping tool makes it possible for a range of stakeholders, from health plans to community organizations, to access and use the tool to help address healthcare disparities in their communities.",2011-03-01 +21685072,Optimally discriminative subnetwork markers predict response to chemotherapy.,"

Motivation

Molecular profiles of tumour samples have been widely and successfully used for classification problems. A number of algorithms have been proposed to predict classes of tumor samples based on expression profiles with relatively high performance. However, prediction of response to cancer treatment has proved to be more challenging and novel approaches with improved generalizability are still highly needed. Recent studies have clearly demonstrated the advantages of integrating protein-protein interaction (PPI) data with gene expression profiles for the development of subnetwork markers in classification problems.

Results

We describe a novel network-based classification algorithm (OptDis) using color coding technique to identify optimally discriminative subnetwork markers. Focusing on PPI networks, we apply our algorithm to drug response studies: we evaluate our algorithm using published cohorts of breast cancer patients treated with combination chemotherapy. We show that our OptDis method improves over previously published subnetwork methods and provides better and more stable performance compared with other subnetwork and single gene methods. We also show that our subnetwork method produces predictive markers that are more reproducible across independent cohorts and offer valuable insight into biological processes underlying response to therapy.

Availability

The implementation is available at: http://www.cs.sfu.ca/~pdao/personal/OptDis.html

Contact

cenk@cs.sfu.ca; alapuk@prostatecentre.com; ccollins@prostatecentre.com.",2011-07-01 +22245503,Non-homogeneous stereological properties of the rat hippocampus from high-resolution 3D serial reconstruction of thin histological sections.,"Integrating hippocampal anatomy from neuronal dendrites to whole system may help elucidate its relation to function. Toward this aim, we digitally traced the cytoarchitectonic boundaries of the dentate gyrus (DG) and areas CA3/CA1 throughout their entire longitudinal extent from high-resolution images of thin cryostatic sections of adult rat brain. The 3D computational reconstruction identified all isotropic 16 μm voxels with appropriate subregions and layers (http://krasnow1.gmu.edu/cn3/hippocampus3d). Overall, DG, CA3, and CA1 occupied comparable volumes (15.3, 12.2, and 18.8 mm(3), respectively), but displayed substantial rostrocaudal volumetric gradients: CA1 made up more than half of the posterior hippocampus, whereas CA3 and DG were more prominent in the anterior regions. The CA3/CA1 ratio increased from ∼0.4 to ∼1 septo-temporally because of a specific change in stratum radiatum volume. Next we virtually embedded 1.8 million neuronal morphologies stochastically resampled from 244 digital reconstructions, emulating the dense packing of granular and pyramidal layers, and appropriately orienting the principal dendritic axes relative to local curvature. The resulting neuropil occupancy reproduced recent electron microscopy data measured in a restricted location. Extension of this analysis across each layer and subregion over the whole hippocampus revealed highly non-homogeneous dendritic density. In CA1, dendritic occupancy was >60% higher temporally than septally (0.46 vs. 0.28, s.e.m. ∼0.05). CA3 values varied both across subfields (from 0.35 in CA3b/CA3c to 0.50 in CA3a) and layers (0.48, 0.34, and 0.27 in oriens, radiatum, and lacunosum-moleculare, respectively). 
Dendritic occupancy was substantially lower in DG, especially in the supra-pyramidal blade (0.18). The computed probability of dendrodendritic collision significantly correlated with expression of the membrane repulsion signal Down syndrome cell adhesion molecule (DSCAM). These heterogeneous stereological properties reflect and complement the non-uniform molecular composition, circuit connectivity, and computational function of the hippocampus across its transverse, longitudinal, and laminar organization.",2012-01-04 +21504919,Whose butt is it? tobacco industry research about smokers and cigarette butt waste.,"

Background

Cigarette filters are made of non-biodegradable cellulose acetate. As much as 766,571 metric tons of butts wind up as litter worldwide per year. Numerous proposals have been made to prevent or mitigate cigarette butt pollution, but none has been effective; cigarette butts are consistently found to be the single most collected item in beach clean-ups and litter surveys.

Methods

We searched the Legacy Tobacco Documents Library (http://legacy.library.ucsf.edu) and http://tobaccodocuments.org using a snowball strategy beginning with keywords (eg, 'filter', 'biodegradable', 'butts'). Data from approximately 680 documents, dated 1959-2006, were analysed using an interpretive approach.

Results

The tobacco industry has feared being held responsible for cigarette litter for more than 20 years. Their efforts to avoid this responsibility included developing biodegradable filters, creating anti-litter campaigns, and distributing portable and permanent ashtrays. They concluded that biodegradable filters would probably encourage littering and would not be marketable, and that smokers were defensive about discarding their tobacco butts and not amenable to anti-litter efforts.

Conclusions

Tobacco control and environmental advocates should develop partnerships to compel the industry to take financial and practical responsibility for cigarette butt waste.",2011-05-01 +21523935,ncRNA consensus secondary structure derivation using grammar strings.,"Many noncoding RNAs (ncRNAs) function through both their sequences and secondary structures. Thus, secondary structure derivation is an important issue in today's RNA research. The state-of-the-art structure annotation tools are based on comparative analysis, which derives consensus structure of homologous ncRNAs. Despite promising results from existing ncRNA aligning and consensus structure derivation tools, there is a need for more efficient and accurate ncRNA secondary structure modeling and alignment methods. In this work, we introduce a consensus structure derivation approach based on grammar string, a novel ncRNA secondary structure representation that encodes an ncRNA's sequence and secondary structure in the parameter space of a context-free grammar (CFG) and a full RNA grammar including pseudoknots. Being a string defined on a special alphabet constructed from a grammar, grammar string converts ncRNA alignment into sequence alignment. We derive consensus secondary structures from hundreds of ncRNA families from BraliBase 2.1 and 25 families containing pseudoknots using grammar string alignment. Our experiments have shown that grammar string-based structure derivation competes favorably in consensus structure quality with Murlet and RNASampler. Source code and experimental data are available at http://www.cse.msu.edu/~yannisun/grammar-string.",2011-04-01 +21357575,STOCHSIMGPU: parallel stochastic simulation for the Systems Biology Toolbox 2 for MATLAB.,"

Motivation

The importance of stochasticity in biological systems is becoming increasingly recognized and the computational cost of biologically realistic stochastic simulations urgently requires development of efficient software. We present a new software tool STOCHSIMGPU that exploits graphics processing units (GPUs) for parallel stochastic simulations of biological/chemical reaction systems and show that significant gains in efficiency can be made. It is integrated into MATLAB and works with the Systems Biology Toolbox 2 (SBTOOLBOX2) for MATLAB.

Results

The GPU-based parallel implementation of the Gillespie stochastic simulation algorithm (SSA), the logarithmic direct method (LDM) and the next reaction method (NRM) is approximately 85 times faster than the sequential implementation of the NRM on a central processing unit (CPU). Using our software does not require any changes to the user's models, since it acts as a direct replacement of the stochastic simulation software of the SBTOOLBOX2.

Availability

The software is open source under the GPL v3 and available at http://www.maths.ox.ac.uk/cmb/STOCHSIMGPU. The web site also contains supplementary information.

Contact

klingbeil@maths.ox.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-25 +22110646,MaturePred: efficient identification of microRNAs within novel plant pre-miRNAs.,"

Background

MicroRNAs (miRNAs) are a set of short (19∼24 nt) non-coding RNAs that play significant roles as posttranscriptional regulators in animals and plants. The ab initio prediction methods show excellent performance for discovering new pre-miRNAs. While most of these methods can distinguish real pre-miRNAs from pseudo pre-miRNAs, few can predict the positions of miRNAs. Among the existing methods that can also predict the miRNA positions, most of them are designed for mammalian miRNAs, including human and mouse. Minority of methods can predict the positions of plant miRNAs. Accurate prediction of the miRNA positions remains a challenge, especially for plant miRNAs. This motivates us to develop MaturePred, a machine learning method based on support vector machine, to predict the positions of plant miRNAs for the new plant pre-miRNA candidates.

Methodology/principal findings

A miRNA:miRNA* duplex is regarded as a whole to capture the binding characteristics of miRNAs. We extract the position-specific features, the energy related features, the structure related features, and stability related features from real/pseudo miRNA:miRNA* duplexes. A set of informative features are selected to improve the prediction accuracy. Two-stage sample selection algorithm is proposed to combat the serious imbalance problem between real and pseudo miRNA:miRNA* duplexes. The prediction method, MaturePred, can accurately predict plant miRNAs and achieve higher prediction accuracy compared with the existing methods. Further, we trained a prediction model with animal data to predict animal miRNAs. The model also achieves higher prediction performance. It further confirms the efficiency of our miRNA prediction method.

Conclusions

The superior performance of the proposed prediction model can be attributed to the extracted features of plant miRNAs and miRNA*s, the selected training dataset, and the carefully selected features. The web service of MaturePred, the training datasets, the testing datasets, and the selected features are freely available at http://nclab.hit.edu.cn/maturepred/.",2011-11-16 +22001825,Identification of Salmonella enterica species- and subgroup-specific genomic regions using Panseq 2.0.,"The pan-genome of a taxonomic group consists of evolutionarily conserved core genes shared by all members and accessory genes that are present only in some members of the group. Group- and subgroup-specific core genes are thought to contribute to shared phenotypes such as virulence and niche specificity. In this study we analyzed 39 Salmonella enterica genomes (16 closed, 23 draft), a species that contains two human-specific serovars that cause typhoid fever, as well as a large number of zoonotic serovars that cause gastroenteritis in humans. Panseq 2.0 was used to define the pan-genome by adjusting the threshold at which group-specific ""core"" loci are defined. We found the pan-genome to be 9.03 Mbp in size, and that the core genome size decreased, while the number of SNPs/100 bp increased, as the number of strains used to define the core genome increased, suggesting substantial divergence among S. enterica subgroups. Subgroup-specific ""core"" genes, in contrast, had fewer SNPs/100 bp, likely reflecting their more recent acquisition. Phylogenetic trees were created from the concatenated and aligned pan-genome, the core genome, and multi-locus-sequence typing (MLST) loci. Branch support increased among the trees, and strains of the same serovar grouped closer together as the number of loci used to create the tree increased. Further, high levels of discrimination were achieved even amongst the most closely related strains of S. 
enterica Typhi, suggesting that the data generated by Panseq may also be of value in short-term epidemiological studies. Panseq provides an easy and fast way of performing pan-genomic analyses, which can include the identification of group-dominant as well as group-specific loci and is available as a web-server and a standalone version at http://lfz.corefacility.ca/panseq/.",2011-10-01 +21834962,A prognostic tool to identify adolescents at high risk of becoming daily smokers.,"

Background

The American Academy of Pediatrics advocates that pediatricians should be involved in tobacco counseling and has developed guidelines for counseling. We present a prognostic tool for use by health care practitioners in both clinical and non-clinical settings, to identify adolescents at risk of becoming daily smokers.

Methods

Data were drawn from the Nicotine Dependence in Teens (NDIT) Study, a prospective investigation of 1293 adolescents, initially aged 12-13 years, recruited in 10 secondary schools in Montreal, Canada in 1999. Questionnaires were administered every three months for five years. The prognostic tool was developed using estimated coefficients from multivariable logistic models. Model overfitting was corrected using bootstrap cross-validation. Goodness-of-fit and predictive ability of the models were assessed by R2, the c-statistic, and the Hosmer-Lemeshow test.

Results

The 1-year and 2-year probability of initiating daily smoking was a joint function of seven individual characteristics: age; ever smoked; ever felt like you needed a cigarette; parent(s) smoke; sibling(s) smoke; friend(s) smoke; and ever drank alcohol. The models were characterized by reasonably good fit and predictive ability. They were transformed into user-friendly tables such that the risk of daily smoking can be easily computed by summing points for responses to each item. The prognostic tool is also available on-line at http://episerve.chumontreal.qc.ca/calculation_risk/daily-risk/daily_smokingadd.php.

Conclusions

The prognostic tool to identify youth at high risk of daily smoking may eventually be an important component of a comprehensive tobacco control system.",2011-08-11 +22500694,Genomic amplification patterns of human telomerase RNA gene and C-MYC in liquid-based cytological specimens used for the detection of high-grade cervical intraepithelial neoplasia.,"

Background

The amplification of oncogenes initiated by high-risk human papillomavirus (HPV) infection is an early event in cervical carcinogenesis and can be used for cervical lesion diagnosis. We measured the genomic amplification rates and the patterns of human telomerase RNA gene (TERC) and C-MYC in the liquid-based cytological specimens to evaluate the diagnostic characteristics for the detection of high-grade cervical lesions.

Methods

Two hundred and forty-three residual cytological specimens were obtained from outpatients aged 25 to 64 years at Qilu Hospital, Shandong University. The specimens were evaluated by fluorescence in situ hybridization (FISH) using chromosome probes to TERC (3q26) and C-MYC (8q24). All of the patients underwent colposcopic examination and histological evaluation. A Chi-square test was used for categorical data analysis.

Results

In the normal, cervical intraepithelial neoplasia grade 1 (CIN1), grade 2 (CIN2), grade 3 (CIN3) and squamous cervical cancer (SCC) cases, the TERC positive rates were 9.2%, 17.2%, 76.2%, 100.0% and 100.0%, respectively; the C-MYC positive rates were 20.7%, 31.0%, 71.4%, 81.8% and 100.0%, respectively. The TERC and C-MYC positive rates were higher in the CIN2+ (CIN2, CIN3 and SCC) cases than in the normal and CIN1 cases (p < 0.01). Compared with cytological analysis, the TERC test showed higher sensitivity (90.0% vs. 84.0%) and higher specificity (89.6% vs. 64.3%). The C-MYC test showed lower sensitivity (80.0% vs. 84.0%) and higher specificity (77.7% vs. 64.3%). Using a cut-off value of 5% or more aberrant cells, the TERC test showed the highest combination of sensitivity and specificity. The CIN2+ group showed more high-level TERC gene copy number (GCN) cells than did the normal/CIN1 group (p < 0.05). For C-MYC, no significant difference between the two histological categories was detected (p > 0.05).

Conclusions

The TERC test is highly sensitive and is therefore suitable for cervical cancer screening. The C-MYC test is not suitable for cancer screening because of its lower sensitivity. The amplification patterns of TERC become more diverse and complex as the severity of cervical diseases increases, whereas for C-MYC, the amplification patterns are similar between the normal/CIN1 and CIN2+ groups.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1308004512669913.",2012-04-13 +21685046,Noise reduction in genome-wide perturbation screens using linear mixed-effect models.,"

Motivation

High-throughput perturbation screens measure the phenotypes of thousands of biological samples under various conditions. The phenotypes measured in the screens are subject to substantial biological and technical variation. At the same time, in order to enable high throughput, it is often impossible to include a large number of replicates, and to randomize their order throughout the screens. Distinguishing true changes in the phenotype from stochastic variation in such experimental designs is extremely challenging, and requires adequate statistical methodology.

Results

We propose a statistical modeling framework that is based on experimental designs with at least two controls profiled throughout the experiment, and a normalization and variance estimation procedure with linear mixed-effects models. We evaluate the framework using three comprehensive screens of Saccharomyces cerevisiae, which involve 4940 single-gene knock-out haploid mutants, 1127 single-gene knock-out diploid mutants and 5798 single-gene overexpression haploid strains. We show that the proposed approach (i) can be used in conjunction with practical experimental designs; (ii) allows extensions to alternative experimental workflows; (iii) enables a sensitive discovery of biologically meaningful changes; and (iv) strongly outperforms the existing noise reduction procedures.

Availability

All experimental datasets are publicly available at www.ionomicshub.org. The R package HTSmix is available at http://www.stat.purdue.edu/~ovitek/HTSmix.html.

Contact

ovitek@stat.purdue.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-06-17 +21511715,"PONDEROSA, an automated 3D-NOESY peak picking program, enables automated protein structure determination.","

Summary

PONDEROSA (Peak-picking Of Noe Data Enabled by Restriction of Shift Assignments) accepts input information consisting of a protein sequence, backbone and sidechain NMR resonance assignments, and 3D-NOESY ((13)C-edited and/or (15)N-edited) spectra, and returns assignments of NOESY crosspeaks, distance and angle constraints, and a reliable NMR structure represented by a family of conformers. PONDEROSA incorporates and integrates external software packages (TALOS+, STRIDE and CYANA) to carry out different steps in the structure determination. PONDEROSA implements internal functions that identify and validate NOESY peak assignments and assess the quality of the calculated three-dimensional structure of the protein. The robustness of the analysis results from PONDEROSA's hierarchical processing steps that involve iterative interaction among the internal and external modules. PONDEROSA supports a variety of input formats: SPARKY assignment table (.shifts) and spectrum file formats (.ucsf), XEASY proton file format (.prot), and NMR-STAR format (.star). To demonstrate the utility of PONDEROSA, we used the package to determine 3D structures of two proteins: human ubiquitin and Escherichia coli iron-sulfur scaffold protein variant IscU(D39A). The automatically generated structural constraints and ensembles of conformers were as good as or better than those determined previously by much less automated means.

Availability

The program, in the form of binary code along with tutorials and reference manuals, is available at http://ponderosa.nmrfam.wisc.edu/.",2011-04-21 +21682915,'The smoking toolkit study': a national study of smoking and smoking cessation in England.,"

Background

Up-to-date data tracking of national smoking patterns and cessation-related behaviour is required to evaluate and inform tobacco control strategies. The Smoking Toolkit Study (STS) was designed for this role. This paper describes the methodology of the STS and examines as far as possible the representativeness of the samples.

Methods

The STS consists of monthly, cross sectional household interviews of adults aged 16 and over in England with smokers and recent ex-smokers in each monthly wave followed up by postal questionnaires three and six months later. Between November 2006 and December 2010 the baseline survey was completed by 90,568 participants. STS demographic, prevalence and cigarette consumption estimates are compared with those from the Health Survey for England (HSE) and the General Lifestyle Survey (GLF) for 2007-2009.

Results

Smoking prevalence estimates of all the surveys were similar from 2008 onwards (e.g. 2008 STS=22.0%, 95% C.I.=21.4% to 22.6%, HSE=21.7%, 95% C.I.=20.9% to 22.6%, GLF=20.8%, 95% C.I.=19.7% to 21.9%), although there was heterogeneity in 2007 (chi-square=50.30, p<0.001). Some differences were observed across surveys within sociodemographic sub-groups, although largely in 2007. Cigarette consumption was virtually identical in all surveys and years.

Conclusion

There is reason to believe that the STS findings (see http://www.smokinginengland.info) are generalisable to the adult population of England.",2011-06-18 +21824970,Inferring disease and gene set associations with rank coherence in networks.,"

Motivation

To validate the candidate disease genes identified from high-throughput genomic studies, a necessary step is to elucidate the associations between the set of candidate genes and disease phenotypes. The conventional gene set enrichment analysis often fails to reveal associations between disease phenotypes and the gene sets with a short list of poorly annotated genes, because the existing annotations of disease-causative genes are incomplete. This article introduces a network-based computational approach called rcNet to discover the associations between gene sets and disease phenotypes. A learning framework is proposed to maximize the coherence between the predicted phenotype-gene set relations and the known disease phenotype-gene associations. An efficient algorithm coupling ridge regression with label propagation and two variants are designed to find the optimal solution to the objective functions of the learning framework.

Results

We evaluated the rcNet algorithms with leave-one-out cross-validation on Online Mendelian Inheritance in Man (OMIM) data and an independent test set of recently discovered disease-gene associations. In the experiments, the rcNet algorithms achieved best overall rankings compared with the baselines. To further validate the reproducibility of the performance, we applied the algorithms to identify the target diseases of novel candidate disease genes obtained from recent studies of Genome-Wide Association Study (GWAS), DNA copy number variation analysis and gene expression profiling. The algorithms ranked the target disease of the candidate genes at the top of the rank list in many cases across all the three case studies.

Availability

http://compbio.cs.umn.edu/dgsa_rcNet

Contact

kuang@cs.umn.edu.",2011-08-08 +21669962,An EST-SSR linkage map of Raphanus sativus and comparative genomics of the Brassicaceae.,"Raphanus sativus (2n = 2x = 18) is a widely cultivated member of the family Brassicaceae, for which genomic resources are available only to a limited extent in comparison to many other members of the family. To promote more genetic and genomic studies and to enhance breeding programmes of R. sativus, we have prepared genetic resources such as complementary DNA libraries, expressed sequences tags (ESTs), simple sequence repeat (SSR) markers and a genetic linkage map. A total of 26 606 ESTs have been collected from seedlings, roots, leaves, and flowers, and clustered into 10 381 unigenes. Similarities were observed between the expression patterns of transcripts from R. sativus and those from representative members of the genera Arabidopsis and Brassica, indicating their functional relatedness. The EST sequence data were used to design 3800 SSR markers and consequently 630 polymorphic SSR loci and 213 reported marker loci have been mapped onto nine linkage groups, covering 1129.2 cM with an average distance of 1.3 cM between loci. Comparison of the mapped EST-SSR marker positions in R. sativus with the genome sequence of A. thaliana indicated that the Brassicaceae members have evolved from a common ancestor. It appears that genomic fragments corresponding to those of A. thaliana have been doubled and tripled in R. sativus. The genetic map developed here is expected to provide a standard map for the genetics, genomics, and molecular breeding of R. sativus as well as of related species. The resources are available at http://marker.kazusa.or.jp/Daikon.",2011-06-13 +21317142,Topological entropy of DNA sequences.,"

Motivation

Topological entropy has been one of the most difficult to implement of all the entropy-theoretic notions. This is primarily due to finite sample effects and high-dimensionality problems. In particular, topological entropy has been implemented in previous literature to conclude that entropy of exons is higher than of introns, thus implying that exons are more 'random' than introns.

Results

We define a new approximation to topological entropy free from the aforementioned difficulties. We compute its expected value and apply this definition to the intron and exon regions of the human genome to observe that as expected, the entropy of introns is significantly higher than that of exons. We also find that introns are less random than expected: their entropy is lower than the computed expected value. We also observe the perplexing phenomenon that introns on chromosome Y have atypically low and bimodal entropy, possibly corresponding to random sequences (high entropy) and sequences that possess hidden structure or function (low entropy).

Availability

A Mathematica implementation is available at http://www.math.psu.edu/koslicki/entropy.nb

Contact

koslicki@math.psu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-10 +21966383,Persistence and availability of Web services in computational biology.,"We have conducted a study on the long-term availability of bioinformatics Web services: an observation of 927 Web services published in the annual Nucleic Acids Research Web Server Issues between 2003 and 2009. We found that 72% of Web sites are still available at the published addresses, only 9% of services are completely unavailable. Older addresses often redirect to new pages. We checked the functionality of all available services: for 33%, we could not test functionality because there was no example data or a related problem; 13% were truly no longer working as expected; we could positively confirm functionality only for 45% of all services. Additionally, we conducted a survey among 872 Web Server Issue corresponding authors; 274 replied. 78% of all respondents indicate their services have been developed solely by students and researchers without a permanent position. Consequently, these services are in danger of falling into disrepair after the original developers move to another institution, and indeed, for 24% of services, there is no plan for maintenance, according to the respondents. We introduce a Web service quality scoring system that correlates with the number of citations: services with a high score are cited 1.8 times more often than low-scoring services. We have identified key characteristics that are predictive of a service's survival, providing reviewers, editors, and Web service developers with the means to assess or improve Web services. A Web service conforming to these criteria receives more citations and provides more reliable service for its users. The most effective way of ensuring continued access to a service is a persistent Web address, offered either by the publishing journal, or created on the authors' own initiative, for example at http://bioweb.me. 
The community would benefit the most from a policy requiring any source code needed to reproduce results to be deposited in a public repository.",2011-09-22 +22182763,"Virtual slides in peer reviewed, open access medical publication.","

Background

Application of virtual slides (VS), the digitalization of complete glass slides, is in its infancy to be implemented in routine diagnostic surgical pathology and to issues that are related to tissue-based diagnosis, such as education and scientific publication.

Approach

Electronic publication in Pathology offers new features of scientific communication in pathology that cannot be obtained by conventional paper based journals. Most of these features are based upon completely open or partly directed interaction between the reader and the system that distributes the article. One of these interactions can be applied to microscopic images allowing the reader to navigate and magnify the presented images. VS and interactive Virtual Microscopy (VM) are a tool to increase the scientific value of microscopic images.

Technology and performance

The open access journal Diagnostic Pathology http://www.diagnosticpathology.org has existed for about five years. It is a peer reviewed journal that publishes all types of scientific contributions, including original scientific work, case reports and review articles. In addition to digitized still images the authors of appropriate articles are requested to submit the underlying glass slides to an institution (DiagnomX.eu, and Leica.com) for digitalization and documentation. The images are stored in a separate image data bank which is adequately linked to the article. The normal review process is not involved. Both processes (peer review and VS acquisition) are performed contemporaneously in order to minimize a potential publication delay. VS are not provided with a DOI index (digital object identifier). The first articles that include VS were published in March 2011.

Results and perspectives

Several logistic constraints had to be overcome until the first articles including VS could be published. Step by step an automated acquisition and distribution system had to be implemented to the corresponding article. The acceptance of VS by the reader is high as well as by the authors. Of specific value are the increased confidence to and reputation of authors as well as the presented information to the reader. Additional associated functions such as access to author-owned related image collections, reader-controlled automated image measurements and image transformations are in preparation.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1232133347629819.",2011-12-19 +21493653,A robust and accurate binning algorithm for metagenomic sequences with arbitrary species abundance ratio.,"

Motivation

With the rapid development of next-generation sequencing techniques, metagenomics, also known as environmental genomics, has emerged as an exciting research area that enables us to analyze the microbial environment in which we live. An important step for metagenomic data analysis is the identification and taxonomic characterization of DNA fragments (reads or contigs) resulting from sequencing a sample of mixed species. This step is referred to as 'binning'. Binning algorithms that are based on sequence similarity and sequence composition markers rely heavily on the reference genomes of known microorganisms or phylogenetic markers. Due to the limited availability of reference genomes and the bias and low availability of markers, these algorithms may not be applicable in all cases. Unsupervised binning algorithms which can handle fragments from unknown species provide an alternative approach. However, existing unsupervised binning algorithms only work on datasets either with balanced species abundance ratios or rather different abundance ratios, but not both.

Results

In this article, we present MetaCluster 3.0, an integrated binning method based on the unsupervised top--down separation and bottom--up merging strategy, which can bin metagenomic fragments of species with very balanced abundance ratios (say 1:1) to very different abundance ratios (e.g. 1:24) with consistently higher accuracy than existing methods.

Availability

MetaCluster 3.0 can be downloaded at http://i.cs.hku.hk/~alse/MetaCluster/.",2011-04-14 +21685567,STEPP - Search Tool for Exploration of Petri net Paths: A New Tool for Petri Net-Based Path Analysis in Biochemical Networks.,"

Unlabelled

To understand biochemical processes caused by, e.g., mutations or deletions in the genome, the knowledge of possible alternative paths between two arbitrary chemical compounds is of increasing interest for biotechnology, pharmacology, medicine, and drug design. With the steadily increasing amount of data from high-throughput experiments new biochemical networks can be constructed and existing ones can be extended, which results in many large metabolic, signal transduction, and gene regulatory networks. The search for alternative paths within these complex and large networks can provide a huge amount of solutions, which can not be handled manually. Moreover, not all of the alternative paths are generally of interest. Therefore, we have developed and implemented a method, which allows us to define constraints to reduce the set of all structurally possible paths to the truly interesting path set. The paper describes the search algorithm and the constraints definition language. We give examples for path searches using this dedicated special language for a Petri net model of the sucrose-to-starch breakdown in the potato tuber.

Availability

http://sanaga.tfh-berlin.de/~stepp/",2011-01-01 +21646341,An alternative approach to multiple genome comparison.,"Genome comparison is now a crucial step for genome annotation and identification of regulatory motifs. Genome comparison aims for instance at finding genomic regions either specific to or in one-to-one correspondence between individuals/strains/species. It serves e.g. to pre-annotate a new genome by automatically transferring annotations from a known one. However, efficiency, flexibility and objectives of current methods do not suit the whole spectrum of applications, genome sizes and organizations. Innovative approaches are still needed. Hence, we propose an alternative way of comparing multiple genomes based on segmentation by similarity. In this framework, rather than being formulated as a complex optimization problem, genome comparison is seen as a segmentation question for which a single optimal solution can be found in almost linear time. We apply our method to analyse three strains of a virulent pathogenic bacteria, Ehrlichia ruminantium, and identify 92 new genes. We also find out that a substantial number of genes thought to be strain specific have potential orthologs in the other strains. Our solution is implemented in an efficient program, qod, equipped with a user-friendly interface, and enables the automatic transfer of annotations between compared genomes or contigs (Video in Supplementary Data). Because it somehow disregards the relative order of genomic blocks, qod can handle unfinished genomes, which due to the difficulty of sequencing completion may become an interesting characteristic for the future. Availabilty: http://www.atgc-montpellier.fr/qod.",2011-06-06 +21621992,Lung function and respiratory symptoms in a 1-year randomized smoking cessation trial of varenicline in COPD patients.,"

Unlabelled

There are few data concerning changes in lung function and respiratory symptoms in smokers with chronic obstructive pulmonary disease (COPD) weeks to months after quitting smoking. We examined serial changes in spirometry and Clinical COPD Questionnaire (CCQ) scores (measuring respiratory symptoms and health-related quality of life) in COPD participants by smoking status during a smoking cessation trial. In this randomized, double-blind trial, smokers with mild-to-moderate COPD were treated with varenicline 1 mg b.i.d. or placebo for 12 weeks and followed to Week 52. Primary endpoints of abstinence were previously reported. Secondary endpoints were mean changes from baseline in post-bronchodilator forced expired volume in 1 s (FEV(1)) and CCQ scores. Change from baseline in post-bronchodilator FEV(1) was significantly improved in continuous abstainers (121.8 mL) vs. continuous smokers (37.9 mL) at Week 12 (P = 0.0069), but not at Weeks 24 or 52. Mean change from baseline at Week 12 in CCQ Total Score was significantly better in continuous abstainers (-1.04) vs. continuous smokers (-0.53; P < 0.0001): this improvement was sustained at Weeks 24 and 52. In a 1-year cessation trial of smokers with COPD, continuous abstinence compared with continuous smoking significantly improved post-bronchodilator FEV(1) at Week 12 (although the difference narrowed subsequently) and CCQ Total Scores at Week 12, with sustained improvement thereafter. (

Trial registry

http://www.clinicaltrials.gov; trial identifier: NCT00285012).",2011-05-31 +21784794,Identifying disease-associated SNP clusters via contiguous outlier detection.,"

Motivation

Although genome-wide association studies (GWAS) have identified many disease-susceptibility single-nucleotide polymorphisms (SNPs), these findings can only explain a small portion of genetic contributions to complex diseases, which is known as the missing heritability. A possible explanation is that genetic variants with small effects have not been detected. The chance is < 8 that a causal SNP will be directly genotyped. The effects of its neighboring SNPs may be too weak to be detected due to the effect decay caused by imperfect linkage disequilibrium. Moreover, it is still challenging to detect a causal SNP with a small effect even if it has been directly genotyped.

Results

In order to increase the statistical power when detecting disease-associated SNPs with relatively small effects, we propose a method using neighborhood information. Since the disease-associated SNPs account for only a small fraction of the entire SNP set, we formulate this problem as Contiguous Outlier DEtection (CODE), which is a discrete optimization problem. In our formulation, we cast the disease-associated SNPs as outliers and further impose a spatial continuity constraint for outlier detection. We show that this optimization can be solved exactly using graph cuts. We also employ the stability selection strategy to control the false positive results caused by imperfect parameter tuning. We demonstrate its advantage in simulations and real experiments. In particular, the newly identified SNP clusters are replicable in two independent datasets.

Availability

The software is available at: http://bioinformatics.ust.hk/CODE.zip.

Contact

eeyu@ust.hk

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-22 +22436596,Effect of image compression and scaling on automated scoring of immunohistochemical stainings and segmentation of tumor epithelium.,"

Background

Digital whole-slide scanning of tissue specimens produces large images demanding increasing storing capacity. To reduce the need of extensive data storage systems image files can be compressed and scaled down. The aim of this article is to study the effect of different levels of image compression and scaling on automated image analysis of immunohistochemical (IHC) stainings and automated tumor segmentation.

Methods

Two tissue microarray (TMA) slides containing 800 samples of breast cancer tissue immunostained against Ki-67 protein and two TMA slides containing 144 samples of colorectal cancer immunostained against EGFR were digitized with a whole-slide scanner. The TMA images were JPEG2000 wavelet compressed with four compression ratios: lossless, and 1:12, 1:25 and 1:50 lossy compression. Each of the compressed breast cancer images was furthermore scaled down either to 1:1, 1:2, 1:4, 1:8, 1:16, 1:32, 1:64 or 1:128. Breast cancer images were analyzed using an algorithm that quantitates the extent of staining in Ki-67 immunostained images, and EGFR immunostained colorectal cancer images were analyzed with an automated tumor segmentation algorithm. The automated tools were validated by comparing the results from losslessly compressed and non-scaled images with results from conventional visual assessments. Percentage agreement and kappa statistics were calculated between results from compressed and scaled images and results from lossless and non-scaled images.

Results

Both of the studied image analysis methods showed good agreement between visual and automated results. In the automated IHC quantification, an agreement of over 98% and a kappa value of over 0.96 was observed between losslessly compressed and non-scaled images and combined compression ratios up to 1:50 and scaling down to 1:8. In automated tumor segmentation, an agreement of over 97% and a kappa value of over 0.93 was observed between losslessly compressed images and compression ratios up to 1:25.

Conclusions

The results of this study suggest that images stored for assessment of the extent of immunohistochemical staining can be compressed and scaled significantly, and images of tumors to be segmented can be compressed without compromising computer-assisted analysis results using studied methods.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/2442925476534995.",2012-03-21 +21880147,An integrative approach to ortholog prediction for disease-focused and other functional studies.,"

Background

Mapping of orthologous genes among species serves an important role in functional genomics by allowing researchers to develop hypotheses about gene function in one species based on what is known about the functions of orthologs in other species. Several tools for predicting orthologous gene relationships are available. However, these tools can give different results and identification of predicted orthologs is not always straightforward.

Results

We report a simple but effective tool, the Drosophila RNAi Screening Center Integrative Ortholog Prediction Tool (DIOPT; http://www.flyrnai.org/diopt), for rapid identification of orthologs. DIOPT integrates existing approaches, facilitating rapid identification of orthologs among human, mouse, zebrafish, C. elegans, Drosophila, and S. cerevisiae. As compared to individual tools, DIOPT shows increased sensitivity with only a modest decrease in specificity. Moreover, the flexibility built into the DIOPT graphical user interface allows researchers with different goals to appropriately 'cast a wide net' or limit results to highest confidence predictions. DIOPT also displays protein and domain alignments, including percent amino acid identity, for predicted ortholog pairs. This helps users identify the most appropriate matches among multiple possible orthologs. To facilitate using model organisms for functional analysis of human disease-associated genes, we used DIOPT to predict high-confidence orthologs of disease genes in Online Mendelian Inheritance in Man (OMIM) and genes in genome-wide association study (GWAS) data sets. The results are accessible through the DIOPT diseases and traits query tool (DIOPT-DIST; http://www.flyrnai.org/diopt-dist).

Conclusions

DIOPT and DIOPT-DIST are useful resources for researchers working with model organisms, especially those who are interested in exploiting model organisms such as Drosophila to study the functions of human disease genes.",2011-08-31 +21245417,"VeryGene: linking tissue-specific genes to diseases, drugs, and beyond for knowledge discovery.","In addition to many other genes, tissue-specific genes (TSGs) represent a set of genes of great importance for human physiology. However, the links among TSGs, diseases, and potential therapeutic agents are often missing, hidden, or too scattered to find. There is a need to establish a knowledgebase for researchers to share this and additional information in order to speed up discovery and clinical practice. As an initiative toward systems biology, the VeryGene web server was developed to fill this gap. A significant effort has been made to integrate TSGs from two large-scale data analyses with respective information on subcellular localization, Gene Ontology, Reactome, KEGG pathway, Mouse Genome Informatics (MGI) Mammalian Phenotype, disease association, and targeting drugs. The current release carefully selected 3,960 annotated TSGs derived from 127 normal human tissues and cell types, including 5,672 gene-disease and 2,171 drug-target relationships. In addition to being a specialized source for TSGs, VeryGene can be used as a discovery tool by generating novel inferences. Some inherently useful but hidden relations among genes, diseases, drugs, and other important aspects can be inferred to form testable hypotheses. VeryGene is available online at http://www.verygene.com.",2011-01-18 +21333642,Hepcidin: a novel peptide hormone regulating iron metabolism.,"

Background

Hepcidin is a low-molecular weight hepatic peptide regulating iron homeostasis. Hepcidin inhibits the cellular efflux of iron by binding to, and inducing the internalization and degradation of, ferroportin, the exclusive iron exporter in iron-transporting cells. It has been recently recognized as a main hormone behind anemia of chronic disease.

Method

A comprehensive literature search was conducted from the websites of Pubmed Central, the US National Library of Medicine's digital archive of life sciences literature (http://www.pubmedcentral.nih.gov/) and the National Library of Medicine (http://www.ncbl.nlm.nih.gov). The data was also assessed from journals and books that published relevant articles in this field.

Result

Hepcidin regulates iron uptake constantly on a daily basis, to maintain sufficient iron stores for erythropoiesis. Hepcidin, by its iron regulatory action on iron metabolism may be expected to have an important role in immune regulation, inflammatory diseases and malignancies. Hepcidin is the underlying cause of anemia in these clinical settings.

Conclusion

Hepcidin analysis may prove to be a novel tool for differential diagnosis and monitoring of disorders of iron metabolism, and establishment of therapeutic measures in various disease conditions like hereditary hemochromatosis, anemia associated with chronic kidney disease, rheumatoid arthritis and cancers.",2011-02-17 +21586134,"A formal MIM specification and tools for the common exchange of MIM diagrams: an XML-Based format, an API, and a validation method.","

Background

The Molecular Interaction Map (MIM) notation offers a standard set of symbols and rules on their usage for the depiction of cellular signaling network diagrams. Such diagrams are essential for disseminating biological information in a concise manner. A lack of software tools for the notation restricts wider usage of the notation. Development of software is facilitated by a more detailed specification regarding software requirements than has previously existed for the MIM notation.

Results

A formal implementation of the MIM notation was developed based on a core set of previously defined glyphs. This implementation provides a detailed specification of the properties of the elements of the MIM notation. Building upon this specification, a machine-readable format is provided as a standardized mechanism for the storage and exchange of MIM diagrams. This new format is accompanied by a Java-based application programming interface to help software developers to integrate MIM support into software projects. A validation mechanism is also provided to determine whether MIM datasets are in accordance with syntax rules provided by the new specification.

Conclusions

The work presented here provides key foundational components to promote software development for the MIM notation. These components will speed up the development of interoperable tools supporting the MIM notation and will aid in the translation of data stored in MIM diagrams to other standardized formats. Several projects utilizing this implementation of the notation are outlined herein. The MIM specification is available as an additional file to this publication. Source code, libraries, documentation, and examples are available at http://discover.nci.nih.gov/mim.",2011-05-17 +22479880,Palliative care in the management of lung cancer: analgesic utilization and barriers to optimal pain management.,"

Objective

Little data exist on assessing pain medication utilization among lung cancer patients or on the reasons they fail to receive optimal analgesic treatment. This study evaluates those reasons and investigates perceived causes of pain among individuals with lung cancer.

Design

An institutional review board-approved Internet-based questionnaire was posted on http://www.oncolink.org that included 22 queries evaluating analgesic utilization, pain control, and attitudes regarding analgesics.

Patients and participants

Between November 2005 and July 2008, 90 respondents with lung malignancies participated. Respondents were Caucasian (89 percent), male (54 percent), and had non-small-cell lung cancer (79 percent), small-cell lung cancer (12 percent), or mesothelioma (9 percent).

Results

Respondents underwent surgery (48 percent), chemotherapy (58 percent), and radiotherapy (44 percent). Most respondents (92 percent) reported experiencing pain, with 52 percent attributing pain directly to cancer, 38 percent to cancer treatment, and 67 percent unsure of the primary cause. Among respondents experiencing pain, 33 percent did not use analgesics. Analgesic utilization was less in men (p = 0.050) but did not differ by minority status (p = 0.127), education level (p = 0.37), or lung cancer histology (p = 0.134). Analgesic use was higher in subjects receiving radiotherapy (p = 0.002) and chemotherapy (p = 0.013) but not surgery (p = 0.16). Reasons for not taking analgesics included fear of addiction/dependence (76 percent), healthcare providers not recommending medications (71 percent), and inability to pay for analgesics (56 percent). Participants pursued physical therapy (76 percent) and other complementary modalities (24 percent) for pain control.

Conclusions

Many individuals with lung cancer perceive pain from both their disease and their cancer treatment. However, some study respondents did not use analgesics due to concerns of addiction, cost, or their healthcare providers not recommending analgesics. Medical professionals providing medical management for lung cancer patients should make pain management a priority and regularly discuss pain symptoms and pain management with patients.",2012-01-01 +21576180,Classification with correlated features: unreliability of feature ranking and solutions.,"

Motivation

Classification and feature selection of genomics or transcriptomics data is often hampered by the large number of features as compared with the small number of samples available. Moreover, features represented by probes that either have similar molecular functions (gene expression analysis) or genomic locations (DNA copy number analysis) are highly correlated. Classical model selection methods such as penalized logistic regression or random forest become unstable in the presence of high feature correlations. Sophisticated penalties such as group Lasso or fused Lasso can force the models to assign similar weights to correlated features and thus improve model stability and interpretability. In this article, we show that the measures of feature relevance corresponding to the above-mentioned methods are biased such that the weights of the features belonging to groups of correlated features decrease as the sizes of the groups increase, which leads to incorrect model interpretation and misleading feature ranking.

Results

With simulation experiments, we demonstrate that Lasso logistic regression, fused support vector machine, group Lasso and random forest models suffer from correlation bias. Using simulations, we show that two related methods for group selection based on feature clustering can be used for correcting the correlation bias. These techniques also improve the stability and the accuracy of the baseline models. We apply all methods investigated to a breast cancer and a bladder cancer arrayCGH dataset and in order to identify copy number aberrations predictive of tumor phenotype.

Availability

R code can be found at: http://www.mpi-inf.mpg.de/~laura/Clustering.r.",2011-05-16 +21602919,A comprehensive analysis of gene expression changes provoked by bacterial and fungal infection in C. elegans.,"While Caenorhabditis elegans specifically responds to infection by the up-regulation of certain genes, distinct pathogens trigger the expression of a common set of genes. We applied new methods to conduct a comprehensive and comparative study of the transcriptional response of C. elegans to bacterial and fungal infection. Using tiling arrays and/or RNA-sequencing, we have characterized the genome-wide transcriptional changes that underlie the host's response to infection by three bacterial (Serratia marcescens, Enterococcus faecalis and Photorhabdus luminescens) and two fungal pathogens (Drechmeria coniospora and Harposporium sp.). We developed a flexible tool, the WormBase Converter (available at http://wormbasemanager.sourceforge.net/), to allow cross-study comparisons. The new data sets provided more extensive lists of differentially regulated genes than previous studies. Annotation analysis confirmed that genes commonly up-regulated by bacterial infections are related to stress responses. We found substantial overlaps between the genes regulated upon intestinal infection by the bacterial pathogens and Harposporium, and between those regulated by Harposporium and D. coniospora, which infects the epidermis. Among the fungus-regulated genes, there was a significant bias towards genes that are evolving rapidly and potentially encode small proteins. The results obtained using new methods reveal that the response to infection in C. elegans is determined by the nature of the pathogen, the site of infection and the physiological imbalance provoked by infection. They form the basis for future functional dissection of innate immune signaling. 
Finally, we also propose alternative methods to identify differentially regulated genes that take into account the greater variability in lowly expressed genes.",2011-05-13 +22564289,Ascertaining gene flow patterns in livestock populations of developing countries: a case study in Burkina Faso goat.,"

Background

Introgression of Sahel livestock genes southwards in West Africa may be favoured by human activity and the increase of the duration of the dry seasons since the 1970's. The aim of this study is to assess the gene flow patterns in Burkina Faso goat and to ascertain the most likely factors influencing geographic patterns of genetic variation in the Burkina Faso goat population.

Results

A total of 520 goat were sampled in 23 different locations of Burkina Faso and genotyped for a set of 19 microsatellites. Data deposited in the Dryad repository: http://dx.doi.org/10.5061/dryad.41h46j37. Although overall differentiation is poor (FST = 0.067 ± 0.003), the goat population of Burkina Faso is far from being homogeneous. Barrier analysis pointed out the existence of: a) genetic discontinuities in the Central and Southeast Burkina Faso; and b) genetic differences within the goat sampled in the Sahel or the Sudan areas of Burkina Faso. Principal component analysis and admixture proportion scores were computed for each population sampled and used to construct interpolation maps. Furthermore, Population Graph analysis revealed that the Sahel and the Sudan environmental areas of Burkina Faso were connected through a significant number of extended edges, which would be consistent with the hypothesis of long-distance dispersal. Genetic variation of Burkina Faso goat followed a geographic-related pattern. This pattern of variation is likely to be related to the presence of vectors of African animal trypanosomosis. Partial Mantel test identified the present Northern limit of trypanosome vectors as the most significant landscape boundary influencing the genetic variability of Burkina Faso goat (p = 0.008). The contribution of Sahel goat genes to the goat populations in the Northern and Eastern parts of the Sudan-Sahel area of Burkina Faso was substantial. The presence of perennial streams explains the existence of trypanosome vectors. The South half of the Nakambé river (Southern Ouagadougou) and the Mouhoun river loop determined, respectively, the Eastern and Northern limits for the expansion of Sahelian goat genes. 
Furthermore, results from partial Mantel test suggest that the introgression of Sahelian goat genes into Djallonké goat using human-influenced genetic corridors has a limited influence when compared to the biological boundary defined by the northern limits for the distribution of the tsetse fly. However, the genetic differences found between the goat sampled in Bobo Dioulasso and the other populations located in the Sudan area of Burkina Faso may be explained by the broad goat trade favoured by the main road of the country.

Conclusions

The current analysis clearly suggests that genetic variation in Burkina Faso goat: a) follows a North to South clinal; and b) is affected by the distribution of the tsetse fly that imposes a limit to the Sahelian goat expansion due to their trypanosusceptibility. Here we show how extensive surveys on livestock populations can be useful to indirectly assess the consequences of climate change and human action in developing countries.",2012-05-07 +21310714,Optimization of turn-back primers in isothermal amplification.,"The application of isothermal amplification technologies is rapidly expanding and currently covers different areas such as infectious disease, genetic disorder and drug dosage adjustment. Meanwhile, many of such technologies have complex reaction processes and often require a fine-tuned primer set where existing primer design tools are not sufficient. We have developed a primer selection system for one important primer, the turn-back primer (TP), which is commonly used in loop-mediated amplification (LAMP) and smart amplification process (SmartAmp). We chose 78 parameters related to the primer and target sequence, and explored their relationship to amplification speed using experimental data for 1344 primer combinations. We employed the least absolute shrinkage and selection operator (LASSO) method for parameter selection and estimation of their numerical coefficients. We subsequently evaluated our prediction model using additional independent experiments and compared to the LAMP primer design tool, Primer Explorer version4 (PE4). The evaluation showed that our approach yields a superior primer design in isothermal amplification and is robust against variations in the experimental setup. Our LASSO regression analysis revealed that availability of the 3'- and 5'-end of the primer are particularly important factors for efficient isothermal amplification. 
Our computer script is freely available at: http://gerg.gsc.riken.jp/TP_optimization/.",2011-02-09 +21210742,Identifying contributors of DNA mixtures by means of quantitative information of STR typing.,"Estimating the weight of evidence in forensic genetics is often done in terms of a likelihood ratio, LR. The LR evaluates the probability of the observed evidence under competing hypotheses. Most often, probabilities used in the LR only consider the evidence from the genomic variation identified using polymorphic genetic markers. However, modern typing techniques supply additional quantitative data, which contain very important information about the observed evidence. This is particularly true for cases of DNA mixtures, where more than one individual has contributed to the observed biological stain. This article presents a method for including the quantitative information of short tandem repeat (STR) DNA mixtures in the LR. Also, an efficient algorithmic method for finding the best matching combination of DNA mixture profiles is derived and implemented in an on-line tool for two- and three-person DNA mixtures. Finally, we demonstrate for two-person mixtures how this best matching pair of profiles can be used in estimating the likelihood ratio using importance sampling. The reason for using importance sampling for estimating the likelihood ratio is the often vast number of combinations of profiles needed for the evaluation of the weight of evidence. Online tool is available at http://people.math.aau.dk/~tvede/dna/.",2011-01-06 +21559271,Calpain cleavage prediction using multiple kernel learning.,"Calpain, an intracellular Ca²⁺-dependent cysteine protease, is known to play a role in a wide range of metabolic pathways through limited proteolysis of its substrates. However, only a limited number of these substrates are currently known, with the exact mechanism of substrate recognition and cleavage by calpain still largely unknown. 
While previous research has successfully applied standard machine-learning algorithms to accurately predict substrate cleavage by other similar types of proteases, their approach does not extend well to calpain, possibly due to its particular mode of proteolytic action and limited amount of experimental data. Through the use of Multiple Kernel Learning, a recent extension to the classic Support Vector Machine framework, we were able to train complex models based on rich, heterogeneous feature sets, leading to significantly improved prediction quality (6% over highest AUC score produced by state-of-the-art methods). In addition to producing a stronger machine-learning model for the prediction of calpain cleavage, we were able to highlight the importance and role of each feature of substrate sequences in defining specificity: primary sequence, secondary structure and solvent accessibility. Most notably, we showed there existed significant specificity differences across calpain sub-types, despite previous assumption to the contrary. Prediction accuracy was further successfully validated using, as an unbiased test set, mutated sequences of calpastatin (endogenous inhibitor of calpain) modified to no longer block calpain's proteolytic action. An online implementation of our prediction tool is available at http://calpain.org.",2011-05-03 +21543442,DREME: motif discovery in transcription factor ChIP-seq data.,"

Motivation

Transcription factor (TF) ChIP-seq datasets have particular characteristics that provide unique challenges and opportunities for motif discovery. Most existing motif discovery algorithms do not scale well to such large datasets, or fail to report many motifs associated with cofactors of the ChIP-ed TF.

Results

We present DREME, a motif discovery algorithm specifically designed to find the short, core DNA-binding motifs of eukaryotic TFs, and optimized to analyze very large ChIP-seq datasets in minutes. Using DREME, we discover the binding motifs of the ChIP-ed TF and many cofactors in mouse ES cell (mESC), mouse erythrocyte and human cell line ChIP-seq datasets. For example, in mESC ChIP-seq data for the TF Esrrb, we discover the binding motifs for eight cofactor TFs important in the maintenance of pluripotency. Several other commonly used algorithms find at most two cofactor motifs in this same dataset. DREME can also perform discriminative motif discovery, and we use this feature to provide evidence that Sox2 and Oct4 do not bind in mES cells as an obligate heterodimer. DREME is much faster than many commonly used algorithms, scales linearly in dataset size, finds multiple, non-redundant motifs and reports a reliable measure of statistical significance for each motif found. DREME is available as part of the MEME Suite of motif-based sequence analysis tools (http://meme.nbcr.net).",2011-05-04 +21375394,Research priorities associated with family caregivers in palliative care: international perspectives.,"

Background and purpose

Reviews of the literature have consistently highlighted significant gaps with regard to research associated with family caregivers within the context of palliative care. We sought to determine a priority driven research agenda for this field of inquiry.

Methods

A Web-based survey was sent to 80 people who had previously expressed interest in, or were members of The International Palliative Care Family Carer Research Collaboration (http://centreforpallcare.org/index.php/research/ipcfcrc/).

Results

Fifty-five participants completed the survey (response rate, 70%) from 12 countries. Priority research areas included: intervention development and testing; underresearched caregiver groups; access to services; unmet needs; bereavement; experience and implications of the caregiver role; and development of assessment tools. Qualitative responses complemented these data and also acknowledged the importance of collaboration and development of a critical mass of researchers focusing in this area in order to progress knowledge.

Conclusions

These results reinforce the findings of systematic reviews that have demonstrated a need for the evolution of intervention development focused on improving family caregiver support. However, there are other key areas that also warrant comprehensive attention, including marginalized family caregivers and strategies to assist health professionals to identify family caregivers who have significant psychosocial issues.",2011-03-04 +21840876,A phylogenetic mixture model for the identification of functionally divergent protein residues.,"

Motivation

To understand the evolution of molecular function within protein families, it is important to identify those amino acid residues responsible for functional divergence; i.e. those sites in a protein family that affect cofactor, protein or substrate binding preferences; affinity; catalysis; flexibility; or folding. Type I functional divergence (FD) results from changes in conservation (evolutionary rate) at a site between protein subfamilies, whereas type II FD occurs when there has been a shift in preferences for different amino acid chemical properties. A variety of methods have been developed for identifying both site types in protein subfamilies, both from phylogenetic and information-theoretic angles. However, evaluation of the performance of these methods has typically relied upon a handful of reasonably well-characterized biological datasets or analyses of a single biological example. While experimental validation of many truly functionally divergent sites (true positives) can be relatively straightforward, determining that particular sites do not contribute to functional divergence (i.e. false positives and true negatives) is much more difficult, resulting in noisy 'gold standard' examples.

Results

We describe a novel, phylogeny-based functional divergence classifier, FunDi. Unlike previous approaches, FunDi uses a unified mixture model-based approach to detect type I and type II FD. To assess FunDi's overall classification performance relative to other methods, we introduce two methods for simulating functionally divergent datasets. We find that the FunDi method performs better than several other predictors over a wide variety of simulation conditions.

Availability

http://rogerlab.biochem.dal.ca/Software

Contact

andrew.roger@dal.ca

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-08-11 +21853133,Designing of highly effective complementary and mismatch siRNAs for silencing a gene.,"In past, numerous methods have been developed for predicting efficacy of short interfering RNA (siRNA). However these methods have been developed for predicting efficacy of fully complementary siRNA against a gene. Best of author's knowledge no method has been developed for predicting efficacy of mismatch siRNA against a gene. In this study, a systematic attempt has been made to identify highly effective complementary as well as mismatch siRNAs for silencing a gene.Support vector machine (SVM) based models have been developed for predicting efficacy of siRNAs using composition, binary and hybrid pattern siRNAs. We achieved maximum correlation 0.67 between predicted and actual efficacy of siRNAs using hybrid model. All models were trained and tested on a dataset of 2182 siRNAs and performance was evaluated using five-fold cross validation techniques. The performance of our method desiRm is comparable to other well-known methods. In this study, first time attempt has been made to design mutant siRNAs (mismatch siRNAs). In this approach we mutated a given siRNA on all possible sites/positions with all possible nucleotides. Efficacy of each mutated siRNA is predicted using our method desiRm. It is well known from literature that mismatches between siRNA and target affects the silencing efficacy. Thus we have incorporated the rules derived from base mismatches experimental data to find out over all efficacy of mutated or mismatch siRNAs. Finally we developed a webserver, desiRm (http://www.imtech.res.in/raghava/desirm/) for designing highly effective siRNA for silencing a gene. 
This tool will be helpful to design siRNA to degrade disease isoform of heterozygous single nucleotide polymorphism gene without depleting the wild type protein.",2011-08-10 +21294843,"Starch-binding domains in the CBM45 family--low-affinity domains from glucan, water dikinase and α-amylase involved in plastidial starch metabolism.","Starch-binding domains are noncatalytic carbohydrate-binding modules that mediate binding to granular starch. The starch-binding domains from the carbohydrate-binding module family 45 (CBM45, http://www.cazy.org) are found as N-terminal tandem repeats in a small number of enzymes, primarily from photosynthesizing organisms. Isolated domains from representatives of each of the two classes of enzyme carrying CBM45-type domains, the Solanum tuberosumα-glucan, water dikinase and the Arabidopsis thaliana plastidial α-amylase 3, were expressed as recombinant proteins and characterized. Differential scanning calorimetry was used to verify the conformational integrity of an isolated CBM45 domain, revealing a surprisingly high thermal stability (T(m) of 84.8 °C). The functionality of CBM45 was demonstrated in planta by yellow/green fluorescent protein fusions and transient expression in tobacco leaves. Affinities for starch and soluble cyclodextrin starch mimics were measured by adsorption assays, surface plasmon resonance and isothermal titration calorimetry analyses. The data indicate that CBM45 binds with an affinity of about two orders of magnitude lower than the classical starch-binding domains from extracellular microbial amylolytic enzymes. This suggests that low-affinity starch-binding domains are a recurring feature in plastidial starch metabolism, and supports the hypothesis that reversible binding, effectuated through low-affinity interaction with starch granules, facilitates dynamic regulation of enzyme activities and, hence, of starch metabolism.",2011-03-01 +20528863,Testing for homogeneity in meta-analysis I. 
The one-parameter case: standardized mean difference.,"Meta-analysis seeks to combine the results of several experiments in order to improve the accuracy of decisions. It is common to use a test for homogeneity to determine if the results of the several experiments are sufficiently similar to warrant their combination into an overall result. Cochran's Q statistic is frequently used for this homogeneity test. It is often assumed that Q follows a chi-square distribution under the null hypothesis of homogeneity, but it has long been known that this asymptotic distribution for Q is not accurate for moderate sample sizes. Here, we present an expansion for the mean of Q under the null hypothesis that is valid when the effect and the weight for each study depend on a single parameter, but for which neither normality nor independence of the effect and weight estimators is needed. This expansion represents an order O(1/n) correction to the usual chi-square moment in the one-parameter case. We apply the result to the homogeneity test for meta-analyses in which the effects are measured by the standardized mean difference (Cohen's d-statistic). In this situation, we recommend approximating the null distribution of Q by a chi-square distribution with fractional degrees of freedom that are estimated from the data using our expansion for the mean of Q. The resulting homogeneity test is substantially more accurate than the currently used test. We provide a program available at the Paper Information link at the Biometrics website http://www.biometrics.tibs.org for making the necessary calculations.",2011-03-01 +21826666,Increased midkine expression correlates with desmoid tumour recurrence: a potential biomarker and therapeutic target.,"Desmoid tumours (DTs) are soft tissue monoclonal neoplasms exhibiting a unique phenotype, consisting of aggressive local invasiveness without metastatic capacity. 
While DTs can infrequently occur as part of familial adenomatosis polyposis, most cases arise sporadically. Sporadic DTs harbour a high prevalence of CTNNB1 mutations and hence increased β-catenin signalling. However, β-catenin downstream transcriptional targets and other molecular deregulations operative in DT inception and progression are currently not well defined, contributing to the lack of sensitive molecular prognosticators and efficacious targeted therapeutic strategies. We compared the gene expression profiles of 14 sporadic DTs to those of five corresponding normal tissues and six solitary fibrous tumour specimens. A DT expression signature consisting of 636 up- and 119 down-regulated genes highly enriched for extracellular matrix, cell adhesion and wound healing-related proteins was generated. Furthermore, 98 (15%) of the over-expressed genes were demonstrated to contain a TCF/LEF consensus binding site in their promoters, possibly heralding direct β-catenin downstream targets relevant to DT. The protein products of three of the up-regulated DT genes: ADAM12, MMP2 and midkine, were found to be commonly expressed in a large cohort of human DT samples assembled on a tissue microarray. Interestingly, enhanced midkine expression significantly correlated with a higher propensity and decreased time for primary DT recurrence (log-rank p = 0.0025). Finally, midkine was found to enhance the migration and invasion of primary DT cell cultures. Taken together, these studies provide insights into potential DT molecular aberrations and novel β-catenin transcriptional targets. Further studies to confirm the utility of midkine as a clinical DT molecular prognosticator and a potential therapeutic target are therefore warranted. Raw gene array data can be found at: http://smd.stanford.edu/",2011-08-08 +22356677,Cytoplasmic BRMS1 expression in malignant melanoma is associated with increased disease-free survival.,"

Background/aims

Breast cancer metastasis suppressor 1 (BRMS1) blocks metastasis in melanoma xenografts; however, its usefulness as a biomarker in human melanomas has not been widely studied. The goal was to measure BRMS1 expression in benign nevi, primary and metastatic melanomas and evaluate its impact on disease progression and prognosis.

Methods

Paraffin-embedded tissue from 155 primary melanomas, 69 metastases and 15 nevi was examined for BRMS1 expression using immunohistochemistry. siRNA mediated BRMS1 down-regulation was used to study impact on invasion and migration in melanoma cell lines.

Results

A significantly higher percentage of nevi (87%), compared to primary melanomas (20%) and metastases (48%), expressed BRMS1 in the nucleus (p < 0.0001). Strong nuclear staining intensity was observed in 67% of nevi, and in 9% and 24% of the primary and metastatic melanomas, respectively (p < 0.0001). Comparable cytoplasmic expression was observed (nevi; 87%, primaries; 86%, metastases; 72%). However, a decline in cytoplasmic staining intensity was observed in metastases compared to nevi and primary tumors (26%, 47%, and 58%, respectively, p < 0.0001). Score index (percentage immunopositive cells multiplied by staining intensity) revealed that high cytoplasmic score index (≥ 4) was associated with thinner tumors (p = 0.04), lack of ulceration (p = 0.02) and increased disease-free survival (p = 0.036). When intensity and percentage BRMS1 positive cells were analyzed separately, intensity remained associated with tumor thickness (p = 0.024) and ulceration (p = 0.004) but was inversely associated with expression of proliferation markers (cyclin D3 (p = 0.008), cyclin A (p = 0.007), and p21Waf1/Cip1 (p = 0.009)). Cytoplasmic score index was inversely associated with nuclear p-Akt (p = 0.013) and positively associated with cytoplasmic p-ERK1/2 expression (p = 0.033). Nuclear BRMS1 expression in ≥ 10% of primary melanoma cells was associated with thicker tumors (p = 0.016) and decreased relapse-free period (p = 0.043). Nuclear BRMS1 was associated with expression of fatty acid binding protein 7 (FABP7; p = 0.011), a marker of invasion in melanomas. In line with this, repression of BRMS1 expression reduced the ability of melanoma cells to migrate and invade in vitro.

Conclusion

Our data suggest that BRMS1 is localized in cytoplasm and nucleus of melanocytic cells and that cellular localization determines its in vivo effect. We hypothesize that cytoplasmic BRMS1 restricts melanoma progression while nuclear BRMS1 possibly promotes melanoma cell invasion.Please see related article: http://www.biomedcentral.com/1741-7015/10/19.",2012-02-22 +21521932,Pitavastatin increases ABCA1 expression by dual mechanisms: SREBP2-driven transcriptional activation and PPARα-dependent protein stabilization but without activating LXR in rat hepatoma McARH7777 cells.,"Hepatic ATP-binding cassette transporter A1 (ABCA1) plays a key role in high-density lipoprotein (HDL) production by apolipoprotein A-I (ApoA-I) lipidation. 3-Hydroxy-3-methylglutaryl coenzyme A (HMG-CoA) reductase inhibitors, statins, increase ABCA1 mRNA levels in hepatoma cell lines, but their mechanism of action is not yet clear. We investigated how statins increase ABCA1 in rat hepatoma McARH7777 cells. Pitavastatin, atorvastatin, and simvastatin increased total ABCA1 mRNA levels, whereas pravastatin had no effect. Pitavastatin also increased ABCA1 protein. Hepatic ABCA1 expression in rats is regulated by both liver X receptor (LXR) and sterol regulatory element-binding protein (SREBP2) pathways. Pitavastatin repressed peripheral type ABCA1 mRNA levels and its LXR-driven promoter, but activated the liver-type SREBP-driven promoter, and eventually increased total ABCA1 mRNA expression. Furthermore, pitavastatin increased peroxisome proliferator-activated receptor α (PPARα) and its downstream gene expression. Knockdown of PPARα attenuated the increase in ABCA1 protein, indicating that pitavastatin increased ABCA1 protein via PPARα activation, although it repressed LXR activation. Furthermore, the degradation of ABCA1 protein was retarded in pitavastatin-treated cells. 
These data suggest that pitavastatin increases ABCA1 protein expression by dual mechanisms: SREBP2-mediated mRNA transcription and PPARα-mediated ABCA1 protein stabilization, but not by the PPAR-LXR-ABCA1 pathway. [Supplementary Figures: available only at http://dx.doi.org/10.1254/jphs.10241FP].",2011-04-27 +21680907,Prediction of early stroke risk in transient symptoms with infarction: relevance to the new tissue-based definition.,"

Background and purpose

The risk of stroke shortly after transient ischemic attack with infarction on diffusion-weighted images, also known as transient symptoms with infarction (TSI), is substantially higher than is the risk after imaging-normal transient ischemic attack. We sought to assess the utility of a Web-based recurrence risk estimator (RRE; http://www.nmr.mgh.harvard.edu/RRE/) originally developed for use in patients with ischemic stroke for predicting 7-day risk of stroke in patients with TSI.

Methods

We calculated RRE and ABCD² scores in a retrospective series of 257 consecutive patients with TSI diagnosed by diffusion-weighted images within 24 hours of symptom onset. We defined subsequent stroke as clinical deterioration associated with new infarction spatially distinct from the index lesion. We assessed the predictive performance of each model by computing the area under receiver-operating characteristics curve.

Results

Over 7-day follow-up, 16 patients developed a recurrent stroke (6.2%). The sensitivity and specificity of an RRE score of ≥ 2 for predicting 7-day stroke risk were 87% and 73%, respectively. The area under the receiver-operating characteristics curve was 0.85 (95% CI, 0.78-0.92) for RRE and 0.57 (95% CI, 0.45-0.69) for ABCD² score (z-test; P<0.001).

Conclusions

The RRE score seems to predict 7-day risk of stroke after a TSI. If further validated in larger data sets, the RRE score could be useful in identifying high-risk patients with TSI who may benefit from early intervention with targeted stroke prevention strategies.",2011-06-16 +21335612,"fconv: Format conversion, manipulation and feature computation of molecular data.","

Unlabelled

fconv is a program intended for parsing and manipulating multiple aspects and properties of molecular data. Up to now, it has been developed and extensively tested for 3 years. It has become a very robust and comprehensive tool involved in a broad range of computational workflows that are currently applied in our drug design environment. Typical tasks are as follows: conversion and error correction of formats such as PDB(QT), MOL2, SDF, DLG and CIF; extracting ligands from PDB as MOL2; automatic or ligand-based cavity detection; rmsd calculation and clustering; substructure searches; alignment and structural superposition; building of crystal packings; adding hydrogens; calculation of various properties like the number of rotatable bonds; molecular weights or vdW volumes. The atom type classification is based on a consistent assignment of internal atom types, which are by far more differentiated compared with e.g. Sybyl atom types. Apart from the predefined mapping of these types onto Sybyl types, the user is able to assign own mappings by providing modified template files, thus allowing for tailor-made atom type sets.

Availability

fconv is free software available under GNU General Public License. C++ sources and precompiled executables for LINUX/UNIX, Mac OS and Windows, as well as tutorials are available on http://www.agklebe.de.",2011-02-18 +21337514,Genome-wide identification of the subcellular localization of the Escherichia coli B proteome using experimental and computational methods.,"Escherichia coli K-12 and B strains have most widely been employed for scientific studies as well as industrial applications. Recently, the complete genome sequences of two representative descendants of E. coli B strains, REL606 and BL21(DE3), have been determined. Here, we report the subproteome reference maps of E. coli B REL606 by analyzing cytoplasmic, periplasmic, inner and outer membrane, and extracellular proteomes based on the genome information using experimental and computational approaches. Among the total of 3487 spots, 651 proteins including 410 non-redundant proteins were identified and characterized by 2-DE and LC-MS/MS; they include 440 cytoplasmic, 45 periplasmic, 50 inner membrane, 61 outer membrane, and 55 extracellular proteins. In addition, subcellular localizations of all 4205 ORFs of E. coli B were predicted by combined computational prediction methods. The subcellular localizations of 1812 (43.09%) proteins of currently unknown function were newly assigned. The results of computational prediction were also compared with the experimental results, showing that overall precision and recall were 92.16 and 92.16%, respectively. This work represents the most comprehensive analyses of the subproteomes of E. coli B, and will be useful as a reference for proteome profiling studies under various conditions. 
The complete proteome data are available online (http://ecolib.kaist.ac.kr).",2011-02-17 +27182360,A Description of Variability of Pacing in Marathon Distance Running.,"The purpose of this study was twofold: 1) to describe variability of pacing during a marathon and 2) to determine if there is a relationship between variability of pacing and marathon performance. Publicly available personal global positioning system profiles from two marathons (Race 1 n = 116, Race 2 n = 169) were downloaded (http://connect.garmin.com) for analysis. The coefficient of variation of velocity (Velcov) was calculated for each profile. Each profile was categorized as finishing in under 3.9 hours, between 3.9 and 4.6 hours, or longer than 4.6 hours. Linear and quadratic lines of best fit were computed to describe the relationship between marathon finish time and Velcov. A 2 (Race) × 3 (bin) analysis of variance (ANOVA) was used to compare the dependent variable (Velcov) between races and the marathon bin finish times. Velcov was not influenced by the interaction of finish time bin and Race (p>0.05) and was not different between races (Race 1: 16.6 ± 6.4%, Race 2: 16.8 ± 6.6%, p>0.05). Velcov was different between finish time categories (p<0.05) for each race such that Velcov was lower for faster finish times. Using combined data from both races, linear (marathon finish time = 0.09 Velcov + 2.9, R^2 = 0.46) and quadratic (marathon finish time = -0.0006 Velcov^2 + 0.11 Velcov + 2.7, R^2 = 0.46) lines of best fit were significant (p<0.05). Slower marathon finishers had greater variability of pace compared to faster marathon finishers.",2011-04-15 +21653520,A probabilistic method for the detection and genotyping of small indels from population-scale sequence data.,"

Motivation

High-throughput sequencing technologies have made population-scale studies of human genetic variation possible. Accurate and comprehensive detection of DNA sequence variants is crucial for the success of these studies. Small insertions and deletions represent the second most frequent class of variation in the human genome after single nucleotide polymorphisms (SNPs). Although several alignment tools for the gapped alignment of sequence reads to a reference genome are available, computational methods for discriminating indels from sequencing errors and genotyping indels directly from sequence reads are needed.

Results

We describe a probabilistic method for the accurate detection and genotyping of short indels from population-scale sequence data. In this approach, aligned sequence reads from a population of individuals are used to automatically account for context-specific sequencing errors associated with indels. We applied this approach to population sequence datasets from the 1000 Genomes exon pilot project generated using the Roche 454 and Illumina sequencing platforms, and were able to detect a significantly greater number of indels than reported previously. Comparison to indels identified in the 1000 Genomes pilot project demonstrated the sensitivity of our method. The consistency in the number of indels and the fraction of indels whose length is a multiple of three across different human populations and two different sequencing platforms indicated that our method has a low false discovery rate. Finally, the method represents a general approach for the detection and genotyping of small-scale DNA sequence variants for population-scale sequencing projects.

Availability

A program implementing this method is available at http://polymorphism.scripps.edu/~vbansal/software/piCALL/",2011-06-07 +22318612,Prostate cancer pain management: EAU guidelines on pain management.,"

Context

The first publication of the European Association of Urology (EAU) guidelines on Pain Management in Urology dates back to 2003. Since then, these guidelines have been revised several times with the most recent update achieved in 2010.

Objective

Given the scope of the full text guidelines, condensing the entire document was no option in this context. This paper presents a summary of the section of pain management in prostate cancer, a topic considered of direct relevance for the practicing urologist.

Evidence acquisition

A multidisciplinary expert panel (urologists, anaesthesiologists, radio-oncologists) compiled this document based on a comprehensive consultation of the literature. Data were identified through a structured search, covering the time frame 2000 through 2010, using Medline and Embase as well as the Cochrane Library of systematic reviews. The scientific papers were weighed by the expert panel and a level of evidence (LE) assigned. Recommendations have been graded as a means to provide transparency between the underlying evidence and the guidance provided. Pain can occur in each stage of prostate cancer. It could be caused by the cancer itself (77%), be related to the cancer treatment (19%) or be unrelated to either (3%). The incidence of pain rises to 90% as patients enter the terminal phase of their illness. The physician's task is to discover and treat the cause of pain and the pain itself, to determine whether or not the underlying cause is treatable, to provide pain relief and palliative care. These tasks more often than not require a multidisciplinary team. Pain management involves mainly pharmacotherapy, including direct anticancer therapy such as androgen deprivation and chemotherapy, as well as analgetics, for instance non-steroidal anti-inflammatory drugs (NSAIDs) or opioids. In case of local impairment due to the cancer or its metastases, primary treatments like surgery, radiotherapy or radionuclides can provide adequate pain relief. In addition, in palliative care, functional, psychosocial and spiritual support are essential components. The EAU guidelines on Pain Management in Urology are available in a number of different formats through the EAU Central Office and the EAU website ( http://www.uroweb.org/guidelines/online-guidelines/ ).

Conclusion

The mainstay of pain management in prostate cancer is involvement of and collaboration between experts from a number of disciplines to be able to achieve a complete pain evaluation and to offer the full range of treatment options. Prostate cancer-related pain can, in most cases, be managed effectively, but it requires careful monitoring where a balance should be found between pain relief and potential side effects of treatment and quality of life (QoL).",2012-02-09 +21647209,HumMod: A Modeling Environment for the Simulation of Integrative Human Physiology.,"Mathematical models and simulations are important tools in discovering key causal relationships governing physiological processes. Simulations guide and improve outcomes of medical interventions involving complex physiology. We developed HumMod, a Windows-based model of integrative human physiology. HumMod consists of 5000 variables describing cardiovascular, respiratory, renal, neural, endocrine, skeletal muscle, and metabolic physiology. The model is constructed from empirical data obtained from peer-reviewed physiological literature. All model details, including variables, parameters, and quantitative relationships, are described in Extensible Markup Language (XML) files. The executable (HumMod.exe) parses the XML and displays the results of the physiological simulations. The XML description of physiology in HumMod's modeling environment allows investigators to add detailed descriptions of human physiology to test new concepts. Additional or revised XML content is parsed and incorporated into the model. The model accurately predicts both qualitative and quantitative changes in clinical and experimental responses. The model is useful in understanding proposed physiological mechanisms and physiological interactions that are not evident, allowing one to observe higher level emergent properties of the complex physiological systems. 
HumMod has many uses, for instance, analysis of renal control of blood pressure, central role of the liver in creating and maintaining insulin resistance, and mechanisms causing orthostatic hypotension in astronauts. Users simulate different physiological and pathophysiological situations by interactively altering numerical parameters and viewing time-dependent responses. HumMod provides a modeling environment to understand the complex interactions of integrative physiology. HumMod can be downloaded at http://hummod.org.",2011-04-13 +21486936,MEME-ChIP: motif analysis of large DNA datasets.,"

Motivation

Advances in high-throughput sequencing have resulted in rapid growth in large, high-quality datasets including those arising from transcription factor (TF) ChIP-seq experiments. While there are many existing tools for discovering TF binding site motifs in such datasets, most web-based tools cannot directly process such large datasets.

Results

The MEME-ChIP web service is designed to analyze ChIP-seq 'peak regions'--short genomic regions surrounding declared ChIP-seq 'peaks'. Given a set of genomic regions, it performs (i) ab initio motif discovery, (ii) motif enrichment analysis, (iii) motif visualization, (iv) binding affinity analysis and (v) motif identification. It runs two complementary motif discovery algorithms on the input data--MEME and DREME--and uses the motifs they discover in subsequent visualization, binding affinity and identification steps. MEME-ChIP also performs motif enrichment analysis using the AME algorithm, which can detect very low levels of enrichment of binding sites for TFs with known DNA-binding motifs. Importantly, unlike with the MEME web service, there is no restriction on the size or number of uploaded sequences, allowing very large ChIP-seq datasets to be analyzed. The analyses performed by MEME-ChIP provide the user with a varied view of the binding and regulatory activity of the ChIP-ed TF, as well as the possible involvement of other DNA-binding TFs.

Availability

MEME-ChIP is available as part of the MEME Suite at http://meme.nbcr.net.",2011-04-12 +21568620,Initial evaluation of rural programs at the Australian National University: understanding the effects of rural programs on intentions for rural and remote medical practice.,"

Introduction

Rural health workforce issues are a priority area for the Australian Government and substantial funding has been provided for rural education programs to address health workforce disparities across Australia's rural and remote communities. The Australian Government established a Rural Health Strategy in 2001 and as a result there are now 14 rural clinical schools in Australia. The 2008 Urbis Report highlighted the lack of research on rural programs and workforce outcomes, essential to ensuring that educational efforts, resources and funding are being concentrated appropriately. This study examined the Australian National University (ANU) Medical School's 4 year rural program to identify the impact of elective and compulsory program components on student intentions to practice in a rural and remote location post-graduation. The study also explores factors that affect student decisions to apply for year-long rural placements. METHODS; ANU Medical School's graduating cohort of 2008 fourth year medical students completed an anonymous and voluntary online survey questionnaire. Survey sections included student demographics, compulsory and elective components of the ANU rural program, and an overall evaluation of the ANU rural curriculum. The survey contained a mixture of forced-answer questions and open-ended commentary. Quantitative data were analyzed for descriptive and frequency statistics using EpiInfo V3.5.1 (
http://wwwn.cdc.gov/epiinfo/). Qualitative data were reviewed and consistent themes among responses extracted.

Results

In total, 40 students from a cohort of 88 (45%) responded, with 26 respondents (65%) indicating that at medical school commencement they considered working in a rural or remote area. At the end of their medical education, 33 respondents (82%) indicated their intention to spend some time in their careers working in a rural or remote area. Students from non-rural backgrounds had greater positive change in their intentions to practice rurally as a direct effect of ANU rural programs when compared with students from rural backgrounds. More than 70% of students believed the amount of rural focus in the curriculum was correct, 75% believed that they will be better medical practitioners because of the program, and 85% found the curriculum was delivered effectively. Students who undertook elective rural programs such as a year-long rural placement were more likely to have future rural career intentions when compared with students undertaking compulsory rural components. Compulsory components, however, had a strong influence on students applying for elective programs. Regarding application for the year-long rural placement, students reported clinical exposure was the most encouraging factor, and time away from family and friends, and lack of spousal and family support were the most discouraging factors.

Conclusion

Rural programs at the ANU, and medical school exposure to rural health experiences is important in influencing students' perceptions of a career in rural and remote health. This study provides evidence that both compulsory and elective components contribute to a successful holistic rural program which nurtures the rural interest of all students. Overall, students at the ANU medical school were satisfied with the rural curriculum. The results confirm that there is difficulty in recruiting students with family commitments into year-long rural placement programs, despite incentives. Those students who select long-term rural study for reasons other than an interest in a career in rural health end the program with positive rural intentions.",2011-04-01 +21784793,Structural analysis of the hot spots in the binding between H1N1 HA and the 2D1 antibody: do mutations of H1N1 from 1918 to 2009 affect much on this binding?,"

Motivation

Worldwide and substantial mortality caused by the 2009 H1N1 influenza A has stimulated a new surge of research on H1N1 viruses. An epitope conservation has been learned in the HA1 protein that allows antibodies to cross-neutralize both 1918 and 2009 H1N1. However, few works have thoroughly studied the binding hot spots in those two antigen-antibody interfaces which are responsible for the antibody cross-neutralization.

Results

We apply predictive methods to identify binding hot spots at the epitope sites of the HA1 proteins and at the paratope sites of the 2D1 antibody. We find that the six mutations at the HA1's epitope from 1918 to 2009 should not harm its binding to 2D1. Instead, the change of binding free energy on the whole exhibits an increased tendency after these mutations, making the binding stronger. This is consistent with the observation that the 1918 H1N1 neutralizing antibody can cross-react with 2009 H1N1. We identified three distinguished hot spot residues, including Lys(166), common between the two epitopes. These common hot spots again can explain why 2D1 cross-reacted. We believe that these hot spot residues are mutation candidates which may help H1N1 viruses to evade the immune system. We also identified eight residues at the paratope site of 2D1, five from its heavy chain and three from its light chain, that are predicted to be energetically important in the HA1 recognition. The identification of these hot spot residues and their structural analysis are potentially useful to fight against H1N1 viruses.

Contact

jinyan.li@uts.edu.au

Availability

Z-score is available at http://155.69.2.25/liuqian/indexz.py

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-07-22 +21471011,Alignment-free detection of local similarity among viral and bacterial genomes.,"

Motivation

Bacterial and viral genomes are often affected by horizontal gene transfer observable as abrupt switching in local homology. In addition to the resulting mosaic genome structure, they frequently contain regions not found in close relatives, which may play a role in virulence mechanisms. Due to this connection to medical microbiology, there are numerous methods available to detect horizontal gene transfer. However, these are usually aimed at individual genes and viral genomes rather than the much larger bacterial genomes. Here, we propose an efficient alignment-free approach to describe the mosaic structure of viral and bacterial genomes, including their unique regions.

Results

Our method is based on the lengths of exact matches between pairs of sequences. Long matches indicate close homology, short matches more distant homology or none at all. These exact match lengths can be looked up efficiently using an enhanced suffix array. Our program implementing this approach, alfy (ALignment-Free local homologY), efficiently and accurately detects the recombination break points in simulated DNA sequences and among recombinant HIV-1 strains. We also apply alfy to Escherichia coli genomes where we detect new evidence for the hypothesis that strains pathogenic in poultry can infect humans.

Availability

alfy is written in standard C and its source code is available under the GNU General Public License from http://guanine.evolbio.mpg.de/alfy/. The software package also includes documentation and example data.",2011-04-06 +21465562,Revisiting gap locations in amino acid sequence alignments and a proposal for a method to improve them by introducing solvent accessibility.,"In comparative modeling, the quality of amino acid sequence alignment still constitutes a major bottleneck in the generation of high quality models of protein three-dimensional (3D) structures. Substantial efforts have been made to improve alignment quality by revising the substitution matrix, introducing multiple sequences, replacing dynamic programming with hidden Markov models, and incorporating 3D structure information. Improvements in the gap penalty have not been a major focus, however, following the development of the affine gap penalty and of the secondary structure dependent gap penalty. We revisited the correlation between protein 3D structure and gap location in a large protein 3D structure data set, and found that the frequency of gap locations approximated to an exponential function of the solvent accessibility of the inserted residues. The nonlinearity of the gap frequency as a function of accessibility corresponded well to the relationship between residue mutation pattern and residue accessibility. By introducing this relationship into the gap penalty calculation for pairwise alignment between template and target amino acid sequences, we were able to obtain a sequence alignment much closer to the structural alignment. The quality of the alignments was substantially improved on a pair of sequences with identity in the ""twilight zone"" between 20 and 40%. The relocation of gaps by our new method made a significant improvement in comparative modeling, exemplified here by the Bacillus subtilis yitF protein. 
The method was implemented in a computer program, ALAdeGAP (ALignment with Accessibility dependent GAp Penalty), which is available at http://cib.cf.ocha.ac.jp/target_protein/.",2011-04-04 +21613640,Recommending MeSH terms for annotating biomedical articles.,"

Background

Due to the high cost of manual curation of key aspects from the scientific literature, automated methods for assisting this process are greatly desired. Here, we report a novel approach to facilitate MeSH indexing, a challenging task of assigning MeSH terms to MEDLINE citations for their archiving and retrieval.

Methods

Unlike previous methods for automatic MeSH term assignment, we reformulate the indexing task as a ranking problem such that relevant MeSH headings are ranked higher than those irrelevant ones. Specifically, for each document we retrieve 20 neighbor documents, obtain a list of MeSH main headings from neighbors, and rank the MeSH main headings using ListNet-a learning-to-rank algorithm. We trained our algorithm on 200 documents and tested on a previously used benchmark set of 200 documents and a larger dataset of 1000 documents.

Results

Tested on the benchmark dataset, our method achieved a precision of 0.390, recall of 0.712, and mean average precision (MAP) of 0.626. In comparison to the state of the art, we observe statistically significant improvements as large as 39% in MAP (p-value <0.001). Similar significant improvements were also obtained on the larger document set.

Conclusion

Experimental results show that our approach makes the most accurate MeSH predictions to date, which suggests its great potential in making a practical impact on MeSH indexing. Furthermore, as discussed the proposed learning framework is robust and can be adapted to many other similar tasks beyond MeSH indexing in the biomedical domain. All data sets are available at: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/indexing.",2011-05-25 +22303397,Accurate microRNA Target Prediction Using Detailed Binding Site Accessibility and Machine Learning on Proteomics Data.,"MicroRNAs (miRNAs) are a class of small regulatory genes regulating gene expression by targeting messenger RNA. Though computational methods for miRNA target prediction are the prevailing means to analyze their function, they still miss a large fraction of the targeted genes and additionally predict a large number of false positives. Here we introduce a novel algorithm called DIANA-microT-ANN which combines multiple novel target site features through an artificial neural network (ANN) and is trained using recently published high-throughput data measuring the change of protein levels after miRNA overexpression, providing positive and negative targeting examples. The features characterizing each miRNA recognition element include binding structure, conservation level, and a specific profile of structural accessibility. The ANN is trained to integrate the features of each recognition element along the 3'untranslated region into a targeting score, reproducing the relative repression fold change of the protein. Tested on two different sets the algorithm outperforms other widely used algorithms and also predicts a significant number of unique and reliable targets not predicted by the other methods. For 542 human miRNAs DIANA-microT-ANN predicts 120000 targets not provided by TargetScan 5.0. 
The algorithm is freely available at http://microrna.gr/microT-ANN.",2011-01-01 +20876522,"Membrane-associated activation of cholesterol α-glucosyltransferase, an enzyme responsible for biosynthesis of cholesteryl-α-D-glucopyranoside in Helicobacter pylori critical for its survival.","Helicobacter pylori (H. pylori) is the causative pathogen underlying gastric diseases such as chronic gastritis and gastric cancer. Previously, the authors revealed that α1,4-linked N-acetylglucosamine-capped O-glycan (αGlcNAc) found in gland mucin suppresses H. pylori growth and motility by inhibiting catalytic activity of cholesterol α-glucosyltransferase (CHLαGcT), the enzyme responsible for biosynthesis of the major cell wall component cholesteryl-α-D-glucopyranoside (CGL). Here, the authors developed a polyclonal antibody specific for CHLαGcT and then undertook quantitative ultrastructural analysis of the enzyme's localization in H. pylori. They show that 66.3% of CHLαGcT is detected in the cytoplasm beneath the H. pylori inner membrane, whereas 24.7% is present on the inner membrane. In addition, 2.6%, 5.0%, and 1.4% of the protein were detected in the periplasm, on the outer membrane, and outside microbes, respectively. By using an in vitro CHLαGcT assay with fractionated H. pylori proteins, which were used as an enzyme source for CHLαGcT, the authors demonstrated that the membrane fraction formed CGL, whereas other fractions did not. These data combined together indicate that CHLαGcT is originally synthesized in the cytoplasm of H. pylori as an inactive form and then activated when it is associated with the cell membrane. This article contains online supplemental material at http://www.jhc.org. 
Please visit this article online to view these materials.",2011-01-01 +21317021,An MRI digital brain phantom for validation of segmentation methods.,"Knowledge of the exact spatial distribution of brain tissues in images acquired by magnetic resonance imaging (MRI) is necessary to measure and compare the performance of segmentation algorithms. Currently available physical phantoms do not satisfy this requirement. State-of-the-art digital brain phantoms also fall short because they do not handle separately anatomical structures (e.g. basal ganglia) and provide relatively rough simulations of tissue fine structure and inhomogeneity. We present a software procedure for the construction of a realistic MRI digital brain phantom. The phantom consists of hydrogen nuclear magnetic resonance spin-lattice relaxation rate (R1), spin-spin relaxation rate (R2), and proton density (PD) values for a 24 × 19 × 15.5 cm volume of a ""normal"" head. The phantom includes 17 normal tissues, each characterized by both mean value and variations in R1, R2, and PD. In addition, an optional tissue class for multiple sclerosis (MS) lesions is simulated. The phantom was used to create realistic magnetic resonance (MR) images of the brain using simulated conventional spin-echo (CSE) and fast field-echo (FFE) sequences. Results of mono-parametric segmentation of simulations of sequences with different noise and slice thickness are presented as an example of possible applications of the phantom. The phantom data and simulated images are available online at http://lab.ibb.cnr.it/.",2011-01-28 +21575255,Comprehensive expressional analyses of antisense transcripts in colon cancer tissues using artificial antisense probes.,"

Background

Recent studies have identified thousands of sense-antisense gene pairs across different genomes by computational mapping of cDNA sequences. These studies have shown that approximately 25% of all transcriptional units in the human and mouse genomes are involved in cis-sense-antisense pairs. However, the number of known sense-antisense pairs remains limited because currently available cDNA sequences represent only a fraction of the total number of transcripts comprising the transcriptome of each cell type.

Methods

To discover novel antisense transcripts encoded in the antisense strand of important genes, such as cancer-related genes, we conducted expression analyses of antisense transcripts using our custom microarray platform along with 2376 probes designed specifically to detect the potential antisense transcripts of 501 well-known genes suitable for cancer research.

Results

Using colon cancer tissue and normal tissue surrounding the cancer tissue obtained from 6 patients, we found that antisense transcripts without poly(A) tails are expressed from approximately 80% of these well-known genes. This observation is consistent with our previous finding that many antisense transcripts expressed in a cell are poly(A)-. We also identified 101 and 71 antisense probes displaying a high level of expression specifically in normal and cancer tissues respectively.

Conclusion

Our microarray analysis identified novel antisense transcripts with expression profiles specific to cancer tissue, some of which might play a role in the regulatory networks underlying oncogenesis and thus are potential targets for further experimental validation. Our microarray data are available at http://www.brc.riken.go.jp/ncrna2007/viewer-Saito-01/index.html.",2011-05-16 +22417439,Effect of ultraviolet and far infrared radiation on microbial decontamination and quality of cumin seeds.,"

Unlabelled

Cumin seeds might be exposed to a high level of natural bacterial contamination, and this could potentially create a public health risk besides leading to problems in exportation. Ultraviolet (UVC) and far infrared (FIR) radiation has low penetration power, and due to that, there might be no detrimental defects to the products during a possible decontamination process. Therefore, the objective of this study was to determine the effect of UVC and FIR treatment on microbial decontamination and quality of cumin seeds. For this purpose, FIR treatment at different exposure times and temperatures were applied followed by constant UVC treatment with an intensity of 10.5 mW/cm² for 2 h. Total mesophilic aerobic bacteria of the cumin seeds were decreased to the target level of 10⁴ CFU/g after 1.57, 2.8, and 4.8 min FIR treatment at 300, 250, and 200 °C, respectively, following a 2 h UVC treatment. Under the given conditions, a complete elimination for total yeast and molds were obtained while there were no significant changes in volatile oil content and color of the cumin seeds. Consequently, combined UVC and FIR treatment was determined to be a promising method for decontamination of the cumin seeds.

Practical application

This research attempts to apply UVC and far infrared (FIR) radiation for pasteurization of cumin seeds. The data suggested that combined UVC and FIR radiation treatments can become a promising new method for pasteurization of cumin seeds without causing any detrimental defect to the quality parameters. The results of this industry partnered (Kadioglu Baharat, Mersin, Turkey--http://www.kadioglubaharat.com) study were already applied in industrial scale production lines.",2011-05-09 +21400203,Transcript catalogs of human chromosome 21 and orthologous chimpanzee and mouse regions.,"A comprehensive representation of the gene content of the long arm of human chromosome 21 (Hsa21q) remains of interest for the study of Down syndrome, its associated phenotypic features, and mouse models. Here we compare transcript catalogs for Hsa21q, chimpanzee chromosome 21 (Ptr21q), and orthologous regions of mouse chromosomes 16, 17, and 10 for open reading frame (ORF) characteristics and conservation. The Hsa21q and mouse catalogs contain 552 and 444 gene models, respectively, of which only 162 are highly conserved. Hsa21q transcripts were used to identify orthologous exons in Ptr21q and assemble 533 putative transcripts. Transcript catalogs for all three organisms are searchable for nucleotide and amino acid sequence features of ORF length, repeat content, experimental support, gene structure, and conservation. For human and mouse comparisons, three additional summaries are provided: (1) the chromosomal distribution of novel ORF transcripts versus potential functional RNAs, (2) the distribution of species-specific transcripts within Hsa21q and mouse models of Down syndrome, and (3) the organization of sense-antisense and putative sense-antisense structures defining potential regulatory mechanisms. Catalogs, summaries, and nucleotide and amino acid sequences of all composite transcripts are available and searchable at http://gfuncpathdb.ucdenver.edu/iddrc/chr21/home.php. 
These data sets provide comprehensive information useful for evaluation of candidate genes and mouse models of Down syndrome and for identification of potential functional RNA genes and novel regulatory mechanisms involving Hsa21q genes. These catalogs and search tools complement and extend information available from other gene annotation projects.",2011-03-13 +21554017,Finding nearly optimal GDT scores.,"Global Distance Test (GDT) is one of the commonly accepted measures to assess the quality of predicted protein structures. Given a set of distance thresholds, GDT maximizes the percentage of superimposed (or matched) residue pairs under each threshold, and reports the average of these percentages as the final score. The computation of GDT score was conjectured to be NP-hard. All available methods are heuristic and do not guarantee the optimality of scores. These heuristic strategies usually result in underestimated GDT scores. Contrary to the conjecture, the problem can be solved exactly in polynomial time, albeit the method would be too slow for practical usage. In this paper we propose an efficient tool called OptGDT to obtain GDT scores with theoretically guaranteed accuracies. Denote ℓ as the number of matched residue pairs found by OptGDT for a given threshold d. Let ℓ' be the optimal number of matched residues pairs for threshold d/(1 + ε), where ε is a parameter in our computation. OptGDT guarantees that ℓ ≥ ℓ'. We applied our tool to CASP8 (The eighth Critical Assessment of Structure Prediction Techniques) data. For 87.3% of the predicted models, better GDT scores are obtained when OptGDT is used. In some cases, the number of matched residue pairs were improved by at least 10%. The tool runs in time O(n³ log n/ε⁵) for a given threshold d and parameter ε. In the case of globular proteins, the tool can be improved to a randomized algorithm of O(n log² n) runtime with probability at least 1 - O(1/n). 
Released under the GPL license and downloadable from http://bioinformatics.uwaterloo.ca/~scli/OptGDT/ .",2011-05-01 +21217122,"A fast, lock-free approach for efficient parallel counting of occurrences of k-mers.","

Motivation

Counting the number of occurrences of every k-mer (substring of length k) in a long string is a central subproblem in many applications, including genome assembly, error correction of sequencing reads, fast multiple sequence alignment and repeat detection. Recently, the deep sequence coverage generated by next-generation sequencing technologies has caused the amount of sequence to be processed during a genome project to grow rapidly, and has rendered current k-mer counting tools too slow and memory intensive. At the same time, large multicore computers have become commonplace in research facilities allowing for a new parallel computational paradigm.

Results

We propose a new k-mer counting algorithm and associated implementation, called Jellyfish, which is fast and memory efficient. It is based on a multithreaded, lock-free hash table optimized for counting k-mers up to 31 bases in length. Due to their flexibility, suffix arrays have been the data structure of choice for solving many string problems. For the task of k-mer counting, important in many biological applications, Jellyfish offers a much faster and more memory-efficient solution.

Availability

The Jellyfish software is written in C++ and is GPL licensed. It is available for download at http://www.cbcb.umd.edu/software/jellyfish.",2011-01-07 +21208984,Model selection in Bayesian segmentation of multiple DNA alignments.,"

Motivation

The analysis of multiple sequence alignments is allowing researchers to glean valuable insights into evolution, as well as identify genomic regions that may be functional, or discover novel classes of functional elements. Understanding the distribution of conservation levels that constitutes the evolutionary landscape is crucial to distinguishing functional regions from non-functional. Recent evidence suggests that a binary classification of evolutionary rates is inappropriate for this purpose and finds only highly conserved functional elements. Given that the distribution of evolutionary rates is multi-modal, determining the number of modes is of paramount concern. Through simulation, we evaluate the performance of a number of information criterion approaches derived from MCMC simulations in determining the dimension of a model.

Results

We utilize a deviance information criterion (DIC) approximation that is more robust than the approximations from other information criteria, and show our information criteria approximations do not produce superfluous modes when estimating conservation distributions under a variety of circumstances. We analyse the distribution of conservation for a multiple alignment comprising four primate species and mouse, and repeat this on two additional multiple alignments of similar species. We find evidence of six distinct classes of evolutionary rates that appear to be robust to the species used.

Availability

Source code and data are available at http://dl.dropbox.com/u/477240/changept.zip.",2011-01-05 +20607691,Gabedit--a graphical user interface for computational chemistry softwares.,"Gabedit is a freeware graphical user interface, offering preprocessing and postprocessing adapted (to date) to nine computational chemistry software packages. It includes tools for editing, displaying, analyzing, converting, and animating molecular systems. A conformational search tool is implemented using a molecular mechanics or a semiempirical potential. Input files can be generated for the computational chemistry software supported by Gabedit. Some molecular properties of interest are processed directly from the output of the computational chemistry programs; others are calculated by Gabedit before display. Molecular orbitals, electron density, electrostatic potential, nuclear magnetic resonance shielding density, and any other volumetric data properties can be displayed. It can display electronic circular dichroism, UV-visible, infrared, and Raman-computed spectra after a convolution. Gabedit can generate a Povray file for geometry, surfaces, contours, and color-coded planes. Output can be exported to a selection of popular image and vector graphics file formats; the program can also generate a series of pictures for animation. Quantum mechanical electrostatic potentials can be calculated using the partial charges on atoms, or by solving the Poisson equation using the multigrid method. The atoms in molecule charges can also be calculated. Gabedit is platform independent. 
The code is distributed under free open source X11 style license and is available at http://gabedit.sourceforge.net/.",2011-01-01 +21082406,Osteogenic differentiation strategies for adipose-derived mesenchymal stem cells.,"Adipose stem cell preparations, either obtained as a freshly isolated so-called stromal vascular fraction (SVF) or as cells cultured to homogeneity and then referred to as adipose stem cells (ASCs), have found widespread use in a broad variety of studies on tissue engineering and regenerative medicine applications, including bone repair. For newcomers within the field, but also for established research laboratories having up to 10 years of expertise in this research area, it may be convenient to strive for, and use consensus protocols (1) for studying the osteogenic differentiation potential of ASC preparations in vitro, and (2) for osteogenic induction regimes for in vivo implementation. To assist in achieving this goal, this chapter describes various step-by-step osteogenic differentiation protocols for adipose-derived stem cell populations (SVF as well as ASCs) currently applied within our laboratory, with particular emphasis on protocols aimed at intra-operative use. The protocols describe the use of inducing compounds, including the bone morphogenetic proteins (BMPs), 1,25-dihydroxyvitamin-D3, and polyamines, as well as methods and parameters for evaluating the level of differentiation achieved. We would appreciate receiving feedback on the protocols described; this will facilitate the development of consensus protocols, which in turn will allow better comparison of data sets generated by different research groups. 
This continuing standardization, which might be reported on at international meetings like those of IFATS ( http://www.IFATS.org ), might be of benefit for the whole ASC research community.",2011-01-01 +21357414,Informatics in radiology: use of a C-arm fluoroscopy simulator to support training in intraoperative radiography.,"Mobile image intensifier systems (C-arms) are used frequently in orthopedic and reconstructive surgery, especially in trauma and emergency settings, but image quality and radiation exposure levels may vary widely, depending on the extent of the C-arm operator's knowledge and experience. Current training programs consist mainly of theoretical instruction in C-arm operation, the physical foundations of radiography, and radiation avoidance, and are largely lacking in hands-on application. A computer-based simulation program such as that tested by the authors may be one way to improve the effectiveness of C-arm training. In computer simulations of various scenarios commonly encountered in the operating room, trainees using the virtX program interact with three-dimensional models to test their knowledge base and improve their skill levels. Radiographs showing the simulated patient anatomy and surgical implants are ""reconstructed"" from data computed on the basis of the trainee's positioning of models of a C-arm, patient, and table, and are displayed in real time on the desktop monitor. Trainee performance is signaled in real time by color graphics in several control panels and, on completion of the exercise, is compared in detail with the performance of an expert operator. Testing of this computer-based training program in continuing medical education courses for operating room personnel showed an improvement in the overall understanding of underlying principles of intraoperative radiography performed with a C-arm, with resultant higher image quality, lower overall radiation exposure, and greater time efficiency. 
Supplemental material available at http://radiographics.rsna.org/lookup/suppl/doi:10.1148/rg.313105125/-/DC1.",2011-02-25 +21332642,Designing new UK-WHO growth charts: implications for health staff use and understanding of charts and growth monitoring.,"New pre-school UK charts have been produced incorporating the new World Health Organization growth standards based on healthy breastfed infants. This paper describes the process by which the charts and evidence-based instructions were designed and evaluated, and what it revealed about professional understanding of charts and growth monitoring. A multidisciplinary expert group drew on existing literature, new data analyses and parent focus groups as well as two series of chart-plotting workshops for health staff. The first series explored possible design features and general chart understanding. The second evaluated an advanced prototype with instructions, using plotting and interpretation of three separate scenarios on the old charts, compared with the new charts. The first plotting workshops (46 participants) allowed decisions to be made about the exact chart format, but it also revealed widespread confusion about use of adjustment for gestation and the plotting of birthweight. In the second series (78 participants), high levels of plotting inaccuracy were identified on both chart formats, with 64% of respondents making at least one major mistake. Significant neonatal weight loss was poorly recognized. While most participants recognized abnormal and normal growth patterns, 13-20% did not. Many respondents had never received any formal training in chart use. Growth charts are complex clinical tools that are, at present, poorly understood and inconsistently used. 
The importance of clear guidelines and formal training has now been recognized and translated into supporting educational materials (free to download at http://www.growthcharts.rcpch.ac.uk).",2011-02-17 +21489405,Genome-wide analysis shows increased frequency of copy number variation deletions in Dutch schizophrenia patients.,"

Background

Since 2008, multiple studies have reported on copy number variations (CNVs) in schizophrenia. However, many regions are unique events with minimal overlap between studies. This makes it difficult to gain a comprehensive overview of all CNVs involved in the etiology of schizophrenia. We performed a systematic CNV study on the basis of a homogeneous genome-wide dataset aiming at all CNVs ≥ 50 kilobase pair. We complemented this analysis with a review of cytogenetic and chromosomal abnormalities for schizophrenia reported in the literature with the purpose of combining classical genetic findings and our current understanding of genomic variation.

Methods

We investigated 834 Dutch schizophrenia patients and 672 Dutch control subjects. The CNVs were included if they were detected by QuantiSNP (http://www.well.ox.ac.uk/QuantiSNP/) as well as PennCNV (http://www.neurogenome.org/cnv/penncnv/) and contain known protein coding genes. The integrated identification of CNV regions and cytogenetic loci indicates regions of interest (cytogenetic regions of interest [CROIs]).

Results

In total, 2437 CNVs were identified with an average number of 2.1 CNVs/subject for both cases and control subjects. We observed significantly more deletions but not duplications in schizophrenia cases versus control subjects. The CNVs identified coincide with loci previously reported in the literature, confirming well-established schizophrenia CROIs 1q42 and 22q11.2 as well as indicating a potentially novel CROI on chromosome 5q35.1.

Conclusions

Chromosomal deletions are more prevalent in schizophrenia patients than in healthy subjects and therefore confer a risk factor for pathogenicity. The combination of our CNV data with previously reported cytogenetic abnormalities in schizophrenia provides an overview of potentially interesting regions for positional candidate genes.",2011-04-13 +21622958,psRNATarget: a plant small RNA target analysis server.,"Plant endogenous non-coding short small RNAs (20-24 nt), including microRNAs (miRNAs) and a subset of small interfering RNAs (ta-siRNAs), play important role in gene expression regulatory networks (GRNs). For example, many transcription factors and development-related genes have been reported as targets of these regulatory small RNAs. Although a number of miRNA target prediction algorithms and programs have been developed, most of them were designed for animal miRNAs which are significantly different from plant miRNAs in the target recognition process. These differences demand the development of separate plant miRNA (and ta-siRNA) target analysis tool(s). We present psRNATarget, a plant small RNA target analysis server, which features two important analysis functions: (i) reverse complementary matching between small RNA and target transcript using a proven scoring schema, and (ii) target-site accessibility evaluation by calculating unpaired energy (UPE) required to 'open' secondary structure around small RNA's target site on mRNA. The psRNATarget incorporates recent discoveries in plant miRNA target recognition, e.g. it distinguishes translational and post-transcriptional inhibition, and it reports the number of small RNA/target site pairs that may affect small RNA binding activity to target transcript. The psRNATarget server is designed for high-throughput analysis of next-generation data with an efficient distributed computing back-end pipeline that runs on a Linux cluster. 
The server front-end integrates three simplified user-friendly interfaces to accept user-submitted or preloaded small RNAs and transcript sequences; and outputs a comprehensive list of small RNA/target pairs along with the online tools for batch downloading, key word searching and results sorting. The psRNATarget server is freely available at http://plantgrn.noble.org/psRNATarget/.",2011-05-27 +21600674,[EAU Guidelines on Urinary Incontinence].,"

Context

The first European Association of Urology (EAU) guidelines on incontinence were published in 2001. These guidelines were periodically updated in past years.

Objective

The aim of this paper is to present a summary of the 2009 update of the EAU guidelines on urinary incontinence (UI).

Evidence acquisition

The EAU working panel was part of the 4th International Consultation on Incontinence (ICI) and, with permission of the ICI, extracted the relevant data. The methodology of the 4th ICI was a comprehensive literature review by international experts and consensus formation. In addition, level of evidence was rated according to a modified Oxford system and grades of recommendation were given accordingly.

Evidence summary

A full version of the EAU guidelines on urinary incontinence is available as a printed document (extended and short form) and as a CD-ROM from the EAU office or online from the EAU Web site (http://www.uroweb.org/guidelines/online-guidelines/). The extent and invasiveness of assessment of UI depend on severity and/or complexity of symptoms and clinical signs and are different for men, women, frail older persons, children, and patients with neuropathy. At the level of initial management, basic diagnostic tests are applied to exclude an underlying disease or condition such as urinary tract infection. Treatment is mostly conservative (lifestyle interventions, physiotherapy, physical therapy, pharmacotherapy) and is of an empirical nature. At the level of specialised management (when primary therapy failed, diagnosis is unclear, or symptoms and/or signs are complex/severe), more elaborate assessment is generally required, including imaging, endoscopy, and urodynamics. Treatment options include invasive interventions and surgery.

Conclusions

Treatment options for UI are rapidly expanding. These EAU guidelines provide ratings of the evidence (guided by evidence-based medicine) and graded recommendations for the appropriate assessment and according treatment options and put them into clinical perspective.",2011-05-19 +21593131,FusionMap: detecting fusion genes from next-generation sequencing data at base-pair resolution.,"

Motivation

Next generation sequencing technology generates high-throughput data, which allows us to detect fusion genes at both transcript and genomic levels. To detect fusion genes, the current bioinformatics tools heavily rely on paired-end approaches and overlook the importance of reads that span fusion junctions. Thus there is a need to develop an efficient aligner to detect fusion events by accurate mapping of these junction-spanning single reads, particularly when the read gets longer with the improvement in sequencing technology.

Results

We present a novel method, FusionMap, which aligns fusion reads directly to the genome without prior knowledge of potential fusion regions. FusionMap can detect fusion events in both single- and paired-end datasets from either RNA-Seq or gDNA-Seq studies and characterize fusion junctions at base-pair resolution. We showed that FusionMap achieved high sensitivity and specificity in fusion detection on two simulated RNA-Seq datasets, which contained 75 nt paired-end reads. FusionMap achieved substantially higher sensitivity and specificity than the paired-end approach when the inner distance between read pairs was small. Using FusionMap to characterize fusion genes in K562 chronic myeloid leukemia cell line, we further demonstrated its accuracy in fusion detection in both single-end RNA-Seq and gDNA-Seq datasets. These combined results show that FusionMap provides an accurate and systematic solution to detecting fusion events through junction-spanning reads.

Availability

FusionMap includes reference indexing, read filtering, fusion alignment and reporting in one package. The software is free for noncommercial use at (http://www.omicsoft.com/fusionmap).",2011-05-18 +21595880,Genotype calling in tetraploid species from bi-allelic marker data using mixture models.,"

Background

Automated genotype calling in tetraploid species was until recently not possible, which hampered genetic analysis. Modern genotyping assays often produce two signals, one for each allele of a bi-allelic marker. While ample software is available to obtain genotypes (homozygous for either allele, or heterozygous) for diploid species from these signals, such software is not available for tetraploid species which may be scored as five alternative genotypes (aaaa, baaa, bbaa, bbba and bbbb; nulliplex to quadruplex).

Results

We present a novel algorithm, implemented in the R package fitTetra, to assign genotypes for bi-allelic markers to tetraploid samples from genotyping assays that produce intensity signals for both alleles. The algorithm is based on the fitting of several mixture models with five components, one for each of the five possible genotypes. The models have different numbers of parameters specifying the relation between the five component means, and some of them impose a constraint on the mixing proportions to conform to Hardy-Weinberg equilibrium (HWE) ratios. The software rejects markers that do not allow a reliable genotyping for the majority of the samples, and it assigns a missing score to samples that cannot be scored into one of the five possible genotypes with sufficient confidence.

Conclusions

We have validated the software with data of a collection of 224 potato varieties assayed with an Illumina GoldenGate™ 384 SNP array and shown that all SNPs with informative ratio distributions are fitted. Almost all fitted models appear to be correct based on visual inspection and comparison with diploid samples. When the collection of potato varieties is analyzed as if it were a population, almost all markers seem to be in Hardy-Weinberg equilibrium. The R package fitTetra is freely available under the GNU Public License from http://www.plantbreeding.wur.nl/UK/software_fitTetra.html and as Additional files with this article.",2011-05-19 +21278186,Modeling and comparing the organization of circular genomes.,"

Motivation

Most prokaryotic genomes are circular with a single chromosome (called circular genomes), which consist of bacteria and archaea. Orthologous genes (abbreviated as orthologs) are genes directly evolved from an ancestor gene, and can be traced through different species in evolution. Shared orthologs between bacterial genomes have been used to measure their genome evolution. Here, organization of circular genomes is analyzed via distributions of shared orthologs between genomes. However, these distributions are often asymmetric and bimodal; to date, there is no joint distribution to model such data. This motivated us to develop a family of bivariate distributions with generalized von Mises marginals (BGVM) and its statistical inference.

Results

A new measure based on circular grade correlation and the fraction of shared orthologs is proposed for association between circular genomes, and a visualization tool developed to depict genome structure similarity. The proposed procedures are applied to eight pairs of prokaryotes separated from domain down to species, and 13 mycoplasma bacteria that are mammalian pathogens belonging to the same genus. We close with remarks on further applications to many features of genomic organization, e.g. shared transcription factor binding sites, between any pair of circular genomes. Thus, the proposed procedures may be applied to identifying conserved chromosome backbones, among others, for genome construction in synthetic biology.

Availability

All codes of the BGVM procedures and 1000+ prokaryotic genomes are available at http://www.stat.sinica.edu.tw/~gshieh/bgvm.htm.",2011-01-28 +21258067,Discovery of genome-wide DNA polymorphisms in a landrace cultivar of Japonica rice by whole-genome sequencing.,"Molecular breeding approaches are of growing importance to crop improvement. However, closely related cultivars generally used for crossing material lack sufficient known DNA polymorphisms due to their genetic relatedness. Next-generation sequencing allows the identification of a massive number of DNA polymorphisms such as single nucleotide polymorphisms (SNPs) and insertions-deletions (InDels) between highly homologous genomes. Using this technology, we performed whole-genome sequencing of a landrace of japonica rice, Omachi, which is used for sake brewing and is an important source for modern cultivars. A total of 229 million reads, each comprising 75 nucleotides of the Omachi genome, was generated with 45-fold coverage and uniquely mapped to 89.7% of the Nipponbare genome, a closely related cultivar. We identified 132,462 SNPs, 16,448 insertions and 19,318 deletions between the Omachi and Nipponbare genomes. An SNP array was designed to validate 731 selected SNPs, resulting in validation rates of 95 and 88% for the Omachi and Nipponbare genomes, respectively. Among the 577 SNPs validated in both genomes, 532 are entirely new SNP markers not previously reported between related rice cultivars. We also validated InDels on a part of chromosome 2 as DNA markers and successfully genotyped five japonica rice cultivars. Our results present the methodology and extensive data on SNPs and InDels available for whole-genome genotyping and marker-assisted breeding. 
The polymorphism information between Omachi and Nipponbare is available at NGRC_Rice_Omachi (http://www.nodai-genome.org/oryza_sativa_en.html).",2011-01-21 +21283524,Regulation of neutrophil senescence by microRNAs.,"Neutrophils are rapidly recruited to sites of tissue injury or infection, where they protect against invading pathogens. Neutrophil functions are limited by a process of neutrophil senescence, which renders the cells unable to respond to chemoattractants, carry out respiratory burst, or degranulate. In parallel, aged neutrophils also undergo spontaneous apoptosis, which can be delayed by factors such as GMCSF. This is then followed by their subsequent removal by phagocytic cells such as macrophages, thereby preventing unwanted inflammation and tissue damage. Neutrophils translate mRNA to make new proteins that are important in maintaining functional longevity. We therefore hypothesised that neutrophil functions and lifespan might be regulated by microRNAs expressed within human neutrophils. Total RNA from highly purified neutrophils was prepared and subjected to microarray analysis using the Agilent human miRNA microarray V3. We found human neutrophils expressed a selected repertoire of 148 microRNAs and that 6 of these were significantly upregulated after a period of 4 hours in culture, at a time when the contribution of apoptosis is negligible. A list of predicted targets for these 6 microRNAs was generated from http://mirecords.biolead.org and compared to mRNA species downregulated over time, revealing 83 genes targeted by at least 2 out of the 6 regulated microRNAs. Pathway analysis of genes containing binding sites for these microRNAs identified the following pathways: chemokine and cytokine signalling, Ras pathway, and regulation of the actin cytoskeleton. 
Our data suggest that microRNAs may play a role in the regulation of neutrophil senescence and further suggest that manipulation of microRNAs might represent an area of future therapeutic interest for the treatment of inflammatory disease.",2011-01-19 +21992500,Random generation of RNA secondary structures according to native distributions.,"

Background

Random biological sequences are a topic of great interest in genome analysis since, according to a powerful paradigm, they represent the background noise from which the actual biological information must differentiate. Accordingly, the generation of random sequences has been investigated for a long time. Similarly, random objects of a more complicated structure like RNA molecules or proteins are of interest.

Results

In this article, we present a new general framework for deriving algorithms for the non-uniform random generation of combinatorial objects according to the encoding and probability distribution implied by a stochastic context-free grammar. Briefly, the framework extends on the well-known recursive method for (uniform) random generation and uses the popular framework of admissible specifications of combinatorial classes, introducing weighted combinatorial classes to allow for the non-uniform generation by means of unranking. This framework is used to derive an algorithm for the generation of RNA secondary structures of a given fixed size. We address the random generation of these structures according to a realistic distribution obtained from real-life data by using a very detailed context-free grammar (that models the class of RNA secondary structures by distinguishing between all known motifs in RNA structure). Compared to well-known sampling approaches used in several structure prediction tools (such as SFold) ours has two major advantages: Firstly, after a preprocessing step in time $O(n^2)$ for the computation of all weighted class sizes needed, with our approach a set of m random secondary structures of a given structure size n can be computed in worst-case time complexity $O(m \cdot n \cdot \log n)$ while other algorithms typically have a runtime in $O(m \cdot n^2)$. Secondly, our approach works with integer arithmetic only which is faster and saves us from all the discomforting details of using floating point arithmetic with logarithmized probabilities.

Conclusion

A number of experimental results show that our random generation method produces realistic output, at least with respect to the appearance of the different structural motifs. The algorithm is available as a webservice at http://wwwagak.cs.uni-kl.de/NonUniRandGen and can be used for generating random secondary structures of any specified RNA type. A link to download an implementation of our method (in Wolfram Mathematica) can be found there, too.",2011-10-12 +21236247,Cytokines in recurrent pregnancy loss.,"

Background

Recurrent pregnancy loss (RPL) is defined as the occurrence of three or more consecutive miscarriages prior to 20 weeks gestation. Exaggerated maternal immune response to fetal antigens has been proposed to be one of the mechanisms underlying recurrent pregnancy loss.

Method

A comprehensive literature search was conducted from the websites of the National Library of Medicine (http://www.ncbl.nlm.nih.gov) and Pubmed Central, the US National Library of Medicine's digital archive of life sciences literature (http://www.pubmedcentral.nih.gov/). The data was assessed from books and journals that published relevant articles in this field.

Result

In normal pregnancy, tolerance of the genetically incompatible fetus by the maternal immune system depends on the interactions of an array of cytokines secreted by maternal and fetal cells at the site of implantation. Earlier research indicated that altered immunity in RPL is dominated by the Th1/Th2 hypothesis, which proposed that the fetus escapes maternal-derived T-cell responses through skewing the Th0 differentiation toward Th2 pathway which dampens pro-inflammatory Th1-type immunity. Recent studies indicate the role of proinflammatory Th17 cells and immunoregulatory Treg cells in RPL in addition to Th1/Th2 interactions.

Conclusion

Cytokines form a complex regulatory network which maintains homeostasis between the fetal unit and the maternal immune system. If this delicate balance is adversely affected, immunoregulatory mechanisms may be insufficient to restore homeostasis and this may lead to pregnancy failure.",2011-01-12 +21364914,Using genomic sequencing for classical genetics in E. coli K12.,"We here develop computational methods to facilitate use of 454 whole genome shotgun sequencing to identify mutations in Escherichia coli K12. We had Roche sequence eight related strains derived as spontaneous mutants in a background without a whole genome sequence. They provided difference tables based on assembling each genome to reference strain E. coli MG1655 (NC_000913). Due to the evolutionary distance to MG1655, these contained a large number of both false negatives and positives. By manual analysis of the dataset, we detected all the known mutations (24 at nine locations) and identified and genetically confirmed new mutations necessary and sufficient for the phenotypes we had selected in four strains. We then had Roche assemble contigs de novo, which we further assembled to full-length pseudomolecules based on synteny with MG1655. This hybrid method facilitated detection of insertion mutations and allowed annotation from MG1655. After removing one genome with less than the optimal 20- to 30-fold sequence coverage, we identified 544 putative polymorphisms that included all of the known and selected mutations apart from insertions. Finally, we detected seven new mutations in a total of only 41 candidates by comparing single genomes to composite data for the remaining six and using a ranking system to penalize homopolymer sequencing and misassembly errors. An additional benefit of the analysis is a table of differences between MG1655 and a physiologically robust E. coli wild-type strain NCM3722. 
Both projects were greatly facilitated by use of comparative genomics tools in the CoGe software package (http://genomevolution.org/).",2011-02-25 +21967658,Missing and accounted for: gaps and areas of wealth in the public health review literature.,"

Background

High-quality review evidence is useful for informing and influencing public health policy and practice decisions. However, certain topic areas lack representation in terms of the quantity and quality of review literature available. The objectives of this paper are to identify the quantity, as well as quality, of review-level evidence available on the effectiveness of public health interventions for public health decision makers.

Methods

Searches conducted on http://www.health-evidence.ca produced an inventory of public health review literature in 21 topic areas. Gaps and areas of wealth in the review literature, as well as the proportion of reviews rated methodologically strong, moderate, or weak were identified. The top 10 topic areas of interest for registered users and visitors of http://www.health-evidence.ca were extracted from user profile data and Google Analytics.

Results

Registered users' top three interests included: 1) healthy communities, 2) chronic diseases, and 3) nutrition. The top three preferences for visitors included: 1) chronic diseases, 2) physical activity, and 3) addiction/substance use. All of the topic areas with many (301+) available reviews were of interest to registered users and/or visitors (mental health, physical activity, addiction/substance use, adolescent health, child health, nutrition, adult health, and chronic diseases). Conversely, the majority of registered users and/or visitors did not have preference for topic areas with few (≤ 150) available reviews (food safety and inspection, dental health, environmental health) with the exception of social determinants of health and healthy communities. Across registered users' and visitors' topic areas of preference, 80.2% of the reviews were of well-done methodological quality, with 43.5% of reviews having a strong quality rating and 36.7% a moderate review quality rating.

Conclusions

In topic areas in which many reviews are available, higher level syntheses are needed to guide policy and practice. For other topic areas with few reviews, it is necessary to determine whether primary study evidence exists, or is needed, so that reviews can be conducted in the future. Considering that less than half of the reviews available on http://www.health-evidence.ca are of strong methodological quality, the quality of the review-level evidence needs to improve across the range of public health topic areas.",2011-10-03 +21514296,3D visualization of aqueous humor outflow structures in-situ in humans.,"Aqueous humor (AH) exiting the eye via the trabecular meshwork and Schlemm's canal (SC) passes through the deep and intrascleral venous plexus (ISVP) or directly through aqueous veins. The purpose of this study was to visualize the human AH outflow system 360° in three dimensions (3D) during active AH outflow in a virtual casting. The conventional AH outflow pathways of 7 donor eyes were imaged with a modified Bioptigen spectral-domain optical coherence tomography system (Bioptigen Inc, USA; SuperLum LTD, Ireland) at a perfusion pressure of 20 mmHg (N = 3), and 10 mmHg (N = 4). In all eyes, 36 scans (3 equally distributed in each clock hour), each covering a 2 × 3 × 2 mm volume (512 frames, each 512 × 1024 pixels), were obtained. All image data were black/white inverted, and the background subtracted (ImageJ 1.40 g, http://rsb.info.nih.gov/ij/). Contrast was adjusted to isolate the ISVP. SC, collector channels, the deep and ISVP, and episcleral veins were observed throughout the limbus. Aqueous veins could be observed extending into the episcleral veins. Individual scan ISVP castings were rendered and assembled in 3D space in Amira 4.1 (Visage Imaging Inc. USA). A 360-degree casting of the ISVP was obtained in all perfused eyes. The ISVP tended to be dense and overlapping in the superior and inferior quadrants, and thinner in the lateral quadrants. 
The human AH outflow pathway can be imaged using SD-OCT. The more superficial structures of the AH outflow pathway present with sufficient contrast as to be optically isolated and cast in-situ 360° in cadaver eye perfusion models. This approach may be useful as a model in future studies of human AH outflow.",2011-04-15 +21533142,Functional cohesion of gene sets determined by latent semantic indexing of PubMed abstracts.,"

Unlabelled

High-throughput genomic technologies enable researchers to identify genes that are co-regulated with respect to specific experimental conditions. Numerous statistical approaches have been developed to identify differentially expressed genes. Because each approach can produce distinct gene sets, it is difficult for biologists to determine which statistical approach yields biologically relevant gene sets and is appropriate for their study. To address this issue, we implemented Latent Semantic Indexing (LSI) to determine the functional coherence of gene sets. An LSI model was built using over 1 million Medline abstracts for over 20,000 mouse and human genes annotated in Entrez Gene. The gene-to-gene LSI-derived similarities were used to calculate a literature cohesion p-value (LPv) for a given gene set using a Fisher's exact test. We tested this method against genes in more than 6,000 functional pathways annotated in Gene Ontology (GO) and found that approximately 75% of gene sets in GO biological process category and 90% of the gene sets in GO molecular function and cellular component categories were functionally cohesive (LPv<0.05). These results indicate that the LPv methodology is both robust and accurate. Application of this method to previously published microarray datasets demonstrated that LPv can be helpful in selecting the appropriate feature extraction methods. To enable real-time calculation of LPv for mouse or human gene sets, we developed a web tool called Gene-set Cohesion Analysis Tool (GCAT). GCAT can complement other gene set enrichment approaches by determining the overall functional cohesion of data sets, taking into account both explicit and implicit gene interactions reported in the biomedical literature.

Availability

GCAT is freely available at http://binf1.memphis.edu/gcat.",2011-04-14 +22230945,How to measure cortical folding from MR images: a step-by-step tutorial to compute local gyrification index.,"Cortical folding (gyrification) is determined during the first months of life, so that adverse events occurring during this period leave traces that will be identifiable at any age. As recently reviewed by Mangin and colleagues(2), several methods exist to quantify different characteristics of gyrification. For instance, sulcal morphometry can be used to measure shape descriptors such as the depth, length or indices of inter-hemispheric asymmetry(3). These geometrical properties have the advantage of being easy to interpret. However, sulcal morphometry tightly relies on the accurate identification of a given set of sulci and hence provides a fragmented description of gyrification. A more fine-grained quantification of gyrification can be achieved with curvature-based measurements, where smoothed absolute mean curvature is typically computed at thousands of points over the cortical surface(4). The curvature is however not straightforward to comprehend, as it remains unclear if there is any direct relationship between the curvedness and a biologically meaningful correlate such as cortical volume or surface. To address the diverse issues raised by the measurement of cortical folding, we previously developed an algorithm to quantify local gyrification with an exquisite spatial resolution and of simple interpretation. Our method is inspired of the Gyrification Index(5), a method originally used in comparative neuroanatomy to evaluate the cortical folding differences across species. In our implementation, which we name local Gyrification Index (lGI(1)), we measure the amount of cortex buried within the sulcal folds as compared with the amount of visible cortex in circular regions of interest. 
Given that the cortex grows primarily through radial expansion(6), our method was specifically designed to identify early defects of cortical development. In this article, we detail the computation of local Gyrification Index, which is now freely distributed as a part of the FreeSurfer Software (http://surfer.nmr.mgh.harvard.edu/, Martinos Center for Biomedical Imaging, Massachusetts General Hospital). FreeSurfer provides a set of automated reconstruction tools of the brain's cortical surface from structural MRI data. The cortical surface extracted in the native space of the images with sub-millimeter accuracy is then further used for the creation of an outer surface, which will serve as a basis for the lGI calculation. A circular region of interest is then delineated on the outer surface, and its corresponding region of interest on the cortical surface is identified using a matching algorithm as described in our validation study(1). This process is repeatedly iterated with largely overlapping regions of interest, resulting in cortical maps of gyrification for subsequent statistical comparisons (Fig. 1). Of note, another measurement of local gyrification with a similar inspiration was proposed by Toro and colleagues(7), where the folding index at each point is computed as the ratio of the cortical area contained in a sphere divided by the area of a disc with the same radius. The two implementations differ in that the one by Toro et al. is based on Euclidian distances and thus considers discontinuous patches of cortical area, whereas ours uses a strict geodesic algorithm and include only the continuous patch of cortical area opening at the brain surface in a circular region of interest.",2012-01-02 +21342564,iGEMDOCK: a graphical environment of enhancing GEMDOCK using pharmacological interactions and post-screening analysis.,"

Background

Pharmacological interactions are useful for understanding ligand binding mechanisms of a therapeutic target. These interactions are often inferred from a set of active compounds that were acquired experimentally. Moreover, most docking programs loosely coupled the stages (binding-site and ligand preparations, virtual screening, and post-screening analysis) of structure-based virtual screening (VS). An integrated VS environment, which provides the friendly interface to seamlessly combine these VS stages and to identify the pharmacological interactions directly from screening compounds, is valuable for drug discovery.

Results

We developed an easy-to-use graphic environment, iGEMDOCK, integrating VS stages (from preparations to post-screening analysis). For post-screening analysis, iGEMDOCK provides biological insights by deriving the pharmacological interactions from screening compounds without relying on the experimental data of active compounds. The pharmacological interactions represent conserved interacting residues, which often form binding pockets with specific physico-chemical properties, to play the essential functions of a target protein. Our experimental results show that the pharmacological interactions derived by iGEMDOCK are often hot spots involved in the biological functions. In addition, iGEMDOCK provides the visualizations of the protein-compound interaction profiles and the hierarchical clustering dendrogram of the compounds for post-screening analysis.

Conclusions

We have developed iGEMDOCK to facilitate steps from preparations of target proteins and ligand libraries toward post-screening analysis. iGEMDOCK is especially useful for post-screening analysis and inferring pharmacological interactions from screening compounds. We believe that iGEMDOCK is useful for understanding the ligand binding mechanisms and discovering lead compounds. iGEMDOCK is available at http://gemdock.life.nctu.edu.tw/dock/igemdock.php.",2011-02-15 +21828189,Discrimination of benign and malignant breast lesions by using shutter-speed dynamic contrast-enhanced MR imaging.,"

Purpose

To assess the accuracy of the shutter-speed approach compared with standard approach dynamic contrast material-enhanced magnetic resonance (MR) imaging pharmacokinetic analysis for breast cancer diagnosis.

Materials and methods

This study was approved by the institutional review board and was HIPAA compliant. Informed consent was obtained from 89 high-risk women (age range, 28-83 years) who had 92 suspicious lesions with negative findings at mammography (but visible at MR imaging). Each underwent a research dynamic contrast-enhanced MR imaging examination just prior to a clinical MR imaging-guided interventional procedure. Tumor region of interest (ROI) averaged and (for some) pixel-by-pixel dynamic contrast-enhanced time-course data, together with mean arterial input function, were subjected to serial standard and shutter-speed approach analyses to extract pharmacokinetic parameters, including rate constant for passive contrast reagent transfer between plasma and interstitium (K(trans)) and interstitial space volume fraction, or v(e). Pathologic findings were used as reference standards. Diagnostic accuracy was assessed with receiver operating characteristic analyses.

Results

The pathologic analyses revealed 20 malignant and 72 benign lesions. Positive predictive value of the institutional clinical breast MR imaging protocol was 22%. At 100% sensitivity, ROI-averaged shutter-speed approach K(trans) had significantly (P = .008) higher diagnostic specificity than standard approach K(trans): 86.1% versus 77.8%. The difference in the ROI-averaged K(trans) parameter value, or ΔK(trans) (≡ K(trans) [shutter-speed approach] - K(trans) [standard approach]), had even higher specificity (88.9%). Combined use of ROI analysis and pixel-by-pixel mapping of ΔK(trans) achieved 98.6% specificity at 100% sensitivity.

Conclusion

The use of the shutter-speed dynamic contrast-enhanced MR imaging method has the potential to improve breast cancer diagnostic accuracy and reduce putatively unnecessary biopsy procedures that yield benign pathologic findings.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11102413/-/DC1.",2011-08-09 +21989211,Inferring viral quasispecies spectra from 454 pyrosequencing reads.,"

Background

RNA viruses infecting a host usually exist as a set of closely related sequences, referred to as quasispecies. The genomic diversity of viral quasispecies is a subject of great interest, particularly for chronic infections, since it can lead to resistance to existing therapies. High-throughput sequencing is a promising approach to characterizing viral diversity, but unfortunately standard assembly software was originally designed for single genome assembly and cannot be used to simultaneously assemble and estimate the abundance of multiple closely related quasispecies sequences.

Results

In this paper, we introduce a new Viral Spectrum Assembler (ViSpA) method for quasispecies spectrum reconstruction and compare it with the state-of-the-art ShoRAH tool on both simulated and real 454 pyrosequencing shotgun reads from HCV and HIV quasispecies. Experimental results show that ViSpA outperforms ShoRAH on simulated error-free reads, correctly assembling 10 out of 10 quasispecies and 29 sequences out of 40 quasispecies. While ShoRAH has a significant advantage over ViSpA on reads simulated with sequencing errors due to its advanced error correction algorithm, ViSpA is better at assembling the simulated reads after they have been corrected by ShoRAH. ViSpA also outperforms ShoRAH on real 454 reads. Indeed, 7 most frequent sequences reconstructed by ViSpA from a real HCV dataset are viable (do not contain internal stop codons), and the most frequent sequence was within 1% of the actual open reading frame obtained by cloning and Sanger sequencing. In contrast, only one of the sequences reconstructed by ShoRAH is viable. On a real HIV dataset, ShoRAH correctly inferred only 2 quasispecies sequences with at most 4 mismatches whereas ViSpA correctly reconstructed 5 quasispecies with at most 2 mismatches, and 2 out of 5 sequences were inferred without any mismatches. ViSpA source code is available at http://alla.cs.gsu.edu/~software/VISPA/vispa.html.

Conclusions

ViSpA enables accurate viral quasispecies spectrum reconstruction from 454 pyrosequencing reads. We are currently exploring extensions applicable to the analysis of high-throughput sequencing data from bacterial metagenomic samples and ecological samples of eukaryote populations.",2011-07-28 +21414992,Integrative network alignment reveals large regions of global network similarity in yeast and human.,"

Motivation

High-throughput methods for detecting molecular interactions have produced large sets of biological network data with much more yet to come. Analogous to sequence alignment, efficient and reliable network alignment methods are expected to improve our understanding of biological systems. Unlike sequence alignment, network alignment is computationally intractable. Hence, devising efficient network alignment heuristics is currently a foremost challenge in computational biology.

Results

We introduce a novel network alignment algorithm, called Matching-based Integrative GRAph ALigner (MI-GRAAL), which can integrate any number and type of similarity measures between network nodes (e.g. proteins), including, but not limited to, any topological network similarity measure, sequence similarity, functional similarity and structural similarity. Hence, we resolve the ties in similarity measures and find a combination of similarity measures yielding the largest contiguous (i.e. connected) and biologically sound alignments. MI-GRAAL exposes the largest functional, connected regions of protein-protein interaction (PPI) network similarity to date: surprisingly, it reveals that 77.7% of proteins in the baker's yeast high-confidence PPI network participate in such a subnetwork that is fully contained in the human high-confidence PPI network. This is the first demonstration that species as diverse as yeast and human contain so large, continuous regions of global network similarity. We apply MI-GRAAL's alignments to predict functions of un-annotated proteins in yeast, human and bacteria validating our predictions in the literature. Furthermore, using network alignment scores for PPI networks of different herpes viruses, we reconstruct their phylogenetic relationship. This is the first time that phylogeny is exactly reconstructed from purely topological alignments of PPI networks.

Availability

Supplementary files and MI-GRAAL executables: http://bio-nets.doc.ic.ac.uk/MI-GRAAL/.",2011-03-16 +22155673,Evaluation of a Web-based intervention to promote hand hygiene: exploratory randomized controlled trial.,"

Background

Hand-washing is regarded as a potentially important behavior for preventing transmission of respiratory infection, particularly during a pandemic.

Objective

The objective of our study was to evaluate whether a Web-based intervention can encourage more frequent hand-washing in the home, and to examine potential mediators and moderators of outcomes, as a necessary first step before testing effects of the intervention on infection rates in the PRIMIT trial (PRimary care trial of a website based Infection control intervention to Modify Influenza-like illness and respiratory infection Transmission).

Methods

In a parallel-group pragmatic exploratory trial design, 517 nonblinded adults recruited through primary care were automatically randomly assigned to a fully automated intervention comprising 4 sessions of tailored motivational messages and self-regulation support (n = 324) or to a no-intervention control group (n = 179; ratio 2:1). Hand-washing frequency and theory of planned behavior cognitions relating to hand-washing were assessed by online questionnaires at baseline (in only half of the control participants, to permit evaluation of effects of baseline assessment on effect sizes), at 4 weeks (postintervention; all participants), and at 12 weeks.

Results

Hand-washing rates in the intervention group were higher at 4 weeks than in the control group (mean 4.40, n = 285 and mean 4.04, n = 157, respectively; P < .001, Cohen d = 0.42) and remained higher at 12 weeks (mean 4.45, n = 282 and mean 4.12, n = 154, respectively; P < .001, Cohen d = 0.34). Hand-washing intentions and positive attitudes toward hand-washing increased more from baseline to 4 weeks in the intervention group than in the control group. Mediation analyses revealed positive indirect effects of the intervention on change in hand-washing via intentions (coefficient = .15, 95% confidence interval [CI], .08-.26) and attitudes (coefficient = 0.16, 95% CI, .09-.26). Moderator analyses confirmed that the intervention was similarly effective for men and women, those of higher and lower socioeconomic status, and those with higher and lower levels of perceived risk.

Conclusions

This study provides promising evidence that Web-based interventions could potentially provide an effective method of promoting hand hygiene in the home. Data were collected during the 2010 influenza pandemic, when participants in both groups had already been exposed to extensive publicity about the need for hand hygiene, suggesting that our intervention could add to existing public health campaigns. However, further research is required to determine the effects of the intervention on actual infection rates.

Trial

International Standard Randomized Controlled Trial Number (ISRCTN): 75058295; http://www.controlled-trials.com/ISRCTN75058295 (Archived by WebCite at http://www.webcitation.org/62KSbkNmm).",2011-12-09 +21872950,Ranking freshwater fish farms for the risk of pathogen introduction and spread.,"A semi-quantitative model is presented to rank freshwater rainbow trout farms within a country or region with regards to the risk of becoming infected and spreading a specified pathogen. The model was developed to support a risk-based surveillance scheme for notifiable salmonid pathogens. Routes of pathogen introduction and spread were identified through a process of expert consultation in a series of workshops. The routes were combined into themes (e.g. exposure via water, mechanical transmission). Themes were weighted based on expert opinion. Risk factors for each route were scored and combined into a theme score which was adjusted by the weight. The number of sources and consignments were used to assess introduction via live fish movements onto the farm. Biosecurity measures were scored to assess introduction on fomites. Upstream farms, wild fish and processing plants were included in assessing the likelihood of introduction by water. The scores for each theme were combined to give separate risk scores for introduction and spread. A matrix was used to combine these to give an overall risk score. A case study for viral haemorrhagic septicaemia is presented. Nine farms that represent a range of farming practices of rainbow trout farms in England and Wales are used as worked examples of the model. The model is suited to risk rank freshwater salmonid farms which are declared free of the pathogen(s) under consideration. The score allocated to a farm does not equate to a quantitative probability estimate of the farm to become infected or spread infection. Nevertheless, the method provides a transparent approach to ranking farms with regards to pathogen transmission risks. 
The output of the model at a regional or national level allows the allocation of surveillance effort to be risk based. It also provides fish farms with information on how they can reduce their risk score by improving biosecurity. The framework of the model can be applied to different production systems which may have other routes of disease spread. Further work is recommended to validate the allocated scores. Expert opinion was obtained through workshops, where the outputs from groups were single point estimates for relative weights of risks. More formal expert opinion elicitation methods could be used to capture variation in the experts' estimates and uncertainty and would provide data on which to simulate the model stochastically. The model can be downloaded (in Microsoft(®)-Excel format) from the Internet at: http://www.cefas.defra.gov.uk/6701.aspx.",2011-08-27 +21867558,Global transcriptome analysis of two ameiotic1 alleles in maize anthers: defining steps in meiotic entry and progression through prophase I.,"

Background

Developmental cues to start meiosis occur late in plants. Ameiotic1 (Am1) encodes a plant-specific nuclear protein (AM1) required for meiotic entry and progression through early prophase I. Pollen mother cells (PMCs) remain mitotic in most am1 mutants including am1-489, while am1-praI permits meiotic entry but PMCs arrest at the leptotene/zygotene (L/Z) transition, defining the roles of AM1 protein in two distinct steps of meiosis. To gain more insights into the roles of AM1 in the transcriptional pre-meiotic and meiotic programs, we report here an in depth analysis of gene expression alterations in carefully staged anthers at 1 mm (meiotic entry) and 1.5 mm (L/Z) caused by each of these am1 alleles.

Results

1.0 mm and 1.5 mm anthers of am1-489 and am1-praI were profiled in comparison to fertile siblings on Agilent® 4 × 44 K microarrays. Both am1-489 and am1-praI anthers are cytologically normal at 1.0 mm and show moderate transcriptome alterations. At the 1.5-mm stage both mutants are aberrant cytologically, and show more drastic transcriptome changes. There are substantially more absolute On/Off and twice as many differentially expressed genes (sterile versus fertile) in am1-489 than in am1-praI. At 1.5 mm a total of 4,418 genes are up- or down-regulated in either am1-489 or am1-praI anthers. These are predominantly stage-specific transcripts. Many putative meiosis-related genes were found among them including a small subset of allele-specific, mis-regulated genes specific to the PMCs. Nearly 60% of transcriptome changes in the set of transcripts mis-regulated in both mutants (N = 530) are enriched in PMCs, and only 1% are enriched in the tapetal cell transcriptome. All array data reported herein will be deposited and accessible at MaizeGDB http://www.maizegdb.org/.

Conclusions

Our analysis of anther transcriptome modulations by two distinct am1 alleles, am1-489 and am1-praI, redefines the role of AM1 as a modulator of expression of a subset of meiotic genes, important for meiotic progression and provides stage-specific insights into the genetic networks associated with meiotic entry and early prophase I progression.",2011-08-26 +21645377,CloudAligner: A fast and full-featured MapReduce based tool for sequence mapping.,"

Background

Research in genetics has developed rapidly recently due to the aid of next generation sequencing (NGS). However, massively-parallel NGS produces enormous amounts of data, which leads to storage, compatibility, scalability, and performance issues. The Cloud Computing and MapReduce framework, which utilizes hundreds or thousands of shared computers to map sequencing reads quickly and efficiently to reference genome sequences, appears to be a very promising solution for these issues. Consequently, it has been adopted by many organizations recently, and the initial results are very promising. However, since these are only initial steps toward this trend, the developed software does not provide adequate primary functions like bisulfite, pair-end mapping, etc., in on-site software such as RMAP or BS Seeker. In addition, existing MapReduce-based applications were not designed to process the long reads produced by the most recent second-generation and third-generation NGS instruments and, therefore, are inefficient. Last, it is difficult for a majority of biologists untrained in programming skills to use these tools because most were developed on Linux with a command line interface.

Results

To urge the trend of using Cloud technologies in genomics and prepare for advances in second- and third-generation DNA sequencing, we have built a Hadoop MapReduce-based application, CloudAligner, which achieves higher performance, covers most primary features, is more accurate, and has a user-friendly interface. It was also designed to be able to deal with long sequences. The performance gain of CloudAligner over Cloud-based counterparts (35 to 80%) mainly comes from the omission of the reduce phase. In comparison to local-based approaches, the performance gain of CloudAligner is from the partition and parallel processing of the huge reference genome as well as the reads. The source code of CloudAligner is available at http://cloudaligner.sourceforge.net/ and its web version is at http://mine.cs.wayne.edu:8080/CloudAligner/.

Conclusions

Our results show that CloudAligner is faster than CloudBurst, provides more accurate results than RMAP, and supports various input as well as output formats. In addition, with the web-based interface, it is easier to use than its counterparts.",2011-06-06 +21716603,Probing of Brain States in Real-Time: Introducing the ConSole Environment.,"Recent years have seen huge advancements in the methods available and used in neuroscience employing EEG or MEG. However, the standard approach is to average a large number of trials for experimentally defined conditions in order to reduce intertrial-variability, i.e., treating it as a source of ""noise."" Yet it is now more and more accepted that trial-to-trial fluctuations bear functional significance, reflecting fluctuations of ""brain states"" that predispose perception and action. Such effects are often revealed in a pre-stimulus period, when comparing response variability to an invariant stimulus. However such offline analyses are disadvantageous as they are correlational by drawing conclusions in a post hoc-manner and stimulus presentation is random with respect to the feature of interest. A more direct test is to trigger stimulus presentation when the relevant feature is present. The current paper introduces Constance System for Online EEG (ConSole), a software package capable of analyzing ongoing EEG/MEG in real-time and presenting auditory and visual stimuli via internal routines. Stimulation via external devices (e.g., transcranial magnetic stimulation) or third-party software (e.g., PsyScope X) is possible by sending TTL-triggers. With ConSole it is thus possible to target the stimulation at specific brain states. In contrast to many available applications, ConSole is open-source. Its modular design enhances the power of the software as it can be easily adapted to new challenges and writing new experiments is an easy task. ConSole is already pre-equipped with modules performing standard signal processing steps. 
The software is also independent from the EEG/MEG system, as long as a driver can be written (currently two EEG systems are supported). Besides a general introduction, we present benchmark data regarding performance and validity of the calculations used, as well as three example applications of ConSole in different settings. ConSole can be downloaded at: http://console-kn.sf.net.",2011-03-09 +21352070,Emerging therapeutics for primary peritoneal cancer.,"

Introduction

Primary peritoneal cancer describes a malignancy that originates from the peritoneal lining of the abdomen. The diagnosis is clearest when the ovaries are uninvolved; however, this is rarely the case and, as such, the declaration is often made pathologically by extrinsic or secondary involvement of the ovaries. The disease shares nearly all of the clinicopathologic features of primary ovarian cancer, most importantly, a molecular homology, which has made it unfruitful for considering it a different entity. Because of this, both standard of care treatment algorithms and contemporary drug development protocols nearly uniformly consider these cancers as primary ovarian cancers.

Areas covered

A Medline search was performed as well as a review of trials presented in the National Cancer Institute clinical trials website (http://www.Clinicaltrials.gov). We also reviewed abstracts presented at recent oncology congresses, such as the 2010 Annual meetings of the Society of Gynecologic Oncologists and the American Society of Clinical Oncology. The purpose of this review is to highlight areas of current drug development for patients with primary peritoneal carcinoma. While there are numerous investigational agents being evaluated which follow patients with this disease, our review focuses on the most promising agents that are in mature clinical development. In addition, given the recent positive Phase III data of bevacizumab in the first-line setting for patients with this disease, we consider changes that we can anticipate in this field.

Expert opinion

Numerous novel agents are being explored in this disease with the majority focusing on direct and indirect perturbations of tumor angiogenesis. Based on ongoing and recently completed investigations, targeted therapies are likely to become part of the armamentarium of first-line and recurrent treatment for patients with peritoneal cancers. Future studies of pathway-specific targeting will probably include pretreatment biomarker selection or eligibility criteria as well as combinatorial strategies.",2011-03-01 +21354447,"Validation of the ""Metroticket"" predictor in a cohort of patients transplanted for predominantly HBV-related hepatocellular carcinoma.","

Background & aims

The ""Metroticket"" prognostic model for survival post liver transplant for hepatocellular carcinoma (HCC) was developed from a European cohort of patients with predominantly alcoholic liver disease and hepatitis C-related HCC. The aim of this study was to evaluate the prognostic value of the Metroticket in an independent cohort of patients with predominantly HBV-related HCC, in an Asia-Pacific transplant programme.

Methods

All patients listed for HCC at the New Zealand Liver Transplant Unit (NZLTU) between January 1998 and November 2009 were included. For each patient, the predicted 3 and 5 year post-transplant survival score was calculated using the Metroticket model (http://www.hcc-oltmetroticket.org/calculator/index.php). The observed and predicted survivals were compared.

Results

Ninety-five patients with HCC were listed, 82 were transplanted (40 with HBV) and 13 delisted for progression. Predicted survival calculated by the Metroticket model based on pre-transplant radiological data (n = 82) was 76.3% and 69.7% at 3 and 5 years, respectively, while the observed survival was 83% (49/59) and 74% (35/47), respectively. Of the 40 patients with HBV, observed survivals were 84% (26/31) and 80% (20/25) at 3 and 5 years, compared with 80% (23/28) and 69.6% (16/23), respectively, for the 42 patients without HBV. On intent to treat analysis, survival after listing was 73.8% (95% CI 62.7-82.1) at 3 years and 69.1% (53.7-78.2%) at 5 years. AFP level was associated with vascular invasion.

Conclusions

The Metroticket calculator incorporating pre-transplant radiological staging was an accurate predictor of post-transplant survival in a cohort of predominantly HBV-related HCC.",2011-02-25 +21595921,'A major lobbying effort to change and unify the excise structure in six Central American countries': How British American Tobacco influenced tax and tariff rates in the Central American Common Market.,"

Background

Transnational tobacco companies (TTCs) may respond to processes of regional trade integration both by acting politically to influence policy and by reorganising their own operations. The Central American Common Market (CACM) was reinvigorated in the 1990s, reflecting processes of regional trade liberalisation in Latin America and globally. This study aimed to ascertain how British American Tobacco (BAT), which dominated the markets of the CACM, sought to influence policy towards it by member country governments and how the CACM process impacted upon BAT's operations.

Methods

The study analysed internal tobacco industry documents released as a result of litigation in the US and available from the online Legacy Tobacco Documents Library at http://legacy.library.ucsf.edu/. Documents were retrieved by searching the BAT collection using key terms in an iterative process. Analysis was based on an interpretive approach involving a process of attempting to understand the meanings of individual documents and relating these to other documents in the set, identifying the central themes of documents and clusters of documents, contextualising the documentary data, and choosing representative material in order to present findings.

Results

Utilising its multinational character, BAT was able to act in a coordinated way across the member countries of the CACM to influence tariffs and taxes to its advantage. Documents demonstrate a high degree of access to governments and officials. The company conducted a coordinated, and largely successful, attempt to keep external tariff rates for cigarettes high and to reduce external tariffs for key inputs, whilst also influencing the harmonisation of excise taxes between countries. Protected by these high external tariffs, it reorganised its own operations to take advantage of regional economies of scale. In direct contradiction to arguments presented to CACM governments that affording the tobacco industry protection via high cigarette tariffs would safeguard employment, the company's regional reorganisation involved the loss of hundreds of jobs.

Conclusions

Regional integration organisations and their member states should be aware of the capacity of TTCs to act in a coordinated transnational manner to influence policy in their own interests, and coordinate their own public health and tax policies in a similarly effective way.",2011-05-19 +21699684,Study of large and highly stratified population datasets by combining iterative pruning principal component analysis and structure.,"

Background

The ever increasing sizes of population genetic datasets pose great challenges for population structure analysis. The Tracy-Widom (TW) statistical test is widely used for detecting structure. However, it has not been adequately investigated whether the TW statistic is susceptible to type I error, especially in large, complex datasets. Non-parametric, Principal Component Analysis (PCA) based methods for resolving structure have been developed which rely on the TW test. Although PCA-based methods can resolve structure, they cannot infer ancestry. Model-based methods are still needed for ancestry analysis, but they are not suitable for large datasets. We propose a new structure analysis framework for large datasets. This includes a new heuristic for detecting structure and incorporation of the structure patterns inferred by a PCA method to complement STRUCTURE analysis.

Results

A new heuristic called EigenDev for detecting population structure is presented. When tested on simulated data, this heuristic is robust to sample size. In contrast, the TW statistic was found to be susceptible to type I error, especially for large population samples. EigenDev is thus better-suited for analysis of large datasets containing many individuals, in which spurious patterns are likely to exist and could be incorrectly interpreted as population stratification. EigenDev was applied to the iterative pruning PCA (ipPCA) method, which resolves the underlying subpopulations. This subpopulation information was used to supervise STRUCTURE analysis to infer patterns of ancestry at an unprecedented level of resolution. To validate the new approach, a bovine and a large human genetic dataset (3945 individuals) were analyzed. We found new ancestry patterns consistent with the subpopulations resolved by ipPCA.

Conclusions

The EigenDev heuristic is robust to sampling and is thus superior for detecting structure in large datasets. The application of EigenDev to the ipPCA algorithm improves the estimation of the number of subpopulations and the individual assignment accuracy, especially for very large and complex datasets. Furthermore, we have demonstrated that the structure resolved by this approach complements parametric analysis, allowing a much more comprehensive account of population structure. The new version of the ipPCA software with EigenDev incorporated can be downloaded from http://www4a.biotec.or.th/GI/tools/ippca.",2011-06-23 +21880096,"A 10-year survey of US deans: trends, challenges, and mentoring in prosthodontics. Part 2.","

Purpose

Part 2 of this survey reports on the 2009 survey findings distributed to the deans of US dental schools. A national, electronic survey of 58 dental school deans was distributed by e-mail to evaluate an interest in specialty training, an interest in specialty training in prosthodontics, faculty shortage issues, predoctoral curriculum in prosthodontics, ideology regarding dental specialties, and the administrative position of prosthodontics within the schools.

Materials and methods

The survey data were transferred to an online spreadsheet program for statistical analysis (Key Survey, Inc. http://www.keysurvey.com, Braintree, MA). The opinions of dental school deans were viewed as legitimate indicators of change within predoctoral and postdoctoral prosthodontic education. Statistical analysis was carried out using Statistica Version 9.1 (Statsoft, Tulsa, OK).

Results

Of the 58 deans, 42 deans responded, for a 72.4% response rate. Twenty-three deans reported an increase in the number of students seeking specialty training after dental school. Only three deans reported a decrease in those seeking specialty training. In the 2009 survey, 45% of the deans responded that there was an increased interest in prosthodontics. One or more open faculty positions in prosthodontics existed at 24 (59%) of the dental schools, and 30 (71%) offered at least one incentive or a variety of incentives to recruit faculty. The 2009 respondents to the deans' survey revealed predoctoral student exposure to prosthodontists was high, and exposure to advanced education in prosthodontics students was low. A survey of internal school programs that might have an impact on an increased interest in prosthodontics revealed the presence of a predoctoral mentoring program for prosthodontics in 36 (88%) of the institutions. The clinical curriculum included treatment of a variety of cases including complex cases as defined by a diagnostic classification system. The 2009 survey respondents reported an increase in the number of schools where prosthodontics is a separate entity or department.

Conclusion

Deans reported an increased interest in prosthodontics in the 2009 survey. Open faculty positions in prosthodontics existed in the majority of dental schools, and most schools offered incentives to recruit faculty. The survey of deans found a very high level of exposure of dental students to full-time prosthodontists and a very low exposure level to students enrolled in advanced education in prosthodontics. The establishment of mentoring programs in prosthodontics was reported by most deans, and the predoctoral curriculum included treating complex cases. Most deans stated that dual-specialty training in prosthodontics and periodontics would be beneficial. The 2009 survey reported an increase in the number of departments of prosthodontics in US schools.",2011-08-31 +21436089,Mammography with synchrotron radiation: first clinical experience with phase-detection technique.,"

Purpose

To prospectively evaluate the diagnostic contribution of mammography with synchrotron radiation in patients with questionable or suspicious breast abnormalities identified at combined digital mammography (DM) and ultrasonography (US).

Materials and methods

The ethics committee approved this prospective study, and written informed consent was obtained from all patients. Mammography with synchrotron radiation was performed with a phase-detection technique at a synchrotron radiation laboratory. Forty-nine women who met at least one of the inclusion criteria (palpable mass, focal asymmetry, architectural distortion, or equivocal or suspicious mass at DM; none clarified at US) were enrolled. Forty-seven women (mean age, 57.8 years ± 8.8 [standard deviation]; age range, 43-78 years) completed the study protocol, which involved biopsy or follow-up for 1 year as the reference standard. Breast Imaging Reporting and Data System (BI-RADS) scores of 1-3 were considered to indicate a negative result, while scores 4-5 were considered to indicate a positive result. The visibility of breast abnormalities and the glandular parenchymal structure at DM and at mammography with synchrotron radiation was compared by using the Wilcoxon signed rank test.

Results

In 29 of the 31 patients with a final diagnosis of benign entity, mammography with synchrotron radiation yielded BI-RADS scores of 1-3. In 13 of the remaining 16 patients with a final diagnosis of malignancy, mammography with synchrotron radiation yielded BI-RADS scores of 4-5. Therefore, a sensitivity of 81% (13 of 16 patients) and a specificity of 94% (29 of 31 patients) were achieved with use of the described BI-RADS dichotomization system.

Conclusion

These study results suggest that mammography with synchrotron radiation can be used to clarify cases of questionable or suspicious breast abnormalities identified at DM.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11100745/-/DC1.",2011-03-24 +21536339,Identification of enzymatic and regulatory genes of plant metabolism through QTL analysis in Arabidopsis.,"The biochemical diversity in the plant kingdom is estimated to well exceed 100,000 distinct compounds (Weckwerth, 2003) and 4000 to 20,000 metabolites per species seem likely (Fernie et al., 2004). In recent years extensive progress has been made towards the identification of enzymes and regulatory genes working in a complex network to generate this large arsenal of metabolites. Genetic loci influencing quantitative traits, e.g. metabolites or biomass, may be mapped to associated molecular markers, a method called quantitative trait locus mapping (QTL mapping), which may facilitate the identification of novel genes in biochemical pathways. Arabidopsis thaliana, as a model organism for seed plants, is a suitable target for metabolic QTL (mQTL) studies due to the availability of highly developed molecular and genetic tools, and the extensive knowledge accumulated on the metabolite profile. While intensely studied, in particular since the availability of its complete sequence, the genome of Arabidopsis still comprises a large proportion of genes with only tentative function based on sequence homology. From a total number of 33,518 genes currently listed (TAIR 9, http://www.arabidopsis.org), only about 25% have direct experimental evidence for their molecular function and biological process, while for more than 30% no biological data are available. Modern metabolomics approaches together with continually extended genomic resources will facilitate the task of assigning functions to those genes. In our previous study we reported on the identification of mQTL (Lisec et al., 2008). 
In this paper, we summarize the current status of mQTL analyses and causal gene identification in Arabidopsis and present evidence that a candidate gene located within the confidence interval of a fumarate mQTL (AT5G50950) encoding a putative fumarase is likely to be the causal gene of this QTL. The total number of genes molecularly identified based on mQTL studies is still limited, but the advent of multi-parallel analysis techniques for measurement of gene expression, as well as protein and metabolite abundances and for rapid gene identification will assist in the important task of assigning enzymes and regulatory genes to the growing network of known metabolic reactions.",2011-05-04 +21527044,Sequential interim analyses of survival data in DNA microarray experiments.,"

Background

Discovery of biomarkers that are correlated with therapy response and thus with survival is an important goal of medical research on severe diseases, e.g. cancer. Frequently, microarray studies are performed to identify genes of which the expression levels in pretherapeutic tissue samples are correlated to survival times of patients. Typically, such a study can take several years until the full planned sample size is available. Therefore, interim analyses are desirable, offering the possibility of stopping the study earlier, or of performing additional laboratory experiments to validate the role of the detected genes. While many methods correcting the multiple testing bias introduced by interim analyses have been proposed for studies of one single feature, there are still open questions about interim analyses of multiple features, particularly of high-dimensional microarray data, where the number of features clearly exceeds the number of samples. Therefore, we examine false discovery rates and power rates in microarray experiments performed during interim analyses of survival studies. In addition, the early stopping based on interim results of such studies is evaluated. As stop criterion we employ the achieved average power rate, i.e. the proportion of detected true positives, for which a new estimator is derived and compared to existing estimators.

Results

In a simulation study, pre-specified levels of the false discovery rate are maintained in each interim analysis, where reduced levels as used in classical group sequential designs of one single feature are not necessary. Average power rates increase with each interim analysis, and many studies can be stopped prior to their planned end when a certain pre-specified power rate is achieved. The new estimator for the power rate slightly deviates from the true power rate but is comparable to other estimators.

Conclusions

Interim analyses of microarray experiments can provide evidence for early stopping of long-term survival studies. The developed simulation framework, which we also offer as a new R package 'SurvGenesInterim' available at http://survgenesinter.R-Forge.R-Project.org, can be used for sample size planning of the evaluated study design.",2011-04-29 +21269479,Graphical analysis of pH-dependent properties of proteins predicted using PROPKA.,"

Background

Charge states of ionizable residues in proteins determine their pH-dependent properties through their pKa values. Thus, various theoretical methods to determine ionization constants of residues in biological systems have been developed. One of the more widely used approaches for predicting pKa values in proteins is the PROPKA program, which provides convenient structural rationalization of the predicted pKa values without any additional calculations.

Results

The PROPKA Graphical User Interface (GUI) is a new tool for studying the pH-dependent properties of proteins such as charge and stabilization energy. It facilitates a quantitative analysis of pKa values of ionizable residues together with their structural determinants by providing a direct link between the pKa data, predicted by the PROPKA calculations, and the structure via the Visual Molecular Dynamics (VMD) program. The GUI also calculates contributions to the pH-dependent unfolding free energy at a given pH for each ionizable group in the protein. Moreover, the PROPKA-computed pKa values or energy contributions of the ionizable residues in question can be displayed interactively. The PROPKA GUI can also be used for comparing pH-dependent properties of more than one structure at the same time.

Conclusions

The GUI considerably extends the analysis and validation possibilities of the PROPKA approach. The PROPKA GUI can conveniently be used to investigate ionizable groups, and their interactions, of residues with significantly perturbed pKa values or residues that contribute to the stabilization energy the most. Charge-dependent properties can be studied either for a single protein or simultaneously with other homologous structures, which makes it a helpful tool, for instance, in protein design studies or structure-based function predictions. The GUI is implemented as a Tcl/Tk plug-in for VMD, and can be obtained online at http://propka.ki.ku.dk/~luca/wiki/index.php/GUI_Web.",2011-01-26 +21846404,Prediction of conformational B-cell epitopes from 3D structures by random forests with a distance-based feature.,"

Background

Antigen-antibody interactions are key events in the immune system, which provide important clues to the immune processes and responses. In antigen-antibody interactions, the specific sites on the antigens that are directly bound by the B-cell produced antibodies are well known as B-cell epitopes. The identification of epitopes is a hot topic in bioinformatics because of their potential use in the epitope-based drug design. Although most B-cell epitopes are discontinuous (or conformational), insufficient effort has been put into the conformational epitope prediction, and the performance of existing methods is far from satisfactory.

Results

In order to develop the high-accuracy model, we focus on some possible aspects concerning the prediction performance, including the impact of interior residues, different contributions of adjacent residues, and the imbalanced data which contain much more non-epitope residues than epitope residues. In order to address above issues, we take following strategies. Firstly, a concept of 'thick surface patch' instead of 'surface patch' is introduced to describe the local spatial context of each surface residue, which considers the impact of interior residue. The comparison between the thick surface patch and the surface patch shows that interior residues contribute to the recognition of epitopes. Secondly, statistical significance of the distance distribution difference between non-epitope patches and epitope patches is observed, thus an adjacent residue distance feature is presented, which reflects the unequal contributions of adjacent residues to the location of binding sites. Thirdly, a bootstrapping and voting procedure is adopted to deal with the imbalanced dataset. Based on the above ideas, we propose a new method to identify the B-cell conformational epitopes from 3D structures by combining conventional features and the proposed feature, and the random forest (RF) algorithm is used as the classification engine. The experiments show that our method can predict conformational B-cell epitopes with high accuracy. Evaluated by leave-one-out cross validation (LOOCV), our method achieves the mean AUC value of 0.633 for the benchmark bound dataset, and the mean AUC value of 0.654 for the benchmark unbound dataset. When compared with the state-of-the-art prediction models in the independent test, our method demonstrates comparable or better performance.

Conclusions

Our method is demonstrated to be effective for the prediction of conformational epitopes. Based on the study, we develop a tool to predict the conformational epitopes from 3D structures, available at http://code.google.com/p/my-project-bpredictor/downloads/list.",2011-08-17 +21283776,Is my network module preserved and reproducible?,"In many applications, one is interested in determining which of the properties of a network module change across conditions. For example, to validate the existence of a module, it is desirable to show that it is reproducible (or preserved) in an independent test network. Here we study several types of network preservation statistics that do not require a module assignment in the test network. We distinguish network preservation statistics by the type of the underlying network. Some preservation statistics are defined for a general network (defined by an adjacency matrix) while others are only defined for a correlation network (constructed on the basis of pairwise correlations between numeric variables). Our applications show that the correlation structure facilitates the definition of particularly powerful module preservation statistics. We illustrate that evaluating module preservation is in general different from evaluating cluster preservation. We find that it is advantageous to aggregate multiple preservation statistics into summary preservation statistics. We illustrate the use of these methods in six gene co-expression network applications including 1) preservation of cholesterol biosynthesis pathway in mouse tissues, 2) comparison of human and chimpanzee brain networks, 3) preservation of selected KEGG pathways between human and chimpanzee brain networks, 4) sex differences in human cortical networks, 5) sex differences in mouse liver networks. While we find no evidence for sex specific modules in human cortical networks, we find that several human cortical modules are less preserved in chimpanzees. 
In particular, apoptosis genes are differentially co-expressed between humans and chimpanzees. Our simulation studies and applications show that module preservation statistics are useful for studying differences between the modular structure of networks. Data, R software and accompanying tutorials can be downloaded from the following webpage: http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/ModulePreservation.",2011-01-20 +21827692,Genome-wide algorithm for detecting CNV associations with diseases.,"

Background

SNP genotyping arrays have been developed to characterize single-nucleotide polymorphisms (SNPs) and DNA copy number variations (CNVs). Nonparametric and model-based statistical algorithms have been developed to detect CNVs from SNP data using the marker intensities. However, these algorithms lack specificity to detect small CNVs owing to the high false positive rate when calling CNVs based on the intensity values. Therefore, the resulting association tests lack power even if the CNVs affecting disease risk are common. An alternative procedure called PennCNV uses information from both the marker intensities as well as the genotypes and therefore has increased sensitivity.

Results

By using the hidden Markov model (HMM) implemented in PennCNV to derive the probabilities of different copy number states which we subsequently used in a logistic regression model, we developed a new genome-wide algorithm to detect CNV associations with diseases. We compared this new method with the association test applied to the most probable copy number state for each individual that is provided by PennCNV after it performs an initial HMM analysis followed by application of the Viterbi algorithm, which removes information about copy number probabilities. In one of our simulation studies, we showed that for large CNVs (number of SNPs ≥ 10), the association tests based on PennCNV calls gave more significant results, but the new algorithm retained high power. For small CNVs (number of SNPs <10), the logistic algorithm provided smaller average p-values (e.g., p = 7.54e-17 when relative risk RR = 3.0) in all the scenarios and could capture signals that PennCNV did not (e.g., p = 0.020 when RR = 3.0). From a second set of simulations, we showed that the new algorithm is more powerful in detecting disease associations with small CNVs (number of SNPs ranging from 3 to 5) under different penetrance models (e.g., when RR = 3.0, for relatively weak signals, power = 0.8030 compared to 0.2879 obtained from the association tests based on PennCNV calls). The new method was implemented in software GWCNV. It is freely available at http://gwcnv.sourceforge.net, distributed under a GPL license.

Conclusions

We conclude that the new algorithm is more sensitive and can be more powerful in detecting CNV associations with diseases than the existing HMM algorithm, especially when the CNV association signal is weak and a limited number of SNPs are located in the CNV.",2011-08-09 +21471273,Mild cognitive impairment: baseline and longitudinal structural MR imaging measures improve predictive prognosis.,"

Purpose

To assess whether single-time-point and longitudinal volumetric magnetic resonance (MR) imaging measures provide predictive prognostic information in patients with amnestic mild cognitive impairment (MCI).

Materials and methods

This study was conducted with institutional review board approval and in compliance with HIPAA regulations. Written informed consent was obtained from all participants or the participants' legal guardians. Cross-validated discriminant analyses of MR imaging measures were performed to differentiate 164 Alzheimer disease (AD) cases from 203 healthy control cases. Separate analyses were performed by using data from MR images obtained at one time point or by combining single-time-point measures with 1-year change measures. Resulting discriminant functions were applied to 317 MCI cases to derive individual patient risk scores. Risk of conversion to AD was estimated as a continuous function of risk score percentile. Kaplan-Meier survival curves were computed for risk score quartiles. Odds ratios (ORs) for the conversion to AD were computed between the highest and lowest quartile scores.

Results

Individualized risk estimates from baseline MR examinations indicated that the 1-year risk of conversion to AD ranged from 3% to 40% (average group risk, 17%; OR, 7.2 for highest vs lowest score quartiles). Including measures of 1-year change in global and regional volumes significantly improved risk estimates (P = .001), with the risk of conversion to AD in the subsequent year ranging from 3% to 69% (average group risk, 27%; OR, 12.0 for highest vs lowest score quartiles).

Conclusion

Relative to the risk of conversion to AD conferred by the clinical diagnosis of MCI alone, MR imaging measures yield substantially more informative patient-specific risk estimates. Such predictive prognostic information will be critical if disease-modifying therapies become available.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11101975/-/DC1.",2011-04-06 +21797998,A consensus linkage map for molecular markers and quantitative trait loci associated with economically important traits in melon (Cucumis melo L.).,"

Background

A number of molecular marker linkage maps have been developed for melon (Cucumis melo L.) over the last two decades. However, these maps were constructed using different marker sets, thus, making comparative analysis among maps difficult. In order to solve this problem, a consensus genetic map in melon was constructed using primarily highly transferable anchor markers that have broad potential use for mapping, synteny, and comparative quantitative trait loci (QTL) analysis, increasing breeding effectiveness and efficiency via marker-assisted selection (MAS).

Results

Under the framework of the International Cucurbit Genomics Initiative (ICuGI, http://www.icugi.org), an integrated genetic map has been constructed by merging data from eight independent mapping experiments using a genetically diverse array of parental lines. The consensus map spans 1150 cM across the 12 melon linkage groups and is composed of 1592 markers (640 SSRs, 330 SNPs, 252 AFLPs, 239 RFLPs, 89 RAPDs, 15 IMAs, 16 indels and 11 morphological traits) with a mean marker density of 0.72 cM/marker. One hundred and ninety-six of these markers (157 SSRs, 32 SNPs, 6 indels and 1 RAPD) were newly developed, mapped or provided by industry representatives as released markers, including 27 SNPs and 5 indels from genes involved in the organic acid metabolism and transport, and 58 EST-SSRs. Additionally, 85 of 822 SSR markers contributed by Syngenta Seeds were included in the integrated map. In addition, 370 QTL controlling 62 traits from 18 previously reported mapping experiments using genetically diverse parental genotypes were also integrated into the consensus map. Some QTL associated with economically important traits detected in separate studies mapped to similar genomic positions. For example, independently identified QTL controlling fruit shape were mapped on similar genomic positions, suggesting that such QTL are possibly responsible for the phenotypic variability observed for this trait in a broad array of melon germplasm.

Conclusions

Even though relatively unsaturated genetic maps in a diverse set of melon market types have been published, the integrated saturated map presented herein should be considered the initial reference map for melon. Most of the mapped markers contained in the reference map are polymorphic in diverse collection of germplasm, and thus are potentially transferrable to a broad array of genetic experimentation (e.g., integration of physical and genetic maps, colinearity analysis, map-based gene cloning, epistasis dissection, and marker-assisted selection).",2011-07-28 +21148158,Translational research network and patient registry for auto-inflammatory diseases.,"

Objective

Auto-inflammatory diseases (AIDs) are characterized by recurrent self-limiting systemic inflammation. In a multicentre effort, we set out to register genetic, epidemiological and clinical features as well as prognostic factors of these diseases by prospective longitudinal and long-term documentation, in order to define novel AIDs and to better understand treatment responses and outcome.

Methods

In 2009, a federally funded clinical and research consortium (AID-Net) was established, including an online registry for AIDs (http://www.aid-register.uk-essen.de). Inclusion criteria are disease-associated mutations for hereditary periodic fever syndromes [FMF, hyperimmunoglobulinaemia D and periodic fever syndrome (HIDS), TNF receptor 1-associated periodic syndrome (TRAPS) and cryopyrin-associated periodic syndrome (CAPS)], or, alternatively, clinically confirmed AID, systemic-onset JIA (SoJIA) and periodic fever, aphthous stomatitis, pharyngitis and adenopathy (PFAPA) syndrome with unknown genetic background. Patients were recruited to the registry and patient material was deposited in biomaterial banks (DNA/serum). In addition, basic research projects were initiated that focus on molecular mechanisms of AID.

Results

During the first 9 months, 117 patients (65 males, 52 females; age 1-21 years) have been recorded and classified as FMF (n=84), HIDS (n=1), TRAPS (n=3) and CAPS (n=1); clinically confirmed AID (n=5); SoJIA (n=22); and PFAPA (n=1). One hundred and fifty blood samples of 18 patients were included in biomaterial banks.

Conclusion

Recruitment and follow-up of patients with AID will enable us to comprehensively address the correlation between clinical and epidemiological data, genetics and biomarkers. The translational approach may help to identify genetic or inflammatory markers relevant for the course and outcome of diseases.",2011-01-01 +21467251,Patient-specific radiation dose and cancer risk for pediatric chest CT.,"

Purpose

To estimate patient-specific radiation dose and cancer risk for pediatric chest computed tomography (CT) and to evaluate factors affecting dose and risk, including patient size, patient age, and scanning parameters.

Materials and methods

The institutional review board approved this study and waived informed consent. This study was HIPAA compliant. The study included 30 patients (0-16 years old), for whom full-body computer models were recently created from clinical CT data. A validated Monte Carlo program was used to estimate organ dose from eight chest protocols, representing clinically relevant combinations of bow tie filter, collimation, pitch, and tube potential. Organ dose was used to calculate effective dose and risk index (an index of total cancer incidence risk). The dose and risk estimates before and after normalization by volume-weighted CT dose index (CTDI(vol)) or dose-length product (DLP) were correlated with patient size and age. The effect of each scanning parameter was studied.

Results

Organ dose normalized by tube current-time product or CTDI(vol) decreased exponentially with increasing average chest diameter. Effective dose normalized by tube current-time product or DLP decreased exponentially with increasing chest diameter. Chest diameter was a stronger predictor of dose than weight and total scan length. Risk index normalized by tube current-time product or DLP decreased exponentially with both chest diameter and age. When normalized by DLP, effective dose and risk index were independent of collimation, pitch, and tube potential (<10% variation).

Conclusion

The correlations of dose and risk with patient size and age can be used to estimate patient-specific dose and risk. They can further guide the design and optimization of pediatric chest CT protocols.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11101900/-/DC1.",2011-04-05 +21571895,Multi-state Beef Reproduction Task Force provides science-based recommendations for the application of reproductive technologies.,"Since its formation, the Beef Reproduction Task Force (BRTF) has worked to enhance productivity and profitability of US beef herds by integrating research and extension efforts with the intent of more effectively transferring the use of reproductive technologies to the field. A key early step was to coordinate efforts in identifying effective breeding management protocols for beef cattle and to clarify their associated acronyms. A short list of recommended protocols and their acronyms for synchronization of estrus and ovulation in beef cattle was developed based on results from peer-reviewed, published research and a comprehensive review of data collected from the field. The list of recommended protocols was developed by the BRTF in cooperation with veterinarians and cattle AI industries. These protocols and their acronyms are presented uniformly in all of the major AI sire directories and are available online at http://www.beefrepro.info. Protocol updates are made annually to incorporate the most recent research findings related to estrous cycle control in beef cattle. The Estrus Synchronization Planner, a software program developed in cooperation with the Iowa Beef Center, now reflects these same recommendations. Beginning in 2002, the BRTF hosted and presented 11 educational workshops to more than 1,900 attendees in key cow-calf states. These Applied Reproductive Strategies in Beef Cattle workshops targeted beef producers, AI industry personnel, veterinarians, allied industry representatives, and academicians. A national media sponsor provided online coverage of the last 3 workshops at http://www.appliedreprostrategies.com. 
A postmeeting evaluation, developed to assess application of information from 2 recent workshops, was returned by 55% of those contacted (n = 150). Attendees averaged 16 (± 13.4 SD) yr of AI experience, and 80% of respondents represented more than 100 cows. Respondents were asked to estimate the value of AI-sired calves compared with natural-service-sired calves to their operation on a per-animal-marketed basis, and 17 and 31% responded $50 to $100 per animal and more than $100 per animal, respectively. As a result of what was learned at these conferences, 78% of respondents were better able to troubleshoot management-related issues, 60% made alterations to a protocol they had been using, and 35% of the respondents indicated they changed to a different estrus synchronization protocol.",2011-05-13 +22377962,"Malaria surveillance--United States, 2010.","PROBLEM/CONDITION: Malaria in humans is caused by intraerythrocytic protozoa of the genus Plasmodium. These parasites are transmitted by the bite of an infective female Anopheles mosquito. The majority of malaria infections in the United States occur among persons who have traveled to areas with ongoing malaria transmission. In the United States, cases can occur through exposure to infected blood products, congenital transmission, or local mosquito-borne transmission. Malaria surveillance is conducted to identify episodes of local transmission and to guide prevention recommendations for travelers. PERIOD COVERED: This report summarizes cases in persons with onset of illness in 2010 and summarizes trends during previous years. DESCRIPTION OF SYSTEM: Malaria cases diagnosed by blood film, polymerase chain reaction, or rapid diagnostic tests are mandated to be reported to local and state health departments by health-care providers or laboratory staff. 
Case investigations are conducted by local and state health departments, and reports are transmitted to CDC through the National Malaria Surveillance System (NMSS), National Notifiable Diseases Surveillance System (NNDSS), or direct CDC consults. Data from these reporting systems serve as the basis for this report. RESULTS: CDC received 1,691 reported cases of malaria, including 1,688 cases classified as imported, one transfusion-related case, and two cryptic cases, with an onset of symptoms in 2010 among persons in the United States. The total number of cases represents an increase of 14% from the 1,484 cases reported for 2009. Plasmodium falciparum, P. vivax, P. malariae, and P. ovale were identified in 58%, 19%, 2%, and 2% of cases, respectively. Thirteen patients were infected by two or more species. The infecting species was unreported or undetermined in 18% of cases. Among the 898 cases in U.S. civilians for whom information on chemoprophylaxis use and travel area was known, 45 (5%) reported that they had followed and adhered to a chemoprophylactic drug regimen recommended by CDC for the areas to which they had traveled. Forty-one cases were reported in pregnant women, among whom only two (5%) adhered to chemoprophylaxis. Among all reported cases, 176 (10%) were classified as severe infections, of which nine were fatal. INTERPRETATION: The number of cases reported in 2010 marked the largest number of cases reported since 1980. Despite the apparent progress in reducing the global burden of malaria, many areas remain malaria endemic and the use of appropriate prevention measures by travelers is still inadequate. PUBLIC HEALTH ACTIONS: Travelers visiting friends and relatives (VFR) continue to be a difficult population to reach with effective malaria prevention strategies. 
Evidence-based prevention strategies that effectively target VFR travelers need to be developed and implemented to have a substantial impact on the numbers of imported malaria cases in the United States. A large number of pregnant travelers diagnosed with malaria did not take any chemoprophylaxis. Pregnant women traveling to areas in which malaria is endemic are at higher risk for severe malaria and must use appropriate malaria prevention strategies including chemoprophylaxis. Malaria prevention recommendations are available online (http://www.cdc.gov/malaria/travelers/drugs.html). Malaria infections can be fatal if not diagnosed and treated promptly with antimalarial medications appropriate for the patient's age and medical history, the likely country of malaria acquisition, and previous use of antimalarial chemoprophylaxis. Clinicians should consult the CDC Guidelines for Treatment of Malaria and contact the CDC's Malaria Hotline for case management advice, when needed. Malaria treatment recommendations can be obtained online (http://www.cdc.gov/malaria/diagnosis_treatment) or by calling the Malaria Hotline (770-488-7788 or toll-free at 855-856-4713).",2012-03-01 +21342592,UMARS: Un-MAppable Reads Solution.,"

Background

Un-MAppable Reads Solution (UMARS) is a user-friendly web service focusing on retrieving valuable information from sequence reads that cannot be mapped back to reference genomes. Recently, next-generation sequencing (NGS) technology has emerged as a powerful tool for generating high-throughput sequencing data and has been applied to many kinds of biological research. In a typical analysis, adaptor-trimmed NGS reads were first mapped back to reference sequences, including genomes or transcripts. However, a fraction of NGS reads failed to be mapped back to the reference sequences. Such un-mappable reads are usually imputed to sequencing errors and discarded without further consideration.

Methods

We are investigating possible biological relevance and possible sources of un-mappable reads. Therefore, we developed UMARS to scan for virus genomic fragments or exon-exon junctions of novel alternative splicing isoforms from un-mappable reads. For mapping un-mappable reads, we first collected viral genomes and sequences of exon-exon junctions. Then, we constructed UMARS pipeline as an automatic alignment interface.

Results

By demonstrating the results of two UMARS alignment cases, we show the applicability of UMARS. We first showed that the expected EBV genomic fragments can be detected by UMARS. Second, we also detected exon-exon junctions from un-mappable reads. Further experimental validation also ensured the authenticity of the UMARS pipeline. The UMARS service is freely available to the academic community and can be accessed via http://musk.ibms.sinica.edu.tw/UMARS/.

Conclusions

In this study, we have shown that some un-mappable reads are not caused by sequencing errors. They can originate from viral infection or transcript splicing. Our UMARS pipeline provides another way to examine and recycle the un-mappable reads that are commonly discarded as garbage.",2011-02-15 +21531769,A detailed investigation of accessibilities around target sites of siRNAs and miRNAs.,"

Motivation

The importance of RNA sequence analysis has been increasing since the discovery of various types of non-coding RNAs transcribed in animal cells. Conventional RNA sequence analyses have mainly focused on structured regions, which are stabilized by the stacking energies acting on adjacent base pairs. On the other hand, recent findings regarding the mechanisms of small interfering RNAs (siRNAs) and transcription regulation by microRNAs (miRNAs) indicate the importance of analyzing accessible regions where no base pairs exist. So far, relatively few studies have investigated the nature of such regions.

Results

We have conducted a detailed investigation of accessibilities around the target sites of siRNAs and miRNAs. We have exhaustively calculated the correlations between the accessibilities around the target sites and the repression levels of the corresponding mRNAs. We have computed the accessibilities with an originally developed software package, called 'Raccess', which computes the accessibility of all the segments of a fixed length for a given RNA sequence when the maximal distance between base pairs is limited to a fixed size W. We show that the computed accessibilities are relatively insensitive to the choice of the maximal span W. We have found that the efficacy of siRNAs depends strongly on the accessibility of the very 3'-end of their binding sites, which might reflect a target site recognition mechanism in the RNA-induced silencing complex. We also show that the efficacy of miRNAs has a similar dependence on the accessibilities, but some miRNAs also show positive correlations between the efficacy and the accessibilities in broad regions downstream of their putative binding sites, which might imply that the downstream regions of the target sites are bound by other proteins that allow the miRNAs to implement their functions. We have also investigated the off-target effects of an siRNA as a potential RNAi therapeutic. We show that the off-target effects of the siRNA have similar correlations to the miRNA repression, indicating that they are caused by the same mechanism.

Availability

The C++ source code of the Raccess software is available at http://www.ncrna.org/software/Raccess/ The microarray data on the measurements of the siRNA off-target effects are also available at the same site.

Contact

kiryu-h@k.u-tokyo.ac.jp",2011-04-29 +21281482,nocoRNAc: characterization of non-coding RNAs in prokaryotes.,"

Background

The interest in non-coding RNAs (ncRNAs) constantly rose during the past few years because of the wide spectrum of biological processes in which they are involved. This led to the discovery of numerous ncRNA genes across many species. However, for most organisms the non-coding transcriptome still remains unexplored to a great extent. Various experimental techniques for the identification of ncRNA transcripts are available, but as these methods are costly and time-consuming, there is a need for computational methods that allow the detection of functional RNAs in complete genomes in order to suggest elements for further experiments. Several programs for the genome-wide prediction of functional RNAs have been developed but most of them predict a genomic locus with no indication whether the element is transcribed or not.

Results

We present NOCORNAc, a program for the genome-wide prediction of ncRNA transcripts in bacteria. NOCORNAc incorporates various procedures for the detection of transcriptional features which are then integrated with functional ncRNA loci to determine the transcript coordinates. We applied RNAz and NOCORNAc to the genome of Streptomyces coelicolor and detected more than 800 putative ncRNA transcripts most of them located antisense to protein-coding regions. Using a custom design microarray we profiled the expression of about 400 of these elements and found more than 300 to be transcribed, 38 of them are predicted novel ncRNA genes in intergenic regions. The expression patterns of many ncRNAs are similarly complex as those of the protein-coding genes, in particular many antisense ncRNAs show a high expression correlation with their protein-coding partner.

Conclusions

We have developed NOCORNAc, a framework that facilitates the automated characterization of functional ncRNAs. NOCORNAc increases the confidence of predicted ncRNA loci, especially if they contain transcribed ncRNAs. NOCORNAc is not restricted to intergenic regions, but it is applicable to the prediction of ncRNA transcripts in whole microbial genomes. The software as well as a user guide and example data is available at http://www.zbit.uni-tuebingen.de/pas/nocornac.htm.",2011-01-31 +21406628,Effect of resting-state functional MR imaging duration on stability of graph theory metrics of brain network connectivity.,"

Purpose

To investigate the effect of resting-state (RS) functional magnetic resonance (MR) imaging blood oxygen level-dependent (BOLD) signal acquisition duration on stability of computed graph theory metrics of brain network connectivity.

Materials and methods

An institutional ethics committee approved this study, and informed consent was obtained. BOLD signal (7.5 minutes worth) was obtained from 30 subjects and truncated into 30-second time bins that ranged from 1.5 to 7.5 minutes. A binarized adjacency matrix for each subject and acquisition duration was generated at network costs between 0.1 and 0.5, where network cost is defined as the ratio of the number of edges (connections) in a network to the maximum possible number of edges. Measures of correlation coefficient stability associated with functional connectivity matrices (correlation coefficient standard deviation [SD] and correlation threshold) and associated graph theory metrics (small worldness, local efficiency, and global efficiency) were computed for each subject at each BOLD signal acquisition duration. Computations were implemented with a 15-node 30-core computer cluster to enable analysis of the approximately 2000 resulting brain networks. Analysis of variance and posthoc analyses were conducted to identify differences between time bins for each measure.

Results

Small worldness, local efficiency, and global efficiency stabilized after 2 minutes of BOLD signal acquisition, whereas correlation coefficient data from functional connectivity matrices (correlation coefficient SD and cost-associated threshold) stabilized after 5 minutes of BOLD signal acquisition.

Conclusion

Graph theory metrics of brain network connectivity (small worldness, local efficiency, and global efficiency) may be accurately computed from as little as 1.5-2.0 minutes of RS functional MR imaging BOLD signal. As such, implementation of these methods in the context of time-constrained clinical imaging protocols may be feasible and cost-effective.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11101708/-/DC1.",2011-03-15 +21248682,Antiviral agents for the treatment and chemoprophylaxis of influenza --- recommendations of the Advisory Committee on Immunization Practices (ACIP).,"This report updates previous recommendations by CDC's Advisory Committee on Immunization Practices (ACIP) regarding the use of antiviral agents for the prevention and treatment of influenza (CDC. Prevention and control of influenza: recommendations of the Advisory Committee on Immunization Practices [ACIP]. MMWR 2008;57[No. RR-7]). This report contains information on treatment and chemoprophylaxis of influenza virus infection and provides a summary of the effectiveness and safety of antiviral treatment medications. Highlights include recommendations for use of 1) early antiviral treatment of suspected or confirmed influenza among persons with severe influenza (e.g., those who have severe, complicated, or progressive illness or who require hospitalization); 2) early antiviral treatment of suspected or confirmed influenza among persons at higher risk for influenza complications; and 3) either oseltamivir or zanamivir for persons with influenza caused by 2009 H1N1 virus, influenza A (H3N2) virus, or influenza B virus or when the influenza virus type or influenza A virus subtype is unknown; 4) antiviral medications among children aged <1 year; 5) local influenza testing and influenza surveillance data, when available, to help guide treatment decisions; and 6) consideration of antiviral treatment for outpatients with confirmed or suspected influenza who do not have known risk factors for severe illness, if treatment can be initiated within 48 hours of illness onset. Additional information is available from CDC's influenza website at http://www.cdc.gov/flu, including any updates or supplements to these recommendations that might be required during the 2010-11 influenza season. 
Health-care providers should be alert to announcements of recommendation updates and should check the CDC influenza website periodically for additional information. Recommendations related to the use of vaccines for the prevention of influenza during the 2010-11 influenza season have been published previously (CDC. Prevention and control of influenza with vaccines: recommendations of the Advisory Committee on Immunization Practices [ACIP], 2010. MMWR 2010;59[No. RR-8]).",2011-01-01 +23912807,"Macromolecular crowding: chemistry and physics meet biology (Ascona, Switzerland, 10-14 June 2012).","More than 60 years of biochemical and biophysical studies have accustomed us to think of proteins as highly purified entities that act in isolation, more or less freely diffusing until they find their cognate partner to bind to. While in vitro experiments that reproduce these conditions largely remain the only way to investigate the intrinsic properties of molecules, this approach ignores an important factor: in their natural milieu , proteins are surrounded by several other molecules of different chemical nature, and this crowded environment can considerably modify their behaviour. About 40% of the cellular volume on average is occupied by all sorts of molecules. Furthermore, biological macromolecules live and operate in an extremely structured and complex environment within the cell (endoplasmic reticulum, Golgi apparatus, cytoskeletal structures, etc). Hence, to further complicate the picture, the interior of the cell is by no means a simply crowded medium, rather, a most crowded and confining one. In recent times, several approaches have been developed in the attempt to take into account important factors such as the ones mentioned above, at both theoretical and experimental levels, so that this field of research is now emerging as one of the most thriving in molecular and cell biology (see figure 1). [Formula: see text] Figure 1. 
Left: number of articles containing the word 'crowding' as a keyword limited to the biological and chemical science domains (source: ISI Web of Science). The arrow flags the 2003 'EMBO Workshop on Biological Implications of Macromolecular Crowding' (Embo, 2012). Right: number of citations to articles containing the word 'crowding' limited to the same domains (bars) and an exponential regression curve (source: Elsevier Scopus). To promote the importance of molecular crowding and confinement and provide researchers active in this field an interdisciplinary forum for meeting and exchanging ideas, we recently organized an international conference held in Ascona from 10 to 14 June 2012. In the unique scenario of the Maggiore lake and absorbed in the magic atmosphere of the Centro Stefano Franscini (CSF) at Monte Verità, we enjoyed three-and-a-half days of intense and inspiring activity, where not only many of the most prominent scientists working on macromolecular crowding, but also experts in closely related fields such as colloids and soft matter presented their work. The meeting was intended and has been organized to bring theoreticians and experimentalists together in the attempt to promote an active dialogue. Moreover, we wanted different disciplines to be represented, notably physics and chemistry, besides biology, as cross-fertilization is proving an increasingly fundamental source of inspiration and advancement. This issue of Physical Biology (PB) features a selection of the oral contributions presented at the conference, expanded in the form of research or review articles. PB, one of the scientific journals of the Institute of Physics (IOP), is one of the most dynamic and lively forums active at the interface between biology on one side, and physics and mathematics on the other. 
As its mission is stated by IOP, PB 'focuses on research in which physics-based approaches lead to new insights into biological systems at all scales of space and time, and all levels of complexity'. For these reasons, and also in view of its high reputation and broad readership, PB appears to be the ideal place for disseminating the thriving pieces of research presented at the conference. We are extremely grateful to PB and its kind and efficient editorial staff who helped make this issue a great scientific follow-up to the conference. The opening lecture of the conference, the first of four day-opening keynote lectures, was given by Allen P Minton from NIH (USA), possibly the most influential among the pioneers in the field. He provided a lucid and well-thought-out overview of the concept of macromolecular crowding through an exhaustive chronological account of the major milestones. It is clear that the concept of excluded volume as a key factor remains central to the concept of molecular crowding. As a consequence, simple descriptive paradigms borrowed essentially from colloid physics may still provide useful tools to understand the subtle effects of crowding and confinement in living matter. The contiguity between crowding, colloids and soft matter further emerged as an important concept in the course of the conference in several theoretical lectures and a few experimental ones. Dave Thirumalai, from the University of Maryland (USA), one of the most active theoreticians in the field of theoretical biophysics, outlined scaling theories, concepts from colloid literature and different simulation techniques to describe scenarios for crowding-induced changes in the structure and dynamics of proteins and RNA. In particular, he showed the importance of the shape of crowding particles in affecting folding oligomerization of amyloidogenic peptides. 
Johannes Schöneberg, from IMPRS, Mathematics Institute (Germany), illustrated ReaDDy , a newly developed particle-based simulation software tool for reaction-diffusion dynamics, developed in the group of Frank Noe at EMPRS. He showed that ReaDDy makes it possible to bridge the gap between soft matter and molecular dynamics (MD) simulations on the one hand and particle-based stochastic reaction-diffusion simulations on the other. We asked Johannes to organize a tutorial session to lead interested participants into the package and 'get their hands wet' under the guidance of the developers. The tutorial session was indeed successful and the broad possibilities offered by the simulation toolkit appeared to be clear to the participants. Paolo De Los Rios, from the Ecole Polytechnique Fédérale de Lausanne (EPFL, Switzerland), examined the complexity of the effects caused by crowding conditions from the point of view of statistical physics. Starting from a modification of the well-known Smoluchowski approach to calculate the encounter rate of diffusion-limited reactions, he showed how more realistic situations accounting for crowding effects could be treated equally well on the same theoretical grounds. This talk marked an important point in the conference as it reinforced the idea that simple models of theoretical physics still have the power to provide inspiring results in spite of the intrinsic simplifications of such theoretical approaches. Along the same lines, Nicolas Dorsaz, from the University of Cambridge (UK), proposed an extension of the Smoluchowski framework that incorporates repulsive and attracting interactions between the reactants. This approach was illustrated by reaction rates obtained from event-driven Brownian dynamics and dynamical Monte Carlo simulations. Another striking example of the physical subtleties associated with modelling crowding effects was provided by Jeffrey Skolnick, from the Georgia Institute of Technology (USA). 
He examined the role of hydrodynamic interactions in the self-organization of biological assemblies in the presence of crowding. His results strongly suggest that hydrodynamic interactions greatly affect the kinetics of self-assembly reactions, so that including them in the picture appears crucial for understanding the dynamics of biological systems in vivo . Margareth Cheung, from the University of Houston (USA), emphasized that how the crowded environment inside a cell affects the structural conformation of a protein with a spherical shape is a vital question because the geometry of proteins and protein-protein complexes are far from globules in vivo . Her work demonstrates the malleability of 'native' proteins and implies that crowding-induced shape changes may be important for protein function and malfunction in vivo . Huan-Xiang Zhou, from the Florida State University (USA), focused on atomistic simulations of protein folding and binding under crowding conditions. His lab has developed a post-processing method that allows the atomistic representation of proteins in folding and binding processes under crowding. A comparison with experimental results was also presented. Other lecturers pointed out that there are still aspects not entirely explored in the effects of both crowding and confinement. As suggested in the talk by Gary Pielak, from the University of North Carolina (USA), the currently used synthetic crowding agents are far from being satisfactory in replicating naturally occurring effects associated with crowded environments. For example, non-specific binding seems to play a subtle role in the cell, as natural macromolecules can induce both stabilization and destabilization when used as crowders. It is indeed possible to fine-tune the effect of proteins, as crowders, on the stability of other proteins. 
Another aspect that became clear is that new, more powerful methods need to be developed to study the effect of crowding, but even more to compare crowding and confinement. Indeed, it appeared clear from the lecture by Pierandrea Temussi, from the University of Naples (Italy), that a reliable comparison of the effects of crowding and confinement on the stability of proteins can only be based on the measurement of the whole stability curve of the same protein. Controversial aspects do not pertain only to the influence of crowding on protein stability, but also to aggregation phenomena in natural fluids. Domenico Sanfelice, from NIMR (London, UK), reported an interesting case of the apparent influence of crowding on aggregation. Hen egg white, a possible natural medium to study macromolecules in crowded conditions can dramatically increase the aggregation kinetics of proteins with an inbuilt tendency to associate. By carefully dissecting the phenomenology, it was shown that only part of this effect is due to crowding, while another factor playing an important role is the interaction with proteins from the milieu . In other words, high-molecular-weight glycoproteins can act as efficient molecular seeds for aggregation. A special topic of great relevance in the conference appeared to be the direct study of crowding in living systems. Alan Verkman, from the University of California, San Francisco (USA), one of the world's leading scientific personalities in the field of experimental investigation of crowding and confinement, was invited to give the second plenary lecture devoted to the experimental study of crowding effects in vivo . In his keynote lecture, Dr Verkman led us on a wide and compelling tour, exploring the main experimental approaches to study molecular crowding in and around cells. 
After a thorough examination of methods such as fluorescence recovery after photo-bleaching, fluorescence correlation spectroscopy, photo-activation localization microscopy and stochastic reconstruction microscopy, he concluded that the general consensus emerging from experimental studies is that the notion of universally anomalous diffusion in and around cells as a consequence of molecular crowding may not be correct, and that the slowing of diffusion in cells is less marked than has been widely assumed and can be simply described through a five- to sixfold reduction of the normal diffusion coefficient. A Soranno, from the University of Zürich (Switzerland), described how, by employing FRET measurements, it is possible to quantify the effect of molecular crowding on the dimensions of the highly charged, intrinsically disordered protein human prothymosin alpha. For a large variety of polymeric crowders (PEG, PVP, Ficoll, Dextran, PVA, PAA), a collapse of the polypeptide chain is observed with increasing polymer size and polymer concentration. The largest extent of collapse is observed for polymer radii comparable to the dimensions of the protein, in agreement with theoretical considerations. For his contribution, A Soranno was awarded the CSF Award for the best contributed talk. In his most inspiring talk, Clifford Brangwynne, from Princeton University (USA), drew attention to very important objects, namely Ribonucleoprotein (RNP) bodies. These are non-membrane-bound macromolecular assemblies that form from the dynamic interactions of RNA and proteins. The assembly of RNP bodies may sensitively depend on the biophysical features of the surrounding cytoplasm, including the degree of crowding, transport coefficients and mechanical properties. This dependency may have important implications for the RNA processing reactions involved in fundamental biological processes such as developmental cell growth. 
Remarkably, Brangwynne showed how RNPs behave in the cell as liquid droplets, pointing to a possible entirely new means that the cell could use to control and fine-tune its internal processes, in fact, more than that, a completely unexplored, new state of organization of living matter, and a functional one. Giuseppe Zaccai, from Institut Laue Langevin, Grenoble (France), showed that protein dynamics is more sensitive than structure to environmental factors such as crowding, solvent, temperature or pressure. Furthermore, he convincingly explained how neutron scattering provides unique experimental data to underpin MD calculations in this context. Following up on environment-induced modulations of protein functional dynamics, Ruth Nussinov, from Tel Aviv University (Israel), addressed the important problem of whether cellular signals can travel long distances in a crowded environment. She proposed a model based on the evolution of at least three properties: a modular functional organization of the cellular network, sequences in some key regions of proteins, such as linkers or loops, and compact interactions between proteins, possibly favoured by a crowded environment. The workshop ended on a keynote lecture by Jean-Marie Lehn, from the Université de Strasbourg (France). Lehn, 1987 Nobel Laureate in chemistry, offered a 'supramolecular view' of the field of molecular interactions. Supramolecular chemistry explores the design of systems undergoing self-organization , i.e. systems capable of generating well-defined functional supramolecular architectures by self-assembling from their components, thus behaving as programmed chemical systems . Chemistry may therefore be considered an information science , the science of informed matter. 
Supramolecular chemistry is intrinsically a dynamic chemistry in view of the ability of the interactions connecting the molecular components of a supramolecular entity and the resulting ability of supramolecular species to exchange their constituents. The same holds for molecular chemistry when the molecular entity contains covalent bonds that may form and break reversibly, so as to allow a continuous change in constitution by the reorganization and exchange of building blocks. These features define a constitutional dynamic chemistry (CDC) on both the molecular and supramolecular levels. CDC takes advantage of dynamic constitutional diversity to allow variation and selection in response to either internal or external factors to achieve adaptation . The merging of the features-information and programmability, dynamics and reversibility, constitution and structural diversity-points towards the emergence of adaptive and evolutive chemistry . The whole workshop could have not taken place without the help of the Centro Stefano Franscini. The CSF is the congress centre of the Swiss Federal Institute of Technology of Zurich (ETH Zurich) and has been situated at Monte Verità since 1989. It is an ideal meeting point for all members of the international scientific community who wish to discuss the state-of-the-art and new challenges of any field of research. The CSF supports 20-25 international conferences every year and, since 2010, up to ten winter doctoral schools1. The competence and professionalism of the staff were at the same level of beauty and inspiring character as that of Monte Verità. A meeting of this sort, if successful, leaves the audience with more open questions than settled answers, and this was definitely the case for Crowding 2012. Excluded volume is clearly a fundamental concept that has allowed crowding, a very familiar concept in soft matter, to enter into the domain of biological sciences. 
However, the complexity of the biological milieu calls for more refined descriptions. What is the role of electrostatic and electrodynamic interactions? What is the role of hydrodynamics interactions? To what extent does the strong spatial inhomogeneity (clustering of molecules, cellular compartmentalization, etc) have to be taken into account? Or, more generally, what are the minimal elements that prove crucial to describe reactions within a cell? How does the diffusion proceed (diffusion, slow diffusion, sub-diffusion) given that the experimental evidences are still controversial? In conclusion, we knew that allowing scientists with very different backgrounds and ideas to mingle was a hazardous attempt. Despite that, the workshop turned out to be a very successful experiment, which was highly enjoyed both by the participants and the organizers. Discussions sparked regularly among ever-changing groups, comprising senior scientists and students, despite the rather tight schedule, adding to the sense of fulfilment ignited by the outstanding level of the presentations. Given the success of the meeting Crowding 2012, a new event has been organized and will take place on the same themes during fall 2013, this time in the beautiful scenery of the Loire valley in France. The workshop 'Macromolecular crowding effects in cell biology: models and experiments' will be held on the CNRS campus in Orléans, France, on 24-25 October 2013. More information can be found on the workshop website: http://dirac.cnrs-orleans.fr/~piazza/. 
1Source: www.csf.ethz.ch/",2013-08-02 +22087863,Maximum a posteriori Bayesian estimation of mycophenolic Acid area under the concentration-time curve: is this clinically useful for dosage prediction yet?,"This review seeks to summarize the available data about Bayesian estimation of area under the plasma concentration-time curve (AUC) and dosage prediction for mycophenolic acid (MPA) and evaluate whether sufficient evidence is available for routine use of Bayesian dosage prediction in clinical practice. A literature search identified 14 studies that assessed the predictive performance of maximum a posteriori Bayesian estimation of MPA AUC and one report that retrospectively evaluated how closely dosage recommendations based on Bayesian forecasting achieved targeted MPA exposure. Studies to date have mostly been undertaken in renal transplant recipients, with limited investigation in patients treated with MPA for autoimmune disease or haematopoietic stem cell transplantation. All of these studies have involved use of the mycophenolate mofetil (MMF) formulation of MPA, rather than the enteric-coated mycophenolate sodium (EC-MPS) formulation. Bias associated with estimation of MPA AUC using Bayesian forecasting was generally less than 10%. However some difficulties with imprecision was evident, with values ranging from 4% to 34% (based on estimation involving two or more concentration measurements). Evaluation of whether MPA dosing decisions based on Bayesian forecasting (by the free website service https://pharmaco.chu-limoges.fr) achieved target drug exposure has only been undertaken once. When MMF dosage recommendations were applied by clinicians, a higher proportion (72-80%) of subsequent estimated MPA AUC values were within the 30-60 mg · h/L target range, compared with when dosage recommendations were not followed (only 39-57% within target range). Such findings provide evidence that Bayesian dosage prediction is clinically useful for achieving target MPA AUC. 
This study, however, was retrospective and focussed only on adult renal transplant recipients. Furthermore, in this study, Bayesian-generated AUC estimations and dosage predictions were not compared with a later full measured AUC but rather with a further AUC estimate based on a second Bayesian analysis. This study also provided some evidence that a useful monitoring schedule for MPA AUC following adult renal transplant would be every 2 weeks during the first month post-transplant, every 1-3 months between months 1 and 12, and each year thereafter. It will be interesting to see further validations in different patient groups using the free website service. In summary, the predictive performance of Bayesian estimation of MPA, comparing estimated with measured AUC values, has been reported in several studies. However, the next step of predicting dosages based on these Bayesian-estimated AUCs, and prospectively determining how closely these predicted dosages give drug exposure matching targeted AUCs, remains largely unaddressed. Further prospective studies are required, particularly in non-renal transplant patients and with the EC-MPS formulation. Other important questions remain to be answered, such as: do Bayesian forecasting methods devised to date use the best population pharmacokinetic models or most accurate algorithms; are the methods simple to use for routine clinical practice; do the algorithms actually improve dosage estimations beyond empirical recommendations in all groups that receive MPA therapy; and, importantly, do the dosage predictions, when followed, improve patient health outcomes?",2011-12-01 +21414234,ATAQS: A computational software tool for high throughput transition optimization and validation for selected reaction monitoring mass spectrometry.,"

Background

Since its inception, proteomics has essentially operated in a discovery mode with the goal of identifying and quantifying the maximal number of proteins in a sample. Increasingly, proteomic measurements are also supporting hypothesis-driven studies, in which a predetermined set of proteins is consistently detected and quantified in multiple samples. Selected reaction monitoring (SRM) is a targeted mass spectrometric technique that supports the detection and quantification of specific proteins in complex samples at high sensitivity and reproducibility. Here, we describe ATAQS, an integrated software platform that supports all stages of targeted, SRM-based proteomics experiments including target selection, transition optimization and post acquisition data analysis. This software will significantly facilitate the use of targeted proteomic techniques and contribute to the generation of highly sensitive, reproducible and complete datasets that are particularly critical for the discovery and validation of targets in hypothesis-driven studies in systems biology.

Result

We introduce a new open source software pipeline, ATAQS (Automated and Targeted Analysis with Quantitative SRM), which consists of a number of modules that collectively support the SRM assay development workflow for targeted proteomic experiments (project management and generation of protein, peptide and transitions and the validation of peptide detection by SRM). ATAQS provides a flexible pipeline for end-users by allowing the workflow to start or end at any point of the pipeline, and for computational biologists, by enabling the easy extension of java algorithm classes for their own algorithm plug-in or connection via an external web site. This integrated system supports all steps in a SRM-based experiment and provides a user-friendly GUI that can be run by any operating system that allows the installation of the Mozilla Firefox web browser.

Conclusions

Targeted proteomics via SRM is a powerful new technique that enables the reproducible and accurate identification and quantification of sets of proteins of interest. ATAQS is the first open-source software that supports all steps of the targeted proteomics workflow. ATAQS also provides software API (Application Program Interface) documentation that enables the addition of new algorithms to each of the workflow steps. The software, installation guide and sample dataset can be found in http://tools.proteomecenter.org/ATAQS/ATAQS.html.",2011-03-18 +21349865,All-atom knowledge-based potential for RNA structure prediction and assessment.,"

Motivation

Over the recent years, the vision that RNA simply serves as information transfer molecule has dramatically changed. The study of the sequence/structure/function relationships in RNA is becoming more important. As a direct consequence, the total number of experimentally solved RNA structures has dramatically increased and new computer tools for predicting RNA structure from sequence are rapidly emerging. Therefore, new and accurate methods for assessing the accuracy of RNA structure models are clearly needed.

Results

Here, we introduce an all-atom knowledge-based potential for the assessment of RNA three-dimensional (3D) structures. We have benchmarked our new potential, called Ribonucleic Acids Statistical Potential (RASP), with two different decoy datasets composed of near-native RNA structures. In one of the benchmark sets, RASP was able to rank the closest model to the X-ray structure as the best and within the top 10 models for ∼93 and ∼95% of decoys, respectively. The average correlation coefficient between model accuracy, calculated as the root mean square deviation and global distance test-total score (GDT-TS) measures of C3' atoms, and the RASP score was 0.85 and 0.89, respectively. Based on a recently released benchmark dataset that contains hundreds of 3D models for 32 RNA motifs with non-canonical base pairs, RASP scoring function compared favorably to ROSETTA FARFAR force field in the selection of accurate models. Finally, using the self-splicing group I intron and the stem-loop IIIc from hepatitis C virus internal ribosome entry site as test cases, we show that RASP is able to discriminate between known structure-destabilizing mutations and compensatory mutations.

Availability

RASP can be readily applied to assess all-atom or coarse-grained RNA structures and thus should be of interest to both developers and end-users of RNA structure prediction methods. The computer software and knowledge-based potentials are freely available at http://melolab.org/supmat.html.

Contact

fmelo@bio.puc.cl; mmarti@cipf.es

Supplementary information

Supplementary data are available at Bioinformatics online.",2011-02-23 +21223597,Characteristics of control group participants who increased their physical activity in a cluster-randomized lifestyle intervention trial.,"

Background

Meaningful improvement in physical activity among control group participants in lifestyle intervention trials is not an uncommon finding, and may be partly explained by participant characteristics. This study investigated which baseline demographic, health and behavioural characteristics were predictive of successful improvement in physical activity in usual care group participants recruited into a telephone-delivered physical activity and diet intervention trial, and descriptively compared these characteristics with those that were predictive of improvement among intervention group participants.

Methods

Data come from the Logan Healthy Living Program, a primary care-based, cluster-randomized controlled trial of a physical activity and diet intervention. Multivariable logistic regression models examined variables predictive of an improvement of at least 60 minutes per week of moderate-to-vigorous intensity physical activity among usual care (n = 166) and intervention group (n = 175) participants.

Results

Baseline variables predictive of a meaningful change in physical activity were different for the usual care and intervention groups. Being retired and completing secondary school (but no further education) were predictive of physical activity improvement for usual care group participants, whereas only baseline level of physical activity was predictive of improvement for intervention group participants. Higher body mass index and being unmarried may also be predictors of physical activity improvement for usual care participants.

Conclusion

This is the first study to examine differences in predictors of physical activity improvement between intervention group and control group participants enrolled in a physical activity intervention trial. While further empirical research is necessary to confirm findings, results suggest that participants with certain socio-demographic characteristics may respond favourably to minimal intensity interventions akin to the treatment delivered to participants in a usual care group. In future physical activity intervention trials, it may be possible to screen participants for baseline characteristics in order to target minimal-intensity interventions to those most likely to benefit. (Australian Clinical Trials Registry, http://www.anzctr.org.au/default.aspx, ACTRN012607000195459).",2011-01-11 +21371586,"Preparation of protein samples for NMR structure, function, and small-molecule screening studies.","In this chapter, we concentrate on the production of high-quality protein samples for nuclear magnetic resonance (NMR) studies. In particular, we provide an in-depth description of recent advances in the production of NMR samples and their synergistic use with recent advancements in NMR hardware. We describe the protein production platform of the Northeast Structural Genomics Consortium and outline our high-throughput strategies for producing high-quality protein samples for NMR studies. Our strategy is based on the cloning, expression, and purification of 6×-His-tagged proteins using T7-based Escherichia coli systems and isotope enrichment in minimal media. We describe 96-well ligation-independent cloning and analytical expression systems, parallel preparative scale fermentation, and high-throughput purification protocols. The 6×-His affinity tag allows for a similar two-step purification procedure implemented in a parallel high-throughput fashion that routinely results in purity levels sufficient for NMR studies (>97% homogeneity). 
Using this platform, the protein open reading frames of over 17,500 different targeted proteins (or domains) have been cloned as over 28,000 constructs. Nearly 5000 of these proteins have been purified to homogeneity in tens of milligram quantities (see Summary Statistics, http://nesg.org/statistics.html), resulting in more than 950 new protein structures, including more than 400 NMR structures, deposited in the Protein Data Bank. The Northeast Structural Genomics Consortium pipeline has been effective in producing protein samples of both prokaryotic and eukaryotic origin. Although this chapter describes our entire pipeline for producing isotope-enriched protein samples, it focuses on the major updates introduced during the last 5 years (Phase 2 of the National Institute of General Medical Sciences Protein Structure Initiative). Our advanced automated and/or parallel cloning, expression, purification, and biophysical screening technologies are suitable for implementation in a large individual laboratory or by a small group of collaborating investigators for structural biology, functional proteomics, ligand screening, and structural genomics research.",2011-01-01 +21273522,Differentiation of malignant and benign pulmonary nodules with quantitative first-pass 320-detector row perfusion CT versus FDG PET/CT.,"

Purpose

To prospectively compare the capability of quantitative first-pass perfusion 320-detector row computed tomography (CT) (ie, area-detector CT) with that of combined positron emission tomography and CT (PET/CT) for differentiation between malignant and benign pulmonary nodules.

Materials and methods

This prospective study was approved by the institutional review board, and written informed consent was obtained from 50 consecutive patients with 76 pulmonary nodules. All patients underwent dynamic area-detector CT, PET/CT, and microbacterial and/or histopathologic examinations. All pulmonary nodules were divided into three groups: malignant nodules (n = 43), benign nodules with low biologic activity (n = 6), and benign nodules with high biologic activity (n = 27). For each dynamic area-detector CT data set, the perfusion derived by using the maximum slope model (PF(MS)), extraction fraction derived by using the Patlak plot model (EF(PP)), and blood volume derived by using the Patlak plot model (BV(PP)) were calculated. These parameters were statistically compared among the three nodule groups. Receiver operating characteristic (ROC) analyses were used to compare the diagnostic capability of the CT and PET/CT indexes. Finally, the sensitivity, specificity, and accuracy of each index were compared by using the McNemar test.

Results

All indexes in the malignant nodule group were significantly different from those in the low-biologic-activity benign nodule group (P < .05). Areas under the ROC curve for PF(MS) and EF(PP) were significantly larger than those for BV(PP) (P < .05) and maximal standard uptake value (SUV(max)) (P < .05). The specificity and accuracy of PF(MS) and EF(PP) were significantly higher than those of BV(PP) and SUV(max) (P < .05).

Conclusion

Dynamic first-pass area-detector perfusion CT has the potential to be more specific and accurate than PET/CT for differentiating malignant from benign pulmonary nodules.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.10100245/-/DC1.",2011-02-01 +23000707,Soft tissue management for dental implants: what are the most effective techniques? A Cochrane systematic review.,"

Unlabelled

This review is based on a Cochrane systematic review entitled 'Interventions for replacing missing teeth: management of soft tissues for dental implants' published in The Cochrane Library (see http://www.cochrane.org/ for information). Cochrane systematic reviews are regularly updated to include new research, and in response to comments and criticisms from readers. If you wish to comment on this review, please send your comments to the Cochrane website or to Marco Esposito. The Cochrane Library should be consulted for the most recent version of the review. The results of a Cochrane review can be interpreted differently, depending on people's perspectives and circumstances. Please consider the conclusions presented carefully. They are the opinions of the review authors, and are not necessarily shared by the Cochrane Collaboration.

Purpose

To evaluate whether flapless procedures are beneficial for patients and which is the ideal flap design, whether soft tissue correction/augmentation techniques are beneficial for patients and which are the best techniques, whether techniques to increase the peri-implant keratinised mucosa are beneficial for patients and which are the best techniques, and which are the best suturing techniques/materials.

Materials and methods

The Cochrane Oral Health Group's Trials Register, CENTRAL, MEDLINE and EMBASE were searched up to the 9th of June 2011 for randomised controlled trials (RCTs) of rootform osseointegrated dental implants, with a follow-up of at least 6 months after function, comparing various techniques to handle soft tissues in relation to dental implants. Primary outcome measures were prosthetic failures, implant failures and biological complications. Screening of eligible studies, assessment of the methodological quality of the trials and data extraction were conducted at least in duplicate and independently by two or more review authors. The statistical unit was the patient and not the prosthesis, the procedure or the implant. Results were expressed using risk ratios for dichotomous outcomes and mean differences for continuous outcomes with 95% confidence intervals (CI).

Results

Seventeen potentially eligible RCTs were identified but only six trials with 138 patients in total could be included. The following techniques were compared in the six included studies: flapless placement of dental implants versus conventional flap elevation (2 trials, 56 patients), crestal versus vestibular incisions (1 trial, 10 patients), Erbium:YAG laser versus flap elevation at the second-stage surgery for implant exposure (1 trial, 20 patients), whether a connective tissue graft at implant placement could be effective in augmenting peri-implant tissues (1 split-mouth trial, 10 patients), and autograft versus an animal-derived collagen matrix to increase the height of the keratinised mucosa (1 trial, 40 patients). On a patient rather than per implant basis, implants placed with a flapless technique and implant exposures performed with laser lead to statistically significantly less postoperative pain than flap elevation. Sites augmented with soft tissue connective grafts had better aesthetics and thicker tissues. Both palatal autografts or the use of a porcine-derived collagen matrix are effective in increasing the height of keratinised mucosa at the cost of a 0.5 mm recession of peri-implant soft tissues. There were no other statistically significant differences for any of the remaining analyses.

Conclusions

There is limited weak evidence suggesting that flapless implant placement is feasible and has been shown to reduce patient postoperative discomfort in adequately selected patients, that augmentation at implant sites with soft tissue grafts is effective in increasing soft tissue thickness and improving aesthetics, and that one technique to increase the height of keratinised mucosa using autografts or an animal-derived collagen matrix was able to achieve its goal but at the cost of a worsened aesthetic outcome (0.5 mm of recession). There is insufficient reliable evidence to provide recommendations on which is the ideal flap design, the best soft tissue augmentation technique, whether techniques to increase the width of keratinised/attached mucosa are beneficial to patients or not, and which are the best incision/suture techniques/materials. Properly designed and conducted RCTs, with at least 6 months of follow-up, are needed to provide reliable answers to these questions.",2012-01-01 +21214904,PICS-Ord: unlimited coding of ambiguous regions by pairwise identity and cost scores ordination.,"

Background

We present a novel method to encode ambiguously aligned regions in fixed multiple sequence alignments by 'Pairwise Identity and Cost Scores Ordination' (PICS-Ord). The method works via ordination of sequence identity or cost scores matrices by means of Principal Coordinates Analysis (PCoA). After identification of ambiguous regions, the method computes pairwise distances as sequence identities or cost scores, ordinates the resulting distance matrix by means of PCoA, and encodes the principal coordinates as ordered integers. Three biological and 100 simulated datasets were used to assess the performance of the new method.

Results

Including ambiguous regions coded by means of PICS-Ord increased topological accuracy, resolution, and bootstrap support in real biological and simulated datasets compared to the alternative of excluding such regions from the analysis a priori. In terms of accuracy, PICS-Ord performs equal to or better than previously available methods of ambiguous region coding (e.g., INAASE), with the advantage of a practically unlimited alignment size and increased analytical speed and the possibility of PICS-Ord scores to be analyzed together with DNA data in a partitioned maximum likelihood model.

Conclusions

Advantages of PICS-Ord over step matrix-based ambiguous region coding with INAASE include a practically unlimited number of OTUs and seamless integration of PICS-Ord codes into phylogenetic datasets, as well as the increased speed of phylogenetic analysis. Contrary to word- and frequency-based methods, PICS-Ord maintains the advantage of pairwise sequence alignment to derive distances, and the method is flexible with respect to the calculation of distance scores. In addition to distance and maximum parsimony, PICS-Ord codes can be analyzed in a Bayesian or maximum likelihood framework. RAxML (version 7.2.6 or higher that was developed for this study) allows up to 32-state ordered or unordered characters. A GTR, MK, or ORDERED model can be applied to analyse the PICS-Ord codes partition, with GTR performing slightly better than MK and ORDERED.

Availability

An implementation of the PICS-Ord algorithm is available from http://scit.us/projects/ngila/wiki/PICS-Ord. It requires both the statistical software, R http://www.r-project.org and the alignment software Ngila http://scit.us/projects/ngila.",2011-01-07 +21266061,Annotation-based genome-wide SNP discovery in the large and complex Aegilops tauschii genome using next-generation sequencing without a reference genome sequence.,"

Background

Many plants have large and complex genomes with an abundance of repeated sequences. Many plants are also polyploid. Both of these attributes typify the genome architecture in the tribe Triticeae, whose members include economically important wheat, rye and barley. Large genome sizes, an abundance of repeated sequences, and polyploidy present challenges to genome-wide SNP discovery using next-generation sequencing (NGS) of total genomic DNA by making alignment and clustering of short reads generated by the NGS platforms difficult, particularly in the absence of a reference genome sequence.

Results

An annotation-based, genome-wide SNP discovery pipeline is reported using NGS data for large and complex genomes without a reference genome sequence. Roche 454 shotgun reads with low genome coverage of one genotype are annotated in order to distinguish single-copy sequences and repeat junctions from repetitive sequences and sequences shared by paralogous genes. Multiple genome equivalents of shotgun reads of another genotype generated with SOLiD or Solexa are then mapped to the annotated Roche 454 reads to identify putative SNPs. A pipeline program package, AGSNP, was developed and used for genome-wide SNP discovery in Aegilops tauschii-the diploid source of the wheat D genome, and with a genome size of 4.02 Gb, of which 90% is repetitive sequences. Genomic DNA of Ae. tauschii accession AL8/78 was sequenced with the Roche 454 NGS platform. Genomic DNA and cDNA of Ae. tauschii accession AS75 was sequenced primarily with SOLiD, although some Solexa and Roche 454 genomic sequences were also generated. A total of 195,631 putative SNPs were discovered in gene sequences, 155,580 putative SNPs were discovered in uncharacterized single-copy regions, and another 145,907 putative SNPs were discovered in repeat junctions. These SNPs were dispersed across the entire Ae. tauschii genome. To assess the false positive SNP discovery rate, DNA containing putative SNPs was amplified by PCR from AL8/78 and AS75 and resequenced with the ABI 3730 xl. In a sample of 302 randomly selected putative SNPs, 84.0% in gene regions, 88.0% in repeat junctions, and 81.3% in uncharacterized regions were validated.

Conclusion

An annotation-based genome-wide SNP discovery pipeline for NGS platforms was developed. The pipeline is suitable for SNP discovery in genomic libraries of complex genomes and does not require a reference genome sequence. The pipeline is applicable to all current NGS platforms, provided that at least one such platform generates relatively long reads. The pipeline package, AGSNP, and the discovered 497,118 Ae. tauschii SNPs can be accessed at (http://avena.pw.usda.gov/wheatD/agsnp.shtml).",2011-01-25 +21406627,Small (≤ 20 mm) pancreatic adenocarcinomas: analysis of enhancement patterns and secondary signs with multiphasic multidetector CT.,"

Purpose

To evaluate the enhancement patterns, prevalence of secondary signs, and histopathologic features of 20-mm-diameter or smaller pancreatic cancers seen on multiphasic multidetector computed tomographic (CT) images.

Materials and methods

This retrospective study was approved by the institutional review board; the requirement for informed consent was waived. From January 2002 through September 2009, the authors reviewed the clinical and imaging data of 130 consecutive patients (76 men, 54 women; mean age, 64.1 years; age range, 28-82 years) who had surgically proven 30-mm-diameter or smaller pancreatic cancers and underwent preoperative multidetector CT and 33 consecutive patients (17 men, 16 women; mean age, 65.1 years; age range, 48-84 years) who had histopathologically proven pancreatic cancer and underwent incidental multidetector CT before the diagnosis was rendered. Only pancreatic phase CT was performed in two patients, and only hepatic venous phase CT was performed in nine patients. Two radiologists in consensus classified the tumor attenuation as hyper-, iso-, or hypoattenuation during the pancreatic and hepatic venous phases. Accompanying secondary signs, temporal changes in tumor attenuation, and histopathologic findings also were analyzed. The Fisher exact test, χ(2) test, generalized estimating equation, and Student t test were used to compare the variables.

Results

Seventy tumors were 20 mm or smaller, and 93 were 21-30 mm. Isoattenuating pancreatic cancers were more commonly observed among the 20-mm or smaller tumors (16 of 59, 27%) than among the 21-30-mm tumors (12 of 93, 13%) (P = .033). They were also more common among well-differentiated tumors (seven of 12, 58%) than among moderately differentiated (20 of 124, 16%) and poorly differentiated (one of 10, 10%) tumors (P = .001). The prevalence of secondary signs differed significantly according to tumor size (53 [76%] of 70 ≤20-mm tumors vs 92 [99%] of 93 21-30-mm tumors) (P < .001). The prevalence of secondary signs was high among isoattenuating pancreatic cancers (14 [88%] of 16 ≤20-mm tumors vs all 12 [100%] 21-30-mm tumors). Most of the isoattenuating tumors seen at prediagnostic CT were hypoattenuating after 6 months (100% [four of four] during pancreatic phase, 71% [five of seven] during hepatic venous phase).

Conclusion

The prevalence of isoattenuating pancreatic cancers differed significantly according to tumor size and cellular differentiation. Most small isoattenuating pancreatic cancers showed secondary signs.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11101133/-/DC1.",2011-03-15 +21205295,Involvement of aryl hydrocarbon receptor signaling in the development of small cell lung cancer induced by HPV E6/E7 oncoproteins.,"

Background

Lung cancers consist of four major types that, for clinical-pathological reasons, are often divided into two broad categories: small cell lung cancer (SCLC) and non-small cell lung cancer (NSCLC). All major histological types of lung cancer are associated with smoking, although the association is stronger for SCLC and squamous cell carcinoma than adenocarcinoma. To date, epidemiological studies have identified several environmental, genetic, hormonal and viral factors associated with lung cancer risk. It has been estimated that 15-25% of human cancers may have a viral etiology. The human papillomavirus (HPV) is a proven cause of most human cervical cancers, and might have a role in other malignancies including vulva, skin, oesophagus, head and neck cancer. HPV has also been speculated to have a role in the pathogenesis of lung cancer. To validate the hypothesis of HPV involvement in small cell lung cancer pathogenesis we performed a gene expression profile of transgenic mouse model of SCLC induced by HPV-16 E6/E7 oncoproteins.

Methods

Gene expression profile of SCLC has been performed using Agilent whole mouse genome (4 × 44k) representing ~ 41000 genes and mouse transcripts. Samples were obtained from two HPV16-E6/E7 transgenic mouse models and from littermate's normal lung. Data analyses were performed using GeneSpring 10 and the functional classification of deregulated genes was performed using Ingenuity Pathway Analysis (Ingenuity® Systems, http://www.ingenuity.com).

Results

Analysis of deregulated genes induced by the expression of E6/E7 oncoproteins supports the hypothesis of a linkage between HPV infection and SCLC development. As a matter of fact, comparison of deregulated genes in our system and those in human SCLC showed that many of them are located in the Aryl Hydrocarbon Receptor Signal transduction pathway.

Conclusions

In this study, the global gene expression of transgenic mouse model of SCLC induced by HPV-16 E6/E7 oncoproteins led us to identification of several genes involved in SCLC tumor development. Furthermore, our study revealed that the Aryl Hydrocarbon Receptor Signaling is the primarily affected pathway by the E6/E7 oncoproteins expression and that this pathway is also deregulated in human SCLC. Our results provide the basis for the development of new therapeutic approaches against human SCLC.",2011-01-04 +21279663,Histologic and biomechanical evaluation of a novel macroporous polytetrafluoroethylene knit mesh compared to lightweight and heavyweight polypropylene mesh in a porcine model of ventral incisional hernia repair.,"

Purpose

To evaluate the biocompatibility of heavyweight polypropylene (HWPP), lightweight polypropylene (LWPP), and monofilament knit polytetrafluoroethylene (mkPTFE) mesh by comparing biomechanics and histologic response at 1, 3, and 5 months in a porcine model of incisional hernia repair.

Methods

Bilateral full-thickness abdominal wall defects measuring 4 cm in length were created in 27 Yucatan minipigs. Twenty-one days after hernia creation, animals underwent bilateral preperitoneal ventral hernia repair with 8 × 10 cm pieces of mesh. Repairs were randomized to Bard(®)Mesh (HWPP, Bard/Davol, http://www.davol.com), ULTRAPRO(®) (LWPP, Ethicon, http://www.ethicon.com), and GORE(®)INFINIT Mesh (mkPTFE, Gore & Associates, http://www.gore.com). Nine animals were sacrificed at each timepoint (1, 3, and 5 months). At harvest, a 3 × 4 cm sample of mesh and incorporated tissue was taken from the center of the implant site and subjected to uniaxial tensile testing at a rate of 0.42 mm/s. The maximum force (N) and tensile strength (N/cm) were measured with a tensiometer, and stiffness (N/mm) was calculated from the slope of the force-versus-displacement curve. Adjacent sections of tissue were stained with hematoxylin and eosin (H&E) and analyzed for inflammation, fibrosis, and tissue ingrowth. Data are reported as mean ± SEM. Statistical significance (P < 0.05) was determined using a two-way ANOVA and Bonferroni post-test.

Results

No significant difference in maximum force was detected between meshes at any of the time points (P > 0.05 for all comparisons). However, for each mesh type, the maximum strength at 5 months was significantly lower than that at 1 month (P < 0.05). No significant difference in stiffness was detected between the mesh types or between timepoints (P > 0.05 for all comparisons). No significant differences with regard to inflammation, fibrosis, or tissue ingrowth were detected between mesh types at any time point (P > 0.09 for all comparisons). However, over time, inflammation decreased significantly for all mesh types (P < 0.001) and tissue ingrowth reached a slight peak between 1 and 3 months (P = 0.001) but did not significantly change thereafter (P > 0.09).

Conclusions

The maximum tensile strength of mesh in the abdominal wall decreased over time for HWPP, LWPP, and mkPTFE mesh materials alike. This trend may actually reflect inability to adequately grip specimens at later time points rather than any mesh-specific trend. Histologically, inflammation decreased with time (P = 0.000), and tissue ingrowth increased (P = 0.019) for all meshes. No specific trends were observed between the polypropylene meshes and the monofilament knit PTFE, suggesting that this novel construction may be a suitable alternative to existing polypropylene meshes.",2011-01-30 +21420908,"Selective digestive tract decontamination and selective oropharyngeal decontamination and antibiotic resistance in patients in intensive-care units: an open-label, clustered group-randomised, crossover study.","

Background

Previously, we assessed selective digestive tract decontamination (SDD) and selective oropharyngeal decontamination (SOD) on survival and prevention of bacteraemia in patients in intensive-care units. In this analysis, we aimed to assess effectiveness of these interventions for prevention of respiratory tract colonisation and bacteraemia with highly resistant microorganisms acquired in intensive-care units.

Methods

We did an open-label, clustered group-randomised, crossover study in 13 intensive-care units in the Netherlands between May, 2004, and July, 2006. Participants admitted to intensive-care units with an expected duration of mechanical ventilation of more than 48 h or an expected stay of more than 72 h received SOD (topical tobramycin, colistin, and amphotericin B in the oropharynx), SDD (SOD antibiotics in the oropharynx and stomach plus 4 days' intravenous cefotaxime), or standard care. The computer-randomised order of study regimens was applied by an independent clinical pharmacist who was masked to intensive-care-unit identity. We calculated crude odds ratios (95% CI) for rates of bacteraemia or respiratory tract colonisation with highly resistant microorganisms in patients who stayed in intensive-care units for more than 3 days (ie, acquired infection). This trial is registered at http://isrctn.org, number ISRCTN35176830.

Findings

Data were available for 5927 (>99%) of 5939 patients, of whom 5463 (92%) were in intensive-care units for more than 3 days. 239 (13%) of 1837 patients in standard care acquired bacteraemia after 3 days, compared with 158 (9%) of 1758 in SOD (odds ratio 0·66, 95% CI 0·53-0·82), and 124 (7%) of 1868 in SDD (0·48, 0·38-0·60). Eight patients acquired bacteraemia with highly resistant microorganisms during SDD, compared with 18 patients (with 19 episodes) during standard care (0·41, 0·18-0·94; rate reduction [RR] 59%, absolute risk reduction [ARR] 0·6%) and 20 during SOD (0·37, 0·16-0·85; RR 63%, ARR 0·7%). Of the patients staying in intensive-care units for more than 3 days, we obtained endotracheal aspirate cultures for 881 (49%) patients receiving standard care, 886 (50%) receiving SOD, and 828 (44%) receiving SDD. 128 (15%) patients acquired respiratory tract colonisation with highly resistant microorganisms during standard care, compared with 74 (8%) during SDD (0·58, 0·43-0·78; RR 38%, ARR 5·5%) and 88 (10%) during SOD (0·65, 0·49-0·87; RR 32%, ARR 4·6%). Acquired respiratory tract colonisation with Gram-negative bacteria or cefotaxime-resistant and colistin-resistant pathogens was lowest during SDD.

Interpretation

Widespread use of SDD and SOD in intensive-care units with low levels of antibiotic resistance is justified.

Funding

None.",2011-03-21 +21334061,"Comparison of adaptive pacing therapy, cognitive behaviour therapy, graded exercise therapy, and specialist medical care for chronic fatigue syndrome (PACE): a randomised trial.","

Background

Trial findings show cognitive behaviour therapy (CBT) and graded exercise therapy (GET) can be effective treatments for chronic fatigue syndrome, but patients' organisations have reported that these treatments can be harmful and favour pacing and specialist health care. We aimed to assess effectiveness and safety of all four treatments.

Methods

In our parallel-group randomised trial, patients meeting Oxford criteria for chronic fatigue syndrome were recruited from six secondary-care clinics in the UK and randomly allocated by computer-generated sequence to receive specialist medical care (SMC) alone or with adaptive pacing therapy (APT), CBT, or GET. Primary outcomes were fatigue (measured by Chalder fatigue questionnaire score) and physical function (measured by short form-36 subscale score) up to 52 weeks after randomisation, and safety was assessed primarily by recording all serious adverse events, including serious adverse reactions to trial treatments. Primary outcomes were rated by participants, who were necessarily unmasked to treatment assignment; the statistician was masked to treatment assignment for the analysis of primary outcomes. We used longitudinal regression models to compare SMC alone with other treatments, APT with CBT, and APT with GET. The final analysis included all participants for whom we had data for primary outcomes. This trial is registered at http://isrctn.org, number ISRCTN54285094.

Findings

We recruited 641 eligible patients, of whom 160 were assigned to the APT group, 161 to the CBT group, 160 to the GET group, and 160 to the SMC-alone group. Compared with SMC alone, mean fatigue scores at 52 weeks were 3·4 (95% CI 1·8 to 5·0) points lower for CBT (p = 0·0001) and 3·2 (1·7 to 4·8) points lower for GET (p = 0·0003), but did not differ for APT (0·7 [-0·9 to 2·3] points lower; p = 0·38). Compared with SMC alone, mean physical function scores were 7·1 (2·0 to 12·1) points higher for CBT (p = 0·0068) and 9·4 (4·4 to 14·4) points higher for GET (p = 0·0005), but did not differ for APT (3·4 [-1·6 to 8·4] points lower; p=0·18). Compared with APT, CBT and GET were associated with less fatigue (CBT p = 0·0027; GET p = 0·0059) and better physical function (CBT p=0·0002; GET p<0·0001). Subgroup analysis of 427 participants meeting international criteria for chronic fatigue syndrome and 329 participants meeting London criteria for myalgic encephalomyelitis yielded equivalent results. Serious adverse reactions were recorded in two (1%) of 159 participants in the APT group, three (2%) of 161 in the CBT group, two (1%) of 160 in the GET group, and two (1%) of 160 in the SMC-alone group.

Interpretation

CBT and GET can safely be added to SMC to moderately improve outcomes for chronic fatigue syndrome, but APT is not an effective addition.

Funding

UK Medical Research Council, Department of Health for England, Scottish Chief Scientist Office, Department for Work and Pensions.",2011-02-18 +21508921,"Malaria surveillance--United States, 2009.","

Problem/condition

Malaria in humans is caused by intraerythrocytic protozoa of the genus Plasmodium. These parasites are transmitted by the bite of an infective female Anopheles mosquito. The majority of malaria infections in the United States occur among persons who have traveled to areas with ongoing malaria transmission. In the United States, cases can occur through exposure to infected blood products, congenital transmission, or local mosquitoborne transmission. Malaria surveillance is conducted to identify episodes of local transmission and to guide prevention recommendations for travelers.

Period covered

This report summarizes cases in persons with onset of illness in 2009 and summarizes trends during previous years.

Description of system

Malaria cases diagnosed by blood film, polymerase chain reaction or rapid diagnostic tests are mandated to be reported to local and state health departments by health-care providers or laboratory staff. Case investigations are conducted by local and state health departments, and reports are transmitted to CDC through the National Malaria Surveillance System (NMSS), National Notifiable Diseases Surveillance System (NNDSS), or direct CDC consults. Data from these reporting systems serve as the basis for this report.

Results

CDC received reports of 1,484 cases of malaria, including two transfusion-related cases, three possible congenital cases, one transplant case and four fatal cases, with an onset of symptoms in 2009 among persons in the United States. This number represents an increase of 14% from the 1,298 cases reported for 2008. Plasmodium falciparum, P. vivax, P. malariae, and P. ovale were identified in 46%, 11%, 2%, and 2% of cases, respectively. Thirteen patients were infected by two or more species. The infecting species was unreported or undetermined in 38% of cases. Among the 1,484 cases 1,478 were classified as imported. Among the 103 U.S. civilians for whom information on chemoprophylaxis use and travel area was known, only 34 (33%) reported that they had followed and adhered to a chemoprophylactic drug regimen recommended by CDC for the area to which they had traveled. Nineteen cases were reported in pregnant women, among whom none adhered to chemoprophylaxis. Almost 22% of the cases among pregnant women were treated with an inappropriate treatment drug regimen, of which 39% were among cases with either a P. vivax or P. ovale infection where primaquine was not taken. Among all the reasons for travel, travelers visiting friends and relatives (VFR) and missionaries were the groups with the lowest proportion of chemoprophylexis use.

Interpretation

A notable increase in the number of malaria cases was reported from 2008 to 2009; however, the number of cases in 2009 is consistent with the average number of cases reported during the preceding 4 years. In the majority of reported cases, U.S. civilians who acquired infection abroad had not adhered to a chemoprophylaxis regimen that was appropriate for the country in which they acquired malaria. Furthermore, treatment of malaria, while appropriate for the majority of cases, was insufficient for a large number of P. vivax and P. ovale infections, putting patients at risk for relapsing malaria.

Public health actions

Decreasing the number of malaria cases in subsequent years will require conveying the importance of adhering to appropriate preventive measures for malaria specifically targeting travelers visiting friends and relatives, missionary, and pregnant populations. Clinicians require education on the need to encourage use of malaria prophylaxis and need further information on the appropriate diagnostic and treatment guidelines for malaria. Malaria prevention recommendations are available online (http://www.cdc.gov/malaria/travelers/ or http://wwwnc.cdc.gov/travel/yellowbook/2010/chapter-2/malaria.aspx#990). Malaria infections can be fatal if not diagnosed and treated promptly with antimalarial medications appropriate for the individual patient's age and medical history, the likely site of malaria acquisition, and previous use of antimalarial chemoprophylaxis. Clinicians should consult the CDC Guidelines for Treatment and contact the CDC's Malaria Hotline for case management advisement when needed. Malaria treatment recommendations can be obtained online (http://www.cdc.gov/malaria/diagnosis_treatment) or by calling the Malaria Hotline (770-488-7788).",2011-04-01 +21290625,"64Cu-1,4,7-Triazacyclononane-1,4-diacetic acid-9-aminonanoic acid-Gln-Trp-Ala-Val-Gly-His-Leu-Met-NH2","64Cu-1,4,7-Triazacyclononane-1,4-diacetic acid (NO2A)-9-aminonanoic acid (9-Anc)-Gln-Trp-Ala-Val-Gly-His-Leu-Met-NH2 (BBN(7–14)NH2), abbreviated as 64Cu-NO2A-(9-Anc)-BBN(7–14)NH2, is a bombesin (BBN)-based, 64Cu-NO2A-conjugated peptide that was synthesized by Lane et al. for use in positron emission tomography (PET) of tumors expressing gastrin-releasing peptide receptor (GRPR) (1, 2). GRPR is a glycosylated G-protein–coupled receptor that is normally expressed in non-neuroendocrine tissues of the breast and pancreas and in neuroendocrine cells of the brain, gastrointestinal tract, lung, and prostate (3, 4). 
GRPR has been found to be overexpressed in various human tumors, and a large number of BBN analogs have been investigated for GRPR-targeted imaging and therapy (5, 6). These analogs have been synthesized on the basis of either truncated BBN (BBN(6–14) or BBN(7–14)) or full-length BBN(1–14) (7, 8). Chelators and spacers have been used frequently for chelating metals and for improving the kinetics of conjugates (9-11). 64Cu is a radiometal with potential applications in diagnostic and therapeutic nuclear medicine. The half-life for 64Cu (t1/2 = 12.7 h) is long enough for drug preparation, quality control, imaging, and therapy (12, 13). However, use of 64Cu is limited by issues of in vivo transchelation to proteins found in blood and liver (such as superoxide dismutase) (1). A variety of chelators have been investigated for the purpose of stably chelating 64Cu (13). In general, 64Cu-labeled 1,4,7,10-tetraazacyclodecane-1,4,7,10-tetraacetic acid (64Cu-DOTA) and 64Cu-labeled 1,4,8,11-tetraazacyclotetradecane-1,4,8,11-tetraacetic acid (64Cu-TETA) exhibit high uptake and retention in nontarget organs, which limits their application. Cross-bridged (CB) analogs, such as CB-DO2A ((1,4,7,10-tetraazabicyclo[5.5.2]tetradecane-4,10-diyl)diacetic acid), CB-TE2A ((1,4,8,11-tetraazabicyclo[6.6.2]hexadecane-4,11-diyl)diacetic acid), SarAr (1-N-(4-aminobenzyl)-3,6,10,13,16,19-hexa-aza-bicyclo-[6.6.6]eichosane-1,8-diamine), and NOTA (1,4,7-triazacyclononane-1,4,7-triacetic acid), demonstrate improved copper containment by enhancing the ligand's rigidity (2, 14). Prasanphanich et al. recently reported that the NOTA-based 64Cu-NOTA-8-Aoc-BBN(7–14)NH2 conjugate (where 8-Aoc = 8-aminooctanoic acid) exhibited decreased accumulation in hepatic tissue as compared with other chelator-based (DOTA, TETA, and CB-TE2A) conjugates (2, 14). To improve the tumor uptake and maintain the good pharmacokinetic properties of the 64Cu-NOTA-8-Aoc-BBN(7–14)NH2 conjugate, Lane et al. 
synthesized a new group of conjugates with the NOTA derivative NO2A and replaced the spacer 8-Aoc with an aliphatic or aromatic linking (1). These conjugates were abbreviated as 64Cu-NO2A-(X)-BBN(7–14)NH2, where X denotes the pharmacokinetic modifier, such as AMBA (para-aminobenzoic acid), β-Ala (beta-alanine), 5-Ava (5-aminovaleric acid), 6-Ahx (6-aminohexanoic acid), 8-Aoc, and 9-Anc. The β-Ala, 5-Ava, 6-Ahx, and 9-Anc are aliphatic pharmacokinetic modifiers, ranging from three to nine carbons in length, whereas AMBA is an aromatic pharmacokinetic modifier and is more rigid than the aliphatic modifiers. Evidence indicates that a spacing moiety, ranging from three to eight carbons in length, can assist in receptor-mediated uptake (15). Conjugates containing an aromatic linker have significantly higher uptake and retention in PC-3 tumor tissue than those containing hydrocarbon or ether linkers (15, 16). Studies by Lane et al. have shown that the spacer X in the 64Cu-NO2A-(X)-BBN(7–14)NH2 conjugates has a significant role in clearance, accumulation, and retention of the conjugates in tumor tissue (1). The four conjugates showing the most favorable pharmacokinetic properties and the highest degree of pancreas and tumor accumulation were those in which X = 6-Ahx, 8-Aoc, 9-Anc, or AMBA. PET imaging with these conjugates produced high-contrast images of PC-3 tumor xenografts in severe combined immunodeficient (SCID) mice (1). This chapter describes the data obtained with 64Cu-NO2A-(9-Anc)-BBN(7–14)NH2. 
Detailed information for other 64Cu-NO2A-(X)-BBN(7–14)NH2 conjugates is available in MICAD (http://www.ncbi.nlm.nih.gov/books/NBK5330/) (1).",2011-02-04 +21290624,"64Cu-1,4,7-Triazacyclononane-1,4-diacetic acid-6-aminohexanoic acid-Gln-Trp-Ala-Val-Gly-His-Leu-Met-NH2","64Cu-1,4,7-Triazacyclononane-1,4-diacetic acid (NO2A)-6-aminohexanoic acid (6-Ahx)-Gln-Trp-Ala-Val-Gly-His-Leu-Met-NH2 (BBN(7–14)NH2), abbreviated as 64Cu-NO2A-(6-Ahx)-BBN(7–14)NH2, is a bombesin (BBN)-based, 64Cu-NO2A-conjugated peptide that was synthesized by Lane et al. for use in positron emission tomography (PET) of tumors expressing gastrin-releasing peptide receptor (GRPR) (1, 2). GRPR is a glycosylated G-protein–coupled receptor that is normally expressed in non-neuroendocrine tissues of the breast and pancreas and in neuroendocrine cells of the brain, gastrointestinal tract, lung, and prostate (3, 4). GRPR has been found to be overexpressed in various human tumors, and a large number of BBN analogs have been investigated for GRPR-targeted imaging and therapy (5, 6). These analogs have been synthesized on the basis of either truncated BBN (BBN(6–14) or BBN(7–14)) or full-length BBN(1–14) (7, 8). Chelators and spacers have been used frequently for chelating metals and for improving the kinetics of conjugates (9-11). 64Cu is a radiometal with potential applications in diagnostic and therapeutic nuclear medicine. The half-life for 64Cu (t1/2 = 12.7 h) is long enough for drug preparation, quality control, imaging, and therapy (12, 13). However, use of 64Cu is limited by issues of in vivo transchelation to proteins found in blood and liver (such as superoxide dismutase) (1). A variety of chelators have been investigated for the purpose of stably chelating 64Cu (13). 
In general, 64Cu-labeled 1,4,7,10-tetraazacyclodecane-1,4,7,10-tetraacetic acid (64Cu-DOTA) and 64Cu-labeled 1,4,8,11-tetraazacyclotetradecane-1,4,8,11-tetraacetic acid (64Cu-TETA) exhibit high uptake and retention in nontarget organs, which limits their application. Cross-bridged (CB) analogs, such as CB-DO2A ((1,4,7,10-tetraazabicyclo[5.5.2]tetradecane-4,10-diyl)diacetic acid), CB-TE2A ((1,4,8,11-tetraazabicyclo[6.6.2]hexadecane-4,11-diyl)diacetic acid), SarAr (1-N-(4-aminobenzyl)-3,6,10,13,16,19-hexa-aza-bicyclo-[6.6.6]eichosane-1,8-diamine), and NOTA (1,4,7-triazacyclononane-1,4,7-triacetic acid), demonstrate improved copper containment by enhancing the ligand's rigidity (2, 14). Prasanphanich et al. recently reported that the NOTA-based 64Cu-NOTA-8-Aoc-BBN(7–14)NH2 conjugate (where 8-Aoc = 8-aminooctanoic acid) exhibited decreased accumulation in hepatic tissue as compared with other chelator-based (DOTA, TETA, and CB-TE2A) conjugates (2, 14). To improve the tumor uptake and maintain the good pharmacokinetic properties of the 64Cu-NOTA-8-Aoc-BBN(7–14)NH2 conjugate, Lane et al. synthesized a new group of conjugates with the NOTA derivative NO2A and replaced the spacer 8-Aoc with an aliphatic or aromatic linking (1). These conjugates were abbreviated as 64Cu-NO2A-(X)-BBN(7–14)NH2, where X denotes the pharmacokinetic modifier, such as AMBA (para-aminobenzoic acid), β-Ala (beta-alanine), 5-Ava (5-aminovaleric acid), 6-Ahx, 8-Aoc, and 9-Anc (9-aminonanoic acid). The β-Ala, 5-Ava, 6-Ahx, and 9-Anc are aliphatic pharmacokinetic modifiers, ranging from three to nine carbons in length, whereas AMBA is an aromatic pharmacokinetic modifier and is more rigid than the aliphatic modifiers. Evidence indicates that a spacing moiety, ranging from three to eight carbons in length, can assist in receptor-mediated uptake (15). 
Conjugates containing an aromatic linker have significantly higher uptake and retention in PC-3 tumor tissue than those containing hydrocarbon or ether linkers (15, 16). Studies by Lane et al. have shown that the spacer X in the 64Cu-NO2A-(X)-BBN(7–14)NH2 conjugates has a significant role in clearance, accumulation, and retention of the conjugates in tumor tissue (1). The four conjugates showing the most favorable pharmacokinetic properties and the highest degree of pancreas and tumor accumulation were those in which X = 6-Ahx, 8-Aoc, 9-Anc, or AMBA. PET imaging with these conjugates produced high-contrast images of PC-3 tumor xenografts in severe combined immunodeficient (SCID) mice (1). This chapter describes the data obtained with 64Cu-NO2A-(6-Ahx)-BBN(7–14)NH2. Detailed information for other 64Cu-NO2A-(X)-BBN(7–14)NH2 conjugates is available in MICAD (http://www.ncbi.nlm.nih.gov/books/NBK5330/) (1).",2011-02-04 +21290623,"64Cu-1,4,7-Triazacyclononane-1,4-diacetic acid-para-aminobenzoic acid-Gln-Trp-Ala-Val-Gly-His-Leu-Met-NH2","64Cu-1,4,7-Triazacyclononane-1,4-diacetic acid (NO2A)-para-aminobenzoic acid (AMBA)-Gln-Trp-Ala-Val-Gly-His-Leu-Met-NH2 (BBN(7–14)NH2), abbreviated as 64Cu-NO2A-(AMBA)-BBN(7–14)NH2, is a bombesin (BBN)-based, 64Cu-NO2A-conjugated peptide that was synthesized by Lane et al. for use in positron emission tomography (PET) of tumors expressing gastrin-releasing peptide receptor (GRPR) (1, 2). GRPR is a glycosylated G-protein–coupled receptor that is normally expressed in non-neuroendocrine tissues of the breast and pancreas and in neuroendocrine cells of the brain, gastrointestinal tract, lung, and prostate (3, 4). GRPR has been found to be overexpressed in various human tumors, and a large number of BBN analogs have been investigated for GRPR-targeted imaging and therapy (5, 6). These analogs have been synthesized on the basis of either truncated BBN ((BBN(6–14) or BBN(7–14)) or full-length BBN(1–14) (7, 8). 
Chelators and spacers have been used frequently for chelating metals and for improving the kinetics of conjugates (9-11). 64Cu is a radiometal with potential applications in diagnostic and therapeutic nuclear medicine. The half-life for 64Cu (t1/2 = 12.7 h) is long enough for drug preparation, quality control, imaging, and therapy (12, 13). However, use of 64Cu is limited by issues of in vivo transchelation to proteins found in blood and liver (such as superoxide dismutase) (1). A variety of chelators have been investigated for the purpose of stably chelating 64Cu (13). In general, 64Cu-labeled 1,4,7,10-tetraazacyclodecane-1,4,7,10-tetraacetic acid (64Cu-DOTA) and 64Cu-labeled 1,4,8,11-tetraazacyclotetradecane-1,4,8,11-tetraacetic acid (64Cu-TETA) exhibit high uptake and retention in nontarget organs, which limits their application. Cross-bridged (CB) analogs, such as CB-DO2A ((1,4,7,10-tetraazabicyclo[5.5.2]tetradecane-4,10-diyl)diacetic acid), CB-TE2A ((1,4,8,11-tetraazabicyclo[6.6.2]hexadecane-4,11-diyl)diacetic acid), SarAr (1-N-(4-aminobenzyl)-3,6,10,13,16,19-hexa-aza-bicyclo-[6.6.6]eichosane-1,8-diamine), and NOTA (1,4,7-triazacyclononane-1,4,7-triacetic acid), demonstrate improved copper containment by enhancing the ligand's rigidity (2, 14). Prasanphanich et al. recently reported that the NOTA-based 64Cu-NOTA-8-Aoc-BBN(7–14)NH2 conjugate (where 8-Aoc = 8-aminooctanoic acid) exhibited decreased accumulation in hepatic tissue as compared with other chelator-based (DOTA, TETA and CB-TE2A) conjugates (2, 14). To improve the tumor uptake and maintain the good pharmacokinetic properties of the 64Cu-NOTA-8-Aoc-BBN(7–14)NH2 conjugate, Lane et al. synthesized a new group of conjugates with the NOTA derivative NO2A and replaced the spacer 8-Aoc with an aliphatic or aromatic linking (1). 
These conjugates were abbreviated as 64Cu-NO2A-(X)-BBN(7–14)NH2, where X denotes the pharmacokinetic modifier, such as AMBA, β-Ala (beta-alanine), 5-Ava (5-aminovaleric acid), 6-Ahx (6-aminohexanoic acid), 8-Aoc, and 9-Anc (9-aminonanoic acid). The β-Ala, 5-Ava, 6-Ahx, and 9-Anc are aliphatic pharmacokinetic modifiers, ranging from three to nine carbons in length, whereas AMBA is an aromatic pharmacokinetic modifier and is more rigid than the aliphatic modifiers. Evidence indicates that a spacing moiety, ranging from three to eight carbons in length, can assist in receptor-mediated uptake (15). Conjugates containing an aromatic linker have significantly higher uptake and retention in PC-3 tumor tissue than those containing hydrocarbon or ether linkers (15, 16). Studies by Lane et al. have shown that the spacer X in the 64Cu-NO2A-(X)-BBN(7–14)NH2 conjugates has a significant role in clearance, accumulation, and retention of the conjugates in tumor tissue (1). 64Cu-NO2A-(AMBA)-BBN(7–14)NH2 exhibited the highest accumulation in tumor tissue and the most efficient clearance from whole-body tissues via the renal–urinary excretion pathway. PET imaging with 64Cu-NO2A-(AMBA)-BBN(7–14)NH2 produced high-contrast images of PC-3 tumor xenografts in severe combined immunodeficient (SCID) mice (1). This chapter describes the data obtained with 64Cu-NO2A-(AMBA)-BBN(7–14)NH2. 
Detailed information for other 64Cu-NO2A-(X)-BBN(7–14)NH2 conjugates is available in MICAD (http://www.ncbi.nlm.nih.gov/books/NBK5330/).",2011-02-04 diff --git a/data/final_inventory_2022.csv b/data/final_inventory_2022.csv new file mode 100644 index 0000000..2a11912 --- /dev/null +++ b/data/final_inventory_2022.csv @@ -0,0 +1,3113 @@ +ID,best_name,best_name_prob,best_common,best_common_prob,best_full,best_full_prob,article_count,extracted_url,extracted_url_status,extracted_url_country,extracted_url_coordinates,wayback_url,publication_date,affiliation,authors,grant_ids,grant_agencies,num_citations,affiliation_countries +21389154,16SpathDB,0.996311396,16SpathDB,0.996311396,,0.0,1,http://147.8.74.24/16SpathDB,"HTTPConnectionPool(host='147.8.74.24', port=80): Max retries exceeded with url: /16SpathDB (Caused by ConnectTimeoutError(, 'Connection to 147.8.74.24 timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20140722082245/http://147.8.74.24/16SpathDB/,2011-03-09,"State Key Laboratory of Emerging Infectious Diseases, Research Centre of Infection and Immunology, Department ofMicrobiology, and Carol Yu Centre for Infection, The University of Hong Kong, Hong Kong. pcywoo@hkucc.hku.hk","Woo PC, Teng JL, Yeung JM, Tse H, Lau SK, Yuen KY",,,24.0,"Hong Kong, Hong Kong" +23293959,3DMET,0.974991322,3DMET,0.974991322,Three-dimensional structure database of natural metabolites,0.861961424,1,"http://www.genome.jp/kegg/compound/, http://www.3dmet.dna.affrc.go.jp","301, HTTPConnectionPool(host='www.3dmet.dna.affrc.go.jp', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.3dmet.dna.affrc.go.jp timed out. 
(connect timeout=5)'))",,"(35.0211,135.7538), ","http://web.archive.org/web/20220615140437/https://www.genome.jp/kegg/compound//, http://web.archive.org/web/20201127014135/http://www.3dmet.dna.affrc.go.jp/",2013-03-07,"Biomolecular Research Unit, National Institute of Agrobiological Sciences , 2-1-2 Kannondai, Tsukuba, Ibaraki 305-8602, Japan.","Maeda MH, Kondo K",,,9.0,Japan +24081580,3did,0.997478525,3did,0.997478525,interacting domains,0.710986495,1,http://3did.irbbarcelona.org,302,,"(41.4301,2.1925)",http://web.archive.org/web/20221019195830/https://3did.irbbarcelona.org/,2013-09-29,"Joint IRB-BSC Program in Computational Biology, Institute for Research in Biomedicine (IRB Barcelona), c/ Baldiri Reixac 10-12, 08028 Barcelona, Spain, Center for Genomic Science of IIT@SEMM, Istituto Italiano di Tecnologia (IIT), Via Adamello 16, 20139 Milan, Italy, California Institute for Quantitative Biomedical Research (qb3) and Department of Bioengineering and Therapeutic Sciences, MC 2530, University of California San Francisco (UCSF) CA 94158-2330, USA and Institució Catalana de Recerca i Estudis Avançats (ICREA), Passeig Lluís Companys 23, 08010 Barcelona, Spain.","Mosca R, Céol A, Stein A, Olivella R, Aloy P",,,89.0,"Spain, Spain, Italy, United States" +24275494,1000 Genomes Selection Browser,0.770321417,,0.0,1000 Genomes Selection Browser,0.770321417,1,http://hsb.upf.edu,"HTTPConnectionPool(host='hsb.upf.edu', port=80): Pool is closed.",,,no_wayback,2013-11-25,"Program for Population Genetics, Institute of Evolutionary Biology (CSIC-Universitat Pompeu Fabra), 08003 Barcelona, Spain, Population Genomics Node, National Institute for Bioinformatics (INB), Universitat Pompeu Fabra, 08003 Barcelona, Spain, Institute of Molecular Biology and Biotechnology-FORTH, Heraklion, Crete GR 700 13, Greece and Department of Evolutionary Genetics, Max Planck Institute for Evolutionary Anthropology, 04103 Leipzig, Germany.","Pybus M, Dall'Olio GM, Luisi P, Uzkudun M, Carreño-Torres A, 
Pavlidis P, Laayouni H, Bertranpetit J, Engelken J",,,86.0,"Germany, Spain, Spain, Greece" +24526713,3DGD,0.996886671,3DGD,0.996886671,Genome Database,0.968708754,1,http://3dgd.biosino.org,"HTTPConnectionPool(host='3dgd.biosino.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170326060852/http://3dgd.biosino.org:80/,2014-02-12,"Key Laboratory of Systems Biology, Shanghai Institutes for Biological Sciences, Chinese Academy of Sciences, 320 Yue Yang Road, Shanghai 200031, P. R. China, University of Chinese Academy of Sciences, 19A Yuquan Road, Beijing 100049, National Center for Protein Science, Shanghai 333 Haike Road, Pudong District, Shanghai 201210 and Shanghai Center for Bioinformation Technology, 1278 Keyuan Road, Shanghai 201203, P. R. ChinaKey Laboratory of Systems Biology, Shanghai Institutes for Biological Sciences, Chinese Academy of Sciences, 320 Yue Yang Road, Shanghai 200031, P. R. China, University of Chinese Academy of Sciences, 19A Yuquan Road, Beijing 100049, National Center for Protein Science, Shanghai 333 Haike Road, Pudong District, Shanghai 201210 and Shanghai Center for Bioinformation Technology, 1278 Keyuan Road, Shanghai 201203, P. R. 
China.","Li C, Dong X, Fan H, Wang C, Ding G, Li Y",,,9.0,"China, China, China" +27081154,3CDB,0.995992641,3CDB,0.995992641,,0.0,1,http://3cdb.big.ac.cn,301,,"(39.9075,116.3972)",http://web.archive.org/web/20211129151313/http://3cdb.big.ac.cn/,2016-04-14,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing, 100101, China University of Chinese Academy of Sciences, Beijing, 100049, China.","Yun X, Xia L, Tang B, Zhang H, Li F, Zhang Z",,,4.0,"China, China" +27694207,3DFlu,0.99639225,3DFlu,0.99639225,,0.0,1,http://nucleus3d.cent.uw.edu.pl/influenza,301,,"(52.2298,21.0118)",http://web.archive.org/web/20190305071202/http://nucleus3d.cent.uw.edu.pl:80/influenza/,2016-10-02,"Centre of New Technologies, University of Warsaw, Warsaw, Poland Institute of Computer Science, Polish Academy of Sciences, Warsaw, Poland.","Mazzocco G, Lazniewski M, Migdał P, Szczepińska T, Radomski JP, Plewczynski D",,,3.0,"Poland, Poland" +28511181,1-CMDb,0.992982775,1-CMDb,0.992982775,one-carbon metabolism database,0.925644517,1,http://slsdb.manipal.edu/ocm,"HTTPConnectionPool(host='slsdb.manipal.edu', port=80): Max retries exceeded with url: /ocm (Caused by ConnectTimeoutError(, 'Connection to slsdb.manipal.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220615195722/http://slsdb.manipal.edu/ocm/,2017-05-17,"Department of Bioinformatics, School of Life Sciences, Manipal University, Manipal, India.","Bhat MK, Gadekar VP, Jain A, Paul B, Rai PS, Satyamoorthy K",,,2.0,India +30304689,10KIP,0.928437392,10KIP,0.928437392,"10,000 Immunomes Project",0.844234129,1,http://10kimmunomes.org,302,,"(34.0522,-118.2437)",http://web.archive.org/web/20190915234016/http://10kimmunomes.org:80/,2018-10-01,"Bakar Computational Health Sciences Institute, University of California, San Francisco, San Francisco, CA 94158, USA; Department of Pediatrics, University of California, San Francisco, San Francisco, CA 94158, USA.","Zalocusky KA, Kan MJ, Hu Z, Dunn P, Thomson E, Wiser J, Bhattacharya S, Butte AJ",,"NIAID NIH HHS, National Institute of Allergy and Infectious Diseases",13.0,"United States, United States" +34244700,2DProts,0.990597233,2DProts,0.990597233,,0.0,1,http://2dprots.ncbr.muni.cz,301,,"(49.1952,16.6080)",http://web.archive.org/web/20220712050142/https://2dprots.ncbr.muni.cz/,2021-07-09,"National Centre for Biomolecular Research, Faculty of Science, Masaryk University, Brno, 625 00, Czech Republic.","Hutařová Vařeková I, Hutař J, Midlik A, Horský V, Hladká E, Svobodová R, Berka K",,"Ministry of Education, Youth and Sports, European Regional Development Fund",0.0, +24888382,AAIR,0.995133579,AAIR,0.995133579,Allergic Airway Inflammation Repository,0.976998336,1,http://aair.cimed.ike.liu.se,"HTTPConnectionPool(host='aair.cimed.ike.liu.se', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-06-03,"Department of Clinical and Experimental Medicine, Centre for Individualised Medicine, Linköping University, Linköping, Sweden.","Gawel DR, Rani James A, Benson M, Liljenström R, Muraro A, Nestor CE, Zhang H, Gustafsson M",,,0.0,Sweden 
+25465051,A-WINGS,0.987336159,A-WINGS,0.987336159,,0,1,http://bioinf.mind.meiji.ac.jp/a-wings,"HTTPConnectionPool(host='bioinf.mind.meiji.ac.jp', port=80): Max retries exceeded with url: /a-wings (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -5] No address associated with hostname'))",,,http://web.archive.org/web/20211128172638/http://bioinf.mind.meiji.ac.jp/a-wings/,2014-12-03,None,"Yamamoto N, Suzuki T, Kobayashi M, Dohra H, Sasaki Y, Hirai H, Yokoyama K, Kawagishi H, Yano K",,,1.0, +26490961,5SRNAdb,0.94896158,5SRNAdb,0.94896158,,0,1,http://combio.pl/5srnadb,301,,"(52.4069,16.9299)",no_wayback,2015-10-20,"Department of Computational Biology, Institute of Molecular Biology and Biotechnology, Adam Mickiewicz University, 61-614 Poznan, Poland mszyman@amu.edu.pl.","Szymanski M, Zielezinski A, Barciszewski J, Erdmann VA, Karlowski WM",,,17.0,Poland +"27924021, 32162267",AAgAtlas,0.991741061,AAgAtlas,0.991741061,,0,2,http://biokb.ncpsb.org/aagatlas,301,,"(22.2783,114.1747)",no_wayback,2020-01-01,"State Key Laboratory of Proteomics, Beijing Proteome Research Center, National Center for Protein Sciences-Beijing (PHOENIX Center), Beijing Institute of Radiation Medicine, Beijing 102206, China., State Key Laboratory of Proteomics, Beijing Proteome Research Center, National Center for Protein Sciences-Beijing (PHOENIX Center), Beijing Institute of Lifeomics, Beijing, China.","Wang D, Yang L, Zhang P, LaBaer J, Hermjakob H, Li D, Yu X, Wang D, Zhang Y, Meng Q, Yu X",", ",", ",18.0,"China, China" +28365738,ABCMdb,0.99738276,ABCMdb,0.99738276,,0,1,http://abcm2.hegelab.org,200,,"(47.4984,19.0404)",http://web.archive.org/web/20220810212250/http://abcm2.hegelab.org/,2017-01-01,"MTA-SE Molecular Biophysics Research Group, Hungarian Academy of Sciences and Department of Biophysics and Radiation Biology, Semmelweis University, Budapest 1094, Hungary.","Tordai H, Jakab K, Gyimesi G, András K, Brózik A, Sarkadi B, Hegedus T",,,3.0,Hungary 
+28779078,A2MDB,0.993841752,A2MDB,0.993841752,Aspergillus Secondary Metabolites Database,0.987300144,1,http://www.iictindia.org/A2MDB,302,,"(17.4177,78.5190)",no_wayback,2017-08-04,"Pharmacology & Toxicology Division, CSIR-Indian Institute of Chemical Technology (IICT), Uppal Road, Tarnaka, Hyderabad, 500 607, India.","Vadlapudi V, Borah N, Yellusani KR, Gade S, Reddy P, Rajamanikyam M, Vempati LNS, Gubbala SP, Chopra P, Upadhyayula SM, Amanchy R",,,20.0,India +28977551,AAgMarker,0.988109291,AAgMarker,0.988109291,,0,1,http://bioinfo.wilmer.jhu.edu/AAgMarker,404,,,no_wayback,2018-01-01,"Department of Ophthalmology, Johns Hopkins School of Medicine, Baltimore, MD 21205, USA.","Pan J, Liu S, Zhu H, Qian J",,"NIGMS NIH HHS, NEI NIH HHS",1.0,United States +29156005,aBiofilm,0.9043421,aBiofilm,0.9043421,,0,1,http://bioinfo.imtech.res.in/manojk/abiofilm,404,,,http://web.archive.org/web/20220225182742/https://bioinfo.imtech.res.in/manojk/abiofilm/,2018-01-01,"Bioinformatics Centre, Institute of Microbial Technology, Council of Scientific and Industrial Research (CSIR), Sector 39-A, Chandigarh 160036, India.","Rajput A, Thakur A, Sharma S, Kumar M",,,22.0,India +31832668,AcetoBase,0.99637115,AcetoBase,0.99637115,,0,1,http://acetobase.molbio.slu.se,301,,"(59.8588,17.6389)",http://web.archive.org/web/20220331003305/https://acetobase.molbio.slu.se/,2019-01-01,"Department of Molecular Sciences, Swedish University of Agricultural Sciences, Uppsala BioCenter, Box 7025, SE-750 07 Uppsala, Sweden.","Singh A, Müller B, Fuxelius HH, Schnürer A",,"Swedish Energy Agency, Interreg Europe, Västra Götaland Region",5.0,Sweden +33169878,A.P.E.S,0.811360155,A.P.E.S,0.811360155,,0,1,http://apeswiki.eva.mpg.de,"HTTPConnectionPool(host='apeswiki.eva.mpg.de', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,no_wayback,2020-11-10,"Department of Primatology, Max Planck Institute for Evolutionary 
Anthropology, Leipzig, Germany.","Heinicke S, Ordaz-Németh I, Junker J, Bachmann ME, Marrocoli S, Wessling EG, Byler D, Cheyne SM, Desmond J, Dowd D, Fitzgerald M, Fourrier M, Goedmakers A, Hernandez-Aguilar RA, Hillers A, Hockings KJ, Jones S, Kaiser M, Koops K, Lapuente JM, Maisels F, Riedel J, Terrade E, Tweh CG, Vergnes V, Vogt T, Williamson EA, Kühl HS",,"Max-Planck-Gesellschaft, Robert Bosch Stiftung",0.0,Germany +20949389,ACSR,0.963491698,ACSR,0.963491698,AIDS and Cancer Specimen Resource,0.931551437,1,http://acsr.ucsf.edu,301,,"(37.7749,-122.4194)",http://web.archive.org/web/20200729012332/https://acsr.ucsf.edu/,2011-01-01,"Department of Pathology, College of Medicine and Public Health, The Ohio State University, Columbus, OH, USA.","Ayers LW, Silver S, Orenstein JM, McGrath MS, Garcia DL",,NCI NIH HHS,4.0,United States +22829726,Actinobase,0.993042409,Actinobase,0.993042409,,0,1,http://www.actinobase.in,"HTTPConnectionPool(host='www.actinobase.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180320222043/http://actinobase.in/,2012-06-16,"Department of Biosciences, Saurashtra University, Rajkot, Gujarat, India-360 005.","Sharma AK, Gohel S, Singh SP",,,3.0,India +25229122,ACPro,0.962495983,ACPro,0.962495983,,0,1,http://www.ats.amherst.edu/protein,301,,"(42.3671,-72.4646)",http://web.archive.org/web/20210301185544/https://www.ats.amherst.edu/protein/,2014-10-14,"Department of Mathematics and Statistics, Amherst College, Amherst, Massachusetts.","Wagaman AS, Coburn A, Brand-Thomas I, Dash B, Jaswal SS",,NSF (UBM-Institutional-Collaborative: The Four-College Biomath Consortium),6.0, +29126202,ActiveDriverDB,0.994272113,ActiveDriverDB,0.994272113,,0,1,http://www.ActiveDriverDB.org,302,,"(43.7001,-79.4163)",http://web.archive.org/web/20221017121141/https://activedriverdb.org/,2018-01-01,"Computational Biology Program, 
Ontario Institute for Cancer Research, Toronto, Ontario, Canada.","Krassowski M, Paczkowska M, Cullion K, Huang T, Dzneladze I, Ouellette BFF, Yamada JT, Fradet-Turcotte A, Reimand J",,,27.0,Canada +29956270,AD&FTD,0.950616956,AD&FTD,0.950616956,,0,1,http://www.molgen.vib-ua.be/FTDmutations,301,,"(51.2205,4.4003)",http://web.archive.org/web/20151001064408/http://www.molgen.vib-ua.be:80/FTDMutations/,2018-01-01,"Neurodegenerative Brain Diseases Group, Center for Molecular Neurology, VIB, University of Antwerp - CDE, Antwerp, Belgium.","Cruts M, Van Broeckhoven C",,,1.0,Belgium +32702093,AciDB,0.99098736,AciDB,0.99098736,,0,1,"http://AciDB.cl, http://gitlab.com/Hawkline451/acidb","308, 301",,"(34.0183,-117.8546), (37.7621,-122.3971)","http://web.archive.org/web/20220809105104/https://acidb.cl/, no_wayback",2020-12-01,"Center for Bioinformatics and Genome Biology, Fundación Ciencia & Vida, Santiago, Chile.","Neira G, Cortez D, Jil J, Holmes DS",,"FONDECYT, Fundación Ciencia & Vida, Programa de Apoyo a Centros con Financiamiento Basal",3.0,Chile +33068435,AcrDB,0.997832954,AcrDB,0.997832954,,0,1,http://bcb.unl.edu/AcrDB,302,,"(40.8000,-96.6670)",http://web.archive.org/web/20220721183718/https://bcb.unl.edu/AcrDB/,2021-01-01,"Department of Genetics, University of North Carolina at Chapel Hill, NC, USA.","Huang L, Yang B, Yi H, Asif A, Wang J, Lithgow T, Zhang H, Minhas FUAA, Yin Y",,"United States Department of Agriculture, National Science Foundation, UNL",7.0,United States +33137193,AcrHub,0.99744314,AcrHub,0.99744314,,0,1,http://pacrispr.erc.monash.edu/AcrHub,302,,"(37.5331,-122.2486)",http://web.archive.org/web/20221027110039/https://pacrispr.erc.monash.edu/AcrHub/,2021-01-01,"Infection and Immunity Program, Biomedicine Discovery Institute and Department of Microbiology, Monash University, VIC 3800, Australia.","Wang J, Dai W, Li J, Li Q, Xie R, Zhang Y, Stubenrauch C, Lithgow T",,National Health and Medical Research Council,3.0,Australia 
+34025933,AddictGene,0.997377753,AddictGene,0.997377753,,0,1,http://159.226.67.237/sun/addictgedb,301,,"(39.9075,116.3972)",no_wayback,2021-04-19,"CAS Key Laboratory of Mental Health, Institute of Psychology, Chinese Academy of Sciences, Beijing 100101, China.","Shi L, Wang Y, Li C, Zhang K, Du Q, Zhao M",,National Natural Science Foundation of China,1.0,China +34497528,ACNPD,0.9954561,ACNPD,0.9954561,anti-cancer natural product database,0.887865017,1,http://www.acnpd-fu.com,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220709072601/http://www.acnpd-fu.com/,2021-08-23,"School of Life Science and Engineering, Southwest Jiaotong University, Chengdu, China.","Tan X, Fu J, Yuan Z, Zhu L, Fu L",,,0.0,China +22080511,ADHDgene,0.933751583,ADHDgene,0.933751583,,0,1,http://adhd.psych.ac.cn,"HTTPConnectionPool(host='adhd.psych.ac.cn', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='adhd.psych.ac.cn', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220124054010/http://adhd.psych.ac.cn/,2011-11-10,"Key Laboratory of Mental Health, Institute of Psychology, Chinese Academy of Sciences, Beijing 100101, China.","Zhang L, Chang S, Li Z, Zhang K, Du Y, Ott J, Wang J",,,32.0,China +22682155,AFFINOMICS,0.97896564,AFFINOMICS,0.97896564,,0,1,http://www.affinomics.org,"HTTPConnectionPool(host='www.affinomics.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.affinomics.org timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20190205045454/http://www.affinomics.org:80/?,2012-06-01,None,"Stoevesandt O, Taussig MJ",,"EU 7th Framework Programme, Biotechnology and Biological Sciences Research Council",15.0, +23092397,admetSAR,0.990115881,admetSAR,0.990115881,,0,1,http://www.admetexp.org,302,,"(-37.9747,145.0269)",http://web.archive.org/web/20220617194242/http://www.admetexp.org/,2012-11-01,"Shanghai Key Laboratory of New Drug Design, School of Pharmacy, East China University of Science and Technology, 130 Meilong Road, Shanghai 200237, China.","Cheng F, Li W, Zhou Y, Shen J, Wu Z, Liu G, Lee PW, Tang Y",,,380.0,"China, China" +25048123,AgAbDb,0.995833278,AgAbDb,0.995833278,Antigen-Antibody Interaction Database,0.982613113,1,http://bioinfo.net.in/AgAbDb.htm,403,,,no_wayback,2014-01-01,"Bioinformatics Centre, University of Pune, Ganeshkhind Road, Pune, 411007, Maharashtra, India, urmila@bioinfo.net.in.","Kulkarni-Kale U, Raskar-Renuse S, Natekar-Kalantre G, Saxena SA",,,6.0,India +25361966,ADReCS,0.995718002,ADReCS,0.995718002,Adverse Drug Reaction Classification System,0.979371885,1,http://bioinf.xmu.edu.cn/ADReCS,302,,"(39.9906,116.2887)",http://web.archive.org/web/20220616010942/http://bioinf.xmu.edu.cn/ADReCS/,2014-10-31,"State Key Laboratory of Stress Cell Biology, School of Life Sciences, Xiamen University, Xiamen, Fujian 361102, P.R. 
China.","Cai MC, Xu Q, Pan YJ, Pan W, Ji N, Li YB, Jin HJ, Liu K, Ji ZL",,,25.0,China +27507885,ADPriboDB,0.997497678,ADPriboDB,0.997497678,,0,1,http://ADPriboDB.leunglab.org,405,,,http://web.archive.org/web/20201103132030/http://adpribodb.leunglab.org/,2016-08-09,"Department of Biochemistry and Molecular Biology, Bloomberg School of Public Health, Johns Hopkins University, Baltimore, MD 21205, USA.","Vivelo CA, Wat R, Agrawal C, Tee HY, Leung AK",,NIGMS NIH HHS,24.0,United States +30239679,AgBioData,0.996884644,AgBioData,0.996884644,,0,1,http://www.agbiodata.org,302,,"(46.7313,-117.1796)",http://web.archive.org/web/20221006060918/https://www.agbiodata.org/,2018-01-01,"Corn Insects and Crop Genetics Research Unit, USDA-ARS, Ames, IA, USA.","Harper L, Campbell J, Cannon EKS, Jung S, Poelchau M, Walls R, Andorf C, Arnaud E, Berardini TZ, Birkett C, Cannon S, Carson J, Condon B, Cooper L, Dunn N, Elsik CG, Farmer A, Ficklin SP, Grant D, Grau E, Herndon N, Hu ZL, Humann J, Jaiswal P, Jonquet C, Laporte MA, Larmande P, Lazo G, McCarthy F, Menda N, Mungall CJ, Munoz-Torres MC, Naithani S, Nelson R, Nesdill D, Park C, Reecy J, Reiser L, Sanderson LA, Sen TZ, Staton M, Subramaniam S, Tello-Ruiz MK, Unda V, Unni D, Wang L, Ware D, Wegrzyn J, Williams J, Woodhouse M, Yu J, Main D",,"National Science Foundation, U.S. Department of Agriculture, National Institutes of Health, International Center for Tropical Agriculture, The US Land Grant Universities, National Science Foundation, U.S. Department of Agriculture, U.S. Department of Agriculture, Fondazione Edmund Mach, US Dry Pea and Lentil Council, Washington Tree Fruit Research, National Science Foundation, U.S. Department of Agriculture, National Science Foundation, U.S. Department of Agriculture, U.S. 
Department of Agriculture, Bill and Melinda Gates Foundation, Agence Nationale de la Recherche, Consultative Group for International Agricultural Research, Research and Innovation Center, National Science Foundation, National Science Foundation, U.S. Department of Energy, National Science Foundation, National Science Foundation, U.S. Department of Agriculture, U.S. Department of Agriculture, National Institutes of Health, The Northern Pulse Growers, U.S. Department of Agriculture, U.S. Department of Agriculture, University of Montpellier, U.S. Department of Agriculture, U.S. Department of Agriculture, Bill and Melinda Gates Foundation",15.0,United States +31648087,AdditiveChem,0.995760739,AdditiveChem,0.995760739,,0,1,http://www.rxnfinder.org/additivechem,301,,"(39.9075,116.3972)",http://web.archive.org/web/20220527215043/http://www.rxnfinder.org/additivechem/,2019-09-11,"CAS Key Laboratory of Computational Biology, CAS-MPG Partner Institute for Computational Biology, Shanghai Institute of Nutrition and Health, Shanghai Institutes for Biological Sciences, University of Chinese Academy of Sciences, Chinese Academy of Sciences, Shanghai 200333, PR China.","Zhang D, Cheng X, Sun D, Ding S, Cai P, Yuan L, Tian Y, Tu W, Hu QN",,"CAS, Natural Science Foundation of Tianjin, National Key Research and Development Program of China, National Science Foundation of China, Chinese Academy of Sciences, CAS",0.0,China +32291734,AELP,0.983020544,AELP,0.983020544,Auditory English Lexicon Project,0.609619483,1,http://inetapps.nus.edu.sg/aelp,"HTTPConnectionPool(host='inetapps.nus.edu.sg', port=80): Max retries exceeded with url: /aelp (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20211027004053/https://inetapps.nus.edu.sg/aelp/,2020-10-01,"Department of Psychology, National University of Singapore, Singapore, 117570, Singapore. 
psygohw@nus.edu.sg.","Goh WD, Yap MJ, Chee QW",,,1.0,"Singapore, Singapore, Singapore" +33401309,ADeditome,0.997124791,ADeditome,0.997124791,,0,1,http://ccsm.uth.edu/ADeditome,302,,"(29.7633,-95.3633)",http://web.archive.org/web/20211028055748/https://ccsm.uth.edu/ADeditome/,2021-09-01,"School of Life Science and Technology, Xidian University, Xi'an, China.","Wu S, Yang M, Kim P, Zhou X",,"National Institutes of Health, National Institutes of Health, National Institutes of Health",2.0,China +21148158,AID-Net,0.68992514,AID-Net,0.68992514,,0,1,http://www.aid-register.uk-essen.de,"HTTPConnectionPool(host='www.aid-register.uk-essen.de', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20121116071806/http://www.aid-register.uk-essen.de/,2011-01-01,"Department of Paediatric Rheumatology, Children's Hospital, University Duisburg-Essen, Essen, Germany. elke.lainka@uni-due.de","Lainka E, Bielak M, Hilger V, Basu O, Neudorf U, Wittkowski H, Holzinger D, Roth J, Niehues T, Foell D",,,20.0,Germany +22084200,AH-DB,0.99563925,AH-DB,0.99563925,Apo-Holo DataBase,0.77852336,1,"http://ahdb.ee.ncku.edu.tw/, http://ahdb.csbb.ntu.edu.tw","200, HTTPConnectionPool(host='ahdb.csbb.ntu.edu.tw', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to ahdb.csbb.ntu.edu.tw timed out. (connect timeout=5)'))",,"(22.9908,120.2133), ","http://web.archive.org/web/20220418015226/http://ahdb.ee.ncku.edu.tw/, http://web.archive.org/web/20150429222327/http://ahdb.csbb.ntu.edu.tw:80/",2011-11-13,"Department of Electrical Engineering, National Cheng Kung University, Tainan 70101, Taiwan. 
darby@mail.ncku.edu.tw","Chang DT, Yao TJ, Fan CY, Chiang CY, Bai YH",,,11.0, +24217911,AgeFactDB,0.997452736,AgeFactDB,0.997452736,JenAge Ageing Factor Database,0.809117585,1,http://agefactdb.jenage.de,200,,"(50.9787,11.0328)",http://web.archive.org/web/20221016233555/https://agefactdb.jenage.de/,2013-11-11,"Biocomputing Group, Leibniz Institute for Age Research - Fritz Lipmann Institute, Jena Centre for Systems Biology of Ageing - JenAge, Beutenbergstrasse 11, Jena, Germany.","Hühne R, Thalheim T, Sühnel J",,,28.0,Germany +25392419,AHTPDB,0.997885823,AHTPDB,0.997885823,,0,1,http://crdd.osdd.net/raghava/ahtpdb,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/ahtpdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20211020204914/http://crdd.osdd.net/raghava/ahtpdb/,2014-11-11,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh 160036, India.","Kumar R, Chaudhary K, Sharma M, Nagpal G, Chauhan JS, Singh S, Gautam A, Raghava GP",,,46.0,India +25853886,ALDB,0.989124298,ALDB,0.989124298,nimal,0.511159897,1,http://res.xaut.edu.cn/aldb/index.jsp,404,,,http://web.archive.org/web/20170201113523/http://res.xaut.edu.cn:80/aldb/index.jsp,2015-04-08,"School of Computer Science and Technology, Xidian University, Xi'an, Shaanxi, PR China; School of Computer Science and Engineering, Xi'an University of Technology, Xi'an, Shaanxi, PR China.","Li A, Zhang J, Zhou Z, Wang L, Liu Y, Liu Y",,,56.0,"China, China" +26602690,Aging Chart,0.833349913,Aging Chart,0.833349913,,0,1,http://agingchart.org,502,,,http://web.archive.org/web/20141216193449/http://agingchart.org/,2015-11-23,"Laboratory of molecular radiobiology and gerontology, Institute of Biology of Komi Science Center of Ural Branch of Russian Academy of Sciences, Syktyvkar, 167982, Russia Laboratory of genetics of aging and longevity, Moscow Institute of Physics and 
Technology, Dolgoprudny, 141700, Russia Laboratory of postgenomic studies, Engelhardt Institute of Molecular Biology of Russian Academy of Sciences, Moscow, 119991, Russia School of Systems Biology, George Mason University, VA, Manassas, 20110, USA Branch of N.I.Pirogov Russian State Medical University ""Scientific Clinical Center of Gerontology"", Moscow, 117997, Russia amoskalev@list.ru.","Moskalev A, Zhikrivetskaya S, Shaposhnikov M, Dobrovolskaya E, Gurinovich R, Kuryan O, Pashuk A, Jellen LC, Aliper A, Peregudov A, Zhavoronkov A",,,6.0,United States +26644461,ALCOdb,0.997920737,ALCOdb,0.997920737,Algae Gene Coexpression database,0.973787616,1,http://alcodb.jp,200,,"(38.2570,140.8523)",http://web.archive.org/web/20220616024452/http://alcodb.jp/,2015-12-07,"Graduate School of Information Sciences, Tohoku University, 6-3-09, Aramaki-Aza-Aoba, Aoba-ku, Sendai, 980-8579 Japan Core Research for Evolutional Science and Technology (CREST), Japan Science and Technology Agency (JST), Kawaguchi, Saitama, Japan.","Aoki Y, Okamura Y, Ohta H, Kinoshita K, Obayashi T",,,20.0,"Japan, Japan, Japan" +28376796,AHCODA-DB,0.993691454,AHCODA-DB,0.993691454,,0,1,http://public.sylics.com,301,,"(52.3702,5.2141)",http://web.archive.org/web/20220806121813/https://public.sylics.com/,2017-04-04,"Sylics (Synaptologics BV), Amsterdam, The Netherlands. 
bastijn.koopmans@sylics.com.","Koopmans B, Smit AB, Verhage M, Loos M",,"Dutch Research Council (NWO), Agentschap NL, Seventh Framework Programme",2.0,Netherlands +30231853,AgriSeqDB,0.997874022,AgriSeqDB,0.997874022,,0,1,http://expression.latrobe.edu.au/agriseqdb,302,,"(-37.8140,144.9633)",http://web.archive.org/web/20221018114406/https://expression.latrobe.edu.au/agriseqdb,2018-09-19,"Genomics Platform, La Trobe University, Melbourne, Australia.","Robinson AJ, Tamiru M, Salby R, Bolitho C, Williams A, Huggard S, Fisch E, Unsworth K, Whelan J, Lewsey MG",,The Australian National Data Service,3.0,Australia +31123286,AICD,0.981977388,AICD,0.981977388,nti-Inflammatory Compounds Database,0.904850148,1,http://956023.ichengyun.net/AICD/index.php,"HTTPConnectionPool(host='956023.ichengyun.net', port=80): Max retries exceeded with url: /AICD/index.php (Caused by ConnectTimeoutError(, 'Connection to 956023.ichengyun.net timed out. (connect timeout=5)'))",,,no_wayback,2019-05-23,"Research Center of Integrative Medicine, School of Basic Medical Science, Guangzhou University of Chinese Medicine, Guangzhou, 510006, China.","Wang K, Xiao J, Liu X, Jiang Z, Zhan Y, Yin T, He L, Zhang F, Xing S, Chen B, Li Y, Zhang F, Kuang Z, Du B, Gu J",,"Guangdong Provincial Hospital of Chinese Medicine Science and Technology Research Program, the National Undergraduate Training Programs for Innovation and Entrepreneurship, the start-up support for scientific research of Xinglin Young Scholar in Guangzhou University of Chinese Medicine, Guangdong Science and Technology project",5.0,China +21498548,Allie,0.968772352,Allie,0.968772352,,0,1,http://allie.dbcls.jp,200,,"(35.1167,138.9167)",http://web.archive.org/web/20221108173150/https://allie.dbcls.jp/,2011-04-15,"Database Center for Life Science, Bunkyo-ku, Tokyo, Japan. 
yy@dbcls.rois.ac.jp","Yamamoto Y, Yamaguchi A, Bono H, Takagi T",,,9.0,Japan +22039151,ALFRED,0.992198706,ALFRED,0.992198706,,0,1,http://alfred.med.yale.edu,302,,"(41.3081,-72.9282)",http://web.archive.org/web/20221022171443/https://alfred.med.yale.edu/,2011-10-28,"Department of Genetics and Center for Medical Informatics, Yale University School of Medicine, New Haven, CT 06520-8005, USA.","Rajeevan H, Soundararajan U, Kidd JR, Pakstis AJ, Kidd KK",,,36.0,United States +22659196,Alkamid,0.985504031,Alkamid,0.985504031,,0,1,http://alkamid.ugent.be,307,,"(51.0500,3.7167)",http://web.archive.org/web/20220801192843/https://alkamid.ugent.be/,2012-05-30,"Drug Quality and Registration (DruQuaR) Group, Faculty of Pharmaceutical Sciences, Ghent University, Harelbekestraat 72, B-9000 Ghent, Belgium.","Boonen J, Bronselaer A, Nielandt J, Veryser L, De Tré G, De Spiegeleer B",,“Institute for the Promotion of Innovation through Science and Technology in Flanders (IWT-Vlaanderen)”,34.0,Belgium +23193282,Allen Brain Atlas,0.758832355,Allen Brain Atlas,0.758832355,,0,1,http://www.brain-map.org,302,,"(47.6302,-122.3210)",http://web.archive.org/web/20180924225234/http://www.brain-map.org/,2012-11-28,"Allen Institute for Brain Science, 551 North 34th Street, Seattle, WA 98103, USA. susans@alleninstitute.org","Sunkin SM, Ng L, Lau C, Dolbeare T, Gilbert TL, Thompson CL, Hawrylycz M, Dang C",,"PHS HHS, NIMH NIH HHS, NHLBI NIH HHS, NIMH NIH HHS, NIMH NIH HHS",247.0,United States +24556904,AlfalfaTFDB,0.975518227,AlfalfaTFDB,0.975518227,,0,1,http://plantpathology.ba.ars.usda.gov/alfalfatfdb.html,"HTTPConnectionPool(host='plantpathology.ba.ars.usda.gov', port=80): Max retries exceeded with url: /alfalfatfdb.html (Caused by ConnectTimeoutError(, 'Connection to plantpathology.ba.ars.usda.gov timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210410133959/https://plantpathology.ba.ars.usda.gov/alfalfatfdb.html,2014-02-21,"Molecular Plant Pathology Laboratory, Beltsville Agricultural Research Center, United States Department of Agriculture, 10300 Baltimore Avenue, Beltsville, MD, 20705, USA.","Postnikova OA, Shao J, Nemchinov LG",,,5.0,"United States, United States" +24628857,AlgaePath,0.996096075,AlgaePath,0.996096075,,0,1,http://algaepath.itps.ncku.edu.tw,200,,"(22.9908,120.2133)",http://web.archive.org/web/20220615134754/http://algaepath.itps.ncku.edu.tw/,2014-03-14,None,"Zheng HQ, Chiang-Hsieh YF, Chien CH, Hsu BK, Liu TL, Chen CN, Chang WC",,,13.0, +25097382,AllergenPro,0.997101068,AllergenPro,0.997101068,,0,1,http://nabic.rda.go.kr/allergen,405,,,no_wayback,2014-06-30,"Genomics Division, National Academy of Agricultural Science (NAAS), Suwon 441-707, Korea.","Kim CK, Seol YJ, Lee DJ, Jeong IS, Yoon UH, Lee JY, Lee GS, Park DS",,,2.0, +28069893,Alga-PrAS,0.994979988,Alga-PrAS,0.994979988,Algal Protein Annotation Suite database,0.78106205,1,http://alga-pras.riken.jp,200,,"(35.4333,139.6500)",http://web.archive.org/web/20220217144220/http://alga-pras.riken.jp/,2017-01-01,"RIKEN Center for Sustainable Resource Science, Suehiro, Tsurumi, Yokohama, Kanagawa, Japan.","Kurotani A, Yamada Y, Sakurai T",,,4.0,Japan +30357390,ALEdb,0.993111849,ALEdb,0.993111849,,0,1,http://aledb.org,301,,"(34.0522,-118.2437)",http://web.archive.org/web/20221012195757/https://aledb.org/,2019-01-01,"Bioinformatics and Systems Biology Program, University of California, San Diego, La Jolla, CA 92093, USA.","Phaneuf PV, Gosting D, Palsson BO, Feist AM",,"Novo Nordisk Fonden, Technical University of Denmark, NIAID NIH HHS, National Institute of Allergy and Infectious Diseases, NNF Center for Biosustainability",21.0,United States 
+34361108,alfaNET,0.997048736,alfaNET,0.997048736,,0,1,http://bioinfo.usu.edu/alfanet,301,,"(41.7355,-111.8344)",http://web.archive.org/web/20220615171639/http://bioinfo.usu.edu/alfanet/,2021-08-03,"Department of Plants, Soils, and Climate, College of Agriculture and Applied Sciences, Utah State University, Logan, UT 84322, USA.","Kataria R, Kaundal R",,,0.0,United States +21880703,AluHunter,0.993216336,AluHunter,0.993216336,,0,1,http://www.aluhunter.com,200,,"(39.0997,-94.5786)",http://web.archive.org/web/20211026160017/http://aluhunter.com/,2011-08-31,"Department of Anthropology, New York University, New York, NY 10003, USA. cmb433@nyu.edu",Bergey CM,,,2.0,United States +22559261,AlliumMap,0.986065328,AlliumMap,0.986065328,,0,1,http://alliumgenetics.org,"HTTPConnectionPool(host='alliumgenetics.org', port=80): Pool is closed.",,,http://web.archive.org/web/20170912193312/http://alliumgenetics.org/,2012-05-04,"The New Zealand Institute for Plant & Food Research Ltd, Christchurch, New Zealand. 
john.mccallum@plantandfood.co.nz","McCallum J, Baldwin S, Shigyo M, Deng Y, van Heusden S, Pither-Joyce M, Kenel F",,,14.0,"New Zealand, New Zealand" +22647208,AlzPathway,0.996937859,AlzPathway,0.996937859,,0,1,http://alzpathway.org,200,,"(35.6895,139.6917)",http://web.archive.org/web/20220518040753/http://alzpathway.org/,2012-05-30,"Department of Bioinformatics, Tokyo Medical and Dental University, Yushima 1-5-45, Tokyo, 113-8510, Japan.","Mizuno S, Iijima R, Ogishima S, Kikuchi M, Matsuoka Y, Ghosh S, Miyamoto T, Miyashita A, Kuwano R, Tanaka H",,,65.0,Japan +25432889,AlzBase,0.974508405,AlzBase,0.974508405,,0,1,http://alz.big.ac.cn/alzBase,"HTTPConnectionPool(host='alz.big.ac.cn', port=80): Max retries exceeded with url: /alzBase (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220531172723/http://alz.big.ac.cn/alzBase/,2014-11-29,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing, 100101, China.","Bai Z, Han G, Xie B, Wang J, Song F, Peng X, Lei H",,,14.0,China +25762455,amamutdb.no,0.977049748,amamutdb.no,0.977049748,,0,1,http://amamutdb.no,301,,"(69.6489,18.9551)",no_wayback,2015-04-09,"Division of Child and Adolescent Health, Department of Medical Genetics, University Hospital of North Norway, Tromsø, Norway.","Riise Stensland HM, Frantzen G, Kuokkanen E, Buvang EK, Klenow HB, Heikinheimo P, Malm D, Nilssen Ø",,European Commission FP VI,5.0,"Norway, Norway" +30365033,AlloMAPS,0.996572554,AlloMAPS,0.996572554,,0,1,http://allomaps.bii.a-star.edu.sg,200,,"(32.7939,-96.8319)",http://web.archive.org/web/20220523151323/http://allomaps.bii.a-star.edu.sg/,2019-01-01,"Bioinformatics Institute, Agency for Science, Technology and Research (A*STAR), 30 Biopolis Street, #07-01, Matrix, 138671 Singapore.","Tan ZW, Tee WV, Guarnera E, Booth L, Berezovsky IN",,Biomedical Research Council,14.0,Singapore 
+32193422,AmazonFish,0.907141924,AmazonFish,0.907141924,,0,1,http://www.amazon-fish.com,302,,"(50.6942,3.1746)",http://web.archive.org/web/20221006190731/https://www.amazon-fish.com/,2020-03-19,"UMR EDB (Laboratoire Évolution et Diversité Biologique), CNRS 5174, IRD253, UPS, 118 route de Narbonne, F-31062, Toulouse, France. celine.jezequel@ird.fr.","Jézéquel C, Tedesco PA, Bigorne R, Maldonado-Ocampo JA, Ortega H, Hidalgo M, Martens K, Torrente-Vilara G, Zuanon J, Acosta A, Agudelo E, Barrera Maure S, Bastos DA, Bogotá Gregory J, Cabeceira FG, Canto ALC, Carvajal-Vallejos FM, Carvalho LN, Cella-Ribeiro A, Covain R, Donascimiento C, Dória CRC, Duarte C, Ferreira EJG, Galuch AV, Giarrizzo T, Leitão RP, Lundberg JG, Maldonado M, Mojica JI, Montag LFA, Ohara WM, Pires THS, Pouilly M, Prada-Pedreros S, de Queiroz LJ, Rapp Py-Daniel L, Ribeiro FRV, Ríos Herrera R, Sarmiento J, Sousa LM, Stegmann LF, Valdiviezo-Rivera J, Villa F, Yunoki T, Oberdorff T",,,5.0,France +33852582,AlnC,0.996692717,AlnC,0.996692717,Catalogue,0.617510498,1,http://www.nipgr.ac.in/AlnC,301,,"(28.6453,77.2128)",no_wayback,2021-04-14,"Bioinformatics Lab, National Institute of Plant Genome Research (NIPGR), Aruna Asaf Ali Marg, New Delhi, India.","Singh A, Vivek AT, Kumar S",,,1.0,India +34316271,ALTB,0.855050087,ALTB,0.855050087,,0,1,"http://altaiflora.asu.ru/en/, http://altb.asu.ru","200, 200",,"(53.3600,83.7600), (53.3600,83.7600)","http://web.archive.org/web/20221004221431/http://altaiflora.asu.ru/en/, http://web.archive.org/web/20221007014405/http://altb.asu.ru/",2021-07-13,"Altai State University, Barnaul, Russia Altai State University Barnaul Russia.","Vaganov AV, Shmakov AI, Smirnov SV, Usik NA, Shibanova AA, Kechaykin AA, Kosachev PA, Kopytina TM, Zholnerova EA, Medvedeva KE, Zaikov VF, Sinitsyna TA, Shalimov AP, Antonyuk EV, Gudkova PD, Dmitriev DA, Batkin AA, Kasatkin DE, Belkin DL",,,1.0, 
+34335304,Amadis,0.995023131,Amadis,0.995023131,,0,1,http://gift2disease.net/GIFTED,302,,"(39.9075,116.3972)",http://web.archive.org/web/20220812210329/http://www.gift2disease.net/GIFTED/,2021-07-14,"Department of General Surgery, The First Affiliated Hospital of Harbin Medical University, Harbin, China.","Li L, Jing Q, Yan S, Liu X, Sun Y, Zhu D, Wang D, Hao C, Xue D",,,2.0,China +23317704,AMDD,0.986542583,AMDD,0.986542583,anti microbial drug database,0.958445907,1,http://www.amddatabase.info,302,,,http://web.archive.org/web/20160424014447/http://www.amddatabase.info:80/,2012-11-07,"Interdisciplinary Biotechnology Unit, Aligarh Muslim University, Aligarh 202002, India.","Danishuddin M, Kaushal L, Hassan Baig M, Khan AU",,"Biotechnology Unit, AMU and DBT",4.0,India +26088800,AmyLoad,0.924672663,AmyLoad,0.924672663,,0,1,http://comprec-lin.iiar.pwr.edu.pl/amyload,301,,"(51.1000,17.0333)",http://web.archive.org/web/20161011225651/http://comprec-lin.iiar.pwr.edu.pl:80/amyload/,2015-06-17,"Department of Biomedical Engineering, Wroclaw University of Technology, Wroclaw, Poland.","Wozniak PP, Kotulska M",,,22.0,Poland +26828034,AmphibiaChina,0.995878279,AmphibiaChina,0.995878279,,0,1,http://www.amphibiachina.org,200,,"(39.9075,116.3972)",http://web.archive.org/web/20221103230002/https://www.amphibiachina.org/,2016-01-01,"State Key Laboratory of Genetic Resources and Evolution, Kunming Institute of Zoology, Chinese Academy of Sciences, Kunming, Yunnan 650223, China. 
chej@mail.kiz.ac.cn.","Che J, Wang K",,,4.0,China +27582018,ANGIOGENES,0.997215807,ANGIOGENES,0.997215807,,0,1,http://angiogenes.uni-frankfurt.de,200,,"(50.1155,8.6842)",http://web.archive.org/web/20220810193205/http://angiogenes.uni-frankfurt.de/,2016-09-01,"Institute of Cardiovascular Regeneration, Centre for Molecular Medicine, Goethe University Frankfurt, Theodor-Stern-Kai 7, Frankfurt am Main 60590, Germany.","Müller R, Weirick T, John D, Militello G, Chen W, Dimmeler S, Uchida S",,,16.0,Germany +29040693,AmyPro,0.99600482,AmyPro,0.99600482,,0,1,http://amypro.net,200,,"(52.3740,4.8897)",http://web.archive.org/web/20221017070002/https://www.amypro.net/,2018-01-01,"Protein Data Bank in Europe, European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","Varadi M, De Baets G, Vranken WF, Tompa P, Pancsa R",,Medical Research Council,16.0, +30247677,AmtDB,0.998366475,AmtDB,0.998366475,,0,1,http://amtdb.org,301,,"(50.0408,15.7766)",http://web.archive.org/web/20220726062455/https://amtdb.org/,2019-01-01,"Institute of Molecular Genetics of the ASCR, Vídeňská 1083, 142 20 Prague 4, Czech Republic.","Ehler E, Ehler E, Novotný J, Juras A, Chylenski M, Moravcík O, Paces J",,"Ministry of Education, Ministry of Education, Polish National Science Center",4.0, +30371900,Ancestral Genomes,0.586193487,Ancestral Genomes,0.586193487,,0,1,http://ancestralgenomes.org,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20221005224953/http://www.ancestralgenomes.org/,2019-01-01,"School of Life Sciences, Guangzhou University, Guangzhou 510006, China.","Huang X, Albou LP, Mushayahama T, Muruganujan A, Tang H, Thomas PD",,"National Science Foundation, NHGRI NIH HHS, National Institutes of Health",5.0,China +31094220,AmyCo,0.988337398,AmyCo,0.988337398,amyloidoses collection,0.742020524,1,http://bioinformatics.biol.uoa.gr/amyco,301,,"(37.9757,23.7691)",no_wayback,2019-05-16,"a Section of Cell Biology and 
Biophysics, Department of Biology, School of Sciences, National and Kapodistrian University of Athens , Panepistimiopolis , Athens , Greece.","Nastou KC, Nasi GI, Tsiolaki PL, Litou ZI, Iconomidou VA",,"European Union and Greek National Funds through the Operational Program ‘Competitiveness, Entrepreneurship and Innovation’",3.0,Greece +31347432,ANDB,0.951109529,ANDB,0.951109529,Areca nut database,0.914340153,1,http://arecanut.icmr.org.in,301,,"(29.9657,76.8370)",no_wayback,2019-07-26,"Division of Molecular Diagnostics, ICMR-National Institute of Cancer Prevention & Research (NICPR), I-7, Sector-39, Noida, Gautam Buddha Nagar, Uttar Pradesh, India.","Thakur N, Sharma AK, Singh H, Mehrotra R",,,0.0,India +32765587,Analysis of Breast Cancer GWAS,0.94699221,ABC-GWAS,0.923012972,Analysis of Breast Cancer GWAS,0.94699221,1,http://education.knoweng.org/abc-gwas,301,,"(40.1106,-88.2073)",http://web.archive.org/web/20221102055941/http://education.knoweng.org/abc-gwas/,2020-07-20,"Department of Physics, University of Illinois at Urbana-Champaign, Urbana, IL, United States.","Manjunath M, Zhang Y, Zhang S, Roy S, Perez-Pinera P, Song JS",,"National Institutes of Health, NCI NIH HHS, NIGMS NIH HHS",0.0,United States +21904438,Antagomir,0.96470964,Antagomir,0.96470964,,0,1,http://bioinfopresidencycollegekolkata.edu.in/antagomirs.html,"HTTPConnectionPool(host='bioinfopresidencycollegekolkata.edu.in', port=80): Max retries exceeded with url: /antagomirs.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20110728111550/http://bioinfopresidencycollegekolkata.edu.in:80/antagomirs.html,2011-08-20,"DBT-Centre for Bioinformatics, Presidency University, Kolkata - 700073.","Ganguli S, Mitra S, Datta A",,,2.0, +27671474,ANTISTAPHYBASE,0.932832658,ANTISTAPHYBASE,0.932832658,,0,1,http://www.antistaphybase.com,"HTTPConnectionPool(host='www.antistaphybase.com', port=80): Max retries exceeded with 
url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180530060019/http://www.antistaphybase.com:80/,2016-09-26,"Nutraceuticals and Functional Proteomics Potential of Biodiversity in Tunisia, University of Tunis ELMANAR, Tunis, Tunisia. zouheirabdelmajid@yahoo.fr.","Zouhir A, Taieb M, Lamine MA, Cherif A, Jridi T, Mahjoubi B, Mbarek S, Fliss I, Nefzi A, Sebei K, Ben Hamida J",,,4.0,"Tunisia, Tunisia" +"27924032, 30395294, 33152079",antiSMASH,0.958897054,antiSMASH,0.958897054,,0,3,http://antismash-db.secondarymetabolites.org,301,,"(55.6759,12.5655)",http://web.archive.org/web/20220617002435/https://antismash-db.secondarymetabolites.org/,2021-01-01,"The Novo Nordisk Foundation Center for Biosustainability, Technical University of Denmark, 2800 Kgs. Lyngby, Denmark., Novo Nordisk Foundation Center for Biosustainability, Technical University of Denmark, Kemitorvet, Building 220, 2800 Kgs. Lyngby, Denmark., The Novo Nordisk Foundation Center for Biosustainability, Technical University of Denmark, Kgs. 
Lyngby 2800, Denmark.","Blin K, Medema MH, Kottmann R, Lee SY, Weber T, Blin K, Pascal Andreu V, de Los Santos ELC, Del Carratore F, Lee SY, Medema MH, Weber T, Blin K, Shaw S, Kautsar SA, Medema MH, Weber T",", , ","NNF Center for Biosustainability, Novo Nordisk Fonden, Biotechnology and Biological Sciences Research Council, Engineering and Physical Sciences Research Council, Novo Nordisk Fonden, Novo Nordisk Foundation, Dutch Research Council (NWO), Novo Nordisk Fonden, Novo Nordisk Foundation, NNF Center for Biosustainability, Novo Nordisk Fonden, Danish National Research Foundation, Novo Nordisk Foundation, Novo Nordisk Fonden, NNF Center for Biosustainability, NNF Center for Biosustainability, Graduate School for Experimental Plant Sciences, Novo Nordisk Foundation",202.0,"Denmark, Denmark, Denmark, Denmark, Denmark, Denmark" +30639529,Antimicrobial Enzyme Combinations Database,0.657052189,,0,Antimicrobial Enzyme Combinations Database,0.657052189,1,http://www.ceb.uminho.pt/aecd,301,,"(41.5503,-8.4200)",no_wayback,2019-01-09,"CEB - Centre of Biological Engineering, LIBRO - Laboratory of Research in Biofilms Rosário Oliveira, University of Minho, Campus de Gualtar, 4710-057 Braga, Portugal. Electronic address: paulajorge@ceb.uminho.pt.","Jorge P, Alves D, Pereira MO",,European Regional Development Fund,0.0,Portugal +30937442,Animal sncRNA Atlas,0.959473997,ASRA,0.944391489,Animal sncRNA Atlas,0.959473997,1,http://www.ccb.uni-saarland.de/asra,302,,"(49.2326,7.0098)",no_wayback,2019-05-01,"Chair for Clinical Bioinformatics, Saarland University, 66123 Saarbrücken, Germany.","Fehlmann T, Backes C, Pirritano M, Laufer T, Galata V, Kern F, Kahraman M, Gasparoni G, Ludwig N, Lenhof HP, Gregersen HA, Francke R, Meese E, Simon M, Keller A",,"Saarland University, Michael J. 
Fox Foundation for Parkinson’s Research",6.0,Germany +31584087,Animal-ImputeDB,0.997726774,Animal-ImputeDB,0.997726774,,0,1,http://gong_lab.hzau.edu.cn/Animal_ImputeDB,"HTTPConnectionPool(host='gong_lab.hzau.edu.cn', port=80): Max retries exceeded with url: /Animal_ImputeDB (Caused by ReadTimeoutError(""HTTPConnectionPool(host='gong_lab.hzau.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20221016213031/http://gong_lab.hzau.edu.cn/Animal_ImputeDB,2020-01-01,"Hubei Key Laboratory of Agricultural Bioinformatics, College of Informatics, Huazhong Agricultural University, Wuhan 430070, P. R. China.","Yang W, Yang Y, Zhao C, Yang K, Wang D, Yang J, Niu X, Gong J",,"Huazhong Agricultural University Scientific & Technological Self-innovation Foundation, National Natural Science Foundation of China, Fundamental Research Funds for the Central University",5.0,China +31680137,ANISEED,0.995582819,ANISEED,0.995582819,,0,1,http://www.aniseed.cnrs.fr,200,,"(43.6109,3.8763)",http://web.archive.org/web/20221002131416/http://www.aniseed.cnrs.fr/,2020-01-01,"CRBM, Université de Montpellier, CNRS, Montpellier, France.","Dardaillon J, Dauga D, Simion P, Faure E, Onuma TA, DeBiasse MB, Louis A, Nitta KR, Naville M, Besnardeau L, Reeves W, Wang K, Fagotto M, Guéroult-Bellone M, Fujiwara S, Dumollard R, Veeman M, Volff JN, Roest Crollius H, Douzery E, Ryan JF, Davidson B, Nishida H, Dantec C, Lemaire P",,"MEXT, Agence Nationale de la Recherche, Kato Memorial Research Foundation, JSPS, Agence Nationale de la Recherche, Agence Nationale de la Recherche, Agence Nationale de la Recherche, Institut Français de Bioinformatique, JSPS, JSPS, NICHD NIH HHS, CNRS, Japan Foundation for Applied Enzymology, JSPS, JSPS, JSPS, Inamori Foundation, JSPS, Agence Nationale de la Recherche, National Science Foundation, Sumitomo Foundation, JSPS",13.0,France +32584882,Antimicrobial chemotherapeutics database,0.987474948,ACD,0.987468759,Antimicrobial chemotherapeutics 
database,0.987474948,1,http://amdr.amu.ac.in/acd,"HTTPConnectionPool(host='amdr.amu.ac.in', port=80): Max retries exceeded with url: /acd (Caused by ReadTimeoutError(""HTTPConnectionPool(host='amdr.amu.ac.in', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20200823014936/http://amdr.amu.ac.in/acd/,2020-06-25,"Medical Microbiology and Molecular Biology Lab, Interdisciplinary Biotechnology Unit, Aligarh Muslim University, Aligarh, India.","Azam MW, Kumar A, Khan AU",,,0.0,India +32986825,Animal-APAdb,0.993082929,Animal-APAdb,0.993082929,,0,1,http://gong_lab.hzau.edu.cn/Animal-APAdb,"HTTPConnectionPool(host='gong_lab.hzau.edu.cn', port=80): Max retries exceeded with url: /Animal-APAdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='gong_lab.hzau.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20201101101153/http://gong_lab.hzau.edu.cn/Animal-APAdb/,2021-01-01,"Hubei Key Laboratory of Agricultural Bioinformatics, College of Informatics, Huazhong Agricultural University, Wuhan 430070, P.R. 
China.","Jin W, Zhu Q, Yang Y, Yang W, Wang D, Yang J, Niu X, Yu D, Gong J",,"Jiangsu Agricultural Science and Technology Independent Innovation Fund, Fundamental Research Funds for the Central Universities, Huazhong Agricultural University Scientific & Technological Self-innovation Foundation, National Natural Science Foundation of China",4.0,China +33996073,antifungal drug interactions database,0.586464763,,0,antifungal drug interactions database,0.586464763,1,http://antifungalinteractions.org/was,404,,,no_wayback,2021-01-01,"Fungal Infection Trust, PO Box 482, Macclesfield, Cheshire SK10 9AR.","Niazi-Ali S, Atherton GT, Walczak M, Denning DW",,,0.0, +22067098,ApoptoProteomics,0.990143538,ApoptoProteomics,0.990143538,,0,1,http://apoptoproteomics.uio.no,200,,"(59.9127,10.7461)",http://web.archive.org/web/20220619221946/http://apoptoproteomics.uio.no/,2011-11-08,"Biotechnology Centre of Oslo, University of Oslo, 0317 Oslo, Norway.","Arntzen MØ, Thiede B",,,16.0,Norway +25052703,APADB,0.981904626,APADB,0.981904626,,0,1,http://tools.genxpro.net/apadb,"HTTPConnectionPool(host='tools.genxpro.net', port=80): Max retries exceeded with url: /apadb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20210616202153/http://tools.genxpro.net/apadb/,2014-07-22,"Plant Molecular Biology, Molecular BioSciences, University of Frankfurt am Main, Marie-Curie-Street 9, D-60439 Frankfurt, Germany, GenXPro GmbH, Frankfurt Innovation Center Biotechnology, Altenhöferallee 3, D-60438 Frankfurt, Germany, Molecular Bioinformatics Group, Faculty of Computer Science and Mathematics, Cluster of Excellence Frankfurt ""Macromolecular Complexes"", Institute of Computer Science, Robert-Mayer-Strasse 11-15, D-60325 Frankfurt am Main, Germany, Department of Internal Medicine IV; Saarland University Medical Center, Kirrberger Strasse, D-66421 Homburg/Saar, Germany, Experimental Neurology, Department of Neurology, 
Goethe University Medical School, Heinrich, Hoffmann Strasse 7, D-60528 Frankfurt am Main, Germany, Institute for Ecology, Evolution and Diversity, Aquatic Ecotoxicology, University of Frankfurt am Main, Max-von-Laue-Str. 13, D-60438 Frankfurt, Germany and Department of Pediatrics, University Hospital Schleswig-Holstein, Schwanenweg 20, D-24105 Kiel, GermanyPlant Molecular Biology, Molecular BioSciences, University of Frankfurt am Main, Marie-Curie-Street 9, D-60439 Frankfurt, Germany, GenXPro GmbH, Frankfurt Innovation Center Biotechnology, Altenhöferallee 3, D-60438 Frankfurt, Germany, Molecular Bioinformatics Group, Faculty of Computer Science and Mathematics, Cluster of Excellence Frankfurt ""Macromolecular Complexes"", Institute of Computer Science, Robert-Mayer-Strasse 11-15, D-60325 Frankfurt am Main, Germany, Department of Internal Medicine IV; Saarland University Medical Center, Kirrberger Strasse, D-66421 Homburg/Saar, Germany, Experimental Neurology, Department of Neurology, Goethe University Medical School, Heinrich, Hoffmann Strasse 7, D-60528 Frankfurt am Main, Germany, Institute for Ecology, Evolution and Diversity, Aquatic Ecotoxicology, University of Frankfurt am Main, Max-von-Laue-Str. 13, D-60438 Frankfurt, Germany and Department of Pediatrics, University Hospital Schleswig-Holstein, Schwanenweg 20, D-24105 Kiel, Germany","Müller S, Rycak L, Afonso-Grunz F, Winter P, Zawada AM, Damrath E, Scheider J, Schmäh J, Koch I, Kahl G, Rotter B",,,39.0,"Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany" +25378337,APASdb,0.995926738,APASdb,0.995926738,,0,1,http://mosas.sysu.edu.cn/utr,"HTTPConnectionPool(host='mosas.sysu.edu.cn', port=80): Max retries exceeded with url: /utr (Caused by ConnectTimeoutError(, 'Connection to mosas.sysu.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20160222180412/http://mosas.sysu.edu.cn:80/utr/,2014-11-06,"State Key Laboratory of Biocontrol, Guangdong Province Key Laboratory of Pharmaceutical Functional Genes, School of Life Sciences, Sun Yat-Sen University, Higher Education Mega Center, Guangzhou 510006, People's Republic of China School of Basic Medical Sciences, Beijing University of Chinese Medicine, Beijing 100029, People's Republic of China.","You L, Wu J, Feng Y, Fu Y, Guo Y, Long L, Zhang H, Luan Y, Tian P, Chen L, Huang G, Huang S, Li Y, Li J, Chen C, Zhang Y, Chen S, Xu A",,,40.0,"China, China" +26861916,ApoCanD,0.977928281,ApoCanD,0.977928281,,0,1,http://crdd.osdd.net/raghava/apocand,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/apocand (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220523103042/http://crdd.osdd.net/raghava/apocand/,2016-02-10,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh-160036, India.","Kumar R, Raghava GP",,,5.0,India +28413782,APMicroDB,0.996813858,APMicroDB,0.996813858,microsatellite repeat database of the pea aphid,0.969031361,1,http://deepaklab.com/aphidmicrodb,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20210925071300/http://deepaklab.com/aphidmicrodb/,2017-03-30,"Institute of Microbial Technology, Sector 39-A, Chandigarh 160036, India.","Bishnoi R, Singla D",,,0.0,India +28784999,AOD,0.994940579,AOD,0.994940579,Antioxidant Database,0.930749983,1,http://lin.uestc.edu.cn/AODdatabase/index.aspx,"HTTPConnectionPool(host='lin.uestc.edu.cn', port=80): Max retries exceeded with url: /AODdatabase/index.aspx (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2017-08-07,"Hebei Province Key Laboratory of Occupational Health and Safety for Coal Industry, School of Public 
Health, North China University of Science and Technology, Tangshan, 063000, China.","Feng P, Ding H, Lin H, Chen W",,,22.0,"China, China" +30715274,APID,0.780257463,APID,0.780257463,,0,1,http://apid.dep.usal.es,200,,"(40.9688,-5.6639)",http://web.archive.org/web/20220122000317/http://apid.dep.usal.es/,2019-01-01,"Cancer Research Center (CiC-IBMCC, CSIC/USAL/IBSAL), Consejo Superior de Investigaciones Científicas and University of Salamanca, Salamanca, Spain.","Alonso-López D, Campos-Laborie FJ, Gutiérrez MA, Lambourne L, Calderwood MA, Vidal M, De Las Rivas J",,"Instituto de Salud Carlos III, European Project H2020, Instituto de Salud Carlos III, European Project H2020, Federación Española de Enfermedades Raras",29.0,Spain +31586392,APAatlas,0.997015476,APAatlas,0.997015476,,0,1,http://hanlab.uth.edu/apa,404,,,http://web.archive.org/web/20220329164651/https://hanlab.uth.edu/apa/,2020-01-01,"Department of Biochemistry and Molecular Biology, McGovern Medical School at The University of Texas Health Science Center at Houston, Houston, TX 77030, USA.","Hong W, Ruan H, Zhang Z, Ye Y, Liu Y, Li S, Jing Y, Zhang H, Diao L, Liang H, Han L",,"National Institutes of Health, National Institutes of Health, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, National Institutes of Health, Cancer Prevention & Research Institute of Texas",13.0,United States +31695717,AppleMDO,0.992001891,AppleMDO,0.992001891,,0,1,http://bioinformatics.cau.edu.cn/AppleMDO,301,,"(39.9075,116.3972)",http://web.archive.org/web/20220618084641/http://bioinformatics.cau.edu.cn/AppleMDO/,2019-10-22,"State Key Laboratory of Plant Physiology and Biochemistry, College of Biological Sciences, China Agricultural University, Beijing, China.","Da L, Liu Y, Yang J, Tian T, She J, Ma X, Xu W, Su Z",,National Natural Science Foundation of China,10.0,"China, China" 
+31978081,AOE,0.969555974,AOE,0.969555974,,0,1,http://aoe.dbcls.jp,200,,"(37.3394,-121.8950)",http://web.archive.org/web/20220626120058/https://aoe.dbcls.jp/,2020-01-24,"Database Center for Life Science (DBCLS), Joint Support-Center for Data Science Research, Research Organization of Information and Systems, Mishima,Japan.",Bono H,,Japan Science and Technology Agency,8.0,Japan +22345505,Arabidopsis Network Analysis Pipeline,0.910027817,ANAP,0.893375516,Arabidopsis Network Analysis Pipeline,0.910027817,1,http://gmdd.shgmo.org/Computational-Biology/ANAP,"HTTPConnectionPool(host='gmdd.shgmo.org', port=80): Max retries exceeded with url: /Computational-Biology/ANAP (Caused by ConnectTimeoutError(, 'Connection to gmdd.shgmo.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160403072450/http://gmdd.shgmo.org:80/Computational-Biology/ANAP/,2012-02-16,"State Key Laboratory of Hybrid Rice, School of Life Sciences and Biotechnology, Bio-X Center, Shanghai Jiao Tong University, Shanghai 200240, China.","Wang C, Marshall A, Zhang D, Wilson ZA",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",21.0,China +22760305,AraPath,0.983345628,AraPath,0.983345628,,0,1,http://bioinformatics.sdstate.edu/arapath,404,,,http://web.archive.org/web/20150901222514/http://bioinformatics.sdstate.edu:80/arapath/,2012-07-03,"Department of Mathematics and Statistics, South Dakota State University, Brookings, SD 57007, USA.","Lai L, Liberzon A, Hennessey J, Jiang G, Qi J, Mesirov JP, Ge SX",,"NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",16.0,United States +24272250,Arabidopsis Genome Encyclopedia II,0.959254963,RARGE II,0.89071492,Arabidopsis Genome Encyclopedia II,0.959254963,1,http://rarge-v2.psc.riken.jp,200,,,http://web.archive.org/web/20221016200631/http://rarge-v2.psc.riken.jp/,2013-11-21,"RIKEN Center for Sustainable Resource Science, Yokohama, Kanagawa, 230-0045 Japan.","Akiyama K, Kurotani A, Iida K, Kuromori T, 
Shinozaki K, Sakurai T",,,13.0,Japan +25355510,AraNet,0.987630188,AraNet,0.987630188,,0,1,http://www.inetbio.org/aranet,301,,"(37.5598,126.9439)",http://web.archive.org/web/20220818200014/https://www.inetbio.org/aranet/,2014-10-29,"Department of Biotechnology, College of Life Science and Biotechnology, Yonsei University, Seoul, Korea.","Lee T, Yang S, Kim E, Ko Y, Hwang S, Shin J, Shim JE, Shim H, Kim H, Kim C, Lee I",,,62.0, +25414324,Araport,0.875419378,Araport,0.875419378,,0,1,http://www.araport.org,301,,"(43.6684,-79.3689)",http://web.archive.org/web/20221022214625/https://www.araport.org/,2014-11-20,"Plant Genomics, J. Craig Venter Institute, Rockville, MD 20850, USA vkrishna@jcvi.org.","Krishnakumar V, Hanlon MR, Contrino S, Ferlanti ES, Karamycheva S, Kim M, Rosen BD, Cheng CY, Moreira W, Mock SA, Stubbs J, Sullivan JM, Krampis K, Miller JR, Micklem G, Vaughn M, Town CD",,Biotechnology and Biological Sciences Research Council,98.0,United States +25487439,ARALIP,0.893481195,ARALIP,0.893481195,,0,1,http://aralip.plantbiology.msu.edu,302,,"(42.7370,-84.4839)",http://web.archive.org/web/20111225050123/http://aralip.plantbiology.msu.edu:80/,2014-12-10,"Department of Plant Biology, Michigan State University, East Lansing, MI, 48824, USA.","McGlew K, Shaw V, Zhang M, Kim RJ, Yang W, Shorrosh B, Suh MC, Ohlrogge J",,,14.0,United States +27995664,AraQTL,0.994779587,AraQTL,0.994779587,,0,1,http://www.bioinformatics.nl/Ara,302,,"(51.9700,5.6667)",no_wayback,2017-02-13,"Wageningen Seed Lab, Laboratory of Plant Physiology, Wageningen University, Droevendaalsesteeg 1, Wageningen, NL-6708 PB, The Netherlands.","Nijveen H, Ligterink W, Keurentjes JJ, Loudet O, Long J, Sterken MG, Prins P, Hilhorst HW, de Ridder D, Kammenga JE, Snoek BL",,Dutch Research Council (NWO),8.0,Netherlands 
+28095775,ARA-PEPs,0.987573981,ARA-PEPs,0.987573981,,0,1,http://www.biw.kuleuven.be/CSB/ARA-PEPs,301,,,http://web.archive.org/web/20181214164051/https://www.biw.kuleuven.be/CSB/ARA-PEPs/,2017-01-17,"KU Leuven, Centre of Microbial and Plant Genetics, Kasteelpark Arenberg 20, Leuven, B-3001, Belgium.","Hazarika RR, De Coninck B, Yamamoto LR, Martin LR, Cammue BP, van Noort V",,"Fonds Wetenschappelijk Onderzoek (BE), Onderzoeksraad, KU Leuven (BE), Vlaams Instituut voor Biotechnologie",12.0,Belgium +29059333,AraGWAS,0.865443408,AraGWAS,0.865443408,,0,1,http://aragwas.1001genomes.org,302,,"(48.2167,16.3500)",http://web.archive.org/web/20221016200523/https://aragwas.1001genomes.org/,2018-01-01,"Machine Learning and Computational Biology Lab, Department of Biosystems Science and Engineering, ETH Zürich, 4058 Basel, Switzerland.","Togninalli M, Seren Ü, Meng D, Fitz J, Nordborg M, Weigel D, Borgwardt K, Korte A, Grimm DG",,,23.0,"Switzerland, Ethiopia" +29069336,ArachnoServer,0.996285319,ArachnoServer,0.996285319,,0,1,http://arachnoserver.org,"HTTPConnectionPool(host='arachnoserver.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))",,,http://web.archive.org/web/20210927035730/http://www.arachnoserver.org/,2018-03-01,Institute for Molecular Bioscience.,"Pineda SS, Chaumeil PA, Kunert A, Kaas Q, Thang MWC, Le L, Nuhn M, Herzig V, Saez NJ, Cristofori-Armstrong B, Anangi R, Senff S, Gorse D, King GF",,,36.0, +24265221,ArchDB,0.988814056,ArchDB,0.988814056,,0,1,http://sbi.imim.es/archdb,301,,"(41.3888,2.1590)",http://web.archive.org/web/20221019195828/http://sbi.imim.es/archdb/,2013-11-21,"Structural Bioinformatics Lab (GRIB-IMIM), Universitat Pompeu Fabra, Barcelona Research Park of Biomedicine (PRBB), Barcelona, Catalonia, 08950, Spain and Institute of Biological, Environmental and Rural Sciences, Aberystwyth University, SY23 3DA Aberystwyth, Ceredigion, 
UK.","Bonet J, Planas-Iglesias J, Garcia-Garcia J, Marín-López MA, Fernandez-Fuentes N, Oliva B",,Biotechnology and Biological Sciences Research Council,14.0,Spain +25428357,arrayMap,0.994105458,arrayMap,0.994105458,,0,1,http://www.arraymap.org,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20180730082037/http://arraymap.org/,2014-11-26,"Institute of Molecular Life Sciences, University of Zurich, 8057 Zurich, Switzerland Swiss Institute of Bioinformatics, 8057 Zurich, Switzerland Center of Growth, Metabolism, and Aging, Key Laboratory of Bio-Resources and Eco-Environment, College of Life Sciences, Sichuan University, Chengdu 610064, Sichuan, China haoyang.cai@gmail.com.","Cai H, Gupta S, Rath P, Ai N, Baudis M",,,12.0,"Switzerland, Switzerland, China" +25468931,AromaDeg,0.997393668,AromaDeg,0.997393668,,0,1,http://aromadeg.siona.helmholtz-hzi.de,200,,"(52.2258,10.5271)",http://web.archive.org/web/20221007182737/http://aromadeg.siona.helmholtz-hzi.de/,2014-12-01,"Microbial Interactions and Processes Research Group, HZI-Helmholtz Centre for Infection Research, Inhoffenstr. 7, D-38124 Braunschweig, Germany, Research Group Microbial Ecology, Metabolism, Genomics and Evolution of Communities of Environmental Microorganisms, CorpoGen. Carrera 5 No. 
66A-35, Bogotá, Colombia and Faculty of Basic and Applied Sciences, Universidad Militar Nueva Granada-UMNG, Campus Cajicá, Bogotá DC, Colombia.","Duarte M, Jauregui R, Vilchez-Vargas R, Junca H, Pieper DH",,,21.0,"Colombia, Colombia, Germany" +26602692,AREsite2,0.991525769,AREsite2,0.991525769,,0,1,http://rna.tbi.univie.ac.at/AREsite,307,,"(48.2085,16.3721)",http://web.archive.org/web/20170910103459/http://rna.tbi.univie.ac.at/AREsite/,2015-11-23,"Institute for Theoretical Chemistry, University of Vienna, Währingerstraße 17/3, A-1090 Vienna, Austria.","Fallmann J, Sedlyarov V, Tanzer A, Kovarik P, Hofacker IL",,,39.0,Austria +27242037,ArthropodaCyc,0.994480968,ArthropodaCyc,0.994480968,,0,1,http://arthropodacyc.cycadsys.org,200,,"(48.8534,2.3488)",http://web.archive.org/web/20221017001638/https://arthropodacyc.cycadsys.org/,2016-05-30,"Univ Lyon, INSA-Lyon, INRA, BF2I, UMR0203, F-69621, Villeurbanne, France.","Baa-Puyoulet P, Parisot N, Febvay G, Huerta-Cepas J, Vellozo AF, Gabaldón T, Calevro F, Charles H, Colella S",,,5.0,France +29077946,ARED-Plus,0.983083916,ARED-Plus,0.983083916,AU-Rich Element Database,0.900331807,1,http://brp.kfshrc.edu.sa/ared,302,,"(24.6877,46.7219)",http://web.archive.org/web/20221102060635/https://brp.kfshrc.edu.sa/ared,2018-01-01,"Molecular BioMedicine Program, Research Centre, King Faisal Specialist Hospital and Research Centre, Riyadh 11211, Saudi Arabia.","Bakheet T, Hitti E, Khabar KSA",,,27.0,Saudi Arabia +30150996,AromaDb,0.996747673,AromaDb,0.996747673,,0,1,http://bioinfo.cimap.res.in/aromadb,301,,"(25.3986,81.8418)",http://web.archive.org/web/20220202145655/http://bioinfo.cimap.res.in/aromadb/,2018-08-13,"Department of Metabolic and Structural Biology, CSIR-Central Institute of Medicinal and Aromatic Plants, Lucknow, India.","Kumar Y, Prakash O, Tripathi H, Tandon S, Gupta MM, Rahman LU, Lal RK, Semwal M, Darokar MP, Khan F",,"Central Institute of Medicinal and Aromatic Plants, Central Institute of Medicinal and Aromatic Plants, 
Central Institute of Medicinal and Aromatic Plants, Central Institute of Medicinal and Aromatic Plants",6.0,India +32449934,ASAP,0.72770232,ASAP,0.72770232,Automated Single-cell Analysis Portal,0.707243107,1,http://asap.epfl.ch,301,,"(46.5290,6.5626)",http://web.archive.org/web/20221016220758/https://asap.epfl.ch/,2020-07-01,"Institute of Bioengineering, School of Life Sciences, École Polytechnique Fédérale de Lausanne (EPFL), CH-1015 Lausanne, Switzerland.","David FPA, Litovchenko M, Deplancke B, Gardeux V",,"Swiss National Science Foundation, Chan Zuckerberg Initiative, EPFL, Swiss National Science Foundation, Precision Health & related Technologies, Swiss National Science Foundation",7.0,Switzerland +32507889,articles.ELM,0.941570143,articles.ELM,0.941570143,,0,1,http://slim.icr.ac.uk/articles,301,,"(51.5085,-0.1257)",no_wayback,2020-01-01,"Departamento de Ciencia y Tecnología, Universidad Nacional de Quilmes, CONICET, Roque Saenz Peña 352, Bernal, Buenos Aires B1876BXD, Argentina.","Palopoli N, Iserte JA, Chemes LB, Marino-Buslje C, Parisi G, Gibson TJ, Davey NE",,"Agencia Nacional de Promoción Científica y Tecnológica, Cancer Research UK, Consejo Nacional de Investigaciones Científicas y Técnicas, Agencia Nacional de Promoción Científica y Tecnológica",1.0,Argentina +34738791,AroCageDB,0.994719088,AroCageDB,0.994719088,Aromatic Cage Database,0.872022057,1,http://www.pharmbioinf.uni-freiburg.de/arocagedb,301,,"(47.9959,7.8522)",no_wayback,2021-11-05,"Institute of Pharmaceutical Sciences, Faculty of Chemistry and Pharmacy, Albert-Ludwigs-Universität Freiburg, Hermann-Herder-Straße 9, D-79104 Freiburg, Germany.","Li J, Moumbock AFA, Qaseem A, Xu Q, Feng Y, Wang D, Günther S",,"Baden-W??rttemberg Stiftung, Deutsche Forschungsgemeinschaft, Deutscher Akademischer Austauschdienst",0.0,Germany +"22080559, 24194595",AspGD,0.997273564,AspGD,0.997273564,Aspergillus Genome Database,0.978452827,2,http://www.aspgd.org,"HTTPConnectionPool(host='www.aspgd.org', port=80): Max 
retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.aspgd.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20211206203550/http://aspgd.org/,2013-11-04,"Department of Genetics, Stanford University Medical School, Stanford, CA 94305-5120, USA. arnaudm@stanford.edu, Broad Institute of Harvard and MIT, 7 Cambridge Center, Cambridge, MA 02141, USA Department of Genetics, Stanford University Medical School, Stanford, CA 94305-5120, USA and Institute for Genome Sciences, University of Maryland School of Medicine, Baltimore, MD 21201, USA.","Arnaud MB, Cerqueira GC, Inglis DO, Skrzypek MS, Binkley J, Chibucos MC, Crabtree J, Howarth C, Orvis J, Shah P, Wymore F, Binkley G, Miyasato SR, Simison M, Sherlock G, Wortman JR, Cerqueira GC, Arnaud MB, Inglis DO, Skrzypek MS, Binkley G, Simison M, Miyasato SR, Binkley J, Orvis J, Shah P, Wymore F, Sherlock G, Wortman JR",", ","NIAID NIH HHS, NIAID NIH HHS",230.0,"United States, United States, United States, United States" +24475134,ASDCD,0.993572259,ASDCD,0.993572259,Antifungal Synergistic Drug Combination Database,0.987889366,1,http://ASDCD.amss.ac.cn,"HTTPConnectionPool(host='asdcd.amss.ac.cn', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to asdcd.amss.ac.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220501202409/http://asdcd.amss.ac.cn/,2014-01-24,"National Centre for Mathematics and Interdisciplinary Sciences, Chinese Academy of Sciences, Beijing, P. R. China ; Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Beijing, P. R. China.","Chen X, Ren B, Chen M, Liu MX, Ren W, Wang QX, Zhang LX, Yan GY",,,30.0,"China, China" +27193158,ASL-LEX,0.995315278,ASL-LEX,0.995315278,,0,1,http://asl-lex.org,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20221101045703/https://asl-lex.org/,2017-04-01,"Programs in Deaf Studies, Boston University, Boston, MA, USA. 
nkc@bu.edu.","Caselli NK, Sehyr ZS, Cohen-Goldberg AM, Emmorey K",,"NIDCD NIH HHS, National Institutes of Health, NIDCD NIH HHS, Tufts University, Tufts University",28.0,United States +29106599,ASpedia,0.996890545,ASpedia,0.996890545,,0,1,http://combio.snu.ac.kr/aspedia,"HTTPConnectionPool(host='combio.snu.ac.kr', port=80): Max retries exceeded with url: /aspedia (Caused by ReadTimeoutError(""HTTPConnectionPool(host='combio.snu.ac.kr', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220317144056/http://combio.snu.ac.kr/aspedia/,2018-01-01,"Research Institute, National Cancer Center, 323 Ilsan-ro, Goyang-si, Kyeonggi-do 10408, Republic of Korea.","Hyung D, Kim J, Cho SY, Park C",,,13.0, +29321052,ASGDB,0.969647527,ASGDB,0.969647527,sinensis genome database,0.934053496,1,http://www.asgdb.org,301,,"(35.6895,139.6917)",http://web.archive.org/web/20220524234153/http://asgdb.org/,2018-01-10,"Department of Pathogen Biology, Nanjing Medical University, Nanjing, Jiangsu, 210029, People's Republic of China.","Zhou D, Xu Y, Zhang C, Hu MX, Huang Y, Sun Y, Ma L, Shen B, Zhu CL",,"National Natural Science Foundation of China, Priority Academic Program Development of Jiangsu Higher Education Institutions",0.0,China +31036810,ASNR,0.994962871,ASNR,0.994962871,Animal Social Network Repository,0.941487324,1,http://bansallab.github.io/asnr,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20220216052257/https://bansallab.github.io/asnr/,2019-04-29,"Department of Biology, Georgetown University, Washington, DC, USA.","Sah P, Méndez JD, Bansal S",,National Science Foundation,5.0,United States +"31665428, 31707700",ASD,0.992668907,ASD,0.992668907,The Allosteric Database,0.877874815,2,http://mdl.shsmu.edu.cn/ASD,302,,"(31.2222,121.4581)",http://web.archive.org/web/20220527163720/http://mdl.shsmu.edu.cn/ASD/,2020-01-01,"State Key Laboratory of Oncogenes and Related Genes, Renji Hospital, Shanghai Jiao Tong University School of Medicine, Shanghai 200127, 
China., Department of Pathophysiology, Key Laboratory of Cell Differentiation and Apoptosis of Chinese Ministry of Education, Shanghai Jiao Tong University, School of Medicine, Shanghai, China.","Liu X, Lu S, Song K, Shen Q, Ni D, Li Q, He X, Zhang H, Wang Q, Chen Y, Li X, Wu J, Sheng C, Chen G, Liu Y, Lu X, Zhang J, Song K, Zhang J, Lu S",", ","Shanghai Sailing Program, Chinese National Precise Medical Research key project, Shanghai Health and Family Planning Commission, Shanghai Municipal Education Commission, National Natural Science Foundation of China, Natural Science Foundation of Shanghai Municipal Commission of Health and Family Planning, Shanghai Health and Family Planning Commission, National Natural Science Foundation of China, National Natural Science Foundation of China, Shanghai Natural Science Foundation, Shanghai Science and Technology Innovation, National Natural Science Foundation of China, ",11.0,"China, China" +31843802,ASRD,0.982676625,ASRD,0.982676625,Arabidopsis Small RNA Database,0.980507145,1,http://ipf.sustech.edu.cn/pub/asrd,301,,"(22.5455,114.0683)",http://web.archive.org/web/20220617065305/http://ipf.sustech.edu.cn/pub/asrd,2019-12-16,"Harbin Institute of Technology, Harbin, Heilongjiang 150001, China.","Feng L, Zhang F, Zhang H, Zhao Y, Meyers BC, Zhai J",,"National Key R&D Program of China Grant, Program for Guangdong Introducing Innovative and Entrepreneurial Teams, National Natural Science Foundation of China, Shenzhen Sci-Tech Fund",2.0,China +32294195,ASFVdb,0.996319771,ASFVdb,0.996319771,African swine fever virus database,0.897414913,1,http://asfvdb.popgenetics.net,200,,"(29.5603,106.5577)",http://web.archive.org/web/20220202024922/http://asfvdb.popgenetics.net/,2020-01-01,"School of Life Sciences, Chongqing University, Chongqing, China.","Zhu Z, Meng G",,"National Natural Science Foundation of China, Central Universities in China, National Key Research and Development Program",5.0,China 
+34839012,ASER,0.987263083,ASER,0.987263083,Animal Sex Reversal Database,0.97150902,1,http://aser.ihb.ac.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220501154103/http://aser.ihb.ac.cn/,2021-11-25,"State Key Laboratory of Virology, College of Life Sciences, Wuhan University, Wuhan 430072, China.","Li Y, Chen Z, Liu H, Li Q, Lin X, Ji S, Li R, Li S, Fan W, Zhao H, Zhu Z, Hu W, Zhou Y, Luo D",,"Hubei Province Natural Science Foundation, National Natural Science Foundation of China",1.0,China +22800758,AthaMap,0.989823103,AthaMap,0.989823103,,0,1,http://www.athamap.de,200,,"(52.2680,10.5200)",http://web.archive.org/web/20220527151753/http://www.athamap.de/,2012-07-16,"Institut für Genetik, Technische Universität Braunschweig, Spielmannstr, 38106, Braunschweig, Germany. r.hehl@tu-braunschweig.de.","Bülow L, Bolívar JC, Ruhe J, Brill Y, Hehl R",,,8.0,Germany +22876890,AtlasT4SS,0.984139264,AtlasT4SS,0.984139264,,0,1,http://www.t4ss.lncc.br,200,,"(-22.5050,-43.1786)",http://web.archive.org/web/20220615180939/http://www.t4ss.lncc.br/,2012-08-09,"The National Laboratory for Scientific Computing LNCC, Getúlio Vargas, Petrópolis, Rio de Janeiro, Brazil.","Souza RC, del Rosario Quispe Saji G, Costa MO, Netto DS, Lima NC, Klein CC, Vasconcelos AT, Nicolás MF",,,26.0,Brazil +23904744,ASRDb,0.997044563,ASRDb,0.997044563,Archaeal Stress Response Database,0.98826167,1,http://121.241.218.70/ASRDb,"HTTPConnectionPool(host='121.241.218.70', port=80): Max retries exceeded with url: /ASRDb (Caused by ConnectTimeoutError(, 'Connection to 121.241.218.70 timed out. 
(connect timeout=5)'))",,,no_wayback,2013-07-12,"Biomedical Informatics Center, National Institute of Cholera and Enteric Diseases, P-33, C.I.T Road, Scheme-XM, Beliaghata, Kolkata-700010, India.","Labala RK, Das S, Basak S",,,0.0,India +28968841,AtCircDB,0.993186295,AtCircDB,0.993186295,,0,1,http://genome.sdau.edu.cn/circRNA,"HTTPConnectionPool(host='genome.sdau.edu.cn', port=80): Max retries exceeded with url: /circRNA (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))",,,http://web.archive.org/web/20180626224546/http://genome.sdau.edu.cn:80/circRNA/,2019-01-01,"Agricultural Big-Data Research Center, College of Information Science and Engineering, Shandong Agricultural University, Taian, Shandong, China.","Ye J, Wang L, Li S, Zhang Q, Zhang Q, Tang W, Wang K, Song K, Sablok G, Sun X, Zhao H",,National Natural Science Foundation of China,17.0,China +29987736,AT_CHLORO,0.995981574,AT_CHLORO,0.995981574,,0,1,http://at-chloro.prabi.fr/at_chloro,302,,,http://web.archive.org/web/20211017210825/http://at-chloro.prabi.fr/at_chloro/,2018-01-01,"Laboratoire de Physiologie Cellulaire et Végétale, Université Grenoble Alpes, Centre National de la Recherche Scientifique, Institut National de la Recherche Agronomique, Commissariat à l'Energie Atomique et aux Energies Alternatives, Grenoble, France.","Salvi D, Bournais S, Moyet L, Bouchnak I, Kuntz M, Bruley C, Rolland N",,,2.0,France +30239683,ATD,0.981130342,ATD,0.981130342,Autophagy To Disease,0.913275957,1,http://auto2disease.nwsuaflmz.com,406,,,http://web.archive.org/web/20221017041119/https://auto2disease.nwsuaflmz.com/,2018-01-01,"College of Life Sciences, Northwest A&F University, Yangling, Shaanxi, China.","Wang W, Zhang P, Li L, Chen Z, Bai W, Liu G, Zhang L, Jia H, Li L, Yu Y, Liao M",,"National Natural Science Foundation of China, Natural Science Foundation of Shaanxi Province",2.0,China 
+30624648,AtFusionDB,0.996401966,AtFusionDB,0.996401966,,0,1,http://www.nipgr.res.in/AtFusionDB,"HTTPConnectionPool(host='www.nipgr.res.in', port=80): Max retries exceeded with url: /AtFusionDB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))",,,http://web.archive.org/web/20190109233448/http://www.nipgr.res.in:80/AtFusionDB/,2019-01-01,"Bioinformatics Laboratory, National Institute of Plant Genome Research Aruna Asaf Ali Marg, New Delhi, India.","Singh A, Zahra S, Das D, Kumar S",,"National Institute of Plant Genome Research, India",2.0,India +31535335,Atacama,0.921638429,Atacama,0.921638429,,0,1,http://www.atacamadb.cl,301,,,http://web.archive.org/web/20221025103310/https://www.atacamadb.cl/,2019-09-18,"Centre for Biotechnology and Bioengineering (CeBiB), Department of Chemical Engineering, Biotechnology and Materials, University of Chile, Beauchef 851, 8370456, Santiago, Chile. cacontad@ing.uchile.cl.","Contador CA, Veas-Castillo L, Tapia E, Antipán M, Miranda N, Ruiz-Tagle B, García-Araya J, Andrews BA, Marin M, Dorador C, Asenjo JA",,Comisión Nacional de Investigación Científica y Tecnológica,3.0,"Chile, Chile" +32681639,ATdb,0.992148757,ATdb,0.992148757,Autophagy and Tumor Database,0.977843061,1,http://www.bigzju.com/ATdb,302,,"(30.2936,120.1614)",http://web.archive.org/web/20220621152734/http://www.bigzju.com/ATdb/,2020-01-01,"Department of Toxicology of School of Public Health, and Department of Gynecologic Oncology of Women's Hospital, Zhejiang University School of Medicine, Hangzhou 310058, China.","Chen K, Yang D, Zhao F, Wang S, Ye Y, Sun W, Lu H, Ruan Z, Xu J, Wang T, Lu G, Wang L, Shi Y, Zhang H, Wu H, Lu W, Shen HM, Xia D, Wu Y",,"National Natural Science Foundation of China, Zhejiang Provincial Natural Science Foundation of China, Fundamental Research Funds for the Central Universities, National Natural Science Foundation of China, WeiJian Special Foundation, Zhejiang University 
School of Public Health",6.0,China +33125076,ATACdb,0.997429222,ATACdb,0.997429222,chromatin accessibility database,0.808434457,1,http://www.licpathway.net/ATACdb,301,,,http://web.archive.org/web/20221102055609/http://www.licpathway.net/ATACdb/,2021-01-01,"School of Medical Informatics, Daqing Campus, Harbin Medical University, Daqing 163319, China.","Wang F, Bai X, Wang Y, Jiang Y, Ai B, Zhang Y, Liu Y, Xu M, Wang Q, Han X, Pan Q, Li Y, Li X, Zhang J, Zhao J, Zhang G, Feng C, Zhu J, Li C",,"National Natural Science Foundation of China, Natural Science Foundation, National Natural Science Foundation of China",3.0,China +"21217125, 24334350, 26546318, 29216398",ATTED-II,0.954756707,ATTED-II,0.954756707,,0,4,http://atted.jp,301,,"(38.2570,140.8523)",no_wayback,2018-01-01,"Graduate School of Information Science, Tohoku University, 6-3-09, Aramaki-Aza-Aoba, Aoba-ku, Sendai, 980-8679 Japan. takeshi.obayashi@atted.jp, Graduate School of Information Sciences, Tohoku University, 6-3-09, Aramaki-Aza-Aoba, Aoba-ku, Sendai, 980-8679 Japan., Graduate School of Information Sciences, Tohoku University, 6-3-09, Aramaki-Aza-Aoba, Aoba-ku, Sendai, 980-8679 Japan Core Research for Evolutional Science and Technology (CREST), Japan Science and Technology Agency, Kawaguchi, Saitama, Japan., Graduate School of Information Sciences, Tohoku University, 6-3-09, Aramaki-Aza-Aoba, Aoba-ku, Sendai, 980-8679 Japan.","Obayashi T, Nishida K, Kasahara K, Kinoshita K, Obayashi T, Okamura Y, Ito S, Tadaka S, Aoki Y, Shirota M, Kinoshita K, Aoki Y, Okamura Y, Tadaka S, Kinoshita K, Obayashi T, Obayashi T, Aoki Y, Tadaka S, Kagaya Y, Kinoshita K",", , , ",", , , JSPS, Japan Society for the Promotion of Science",304.0,"Japan, Japan, Japan, Japan, Japan, Japan" +"22057158, 26779400",AURA,0.97320962,AURA,0.97320962,Atlas of UTR Regulatory Activity,0.889621913,2,http://aura.science.unitn.it,"HTTPConnectionPool(host='aura.science.unitn.it', port=80): Max retries exceeded with url: / (Caused by 
ConnectTimeoutError(, 'Connection to aura.science.unitn.it timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220417211458/http://aura.science.unitn.it/,2014-01-29,"Laboratory of Translational Genomics - Centre for Integrative Biology, University of Trento, Via delle Regole, 101, 38123 Mattarello (TN), Italy., Laboratory of Translational Genomics; Centre for Integrative Biology; University of Trento; Trento, Italy.","Dassi E, Malossini A, Re A, Mazza T, Tebaldi T, Caputi L, Quattrone A, Dassi E, Re A, Leo S, Tebaldi T, Pasini L, Peroni D, Quattrone A",", ",", ",51.0,"Italy, Italy" +22397531,AtPAN,0.98359412,AtPAN,0.98359412,Arabidopsis thaliana Promoter Analysis Net,0.875704557,1,http://AtPAN.itps.ncku.edu.tw,200,,"(22.9908,120.2133)",http://web.archive.org/web/20220802172834/http://atpan.itps.ncku.edu.tw/,2012-03-08,"Institute of Tropical Plant Sciences, National Cheng Kung University, Tainan 701, Taiwan.","Chen YA, Wen YC, Chang WC",,,9.0, +23774715,Autism Brain Imaging Data Exchange,0.98496213,ABIDE,0.968805671,Autism Brain Imaging Data Exchange,0.98496213,1,http://fcon_1000.projects.nitrc.org/indi/abide,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20220610050551/https://fcon_1000.projects.nitrc.org/indi/abide/,2013-06-18,"Phyllis Green and Randolph Cowen Institute for Pediatric Neuroscience at the NYU Child Study Center, New York University Langone Medical Center, New York, NY, USA.","Di Martino A, Yan CG, Li Q, Denio E, Castellanos FX, Alaerts K, Anderson JS, Assaf M, Bookheimer SY, Dapretto M, Deen B, Delmonte S, Dinstein I, Ertl-Wagner B, Fair DA, Gallagher L, Kennedy DP, Keown CL, Keysers C, Lainhart JE, Lord C, Luna B, Menon V, Minshew NJ, Monk CS, Mueller S, Müller RA, Nebel MB, Nigg JT, O'Hearn K, Pelphrey KA, Peltier SJ, Rudie JD, Sunaert S, Thioux M, Tyszka JM, Uddin LQ, Verhoeven JS, Wenderoth N, Wiggins JL, Mostofsky SH, Milham MP",,"Autism Speaks, Autism Speaks, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH 
HHS, NIDCD NIH HHS, Autism Speaks, NIMH NIH HHS, NIMH NIH HHS, NICHD NIH HHS, NIMH NIH HHS, Dutch Research Council (NWO), Dutch Research Council (NWO), NIDCD NIH HHS, NIDCD NIH HHS, NICHD NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, Dutch Research Council (NWO), NIMH NIH HHS, NIMH NIH HHS, NICHD NIH HHS, NIMH NIH HHS, NICHD NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NICHD NIH HHS, Autism Speaks, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NICHD NIH HHS, NIMH NIH HHS, NINDS NIH HHS, NINDS NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIDCD NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NICHD NIH HHS, NIMH NIH HHS, NICHD NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIGMS NIH HHS, NICHD NIH HHS, Autism Speaks, Autism Speaks, NIDCD NIH HHS, NICHD NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NINDS NIH HHS, NICHD NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIDCD NIH HHS, NICHD NIH HHS",630.0,United States +25972521,AtmiRNET,0.997909009,AtmiRNET,0.997909009,,0,1,http://AtmiRNET.itps.ncku.edu.tw,200,,"(22.9908,120.2133)",http://web.archive.org/web/20221005195411/http://atmirnet.itps.ncku.edu.tw/,2015-05-13,"College of Biosciences and Biotechnology, Institute of Tropical Plant Sciences, National Cheng Kung University, Tainan 70101, Taiwan.","Chien CH, Chiang-Hsieh YF, Chen YA, Chow CN, Wu NY, Hou PF, Chang WC",,,8.0, +27899679,AtPID,0.997839749,AtPID,0.997839749,Arabidopsis thaliana Protein Interactome Database,0.974847411,1,http://www.megabionet.org/atpid,301,,"(37.3394,-121.8950)",no_wayback,2016-11-28,"Center for Bioinformatics and Computational Biology, and the Institute of Biomedical Sciences, School of Life Sciences, East China Normal University, Shanghai 200241, China.","Lv Q, Lan Y, Shi Y, Wang H, Pan X, Li P, Shi T",,,1.0,"China, China" 
+29186576,AutDB,0.997044206,AutDB,0.997044206,,0,1,http://autism.mindspec.org/autdb/Welcome.do,"HTTPConnectionPool(host='autism.mindspec.org', port=80): Max retries exceeded with url: /autdb/Welcome.do (Caused by ReadTimeoutError(""HTTPConnectionPool(host='autism.mindspec.org', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220812202928/http://autism.mindspec.org/autdb/Welcome.do,2018-01-01,"MindSpec Inc., 8280 Greensboro Drive, Suite 150, McLean, VA 22102, USA.","Pereanu W, Larsen EC, Das I, Estévez MA, Sarkar AA, Spring-Pearson S, Kollu R, Basu SN, Banerjee-Basu S",,,11.0,United States +29198880,AureoWiki,0.996498525,AureoWiki,0.996498525,,0,1,http://aureowiki.med.uni-greifswald.de,301,,"(54.0931,13.3879)",no_wayback,2017-11-24,"FG13 Nosocomial Pathogens and Antibiotic Resistance, Robert Koch Institute, Wernigerode, Germany; Institute for Microbiology, Ernst-Moritz-Arndt-University Greifswald, Greifswald, Germany.","Fuchs S, Mehlan H, Bernhardt J, Hennig A, Michalik S, Surmann K, Pané-Farré J, Giese A, Weiss S, Backert L, Herbig A, Nieselt K, Hecker M, Völker U, Mäder U",,Deutsche Forschungsgemeinschaft,36.0,"Germany, Germany" +30534948,atSNP,0.982023239,atSNP,0.982023239,,0,1,http://atsnp.biostat.wisc.edu,200,,"(43.0731,-89.4012)",http://web.archive.org/web/20220618182206/http://atsnp.biostat.wisc.edu/,2019-08-01,"Department of Mathematical Sciences, University of Texas at Dallas, Richardson, TX, USA.","Shin S, Hudson R, Harrison C, Craven M, Keleş S",,"NHGRI NIH HHS, NIAID NIH HHS, National Human Genome Research Institute, National Institutes of Health, NHGRI NIH HHS, National Human Genome Research Institute, NHGRI NIH HHS, National Human Genome Research Institute, National Institutes of Health BD2K",9.0,United States +33219693,AtMAD,0.989043355,AtMAD,0.989043355,Arabidopsis thaliana Multi-omics Association Database,0.988037554,1,http://www.megabionet.org/atmad,301,,"(37.3394,-121.8950)",no_wayback,2021-01-01,"Key Laboratory of 
Saline-alkali Vegetation Ecology Restoration, Ministry of Education, Northeast Forestry University, Harbin, Heilongjiang 150040, China.","Lan Y, Sun R, Ouyang J, Ding W, Kim MJ, Wu J, Li Y, Shi T",,"Beihang University & Capital Medical University Plan, National Natural Science Foundation of China, Shanghai Municipal Science and Technology, Beihang University & Capital Medical University Plan, National Natural Science Foundation of China, National Natural Science Foundation of China",1.0,China +21335611,B2G-FAR,0.91893776,B2G-FAR,0.91893776,Annotation Repository,0.673282564,1,http://www.b2gfar.org,301,,"(50.1025,8.6299)",http://web.archive.org/web/20170921101632/http://b2gfar.org/,2011-02-18,"Bioinformatics and Genomics Department, Centro de Investigaciones Príncipe Felipe (CIPF), Valencia, Spain. sgoetz@cipf.es","Götz S, Arnold R, Sebastián-León P, Martín-Rodríguez S, Tischler P, Jehl MA, Dopazo J, Rattei T, Conesa A",,,83.0,Spain +22139918,AutismKB,0.762071371,AutismKB,0.762071371,,0,1,http://autismkb.cbi.pku.edu.cn,"HTTPConnectionPool(host='autismkb.cbi.pku.edu.cn', port=80): Pool is closed.",,,http://web.archive.org/web/20200105115504/http://autismkb.cbi.pku.edu.cn:80/,2011-12-01,"Center for Bioinformatics, State Key Laboratory of Protein and Plant Gene Research, College of Life Sciences, Peking University, Beijing 100871, PR China.","Xu LM, Li JR, Huang Y, Zhao M, Tang X, Wei L",,,103.0,China +22753780,AutoBind,0.98091805,AutoBind,0.98091805,,0,1,"http://autobind.csie.ncku.edu.tw/, http://autobind.mc.ntu.edu.tw","301, HTTPConnectionPool(host='autobind.mc.ntu.edu.tw', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to autobind.mc.ntu.edu.tw timed out. 
(connect timeout=5)'))",,"(22.9908,120.2133), ","http://web.archive.org/web/20181107084844/http://autobind.csie.ncku.edu.tw/, no_wayback",2012-07-02,"Department of Electrical Engineering, Department of Computer Science and Information Engineering, National Cheng Kung University, Tainan 70101, Taiwan.","Chang DT, Ke CH, Lin JH, Chiang JH",,,4.0, +24285301,AVPdb,0.996542871,AVPdb,0.996542871,,0,1,http://crdd.osdd.net/servers/avpdb,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /servers/avpdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220122052654/http://crdd.osdd.net/servers/avpdb/,2013-11-26,"Bioinformatics Centre, Institute of Microbial Technology, Council of Scientific and Industrial Research, Sector 39-A, Chandigarh-160036, India.","Qureshi A, Thakur N, Tandon H, Kumar M",,,65.0,India +30215764,AWESOME,0.993797898,AWESOME,0.993797898,Exhibits SNP,0.626161829,1,http://www.awesome-hust.com,200,,"(33.9192,-118.4165)",http://web.archive.org/web/20220616074154/http://www.awesome-hust.com/,2019-01-01,"Key Laboratory for Environment and Health (Ministry of Education), Department of Epidemiology and Biostatistics, School of Public Health, Tongji Medical College, Huazhong University of Sciences and Technology, Wuhan, 430030, China.","Yang Y, Peng X, Ying P, Tian J, Li J, Ke J, Zhu Y, Gong Y, Zou D, Yang N, Wang X, Mei S, Zhong R, Gong J, Chang J, Miao X",,"National Key Research and Development Plan Program, National Natural Science Foundation of China, National Natural Science Foundation of China",19.0,China +30669929,AutophagySMDB,0.995587846,AutophagySMDB,0.995587846,Autophagy Small Molecule Database,0.9487999,1,http://www.autophagysmdb.org,405,,,http://web.archive.org/web/20180104002954/http://autophagysmdb.org/,2019-02-03,"a Department of Molecular Biology , CSIR-Institute of Microbial Technology , Chandigarh , India.","Nanduri 
R, Kalra R, Bhagyaraj E, Chacko AP, Ahuja N, Tiwari D, Kumar S, Jain M, Parkesh R, Gupta P",,"Department of Biotechnology, Ministry of Science and Technology, National Bioscience Award project<Q4/>, Council of Scientific and Industrial Research (CSIR) 12th Plan Network project Genesis",6.0,India +30893420,AYbRAH,0.996119797,AYbRAH,0.996119797,Analyzing Yeasts by Reconstructing Ancestry of Homologs,0.874202971,1,http://lmse.github.io/aybrah,301,,"(37.7621,-122.3971)",no_wayback,2019-01-01,"Department of Chemical Engineering and Applied Chemistry, University of Toronto, College Street, Toronto, ON, Canada.","Correia K, Yu SM, Mahadevan R",,Natural Sciences and Engineering Research Council of Canada,4.0,Canada +33176685,AVIMM,0.990362942,AVIMM,0.990362942,Avian Immunome DB,0.966759622,1,http://avimm.ab.mpg.de,301,,"(51.5344,9.9323)",http://web.archive.org/web/20220521190634/https://avimm.ab.mpg.de/,2020-11-12,"Department of Migration, Max Planck Institute of Animal Behavior, Am Obstberg, 78315, Radolfzell, Germany. 
rmueller@ab.mpg.de.","Mueller RC, Mallig N, Smith J, Eöry L, Kuo RI, Kraus RHS",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Ministry of Science, Research and the Arts of the State of Baden-Württemberg, Biotechnology and Biological Sciences Research Council, Projekt DEAL",3.0,Germany +34269889,B3Pdb,0.996475801,B3Pdb,0.996475801,,0,1,http://webs.iiitd.edu.in/raghava/b3pdb,301,,"(28.6453,77.2128)",http://web.archive.org/web/20220616130948/https://webs.iiitd.edu.in/raghava/b3pdb/,2021-07-16,"Department of Computational Biology, Indraprastha Institute of Information Technology, Okhla Industrial Estate, Phase III, New Delhi, 110020, India.","Kumar V, Patiyal S, Kumar R, Sahai S, Kaur D, Lathwal A, Raghava GPS",,,4.0,India +34976872,B-AMP,0.991384625,B-AMP,0.991384625,,0,1,http://b-amp.karishmakaushiklab.com,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20221018112345/https://b-amp.karishmakaushiklab.com/,2021-12-16,"Department of Bioinformatics, Guru Nanak Khalsa College of Arts, Science and Commerce (Autonomous), Mumbai, India.","Mhade S, Panse S, Tendulkar G, Awate R, Narasimhan Y, Kadam S, Yennamalli RM, Kaushik KS",,,0.0,India +22135301,BacMap,0.990752041,BacMap,0.990752041,,0,1,http://bacmap.wishartlab.com,200,,"(53.5501,-113.4687)",http://web.archive.org/web/20220518024459/http://bacmap.wishartlab.com/,2011-12-01,"Department of Computing Science, Food and Nutritional Science, University of Alberta, Edmonton, AB, Canada T6G 2E8.","Cruz J, Liu Y, Liang Y, Zhou Y, Wilson M, Dennis JJ, Stothard P, Van Domselaar G, Wishart DS",,Canadian Institutes of Health Research,9.0,Canada +"24214959, 26424852, 30256983",BacDive,0.997053862,BacDive,0.997053862,Bacterial Diversity Metadatabase,0.693979033,3,http://bacdive.dsmz.de,301,,"(50.9787,11.0328)",http://web.archive.org/web/20221108031257/https://bacdive.dsmz.de/,2019-01-01,"Leibniz Institute DSMZ-German Collection of Microorganisms and Cell 
Cultures, Inhoffenstr. 7B, 38124 Braunschweig, Germany., Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Braunschweig, Germany cas11@dsmz.de., Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Braunschweig, Germany.","Söhngen C, Bunk B, Podstawka A, Gleim D, Overmann J, Söhngen C, Podstawka A, Bunk B, Gleim D, Vetcininova A, Reimer LC, Ebeling C, Pendarovski C, Overmann J, Reimer LC, Vetcininova A, Carbasse JS, Söhngen C, Gleim D, Ebeling C, Overmann J",", , ",", , Federal Ministry of Education and Research, Deutsche Forschungsgemeinschaft",90.0,"Germany, Germany, Germany" +24304895,BacMet,0.994012475,BacMet,0.994012475,,0,1,http://bacmet.biomedicine.gu.se)--a,"HTTPConnectionPool(host='bacmet.biomedicine.gu.se)--a', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-12-03,"Department of Infectious Diseases, Institute of Biomedicine, The Sahlgrenska Academy at the University of Gothenburg, Gothenburg, SE-413 46, Sweden, Department of Plant and Environmental Sciences, University of Copenhagen, Copenhagen, DK-1871, Denmark and Department of Mathematical Sciences, Chalmers University of Technology, SE-412 96, Gothenburg, Sweden.","Pal C, Pal C, Bengtsson-Palme J, Rensing C, Kristiansson E, Larsson DG",,,162.0,"Denmark, Sweden, Sweden" +24602877,BambooGDB,0.996197879,BambooGDB,0.996197879,,0,1,"http://www.bamboogdb.org/, http://www.bamboogdb.org","HTTPConnectionPool(host='www.bamboogdb.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.bamboogdb.org timed out. (connect timeout=5)')), HTTPConnectionPool(host='www.bamboogdb.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.bamboogdb.org timed out. 
(connect timeout=5)'))",,", ","no_wayback, http://web.archive.org/web/20211014111508/http://www.bamboogdb.org/",2014-03-05,"State Forestry Administration Key Open Laboratory on the Science and Technology of Bamboo and Rattan, International Center for Bamboo and Rattan, Beijing 100102, China, State key laboratory of tree genetics and breeding, Research Institute of Forestry, Chinese Academy of Forestry, Beijing 100091, China and Key Laboratory of Tree Breeding and Cultivation, State Forestry Administration, Research Institute of Forestry, Chinese Academy of Forestry, Beijing 100091, China.","Zhao H, Peng Z, Fei B, Li L, Hu T, Gao Z, Jiang Z",,,29.0,"China, China, China" +25336620,BARCdb,0.997212112,BARCdb,0.997212112,The Biobanking Analysis Resource Catalogue,0.877752744,1,http://www.barcdb.org,"HTTPConnectionPool(host='www.barcdb.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.barcdb.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20221017080924/http://www.barcdb.org/,,"Department of Immunology, Genetics and Pathology, Science for Life Laboratory, Uppsala University, SE-751 08 Uppsala, Sweden joakim.galli@igp.uu.se.","Galli J, Oelrich J, Taussig MJ, Andreasson U, Ortega-Paino E, Landegren U",,,1.0,Sweden +25377257,BactPepDB,0.998118401,BactPepDB,0.998118401,,0,1,"http://bactpepdb.rpbs.univ-paris-diderot.fr, http://www.yeastgenome.org","302, 301",,"(48.8534,2.3488), (45.8399,-119.7006)","no_wayback, no_wayback",2014-11-06,"INSERM, U973, MTi, F-75205 Paris, France, Université Paris Diderot, Sorbonne Paris Cité, F-75205 Paris, France and RPBS, F-75205 Paris, France INSERM, U973, MTi, F-75205 Paris, France, Université Paris Diderot, Sorbonne Paris Cité, F-75205 Paris, France and RPBS, F-75205 Paris, France INSERM, U973, MTi, F-75205 Paris, France, Université Paris Diderot, Sorbonne Paris Cité, F-75205 Paris, France and RPBS, F-75205 Paris, France.","Rey J, Deschavanne P, Tuffery P",,,6.0,"France, 
France, France, France, France, France, France, France, France" +25477388,BARD,0.990859985,BARD,0.990859985,BioAssay Research Database,0.854191172,1,http://bard.nih.gov,"HTTPConnectionPool(host='bard.nih.gov', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180726095431/https://bard.nih.gov/,2014-12-04,"Center for the Science of Therapeutics, Broad Institute, 415 Main Street, Cambridge, MA 02142, USA.","Howe EA, de Souza A, Lahr DL, Chatwin S, Montgomery P, Alexander BR, Nguyen DT, Cruz Y, Stonich DA, Walzer G, Rose JT, Picard SC, Liu Z, Rose JN, Xiang X, Asiedu J, Durkin D, Levine J, Yang JJ, Schürer SC, Braisted JC, Southall N, Southern MR, Chung TD, Brudz S, Tanega C, Schreiber SL, Bittker JA, Guha R, Clemons PA",,NHGRI NIH HHS,13.0,United States +"26433226, 30715167, 33010178",BacWGSTdb,0.994153142,BacWGSTdb,0.994153142,,0,3,http://bacdb.org/BacWGSTdb,301,,"(34.0559,-118.2666)",no_wayback,2021-01-01,"Sir Run Run Shaw Hospital, School of Medicine, Zhejiang University, Hangzhou, 310016, China., Sir Run Run Shaw Hospital, Zhejiang University School of Medicine, Hangzhou, China., Sir Run Run Shaw Hospital, Zhejiang University School of Medicine, Hangzhou 310016, China.","Ruan Z, Feng Y, Ruan Z, Yu Y, Feng Y, Feng Y, Zou S, Chen H, Yu Y, Ruan Z",", , ",", Zhejiang Province Public Welfare Technology Application Research Project, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Zhejiang Province Public Welfare Technology Application Research Project, National Natural Science Foundation of China",140.0,"China, China, China" +30272193,BACTOME,0.997656286,BACTOME,0.997656286,,0,1,http://bactome.helmholtz-hzi.de,"HTTPConnectionPool(host='bactome.helmholtz-hzi.de', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 
'Connection to bactome.helmholtz-hzi.de timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20201125114422/https://bactome.helmholtz-hzi.de/,2019-01-01,"Institute of Molecular Bacteriology, Helmholtz Centre for Infection Research, D-38124 Braunschweig, Germany.","Hornischer K, Khaledi A, Pohl S, Schniederjans M, Pezoldt L, Casilag F, Muthukumarasamy U, Bruchmann S, Thöming J, Kordes A, Häussler S",,"Federal Ministry of Education and Research, European Research council, European Research Council, European Research council, European Research Council, German Research Foundation",21.0,Germany +34838806,bacteria.guru,0.995123416,bacteria.guru,0.995123416,,0,1,http://bacteria.guru,302,,"(39.0997,-94.5786)",no_wayback,2021-11-25,"School of Biological Sciences, Nanyang Technological University, 60 Nanyang Drive, Singapore 637551, Singapore.","Lim PK, Davey EE, Wee S, Seetoh WS, Goh JC, Zheng X, Phang SKA, Seah ESK, Ng JWZ, Wee XJH, Quek AJH, Lim JJ, Rodrigues EE, Lee H, Lim CY, Tan WZ, Dan YR, Lee B, Chee SEL, Lim ZZE, Guan JS, Tan IJL, Arong TJ, Mutwil M",,Nanyang Technological University - Jurong Campus,0.0,"Singapore, Singapore" +24077841,BBGRE,0.97111398,BBGRE,0.97111398,Brain and Body Genetic Resource Exchange,0.924002247,1,http://bbgre.org,"HTTPConnectionPool(host='bbgre.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-09-27,"Department of Cytogenetics, Guy's and St Thomas NHS Foundation Trust, London, SE1 9RT, UK, MRC Social, Genetic and Developmental Psychiatry Centre, Institute of Psychiatry, King's College London, De Crespigny Park, London, SE5 8AF and NIHR Biomedical Research Centre for Mental Health at South London and Maudsley NHS Foundation, London, SE5 8AF.","Ahn JW, Dixit A, Johnston C, Ogilvie CM, Collier DA, Curran S, Dobson RJ",,Alzheimer's Society,3.0, +24250117,BBGD454,0.995709896,BBGD454,0.995709896,blueberry 
genomic database,0.90938741,1,http://bioinformatics.towson.edu/BBGD454,301,,"(39.4015,-76.6019)",http://web.archive.org/web/20220527204245/http://bioinformatics.towson.edu/BBGD454/,2013-10-16,"Department of Computer and Information Sciences, Towson University, Towson, MD 21252, USA.","Darwish O, Rowland LJ, Alkharouf NW",,,1.0,United States +27161011,BC5CDR,0.981737942,BC5CDR,0.981737942,,0,1,http://www.biocreative.org/tasks/biocreative-v/track-3-cdr,301,,"(39.6837,-75.7497)",http://web.archive.org/web/20190116035644/http://www.biocreative.org:80/tasks/biocreative-v/track-3-cdr,2016-05-09,"1Institute of Medical Information, Chinese Academy of Medical Sciences, Beijing 100020, China.","Li J, Sun Y, Johnson RJ, Sciaky D, Wei CH, Leaman R, Davis AP, Mattingly CJ, Wiegers TC, Lu Z",,,45.0,China +27376128,BcCluster,0.995449245,BcCluster,0.995449245,,0,1,"http://www.bccluster.org/, http://rubyonrails.org","HTTPConnectionPool(host='www.bccluster.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 101] Network is unreachable')), 301",,", (37.7621,-122.3971)","http://web.archive.org/web/20220621051952/http://www.bccluster.org/, http://web.archive.org/web/20221105123520/https://rubyonrails.org/",2016-01-07,"Charité-Universitätsmedizin Berlin, Berlin, Germany; Mosaiques diagnostics GmbH, Hannover, Germany.","Bhat A, Mokou M, Zoidakis J, Jankowski V, Vlahou A, Mischak H",,,3.0,"Germany, Germany" +31665503,BBCancer,0.997065306,BBCancer,0.997065306,,0,1,http://bbcancer.renlab.org,200,,"(33.4223,-111.8226)",http://web.archive.org/web/20220808104944/https://bbcancer.renlab.org/,2020-01-01,"State Key Laboratory of Oncology in South China, Cancer Center, Collaborative Innovation Center for Cancer Medicine, School of Life Sciences, Sun Yat-sen University, Guangzhou 510060, China.","Zuo Z, Hu H, Xu Q, Luo X, Peng D, Zhu K, Zhao Q, Xie Y, Ren J",,"National Key R&D Program of China, National Natural Science 
Foundation of China, National Natural Science Foundation of China, Science and Technology Program of Guangzhou, National Natural Science Foundation of China, National Natural Science Foundation of China, Guangdong Natural Science Foundation, National Natural Science Foundation of China, Guangdong Introducing Innovative and Entrepreneurial Teams, National Natural Science Foundation of China, National Natural Science Foundation of China, Science and Technology Program of Guangzhou",18.0,"China, China" +33247932,BarleyVarDB,0.997781932,BarleyVarDB,0.997781932,,0,1,http://146.118.64.11/BarleyVar,"HTTPConnectionPool(host='146.118.64.11', port=80): Pool is closed.",,,http://web.archive.org/web/20200730164730/http://146.118.64.11/BarleyVar/,2020-11-01,"Western Barley Genetics Alliance, Agricultural Sciences, College of Science, Health, Engineering and Education, Murdoch University, 90 South Street, Murdoch, WA 6150, Australia.","Tan C, Chapman B, Wang P, Zhang Q, Zhou G, Zhang XQ, Barrero RA, Bellgard MI, Li C",,Australian Grain Research and Development Corporation,0.0,Australia +33599248,bc-GenExMiner,0.983812017,bc-GenExMiner,0.983812017,Breast cancer gene-expression miner,0.969961941,1,http://bcgenex.ico.unicancer.fr,301,,"(47.2173,-1.5534)",no_wayback,2021-02-01,"Unité de Bioinfomique, Institut de Cancérologie de l'Ouest, Bd Jacques Monod, Saint Herblain Cedex 44805, France.","Jézéquel P, Gouraud W, Ben Azzouz F, Guérin-Charbonnel C, Juin PP, Lasla H, Campone M",,,12.0,France +33882119,BC-TFdb,0.990113586,BC-TFdb,0.990113586,Breast Cancer Transcription Factors database,0.960293174,1,http://www.dqweilab-sjtu.com/index.php,301,,"(60.3540,24.9794)",http://web.archive.org/web/20220109154427/http://dqweilab-sjtu.com/index.php,2021-04-01,"Department of Bioinformatics and Biological Statistics, School of Life Sciences and Biotechnology, Shanghai Jiao Tong University, Shanghai 200240, P.R. 
China.","Khan A, Khan T, Nasir SN, Ali SS, Suleman M, Rizwan M, Waseem M, Ali S, Zhao X, Wei DQ",,"National Natural Science Foundation of China, National Natural Science Foundation of China",1.0,China +34264745,BARRA:CuRDa,0.988691002,BARRA:CuRDa,0.988691002,Benchmarking of ARtificial intelligence Research,0.805160913,1,http://sbcb.inf.ufrgs.br/barracurda,301,,"(-30.0328,-51.2302)",http://web.archive.org/web/20210725193900/https://sbcb.inf.ufrgs.br/barracurda,2021-07-14,"Institute of Informatics, Department of Theoretical Computer Science, Federal University of Rio Grande do Sul, Porto Alegre, Brazil.","Feltes BC, Poloni JF, Dorn M",,,1.0,Brazil +34615485,Bayberry,0.680092216,Bayberry,0.680092216,,0,1,http://www.bayberrybase.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220804195634/http://bayberrybase.cn/,2021-10-06,"Institute of Horticulture, Zhejiang Academy of Agricultural Sciences, Hangzhou, 310021, China. renhy@zaas.ac.cn.","Ren H, He Y, Qi X, Zheng X, Zhang S, Yu Z, Hu F",,,1.0,China +28327601,BCIP,0.996045272,BCIP,0.996045272,Breast Cancer Integrative Platform,0.955718553,1,http://omics.bmi.ac.cn/bcancer,"HTTPConnectionPool(host='omics.bmi.ac.cn', port=80): Max retries exceeded with url: /bcancer (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))",,,no_wayback,2017-03-22,"Beijing Institute of Basic Medical Sciences, Beijing 100850, China.","Wu J, Hu S, Chen Y, Li Z, Zhang J, Yuan H, Shi Q, Shao N, Ying X",,,12.0,China +24608034,BCL2DB,0.997701004,BCL2DB,0.997701004,,0,1,http://bcl2db.ibcp.fr,301,,"(45.7469,4.8444)",http://web.archive.org/web/20190802011530/https://bcl2db.ibcp.fr/,2014-03-06,"Unité Bases Moléculaires et Structurales des Systèmes Infectieux, UMR 5086 CNRS - Université Claude Bernard Lyon 1, IBCP - 7, passage du Vercors, 69367 Lyon cedex 07, France and Molecular Biology of the Cell Laboratory, Ecole Normale Supérieure de Lyon, LBMC UMR 5239 CNRS - UCBL - HCL - 
ENS Lyon, 46 Allée d'Italie, 69364 Lyon Cedex 07, France.","Rech de Laval V, Deléage G, Aouacheria A, Combet C",,,7.0,"France, France" +34736471,BDdb,0.995132804,BDdb,0.995132804,birth defect multi-omics database,0.690108945,1,http://t21omics.cngb.org,200,,"(22.5455,114.0683)",no_wayback,2021-11-04,"College of Life Sciences, University of Chinese Academy of Sciences, Beijing, 100049, People's Republic of China.","Zhang D, Zhou S, Zhou Z, Jiang X, Chen D, Sun HX, Huang J, Qu S, Yang S, Gu Y, Zhang X, Jin X, Gao Y, Gao Y, Shen Y, Chen F",,"Key-Area Research and Development Program of Guangdong Province, Science, Technology and Innovation Commission of Shenzhen Municipality, Guangdong Provincial Key Laboratory of Genome Read and Write, Stiftung für Pathobiochemie und Molekulare Diagnostik, Shenzhen Municipal Government of China",0.0,China +23764453,BDgene,0.972416461,BDgene,0.972416461,,0,1,http://bdgene.psych.ac.cn,"HTTPConnectionPool(host='bdgene.psych.ac.cn', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='bdgene.psych.ac.cn', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20200520215446/http://bdgene.psych.ac.cn/,2013-06-10,"Key Laboratory of Mental Health, Institute of Psychology, Chinese Academy of Sciences, Beijing, China.","Chang SH, Gao L, Li Z, Zhang WN, Du Y, Wang J",,,25.0,China +34081565,BEE,0.989852111,BEE,0.989852111,Biomedical Entity Explorer,0.810965747,1,http://bike-bee.snu.ac.kr,"HTTPConnectionPool(host='bike-bee.snu.ac.kr', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='bike-bee.snu.ac.kr', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20210920030157/http://bike-bee.snu.ac.kr/,2021-06-02,"Biomedical Knowledge Engineering Laboratory, Seoul National University School of Dentistry, Seoul, Korea.","Jung J, Joe H, Ha K, Lim JM, Kim HG",,,0.0, +32655358,BETA,0.910390854,BETA,0.910390854,BCI,0.519562721,1,http://bci.med.tsinghua.edu.cn/download.html,200,,"(39.9906,116.2887)",http://web.archive.org/web/20220422051523/http://bci.med.tsinghua.edu.cn/download.html,2020-06-23,"Department of Biomedical Engineering, Tsinghua University, Beijing, China.","Liu B, Huang X, Wang Y, Chen X, Gao X",,,5.0,China +24399916,bex-db,0.995140064,bex-db,0.995140064,Barley Gene Expression Database,0.958310401,1,http://barleyflc.dna.affrc.go.jp/bexdb/index.html,"HTTPConnectionPool(host='barleyflc.dna.affrc.go.jp', port=80): Max retries exceeded with url: /bexdb/index.html (Caused by ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')))",,,http://web.archive.org/web/20201024123852/https://barleyflc.dna.affrc.go.jp/bexdb/index.html,2013-12-01,"Agrogenomics Research Center, National Institute of Agrobiological Sciences , 2-1-2 Kannondai, Tsukuba, Ibaraki 305-8602 , Japan.","Tanaka T, Sakai H, Fujii N, Kobayashi F, Nakamura S, Itoh T, Matsumoto T, Wu J",,,0.0,Japan +22250003,BFGR,0.983292729,BFGR,0.983292729,Biofuel Feedstock Genomics Resource,0.972530476,1,http://bfgr.plantbiology.msu.edu,"HTTPConnectionPool(host='bfgr.plantbiology.msu.edu', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to bfgr.plantbiology.msu.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210512030715/http://bfgr.plantbiology.msu.edu/,2012-01-15,,,,,0.0, +22125386,BFluenza,0.856544018,BFluenza,0.856544018,,0,1,http://www.bfluenza.info,"HTTPConnectionPool(host='www.bfluenza.info', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20120401220658/http://bfluenza.info:80/?,2011-09-28,None,"Salahuddin P, Khan AU",,,0.0, +24570022,BGBX,0.957761586,BGBX,0.957761586,Brazilian Genetic Database of Chromosome X,0.940487146,1,http://www.bgbx.com.br,"HTTPConnectionPool(host='www.bgbx.com.br', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20211128004853/http://bgbx.com.br/,2014-02-26,"Laboratório de Investigação de Paternidade , Faculdade de Ciências Farmacêuticas, UNESP - Univ Estadual Paulista, Rodovia Araraquara-Jaú, Araraquara, São Paulo, 14801-902, Brazil, joyce_apa@hotmail.com.","Martins JA, Kawamura B, Cardoso AE, Cicarelli RM",,,2.0,Brazil +23894186,BGDB,0.981280982,BGDB,0.981280982,,0,1,http://dailab.sysu.edu.cn/bgdb,"HTTPConnectionPool(host='dailab.sysu.edu.cn', port=80): Max retries exceeded with url: /bgdb (Caused by ConnectTimeoutError(, 'Connection to dailab.sysu.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20130702205755/http://dailab.sysu.edu.cn/bgdb/,2013-07-26,"Department of Electronics, School of Information Science and Technology, Sun Yat-Sen University, Guangzhou 510006, China.","Li Q, Lian S, Dai Z, Xiang Q, Dai X",,,11.0,China +33037820,Bgee,0.995789826,Bgee,0.995789826,,0,1,http://bgee.org,301,,"(46.5160,6.6328)",http://web.archive.org/web/20221103233024/https://bgee.org/,2021-01-01,"Department of Ecology and Evolution, University of Lausanne, 1015 Lausanne, Switzerland.","Bastian FB, Roux J, Niknejad A, Comte A, Fonseca Costa SS, de Farias TM, Moretti S, Parmentier G, de Laval VR, Rosikiewicz M, Wollbrett J, Echchiki A, Escoriza A, Gharib WH, Gonzales-Porta M, Jarosz Y, Laurenczy B, Moret P, Person E, Roelli P, Sanjeev K, Seppey M, Robinson-Rechavi M",,"NCI NIH HHS, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation, Swiss Institute of Bioinformatics, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation, Horizon 2020, NIH, Canton de Vaud",23.0,Switzerland +31807141,BGISEQ-500,0.996839881,BGISEQ-500,0.996839881,,0,1,http://seqBEACON.genomics.cn:443/home.html,"HTTPConnectionPool(host='seqbeacon.genomics.cn', port=443): Max retries exceeded with url: /home.html (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,no_wayback,2019-11-15,"BGI-Wuhan Clinical Laboratories, Building B2, No.666 Gaoxin Road, Wuhan East lake Hi-tech Development zone, Wuhan, 430074 China.","Zhou Y, Liu C, Zhou R, Lu A, Huang B, Liu L, Chen L, Luo B, Huang J, Tian Z",,,3.0,China +22084196,BGMUT,0.994283319,BGMUT,0.994283319,Blood Group Antigen Gene Mutation 
Database,0.958958909,1,http://www.ncbi.nlm.nih.gov/projects/gv/rbc/xslcgi.fcgi?cmd=bgmut,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20140722111632/http://www.ncbi.nlm.nih.gov/projects/gv/rbc/xslcgi.fcgi?cmd=bgmut,2011-11-13,,,,,0.0, +34897852,BGvar,0.97764498,BGvar,0.97764498,Blood Group Associated Genomic Variant Resource,0.957016902,1,http://clingen.igib.res.in/bgvar,301,,"(26.7907,75.2061)",http://web.archive.org/web/20220211105137/https://clingen.igib.res.in/bgvar/,2021-12-13,"Genome Informatics and Big Data, CSIR Institute of Genomics and Integrative Biology, Delhi, India.","Rophina M, Pandhare K, Jadhao S, Nagaraj SH, Scaria V",,"The Council of Scientific and Industrial Research, India",0.0,India +32540200,BGVD,0.991665125,BGVD,0.991665125,Bovine Genome Variation Database,0.976398796,1,http://animal.nwsuaf.edu.cn/BosVar,301,,"(39.9906,116.2887)",no_wayback,2020-04-01,"Key Laboratory of Animal Genetics, Breeding and Reproduction Shaanxi Province, College of Animal Science and Technology, Northwest A&F University, Yangling 712100, China.","Chen N, Fu W, Zhao J, Shen J, Chen Q, Zheng Z, Chen H, Sonstegard TS, Lei C, Jiang Y",,"National Natural Science Foundation of China, National Beef Cattle and Yak Industrial Technology System, China, National Natural Science Foundation of China, National Thousand Youth Talents Plan, China",5.0,China +33010170,BiG-FAM,0.988548267,BiG-FAM,0.988548267,biosynthetic gene cluster families,0.828322877,1,http://bigfam.bioinformatics.nl,301,,"(51.9700,5.6667)",no_wayback,2021-01-01,"Bioinformatics Group, Wageningen University, 6708PB Wageningen, The Netherlands.","Kautsar SA, Blin K, Shaw S, Weber T, Medema MH",,"Danish National Research Foundation, Novo Nordisk Fonden, NNF Center for Biosustainability, Novo Nordisk Foundation, Graduate School for Experimental Plant Sciences, Novo Nordisk Fonden, NNF Center for Biosustainability, Novo Nordisk Foundation",25.0,Netherlands 
+21233089,BIND,0.947274288,BIND,0.947274288,Biomolecular Interaction Network Database,0.91527611,1,http://download.baderlab.org/BINDTranslation,301,,"(43.7001,-79.4163)",http://web.archive.org/web/20221016224039/https://download.baderlab.org/BINDTranslation/,2011-01-12,,,,,0.0, +25378330,Binding MOAD,0.90315028,Binding MOAD,0.90315028,,0,1,http://www.BindingMOAD.org,200,,"(42.2776,-83.7409)",http://web.archive.org/web/20220526115835/http://www.BindingMOAD.org,2014-11-06,"Department of Medicinal Chemistry, University of Michigan, 428 Church St, Ann Arbor, MI 48109-1065, USA.","Ahmed A, Smith RD, Clark JJ, Dunbar JB Jr, Carlson HA",,"NCATS NIH HHS, NIGMS NIH HHS, NCATS NIH HHS, NIGMS NIH HHS, NCATS NIH HHS",29.0,United States +31405382,bio.tools,0.987343351,bio.tools,0.987343351,,0,1,http://bio.tools,302,,"(55.7704,12.5038)",http://web.archive.org/web/20221101084937/https://bio.tools/,2019-08-12,"National Life Science Supercomputing Center, Technical University of Denmark, Building 208, DK-2800, Kongens Lyngby, Denmark. 
jison@bioinformatics.dtu.dk.","Ison J, Ienasescu H, Chmura P, Rydza E, Ménager H, KalaÅ¡ M, Schwämmle V, Grüning B, Beard N, Lopez R, Duvaud S, Stockinger H, Persson B, Vařeková RS, Raček T, Vondrášek J, Peterson H, Salumets A, Jonassen I, Hooft R, Nyrönen T, Valencia A, Capella S, Gelpí J, Zambelli F, Savakis B, LeskoÅ¡ek B, Rapacki K, Blanchet C, Jimenez R, Oliveira A, Vriend G, Collin O, van Helden J, Løngreen P, Brunak S",,"The Danish Ministry of Higher Education and Science, Villum Fonden, ELIXIR-EXCELERATE under the European Union's Horizon 2020 research and innovation programme, Novo Nordisk Foundation Center for Protein Research",8.0,"Denmark, Denmark" +28708831,biochem4j,0.994090736,biochem4j,0.994090736,,0,1,http://biochem4j.org,"HTTPConnectionPool(host='biochem4j.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220121032805/http://biochem4j.org/,2017-07-14,,,,,0.0, +25516260,Bioclock,0.913504779,Bioclock,0.913504779,Aedes aegypti Circadian Database,0.621617784,1,http://www.nd.edu,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20221110032231/https://www.nd.edu/,2014-12-17,None,"Leming MT, Rund SS, Behura SK, Duffield GE, O'Tousa JE",,"NIGMS NIH HHS, NCATS NIH HHS, NCATS NIH HHS, NIGMS NIH HHS",26.0, +22359433,BiodEnz,0.967340887,BiodEnz,0.967340887,,0,1,http://www.biodenzdatabase.in,"HTTPConnectionPool(host='www.biodenzdatabase.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20140722114959/http://www.biodenzdatabase.in,2012-01-06,None,"Sugumar S, Thangam B",,,0.0, +28875065,BioFuelDB,0.865778863,BioFuelDB,0.865778863,,0,1,"http://metabiosys.iiserb.ac.in/biofueldb, http://metagenomics.iiserb.ac.in/biofueldb","404, 301",,", 
(23.2547,77.4029)","http://web.archive.org/web/20170828171516/http://metabiosys.iiserb.ac.in/biofueldb/, no_wayback",2017-08-28,"Department of Biological Sciences, Indian Institute of Science Education and Research, Bhopal, Madhya Pradesh, India.","Chaudhary N, Gupta A, Gupta S, Sharma VK",,"Department of Biotechnology, DST-INSPIRE Fellowship",2.0,India +21904428,Biogen base,0.733742376,Biogen base,0.733742376,IOGEN BASE,0.716412246,1,http://www.tnaugenomics.com/biogenbase/casava.php,"HTTPConnectionPool(host='www.tnaugenomics.com', port=80): Max retries exceeded with url: /biogenbase/casava.php (Caused by ReadTimeoutError(""HTTPConnectionPool(host='www.tnaugenomics.com', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220617044326/http://www.tnaugenomics.com/biogenbase/casava.php,2011-08-02,None,"Jayakodi M, Selvan SG, Natesan S, Muthurajan R, Duraisamy R, Ramineni JJ, Rathinasamy SA, Karuppusamy N, Lakshmanan P, Chokkappan M",,,2.0, +27246819,BioHub,0.989485323,BioHub,0.989485323,,0,1,http://biohub.cs.manchester.ac.uk/ontology/biohub-kb.owl,"HTTPConnectionPool(host='biohub.cs.manchester.ac.uk', port=80): Max retries exceeded with url: /ontology/biohub-kb.owl (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,no_wayback,2016-06-01,,,,,0.0, +26401099,BioImg,0.641896486,BioImg,0.641896486,,0,1,http://bioimg.org,"HTTPConnectionPool(host='bioimg.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170612174918/https://bioimg.org/,2015-09-10,"SNIC-UPPMAX, Department of Information Technology, Uppsala University, Uppsala, Sweden. ; Science for Life Laboratory, Uppsala University, Uppsala, Sweden. 
; Department of Pharmaceutical Biosciences, Uppsala University, Uppsala, Sweden.","Dahlö M, Haziza F, Kallio A, Korpelainen E, Bongcam-Rudloff E, Spjuth O",,,2.0,"Sweden, Sweden, Sweden" +23087378,BioLiP,0.991326213,BioLiP,0.991326213,,0,1,http://zhanglab.ccmb.med.umich.edu/BioLiP,301,,"(42.2776,-83.7409)",http://web.archive.org/web/20210726124551/https://zhanglab.ccmb.med.umich.edu/BioLiP/,2012-10-18,,,,,0.0, +28605773,BioM2MetDisease,0.99300829,BioM2MetDisease,0.99300829,,0,1,http://www.bio-bigdata.com/BioM2MetDisease,502,,,http://web.archive.org/web/20190803231850/http://www.bio-bigdata.com:80/BioM2MetDisease/,2017-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150081, China.","Xu Y, Yang H, Wu T, Dong Q, Sun Z, Shang D, Li F, Xu Y, Su F, Liu S, Zhang Y, Li X",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",1.0,China +33552037,BioMaster,0.990046203,BioMaster,0.990046203,,0,1,http://www.biomaster-uestc.cn,"HTTPConnectionPool(host='www.biomaster-uestc.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20210225025020/http://www.biomaster-uestc.cn/,2021-01-21,"School of Life Science and Technology, University of Electronic Science and Technology of China, Chengdu, China.","Wang B, Yang H, Sun J, Dou C, Huang J, Guo FB",,,0.0,"China, China" +31599923,BiomeNet,0.979145229,BiomeNet,0.979145229,,0,1,http://kobic.re.kr/biomenet,301,,"(37.5660,126.9784)",http://web.archive.org/web/20220518151605/https://www.kobic.re.kr/biomenet/,2020-03-01,"Department of Biotechnology, Yonsei University, Seodaemun-gu, Seoul 03722, Korea.","Kim E, Bae D, Yang S, Ko G, Lee S, Lee B, Lee I",,"Korean Government, Korean Government, Korean Government, Korean 
Government, National Research Foundation of Korea",3.0, +"25414348, 31701150",BioModels,0.990961909,BioModels,0.990961909,,0,2,http://www.ebi.ac.uk/biomodels,301,,"(51.5085,-0.1257)",no_wayback,2020-01-01,"nan, European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","nan, Malik-Sheriff RS, Glont M, Nguyen TVN, Tiwari K, Roberts MG, Xavier A, Vu MT, Men J, Maire M, Kananathan S, Fairbanks EL, Meyer JP, Arankalle C, Varusai TM, Knight-Schrijver V, Li L, Dueñas-Roca C, Dass G, Keating SM, Park YM, Buso N, Rodriguez N, Hucka M, Hermjakob H","nan, ","nan, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, European Molecular Biology Laboratory, Biotechnology and Biological Sciences Research Council, NIGMS NIH HHS, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Innovative Medicines Initiative, Innovative Medicines Initiative, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",70.0, +30053270,BioMuta,0.790885687,BioMuta,0.790885687,,0,1,"http://hive.biochemistry.gwu.edu/biomuta, http://hive.biochemistry.gwu.edu/bioxpress","301, 301",,"(38.8951,-77.0364), (38.8951,-77.0364)","http://web.archive.org/web/20220617132955/https://hive.biochemistry.gwu.edu/biomuta/, http://web.archive.org/web/20220723022143/https://hive.biochemistry.gwu.edu/bioxpress/",2018-01-01,,,,,0.0, +24244913,BioNames,0.997415423,BioNames,0.997415423,,0,1,http://bionames.org,200,,"(37.7621,-122.3971)",http://web.archive.org/web/20221102122048/http://bionames.org/,2013-10-29,,,,,0.0, +29637199,BiOnIC,0.997383654,BiOnIC,0.997383654,,0,1,http://onto-apps.stanford.edu/bionic,"HTTPConnectionPool(host='onto-apps.stanford.edu', port=80): Max retries exceeded with url: /bionic (Caused 
by ConnectTimeoutError(, 'Connection to onto-apps.stanford.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20210506033209/http://onto-apps.stanford.edu/bionic,2017-10-04,,,,,0.0, +29529902,BioPepDB,0.996458948,BioPepDB,0.996458948,,0,1,http://bis.zju.edu.cn/biopepdbr,301,,"(30.2936,120.1614)",http://web.archive.org/web/20220518225953/http://bis.zju.edu.cn/biopepdbr/,2018-03-12,"a Department of Bioinformatics, College of Life Sciences , Zhejiang University , Hangzhou , China.","Li Q, Zhang C, Chen H, Xue J, Guo X, Liang M, Chen M",,Infinitus Co. Ltd,8.0,China +25360160,BioPhytMol,0.970525742,BioPhytMol,0.970525742,,0,1,http://ab-openlab.csir.res.in/biophytmol,302,,"(28.6358,77.2245)",http://web.archive.org/web/20220527084827/https://ab-openlab.csir.res.in/biophytmol/,2014-10-11,,,,,0.0, +31133849,BioPlanet,0.989541769,BioPlanet,0.989541769,,0,1,http://tripod.nih.gov/bioplanet,301,,"(38.9807,-77.1003)",http://web.archive.org/web/20220513214818/https://tripod.nih.gov/bioplanet/,2019-04-26,,,,,0.0, +22139929,BioProject,0.682163715,BioProject,0.682163715,,0,1,"http://www.ncbi.nlm.nih.gov/bioproject, http://www.ncbi.nlm.nih.gov/biosample","301, 301",,"(38.9896,-77.1538), (38.9896,-77.1538)","http://web.archive.org/web/20221110023042/https://www.ncbi.nlm.nih.gov/bioproject/, http://web.archive.org/web/20221110023001/https://www.ncbi.nlm.nih.gov/biosample/",2011-12-01,,,,,0.0, +27189610,BioSharing,0.98941499,BioSharing,0.98941499,,0,1,"http://www.biosharing.org, http://www.biosharing.org","301, 301",,"(51.7522,-1.2560), (51.7522,-1.2560)","http://web.archive.org/web/20190329072135/http://www.biosharing.org:80/, http://web.archive.org/web/20190329072135/http://www.biosharing.org:80/",2016-05-17,,,,,0.0, +26820405,Biosurveillance Analytics Resource Directory,0.863397956,ARD,0.646821141,Biosurveillance Analytics Resource 
Directory,0.863397956,1,http://brd.bsvgateway.org/brd,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20160311105353/http://brd.bsvgateway.org:80/brd/,2016-01-28,,,,,0.0, +23550138,Biotinidase Gene Variants Registry,0.961240504,,0,Biotinidase Gene Variants Registry,0.961240504,1,http://arup.utah.edu/database/BTD/BTD_welcome.php,302,,"(40.7371,-111.8258)",http://web.archive.org/web/20221104202600/https://arup.utah.edu/database/BTD/BTD_welcome.php,2013-04-09,,,,,0.0, +28423831,BioTop,0.992148519,BioTop,0.992148519,,0,1,http://purl.org/biotop,301,,"(37.5483,-121.9886)",no_wayback,2017-01-01,"IMI - Medical University of Graz, Austria.","Schulz S, Boeker M, Martinez-Costa C",,,1.0,Austria +32369809,Bird Chromosome Database,0.89551114,BCD,0.883974791,Bird Chromosome Database,0.89551114,1,http://sites.unipampa.edu.br/birdchromosomedatabase,301,,"(-29.7831,-55.7919)",http://web.archive.org/web/20210426134138/https://sites.unipampa.edu.br/birdchromosomedatabase/,2020-05-06,None,"Degrandi TM, Barcellos SA, Costa AL, Garnero ADV, Hass I, Gunski RJ",,,13.0, +23390356,BIRS,0.993844032,BIRS,0.993844032,BIRS - Bioterrorism Information Retrieval System,0.7868629,1,http://www.bioterrorism.biowaves.org,200,,"(40.0334,-83.1582)",http://web.archive.org/web/20220617044504/http://www.bioterrorism.biowaves.org/,2013-01-18,"Department of Biotechnology, Jaypee Institute of Information Technology, A-10, Sector-62, NOIDA, U.P., 201301, India.","Tewari AK, Rashi, Wadhwa G, Sharma SK, Jain CK",,,1.0,India +24264865,Bival-Bind,0.987922519,Bival-Bind,0.987922519,,0,1,http://agknapp.chemie.fu-berlin.de/bivalbind,301,,"(52.5244,13.4105)",http://web.archive.org/web/20140726125627/http://agknapp.chemie.fu-berlin.de/bivalbind,2013-11-22,"Fachbereich Biologie Chemie, Pharmazie/Institute of Chemistry and Biochemistry, Freie Universität Berlin, 14195, Berlin, Germany.","Meyer T, Knapp EW",,,0.0,Germany 
+24185696,BloodChIP,0.998485148,BloodChIP,0.998485148,,0,1,http://www.med.unsw.edu.au/CRCWeb.nsf/page/BloodChIP,302,,"(-33.8678,151.2073)",no_wayback,2013-10-31,"Lowy Cancer Research Centre and the Prince of Wales Clinical School, University of New South Wales, Sydney, NSW 2052, Australia.","Chacon D, Beck D, Perera D, Wong JW, Pimanda JE",,,27.0,Australia +27623959,BmncRNAdb,0.86746788,BmncRNAdb,0.86746788,,0,1,http://gene.cqu.edu.cn/BmncRNAdb/index.php,"HTTPConnectionPool(host='gene.cqu.edu.cn', port=80): Max retries exceeded with url: /BmncRNAdb/index.php (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 101] Network is unreachable'))",,,http://web.archive.org/web/20160925010434/http://gene.cqu.edu.cn:80/BmncRNAdb/index.php,2016-09-13,"Laboratory of Evolutionary and Functional Genomics, School of Life Sciences, Chongqing University, Huxi Campus, No. 55 Daxuecheng South Rd., Shapingba, Chongqing, 401331, China.","Zhou QZ, Zhang B, Yu QY, Zhang Z",,"the National High Technology Research and Development Program of China, Chongqing Graduate Student Research Innovation Project, the National Natural Science Foundation of China",13.0,China +23886610,BmTEdb,0.981323123,BmTEdb,0.981323123,,0,1,http://gene.cqu.edu.cn/BmTEdb,"HTTPConnectionPool(host='gene.cqu.edu.cn', port=80): Max retries exceeded with url: /BmTEdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 101] Network is unreachable'))",,,http://web.archive.org/web/20161024013400/http://gene.cqu.edu.cn:80/BmTEdb/,2013-07-25,"State Key Laboratory of Silkworm Genome Biology, Southwest University, Chongqing 400716, China.","Xu HE, Zhang HH, Xia T, Han MJ, Shen YH, Zhang Z",,,20.0,China +28365726,BMW,0.984817743,BMW,0.984817743,Boechera Microsatellite Website,0.962700583,1,http://sites.biology.duke.edu/windhamlab,302,,"(35.9940,-78.8986)",http://web.archive.org/web/20221016215901/https://sites.biology.duke.edu/windhamlab/,2017-01-01,"Department of Biology, Duke 
University, Durham, NC 27708, USA.","Li FW, Rushworth CA, Beck JB, Windham MD",,"Division of Environmental Biology, Division of Environmental Biology, Division of Environmental Biology",5.0,United States +24214957,bNAber,0.994556367,bNAber,0.994556367,,0,1,http://bNAber.org,404,,,http://web.archive.org/web/20191216055842/http://bnaber.org:80/,2013-11-07,"Center for HIV/AIDS Vaccine Immunology and Immunogen Discovery, The Scripps Research Institute, 10550 North Torrey Pines Road La Jolla, CA 92037, USA, Bioinformatics and Systems Biology Program, Sanford-Burnham Medical Research Institute, 10901 North Torrey Pines Road, La Jolla, CA 92037, USA, Department of Immunology & Microbiology, The Scripps Research Institute, 10550 North Torrey Pines Road, La Jolla, CA 92037, USA, Ragon Institute of MGH, MIT and Harvard, 400 Technology Square, Cambridge, MA 02139, USA and Center for Research in Biological Systems, UC San Diego, 9500 Gilman Drive, La Jolla, CA 92093, USA.","Eroshkin AM, LeBlanc A, Weekes D, Post K, Li Z, Rajput A, Butera ST, Burton DR, Godzik A",,"NIAID NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS",48.0,"United States, United States, United States, United States, United States" +33399824,BnaGVD,0.99503864,BnaGVD,0.99503864,rapeseed genomic variation database,0.979584813,1,http://rapeseed.biocloud.net/home,200,,"(36.0649,120.3804)",no_wayback,2021-01-05,"Provincial Key Laboratory of Crop Gene Resource, Zhejiang University, 866 Yu-Hang-Tang Road, Hangzhou, 310058, PR China.","Yan T, Yao Y, Wu D, Jiang L",,"Zhejiang Provincial Key Research Project, Jiangsu Collaborative Innovation Centre for Modern Crop Production",2.0,China +24079801,Bolbase,0.990760982,Bolbase,0.990760982,,0,1,http://ocri-genomics.org/bolbase,"HTTPConnectionPool(host='ocri-genomics.org', port=80): Max retries exceeded with url: /bolbase (Caused by ConnectTimeoutError(, 'Connection to ocri-genomics.org timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20190619044436/http://ocri-genomics.org:80/bolbase/,2013-09-30,"The Key Laboratory of Oil Crops Biology and Genetic Breeding, the Ministry of Agriculture, Oil Crops Research Institute, the Chinese Academy of Agricultural Sciences, Wuhan 430062, China. huawei@oilcrops.cn.","Yu J, Zhao M, Wang X, Tong C, Huang S, Tehrim S, Liu Y, Hua W, Liu S",,,48.0,China +28453653,Bologna Annotation Resource,0.698403805,,0,Bologna Annotation Resource,0.698403805,1,http://bar.biocomp.unibo.it/bar3,302,,"(44.4938,11.3387)",http://web.archive.org/web/20221017025500/https://bar.biocomp.unibo.it/bar3/,2017-07-01,"Biocomputing Group, BiGeA/CIG, 'Luigi Galvani' Interdepartmental Center for Integrated Studies of Bioinformatics, Biophysics and Biocomplexity, University of Bologna, Bologna 40126, Italy.","Profiti G, Martelli PL, Casadio R",,,8.0,Italy +31958638,BoMiProt,0.996749759,BoMiProt,0.996749759,,0,1,http://bomiprot.org,301,,"(52.6958,6.1944)",http://web.archive.org/web/20211203093646/http://bomiprot.org/,2020-01-17,"Department of Biotechnology, Indian Institute of Technology Roorkee, Roorkee 247667, India.","Maity S, Bhat AH, Giri K, Ambatipudi K",,"Department of Science and Technology, Department of Science and Technology, National Postdoctoral Fellowship, Council of Scientific and Industrial Research, Science and Engineering Research Board, Ministry of Human Resource Development",5.0,India +23203889,Bookshelf,0.957379818,Bookshelf,0.957379818,,0,1,http://www.ncbi.nlm.nih.gov/books,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221110123414/https://www.ncbi.nlm.nih.gov/books,2012-11-29,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 45 Center Drive, Bethesda, MD 20892, USA. 
hoeppner@ncbi.nlm.nih.gov",Hoeppner MA,,Intramural NIH HHS,2.0,United States +24994456,BorreliaBase,0.995369911,BorreliaBase,0.995369911,,0,1,http://borreliabase.org,200,,"(40.8223,-74.4569)",http://web.archive.org/web/20220313024212/http://borreliabase.org/,2014-07-03,None,"Di L, Pagan PE, Packer D, Martin CL, Akther S, Ramrattan G, Mongodin EF, Fraser CM, Schutzer SE, Luft BJ, Casjens SR, Qiu WG",,"NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NIMHD NIH HHS, NIMHD NIH HHS, NIGMS NIH HHS, NIAID NIH HHS, NIAID NIH HHS",22.0, +25784642,Brain Tumor Database,0.788560754,Brain Tumor,0.489937514,Brain Tumor Database,0.788560754,1,http://tumorsdatabase.altervista.org,200,,"(51.1065,13.6605)",http://web.archive.org/web/20220617081147/http://tumorsdatabase.altervista.org/,2015-03-01,"University of Genoa, Italy maurizio.bergamino@gmail.com.","Bergamino M, Hamilton DJ, Castelletti L, Barletta L, Castellan L",,,0.0,Italy +34528715,BRAIN UK,0.898930291,BRAIN UK,0.898930291,,0,1,http://www.brain-uk.org,301,,"(50.9040,-1.4043)",no_wayback,2021-09-28,"Clinical Neurosciences, Clinical and Experimental Sciences, Faculty of Medicine, University of Southampton, Southampton, UK.","Nicoll JAR, Bloom T, Clarke A, Boche D, Hilton D",,"British Neuropathological Society, Medical Research Council, Medical Research Council Canada, Medical Research Council, Brain Tumour Research, Medical Research Council",0.0, +29985970,BrainEXP,0.997224808,BrainEXP,0.997224808,Brain EXPression Database,0.938273266,1,http://www.brainexp.org,200,,"(1.2897,103.8501)",http://web.archive.org/web/20211126180622/http://brainexp.org/,2019-01-01,"Center for Medical Genetics, School of Life Science, Central South University, Changsha, China.","Jiao C, Yan P, Xia C, Shen Z, Tan Z, Tan Y, Wang K, Jiang Y, Huang L, Dai R, Wei Y, Xia Y, Meng Q, Ouyang Y, Yi L, Duan F, Dai J, Zhao S, Liu C, Chen C",,"NIEHS NIH HHS, National Key Plan for Scientific Research and Development of China, NIMH NIH HHS, NIMH NIH HHS, NIH, NIH, 
Innovation-Driven Project of Central South University, Innovation-Driven Project of Central South University, National Natural Science Foundation of China, National Natural Science Foundation of China",5.0,China +26794641,BRAINS,0.985277057,BRAINS,0.985277057,Brain Images of Normal Subjects,0.966778862,1,"http://www.brainsimagebank.ac.uk, http://dicom.nema.org","301, 301",,"(55.9521,-3.1965), (38.8964,-77.0447)","http://web.archive.org/web/20221008160910/https://www.brainsimagebank.ac.uk/, http://web.archive.org/web/20211114213324/https://dicom.nema.org/",2016-01-18,"Brain Research Imaging Centre (BRIC), & Centre for Clinical Brain Sciences (CCBS), The University of Edinburgh, Division of Clinical Neurosciences, Western General Hospital, Crewe Road, Edinburgh EH4 2XU, United Kingdom; Scottish Imaging Network, 15 Redburn Avenue, Giffnock, Glasgow G46 6RH, United Kingdom. Electronic address: dominic.job@ed.ac.uk.","Job DE, Dickie DA, Rodriguez D, Robson A, Danso S, Pernet C, Bastin ME, Boardman JP, Murray AD, Ahearn T, Waiter GD, Staff RT, Deary IJ, Shenkin SD, Wardlaw JM",,"Medical Research Council, Medical Research Council, Medical Research Council, Medical Research Council, Medical Research Council, Scottish Funding Council, Biotechnology and Biological Sciences Research Council, Scottish Funding Council",22.0,"United Kingdom, United Kingdom" +24259684,BrassiBase,0.991759181,BrassiBase,0.991759181,,0,1,http://brassibase.cos.uni-heidelberg.de,301,,"(49.4077,8.6908)",http://web.archive.org/web/20221016201905/https://brassibase.cos.uni-heidelberg.de/,2013-11-19,"Department of Biodiversity and Plant Systematics, Centre for Organismal Studies (COS) Heidelberg, Heidelberg University, D-69120 Heidelberg, Germany.","Kiefer M, Schmickl R, German DA, Mandáková T, Lysak MA, Al-Shehbaz IA, Franzke A, Mummenhoff K, Stamatakis A, Koch MA",,,45.0,Germany 
+24948109,BrassicaTED,0.992530644,BrassicaTED,0.992530644,,0,1,http://im-crop.snu.ac.kr/BrassicaTED/index.php,"HTTPConnectionPool(host='im-crop.snu.ac.kr', port=80): Max retries exceeded with url: /BrassicaTED/index.php (Caused by ReadTimeoutError(""HTTPConnectionPool(host='im-crop.snu.ac.kr', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2014-06-20,None,"Murukarthick J, Sampath P, Lee SC, Choi BS, Senthil N, Liu S, Yang TJ",,,8.0, +29136180,Breast Cancer Now Tissue Bank bioinformatics,0.752365947,BCNTB,0.615942299,Breast Cancer Now Tissue Bank bioinformatics,0.752365947,1,http://bioinformatics.breastcancertissuebank.org,"HTTPConnectionPool(host='bioinformatics.breastcancertissuebank.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to bioinformatics.breastcancertissuebank.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20210614143011/http://bioinformatics.breastcancertissuebank.org/,2018-01-01,"Bioinformatics Unit, Centre for Molecular Oncology, Barts Cancer Institute, Queen Mary University London, London EC1M 6BQ, UK.","Gadaleta E, Pirrò S, Dayem Ullah AZ, Marzec J, Chelala C",,,3.0, +28821760,Breast Oncogenic Specific siRNAs,0.987298205,BOSS,0.972771386,Breast Oncogenic Specific siRNAs,0.987298205,1,http://bioinformatics.cimap.res.in/sharma/boss,"HTTPConnectionPool(host='bioinformatics.cimap.res.in', port=80): Max retries exceeded with url: /sharma/boss (Caused by ConnectTimeoutError(, 'Connection to bioinformatics.cimap.res.in timed out. (connect timeout=5)'))",,,no_wayback,2017-08-18,"Biotechnology Division, CSIR-Central Institute of Medicinal and Aromatic Plants, P.O.-CIMAP, Near Kukrail Picnic Spot, Lucknow, 226 015, Uttar Pradesh, India. 
ks26atul@gmail.com.","Tyagi A, Semwal M, Sharma A",,,0.0,India +26586806,BreCAN-DB,0.997648492,BreCAN-DB,0.997648492,,0,1,http://brecandb.igib.res.in,301,,"(28.6453,77.2128)",http://web.archive.org/web/20220423150519/https://brecandb.igib.res.in/,2015-11-19,"School of Computational and Integrative Sciences, Jawaharlal Nehru University, New Delhi, India.","Narang P, Dhapola P, Chowdhury S",,"DBT/Wellcome Trust India Alliance, Wellcome Trust",1.0,India +"23203881, 25378310, 33211880",BRENDA,0.997791767,BRENDA,0.997791767,BRaunschweig ENzyme DAtabase,0.770335999,3,http://www.brenda-enzymes.org,302,,"(52.2680,10.5200)",http://web.archive.org/web/20221102105902/https://brenda-enzymes.org/,2021-01-01,"Technische Universität Braunschweig, Dpt. for Bioinformatics and Biochemistry, Langer Kamp 19 B, 38106 Braunschweig, Germany., Department of Bioinformatics and Biochemistry, Technische Universität Braunschweig, Langer Kamp 19 B, D-38106 Braunschweig, Germany., Technische Universität Braunschweig, Braunschweig Integrated Centre of Systems Biology (BRICS), Rebenring 56, 38106 Braunschweig, Germany.","Schomburg I, Chang A, Placzek S, Söhngen C, Rother M, Lang M, Munaretto C, Ulas S, Stelzer M, Grote A, Scheer M, Schomburg D, Chang A, Schomburg I, Placzek S, Jeske L, Ulbrich M, Xiao M, Sensen CW, Schomburg D, Chang A, Jeske L, Ulbrich S, Hofmann J, Koblitz J, Schomburg I, Neumann-Schaal M, Jahn D, Schomburg D",", , ",", , German Federal Ministry of Education and Research, German Federal Ministry of Education and Research, German Federal Ministry of Education and Research, German Federal Ministry of Education and Research",304.0,"Germany, Germany, Germany" +27164438,BrucellaBase,0.996022701,BrucellaBase,0.996022701,,0,1,"http://www.dbtbrucellosis.in/brucellabase.html, http://59.99.226.203/brucellabase/homepage.html","HTTPConnectionPool(host='www.dbtbrucellosis.in', port=80): Max retries exceeded with url: /brucellabase.html (Caused by NewConnectionError(': Failed to establish a 
new connection: [Errno -2] Name or service not known')), HTTPConnectionPool(host='59.99.226.203', port=80): Max retries exceeded with url: /brucellabase/homepage.html (Caused by ConnectTimeoutError(, 'Connection to 59.99.226.203 timed out. (connect timeout=5)'))",,", ","http://web.archive.org/web/20200807231951/https://dbtbrucellosis.in/brucellabase.html, no_wayback",2016-05-07,"Department of Genetics, School of Biological Sciences, Madurai Kamaraj University, Madurai - 625021, Tamil Nadu, India.","Sankarasubramanian J, Vishnu US, Khader LK, Sridhar J, Gunasekaran P, Rajendhran J",,"Department of Biotechnology, Govt. of India",3.0,India +33539279,BSGatlas,0.995428026,BSGatlas,0.995428026,,0,1,http://rth.dk/resources/bsgatlas,301,,"(55.6703,12.5882)",http://web.archive.org/web/20220617134834/https://rth.dk/resources/bsgatlas/,2021-02-01,"Center for Non-coding RNA in Technology and Health, Department of Veterinary and Animal Sciences, University of Copenhagen, 1871 Frederiksberg, Denmark.","Geissler AS, Anthon C, Alkan F, González-Tortuero E, Poulsen LD, Kallehauge TB, Breüner A, Seemann SE, Vinther J, Gorodkin J",,Innovationsfonden,0.0,Denmark +32026396,BSM-Arc,0.996515274,BSM-Arc,0.996515274,Biological Structure Model Archive,0.965628356,1,http://bsma.pdbj.org,301,,"(34.8257,135.5182)",http://web.archive.org/web/20221016223837/https://bsma.pdbj.org/,2020-02-05,"Institute for Protein Research, Osaka University, 3-2 Yamadaoka, Suita, Osaka, 565-0871, Japan. 
gertjan.bekker@protein.osaka-u.ac.jp.","Bekker GJ, Kawabata T, Kurisu G",,"Japan Agency for Medical Research and Development, Japan Agency for Medical Research and Development (JP)",12.0,Japan +23203879,BSRD,0.995059371,BSRD,0.995059371,,0,1,http://kwanlab.bio.cuhk.edu.hk/BSRD,301,,"(22.2783,114.1747)",http://web.archive.org/web/20221017065303/http://kwanlab.bio.cuhk.edu.hk/BSRD/,2012-11-29,"Biology Programme, School of Life Sciences, The Chinese University of Hong Kong, Hong Kong SAR, China.","Li L, Huang D, Cheung MK, Nong W, Huang Q, Kwan HS",,,60.0,"China, Hong Kong, Hong Kong" +21210251,BTECH,0.996129572,BTECH,0.996129572,,0,1,http://cmbteg.childrensmemorial.org,"HTTPConnectionPool(host='cmbteg.childrensmemorial.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to cmbteg.childrensmemorial.org timed out. (connect timeout=5)'))",,,no_wayback,2011-03-01,"Cancer Biology and Epigenomics Program, Children's Memorial Research Center, Department of Pediatrics, Feinberg School of Medicine, Northwestern University, 2300 Children's Plaza, Box 220, Chicago, IL 60614-3394, USA. mwang@childrensmemorial.org","Wang M, Xie H, Stellpflug W, Rajaram V, Bonaldo Mde F, Goldman S, Tomita T, Soares MB",,,7.0,United States +25656309,BtoxDB,0.996941984,BtoxDB,0.996941984,,0,1,http://www.gurupi.uft.edu.br/btoxdb,"HTTPConnectionPool(host='www.gurupi.uft.edu.br', port=80): Max retries exceeded with url: /btoxdb (Caused by ConnectTimeoutError(, 'Connection to www.gurupi.uft.edu.br timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20161108041744/http://www.gurupi.uft.edu.br:80/btoxdb/,2015-01-17,"UFT - Federal University of Tocantins, Department of Biotechnology, Caixa Postal 66, Gurupi 77402-970, Tocantins, Brazil. 
Electronic address: luiz_cbb@hotmail.com.","Barbosa LC, Garrido SS, Marchetto R",,"CAPES, CNPq, FAPESP",6.0,Brazil +23336431,BuffSatDB,0.98745203,BuffSatDB,0.98745203,Buffalo MicroSatellite Database,0.644897648,1,http://cabindb.iasri.res.in/buffsatdb,"HTTPConnectionPool(host='cabindb.iasri.res.in', port=80): Max retries exceeded with url: /buffsatdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20130723203531/http://cabindb.iasri.res.in:80/buffsatdb/,2013-01-19,"Centre for Agricultural Bioinformatics, Indian Agricultural Statistics Research Institute, Library Avenue, New Delhi, 110012, India.","Sarika, Arora V, Iquebal MA, Rai A, Kumar D",,,7.0,India +22080550,BYKdb,0.956137955,BYKdb,0.956137955,Bacterial protein tYrosine Kinase database,0.903621137,1,http://bykdb.ibcp.fr,301,,"(45.7469,4.8444)",http://web.archive.org/web/20171124200635/https://bykdb.ibcp.fr/,2011-11-12,"Unité Bases Moléculaires et Structurales des Systèmes Infectieux, UMR 5086 CNRS - Université Claude Bernard Lyon 1, IBCP FR 3302 - 7, Passage du Vercors, 69367 Lyon CEDEX 07, France.","Jadeau F, Grangeasse C, Shi L, Mijakovic I, Deléage G, Combet C",,,20.0,France +22621612,C-GATE,0.988704075,C-GATE,0.988704075,affected by transposable,0.630219376,1,http://sites.google.com/site/tecatalog,302,,"(34.0522,-118.2437)",http://web.archive.org/web/20220326165655/https://sites.google.com/site/tecatalog/,2012-05-23,"Terry Fox Laboratory, British Columbia Cancer Agency, 675 West 10th Avenue, Vancouver, BC, V5Z1L3, Canada. 
rrebollo@bccrc.ca.","Rebollo R, Farivar S, Mager DL",,,23.0,Canada +27050421,C-terminome,0.84958427,C-terminome,0.84958427,,0,1,http://cterminome.bio-toolkit.com,"HTTPConnectionPool(host='cterminome.bio-toolkit.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2016-04-06,"Nevada Institute of Personalized Medicine, and School of Life Sciences, University of Nevada, Las Vegas, Nevada, United States of America.","Sharma S, Toledo O, Hedden M, Lyon KF, Brooks SB, David RP, Limtong J, Newsome JM, Novakovic N, Rajasekaran S, Thapar V, Williams SR, Schiller MR",,"NLM NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",5.0,United States +31649674,cAb-Rep,0.99714224,cAb-Rep,0.99714224,,0,1,http://cab-rep.c2b2.columbia.edu,301,,"(40.7143,-74.0060)",http://web.archive.org/web/20220607081611/https://cab-rep.c2b2.columbia.edu/,2019-10-09,"Zuckerman Mind Brain Behavior Institute, Columbia University, New York, NY, United States.","Guo Y, Chen K, Kwong PD, Shapiro L, Sheng Z",,"NIAID NIH HHS, National Institute of Allergy and Infectious Diseases",21.0,United States +22584068,CACG,0.998239875,CACG,0.998239875,,0,1,http://cgc.kribb.re.kr/map,"HTTPConnectionPool(host='cgc.kribb.re.kr', port=80): Max retries exceeded with url: /map (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2012-05-11,"Genome Resource Center, Korea Research Institute of Bioscience and Biotechnology (KRIBB), 111 Gwahangno, Yuseong-gu, Daejeon 305-806, Republic of Korea.","Kim DS, Kim DW, Kim MY, Nam SH, Choi SH, Kim RN, Kang A, Kim A, Park HS",,"Ministry of Education, Science and Technology",5.0, +22080563,CADRE,0.994582891,CADRE,0.994582891,Central Aspergillus Data REpository,0.973802202,1,http://www.cadre-genomes.org.uk,406,,,http://web.archive.org/web/20221020175517/https://www.cadre-genomes.org.uk/,2011-11-12,"School of 
Translational Medicine, University of Manchester, Manchester M23 9LT, UK. jane.gilsenan@manchester.ac.uk","Mabey Gilsenan J, Cooley J, Bowyer P",,European Commission FP7,28.0, +30329086,CAGm,0.991939008,CAGm,0.991939008,Comparative Analysis of Germline Microsatellites,0.97115584,1,http://www.cagmdb.org,200,,"(34.9803,-81.9168)",http://web.archive.org/web/20220308040843/http://www.cagmdb.org/,2019-01-01,"Edward Via College of Osteopathic Medicine, 2265 Kraft Drive, Blacksburg, VA 24060, USA.","Kinney N, Titus-Glover K, Wren JD, Varghese RT, Michalak P, Liao H, Anandakrishnan R, Pulenthiran A, Kang L, Garner HR",,Edward Via College of Osteopathic Medicine,3.0,United States +33444113,CALR-ETdb,0.93914603,CALR-ETdb,0.93914603,,0,1,http://www.dsimb.inserm.fr/CALR-ET,301,,"(48.8534,2.3488)",no_wayback,2021-01-14,"Université de Paris, UMR_S 1134, Université De La Réunion, Université Des Antilles, Paris, France.","El Jahrani N, Cretin G, de Brevern AG",,"Indo-French Centre for the Promotion of Advanced Research, Agence Nationale de la Recherche, Agence Nationale de la Recherche, Grand Equipement National de Calcul Intensif, Agence Nationale de la Recherche",0.0,France +24265220,CAMP,0.997035444,CAMP,0.997035444,Collection of Antimicrobial Peptide,0.979017951,1,http://www.camp.bicnirrh.res.in,"HTTPConnectionPool(host='www.camp.bicnirrh.res.in', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.camp.bicnirrh.res.in timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20221024211154/http://www.camp.bicnirrh.res.in/,2013-11-21,"Biomedical Informatics Centre of ICMR, National Institute for Research in Reproductive Health, Mumbai 400012, Maharashtra, India.","Waghu FH, Gopi L, Barai RS, Ramteke P, Nizami B, Idicula-Thomas S",,,90.0,India +22213543,CAMPS,0.880289257,CAMPS,0.880289257,,0,1,http://webclu.bio.wzw.tum.de/CAMPS2.0,302,,"(48.4035,11.7488)",no_wayback,2011-12-28,"Department of Genome Oriented Bioinformatics, Technische Universität München, Wissenschaftszentrum Weihenstephan, 85354 Freising, Germany.","Neumann S, Hartmann H, Martin-Galiano AJ, Fuchs A, Frishman D",,,4.0,Germany +33306801,CamRegBase,0.996998787,CamRegBase,0.996998787,,0,1,http://camregbase.org,200,,"(42.7370,-84.4839)",http://web.archive.org/web/20220615224415/https://camregbase.org/,2020-12-01,"Department of Biochemistry and Molecular Biology, 603 Wilson Road, Room 212, Biochemistry Building, East Lansing, MI 48824-6473, USA.","Gomez-Cano F, Carey L, Lucas K, García Navarrete T, Mukundi E, Lundback S, Schnell D, Grotewold E",,"Division of Graduate Education, Basic Energy Sciences",1.0,United States +30367574,CamurWeb,0.970218897,CamurWeb,0.970218897,,0,1,http://bioinformatics.iasi.cnr.it/camurweb,502,,,no_wayback,2018-10-15,"Department of Engineering, Uninettuno International University, Corso Vittorio Emanuele II 39, Rome, 00186, Italy. emanuel@iasi.cnr.it.","Weitschek E, Lauro SD, Cappelli E, Bertolazzi P, Felici G",,,2.0,Italy +28453651,Cancer PanorOmics,0.830598618,Cancer PanorOmics,0.830598618,,0,1,http://panoromics.irbbarcelona.org,302,,"(41.4301,2.1925)",http://web.archive.org/web/20221017162748/https://panoromics.irbbarcelona.org/,2017-07-01,"Institute for Research in Biomedicine (IRB Barcelona). 
The Barcelona Institute of Science and Technology, Barcelona 08028, Catalonia, Spain.","Mateo L, Guitart-Pla O, Pons C, Duran-Frigola M, Mosca R, Aloy P",,European Research Council,5.0,Spain +34903605,Cancer-Immu,0.998258367,Cancer-Immu,0.998258367,,0,1,http://bioinfo.vanderbilt.edu/database/Cancer-Immu,"HTTPConnectionPool(host='bioinfo.vanderbilt.edu', port=80): Max retries exceeded with url: /database/Cancer-Immu (Caused by ConnectTimeoutError(, 'Connection to bioinfo.vanderbilt.edu timed out. (connect timeout=5)'))",,,no_wayback,2021-12-13,"Department of Biostatistics and Center for Quantitative Sciences, Vanderbilt University Medical Center.","Yang J, Zhao S, Wang J, Sheng Q, Liu Q, Shyr Y",,"NCI NIH HHS, NCI NIH HHS, NCI, NCI NIH HHS, Cancer Center Support Grant, NCI NIH HHS, NCI NIH HHS, NCI, SPORE in Breast Cancer",1.0, +"25392415, 30407596",Cancer3D,0.993081719,Cancer3D,0.993081719,,0,2,http://www.cancer3d.org,200,,"(37.3476,-121.8870)",http://web.archive.org/web/20221017032107/http://cancer3d.org/,2019-01-01,"Bioinformatics and Systems Biology Program, Sanford-Burnham Medical Research Institute, 10901 North Torrey Pines Road, La Jolla, CA 92037, USA., Bioinformatics and Systems Biology Program, Sanford Burnham Prebys Medical Discovery Institute, 10901 North Torrey Pines Road, La Jolla, CA 92037, USA.","Porta-Pardo E, Hrabe T, Godzik A, Sedova M, Iyer M, Li Z, Jaroszewski L, Post KW, Hrabe T, Porta-Pardo E, Godzik A",", ","NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, National Institute of Health, NIH HHS",27.0,"United States, United States" +23486013,CancerDR,0.992159104,CancerDR,0.992159104,cancer drug resistance database,0.807872832,1,http://crdd.osdd.net/raghava/cancerdr,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/cancerdr (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220528005253/http://crdd.osdd.net/raghava/cancerdr/,2013-01-01,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh-160036, India.","Kumar R, Chaudhary K, Gupta S, Singh H, Kumar S, Gautam A, Kapoor P, Raghava GP",,,47.0,India +32360910,CancerEnD,0.998435497,CancerEnD,0.998435497,,0,1,http://webs.iiitd.edu.in/raghava/cancerend,301,,"(28.6453,77.2128)",http://web.archive.org/web/20221017081951/https://webs.iiitd.edu.in/raghava/cancerend/,2020-05-01,"Bioinformatics Centre, CSIR - Institute of Microbial Technology, Chandigarh, India. Electronic address: b.rajesh@imtech.res.in.","Kumar R, Lathwal A, Kumar V, Patiyal S, Raghav PK, Raghava GPS",,"University Grants Commission, Council of Scientific and Industrial Research, India",3.0,India +31598703,CancerGeneNet,0.994320154,CancerGeneNet,0.994320154,,0,1,http://signor.uniroma2.it/CancerGeneNet,301,,"(41.8661,12.5896)",http://web.archive.org/web/20221016220840/http://signor.uniroma2.it/CancerGeneNet/,2020-01-01,"Department of Biology, University of Rome, Tor Vergata, 00133 Rome, Italy.","Iannuccelli M, Micarelli E, Surdo PL, Palma A, Perfetto L, Rozzo I, Castagnoli L, Licata L, Cesareni G",,"Italian Association for Cancer Research, Italian Association for Cancer Research",11.0,Italy +26074488,CancerHSP,0.992445946,CancerHSP,0.992445946,anticancer herbs database of,0.878181517,1,http://lsp.nwsuaf.edu.cn/CancerHSP.php,"HTTPConnectionPool(host='lsp.nwsuaf.edu.cn', port=80): Max retries exceeded with url: /CancerHSP.php (Caused by ConnectTimeoutError(, 'Connection to lsp.nwsuaf.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20160812002730/http://lsp.nwsuaf.edu.cn:80/CancerHSP.php,2015-06-15,"Center of Bioinformatics, College of Life Science, Northwest A&F University, Yangling, Shaanxi 712100, China.","Tao W, Li B, Gao S, Bai Y, Shar PA, Zhang W, Guo Z, Sun K, Fu Y, Huang C, Zheng C, Mu J, Pei T, Wang Y, Li Y, Wang Y",,,11.0,China +33010176,CancerImmunityQTL,0.99584347,CancerImmunityQTL,0.99584347,,0,1,http://www.cancerimmunityqtl-hust.com,301,,"(1.3215,103.6957)",http://web.archive.org/web/20211021044743/http://www.cancerimmunityqtl-hust.com/,2021-01-01,"Department of Epidemiology and Biostatistics, Key Laboratory of Environmental Health of Ministry of Education, School of Public Health, Tongji Medical College, Huazhong University of Science and Technology, Wuhan 430030, China.","Tian J, Cai Y, Li Y, Lu Z, Huang J, Deng Y, Yang N, Wang X, Ying P, Zhang S, Zhu Y, Zhang H, Zhong R, Chang J, Miao X",,"National Science Fund for Distinguished Young Scholars, National Natural Science Foundation of China",2.0,China +31110280,CancerMine,0.981553555,CancerMine,0.981553555,,0,1,http://bionlp.bcgsc.ca/cancermine,301,,"(49.2767,-123.1300)",http://web.archive.org/web/20220527130509/http://bionlp.bcgsc.ca/cancermine/,2019-05-20,"Canada's Michael Smith Genome Sciences Centre, Vancouver, British Columbia, Canada.","Lever J, Zhao EY, Grewal J, Jones MR, Jones SJM",,,44.0,"Canada, Canada" +26690544,CancerNet,0.995466173,CancerNet,0.995466173,,0,1,http://bis.zju.edu.cn/CancerNet,200,,"(30.2936,120.1614)",http://web.archive.org/web/20220619140044/http://bis.zju.edu.cn/CancerNet,2015-12-21,"Department of Bioinformatics, College of Life Sciences, Zhejiang University, Hangzhou, China.","Meng X, Wang J, Yuan C, Li X, Zhou Y, Hofestädt R, Chen M",,,11.0,China +28473704,CancerPDF,0.993715048,CancerPDF,0.993715048,Cancer Peptidome Database of bioFluids,0.919792932,1,http://crdd.osdd.net/raghava/cancerpdf,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max 
retries exceeded with url: /raghava/cancerpdf (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20211206074718/http://crdd.osdd.net/raghava/cancerpdf/,2017-05-04,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Sector 39A, Chandigarh, 160036, India.","Bhalla S, Verma R, Kaur H, Kumar R, Usmani SS, Sharma S, Raghava GPS",,,11.0,India +25270878,CancerPPD,0.996373594,CancerPPD,0.996373594,,0,1,http://crdd.osdd.net/raghava/cancerppd,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/cancerppd (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2014-09-30,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh 160036, Punjab, India.","Tyagi A, Tuknait A, Anand P, Gupta S, Sharma M, Mathur D, Joshi A, Singh S, Gautam A, Raghava GP",,,79.0,India +22659240,CancerProView,0.997434795,CancerProView,0.997434795,,0,1,http://cancerproview.dmb.med.keio.ac.jp/php/cpv.html,404,,,http://web.archive.org/web/20171022002622/http://cancerproview.dmb.med.keio.ac.jp:80/php/cpv.html,2012-05-31,"Laboratory of Gene Medicine, Keio University School of Medicine, 35 Shinanomachi, Shinjuku-ku, Tokyo, 160-8582, Japan.","Mitsuyama S, Shimizu N",,"Japan Society for the Promotion of Science, Ministry of Education, Culture, Sports, Science and Technology",1.0,Japan +30329142,CancerSEA,0.997147083,CancerSEA,0.997147083,,0,1,"http://biocc.hrbmu.edu.cn/CancerSEA/, http://202.97.205.69/CancerSEA","200, HTTPConnectionPool(host='202.97.205.69', port=80): Max retries exceeded with url: /CancerSEA (Caused by ConnectTimeoutError(, 'Connection to 202.97.205.69 timed out. 
(connect timeout=5)'))",,"(31.2222,121.4581), ","http://web.archive.org/web/20220526105630/http://biocc.hrbmu.edu.cn/CancerSEA/, no_wayback",2019-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150081, China.","Yuan H, Yan M, Zhang G, Liu W, Deng C, Liao G, Xu L, Luo T, Yan H, Long Z, Shi A, Zhao T, Xiao Y, Li X",,"China Postdoctoral Science Foundation, National Natural Science Foundation of China, National Program on Key Basic Research, National High Technology Research and Development Program of China, National Natural Science Foundation of China, Higher Education in Heilongjiang Province, Heilongjiang Postdoctoral Foundation, Harbin Medical University",157.0,China +30329095,CancerSplicingQTL,0.997213602,CancerSplicingQTL,0.997213602,,0,1,http://www.cancersplicingqtl-hust.com,200,,"(1.3215,103.6957)",http://web.archive.org/web/20220712041513/http://www.cancersplicingqtl-hust.com/,2019-01-01,"Key Laboratory of Environmental Health of Ministry of Education, Department of Epidemiology and Biostatistics, School of Public Health, Tongji Medical College, Huazhong University of Science and Technology, Wuhan, Hubei 430030, PR China.","Tian J, Wang Z, Mei S, Yang N, Yang Y, Ke J, Zhu Y, Gong Y, Zou D, Peng X, Wang X, Wan H, Zhong R, Chang J, Gong J, Han L, Miao X",,"National Key Research and Development Plan Program, National Natural Science Foundation of China, National Natural Science Foundation of China",16.0,China +27832200,Cancertope,0.991436124,Cancertope,0.991436124,,0,1,http://www.imtech.res.in/raghava/cancertope,301,,"(30.7363,76.7884)",http://web.archive.org/web/20170609053622/http://www.imtech.res.in:80/raghava/cancertope/,2016-11-10,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh-160036, India.","Gupta S, Chaudhary K, Dhanda SK, Kumar R, Kumar S, Sehgal M, Nagpal G, Raghava GP",,"Indian Council of Medical Research, Open Source Drug Discovery, University Grants Commission, 
Science and Engineering Research Board, Council of Scientific and Industrial Research, Council of Scientific and Industrial Research",3.0,India +31701131,CancerTracer,0.997136056,CancerTracer,0.997136056,,0,1,http://cailab.labshare.cn/cancertracer,301,,"(37.3394,-121.8950)",http://web.archive.org/web/20221016213105/http://cailab.labshare.cn/cancertracer/,2020-01-01,"Center of Growth, Metabolism and Aging, Key Laboratory of Bio-Resources and Eco-Environment, College of Life Sciences, Sichuan University, Chengdu 610064, China.","Wang C, Yang J, Luo H, Wang K, Wang Y, Xiao ZX, Tao X, Jiang H, Cai H",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",5.0,China +21718534,CANGS,0.988113701,CANGS,0.988113701,Analyzing Next Generation Sequences DataBase,0.793947458,1,http://code.google.com/p/cangsdb,301,,"(34.0522,-118.2437)",http://web.archive.org/web/20151228233153/https://code.google.com/p/cangsdb/,2011-06-30,"Institut für Populationsgenetik, Veterinärmedizinische Universität Wien, Veterinärplatz 1, Vienna, Austria. 
christian.schloetterer@vetmeduni.ac.at.","Pandey RV, Nolte V, Boenigk J, Schlötterer C",,Austrian Science Fund FWF,3.0,Austria +34345532,CanImmunother,0.998033106,CanImmunother,0.998033106,,0,1,http://www.biomedical-web.com/cancerit,302,,"(29.4159,121.3397)",http://web.archive.org/web/20221006165546/http://www.biomedical-web.com/cancerit/,2021-07-16,"Department of Pediatrics, The University of Hong Kong-Shenzhen Hospital, Shenzhen, China.","Zhang W, Zeng B, Lin H, Guan W, Mo J, Wu S, Wei Y, Zhang Q, Yu D, Li W, Chan GC",,"China Postdoctoral Science Foundation, National Natural Science Foundation of China, Shenzhen Basic Research Fund, Fundamental Research Funds of the Central Universities, National Key R&D Program of China, Natural Science Foundation of Guangdong Province, Strategic Priority CAS Project, Sanming Project of Medicine, Guangdong Project, Guangdong Basic and Applied Basic Research Foundation, China, Dongsheng Yu, Sun Yat-sen University, National Key R&D Program of China, Support Scheme of Guangzhou for Leading Talents in Innovation and Entrepreneurship, China Postdoctoral Science Foundation, Wenliang Zhang, Binghui Zeng, Weizhong Li",0.0,"China, Hong Kong" +33942873,CANNUSE,0.991508305,CANNUSE,0.991508305,,0,1,http://cannusedb.csic.es,302,,"(40.4165,-3.7026)",http://web.archive.org/web/20220519115121/https://cannusedb.csic.es/,2021-05-01,"Institut Botànic de Barcelona (IBB, CSIC-Ajuntament de Barcelona), Passeig del Migdia s/n, Barcelona, Catalonia 08038, Spain.","Balant M, Gras A, Gálvez F, Garnatje T, Vallès J, Vitales D",,"Ministerio de Ciencia, Innovación y Universidades, Generalitat de Catalunya, Spanish government, Institut d’Estudis Catalans",1.0,Spain +"26657895, 30945201",CANTATAdb,0.954158604,CANTATAdb,0.954158604,,0,2,"http://cantata.amu.edu.pl, http://yeti.amu.edu.pl/CANTATA","200, 301",,"(52.4069,16.9299), (52.4069,16.9299)","http://web.archive.org/web/20220718203144/http://cantata.amu.edu.pl/, 
http://web.archive.org/web/20220123073106/http://yeti.amu.edu.pl/CANTATA/",2019-01-01,"Department of Bioinformatics, Institute of Molecular Biology and Biotechnology, Faculty of Biology, Adam Mickiewicz University in Poznan, 61-614 Poznan, Poland miszcz@amu.edu.pl., Laboratory of Integrative Genomics, Institute of Anthropology, Adam Mickiewicz University, Poznan, Poland. miszcz@amu.edu.pl.","Szcześniak MW, Rosikiewicz W, Makałowska I, Szcześniak MW, Bryzghalov O, Ciomborowska-Basheer J, Makałowska I",", ",", ",69.0,"Poland, Poland" +34174131,CanVaS,0.996876478,CanVaS,0.996876478,Cancer Variation,0.886106948,1,http://ithaka.rrp.demokritos.gr/CanVaS,"HTTPConnectionPool(host='ithaka.rrp.demokritos.gr', port=80): Max retries exceeded with url: /CanVaS (Caused by ConnectTimeoutError(, 'Connection to ithaka.rrp.demokritos.gr timed out. (connect timeout=5)'))",,,no_wayback,2021-07-06,"Department of Genetics, Development & Molecular Biology, School of Biology, Aristotle University of Thessaloniki, Thessaloniki, Greece.","Kalfakakou D, Fostira F, Papathanasiou A, Apostolou P, Dellatola V, Gavra IE, Vlachos IS, Scouras ZG, Drosopoulou E, Yannoukakos D, Konstantopoulou I",,Stavros Niarchos Foundation,0.0,Greece +22021380,CAPS-DB,0.997070983,CAPS-DB,0.997070983,,0,1,http://www.bioinsilico.org/CAPSDB,301,,"(41.3888,2.1590)",http://web.archive.org/web/20211206113631/http://www.bioinsilico.org/CAPSDB/,2011-10-22,"Leeds Institute of Molecular Medicine, Section of Experimental Therapeutics, University of Leeds, St James's University Hospital, Leeds LS9 7TF, UK.","Segura J, Oliva B, Fernandez-Fuentes N",,,3.0, +29509874,CarbonylDB,0.994347632,CarbonylDB,0.994347632,,0,1,http://digbio.missouri.edu/CarbonylDB,404,,,http://web.archive.org/web/20220331044112/http://digbio.missouri.edu/CarbonylDB/,2018-07-01,"Biostatistics and Bioinformatics Division, Yenepoya Research Center, Yenepoya University, Mangalore, Karnataka, India.","Rao RSP, Zhang N, Xu D, Møller IM",,"Danish Council for 
Independent Research–Technology and Production Sciences, NIGMS NIH HHS, National Institutes of Health",6.0,India +29939204,CARDIO-LNCRNAS,0.93584047,CARDIO-LNCRNAS,0.93584047,,0,1,"http://bio-bigdata.hrbmu.edu.cn/CARDIO-LNCRNAS/, http://www.bio-bigdata.net/CARDIO-LNCRNAS","200, 502",,"(31.2222,121.4581), ","no_wayback, no_wayback",2019-09-01,"College of Bioinformatics Science and Technology, Harbin Medical University, 194 Xuefu Road, Harbin, Heilongjiang, China.","Jiang C, Ding N, Li J, Jin X, Li L, Pan T, Huo C, Li Y, Xu J, Li X",,"National Natural Science Foundation of China, National High Technology Research and Development Program of China, National Program on Key Basic Research Project, Heilongjiang Province Youth Science and technology, Heilongjiang Province Youth Science and technology, China Postdoctoral Science Foundation, National Natural Science Foundation of China, Natural Science Foundation of Heilongjiang Province, Weihan Yu Youth Science Fund Project of Harbin Medical University, National Natural Science Foundation of China, National Program on Key Basic Research Project, National Program on Key Basic Research Project, China Postdoctoral Science Foundation, China Postdoctoral Science Foundation",6.0,China +27635320,CardioTF,0.989912987,CardioTF,0.989912987,,0,1,http://www.cardiosignal.org/database/cardiotf.html,"HTTPConnectionPool(host='www.cardiosignal.org', port=80): Max retries exceeded with url: /database/cardiotf.html (Caused by ConnectTimeoutError(, 'Connection to www.cardiosignal.org timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20181016074040/http://www.cardiosignal.org:80/database/cardiotf.html,2016-08-23,"State Key Laboratory of Cardiovascular Disease, Fuwai Hospital, National Center for Cardiovascular Diseases, Chinese Academy of Medical Sciences and Peking Union Medical College , Beijing , China.",Zhen Y,,National Natural Science Foundation of China,1.0,China +23794735,CARLSBAD,0.99360311,CARLSBAD,0.99360311,,0,1,http://carlsbad.health.unm.edu/carlsbad,404,,,http://web.archive.org/web/20191227200105/http://carlsbad.health.unm.edu:80/carlsbad/,2013-06-21,"Department of Internal Medicine, Translational Informatics Division, University of New Mexico School of Medicine, 1 University of New Mexico, MSC09 5025, Albuquerque, NM 87131, USA.","Mathias SL, Hines-Kay J, Yang JJ, Zahoransky-Kohalmi G, Bologa CG, Ursu O, Oprea TI",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",11.0,"Mexico, Mexico, United States" +26040787,CARMO,0.95898664,CARMO,0.95898664,comprehensive annotation of rice multi-omics data,0.74433168,1,http://bioinfo.sibs.ac.cn/carmo,301,,"(39.9906,116.2887)",http://web.archive.org/web/20220516220442/http://bioinfo.sibs.ac.cn/carmo/,2015-07-01,"National Laboratory of Plant Molecular Genetics, Shanghai Institute of Plant Physiology and Ecology, Shanghai Institutes for Biological Sciences, Chinese Academy of Sciences, 300 Fenglin Road, Shanghai, 200032, China.","Wang J, Qi M, Liu J, Zhang Y",,"Shanghai Institutes for Biological Sciences, Chinese Academy of Sciences, Strategic Priority Research Program of the Chinese Academy of Sciences",21.0,China +28365725,Carotenoid,0.51871109,,0,Carotenoid,0.51871109,1,http://carotenoiddb.jp,200,,"(34.6937,135.5022)",http://web.archive.org/web/20221015182212/http://carotenoiddb.jp/,2017-01-01,"Center for Information Biology, National Institute of Genetics, Yata 1111, Mishima, Shizuoka 411-8540, Japan.",Yabuzaki J,,,47.0,Japan 
+25267795,CarrotDB,0.99532944,CarrotDB,0.99532944,,0,1,"http://apiaceae.njau.edu.cn/carÃ, http://apiaceae.njau.edu.cn/carrotdb","HTTPConnectionPool(host='apiaceae.njau.edu.cn', port=80): Max retries exceeded with url: /car%C3%83%C2%A2%C3%82%C2%88%C3%82%C2%9A%C3%83%C2%83%C3%82%C2%89%C3%83%C2%82%C3%82%C2%AC%C3%83%C2%83%C3%82%C2%89%C3%83%C2%A2%C3%82%C2%88%C3%82%C2%9A%C3%83%C2%83%C3%82%C2%87%C3%83%C2%82%C3%82%C2%AC%C3%83%C2%83%C3%82%C2%89%C3%83%C2%A2%C3%82%C2%88%C3%82%C2%9A%C3%83%C2%83%C3%82%C2%89%C3%83%C2%82%C3%82%C2%AC%C3%83%C2%83%C3%82%C2%87%C3%83%C2%A2%C3%82%C2%88%C3%82%C2%9A%C3%83%C2%83%C3%82%C2%87%C3%83%C2%82%C3%82%C2%AC%C3%83%C2%83%C3%82%C2%89 (Caused by ReadTimeoutError(""HTTPConnectionPool(host='apiaceae.njau.edu.cn', port=80): Read timed out. (read timeout=5)"")), HTTPConnectionPool(host='apiaceae.njau.edu.cn', port=80): Max retries exceeded with url: /carrotdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='apiaceae.njau.edu.cn', port=80): Read timed out. (read timeout=5)""))",,", ","no_wayback, http://web.archive.org/web/20150826035959/http://apiaceae.njau.edu.cn:80/carrotdb/",2014-09-29,"State Key Laboratory of Crop Genetics and Germplasm Enhancement, College of Horticulture, Nanjing Agricultural University, Nanjing 210095, China.","Xu ZS, Tan HW, Wang F, Hou XL, Xiong AS",,,23.0,China +31024751,CASBench,0.995392978,CASBench,0.995392978,,0,1,http://biokinet.belozersky.msu.ru/casbench,301,,"(55.7522,37.6156)",http://web.archive.org/web/20211127203350/https://biokinet.belozersky.msu.ru/casbench,2019-01-01,"Lomonosov Moscow State University, Belozersky Institute of Physicochemical Biology and Faculty of Bioengineering and Bioinformatics, Lenin hills 1, bldg. 
73, 119991, Moscow, Russia.","Zlobin A, Suplatov D, Kopylov K, Å vedas V",,,1.0, +35134148,CATA,0.994815171,CATA,0.994815171,chromatin accessibility database,0.624158442,1,http://www.xiejjlab.bio/cata,502,,,no_wayback,2020-01-01,None,"Zhou J, Li Y, Cao H, Yang M, Chu L, Li T, Yu Z, Yu R, Qiu B, Wang Q, Li X, Xie J",,"Natural Science Foundation of Guangdong Province-Outstanding Youth Projec, Basic & Applied Basic Research Programs of Guangdong province, Basic & Applied Basic Research Programs of Guangdong province, National Natural Science Foundation of China",0.0, +25392409,CATdb,0.994086862,CATdb,0.994086862,,0,1,http://urgv.evry.inra.fr/CATdb,400,,,http://web.archive.org/web/20210115131842/http://urgv.evry.inra.fr/CATdb,2014-11-11,"INRA, Unité de Recherche en Génomique Végétale, UMR 1165, ERL CNRS 8196, Saclay Plant Sciences, CP 5708, F-91057 Evry, France UEVE, Unité de Recherche en Génomique Végétale, UMR 1165, ERL CNRS 8196, Saclay Plant Sciences, CP 5708, F-91057 Evry, France.","Zaag R, Tamby JP, Guichard C, Tariq Z, Rigaill G, Delannoy E, Renou JP, Balzergue S, Mary-Huard T, Aubourg S, Martin-Magniette ML, Brunaud V",,,9.0,"France, France" +23493402,CathaCyc,0.995314121,CathaCyc,0.995314121,,0,1,http://www.cathacyc.org,"HTTPConnectionPool(host='www.cathacyc.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))",,,http://web.archive.org/web/20170627234833/http://www.cathacyc.org/,2013-03-14,"Department of Plant Systems Biology, VIB, B-9052 Gent, Belgium.","Van Moerkercke A, Fabris M, Pollier J, Baart GJ, Rombauts S, Hasnain G, Rischer H, Memelink J, Oksman-Caldentey KM, Goossens A",,,53.0,Belgium +31691819,CAUSALdb,0.994969904,CAUSALdb,0.994969904,,0,1,http://mulinlab.org/causaldb,301,,"(37.4056,-122.0775)",http://web.archive.org/web/20221006125952/http://www.mulinlab.org/causaldb/,2020-01-01,"2011 Collaborative Innovation Center of Tianjin for 
Medical Epigenetics, National Clinical Research Center for Cancer, Tianjin Medical University Cancer Institute and Hospital, Tianjin Medical University, Tianjin, China.","Wang J, Huang D, Zhou Y, Yao H, Liu H, Zhai S, Wu C, Zheng Z, Zhao K, Wang Z, Yi X, Zhang S, Liu X, Liu Z, Chen K, Yu Y, Sham PC, Li MJ",,"National Natural Science Foundation of China, Natural Science Foundation of Tianjin, National Natural Science Foundation of China",17.0,China +"23514094, 24270786",CAZy,0.99791199,CAZy,0.99791199,Carbohydrate-Active Enzymes database,0.980871044,2,http://www.cazy.org,200,,"(48.8534,2.3488)",http://web.archive.org/web/20221014223218/http://www.cazy.org/,2013-11-21,"INRA, UMR1163 Biotechnologie des Champignons Filamenteux, Aix-Marseille Université, ESIL Polytech Marseille, 163 avenue de Luminy, CP 925, 13288, Marseille, Cedex 09, France. Anthony.Levasseur@univ-amu.fr., Centre National de la Recherche Scientifique, CNRS UMR 7257, 13288 Marseille, France and Aix-Marseille Université, AFMB, 163 Avenue de Luminy, 13288 Marseille, France.","Levasseur A, Drula E, Lombard V, Coutinho PM, Henrissat B, Lombard V, Golaconda Ramulu H, Drula E, Coutinho PM, Henrissat B",", ",", ",2853.0,"France, France, France" +29040563,CAZypedia,0.9983778,CAZypedia,0.9983778,,0,1,http://www.cazypedia.org,301,,"(49.2497,-123.1693)",no_wayback,2018-12-01,None,,,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",54.0, +26660198,CB,0.666790307,CB,0.666790307,Blindness,0.491330385,1,http://search.bwh.harvard.edu/new/CBDatabase.html,301,,"(42.3584,-71.0598)",http://web.archive.org/web/20220401061244/https://search.bwh.harvard.edu/new/CBDatabase.html,2016-12-01,"Visual Attention Lab, Harvard Medical School, Brigham & Women's Hospital, 64 Sidney St., Suite 170, Cambridge, MA, 02139, USA. 
preeti.sareen@yale.edu.","Sareen P, Ehinger KA, Wolfe JM",,NEI NIH HHS,7.0,United States +30717315,CBD,0.993405302,CBD,0.993405302,CRC biomarker database,0.776337951,1,http://sysbio.suda.edu.cn/CBD/index.html,404,,,http://web.archive.org/web/20190307053549/http://sysbio.suda.edu.cn:80/CBD/index.html,2019-02-01,"School of Medicine, Institute of Medical Sciences, Örebro University, SE-70182 Örebro, Sweden. zhang.xueli@oru.se.","Zhang X, Sun XF, Shen B, Zhang H",,"Swedish Cancer Foundation, Svenska ForskningsrÃ¥det Formas",8.0,Sweden +29020642,cBiT,0.986032426,cBiT,0.986032426,Compendium for Biomaterial Transcriptomics,0.794626407,1,http://cbit.maastrichtuniversity.nl,301,,"(50.8483,5.6889)",http://web.archive.org/web/20190816190159/https://cbit.maastrichtuniversity.nl/,2017-10-03,"MERLN Institute for Technology-inspired Regenerative Medicine, Maastricht University, Universiteitssingel 40, 6229 ER, Maastricht, The Netherlands. Electronic address: d.hebels@maastrichtuniversity.nl.","Hebels DGAJ, Carlier A, Coonen MLJ, Theunissen DH, de Boer J",,"Dutch Science Foundation, Dutch province of Limburg",4.0,Netherlands +29753807,CbLncRNAdb,0.888481498,CbLncRNAdb,0.888481498,,0,1,http://cabgrid.res.in/cblncrnadb,301,,"(28.6109,77.1792)",http://web.archive.org/web/20220619210634/http://cabgrid.res.in/cblncrnadb/,2018-05-26,"ICAR-Indian Agricultural Statistics Research Institute, New Delhi, India; Amity University, Noida, Uttar Pradesh, India.","Sahu S, Rao AR, Pandey J, Gaikwad K, Ghoshal S, Mohapatra T",,"Indian Council of Agricultural Research, Indian Council of Agricultural Research",2.0,"India, India" +25475113,CBMAR,0.971514285,CBMAR,0.971514285,,0,1,http://14.139.227.92/mkumar/lactamasedb,"HTTPConnectionPool(host='14.139.227.92', port=80): Max retries exceeded with url: /mkumar/lactamasedb (Caused by ConnectTimeoutError(, 'Connection to 14.139.227.92 timed out. 
(connect timeout=5)'))",,,no_wayback,2014-12-03,"Department of Biophysics and Department of Microbiology, University of Delhi South Campus, Benito Juarez Road, New Delhi 110021, India.","Srivastava A, Singhal N, Goel M, Virdi JS, Kumar M",,,16.0,India +23228284,CBS,0.998403251,CBS,0.998403251,Conserved regulatory Binding Sites,0.973923177,1,http://compfly.bio.ub.es/CBS,"HTTPConnectionPool(host='compfly.bio.ub.es', port=80): Max retries exceeded with url: /CBS (Caused by ConnectTimeoutError(, 'Connection to compfly.bio.ub.es timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20150611184603/http://compfly.bio.ub.es:80/CBS/,2012-12-10,,,,,0.0, +29126148,CCDS,0.956499179,CCDS,0.956499179,Consensus Coding Sequence,0.615940392,1,http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi,301,,"(38.9896,-77.1538)",no_wayback,2018-01-01,,,,,0.0, +32386544,CCFv3,0.989022076,CCFv3,0.989022076,Mouse Brain Common Coordinate Framework,0.890492062,1,http://atlas.brain-map.org,200,,"(47.6302,-122.3210)",no_wayback,2020-05-07,"Allen Institute for Brain Science, Seattle, WA 98109, USA.","Wang Q, Ding SL, Li Y, Royall J, Feng D, Lesnar P, Graddis N, Naeemi M, Facer B, Ho A, Dolbeare T, Blanchard B, Dee N, Wakeman W, Hirokawa KE, Szafer A, Sunkin SM, Oh SW, Bernard A, Phillips JW, Hawrylycz M, Koch C, Zeng H, Harris JA, Ng L",,"NIMH NIH HHS, NIMH NIH HHS, National Institute of Mental Health",119.0,United States +28147217,CCG,0.97024858,CCG,0.97024858,Catalogue of Cancer Genes,0.896562586,1,http://ccg.xingene.net,200,,"(37.7621,-122.3971)",no_wayback,2016-12-01,"MOE Key Laboratory of Bioinformatics, Center for Synthetic and Systems Biology, Center for Plant Biology and Tsinghua-Peking Joint Center for Life Sciences, School of Life Sciences, Tsinghua University, Beijing 100084, China.","Liu M, Yang YT, Xu G, Tan C, Lu ZJ",,,4.0,China +25190456,CCGD,0.979682426,CCGD,0.979682426,Candidate Cancer Gene 
Database,0.891004175,1,http://ccgd-starrlab.oit.umn.edu,200,,"(44.9800,-93.2638)",http://web.archive.org/web/20221017003700/http://ccgd-starrlab.oit.umn.edu/,2014-09-04,"Department of Obstetrics, Gynecology & Women's Health, University of Minnesota, Minneapolis, MN 55455, USA.","Abbott KL, Nyre ET, Abrahante J, Ho YY, Isaksson Vogel R, Starr TK",,"NCI NIH HHS, NCATS NIH HHS, NCATS NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NIMHD NIH HHS",52.0,United States +30208340,CCGD-ESCC,0.976696948,CCGD-ESCC,0.976696948,Chinese Cancer Genomic Database-Esophageal Squamous Cell Carcinoma,0.954146482,1,http://db.cbi.pku.edu.cn/ccgd/ESCCdb,"HTTPConnectionPool(host='db.cbi.pku.edu.cn', port=80): Max retries exceeded with url: /ccgd/ESCCdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2018-08-01,"Department of Etiology and Carcinogenesis, National Cancer Center/Cancer Hospital, Chinese Academy of Medical Sciences and Peking Union Medical College, Beijing 100021, China.","Peng L, Cheng S, Lin Y, Cui Q, Luo Y, Chu J, Shao M, Fan W, Chen Y, Lin A, Xi Y, Sun Y, Zhang L, Zhang C, Tan W, Gao G, Wu C, Lin D",,,9.0,China +26519468,ccmGDB,0.998047173,ccmGDB,0.998047173,Cancer Cell Metabolism Gene DataBase,0.980098925,1,http://bioinfo.mc.vanderbilt.edu/ccmGDB,"HTTPConnectionPool(host='bioinfo.mc.vanderbilt.edu', port=80): Max retries exceeded with url: /ccmGDB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160401224529/http://bioinfo.mc.vanderbilt.edu:80/ccmGDB/,2015-10-30,,,,,0.0, +28053168,ccNET,0.994896412,ccNET,0.994896412,,0,1,http://structuralbiology.cau.edu.cn/gossypium,301,,"(39.9075,116.3972)",http://web.archive.org/web/20211207181810/http://structuralbiology.cau.edu.cn/gossypium/,2016-10-07,"State key Laboratory of Plant Physiology and Biochemistry, College of Biological 
Sciences, China Agricultural University, Beijing 100193, China.","You Q, Xu W, Zhang K, Zhang L, Yi X, Yao D, Wang C, Zhang X, Zhao X, Provart NJ, Li F, Su Z",,,28.0,"China, China" +26868054,CCSI,0.997085452,CCSI,0.997085452,chromatin-chromatin spatial interaction,0.910043742,1,http://songyanglab.sysu.edu.cn/ccsi,"HTTPConnectionPool(host='songyanglab.sysu.edu.cn', port=80): Max retries exceeded with url: /ccsi (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2016-02-11,"Key Laboratory of Gene Engineering of the Ministry of Education and State Key Laboratory of Biocontrol, School of Life Sciences, Sun Yat-Sen University, Guangzhou 510006, China xyyan@mail.sysu.edu.cn zhimdai@gmail.com.","Xie X, Ma W, Songyang Z, Luo Z, Huang J, Dai Z, Xiong Y",,,8.0,China +"23197659, 25414356, 27899674, 31777944, 31851420",CDD,0.996487617,CDD,0.996487617,Conserved Domain Database,0.861336619,5,http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221015002029/https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml,2020-03-01,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bldg. 38 A, Room 8N805, 8600 Rockville Pike, Bethesda, MD 20894, USA. bauer@ncbi.nlm.nih.gov, National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bldg. 38 A, Room 8N805, 8600 Rockville Pike, Bethesda, MD 20894, USA bauer@ncbi.nlm.nih.gov., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bldg. 38 A, Room 8N805, 8600 Rockville Pike, Bethesda, MD 20894, USA bauer@ncbi.nlm.nih.gov., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bldg. 
38 A, Room 8N805, 8600 Rockville Pike, Bethesda, MD 20894, USA., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, Maryland.","Marchler-Bauer A, Zheng C, Chitsaz F, Derbyshire MK, Geer LY, Geer RC, Gonzales NR, Gwadz M, Hurwitz DI, Lanczycki CJ, Lu F, Lu S, Marchler GH, Song JS, Thanki N, Yamashita RA, Zhang D, Bryant SH, Marchler-Bauer A, Derbyshire MK, Gonzales NR, Lu S, Chitsaz F, Geer LY, Geer RC, He J, Gwadz M, Hurwitz DI, Lanczycki CJ, Lu F, Marchler GH, Song JS, Thanki N, Wang Z, Yamashita RA, Zhang D, Zheng C, Bryant SH, Marchler-Bauer A, Bo Y, Han L, He J, Lanczycki CJ, Lu S, Chitsaz F, Derbyshire MK, Geer RC, Gonzales NR, Gwadz M, Hurwitz DI, Lu F, Marchler GH, Song JS, Thanki N, Wang Z, Yamashita RA, Zhang D, Zheng C, Geer LY, Bryant SH, Lu S, Wang J, Chitsaz F, Derbyshire MK, Geer RC, Gonzales NR, Gwadz M, Hurwitz DI, Marchler GH, Song JS, Thanki N, Yamashita RA, Yang M, Zhang D, Zheng C, Lanczycki CJ, Marchler-Bauer A, Yang M, Derbyshire MK, Yamashita RA, Marchler-Bauer A",", , , , ","Intramural NIH HHS, Intramural NIH HHS, , National Institutes of Health, U.S. Department of Health and Human Services, Intramural NIH HHS, U.S. National Library of Medicine, National Science Foundation",3401.0,"United States, United States, United States, United States" +29997612,CDG,0.988353372,CDG,0.988353372,,0,1,http://lab.rockefeller.edu/casanova/CDG,302,,"(40.7143,-74.0060)",http://web.archive.org/web/20220709102137/http://lab.rockefeller.edu/casanova/CDG,2018-06-27,"St. 
Giles Laboratory of Human Genetics of Infectious Diseases (Rockefeller Branch), The Rockefeller University, New York, NY, United States.","Requena D, Maffucci P, Bigio B, Shang L, Abhyankar A, Boisson B, Stenson PD, Cooper DN, Cunningham-Rundles C, Casanova JL, Abel L, Itan Y",,"NIAID NIH HHS, NIAID NIH HHS",2.0,United States +23537399,CDP,0.828693906,CDP,0.828693906,death proteomics,0.697402914,1,http://celldeathproteomics.uio.no,200,,"(59.9127,10.7461)",http://web.archive.org/web/20220330035630/http://celldeathproteomics.uio.no/,2013-04-10,"The Biotechnology Centre of Oslo, University of Oslo, 0317 Oslo, Norway. magnus.arntzen@biotek.uio.no","Arntzen MØ, Bull VH, Thiede B",,,6.0,Norway +30759968,CDRgator,0.994814634,CDRgator,0.994814634,Cancer Drug Resistance navigator,0.785553472,1,http://cdrgator.ewha.ac.kr,"HTTPConnectionPool(host='cdrgator.ewha.ac.kr', port=80): Max retries exceeded with url: / (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,no_wayback,2019-02-12,"Ewha Research Center for Systems Biology, Department of Life Science, Division of Molecular & Life Sciences, Ewha Womans University, Seoul 03760, Korea.","Jang SK, Yoon BH, Kang SM, Yoon YG, Kim SY, Kim W",,,2.0, +23893318,CDSA,0.964991828,CDSA,0.964991828,Cancer Digital Slide Archive,0.8173123,1,http://cancer.digitalslidearchive.net,405,,,http://web.archive.org/web/20190823153914/http://cancer.digitalslidearchive.net:80/,2013-07-25,"Department of Biomedical Informatics, Emory University School of Medicine, Atlanta, Georgia, USA.","Gutman DA, Cobb J, Somanna D, Park Y, Wang F, Kurc T, Saltz JH, Brat DJ, Cooper LA",,"NLM NIH HHS, NLM NIH HHS, NLM NIH HHS, NCI NIH HHS, NCI NIH HHS, NCATS NIH HHS, CCR NIH HHS, NLM NIH HHS, NLM NIH HHS, NCRR NIH HHS",52.0,"Georgia, United States" +24580755,CDSbank,0.99717778,CDSbank,0.99717778,,0,1,http://hazeslab.med.ualberta.ca/CDSbank,"HTTPConnectionPool(host='hazeslab.med.ualberta.ca', 
port=80): Max retries exceeded with url: /CDSbank (Caused by ReadTimeoutError(""HTTPConnectionPool(host='hazeslab.med.ualberta.ca', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20200221081107/http://hazeslab.med.ualberta.ca:80/CDSbank/,2014-02-28,"Department of Medical Microbiology & Immunology, 6-020 Katz Group Centre, University of Alberta, Edmonton, Alberta T6G 2E1, Canada. bart.hazes@ualberta.ca.",Hazes B,,,1.0,Canada +27899660,CEBS,0.996447543,CEBS,0.996447543,Chemical Effects in Biological Systems database,0.990120093,1,http://www.niehs.nih.gov/research/resources/databases/cebs,301,,"(38.9807,-77.1003)",http://web.archive.org/web/20180901175224/https://www.niehs.nih.gov/research/resources/databases/cebs/,2016-11-28,"ASRCFederal Vistronix, 430 Davis Dr, Suite 260, Morrisville, NC 27569, USA Isabel.lea@nih.gov.","Lea IA, Gong H, Paleja A, Rashid A, Fostel J",,,24.0,United States +25392417,CeCaFDB,0.99858432,CeCaFDB,0.99858432,Central Carbon Metabolic Flux Database,0.983667357,1,http://www.cecafdb.org,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220622210414/http://www.cecafdb.org/,2014-11-11,"College of Computer Science and Technology, Guizhou University, Guiyang, Guizhou 550025, P.R. 
China.","Zhang Z, Shen T, Rui B, Zhou W, Zhou X, Shang C, Xin C, Liu X, Li G, Jiang J, Li C, Li R, Han M, You S, Yu G, Yi Y, Wen H, Liu Z, Xie X",,,19.0,China +26527719,CEGA,0.962110519,CEGA,0.962110519,Conserved Elements from Genomic Alignments,0.852080087,1,http://cega.ezlab.org,301,,"(46.2022,6.1457)",http://web.archive.org/web/20211129182512/https://cega.ezlab.org/,2015-11-02,"Department of Genetic Medicine and Development, University of Geneva Medical School, Swiss Institute of Bioinformatics, rue Michel-Servet 1, 1211 Geneva, Switzerland.","Dousse A, Junier T, Zdobnov EM",,Swiss National Science Foundation,10.0,Switzerland +29992323,CeleryDB,0.992402375,CeleryDB,0.992402375,,0,1,http://apiaceae.njau.edu.cn/celerydb,"HTTPConnectionPool(host='apiaceae.njau.edu.cn', port=80): Max retries exceeded with url: /celerydb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='apiaceae.njau.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20210418195406/http://apiaceae.njau.edu.cn/celerydb,2018-01-01,"State Key Laboratory of Crop Genetics and Germplasm Enhancement, Key Laboratory of Biology and Germplasm Enhancement of Horticultural Crops in East China, Ministry of Agriculture, College of Horticulture, Nanjing Agricultural University, Nanjing 210095, China.","Feng K, Hou XL, Li MY, Jiang Q, Xu ZS, Liu JX, Xiong AS",,"National Natural Science Foundation of China, Priority Academic Program Development of Jiangsu Higher Education Institutions, Natural Science Foundation of Jiangsu Province, Program for New Century Excellent Talents in University",10.0,"China, China" +34971674,CellDepot,0.996150434,CellDepot,0.996150434,,0,1,http://celldepot.bxgenomics.com,301,,"(43.0730,-89.4528)",no_wayback,2021-12-28,"Research Department, Biogen, Inc., 225 Binney St, Cambridge, MA 02142, USA. 
Electronic address: dongdong.lin@biogen.com.","Lin D, Chen Y, Negi S, Cheng D, Ouyang Z, Sexton D, Li K, Zhang B",,,0.0,United States +24304896,CellFinder,0.99684298,CellFinder,0.99684298,,0,1,http://www.cellfinder.org,301,,"(48.1374,11.5755)",http://web.archive.org/web/20221028193054/http://cellfinder.org/,2013-12-03,"Berlin Brandenburg Center for Regenerative Medicine, Charité - Universitätsmedizin Berlin, Berlin 13353, Germany, Max Delbrück Center for Molecular Medicine, Computational Biology and Data Mining, Berlin 13125, Germany, Humboldt Universität zu Berlin, Institute for Computer Science, Berlin 10099, Germany and Seoul National University, College of Veterinary Medicine and Research Institute for Veterinary Science, Seoul 151-742, Republic of Korea.","Stachelscheid H, Seltmann S, Lekschas F, Fontaine JF, Mah N, Neves M, Andrade-Navarro MA, Leser U, Kurtz A",,,15.0,"Germany, Germany, Germany" +33471060,Cellinker,0.993874848,Cellinker,0.993874848,,0,1,http://www.rna-society.org/cellinker,301,,"(40.2338,-111.6585)",http://web.archive.org/web/20221107194814/http://www.rna-society.org/cellinker/,2021-01-20,"Shunde Hospital, Southern Medical University (The First People's Hospital of Shunde Foshan), Foshan 528308, China.","Zhang Y, Liu T, Wang J, Zou B, Li L, Yao L, Chen K, Ning L, Wu B, Zhao X, Wang D",,"China Postdoctoral Science Foundation, Guangzhou science and technology project key project topic, Basic and Applied Basic Research Fund of Guangdong Province, National Natural Science Foundation of China, National Key Research and Development Project of China, National Natural Science Foundation of China, Basic and Applied Basic Research Fund of Guangdong Province, China Postdoctoral Science Foundation, National Natural Science Foundation of China",10.0,China +23251048,CellLineMiner,0.992360115,CellLineMiner,0.992360115,,0,1,http://dev.pubgene.com/cellmine,302,,"(59.9127,10.7461)",no_wayback,2012-11-13,"Department of Tumor Biology, Institute for Cancer 
Research, Norwegian Radium Hospital - Oslo University Hospital, Norway.","Nakken S, Johansen M, Fillebeen J, Berge OP, Kirkerød H, Jenssen TK, Hovig E",,,0.0,Norway +30289549,CellMarker,0.992818952,CellMarker,0.992818952,,0,1,"http://biocc.hrbmu.edu.cn/CellMarker/, http://bio-bigdata.hrbmu.edu.cn/CellMarker","200, 302",,"(31.2222,121.4581), (31.2222,121.4581)","http://web.archive.org/web/20221110040426/http://biocc.hrbmu.edu.cn/CellMarker/, http://web.archive.org/web/20220316135008/http://bio-bigdata.hrbmu.edu.cn/CellMarker/",2019-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150081, China.","Zhang X, Lan Y, Xu J, Quan F, Zhao E, Deng C, Luo T, Xu L, Liao G, Yan M, Ping Y, Li F, Shi A, Bai J, Zhao T, Li X, Xiao Y",,"National Natural Science Foundation of China, Heilongjiang Postdoctoral Foundation, National High Technology Research and Development Program of China, National Natural Science Foundation of China, China Postdoctoral Science Foundation, National Natural Science Foundation of China, National Program on Key Basic Research, Construction of Higher Education in Heilongjiang Province, Harbin Medical University",208.0,China +33086069,CellMiner-SCLC,0.997686309,CellMiner-SCLC,0.997686309,,0,1,http://discover.nci.nih.gov/SclcCellMinerCDB,301,,"(39.4143,-77.4105)",no_wayback,2020-10-01,"Developmental Therapeutics Branch, Center for Cancer Research, National Cancer Institute, NIH, Bethesda, MD 20892, USA.","Tlemsani C, Pongor L, Elloumi F, Girard L, Huffman KE, Roper N, Varma S, Luna A, Rajapakse VN, Sebastian R, Kohn KW, Krushkal J, Aladjem MI, Teicher BA, Meltzer PS, Reinhold WC, Minna JD, Thomas A, Pommier Y",,"Intramural NIH HHS, Intramural NIH HHS, NCI NIH HHS, Intramural NIH HHS, NCI NIH HHS, NCI NIH HHS, Intramural NIH HHS, CCR NIH HHS, Intramural NIH HHS, NCI NIH HHS, Intramural NIH HHS, National Cancer Institute",20.0,United States 
+24016071,CellMinerHCC,0.941295445,CellMinerHCC,0.941295445,,0,1,http://www.medicalgenomics.org/cellminerhcc,301,,"(52.5244,13.4105)",http://web.archive.org/web/20220129020738/https://medicalgenomics.org/cellminerhcc,2013-09-09,"Department of Medicine I, Johannes Gutenberg University, Mainz, Germany.","Staib F, Krupp M, Maass T, Itzel T, Weinmann A, Lee JS, Schmidt B, Müller M, Thorgeirsson SS, Galle PR, Teufel A",,"Intramural NIH HHS, Intramural NIH HHS",7.0,Germany +22039163,CELLPEDIA,0.998041213,CELLPEDIA,0.998041213,,0,1,http://cellpedia.cbrc.jp,301,,"(35.6895,139.6917)",http://web.archive.org/web/20131229112640/http://cellpedia.cbrc.jp:80/,2011-10-29,"Computational Biology Research Center, Advanced Industrial Science and Technology (AIST), 2-4-7 Aomi, Koto-ku, Tokyo 135-0064, Japan.","Hatano A, Chiba H, Moesa HA, Taniguchi T, Nagaie S, Yamanegi K, Takai-Igarashi T, Tanaka H, Fujibuchi W",,,17.0,Japan +33147626,CellTalkDB,0.99723649,CellTalkDB,0.99723649,,0,1,http://tcm.zju.edu.cn/celltalkdb,301,,"(30.2936,120.1614)",http://web.archive.org/web/20220407104448/http://tcm.zju.edu.cn/celltalkdb/,2021-07-01,"Pharmaceutical Informatics Institute, College of Pharmaceutical Sciences, Zhejiang University, China.","Shao X, Liao J, Li C, Lu X, Cheng J, Fan X",,"National Natural Science Foundation of China, Natural Science Foundation of Zhejiang Province, National Youth Top-notch Talent Support Program",18.0,China +25592564,CELLX,0.997325838,CELLX,0.997325838,Cell Index Database,0.766975661,1,http://cellx.sourceforge.net,301,,"(37.7621,-122.3971)",no_wayback,2015-01-01,"Oncology Research Unit, Pfizer Global Research & Development, Pfizer Inc., 10777 Science Center Drive San Diego, CA 92121, USA. 
keith.ching@pfizer.com.","Ching KA, Wang K, Kan Z, Fernandez J, Zhong W, Kostrowicki J, Xie T, Zhu Z, Martini JF, Koehler M, Arndt K, Rejto P",,,3.0,United States +25970778,CEMTDD,0.996073186,CEMTDD,0.996073186,Chinese Ethnic Minority Traditional Drug Database,0.91564708,1,http://www.cemtdd.com/index.html,200,,"(22.5455,114.0683)",http://web.archive.org/web/20180816072806/http://cemtdd.com/index.html,2015-07-01,"Key Laboratory of Structure-Based Drug Design & Discovery of Ministry of Education, School of Traditional Chinese Materia Medica, Shenyang Pharmaceutical University, Shenyang, China.","Huang J, Zheng Y, Wu W, Xie T, Yao H, Pang X, Sun F, Ouyang L, Wang J",,,3.0,China +27701074,CeNDR,0.993216455,CeNDR,0.993216455,elegans Natural Diversity Resource,0.823794746,1,http://www.elegansvariation.org,302,,"(34.0522,-118.2437)",http://web.archive.org/web/20221016212132/https://www.elegansvariation.org/,2016-10-03,"Interdisciplinary Biological Sciences Program, Northwestern University, Evanston, IL 60208, USA.","Cook DE, Zdraljevic S, Roberts JP, Andersen EC",,"NIGMS NIH HHS, NIGMS NIH HHS",97.0,United States +24270791,CentrosomeDB,0.996333599,CentrosomeDB,0.996333599,,0,1,http://centrosome.cnb.csic.es,"HTTPConnectionPool(host='centrosome.cnb.csic.es', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20130612103641/http://centrosome.cnb.csic.es/,2013-11-21,"Functional Bioinformatics Group, National Center for Biotechnology-CSIC, Madrid 28049, Spain.","Alves-Cruzeiro JM, Nogales-Cadenas R, Pascual-Montano AD",,,29.0,Spain +21247929,CEREALAB,0.995646238,CEREALAB,0.995646238,,0,1,http://www.cerealab.unimore.it/jws/cerealab.jnlp,"HTTPConnectionPool(host='www.cerealab.unimore.it', port=80): Max retries exceeded with url: /jws/cerealab.jnlp (Caused by ConnectTimeoutError(, 'Connection to www.cerealab.unimore.it timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20140715111221/http://www.cerealab.unimore.it/jws/cerealab.jnlp,2011-01-18,"Department of Agricultural and Food Sciences, University of Modena and Reggio Emilia, via G. Amendola 2, 42122 Reggio Emilia, Italy.","Milc J, Sala A, Bergamaschi S, Pecchioni N",,,2.0,Italy +32754757,CerealsDB,0.996999264,CerealsDB,0.996999264,,0,1,http://www.cerealsdb.uk.net/cerealgenomics/CerealsDB/indexNEW.php,301,,"(51.4552,-2.5966)",http://web.archive.org/web/20220402105659/https://www.cerealsdb.uk.net/cerealgenomics/CerealsDB/indexNEW.php,2020-01-01,"School of Biological Sciences, University of Bristol, Bristol Life Sciences Building, 24 Tyndall Avenue, Bristol, BS8 1TQ, UK.","Wilkinson PA, Allen AM, Tyrrell S, Wingen LU, Bian X, Winfield MO, Burridge A, Shaw DS, Zaucha J, Griffiths S, Davey RP, Edwards KJ, Barker GLA",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, UK, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, UK, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, UK, Biotechnology and Biological Sciences Research Council, UK",6.0, +31428785,CFEA,0.997379303,CFEA,0.997379303,,0,1,http://www.bio-data.cn/CFEA,302,,"(22.2783,114.1747)",http://web.archive.org/web/20220329135311/http://www.bio-data.cn/CFEA/,2020-01-01,"School of Biomedical Engineering, School of Ophthalmology and Optometry and Eye Hospital, Wenzhou Medical University, Wenzhou 325011, Zhejiang, China.","Yu F, Li K, Li S, Liu J, Zhang Y, Zhou M, Zhao H, Chen H, Wu N, Liu Z, Su J",,"Beijing Natural Science Foundation, National Natural Science Foundation of 
China, Science Foundation of Zhejiang Province, Special Foundation for Key Basic Research of Wenzhou Institute of Biomaterials and Engineering, CAMS Innovation Fund for Medical Sciences, CAMS Innovation Fund for Medical Sciences, National Natural Science Foundation of China, National Natural Science Foundation of China",7.0,China +23193288,CFGP,0.979907155,CFGP,0.979907155,Comparative Fungal Genomics Platform,0.961724751,1,http://cfgp.snu.ac.kr,"HTTPConnectionPool(host='cfgp.snu.ac.kr', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='cfgp.snu.ac.kr', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2012-11-27,"Fungal Bioinformatics Laboratory, Department of Agricultural Biotechnology, Seoul National University, Seoul 151-742, Korea.","Choi J, Cheong K, Jung K, Jeon J, Lee GW, Kang S, Kim S, Lee YW, Lee YH",,,34.0, +28603918,CFTR-France,0.977899387,CFTR-France,0.977899387,ystic fibrosis transmembrane,0.618076883,1,http://cftr.iurc.montp.inserm.fr/cftr,302,,"(43.6109,3.8763)",http://web.archive.org/web/20220313053015/https://cftr.iurc.montp.inserm.fr/cftr/,2017-06-28,"Laboratoire de Génétique Moléculaire, Centre Hospitalier Universitaire et Université de Montpellier, Montpellier, France.","Claustres M, Thèze C, des Georges M, Baux D, Girodon E, Bienvenu T, Audrezet MP, Dugueperoux I, Férec C, Lalau G, Pagin A, Kitzis A, Thoreau V, Gaston V, Bieth E, Malinge MC, Reboul MP, Fergelot P, Lemonnier L, Mekki C, Fanen P, Bergougnoux A, Sasorith S, Raynal C, Bareil C",,Association Vaincre la Mucoviscidose,17.0,France +26160459,CGMD,0.992433429,CGMD,0.992433429,Cancer Gene Marker Database,0.956503713,1,http://cgmd.in,"HTTPConnectionPool(host='cgmd.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20161021074258/http://cgmd.in/,2015-07-10,"Division of Animal Biotechnology, 
Department of Zoology, Sri Venkateswara University, Tirupati-517502, Andhra Pradesh, India.","Pradeepkiran JA, Sainath SB, Kumar KK, Balasubramanyam L, Prabhakar KV, Bhaskar M",,,3.0,India +23486613,CGOB,0.997058729,CGOB,0.997058729,Candida Gene Order Browser,0.948766589,1,http://cgob.ucd.ie,200,,"(53.3331,-6.2489)",http://web.archive.org/web/20220121181212/http://cgob.ucd.ie/,2013-03-13,"UCD School of Biomolecular and Biomedical Science, Conway Institute, University College Dublin, Belfield, Dublin, Ireland.","Maguire SL, ÓhÉigeartaigh SS, Byrne KP, Schröder MS, O'Gaora P, Wolfe KH, Butler G",,"Wellcome Trust, European Research Council",76.0,Ireland +25518738,CGWR,0.983098507,CGWR,0.983098507,Chickpea Genomic Web Resource,0.932702328,1,http://www.nipgr.res.in/CGWR/home.php,"HTTPConnectionPool(host='www.nipgr.res.in', port=80): Max retries exceeded with url: /CGWR/home.php (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))",,,http://web.archive.org/web/20181010193319/http://www.nipgr.res.in:80/CGWR/home.php,2014-12-18,None,"Misra G, Priya P, Bandhiwal N, Bareja N, Jain M, Bhatia S, Chattopadhyay D, Tyagi AK, Yadav G",,,3.0, +22232598,Channelpedia,0.995265245,Channelpedia,0.995265245,,0,1,http://channelpedia.net,301,,"(46.5290,6.5626)",no_wayback,2011-12-30,"Blue Brain Project, Brain Mind Institute, Ecole Polytechnique Fédérale de Lausanne Lausanne, Switzerland.","Ranjan R, Khazen G, Gambazzi L, Ramaswamy S, Hill SL, Schürmann F, Markram H",,,24.0,Switzerland +29036719,ChannelsDB,0.998202503,ChannelsDB,0.998202503,,0,1,http://ncbr.muni.cz/ChannelsDB,301,,"(49.1952,16.6080)",http://web.archive.org/web/20220225182740/http://ncbr.muni.cz/ChannelsDB/,2018-01-01,"CEITEC - Central European Institute of Technology, Masaryk University Brno, Kamenice 5, 625 00 Brno-Bohunice, Czech Republic.","Pravda L, Sehnal D, Svobodová Vareková R, Navrátilová V, TouÅ¡ek D, Berka K, Otyepka M, Koca J",,,13.0, 
+22140108,CharProtDB,0.996790111,CharProtDB,0.996790111,,0,1,http://www.jcvi.org/charprotdb,301,,"(39.0437,-77.4875)",no_wayback,2011-12-02,"J Craig Venter institute, 9704 Medical Center Drive Rockville, MD 20850, USA. rmadupu@jcvi.org","Madupu R, Richter A, Dodson RJ, Brinkac L, Harkins D, Durkin S, Shrivastava S, Sutton G, Haft D",,NHGRI NIH HHS,19.0,United States +28651548,CHD,0.82080698,CHD,0.82080698,Canis mtDNA HV1,0.562380518,1,http://chd.vnbiology.com,200,,"(45.5088,-73.5878)",http://web.archive.org/web/20220713022800/http://chd.vnbiology.com/,2017-06-26,"Saigon University, 273 An Duong Vuong street, District 5, Ho Chi Minh city, Vietnam. quan.tk@cb.sgu.edu.vn.","Thai QK, Chung DA, Tran HD",,"Saigon University, The Vietnam National Gene Fund",2.0, +23818526,CHD@ZJU,0.971079409,CHD@ZJU,0.971079409,,0,1,http://tcm.zju.edu.cn/chd,301,,"(30.2936,120.1614)",http://web.archive.org/web/20220321024304/http://tcm.zju.edu.cn/chd/,2013-07-01,"Pharmaceutical Informatics Institute, College of Pharmaceutical Sciences, Zhejiang University, Hangzhou 310058, China.","Wu L, Li X, Yang J, Liu Y, Fan X, Cheng Y",,,5.0,China +32608479,CHDGKB,0.63245517,CHDGKB,0.63245517,,0,1,http://www.sysbio.org.cn/CHDGKB,301,,"(22.2783,114.1747)",http://web.archive.org/web/20200814083354/http://www.sysbio.org.cn/CHDGKB/,2020-01-01,"Center for Systems Biology, Soochow University, Suzhou 215006, China.","Yang L, Yang Y, Liu X, Chen Y, Chen Y, Lin Y, Sun Y, Shen B",,"National Natural Science Foundation of China, National Natural Science Foundation of China, Natural Science Foundation of the Jiangsu Higher Education Institutions of China, National Key Research and Development Program of China",2.0,China +28383342,CHEAR,0.995191038,CHEAR,0.995191038,Children's Health Exposure Analysis Resource,0.965028177,1,http://chearprogram.org,301,,"(39.0840,-77.1528)",http://web.archive.org/web/20220308032657/https://chearprogram.org/,2017-06-01,"Division of Extramural Research and Training, National 
Institute of Environmental Health Sciences, National Institutes of Health, Morrisville, North Carolina, USA.","Balshaw DM, Collman GW, Gray KA, Thompson CL",,Intramural NIH HHS,23.0,United States +"23180789, 26467479",ChEBI,0.998489141,ChEBI,0.998489141,,0,2,http://www.ebi.ac.uk/chebi,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20220924041949/https://www.ebi.ac.uk/chebi/,2015-10-13,"Department of Cheminformatics and Metabolism, European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, CB10 1SD, UK. hastings@ebi.ac.uk, Cheminformatics and Metabolism, European Molecular Biology Laboratory-European Bioinformatics Institute (EMBL-EBI), Hinxton, UK hastings@ebi.ac.uk.","Hastings J, de Matos P, Dekker A, Ennis M, Harsha B, Kale N, Muthukrishnan V, Owen G, Turner S, Williams M, Steinbeck C, Hastings J, Owen G, Dekker A, Ennis M, Kale N, Muthukrishnan V, Turner S, Swainston N, Mendes P, Steinbeck C",", ","Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",535.0, +33970229,ChemHub,0.996807337,ChemHub,0.996807337,,0,1,http://www.rxnfinder.org/chemhub,301,,"(39.9075,116.3972)",http://web.archive.org/web/20210731123445/http://www.rxnfinder.org/chemhub/,2021-05-10,"CAS Key Laboratory of Computational Biology, Shanghai Institute of Nutrition and Health, University of Chinese Academy of Sciences, Chinese Academy of Sciences, Shanghai 200031, China.","Han M, Zhang D, Ding S, Tian Y, Cheng X, Yuan L, Sun D, Liu D, Gong L, Jia C, Cai P, Tu W, Chen J, Hu QN",,"National Natural Science Foundation of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China",1.0,China +26876982,ChemProt,0.998003364,ChemProt,0.998003364,,0,1,http://potentia.cbs.dtu.dk/ChemProt,"HTTPConnectionPool(host='potentia.cbs.dtu.dk', port=80): Max retries 
exceeded with url: /ChemProt (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170713095715/http://potentia.cbs.dtu.dk/ChemProt/,2016-02-13,"Department of Systems Biology, Center for Biological Sequence Analysis, buildin 208, kemitorvet, Technical University of Denmark, DK-2800 Lyngby, Denmark.","Kringelum J, Kjaerulff SK, Brunak S, Lund O, Oprea TI, Taboureau O",,,25.0,"Denmark, Denmark" +23185041,ChemProt-2.0,0.885878646,ChemProt-2.0,0.885878646,,0,1,http://www.cbs.dtu.dk/services/ChemProt-2.0,404,,,http://web.archive.org/web/20210324173615/http://www.cbs.dtu.dk/services/ChemProt-2.0/,2012-11-26,"Center for Biological Sequence Analysis, Department of Systems Biology, Technical University of Denmark, 2800 Lyngby, Denmark.","Kim Kjærulff S, Wich L, Kringelum J, Jacobsen UP, Kouskoumvekaki I, Audouze K, Lund O, Brunak S, Oprea TI, Taboureau O",,NIGMS NIH HHS,28.0,"Denmark, Denmark" +24470572,ChEpiMod,0.997203112,ChEpiMod,0.997203112,,0,1,http://chepimod.org,"HTTPConnectionPool(host='chepimod.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20181117204544/http://chepimod.org/,2014-01-27,"Department of Structural and Chemical Biology, Icahn School of Medicine at Mount Sinai, 1425 Madison Avenue, New York, NY, 10029, USA.","Meslamani J, Smith SG, Sanchez R, Zhou MM",,"NHGRI NIH HHS, NIGMS NIH HHS, NCI NIH HHS",3.0,United States +30486838,CHESS,0.993582785,CHESS,0.993582785,,0,1,http://ccb.jhu.edu/chess,301,,"(39.2904,-76.6122)",http://web.archive.org/web/20220331091431/http://ccb.jhu.edu/chess/,2018-11-28,"Center for Computational Biology, McKusick-Nathans Institute of Genetic Medicine, Johns Hopkins University School of Medicine, Baltimore, MD, USA.","Pertea M, Shumate A, Pertea G, Varabyou A, Breitwieser FP, Chang YC, Madugundu AK, 
Pandey A, Salzberg SL",,"NHGRI NIH HHS, NIGMS NIH HHS, National Science Foundation, NIGMS NIH HHS, NHGRI NIH HHS, National Institute of General Medical Sciences, NCI NIH HHS, National Human Genome Research Institute",90.0,United States +33068420,chewie-NS,0.992993156,chewie-NS,0.992993156,Chewie Nomenclature Server,0.940701187,1,http://chewbbaca.online,301,,"(40.6610,-7.9097)",http://web.archive.org/web/20220617201512/https://chewbbaca.online/,2021-01-01,"Instituto de Microbiologia and Instituto de Medicina Molecular João Lobo Antunes, Faculdade de Medicina, Universidade de Lisboa, Av. Professor Egas Moniz, 1649-028 Lisboa, Portugal.","Mamede R, Vila-Cerqueira P, Silva M, Carriço JA, Ramirez M",,"Fundos Europeus Estruturais e de Investimento, Fundação para a Ciência e a Tecnologia, FEDER, FCT",3.0,Portugal +31210271,Chickspress,0.997966588,Chickspress,0.997966588,,0,1,http://geneatlas.arl.arizona.edu,301,,"(32.2217,-110.9265)",http://web.archive.org/web/20220307223514/https://geneatlas.arl.arizona.edu/,2019-01-01,"School of Animal and Comparative Biomedical Sciences, University of Arizona, Tucson AZ, USA.","McCarthy FM, Pendarvis K, Cooksey AM, Gresham CR, Bomhoff M, Davey S, Lyons E, Sonstegard TS, Bridges SM, Burgess SC",,"National Institutes of Health, U.S. Department of Agriculture",5.0,United States +24997141,ChiloDB,0.997956932,ChiloDB,0.997956932,,0,1,http://ento.njau.edu.cn/ChiloDB,"HTTPConnectionPool(host='ento.njau.edu.cn', port=80): Max retries exceeded with url: /ChiloDB (Caused by ConnectTimeoutError(, 'Connection to ento.njau.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20151029011833/http://ento.njau.edu.cn:80/ChiloDB/,2014-07-04,"Department of Entomology, College of Plant Protection, Nanjing Agricultural University, Jiangsu/The key laboratory of Monitoring and Management of Plant Diseases and Insects, Ministry of Agriculture, Nanjing, Jiangsu 210095, China, Department of Computer Science, College of Information Science and Technology, Nanjing Agricultural University, Nanjing, Jiangsu 210095, China and National Key Laboratory of Crop Genetic Improvement and National Centre of Plant Gene Research, Huazhong Agricultural University, Wuhan 430070, China.","Yin C, Liu Y, Liu J, Xiao H, Huang S, Lin Y, Han Z, Li F",,,18.0,"China, China, China" +33662628,Chinese Glioma Genome Atlas,0.975209602,CGGA,0.972158313,Chinese Glioma Genome Atlas,0.975209602,1,http://www.cgga.org.cn,200,,"(36.0649,120.3804)",http://web.archive.org/web/20221005072134/http://www.cgga.org.cn/,2021-02-01,"Beijing Neurosurgical Institute, Capital Medical University, Beijing 100070, China.","Zhao Z, Zhang KN, Wang Q, Li G, Zeng F, Zhang Y, Wu F, Chai R, Wang Z, Zhang C, Zhang W, Bao Z, Jiang T",,"National Natural Science Foundation of China, National Natural Science Foundation of China",76.0,China +23161675,ChIPBase,0.99755013,ChIPBase,0.99755013,,0,1,http://deepbase.sysu.edu.cn/chipbase,"HTTPConnectionPool(host='deepbase.sysu.edu.cn', port=80): Max retries exceeded with url: /chipbase (Caused by ConnectTimeoutError(, 'Connection to deepbase.sysu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160510001838/http://deepbase.sysu.edu.cn:80/chipbase/,2012-11-17,"RNA Information Center, Key Laboratory of Gene Engineering of the Ministry of Education, State Key Laboratory for Biocontrol, Sun Yat-sen University, Guangzhou 510275, P.R. 
China.","Yang JH, Li JH, Jiang S, Zhou H, Qu LH",,,191.0,China +30202990,ChIPprimersDB,0.948491991,ChIPprimersDB,0.948491991,,0,1,"http://umiamihealth.org/bascom-palmer-eye-institute/research/clinical-and-laboratory-research/ocular-oncology-laboratory/chip-primers, http://www.chipprimers.com","301, 301",,"(37.3058,-78.5462), (50.1155,8.6842)","no_wayback, http://web.archive.org/web/20220526173416/https://chipprimers.com/",2019-01-01,"Bascom Palmer Eye Institute, Sylvester Comprehensive Cancer Center, and Interdisciplinary Stem Cell Institute, University of Miami Miller School of Medicine, Miami, FL 33136, USA.","Kurtenbach S, Reddy R, Harbour JW",,"National Cancer Institute, National Cancer Institute, NCI NIH HHS, NCI NIH HHS, Department of Defense, Department of Defense, National Institute of Health Core",3.0,United States +31942977,ChIPSummitDB,0.995177627,ChIPSummitDB,0.995177627,,0,1,http://summit.med.unideb.hu/summitdb,301,,"(47.5317,21.6244)",http://web.archive.org/web/20220226192437/http://summit.med.unideb.hu/summitdb/,2020-01-01,"Department of Biochemistry and Molecular Biology, Faculty of Medicine, University of Debrecen, Egyetem tér 1, Debrecen H-4032, Hungary.","Czipa E, Schiller M, Nagy T, Kontra L, Steiner L, Koller J, Pálné-Szén O, Barta E",,"Ministry of Innovation and Technology in Hungary, Higher Education Institutional Excellence Programme, National Research, Development and Innovation Office of Hungary, National Research, Development and Innovation Office of Hungary",5.0,Hungary +31665454,ChlamDB,0.998569787,ChlamDB,0.998569787,,0,1,http://chlamdb.ch,301,,"(50.1025,8.6299)",http://web.archive.org/web/20221016225813/https://www.chlamdb.ch/,2020-01-01,"Institute of Microbiology, Lausanne University Hospital and University of Lausanne, Bugnon 48, 1011 Lausanne, Switzerland.","Pillonel T, Tagini F, Bertelli C, Greub G",,"Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science 
Foundation, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation",6.0,Switzerland +22904610,chordate proteome history,0.743933582,chordate proteome history,0.743933582,,0,1,http://ioda.univ-provence.fr,"HTTPConnectionPool(host='ioda.univ-provence.fr', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to ioda.univ-provence.fr timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160416051004/http://ioda.univ-provence.fr/,2012-08-01,"INRA, UMR1163 Biotechnologie des Champignons Filamenteux, Aix Marseille Université, ESIL Polytech, 163 avenue de Luminy, CP 925, 13288 Marseille Cedex 09, France.","Levasseur A, Paganini J, Dainat J, Thompson JD, Poch O, Pontarotti P, Gouret P",,,2.0,France +23405067,CHPC2012,0.927504778,CHPC2012,0.927504778,Catalogue of Human Protein Complexes,0.88755808,1,http://www1.i2r.a-star.edu.sg/xlli/CHPC2012/CHPC2012.htm,"HTTPConnectionPool(host='www1.i2r.a-star.edu.sg', port=80): Max retries exceeded with url: /xlli/CHPC2012/CHPC2012.htm (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-02-06,"School of Computer Engineering, Nanyang Technological University, Singapore, Singapore.","Wu M, Yu Q, Li X, Zheng J, Huang JF, Kwoh CK",,,9.0,"Singapore, Singapore" +22718786,ChromoHub,0.996145904,ChromoHub,0.996145904,,0,1,http://www.thesgc.org/chromohub,301,,"(45.5088,-73.5878)",no_wayback,2012-06-19,"Structural Genomics Consortium, University of Toronto, Toronto, ON M5G1L7, Canada.","Liu L, Zhen XT, Denton E, Marsden BD, Schapira M",,"CIHR, Wellcome Trust",46.0,Canada +"26722116, 29564831",ChromothripsisDB,0.99702704,ChromothripsisDB,0.99702704,,0,2,http://cgma.scu.edu.cn/ChromothripsisDB,"HTTPConnectionPool(host='cgma.scu.edu.cn', port=80): Max retries exceeded with url: /ChromothripsisDB (Caused by NewConnectionError(': Failed to 
establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160502110411/http://cgma.scu.edu.cn:80/ChromothripsisDB/,2018-01-01,"Center of Growth, Metabolism, and Aging, Key Laboratory of Bio-Resources and Eco-Environment, College of Life Sciences, Sichuan University, Chengdu, Sichuan 610064, China., Center of Growth, Metabolism, and Aging, Key Laboratory of Bio-Resources and Eco-Environment, College of Life Sciences, Sichuan University, Chengdu, Sichuan, China. haoyang.cai@scu.edu.cn.","Yang J, Deng G, Cai H, Cai H",", ",", ",7.0,"China, China" +23139595,CIBMAN,0.99584347,CIBMAN,0.99584347,,0,1,http://ibsd.gov.in/cibman,302,,"(24.8081,93.9442)",no_wayback,2012-09-11,Medicinal Plants and Horticultural Resources Division.,"Sanabam R, Somkuwar BG, Thingnam G, Moirangthem S, Handique PJ, Huidrom S",,,0.0, +24952649,CicArMiSatDB,0.988039034,CicArMiSatDB,0.988039034,Chickpea Microsatellite Database,0.596938559,1,http://cicarmisatdb.icrisat.org,301,,"(17.3840,78.4564)",http://web.archive.org/web/20170820074802/http://cicarmisatdb.icrisat.org:80/,2014-06-21,None,"Doddamani D, Katta MA, Khan AW, Agarwal G, Shah TM, Varshney RK",,,12.0, +26289427,CicArVarDB,0.995038033,CicArVarDB,0.995038033,,0,1,"http://cicarvardb.icrisat.org/, http://cicarvardb.icrisat.org","301, 301",,"(17.3840,78.4564), (17.3840,78.4564)","no_wayback, no_wayback",2015-08-19,"Research Program Grain Legumes, International Crops Research Institute for the Semi-Arid Tropics (ICRISAT), Hyderabad 502 324, Telangana State, India.","Doddamani D, Khan AW, Katta MA, Agarwal G, Thudi M, Ruperao P, Edwards D, Varshney RK",,,7.0,India +34762703,CicerSpTEdb,0.994646966,CicerSpTEdb,0.994646966,,0,1,http://cicersptedb.easyomics.org/index.php,200,,"(42.6975,23.3241)",http://web.archive.org/web/20220522194907/http://cicersptedb.easyomics.org/index.php,2021-11-11,"African Genome Center, Mohammed VI Polytechnic University, Ben Guerir, Morocco.","Mokhtar MM, Alsamman AM, Abd-Elhalim 
HM, El Allali A",,,0.0,Morocco +27472917,CicerTransDB,0.995746732,CicerTransDB,0.995746732,Cicer Transcription Factor Database,0.750756256,1,http://www.cicertransdb.esy.es,200,,"(54.6892,25.2798)",http://web.archive.org/web/20220803094314/http://cicertransdb.esy.es/,2016-07-29,"National Institute of Plant Genome Research, Jawaharlal Nehru University Campus, Aruna Asaf Ali Marg, New Delhi, 110067, India.","Gayali S, Acharya S, Lande NV, Pandey A, Chakraborty S, Chakraborty N",,Council of Scientific and Industrial Research,5.0,India +22809392,CIDeR,0.997793913,CIDeR,0.997793913,,0,1,http://mips.helmholtz-muenchen.de/cider,200,,"(48.2500,11.5667)",http://web.archive.org/web/20220615213744/http://mips.helmholtz-muenchen.de/cider/,2012-07-18,None,"Lechner M, Höhn V, Brauner B, Dunger I, Fobo G, Frishman G, Montrone C, Kastenmüller G, Waegele B, Ruepp A",,,14.0, +30045691,CIGene,0.99484241,CIGene,0.99484241,,0,1,http://soft.bioinfo-minzhao.org/cigene,406,,,http://web.archive.org/web/20221016234949/https://soft.bioinfo-minzhao.org/cigene/,2018-07-25,"School of Public Health, Institute for Chemical Carcinogenesis, Guangzhou Medical University, 195 Dongfengxi Road, Guangzhou, 510182, China.","Liu Y, Luo M, Li Q, Lu J, Zhao M, Qu H",,"University of the Sunshine Coast, National Natural Science Foundation of China, National Natural Science Foundation of China, the National Key Research and Development Program of China",0.0,China +"23203874, 23203874",CIL-CCDB,0.996889138,CIL-CCDB,0.996889138,an image library-CCDB,0.696443155,1,http://www.cellimagelibrary.org,302,,"(32.9595,-117.2653)",http://web.archive.org/web/20180110203810/http://www.cellimagelibrary.org:80/,2012-11-29,"Center for Research in Biological Systems, Basic Science Building, Room 1000, University of California, San Diego, 9500 Gilman Drive, Department Code 0608, La Jolla, CA 92093-0608, USA. 
dorloff@ncmir.ucsd.edu, Center for Research in Biological Systems, Basic Science Building, Room 1000, University of California, San Diego, 9500 Gilman Drive, Department Code 0608, La Jolla, CA 92093-0608, USA. dorloff@ncmir.ucsd.edu","Orloff DN, Iwasa JH, Martone ME, Ellisman MH, Kane CM, Orloff DN, Iwasa JH, Martone ME, Ellisman MH, Kane CM",", ","NIGMS NIH HHS, NINDS NIH HHS, NCRR NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NINDS NIH HHS, NCRR NIH HHS, NIGMS NIH HHS",48.0,"United States, United States" +31095607,CiliaCarta,0.935106993,CiliaCarta,0.935106993,,0,1,http://bioinformatics.bio.uu.nl/john/syscilia/ciliacarta,302,,"(52.0908,5.1222)",http://web.archive.org/web/20210225051633/http://bioinformatics.bio.uu.nl/john/syscilia/ciliacarta/,2019-05-16,"Centre for Molecular and Biomolecular Informatics, Radboud University Medical Center, Nijmegen, the Netherlands.","van Dam TJP, Kennedy J, van der Lee R, de Vrieze E, Wunderlich KA, Rix S, Dougherty GW, Lambacher NJ, Li C, Jensen VL, Leroux MR, Hjeij R, Horn N, Texier Y, Wissinger Y, van Reeuwijk J, Wheway G, Knapp B, Scheel JF, Franco B, Mans DA, van Wijk E, Képès F, Slaats GG, Toedt G, Kremer H, Omran H, Szymanska K, Koutroumpas K, Ueffing M, Nguyen TT, Letteboer SJF, Oud MM, van Beersum SEC, Schmidts M, Beales PL, Lu Q, Giles RH, Szklarczyk R, Russell RB, Russell RB, Gibson TJ, Johnson CA, Blacque OE, Wolfrum U, Boldt K, Roepman R, Hernandez-Hernandez V, Huynen MA",,"Seventh Framework Programme, Canadian Institutes of Health Research, National Institute for Health Research (NIHR), KRESCENT, Netherlands Genomics Initiative, Seventh Framework Programme, CIHR, NIHR Great Ormond Street Hospital Biomedical Research Center, The Sir Jules Thorn Charitable Trust, CIHR, Seventh Framework Programme, Telethon, Nierstichting, Dutch Governement, Medical Research Council, Biotechnology and Biological Sciences Research Council, Radboud Universitair Medisch Centrum, CIHR, Dutch Research Council (NWO), Radboud Universiteit, Michael 
Smith Foundation for Health Research, Dutch Research Council (NWO), Deutsche Forschungsgemeinschaft, Deutsche Forschungsgemeinschaft, Metakids Foundation",27.0,Netherlands +30004104,CIPEMAB,0.996844947,CIPEMAB,0.996844947,,0,1,http://www.citogenetica.ufes.br,301,,"(-20.3297,-40.2925)",http://web.archive.org/web/20221014195837/https://citogenetica.ufes.br/,2018-06-28,"Departamento de Ciências da Saúde, Centro Universitário Norte do Espírito Santo, Universidade Federal do Espírito Santo, São Mateus, ES, Brazil.","Paresque R, Rodrigues JDS, Righetti KB",,,0.0,Brazil +24339831,Circ2Traits,0.996000826,Circ2Traits,0.996000826,,0,1,http://gyanxet-beta.com/circdb,"HTTPConnectionPool(host='gyanxet-beta.com', port=80): Max retries exceeded with url: /circdb (Caused by ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')))",,,http://web.archive.org/web/20221017054250/https://gyanxet-beta.com/circdb,2013-12-10,"Computational Biology Group, Theory Department, Indian Association for the Cultivation of Science Kolkata, India.","Ghosal S, Das S, Sen R, Basak P, Chakrabarti J",,,238.0,India +32219412,circad,0.970793426,circad,0.970793426,,0,1,http://clingen.igib.res.in/circad,301,,"(26.7907,75.2061)",http://web.archive.org/web/20220601033516/https://clingen.igib.res.in/circad/,2020-01-01,"Genome Informatics Department, CSIR-Institute of Genomics and Integrative Biology, Mathura Road, District-South Delhi, New Delhi-110025, India.","Rophina M, Sharma D, Poojary M, Scaria V",,Council of Scientific and Industrial Research,9.0,India +"32345360, 32345360",CircAtlas,0.997903705,CircAtlas,0.997903705,,0,1,http://circatlas.biols.ac.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220524083528/http://circatlas.biols.ac.cn/,2020-04-28,"Computational Genomics Lab, Beijing Institutes of Life Science, Chinese Academy of Sciences, Beijing, 100101, China., Computational Genomics Lab, Beijing Institutes of Life Science, Chinese Academy of Sciences, 
Beijing, 100101, China.","Wu W, Ji P, Zhao F, Wu W, Ji P, Zhao F",", ","National Natural Science Foundation of China, National Key R&D Program, National Natural Science Foundation of China, National Key R&D Program",142.0,"China, China" +25234927,circBase,0.970117331,circBase,0.970117331,,0,1,http://www.circbase.org,200,,"(52.5244,13.4105)",http://web.archive.org/web/20221015032145/http://www.circbase.org/,2014-09-18,"Max Delbrück Center for Molecular Medicine, 13125 Berlin, Germany.","Glažar P, Papavasileiou P, Rajewsky N",,"DFG Graduate School, MDC-NYU",672.0,Germany +34296749,circExp,0.985390902,circExp,0.985390902,,0,1,"http://soft.bioinfo-minzhao.org/circexp, http://soft.bioinfominzhao.org/circexp","406, HTTPConnectionPool(host='soft.bioinfominzhao.org', port=80): Max retries exceeded with url: /circexp (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,", ","no_wayback, no_wayback",2021-07-01,"School of Science and Engineering, University of the Sunshine Coast, Maroochydore DC, QLD 4558, Australia.","Zhao M, Liu Y, Qu H",,"National Natural Science Foundation of China, National Key Research and Development Program of China, The research start-up fellowship of the University of the Sunshine Coast to M.Z., National Natural Science Foundation of China",2.0,Australia +29194536,circlncRNAnet,0.996753991,circlncRNAnet,0.996753991,,0,1,http://app.cgu.edu.tw/circlnc,"HTTPConnectionPool(host='app.cgu.edu.tw', port=80): Max retries exceeded with url: /circlnc (Caused by ConnectTimeoutError(, 'Connection to app.cgu.edu.tw timed out. 
(connect timeout=5)'))",,,no_wayback,2018-01-01,"Graduate Institute of Biomedical Sciences, College of Medicine, Chang Gung University, Guishan, Taoyuan, Taiwan.","Wu SM, Liu H, Huang PJ, Chang IY, Lee CC, Yang CY, Tsai WS, Tan BC",,,30.0, +26450965,CircNet,0.982483149,CircNet,0.982483149,,0,1,http://circnet.mbc.nctu.edu.tw,"HTTPConnectionPool(host='circnet.mbc.nctu.edu.tw', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180703034910/http://circnet.mbc.nctu.edu.tw:80/,2015-10-07,"Institute of Bioinformatics and Systems Biology, National Chiao Tung University, HsinChu, Taiwan.","Liu YC, Li JR, Sun CH, Andrews E, Chao RF, Lin FM, Weng SL, Hsu SD, Huang CC, Cheng C, Liu CC, Huang HD",,NCI NIH HHS,172.0, +"27365365, 30172046",CIRCpedia,0.990549028,CIRCpedia,0.990549028,,0,2,http://www.picb.ac.cn/rnomics/circpedia,301,,"(39.9075,116.3972)",http://web.archive.org/web/20210803062127/https://www.picb.ac.cn/rnomics/circpedia/,2018-08-29,"Key Laboratory of Computational Biology, CAS Center for Excellence in Brain Science and Intelligence Technology, CAS-MPG Partner Institute for Computational Biology, Shanghai Institutes for Biological Sciences, Chinese Academy of Sciences, Shanghai 200031, China; University of Chinese Academy of Sciences, Beijing 100049, China;, CAS Key Laboratory of Computational Biology, CAS-MPG Partner Institute for Computational Biology, Shanghai Institute of Nutrition and Health, Shanghai Institutes for Biological Sciences, University of Chinese Academy of Sciences, Chinese Academy of Sciences, Shanghai 200031, China.","Zhang XO, Dong R, Zhang Y, Zhang JL, Luo Z, Zhang J, Chen LL, Yang L, Dong R, Ma XK, Li GW, Yang L",", ","National Natural Science Foundation of China, National Natural Science Foundation of China, Ministry of Science and Technology, Ministry of Science and Technology, National Natural Science 
Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",385.0,"China, China, China" +33181824,circR2Cancer,0.993368697,circR2Cancer,0.993368697,,0,1,http://www.biobdlab.cn:8000,200,,,http://web.archive.org/web/20220805203016/http://www.biobdlab.cn:8000/,2020-01-01,"School of Computer, Electronic and Information, Guangxi University, No.100 Daxue East Road, Nanning, Guangxi, 530004, China.","Lan W, Zhu M, Chen Q, Chen B, Liu J, Li M, Chen YP",,"Natural Science Foundation of Guangxi Zhuang Autonomous Region, Hunan Provincial Science and Technology Program, the Natural Science Foundation of Yunnan Province of China, the scientific Research Foundation of Hunan Provincial Education Department, the foundation of Guangxi University, National Natural Science Foundation of China, Science and Technology Base and talent Special project of Guangxi, Key Research and Development Plan of Guangxi",0.0,China +34856391,CircR2Disease,0.969459782,CircR2Disease,0.969459782,,0,1,http://bioinfo.snnu.edu.cn/CircR2Disease_v2.0,"HTTPConnectionPool(host='bioinfo.snnu.edu.cn', port=80): Max retries exceeded with url: /CircR2Disease_v2.0 (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,no_wayback,2021-11-29,"School of Computer Science, Shaanxi Normal University, Xi'an 710119, China.","Fan C, Lei X, Tie J, Zhang Y, Wu F, Pan Y",,"National Natural Science Foundation of China, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities, National Natural Science Foundation of China",0.0,China +27725737,circRNADb,0.975911617,circRNADb,0.975911617,,0,1,http://reprod.njmu.edu.cn/circrnadb,"HTTPConnectionPool(host='reprod.njmu.edu.cn', port=80): Max retries exceeded with url: /circrnadb (Caused by 
ReadTimeoutError(""HTTPConnectionPool(host='reprod.njmu.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220620112705/http://reprod.njmu.edu.cn/circrnadb/,2016-10-11,"Department of Biomedical Engineering, Nanjing University of Aeronautics and Astronautics, Nanjing 211106, China.","Chen X, Han P, Zhou T, Guo X, Song X, Li Y",,,169.0,China +33121433,circVAR,0.893169641,circVAR,0.893169641,,0,1,http://soft.bioinfo-minzhao.org/circvar,406,,,http://web.archive.org/web/20221017021854/https://soft.bioinfo-minzhao.org/circvar/,2020-10-29,"School of Science and Engineering, University of the Sunshine Coast, Maroochydore DC, Queensland, 4558, Australia.","Zhao M, Qu H",,"National Key Research and Development Program of China, National Natural Science Foundation of China, the research start-up fellowship of the University of the Sunshine Coast",3.0,Australia +29059379,CirGRDB,0.997688591,CirGRDB,0.997688591,,0,1,http://cirgrdb.biols.ac.cn,200,,"(39.9075,116.3972)",no_wayback,2018-01-01,"State Key Laboratory of Medical Genetics, School of Life Sciences, Central South University, Changsha, Hunan 410078, China.","Li X, Shi L, Zhang K, Wei W, Liu Q, Mao F, Li J, Cai W, Chen H, Teng H, Li J, Sun Z, Sun Z",,,10.0,China +29092931,Cistrome Cancer,0.990812868,Cistrome Cancer,0.990812868,,0,1,http://cistrome.org/CistromeCancer,301,,"(42.3584,-71.0598)",no_wayback,2017-11-01,"Shanghai Key Laboratory of Tuberculosis, Clinical Translational Research Center, Shanghai Pulmonary Hospital, Tongji University, Shanghai, China.","Mei S, Meyer CA, Zheng R, Qin Q, Wu Q, Jiang P, Li B, Shi X, Wang B, Fan J, Shih C, Brown M, Zang C, Liu XS",,"NCI, NCI NIH HHS, NCI NIH HHS, NIGMS NIH HHS",68.0,China +23508969,CistromeFinder,0.990140975,CistromeFinder,0.990140975,,0,1,http://cistrome.org/finder,404,,,http://web.archive.org/web/20160518115643/http://cistrome.org:80/finder/,2013-03-18,"Department of Bioinformatics, School of Life Science and Technology, Tongji 
University, Shanghai 20092, China.","Sun H, Qin B, Liu T, Wang Q, Liu J, Wang J, Lin X, Yang Y, Taing L, Rao PK, Brown M, Zhang Y, Long HW, Liu XS",,"NHGRI NIH HHS, NHGRI NIH HHS",10.0,China +29688375,CITGeneDB,0.997810602,CITGeneDB,0.997810602,,0,1,http://citgenedb.yubiolab.org,200,,"(37.7621,-122.3971)",http://web.archive.org/web/20221016205641/http://citgenedb.yubiolab.org/,2018-01-01,"Department of Electrical and Computer Engineering, Texas A&M University, College Station, TX 77843, USA.","Li J, Deng SP, Wei G, Yu P",,,2.0,United States +32025315,CitGVD,0.992236495,CitGVD,0.992236495,citrus genomic variation database,0.845783427,1,http://citgvd.cric.cn/home,"HTTPConnectionPool(host='citgvd.cric.cn', port=80): Max retries exceeded with url: /home (Caused by ConnectTimeoutError(, 'Connection to citgvd.cric.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20211019202556/http://citgvd.cric.cn/home,2020-02-01,"1Citrus Research Institute, Southwest University/Chinese Academy of Agricultural Sciences, 400712 Chongqing, China.","Li Q, Qi J, Qin X, Dou W, Lei T, Hu A, Jia R, Jiang G, Zou X, Long Q, Xu L, Peng A, Yao L, Chen S, He Y",,,5.0,China +24489955,Citrus sinensis annotation project,0.768495091,,0,Citrus sinensis annotation project,0.768495091,1,http://citrus.hzau.edu.cn,200,,"(30.5833,114.2667)",http://web.archive.org/web/20221013114000/https://citrus.hzau.edu.cn/,2014-01-28,"Center for Bioinformatics, College of Life Science and Technology, Huazhong Agricultural University, Wuhan, P.R. China ; School of Science, Huazhong Agricultural University, Wuhan, P.R. 
China.","Wang J, Chen D, Lei Y, Chang JW, Hao BH, Xing F, Li S, Xu Q, Deng XX, Chen LL",,,18.0,"China, China" +33181825,CitrusKB,0.99783051,CitrusKB,0.99783051,,0,1,http://bioinfo.deinfo.uepg.br/citrus,301,,"(-25.0950,-50.1619)",http://web.archive.org/web/20220616115613/http://bioinfo.deinfo.uepg.br/citrus/,2020-01-01,"Departamento de Informática, Universidade Estadual de Ponta Grossa (UEPG), Av. Carlos Cavalcanti, 4748, 84030-900, Ponta Grossa, PR, Brazil.","Ferrasa A, Murata MM, Cofre TDCG, Cavallini JS, Peron G, Julião MHM, Belasque J, Ferreira H, Ferro MIT, Leite RP, Penha HA, Carvalho FMS, Varani AM, Herai RH, Ferro JA",,"Fundação Araucária, Coordenação de Aperfeiçoamento de Pessoal de Nível Superior, Conselho Nacional de Desenvolvimento Científico e Tecnológico, scientific initiation scholarship, Coordenação de Aperfeiçoamento de Pessoal de Nível Superior, Fundação de Amparo à Pesquisa do Estado de São Paulo",1.0,Brazil +33109630,CKTTD,0.997114658,CKTTD,0.997114658,Checkpoint therapeutic target database,0.942019236,1,http://www.ckttdb.org,200,,"(19.0728,72.8826)",http://web.archive.org/web/20220523082520/http://ckttdb.org/,2020-10-01,"Department of Oncology, Shengjing Hospital of China Medical University, Shenyang, Liaoning, China.","Zhang Y, Yao Y, Chen P, Liu Y, Zhang H, Liu H, Liu Y, Xu H, Tian X, Wang Z, Chu P, Zhao D, Liu H, Zhang C, Chen S, Zhao Y, Liu C, Yang Y",,"Technological Special Project of Liaoning Province of China, the Construction of Liaoning Cancer Research Center, the National Natural Science Foundation in China, National Natural Science Foundation of China, the Fundamental Research Fund for Central University, National Natural Science Foundation in China, the Fundamental Research Fund for Central University",2.0,"China, China" +24678985,ClearedLeavesDB,0.982267678,ClearedLeavesDB,0.982267678,Cleared Leaf Image Database,0.655172122,1,http://clearedleavesdb.org,"HTTPConnectionPool(host='clearedleavesdb.org', port=80): Max retries exceeded 
with url: / (Caused by ConnectTimeoutError(, 'Connection to clearedleavesdb.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220124160941/http://clearedleavesdb.org/,2014-03-28,None,"Das A, Bucksch A, Price CA, Weitz JS",,,9.0, +22916227,CLEARPOND,0.993278623,CLEARPOND,0.993278623,Resource for,0.694889188,1,http://clearpond.northwestern.edu,302,,"(42.0411,-87.6901)",http://web.archive.org/web/20221105235848/https://clearpond.northwestern.edu/,2012-08-20,"Northwestern University, Evanston, Illinois, USA. v-marian@northwestern.edu","Marian V, Bartolotti J, Chabal S, Shook A",,"NICHD NIH HHS, NICHD NIH HHS",90.0,United States +"26582918, 29165669, 31777943",ClinVar,0.996684611,ClinVar,0.996684611,,0,3,http://www.ncbi.nlm.nih.gov/clinvar,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221110055530/http://www.ncbi.nlm.nih.gov/clinvar/,2020-01-01,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20893, USA landrum@ncbi.nlm.nih.gov., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA.","Landrum MJ, Lee JM, Benson M, Brown G, Chao C, Chitipiralla S, Gu B, Hart J, Hoffman D, Hoover J, Jang W, Katz K, Ovetsky M, Riley G, Sethi A, Tully R, Villamarin-Salomon R, Rubinstein W, Maglott DR, Landrum MJ, Lee JM, Benson M, Brown GR, Chao C, Chitipiralla S, Gu B, Hart J, Hoffman D, Jang W, Karapetyan K, Katz K, Liu C, Maddipatla Z, Malheiro A, McDaniel K, Ovetsky M, Riley G, Zhou G, Holmes JB, Kattman BL, Maglott DR, Landrum MJ, Chitipiralla S, Brown GR, Chen C, Gu B, Hart J, Hoffman D, Jang W, Kaur K, Liu C, Lyoshin V, Maddipatla Z, Maiti R, Mitchell J, O'Leary N, Riley GR, Shi W, Zhou G, Schneider V, Maglott D, Holmes JB, Kattman BL",", , ","Intramural NIH HHS, , National 
Institutes of Health",1977.0,"United States, United States, United States" +25652745,CLIPdb,0.996144474,CLIPdb,0.996144474,,0,1,http://clipdb.ncrnalab.org,200,,"(37.7621,-122.3971)",http://web.archive.org/web/20221010123027/https://clipdb.ncrnalab.org/,2015-02-05,"MOE Key Laboratory of Bioinformatics, Center for Synthetic and Systems Biology and Center for Plant Biology, School of Life Sciences, Tsinghua University, Beijing, 100084, China. yang.thomas.yucheng@gmail.com.","Yang YC, Di C, Hu B, Zhou M, Liu Y, Song N, Li Y, Umetsu J, Lu ZJ",,,108.0,China +23193260,Clone,0.87266922,Clone,0.87266922,,0,1,http://www.ncbi.nlm.nih.gov/clone,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221016071038/https://www.ncbi.nlm.nih.gov/clone/,2012-11-27,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20892, USA. schneiva@ncbi.nlm.nih.gov","Schneider VA, Chen HC, Clausen C, Meric PA, Zhou Z, Bouk N, Husain N, Maglott DR, Church DM",,Intramural NIH HHS,11.0,United States +25913159,ClosIndb,0.993508637,ClosIndb,0.993508637,,0,1,http://bif.uohyd.ac.in/closindb,301,,"(17.3840,78.4564)",http://web.archive.org/web/20210302193539/http://bif.uohyd.ac.in/closindb/,2015-04-23,"Department of Biotechnology and Bioinformatics, School of Life Sciences, University of Hyderabad, Hyderabad 500046, India.","Polavarapu R, Meetei PA, Midha M, Bharne D, Vindal V",,RGYI,2.0,India +33735471,CLRP,0.958588719,CLRP,0.958588719,,0,1,"http://physics.carleton.ca/clrp/eye_plaque_v2, http://doi.org/10.22215/clrp/EPv2","302, 301",,"(45.4112,-75.6981), (37.7621,-122.3971)","http://web.archive.org/web/20220709151753/https://physics.carleton.ca/clrp/eye_plaque_v2, no_wayback",2021-04-17,"Carleton Laboratory for Radiotherapy Physics, Department of Physics, Carleton University, Ottawa, ON, K1S 5B6, Canada.","Safigholi H, Parsons Z, Deering SG, Thomson RM",,"Ministry of Research and Innovation of Ontario, Canada Research Chairs, Natural 
Sciences and Engineering Research Council of Canada",0.0,Canada +23104377,ClusterMine360,0.942080637,ClusterMine360,0.942080637,,0,1,http://www.clustermine360.ca,"HTTPConnectionPool(host='www.clustermine360.ca', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.clustermine360.ca timed out. (connect timeout=5)'))",,,no_wayback,2012-10-26,"Department of Chemistry, Center for Advanced Research in Environmental Genomics, University of Ottawa, Ottawa, Ontario K1N 6N5, Canada.","Conway KR, Boddy CN",,,60.0,Canada +23661693,CMAP,0.981967479,CMAP,0.981967479,Complement Map Database,0.876230553,1,http://www.complement.us/cmap,403,,,no_wayback,2013-05-09,"Department of Pathology and Laboratory Medicine, University of Pennsylvania, Philadelphia, PA 19104, USA.","Yang K, Dinasarapu AR, Reis ES, Deangelis RA, Ricklin D, Subramaniam S, Lambris JD",,"NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NEI NIH HHS, NEI NIH HHS, NIGMS NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",4.0,United States +30357356,CMAUP,0.998420835,CMAUP,0.998420835,Collective Molecular Activities of Useful Plants,0.974408348,1,http://bidd2.nus.edu.sg/CMAUP,"HTTPConnectionPool(host='bidd2.nus.edu.sg', port=80): Max retries exceeded with url: /CMAUP (Caused by ConnectTimeoutError(, 'Connection to bidd2.nus.edu.sg timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20200215030635/http://bidd2.nus.edu.sg:80/CMAUP/,2019-01-01,"The State Key Laboratory of Chemical Oncogenomics, Key Laboratory of Chemical Biology, Tsinghua University Shenzhen Graduate School, Shenzhen Technology and Engineering Laboratory for Personalized Cancer Diagnostics and Therapeutics, Shenzhen Kivita Innovative Drug Discovery Institute, Guangdong 518055, P. R. 
China.","Zeng X, Zhang P, Wang Y, Qin C, Chen S, He W, Tao L, Tan Y, Gao D, Wang B, Chen Z, Chen W, Jiang YY, Chen YZ",,"Shenzhen Municipal Government, Shenzhen Municipal Government, Shenzhen Municipal Government, National Natural Science Foundation of China, Zhejiang Province Ministry of Science and Technology",16.0,China +33693668,CMBD,0.997389793,CMBD,0.997389793,,0,1,http://www.sysbio.org.cn/CMBD,301,,"(22.2783,114.1747)",no_wayback,2021-03-01,"Institutes for Systems Genetics, Frontiers Science Center for Disease-related Molecular Network, West China Hospital, Sichuan University, Chengdu, Sichuan 610041, China.","Chen J, Liu X, Shen L, Lin Y, Shen B",,"National Key Research and Development Program of China, National Natural Science Foundation of China",2.0,"China, China" +30668638,CMEP,0.926891863,CMEP,0.926891863,Circulating MicroRNA Expression Profiling,0.608082005,1,http://syslab5.nchu.edu.tw/CMEP,302,,"(24.1469,120.6839)",no_wayback,2019-09-01,Institute of Genomics and Bioinformatics.,"Li JR, Tong CY, Sung TJ, Kang TY, Zhou XJ, Liu CC",,"MOE, Featured Areas Research Center Program, Higher Education Sprout Project, NCI NIH HHS, National Institutes of Health, Advanced Plant Biotechnology Center, Ministry of Education",8.0, +25885062,CmMDb,0.993868947,CmMDb,0.993868947,Cucumis melo L,0.824225145,1,http://65.181.125.102/cmmdb2/index.html,"HTTPConnectionPool(host='65.181.125.102', port=80): Max retries exceeded with url: /cmmdb2/index.html (Caused by ConnectTimeoutError(, 'Connection to 65.181.125.102 timed out. 
(connect timeout=5)'))",,,no_wayback,2015-04-17,"National Bureau of Plant Genetic Resources, Genomics resources div., New Delhi-12, India.","Bhawna, Chaduvula PK, Bonthala VS, Manjusha V, Siddiq EA, Polumetla AK, Prasad GM",,,3.0,India +32986829,CMNPD,0.998440802,CMNPD,0.998440802,,0,1,http://www.cmnpd.org,301,,"(22.2783,114.1747)",http://web.archive.org/web/20220616152404/https://www.cmnpd.org/,2021-01-01,"State Key Laboratory of Natural and Biomimetic Drugs, School of Pharmaceutical Sciences, Peking University, Beijing 100191, China.","Lyu C, Chen T, Qiang B, Liu N, Wang H, Zhang L, Liu Z",,"National Major Scientific and Technological Special Project, National Major Scientific and Technological Special Project, National Key Technology R&D Program",17.0,China +25398898,CMPD,0.993044734,CMPD,0.993044734,cancer mutant proteome database,0.959299552,1,http://cgbc.cgu.edu.tw/cmpd,"HTTPConnectionPool(host='cgbc.cgu.edu.tw', port=80): Max retries exceeded with url: /cmpd (Caused by ConnectTimeoutError(, 'Connection to cgbc.cgu.edu.tw timed out. (connect timeout=5)'))",,,no_wayback,2014-11-14,"Bioinformatics Core Laboratory, Chang Gung University, Taoyuan 333, Taiwan Molecular Medicine Research Center, Chang Gung University, Taoyuan 333, Taiwan.","Huang PJ, Lee CC, Tan BC, Yeh YM, Julie Chu L, Chen TW, Chang KP, Lee CY, Gan RC, Liu H, Tang P",,,6.0, +26062809,CMRegNet,0.995652676,CMRegNet,0.995652676,,0,1,http://lgcm.icb.ufmg.br/cmregnet,"HTTPConnectionPool(host='lgcm.icb.ufmg.br', port=80): Max retries exceeded with url: /cmregnet (Caused by ConnectTimeoutError(, 'Connection to lgcm.icb.ufmg.br timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160622023717/http://www.lgcm.icb.ufmg.br:80/cmregnet/,2015-06-11,"Graduate Program in Bioinformatics, Institute of Biological Sciences, Federal University of Minas Gerais (Universidade Federal de Minas Gerais), Belo Horizonte, Minas Gerais, Brazil. 
vabreu@isoladas.grad.ufmg.br.","Abreu VA, Almeida S, Tiwari S, Hassan SS, Mariano D, Silva A, Baumbach J, Azevedo V, Röttger R",,Villum Fonden,2.0,Brazil +23630576,CMS,0.961046875,CMS,0.961046875,Cancer methylome system,0.863095567,1,http://cbbiweb.uthscsa.edu/KMethylomes,"HTTPConnectionPool(host='cbbiweb.uthscsa.edu', port=80): Max retries exceeded with url: /KMethylomes (Caused by ConnectTimeoutError(, 'Connection to cbbiweb.uthscsa.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20191020195442/http://cbbiweb.uthscsa.edu:80/KMethylomes/,2013-04-22,"Department of Molecular Medicine/Institute of Biotechnology, University of Texas Health Science Center at San Antonio, San Antonio, Texas, United States of America.","Gu F, Doderer MS, Huang YW, Roa JC, Goodfellow PJ, Kizer EL, Huang TH, Chen Y",,"NCI NIH HHS, NCATS NIH HHS, NCATS NIH HHS, NCATS NIH HHS, NCI NIH HHS, NIEHS NIH HHS, NCI NIH HHS",27.0,United States +31813095,CMVdb,0.988853097,CMVdb,0.988853097,CytoMegaloVirus Infection Database,0.726735294,1,"http://shaktisahislab.com/include/CMV/, http://weislab.com/WeiDOCK/include/content/CMV","200, 301",,"(51.5085,-0.1257), (-33.8678,151.2073)","http://web.archive.org/web/20220720115255/http://shaktisahislab.com/include/CMV/, no_wayback",2019-12-07,"Wuxi School of Medicine, Jiangnan University, Li Lake Avenue, Wuxi, 214122, Jiangsu, China.","Kaushik AC, Mehmood A, Upadhyay AK, Paul S, Srivastava S, Mali P, Xiong Y, Dai X, Wei DQ, Sahi S",,"Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, National Natural Science Foundation of China, The Technology Development Funding of Wuxi, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, Government of Jiangsu Province, Science and Technology Department of Henan Province, Ministry of Science and Technology of the People's Republic of China, State Key Lab of 
Microbial Metabolism and Joint Research Funds for Medical and Engineering and Scientific Research at Shanghai Jiao Tong University, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Jiangnan University, Ministry of Science and Technology of the People's Republic of China",2.0,China +31901979,CNAdbCC,0.992988825,CNAdbCC,0.992988825,,0,1,http://cailab.labshare.cn/CNAdbCC,301,,"(37.3394,-121.8950)",http://web.archive.org/web/20210525173145/http://cailab.labshare.cn/CNAdbCC/,2020-01-04,"Key Laboratory of Bio-Resources and Eco-Environment, Center of Growth, Metabolism, and Aging, College of Life Sciences, Sichuan University, Chengdu, 610064, China.","Luo H, Xu X, Yang J, Wang K, Wang C, Yang P, Cai H",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China (CN)",2.0,China +33095860,CNCDatabase,0.997600734,CNCDatabase,0.997600734,Cornell Non-coding Cancer driver Database,0.958675064,1,http://cncdatabase.med.cornell.edu,"HTTPConnectionPool(host='cncdatabase.med.cornell.edu', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to cncdatabase.med.cornell.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20221017153918/https://cncdatabase.med.cornell.edu/,2021-01-01,"Department of Epidemiology and Biostatistics, Memorial Sloan Kettering Cancer Center, New York, NY 10017, USA.","Liu EM, Martinez-Fundichely A, Bollapragada R, Spiewack M, Khurana E",,"NCI NIH HHS, NCI NIH HHS, National Institutes of Health",2.0,United States +33010163,cncRNAdb,0.991504371,cncRNAdb,0.991504371,,0,1,http://www.rna-society.org/cncrnadb,301,,"(40.2338,-111.6585)",http://web.archive.org/web/20221102055635/http://www.rna-society.org/cncrnadb/,2021-01-01,"Shunde Hospital, Southern Medical University (The First People's Hospital of Shunde Foshan), Foshan 528308, China.","Huang Y, Wang J, Zhao Y, Wang H, Liu T, Li Y, Cui T, Li W, Feng Y, Luo J, Gong J, Ning L, Zhang Y, Wang D, Zhang Y",,"National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Basic and Applied Basic Research Fund, Basic and Applied Basic Research Fund, National Natural Science Foundation of China",10.0,China +32952115,CNGBdb,0.99559629,CNGBdb,0.99559629,China National GeneBank DataBase,0.846177836,1,http://db.cngb.org,200,,"(37.3394,-121.8950)",http://web.archive.org/web/20221101094654/https://db.cngb.org/,2020-08-01,"China National GeneBank, Shenzhen 518120, China.","Chen FZ, You LJ, Yang F, Wang LN, Guo XQ, Gao F, Hua C, Tan C, Fang L, Shan RQ, Zeng WJ, Wang B, Wang R, Xu X, Wei XF",,,33.0,"China, China" +32705130,CNSA,0.969382127,CNSA,0.969382127,CNGB,0.596538961,1,http://db.cngb.org/cnsa,301,,"(37.3394,-121.8950)",http://web.archive.org/web/20221028181358/https://db.cngb.org/cnsa/,2020-01-01,"China National GeneBank, Shenzhen 518120, China.","Guo X, Chen F, Gao F, Li L, Liu K, You L, Hua C, Yang F, Liu W, Peng C, Wang L, Yang X, Zhou F, Tong J, Cai J, Li Z, Wan B, Zhang L, Yang T, Zhang M, Yang L, Yang Y, Zeng W, Wang B, Wei X, Xu X",,Guangdong Provincial Key Laboratory of Genome 
Read and Write,40.0,"China, China" +22826268,CNVD,0.973126009,CNVD,0.973126009,Copy Number Variation in Disease database,0.926422502,1,http://bioinfo.hrbmu.edu.cn/CNVD,"HTTPConnectionPool(host='bioinfo.hrbmu.edu.cn', port=80): Max retries exceeded with url: /CNVD (Caused by ConnectTimeoutError(, 'Connection to bioinfo.hrbmu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20140722153438/http://bioinfo.hrbmu.edu.cn/CNVD/,2012-07-23,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Qiu F, Xu Y, Li K, Li Z, Liu Y, DuanMu H, Zhang S, Li Z, Chang Z, Zhou Y, Zhang R, Zhang S, Li C, Zhang Y, Liu M, Li X",,,16.0,China +30598077,CNVdigest,0.993451834,CNVdigest,0.993451834,,0,1,http://cnv.gtxlab.com,200,,"(33.9192,-118.4165)",http://web.archive.org/web/20220617070804/http://cnv.gtxlab.com/,2018-12-31,"School of Computer Science, National University of Defense Technology, Changsha, 410073, China.","Yang X, Song Z, Wu C, Wang W, Li G, Zhang W, Wu L, Lu K",,,5.0,China +34259866,CNVIntegrate,0.995991111,CNVIntegrate,0.995991111,,0,1,http://cnvintegrate.cgm.ntu.edu.tw,200,,"(25.0478,121.5319)",no_wayback,2021-07-01,"Bioinformatics and Biostatistics Core, Center of Genomic and Precision Medicine, National Taiwan University, Taipei 10055, Taiwan.","Chattopadhyay A, Teoh ZH, Wu CY, Juang JJ, Lai LC, Tsai MH, Wu CH, Lu TP, Chuang EY",,"Center for Biotechnology, National Taiwan University, Taiwan, Center of Genomics and Precision Medicine, Ministry of Science and Technology, Taiwan",0.0, +32392296,CoCoCoNet,0.998220623,CoCoCoNet,0.998220623,,0,1,http://milton.cshl.edu/CoCoCoNet,301,,"(40.8257,-73.4676)",http://web.archive.org/web/20221017044310/http://milton.cshl.edu/CoCoCoNet/,2020-07-01,"Stanley Institute for Cognitive Genomics, Cold Spring Harbor Laboratory, 500 Sunnyside Blvd., Woodbury, NY 11797, USA.","Lee J, Shah M, Ballouz S, Crow M, Gillis J",,"NIMH NIH HHS, NIMH NIH HHS, National Institutes of Health, 
National Institutes of Health, NLM NIH HHS, National Institutes of Health",4.0,United States +33423696,COCONUT,0.993007143,COCONUT,0.993007143,COlleCtion of Open Natural prodUcTs,0.784143726,1,http://coconut.naturalproducts.net,301,,"(50.9288,11.5899)",http://web.archive.org/web/20220508124218/https://coconut.naturalproducts.net/,2021-01-10,"Institute for Inorganic and Analytical Chemistry, University Friedrich-Schiller, Lessing Strasse 8, 07743, Jena, Germany. maria.sorokina@uni-jena.de.","Sorokina M, Merseburger P, Rajan K, Yirik MA, Steinbeck C",,Projekt DEAL,36.0,Germany +22070882,COD,0.984296083,COD,0.984296083,Crystallography Open Database,0.856681943,1,http://www.crystallography.net,200,,"(54.6892,25.2798)",http://web.archive.org/web/20221102185305/https://www.crystallography.net/,2011-11-08,"Department of Protein - DNA Interactions, Vilnius University Institute of Biotechnology, Graiciuno 8, LT-02241 Vilnius, France. grazulis@ibt.lt","Gražulis S, DaÅ¡kevič A, Merkys A, Chateigner D, Lutterotti L, Quirós M, Serebryanaya NR, Moeck P, Downs RT, Le Bail A",,,97.0,France +25270877,CODEX,0.997284114,CODEX,0.997284114,,0,1,http://codex.stemcells.cam.ac.uk,302,,"(52.2000,0.1167)",no_wayback,2014-09-30,"Department of Haematology, Wellcome Trust-MRC Cambridge Stem Cell Institute & Cambridge Institute for Medical Research, Cambridge University, Cambridge CB2 0XY, UK.","Sánchez-Castillo M, Ruau D, Wilkinson AC, Ng FS, Hannah R, Diamanti E, Lombard P, Wilson NK, Gottgens B",,"Biotechnology and Biological Sciences Research Council, Wellcome Trust, Wellcome Trust, Cancer Research UK, Medical Research Council, Medical Research Council, Blood Cancer UK, Medical Research Council",71.0, +23846747,CoDNaS,0.99604851,CoDNaS,0.99604851,Conformational Diversity of Native State,0.846067939,1,http://www.codnas.com.ar,"HTTPConnectionPool(host='www.codnas.com.ar', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 
-2] Name or service not known'))",,,http://web.archive.org/web/20141218141156/http://codnas.com.ar/,2013-07-11,"Departamento de Ciencia y Tecnología, Universidad Nacional de Quilmes, B1876BXD, Buenos Aires, Argentina.","Monzon AM, Juritz E, Fornasari MS, Parisi G",,,18.0,Argentina +34954795,CoDNaS-RNA,0.989147276,CoDNaS-RNA,0.989147276,,0,1,"http://ufq.unq.edu.ar/codnasrna, http://codnas-rna.bioinformatica.org","301, 301",,"(-34.7083,-58.2917), (39.0437,-77.4875)","http://web.archive.org/web/20221016223306/http://ufq.unq.edu.ar/codnasrna/, no_wayback",2021-12-25,"Departamento de Ciencia y Tecnología, Universidad Nacional de Quilmes, Buenos Aires, Argentina.","Buitrón MG, Cahui RRT, Ríos EG, Hirsh L, Parisi G, Fornasari MS, Palopoli N",,"CONICET, Consejo Nacional de Investigaciones Científicas y Técnicas, Agencia Nacional de Promoción de la Investigación, el Desarrollo Tecnológico y la Innovación, Universidad Nacional de Quilmes",0.0,Argentina +31029701,Codon and Codon-Pair Usage Tables,0.987904727,CoCoPUTs,0.973919183,Codon and Codon-Pair Usage Tables,0.987904727,1,http://hive.biochemistry.gwu.edu/review/codon2,301,,"(38.8951,-77.0364)",no_wayback,2019-04-26,"Division of Plasma Protein Therapeutics, Office of Tissue and Advanced Therapies, Center for Biologics Evaluation and Research, Food and Drug Administration, Silver Spring, MD 20993, USA.","Alexaki A, Kames J, Holcomb DD, Athey J, Santana-Quintero LV, Lam PVN, Hamasaki-Katagiri N, Osipova E, Simonyan V, Bar H, Komar AA, Kimchi-Sarfaty C",,U.S. Food and Drug Administration,41.0,United States +30357342,CoevDB,0.997794569,CoevDB,0.997794569,,0,1,http://phylodb.unil.ch/CoevDB,"HTTPConnectionPool(host='phylodb.unil.ch', port=80): Max retries exceeded with url: /CoevDB (Caused by ConnectTimeoutError(, 'Connection to phylodb.unil.ch timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20200204075009/http://phylodb.unil.ch:80/CoevDB/,2019-01-01,"Department of Computational Biology, University of Lausanne, Biophore, 1015 Lausanne, Switzerland.","Meyer X, Dib L, Salamin N",,"Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation",1.0,Switzerland +28334239,coexpressMAP,0.980166078,coexpressMAP,0.980166078,human-mouse general co-expression difference database,0.907979217,1,http://www.bioapp.org/coexpressMAP,301,,"(36.0649,120.3804)",no_wayback,2018-09-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.","Liu D, Zhao L, Chen Y, Wang Z, Xu J, Li Y, Lei C, Hu S, Niu M, Jiang Y",,Natural Science Foundation of Heilongjiang Province,0.0,China +32436316,CoFly,0.555579185,CoFly,0.555579185,,0,1,http://bioinformatics.fafu.edu.cn/fly,"HTTPConnectionPool(host='bioinformatics.fafu.edu.cn', port=80): Max retries exceeded with url: /fly (Caused by ConnectTimeoutError(, 'Connection to bioinformatics.fafu.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220124143556/http://bioinformatics.fafu.edu.cn/fly/,2020-05-20,"Department of Bioinformatics, School of Life Sciences, Fujian Agriculture and Forestry University, Fuzhou, China.","Liu W, Wang Y, He H",,"Open Project of Key laboratory of Loquat Germplasm Innovation and Utilization, Putian University, Fujian Province, National Natural Science Foundation of China",1.0,China +33167031,COG,0.928446889,COG,0.928446889,Clusters of Orthologous Genes,0.892548233,1,http://www.ncbi.nlm.nih.gov/research/COG,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221006163333/http://www.ncbi.nlm.nih.gov/research/cog/,2021-01-01,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, Maryland 20894, USA.","Galperin MY, Wolf YI, Makarova KS, Vera Alvarez R, Landsman D, Koonin EV",,"National Institutes of Health, NLM NIH HHS, Natural Environment Research Council",55.0,United States +25428365,COGs,0.89940232,COGs,0.89940232,Clusters of Orthologous Groups of proteins,0.764110201,1,http://www.ncbi.nlm.nih.gov/COG,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20200827092446/https://www.ncbi.nlm.nih.gov/COG/,2014-11-26,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 2094, USA.","Galperin MY, Makarova KS, Wolf YI, Koonin EV",,Intramural NIH HHS,544.0,United States +34964846,COGVIC,0.994361281,COGVIC,0.994361281,Catalog of Germline Variants in,0.96474456,1,http://www.cogvic.vip,200,,"(23.1167,113.2500)",http://web.archive.org/web/20211128155452/http://cogvic.vip/,2021-12-01,"Department of Thoracic Surgery, Nanfang Hospital, Southern Medical University, 1838 Guang Zhou Avenue North, Guangzhou 510515, P. R. 
China.","Shi X, Li R, Zhai J, Chen AM, Huang K, Zheng Z, Chen Z, Dong X, Liu X, Lu D, Feng S, Diao D, Ren P, Liu Z, Morahan G, Cai K",,"the Major Science and Technology Planning Project of Guangdong Province, the Research Initiative Fund of Southern Hospital 2018, the Science and Technology Program of Guangzhou, the National Natural Science Foundation of China",0.0,China +22275896,COINS,0.97806251,COINS,0.97806251,and,0.496829301,1,http://coins.mrn.org,"HTTPConnectionPool(host='coins.mrn.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to coins.mrn.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20190126101903/https://coins.mrn.org/,2011-12-23,"The Mind Research Network Albuquerque, NM, USA.","Scott A, Courtney W, Wood D, de la Garza R, Lane S, King M, Wang R, Roberts J, Turner JA, Calhoun VD",,"NIBIB NIH HHS, NIBIB NIH HHS, NIBIB NIH HHS",85.0,United States +32073269,COLMAR Lipids,0.91474843,COLMAR Lipids,0.91474843,,0,1,http://spin.ccic.osu.edu/index.php/colmarm/index2,301,,"(39.9612,-82.9988)",no_wayback,2020-03-04,None,"Wang C, Timári I, Zhang B, Li DW, Leggett A, Amer AO, Bruschweiler-Li L, Kopec RE, Brüschweiler R",,"NIAID NIH HHS, Ohio State University, National Institute of General Medical Sciences, NIGMS NIH HHS",6.0, +33181822,Color Data,0.852575928,Color Data,0.852575928,,0,1,http://data.color.com,301,,"(33.9192,-118.4165)",http://web.archive.org/web/20221017032218/https://data.color.com/,2020-01-01,"Color Genomics, 831 Mitten Road, Suite 100, Burlingame, CA, 94010, USA.","Berger MJ, Williams HE, Barrett R, Zimmer AD, McKennon W, Hong H, Ginsberg J, Zhou AY, Neben CL",,Color Genomics,3.0,United States +33313674,ColorCells,0.997294188,ColorCells,0.997294188,,0,1,http://rna.sysu.edu.cn/colorcells,302,,"(39.9906,116.2887)",http://web.archive.org/web/20220120103402/https://rna.sysu.edu.cn/colorcells/,2021-07-01,"Key Laboratory of Gene Engineering of the Ministry of Education, State Key 
Laboratory for Biocontrol, Sun Yat-sen University, Guangzhou 510275, P. R. China.","Zheng LL, Xiong JH, Zheng WJ, Wang JH, Huang ZL, Chen ZR, Sun XY, Zheng YM, Zhou KR, Li B, Liu S, Qu LH, Yang JH",,"Guangdong Province, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Guangdong Province Key Laboratory of Computational Science, Pearl River S and T Nova Program of Guangzhou, Youth science and technology, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Central Universities in China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program of China, Guangdong Province, National Natural Science Foundation of China, Guangdong Province Computational Science Innovative Research Team, Guangzhou city, National Natural Science Foundation of China, Guangzhou city, National Natural Science Foundation of China",4.0,China +26496946,Colorectal Cancer Atlas,0.970538229,Colorectal Cancer Atlas,0.970538229,,0,1,http://www.colonatlas.org,302,,"(32.7831,-96.8067)",http://web.archive.org/web/20221017050607/https://www.colonatlas.org/,2015-10-22,"Department of Computer Science and Information Technology, La Trobe University, Bundoora, Victoria 3086, Australia.","Chisanga D, Keerthikumar S, Pathan M, Ariyaratne D, Kalra H, Boukouris S, Mathew NA, Al Saffar H, Gangoda L, Ang CS, Sieber OM, Mariadason JM, Dasgupta R, Chilamkurti N, Mathivanan S",,,24.0,Australia +26051695,ComiRNet,0.991619289,ComiRNet,0.991619289,Co-clustered miRNA Regulatory Networks,0.947112972,1,http://comirnet.di.uniba.it,"HTTPConnectionPool(host='comirnet.di.uniba.it', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to comirnet.di.uniba.it timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220615220325/http://comirnet.di.uniba.it/,2015-06-01,None,"Pio G, Ceci M, Malerba D, D'Elia D",,,11.0, +24225386,COMMODE,0.942243874,COMMODE,0.942243874,,0,1,http://commode.i-med.ac.at,"HTTPConnectionPool(host='commode.i-med.ac.at', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-11-13,None,"Dander A, Mueller LA, Gallasch R, Pabinger S, Emmert-Streib F, Graber A, Dehmer M",,Austrian Science Fund FWF,1.0, +22836712,Comparative Cellular and Molecular Biology of Longevity Database,0.74333477,,0,Comparative Cellular and Molecular Biology of Longevity Database,0.74333477,1,http://genomics.brocku.ca/ccmbl,302,,"(43.1713,-79.2427)",http://web.archive.org/web/20200221151735/http://genomics.brocku.ca:80/ccmbl/,2012-07-27,"Department of Biological Sciences, Brock University, St. Catharines, ON, Canada, L2S 3A1, jstuart@brocku.ca.","Stuart JA, Liang P, Luo X, Page MM, Gallagher EJ, Christoff CA, Robb EL",,,3.0,Canada +25333826,Complex Mixture Analysis by NMR,0.879976043,COLMAR,0.561694831,Complex Mixture Analysis by NMR,0.879976043,1,http://spin.ccic.ohio-state.edu/index.php/hsqc/index,301,,"(39.9612,-82.9988)",http://web.archive.org/web/20220617230517/http://spin.ccic.ohio-state.edu/index.php/hsqc/index,2014-11-05,"Department of Chemistry and Biochemistry, ‡Campus Chemical Instrument Center, The Ohio State University , Columbus, Ohio 43210, United States.","Bingol K, Li DW, Bruschweiler-Li L, Cabrera OA, Megraw T, Zhang F, Brüschweiler R",,"NIGMS NIH HHS, National Institute of Diabetes and Digestive and Kidney Diseases, NIGMS NIH HHS, NIDDK NIH HHS, National Institute of General Medical Sciences, NIDDK NIH HHS",44.0,United States 
+25348397,ComPPI,0.99763155,ComPPI,0.99763155,,0,1,http://ComPPI.LinkGroup.hu,200,,"(37.7621,-122.3971)",http://web.archive.org/web/20221016235027/https://comppi.linkgroup.hu/,2014-10-27,"Department of Medical Chemistry, Semmelweis University, Budapest, Hungary.","Veres DV, Gyurkó DM, Thaler B, Szalay KZ, Fazekas D, Korcsmáros T, Csermely P",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",43.0,Hungary +32754758,ConoMode,0.997492313,ConoMode,0.997492313,,0,1,http://conomode.qnlm.ac/conomode/conomode/index,200,,"(36.6683,116.9972)",http://web.archive.org/web/20200811165812/http://conomode.qnlm.ac/conomode/conomode/index,2020-01-01,"Key Laboratory of Marine Drugs, Chinese Ministry of Education, School of Medicine and Pharmacy, Ocean University of China, Qingdao 266003, China.","Li X, Liu H, Gao C, Li Y, Jia D, Yang Y, Yang J, Wei Z, Jiang T, Yu R",,"Major Scientific Research Platform Construction Project of Shandong Province, Fundamental Research Funds for the Central Universities, Marine S&T Fund of Shandong Province for Pilot National Laboratory for Marine Science and Technology, National Science and Technology Major Project for Significant New Drugs Development, Fundamental Research Funds for the Central Universities, National Laboratory Director Fund, National Key Research and Development Program of China",0.0,"China, China" +22058133,ConoServer,0.996880889,ConoServer,0.996880889,,0,1,http://www.conoserver.org,200,,"(1.2897,103.8501)",http://web.archive.org/web/20220806100057/https://www.conoserver.org/,2011-11-03,"Division of Chemistry and Structural Biology, Institute for Molecular Bioscience, The University of Queensland, Brisbane, Queensland 4072, Australia.","Kaas Q, Yu R, Jin AH, Dutertre S, Craik DJ",,,159.0,Australia 
+31702846,ConSurf,0.988563061,ConSurf,0.988563061,,0,1,http://consurfdb.tau.ac.il,302,,"(32.0809,34.7806)",http://web.archive.org/web/20221030200224/https://consurfdb.tau.ac.il/,2019-11-22,"Department of Biochemistry and Molecular Biology, George S. Wise Faculty of Life Sciences, Tel Aviv University, Tel Aviv, Israel.","Ben Chorin A, Masrati G, Kessel A, Narunsky A, Sprinzak J, Lahav S, Ashkenazy H, Ben-Tal N",,,13.0,Israel +27980519,ContaMiner,0.980479717,ContaMiner,0.980479717,,0,1,http://strube.cbrc.kaust.edu.sa/contaminer,301,,"(37.5331,-122.2486)",no_wayback,2016-11-02,"King Abdullah University of Science and Technology (KAUST) , Center for Computational Bioscience Research (CBRC), Division of Biological and Environmental Sciences and Engineering (BESE), Thuwal, 23955-6900, Saudi Arabia.","Hungler A, Momin A, Diederichs K, Arold ST",,,7.0,Saudi Arabia +33798715,COnVIDa,0.995118141,COnVIDa,0.995118141,,0,1,http://convida.inf.um.es,"HTTPConnectionPool(host='convida.inf.um.es', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20220524183021/https://convida.inf.um.es,2021-03-30,"Department of Information and Communications Engineering, University of Murcia, Murcia 30100, Spain. 
Electronic address: enriquetomas.martinezb@um.es.","Martínez Beltrán ET, Quiles Pérez M, Pastor-Galindo J, Nespoli P, García Clemente FJ, Gómez Mármol F",,,3.0,Spain +31269035,COOLR,0.98030597,COOLR,0.98030597,Cooperative Open Online Landslide Repository,0.584065162,1,http://landslides.nasa.gov,301,,"(41.4342,-81.8044)",no_wayback,2019-07-03,"Hydrological Sciences Laboratory, NASA Goddard Space Flight Center, Greenbelt, MD, United States of America.","Juang CS, Stanley TA, Kirschbaum DB",,,1.0,United States +27242036,CoopTFD,0.997820497,CoopTFD,0.997820497,Cooperative Transcription Factors Database,0.965329289,1,"http://cosbi.ee.ncku.edu.tw/CoopTFD/, http://cosbi2.ee.ncku.edu.tw/CoopTFD","301, 404",,"(22.9908,120.2133), ","http://web.archive.org/web/20200116232601/http://cosbi.ee.ncku.edu.tw:80/CoopTFD/, http://web.archive.org/web/20180123092403/http://cosbi2.ee.ncku.edu.tw:80/CoopTFD/",2016-05-30,"Department of Electrical Engineering, National Cheng Kung University, Tainan 70101, Taiwan wessonwu@mail.ncku.edu.tw.","Wu WS, Lai FJ, Tu BW, Chang DT",,,2.0, +21544197,CORE,0.780686021,CORE,0.780686021,,0,1,http://microbiome.osu.edu,200,,"(39.9612,-82.9988)",http://web.archive.org/web/20220616092821/http://microbiome.osu.edu/,2011-04-22,"Division of Pediatric Dentistry, College of Dentistry, The Ohio State University, Columbus, Ohio, United States of America. 
griffen.1@osu.edu","Griffen AL, Beall CJ, Firestone ND, Gross EL, Difranco JM, Hardman JH, Vriesendorp B, Faust RA, Janies DA, Leys EJ",,"NIDCR NIH HHS, NIDCR NIH HHS, NIDCR NIH HHS, NIDCR NIH HHS",106.0,United States +33382885,CorkOakDB,0.994003475,CorkOakDB,0.994003475,Genome,0.661612511,1,http://corkoakdb.org,301,,"(38.7167,-9.1333)",no_wayback,2020-12-01,"Instituto Gulbenkian de Ciência, Rua da Quinta Grande, Oeiras 2780-156, Lisboa, Portugal.","Arias-Baldrich C, Silva MC, Bergeretti F, Chaves I, Miguel C, Saibo NJM, Sobral D, Faria D, Barros PM",,"GREEN-IT - Bioresources for Sustainability, BioData.pt - Infraestrutura Portuguesa de Dados Biológicos",0.0,Portugal +34016708,CoronaCentral,0.994874954,CoronaCentral,0.994874954,,0,1,http://coronacentral.ai,302,,"(39.0437,-77.4875)",http://web.archive.org/web/20220717193610/https://coronacentral.ai/,2021-06-01,"Department of Bioengineering, Stanford University, Stanford, CA 94305 jlever@stanford.edu.","Lever J, Altman RB",,HHS | NIH | U.S. 
National Library of Medicine,6.0, +24991954,CORTECON,0.730165362,CORTECON,0.730165362,,0,1,http://cortecon.neuralsci.org,301,,"(38.8951,-77.0364)",http://web.archive.org/web/20221022052704/https://cortecon.neuralsci.org/,2014-07-01,"Neural Stem Cell Institute, Rensselaer, NY 12144, USA.","van de Leemput J, Boles NC, Kiehl TR, Corneo B, Lederman P, Menon V, Lee C, Martinez RA, Levi BP, Thompson CL, Yao S, Kaykas A, Temple S, Fasano CA",,NINDS NIH HHS,111.0,United States +30357367,CORUM,0.997224689,CORUM,0.997224689,of mammalian protein complexes,0.598213032,1,http://mips.helmholtz-muenchen.de/corum,302,,"(48.2500,11.5667)",http://web.archive.org/web/20221102060724/https://mips.helmholtz-muenchen.de/corum/,2019-01-01,"Institute for Bioinformatics and Systems Biology (IBIS), Helmholtz Zentrum München-German Research Center for Environmental Health (GmbH), Ingolstädter Landstraße 1, D-85764 Neuherberg, Germany.","Giurgiu M, Reinhard J, Brauner B, Dunger-Kaltenbach I, Fobo G, Frishman G, Montrone C, Ruepp A",,"NIA NIH HHS, National Institutes of Health",157.0,Germany +24466021,CoryneBase,0.996892035,CoryneBase,0.996892035,,0,1,http://corynebacterium.um.edu.my,"HTTPConnectionPool(host='corynebacterium.um.edu.my', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to corynebacterium.um.edu.my timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20160807074058/http://corynebacterium.um.edu.my:80/,2014-01-17,"Genome Informatics Research Laboratory, HIR Building, University of Malaya, Kuala Lumpur, Malaysia ; Department of Software Engineering, Faculty of Computer Science and Information Technology, University of Malaya, Kuala Lumpur, Malaysia.","Heydari H, Siow CC, Tan MF, Jakubovics NS, Wee WY, Mutha NV, Wong GJ, Ang MY, Yazdi AH, Choo SW",,,2.0,"Malaysia, Malaysia" +22080556,CoryneRegNet,0.996991575,CoryneRegNet,0.996991575,,0,1,http://www.coryneregnet.de,302,,"(49.0094,8.4044)",http://web.archive.org/web/20160224193109/http://www.coryneregnet.de/,2011-11-12,"Computational Systems Biology, Max Planck Institute for Informatics, Campus E1.4, 66123 Saarbrücken, Germany.","Pauling J, Röttger R, Tauch A, Azevedo V, Baumbach J",,,41.0,Germany +"25355519, 27727438, 27899578, 30371878",COSMIC,0.99756813,COSMIC,0.99756813,the Catalogue Of Somatic Mutations In Cancer,0.939183259,4,http://cancer.sanger.ac.uk,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20200830110331/https://cancer.sanger.ac.uk/,2019-01-01,"Cancer Genome Project, Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, UK, CB10 1SA. 
saf@sanger.ac.uk., Wellcome Trust Sanger Institute, Wellcome Genome Campus, Hinxton, United Kingdom., Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SA, UK saf@sanger.ac.uk., Wellcome Sanger Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SA, UK.","Forbes SA, Beare D, Gunasekaran P, Leung K, Bindal N, Boutselakis H, Ding M, Bamford S, Cole C, Ward S, Kok CY, Jia M, De T, Teague JW, Stratton MR, McDermott U, Campbell PJ, Forbes SA, Beare D, Bindal N, Bamford S, Ward S, Cole CG, Jia M, Kok C, Boutselakis H, De T, Sondka Z, Ponting L, Stefancsik R, Harsha B, Tate J, Dawson E, Thompson S, Jubb H, Campbell PJ, Forbes SA, Beare D, Boutselakis H, Bamford S, Bindal N, Tate J, Cole CG, Ward S, Dawson E, Ponting L, Stefancsik R, Harsha B, Kok CY, Jia M, Jubb H, Sondka Z, Thompson S, De T, Campbell PJ, Tate JG, Bamford S, Jubb HC, Sondka Z, Beare DM, Bindal N, Boutselakis H, Cole CG, Creatore C, Dawson E, Fish P, Harsha B, Hathaway C, Jupe SC, Kok CY, Noble K, Ponting L, Ramshaw CC, Rye CE, Speedy HE, Stefancsik R, Thompson SL, Wang S, Ward S, Campbell PJ, Forbes SA",", , , ","Wellcome Trust, Wellcome Trust, , Wellcome Trust, Wellcome Trust, Wellcome Trust",3577.0,United Kingdom +28595571,CottonFGD,0.992367327,CottonFGD,0.992367327,Cotton Functional Genomic Database,0.977265196,1,http://cottonfgd.org,"HTTPConnectionPool(host='cottonfgd.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to cottonfgd.org timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220704115004/https://cottonfgd.org/,2017-06-08,"Biotechnology Research Institute, Chinese Academy of Agricultural Sciences, Beijing, 100081, China.","Zhu T, Zhu T, Liang C, Meng Z, Sun G, Meng Z, Guo S, Zhang R",,"Ministry of Science and Technology of the People's Republic of China, Ministry of Agriculture of the People's Republic of China, Ministry of Agriculture of the People's Republic of China",71.0,China +24203703,CottonGen,0.994491816,CottonGen,0.994491816,,0,1,http://www.cottongen.org,302,,"(46.7313,-117.1796)",http://web.archive.org/web/20221006054349/https://www.cottongen.org/,2013-11-06,"Department of Horticulture, Washington State University, Pullman, WA 99164-6414, USA, Cotton Incorporated, Cary, NC 27513, USA and Crop Germplasm Research Unit, USDA-ARS-SPARC, College Station, TX 77845, USA.","Yu J, Jung S, Cheng CH, Ficklin SP, Lee T, Zheng P, Jones D, Percy RG, Main D",,,112.0,"United States, United States, United States" +34992626,CottonGVD,0.994645226,CottonGVD,0.994645226,cotton genomic variation database,0.905625567,1,"http://120.78.174.209/, http://db.cngb.org/cottonGVD","200, 301",,"(29.4159,121.3397), (37.3394,-121.8950)","no_wayback, no_wayback",2021-12-21,"Zhengzhou Research Base, State Key Laboratory of Cotton Biology, Zhengzhou University, Zhengzhou, China.","Peng Z, Li H, Sun G, Dai P, Geng X, Wang X, Zhang X, Wang Z, Jia Y, Pan Z, Chen B, Du X, He S",,"Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China",0.0,China +25758743,CottonQTLdb,0.928469539,CottonQTLdb,0.928469539,,0,1,http://www.cottonqtldb.org,302,,"(34.0522,-118.2437)",http://web.archive.org/web/20220618033610/http://www2.cottonqtldb.org/,2015-03-11,"Department of Plant and Environmental Sciences, New Mexico State University, Las Cruces, NM, USA, joesaid@nmsu.edu.","Said JI, Knapka JA, Song M, Zhang J",,,60.0,"Mexico, United States" 
+32890396,CoV3D,0.995971898,CoV3D,0.995971898,,0,1,http://cov3d.ibbr.umd.edu,302,,"(38.9896,-76.9457)",http://web.archive.org/web/20221017070204/https://cov3d.ibbr.umd.edu/,2021-01-01,"University of Maryland Institute for Bioscience and Biotechnology Research, Rockville, MD 20850, USA.","Gowthaman R, Guest JD, Yin R, Adolf-Bryfogle J, Schief WR, Pierce BG",,"NIGMS NIH HHS, National Institutes of Health",29.0,United States +33068433,CovalentInDB,0.996971965,CovalentInDB,0.996971965,Covalent Inhibitor Database,0.987251094,1,http://cadd.zju.edu.cn/cidb,301,,"(30.2936,120.1614)",http://web.archive.org/web/20220127031349/http://cadd.zju.edu.cn/cidb/,2021-01-01,"Innovation Institute for Artificial Intelligence in Medicine of Zhejiang University, College of Pharmaceutical Sciences, Zhejiang University, Hangzhou 310058, Zhejiang, China.","Du H, Gao J, Weng G, Ding J, Chai X, Pang J, Kang Y, Li D, Cao D, Hou T",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Science & Technology Major Project of China, Zhejiang Provincial Natural Science Foundation, Primary Research and Development Program of Zhejiang Province",3.0,China +33009914,CoVdb,0.996979177,CoVdb,0.996979177,coronavirus database,0.904240698,1,http://covdb.popgenetics.net,302,,"(29.5603,106.5577)",no_wayback,2021-08-01,"School of Life Sciences, Chongqing University, No. 
55 Daxuecheng South Rd., Shapingba, Chongqing, 401331, China.","Zhu Z, Meng K, Liu G, Meng G",,"National Natural Science Foundation of HeBei Province, Fundamental Research Funds for the Central Universities, National Key Research and Development Program, National Natural Science Foundation of China",8.0,China +32665542,CoVex,0.995960653,CoVex,0.995960653,,0,1,http://exbio.wzw.tum.de/covex,302,,"(48.1374,11.5755)",http://web.archive.org/web/20220830132149/https://exbio.wzw.tum.de/covex/,2020-07-14,"Chair of Experimental Bioinformatics, TUM School of Life Sciences, Technical University of Munich, München, Germany.","Sadegh S, Matschinske J, Blumenthal DB, Galindez G, Kacprowski T, List M, Nasirigerdeh R, Oubounyt M, Pichlmair A, Rose TD, Salgado-Albarrán M, Späth J, Stukalov A, Wenke NK, Yuan K, Pauling JK, Baumbach J",,"EC | EU Framework Programme for Research and Innovation H2020 | H2020 Priority Societal Challenges | H2020 Health (H2020 Societal Challenges - Health, Demographic Change and Well-being), Villum Fonden",67.0,Germany +34585731,COVIDium,0.977083564,COVIDium,0.977083564,,0,1,http://kraza.in/covidium,406,,,no_wayback,2021-09-29,None,"Satyam R, Yousef M, Qazi S, Bhat AM, Raza K",,,1.0, +34931882,CoxBase,0.994834006,CoxBase,0.994834006,,0,1,http://coxbase.q-gaps.de,302,,"(52.0333,8.5333)",no_wayback,2021-12-21,"University of Würzburggrid.8379.5, Würzburg, Germany.","Fasemore AM, Helbich A, Walter MC, Dandekar T, Vergnaud G, Förstner KU, Frangoulidis D",,"Bundesministerium für Bildung und Forschung, Bundesministerium für Bildung und Forschung (BMBF)",0.0,Germany +"23203868, 25392420, 30462320",COXPRESdb,0.98220855,COXPRESdb,0.98220855,,0,3,http://coxpresdb.jp,301,,"(38.2570,140.8523)",http://web.archive.org/web/20221104065213/https://coxpresdb.jp/,2019-01-01,"Graduate School of Information Sciences, Tohoku University, Sendai 980-8679, Japan., Graduate School of Information Sciences, Tohoku University, 6-3-09, Aramaki-Aza-Aoba, Aoba-ku, Sendai 980-8679, 
Japan., Graduate School of Information Sciences, Tohoku University, 6-3-09, Aramaki-Aza-Aoba, Aoba-ku, Sendai 980-8679, Japan.","Obayashi T, Okamura Y, Ito S, Tadaka S, Motoike IN, Kinoshita K, Okamura Y, Aoki Y, Obayashi T, Tadaka S, Ito S, Narise T, Kinoshita K, Obayashi T, Kagaya Y, Aoki Y, Tadaka S, Kinoshita K",", , ",", , Japan Agency for Medical Research and Development",178.0,"Japan, Japan, Japan" +21269480,CPASS,0.901464581,CPASS,0.901464581,ite Structures,0.53467082,1,http://cpass.unl.edu,301,,"(40.8000,-96.6670)",http://web.archive.org/web/20220526045019/https://cpass.unl.edu/,2011-01-26,"Department of Chemistry, University of Nebraska-Lincoln, Lincoln, NE 68588-0304 USA. rpowers3@unl.edu.","Powers R, Copeland JC, Stark JL, Caprez A, Guru A, Swanson D",,,7.0,United States +28962356,CPCat,0.98875711,CPCat,0.98875711,Chemical/Product Categories Database,0.958640075,1,http://actor.epa.gov/cpcat,301,,"(36.0512,-78.8577)",http://web.archive.org/web/20210323183712/https://actor.epa.gov/cpcat/,2015-01-02,"U.S. Environmental Protection Agency, National Exposure Research Laboratory, 109 T.W. 
Alexander Drive, MC E205-02, Research Triangle Park, NC 27709, USA.","Dionisio KL, Frame AM, Goldsmith MR, Wambaugh JF, Liddell A, Cathey T, Smith D, Vail J, Ernstoff AS, Fantke P, Jolliet O, Judson RS",,,38.0,United States +25861964,CPD,0.994626105,CPD,0.994626105,Cellular Phenotype Database,0.898467913,1,http://www.ebi.ac.uk/fg/sym,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20220501192455/https://www.ebi.ac.uk/fg/sym,2015-04-09,"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton CB10 1SD, UK.","Kirsanova C, Brazma A, Rustici G, Sarkans U",,,5.0, +22120664,CPGR,0.99133648,CPGR,0.99133648,Comprehensive Phytopathogen Genomics Resource,0.985549808,1,http://cpgr.plantbiology.msu.edu,"HTTPConnectionPool(host='cpgr.plantbiology.msu.edu', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to cpgr.plantbiology.msu.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20210518100646/http://cpgr.plantbiology.msu.edu/,2011-11-26,"Department of Plant Biology, 178 Wilson Lane, Michigan State University, East Lansing, MI 48824, USA.","Hamilton JP, Neeno-Eckwall EC, Adhikari BN, Perna NT, Tisserat N, Leach JE, Lévesque CA, Buell CR",,,14.0,United States +24214993,CPLM,0.995593056,CPLM,0.995593056,Compendium of Protein Lysine Modifications,0.978943653,1,http://cplm.biocuckoo.org,200,,"(40.2338,-111.6585)",http://web.archive.org/web/20221007210246/https://cplm.biocuckoo.org/,2013-11-08,"Department of Biomedical Engineering, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China, Advanced Institute of Translational Medicine, Tongji University, Shanghai 200092, China and State Key Laboratory of Biocontrol, School of Life Sciences, Sun Yat-sen University, Guangzhou, Guangdong 510275, China.","Liu Z, Wang Y, Gao T, Pan Z, Cheng H, Yang Q, Cheng Z, Guo A, Ren J, Xue Y",,,77.0,"China, China, China" 
+24253304,CR Cistrome,0.879878566,CR Cistrome,0.879878566,,0,1,"http://compbio.tongji.edu.cn/cr, http://cistrome.org/cr","302, 404",,"(31.2222,121.4581), ","http://web.archive.org/web/20171104073350/http://compbio.tongji.edu.cn/cr/, no_wayback",2013-11-18,"Shanghai Key Laboratory of Signaling and Disease Research, School of Life Science and Technology, Tongji University, Shanghai 200092, China and Department of Biostatistics and Computational Biology, Dana-Farber Cancer Institute, Harvard school of Public Health, 450 Brookline Avenue, Boston, MA 02215, USA.","Wang Q, Huang J, Sun H, Liu J, Wang J, Wang Q, Qin Q, Mei S, Zhao C, Yang X, Liu XS, Zhang Y",,,19.0,"China, United States" +29036683,CR2Cancer,0.981912553,CR2Cancer,0.981912553,,0,1,http://cis.hku.hk/CR2Cancer,301,,"(22.2783,114.1747)",http://web.archive.org/web/20220225182736/http://cis.hku.hk/CR2Cancer/,2018-01-01,"School of Biological Sciences, The University of Hong Kong, Hong Kong 999077, China.","Ru B, Sun J, Tong Y, Wong CN, Chandra A, Tang ATS, Chow LKY, Wun WL, Levitskaya Z, Zhang J",,,7.0,"China, Hong Kong, Hong Kong" +31725864,CRAFT,0.874941985,CRAFT,0.874941985,Annotated,0.625489712,1,http://bionlp-corpora.sourceforge.net/CRAFT/index.shtml,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20221016210526/https://bionlp-corpora.sourceforge.net/CRAFT/index.shtml,2017-01-01,"School of Medicine, Department of Pharmacology, University of Colorado Anschutz Medical Campus, 12801 E. 17th Ave., P.O. 
Box 6511, MS 8303, Aurora, CO 80045-0511, USA.","Bada M, Vasilevsky N, Baumgartner WA, Haendel M, Hunter LE",,"National Institutes of Health, Defense Advanced Research Projects Agency, National Institutes of Health",1.0,United States +26862144,CrAgDb,0.977069199,CrAgDb,0.977069199,haperone repertoire in Archaeal,0.644523211,1,http://14.139.227.92/mkumar/cragdb,"HTTPConnectionPool(host='14.139.227.92', port=80): Max retries exceeded with url: /mkumar/cragdb (Caused by ConnectTimeoutError(, 'Connection to 14.139.227.92 timed out. (connect timeout=5)'))",,,no_wayback,2016-02-08,"Department of Biophysics, University of Delhi South Campus, Benito Juarez Road, New Delhi, 110021, India.","Rani S, Srivastava A, Kumar M, Goel M",,,1.0,India +26450948,CRCDA,0.995928064,CRCDA,0.995928064,resources for cancer NGS data analysis,0.909173157,1,http://bioinfo.au-kbc.org.in/ngs/ngshome.html,301,,"(19.0728,72.8826)",http://web.archive.org/web/20180506055359/http://bioinfo.au-kbc.org.in:80/ngs/ngshome.html,2015-10-08,"AU-KBC Research Centre, MIT Campus of Anna University, Chromepet, Chennai, India.","Thangam M, Gopal RK",,,1.0,India +23019048,CRCgene,0.993980265,CRCgene,0.993980265,,0,1,http://www.chs.med.ed.ac.uk/CRCgene,404,,,no_wayback,2012-09-26,"Centre for Population Health Sciences, University of Edinburgh, UK.","Theodoratou E, Montazeri Z, Hawken S, Allum GC, Gong J, Tait V, Kirac I, Tazari M, Farrington SM, Demarsh A, Zgaga L, Landry D, Benson HE, Read SH, Rudan I, Tenesa A, Dunlop MG, Campbell H, Little J",,"Cancer Research UK, Cancer Research UK, Chief Scientist Office, Medical Research Council, Cancer Research UK, Chief Scientist Office, Cancer Research UK, CIHR, CIHR, Cancer Research UK, Medical Research Council",66.0, +23868908,CREDO,0.997011185,CREDO,0.997011185,,0,1,http://www-cryst.bioc.cam.ac.uk/credo,"HTTPConnectionPool(host='www-cryst.bioc.cam.ac.uk', port=80): Max retries exceeded with url: /credo (Caused by ConnectTimeoutError(, 'Connection to 
www-cryst.bioc.cam.ac.uk timed out. (connect timeout=5)'))",,,no_wayback,2013-07-18,"Department of Biochemistry, University of Cambridge, 80 Tennis Court Road, CB2 1GA Cambridge, UK. ams214@cam.ac.uk","Schreyer AM, Blundell TL",,"Wellcome Trust, Wellcome Trust",15.0, +34482425,CrePortal,0.977086961,CrePortal,0.977086961,,0,1,http://www.informatics.jax.org/home/recombinase,200,,"(44.3876,-68.2039)",http://web.archive.org/web/20220308160552/http://www.informatics.jax.org/home/recombinase,2021-09-04,"The Jackson Laboratory, Bar Harbor, ME, 04609, USA.","Perry MN, Smith CM, Onda H, Ringwald M, Murray SA, Smith CL",,"national institute of child health and human development, NIH HHS, NICHD NIH HHS, nih office of the director",1.0,United States +26855883,CressInt,0.996792436,CressInt,0.996792436,,0,1,http://cressint.cchmc.org,302,,"(39.1271,-84.5144)",no_wayback,2015-09-01,"Department of Electrical Engineering and Computing Systems, College of Engineering and Applied Sciences, University of Cincinnati, Cincinnati, OH 45221; Center for Autoimmune Genomics and Etiology, Cincinnati Children's Hospital Medical Center, Department of Pediatrics, College of Medicine, University of Cincinnati, Cincinnati, OH 45229.","Chen X, Ernst K, Soman F, Borowczak M, Weirauch MT",,"NIH, NHGRI NIH HHS",2.0, +23668932,CreZoo,0.978289962,CreZoo,0.978289962,,0,1,http://crezoo.crt-dresden.de,302,,"(51.0509,13.7383)",no_wayback,2013-05-13,"Dresden University of Technology, Dresden, Germany.","Jungke P, Hans S, Brand M",,,13.0,Germany +33010154,CRISP-view,0.996309042,CRISP-view,0.996309042,,0,1,http://crispview.weililab.org,200,,"(39.0437,-77.4875)",http://web.archive.org/web/20220520131625/http://crispview.weililab.org/,2021-01-01,"Sanyi Road, Changsha, Hunan Province, People's Republic of China.","Cui Y, Cheng X, Chen Q, Song B, Chiu A, Gao Y, Dawson T, Chao L, Zhang W, Li D, Zeng Z, Yu J, Li Z, Fei T, Peng S, Li W",,"Center of Genetic Medicine Research, Pharmaceutical Research and Manufacturers 
of America Foundation, W.T. Gill Fellowship",5.0,China +31624845,CRISPRCasdb,0.988769293,CRISPRCasdb,0.988769293,,0,1,http://crisprcas.i2bc.paris-saclay.fr,301,,"(48.6833,2.1333)",http://web.archive.org/web/20220819043045/https://crisprcas.i2bc.paris-saclay.fr/,2020-01-01,"Institute for Integrative Biology of the Cell (I2BC), CEA, CNRS, Univ. Paris-Sud, Université Paris-Saclay, 91198 Gif-sur-Yvette, France.","Pourcel C, Touchon M, Villeriot N, Vernadet JP, Couvin D, Toffano-Nioche C, Vergnaud G",,"Institut Français de Bioinformatique, CNRS",36.0,France +30285246,CRISPRlnc,0.995125651,CRISPRlnc,0.995125651,,0,1,"http://www.crisprlnc.org, http://crisprlnc.xtbg.ac.cn","200, HTTPConnectionPool(host='crisprlnc.xtbg.ac.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,"(22.2783,114.1747), ","http://web.archive.org/web/20220615180834/http://www.crisprlnc.org/, no_wayback",2019-01-01,"CAS Key Laboratory of Tropical Plant Resources and Sustainable Use, Xishuangbanna Tropical Botanical Garden, Chinese Academy of Sciences, Kunming 650223, China.","Chen W, Zhang G, Li J, Zhang X, Huang S, Xiang S, Hu X, Liu C",,"Developmental Biology of Freshwater Fish, Developmental Biology of Hunan Province, National Natural Science Foundation of China, National Natural Science Foundation of China, Scientific Research Fund of Hunan Provincial Education Department",13.0,China +33084893,crisprSQL,0.997429252,crisprSQL,0.997429252,,0,1,http://www.crisprsql.com,200,,"(48.1374,11.5755)",http://web.archive.org/web/20220526054225/http://crisprsql.com/,2021-01-01,"Department of Computer Science, University of Oxford, Parks Road, Oxford OX1 3QD, UK.","Störtz F, Minary P",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",4.0, 
+26438539,CRISPRz,0.997380316,CRISPRz,0.997380316,,0,1,http://research.nhgri.nih.gov/CRISPRz,302,,"(38.9807,-77.1003)",http://web.archive.org/web/20221017001254/https://research.nhgri.nih.gov/crisprz/,2015-10-04,"Translational and Functional Genomics Branch, National Human Genome Research Institute, National Institutes of Health, Bethesda, MD 20892, USA.","Varshney GK, Zhang S, Pei W, Adomako-Ankomah A, Fohtung J, Schaffer K, Carrington B, Maskeri A, Slevin C, Wolfsberg T, Ledin J, Sood R, Burgess SM",,"PHS HHS, Intramural NIH HHS",21.0,United States +30598113,CRlncRNA,0.995219409,CRlncRNA,0.995219409,,0,1,http://crlnc.xtbg.ac.cn,"HTTPConnectionPool(host='crlnc.xtbg.ac.cn', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to crlnc.xtbg.ac.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20190925123321/http://crlnc.xtbg.ac.cn:80/,2018-12-31,"CAS Key Laboratory of Tropical Plant Resources and Sustainable Use, Xishuangbanna Tropical Botanical Garden, Chinese Academy of Sciences, Menglun, Yunnan, 666303, People's Republic of China.","Wang J, Zhang X, Chen W, Li J, Liu C",,,12.0,China +33529633,CRMarker,0.994199812,CRMarker,0.994199812,,0,1,http://crmarker.hnnu.edu.cn,302,,"(31.2222,121.4581)",http://web.archive.org/web/20210725215649/http://crmarker.hnnu.edu.cn/,2021-01-30,"School of Biological Engineering, Huainan Normal University, Huainan 232001, PR China; Department of Biostatistics and Computational Biology, School of Life Sciences, Fudan University, Shanghai 200436, PR China; Key Laboratory of Industrial Dust Prevention and Control & Occupational Health and Safety, Ministry of Education, Huainan, PR China; Anhui Shanhe Pharmaceutical Excipients Co., Ltd., Huainan, PR China. 
Electronic address: jifengzhang@fudan.edu.cn.","Zhang J, Yan S, Li R, Wang G, Kang S, Wang Y, Hou W, Wang C, Tian W",,,1.0,"China, China, China, China" +26602695,CRN,0.987320423,CRN,0.987320423,Cancer RNA-Seq Nexus,0.908770829,1,http://syslab4.nchu.edu.tw/CRN,"HTTPConnectionPool(host='syslab4.nchu.edu.tw', port=80): Max retries exceeded with url: /CRN (Caused by ConnectTimeoutError(, 'Connection to syslab4.nchu.edu.tw timed out. (connect timeout=5)'))",,,no_wayback,2015-11-23,"Institute of Genomics and Bioinformatics, National Chung Hsing University, Taichung 402, Taiwan PhD Program in Medical Biotechnology National Chung Hsing University, Taichung 402, Taiwan.","Li JR, Sun CH, Li W, Chao RF, Huang CC, Zhou XJ, Liu CC",,"NIGMS NIH HHS, PHS HHS, NHLBI NIH HHS, NHLBI NIH HHS",66.0, +30967897,croFGD,0.99771744,croFGD,0.99771744,Catharanthus roseus Functional Genomics Database,0.82261859,1,http://bioinformatics.cau.edu.cn/croFGD,301,,"(39.9075,116.3972)",http://web.archive.org/web/20220802122721/http://bioinformatics.cau.edu.cn/croFGD/,2019-03-22,"State Key Laboratory of Plant Physiology and Biochemistry, College of Biological Sciences, China Agricultural University, Beijing, China.","She J, Yan H, Yang J, Xu W, Su Z",,,5.0,"China, China" +26556651,cropPAL,0.991583467,cropPAL,0.991583467,crop Proteins with Annotated Locations,0.932682865,1,http://crop-PAL.org,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20221027204933/https://crop-pal.org/,2015-11-09,"ARC Centre of Excellence in Plant Energy Biology, The University of Western Australia, Crawley, WA 6009, Australia cornelia.hooper@uwa.edu.au harvey.millar@uwa.edu.au.","Hooper CM, Castleden IR, Aryamanesh N, Jacoby RP, Millar AH",,,21.0,"Australia, Australia" +30548723,CropSNPdb,0.997403085,CropSNPdb,0.997403085,,0,1,http://snpdb.appliedbioinformatics.com.au,"HTTPConnectionPool(host='snpdb.appliedbioinformatics.com.au', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to 
snpdb.appliedbioinformatics.com.au timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220517064906/http://snpdb.appliedbioinformatics.com.au/,2019-01-28,"School of Biological Sciences and Institute of Agriculture, The University of Western Australia, Perth, WA, 6009, Australia.","Scheben A, Verpaalen B, Lawley CT, Chan CK, Bayer PE, Batley J, Edwards D",,"Australian Research Council, Australian Research Council, Australian Research Council, Australian Research Council",5.0,"Australia, Australia" +28724888,CrossCheck,0.990276933,CrossCheck,0.990276933,,0,1,http://proteinguru.com/crosscheck,302,,"(33.4484,-112.0740)",no_wayback,2017-07-19,"Department of Computer Engineering, Faculty of Engineering, Gazi University, Ankara, Turkey.","Najafov J, Najafov A",,,2.0,Turkey +23396301,CrossTope,0.991143823,CrossTope,0.991143823,,0,1,http://www.crosstope.com.br,308,,"(34.0183,-117.8546)",http://web.archive.org/web/20220816065324/https://www.crosstope.com.br/,2013-02-08,"NBLI - Núcleo de Bioinformática do Laboratório de Imunogenética, Department of Genetics, Universidade Federal do Rio Grande do Sul, 9500 Bento Gonçalves Avenue, Bldg 43323, Rm 225, 91501-970 Porto Alegre, RS, Brazil.","Sinigaglia M, Antunes DA, Rigo MM, Chies JA, Vieira GF",,,10.0,Brazil +34927675,CRPMKB,0.995024562,CRPMKB,0.995024562,cancer risk prediction model,0.801454693,1,http://www.sysbio.org.cn/CRPMKB,301,,"(22.2783,114.1747)",no_wayback,2021-12-20,"Institutes for Systems Genetics, Frontiers Science Center for Disease-related Molecular Network, West China Hospital, Sichuan University, Chengdu, Sichuan, 610212, China.","Ren S, Jin Y, Chen Y, Shen B",,"Sichuan and Guangxi Provinces, National Natural Science Foundation of China, National Natural Science Foundation of China",0.0,"China, China" +29178828,CrusTF,0.992500782,CrusTF,0.992500782,,0,1,http://qinlab.sls.cuhk.edu.hk/CrusTF,200,,"(22.2783,114.1747)",no_wayback,2017-11-25,"Simon F. S. 
Li Marine Science Laboratory, School of Life Sciences, The Chinese University of Hong Kong, Shatin, New Territories, Hong Kong, China. qinjing@cuhk.edu.hk.","Qin J, Hu Y, Ma KY, Jiang X, Ho CH, Tsang LM, Yi L, Leung RWT, Chu KH",,"National Natural Science Foundation of China, Direct Grant for Research from The Chinese University of Hong Kong, Natural Science Foundation of Guangdong Province, National Natural Science Foundation of China, Collaborative Research Fund of the Research Grants Council",1.0,"China, Hong Kong, Hong Kong" +32928113,CrustyBase,0.661278129,CrustyBase,0.661278129,,0,1,http://crustybase.org,301,,"(-27.4679,153.0281)",http://web.archive.org/web/20221020075228/https://crustybase.org/,2020-09-14,"Genecology Research Centre, University of the Sunshine Coast, Sippy Downs, Queensland, 4556, Australia. chyde@crustybase.org.","Hyde CJ, Fitzgibbon QP, Elizur A, Smith GG, Ventura T",,"Australian Research Council, Australian Research Council, Australian Research Council",2.0,Australia +31452162,CryptoDB,0.997344255,CryptoDB,0.997344255,,0,1,"http://cryptodb.org/, http://eupathdb.org","301, 301",,"(33.9609,-83.3779), (33.9609,-83.3779)","http://web.archive.org/web/20030805114943/http://cryptodb.org:80/, http://web.archive.org/web/20221020084858/http://www.eupathdb.org/",2020-01-01,"Center for Tropical and Emerging Global Diseases, University of Georgia, Athens, GA, USA.","Warrenfeltz S, Kissinger JC, ",,,1.0,"Georgia, United States" +25264971,CS-DEGs,0.965403174,CS-DEGs,0.965403174,,0,1,http://cs.psych.ac.cn,400,,,http://web.archive.org/web/20190531041607/http://cs.psych.ac.cn:80/,2014-10-01,"From the Key Laboratory of Mental Health (L.G., Y.D., S.C., W.Z., J.W.), Institute of Psychology, Chinese Academy of Sciences, Beijing, China; and University of Chinese Academy of Sciences (Y.D., W.Z.), Beijing, China.","Guo L, Du Y, Chang S, Zhang W, Wang J",,,1.0,"China, China" +24319146,CSA,0.969528695,CSA,0.969528695,Catalytic Site 
Atlas,0.905872226,1,http://www.ebi.ac.uk/thornton-srv/databases/CSA,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20221005170915/https://www.ebi.ac.uk/thornton-srv/databases/CSA/,2013-12-06,"European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK and Department of Biochemistry and Molecular Genetics, University of Virginia, 1300 Jefferson Park Ave., Charlottesville, VA 22908, USA.","Furnham N, Holliday GL, de Beer TA, Jacobsen JO, Pearson WR, Thornton JM",,Wellcome Trust,86.0,United States +29036403,CSCD,0.997561216,CSCD,0.997561216,pecific circRNA database,0.575147778,1,http://gb.whu.edu.cn/CSCD,301,,"(31.2222,121.4581)",no_wayback,2018-01-01,"School of Basic Medical Sciences, Wuhan University, Wuhan 430071, Hubei, China.","Xia S, Feng J, Chen K, Ma Y, Gong J, Cai F, Jin Y, Gao Y, Xia L, Chang H, Wei L, Han L, He C",,,154.0,China +26989154,CSCdb,0.995143592,CSCdb,0.995143592,CSCs database,0.68326959,1,http://bioinformatics.ustc.edu.cn/cscdb,"HTTPConnectionPool(host='bioinformatics.ustc.edu.cn', port=80): Max retries exceeded with url: /cscdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2016-03-17,School of Information Science and Technology.,"Shen Y, Yao H, Li A, Wang M",,,9.0, +28191780,CSCTT,0.99582231,CSCTT,0.99582231,Cancer Stem Cells Therapeutic Target Database,0.993961447,1,http://www.csctt.org,"HTTPConnectionPool(host='www.csctt.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2016-10-11,"Center for Molecular Medicine, School of Life Science and Biotechnology, Dalian University of Technology, Dalian, People's Republic of China.","Hu X, Cong Y, Luo HH, Wu S, Zhao LE, Liu Q, Yang Y",,,5.0,China +33211888,CSEA-DB,0.997746289,CSEA-DB,0.997746289,Cell type-Specific Enrichment 
Analysis DataBase,0.910774702,1,http://bioinfo.uth.edu/CSEADB,302,,"(29.7633,-95.3633)",no_wayback,2021-01-01,"Center for Precision Health, School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, TX 77030, USA.","Dai Y, Hu R, Manuel AM, Liu A, Jia P, Zhao Z",,"National Institutes of Health, Data Science and Informatics Core for Cancer Research, Cancer Prevention and Research Institute of Texas, Data Science and Informatics Core for Cancer Research, NLM NIH HHS",3.0,United States +"25038066, 31432427",CSF-PR,0.993137106,CSF-PR,0.993137106,Cerebrospinal Fluid Proteome Resource,0.987415892,2,http://probe.uib.no/csf-pr,302,,"(60.3032,5.2855)",no_wayback,2019-01-01,"From the ‡Proteomics Unit (PROBE), Department of Biomedicine, University of Bergen, Bergen, Norway; §KG Jebsen Centre for Multiple Sclerosis Research, Department of Clinical Medicine, University of Bergen, Bergen, Norway;, Proteomics Unit, Department of Biomedicine, University of Bergen, Bergen, Norway.","Guldbrandsen A, Vethe H, Farag Y, Oveland E, Garberg H, Berle M, Myhr KM, Opsahl JA, Barsnes H, Berven FS, Guldbrandsen A, Farag YM, Lereim RR, Berven FS, Barsnes H",", ",", ",59.0,"Norway, Norway, Norway" +28704505,CSmiRTar,0.990449607,CSmiRTar,0.990449607,Condition-Specific miRNA Targets,0.896995284,1,"http://cosbi.ee.ncku.edu.tw/CSmiRTar/, http://cosbi4.ee.ncku.edu.tw/CSmiRTar","301, 404",,"(22.9908,120.2133), ","http://web.archive.org/web/20191209115359/http://cosbi.ee.ncku.edu.tw:80/CSmiRTar/, http://web.archive.org/web/20220615163253/http://cosbi4.ee.ncku.edu.tw/CSmiRTar/",2017-07-13,"Department of Electrical Engineering, National Cheng Kung University, Tainan, Taiwan.","Wu WS, Tu BW, Chen TT, Hou SW, Tseng JT",,"Ministry of Science and Technology, Taiwan",8.0, +32990755,CSVS,0.992008924,CSVS,0.992008924,Collaborative Spanish Variability 
Server,0.858899653,1,http://csvs.babelomics.org,200,,"(37.8916,-4.7728)",http://web.archive.org/web/20220616074949/http://csvs.babelomics.org/,2021-01-01,"Clinical Bioinformatics Area, Fundación Progreso y Salud (FPS), Hospital Virgen del Rocío, Sevilla 41013, Spain.","Peña-Chilet M, Roldán G, Perez-Florido J, Ortuño FM, Carmona R, Aquino V, Lopez-Lopez D, Loucera C, Fernandez-Rueda JL, Gallego A, García-Garcia F, González-Neira A, Pita G, Núñez-Torres R, Santoyo-López J, Ayuso C, Minguez P, Avila-Fernandez A, Corton M, Moreno-Pelayo MÁ, Morin M, Gallego-Martinez A, Lopez-Escamez JA, Borrego S, Antiñolo G, Amigo J, Salgado-Garrido J, Pasalodos-Sanchez S, Morte B, , Carracedo Á, Alonso Á, Dopazo J",,"Regional Government of Madrid, Ministry of Economy and Competitiveness, Regional Government of Madrid, Ministry of Economy and Competitiveness, Ministry of Economy and Competitiveness, Ministry of Economy and Competitiveness, Ministry of Economy and Competitiveness, European Regional Development Fund, Ministry of Economy and Competitiveness",11.0,Spain +23193294,CTCFBSDB,0.996337473,CTCFBSDB,0.996337473,,0,1,http://insulatordb.uthsc.edu,302,,"(35.1495,-90.0490)",http://web.archive.org/web/20221009190449/https://insulatordb.uthsc.edu/,2012-11-27,"Department of Microbiology, University of Tennessee Health Science Center, Memphis, TN 38163, USA.","Ziebarth JD, Bhattacharya A, Cui Y",,,85.0,United States +32294193,ctcRbase,0.997189045,ctcRbase,0.997189045,,0,1,http://www.origin-gene.cn/database/ctcRbase,301,,"(36.0649,120.3804)",no_wayback,2020-01-01,"Shanghai Key Laboratory of Regulatory Biology, Institute of Biomedical Sciences, School of Life Sciences, East China Normal University, No.500 Dongchuan Road, Shanghai, 200241 China.","Zhao L, Wu X, Li T, Luo J, Dong D",,"Technology and Education, Invigorating Health Care through Science, Jiangsu Provincial Key Medical Discipline",3.0,"China, China" +21214365,DBCAT,0.998600185,DBCAT,0.998600185,database of 
CpG,0.804415733,1,http://dbcat.cgm.ntu.edu.tw,200,,"(25.0478,121.5319)",http://web.archive.org/web/20220326195244/http://dbcat.cgm.ntu.edu.tw/,2011-01-08,"Research Center For Medical Excellence, National Taiwan University, Taipei, Taiwan.","Kuo HC, Lin PY, Chung TC, Chao CM, Lai LC, Tsai MH, Chuang EY",,,17.0, +21249531,DIADEM,0.989351451,DIADEM,0.989351451,DIgital reconstruction of Axonal and DEndritic Morphology,0.851490708,1,http://diademchallenge.org,301,,"(39.0570,-77.4441)",http://web.archive.org/web/20200627150549/http://diademchallenge.org:80/,2011-09-01,"Krasnow Institute for Advanced Study, George Mason University, Fairfax, VA, USA.","Brown KM, Barrionuevo G, Canty AJ, De Paola V, Hirsch JA, Jefferis GS, Lu J, Snippe M, Sugihara I, Ascoli GA",,"Medical Research Council, Medical Research Council, NINDS NIH HHS, PHS HHS",41.0,United States +21738316,CytReD,0.998036659,CytReD,0.998036659,Cytokine Receptor Database,0.98959893,1,http://www.cro-m.eu/CytReD,404,,,http://web.archive.org/web/20110814064149/http://www.cro-m.eu:80/CytReD/,2011-05-26,None,"Miele M, Sharma A, Capone F, Raucci R, Guerriero E, Colonna G, Castello G, Stasio MD, Costantini S",,,2.0, +21781283,DbMDR,0.997138441,DbMDR,0.997138441,,0,1,http://203.190.147.116/dbmdr,"HTTPConnectionPool(host='203.190.147.116', port=80): Max retries exceeded with url: /dbmdr (Caused by ConnectTimeoutError(, 'Connection to 203.190.147.116 timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20140429114127/http://203.190.147.116:80/dbmdr/,2011-09-06,"Biotechnology Division, Central Institute of Medicinal and Aromatic Plants, Council of Scientific and Industrial Research, Lucknow, UP, India.","Gupta S, Mishra M, Sen N, Parihar R, Dwivedi GR, Khan F, Sharma A",,,0.0,India +21782820,DFRMLI,0.90959398,DFRMLI,0.90959398,Dana-Farber Repository for,0.82518174,1,http://bio.dfci.harvard.edu/DFRMLI,"HTTPConnectionPool(host='bio.dfci.harvard.edu', port=80): Max retries exceeded with url: /DFRMLI (Caused by ConnectTimeoutError(, 'Connection to bio.dfci.harvard.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20150917084743/http://bio.dfci.harvard.edu:80/DFRMLI/,2011-07-18,"Cancer Vaccine Center, Dana-Farber Cancer Institute, Boston, MA 02115, USA.","Zhang GL, Lin HH, Keskin DB, Reinherz EL, Brusic V",,"NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS",14.0,United States +21936021,dbHCCvar,0.986464977,dbHCCvar,0.986464977,,0,1,http://GenetMed.fudan.edu.cn/dbHCCvar,"HTTPConnectionPool(host='genetmed.fudan.edu.cn', port=80): Max retries exceeded with url: /dbHCCvar (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2011-09-20,"State Key Laboratory of Genetic Engineering, Institute of Genetics, School of Life Sciences, Fudan University, Shanghai, People's Republic of China.","Yu XJ, Fang F, Tang CL, Yao L, Yu L, Yu L",,,9.0,China +21938213,DDTRP,0.977842283,DDTRP,0.977842283,Database of Drug Targets for Resistant Pathogens,0.933010811,1,http://bmi.icmr.org.in/DDTRP,301,,"(29.9657,76.8370)",no_wayback,2011-09-06,"ICMR-Biomedical Informatics Centre, Tuberculosis Research Centre (ICMR), Chetpet, Chennai-600031, Tamil Nadu, India.","Sundaramurthi JC, Ramanandan P, Brindha S, Subhasree CR, Prasad A, Kumaraswami V, Hanna LE",,,2.0,India 
+22058129,DistiLD,0.8471573,DistiLD,0.8471573,,0,1,http://distild.jensenlab.org,200,,"(55.6759,12.5655)",http://web.archive.org/web/20220529104206/http://distild.jensenlab.org/,2011-11-03,"Novo Nordisk Foundation Center for Protein Research, Faculty of Health Sciences, University of Copenhagen, Copenhagen, Denmark.","Pallejà A, Horn H, Eliasson S, Jensen LJ",,Novo Nordisk Foundation Center for Protein Research,17.0,Denmark +22079417,DACS-DB,0.99001509,DACS-DB,0.99001509,disease associated cytokine SNP database,0.952232748,1,http://www.iupui.edu,301,,"(39.1653,-86.5264)",http://web.archive.org/web/20221109220800/https://www.iupui.edu/,2011-11-10,"School of Informatics, Indiana University-Purdue University Indianapolis, Indianapolis, IN 46202, USA. sbhushan@iupui.edu","Bhushan S, Perumal NB",,,6.0,United States +22080506,DIGIT,0.994396985,DIGIT,0.994396985,Database of ImmunoGlobulins with Integrated Tools,0.9503698,1,http://biocomputing.it/digit,301,,"(41.6398,13.3411)",no_wayback,2011-11-10,"Department of Physics, Sapienza University of Rome, P.le A Moro 5-00185, Italy.","Chailyan A, Tramontano A, Marcatili P",,,26.0,Italy +22086958,DBTSS,0.997758806,DBTSS,0.997758806,DataBase of Transcriptional Start Sites,0.971812087,1,http://dbtss.hgc.jp,301,,"(35.6895,139.6917)",http://web.archive.org/web/20221017022147/https://dbtss.hgc.jp/,2011-11-15,"Frontier Research Initiative, Institute of Medical Science, Human Genome Center, Institute of Medical Science, The University of Tokyo, 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.","Yamashita R, Sugano S, Suzuki Y, Nakai K",,,29.0,Japan +22096234,dbDEPC,0.980938792,dbDEPC,0.980938792,,0,1,http://lifecenter.sgst.cn/dbdepc/index.do,"HTTPConnectionPool(host='lifecenter.sgst.cn', port=80): Max retries exceeded with url: /dbdepc/index.do (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not 
known'))",,,http://web.archive.org/web/20171206175426/http://lifecenter.sgst.cn:80/dbdepc/index.do,2011-11-16,"Key Laboratory of Systems Biology, Chinese Academy of Sciences, Shanghai 200031, PR of China.","He Y, Zhang M, Ju Y, Yu Z, Lv D, Sun H, Yuan W, He F, Zhang J, Li H, Li J, Wang-Sattler R, Li Y, Zhang G, Xie L",,,15.0,China +22102573,DBETH,0.990900735,DBETH,0.990900735,Database for Bacterial ExoToxins,0.92325345,1,http://www.hpppi.iicb.res.in/btox,301,,"(22.5439,88.3067)",no_wayback,2011-11-18,"Department of Structural Biology and Bioinformatics Division, Indian Institute of Chemical Biology, Council for Scientific and Industrial Research, Jadavpur University, Kolkata, WB 700 032, India.","Chakraborty A, Ghosh S, Chowdhary G, Maulik U, Chakrabarti S",,,27.0,India +22110027,DNAtraffic,0.997660697,DNAtraffic,0.997660697,,0,1,http://dnatraffic.ibb.waw.pl,"HTTPConnectionPool(host='dnatraffic.ibb.waw.pl', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to dnatraffic.ibb.waw.pl timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20171104225149/http://dnatraffic.ibb.waw.pl/,2011-11-22,"Department of Molecular Biology, Institute of Biochemistry and Biophysics, Polish Academy of Sciences, Warsaw, Poland.","Kuchta K, Barszcz D, Grzesiuk E, Pomorski P, Krwawicz J",,,1.0,Poland +22110032,DAMPD,0.989307284,DAMPD,0.989307284,Dragon Antimicrobial Peptide Database,0.977057718,1,http://apps.sanbi.ac.za/dampd,"HTTPConnectionPool(host='apps.sanbi.ac.za', port=80): Max retries exceeded with url: /dampd (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180629180233/http://apps.sanbi.ac.za:80/dampd/,2011-11-21,"South African National Bioinformatics Institute, The University of the Western Cape, 7535 Bellville, South Africa.","Seshadri Sundararajan V, Gabere MN, Pretorius A, Adam S, Christoffels A, Lehväslaiho M, Archer JA, Bajic VB",,,44.0,South Africa +"22135302, 27899673",DiseaseMeth,0.993306458,DiseaseMeth,0.993306458,human disease methylation database,0.805280375,2,http://bioinfo.hrbmu.edu.cn/diseasemeth,"HTTPConnectionPool(host='bioinfo.hrbmu.edu.cn', port=80): Max retries exceeded with url: /diseasemeth (Caused by ConnectTimeoutError(, 'Connection to bioinfo.hrbmu.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20200127055221/http://bioinfo.hrbmu.edu.cn:80/diseasemeth/,2016-11-29,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China., College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Lv J, Liu H, Su J, Wu X, Liu H, Li B, Xiao X, Wang F, Wu Q, Zhang Y, Xiong Y, Wei Y, Gu Y, Zhang S, Lyu J, Zhang B, Chen C, Zhu J, Wang Y, Liu H, Zhang Y",", ",", ",137.0,"China, China" +"22135305, 26827237",DOMMINO,0.927519023,DOMMINO,0.927519023,,0,2,http://dommino.org,"HTTPConnectionPool(host='dommino.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2016-01-30,"Informatics Institute and Department of Computer Science and Bond Life Science Center, University of Missouri, Columbia, MO 65211, USA., Informatics Institute, University of Missouri, Columbia, MO, USA.","Kuang X, Han JG, Zhao N, Pang B, Shyu CR, Korkin D, Kuang X, Dhroso A, Han JG, Shyu CR, Korkin D",", ",", ",14.0,"United States, United States" +22139934,Cube-DB,0.995983998,Cube-DB,0.995983998,,0,1,http://epsf.bmad.bii.a-star.edu.sg/cube/db/html/home.html,"HTTPConnectionPool(host='epsf.bmad.bii.a-star.edu.sg', port=80): Max retries exceeded with url: /cube/db/html/home.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190702051451/http://epsf.bmad.bii.a-star.edu.sg:80/cube/db/html/home.html,2011-12-01,"Bioinformatics Institute 30 Biopolis Street, #07-01 Matrix, Singapore 138671.","Zhang ZH, Bharatham K, Chee SM, Mihalek I",,,3.0,Singapore +22150118,dbANGIO,0.997428656,dbANGIO,0.997428656,,0,1,http://www.med.mun.ca/angio,301,,"(47.5649,-52.7093)",http://web.archive.org/web/20220805001332/https://www.med.mun.ca/angio/,2011-12-12,"Discipline of Genetics, Faculty of 
Medicine, Memorial University of Newfoundland, St. John's, Canada. savas@mun.ca",Savas S,,,6.0,Canada +22332784,DiatomCyc,0.997156441,DiatomCyc,0.997156441,,0,1,http://www.diatomcyc.org,"HTTPConnectionPool(host='www.diatomcyc.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180820100850/http://www.diatomcyc.org/,2012-03-31,"Department of Plant Systems Biology, VIB, B-9052 Gent, Belgium.","Fabris M, Matthijs M, Rombauts S, Vyverman W, Goossens A, Baart GJ",,,66.0,Belgium +22369658,DetoxiProt,0.993119836,DetoxiProt,0.993119836,,0,1,http://lifecenter.sgst.cn/detoxiprot,"HTTPConnectionPool(host='lifecenter.sgst.cn', port=80): Max retries exceeded with url: /detoxiprot (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20161015204343/http://lifecenter.sgst.cn:80/detoxiprot/,2011-11-30,"School of Life Science, Fudan University, HanDan Road 220#, Shanghai, 200433, China.","Yang Z, Yu Y, Yao L, Li G, Wang L, Hu Y, Wei H, Wang L, Hammami R, Razavi R, Zhong Y, Liang X",,,2.0,China +22467915,DeOri,0.925356305,DeOri,0.925356305,Database of Eukaryotic ORIs,0.895951286,1,http://tubic.tju.edu.cn/deori,301,,"(39.1422,117.1767)",http://web.archive.org/web/20220620073234/http://tubic.tju.edu.cn/deori/,2012-03-30,"Department of Physics, Tianjin University, Tianjin, China.","Gao F, Luo H, Zhang CT",,,14.0,China +22493695,DFL,0.954812447,DFL,0.954812447,Fish Library,0.858391941,1,http://www.digitalfishlibrary.org,"HTTPConnectionPool(host='www.digitalfishlibrary.org', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='www.digitalfishlibrary.org', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20221107052919/http://digitalfishlibrary.org/,2012-04-06,"Center for Scientific Computation in Imaging, University of California San Diego, La Jolla, California, United States of America.","Berquist RM, Gledhill KM, Peterson MW, Doan AH, Baxter GT, Yopak KE, Kang N, Walker HJ, Hastings PA, Frank LR",,,16.0,United States +22539672,e-Drug3D,0.993923992,e-Drug3D,0.993923992,,0,1,http://chemoinfo.ipmc.cnrs.fr/e-drug3d.html,301,,"(43.7031,7.2661)",http://web.archive.org/web/20190921203057/http://chemoinfo.ipmc.cnrs.fr:80/e-drug3d.html,2012-04-26,"Institut de Pharmacologie Moléculaire et Cellulaire, CNRS UMR7275, Université Nice-Sophia Antipolis, 660 route des lucioles, Valbonne, France.","Pihan E, Colliandre L, Guichou JF, Douguet D",,,47.0,France +"22782549, 25399423",dbSNO,0.99643296,dbSNO,0.99643296,,0,2,http://dbSNO.mbc.nctu.edu.tw,"HTTPConnectionPool(host='dbsno.mbc.nctu.edu.tw', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220418021838/http://dbsno.mbc.nctu.edu.tw/,2014-11-15,"Department of Computer Science and Engineering, Yuan Ze University, Taoyuan 320, Taiwan. 
francis@saturn.yu.edu.tw, Institute of Chemistry, Academia Sinica, Taipei 115, Taiwan.","Lee TY, Chen YJ, Lu CT, Ching WC, Teng YC, Huang HD, Chen YJ, Chen YJ, Lu CT, Su MG, Huang KY, Ching WC, Yang HH, Liao YC, Chen YJ, Lee TY",", ",", ",90.0, +22829745,CyanoEXpress,0.994646311,CyanoEXpress,0.994646311,,0,1,http://cyanoexpress.sysbiolab.eu,503,,,http://web.archive.org/web/20220615190318/http://www.cyanoexpress.sysbiolab.eu/,2012-07-06,"Institute for Biotechnology and Bioengineering (Laboratório Associado), Centre for Molecular and Structural Biomedicine, University of Algarve, Campus de Gambelas, 8005-139 Faro, Portugal.","Hernandez-Prieto MA, Futschik ME",,,30.0,Portugal +22843230,CYP-nsSNP,0.985920737,CYP-nsSNP,0.985920737,,0,1,http://cypdatabase.sjtu.edu.cn,"HTTPConnectionPool(host='cypdatabase.sjtu.edu.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20130703074726/http://cypdatabase.sjtu.edu.cn,2012-06-01,"Key Laboratory of Microbial Metabolism, Ministry of Education, Luc Montagnier Biomedical Research Institute, School of Life Sciences and Biotechnology, Shanghai Jiao Tong University, Shanghai, 200240, China.","Zhang T, Zhou Q, Pang Y, Wang Y, Jin C, Huo J, Liu LA, Wei D",,,0.0,China +22917656,dbDiarrhea,0.997005939,dbDiarrhea,0.997005939,,0,1,http://www.juit.ac.in/attachments/dbdiarrhea/diarrhea_home.html,"HTTPConnectionPool(host='www.juit.ac.in', port=80): Max retries exceeded with url: /attachments/dbdiarrhea/diarrhea_home.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20221017143040/https://www.juit.ac.in/attachments/dbdiarrhea/diarrhea_home.html,2012-08-13,"Department of Biotechnology & Bioinformatics, Jaypee University of Information Technology, Waknaghat, Solan, India. 
jayashree_ramana@yahoo.co.in","Ramana J, Tamanna",,,9.0,India +"22962312, 24150940",DECIPHER,0.995207131,DECIPHER,0.995207131,,0,2,http://decipher.sanger.ac.uk,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20210319112948/https://decipher.sanger.ac.uk/,2013-10-22,"The Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SA, UK., Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK and Cambridge University Department of Medical Genetics, Addenbrooke's Hospital, Cambridge CB2 2QQ, UK.","Swaminathan GJ, Bragin E, Chatzimichali EA, Corpas M, Bevan AP, Wright CF, Carter NP, Hurles ME, Firth HV, Bragin E, Chatzimichali EA, Wright CF, Hurles ME, Firth HV, Bevan AP, Swaminathan GJ",", ","Wellcome Trust, Wellcome Trust",143.0, +23074185,DARNED,0.997423947,DARNED,0.997423947,DAtabase of RNa,0.902838162,1,http://darned.ucc.ie,302,,"(51.8980,-8.4706)",http://web.archive.org/web/20220823010900/https://darned.ucc.ie/,2012-10-15,"Biochemistry Department, University College Cork, Cork, Ireland.","Kiran AM, O'Mahony JJ, Sanjeev K, Baranov PV",,Wellcome Trust,58.0,Ireland +23161684,dcGO,0.997635027,dcGO,0.997635027,,0,1,http://supfam.org/SUPERFAMILY/dcGO,301,,"(52.2000,0.1167)",http://web.archive.org/web/20220804175656/https://supfam.org/SUPERFAMILY/dcGO/,2012-11-17,"Department of Computer Science, University of Bristol, The Merchant Venturers Building, Bristol BS8 1UB, UK. 
hfang@cs.bris.ac.uk","Fang H, Gough J",,Biotechnology and Biological Sciences Research Council,55.0, +"23172289, 23494302",dictyBase,0.994000256,dictyBase,0.994000256,,0,2,http://dictybase.org,200,,"(42.0411,-87.6901)",http://web.archive.org/web/20220710050915/http://dictybase.org/,2013-01-01,"Biomedical Informatics Center and Center for Genetic Medicine, Northwestern Univesity, Feinberg School of Medicine, 750 North Lake Shore Drive, Chicago, IL 60611, USA., dictyBase and the Dicty Stock Center, Center for Genetic Medicine, Northwestern University, Chicago, IL, USA. pfey@northwestern.edu","Basu S, Fey P, Pandit Y, Dodson R, Kibbe WA, Chisholm RL, Fey P, Dodson RJ, Basu S, Chisholm RL",", ","NHGRI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCATS NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",136.0,"United States, United States" +23175607,CyanoLyase,0.991719246,CyanoLyase,0.991719246,,0,1,http://cyanolyase.genouest.org,200,,"(48.1120,-1.6743)",no_wayback,2012-11-21,"GenOuest Platform, France.","Bretaudeau A, Coste F, Humily F, Garczarek L, Le Corguillé G, Six C, Ratin M, Collin O, Schluchter WM, Partensky F",,,18.0,France +23185043,DoBISCUIT,0.998316765,DoBISCUIT,0.998316765,Database of BIoSynthesis clusters,0.979332394,1,http://www.bio.nite.go.jp/pks,302,,"(35.6895,139.6917)",http://web.archive.org/web/20190220235823/http://www.bio.nite.go.jp:80/pks/,2012-11-26,"Biological Resource Center, National Institute of Technology and Evaluation (NBRC), 2-49-10 Nishihara, Shibuya-ku, Tokyo 151-0006, Japan.","Ichikawa N, Sasagawa M, Yamamoto M, Komaki H, Yoshida Y, Yamazaki S, Fujita N",,,47.0,Japan +23185330,CyanoPhyChe,0.996099055,CyanoPhyChe,0.996099055,,0,1,http://bif.uohyd.ac.in/cpc,301,,"(17.3840,78.4564)",http://web.archive.org/web/20191231183521/http://bif.uohyd.ac.in:80/cpc/,2012-11-21,"Department of Plant Sciences, School of Life Sciences, University of Hyderabad, Hyderabad, Andhra Pradesh, India.","Arun PV, Bakku RK, 
Subhashini M, Singh P, Prabhu NP, Suzuki I, Prakash JS",,,4.0,India +"23193290, 26578568, 30418626",dbPTM,0.998039007,dbPTM,0.998039007,,0,3,http://dbPTM.mbc.nctu.edu.tw,"HTTPConnectionPool(host='dbptm.mbc.nctu.edu.tw', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200716131448/http://dbptm.mbc.nctu.edu.tw:80/,2019-01-01,"Department of Computer Science and Engineering, Yuan Ze University, Chung-Li 320, Taiwan., Department of Computer Science and Engineering, Yuan Ze University, Taoyuan 320, Taiwan., Warshel Institute for Computational Biology, The Chinese University of Hong Kong, Shenzhen 518172, China.","Lu CT, Huang KY, Su MG, Lee TY, Bretaña NA, Chang WC, Chen YJ, Chen YJ, Huang HD, Huang KY, Su MG, Kao HJ, Hsieh YC, Jhong JH, Cheng KH, Huang HD, Lee TY, Huang KY, Lee TY, Kao HJ, Ma CT, Lee CC, Lin TH, Chang WC, Huang HD",", , ",", , Chinese University of Hong Kong",242.0,"China, Hong Kong" +23193291,DGVa,0.929166198,DGVa,0.929166198,,0,1,"http://www.ebi.ac.uk/dgva, http://www.ncbi.nlm.nih.gov/dbvar","301, 301",,"(51.5085,-0.1257), (38.9896,-77.1538)","http://web.archive.org/web/20220909004141/https://www.ebi.ac.uk/dgva/, http://web.archive.org/web/20221110045534/https://www.ncbi.nlm.nih.gov/dbvar/",2012-11-27,"European Bioinformatics Institute, Hinxton, CB10 1SD Cambridgeshire, UK.","Lappalainen I, Lopez J, Skipper L, Hefferon T, Spalding JD, Garner J, Chen C, Maguire M, Corbett M, Zhou G, Paschall J, Ananiev V, Flicek P, Church DM",,"Intramural NIH HHS, Wellcome Trust",125.0, +23197658,DGA,0.992823601,DGA,0.992823601,Disease and Gene Annotations database,0.900466466,1,http://dga.nubic.northwestern.edu,"HTTPConnectionPool(host='dga.nubic.northwestern.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not 
known'))",,,http://web.archive.org/web/20221028174325/http://dga.nubic.northwestern.edu/,2012-11-28,"The Department of Electronics and Information Engineering, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China.","Peng K, Xu W, Zheng J, Huang K, Wang H, Tong J, Lin Z, Liu J, Cheng W, Fu D, Du P, Kibbe WA, Lin SM, Xia T",,"NCI NIH HHS, NCRR NIH HHS, NCRR NIH HHS, NCATS NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCRR NIH HHS, NCRR NIH HHS, NCI NIH HHS, NCRR NIH HHS, NCATS NIH HHS, NCRR NIH HHS",30.0,China +23203878,D(2)P(2,0.978753158,D(2)P(2,0.978753158,Database of Disordered Protein Prediction,0.95127369,1,http://d2p2.pro,301,,"(52.2000,0.1167)",http://web.archive.org/web/20221017003137/https://d2p2.pro/,2012-11-29,"Department of Computer Science, University of Bristol, Bristol BS8 1UB, UK. Matt.Oates@bristol.ac.uk","Oates ME, Romero P, Ishida T, Ghalwash M, Mizianty MJ, Xue B, Dosztányi Z, Uversky VN, Obradovic Z, Kurgan L, Dunker AK, Gough J",,Biotechnology and Biological Sciences Research Council,265.0, +23264352,DegraBase,0.968228161,DegraBase,0.968228161,,0,1,http://wellslab.ucsf.edu/degrabase,"HTTPConnectionPool(host='wellslab.ucsf.edu', port=80): Max retries exceeded with url: /degrabase (Caused by ConnectTimeoutError(, 'Connection to wellslab.ucsf.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220812145105/https://wellslab.ucsf.edu/degrabase/,2012-12-20,"Department of Pharmaceutical Chemistry, University of California-San Francisco, CA 94158, USA.","Crawford ED, Seaman JE, Agard N, Hsu GW, Julien O, Mahrus S, Nguyen H, Shimbo K, Yoshihara HA, Zhuang M, Chalkley RJ, Wells JA",,"NCRR NIH HHS, NIGMS NIH HHS, NCRR NIH HHS, NCRR NIH HHS, NCRR NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",66.0,United States +23275696,DB Dehydrogenase,0.939671206,DB Dehydrogenase,0.939671206,,0,1,http://www.bifku.in/DBD,"HTTPConnectionPool(host='www.bifku.in', port=80): Max retries exceeded with url: /DBD (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20150821185741/http://www.bifku.in/DBD/,2012-10-13,"Department of Biochemistry & Biophysics, University of Kalyani, Kalyani, Dt. - Nadia, West Bengal, India.","Nandy SK, Bhuyan R, Seal A",,,1.0,India +23415072,DESMSCI,0.997198522,DESMSCI,0.997198522,Dragon Exploration System on Marine Sponge Compounds Interactions,0.903089372,1,http://www.cbrc.kaust.edu.sa/desmsci,302,,"(37.5331,-122.2486)",http://web.archive.org/web/20220127103354/https://www.cbrc.kaust.edu.sa/desmsci/,2013-02-16,"King Abdullah University of Science and Technology (KAUST), Computational Bioscience Research center, Thuwal, 23955-6900, Saudi Arabia. 
vladimir.bajic@kaust.edu.sa.","Sagar S, Kaur M, Radovanovic A, Bajic VB",,,11.0,Saudi Arabia +23475683,CYP-allele,0.778351414,CYP-allele,0.778351414,50,0.744195342,1,http://www.cypalleles.ki.se,302,,"(59.3294,18.0687)",http://web.archive.org/web/20220423214848/http://www.cypalleles.ki.se/,2013-01-01,"Section for Pharmacogenetics, Department of Physiology and Pharmacology, Karolinska Institute, Stockholm, Sweden.","Sim SC, Ingelman-Sundberg M",,,48.0,Sweden +23500449,DEER,0.993173659,DEER,0.993173659,,0,1,http://bsb.kiz.ac.cn:90/DEER,"HTTPConnectionPool(host='bsb.kiz.ac.cn', port=90): Max retries exceeded with url: /DEER (Caused by ConnectTimeoutError(, 'Connection to bsb.kiz.ac.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160316055008/http://bsb.kiz.ac.cn:90/DEER/,2013-03-13,"State Key Laboratory of Genetic Resources and Evolution, Kunming Institute of Zoology, Chinese Academy of Sciences, Kunming 650223, China. yuqi@mail.kiz.ac.cn","Yu Q, Huang JF",,,3.0,China +23516335,DEBDOM,0.996568859,DEBDOM,0.996568859,Database Exploring Banana Diversity of,0.850851814,1,http://ibsd.gov.in/debdom,302,,"(24.8081,93.9442)",http://web.archive.org/web/20191231034830/http://ibsd.gov.in:80/debdom/,2013-03-02,Medicinal Plants and Horticultural Resources Division.,"Singh WA, Gopalrao SB, Gourshyam T, Handique PJ, Devi HS",,,0.0, +23529715,Database of Instruments for Resource Use Measurement,0.910336412,DIRUM,0.710099598,Database of Instruments for Resource Use Measurement,0.910336412,1,http://www.dirum.org,200,,"(53.4106,-2.9779)",http://web.archive.org/web/20221102122518/https://www.dirum.org/,2013-06-01,"MRC ConDuCT Hub, School of Social and Community Medicine, University of Bristol, Canynge Hall, 39 Whatley Road, Bristol, BS8 2PS, UK. 
joanna.thorn@bristol.ac.uk","Thorn JC, Coast J, Cohen D, Hollingworth W, Knapp M, Noble SM, Ridyard C, Wordsworth S, Hughes D",,"Medical Research Council, Medical Research Council, Economic and Social Research Council, Medical Research Council",46.0, +23550061,curatedOvarianData,0.78992039,curatedOvarianData,0.78992039,,0,1,http://bcb.dfci.harvard.edu/ovariancancer,"HTTPConnectionPool(host='bcb.dfci.harvard.edu', port=80): Max retries exceeded with url: /ovariancancer (Caused by ConnectTimeoutError(, 'Connection to bcb.dfci.harvard.edu timed out. (connect timeout=5)'))",,,no_wayback,2013-04-02,"Department of Biostatistics and Computational Biology, Dana-Farber Cancer Institute, Boston, MA 02115, USA.","Ganzfried BF, Riester M, Haibe-Kains B, Risch T, Tyekucheva S, Jazic I, Wang XV, Ahmadifar M, Birrer MJ, Parmigiani G, Huttenhower C, Waldron L",,NCI NIH HHS,75.0,United States +23842462,DBATE,0.993781567,DBATE,0.993781567,DataBase of Alternative Transcripts Expression,0.966058254,1,http://bioinformatica.uniroma2.it/DBATE,301,,"(41.8661,12.5896)",http://web.archive.org/web/20220616054144/http://bioinformatica.uniroma2.it/DBATE/,2013-07-09,"Centre for Molecular Bioinformatics, Department of Biology, University of Rome Tor Vergata, Via della Ricerca Scientifica s.n.c., Rome 00133, Italy.","Bianchi V, Colantoni A, Calderone A, Ausiello G, Ferrè F, Helmer-Citterich M",,,6.0,Italy +23846594,DoSA,0.991789222,DoSA,0.991789222,Database of Structural Alignments,0.871132361,1,http://bo-protscience.fr/dosa,"HTTPConnectionPool(host='bo-protscience.fr', port=80): Max retries exceeded with url: /dosa (Caused by ConnectTimeoutError(, 'Connection to bo-protscience.fr timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20140722194020/http://bo-protscience.fr/dosa/,2013-07-11,"Dynamique des Structures et Interactions des Macromolécules Biologiques, UMR-S INSERM S665, Faculté des Sciences et Technologies, Université de La Réunion, F-97715 Saint Denis Messag Cedex 09, La Réunion, France.","Mahajan S, Agarwal G, Iftekhar M, Offmann B, de Brevern AG, Srinivasan N",,,0.0,France +23897986,EADB,0.976858467,EADB,0.976858467,Estrogenic Activity Database,0.930413914,1,http://www.fda.gov/ScienceResearch/BioinformaticsTools/EstrogenicActivityDatabaseEADB/default.htm,301,,"(34.0522,-118.2437)",http://web.archive.org/web/20190423094136/https://www.fda.gov/ScienceResearch/BioinformaticsTools/EstrogenicActivityDatabaseEADB/default.htm,2013-07-28,"* Division of Bioinformatics and Biostatistics, National Center for Toxicological Research, U.S. Food and Drug Administration, Jefferson, Arkansas 72079;","Shen J, Xu L, Fang H, Richard AM, Bray JD, Judson RS, Zhou G, Colatsky TJ, Aungst JL, Teng C, Harris SC, Ge W, Dai SY, Su Z, Jacobs AC, Harrouk W, Perkins R, Tong W, Hong H",,,33.0, +23973272,Diseasecard,0.896775365,Diseasecard,0.896775365,,0,1,http://bioinformatics.ua.pt/diseasecard,302,,"(40.6443,-8.6455)",http://web.archive.org/web/20220620133305/https://bioinformatics.ua.pt/diseasecard/,2013-08-21,"DETI/IEETA, Universidade de Aveiro, Portugal. Electronic address: pedrolopes@ua.pt.","Lopes P, Oliveira JL",,,8.0,Portugal +24122041,DGIdb,0.997908175,DGIdb,0.997908175,Drug-Gene Interaction database,0.983872821,1,http://dgidb.org,"HTTPConnectionPool(host='dgidb.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to dgidb.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20221103233157/https://www.dgidb.org/,2013-10-13,"1] The Genome Institute, Washington University School of Medicine, St. Louis, Missouri, USA. [2] Department of Genetics, Washington University School of Medicine, St. 
Louis, Missouri, USA. [3].","Griffith M, Griffith OL, Coffman AC, Weible JV, McMichael JF, Spies NC, Koval J, Das I, Callaway MB, Eldred JM, Miller CA, Subramanian J, Govindan R, Kumar RD, Bose R, Ding L, Walker JR, Larson DE, Dooling DJ, Smith SM, Ley TJ, Mardis ER, Wilson RK",,"NCI NIH HHS, NHGRI NIH HHS",246.0,"United States, United States" +24194603,dbPSHP,0.997607529,dbPSHP,0.997607529,,0,1,http://jjwanglab.org/dbpshp,"HTTPConnectionPool(host='jjwanglab.org', port=80): Max retries exceeded with url: /dbpshp (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200122190847/http://jjwanglab.org:80/dbpshp,2013-11-04,"Department of Biochemistry, LKS Faculty of Medicine, The University of Hong Kong, Hong Kong SAR, China, Shenzhen Institute of Research and Innovation, The University of Hong Kong, Shenzhen, Guangdong 518057, China, Department of Anaesthesiology, LKS Faculty of Medicine, The University of Hong Kong, Hong Kong SAR, China, Department of Pathology, LKS Faculty of Medicine, The University of Hong Kong, Hong Kong SAR, China, Department of Psychiatry, LKS Faculty of Medicine, The University of Hong Kong, Hong Kong SAR, China, State Key Laboratory in Cognitive and Brain Sciences, The University of Hong Kong, Hong Kong SAR, China and Centre for Genomic Sciences, LKS Faculty of Medicine, The University of Hong Kong, Hong Kong SAR, China.","Li MJ, Wang LY, Xia Z, Wong MP, Sham PC, Wang J",,,28.0,"China, China, China, China, China, China, China, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong" +24203711,DrugBank,0.995137691,DrugBank,0.995137691,,0,1,http://www.drugbank.ca,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20200917172220/https://www.drugbank.ca/,2013-11-06,"Department of Computing Science, University of Alberta, Edmonton, AB, Canada T6G 2E8, Department Biological 
Sciences, University of Alberta, Edmonton, AB, Canada T6G 2E8, Faculty of Pharmacy and Pharmaceutical Sciences, University of Alberta, Edmonton, AB, Canada T6G 2N8 and National Institute for Nanotechnology, 11421 Saskatchewan Drive, Edmonton, AB, Canada T6G 2M9.","Law V, Knox C, Djoumbou Y, Jewison T, Guo AC, Liu Y, Maciejewski A, Arndt D, Wilson M, Neveu V, Tang A, Gabriel G, Ly C, Adamjee S, Dame ZT, Han B, Zhou Y, Wishart DS",,Canadian Institutes of Health Research,914.0,"Canada, Canada, Canada, Canada" +"24214964, 26635391, 30542988, 31701128",DriverDB,0.995986342,DriverDB,0.995986342,,0,4,http://ngs.ym.edu.tw/driverdb,301,,"(25.0478,121.5319)",http://web.archive.org/web/20220327082921/http://ngs.ym.edu.tw/driverdb/,2020-01-01,"Pediatric Neurosurgery, Department of Surgery, Cheng Hsin General Hospital, Taipei 11220, Taiwan, VGH-YM Genomic Research Center, National Yang-Ming University, Taipei 11221, Taiwan, Institute of Biomedical Informatics, National Yang-Ming University, Taipei 11221, Taiwan, Information Technology Office, Taipei Veterans General Hospital, Taipei 11217, Taiwan, Institute of Microbiology and Immunology, National Yang-Ming University, Taipei 11221, Taiwan and Department of Education and Research, Taipei City Hospital, Taipei 10341, Taiwan., Institute of Biomedical Informatics, National Yang-Ming University, Taipei 11221, Taiwan Center for Systems and Synthetic Biology, National Yang-Ming University, Taipei, 11221, Taiwan., Graduate Institute of Biomedical Sciences, China Medical University, Taichung, Taiwan., Graduate Institute of Biomedical Science, China Medical University, Taichung 40403, Taiwan.","Cheng WC, Chung IF, Chen CY, Sun HJ, Fen JJ, Tang WC, Chang TY, Wong TT, Wang HW, Chung IF, Chen CY, Su SC, Li CY, Wu KJ, Wang HW, Cheng WC, Liu SH, Cheng WC, Liu SH, Shen PC, Chen CY, Hsu AN, Cho YC, Lai YL, Chen FH, Li CY, Wang SC, Chen M, Chung IF, Cheng WC",", , , ",", , , Ministry of Science and Technology, Ministry of Science and 
Technology, China Medical University, Ministry of Science and Technology",141.0,"China, China" +24214966,DOOR,0.986369848,DOOR,0.986369848,,0,1,http://csbl.bmb.uga.edu/DOOR,301,,"(33.9609,-83.3779)",http://web.archive.org/web/20190416053419/http://csbl.bmb.uga.edu:80/DOOR/,2013-11-07,"Computational Systems Biology Laboratory, Department of Biochemistry and Molecular Biology, and Institute of Bioinformatics, University of Georgia, Athens, GA 30602, USA, BioEnergy Science Center (BESC), Oak Ridge National Laboratory, Oak Ridge, Tennessee 37831, USA, School of Mathematics, Shandong University, Jinan, Shandong 250100, China, College of Computer Science and Technology, Jilin University, Changchun, Jilin 130012, China and College of Computer Science, Central China Normal University, Wuhan, Hubei 430079, China.","Mao X, Ma Q, Zhou C, Chen X, Zhang H, Yang J, Mao F, Lai W, Xu Y",,,101.0,"China, China, China, China, Georgia, United States, United States" +24225319,DNASU,0.963655591,DNASU,0.963655591,,0,1,"http://dnasu.asu.edu, http://dnasu.org","301, 301",,"(33.4148,-111.9093), (33.4148,-111.9093)","no_wayback, http://web.archive.org/web/20130601025445/http://dnasu.org/",2013-11-12,"Virginia G. Piper Center for Personalized Diagnostics, Biodesign Institute, Arizona State University, 1001 S. McAllister Dr. 
Tempe, AZ 85287-6401, USA and LabGenius, 20-22 Bedford Row, London, WC1R 4JS, UK.","Seiler CY, Park JG, Sharma A, Hunter P, Surapaneni P, Sedillo C, Field J, Algar R, Price A, Steel J, Throop A, Fiacco M, LaBaer J",,NIGMS NIH HHS,107.0,United States +24297256,dbGaP,0.997724652,dbGaP,0.997724652,Database of Genotypes and Phenotypes,0.917498708,1,http://www.ncbi.nlm.nih.gov/gap,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221111124902/https://www.ncbi.nlm.nih.gov/gap/,2013-12-01,"Information Engineering Branch, National Center for Biotechnology Information, Bethesda, MD 20894, USA.","Tryka KA, Hao L, Sturcke A, Jin Y, Wang ZY, Ziyabari L, Lee M, Popova N, Sharopova N, Kimura M, Feolo M",,Intramural NIH HHS,213.0,United States +24302579,DPRP,0.99437356,DPRP,0.99437356,,0,1,http://syslab.nchu.edu.tw/DPRP,"HTTPConnectionPool(host='syslab.nchu.edu.tw', port=80): Max retries exceeded with url: /DPRP (Caused by ConnectTimeoutError(, 'Connection to syslab.nchu.edu.tw timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20191220134842/http://syslab.nchu.edu.tw:80/DPRP/,2013-12-02,"Institute of Genomics and Bioinformatics, National Chung Hsing University, Taichung 402, Taiwan, Department of Computer Science and Engineering, National Chung Hsing University, Taichung 402, Taiwan, Department of Genetics, Geisel School of Medicine at Dartmouth, Hanover, NH, USA, Agricultural Biotechnology Center, National Chung Hsing University, Taichung 402, Taiwan, Institute for Quantitative Biomedical Sciences, Geisel School of Medicine at Dartmouth, Lebanon, NH, USA and Norris Cotton Cancer Center, Geisel School of Medicine at Dartmouth, Lebanon, NH, USA.","Tzeng DT, Tseng YT, Ung M, Liao IE, Liu CC, Cheng C",,,1.0,"Lebanon, Lebanon, United States, United States, United States" +24307774,DIACAN,0.994966656,DIACAN,0.994966656,Antidiabetic and Anticancer Medicinal Plants 
Database,0.987637103,1,http://www.kaubic.in/diacan,302,,"(33.7490,-84.3880)",http://web.archive.org/web/20220617020559/http://www.kaubic.in/diacan,2013-11-11,"Bioinformatics Centre (DIC), College of Horticulture, Kerala Agricultural University, KAU (PO), Vellanikkara, Kerala, India, 680656.","James P, Mathai VA, Shajikumar S, Pereppadan PA, Sudha P, Keshavachandran R, Nazeem PA",,,1.0,India +24344970,CVDHD,0.995064616,CVDHD,0.995064616,cardiovascular disease herbal database,0.954253152,1,http://pkuxxj.pku.edu.cn/CVDHD,"HTTPConnectionPool(host='pkuxxj.pku.edu.cn', port=80): Max retries exceeded with url: /CVDHD (Caused by ConnectTimeoutError(, 'Connection to pkuxxj.pku.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160428100209/http://pkuxxj.pku.edu.cn:80/CVDHD/,2013-12-18,None,"Gu J, Gui Y, Chen L, Yuan G, Xu X",,,13.0, +24434032,DBM-DB,0.997007683,DBM-DB,0.997007683,diamondback moth Genome Database,0.987979064,1,http://iae.fafu.edu.cn/DBM,"HTTPConnectionPool(host='iae.fafu.edu.cn', port=80): Max retries exceeded with url: /DBM (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20201127015801/http://iae.fafu.edu.cn/DBM/,2014-01-16,"Institute of Applied Ecology, Fujian Agriculture and Forestry University, Fuzhou 350002, China, Faculty of Life Sciences, Fujian Agriculture and Forestry University, Fuzhou 350002, China, Key Laboratory of Integrated Pest Management for Fujian-Taiwan Crops, Ministry of Agriculture, Fuzhou 350002, China, School of Molecular and Biomedical Science, The University of Adelaide, Adelaide SA 5005, Australia and Department of Botany, University of British Columbia, 3529-6270 University Boulevard, Vancouver, BC V6T 1Z4, Canada.","Tang W, Yu L, He W, Yang G, Ke F, Baxter SW, You S, Douglas CJ, You M",,,34.0,"Australia, Canada, China, China, China" 
+24548788,DR-GAS,0.991618946,DR-GAS,0.991618946,,0,1,http://www.bioinfoindia.org/drgas,301,,"(45.5088,-73.5878)",http://web.archive.org/web/20220325225633/http://www.bioinfoindia.org/drgas/,2014-02-16,"Department of Biotechnology and Bioinformatics, Jaypee University of Information Technology (JUIT), Waknaghat, Solan, HP 173234, India. Electronic address: manika.sehgal22@gmail.com.","Sehgal M, Singh TR",,Department of Science and Technology (DST),3.0,India +24618344,Drug2Gene,0.990610043,Drug2Gene,0.990610043,,0,1,http://www.drug2gene.com,"HTTPConnectionPool(host='www.drug2gene.com', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.drug2gene.com timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20200226091122/http://www.drug2gene.com:80/,2014-03-11,None,"Roider HG, Pavlova N, Kirov I, Slavov S, Slavov T, Uzunov Z, Weiss B",,,16.0, +24647629,DBatVir,0.997140765,DBatVir,0.997140765,database of bat-associated viruses,0.885839264,1,http://www.mgc.ac.cn/DBatVir,301,,"(39.9075,116.3972)",http://web.archive.org/web/20221016222956/http://mgc.ac.cn/DBatVir/,2014-03-18,"MOH Key Laboratory of Systems Biology of Pathogens, Institute of Pathogen Biology, Chinese Academy of Medical Sciences & Peking Union Medical College, Beijing, China.","Chen L, Liu B, Yang J, Jin Q",,,55.0,China +24663501,DrugPath,0.983626783,DrugPath,0.983626783,,0,1,"http://www.drugpath.org, http://mimi.ncibi.org","301, 200",,"(40.8043,-74.0121), (42.2776,-83.7409)","http://web.archive.org/web/20201204234527/http://drugpath.org/, http://web.archive.org/web/20220615142402/http://mimi.ncibi.org/",2014-03-25,"Cancer Center and Departments of Cell Biology and Biochemistry, Pediatrics, and Internal Medicine, School of Medicine, Texas Tech University Health Sciences Center, 3601 4th St, STOP 9445, Lubbock, TX, 79430, USA.","Shah ED, Fisch BM, Arceci RJ, Buckley JD, Reaman GH, Sorensen PH, Triche TJ, Reynolds CP",,,0.0,United States 
+24700709,DBDB,0.992844542,DBDB,0.992844542,Developmental Brain Disorders Database,0.949120533,1,http://www.dbdb.urmc.rochester.edu/home,"HTTPConnectionPool(host='www.dbdb.urmc.rochester.edu', port=80): Max retries exceeded with url: /home (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190321223006/https://www.dbdb.urmc.rochester.edu/home,2014-04-03,"Center for Integrative Brain Research, Seattle Children's Research Institute, Seattle, Washington; Division of Genetic Medicine, Department of Pediatrics, University of Washington, Seattle, Washington.","Mirzaa GM, Millen KJ, Barkovich AJ, Dobyns WB, Paciorkowski AR",,"NINDS NIH HHS, NINDS NIH HHS, NINDS NIH HHS, NINDS NIH HHS, NINDS NIH HHS, NINDS NIH HHS, NINDS NIH HHS, NINDS NIH HHS, NINDS NIH HHS",9.0, +24790154,dbGSH,0.994318962,dbGSH,0.994318962,,0,1,http://csb.cse.yzu.edu.tw/dbGSH,404,,,http://web.archive.org/web/20200220160743/http://csb.cse.yzu.edu.tw:80/dbgsh/,2014-04-29,"Institute of Chemistry, Academia Sinica, Taipei 115, Taiwan and Department of Computer Science and Engineering, Yuan Ze University, Taoyuan 320, Taiwan.","Chen YJ, Lu CT, Lee TY, Chen YJ",,,30.0, +24918550,dbCerEx,0.99592483,dbCerEx,0.99592483,,0,1,http://128.135.207.10/dbCerEx,"HTTPConnectionPool(host='128.135.207.10', port=80): Max retries exceeded with url: /dbCerEx (Caused by ConnectTimeoutError(, 'Connection to 128.135.207.10 timed out. (connect timeout=5)'))",,,no_wayback,2014-06-11,"Hubei Maternal and Child Health Hospital, Wuhan, Hubei, P.R. 
China.","Zhou L, Zheng W, Luo M, Feng J, Jin Z, Wang Y, Zhang D, Tang Q, He Y",,,0.0,China +25002814,DOR,0.974434396,DOR,0.974434396,Database of Olfactory Receptors,0.88843143,1,http://caps.ncbs.res.in/DOR,301,,"(13.0637,77.5674)",http://web.archive.org/web/20210920210419/http://caps.ncbs.res.in/DOR/,2014-06-12,"National Center for Biological Sciences (TIFR), Bangalore, India.","Nagarathnam B, Karpe SD, Harini K, Sankar K, Iftekhar M, Rajesh D, Giji S, Archunan G, Balakrishnan V, Gromiha MM, Nemoto W, Fukui K, Sowdhamini R",,,11.0,India +25228099,DruGeVar,0.996403813,DruGeVar,0.996403813,,0,1,http://drugevar.genomicmedicinealliance.org,"HTTPConnectionPool(host='drugevar.genomicmedicinealliance.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-09-09,"Department of Pharmacy, School of Health Sciences, University of Patras, Patras, Greece.","Dalabira E, Viennas E, Daki E, Komianou A, Bartsakoulia M, Poulas K, Katsila T, Tzimas G, Patrinos GP",,,7.0,Greece +25232097,DAA,0.846934438,DAA,0.846934438,Ageing Atlas,0.621505678,1,http://ageing-map.org,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20221007172024/https://ageing-map.org/,2014-09-17,"Integrative Genomics of Ageing Group, Institute of Integrative Biology, University of Liverpool, Liverpool, UK.","Craig T, Smelick C, Tacutu R, Wuttke D, Wood SH, Stanley H, Janssens G, Savitskaya E, Moskalev A, Arking R, de Magalhães JP",,Biotechnology and Biological Sciences Research Council,30.0, +25278960,DaVIE,0.972131968,DaVIE,0.972131968,,0,1,http://echelon.cmmt.ubc.ca/dbaccess,"HTTPConnectionPool(host='echelon.cmmt.ubc.ca', port=80): Max retries exceeded with url: /dbaccess (Caused by ConnectTimeoutError(, 'Connection to echelon.cmmt.ubc.ca timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20141009033455/http://echelon.cmmt.ubc.ca:80/dbaccess/,2014-09-18,"Centre for Molecular Medicine and Therapeutics, Child and Family Research Institute, University of British Columbia Vancouver, BC, Canada.","Fejes AP, Jones MJ, Kobor MS",,,4.0,Canada +25326239,DEOP,0.958839059,DEOP,0.958839059,ragon Explorer of Osmoprotection associated Pathways,0.914404387,1,http://www.cbrc.kaust.edu.sa/deop,302,,"(37.5331,-122.2486)",http://web.archive.org/web/20220320004737/https://www.cbrc.kaust.edu.sa/deop/,2014-10-17,"King Abdullah University of Science and Technology (KAUST); Computational Bioscience Research Centre (CBRC); Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE); Thuwal, Jeddah, 23955-6900, Saudi Arabia.","Bougouffa S, Radovanovic A, Essack M, Bajic VB",,,10.0,Saudi Arabia +25332398,DEPOD,0.997329473,DEPOD,0.997329473,human DEPhOsphorylation Database,0.98109927,1,"http://www.depod.org, http://www.koehn.embl.de/depod","405, 302",,", (49.4095,8.6935)","no_wayback, http://web.archive.org/web/20160527061427/http://www.koehn.embl.de/depod/",2014-10-20,"European Molecular Biology Laboratory, Genome Biology Unit, Meyerhofstrasse 1, 69117 Heidelberg, Germany.","Duan G, Li X, Köhn M",,,39.0,Germany +25336621,DBTMEE,0.997862458,DBTMEE,0.997862458,,0,1,http://dbtmee.hgc.jp,301,,"(35.6895,139.6917)",http://web.archive.org/web/20221017083821/https://dbtmee.hgc.jp/,2014-10-21,"Human Genome Center, The Institute of Medical Science, The University of Tokyo, 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.","Park SJ, Shirahige K, Ohsugi M, Nakai K",,,32.0,Japan +25378341,diArk,0.992710948,diArk,0.992710948,,0,1,http://www.diark.org,301,,"(51.5344,9.9323)",http://web.archive.org/web/20081120164550/http://www.diark.org/,2014-11-06,"Group Systems Biology of Motor Proteins, Department of NMR-based Structural Biology, Max-Planck-Institute for Biophysical Chemistry, Göttingen, 37085, Germany 
mako@nmr.mpibpc.mpg.de.","Kollmar M, Kollmar L, Hammesfahr B, Simm D",,,3.0,Germany +25398897,DDMGD,0.992112517,DDMGD,0.992112517,,0,1,http://www.cbrc.kaust.edu.sa/ddmgd,302,,"(37.5331,-122.2486)",http://web.archive.org/web/20220320005537/https://www.cbrc.kaust.edu.sa/ddmgd/,2014-11-14,"Computational Bioscience Research Center (CBRC), Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE), King Abdullah University of Science and Technology (KAUST), Thuwal, Saudi Arabia.","Bin Raies A, Mansour H, Incitti R, Bajic VB",,,8.0,Saudi Arabia +25404132,DoGSD,0.996389866,DoGSD,0.996389866,,0,1,http://dogsd.big.ac.cn,302,,"(39.9075,116.3972)",http://web.archive.org/web/20180905123314/http://dogsd.big.ac.cn:80/,2014-11-17,"Laboratory for Conservation and Utilization of Bioresource & Key Laboratory for Microbial Resources of the Ministry of Education, Yunnan University, Kunming 650091, China State Key Laboratory of Genetic Resources and Evolution, and Yunnan Laboratory of Molecular Biology of Domestic Animals, Kunming Institute of Zoology, Chinese Academy of Sciences, Kunming 650223, China Kunming College of Life Science, University of Chinese Academy of Sciences,Kunming 650204, China.","Bai B, Zhao WM, Tang BX, Wang YQ, Wang L, Zhang Z, Yang HC, Liu YH, Zhu JW, Irwin DM, Wang GD, Zhang YP",,,57.0,"China, China, China" +25414358,DB-AT,0.903057555,DB-AT,0.903057555,of Apicomplexa,0.682551831,1,http://fullmal.hgc.jp,301,,"(35.6895,139.6917)",http://web.archive.org/web/20221011123441/https://fullmal.hgc.jp/,2014-11-20,"University of Münster, Faculty of Medicine, Institute of Bioinformatics, Niels-Stensen Strasse 14, 48149 Münster, Germany.","Jąkalski M, Wakaguri H, Kischka TG, Nishikawa Y, Kawazu S, Matsubayashi M, Kawahara F, Tsuji N, Cao S, Sunaga F, Xuan X, Okubo K, Igarashi I, Tuda J, Mongan AE, Eshita Y, Maeda R, Makałowski W, Suzuki Y, Yamagishi J",,,3.0,Germany 
+25416797,doRiNA,0.983651519,doRiNA,0.983651519,,0,1,http://dorina.mdc-berlin.de,301,,"(52.5244,13.4105)",http://web.archive.org/web/20221019174718/https://dorina.mdc-berlin.de/,2014-11-21,"Computational RNA Biology Group, Max Planck Institute for Biology of Ageing, 50931 Cologne, Germany.","Blin K, Dieterich C, Wurmus R, Rajewsky N, Landthaler M, Akalin A",,,67.0,Germany +25435547,Database,0.979652524,DB,0.85708645,Database,0.979652524,1,http://www.plantenergy.uwa.edu.au/applications/mpic,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20180313133227/http://www.plantenergy.uwa.edu.au/applications/mpic/,2014-11-29,"Australian Research Council Centre of Excellence in Plant Energy Biology, Bayliss Building M316, University of Western Australia, 35 Stirling Highway, Crawley 6009, Western Australia, Australia monika.murcha@uwa.edu.au.","Murcha MW, Narsai R, Devenish J, Kubiszewski-Jakubiak S, Whelan J",,,14.0,"Australia, Australia, Australia" +25474259,DBBP,0.99723047,DBBP,0.99723047,DataBase of Binding Pairs in protein-nucleic acid interactions,0.836951999,1,http://bclab.inha.ac.kr/dbbp,200,,"(37.4410,126.6661)",http://web.archive.org/web/20220526134506/http://bclab.inha.ac.kr/dbbp/,2014-12-03,None,"Park B, Kim H, Han K",,,2.0, +25484339,DISEASES,0.994236827,DISEASES,0.994236827,,0,1,http://diseases.jensenlab.org,302,,"(55.6759,12.5655)",no_wayback,2014-12-05,"Department of Disease Systems Biology, Novo Nordisk Foundation Center for Protein Research, Faculty of Health and Medical Sciences, University of Copenhagen, Copenhagen, Denmark.","Pletscher-Frankild S, Pallejà A, Tsafou K, Binder JX, Jensen LJ",,"European Union’s Seventh Framework Programme, Novo Nordisk Foundation Center for Protein Research, Novo Nordisk Foundation Center for Protein Research",191.0,Denmark +25505093,diXa,0.938735694,diXa,0.938735694,Infrastructure for Chemical Safety 
Assessment,0.744968057,1,http://www.dixa-fp7.eu,301,,"(51.1981,6.6850)",http://web.archive.org/web/20191108213115/http://www.dixa-fp7.eu:80/,2014-12-12,"Department of Toxicogenomics, School of Oncology and Developmental Biology (GROW), Maastricht University, 6200 MD Maastricht, The Netherlands, Dana-Farber Cancer Institute, Brigham and Women's Hospital, Harvard Medical School, Boston, 02215, MA, USA, European Molecular Biology Laboratory - European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambs CB10 1SD, UK, Computational and Systems Medicine, Department of Surgery and Cancer, Imperial College London, South Kensington, London SW7 2AZ, UK, Department of Bioinformatics - BiGCaT, Maastricht University, 6200 MD Maastricht, The Netherlands, Genedata AG, CH-4053 Basel, Switzerland, Department of Vertebrate Genomics, Max Planck Institute for Molecular Genetics, 14195 Berlin, Germany, Center of Physiology and Pathophysiology, Institute of Neurophysiology, University of Cologne, Cologne 50931, Germany and European Commission, Joint Research Centre, 21027 Ispra VA, Italy.","Hendrickx DM, Aerts HJ, Caiment F, Clark D, Ebbels TM, Evelo CT, Gmuender H, Hebels DG, Herwig R, Hescheler J, Jennen DG, Jetten MJ, Kanterakis S, Keun HC, Matser V, Overington JP, Pilicheva E, Sarkans U, Segura-Lepe MP, Sotiriadou I, Wittenberger T, Wittwehr C, Zanzi A, Kleinjans JC",,"Wellcome Trust, Wellcome Trust",12.0,"Switzerland, Germany, Germany, Italy, Netherlands, Netherlands, United States" +25534750,dbPPT,0.996150672,dbPPT,0.996150672,database of Phosphorylation site in,0.916097972,1,"http://dbppt.biocuckoo.org, http://dbppt.biocuckoo.or","200, HTTPConnectionPool(host='dbppt.biocuckoo.or', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,"(40.2338,-111.6585), ","http://web.archive.org/web/20220815131917/http://dbppt.biocuckoo.org/, 
no_wayback",2014-12-22,"Department of Biomedical Engineering, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China and State Key Laboratory of Biocontrol, School of Life Sciences, Sun Yat-sen University, Guangzhou, Guangdong 510275, China.","Cheng H, Deng W, Wang Y, Ren J, Liu Z, Xue Y",,,20.0,"China, China" +25632108,CyanOmics,0.996836007,CyanOmics,0.996836007,,0,1,http://lag.ihb.ac.cn/cyanomics,302,,"(39.9075,116.3972)",no_wayback,2015-01-28,"Key Laboratory of Algal Biology, Institute of Hydrobiology, Chinese Academy of Sciences, Wuhan 430072, China, University of Chinese Academy of Sciences, Beijing 100049, China, College of Life Science, Peking University, Beijing 100871, China Key Laboratory of Algal Biology, Institute of Hydrobiology, Chinese Academy of Sciences, Wuhan 430072, China, University of Chinese Academy of Sciences, Beijing 100049, China, College of Life Science, Peking University, Beijing 100871, China.","Yang Y, Feng J, Li T, Ge F, Zhao J",,,3.0,"China, China, China, China, China, China" +"25877637, 27924018, 31680165",DisGeNET,0.997833073,DisGeNET,0.997833073,,0,3,http://www.disgenet.org,301,,"(41.3888,2.1590)",http://web.archive.org/web/20221101140321/https://www.disgenet.org/,2020-01-01,"Research Programme on Biomedical Informatics (GRIB), Hospital del Mar Medical Research Institute (IMIM), Department of Experimental and Health Sciences, Universitat Pompeu Fabra, C/Dr Aiguader 88, E-08003 Barcelona, Spain, Roche Pharma Research and Early Development, pRED Informatics, Roche Innovation Center Penzberg, Roche Diagnostics GmbH, Nonnenwald 2, 82377 Penzberg, Germany and Scientific & Business Information Services, Roche Diagnostics GmbH, Nonnenwald 2, 82377 Penzberg, Germany., Research Programme on Biomedical Informatics (GRIB), Hospital del Mar Medical Research Institute (IMIM), Department of Experimental and Health Sciences (DCEXS), Universitat Pompeu Fabra (UPF), C/Dr Aiguader 88, 
E-08003 Barcelona, Spain., Research Programme on Biomedical Informatics (GRIB), Hospital del Mar Medical Research Institute (IMIM), Department of Experimental and Health Sciences, Pompeu Fabra University (UPF), Barcelona, Spain.","Piñero J, Queralt-Rosinach N, Bravo À, Deu-Pons J, Bauer-Mehren A, Baron M, Sanz F, Furlong LI, Piñero J, Bravo À, Queralt-Rosinach N, Gutiérrez-Sacristán A, Deu-Pons J, Centeno E, García-García J, Sanz F, Furlong LI, Piñero J, Ramírez-Anguita JM, Saüch-Pitarch J, Ronzano F, Centeno E, Sanz F, Furlong LI",", , ",", , MINECO, ISCIII-FEDER, FEDER, ISCIII-FEDER, ISCIII, ISCIII-FEDER, Research Programme on Biomedical Informatics, EU H2020 Programme, IMI-JU, EU-FP7, Agència de Gestió d’Ajuts Universitaris i de Recerca Generalitat de Catalunya, Spanish National Bioinformatics Institute",1428.0,"Germany, Germany, Spain, Spain, Spain" +25887129,dbVOR,0.990226269,dbVOR,0.990226269,,0,1,"http://watson.hgen.pitt.edu/register, http://watson.hgen.pitt.edu/register/docs/dbvor.html","302, 302",,"(40.4440,-79.9552), (40.4440,-79.9552)","http://web.archive.org/web/20220122055033/https://watson.hgen.pitt.edu/register/, http://web.archive.org/web/20210519021927/https://watson.hgen.pitt.edu/register/docs/dbvor.html",2015-03-18,"Department of Human Genetics, Graduate School of Public Health, University of Pittsburgh, PittsburghPennsylvania, 15261, USA. rvb5@pitt.edu.","Baron RV, Conley YP, Gorin MB, Weeks DE",,"NIGMS NIH HHS, NEI NIH HHS, NEI NIH HHS",1.0,United States +25951377,DeTEXT,0.969740272,DeTEXT,0.969740272,,0,1,http://prir.ustb.edu.cn/DeTEXT,"HTTPConnectionPool(host='prir.ustb.edu.cn', port=80): Max retries exceeded with url: /DeTEXT (Caused by ReadTimeoutError(""HTTPConnectionPool(host='prir.ustb.edu.cn', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20190413144420/http://prir.ustb.edu.cn:80/DeTEXT/,2015-05-07,"Department of Computer Science and Technology, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China.","Yin XC, Yang C, Pei WY, Man H, Zhang J, Learned-Miller E, Yu H",,"NIGMS NIH HHS, NIGMS NIH HHS, NCATS NIH HHS, NCATS NIH HHS",0.0,China +25978092,DBDiaSNP,0.997262299,DBDiaSNP,0.997262299,,0,1,http://www.juit.ac.in/attachments/dbdiasnp,"HTTPConnectionPool(host='www.juit.ac.in', port=80): Max retries exceeded with url: /attachments/dbdiasnp (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20210512044102/https://www.juit.ac.in/attachments/dbdiasnp/,2015-05-15,"Department of Biotechnology and Bioinformatics, Jaypee University of Information Technology , Solan, Himachal Pradesh, India .","Mehla K, Ramana J",,,2.0,India +25979979,DroughtDB,0.997056782,DroughtDB,0.997056782,,0,1,http://pgsb.helmholtz-muenchen.de/droughtdb,302,,"(48.2500,11.5667)",http://web.archive.org/web/20220803222830/https://pgsb.helmholtz-muenchen.de/droughtdb/,2015-05-15,"Plant Breeding, Center of Life and Food Sciences Weihenstephan, Technische Universität München, 85354 Freising, Germany, Department of Plant Genome and Systems Biology, Helmholtz Center Munich, German Research Center for Environmental Health, 85764 Neuherberg, Germany and College of Science, King Saud University, Riyadh 11451, Kingdom of Saudi Arabia.","Alter S, Bader KC, Spannagl M, Wang Y, Bauer E, Schön CC, Mayer KF",,,14.0,"Germany, Germany, Saudi Arabia" +25990557,DSigDB,0.996987581,DSigDB,0.996987581,Drug Signatures Database,0.949252993,1,http://tanlab.ucdenver.edu/DSigDB,"HTTPConnectionPool(host='tanlab.ucdenver.edu', port=80): Max retries exceeded with url: /DSigDB (Caused by ConnectTimeoutError(, 'Connection to tanlab.ucdenver.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20181120073417/http://tanlab.ucdenver.edu/DSigDB/,2015-05-19,"Department of Medicine, Translational Bioinformatics and Cancer Systems Biology Laboratory, Division of Medical Oncology, University of Colorado Anschutz Medical Campus, Aurora, CO 80045, USA and Department of Computer Science and Engineering, Korea University, Seoul 136-713, South Korea.","Yoo M, Shin J, Kim J, Ryall KA, Lee K, Lee S, Jeon M, Kang J, Tan AC",,"NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS",72.0,United States +26019122,DX,0.759305418,DX,0.759305418,COINS Data Exchange,0.739541188,1,http://coins.mrn.org/dx,"HTTPConnectionPool(host='coins.mrn.org', port=80): Max retries exceeded with url: /dx (Caused by ConnectTimeoutError(, 'Connection to coins.mrn.org timed out. (connect timeout=5)'))",,,no_wayback,2015-05-24,"The Mind Research Network, 1101 Yale Blvd NE, Albuquerque, NM, USA. Electronic address: dlandis@mrn.org.","Landis D, Courtney W, Dieringer C, Kelly R, King M, Miller B, Wang R, Wood D, Turner JA, Calhoun VD",,"NIBIB NIH HHS, NIGMS NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIH, NIH, NIH, NIBIB NIH HHS, NIGMS NIH HHS, NIMH NIH HHS, NIBIB NIH HHS, NIBIB NIH HHS, NIH, NIDA NIH HHS, NIBIB NIH HHS",19.0,United States +26030752,DMD,0.993849114,DMD,0.993849114,Dietary MicroRNA Databases,0.963514006,1,http://sbbi.unl.edu/dmd,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20160119210815/http://sbbi.unl.edu:80/dmd/,2015-06-01,"Department of Computer Science and Engineering, University of Nebraska-Lincoln, Lincoln, NE, United States of America.","Chiang K, Shu J, Zempleni J, Cui J",,"NIGMS NIH HHS, NIGMS NIH HHS",12.0,United States +26048563,CYCLoPs,0.996802568,CYCLoPs,0.996802568,Collection of Yeast Cells Localization Patterns,0.984272313,1,http://cyclops.ccbr.utoronto.ca,302,,"(43.7001,-79.4163)",http://web.archive.org/web/20190412232108/http://cyclops.ccbr.utoronto.ca:80/,2015-04-15,"The Donnelly Centre, University of Toronto, 
Toronto, Ontario, Canada, M5S3E1.","Koh JL, Chong YT, Friesen H, Moses A, Boone C, Andrews BJ, Moffat J",,Canadian Institutes of Health Research,38.0,Canada +26055100,dbHiMo,0.996748298,dbHiMo,0.996748298,database for histone-modifying enzymes,0.902459852,1,http://hme.riceblast.snu.ac.kr,"HTTPConnectionPool(host='hme.riceblast.snu.ac.kr', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='hme.riceblast.snu.ac.kr', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2015-06-08,"Department of Forest Sciences, University of Helsinki, 00014 Helsinki, Finland, Fungal Bioinformatics Laboratory, Seoul National University, Seoul 151-921, Korea, Department of Agricultural Biotechnology, College of Agriculture and Life Science, Seoul National University, Seoul 151-921, Korea, School of Biotechnology, Yeungnam University, Gyeongsan, Gyeongbuk 712-749, Korea, and Research Institute of Agriculture and Life Sciences, Center for Fungal Pathogenesis, Center for Fungal Genetic Resources, Plant Genomics and Breeding Institute, Seoul National University, Seoul 151-921, Korea.","Choi J, Kim KT, Huh A, Kwon S, Hong C, Asiegbu FO, Jeon J, Lee YH",,,6.0,Finland +26099468,dbEMT,0.992519438,dbEMT,0.992519438,,0,1,http://dbemt.bioinfo-minzhao.org,406,,,http://web.archive.org/web/20221104141719/https://www.dbemt.bioinfo-minzhao.org/,2015-06-23,"School of Engineering, Faculty of Science, Health, Education and Engineering, University of the Sunshine Coast, Maroochydore DC, Queensland, 4558, Australia.","Zhao M, Kong L, Liu Y, Qu H",,,66.0,Australia +26144527,dasHPPboard,0.991571605,dasHPPboard,0.991571605,,0,1,http://sphppdashboard.cnb.csic.es,"HTTPConnectionPool(host='sphppdashboard.cnb.csic.es', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2015-07-16,"ProteoRed-ISCIII, National Center for Biotechnology-CSIC (CNB) , 
C/Darwin 3, Madrid 28049, Spain.","Tabas-Madrid D, Alves-Cruzeiro J, Segura V, Guruceaga E, Vialas V, Prieto G, García C, Corrales FJ, Albar JP, Pascual-Montano A",,"Children's Tumor Foundation, Ministerio de Ciencia e Innovación, Comunidad de Madrid",4.0,Spain +26305368,Cyanobacterial KnowledgeBase,0.959425698,CKB,0.926491141,Cyanobacterial KnowledgeBase,0.959425698,1,http://nfmc.res.in/ckb/index.html,"HTTPConnectionPool(host='nfmc.res.in', port=80): Max retries exceeded with url: /ckb/index.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2015-08-25,"National Facility for Marine Cyanobacteria, Sub-Distributed Bioinformatics Centre (sponsored by Department of Biotechnology, Govt. of India), Department of Marine Biotechnology, School of Marine Sciences, Bharathidasan University, Tiruchirappalli, Tamil Nadu, India.","Peter AP, Lakshmanan K, Mohandass S, Varadharaj S, Thilagar S, Abdul Kareem KA, Dharmar P, Gopalakrishnan S, Lakshmanan U",,,4.0,"India, India" +26322998,CTDB,0.979035616,CTDB,0.979035616,Chickpea Transcriptome Database,0.944384923,1,http://nipgr.res.in/ctdb.html,"HTTPConnectionPool(host='nipgr.res.in', port=80): Max retries exceeded with url: /ctdb.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20181018052531/http://www.nipgr.res.in:80/ctdb.html,2015-08-31,"Functional and Applied Genomics Laboratory, National Institute of Plant Genome Research (NIPGR), New Delhi, India.","Verma M, Kumar V, Patel RK, Garg R, Jain M",,,10.0,India +26342387,DENdb,0.997271538,DENdb,0.997271538,,0,1,http://www.cbrc.kaust.edu.sa/dendb,302,,"(37.5331,-122.2486)",http://web.archive.org/web/20161023201842/http://www.cbrc.kaust.edu.sa:80/dendb/,2015-09-05,"Computational Bioscience Research Center (CBRC), Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE) and.","Ashoor H, 
Kleftogiannis D, Radovanovic A, Bajic VB",,,23.0, +26363178,DIVAS,0.973447561,DIVAS,0.973447561,,0,1,http://rvs.u.hpc.mssm.edu/divas,"HTTPConnectionPool(host='rvs.u.hpc.mssm.edu', port=80): Max retries exceeded with url: /divas (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2015-09-12,"Department of Genetics and Genomic Sciences, Icahn School of Medicine at Mount Sinai, New York, NY 10029, USA.","Cheng WY, Hakenberg J, Li SD, Chen R",,,4.0,United States +26393351,dkNET,0.995002985,dkNET,0.995002985,NIDDK Information Network,0.846032488,1,http://dknet.org,301,,"(32.9595,-117.2653)",http://web.archive.org/web/20221102211122/https://dknet.org/,2015-09-22,"Center for Research in Biological Systems, University of California, San Diego, San Diego, California, United States of America.","Whetzel PL, Grethe JS, Banks DE, Martone ME",,"NIDDK NIH HHS, NIDDK NIH HHS",6.0,United States +26438538,dbSUPER,0.994813621,dbSUPER,0.994813621,,0,1,http://bioinfo.au.tsinghua.edu.cn/dbsuper,301,,"(39.9906,116.2887)",http://web.archive.org/web/20210726175529/https://bioinfo.au.tsinghua.edu.cn/dbsuper/,2015-10-04,"MOE Key Laboratory of Bioinformatics, Bioinformatics Division and Center for Synthetic and Systems Biology, TNLIST/Department of Automation, Tsinghua University, Beijing 100084, China.","Khan A, Zhang X",,,169.0,China +26481352,DIDA,0.982567787,DIDA,0.982567787,DIgenic diseases DAtabase,0.88692459,1,http://dida.ibsquare.be,200,,"(50.8505,4.3488)",http://web.archive.org/web/20210625093520/http://dida.ibsquare.be/,2015-10-19,"Interuniversity Institute of Bioinformatics in Brussels, Boulevard du Triomphe CP 263, 1050 Brussels, Belgium MLG, Département d'Informatique, Université Libre de Bruxelles, Boulevard du Triomphe, CP 212, 1050 Brussels, Belgium Center for Medical Genetics, Reproduction and Genetics, Reproduction Genetics and Regenerative Medicine, Vrije Universiteit Brussel, UZ Brussel, Laarbeeklaan 
101, 1090 Brussels, Belgium.","Gazzo AM, Daneels D, Cilia E, Bonduelle M, Abramowicz M, Van Dooren S, Smits G, Lenaerts T",,,45.0,"Belgium, Belgium, Belgium" +26503248,dbMAE,0.997952342,dbMAE,0.997952342,database of autosomal monoallelic expression,0.85295561,1,http://mae.hms.harvard.edu,301,,"(42.3584,-71.0598)",http://web.archive.org/web/20220617173736/https://mae.hms.harvard.edu/,2015-10-25,"Dana-Farber Cancer Institute and Department of Genetics, Harvard Medical School, 450 Brookline Ave., Boston, MA 02215, USA Department of Systems Biology, Harvard Medical School, 200 Longwood Ave., Boston, MA 02215, USA Virginia_Savova@hms.harvard.edu.","Savova V, Patsenker J, Vigneau S, Gimelbrant AA",,"NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS, PHS HHS",20.0,"United States, United States" +26503254,Digital Development,0.921984583,Digital Development,0.921984583,,0,1,http://www.digital-development.org,200,,"(33.9400,-118.1326)",http://web.archive.org/web/20220802131128/http://digital-development.org/,2015-10-25,"Sloan Kettering Institute, New York, NY 10065, USA.","Santella A, Kovacevic I, Herndon LA, Hall DH, Du Z, Bao Z",,"NIGMS NIH HHS, NICHD NIH HHS, NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NIH HHS, NIH HHS",4.0,United States +26553809,Degradome,0.932848215,Degradome,0.932848215,,0,1,http://degradome.uniovi.es,200,,"(43.3466,-5.8906)",http://web.archive.org/web/20221006081525/http://degradome.uniovi.es/,2015-11-08,"From the Departamento de Bioquímica y Biología Molecular, Facultad de Medicina, Instituto Universitario de Oncología, Universidad de Oviedo, 33006 Oviedo, Spain.","Pérez-Silva JG, Español Y, Velasco G, Quesada V",,,22.0,Spain +26566288,DBGC,0.987804174,DBGC,0.987804174,Database of Human Gastric Cancer,0.921517382,1,http://bminfor.tongji.edu.cn/dbgc/index.do,"HTTPConnectionPool(host='bminfor.tongji.edu.cn', port=80): Max retries exceeded with url: /dbgc/index.do (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not 
known'))",,,http://web.archive.org/web/20151118013441/http://bminfor.tongji.edu.cn:80/dbgc/index.do,2015-11-13,"School of Life Science and Technology, Tongji University, 1239 Siping Road, Shanghai, China.","Wang C, Zhang J, Cai M, Zhu Z, Gu W, Yu Y, Zhang X",,,20.0,China +26577058,DBEndo,0.913484514,DBEndo,0.913484514,,0,1,http://dbendo.charite.de,"HTTPConnectionPool(host='dbendo.charite.de', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to dbendo.charite.de timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160308025004/http://dbendo.charite.de:80/,2015-11-17,"Department of Operative and Preventive Dentistry, Charité-Universitätsmedizin Berlin, Assmannshauser Straße 4-6, 14197, Berlin, Germany. saskia.preissner@charite.de.","Preissner S, Kostka E, Mokross M, Kersten NV, Blunck U, Preissner R",,,0.0,Germany +26586797,DIANA-miRGen,0.908604195,DIANA-miRGen,0.908604195,,0,1,http://www.microrna.gr/mirgen,301,,"(37.9838,23.7278)",http://web.archive.org/web/20220121004630/http://www.microrna.gr/mirgen/,2015-11-19,"DIANA-Lab, Department of Electrical & Computer Engineering, University of Thessaly, 382 21 Volos, Greece Hellenic Pasteur Institute, 115 21 Athens, Greece georgakilas@inf.uth.gr.","Georgakilas G, Vlachos IS, Zagganas K, Vergoulis T, Paraskevopoulou MD, Kanellos I, Tsanakas P, Dellis D, Fevgas A, Dalamagas T, Hatzigeorgiou AG",,,26.0,"Greece, Greece" +26697753,DLGP,0.996312201,DLGP,0.996312201,,0,1,http://lcgbase.big.ac.cn/DLGP,"HTTPConnectionPool(host='lcgbase.big.ac.cn', port=80): Max retries exceeded with url: /DLGP (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2015-12-15,"Stem Cell Laboratory, UCL Cancer Institute, University College London, London WC1E 6BT, UK; CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, PR China. 
Electronic address: dapeng.wang@ucl.ac.uk.",Wang D,,,0.0,China +26699919,ECGene,0.994564354,ECGene,0.994564354,Endometrial Cancer Gene database,0.8197405,1,http://ecgene.bioinfo-minzhao.org,406,,,http://web.archive.org/web/20200207083318/http://ecgene.bioinfo-minzhao.org:80/,2016-01-13,"School of Engineering, Faculty of Science, Health, Education and Engineering, University of the Sunshine Coast, Queensland, 4558, Australia.","Zhao M, Liu Y, O'Mara TA",,,9.0,Australia +26727469,dEMBF,0.996164143,dEMBF,0.996164143,Database of Enzymes of Microalgal Biofuel Feedstock,0.963428038,1,http://bbprof.immt.res.in/embf,404,,,http://web.archive.org/web/20200128095704/http://bbprof.immt.res.in:80/embf/,2016-01-04,"Academy of Scientific and Innovative Research, CSIR-Institute of Minerals and Materials Technology, Bhubaneswar, Odisha, India.","Misra N, Panda PK, Parida BK, Mishra BK",,,4.0,India +26836976,dbAARD,0.983507156,dbAARD,0.983507156,database of Aging and Age Related Disorders,0.922870524,1,"http://genomeinformatics.dce.edu/dbAARD/, http://genomeinformatics.dce.edu/AGP","HTTPConnectionPool(host='genomeinformatics.dce.edu', port=80): Max retries exceeded with url: /dbAARD/ (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known')), HTTPConnectionPool(host='genomeinformatics.dce.edu', port=80): Max retries exceeded with url: /AGP (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,", ","no_wayback, no_wayback",2016-02-02,"Department of Biotechnology, Delhi Technological University, Delhi 110042, India.","Srivastava I, Gahlot LK, Khurana P, Hasija Y",,,2.0,India +26895996,Cystic Fibrosis Cloud database,0.86593811,CFC,0.6286695,Cystic Fibrosis Cloud database,0.86593811,1,http://servoy.infocomsa.com/cfc_database,"HTTPConnectionPool(host='servoy.infocomsa.com', port=80): Max retries exceeded with url: /cfc_database (Caused by NewConnectionError(': Failed to 
establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20220520031021/http://servoy.infocomsa.com/cfc_database,2016-01-01,"CINDEFI, CONICET-CCT La Plata, Centro de Biotecnología Aplicada, Facultad de Ciencias Exactas, Universidad Nacional de La Plata, La Plata, Buenos Aires, Argentina.","Prieto CI, Palau MJ, Martina P, Achiary C, Achiary A, Bettiol M, Montanaro P, Cazzola ML, Leguizamón M, Massillo C, Figoli C, Valeiras B, Perez S, Rentería F, Diez G, Yantorno OM, Bosch A",,,0.0,Argentina +26909679,DsTRD,0.992711484,DsTRD,0.992711484,Danshen Transcriptional Resource Database,0.94311095,1,http://bi.sky.zstu.edu.cn/DsTRD/home.php,301,,"(30.2936,120.1614)",http://web.archive.org/web/20160403163806/http://bi.sky.zstu.edu.cn:80/DsTRD/home.php,2016-02-24,"Institute of Bioengineering, College of Life Sciences, Zhejiang Sci-Tech University, Hangzhou, 310018, China.","Shao Y, Wei J, Wu F, Zhang H, Yang D, Liang Z, Jin W",,,13.0,China +26940364,dbPHCC,0.919840753,dbPHCC,0.919840753,,0,1,http://lifecenter.sgst.cn/dbphcc/Conclusions,"HTTPConnectionPool(host='lifecenter.sgst.cn', port=80): Max retries exceeded with url: /dbphcc/Conclusions (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2016-03-02,"Biomedical Engineering, University of Shanghai for Science and Technology, Shanghai 200093, China.","Ouyang J, Sun Y, Li W, Zhang W, Wang D, Liu X, Lin Y, Lian B, Xie L",,"Chinese Human Proteome Projects, Key Infectious Disease Project, National Natural Science Foundation of China, Chinese Human Proteome Projects, National Natural Science Foundation of China, National Hi-Tech Program",3.0,China +26946289,dbPEC,0.996957302,dbPEC,0.996957302,Database for Preeclampsia,0.837573124,1,http://ptbdb.cs.brown.edu/dbpec,"HTTPConnectionPool(host='ptbdb.cs.brown.edu', port=80): Max retries exceeded with url: /dbpec (Caused by NewConnectionError(': Failed to establish a new 
connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20161031144223/http://ptbdb.cs.brown.edu:80/dbpec/,2016-03-05,"Department of Pediatrics, Women & Infants Hospital of Rhode Island, Providence, RI, USA Department of Pediatrics, Brown Alpert Medical School, Providence, RI, USA auzun@wihri.org.","Uzun A, Triche EW, Schuster J, Dewan AT, Padbury JF",,"NIGMS NIH HHS, NICHD NIH HHS, NHLBI NIH HHS, NIGMS NIH HHS, NCRR NIH HHS, NHLBI NIH HHS",3.0,"United States, United States" +26980516,DemaDb,0.9967255,DemaDb,0.9967255,,0,1,http://fungaldb.um.edu.my,200,,"(35.8089,140.1185)",no_wayback,2016-03-15,"Department of Medical Microbiology, Faculty of Medicine, University of Malaya, 50603 Kuala Lumpur, Malaysia.","Kuan CS, Yew SM, Chan CL, Toh YF, Lee KW, Cheong WH, Yee WY, Hoh CC, Yap SJ, Ng KP",,,1.0,Malaysia +26989155,dbWGFP,0.997119367,dbWGFP,0.997119367,,0,1,http://bioinfo.au.tsinghua.edu.cn/dbwgfp,301,,"(39.9906,116.2887)",http://web.archive.org/web/20210620175438/https://bioinfo.au.tsinghua.edu.cn/dbwgfp/,2016-03-17,"MOE Key Laboratory of Bioinformatics, Bioinformatics Division and Center for Synthetic & Systems Biology, TNLIST, Department of Automation, Tsinghua University, Beijing 100084, China.","Wu J, Wu M, Li L, Liu Z, Zeng W, Jiang R",,,12.0,China +27010073,dbPAF,0.997537851,dbPAF,0.997537851,,0,1,http://dbpaf.biocuckoo.org,200,,"(40.2338,-111.6585)",http://web.archive.org/web/20221017062413/https://dbpaf.biocuckoo.org/,2016-03-24,"Department of Bioinformatics &Systems Biology, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China.","Ullah S, Lin S, Xu Y, Deng W, Ma L, Zhang Y, Liu Z, Xue Y",,,31.0,China +27076334,DNAVaxDB,0.992402136,DNAVaxDB,0.992402136,,0,1,http://www.violinet.org/dnavaxdb,404,,,http://web.archive.org/web/20221006150345/https://violinet.org/dnavaxdb/,2016-01-01,"College of Pharmacy, University of Michigan, Ann Arbor, MI, 48109, USA.","Racz R, He Y",,,0.0,United 
States +27084938,DeepBlue,0.988800287,DeepBlue,0.988800287,,0,1,http://deepblue.mpi-inf.mpg.de,301,,"(49.2326,7.0098)",http://web.archive.org/web/20220806041407/https://deepblue.mpi-inf.mpg.de/,2016-04-15,"Max Planck Institute for Informatics, 66123 Saarbrücken, Germany Graduate School of Computer Science, Saarland University, 66123 Saarbrücken, Germany felipe.albrecht@mpi-inf.mpg.de.","Albrecht F, List M, Bock C, Lengauer T",,,21.0,"Germany, Germany" +27153640,e23D,0.991435468,e23D,0.991435468,,0,1,http://www.sheba-cancer.org.il/e23D,"HTTPConnectionPool(host='www.sheba-cancer.org.il', port=80): Max retries exceeded with url: /e23D (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2016-04-19,"Cancer Research Center, Sheba Medical Center, Ramat-Gan, Israel The Mina and Everard Goodman Faculty of Life Sciences, Bar-Ilan University, Ramat-Gan, Israel.","Solomon O, Eyal E, Amariglio N, Unger R, Rechavi G",,,2.0,"Israel, Israel" +27153700,dbDSM,0.997294545,dbDSM,0.997294545,database of Deleterious Synonymous Mutation,0.933270373,1,http://bioinfo.ahu.edu.cn:8080/dbDSM/index.jsp,302,,,http://web.archive.org/web/20210614082607/http://bioinfo.ahu.edu.cn:8080/dbDSM/index.jsp,2016-02-15,"Institute of Health Sciences, School of Life Sciences, Anhui University, Hefei, Anhui 230601, China and.","Wen P, Xiao P, Xia J",,,14.0,China +27164589,CVD2014,0.991893431,CVD2014,0.991893431,,0,1,http://www.helsinki.fi/psychology/groups/visualcognition,301,,"(60.1695,24.9354)",http://web.archive.org/web/20200713055306/http://www.helsinki.fi:80/psychology/groups/visualcognition/,2016-05-03,None,"Nuutinen M, Virtanen T, Vaahteranoksa M, Vuori T, Oittinen P, Hakkinen J",,,4.0, +27173524,DPTEdb,0.989185214,DPTEdb,0.989185214,dioecious plant transposable element 
database,0.863010341,1,http://genedenovoweb.ticp.net:81/DPTEdb/index.php,200,,,http://web.archive.org/web/20220615170005/http://genedenovoweb.ticp.net:81/DPTEdb/index.php,2016-05-12,"College of Life Sciences, Henan Normal University, Xinxiang 453007, China.","Li SF, Zhang GJ, Zhang XJ, Yuan JH, Deng CL, Gu LF, Gao WJ",,,8.0,China +27192119,dbCPG,0.991826773,dbCPG,0.991826773,Cancer Predisposition Gene Database,0.988404105,1,http://bioinfo.ahu.edu.cn:8080/dbCPG/index.jsp,302,,,http://web.archive.org/web/20180626225652/http://bioinfo.ahu.edu.cn:8080/dbCPG/index.jsp,2016-06-01,"Institute of Health Sciences, School of Computer Science and Technology, Anhui University, Hefei, Anhui, 230601, China.","Wei R, Yao Y, Yang W, Zheng CH, Zhao M, Xia J",,,4.0,China +27209279,DNetDB,0.996409118,DNetDB,0.996409118,,0,1,http://app.scbit.org/DNetDB,"HTTPConnectionPool(host='app.scbit.org', port=80): Max retries exceeded with url: /DNetDB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20190914123127/http://app.scbit.org:80/DNetDB/,2016-05-21,"Shanghai Center for Bioinformation Technology, Shanghai, 200235, P.R. 
China.","Yang J, Wu SJ, Yang SY, Peng JW, Wang SN, Wang FY, Song YX, Qi T, Li YX, Li YY",,"the National “973” Key Basic Research Development Program, the Program of International S&T Cooperation, National Natural Science Foundation of China, the Fundamental Research Program of Shanghai Municipal Commission of Science and Technology",5.0,China +27226753,Cyanobacteria culture collection,0.949196661,Scratchpads,0.894429704,Cyanobacteria culture collection,0.949196661,1,http://cyanobacteria.myspecies.info,302,,"(51.5085,-0.1257)",http://web.archive.org/web/20221006020309/https://cyanobacteria.myspecies.info/,2016-04-06,"Department of Botany, School of Biology, Aristotle University of Thessaloniki, Thessaloniki, Greece.","Gkelis S, Panou M",,,7.0,Greece +27276067,DREMECELS,0.997764349,DREMECELS,0.997764349,,0,1,http://www.bioinfoindia.org/dremecels,301,,"(45.5088,-73.5878)",http://web.archive.org/web/20200924055325/http://www.bioinfoindia.org/dremecels/,2016-06-08,"Department of Biotechnology and Bioinformatics, Jaypee University of Information Technology (JUIT), Waknaghat, Solan, H.P., 173234, India.","Shukla A, Moussa A, Singh TR",,,0.0,India +27354697,Cyclo-lib,0.97895883,Cyclo-lib,0.97895883,,0,1,http://cyclo-lib.mduse.com,200,,"(40.4165,-3.7026)",http://web.archive.org/web/20221016215640/http://cyclo-lib.mduse.com/,2016-06-27,"Soft Matter & Molecular Biophysics Group, Department of Applied Physics, Universidade de Santiago de Compostela, 15782 Santiago de Compostela, Spain CONACYT - Instituto Nacional de Psiquiatría Ramón de la Fuente Muñiz, 14370 Tlalpan, Distrito Federal, Mexico.","Mixcoha E, Rosende R, Garcia-Fandino R, Piñeiro Á",,,1.0,"Spain, Mexico" +27391016,D-PLACE,0.995508909,D-PLACE,0.995508909,,0,1,http://d-place.org,301,,"(51.5344,9.9323)",no_wayback,2016-07-08,"Department of Ecology & Evolutionary Biology, University of Toronto, Toronto, Canada.","Kirby KR, Gray RD, Greenhill SJ, Jordan FM, Gomes-Ng S, Bibiko HJ, Blasi DE, Botero CA, Bowern C, Ember 
CR, Leehr D, Low BS, McCarter J, Divale W, Gavin MC",,"National Science Foundation, National Science Foundation",39.0,Canada +27553277,Cysteinome,0.994982839,Cysteinome,0.994982839,,0,1,http://www.cysteinome.org,302,,"(-37.9747,145.0269)",no_wayback,2016-08-20,"Center for Molecular Medicine, School of Life Science and Biotechnology, Dalian University of Technology, Dalian, 116023, PR China; School of Pharmacology, Dalian University of Technology, Dalian, 116023, PR China.","Wu S, Luo Howard H, Wang H, Zhao W, Hu Q, Yang Y",,"Key Laboratory of Liaoning Educational Council, Central University",11.0,"China, China" +27577567,DDRprot,0.997383237,DDRprot,0.997383237,,0,1,http://ddr.cbbio.es,"HTTPConnectionPool(host='ddr.cbbio.es', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2016-08-29,"Computational Biology and Bioinformatics Group, Institute of Biomedicine of Seville, 41013 Sevilla, Spain.","Andrés-León E, Cases I, Arcas A, Rojas AM",,,7.0,Spain +27618709,DenHunt,0.991763353,DenHunt,0.991763353,,0,1,http://proline.biochem.iisc.ernet.in/DenHunt,301,,"(12.9719,77.5937)",http://web.archive.org/web/20220804202636/http://proline.biochem.iisc.ernet.in/DenHunt/,2016-09-12,"Department of Biochemistry, Center of Research and Post Graduate Studies, Indian Academy Degree College, Bengaluru, Karnataka, India.","Karyala P, Metri R, Bathula C, Yelamanchi SK, Sahoo L, Arjunan S, Sastri NP, Chandra N",,,3.0,India +27766955,e-GRASP,0.995206547,e-GRASP,0.995206547,,0,1,http://www.mypeg.info/egrasp,404,,,http://web.archive.org/web/20211025072259/http://www.mypeg.info/egrasp,2016-10-17,"Center for Excellence in Genome Medicine and Research, King Abdulaziz University, Jeddah, Saudi Arabia.","Karim S, NourEldin HF, Abusamra H, Salem N, Alhathli E, Dudley J, Sanderford M, Scheinfeldt LB, Chaudhary AG, Al-Qahtani MH, Kumar S",,"NHGRI NIH HHS, NIDDK NIH HHS",3.0,Saudi 
Arabia +27899556,dbDEMC,0.986323833,dbDEMC,0.986323833,,0,1,http://www.picb.ac.cn/dbDEMC,301,,"(39.9075,116.3972)",http://web.archive.org/web/20200112140008/http://www.picb.ac.cn:80/dbDEMC/,2016-11-28,"Key Laboratory of Computational Biology, CAS-MPG Partner Institute for Computational Biology, 320 Yue Yang Road, Shanghai 200031, China.","Yang Z, Wu L, Wang A, Tang W, Zhao Y, Zhao H, Teschendorff AE",,,101.0,China +27903894,dbSAP,0.994454622,dbSAP,0.994454622,,0,1,http://www.megabionet.org/dbSAP/index.html,404,,,no_wayback,2016-11-29,"The Center for Bioinformatics and Computational Biology, Shanghai Key Laboratory of Regulatory Biology, the Institute of Biomedical Sciences and School of Life Sciences, East China Normal University, Shanghai 200241, China.","Cao R, Shi Y, Chen S, Ma Y, Chen J, Yang J, Chen G, Shi T",,,10.0,"China, China" +27907889,denovo-db,0.997356877,denovo-db,0.997356877,,0,1,http://denovo-db.gs.washington.edu,302,,"(47.6062,-122.3321)",no_wayback,2016-10-05,"Department of Genome Sciences, University of Washington School of Medicine, Seattle, WA 98195, USA tychele@u.washington.edu.","Turner TN, Yi Q, Krumm N, Huddleston J, Hoekzema K, F Stessman HA, Doebley AL, Bernier RA, Nickerson DA, Eichler EE",,"NHGRI NIH HHS, NICHD NIH HHS, Howard Hughes Medical Institute, NHGRI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIMH NIH HHS",81.0,United States +28187413,DaTo,0.992877007,DaTo,0.992877007,,0,1,http://bis.zju.edu.cn/DaTo,200,,"(30.2936,120.1614)",http://web.archive.org/web/20210918191144/http://bis.zju.edu.cn/DaTo/,2016-12-18,None,"Li Q, Zhou Y, Jiao Y, Zhang Z, Bai L, Tong L, Yang X, Sommer B, Hofestädt R, Chen M",,,1.0, +28234924,DNApod,0.990965605,DNApod,0.990965605,DNA polymorphism annotation database,0.922978652,1,http://tga.nig.ac.jp/dnapod,301,,"(35.1167,138.9167)",no_wayback,2017-02-24,"Genome Informatics Laboratory, National Institute of Genetics, Mishima, Shizuoka, Japan.","Mochizuki T, Tanizawa Y, Fujisawa T, Ohta T, Nikoh N, Shimizu T, Toyoda 
A, Fujiyama A, Kurata N, Nagasaki H, Kaminuma E, Nakamura Y",,"Japan Society for the Promotion of Science, Japan Society for the Promotion of Science (JSPS), Transdisciplinary Research Integration Center Project of the Research Organization of Information and Systems, Japan Society for the Promotion of Science, Japanese Ministry of Agriculture, Forestry and Fisheries",2.0,Japan +28294141,database of Genomic Elements Associated with drug Resistance,0.862406161,GEAR,0.687603295,database of Genomic Elements Associated with drug Resistance,0.862406161,1,http://gear.comp-sysbio.org,"HTTPConnectionPool(host='gear.comp-sysbio.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20191030015926/http://gear.comp-sysbio.org:80/,2017-03-15,"Department of Computer Science and Technology, Tongji University, Shanghai 201804, China.","Wang YY, Chen WH, Xiao PP, Xie WB, Luo Q, Bork P, Zhao XM",,,10.0,China +28299908,DrugAge,0.997624516,DrugAge,0.997624516,,0,1,http://genomics.senescence.info/drugs,301,,"(42.5467,-83.2113)",http://web.archive.org/web/20221026130346/https://www.genomics.senescence.info/drugs/,2017-03-16,"Integrative Genomics of Ageing Group, Institute of Ageing and Chronic Disease, University of Liverpool, Liverpool, UK.","Barardo D, Thornton D, Thoppil H, Walsh M, Sharifi S, Ferreira S, Anžič A, Fernandes M, Monteiro P, Grum T, Cordeiro R, De-Souza EA, Budovsky A, Araujo N, Gruber J, Petrascheck M, Fraifeld VE, Zhavoronkov A, Moskalev A, de Magalhães JP",,"Wellcome Trust, Israel Ministry of Science and Technology",46.0, +28381244,DisBind,0.997564256,DisBind,0.997564256,,0,1,http://biophy.dzu.edu.cn/DisBind,301,,"(36.0649,120.3804)",http://web.archive.org/web/20220616002910/http://biophy.dzu.edu.cn/DisBind/,2017-04-05,"Shandong Provincial Key Laboratory of Biophysics, Institute of Biophysics, Dezhou University, Dezhou, 253023, 
China.","Yu JF, Dou XH, Sha YJ, Wang CL, Wang HB, Chen YT, Zhang F, Zhou Y, Wang JH",,"National Health and Medical Research Council, National Natural Science Foundation of China",4.0,China +28440791,CyanoType,0.9971928,CyanoType,0.9971928,,0,1,http://lege.ciimar.up.pt/cyanotype,301,,"(41.1496,-8.6110)",http://web.archive.org/web/20221017080751/https://lege.ciimar.up.pt/cyanotype/,2017-04-25,"CIIMAR/CIMAR-Interdisciplinary Centre of Marine and Environmental Research, University of Porto, Terminal de Cruzeiros do Porto de Leixões, Av. General Norton de Matos s/n, Matosinhos 4450-208, Portugal.","Ramos V, Morais J, Vasconcelos VM",,,8.0,Portugal +28502574,DINeR,0.992539942,DINeR,0.992539942,Database for Insect Neuropeptide Research,0.922759838,1,http://www.neurostresspep.eu/diner,200,,"(39.0437,-77.4875)",http://web.archive.org/web/20220523152418/http://www.neurostresspep.eu/diner,2017-05-11,"Institute of Molecular, Cell and Systems Biology, College of Medical, Veterinary and Life Sciences, University of Glasgow, G12 8QQ Glasgow, Scotland, UK.","Yeoh JGC, Pandit AA, Zandawala M, Nässel DR, Davies SA, Dow JAT",,European Union's Horizon 2020 research and innovation programme,27.0, +28533016,DRodVir,0.988821983,DRodVir,0.988821983,,0,1,http://www.mgc.ac.cn/DRodVir,301,,"(39.9075,116.3972)",http://web.archive.org/web/20220517001524/http://www.mgc.ac.cn/DRodVir/,2017-05-03,"MOH Key Laboratory of Systems Biology of Pathogens, Institute of Pathogen Biology, Chinese Academy of Medical Sciences & Peking Union Medical College, Beijing 100176, China.","Chen L, Liu B, Wu Z, Jin Q, Yang J",,"the National Key Research and Development Program, the CAMS Innovation Fund for Medical Sciences, National Major Science and Technology Project, Program for Changjiang Scholars and Innovative Research Team in University",9.0,China 
+28562632,DrugSig,0.996384323,DrugSig,0.996384323,,0,1,http://biotechlab.fudan.edu.cn/database/drugsig,500,,,http://web.archive.org/web/20220615180934/http://biotechlab.fudan.edu.cn/database/drugsig/,2017-05-31,"School of Life Sciences, Fudan University, Shanghai, China.","Wu H, Huang J, Zhong Y, Huang Q",,National health and family planning commission of the people's republic of china,10.0,China +28575155,Dynamic-BM,0.985531533,Dynamic-BM,0.985531533,,0,1,http://bioinfo.ibp.ac.cn/Dynamic-BM,301,,"(39.9075,116.3972)",http://web.archive.org/web/20220709091556/http://bioinfo.ibp.ac.cn/Dynamic-BM/,2018-11-01,"Key Laboratory of RNA Biology, Institute of Biophysics and University of the Chinese Academy of Sciences, Beijing, China.","Cui Y, Chen X, Niu Y, Wang D, Luo H, Fan Z, Wang D, Wu W, Teng X, He S, Luo J, Chen R",,"National High Technology Research and Development Program of China, National Natural Science Foundation of China, National High Technology Research and Development Program of China",1.0,China +28977473,dbCoRC,0.997102678,dbCoRC,0.997102678,,0,1,http://dbcorc.cam-su.org,200,,"(31.3041,120.5954)",http://web.archive.org/web/20220225182838/http://dbcorc.cam-su.org/,2018-01-01,"School of Biology and Basic Medical Sciences, Soochow University, Suzhou 215123, China.","Huang M, Chen Y, Yang M, Guo A, Xu Y, Xu L, Koeffler HP",,,21.0,China +29031638,dbGAPs,0.978835642,dbGAPs,0.978835642,,0,1,http://www.bmicnip.in/dbgaps,404,,,http://web.archive.org/web/20170629091001/http://bmicnip.in:80/dbgaps/,2017-10-12,"Biomedical Informatics Centre, National Institute of Pathology - ICMR, New Delhi, India.","Aggarwal S, Nayek A, Pradhan D, Verma R, Yadav M, Ponnusamy K, Jain AK",,,2.0,India +29036667,DISNOR,0.998069823,DISNOR,0.998069823,,0,1,http://DISNOR.uniroma2.it,200,,"(41.8661,12.5896)",http://web.archive.org/web/20220615181133/https://disnor.uniroma2.it/,2018-01-01,"Bioinformatics and Computational Biology Unit, Department of Biology, University of Rome 'Tor Vergata', 
00133 Rome, Italy.","Lo Surdo P, Calderone A, Iannuccelli M, Licata L, Peluso D, Castagnoli L, Cesareni G, Perfetto L",,,14.0,Italy +29059320,DiseaseEnhancer,0.996761441,DiseaseEnhancer,0.996761441,,0,1,http://biocc.hrbmu.edu.cn/DiseaseEnhancer,302,,"(31.2222,121.4581)",http://web.archive.org/web/20221031060833/http://biocc.hrbmu.edu.cn/DiseaseEnhancer/,2018-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150081, China.","Zhang G, Shi J, Zhu S, Lan Y, Xu L, Yuan H, Liao G, Liu X, Zhang Y, Xiao Y, Li X",,,25.0,China +29069447,DifferentialNet,0.995048225,DifferentialNet,0.995048225,,0,1,http://netbio.bgu.ac.il/diffnet,"HTTPConnectionPool(host='netbio.bgu.ac.il', port=80): Max retries exceeded with url: /diffnet (Caused by ConnectTimeoutError(, 'Connection to netbio.bgu.ac.il timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20210101024348/https://netbio.bgu.ac.il/diffnet/,2018-01-01,"Department of Clinical Biochemistry & Pharmacology, Faculty of Health Sciences.","Basha O, Shpringer R, Argov CM, Yeger-Lotem E",,,30.0, +29145823,dbMDEGA,0.986826241,dbMDEGA,0.986826241,,0,1,http://dbmdega.shinyapps.io/dbMDEGA,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20210525174903/https://dbmdega.shinyapps.io/dbMDEGA/,2017-11-16,"Department of Occupational Health and Toxicology, School of Public Health, Nanchang University, BaYi Road 461, Nanchang, 330006, China.","Zhang S, Deng L, Jia Q, Huang S, Gu J, Zhou F, Gao M, Sun X, Feng C, Fan G",,"Jiangxi Provincial Natural Science Foundation, National Nature Science Foundation of China",6.0,China +29209336,DRDB,0.974232197,DRDB,0.974232197,Date Palm Genomic Resource Database,0.711140464,1,http://drdb.big.ac.cn/home,"HTTPConnectionPool(host='drdb.big.ac.cn', port=80): Max retries exceeded with url: /home (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without 
response')))",,,http://web.archive.org/web/20200215151352/http://drdb.big.ac.cn:80/home,2017-11-02,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing, China.","He Z, Zhang C, Liu W, Lin Q, Wei T, Aljohi HA, Chen WH, Hu S",,"National Natural Science Foundation of China, Chinese Academy of Sciences, King Abdulaziz City for Science and Technology, National Natural Science Foundation of China, National Natural Science Foundation of China",3.0,China +29236308,DBTFLC,0.955499542,DBTFLC,0.955499542,Database of Transcription Factors in Lung Cancer,0.953766271,1,http://www.vit.ac.in/files/database/Home.php,302,,"(12.9184,79.1325)",http://web.archive.org/web/20210516152316/https://vit.ac.in/files/database/Home.php,2018-04-17,"Department of Biotechnology, School of Bio Sciences and Technology, VIT University, Vellore, Tamilnadu, India.","Amalraj T, Dravid AA, Tripathi R, Lulu SS",,,2.0,India +29385418,DIBS,0.994214892,DIBS,0.994214892,,0,1,http://dibs.enzim.ttk.mta.hu,200,,"(47.4984,19.0404)",http://web.archive.org/web/20221028210638/http://dibs.enzim.ttk.mta.hu/,2018-02-01,"Research Centre for Natural Sciences, Institute of Enzymology, Hungarian Academy of Sciences, Budapest H-1117, Hungary.","Schad E, Fichó E, Pancsa R, Simon I, Dosztányi Z, Mészáros B",,"Hungarian Academy of Sciences, European Molecular Biology Organization, OTKA, OTKA, Medical Research Council",28.0,Hungary +29485625,Datasets2Tools,0.979977262,Datasets2Tools,0.979977262,,0,1,http://amp.pharm.mssm.edu/datasets2tools,307,,"(39.0437,-77.4875)",http://web.archive.org/web/20200803145203/http://amp.pharm.mssm.edu/datasets2tools/,2018-02-27,"Department of Pharmacological Sciences, Mount Sinai Center for Bioinformatics, BD2K-LINCS Data Coordination and Integration Center (DCIC), Team Nitrogen of the NIH Data Commons Pilot Project Consortium (DCPPC), Icahn School of Medicine at Mount Sinai, One Gustave L. 
Levy Place, Box 1603, New York, NY 10029, USA.","Torre D, Krawczuk P, Jagodnik KM, Lachmann A, Wang Z, Wang L, Kuleshov MV, Ma'ayan A",,"NCI NIH HHS, NCI NIH HHS, NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS",3.0,United States +29665371,dbSWEET,0.741674781,dbSWEET,0.741674781,,0,1,http://bioinfo.iitk.ac.in/bioinfo/dbSWEET/Home,"HTTPConnectionPool(host='bioinfo.iitk.ac.in', port=80): Max retries exceeded with url: /bioinfo/dbSWEET/Home (Caused by ConnectTimeoutError(, 'Connection to bioinfo.iitk.ac.in timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20221031200931/http://bioinfo.iitk.ac.in/bioinfo/dbSWEET/Home,2018-04-14,"Department of Biological Sciences and Bioengineering, Indian Institute of Technology Kanpur, Kanpur 208016, India.","Gupta A, Sankararamakrishnan R",,,1.0,India +29764375,dbATM,0.992344558,dbATM,0.992344558,animal transcriptome map,0.634973332,1,http://dbATM.mbc.nctu.edu.tw,"HTTPConnectionPool(host='dbatm.mbc.nctu.edu.tw', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220621180038/http://dbatm.mbc.nctu.edu.tw/,2018-05-09,"Institute of Bioinformatics and Systems Biology, National Chiao Tung University, Hsinchu, 300, Taiwan.","Chou CH, Huang HY, Huang WC, Hsu SD, Hsiao CD, Liu CY, Chen YH, Liu YC, Huang WY, Lee ML, Chen YC, Huang HD",,,1.0, +29860480,dbCRSR,0.995114565,dbCRSR,0.995114565,cancer radiosensitivity regulation factors database,0.697191248,1,http://bioinfo.ahu.edu.cn,302,,"(31.2222,121.4581)",http://web.archive.org/web/20200508035558/http://bioinfo.ahu.edu.cn/,2018-01-01,"Key Laboratory of High Magnetic Field and Ion Beam Physical Biology, Hefei Institutes of Physical Science, Chinese Academy of Sciences, Anhui Province Key Laboratory of Environmental Toxicology and Pollution Control Technology, Hefei, Anhui, People's Republic of China.","Wen P, Xia J, Cao X, Chen B, Tao Y, Wu L, Xu A, Zhao 
G",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",2.0,China +29961819,dbLGL,0.99372381,dbLGL,0.99372381,,0,1,http://soft.bioinfo-minzhao.org/lgl,406,,,http://web.archive.org/web/20220617164119/http://soft.bioinfo-minzhao.org/lgl/,2018-01-01,"The School of Public Health, Institute for Chemical Carcinogenesis, Guangzhou Medical University, Guangzhou, China.","Liu Y, Luo M, Jin Z, Zhao M, Qu H",,"National Key Research and Development Program of China, National Natural Science Foundation of China",2.0,China +30016397,dbCID,0.99611038,dbCID,0.99611038,database of Cancer driver InDels,0.814343606,1,http://bioinfo.ahu.edu.cn:8080/dbCID,302,,,http://web.archive.org/web/20210923030620/http://bioinfo.ahu.edu.cn:8080/dbCID/,2019-09-01,"Institute of Physical Science and Information Technology, School of Computer Science and Technology, Anhui University, Hefei, Anhui, China.","Yue Z, Zhao L, Cheng N, Yan H, Xia J",,National Natural Science Foundation of China,6.0,China +30053267,dbCAN-seq,0.997396141,dbCAN-seq,0.997396141,,0,1,http://cys.bios.niu.edu/dbCAN_seq,"HTTPConnectionPool(host='cys.bios.niu.edu', port=80): Max retries exceeded with url: /dbCAN_seq (Caused by ConnectTimeoutError(, 'Connection to cys.bios.niu.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20190311151905/http://cys.bios.niu.edu:80/dbCAN_seq/,2018-01-01,"College of Computer and Control Engineering, Nankai University, Tianjin, China.","Huang L, Zhang H, Wu P, Entwistle S, Li X, Yohe T, Yi H, Yang Z, Yin Y",,NIGMS NIH HHS,68.0,China +30117424,CZRC,0.992718458,CZRC,0.992718458,China Zebrafish Resource Center,0.937906504,1,http://zfish.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220930070654/http://www.zfish.cn/,2018-08-01,"China Zebrafish Resource Center, State Key Laboratory of Freshwater Ecology and Biotechnology, Institute of Hydrobiology, Chinese Academy of Sciences, Wuhan 430072, China.","Xiong F, Xie XW, Pan LY, Li KY, Liu LY, Zhang Y, Li LL, Sun YH",,,0.0,"China, China" +30321383,CuGenDB,0.997316897,CuGenDB,0.997316897,Cucurbit Genomics Database,0.988422981,1,http://cucurbitgenomics.org,200,,"(42.4406,-76.4966)",no_wayback,2019-01-01,"Boyce Thompson Institute, Cornell University, Ithaca, NY 14853, USA.","Zheng Y, Wu S, Bai Y, Sun H, Jiao C, Guo S, Zhao K, Blanca J, Zhang Z, Huang S, Xu Y, Weng Y, Mazourek M, K Reddy U, Ando K, McCreight JD, Schaffer AA, Burger J, Tadmor Y, Katzir N, Tang X, Liu Y, Giovannoni JJ, Ling KS, Wechter WP, Levi A, Garcia-Mas J, Grumet R, Fei Z",,"US-Israel Binational Agricultural Research and Development, US-Israel Binational Agricultural Research and Development, US-Israel Binational Agricultural Research and Development, National Institute of Food and Agriculture",53.0,United States +"30371892, 33151287",DrugCentral,0.996682286,DrugCentral,0.996682286,,0,2,http://drugcentral.org,301,,"(40.7402,-73.9996)",http://web.archive.org/web/20221022210612/https://drugcentral.org/,2021-01-01,"Translational Informatics Division, Department of Internal Medicine, The University of New Mexico Health Science Center, Albuquerque, NM 87131, USA., Department of Computational Chemistry, ""Coriolan Dragulescu'' Institute of Chemistry, 24 Mihai Viteazu Blvd, Timişoara, 
Timiş, 300223, România.","Ursu O, Holmes J, Bologa CG, Yang JJ, Mathias SL, Stathias V, Nguyen DT, Schürer S, Oprea T, Avram S, Bologa CG, Holmes J, Bocci G, Wilson TB, Nguyen DT, Curpan R, Halip L, Bora A, Yang JJ, Knockel J, Sirimulla S, Ursu O, Oprea TI",", ","National Institutes of Health, NCATS NIH HHS, National Institutes of Health, NCI NIH HHS, National Institutes of Health, National Institutes of Health, National Science Foundation, NCI NIH HHS, Intramural Research Program, Division of Preclinical Innovation, NIH NCATS, NCATS NIH HHS, NCI NIH HHS, NIH NCATS Clinical and Translational Science Center for UNM, National Cancer Institute, Novo Nordisk Foundation Center for Protein Research, National Science Foundation",63.0,"Mexico, United States" +30379998,dbCPM,0.986568928,dbCPM,0.986568928,of Cancer Passenger Mutations,0.904574347,1,http://bioinfo.ahu.edu.cn:8080/dbCPM,302,,,http://web.archive.org/web/20210923025130/http://bioinfo.ahu.edu.cn:8080/dbCPM/,2018-10-30,"Institute of Physical Science and Information Technology, School of Computer Science and Technology, Anhui University, Hefei, Anhui, China.","Yue Z, Zhao L, Xia J",,"Anhui Provincial Outstanding Young Talent Support Plan, National Natural Science Foundation of China, oung Wanjiang Scholar Program of Anhui Province, China, National Natural Science Foundation of China",3.0,China +30380071,DSMNC,0.989300251,DSMNC,0.989300251,,0,1,http://dsmnc.big.ac.cn,301,,"(39.9075,116.3972)",http://web.archive.org/web/20211207110136/https://dsmnc.big.ac.cn/,2019-01-01,"Key Laboratory of Genomic and Precision Medicine, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Miao X, Li X, Wang L, Zheng C, Cai J",,"National Key R&D Program of China, National Natural Science Foundation of China, National Key R&D Program of China, National Natural Science Foundation of China, National Key R&D Program of China",3.0,China +"30380085, 30380085, 
30380085",dbAMP,0.994630337,dbAMP,0.994630337,,0,1,http://csb.cse.yzu.edu.tw/dbAMP,301,,"(24.9937,121.2970)",no_wayback,2019-01-01,"Department of Computer Science and Engineering, Yuan Ze University, Taoyuan 320, Taiwan., Department of Computer Science and Engineering, Yuan Ze University, Taoyuan 320, Taiwan., Department of Computer Science and Engineering, Yuan Ze University, Taoyuan 320, Taiwan.","Jhong JH, Chi YH, Li WC, Lin TH, Huang KY, Lee TY, Jhong JH, Chi YH, Li WC, Lin TH, Huang KY, Lee TY, Jhong JH, Chi YH, Li WC, Lin TH, Huang KY, Lee TY",", , ","Chinese University of Hong Kong, Chinese University of Hong Kong, Chinese University of Hong Kong",87.0, +30482172,dbMPIKT,0.991190982,dbMPIKT,0.991190982,,0,1,http://DeepLearner.ahu.edu.cn/web/dbMPIKT,"HTTPConnectionPool(host='deeplearner.ahu.edu.cn', port=80): Max retries exceeded with url: /web/dbMPIKT (Caused by ConnectTimeoutError(, 'Connection to deeplearner.ahu.edu.cn timed out. (connect timeout=5)'))",,,no_wayback,2018-11-27,"Institute of Physical Science and Information Technology, Anhui University, Hefei, 230601, Anhui, China.","Liu Q, Chen P, Wang B, Zhang J, Li J",,National Natural Science Foundation of China,7.0,China +30665056,dbHDPLS,0.9490378,dbHDPLS,0.9490378,human disease-related protein-ligand structures,0.909200006,1,http://DeepLearner.ahu.edu.cn/web/dbDPLS,"HTTPConnectionPool(host='deeplearner.ahu.edu.cn', port=80): Max retries exceeded with url: /web/dbDPLS (Caused by ConnectTimeoutError(, 'Connection to deeplearner.ahu.edu.cn timed out. 
(connect timeout=5)'))",,,no_wayback,2019-01-11,"Institutes of Physical Science and Information Technology, Anhui University, 230601 Hefei, Anhui, China.","Zhu M, Song X, Chen P, Wang W, Wang B",,"National Natural Science Foundation of China, Anhui Province Funds for Excellent Youth Scholars in Colleges, National Natural Science Foundation of China, Anhui Scientific Research Foundation for Returned Scholars",1.0,China +30942868,DEE2,0.989227772,DEE2,0.989227772,Expression Explorer,0.759982735,1,http://dee2.io,301,,"(-37.8140,144.9633)",http://web.archive.org/web/20221016214545/https://dee2.io/,2019-04-01,"Deakin University, Geelong, Australia, School of Life and Environmental Sciences, 75 Pigdons Road, Waurn Ponds, VIC 3216, Australia.","Ziemann M, Kaspi A, El-Osta A",,"European Union Collaborative Research, National Health and Medical Research Council, National Health and Medical Research Council",18.0,"Australia, Australia" +31016417,DNAmod,0.983683288,DNAmod,0.983683288,,0,1,http://dnamod.hoffmanlab.org,301,,"(43.7001,-79.4163)",http://web.archive.org/web/20221017022741/https://dnamod.hoffmanlab.org/,2019-04-23,"Department of Medical Biophysics, University of Toronto, Princess Margaret Cancer Research Tower 15-701, 101 College Street, Toronto, ON, M5G 1L7, Canada.","Sood AJ, Viner C, Hoffman MM",,"University of Toronto McLaughlin Center, Natural Sciences and Engineering Research Council of Canada, Ontario Institute for Cancer Research, Princess Margaret Cancer Foundation, University of Toronto, Canadian Institutes of Health Research, Ontario Ministry of Training, Colleges and Universities, Canadian Cancer Society, Natural Sciences and Engineering Research Council of Canada, Sciences and Engineering Research Council of Canada, Ontario Ministry of Research, Innovation and Science",21.0,Canada 
+31066443,DrugComb,0.991504312,DrugComb,0.991504312,,0,1,http://drugcomb.fimm.fi,301,,"(64.2273,27.7285)",http://web.archive.org/web/20221017000912/https://drugcomb.fimm.fi/,2019-07-01,"Institute for Molecular Medicine Finland, Helsinki Life Science Institute, University of Helsinki, Finland.","Zagidullin B, Aldahdooh J, Zheng S, Wang W, Wang Y, Saad J, Malyutina A, Jafari M, Tanoli Z, Pessia A, Tang J",,"European Research Council, European Commission, China Scholarship Council, European Research Council, Academy of Finland Research Fellow, Finland's EDUFI Fellowship",40.0,"Finland, Finland" +31096089,DrugR,0.989119887,DrugR,0.989119887,,0,1,http://www.drugr.ir,301,,"(45.5088,-73.5878)",http://web.archive.org/web/20200717021100/http://drugr.ir/,2019-05-08,"Laboratory of Systems Biology and Bioinformatics (LBB), Institute of Biochemistry and Biophysics, University of Tehran, Tehran, Iran.","Masoudi-Sobhanzadeh Y, Omidi Y, Amanlou M, Masoudi-Nejad A",,,13.0, +31349169,DEDuCT,0.989132156,DEDuCT,0.989132156,Database of Endocrine Disrupting Chemicals and their Toxicity profiles,0.964690409,1,http://cb.imsc.res.in/deduct,301,,"(13.0156,80.2467)",http://web.archive.org/web/20220331093710/https://cb.imsc.res.in/deduct/,2019-07-16,"The Institute of Mathematical Sciences (IMSc), Homi Bhabha National Institute (HBNI), Chennai 600113, India.","Karthikeyan BS, Ravichandran J, Mohanraj K, Vivek-Ananth RP, Samal A",,,6.0,India +31366898,Daphnia stressor database,0.718929927,Daphnia stressor database,0.718929927,,0,1,http://www.daphnia-stressordb.uni-hamburg.de/dsdbstart.php,200,,"(53.5507,9.9930)",http://web.archive.org/web/20221022071825/https://www.daphnia-stressordb.uni-hamburg.de/dsdbstart.php,2019-07-31,"Department of Marine Sciences, Tjärnö Marine Laboratory, University of Gothenburg, 452 96, Strömstad, Sweden.","Ravindran SP, Lüneburg J, Gottschlich L, Tams V, Cordellier M",,,3.0,Sweden 
+31390943,D-lnc,0.984383583,D-lnc,0.984383583,,0,1,http://www.jianglab.cn/D-lnc,302,,"(30.2936,120.1614)",http://web.archive.org/web/20220420063408/http://www.jianglab.cn/D-lnc/,2019-08-07,"College of Automation Engineering, Nanjing University of Aeronautics and Astronautics , Nanjing , China.","Jiang W, Qu Y, Yang Q, Ma X, Meng Q, Xu J, Liu X, Wang S",,,8.0,China +31409791,DRAMP,0.984241545,DRAMP,0.984241545,Data Repository of Antimicrobial Peptides,0.965962529,1,http://dramp.cpu-bioinfor.org,200,,"(22.2783,114.1747)",http://web.archive.org/web/20220621031215/http://dramp.cpu-bioinfor.org/,2019-08-13,"School of Life Science and Technology, China Pharmaceutical University, Nanjing, 211100, P.R. China.","Kang X, Dong F, Shi C, Liu S, Sun J, Chen J, Li H, Xu H, Lao X, Zheng H",,,74.0,"China, China" +31560645,DTM,0.971463203,DTM,0.971463203,Trends,0.880578995,1,http://www.cdc.gov/nccdphp/dnpao/data-trends-maps/index.html,301,,"(34.0522,-118.2437)",http://web.archive.org/web/20221104020509/https://www.cdc.gov/nccdphp/dnpao/data-trends-maps/index.html,2019-09-26,"Oak Ridge Institute for Science and Education, Research Participation Program, Division of Nutrition, Physical Activity, and Obesity, Centers for Disease Control and Prevention, Atlanta, Georgia.","Lange SJ, Moore LV, Galuska DA",,,2.0,Georgia +31581093,DORMAN,0.996239364,DORMAN,0.996239364,Database of Reconstructed Metabolic Networks,0.906664733,1,http://ciceklab.cs.bilkent.edu.tr/dorman,301,,"(39.9199,32.8543)",no_wayback,2021-07-01,None,"Ozden F, Siper MC, Acarsoy N, Elmas T, Marty B, Qi X, Cicek AE",,Scientific and Technological Research Council of Turkey &#x2013; TUBITAK,0.0, +31593887,Distances of Amino Acids,0.922101881,,0,Distances of Amino Acids,0.922101881,1,http://andromeda.matf.bg.ac.rs/aadis_dynamic,404,,,no_wayback,2019-09-28,"Department of Computer Science, Faculty of Mathematics, University of Belgrade, Studentski trg 16, 11000, Belgrade, Serbia. 
Electronic address: mirjana@matf.bg.ac.rs.",Maljković MM,,"Ministarstvo Prosvete, Nauke i TehnoloÅ¡kog Razvoja",0.0,Serbia +31598709,DNMIVD,0.995371461,DNMIVD,0.995371461,DNA Methylation Interactive Visualization Database,0.965341255,1,http://www.unimd.org/dnmivd,301,,"(37.3394,-121.8950)",no_wayback,2020-01-01,"Center for Bioinformatics and Computational Biology, and the Institute of Biomedical Sciences, School of Life Sciences, East China Normal University, Shanghai 200241, China.","Ding W, Chen J, Feng G, Chen G, Wu J, Guo Y, Ni X, Shi T",,"National Science Foundation of China, 111 Project, National Science Foundation of China, National Science Foundation of China, Beihang University & Capital Medical University Advanced Innovation Center for Big Data-Based Precision Medicine Plan, China Human Proteome Project, China Human Proteome Project",29.0,"China, China" +31603498,dbInDel,0.994747579,dbInDel,0.994747579,,0,1,http://enhancer-indel.cam-su.org,200,,"(31.3041,120.5954)",http://web.archive.org/web/20210724050927/http://enhancer-indel.cam-su.org/,2020-03-01,"Department of Bioinformatics, School of Biology and Basic Medical Sciences.","Huang M, Wang Y, Yang M, Yan J, Yang H, Zhuang W, Xu Y, Koeffler HP, Lin DC, Chen X",,"Priority Academic Program Development of Jiangsu Higher Education Institutions, National Key R&D Program of China, National Natural Science Foundation of China",1.0, +31612957,DNAproDB,0.995879531,DNAproDB,0.995879531,,0,1,http://dnaprodb.usc.edu,302,,"(34.0522,-118.2437)",http://web.archive.org/web/20221010033732/https://dnaprodb.usc.edu/,2020-01-01,"Quantitative and Computational Biology, Departments of Biological Sciences, Chemistry, Physics and Astronomy, and Computer Science, University of Southern California, Los Angeles, CA 90089, USA.","Sagendorf JM, Markarian N, Berman HM, Rohs R",,"NIGMS NIH HHS, Rose Hills Foundation, NIGMS NIH HHS, Human Frontier Science Program, National Institutes of Health, NIGMS NIH HHS, National Institutes of 
Health, National Institutes of Health, National Institutes of Health, NHGRI NIH HHS, National Institutes of Health, NIGMS NIH HHS",16.0,United States +31641158,Diat.barcode,0.889230361,Diat.barcode,0.889230361,,0,1,http://www6.inra.fr/carrtel-collection_eng/Barcoding-database,302,,"(48.8534,2.3488)",no_wayback,2019-10-22,"INRA, UMR CARRTEL, 75bis av. de Corzent - CS 50511, FR-74203, Thonon les Bains cedex, France. frederic.rimet@inra.fr.","Rimet F, Gusev E, Kahlert M, Kelly MG, Kulikovskiy M, Maltsev Y, Mann DG, Pfannkuchen M, Trobajo R, Vasselon V, Zimmermann J, Bouchez A",,,6.0,France +31664080,dendPoint,0.994515121,dendPoint,0.994515121,,0,1,http://biosig.unimelb.edu.au/dendpoint,302,,"(-37.8140,144.9633)",http://web.archive.org/web/20220522033202/http://biosig.unimelb.edu.au/dendpoint/,2019-10-29,"School of Biomedical Sciences, University of Queensland, St Lucia, Queensland, Australia. l.kaminskas@uq.edu.au.","Kaminskas LM, Pires DEV, Ascher DB",,Medical Research Council,7.0,Australia +31665429,DrugCombDB,0.996946216,DrugCombDB,0.996946216,,0,1,http://drugcombdb.denglab.org,200,,"(22.5231,113.3791)",no_wayback,2020-01-01,"Lab of Information Management, Changzhou University, Changzhou 213164, China.","Liu H, Zhang W, Zou B, Wang J, Deng Y, Deng L",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",43.0,China +31691822,DrLLPS,0.997929573,DrLLPS,0.997929573,,0,1,http://llps.biocuckoo.cn,200,,"(22.2783,114.1747)",http://web.archive.org/web/20220706083507/http://llps.biocuckoo.cn/,2020-01-01,"Key Laboratory of Molecular Biophysics of Ministry of Education, Hubei Bioinformatics and Molecular Imaging Key Laboratory, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China.","Ning W, Guo Y, Lin S, Mei B, Wu Y, Jiang P, Tan X, Zhang W, Chen G, Peng D, Chu L, Xue Y",,"Fundamental Research Funds for the Central Universities, 
Natural Science Foundation of China, Changjiang Scholars Program of China, National Key Research and Development Program, Fundamental Research Funds for the Central Universities, National Key Research and Development Program, Natural Science Foundation of China, Natural Science Foundation of China, Natural Science Foundation of China, Natural Science Foundation of China",21.0,China +31713636,DisProt,0.995845914,DisProt,0.995845914,Database of Protein Disorder,0.837283194,1,http://disprot.org,301,,"(45.4080,11.8859)",http://web.archive.org/web/20221107180718/https://disprot.org/,2020-01-01,"Department of Biomedical Sciences, University of Padova, Padova 35121, Italy.","Hatos A, Hajdu-Soltész B, Monzon AM, Palopoli N, Álvarez L, Aykac-Fas B, Bassot C, Benítez GI, Bevilacqua M, Chasapi A, Chemes L, Davey NE, Davidović R, Dunker AK, Elofsson A, Gobeill J, Foutel NSG, Sudha G, Guharoy M, Horvath T, Iglesias V, Kajava AV, Kovacs OP, Lamb J, Lambrughi M, Lazar T, Leclercq JY, Leonardi E, Macedo-Ribeiro S, Macossay-Castillo M, Maiani E, Manso JA, Marino-Buslje C, Martínez-Pérez E, Mészáros B, Mičetić I, Minervini G, Murvai N, Necci M, Ouzounis CA, Pajkos M, Paladin L, Pancsa R, Papaleo E, Parisi G, Pasche E, Barbosa Pereira PJ, Promponas VJ, Pujols J, Quaglia F, Ruch P, Salvatore M, Schad E, Szabo B, Szaniszló T, Tamana S, Tantos A, Veljkovic N, Ventura S, Vranken W, Dosztányi Z, Tompa P, Tosatto SCE, Piovesan D",,"Hungarian Academy of Sciences, Hungarian Academy of Sciences, Horizon 2020, Research Foundation Flanders, Elixir-GR, ICREA, Mexican National Council of Science and Technology, Agencia Nacional de Promoción Científica y Tecnológica, European Regional Development Fund, Carlsberg Distinguished Fellowship, Italian Ministry of Health Young Investigator Grant, Hungarian Academy of Sciences, National Research, Development and Innovation Office, European Regional Development Fund, Fundação para a Ciência e a Tecnologia, VetenskapsrÃ¥det, Danmarks Grundforskningsfond, 
Hungarian National Research, Development, and Innovation Office, Ministry of Education, Science and Technological Development of the Republic of Serbia, Ministerio de Economía y Competitividad, Hungarian Academy of Sciences, Agencia Nacional de Promoción Científica y Tecnológica",84.0,Italy +32183712,CuAS,0.986856222,CuAS,0.986856222,,0,1,http://cmb.bnu.edu.cn/alt_iso/index.php,200,,"(39.9906,116.2887)",http://web.archive.org/web/20220620113727/http://cmb.bnu.edu.cn/alt_iso/index.php,2020-03-18,"MOE Key Laboratory for Biodiversity Science and Ecological Engineering and Beijing Key Laboratory of Gene Resource and Molecular Development, College of Life Sciences, Beijing Normal University, No 19 Xinjiekouwai Street, Beijing, 100875, China.","Sun Y, Zhang Q, Liu B, Lin K, Zhang Z, Pang E",,the National Natural Science Foundation of China,3.0,China +32227657,dbMTS,0.996018052,dbMTS,0.996018052,,0,1,http://database.liulab.science/dbMTS,200,,"(27.9475,-82.4584)",http://web.archive.org/web/20220528171424/http://database.liulab.science/dbMTS,2020-04-06,"USF Genomics, College of Public Health, University of South Florida, Tampa, Florida.","Li C, Mou C, Swartz MD, Yu B, Bai Y, Tu Y, Liu X",,"NHGRI NIH HHS, National Human Genome Research Institute",5.0, +32307725,DE-pattern,0.593041122,DE-pattern,0.593041122,,0,1,http://2de-pattern.pnpi.nrcki.ru,200,,"(59.5764,30.1283)",http://web.archive.org/web/20220401184824/http://2de-pattern.pnpi.nrcki.ru/,2020-04-27,"Orekhovich Institute of Biomedical Chemistry of Russian Academy of Medical Sciences, Moscow, Russia.","Naryzhny S, Klopov N, Ronzhina N, Zorina E, Zgoda V, Kleyst O, Belyakova N, Legina O",,,2.0, +32510549,DenvInD,0.996846437,DenvInD,0.996846437,,0,1,http://webs.iiitd.edu.in/raghava/denvind,301,,"(28.6453,77.2128)",http://web.archive.org/web/20210927043950/https://webs.iiitd.edu.in/raghava/denvind/,2021-05-01,None,"Dwivedi VD, Arya A, Yadav P, Kumar R, Kumar V, Raghava GPS",,,1.0, 
+32527280,DDIEM,0.995850682,DDIEM,0.995850682,Drug Database for Inborn Errors of Metabolism,0.948216963,1,http://ddiem.phenomebrowser.net,200,,"(37.5331,-122.2486)",http://web.archive.org/web/20221025100100/http://ddiem.phenomebrowser.net/,2020-06-11,"Computational Bioscience Research Center (CBRC), King Abdullah University of Science and Technology, 4700 KAUST, Thuwal, 23955, Kingdom of Saudi Arabia.","Abdelhakim M, McMurray E, Syed AR, Kafkas S, Kamau AA, Schofield PN, Hoehndorf R",,,2.0,Saudi Arabia +32597467,DrugSimDB,0.995589495,DrugSimDB,0.995589495,,0,1,http://vafaeelab.com/drugSimDB.html,200,,"(1.2897,103.8501)",http://web.archive.org/web/20200603125843/http://vafaeelab.com/drugSimDB.html,2021-05-01,bioinformatics and computational biology at UNSW Sydney.,"Azad AKM, Dinarvand M, Nematollahi A, Swift J, Lutze-Mann L, Vafaee F",,,3.0, +32632099,EBRAINS,0.80999589,EBRAINS,0.80999589,,0,1,http://kg.ebrains.eu,301,,"(47.3667,8.5500)",http://web.archive.org/web/20221101134354/https://kg.ebrains.eu/,2020-07-06,"Department of Molecular Medicine, Institute of Basic Medical Sciences, University of Oslo, Oslo, Norway.","Bjerke IE, Puchades MA, Bjaalie JG, Leergaard TB",,"Norges ForskningsrÃ¥d, Norges ForskningsrÃ¥d (Research Council of Norway), EC | Horizon 2020 Framework Programme (EU Framework Programme for Research and Innovation H2020), EC | Horizon 2020 Framework Programme",0.0,Norway +32646415,ECCParaCorp,0.992599487,ECCParaCorp,0.992599487,Physician Data Query,0.659761125,1,http://www.phoc.org.cn/ECCParaCorp,404,,,http://web.archive.org/web/20220525154929/http://www.phoc.org.cn/ECCParaCorp/,2020-07-09,"Institute of Medical Information/Library, Chinese Academy of Medical Sciences and Peking Union Medical College, Beijing, China.","Ma H, Yang F, Ren J, Li N, Dai M, Wang X, Fang A, Li J, Qian Q, He J",,,0.0,China 
+32941621,dbCAN-PUL,0.993980992,dbCAN-PUL,0.993980992,,0,1,http://bcb.unl.edu/dbCAN_PUL,302,,"(40.8000,-96.6670)",http://web.archive.org/web/20220616115643/https://bcb.unl.edu/dbCAN_PUL/,2021-01-01,"Department of Biological Sciences, Northern Illinois University, DeKalb, IL 60115, USA.","Ausland C, Zheng J, Yi H, Yang B, Li T, Feng X, Zheng B, Yin Y",,"United States Department of Agriculture, UNL, National Science Foundation",12.0,United States +32964659,EANPDB,0.993993628,EANPDB,0.993993628,Eastern Africa Natural Products Database,0.975782382,1,http://african-compounds.org,301,,"(47.9959,7.8522)",no_wayback,2020-10-08,"Institute of Pharmacy, Martin-Luther University of Halle-Wittenberg, Kurt-Mothes-Str. 3, 06120, Halle/Saale, Germany.","Simoben CV, Qaseem A, Moumbock AFA, Telukunta KK, Günther S, Sippl W, Ntie-Kang F",,,6.0,Germany +32976589,DIGGER,0.998090148,DIGGER,0.998090148,,0,1,http://exbio.wzw.tum.de/digger,302,,"(48.1374,11.5755)",http://web.archive.org/web/20220616022705/https://exbio.wzw.tum.de/digger/,2021-01-01,"Chair of Experimental Bioinformatics, TUM School of Life Sciences Weihenstephan, Technical University of Munich, 85354 Freising, Germany.","Louadi Z, Yuan K, Gress A, Tsoy O, Kalinina OV, Baumbach J, Kacprowski T, List M",,"Villum Fonden, Federal Ministry of Education and Research, VILLUM Young Investor, Horizon 2020",9.0,Germany +33007622,DINAX,0.989951134,DINAX,0.989951134,Database for Inherited Ataxia,0.747629498,1,http://slsdb.manipal.edu/dinax,"HTTPConnectionPool(host='slsdb.manipal.edu', port=80): Max retries exceeded with url: /dinax (Caused by ConnectTimeoutError(, 'Connection to slsdb.manipal.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210817145952/http://slsdb.manipal.edu/dinax/,2020-09-17,"Department of Cellular and Molecular Biology, Manipal School of Life Sciences, Manipal Academy of Higher Education, Manipal, 576104, India.","Chaudhari S, Naha R, Mukherjee S, Sharma A, Jayaram P, Mallya S, Chakrabarty S, Satyamoorthy K",,UKIERI,0.0,India +33035337,DockCoV2,0.975444973,DockCoV2,0.975444973,,0,1,http://covirus.cc/drugs,308,,"(24.0733,120.5628)",http://web.archive.org/web/20220812151951/https://covirus.cc/drugs/,2021-01-01,"Taiwan AI Labs, Taipei 10351, Taiwan.","Chen TF, Chang YC, Hsiao Y, Lee KH, Hsiao YC, Lin YH, Tu YE, Huang HC, Chen CY, Juan HF",,"Ministry of Science and Technology, Taiwan, Ministry of Science and Technology, Taiwan, Ministry of Science and Technology, Taiwan, Higher Education Sprout Project, Ministry of Science and Technology, Taiwan",14.0, +33051688,dbGuide,0.996610284,dbGuide,0.996610284,,0,1,http://sgrnascorer.cancer.gov/dbguide,"HTTPConnectionPool(host='sgrnascorer.cancer.gov', port=80): Max retries exceeded with url: /dbguide (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220521212127/https://sgrnascorer.cancer.gov/dbguide/,2021-01-01,"Laboratory Animal Sciences Program, Frederick National Lab for Cancer Research, Frederick, MD 21702, USA.","Gooden AA, Evans CN, Sheets TP, Clapp ME, Chari R",,Frederick National Laboratory for Cancer Research,5.0,United States +33053178,DNAmoreDB,0.992567003,DNAmoreDB,0.992567003,,0,1,http://www.genesilico.pl/DNAmoreDB,301,,"(52.2298,21.0118)",http://web.archive.org/web/20220519122026/https://www.genesilico.pl/DNAmoreDB/,2021-01-01,"Laboratory of Bioinformatics and Protein Engineering, International Institute of Molecular and Cell Biology in Warsaw, ul. Ks. 
Trojdena 4, PL-02-109 Warsaw, Poland.","Ponce-Salvatierra A, Boccaletto P, Bujnicki JM",,"National Science Centre, Poland, National Science Centre, Poland",4.0,Poland +33079988,DKK,0.987769703,DKK,0.987769703,Dark Kinase Knowledgebase,0.946007538,1,http://darkkinome.org,301,,"(35.9132,-79.0558)",http://web.archive.org/web/20221017013854/https://darkkinome.org/,2021-01-01,"Department of Pharmacology, University of North Carolina at Chapel Hill, Chapel Hill, NC 27599, USA.","Berginski ME, Moret N, Liu C, Goldfarb D, Sorger PK, Gomez SM",,"National Institutes of Health, NIDDK NIH HHS",11.0,United States +33084904,DualSeqDB,0.997340143,DualSeqDB,0.997340143,,0,1,http://www.tartaglialab.com/dualseq,301,,"(53.3331,-6.2489)",http://web.archive.org/web/20220616150855/http://www.tartaglialab.com/dualseq/,2021-01-01,"Systems Biology of Infection Lab, Department of Biochemistry and Molecular Biology, Biosciences Faculty, Universitat Autònoma de Barcelona, 08193 Cerdanyola del Vallès, Spain.","Macho Rendón J, Lang B, Ramos Llorens M, Gaetano Tartaglia G, Torrent Burgas M",,European Research Council,3.0,Spain +33104791,DrugSpaceX,0.995280385,DrugSpaceX,0.995280385,,0,1,http://drugspacex.simm.ac.cn,301,,"(31.2222,121.4581)",http://web.archive.org/web/20220501202940/https://drugspacex.simm.ac.cn/,2021-01-01,"Drug Discovery and Design Center, State Key Laboratory of Drug Research, Shanghai Institute of Materia Medica, Chinese Academy of Sciences, 555 Zuchongzhi Road, Shanghai 201203, China.","Yang T, Li Z, Chen Y, Feng D, Wang G, Fu Z, Ding X, Tan X, Zhao J, Luo X, Chen K, Jiang H, Zheng M",,"National Science & Technology, National Natural Science Foundation of China, Chinese Academy of Sciences",2.0,China +33119734,DescribePROT,0.997794986,DescribePROT,0.997794986,,0,1,http://biomine.cs.vcu.edu/servers/DESCRIBEPROT,301,,"(37.5538,-77.4603)",http://web.archive.org/web/20220512093900/http://biomine.cs.vcu.edu/servers/DESCRIBEPROT/,2021-01-01,"Department of Computer Science, 
Virginia Commonwealth University, Richmond, VA, USA.","Zhao B, Katuwawala A, Oldfield CJ, Dunker AK, Faraggi E, Gsponer J, Kloczkowski A, Malhis N, Mirdita M, Obradovic Z, Söding J, Steinegger M, Zhou Y, Kurgan L",,"NIGMS NIH HHS, Robert J. Mattauch Endowment, National Science Foundation, National Science Foundation, National Institutes of Health",5.0,United States +33174603,Datanator,0.994000673,Datanator,0.994000673,,0,1,http://datanator.info,301,,"(45.5946,-121.1787)",http://web.archive.org/web/20221107181904/https://datanator.info/,2021-01-01,"Icahn Institute for Data Science and Genomic Technology and Department of Genetics and Genomic Sciences, Icahn School of Medicine at Mount Sinai, 1255 5th Avenue, Suite C2, New York, NY 10029, USA.","Roth YD, Lian Z, Pochiraju S, Shaikh B, Karr JR",,"NIGMS NIH HHS, National Institutes of Health, Icahn Institute of Data Science and Genomic Technology, National Science Foundation, National Institutes of Health, NIBIB NIH HHS",3.0,United States +33196844,dbCNS,0.997333527,dbCNS,0.997333527,,0,1,http://yamasati.nig.ac.jp/dbcns,301,,"(35.1167,138.9167)",http://web.archive.org/web/20220128111657/http://yamasati.nig.ac.jp/dbcns/,2021-04-01,"Population Genetics Laboratory, Department of Genomics and Evolutionary Biology, National Institute of Genetics, Mishima, Japan.","Inoue J, Saitou N",,"Japan Society for the Promotion of Science, Grants-in-Aid for Scientific Research",0.0,Japan +33216893,DPL,0.99479425,DPL,0.99479425,database of,0.562851697,1,http://www.peptide-ligand.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220802095741/http://www.peptide-ligand.cn/,2020-11-01,"Henan Key Laboratory of Animal Immunology, Henan Academy of Agricultural Sciences, 116# Huayuan Road, Zhengzhou, Henan Province, 450002, China.","Wang F, Li N, Wang C, Xing G, Cao S, Xu Q, Zhang Y, Hu M, Zhang G",,,0.0,China 
+33276297,DBCOVP,0.997579992,DBCOVP,0.997579992,,0,1,http://covp.immt.res.in,200,,"(20.2724,85.8338)",http://web.archive.org/web/20220617155516/http://covp.immt.res.in/,2020-11-21,"School of Biotechnology, Kalinga Institute of Industrial Technology (KIIT), Deemed to be University, Bhubaneswar, Odisha, India.","Sahoo S, Mahapatra SR, Parida BK, Rath S, Dehury B, Raina V, Mohakud NK, Misra N, Suar M",,"National Institute of Allergy and Infectious Diseases, CSIR-Institute of Minerals and Materials Technology (CSIR-IMMT), Bhubaneswar, School of Biotechnology, Kalinga Institute of Industrial Technology, Deemed to be University, Bhubaneswar",5.0,India +33320930,CyanoPATH,0.997585058,CyanoPATH,0.997585058,,0,1,http://www.csbg-jlu.info/CyanoPATH,301,,"(39.9075,116.3972)",no_wayback,2021-07-01,"Jilin University, China.","Du W, Li G, Ho N, Jenkins L, Hockaday D, Tan J, Cao H",,"National Natural Science Foundation of China, Natural Science Foundation of Jilin Province",0.0,China +33331653,DATAMAN,0.970632815,DATAMAN,0.970632815,,0,1,http://www.dataman.co.nz,301,,"(-37.8140,144.9633)",http://web.archive.org/web/20220518124926/http://dataman.co.nz/,2021-01-22,"Instituto de Investigaciones Agropecuarias (INIA), INIA Remehue, Carretera Panamericana Sur km. 
8 Norte, Osorno, Chile.","Beltran I, van der Weerden TJ, Alfaro MA, Amon B, de Klein CAM, Grace P, Hafner S, Hassouna M, Hutchings N, Krol DJ, Leytem AB, Noble A, Salazar F, Thorman RE, Velthof GL",,,1.0,Chile +33382035,DIPPER,0.9955585,DIPPER,0.9955585,,0,1,http://www.sbms.hku.hk/dclab/DIPPER,301,,"(22.2783,114.1747)",http://web.archive.org/web/20220802113308/https://www.sbms.hku.hk/dclab/DIPPER/,2020-12-31,"School of Biomedical Sciences,, The University of Hong Kong, Hong Kong.","Tam V, Chen P, Yee A, Solis N, Klein T, Kudelko M, Sharma R, Chan WC, Overall CM, Haglund L, Sham PC, Cheah KSE, Chan D",,"Ministry of Science and Technology of the People's Republic of China, Canadian Institutes of Health Research, Research Grants Council, University Grants Committee, Research Grants Council, University Grants Committee, Research Grants Council, University Grants Committee",13.0,"Hong Kong, Hong Kong" +33426407,DSSTox,0.9958359,DSSTox,0.9958359,Toxicity,0.549814343,1,http://comptox.epa.gov/dashboard,302,,"(38.8951,-77.0364)",http://web.archive.org/web/20221103165343/https://comptox.epa.gov/dashboard/,2019-11-01,"National Center for Computational Toxicology, Office of Research & Development, US Environmental Protection Agency, Mail Drop D143-02, Research Triangle Park, NC 27711, USA.","Grulke CM, Williams AJ, Thillanadarajah I, Richard AM",,,12.0,United States +33641184,DbStRiPs,0.990017354,DbStRiPs,0.990017354,Database of Structural Repeats,0.719607194,1,http://bioinf.iiit.ac.in/dbstrips,301,,"(17.3840,78.4564)",no_wayback,2021-03-06,"Centre for Computational Natural Sciences and Bioinformatics, International Institute of Information Technology, Hyderabad, India.","Chakrabarty B, Parekh N",,,2.0,India +33787872,Drugmonizome,0.990401685,Drugmonizome,0.990401685,,0,1,http://maayanlab.cloud/drugmonizome,308,,"(39.0437,-77.4875)",http://web.archive.org/web/20210922105623/https://maayanlab.cloud/drugmonizome/,2021-03-01,"Department of Pharmacological Sciences; Mount Sinai 
Center for Bioinformatics; Big Data to Knowledge, Library of Integrated Network-Based Cellular Signatures, Data Coordination and Integration Center (BD2K-LINCS DCIC); Knowledge Management Center for Illuminating the Druggable Genome (KMC-IDG); Icahn School of Medicine at Mount Sinai, 1 Gustave L. Levy Place, Box 1603, New York, NY 10029, USA.","Kropiwnicki E, Evangelista JE, Stein DJ, Clarke DJB, Lachmann A, Kuleshov MV, Jeon M, Jagodnik KM, Ma'ayan A",,"National Institutes of Health, NIDDK NIH HHS, NCI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS",3.0,United States +33938221,D3DistalMutation,0.934222619,D3DistalMutation,0.934222619,,0,1,http://www.d3pharma.com/D3DistalMutation/index.php,200,,"(36.6683,116.9972)",no_wayback,2021-05-02,"CAS Key Laboratory of Receptor Research; Drug Discovery and Design Center, Shanghai Institute of Materia Medica, Chinese Academy of Sciences, Shanghai 201203, China.","Wang X, Zhang X, Peng C, Shi Y, Li H, Xu Z, Zhu W",,"Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China",1.0,China +33995899,DRscDB,0.997646034,DRscDB,0.997646034,,0,1,http://www.flyrnai.org/tools/single_cell/web,301,,"(42.3584,-71.0598)",http://web.archive.org/web/20220831211554/https://www.flyrnai.org/tools/single_cell/web/,2021-04-11,"Department of Genetics, Blavatnik Institute, Harvard Medical School, 77 Avenue Louis Pasteur, Boston, MA 02115, USA.","Hu Y, Tattikota SG, Liu Y, Comjean A, Gao Y, Forman C, Kim G, Rodiger J, Papatheodorou I, Dos Santos G, Mohr SE, Perrimon N",,"Howard Hughes Medical Institute, NIH, NIGMS NIH HHS, National Institute of General Medical Sciences, NHGRI NIH HHS",2.0,United States +34015403,Ebolabase,0.985048413,Ebolabase,0.985048413,,0,1,http://ebola.bicpu.edu.in,"HTTPConnectionPool(host='ebola.bicpu.edu.in', port=80): Max retries exceeded with url: / (Caused by 
ReadTimeoutError(""HTTPConnectionPool(host='ebola.bicpu.edu.in', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20200805041812/http://ebola.bicpu.edu.in/,2021-05-17,"Centre for Bioinformatics, Pondicherry University, Puducherry-605014, India.","Muthaiyan M, Naorem LD, Seenappa V, Pushan SS, Venkatesan A",,"Council of Scientific and Industrial Research, India",0.0,India +34097004,DevOmics,0.997623146,DevOmics,0.997623146,,0,1,http://devomics.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20211201151531/http://devomics.cn/,2021-11-01,"Center for Reproductive Medicine, Department of Obstetrics and Gynecology, Peking University Third Hospital, Beijing 100191, China.","Yan Z, An J, Peng Y, Kong S, Liu Q, Yang M, He Q, Song S, Chen Y, Chen W, Li R, Qiao J, Yan L",,"National Key Research and Development Program, National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program",1.0,China +34256256,CytomegaloVirusDb,0.857445955,CytomegaloVirusDb,0.857445955,,0,1,http://www.cmvdb.dqweilab-sjtu.com/index.php,301,,"(60.3540,24.9794)",no_wayback,2021-06-09,"Department of Bioinformatics and Biological Statistics, School of Life Sciences and Biotechnology, Shanghai Jiao Tong University, Shanghai, 200240, PR China.","Khan T, Khan A, Nasir SN, Ahmad S, Ali SS, Wei DQ",,NSFC,5.0,China +34314366,dbMCS,0.993598044,dbMCS,0.993598044,for anti-Cancer drug Sensitivity,0.694444135,1,http://bioinfo.aielab.cc/dbMCS,302,,"(22.2783,114.1747)",no_wayback,2021-11-05,None,"Shen Y, Zhang Y, Xue W, Yue Z",,"Natural Science Young Foundation of Anhui, Natural Science Young Foundation of Anhui Agricultural University, Introduction and Stabilization of Talent Project of Anhui Agricultural University, National Key Research and Development, Graduate Innovation Fund of Anhui Agricultural University",0.0, +34405389,DoWLS-MAN,0.988136002,DoWLS-MAN,0.988136002,Database of Word-Level Statistics for 
Mandarin Chinese,0.950584922,1,http://dowls.site,406,,,http://web.archive.org/web/20220419042937/https://dowls.site/,2021-08-17,"Department of English (E21-1060), University of Macau, Avenida da Universidade, Taipa, Macau, S.A.R, China. karlneergaard@gmail.com.","Neergaard KD, Xu H, German JS, Huang CR",,,0.0,China +34774049,DREAM,0.993100206,DREAM,0.993100206,Drug Response Gene Expression Associated Map,0.914302438,1,http://bio-big-data.cn:8080/DREAM,302,,,http://web.archive.org/web/20220623024753/http://bio-big-data.cn:8080/DREAM/,2021-11-13,"Department of Neurosurgery, the Second Affiliated Hospital of Harbin Medical University, Neuroscience Institute, Heilongjiang Academy of Medical Sciences, Harbin, 150086, China.","Li S, Li L, Meng X, Sun P, Liu Y, Song Y, Zhang S, Jiang C, Cai J, Zhao Z",,"the research project of the health and family planning commission of heilongjiang province, karolinska institutet research foundation grants 2020-2021, excellent young talents project of central government supporting local university reform and development fund, postdoctoral research foundation of china, postdoctoral research foundation of china, national natural science foundation of china, national natural science foundation of china, heilongjiang provincial postdoctoral science foundation, national natural science foundation of china, National College Students Innovation and Entrepreneurship Training Program, national natural science foundation of china",1.0,China +34782688,CyFi-MAP,0.988857135,CyFi-MAP,0.988857135,,0,1,http://cysticfibrosismap.github.io,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20220125065314/https://cysticfibrosismap.github.io/,2021-11-15,"Faculty of Sciences, BioISI-Biosystems Integrative Sciences Institute, University of Lisboa, Campo Grande, 1749-016, Lisbon, Portugal.","Pereira C, Mazein A, Farinha CM, Gray MA, Kunzelmann K, Ostaszewski M, Balaur I, Amaral MD, Falcao AO",,"Fundação para a Ciência e a Tecnologia, Cystic Fibrosis Trust, 
Innovative Medicines Initiative, Fundação para a Ciência e a Tecnologia, Seventh Framework Programme",0.0,Portugal +34791106,database of cancer mutant protein domains,0.897191525,,0,database of cancer mutant protein domains,0.897191525,1,http://dcmp.vit.ac.in,200,,"(12.9184,79.1325)",http://web.archive.org/web/20211206174937/http://dcmp.vit.ac.in/,2021-11-01,"Bioinformatics Programming Lab, Department of Biotechnology, School of Bio Sciences and Technology, Vellore Institute of Technology, Vellore, TN 632 014, India.","Emerson IA, Chitluri KK",,Science and Engineering Research Board,0.0,India +34803258,DaiCee,0.974195957,DaiCee,0.974195957,,0,1,http://www.hccbif.org/usersearch.php,200,,"(33.4484,-112.0740)",http://web.archive.org/web/20220617044438/http://www.hccbif.org/usersearch.php,2020-11-30,"DBT-BIF Centre, PG & Research Department of Biotechnology & Bioinformatics, Holy Cross College (Autonomous) (Affiliated to Bharathidasan University), Trichy, Tamilnadu, India.","Rajalakshmi M, Suveena S, Vijayalakshmia P, Indu S, Roy A, Ludas A",,,0.0,India +35424427,DiaNat-DB,0.974225625,DiaNat-DB,0.974225625,,0,1,http://rdu.iquimica.unam.mx/handle/20.500.12214/1186,200,,"(19.4285,-99.1277)",no_wayback,2021-01-28,"Instituto de Química, Universidad Nacional Autónoma de México Mexico City 04510 Mexico amadariaga@iquimica.unam.mx kmtzm@unam.mx +52 55 56224770 ext. 
46614.","Madariaga-Mazón A, Naveja JJ, Medina-Franco JL, Noriega-Colima KO, Martinez-Mayorga K",,"Dirección General de Asuntos del Personal Académico, Universidad Nacional Autónoma de México",0.0,"Mexico, Mexico" +21177656,Gene Expression Barcode,0.718592152,Gene Expression Barcode,0.718592152,,0,1,http://rafalab.jhsph.edu/barcode,301,,"(39.3009,-76.5799)",http://web.archive.org/web/20150828033826/http://rafalab.jhsph.edu:80/barcode/,2011-01-01,"Department of Biostatistics, Johns Hopkins University Bloomberg School of Public Health, 615 N Wolfe Street, Baltimore, MD 21205, USA.","McCall MN, Uppal K, Jaffee HA, Zilliox MJ, Irizarry RA",,"NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCRR NIH HHS, NCRR NIH HHS, NIGMS NIH HHS, NCRR NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCRR NIH HHS",114.0,United States +21300622,FunSecKB,0.997838616,FunSecKB,0.997838616,Fungal Secretome KnowledgeBase,0.966365695,1,http://proteomics.ysu.edu/secretomes/fungi.php,200,,"(41.0998,-80.6495)",http://web.archive.org/web/20220219044145/http://proteomics.ysu.edu/secretomes/fungi.php,2011-02-06,"Department of Computer Science and Information Systems, Center for Applied Chemical Biology, Youngstown State University, Youngstown, OH 44555, USA.","Lum G, Min XJ",,,33.0,United States +21317139,eGOB,0.991934419,eGOB,0.991934419,eukaryotic Gene Order Browser,0.86592653,1,http://egob.biomedicine.gu.se,200,,"(57.7072,11.9668)",http://web.archive.org/web/20210513220345/http://egob.biomedicine.gu.se/,2011-02-10,"Department of Medical Biochemistry and Cell Biology, Institute of Biomedicine, Sahlgrenska Academy at University of Gothenburg, SE-405 30 Göteborg, Sweden.","López MD, Samuelsson T",,,7.0,Sweden +21541042,FurinDB,0.997923672,FurinDB,0.997923672,,0,1,http://www.nuolan.net/substrates.html,405,,,http://web.archive.org/web/20211030194326/http://www.nuolan.net/substrates.html,2011-02-08,"Institute of Biomechanics, School of Bioscience and Bioengineering, South China University of 
Technology, Guangzhou 510006, China; E-Mails: huangqqss@163.com (Q.H.); yfang@scut.edu (Y.F.).","Tian S, Huang Q, Fang Y, Wu J",,,48.0,"China, China" +21762488,EuroPineDB,0.993739247,EuroPineDB,0.993739247,,0,1,http://www.scbi.uma.es/pindb,301,,"(36.7202,-4.4203)",http://web.archive.org/web/20200813002433/http://www.scbi.uma.es/pindb/,2011-07-15,"Departamento de Biología Molecular y Bioquímica, Facultad de Ciencias, Campus de Teatinos s/n, Universidad de Málaga, 29071 Málaga, Spain.","Fernández-Pozo N, Canales J, Guerrero-Fernández D, Villalobos DP, Díaz-Moreno SM, Bautista R, Flores-Monterroso A, Guevara MÁ, Perdiguero P, Collada C, Cervera MT, Soto A, Ordás R, Cantón FR, Avila C, Cánovas FM, Claros MG",,,28.0,Spain +21803806,Gee Fu,0.952340484,Gee Fu,0.952340484,,0,1,http://tinyurl.com/geefu,308,,"(37.7621,-122.3971)",no_wayback,2011-07-29,"The Genome Analysis Centre, Norwich Research Park, Colney Lane, Norwich, UK, NR4 7UH.","Ramirez-Gonzalez R, Caccamo M, MacLean D",,Biotechnology and Biological Sciences Research Council,1.0, +21994220,FlyExpress,0.970165312,FlyExpress,0.970165312,,0,1,http://www.flyexpress.net,200,,"(39.9524,-75.1636)",http://web.archive.org/web/20221102181536/http://flyexpress.net/,2011-10-12,"Center for Evolutionary Medicine and Informatics, Biodesign Institute, Arizona State University, Tempe, AZ 85287, USA. s.kumar@asu.edu","Kumar S, Konikoff C, Van Emden B, Busick C, Davis KT, Ji S, Wu LW, Ramos H, Brody T, Panchanathan S, Ye J, Karr TL, Gerold K, McCutchan M, Newfeld SJ",,"NIGMS NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS",29.0,United States +22064857,FungiDB,0.997218907,FungiDB,0.997218907,,0,1,"http://FungiDB.org, http://EuPathDB.org","301, 301",,"(33.9609,-83.3779), (33.9609,-83.3779)","no_wayback, http://web.archive.org/web/20221020084858/http://www.eupathdb.org/",2011-11-07,"Department of Plant Pathology & Microbiology, University of California, Riverside, CA 92521, USA. 
jason.stajich@ucr.edu","Stajich JE, Harris T, Brunk BP, Brestelli J, Fischer S, Harb OS, Kissinger JC, Li W, Nayak V, Pinney DF, Stoeckert CJ Jr, Roos DS",,,167.0,United States +"22080548, 23203883, 24214989, 26615190, 27899630, 29140475, 30395270, 31722421, 33175160",ENA,0.991740763,ENA,0.991740763,European Nucleotide Archive,0.941507971,9,http://www.ebi.ac.uk/ena,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20200804114054/https://www.ebi.ac.uk/ena,2021-01-01,"European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. amid@ebi.ac.uk, EMBL - European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. cochrane@ebi.ac.uk, European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, Cambridge, CB10 1SD, UK richardg@ebi.ac.uk., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK anat@ebi.ac.uk., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge, CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","Amid C, Birney E, Bower L, Cerdeño-Tárraga A, Cheng Y, Cleland I, Faruque N, Gibson R, Goodgame N, Hunter C, Jang M, Leinonen R, Liu X, Oisel A, Pakseresht N, Plaister S, Radhakrishnan R, Reddy K, Rivière S, Rossello M, Senf A, Smirnov D, Ten Hoopen P, Vaughan D, Vaughan R, Zalunin V, Cochrane G, 
Cochrane G, Alako B, Amid C, Bower L, Cerdeño-Tárraga A, Cleland I, Gibson R, Goodgame N, Jang M, Kay S, Leinonen R, Lin X, Lopez R, McWilliam H, Oisel A, Pakseresht N, Pallreddy S, Park Y, Plaister S, Radhakrishnan R, Rivière S, Rossello M, Senf A, Silvester N, Smirnov D, Ten Hoopen P, Toribio A, Vaughan D, Zalunin V, Pakseresht N, Alako B, Amid C, Cerdeño-Tárraga A, Cleland I, Gibson R, Goodgame N, Gur T, Jang M, Kay S, Leinonen R, Li W, Liu X, Lopez R, McWilliam H, Oisel A, Pallreddy S, Plaister S, Radhakrishnan R, Rivière S, Rossello M, Senf A, Silvester N, Smirnov D, Squizzato S, ten Hoopen P, Toribio AL, Vaughan D, Zalunin V, Cochrane G, Gibson R, Alako B, Amid C, Cerdeño-Tárraga A, Cleland I, Goodgame N, Ten Hoopen P, Jayathilaka S, Kay S, Leinonen R, Liu X, Pallreddy S, Pakseresht N, Rajan J, Rosselló M, Silvester N, Smirnov D, Toribio AL, Vaughan D, Zalunin V, Cochrane G, Toribio AL, Alako B, Amid C, Cerdeño-Tarrága A, Clarke L, Cleland I, Fairley S, Gibson R, Goodgame N, Ten Hoopen P, Jayathilaka S, Kay S, Leinonen R, Liu X, Martínez-Villacorta J, Pakseresht N, Rajan J, Reddy K, Rosello M, Silvester N, Smirnov D, Vaughan D, Zalunin V, Cochrane G, Silvester N, Alako B, Amid C, Cerdeño-Tarrága A, Clarke L, Cleland I, Harrison PW, Jayathilaka S, Kay S, Keane T, Leinonen R, Liu X, Martínez-Villacorta J, Menchi M, Reddy K, Pakseresht N, Rajan J, Rossello M, Smirnov D, Toribio AL, Vaughan D, Zalunin V, Cochrane G, Harrison PW, Alako B, Amid C, Cerdeño-Tárraga A, Cleland I, Holt S, Hussein A, Jayathilaka S, Kay S, Keane T, Leinonen R, Liu X, Martínez-Villacorta J, Milano A, Pakseresht N, Rajan J, Reddy K, Richards E, Rosello M, Silvester N, Smirnov D, Toribio AL, Vijayaraja S, Cochrane G, Amid C, Alako BTF, Balavenkataraman Kadhirvelu V, Burdett T, Burgin J, Fan J, Harrison PW, Holt S, Hussein A, Ivanov E, Jayathilaka S, Kay S, Keane T, Leinonen R, Liu X, Martinez-Villacorta J, Milano A, Pakseresht A, Rahman N, Rajan J, Reddy K, Richards E, Smirnov D, Sokolov A, 
Vijayaraja S, Cochrane G, Harrison PW, Ahamed A, Aslam R, Alako BTF, Burgin J, Buso N, Courtot M, Fan J, Gupta D, Haseeb M, Holt S, Ibrahim T, Ivanov E, Jayathilaka S, Balavenkataraman Kadhirvelu V, Kumar M, Lopez R, Kay S, Leinonen R, Liu X, O'Cathail C, Pakseresht A, Park Y, Pesant S, Rahman N, Rajan J, Sokolov A, Vijayaraja S, Waheed Z, Zyoud A, Burdett T, Cochrane G",", , , , , , , , ","Biotechnology and Biological Sciences Research Council, Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, ELIXIR-EXCELERATE, Horizon 2020, EMBRIC, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Horizon 2020, Horizon 2020, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Horizon 2020, Horizon 2020, Horizon 2020, Horizon 2020, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, The Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, The Biological Sciences Research Council, ELIXIR, Horizon 2020, Horizon 2020, Biotechnology and Biological Sciences Research Council, The Biological Sciences Research Council, The Biological Sciences Research Council, Wellcome Trust, Horizon 2020, Biotechnology and Biological Sciences Research Council, Gordon and Betty Moore Foundation, The Biological Sciences Research Council, 
European Molecular Biology Laboratory, European Union, European Union, European Union, Biotechnology and Biological Sciences Research Council, Biological Sciences Research Council, Biological Sciences Research Council, Biological Sciences Research Council, Wellcome Trust, Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biological Sciences Research Council, Biological Sciences Research Council, European Union, European Union, European Union, European Union, Biotechnology and Biological Sciences Research Council, Biological Sciences Research Council, Wellcome Trust, European Union, European Union, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, European Molecular Biology Laboratory, Wellcome Trust, European Union, European Union, Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",291.0, +22080549,GeneWeaver,0.980351031,GeneWeaver,0.980351031,,0,1,http://www.GeneWeaver.org,302,,"(44.3876,-68.2039)",http://web.archive.org/web/20220922002810/https://www.geneweaver.org/,2011-11-12,"School of Engineering & Computer Science, Baylor University, Waco, TX 76798, USA.","Baker EJ, Jay JJ, Bubier JA, Langston MA, Chesler EJ",,"NIAAA NIH HHS, NIAAA NIH HHS, NIAAA NIH HHS",64.0,United States +"22096231, 24297252, 26582926, 30418610",eggNOG,0.992302001,eggNOG,0.992302001,,0,4,http://eggnog.embl.de,302,,"(49.4095,8.6935)",http://web.archive.org/web/20111117034408/http://eggnog.embl.de:80/,2019-01-01,"European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany., European Molecular Biology Laboratory, Computational Biology Unit, Meyerhofstrasse 1, 69117 Heidelberg, Germany, University of Zurich and Swiss Institute of Bioinformatics, Institute of Molecular Life Sciences, Winterthurerstrasse 190, 8057 Zurich, Switzerland, Institute for Systems Biology, 401 
Terry Avenue North, Seattle, WA 98109-5234, USA, Bioinformatics and Genomics Programme, Centre for Genomic Regulation (CRG), C/Dr. Aiguader 88, 08003 Barcelona, Spain, Universitat Pompeu Fabra (UPF), 08003 Barcelona, Spain, CUBE-Division of Computational Systems Biology, Department of Microbiology and Ecosystem Science, University of Vienna, Althanstraße 14, 1090 Vienna, Austria, Institute of Biological, Environmental & Rural Sciences, Aberystwyth University, Penglais, Aberystwyth, Ceredigion, SY23 3FG, UK, Biotechnology Center, TU Dresden, 01062 Dresden, Germany, Novo Nordisk Foundation Center for Protein Research, Faculty of Health Sciences, University of Copenhagen, 2200, Copenhagen N, Denmark and Max-Delbrück-Centre for Molecular Medicine, Robert-Rössle-Strasse 10, 13092 Berlin, Germany., Structural and Computational Biology Unit, European Molecular Biology Laboratory, Heidelberg, Germany., Structural and Computational Biology Unit, European Molecular Biology Laboratory, Heidelberg, Germany.","Powell S, Szklarczyk D, Trachana K, Roth A, Kuhn M, Muller J, Arnold R, Rattei T, Letunic I, Doerks T, Jensen LJ, von Mering C, Bork P, Powell S, Forslund K, Szklarczyk D, Trachana K, Roth A, Huerta-Cepas J, Gabaldón T, Rattei T, Creevey C, Kuhn M, Jensen LJ, von Mering C, Bork P, Huerta-Cepas J, Szklarczyk D, Forslund K, Cook H, Heller D, Walter MC, Rattei T, Mende DR, Sunagawa S, Kuhn M, Jensen LJ, von Mering C, Bork P, Huerta-Cepas J, Szklarczyk D, Heller D, Hernández-Plaza A, Forslund SK, Cook H, Mende DR, Letunic I, Rattei T, Jensen LJ, von Mering C, Bork P",", , , ",", European Research Council, Novo Nordisk Foundation Center for Protein Research, Biotechnology and Biological Sciences Research Council, European Research Council, Novo Nordisk Foundation Center for Protein Research, Bundesministerium für Bildung und Forschung, Horizon 2020, Swiss National Science Foundation, Swiss National Science Foundation, Novo Nordisk Foundation, Fondo Social Europeo, Novo Nordisk 
Foundation Center for Protein Research, Ramón y Cajal Programme",2093.0,"Austria, Switzerland, Germany, Germany, Germany, Germany, Germany, Germany, Denmark, Spain, Spain, United States" +22102771,EuDBase,0.996863604,EuDBase,0.996863604,,0,1,http://www.inbiosis.ukm.my/eudbase,404,,,http://web.archive.org/web/20100324185135/http://www.inbiosis.ukm.my:80/eudbase/,2011-10-14,None,"Hussein ZA, Loke KK, Abidin RA, Othman R",,,1.0, +22110038,GeneSigDB,0.998035491,GeneSigDB,0.998035491,,0,1,"http://www.genesigdb.org, http://compbio.dfci.harvard.edu/genesigdb","302, HTTPConnectionPool(host='compbio.dfci.harvard.edu', port=80): Max retries exceeded with url: /genesigdb (Caused by ConnectTimeoutError(, 'Connection to compbio.dfci.harvard.edu timed out. (connect timeout=5)'))",,"(42.8865,-78.8784), ","http://web.archive.org/web/20200921085945/https://genesigdb.org/, http://web.archive.org/web/20141219203615/http://compbio.dfci.harvard.edu/genesigdb/",2011-11-21,"Biostatistics and Computational Biology, Dana-Farber Cancer Institute, 450 Brookline Avenue, Boston, MA 02215, USA. 
aedin@jimmy.harvard.edu","Culhane AC, Schröder MS, Sultana R, Picard SC, Martinelli EN, Kelly C, Haibe-Kains B, Kapushesky M, St Pierre AA, Flahive W, Picard KC, Gusenleitner D, Papenhausen G, O'Connor N, Correll M, Quackenbush J",,"NLM NIH HHS, NHGRI NIH HHS, NCI NIH HHS, NHGRI NIH HHS",68.0,United States +"22110040, 24214962, 26615199, 31680160",ELM,0.994939625,ELM,0.994939625,eukaryotic linear motif,0.945976029,4,http://elm.eu.org,200,,"(49.4095,8.6935)",http://web.archive.org/web/20220819185927/http://elm.eu.org/,2020-01-01,"Structural and Computational Biology, European Molecular Biology Laboratory, Heidelberg, Germany., Structural and Computational Biology, European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany, Department of Physiology, University of California, San Francisco, 600 16th Street, San Francisco, CA 94158, USA, Structural Studies Division, MRC, Laboratory of Molecular Biology, Francis Crick Avenue, Cambridge Biomedical Campus, Cambridge CB2 0QH, UK, Ruprecht-Karls-Universität, 69117 Heidelberg, Germany, School of Biology and Environmental Science, University College Dublin, Belfield, Dublin 4, Co. 
Dublin, Republic of Ireland, Laboratory of Bioinformatics and Biostatistics, Maria Sklodowska-Curie Memorial Cancer Center and Institute of Oncology, WK Roentgena 5, 02-781 Warsaw, Poland, Protein Structure-Function and Engineering Laboratory, Fundación Instituto Leloir and Instituto de Investigaciones Bioquímicas de Buenos Aires-Consejo Nacional de Investigaciones Científicas y Técnicas Avenida Patricias Argentinas 435 CP 1405 Buenos Aires, Argentina and Departamento de Química Biológica and IQUIBICEN-CONICET, Facultad de Ciencias Exactas y Naturales, Universidad de Buenos Aires, Intendente Gúiraldes 2160 CP 1428, Argentina., Structural and Computational Biology, European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany., Structural and Computational Biology Unit, European Molecular Biology Laboratory, Heidelberg 69117, Germany.","Dinkel H, Michael S, Weatheritt RJ, Davey NE, Van Roey K, Altenberg B, Toedt G, Uyar B, Seiler M, Budd A, Jödicke L, Dammert MA, Schroeter C, Hammer M, Schmidt T, Jehl P, McGuigan C, Dymecka M, Chica C, Luck K, Via A, Chatr-Aryamontri A, Haslam N, Grebnev G, Edwards RJ, Steinmetz MO, Meiselbach H, Diella F, Gibson TJ, Dinkel H, Van Roey K, Michael S, Davey NE, Weatheritt RJ, Born D, Speck T, Krüger D, Grebnev G, Kuban M, Strumillo M, Uyar B, Budd A, Altenberg B, Seiler M, Chemes LB, Glavina J, Sánchez IE, Diella F, Gibson TJ, Dinkel H, Van Roey K, Michael S, Kumar M, Uyar B, Altenberg B, Milchevskaya V, Schneider M, Kühn H, Behrendt A, Dahl SL, Damerell V, Diebel S, Kalman S, Klein S, Knudsen AC, Mäder C, Merrill S, Staudt A, Thiel V, Welti L, Davey NE, Diella F, Gibson TJ, Kumar M, Kumar M, Gouw M, Michael S, Sámano-Sánchez H, Pancsa R, Glavina J, Diakogianni A, Valverde JA, Bukirova D, ČalyÅ¡eva J, Palopoli N, Davey NE, Chemes LB, Gibson TJ",", , , ","European Commission FP7, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, European Commission FP7, 
Biotechnology and Biological Sciences Research Council, Medical Research Council, Medical Research Council, , Horizon 2020, Horizon 2020, Hungarian National Research, Cancer Research UK, German Academic Exchange, Hungarian Academy of Sciences, Consejo Nacional de Investigaciones Científicas y Técnicas, Agencia Nacional de Promoción Científica y Tecnológica, European Molecular Biology Laboratory, Agencia Nacional de Promoción Científica y Tecnológica, Argentine Ministry of Science and Technology",642.0,"Argentina, Argentina, Germany, Germany, Germany, Germany, Germany, Ireland, Poland, United States" +22116062,GeneDB,0.993148148,GeneDB,0.993148148,,0,1,http://www.genedb.org,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20210801032837/https://www.genedb.org/,2011-11-23,"Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SA, UK. fl2@sanger.ac.uk","Logan-Klumpler FJ, De Silva N, Boehme U, Rogers MB, Velarde G, McQuillan JA, Carver T, Aslett M, Olsen C, Subramanian S, Phan I, Farris C, Mitra S, Ramasamy G, Wang H, Tivey A, Jackson A, Houston R, Parkhill J, Holden M, Harb OS, Brunk BP, Myler PJ, Roos D, Carrington M, Smith DF, Hertz-Fowler C, Berriman M",,"Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust",139.0, +"22139938, 24270792, 26578585, 29112716",Gene3D,0.990353435,Gene3D,0.990353435,,0,4,http://gene3d.biochem.ucl.ac.uk,503,,,http://web.archive.org/web/20220211134446/http://gene3d.biochem.ucl.ac.uk/,2018-01-01,"Institute of Structural and Molecular Biology, University College London, Darwin Building, Gower St, London WC1E 6BT, UK. 
lees@biochem.ucl.ac.uk, Division of Biosciences, Institute of Structural and Molecular Biology, University College London, Gower Street, London WC1E 6BT, UK, Department of Infectious Disease Epidemiology, Imperial College London, St Mary's Campus, Norfolk Place, London W2 1PG, UK and Robert Koch Institut, Research Group Bioinformatics Ng4, Nordufer 20, 13353 Berlin, Germany., Institute of Structural and Molecular Biology, Division of Biosciences, University College London, Gower Street, London, WC1E 6BT, UK., Institute of Structural and Molecular Biology, Division of Biosciences, University College London, Gower Street, London WC1E 6BT, UK.","Lees J, Yeats C, Perkins J, Sillitoe I, Rentzsch R, Dessailly BH, Orengo C, Lees JG, Lee D, Studer RA, Dawson NL, Sillitoe I, Das S, Yeats C, Dessailly BH, Rentzsch R, Orengo CA, Lam SD, Dawson NL, Das S, Sillitoe I, Ashford P, Lee D, Lehtinen S, Orengo CA, Lees JG, Lewis TE, Sillitoe I, Dawson N, Lam SD, Clarke T, Lee D, Orengo C, Lees J",", , , ","Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, NIAID NIH HHS, Wellcome Trust, Swiss National Science Foundation, Medical Research Council, Biotechnology and Biological Sciences Research Council, Swiss National Science Foundation, NIAID NIH HHS, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Cancer Research UK, Cancer Research UK, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",156.0,Germany +22140171,EzTaxon-e,0.969534889,EzTaxon-e,0.969534889,,0,1,http://eztaxon-e.ezbiocloud.net,403,,,http://web.archive.org/web/20220615145907/https://eztaxon-e.ezbiocloud.net/,2011-11-25,"School of Biological 
Sciences, Seoul National University, Seoul, Republic of Korea.","Kim OS, Cho YJ, Lee K, Yoon SH, Kim M, Na H, Park SC, Jeon YS, Lee JH, Yi H, Won S, Chun J",,Interdisciplinary Research Program of Seoul National University,3441.0, +22267904,FED,0.953724042,FED,0.953724042,Fusion Events Database,0.76366665,1,http://www.bioacademy.gr/bioinformatics/projects/ProteinFusion/index.htm,404,,,no_wayback,2011-12-18,"Biomedical Research Foundation, Academy of Athens, Athens, Greece.","Tsagrasoulis D, Danos V, Kissa M, Trimpalis P, Koumandou VL, Karagouni AD, Tsakalidis A, Kossida S",,,2.0,Greece +22293322,G6PD,0.940137466,G6PD,0.940137466,,0,1,http://202.120.189.88/mutdb,404,,,http://web.archive.org/web/20140722072654/http://202.120.189.88/mutdb/,2012-01-30,"Laboratory of Clinical Molecular Diagnostics, Institute of Biochemistry and Clinical Biochemistry, Catholic University of Rome, Italy. angelo.minucci@virgilio.it","Minucci A, Moradkhani K, Hwang MJ, Zuppi C, Giardina B, Capoluongo E",,,131.0,Italy +22359444,EctomycorrhizalDB,0.745949835,EctomycorrhizalDB,0.745949835,,0,1,http://www.kubic.nic.in/ectomychorhiza,404,,,no_wayback,2012-01-20,"DBT-BIF Facility, Department of Biotechnology, Kumaun University, Nainital, Uttarakhand, India.","Pande V, Middha SK, Sharma NK, Lohani Y, Pandey M",,,0.0,India +22383735,GENI-DB,0.995142937,GENI-DB,0.995142937,,0,1,http://born.nii.ac.jp,"HTTPConnectionPool(host='born.nii.ac.jp', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20150310154842/http://born.nii.ac.jp:80/,2012-03-01,"National Institute of Informatics, ROIS, Tokyo, Japan. 
collier@nii.ac.jp","Collier N, Doan S",,,5.0,Japan +22417913,Genome Composition Database,0.859661317,GCD,0.811255554,Genome Composition Database,0.859661317,1,http://esper.lab.nig.ac.jp/genome-composition-database,"HTTPConnectionPool(host='esper.lab.nig.ac.jp', port=80): Max retries exceeded with url: /genome-composition-database (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2012-03-14,"Division of Population Genetics, National Institute of Genetics, Mishima, Japan.","Kryukov K, Sumiyama K, Ikeo K, Gojobori T, Saitou N",,,8.0,Japan +22489867,EnzyBase,0.992943108,EnzyBase,0.992943108,,0,1,http://biotechlab.fudan.edu.cn/database/EnzyBase/home.php,500,,,http://web.archive.org/web/20210513134126/http://biotechlab.fudan.edu.cn/database/EnzyBase/home.php,2012-04-11,"State Key Laboratory of Genetic Engineering, School of Life Sciences, Fudan University, Shanghai, China.","Wu H, Lu H, Huang J, Li G, Huang Q",,,14.0,China +22649282,Florabank1,0.991928935,Florabank1,0.991928935,,0,1,"http://data.gbif.org/datasets/resource/10969/, http://projects.biodiversity.be/ifblAll","307, 404",,"(55.6759,12.5655), ","http://web.archive.org/web/20160322174524/http://data.gbif.org/datasets/resource/10969, no_wayback",2012-05-16,"Research Institute for Nature and Forest, Kliniekstraat 25, 1070, Brussels, Belgium.","Landuyt WV, Vanhecke L, Brosens D",,,3.0,Belgium +22661580,ExPASy,0.99731946,ExPASy,0.99731946,,0,1,http://www.expasy.org,301,,"(46.5160,6.6328)",http://web.archive.org/web/20221110031521/https://www.expasy.org/,2012-05-31,"Vital-IT Group, SIB Swiss Institute of Bioinformatics, Lausanne, Switzerland.","Artimo P, Jonnalagedda M, Arnold K, Baratin D, Csardi G, de Castro E, Duvaud S, Flegel V, Fortier A, Gasteiger E, Grosdidier A, Hernandez C, Ioannidis V, Kuznetsov D, Liechti R, Moretti S, Mostaguir K, Redaschi N, Rossier G, Xenarios I, Stockinger H",,,783.0,Switzerland 
+22715304,FBIS,0.808700919,FBIS,0.808700919,Barcode,0.65479672,1,http://mail.nbfgr.res.in/fbis,301,,"(26.8393,80.9231)",http://web.archive.org/web/20220527070359/https://mail.nbfgr.res.in/fbis/,2012-05-31,"National Bureau of Fish Genetic Resources, Canal Ring Road, P.O - Dilkusha, Lucknow-226002, India.","Nagpure NS, Rashid I, Pathak AK, Singh M, Singh SP, Sarkar UK",,,4.0,India +22748121,Genes2FANs,0.972529342,Genes2FANs,0.972529342,,0,1,http://actin.pharm.mssm.edu/genes2FANs,"HTTPConnectionPool(host='actin.pharm.mssm.edu', port=80): Max retries exceeded with url: /genes2FANs (Caused by ConnectTimeoutError(, 'Connection to actin.pharm.mssm.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20181021235907/http://actin.pharm.mssm.edu:80/genes2FANs/,2012-07-02,"Department of Pharmacology and Systems Therapeutics, Systems Biology Center of New York, Mount Sinai School of Medicine, New York, NY 10029, USA.","Dannenfelser R, Clark NR, Ma'ayan A",,"NIGMS NIH HHS, NIGMS NIH HHS, NCRR NIH HHS, NIDDK NIH HHS, NIDDK NIH HHS, NCRR NIH HHS, NIGMS NIH HHS, NIDDK NIH HHS, NIGMS NIH HHS, NLM NIH HHS",16.0,United States +22766416,FunGene-DB,0.99046123,FunGene-DB,0.99046123,,0,1,http://www.fungene-db.org,302,,"(50.6942,3.1746)",no_wayback,2012-07-02,"INRA, UMR 1163 Biotechnologie des Champignons Filamenteux ESIL, 163 avenue de Luminy, CP 925, 13288 Marseille Cedex 09, France. 
david.navarro@univ-amu.fr","Navarro D, Favel A, Chabrol O, Pontarotti P, Haon M, Lesage-Meessen L",,the French National Research Agency,3.0,France +22912585,GenDR,0.975972056,GenDR,0.975972056,,0,1,http://genomics.senescence.info/diet,301,,"(42.5467,-83.2113)",http://web.archive.org/web/20221026130348/https://www.genomics.senescence.info/diet/,2012-08-09,"Integrative Genomics of Ageing Group, Institute of Integrative Biology, University of Liverpool, Liverpool, United Kingdom.","Wuttke D, Connor R, Vora C, Craig T, Li Y, Wood S, Vasieva O, Shmookler Reis R, Tang F, de Magalhães JP",,"Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council",27.0,United Kingdom +"23109553, 26481353",ECMDB,0.995682538,ECMDB,0.995682538,coli Metabolome Database,0.95562607,2,http://www.ecmdb.ca,301,,"(40.7402,-73.9996)",http://web.archive.org/web/20221105083701/https://ecmdb.ca/,2015-10-19,"Department of Computing Science, University of Alberta, Edmonton, Alberta T6G 2E8, Canada., Department of Computing Science, University of Alberta, Edmonton, AB, T6G 2E9, Canada.","Guo AC, Jewison T, Wilson M, Liu Y, Knox C, Djoumbou Y, Lo P, Mandal R, Krishnamurthy R, Wishart DS, Sajed T, Marcu A, Ramirez M, Pon A, Guo AC, Knox C, Wilson M, Grant JR, Djoumbou Y, Wishart DS",", ","Canadian Institutes of Health Research, Canadian Institutes of Health Research",98.0,"Canada, Canada" +23143106,EcoCyc,0.992975175,EcoCyc,0.992975175,,0,1,http://EcoCyc.org,302,,"(37.4538,-122.1822)",http://web.archive.org/web/20221103085023/https://ecocyc.org/,2012-11-09,"SRI International, 333 Ravenswood Avenue, Menlo Park, CA 94025, USA. 
keseler@ai.sri.com","Keseler IM, Mackie A, Peralta-Gil M, Santos-Zavaleta A, Gama-Castro S, Bonavides-Martínez C, Fulcher C, Huerta AM, Kothari A, Krummenacker M, Latendresse M, Muñiz-Rascado L, Ong Q, Paley S, Schröder I, Shearer AG, Subhraveti P, Travers M, Weerasinghe D, Weiss V, Collado-Vides J, Gunsalus RP, Paulsen I, Karp PD",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",337.0,United States +23161677,G4LDB,0.997929025,G4LDB,0.997929025,G-quadruplex ligands database,0.988358639,1,http://www.g4ldb.org,302,,"(25.7867,-80.1800)",http://web.archive.org/web/20220803110844/https://www.g4ldb.org/,2012-11-17,"Beijing National Laboratory for Molecular Sciences (BNLMS), Center for Molecular Sciences, State Key Laboratory for Structural Chemistry of Unstable and Stable Species, Institute of Chemistry, Chinese Academy of Sciences, Beijing 100190, PR China.","Li Q, Xiang JF, Yang QF, Sun HX, Guan AJ, Tang YL",,,50.0,China +23161689,GeneTack,0.659076989,GeneTack,0.659076989,,0,1,http://topaz.gatech.edu/GeneTack/db.html,200,,"(33.7490,-84.3880)",http://web.archive.org/web/20220727052004/http://topaz.gatech.edu/GeneTack/db.html,2012-11-17,"School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, GA 30332, USA.","Antonov I, Baranov P, Borodovsky M",,"Wellcome Trust, NHGRI NIH HHS",9.0,"Georgia, United States" +23161695,eProS,0.996275544,eProS,0.996275544,profile suite,0.617196172,1,http://bioservices.hs-mittweida.de/Epros,302,,"(50.9862,12.9754)",http://web.archive.org/web/20140722173916/http://bioservices.hs-mittweida.de/Epros/,2012-11-17,"Department of Mathematics, University of Applied Sciences Mittweida, Mittweida, Saxony, Technikumplatz 17, D-09648, Germany. 
florian.heinke@hs-mittweida.de","Heinke F, Schildbach S, Stockmann D, Labudde D",,,2.0,Germany +"23175615, 25388105, 27903906, 29761457",EuPathDB,0.998307586,EuPathDB,0.998307586,Eukaryotic Pathogen Genomics Database Resource,0.985402346,4,http://eupathdb.org,301,,"(33.9609,-83.3779)",http://web.archive.org/web/20221020084858/http://www.eupathdb.org/,2018-01-01,"Center for Tropical & Emerging Global Diseases, University of Georgia, Athens, GA 30602, USA., Department of Biology, University of Pennsylvania, 415 S. University Ave., Philadelphia, PA, 19104-6018, USA, oharb@sas.upenn.edu., Center for Tropical & Emerging Global Diseases, University of Georgia, Athens, GA 30602, USA., Center for Tropical and Emerging Global Diseases, University of Georgia, Athens, GA, USA. swfeltz@uga.edu.","Aurrecoechea C, Barreto A, Brestelli J, Brunk BP, Cade S, Doherty R, Fischer S, Gajria B, Gao X, Gingle A, Grant G, Harb OS, Heiges M, Hu S, Iodice J, Kissinger JC, Kraemer ET, Li W, Pinney DF, Pitts B, Roos DS, Srinivasamoorthy G, Stoeckert CJ Jr, Wang H, Warrenfeltz S, Harb OS, Roos DS, Aurrecoechea C, Barreto A, Basenko EY, Brestelli J, Brunk BP, Cade S, Crouch K, Doherty R, Falke D, Fischer S, Gajria B, Harb OS, Heiges M, Hertz-Fowler C, Hu S, Iodice J, Kissinger JC, Lawrence C, Li W, Pinney DF, Pulman JA, Roos DS, Shanmugasundram A, Silva-Franco F, Steinbiss S, Stoeckert CJ Jr, Spruill D, Wang H, Warrenfeltz S, Zheng J, Warrenfeltz S, Basenko EY, Crouch K, Harb OS, Kissinger JC, Roos DS, Shanmugasundram A, Silva-Franco F",", , , ","PHS HHS, Wellcome Trust, NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, Wellcome Trust, Wellcome Trust, NIAID NIH HHS, Wellcome Trust, Wellcome Trust",184.0,"Georgia, Georgia, Georgia, United States, United States, United States, United States" +23193256,ESTHER,0.996905684,ESTHER,0.996905684,,0,1,http://bioweb.ensam.inra.fr/esther,"HTTPConnectionPool(host='bioweb.ensam.inra.fr', port=80): Max retries exceeded with url: /esther (Caused by 
NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2012-11-27,"Dynamique Musculaire et Métabolisme, INRA-UM1, Place Viala, 34060 Montpellier, France.","Lenfant N, Hotelier T, Velluet E, Bourne Y, Marchot P, Chatonnet A",,,104.0,France +23193262,Genomicus,0.997919381,Genomicus,0.997919381,,0,1,http://www.dyogen.ens.fr/genomicus,302,,"(48.8534,2.3488)",no_wayback,2012-11-27,"Ecole Normale Supérieure, Institut de Biologie de l'ENS, IBENS, Paris, France. alouis@biologie.ens.fr","Louis A, Muffato M, Roest Crollius H",,,88.0,France +23193271,GenomeRNAi,0.972192883,GenomeRNAi,0.972192883,,0,1,http://www.genomernai.org,200,,"(50.9787,11.0328)",http://web.archive.org/web/20221023183131/http://www.genomernai.org/,2012-11-27,"Division Signaling and Functional Genomics, German Cancer Research Center (DKFZ), D-69120 Heidelberg, Germany.","Schmidt EE, Pelz O, Buhlmann S, Kerr G, Horn T, Boutros M",,,83.0,Germany +23197660,EcoGene,0.984577298,EcoGene,0.984577298,,0,1,http://ecogene.org,"HTTPConnectionPool(host='ecogene.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to ecogene.org timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20180523221348/http://www.ecogene.org:80/,2012-11-28,"Department of Biochemistry and Molecular Biology, The Miller School of Medicine, University of Miami, Miami, FL 33143, USA.","Zhou J, Rudd KE",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",129.0,United States +23203866,FlyAtlas,0.997430265,FlyAtlas,0.997430265,,0,1,"http://flyatlas.org, http://flyatlas.gla.ac.uk","412, 301",,", (55.8651,-4.2576)","http://web.archive.org/web/20220620042800/http://www.flyatlas.org/, http://web.archive.org/web/20221016225221/https://flyatlas.gla.ac.uk/",2012-11-29,"School of Life Sciences, College of Medical, Veterinary and Life Sciences, University of Glasgow, Glasgow G12 8QQ, UK.","Robinson SW, Herzyk P, Dow JA, Leader DP",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",99.0, +23203870,EENdb,0.995524585,EENdb,0.995524585,engineered endonuclease database,0.9866168,1,http://eendb.zfgenetics.org,302,,"(34.0522,-118.2437)",http://web.archive.org/web/20220728191046/https://eendb.zfgenetics.org/,2012-11-29,"Key Laboratory of Cell Proliferation and Differentiation of the Ministry of Education, State Key Laboratory of Protein and Plant Gene Research, Center for Bioinformatics, College of Life Sciences, Peking University, Beijing 100871, China.","Xiao A, Wu Y, Yang Z, Hu Y, Wang W, Zhang Y, Kong L, Gao G, Zhu Z, Lin S, Zhang B",,,28.0,China +23203885,factorbook,0.995431662,factorbook,0.995431662,of,0.503504157,1,http://factorbook.org,200,,"(37.7621,-122.3971)",no_wayback,2012-11-29,"Program in Bioinformatics and Integrative Biology, Department of Biochemistry and Molecular Pharmacology, University of Massachusetts Medical School, Worcester, MA 01605, USA.","Wang J, Zhuang J, Iyer S, Lin XY, Greven MC, Kim BH, Moore J, Pierce BG, Dong X, Virgil D, Birney E, Hung JH, Weng Z",,"NHGRI NIH HHS, NHGRI NIH HHS",176.0,United States +23245398,Genome-Wide Docking 
Database,0.819814461,GWIDD,0.74214983,Genome-Wide Docking Database,0.819814461,1,http://gwidd.bioinformatics.ku.edu,301,,"(38.9717,-95.2352)",http://web.archive.org/web/20220615180936/http://gwidd.bioinformatics.ku.edu/,2012-07-11,None,"Kundrotas PJ, Zhu Z, Vakser IA",,NIGMS NIH HHS,9.0, +23299413,ERISdb,0.825776637,ERISdb,0.825776637,,0,1,http://lemur.amu.edu.pl/share/ERISdb,301,,"(52.4069,16.9299)",http://web.archive.org/web/20220617113317/http://lemur.amu.edu.pl/share/ERISdb/,2013-01-07,"Laboratory of Bioinformatics, Faculty of Biology, Adam Mickiewicz University, Poznan, Poland. izabel@amu.edu.pl","Szcześniak MW, Kabza M, Pokrzywa R, Gudyś A, Makałowska I",,,21.0,Poland +23340253,EDR,0.954789698,EDR,0.954789698,Endometrium Database Resource,0.906237185,1,http://edr.research.bcm.edu,"HTTPConnectionPool(host='edr.research.bcm.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20130421071352/http://edr.research.bcm.edu,2013-01-22,"Dan L. 
Duncan Cancer Center, Baylor College of Medicine, Houston, TX 77030, USA.","Darlington Y, Jeong JW, Lee KY, Franco HL, Chen ES, McOwiti A, Mistretta TA, Steffen D, Becnel L, DeMayo FJ",,"NCI NIH HHS, NICHD NIH HHS",0.0,United States +23411718,EimeriaTDB,0.991310894,EimeriaTDB,0.991310894,Eimeria transcript DB,0.578976917,1,http://www.coccidia.icb.usp.br/eimeriatdb,301,,"(-23.5475,-46.6361)",http://web.archive.org/web/20221021114634/http://coccidia.icb.usp.br/eimeriatdb/,2013-02-14,"Department of Parasitology, Institute of Biomedical Sciences, University of São Paulo, Avenida Professor Lineu Prestes 1374, São Paulo SP 05508-000, Brazil.","Rangel LT, Novaes J, Durham AM, Madeira AM, Gruber A",,,3.0,Brazil +23459781,FRIDa,0.992402077,FRIDa,0.992402077,FoodCast Research Image Database,0.776939595,1,http://foodcast.sissa.it/neuroscience,301,,"(46.0693,13.2371)",http://web.archive.org/web/20220523151205/https://foodcast.sissa.it/neuroscience/,2013-03-01,"Cognitive Neuroscience Sector, SISSA - Trieste Trieste, Italy.","Foroni F, Pergola G, Argiris G, Rumiati RI",,,43.0,Italy +23468181,EsPal,0.998021126,EsPal,0.998021126,,0,1,http://www.bcbl.eu/databases/espal,302,,"(43.3128,-1.9750)",http://web.archive.org/web/20220405200804/https://www.bcbl.eu/databases/espal/,2013-12-01,"Basque Center on Cognition, Brain, and Language, Donostia, Spain, a.duchon@bcbl.eu.","Duchon A, Perea M, Sebastián-Gallés N, Martí A, Carreiras M",,,105.0,Spain +23601403,FCDB,0.992240489,FCDB,0.992240489,Czech Food Composition Database,0.975081468,1,http://www.czfcdb.cz,301,,"(50.0880,14.4208)",no_wayback,2013-02-11,"Institute of Agricultural Economics and Information, Agricultural and Food Library, Manesova 1453/75, 120 56 Prague 2, Czech Republic. 
machackova.marie@uzei.cz","Machackova M, Holasova M, Maskova E, ",,,1.0, +23650583,GeneSetDB,0.996939301,GeneSetDB,0.996939301,,0,1,http://genesetdb.auckland.ac.nz/haeremai.html,302,,"(-36.8485,174.7635)",http://web.archive.org/web/20221020225633/https://www.genesetdb.auckland.ac.nz/haeremai.html,2012-04-17,"Department of Molecular Medicine & Pathology, School of Medical Sciences, Faculty of Medical and Health Sciences, The University of Auckland, Private Bag 92019, Auckland, New Zealand.","Araki H, Knapp C, Tsai P, Print C",,,34.0,New Zealand +23696792,ExtremeDB,0.997353792,ExtremeDB,0.997353792,,0,1,http://extrem.igib.res.in,"HTTPConnectionPool(host='extrem.igib.res.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160513002941/http://extrem.igib.res.in:80/,2013-05-16,"Environmental Biotechnology Division, Institute of Genomics and Integrative Biology, (IGIB-CSIR), Delhi, India.","Majhi MC, Behera AK, Kulshreshtha NM, Mahmooduzafar, Kumar R, Kumar A",,,3.0,India +23709164,ESCOLEX,0.997294307,ESCOLEX,0.997294307,,0,1,http://p-pal.di.uminho.pt/about/databases,200,,"(41.1496,-8.6110)",http://web.archive.org/web/20220120083316/http://p-pal.di.uminho.pt/about/databases,2014-03-01,"School of Psychology, University of Minho, Minho, Portugal, asoares@psi.uminho.pt.","Soares AP, Medeiros JC, Simões A, Machado J, Costa A, Iriarte Á, de Almeida JJ, Pinheiro AP, Comesaña M",,,6.0,Portugal +23757396,FSRD,0.955492318,FSRD,0.955492318,Fungal Stress Response Database,0.942606161,1,http://internal.med.unideb.hu/fsrd,"HTTPConnectionPool(host='internal.med.unideb.hu', port=80): Max retries exceeded with url: /fsrd (Caused by ConnectTimeoutError(, 'Connection to internal.med.unideb.hu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20140722213822/http://internal.med.unideb.hu/fsrd/,2013-06-11,"Department of Medicine, Medical and Health Science Center, University of Debrecen, H-4032 Debrecen Nagyerdei krt. 98, Hungary.","Karányi Z, Holb I, Hornok L, Pócsi I, Miskei M",,,18.0,Hungary +23794736,ESCAPE,0.888467371,ESCAPE,0.888467371,Embryonic Stem Cell Atlas from,0.817273289,1,http://www.maayanlab.net/ESCAPE,301,,"(34.0522,-118.2437)",http://web.archive.org/web/20220808172705/https://www.maayanlab.net/ESCAPE/,2013-06-21,"Department of Pharmacology and Systems Therapeutics, Icahn School of Medicine at Mount Sinai, One Gustave L. Levy Place, Box 1215, New York, NY 10029, USA.","Xu H, Baroukh C, Dannenfelser R, Chen EY, Tan CM, Kou Y, Kim YE, Lemischka IR, Ma'ayan A",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIDDK NIH HHS",43.0,United States +23798489,eFG,0.989360213,eFG,0.989360213,Electronic resource for Fusarium graminearum,0.876718317,1,http://csb.shu.edu.cn/efg,"HTTPConnectionPool(host='csb.shu.edu.cn', port=80): Max retries exceeded with url: /efg (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-06-22,"Department of Computer Science, School of Electronics and Information Engineering, Tongji University, Shanghai 201804, China.","Liu X, Zhang X, Tang WH, Chen L, Zhao XM",,,1.0,China +23872200,GAG,0.715960622,GAG,0.715960622,,0,1,http://gag.genouest.org,200,,"(48.1120,-1.6743)",http://web.archive.org/web/20220302051108/http://gag.genouest.org/,2013-07-16,"INRA, UMR1348 PEGASE, F-35000 Rennes, France. 
obadia@u707.jussieu.fr","Obadia T, Sallou O, Ouedraogo M, Guernec G, Lecerf F",,,0.0,France +23951158,FmMDb,0.998117232,FmMDb,0.998117232,Foxtail millet Marker Database,0.978147492,1,http://www.nipgr.res.in/foxtail.html,"HTTPConnectionPool(host='www.nipgr.res.in', port=80): Max retries exceeded with url: /foxtail.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20181018060203/http://nipgr.res.in:80/foxtail.html,2013-08-12,"National Institute of Plant Genome Research, New Delhi, India.","B VS, Muthamilarasan M, Misra G, Prasad M",,,13.0,India +23977990,EvoSNP-DB,0.993930091,EvoSNP-DB,0.993930091,,0,1,http://biomi.cdc.go.kr/EvoSNP,"HTTPConnectionPool(host='biomi.cdc.go.kr', port=80): Max retries exceeded with url: /EvoSNP (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20140722231152/http://biomi.cdc.go.kr/EvoSNP/,2013-08-01,"Division of Bio-Medical Informatics, Center for Genome Science, National Institute of Health, Cheongwon 363-951, Korea.","Kim YU, Kim YJ, Lee JY, Park K",,,1.0, +"24009897, 25388151",EVpedia,0.995006621,EVpedia,0.995006621,,0,2,http://evpedia.info,"HTTPConnectionPool(host='evpedia.info', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20210120092525/http://evpedia.info/,2014-11-10,"Department of Life Science, Division of Molecular and Life Sciences, Pohang University of Science and Technology, Pohang, Republic of Korea., Department of Life Sciences, Pohang University of Science and Technology, Pohang, Republic of Korea, Division of Integrative Biosciences and Biotechnology, Pohang University of Science and Technology, Pohang, Republic of Korea, Cardiovascular Medicine, Brigham and Women's Hospital, Harvard Medical School, Boston, MA, 
USA, Department of Clinical Immunology, Polish-American Institute of Paediatrics, Jagiellonian University Medical College, Cracow, Poland, Department of Nephrology and Hypertension, University Medical Center Utrecht, Utrecht, The Netherlands, Section of Oncology, Department of Clinical Sciences, Lund University, Lund, Sweden, The Feinstein Institute for Medical Research, Manhasset, NY, USA, Department of Microbiology, Biochemistry, and Immunology, Morehouse School of Medicine, Atlanta, GA, USA, Institute of Biomedicine and Molecular Immunology (IBIM), National Research Council (CNR), Palermo, Italy, Innovation in Vesicles and Cells for Application in Therapy, Germans Trias i Pujol Research Institute, Germans Trias i Pujol University Hospital, Badalona, Spain, INSERM, UMR837 JEAN-PIERRE Aubert Research Centre, Lille, France, Department of Genetics, Cell- and Immunobiology, Semmelweis University, Budapest, Hungary, Department of Biochemistry and Molecular Biology, BIO21 Molecular Science and Biotechnology Institute, The University of Melbourne, Melbourne, VIC, Australia, Institute of Cancer & Genetics, School of Medicine, Velindre Cancer Centre, Cardiff University, Cardiff, UK, Program in Cellular and Molecular Medicine at Boston Children's Hospital and Department of Cell Biology, Harvard Medical School, Boston, MA, USA, Section of Pulmonary, Critical Care and Sleep Medicine, Department of Internal Medicine, Yale University School of Medicine, New Haven, CT, USA, Department of Neurology, College of Medicine, University of Tennessee Health Science Center, Memphis, TN, USA, Cancer Biology Program, Samuel Oschin Comprehensive Cancer Institute, Cedars-Sinai M","Kim DK, Kang B, Kim OY, Choi DS, Lee J, Kim SR, Go G, Yoon YJ, Kim JH, Jang SC, Park KS, Choi EJ, Kim KP, Desiderio DM, Kim YK, Lötvall J, Hwang D, Gho YS, Kim DK, Lee J, Kim SR, Choi DS, Yoon YJ, Kim JH, Go G, Nhung D, Hong K, Jang SC, Kim SH, Park KS, Kim OY, Park HT, Seo JH, Aikawa E, Baj-Krzyworzeka M, van 
Balkom BW, Belting M, Blanc L, Bond V, Bongiovanni A, Borràs FE, Buée L, Buzás EI, Cheng L, Clayton A, Cocucci E, Dela Cruz CS, Desiderio DM, Di Vizio D, Ekström K, Falcon-Perez JM, Gardiner C, Giebel B, Greening DW, Gross JC, Gupta D, Hendrix A, Hill AF, Hill MM, Nolte-'t Hoen E, Hwang DW, Inal J, Jagannadham MV, Jayachandran M, Jee YK, Jørgensen M, Kim KP, Kim YK, Kislinger T, Lässer C, Lee DS, Lee H, van Leeuwen J, Lener T, Liu ML, Lötvall J, Marcilla A, Mathivanan S, Möller A, Morhayim J, Mullier F, Nazarenko I, Nieuwland R, Nunes DN, Pang K, Park J, Patel T, Pocsfalvi G, Del Portillo H, Putz U, Ramirez MI, Rodrigues ML, Roh TY, Royo F, Sahoo S, Schiffelers R, Sharma S, Siljander P, Simpson RJ, Soekmadji C, Stahl P, Stensballe A, Stępień E, Tahara H, Trummer A, Valadi H, Vella LJ, Wai SN, Witwer K, Yáñez-Mó M, Youn H, Zeidler R, Gho YS",", ",", NHLBI NIH HHS, Medical Research Council, NIA NIH HHS, NHLBI NIH HHS, NIDDK NIH HHS, NHLBI NIH HHS, NCATS NIH HHS, NHLBI NIH HHS",354.0,"Australia, Spain, France, Hungary, Italy, Netherlands, Poland, Sweden, United States, United States, United States, United States, United States, United States" +24101916,FunGene,0.989818652,FunGene,0.989818652,Gene Pipeline,0.61831975,1,http://fungene.cme.msu.edu,200,,"(42.7370,-84.4839)",http://web.archive.org/web/20220617065107/http://fungene.cme.msu.edu/,2013-10-01,"Center for Microbial Ecology, Michigan State University East Lansing, MI, USA ; Department of Computer Science and Engineering, Michigan State University East Lansing, MI, USA.","Fish JA, Chai B, Wang Q, Sun Y, Brown CT, Tiedje JM, Cole JR",,"NIDDK NIH HHS, NIEHS NIH HHS",176.0,"United States, United States" +24146773,EMBRYS,0.995997906,EMBRYS,0.995997906,,0,1,http://embrys.jp/embrys/html/MainMenu.html,"HTTPConnectionPool(host='embrys.jp', port=80): Max retries exceeded with url: /embrys/html/MainMenu.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not 
known'))",,,http://web.archive.org/web/20220930201954/https://www.embrys.jp/embrys/html/MainMenu.html,2013-10-16,"Department of Systems Biomedicine, National Research Institute for Child Health and Development, Tokyo, Japan.","Shimizu H, Kubo A, Uchibe K, Hashimoto M, Yokoyama S, Takada S, Mitsuoka K, Asahara H",,,4.0,Japan +24150938,GEISHA,0.996660411,GEISHA,0.996660411,Gallus Expression In Situ Hybridization Analysis,0.966287035,1,http://geisha.arizona.edu,200,,"(32.2217,-110.9265)",http://web.archive.org/web/20221022065239/http://www.geisha.arizona.edu/,2013-10-22,"Molecular Cardiovascular Research Program, Department of Cellular and Molecular Medicine, University of Arizona, Tucson, AZ 85724, USA.","Antin PB, Yatskievych TA, Davey S, Darnell DK",,"NICHD NIH HHS, NICHD NIH HHS",17.0,United States +24174536,GeneProf,0.996735215,GeneProf,0.996735215,,0,1,http://www.geneprof.org,302,,"(55.9521,-3.1965)",http://web.archive.org/web/20220123054246/http://www.geneprof.org/,2013-10-29,"Institute for Stem Cell Research, Centre for Regenerative Medicine, School of Biological Sciences, University of Edinburgh, SCRM Building, 5 Little France Drive, Edinburgh EH16 4UU, UK.","Halbritter F, Kousa AI, Tomlinson SR",,"Medical Research Council, Medical Research Council",12.0,France +24213601,EPITRANS,0.996830702,EPITRANS,0.996830702,,0,1,http://epitrans.org,"HTTPConnectionPool(host='epitrans.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-11-08,"Laboratory of Developmental Biology and Genomics, College of Veterinary Medicine, Research Institute for Veterinary Science, Brain Korea 21 Program for Veterinary Science, Seoul, Korea.","Cho SY, Chai JC, Park SJ, Seo H, Sohn CB, Lee YS",,,1.0, 
+24214991,EKPD,0.997297764,EKPD,0.997297764,,0,1,http://ekpd.biocuckoo.org,200,,"(40.2338,-111.6585)",http://web.archive.org/web/20221007193645/https://ekpd.biocuckoo.org/,2013-11-08,"Department of Biomedical Engineering, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China.","Wang Y, Liu Z, Cheng H, Gao T, Pan Z, Yang Q, Guo A, Xue Y",,,27.0,China +24234003,footprintDB,0.995756149,footprintDB,0.995756149,,0,1,http://floresta.eead.csic.es/footprintdb,301,,"(41.7185,-0.8405)",http://web.archive.org/web/20221017132112/https://floresta.eead.csic.es/footprintdb/,2013-11-14,"Laboratory of Computational Biology, Department of Genetics and Plant Production, Estación Experimental de Aula Dei/CSIC, Av. Montañana 1005, Zaragoza (http://www.eead.csic.es/compbio) and Fundación ARAID, Paseo María Agustín 36, Zaragoza, Spain.","Sebastian A, Contreras-Moreira B",,,34.0,Spain +24243844,FireDB,0.992642879,FireDB,0.992642879,,0,1,http://firedb.bioinfo.cnio.es,301,,"(40.4655,-3.7376)",http://web.archive.org/web/20220708084238/https://firedb.bioinfo.cnio.es/,2013-11-15,"Structural Biology and Biocomputing Programme, Spanish National Cancer Research Centre, Madrid, 28029, Spain and Spanish National Bioinformatics Institute (INB-ISCIII).","Maietta P, Lopez G, Carro A, Pingilley BJ, Leon LG, Valencia A, Tress ML",,NHGRI NIH HHS,7.0,Spain +24302289,EDdb,0.994813681,EDdb,0.994813681,Eating Disorder database,0.941057016,1,http://eddb.cbi.pku.edu.cn,"HTTPConnectionPool(host='eddb.cbi.pku.edu.cn', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to eddb.cbi.pku.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20140726072855/http://eddb.cbi.pku.edu.cn,2013-12-05,"Center for Bioinformatics, State Key Laboratory of Protein and Plant Gene Research, College of Life Sciences, Peking University, Beijing, 100871, China.","Zhao M, Li X, Qu H",,,17.0,China +24333540,EcoliOverExpressionDB,0.917664945,EcoliOverExpressionDB,0.917664945,,0,1,http://birg4.fbb.utm.my:8080/EcoliOverExpressionDB,"HTTPConnectionPool(host='birg4.fbb.utm.my', port=8080): Max retries exceeded with url: /EcoliOverExpressionDB (Caused by ConnectTimeoutError(, 'Connection to birg4.fbb.utm.my timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20140726105256/http://birg4.fbb.utm.my:8080/EcoliOverExpressionDB/,2013-12-11,"Soft Computing Research Group, Faculty of Computing, Universiti Teknologi Malaysia, 81310 UTM Skudai, Johor, Malaysia. Electronic address: hnarjeskhatoon2@utm.live.my.","Habibi N, Samian MR, Hashim SZ, Norouzi A",,"Genetic Engineering laboratories in Universiti Teknologi Malaysia (UTM), Universiti Sains Malaysia (USM)",0.0,"Malaysia, Malaysia" +24444128,Exchangeable Gene Trap Clones,0.933022529,EGTC,0.735953112,Exchangeable Gene Trap Clones,0.933022529,1,http://egtc.jp,503,,,no_wayback,2014-01-20,"Institute of Resource Development and Analysis, Kumamoto University, 2-2-1 Honjo, Chuo-ku, Kumamoto, 860-0811, Japan.","Araki M, Nakahara M, Muta M, Itou M, Yanai C, Yamazoe F, Miyake M, Morita A, Araki M, Okamoto Y, Nakagata N, Yoshinobu K, Yamamura K, Araki K",,,10.0,Japan +24564786,Fungal PCWDE Database,0.985806865,FPDB,0.979104906,Fungal PCWDE Database,0.985806865,1,"http://pcwde.riceblast.snu.ac.kr/, http://cfgp.snu.ac.kr","HTTPConnectionPool(host='pcwde.riceblast.snu.ac.kr', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='pcwde.riceblast.snu.ac.kr', port=80): Read timed out. 
(read timeout=5)"")), HTTPConnectionPool(host='cfgp.snu.ac.kr', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='cfgp.snu.ac.kr', port=80): Read timed out. (read timeout=5)""))",,", ","no_wayback, no_wayback",2013-10-16,None,"Choi J, Kim KT, Jeon J, Lee YH",,,28.0, +24569102,ECO,0.859230498,ECO,0.859230498,European Cancer Observatory,0.629971464,1,http://eco.iarc.fr,"HTTPConnectionPool(host='eco.iarc.fr', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180426230405/http://eco.iarc.fr:80/,2014-02-22,"Section of Cancer Information, International Agency for Research on Cancer, Lyon, France. Electronic address: steliarova@iarc.fr.","Steliarova-Foucher E, O'Callaghan M, Ferlay J, Masuyer E, Rosso S, Forman D, Bray F, Comber H",,,27.0,France +24678734,EPSLiM,0.966204286,EPSLiM,0.966204286,predictor for,0.749277949,1,http://epslim.bwh.harvard.edu,"HTTPConnectionPool(host='epslim.bwh.harvard.edu', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to epslim.bwh.harvard.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20151016162519/http://epslim.bwh.harvard.edu:80/,2014-03-28,"Research Program in Men's Health: Aging and Metabolism (R.X., S.B., J.C.C., R.J.), Boston Claude D. 
Pepper Older Americans Independence Center, Brigham and Women's Hospital, Harvard Medical School, Boston, Massachusetts 02215; The National Library of Medicine (M.N.Z.), National Center for Bioinformation Technology, The National Institutes of Health, Department of Health and Human Services, Bethesda, Maryland 20892; and Department of Bioengineering (Y.X.), Faculty of Engineering, McGill University, Montreal, Quebec H3A 0C3, Canada.","Xue R, Zakharov MN, Xia Y, Bhasin S, Costello JC, Jasuja R",,"NIA NIH HHS, NIA NIH HHS",2.0,Canada +24682734,EpimiR,0.989621162,EpimiR,0.989621162,,0,1,http://bioinfo.hrbmu.edu.cn/EpimiR,"HTTPConnectionPool(host='bioinfo.hrbmu.edu.cn', port=80): Max retries exceeded with url: /EpimiR (Caused by ConnectTimeoutError(, 'Connection to bioinfo.hrbmu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20190906110025/http://bioinfo.hrbmu.edu.cn:80/EpimiR/,2014-03-28,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, People's Republic of China.","Dai E, Yu X, Zhang Y, Meng F, Wang S, Liu X, Liu D, Wang J, Li X, Jiang W",,,20.0,China +24705206,FixPred,0.988829434,FixPred,0.988829434,,0,1,http://www.fixpred.com,"HTTPConnectionPool(host='www.fixpred.com', port=80): Max retries exceeded with url: / (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,http://web.archive.org/web/20190906080235/http://www.fixpred.com:80/,2014-04-04,"Institute of Enzymology, Research Centre for Natural Sciences, Hungarian Academy of Sciences, H-1113 Budapest, Hungary.","Nagy A, Patthy L",,,4.0,Hungary +24885079,fPoxDB,0.997609806,fPoxDB,0.997609806,Fungal Peroxidase Database,0.992474094,1,http://peroxidase.riceblast.snu.ac.kr,"HTTPConnectionPool(host='peroxidase.riceblast.snu.ac.kr', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='peroxidase.riceblast.snu.ac.kr', port=80): Read timed out. 
(read timeout=5)""))",,,no_wayback,2014-05-08,None,"Choi J, Détry N, Kim KT, Asiegbu FO, Valkonen JP, Lee YH",,,23.0, +24904731,GenderMedDB,0.995782018,GenderMedDB,0.995782018,,0,1,http://gendermeddb.charite.de,200,,"(52.5244,13.4105)",http://web.archive.org/web/20220123101005/http://gendermeddb.charite.de/,2014-05-23,"Institute of Gender in Medicine, Charité-Universitätsmedizin Berlin, Hessische Str. 3/4, Berlin 10117, Germany ; German Center for Cardiovascular Research (DZHK), Berlin, Germany.","Oertelt-Prigione S, Gohlke BO, Dunkel M, Preissner R, Regitz-Zagrosek V",,,8.0,"Germany, Germany" +24928188,FreeSolv,0.989120781,FreeSolv,0.989120781,,0,1,http://www.escholarship.org/uc/item/6sd403pz,301,,"(33.4484,-112.0740)",http://web.archive.org/web/20220406205414/https://escholarship.org/uc/item/6sd403pz,2014-06-14,"Department of Pharmaceutical Sciences and Department of Chemistry, University of California, 147 Bison Modular, Irvine, CA, 92697, USA, dmobley@mobleylab.org.","Mobley DL, Guthrie JP",,"NIGMS NIH HHS, NIGMS NIH HHS",65.0,United States +24990533,GALT Protein Database,0.571744546,GALT Protein Database,0.571744546,,0,1,http://bioinformatica.isa.cnr.it/GALT/GALT2.0,200,,"(40.8522,14.2681)",http://web.archive.org/web/20210228145318/http://bioinformatica.isa.cnr.it/GALT/GALT2.0,2014-07-23,"Institute of Food Science, CNR, Avellino, 83100, Italy.","d'Acierno A, Facchiano A, Marabotti A",,"University of Salerno - Fondi di Ateneo per la Ricerca di Base (FARB), Italian Ministry of Education, University and Research and CNR",10.0,Italy +25005261,FmTFDb,0.99798429,FmTFDb,0.99798429,foxtail millet transcription factors database,0.956081793,1,http://59.163.192.91/FmTFDb/index.html,"HTTPConnectionPool(host='59.163.192.91', port=80): Max retries exceeded with url: /FmTFDb/index.html (Caused by ConnectTimeoutError(, 'Connection to 59.163.192.91 timed out. 
(connect timeout=5)'))",,,no_wayback,2014-07-09,"National Institute of Plant Genome Research (NIPGR), Aruna Asaf Ali Marg, JNU Campus, New Delhi, 110 067, India.","Bonthala VS, Muthamilarasan M, Roy R, Prasad M",,,4.0,India +25010047,GBM-BioDP,0.991873372,GBM-BioDP,0.991873372,Glioblastoma Bio Discovery Portal,0.790780693,1,http://gbm-biodp.nci.nih.gov,301,,"(39.4143,-77.4105)",http://web.archive.org/web/20220618031754/https://gbm-biodp.nci.nih.gov/,2014-07-10,"Radiation Oncology Branch, National Cancer Institute, National Institutes of Health, Bethesda, Maryland, United States of America.","Celiku O, Johnson S, Zhao S, Camphausen K, Shankavaram U",,Intramural NIH HHS,34.0,United States +25065645,EctoGEM,0.991552711,EctoGEM,0.991552711,scale metabolic network of Ectocarpus,0.682853514,1,http://ectogem.irisa.fr,"HTTPConnectionPool(host='ectogem.irisa.fr', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180422184734/http://ectogem.irisa.fr/,2014-08-27,"Université de Rennes 1, IRISA UMR 6074, Campus de Beaulieu, 35042, Rennes, France; CNRS, IRISA UMR 6074, Campus de Beaulieu, 35042, Rennes, France; Centre Rennes-Bretagne-Atlantique, Projet Dyliss, INRIA, Campus de Beaulieu, 35042, Rennes Cedex, France.","Prigent S, Collet G, Dittami SM, Delage L, Ethis de Corny F, Dameron O, Eveillard D, Thiele S, Cambefort J, Boyen C, Siegel A, Tonon T",,"French National Research Agency, Inria, University of Rennes 1, BIOTEMPO project",16.0,"France, France, France" +25149689,FusoBase,0.997971714,FusoBase,0.997971714,,0,1,http://fusobacterium.um.edu.my,"HTTPConnectionPool(host='fusobacterium.um.edu.my', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to fusobacterium.um.edu.my timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20160807235801/http://fusobacterium.um.edu.my:80/,2014-08-22,"Genome Informatics Research Laboratory, High Impact Research Building, University of Malaya, 50603 Kuala Lumpur, Malaysia, Department of Oral Biology and Biomedical Sciences, Faculty of Dentistry, University of Malaya, 50603 Kuala Lumpur, Malaysia, Department of Software Engineering, Faculty of Computer Science and Information Technology, University of Malaya, 50603 Kuala Lumpur, Malaysia and Centre for Oral Health Research, School of Dental Sciences, Newcastle University, Framlington Place, Newcastle upon Tyne NE2 4BW, UK Genome Informatics Research Laboratory, High Impact Research Building, University of Malaya, 50603 Kuala Lumpur, Malaysia, Department of Oral Biology and Biomedical Sciences, Faculty of Dentistry, University of Malaya, 50603 Kuala Lumpur, Malaysia, Department of Software Engineering, Faculty of Computer Science and Information Technology, University of Malaya, 50603 Kuala Lumpur, Malaysia and Centre for Oral Health Research, School of Dental Sciences, Newcastle University, Framlington Place, Newcastle upon Tyne NE2 4BW, UK.","Ang MY, Heydari H, Jakubovics NS, Mahmud MI, Dutta A, Wee WY, Wong GJ, Mutha NV, Tan SY, Choo SW",,,2.0,"Malaysia, Malaysia, Malaysia, Malaysia, Malaysia, Malaysia" +25152233,ExpTreeDB,0.995681107,ExpTreeDB,0.995681107,,0,1,http://biotech.bmi.ac.cn/ExpTreeDB,"HTTPConnectionPool(host='biotech.bmi.ac.cn', port=80): Max retries exceeded with url: /ExpTreeDB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-08-24,"Beijing Institute of Radiation Medicine, Beijing 100850, College of Life Sciences, Jilin University, Changchun 130012 and Henan University of Traditional Chinese Medicine, Zhengzhou 450008, China.","Ni M, Ye F, Zhu J, Li Z, Yang S, Yang B, Han L, Wu Y, Chen Y, Li F, Wang S, Bo X",,,5.0,China 
+25260589,FOAM,0.983061552,FOAM,0.983061552,Functional Ontology Assignments for Metagenomes,0.933293728,1,http://portal.nersc.gov/project/m1317/FOAM,308,,"(37.8716,-122.2728)",http://web.archive.org/web/20150811155001/http://portal.nersc.gov/project/m1317/FOAM/,2014-09-26,"Earth Sciences Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA Division of Biology, Kansas State University, Manhattan, Kansas 66506, USA.","Prestat E, David MM, Hultman J, Taş N, Lamendella R, Dvornik J, Mackelprang R, Myrold DD, Jumpponen A, Tringe SG, Holman E, Mavromatis K, Jansson JK",,,45.0,"United States, United States" +25324312,EpilepsyGene,0.996541262,EpilepsyGene,0.996541262,,0,1,http://61.152.91.49/EpilepsyGene,"HTTPConnectionPool(host='61.152.91.49', port=80): Max retries exceeded with url: /EpilepsyGene (Caused by ConnectTimeoutError(, 'Connection to 61.152.91.49 timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20170512223240/http://61.152.91.49:80/EpilepsyGene/,2014-10-16,"Institute of Genomic Medicine, Wenzhou Medical University, Wenzhou 325000, China.","Ran X, Li J, Shao Q, Chen H, Lin Z, Sun ZS, Sun ZS, Wu J",,,30.0,China +25324316,EzCatDB,0.997173011,EzCatDB,0.997173011,the enzyme reaction database,0.613930479,1,http://ezcatdb.cbrc.jp/EzCatDB,301,,"(35.6895,139.6917)",http://web.archive.org/web/20220709051812/https://ezcatdb.cbrc.jp/EzCatDB/,2014-10-16,"Computational Biology Research Center (CBRC), National Institute of Advanced Industrial Science and Technology (AIST), Tokyo Waterfront Bio-IT Research Building, 2-4-7 Aomi, Koto-ku, Tokyo 135-0064, Japan n.nagano@aist.go.jp.","Nagano N, Nakayama N, Ikeda K, Fukuie M, Yokota K, Doi T, Kato T, Tomii K",,,8.0,Japan +"25348407, 31733063",Genome3D,0.994092405,Genome3D,0.994092405,,0,2,http://www.genome3d.eu,200,,"(47.6339,-122.3476)",http://web.archive.org/web/20221022041836/http://www.genome3d.eu/,2020-01-01,"Institute of Structural and Molecular Biology, UCL, 636 Darwin Building, Gower 
Street, London, WC1E 6BT, UK., Institute of Structural and Molecular Biology, UCL, Gower Street, London WC1E 6BT, UK.","Lewis TE, Sillitoe I, Andreeva A, Blundell TL, Buchan DW, Chothia C, Cozzetto D, Dana JM, Filippis I, Gough J, Jones DT, Kelley LA, Kleywegt GJ, Minneci F, Mistry J, Murzin AG, Ochoa-Montaño B, Oates ME, Punta M, Rackham OJ, Stahlhacke J, Sternberg MJ, Velankar S, Orengo C, Sillitoe I, Andreeva A, Blundell TL, Buchan DWA, Finn RD, Gough J, Jones D, Kelley LA, Paysan-Lafosse T, Lam SD, Murzin AG, Pandurangan AP, Salazar GA, Skwark MJ, Sternberg MJE, Velankar S, Orengo C",", ","Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Medical Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Medical Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Botnar Foundation, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",23.0, 
+25352549,euL1db,0.994982398,euL1db,0.994982398,,0,1,http://euL1db.unice.fr,302,,"(43.7031,7.2661)",http://web.archive.org/web/20221017080901/http://eul1db.unice.fr/,2014-10-28,"INSERM, U1081, Institute for Research on Cancer and Aging of Nice (IRCAN), F-06100 Nice, France CNRS, UMR 7284, Institute for Research on Cancer and Aging of Nice (IRCAN), F-06100 Nice, France Faculty of Medicine, Institute for Research on Cancer and Aging of Nice (IRCAN), University of Nice-Sophia-Antipolis, F-06100 Nice, France.","Mir AA, Philippe C, Cristofari G",,European Research Council,29.0,"France, France, France" +25352573,ECODAB,0.991039038,ECODAB,0.991039038,Escherichia coli O-antigen database,0.911410725,1,http://www.casper.organ.su.se/ECODAB,301,,"(59.3294,18.0687)",http://web.archive.org/web/20140609195946/http://www.casper.organ.su.se:80/ECODAB/,2014-10-28,"Institute of Veterinary Physiology and Biochemistry, Justus-Liebig-University Giessen, Frankfurter Str. 100, Giessen 35392, Germany.","Rojas-Macias MA, StÃ¥hle J, Lütteke T, Widmalm G",,,15.0,Germany +25352729,FCDD,0.995983303,FCDD,0.995983303,Fruit Crops Diseases Database,0.945625222,1,http://www.fruitcropsdd.com,"HTTPConnectionPool(host='www.fruitcropsdd.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20161012004226/http://fruitcropsdd.com/,2014-09-30,"Applied Botany Centre, Department of Botany, University School of Sciences, Gujarat University, Ahmadabad 380009, Gujarat, India.","Chauhan R, Jasrai Y, Pandya H, Chaudhari S, Samota CM",,,0.0,India +25361971,GeneFriends,0.990629911,GeneFriends,0.990629911,,0,1,http://www.GeneFriends.org,403,,,http://web.archive.org/web/20221017004421/https://genefriends.org/,2014-10-31,"Integrative Genomics of Ageing Group, Institute of Integrative Biology, University of Liverpool, Liverpool, UK.","van Dam S, Craig T, de Magalhães JP",,Biotechnology 
and Biological Sciences Research Council,44.0, +"25378340, 29161421, 33180112",Europe PMC,0.938993772,Europe PMC,0.938993772,,0,3,http://europepmc.org,200,,"(51.5085,-0.1257)",http://web.archive.org/web/20221105203819/https://europepmc.org/,2021-01-01,"None, European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Cambridge, UK., Literature Services, EMBL-EBI, Wellcome Trust Genome Campus, Cambridge, UK.",", Levchenko M, Gou Y, Graef F, Hamelers A, Huang Z, Ide-Smith M, Iyer A, Kilian O, Katuri J, Kim JH, Marinos N, Nambiar R, Parkin M, Pi X, Rogers F, Talo F, Vartak V, Venkatesan A, McEntyre J, Ferguson C, Araújo D, Faulk L, Gou Y, Hamelers A, Huang Z, Ide-Smith M, Levchenko M, Marinos N, Nambiar R, Nassar M, Parkin M, Pi X, Rahman F, Rogers F, Roochun Y, Saha S, Selim M, Shafique Z, Sharma S, Stephenson D, Talo' F, Thouvenin A, Tirunagari S, Vartak V, Venkatesan A, Yang X, McEntyre J",", , ","Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust, Swiss National Science Foundation, MRC, Wellcome Trust",87.0, +25399415,GenoBase,0.995594084,GenoBase,0.995594084,,0,1,http://ecoli.naist.jp/GB,"HTTPConnectionPool(host='ecoli.naist.jp', port=80): Max retries exceeded with url: /GB (Caused by ConnectTimeoutError(, 'Connection to ecoli.naist.jp timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20160912233907/http://ecoli.naist.jp:80/GB/,2014-11-15,"Graduate School of Biological Sciences, Nara Institute of Science and Technology, Ikoma, Nara 630-0101, Japan.","Otsuka Y, Muto A, Takeuchi R, Okada C, Ishikawa M, Nakamura K, Yamamoto N, Dose H, Nakahigashi K, Tanishima S, Suharnan S, Nomura W, Nakayashiki T, Aref WG, Bochner BR, Conway T, Gribskov M, Kihara D, Rudd KE, Tohsato Y, Wanner BL, Mori H",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",12.0,Japan +25414353,EHFPI,0.998098135,EHFPI,0.998098135,Essential Host Factors for Pathogenic Infection,0.983319062,1,http://biotech.bmi.ac.cn/ehfpi,"HTTPConnectionPool(host='biotech.bmi.ac.cn', port=80): Max retries exceeded with url: /ehfpi (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20150702224309/http://biotech.bmi.ac.cn:80/ehfpi,2014-11-20,"Department of Biotechnology, Beijing Institute of Radiation Medicine, Beijing 100850, P.R.China.","Liu Y, Xie D, Han L, Bai H, Li F, Wang S, Bo X",,,11.0,China +25432969,Ensembl Plants,0.933526009,Ensembl Plants,0.933526009,,0,1,http://plants.ensembl.org,301,,"(51.5085,-0.1257)",no_wayback,2014-11-27,"Ensembl Genomes, EMBL-European Bioinformatics Institute, Wellcome Trust Genome Campus, Cambridge CB10 1SD, UK.","Bolser DM, Kerhornou A, Walts B, Kersey P",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",31.0, +25432975,GenomicusPlants,0.995462537,GenomicusPlants,0.995462537,,0,1,http://www.genomicus.biologie.ens.fr/genomicus-plants,302,,"(48.8534,2.3488)",no_wayback,2014-11-27,"Ecole Normale Supérieure, Institut de Biologie de l'ENS, IBENS, Paris, F-75005 France CNRS, UMR 8197, Paris, 
F-75005 France Inserm, U1024, Paris, F-75005 France alouis@biologie.ens.fr.","Louis A, Murat F, Salse J, Crollius HR",,,9.0,"France, France, France" +25451822,EDCs DataBank,0.928272024,EDCs DataBank,0.928272024,,0,1,http://edcs.unicartagena.edu.co,200,,"(4.6097,-74.0817)",http://web.archive.org/web/20220523225113/http://edcs.unicartagena.edu.co/,2014-11-25,"Environmental and Computational Chemistry Group, School of Pharmaceutical Sciences, University of Cartagena, Campus of Zaragocilla, Cartagena, Bolivar 130015, Colombia. Electronic address: dmontesg@unicartagena.edu.co.","Montes-Grajales D, Olivero-Verbel J",,"Administrative Department of Science, Technology and Innovation of Colombia, Colciencias, Administrative Department of Science, Technology and Innovation of Colombia, Colciencias, University of Cartagena, Administrative Department of Science, Technology and Innovation of Colombia, Colciencias, Administrative Department of Science, Technology and Innovation of Colombia, Colciencias",6.0,Colombia +25522231,funRNA,0.992636442,funRNA,0.992636442,,0,1,http://funrna.riceblast.snu.ac.kr,"HTTPConnectionPool(host='funrna.riceblast.snu.ac.kr', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='funrna.riceblast.snu.ac.kr', port=80): Read timed out. 
(read timeout=5)""))",,,no_wayback,2014-12-08,None,"Choi J, Kim KT, Jeon J, Wu J, Song H, Asiegbu FO, Lee YH",,,10.0, +25534749,EssOilDB,0.99728632,EssOilDB,0.99728632,,0,1,http://nipgr.res.in/Essoildb,"HTTPConnectionPool(host='nipgr.res.in', port=80): Max retries exceeded with url: /Essoildb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20181010195154/http://nipgr.res.in:80/Essoildb/,2014-12-22,"Computational Biology Laboratory, National Institute of Plant Genome Research (NIPGR), New Delhi 110067 India.","Kumari S, Pundhir S, Priya P, Jeena G, Punetha A, Chawla K, Firdos Jafaree Z, Mondal S, Yadav G",,,16.0,India +25650278,EcoliNet,0.970784962,EcoliNet,0.970784962,,0,1,http://www.inetbio.org/ecolinet,301,,"(37.5598,126.9439)",http://web.archive.org/web/20210421161656/https://www.inetbio.org/ecolinet/,2015-02-02,"Department of Biotechnology, College of Life Science and Biotechnology, Yonsei University, Seoul, Korea.","Kim H, Shim JE, Shin J, Lee I",,,8.0, +25725058,FR,0.466398388,FR,0.466398388,,0,1,http://www.fruitech.org,"HTTPConnectionPool(host='www.fruitech.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170615210046/http://fruitech.org/,2015-02-27,"School of Biotechnology and Food Engineering, Hefei University of Technology, Hefei 230009, China, School of Medical Engineering, Hefei University of Technology, Hefei 230009, China, School of Information Science and Technology, University of Science and Technology of China, Hefei 230009, China, Ministry of Education Key Laboratory for Bio-resource and Eco-environment, College of Life Science and State Key Laboratory of Hydraulics and Mountain River Engineering, Sichuan University, Chengdu 610064, China.","Yue J, Ma X, Ban R, Huang Q, Wang W, Liu J, Liu Y",,,3.0,"China, China, 
China, China, China" +25953079,GBIS,0.697626531,GBIS,0.697626531,,0,1,http://gbis.ipk-gatersleben.de,302,,"(49.0151,12.1016)",http://web.archive.org/web/20130821002236/http://gbis.ipk-gatersleben.de,2015-05-07,"Leibniz-Institut für Pflanzengenetik und Kulturpflanzenforschung (IPK) Gatersleben, OT Gatersleben, Corrensstraße 3, 06466 Stadt Seeland, Germany oppermann@ipk-gatersleben.de.","Oppermann M, Weise S, Dittmann C, Knüpffer H",,,13.0,Germany +26179317,Evalutil,0.994965792,Evalutil,0.994965792,,0,1,http://ssl2.isped.u-bordeaux2.fr/eva_003,404,,,no_wayback,2015-07-14,"Département santé travail, Institut de veille sanitaire, Saint-Maurice, France.","Orlowski E, Audignon-Durand S, Goldberg M, Imbernon E, Brochard P",,French Ministry of Labor,2.0,France +26215638,FusionCancer,0.993119001,FusionCancer,0.993119001,,0,1,http://donglab.ecnu.edu.cn/databases/FusionCancer/Conclusion,"HTTPConnectionPool(host='donglab.ecnu.edu.cn', port=80): Max retries exceeded with url: /databases/FusionCancer/Conclusion (Caused by ReadTimeoutError(""HTTPConnectionPool(host='donglab.ecnu.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2015-07-28,"Institute of Molecular Ecology and Evolution, SKLEC & IECR, East China Normal University, Shanghai, China. 
yunjinwang1222@gmail.com.","Wang Y, Wu N, Liu J, Wu Z, Dong D",,,34.0,"China, China" +26272709,GenomewidePDB,0.987685204,GenomewidePDB,0.987685204,Chromosome-centric,0.595189404,1,http://genomewidepdb.proteomix.org,200,,"(37.5598,126.9439)",http://web.archive.org/web/20211028035000/http://genomewidepdb.proteomix.org/,2015-08-19,"Yonsei Proteome Research Center and Biomedical Proteome Research Center , 50 Yonsei-Ro, Seodaemun-gu, Seoul 120-749, Korea.","Jeong SK, Hancock WS, Paik YK",,"National Research Foundation of Korea, Ministry of Health and Welfare, Ministry of Health and Welfare",4.0, +26317619,FMiR,0.989025816,FMiR,0.989025816,Mitogenome Resource,0.922336429,1,http://mail.nbfgr.res.in/fmir,301,,"(26.8393,80.9231)",http://web.archive.org/web/20220527053740/https://mail.nbfgr.res.in/fmir/,2015-08-28,"Division of Molecular Biology and Biotechnology, National Bureau of Fish Genetic Resources, Lucknow-226002, India.","Nagpure NS, Rashid I, Pathak AK, Singh M, Pati R, Singh SP, Sarkar UK",,,6.0,India +26384373,FARE-CAFE,0.994346166,FARE-CAFE,0.994346166,,0,1,http://ppi.bioinfo.asia.edu.tw/FARE-CAFE,"HTTPConnectionPool(host='ppi.bioinfo.asia.edu.tw', port=80): Max retries exceeded with url: /FARE-CAFE (Caused by ConnectTimeoutError(, 'Connection to ppi.bioinfo.asia.edu.tw timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20160817123129/http://ppi.bioinfo.asia.edu.tw:80/FARE-CAFE/,2015-09-16,"Department of Bioinformatics and Medical Engineering, Asia University, Taichung 41354, Taiwan.","Korla PK, Cheng J, Huang CH, Tsai JJ, Liu YH, Kurubanjerdjit N, Hsieh WT, Chen HY, Ng KL",,,5.0, +26456067,FLAD,0.979612629,FLAD,0.979612629,Forensic Loci Allele Database,0.973130558,1,http://forensic.ugent.be/FLAD,301,,"(51.0500,3.7167)",http://web.archive.org/web/20220809094757/http://forensic.ugent.be/FLAD/,2015-09-21,"Laboratory of Pharmaceutical Biotechnology, Faculty of Pharmaceutical Sciences, Ghent University, Ghent, Belgium.","Van Neste C, Van Criekinge W, Deforce D, Van Nieuwerburgh F",,Ghent University Multidisciplinary Research Partnership,4.0,Belgium +26476447,FLOR-ID,0.993054897,FLOR-ID,0.993054897,Flowering-Interactive Database,0.88401364,1,http://www.flor-id.org,301,,"(50.6942,3.1746)",no_wayback,2015-10-17,"PhytoSYSTEMS, Laboratory of Plant Physiology, University of Liège, Quartier Vallée 1 Sart Tilman Campus, 4 Chemin de la Vallée, 4000 Liège, Belgium.","Bouché F, Lobet G, Tocquin P, Périlleux C",,,98.0,Belgium +26519912,eHALOPH,0.997677743,eHALOPH,0.997677743,,0,1,http://www.sussex.ac.uk/affiliates/halophytes,301,,"(50.8284,-0.1395)",http://web.archive.org/web/20220324130942/https://www.sussex.ac.uk/affiliates/halophytes/,2015-10-31,"Centre for Functional Ecology, Departamento de Ciências da Vida, Universidade de Coimbra, 3000-456 Coimbra, Portugal.","Santos J, Al-Azzawi M, Aronson J, Flowers TJ",,,25.0,Portugal +26567549,GEneSTATION,0.995596051,GEneSTATION,0.995596051,,0,1,http://genestation.org,301,,"(34.0522,-118.2437)",http://web.archive.org/web/20181101061544/http://www.genestation.org/,2015-11-14,"Department of Biological Sciences, Vanderbilt University, Nashville, TN 37235, USA.","Kim M, Cooper BA, Venkat R, Phillips JB, Eidem HR, Hirbo J, Nutakki S, Williams SM, Muglia LJ, Capra JA, Petren K, Abbot P, Rokas A, McGary 
KL",,NLM NIH HHS,4.0,United States +26582924,enviPath,0.992086053,enviPath,0.992086053,The Environmental Contaminant Biotransformation Pathway Resource,0.90123873,1,http://envipath.org,301,,"(49.4663,7.1681)",http://web.archive.org/web/20221005212642/https://envipath.org/,2015-11-17,,,,,0.0, +26590402,EffectiveDB,0.997481704,EffectiveDB,0.997481704,,0,1,http://effectivedb.org,301,,"(48.2085,16.3721)",http://web.archive.org/web/20220709085847/https://effectivedb.org/,2015-11-20,"Division of Computational System Biology, Department of Microbiology and Ecosystem Science, University of Vienna, 1090 Vienna, Austria.","Eichinger V, Nussbaumer T, Platzer A, Jehl MA, Arnold R, Rattei T",,"Austrian Science Fund FWF, Austrian Science Fund FWF",60.0,Austria +"26590404, 30298402",FunTree,0.994794667,FunTree,0.994794667,,0,2,http://www.funtree.info,200,,"(55.8651,-4.2576)",http://web.archive.org/web/20220521180744/http://funtree.info/,2019-01-01,"Institute of Structural and Molecular Biology, University College London, Darwin Building, Gower Street, London WC1E 6BT, UK., EMBL-EBI, Wellcome Genome Campus, Hinxton, Cambridge, UK. 
tyzack@ebi.ac.uk.","Sillitoe I, Furnham N, Tyzack JD, Furnham N, Sillitoe I, Orengo CM, Thornton JM",", ","Biotechnology and Biological Sciences Research Council, Medical Research Council, Medical Research Council, Medical Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",7.0, +26748106,EpimiRBase,0.9903965,EpimiRBase,0.9903965,,0,1,http://www.epimirbase.eu,406,,,http://web.archive.org/web/20220615144629/https://www.epimirbase.eu/,2016-01-08,"Department of Physiology and Medical Physics, Royal College of Surgeons in Ireland.","Mooney C, Becker BA, Raoof R, Henshall DC",,,23.0,Ireland +26780094,GenomeSpace,0.996023118,GenomeSpace,0.996023118,,0,1,http://www.genomespace.org,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20221016214737/https://genomespace.org/,2016-01-18,"Program in Epithelial Biology, Stanford University School of Medicine, Stanford, CA, USA.","Qu K, Garamszegi S, Wu F, Thorvaldsdottir H, Liefeld T, Ocana M, Borges-Rivera D, Pochet N, Robinson JT, Demchak B, Hull T, Ben-Artzi G, Blankenberg D, Barber GP, Lee BT, Kuhn RM, Nekrutenko A, Segal E, Ideker T, Reich M, Regev A, Chang HY, Mesirov JP",,"NCI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS",23.0,United States +26800861,EchinoDB,0.992635667,EchinoDB,0.992635667,,0,1,"http://echinotol.org, http://echinodb.uncc.edu","302, 302",,"(-37.9747,145.0269), (40.0334,-83.1582)","http://web.archive.org/web/20220223193855/http://echinotol.org/, http://web.archive.org/web/20221025135154/https://echinodb.uncc.edu/",2016-01-22,"Department of Bioinformatics and Genomics, University of North Carolina at Charlotte, 9201 University City Blvd, Charlotte, NC, 28223-0001, USA. 
djanies@uncc.edu.","Janies DA, Witter Z, Linchangco GV, Foltz DW, Miller AK, Kerr AM, Jay J, Reid RW, Wray GA",,"National Science Foundation, National Science Foundation, National Science Foundation, National Science Foundation",10.0,United States +26806463,FiloBase,0.992046833,FiloBase,0.992046833,,0,1,http://filobase.bicpu.edu.in,"HTTPConnectionPool(host='filobase.bicpu.edu.in', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='filobase.bicpu.edu.in', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220618123357/http://filobase.bicpu.edu.in/,2016-01-25,"Centre for Bioinformatics, School of Life Science, Pondicherry University, Pondicherry-605014, India.","Sharma OP, Kumar MS",,,6.0,India +26851225,gametogenesis epigenetic modification database,0.95555062,GED,0.943311706,gametogenesis epigenetic modification database,0.95555062,1,http://gametsepi.nwsuaflmz.com,406,,,http://web.archive.org/web/20161208181824/http://gametsepi.nwsuaflmz.com:80/,2016-02-05,None,"Bai W, Yang W, Wang W, Wang Y, Liu C, Jiang Q, Hua J, Liao M",,,5.0, +26980518,Fish Karyome,0.700258017,Fish Karyome,0.700258017,,0,1,http://mail.nbfgr.res.in/Fish_Karyome,301,,"(26.8393,80.9231)",http://web.archive.org/web/20220527052255/https://mail.nbfgr.res.in/Fish_Karyome/,2016-03-15,Division of Molecular Biology and Biotechnology.,"Nagpure NS, Pathak AK, Pati R, Rashid I, Sharma J, Singh SP, Singh M, Sarkar UK, Kushwaha B, Kumar R, Murali S",,,2.0, +27037912,GAMDB,0.99449718,GAMDB,0.99449718,Gerontology-Autophagic-MicroRNA Database,0.98334261,1,http://gamdb.liu-lab.com/index.php,"HTTPConnectionPool(host='gamdb.liu-lab.com', port=80): Max retries exceeded with url: /index.php (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160510083751/http://gamdb.liu-lab.com:80/index.php,2016-03-31,"State Key Laboratory of Biotherapy and Cancer 
Center, West China Hospital, Sichuan University, and Collaborative Innovation Center of Biotherapy, Chengdu, 610041, China.","Zhang L, Xie T, Tian M, Li J, Song S, Ouyang L, Liu B, Cai H",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National 973 Basic Research Program of China, National Natural Science Foundation of China",5.0,"China, China" +27102089,GAT,0.967916131,GAT,0.967916131,gene-set activity toolbox,0.814590596,1,http://gat.sit.kmutt.ac.th,301,,"(13.7540,100.5014)",no_wayback,2016-03-15,"1 Data and Knowledge Engineering Laboratory, School of Information Technology, King Mongkut's University of Technology Thonburi, Bangkok, Thailand.","Engchuan W, Meechai A, Tongsima S, Doungpan N, Chan JH",,,3.0,Thailand +27127885,GCGene,0.995825529,GCGene,0.995825529,Gastric Cancer Gene database,0.884341019,1,http://gcgene.bioinfo-minzhao.org,406,,,http://web.archive.org/web/20221016133207/https://gcgene.bioinfo-minzhao.org/,2016-06-01,"School of Engineering, Faculty of Science, Health, Education and Engineering, University of The Sunshine Coast, Maroochydore DC, Queensland, Australia.","Zhao M, Chen L, Liu Y, Qu H",,,1.0,Australia +"27141961, 27141961, 27141961, 27141961, 27141961",Enrichr,0.996474743,Enrichr,0.996474743,,0,1,http://amp.pharm.mssm.edu/Enrichr,307,,"(39.0437,-77.4875)",http://web.archive.org/web/20200829172838/http://amp.pharm.mssm.edu/Enrichr/,2016-05-03,"Department of Pharmacology and Systems Therapeutics, BD2K-LINCS Data Coordination and Integration Center, Icahn School of Medicine at Mount Sinai, One Gustave L. Levy Place Box 1215, New York, NY 10029, USA., Department of Pharmacology and Systems Therapeutics, BD2K-LINCS Data Coordination and Integration Center, Icahn School of Medicine at Mount Sinai, One Gustave L. 
Levy Place Box 1215, New York, NY 10029, USA., Department of Pharmacology and Systems Therapeutics, BD2K-LINCS Data Coordination and Integration Center, Icahn School of Medicine at Mount Sinai, One Gustave L. Levy Place Box 1215, New York, NY 10029, USA., Department of Pharmacology and Systems Therapeutics, BD2K-LINCS Data Coordination and Integration Center, Icahn School of Medicine at Mount Sinai, One Gustave L. Levy Place Box 1215, New York, NY 10029, USA., Department of Pharmacology and Systems Therapeutics, BD2K-LINCS Data Coordination and Integration Center, Icahn School of Medicine at Mount Sinai, One Gustave L. Levy Place Box 1215, New York, NY 10029, USA.","Kuleshov MV, Jones MR, Rouillard AD, Fernandez NF, Duan Q, Wang Z, Koplev S, Jenkins SL, Jagodnik KM, Lachmann A, McDermott MG, Monteiro CD, Gundersen GW, Ma'ayan A, Kuleshov MV, Jones MR, Rouillard AD, Fernandez NF, Duan Q, Wang Z, Koplev S, Jenkins SL, Jagodnik KM, Lachmann A, McDermott MG, Monteiro CD, Gundersen GW, Ma'ayan A, Kuleshov MV, Jones MR, Rouillard AD, Fernandez NF, Duan Q, Wang Z, Koplev S, Jenkins SL, Jagodnik KM, Lachmann A, McDermott MG, Monteiro CD, Gundersen GW, Ma'ayan A, Kuleshov MV, Jones MR, Rouillard AD, Fernandez NF, Duan Q, Wang Z, Koplev S, Jenkins SL, Jagodnik KM, Lachmann A, McDermott MG, Monteiro CD, Gundersen GW, Ma'ayan A, Kuleshov MV, Jones MR, Rouillard AD, Fernandez NF, Duan Q, Wang Z, Koplev S, Jenkins SL, Jagodnik KM, Lachmann A, McDermott MG, Monteiro CD, Gundersen GW, Ma'ayan A",", , , , ","NHLBI NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NHLBI NIH HHS",12590.0,"United States, United States, United States, United States, United States" 
+27242503,Ferretome,0.98255372,Ferretome,0.98255372,,0,1,http://bams1.org,301,,"(40.8223,-74.4569)",http://web.archive.org/web/20221017051157/https://bams1.org/,2016-05-10,,,,,0.0, +27481021,eTOX,0.934810936,eTOX,0.934810936,,0,1,http://www.etoxproject.eu,200,,"(41.3888,2.1590)",http://web.archive.org/web/20220518094824/http://etoxproject.eu/,2013-01-11,"Research Programme on Biomedical Informatics (GRIB), Hospital del Mar Medical Research Institute (IMIM), Dep. of Experimental and Health Sciences, Universitat Pompeu Fabra, C/ Dr. Aiguader 88, Barcelona, Spain phone/fax: + 34 933 160 524/ + 34 933 160 550.","Cases M, Pastor M, Sanz F",,,2.0,Spain +27664130,ENVO,0.97583425,ENVO,0.97583425,Ontology,0.52125144,1,"http://www.environmentontology.org/, http://purl.obolibrary.org/obo/envo.owl","200, 302",,"(39.0437,-77.4875), (39.0437,-77.4875)","http://web.archive.org/web/20220920192542/http://environmentontology.org/, no_wayback",2016-09-23,,,,,0.0, +27698587,Flora-On,0.985483035,Flora-On,0.985483035,,0,1,http://flora-on.pt,301,,"(38.7167,-9.1333)",http://web.archive.org/web/20221107191013/https://flora-on.pt/,2016-09-09,"Sociedade Portuguesa de Botânica (SPBotânica), Travessa do Jardim n° 3, A-dos-Potes, 2615-018 Alverca, Portugal.","Pereira AJ, Francisco A, Porto M",,,0.0,Portugal +27789686,GenomeCRISPR,0.993304312,GenomeCRISPR,0.993304312,,0,1,http://genomecrispr.org,302,,"(51.0395,14.5356)",no_wayback,2016-10-26,"German Cancer Research Center (DKFZ), Division Signaling and Functional Genomics and Heidelberg University, Department of Cell and Molecular Biology, Medical Faculty Mannheim, 69120 Heidelberg, Germany.","Rauscher B, Heigwer F, Breinig M, Winter J, Boutros M",,,25.0,Germany +27794553,FuzDB,0.998405516,FuzDB,0.998405516,complexes,0.655352414,1,http://protdyn-database.org,200,,"(42.7325,-84.5555)",http://web.archive.org/web/20220419034108/http://protdyn-database.org/,2016-10-28,"MTA-DE Momentum, Laboratory of Protein Dynamics, Department of Biochemistry and 
Molecular Biology, University of Debrecen, H-4032 Debrecen, Hungary.","Miskei M, Antal C, Fuxreiter M",,,41.0,Hungary +27899646,FAIRDOMHub,0.996229708,FAIRDOMHub,0.996229708,,0,1,http://fairdomhub.org,302,,"(49.4194,8.7345)",http://web.archive.org/web/20221007145120/https://fairdomhub.org/,2016-11-28,"Leiden Institute of Advanced Computer Science, Leiden University, Leiden, 2333 CA, Netherlands.","Wolstencroft K, Krebs O, Snoep JL, Stanford NJ, Bacall F, Golebiewski M, Kuzyakiv R, Nguyen Q, Owen S, Soiland-Reyes S, Straszewski J, van Niekerk DD, Williams AR, Malmström L, Rinn B, Müller W, Goble C",,"Biotechnology and Biological Sciences Research Council, Dutch Research Council (NWO), Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Engineering and Physical Sciences Research Council",33.0,Netherlands +27905517,G4IPDB,0.99755083,G4IPDB,0.99755083,Nucleic,0.905615389,1,http://bsbe.iiti.ac.in/bsbe/ipdb/index.php,302,,"(22.5387,75.9111)",http://web.archive.org/web/20211127203314/http://bsbe.iiti.ac.in/bsbe/ipdb/index.php,2016-12-01,"Centre for Biosciences and Biomedical Engineering, Indian Institute of Technology Indore, Indore, Madhya Pradesh, 453552, India.","Mishra SK, Tawani A, Mishra A, Kumar A",,,43.0,India +27924038,FARNA,0.99584657,FARNA,0.99584657,,0,1,http://cbrc.kaust.edu.sa/farna,302,,"(37.5331,-122.2486)",http://web.archive.org/web/20220319160124/https://www.cbrc.kaust.edu.sa/farna/,2017-03-01,"King Abdullah University of Science and Technology (KAUST), Computational Bioscience Research Center (CBRC), Thuwal, Saudi Arabia.","Alam T, Uludag M, Essack M, Salhi A, Ashoor H, Hanks JB, Kapfer C, Mineta K, Gojobori T, Bajic VB",,,15.0,Saudi Arabia +"27924041, 31724701",Exposome-Explorer,0.96887961,Exposome-Explorer,0.96887961,,0,2,http://exposome-explorer.iarc.fr,200,,"(40.7402,-73.9996)",http://web.archive.org/web/20221030081652/http://exposome-explorer.iarc.fr/,2020-01-01,"International Agency for 
Research on Cancer (IARC), Nutrition and Metabolism Section, Biomarkers Group, 150 Cours Albert Thomas, F-69372 Lyon Cedex 08, France., International Agency for Research on Cancer (IARC), Nutrition and Metabolism Section, Biomarkers Group, 150 Cours Albert Thomas, F-69372 Lyon Cedex 08, France.","Neveu V, Moussy A, Rouaix H, Wedekind R, Pon A, Knox C, Wishart DS, Scalbert A, Neveu V, Nicolas G, Salek RM, Wishart DS, Scalbert A",", ",", Joint Programming Initiative FOODBALL, World Health Organization, EXPOsOMICS FP7-KBBE-2012, International Agency for Research on Cancer",52.0,"France, France" +28013277,GABI-Kat,0.946155384,GABI-Kat,0.946155384,,0,1,http://www.gabi-kat.de/db/genehits.php,302,,"(52.0333,8.5333)",http://web.archive.org/web/20221016200618/https://www.gabi-kat.de/db/genehits.php,2017-01-01,"Center for Biotechnology and Department of Biology, Bielefeld University, Universitaetsstrasse, Bielefeld, Germany.","Kleinboelting N, Huep G, Weisshaar B",,,4.0,Germany +28077567,Functional Antibiotic Resistant Metagenomic Element,0.970071673,FARME,0.964730322,Functional Antibiotic Resistant Metagenomic Element,0.970071673,1,http://staff.washington.edu/jwallace/farme,301,,"(47.6062,-122.3321)",http://web.archive.org/web/20220518164010/http://staff.washington.edu/jwallace/farme/,2017-01-10,"Department of Environmental and Occupational Health Sciences, Institute for Risk Analysis and Risk Communication, University of Washington, Seattle, WA, USA.","Wallace JC, Port JA, Smith MN, Faustman EM",,,9.0,United States +28245064,FRED,0.980674982,FRED,0.980674982,Fine-Root Ecology Database,0.815621734,1,http://roots.ornl.gov,308,,"(36.0104,-84.2696)",http://web.archive.org/web/20221010201715/https://roots.ornl.gov/,2017-02-28,"Climate Change Science Institute and Environmental Sciences Division, Oak Ridge National Laboratory, Oak Ridge, TN, 37831, USA.","Iversen CM, McCormack ML, Powell AS, Blackwood CB, Freschet GT, Kattge J, Roumet C, Stover DB, Soudzilovskaia NA, 
Valverde-Barrantes OJ, van Bodegom PM, Violle C",,"European Research Council, Office of Biological and Environmental Research, European Research Council, Biological and Environmental Research",49.0,United States +28453687,GDISC,0.982105613,GDISC,0.982105613,gene,0.701045394,1,http://gdisc.bme.gatech.edu,302,,"(33.7490,-84.3880)",http://web.archive.org/web/20220124111821/https://gdisc.bme.gatech.edu/,2017-05-01,"Department of Biomedical Engineering, Georgia Institute of Technology and Emory University, Atlanta, GA 30332, USA.","Spainhour JCG, Lim J, Qiu P",,NCI NIH HHS,6.0,"Georgia, United States" +"28529706, 28529706",Expresso,0.997199416,Expresso,0.997199416,,0,1,http://bioinformatics.cs.vt.edu/expresso,404,,,http://web.archive.org/web/20220303230547/http://bioinformatics.cs.vt.edu/expresso/,2017-03-28,"Genetics, Bioinformatics, and Computational Biology (GBCB), Virginia Tech, Blacksburg, VA, 24061, USA., Genetics, Bioinformatics, and Computational Biology (GBCB), Virginia Tech, Blacksburg, VA, 24061, USA.","Aghamirzaie D, Raja Velmurugan K, Wu S, Altarawy D, Heath LS, Grene R, Aghamirzaie D, Raja Velmurugan K, Wu S, Altarawy D, Heath LS, Grene R",", ",", ",10.0,"United States, United States" +28651291,GenomeTrakrCP,0.995048881,GenomeTrakrCP,0.995048881,of Chloroplast Genome Sequences,0.815545069,1,http://www.ncbi.nlm.nih.gov/bioproject/PRJNA325670,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20220616115519/https://www.ncbi.nlm.nih.gov/bioproject/PRJNA325670/,2017-06-26,"Center for Food Safety and Applied Nutrition, Office of Regulatory Science, U. S. 
Food and Drug Administration, College Park, Maryland, United States.","Zhang N, Ramachandran P, Wen J, Duke JA, Metzman H, McLaughlin W, Ottesen AR, Timme RE, Handy SM",,,8.0,United States +28960889,eSnail,0.99808991,eSnail,0.99808991,,0,1,http://soft.bioinfo-minzhao.org/esnail,406,,,http://web.archive.org/web/20211217092755/http://soft.bioinfo-minzhao.org/esnail/,2017-11-12,"School of Engineering, Faculty of Science, Health, Education and Engineering, University of the Sunshine Coast, Maroochydore DC, Qld, Australia.","Zhao M, Wang T, Stewart MJ, Bose U, Suwansa-Ard S, Storey KB, Cummins SF",,,0.0,Australia +"28985416, 30945202",EVLncRNAs,0.995015562,EVLncRNAs,0.995015562,,0,2,http://biophy.dzu.edu.cn/EVLncRNAs,301,,"(36.0649,120.3804)",http://web.archive.org/web/20221102060126/http://biophy.dzu.edu.cn/EVLncRNAs/,2019-01-01,"Shandong Provincial Key Laboratory of Biophysics, Institute of Biophysics, Dezhou University, Dezhou 253023, China., Shandong Provincial Key Laboratory of Biophysics, Institute of Biophysics, Dezhou University, Dezhou, China.","Zhou B, Zhao H, Yu J, Guo C, Dou X, Song F, Hu G, Cao Z, Qu Y, Yang Y, Zhou Y, Wang J, Zhou B, Zhao H, Yu J, Guo C, Dou X, Song F, Hu G, Cao Z, Qu Y, Yang Y, Zhou Y, Wang J",", ",", ",41.0,"China, China" +29040751,EpiDenovo,0.997566879,EpiDenovo,0.997566879,,0,1,http://www.epidenovo.biols.ac.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220526120222/http://www.epidenovo.biols.ac.cn/,2018-01-01,"Beijing Institutes of Life Science, Chinese Academy of Sciences, Beijing 100101, China.","Mao F, Liu Q, Zhao X, Yang H, Guo S, Xiao L, Li X, Teng H, Sun Z, Sun Z, Dou Y",,,10.0,China +29059383,FlavorDB,0.997247577,FlavorDB,0.997247577,,0,1,http://cosylab.iiitd.edu.in/flavordb,301,,"(28.6453,77.2128)",http://web.archive.org/web/20220317004709/https://cosylab.iiitd.edu.in/flavordb/,2018-01-01,"Center for Computational Biology, Indraprastha Institute of Information Technology (IIIT-Delhi), New Delhi, India.","Garg N, 
Sethupathy A, Tuwani R, Nk R, Dokania S, Iyer A, Gupta A, Agrawal S, Singh N, Shukla S, Kathuria K, Badhwar R, Kanji R, Jain A, Kaur A, Nagpal R, Bagler G",,,17.0,India +29106618,eRAM,0.992225349,eRAM,0.992225349,encyclopedia of Rare disease Annotations for,0.910854608,1,http://www.unimd.org/eram,301,,"(37.3394,-121.8950)",no_wayback,2018-01-01,"The Center for Bioinformatics and Computational Biology, Shanghai Key Laboratory of Regulatory Biology, the Institute of Biomedical Sciences and School of Life Sciences, East China Normal University, Shanghai 200241, China.","Jia J, An Z, Ming Y, Guo Y, Li W, Liang Y, Guo D, Li X, Tai J, Chen G, Jin Y, Liu Z, Ni X, Shi T",,,13.0,"China, China" +29126995,FVD,0.953768969,FVD,0.953768969,The fish-associated virus database,0.77134944,1,http://bioinfo.ihb.ac.cn/fvd,301,,"(39.9075,116.3972)",no_wayback,2017-11-08,"Center for Molecular and Cellular Biology of Aquatic Organisms, Institute of Hydrobiology, The Chinese Academy of Sciences, Wuhan 430072, China; University of Chinese Academy of Sciences, Beijing 100049, China.","Chen Y, Shi M, Cheng Y, Zhang W, Tang Q, Xia XQ",,"National Natural Science Foundation of China, Chinese Academy of Sciences, Chinese Academy of Sciences",1.0,"China, China" +29175726,FROG-kb,0.992679045,FROG-kb,0.992679045,Forensic Resource/Reference on Genetics-knowledge base,0.930164516,1,http://frog.med.yale.edu/FrogKB,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20221025141849/https://frog.med.yale.edu/FrogKB/,2017-11-14,"Department of Genetics, Yale University School of Medicine, New Haven, CT 06520, USA. 
Electronic address: kenneth.kidd@yale.edu.","Kidd KK, Soundararajan U, Rajeevan H, Pakstis AJ, Moore KN, Ropero-Miller JD",,"National Institute of Justice, Forensic Technology Center of Excellence, National Institute of Justice, Office of Investigative Sciences",5.0,United States +29220485,funRiceGenes,0.978388309,funRiceGenes,0.978388309,,0,1,"http://funricegenes.github.io/, http://funricegenes.ncpgr.cn","301, 500",,"(37.7621,-122.3971), ","http://web.archive.org/web/20220810051244/https://funricegenes.github.io/, http://web.archive.org/web/20201125110202/http://funricegenes.ncpgr.cn/",2018-01-01,"National Key Laboratory of Crop Genetic Improvement, National Center of Plant Gene Research, Huazhong Agricultural University, Wuhan 430070, China.","Yao W, Li G, Yu Y, Ouyang Y",,,39.0,China +29328995,FilTer BaSe,0.918705657,FilTer BaSe,0.918705657,,0,1,http://bioinfo.net.in/filterbase,403,,,no_wayback,2018-01-06,"Bioinformatics Centre, Savitribai Phule Pune University, Pune, 411007, India.","Kolte BS, Londhe SR, Solanki BR, Gacche RN, Meshram RJ",,,2.0,India +29337142,Express,0.922874272,Express,0.922874272,,0,1,http://www.iupui.edu/Ã,301,,"(39.1653,-86.5264)",no_wayback,2018-01-11,"Department of BioHealth Informatics, School of Informatics and Computing, Indiana University Purdue University, 719 Indiana Ave Ste 319, Walker Plaza Building, Indianapolis, IN 46202, United States.","Budak G, Dash S, Srivastava R, Lachke SA, Janga SC",,"NIGMS NIH HHS, National Eye Institute, NEI NIH HHS, National Institutes of Health, National Institute of General Medical Sciences",8.0,United States +29512401,GENIPAC,0.994062304,GENIPAC,0.994062304,,0,1,http://genipac.cancerresearch.my,301,,"(1.2897,103.8501)",http://web.archive.org/web/20220419081926/https://genipac.cancerresearch.my/,2018-03-07,"1 Head and Neck Cancer Research Team, Cancer Research Malaysia, Subang Jaya, Malaysia.","Lee BKB, Gan CP, Chang JK, Tan JL, Fadlullah MZ, Abdul Rahman ZA, Prime SS, Gutkind JS, Liew CS, Khang TF, 
Tan AC, Cheong SC",,,5.0,"Malaysia, Malaysia" +29683130,EnviroAtlas,0.60561341,EnviroAtlas,0.60561341,,0,1,http://enviroatlas.epa.gov,301,,"(38.8951,-77.0364)",http://web.archive.org/web/20221104193251/https://enviroatlas.epa.gov/,2017-11-01,"National Exposure Research Laboratory, U.S. Environmental Protection Agency, Office of Research and Development, Research Triangle Park, Durham, NC 27711, U.S.A.","Wickham J, Riitters K, Vogt P, Costanza J, Neale A",,"Intramural EPA, U.S. Environmental Protection Agency, Office of Research and Development",1.0, +29688353,FSD,0.992719412,FSD,0.992719412,Fungal Stress Database,0.971582294,1,http://www.fung-stress.org,301,,"(52.3740,4.8897)",http://web.archive.org/web/20221006174317/https://www.fung-stress.org/,2018-01-01,"Department of Biotechnology and Microbiology, Faculty of Science and Technology, University of Debrecen, Egyetem tér 1, H-4032 Debrecen, Hungary.","Orosz E, van de Wiele N, Emri T, Zhou M, Robert V, de Vries RP, Pócsi I",,Dutch Research Council (NWO),5.0,Hungary +29890119,FlyXCDB,0.993109286,FlyXCDB,0.993109286,,0,1,http://prodata.swmed.edu/FlyXCDB,301,,"(32.8252,-96.8388)",http://web.archive.org/web/20220728160824/http://prodata.swmed.edu/FlyXCDB/,2018-06-08,"Howard Hughes Medical Institute, University of Texas Southwestern Medical Center, Dallas, TX 75390, USA. 
Electronic address: jpei@chop.swmed.edu.","Pei J, Kinch LN, Grishin NV",,"National Institutes of Health, Welch Foundation, NIGMS NIH HHS",1.0,United States +29976644,FusoPortal,0.994699359,FusoPortal,0.994699359,,0,1,http://fusoportal.org,200,,"(39.0437,-77.4875)",http://web.archive.org/web/20220618175359/http://fusoportal.org/,2018-07-05,"Department of Biochemistry, Virginia Polytechnic Institute and State University, Blacksburg, Virginia, USA.","Sanders BE, Umana A, Lemkul JA, Slade DJ",,,5.0,United States +30008982,EMDB,0.995346447,EMDB,0.995346447,Electron Microscopy Data Bank,0.956331355,1,"http://emdb-empiar.org, http://emdb-empiar.org/emstats","301, 301",,"(51.5085,-0.1257), (51.5085,-0.1257)","no_wayback, no_wayback",2018-03-01,"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton CB10 1SD, UK.","Abbott S, Iudin A, Korir PK, Somasundharam S, Patwardhan A",,"Medical Research Council, Wellcome Trust, Wellcome Trust, Wellcome Trust, NIGMS NIH HHS, Biotechnology and Biological Sciences Research Council",9.0, +30053265,exoRBase,0.996153355,exoRBase,0.996153355,,0,1,http://www.exoRBase.org,200,,"(38.8951,-77.0364)",http://web.archive.org/web/20220824141404/http://www.exorbase.org/,2018-01-01,"Fudan University Shanghai Cancer Center and Institutes of Biomedical Sciences, Shanghai Medical College, Fudan University, Shanghai 200032, China.","Li S, Li Y, Chen B, Zhao J, Yu S, Tang Y, Zheng Q, Li Y, Wang P, He X, Huang S",,,171.0,China +30143675,ESCC ATLAS,0.917660445,ESCC ATLAS,0.917660445,,0,1,http://www.esccatlas.org,302,,"(40.7143,-74.0060)",http://web.archive.org/web/20220223064845/https://www.esccatlas.org/,2018-08-24,"Mbiomics, Manipal, Karnataka, India.","Tungekar A, Mandarthi S, Mandaviya PR, Gadekar VP, Tantry A, Kotian S, Reddy J, Prabha D, Bhat S, Sahay S, Mascarenhas R, Badkillaya RR, Nagasampige MK, Yelnadu M, Pawar H, Hebbar P, Kashyap MK",,,5.0,India 
+30235322,GC4S,0.951814741,GC4S,0.951814741,,0,1,http://www.sing-group.org/gc4s,301,,"(42.3367,-7.8641)",http://web.archive.org/web/20221013135854/http://www.sing-group.org/gc4s/,2018-09-20,"ESEI-Escuela Superior de Ingeniería Informática, Universidad de Vigo, Ourense, Spain.","López-Fernández H, Reboiro-Jato M, Glez-Peña D, Laza R, Pavón R, Fdez-Riverola F",,"Consellería de Cultura, Educación e Ordenación Universitaria, Xunta de Galicia",1.0,Spain +30268934,GAAD,0.996480703,GAAD,0.996480703,Gene and Autoimmiune Disease Association Database,0.918741842,1,http://gaad.medgenius.info,406,,,http://web.archive.org/web/20221017141551/https://gaad.medgenius.info/,2018-08-01,"Department of Blood Transfusion, Tangdu Hospital, Fourth Military Medical University, Xi'an 710032, China.","Lu G, Hao X, Chen WH, Mu S",,,6.0,China +30321400,EWASdb,0.995485365,EWASdb,0.995485365,,0,1,"http://www.ewas.org.cn/ewasdb, http://www.bioapp.org/ewasdb","406, 301",,", (36.0649,120.3804)","no_wayback, http://web.archive.org/web/20220518232629/http://www.bioapp.org/ewasdb/",2019-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.","Liu D, Zhao L, Wang Z, Zhou X, Fan X, Li Y, Xu J, Hu S, Niu M, Song X, Li Y, Zuo L, Lei C, Zhang M, Tang G, Huang M, Zhang N, Duan L, Lv H, Zhang M, Li J, Xu L, Kong F, Feng R, Jiang Y",,"Heilongjiang Education Department Fund, National Natural Science Foundation of China, National Natural Science Foundation of China, Heilongjiang Postdoctoral, China Postdoctoral Science Foundation, National Natural Science Foundation of China, Heilongjiang Education Department Fund, National Natural Science Foundation of China, Fundamental Research Funds, National Natural Science Foundation of China",11.0,China +30335161,EVmiRNA,0.996470809,EVmiRNA,0.996470809,,0,1,http://bioinfo.life.hust.edu.cn/EVmiRNA,200,,"(31.2222,121.4581)",http://web.archive.org/web/20220521011236/http://bioinfo.life.hust.edu.cn/EVmiRNA,2019-01-01,"Department of 
Bioinformatics and Systems Biology, Key Laboratory of Molecular Biophysics of the Ministry of Education, Hubei Bioinformatics and Molecular Imaging Key Laboratory, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, PR China.","Liu T, Zhang Q, Zhang J, Li C, Miao YR, Lei Q, Li Q, Guo AY",,"China Postdoctoral Science Foundation, National Natural Science Foundation of China, National Natural Science Foundation of China, China Postdoctoral Science Foundation, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China",86.0,China +30349118,GeneATLAS,0.691261196,GeneATLAS,0.691261196,,0,1,http://geneatlas.roslin.ed.ac.uk,200,,"(55.9521,-3.1965)",http://web.archive.org/web/20220810135747/http://geneatlas.roslin.ed.ac.uk/,2018-10-22,"The Roslin Institute, Royal (Dick) School of Veterinary Studies, The University of Edinburgh, Midlothian, UK.","Canela-Xandri O, Rawlik K, Tenesa A",,"Biotechnology and Biological Sciences Research Council, Medical Research Council, Medical Research Council, Medical Research Council, Medical Research Council, Medical Research Council, Medical Research Council, Medical Research Council",211.0, +30357379,EndoDB,0.996743798,EndoDB,0.996743798,,0,1,http://vibcancer.be/software-tools/endodb,301,,"(50.8505,4.3488)",http://web.archive.org/web/20201021124612/https://vibcancer.be/software-tools/endodb,2019-01-01,"Department of Oncology and Leuven Cancer Institute (LKI), Laboratory of Angiogenesis and Vascular Metabolism, KU Leuven, 3000 Leuven, Belgium.","Khan S, Taverna F, Rohlenova K, Treps L, Geldhof V, de Rooij L, Sokol L, Pircher A, Conradi LC, Kalucka J, Schoonjans L, Eelen G, Dewerchin M, Karakach T, Li X, Goveia J, Carmeliet P",,"Foundation against Cancer, Foundation against Cancer, National Natural Science Foundation of China, European Research Council, Fritz Thyssen Stiftung, National Natural Science 
Foundation of China, Austrian Science Fund FWF",28.0,Belgium +"30357393, 33270111",GENCODE,0.998485327,GENCODE,0.998485327,,0,2,http://www.gencodegenes.org,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20221101143843/https://www.gencodegenes.org/,2021-01-01,"European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","Frankish A, Diekhans M, Ferreira AM, Johnson R, Jungreis I, Loveland J, Mudge JM, Sisu C, Wright J, Armstrong J, Barnes I, Berry A, Bignell A, Carbonell Sala S, Chrast J, Cunningham F, Di Domenico T, Donaldson S, Fiddes IT, García Girón C, Gonzalez JM, Grego T, Hardy M, Hourlier T, Hunt T, Izuogu OG, Lagarde J, Martin FJ, Martínez L, Mohanan S, Muir P, Navarro FCP, Parker A, Pei B, Pozo F, Ruffier M, Schmitt BM, Stapleton E, Suner MM, Sycheva I, Uszczynska-Ratajczak B, Xu J, Yates A, Zerbino D, Zhang Y, Aken B, Choudhary JS, Gerstein M, Guigó R, Hubbard TJP, Kellis M, Paten B, Reymond A, Tress ML, Flicek P, Frankish A, Diekhans M, Jungreis I, Lagarde J, Loveland JE, Mudge JM, Sisu C, Wright JC, Armstrong J, Barnes I, Berry A, Bignell A, Boix C, Carbonell Sala S, Cunningham F, Di Domenico T, Donaldson S, Fiddes IT, García Girón C, Gonzalez JM, Grego T, Hardy M, Hourlier T, Howe KL, Hunt T, Izuogu OG, Johnson R, Martin FJ, Martínez L, Mohanan S, Muir P, Navarro FCP, Parker A, Pei B, Pozo F, Riera FC, Ruffier M, Schmitt BM, Stapleton E, Suner MM, Sycheva I, Uszczynska-Ratajczak B, Wolf MY, Xu J, Yang YT, Yates A, Zerbino D, Zhang Y, Choudhary JS, Gerstein M, Guigó R, Hubbard TJP, Kellis M, Paten B, Tress ML, Flicek P",", ","Medical Research Council, National Human Genome Research Institute, NHGRI NIH HHS, Biotechnology and Biological Sciences Research Council, Medical Research Council, Biotechnology and Biological Sciences Research Council, 
Wellcome Trust, Wellcome Trust, Medical Research Council, The British Council, NIGMS NIH HHS, National Institutes of Health, University of Bern, Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council, European Molecular Biology Laboratory, NHGRI NIH HHS, Swiss National Science Foundation",1075.0, +30357403,EncoMPASS,0.996818304,EncoMPASS,0.996818304,,0,1,http://encompass.ninds.nih.gov,301,,"(38.9807,-77.1003)",http://web.archive.org/web/20220517212642/https://encompass.ninds.nih.gov,2019-01-01,"Computational Structural Biology Section, National Institute of Neurological Disorders and Stroke, National Institutes of Health, Bethesda, MD 20892, USA.","Sarti E, Aleksandrova AA, Ganta SK, Yavatkar AS, Forrest LR",,National Institute of Neurological Disorders and Stroke,5.0,United States +30357418,EDK,0.992027998,EDK,0.992027998,Editome-Disease Knowledgebase,0.989611737,1,http://bigd.big.ac.cn/edk,301,,"(39.9075,116.3972)",http://web.archive.org/web/20210515073755/https://bigd.big.ac.cn/edk/,2019-01-01,"BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Niu G, Zou D, Li M, Zhang Y, Sang J, Xia L, Li M, Liu L, Cao J, Zhang Y, Wang P, Hu S, Hao L, Zhang Z",,"National Programs for High Technology Research and Development, Chinese Academy of Science, Chinese Academy of Sciences, National Key Research & Development Program of China, Chinese Academy of Sciences, Chinese Academy of Science, Chinese Academy of Sciences, National Programs for High Technology Research and Development",12.0,China +30364969,EWAS Atlas,0.979906976,EWAS Atlas,0.979906976,,0,1,http://bigd.big.ac.cn/ewas,301,,"(39.9075,116.3972)",http://web.archive.org/web/20210517102744/https://bigd.big.ac.cn/ewas/,2019-01-01,"BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Li M, Zou D, Li Z, Gao R, Sang J, Zhang Y, Li R, Xia L, Zhang T, Niu G, Bao Y, Zhang Z",,"International 
Partnership Program of the Chinese Academy of Sciences, National Programs for High Technology Research and Development, National Natural Science Foundation of China, Strategic Priority Research Program of the Chinese Academy of Sciences, Strategic Priority Research Program of the Chinese Academy of Sciences, National Key Research Program of China, 13th Five-year Informatization Plan of Chinese Academy of Sciences, National Programs for High Technology Research and Development",69.0,China +30365027,gcMeta,0.99671793,gcMeta,0.99671793,Global Catalogue of Metagenomics,0.914486587,1,http://gcmeta.wdcm.org,301,,"(39.9075,116.3972)",http://web.archive.org/web/20221018195331/https://gcmeta.wdcm.org/,2019-01-01,"Microbial Resource and Big Data Center, Institute of Microbiology, Chinese Academy of Sciences, Beijing 100101, China.","Shi W, Qi H, Sun Q, Fan G, Liu S, Wang J, Zhu B, Liu H, Zhao F, Wang X, Hu X, Li W, Liu J, Tian Y, Wu L, Ma J",,"National key Research Program of China, National key Research Program of China, National key Research Program of China, Developing Countries Around China, National key Research Program of China, National Science Foundation for Young Scientists of China, National key Research Program of China, 13th Five-year Informatization Plan of Chinese Academy of Sciences, 13th Five-year Informatization Plan of Chinese Academy of Sciences, Major State Basic Research Development Program, Key Research Program of the Chinese Academy of Sciences, National key Research Program of China, Strategic Priority Research Program of the Chinese Academy of Sciences",18.0,China +30365030,ETCM,0.994092822,ETCM,0.994092822,,0,1,http://www.nrc.ac.cn:9090/ETCM,"HTTPConnectionPool(host='www.nrc.ac.cn', port=9090): Max retries exceeded with url: /ETCM (Caused by ConnectTimeoutError(, 'Connection to www.nrc.ac.cn timed out. 
(connect timeout=5)'))",,,no_wayback,2019-01-01,"Institute of Chinese Materia Medica, China Academy of Chinese Medical Sciences, Beijing 100700, China.","Xu HY, Zhang YQ, Liu ZM, Chen T, Lv CY, Tang SH, Zhang XB, Zhang W, Li ZY, Zhou RR, Yang HJ, Wang XJ, Huang LQ",,"National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Central public welfare research institutes, National Natural Science Foundation of China, National Key Technology R&D Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Program of China, National Key Technology R&D Program of China, Key project at central government",125.0,"China, China" +30407583,FusionGDB,0.992917299,FusionGDB,0.992917299,fusion gene annotation DataBase,0.742048061,1,http://ccsm.uth.edu/FusionGDB,302,,"(29.7633,-95.3633)",http://web.archive.org/web/20220616064303/https://ccsm.uth.edu/FusionGDB/,2019-01-01,"Center for Computational Systems Medicine, School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, TX 77030, USA.","Kim P, Zhou X",,"National Institutes of Health, NIAMS NIH HHS, NIGMS NIH HHS, National Institutes of Health, National Institutes of Health, NIGMS NIH HHS, NCI NIH HHS",25.0,United States +30446142,EPS-DB,0.998128307,EPS-DB,0.998128307,Database,0.788730741,1,http://www.epsdatabase.com,406,,,http://web.archive.org/web/20220527020729/http://www.epsdatabase.com/,2018-10-28,"Enzyme and Protein Chemistry, Department of Biotechnology and Biomedicine, Technical University of Denmark, Søltofts Plads, Building 224, DK-2800 Kgs. Lyngby, Denmark. 
Electronic address: jbirch@epsdatabase.com.","Birch J, Van Calsteren MR, Pérez S, Svensson B",,,7.0,"Denmark, Denmark" +30476229,ENPD,0.971551538,ENPD,0.971551538,Eukaryotic nucleic acid binding protein database,0.936309212,1,http://qinlab.sls.cuhk.edu.hk/ENPD,301,,"(22.2783,114.1747)",http://web.archive.org/web/20220818224410/http://qinlab.sls.cuhk.edu.hk/ENPD/,2019-01-01,"School of Life Sciences, The Chinese University of Hong Kong, Shatin, New Territories, Hong Kong, China.","Tak Leung RW, Jiang X, Chu KH, Qin J",,"National Natural Science Foundation of China, Chinese University of Hong Kong, Chinese University of Hong Kong",1.0,"China, Hong Kong, Hong Kong" +30611878,FisOmics,0.993046761,FisOmics,0.993046761,,0,1,http://mail.nbfgr.res.in/FisOmics,301,,"(26.8393,80.9231)",http://web.archive.org/web/20220527064612/https://mail.nbfgr.res.in/FisOmics/,2019-01-03,"Molecular Biology and Biotechnology Division, ICAR-National Bureau of Fish Genetic Resources, Canal Ring Road, P.O.- Dilkusha, Lucknow 226 002, India.","Pathak AK, Rashid I, Nagpure NS, Kumar R, Pati R, Singh M, Murali S, Kushwaha B, Kumar D, Rai A",,"Farmer Welfare, Centre for Agricultural Bioinformatics, Department of Agricultural Research and Education, Ministry of Agriculture, ICAR-Indian Agricultural Statistics Research Institute",1.0,India +30788500,EnDisease,0.997681141,EnDisease,0.997681141,,0,1,http://bioinfo.au.tsinghua.edu.cn/endisease,301,,"(39.9906,116.2887)",no_wayback,2019-01-01,"MOE Key Laboratory of Bioinformatics; Bioinformatics Division and Center for Synthetic & Systems Biology, Department of Automation, Tsinghua University, Beijing, China.","Zeng W, Min X, Jiang R",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Natural Science Foundation of China, Tsinghua-Fuzhou Institute for Data Technology",6.0,China 
+30864352,GenCoNet,0.993595004,GenCoNet,0.993595004,,0,1,http://genconet.kalis-amts.de,301,,"(49.0094,8.4044)",http://web.archive.org/web/20220618031750/https://genconet.kalis-amts.de/,2018-12-25,"Bielefeld University, Bioinformatics/Medical Informatics Department, Bielefeld, Germany.","Shoshi A, Hofestädt R, Zolotareva O, Friedrichs M, Maier A, Ivanisenko VA, Dosenko VE, Bragina EY",,"Volkswagen Foundation, International DFG Research Training Group GRK",2.0,Germany +30951672,exRNA Atlas,0.840340992,exRNA Atlas,0.840340992,,0,1,http://exrna-atlas.org,200,,"(29.7633,-95.3633)",http://web.archive.org/web/20221016235128/https://exrna-atlas.org/,2019-04-01,"Department of Molecular and Human Genetics, Baylor College of Medicine, Houston, TX 77030, USA.","Murillo OD, Thistlethwaite W, Rozowsky J, Subramanian SL, Lucero R, Shah N, Jackson AR, Srinivasan S, Chung A, Laurent CD, Kitchen RR, Galeev T, Warrell J, Diao JA, Welsh JA, Hanspers K, Riutta A, Burgstaller-Muehlbacher S, Shah RV, Yeri A, Jenkins LM, Ahsen ME, Cordon-Cardo C, Dogra N, Gifford SM, Smith JT, Stolovitzky G, Tewari AK, Wunsch BH, Yadav KK, Danielson KM, Filant J, Moeller C, Nejad P, Paul A, Simonson B, Wong DK, Zhang X, Balaj L, Gandhi R, Sood AK, Alexander RP, Wang L, Wu C, Wong DTW, Galas DJ, Van Keuren-Jensen K, Patel T, Jones JC, Das S, Cheung KH, Pico AR, Su AI, Raffai RL, Laurent LC, Roth ME, Gerstein MB, Milosavljevic A",,"NIH, NIDDK NIH HHS, NIH, NIH, NCI NIH HHS, NCATS NIH HHS, NIH, NIH, NIH, NHLBI NIH HHS, NCI NIH HHS, NCI NIH HHS, NIH, NCI NIH HHS, NIA NIH HHS, NHLBI NIH HHS, NCATS NIH HHS, NIH, Frank McGraw Memorial Chair in Cancer Research, NIH, NCI NIH HHS, NIH, NCATS NIH HHS, NIH, NCATS NIH HHS, NCATS NIH HHS, American Cancer Society, NIH, NIH, NHLBI NIH HHS, NIDA NIH HHS, NCATS NIH HHS, NIH, NIH, NIH, NHLBI NIH HHS, NHLBI NIH HHS, NCATS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCATS NIH HHS",90.0,United States +31034103,FAT-PTM,0.994561623,FAT-PTM,0.994561623,Functional Analysis Tools for 
Post-Translational Modifications,0.982620938,1,http://bioinformatics.cse.unr.edu/fat-ptm,301,,"(39.5296,-119.8138)",http://web.archive.org/web/20220705171432/https://bioinformatics.cse.unr.edu/fat-ptm/,2019-05-31,"Department of Biochemistry and Molecular Biology, University of Nevada, Reno, Reno, NV, 89557, USA.","Cruz ER, Nguyen H, Nguyen T, Wallace IS",,,11.0,United States +31164042,EmExplorer,0.986415982,EmExplorer,0.986415982,,0,1,http://bioinfor.imu.edu.cn/emexplorer,301,,"(39.9906,116.2887)",http://web.archive.org/web/20220616003226/http://bioinfor.imu.edu.cn/emexplorer/,2019-06-05,"1 State Key Laboratory of Reproductive Regulation and Breeding of Grassland Livestock, College of Life Sciences, Inner Mongolia University , Hohhot 010070 , People's Republic of China.","Hu B, Zheng L, Long C, Song M, Li T, Yang L, Zuo Y",,"National Nature Scientific Foundation of China, National Nature Scientific Foundation of China, Fund for Excellent Young Scholars of Inner Mongolia, Program for Young Talents of Science and Technology in Universities of Inner Mongolia Autonomous Region",12.0,"China, Mongolia" +31263866,ESID,0.5849545,ESID,0.5849545,,0,1,"http://esid.org/Working-Parties/Registry-Working-Party/ESID-Registry, http://cci-esid-reg-demo-app.uniklinik-freiburg.de/EERS","301, 301",,"(59.3294,18.0687), (47.9959,7.8522)","http://web.archive.org/web/20220812150726/https://esid.org/Working-Parties/Registry-Working-Party/ESID-Registry/, no_wayback",2019-12-01,Institute of Medical Biometry and Statistics.,"Scheible R, Rusch S, Guzman D, Mahlaoui N, Ehl S, Kindle G",,"Plasma Protein Therapeutics Association Europe, EURO-POLICY-PID, BMBF, BMBF, BMBF, BMBF, ESID society, European and National Grants",3.0, +31277321,GEDS,0.955554724,GEDS,0.955554724,Expression Display Server,0.659791191,1,http://bioinfo.life.hust.edu.cn/web/GEDS,301,,"(31.2222,121.4581)",http://web.archive.org/web/20220309020829/http://bioinfo.life.hust.edu.cn/web/GEDS/,2019-07-03,"Department of Bioinformatics 
and Systems Biology, Hubei Bioinformatics and Molecular Imaging Key Laboratory, Key Laboratory of Molecular Biophysics of the Ministry of Education, College of Life Science and Technology, Huazhong University of Science and Technology, Hubei, Wuhan 430074, China.","Xia M, Liu CJ, Zhang Q, Guo AY",,"National Natural Science Foundation of China, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities",7.0,China +31343654,eyeIntegration,0.929627955,eyeIntegration,0.929627955,,0,1,http://eyeIntegration.nei.nih.gov,301,,"(38.9807,-77.1003)",http://web.archive.org/web/20220619125639/https://eyeintegration.nei.nih.gov/,2019-07-01,"Ophthalmic Genetics and Visual Function Branch, National Eye Institute, National Institutes of Health, Bethesda, Maryland, United States.","Swamy V, McGaughey D",,,9.0,United States +31412866,ETM-DB,0.991336733,ETM-DB,0.991336733,,0,1,http://biosoft.kaist.ac.kr/etm,301,,"(36.3686,127.3615)",no_wayback,2019-08-14,"Department of Bio and Brain Engineering, Korea Advanced Institute of Science and Technology, Daejeon, 34141, South Korea.","Bultum LE, Woyessa AM, Lee D",,,4.0, +31584095,EWAS,0.916500092,EWAS,0.916500092,,0,1,http://bigd.big.ac.cn/ewas/datahub,301,,"(39.9075,116.3972)",http://web.archive.org/web/20210103212946/https://bigd.big.ac.cn/ewas/datahub,2020-01-01,"National Genomics Data Center, Beijing 100101, China.","Xiong Z, Li M, Yang F, Ma Y, Sang J, Li R, Li Z, Zhang Z, Bao Y",,"National Key Research and Development Program of China, Chinese Academy of Sciences, 13th Five-year Informatization Plan of Chinese Academy of Sciences, National Key Research and Development Program of China, International Partnership Program of the Chinese Academy of Sciences, National Key Research and Development Program of China",15.0,China +31592084,GCGVD,0.976200461,GCGVD,0.976200461,The Grass Carp Genomic Visualization 
Database,0.885653893,1,http://122.112.216.104,200,,"(31.2222,121.4581)",http://web.archive.org/web/20220617192619/http://122.112.216.104/,2019-08-07,"College of Information Technology, Shanghai Ocean University, Shanghai, 201306, China.","Tang M, Lu Y, Xiong Z, Chen M, Qin Y",,,2.0,China +31598693,EuRBPDB,0.998039126,EuRBPDB,0.998039126,,0,1,http://EuRBPDB.syshospital.org,301,,"(22.2783,114.1747)",http://web.archive.org/web/20211207063709/http://eurbpdb.syshospital.org/,2020-01-01,"Guangdong Provincial Key Laboratory of Malignant Tumor Epigenetics and Gene Regulation, Sun Yat-Sen Memorial Hospital, Sun Yat-Sen University, Guangzhou 510120, China.","Liao JY, Yang B, Zhang YC, Wang XJ, Ye Y, Peng JW, Yang ZZ, He JH, Zhang Y, Hu K, Lin DC, Yin D",,"Tip-top Scientific and Technical Innovative Youth Talents of Guangdong special support program, Guangzhou Bureau of Science and Information Technology, Guangzhou Bureau of Science and Information Technology, Natural Science Foundation of China, Guangzhou Bureau of Science and Information Technology, Guangzhou Bureau of Science and Information Technology, Natural Science Foundation of China, Natural Science Foundation of China, Natural Science Foundation of China, Tip-top Scientific and Technical Innovative Youth Talents of Guangdong special support program, Natural Science Foundation of China, Natural Science Foundation of China, Natural Science Foundation of China, Guangzhou Bureau of Science and Information Technology, Natural Science Foundation of China",15.0,China +31642488,ExonSkipDB,0.997633815,ExonSkipDB,0.997633815,,0,1,http://ccsm.uth.edu/ExonSkipDB,302,,"(29.7633,-95.3633)",http://web.archive.org/web/20220616230051/https://ccsm.uth.edu/ExonSkipDB/,2020-01-01,"School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, TX 77030, USA.","Kim P, Yang M, Yiya K, Zhao W, Zhou X",,"NIGMS NIH HHS, NIAMS NIH HHS, NCI NIH HHS, National Institutes of Health, National Institutes of 
Health, National Institutes of Health",17.0,United States +31642496,Gene4Denovo,0.993604973,Gene4Denovo,0.993604973,,0,1,http://www.genemed.tech/gene4denovo,301,,"(22.2783,114.1747)",http://web.archive.org/web/20220624003659/http://www.genemed.tech/gene4denovo/,2020-01-01,"National Clinical Research Centre for Geriatric Disorders, Department of Geriatrics, Xiangya Hospital, Central South University, Changsha, Hunan, China.","Zhao G, Li K, Li B, Wang Z, Fang Z, Wang X, Zhang Y, Luo T, Zhou Q, Wang L, Xie Y, Wang Y, Chen Q, Xia L, Tang Y, Tang B, Xia K, Li J",,"CAST, CAST, National Natural Science Foundation of China, Natural Science Foundation for Young Scientists of Hunan Province, China",17.0,China +31665430,ENdb,0.997351289,ENdb,0.997351289,,0,1,http://www.licpathway.net/ENdb,301,,"(22.2783,114.1747)",http://web.archive.org/web/20221017060812/http://licpathway.net/ENdb/,2020-01-01,"School of Medical Informatics, Daqing Campus, Harbin Medical University. Daqing 163319, China.","Bai X, Shi S, Ai B, Jiang Y, Liu Y, Han X, Xu M, Pan Q, Wang F, Wang Q, Zhang J, Li X, Feng C, Li Y, Wang Y, Song Y, Feng K, Li C",,"Natural Science Foundation of Heilongjiang Province, Harbin Medical University, The Fundamental Research Funds for the Provincial Universities, National Natural Science Foundation of China, National Natural Science Foundation of China, Natural Science Foundation of Heilongjiang Province",12.0,China +31686102,FoldamerDB,0.997375488,FoldamerDB,0.997375488,,0,1,http://foldamerdb.ttk.hu,200,,"(47.4984,19.0404)",http://web.archive.org/web/20221016221330/http://foldamerdb.ttk.hu/,2020-01-01,"MTA TTK Lendület Biomolecular Self-Assembly Research Group, Institute of Materials and Environmental Chemistry, Research Centre for Natural Sciences, Hungarian Academy of Sciences, H-1117 Budapest, Magyar Tudósok krt. 
2, Hungary.","Nizami B, Bereczki-Szakál D, Varró N, El Battioui K, Nagaraj VU, Szigyártó IC, Mándity I, Beke-Somfai T",,"National Competitiveness and Excellence Program, Hungarian Academy of Sciences",4.0,Hungary +31696036,FlavoDb,0.994724214,FlavoDb,0.994724214,,0,1,http://bioinfo.net.in/flavodb/home.html,403,,,no_wayback,2019-10-31,"1Bioinformatics Centre, Savitribai Phule Pune University, Pune, 411007 India.","Kolte BS, Londhe SR, Bagul KT, Pawnikar SP, Goundge MB, Gacche RN, Meshram RJ",,,0.0,India +31774482,FluReassort,0.980082214,FluReassort,0.980082214,,0,1,http://www.jianglab.tech/FluReassort,301,,"(31.2222,121.4581)",no_wayback,2020-12-01,"Suzhou Institute of Systems Medicine, Center of Systems Medicine, Chinese Academy of Medical Sciences & Peking Union Medical College.","Ding X, Yuan X, Mao L, Wu A, Jiang T",,"National Key Plan for Scientific Research and Development of China, Central Public-Interest Scientific Institution Basal Research Fund, National Natural Science Foundation of China, Non-profit Central Research Institute Fund of Chinese Academy of Medical Sciences, Central Public-Interest Scientific Institution Basal Research Fund, Central Public-Interest Scientific Institution Basal Research Fund, National Basic Research Program of China, National Natural Science Foundation of China, CAMS Initiative for Innovative Medicine",2.0, +31844835,GenFam,0.990917087,GenFam,0.990917087,,0,1,"http://mandadilab.webfactional.com/home/, http://mandadilab.webfactional.com/home/dload","404, 404",,", ","http://web.archive.org/web/20200802064828/http://mandadilab.webfactional.com:80/home/, http://web.archive.org/web/20200802064833/http://mandadilab.webfactional.com:80/home/dload/",2019-12-04,Texas A&M AgriLife Research & Extension Center Weslaco TX USA.,"Bedre R, Mandadi K",,,2.0,United States +31887789,ETph,0.99349612,ETph,0.99349612,,0,1,http://klab.sjtu.edu.cn/enhancer,302,,"(31.2222,121.4581)",no_wayback,2019-12-30,"Department of Animal Science, School of 
Agriculture and Biology, Shanghai Jiao Tong University, Shanghai, 200240, China.","Sun H, Liao Y, Wang Z, Zhang Z, Oyelami FO, Olasege BS, Wang Q, Pan Y",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Shanghai Jiao Tong University, Interdisciplinary Program of Shanghai Jiao Tong University, Shanghai Jiao Tong University, National Natural Science Foundation of China, Interdisciplinary Program of Shanghai Jiao Tong University",0.0,China +32008039,EPSD,0.98701781,EPSD,0.98701781,eukaryotic phosphorylation site database,0.948447161,1,http://epsd.biocuckoo.cn,200,,"(22.2783,114.1747)",no_wayback,2021-01-01,Huazhong University of Science and Technology.,"Lin S, Wang C, Zhou J, Shi Y, Ruan C, Tu Y, Yao L, Peng D, Xue Y",,"Fundamental Research Funds for the Central Universities, Natural Science Foundation of China, Changjiang Scholars Program of China, Natural Science Foundation of China, National Key R&D Program, Fundamental Research Funds for the Central Universities, Natural Science Foundation of China, Natural Science Foundation of China, National Key R&D Program",8.0, +32076423,FHLdb,0.99812746,FHLdb,0.99812746,Familial Hemophagocytic,0.593275462,1,http://www.biotoclin.org/FHLdb,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20220709233439/http://www.biotoclin.org/FHLdb/,2020-01-31,"Immunology Division, Hospital Universitari Vall d'Hebron (HUVH), Vall d'Hebron Research Institute (VHIR), Department of Cell Biology, Physiology and Immunology, Autonomous University of Barcelona (UAB), Barcelona, Spain.","Viñas-Giménez L, Padilla N, Batlle-Masó L, Casals F, Rivière JG, Martínez-Gallo M, de la Cruz X, Colobran R",,Instituto de Salud Carlos III,2.0,Spain +32120139,FRCD,0.996773586,FRCD,0.996773586,,0,1,http://www.rxnfinder.org/frcd,301,,"(39.9075,116.3972)",no_wayback,2020-02-24,"CAS Key Laboratory of Computational Biology, CAS-MPG Partner Institute for Computational Biology, 
Shanghai Institute of Nutrition and Health, Shanghai Institutes for Biological Sciences, University of Chinese Academy of Sciences, Chinese Academy of Sciences, Shanghai 200333, PR China. Electronic address: zhangdachuan@picb.ac.cn.","Zhang D, Gong L, Ding S, Tian Y, Jia C, Liu D, Han M, Cheng X, Sun D, Cai P, Tian Y, Yuan L, Tu W, Chen J, Wu A, Hu QN",,"Chinese Academy of Sciences of China, Natural Science Foundation of Tianjin, China, National Key Research and Development Program of China, National Key Research and Development Program of China, CAS STS program, National Natural Science Foundation of China, National Key Research and Development Program of China, National Natural Science Foundation of China, Scientific Research Conditions and Technical Support System Program",1.0,China +32123502,FrogID,0.994272232,FrogID,0.994272232,,0,1,http://www.frogid.net.au,301,,"(-33.8678,151.2073)",http://web.archive.org/web/20221106023532/https://www.frogid.net.au/,2020-02-17,"Australian Museum Research Institute, Australian Museum, 1 William Street, Sydney, New South Wales 2010, Australia Australian Museum Research Institute Sydney Australia.","Rowley JJL, Callaghan CT",,Australian Museum,1.0,"Australia, Australia" +32219413,FerrDb,0.984397888,FerrDb,0.984397888,,0,1,http://www.zhounan.org/ferrdb,301,,"(45.8399,-119.7006)",http://web.archive.org/web/20221022001623/http://www.zhounan.org/ferrdb/,2020-01-01,"Affiliated Brain Hospital of Guangzhou Medical University, 36 Mingxin Rd, Guangzhou, 510370, China.","Zhou N, Bao J",,"Guangzhou Municipal Key Discipline in Medicine, Key Laboratory for Innovation Platform Plan, Science and Technology Program of Guangzhou, China, Guangzhou Municipal Psychiatric Disease Clinical Transformation Laboratory",122.0,China +32459338,EpiRegio,0.996769607,EpiRegio,0.996769607,,0,1,http://epiregio.de,301,,"(50.0837,8.6440)",http://web.archive.org/web/20220712221234/https://epiregio.de/,2020-07-01,"Institute for Cardiovascular Regeneration, Goethe 
University Hospital, 60590 Frankfurt am Main, Germany.","Baumgarten N, Hecker D, Karunanithi S, Schmidt F, List M, Schulz MH",,"Deutsches Zentrum für Herz-Kreislaufforschung, Deutsche Forschungsgemeinschaft",9.0,Germany +32591816,ExoBCD,0.998073876,ExoBCD,0.998073876,,0,1,http://exobcd.liumwei.org,301,,"(1.3557,103.8237)",http://web.archive.org/web/20211006210653/https://exobcd.liumwei.org/,2021-05-01,"Key Laboratory of Clinical Laboratory Diagnostics, College of Laboratory Medicine, Chongqing Medical University, Chongqing, China.","Wang X, Chai Z, Pan G, Hao Y, Li B, Ye T, Li Y, Long F, Xia L, Liu M",,"Natural Science Foundation of Chongqing of China, Science and Technology Innovation Commission of Shenzhen, Natural Science Foundation of Chongqing of China, Science Innovation Program of College of Laboratory Medicine; Chongqing Medical University, Science and Technology Research Program of Chongqing Municipal Education Commission",1.0,China +32681912,ExoceRNA Atlas,0.949007857,ExoceRNA Atlas,0.949007857,,0,1,http://www.exocerna-atlas.com/exoceRNA,"HTTPConnectionPool(host='www.exocerna-atlas.com', port=80): Max retries exceeded with url: /exoceRNA (Caused by ConnectTimeoutError(, 'Connection to www.exocerna-atlas.com timed out. 
(connect timeout=5)'))",,,no_wayback,2020-07-15,"College of Chemistry, Sichuan University, Chengdu, Sichuan, China.","Xu L, Zhang L, Wang T, Wu Y, Pu X, Li M, Guo Y",,"National Natural Science Foundation of China, National Natural Science Foundation of China",4.0,China +32726198,EnteroBase,0.997007787,EnteroBase,0.997007787,,0,1,http://enterobase.warwick.ac.uk,301,,"(52.4066,-1.5122)",http://web.archive.org/web/20221028181744/https://enterobase.warwick.ac.uk/,2020-07-29,"Leibniz Institute DSMZ, Braunschweig, Germany.","Frentrup M, Zhou Z, Steglich M, Meier-Kolthoff JP, Göker M, Riedel T, Bunk B, Spröer C, Overmann J, Blaschitz M, Indra A, von Müller L, Kohl TA, Niemann S, Seyboldt C, Klawonn F, Kumar N, Lawley TD, García-Fernández S, Cantón R, Del Campo R, Zimmermann O, Groß U, Achtman M, Nübel U",,"Wellcome Trust, Wellcome Trust, European Union Horizon 2020, Medical Research Council, Deutsches Zentrum für Infektionsforschung, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Niedersächsische Ministerium für Wissenschaft und Kultur",10.0,Germany +32862462,ExED,0.984499017,ExED,0.984499017,Expansin Engineering Database,0.970954204,1,http://exed.biocatnet.de,301,,"(48.7823,9.1770)",http://web.archive.org/web/20220122170927/https://exed.biocatnet.de/,2020-09-09,"Institute of Biochemistry and Technical Biochemistry, University of Stuttgart, Stuttgart, Germany.","Lohoff C, Buchholz PCF, Le Roes-Hill M, Pleiss J",,"Bundesministerium für Bildung und Forschung, Bundesministerium für Bildung und Forschung",2.0,Germany +33002112,EnzyMine,0.996929705,EnzyMine,0.996929705,,0,1,http://www.rxnfinder.org/enzymine,301,,"(39.9075,116.3972)",http://web.archive.org/web/20220218132529/http://www.rxnfinder.org/enzymine/,2020-10-01,"CAS Key Laboratory of Computational Biology, CAS-MPG Partner Institute for Computational Biology, Shanghai Institute of Nutrition and Health, University of Chinese Academy of Sciences, Chinese Academy of Sciences, Shanghai 200333, P. R. 
China.","Sun D, Cheng X, Tian Y, Ding S, Zhang D, Cai P, Hu QN",,"National Natural Science Foundation of China, the Natural Science Foundation of Tianjin, CAS STS programme, Scientific Research Conditions and Technical Support System Programme, International Partnership Programme of Chinese Academy of Sciences of China, National Key Research and Development Programme of China",0.0,China +33119759,gcType,0.995411217,gcType,0.995411217,Global Catalogue of Type Strain,0.834868125,1,http://gctype.wdcm.org,301,,"(39.9075,116.3972)",http://web.archive.org/web/20221019185529/https://gctype.wdcm.org/,2021-01-01,"Microbial Resource and Big Data Center, Institute of Microbiology, Chinese Academy of Sciences, Beijing 100101, China.","Shi W, Sun Q, Fan G, Hideaki S, Moriya O, Itoh T, Zhou Y, Cai M, Kim SG, Lee JS, Sedlacek I, Arahal DR, Lucena T, Kawasaki H, Evtushenko L, Weir BS, Alexander S, Dénes D, Tanasupawat S, Eurwilaichitr L, Ingsriswang S, Gomez-Gil B, Hazbón MH, Riojas MA, Suwannachart C, Yao S, Vandamme P, Peng F, Chen Z, Liu D, Sun X, Zhang X, Zhou Y, Meng Z, Wu L, Ma J",,"National Key Research Program of China, National Key Research Program of China, Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences, National Science Foundation for Young Scientists of China, National Key Research Program of China, European Social Fund, Chinese Academy of Sciences, National Science Foundation for Young Scientists of China",11.0,China +33166383,FireProtDB,0.994380713,FireProtDB,0.994380713,,0,1,http://loschmidt.chemi.muni.cz/fireprotdb,301,,"(49.1952,16.6080)",http://web.archive.org/web/20220303211441/https://loschmidt.chemi.muni.cz/fireprotdb/,2021-01-01,"Loschmidt Laboratories, Department of Experimental Biology and RECETOX, Masaryk University, Brno, Czech Republic.","Stourac J, Dubrava J, Musil M, Horackova J, Damborsky J, Mazurenko S, Bednar D",,"Czech Science Foundation, The Ministry 
of Education, Youth and Sports, Horizon 2020, Brno University of Technology, The Ministry of Education, Youth and Sports, The Ministry of Education, Youth and Sports, The Ministry of Education, Youth and Sports, Operational Programme Research, Development and Education",5.0, +33203359,FishDB,0.997222185,FishDB,0.997222185,,0,1,http://fishdb.ihb.ac.cn,"HTTPConnectionPool(host='fishdb.ihb.ac.cn', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to fishdb.ihb.ac.cn timed out. (connect timeout=5)'))",,,no_wayback,2020-11-17,"State Key Laboratory of Freshwater Ecology and Biotechnology, Institute of Hydrobiology, Chinese Academy of Sciences, Wuhan, 430072, China.","Yang L, Xu Z, Zeng H, Sun N, Wu B, Wang C, Bo J, Li L, Dong Y, He S",,National Natural Science Foundation of China,0.0,China +33497436,FifBase,0.995629787,FifBase,0.995629787,,0,1,http://www.nwsuaflmz.com/FifBase,406,,,no_wayback,2021-09-01,"College of Life Sciences, Northwest A&F University, Yangling, Shaanxi, China.","Li H, Hou J, Chen Z, Zeng J, Ni Y, Li Y, Xiao X, Zhou Y, Zhang N, Long D, Liu H, Yang L, Bai X, Li Q, Li T, Che D, Li L, Wang X, Zhang P, Liao M",,"National Natural Science Foundation of China, National Natural Science Foundation of China, China National Basic Research Program, Program of Shaanxi Province Science and Technology Innovation Team, National Natural Science Foundation of China, Mathematical Tianyuan Fund",0.0,China +33511767,FAWMine,0.997773767,FAWMine,0.997773767,Fall Armyworm Genome Database,0.988626023,1,http://159.226.67.243:8080/fawmine,"HTTPConnectionPool(host='159.226.67.243', port=8080): Max retries exceeded with url: /fawmine (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,no_wayback,2021-01-29,"Beijing Institutes of Life Science, Chinese Academy of Sciences, Beijing, 100101, China.","Yang P, Wang D, Guo W, Kang L",,"National Natural Science Foundation of China, The State Key 
Laboratory of Integrated Management of Pest Insects and Rodents, National Natural Science Foundation of China",0.0,China +33511845,FMODB,0.992171586,FMODB,0.992171586,FMO Database,0.612398446,1,http://drugdesign.riken.jp/FMODB,301,,"(35.4333,139.6500)",http://web.archive.org/web/20220302235933/https://drugdesign.riken.jp/FMODB/,2021-01-29,"RIKEN Center for Biosystems Dynamics Research, 1-7-22 Suehiro-cho Tsurumi-ku, Yokohama, Kanagawa 230-0045, Japan.","Takaya D, Watanabe C, Nagase S, Kamisaka K, Okiyama Y, Moriwaki H, Yuki H, Sato T, Kurita N, Yagi Y, Takagi T, Kawashita N, Takaba K, Ozawa T, Takimoto-Kamimura M, Tanaka S, Fukuzawa K, Honma T",,"Precursory Research for Embryonic Science and Technology, Japan Agency for Medical Research and Development, Japan Society for the Promotion of Science",6.0,Japan +33599246,Gemma,0.981963456,Gemma,0.981963456,,0,1,http://gemma.msl.ubc.ca/home.html,302,,"(49.2497,-123.1693)",http://web.archive.org/web/20220519000245/https://gemma.msl.ubc.ca/home.html,2021-02-01,"Genome Science and Technology Graduate Program, University of British Columbia, Vancouver, BC V6T1Z4, Canada.","Lim N, Tesar S, Belmadani M, Poirier-Morency G, Mancarci BO, Sicherman J, Jacobson M, Leong J, Tan P, Pavlidis P",,"Natural Sciences and Engineering Research Council of Canada, University of British Columbia Four–Year Doctoral Fellowship, National Institute of Mental Health",5.0,Canada +33897975,FGDB,0.979747037,FGDB,0.979747037,FSH Glycans DataBase,0.933014452,1,http://fgdb.unmc.edu,"HTTPConnectionPool(host='fgdb.unmc.edu', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to fgdb.unmc.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210725132201/https://fgdb.unmc.edu/,2021-03-22,"Department of Genetics, Cell Biology and Anatomy, University of Nebraska Medical Center, Omaha, NE 68198, United States.","Shakyawar SK, Pandey S, Harvey DJ, Bousfield G, Guda C",,NIH,0.0,United States +33898816,FermFooDb,0.998155117,FermFooDb,0.998155117,,0,1,http://webs.iiitd.edu.in/raghava/fermfoodb,301,,"(28.6453,77.2128)",http://web.archive.org/web/20220617134244/https://webs.iiitd.edu.in/raghava/fermfoodb/,2021-04-08,"Centre for Environmental Sciences and Resilient Agriculture, ICAR-IARI, New Delhi 110012, India.","Chaudhary A, Bhalla S, Patiyal S, Raghava GPS, Sahni G",,,4.0,India +33981200,Gene4PD,0.994049946,Gene4PD,0.994049946,,0,1,http://genemed.tech/gene4pd,301,,"(22.2783,114.1747)",http://web.archive.org/web/20220617172937/http://www.genemed.tech/gene4pd/,2021-04-26,"National Clinical Research Center for Geriatric Disorders, Department of Geriatrics, Xiangya Hospital, Central South University, Changsha, China.","Li B, Zhao G, Zhou Q, Xie Y, Wang Z, Fang Z, Lu B, Qin L, Zhao Y, Zhang R, Jiang L, Pan H, He Y, Wang X, Luo T, Zhang Y, Wang Y, Chen Q, Liu Z, Guo J, Tang B, Li J",,,2.0,China +33995920,ExVe,0.89774555,ExVe,0.89774555,,0,1,http://exve.icc.fiocruz.br,200,,"(-22.9064,-43.1822)",http://web.archive.org/web/20221020041602/http://exve.icc.fiocruz.br/,2021-04-17,"Instituto Carlos Chagas, FIOCRUZ, Rua Prof. 
Algacyr Munhoz Mader, 3775, CEP 81350-010, Curitiba/PR, Brazil.","Parreira VDSC, Santos LGC, Rodrigues ML, Passetti F",,"Conselho Nacional de Desenvolvimento Científico e Tecnológico, Coordenação de Aperfeiçoamento de Pessoal de Nível Superior, FIOCRUZ",0.0,Brazil +34010390,Echinobase,0.997653604,Echinobase,0.997653604,,0,1,http://echinobase.org,302,,"(51.0501,-114.0853)",http://web.archive.org/web/20220215020724/http://www.echinobase.org/,2021-05-01,"Department of Biological Sciences, Carnegie Mellon University, 5000 Forbes Avenue, Pittsburgh, PA 15213, USA.","Foley S, Ku C, Arshinoff B, Lotay V, Karimi K, Vize PD, Hinman V",,"National Institute of Health, National Science Foundation, National Science Foundation, Binational Science Foundation",3.0,United States +34048547,emiRIT,0.994740963,emiRIT,0.994740963,extracting miRNA Information from Text,0.961960316,1,http://research.bioinformatics.udel.edu/emirit,301,,"(39.6837,-75.7497)",http://web.archive.org/web/20220802140209/https://research.bioinformatics.udel.edu/emirit/,2021-05-01,"Department of Computer and Information Sciences, University of Delaware, 101 Smith Hall, 18 Amstel Ave, Newark, DE 19716, USA.","Roychowdhury D, Gupta S, Qin X, Arighi CN, Vijay-Shanker K",,,3.0,United States +34085038,EyeDiseases,0.998263299,EyeDiseases,0.998263299,,0,1,http://eyediseases.bio-data.cn,301,,"(22.2783,114.1747)",http://web.archive.org/web/20220501182452/https://eyediseases.bio-data.cn/,2021-06-01,"School of Ophthalmology & Optometry and Eye Hospital, Wenzhou Medical University, Wenzhou 325027, China.","Yuan J, Chen F, Fan D, Jiang Q, Xue Z, Zhang J, Yu X, Li K, Qu J, Su J",,"Zhejiang Provincial Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",0.0,China +34158935,GEMI,0.990873575,GEMI,0.990873575,Global Earth Mineral 
Inventory,0.72698319,1,http://dx.deepcarbon.net/11121/6200-6954-6634-8243-CC,303,,"(42.6526,-73.7562)",no_wayback,2020-11-11,"Tetherless World Constellation, Rensselaer Polytechnic Institute, Troy, NY, USA.","Prabhu A, Morrison SM, Eleish A, Zhong H, Huang F, Golden JJ, Perry SN, Hummer DR, Ralph J, Runyon SE, Fontaine K, Krivovichev S, Downs RT, Hazen RM, Fox P",,"Russian Science Foundation, Alfred P. Sloan Foundation, Intramural NASA, John Templeton Foundation, W. M. Keck Foundation",0.0,United States +34220930,FAANG,0.989271164,FAANG,0.989271164,Functional Annotation of,0.782491729,1,http://data.faang.org,200,,"(51.5085,-0.1257)",http://web.archive.org/web/20221005123920/https://data.faang.org/,2021-06-17,"European Molecular Biology Laboratory, European Bioinformatics Institute, Cambridge, United Kingdom.","Harrison PW, Sokolov A, Nayak A, Fan J, Zerbino D, Cochrane G, Flicek P",,"Horizon 2020, Biotechnology and Biological Sciences Research Council, Horizon 2020, Horizon 2020, Biotechnology and Biological Sciences Research Council",0.0,United Kingdom +34368755,fIMDb,0.865089297,fIMDb,0.865089297,ace Image Meta-Database,0.734685099,1,http://cliffordworkman.com/resources,301,,"(37.7509,-122.4153)",http://web.archive.org/web/20220520005331/https://cliffordworkman.com/resources/,2021-07-24,"Department of Neurology, University of Pennsylvania, Philadelphia, PA.","Workman CI, Chatterjee A",,,0.0, +34485385,Fuzzle,0.910124898,Fuzzle,0.910124898,,0,1,http://fuzzle.uni-bayreuth.de/2.0,301,,"(49.9478,11.5789)",no_wayback,2021-08-18,"Department of Biochemistry, University of Bayreuth, Bayreuth, Germany.","Ferruz N, Michel F, Lobos F, Schmidt S, Höcker B",,"European Research Council, European Research Council",2.0,Germany +34493866,eQTL,0.880491197,eQTL,0.880491197,,0,1,http://www.ebi.ac.uk/eqtl,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20220414021533/https://www.ebi.ac.uk/eqtl/,2021-09-06,"Institute of Computer Science, University of Tartu, Tartu, 
Estonia.","Kerimov N, Hayhurst JD, Peikova K, Manning JR, Walter P, Kolberg L, Samoviča M, Sakthivel MP, Kuzmin I, Trevanion SJ, Burdett T, Jupp S, Parkinson H, Papatheodorou I, Yates AD, Zerbino DR, Alasoo K",,"Wellcome Trust, Eesti Teadusagentuur, European Molecular Biology Laboratory, EC | European Regional Development Fund, Wellcome Trust, European Bioinformatics Institute, EC | Horizon 2020 Framework Programme, Open Targets, Eesti Teadusagentuur, Eesti Teadusagentuur, Eesti Teadusagentuur",39.0,Estonia +34538772,GenOrigin,0.986215472,GenOrigin,0.986215472,,0,1,http://genorigin.chenzxlab.cn,200,,"(31.2222,121.4581)",http://web.archive.org/web/20220527122650/http://genorigin.chenzxlab.cn/,2021-06-14,"Hubei Hongshan Laboratory, College of Biomedicine and Health, Huazhong Agricultural University, Wuhan 430070, China; Hubei Key Laboratory of Agricultural Bioinformatics, College of Life Science and Technology, Huazhong Agricultural University, Wuhan 430070, China.","Tong YB, Shi MW, Qian SH, Chen YJ, Luo ZH, Tu YX, Xiong YL, Geng YJ, Chen C, Chen ZX",,,1.0,"China, China" +34562055,FinBOL,0.991614699,FinBOL,0.991614699,,0,1,http://laji.fi/en/theme/protax,302,,"(60.2052,24.6522)",no_wayback,2021-11-03,"Department of Ecology, Swedish University of Agricultural Sciences, Uppsala, Sweden.","Roslin T, Somervuo P, Pentinsaari M, Hebert PDN, Agda J, Ahlroth P, Anttonen P, Aspi J, Blagoev G, Blanco S, Chan D, Clayhills T, deWaard J, deWaard S, Elliot T, Elo R, Haapala S, Helve E, Ilmonen J, Hirvonen P, Ho C, Itämies J, Ivanov V, Jakovlev J, Juslén A, Jussila R, Kahanpää J, Kaila L, Jari-PekkaKaitila, Kakko A, Kakko I, Karhu A, Karjalainen S, Kjaerandsen J, Koskinen J, Laasonen EM, Laasonen L, Laine E, Lampila P, Levesque-Beaudin V, Lu L, Lähteenaro M, Majuri P, Malmberg S, Manjunath R, Martikainen P, Mattila J, McKeown J, Metsälä P, Miklasevskaja M, Miller M, Miskie R, Muinonen A, Veli-MattiMukkala, Naik S, Nikolova N, Nupponen K, Ovaskainen O, Österblad I, Paasivirta L, 
Pajunen T, Parkko P, Paukkunen J, Penttinen R, Perez K, Pohjoismäki J, Prosser S, Raekunnas M, Rahulan M, Rannisto M, Ratnasingham S, Raukko P, Rinne A, Rintala T, Miranda Romo S, Salmela J, Salokannel J, Savolainen R, Schulman L, Sihvonen P, Soliman D, Sones J, Steinke C, StÃ¥hls G, Tabell J, Tiusanen M, Várkonyi G, Vesterinen EJ, Viitanen E, Vikberg V, Viitasaari M, Vilen J, Warne C, Wei C, Winqvist K, Zakharov E, Mutanen M",,,1.0,Sweden +34583740,FPADMET,0.995484829,FPADMET,0.995484829,,0,1,http://gitlab.com/vishsoft/fpadmet,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20220511050720/https://gitlab.com/vishsoft/fpadmet,2021-09-28,"Norwegian University of Science and Technology, Realfagbygget, Gløshaugen, Høgskoleringen, 7491, Trondheim, Norway. vishwesh.venkatraman@ntnu.no.",Venkatraman V,,Norges ForskningsrÃ¥d,0.0,Norway +34626475,FEDA,0.989463806,FEDA,0.989463806,Food Enzyme Database,0.845898256,1,http://feda.sciensano.be,302,,"(50.8505,4.3488)",no_wayback,2021-10-01,"Transversal Activities in Applied Genomics (TAG), Sciensano, Brussels 1050, Belgium.","Deckers M, Van Braeckel J, Vanneste K, Deforce D, Fraiture MA, Roosens NHC",,Health Food Chain Safety and Environment,0.0,Belgium +34733322,Gene4HL,0.989707867,Gene4HL,0.989707867,,0,1,http://www.genemed.tech/gene4hl,301,,"(22.2783,114.1747)",no_wayback,2021-10-18,"College of Otolaryngology Head and Neck Surgery, Chinese PLA General Hospital, Chinese PLA Medical School, Beijing, China.","Huang S, Zhao G, Wu J, Li K, Wang Q, Fu Y, Zhang H, Bi Q, Li X, Wang W, Guo C, Zhang D, Wu L, Li X, Xu H, Han M, Wang X, Lei C, Qiu X, Li Y, Li J, Dai P, Yuan Y",,,0.0,China +34741074,Fibromine,0.996297419,Fibromine,0.996297419,,0,1,http://www.fibromine.com/Fibromine,301,,"(37.9757,23.7691)",http://web.archive.org/web/20220526042155/http://www.fibromine.com/Fibromine/,2021-11-05,"Institute for Bioinnovation, Biomedical Sciences Research Center ″Alexander Fleming″, 16672, Athens, Greece.","Fanidis D, Moulos P, Aidinis 
V",,General Secretariat for Research and Technology,1.0,Greece +34793786,FCCP,0.977487087,FCCP,0.977487087,Fragrance Chemicals in Children's Products,0.952817567,1,http://cb.imsc.res.in/fccp,301,,"(13.0156,80.2467)",no_wayback,2021-11-15,"The Institute of Mathematical Sciences (IMSc), Chennai 600113, India; Homi Bhabha National Institute (HBNI), Mumbai 400094, India.","Ravichandran J, Karthikeyan BS, Jost J, Samal A",,"Max Planck Society, Science and Engineering Research Board",0.0,"India, India" +34954426,FertilityOnline,0.993602693,FertilityOnline,0.993602693,,0,1,http://mcg.ustc.edu.cn/bsc/spermgenes2.0/index.html,200,,"(31.8639,117.2808)",no_wayback,2021-12-23,"The First Affiliated Hospital of USTC, Hefei National Laboratory for Physical Sciences at the Microscale, The CAS Key Laboratory of Innate Immunity and Chronic Diseases, School of Life Sciences, CAS Center for Excellence in Molecular Cell Science, University of Science and Technology of China, Collaborative Innovation Center of Genetics and Development, Hefei 230027, China.","Gao J, Zhang H, Jiang X, Ali A, Zhao D, Bao J, Jiang L, Iqbal F, Shi Q, Zhang Y",,"National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities, National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities",0.0,"China, China" +34964845,fungiofpakistan,0.679958761,fungiofpakistan,0.679958761,,0,1,"http://www.fungiofpakistan.com, http://fungiofpakistan.com","200, 200",,"(33.4484,-112.0740), (33.4484,-112.0740)","http://web.archive.org/web/20220316162257/https://www.fungiofpakistan.com/, 
http://web.archive.org/web/20220316162257/https://www.fungiofpakistan.com/",2021-12-01,"State Key Laboratory of Mycology, Institute of Microbiology, Chinese Academy of Sciences, Chaoyang District, Beijing 100101, P.R. China.","Raza M, Cai L, Abbasi MW, Tariq M, Wijayawardene NN",,,0.0,China +35308974,Epilepsy-Connect,0.927172029,Epilepsy-Connect,0.927172029,,0,1,http://bmhinformatics.case.edu/Epilepsyconnect/login,301,,"(41.4995,-81.6954)",no_wayback,2021-01-01,"Department of Population and Quantitative Health Sciences, Case Western Reserve University School of Medicine, Cleveland, OH, USA.","Prantzalos K, Zhang J, Shafiabadi N, Fernandez-BacaVaca G, Sahoo SS",,,0.0,United States +35694152,EnhFFL,0.994996548,EnhFFL,0.994996548,,0,1,http://lcbb.swjtu.edu.cn/EnhFFL,308,,"(30.6667,104.0667)",http://web.archive.org/web/20221108081442/http://lcbb.swjtu.edu.cn/EnhFFL/,2021-04-14,"School of Life Sciences and Engineering, Southwest Jiaotong University, Chengdu 610031, China.","Kang R, Tan Z, Lang M, Jin L, Zhang Y, Zhang Y, Guo T, Guo Z",,"Sichuan Science and Technology Program, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities",0.0,China +21337704,GPDE,0.997722995,GPDE,0.997722995,Griss Proteomics Database Engine,0.943638495,1,http://gpde.sourceforge.net,301,,"(37.7621,-122.3971)",no_wayback,2011-01-27,"Department of Medicine I, Comprehensive Cancer Center, Medical University of Vienna, Vienna, Austria.","Griss J, Haudek-Prinz V, Gerner C",,"Institute of Cancer Research, Medical University of Vienna, Christian Doppler Research Association, Austria and the Austrian “Krebshilfe”, GPDE at the Medical University of Vienna",10.0,Austria +21410407,GRIPDB,0.932066238,GRIPDB,0.932066238,coupled Receptor Interaction Partners DataBase,0.720138676,1,http://grip.cbrc.jp/GDB/index.html,"HTTPConnectionPool(host='grip.cbrc.jp', port=80): Max retries exceeded with url: /GDB/index.html (Caused by NewConnectionError(': Failed to establish 
a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20130925210600/http://grip.cbrc.jp/GDB/index.html,2011-03-17,"Computational Biology Research Center (CBRC), Advanced Industrial Science and Technology (AIST), AIST Tokyo, Japan. w.nemoto@aist.go.jp","Nemoto W, Fukui K, Toh H",,,3.0,Japan +21435384,HOCTARdb,0.978390336,HOCTARdb,0.978390336,,0,1,http://hoctar.tigem.it,301,,"(40.8439,14.0952)",http://web.archive.org/web/20220406031510/https://hoctar.tigem.it/,2011-03-22,"Telethon Institute of Genetics and Medicine, Naples, Italy. gennarin@bcm.edu","Gennarino VA, Sardiello M, Mutarelli M, Dharmalingam G, Maselli V, Lago G, Banfi S",,"Telethon, Italian Telethon Foundation",33.0,Italy +21450710,hmChIP,0.998008847,hmChIP,0.998008847,,0,1,http://jilab.biostat.jhsph.edu/database/cgi-bin/hmChIP.pl,"HTTPConnectionPool(host='jilab.biostat.jhsph.edu', port=80): Max retries exceeded with url: /database/cgi-bin/hmChIP.pl (Caused by ConnectTimeoutError(, 'Connection to jilab.biostat.jhsph.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210926134804/http://jilab.biostat.jhsph.edu/database/cgi-bin/hmChIP.pl,2011-03-30,"Department of Biostatistics, The Johns Hopkins University Bloomberg School of Public Health, Baltimore, MD 21205, USA.","Chen L, Wu G, Ji H",,"NHGRI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS",32.0,United States +21591763,GlycoFish,0.797046423,GlycoFish,0.797046423,,0,1,http://betenbaugh.jhu.edu/GlycoFish,301,,"(39.2904,-76.6122)",no_wayback,2011-06-08,"Department of Chemical and Biomolecular Engineering, Johns Hopkins University, Baltimore, Maryland 21218, USA.","Baycin-Hizal D, Tian Y, Akan I, Jacobson E, Clark D, Wu A, Jampol R, Palter K, Betenbaugh M, Zhang H",,"NCI NIH HHS, NCI NIH HHS, NCI NIH HHS",15.0,United States +21609420,GiSAO.db,0.948274958,GiSAO.db,0.948274958,Genes,0.665788551,1,http://gisao.genome.tugraz.at,301,,"(47.0667,15.4500)",no_wayback,2011-05-24,"Division for Bioinformatics, Biocenter, Innsbruck Medical University, Schöpfstrasse, Austria.","Hofer E, Laschober GT, Hackl M, Thallinger GG, Lepperdinger G, Grillari J, Jansen-Dürr P, Trajanoski Z",,,1.0,Austria +21695066,GENT,0.995685935,GENT,0.995685935,Gene Expression database of Normal and Tumor tissues,0.976307766,1,"http://medicalgenome.kribb.re.kr/GENT/, http://genome.kobic.re.kr/GENT","HTTPConnectionPool(host='medicalgenome.kribb.re.kr', port=80): Max retries exceeded with url: /GENT/ (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known')), HTTPConnectionPool(host='genome.kobic.re.kr', port=80): Max retries exceeded with url: /GENT (Caused by ConnectTimeoutError(, 'Connection to genome.kobic.re.kr timed out. 
(connect timeout=5)'))",,", ","http://web.archive.org/web/20200218092630/http://medicalgenome.kribb.re.kr:80/GENT/, no_wayback",2011-05-09,"Department of Bio and Information Technology, Graduate School, Chungbuk National University, 410 Seongbong-ro, Heungdeok-gu, Cheongju, Chungbuk, 361-763.","Shin G, Kang TW, Yang S, Baek SJ, Jeong YS, Kim SY",,,120.0, +21760913,HelmCoP,0.995496631,HelmCoP,0.995496631,Helminth Control,0.556904441,1,http://www.nematode.net/helmcop.html,"HTTPConnectionPool(host='www.nematode.net', port=80): Max retries exceeded with url: /helmcop.html (Caused by ConnectTimeoutError(, 'Connection to www.nematode.net timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20190914144040/http://nematode.net:80/HelmCoP.html,2011-07-08,"The Genome Institute, Washington University School of Medicine, St. Louis, Missouri, United States of America.","Abubucker S, Martin J, Taylor CM, Mitreva M",,"NIAID NIH HHS, NIAID NIH HHS",13.0,United States +21930248,HCVpro,0.997242883,HCVpro,0.997242883,hepatitis C virus protein interaction database,0.859147181,1,"http://apps.sanbi.ac.za/hcvpro/, http://cbrc.kaust.edu.sa/hcvpro","HTTPConnectionPool(host='apps.sanbi.ac.za', port=80): Max retries exceeded with url: /hcvpro/ (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known')), 302",,", (37.5331,-122.2486)","http://web.archive.org/web/20110209185158/http://apps.sanbi.ac.za:80/hcvpro/, http://web.archive.org/web/20220805210210/https://www.cbrc.kaust.edu.sa/hcvpro/",2011-09-09,"South African National Bioinformatics Institute, University of the Western Cape, Private Bag X17, Modderdam Road, Bellville 7535, Cape Town, South Africa. 
samuel@sanbi.ac.za","Kwofie SK, Schaefer U, Sundararajan VS, Bajic VB, Christoffels A",,"DST/NRF Research Chair, National Research Foundation (South Africa), National Bioinformatics Network",41.0,South Africa +21982653,GenTAC,0.966817975,GenTAC,0.966817975,National Registry of Genetically Triggered Thoracic Aortic Aneurysms and Cardiovascular Conditions,0.941658658,1,http://gentac.rti.org,"HTTPConnectionPool(host='gentac.rti.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20130903025537/https://gentac.rti.org/,2011-10-01,"RTI International, Rockville, MD, USA. byk@rti.org","Kroner BL, Tolunay HE, Basson CT, Pyeritz RE, Holmes KW, Maslen CL, Milewicz DM, LeMaire SA, Hendershot T, Desvigne-Nickens P, Devereux RB, Dietz HC, Song HK, Ringer D, Mitchell M, Weinsaft JW, Ravekes W, Menashe V, Eagle KA",,NHLBI NIH HHS,24.0,United States +"22009673, 30407568",ICEberg,0.640117407,ICEberg,0.640117407,,0,2,http://db-mml.sjtu.edu.cn/ICEberg,302,,"(31.2222,121.4581)",http://web.archive.org/web/20210512021619/https://db-mml.sjtu.edu.cn/ICEberg/,2019-01-01,"State Key Laboratory of Microbial Metabolism and School of Life Sciences & Biotechnology, Shanghai Jiaotong University, Shanghai 200030, China., State Key Laboratory of Microbial Metabolism, Joint International Laboratory on Metabolic & Developmental Sciences, School of Life Sciences & Biotechnology, Shanghai Jiao Tong University, Shanghai 200030, China.","Bi D, Xu Z, Harrison EM, Tai C, Wei Y, He X, Jia S, Deng Z, Rajakumar K, Ou HY, Liu M, Li X, Xie Y, Bi D, Sun J, Li J, Tai C, Deng Z, Ou HY",", ",", National Natural Science Foundation of China, Shanghai Jiao Tong University, National Natural Science Foundation of China, National Key R&D Program of China, National Key R&D Program of China",203.0,"China, China" +22016855,GyDB,0.945335805,GyDB,0.945335805,Gypsy Database concerning Mobile 
Genetic,0.892279019,1,http://gydb.org/index.php/Mobilomics,"HTTPConnectionPool(host='gydb.org', port=80): Max retries exceeded with url: /index.php/Mobilomics (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20210117132759/https://gydb.org/index.php/Mobilomics,2011-07-01,"Biotechvana; Parc Cientific de la Universitat de València; Valencia, Spain.","Bernet GP, Muñoz-Pomer A, Domínguez-Escribá L, Covelli L, Bernad L, Ramasamy S, Futami R, Sempere JM, Moya A, Llorens C",,,3.0,Spain +22022467,HIVsirDB,0.950383782,HIVsirDB,0.950383782,,0,1,http://crdd.osdd.net/raghava/hivsir,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/hivsir (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220121044747/http://crdd.osdd.net/raghava/hivsir/,2011-10-11,"Bioinformatics Centre, Institute of Microbial Technology (CSIR), Chandigarh, India.","Tyagi A, Ahmed F, Thakur N, Sharma A, Raghava GP, Kumar M",,,21.0,India +22024348,HNOCDB,0.997382939,HNOCDB,0.997382939,,0,1,http://gyanxet.com/hno.html,"HTTPConnectionPool(host='gyanxet.com', port=80): Max retries exceeded with url: /hno.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160510005053/http://gyanxet.com:80/hno.html,2011-10-22,"Indian Association for the Cultivation of Science, Calcutta 700 032, India.","Mitra S, Das S, Das S, Ghosal S, Chakrabarti J",,,11.0,India +22064851,HaploReg,0.996389806,HaploReg,0.996389806,,0,1,http://compbio.mit.edu/HaploReg,301,,"(42.3751,-71.1056)",no_wayback,2011-11-07,"Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology and The Broad Institute of MIT and Harvard, Cambridge, MA 02139, USA. 
lukeward@mit.edu","Ward LD, Kellis M",,"NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS",1350.0,United States +22064861,hemorrhagic fever virus database,0.78576076,HFV,0.767522514,hemorrhagic fever virus database,0.78576076,1,http://hfv.lanl.gov,302,,"(35.8881,-106.3070)",no_wayback,2011-11-07,"Theoretical Biology and Biophysics, T-10, Los Alamos National Laboratory, Los Alamos, NM 87545, USA. kuiken@lanl.gov","Kuiken C, Thurmond J, Dimitrijevic M, Yoon H",,,13.0,United States +22080558,HotRegion,0.989635229,HotRegion,0.989635229,,0,1,http://prism.ccbb.ku.edu.tr/hotregion,"HTTPConnectionPool(host='prism.ccbb.ku.edu.tr', port=80): Max retries exceeded with url: /hotregion (Caused by ConnectTimeoutError(, 'Connection to prism.ccbb.ku.edu.tr timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20210612153006/http://prism.ccbb.ku.edu.tr/hotregion/,2011-11-12,"Center for Computational Biology and Bioinformatics and College of Engineering, Koc University, Rumelifeneri Yolu, 34450 Sariyer Istanbul, Turkey.","Cukuroglu E, Gursoy A, Keskin O",,,44.0,Turkey +22102591,IBIS,0.99052155,IBIS,0.99052155,Inferred Biomolecular Interaction Server,0.952923278,1,http://www.ncbi.nlm.nih.gov/Structure/ibis/ibis.cgi,200,,"(38.9896,-77.1538)",http://web.archive.org/web/20220512100844/https://www.ncbi.nlm.nih.gov/Structure/ibis/ibis.cgi,2011-11-18,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Building 38A, Bethesda, MD 20894, USA.","Shoemaker BA, Zhang D, Tyagi M, Thangudu RR, Fong JH, Marchler-Bauer A, Bryant SH, Madej T, Panchenko AR",,Intramural NIH HHS,47.0,United States +"22123736, 25378336",GOA,0.988438288,GOA,0.988438288,Gene Ontology Annotation,0.705664259,2,http://www.ebi.ac.uk/GOA,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20221013172026/https://www.ebi.ac.uk/GOA/,2014-11-06,"European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. 
edimmer@ebi.ac.uk, European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK huntley@ebi.ac.uk.","Dimmer EC, Huntley RP, Alam-Faruque Y, Sawford T, O'Donovan C, Martin MJ, Bely B, Browne P, Mun Chan W, Eberhardt R, Gardner M, Laiho K, Legge D, Magrane M, Pichler K, Poggioli D, Sehra H, Auchincloss A, Axelsen K, Blatter MC, Boutet E, Braconi-Quintaje S, Breuza L, Bridge A, Coudert E, Estreicher A, Famiglietti L, Ferro-Rojas S, Feuermann M, Gos A, Gruaz-Gumowski N, Hinz U, Hulo C, James J, Jimenez S, Jungo F, Keller G, Lemercier P, Lieberherr D, Masson P, Moinat M, Pedruzzi I, Poux S, Rivoire C, Roechert B, Schneider M, Stutz A, Sundaram S, Tognolli M, Bougueleret L, Argoud-Puy G, Cusin I, Duek-Roggli P, Xenarios I, Apweiler R, Huntley RP, Sawford T, Mutowo-Meullenet P, Shypitsyna A, Bonilla C, Martin MJ, O'Donovan C",", ","British Heart Foundation, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, British Heart Foundation, NHGRI NIH HHS, Parkinson's UK, NHGRI NIH HHS",450.0, +22123737,hiPathDB,0.997191489,hiPathDB,0.997191489,,0,1,http://hiPathDB.kobic.re.kr,"HTTPConnectionPool(host='hipathdb.kobic.re.kr', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to hipathdb.kobic.re.kr timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20180621150858/http://hipathdb.kobic.re.kr:80/,2011-11-28,"Korean Bioinformation Center, KRIBB, Daejeon 305-806, Korea.","Yu N, Seo J, Rho K, Jang Y, Park J, Kim WK, Lee S",,,21.0, +22134927,hUbiquitome,0.99319154,hUbiquitome,0.99319154,Really Interesting New Gene,0.800637662,1,http://202.38.126.151/hmdd/hubi,"HTTPConnectionPool(host='202.38.126.151', port=80): Max retries exceeded with url: /hmdd/hubi (Caused by ConnectTimeoutError(, 'Connection to 202.38.126.151 timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20140722102202/http://202.38.126.151/hmdd/hubi/,2011-11-30,"Department of Medical Informatics, Peking University Health Science Center, Beijing 100191, China.","Du Y, Xu N, Lu M, Li T",,,14.0,China +"22139925, 26615194",GWASdb,0.991770446,GWASdb,0.991770446,,0,2,http://jjwanglab.org/gwasdb,"HTTPConnectionPool(host='jjwanglab.org', port=80): Max retries exceeded with url: /gwasdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20201105173855/http://jjwanglab.org/gwasdb,2015-11-28,"Department of Biochemistry, The University of Hong Kong, Hong Kong SAR, China., Centre for Genomic Sciences, LKS Faculty of Medicine, The University of Hong Kong, Hong Kong SAR, China School of Biomedical Sciences, LKS Faculty of Medicine, The University of Hong Kong, Hong Kong SAR, China.","Li MJ, Wang P, Liu X, Lim EL, Wang Z, Yeager M, Wong MP, Sham PC, Chanock SJ, Wang J, Li MJ, Liu Z, Wang P, Wong MP, Nelson MR, Kocher JP, Yeager M, Sham PC, Chanock SJ, Xia Z, Wang J",", ","Intramural NIH HHS, ",204.0,"China, China, China, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong" +22140100,HGPD,0.996275783,HGPD,0.996275783,Human Gene and Protein Database,0.967167735,1,http://www.HGPD.jp,"HTTPConnectionPool(host='www.hgpd.jp', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160523104334/http://www.hgpd.jp:80/,2011-12-02,"National Institute of Advanced Industrial Science and Technology, Japan Biological Informatics Consortium, Aomi, Koto-ku, Tokyo 135-0064, Japan.","Maruyama Y, Kawamura Y, Nishikawa T, Isogai T, Nomura N, Goshima N",,,9.0,"Japan, Japan" +22140112,HIstome,0.995750725,HIstome,0.995750725,,0,1,"http://www.iiserpune.ac.in/Ã, http://www.actrec.gov.in/histome","301, 
301",,"(18.5586,73.7794), (19.0728,72.8826)","no_wayback, http://web.archive.org/web/20200118151738/http://www.actrec.gov.in:80/histome/",2011-12-02,"Cancer Research Institute, Advanced Centre for Treatment, Research and Education in Cancer, Kharghar, Navi Mumbai 410210, India.","Khare SP, Habib F, Sharma R, Gadewal N, Gupta S, Galande S",,,61.0,India +22165817,HOMER,0.992719769,HOMER,0.992719769,Human Organ-specific Molecular,0.831779265,1,http://bio.informatics.iupui.edu/homer,302,,"(39.2014,-85.9214)",no_wayback,2011-10-18,"School of Informatics, Indiana University, Indianapolis, IN 46202, USA.","Zhang F, Chen JY",,NCI NIH HHS,10.0,United States +22238269,GROMACS,0.992195368,GROMACS,0.992195368,,0,1,http://virtualchemistry.org,"HTTPConnectionPool(host='virtualchemistry.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20220521122928/http://virtualchemistry.org/,2012-01-11,"Department of Cell and Molecular Biology, Uppsala University, Husargatan 3, Box 596, SE-75124 Uppsala, Sweden. 
spoel@xray.bmc.uu.se","van der Spoel D, van Maaren PJ, Caleman C",,,46.0,Sweden +"22610854, 31114900",IEDB-AR,0.986699712,IEDB-AR,0.986699712,Immune Epitope Database Analysis Resource,0.965692446,2,http://tools.iedb.org,302,,"(32.8919,-117.2035)",no_wayback,2019-07-01,"La Jolla Institute for Allergy and Immunology, 9420 Athena Circle, La Jolla, CA 92037, USA., Division of Vaccine Discovery, La Jolla Institute for Allergy and Immunology, La Jolla, CA 92037, USA.","Kim Y, Ponomarenko J, Zhu Z, Tamang D, Wang P, Greenbaum J, Lundegaard C, Sette A, Lund O, Bourne PE, Nielsen M, Peters B, Dhanda SK, Mahajan S, Paul S, Yan Z, Kim H, Jespersen MC, Jurtz V, Andreatta M, Greenbaum JA, Marcatili P, Sette A, Nielsen M, Peters B",", ","PHS HHS, PHS HHS, NIAID NIH HHS, NIAID NIH HHS, National Institutes of Health, National Institutes of Health",291.0,"United States, United States" +22613085,HAltORF,0.995966196,HAltORF,0.995966196,Human alternative open reading frames,0.858935988,1,http://haltorf.roucoulab.com,301,,"(45.4001,-71.8991)",http://web.archive.org/web/20220129111303/http://haltorf.roucoulab.com/,2012-05-20,"Département de Biochimie, Faculté de Médecine et des Sciences de la Santé, Université de Sherbrooke, Québec, Canada.","Vanderperre B, Lucier JF, Roucou X",,Canadian Institutes of Health Research,19.0,Canada +22761927,HEMD,0.996305764,HEMD,0.996305764,human epigenetic enzyme and modulator database,0.944263808,1,http://mdl.shsmu.edu.cn/HEMD,302,,"(31.2222,121.4581)",http://web.archive.org/web/20220128170850/http://mdl.shsmu.edu.cn/HEMD/,2012-06-25,"Department of Pathophysiology and Key Laboratory of Cell Differentiation and Apoptosis of Chinese Ministry of Education, School of Medicine, Shanghai Jiao-Tong University, Shanghai, China.","Huang Z, Jiang H, Liu X, Chen Y, Wong J, Wang Q, Huang W, Shi T, Zhang J",,,9.0,China +22804825,HuPho,0.995498061,HuPho,0.995498061,human phosphatase 
portal,0.886762607,1,http://hupho.uniroma2.it,"HTTPConnectionPool(host='hupho.uniroma2.it', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20200722091216/http://hupho.uniroma2.it:80/,2012-08-24,"Department of Biology, University of Rome Tor Vergata, Rome, Italy. susanna.liberti@gmail.com","Liberti S, Sacco F, Calderone A, Perfetto L, Iannuccelli M, Panni S, Santonico E, Palma A, Nardozza AP, Castagnoli L, Cesareni G",,Telethon,28.0,Italy +22846459,HINT,0.995340347,HINT,0.995340347,,0,1,http://hint.yulab.org,200,,"(42.4406,-76.4966)",http://web.archive.org/web/20220121102935/http://hint.yulab.org/,2012-07-30,"Department of Biological Statistics and Computational Biology, Cornell University, Ithaca, NY 14853, USA. haiyuan.yu@cornell.edu.","Das J, Yu H",,"NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS",172.0,United States +22847935,GlycoCD,0.997937024,GlycoCD,0.997937024,,0,1,http://glycosciences.de/glycocd/index.php,200,,"(50.5873,8.6755)",http://web.archive.org/web/20220617100358/http://glycosciences.de/glycocd/index.php,2012-07-30,"D015, Translational Immunology, German Cancer Research Center, INF 280, D-69120 Heidelberg, Germany.","Kumar S, Lütteke T, Schwartz-Albiez R",,,6.0,Germany +22874333,HeTOP,0.993553936,HeTOP,0.993553936,The Health Terminology/Ontology Portal,0.898625046,1,http://hetop.eu,302,,"(49.4431,1.0993)",http://web.archive.org/web/20130610211923/http://hetop.eu/,2012-01-01,"Rouen University Hospital, Rouen Cedex, France. 
julien.grosjean@chu-rouen.fr","Grosjean J, Merabti T, Griffon N, Dahamna B, Darmoni SJ",,,4.0,France +22900683,HTRIdb,0.992245674,HTRIdb,0.992245674,Human Transcriptional Regulation Interactions database,0.976327971,1,http://www.lbbc.ibb.unesp.br/htri,302,,"(-22.8858,-48.4450)",http://web.archive.org/web/20220817125558/https://www.lbbc.ibb.unesp.br/htri/,2012-08-17,"Departamento de Física e Biofísica, Instituto de Biociências de Botucatu, Unesp - Univ Estadual Paulista, Distrito de Rubião Jr, s/n, Botucatu, São Paulo, 18618-970, Brazil. labovolenta@ibb.unesp.br","Bovolenta LA, Acencio ML, Lemke N",,,132.0,Brazil +22923302,HSPIR,0.99661684,HSPIR,0.99661684,Heat shock protein information resource,0.949629581,1,http://pdslab.biochem.iisc.ernet.in/hspir,301,,"(12.9719,77.5937)",no_wayback,2012-08-24,"Department of Biochemistry, Indian Institute of Science, Bangalore 560012, Karnataka, India.","R RK, N S N, S P A, Sinha D, Veedin Rajan VB, Esthaki VK, D'Silva P",,Wellcome Trust,23.0,India +"22948725, 24077912, 28349240",HGMD,0.993344128,HGMD,0.993344128,Human Gene Mutation Database,0.954743373,3,http://www.hgmd.org,308,,"(53.4809,-2.2374)",http://web.archive.org/web/20180610041034/http://www.hgmd.org,2017-03-27,"Cardiff University, Cardiff, United Kingdom., None, School of Medicine, Institute of Medical Genetics, Cardiff University, Heath Park, Cardiff, CF14 4XN, UK. stensonPD@cardiff.ac.uk.","Stenson PD, Ball EV, Mort M, Phillips AD, Shaw K, Cooper DN, Stenson PD, Mort M, Ball EV, Shaw K, Phillips A, Cooper DN, Stenson PD, Mort M, Ball EV, Evans K, Hayden M, Heywood S, Hussain M, Phillips AD, Cooper DN",", , ",", , ",1273.0,United Kingdom +23016940,HomeoDB2,0.997463286,HomeoDB2,0.997463286,Homeobox gene database,0.984303606,1,http://homeodb.zoo.ox.ac.uk,200,,"(51.7522,-1.2560)",http://web.archive.org/web/20220121165701/http://homeodb.zoo.ox.ac.uk/,2011-11-01,"Department of Zoology, University of Oxford, Oxford, UK. 
ying-fu.zhong@zoo.ox.ac.uk","Zhong YF, Holland PW",,,68.0, +23095257,IBDsite,0.995800972,IBDsite,0.995800972,,0,1,"http://www.itb.cnr.it/ibd, http://www.itb.cnr.it/galaxy","301, 301",,"(45.4643,9.1895), (45.4643,9.1895)","http://web.archive.org/web/20180208030052/http://www.itb.cnr.it:80/ibd/, no_wayback",2012-09-07,"Institute for Biomedical Technologies, National Research Council, Via Fratelli Cervi, 93, Segrate (Mi), Italy. ivan.merelli@itb.cnr.it","Merelli I, Viti F, Milanesi L",,,7.0,Italy +23095476,Hawaiian Freshwater Algal Database,0.969474773,HfwADB,0.964678764,Hawaiian Freshwater Algal Database,0.969474773,1,http://algae.manoa.hawaii.edu/hfwadb,"HTTPConnectionPool(host='algae.manoa.hawaii.edu', port=80): Max retries exceeded with url: /hfwadb (Caused by ConnectTimeoutError(, 'Connection to algae.manoa.hawaii.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20170513234420/http://algae.manoa.hawaii.edu:80/hfwadb/,2012-10-25,"Department of Botany, University of Hawaii at Manoa, 3190 Maile Way, Honolulu, Hawaii 96822, USA. 
asherwoo@hawaii.edu","Sherwood AR, Wang N, Carlile AL, Neumann JM, Wolfgruber TK, Presting GG",,,0.0,United States +23104379,GFDB,0.991825804,GFDB,0.991825804,glycan fragment database,0.877565131,1,http://www.glycanstructure.org,200,,"(40.6259,-75.3705)",no_wayback,2012-10-26,"Department of Molecular Biosciences and Center for Bioinformatics, The University of Kansas, 2030 Becker Drive, Lawrence, KS 66047, USA.","Jo S, Im W",,"NIGMS NIH HHS, NCRR NIH HHS",30.0,United States +23118488,HEXEvent,0.995822012,HEXEvent,0.995822012,,0,1,http://hexevent.mmg.uci.edu,200,,"(33.6425,-117.8417)",http://web.archive.org/web/20221007185018/http://hexevent.mmg.uci.edu/,2012-10-31,"Department of Microbiology and Molecular Genetics, University of California, Irvine, CA 92697-4025, USA.","Busch A, Hertel KJ",,"NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS",22.0,United States +23125365,HBVdb,0.996498883,HBVdb,0.996498883,,0,1,http://hbvdb.ibcp.fr,301,,"(45.7469,4.8444)",http://web.archive.org/web/20190831065853/https://hbvdb.ibcp.fr/,2012-11-03,"Unité Bases Moléculaires et Structurales des Systèmes Infectieux, Lyon, France.","Hayer J, Jadeau F, Deléage G, Kay A, Zoulim F, Combet C",,,83.0,France +23143109,HemaExplorer,0.986351252,HemaExplorer,0.986351252,,0,1,http://servers.binf.ku.dk/hemaexplorer,301,,"(55.6759,12.5655)",no_wayback,2012-11-09,"Bioinformatics Centre, Department of Biology, University of Copenhagen, Copenhagen, DK2200 Denmark.","Bagger FO, Rapin N, Theilgaard-Mönch K, Kaczkowski B, Thoren LA, Jendholm J, Winther O, Porse BT",,"Lundbeck Foundation, Novo Nordisk Fonden",50.0,Denmark +23193255,H2DB,0.996937255,H2DB,0.996937255,,0,1,http://tga.nig.ac.jp/h2db,301,,"(35.1167,138.9167)",http://web.archive.org/web/20221105022207/http://tga.nig.ac.jp/h2db/,2012-11-27,"Genome Informatics Laboratory, National Institute of Genetics, 1111 Yata, Mishima 411-8540, Japan.","Kaminuma E, Fujisawa T, Tanizawa Y, Sakamoto N, Kurata N, Shimizu T, Nakamura Y",,,1.0,Japan +"23193258, 
27008011",GEO,0.986045718,GEO,0.986045718,,0,2,http://www.ncbi.nlm.nih.gov/geo,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221103131023/https://www.ncbi.nlm.nih.gov/geo/,2016-01-01,"National Center for Biotechnology Information, National Library of Medicine and Molecular Genetics Section, Genetics Branch, National Cancer Institute, National Institutes of Health, Bethesda, MD 20892, USA. barrett@ncbi.nlm.nih.gov, National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 45 Center Drive, MSC 6510, Building 45, Room AS13B, Bethesda, MD, 20892-6510, USA.","Barrett T, Wilhite SE, Ledoux P, Evangelista C, Kim IF, Tomashevsky M, Marshall KA, Phillippy KH, Sherman PM, Holko M, Yefanov A, Lee H, Zhang N, Robertson CL, Serova N, Davis S, Soboleva A, Clough E, Barrett T",", ","Intramural NIH HHS, Intramural NIH HHS",3724.0,"United States, United States" +23193275,GTR,0.947066327,GTR,0.947066327,Testing Registry,0.783265024,1,http://www.ncbi.nlm.nih.gov/gtr,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221111125221/https://www.ncbi.nlm.nih.gov/gtr/,2012-11-27,"National Institutes of Health, National Library of Medicine, National Center for Biotechnology Information, Bethesda, MD 20894, USA. 
wendy.rubinstein@nih.gov","Rubinstein WS, Maglott DR, Lee JM, Kattman BL, Malheiro AJ, Ovetsky M, Hem V, Gorelenkov V, Song G, Wallin C, Husain N, Chitipiralla S, Katz KS, Hoffman D, Jang W, Johnson M, Karmanov F, Ukrainchik A, Denisenko M, Fomous C, Hudson K, Ostell JM",,Intramural NIH HHS,79.0,United States +"23193293, 29121237",HAGR,0.996995687,HAGR,0.996995687,Human Ageing Genomic Resources,0.98925361,2,http://genomics.senescence.info,301,,"(42.5467,-83.2113)",http://web.archive.org/web/20221103114558/https://genomics.senescence.info/,2018-01-01,"Integrative Genomics of Ageing Group, Institute of Integrative Biology, University of Liverpool, Liverpool L69 7ZB, UK., Integrative Genomics of Ageing Group, Institute of Ageing and Chronic Disease, University of Liverpool, Liverpool L7 8TX, UK.","Tacutu R, Craig T, Budovsky A, Wuttke D, Lehmann G, Taranukha D, Costa J, Fraifeld VE, de Magalhães JP, Tacutu R, Thornton D, Johnson E, Budovsky A, Barardo D, Craig T, Diana E, Lehmann G, Toren D, Wang J, Fraifeld VE, de Magalhães JP",", ","Wellcome Trust, Wellcome Trust",404.0, +23281827,Helminth Secretome Database,0.980656524,HSD,0.909700066,Helminth Secretome Database,0.980656524,1,http://estexplorer.biolinfo.org/hsd,302,,"(39.5792,-104.8769)",no_wayback,2012-12-13,"Department of Chemistry and Biomolecular Sciences and ARC Centre of Excellence in Bioinformatics, Macquarie University, Sydney NSW 2109, Australia.","Garg G, Ranganathan S",,,14.0,Australia +23369322,HDAM,0.990325689,HDAM,0.990325689,Human Disease Associated Mutation,0.949979091,1,http://www.megabionet.org/HDAM,404,,,no_wayback,2013-01-23,"Center for Bioinformatics and Computational Biology, Shanghai Key Laboratory of Regulatory Biology, Institute of Biomedical Sciences and School of Life Science, East China Normal University, Shanghai 200241, China.","Jia M, Liu Y, Shen Z, Zhao C, Zhang M, Yi Z, Wen C, Deng Y, Shi T",,,3.0,"China, China" +23436708,Human Testis Proteome 
Database,0.981758612,HTPD,0.979909778,Human Testis Proteome Database,0.981758612,1,http://reprod.njmu.edu.cn/htpd,"HTTPConnectionPool(host='reprod.njmu.edu.cn', port=80): Max retries exceeded with url: /htpd (Caused by ReadTimeoutError(""HTTPConnectionPool(host='reprod.njmu.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20130712191028/http://reprod.njmu.edu.cn:80/htpd/,2013-03-06,"State Key Laboratory of Reproductive Medicine, Nanjing Medical University, Nanjing, China.","Liu M, Hu Z, Qi L, Wang J, Zhou T, Guo Y, Zeng Y, Zheng B, Wu Y, Zhang P, Chen X, Tu W, Zhang T, Zhou Q, Jiang M, Guo X, Zhou Z, Sha J",,,34.0,China +23504933,Human Proteinpedia,0.979407251,Human Proteinpedia,0.979407251,,0,1,"http://www.humanproteinpedia.org, http://www.hprd.org","503, 503",,", ","http://web.archive.org/web/20220519021230/http://humanproteinpedia.org/, http://web.archive.org/web/20220804041016/https://www.hprd.org/",2013-03-01,"Institute of Bioinformatics, Bangalore, India.","Muthusamy B, Thomas JK, Prasad TS, Pandey A",,"NHLBI NIH HHS, NHLBI NIH HHS, NIGMS NIH HHS, NHLBI NIH HHS, NCRR NIH HHS",8.0,India +23584836,hLGDB,0.994743705,hLGDB,0.994743705,Human Lysosome Gene Database,0.979168455,1,http://lysosome.unipg.it,200,,"(43.1122,12.3888)",http://web.archive.org/web/20220621004057/http://lysosome.unipg.it/,2013-04-12,"Department of Experimental Medicine and Biochemical Sciences, University of Perugia, Via del Giochetto, 06123 Perugia, Italy.","Brozzi A, Urbanelli L, Germain PL, Magini A, Emiliani C",,,22.0,Italy +23585031,HORDE,0.991316095,HORDE,0.991316095,Human Olfactory Receptor Data Explorer,0.980134517,1,http://genome.weizmann.ac.il/horde,302,,"(31.8942,34.8120)",http://web.archive.org/web/20220708233424/https://genome.weizmann.ac.il/horde/,2013-01-01,"Department of Molecular Genetics, Weizmann Institute of Science, Rehovot, Israel.","Olender T, Nativ N, Lancet D",,,20.0,Israel +23717556,HGPGD,0.977447295,HGPGD,0.977447295,human gene 
population genetic difference database,0.925682147,1,http://www.bioapp.org/hgpgd,301,,"(36.0649,120.3804)",http://web.archive.org/web/20170531074012/http://www.bioapp.org:80/hgpgd/,2013-05-22,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China. jiangyongshuai@gmail.com","Jiang Y, Zhang R, Lv H, Li J, Wang M, Chang Y, Lv W, Sheng X, Zhang J, Liu P, Zheng J, Shi M, Liu G",,,2.0,China +23798574,GPHR,0.621493533,GPHR,0.621493533,,0,1,http://ssfa-gphr.de,"HTTPConnectionPool(host='ssfa-gphr.de', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220615225351/http://www.ssfa-gphr.de/,2013-06-24,"Leibniz-Institut für Molekulare Pharmakologie, 13125 Berlin, Germany.","Kreuchwig A, Kleinau G, Krause G",,,9.0,Germany +23813641,GoSynthetic,0.995571911,GoSynthetic,0.995571911,,0,1,http://gosyn.bioapps.biozentrum.uni-wuerzburg.de,301,,"(49.7939,9.9512)",http://web.archive.org/web/20140722195050/http://gosyn.bioapps.biozentrum.uni-wuerzburg.de,2013-06-27,"Department of Bioinformatics, Biocenter, Am Hubland, University of Würzburg, 97074 Würzburg, Germany.","Liang C, Krüger B, Dandekar T",,,3.0,Germany +23913812,Hemophilia A Database,0.782407534,,0,Hemophilia A Database,0.782407534,1,"http://hadb.org.uk/, http://www.kcl.ac.uk/ip/petergreen/haemBdatabase.html","301, 301",,"(51.5085,-0.1257), (51.5085,-0.1257)","http://web.archive.org/web/20220808185512/https://www.hadb.org.uk/, http://web.archive.org/web/20120927170211/http://www.kcl.ac.uk/ip/petergreen/haemBdatabase.html",2013-09-09,None,"Rydz N, Leggo J, Tinlin S, James P, Lillicrap D",,,7.0, +23936191,HSC-Explorer,0.983055544,HSC-Explorer,0.983055544,,0,1,http://mips.helmholtz-muenchen.de/HSC,200,,"(48.2500,11.5667)",no_wayback,2013-07-30,"Institute for Bioinformatics and Systems Biology (IBIS), Helmholtz Zentrum München - German Research Center for 
Environmental Health (GmbH), Neuherberg, Germany.","Montrone C, Kokkaliaris KD, Loeffler D, Lechner M, Kastenmüller G, Schroeder T, Ruepp A",,,9.0,Germany +23958730,HippDB,0.994108021,HippDB,0.994108021,,0,1,"http://www.nyu.edu/projects/arora/hippdb, http://code.google.com/p/helidb","301, 301",,"(33.9192,-118.4165), (34.0522,-118.2437)","no_wayback, http://web.archive.org/web/20160109060425/https://code.google.com/p/helidb/",2013-08-19,"Department of Anthropology and Department of Chemistry, New York University, New York, NY 10003, USA.","Bergey CM, Watkins AM, Arora PS",,NIGMS NIH HHS,18.0,United States +24122843,HTS-DB,0.993688151,HTS-DB,0.993688151,,0,1,http://hts.cancerresearchuk.org/db/public,301,,"(38.8951,-77.0364)",http://web.archive.org/web/20140726065139/http://hts.cancerresearchuk.org/db/public/,2013-10-11,"High-throughput Screening Laboratory, Cancer Research UK, London Research Institute, 44 Lincoln's Inn Fields, London WC2A 3LY, UK.","Saunders RE, Instrell R, Rispoli R, Jiang M, Howell M",,Cancer Research UK,0.0, +24137008,HoPaCI-DB,0.997813225,HoPaCI-DB,0.997813225,host-Pseudomonas and Coxiella interaction database,0.856298451,1,http://mips.helmholtz-muenchen.de/HoPaCI,200,,"(48.2500,11.5667)",no_wayback,2013-10-16,"CNRS/Aix-Marseille University, Laboratoire d'Ingénierie des Systèmes Macromoléculaires (UMR7255), Institut de Microbiologie de la Méditerranée (IMM), 31 Chemin Joseph Aiguier, 13402 Marseille cedex 20, France, Institute for Bioinformatics and Systems Biology (MIPS), Helmholtz Zentrum München - German Research Center for Environmental Health (GmbH), Ingolstädter Landstr. 
1, D-85764 Neuherberg, Germany, Department of Genome-Oriented Bioinformatics, Center of Life and Food Science Weihenstephan, Technische Universität München, Freising, Germany and Bundeswehr Institute of Microbiology, Neuherbergstrasse 11, 80937 Munich, Germany.","Bleves S, Dunger I, Walter MC, Frangoulidis D, Kastenmüller G, Voulhoux R, Ruepp A",,,11.0,"Germany, Germany, Germany, France" +24150944,HRaP,0.996109843,HRaP,0.996109843,,0,1,http://bioinfo.protres.ru/hrap,301,,"(54.8337,37.6114)",http://web.archive.org/web/20220121131232/http://bioinfo.protres.ru/hrap/,2013-10-22,"Group of Bioinformatics, Institute of Protein Research, Russian Academy of Sciences, Pushchino, Moscow Region 142290, Russia.","Lobanov MY, Sokolovskiy IV, Galzitskaya OV",,,17.0, +24178989,HypoxiaDB,0.996842146,HypoxiaDB,0.996842146,,0,1,http://www.hypoxiadb.com,406,,,http://web.archive.org/web/20220808221616/http://www.hypoxiadb.com/,2013-10-31,"Bioinformatics Group, Defence Institute of Physiology and Allied Sciences (DIPAS), Defence R&D Organization, Lucknow Road, Timarpur, New Delhi-110054, India.","Khurana P, Sugadev R, Jain J, Singh SB",,,17.0,India +24217912,HPO,0.964736124,HPO,0.964736124,The Human Phenotype Ontology project,0.657915694,1,http://www.human-phenotype-ontology.org,302,,"(52.5244,13.4105)",http://web.archive.org/web/20220905202537/https://www.human-phenotype-ontology.org/,2013-11-11,"Institute for Medical Genetics and Human Genetics, Charité-Universitätsmedizin Berlin, Augustenburger Platz 1, 13353 Berlin, Germany, Berlin-Brandenburg Center for Regenerative Therapies, Charité-Universitätsmedizin Berlin, Augustenburger Platz 1, 13353 Berlin, Germany, Lawrence Berkeley National Laboratory, Mail Stop 84R0171, Berkeley, CA 94720, USA, The Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SA, UK, Department of Medical Genetics, Cambridge University Addenbrooke's Hospital, Cambridge CB2 2QQ, UK, Université Paul Sabatier, Faculté de 
Chirurgie Dentaire, CHU Toulouse, France, Centre for Genomic Medicine, Central Manchester University Hospitals NHS Foundation Trust, Manchester Academic Health Sciences Centre (MAHSC), Manchester, UK, Centre for Genomic Medicine, Institute of Human Development, Faculty of Medical and Human Sciences, University of Manchester, MAHSC, Manchester M13 9WL, UK, Institute of Genetic Medicine. Newcastle University, Central Parkway, Newcastle upon Tyne, NE1 3BZ, UK, Department of Computer Science, University of Toronto, Ontario, Canada, Centre for Computational Medicine, Hospital for Sick Children, Toronto, Ontario, Canada, Department of Clinical Genetics, Leeds Teaching Hospitals NHS Trust, Leeds LS2 9NS, UK, MRC Human Genetics Unit, MRC Institute of Genetic and Molecular Medicine, University of Edinburgh, Edinburgh EH4 2XU, UK, The Jackson Laboratory, Bar Harbor, ME 04609, USA, Center for Molecular and Vascular Biology, University of Leuven, Belgium, Department of Neuropediatrics, University Medical Center Schleswig-Holstein, Kiel Campus, 24105 Kiel, Germany, NE Thames Genetics Service, Great Ormond Street Hospital, London WC1N 3JH, UK, Drexel University College of Medicine, Philadelphia, PA 19102, USA, Department of Haematology, University of Cambridge and NHS Blood and Transplant Cambridge, CB2 0PT Cambridge, UK, Autism and Developmental Medicine Institute, Geisinger Health System","Köhler S, Doelken SC, Mungall CJ, Bauer S, Firth HV, Bailleul-Forestier I, Black GC, Brown DL, Brudno M, Campbell J, FitzPatrick DR, Eppig JT, Jackson AP, Freson K, Girdea M, Helbig I, Hurst JA, Jähn J, Jackson LG, Kelly AM, Ledbetter DH, Mansour S, Martin CL, Moss C, Mumford A, Ouwehand WH, Park SM, Riggs ER, Scott RH, Sisodiya S, Van Vooren S, Wapner RJ, Wilkie AO, Wright CF, Vulto-van Silfhout AT, de Leeuw N, de Vries BB, Washingthon NL, Smith CL, Westerfield M, Schofield P, Ruef BJ, Gkoutos GV, Haendel M, Smedley D, Lewis SE, Robinson PN",,"NHGRI NIH HHS, NIH HHS, NIH HHS, NIMH NIH HHS, 
National Institute for Health Research (NIHR), NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, National Institute for Health Research (NIHR), Fight for Sight, Medical Research Council, Medical Research Council, British Heart Foundation, NHGRI NIH HHS, National Institute for Health Research (NIHR)",390.0,"Belgium, Canada, Canada, Germany, Germany, Germany, France, United States, United States, United States" +"24301061, 31612961",GWAS Central,0.977628668,GWAS Central,0.977628668,,0,2,http://www.gwascentral.org,302,,"(52.6386,-1.1317)",http://web.archive.org/web/20221018193716/https://www.gwascentral.org/,2020-01-01,"Department of Genetics, University of Leicester, Leicester, UK., Department of Genetics and Genome Biology, University of Leicester, Leicester LE1 7RH, UK.","Beck T, Hastings RK, Gollapudi S, Free RC, Brookes AJ, Beck T, Shorter T, Brookes AJ",", ",", Medical Research Council, Medical Research Council, UKRI Innovation Fellowship, Health Data Research UK",112.0, +24304901,GPCRDB,0.998344004,GPCRDB,0.998344004,protein-coupled receptors database,0.987734778,1,http://www.gpcr.org/7tm,301,,"(50.1025,8.6299)",http://web.archive.org/web/20191026171449/http://www.gpcr.org:80/7tm/,2013-12-03,"Department of Drug Design and Pharmacology, University of Copenhagen, Universitetsparken 2, DK-2100 Copenhagen, Denmark, Bio-Prodict B.V., Castellastraat 116, 6512 EZ, Nijmegen, The Netherlands and CMBI, NCMLS, Radboudumc Nijmegen Medical Centre, Geert Grooteplein Zuid 26-28, 6525 GA, Nijmegen, The Netherlands.","Isberg V, Vroling B, van der Kant R, Li K, Vriend G, Gloriam D",,Lundbeck Foundation,54.0,"Denmark, Netherlands, Netherlands" +24336862,GIGA,0.974462986,GIGA,0.974462986,The Global Invertebrate Genomics Alliance,0.78900679,1,http://giga.nova.edu,404,,,http://web.archive.org/web/20210121021301/http://giga.nova.edu/,2014-01-01,None,", Bracken-Grissom H, Collins AG, Collins T, Crandall K, Distel D, Dunn C, Giribet G, Haddock S, Knowlton N, Martindale M, 
Medina M, Messing C, O'Brien SJ, Paulay G, Putnam N, Ravasi T, Rouse GW, Ryan JF, Schulze A, Wörheide G, Adamska M, Bailly X, Breinholt J, Browne WE, Diaz MC, Evans N, Flot JF, Fogarty N, Johnston M, Kamel B, Kawahara AY, Laberge T, Lavrov D, Michonneau F, Moroz LL, Oakley T, Osborne K, Pomponi SA, Rhodes A, Santos SR, Satoh N, Thacker RW, Van de Peer Y, Voolstra CR, Welch DM, Winston J, Zhou X",,NIMH NIH HHS,37.0, +24377417,Global catalogue of microorganisms,0.813462162,GCM,0.755801558,Global catalogue of microorganisms,0.813462162,1,http://gcm.wfcc.info,"HTTPConnectionPool(host='gcm.wfcc.info', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20210518171855/http://gcm.wfcc.info/,2013-12-30,None,"Wu L, Sun Q, Sugawara H, Yang S, Zhou Y, McCluskey K, Vasilenko A, Suzuki K, Ohkuma M, Lee Y, Robert V, Ingsriswang S, Guissart F, Philippe D, Ma J",,,9.0, +24428872,GuavaH,0.995271742,GuavaH,0.995271742,,0,1,http://www.GuavaH.org,404,,,http://web.archive.org/web/20210411234259/http://www.guavah.org/,2014-01-15,None,"Bartha I, McLaren PJ, Ciuffi A, Fellay J, Telenti A",,,7.0, +24558441,HTD,0.994018674,HTD,0.994018674,Transporter Database,0.934127271,1,http://htd.cbi.pku.edu.cn,"HTTPConnectionPool(host='htd.cbi.pku.edu.cn', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to htd.cbi.pku.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20170721215447/http://htd.cbi.pku.edu.cn:80/,2014-02-18,"Center for Bioinformatics, State Key Laboratory of Protein and Plant Gene Research, College of Life Sciences, Peking University, Beijing, China ; Peking-Tsinghua Center for Life Sciences, College of Life Sciences, Peking University, Beijing, China.","Ye AY, Liu QR, Li CY, Zhao M, Qu H",,Intramural NIH HHS,12.0,"China, China" +24616562,IBIn,0.980620027,IBIn,0.980620027,Insect Barcode,0.783612788,1,http://www.nabg-nbaii.res.in/barcode,"HTTPConnectionPool(host='www.nabg-nbaii.res.in', port=80): Max retries exceeded with url: /barcode (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-02-19,"National Bureau of Agriculturally Important Insects, Post Bag No. 2491, H.A. Farm Post, Hebbal, Bellary Road, Bangalore - 560 024, Karnataka, India.","Pratheepa M, Jalali SK, Arokiaraj RS, Venkatesan T, Nagesh M, Panda M, Pattar S",,,1.0,India +24622612,GigaDB,0.98824966,GigaDB,0.98824966,,0,1,http://www.gigadb.org,200,,"(22.2783,114.1747)",http://web.archive.org/web/20221013170247/http://gigadb.org/,2014-03-12,"Department of Genetics, Stanford University, USA, GigaScience team, BGI HK Research Institute, 16 Dai Fu Street, Tai Po Industrial Estate, Hong Kong.","Sneddon TP, Zhe XS, Edmunds SC, Li P, Goodman L, Hunter CI",,,10.0,"Hong Kong, United States" +24670875,HerceptinR,0.99444294,HerceptinR,0.99444294,Herceptin,0.511912823,1,http://crdd.osdd.net/raghava/herceptinr,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/herceptinr (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220509051440/http://crdd.osdd.net/raghava/herceptinr/,2014-03-27,"1] CSIR-Institute of Microbial Technology Chandigarh, India [2].","Ahmad S, Gupta S, Kumar R, Varshney GC, Raghava GP",,,24.0,India +24767249,GMTV,0.963946044,GMTV,0.963946044,Genome-wide Mycobacterium tuberculosis Variation,0.959191367,1,http://mtb.dobzhanskycenter.org,"HTTPConnectionPool(host='mtb.dobzhanskycenter.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220301103104/http://mtb.dobzhanskycenter.org/,2014-04-25,"St, Petersburg State University, Theodosius Dobzhansky Center for Genome Bioinformatics, 41 Sredniy prospect, St, Petersburg, Russia. echernya@gmail.com.","Chernyaeva EN, Shulgina MV, Rotkevich MS, Dobrynin PV, Simonov SA, Shitikov EA, Ischenko DS, Karpova IY, Kostryukova ES, Ilina EN, Govorun VM, Zhuravlev VY, Manicheva OA, Yablonsky PK, Isaeva YD, Nosova EY, Mokrousov IV, Vyazovaya AA, Narvskaya OV, Lapidus AL, O'Brien SJ",,,47.0, +24857969,HeteroGenome,0.967456996,HeteroGenome,0.967456996,,0,1,http://www.jcbi.ru/lp_baze,403,,,http://web.archive.org/web/20161105045901/http://www.jcbi.ru:80/lp_baze/,2014-05-24,"Laboratory of Bioinformatics, Institute of Mathematical Problems of Biology, Russian Academy of Sciences, Institutskaya st. 4, 142290 Pushchino, Russia and Department of Computational Mathematics and Mathematical Physics, Moscow State Technical University n.a. N.E. 
Bauman, the 2nd Baumanskaya st., 5, 105005 Moscow, Russia maramaria@yandex.ru.","Chaley M, Kutyrkin V, Tulbasheva G, Teplukhina E, Nazipova N",,,3.0, +24923821,IFIM,0.941411674,IFIM,0.941411674,,0,1,"http://cefg.uestc.edu.cn/ifim/, http://cefg.cn/ifim","HTTPConnectionPool(host='cefg.uestc.edu.cn', port=80): Max retries exceeded with url: /ifim/ (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known')), 404",,", ","http://web.archive.org/web/20150321022915/http://cefg.uestc.edu.cn:80/ifim/, no_wayback",2014-06-11,"Center of Bioinformatics and Key Laboratory for NeuroInformation of the Ministry of Education, School of Life Science and Technology, University of Electronic Science and Technology of China, Chengdu 610054, China.","Wei W, Ye YN, Luo S, Deng YY, Lin D, Guo FB",,,4.0,"China, China" +24931751,HomBRex,0.99716574,HomBRex,0.99716574,Homeopathy Basic Research experiments,0.989604324,1,http://www.carstens-stiftung.de/hombrex,301,,"(48.4051,12.7575)",no_wayback,2014-07-01,"Karl und Veronica Carstens-Stiftung, Am Deimelsberg 36, 45276 Essen, Germany. Electronic address: info@Carstens-Stiftung.de.","Clausen J, van Wijk R, Albrecht H",,,3.0,Germany +25030426,HelicoBase,0.996223032,HelicoBase,0.996223032,,0,1,http://helicobacter.um.edu.my,"HTTPConnectionPool(host='helicobacter.um.edu.my', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to helicobacter.um.edu.my timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160806213616/http://helicobacter.um.edu.my:80/,2014-07-16,"Genome Informatics Research Laboratory, High Impact Research (HIR) Building, University of Malaya, 50603 Kuala Lumpur, Malaysia. 
lchoo@um.edu.my.","Choo SW, Ang MY, Fouladi H, Tan SY, Siow CC, Mutha NV, Heydari H, Wee WY, Vadivelu J, Loke MF, Rehvathy V, Wong GJ",,,5.0,Malaysia +25084271,GMEnzy,0.996969402,GMEnzy,0.996969402,,0,1,http://biotechlab.fudan.edu.cn/database/gmenzy,500,,,http://web.archive.org/web/20220710020150/http://biotechlab.fudan.edu.cn/database/gmenzy/,2014-08-01,"State Key Laboratory of Genetic Engineering, Institute of Genetics, School of Life Sciences, Fudan University, Shanghai, China; Shanghai High-Tech United Bio-Technological R&D Co., Ltd., Shanghai, China.","Wu H, Huang J, Lu H, Li G, Huang Q",,,0.0,"China, China" +25178365,Horse Single Nucleotide Polymorphism and Expression Database,0.986536476,HSDB,0.980438848,Horse Single Nucleotide Polymorphism and Expression Database,0.986536476,1,http://snugenome2.snu.ac.kr/HSDB,"HTTPConnectionPool(host='snugenome2.snu.ac.kr', port=80): Max retries exceeded with url: /HSDB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-09-01,"Genomic Informatics Center, Hankyong National University, Anseong 456-749, Korea.","Lee JH, Lee T, Lee HK, Cho BW, Shin DH, Do KT, Sung S, Kwak W, Kim HJ, Kim H, Cho S, Park KD",,,0.0, +25189782,GOBLET,0.625621259,GOBLET,0.625621259,,0,1,http://mygoblet.org/training-portal,301,,"(-33.8678,151.2073)",http://web.archive.org/web/20220618061021/https://www.mygoblet.org/training-portal,2014-09-04,"The Genome Analysis Centre, Norwich, ELIXIR, Wellcome Trust Genome Campus, Hinxton, UK, The Swedish University for Agricultural Sciences, Uppsala, Sweden, European Molecular Biology Laboratory, Heidelberg, Germany, Ontario Institute for Cancer Research, Toronto, Canada, Instituto Gulbenkian de Ciência, Oeiras, Portugal, The University of New South Wales, Sydney, Australia, Netherlands Bioinformatics Centre, Department of Bioinformatics, Radboud Medical Center, Nijmegen, The Netherlands, CSC - IT Center for Science Ltd., Espoo, Finland, 
Whitehead Institute for Biomedical Research, MIT, Cambridge, MA, USA, CSIRO, Bioinformatics Core, Canberra, Australia, The Sainsbury Laboratory, Norwich Research Park, Norwich, UK, SIB Swiss Institute of Bioinformatics, 1 Rue Michel Servet, Genève, Switzerland, Academis, Illstrasse 12, 12161 Berlin, Germany, The Nowgen Centre, 29 Grafton Street, Manchester, UK, Department of Physics, Sapienza University, Rome, Italy, The Roslin Institute, Edinburgh, UK and The University of Manchester, Manchester, UK.","Corpas M, Jimenez RC, Bongcam-Rudloff E, Budd A, Brazas MD, Fernandes PL, Gaeta B, van Gelder C, Korpelainen E, Lewitter F, McGrath A, MacLean D, Palagi PM, Rother K, Taylor J, Via A, Watson M, Schneider MV, Attwood TK",,"Medical Research Council, Natural Environment Research Council, Medical Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",23.0,"Australia, Australia, Canada, Switzerland, Germany, Germany, Finland, Italy, Netherlands, Netherlands, Portugal, Sweden, United States" +25332403,i5k,0.991027579,i5k,0.991027579,5000 arthropod genomes,0.851789331,1,http://i5k.nal.usda.gov,301,,"(36.6676,-78.3875)",http://web.archive.org/web/20221109053056/http://i5k.nal.usda.gov/,2014-10-20,"National Agricultural Library, Beltsville, MD 20705, USA monica.poelchau@ars.usda.gov.","Poelchau M, Childers C, Moore G, Tsavatapalli V, Evans J, Lee CY, Lin H, Lin JW, Hackett K",,,76.0,United States +"25378303, 29069517",iBeetle-Base,0.997434308,iBeetle-Base,0.997434308,,0,2,http://ibeetle-base.uni-goettingen.de,200,,"(51.5344,9.9323)",http://web.archive.org/web/20220331062913/http://ibeetle-base.uni-goettingen.de/,2018-01-01,"Johann-Friedrich-Blumenbach Institute of Zoology and Anthropology, GZMB, Department of Evolutionary Developmental Genetics, Georg-August-University Göttingen, 37075 Göttingen, Germany contact@ibeetle-base.uni-goettingen.de., Dpt. 
of Evolutionary Developmental Genetics, Georg August University of Göttingen, 37077 Göttingen, Germany.","Dönitz J, Schmitt-Engel C, Grossmann D, Gerischer L, Tech M, Schoppmeier M, Klingler M, Bucher G, Dönitz J, Gerischer L, Hahnke S, Pfeiffer S, Bucher G",", ",", ",57.0,"Germany, Germany" +25450223,HIGDB,0.955801189,HIGDB,0.955801189,Haemophilus influenzae Genome Database,0.94999705,1,http://bioserver1.physics.iisc.ernet.in/HIGDB,301,,"(12.9719,77.5937)",http://web.archive.org/web/20180922212740/http://bioserver1.physics.iisc.ernet.in:80/HIGDB/,2014-10-14,"Medical & Biological Computing Laboratory, School of Biosciences and Technology, VIT University, Vellore 632 014, India.","Swetha RG, Kala Sekar DK, Ramaiah S, Anbarasu A, Sekar K",,"Indian Institute of Science, Bangalore, Supercomputer Education and Research Centre, management of VIT University, Indian Council of Medical Research (ICMR)",1.0,India +25468930,HProtDB,0.989957288,HProtDB,0.989957288,Halophile protein database,0.916799814,1,http://webapp.cabgrid.res.in/protein,301,,"(28.6109,77.1792)",http://web.archive.org/web/20220528094317/http://webapp.cabgrid.res.in/protein/,2014-12-01,"Center for Agricultural Bioinformatics, Indian Agricultural Statistics Research Institute, Pusa Campus, New Delhi 110012, India.","Sharma N, Farooqi MS, Chaturvedi KK, Lal SB, Grover M, Rai A, Pandey P",,,1.0,India +25502817,HGV&TB,0.993074507,HGV&TB,0.993074507,,0,1,http://genome.igib.res.in/hgvtb/index.html,200,,"(28.6453,77.2128)",http://web.archive.org/web/20220328221853/http://genome.igib.res.in/hgvtb/index.html,2014-12-13,"Department of Biotechnology, Delhi Technological University, Bawana Road, Delhi 110042, India, GN Ramachandran Knowledge Center for Genome Informatics, CSIR-Institute of Genomics and Integrative Biology (CSIR-IGIB), Mathura Road, Delhi 110025, India, Acharya Narendra Dev College, University of Delhi, Govindpuri, Kalkaji, New Delhi 110019, India, Council of Scientific and Industrial Research (CSIR), 
Anusandhan Bhawan, 2 Rafi Marg, New Delhi 110001, India and Academy of Scientific and Innovative Research (AcSIR), Anusandhan Bhawan, New Delhi 110001, India.","Sahajpal R, Kandoi G, Dhiman H, Raj S, , Scaria V, Bhartiya D, Hasija Y",,,2.0,"India, India, India, India, India" +25753716,Glyco3D,0.945883989,Glyco3D,0.945883989,,0,1,http://www.glyco3d.cermav.cnrs.fr,"HTTPConnectionPool(host='www.glyco3d.cermav.cnrs.fr', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20130725051751/http://glyco3d.cermav.cnrs.fr:80/,2015-01-01,"Centre de Recherches sur les Macromolécules Végétales, UPR5301, CNRS - Université Grenoble Alpes, BP53, 38041, Grenoble cédex 09, France.","Pérez S, Sarkar A, Rivet A, Breton C, Imberty A",,,20.0,France +25940562,HTT-DB,0.982042775,HTT-DB,0.982042775,,0,1,http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase,302,,,http://web.archive.org/web/20220814084307/http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase/,2015-05-04,"Campus São Gabriel, Universidade Federal do Pampa, São Gabriel.","Dotto BR, Carvalho EL, Silva AF, Duarte Silva LF, Pinto PM, Ortiz MF, Wallau GL",,,27.0, +25971743,GLASS,0.917461634,GLASS,0.917461634,,0,1,http://zhanglab.ccmb.med.umich.edu/GLASS,301,,"(42.2776,-83.7409)",http://web.archive.org/web/20201125143135/https://zhanglab.ccmb.med.umich.edu/GLASS/,2015-05-13,"Department of Biological Chemistry, Department of Computational Medicine and Bioinformatics, University of Michigan, Ann Arbor, MI 48109, USA, Department of Basic Sciences, University of North Dakota, School of Medicine and Health Sciences, Grand Forks, ND 58203, USA and Department of Computer Engineering, Bogazici University, Istanbul, Turkey.","Chan WK, Zhang H, Yang J, Brender JR, Hur J, Özgür A, Zhang Y",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NCI NIH HHS",31.0,"Turkey, United States, United States" 
+25982314,GermlncRNA,0.997644544,GermlncRNA,0.997644544,,0,1,http://germlncrna.cbiit.cuhk.edu.hk,"HTTPConnectionPool(host='germlncrna.cbiit.cuhk.edu.hk', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to germlncrna.cbiit.cuhk.edu.hk timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20210919223016/http://germlncrna.cbiit.cuhk.edu.hk/,2015-05-17,"Reproduction, Development and Endocrinology Program, School of Biomedical Sciences, Faculty of Medicine, The Chinese University of Hong Kong-Shandong University (CUHK-SDU) Joint Laboratory on Reproductive Genetics and CUHK-BGI Innovation Institute of Trans-Omics, The Chinese University of Hong Kong, Shatin, Hong Kong, China, GigaScience, Beijing Genomics Institute-Hong Kong (BGI-HK) Research Institute, 16 Dai Fu Street, Tai Po Industrial Estate, Hong Kong, China, Beijing Genomics Institute-Shenzhen (BGI-SZ), Beishan Industrial Zone, Yantian District, Shenzhen, China and The Eunice Kennedy Shriver National Institute of Child Health and Human Development, National Institutes of Health, Bethesda, MD, USA Reproduction, Development and Endocrinology Program, School of Biomedical Sciences, Faculty of Medicine, The Chinese University of Hong Kong-Shandong University (CUHK-SDU) Joint Laboratory on Reproductive Genetics and CUHK-BGI Innovation Institute of Trans-Omics, The Chinese University of Hong Kong, Shatin, Hong Kong, China, GigaScience, Beijing Genomics Institute-Hong Kong (BGI-HK) Research Institute, 16 Dai Fu Street, Tai Po Industrial Estate, Hong Kong, China, Beijing Genomics Institute-Shenzhen (BGI-SZ), Beishan Industrial Zone, Yantian District, Shenzhen, China and The Eunice Kennedy Shriver National Institute of Child Health and Human Development, National Institutes of Health, Bethesda, MD, USA.","Luk AC, Gao H, Xiao S, Liao J, Wang D, Tu J, Rennert OM, Chan WY, Lee TL",,Intramural NIH HHS,9.0,"China, China, China, China, China, China, Hong Kong, Hong Kong, Hong 
Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, Hong Kong, United States, United States" +25982315,GraP,0.972606599,GraP,0.972606599,of,0.77096051,1,http://structuralbiology.cau.edu.cn/GraP,301,,"(39.9075,116.3972)",http://web.archive.org/web/20220621022559/http://structuralbiology.cau.edu.cn/GraP/,2015-05-17,"State Key Laboratory of Plant Physiology and Biochemistry, College of Biological Sciences, China Agricultural University, Beijing 100193, China and College of Agriculture and Biotechnology, China Agricultural University, Beijing 100193, China.","Zhang L, Guo J, You Q, Yi X, Ling Y, Xu W, Hua J, Su Z",,,7.0,"China, China, China, China" +26039571,GPA,0.900930822,GPA,0.900930822,Gene Perturbation Atlas,0.898518195,1,http://biocc.hrbmu.edu.cn/GPA,"HTTPConnectionPool(host='biocc.hrbmu.edu.cn', port=80): Max retries exceeded with url: /GPA (Caused by ProtocolError('Connection aborted.', BadStatusLine(' OK\r\n')))",,,http://web.archive.org/web/20200127014822/http://biocc.hrbmu.edu.cn:80/GPA/,2015-06-03,"1] College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150086, China [2] Key Laboratory of Cardiovascular Medicine Research, Harbin Medical University, Ministry of Education.","Xiao Y, Gong Y, Lv Y, Lan Y, Hu J, Li F, Xu J, Bai J, Deng Y, Liu L, Zhang G, Yu F, Li X",,,5.0,China +26212453,Histone Antibody Specificity Database,0.951532856,,0,Histone Antibody Specificity Database,0.951532856,1,http://www.histoneantibodies.com,200,,"(33.4484,-112.0740)",http://web.archive.org/web/20221017080750/http://histoneantibodies.com/,2015-07-23,"Center for Epigenetics, Van Andel Research Institute, Grand Rapids, MI 49503, USA. 
Electronic address: scott.rothbart@vai.org.","Rothbart SB, Dickson BM, Raab JR, Grzybowski AT, Krajewski K, Guo AH, Shanle EK, Josefowicz SZ, Fuchs SM, Allis CD, Magnuson TR, Ruthenburg AJ, Strahl BD",,"NCI NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIMH NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",77.0,United States +26314736,GlycoMob,0.989852786,GlycoMob,0.989852786,,0,1,http://www.glycomob.org,405,,,http://web.archive.org/web/20220618211519/http://www.glycomob.org/,2015-08-28,"Department of Chemistry, University of Oxford, Oxford, OX1 3QZ, UK. weston.struwe@chem.ox.ac.uk.","Struwe WB, Pagel K, Benesch JL, Harvey DJ, Campbell MP",,"Biotechnology and Biological Sciences Research Council, British Heart Foundation",23.0, +26342919,Geroprotectors,0.934666336,Geroprotectors,0.934666336,,0,1,http://geroprotectors.org,200,,"(22.2783,114.1747)",http://web.archive.org/web/20220719163256/http://geroprotectors.org/,2015-09-01,"Laboratory of Molecular Radiobiology and Gerontology, Institute of Biology of Komi Science Center of Ural Branch of Russian Academy of Sciences, Syktyvkar 167982, Russia.","Moskalev A, Chernyagina E, de Magalhães JP, Barardo D, Thoppil H, Shaposhnikov M, Budovsky A, Fraifeld VE, Garazha A, Tsvetkov V, Bronovitsky E, Bogomolov V, Scerbacov A, Kuryan O, Gurinovich R, Jellen LC, Kennedy B, Mamoshina P, Dobrovolskaya E, Aliper A, Kaminsky D, Zhavoronkov A",,Wellcome Trust,42.0, +26430546,ICeE,0.880001485,ICeE,0.880001485,,0,1,http://www.ciml.univ-mrs.fr/EWBANK_jonathan/software.html,301,,"(43.2970,5.3811)",http://web.archive.org/web/20141026082851/http://www.ciml.univ-mrs.fr:80/EWBANK_jonathan/software.html,2014-07-01,"Centre d'Immunologie de Marseille-Luminy; UM2 Aix-Marseille Université ; Marseille, France ; INSERM U1104 ; Marseille, France ; CNRS UMR7280 ; Marseille, France.","Montañana F, Julien RA, Vaglio P, Matthews LR, Tichit L, 
Ewbank JJ",,,1.0,"France, France, France" +26510927,HAND,0.570083857,HAND,0.570083857,,0,1,http://www.handdatabase.org,200,,"(34.0522,-118.2437)",http://web.archive.org/web/20221020115032/http://www.handdatabase.org/,2015-10-28,"Department of Epidemiology and Biostatistics, University of Georgia, Athens, GA, 30602, USA. tessg@uga.edu.","Griffin TZ, Kang W, Ma Y, Zhang M",,"NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS",6.0,"Georgia, United States" +"26578586, 30945200",GreeNC,0.990412146,GreeNC,0.990412146,Non-Coding Database,0.685981143,2,http://greenc.sciencedesigners.com,"HTTPConnectionPool(host='greenc.sciencedesigners.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220526141609/http://greenc.sciencedesigners.com/,2019-01-01,"Sequentia Biotech SL, Calle Comte D'Urgell 240, Barcelona, Spain., Sequentia Biotech SL, Carrer Comte d'Urgell 240, Barcelona, Spain.","Paytuví Gallart A, Hermoso Pulido A, Anzar Martínez de Lagrán I, Sanseverino W, Aiese Cigliano R, Paytuvi-Gallart A, Sanseverino W, Aiese Cigliano R",", ",", ",76.0,"Spain, Spain" +26578596,HPMC,0.989222422,HPMC,0.989222422,Human Pan-Microbe Communities,0.749896427,1,http://www.hpmcd.org,"HTTPConnectionPool(host='www.hpmcd.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.hpmcd.org timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210920104926/http://hpmcd.org/,2015-11-17,"Host Microbiota Interactions Laboratory, Wellcome Trust Sanger Institute, Wellcome Genome Campus, Hinxton CB10 1SA, UK Centre for Innate Immunity and Infectious Diseases, Hudson Institute of Medical Research, Clayton 3168, Australia Department of Molecular and Translational Sciences, Monash University, Clayton 3800, Australia sf15@sanger.ac.uk.","Forster SC, Browne HP, Kumar N, Hunt M, Denise H, Mitchell A, Finn RD, Lawley TD",,"Wellcome Trust, Medical Research Council, Biotechnology and Biological Sciences Research Council",29.0,"Australia, Australia" +26578597,HGTree,0.998615086,HGTree,0.998615086,,0,1,http://hgtree.snu.ac.kr,"HTTPConnectionPool(host='hgtree.snu.ac.kr', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='hgtree.snu.ac.kr', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20221013021007/http://hgtree.snu.ac.kr/,2015-11-17,"Interdisciplinary Program in Bioinformatics, Seoul National University, Kwan-ak St. 
599, Kwan-ak Gu, Seoul, 151-741, Republic of Korea Department of Animal Sciences, University of Illinois, Urbana, IL 61801, USA.","Jeong H, Sung S, Kwon T, Seo M, Caetano-Anollés K, Choi SH, Cho S, Nasir A, Kim H",,,18.0,United States +26631432,HDI,0.979960263,HDI,0.979960263,Human Disease Insight,0.578439275,1,http://humandiseaseinsight.com,"HTTPConnectionPool(host='humandiseaseinsight.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180118041850/http://humandiseaseinsight.com/,2015-11-27,"Centre for Interdisciplinary Research In Basic Sciences, Jamia Millia Islamia, Jamia Nagar, New Delhi 110025, India.","Tasleem M, Ishrat R, Islam A, Ahmad F, Hassan MI",,,2.0,India +26657893,HRGRN,0.979772747,HRGRN,0.979772747,,0,1,http://plantgrn.noble.org/hrgrn,200,,"(34.1566,-97.1792)",http://web.archive.org/web/20221017093226/https://plantgrn.noble.org/hrgrn/,2015-12-12,"Plant Biology Division, The Samuel Roberts Noble Foundation, 2510 Sam Noble Parkway, Ardmore, OK 73401, USA.","Dai X, Li J, Liu T, Zhao PX",,,12.0,United States +26673694,GtRNAdb,0.991023004,GtRNAdb,0.991023004,Genomic,0.679828405,1,http://gtrnadb.ucsc.edu,200,,"(36.9741,-122.0308)",http://web.archive.org/web/20220615144820/http://gtrnadb.ucsc.edu./,2015-12-15,"Department of Biomolecular Engineering, University of California Santa Cruz, CA 95064, USA.","Chan PP, Lowe TM",,NHGRI NIH HHS,303.0,United States +26708988,HitPredict,0.99754715,HitPredict,0.99754715,,0,1,http://hintdb.hgc.jp/htp,"HTTPConnectionPool(host='hintdb.hgc.jp', port=80): Max retries exceeded with url: /htp (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200226034942/http://hintdb.hgc.jp:80/htp/,2015-12-26,"Human Genome Center, The Institute of Medical Science, The University of Tokyo, Tokyo 108-8639, Japan 
Department of Computational Biology, Graduate School of Frontier Sciences, The University of Tokyo, Chiba 277-8561, Japan.","López Y, Nakai K, Patil A",,,27.0,"Japan, Japan" +26911352,Human genetic variation database,0.794143543,,0,Human genetic variation database,0.794143543,1,http://www.genome.med.kyoto-u.ac.jp/SnpDB,302,,"(36.4000,139.0833)",http://web.archive.org/web/20160924220138/http://www.genome.med.kyoto-u.ac.jp:80/SnpDB,2016-02-25,"Human Disease Genomics, Center for Genomic Medicine, Kyoto University Graduate School of Medicine, Kyoto, Japan.","Higasa K, Miyake N, Yoshimura J, Okamura K, Niihori T, Saitsu H, Doi K, Shimizu M, Nakabayashi K, Aoki Y, Tsurusaki Y, Morishita S, Kawaguchi T, Migita O, Nakayama K, Nakashima M, Mitsui J, Narahara M, Hayashi K, Funayama R, Yamaguchi D, Ishiura H, Ko WY, Hata K, Nagashima T, Yamada R, Matsubara Y, Umezawa A, Tsuji S, Matsumoto N, Matsuda F",,,139.0,Japan +26989147,HistoneDB,0.99104923,HistoneDB,0.99104923,,0,1,http://www.ncbi.nlm.nih.gov/projects/HistoneDB2.0,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20161220175149/https://www.ncbi.nlm.nih.gov/projects/HistoneDB2.0/,2016-03-17,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA.","Draizen EJ, Shaytan AK, Mariño-Ramírez L, Talbert PB, Landsman D, Panchenko AR",,,36.0,United States +27045824,GPKB,0.936947525,GPKB,0.936947525,Genomic and Proteomic Knowledge Base,0.911863849,1,http://www.bioinformatics.deib.polimi.it/GPKB,302,,"(45.4643,9.1895)",no_wayback,2016-03-01,None,"Masseroli M, Canakoglu A, Ceri S",,"PRIN project, Data-Driven Genomic Computing (GenData 2020), Italian Ministry of the University and Research",5.0, +27098585,Grape-CRISPR,0.896898484,Grape-CRISPR,0.896898484,,0,1,http://biodb.sdau.edu.cn/gc/index.html,"HTTPConnectionPool(host='biodb.sdau.edu.cn', port=80): Max retries exceeded with url: /gc/index.html (Caused by ConnectTimeoutError(, 
'Connection to biodb.sdau.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20180520020708/http://biodb.sdau.edu.cn:80/gc/index.html,2016-04-21,"Beijing Key Laboratory of Grape Science and Enology and Key Laboratory of Plant Resource, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, P.R. China.","Wang Y, Liu X, Ren C, Zhong GY, Yang L, Li S, Liang Z",,"National Natural Science Foundation of China, Hundred Talent of Chinese Academy of Sciences",15.0,China +27113915,Hipposeq,0.914798081,Hipposeq,0.914798081,,0,1,http://hipposeq.janelia.org,302,,"(39.0570,-77.4441)",http://web.archive.org/web/20221016230827/https://hipposeq.janelia.org./,2016-04-26,"Janelia Research Campus, Howard Hughes Medical Institute, Ashburn, United States.","Cembrowski MS, Wang L, Sugino K, Shields BC, Spruston N",,Howard Hughes Medical Institute,127.0,United States +27189608,HLA-ADR,0.913742805,HLA-ADR,0.913742805,Allele Frequency Net Database,0.807270408,1,http://www.allelefrequencies.net/hla-adr,301,,"(32.7831,-96.8067)",http://web.archive.org/web/20200218025825/http://www.allelefrequencies.net:80/hla-adr/,2016-05-17,"Department of Molecular and Clinical Pharmacology, Institute of Translational Medicine Institute of Integrative Biology Liverpool Reviews and Implementation Group.","Ghattaoraya GS, Dundar Y, González-Galarza FF, Maia MH, Santos EJ, da Silva AL, McCabe A, Middleton D, Alfirevic A, Dickson R, Jones AR",,Biotechnology and Biological Sciences Research Council,9.0, +27242033,gEVE,0.957576275,gEVE,0.957576275,,0,1,http://geve.med.u-tokai.ac.jp,200,,"(35.4450,139.3695)",http://web.archive.org/web/20221017041851/http://geve.med.u-tokai.ac.jp/,2016-05-30,"Department of Molecular Life Science, Tokai University School of Medicine, 143 Shimokasuya, Isehara, Kanagawa 259-1193, Japan and Micro/Nano Technology Center, Tokai University, 411 Kitakaname, Hiratsuka, Kanagawa, 259-1292, Japan so@tokai.ac.jp.","Nakagawa S, Takahashi MU",,,15.0,"Japan, Japan" 
+27242038,GESDB,0.99487108,GESDB,0.99487108,Genetic Epidemiology Simulation Database,0.805485106,1,"http://gesdb.nhri.org.twDatabase, http://gesdb.nhri.org.tw","HTTPConnectionPool(host='gesdb.nhri.org.twdatabase', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known')), HTTPConnectionPool(host='gesdb.nhri.org.tw', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to gesdb.nhri.org.tw timed out. (connect timeout=5)'))",,", ","no_wayback, no_wayback",2016-05-30,"Division of Biostatistics and Bioinformatics, Institute of Population Health Sciences, National Health Research Institutes, Zhunan, Taiwan.","Yao PJ, Chung RH",,,1.0, +27374121,HPIDB,0.988542557,HPIDB,0.988542557,,0,1,http://www.agbase.msstate.edu/hpi/main.html,301,,"(33.4505,-88.8196)",http://web.archive.org/web/20170908144344/http://www.agbase.msstate.edu:80/hpi/main.html,2016-07-03,"School of Animal and Comparative Biomedical Sciences, University of Arizona, Tucson, AZ 85721, USA.","Ammari MG, Gresham CR, McCarthy FM, Nanduri B",,,52.0,United States +27436239,GlycoGAIT,0.980752434,GlycoGAIT,0.980752434,Glycosylation and Gut Associated Immune,0.728825476,1,http://apps.connexios.com/glycogait,"HTTPConnectionPool(host='apps.connexios.com', port=80): Max retries exceeded with url: /glycogait (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2016-07-18,"Connexios Life Sciences, Prestige South End, 3rd Floor, South End Road, Basavanagudi, Bangalore 560004, Karnataka, India; Glycoscience Group, National Centre for Biomedical Engineering Science, National University of Ireland Galway, Galway, Ireland. 
Electronic address: anup.oommen@connexios.com.","Oommen AM, Somaiya N, Vijayan J, Kumar S, Venkatachalam S, Joshi L",,"Science Foundation Ireland Strategic Research Cluster (SRC) programme to Alimentary Glycoscience Research Cluster, European Union FP7 programme in support of the GlycoHIT project",2.0,"India, Ireland, Ireland" +27504778,GNPS,0.987443785,GNPS,0.987443785,Global Natural Products Social Molecular Networking,0.953693228,1,http://gnps.ucsd.edu,302,,"(32.7157,-117.1647)",no_wayback,2016-08-01,"Computer Science and Engineering, UC San Diego, La Jolla, United States.","Wang M, Carver JJ, Phelan VV, Sanchez LM, Garg N, Peng Y, Nguyen DD, Watrous J, Kapono CA, Luzzatto-Knaan T, Porto C, Bouslimani A, Melnik AV, Meehan MJ, Liu WT, Crüsemann M, Boudreau PD, Esquenazi E, Sandoval-Calderón M, Kersten RD, Pace LA, Quinn RA, Duncan KR, Hsu CC, Floros DJ, Gavilan RG, Kleigrewe K, Northen T, Dutton RJ, Parrot D, Carlson EE, Aigle B, Michelsen CF, Jelsbak L, Sohlenkamp C, Pevzner P, Edlund A, McLean J, Piel J, Murphy BT, Gerwick L, Liaw CC, Yang YL, Humpf HU, Maansson M, Keyzers RA, Sims AC, Johnson AR, Sidebottom AM, Sedio BE, Klitgaard A, Larson CB, P CAB, Torres-Mendoza D, Gonzalez DJ, Silva DB, Marques LM, Demarque DP, Pociute E, O'Neill EC, Briand E, Helfrich EJN, Granatosky EA, Glukhov E, Ryffel F, Houson H, Mohimani H, Kharbush JJ, Zeng Y, Vorholt JA, Kurita KL, Charusanti P, McPhail KL, Nielsen KF, Vuong L, Elfeki M, Traxler MF, Engene N, Koyama N, Vining OB, Baric R, Silva RR, Mascuch SJ, Tomasi S, Jenkins S, Macherla V, Hoffman T, Agarwal V, Williams PG, Dai J, Neupane R, Gurr J, Rodríguez AMC, Lamsa A, Zhang C, Dorrestein K, Duggan BM, Almaliti J, Allard PM, Phapale P, Nothias LF, Alexandrov T, Litaudon M, Wolfender JL, Kyle JE, Metz TO, Peryea T, Nguyen DT, VanLeer D, Shinn P, Jadhav A, Müller R, Waters KM, Shi W, Liu X, Zhang L, Knight R, Jensen PR, Palsson BO, Pogliano K, Linington RG, Gutiérrez M, Lopes NP, Gerwick WH, Moore BS, Dorrestein PC, Bandeira 
N",,"Villum Fonden, NNF Center for Biosustainability, NIGMS NIH HHS, NIGMS NIH HHS, NIDCR NIH HHS, NIGMS NIH HHS, NCCIH NIH HHS, NCRR NIH HHS, NIGMS NIH HHS, NIDCR NIH HHS, NIAID NIH HHS, FIC NIH HHS, Swiss National Science Foundation, NIAID NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIAID NIH HHS, NIDDK NIH HHS, NIGMS NIH HHS, FIC NIH HHS, NIAID NIH HHS, Swiss National Science Foundation, Novo Nordisk Fonden, NIDCR NIH HHS, NIGMS NIH HHS, NIAID NIH HHS, Swiss National Science Foundation, NCRR NIH HHS",985.0,United States +27527702,HCVIVdb,0.992048419,HCVIVdb,0.992048419,HCV IRES variation database,0.664026876,1,http://www.hcvivdb.org,200,,"(49.1292,17.7692)",http://web.archive.org/web/20220805053123/http://www.hcvivdb.org/,2016-08-15,"Department of Genetics & Microbiology, Faculty of Science, Charles University in Prague, Viničná 5, 128 44, Prague 2, Czech Republic.","Floden EW, Khawaja A, Vopálenský V, Pospíšek M",,"Grantová Agentura eské Republiky, Univerzita Karlova v Praze ()",4.0, +27733501,HipSci,0.994878662,HipSci,0.994878662,Human Induced Pluripotent Stem Cell Initiative,0.975116302,1,"http://www.hipsci.org/lines, http://www.hipsci.org/data/trackhubs","301, 301",,"(51.5085,-0.1257), (51.5085,-0.1257)","http://web.archive.org/web/20220604221127/https://www.hipsci.org/lines/, http://web.archive.org/web/20170114212337/http://www.hipsci.org:80/data/trackhubs",2016-10-12,"European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","Streeter I, Harrison PW, Faulconbridge A, , Flicek P, Parkinson H, Clarke L",,"Wellcome Trust, Wellcome Trust",37.0, +27742821,HieranoiDB,0.9942801,HieranoiDB,0.9942801,,0,1,http://hieranoiDB.sbc.su.se,301,,"(59.3294,18.0687)",http://web.archive.org/web/20221021072010/https://hieranoidb.sbc.su.se/,2016-10-13,"Stockholm Bioinformatics Center, Department of Biochemistry and Biophysics, Stockholm University, Science for Life Laboratory, Box 1031, 17121 Solna, 
Sweden mateusz.kaduk@scilifelab.se.","Kaduk M, Riegler C, Lemp O, Sonnhammer EL",,,8.0,Sweden +27976751,Hepitopes,0.971835365,Hepitopes,0.971835365,,0,1,http://www.expmedndm.ox.ac.uk/hepitopes,302,,"(51.5085,-0.1257)",http://web.archive.org/web/20220617063524/https://www.expmedndm.ox.ac.uk/hepitopes,2016-11-15,"Department of Infectious Diseases and Microbiology, Oxford University Hospitals NHS Foundation Trust, John Radcliffe Hospital, Oxford, UK.","Lumley S, Noble H, Hadley MJ, Callow L, Malik A, Chua YY, Duffey OJ, Grolmusova N, Kumar A, Ravenscroft S, Spencer JI, Neumann-Haefelin C, Thimme R, Andersson M, Klenerman P, Barnes E, Matthews PC",,"Wellcome Trust, Wellcome Trust, Medical Research Council, Wellcome Trust, Wellcome Trust, National Institute for Health Research (NIHR), National Institute for Health Research (NIHR)",9.0, +28083826,HICL,0.993846953,HICL,0.993846953,,0,1,http://medisp.bme.teiath.gr/hicl,302,,"(37.9833,23.6833)",http://web.archive.org/web/20200221114713/http://medisp.bme.teiath.gr:80/hicl/,2017-06-01,"Medical Image and Signal Processing Laboratory (MEDISP), Department of Biomedical Engineering, Technological Educational Institute of Athens, Ag. Spyridonos Street, 122 10, Egaleo, Athens, Greece.","Kostopoulos S, Ravazoula P, Asvestas P, Kalatzis I, Xenogiannopoulos G, Cavouras D, Glotsos D",,"Research Committee of the Technological Educational Institution (T.E.I.) 
of Athens, Greece",4.0,Greece +28090394,GExplore,0.892829657,GExplore,0.892829657,,0,1,http://genome.sfu.ca/gexplore,301,,"(49.2497,-123.1193)",http://web.archive.org/web/20221017035647/http://genome.sfu.ca/gexplore/,2016-09-19,"Department of Biological Sciences, Simon Fraser University , Burnaby, Canada.","Hutter H, Suh J",,,11.0,Canada +28212602,HAPPI,0.973021567,HAPPI,0.973021567,Human Annotated and Predicted Protein Interactions,0.80146156,1,http://discovery.informatics.uab.edu/HAPPI,301,,"(33.5207,-86.8025)",http://web.archive.org/web/20220615171814/http://discovery.informatics.uab.edu/HAPPI/,2017-02-17,"Wenzhou Medical University First Affiliate Hospital, Wenzhou, Zhejiang Province, China. jakechen@uab.edu.","Chen JY, Pandey R, Nguyen TM",,NIDDK NIH HHS,15.0,China +28231303,GreekLex,0.941649795,GreekLex,0.941649795,,0,1,http://www.psychology.nottingham.ac.uk/greeklex,302,,"(52.9536,-1.1505)",http://web.archive.org/web/20220617221929/https://psychology.nottingham.ac.uk/greeklex/,2017-02-23,"School of Psychology, University of Nottingham, Nottingham, United Kingdom.","Kyparissiadis A, van Heuven WJ, Pitchford NJ, Ledgeway T",,,5.0,United Kingdom +28358052,HIVed,0.991903603,HIVed,0.991903603,database,0.602060437,1,http://hivlatency.erc.monash.edu,301,,"(37.5331,-122.2486)",no_wayback,2017-03-30,"Infection and Immunity Program, Biomedicine Discovery Institute, Monash University, Melbourne, VIC 3800, Australia.","Li C, Ramarathinam SH, Revote J, Khoury G, Song J, Purcell AW",,,2.0,Australia +28365729,HIVoligoDB,0.98352025,HIVoligoDB,0.98352025,The HIV oligonucleotide database,0.841203025,1,http://portugene.com/HIVoligoDB,410,,,http://web.archive.org/web/20220308055933/http://portugene.com/HIVoligoDB/,2017-01-01,"Interdisciplinary Centre of Marine and Environmental Research (CIIMAR), University of Porto, Terminal de Cruzeiros do Porto de Leix?Av. 
General Norton de Matos s/n 4450-208 Porto.","Carneiro J, Resende A, Pereira F",,,1.0, +28365739,GrTEdb,0.997847676,GrTEdb,0.997847676,Gossypium raimondii transposable elements database,0.843352804,1,http://www.grtedb.org,"HTTPConnectionPool(host='www.grtedb.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200120192837/http://www.grtedb.org:80/,2017-01-01,"Key Laboratory of Cotton and Rapeseed (Nanjing), The Institute of Industrial Crops, Jiangsu Academy of Agricultural Sciences, Nanjing 210014, China.","Xu Z, Liu J, Ni W, Peng Z, Guo Y, Ye W, Huang F, Zhang X, Xu P, Guo Q, Shen X, Du J",,,6.0,China +28387199,GSA,0.994967302,GSA,0.994967302,Genome Sequence Archive,0.958378598,1,"http://bigd.big.ac.cn/gsa, http://gsa.big.ac.cn","301, 301",,"(39.9075,116.3972), (39.9075,116.3972)","http://web.archive.org/web/20210517105341/https://bigd.big.ac.cn/gsa/, http://web.archive.org/web/20200918093507/https://gsa.big.ac.cn/",2017-02-02,"BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Wang Y, Song F, Zhu J, Zhang S, Yang Y, Chen T, Tang B, Dong L, Ding N, Zhang Q, Bai Z, Dong X, Chen H, Sun M, Zhai S, Sun Y, Yu L, Lan L, Xiao J, Fang X, Lei H, Zhang Z, Zhao W",,"National Key Research Program of China, Key Program of the Chinese Academy of Sciences, Chinese Academy of Sciences, National High-tech R&D Program, National Key Research Program of China, Key Technology Talent Program of the Chinese Academy of Sciences, National Key Research Program of China, Chinese Academy of Sciences, International Partnership Program of the Chinese Academy of Sciences, National High-tech R&D Program, National Key Research Program of China",224.0,China +28415075,HopBase,0.980602145,HopBase,0.980602145,,0,1,"http://hopbase.org, http://hopbase.cgrb.oregonstate.edu","200, 200",,"(44.5646,-123.2620), 
(44.5646,-123.2620)","http://web.archive.org/web/20220612160541/http://hopbase.org/, no_wayback",2017-01-01,"Electrical Engineering and Computer Science, Oregon State University.","Hill ST, Sudarsanam R, Henning J, Hendrix D",,"Oregon State University, U.S. Department of Agriculture",8.0, +28529078,HCSGD,0.997187793,HCSGD,0.997187793,Human Cellular Senescence Gene Database,0.987302474,1,http://bioinfo.au.tsinghua.edu.cn/member/xwwang/HCSGD,301,,"(39.9906,116.2887)",http://web.archive.org/web/20170531045225/http://bioinfo.au.tsinghua.edu.cn:80/member/xwwang/HCSGD/,2017-04-29,"Ministry of Education Key Laboratory of Bioinformatics, Center for Synthetic and Systems Biology, Department of Automation, Tsinghua University, Beijing 100084, China; Bioinformatics Division, Tsinghua National Laboratory for Information Science and Technology, Beijing 100084, China.","Dong Q, Han H, Liu X, Wei L, Zhang W, Zhao Z, Zhang MQ, Wang X",,"Southeast University, National Natural Science Foundation of China, Tsinghua University",5.0,"China, China" +28549078,HEROD,0.983515739,HEROD,0.983515739,,0,1,http://bidd2.nus.edu.sg/herod/index.php,"HTTPConnectionPool(host='bidd2.nus.edu.sg', port=80): Max retries exceeded with url: /herod/index.php (Caused by ConnectTimeoutError(, 'Connection to bidd2.nus.edu.sg timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20170720075410/http://bidd2.nus.edu.sg:80/herod/index.php,2017-10-01,"The State Key Laboratory Breeding Base-Shenzhen Key Laboratory of Chemical Biology, the Graduate School at Shenzhen, Tsinghua University, Shenzhen Kivita Innovative Drug Discovery Institute, Shenzhen 518055, P. R. 
China.","Zeng X, Tao L, Zhang P, Qin C, Chen S, He W, Tan Y, Xia Liu H, Yang SY, Chen Z, Jiang YY, Chen YZ",,"Shenzhen Municipal Government, Shenzhen Municipal Government, China Scholarship Council, National Natural Science Foundation of China",1.0,China +28557712,ICE,0.860381097,ICE,0.860381097,Chemical Environment,0.508983597,1,"http://ice.ntp.niehs.nih.gov, http://doi.org/10.1289/EHP1759","302, 301",,"(38.9807,-77.1003), (37.7621,-122.3971)","http://web.archive.org/web/20221103172336/https://ice.ntp.niehs.nih.gov/, no_wayback",2017-05-25,"Integrated Laboratory Systems, Inc. (ILS), Research Triangle Park, North Carolina, USA.","Bell SM, Phillips J, Sedykh A, Tandon A, Sprankle C, Morefield SQ, Shapiro A, Allen D, Shah R, Maull EA, Casey WM, Kleinstreuer NC",,NIEHS NIH HHS,11.0,United States +28701700,HDNetDB,0.996480525,HDNetDB,0.996480525,,0,1,http://hdnetdb.sysbiolab.eu,503,,,http://web.archive.org/web/20221011165634/http://hdnetdb.sysbiolab.eu/,2017-07-12,"SysBioLab, Centre for Biomedical Research (CBMR), University of Algarve, Faro, Portugal. ravikiranreddy.kalathur@unibas.ch.","Kalathur RKR, Pedro Pinto J, Sahoo B, Chaurasia G, Futschik ME",,,9.0,Portugal +28708269,HemeOxDB,0.994890809,HemeOxDB,0.994890809,Heme Oxygenase Database,0.953735838,1,http://www.researchdsf.unict.it/hemeoxdb,301,,"(37.4922,15.0704)",http://web.archive.org/web/20220615152510/http://www.researchdsf.unict.it/hemeoxdb/,2017-08-09,"Department of Drug Sciences, University of Catania, Viale A. 
Doria 6, 95125, Catania, Italy.","Amata E, Marrazzo A, Dichiara M, Modica MN, Salerno L, Prezzavento O, Nastasi G, Rescifina A, Romeo G, Pittalà V",,Università di Catania,14.0,Italy +28771471,GeOMe,0.991666436,GeOMe,0.991666436,Genomic Observatories Metadatabase,0.737673,1,http://www.geome-db.org,301,,"(45.5946,-121.1787)",http://web.archive.org/web/20221016161411/https://geome-db.org/,2017-08-03,"Berkeley Natural History Museums, University of California, Berkeley, California, United States of America.","Deck J, Gaither MR, Ewing R, Bird CE, Davies N, Meyer C, Riginos C, Toonen RJ, Crandall ED",,,21.0,United States +28967693,HUMA,0.995447814,HUMA,0.995447814,Analysis,0.605870187,1,http://huma.rubi.ru.ac.za,301,,"(-33.3042,26.5328)",http://web.archive.org/web/20220902164113/https://huma.rubi.ru.ac.za/,2017-10-17,"Research Unit in Bioinformatics (RUBi), Department of Biochemistry and Microbiology, Rhodes University, Grahamstown, South Africa.","Brown DK, Tastan Bishop Ö",,"NHGRI NIH HHS, National Institutes of Health, NHGRI NIH HHS",4.0,South Africa +29028885,HoTResDB,0.99197798,HoTResDB,0.99197798,Host Transcriptional Response DataBase,0.976372804,1,http://hotresdb.bu.edu,200,,"(42.3584,-71.0598)",http://web.archive.org/web/20220418003802/http://hotresdb.bu.edu/,2018-01-01,"Bioinformatics Program, Boston University, 24 Cummington Mall, Boston, MA, USA.","Lo J, Zhang D, Speranza E, Negron JA, Connor JH",,"NIAID, NIH, National Institutes of Health, NIAID NIH HHS, NIAID",1.0,United States +29036693,ICG,0.994124234,ICG,0.994124234,,0,1,http://icg.big.ac.cn,301,,"(39.9075,116.3972)",http://web.archive.org/web/20210518233447/https://icg.big.ac.cn/,2018-01-01,"BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Sang J, Wang Z, Li M, Cao J, Niu G, Xia L, Zou D, Wang F, Xu X, Han X, Fan J, Yang Y, Zuo W, Zhang Y, Zhao W, Bao Y, Xiao J, Hu S, Hao L, Zhang Z",,,24.0,China +29040670,ICTV,0.662099421,ICTV,0.662099421,on Taxonomy 
of,0.613117501,1,"http://ictv.global, http://ictv.global/report","302, 302",,"(39.0437,-77.4875), (39.0437,-77.4875)","http://web.archive.org/web/20221110113212/https://ictv.global/, http://web.archive.org/web/20220921220752/https://ictv.global/report",2018-01-01,"Department of Microbiology, University of Alabama at Birmingham, Birmingham, AL 35294, USA.","Lefkowitz EJ, Dempsey DM, Hendrickson RC, Orton RJ, Siddell SG, Smith DB",,"Medical Research Council, Wellcome Trust, Medical Research Council",251.0,United States +29041922,iCAN,0.989729762,iCAN,0.989729762,Institute Collection and Analysis of Nanobodies,0.950395688,1,http://ican.ils.seu.edu.cn,403,,,http://web.archive.org/web/20201112230443/http://ican.ils.seu.edu.cn/,2017-10-17,"The Key Laboratory of Developmental Genes and Human Disease, Ministry of Education, Institute of Life Sciences, Southeast University, Nanjing, China.","Zuo J, Li J, Zhang R, Xu L, Chen H, Jia X, Su Z, Zhao L, Huang X, Xie W",,,13.0,China +"29069473, 33170268",GVM,0.997632504,GVM,0.997632504,Genome Variation Map,0.979110241,2,http://bigd.big.ac.cn/gvm,301,,"(39.9075,116.3972)",http://web.archive.org/web/20211020110435/http://bigd.big.ac.cn/gvm/,2021-01-01,"BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China., China National Center for Bioinformation, Beijing 100101, China.","Song S, Tian D, Li C, Tang B, Dong L, Xiao J, Bao Y, Zhao W, He H, Zhang Z, Li C, Tian D, Tang B, Liu X, Teng X, Zhao W, Zhang Z, Song S",", ",", Youth Innovation Promotion Association of Chinese Academy of Sciences, National Key Research and Development Program of China, Chinese Academy of Sciences, National Key Research and Development Program of China, 13th Five-year Informatization Plan of Chinese Academy of Sciences, National Key Research and Development Program of China, Genomics Data Center Construction of Chinese Academy of Sciences, Chinese Academy of Sciences, International Partnership Program of the Chinese 
Academy of Sciences, Chinese Academy of Sciences",29.0,"China, China, China" +29088455,HCMDB,0.996219456,HCMDB,0.996219456,human cancer metastasis database,0.887695861,1,http://hcmdb.i-sanger.com/index,200,,"(29.8782,121.5494)",http://web.archive.org/web/20220806090732/https://hcmdb.i-sanger.com/index,2018-01-01,"Shanghai Key Laboratory of Regulatory Biology, Institute of Biomedical Sciences, School of Life Sciences, East China Normal University, Shanghai 200241, China.","Zheng G, Ma Y, Zou Y, Yin A, Li W, Dong D",,,44.0,"China, China" +29315358,Horizontal Transposon Transfer DataBase,0.841958195,HTT-DB,0.735216126,Horizontal Transposon Transfer DataBase,0.841958195,1,"http://lpa.saogabriel.unipampa.edu.br, http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase","HTTPConnectionPool(host='lpa.saogabriel.unipampa.edu.br', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 101] Network is unreachable')), 302",,", ","no_wayback, http://web.archive.org/web/20220814084307/http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase/",2018-01-01,"Campus São Gabriel, Universidade Federal do Pampa, Avenida Antonio Trilha, 1847, São Gabriel, Rio Grande do Sul.","Dotto BR, Carvalho EL, da Silva AF, Dezordi FZ, Pinto PM, Campos TL, Rezende AM, Wallau GDL",,,5.0, +29483591,GourdBase,0.99763602,GourdBase,0.99763602,,0,1,http://www.gourdbase.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220525093216/http://gourdbase.cn/,2018-02-26,"Institute of Vegetables, Zhejiang Academy of Agricultural Sciences, Hangzhou, 310021, PR, China.","Wang Y, Xu P, Wu X, Wu X, Wang B, Huang Y, Hu Y, Lin J, Lu Z, Li G",,,9.0,China +"29532461, 33074547",HpBase,0.998306155,HpBase,0.998306155,,0,2,http://cell-innovation.nig.ac.jp/Hpul,301,,"(35.1167,138.9167)",http://web.archive.org/web/20220521004420/https://cell-innovation.nig.ac.jp/Hpul/,2021-01-01,"Center for Information Biology, National Institute of Genetics, Mishima, Japan., 
Department of Genomics and Evolutionary Biology, National Institute of Genetics, Shizuoka, Japan. skinjo@nig.ac.jp.","Kinjo S, Kiyomoto M, Yamamoto T, Ikeo K, Yaguchi S, Kinjo S, Kiyomoto M, Yamamoto T, Ikeo K, Yaguchi S",", ","Grant-in-Aid for Scientific Research, Grant-in-Aid for Scientific Research, Joint Usage/Educational Center, Japan Agency for Medical Research and Development, Grant-in-Aid for Young Scientists, Ministry of Education, Culture, Sports, Science and Technology, ",16.0,"Japan, Japan" +29548284,IDPM,0.976840337,IDPM,0.976840337,ion distribution in protein molecules,0.878570855,1,http://liulab.csrc.ac.cn/idpm,301,,"(39.9075,116.3972)",http://web.archive.org/web/20211021022437/http://liulab.csrc.ac.cn/idpm/,2018-03-16,"Complex Systems Division, Beijing Computational Science Research Center, Beijing, 100193, China.","Xiang X, Liu H",,National Natural Science Foundation of China,1.0,China +29649979,Ginseng Genome Database,0.915763418,,0,Ginseng Genome Database,0.915763418,1,http://ginsengdb.snu.ac.kr,"HTTPConnectionPool(host='ginsengdb.snu.ac.kr', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='ginsengdb.snu.ac.kr', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20200603234525/http://ginsengdb.snu.ac.kr/,2018-04-12,"Department of Plant Science, Plant Genomics and Breeding Institute, Research Institute for Agriculture and Life Sciences, College of Agriculture and Life Sciences, Seoul National University, Seoul, 151-921, Republic of Korea.","Jayakodi M, Choi BS, Lee SC, Kim NH, Park JY, Jang W, Lakshmanan M, Mohan SVG, Lee DY, Yang TJ",,Next-Generation BioGreen21 Program,22.0, +29760467,HOMD,0.944250524,HOMD,0.944250524,Human Oral Microbiome Database,0.942544252,1,http://www.sklod.org/ombc,301,,"(22.2783,114.1747)",no_wayback,2018-05-03,"State Key Laboratory of Oral Diseases, National Clinical Research Center for Oral Diseases, West China Hospital of Stomatology, Sichuan University, Chengdu, China.","Xian P, Xuedong Z, Xin X, Yuqing L, Yan L, Jiyao L, Xiaoquan S, Shi H, Jian X, Ga L",,,10.0,"China, China" +29796383,HaloDom,0.946077734,HaloDom,0.946077734,,0,1,http://www.halodom.bio.auth.gr,301,,"(40.6436,22.9309)",http://web.archive.org/web/20220621184420/http://halodom.bio.auth.gr/,2018-01-15,"Department of Genetics, Development & Molecular Biology, School of Biology, Aristotle University of Thessaloniki, 54124 Thessaloniki, Greece.","Loukas A, Kappas I, Abatzopoulos TJ",,,7.0,Greece +30032758,HFMDB,0.992333651,HFMDB,0.992333651,human fecal metabolome database,0.964461486,1,http://www.fecalmetabolome.ca,"HTTPConnectionPool(host='www.fecalmetabolome.ca', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.fecalmetabolome.ca timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20221017131338/https://fecalmetabolome.ca/,2018-05-12,"Department of Biological Sciences, University of Alberta, Edmonton, AB, Canada. 
Electronic address: n.karu@lacdr.leidenuniv.nl.","Karu N, Deng L, Slae M, Guo AC, Sajed T, Huynh H, Wine E, Wishart DS",,"University of Calgary, The Canadian Institutes of Health Research, Western Economic Diversification, Shanghai Jiao Tong University, Alberta Innovates - Health Solutions, School of Medicine",65.0,Canada +30053237,HDncRNA,0.996445835,HDncRNA,0.996445835,Heart Disease-related Non-coding RNAs Database,0.990663501,1,http://hdncrna.cardiacdev.com,200,,"(39.0997,-94.5786)",no_wayback,2018-01-01,"Key Laboratory of Arrhythmias, Ministry of Education, Tongji University School of Medicine, No. 150, Jimo Road, Pudong New District, Shanghai, China.","Wang WJ, Wang YM, Hu Y, Lin Q, Chen R, Liu H, Cao WZ, Zhu HF, Tong C, Li L, Peng LY",,"National Natural Science Foundation of China, Shanghai Committee of Science and Technology, Shanghai Committee of Science and Technology, Fund for Subject Pi lot Program of Tongji University to Luying Peng, Fund of the Key Laboratory of Regenerative Biology of Chinese Academy of Science, Students Innovation Training Program",7.0,China +30066211,HAMdb,0.991403401,HAMdb,0.991403401,Human Autophagy Modulator Database,0.978727545,1,http://hamdb.scbdd.com,200,,"(30.2936,120.1614)",http://web.archive.org/web/20220617210101/http://hamdb.scbdd.com/,2018-07-31,"Xiangya School of Pharmaceutical Sciences, Central South University, No. 
172, Tongzipo Road, Yuelu District, Changsha, People's Republic of China.","Wang NN, Dong J, Zhang L, Ouyang D, Cheng Y, Chen AF, Lu AP, Cao DS",,"National Key Basic Research Program, National Natural Science Foundation of China, National Natural Science Foundation of China",13.0,China +30196115,HeteroMeth,0.987609446,HeteroMeth,0.987609446,,0,1,http://qianlab.genetics.ac.cn/HeteroMeth,"HTTPConnectionPool(host='qianlab.genetics.ac.cn', port=80): Max retries exceeded with url: /HeteroMeth (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200605195901/http://qianlab.genetics.ac.cn/HeteroMeth/,2018-08-01,"State Key Laboratory of Plant Genomics, Institute of Genetics and Developmental Biology, Chinese Academy of Sciences, Beijing 100101, China; Key Laboratory of Genetic Network Biology, Institute of Genetics and Developmental Biology, Chinese Academy of Sciences, Beijing 100101, China.","Huan Q, Zhang Y, Wu S, Qian W",,Chinese Academy of Sciences,8.0,"China, China" +30247654,HACER,0.997927487,HACER,0.997927487,,0,1,http://bioinfo.vanderbilt.edu/AE/HACER,"HTTPConnectionPool(host='bioinfo.vanderbilt.edu', port=80): Max retries exceeded with url: /AE/HACER (Caused by ConnectTimeoutError(, 'Connection to bioinfo.vanderbilt.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220321010455/http://bioinfo.vanderbilt.edu/AE/HACER/,2019-01-01,"Center for Quantitative Sciences, Vanderbilt University Medical Center, Nashville, TN, USA.","Wang J, Dai X, Berry LD, Cogan JD, Liu Q, Shyr Y",,"NCI NIH HHS, National Cancer Institute, National Cancer Center, NCI NIH HHS",34.0,United States +30266410,HCCDB,0.998168528,HCCDB,0.998168528,Carcinoma,0.526366234,1,http://lifeome.net/database/hccdb,301,,"(39.9075,116.3972)",no_wayback,2018-08-01,"MOE Key Laboratory of Bioinformatics, Beijing National Research Center for Information Science and Technology, Bioinformatics Division, Department of Automation, Tsinghua University, Beijing 100084, China.","Lian Q, Wang S, Zhang G, Wang D, Luo G, Tang J, Chen L, Gu J",,"National Natural Science Foundation of China, Tsinghua University Initiative Scientific Research Program, National Natural Science Foundation of China, National Natural Science Foundation of China",80.0,China +30357361,Glycosciences.DB,0.974128564,Glycosciences.DB,0.974128564,,0,1,http://www.glycosciences.de/database,301,,"(50.5873,8.6755)",http://web.archive.org/web/20221019092346/http://www.glycosciences.de/database/,2019-01-01,"Institute of Veterinary Physiology and Biochemistry, Justus-Liebig University Giessen, Frankfurter Str. 
100, 35392 Giessen, Germany.","Böhm M, Bohne-Lang A, Frank M, Loss A, Rojas-Macias MA, Lütteke T",,,25.0,Germany +30364992,GPs,0.745130181,GPs,0.745130181,,0,1,http://www.ebi.ac.uk/interpro/genomeproperties,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20221017010555/http://www.ebi.ac.uk/interpro/genomeproperties/,2019-01-01,"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","Richardson LJ, Rawlings ND, Salazar GA, Almeida A, Haft DR, Ducq G, Sutton GG, Finn RD",,"National Science Foundation, Biotechnology and Biological Sciences Research Council",10.0, +30371881,iDog,0.997192383,iDog,0.997192383,,0,1,http://bigd.big.ac.cn/idog,301,,"(39.9075,116.3972)",http://web.archive.org/web/20211022024723/http://bigd.big.ac.cn/idog/,2019-01-01,"BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Tang B, Zhou Q, Dong L, Li W, Zhang X, Lan L, Zhai S, Xiao J, Zhang Z, Bao Y, Zhang YP, Wang GD, Zhao W",,"Chinese Academy of Sciences, National Key Research Program of China, National Key R&D Program of China, National Natural Science Foundation of China, International Partnership Program of the Chinese Academy of Sciences, 13th Five-year Informatization Plan of Chinese Academy of Sciences, National Natural Science Foundation of China",14.0,China +30371888,HmtVar,0.997302175,HmtVar,0.997302175,,0,1,http://www.hmtvar.uniba.it,302,,"(44.4938,11.3387)",http://web.archive.org/web/20220328115645/https://www.hmtvar.uniba.it/,2019-01-01,"Department of Biosciences, Biotechnology and Biopharmaceutics, University of Bari, Bari 70126, Italy.","Preste R, Vitale O, Clima R, Gasparre G, Attimonelli M",,"Worldwide Cancer Research, DHOMOS Worldwide Cancer Research",25.0,Italy 
+30380109,iEKPD,0.997320652,iEKPD,0.997320652,,0,1,http://iekpd.biocuckoo.org,200,,"(40.2338,-111.6585)",http://web.archive.org/web/20220815015603/http://iekpd.biocuckoo.org/,2019-01-01,"Department of Bioinformatics & Systems Biology, Key Laboratory of Molecular Biophysics of Ministry of Education, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan 430074, China.","Guo Y, Peng D, Zhou J, Lin S, Wang C, Ning W, Xu H, Deng W, Xue Y",,"National Key R&D Program, Natural Science Foundation of China, China Postdoctoral Science Foundation, Natural Science Foundation of China, Natural Science Foundation of China, Fundamental Research Funds for the Central Universities",10.0,China +30395284,Haemopedia,0.996026993,Haemopedia,0.996026993,,0,1,http://www.haemosphere.org,301,,"(-37.7987,144.9469)",http://web.archive.org/web/20220802130406/https://www.haemosphere.org/,2019-01-01,"Molecular Medicine Division, The Walter and Eliza Hall Institute of Medical Research, Parkville, Victoria, Australia.","Choi J, Baldwin TM, Wong M, Bolden JE, Fairfax KA, Lucas EC, Cole R, Biben C, Morgan C, Ramsay KA, Ng AP, Kauppi M, Corcoran LM, Shi W, Wilson N, Wilson MJ, Alexander WS, Hilton DJ, de Graaf CA",,"National Health and Medical Research Council, National Health and Medical Research Council, National Health and Medical Research Council, National Health and Medical Research Council, National Health and Medical Research Council",41.0,Australia +30418591,HumanNet,0.986103296,HumanNet,0.986103296,,0,1,http://www.inetbio.org/humannet,301,,"(37.5598,126.9439)",http://web.archive.org/web/20221017005156/http://www.inetbio.org/humannet/,2019-01-01,"Department of Biotechnology, College of Life Science and Biotechnology, Yonsei University, Seoul 03722, Korea.","Hwang S, Kim CY, Yang S, Kim E, Hart T, Marcotte EM, Lee I",,"Welch Foundation, National Research Foundation of Korea, National Research Foundation of Korea, National Institutes of Health, National 
Institutes of Health, National Research Foundation of Korea, NCI NIH HHS, NIGMS NIH HHS, Cancer Prevention and Research Institute of Texas, NIGMS NIH HHS, NIGMS NIH HHS, NIDDK NIH HHS",45.0, +30445434,GWAS,0.68312782,GWAS,0.68312782,,0,1,http://www.ebi.ac.uk/gwas,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20220921235758/https://www.ebi.ac.uk/gwas/,2019-01-01,"European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","Buniello A, MacArthur JAL, Cerezo M, Harris LW, Hayhurst J, Malangone C, McMahon A, Morales J, Mountjoy E, Sollis E, Suveges D, Vrousgou O, Whetzel PL, Amode R, Guillen JA, Riat HS, Trevanion SJ, Hall P, Junkins H, Flicek P, Burdett T, Hindorff LA, Cunningham F, Parkinson H",,"National Institutes of Health, NHGRI NIH HHS",932.0, +30703169,HuVarBase,0.980674505,HuVarBase,0.980674505,,0,1,http://www.iitm.ac.in/bioinfo/huvarbase,302,,"(13.0156,80.2467)",http://web.archive.org/web/20221008035513/https://www.iitm.ac.in/bioinfo/huvarbase/,2019-01-31,"Department of Biotechnology, Bhupat and Jyoti Mehta School of BioSciences, Indian Institute of Technology Madras, Chennai, Tamilnadu, India.","Ganesan K, Kulandaisamy A, Binny Priya S, Gromiha MM",,"Department of Biotechnology, Ministry of Science and Technology",5.0,India +30967549,iFISH,0.997145891,iFISH,0.997145891,,0,1,http://ifish4u.org,200,,"(41.8919,12.5113)",http://web.archive.org/web/20221105131810/http://www.ifish4u.org/,2019-04-09,,,,,0.0, +30999860,HumCFS,0.988169968,HumCFS,0.988169968,,0,1,http://webs.iiitd.edu.in/raghava/humcfs,301,,"(28.6453,77.2128)",http://web.archive.org/web/20210927044030/https://webs.iiitd.edu.in/raghava/humcfs/,2019-04-18,"Center for Computational Biology, Indraprastha Institute of Information Technology, New Delhi, 110020, India.","Kumar R, Nagpal G, Kumar V, Usmani SS, Agrawal P, Raghava GPS",,"Science and Engineering Research Board (IN), JC Bose fellowship",23.0,India 
+31139565,HNCDB,0.998145655,HNCDB,0.998145655,Head and Neck Cancer Database,0.987272012,1,http://hncdb.cancerbio.info,200,,"(22.2783,114.1747)",http://web.archive.org/web/20220806104505/http://hncdb.cancerbio.info/,2019-05-14,"Key Laboratory of Oral Medicine, Guangzhou Institute of Oral Disease, Stomatology Hospital of Guangzhou Medical University, Guangzhou, China.","Zhang Q, Li X, Su X, Zhang H, Wang H, Yin S, Pei X, Yang A, Zuo Z",,,1.0,China +31210272,GrainGenes,0.994854987,GrainGenes,0.994854987,Genome Specific Primers,0.700459212,1,"http://wheat.pw.usda.gov, http://graingenes.org","HTTPConnectionPool(host='wheat.pw.usda.gov', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to wheat.pw.usda.gov timed out. (connect timeout=5)')), 301",,", (42.4406,-76.4966)","http://web.archive.org/web/20220901081907/http://wheat.pw.usda.gov/, http://web.archive.org/web/20221006220341/https://graingenes.org/",2019-01-01,"Western Regional Research Center, Crop Improvement and Genetics Research Unit, United States Department of Agriculture-Agricultural Research Service, Albany, CA, USA.","Blake VC, Woodhouse MR, Lazo GR, Odell SG, Wight CP, Tinker NA, Wang Y, Gu YQ, Birkett CL, Jannink JL, Matthews DE, Hane DL, Michel SL, Yao E, Sen TZ",,,23.0,"United States, United States" +31296229,GENT2,0.991305053,GENT2,0.991305053,Gene Expression database of Normal and Tumor tissues 2,0.9513432,1,http://gent2.appex.kr,200,,"(36.3631,127.3729)",http://web.archive.org/web/20220516152316/http://gent2.appex.kr/,2019-07-11,"Genome Editing Research Center, Korea Research Institute of Bioscience and Biotechnology (KRIBB), Daejeon, 34141, Korea.","Park SJ, Yoon BH, Kim SK, Kim SY",,,62.0, +31504765,GMrepo,0.998026729,GMrepo,0.998026729,data repository for Gut Microbiota,0.834152177,1,http://gmrepo.humangut.info,301,,"(39.9075,116.3972)",http://web.archive.org/web/20221107201145/https://gmrepo.humangut.info/,2020-01-01,"Key Laboratory of Molecular Biophysics of 
the Ministry of Education, Hubei Key Laboratory of Bioinformatics and Molecular-imaging, Department of Bioinformatics and Systems Biology, College of Life Science and Technology, Huazhong University of Science and Technology, 430074 Wuhan, Hubei, China.","Wu S, Sun C, Li Y, Wang T, Jia L, Lai S, Yang Y, Luo P, Dai D, Yang YQ, Luo Q, Gao NL, Ning K, He LJ, Zhao XM, Chen WH",,"National Key Research and Development Program of China, National Natural Science Foundation of China, Shanghai Municipal Science and Technology Major Project, National Key Research and Development Program of China, Natural Science Foundation of Shanghai, National Natural Science Foundation of China, National Natural Science Foundation of China",30.0,China +31509535,GutFeelingKB,0.991220474,GutFeelingKB,0.991220474,,0,1,http://hive.biochemistry.gwu.edu/gfkb,301,,"(38.8951,-77.0364)",http://web.archive.org/web/20220412032338/https://hive.biochemistry.gwu.edu/gfkb,2019-09-11,"The Department of Biochemistry & Molecular Medicine, School of Medicine and Health Sciences, George Washington University Medical Center, Washington, DC, United States of America.","King CH, Desai H, Sylvetsky AC, LoTempio J, Ayanyan S, Carrie J, Crandall KA, Fochtman BC, Gasparyan L, Gulzar N, Howell P, Issa N, Krampis K, Mishra L, Morizono H, Pisegna JR, Rao S, Ren Y, Simonyan V, Smith K, VedBrat S, Yao MD, Mazumder R",,"NIAAA NIH HHS, NCATS NIH HHS, National Science Foundation, NCI NIH HHS, BLRD VA, NCI NIH HHS",32.0,United States +31524396,HybridMolDB,0.995581388,HybridMolDB,0.995581388,,0,1,http://www.idruglab.com/HybridMolDB/index.php,301,,"(22.5455,114.0683)",no_wayback,2019-09-25,"Joint International Research Laboratory of Synthetic Biology and Medicine, Guangdong Provincial Engineering and Technology Research Center of Biopharmaceuticals, School of Biology and Biological Engineering , South China University of Technology , Guangzhou 510006 , China.","Li Y, Zhao C, Zhang J, Zhai S, Wei B, Wang L",,"National Natural 
Science Foundation of China, Science and Technology Program of Guangzhou, Medical Scientific Research Foundation of Guangdong Province, Medical Scientific Research Foundation of Guangdong Province, Natural Science Foundation of Guangdong Province, National Natural Science Foundation of China, Ministry of Education of the People's Republic of China",1.0,"China, China" +31566222,GWAS Atlas,0.990907868,GWAS Atlas,0.990907868,,0,1,http://bigd.big.ac.cn/gwas,301,,"(39.9075,116.3972)",no_wayback,2020-01-01,"National Genomics Data Center, Beijing 100101, China.","Tian D, Wang P, Tang B, Teng X, Li C, Liu X, Zou D, Song S, Zhang Z",,"The Youth Innovation Promotion Association of Chinese Academy of Sciences, National Natural Science Foundation of China, Chinese Academy of Sciences, CAS, National Key Research and Development Program of China, K.C. Wong Education Foundation, CAS, National Natural Science Foundation of China, National Natural Science Foundation of China, Chinese Academy of Sciences, National Natural Science Foundation of China, The 100 Talent Program of the Chinese Academy of Sciences",27.0,China +31584099,gutMDisorder,0.997505665,gutMDisorder,0.997505665,,0,1,http://bio-annotation.cn/gutMDisorder,302,,"(39.9906,116.2887)",http://web.archive.org/web/20220412032942/http://bio-annotation.cn/gutMDisorder/,2020-01-01,"NHC and CAMS Key Laboratory of Molecular Probe and Targeted Theranostics, Harbin Medical University, Harbin, Heilongjiang, China, 150028.","Cheng L, Qi C, Zhuang H, Fu T, Zhang X",,"The Tou-Yan Innovation Team Program of the Heilongjiang Province, China Postdoctoral Science Foundation, National Natural Science Foundation of China, Heilongjiang Province Postdoctoral Fund, China Postdoctoral Science Foundation, Heilongjiang Province Postdoctoral Fund",63.0,China +31600197,iCite,0.957005501,iCite,0.957005501,Open Citation 
Collection,0.840594471,1,http://icite.od.nih.gov,301,,"(45.8399,-119.7006)",http://web.archive.org/web/20221108102441/https://icite.od.nih.gov/,2019-10-10,"Office of Portfolio Analysis, Division of Program Coordination, Planning, and Strategic Initiatives, Office of the Director, National Institutes of Health, Bethesda, Maryland, United States of America.","Hutchins BI, Baker KL, Davis MT, Diwersy MA, Haque E, Harriman RM, Hoppe TA, Leicht SA, Meyer P, Santangelo GM",,Intramural NIH HHS,20.0,United States +31630971,GESUR,0.972582638,GESUR,0.972582638,,0,1,http://gesur.cancer-pku.cn,200,,"(36.0649,120.3804)",no_wayback,2019-09-25,"School of Life Sciences and BIOPIC, Peking University, Beijing, 100871, China. Electronic address: tangzefang@pku.edu.cn.","Tang Z, Chen T, Ren X, Zhang Z",,"Peking University, Key Technologies R&D Program, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",0.0,China +31725860,HisgAtlas,0.997663379,HisgAtlas,0.997663379,,0,1,http://biokb.ncpsb.org/HisgAtlas,301,,"(22.2783,114.1747)",no_wayback,2017-01-01,"State Key Laboratory of Proteomics, Beijing Proteome Research Center, National Center for Protein Sciences, Beijing Institute of Lifeomics, Beijing 102206, China.","Liu Y, He M, Wang D, Diao L, Diao L, Liu J, Tang L, Guo S, He F, Li D",,Beijing Nova Program,8.0,China +31725863,GRONS,0.979251385,GRONS,0.979251385,Genetic Resources Of Nicotine and Smoking,0.928520481,1,http://bioinfo.tmu.edu.cn/GRONS,"HTTPConnectionPool(host='bioinfo.tmu.edu.cn', port=80): Max retries exceeded with url: /GRONS (Caused by ConnectTimeoutError(, 'Connection to bioinfo.tmu.edu.cn timed out. 
(connect timeout=5)'))",,,no_wayback,2017-01-01,"School of Biomedical Engineering, Tianjin Medical University, Tianjin 300070, China.","Fang Z, Yang Y, Hu Y, Li MD, Wang J",,National Natural Science Foundation of China,0.0,China +31783725,HKPocket,0.97285378,HKPocket,0.97285378,Human Kinase Pocket,0.730029374,1,http://zhaoserver.com.cn/HKPocket/HKPocket.html,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220621024233/http://zhaoserver.com.cn/HKPocket/HKPocket.html,2019-11-29,"Department of Physics, Central China Normal University, Wuhan, 430079, China.","Wang H, Qiu J, Liu H, Xu Y, Jia Y, Zhao Y",,"National Natural Science Foundation of China, National Natural Science Foundation of China, Natural Science Foundation of Hubei, self-determined research funds of CCNU from the colleges' basic research and operation of MOE, self-determined research funds of CCNU from the colleges’ basic research and operation of MOE",2.0,"China, China" +31811943,GliomaDB,0.996975422,GliomaDB,0.996975422,,0,1,http://bigd.big.ac.cn/gliomaDB,301,,"(39.9075,116.3972)",http://web.archive.org/web/20210615075725/https://bigd.big.ac.cn/gliomaDB/,2019-08-01,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China; University of Chinese Academy of Sciences, Beijing 100049, China.","Yang Y, Sui Y, Xie B, Qu H, Fang X",,"National Key R&D Program of China, National Key R&D Program of China, National Key R&D Program of China, National Key R&D Program of China, Chinese Academy of Sciences, National Key R&D Program of China",5.0,"China, China" +31841142,GlyMDB,0.990882347,GlyMDB,0.990882347,Glycan Microarray Database,0.747555542,1,http://www.glycanstructure.org/glymdb,301,,"(40.6259,-75.3705)",http://web.archive.org/web/20220616031808/http://www.glycanstructure.org/glymdb/,2020-04-01,"Departments of Biological Sciences and Bioengineering, Lehigh University, Bethlehem, PA 18015, USA.","Cao Y, Park SJ, Mehta AY, 
Cummings RD, Im W",,"NIGMS NIH HHS, National Institutes of Health, National Science Foundation, National Institutes of Health, NIGMS NIH HHS",4.0,United States +31976536,HBDB,0.696552753,HBDB,0.696552753,Human Breathomics Database,0.63233763,1,http://hbdb.cmdm.tw,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20220518132352/https://hbdb.cmdm.tw/,2020-01-01,"Graduate Institute of Biomedical Electronics and Bioinformatics, National Taiwan University, No. 1, Sec. 4, Roosevelt Road, Taipei 10617, Taiwan.","Kuo TC, Tan CE, Wang SY, Lin OA, Su BH, Hsu MT, Lin J, Cheng YY, Chen CS, Yang YC, Chen KH, Lin SW, Ho CC, Kuo CH, Tseng YJ",,"Taiwan Ministry of Science and Technology, Taiwan Ministry of Science and Technology, Taiwan Ministry of Science and Technology, National Taiwan University, National Taiwan University, Taiwan Ministry of Science and Technology",7.0, +32055858,GREG,0.972979486,GREG,0.972979486,The Gene Regulation Graph Database,0.943068614,1,http://mora-lab.github.io/projects/greg.html,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20201014233624/https://mora-lab.github.io/projects/greg.html,2020-01-01,"School of Basic Medical Sciences, Guangzhou Medical University, Panyu Campus of Guangzhou Medical University, Xinzao, 511436 Guangzhou, P.R. 
China.","Mei S, Huang X, Xie C, Mora A",,"Joint School of Life Sciences, Guangzhou Medical University, Chinese Academy of Sciences",1.0,China +32090260,HSPMdb,0.998334646,HSPMdb,0.998334646,,0,1,http://bioinfo.imtech.res.in/bvs/hspmdb/index.php,404,,,no_wayback,2020-01-01,"Council of Scientific and Industrial Research-Institute of Microbial Technology, Sector 39A, Chandigarh-160036, India.","Singh P, Unik B, Puri A, Nagpal G, Singh B, Gautam A, Sharma D",,"Science and Engineering Research Board, Council of Scientific and Industrial Research",0.0,India +32163115,GRALL,0.987117052,GRALL,0.987117052,The Glycine Receptor Allosteric Ligands Library,0.934507086,1,http://ifm.chimie.unistra.fr/grall,301,,"(48.5839,7.7455)",no_wayback,2020-06-01,"Institut de Chimie de Strasbourg, UMR7177, CNRS, Université de Strasbourg, F-67083 Strasbourg Cedex, France.","Cerdan AH, Sisquellas M, Pereira G, Barreto Gomes DE, Changeux JP, Cecchini M",,"European Union’s Horizon 2020 Framework Program for Research and Innovation, Ecole Doctorale des Sciences Chimiques, French National Research Agency",1.0,France +32179762,Hepamine,0.994047046,Hepamine,0.994047046,,0,1,http://www.hepamine.de,200,,"(50.1155,8.6842)",http://web.archive.org/web/20190128014601/http://www.hepamine.de:80/,2020-03-16,"Division of Hepatology, Department of Medicine II, Medical Faculty Mannheim, Heidelberg University, Mannheim, Germany.","Itzel T, Neubauer M, Ebert M, Evert M, Teufel A",,,2.0,Germany +32315389,HotSpot3D,0.995569845,HotSpot3D,0.995569845,,0,1,http://niulab.scgrid.cn/HotSpot3D,302,,"(39.9075,116.3972)",http://web.archive.org/web/20220522034702/http://niulab.scgrid.cn/HotSpot3D/,2020-06-01,"Computer Network Information Center, Chinese Academy of Sciences.","Chen S, He X, Li R, Duan X, Niu B",,"Transformation Project in Scientific and Technological Achievements, National Natural Science Foundation of China",2.0, +32330167,geoBoundaries,0.997810702,geoBoundaries,0.997810702,geoBoundaries Global 
Administrative Database,0.939703067,1,http://www.geoboundaries.org,301,,"(38.9209,-77.5039)",no_wayback,2020-04-24,"Department of Applied Science, William & Mary, Williamsburg, Virginia, United States of America.","Runfola D, Anderson A, Baier H, Crittenden M, Dowker E, Fuhrig S, Goodman S, Grimsley G, Layko R, Melville G, Mulder M, Oberman R, Panganiban J, Peck A, Seitz L, Shea S, Slevin H, Youngerman R, Hobbs L",,Thomas F. and Kate Miller Jeffress Memorial Trust,12.0,United States +32422927,HDVdb,0.997277975,HDVdb,0.997277975,,0,1,http://hdvdb.bio.wzw.tum.de,301,,"(48.4035,11.7488)",no_wayback,2020-05-14,"Department of Bioinformatics, Wissenschaftszentrum Weihenstephan, Technische Universität München, 85354 Freising, Germany.","Usman Z, Velkov S, Protzer U, Roggendorf M, Frishman D, Karimzadeh H",,,6.0,Germany +32496513,gutMEGA,0.989511013,gutMEGA,0.989511013,gut MEtaGenome Atlas,0.956748178,1,http://gutmega.omicsbio.info,200,,"(22.2783,114.1747)",http://web.archive.org/web/20220329135136/http://gutmega.omicsbio.info/,2021-05-01,None,"Zhang Q, Yu K, Li S, Zhang X, Zhao Q, Zhao X, Liu Z, Cheng H, Liu ZX, Li X",,"Guangdong Introducing Innovative and Entrepreneurial Teams, Scientific and Technical Innovative Youth Talents of Guangdong, National Natural Science Foundation of China",8.0, +32510565,GreenCircRNA,0.994266331,GreenCircRNA,0.994266331,,0,1,http://greencirc.cn,301,,"(22.5455,114.0683)",http://web.archive.org/web/20220615235259/http://greencirc.cn/,2020-01-01,"College of Life Sciences, Shaanxi Normal University, West Chang'an Street, Xi'an 710062, China.","Zhang J, Hao Z, Yin S, Li G",,"Fundamental Research Funds for the Central Universities, National Science Foundation of China, National Science Foundation of China, National Science Foundation of China, Program for New Century Excellent Talents in University",4.0,China 
+32576192,GPSno,0.970677733,GPSno,0.970677733,,0,1,http://hanlab.uth.edu/GPSno,404,,,http://web.archive.org/web/20220617055850/https://hanlab.uth.edu/GPSno/,2020-06-23,"Department of Biochemistry and Molecular Biology, McGovern Medical School at The University of Texas Health Science Center at Houston, Houston, TX, 77030, USA.","Liu Y, Ruan H, Li S, Ye Y, Hong W, Gong J, Zhang Z, Jing Y, Zhang X, Diao L, Han L",,"Cancer Prevention and Research Institute of Texas, Cancer Prevention and Research Institute of Texas",4.0,United States +32661237,GlobalFungi,0.989663839,GlobalFungi,0.989663839,,0,1,http://globalfungi.com,301,,"(50.0408,15.7766)",http://web.archive.org/web/20220829005950/https://globalfungi.com/,2020-07-13,"Institute of Microbiology of the Czech Academy of Sciences, Vídeňská 1083, 14220, Praha 4, Czech Republic.","Větrovský T, Morais D, Kohout P, Lepinay C, Algora C, Awokunle Hollá S, Bahnmann BD, Bílohnědá K, Brabcová V, D'Alò F, Human ZR, Jomura M, Kolařík M, Kvasničková J, Lladó S, López-Mondéjar R, Martinović T, Mašínová T, MeszároÅ¡ová L, Michalčíková L, Michalová T, Mundra S, Navrátilová D, Odriozola I, Piché-Choquette S, Å tursová M, Å vec K, Tláskal V, Urbanová M, Vlk L, Voříšková J, Žifčáková L, Baldrian P",,"Ministerstvo Å kolství, Mládeže a Tělovýchovy (Ministry of Education, Youth and Sports), Ministerstvo Å kolství, Mládeže a Tělovýchovy, Ministerstvo Å kolství, Mládeže a Tělovýchovy",30.0, +32707486,hPSCreg,0.988741887,hPSCreg,0.988741887,Human Pluripotent Stem Cell Registry,0.973995652,1,http://hpscreg.eu,301,,"(48.1374,11.5755)",http://web.archive.org/web/20221103040005/https://hpscreg.eu/,2020-06-27,"Berlin-Brandenburger Centrum für Regenerative Therapien (BCRT), Charité - Universitätsmedizin Berlin, Berlin, Germany. 
Electronic address: nancy.mah@ibmt.fraunhofer.de.","Mah N, Seltmann S, Aran B, Steeg R, Dewender J, Bultjer N, Veiga A, Stacey GN, Kurtz A",,"European Union’s Horizon 2020 Research and Innovation Programme, Innovative Medicines Initiative",3.0,Germany +32758136,GSDB,0.924460858,GSDB,0.924460858,Genome Structure Database,0.767952025,1,http://sysbio.rnet.missouri.edu/3dgenome/GSDB,404,,,http://web.archive.org/web/20211028042751/http://sysbio.rnet.missouri.edu/3dgenome/GSDB/,2020-08-05,"Department of Computer Science, University of Colorado, Colorado Springs, CO, 80918, USA.","Oluwadare O, Highsmith M, Turner D, Lieberman Aiden E, Cheng J",,National Science Foundation,4.0,United States +32858223,hTFtarget,0.996637821,hTFtarget,0.996637821,,0,1,http://bioinfo.life.hust.edu.cn/hTFtarget,200,,"(31.2222,121.4581)",http://web.archive.org/web/20221101234659/http://bioinfo.life.hust.edu.cn/hTFtarget,2020-04-01,"Department of Bioinformatics and Systems Biology, Key Laboratory of Molecular Biophysics of the Ministry of Education, Hubei Bioinformatics and Molecular Imaging Key Laboratory, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan 430074, China.","Zhang Q, Liu W, Zhang HM, Xie GY, Miao YR, Xia M, Guo AY",,"National Natural Science Foundation of China, China Postdoctoral Science Foundation, National Natural Science Foundation of China, National Key R&D Program of China, National Natural Science Foundation of China",25.0,China +32941628,IDDB,0.976911902,IDDB,0.976911902,infertility disease database,0.88215218,1,http://mdl.shsmu.edu.cn/IDDB,302,,"(31.2222,121.4581)",http://web.archive.org/web/20221102055810/http://mdl.shsmu.edu.cn/IDDB/,2021-01-01,"Department of Assisted Reproduction, Shanghai Ninth People's Hospital, Shanghai Jiao Tong University School of Medicine (SJTU-SM), Shanghai 200011, China.","Wu J, Li D, Liu X, Li Q, He X, Wei J, Li X, Li M, Rehman AU, Xia Y, Wu C, Zhang J, Lu X",,"National Natural Science Foundation of 
China, Shanghai Science and Technology Innovation, Two-hundred Talent, National Natural Science Foundation of China, Chinese National Precise Medical Research, Shanghai Health and Family Planning System Excellent Subject Leader and Excellent Young Medical Talents Training Program, Shanghai Health and Family Planning Commission, Shanghai Health and Family Planning Commission, Clinical Rese Clinical Research Program of 9th People's Hospital, Key New Drug Creation and Manufacturing Program, Shanghai Municipal Education Commission, National Natural Science Foundation of China, National Natural Science Foundation of China, Shanghai Health and Family Planning System Excellent Subject Leader and Excellent Young Medical Talents Training Program, National Natural Science Foundation of China",6.0,China +33045729,GIMICA,0.988819897,GIMICA,0.988819897,Host Genetic and Immune Factors Shaping Human Microbiota,0.927597477,1,http://idrblab.org/gimica,301,,"(37.3394,-121.8950)",http://web.archive.org/web/20220804170537/https://idrblab.org/gimica/,2021-01-01,"College of Pharmaceutical Sciences, Zhejiang University, Hangzhou 310058, China.","Tang J, Wu X, Mou M, Wang C, Wang L, Li F, Guo M, Yin J, Xie W, Wang X, Wang Y, Ding Y, Xue W, Zhu F",,"Fundamental Research Funds for the Central Universities, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities, Key R&D Program of Zhejiang Province, Fundamental Research Funds for the Central Universities, Zhejiang University, National Key Research and Development Program of China, Fundamental Research Funds for the Central Universities, Fundamental Research Funds for the Central Universities, China Knowledge Centre for Engineering Sciences and Technology, National Natural Science Foundation of China, Technology Innovation and Application Demonstration Project of Chongqing",10.0,China +33119754,HeRA,0.993696928,HeRA,0.993696928,Human enhancer RNA 
Atlas,0.875278735,1,http://hanlab.uth.edu/HeRA,404,,,http://web.archive.org/web/20221011090418/https://hanlab.uth.edu/HeRA/,2021-01-01,"Department of Biochemistry and Molecular Biology, McGovern Medical School at The University of Texas Health Science Center at Houston, Houston, TX 77030, USA.","Zhang Z, Hong W, Ruan H, Jing Y, Li S, Liu Y, Wang J, Li W, Diao L, Han L",,"NIDCR NIH HHS, NHLBI NIH HHS, NIGMS NIH HHS, Cancer Prevention Research Institute of Texas, NIDCR NIH HHS, National Institutes of Health, NIGMS NIH HHS, NIDCR NIH HHS, NHLBI NIH HHS, Cancer Prevention Research Institute of Texas, National Institutes of Health, Cancer Prevention Research Institute of Texas, National Institutes of Health, Cancer Prevention Research Training Program, National Institutes of Health, National Institutes of Health, National Institutes of Health",6.0,United States +33125055,HbVar,0.997882962,HbVar,0.997882962,,0,1,http://globin.bx.psu.edu/hbvar,301,,"(40.7934,-77.8600)",http://web.archive.org/web/20220808182103/https://globin.bx.psu.edu/hbvar/,2021-01-01,"The Pennsylvania State University, Center for Computational Biology and Bioinformatics, University Park, PA, USA.","Giardine BM, Joly P, Pissard S, Wajcman H, K Chui DH, Hardison RC, Patrinos GP",,"European Commission, United States Public Health Service, European Commission, European Commission, Golden Helix Foundation, NIGMS NIH HHS, NIDDK NIH HHS, United States Public Health Service",2.0,United States +33136065,HPREP,0.984554529,HPREP,0.984554529,,0,1,http://bioinfo.bdu.ac.in/hprep,301,,"(10.8155,78.6965)",http://web.archive.org/web/20221016232130/http://bioinfo.bdu.ac.in/hprep/,2020-11-03,"Department of Bioinformatics, School of Life Sciences, Bharathidasan University, Tiruchirappalli 620 024, India.","Mary Rajathei D, Parthasarathy S, Selvaraj S",,University Grants Commission,0.0,India +33137185,iCSDB,0.994947433,iCSDB,0.994947433,,0,1,http://www.kobic.re.kr/icsdb,"HTTPConnectionPool(host='www.kobic.re.kr', port=80): 
Max retries exceeded with url: /icsdb (Caused by ConnectTimeoutError(, 'Connection to www.kobic.re.kr timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220728155646/https://www.kobic.re.kr/icsdb/,2021-01-01,"Department of Bio-Information Science, Ewha Womans University, Seoul 03760, Republic of Korea.","Choi A, Jang I, Han H, Kim MS, Choi J, Lee J, Cho SY, Jun Y, Lee C, Kim J, Lee B, Lee S",,"National Research Foundation of Korea, National Research Foundation of Korea, National Research Foundation of Korea, Korea Research Institute of Bioscience and Biotechnology",1.0, +33151298,GRNdb,0.997971952,GRNdb,0.997971952,,0,1,http://www.grndb.com,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220617184853/http://www.grndb.com/,2021-01-01,"Center for Bioinformatics and Computational Biology, and Shanghai Key Laboratory of Regulatory Biology, Institute of Biomedical Sciences, School of Life Sciences, East China Normal University, Shanghai 200241, China.","Fang L, Li Y, Ma L, Xu Q, Tan F, Chen G",,"National Key Research and Development Program of China, National Natural Science Foundation of China, Clinical Research Plan of SHDC, National Natural Science Foundation of China, Shanghai Municipal Health Commission",6.0,"China, China" +33174597,GlycoPOST,0.993143857,GlycoPOST,0.993143857,,0,1,http://glycopost.glycosmos.org,302,,"(35.1167,138.9167)",http://web.archive.org/web/20221011173051/https://glycopost.glycosmos.org/,2021-01-01,"Division of Bioinformatics, Niigata University Graduate School of Medical and Dental Sciences, 1-757 Asahimachi-dori, Chuo-ku, Niigata 951-8510, Japan.","Watanabe Y, Aoki-Kinoshita KF, Ishihama Y, Okuda S",,"Japan Science and Technology Agency, Japan Science and Technology Agency",17.0,Japan +33221926,HumanMetagenomeDB,0.983011484,HumanMetagenomeDB,0.983011484,,0,1,http://webapp.ufz.de/hmgdb,301,,"(51.3396,12.3713)",http://web.archive.org/web/20211130070329/https://webapp.ufz.de/hmgdb/,2021-01-01,"Institute of Mathematics and 
Computer Sciences, University of São Paulo, São Carlos, Brazil.","Kasmanas JC, Bartholomäus A, Corrêa FB, Tal T, Jehmlich N, Herberth G, von Bergen M, Stadler PF, Carvalho ACPLF, Nunes da Rocha U",,"FAPESP, Helmholtz Association, FAPESP",2.0,Brazil +33237299,GreenPhylDB,0.997109532,GreenPhylDB,0.997109532,,0,1,http://www.greenphyl.org,301,,"(43.6109,3.8763)",http://web.archive.org/web/20220622002308/https://greenphyl.org/,2021-01-01,"Bioversity International, Parc Scientifique Agropolis II, 34397 Montpellier, France.","Valentin G, Abdel T, Gaëtan D, Jean-François D, Matthieu C, Mathieu R",,"Syngenta Seeds SAS, CGIAR Research Program, Roots, Tubers and Bananas",8.0,France +33252190,GRINdb,0.997000337,GRINdb,0.997000337,RIN database,0.806897382,1,http://lmc.uab.es/grindb,302,,"(41.4911,2.1408)",no_wayback,2020-11-30,"Bellvitge Biomedical Research Institute (IDIBELL), L'Hospitalet de Llobregat, Barcelona, Spain.","García-Recio A, Santos-Gómez A, Soto D, Julia-Palacios N, García-Cazorla À, Altafaj X, Olivella M",,"Ministerio de Ciencia e Innovación, Instituto de Salud Carlos III, Ministerio de Ciencia e Innovación, Instituto de Salud Carlos III",5.0,Spain +33259604,HAHmiR.DB,0.977624993,HAHmiR.DB,0.977624993,High-Altitude Human miRNA Database,0.911724165,1,http://www.hahmirdb.in,200,,"(13.2257,77.5750)",http://web.archive.org/web/20220119114717/https://hahmirdb.in/,2020-12-01,"Defence Institute of Physiology and Allied Sciences (DIPAS), Defence R&D Organization (DRDO), Lucknow Road, Timarpur, Delhi 110054, India.","Khurana P, Gupta A, Sugadev R, Sharma YK, Kumar B",,,0.0,India +33264402,HERB,0.972203016,HERB,0.972203016,,0,1,http://herb.ac.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220616075005/http://herb.ac.cn./,2021-01-01,"Beijing University of Chinese Medicine, Chaoyang District, Beijing 100029, China.","Fang S, Dong L, Liu L, Guo J, Zhao L, Zhang J, Bu D, Liu X, Huo P, Cao W, Dong Q, Wu J, Zeng X, Wu Y, Zhao Y",,"National Key Research and 
Development Program of China, China Postdoctoral Science Foundation, China Postdoctoral Innovative Talent Foundation, National Natural Science Foundation for Young Scholars of China, National Natural Science Foundation for Young Scholars of China, National Key Research and Development Program of China, CAS, National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Key Research and Development Program of China, Zhejiang Provincial Natural Science Foundation, BMICC of National Population Health Data Center, National Natural Science Foundation of China",35.0,China +33382884,HeartBioPortal,0.983459532,HeartBioPortal,0.983459532,,0,1,http://www.heartbioportal.com,301,,"(37.3483,-121.9844)",http://web.archive.org/web/20220705222550/https://www.heartbioportal.com/,2020-12-01,"Department of Medicine, Section of Computational Biomedicine and Biomedical Data Science, University of Chicago, Chicago, IL 60637, USA.","Khomtchouk BB, Nelson CS, Vand KA, Palmisano S, Grossman RL",,,1.0,United States +33406221,iCysMod,0.997266591,iCysMod,0.997266591,for protein cysteine modifications in eukaryotes,0.853484929,1,http://icysmod.omicsbio.info,200,,"(22.2783,114.1747)",http://web.archive.org/web/20220616033505/http://icysmod.omicsbio.info/,2021-09-01,"School of Life Sciences, Zhengzhou University, Zhengzhou, Henan, China.","Wang P, Zhang Q, Li S, Cheng B, Xue H, Wei Z, Shao T, Liu ZX, Cheng H, Wang Z",,"Tip-top Scientific and Technical Innovative Youth Talents of Guangdong Special Support Program, National Natural Science Foundation of China, Program for Guangdong Introducing Innovative and Entrepreneurial Teams, Key program for Department of Science and Technology of Qinghai province, National Natural Science Foundation of China, National Natural Science Foundation of China",3.0,China 
+33413085,H2V,0.848025173,H2V,0.848025173,,0,1,http://www.zhounan.org/h2v,301,,"(45.8399,-119.7006)",http://web.archive.org/web/20220524010651/http://www.zhounan.org/h2v/,2021-01-07,"Affiliated Brain Hospital of Guangzhou Medical University, 36 Mingxin Rd, Guangzhou, 510370, China.","Zhou N, Bao J, Ning Y",,,5.0,China +33417691,HGFDB,0.996328712,HGFDB,0.996328712,Helmeted Guinea Fowl Database,0.954183638,1,http://hgfdb.ynau.edu.cn,200,,"(39.9906,116.2887)",http://web.archive.org/web/20220501163105/http://hgfdb.ynau.edu.cn/,2021-01-01,"Faculty of Animal Science and Technology, Yunnan Agricultural University, Kunming, Yunnan 650201, China.","Li X, Li Z, Shen Q, Pan Y, Dong X, Xu Z, Duan S, Li Y, Du Y, Chen S, Ma Z, Dong Y",,Digitalization of Biological Resource Project,0.0,China +33439542,HDG,0.937619388,HDG,0.937619388,Human Disease Genes website series,0.917995095,1,http://humandiseasegenes.info,301,,"(52.3740,4.8897)",no_wayback,2021-01-13,"Department of Human Genetics, Donders Institute for Brain, Cognition and Behaviour, Radboud university medical center, Nijmegen, The Netherlands.","Dingemans AJM, Stremmelaar DE, Vissers LELM, Jansen S, Nabais Sá MJ, van Remortele A, Jonis N, Truijen K, van de Ven S, Ewals J, Verbruggen M, Koolen DA, Brunner HG, Eichler EE, Gecz J, de Vries BBA",,"Dutch Research Council (NWO), NIMH NIH HHS",6.0,Netherlands +33515030,HVIDB,0.997510344,HVIDB,0.997510344,Human-Virus Interaction DataBase,0.954682939,1,http://zzdlab.com/hvidb,301,,"(39.9075,116.3972)",http://web.archive.org/web/20220709032347/http://zzdlab.com/hvidb/,2021-03-01,"State Key Laboratory of Agrobiotechnology, College of Biological Sciences, China Agricultural University, Beijing 100193, China.","Yang X, Lian X, Fu C, Wuchty S, Yang S, Zhang Z",,National Key Research and Development Program of China,5.0,"China, China" 
+33677507,HIR,0.787795722,HIR,0.787795722,,0,1,http://human.biomedtzc.cn,200,,"(30.2936,120.1614)",http://web.archive.org/web/20220521221343/http://human.biomedtzc.cn/,2021-03-01,"Institute of Big Data and Artificial Intelligence in Medicine, School of Electronics and Information Engineering, Taizhou University, 1139 Shifu Avenue, Taizhou City, Zhejiang Province, Taizhou 318000, China.","Guo WP, Ding XB, Jin J, Zhang HB, Yang QL, Chen PC, Yao H, Ruan LI, Tao YT, Chen X",,"Research project of education department of zhejiang province, National Natural Science Foundation of China, Science and technology project of Taizhou City, Humanities and Social Science Project of the Chinese Ministry of Education, National Natural Science Foundation of China",0.0,China +33740463,ICSCB,0.996838212,ICSCB,0.996838212,Integrated Collection of Stem Cell Bank data,0.959731273,1,http://icscb.stemcellinformatics.org,301,,"(35.6895,139.6917)",http://web.archive.org/web/20220520044042/https://icscb.stemcellinformatics.org/,2021-03-18,"Center for iPS Cell Research and Application (CiRA), Kyoto University, 53 Kawahara-cho, Sho-goin, Sakyo-ku, Kyoto 606-8507, Japan.","Chen Y, Sakurai K, Maeda S, Masui T, Okano H, Dewender J, Seltmann S, Kurtz A, Masuya H, Nakamura Y, Sheldon M, Schneider J, Stacey GN, Panina Y, Fujibuchi W",,"German Academic Exchange Service, Japan Agency for Medical Research and Development",1.0,Japan +33868597,GPCards,0.997809052,GPCards,0.997809052,,0,1,http://genemed.tech/gpcards,301,,"(22.2783,114.1747)",http://web.archive.org/web/20210920145829/http://genemed.tech/gpcards/,2021-03-22,"National Clinical Research Center for Geriatric Disorders, Department of Geriatrics, Xiangya Hospital, Central South University, Changsha, Hunan 410008, China.","Li B, Wang Z, Chen Q, Li K, Wang X, Wang Y, Zeng Q, Han Y, Lu B, Zhao Y, Zhang R, Jiang L, Pan H, Luo T, Zhang Y, Fang Z, Xiao X, Zhou X, Wang R, Zhou L, Wang Y, Yuan Z, Xia L, Guo J, Tang B, Xia K, Zhao G, Li J",,National 
Natural Science Foundation of China,0.0,China +33929018,IBDDB,0.994403943,IBDDB,0.994403943,IBD database,0.644221008,1,http://www.cbrc.kaust.edu.sa/ibd,302,,"(37.5331,-122.2486)",http://web.archive.org/web/20221017034419/https://www.cbrc.kaust.edu.sa/ibd/,2021-04-01,"School of Molecular and Cell Biology, University of the Witwatersrand, Private Bag 3, Johannesburg, Gauteng WITS-2050, South Africa.","Khan F, Radovanovic A, Gojobori T, Kaur M",,King Abdullah University of Science and Technology,0.0,South Africa +33965348,GGVD,0.992996335,GGVD,0.992996335,goat genome variation database,0.746109948,1,http://animal.nwsuaf.edu.cn/GoatVar,301,,"(39.9906,116.2887)",no_wayback,2021-03-01,"Key Laboratory of Animal Genetics, Breeding and Reproduction of Shaanxi Province, College of Animal Science and Technology, Northwest A&F University, Yangling, Shaanxi 712100, China.","Fu W, Wang R, Yu J, Hu D, Cai Y, Shao J, Jiang Y",,National Natural Science Foundation of China,0.0,China +33973408,hu.MAP,0.995122343,hu.MAP,0.995122343,,0,1,http://humap2.proteincomplexes.org,200,,"(30.2672,-97.7431)",http://web.archive.org/web/20220519093119/http://humap2.proteincomplexes.org/,2021-05-01,"Department of Molecular Biosciences, Center for Systems and Synthetic Biology, University of Texas, Austin, TX, USA.","Drew K, Wallingford JB, Marcotte EM",,"Eunice Kennedy Shriver National Institute of Child Health and Human Development, NICHD NIH HHS, NICHD NIH HHS, NICHD NIH HHS, NICHD NIH HHS, NICHD NIH HHS, National Heart, Lung, and Blood Institute, Division of Loan Repayment, Eunice Kennedy Shriver National Institute of Child Health and Human Development, NHLBI NIH HHS, NIGMS NIH HHS, NICHD NIH HHS, National Institute of Diabetes and Digestive and Kidney Diseases, Welch Foundation, NIDDK NIH HHS, National Institute of General Medical Sciences",10.0,United States 
+33984507,HisPhosSite,0.988221288,HisPhosSite,0.988221288,,0,1,http://reprod.njmu.edu.cn/hisphossite,"HTTPConnectionPool(host='reprod.njmu.edu.cn', port=80): Max retries exceeded with url: /hisphossite (Caused by ReadTimeoutError(""HTTPConnectionPool(host='reprod.njmu.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2021-05-10,"Department of Biomedical Engineering, Nanjing University of Aeronautics and Astronautics, Nanjing 210016, China.","Zhao J, Zou L, Li Y, Liu X, Zeng C, Xu C, Jiang B, Guo X, Song X",,"National Natural Science Foundation of China, Jiangsu Province Department of Human Resources and Social Security, Ministry of Education of the People's Republic of China, China Postdoctoral Science Foundation, Ministry of Science and Technology of the People's Republic of China",0.0,China +34032471,HSP,0.977086564,HSP,0.977086564,Human Salivary Proteome Wiki,0.836235136,1,http://salivaryproteome.nidcr.nih.gov,"HTTPConnectionPool(host='salivaryproteome.nidcr.nih.gov', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20210318000138/https://salivaryproteome.nidcr.nih.gov/,2021-05-25,"Office of Intramural Research, Center for Information Technology, National Institutes of Health, Bethesda, MD, USA.","Lau WW, Hardt M, Zhang YH, Freire M, Ruhl S",,NIDCR NIH HHS,0.0,United States +34164644,HFBD,0.994234284,HFBD,0.994234284,HF biomarker knowledge database,0.968227565,1,http://sysbio.org.cn/HFBD,301,,"(22.2783,114.1747)",no_wayback,2021-06-23,"Institutes for Systems Genetics, Frontiers Science Center for Disease-related Molecular Network, West China Hospital, Sichuan University, Chengdu, 610212, Sichuan, China.","He H, Shi M, Lin Y, Zhan C, Wu R, Bi C, Liu X, Ren S, Shen B",,"National Natural Science Foundation of China, National Key Research and Development Program of China, Sichuan and Guangxi Provinces",1.0,"China, 
China" +34175476,GWH,0.995265027,GWH,0.995265027,Genome Warehouse,0.928986808,1,"http://ngdc.cncb.ac.cn, http://ngdc.cncb.ac.cn/gwh","301, 301",,"(39.9075,116.3972), (39.9075,116.3972)","http://web.archive.org/web/20221108212102/https://ngdc.cncb.ac.cn/, http://web.archive.org/web/20220929075333/https://ngdc.cncb.ac.cn/gwh/",2021-06-24,"National Genomics Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences/China National Center for Bioinformation, Beijing 100101, China; CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Chen M, Ma Y, Wu S, Zheng X, Kang H, Sang J, Xu X, Hao L, Li Z, Gong Z, Xiao J, Zhang Z, Zhao W, Bao Y",,"National Key Research and Development Program of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, Chinese Academy of Sciences, National Key Research and Development Program of China, National Natural Science Foundation of China, National Key Research and Development Program of China, Chinese Academy of Sciences, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China, Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences",10.0,"China, China, China" +34330336,HumGut,0.968026817,HumGut,0.968026817,,0,1,http://arken.nmbu.no,302,,"(60.3927,10.5616)",http://web.archive.org/web/20221006010811/https://arken.nmbu.no/,2021-07-31,"Department of Chemistry, Biotechnology and Food Sciences, Norwegian University of Life Sciences, P.O. Box 5003, 1432, Ås, Norway. 
ph@genetic-analysis.com.","Hiseni P, Rudi K, Wilson RC, Hegge FT, Snipen L",,norway research council through r&d,3.0,Norway +34378141,GPGD,0.995944738,GPGD,0.995944738,Global Pharmacopoeia Genome Database,0.966468737,1,http://www.gpgenome.com,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220615172951/http://www.gpgenome.com/,2021-08-06,"Institute of Chinese Materia Medica, China Academy of Chinese Medical Sciences, Beijing, 100700, China.","Liao B, Hu H, Xiao S, Zhou G, Sun W, Chu Y, Meng X, Wei J, Zhang H, Xu J, Chen S",,,5.0,"China, China" +34461244,HVdb,0.98835696,HVdb,0.98835696,,0,1,http://hvdb.dqweilab-sjtu.com/index.php,301,,"(60.3540,24.9794)",no_wayback,2021-08-28,"Department of Bioinformatics and Biological Statistics, School of Life Sciences and Biotechnology, Shanghai Jiao Tong University, Shanghai, 200240, PR China. Electronic address: abbaskhan@sjtu.edu.cn.","Khan A, Khan S, Ahmad S, Anwar Z, Hussain Z, Safdar M, Rizwan M, Waseem M, Hussain A, Akhlaq M, Khan T, Ali SS, Wei DQ",,,1.0,China +34639237,GreeningDB,0.995977521,GreeningDB,0.995977521,,0,1,http://bioinfo.usu.edu/GreeningDB,301,,"(41.7355,-111.8344)",http://web.archive.org/web/20220616043030/http://bioinfo.usu.edu/GreeningDB/,2021-10-08,"Department of Plants, Soils and Climate, Utah State University, Logan, UT 84322, USA.","Loaiza CD, Duhan N, Kaundal R",,,0.0,United States +34642750,HBFP,0.931257725,HBFP,0.931257725,human body fluid proteome,0.65359441,1,http://bmbl.bmi.osumc.edu/HBFP,302,,"(39.9612,-82.9988)",http://web.archive.org/web/20220802124252/https://bmbl.bmi.osumc.edu/HBFP/,2021-10-01,"Department of Computer Science and Engineering, University of Nebraska-Lincoln, 122E Avery Hall, 1144 T St., Lincoln, NE 68588, USA.","Shao D, Huang L, Wang Y, Cui X, Li Y, Wang Y, Ma Q, Du W, Cui J",,"Jilin Province Key Laboratory of Big Data Intelligent Computing, Development Project of Jilin Province of China, Guangdong Key Project for Applied Fundamental Research, Development Project of 
Jilin Province of China, Development Project of Jilin Province of China, National Natural Science Foundation of China",1.0,United States +34699529,GH19ED,0.938115016,GH19ED,0.938115016,,0,1,http://gh19ed.biocatnet.de,301,,"(48.7823,9.1770)",http://web.archive.org/web/20220517013658/https://gh19ed.biocatnet.de/,2021-10-26,"Department of Biotechnology and Biosciences, University of Milano-Bicocca, Milan, Italy.","Orlando M, Buchholz PCF, Lotti M, Pleiss J",,"university of milano-bicocca, Deutsche Forschungsgemeinschaft, Bundesministerium für Bildung und Forschung",1.0,Italy +34700680,IDC,0.976822138,IDC,0.976822138,,0,1,http://portal.imagingdatacommons.cancer.gov,"HTTPConnectionPool(host='portal.imagingdatacommons.cancer.gov', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2021-11-01,"Brigham and Women's Hospital, Boston, MA.","Fedorov A, Longabaugh W, Pot D, Clunie D, Pieper S, Lewis R, Aerts H, Homeyer A, Herrmann M, Wagner U, Pihl T, Farahani K, Kikinis R",,,0.0, +34791105,HFIP,0.989193519,HFIP,0.989193519,Heart Failure Integrated Platform,0.772608399,1,http://heartfailure.medical-bigdata.com,200,,"(39.9075,116.3972)",no_wayback,2021-11-01,"Research Center of Medical Big Data, Chinese PLA General Hospital, 28 Fuxing Road, Beijing 100853, China.","Wu J, Zhao M, Li T, Sun J, Chen Q, Yin C, Jia Z, Zhao C, Lin G, Ni Y, Xie G, Shi J, He K",,"National Natural Science Foundation of China, National Natural Science Foundation of China, the National Key Research and Development Program of China",0.0,China +34846641,HODD,0.994872093,HODD,0.994872093,Human Ophthalmic Diseases Database,0.989201119,1,http://bio-bigdata.cn/HODD,404,,,no_wayback,2021-11-30,"State Key Laboratory of Ophthalmology, Zhongshan Ophthalmic Center, Sun Yat-sen University, Guangzhou, 510060, China.","Zhang Z, Tang Q, Wang Q, Nie F, Sun L, Luo D, Chen W, Ding X",,NSFC,0.0,China 
+34859531,gnomAD,0.988560289,gnomAD,0.988560289,Genome Aggregation Database,0.841847384,1,http://gnomad.broadinstitute.org,301,,"(39.0997,-94.5786)",http://web.archive.org/web/20221101074654/https://gnomad.broadinstitute.org/,2021-12-16,"Program in Medical and Population Genetics, Broad Institute of MIT and Harvard, Cambridge, MA, USA.","Gudmundsson S, Singer-Berk M, Watts NA, Phu W, Goodrich JK, Solomonson M, , Rehm HL, MacArthur DG, O'Donnell-Luria A",,"National Human Genome Research Institute, National Human Genome Research Institute, National Human Genome Research Institute, NHGRI NIH HHS, NIDDK NIH HHS, National Institute of Diabetes and Digestive and Kidney Diseases, NHGRI NIH HHS, NHGRI NIH HHS",1.0,United States +34877793,GEPSdb,0.998211384,GEPSdb,0.998211384,Gene Expression Database of Poplar under Stress,0.982485765,1,http://gepsdb.ahau-edu.cn,200,,"(22.5455,114.0683)",no_wayback,2021-12-08,"Laboratory of Modern Biotechnology, School of Forestry and Landscape Architecture, Anhui Agricultural Univ., Hefei, 230036, China.","Liu S, Wang Z, Lan Y, He T, Xiong R, Wu C, Xiang Y, Yan H",,,0.0,China +34907423,iCAV,0.99172157,iCAV,0.99172157,,0,1,http://icav.omicsbio.info,200,,"(22.2783,114.1747)",http://web.archive.org/web/20220709195310/http://icav.omicsbio.info/,2021-12-01,"School of Life Sciences, Zhengzhou University, Zhengzhou 450001, China.","Liu B, Zhang Q, Wang J, Cao S, Zhou Z, Liu ZX, Cheng H",,"Fostering Fund of Fundamental Research for Young Teachers of Zhengzhou University, Natural Science Foundation of China, Guangdong Esophageal Cancer Institute Science and Technology Program, Tip-Top Scientific and Technical Innovative Youth Talents of Guangdong special support program, Program for Guangdong Introducing Innovative and Entrepreneurial Teams",0.0,China +21177658,IsoBase,0.996967256,IsoBase,0.996967256,,0,1,http://isobase.csail.mit.edu,308,,"(42.3751,-71.1056)",no_wayback,2011-01-01,"Computer Science and Artificial Intelligence Laboratory, 
Massachusetts Institute of Technology, Cambridge, MA 02139, USA.","Park D, Singh R, Baym M, Liao CS, Berger B",,"NIGMS NIH HHS, NIGMS NIH HHS",34.0,United States +21353266,LegProt,0.981720328,LegProt,0.981720328,legume,0.68427968,1,http://bioinfo.noble.org/manuscript-support/legumedb,200,,"(34.1566,-97.1792)",http://web.archive.org/web/20220708122010/http://bioinfo.noble.org/manuscript-support/legumedb/,2011-02-23,"Plant Biology Division, The Samuel Roberts Noble Foundation, 2510 Sam Noble Parkway, Ardmore, OK 73401, USA.","Lei Z, Dai X, Watson BS, Zhao PX, Sumner LW",,,11.0,United States +21492431,KID,0.998009106,KID,0.998009106,Yeast Kinase Interaction Database,0.994851972,1,http://www.moseslab.csb.utoronto.ca/KID,301,,"(43.6684,-79.3689)",http://web.archive.org/web/20220121091046/http://www.moseslab.csb.utoronto.ca/KID/,2011-04-14,"Department of Molecular Genetics, The Donnelly Centre for Cellular and Biomolecular Research, University of Toronto,160 College Street, Toronto, M3S 3E1, Canada.","Sharifpoor S, Nguyen Ba AN, Youn JY, van Dyk D, Friesen H, Douglas AC, Kurat CF, Chong YT, Founk K, Moses AM, Andrews BJ",,Canadian Institutes of Health Research,36.0,Canada +21498547,LccED,0.97930038,LccED,0.97930038,The Laccase Engineering Database,0.850810488,1,http://www.lcced.uni-stuttgart.de,200,,"(48.7823,9.1770)",http://web.archive.org/web/20220620063759/http://www.lcced.uni-stuttgart.de/,2011-04-15,"Institute of Technical Biochemistry, University of Stuttgart, Allmandring 31, 70569 Stuttgart, Germany.","Sirim D, Wagner F, Wang L, Schmid RD, Pleiss J",,,42.0,Germany +21624162,KUPKB,0.995265484,KUPKB,0.995265484,a kidney and urinary pathway,0.738103741,1,http://www.e-lico.eu/kupkb,302,,"(49.0094,8.4044)",http://web.archive.org/web/20110207034640/http://www.e-lico.eu:80/kupkb/,2011-05-17,"School of Computer Science, University of Manchester, UK. 
simon.jupp@manchester.ac.uk.","Jupp S, Klein J, Schanstra J, Stevens R",,,17.0, +21624339,ILISI,0.987797678,ILISI,0.987797678,Index of Italian Literature on Nursing Sciences,0.942488663,1,http://www.ipasvi.roma.it/ita/ILISI,404,,,http://web.archive.org/web/20170703204100/http://www.ipasvi.roma.it/ita/ilisi/,2011-01-01,Presidente del Collegio IPASVI di Roma.,"Rocco G, Bonfigli A, Bruno E, Fanfera E, Finocchi G, Montevecchi A, Napolano M, Nappini P, Stievano A, Tallarita F, Turci C",,,1.0, +21707958,Littorina sequence database,0.878103534,LSD,0.87695243,Littorina sequence database,0.878103534,1,http://mbio-serv2.mbioekol.lu.se/Littorina,301,,"(55.7058,13.1932)",no_wayback,2011-06-28,"Microbial Ecology, Department of Biology, Lund University, SE-223 62 Lund, Sweden.","Canbäck B, André C, Galindo J, Johannesson K, Johansson T, Panova M, Tunlid A, Butlin R",,"Natural Environment Research Council, Natural Environment Research Council",5.0,Sweden +21873645,iRefIndex,0.996739507,iRefIndex,0.996739507,,0,1,http://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads/ppiTrim.html,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20220617040202/http://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads/ppiTrim.html,2011-08-27,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA.","Stojmirović A, Yu YK",,Intramural NIH HHS,10.0,United States +21914464,IGD,0.97653389,IGD,0.97653389,Intronless Gene Database,0.875607576,1,http://www.bioinfo-cbs.org/igd,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20150809171018/http://bioinfo-cbs.org/igd/,2011-09-02,"Research Group on Molecular and Cellular Screening Processes, Laboratory of Microorganisms and biomolecules, P.O. Box 1177, 3018 Sfax, Centre of Biotechnology of Sfax, University of Sfax, Tunisia. 
amel.louhichi@cbs.rnrt.tn","Louhichi A, Fourati A, Rebaï A",,,31.0,Tunisia +21965461,ISSMIC,0.998472512,ISSMIC,0.998472512,,0,1,http://www.iss.it/ampp/dati/cont.php?id=233&lang=1&tipo=7,302,,"(41.8919,12.5113)",no_wayback,2011-09-30,"Environment and Health Department, Istituto Superiore di Sanita', 00161 Rome, Italy. romualdo.benigni@iss.it","Benigni R, Bossa C, Tcheremenskaia O, Tcheremenskaia O, Battistelli CL, Crettaz P",,,17.0,Italy +22053089,InterEvol,0.992820978,InterEvol,0.992820978,,0,1,http://biodev.cea.fr/interevol,301,,"(48.6833,2.1333)",http://web.archive.org/web/20220525220020/http://biodev.cea.fr/interevol/,2011-11-03,"CEA, iBiTecS, F-91191 Gif sur Yvette and CNRS, F-91191 Gif sur Yvette, France.","Faure G, Andreani J, Guerois R",,,30.0,France +22058127,MACiE,0.995728016,MACiE,0.995728016,Annotation,0.672201276,1,http://www.ebi.ac.uk/thornton-srv/databases/MACiE,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20220401084654/http://www.ebi.ac.uk/thornton-srv/databases/MACiE/,2011-11-03,"EMBL-EBI, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. gemma@ebi.ac.uk","Holliday GL, Andreini C, Fischer JD, Rahman SA, Almonacid DE, Williams ST, Pearson WR",,"NIGMS NIH HHS, NLM NIH HHS",42.0, +"22096229, 25428371, 27899635, 30398656, 33156333",InterPro,0.995504975,InterPro,0.995504975,,0,5,http://www.ebi.ac.uk/interpro,200,,"(51.5085,-0.1257)",http://web.archive.org/web/20221027022306/http://www.ebi.ac.uk/interpro,2021-01-01,"EMBL Outstation European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, CB10 1SD Cambridge, UK. 
hunter@ebi.ac.uk, European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK rdf@ebi.ac.uk., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK.","Hunter S, Jones P, Mitchell A, Apweiler R, Attwood TK, Bateman A, Bernard T, Binns D, Bork P, Burge S, de Castro E, Coggill P, Corbett M, Das U, Daugherty L, Duquenne L, Finn RD, Fraser M, Gough J, Haft D, Hulo N, Kahn D, Kelly E, Letunic I, Lonsdale D, Lopez R, Madera M, Maslen J, McAnulla C, McDowall J, McMenamin C, Mi H, Mutowo-Muellenet P, Mulder N, Natale D, Orengo C, Pesseat S, Punta M, Quinn AF, Rivoire C, Sangrador-Vegas A, Selengut JD, Sigrist CJ, Scheremetjew M, Tate J, Thimmajanarthanan M, Thomas PD, Wu CH, Yeats C, Yong SY, Mitchell A, Chang HY, Daugherty L, Fraser M, Hunter S, Lopez R, McAnulla C, McMenamin C, Nuka G, Pesseat S, Sangrador-Vegas A, Scheremetjew M, Rato C, Yong SY, Bateman A, Punta M, Attwood TK, Sigrist CJ, Redaschi N, Rivoire C, Xenarios I, Kahn D, Guyot D, Bork P, Letunic I, Gough J, Oates M, Haft D, Huang H, Natale DA, Wu CH, Orengo C, Sillitoe I, Mi H, Thomas PD, Finn RD, Finn RD, Attwood TK, Babbitt PC, Bateman A, Bork P, Bridge AJ, Chang HY, Dosztányi Z, El-Gebali S, Fraser M, Gough J, Haft D, Holliday GL, Huang H, Huang X, Letunic I, Lopez R, Lu S, Marchler-Bauer A, Mi H, Mistry J, Natale DA, Necci M, Nuka G, Orengo CA, Park Y, Pesseat S, Piovesan D, Potter SC, Rawlings ND, Redaschi N, Richardson L, Rivoire C, Sangrador-Vegas A, Sigrist C, Sillitoe I, Smithers B, Squizzato S, Sutton G, Thanki N, Thomas PD, 
Tosatto SC, Wu CH, Xenarios I, Yeh LS, Young SY, Mitchell AL, Mitchell AL, Attwood TK, Babbitt PC, Blum M, Bork P, Bridge A, Brown SD, Chang HY, El-Gebali S, Fraser MI, Gough J, Haft DR, Huang H, Letunic I, Lopez R, Luciani A, Madeira F, Marchler-Bauer A, Mi H, Natale DA, Necci M, Nuka G, Orengo C, Pandurangan AP, Paysan-Lafosse T, Pesseat S, Potter SC, Qureshi MA, Rawlings ND, Redaschi N, Richardson LJ, Rivoire C, Salazar GA, Sangrador-Vegas A, Sigrist CJA, Sillitoe I, Sutton GG, Thanki N, Thomas PD, Tosatto SCE, Yong SY, Finn RD, Blum M, Chang HY, Chuguransky S, Grego T, Kandasaamy S, Mitchell A, Nuka G, Paysan-Lafosse T, Qureshi M, Raj S, Richardson L, Salazar GA, Williams L, Bork P, Bridge A, Gough J, Haft DH, Letunic I, Marchler-Bauer A, Mi H, Natale DA, Necci M, Orengo CA, Pandurangan AP, Rivoire C, Sigrist CJA, Sillitoe I, Thanki N, Thomas PD, Tosatto SCE, Wu CH, Bateman A, Finn RD",", , , , ","Biotechnology and Biological Sciences Research Council, NIGMS NIH HHS, Biotechnology and Biological Sciences Research Council, European Commission FP7, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, NIGMS NIH HHS, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, National Science Foundation, Division of Biological Infrastructure, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research 
Council, Wellcome Trust, NHGRI NIH HHS, Medical Research Council, Division of Biological Infrastructure, Medical Research Council, DHHS, Biotechnology and Biological Sciences Research Council, National Science Foundation, Open Targets, European Molecular Biology Laboratory, National Institutes of Health, Wellcome Trust, Biotechnology and Biological Sciences Research Council, ELIXIR, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",2981.0, +"22110036, 26578557",LegumeIP,0.996482372,LegumeIP,0.996482372,,0,2,http://plantgrn.noble.org/LegumeIP,200,,"(34.1566,-97.1792)",http://web.archive.org/web/20220816100237/https://plantgrn.noble.org/LegumeIP,2015-11-17,"Plant Biology Division, The Samuel Roberts Noble Foundation, 2510 Sam Noble Parkway, Ardmore, OK 73401, USA., Bioinformatics Lab, Plant Biology Division, Samuel Roberts Noble Foundation, 2510 Sam Noble Parkway, Ardmore, OK 73401, USA.","Li J, Dai X, Liu T, Zhao PX, Li J, Dai X, Zhuang Z, Zhao PX",", ",", ",52.0,"United States, United States" +22112530,LIPABASE,0.963444293,LIPABASE,0.963444293,,0,1,http://www.lipabase-pfba-tun.org,"HTTPConnectionPool(host='www.lipabase-pfba-tun.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20140112110227/http://lipabase-pfba-tun.org/,2011-01-01,"Institut Superieur des Sciences Biologiques Appliquees de Tunis, Universite Tunis El Manar, Tunisie. 
amessaoudi@tvtc.gov.sa","Messaoudi A, Belguith H, Ghram I, Hamida JB",,,7.0, +22120663,INOH,0.981793324,INOH,0.981793324,Integrating Network Objects with Hierarchies,0.783673547,1,http://www.inoh.org,200,,"(45.5088,-73.5878)",http://web.archive.org/web/20220615133323/http://inoh.org/,2011-11-26,"Institute for Bioinformatics Research and Development, Japan Science and Technology Agency, Kashiwa, Japan.","Yamamoto S, Sakai N, Nakamura H, Fukagawa H, Fukuda K, Takagi T",,,28.0,"Japan, Japan" +22127860,IndelFR,0.997826949,IndelFR,0.997826949,Indel Flanking Region Database,0.962372184,1,http://indel.bioinfo.sdu.edu.cn,"HTTPConnectionPool(host='indel.bioinfo.sdu.edu.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20121125073741/http://indel.bioinfo.sdu.edu.cn:80/,2011-11-29,"State Key Laboratory of Microbial Technology, Shandong University, Jinan 250100, China.","Zhang Z, Xing C, Wang L, Gong B, Liu H",,,10.0,China +22139933,IGDB.NSCLC,0.982541544,IGDB.NSCLC,0.982541544,,0,1,http://igdb.nsclc.ibms.sinica.edu.tw,200,,"(25.0478,121.5319)",no_wayback,2011-12-01,"Graduate Institute of Life Sciences, National Defense Medical Center, Institute of Biomedical Sciences, Academia Sinica, Taiwan.","Kao S, Shiau CK, Gu DL, Ho CM, Su WH, Chen CF, Lin CH, Jou YS",,,15.0, +22140115,IPAVS,0.987295449,IPAVS,0.987295449,Integrated Pathway Resources,0.952554718,1,http://ipavs.cidms.org,"HTTPConnectionPool(host='ipavs.cidms.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220615180835/http://ipavs.cidms.org/,2011-12-02,"School of Life Sciences and Systems Biology Research Center, Gwangju Institute of Science and Technology, Gwangju 500-712, Korea.","Sreenivasaiah PK, Rani S, Cayetano J, Arul N, Kim DH",,,11.0, 
+"22260278, 27679478",IRD,0.993188262,IRD,0.993188262,Influenza Research Database,0.962002027,2,http://www.fludb.org,301,,"(41.6736,-88.0017)",no_wayback,2016-09-26,"Department of Pathology, University of Texas Southwestern Medical Center, Dallas, TX 75390, USA., J. Craig Venter Institute, La Jolla, CA 92037, USA.","Squires RB, Noronha J, Hunt V, García-Sastre A, Macken C, Baumgarth N, Suarez D, Pickett BE, Zhang Y, Larsen CN, Ramsey A, Zhou L, Zaremba S, Kumar S, Deitrich J, Klem E, Scheuermann RH, Zhang Y, Aevermann BD, Anderson TK, Burke DF, Dauphin G, Gu Z, He S, Kumar S, Larsen CN, Lee AJ, Li X, Macken C, Mahaffey C, Pickett BE, Reardon B, Smith T, Stewart L, Suloway C, Sun G, Tong L, Vincent AL, Walters B, Zaremba S, Zhao H, Zhou L, Zmasek C, Klem EB, Scheuermann RH",", ","NIAID NIH HHS, NIAID NIH HHS",288.0,"United States, United States" +22267903,LCGbase,0.97235316,LCGbase,0.97235316,,0,1,http://lcgbase.big.ac.cn/LCGbase,"HTTPConnectionPool(host='lcgbase.big.ac.cn', port=80): Max retries exceeded with url: /LCGbase (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20150204080757/http://lcgbase.big.ac.cn:80/LCGbase/,2011-12-13,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100029, PR China.","Wang D, Zhang Y, Fan Z, Liu G, Yu J",,,6.0,China +22369201,Liverome,0.81696564,Liverome,0.81696564,,0,1,http://liverome.kobic.re.kr,"HTTPConnectionPool(host='liverome.kobic.re.kr', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to liverome.kobic.re.kr timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210925143717/http://liverome.kobic.re.kr/,2011-11-30,"Korean Bioinformation Center, Korea Research Institute of Bioscience and Biotechnology, Daejeon, Korea.","Lee L, Wang K, Li G, Xie Z, Wang Y, Xu J, Sun S, Pocalyko D, Bhak J, Kim C, Lee KH, Jang YJ, Yeom YI, Yoo HS, Hwang S",,,25.0, +22493526,IntDb,0.989881754,IntDb,0.989881754,,0,1,http://introndb.bicpu.edu.in,"HTTPConnectionPool(host='introndb.bicpu.edu.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2012-03-17,"Centre for Bioinformatics, School of Life Sciences, Pondicherry University, R.V. Nagar, Kalapet, Puducherry - 605014.","Mohanty S, Nizam A, Biswal M",,,0.0, +22494395,InterStoreDB,0.997662365,InterStoreDB,0.997662365,,0,1,http://www.interstoredb.org,"HTTPConnectionPool(host='www.interstoredb.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2012-05-01,"Ludwig Institute for Cancer Research, Centre for Medical Research, Royal Melbourne Hospital, Royal Parade, Parkville, Victoria 3050, Australia. 
chris.love@ludwig.edu.au","Love CG, Andongabo AE, Wang J, Carion PW, Rawlings CJ, King GJ",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",11.0,Australia +22570419,LAHEDES,0.989177001,LAHEDES,0.989177001,LAGLIDADG,0.901116014,1,http://homingendonuclease.net,200,,"(47.6062,-122.3321)",http://web.archive.org/web/20220202063707/http://homingendonuclease.net/,2012-05-08,"Division of Basic Sciences, Fred Hutchinson Cancer Research Center, Seattle, WA 98109, USA.","Taylor GK, Petrucci LH, Lambert AR, Baxter SK, Jarjour J, Stoddard BL",,"NIGMS NIH HHS, NCI NIH HHS",18.0,United States +22592381,IRView,0.997845531,IRView,0.997845531,,0,1,http://ir.hgc.jp,200,,"(35.6895,139.6917)",http://web.archive.org/web/20220615180808/http://ir.hgc.jp/,2012-05-15,"Division of Interactome Medical Sciences, Institute of Medical Science, The University of Tokyo, Tokyo 108-8039, Japan.","Fujimori S, Hirai N, Masuoka K, Oshikubo T, Yamashita T, Washio T, Saito A, Nagasaki M, Miyano S, Miyamoto-Sato E",,,1.0,Japan +"22674159, 23087376",IUPHAR-DB,0.994823694,IUPHAR-DB,0.994823694,of Basic and Clinical Pharmacology Database,0.740390394,2,http://www.iuphar-db.org,302,,"(55.9521,-3.1965)",http://web.archive.org/web/20221105030550/https://www.iuphar-db.org/,2012-10-18,"University/BHF Centre for Cardiovascular Science, University of Edinburgh, Edinburgh, UK., University/BHF Centre for Cardiovascular Science, The Queen's Medical Research Institute, University of Edinburgh, Edinburgh EH16 4TJ, UK.","Mpamhanga CP, Sharman JL, Harmar AJ, , Sharman JL, Benson HE, Pawson AJ, Lukito V, Mpamhanga CP, Bombail V, Davenport AP, Peters JA, Spedding M, Harmar AJ, ",", ",", British Heart Foundation, Wellcome Trust, Wellcome Trust",59.0, +22698731,IGFmdb,0.971408069,IGFmdb,0.971408069,The insulin-like growth factor mutation database,0.886121653,1,http://www.adelaide.edu.au/igfmutation,301,,"(-34.9287,138.5986)",no_wayback,2012-06-13,"Discipline of 
Biochemistry, School of Molecular and Biomedical Science, University of Adelaide, SA, Australia.","Rajapaksha H, Alvino C, McCarthy P, Forbes BE",,,4.0,Australia +22750101,LSHGD,0.994387046,LSHGD,0.994387046,Leprosy Susceptible Human Gene Database,0.994129015,1,http://www.vit.ac.in/leprosy/leprosy.htm,302,,"(12.9184,79.1325)",http://web.archive.org/web/20140228024618/http://www.vit.ac.in:80/leprosy/leprosy.htm,2012-06-30,"Medical Biotechnology Division, School of Biosciences and Technology, Center for Nanobiotechnology, VIT University, Vellore 632014, Tamil Nadu, India. georgecp77@yahoo.co.in","George Priya Doss C, Nagasundaram N, Srajan J, Chiranjib C",,,4.0,India +22792232,Integrated Microbial Genomes and Metagenomes,0.979933851,IMG/M,0.961702744,Integrated Microbial Genomes and Metagenomes,0.979933851,1,http://www.hmpdacc-resources.org/imgm_hmp,403,,,no_wayback,2012-07-05,"Biological Data Management and Technology Center, Lawrence Berkeley National Laboratory, Berkeley, California, United States of America. vmmarkowitz@lbl.gov","Markowitz VM, Chen IM, Chu K, Szeto E, Palaniappan K, Jacob B, Ratner A, Liolios K, Pagani I, Huntemann M, Mavromatis K, Ivanova NN, Kyrpides NC",,"NHGRI NIH HHS, NHGRI NIH HHS",23.0,United States +22954629,MaConDa,0.993165344,MaConDa,0.993165344,Mass spectrometry Contaminants Database,0.910215296,1,http://www.maconda.bham.ac.uk,302,,"(52.4814,-1.8998)",http://web.archive.org/web/20221016210642/https://maconda.bham.ac.uk/,2012-09-06,"School of Biosciences, University of Birmingham, Birmingham B15 2TT, UK.","Weber RJ, Li E, Bruty J, He S, Viant MR",,British Heart Foundation,13.0, +22984621,IIID,0.976448039,IIID,0.976448039,Insect Innate Immunity Database,0.973630855,1,http://www.vanderbilt.edu/IIID,301,,"(33.4484,-112.0740)",no_wayback,2012-09-12,"Department of Biological Sciences, Vanderbilt University, Nashville, Tennessee, United States of America. 
s.bordenstein@vanderbilt.edu","Brucker RM, Funkhouser LJ, Setia S, Pauly R, Bordenstein SR",,,29.0,United States +23046449,IPAD,0.993825734,IPAD,0.993825734,Integrated Pathway Analysis Database for Systematic Enrichment Analysis,0.92183505,1,http://bioinfo.hsc.unt.edu/ipad,"HTTPConnectionPool(host='bioinfo.hsc.unt.edu', port=80): Max retries exceeded with url: /ipad (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180920033145/http://bioinfo.hsc.unt.edu:80/ipad/,2012-09-11,"Department of Academic and Institutional Resources and Technology, University of North Texas Health Science Center, Fort Worth, USA.","Zhang F, Drabier R",,,21.0,United States +23180781,InnateDB,0.998630166,InnateDB,0.998630166,,0,1,http://www.innatedb.com,200,,"(49.2497,-123.1193)",http://web.archive.org/web/20221023183118/http://www.innatedb.com/,2012-11-24,"Department of Molecular Biology and Biochemistry, Simon Fraser University, Burnaby, BC, V5A1S6, Canada.","Breuer K, Foroushani AK, Laird MR, Chen C, Sribnaia A, Lo R, Winsor GL, Hancock RE, Brinkman FS, Lynn DJ",,Canadian Institutes of Health Research,492.0,Canada +23193276,KEGG OC,0.981904417,KEGG OC,0.981904417,,0,1,http://www.genome.jp/tools/oc,301,,"(35.0211,135.7538)",http://web.archive.org/web/20220615133629/https://www.genome.jp/tools/oc/,2012-11-27,"Center for Transdisciplinary Research, Niigata University, 1-757 Asahimachi-dori, Chuo-ku, Niigata 951-8585, Japan.","Nakaya A, Katayama T, Itoh M, Hiranuka K, Kawashima S, Moriya Y, Okuda S, Tanaka M, Tokimatsu T, Yamanishi Y, Yoshizawa AC, Kanehisa M, Goto S",,,57.0,Japan +23193279,KIDFamMap,0.99645108,KIDFamMap,0.99645108,,0,1,http://gemdock.life.nctu.edu.tw/KIDFamMap,301,,"(24.8036,120.9686)",http://web.archive.org/web/20220621024038/http://gemdock.life.nctu.edu.tw/KIDFamMap/,2012-11-28,"Institute of Bioinformatics and Systems Biology, National Chiao Tung University, Hsinchu 30050, 
Taiwan.","Chiu YY, Lin CT, Huang JW, Hsu KC, Tseng JH, You SR, Yang JM",,,8.0, +23193296,LUCApedia,0.997837782,LUCApedia,0.997837782,,0,1,http://eeb.princeton.edu/lucapedia,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20160311062142/http://eeb.princeton.edu:80/lucapedia/,2012-11-27,"Department of Ecology and Evolutionary Biology, Princeton University, Princeton, NJ 08542, USA. adg@princeton.edu","Goldman AD, Bernhard TM, Dolzhenko E, Landweber LF",,,20.0,United States +23262288,LepChorionDB,0.994472027,LepChorionDB,0.994472027,,0,1,http://bioinformatics.biol.uoa.gr/LepChorionDB,301,,"(37.9757,23.7691)",http://web.archive.org/web/20210510080059/http://bioinformatics.biol.uoa.gr/LepChorionDB/,2012-12-20,"Centre of Immunology and Transplantation, Biomedical Research Foundation, Academy of Athens, Athens 11527, Greece.","Giannopoulos NG, Michalopoulos I, Papandreou NC, Malatras A, Iconomidou VA, Hamodrakas SJ",,University of Athens,1.0,Greece +23282057,IntPath,0.992235899,IntPath,0.992235899,Integrated Pathway gene relationship database,0.725374699,1,http://compbio.ddns.comp.nus.edu.sg:8080/IntPath,"HTTPConnectionPool(host='compbio.ddns.comp.nus.edu.sg', port=8080): Max retries exceeded with url: /IntPath (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20140722170231/http://compbio.ddns.comp.nus.edu.sg:8080/IntPath/,2012-12-12,"NUS Graduate School for Integrative Sciences & Engineering, National University of Singapore, Singapore.","Zhou H, Jin J, Zhang H, Yi B, Wozniak M, Wong L",,,14.0,"Singapore, Singapore" +23452239,LjGEA,0.995089054,LjGEA,0.995089054,Lotus japonicus Gene Expression Atlas,0.94991678,1,http://ljgea.noble.org,200,,"(34.1566,-97.1792)",http://web.archive.org/web/20221024001344/https://ljgea.noble.org/,2013-03-04,"Samuel Roberts Noble Foundation, 2510 Sam Noble Parkway, Ardmore, OK 73401, USA.","Verdier J, Torres-Jerez I, Wang M, Andriankaja A, Allen 
SN, He J, Tang Y, Murray JD, Udvardi MK",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",71.0,United States +23584832,MalaCards,0.994350731,MalaCards,0.994350731,,0,1,http://www.malacards.org,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20221103233015/https://www.malacards.org/,2013-04-12,"Department of Molecular Genetics, Weizmann Institute of Science, 234 Hertzel St. Rehovot, 76100, Israel. noa.rappaport@weizmann.ac.il","Rappaport N, Nativ N, Stelzer G, Twik M, Guan-Golan Y, Stein TI, Bahir I, Belinky F, Morrey CP, Safran M, Lancet D",,,90.0,Israel +23599502,INstruct,0.997730315,INstruct,0.997730315,,0,1,http://instruct.yulab.org,200,,"(42.4406,-76.4966)",http://web.archive.org/web/20220520012739/http://instruct.yulab.org/,2013-04-18,"Department of Biological Statistics and Computational Biology and Weill Institute for Cell and Molecular Biology, Cornell University, Ithaca, NY 14853, USA.","Meyer MJ, Das J, Wang X, Yu H",,"NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",65.0,United States +23601370,LiverAtlas,0.997491121,LiverAtlas,0.997491121,,0,1,http://liveratlas.hupo.org.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220615180857/http://liveratlas.hupo.org.cn/,2013-04-21,"Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences & Peking Union Medical College, Beijing, China.","Zhang Y, Yang C, Wang S, Chen T, Li M, Wang X, Li D, Wang K, Ma J, Wu S, Zhang X, Zhu Y, Wu J, He F",,"National Natural Science Foundation of China, National High-Tech Research and Development Program, Chinese National Basic Research Program, National Natural Science Foundation of China, Beijing Municipal Natural Science Foundation, National High-Tech Research and Development Program",9.0,China +23626002,KGVDB,0.986184359,KGVDB,0.986184359,,0,1,http://biomi.cdc.go.kr/KGVDB,"HTTPConnectionPool(host='biomi.cdc.go.kr', port=80): Max retries exceeded with url: 
/KGVDB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20140722210448/http://biomi.cdc.go.kr/KGVDB/,2013-04-26,"Division of Structural and Functional Genomics, Division of Bio-Medical informatics, Center for Genome Science, National Institute of Health, Chungcheongbuk-do, Korea.","Moon S, Jung KS, Kim YJ, Hwang MY, Han K, Lee JY, Park K, Kim BJ",,,8.0, +23658631,MabsBase,0.996929765,MabsBase,0.996929765,,0,1,http://mabscessus.um.edu.my,200,,"(3.1249,101.6528)",http://web.archive.org/web/20210526013003/http://mabscessus.um.edu.my/,2013-04-29,"Dental Research and Training Unit, Faculty of Dentistry, University of Malaya, Kuala Lumpur, Malaysia.","Heydari H, Wee WY, Lokanathan N, Hari R, Mohamed Yusoff A, Beh CY, Yazdi AH, Wong GJ, Ngeow YF, Choo SW",,,20.0,Malaysia +23667450,LipidHome,0.980949104,LipidHome,0.980949104,,0,1,http://www.ebi.ac.uk/apweiler-srv/lipidhome,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20140722221824/http://www.ebi.ac.uk/apweiler-srv/lipidhome/,2013-05-07,"EMBL Outstation, European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, United Kingdom. 
jfoster@ebi.ac.uk","Foster JM, Moreno P, Fabregat A, Hermjakob H, Steinbeck C, Apweiler R, Wakelam MJ, Vizcaíno JA",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Wellcome Trust",39.0,United Kingdom +23750084,InDiaMed,0.99003005,InDiaMed,0.99003005,,0,1,http://www.indiamed.info,"HTTPConnectionPool(host='www.indiamed.info', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220124052150/https://indiamed.info/,2013-04-13,"National Institute of Pharmaceutical Education and Research, Department of Pharmacology, Hyderabad, Andhra Pradesh, India - 500037 ; Authors Equally Contributed.","Tota K, Rayabarapu N, Moosa S, Talla V, Bhyravbhatla B, Rao S",,,4.0,India +23793747,Linc2GO,0.995874822,Linc2GO,0.995874822,,0,1,http://www.bioinfo.tsinghua.edu.cn,"HTTPConnectionPool(host='www.bioinfo.tsinghua.edu.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160306131038/http://www.bioinfo.tsinghua.edu.cn:80/,2013-06-22,"MOE Key Laboratory of Bioinformatics, State Key Laboratory of Biomembrane and Membrane Biotechnology, School of Life Sciences, Tsinghua University, Beijing, China.","Liu K, Yan Z, Li Y, Sun Z",,,63.0,China +23837716,KONAGAbase,0.980475366,KONAGAbase,0.980475366,,0,1,http://dbm.dna.affrc.go.jp/px,301,,"(36.2000,140.1000)",http://web.archive.org/web/20220615113139/http://dbm.dna.affrc.go.jp/px/,2013-07-09,"National Institute of Agrobiological Sciences, Tsukuba 305-8634, Japan. 
joraku@affrc.go.jp","Jouraku A, Yamamoto K, Kuwazaki S, Urio M, Suetsugu Y, Narukawa J, Miyamoto K, Kurita K, Kanamori H, Katayose Y, Matsumoto T, Noda H",,,25.0,Japan +23846593,lncRNome,0.995862365,lncRNome,0.995862365,,0,1,http://genome.igib.res.in/lncRNome,301,,"(28.6453,77.2128)",http://web.archive.org/web/20220331034830/http://genome.igib.res.in/lncRNome/,2013-07-11,"GN Ramachandran Knowledge Center for Genome Informatics, CSIR Institute of Genomics and Integrative Biology, Mall Road, Delhi 110007, India.","Bhartiya D, Pal K, Ghosh S, Kapoor S, Jalali S, Panwar B, Jain S, Sati S, Sengupta S, Sachidanandan C, Raghava GP, Sivasubbu S, Scaria V",,,73.0,India +23874394,LifeMap Discovery,0.972304722,LifeMap Discovery,0.972304722,,0,1,http://discovery.lifemapsc.com,301,,"(37.3058,-78.5462)",http://web.archive.org/web/20221103233038/https://discovery.lifemapsc.com/,2013-07-17,"LifeMap Sciences LTD, Tel Aviv, Israel. re@lifemapsc.com","Edgar R, Mazor Y, Rinon A, Blumenthal J, Golan Y, Buzhor E, Livnat I, Ben-Ari S, Lieder I, Shitrit A, Gilboa Y, Ben-Yehudah A, Edri O, Shraga N, Bogoch Y, Leshansky L, Aharoni S, West MD, Warshawsky D, Shtrichman R",,,53.0,Israel +23991755,IIMDB,0.996076167,IIMDB,0.996076167,In Vivo/In Silico Metabolites Database,0.911021487,1,http://metabolomics.pharm.uconn.edu/iimdb,"HTTPConnectionPool(host='metabolomics.pharm.uconn.edu', port=80): Max retries exceeded with url: /iimdb (Caused by ConnectTimeoutError(, 'Connection to metabolomics.pharm.uconn.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20180930125835/http://metabolomics.pharm.uconn.edu:80/iimdb,2013-09-12,"Department of Pharmaceutical Sciences, University of Connecticut , 69 North Eagleville Road, Storrs, Connecticut 06269, United States.","Menikarachchi LC, Hill DW, Hamdalla MA, Mandoiu II, Grant DF",,"NIGMS NIH HHS, NIGMS NIH HHS",14.0,United States +23998809,LongevityMap,0.995389521,LongevityMap,0.995389521,,0,1,http://genomics.senescence.info/longevity,301,,"(42.5467,-83.2113)",http://web.archive.org/web/20221001125556/https://genomics.senescence.info/longevity/,2013-08-30,"The Shraga Segal Department of Microbiology, Immunology and Genetics, Center for Multidisciplinary Research on Aging, Ben-Gurion University of the Negev, Beer-Sheva 84105, Israel; Judea Regional Research and Development Center, Carmel 90404, Israel.","Budovsky A, Craig T, Wang J, Tacutu R, Csordas A, Lourenço J, Fraifeld VE, de Magalhães JP",,Wellcome Trust,32.0,"Israel, Israel" +24001185,inTB,0.991402507,inTB,0.991402507,,0,1,http://www.evocell.org/inTB,200,,"(39.0997,-94.5786)",http://web.archive.org/web/20210518092130/http://www.evocell.org/inTB,2013-08-30,"Instituto Gulbenkian de Ciência, Rua da Quinta Grande 6, Apartado 14, Oeiras P-2781-901, Portugal. 
jleal@igc.gulbenkian.pt.","Soares P, Alves RJ, Abecasis AB, Penha-Gonçalves C, Gomes MG, Pereira-Leal JB",,,4.0,Portugal +24002112,M2SG,0.962027684,M2SG,0.962027684,Mendelian Inheritance in Man,0.855303322,1,http://prodata.swmed.edu/M2S/mut2seq.cgi,200,,"(32.8252,-96.8388)",http://web.archive.org/web/20220128123548/http://prodata.swmed.edu/M2S/mut2seq.cgi,2013-09-03,"Departments of biophysics and biochemistry, University of Texas Southwestern Medical Center and Howard Hughes Medical Institute, University of Texas Southwestern Medical Center, Dallas, TX 75390-9050, USA.","Ji R, Cong Q, Li W, Grishin NV",,"NIGMS NIH HHS, NIGMS NIH HHS, Howard Hughes Medical Institute",1.0,United States +24030781,IQdb,0.993846714,IQdb,0.993846714,,0,1,http://IQdb.cbi.pku.edu.cn,"HTTPConnectionPool(host='iqdb.cbi.pku.edu.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20150122215647/http://iqdb.cbi.pku.edu.cn:80/,2013-09-11,"Center for Bioinformatics, State Key Laboratory of Protein and Plant Gene Research, College of Life Sciences, Peking University, Beijing 100871, PR China.","Kong L, Cheng L, Fan LY, Zhao M, Qu H",,,12.0,China +24143056,MAGICdb,0.998173594,MAGICdb,0.998173594,Mango Genetic stocks,0.772508487,1,http://www.tnaugenomics.com/mango/index.php,"HTTPConnectionPool(host='www.tnaugenomics.com', port=80): Max retries exceeded with url: /mango/index.php (Caused by ReadTimeoutError(""HTTPConnectionPool(host='www.tnaugenomics.com', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220617044428/http://www.tnaugenomics.com/mango/index.php,2013-09-23,"Department of Plant Molecular Biology & Bioinformatics, TamilNadu Agricultural University, Coimbatore, Tamil Nadu - 641003.","Asaithambi D, Natesan S, Venkatesan V, Muthuraja R, Muthusamy K, Vinayagam P",,,0.0, +24150937,LoQAtE,0.994121492,LoQAtE,0.994121492,Localizaiton and Quantitation Atlas of the yeast proteomE,0.870577331,1,http://www.weizmann.ac.il/molgen/loqate,301,,"(31.8942,34.8120)",http://web.archive.org/web/20220802111302/https://www.weizmann.ac.il/molgen/loqate/,2013-10-22,"Department of Molecular Genetics, Weizmann Institute of Science, Rehovot 7610001, Israel and Whitehead Institute for Biomedical Research, Nine Cambridge Center, Cambridge, MA 02142, USA.","Breker M, Gymrek M, Moldavski O, Schuldiner M",,"European Research Council, European Research Council",40.0,"Israel, United States" +24194591,LenVarDB,0.996493101,LenVarDB,0.996493101,Protein Alignments organized as Structural Superfamilies,0.726639379,1,http://caps.ncbs.res.in/lenvardb,301,,"(13.0637,77.5674)",http://web.archive.org/web/20220121131241/http://caps.ncbs.res.in/lenvardb/,2013-11-04,"International Institute of Information Technology-Hyderabad, Gachibowli, Hyderabad 500032, Andhra Pradesh, India, National Centre for Biological Sciences (TIFR), UAS-GKVK Campus, Bellary Road, Bangalore 560065, Karnataka, India and SASTRA University, Tirumalaisamudram, Thanjavur 613401, Tamil Nadu, India.","Mutt E, Mathew OK, Sowdhamini R",,,3.0,"India, India, India" +"24194598, 26531826, 29140473, 31701148",JASPAR,0.993904173,JASPAR,0.993904173,,0,4,http://jaspar.genereg.net,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20221101074047/https://jaspar.genereg.net/,2020-01-01,"Department of Medical Genetics, Centre for Molecular Medicine and Therapeutics at the Child and Family Research Institute, University of British Columbia, Vancouver, BC, Canada, Department of Biology and 
Biotech Research and Innovation Centre, The Bioinformatics Centre, Copenhagen University, Ole Maaloes Vej 5, DK-2200, Denmark, Lineberger Comprehensive Cancer Center, University of North Carolina, Chapel Hill, NC 27599, USA, Laboratoire Physiologie Cellulaire & Végétale, Université Grenoble Alpes, CNRS, CEA, iRTSV, INRA, 38054 Grenoble, France, Computational Regulatory Genomics, MRC Clinical Sciences Centre, Imperial College London, Du Cane Road, London W12 0NN, UK, and Department of Informatics, University of Bergen, Thormøhlensgate 55, N-5008 Bergen, Norway., Centre for Molecular Medicine and Therapeutics at the Child and Family Research Institute, Department of Medical Genetics, University of British Columbia, Vancouver, V5Z 4H4, BC, Canada., Centre for Molecular Medicine Norway (NCMM), Nordic EMBL Partnership, University of Oslo, 0318 Oslo, Norway., Centre for Molecular Medicine and Therapeutics, Department of Medical Genetics, BC Children's Hospital Research Institute, University of British Columbia, 950 W 28th Ave, Vancouver, BC V5Z 4H4, Canada.","Mathelier A, Zhao X, Zhang AW, Parcy F, Worsley-Hunt R, Arenillas DJ, Buchman S, Chen CY, Chou A, Ienasescu H, Lim J, Shyr C, Tan G, Zhou M, Lenhard B, Sandelin A, Wasserman WW, Mathelier A, Fornes O, Arenillas DJ, Chen CY, Denay G, Lee J, Shi W, Shyr C, Tan G, Worsley-Hunt R, Zhang AW, Parcy F, Lenhard B, Sandelin A, Wasserman WW, Khan A, Fornes O, Stigliani A, Gheorghe M, Castro-Mondragon JA, van der Lee R, Bessy A, Chèneby J, Kulkarni SR, Tan G, Baranasic D, Arenillas DJ, Sandelin A, Vandepoele K, Lenhard B, Ballester B, Wasserman WW, Parcy F, Mathelier A, Fornes O, Castro-Mondragon JA, Khan A, van der Lee R, Zhang X, Richmond PA, Modi BP, Correard S, Gheorghe M, BaranaÅ¡ić D, Santana-Garcia W, Tan G, Chèneby J, Ballester B, Parcy F, Sandelin A, Lenhard B, Wasserman WW, Mathelier A",", , , ","NIGMS NIH HHS, Lundbeck Foundation, NIGMS NIH HHS, Medical Research Council, European Research Council, Novo Nordisk 
Fonden, Canadian Institutes of Health Research, Medical Research Council, Biotechnology and Biological Sciences Research Council, Medical Research Council, Medical Research Council, Lundbeck Foundation, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Canadian Institutes of Health Research, Genome Canada, Norwegian Research Council, Genome British Columbia, Norwegian Research Council, Medical Research Council, The Danish Cancer Society, Helse Sør-Øst, French National Agency for Research, Biotechnology and Biological Sciences Research Council, Canadian Institutes of Health Research, Medical Research Council, Medical Research Council, Genome British Columbia, Weston Brain Institute, Dutch Research Council (NWO), Michael Smith Foundation for Health Research, Genome British Columbia, Norwegian Cancer Society, French National Agency for Research, Natural Sciences and Engineering Research Council of Canada, BC Children's Hospital Foundation and Research Institute, University of Oslo",2269.0,"Canada, Canada, Canada, Denmark, France, Norway, Norway, Norway, United States" +"24203342, 33125652",iRefWeb,0.995616853,iRefWeb,0.995616853,,0,2,http://wodaklab.org/iRefWeb,"HTTPConnectionPool(host='wodaklab.org', port=80): Max retries exceeded with url: /iRefWeb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20220324140910/http://wodaklab.org/iRefWeb/,2021-01-01,"Molecular Structure and Function program, Hospital for Sick Children, Toronto, ON, Canada., Centre for Computational Medicine, Hospital for Sick Children, Toronto, ON, Canada. 
turinsky@sickkids.ca.","Turinsky AL, Razick S, Turner B, Donaldson IM, Wodak SJ, Turinsky AL, Dupont S, Botzki A, Razick S, Turner B, Donaldson IM, Wodak SJ",", ","Canadian Institutes of Health Research, CIHR",12.0,"Canada, Canada" +24243842,LPSN,0.997364774,LPSN,0.997364774,List of Prokaryotic Names with Standing in Nomenclature,0.985107541,1,http://www.bacterio.net,301,,"(50.9787,11.0328)",http://web.archive.org/web/20221105000759/https://www.bacterio.net/,2013-11-15,"LPSN, 261 Willis Road, Sudbury, MA 01776, USA.",Parte AC,,,414.0,United States +24253300,InvFEST,0.995598376,InvFEST,0.995598376,,0,1,http://invfestdb.uab.cat,200,,"(41.4911,2.1408)",http://web.archive.org/web/20220218153349/http://invfestdb.uab.cat/,2013-11-18,"Institut de Biotecnologia i de Biomedicina, Universitat Autònoma de Barcelona, Bellaterra, Barcelona, Spain, Departament de Genètica i de Microbiologia, Universitat Autònoma de Barcelona, Bellaterra, Barcelona, Spain and Institució Catalana de Recerca i Estudis Avançats (ICREA), Barcelona, Spain.","Martínez-Fundichely A, Casillas S, Egea R, Ràmia M, Barbadilla A, Pantano L, Puig M, Cáceres M",,European Research Council,27.0,"Spain, Spain, Spain" +24297255,iPfam,0.995806396,iPfam,0.995806396,,0,1,http://ipfam.org,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20200630140910/http://ipfam.org:80/,2013-12-01,"HHMI Janelia Farm Research Campus, 19700 Helix Drive, Ashburn VA 20147 USA and European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","Finn RD, Miller BL, Clements J, Bateman A",,Howard Hughes Medical Institute,81.0,United States +24324765,INDIGO,0.95893389,INDIGO,0.95893389,Annotation of Microbial Genomes,0.724162723,1,http://www.cbrc.kaust.edu.sa/indigo,302,,"(37.5331,-122.2486)",no_wayback,2013-12-06,"Computational Bioscience Research Center (CBRC), King Abdullah University of Science and Technology (KAUST), Thuwal, Kingdom of Saudi 
Arabia.","Alam I, Antunes A, Kamau AA, Ba Alawi W, Kalkatawi M, Stingl U, Bajic VB",,,52.0,Saudi Arabia +24507667,Kassiopeia,0.946248412,Kassiopeia,0.946248412,,0,1,http://www.motorprotein.de/kassiopeia,301,,"(51.5344,9.9323)",http://web.archive.org/web/20180108104658/http://www.motorprotein.de:80/kassiopeia,2014-02-10,None,"Hatje K, Kollmar M",,,7.0, +24525374,lncRNAMap,0.98099345,lncRNAMap,0.98099345,,0,1,http://lncRNAMap.mbc.nctu.edu.tw,"HTTPConnectionPool(host='lncrnamap.mbc.nctu.edu.tw', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-01-23,"Biomedical Informatics, Asia University, Taichung, Taiwan; Epigenome Research Center, China Medical University Hospital, Taichung, Taiwan; School of Medicine, China Medical University, Taichung, Taiwan; Institute of Bioinformatics and Systems Biology, National Chiao Tung University, Hsin-Chu, Taiwan.","Chan WL, Huang HD, Chang JG",,"National Science Council of the Republic of China, National Science Council of the Republic of China, UST-UCSD International Center of Excellence in Advanced Bio-Engineering, National Science Council of the Republic of China, Asia University, MOE ATU, National Science Council of the Republic of China, UST-UCSD International Center of Excellence in Advanced Bio-Engineering, UST-UCSD International Center of Excellence in Advanced Bio-Engineering, National Science Council of the Republic of China",25.0,"China, China" +24558125,MANTRA,0.991984367,MANTRA,0.991984367,,0,1,http://mantra.tigem.it,301,,"(40.8439,14.0952)",http://web.archive.org/web/20221020072056/https://mantra.tigem.it/,2014-02-20,"Telethon Institute of Genetics and Medicine, Via P. Castellino 111, 80131 Naples, Interactive SRL, Via Fratelli Bisogno 5, 83100 Avellino, Dip. 
di Studi Aziendali e Quantitativi Università degli studi di Napoli 'Parthenope', Via Generale Parisi 13, 80132 Naples, Institute for High-Performance Computing - ICAR - CNR, Via P. Castellino 111, 80131 Naples and Department of Electrical Engineering and Information Technology, University of Naples 'Federico II', Via Claudio 21, 80125 Naples, Italy.","Carrella D, Napolitano F, Rispoli R, Miglietta M, Carissimo A, Cutillo L, Sirci F, Gregoretti F, Di Bernardo D",,Telethon,36.0,Italy +24813212,lncRNAtor,0.842248619,lncRNAtor,0.842248619,,0,1,http://lncrnator.ewha.ac.kr,"HTTPConnectionPool(host='lncrnator.ewha.ac.kr', port=80): Max retries exceeded with url: / (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,no_wayback,2014-05-09,"Ewha Research Center for Systems Biology (ERCSB), Department of Life Science, Ewha Womans University, Seoul 120-750, Republic of Korea.","Park C, Yu N, Choi I, Kim W, Lee S",,,74.0, +"24885522, 27924020, 33219661",LincSNP,0.997198164,LincSNP,0.997198164,,0,3,http://bioinfo.hrbmu.edu.cn/LincSNP,"HTTPConnectionPool(host='bioinfo.hrbmu.edu.cn', port=80): Max retries exceeded with url: /LincSNP (Caused by ConnectTimeoutError(, 'Connection to bioinfo.hrbmu.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210417025829/http://bioinfo.hrbmu.edu.cn/LincSNP/,2021-01-01,"None, College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China., College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Ning S, Zhao Z, Ye J, Wang P, Zhi H, Li R, Wang T, Li X, Ning S, Yue M, Wang P, Liu Y, Zhi H, Zhang Y, Zhang J, Gao Y, Guo M, Zhou D, Li X, Li X, Gao Y, Li X, Shang S, Guo S, Wang P, Sun D, Gan J, Sun J, Zhang Y, Wang J, Wang X, Li X, Zhang Y, Ning S",", , ",", , National Natural Science Foundation of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Heilongjiang Touyan Innovation Team Program, Heilongjiang Provincial Natural Science Foundation",78.0,"China, China" +24926662,lnCeDB,0.99820447,lnCeDB,0.99820447,,0,1,http://gyanxet-beta.com/lncedb,"HTTPConnectionPool(host='gyanxet-beta.com', port=80): Max retries exceeded with url: /lncedb (Caused by ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')))",,,http://web.archive.org/web/20221016223944/https://gyanxet-beta.com/lncedb,2014-06-13,"Computational Biology Group, Indian Association for the Cultivation of Science, Kolkata, West Bengal, India.","Das S, Ghosal S, Sen R, Chakrabarti J",,,93.0,India +25058394,IthaGenes,0.993503034,IthaGenes,0.993503034,,0,1,http://www.ithanet.eu/db/ithagenes,"HTTPConnectionPool(host='www.ithanet.eu', port=80): Max retries exceeded with url: /db/ithagenes (Caused by ConnectTimeoutError(, 'Connection to www.ithanet.eu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220712063629/https://ithanet.eu/db/ithagenes,2014-07-24,"Molecular Genetics Thalassaemia, The Cyprus Institute of Neurology and Genetics, Nicosia, Cyprus.","Kountouris P, Lederer CW, Fanis P, Feleki X, Old J, Kleanthous M",,,58.0,"Cyprus, Cyprus" +25097383,IntergenicDB,0.977055252,IntergenicDB,0.977055252,,0,1,http://intergenicdb.bioinfoucs.com,200,,"(-23.5475,-46.6361)",http://web.archive.org/web/20220520051318/http://intergenicdb.bioinfoucs.com,2014-06-30,"Centro de Computação e Tecnologia da Informação, Universidade de Caxias do Sul, Rua Francisco Getúlio Vargas, 1130 - CEP 95070-560 - Caxias do Sul, Rio Grande do Sul, Brasil ; Instituto de Biotecnologia, Universidade de Caxias do Sul, Rua Francisco Getúlio Vargas, 1130 - CEP 95070-560 - Caxias do Sul, Rio Grande do Sul, Brasil.","Notari DL, Molin A, Davanzo V, Picolotto D, Ribeiro HG, Silva Sde A",,,0.0, +"25115331, 33247931",KiMoSys,0.998547196,KiMoSys,0.998547196,,0,2,http://kimosys.org,301,,"(38.7167,-9.1333)",http://web.archive.org/web/20221006020124/https://kimosys.org/,2020-11-01,"Instituto de Engenharia de Sistemas e Computadores, Investigacão e Desenvolvimento (INESC-ID), R Alves Redol 9, Lisboa, 1000-029, Portugal. 
rcosta@kdbio.inesc-id.pt., Departamento de Informática Faculdade de Ciências e Tecnologia, Universidade NOVA de Lisboa Campus de Caparica, 2829-516, Caparica, Portugal.","Costa RS, Veríssimo A, Vinga S, Mochão H, Barahona P, Costa RS",", ",", ",9.0,"Portugal, Portugal" +25125444,LeishMicrosatDB,0.997771621,LeishMicrosatDB,0.997771621,Leishmania Microsatellite Database,0.960049748,1,http://biomedinformri.com/leishmicrosat,404,,,http://web.archive.org/web/20211201125154/http://biomedinformri.com/leishmicrosat/,2014-08-14,"Biomedical Informatics Center and Department of Molecular Biology, Rajendra Memorial Research Institute of Medical Sciences, Patna 800007, India manasranjandikhit@gmail.com mrdikhit@icmr.org.in.","Dikhit MR, Moharana KC, Sahoo BR, Sahoo GC, Das P",,,1.0,India +25166490,isoMETLIN,0.988883018,isoMETLIN,0.988883018,,0,1,http://isometlin.scripps.edu,302,,"(32.8919,-117.2035)",http://web.archive.org/web/20221017020235/https://isometlin.scripps.edu/,2014-09-19,"Department of Chemistry, Washington University in St. Louis , St. Louis, Missouri 63130, United States.","Cho K, Mahieu N, Ivanisevic J, Uritboonthai W, Chen YJ, Siuzdak G, Patti GJ",,"NINDS NIH HHS, NIGMS NIH HHS, NIDA NIH HHS, NIDA NIH HHS",17.0,United States +25204646,Isotopo,0.985290567,Isotopo,0.985290567,,0,1,http://spp1316.uni-wuerzburg.de/bioinformatics/isotopo,301,,"(49.7939,9.9512)",no_wayback,2014-09-09,"Department of Bioinformatics, Biocenter, University of Würzburg, Am Hubland, 97074 Wuerzburg, Germany, Department of Neurobiology and Genetics, Biocenter, University of Wuerzburg, Am Hubland, 97074 Wuerzburg, Germany, Institute of Molecular and Translational Therapeutic Strategies, OE 8886, Hannover Medical School, Carl-Neuberg-Str. 
1, D-30625 Hanover, Germany, Lehrstuhl für Biochemie, Center of Isotopologue Profiling, Lichtenbergstraße 4, Technische Universität München, D-85747 Garching, Germany, Division of Microbiology, Barbarastraße 11, Gebäude 36, University of Osnabrück, 49076 Osnabrück, Germany, Department of Bioinformatics and Biochemistry, Langer Kamp 19B, Technical University Braunschweig, D-38106 Braunschweig, Germany, Institute for Microbiology, Biozentrum, 2. Obergeschoss Spielmannstraße 7, Technical University Braunschweig, 38106 Braunschweig, Germany and Computational biology and structures program, European Molecular Biology Laboratory, Meyerhofstr. 1, 69117 Heidelberg, Germany Department of Bioinformatics, Biocenter, University of Würzburg, Am Hubland, 97074 Wuerzburg, Germany, Department of Neurobiology and Genetics, Biocenter, University of Wuerzburg, Am Hubland, 97074 Wuerzburg, Germany, Institute of Molecular and Translational Therapeutic Strategies, OE 8886, Hannover Medical School, Carl-Neuberg-Str. 1, D-30625 Hanover, Germany, Lehrstuhl für Biochemie, Center of Isotopologue Profiling, Lichtenbergstraße 4, Technische Universität München, D-85747 Garching, Germany, Division of Microbiology, Barbarastraße 11, Gebäude 36, University of Osnabrück, 49076 Osnabrück, Germany, Department of Bioinformatics and Biochemistry, Langer Kamp 19B, Technical University Braunschweig, D-38106 Braunschweig, Germany, Institute for Microbiology, Biozentrum, 2. 
Obergeschoss Spielmannstraße 7, Technical University Braunschweig, 38106 Braunschweig, Germany and Computational biology and structures program, European Molecular Biology Laboratory, Meye","Ahmed Z, Zeeshan S, Huber C, Hensel M, Schomburg D, Münch R, Eylert E, Eisenreich W, Dandekar T",,,8.0,"Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany" +25220766,LabeledIn,0.984398663,LabeledIn,0.984398663,,0,1,http://ftp.ncbi.nlm.nih.gov/pub/lu/LabeledIn,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221017005746/http://ftp.ncbi.nlm.nih.gov/pub/lu/LabeledIn/,2014-08-23,"National Center for Biotechnology Information (NCBI), U.S. National Institutes of Health, 8600 Rockville Pike, Bethesda, USA. Electronic address: ritu.khare@nih.gov.","Khare R, Li J, Lu Z",,Intramural NIH HHS,25.0,United States +25308527,LncEnvironmentDB,0.989758611,LncEnvironmentDB,0.989758611,,0,1,http://bioinfo.hrbmu.edu.cn/lncefdb,"HTTPConnectionPool(host='bioinfo.hrbmu.edu.cn', port=80): Max retries exceeded with url: /lncefdb (Caused by ConnectTimeoutError(, 'Connection to bioinfo.hrbmu.edu.cn timed out. (connect timeout=5)'))",,,no_wayback,2014-12-01,"College of Life Science, Jilin University, Changchun 130012, P. R. China. 
zhouhui@jlu.edu.cn.","Zhou M, Han L, Zhang J, Hao D, Cai Y, Wang Z, Zhou H, Sun J",,,5.0,China +25320561,Kazusa Marker DataBase,0.685222085,DataBase,0.645969987,Kazusa Marker DataBase,0.685222085,1,http://marker.kazusa.or.jp,200,,"(35.3343,139.4072)",http://web.archive.org/web/20221022170659/https://marker.kazusa.or.jp/,2014-09-01,"Department of Plant Genome Research, Kazusa DNA Research Institute , Kisarazu, Chiba 292-0818 , Japan.","Shirasawa K, Isobe S, Tabata S, Hirakawa H",,,9.0,Japan +25326331,ImmuCo,0.996915221,ImmuCo,0.996915221,,0,1,http://immuco.bjmu.edu.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220621045400/http://immuco.bjmu.edu.cn/,2014-10-17,"Department of Immunology, Key Laboratory of Medical Immunology, Ministry of Health, School of Basic Medical Sciences, Peking University Health Science Center, No. 38 Xueyuan Road, Beijing 100191, China Peking University Center for Human Disease Genomics, No. 38 Xueyuan Road, Beijing 100191, China wangpzh@bjmu.edu.cn.","Wang P, Qi H, Song S, Li S, Huang N, Han W, Ma D",,,17.0,"China, China" +25332394,lncRNAdb,0.996756196,lncRNAdb,0.996756196,,0,1,http://lncrnadb.org,200,,"(-33.8678,151.2073)",http://web.archive.org/web/20220815123422/http://lncrnadb.org/,2014-10-20,"Garvan Institute of Medical Research, 384 Victoria Street, Sydney, NSW 2010, Australia St Vincent's Clinical School, University of New South Wales, Sydney, NSW 2052, Australia.","Quek XC, Thomson DW, Maag JL, Bartonicek N, Signal B, Clark MB, Gloss BS, Dinger ME",,,266.0,"Australia, Australia" +25361973,KnotProt,0.995327413,KnotProt,0.995327413,,0,1,http://knotprot.cent.uw.edu.pl,301,,"(52.2298,21.0118)",http://web.archive.org/web/20220710220451/https://knotprot.cent.uw.edu.pl/,2014-10-31,"Faculty of Chemistry, University of Warsaw, Pasteura 1, 02-093 Warsaw, Poland.","Jamroz M, Niemyska W, Rawdon EJ, Stasiak A, Millett KC, Sułkowski P, Sulkowska JI",,,71.0,Poland 
+25378302,Islander,0.601636887,Islander,0.601636887,,0,1,http://bioinformatics.sandia.gov/islander,302,,"(35.0443,-106.6729)",http://web.archive.org/web/20180620131457/http://bioinformatics.sandia.gov:80/islander/,2014-11-05,"Sandia National Laboratories, Department of Systems Biology, Livermore, CA 94550, USA.","Hudson CM, Lau BY, Williams KP",,,36.0,United States +25388589,iPathCons,0.982198,iPathCons,0.982198,,0,1,http://ento.njau.edu.cn/ipath,"HTTPConnectionPool(host='ento.njau.edu.cn', port=80): Max retries exceeded with url: /ipath (Caused by ConnectTimeoutError(, 'Connection to ento.njau.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20151029011851/http://ento.njau.edu.cn:80/ipath/,2014-11-11,"Department of Entomology, College of Plant Protection, Nanjing Agricultural University and The Key laboratory of Monitoring and Management of Plant Diseases and Insects, Ministry of Agriculture, No. 1, Weigang Road, Xuanwu District, Nanjing, Jiangsu 210095, China.","Zhang Z, Yin C, Liu Y, Jie W, Lei W, Li F",,,7.0,China +25414382,Kin-driver,0.993340989,Kin-driver,0.993340989,,0,1,http://kin-driver.leloir.org.ar,200,,"(-34.6131,-58.3772)",http://web.archive.org/web/20200209171715/http://kin-driver.leloir.org.ar:80/,2014-11-19,"Fundación Instituto Leloir, Av. Patricias Argentinas 435. C1405BWE, Buenos Aires, Argentina, Pompeu Fabra University (UPF), Dept. de Tecnologies de la Informació i les Comunicacions. 
Tanger 122-140 08018, Barcelona, Spain, Computational Genomics Laboratory, Genetics Department, Institut de Biologia Universitat de Barcelona (IBUB), Facultat de Biologia, Av Diagonal 645 and Breakthrough Cancer Research Unit, Dexeus University Hospital, Sabino Arana 5-19, Barcelona, Spain.","Simonetti FL, Tornador C, Nabau-Moretó N, Molina-Vila MA, Marino-Buslje C",,,14.0,"Argentina, Spain, Spain" +25536965,MACE,0.964918494,MACE,0.964918494,,0,1,http://mace.sookmyung.ac.kr,"HTTPConnectionPool(host='mace.sookmyung.ac.kr', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to mace.sookmyung.ac.kr timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20221017165618/http://mace.sookmyung.ac.kr/,2014-12-22,"Center for Advanced Bioinformatics and Systems Medicine and Department of Biological Sciences, Sookmyung Women's University, Seoul 140-742, Republic of Korea.","Jeong E, He N, Park H, Song M, Kim N, Lee S, Yoon S",,,4.0, +25630312,LSE-Sign,0.994305208,LSE-Sign,0.994305208,,0,1,http://www.bcbl.eu/databases/lse,302,,"(43.3128,-1.9750)",no_wayback,2016-03-01,"Deafness, Cognition and Language Research Centre, University College London, London, UK.","Gutierrez-Sigut E, Costello B, Baus C, Carreiras M",,,7.0, +25707505,IIIDB,0.965002835,IIIDB,0.965002835,,0,1,http://syslab.nchu.edu.tw/IIIDB,"HTTPConnectionPool(host='syslab.nchu.edu.tw', port=80): Max retries exceeded with url: /IIIDB (Caused by ConnectTimeoutError(, 'Connection to syslab.nchu.edu.tw timed out. (connect timeout=5)'))",,,no_wayback,2015-01-21,None,"Tseng YT, Li W, Chen CH, Zhang S, Chen JJ, Zhou X, Liu CC",,"NIGMS NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS",14.0, +25725059,LocSigDB,0.997438669,LocSigDB,0.997438669,,0,1,http://genome.unmc.edu/LocSigDB,"HTTPConnectionPool(host='genome.unmc.edu', port=80): Max retries exceeded with url: /LocSigDB (Caused by ConnectTimeoutError(, 'Connection to genome.unmc.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20200811014023/http://genome.unmc.edu/LocSigDB/,2015-02-27,"Department of Genetics, Cell Biology and Anatomy, Bioinformatics and Systems Biology Core, Department of Biochemistry and Molecular Biology, Fred and Pamela Buffet Cancer Center and Eppley Institute for Research in Cancer and Allied Diseases, University of Nebraska Medical Center, Omaha, NE 68198, USA.","Negi S, Pandey S, Srinivasan SM, Mohammed A, Guda C",,,20.0,United States +25776024,LMPID,0.998238027,LMPID,0.998238027,Linear Motif mediated Protein Interaction Database,0.925727314,1,http://bicresources.jcbose,"HTTPConnectionPool(host='bicresources.jcbose', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2015-03-16,"Bioinformatics Centre, Bose Institute, Kolkata, India.","Sarkar D, Jana T, Saha S",,,5.0,India +25819075,LOTUS-DB,0.744012758,LOTUS-DB,0.744012758,,0,1,http://lotus-db.wbgcas.cn,"HTTPConnectionPool(host='lotus-db.wbgcas.cn', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to lotus-db.wbgcas.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210413005611/http://lotus-db.wbgcas.cn/,2015-03-27,"Key Laboratory of Plant Germplasm Enhancement and Speciality Agriculture, Wuhan Botanical Garden, Chinese Academy of Sciences, Wuhan, China and University of Chinese Academy of Sciences, Beijing, China.","Wang K, Deng J, Damaris RN, Yang M, Xu L, Yang P",,,11.0,"China, China" +25848172,ISOB,0.992247462,ISOB,0.992247462,,0,1,http://www.snakebd.com,200,,"(42.7325,-84.5555)",http://web.archive.org/web/20220520182049/http://snakebd.com/,2015-02-28,"Department of Genetic Engineering and Biotechnology, University of Rajshahi, Rajshahi-6205, Bangladesh.","Roly ZY, Hakim MA, Zahan AS, Hossain MM, Reza MA",,,7.0,Bangladesh +25907632,KM-parkin-DB,0.96272862,KM-parkin-DB,0.96272862,,0,1,http://mutview.dmb.med.keio.ac.jp,302,,"(35.6895,139.6917)",http://web.archive.org/web/20181103021701/http://mutview.dmb.med.keio.ac.jp:80/,2015-06-03,"Laboratory of Gene Medicine, Keio University School of Medicine.","Mitsuyama S, Ohtsubo M, Minoshima S, Shimizu N",,,5.0, +25988315,ImmuSort,0.995690107,ImmuSort,0.995690107,,0,1,http://202.85.212.211/Account/ImmuSort.html,"HTTPConnectionPool(host='202.85.212.211', port=80): Max retries exceeded with url: /Account/ImmuSort.html (Caused by ConnectTimeoutError(, 'Connection to 202.85.212.211 timed out. (connect timeout=5)'))",,,no_wayback,2015-05-19,"1] Department of Immunology, School of Basic Medical Sciences, Peking University Health Science Center. Key Laboratory of Medical Immunology, Ministry of Health, China. Peking University Center for Human Disease Genomics. No. 38 Xueyuan Road, Beijing, 100191, China [2] Chinese National Human Genome Center at Beijing. No. 
3-707 North YongChang Road, BDA, Beijing, 100176, China.","Wang P, Yang Y, Han W, Ma D",,,16.0,"China, China, China" +"26173699, 27903896",IMG-ABC,0.986837733,IMG-ABC,0.986837733,Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters,0.984186777,2,http://img.jgi.doe.gov/abc,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20220915015652/https://img.jgi.doe.gov/abc/,2016-11-29,"Prokaryotic Super Program, DOE Joint Genome Institute, Walnut Creek, California, USA., Microbial Genome and Metagenome Program, Department of Energy Joint Genome Institute, Walnut Creek, CA 94598, USA michalis@lbl.gov.","Hadjithomas M, Chen IM, Chu K, Ratner A, Palaniappan K, Szeto E, Huang J, Reddy TB, Cimermančič P, Fischbach MA, Ivanova NN, Markowitz VM, Kyrpides NC, Pati A, Hadjithomas M, Chen IA, Chu K, Huang J, Ratner A, Palaniappan K, Andersen E, Markowitz V, Kyrpides NC, Ivanova NN",", ","NIGMS NIH HHS, ",70.0,"United States, United States" +26248563,ITS2,0.728715201,ITS2,0.728715201,,0,1,http://its2.bioapps.biozentrum.uni-wuerzburg.de,200,,"(49.7939,9.9512)",http://web.archive.org/web/20220526172357/http://its2.bioapps.biozentrum.uni-wuerzburg.de/,2015-08-06,"Department of Animal Ecology and Tropical Biology, Julius Maximilian University, Würzburg, Germany.","Ankenbrand MJ, Keller A, Wolf M, Schultz J, Förster F",,,66.0,Germany +26362267,ImmuNet,0.993981779,ImmuNet,0.993981779,,0,1,http://immunet.princeton.edu,301,,"(40.3487,-74.6590)",http://web.archive.org/web/20221017070011/https://immunet.princeton.edu/,2015-09-08,"Lewis-Sigler Institute for Integrative Genomics, Princeton University, Princeton, NJ 08544, USA.","Gorenshteyn D, Zaslavsky E, Fribourg M, Park CY, Wong AK, Tadych A, Hartmann BM, Albrecht RA, García-Sastre A, Kleinstein SH, Troyanskaya OG, Sealfon SC",,"NIAID NIH HHS, NIMH NIH HHS, NIGMS NIH HHS, NIAID NIH HHS, NIAID NIH HHS",32.0,United States 
+26363021,LncReg,0.997745812,LncReg,0.997745812,,0,1,http://bioinformatics.ustc.edu.cn/lncreg,"HTTPConnectionPool(host='bioinformatics.ustc.edu.cn', port=80): Max retries exceeded with url: /lncreg (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200706222024/http://bioinformatics.ustc.edu.cn/lncreg/,2015-09-10,"School of Information Science and Technology, Centers for Biomedical Engineering and.","Zhou Z, Shen Y, Khan MR, Li A",,,20.0, +26376976,KGCAK,0.998066127,KGCAK,0.998066127,,0,1,http://kgcak.big.ac.cn/KGCAK,"HTTPConnectionPool(host='kgcak.big.ac.cn', port=80): Max retries exceeded with url: /KGCAK (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160308052658/http://kgcak.big.ac.cn:80/KGCAK/,2015-09-16,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing, 100101, PR China. 
dapeng.wang@ucl.ac.uk.","Wang D, Xu J, Yu J",,,2.0,China +26424080,JRC GMO-Amplicons,0.748133315,JRC GMO-Amplicons,0.748133315,,0,1,http://gmo-crl.jrc.ec.europa.eu/jrcgmoamplicons,302,,"(37.3824,-5.9761)",http://web.archive.org/web/20221104124123/https://gmo-crl.jrc.ec.europa.eu/jrcgmoamplicons/,2015-09-30,"Molecular Biology and Genomics Unit, Joint Research Centre, European Commission, Ispra, Italy mauro.petrillo@ec.europa.eu.","Petrillo M, Angers-Loustau A, Henriksson P, Bonfini L, Patak A, Kreysa J",,,7.0,Italy +26432833,iPPI-DB,0.996882915,iPPI-DB,0.996882915,,0,1,http://www.ippidb.cdithem.fr,"HTTPConnectionPool(host='www.ippidb.cdithem.fr', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20191222145428/http://www.ippidb.cdithem.fr:80/,2015-10-01,"Université Paris Diderot, Sorbonne Paris Cité, Molécules Thérapeutiques, In Silico, INSERM UMR-S 973, Paris, France INSERM, U973, Paris, France.","Labbé CM, Kuenemann MA, Zarzycka B, Vriend G, Nicolaes GA, Lagorce D, Miteva MA, Villoutreix BO, Sperandio O",,,18.0,"France, France" +26444974,ListeriaBase,0.993109643,ListeriaBase,0.993109643,,0,1,http://listeria.um.edu.my,"HTTPConnectionPool(host='listeria.um.edu.my', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to listeria.um.edu.my timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20161101132517/http://listeria.um.edu.my:80/,2015-10-06,"Genome Informatics Research Laboratory, High Impact Research Building, University of Malaya, Kuala Lumpur, 50603, Malaysia. 
muifern007@gmail.com.","Tan MF, Siow CC, Dutta A, Mutha NV, Wee WY, Heydari H, Tan SY, Ang MY, Wong GJ, Choo SW",,"Universiti Malaya (MY) UM Research Grant (UMRG), Universiti Malaya (MY) High Impact Research Grant UM-MOHE",4.0,Malaysia +"26516188, 30407591",IID,0.992068827,IID,0.992068827,Integrated Interactions Database,0.880897582,2,http://ophid.utoronto.ca/iid,301,,"(43.7001,-79.4163)",http://web.archive.org/web/20220620235926/http://ophid.utoronto.ca/iid/,2019-01-01,"Princess Margaret Cancer Centre, University Health Network, Toronto, ON, M5G 1L7, Canada., Krembil Research Institute, University Health Network, Toronto, ON M5T 0S8, Canada.","Kotlyar M, Pastrello C, Sheahan N, Jurisica I, Kotlyar M, Pastrello C, Malik Z, Jurisica I",", ",", Natural Sciences Research Council, Krembil Foundation, Krembil Foundation, Canada Research Chairs, Canada Foundation for Innovation, Canada Foundation for Innovation, Canada Foundation for Innovation, Canada Research Chairs",143.0,"Canada, Canada" +26519469,JuncDB,0.996719241,JuncDB,0.996719241,,0,1,http://juncdb.carmelab.huji.ac.il,"HTTPConnectionPool(host='juncdb.carmelab.huji.ac.il', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to juncdb.carmelab.huji.ac.il timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20210118132340/http://juncdb.carmelab.huji.ac.il/,2015-10-30,"Department of Genetics, The Alexander Silberman Institute of Life Sciences, Faculty of Science, The Hebrew University of Jerusalem, Edmond J. Safra Campus, Givat Ram, Jerusalem 91904, Israel The Rachel and Selim Benin School of Computer Science and Engineering, The Hebrew University of Jerusalem, Edmond J. 
Safra Campus, Jerusalem 91904, Israel.","Chorev M, Guy L, Carmel L",,,5.0,"Benin, Israel, Israel" +26546515,LIS,0.903893352,LIS,0.903893352,Legume Information System,0.759577766,1,"http://legumeinfo.org, http://legumefederation.org","301, 301",,"(37.7621,-122.3971), (35.6219,-105.8688)","http://web.archive.org/web/20221016212241/https://www.legumeinfo.org/, http://web.archive.org/web/20180831170135/https://legumefederation.org/",2015-11-05,"National Center for Genome Resources, Santa Fe, NM 87505, USA.","Dash S, Campbell JD, Cannon EK, Cleary AM, Huang W, Kalberer SR, Karingula V, Rice AG, Singh J, Umale PE, Weeks NT, Wilkey AP, Farmer AD, Cannon SB",,,48.0,United States +26578584,InsectBase,0.997729719,InsectBase,0.997729719,,0,1,http://www.insect-genome.com,200,,"(30.2936,120.1614)",http://web.archive.org/web/20221014104314/http://insect-genome.com/,2015-11-17,"Ministry of Agriculture, Key Lab of Agricultural Entomology and Institute of Insect Sciences, Zhejiang University, 866 Yuhangtang Road, Hangzhou 310058, China Department of Entomology, Nanjing Agricultural University, Nanjing 210095, China.","Yin C, Shen G, Guo D, Wang S, Ma X, Xiao H, Liu J, Zhang Z, Liu Y, Zhang Y, Yu K, Huang S, Li F",,,58.0,"China, China" +26582920,iGNM,0.993238688,iGNM,0.993238688,,0,1,http://gnmdb.csb.pitt.edu,200,,"(40.4406,-79.9959)",http://web.archive.org/web/20220716094850/http://gnmdb.csb.pitt.edu/,2015-11-17,"Department of Computational and Systems Biology, School of Medicine, University of Pittsburgh, PA 15213, USA.","Li H, Chang YY, Yang LW, Bahar I",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",34.0,United States +26653323,iTAP,0.940019906,iTAP,0.940019906,,0,1,http://sites.google.com/a/vt.edu/biomolecular-engineering-lab/software,302,,"(34.0522,-118.2437)",no_wayback,2015-12-12,"Department of Biological Systems Engineering, Virginia Tech, Blacksburg, VA, 24061, USA. 
sniveda5@vt.edu.","Sundararaman N, Ash C, Guo W, Button R, Singh J, Feng X",,"National Science Foundation, Virginia Polytechnic Institute and State University",2.0,United States +26656885,KIR,0.991020302,KIR,0.991020302,Kiwifruit Information Resource,0.94024086,1,http://bdg.hfut.edu.cn/kir/index.html,"HTTPConnectionPool(host='bdg.hfut.edu.cn', port=80): Max retries exceeded with url: /kir/index.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160322013416/http://bdg.hfut.edu.cn:80/kir/index.html,2015-12-09,"School of Biotechnology and Food Engineering, Hefei University of Technology, Hefei 230009, China.","Yue J, Liu J, Ban R, Tang W, Deng L, Fei Z, Liu Y",,,7.0,China +26708986,LymPHOS,0.99777627,LymPHOS,0.99777627,,0,1,http://www.lymphos.org,"HTTPConnectionPool(host='www.lymphos.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20220813064741/https://www.lymphos.org/,2015-12-26,"CSIC/UAB Proteomics Laboratory, Instituto De Investigaciones Biomédicas De Barcelona-Consejo Superior De Investigaciones Científicas (IIBB-CSIC/IDIBAPS), Rosellón 161 6a Planta, Barcelona E-08036, Spain.","Nguyen TD, Vidal-Cortes O, Gallardo O, Abian J, Carrascal M",,,4.0,Spain +26787663,LnCaNet,0.995760918,LnCaNet,0.995760918,,0,1,http://lncanet.bioinfo-minzhao.org/Contact,406,,,no_wayback,2016-01-18,"Faculty of Science, Health, Education and Engineering, University of the Sunshine Coast, Maroochydore DC, QLD 4558, Australia.","Liu Y, Zhao M",,,29.0,Australia +26912953,Mammalian Mitochondrial ncRNA Database,0.941759574,Unlabelled Mammalian Mitochondrial ncRNA,0.79049837,Mammalian Mitochondrial ncRNA 
Database,0.941759574,1,http://www.iitm.ac.in/bioinfo/mmndb,302,,"(13.0156,80.2467)",http://web.archive.org/web/20210805005308/https://www.iitm.ac.in/bioinfo/mmndb/,2015-11-30,"Department of Biotechnology, Bhupat and Jyoti Mehta School of Biosciences, Indian Institute of Technology Madras, Chennai 600 036, Tamil Nadu, India.","Anandakumar S, Vijayakumar S, Arumugam N, Gromiha MM",,,2.0,India +26944085,LNCat,0.995651901,LNCat,0.995651901,lncRNA,0.774182886,1,http://biocc.hrbmu.edu.cn/LNCat,"HTTPConnectionPool(host='biocc.hrbmu.edu.cn', port=80): Max retries exceeded with url: /LNCat (Caused by ProtocolError('Connection aborted.', BadStatusLine(' OK\r\n')))",,,http://web.archive.org/web/20200127054122/http://biocc.hrbmu.edu.cn:80/LNCat/,2017-03-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.","Xu J, Bai J, Zhang X, Lv Y, Gong Y, Liu L, Zhao H, Yu F, Ping Y, Zhang G, Lan Y, Xiao Y, Li X",,,54.0,China +26989151,KinetochoreDB,0.998664141,KinetochoreDB,0.998664141,,0,1,http://lightning.med.monash.edu/kinetochoreDB2,"HTTPConnectionPool(host='lightning.med.monash.edu', port=80): Max retries exceeded with url: /kinetochoreDB2 (Caused by ConnectTimeoutError(, 'Connection to lightning.med.monash.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20160627005815/http://lightning.med.monash.edu:80/kinetochoreDB2/,2016-03-17,"Infection and Immunity Program, Biomedicine Discovery Institute and the Department of Biochemistry and Molecular Biology, Faculty of Medicine, Monash University, Melbourne, Victoria, 3800, Australia.","Li C, Androulakis S, Buckle AM, Song J",,,0.0,Australia +27081555,iJGVD,0.988222253,iJGVD,0.988222253,integrative Japanese Genome Variation Database,0.978778683,1,http://ijgvd.megabank.tohoku.ac.jp,302,,"(38.2667,140.8667)",http://web.archive.org/web/20220215165448/https://ijgvd.megabank.tohoku.ac.jp/,2015-11-26,"Department of Integrative Genomics, Tohoku Medical Megabank Organization, Tohoku University, Sendai, Japan; Graduate School of Medicine, Tohoku University, Sendai, Japan.","Yamaguchi-Kabata Y, Nariai N, Kawai Y, Sato Y, Kojima K, Tateno M, Katsuoka F, Yasuda J, Yamamoto M, Nagasaki M",,,56.0,"Japan, Japan" +27087309,Kalium,0.988748372,Kalium,0.988748372,,0,1,http://kaliumdb.org,301,,"(55.7522,37.6156)",http://web.archive.org/web/20221004034125/https://kaliumdb.org/,2016-04-17,"Shemyakin-Ovchinnikov Institute of Bioorganic Chemistry, Russian Academy of Sciences, Moscow 117997, Russia.","Kuzmenkov AI, Krylov NA, Chugunov AO, Grishin EV, Vassilevski AA",,,16.0, +27153728,IsomiR Bank,0.97647199,IsomiR Bank,0.97647199,,0,1,http://mcg.ustc.edu.cn/bsc/isomir/Contacts,404,,,no_wayback,2016-03-02,"Molecular and Cell Genetics Laboratory, The CAS Key Laboratory of Innate Immunity and Chronic Diseases, Hefei National Laboratory for Physical Sciences at Microscale, School of Life Sciences, CAS Center for Excellence in Molecular Cell Science, University of Science and Technology of China, Collaborative Innovation Center of Genetics and Development, Collaborative Innovation Center for Cancer Medicine, Hefei 230027, Anhui, China.","Zhang Y, Zang Q, Xu B, Zheng W, Ban R, Zhang H, Yang Y, Hao Q, Iqbal F, Li A, Shi Q",,,26.0,"China, China" 
+27465544,IGDD,0.995845596,IGDD,0.995845596,Intronless Genes Database in Dicots,0.981970423,1,http://bio.njfu.edu.cn/igdd,"HTTPConnectionPool(host='bio.njfu.edu.cn', port=80): Max retries exceeded with url: /igdd (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,no_wayback,2016-07-27,"Key Laboratory of Forest Genetics & Biotechnology, Nanjing Forestry University, Nanjing, China.","Yan H, Dai X, Feng K, Ma Q, Yin T",,"China postdoctoral science foundation, Anhui provincial Natural Science Foundation, the Natural Science Foundation of China, the Innovative Research Team of the Educational Department of China, the PAPD (Priority Academic Program Development) program at Nanjing Forestry University, the National Basic Research Project",8.0,China +27484196,iLIR,0.989266038,iLIR,0.989266038,,0,1,http://ilir.warwick.ac.uk,301,,"(52.4066,-1.5122)",http://web.archive.org/web/20221020165738/https://ilir.warwick.ac.uk/,2016-08-02,"a School of Life Sciences, University of Warwick , Coventry , UK.","Jacomin AC, Samavedam S, Promponas V, Nezis IP",,Biotechnology and Biological Sciences Research Council,52.0, +27493549,LigandBox,0.997803092,LigandBox,0.997803092,,0,1,http://ligandbox.protein.osaka-u.ac.jp/ligandbox,"HTTPConnectionPool(host='ligandbox.protein.osaka-u.ac.jp', port=80): Max retries exceeded with url: /ligandbox (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-08-07,"Institute for Protein Research, Osaka University, 3-2, Yamadaoka, Suita, Osaka, 565-0871, Japan.","Kawabata T, Sugihara Y, Fukunishi Y, Nakamura H",,,11.0,Japan +27605101,LncVar,0.995780408,LncVar,0.995780408,,0,1,http://bioinfo.ibp.ac.cn/LncVar,301,,"(39.9075,116.3972)",http://web.archive.org/web/20220615214735/http://bioinfo.ibp.ac.cn/LncVar/,2016-09-06,"CAS Key Laboratory of RNA Biology, Institute of Biophysics, Chinese Academy of Sciences.","Chen X, 
Hao Y, Cui Y, Fan Z, He S, Luo J, Chen R",,,15.0, +27630202,KDD,0.99066178,KDD,0.99066178,Kawasaki Disease Database,0.97202076,1,http://www.kawasakidisease.kr,200,,"(37.5660,126.9784)",no_wayback,2016-07-01,"Clinical Research Center, Asan Institute of Life Sciences, Asan Medical Center, Seoul, Korea Department of Biomedical Informatics, Asan Medical Center, Seoul, Korea.","Park YR, Kim JJ, Yoon YJ, Yoon YK, Koo HY, Hong YM, Jang GY, Shin SY, Lee JK, ",,,3.0, +"27638885, 31584097",IGSR,0.970552325,IGSR,0.970552325,International Genome Sample Resource,0.881040025,2,http://www.internationalgenome.org,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20221017222131/https://www.internationalgenome.org/,2020-01-01,"European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","Clarke L, Fairley S, Zheng-Bradley X, Streeter I, Perry E, Lowy E, Tassé AM, Flicek P, Fairley S, Lowy-Gallego E, Perry E, Flicek P",", ","Wellcome Trust, Wellcome Trust, Wellcome Trust, European Molecular Biology Laboratory, NHGRI NIH HHS, Wellcome Trust, National Institutes of Health, Wellcome Trust",97.0, +27651464,LNCediting,0.996848762,LNCediting,0.996848762,,0,1,http://bioinfo.life.hust.edu.cn/LNCediting,200,,"(31.2222,121.4581)",http://web.archive.org/web/20221019174718/http://bioinfo.life.hust.edu.cn/LNCediting,2016-09-19,"Department of Bioinformatics and Systems Biology, Hubei Bioinformatics & Molecular Imaging Key Laboratory, Key Laboratory of Molecular Biophysics of the Ministry of Education, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, PR China.","Gong J, Liu C, Liu W, Xiang Y, Diao L, Guo AY, Han L",,,46.0,China 
+27789704,KERIS,0.998085916,KERIS,0.998085916,,0,1,http://www.igenomed.org/keris,301,,"(37.7621,-122.3971)",no_wayback,2016-10-26,"Department of Surgery, Massachusetts General Hospital, Harvard Medical School, Boston, MA 02114, USA.","Li P, Tompkins RG, Xiao W, ",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",3.0,United States +27794552,LinkProt,0.987855673,LinkProt,0.987855673,,0,1,http://linkprot.cent.uw.edu.pl,301,,"(52.2298,21.0118)",http://web.archive.org/web/20220327151421/https://linkprot.cent.uw.edu.pl/,2016-10-28,"Faculty of Chemistry, University of Warsaw, Pasteura 1, 02-093, Warsaw, Poland.","Dabrowski-Tumanski P, Jarmolinska AI, Niemyska W, Rawdon EJ, Millett KC, Sulkowska JI",,,13.0,Poland +27841751,IRRMC,0.994473457,IRRMC,0.994473457,Integrated Resource for Reproducibility in Macromolecular Crystallography,0.81369595,1,http://www.proteindiffraction.org,301,,"(38.0293,-78.4767)",http://web.archive.org/web/20221017013159/https://www.proteindiffraction.org/,2016-10-28,"Department of Molecular Physiology and Biological Physics, University of Virginia, Charlottesville, VA 22904, USA.","Grabowski M, Langner KM, Cymborowski M, Porebski PJ, Sroka P, Zheng H, Cooper DR, Zimmerman MD, Elsliger MA, Burley SK, Minor W",,"NHGRI NIH HHS, NIH HHS, National Institutes of Health, NIAID NIH HHS, NIGMS NIH HHS, National Institute of Allergy and Infectious Diseases",49.0,United States +27863956,IHEC,0.982326448,IHEC,0.982326448,,0,1,http://epigenomesportal.ca/ihec,301,,"(45.4001,-71.8991)",no_wayback,2016-11-15,"McGill University and Génome Québec Innovation Center, Montréal, QC H3A 0G1, Canada.","Bujold D, Morais DAL, Gauthier C, Côté C, Caron M, Kwan T, Chen KC, Laperle J, Markovits AN, Pastinen T, Caron B, Veilleux A, Jacques PÉ, Bourque G",,"Canadian Institutes of Health Research, CANARIE, Genome Québec, Calcul Québec, Genome Canada, Canadian Institutes of Health Research, Canadian Institutes of Health Research, Natural Sciences and Engineering Research Council of 
Canada, Compute Canada",66.0,Canada +27888793,InverPep,0.99586463,InverPep,0.99586463,,0,1,http://ciencias.medellin.unal.edu.co/gruposdeinvestigacion/prospeccionydisenobiomoleculas/InverPep/public/home_en,302,,"(6.2518,-75.5636)",http://web.archive.org/web/20220617174203/https://ciencias.medellin.unal.edu.co/gruposdeinvestigacion/prospeccionydisenobiomoleculas/InverPep/public/home_en,2016-11-19,"Research Group Biología Funcional, Escuela de Biociencias, Facultad de Ciencias, Universidad Nacional de Colombia sede Medellín, Calle 59A No. 63-20, Medellín, Colombia. Electronic address: eagomezc@unal.edu.co.","Gómez EA, Giraldo P, Orduz S",,Universidad Nacional de Colombia sede Medellín,16.0,"Colombia, Colombia" +27899604,IPD-MHC,0.969788027,IPD-MHC,0.969788027,,0,1,http://www.ebi.ac.uk/ipd/mhc,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20220527155536/https://www.ebi.ac.uk/ipd/mhc/,2016-11-28,"The Pirbright Institute, Pirbright, Woking, Surrey, GU24 0NF, UK.","Maccari G, Robinson J, Ballingall K, Guethlein LA, Grimholt U, Kaufman J, Ho CS, de Groot NG, Flicek P, Bontrop RE, Hammond JA, Marsh SG",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",59.0, +27899654,jPOSTrepo,0.978089273,jPOSTrepo,0.978089273,,0,1,http://repository.jpostdb.org,302,,"(35.1167,138.9167)",http://web.archive.org/web/20221005184719/https://repository.jpostdb.org/,2016-11-28,"Niigata University Graduate School of Medical and Dental Sciences, Niigata 951-8510, Japan okd@med.niigata-u.ac.jp.","Okuda S, Watanabe Y, Moriya Y, Kawano S, Yamamoto T, Matsumoto M, Takami T, Kobayashi D, Araki N, Yoshizawa AC, Tabata T, Sugiyama N, Goto S, Ishihama Y",,,125.0,Japan +27899675,JET2,0.992096901,JET2,0.992096901,,0,1,http://www.jet2viewer.upmc.fr,200,,"(48.8534,2.3488)",http://web.archive.org/web/20221017135550/http://www.jet2viewer.upmc.fr/,2016-11-28,"Sorbonne Universités, UPMC University Paris 06, CNRS, IBPS, UMR 7238, Laboratoire de 
Biologie Computationnelle et Quantitative (LCQB), 75005 Paris, France.","Ripoche H, Laine E, Ceres N, Carbone A",,,2.0,France +27924012,L1Base,0.945930322,L1Base,0.945930322,,0,1,http://l1base.charite.de,200,,"(52.5244,13.4105)",http://web.archive.org/web/20220319212505/http://l1base.charite.de/,2016-10-18,"Department of Radiology, Charité-Universitätsmedizin Berlin, Augustenburger Platz 1, 13353 Berlin, Germany.","Penzkofer T, Jäger M, Figlerowicz M, Badge R, Mundlos S, Robinson PN, Zemojtel T",,,47.0,Germany +27924046,Manteia,0.997795522,Manteia,0.997795522,,0,1,http://manteia.igbmc.fr,200,,"(48.5839,7.7455)",http://web.archive.org/web/20221110010713/http://manteia.igbmc.fr/,2016-10-24,"Institut de Génétique et de Biologie Moléculaire et Cellulaire (IGBMC), Inserm U964, France otassy@igbmc.fr.",Tassy O,,,0.0,France +27974320,Mammalian Metabolic Enzyme Database,0.845961971,,0,Mammalian Metabolic Enzyme Database,0.845961971,1,"http://hpcwebapps.cit.nih.gov/ESBL/Database/MetabolicEnzymes/MetabolicEnzymeDatabase.html, http://hpcwebapps.cit.nih.gov/ESBL/Database/MetabolicEnzymes","301, 301",,"(38.9807,-77.1003), (38.9807,-77.1003)","http://web.archive.org/web/20210322205224/https://hpcwebapps.cit.nih.gov/ESBL/Database/MetabolicEnzymes/MetabolicEnzymeDatabase.html, no_wayback",2016-12-14,"Epithelial Systems Biology Laboratory, National Heart, Lung, and Blood Institute, National Institutes of Health, Bethesda, Maryland; and.","Corcoran CC, Grady CR, Pisitkun T, Parulekar J, Knepper MA",,NHLBI Intramural Program,8.0, +28008948,Lotus Base,0.940765053,Lotus Base,0.940765053,,0,1,http://lotus.au.dk,302,,"(56.1567,10.2108)",http://web.archive.org/web/20221016215559/https://lotus.au.dk/,2016-12-23,"Department of Molecular Biology and Genetics, Aarhus University, Gustav Wieds Vej 10, DK-8000 Aarhus C, Denmark.","Mun T, Bachmann A, Gupta V, Stougaard J, Andersen SU",,,43.0,Denmark 
+28025339,iMITEdb,0.966844749,iMITEdb,0.966844749,,0,1,http://gene.cqu.edu.cn/iMITEdb,"HTTPConnectionPool(host='gene.cqu.edu.cn', port=80): Max retries exceeded with url: /iMITEdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 101] Network is unreachable'))",,,no_wayback,2016-12-26,"State Key Laboratory of Silkworm Genome Biology, Key Laboratory for Sericulture Functional Genomics and Biotechnology of Agricultural Ministry, Southwest University, Chongqing 400715, China.","Han MJ, Zhou QZ, Zhang HH, Tong X, Lu C, Zhang Z, Dai F",,,5.0,China +28124611,ImmunemiR,0.989842415,ImmunemiR,0.989842415,,0,1,http://www.biominingbu.org/immunemir,301,,"(11.0055,76.9661)",http://web.archive.org/web/20220407171953/http://www.biominingbu.org/immunemir/,2017-01-01,"Data Mining and Text Mining Laboratory, Department of Bioinformatics, Bharathiar University, Coimbatore, Tamil Nadu 641 046. India.","Prabahar A, Natarajan J",,,7.0,India +"28150246, 29145615",iPTMnet,0.997314811,iPTMnet,0.997314811,,0,2,http://proteininformationresource.org/iPTMnet,301,,"(39.6837,-75.7497)",http://web.archive.org/web/20221017012149/https://proteininformationresource.org/iPTMnet/,2018-01-01,"Department of Biochemistry and Molecular and Cellular Biology, Georgetown University Medical Center, 3300 Whitehaven Street NW, Suite 1200, Washington, DC, 20057, USA. 
ker25@georgetown.edu., Center for Bioinformatics and Computational Biology, University of Delaware, Newark, DE 19711, USA.","Ross KE, Huang H, Ren J, Arighi CN, Li G, Tudor CO, Lv M, Lee JY, Lee JY, Chen SC, Vijay-Shanker K, Wu CH, Huang H, Arighi CN, Ross KE, Ren J, Li G, Chen SC, Wang Q, Cowart J, Vijay-Shanker K, Wu CH",", ","NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",49.0,"United States, United States" +28168018,IRBAS,0.990202904,IRBAS,0.990202904,Intermittent River Biodiversity Analysis and Synthesis,0.962772516,1,http://irbas.cesab.org,302,,"(49.0094,8.4044)",http://web.archive.org/web/20191022175216/http://irbas.cesab.org:80/,2017-01-03,"IrsteaUR MALY, centre de Lyon-Villeurbanne Villeurbanne Cedex France; CESAB- FRB Immeuble Henri Poincaré Aix-en-Provence Cedex 3 France; Australian Rivers Institute and Griffith School of Environment Griffith University Nathan Qld Australia.","Leigh C, Laporte B, Bonada N, Fritz K, Pella H, Sauquet E, Tockner K, Datry T",,"French Foundation for Research & Biodiversity (FRB), IRBAS, Centre for Synthesis and Analysis of Biodiversity (CESAB), French National Agency for Water and Aquatic Environments (ONEMA)",0.0,"Australia, France, France" +28187703,iHMS,0.988777816,iHMS,0.988777816,,0,1,http://www.tongjidmb.com/human/index.html,404,,,http://web.archive.org/web/20180508060430/http://www.tongjidmb.com:80/human/index.html,2017-02-11,"School of Computer Science and Technology, Donghua University, Shanghai, China.","Gan Y, Tao H, Guan J, Zhou S",,"National Natural Science Foundation of China (CN), National Natural Science Foundation of China (CN), Shanghai Natural Science Foundation",2.0,China +28322240,ISCIEMEDS,0.977421165,ISCIEMEDS,0.977421165,Endocrine and Metabolic Extended Data Set,0.951809481,1,"http://commondataelements.ninds.nih.gov/SCI.aspx, http://www.iscos.org.uk/international-sci-data-sets","302, 302",,"(39.1626,-76.6247), 
(33.7283,-117.1464)","http://web.archive.org/web/20190108214552/https://commondataelements.ninds.nih.gov/SCI.aspx, http://web.archive.org/web/20220814120123/https://www.iscos.org.uk/international-sci-data-sets",2017-03-21,"Department of Veterans Affairs Rehabilitation Research & Development, National Center for the Medical Consequences of Spinal Cord Injury, James J Peters Veterans Affairs Medical Center, New York, NY, USA.","Bauman WA, Wecht JM, Biering-Sørensen F",,,4.0,United States +28416714,JingleBells,0.996701837,JingleBells,0.996701837,,0,1,http://jinglebells.bgu.ac.il,302,,"(31.2518,34.7913)",http://web.archive.org/web/20220902225038/https://jinglebells.bgu.ac.il/,2017-05-01,"Department of Life Sciences, Ben-Gurion University of the Negev, Beer-Sheva 8410501, Israel.","Ner-Gaon H, Melchior A, Golan N, Ben-Haim Y, Shay T",,,11.0,Israel +28592645,ISVdb,0.981565356,ISVdb,0.981565356,Inbred Strain Variant Database,0.950276415,1,http://isvdb.unc.edu,302,,"(35.9132,-79.0558)",http://web.archive.org/web/20220623073037/https://isvdb.unc.edu/,2017-06-07,"Curriculum in Bioinformatics and Computational Biology, University of North Carolina, Chapel Hill, North Carolina 27599-7265.","Oreper D, Cai Y, Tarantino LM, de Villena FP, Valdar W",,"NIMH NIH HHS, NIGMS NIH HHS",16.0, +28751672,LNCmap,0.997964621,LNCmap,0.997964621,LncRNA Connectivity Map,0.948274367,1,http://www.bio-bigdata.com/LNCmap,502,,,http://web.archive.org/web/20211114123514/http://www.bio-bigdata.com/LncMAP/,2017-07-27,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, 150081, China.","Yang H, Shang D, Xu Y, Zhang C, Feng L, Sun Z, Shi X, Zhang Y, Han J, Su F, Li C, Li X",,,12.0,China +28806134,iLIR@viral,0.916833331,iLIR@viral,0.916833331,,0,1,http://ilir.uk/virus,301,,"(52.3740,4.8897)",http://web.archive.org/web/20220208092055/http://ilir.uk/virus/,2017-08-14,"a School of Life Sciences , University of Warwick , Coventry , UK.","Jacomin AC, Samavedam S, Charles H, 
Nezis IP",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",9.0, +28977416,IMOTA,0.99115777,IMOTA,0.99115777,,0,1,http://ccb-web.cs.uni-saarland.de/imota,302,,"(49.2326,7.0098)",http://web.archive.org/web/20220326090504/https://ccb-web.cs.uni-saarland.de/imota/,2018-01-01,"Chair for Clinical Bioinformatics, Saarland University, 66123 Saarbrücken, Germany.","Palmieri V, Backes C, Ludwig N, Fehlmann T, Kern F, Meese E, Keller A",,,14.0,Germany +29029599,LiverWiki,0.995711923,LiverWiki,0.995711923,,0,1,http://liverwiki.hupo.org.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20201027043639/http://liverwiki.hupo.org.cn/,2017-10-13,"Beijing Institute of Life Omics, State Key Laboratory of Proteomics, Beijing Proteome Research Center, National Center for Protein Sciences (Beijing), Beijing Institute of Radiation Medicine, 33 Life Science Park Rd, Changping District, Beijing, 102206, China.","Chen T, Li M, He Q, Zou L, Li Y, Chang C, Zhao D, Zhu Y",,Ministry of Science and Technology of China,3.0,China +29036329,m6AVar,0.985386446,m6AVar,0.985386446,,0,1,http://m6avar.renlab.org,301,,"(33.4223,-111.8226)",http://web.archive.org/web/20200918133927/http://m6avar.renlab.org/,2018-01-01,"Sun Yat-sen University Cancer Center, State Key Laboratory of Oncology in South China, Collaborative Innovation Center for Cancer Medicine, Sun Yat-sen University, Guangzhou 510060, China.","Zheng Y, Nie P, Peng D, He Z, Liu M, Xie Y, Miao Y, Zuo Z, Ren J",,,78.0,"China, China" +"29036527, 30417254",iSyTE,0.93340987,iSyTE,0.93340987,,0,2,http://research.bioinformatics.udel.edu/iSyTE,301,,"(39.6837,-75.7497)",http://web.archive.org/web/20220619204008/https://research.bioinformatics.udel.edu/iSyTE/,2018-11-11,"Center for Bioinformatics and Computational Biology, University of Delaware, Newark, DE 19711, USA., Department of Biological Sciences, University of Delaware, 105 The Green, Delaware Avenue, 236 Wolf Hall, Newark, 
DE, 19716, USA.","Kakrana A, Yang A, Anand D, Djordjevic D, Ramachandruni D, Singh A, Huang H, Ho JWK, Lachke SA, Anand D, Kakrana A, Siddam AD, Huang H, Saadi I, Lachke SA",", ","NEI NIH HHS, NIGMS NIH HHS, NIDCR NIH HHS, NICHD NIH HHS, NIDCR NIH HHS, National Institute of Dental and Craniofacial Research, NIDCR NIH HHS, NEI NIH HHS, National Eye Institute, NEI NIH HHS",52.0,"United States, United States" +29047407,Infevers,0.637892187,Infevers,0.637892187,,0,1,http://fmf.igh.cnrs.fr/ISSAID/infevers,"HTTPConnectionPool(host='fmf.igh.cnrs.fr', port=80): Max retries exceeded with url: /ISSAID/infevers (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220127175938/https://fmf.igh.cnrs.fr:443/ISSAID/infevers/,2017-10-18,"EULAR Centre of Excellence in Rheumatology 2008-2018, IRCCS Istituto Giannina Gaslini, Genoa, Italy. papariccardo86@gmail.com.","Papa R, Doglio M, Lachmann HJ, Ozen S, Frenkel J, Simon A, Neven B, Kuemmerle-Deschner J, Ozgodan H, Caorsi R, Federici S, Finetti M, Trachana M, Brunner J, Bezrodnik L, Pinedo Gago MC, Maggio MC, Tsitsami E, Al Suwairi W, Espada G, Shcherbina A, Aksu G, Ruperto N, Martini A, Ceccherini I, Gattorno M, ",,"ReumaFonds, Executive Agency for Health and Consumers",15.0,Italy +29069510,Lnc2Meth,0.998092294,Lnc2Meth,0.998092294,,0,1,http://www.bio-bigdata.com/Lnc2Meth,502,,,http://web.archive.org/web/20220715220947/http://www.bio-bigdata.com/Lnc2Meth/,2018-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Zhi H, Li X, Wang P, Gao Y, Gao B, Zhou D, Zhang Y, Guo M, Yue M, Shen W, Ning S, Jin L, Li X",,,16.0,China +29097748,KIXBASE,0.981071115,KIXBASE,0.981071115,,0,1,http://www.nipgr.res.in/kixbase/home.php,"HTTPConnectionPool(host='www.nipgr.res.in', port=80): Max retries exceeded with url: /kixbase/home.php (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 
-2] Name or service not known'))",,,http://web.archive.org/web/20181010180800/http://www.nipgr.res.in:80/kixbase/home.php,2017-11-02,"National Institute of Plant Genome Research (NIPGR), Aruna Asaf Ali Marg, New Delhi, 110067, India.","Yadav A, Thakur JK, Yadav G",,,2.0,India +29106644,iUUCD,0.997192144,iUUCD,0.997192144,,0,1,http://iuucd.biocuckoo.org,200,,"(40.2338,-111.6585)",http://web.archive.org/web/20220803173555/https://iuucd.biocuckoo.org/,2018-01-01,"Key Laboratory of Molecular Biophysics of Ministry of Education, College of Life Science and Technology and the Collaborative Innovation Center for Biomedical Engineering, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China.","Zhou J, Xu Y, Lin S, Guo Y, Deng W, Zhang Y, Guo A, Xue Y",,,15.0,China +29136207,LinkedOmics,0.990786731,LinkedOmics,0.990786731,,0,1,http://www.linkedomics.org,302,,"(45.8399,-119.7006)",no_wayback,2018-01-01,"Lester and Sue Smith Breast Center, Baylor College of Medicine, Houston, TX 77030, USA.","Vasaikar SV, Straub P, Wang J, Zhang B",,NCI NIH HHS,533.0,United States +29179110,LAND-deFeND,0.969560434,LAND-deFeND,0.969560434,LANDslides and Floods National Database,0.881252799,1,http://geomorphology.irpi.cnr.it/tools,200,,"(43.0990,12.3674)",http://web.archive.org/web/20220526021304/http://geomorphology.irpi.cnr.it/tools,2017-11-24,"CNR IRPI, Via Madonna Alta 126, I-06128, Perugia, Italy.","Napolitano E, Marchesini I, Salvati P, Donnini M, Bianchi C, Guzzetti F",,"Italian PRIN, Italian National Department for Civil Protection, PRIN, DPC, Fondazione Assicurazioni Generali",3.0,Italy +29220464,KiPho,0.995313227,KiPho,0.995313227,Malaria Parasite Kinome-Phosphatome,0.971182257,1,http://bioinfo.icgeb.res.in/kipho,301,,"(28.6519,77.2315)",no_wayback,2017-01-01,"Translational Bioinformatics Group, International Centre for Genetic Engineering and Biotechnology, Aruna Asaf Ali Marg, New Delhi 110067, India.","Pandey R, Kumar P, Gupta D",,"Department of Biotechnology, 
Ministry of Science and Technology, Department of Biotechnology, Ministry of Science and Technology",0.0,India +29531263,IMPPAT,0.988455057,IMPPAT,0.988455057,,0,1,http://cb.imsc.res.in/imppat,301,,"(13.0156,80.2467)",http://web.archive.org/web/20220814073535/https://cb.imsc.res.in/imppat/,2018-03-12,"The Institute of Mathematical Sciences (IMSc), Homi Bhabha National Institute (HBNI), Chennai, 600113, India.","Mohanraj K, Karthikeyan BS, Vivek-Ananth RP, Chand RPB, Aparna SR, Mangalapandi P, Samal A",,,41.0,India +29619235,iMETHYL,0.997811019,iMETHYL,0.997811019,,0,1,http://imethyl.iwate-megabank.org,200,,"(39.0667,141.7167)",http://web.archive.org/web/20220307212355/http://imethyl.iwate-megabank.org/,2018-03-29,"Division of Biomedical Information Analysis, Iwate Medical University, Shiwa, Iwate, Japan.","Komaki S, Shiwa Y, Furukawa R, Hachiya T, Ohmomo H, Otomo R, Satoh M, Hitomi J, Sobue K, Sasaki M, Shimizu A",,,23.0,Japan +29648583,LipidPedia,0.990220785,LipidPedia,0.990220785,,0,1,http://lipidpedia.cmdm.tw,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20220418032345/https://lipidpedia.cmdm.tw/,2018-09-01,"Graduate Institute of Biomedical Engineering and Bioinformatics, National Taiwan University, Taipei, Taiwan.","Kuo TC, Tseng YJ",,"National Taiwan University, Taiwan Ministry of Science and Technology, Taiwan Ministry of Science and Technology, National Taiwan University, Taiwan Ministry of Science and Technology, Resources of the Laboratory of Computational Molecular Design and Metabolomics, Taiwan Ministry of Science and Technology, National Taiwan University, Department of Computer Science and Information Engineering of National Taiwan University",9.0, +29697370,LDSplitDB,0.995853007,LDSplitDB,0.995853007,,0,1,http://histone.scse.ntu.edu.sg/LDSplitDB,"HTTPConnectionPool(host='histone.scse.ntu.edu.sg', port=80): Max retries exceeded with url: /LDSplitDB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or 
service not known'))",,,no_wayback,2018-04-20,"School of Computer Science and Engineering, Nanyang Technological University, Nanyang Ave, Singapore, 639798, Singapore.","Guo J, Chen H, Yang P, Lee YT, Wu M, Przytycka TM, Kwoh CK, Zheng J",,,1.0,"Singapore, Singapore" +29788225,LnChrom,0.996899307,LnChrom,0.996899307,,0,1,http://biocc.hrbmu.edu.cn/LnChrom,"HTTPConnectionPool(host='biocc.hrbmu.edu.cn', port=80): Max retries exceeded with url: /LnChrom (Caused by ProtocolError('Connection aborted.', BadStatusLine(' OK\r\n')))",,,no_wayback,2018-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150081, China.","Yu F, Zhang G, Shi A, Hu J, Li F, Zhang X, Zhang Y, Huang J, Xiao Y, Li X, Cheng S",,"National Natural Science Foundation of China, National Natural Science Foundation of China, China Postdoctoral Science Foundation, National Natural Science Foundation of China",12.0,China +29892515,iMusta4SLC,0.976851185,iMusta4SLC,0.976851185,,0,1,http://cib.cf.ocha.ac.jp/slc,308,,"(35.6895,139.6917)",http://web.archive.org/web/20220615210920/http://cib.cf.ocha.ac.jp/slc/,2018-04-27,"Graduate School of Frontier Sciences, The University of Tokyo, Minato-ku, Tokyo 108-8639, Japan.","Higuchi A, Nonaka N, Yura K",,,3.0,Japan +29897484,ILDgenDB,0.996150136,ILDgenDB,0.996150136,,0,1,http://14.139.240.55/ildgendb/index.php,404,,,http://web.archive.org/web/20171030134807/http://14.139.240.55/ildgendb/index.php,2018-01-01,"Department of Biotechnology and Bioinformatics, Jaypee University of Information Technology, Waknaghat, Solan, Himachal Pradesh 173234, India.","Mishra S, Shah MI, Sarkar M, Asati N, Rout C",,,0.0,India +29899596,LEGE,0.943019152,LEGE,0.943019152,,0,1,http://lege.ciimar.up.pt,301,,"(41.1496,-8.6110)",http://web.archive.org/web/20221021010407/https://lege.ciimar.up.pt/,2018-01-06,"1Interdisciplinary Centre of Marine and Environmental Research (CIIMAR/CIMAR), Terminal de Cruzeiros do Porto de Leixões, University of 
Porto, 4450-208 Matosinhos, Portugal.","Ramos V, Morais J, Castelo-Branco R, Pinheiro Â, Martins J, Regueiras A, Pereira AL, Lopes VR, Frazão B, Gomes D, Moreira C, Costa MS, Brûle S, Faustino S, Martins R, Saker M, Osswald J, Leão PN, Vasconcelos VM",,"NORTE2020, H2020 Health, Fundação para a Ciência e a Tecnologia, Fundação para a Ciência e a Tecnologia",15.0,Portugal +29905762,LeptoDB,0.995939851,LeptoDB,0.995939851,,0,1,http://leptonet.org.in,"HTTPConnectionPool(host='leptonet.org.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20181004231520/https://www.leptonet.org.in/,2018-01-01,"Gujarat Biotechnology Research Centre, Department of Science and Technology, Government of Gujarat, Gandhinagar, Gujarat 382011, India.","Beriwal S, Padhiyar N, Bhatt D, Pandit PD, Ansari A, Lata KS, Saiyed ZM, Vaghasia V, Sharma P, Bhairappanavar SB, Soni S, Das J",,"The Department of Science and Technology, Government of Gujarat",1.0,India +29961817,LncCeRBase,0.998007238,LncCeRBase,0.998007238,,0,1,http://lnccerbase.it1004.com,"HTTPConnectionPool(host='lnccerbase.it1004.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2018-01-01,"Ministry of Agriculture Key Laboratory of Molecular Biology of Crop Pathogens and Insects, Institute of Insect Science, Zhejiang University, 866 Yuhangtang Road, Hangzhou, China.","Pian C, Zhang G, Tu T, Ma X, Li F",,"National Key Research and Development Program, National Key Research and Development Program",10.0,China +30046160,KampoDB,0.997830033,KampoDB,0.997830033,,0,1,http://wakanmoview.inm.u-toyama.ac.jp/kampo,301,,"(36.7000,137.2167)",http://web.archive.org/web/20220705145755/https://wakanmoview.inm.u-toyama.ac.jp/kampo/,2018-07-25,"Medical Institute of Bioregulation, Kyushu 
University, 3-1-1 Maidashi, Higashi-ku, Fukuoka, Fukuoka, 812-8582, Japan.","Sawada R, Iwata M, Umezaki M, Usui Y, Kobayashi T, Kubono T, Hayashi S, Kadowaki M, Yamanishi Y",,"Japan Science and Technology Agency, Japan Society for the Promotion of Science (JSPS), Japan Society for the Promotion of Science, Japan Science and Technology Agency (JST)",4.0,Japan +30252093,iProX,0.984180152,iProX,0.984180152,,0,1,http://www.iprox.org,301,,"(22.2783,114.1747)",http://web.archive.org/web/20210513223809/https://www.iprox.org/,2019-01-01,"State Key Laboratory of Proteomics, Beijing Proteome Research Center, National Center for Protein Sciences (Beijing), Beijing Institute of Life Omics, Beijing 102206, China.","Ma J, Chen T, Wu S, Yang C, Bai M, Shu K, Li K, Zhang G, Jin Z, He F, Hermjakob H, Zhu Y",,"National High Technology Research and Development Program of China, National Key Research Program of China, National Science Foundation of China, International Scientific and Technological Cooperation project of China, National Key Research Program of China, Biotechnology and Biological Sciences Research Council, National Key Research Program of China, International Scientific and Technological Cooperation project of China",316.0,China +30276831,lncRNAnet,0.986113548,lncRNAnet,0.986113548,,0,1,http://lnc.rnanet.org,200,,"(36.0649,120.3804)",http://web.archive.org/web/20220616012418/http://lnc.rnanet.org/,2018-10-02,"College of Life Science, Foshan University, 1 Xianhu University Road, Nanhai, Foshan, Guangdong, 528231, China.","Liang G, Yang Y, Li H, Yu H, Li X, Tang Z, Li K",,"National Natural Science Foundation of China, Agricultural Science and Technology Innovation Program, Key Laboratory of Shenzhen, National Key Basic Research Program of China, National Natural Science Foundation of China, National Key Basic Research Program of China",7.0,China +30295851,jPOST,0.991378427,jPOST,0.991378427,Japan 
ProteOme,0.680601423,1,http://jpostdb.org,301,,"(37.8864,139.0059)",http://web.archive.org/web/20221013045354/https://jpostdb.org/,2019-01-01,"Database Center for Life Science, Joint Support-Center for Data Science Research, Research Organization of Information and Systems, Kashiwa 277-0871, Japan.","Moriya Y, Kawano S, Okuda S, Watanabe Y, Matsumoto M, Takami T, Kobayashi D, Yamanouchi Y, Araki N, Yoshizawa AC, Tabata T, Iwasaki M, Sugiyama N, Tanaka S, Goto S, Ishihama Y",,"Japan Science and Technology Agency, Japan Science and Technology Agency",26.0,Japan +30357370,liqDB,0.997288704,liqDB,0.997288704,,0,1,http://bioinfo5.ugr.es/liqdb,301,,"(37.1882,-3.6067)",http://web.archive.org/web/20220520043050/https://bioinfo5.ugr.es/liqdb/,2019-01-01,"Dpto. de Genética, Facultad de Ciencias, Universidad de Granada, Campus de Fuentenueva s/n, 18071 Granada, Spain.","Aparicio-Puerta E, Jáspez D, Lebrón R, Koppers-Lalic D, Marchal JA, Hackenberg M",,"Spanish Government, Ministry of Education of Spain, Horizon 2020, Ministry of Education of Spain, Instituto de Salud Carlos III",7.0,Spain +30397019,iProteinDB,0.996118069,iProteinDB,0.996118069,,0,1,http://www.flyrnai.org/tools/iproteindb,301,,"(42.3584,-71.0598)",http://web.archive.org/web/20220127231444/https://www.flyrnai.org/tools/iproteindb/,2019-01-09,"Department of Genetics, Harvard Medical School, 77 Avenue Louis Pasteur, Boston, MA 02115.","Hu Y, Sopko R, Chung V, Foos M, Studer RA, Landry SD, Liu D, Rabinow L, Gnad F, Beltrao P, Perrimon N",,"NIDDK NIH HHS, NIGMS NIH HHS, NIAMS NIH HHS, NIGMS NIH HHS",7.0, +30476305,LncACTdb,0.993309855,LncACTdb,0.993309855,,0,1,http://www.bio-bigdata.net/LncACTdb,502,,,http://web.archive.org/web/20211105145241/http://www.bio-bigdata.net/LncACTdb/,2019-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Wang P, Li X, Gao Y, Guo Q, Wang Y, Fang Y, Ma X, Zhi H, Zhou D, Shen W, Liu W, Wang L, Zhang Y, Ning S, Li X",,"National 
Natural Science Foundation of China, National Natural Science Foundation of China, Postdoctoral Science Foundation of China, Innovative Talents of Science and Technology Research, Postdoctoral Science Foundation of China, National Natural Science Foundation of China, Innovative Talents of Science and Technology Research, National Natural Science Foundation of China, National Program on Key Basic Research, Innovative Talents of Science and Technology Research, Harbin Medical University",49.0,China +30535108,InteracDome,0.943789065,InteracDome,0.943789065,,0,1,http://interacdome.princeton.edu,301,,"(40.3487,-74.6590)",http://web.archive.org/web/20220802124015/https://interacdome.princeton.edu/,2019-01-01,"Department of Biomedical Informatics, Harvard Medical School, 10 Shattuck Street, Boston, MA 02115, USA.","Kobren SN, Singh M",,"National Science Foundation, National Science Foundation, National Institutes of Health, National Institutes of Health, NIGMS NIH HHS, NCI NIH HHS",8.0,United States +30820574,Mammalian Stress Granules Proteome,0.95231396,,0,Mammalian Stress Granules Proteome,0.95231396,1,http://msgp.pt,403,,,http://web.archive.org/web/20220617015729/https://msgp.pt/,2019-01-01,"Department of Biomedical Sciences and Medicine, University of Algarve, Faro, Portugal.","Nunes C, Mestre I, Marcelo A, Koppenol R, Matos CA, Nóbrega C",,"Portuguese Science and Technology Foundation, FCT, Ataxia UK, French Muscular Dystrophy Association, Ataxia UK",18.0,Portugal +30949679,IntronDB,0.99790293,IntronDB,0.99790293,,0,1,http://www.nextgenbioinformatics.org/IntronDB,"HTTPConnectionPool(host='www.nextgenbioinformatics.org', port=80): Max retries exceeded with url: /IntronDB (Caused by ConnectTimeoutError(, 'Connection to www.nextgenbioinformatics.org timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20190527120405/http://www.nextgenbioinformatics.org:80/IntronDB/,2019-11-01,"Department of Plant Sciences, University of Oxford, Oxford, UK.",Wang D,,,2.0, +31180159,KHV,0.773043275,KHV,0.773043275,,0,1,http://genomes.vn,301,,"(21.0245,105.8412)",http://web.archive.org/web/20220522052624/https://genomes.vn/,2019-07-03,"Vinmec Research Institute of Stem Cell and Gene Technology, Hanoi, Vietnam.","Le VS, Tran KT, Bui HTP, Le HTT, Nguyen CD, Do DH, Ly HTT, Pham LTD, Dao LTM, Nguyen LT",,Vinmec Healthcare System,11.0, +31231133,IPGB,0.957984984,IPGB,0.957984984,Incontinentia Pigmenti Genetic Biobank,0.896460912,1,http://www.igb.cnr.it/ipgb,302,,"(40.8522,14.2681)",http://web.archive.org/web/20221014025513/https://www.igb.cnr.it/ipgb/,2019-06-23,"Institute of Genetics and Biophysics ""A. Buzzati Traverso"" CNR, 80131, Naples, Italy.","Fusco F, Valente V, Fergola D, Pescatore A, Lioi MB, Ursini MV",,,1.0,Italy +31284879,ImtRDB,0.997494876,ImtRDB,0.997494876,,0,1,http://bioinfodbs.kantiana.ru/ImtRDB,301,,"(54.7065,20.5110)",http://web.archive.org/web/20210620031241/http://bioinfodbs.kantiana.ru/ImtRDB/,2019-05-08,"Center for Mitochondrial Functional Genomics, School of Life Science, Immanuel Kant Baltic Federal University, Kaliningrad, Russia.","Shamanskiy VA, Timonina VN, Popadin KY, Gunbin KV",,,3.0, +31353404,MAdb,0.965469927,MAdb,0.965469927,Mammalian Annotation Database tool,0.926022192,1,http://madb.ethz.ch,302,,"(47.3667,8.5500)",http://web.archive.org/web/20210919035629/https://madb.ethz.ch/,2019-01-01,"Animal Physiology, Institute of Agricultural Sciences, ETH Zurich, Zurich, Switzerland.","Bick JT, Zeng S, Robinson MD, Ulbrich SE, Bauersachs S",,"Swiss National Science Foundation, Swiss National Science Foundation",9.0,"Switzerland, Ethiopia" +31598675,KnockTF,0.989506125,KnockTF,0.989506125,,0,1,http://www.licpathway.net/KnockTF/index.html,200,,"(22.2783,114.1747)",no_wayback,2020-01-01,"School of Medical 
Informatics, Daqing Campus, Harbin Medical University, Daqing 163319, China.","Feng C, Song C, Liu Y, Qian F, Gao Y, Ning Z, Wang Q, Jiang Y, Li Y, Li M, Chen J, Zhang J, Li C",,"National Natural Science Foundation of China, Yu Weihan Outstanding Youth Training Fund of Harbin Medical University, Wu Liande Youth Science Research Fund of Harbin Medical University, National Natural Science Foundation of China, National Science Foundation, Scientific Research Fund of Harbin Medical University",13.0,China +31605615,LeGOO,0.99293381,LeGOO,0.99293381,Legume,0.559114456,1,http://www.legoo.org,301,,"(43.6043,1.4437)",http://web.archive.org/web/20190126002628/https://www.legoo.org/,2020-01-01,"LIPM, Universit� de Toulouse, INRA, CNRS, Castanet-Tolosan, France.","Carrï Re SB, Verdenaud M, Gough C, Gouzy JRM, Gamas P",,"TULIP, Laboratoire d’Excellence, Agence Nationale de la Recherche, LABEX",1.0,France +31617563,LnCeVar,0.998445213,LnCeVar,0.998445213,,0,1,http://www.bio-bigdata.net/LnCeVar,502,,,http://web.archive.org/web/20200709200904/http://www.bio-bigdata.net/LnCeVar/,2020-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Wang P, Li X, Gao Y, Guo Q, Ning S, Zhang Y, Shang S, Wang J, Wang Y, Zhi H, Fang Y, Shen W, Zhang G, Chen SX, Li X",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Harbin Medical University, National Natural Science Foundation of China, Postdoctoral Foundation of Hei Long Jiang Province, University Nursing Program for Young Scholars with Creative Talents in Heilongjiang Province",32.0,China +31648227,MANET,0.982270757,MANET,0.982270757,metabolic Molecular Ancestry Networks,0.82753098,1,http://manet.illinois.edu,301,,"(40.1164,-88.2434)",http://web.archive.org/web/20210513020406/https://manet.illinois.edu/,2019-10-24,"Illinois Informatics Institute, University of Illinois at Urbana-Champaign, Urbana, 
Illinois, United States of America.","Mughal F, Caetano-Anollés G",,USDA National Institute of Food and Agriculture,3.0,United States +31665416,Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters,0.978851591,IMG-ABC,0.966223431,Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters,0.978851591,1,"http://img.jgi.doe.gov/abc-public, http://gold.jgi.doe.gov","301, 301",,"(37.7621,-122.3971), (37.7621,-122.3971)","http://web.archive.org/web/20220419144009/https://img.jgi.doe.gov/abc-public/, http://web.archive.org/web/20221102110448/https://gold.jgi.doe.gov/",2020-01-01,"Department of Energy Joint Genome Institute, 2800 Mitchell Drive, Walnut Creek, CA 94598, USA.","Palaniappan K, Chen IA, Chu K, Ratner A, Seshadri R, Kyrpides NC, Ivanova NN, Mouncey NJ",,"U.S. Department of Energy, Joint Genome Institute, Lawrence Berkeley National Laboratory",32.0,United States +31665439,MaGenDB,0.993712246,MaGenDB,0.993712246,,0,1,http://magen.whu.edu.cn,200,,"(30.5833,114.2667)",http://web.archive.org/web/20221016221407/http://magen.whu.edu.cn/,2020-01-01,"College of Life Sciences, Wuhan University, Wuhan 430072, China.","Wang D, Fan W, Guo X, Wu K, Zhou S, Chen Z, Li D, Wang K, Zhu Y, Zhou Y",,"National Natural Science Foundation of China, National Key R&D Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Wuhan University, National Natural Science Foundation of China",8.0,China +31701147,LINCS,0.992289344,LINCS,0.992289344,of Integrated Network-Based Cellular Signatures,0.737134829,1,http://lincsportal.ccs.miami.edu/signatures,302,,"(25.7743,-80.1937)",http://web.archive.org/web/20221015121058/https://lincsportal.ccs.miami.edu/signatures/,2020-01-01,"Department of Molecular and Cellular Pharmacology, Miller School of Medicine, University of Miami, USA.","Stathias V, Turner J, Koleti A, Vidovic D, Cooper D, Fazel-Najafabadi M, Pilarczyk M, Terryn R, Chung C, Umeano A, Clarke DJB, Lachmann A, 
Evangelista JE, Ma'ayan A, Medvedovic M, Schürer SC",,"NLM NIH HHS, National Center for Advancing Translational Sciences, National Institutes of Health, National Heart, Lung, and Blood Institute, NHLBI NIH HHS, National Institutes of Health, NCATS NIH HHS",39.0,United States +31713618,LncTarD,0.998159111,LncTarD,0.998159111,,0,1,"http://biocc.hrbmu.edu.cn/LncTarD/, http://bio-bigdata.hrbmu.edu.cn/LncTarD","HTTPConnectionPool(host='biocc.hrbmu.edu.cn', port=80): Max retries exceeded with url: /LncTarD/ (Caused by ProtocolError('Connection aborted.', BadStatusLine(' OK\r\n'))), 302",,", (31.2222,121.4581)","http://web.archive.org/web/20200204034002/http://biocc.hrbmu.edu.cn:80/LncTarD/, http://web.archive.org/web/20221102054300/http://bio-bigdata.hrbmu.edu.cn/LncTarD/",2020-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Zhao H, Shi J, Zhang Y, Xie A, Yu L, Zhang C, Lei J, Xu H, Leng Z, Li T, Huang W, Lin S, Wang L, Xiao Y, Li X",,"Heilongjiang Provincial Health and Family Planning Commission of Science Foundation, Hei Long Jiang Postdoctoral Foundation, China Postdoctoral Science Foundation, National Natural Science Foundation of China, Fundamental Research Funds for the Provincial Universities, China Postdoctoral Science Foundation, Natural Science Foundation of Heilongjiang Province, Heilongjiang Provincial Health and Family Planning Commission of Science Foundation, China Postdoctoral Science Special Foundation, Fundamental Research Funds for the Provincial Universities, National Natural Science Foundation of China, National Natural Science Foundation of China, Heilongjiang Provincial planning office key subjects, Hei Long Jiang Postdoctoral Foundation, National Natural Science Foundation of China",37.0,China +31780665,LDB,0.982422173,LDB,0.982422173,Lichen 
DataBase,0.793945372,1,http://www.ebi.ac.uk/metabolights/MTBLS999,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20220121021428/https://www.ebi.ac.uk/metabolights/MTBLS999,2019-11-28,"CNRS, ISCR (Institut des Sciences Chimiques de Rennes)-UMR 6226, Univ Rennes, F-35000, Rennes, France.","Olivier-Jimenez D, Chollet-Krugler M, Rondeau D, Beniddir MA, Ferron S, Delhaye T, Allard PM, Wolfender JL, Sipman HJM, Lücking R, Boustie J, Le Pogam P",,,8.0,France +31838187,International Severe Asthma Registry,0.96681873,ISAR,0.945667505,International Severe Asthma Registry,0.96681873,1,http://isaregistries.org,301,,"(33.4484,-112.0740)",http://web.archive.org/web/20220610160621/https://isaregistries.org/,2019-12-12,None,,,AstraZeneca,6.0, +31874631,JCDB,0.97426182,JCDB,0.97426182,curcas database,0.846007625,1,http://jcdb.xtbg.ac.cn,403,,,http://web.archive.org/web/20220609193416/http://jcdb.xtbg.ac.cn/,2019-12-24,"CAS Key Laboratory of Tropical Plant Resources and Sustainable Use, Xishuangbanna Tropical Botanical Garden, The Innovative Academy of Seed Design, Chinese Academy of Sciences, Menglun, Mengla, Yunnan, 666303, China.","Zhang X, Pan BZ, Chen M, Chen W, Li J, Xu ZF, Liu C",,,2.0,China +31906602,LLPSDB,0.997212887,LLPSDB,0.997212887,,0,1,"http://bio-comp.ucas.ac.cn/llpsdb, http://bio-comp.org.cn/llpsdb","HTTPConnectionPool(host='bio-comp.ucas.ac.cn', port=80): Max retries exceeded with url: /llpsdb (Caused by ConnectTimeoutError(, 'Connection to bio-comp.ucas.ac.cn timed out. 
(connect timeout=5)')), 301",,", (39.9075,116.3972)","http://web.archive.org/web/20200528164020/http://bio-comp.ucas.ac.cn/llpsdb/, http://web.archive.org/web/20201127150748/http://www.bio-comp.org.cn/LLPSDB/",2020-01-01,"College of Life Sciences, University of Chinese Academy of Sciences, Beijing, 100049, China.","Li Q, Peng X, Li Y, Tang W, Zhu J, Huang J, Qi Y, Zhang Z",,"National Natural Science Foundation of China, National Natural Science Foundation of China",30.0,China +32028878,laPPISite,0.994788826,laPPISite,0.994788826,,0,1,http://zzdlab.com/plappisite/index.php,200,,"(39.9075,116.3972)",no_wayback,2020-02-06,"State Key Laboratory of Agrobiotechnology, College of Biological Sciences, China Agricultural University, Beijing, 100193, China.","Yang X, Yang S, Qi H, Wang T, Li H, Zhang Z",,Natural Science Foundation of Beijing Municipality,3.0,"China, China" +32128558,LeukmiR,0.997031927,LeukmiR,0.997031927,,0,1,http://tdb.ccmb.res.in/LeukmiR,301,,"(17.3840,78.4564)",no_wayback,2020-01-01,"Cancer Biology, CSIR-Centre for Cellular and Molecular Biology, (CCMB), Uppal Road, Hyderabad, 500007, India.","Rawoof A, Swaminathan G, Tiwari S, Nair RA, Dinesh Kumar L",,,4.0,India +32133509,KRGDB,0.981609166,KRGDB,0.981609166,Korean Reference Genome Database,0.956574035,1,http://coda.nih.go.kr/coda/KRGDB/index.jsp,301,,"(37.3925,126.9269)",http://web.archive.org/web/20210508034435/http://coda.nih.go.kr/coda/KRGDB/index.jsp,2020-01-01,"Division of Biomedical Informatics, Center for Genome Science, National Institute of Health, KCDC, Cheongju 28159, Republic of Korea.","Jung KS, Hong KW, Jo HY, Choi J, Ban HJ, Cho SB, Chung M",,"Post-genome Multi-ministerial Project, Post-genome Multi-ministerial Project",11.0, +32193291,LncSpA,0.997582436,LncSpA,0.997582436,,0,1,http://bio-bigdata.hrbmu.edu.cn/LncSpA,302,,"(31.2222,121.4581)",http://web.archive.org/web/20220616064347/http://bio-bigdata.hrbmu.edu.cn/LncSpA/,2020-03-19,"College of Bioinformatics Science and Technology, 
Harbin Medical University, Harbin, China.","Lv D, Xu K, Jin X, Li J, Shi Y, Zhang M, Jin X, Li Y, Xu J, Li X",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Key R&D Program of China, National Natural Science Foundation of China, Natural Science Foundation",18.0,China +32228437,iMarmot,0.997347414,iMarmot,0.997347414,,0,1,http://www.marmotdb.org,200,,"(37.3394,-121.8950)",http://web.archive.org/web/20221026053055/http://www.marmotdb.org/,2020-03-30,"Laboratory Animal Center, Xi'an Jiaotong University Health Science Center, No.76, Yanta West Road, Xi'an, 710061, Shaanxi, China.","Liu B, Bai L, Yu Q, Hu F, Wu J, Zhao S, Wang R, Wang W, Tao Y, Fan J, Liu E",,"Natural Science Foundation of Shaanxi Provincial Department of Education, China Postdoctoral Science Foundation Grant",0.0,China +32367112,MACSNVdb,0.99785769,MACSNVdb,0.99785769,,0,1,http://big.cdu.edu.cn/macsnvdb,"HTTPConnectionPool(host='big.cdu.edu.cn', port=80): Max retries exceeded with url: /macsnvdb (Caused by ConnectTimeoutError(, 'Connection to big.cdu.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210918232119/http://big.cdu.edu.cn/macsnvdb/,2020-01-01,"Institute for Advanced Study, Chengdu University, 2025 Chengluo Rd, Chengdu 610106, China.","Du L, Guo T, Liu Q, Li J, Zhang X, Xing J, Yue B, Li J, Fan Z",,"Key Research Fund on Sciences and Technologies for Joint Academic Institute and Local Enterprises of Sichuan, National Natural Science Foundation of China, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities",0.0,China +32499815,LuluDB,0.996738493,LuluDB,0.996738493,,0,1,http://luluseqdb.umk.pl/basic/web/index.php,200,,"(53.0138,18.5981)",http://web.archive.org/web/20221008141230/http://luluseqdb.umk.pl/basic/web/index.php,2020-05-15,"Department of Plant Physiology and Biotechnology, Faculty of Biological and Veterinary Sciences, Nicolaus Copernicus University, Torun, Poland.","Glazinska P, Kulasek M, Glinkowski W, Wysocka M, Kosiński JG",,Narodowe Centrum Nauki,0.0,Poland +32502232,LabxDB,0.991126418,LabxDB,0.991126418,,0,1,"http://gitlab.com/vejnar/labxdb, http://labxdb.vejnar.org","301, 308",,"(37.7621,-122.3971), (48.8534,2.3488)","http://web.archive.org/web/20211206055118/https://gitlab.com/vejnar/labxdb, http://web.archive.org/web/20220929181440/https://labxdb.vejnar.org/",2020-08-01,Department of Genetics.,"Vejnar CE, Giraldez AJ",,"NIGMS NIH HHS, NICHD NIH HHS, NIGMS NIH HHS, NIH, NIH, NICHD NIH HHS, NIH, NIH",4.0, +32512182,IRESbase,0.987502873,IRESbase,0.987502873,,0,1,http://reprod.njmu.edu.cn/cgi-bin/iresbase/index.php,"HTTPConnectionPool(host='reprod.njmu.edu.cn', port=80): Max retries exceeded with url: /cgi-bin/iresbase/index.php (Caused by ReadTimeoutError(""HTTPConnectionPool(host='reprod.njmu.edu.cn', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220617082933/http://reprod.njmu.edu.cn/cgi-bin/iresbase/index.php,2020-04-01,"Department of Biomedical Engineering, Nanjing University of Aeronautics and Astronautics, Nanjing 211106, China.","Zhao J, Li Y, Wang C, Zhang H, Zhang H, Jiang B, Guo X, Song X",,"National Key R&D Program of China, Scientific Research Foundation of Nanjing Medical University, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Program for Distinguished Talents of Six Domains in Jiangsu Province, Natural Science Foundation of the Jiangsu Higher Education Institutions, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities, Fok Ying Tung Education Foundation",14.0,China +32618424,LymphoAtlas,0.957420588,LymphoAtlas,0.957420588,,0,1,http://bmm-lab.github.io/LymphoAtlas,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20221021090903/https://bmm-lab.github.io/LymphoAtlas/,2020-07-01,"Institut de Pharmacologie et de Biologie Structurale (IPBS), Université de Toulouse, CNRS, UPS, Toulouse, France.","Locard-Paulet M, Voisinne G, Froment C, Goncalves Menoita M, Ounoughene Y, Girard L, Gregoire C, Mori D, Martinez M, Luche H, Garin J, Malissen M, Burlet-Schiltz O, Malissen B, Gonzalez de Peredo A, Roncagalli R",,"ERC INTEGRATE, European Research Council, CNRS, INSERM, MSDAVENIR Fund, PHENOMIN, European Research Council, Investissement d'Avenir program of the French Ministry of Research ProFI, Agence Nationale de la Recherche",6.0,France +32766702,Male Fertility Gene Atlas,0.886184371,MFGA,0.8441058,Male Fertility Gene Atlas,0.886184371,1,http://mfga.uni-muenster.de,302,,"(51.9624,7.6257)",no_wayback,2020-09-01,"Institute of Medical Informatics, University of Münster, Münster, Germany.","Krenz H, Gromoll J, Darde T, Chalmel F, Dugas M, Tüttelmann F",,German Research Foundation,1.0,Germany 
+32766766,lncR2metasta,0.978646653,lncR2metasta,0.978646653,,0,1,http://lncR2metasta.wchoda.com,200,,"(39.9075,116.3972)",http://web.archive.org/web/20210413135851/http://lncr2metasta.wchoda.com/,2021-05-01,"College of Life Science and Health, Wuhan University of Science and Technology, Wuhan, China.","Zhang S, He X, Zhang R, Deng W",,"National Natural Science Foundation of China, National Natural Science Foundation of China",5.0,China +32820322,LncAS2Cancer,0.992546678,LncAS2Cancer,0.992546678,,0,1,http://lncrna2as.cd120.com,"HTTPConnectionPool(host='lncrna2as.cd120.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2021-05-01,"Department of Thoracic Surgery, West China Hospital, Sichuan University.","Deng Y, Luo H, Yang Z, Liu L",,"West China Hospital, Sichuan University, Sichuan Province Science and Technology Support Program, West China Hospital, Sichuan University, China Postdoctoral Science Foundation, National Natural Science Foundation of China, Sichuan University, China Postdoctoral Science Foundation, National Natural Science Foundation of China",0.0,China +32821400,Kiwifruit Genome Database,0.988579522,KGD,0.986381908,Kiwifruit Genome Database,0.988579522,1,http://kiwifruitgenome.org,200,,"(37.7621,-122.3971)",http://web.archive.org/web/20221024063523/https://kiwifruitgenome.org/,2020-08-01,"School of Horticulture, Anhui Agricultural University, Hefei, 230036 China.","Yue J, Liu J, Tang W, Wu YQ, Tang X, Li W, Yang Y, Wang L, Huang S, Fang C, Zhao K, Fei Z, Liu Y, Zheng Y",,"National Natural Science Foundation of China (National Science Foundation of China), National Natural Science Foundation of China (National Science Foundation of China), National Natural Science Foundation of China (National Science Foundation of China), National Natural Science Foundation of China (National Science Foundation of China), National Science 
Foundation (NSF)",6.0,China +33045741,LncSEA,0.996705592,LncSEA,0.996705592,,0,1,http://bio.liclab.net/LncSEA/index.php,200,,"(22.2783,114.1747)",http://web.archive.org/web/20220504165449/http://bio.liclab.net/LncSEA/index.php,2021-01-01,"School of Medical Informatics, Daqing Campus, Harbin Medical University, Daqing 163319, China.","Chen J, Zhang J, Gao Y, Li Y, Feng C, Song C, Ning Z, Zhou X, Zhao J, Feng M, Zhang Y, Wei L, Pan Q, Jiang Y, Qian F, Han J, Yang Y, Wang Q, Li C",,"National Natural Science Foundation of China, Harbin Medical University, National Natural Science Foundation of China, National Science Foundation of Heilongjiang Province",11.0,China +33045751,LncExpDB,0.998109341,LncExpDB,0.998109341,,0,1,http://bigd.big.ac.cn/lncexpdb,301,,"(39.9075,116.3972)",http://web.archive.org/web/20210517120259/https://bigd.big.ac.cn/lncexpdb/,2021-01-01,"China National Center for Bioinformation, Beijing 100101, China.","Li Z, Liu L, Jiang S, Li Q, Feng C, Du Q, Zou D, Xiao J, Zhang Z, Ma L",,"Chinese Academy of Sciences, Youth Innovation Promotion Association of Chinese Academy of Sciences, National Natural Science Foundation of China, Chinese Academy of Sciences, Chinese Academy of Sciences, National Key Research and Development Program of China, National Key Research and Development Program of China, K. C. 
Wong Education Foundation, Chinese Academy of Sciences",22.0,"China, China" +33095885,IndiGenomes,0.907384753,IndiGenomes,0.907384753,,0,1,http://clingen.igib.res.in/indigen,301,,"(26.7907,75.2061)",http://web.archive.org/web/20220712145945/https://clingen.igib.res.in/indigen/,2021-01-01,"CSIR-Institute of Genomics and Integrative Biology, New Delhi 110025, India.","Jain A, Bhoyar RC, Pandhare K, Mishra A, Sharma D, Imran M, Senthivel V, Divakar MK, Rophina M, Jolly B, Batra A, Sharma S, Siwach S, Jadhao AG, Palande NV, Jha GN, Ashrafi N, Mishra PK, A K V, Jain S, Dash D, Kumar NS, Vanlallawma A, Sarma RJ, Chhakchhuak L, Kalyanaraman S, Mahadevan R, Kandasamy S, B M P, Rajagopal RE, J ER, P ND, Bajaj A, Gupta V, Mathew S, Goswami S, Mangla M, Prakash S, Joshi K, S S, Gajjar D, Soraisham R, Yadav R, Devi YS, Gupta A, Mukerji M, Ramalingam S, B K B, Scaria V, Sivasubbu S",,"Council of Scientific and Industrial Research (CSIR), India, Council of Scientific and Industrial Research (CSIR), India",14.0,India +33137204,KinaseMD,0.997730494,KinaseMD,0.997730494,,0,1,http://bioinfo.uth.edu/kmd,302,,"(29.7633,-95.3633)",http://web.archive.org/web/20220503180121/https://bioinfo.uth.edu/kmd/,2021-01-01,"Center for Precision Health, School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston TX 77030, USA.","Hu R, Xu H, Jia P, Zhao Z",,"NLM NIH HHS, National Institutes of Health, Cancer Prevention and Research Institute of Texas, Cancer Prevention and Research Institute of Texas",2.0,United States +33147622,KNIndex,0.974327445,KNIndex,0.974327445,,0,1,http://knindex.pufengdu.org,301,,"(22.2783,114.1747)",http://web.archive.org/web/20201115170647/https://knindex.pufengdu.org/,2021-07-01,"College of Intelligence and Computing, Tianjin University.","Zhang WY, Xu J, Wang J, Zhou YK, Chen W, Du PF",,"National Natural Science Foundation of China, National Key Research and Development Program of China, Natural Science Foundation for 
Distinguished Young Scholar of Hebei Province, National Natural Science Foundation of China, Institute of Computing Technology, Chinese Academy of Sciences",3.0, +33166392,LitCovid,0.996545017,LitCovid,0.996545017,,0,1,http://www.ncbi.nlm.nih.gov/research/coronavirus,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221108222923/https://www.ncbi.nlm.nih.gov/research/coronavirus/,2021-01-01,"National Center for Biotechnology Information (NCBI), National Library of Medicine (NLM), National Institutes of Health (NIH), Bethesda, MD 20892, USA.","Chen Q, Allot A, Lu Z",,"National Institutes of Health, Intramural Research Program of the National Library of Medicine, NLM NIH HHS",52.0,United States +33193550,MaizeMine,0.995152235,MaizeMine,0.995152235,Maize Genetics and Genome Database,0.688180787,1,http://maizemine.maizegdb.org,301,,"(37.7621,-122.3971)",no_wayback,2020-10-22,"Division of Animal Sciences, University of Missouri, Columbia, MO, United States.","Shamimuzzaman M, Gardiner JM, Walsh AT, Triant DA, Le Tourneau JJ, Tayal A, Unni DR, Nguyen HN, Portwood JL 2nd, Cannon EKS, Andorf CM, Elsik CG",,Agricultural Research Service,1.0,United States +33219686,LnCeCell,0.997025967,LnCeCell,0.997025967,,0,1,"http://www.bio-bigdata.net/LnCeCell/, http://bio-bigdata.hrbmu.edu.cn/LnCeCell","502, 302",,", (31.2222,121.4581)","no_wayback, http://web.archive.org/web/20220617164246/http://bio-bigdata.hrbmu.edu.cn/LnCeCell/",2021-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Wang P, Guo Q, Hao Y, Liu Q, Gao Y, Zhi H, Li X, Shang S, Guo S, Zhang Y, Ning S, Li X",,"National Key Research and Development Program of China, National Natural Science Foundation of China, Postdoctoral Science Foundation of China, National Natural Science Foundation of China, Postdoctoral Foundation of Hei Long Jiang Province, Heilongjiang Provincial Natural Science Foundation, National Natural Science Foundation of China, National Natural 
Science Foundation of China, Heilongjiang Touyan Innovation Team Program",9.0,China +33287903,KVarPredDB,0.997992098,KVarPredDB,0.997992098,VarPredDB,0.663375258,1,http://bioinfo.zju.edu.cn/KVarPredDB,403,,,http://web.archive.org/web/20220617065943/http://bioinfo.zju.edu.cn/KVarPredDB/,2020-12-07,"Department of Human Genetics, and Women's Hospital, Zhejiang University School of Medicine, Hangzhou, China.","Ying Y, Lu L, Banerjee S, Xu L, Zhao Q, Wu H, Li R, Xu X, Yu H, Neculai D, Xi Y, Yang F, Qin J, Li C",,"Chinese National Natural Science Foundation, Zhejiang Provincial Natural Science Foundation of China, Zhejiang Provincial Key Projects of Technology Research",1.0,China +33308175,ILDGDB,0.998209894,ILDGDB,0.998209894,,0,1,http://ildgdb.org,"HTTPConnectionPool(host='ildgdb.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220617174602/http://ildgdb.org/,2020-12-11,"Department of Respiratory and Critical Care Medicine, the Second Affiliated Hospital of Harbin Medical University, Harbin, 150081, China.","Li Y, Wu G, Shang Y, Qi Y, Wang X, Ning S, Chen H",,,1.0,China +33502860,InterMetalDB,0.997499466,InterMetalDB,0.997499466,,0,1,http://intermetaldb.biotech.uni.wroc.pl,301,,"(51.1000,17.0333)",http://web.archive.org/web/20220802121143/https://intermetaldb.biotech.uni.wroc.pl/,2021-01-27,"Department of Chemical Biology, Faculty of Biotechnology, University of Wrocław, F. 
Joliot-Curie 14a, 50-383 Wrocław, Poland.","Tran JB, Krężel A",,Narodowe Centrum Nauki,2.0,Poland +33507270,InSexBase,0.984699667,InSexBase,0.984699667,,0,1,http://www.insect-genome.com/Sexdb,200,,"(30.2936,120.1614)",no_wayback,2021-01-01,"Ministry of Agriculture and Rural Affairs Key Laboratory of Molecular Biology of Crop Pathogens and Insects & Key Laboratory of Biology of Crop Pathogens and Insects of Zhejiang Province, Institute of Insect Sciences, Zhejiang University, Yuhangtang Rd 866, Xihu District, Hanzghou, 310058, China.","Chen XI, Mei Y, Chen M, Jing D, He Y, Liu F, He K, Li F",,"Natural Science Foundation of Zhejiang Province, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities",0.0,China +33645624,KAIKObase,0.99772799,KAIKObase,0.99772799,silkworm genome,0.654908737,1,http://kaikobase.dna.affrc.go.jp,302,,"(36.2000,140.1000)",http://web.archive.org/web/20220726065006/https://kaikobase.dna.affrc.go.jp/,2021-02-01,"Institute of Agrobiological Sciences, National Agriculture and Food Research Organization, 1-2 Owashi, Tsukuba, Ibaraki 305-8634, Japan.","Yang CC, Yokoi K, Yamamoto K, Jouraku A",,"Japan Science and Technology Agency, Japan International Cooperation Agency, Science and Technology Research Partnership for Sustainable Development",3.0,Japan +33709443,LIMONADA,0.994873464,LIMONADA,0.994873464,,0,1,http://limonada.univ-reims.fr,302,,"(49.9000,2.3000)",http://web.archive.org/web/20210312102624/https://limonada.univ-reims.fr/,2021-03-11,"Matrice Extracellulaire et Dynamique Cellulaire (UMR CNRS 7369), Chaire MAgICS, Université de Reims Champagne-Ardenne, Reims, France.","Crowet JM, Buchoux S, Belloy N, Sarazin C, Lins L, Dauchez M",,"Fonds De La Recherche Scientifique - FNRS, Wallonie-Bruxelles International, Fonds De La Recherche Scientifique - FNRS, Centre National de la Recherche Scientifique",0.0,France 
+33877974,LIVE-NFLX-II,0.79076913,LIVE-NFLX-II,0.79076913,,0,1,http://live.ece.utexas.edu/research/LIVE_NFLX_II/live_nflx_plus.html,200,,"(30.2672,-97.7431)",http://web.archive.org/web/20221022060812/https://live.ece.utexas.edu/research/LIVE_NFLX_II/live_nflx_plus.html,2021-05-25,None,"Bampis CG, Li Z, Katsavounidis I, Huang TY, Ekanadham C, Bovik AC",,,0.0, +33903708,MAP,0.872308294,MAP,0.872308294,microRNA Analysis Portal,0.776325062,1,http://stablab.uniroma2.it/MAP,301,,"(41.8661,12.5896)",no_wayback,2021-04-26,"MirNat s.r.l., 00133, Rome, Italy. stefano.pirro@mir-nat.com.","Pirrò S, Matic I, Colizzi V, Galgani A",,,0.0,Italy +33906563,M6ADD,0.994489074,M6ADD,0.994489074,m6A-diseases database,0.816149268,1,http://m6add.edbc.org,200,,"(33.8359,-118.3406)",no_wayback,2021-04-27,"School of Life Science and Technology, Computational Biology Research Center, Harbin Institute of Technology, Harbin, Heilongjiang, China.","Zhou D, Wang H, Bi F, Xing J, Gu Y, Wang C, Zhang M, Huang Y, Zeng J, Wu Q, Zhang Y",,"National Natural Science Foundation of China, Applied Technology Research and Development Plan of Heilongjiang Province",2.0,China +34023905,knotAnnotSV,0.989982367,knotAnnotSV,0.989982367,,0,1,http://www.lbgi.fr/AnnotSV,301,,"(48.5839,7.7455)",http://web.archive.org/web/20220609042755/https://www.lbgi.fr/AnnotSV/,2021-07-01,"Laboratoire de Génétique Médicale, U1112, INSERM, IGMA, FMTS, Université de Strasbourg, Strasbourg, France.","Geoffroy V, Guignard T, Kress A, Gaillard JB, Solli-Nowlan T, Schalk A, Gatinois V, Dollfus H, Scheidecker S, Muller J",,"Strasbourg University Hospital, Inserm, University of Strasbourg",0.0,France +34127402,Immu-Mela,0.990034401,Immu-Mela,0.990034401,,0,1,http://bioinfo.vanderbilt.edu/database/Immu-Mela,"HTTPConnectionPool(host='bioinfo.vanderbilt.edu', port=80): Max retries exceeded with url: /database/Immu-Mela (Caused by ConnectTimeoutError(, 'Connection to bioinfo.vanderbilt.edu timed out. 
(connect timeout=5)'))",,,no_wayback,2021-05-14,"Center for Quantitative Sciences, Vanderbilt University Medical Center, Nashville TN 37203, USA; Department of Biostatistics, Vanderbilt University Medical Center, Nashville TN 37203, USA.","Yang J, Zhao S, Wang J, Sheng Q, Liu Q, Shyr Y",,"NCI NIH HHS, National Cancer Institute, National Cancer Institute, NCI NIH HHS, National Cancer Institute, NCI NIH HHS, NCI NIH HHS, National Cancer Institute",0.0,"United States, United States" +34189203,IsoArcH,0.689917505,IsoArcH,0.689917505,,0,1,http://isoarch.eu,308,,"(48.3578,6.7347)",http://web.archive.org/web/20221022000019/https://isoarch.eu/,2021-06-10,"EA - Eco-anthropologie (UMR 7206), Muséum National d'Histoire Naturelle, CNRS, Université Paris Diderot, Paris, France.",Cheung C,,Agence Nationale de la Recherche,0.0,France +34378177,LinguaPix,0.955935061,LinguaPix,0.955935061,,0,1,http://linguapix.uni-mannheim.de,301,,"(49.4891,8.4669)",no_wayback,2021-08-10,"Department of English Linguistics, University of Mannheim, Schloss EW 274, 68161, Mannheim, Germany. 
a.krautz@uni-mannheim.de.","Krautz AE, Keuleers E",,Universität Mannheim,0.0,Germany +34415996,LINPS,0.818079352,LINPS,0.818079352,,0,1,http://mahshaaban.shinyapps.io/LINPSAPP,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20211009060630/https://mahshaaban.shinyapps.io/LINPSAPP/,2021-08-01,"Department of Biochemistry and Convergence Medical Science, Institute of Health Sciences, Gyeongsang National University College of Medicine, 816 Beon-gil 15, Jinju-daero, Jinju 52727, South Korea.","Ahmed M, Kim DR",,National Research Foundation of Korea grant,0.0, +34456903,ImmuCellDB,0.99683398,ImmuCellDB,0.99683398,,0,1,http://wap-lab.org:3200/ImmuCellDB,301,,,http://web.archive.org/web/20221024054256/http://wap-lab.org:3200/ImmuCellDB/,2021-08-12,"Institute of Systems Medicine, Chinese Academy of Medical Sciences & Peking Union Medical College, Beijing, China.","Chen Z, Na H, Wu A",,,0.0,China +34464437,lncExplore,0.996928811,lncExplore,0.996928811,,0,1,http://lncexplore.bmi.nycu.edu.tw,"HTTPConnectionPool(host='lncexplore.bmi.nycu.edu.tw', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20210909171621/http://lncexplore.bmi.nycu.edu.tw/,2021-08-01,"Institute of Biomedical Informatics, National Yang Ming Chiao Tung University, No.155, Sec. 
2, Linong St., Beitou District, Taipei 11221, Taiwan.","Lee YW, Chen M, Chung IF, Chang TY",,,0.0, +34755873,ImmReg,0.986415982,ImmReg,0.986415982,,0,1,http://bio-bigdata.hrbmu.edu.cn/ImmReg,302,,"(31.2222,121.4581)",no_wayback,2021-12-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150081, China.","Jiang T, Zhou W, Chang Z, Zou H, Bai J, Sun Q, Pan T, Xu J, Li Y, Li X",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Hainan Provincial Key Laboratory of Carcinogenesis and Intervention, Heilongjiang Touyan Innovation Team Program, Hainan Provincial Natural Science Foundation of China, HMU MarshalInitiative, Hainan Medical University, National Key Research and Development Program of China, National Natural Science Foundation of China, Natural Science Foundation for Distinguished Young Scholars of Heilongjiang Province, Major Science and Technology Program of Hainan Province, Hainan Province Clinical Medical Center, National Natural Science Foundation of China, Science and Technology special fund of Hainan Province",1.0,China +34768782,legumeSSRdb,0.996245384,legumeSSRdb,0.996245384,,0,1,http://bioinfo.usu.edu/legumeSSRdb,301,,"(41.7355,-111.8344)",http://web.archive.org/web/20201130204746/http://bioinfo.usu.edu/legumeSSRdb/,2021-10-21,"Department of Plants, Soils and Climate, CAAS, Utah State University, Logan, UT 84321, USA.","Duhan N, Kaundal R",,,0.0,United States +34936882,Lung CellCards,0.907546788,Lung CellCards,0.907546788,,0,1,http://www.lungmap.net/cell-cards,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20220517033811/https://lungmap.net/cell-cards/,2021-12-21,"Department of Pediatrics, University of California, San Diego, 9500 Gilman Drive, La Jolla, CA 92093, USA; Department of Biological Sciences, University of California, San Diego, 9500 Gilman Drive, La 
Jolla, CA 92093, USA. Electronic address: xinsun@health.ucsd.edu.","Sun X, Perl AK, Li R, Bell SM, Sajti E, Kalinichenko VV, Kalin TV, Misra RS, Deshmukh H, Clair G, Kyle J, Crotty Alexander LE, Masso-Silva JA, Kitzmiller JA, Wikenheiser-Brokamp KA, Deutsch G, Guo M, Du Y, Morley MP, Valdez MJ, Yu HV, Jin K, Bardes EE, Zepp JA, Neithamer T, Basil MC, Zacharias WJ, Verheyden J, Young R, Bandyopadhyay G, Lin S, Ansong C, Adkins J, Salomonis N, Aronow BJ, Xu Y, Pryhuber G, Whitsett J, Morrisey EE, ",,"NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS",0.0,"United States, United States" +34976312,LCMD,0.993581444,LCMD,0.993581444,Lung Cancer Metabolome Database,0.985615373,1,http://cosbi7.ee.ncku.edu.tw/LCMD,301,,"(22.9908,120.2133)",no_wayback,2021-12-07,"Department of Electrical Engineering, National Cheng Kung University, Tainan 70101, Taiwan.","Wu WS, Wu HY, Wang PH, Chen TY, Chen KR, Chang CW, Lee DE, Lin BH, Chang WC, Liao PC",,"National Cheng Kung University, Ministry of Science and Technology, Taiwan, Ministry of Science and Technology, Taiwan",0.0, +21177657,mESAdb,0.995116591,mESAdb,0.995116591,microRNA expression and sequence analysis database,0.857367776,1,http://konulab.fen.bilkent.edu.tr/mirna,301,,"(39.9199,32.8543)",http://web.archive.org/web/20220615142508/http://konulab.fen.bilkent.edu.tr/mirna/,2011-01-01,"Department of Molecular Biology and Genetics, Bilkent University, 06800 Ankara, Turkey.","Kaya KD, Karakülah G, Yakicier CM, Acar AC, Konu O",,,17.0,Turkey +21349870,MPID-T2,0.942223958,MPID-T2,0.942223958,MHC-Peptide Interaction Database-T,0.729910028,1,http://biolinfo.org/mpid-t2,301,,"(-33.8678,151.2073)",http://web.archive.org/web/20220527155322/http://biolinfo.org/mpid-t2/,2011-02-23,"Department of Chemistry and Biomolecular Sciences and ARC Centre of Excellence in Bioinformatics, Macquarie University, Sydney, NSW, Australia.","Khan JM, Cheruku HR, Tong JC, Ranganathan 
S",,,8.0,Australia +21546393,MSigDB,0.988693297,MSigDB,0.988693297,Molecular signatures database,0.879078257,1,http://www.broadinstitute.org/msigdb,301,,"(37.7621,-122.3971)",no_wayback,2011-05-05,"Broad Institute of MIT and Harvard, Cambridge, MA 02142, USA.","Liberzon A, Subramanian A, Pinchback R, Thorvaldsdóttir H, Tamayo P, Mesirov JP",,"NCI NIH HHS, NCI NIH HHS",1853.0,United States +21584191,MTB-PCDB,0.996515731,MTB-PCDB,0.996515731,The Mycobacterium tuberculosis Proteome Comparison Database,0.917592347,1,http://www.bicjbtdrc-mgims.in/MTB-PCDB,"HTTPConnectionPool(host='www.bicjbtdrc-mgims.in', port=80): Max retries exceeded with url: /MTB-PCDB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200503184958/http://www.bicjbtdrc-mgims.in:80/MTB-PCDB/,2011-04-22,"Bioinformatics Centre, JB Tropical Disease Research Centre, Mahatma Gandhi Institute of Medical Sciences, Sevagram (Wardha) 442102, Maharashtra, India.","Jena L, Wankhade G, Kumar S, Harinath BC",,,1.0,India +"21605702, 25055920",miRWalk,0.998156488,miRWalk,0.998156488,,0,2,http://mirwalk.uni-hd.de,302,,"(49.4077,8.6908)",no_wayback,2014-01-01,"Medical Research Center, Medical Faculty Mannheim, University of Heidelberg, D-68167 Mannheim, Germany. 
harsh.dweep@medma.uni-heidelberg.de, Medical Faculty Mannheim, Medical Research Center, University of Heidelberg, Theodor-Kutzer-Ufer 1-3, D-68167, Mannheim, Germany, harsh.dweep@medma.uni-heidelberg.de.","Dweep H, Sticht C, Pandey P, Gretz N, Dweep H, Gretz N, Sticht C",", ",", ",1141.0,"Germany, Germany" +21668943,MeRy-B,0.987240215,MeRy-B,0.987240215,,0,1,http://www.cbib.u-bordeaux2.fr/MERYB/index.php,301,,"(44.8404,-0.5805)",no_wayback,2011-06-13,,,,,0.0, +21856757,modENCODE,0.994001627,modENCODE,0.994001627,model organism Encyclopedia of DNA Elements,0.97074911,1,http://www.modencode.org,200,,"(45.8399,-119.7006)",http://web.archive.org/web/20220901043602/http://www.modencode.org/,2011-08-19,"Lawrence Berkeley National Laboratory, Genomics Division, 1 Cyclotron Road MS64-121, Berkeley, CA 94720, USA.","Washington NL, Stinson EO, Perry MD, Ruzanov P, Contrino S, Smith R, Zha Z, Lyne R, Carr A, Lloyd P, Kephart E, McKay SJ, Micklem G, Stein LD, Lewis SE",,"NHGRI NIH HHS, Wellcome Trust",23.0,United States +21880229,Monogenic Diabetes Registry,0.641969562,,0,Monogenic Diabetes Registry,0.641969562,1,http://www.kovlerdiabetescenter.org/registry,301,,"(37.5331,-122.2486)",http://web.archive.org/web/20111217150508/http://www.kovlerdiabetescenter.org:80/registry,2011-07-01,"Department of Pediatrics, Section of Adult and Pediatric Endocrinology, Diabetes and Metabolism, The University of Chicago, Chicago, Illinois 60637, USA. 
sgreeley@peds.bsd.uchicago.edu","Greeley SA, Naylor RN, Cook LS, Tucker SE, Lipton RB, Philipson LH",,"NCRR NIH HHS, NIDDK NIH HHS, NIDDK NIH HHS",19.0,United States +21880546,MtbSD,0.997164249,MtbSD,0.997164249,Mycobacterium tuberculosis Structural Database,0.987535059,1,http://bmi.icmr.org.in/mtbsd/MtbSD.php,301,,"(29.9657,76.8370)",http://web.archive.org/web/20150703021026/http://bmi.icmr.org.in:80/mtbsd/MtbSD.php,2011-08-30,"National Institute for Research in Tuberculosis, Chetpet, Chennai 600 031, India.","Hassan S, Logambiga P, Raman AM, Subazini TK, Kumaraswami V, Hanna LE",,ICMR- Biomedical Informatics and National Institute for Research in Tuberculosis (formerly Tuberculosis Research Centre),2.0,India +21984757,miREnvironment,0.989526451,miREnvironment,0.989526451,,0,1,http://cmbi.bjmu.edu.cn/miren,"HTTPConnectionPool(host='cmbi.bjmu.edu.cn', port=80): Max retries exceeded with url: /miren (Caused by ConnectTimeoutError(, 'Connection to cmbi.bjmu.edu.cn timed out. (connect timeout=5)'))",,,no_wayback,2011-10-07,"Department of Biomedical Informatics, Peking University Health Science Center, Beijing 100191, China.","Yang Q, Qiu C, Yang J, Wu Q, Cui Q",,,44.0,China +22053087,MimoDB,0.995547354,MimoDB,0.995547354,,0,1,http://immunet.cn/mimodb,200,,"(34.0522,-118.2437)",http://web.archive.org/web/20220128200634/http://www.immunet.cn/mimodb/,2011-11-03,"Key Laboratory for Neuroinformation of Ministry of Education, School of Life Science and Technology, University of Electronic Science and Technology of China, No 4, 2nd Section, North Jianshe Road, Chengdu, Sichuan 610054, China. hj@uestc.edu.cn","Huang J, Ru B, Zhu P, Nie F, Yang J, Wang X, Dai P, Lin H, Guo FB, Rao N",,,56.0,"China, China" +22080560,MIPModDB,0.992227316,MIPModDB,0.992227316,,0,1,http://bioinfo.iitk.ac.in/MIPModDB,"HTTPConnectionPool(host='bioinfo.iitk.ac.in', port=80): Max retries exceeded with url: /MIPModDB (Caused by ConnectTimeoutError(, 'Connection to bioinfo.iitk.ac.in timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220615180848/http://bioinfo.iitk.ac.in/MIPModDB/,2011-11-12,"Department of Biological Sciences and Bioengineering, Indian Institute of Technology Kanpur, Kanpur 208016, India.","Gupta AB, Verma RK, Agarwal V, Vajpai M, Bansal V, Sankararamakrishnan R",,,16.0,India +22080565,modMine,0.931703269,modMine,0.931703269,,0,1,http://intermine.modencode.org,200,,"(45.8399,-119.7006)",http://web.archive.org/web/20220521022222/http://intermine.modencode.org/,2011-11-12,"Department of Genetics, University of Cambridge, Downing Street, Cambridge CB2 3EH, UK.","Contrino S, Smith RN, Butano D, Carr A, Hu F, Lyne R, Rutherford K, Kalderimis A, Sullivan J, Carbon S, Kephart ET, Lloyd P, Stinson EO, Washington NL, Perry MD, Ruzanov P, Zha Z, Lewis SE, Stein LD, Micklem G",,"NHGRI NIH HHS, Wellcome Trust",82.0, +22086948,MetaCrop,0.99725759,MetaCrop,0.99725759,,0,1,http://metacrop.ipk-gatersleben.de,302,,"(49.0151,12.1016)",no_wayback,2011-11-15,"Leibniz Institute of Plant Genetics and Crop Plant Research IPK, Corrensstrasse 3, 06466 Gatersleben, Germany. 
schreibe@ipk-gatersleben.de","Schreiber F, Colmsee C, Czauderna T, Grafahrend-Belau E, Hartmann A, Junker A, Junker BH, Klapperstück M, Scholz U, Weise S",,,20.0,Germany +22096227,MINT,0.995954633,MINT,0.995954633,Molecular INTeraction Database,0.968848467,1,"http://mint.bio.uniroma2.it/mint/, http://mint.bio.uniroma2.it/mint/download.do","301, 301",,"(41.8661,12.5896), (41.8661,12.5896)","http://web.archive.org/web/20171202114902/http://mint.bio.uniroma2.it:80/mint/, http://web.archive.org/web/20141013091913/http://mint.bio.uniroma2.it/mint/download.do",2011-11-16,"Department of Biology, University of Rome Tor Vergata, Via della Ricerca Scientifica, 00133 Rome, Italy.","Licata L, Briganti L, Peluso D, Perfetto L, Iannuccelli M, Galeota E, Sacco F, Palma A, Nardozza AP, Santonico E, Castagnoli L, Cesareni G",,"European Commission FP7, Telethon",481.0,Italy +22096233,MINAS,0.994471371,MINAS,0.994471371,Metal Ions in Nucleic AcidS,0.86654482,1,http://www.minas.uzh.ch,302,,"(47.3667,8.5500)",http://web.archive.org/web/20220316121022/https://www.minas.uzh.ch/,2011-11-16,"Institute of Inorganic Chemistry, University of Zurich, Winterthurerstrasse 190, CH-8057 Zurich, Switzerland.","Schnabl J, Suter P, Sigel RK",,"Swiss National Science Foundation, Swiss National Science Foundation, European Research Council",16.0,Switzerland +"22121219, 26432830, 30398659",MitoMiner,0.994106472,MitoMiner,0.994106472,,0,3,http://mitominer.mrc-mbu.cam.ac.uk,302,,"(52.2000,0.1167)",http://web.archive.org/web/20221023052603/https://mitominer.mrc-mbu.cam.ac.uk/,2019-01-01,"Medical Research Council Mitochondrial Biology Unit, Wellcome Trust/MRC Building, Hills Road, Cambridge CB2 0XY, UK., MRC Mitochondrial Biology Unit, Wellcome Trust/MRC Building, Cambridge Biomedical Campus, Hills Road, Cambridge, CB2 0XY, UK., MRC Mitochondrial Biology Unit, Wellcome Trust/MRC Building, Cambridge Biomedical Campus, Hills Road, Cambridge CB2 0XY, UK.","Smith AC, Blackshaw JA, Robinson AJ, Smith AC, 
Robinson AJ, Smith AC, Robinson AJ",", , ","Medical Research Council, Medical Research Council, Medical Research Council, Medical Research Council",181.0, +22123747,MitoZoa,0.997121572,MitoZoa,0.997121572,MITOchondrial genome database of metaZOAns,0.70377599,1,http://www.caspur.it/mitozoa,409,,,no_wayback,2011-11-28,"CASPUR, Consorzio interuniversitario per le Applicazioni di Supercalcolo per Università e Ricerca, Rome, Italy.","D'Onorio de Meo P, D'Antonio M, Griggio F, Lupi R, Borsani M, Pavesi G, Castrignanò T, Pesole G, Gissi C",,,28.0,Italy +"22135287, 24243848",miRNEST,0.990006804,miRNEST,0.990006804,,0,2,http://mirnest.amu.edu.pl,200,,"(52.4069,16.9299)",http://web.archive.org/web/20220621022619/http://mirnest.amu.edu.pl/,2013-11-15,"Laboratory of Bioinformatics, Faculty of Biology, Adam Mickiewicz University, Umultowska 89, 61-614 Poznan, Poland. miszcz@amu.edu.pl, Laboratory of Bioinformatics, Adam Mickiewicz University in Poznań, Poznań, Poland.","Szcześniak MW, Deorowicz S, Gapski J, Kaczyński Ł, Makalowska I, Szczesniak MW, Makalowska I",", ",", ",53.0,"Poland, Poland" +22139941,MMMDB,0.997343063,MMMDB,0.997343063,Mouse Multiple Tissue Metabolome Database,0.988576792,1,http://mmmdb.iab.keio.ac.jp,"HTTPConnectionPool(host='mmmdb.iab.keio.ac.jp', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to mmmdb.iab.keio.ac.jp timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20200218162445/http://mmmdb.iab.keio.ac.jp:80/,2011-12-01,"Institute for Advanced Biosciences, Keio University, Tsuruoka, Yamagata 997-0017, Japan. 
msugi@sfc.keio.ac.jp","Sugimoto M, Ikeda S, Niigata K, Tomita M, Sato H, Soga T",,,24.0,Japan +22140101,MethylomeDB,0.997244477,MethylomeDB,0.997244477,,0,1,http://epigenomics.columbia.edu/methylomedb/index.html,302,,"(38.8951,-77.0364)",no_wayback,2011-12-02,"Department of Psychiatry, Columbia University and The New York State Psychiatric Institute, New York, NY 10032, USA.","Xin Y, Chanrion B, O'Donnell AH, Milekic M, Costa R, Ge Y, Haghighi FG",,"NHGRI NIH HHS, NIMH NIH HHS, NIMH NIH HHS",30.0,United States +22146221,MnM,0.994092762,MnM,0.994092762,Minimotif Miner,0.651109982,1,"http://minimotifminer.org, http://mnm.engr.uconn.edu","405, 302",,", (42.0334,-71.2189)","no_wayback, http://web.archive.org/web/20120525094137/http://mnm.engr.uconn.edu:80/",2011-12-06,"Department of Computer Science and Engineering, University of Connecticut, Storrs, CT 06269-2155, USA.","Mi T, Merlin JC, Deverasetty S, Gryk MR, Bill TJ, Brooks AW, Lee LY, Rathnayake V, Ross CA, Sargeant DP, Strong CL, Watts P, Rajasekaran S, Schiller MR",,"NIGMS NIH HHS, NLM NIH HHS, NIGMS NIH HHS, NLM NIH HHS, NCRR NIH HHS",34.0,United States +22209237,MTCID,0.993752122,MTCID,0.993752122,uberculosis clinical isolate genetic polymorphism database,0.893513964,1,http://ccbb.jnu.ac.in/Tb,301,,"(28.6453,77.2128)",http://web.archive.org/web/20140722120505/http://ccbb.jnu.ac.in/Tb/,2011-12-29,"School of Computational & Integrative Sciences, Jawaharlal Nehru University, New Delhi 110067, India. 
richapurohit86@gmail.com","Bharti R, Das R, Sharma P, Katoch K, Bhattacharya A",,,7.0,India +22276777,miRdSNP,0.998023212,miRdSNP,0.998023212,,0,1,http://mirdsnp.ccr.buffalo.edu,200,,"(42.8865,-78.8784)",http://web.archive.org/web/20221028133437/http://mirdsnp.ccr.buffalo.edu/,2012-01-25,"Center for Computational Research, New York State Center of Excellence in Bioinformatics & Life Sciences, State University of New York at Buffalo, Buffalo, NY 14260, USA.","Bruno AE, Li L, Kalabus JL, Pan Y, Yu A, Hu Z",,"NIDA NIH HHS, NEI NIH HHS",84.0,United States +22301388,MaxQB,0.992410779,MaxQB,0.992410779,,0,1,http://www.biochem.mpg.de/maxqb,301,,"(51.5344,9.9323)",no_wayback,2012-02-02,,,,,0.0, +22309450,MTDB,0.993324598,MTDB,0.993324598,truncatula transporter database,0.971297204,1,http://bioinformatics.cau.edu.cn/MtTransporter,301,,"(39.9075,116.3972)",http://web.archive.org/web/20210416153922/http://bioinformatics.cau.edu.cn/MtTransporter/,2012-02-06,,,,,0.0, +22491796,MSV3d,0.996012549,MSV3d,0.996012549,Database of human MisSense Variants,0.791921243,1,http://decrypthon.igbmc.fr/msv3d,404,,,http://web.archive.org/web/20190912003812/http://decrypthon.igbmc.fr:80/msv3d/,2012-04-03,,,,,0.0, +22545773,MASiVEdb,0.994499087,MASiVEdb,0.994499087,Mapping and Analysis of SireVirus Elements Database,0.726713588,1,http://bat.infspire.org/databases/masivedb,301,,"(49.1952,16.6080)",http://web.archive.org/web/20220314145741/http://bat.infspire.org/databases/masivedb/,2012-04-30,"Institute of Agrobiotechnology, Centre for Research and Technology Hellas, Thessaloniki, 57001, Greece. 
alexandros.bousios@gmail.com","Bousios A, Minga E, Kalitsou N, Pantermali M, Tsaballa A, Darzentas N",,,12.0,Greece +22547615,MBLED,0.99639684,MBLED,0.99639684,Lactamase Engineering Database,0.869486794,1,"http://www.mbled.uni-stuttgart.de, http://www.lahey.org/Studies","200, 301",,"(48.7823,9.1770), (37.7621,-122.3971)","http://web.archive.org/web/20220121023546/http://www.mbled.uni-stuttgart.de/, http://web.archive.org/web/20190123075758/http://www.lahey.org:80/Studies/",2012-04-30,,,,,0.0, +22606288,MGEx-Udb,0.998166392,MGEx-Udb,0.998166392,Mammalian Gene Expression Uterus database,0.957160369,1,http://resource.ibab.ac.in/MGEx-Udb,404,,,http://web.archive.org/web/20220305144010/http://resource.ibab.ac.in/MGEx-Udb/,2012-05-11,,,,,0.0, +22701463,Medicago PhosphoProtein Database,0.982081629,MPPD,0.981770674,Medicago PhosphoProtein Database,0.982081629,1,http://phospho.medicago.wisc.edu,301,,"(45.8399,-119.7006)",http://web.archive.org/web/20220308063942/https://phospho.medicago.wisc.edu/,2012-06-11,,,,,0.0, +22730453,MuteinDB,0.990509152,MuteinDB,0.990509152,,0,1,http://www.MuteinDB.org,301,,"(47.0667,15.4500)",http://web.archive.org/web/20150526190148/http://www.muteindb.org/,2012-06-21,,,,,0.0, +22856649,ModelDB,0.958948493,ModelDB,0.958948493,,0,1,http://bl210.caspur.it/MODEL-DB/MODEL-DB_web/MODindex.php.Operating,"HTTPConnectionPool(host='bl210.caspur.it', port=80): Max retries exceeded with url: /MODEL-DB/MODEL-DB_web/MODindex.php.Operating (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2012-08-02,"Department of Physics, Sapienza University of Rome, P,le A, Moro, 5, 00185 Rome, Italy.","Carbajo D, Tramontano A",,,2.0,Italy +22961451,MRTDD,0.996869192,MRTDD,0.996869192,,0,1,http://mrtdd.mbc.nctu.edu.tw,"HTTPConnectionPool(host='mrtdd.mbc.nctu.edu.tw', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or 
service not known'))",,,http://web.archive.org/web/20140722150518/http://mrtdd.mbc.nctu.edu.tw/,2012-01-01,"Institute of Bioinformatics and Systems Biology, National Chiao Tung University, Hsin-Chu, Taiwan. loveariddle.bi96g@g2.nctu.edu.tw","Huang WC, Lin FM, Chang TH, Liao KW, Huang HD",,,1.0, +23019219,METscout,0.998384833,METscout,0.998384833,,0,1,http://metscout.mpg.de,200,,"(51.5344,9.9323)",http://web.archive.org/web/20220706125538/http://metscout.mpg.de/,2012-09-27,,,,,0.0, +23044546,miR-EdiTar,0.902254691,miR-EdiTar,0.902254691,,0,1,http://microrna.osumc.edu/mireditar,"HTTPConnectionPool(host='microrna.osumc.edu', port=80): Max retries exceeded with url: /mireditar (Caused by ConnectTimeoutError(, 'Connection to microrna.osumc.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20201129035435/http://microrna.osumc.edu/mireditar/,2012-10-07,"Department of Molecular Virology, Immunology and Medical Genetics, Comprehensive Cancer Center, The Ohio State University, Columbus, OH, USA. 
alessandro.lagana@osumc.edu","Laganà A, Paone A, Veneziano D, Cascione L, Gasparini P, Carasi S, Russo F, Nigita G, Macca V, Giugno R, Pulvirenti A, Shasha D, Ferro A, Croce CM",,"NIGMS NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS",15.0,United States +"23060735, 23109552, 23630246",MetaboLights,0.997964621,MetaboLights,0.997964621,,0,3,http://www.ebi.ac.uk/metabolights,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20221103215812/https://www.ebi.ac.uk/metabolights/,2013-04-29,"European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD UK., nan, nan","Steinbeck C, Conesa P, Haug K, Mahendraker T, Williams M, Maguire E, Rocca-Serra P, Sansone SA, Salek RM, Griffin JL, nan, nan",", nan, nan","Biotechnology and Biological Sciences Research Council, Medical Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, nan, nan",41.0, +23066841,MetNet,0.945627868,MetNet,0.945627868,,0,1,"http://www.metnetonline.org, http://www.metnetonline.org/tutorial","200, 200",,"(33.7490,-84.3880), (33.7490,-84.3880)","http://web.archive.org/web/20220615140711/http://www.metnetonline.org/, http://web.archive.org/web/20210526010852/http://www.metnetonline.org/tutorial",2012-10-15,"Dept of Genetics, Development and Cell Biology, Iowa State University, Ames, IA, USA.","Sucaet Y, Wang Y, Li J, Wurtele ES",,,10.0,United States +23071556,MK4MDD,0.993700966,MK4MDD,0.993700966,evel Knowledge base for MDD,0.929511756,1,http://mdd.psych.ac.cn,"HTTPConnectionPool(host='mdd.psych.ac.cn', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='mdd.psych.ac.cn', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220124054155/http://mdd.psych.ac.cn/,2012-10-05,,,,,0.0, +"23118485, 25398900, 30462302",MBGD,0.990653872,MBGD,0.990653872,microbial genome database for comparative analysis,0.900049647,3,http://mbgd.genome.ad.jp,"HTTPConnectionPool(host='mbgd.genome.ad.jp', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20210414022942/http://mbgd.genome.ad.jp/,2019-01-01,"Laboratory of Genome Informatics, Data Integration and Analysis Facility, National Institute for Basic Biology, National Institutes of Natural Sciences, Nishigonaka 38, Myodaiji, Okazaki, Aichi 444-8585, Japan. uchiyama@nibb.ac.jp, Laboratory of Genome Informatics, National Institute for Basic Biology, National Institutes of Natural Sciences, Nishigonaka 38, Myodaiji, Okazaki, Aichi 444-8585, Japan Data Integration and Analysis Facility, National Institute for Basic Biology, National Institutes of Natural Sciences, Nishigonaka 38, Myodaiji, Okazaki, Aichi 444-8585, Japan uchiyama@nibb.ac.jp., Laboratory of Genome Informatics, National Institute for Basic Biology, National Institutes of Natural Sciences, Nishigonaka 38, Myodaiji, Okazaki, Aichi 444-8585, Japan.","Uchiyama I, Mihara M, Nishide H, Chiba H, Uchiyama I, Mihara M, Nishide H, Chiba H, Uchiyama I, Mihara M, Nishide H, Chiba H, Kato M",", , ",", , National Bioscience Database Center",80.0,"Japan, Japan, Japan, Japan" +23143105,MonarchBase,0.996606827,MonarchBase,0.996606827,,0,1,http://monarchbase.umassmed.edu,200,,"(42.2626,-71.8023)",http://web.archive.org/web/20220418025721/http://monarchbase.umassmed.edu/,2012-11-09,"Department of Neurobiology, University of Massachusetts Medical School, 364 Plantation Street, Worcester, MA 01605, USA.","Zhan S, Reppert SM",,NIGMS NIH HHS,50.0,United States +"23155064, 
29077942",MetalPDB,0.997881174,MetalPDB,0.997881174,,0,2,http://metalweb.cerm.unifi.it,302,,"(43.8319,11.1992)",http://web.archive.org/web/20201201183852/http://metalweb.cerm.unifi.it/,2018-01-01,"Magnetic Resonance Center (CERM), University of Florence, Via L. Sacconi 6, 50019 Sesto, Fiorentino, Italy. andreini@cerm.unifi.it, Magnetic Resonance Center (CERM)-University of Florence, Via L. Sacconi 6, 50019 Sesto Fiorentino, Italy.","Andreini C, Cavallaro G, Lorenzini S, Rosato A, Putignano V, Rosato A, Banci L, Andreini C",", ",", ",107.0,"Italy, Italy" +23173617,MirSNP,0.993562758,MirSNP,0.993562758,,0,1,http://cmbi.bjmu.edu.cn/mirsnp,"HTTPConnectionPool(host='cmbi.bjmu.edu.cn', port=80): Max retries exceeded with url: /mirsnp (Caused by ConnectTimeoutError(, 'Connection to cmbi.bjmu.edu.cn timed out. (connect timeout=5)'))",,,no_wayback,2012-11-23,"Institute of Mental Health, Peking University, 51 Hua Yuan Bei Road, Beijing 100191, People's Republic of China.","Liu C, Zhang F, Li T, Lu M, Wang L, Yue W, Zhang D",,,144.0,China +23178820,MicrobPad MD,0.995625615,MicrobPad MD,0.995625615,microbial pathogen diagnostic methods database,0.982669552,1,"http://bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp, http://pha-bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp","HTTPConnectionPool(host='bidd.nus.edu.sg', port=80): Max retries exceeded with url: /group/MicrobPad/MicrobPad.asp (Caused by ConnectTimeoutError(, 'Connection to bidd.nus.edu.sg timed out. (connect timeout=5)')), HTTPConnectionPool(host='pha-bidd.nus.edu.sg', port=80): Max retries exceeded with url: /group/MicrobPad/MicrobPad.asp (Caused by ConnectTimeoutError(, 'Connection to pha-bidd.nus.edu.sg timed out. 
(connect timeout=5)'))",,", ","http://web.archive.org/web/20140722182810/http://bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp, no_wayback",2012-11-21,"State Key Laboratory of Biotherapy and Cancer Center, West China Hospital, West China Medical School, Sichuan University, Sichuan 610041, PR China.","Han BC, Wei XN, Zhang JX, Truong NQ, Westgate CL, Zhao RY, Chen YZ",,,0.0,"China, China, China" +"23193269, 27899624, 28968784",MicroScope,0.975439906,MicroScope,0.975439906,,0,3,http://www.genoscope.cns.fr/agc/microscope,301,,"(48.8534,2.3488)",http://web.archive.org/web/20070811164104/http://www.genoscope.cns.fr:80/agc/MicroScope/,2019-07-01,"CEA, Institut de Génomique, Genoscope, 2 rue Gaston Crémieux, 91057 Evry, France. vallenet@genoscope.cns.fr, UMR 8030, CNRS, Université Évry-Val-d'Essonne, CEA, Institut de Génomique - Genoscope, Laboratoire d'Analyses Bioinformatiques pour la Génomique et le Métabolisme, F-91000 Évry, France vallenet@genoscope.cns.fr., None","Vallenet D, Belda E, Calteau A, Cruveiller S, Engelen S, Lajus A, Le Fèvre F, Longin C, Mornico D, Roche D, Rouy Z, Salvignol G, Scarpelli C, Thil Smith AA, Weiman M, Médigue C, Vallenet D, Calteau A, Cruveiller S, Gachet M, Lajus A, Josso A, Mercier J, Renaux A, Rollin J, Rouy Z, Roche D, Scarpelli C, Médigue C, Médigue C, Calteau A, Cruveiller S, Gachet M, Gautreau G, Josso A, Lajus A, Langlois J, Pereira H, Planel R, Roche D, Rollin J, Rouy Z, Vallenet D",", , ",", , France Genomique, Institut Francais De Boinformatique",375.0,"France, France" +23219992,MENT,0.987894893,MENT,0.987894893,Methylation and Expression database of,0.812570736,1,http://mgrc.kribb.re.kr:8080/MENT,"HTTPConnectionPool(host='mgrc.kribb.re.kr', port=8080): Max retries exceeded with url: /MENT (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20140722203530/http://mgrc.kribb.re.kr:8080/MENT/,2012-12-07,"Medical Genomics Research Center, KRIBB, 
Gwahangno, Yuseong-gu, Daejeon 305-806, Republic of Korea.","Baek SJ, Yang S, Kang TW, Park SM, Kim YS, Kim SY",,"National R & D Program for Cancer Control, Ministry for Health and Welfare, Republic of Korea, National Research Foundation of Korea, Basic Science Research Program, Ministry of Education, Science and Technology (MOEST)",21.0, +23325619,miRCancer,0.985522926,miRCancer,0.985522926,,0,1,http://mircancer.ecu.edu,200,,"(35.6127,-77.3663)",http://web.archive.org/web/20220517213213/http://mircancer.ecu.edu/,2013-01-16,"Department of Computer Science, East Carolina University, Greenville, NC 27858 and Department of Physiology, Brody School of Medicine, East Carolina University, Greenville, NC 27834, USA.","Xie B, Ding Q, Han H, Wu D",,,209.0,United States +23585830,MitoLSDB,0.996946871,MitoLSDB,0.996946871,,0,1,http://mitolsdb.igib.res.in,"HTTPConnectionPool(host='mitolsdb.igib.res.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20120331063255/http://mitolsdb.igib.res.in:80/,2013-04-09,"Department of Bioinformatics, Stella Maris College, University of Madras, Chennai, Tamil Nadu, India.","K S, Jalali S, Scaria V, Bhardwaj A",,,12.0,India +23619930,MRIdb,0.995113969,MRIdb,0.995113969,,0,1,http://www3.imperial.ac.uk/bioinfsupport/resources/software/mridb,301,,"(51.5085,-0.1257)",no_wayback,2013-10-01,"Centre for Integrative Systems Biology and Bioinformatics, Imperial College London, London, SW7 2AZ, UK, m.woodbridge@imperial.ac.uk.","Woodbridge M, Fagiolo G, O'Regan DP",,British Heart Foundation,12.0, +23860041,MGDD,0.994052202,MGDD,0.994052202,Moroccan Genetic Disease Database,0.972358629,1,http://mgdd.pasteur.ma,200,,"(33.5883,-7.6114)",http://web.archive.org/web/20221011160125/http://mgdd.pasteur.ma/,2013-07-17,"1] Laboratoire de Génétique Moléculaire Humaine, Département de Recherche Scientifique, Institut Pasteur du 
Maroc, Casablanca, Morocco [2] Laboratoire d'Agroalimentaire et Santé, Faculté des Sciences et Techniques, Université Hassan I, Settat, Morocco.","Charoute H, Nahili H, Abidi O, Gabi K, Rouba H, Fakiri M, Barakat A",,,6.0,"Morocco, Morocco" +23864220,MisPred,0.997501791,MisPred,0.997501791,,0,1,http://www.mispred.com,"HTTPConnectionPool(host='www.mispred.com', port=80): Max retries exceeded with url: / (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,http://web.archive.org/web/20190816044818/http://www.mispred.com:80/,2013-07-17,"Institute of Enzymology, Research Centre for Natural Sciences, Hungarian Academy of Sciences, H-1113 Budapest, Hungary.","Nagy A, Patthy L",,,11.0,Hungary +23875173,MelanomaDB,0.994151652,MelanomaDB,0.994151652,,0,1,"http://genesetdb.auckland.ac.nz/melanomadb/about.html, http://www.biomatters.com/apps/melanoma-profiler-for-research","302, 301",,"(-36.8485,174.7635), (38.8951,-77.0364)","http://web.archive.org/web/20220127130411/https://genesetdb.auckland.ac.nz/melanomadb/about.html, no_wayback",2013-07-16,"Department of Molecular Medicine and Pathology, School of Medical Sciences, University of Auckland , Auckland , New Zealand.","Trevarton AJ, Mann MB, Knapp C, Araki H, Wren JD, Stones-Havas S, Black MA, Print CG",,NIGMS NIH HHS,9.0,New Zealand +23894139,mpMoRFsDB,0.948040187,mpMoRFsDB,0.948040187,,0,1,http://bioinformatics.biol.uoa.gr/mpMoRFsDB,301,,"(37.9757,23.7691)",http://web.archive.org/web/20220303065913/http://bioinformatics.biol.uoa.gr/mpMoRFsDB/,2013-07-26,"Faculty of Biology, Department of Cell Biology and Biophysics, University of Athens, Panepistimiopolis, Athens 157 01, Greece.","Gypas F, Tsaousis GN, Hamodrakas SJ",,,4.0,Greece +23935057,metabolicMine,0.990483761,metabolicMine,0.990483761,,0,1,http://www.metabolicmine.org,301,,"(51.2302,6.7135)",http://web.archive.org/web/20160131131738/http://www.metabolicmine.org/,2013-08-09,"Cambridge Systems Biology 
Centre, University of Cambridge, Cambridge CB2 1QR, UK.","Lyne M, Smith RN, Lyne R, Aleksic J, Hu F, Kalderimis A, Stepan R, Micklem G",,Wellcome Trust,11.0, +"23955518, 29668970",MitoFish,0.996900797,MitoFish,0.996900797,,0,2,http://mitofish.aori.u-tokyo.ac.jp,200,,"(35.9053,139.9324)",http://web.archive.org/web/20220616143519/http://mitofish.aori.u-tokyo.ac.jp/,2018-06-01,"Atmosphere and Ocean Research Institute, The University of Tokyo, Kashiwa, Chiba, Japan., Center for Strategic Research Project, Organization for Research Promotion, University of the Ryukyus, Okinawa, Japan.","Iwasaki W, Fukunaga T, Isagozawa R, Yamada K, Maeda Y, Satoh TP, Sado T, Mabuchi K, Takeshima H, Miya M, Nishida M, Sato Y, Miya M, Fukunaga T, Sado T, Iwasaki W",", ",", Japan Society for the Promotion of Science, Japan Society for the Promotion of Science, the University of the Ryukyus, the Canon Foundation, Japan Society for the Promotion of Science, Japan Science and Technology Agency, Japan Society for the Promotion of Science",330.0,"Japan, Japan" +24125644,MarinegenomicsDB,0.994072795,MarinegenomicsDB,0.994072795,,0,1,http://marinegenomics.oist.jp,301,,"(35.2295,136.8581)",http://web.archive.org/web/20200626081132/https://marinegenomics.oist.jp/,2013-10-01,"Marine Genomics Unit, Okinawa Institute of Science and Technology, Tancha 1919-1, Onna, Kunigami, Okinawa 904-0495, Japan.","Koyanagi R, Takeuchi T, Hisata K, Gyoja F, Shoguchi E, Satoh N, Kawashima T",,,14.0,Japan +24167507,MASCP Gator,0.840257004,MASCP Gator,0.840257004,,0,1,http://gator.masc-proteomics.org,302,,"(43.7001,-79.4163)",http://web.archive.org/web/20191003101614/http://gator.masc-proteomics.org:80/,2013-10-23,"Joint BioEnergy Institute and Physical Biosciences Division, Lawrence Berkeley National Laboratory Berkeley, CA, USA.","Mann GW, Calley PC, Joshi HJ, Heazlewood JL",,,10.0,United States +24170808,MitoBreak,0.998078406,MitoBreak,0.998078406,mitochondrial DNA breakpoints 
database,0.974964321,1,http://mitobreak.portugene.com,410,,,http://web.archive.org/web/20221016204714/https://mitobreak.portugene.com/,2013-10-28,"Institute of Molecular Pathology and Immunology of the University of Porto (IPATIMUP), Rua Dr. Roberto Frias s/n, Porto 4200-465, Portugal, Faculty of Sciences, University of Porto, Rua do Campo Alegre, s/n, Porto 4169-007, Portugal and Interdisciplinary Centre of Marine and Environmental Research (CIIMAR/CIMAR), University of Porto, Rua dos Bragas 289, Porto 4050-123, Portugal.","Damas J, Carneiro J, Amorim A, Pereira F",,,24.0,"Portugal, Portugal, Portugal" +24194596,MP:PD,0.98293969,MP:PD,0.98293969,membrane protein packing database,0.938239947,1,http://proteinformatics.charite.de/mppd,404,,,http://web.archive.org/web/20210119121740/http://proteinformatics.charite.de/mppd/,2013-11-04,"Charité University Medicine Berlin, Institute of Medical Physics and Biophysics, ProteinFormatics Group, Charitéplatz 1, 10117 Berlin and Charité University Medicine Berlin, Institute for Physiology, Structural Bioinformatics Group, Lindenberger Weg 80, 13125 Berlin.","Rose A, Theune D, Goede A, Hildebrand PW",,,7.0, +24203705,MetaRef,0.996002674,MetaRef,0.996002674,,0,1,http://metaref.org,200,,"(42.2007,-83.0276)",http://web.archive.org/web/20221011165000/http://metaref.org/,2013-11-06,"Genome Sequencing and Analysis Program, Broad Institute of MIT and Harvard, 7 Cambridge Center, Cambridge, MA 02142, USA, Institute for Genome Sciences, University of Maryland School of Medicine, 801 W Baltimore St, Baltimore, MD 21201, USA, Biostatistics Department, Harvard School of Public Health, 655 Huntington Avenue, Boston, MA 02115, USA and Centre for Integrative Biology, University of Trento, via Sommarive 14, 38123 Povo (Trento), Italy.","Huang K, Brady A, Mahurkar A, White O, Gevers D, Huttenhower C, Segata N",,"NHGRI NIH HHS, NIAID NIH HHS, NIDDK NIH HHS, NHGRI NIH HHS",34.0,"Italy, United States, United States, United States" 
+24214963,MCDRP,0.995357215,MCDRP,0.995357215,Manually Curated Database of Rice Proteins,0.991914093,1,http://www.genomeindia.org/biocuration,403,,,http://web.archive.org/web/20220305133425/http://www.genomeindia.org/biocuration/,2013-11-07,"Department of Plant Molecular Biology, University of Delhi South Campus, Benito Juarez Road, New Delhi - 110021, India.","Gour P, Garg P, Jain R, Joseph SV, Tyagi AK, Raghuvanshi S",,,11.0,India +24244721,miRStress,0.982590735,miRStress,0.982590735,,0,1,http://mudshark.brookes.ac.uk/MirStress,301,,"(51.7522,-1.2560)",http://web.archive.org/web/20160129172534/http://mudshark.brookes.ac.uk/MirStress,2013-11-14,"Department of Biological and Medical Sciences, Oxford Brookes University, Oxford, United Kingdom.","Jacobs LA, Bewicke-Copley F, Poolman MG, Pink RC, Mulcahy LA, Baker I, Beaman EM, Brooks T, Caley DP, Cowling W, Currie JM, Horsburgh J, Kenehan L, Keyes E, Leite D, Massa D, McDermott-Rouse A, Samuel P, Wood H, Kadhim M, Carter DR",,,17.0,United Kingdom +24253302,MultitaskProtDB,0.998039424,MultitaskProtDB,0.998039424,,0,1,http://wallace.uab.es/multitask,301,,"(41.4911,2.1408)",http://web.archive.org/web/20220503180123/http://wallace.uab.es/multitask/,2013-11-18,"Departament de Bioquímica i Biologia Molecular, Institut de Biotecnologia i Biomedicina, Universitat Autònoma de Barcelona, Bellaterra, Barcelona 08193, Spain,Laboratorio de Inmunología, Universidad de la República Regional Norte-Salto, Rivera 1350, Salto 50000, Uruguay and Departament de Medicina Experimental, Institut de Recerca Biomèdica, Universitat de Lleida, Lleida 25198, Spain.","Hernández S, Ferragut G, Amela I, Perez-Pons J, Piñol J, Mozo-Villarias A, Cedano J, Querol E",,,36.0,"Spain, Spain, Uruguay" +24330312,Marmal-aid,0.976534307,Marmal-aid,0.976534307,,0,1,http://marmal-aid.org,"HTTPConnectionPool(host='marmal-aid.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or 
service not known'))",,,http://web.archive.org/web/20220616085952/https://marmal-aid.org/,2013-12-12,"The Blizard Institute, Barts and The London School of Medicine and Dentistry, Queen Mary University of London, London, UK. r.lowe@qmul.ac.uk.","Lowe R, Rakyan VK",,Biotechnology and Biological Sciences Research Council,33.0, +24391364,MAPS,0.955033183,MAPS,0.955033183,Medicinal plant Activities,0.944907701,1,http://www.mapsdatabase.com,"HTTPConnectionPool(host='www.mapsdatabase.com', port=80): Max retries exceeded with url: / (Caused by ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')))",,,http://web.archive.org/web/20221017015252/https://www.mapsdatabase.com/,2013-12-06,"Department of Bioinformatics and Biotechnology, Government College University Faisalabad (GCUF), Pakistan.","Ashfaq UA, Mumtaz A, Qamar TU, Fatima T",,,11.0,Pakistan +24465676,mUbiSiDa,0.997054636,mUbiSiDa,0.997054636,mammalian Ubiquitination Site Database,0.863494439,1,http://reprod.njmu.edu.cn/mUbiSiDa,301,,"(39.9906,116.2887)",http://web.archive.org/web/20220124032636/http://reprod.njmu.edu.cn/mUbiSiDa/,2014-01-17,"Department of Biomedical Engineering, Nanjing University of Aeronautics and Astronautics, Nanjing, China.","Chen T, Zhou T, He B, Yu H, Guo X, Song X, Sha J",,,32.0,China +24475242,MetaMetaDB,0.99579674,MetaMetaDB,0.99579674,Metagenomic DataBase,0.651920029,1,http://mmdb.aori.u-tokyo.ac.jp,200,,"(35.9053,139.9324)",http://web.archive.org/web/20221017000558/http://mmdb.aori.u-tokyo.ac.jp/,2014-01-27,"Atmosphere and Ocean Research Institute, the University of Tokyo, Kashiwa, Chiba, Japan.","Yang CC, Iwasaki W",,,12.0,Japan +24536078,MICdb,0.989687264,MICdb,0.989687264,,0,1,http://www.cdfd.org.in/micas,301,,"(17.3840,78.4564)",http://web.archive.org/web/20220526020442/http://www.cdfd.org.in/micas/,2014-02-17,"Department of Computer Science & Engineering, Grandhi Varalakshmi Venkatarao Institute of Technology, Bhimavaram, Andhra Pradesh 534 207, 
India, Training & Delivery Department, TalentSprint Educational Services, IIIT Campus, Hyderabad, Andhra Pradesh 500 032, India and Laboratory of Computational Biology, Centre for DNA Fingerprinting & Diagnostics, Hyderabad, Andhra Pradesh 500 001, India.","Mudunuri SB, Patnana S, Nagarajaram HA",,,3.0,"India, India, India" +24561221,MitoSatPlant,0.997031987,MitoSatPlant,0.997031987,,0,1,http://compubio.in/mitosatplant,"HTTPConnectionPool(host='compubio.in', port=80): Max retries exceeded with url: /mitosatplant (Caused by ConnectTimeoutError(, 'Connection to compubio.in timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20181106105538/http://compubio.in:80/mitosatplant/,2014-02-19,"Department of Computer Science, Banasthali University, Banasthali, 304022 Rajasthan, India.","Kumar M, Kapil A, Shanker A",,"University Grants Commission, India",10.0,India +24828308,MeKO,0.985758662,MeKO,0.985758662,Metabolite Profiling Database for Knock-O,0.698294673,1,http://prime.psc.riken.jp/meko,403,,,http://web.archive.org/web/20150511003533/http://prime.psc.riken.jp:80/meko/,2014-05-14,"RIKEN Center for Sustainable Resource Science, Yokohama, Kanagawa 230-0045, Japan (A.F., Mi.K., R.F.M., M.I., Ma.K., N.H., A.W.-T., T.N., T.T., K.S.);Japan Science and Technology Agency, National Bioscience Database Center, Chiyoda-ku, Tokyo 102-0081, Japan (A.F.);Graduate School of Life and Environmental Sciences, University of Tsukuba, Tsukuba, Ibaraki 305-8572, Japan (Mi.K.);Nissan Chemical Industries, Funabashi, Chiba 274-8507, Japan (M.I.);Max-Planck-Institute of Molecular Plant Physiology, 14476 Potsdam-Golm, Germany (T.T.);Department of Genetics Development and Cell Biology (M.H., E.S.W.), Center for Metabolic Biology (E.S.W., B.J.N.), Center for Biorenewable Chemicals (E.S.W., B.J.N.), and Biochemistry, Biophysics, and Molecular Biology (B.J.N.), Iowa State University, Ames, Iowa 50011; andGraduate School of Pharmaceutical Sciences, Chiba University, Chiba-shi, Chiba 
263-8522, Japan (K.S.) a-fukush@psc.riken.jp kazuki.saito@riken.jp.","Fukushima A, Kusano M, Mejia RF, Iwasa M, Kobayashi M, Hayashi N, Watanabe-Takahashi A, Narisawa T, Tohge T, Hur M, Wurtele ES, Nikolau BJ, Saito K",,,23.0,"Germany, Japan, Japan, Japan, Japan, Japan, Japan" +24850854,MetaImprint,0.993931651,MetaImprint,0.993931651,,0,1,http://bioinfo.hrbmu.edu.cn/MetaImprint,"HTTPConnectionPool(host='bioinfo.hrbmu.edu.cn', port=80): Max retries exceeded with url: /MetaImprint (Caused by ConnectTimeoutError(, 'Connection to bioinfo.hrbmu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20191223213500/http://bioinfo.hrbmu.edu.cn:80/MetaImprint/,2014-05-21,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Wei Y, Su J, Liu H, Lv J, Wang F, Yan H, Wen Y, Liu H, Wu Q, Zhang Y",,,16.0,China +24923822,MorusDB,0.993190169,MorusDB,0.993190169,Morus Genome Database,0.793358922,1,http://morus.swu.edu.cn/morusdb,301,,"(31.2222,121.4581)",http://web.archive.org/web/20220806033325/https://morus.swu.edu.cn/morusdb/,2014-06-11,"State Key Laboratory of Silkworm Genome Biology, Southwest University, Chongqing 400715, China.","Li T, Qi X, Zeng Q, Xiang Z, He N",,,18.0,China +25022454,MICAD,0.960550686,MICAD,0.960550686,Molecular Imaging and,0.806334178,1,http://www.polyu.edu.hk/bmi/dipp,301,,"(22.2783,114.1747)",http://web.archive.org/web/20131205023732/http://www.polyu.edu.hk/bmi/dipp/,2014-01-01,None,"Chan LW, Ngo CH, Wang F, Zhao MY, Zhao M, Law HK, Wong SC, Yung BY",,,0.0, +25098325,MediaDB,0.991332531,MediaDB,0.991332531,,0,1,http://mediadb.systemsbiology.net,301,,"(47.6151,-122.3447)",http://web.archive.org/web/20220522222152/https://mediadb.systemsbiology.net/,2014-08-06,"Department of Chemical and Biomolecular Engineering, University of Illinois at Urbana-Champaign, Urbana, Illinois, United States of America; Institute for Systems Biology, Seattle, Washington, United States of America.","Richards MA, 
Cassen V, Heavner BD, Ajami NE, Herrmann A, Simeonidis E, Price ND",,"NIGMS NIH HHS, NIGMS NIH HHS",8.0,"United States, United States" +25145340,Mouse IDGenes,0.925365269,Mouse IDGenes,0.925365269,,0,1,http://mouseidgenes.helmholtz-muenchen.de,403,,,no_wayback,2014-08-20,"Institute of Developmental Genetics, Helmholtz Zentrum München, German Research Center for Environmental Health, Ingolstädter Landstr. 1, 85764 Neuherberg, Germany, Technische Universität München-Weihenstephan, Lehrstuhl für Genetik, Emil-Ramannstr. 8, 85354 Freising, Germany, Institute of Diabetes and Regeneration Research, Helmholtz Zentrum München, German Research Center for Environmental Health, Ingolstädter Landstr. 1, 85764 Neuherberg, Germany, Institute of Computational Biology, Helmholtz Zentrum München, German Research Center for Environmental Health, Ingolstädter Landstr. 1, 85764 Neuherberg, Germany, Technische Universität München, Zentrum Mathematik, Boltzmannstr. 3, 85747 Garching, Germany, Max-Planck-Institute of Psychiatry, Kraepelinstr. 2-10, 80804 München, Germany, Deutsches Zentrum für Neurodegenerative Erkrankungen e. V. (DZNE), Standort München, Schillerstr. 44, 80336 München, Germany, Technische Universität München-Weihenstephan, Lehrstuhl für Entwicklungsgenetik, c/o Helmholtz Zentrum München, Ingolstädter Landstr. 1, 85764 Neuherberg, Germany and Munich Cluster for Systems Neurology (SyNergy), Adolf-Butenandt-Institut, Ludwig-Maximilians-Universität München, Schillerstr. 44, 80336 München, Germany Institute of Developmental Genetics, Helmholtz Zentrum München, German Research Center for Environmental Health, Ingolstädter Landstr. 1, 85764 Neuherberg, Germany, Technische Universität München-Weihenstephan, Lehrstuhl für Genetik, Emil-Ramannstr. 8, 85354 Freising, Germany, Institute of Diabetes and Regeneration Research, Helmholtz Zentrum München, German Research Center for Environmental Health, Ingolstädter Landstr. 
1, 85764 Neuherberg, Germany, Institute of Computational Biology, Helmholtz Zentrum München, German Research Center for Environmental Health, Ingolstädter Landstr. 1, 85764 Neuherberg, Germany, Technische Universität München, Zentrum Mathematik, Boltzmannstr. 3, 85747 Garching, Germany, Max-Planck-In","Matthes M, Preusse M, Zhang J, Schechter J, Mayer D, Lentes B, Theis F, Prakash N, Wurst W, Trümbach D",,,1.0,"Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany, Germany" +25288655,MetaProx,0.996964276,MetaProx,0.996964276,,0,1,http://metaprox.uwaterloo.ca,200,,"(43.4668,-80.5164)",http://web.archive.org/web/20151110042454/http://metaprox.uwaterloo.ca:80/,2014-10-06,"Department of Biology, University of Waterloo, 200 University Ave. West, Waterloo ON, N2L 3G1, Canada gvey@uwaterloo.ca.","Vey G, Charles TC",,,5.0,Canada +"25332399, 26302176",MTB,0.995452483,MTB,0.995452483,Mouse Tumor Biology Database,0.991739839,2,http://tumor.informatics.jax.org,200,,"(44.3876,-68.2039)",http://web.archive.org/web/20220806121117/http://www.tumor.informatics.jax.org/,2015-08-21,"The Jackson Laboratory, 600 Main Street, Bar Harbor, ME, USA carol.bult@jax.org., The Jackson Laboratory, Bar Harbor, ME USA. 
Electronic address: dale.begley@jax.org.","Bult CJ, Krupke DM, Begley DA, Richardson JE, Neuhauser SB, Sundberg JP, Eppig JT, Begley DA, Sundberg JP, Krupke DM, Neuhauser SB, Bult CJ, Eppig JT, Morse HC 3rd, Ward JM",", ","NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, Intramural NIH HHS, NCI NIH HHS, NCI NIH HHS",38.0,"United States, United States" +25347823,MitProNet,0.994459689,MitProNet,0.994459689,,0,1,http://bio.scu.edu.cn:8085/MitProNet,"HTTPConnectionPool(host='bio.scu.edu.cn', port=8085): Max retries exceeded with url: /MitProNet (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 101] Network is unreachable'))",,,no_wayback,2014-10-27,"College of Life Sciences, Sichuan University, Ministry of Education Key Laboratory for Bio-resource and Eco-environment, Sichuan Key Laboratory of Molecular Biology and Biotechnology, Chengdu, People's Republic of China.","Wang J, Yang J, Mao S, Chai X, Hu Y, Hou X, Tang Y, Bi C, Li X",,,2.0,China +"25378301, 31504780",miRDB,0.996351361,miRDB,0.996351361,,0,2,http://mirdb.org,301,,"(41.8500,-87.6500)",http://web.archive.org/web/20221108115838/https://mirdb.org/,2020-01-01,"Department of Biomedical Engineering, Washington University, St. Louis, MO 63130, USA Department of Radiation Oncology, Washington University School of Medicine, St. 
Louis, MO 63108, USA., Department of Radiation Oncology, Washington University School of Medicine, St Louis, MO, USA.","Wong N, Wang X, Chen Y, Wang X",", ","NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIDCR NIH HHS, National Institutes of Health, National Institutes of Health",1396.0,"United States, United States, United States" +25378329,MatrixDB,0.997984529,MatrixDB,0.997984529,the extracellular matrix interaction database,0.829914348,1,http://matrixdb.ibcp.fr,200,,"(48.8534,2.3488)",http://web.archive.org/web/20220127123540/http://matrixdb.ibcp.fr/,2014-11-06,"UMR 5086 CNRS - Université Lyon 1, 69367 Lyon Cedex 07, France.","Launay G, Salza R, Multedo D, Thierry-Mieg N, Ricard-Blum S",,,58.0,France +25380778,MelGene,0.995092273,MelGene,0.995092273,,0,1,http://www.melgene.org,"HTTPConnectionPool(host='www.melgene.org', port=80): Max retries exceeded with url: / (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,http://web.archive.org/web/20180810120513/http://melgene.org/,2014-11-07,"Center of Systems Biology, Biomedical Research Foundation, Academy of Athens, Soranou Ephessiou 4, 115 27 Athens, GR, Greece, Department of Dermatology, University of Athens, School of Medicine, Andreas Sygros Hospital, Ι. 
Dragoumi 5, 161 21 Athens, GR, Greece, Department of Vertebrate Genomics, Neuropsychiatric Genetics Group, Max Planck Institute for Molecular Genetics, Ihnestraße 63-73, 14195 Berlin, DE, Germany, Department of Neurology, Focus Program Translational Neuroscience, University Medical Center of the Johannes Gutenberg University Mainz, Mainz, DE, Germany, Department of Hygiene and Epidemiology, Clinical and Molecular Epidemiology Unit, School of Medicine, University of Ioannina, 451 10 Ioannina, GR, Greece, Department of Epidemiology and Biostatistics, Imperial College London, St Mary's Campus, Norfolk Place, W2 1PG, London, UK, Department of Medicine Stanford Prevention Research Center, Stanford University School of Medicine, Stanford, CA, USA, Department of Health Research and Policy, Stanford Prevention Research Center, Stanford University School of Medicine, CA, USA, Department of Statistics, Stanford University School of Humanities and Sciences, Stanford, CA, USA and Department of Medicine, School of Public Health, Imperial College London, Sir Alexander Fleming Building, South Kensington Campus, London, UK.","Athanasiadis EI, Antonopoulou K, Chatzinasiou F, Lill CM, Bourdakou MM, Sakellariou A, Kypreou K, Stefanaki I, Evangelou E, Ioannidis JP, Bertram L, Stratigos AJ, Spyrou GM",,,4.0,"Germany, Germany, Greece, Greece, Greece, United States, United States, United States" +25392421,MiCroKiTS,0.991478801,MiCroKiTS,0.991478801,,0,1,http://microkit.biocuckoo.org,200,,"(40.2338,-111.6585)",http://web.archive.org/web/20221006220858/https://microkit.biocuckoo.org/,2014-11-11,"Department of Biomedical Engineering, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China.","Huang Z, Ma L, Wang Y, Pan Z, Ren J, Liu Z, Xue Y",,,14.0,China +25432968,Medicago truncatula genome database,0.925092287,MedicMine,0.649071693,Medicago truncatula genome 
database,0.925092287,1,http://www.MedicagoGenome.org,"HTTPConnectionPool(host='www.medicagogenome.org', port=80): Max retries exceeded with url: / (Caused by ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')))",,,http://web.archive.org/web/20220904050336/http://www.medicagogenome.org/,2014-11-28,"Plant Genomics Group, J. Craig Venter Institute, 9704 Medical Center Dr, Rockville, MD 20850, USA vkrishna@jcvi.org.","Krishnakumar V, Kim M, Rosen BD, Karamycheva S, Bidwell SL, Tang H, Town CD",,,37.0,United States +25432973,MCG,0.953158339,MCG,0.953158339,Maize Cell Genomics,0.678850925,1,http://maize.jcvi.org/cellgenomics,302,,"(32.7157,-117.1647)",http://web.archive.org/web/20210121022208/http://maize.jcvi.org/cellgenomics/,2014-11-27,"The J. Craig Venter Institute, Rockville, MD, USA.","Krishnakumar V, Choi Y, Beck E, Wu Q, Luo A, Sylvester A, Jackson D, Chan AP",,,13.0,United States +25527833,mirPub,0.996326447,mirPub,0.996326447,,0,1,http://www.microrna.gr/mirpub,301,,"(37.9838,23.7278)",http://web.archive.org/web/20220517102833/http://www.microrna.gr/mirpub/,2014-12-20,"School of Electrical and Computer Engineering, NTUA, Zografou 15773, IMIS Institute, 'Athena' RC, Marousi 15125, DIANA-Lab, Institute of Molecular Oncology, BSRC 'Alexander Fleming', Vari 16672, Department of Computer & Communication Engineering, University of Thessaly, Volos 38221, Greece and School of Computer Science & Info Tech, RMIT University, Melbourne 3001, Australia School of Electrical and Computer Engineering, NTUA, Zografou 15773, IMIS Institute, 'Athena' RC, Marousi 15125, DIANA-Lab, Institute of Molecular Oncology, BSRC 'Alexander Fleming', Vari 16672, Department of Computer & Communication Engineering, University of Thessaly, Volos 38221, Greece and School of Computer Science & Info Tech, RMIT University, Melbourne 3001, Australia.","Vergoulis T, Kanellos I, Kostoulas N, Georgakilas G, Sellis T, Hatzigeorgiou A, Dalamagas T",,,12.0,"Australia, 
Australia, Greece, Greece" +"25542617, 26919060",MSeqDR,0.998155057,MSeqDR,0.998155057,Mitochondrial Disease Sequence Data Resource,0.910063028,2,http://mseqdr.org,"HTTPConnectionPool(host='mseqdr.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to mseqdr.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20221104090551/https://mseqdr.org/,2016-03-21,"Division of Human Genetics, Department of Pediatrics, The Children's Hospital of Philadelphia and University of Pennsylvania Perelman School of Medicine, Philadelphia, USA. Electronic address: falkm@email.chop.edu., Center for Personalized Medicine, Children's Hospital Los Angeles, Los Angeles, California, USA.","Falk MJ, Shen L, Gonzalez M, Leipzig J, Lott MT, Stassen AP, Diroma MA, Navarro-Gomez D, Yeske P, Bai R, Boles RG, Brilhante V, Ralph D, DaRe JT, Shelton R, Terry SF, Zhang Z, Copeland WC, van Oven M, Prokisch H, Wallace DC, Attimonelli M, Krotoski D, Zuchner S, Gai X, , , , , , , Shen L, Diroma MA, Gonzalez M, Navarro-Gomez D, Leipzig J, Lott MT, van Oven M, Wallace DC, Muraresku CC, Zolkipli-Cunningham Z, Chinnery PF, Attimonelli M, Zuchner S, Falk MJ, Gai X",", ","North American Mitochondrial Disease Consortium pilot award, NHGRI NIH HHS, Great Ormond Street Hospital Childrens Charity, Great Ormond Street Hospital Childrens Charity, Netherlands Genomic Initiative (NGI)/Netherlands Organization for Scientific Research (NWO), NINDS NIH HHS, NINDS NIH HHS, National Institutes of Health, NINDS NIH HHS, NINDS NIH HHS, National Institutes of Health, Medical Research Council, NHGRI NIH HHS, NHGRI NIH HHS, Wellcome Trust, Medical Research Council, Netherlands Genomic Initiative (NGI), United Mitochondrial Disease Foundation, National Institute for Health Research (NIHR), NIH HHS, National Institutes of Health, National Institutes of Health, Dutch Research Council (NWO), NINDS NIH HHS",55.0,"United States, United States" 
+25559128,MUFOLD-DB,0.985240579,MUFOLD-DB,0.985240579,,0,1,http://mufold.org/mufolddb.php,404,,,http://web.archive.org/web/20191229091257/http://mufold.org:80/mufolddb.php,2014-12-16,None,"He Z, Zhang C, Xu Y, Zeng S, Zhang J, Xu D",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",4.0, +25566299,MeioBase,0.997380674,MeioBase,0.997380674,,0,1,http://meiosis.ibcas.ac.cn,"HTTPConnectionPool(host='meiosis.ibcas.ac.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160305183343/http://meiosis.ibcas.ac.cn/,2014-12-16,"State Key Laboratory of Systematic and Evolutionary Botany, Institute of Botany, Chinese Academy of Sciences Beijing, China ; University of Chinese Academy of Sciences Beijing, China.","Li H, Meng F, Guo C, Wang Y, Xie X, Zhu T, Zhou S, Ma H, Shan H, Kong H",,,2.0,"China, China" +25725060,MnTEdb,0.992904842,MnTEdb,0.992904842,,0,1,http://morus.swu.edu.cn/mntedb,301,,"(31.2222,121.4581)",http://web.archive.org/web/20220616154237/https://morus.swu.edu.cn/mntedb/,2015-02-27,"State Key Laboratory of Silkworm Genome Biology, Southwest University, Chongqing 400715, China.","Ma B, Li T, Xiang Z, He N",,,10.0,China +"25858286, 28439836",miRGate,0.99535054,miRGate,0.99535054,,0,2,"http://mirgate.bioinfo.cnio.es, http://mirgate.bioinfo.cnio.es/API","302, 302",,"(40.9688,-5.6639), (40.9688,-5.6639)","http://web.archive.org/web/20130504151915/http://mirgate.bioinfo.cnio.es, http://web.archive.org/web/20220615134619/http://mirgate.bioinfo.cnio.es/API/",2017-01-01,"Bioinformatics Unit (UBio), Structural Biology and Biocomputing Programme, Spanish National Cancer Research Centre (CNIO), Madrid, Spain and High Technical School of Computer Engineering, University of Vigo, Ourense, Spain eleon-ibis@us.es., Bioinformatics Unit, Instituto de Parasitología y Biomedicina ""López Neyra"", Consejo Superior de 
Investigaciones Científicas (IPBLN-CSIC), PTS Granada, Granada, 18016, Spain. eduardo.andres@csic.es.","Andrés-León E, González Peña D, Gómez-López G, Pisano DG, Andrés-León E, Gómez-López G, Pisano DG",", ",", ",36.0,"Spain, Spain, Spain" +25861770,MetaMirClust,0.966021895,MetaMirClust,0.966021895,,0,1,http://fgfr.ibms.sinic.aedu.tw/MetaMirClust,"HTTPConnectionPool(host='fgfr.ibms.sinic.aedu.tw', port=80): Max retries exceeded with url: /MetaMirClust (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2016-01-01,"Institute of Plant and Microbial Biology, Academia Sinica, Taipei, Taiwan, Republic of China. wenching.chan@gmail.com.","Chan WC, Lin WC",,,5.0,China +25877638,miRNASNP,0.998172939,miRNASNP,0.998172939,,0,1,http://bioinfo.life,200,,"(27.9425,-82.5057)",http://web.archive.org/web/20220618171649/https://bioinfo.life/,2015-04-15,"Department of Biomedical Engineering, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, People's Republic of China and Department of Epidemiology and Biostatistics, School of Public Health, Tongji Medical College, Huazhong University of Science and Technology, Wuhan, Hubei 430074, People's Republic of China Department of Biomedical Engineering, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, People's Republic of China and Department of Epidemiology and Biostatistics, School of Public Health, Tongji Medical College, Huazhong University of Science and Technology, Wuhan, Hubei 430074, People's Republic of China.","Gong J, Liu C, Liu W, Wu Y, Ma Z, Chen H, Guo AY",,,51.0,"China, China, China, China" +25889518,miREC,0.907016993,miREC,0.907016993,,0,1,http://www.mirecdb.org,"HTTPConnectionPool(host='www.mirecdb.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or 
service not known'))",,,http://web.archive.org/web/20180325071636/http://mirecdb.org/,2015-03-28,"Systems Biology Research Centre - Bioinformatics, University of Skövde, Box 408, 541 28, Skövde, Sweden. benjamin.ulfenborg@his.se.","Ulfenborg B, Jurcevic S, Lindlöf A, Klinga-Levan K, Olsson B",,,6.0,Sweden +25905099,Metabolonote,0.994843602,Metabolonote,0.994843602,Metabolome,0.64512068,1,http://metabolonote.kazusa.or.jp,301,,"(35.3343,139.4072)",no_wayback,2015-04-07,"Department of Technology Development, Kazusa DNA Research Institute , Kisarazu , Japan ; National Bioscience Database Center (NBDC), Japan Science and Technology Agency (JST) , Tokyo , Japan.","Ara T, Enomoto M, Arita M, Ikeda C, Kera K, Yamada M, Nishioka T, Ikeda T, Nihei Y, Shibata D, Kanaya S, Sakurai N",,,16.0,"Japan, Japan, Japan" +25953081,MIsoMine,0.996668458,MIsoMine,0.996668458,,0,1,http://guanlab.ccmb.med.umich.edu/misomine,301,,"(34.0522,-118.2437)",http://web.archive.org/web/20191120075117/http://guanlab.ccmb.med.umich.edu:80/misomine/,2015-05-07,"Department of Computational Medicine and Bioinformatics, Department of Internal Medicine and Department of Electrical Engineering and Computer Science, University of Michigan, Ann Arbor, MI 48109, USA.","Li HD, Omenn GS, Guan Y",,"NINDS NIH HHS, NIEHS NIH HHS",10.0,United States +26106450,Metrabase,0.998185456,Metrabase,0.998185456,,0,1,http://www-metrabase.ch.cam.ac.uk,302,,"(52.2000,0.1167)",http://web.archive.org/web/20221005223458/https://www-metrabase.ch.cam.ac.uk/,2015-06-23,"The Centre for Molecular Informatics, Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge, CB2 1EW UK ; European Molecular Biology Laboratory - European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SD UK.","Mak L, Marcus D, Howlett A, Yarova G, Duchateau G, Klaffke W, Bender A, Glen RC",,,19.0, +26173767,MFMD,0.991956189,MFMD,0.991956189,Mediterranean Founder Mutation 
Database,0.945347416,1,http://mfmd.pasteur.ma,200,,"(33.5883,-7.6114)",http://web.archive.org/web/20211205081656/http://mfmd.pasteur.ma/,2015-07-30,"Laboratory of Agri-food and Health, Faculty of Sciences and Techniques, Hassan 1st University, BP 577, 26000, Settat, Morocco.","Charoute H, Bakhchane A, Benrahma H, Romdhane L, Gabi K, Rouba H, Fakiri M, Abdelhak S, Lenaers G, Barakat A",,,4.0,Morocco +26243198,miRegulome,0.996385574,miRegulome,0.996385574,,0,1,http://bnet.egr.vcu.edu/miRegulome,"HTTPConnectionPool(host='bnet.egr.vcu.edu', port=80): Max retries exceeded with url: /miRegulome (Caused by ConnectTimeoutError(, 'Connection to bnet.egr.vcu.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20210411214614/http://bnet.egr.vcu.edu/miRegulome/,2015-08-05,"Centre for Genomics and Applied Gene Technology, Institute of Integrative Omics and Applied Biotechnology (IIOAB), Nonakuri, Purba Medinipur, WB-721172, India.","Barh D, Kamapantula B, Jain N, Nalluri J, Bhattacharya A, Juneja L, Barve N, Tiwari S, Miyoshi A, Azevedo V, Blum K, Kumar A, Silva A, Ghosh P",,,4.0,India +26255309,MetazSecKB,0.997505963,MetazSecKB,0.997505963,,0,1,http://proteomics.ysu.edu/secretomes/animal/index.php,200,,"(41.0998,-80.6495)",http://web.archive.org/web/20220308060217/http://proteomics.ysu.edu/secretomes/animal/index.php,2015-08-08,"Department of Computer Science and Information Systems, Center for Applied Chemical Biology and.","Meinken J, Walker G, Cooper CR, Min XJ",,,31.0, +26286928,MicRhoDE,0.990796208,MicRhoDE,0.990796208,,0,1,http://micrhode.sb-roscoff.fr,301,,"(48.7238,-3.9871)",no_wayback,2015-08-18,"CNRS, UMR 7144, Marine Phototrophic Prokaryotes Team, Sorbonne Universités, UPMC Univ Paris 06, UMR 7144, Oceanic Plankton Group.","Boeuf D, Audic S, Brillet-Guéguen L, Caron C, Jeanthon C",,,19.0, +26322134,Metabolic In silico Network Expansions,0.687488483,MINEs,0.649246573,Metabolic In silico Network 
Expansions,0.687488483,1,http://minedatabase.mcs.anl.gov,301,,"(41.6736,-88.0017)",http://web.archive.org/web/20221003021237/https://minedatabase.mcs.anl.gov/,2015-08-28,"Department of Chemical and Biological Engineering, Northwestern University, Evanston, IL USA ; Mathematics and Computer Science Division, Argonne National Laboratory, Argonne, IL USA.","Jeffryes JG, Colastani RL, Elbadawi-Sidhu M, Kind T, Niehaus TD, Broadbelt LJ, Hanson AD, Fiehn O, Tyo KE, Henry CS",,"National Institutes of Health, National Science Foundation, National Science Foundation, National Science Foundation, NIDDK NIH HHS",57.0,"United States, United States" +26450962,MEPD,0.996673008,MEPD,0.996673008,Medaka Expression Pattern Database,0.981669056,1,http://mepd.cos.uni-heidelberg.de,200,,"(49.4077,8.6908)",http://web.archive.org/web/20221015230119/http://mepd.cos.uni-heidelberg.de/,2015-10-07,"Department of Computing Systems, University of Castilla-La Mancha, Albacete, 02071, Spain.","Alonso-Barba JI, Rahman RU, Wittbrodt J, Mateo JL",,,4.0,Spain +26486520,MSGene,0.992402017,MSGene,0.992402017,,0,1,http://MSGene.bioinfo-minzhao.org,406,,,http://web.archive.org/web/20221017050202/http://msgene.bioinfo-minzhao.org/,2015-10-21,"School of Engineering, Faculty of Science, Health, Education and Engineering, University of the Sunshine Coast, Maroochydore DC, Queensland, 4558, Australia.","Zhao M, Li Z, Qu H",,,14.0,Australia +26490638,MtiBase,0.993986785,MtiBase,0.993986785,,0,1,http://mtibase.sysu.edu.cn,"HTTPConnectionPool(host='mtibase.sysu.edu.cn', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to mtibase.sysu.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20170223232357/http://mtibase.sysu.edu.cn:80/,2015-01-01,"Key Laboratory of Gene Engineering of the Ministry of Education, State Key Laboratory of Biocontrol.","Guo ZW, Xie C, Yang JR, Li JH, Yang JH, Zheng L",,,10.0, +26513174,MDP,0.993027329,MDP,0.993027329,and Drugs Portal,0.741629084,1,http://mdp.unimore.it,200,,"(44.6478,10.9254)",no_wayback,2015-11-01,"Department of Life Sciences, University of Modena and Reggio Emilia, Modena 41125, Italy.","Taccioli C, Sorrentino G, Zannini A, Caroli J, Beneventano D, Anderlucci L, Lolli M, Bicciato S, Del Sal G",,,29.0,Italy +26527720,MetaNetX,0.997939885,MetaNetX,0.997939885,,0,1,http://www.metanetx.org,301,,"(46.5160,6.6328)",http://web.archive.org/web/20221006190123/https://www.metanetx.org/,2015-11-02,"Vital-IT group, SIB Swiss Institute of Bioinformatics, Lausanne 1015, Switzerland Department of Ecology and Evolution, Biophore, Evolutionary Bioinformatics group, University of Lausanne, Lausanne 1015, Switzerland.","Moretti S, Martin O, Van Du Tran T, Bridge A, Morgat A, Pagni M",,,66.0,"Switzerland, Switzerland" +26527726,MouseNet,0.958441854,MouseNet,0.958441854,,0,1,http://www.inetbio.org/mousenet,301,,"(37.5598,126.9439)",http://web.archive.org/web/20220525060408/http://www.inetbio.org/mousenet/,2015-11-02,"Department of Biotechnology, College of Life Science and Biotechnology, Yonsei University, Seoul, Korea.","Kim E, Hwang S, Kim H, Shim H, Kang B, Yang S, Shim JH, Shin SY, Marcotte EM, Lee I",,NIGMS NIH HHS,21.0, +26590264,MutationAligner,0.997395098,MutationAligner,0.997395098,,0,1,http://www.mutationaligner.org,200,,"(34.0522,-118.2437)",http://web.archive.org/web/20221017135255/http://www.mutationaligner.org/,2015-11-20,"Computational Biology Center, Memorial Sloan Kettering Cancer Center, New York, NY 10065, USA mutalign@gmail.com.","Gauthier NP, Reznik E, Gao J, Sumer SO, Schultz N, Sander C, Miller ML",,"Cancer Research UK, NCI NIH HHS, Cancer Research 
UK, NIGMS NIH HHS, NIGMS NIH HHS, NCI NIH HHS",8.0,United States +26656948,MG-RAST,0.996288791,MG-RAST,0.996288791,,0,1,http://metagenomics.anl.gov,302,,"(41.6736,-88.0017)",http://web.archive.org/web/20170809173012/http://metagenomics.anl.gov/,2015-12-09,"Argonne National Laboratory, Mathematics and Computer Science Division, 60439 Argonne, IL, USA University of Chicago, Chicago 60637, IL, USA.","Wilke A, Bischof J, Gerlach W, Glass E, Harrison T, Keegan KP, Paczian T, Trimble WL, Bagchi S, Grama A, Chaterji S, Meyer F",,"NIAID NIH HHS, NHGRI NIH HHS",71.0,"United States, United States" +26822098,MTD,0.986400823,MTD,0.986400823,mammalian transcriptomic database,0.553143755,1,http://mtd.cbi.ac.cn,301,,"(39.9075,116.3972)",http://web.archive.org/web/20211020115238/http://mtd.cbi.ac.cn/,2016-01-27,None,"Sheng X, Wu J, Sun Q, Li X, Xian F, Sun M, Fang W, Chen M, Yu J, Xiao J",,,9.0, +27069559,MD-CTS,0.988920406,MD-CTS,0.988920406,of Clinical and,0.767214457,1,http://spellchecker.mfldclin.edu,404,,,no_wayback,2016-03-02,"Biomedical Informatics Research Center, Marshfield Clinic Research Foundation, Marshfield, WI, USA.","Ray W, Finamore J, Rastegar-Mojarad M, Kadolph C, Ye Z, Bohne J, Xu Y, Burish D, Sondelski J, Easker M, Finnegan B, Bartkowiak B, Smith CA, Tachinardi U, Mendonca EA, Weichelt B, Lin SM",,"NCATS NIH HHS, NIH National Center for Advancing Translational Sciences (NCATS)",0.0,United States +27167218,MiasDB,0.997192025,MiasDB,0.997192025,,0,1,http://47.88.84.236/Miasdb,301,,"(37.3394,-121.8950)",http://web.archive.org/web/20220617205229/http://47.88.84.236/Miasdb/,2016-05-11,"School of Life Science and Technology, Inner Mongolia University of Science and Technology, Baotou, 014010, China.","Xing Y, Zhao X, Yu T, Liang D, Li J, Wei G, Liu G, Cui X, Zhao H, Cai L",,"Natural Science Foundation of Inner Mongolia, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Natural 
Science Foundation of Inner Mongolia, Natural Science Foundation of Inner Mongolia, Natural Science Foundation of Inner Mongolia, Inner Mongolia Science & Technology Plan, National Natural Science Foundation of China, National Natural Science Foundation of China",7.0,"China, Mongolia" +27504011,MODEM,0.9967134,MODEM,0.9967134,,0,1,http://modem.hzau.edu.cn,200,,"(30.5833,114.2667)",http://web.archive.org/web/20220120235623/http://modem.hzau.edu.cn/,2016-08-07,"National Key Laboratory of Crop Genetic Improvement, Huazhong Agricultural University, Wuhan 430070, China.","Liu H, Wang F, Xiao Y, Tian Z, Wen W, Zhang X, Chen X, Liu N, Li W, Liu L, Liu J, Yan J, Liu J",,,10.0,China +27509041,MMpI,0.985122144,MMpI,0.985122144,,0,1,http://clri.res.in/subramanian/databases/mmpi/index.php,200,,"(11.9338,79.8298)",http://web.archive.org/web/20200730224732/http://clri.res.in:80/subramanian/databases/mmpi/index.php,2016-08-10,"Chemical Laboratory, Council of Scientific and Industrial Research-Central Leather Research Institute, Chennai, India.","Muvva C, Patra S, Venkatesan S",,,3.0,India +27510400,Membranome,0.99290514,Membranome,0.99290514,,0,1,http://membranome.org,301,,"(37.7621,-122.3971)",no_wayback,2016-08-10,"Department of Medicinal Chemistry, College of Pharmacy, University of Michigan, Ann Arbor, MI 48109-1065, USA almz@umich.edu.","Lomize AL, Lomize MA, Krolicki SR, Pogozheva ID",,,20.0,United States +27681445,MPD3,0.968090475,MPD3,0.968090475,Medicinal Plants Database for,0.855982542,1,http://bioinform.info,"HTTPConnectionPool(host='bioinform.info', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190915135042/http://bioinform.info/,2016-09-28,"a Department of Bioinformatics and Biotechnology , Government College, University Faisalabad (GCUF) , Faisalabad , Pakistan.","Mumtaz A, Ashfaq UA, Ul Qamar MT, Anwar F, Gulzar F, Ali MA, 
Saari N, Pervez MT",,,23.0,Pakistan +"27742822, 31691816",miRPathDB,0.997528672,miRPathDB,0.997528672,miRNA Pathway Dictionary Database,0.964912802,2,http://mpd.bioinf.uni-sb.de,301,,"(49.2326,7.0098)",http://web.archive.org/web/20221109163605/https://mpd.bioinf.uni-sb.de/,2020-01-01,"Chair for Clinical Bioinformatics, Saarland Informatics Campus, Saarland University, D-66123 Saarbruecken, Germany., Chair for Bioinformatics, Saarland Informatics Campus, 66123 Saarbrücken, Germany.","Backes C, Kehl T, Stöckel D, Fehlmann T, Schneider L, Meese E, Lenhof HP, Keller A, Kehl T, Kern F, Backes C, Fehlmann T, Stöckel D, Meese E, Lenhof HP, Keller A",", ",", Saarland University",107.0,"Germany, Germany" +27799474,mirDNMR,0.996765435,mirDNMR,0.996765435,,0,1,http://www.wzgenomics.cn/mirdnmr,301,,"(31.2222,121.4581)",http://web.archive.org/web/20220712104225/http://www.wzgenomics.cn/mirdnmr/,2016-10-30,"Institute of Genomic Medicine, Wenzhou Medical University, Wenzhou 325000, China.","Jiang Y, Li Z, Liu Z, Chen D, Wu W, Du Y, Ji L, Jin ZB, Li W, Wu J",,Medical Research Council,5.0,China +27822553,mockrobiota,0.997430325,mockrobiota,0.997430325,,0,1,http://caporaso-lab.github.io/mockrobiota,301,,"(37.7621,-122.3971)",no_wayback,2016-09-01,"Center for Microbial Genetics and Genomics, Northern Arizona University, Flagstaff, Arizona, USA.","Bokulich NA, Rideout JR, Mercurio WG, Shiffer A, Wolfe B, Maurice CF, Dutton RJ, Turnbaugh PJ, Knight R, Caporaso JG",,,28.0,United States +"27899569, 31722416",MEGARes,0.933923781,MEGARes,0.933923781,,0,2,http://megares.meglab.org,301,,"(34.0522,-118.2437)",http://web.archive.org/web/20220802125446/https://megares.meglab.org/,2020-01-01,"Department of Clinical Sciences, Colorado State University, Fort Collins, CO 80523, USA., Veterinary Education, Research, and Outreach (VERO) Program, Texas A&M University and West Texas A&M University, Canyon, TX 79016, USA.","Lakin SM, Dean C, Noyes NR, Dettenwanger A, Ross AS, Doster E, Rovira P, Abdo Z, 
Jones KL, Ruiz J, Belk KE, Morley PS, Boucher C, Doster E, Lakin SM, Dean CJ, Wolfe C, Young JG, Boucher C, Belk KE, Noyes NR, Morley PS",", ","NIH HHS, USDA NIFA, NIAID NIH HHS, University of Minnesota, USDA NIFA, NIH HHS, NIH, College of Veterinary Medicine and Biomedical Sciences, Texas A and M University",176.0,"United States, United States" +27899620,MRPrimerV,0.993523359,MRPrimerV,0.993523359,,0,1,http://MRPrimerV.com,200,,"(37.5660,126.9784)",http://web.archive.org/web/20220519000357/http://mrprimerv.com/,2016-11-29,"Department of Information and Communication Engineering, DGIST, Daegu, 42988, South Korea.","Kim H, Kang N, An K, Kim D, Koo J, Kim MS",,,6.0, +27907895,mutLBSgeneDB,0.983370721,mutLBSgeneDB,0.983370721,mutated ligand binding site gene DataBase,0.615566436,1,http://zhaobioinfo.org/mutLBSgeneDB,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20220423150346/http://zhaobioinfo.org/mutLBSgeneDB/,2016-10-07,"Center for Precision Health, School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, TX 77030, USA.","Kim P, Zhao J, Lu P, Zhao Z",,"NCI NIH HHS, NLM NIH HHS",6.0,United States +27924023,MethSMRT,0.995354295,MethSMRT,0.995354295,,0,1,http://sysbio.sysu.edu.cn/methsmrt,"HTTPConnectionPool(host='sysbio.sysu.edu.cn', port=80): Max retries exceeded with url: /methsmrt (Caused by ConnectTimeoutError(, 'Connection to sysbio.sysu.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220320071858/http://sysbio.sysu.edu.cn/methsmrt/,2016-10-18,"State Key Laboratory of Ophthalmology, Guangdong Provincial Key Lab of Ophthalmology and Visual Science, Zhongshan Ophthalmic Center, Sun Yat-sen University, Guangzhou 510060, China.","Ye P, Luan Y, Chen K, Liu Y, Xiao C, Xie Z",,,44.0,China +28108447,miRmine,0.975829124,miRmine,0.975829124,,0,1,http://guanlab.ccmb.med.umich.edu/mirmine,301,,"(34.0522,-118.2437)",http://web.archive.org/web/20190922115557/http://guanlab.ccmb.med.umich.edu:80/mirmine/,2017-05-01,"Department of Computational Medicine and Bioinformatics, University of Michigan, Ann Arbor, MI, USA.","Panwar B, Omenn GS, Guan Y",,,74.0,United States +28365733,miRnalyze,0.994418919,miRnalyze,0.994418919,,0,1,http://www.mirnalyze.in,200,,"(13.2257,77.5750)",http://web.archive.org/web/20221103053005/https://www.mirnalyze.in/,2017-01-01,"School of Medical Science and Technology, Indian Institute of Technology, Kharagpur, West Bengal 721302, India.","Subhra Das S, James M, Paul S, Chakravorty N",,,5.0,India +28365734,MiDAS,0.995514631,MiDAS,0.995514631,,0,1,http://www.midasfieldguide.org,301,,"(40.8344,-74.1377)",http://web.archive.org/web/20221022053358/https://www.midasfieldguide.org/,2017-01-01,"Department of Chemistry and Bioscience, Center for Microbial Communities, Aalborg University, Aalborg DK-9220, Denmark.","McIlroy SJ, Kirkegaard RH, McIlroy B, Nierychlo M, Kristensen JM, Karst SM, Albertsen M, Nielsen PH",,,40.0,Denmark +28481982,McPAS-TCR,0.978659749,McPAS-TCR,0.978659749,,0,1,http://friedmanlab.weizmann.ac.il/McPAS-TCR,"HTTPConnectionPool(host='friedmanlab.weizmann.ac.il', port=80): Max retries exceeded with url: /McPAS-TCR (Caused by ConnectTimeoutError(, 'Connection to friedmanlab.weizmann.ac.il timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220823085114/http://friedmanlab.weizmann.ac.il/McPAS-TCR/,2017-09-01,Department of Immunology.,"Tickotsky N, Sagiv T, Prilusky J, Shifrut E, Friedman N",,Israel Science Foundation,78.0, +28490127,Milk bioactive peptide database,0.679362587,,0,Milk bioactive peptide database,0.679362587,1,http://mbpdb.nws.oregonstate.edu,200,,"(44.5646,-123.2620)",http://web.archive.org/web/20221109072303/http://mbpdb.nws.oregonstate.edu/,2017-04-12,"Nutrition Program, School of Biological and Population Health Sciences, College of Public Health and Human Sciences, Oregon State University, United States.","Nielsen SD, Beverly RL, Qu Y, Dallas DC",,"Eunice Kennedy Shriver Institute, National Institutes of Health, NICHD NIH HHS, Core, University of California, NICHD NIH HHS, Oregon State University",64.0,United States +28608363,MiSynPat,0.997369468,MiSynPat,0.997369468,,0,1,http://misynpat.org,302,,"(48.5839,7.7455)",no_wayback,2017-06-27,"CSTB Complex Systems and Translational Bioinformatics, ICube Laboratory and Strasbourg Federation of Translational Medicine (FMTS), CNRS, Université de Strasbourg, Strasbourg, France.","Moulinier L, Ripp R, Castillo G, Poch O, Sissler M",,"Agence Nationale de la Recherche, Université de Strasbourg, Agence Nationale de la Recherche, Centre National de la Recherche Scientifique, Agence Nationale de la Recherche, Fondation pour la Recherche Médicale",16.0,France +28791657,MEGALEX,0.995782244,MEGALEX,0.995782244,,0,1,http://sedufau.shinyapps.io/megalex,301,,"(39.0437,-77.4875)",no_wayback,2018-06-01,"Université Clermont Auvergne, CNRS, Laboratoire de Psychologie Sociale et Cognitive (LAPSCO, UMR 6024), 34 avenue Carnot, 63037, Clermont-Ferrand, France. 
ludovic.ferrand@uca.fr.","Ferrand L, Méot A, Spinelli E, New B, Pallier C, Bonin P, Dufau S, Mathôt S, Grainger J",,Agence Nationale de la Recherche,6.0,France +28854643,MSDB,0.971259832,MSDB,0.971259832,Microsatellite Database,0.94480021,1,http://tdb.ccmb.res.in/msdb,200,,"(17.3840,78.4564)",http://web.archive.org/web/20220221092843/http://tdb.ccmb.res.in/msdb,2017-06-01,"CSIR - Centre for Cellular and Molecular Biology, Hyderabad, India.","Avvaru AK, Saxena S, Sowpati DT, Mishra RK",,"Council of Scientific and Industrial Research, Council of Scientific and Industrial Research, Council of Scientific and Industrial Research",12.0,India +28968812,MetSigDis,0.997604191,MetSigDis,0.997604191,,0,1,http://www.bio-annotation.cn/MetSigDis,"HTTPConnectionPool(host='www.bio-annotation.cn', port=80): Max retries exceeded with url: /MetSigDis (Caused by ProtocolError('Connection aborted.', BadStatusLine(' OK\r\n')))",,,http://web.archive.org/web/20211223120530/http://www.bio-annotation.cn/MetSigDis/,2019-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University.","Cheng L, Yang H, Zhao H, Pei X, Shi H, Sun J, Zhang Y, Wang Z, Zhou M",,"National Natural Science Foundation of China, China Postdoctoral Science Foundation, National Natural Science Foundation of China, National Natural Science Foundation of China",45.0, +29036653,miRCarta,0.995174766,miRCarta,0.995174766,,0,1,http://mircarta.cs.uni-saarland.de,200,,"(49.2326,7.0098)",http://web.archive.org/web/20220817104357/http://mircarta.cs.uni-saarland.de/,2018-01-01,"Chair for Clinical Bioinformatics, Saarland Informatics Campus, Saarland University, Germany.","Backes C, Fehlmann T, Kern F, Kehl T, Lenhof HP, Meese E, Keller A",,,34.0,Germany +29036655,MFIB,0.981060028,MFIB,0.981060028,Mutual Folding Induced by,0.731679062,1,http://mfib.enzim.ttk.mta.hu,200,,"(47.4984,19.0404)",http://web.archive.org/web/20220709154557/http://mfib.enzim.ttk.mta.hu/,2017-11-01,"Institute of Enzymology, Research Centre 
for Natural Sciences, Hungarian Academy of Sciences, Budapest H-1117, Hungary.","Fichó E, Reményi I, Simon I, Mészáros B",,"Hungarian Academy of Sciences, Hungarian Academy of Sciences",28.0,Hungary +29069466,MGA,0.93783768,MGA,0.93783768,Genome,0.568572879,1,http://ccg.vital-it.ch/mga,301,,"(46.5290,6.5626)",http://web.archive.org/web/20180313144251/http://ccg.vital-it.ch:80/mga/,2018-01-01,"Swiss Institute of Bioinformatics (SIB), CH-1015 Lausanne, Switzerland.","Dréos R, Ambrosini G, Groux R, Périer RC, Bucher P",,,4.0,Switzerland +29077896,mirTrans,0.996958256,mirTrans,0.996958256,,0,1,"http://mcube.nju.edu.cn/jwang/lab/soft/mirtrans/, http://120.27.239.192/mirtrans","302, HTTPConnectionPool(host='120.27.239.192', port=80): Max retries exceeded with url: /mirtrans (Caused by ConnectTimeoutError(, 'Connection to 120.27.239.192 timed out. (connect timeout=5)'))",,"(32.0617,118.7778), ","http://web.archive.org/web/20220225182729/https://mcube.nju.edu.cn/jwang/lab/soft/mirtrans/, no_wayback",2018-01-01,"The State Key Laboratory of Pharmaceutical Biotechnology and Jiangsu Engineering Research Center for MicroRNA Biology and Biotechnology, NJU Advanced Institute for Life Sciences (NAILS), School of Life Science, Nanjing University, Nanjing 210023, China.","Hua X, Tang R, Xu X, Wang Z, Xu Q, Chen L, Wingender E, Li J, Zhang C, Wang J",,,9.0,China +29106642,MSDD,0.997410993,MSDD,0.997410993,MiRNA SNP Disease Database,0.985756731,1,http://www.bio-bigdata.com/msdd,502,,,http://web.archive.org/web/20200224211224/http://www.bio-bigdata.com:80/msdd/,2018-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Yue M, Zhou D, Zhi H, Wang P, Zhang Y, Gao Y, Guo M, Li X, Wang Y, Zhang Y, Ning S, Li X",,,20.0,China +29109711,miPepBase,0.993406117,miPepBase,0.993406117,Mimicry Peptide 
Database,0.776584784,1,http://proteininformatics.org/mkumar/mipepbase,301,,"(33.4484,-112.0740)",http://web.archive.org/web/20220616055249/http://proteininformatics.org/mkumar/mipepbase/,2017-10-23,"Department of Biophysics, University of Delhi, New Delhi, India.","Garg A, Kumari B, Kumar R, Kumar M",,"University Grants Commission, Indian Council of Medical Research, Indian Council of Medical Research",4.0,India +29126312,Met-DB,0.965683778,Met-DB,0.965683778,,0,1,"http://compgenomics.utsa.edu/MeTDB/, http://www.xjtlu.edu.cn/metdb2","200, 301",,"(29.4241,-98.4936), (51.5085,-0.1257)","http://web.archive.org/web/20220818022503/http://compgenomics.utsa.edu/MeTDB/, no_wayback",2018-01-01,"School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, Jiangsu 221116, China.","Liu H, Wang H, Wei Z, Zhang S, Hua G, Zhang SW, Zhang L, Gao SJ, Meng J, Chen X, Huang Y",,"NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NIDCR NIH HHS",48.0,"China, China" +29129553,mitoepigenomeKB,0.912198901,mitoepigenomeKB,0.912198901,,0,1,http://clingen.igib.res.in/mitoepigenome,301,,"(26.7907,75.2061)",http://web.archive.org/web/20220121151927/https://clingen.igib.res.in/mitoepigenome/,2017-11-09,"GN Ramachandran Knowledge Center for Genome Informatics, CSIR Institute of Genomics and Integrative Biology, Mathura Road, Delhi 110 025, India; Academy of Scientific and Innovative Research, CSIR-IGIB South Campus, Mathura Road, Delhi 110 025, India.","Ghosh S, Ranawat AS, Tolani P, Scaria V",,Council of Scientific and Industrial Research,1.0,"India, India" +29145608,MeDReaders,0.957522929,MeDReaders,0.957522929,,0,1,http://medreader.org,301,,"(39.3009,-76.5799)",no_wayback,2018-01-01,"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, Heilongjiang 150001, China.","Wang G, Luo X, Wang J, Wan J, Xia S, Zhu H, Qian J, Wang Y",,"NINDS NIH HHS, NINDS NIH HHS, NICHD NIH HHS",31.0,China 
+29145625,mSignatureDB,0.991612613,mSignatureDB,0.991612613,,0,1,http://tardis.cgu.edu.tw/msignaturedb,301,,"(25.0478,121.5319)",http://web.archive.org/web/20220225182732/http://tardis.cgu.edu.tw/msignaturedb/,2018-01-01,"Department of Biomedical Sciences, Chang Gung University, Taoyuan, Taiwan.","Huang PJ, Chiu LY, Lee CC, Yeh YM, Huang KY, Chiu CH, Tang P",,,24.0, +"29155944, 29155944, 29155944, 29155944",MIST,0.989567836,MIST,0.989567836,Molecular Interaction Search Tool,0.924612188,1,http://fgrtools.hms.harvard.edu/MIST,301,,"(42.3584,-71.0598)",http://web.archive.org/web/20220308035331/https://fgrtools.hms.harvard.edu/MIST/,2018-01-01,"Department of Genetics, Harvard Medical School, 77 Avenue Louis Pasteur, Boston, MA 02115, USA., Department of Genetics, Harvard Medical School, 77 Avenue Louis Pasteur, Boston, MA 02115, USA., Department of Genetics, Harvard Medical School, 77 Avenue Louis Pasteur, Boston, MA 02115, USA., Department of Genetics, Harvard Medical School, 77 Avenue Louis Pasteur, Boston, MA 02115, USA.","Hu Y, Vinayagam A, Nand A, Comjean A, Chung V, Hao T, Mohr SE, Perrimon N, Hu Y, Vinayagam A, Nand A, Comjean A, Chung V, Hao T, Mohr SE, Perrimon N, Hu Y, Vinayagam A, Nand A, Comjean A, Chung V, Hao T, Mohr SE, Perrimon N, Hu Y, Vinayagam A, Nand A, Comjean A, Chung V, Hao T, Mohr SE, Perrimon N",", , , ","NHGRI NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS",124.0,"United States, United States, United States, United States" +29194489,mirDIP,0.971328497,mirDIP,0.971328497,,0,1,http://ophid.utoronto.ca/mirDIP,302,,"(43.7001,-79.4163)",http://web.archive.org/web/20220723023021/http://ophid.utoronto.ca/mirDIP/,2018-01-01,"Krembil Research Institute, University Health Network, Toronto, Ontario M5T 2S8, Canada.","Tokar T, Pastrello C, Rossos AEM, Abovsky M, Hauschild AC, Tsay 
M, Lu R, Jurisica I",,,150.0,Canada +29206899,MOSAIC,0.996641636,MOSAIC,0.996641636,,0,1,http://mosaic.cs.umn.edu,200,,"(44.9800,-93.2638)",http://web.archive.org/web/20221017033556/http://mosaic.cs.umn.edu/,2018-04-01,"Bioinformatics and Computational Biology Program, University of Minnesota-Twin Cities, Minneapolis, MN 55455, USA.","Nelson J, Simpkins SW, Safizadeh H, Li SC, Piotrowski JS, Hirano H, Yashiroda Y, Osada H, Yoshida M, Boone C, Myers CL",,"NSF, Japan Society for the Promotion of Science, National Science Foundation, NHGRI NIH HHS, Japan Society for the Promotion of Science, NHGRI NIH HHS, NIH, National Institutes of Health, National Institutes of Health, JSPS, University of Minnesota, Canadian Institutes of Health Research, NIGMS NIH HHS",6.0,United States +29220447,MiRIAD,0.840786457,MiRIAD,0.840786457,,0,1,http://www.miriad-database.org,"HTTPConnectionPool(host='www.miriad-database.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220525074744/https://miriad-database.org:443/,2017-01-01,"Department of Anaesthesiology, University Hospital of the Ludwig-Maximilians-University Munich, Munich, Germany.","Hinske LC, Dos Santos FRC, Ohara DT, Ohno-Machado L, Kreth S, Galante PAF",,,3.0,Germany +29284660,ModERN,0.978945613,ModERN,0.978945613,Model Organism ENCyclopedia,0.767016259,1,http://epic.gs.washington.edu/modERN,302,,"(47.6062,-122.3321)",http://web.archive.org/web/20220728225640/https://epic.gs.washington.edu/modERN/,2017-12-28,"Department of Genetics, Yale University, New Haven, Connecticut 06520.","Kudron MM, Victorsen A, Gevirtzman L, Hillier LW, Fisher WW, Vafeados D, Kirkey M, Hammonds AS, Gersch J, Ammouri H, Wall ML, Moran J, Steffen D, Szynkarek M, Seabrook-Sturgis S, Jameel N, Kadaba M, Patton J, Terrell R, Corson M, Durham TJ, Park S, Samanta S, Han M, Xu J, Yan KK, Celniker SE, White KP, Ma L, 
Gerstein M, Reinke V, Waterston RH",,"NIGMS NIH HHS, NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NCI NIH HHS, NIH HHS",56.0, +29401218,MutHTP,0.994622827,MutHTP,0.994622827,mutations in human transmembrane proteins,0.955130294,1,http://www.iitm.ac.in/bioinfo/MutHTP,302,,"(13.0156,80.2467)",no_wayback,2018-07-01,"Department of Biotechnology, Bhupat and Jyoti Mehta School of BioSciences, Indian Institute of Technology Madras, Chennai, Tamilnadu, India.","Kulandaisamy A, Binny Priya S, Sakthivel R, Tarnovskaya S, Bizin I, Hönigschmid P, Frishman D, Gromiha MM",,"Department of Science and Technology, Government of India, Russian Science Foundation, Ministry of Human Resource and Development",8.0,India +29433427,MethCNA,0.993689835,MethCNA,0.993689835,,0,1,http://cgma.scu.edu.cn/MethCNA,"HTTPConnectionPool(host='cgma.scu.edu.cn', port=80): Max retries exceeded with url: /MethCNA (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2018-02-13,"Center of Growth, Metabolism, and Aging, Key Laboratory of Bio-Resources and Eco-Environment, College of Life Sciences, Sichuan University, Chengdu, Sichuan, 610064, China.","Deng G, Yang J, Zhang Q, Xiao ZX, Cai H",,"National Natural Science Foundation of China, National Natural Science Foundation of China",4.0,China +29618898,MODII,0.976052999,MODII,0.976052999,MOlecular Database on Indian Insects,0.967991948,1,"http://cib.res.in, http://cib.res.in","301, 301",,"(37.7621,-122.3971), (37.7621,-122.3971)","http://web.archive.org/web/20220615155501/https://cib.res.in/, http://web.archive.org/web/20220615155501/https://cib.res.in/",2018-02-28,"ICAR-National Bureau of Agricultural Insect Resources, H.A. Farm Post, P.Bag No: 2491, Bellary Road, Hebbal, Bengaluru - 560 024. 
India.","Pratheepa M, Venkatesan T, Gracy G, Jalali SK, Rangheswaran R, Antony JC, Rai A",,,0.0,India +29745830,MEGADOCK,0.884224534,MEGADOCK,0.884224534,,0,1,http://www.bi.cs.titech.ac.jp/megadock-web,404,,,http://web.archive.org/web/20220803234147/https://www.bi.cs.titech.ac.jp/megadock-web/,2018-05-08,"Department of Computer Science, School of Computing, Tokyo Institute of Technology, 2-12-1 W8-76 Ookayama, Meguro-ku, Tokyo, 152-8550, Japan.","Hayashi T, Matsuzaki Y, Yanagisawa K, Ohue M, Akiyama Y",,,7.0,Japan +29795526,Melonomics,0.821713448,Melonomics,0.821713448,,0,1,http://melonomics.net,301,,"(41.4911,2.1408)",http://web.archive.org/web/20220901052210/https://www.melonomics.net/,2018-05-24,"Centre for Research in Agricultural Genomics (CRAG) CSIC-IRTA-UAB-UB, Campus UAB, Barcelona, 08193, Spain.","Ruggieri V, Alexiou KG, Morata J, Argyris J, Pujol M, Yano R, Nonaka S, Ezura H, Latrasse D, Boualem A, Benhamed M, Bendahmane A, Cigliano RA, Sanseverino W, Puigdomènech P, Casacuberta JM, Garcia-Mas J",,,19.0,Spain +29869221,MU3D,0.996364331,MU3D,0.996364331,Miami University Deception Detection Database,0.954041362,1,http://hdl.handle.net/2374.MIA/6067,302,,"(53.3331,-6.2489)",no_wayback,2019-02-01,"University of Denver, 2155 S. Race St., Denver, CO, 80433, USA. 
emilypaigelloyd@gmail.com.","Lloyd EP, Deska JC, Hugenberg K, McConnell AR, Humphrey BT, Kunstman JW",,,0.0,United States +29897419,MARDy,0.99679625,MARDy,0.99679625,Mycology Antifungal Resistance Database,0.973570075,1,http://www.mardy.net,200,,"(48.8534,2.3488)",http://web.archive.org/web/20220524221305/http://mardy.net/,2018-09-01,"Department of Physiology, Anatomy and Genetics, University of Oxford, Oxford, UK.","Nash A, Sewell T, Farrer RA, Abdolrasouli A, Shelton JMG, Fisher MC, Rhodes J",,"Medical Research Council, Natural Environment Research Council, Imperial College London, Medical Research Council, Antimicrobial Research Collaborative",7.0, +29967752,Metaxa2,0.596945107,Metaxa2,0.596945107,,0,1,http://microbiology.se/software/metaxa2,301,,"(55.6759,12.5655)",http://web.archive.org/web/20220527052805/https://microbiology.se/software/metaxa2/,2018-06-26,"Department of Entomology, Ohio State University, Columbus, OH, United States of America.","Richardson RT, Bengtsson-Palme J, Gardiner MM, Johnson RM",,"Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning, Ohio Agricultural Research and Development Center, Costco Honey Bee Biology Fellowship, Ohio Agricultural Research and Development Center",3.0,United States +30053238,mutTCPdb,0.996738315,mutTCPdb,0.996738315,,0,1,http://lms.snu.edu.in/mutTCPDB/index.php,"HTTPConnectionPool(host='lms.snu.edu.in', port=80): Max retries exceeded with url: /mutTCPDB/index.php (Caused by ConnectTimeoutError(, 'Connection to lms.snu.edu.in timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20221005154106/https://lms.snu.edu.in/mutTCPDB/index.php,2018-01-01,"Department of Life Sciences, School of Natural Sciences, Shiv Nadar University, Greater Noida, Uttar Pradesh, India.","Singh G, Bhat B, Jayadev MSK, Madhusudhan C, Singh A",,,1.0,India +30057343,MCENet,0.991767049,MCENet,0.991767049,,0,1,http://bioinformatics.cau.edu.cn/MCENet,301,,"(39.9075,116.3972)",http://web.archive.org/web/20220618084904/http://bioinformatics.cau.edu.cn/MCENet/,2018-07-18,"State Key Laboratory of Plant Physiology and Biochemistry, College of Biological Sciences, China Agricultural University, Beijing 100193, China.","Tian T, You Q, Yan H, Xu W, Su Z",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",7.0,"China, China" +30065744,miRVIT,0.992824743,miRVIT,0.992824743,,0,1,http://mirvit.ipsp.cnr.it,301,,"(43.7792,11.2463)",http://web.archive.org/web/20220615154605/https://mirvit.ipsp.cnr.it/,2018-07-17,"Institute for Sustainable Plant Protection, National Research Council of Italy, Turin, Italy.","Chitarra W, Pagliarani C, Abbà S, Boccacci P, Birello G, Rossi M, Palmano S, Marzachì C, Perrone I, Gambino G",,,11.0,"Italy, Italy" +30092360,MPDSDM,0.995594293,MPDSDM,0.995594293,Molecular property diagnostic suite for diabetes mellitus,0.949859989,1,"http://www.mpds-diabetes.in, http://www.way2drug.com/passonline","HTTPConnectionPool(host='www.mpds-diabetes.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known')), 301",,", (55.7522,37.6156)","http://web.archive.org/web/20210623025519/http://mpds-diabetes.in/, http://web.archive.org/web/20220708212222/http://www.way2drug.com/PASSOnline/",2018-08-06,"Centre for Molecular Modeling, CSIR-Indian Institute of Chemical Technology, Tarnaka, Hyderabad 500 007, India.","Gaur AS, Nagamani S, Tanneeru 
K, Druzhilovskiy D, Rudik A, Poroikov V, Narahari Sastry G",,"RSF, DST",1.0,India +30268942,MiPanda,0.688638806,MiPanda,0.688638806,,0,1,http://mipanda.org,200,,"(42.2776,-83.7409)",http://web.archive.org/web/20221017055357/http://mipanda.org/,2018-09-27,"Michigan Center for Translational Pathology, University of Michigan, Ann Arbor, MI, USA.","Niknafs YS, Pandian B, Gajjar T, Gaudette Z, Wheelock K, Maz MP, Achar RK, Song M, Massaro C, Cao X, Chinnaiyan AM",,"Prostate Cancer Foundation, NIGMS NIH HHS, Prostate Cancer Foundation Young Investigator Award, Early Detection Research Network, NCI Prostate SPORE, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS",9.0,United States +30371819,MoonDB,0.993003726,MoonDB,0.993003726,,0,1,http://moondb.hb.univ-amu.fr,200,,"(43.2970,5.3811)",http://web.archive.org/web/20221108172814/http://moondb.hb.univ-amu.fr/,2019-01-01,"Aix-Marseille Univ, INSERM, TAGC, UMR_S1090, Marseille, France.","Ribeiro DM, Briere G, Bely B, Spinelli L, Brun C",,Aix-Marseille University,8.0,France +30380113,MethMotif,0.995933473,MethMotif,0.995933473,,0,1,http://bioinfo-csi.nus.edu.sg/methmotif,403,,,http://web.archive.org/web/20211127235102/https://bioinfo-csi.nus.edu.sg/methmotif/,2019-01-01,"Cancer Science Institute of Singapore, National University of Singapore, Singapore, Singapore.","Xuan Lin QX, Sian S, An O, Thieffry D, Jha S, Benoukraf T",,"National Medical Research Council of Singapore, Ministry of Education Academic Research Fund, Cancer Science Institute of Singapore, National Medical Research Council, Singapore Ministry of Education's AcRF Tier 3",23.0,"Singapore, Singapore, Singapore, Singapore" +30381914,MedPServer,0.69970268,MedPServer,0.69970268,,0,1,http://bif.uohyd.ac.in/medserver,301,,"(17.3840,78.4564)",http://web.archive.org/web/20221017013539/http://bif.uohyd.ac.in/medserver/,2018-11-28,"Department of Biotechnology and Bioinformatics, School of Life Sciences, University of Hyderabad, Hyderabad, India.","Potshangbam AM, 
Polavarapu R, Rathore RS, Naresh D, Prabhu NP, Potshangbam N, Kumar P, Vindal V",,,6.0,India +30418645,MemProtMD,0.975355506,MemProtMD,0.975355506,,0,1,http://memprotmd.bioch.ox.ac.uk,200,,"(51.7522,-1.2560)",http://web.archive.org/web/20220802093502/http://memprotmd.bioch.ox.ac.uk/,2019-01-01,"Department of Biochemistry, University of Oxford, South Parks Road, Oxford, OX1 3QU, UK.","Newport TD, Sansom MSP, Stansfeld PJ",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Engineering and Physical Sciences Research Council, Engineering and Physical Sciences Research Council, Engineering and Physical Sciences Research Council, Engineering and Physical Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Engineering and Physical Sciences Research Council, Engineering and Physical Sciences Research Council",49.0, +30584170,MDSGene,0.993123114,MDSGene,0.993123114,Database,0.699059725,1,http://www.mdsgene.org,"HTTPConnectionPool(host='www.mdsgene.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.mdsgene.org timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220520012906/https://www.mdsgene.org/,2018-01-01,"Institute of Neurogenetics, University of Luebeck, Luebeck, Germany.","Klein C, Hattori N, Marras C",,,10.0,Germany +30649247,miR+Pathway,0.936659026,miR+Pathway,0.936659026,,0,1,http://www.insect-genome.com/miR-pathway,200,,"(30.2936,120.1614)",http://web.archive.org/web/20220615174112/http://www.insect-genome.com/miR-pathway,2020-03-01,"Ministry of Agriculture Key Lab of Agricultural Entomology, Institute of Insect Sciences, College of Agriculture and Biotechnology, Zhejiang University, Hangzhou, China.","Pian C, Zhang G, Gao L, Fan X, Li F",,"Hong Kong Scholars Program, National Key Research, Development Program, National research and development project and Hong Kong Scholars Program, National Key Research, Development Program, National research and development project and Hong Kong Scholars Program, National Key Research, Development Program, National research and development project and Hong Kong Scholars Program, Research Grants Council of the Hong Kong Special Administrative Region, China",2.0,China +30738202,MtBrowse,0.99495405,MtBrowse,0.99495405,,0,1,http://ab-openlab.csir.res.in/cgi-bin/gb2/gbrowse,302,,"(28.6358,77.2245)",no_wayback,2019-02-07,"University Institute of Biotechnology, Chandigarh University, Mohali, India.","Singh V, Jolly B, Rajput NK, Pramanik S, Bhardwaj A",,"SERB, CSIR-GENESIS",1.0,India +30764761,MDB,0.958996102,MDB,0.958996102,,0,1,http://csc.columbusstate.edu/carroll/MDB,301,,"(32.4610,-84.9877)",http://web.archive.org/web/20180730124144/http://csc.columbusstate.edu:80/carroll/MDB/,2019-02-14,"TSYS School of Computer Science, Columbus State University, 4225 University Avenue, Columbus, 31907, GA, USA. 
carroll_hyrum@columbusstate.edu.","Carroll HD, Spouge JL, Gonzalez M",,National Institutes of Health,0.0,United States +30813887,mtProtEvol,0.955864847,mtProtEvol,0.955864847,,0,1,http://bioinfodbs.kantiana.ru/mtProtEvol,301,,"(54.7065,20.5110)",http://web.archive.org/web/20220617175452/http://bioinfodbs.kantiana.ru/mtProtEvol/,2019-02-26,"Center for Mitochondrial Functional Genomics, School of Life Science, Immanuel Kant Baltic Federal University, Kaliningrad, Russia.","Kuzminkova AA, Sokol AD, Ushakova KE, Popadin KY, Gunbin KV",,,0.0, +30837356,MusatransSSRDB,0.983545184,MusatransSSRDB,0.983545184,,0,1,http://bioinfnrcb.byethost7.com/nrcbbio,200,,"(27.9425,-82.5057)",no_wayback,2019-03-01,"ICAR-National Research Centre for Banana, Thogamalai Road, Thayanur Post, Tiruchirapalli 620 102, Tamil Nadu, India.","Backiyarani S, Chandrasekar A, Uma S, Saraswathi MS",,,3.0,India +30841849,mGAP,0.988449275,mGAP,0.988449275,macaque Genotype And Phenotype,0.870370652,1,http://mgap.ohsu.edu,301,,"(37.5331,-122.2486)",http://web.archive.org/web/20220709161614/https://mgap.ohsu.edu/,2019-03-06,"Division of Genetics, Oregon National Primate Research Center, Oregon Health and Sciences University, Beaverton, OR, 97006, USA.","Bimber BN, Yan MY, Peterson SM, Ferguson B",,"NIH HHS, NIH HHS, National Institutes of Health, National Institutes of Health",6.0,United States +30858555,MorCVD,0.99740231,MorCVD,0.99740231,,0,1,http://morcvd.sblab-nsit.net/About,200,,"(40.0334,-83.1582)",http://web.archive.org/web/20220414152129/http://morcvd.sblab-nsit.net/About,2019-03-11,"Computational and Structural Biology Laboratory, Division of Biological Sciences and Engineering, Netaji Subhas University of Technology, Dwarka, New Delhi, 110078, India.","Singh N, Bhatia V, Singh S, Bhatnagar S",,Council of Scientific and Industrial Research,2.0,India 
+30874795,MirtronDB,0.997429311,MirtronDB,0.997429311,,0,1,http://mirtrondb.cp.utfpr.edu.br,200,,"(-25.4447,-49.1925)",http://web.archive.org/web/20220805142603/http://mirtrondb.cp.utfpr.edu.br/,2019-10-01,"Bioinformatics Graduation Program (PPGBIOINFO), Department of Computer Science, Federal University of Technology - Paraná, Cornélio Procópio, Paraná, Brazil.","Da Fonseca BHR, Domingues DS, Paschoal AR",,,7.0,Brazil +30944327,monoterpene indole alkaloid database,0.976412416,MIADB,0.96922636,monoterpene indole alkaloid database,0.976412416,1,http://www.ebi.ac.uk/metabolights/MTBLS142,301,,"(51.5085,-0.1257)",no_wayback,2019-04-03,"Équipe ""Pharmacognosie-Chimie des Substances Naturelles"" BioCIS, Univ. Paris-Sud, CNRS, Université Paris-Saclay, 5 Rue J.-B. Clément, 92290, Châtenay-Malabry, France.","Fox Ramos AE, Le Pogam P, Fox Alcover C, Otogo N'Nang E, Cauchie G, Hazni H, Awang K, Bréard D, Echavarren AM, Frédérich M, Gaslonde T, Girardot M, Grougnet R, Kirillova MS, Kritsanida M, Lémus C, Le Ray AM, Lewin G, Litaudon M, Mambu L, Michel S, Miloserdov FM, Muratore ME, Richomme-Peniguel P, Roussi F, Evanno L, Poupon E, Champy P, Beniddir MA",,"CONCYTEC FONDECYT Peruvian research agency, Agence Nationale de la Recherche",5.0,France +31042284,Microbiome Learning Repo,0.96743991,ML Repo,0.96120888,Microbiome Learning Repo,0.96743991,1,http://knights-lab.github.io/MLRepo,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20220128212256/https://knights-lab.github.io/MLRepo/,2019-05-01,"Bioinformatics and Computational Biology, University of Minnesota, 200 Union Street SE, Minneapolis, MN 55455.","Vangay P, Hillmann BM, Knights D",,National Institutes of Health,17.0, +31157825,MENDA,0.996490777,MENDA,0.996490777,,0,1,http://menda.cqmu.edu.cn:8080/index.php,200,,,http://web.archive.org/web/20220724073536/http://menda.cqmu.edu.cn:8080/index.php,2020-07-01,"Department of Neurology, The First Affiliated Hospital of Chongqing Medical University, Chongqing, China.","Pu J, 
Yu Y, Liu Y, Tian L, Gui S, Zhong X, Fan C, Xu S, Song X, Liu L, Yang L, Zheng P, Chen J, Cheng K, Zhou C, Wang H, Xie P",,National Key Research and Development Program of China,7.0,China +31197322,MetOSite,0.994946599,MetOSite,0.994946599,,0,1,http://metosite.uma.es,301,,"(36.7202,-4.4203)",http://web.archive.org/web/20220622211107/https://metosite.uma.es/,2019-11-01,"Departamento de Biología Molecular y Bioquímica, Universidad de Málaga, Málaga 29071, Spain.","Valverde H, Cantón FR, Aledo JC",,"NIH, University of Málaga",7.0,Spain +31231773,MepmiRDB,0.99640429,MepmiRDB,0.99640429,medicinal plant microRNA database,0.712243244,1,http://mepmirdb.cn/mepmirdb/index.html,200,,"(37.3394,-121.8950)",http://web.archive.org/web/20220617065318/http://mepmirdb.cn/mepmirdb/index.html,2019-01-01,"College of Life and Environmental Sciences, Hangzhou Normal University, Hangzhou, 310036, China.","Yu D, Lu J, Shao W, Ma X, Xie T, Ito H, Wang T, Xu M, Wang H, Meng Y",,"Zhejiang Province Ministry of Science and Technology, National Natural Science Foundation of China, Major Increase Or Decrease Program In The Central Finance Level, National Natural Science Foundation of China, Hangzhou Ministry of Science and Technology, National Natural Science Foundation of China",8.0,China +31231774,Mr.Vc,0.98936215,Mr.Vc,0.98936215,,0,1,http://bioinfo.life.hust.edu.cn/mrvc,301,,"(31.2222,121.4581)",http://web.archive.org/web/20190905152305/http://bioinfo.life.hust.edu.cn:80/mrvc/,2019-01-01,"Department of Biotechnology, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan 430074, China.","Zhang Z, Chen G, Hu J, Hussain W, Fan F, Yang Y, Zhou Z, Fang X, Zhu J, Chen WH, Liu Z",,"National Science Foundation of China, National Science Foundation of China, National Science Foundation of China, National Science Foundation of China, National Programs for Fundamental and Development",1.0,China 
+31240103,MDR,0.993409554,MDR,0.993409554,,0,1,http://mdr.xieslab.org,"HTTPConnectionPool(host='mdr.xieslab.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20211020234726/http://mdr.xieslab.org/,2019-06-15,"1Hainan Key Laboratory for Biology of Tropical Ornamental Plant Germplasm, Institute of Tropical Agriculture and Forestry, Hainan University, 570228 Haikou, China.","Liu ZY, Xing JF, Chen W, Luan MW, Xie R, Huang J, Xie SQ, Xiao CL",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",18.0,China +31352145,MMRdb,0.9871943,MMRdb,0.9871943,,0,1,http://mmrdb.org,"HTTPConnectionPool(host='mmrdb.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20211019095903/http://mmrdb.org/,2019-07-26,"Epidemic Diseases Department, Institute for Research and Medical Consultations, Imam Abdulrahman Bin Faisal University, Dammam 31441, Saudi Arabia. Electronic address: ikalmansour@iau.edu.sa.","Almansour I, Alhagri M",,,3.0,Saudi Arabia +31404401,miRDRN,0.996445715,miRDRN,0.996445715,miRNA,0.549448252,1,http://mirdrn.ncu.edu.tw/mirdrn,"HTTPConnectionPool(host='mirdrn.ncu.edu.tw', port=80): Max retries exceeded with url: /mirdrn (Caused by ConnectTimeoutError(, 'Connection to mirdrn.ncu.edu.tw timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20190806171508/http://mirdrn.ncu.edu.tw/mirdrn/,2019-08-06,"Department of Biomedical Sciences and Engineering, National Central University, Taoyuan City, Taiwan.","Liu HC, Peng YS, Lee HC",,"Ministry of Science and Technology, Republic of China",0.0, +31504189,MeLAD,0.996227562,MeLAD,0.996227562,Metalloenzyme-Ligand Association Database,0.90846928,1,http://melad.ddtmlab.org,"HTTPConnectionPool(host='melad.ddtmlab.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20220802174337/https://melad.ddtmlab.org/,2020-02-01,"Key Laboratory of Drug Targeting and Drug Delivery System of Ministry of Education, Department of Medicinal Chemistry, West China School of Pharmacy, Sichuan University, Chengdu, Sichuan 610041, China.","Li G, Su Y, Yan YH, Peng JY, Dai QQ, Ning XL, Zhu CL, Fu C, McDonough MA, Schofield CJ, Huang C, Li GB",,"Fundamental Research Funds for the Central Universities, Medical Research Council, Sichuan University Postdoctoral Interdisciplinary Innovation Startup Foundation, Scientific Research Foundation of Sichuan University, Cancer Research UK, National Natural Science Foundation, Cancer Research UK, Sichuan Science and Technology Program, National Natural Science Foundation, Scientific Research Foundation of Sichuan University",4.0,"China, China" +31559753,Mouse Liver Portal,0.730381191,Mouse Liver Portal,0.730381191,,0,1,http://mouseliver.com,"HTTPConnectionPool(host='mouseliver.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2019-09-01,"Institutes of Biomedical Sciences, Fudan University, Shanghai 200032, China.","Liu Y, Feng J, Liu W, Qin J, Ding C, He F",,,0.0,China +31586405,MatrisomeDB,0.997006476,MatrisomeDB,0.997006476,the ECM-protein 
knowledge database,0.631314657,1,http://www.pepchem.org/matrisomedb,301,,"(41.8500,-87.6500)",http://web.archive.org/web/20220308110551/https://www.pepchem.org/matrisomedb/,2020-01-01,"College of Pharmacy, University of Illinois at Chicago, Chicago, IL 60612, USA.","Shao X, Taha IN, Clauser KR, Gao YT, Naba A",,"Department of Physiology and Biophysics at the University of Illinois at Chicago, University of Illinois at Chicago, College of Pharmacy",34.0,United States +31588509,MutEx,0.993316352,MutEx,0.993316352,,0,1,http://www.innovebioinfo.com/Databases/Mutationdb_About.php,200,,"(40.7895,-74.0565)",http://web.archive.org/web/20220126225221/http://innovebioinfo.com/Databases/Mutationdb_About.php,2020-07-01,"Department of Biostatistics, Vanderbilt University Medical Center, Nashville, USA, 37232.","Ping J, Oyebamiji O, Yu H, Ness S, Chien J, Ye F, Kang H, Samuels D, Ivanov S, Chen D, Zhao YY, Guo Y",,"NIDCR NIH HHS, NCI NIH HHS, National Cancer Institute",3.0,United States +31612915,MIBiG,0.996939003,MIBiG,0.996939003,Minimum Information about a Biosynthetic Gene Cluster,0.971989518,1,http://mibig.secondarymetabolites.org,301,,"(55.6759,12.5655)",http://web.archive.org/web/20221014073401/https://mibig.secondarymetabolites.org/,2020-01-01,"Bioinformatics Group, Wageningen University, Wageningen, NL, The Netherlands.","Kautsar SA, Blin K, Shaw S, Navarro-Muñoz JC, Terlouw BR, van der Hooft JJJ, van Santen JA, Tracanna V, Suarez Duran HG, Pascal Andreu V, Selem-Mojica N, Alanjary M, Robinson SL, Lund G, Epstein SC, Sisto AC, Charkoudian LK, Collemare J, Linington RG, Weber T, Medema MH",,"Dutch Research Council (NWO), Netherlands eScience Center, Novo Nordisk Fonden, National Institutes of Health, Novo Nordisk Foundation, National Science Foundation, Novo Nordisk Foundation, NNF Center for Biosustainability, NSF GRF, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, NCCIH NIH HHS, Novo Nordisk Fonden, 
NSERC, Graduate School for Experimental Plant Sciences",140.0,Netherlands +31652812,Murine Microbiome Database,0.973936637,MMDB,0.96417357,Murine Microbiome Database,0.973936637,1,http://leb.snu.ac.kr/mmdb,"HTTPConnectionPool(host='leb.snu.ac.kr', port=80): Max retries exceeded with url: /mmdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='leb.snu.ac.kr', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20211205053132/http://leb.snu.ac.kr/mmdb,2019-10-23,"Interdisciplinary Program in Bioinformatics, Seoul National University, Seoul 08826, Korea. yjunwon18@gmail.com.","Yang J, Park J, Park S, Baek I, Chun J",,"Korea Institute of Planning and Evaluation for Technology in Food, Agriculture, Forestry and Fisheries",7.0, +31667690,Microndata,0.973953187,Microndata,0.973953187,,0,1,http://www.microndata.net,302,,"(-27.4679,153.0281)",http://web.archive.org/web/20211218021648/http://microndata.net/,2019-10-30,"School of Biomedical Sciences, The University of Queensland, Brisbane, 4072, Australia.","Davila RA, Harkins D, Currey L, Fraser J, Bowles J, Piper M",,Australian Research Council,0.0,Australia +31679514,MaveDB,0.996891856,MaveDB,0.996891856,,0,1,http://www.mavedb.org,302,,"(47.6062,-122.3321)",http://web.archive.org/web/20221017034742/https://mavedb.org/,2019-11-04,"Bioinformatics Division, The Walter and Eliza Hall Institute of Medical Research, Parkville, VIC, Australia.","Esposito D, Weile J, Shendure J, Starita LM, Papenfuss AT, Roth FP, Fowler DM, Rubin AF",,"Australian National Health and Medical Research Council, NIH HHS, CIHR, National Institutes of Health, NIH HHS, National Institutes of Health, NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, Australian National Health and Medical Research Council, NIGMS NIH HHS, National Institutes of Health",31.0,Australia 
+31754718,MiST,0.747567356,MiST,0.747567356,,0,1,http://mistdb.com,301,,"(45.5946,-121.1787)",http://web.archive.org/web/20221022054519/https://mistdb.com/,2020-01-01,"Department of Microbiology, The Ohio State University, Columbus, OH 43210, USA.","Gumerov VM, Ortega DR, Adebali O, Ulrich LE, Zhulin IB",,"National Institutes of Health, NIDCR NIH HHS, NIGMS NIH HHS, National Institutes of Health",43.0,United States +31825307,MouseBytes,0.869805098,MouseBytes,0.869805098,,0,1,http://touchscreencognition.org,301,,"(40.8223,-74.4569)",no_wayback,2019-12-11,"Robarts Research Institute, The University of Western Ontario, Ontario, Canada.","Beraldo FH, Palmer D, Memar S, Wasserman DI, Lee WV, Liang S, Creighton SD, Kolisnyk B, Cowan MF, Mels J, Masood TS, Fodor C, Al-Onaizi MA, Bartha R, Gee T, Saksida LM, Bussey TJ, Strother SS, Prado VF, Winters BD, Prado MA",,"Canada Research Chairs, Canada First Research Excellence Fund, Canadian Institutes of Health Research, Canadian Institutes of Health Research, Natural Sciences and Engineering Research Council of Canada, Canadian Institutes of Health Research, CIFAR, Brain Canada, Alzheimer Society, Weston Brain Institute, Brain Canada, Mitacs",11.0,Canada +31836897,mesophotic.org,0.972254475,mesophotic.org,0.972254475,,0,1,http://mesophotic.org,301,,"(38.8951,-77.0364)",http://web.archive.org/web/20220901040159/http://www.mesophotic.org/,2019-01-01,"California Academy of Sciences, San Francisco, CA 94118, USA.","Bongaerts P, Perez-Rosales G, Radice VZ, Eyal G, Gori A, Gress E, Hammerman NM, Hernandez-Agreda A, Laverick J, Muir P, Pinheiro H, Pyle RL, Rocha L, Turner JA, Booker R",,"Australian Research Council’s Discovery Early Career Research Award, California Academy of Sciences’ Hope for Reefs Initiative",1.0,United States +32079733,MtSSPdb,0.998309374,MtSSPdb,0.998309374,truncatula Small Secreted Peptide 
Database,0.937849825,1,http://mtsspdb.noble.org,200,,"(34.1566,-97.1792)",http://web.archive.org/web/20221016215143/https://mtsspdb.noble.org/,2020-02-20,"Noble Research Institute, Ardmore, Oklahoma 73401.","Boschiero C, Dai X, Lundquist PK, Roy S, Christian de Bang T, Zhang S, Zhuang Z, Torres-Jerez I, Udvardi MK, Scheible WR, Zhao PX",,"Noble Research Institute, National Science Foundation, Oklahoma Center for the Advancement of Science and Technology (OCAST, Novo Nordisk Fonden",9.0, +32159764,MMHub,0.995018721,MMHub,0.995018721,,0,1,http://biodb.swu.edu.cn/mmdb,301,,"(31.2222,121.4581)",no_wayback,2020-01-01,"State Key Laboratory of Silkworm Genome Biology, Southwest University, No. 2, Tiansheng Road, Beibei, Chongqing 400715, China.","Li D, Ma B, Xu X, Chen G, Li T, He N",,"Chongqing Research Program of Basic Research and Frontier Technology, Fundamental Research Funds for the Central Universities, National Key Research and Development Program, Fundamental Research Funds for the Central Universities, China Postdoctoral Science Foundation",1.0,China +32337573,MPTherm,0.997999251,MPTherm,0.997999251,,0,1,http://www.iitm.ac.in/bioinfo/mptherm,302,,"(13.0156,80.2467)",no_wayback,2021-03-01,"Bharathidasan University, India.","Kulandaisamy A, Sakthivel R, Gromiha MM",,"Ministry of Human Resource Development and Initiative for Biological Systems Engineering Travel, Department of Science and Technology",1.0,India +32436932,miRactDB,0.998288214,miRactDB,0.998288214,,0,1,http://ccsm.uth.edu/miRactDB,302,,"(29.7633,-95.3633)",no_wayback,2021-05-01,None,"Tan H, Kim P, Sun P, Zhou X",,"NIH, National Institute of Health, NIH, National Institute of Health",3.0, +32777102,MSK-KP,0.953117967,MSK-KP,0.953117967,Musculoskeletal Knowledge Portal,0.928650388,1,http://mskkp.org,301,,"(37.4056,-122.0775)",http://web.archive.org/web/20201101032431/http://mskkp.org/,2020-09-01,"Hinda and Arthur Marcus Institute for Aging Research, Hebrew SeniorLife Boston, MA, USA.","Kiel DP, Kemp JP, 
Rivadeneira F, Westendorf JJ, Karasik D, Duncan EL, Imai Y, Müller R, Flannick J, Bonewald L, Burtt N",,"Orthopaedic Research Society, American Society for Bone and Mineral Research, European Calcified Tissue Society, NIA NIH HHS, Broad Institute, NIAMS NIH HHS, ETH Zurich Foundation",10.0,United States +32833025,MNDR,0.994394898,MNDR,0.994394898,mammal ncRNA-disease repository,0.987229202,1,http://www.rna-society.org/mndr,301,,"(40.2338,-111.6585)",http://web.archive.org/web/20221102055809/http://www.rna-society.org/mndr/,2021-01-01,"Dermatology Hospital, Southern Medical University, Guangzhou 510091, China.","Ning L, Cui T, Zheng B, Wang N, Luo J, Yang B, Du M, Cheng J, Dou Y, Wang D",,"National Natural Science Foundation of China, National Key Research and Development Program of China, Basic and Applied Basic Research Fund of Guangdong Province, Basic and Applied Basic Research Fund of Guangdong Province",20.0,China +32911083,MosaicBase,0.997231185,MosaicBase,0.997231185,,0,1,"http://mosaicbase.com/, http://49.4.21.8:8000","405, 200",,", ","no_wayback, http://web.archive.org/web/20220418031013/http://49.4.21.8:8000/",2020-04-01,"Center for Bioinformatics, State Key Laboratory of Protein and Plant Gene Research, School of Life Sciences, Peking University, Beijing 100871, China.","Yang X, Yang C, Zheng X, Xiong L, Tao Y, Wang M, Ye AY, Wu Q, Dou Y, Luo J, Wei L, Huang AY",,"National Natural Science Foundation of China, Ministry of Science and Technology of China",1.0,China +32986834,ModelSEED,0.991726339,ModelSEED,0.991726339,,0,1,http://modelseed.org/biochem,301,,"(41.6736,-88.0017)",http://web.archive.org/web/20221017033920/https://modelseed.org/biochem,2021-01-01,"Computing, Environment, and Life Sciences Division, Argonne National Laboratory, Lemont, IL 60439, USA.","Seaver SMD, Liu F, Zhang Q, Jeffryes J, Faria JP, Edirisinghe JN, Mundy M, Chia N, Noor E, Beber ME, Best AA, DeJongh M, Kimbrel JA, D'haeseleer P, McCorkle SR, Bolton JR, Pearson E, Canon S, 
Wood-Charlson EM, Cottingham RW, Arkin AP, Henry CS",,"Horizon 2020, U.S. Department of Energy, U.S. Department of Energy, National Science Foundation, Center for Individualized Medicine, Mayo Clinic, U.S. Department of Energy, National Science Foundation, NCI NIH HHS, U.S. Department of Energy, National Cancer Institute",27.0,United States +32990748,miRNASNP-v3,0.997226487,miRNASNP-v3,0.997226487,,0,1,http://bioinfo.life.hust.edu.cn/miRNASNP,200,,"(31.2222,121.4581)",http://web.archive.org/web/20220703065703/http://bioinfo.life.hust.edu.cn/miRNASNP,2021-01-01,"Research Center of Clinical Medicine, Affiliated Hospital of Nantong University, Nantong 226001, China.","Liu CJ, Fu X, Xia M, Zhang Q, Gu Z, Guo AY",,"National Natural Science Foundation of China, China Postdoctoral Science Foundation, National Natural Science Foundation of China, National Natural Science Foundation of China, China Postdoctoral Science Foundation, National Natural Science Foundation of China",14.0,China +33084905,MeDAS,0.996930599,MeDAS,0.996930599,Metazoan Developmental Alternative Splicing database,0.747435543,1,http://das.chenlulab.com,302,,"(30.2936,120.1614)",no_wayback,2021-01-01,"Key Laboratory of Birth Defects and Related Diseases of Women and Children of MOE, Department of Laboratory Medicine, State Key Laboratory of Biotherapy, West China Second Hospital, Sichuan University, Chengdu 610041, China.","Li Z, Zhang Y, Bush SJ, Tang C, Chen L, Zhang D, Urrutia AO, Lin JW, Chen L",,"Royal Society, Royal Society, National Key Research and Development Program of China, PAPPIT-DGAPA-UNAM, Santander and Newton fund UK-China, National Science Fund, Natural Environment Research Council, National Key Research and Development Program of China, Royal Society, NERC",2.0,"China, China" +33119751,MemMoRF,0.971484363,MemMoRF,0.971484363,,0,1,http://memmorf.hegelab.org,301,,"(47.4984,19.0404)",http://web.archive.org/web/20220503180121/https://memmorf.hegelab.org/,2021-01-01,"Department of Biophysics 
and Radiation Biology, Semmelweis University, Budapest 1094, Hungary.","Csizmadia G, Erdős G, Tordai H, Padányi R, Tosatto S, Dosztányi Z, Hegedűs T",,"National Research, Development and Innovation Office, Semmelweis University, Higher Education Institutional Excellence Programme of the Ministry for Innovation and Technology in Hungary",3.0,Hungary +33125077,MASI,0.987803578,MASI,0.987803578,microbiota-active substance interactions database,0.924483865,1,http://www.aiddlab.com/MASI,301,,"(1.2897,103.8501)",http://web.archive.org/web/20220503180121/http://www.aiddlab.com/MASI/,2021-01-01,"Department of Biological Medicines & Shanghai Engineering Research Center of Immunotherapeutics, Fudan University School of Pharmacy, Shanghai 201203, P. R. China.","Zeng X, Yang X, Fan J, Tan Y, Ju L, Shen W, Wang Y, Wang X, Chen W, Ju D, Chen YZ",,"Shenzhen Science and Technology Innovation Commission, Shanghai Sailing Program, National Natural Science Foundation of China, Shenzhen Science and Technology Innovation Commission, Shanghai Science and Technology Funds, Shenzhen Bay Laboratory, National Natural Science Foundation of China, Shenzhen Science and Technology Innovation Commission, Shenzhen Development and Reform Committee, National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Shenzhen Development and Reform Committee, Singapore Academic Research Fund",1.0,China +33126250,MloDisDB,0.996973872,MloDisDB,0.996973872,,0,1,http://mlodis.phasep.pro,200,,"(37.3394,-121.8950)",http://web.archive.org/web/20211126163806/http://mlodis.phasep.pro/,2021-07-01,"Department of Biomedical Informatics, Peking University Health Science Center.","Hou C, Xie H, Fu Y, Ma Y, Li T",,"National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science 
Foundation of China",1.0, +33156326,MetaNetX/MNXref,0.987576434,MetaNetX/MNXref,0.987576434,,0,1,"http://www.metanetx.org/, http://rdf.metanetx.org","301, 301",,"(46.5160,6.6328), (46.5160,6.6328)","http://web.archive.org/web/20221006190123/https://www.metanetx.org/, http://web.archive.org/web/20221017065643/https://rdf.metanetx.org/",2021-01-01,"Vital-IT group, SIB Swiss Institute of Bioinformatics, Lausanne 1015, Switzerland.","Moretti S, Tran VDT, Mehl F, Ibberson M, Pagni M",,"Swiss Federal Government, Swiss National Science Foundation",15.0,Switzerland +33174596,MitoCarta,0.984350145,MitoCarta,0.984350145,,0,1,http://www.broadinstitute.org/mitocarta,301,,"(37.7621,-122.3971)",no_wayback,2021-01-01,"Broad Institute of MIT and Harvard, Cambridge, MA 02142, USA.","Rath S, Sharma R, Gupta R, Ast T, Chan C, Durham TJ, Goodman RP, Grabarek Z, Haas ME, Hung WHW, Joshi PR, Jourdain AA, Kim SH, Kotrys AV, Lam SS, McCoy JG, Meisel JD, Miranda M, Panda A, Patgiri A, Rogers R, Sadre S, Shah H, Skinner OS, To TL, Walker MA, Wang H, Ward PS, Wengrod J, Yuan CC, Calvo SE, Mootha VK",,"National Institutes of Health, NIGMS NIH HHS, National Institutes of Health, Deutsche Forschungsgemeinschaft, NIGMS NIH HHS, National Institutes of Health, NIDDK NIH HHS, Dollis Huntington Endowment Fund for Cancer Research, National Institutes of Health, National Institutes of Health, NIA NIH HHS, National Institutes of Health, NCI NIH HHS, NIDDK NIH HHS, National Institutes of Health, NIAMS NIH HHS, National Institutes of Health, Massachusetts General Hospital Department of Neurology, NIGMS NIH HHS, Howard Hughes Medical Institute, Jane Coffin Childs",148.0,United States +33219670,MolluscDB,0.998527646,MolluscDB,0.998527646,,0,1,http://mgbase.qnlm.ac,200,,"(36.6683,116.9972)",http://web.archive.org/web/20220715112535/http://mgbase.qnlm.ac/,2021-01-01,"MOE Key Laboratory of Marine Genetics and Breeding and Sars-Fang Centre, Ocean University of China, Qingdao 266003, China.","Liu F, Li Y, Yu H, 
Zhang L, Hu J, Bao Z, Wang S",,"Fundamental Research Funds for the Central Universities, National Natural Science Foundation of China, Shandong Province of China, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities, National Key Research and Development Program of China, Shandong Natural Science Foundation",11.0,"China, China" +33238004,MetaTropismDB,0.996033907,MetaTropismDB,0.996033907,,0,1,http://www.introni.it/Metastasis/metastasis.html,200,,"(45.6960,9.6672)",no_wayback,2020-11-01,"Department of Specialistic Clinical and Odontostomatological Sciences, Polytechnic University of Marche, Via Brecce Bianche, 60131, Ancona, Italy.","Giulietti M, Bastianoni M, Cecati M, Ruzzo A, Bracci M, Malavolta M, Piacenza F, Giacconi R, Piva F",,,1.0,Italy +33245771,MarkerDB,0.996113002,MarkerDB,0.996113002,,0,1,http://markerdb.ca,301,,"(40.7402,-73.9996)",http://web.archive.org/web/20221012132908/https://markerdb.ca/,2021-01-01,"Department of Biological Sciences, University of Alberta, Edmonton, AB T6G 2E9, Canada.","Wishart DS, Bartok B, Oler E, Liang KYH, Budinski Z, Berjanskii M, Guo A, Cao X, Wilson M",,Canada Foundation for Innovation,13.0,Canada +33382886,MetSRR,0.996502876,MetSRR,0.996502876,Metabolic Syndrome Research Resource,0.966591418,1,http://www.healthdisparityinformatics.com/MetSRR,"HTTPConnectionPool(host='www.healthdisparityinformatics.com', port=80): Max retries exceeded with url: /MetSRR (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2020-12-01,"Department of Mathematics and Computer Science Fisk University, Nashville, TN 37208, USA.","Jenkins WS, Richardson C, Williams A, Williams-DeVane CR",,,0.0,United States +33418085,MicroPhenoDB,0.990126431,MicroPhenoDB,0.990126431,,0,1,"http://www.liwzlab.cn/microphenodb, http://lilab2.sysu.edu.cn/microphenodb","302, HTTPConnectionPool(host='lilab2.sysu.edu.cn', port=80): Max retries exceeded 
with url: /microphenodb (Caused by ConnectTimeoutError(, 'Connection to lilab2.sysu.edu.cn timed out. (connect timeout=5)'))",,"(29.4159,121.3397), ","http://web.archive.org/web/20220802011822/http://www.liwzlab.cn/microphenodb/, no_wayback",2020-12-01,"Zhongshan School of Medicine, Sun Yat-sen University, Guangzhou 510080, China.","Yao G, Zhang W, Yang M, Yang H, Wang J, Zhang H, Wei L, Xie Z, Li W",,"National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Natural Science Foundation of China",2.0,China +33502607,Missense3D-DB,0.892035206,Missense3D-DB,0.892035206,,0,1,http://missense3d.bc.ic.ac.uk,200,,"(51.5085,-0.1257)",http://web.archive.org/web/20220514205043/http://missense3d.bc.ic.ac.uk/,2021-01-27,"Department of Life Sciences, Centre for Integrative System Biology and Bioinformatics, Imperial College London, London, SW7 2AZ, UK.","Khanna T, Hanna G, Sternberg MJE, David A",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",4.0, +33514395,MPM,0.994437575,MPM,0.994437575,My Personal Mutanome,0.963305771,1,http://mutanome.lerner.ccf.org,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20220620234332/https://mutanome.lerner.ccf.org/,2021-01-29,"Genomic Medicine Institute, Lerner Research Institute, Cleveland Clinic, Cleveland, OH, 44195, USA.","Zhou Y, Zhao J, Fang J, Martin W, Li L, Nussinov R, Chan TA, Eng C, Cheng F",,"NCI NIH HHS, NHLBI NIH HHS, Foundation for the National Institutes of Health, Foundation for the National Institutes of Health, NIA NIH HHS, Foundation for the National Institutes of Health, Foundation for the National Institutes of Health, NCI NIH 
HHS",4.0,United States +33600011,MutSpliceDB,0.996531308,MutSpliceDB,0.996531308,,0,1,http://brb.nci.nih.gov/splicing,301,,"(39.4143,-77.4105)",http://web.archive.org/web/20221020102450/https://brb.nci.nih.gov/splicing/,2021-03-01,"Division of Cancer Treatment and Diagnosis, Biometric Research Program, National Cancer Institute, Rockville, Maryland, USA.","Palmisano A, Vural S, Zhao Y, Sonkin D",,,0.0,United States +33683131,MRMAssayDB,0.997641921,MRMAssayDB,0.997641921,,0,1,http://mrmassaydb.proteincentre.com,200,,"(48.4993,-123.4003)",http://web.archive.org/web/20220726193201/http://mrmassaydb.proteincentre.com/,2021-03-08,"University of Victoria - Genome BC Proteomics Centre, Victoria, British Columbia V8Z 7X8, Canada.","Bhowmick P, Roome S, Borchers CH, Goodlett DR, Mohammed Y",,"Genome British Columbia, European Regional Development Fund, Genome British Columbia, Genome British Columbia, Genome Canada, Genome British Columbia",1.0,Canada +33685383,MAPslnc,0.99292849,MAPslnc,0.99292849,,0,1,http://lncrnapipe.cimap.res.in,301,,"(25.3986,81.8418)",http://web.archive.org/web/20210921030425/https://lncrnapipe.cimap.res.in/,2021-03-18,"Information and Communication Technology Department, CSIR-Central Institute of Medicinal and Aromatic Plants, P.O. 
CIMAP, Lucknow, India.","Shukla B, Gupta S, Srivastava G, Sharma A, Shukla AK, Shasany AK",,Central Institute of Medicinal and Aromatic Plants,0.0,India +33780471,MCPdb,0.99681139,MCPdb,0.99681139,The bacterial microcompartment database,0.613576069,1,http://mcpdb.mbi.ucla.edu,301,,"(34.0522,-118.2437)",http://web.archive.org/web/20220524065133/https://mcpdb.mbi.ucla.edu/,2021-03-29,"UCLA Molecular Biology Institute, University of California Los Angeles, Los Angeles, California, United States of America.","Ochoa JM, Bair K, Holton T, Bobik TA, Yeates TO",,"NIAID NIH HHS, Howard Hughes Medical Institute, Howard Hughes Medical Institute Gilliam Fellowship, National Institute of Allergy and Infectious Diseases",3.0,United States +33822911,MENSAdb,0.992488444,MENSAdb,0.992488444,MEmbrane protein dimer Novel Structure Analyser database,0.932648856,1,http://www.moreiralab.com/resources/mensadb,301,,"(53.2192,6.5667)",http://web.archive.org/web/20221018071327/http://www.moreiralab.com/resources/mensadb/,2021-04-01,"Center for Neuroscience and Cell Biology, University of Coimbra, Coimbra 3005-504, Portugal.","Matos-Filipe P, Preto AJ, Koukos PI, Mourão J, Bonvin AMJJ, Moreira IS",,"Fundação para a Ciência e a Tecnologia, Dutch Research Council (NWO), Dutch Research Council (NWO), Fundação para a Ciência e a Tecnologia, Fundação para a Ciência e a Tecnologia, Fundação para a Ciência e a Tecnologia, European Regional Development Fund, Fundação para a Ciência e a Tecnologia",0.0,Portugal +33909069,Monosaccharide Biosynthesis Pathways Database,0.936623167,,0,Monosaccharide Biosynthesis Pathways Database,0.936623167,1,http://www.bio.iitb.ac.in/mbpd,302,,"(19.0728,72.8826)",http://web.archive.org/web/20220212143215/https://www.bio.iitb.ac.in/mbpd/,2021-12-01,None,"Srivastava J, Sunthar P, Balaji PV",,"Council of Scientific and Industrial Research, Government of India",0.0, 
+34052284,miREV,0.99435091,miREV,0.99435091,,0,1,http://www.physio.wzw.tum.de/mirev,301,,"(48.4035,11.7488)",http://web.archive.org/web/20211022124935/https://www.physio.wzw.tum.de/mirev/,2021-05-28,"Animal Physiology and Immunology, Technical University of Munich, Freising, Germany. Electronic address: alex.hildebrandt@tum.de.","Hildebrandt A, Kirchner B, Nolte-'t Hoen ENM, Pfaffl MW, Pfaffl MW",,Technische Universität München,5.0,Germany +34147352,MPSBase,0.996857762,MPSBase,0.996857762,,0,1,"http://www.ncbi.nlm.nih.gov/geo/, http://www.ufrgs.br/mpsbase","301, 302",,"(38.9896,-77.1538), (-30.0328,-51.2302)","http://web.archive.org/web/20221103131023/https://www.ncbi.nlm.nih.gov/geo/, http://web.archive.org/web/20220527090532/https://www.ufrgs.br/mpsbase/",2021-06-15,"Graduation Program on Biotechnology/Bioinformatics, UFRGS, Porto Alegre 91501-970, Brazil; Cells, Tissues and Genes Laboratory, HCPA, Porto Alegre 90035903, Brazil; Bioinformatics Core, HCPA, Porto Alegre 90035903, Brazil.","Soares LDF, Villalba Silva GC, Kubaski F, Giugliani R, Matte U",,,0.0,"Brazil, Brazil, Brazil" +34156446,MetamORF,0.997968137,MetamORF,0.997968137,,0,1,http://metamorf.hb.univ-amu.fr,301,,"(43.2970,5.3811)",http://web.archive.org/web/20220619170740/https://metamorf.hb.univ-amu.fr/,2021-06-01,"Aix-Marseille University, INSERM, TAGC, Turing Centre for Living Systems, 163 Avenue de Luminy, Marseille 13009, France.","Choteau SA, Wagner A, Pierre P, Spinelli L, Brun C",,the Investissements Avenir French Government program managed by the French National Research Agency,3.0,France +34156447,mPPI,0.952142298,mPPI,0.952142298,,0,1,http://bis.zju.edu.cn/mppi,301,,"(30.2936,120.1614)",http://web.archive.org/web/20220805072024/http://bis.zju.edu.cn/mppi/,2021-06-01,"Department of Bioinformatics, College of Life Sciences, Zhejiang University, Hangzhou 310058, China.","Zhou Y, Chen H, Li S, Chen M",,"National Key Research and Development Program of China, National Natural Sciences Foundation 
of China",0.0,China +34177338,MassBase,0.987924576,MassBase,0.987924576,,0,1,"http://webs2.kazusa.or.jp/massbase/, http://webs2.kazusa.or.jp/km2","200, 301",,"(35.3343,139.4072), (35.3343,139.4072)","http://web.archive.org/web/20220120143154/http://webs2.kazusa.or.jp/massbase/, http://web.archive.org/web/20210520112911/http://webs2.kazusa.or.jp/km2/",2021-03-01,"Kazusa DNA Research Institute, 2-6-7 Kazusa-Kamatari, Kisarazu, Chiba 292-0818, Japan.","Ara T, Sakurai N, Suzuki H, Aoki K, Saito K, Shibata D",,,2.0,Japan +34245304,MtExpress,0.993226945,MtExpress,0.993226945,,0,1,http://medicago.toulouse.inrae.fr/MtExpress,301,,"(43.6043,1.4437)",http://web.archive.org/web/20220317093318/https://medicago.toulouse.inrae.fr/MtExpress,2021-11-01,"LIPME, INRAE, CNRS, Université de Toulouse, 24 Chemin de Borde Rouge, 31320 Auzeville-Tolosane, Castanet-Tolosan 31320, France.","Carrere S, Verdier J, Gamas P",,"Agence Nationale de la Recherche, Plant2Pro Carnot Institute, Agence Nationale de la Recherche, Agence Nationale de la Recherche, Agence Nationale de la Recherche",1.0,France +34266386,MitoTox,0.988955975,MitoTox,0.988955975,toxicity database,0.584199995,1,http://www.mitotox.org,301,,"(1.3215,103.6957)",http://web.archive.org/web/20221017014300/https://www.mitotox.org/,2021-07-15,"Graduate Institute of Biomedical Electronics and Bioinformatics, National Taiwan University, Taipei, Taiwan.","Lin YT, Lin KH, Huang CJ, Wei AC",,"Ministry of Science and Technology, Taiwan, Ministry of Science and Technology, Taiwan",2.0, +34349127,MiREDiBase,0.997606039,MiREDiBase,0.997606039,,0,1,http://ncrnaome.osumc.edu/miredibase,302,,"(39.9612,-82.9988)",http://web.archive.org/web/20220615222821/https://ncrnaome.osumc.edu/miredibase/,2021-08-04,"Department of Clinical and Experimental Medicine, University of Catania, Catania, Italy.","Marceca GP, Distefano R, Tomasello L, Lagana A, Russo F, Calore F, Romano G, Bagnoli M, Gasparini P, Ferro A, Acunzo M, Ma Q, Croce CM, Nigita G",,"U.S. 
Department of Health & Human Services | NIH | National Cancer Institute (NCI), NCATS NIH HHS, NCI NIH HHS, U.S. Department of Health & Human Services | NIH | National Cancer Institute, NCATS NIH HHS",4.0,Italy +34363073,MIK,0.993417561,MIK,0.993417561,Male Infertility Knowledgebase,0.925598693,1,http://mik.bicnirrh.res.in,200,,"(19.0728,72.8826)",http://web.archive.org/web/20211113224934/http://mik.bicnirrh.res.in/,2021-08-01,"Genetic Research Center, ICMR-National Institute for Research in Reproductive Health, J.M. Street, Parel, Mumbai 400012, India.","Joseph S, Mahale SD",,,0.0,India +34485275,MeiosisOnline,0.993370068,MeiosisOnline,0.993370068,,0,1,http://mcg.ustc.edu.cn/bsc/meiosis/index.html,200,,"(31.8639,117.2808)",http://web.archive.org/web/20180604113006/http://mcg.ustc.edu.cn:80/bsc/meiosis/index.html,2021-08-13,"First Affiliated Hospital of USTC, Hefei National Laboratory for Physical Sciences at Microscale, School of Basic Medical Sciences, Division of Life Sciences and Medicine, CAS Center for Excellence in Molecular Cell Science, University of Science and Technology of China, Hefei, China.","Jiang X, Zhao D, Ali A, Xu B, Liu W, Wen J, Zhang H, Shi Q, Zhang Y",,,0.0,"China, China" +34510194,Mollusca mitochondrial database,0.986387879,MODB,0.971826156,Mollusca mitochondrial database,0.986387879,1,http://modb.ytu.edu.cn,200,,"(31.2222,121.4581)",http://web.archive.org/web/20220501164159/http://modb.ytu.edu.cn/,2021-09-01,"College of Life Sciences, Yantai University, No.30 Qingquan Road, Laishan District, Yantai, Shandong 264005, China.","Qu J, Xu Y, Cui Y, Wu S, Wang L, Liu X, Xing Z, Guo X, Wang S, Li R, Sun X, Li X, Wang X, Liu T, Wang X",,"Doctoral Science Research Foundation of Yantai University, National Natural Science Foundation of China, Doctoral Science Research Foundation of Yantai University, Shandong Provincial Natural Science Foundation, China",0.0,China +34729303,MCDB,0.984939098,MCDB,0.984939098,Mitotic Catastrophe 
Database,0.963184257,1,http://www.combio-lezhang.online/MCDB/index_html,301,,"(29.4159,121.3397)",http://web.archive.org/web/20210714155651/http://www.combio-lezhang.online/MCDB/index_html/,2021-06-07,"Innovation Center of Nursing Research, West China Biomedical Big Data Center, State Key Laboratory of Biotherapy and Cancer Center, West China Hospital, College of Computer Science, and Collaborative Innovation Center of Biotherapy, Sichuan University, Chengdu 610065, China.","Zhang L, Zhang L, Guo Y, Xiao M, Feng L, Yang C, Wang G, Ouyang L",,,1.0,"China, China, China" +34900127,MIKB,0.997301161,MIKB,0.997301161,Myocardial infarction knowledge base,0.9902739,1,http://www.sysbio.org.cn/mikb,301,,"(22.2783,114.1747)",no_wayback,2021-11-16,"Institutes for Systems Genetics, Frontiers Science Center for Disease-related Molecular Network, West China Hospital, Sichuan University, Sichuan 610212, China.","Zhan C, Zhang Y, Liu X, Wu R, Zhang K, Shi W, Shen L, Shen K, Fan X, Ye F, Shen B",,,0.0,"China, China" +35424258,MeFSAT,0.988593876,MeFSAT,0.988593876,Medicinal Fungi Secondary metabolites And Therapeutics,0.926579752,1,http://cb.imsc.res.in/mefsat,301,,"(13.0156,80.2467)",no_wayback,2021-01-12,The Institute of Mathematical Sciences (IMSc) Chennai 600113 India asamal@imsc.res.in.,"Vivek-Ananth RP, Sahoo AK, Kumaravel K, Mohanraj K, Samal A",,"Science and Engineering Research Board, Department of Atomic Energy, Government of India, Max-Planck-Gesellschaft",0.0,India +35559777,MESOCOSM,0.996743441,MESOCOSM,0.996743441,,0,1,http://aliayadi.github.io/MESOCOSM-database,301,,"(37.7621,-122.3971)",no_wayback,2020-12-22,"CEREGE, CNRS, Aix Marseille Univ, IRD, INRAE, Coll France, Aix-en-Provence, France. 
Electronic address: ayadi@cerege.fr.","Ayadi A, Rose J, de Garidel-Thoron C, Hendren C, Wiesner MR, Auffan M",,"Horizon 2020, Agence Nationale de la Recherche",0.0,"France, France" +20672376,pfSNP,0.996151626,pfSNP,0.996151626,SNPs,0.563585818,1,http://pfs.nus.edu.sg,"HTTPConnectionPool(host='pfs.nus.edu.sg', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to pfs.nus.edu.sg timed out. (connect timeout=5)'))",,,no_wayback,2011-01-01,"Department of Biochemistry Yong Loo Lin School of Medicine, National University of Singapore, Singapore.","Wang J, Ronaghi M, Chong SS, Lee CG",,,19.0,"Singapore, Singapore" +21486466,OryzaPG,0.995268643,OryzaPG,0.995268643,,0,1,http://oryzapg.iab.keio.ac.jp,"HTTPConnectionPool(host='oryzapg.iab.keio.ac.jp', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to oryzapg.iab.keio.ac.jp timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160101180136/http://oryzapg.iab.keio.ac.jp/,2011-04-12,"Institute for Advanced Biosciences, Keio University, 403-1 Daihoji, Tsuruoka, Yamagata 997-0017, Japan.","Helmy M, Tomita M, Ishihama Y",,,28.0,Japan +21491493,Noncoded Amino acids Database,0.975250321,NCAD,0.637544513,Noncoded Amino acids Database,0.975250321,1,http://recerca.upc.edu/imem/index.htm,302,,"(41.3597,2.1003)",http://web.archive.org/web/20100612153125/http://recerca.upc.edu/imem/index.htm,2011-04-12,"Departament d'Enginyeria Química, ETS d'Enginyeria Industrial de Barcelona, Universitat Politècnica de Catalunya, Diagonal 647, 08028 Barcelona, Spain.","Revilla-López G, Rodríguez-Ropero F, Curcó D, Torras J, Isabel Calaza M, Zanuy D, Jiménez AI, Cativiela C, Nussinov R, Alemán C",,"Ministerio de Ciencia e Innovación - FEDER, Ministerio de Ciencia e Innovación - FEDER, Intramural Research Program of the NIH, Generalitat de Catalunya, Gobierno de Aragón, CCR NIH HHS, NCI NIH HHS, Ministerio de Ciencia e Innovación - FEDER, Center for Cancer Research, 
National Cancer Institute, National Institutes of Health, National Cancer Institute",4.0,Spain +21593080,ParkDB,0.997004271,ParkDB,0.997004271,,0,1,http://www2.cancer.ucl.ac.uk/Parkinson_Db2,404,,,http://web.archive.org/web/20130703154226/http://www2.cancer.ucl.ac.uk/Parkinson_Db2/,2011-05-18,"UCL, Department of Cancer Biology, University College London, Gower Street, London, UK.","Taccioli C, Tegnér J, Maselli V, Gomez-Cabrero D, Altobelli G, Emmett W, Lescai F, Gustincich S, Stupka E",,,14.0, +21731755,PanSNPdb,0.997955024,PanSNPdb,0.997955024,sian,0.570886314,1,http://www4a.biotec.or.th/PASNP,404,,,http://web.archive.org/web/20191225180055/http://www4a.biotec.or.th:80/PASNP,2011-06-23,"National Center for Genetic Engineering and Biotechnology (BIOTEC), Klong Luang, Pathumthani, Thailand.","Ngamphiw C, Assawamakin A, Xu S, Shaw PJ, Yang JO, Ghang H, Bhak J, Liu E, Tongsima S, ",,,28.0,Thailand +21735248,ORchestra,0.99726969,ORchestra,0.99726969,,0,1,http://www.utwente.nl/choir/orchestra,301,,"(52.2183,6.8958)",http://web.archive.org/web/20140908100132/http://www.utwente.nl/choir/orchestra/,2011-07-07,"University of Twente, P.O. Box 217, 7500 AE Enschede, The Netherlands. p.j.h.hulshof@utwente.nl","Hulshof PJ, Boucherie RJ, Essen JT, Hans EW, Hurink JL, Kortbeek N, Litvak N, Vanberkel PT, Veen Ev, Veltman B, Vliegen IM, Zonderland ME",,,2.0,Netherlands +21765097,PGAT,0.996497869,PGAT,0.996497869,Prokaryotic-genome Analysis Tool,0.875871877,1,http://nwrce.org/pgat,200,United States,"(42.6898,-84.6427)",http://web.archive.org/web/20220618020223/http://nwrce.org/pgat,2011-07-15,"Department of Microbiology, University of Washington, Seattle, WA 98195, USA. 
mbrittna@uw.edu","Brittnacher MJ, Fong C, Hayden HS, Jacobs MA, Radey M, Rohmer L",,"NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS",50.0,United States +21821666,NeuroPedia,0.996082842,NeuroPedia,0.996082842,,0,1,http://proteomics.ucsd.edu/Software/NeuroPedia.html,200,,"(32.7157,-117.1647)",http://web.archive.org/web/20220308000028/http://proteomics.ucsd.edu/Software/NeuroPedia.html,2011-08-05,"Department of Electrical and Computer Engineering, University of California, San Diego, La Jolla, California 92093-0744, USA.","Kim Y, Bark S, Hook V, Bandeira N",,"NIMH NIH HHS, NINDS NIH HHS, NIDA NIH HHS, NCRR NIH HHS, NIDA NIH HHS, NHLBI NIH HHS",27.0,United States +21884625,NFI-Regulome,0.677506616,NFI-Regulome,0.677506616,Database,0.495260537,1,http://nfiregulome.ccr.buffalo.edu,"HTTPConnectionPool(host='nfiregulome.ccr.buffalo.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20191101175413/http://nfiregulome.ccr.buffalo.edu:80/,2011-01-20,"Department of Biochemistry, State University of New York at Buffalo, 140 Farber Hall, Buffalo, NY, 14214, USA. rgron@buffalo.edu.","Gronostajski RM, Guaneri J, Lee DH, Gallo SM",,,11.0,United States +21890895,OPM,0.993578792,OPM,0.993578792,Orientations of Proteins in Membranes,0.970261431,1,"http://opm.phar.umich.edu, http://opm.phar.umich.edu/server.php","301, 301",,"(37.7621,-122.3971), (37.7621,-122.3971)","http://web.archive.org/web/20221102223433/https://opm.phar.umich.edu/, http://web.archive.org/web/20220814085924/https://opm.phar.umich.edu/server.php",2011-09-02,"Department of Medicinal Chemistry, College of Pharmacy, University of Michigan, 428 Church Street, Ann Arbor, MI 48109-1065 USA. 
almz@umich.edu","Lomize MA, Pogozheva ID, Joo H, Mosberg HI, Lomize AL",,"NIDA NIH HHS, NIDDK NIH HHS",589.0,United States +21928249,Nanosized Cancer Polymarker Biochip Project,0.891584954,RBLA0,0.827072576,Nanosized Cancer Polymarker Biochip Project,0.891584954,1,http://serviziweb.ulss12.ve.it/firbabo,"HTTPConnectionPool(host='serviziweb.ulss12.ve.it', port=80): Max retries exceeded with url: /firbabo (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,no_wayback,2011-07-01,"ABO Association (now ABO Foundation), c/o Regional Center for the Study of Biological Markers of Malignancy, Department of Clinical Pathology, AULSS 12, Venice - Italy.","Leon AE, Fabricio AS, Benvegnù F, Michilin S, Secco A, Spangaro O, Meo S, Gion M",,,1.0,Italy +"21976737, 27789697",PDBj,0.996294558,PDBj,0.996294558,Protein Data Bank Japan,0.880288279,2,http://pdbj.org,301,Japan,"(34.7615,135.5086)",http://web.archive.org/web/20221101024343/https://pdbj.org/,2016-10-26,"Institute for Protein Research and Immunology Frontier Research Center, Osaka University, 3-1 Yamadaoka, Suita, Osaka 565-0871, Japan., Institute for Protein Research, Osaka University, 3-2 Yamadaoka, Suita, Osaka 565-0871, Japan akinjo@protein.osaka-u.ac.jp.","Kinjo AR, Suzuki H, Yamashita R, Ikegawa Y, Kudou T, Igarashi R, Kengaku Y, Cho H, Standley DM, Nakagawa A, Nakamura H, Kinjo AR, Bekker GJ, Suzuki H, Tsuchiya Y, Kawabata T, Ikegawa Y, Nakamura H",", ",", ",101.0,"Japan, Japan" +22139910,NCBI Taxonomy,0.87996386,NCBI Taxonomy,0.87996386,,0,1,http://www.ncbi.nlm.nih.gov/taxonomy,301,,"(38.9896,-77.1538)",http://web.archive.org/web/20221111125217/https://www.ncbi.nlm.nih.gov/taxonomy/,2011-12-01,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA. 
federhen@ncbi.nlm.nih.gov",Federhen S,,Intramural NIH HHS,481.0,United States +"22139911, 25593349, 27899619, 31724716",neXtProt,0.98649776,neXtProt,0.98649776,,0,4,http://www.nextprot.org,301,,"(46.5290,6.5626)",http://web.archive.org/web/20221101152757/https://www.nextprot.org/,2020-01-01,"CALIPHO Group, Swiss Institute of Bioinformatics, CMU - 1, Rue Michel Servet 1211 Geneva 4, Switzerland., CALIPHO group, SIB Swiss Institute of Bioinformatics, Geneva, Switzerland, 1211 Department of Human Protein Sciences, Faculty of Medicine, University of Geneva, Geneva, Switzerland, 1211 pascale.gaudet@isb-sib.ch., CALIPHO group, SIB Swiss Institute of Bioinformatics, Geneva, Switzerland, 1206 pascale.gaudet@sib.swiss., CALIPHO group, SIB Swiss Institute of Bioinformatics, Geneva, Switzerland.","Lane L, Argoud-Puy G, Britan A, Cusin I, Duek PD, Evalet O, Gateau A, Gaudet P, Gleizes A, Masselot A, Zwahlen C, Bairoch A, Gaudet P, Michel PA, Zahn-Zabal M, Cusin I, Duek PD, Evalet O, Gateau A, Gleizes A, Pereira M, Teixeira D, Zhang Y, Lane L, Bairoch A, Gaudet P, Michel PA, Zahn-Zabal M, Britan A, Cusin I, Domagalski M, Duek PD, Gateau A, Gleizes A, Hinard V, Rech de Laval V, Lin J, Nikitin F, Schaeffer M, Teixeira D, Lane L, Bairoch A, Zahn-Zabal M, Michel PA, Gateau A, Nikitin F, Schaeffer M, Audot E, Gaudet P, Duek PD, Teixeira D, Rech de Laval V, Samarasinghe K, Bairoch A, Lane L",", , , ",", , , Swiss State Secretariat for Education, Research and Innovation",258.0,"Switzerland, Switzerland, Switzerland, Switzerland, Switzerland" +22369214,nsLTPDB,0.937543948,nsLTPDB,0.937543948,plant non-specific lipid transfer protein database,0.758426607,1,http://nsltpdb.life.nthu.edu.tw,"HTTPConnectionPool(host='nsltpdb.life.nthu.edu.tw', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20220418011726/http://nsltpdb.life.nthu.edu.tw/,2012-01-17,"Institute of 
Bioinformatics and Structural Biology, National Tsing Hua University, Hsinchu, Taiwan.","Wang NJ, Lee CC, Cheng CS, Lo WC, Yang YF, Chen MN, Lyu PC",,,22.0, +22419844,NeMedPlant,0.994859576,NeMedPlant,0.994859576,,0,1,http://bif.uohyd.ac.in/nemedplant/orhttp://202.41.85.11/nemedplant,404,,,no_wayback,2012-02-28,None,"Meetei PA, Singh P, Nongdam P, Prabhu NP, Rathore R, Vindal V",,,6.0, +22517761,NGS,0.890904665,NGS,0.890904665,Next Generation Sequencing Catalog,0.871557927,1,http://bioinfo.mc.vanderbilt.edu/NGS/index.html,"HTTPConnectionPool(host='bioinfo.mc.vanderbilt.edu', port=80): Max retries exceeded with url: /NGS/index.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20151219040737/http://bioinfo.mc.vanderbilt.edu:80/NGS/index.html,2012-04-19,"Department of Biomedical Informatics, Vanderbilt University School of Medicine, Nashville, TN 37203, USA.","Xia J, Wang Q, Jia P, Wang B, Pao W, Zhao Z",,"NCI NIH HHS, NCI NIH HHS",22.0,United States +22535208,PaxDb,0.987990916,PaxDb,0.987990916,,0,1,http://pax-db.org,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20221101211224/https://pax-db.org/,2012-04-24,"Institute of Molecular Life Sciences, University of Zurich, Winterthurerstrasse 190, 8057 Zurich, Switzerland.","Wang M, Weiss M, Simonovic M, Haertinger G, Schrimpf SP, Hengartner MO, von Mering C",,,272.0,Switzerland +22563442,MycoRRdb,0.989182293,MycoRRdb,0.989182293,,0,1,http://mycorrdb.uohbif.in,"HTTPConnectionPool(host='mycorrdb.uohbif.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20191231191420/http://mycorrdb.uohbif.in:80/,2012-04-26,"Department of Biotechnology, School of Life Sciences, University of Hyderabad, Hyderabad, India.","Midha M, Prasad NK, Vindal V",,,6.0,India 
+22581809,NORM,0.674789657,NORM,0.674789657,,0,1,http://www.nirs.go.jp/db/anzendb/NORMDB/ENG/index.php,"HTTPConnectionPool(host='www.nirs.go.jp', port=80): Max retries exceeded with url: /db/anzendb/NORMDB/ENG/index.php (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20150214111012/http://www.nirs.go.jp:80/db/anzendb/NORMDB/ENG/index.php,2012-05-11,"Research Center for Radiation Protection, National Institute of Radiological Sciences, 4-9-1 Anagawa, Inage, Chiba 263-8555, Japan. iwaoka@nirs.go.jp","Iwaoka K, Yonehara H",,,1.0,Japan +"22674824, 27613420",PCDDB,0.998229384,PCDDB,0.998229384,Protein Circular Dichroism Data Bank,0.960739791,2,http://pcddb.cryst.bbk.ac.uk,302,United Kingdom,"(51.5219,-0.130315)",http://web.archive.org/web/20221019195050/https://pcddb.cryst.bbk.ac.uk/,2016-09-08,"School of Biological and Chemical Sciences, Queen Mary University of London, London, United Kingdom. r.w.janes@qmul.ac.uk, Institute of Structural and Molecular Biology, Birkbeck College, University of London, London WC1E 7HX, UK.","Janes RW, Miles AJ, Woollett B, Whitmore L, Klose D, Wallace BA, Whitmore L, Miles AJ, Mavridis L, Janes RW, Wallace BA",", ","Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology 
and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",19.0,United Kingdom +22712730,PeanutDB,0.997910798,PeanutDB,0.997910798,,0,1,http://bioinfolab.muohio.edu/txid3818v1,"HTTPConnectionPool(host='bioinfolab.muohio.edu', port=80): Max retries exceeded with url: /txid3818v1 (Caused by ConnectTimeoutError(, 'Connection to bioinfolab.muohio.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20200808235825/http://bioinfolab.muohio.edu/txid3818v1/,2012-06-19,"State Key Laboratory for Biology of Plant Diseases and Insect Pests, Institute of Plant Protection, Chinese Academy of Agricultural Sciences, Beijing 100193, China.","Duan X, Schmidt E, Li P, Lenox D, Liu L, Shu C, Zhang J, Liang C",,NIGMS NIH HHS,7.0,China +22768977,OTD,0.98326385,OTD,0.98326385,Oomycete Transcriptomics Database,0.776904251,1,http://www.eumicrobedb.org/transcripts,404,,,http://web.archive.org/web/20210408031001/http://www.eumicrobedb.org/transcripts/,2012-07-06,"Virginia Bioinformatics Institute, Virginia Tech, Blacksburg, VA 24061, USA. 
tsucheta@gmail.com","Tripathy S, Deo T, Tyler BM",,,3.0,United States +22800569,PCBOST,0.969115496,PCBOST,0.969115496,Protein Classification Based on Structural Trees,0.917069605,1,http://strees.protres.ru,200,,"(54.8325,37.6195)",http://web.archive.org/web/20220402080448/http://strees.protres.ru/,2012-07-16,"Institute of Protein Research, Russian Academy of Sciences, Pushchino, Moscow Region, 142290, Russian Federation.","Gordeev AB, Efimov AV",,,3.0,Russian Federation +22833564,NESdb,0.996034861,NESdb,0.996034861,,0,1,http://prodata.swmed.edu/LRNes,301,,"(32.8252,-96.8388)",http://web.archive.org/web/20220420041009/http://prodata.swmed.edu/LRNes/,2012-07-25,"Department of Pharmacology, University of Texas Southwestern Medical Center at Dallas, Dallas, TX 75390, USA.","Xu D, Grishin NV, Chook YM",,"NIGMS NIH HHS, Howard Hughes Medical Institute, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",84.0,United States +23046413,PAGED,0.99276948,PAGED,0.99276948,Pathway And Gene Enrichment Database,0.900361799,1,http://bio.informatics.iupui.edu/PAGED,302,,"(39.2014,-85.9214)",no_wayback,2012-09-11,"School of Informatics, Indiana University, Indianapolis, IN 46202, USA.","Huang H, Wu X, Sonachalam M, Mandape SN, Pandey R, MacDorman KF, Wan P, Chen JY",,,4.0,United States +23084601,PESNPdb,0.998070478,PESNPdb,0.998070478,,0,1,http://bejerano.stanford.edu/pesnpdb,301,United States,"(37.423,-122.1639)",no_wayback,2012-10-18,"Department of Developmental Biology, Stanford University, Stanford, CA 94305, USA.","Tuteja G, Cheng E, Papadakis H, Bejerano G",,,14.0,United States +23084778,NSort/DB,0.925428107,NSort/DB,0.925428107,,0,1,http://www.nsort.org/db,"HTTPConnectionPool(host='www.nsort.org', port=80): Max retries exceeded with url: /db (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2012-07-25,"School of Chemistry and Molecular Biosciences, The University of Queensland, 
St. Lucia, Australia. willadsen@uq.edu.au","Willadsen K, Mohamad N, Bodén M",,,4.0,Australia +"23093593, 28053164",PaVE,0.990848164,PaVE,0.990848164,Papillomavirus Episteme,0.861218005,2,http://pave.niaid.nih.gov,302,,"(39.0437,-77.4875)",http://web.archive.org/web/20221021075622/https://pave.niaid.nih.gov/,2016-10-05,"DNA Tumor Virus Section, Laboratory of Viral Diseases, Office of Cyber Infrastructure and Computational Biology, National Institute of Allergy and Infectious Diseases, National Institutes of Health, Bethesda, MD 209892, USA., DNA Tumor Virus Section, Laboratory of Viral Diseases, National Institute of Allergy and Infectious Diseases, National Institutes of Health, Bethesda, MD 209892, USA.","Van Doorslaer K, Tan Q, Xirasagar S, Bandaru S, Gopalan V, Mohamoud Y, Huyen Y, McBride AA, Van Doorslaer K, Li Z, Xirasagar S, Maes P, Kaminsky D, Liou D, Sun Q, Kaur R, Huyen Y, McBride AA",", ","PHS HHS, Intramural NIH HHS, ",238.0,"United States, United States" +23125372,non-B,0.956009358,non-B,0.956009358,,0,1,http://nonb.abcc.ncifcrf.gov,"HTTPConnectionPool(host='nonb.abcc.ncifcrf.gov', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20120419114013/http://nonb.abcc.ncifcrf.gov/,2012-11-03,"Advanced Biomedical Computing Center, Information Systems Program, SAIC-Frederick, Inc., Frederick, MD 21702, USA.","Cer RZ, Donohue DE, Mudunuri US, Temiz NA, Loss MA, Starner NJ, Halusa GN, Volfovsky N, Yi M, Luke BT, Bacolla A, Collins JR, Stephens RM",,PHS HHS,64.0,United States +23153078,P2TF,0.991170208,P2TF,0.991170208,Predicted Prokaryotic Transcription Factors,0.982642752,1,http://www.p2tf.org,200,,"(48.6833,2.1333)",http://web.archive.org/web/20220518162222/http://www.p2tf.org/,2012-11-15,"CEA, DSV, IBEB, SBVME, LEMiRE, Saint-Paul-lez-Durance, France.","Ortet P, De Luca G, Whitworth DE, Barakat M",,,20.0,France 
+23180785,PeroxiBase,0.998095334,PeroxiBase,0.998095334,,0,1,http://peroxibase.toulouse.inra.fr,200,France,"(43.5585,1.6501)",http://web.archive.org/web/20221025110112/https://peroxibase.toulouse.inra.fr/,2012-11-24,"Université de Toulouse, UPS, UMR 5546, Laboratoire de Recherche en Sciences Végétales, France.","Fawal N, Li Q, Savelli B, Brette M, Passaia G, Fabre M, Mathé C, Dunand C",,,70.0,France +"23180799, 27987177",PGDD,0.996008992,PGDD,0.996008992,Plant Genome Duplication Database,0.988166434,2,http://chibba.agtec.uga.edu/duplication,"HTTPConnectionPool(host='chibba.agtec.uga.edu', port=80): Max retries exceeded with url: /duplication (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20201004013034/http://chibba.agtec.uga.edu/duplication/,2017-01-01,"Plant Genome Mapping Laboratory, University of Georgia, Athens, GA 30602, USA., Genomics Division, Department of Agricultural Bio-resource, National Academy of Agricultural Science, Rural Development Administration (RDA), Jeonju, South Korea.","Lee TH, Tang H, Wang X, Paterson AH, Lee TH, Kim J, Robertson JS, Paterson AH",", ",", ",289.0,"Georgia, United States" +23189029,Myosinome,0.969296992,Myosinome,0.969296992,,0,1,http://caps.ncbs.res.in/myosinome,301,,"(13.0637,77.5674)",http://web.archive.org/web/20210920193209/http://caps.ncbs.res.in/myosinome/,2012-11-12,"National Centre for Biological Sciences (NCBS-TIFR), GKVK Campus, Bangalore, India. ; Sugarcane Breeding Institute (SBI-ICAR), Coimbatore, India.","Syamaladevi DP, Sunitha MS, Kalaimathy S, Reddy CC, Iftekhar M, Pasha SN, Sowdhamini R",,,5.0,"India, India" +23196988,NURBS,0.996146083,NURBS,0.996146083,,0,1,http://shark.abl.ku.edu/nurbs,"HTTPConnectionPool(host='shark.abl.ku.edu', port=80): Max retries exceeded with url: /nurbs (Caused by ConnectTimeoutError(, 'Connection to shark.abl.ku.edu timed out. 
(connect timeout=5)'))",,,no_wayback,2012-11-29,"Applied Bioinformatics Laboratory, University of Kansas, Lawrence, KS 66047, USA. jwfang@ku.edu","Fang Y, Liu HX, Zhang N, Guo GL, Wan YJ, Fang J",,"NCI NIH HHS, NIDDK NIH HHS, NIDDK NIH HHS, NIDDK NIH HHS, NIDDK NIH HHS, NCI NIH HHS",4.0,United States +23203867,NetwoRx,0.998048842,NetwoRx,0.998048842,,0,1,http://ophid.utoronto.ca/networx,302,,"(43.7001,-79.4163)",http://web.archive.org/web/20221024153918/https://ophid.utoronto.ca/networx/,2012-11-29,"Department of Medical Biophysics, University of Toronto, Toronto, ON M5G 2M9, Canada.","Fortney K, Xie W, Kotlyar M, Griesman J, Kotseruba Y, Jurisica I",,Canadian Institutes of Health Research,6.0,Canada +23203876,OrtholugeDB,0.973012209,OrtholugeDB,0.973012209,,0,1,http://www.pathogenomics.sfu.ca/ortholugedb,301,,"(49.2497,-123.1193)",http://web.archive.org/web/20200220034209/http://www.pathogenomics.sfu.ca:80/ortholugedb/,2012-11-29,"Department of Molecular Biology and Biochemistry, Simon Fraser University, Burnaby, British Columbia V5A 1S6, Canada.","Whiteside MD, Winsor GL, Laird MR, Brinkman FS",,,40.0,Canada +23203877,NPACT,0.997277677,NPACT,0.997277677,Naturally Occurring Plant-based Anti-cancer Compound-Activity-Target database,0.987839665,1,http://crdd.osdd.net/raghava/npact,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/npact (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220122044121/http://crdd.osdd.net/raghava/npact/,2012-11-29,"Bioinformatics Division, Institute of Cytology and Preventive Oncology, I-7 Sector-39, Noida-201301, India.","Mangal M, Sagar P, Singh H, Raghava GP, Agarwal SM",,,71.0,India +23203890,OrysPSSP,0.995186687,OrysPSSP,0.995186687,,0,1,http://www.genoportal.org/PSSP/index.do,404,,,http://web.archive.org/web/20220621071147/http://www.genoportal.org/PSSP/index.do,2012-11-29,"Key Laboratory of Synthetic Biology, Institute of Plant Physiology and Ecology, Shanghai Institutes for Biological Sciences, Chinese Academy of Sciences, Shanghai 200031, China.","Pan B, Sheng J, Sun W, Zhao Y, Hao P, Li X",,,9.0,China +23203988,PDBTM,0.995859504,PDBTM,0.995859504,Protein Data Bank of transmembrane proteins,0.802712626,1,http://pdbtm.enzim.hu,400,,,http://web.archive.org/web/20221022041836/http://pdbtm.enzim.hu/,2012-11-30,"Lendület Membrane Protein Bioinformatics Research Group and Protein Structure Research Group, Institute of Enzymology, MTA RCNS, PO Box 7, H-1518 Budapest, Hungary.","Kozma D, Simon I, Tusnády GE",,,109.0,Hungary +23272737,OPTIMAS-DW,0.971398151,OPTIMAS-DW,0.971398151,,0,1,http://www.optimas-bioenergy.org/optimas_dw,302,,"(49.0151,12.1016)",http://web.archive.org/web/20211208224859/http://www.optimas-bioenergy.org/optimas_dw,2012-12-29,"Leibniz Institute of Plant Genetics and Crop Plant Research (IPK), 06466 Stadt Seeland, Corrensstr. 
3, 06466 Stadt Seeland, OT Gatersleben, Germany.","Colmsee C, Mascher M, Czauderna T, Hartmann A, Schlüter U, Zellerhoff N, Schmitz J, Bräutigam A, Pick TR, Alter P, Gahrtz M, Witt S, Fernie AR, Börnke F, Fahnenstich H, Bucher M, Dresselhaus T, Weber AP, Schreiber F, Scholz U, Sonnewald U",,,20.0,Germany +23275726,MycoProtease-DB,0.981270339,MycoProtease-DB,0.981270339,,0,1,http://www.bicjbtdrc-mgims.in/MycoProtease-DB,"HTTPConnectionPool(host='www.bicjbtdrc-mgims.in', port=80): Max retries exceeded with url: /MycoProtease-DB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200503185030/http://www.bicjbtdrc-mgims.in:80/MycoProtease-DB/,2012-12-08,"Bioinformatics Centre, JB Tropical Disease Research Centre, Mahatma Gandhi Institute of Medical Sciences, Sevagram (Wardha) 442102, Maharashtra, India.","Jena L, Kumar S, Harinath BC",,,1.0,India +23282181,PCDq,0.992855191,PCDq,0.992855191,complex,0.558051407,1,http://h-invitational.jp/hinv/pcdq,301,,"(35.6910,139.7679)",http://web.archive.org/web/20221017001219/http://h-invitational.jp/hinv/pcdq/,2012-12-12,"Integrated Databases and Systems Biology Team, Biological Information Research Center, National Institute of Advanced Industrial Science and Technology (AIST), Tokyo, Japan.","Kikugawa S, Nishikata K, Murakami K, Sato Y, Suzuki M, Altaf-Ul-Amin M, Kanaya S, Imanishi T",,,23.0,Japan +23286825,NeuroDNet,0.995239019,NeuroDNet,0.995239019,,0,1,http://bioschool.iitd.ac.in/NeuroDNet,302,,"(28.6453,77.2128)",http://web.archive.org/web/20220528132006/https://bioschool.iitd.ac.in/NeuroDNet/,2013-01-03,"Kusuma School of Biological Sciences, Indian Institute of Technology Delhi, New Delhi, India.","Vasaikar SV, Padhi AK, Jayaram B, Gomes J",,,19.0,India +23322530,NCDR CathPCI,0.706379139,NCDR 
CathPCI,0.706379139,,0,1,http://www.ncdr.com,302,,"(47.6036,-122.3256)",http://web.archive.org/web/20080828105450/http://www.ncdr.com/,2013-01-15,"Division of Cardiology, Mayo Clinic, Jacksonville, Florida, USA.","Moussa I, Hermann A, Messenger JC, Dehmer GJ, Weaver WD, Rumsfeld JS, Masoudi FA",,,64.0,United States +23448274,PASmiR,0.993088782,PASmiR,0.993088782,,0,1,"http://hi.ustc.edu.cn:8080/PASmiR, http://pcsb.ahau.edu.cn:8080/PASmiR","HTTPConnectionPool(host='hi.ustc.edu.cn', port=8080): Max retries exceeded with url: /PASmiR (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known')), 500",,", ","http://web.archive.org/web/20140722185710/http://hi.ustc.edu.cn:8080/PASmiR/, http://web.archive.org/web/20140722204640/http://pcsb.ahau.edu.cn:8080/PASmiR/",2013-03-01,"School of life sciences, Anhui Agricultural University, Hefei 230036, China.","Zhang S, Yue Y, Sheng L, Wu Y, Fan G, Li A, Hu X, Shangguan M, Wei C",,,33.0,China +23696878,PEpiD,0.998079658,PEpiD,0.998079658,Prostate Epigenetic Database,0.979941408,1,http://wukong.tongji.edu.cn/pepid,302,China,"(40.0018,116.333)",http://web.archive.org/web/20181012003137/http://wukong.tongji.edu.cn:80/pepid,2013-05-16,"Shanghai Key Laboratory of Signaling and Disease Research, Department of Bioinformatics, Shanghai Tenth People's Hospital, The School of Life Sciences and Technology, Tongji University, Shanghai, China.","Shi J, Hu J, Zhou Q, Du Y, Jiang C",,,6.0,China +23874618,NanoMiner,0.984119594,NanoMiner,0.984119594,,0,1,http://nanominer.cs.tut.fi,302,,"(61.4957,23.8041)",no_wayback,2013-07-12,"Department of Signal Processing, Tampere University of Technology, Tampere, Finland.","Kong L, Tuomela S, Hahne L, Ahlfors H, Yli-Harja O, Fadeel B, Lahesmaa R, Autio R",,,7.0,Finland +23952586,MycoSec,0.997377336,MycoSec,0.997377336,Mycobacterium,0.634468615,1,http://www.bicnbu.in/mycosec,"HTTPConnectionPool(host='www.bicnbu.in', port=80): Max retries exceeded with url: 
/mycosec (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200228041824/http://www.bicnbu.in:80/mycosec/,2013-08-16,"1 Bioinformatics Facility, Department of Botany, University of North Bengal , Siliguri, India .","Roy A, Bhattacharya S, Bothra AK, Sen A",,,5.0,India +23996831,NAPS,0.940910459,NAPS,0.940910459,Nencki Affective Picture System,0.825295125,1,http://naps.nencki.gov.pl,301,,"(52.2298,21.0118)",http://web.archive.org/web/20220617013059/https://naps.nencki.gov.pl/,2014-06-01,"Laboratory of Brain Imaging, Neurobiology Centre, Nencki Institute of Experimental Biology, Warsaw, Poland, a.marchewka@nencki.gov.pl.","Marchewka A, Zurawski Ł, Jednoróg K, Grabowska A",,,149.0,Poland +24174539,pE-DB,0.995654404,pE-DB,0.995654404,,0,1,http://pedb.vib.be,"HTTPConnectionPool(host='pedb.vib.be', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20191211120520/http://pedb.vib.be:80/,2013-10-29,"VIB Department of Structural Biology, Vrije Universiteit Brussel, Brussels, European Molecular Biology Laboratory, Hamburg Unit, EMBL c/o DESY, Hamburg, Germany, CEA, CNRS, UJF-Grenoble 1, Protein Dynamics and Flexibility, Institut de Biologie Structurale Jean-Pierre Ebel, 41 Rue Jules Horowitz, Grenoble 38027, France, Indiana University School of Medicine; Indianapolis, IN, USA, Department of Chemistry, Center of Magnetic Resonance (CERM), University of Florence, Sesto Fiorentino, Italy, Molecular Structure and Function Program, Hospital for Sick Children, Toronto, Ontario, Canada, Department of Biochemistry, University of Toronto, Toronto, Ontario, Canada, Department of Structural Biology, St. 
Jude Children's Research Hospital, Memphis, TN, USA, Department of Structural Biology, Weizmann Institute of Science, Rehovot 76100, Israel, Department of Molecular Medicine and USF Health Byrd Alzheimer's Research Institute, Morsani College of Medicine, University of South Florida, Tampa, FL, USA, Institute for Biological Instrumentation, Russian Academy of Sciences, Pushchino, Moscow Region, Russia, Department of Chemistry, University of Cambridge, Cambridge, UK, Departments of Biological Sciences and Computing Science, University of Alberta, Edmonton, AB T6G 2E8, Canada, Department of Integrative Structural and Computational Biology, The Scripps Research Institute, La Jolla, CA, USA Institute of Enzymology, Research Centre for Natural Sciences, Hungarian Academy of Sciences, Budapest.","Varadi M, Kosol S, Lebrun P, Valentini E, Blackledge M, Dunker AK, Felli IC, Forman-Kay JD, Kriwacki RW, Pierattelli R, Sussman J, Svergun DI, Uversky VN, Vendruscolo M, Wishart D, Wright PE, Tompa P",,,89.0,"Canada, Canada, Canada, Germany, France, Israel, Italy, United States, United States, United States, United States" +24174541,P-MITE,0.894164824,P-MITE,0.894164824,plant MITE databases,0.776570714,1,http://pmite.hzau.edu.cn/django/mite,301,,"(30.5833,114.2667)",http://web.archive.org/web/20211224133604/http://pmite.hzau.edu.cn/django/mite/,2013-10-29,"Department of Vegetable Crops, Key Laboratory of Horticulture Biology, Ministry of Education, College of Horticulture and Forestry Sciences, Huazhong Agricultural University, Wuhan, 430070, P. R. 
China.","Chen J, Hu Q, Zhang Y, Lu C, Kuang H",,,55.0,China +24214996,Negatome,0.987791359,Negatome,0.987791359,,0,1,http://mips.helmholtz-muenchen.de/proj/ppi/negatome,301,,"(48.2500,11.5667)",http://web.archive.org/web/20220728184642/https://mips.helmholtz-muenchen.de/proj/ppi/negatome/,2013-11-08,"Institute for Bioinformatics and Systems Biology/MIPS, HMGU - German Research Center for Environmental Health, Ingolstaedter Landstrasse 1, 85764 Neuherberg, Germany, Clueda AG, Elsenheimerstraße 59, 80687 Munich, Germany and Department of Genome Oriented Bioinformatics, Technische Universitaet Muenchen Wissenschaftszentrum Weihenstephan, 85350 Freising, Germany.","Blohm P, Frishman G, Smialowski P, Goebels F, Wachinger B, Ruepp A, Frishman D",,,47.0,"Germany, Germany, Germany" +24229347,NeuroGeM,0.99263829,NeuroGeM,0.99263829,,0,1,http://chibi.ubc.ca/neurogem,404,,,http://web.archive.org/web/20170802031037/http://chibi.ubc.ca:80/neurogem/,2013-11-14,None,"Na D, Rouf M, O'Kane CJ, Rubinsztein DC, Gsponer J",,"CIHR, Medical Research Council, Wellcome Trust, Wellcome Trust",9.0, +"24271385, 27794041",NGSmethDB,0.998551369,NGSmethDB,0.998551369,,0,2,http://bioinfo2.ugr.es/NGSmethDB,301,,"(37.1882,-3.6067)",http://web.archive.org/web/20220226095845/https://bioinfo2.ugr.es/NGSmethDB/,2016-10-27,"Facultad de Ciencias, Departmento de Genética, Universidad de Granada, 18071-Granada, Spain and Laboratorio de Bioinformática, Instituto de Biotecnología, Centro de Investigación Biomédica, 18100-Granada, Spain., Department of Genetics, Faculty of Science, University of Granada, Campus de Fuentenueva s/n, 18071-Granada, Spain.","Geisen S, Barturen G, Alganza ÁM, Hackenberg M, Oliver JL, Lebrón R, Gómez-Martín C, Carpena P, Bernaola-Galván P, Barturen G, Hackenberg M, Oliver JL",", ",", ",13.0,"Spain, Spain, Spain" 
+24271386,OnTheFly,0.997192144,OnTheFly,0.997192144,,0,1,http://bhapp.c2b2.columbia.edu/OnTheFly/index.php,301,,"(40.7143,-74.0060)",http://web.archive.org/web/20210209174228/https://bhapp.c2b2.columbia.edu/OnTheFly/index.php,2013-11-22,"Howard Hughes Medical Institute, Department of Biochemistry and Molecular Biophysics, Department of Systems Biology, Center for Computational Biology and Bioinformatics, Columbia University, 1130 St. Nicholas Avenue, New York, NY 10032, USA, Department of Life Science, Open University of Israel, Ra'anana 43107, Israel and Department of Biochemistry and Molecular Biophysics, Columbia University, 701 West 168th Street, HHSC 1104, New York, NY 10032, USA.","Shazman S, Lee H, Socol Y, Mann RS, Honig B",,"NCI NIH HHS, NIGMS NIH HHS, Howard Hughes Medical Institute",20.0,"Israel, Israel, United States, United States" +24271398,NeXO,0.978267789,NeXO,0.978267789,,0,1,http://www.nexontology.org)-an,"HTTPConnectionPool(host='www.nexontology.org)-an', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-11-23,"Department of Medicine, University of California San Diego, 9500 Gilman Drive, La Jolla, CA 92093, USA.","Dutkowski J, Ono K, Kramer M, Yu M, Pratt D, Demchak B, Ideker T",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",9.0,United States +24297253,MycoCosm,0.99669534,MycoCosm,0.99669534,,0,1,http://jgi.doe.gov/fungi,301,,"(41.2619,-95.8608)",no_wayback,2013-12-01,"US Department of Energy Joint Genome Institute, 2800 Mitchell Drive, Walnut Creek, CA 94598, USA.","Grigoriev IV, Nikitin R, Haridas S, Kuo A, Ohm R, Otillar R, Riley R, Salamov A, Zhao X, Korzeniewski F, Smirnova T, Nordberg H, Dubchak I, Shabalov I",,,475.0,United States +24297257,NECTAR,0.995319307,NECTAR,0.995319307,Non-synonymous Enriched Coding muTation 
ARchive,0.943871379,1,http://nectarmutation.org,200,,"(35.5372,129.3167)",http://web.archive.org/web/20170926005622/http://nectarmutation.org/,2013-12-01,"NIHR Cardiovascular Biomedical Research Unit, Royal Brompton and Harefield NHS Foundation Trust and Imperial College London, London SW3 6NP, UK, National Heart and Lung Institute, Imperial College, London SW3 6LY, UK, National Heart Centre Singapore, Singapore 168752, Singapore and Cardiovascular & Metabolic Disorders, Duke National University of Singapore, Singapore 169857, Singapore.","Gong S, Ware JS, Walsh R, Cook SA",,"National Institute for Health Research (NIHR), Versus Arthritis, Wellcome Trust, Medical Research Council, British Heart Foundation",2.0,"Singapore, Singapore, Singapore, Singapore, Singapore, Singapore" +"24311565, 29106611",mVOC,0.945702612,mVOC,0.945702612,,0,2,http://bioinformatics.charite.de/mvoc,301,,"(52.5244,13.4105)",http://web.archive.org/web/20221019091638/https://bioinformatics.charite.de/mvoc/,2018-01-01,"University of Rostock, Institute of Biological Sciences, Rostock 18059, Germany, Charité-University Medicine Berlin, Structural Bioinformatics Group, Institute of Physiology & Experimental Clinical Research Center, Berlin 13125, Germany and Charité-University Medicine Berlin, Division of General Pediatrics, Department of Pediatric Oncology and Hematology, Berlin 13353, Germany., University of Rostock, Institute for Biological Sciences, Albert-Einstein-Strasse 3, 18059 Rostock, Germany.","Lemfack MC, Nickel J, Dunkel M, Preissner R, Piechulla B, Lemfack MC, Gohlke BO, Toguem SMT, Preissner S, Piechulla B, Preissner R",", ",", ",149.0,"Germany, Germany, Germany, Germany" +24312499,PaGenBase,0.997129798,PaGenBase,0.997129798,,0,1,http://bioinf.xmu.edu.cn/PaGenBase,302,,"(39.9906,116.2887)",http://web.archive.org/web/20220308062600/http://bioinf.xmu.edu.cn/PaGenBase/,2013-12-02,"Department of Chemical Biology, College of Chemistry and Chemical Engineering, The Key Laboratory for 
Chemical Biology of Fujian Province, Xiamen University, Xiamen, Fujian, PR China.","Pan JB, Hu SC, Shi D, Cai MC, Li YB, Zou Q, Ji ZL",,,53.0,China +24406170,PeptiSite,0.997222483,PeptiSite,0.997222483,,0,1,http://peptisite.ucsd.edu,"HTTPConnectionPool(host='peptisite.ucsd.edu', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to peptisite.ucsd.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20180618144650/http://peptisite.ucsd.edu:80/,2014-01-06,"UCSD, Skaggs School of Pharmacy and Pharmaceutical Sciences, La Jolla, CA 92093, USA.","Acharya C, Kufareva I, Ilatovskiy AV, Abagyan R",,"NLM NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",5.0,United States +24428888,OncomiRdbB,0.993847489,OncomiRdbB,0.993847489,,0,1,http://tdb.ccmb.res.in/OncomiRdbB/index.htm,200,,"(17.3840,78.4564)",http://web.archive.org/web/20220322175246/http://tdb.ccmb.res.in/OncomiRdbB/index.htm,2014-01-15,None,"Khurana R, Verma VK, Rawoof A, Tiwari S, Nair RA, Mahidhara G, Idris MM, Clarke AR, Kumar LD",,,16.0, +24501396,nifH,0.961427331,nifH,0.961427331,,0,1,http://www.css.cornell.edu/faculty/buckley/nifh.htm,301,,"(42.3751,-71.1056)",http://web.archive.org/web/20180323172449/http://www.css.cornell.edu:80/faculty/buckley/nifh.htm,2014-02-05,"Department of Crop and Soil Sciences, Cornell University, Ithaca, NY 14853, USA.","Gaby JC, Buckley DH",,,58.0,United States +24569397,PCMdb,0.994843006,PCMdb,0.994843006,Pancreatic Cancer Methylation Database,0.990767524,1,http://crdd.osdd.net/raghava/pcmdb,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/pcmdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220421102512/http://crdd.osdd.net/raghava/pcmdb/,2014-02-26,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh-160036, India.","Nagpal G, Sharma M, Kumar S, Chaudhary K, Gupta S, Gautam A, Raghava GP",,,13.0,India +24573879,NONATObase,0.994081736,NONATObase,0.994081736,,0,1,http://nonatobase.ufsc.br,200,,"(-27.5967,-48.5492)",http://web.archive.org/web/20210309165324/http://nonatobase.ufsc.br/,2014-02-25,"Departamento de Geociências, CFH, Universidade Federal de Santa Catarina, 88040-970 Florianópolis, Santa Catarina, Brazil, Núcleo de Estudos do Mar, CCB, Universidade Federal de Santa Catarina, 88040-900 Florianópolis, Santa Catarina, Brazil, Que?Art, 80.520-590 Curitiba, Paraná, Brazil, Centro de Estudos do Mar, Universidade Federal do Paraná, 83255-000 Pontal do Sul, Paraná, Brazil, Departamento de Zoologia, Instituto de Biologia, Universidade Estadual de Campinas, 13083-970 Campinas, São Paulo, Brazil and Instituto de Biociências, Universidade de São Paulo, 05508-090 São Paulo, Brazil.","Pagliosa PR, Doria JG, Misturini D, Otegui MB, Oortman MS, Weis WA, Faroni-Perez L, Alves AP, Camargo MG, Amaral AC, Marques AC, Lana PC",,,0.0,"Brazil, Brazil, Brazil, Brazil, Brazil, Brazil" +24592289,MycobacRV,0.986873567,MycobacRV,0.986873567,,0,1,http://mycobacteriarv.igib.res.in,301,,"(26.7907,75.2061)",http://web.archive.org/web/20220615153230/https://mycobacteriarv.igib.res.in/,2014-02-15,"CSIR-Institute of Genomics and Integrative Biology, Near Jubilee Hall, Mall Road, Delhi, 110 007 India.","Chaudhuri R, Kulshreshtha D, Raghunandanan MV, Ramachandran S",,,9.0,India +24651967,oncomiRDB,0.994694889,oncomiRDB,0.994694889,,0,1,http://bioinfo.au.tsinghua.edu.cn/oncomirdb/Contact,500,,,no_wayback,2014-03-20,"MOE Key Laboratory of Bioinformatics, TNLIST Bioinformatics Division / Center for Synthetic and Systems Biology, Department of Automation, Tsinghua University, Beijing 100084, China.","Wang D, 
Gu J, Wang T, Ding Z",,,67.0,China +24666037,NRLiSt BDB,0.936061025,NRLiSt BDB,0.936061025,,0,1,http://nrlist.drugdesign.fr,200,,"(50.6942,3.1746)",http://web.archive.org/web/20221017075602/http://nrlist.drugdesign.fr/,2014-03-25,"Laboratoire Génomique, Bioinformatique et Applications, EA 4627, Conservatoire National des Arts et Métiers , 292 Rue Saint Martin, 75003 Paris, France.","Lagarde N, Ben Nasr N, Jérémie A, Guillemain H, Laville V, Labib T, Zagury JF, Montes M",,,19.0,France +24723423,OrthoMaM,0.997223616,OrthoMaM,0.997223616,,0,1,http://www.orthomam.univ-montp2.fr,200,,"(43.6109,3.8763)",http://web.archive.org/web/20221004165145/http://www.orthomam.univ-montp2.fr/,2014-04-09,"Institut des Sciences de l'Evolution de Montpellier (ISE-M), UMR 5554 CNRS IRD, Université Montpellier 2, Montpellier, France emmanuel.douzery@univ-montp2.fr.","Douzery EJ, Scornavacca C, Romiguier J, Belkhir K, Galtier N, Delsuc F, Ranwez V",,,42.0,France +24727366,PathoPlant,0.994976938,PathoPlant,0.994976938,,0,1,http://www.pathoplant.de/expression_analysis.php,200,,"(52.2680,10.5200)",http://web.archive.org/web/20220303122455/http://www.pathoplant.de/expression_analysis.php,2014-04-10,"Institut für Genetik, Technische Universität Braunschweig, Spielmannstr 7, 38106 Braunschweig, Germany.","Bolívar JC, Machens F, Brill Y, Romanov A, Bülow L, Hehl R",,,6.0,Germany +24839966,PCD,0.993127882,PCD,0.993127882,Pancreatic Cancer Database,0.967810631,1,http://www.pancreaticcancerdatabase.org,200,United States,"(39.0438,-77.4874)",http://web.archive.org/web/20220709012703/http://www.pancreaticcancerdatabase.org/,2014-05-19,"Institute of Bioinformatics; International Technology Park; Bangalore, India; Amrita School of Biotechnology; Amrita Vishwa Vidyapeetham; Kollam, Kerala India.","Thomas JK, Kim MS, Balakrishnan L, Nanjappa V, Raju R, Marimuthu A, Radhakrishnan A, Muthusamy B, Khan AA, Sakamuri S, Tankala SG, Singal M, Nair B, Sirdeshmukh R, Chatterjee A, Prasad TS, Maitra A, Gowda H, 
Hruban RH, Pandey A",,NIDDK NIH HHS,27.0,"India, India" +24923818,ParaPep,0.996673524,ParaPep,0.996673524,,0,1,http://crdd.osdd.net/raghava/parapep,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/parapep (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220428023509/http://crdd.osdd.net/raghava/parapep/,2014-06-12,"Cell biology and Immunology Division and Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh-160036, India.","Mehta D, Anand P, Kumar V, Joshi A, Mathur D, Singh S, Tuknait A, Chaudhary K, Gautam SK, Gautam A, Varshney GC, Raghava GP",,,27.0,India +24939129,PeptideAtlas,0.972864509,PeptideAtlas,0.972864509,,0,1,http://www.peptideatlas.org,200,United States,"(47.8353,-122.284)",http://web.archive.org/web/20220903162501/http://www.peptideatlas.org/,2014-06-17,"Institute for Systems Biology, Seattle, Washington.","Kusebauch U, Deutsch EW, Campbell DS, Sun Z, Farrah T, Moritz RL",,"NHGRI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",14.0, +25024350,OMICtools,0.998182535,OMICtools,0.998182535,,0,1,http://omictools.com,"HTTPConnectionPool(host='omictools.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20221017122329/http://omictools.com/,2014-07-14,"Haute-Normandie-INSERM ERI-28, Institute for Research and Innovation in Biomedicine of Rouen University, 76183 Rouen, France, Center for Research in Biological Systems, University of California, San Diego, 9500 Gilman Dr. 
La Jolla, CA 92093, USA and STATSARRAY, 76300 Sotteville-lès-Rouen, France.","Henry VJ, Bandrowski AE, Pepin AS, Gonzalez BJ, Desfeux A",,,72.0,"Eritrea, France, France, United States" +25102069,Panorama,0.978761613,Panorama,0.978761613,,0,1,http://panoramaweb.org,302,United States,"(47.6722,-122.1257)",http://web.archive.org/web/20221101180951/https://panoramaweb.org/,2014-08-18,"University of Washington , Seattle, Washington 98195, United States.","Sharma V, Eckels J, Taylor GK, Shulman NJ, Stergachis AB, Joyner SA, Yan P, Whiteaker JR, Halusa GN, Schilling B, Gibson BW, Colangelo CM, Paulovich AG, Carr SA, Jaffe JD, MacCoss MJ, MacLean B",,"NCI NIH HHS, National Cancer Institute, NCATS NIH HHS, NHLBI NIH HHS, National Heart, Lung, and Blood Institute, NIGMS NIH HHS, NCI NIH HHS, NHLBI NIH HHS, National Cancer Institute, NIGMS NIH HHS, National Institute of General Medical Sciences, National Heart, Lung, and Blood Institute, NIDA NIH HHS, NIGMS NIH HHS, NHLBI NIH HHS, Broad Institute of MIT and Harvard",112.0,United States +25172923,Naked Mole Rat Genome Resource,0.875342856,,0,Naked Mole Rat Genome Resource,0.875342856,1,http://www.naked-mole-rat.org,200,,"(51.5085,-0.1257)",http://web.archive.org/web/20220914201858/http://www.naked-mole-rat.org/,2014-08-28,"Integrative Genomics of Ageing Group, Institute of Integrative Biology, University of Liverpool, Liverpool, UK, Broad Institute of MIT and Harvard, Cambridge, MA, USA, Department of Biology, University of Rochester, NY, USA, Vertebrate and Health Genomics, The Genome Analysis Center, Norwich, UK, Department of Medical Biochemistry and Microbiology, Science for Life Laboratory, Uppsala University, Uppsala, Sweden and Department of Genetics, Harvard Medical School, Boston, MA, USA.","Keane M, Craig T, Alföldi J, Berlin AM, Johnson J, Seluanov A, Gorbunova V, Di Palma F, Lindblad-Toh K, Church GM, de Magalhães JP",,"NIA NIH HHS, NIA NIH HHS, Biotechnology and Biological Sciences Research Council, Wellcome 
Trust, Wellcome Trust",41.0,"Sweden, United States, United States, United States" +25178289,NPCDB,0.993397549,NPCDB,0.993397549,Native Pig and Chicken Breed Database,0.977203727,1,http://npcdb.snu.ac.kr,"HTTPConnectionPool(host='npcdb.snu.ac.kr', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180113015029/http://npcdb.snu.ac.kr:80/,2014-10-01,"C&K Genomics, Seoul National University Research Park, Seoul 151-919, Korea .","Jeong HS, Kim DW, Chun SY, Sung S, Kim HJ, Cho S, Kim H, Oh SJ",,,2.0, +25198774,PfalDB,0.975739539,PfalDB,0.975739539,,0,1,http://pfaldb.jnu.ac.in,"HTTPConnectionPool(host='pfaldb.jnu.ac.in', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='pfaldb.jnu.ac.in', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2014-01-01,None,"Kumar A, Agarwal N, Pant L, Singh JP, Ghosh I, Subbarao N",,,0.0, +25262355,NrichD,0.996541321,NrichD,0.996541321,,0,1,http://proline.biochem.iisc.ernet.in/NRICHD,301,,"(12.9719,77.5937)",http://web.archive.org/web/20221017080908/http://proline.biochem.iisc.ernet.in/NRICHD/,2014-09-27,"IISc Mathematics Initiative, Indian Institute of Science, Bangalore 560 012, Karnataka, India.","Mudgal R, Sandhya S, Kumar G, Sowdhamini R, Chandra NR, Srinivasan N",,,5.0,India +25301850,PDBbind,0.995774388,PDBbind,0.995774388,,0,1,http://www.pdbbind-cn.org,301,United States,"(39.0438,-77.4874)",http://web.archive.org/web/20210819052032/http://www.pdbbind-cn.org/,2014-10-09,"State Key Laboratory of Bioorganic and Natural Products Chemistry, Shanghai Institute of Organic Chemistry, Chinese Academy of Sciences, 345 Lingling Road, Shanghai 200032 and State Key Laboratory of Quality Research in Chinese Medicine, Macau Institute for Applied Research in Medicine and Health, Macau University of Science and Technology, Macau, People's Republic of 
China.","Liu Z, Li Y, Han L, Li J, Liu J, Zhao Z, Nie W, Liu Y, Wang R",,,107.0,China +25313158,Organ System Heterogeneity,0.808443427,,0,Organ System Heterogeneity,0.808443427,1,http://mips.helmholtz-muenchen.de/Organ_System_Heterogeneity,503,,,http://web.archive.org/web/20160821212919/http://mips.helmholtz-muenchen.de:80/Organ_System_Heterogeneity/,2014-10-13,"German Center for Diabetes Research, Neuherberg 85764, Germany Institute of Bioinformatics and Systems Biology, Helmholtz Zentrum München, Neuherberg 85764, Germany.","Mannil D, Vogt I, Prinz J, Campillos M",,,5.0,"Germany, Germany" +25324303,P2CS,0.982351462,P2CS,0.982351462,prokaryotic two-component systems,0.689270794,1,http://www.p2cs.org,200,,"(48.6833,2.1333)",http://web.archive.org/web/20220524173841/http://www.p2cs.org/,2014-10-16,"CEA, IBEB, Lab Ecol Microb Rhizosphere & Environ Extrem, Saint-Paul-lez-Durance F-13108, France CNRS, UMR 7265 Biol Veget & Microbiol Environ, Saint-Paul-lez-Durance F-13108, France Aix Marseille Université, BVME UMR7265, Marseille F-13284, France.","Ortet P, Whitworth DE, Santaella C, Achouak W, Barakat M",,,48.0,"France, France, France" +25336619,PAIDB,0.989453137,PAIDB,0.989453137,Pathogenicity Island Database,0.901366401,1,http://www.paidb.re.kr,"HTTPConnectionPool(host='www.paidb.re.kr', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.paidb.re.kr timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220822212149/http://www.paidb.re.kr/,2014-10-21,"Synthetic Biology and Bioengineering Research Center, Korea Research Institute of Bioscience and Biotechnology (KRIBB), Daejeon 305-806, Republic of Korea Bio-Medical Science Co., Ltd., Daejeon 305-301, Republic of Korea moncher@kribb.re.kr.","Yoon SH, Park YK, Kim JF",,,52.0, +25378328,MyMpn,0.998210311,MyMpn,0.998210311,,0,1,http://mympn.crg.eu,502,,,http://web.archive.org/web/20220606032102/http://mympn.crg.eu/,2014-11-06,"EMBL/CRG Systems Biology Research Unit, Centre for Genomic Regulation (CRG), Dr. Aiguader 88, 08003 Barcelona, Spain Universitat Pompeu Fabra (UPF), Dr. Aiguader 88, 08003 Barcelona, Spain Theoretical Biophysics, Humboldt-Universitt zu Berlin, Invalidenstr 42, 10115 Berlin, Germany guglielmo.roma@crg.eu luis.serrano@crg.eu.","Wodke JA, Alibés A, Cozzuto L, Hermoso A, Yus E, Lluch-Senar M, Serrano L, Roma G",,European Research Council,16.0,"Germany, Spain, Spain" +"25399418, 29106550, 33174605",OMA,0.997215748,OMA,0.997215748,Orthologous Matrix,0.856745347,3,http://omabrowser.org,301,,"(46.5160,6.6328)",http://web.archive.org/web/20221107071356/https://omabrowser.org/,2021-01-01,"University College London, Gower Street, London WC1E 6BT, UK Swiss Institute of Bioinformatics, Universitätstr. 6, 8092 Zurich, Switzerland ETH Zurich, Computer Science, Universitätstr. 
6, 8092 Zurich, Switzerland., SIB Swiss Institute of Bioinformatics, 1015 Lausanne, Switzerland., SIB Swiss Institute of Bioinformatics, 1015 Lausanne, Switzerland.","Altenhoff AM, Škunca N, Glover N, Train CM, Sueki A, Piližota I, Gori K, Tomiczek B, Müller S, Redestig H, Gonnet GH, Dessimoz C, Altenhoff AM, Glover NM, Train CM, Kaleb K, Warwick Vesztrocy A, Dylus D, de Farias TM, Zile K, Stevenson C, Long J, Redestig H, Gonnet GH, Dessimoz C, Altenhoff AM, Train CM, Gilbert KJ, Mediratta I, Mendes de Farias T, Moi D, Nevers Y, Radoykova HS, Rossier V, Warwick Vesztrocy A, Glover NM, Dessimoz C",", , ","Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Swiss National Science Foundation, Swiss Institute of Bioinformatics, Swiss National Science Foundation",247.0,"Switzerland, Switzerland, Switzerland, Switzerland, Ethiopia" +25428349,OMIM,0.977684259,OMIM,0.977684259,Online Mendelian Inheritance in Man,0.973285995,1,http://omim.org,301,,"(39.0437,-77.4875)",http://web.archive.org/web/20221102114228/https://www.omim.org/,2014-11-26,"McKusick-Nathans Institute of Genetic Medicine, Johns Hopkins University School of Medicine, Baltimore, MD 21287, USA joanna@peas.welch.jhu.edu.","Amberger JS, Bocchini CA, Schiettecatte F, Scott AF, Hamosh A",,"NHGRI NIH HHS, NHGRI NIH HHS",672.0,United States +25527095,NetGestalt,0.99514091,NetGestalt,0.99514091,,0,1,http://www.netgestalt.org,200,,"(45.8399,-119.7006)",http://web.archive.org/web/20220805201725/http://netgestalt.org/,2014-12-18,"Department of Biomedical Informatics, Advanced Computing Center for Research and Education, Department of Electrical Engineering and Computer Science and Department of Cancer Biology, Vanderbilt University, Nashville, Tennessee, USA.","Zhu J, Shi Z, Wang 
J, Zhang B",,NCI NIH HHS,16.0,United States +25540777,ncRNA-DB,0.996957827,ncRNA-DB,0.996957827,,0,1,http://ncrnadb.scienze.univr.it/ncrnadb,404,,,http://web.archive.org/web/20170907142405/http://ncrnadb.scienze.univr.it:80/ncrnadb/,2014-12-10,"Department of Computer Science, University of Verona , Verona , Italy.","Bonnici V, Russo F, Bombieri N, Pulvirenti A, Giugno R",,,8.0,Italy +25551368,PD_NGSAtlas,0.995458275,PD_NGSAtlas,0.995458275,,0,1,http://bioinfo.hrbmu.edu.cn/pd_ngsatlas,"HTTPConnectionPool(host='bioinfo.hrbmu.edu.cn', port=80): Max retries exceeded with url: /pd_ngsatlas (Caused by ConnectTimeoutError(, 'Connection to bioinfo.hrbmu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20150429125031/http://bioinfo.hrbmu.edu.cn:80/pd_ngsatlas/,2014-12-31,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, 150081, China. zhaozheng0503@gmail.com.","Zhao Z, Li Y, Chen H, Lu J, Thompson PM, Chen J, Wang Z, Xu J, Xu C, Li X",,,3.0,China +25591449,PathPPI,0.955567122,PathPPI,0.955567122,,0,1,http://proteomeview.hupo.org.cn/PathPPI/PathPPI.html,200,China,"(39.9143,116.3861)",no_wayback,2015-01-15,"College of Mechanical & Electronic Engineering and Automatization, National University of Defense Technology, Changsha, 410073, China.","Tang H, Zhong F, Liu W, He F, Xie H",,,3.0,China +25604238,ocsESTdb,0.997286081,ocsESTdb,0.997286081,,0,1,http://ocri-genomics.org/ocsESTdb,"HTTPConnectionPool(host='ocri-genomics.org', port=80): Max retries exceeded with url: /ocsESTdb (Caused by ConnectTimeoutError(, 'Connection to ocri-genomics.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20200805195751/http://ocri-genomics.org/ocsESTdb/,2015-01-21,"Key Laboratory for Oil Crops Biology, the Ministry of Agriculture, PR China, Oil Crops Research Institute, Chinese Academy of Agricultural Sciences, No.2 Xudong Second Road, Wuhan, 430062, China. 
ketao2@hotmail.com.","Ke T, Yu J, Dong C, Mao H, Hua W, Liu S",,,4.0,"China, China" +25931458,NeuroPep,0.99364078,NeuroPep,0.99364078,,0,1,http://isyslab.info/NeuroPep,301,,"(22.2783,114.1747)",http://web.archive.org/web/20220206100300/http://isyslab.info/NeuroPep/,2015-04-29,"Key Laboratory of Molecular Biophysics of the Ministry of Education, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China, School of Software Engineering, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China, Department of Computational Medicine and Bioinformatics, University of Michigan, Ann Arbor, MI 48109, USA and National Laboratory of Biomacromolecules, Institute of Biophysics, Chinese Academy of Sciences, Beijing 100101, China.","Wang Y, Wang M, Yin S, Jang R, Wang J, Xue Z, Xu T",,,38.0,"China, China, China, United States" +25931459,novPTMenzy,0.971780777,novPTMenzy,0.971780777,,0,1,http://www.nii.ac.in/novptmenzy.html,200,,"(28.5531,77.1916)",http://web.archive.org/web/20210515020914/http://www.nii.ac.in/novptmenzy.html,2015-04-29,"Bioinformatics Centre, National Institute of Immunology, Aruna Asaf Ali Marg, New Delhi 110067, India.","Khater S, Mohanty D",,,0.0,India +26013919,NIG_MoG,0.989151716,NIG_MoG,0.989151716,of Genetics Mouse Genome database,0.914421072,1,http://molossinus.lab.nig.ac.jp/msmdb,301,,"(35.1167,138.9167)",http://web.archive.org/web/20170716035230/http://molossinus.lab.nig.ac.jp/msmdb/,2015-05-27,"Mammalian Genetics Laboratory, National Institute of Genetics, 1111 Yata, Mishima, Shizuoka, 411-8540, Japan, ttakada@nig.ac.jp.","Takada T, Yoshiki A, Obata Y, Yamazaki Y, Shiroishi T",,,13.0,Japan +26048622,PedsDTI,0.868158847,PedsDTI,0.868158847,NIH MRI study of normal brain development,0.851746321,1,http://www.pediatricmri.nih.gov,302,United States,"(39.0438,-77.4874)",http://web.archive.org/web/20190110135943/https://pediatricmri.nih.gov/,2015-06-03,"Program on Pediatric Imaging and 
Tissue Sciences, NICHD, NIH, Bethesda, MD, USA.","Walker L, Chang LC, Nayak A, Irfanoglu MO, Botteron KN, McCracken J, McKinstry RC, Rivkin MJ, Wang DJ, Rumsey J, Pierpaoli C, ",,"NICHD NIH HHS, NINDS NIH HHS, National Institute of Neurological Disorders and Stroke, National Institute of Mental Health, National Institute of Neurological Disorders and Stroke, National Institute of Neurological Disorders and Stroke, National Institute of Neurological Disorders and Stroke, National Institute of Neurological Disorders and Stroke, National Institute of Child Health and Human Development, NINDS NIH HHS, NICHD NIH HHS, National Institute of Neurological Disorders and Stroke, NINDS NIH HHS, National Institute of Neurological Disorders and Stroke, Intramural NIH HHS, NINDS NIH HHS, NIMH NIH HHS, NINDS NIH HHS, National Institute of Neurological Disorders and Stroke, NINDS NIH HHS, National Institute on Drug Abuse",19.0,United States +26072489,PAGER,0.964891553,PAGER,0.964891553,,0,1,http://discovery.informatics.iupui.edu/PAGER,302,,"(39.2014,-85.9214)",http://web.archive.org/web/20160609165118/http://discovery.informatics.iupui.edu:80/PAGER/,2015-06-01,"Indiana University School of Informatics and Computing, Department of Computer and Information Science, Indiana University-Purdue University Indianapolis, Indianapolis, IN 46202, Purdue University Center for Cancer Research, West Lafayette, IN 47906 and Institute of Biopharmaceutical Informatics and Technology, Wenzhou Medical University, WenZhou, Zhe Jiang Province, China.","Yue Z, Kshirsagar MM, Nguyen T, Suphavilai C, Neylon MT, Zhu L, Ratliff T, Chen JY",,"NCI NIH HHS, NIDDK NIH HHS, NCI NIH HHS, NIDDK NIH HHS",9.0,China +26073932,Pedican,0.98098731,Pedican,0.98098731,of pediatric cancers,0.786508222,1,http://pedican.bioinfo-minzhao.org,406,,,http://web.archive.org/web/20221017004617/https://pedican.bioinfo-minzhao.org/,2015-06-15,"Center for Bioinformatics, State Key Laboratory of Protein and Plant Gene Research, 
College of Life Sciences, Peking University, Beijing 100871, P.R. China.","Zhao M, Ma L, Liu Y, Qu H",,,6.0,China +26166372,MVsCarta,0.977910161,MVsCarta,0.977910161,,0,1,http://bioinf.xmu.edu.cn/MVsCarta,404,,,no_wayback,2015-06-01,"Shandong Medicinal Biotechnology Center, Shandong Academy of Medical Sciences, Key Laboratory for Biotech-Drugs Ministry of Health.","Cui Y, Xu Q, Luan J, Hu S, Pan J, Han J, Ji Z",,,3.0, +26183225,NEU-GD,0.990502596,NEU-GD,0.990502596,Near East University Genetic Mutation Database,0.902750194,1,http://genetics-db.neu.edu.tr,200,,"(38.6224,35.1826)",http://web.archive.org/web/20180423044940/http://genetics-db.neu.edu.tr:80/,2015-07-14,"Department of Medical Genetics, Faculty of Medicine, Near East University, Near East Avenue, 99138, Nicosia, Mersin 10, Turkey. Electronic address: mahmutcerkez@gmail.com.","Ergoren MC, Pirzada RH, Arici M, Serakinci N",,,1.0,Turkey +26476444,PDBe,0.99717021,PDBe,0.99717021,,0,1,http://pdbe.org,301,United Kingdom,"(52.1929,0.1256)",no_wayback,2015-10-17,"Protein Data Bank in Europe, European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, Cambridge, CB10 1SD, UK sameer@ebi.ac.uk.","Velankar S, van Ginkel G, Alhroub Y, Battle GM, Berrisford JM, Conroy MJ, Dana JM, Gore SP, Gutmanas A, Haslam P, Hendrickx PM, Lagerstedt I, Mir S, Fernandez Montecelo MA, Mukhopadhyay A, Oldfield TJ, Patwardhan A, Sanz-García E, Sen S, Slowley RA, Wainwright ME, Deshpande MS, Iudin A, Sahni G, Salavert Torres J, Hirshberg M, Mak L, Nadzirin N, Armstrong DR, Clark AR, Smart OS, Korir PK, Kleywegt GJ",,"Biotechnology and Biological Sciences Research Council, Medical Research Council, NIGMS NIH HHS, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, NIGMS NIH 
HHS",70.0, +26496950,OpenTein,0.988018394,OpenTein,0.988018394,Teratoma Investigation,0.906912729,1,http://opentein.hgc.jp,301,,"(35.6895,139.6917)",http://web.archive.org/web/20220407202124/https://opentein.hgc.jp/,2015-10-22,"Human Genome Center, The Institute of Medical Science, The University of Tokyo, 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.","Park SJ, Komiyama Y, Suemori H, Umezawa A, Nakai K",,,1.0,Japan +26504143,PDID,0.99190706,PDID,0.99190706,Protein-Drug Interaction Database,0.975201716,1,http://biomine.ece.ualberta.ca/PDID,301,Canada,"(53.52,-113.5319)",no_wayback,2015-10-26,"Department of Electrical and Computer Engineering, University of Alberta, Edmonton, AB, Canada T6G 2V4.","Wang C, Hu G, Wang K, Brylinski M, Xie L, Kurgan L",,NLM NIH HHS,14.0,Canada +26507856,NBDB,0.986459076,NBDB,0.986459076,Nucleotide binding database,0.770806229,1,http://nbdb.bii.a-star.edu.sg,200,,"(32.7939,-96.8319)",http://web.archive.org/web/20221020213519/https://nbdb.bii.a-star.edu.sg/,2015-10-26,"Bioinformatics Institute, Agency for Science, Technology and Research (A*STAR), 30 Biopolis Street, #07-01, Matrix, 138671, Singapore.","Zheng Z, Goncearenco A, Berezovsky IN",,,12.0,Singapore +26578565,PCOSKB,0.997247696,PCOSKB,0.997247696,,0,1,http://pcoskb.bicnirrh.res.in,200,India,"(19.0748,72.8856)",http://web.archive.org/web/20220809232412/http://pcoskb.bicnirrh.res.in/,2015-11-17,"Biomedical Informatics Center of Indian Council of Medical Research, National Institute for Research in Reproductive Health, Mumbai-400012, India.","Joseph S, Barai RS, Bhujbalrao R, Idicula-Thomas S",,,15.0,India +26578589,ORegAnno,0.998606801,ORegAnno,0.998606801,Open Regulatory Annotation database,0.859214735,1,http://www.oreganno.org,200,,"(45.8399,-119.7006)",http://web.archive.org/web/20220517045441/http://oreganno.org/,2015-11-17,"McDonnell Genome Institute, Washington University School of Medicine, St. 
Louis, MO 63108, USA.","Lesurf R, Cotto KC, Wang G, Griffith M, Kasaian K, Jones SJ, Montgomery SB, Griffith OL, ",,"NHGRI NIH HHS, NCI NIH HHS, NHGRI NIH HHS, NCI NIH HHS, NIMH NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS",54.0,United States +26615193,PDBFlex,0.998265803,PDBFlex,0.998265803,,0,1,http://pdbflex.org,302,,"(37.3476,-121.8870)",http://web.archive.org/web/20221021211224/https://pdbflex.org/,2015-11-28,"Bioinformatics and Systems Biology Program, Sanford Burnham Prebys Medical Discovery Institute, 10901 North Torrey Pines Road, La Jolla, CA 92037, USA.","Hrabe T, Li Z, Sedova M, Rotkiewicz P, Jaroszewski L, Godzik A",,"NIGMS NIH HHS, NIGMS NIH HHS",33.0,United States +26637529,ONRLDB,0.997567415,ONRLDB,0.997567415,Orphan Nuclear Receptor Ligand Binding Database,0.977192277,1,http://www.onrldb.org,"HTTPConnectionPool(host='www.onrldb.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180104220442/http://onrldb.org/,2015-12-04,None,"Nanduri R, Bhutani I, Somavarapu AK, Mahajan S, Parkesh R, Gupta P",,,6.0, +26721496,NONCODEv4,0.99214226,NONCODEv4,0.99214226,,0,1,"http://www.noncode.org/, http://www.bioinfo.org","200, 301",,"(30.2936,120.1614), (38.8951,-77.0364)","http://web.archive.org/web/20220523201319/http://noncode.org/, http://web.archive.org/web/20180212110050/http://www.bioinfo.org:80/",2016-01-01,"Key Laboratory of Intelligent Information Processing, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, 100190, China.","Zhao Y, Yuan J, Chen R",,,14.0,China +26827236,OGDD,0.991602957,OGDD,0.991602957,Olive Genetic Diversity Database,0.950038569,1,http://www.bioinfo-cbs.org/ogdd,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20210514012459/http://www.bioinfo-cbs.org/ogdd/,2016-01-30,"Centre of Biotechnology of Sfax, PB '1177', 3018 Sfax, Tunisia and 
raydabenayed@yahoo.fr.","Ben Ayed R, Ben Hassen H, Ennouri K, Ben Marzoug R, Rebai A",,,10.0,Tunisia +26896846,NALDB,0.99670428,NALDB,0.99670428,Nucleic acid ligand database,0.987690696,1,http://bsbe.iiti.ac.in/bsbe/naldb/HOME.php,302,,"(22.5387,75.9111)",http://web.archive.org/web/20210802082334/http://bsbe.iiti.ac.in/bsbe/naldb/HOME.php,2016-02-20,"Centre for Biosciences and Biomedical Engineering, Indian Institute of Technology Indore, Indore 452017, Madhya Pradesh, India.","Kumar Mishra S, Kumar A",,,9.0,India +26912952,NABIC,0.985369205,NABIC,0.985369205,,0,1,http://nabic.rda.go.kr/DNAchip,405,,,http://web.archive.org/web/20220617044449/http://nabic.rda.go.kr/DNAchip,2015-11-30,"Genomics Division, National Academy of Agricultural Science (NAAS), RDA, Jeonju 560-500, Republic of Korea.","Lee JH, Kang SH, Lee JY, Kim CK",,,0.0, +26995712,ODCs,0.995768189,ODCs,0.995768189,Orphan Disease Connections,0.98984336,1,http://csbg.cnb.csic.es/odcs,301,Spain,"(40.4168,-3.70379)",http://web.archive.org/web/20220709060737/http://csbg.cnb.csic.es/odcs/,2016-03-16,"Escuela Politecnica Superior, Universidad Autonoma de Madrid, Madrid 28049, Spain.","Fernandez-Novo S, Pazos F, Chagoyen M",,,2.0,Spain +27017950,NeisseriaBase,0.990827322,NeisseriaBase,0.990827322,,0,1,http://neisseria.um.edu.my,"HTTPConnectionPool(host='neisseria.um.edu.my', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to neisseria.um.edu.my timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20160620181713/http://neisseria.um.edu.my,2016-03-17,"Department of Oral Biology and Biomedical Sciences, Faculty of Dentistry, University of Malaya, Kuala Lumpur, Malaysia; Genome Informatics Research Laboratory, HIR Building, University of Malaya, Kuala Lumpur, Malaysia.","Zheng W, Mutha NV, Heydari H, Dutta A, Siow CC, Jakubovics NS, Wee WY, Tan SY, Ang MY, Wong GJ, Choo SW",,"High Impact Research (HIR), University of Malaya and Ministry of Education, University of Malaya Research Grant",0.0,"Malaysia, Malaysia" +27152146,NCRO,0.979602456,NCRO,0.979602456,The Non-Coding RNA Ontology,0.890178517,1,http://purl.obolibrary.org/obo/ncro.owl,302,,"(39.0437,-77.4875)",no_wayback,2016-05-04,"School of Computing, University of South Alabama, Mobile, Alabama, 36688-0002 USA.","Huang J, Eilbeck K, Smith B, Blake JA, Dou D, Huang W, Natale DA, Ruttenberg A, Huan J, Zimmermann MT, Jiang G, Lin Y, Wu B, Strachan HJ, He Y, Zhang S, Wang X, Liu Z, Borchert GM, Tan M",,"National Cancer Institute, NIGMS NIH HHS, NCI NIH HHS",2.0,United States +27234245,OGRO,0.748835266,OGRO,0.748835266,,0,1,http://qtaro.abr.affrc.go.jp/ogro,"HTTPConnectionPool(host='qtaro.abr.affrc.go.jp', port=80): Max retries exceeded with url: /ogro (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220620014110/http://qtaro.abr.affrc.go.jp/ogro/,2012-09-24,"National Institute of Agrobiological Sciences, 2-1-2 Kannondai, Tsukuba, Ibaraki, 305-8602, Japan.","Yamamoto E, Yonemaru J, Yamamoto T, Yano M",,,62.0,Japan +27285615,Northeast India Helminth Parasite Information Database,0.971249071,NEIHPID,0.965815055,Northeast India Helminth Parasite Information Database,0.971249071,1,http://nepiac.nehu.ac.in/index.php,200,,,http://web.archive.org/web/20220618123654/http://nepiac.nehu.ac.in/index.php,2016-06-10,"Bioinformatics Centre, North-Eastern Hill University, Shillong, 
Meghalaya, India.","Biswal DK, Debnath M, Kharumnuid G, Thongnibah W, Tandon V",,"Department of Biotechnology, Govt. of India, Department of Information Technology, Ministry of Communications and Information Technology",3.0,India +27515825,Onco-Regulon,0.983626167,Onco-Regulon,0.983626167,,0,1,http://www.scfbio-iitd.res.in/software/onco/NavSite/index.htm,200,,"(28.5553,77.1743)",http://web.archive.org/web/20210617175035/http://www.scfbio-iitd.res.in/software/onco/NavSite/index.htm,2016-08-10,"Supercomputing Facility for Bioinformatics & Computational Biology, Indian Institute of Technology-Delhi, New Delhi, India.","Tomar N, Mishra A, Mrinal N, Jayaram B",,,2.0,India +27530928,NLDB,0.996441364,NLDB,0.996441364,Natural Ligand DataBase,0.982187194,1,http://nldb.hgc.jp,301,,"(35.6895,139.6917)",http://web.archive.org/web/20220228022130/https://nldb.hgc.jp/,2016-08-16,"Graduate School of Information Sciences, Tohoku University, Aramaki-Aza-Aoba 6-3-09, Aoba-ku, Sendai, 980-8575, Japan.","Murakami Y, Omori S, Kinoshita K",,,2.0,Japan +27625390,PathoYeastract,0.99473387,PathoYeastract,0.99473387,Search for Transcriptional Regulators And,0.811140098,1,http://pathoyeastract.org,301,,"(48.8534,2.3488)",http://web.archive.org/web/20220615132225/http://www.pathoyeastract.org/,2016-09-12,"Department of Computer Science and Engineering, Instituto Superior Técnico, Universidade de Lisboa, Av. Rovisco Pais, 1049-001 Lisbon, Portugal Pedro.Tiago.Monteiro@tecnico.pt.","Monteiro PT, Pais P, Costa C, Manna S, Sá-Correia I, Teixeira MC",,,21.0,Portugal +27819351,PEPlife,0.99723953,PEPlife,0.99723953,,0,1,http://crdd.osdd.net/raghava/peplife,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/peplife (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220325071709/http://crdd.osdd.net/raghava/peplife/,2016-11-07,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh, India.","Mathur D, Prakash S, Anand P, Kaur H, Agrawal P, Mehta A, Kumar R, Singh S, Raghava GP",,,35.0,India +27824078,PGAdb-builder,0.779213417,PGAdb-builder,0.779213417,,0,1,http://wgmlstdb.imst.nsysu.edu.tw,200,,,no_wayback,2016-11-08,"Central Regional Laboratory, Center for Diagnostics and Vaccine Development, Centers for Disease Control, Taichung 40855, Taiwan.","Liu YY, Chiou CS, Chen CC",,,16.0, +27899613,NSDNA,0.996830682,NSDNA,0.996830682,Nervous System Disease NcRNAome Atlas,0.991699129,1,http://www.bio-bigdata.net/nsdna,502,,,no_wayback,2016-11-28,"Department of Neurology, The Second Affiliated Hospital of Harbin Medical University, Harbin 150081, China.","Wang J, Cao Y, Zhang H, Wang T, Tian Q, Lu X, Lu X, Kong X, Liu Z, Wang N, Zhang S, Ma H, Ning S, Wang L",,,22.0,China +27936097,PeTMbase,0.988303959,PeTMbase,0.988303959,,0,1,http://petmbase.org,"HTTPConnectionPool(host='petmbase.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200803170345/http://www.petmbase.org/,2016-12-09,"İzmir International Biomedicine and Genome Institute (iBG-izmir), Dokuz Eylül University, İnciraltı, İzmir, Turkey.","Karakülah G, Yücebilgili Kurtoğlu K, Unver T",,Türkiye Bilimler Akademisi,22.0,Turkey +28011869,PCoM-DB,0.992738867,PCoM-DB,0.992738867,protein co-migration database,0.720944236,1,http://pcomdb.lowtem.hokudai.ac.jp/proteins/top,200,United States,"(39.0438,-77.4874)",http://web.archive.org/web/20220617113421/http://pcomdb.lowtem.hokudai.ac.jp/proteins/top,2017-01-01,"CREST, JST, Kita-ku, Sapporo, Japan.","Takabayashi A, Takabayashi S, Takahashi K, Watanabe M, Uchida H, Murakami A, Fujita T, Ikeuchi M, Tanaka A",,,8.0,Japan 
+28053167,PceRBase,0.983400321,PceRBase,0.983400321,ceRNA database,0.803619842,1,http://bis.zju.edu.cn/pcernadb/index.jsp,200,China,"(40.0018,116.333)",http://web.archive.org/web/20200221034252/http://bis.zju.edu.cn:80/pcernadb/index.jsp,2016-10-07,"Department of Bioinformatics, State Key Laboratory of Plant Physiology and Biochemistry, Institute of Plant Science, College of Life Sciences, Zhejiang University, Hangzhou 310058, China.","Yuan C, Meng X, Li X, Illing N, Ingle RA, Wang J, Chen M",,,25.0,China +28086860,NaDH,0.970333695,NaDH,0.970333695,Nicotiana attenuata Data Hub,0.910567932,1,http://nadh.ice.mpg.de,302,,"(50.9787,11.0328)",no_wayback,2017-01-13,"Department of Molecular Ecology, Max Planck Institute for Chemical Ecology, Hans-Knöll-Straße 8, D-07745, Jena, Germany.","Brockmöller T, Ling Z, Li D, Gaquerel E, Baldwin IT, Xu S",,"Seventh Framework Programme, Swiss National Science Foundation, Swiss National Science Foundation",11.0,Germany +28182744,OMDB,0.992427528,OMDB,0.992427528,Organic materials database,0.874303088,1,http://omdb.diracmaterials.org,"HTTPConnectionPool(host='omdb.diracmaterials.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 101] Network is unreachable'))",,,http://web.archive.org/web/20181203130155/https://omdb.diracmaterials.org/,2017-02-09,"Nordita, Center for Quantum Materials, KTH Royal Institute of Technology and Stockholm University, Roslagstullsbacken 23, SE-106 91 Stockholm, Sweden.","Borysov SS, Geilhufe RM, Balatsky AV",,"Villum Fonden (DK), Villum Fonden, European Research Council, Vetenskapsrådet, Knut och Alice Wallenbergs Stiftelse (SE), European Research Council",3.0,Sweden +28184254,NPCARE,0.992175639,NPCARE,0.992175639,,0,1,http://silver.sejong.ac.kr/npcare,"HTTPConnectionPool(host='silver.sejong.ac.kr', port=80): Max retries exceeded with url: /npcare (Caused by ConnectTimeoutError(, 'Connection to silver.sejong.ac.kr timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220120145819/http://silver.sejong.ac.kr/npcare/,2017-01-05,"Department of Bioscience and Biotechnology, Institute of Anticancer Medicine Development, Sejong University, 209 Neungdong-ro, Kwangjin-gu, Seoul, 05006 Korea.","Choi H, Cho SY, Pak HJ, Kim Y, Choi JY, Lee YJ, Gong BH, Kang YS, Han T, Choi G, Cho Y, Lee S, Ryoo D, Park H",,Basic Science Research Program through the National Research Foundation of Korea,15.0, +28365721,PCPPI,0.994418144,PCPPI,0.994418144,Penicillium -Crop Protein-Protein Interactions,0.860982812,1,http://bdg.hfut.edu.cn/pcppi/index.html,"HTTPConnectionPool(host='bdg.hfut.edu.cn', port=80): Max retries exceeded with url: /pcppi/index.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2017-01-01,"College of Food Science and Engineering, Hefei University of Technology, Hefei 230009, China.","Yue J, Zhang D, Ban R, Ma X, Chen D, Li G, Liu J, Wisniewski M, Droby S, Liu Y",,,2.0,China +28365722,OCaPPI-Db,0.996962115,OCaPPI-Db,0.996962115,Oligonucleotide Capture Probes for Pathogen Identification Database,0.991928976,1,http://ocappidb.uca.works,"HTTPConnectionPool(host='ocappidb.uca.works', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2017-01-01,"Université Clermont Auvergne, INRA, MEDIS, F-63000 Clermont-Ferrand, France.","Gasc C, Constantin A, Jaziri F, Peyret P",,,0.0,France +28454513,MYCbase,0.986929297,MYCbase,0.986929297,,0,1,http://bicresources.jcbose.ac.in/ssaha4/mycbase,301,,"(22.6000,88.3833)",http://web.archive.org/web/20220319215109/http://bicresources.jcbose.ac.in/ssaha4/mycbase/,2017-04-28,"Bioinformatics Centre, Bose Institute, P 1/12, C.I.T. 
Road, Scheme-VII (M), Kolkata, 700054, India.","Chakravorty D, Jana T, Das Mandal S, Seth A, Bhattacharya A, Saha S",,,6.0,India +28641017,NANPDB,0.997249413,NANPDB,0.997249413,Northern African Natural Products Database,0.983017099,1,http://african-compounds.org/nanpdb,301,,"(47.9959,7.8522)",http://web.archive.org/web/20200519021959/http://www.african-compounds.org/nanpdb/,2017-06-22,"Department of Pharmaceutical Chemistry, Martin-Luther University of Halle-Wittenberg , Wolfgang-Langenbeck Straße 4, 06120 Halle (Saale), Germany.","Ntie-Kang F, Telukunta KK, Döring K, Simoben CV, A Moumbock AF, Malange YI, Njume LE, Yong JN, Sippl W, Günther S",,"Alexander von Humboldt-Stiftung, Deutscher Akademischer Austauschdienst",37.0,Germany +28651363,NeuroMMSig,0.995421886,NeuroMMSig,0.995421886,Multimodal mechanistic signatures for neurodegenerative diseases,0.627636355,1,http://neurommsig.scai.fraunhofer.de,302,,"(50.7754,7.1970)",http://web.archive.org/web/20220706134026/https://neurommsig.scai.fraunhofer.de/,2017-11-01,"Department of Bioinformatics, Fraunhofer Institute for Algorithms and Scientific Computing, Sankt Augustin 53754, Germany.","Domingo-Fernández D, Kodamullil AT, Iyappan A, Naz M, Emon MA, Raschka T, Karki R, Springstubbe S, Ebeling C, Hofmann-Apitius M",,,16.0,Germany +28961690,ncDR,0.99832958,ncDR,0.99832958,,0,1,http://www.jianglab.cn/ncDR,302,,"(30.2936,120.1614)",http://web.archive.org/web/20220526115135/http://www.jianglab.cn/ncDR/,2017-12-01,"Department of Biological Mathematics, College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Dai E, Yang F, Wang J, Zhou X, Song Q, An W, Wang L, Jiang W",,National Natural Science Foundation of China,17.0,China +29036324,PancanQTL,0.996932268,PancanQTL,0.996932268,,0,1,http://bioinfo.life.hust.edu.cn/PancanQTL,301,China,"(30.513,114.42)",http://web.archive.org/web/20200115145532/http://bioinfo.life.hust.edu.cn:80/PancanQTL/,2018-01-01,"Department of Epidemiology 
and Biostatistics, Key Laboratory of Environmental Health of Ministry of Education, School of Public Health, Tongji Medical College, Huazhong University of Science and Technology, Wuhan, Hubei 430030, PR China.","Gong J, Mei S, Liu C, Xiang Y, Ye Y, Zhang Z, Feng J, Liu R, Diao L, Guo AY, Miao X, Han L",,,62.0,China +29040761,PCSD,0.967814763,PCSD,0.967814763,Plant Chromatin State Database,0.940431786,1,http://systemsbiology.cau.edu.cn/chromstates,301,China,"(40.0018,116.333)",http://web.archive.org/web/20220125185852/http://systemsbiology.cau.edu.cn/chromstates/,2018-01-01,"State Key Laboratory of Plant Physiology and Biochemistry, College of Biological Sciences, China Agricultural University, Beijing 100193, China.","Liu Y, Tian T, Zhang K, You Q, Yan H, Zhao N, Yi X, Xu W, Su Z",,,26.0,"China, China" +29069459,OverGeneDB,0.993383467,OverGeneDB,0.993383467,,0,1,http://overgenedb.amu.edu.pl,200,,"(52.4069,16.9299)",http://web.archive.org/web/20221016223534/http://overgenedb.amu.edu.pl/,2018-01-01,"Department of Integrative Genomics, Institute of Anthropology, Faculty of Biology, Adam Mickiewicz University in Poznan, 61-712 Poznan, Poland.","Rosikiewicz W, Suzuki Y, Makalowska I",,,4.0,Poland +29106588,NLSdb,0.997089744,NLSdb,0.997089744,,0,1,http://rostlab.org/services/nlsdb,302,,"(48.2490,11.6510)",http://web.archive.org/web/20220618191718/https://www.rostlab.org/services/nlsdb/,2018-01-01,"Department of Informatics, I12-Chair of Bioinformatics and Computational Biology, Technical University of Munich (TUM), Boltzmannstrasse 3, 85748 Garching/Munich, Germany.","Bernhofer M, Goldberg T, Wolf S, Ahmed M, Zaugg J, Boden M, Rost B",,,17.0,Germany +29106626,PAMBD,0.997342527,PAMBD,0.997342527,Pseudomonas aeruginosaMetabolome Database,0.988374015,1,http://pseudomonas.umaryland.edu,200,,"(39.2904,-76.6122)",http://web.archive.org/web/20220709005549/http://pseudomonas.umaryland.edu/,2018-01-01,"Department of Pharmaceutical Sciences, School of Pharmacy, University of 
Maryland, Baltimore, MD 21209, USA.","Huang W, Brewer LK, Jones JW, Nguyen AT, Marcu A, Wishart DS, Oglesby-Sherrouse AG, Kane MA, Wilks A",,"NIAID NIH HHS, NIAID NIH HHS, NIGMS NIH HHS",16.0,United States +29112749,PGG.Population,0.989457592,PGG.Population,0.989457592,,0,1,http://www.pggpopulation.org,301,China,"(39.96,116.298)",http://web.archive.org/web/20220909131759/https://www.pggpopulation.org/,2018-01-01,"Chinese Academy of Sciences (CAS) Key Laboratory of Computational Biology, Max Planck Independent Research Group on Population Genomics, CAS-MPG Partner Institute for Computational Biology (PICB), Shanghai Institutes for Biological Sciences, CAS, Shanghai 200031, China.","Zhang C, Gao Y, Liu J, Xue Z, Lu Y, Deng L, Tian L, Feng Q, Xu S",,,5.0,China +29126123,PedAM,0.973055482,PedAM,0.973055482,Pediatric Disease Annotations & Medicines,0.695434553,1,http://www.unimd.org/pedam,301,United States,"(37.5517,-122.33)",no_wayback,2018-01-01,"The Center for Bioinformatics and Computational Biology, Shanghai Key Laboratory of Regulatory Biology, Institute of Biomedical Sciences and School of Life Sciences, East China Normal University, Shanghai 200241, China.","Jia J, An Z, Ming Y, Guo Y, Li W, Li X, Liang Y, Guo D, Tai J, Chen G, Jin Y, Liu Z, Ni X, Shi T",,,6.0,"China, China" +29177508,MVP,0.808669806,MVP,0.808669806,Microbe Versus,0.708312586,1,http://mvp.medgenius.info,406,,,http://web.archive.org/web/20221017000542/https://mvp.medgenius.info/,2018-01-01,"Key Laboratory of Molecular Biophysics of the Ministry of Education, Hubei Key Laboratory of Bioinformatics and Molecular-imaging, Department of Bioinformatics and Systems Biology, College of Life Science and Technology, Huazhong University of Science and Technology (HUST), 430074 Wuhan, Hubei, China.","Gao NL, Zhang C, Zhang Z, Hu S, Lercher MJ, Zhao XM, Bork P, Liu Z, Chen WH, Chen WH",,,18.0,China 
+29186335,OncoPPi,0.987840295,OncoPPi,0.987840295,,0,1,http://oncoppi.emory.edu,301,,"(33.7804,-84.3360)",http://web.archive.org/web/20220617063703/http://oncoppi.emory.edu/,2018-04-01,"Department of Pharmacology and Emory Chemical Biology Discovery Center, Emory University School of Medicine.","Ivanov AA, Revennaugh B, Rusnak L, Gonzalez-Pecchi V, Mo X, Johns MA, Du Y, Cooper LAD, Moreno CS, Khuri FR, Fu H",,"NIH, NCI NIH HHS, National Cancer Institute, NCI NIH HHS, Winship Cancer Institute, NCI NIH HHS",11.0, +29216377,PeachVar-DB,0.993295565,PeachVar-DB,0.993295565,,0,1,http://hpc-bioinformatics.cineca.it/peach,301,Italy,"(44.4861,11.261)",no_wayback,2018-01-01,"Department of Agricultural Science (DISAA), University of Milan, Milan, Italy.","Cirilli M, Flati T, Gioiosa S, Tagliaferri I, Ciacciulli A, Gao Z, Gattolin S, Geuna F, Maggi F, Bottoni P, Rossini L, Bassi D, Castrignanò T, Chillemi G",,,5.0,Italy +29455297,PEP725,0.994008164,PEP725,0.994008164,Pan European Phenological database,0.893914139,1,http://www.pep725.eu,200,,,http://web.archive.org/web/20220901042002/http://www.pep725.eu/,2018-02-18,"Zentralanstalt für Meteorologie und Geodynamik, Vienna, Austria.","Templ B, Koch E, Bolmgren K, Ungersböck M, Paul A, Scheifinger H, Rutishauser T, Busto M, Chmielewski FM, Hájková L, Hodzić S, Kaspar F, Pietragalla B, Romero-Fresneda R, Tolvanen A, Vučetič V, Zimmermann K, Zust A",,,17.0,Austria +29487113,Panorama Public,0.976007561,Panorama Public,0.976007561,,0,1,http://panoramaweb.org/public.url,302,,"(47.6062,-122.3321)",no_wayback,2018-02-27,"From the ‡University of Washington, Seattle, Washington 98195.","Sharma V, Eckels J, Schilling B, Ludwig C, Jaffe JD, MacCoss MJ, MacLean B",,"NIAMS NIH HHS, NIGMS NIH HHS, HHS | NIH | National Institute of General Medical Sciences, NIGMS NIH HHS, HHS | NIH | National Institute of General Medical Sciences, NHGRI NIH HHS, HHS | NIH | National Human Genome Research Institute, NIGMS NIH HHS, HHS | NIH | National Institute 
of Arthritis and Musculoskeletal and Skin Diseases",62.0, +29657279,oncoNcRNA,0.992065489,oncoNcRNA,0.992065489,,0,1,http://rna.sysu.edu.cn/onconcrna,302,,"(39.9906,116.2887)",no_wayback,2017-02-07,"Key Laboratory of Gene Engineering of the Ministry of Education, GuangZhou 510275, China. wangzl6@mail2.sysu.edu.cn.","Wang ZL, Zhang XQ, Zhou H, Yang JH, Qu LH",,,0.0,China +29699484,PDZBase,0.993412837,PDZBase,0.993412837,,0,1,http://www.actrec.gov.in:8080/pdzscape,"HTTPConnectionPool(host='www.actrec.gov.in', port=8080): Max retries exceeded with url: /pdzscape (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20200208030308/http://actrec.gov.in:8080/pdzscape/,2018-04-25,"Integrated Biophysics and Structural Biology Lab, Advanced Centre for Treatment, Research and Education in Cancer (ACTREC), Tata Memorial Centre, Kharghar, Navi Mumbai, 410210, India.","Doshi J, Kuppili RR, Gurdasani S, Venkatakrishnan N, Saxena A, Bose K",,,1.0,India +29739837,NvERTx,0.99687469,NvERTx,0.99687469,Nematostella vectensis Embryogenesis and Regeneration Transcriptomics,0.891611405,1,http://nvertx.kahikai.org,200,,"(46.2022,6.1457)",http://web.archive.org/web/20221017074208/https://nvertx.kahikai.org/,2018-05-17,"Université Côte d'Azur, CNRS, INSERM, Institute for Research on Cancer and Aging, Nice (IRCAN), 06107 Nice, France.","Warner JF, Guerlais V, Amiel AR, Johnston H, Nedoncelle K, Röttinger E",,"Association pour la Recherche sur le Cancer, Ligue Contre le Cancer, Minist?re de l'Enseignement Sup?rieur et de la Recherche, Seventh Framework Programme, ATIP-Avenir, Fondation pour la Recherche M?dicale",18.0,France +29743053,PDXliver,0.993864596,PDXliver,0.993864596,,0,1,http://www.picb.ac.cn/PDXliver,301,China,"(39.9042,116.407)",no_wayback,2018-05-09,"School of Life Science and Technology, ShanghaiTech University, Shanghai, 201210, China.","He S, Hu B, Li C, Lin P, Tang WG, Sun YF, Feng FY, Guo W, Li J, 
Xu Y, Yao QL, Zhang X, Qiu SJ, Zhou J, Fan J, Li YX, Li H, Yang XR",,"the Projects from the Shanghai Science and Technology Commission, the National Natural Science Foundation of China, the State Key Program of National Natural Science of China, the National Natural Science Foundation of China, the Projects from the Shanghai Science and Technology Commission, ""Strategic Priority Research Program"" of the Chinese Academy of Sciences, the National Natural Science Foundation of China, the National Natural Science Foundation of China, National Key R&D Program of China, the National Natural Science Foundation of China, “Strategic Priority Research Program” of the Chinese Academy of Sciences, the National Natural Science Foundation of China, “Strategic Priority Research Program” of the Chinese Academy of Sciences, National Key R&D Program of China, ""Strategic Priority Research Program"" of the Chinese Academy of Sciences",7.0,China +29855811,P-PAL,0.997003108,P-PAL,0.997003108,Procura-PALavras,0.787817964,1,http://p-pal.di.uminho.pt/tools,200,,"(41.1496,-8.6110)",http://web.archive.org/web/20220622022748/http://p-pal.di.uminho.pt/tools,2018-08-01,"Human Cognition Lab, CIPsi, School of Psychology, University of Minho, Campus de Gualtar, 4710-057, Braga, Portugal. 
asoares@psi.uminho.pt.","Soares AP, Iriarte Á, de Almeida JJ, Simões A, Costa A, Machado J, França P, Comesaña M, Rauber A, Rato A, Perea M",,,5.0,Portugal +29913065,OptoBase,0.9975577,OptoBase,0.9975577,,0,1,http://www.optobase.org,301,,"(49.4542,11.0775)",http://web.archive.org/web/20220708203124/https://www.optobase.org/,2018-07-03,"Faculty of Biology , University of Freiburg , 79104 Freiburg , Germany.","Kolar K, Knobloch C, Stork H, Žnidarič M, Weber W",,Deutsche Forschungsgemeinschaft,36.0,Germany +29982280,PepBDB,0.992508531,PepBDB,0.992508531,Peptide Binding DataBase,0.660811494,1,http://huanglab.phys.hust.edu.cn/pepbdb,301,,,http://web.archive.org/web/20220617182616/http://huanglab.phys.hust.edu.cn/pepbdb/,2019-01-01,"Institute of Biophysics, School of Physics, Huazhong University of Science and Technology, Wuhan, Hubei, China.","Wen Z, He J, Tao H, Huang SY",,"National Key R&D Program of China, Huazhong University of Science and Technology, National Natural Science Foundation of China, National Key R&D Program of China",8.0,China +30115014,PdumBase,0.998220384,PdumBase,0.998220384,,0,1,http://pdumbase.gdcb.iastate.edu,"HTTPConnectionPool(host='pdumbase.gdcb.iastate.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2018-08-16,"Department of Genetics, Developmental and Cell Biology, Iowa State University, 503 Science Hall II, Ames, IA, 50011, USA.","Chou HC, Acevedo-Luna N, Kuhlman JA, Schneider SQ",,"National Science Foundation, Roy J. 
Carver Charitable Trust",6.0,United States +30134653,PADFrag,0.997416735,PADFrag,0.997416735,Pesticide And Drug Fragments,0.979545602,1,http://chemyang.ccnu.edu.cn/ccb/database/PADFrag,301,,"(39.9906,116.2887)",http://web.archive.org/web/20220803084521/http://chemyang.ccnu.edu.cn/ccb/database/PADFrag/,2018-09-06,"Key Laboratory of Pesticide & Chemical Biology, Ministry of Education, College of Chemistry , Central China Normal University , Wuhan 430079 , P.R. China.","Yang JF, Wang F, Jiang W, Zhou GY, Li CZ, Zhu XL, Hao GF, Yang GF",,"Foundation for the Author of National Excellent Doctoral Dissertation of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China",4.0,"China, China" +30203047,Pancan-meQTL,0.99280415,Pancan-meQTL,0.99280415,,0,1,http://bioinfo.life.hust.edu.cn/Pancan-meQTL,301,China,"(30.513,114.42)",no_wayback,2019-01-01,"Department of Epidemiology and Biostatistics, Key Laboratory of Environmental Health of Ministry of Education, School of Public Health, Tongji Medical College, Huazhong University of Science and Technology, Wuhan, Hubei 430030, PR China.","Gong J, Wan H, Mei S, Ruan H, Zhang Z, Liu C, Guo AY, Diao L, Miao X, Han L",,Cancer Prevention & Research Institute of Texas,18.0,China +30239681,PalmXplore,0.977348566,PalmXplore,0.977348566,,0,1,http://palmxplore.mpob.gov.my,200,,"(3.1412,101.6865)",http://web.archive.org/web/20220618092027/http://palmxplore.mpob.gov.my/,2018-01-01,"Advanced Biotechnology and Breeding Centre, Malaysian Palm Oil Board, No. 
6, Persiaran Institusi, Bandar Baru Bangi, Kajang, Selangor, Malaysia.","Sanusi NSNM, Rosli R, Halim MAA, Chan KL, Nagappan J, Azizi N, Amiruddin N, Tatarinova TV, Low EL",,Malaysian Palm Oil Board,5.0,Malaysia +30335176,NucMap,0.996892318,NucMap,0.996892318,nucleosome positioning map,0.91362164,1,http://bigd.big.ac.cn/nucmap,301,China,"(39.96,116.298)",http://web.archive.org/web/20210515041648/https://bigd.big.ac.cn/nucmap/,2019-01-01,"Department of Health Sciences Research, Mayo Clinic, Jacksonville, FL 32224, USA.","Zhao Y, Wang J, Liang F, Liu Y, Wang Q, Zhang H, Jiang M, Zhang Z, Zhao W, Bao Y, Zhang Z, Wu J, Asmann YW, Li R, Xiao J",,"Chinese Academy of Sciences, National Key Research Program of China, Chinese Academy of Sciences, National Natural Science Foundation of China, National Development and Reform Commission of China, Chinese Academy of Sciences, National Natural Science Foundation of China",15.0,United States +30349509,PanGFR-HM,0.976677001,PanGFR-HM,0.976677001,,0,1,http://www.bioinfo.iicb.res.in/pangfr-hm,"HTTPConnectionPool(host='www.bioinfo.iicb.res.in', port=80): Max retries exceeded with url: /pangfr-hm (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20190419051757/http://www.bioinfo.iicb.res.in:80/pangfr-hm/,2018-10-08,"Structural Biology and Bioinformatics Division, CSIR-Indian Institute of Chemical Biology, Kolkata, India.","Chaudhari NM, Gautam A, Gupta VK, Kaur G, Dutta C, Paul S",,"Council of Scientific and Industrial Research, Department of Science and Technology, Ministry of Science and Technology",4.0,India +30354114,NR-DBIND,0.99506779,NR-DBIND,0.99506779,Nuclear Receptors Database Including Negative Data,0.977114015,1,http://nr-dbind.drugdesign.fr,200,France,"(50.6918,3.2021)",http://web.archive.org/web/20210307114955/http://nr-dbind.drugdesign.fr/,2018-11-06,"Laboratoire GBA, EA4627 , Conservatoire National des Arts et Métiers , 2 Rue Conté , 
75003 Paris , France.","Réau M, Lagarde N, Zagury JF, Montes M",,Minist?re de l'Enseignement Sup?rieur et de la Recherche,3.0,France +30380106,OrthoInspector,0.992683411,OrthoInspector,0.992683411,,0,1,http://lbgi.fr/orthoinspectorv3,301,,"(48.5839,7.7455)",http://web.archive.org/web/20221008052517/https://www.lbgi.fr/orthoinspectorv3/,2019-01-01,"Department of Computer Science, ICube, UMR 7357, University of Strasbourg, CNRS, Fédération de Médecine Translationnelle de Strasbourg, Strasbourg, France.","Nevers Y, Kress A, Defosset A, Ripp R, Linard B, Thompson JD, Poch O, Lecompte O",,"Agence Nationale de la Recherche, Agence Nationale de la Recherche, Agence Nationale de la Recherche, Agence Nationale de la Recherche, Agence Nationale de la Recherche",13.0,France +30395323,pATLAS,0.996197641,pATLAS,0.996197641,,0,1,http://www.patlas.site,200,,"(38.7167,-9.1333)",http://web.archive.org/web/20221012073204/http://www.patlas.site/,2019-01-01,"Instituto de Microbiologia and Instituto de Medicina Molecular João Lobo Antunes, Faculdade de Medicina, Universidade de Lisboa, Av. 
Professor Egaz Moniz, 1649-028 Lisboa, Portugal.","Jesus TF, Ribeiro-Gonçalves B, Silva DN, Bortolaia V, Ramirez M, Carriço JA",,Oneida Nation Foundation,9.0,Portugal +30445567,OncoBase,0.998138845,OncoBase,0.998138845,,0,1,http://www.oncobase.biols.ac.cn,200,,"(39.9075,116.3972)",http://web.archive.org/web/20220615164838/http://www.oncobase.biols.ac.cn/,2019-01-01,"Key laboratory of Carcinogenesis and Translational Research (Ministry of Education/Beijing), Laboratory of Molecular Oncology, Peking University Cancer Hospital & Institute, Beijing 100142, China.","Li X, Shi L, Wang Y, Zhong J, Zhao X, Teng H, Shi X, Yang H, Ruan S, Li M, Sun ZS, Sun ZS, Zhan Q, Mao F",,"National 973 Program, National Natural Science Foundation of China, China Postdoctoral Science Foundation, National Key R&D Program of China, National Natural Science Foundation of China",15.0,China +30535146,OGOB,0.957013845,OGOB,0.957013845,Oomycete Gene Order Browser,0.890639731,1,http://ogob.ie,301,,"(51.5018,-0.1328)",http://web.archive.org/web/20210921080415/https://ogob.ie/,2019-01-01,"Genome Evolution Laboratory, Department of Biology, Maynooth University, Co. 
Kildare, Ireland.","McGowan J, Byrne KP, Fitzpatrick DA",,"Irish Research Council, Government of Ireland",5.0,Ireland +30684219,NeuroMuscleDB,0.997726917,NeuroMuscleDB,0.997726917,,0,1,http://yu-mbl-muscledb.com/NeuroMuscleDB,301,,"(37.5660,126.9784)",http://web.archive.org/web/20220615141357/http://yu-mbl-muscledb.com/NeuroMuscleDB/,2019-01-25,"Department of Medical Biotechnology, Yeungnam University, Gyeongsan, 38541, Republic of Korea.","Baig MH, Rashid I, Srivastava P, Ahmad K, Jan AT, Rabbani G, Choi D, Barreto GE, Ashraf GM, Lee EJ, Choi I",,,6.0, +30733462,PFDB,0.996777177,PFDB,0.996777177,protein folding kinetics database,0.818020368,1,http://lee.kias.re.kr,302,,"(36.3638,127.355)",no_wayback,2019-02-07,"School of Computational Sciences, Korea Institute for Advanced Study (KIAS), Seoul, Korea.","Manavalan B, Kuwajima K, Lee J",,"Korea Science and Engineering Foundation, National Research Foundation of Korea, MEXT | Japan Society for the Promotion of Science",5.0, +30760842,OpSatdb,0.978444099,OpSatdb,0.978444099,,0,1,http://ssr.icar.gov.in/index.php,302,,"(28.6109,77.1792)",no_wayback,2019-02-13,"ICAR-Indian Institute of Oil Palm Research, Pedavegi-534 450, West Godavari (Dt), Andhra Pradesh, India. 
B.Babu@icar.gov.in.","B KB, K L MR, Sahu S, Mathur RK, P NK, G R, P A, H P B",,,8.0,India +30874591,PAmiRDB,0.994146883,PAmiRDB,0.994146883,,0,1,http://bioinfo.icgeb.res.in/pamirdb,301,,"(28.6519,77.2315)",http://web.archive.org/web/20221008141405/http://bioinfo.icgeb.res.in/pamirdb/,2019-03-15,"Translational Bioinformatics Group, International Centre For Genetic Engineering and Biotechnology, New Delhi, 110067, India.","Satish D, Mukherjee SK, Gupta D",,,4.0,India +31086734,OsteoporosAtlas,0.993293166,OsteoporosAtlas,0.993293166,,0,1,http://biokb.ncpsb.org/osteoporosis,301,Hong Kong,"(22.2908,114.1501)",http://web.archive.org/web/20190426171509/http://biokb.ncpsb.org/osteoporosis/,2019-04-26,"State Key Laboratory of Proteomics, Beijing Proteome Research Center, National Center for Protein Sciences, Beijing Institute of Lifeomics, Beijing, China.","Wang X, Diao L, Diao L, Sun D, Wang D, Zhu J, He Y, Liu Y, Xu H, Zhang Y, Liu J, Wang Y, He F, Li Y, Li D",,"Innovation Project, State Key Laboratory of Proteomics, National Natural Science Foundation of China, Program of Precision Medicine, Beijing Nova Program, National Natural Science Foundation of China",3.0,China +31160594,PathoPhenoDB,0.995027721,PathoPhenoDB,0.995027721,henomebrowser,0.623123646,1,http://patho.phenomebrowser.net,200,,"(37.5331,-122.2486)",http://web.archive.org/web/20220516153606/http://patho.phenomebrowser.net/,2019-06-03,"Computer, Electrical and Mathematical Sciences & Engineering with Division, Computational Bioscience Research Center, King Abdullah University of Science and Technology, Thuwal, 23955, Saudi Arabia.","Kafkas Ş, Abdelhakim M, Hashish Y, Kulmanov M, Abdellatif M, Schofield PN, Hoehndorf R",,"King Abdullah University of Science and Technology (KAUST), King Abdullah University of Science and Technology, King Abdullah University of Science and Technology (KAUST), King Abdullah University of Science and Technology",5.0,Saudi Arabia 
+31195415,Onkonet,0.994386017,Onkonet,0.994386017,,0,1,http://www.prostata-ca.net,302,,"(50.1155,8.6842)",no_wayback,2019-06-13,"Charité Universitätsmedizin Berlin, Klinik für Urologie, Berlin.","Pohle M, Magheli A, Diederichs W, Ecke T, Fischer T, Kempkensteffen C, Knispel H, Lehsnau M, Miller K, Pretzer J, Schostak M, Winter A, Zacharias M, Hinz S",,,0.0, +31259547,PerMM,0.995338023,PerMM,0.995338023,,0,1,http://permm.phar.umich.edu,301,United States,"(37.721,-122.391)",http://web.archive.org/web/20220617150744/https://permm.phar.umich.edu/,2019-07-01,"Department of Medicinal Chemistry, College of Pharmacy , University of Michigan , 428 Church Street , Ann Arbor , Michigan 48109-1065 , United States.","Lomize AL, Hage JM, Schnitzer K, Golobokov K, LaFaive MB, Forsyth AC, Pogozheva ID",,"National Institute on Drug Abuse, NIDA NIH HHS",8.0,United States +31373607,Pan Immune Repertoire Database,0.967511244,PIRD,0.95986867,Pan Immune Repertoire Database,0.967511244,1,http://db.cngb.org/pird,301,,"(37.3394,-121.8950)",http://web.archive.org/web/20221010071600/http://db.cngb.org/pird/,2020-02-01,"BGI-Shenzhen, Shenzhen 518083, China.","Zhang W, Wang L, Liu K, Wei X, Yang K, Du W, Wang S, Guo N, Ma C, Luo L, Wu J, Lin L, Yang F, Gao F, Wang X, Li T, Zhang R, Saksena NK, Yang H, Wang J, Fang L, Hou Y, Xu X, Liu X",,"Science, Technology and Innovation Commission of Shenzhen Municipality, Science, Technology and Innovation Commission of Shenzhen Municipality, Shenzhen Municipal Government of China",10.0,China +31410488,ncRNA-eQTL,0.982337432,ncRNA-eQTL,0.982337432,,0,1,http://ibi.hzau.edu.cn/ncRNA-eQTL,301,,"(30.5833,114.2667)",http://web.archive.org/web/20221102060832/http://ibi.hzau.edu.cn/ncRNA-eQTL/,2020-01-01,"Hubei Key Laboratory of Agricultural Bioinformatics, College of Informatics, Huazhong Agricultural University, Wuhan 430070, P.R. 
China.","Li J, Xue Y, Amin MT, Yang Y, Yang J, Zhang W, Yang W, Niu X, Zhang HY, Gong J",,Huazhong Agricultural University Scientific & Technological Self-innovation Foundation,15.0,China +31566225,OGRDB,0.995836377,OGRDB,0.995836377,Germline Receptor Database,0.822012579,1,http://ogrdb.airr-community.org,301,,"(37.3924,-121.9623)",http://web.archive.org/web/20221016225551/https://ogrdb.airr-community.org/,2020-01-01,"Institute of Structural and Molecular Biology, Birkbeck College, University of London, London WC1E 7HX, UK.","Lees W, Busse CE, Corcoran M, Ohlin M, Scheepers C, Matsen FA, Yaari G, Watson CT, , Collins A, Shepherd AJ",,"NIAID NIH HHS, National Institutes of Health, NIAID NIH HHS, National Institutes of Health, NIAID NIH HHS, National Institutes of Health, Swedish Research Council",6.0, +31584086,PGG.Han,0.980565399,PGG.Han,0.980565399,,0,1,"http://www.pgghan.org, http://www.hanchinesegenomes.org","301, 301","China, China","(39.96,116.298), (39.96,116.298)","no_wayback, http://web.archive.org/web/20221024112340/https://www.hanchinesegenomes.org/",2020-01-01,"Key Laboratory of Computational Biology, Bio-Med Big Data Center, CAS-MPG Partner Institute for Computational Biology, Shanghai Institute of Nutrition and Health, Shanghai Institutes for Biological Sciences, University of Chinese Academy of Sciences, Chinese Academy of Sciences, Shanghai 200031, China.","Gao Y, Zhang C, Yuan L, Ling Y, Wang X, Liu C, Pan Y, Zhang X, Ma X, Wang Y, Lu Y, Yuan K, Ye W, Qian J, Chang H, Cao R, Yang X, Ma L, Ju Y, Dai L, Tang Y, , Zhang G, Xu S",,"National Natural Science Foundation of China, Key Research Program of Frontier Sciences, Program of Shanghai Academic Research Leaders, National Natural Science Foundation of China, Chinese Academy of Sciences, Shanghai Municipal Science and Technology Major Project, National Natural Science Foundation of China, Strategic Priority Research Program, National Science Fund for Distinguished Young Scholars, National Natural 
Science Foundation of China, UK Royal Society-Newton Advanced Fellowship, National Key Research and Development Program, Zhangjiang Special Project of the National Innovation Demonstration Zone",15.0,China +31584092,PDBe-KB,0.995546019,PDBe-KB,0.995546019,Protein Data Bank in Europe-Knowledge Base,0.907250391,1,http://pdbe-kb.org,301,United Kingdom,"(52.1929,0.1256)",no_wayback,2020-01-01,None,,,"Hermesfonds for ELIXIR Belgium, Biotechnology and Biological Sciences Research Council, European Regional Development Fund, NIH, Wellcome Trust Strategic Awards, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Wellcome Trust Strategic Awards, Biotechnology and Biological Sciences Research Council, SIFTS, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Medical Research Council, Czech Science Foundation, NIH, Wellcome Trust, Biotechnology and Biological Sciences Research Council, India Partnering Award, Biotechnology and Biological Sciences Research Council, Wellcome Trust, AIRC, ELIXIR CZ Research Infrastructure Project, Wellcome Trust Strategic Awards, Wellcome Trust Strategic Awards, Biotechnology and Biological Sciences Research Council, NIGMS NIH HHS, NCI NIH HHS, Wellcome Trust Strategic Awards, Biotechnology and Biological Sciences Research Council, Cancer Research UK, Biotechnology and Biological Sciences Research Council, Wellcome Trust Strategic Awards, Engineering and Physical Sciences Research Council, Research Foundation Flanders, British Heart Foundation",42.0, +31612943,OHNOLOGS,0.941329122,OHNOLOGS,0.941329122,,0,1,http://ohnologs.curie.fr,200,,"(48.8428,2.3525)",http://web.archive.org/web/20220926233544/http://ohnologs.curie.fr/,2020-01-01,"Institut Curie, Research Center, CNRS UMR168, PSL Research University, 26 rue d'Ulm, 75005, Paris, France.","Singh PP, 
Isambert H",,"La Ligue Contre le Cancer, Erasmus Mundus",16.0,France +31629694,PerMemDB,0.997093499,PerMemDB,0.997093499,,0,1,http://bioinformatics.biol.uoa.gr/db=permemdb,301,,,no_wayback,2019-10-17,"Section of Cell Biology and Biophysics, Department of Biology, National and Kapodistrian University of Athens, Panepistimiopolis, Athens 15701, Greece.","Nastou KC, Tsaousis GN, Iconomidou VA",,National and Kapodistrian University of Athens,2.0,Greece +31637139,ncRNA2MetS,0.990252088,ncRNA2MetS,0.990252088,metabolic syndrome-associated non,0.63908056,1,http://www.biomed-bigdata.com:50020/index.html,"HTTPConnectionPool(host='www.biomed-bigdata.com', port=50020): Max retries exceeded with url: /index.html (Caused by ConnectTimeoutError(, 'Connection to www.biomed-bigdata.com timed out. (connect timeout=5)'))",,,no_wayback,2019-10-15,"School of Software and Microelectronics, Harbin University of Science and Technology, Harbin, Heilongjiang, China.","Yao D, Zhan X, Zhan X, Kwoh CK, Sun Y",,"Youth Innovative Talents Training Program for Universities of Heilongjiang Province, Harbin Science and Technology Innovation Talents Research Project, China Scholarship Council",0.0,China +31640730,NARD,0.994531229,NARD,0.994531229,Northeast Asian Reference Database,0.981074795,1,http://nard.macrogen.com,301,,"(37.6564,126.8350)",http://web.archive.org/web/20220524180534/https://nard.macrogen.com/,2019-10-22,"Precision Medicine Center, Seoul National University Bundang Hospital, 172 Dolma-ro, Seongnam, Bundang-gu, Gyeonggi-do, 13605, Republic of Korea.","Yoo SK, Kim CU, Kim HL, Kim S, Shin JY, Kim N, Yang JSW, Lo KW, Cho B, Matsuda F, Schuster SC, Kim C, Kim JI, Seo JS",,"Research Grant Council, Hong Kong, Japan Agency for Medical Research and Development",8.0, +31647099,Pathway Commons,0.995383561,Pathway Commons,0.995383561,,0,1,http://www.pathwaycommons.org,200,Canada,"(43.8137,-79.4531)",http://web.archive.org/web/20221005233317/http://www.pathwaycommons.org/,2020-01-01,"The 
Donnelly Centre, University of Toronto, Toronto, Ontario M5S 3E1, Canada.","Rodchenkov I, Babur O, Luna A, Aksoy BA, Wong JV, Fong D, Franz M, Siper MC, Cheung M, Wrana M, Mistry H, Mosier L, Dlin J, Wen Q, O'Callaghan C, Li W, Elder G, Smith PT, Dallago C, Cerami E, Gross B, Dogrusoz U, Demir E, Bader GD, Sander C",,"National Institutes of Health, NHGRI NIH HHS, DARPA, NCI NIH HHS, NHGRI NIH HHS",71.0,Canada +31662803,ODIAC,0.977021754,ODIAC,0.977021754,Open-source Data Inventory for Anthropogenic CO2,0.870113115,1,http://db.cger.nies.go.jp/dataset/ODIAC,301,,"(36.2000,140.1000)",http://web.archive.org/web/20220625110253/https://db.cger.nies.go.jp/dataset/ODIAC/,2018-01-18,,,,,0.0, +31724725,oRNAment,0.992074907,oRNAment,0.992074907,,0,1,http://rnabiology.ircm.qc.ca/oRNAment,302,,"(45.5088,-73.5878)",no_wayback,2020-01-01,"Institut de Recherches Cliniques de Montréal (IRCM) Montréal, Québec, Canada.","Benoit Bouvrette LP, Bovaird S, Blanchette M, Lécuyer E",,"CIHR, Canadian Institutes of Health Research, Fonds de Recherche Québec – Santé, Fonds de Recherche Québec – Nature et Technologies",23.0,Canada +31725861,PCOSBase,0.996945679,PCOSBase,0.996945679,,0,1,http://pcosbase.org,301,Singapore,"(1.32123,103.695)",http://web.archive.org/web/20220419152923/https://pcosbase.org/,2017-01-01,Institute of Systems Biology (INBIOSIS).,"Afiqah-Aleng N, Harun S, A-Rahman MRA, Nor Muhammad NA, Mohamed-Hussein ZA",,"Kementerian Sains, Teknologi dan Inovasi",1.0, +31733062,ParameciumDB,0.996724844,ParameciumDB,0.996724844,,0,1,http://paramecium.i2bc.paris-saclay.fr,301,France,"(48.7046,2.13236)",http://web.archive.org/web/20221016212324/https://paramecium.i2bc.paris-saclay.fr/,2020-01-01,"I2BC, Institute of Integrative Biology of the Cell, UMR9198, CNRS, CEA, Univ Paris-Sud, Université Paris-Saclay, 91198 Gif-sur-Yvette, France.","Arnaiz O, Meyer E, Sperling L",,"Agence Nationale de la Recherche, CNRS, Agence Nationale de la Recherche",8.0,France 
+31733064,PathDIP,0.994651794,PathDIP,0.994651794,,0,1,http://ophid.utoronto.ca/pathDIP,302,,"(43.7001,-79.4163)",http://web.archive.org/web/20221012102606/https://ophid.utoronto.ca/pathDIP/,2020-01-01,"Krembil Research Institute, University Health Network, Toronto, ON M5T 0S8, Canada.","Rahmati S, Abovsky M, Pastrello C, Kotlyar M, Lu R, Cumbaa CA, Rahman P, Chandran V, Jurisica I",,"Atlantic Canada Opportunities Agency, Ian Lawson Van Toch Memorial Fund, Canada Foundation for Innovation, Natural Sciences Research Council, IBM, Ontario Research Foundation, Canada Foundation for Innovation",13.0,Canada +31831730,OPD,0.9961472,OPD,0.9961472,Odonate Phenotypic Database,0.971511943,1,http://www.odonatephenotypicdatabase.org,301,,"(50.1025,8.6299)",no_wayback,2019-12-12,"Department of Biology, Lund University, SE-223 62, Lund, Sweden.","Waller JT, Willink B, Tschol M, Svensson EI",,"Funding above to E.I. S. In addition, B.W. received funding from a Faculty Mobility grant from the University of Costa Rica and a grant from the Schlumberger Foundation., Stiftelsen Olle Engkvist Byggmästare",3.0,Sweden +31950190,PCaLiStDB,0.995350957,PCaLiStDB,0.995350957,PCa,0.603868067,1,http://www.sysbio.org.cn/pcalistdb,301,Hong Kong,"(22.2908,114.1501)",http://web.archive.org/web/20220617195109/http://www.sysbio.org.cn/pcalistdb/,2020-01-01,"Center for Systems Biology, Soochow University, Suzhou 215006, China.","Chen Y, Liu X, Yu Y, Yu C, Yang L, Lin Y, Xi T, Ye Z, Feng Z, Shen B",,"National Natural Science Foundation of China, National Key Research and Development Program of China, Natural Science Foundation of the Jiangsu Higher Education Institutions of China",4.0,China +32090261,NipahVR,0.995369494,NipahVR,0.995369494,,0,1,http://bioinfo.imtech.res.in/manojk/nipahvr,404,,,no_wayback,2020-01-01,,,,,0.0, +32103267,PDIR,0.982636213,PDIR,0.982636213,Predicted Drosophila Interactome 
Resource,0.975733578,1,http://drosophila.biomedtzc.cn,200,China,"(30.2994,120.1612)",http://web.archive.org/web/20220709111835/http://drosophila.biomedtzc.cn/,2020-01-01,"Institute of Big Data and Artificial Intelligence in Medicine, School of Electronics and Information Engineering, Taizhou University, 1139 Shifu Avenue, Taizhou 318000, China.","Ding XB, Jin J, Tao YT, Guo WP, Ruan L, Yang QL, Chen PC, Yao H, Zhang HB, Chen X",,"National Natural Science Foundation of China, Breeding program of Taizhou University, National Natural Science Foundation of China",2.0,China +32105730,ncEP,0.997201324,ncEP,0.997201324,,0,1,http://www.jianglab.cn/ncEP,302,,"(30.2936,120.1614)",http://web.archive.org/web/20220617131718/http://www.jianglab.cn/ncEP/,2020-02-24,"College of Automation Engineering, Nanjing University of Aeronautics and Astronautics, Nanjing, 211106, China.","Liu H, Zhou X, Yuan M, Zhou S, Huang YE, Hou F, Song X, Wang L, Jiang W",,"National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities, National Natural Science Foundation of China",10.0,China +32111231,NoncoRNA,0.997252762,NoncoRNA,0.997252762,,0,1,http://www.ncdtcdb.cn:8080/NoncoRNA,302,,,http://web.archive.org/web/20220616023404/http://www.ncdtcdb.cn:8080/NoncoRNA/,2020-02-28,"Department of Neurosurgery, the Second Affiliated Hospital of Harbin Medical University, Neuroscience Institute, Heilongjiang Academy of Medical Sciences, Harbin, 150086, China.","Li L, Wu P, Wang Z, Meng X, Zha C, Li Z, Qi T, Zhang Y, Han B, Li S, Jiang C, Zhao Z, Cai J",,"The Research Project of the Health and Family Planning Commission of Heilongjiang Province, China Postdoctoral Science Foundation, Heilongjiang Postdoctoral Science Foundation, The Research Project of the Chinese Society of Neuro-oncology, CACA, National Natural Science Foundation of China, National Natural Science Foundation of China",15.0,China 
+32117995,Nc2Eye,0.997420222,Nc2Eye,0.997420222,,0,1,http://nc2eye.bio-data.cn,200,,"(22.2783,114.1747)",http://web.archive.org/web/20220616011133/http://nc2eye.bio-data.cn/,2020-02-14,"School of Biomedical Engineering, School of Ophthalmology and Optometry and Eye Hospital, Wenzhou Medical University, Wenzhou, China.","Zhang Y, Xue Z, Guo F, Yu F, Xu L, Chen H",,"Eye Hospital Wenzhou Medical University, National Natural Science Foundation of China",2.0,China +32122231,ncRPheno,0.996450782,ncRPheno,0.996450782,Online Mendelian Inheritance in Man,0.861822203,1,http://lilab2.sysu.edu.cn/ncrpheno,"HTTPConnectionPool(host='lilab2.sysu.edu.cn', port=80): Max retries exceeded with url: /ncrpheno (Caused by ConnectTimeoutError(, 'Connection to lilab2.sysu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220618045454/http://lilab2.sysu.edu.cn/ncrpheno/,2020-03-26,"Zhongshan School of Medicine, Sun Yat-sen University , Guangzhou, China.","Zhang W, Yao G, Wang J, Yang M, Wang J, Zhang H, Li W",,"National Key R&D Program of China, Key Technology Research and Development Program of Shandong, National Natural Science Foundation of China",9.0,China +32324748,OdoBD,0.832681954,OdoBD,0.832681954,,0,1,http://www.odobd.org,406,,,http://web.archive.org/web/20220413144730/http://odobd.org/,2020-04-23,"Department of Biochemistry and Molecular Biology, Shahjalal University of Science and Technology, Sylhet, Bangladesh.","Shah MNA, Khan MK",,"Rufford Foundation, Explorers Club",0.0,Bangladesh +32393257,MyoMiner,0.994202495,MyoMiner,0.994202495,,0,1,http://www.sys-myo.com/myominer,301,,"(37.9838,23.7278)",http://web.archive.org/web/20220710032558/https://www.sys-myo.com/myominer/,2020-05-11,,,,,0.0, +32404014,PDB-2-PBv3.0,0.936054283,PDB-2-PBv3.0,0.936054283,,0,1,http://bioinfo.bdu.ac.in/pb3,301,India,"(12.8996,80.2209)",no_wayback,2020-04-01,"Department of Bioinformatics, School of Life Sciences, Bharathidasan University, Tiruchirappalli 620 024, Tamil Nadu, 
India.","Karuppasamy MP, Venkateswaran S, Subbiah P",,the DST-PURSE,4.0,India +32451429,MyomirDB,0.99731797,MyomirDB,0.99731797,,0,1,http://www.myomirdb.in,200,,"(13.2257,77.5750)",no_wayback,2020-05-25,"Defence Institute of Physiology and Allied Sciences (DIPAS), Defence R&D Organization (DRDO), Timarpur, Delhi, India.","Gupta A, Srivastava S, Suryakumar G, Kumar B, Khurana P",,,0.0,India +32487016,ncRI,0.994199097,ncRI,0.994199097,,0,1,http://www.jianglab.cn/ncRI,302,,"(30.2936,120.1614)",http://web.archive.org/web/20220615201111/http://www.jianglab.cn/ncRI/,2020-06-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, 150081, China.","Wang S, Zhou S, Liu H, Meng Q, Ma X, Liu H, Wang L, Jiang W",,"Fundamental Research Funds for the Central University, National Natural Science Foundation of China, National Natural Science Foundation of China, Fundamental Research Funds for the Provincial Universities",1.0,China +32556221,OMEGA-NET,0.988661706,OMEGA-NET,0.988661706,,0,1,http://occupationalcohorts.net,301,,"(41.3888,2.1590)",http://web.archive.org/web/20220617124131/https://occupationalcohorts.net/,2020-07-01,"Barcelona Institute for Global Health (ISGlobal), Barcelona, Spain.","Kogevinas M, Schlünssen V, Mehlum IS, Turner MC",,"European Cooperation in Science and Technology, Spanish Ministry of Science, Innovation and Universities, European Cooperation in Science and Technology, European Social Fund, Ramón y Cajal",2.0,Spain +32597311,OncotRF,0.998194695,OncotRF,0.998194695,,0,1,http://bioinformatics.zju.edu.cn/OncotRF,301,,"(30.2936,120.1614)",http://web.archive.org/web/20221102055303/http://bioinformatics.zju.edu.cn/OncotRF/,2020-06-28,"Center for Uterine Cancer Diagnosis & Therapy Research of Zhejiang Province, Women's Reproductive Health Key Laboratory of Zhejiang Province, Department of Gynecologic Oncology, Women's Hospital and Institute of Translational Medicine, School of Medicine, Zhejiang University , Hangzhou, Zhejiang, 
China.","Yao D, Sun X, Zhou L, Amanullah M, Pan X, Liu Y, Liang M, Liu P, Lu Y",,"National Natural Science Foundation of China, Medical Health Science and Technology Key Project of Zhejiang Provincial Health Commission, Key Program of Zhejiang Provincial Natural Science Foundation of China, National Key Research and Development Program of China",12.0,China +32693783,ORDER,0.97873052,ORDER,0.97873052,Oilseed Rape Developmental Expression Resource,0.972284428,1,http://order.jic.ac.uk,302,,"(52.6278,1.2983)",http://web.archive.org/web/20220720032029/http://order.jic.ac.uk/,2020-07-21,"Crop Genetics, John Innes Centre, Norwich Research Park, Norwich, NR4 7UH, UK.","Jones DM, Olson TSG, Pullen N, Wells R, Irwin JA, Morris RJ",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",1.0, +32780568,OpenProt,0.995658994,OpenProt,0.995658994,,0,1,http://openprot.org,301,,"(45.4001,-71.8991)",http://web.archive.org/web/20221017034150/https://openprot.org/,2020-09-01,"Department of Biochemistry and Functional Genomics, Université de Sherbrooke, Sherbrooke, Québec, Canada.","Brunet MA, Lekehal AM, Roucou X",,,1.0,Canada +32810235,PCAT,0.967368126,PCAT,0.967368126,PDX for Childhood Cancer Therapeutics,0.790382832,1,"http://pcat.zhenglab.info, http://www.pedtranscriptome.org","403, 200",United States,", (29.4614,-98.7468)","no_wayback, http://web.archive.org/web/20221007092620/http://pedtranscriptome.org/",2021-01-01,"Greehey Children's Cancer Research Institute, University of Texas Health at San Antonio, San Antonio, TX 78229, USA.","Yang J, Li Q, Noureen N, Fang Y, Kurmasheva R, Houghton PJ, 
Wang X, Zheng S",,"Greehey Children’s Cancer Research Institute, National Cancer Institute, Cancer Prevention and Research Institute of Texas, Cancer Prevention and Research Institute of Texas",0.0,United States +32895427,PCOSKBR2,0.997740662,PCOSKBR2,0.997740662,PolyCystic Ovary Syndrome KnowledgeBase,0.975534484,1,http://www.pcoskb.bicnirrh.res.in,403,,,http://web.archive.org/web/20220809232412/http://pcoskb.bicnirrh.res.in/,2020-09-07,"Biomedical Informatics Center, Indian Council of Medical Research-National Institute for Research in Reproductive Health, Mumbai, 400012, India.","Sharma M, Barai RS, Kundu I, Bhaye S, Pokar K, Idicula-Thomas S",,"Department of Biotechnology, Ministry of Science and Technology, India, Department of Health Research, India",3.0,India +32974523,Ocins,0.64978832,Ocins,0.64978832,,0,1,http://ocins.cftri.com/ocins,301,,"(14.1875,77.6267)",http://web.archive.org/web/20191020164352/http://ocins.cftri.com:80/ocins/,2019-06-13,"Department of Protein Chemistry and Technology, CSIR-CFTRI, Mysore, Karnataka 570020, India.","Choyam S, Psn S, Pandey R, Kammara R",,,0.0,India +33045747,Open Targets Genetics,0.843843058,Open Targets Genetics,0.843843058,Targets,0.592151999,1,http://genetics.opentargets.org,301,,"(39.0997,-94.5786)",http://web.archive.org/web/20221104212438/https://genetics.opentargets.org/,2021-01-01,"Wellcome Sanger Institute, Wellcome Genome Campus, Hinxton, Cambridgeshire CB10 1SA, UK.","Ghoussaini M, Mountjoy E, Carmona M, Peat G, Schmidt EM, Hercules A, Fumis L, Miranda A, Carvalho-Silva D, Buniello A, Burdett T, Hayhurst J, Baker J, Ferrer J, Gonzalez-Uriarte A, Jupp S, Karim MA, Koscielny G, Machlitt-Northen S, Malangone C, Pendlington ZM, Roncaglia P, Suveges D, Wright D, Vrousgou O, Papa E, Parkinson H, MacArthur JAL, Todd JA, Barrett JC, Schwartzentruber J, Hulcoop DG, Ochoa D, McDonagh EM, Dunham I",,JDRF,37.0, 
+33080028,Peryton,0.997871995,Peryton,0.997871995,,0,1,http://dianalab.e-ce.uth.gr/peryton,301,Greece,"(39.3616,22.9414)",no_wayback,2021-01-01,"Department of Electrical & Computer Engineering, Univ. of Thessaly, Volos 38221, Greece.","Skoufos G, Kardaras FS, Alexiou A, Kavakiotis I, Lambropoulou A, Kotsira V, Tastsoglou S, Hatzigeorgiou AG",,"ELIXIR-GR: The Greek Research Infrastructure for Data Management and Analysis in Life Sciences, European Regional Development Fund, Human Resources Development, Education and Lifelong Learning, Stavros Niarchos Foundation, Competitiveness, Entrepreneurship and Innovation",3.0,Greece +33103271,NanDeSyn,0.989316845,NanDeSyn,0.989316845,Nannochloropsis Design and Synthesis,0.926788456,1,http://nandesyn.single-cell.cn,308,,"(39.9075,116.3972)",http://web.archive.org/web/20220709205335/https://nandesyn.single-cell.cn/,2020-11-27,"Single-Cell Center, CAS Key Laboratory of Biofuels and Shandong Key Laboratory of Energy Genetics, Shandong Institute of Energy Research, Qingdao Institute of BioEnergy and Bioprocess Technology (QIBEBT), Chinese Academy of Sciences, Qingdao, Shandong, 266101, China.","Gong Y, Kang NK, Kim YU, Wang Z, Wei L, Xin Y, Shen C, Wang Q, You W, Lim JM, Jeong SW, Park YI, Oh HM, Pan K, Poliner E, Yang G, Li-Beisson Y, Li Y, Hu Q, Poetsch A, Farre EM, Chang YK, Jeong WJ, Jeong BR, Xu J",,"Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China-Guangdong Joint Fund, Natural Science Foundation of Shandong Province",2.0,China +33245774,PAGER-CoV,0.984723121,PAGER-CoV,0.984723121,,0,1,http://discovery.informatics.uab.edu/PAGER-CoV,301,,"(33.5207,-86.8025)",http://web.archive.org/web/20220116225721/http://discovery.informatics.uab.edu/PAGER-COV/,2021-01-01,"Informatics Institute, School of Medicine, The University of Alabama at Birmingham, Birmingham, AL 35223, USA.","Yue Z, Zhang E, Xu C, Khurana S, Batra N, Dang SDH, Cimino JJ, Chen JY",,"NCI NIH HHS, 
National Cancer Institute, Center for Clinical and Translational Science, The University of Alabama at Birmingham, NCATS NIH HHS",6.0,United States +33247934,OGDA,0.98520869,OGDA,0.98520869,Organelle Genome Database for Algae,0.973292843,1,http://ogda.ytu.edu.cn,303,,"(31.2222,121.4581)",no_wayback,2020-11-01,"College of Life Sciences, Yantai University, No.30 Qingquan Road, Laishan District, Yantai, 264005, Shandong, P.R. China.","Liu T, Cui Y, Jia X, Zhang J, Li R, Yu Y, Jia S, Qu J, Wang X",,"the Chinese Universities Scientific Fund, the Natural Science Foundation of Shandong Province, Top Talent Program of The Yantai University, China-ASEAN Maritime Cooperation Fund",1.0,China +33275967,ncRNAVar,0.998307347,ncRNAVar,0.998307347,,0,1,http://www.liwzlab.cn/ncrnavar,302,,"(29.4159,121.3397)",http://web.archive.org/web/20220526183442/http://liwzlab.cn/ncrnavar/,2020-12-01,"Zhongshan School of Medicine, Sun Yat-sen University, Guangzhou 510080, China.","Zhang W, Zeng B, Yang M, Yang H, Wang J, Deng Y, Zhang H, Yao G, Wu S, Li W",,"Fundamental Research Funds for the Central Universities, National Natural Science Foundation of China, National Key Research and Development Program of China",4.0,China +33294866,Pancreatlas,0.996841073,Pancreatlas,0.996841073,,0,1,http://www.pancreatlas.org,302,,"(33.7490,-84.3880)",http://web.archive.org/web/20221017072150/https://www.pancreatlas.org/,2020-10-05,"Division of Diabetes, Endocrinology, and Metabolism, Department of Medicine, Vanderbilt University Medical Center, Nashville, TN, USA.","Saunders DC, Messmer J, Kusmartseva I, Beery ML, Yang M, Atkinson MA, Powers AC, Cartailler JP, Brissova M",,"Human Islet Research Network, Department of Veterans Affairs, NIDDK, NIDDK, NIDDK, NIDDK, NIDDK, NIDDK, NIDDK",3.0,United States +33304468,PDmethDB,0.997210741,PDmethDB,0.997210741,Parkinson's Disease Methylation Database,0.937413712,1,http://ageing.shinyapps.io/pdmethdb,301,United 
States,"(39.0438,-77.4874)",no_wayback,2020-11-20,"Cancer Centre, Centre of Reproduction, Development and Aging, Faculty of Health Sciences, University of Macau, Macau S.A.R., China.","Wang C, Chen L, Zhang M, Yang Y, Wong G",,"National Natural Science Foundation of China, Shantou University, Universidade de Macau, Li Ka Shing Foundation",1.0,China +33306802,NPBS,0.996700525,NPBS,0.996700525,Natural Products & Biological Sources,0.790207267,1,http://www.organchem.csdb.cn/scdb/NPBS,301,China,"(39.9042,116.407)",no_wayback,2020-12-01,"Shanghai Institute of Organic Chemistry, Chinese Academy of Sciences, 345 LingLing Road, Shanghai 200032, China.","Xu T, Chen W, Zhou J, Dai J, Li Y, Zhao Y",,"National Natural Science Foundation of China, SGST, CSDB",4.0,China +33359127,NBIGV,0.983493745,NBIGV,0.983493745,,0,1,http://nbigv.org,200,,"(34.0443,-118.2509)",http://web.archive.org/web/20211129061243/http://www.nbigv.org/,2020-12-23,"Department of Immunology, School of Basic Medical Sciences, Peking University, Beijing 100191, China; Key Laboratory of Immunology, National Health Commission, Beijing 100191, China.","Zhang C, Xiao L, Huang Y, Zhang L, Jiang D, Shao W, Zheng J, Hu F, Chu M, Huang J, Gong X, Zhou Y, Qiu X",,"National Natural Science Foundation of China, Peking University, Chinese Academy of Medical Sciences",1.0,"China, China" +33361798,Open Cancer TherApeutic Discovery,0.961413613,OCTAD,0.955712438,Open Cancer TherApeutic Discovery,0.961413613,1,http://octad.org,200,,"(45.8399,-119.7006)",http://web.archive.org/web/20221031110732/http://octad.org/,2020-12-23,"Department of Pediatrics and Human Development, Michigan State University, Grand Rapids, MI, USA.","Zeng B, Glicksberg BS, Newbury P, Chekalin E, Xing J, Liu K, Wen A, Chow C, Chen B",,"NCATS NIH HHS, U.S. Department of Health & Human Services | NIH | National Center for Advancing Translational Sciences, U.S. 
Department of Health & Human Services | NIH | National Center for Advancing Translational Sciences (NCATS), U.S. Department of Health & Human Services | NIH | National Institute of General Medical Sciences, NIEHS NIH HHS, NIGMS NIH HHS, U.S. Department of Health & Human Services | NIH | National Institute of Environmental Health Sciences, U.S. Department of Health & Human Services | NIH | National Institute of General Medical Sciences (NIGMS)",8.0,United States +33363449,PASS,0.929875195,PASS,0.929875195,,0,1,http://musaelab.ca/pass-database,301,,"(46.8123,-71.2145)",no_wayback,2020-12-08,"INRS-EMT, Université du Québec, Montréal, QC, Canada.","Parent M, Albuquerque I, Tiwari A, Cassani R, Gagnon JF, Lafond D, Tremblay S, Falk TH",,Natural Sciences and Engineering Research Council of Canada,1.0,Canada +33442735,O-GlcNAcAtlas,0.984077953,O-GlcNAcAtlas,0.984077953,,0,1,http://oglcnac.org,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20221020163047/https://oglcnac.org/,2021-08-01,"Department of Oncology, Lombardi Comprehensive Cancer Center, Georgetown University Medical Center, Washington, DC 20057, USA.","Ma J, Li Y, Hou C, Wu C",,"NCI NIH HHS, National Institutes of Health, National Cancer Institute, NCI NIH HHS",5.0,United States +33514746,NGD,0.874260724,NGD,0.874260724,Nelumbo Genome Database,0.871381362,1,http://nelumbo.biocloud.net,200,,"(29.4159,121.3397)",http://web.archive.org/web/20220728071920/http://nelumbo.biocloud.net/,2021-01-29,"CAS Key Laboratory of Aquatic Botany and Watershed Ecology, Wuhan Botanical Garden, Chinese Academy of Sciences, Wuhan, 430074, China.","Li H, Yang X, Zhang Y, Gao Z, Liang Y, Chen J, Shi T",,"National Natural Science Foundation of China, Bureau of Landscaping and Forestry of Wuhan Municipality, Hubei Chenguang Talented Youth Develoment Foundation, National Natural Science Foundation of China, National Natural Science Foundation of China (National Science Foundation of China), National Natural Science Foundation of 
China, National Natural Science Foundation of China (National Science Foundation of China), National Natural Science Foundation of China (National Science Foundation of China)",2.0,China +33581334,OGP,0.993771553,OGP,0.993771553,,0,1,"http://www.oglyp.org/, http://www.oglyp.org/download.php","200, 200",,"(30.2936,120.1614), (30.2936,120.1614)","http://web.archive.org/web/20221011180156/http://www.oglyp.org/, no_wayback",2021-02-10,"Department of Chemistry and Institutes of Biomedical Sciences, Fudan University, Shanghai 200032, China; The Fifth People's Hospital, Fudan University, and the Shanghai Key Laboratory of Medical Epigenetics, the International Co-laboratory of Medical Epigenetics and Metabolism, Ministry of Science and Technology, Fudan University, Shanghai 200032, China.","Huang J, Wu M, Zhang Y, Kong S, Liu M, Jiang B, Yang P, Cao W",,"National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China",1.0,"China, China" +33647438,PepTherDia,0.833288401,PepTherDia,0.833288401,,0,1,http://peptherdia.herokuapp.com,200,,,http://web.archive.org/web/20221024200419/https://peptherdia.herokuapp.com/,2021-02-26,"School of Pharmacy and Biomolecular Sciences, Faculty of Science, Liverpool John Moores University, Liverpool L3 3AF, UK.","D'Aloisio V, Dognini P, Hutcheon GA, Coxon CR",,"Liverpool John Moores University, Horizon 2020, Horizon 2020 Framework Programme",4.0, +33653882,Mycobacterial Systems Resource,0.923476199,MSR,0.835704486,Mycobacterial Systems Resource,0.923476199,1,http://msrdb.org,301,,"(40.8223,-74.4569)",http://web.archive.org/web/20220213164636/https://msrdb.org/,2021-03-02,"Wadsworth Center, New York State Department of Health, Albany, New York, USA.","Judd JA, Canestrari J, Clark R, Joseph A, Lapierre P, Lasek-Nesselquist E, Mir M, Palumbo M, Smith C, Stone M, Upadhyay A, Wirth SE, Dedrick RM, 
Meier CG, Russell DA, Dills A, Dove E, Kester J, Wolf ID, Zhu J, Rubin ER, Fortune S, Hatfull GF, Gray TA, Wade JT, Derbyshire KM",,"Howard Hughes Medical Institute, HHS | National Institutes of Health, Howard Hughes Medical Institute, HHS | National Institutes of Health",5.0,United States +33749993,OmniPath,0.994131088,OmniPath,0.994131088,,0,1,http://omnipathdb.org,301,,"(51.5085,-0.1257)",http://web.archive.org/web/20221016235356/https://omnipathdb.org/,2021-03-01,"Faculty of Medicine and Heidelberg University Hospital, Institute of Computational Biomedicine, Heidelberg University, Heidelberg, Germany.","Türei D, Valdeolivas A, Gul L, Palacio-Escat N, Klein M, Ivanova O, Ölbei M, Gábor A, Theis F, Módos D, Korcsmáros T, Saez-Rodriguez J",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, UKRI|Biotechnology and Biological Sciences Research Council (BBSRC), UKRI|Biotechnology and Biological Sciences Research Council (BBSRC), UK Research and Innovation|Biotechnology and Biological Sciences Research Council, ISP grant for Gut Microbes and Health, JRC COMBINE, partially funded by Bayer AG, Deutsche Forschungsgemeinschaft, Biotechnology and Biological Sciences Research Council, UK Research and Innovation|Biotechnology and Biological Sciences Research Council, Norwich Research Park Biosciences Doctoral Training Partnership grant, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Federal Ministry of Education (BMFB, Computational Life Sciences grant), Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Deutsche Forschungsgemeinschaft (DFG), European Union Innovative Medicines Initiative TransQST, Biotechnology and Biological Sciences Research Council, UKRI|Biotechnology and 
Biological Sciences Research Council (BBSRC), Biotechnology and Biological Sciences Research Council",30.0,Germany +33997360,PCPD,0.983033021,PCPD,0.983033021,Plant cytochrome P450 database,0.944938992,1,http://p450.biodesign.ac.cn,301,United States,"(34.0522,-118.244)",http://web.archive.org/web/20220308084215/https://p450.biodesign.ac.cn/,2021-04-24,"College of Biotechnology, Tianjin University of Science & Technology, Tianjin, 300457, China.","Wang H, Wang Q, Liu Y, Liao X, Chu H, Chang H, Cao Y, Li Z, Zhang T, Cheng J, Jiang H",,"National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Outstanding Youth Science Fund Project of National Natural Science Foundation of China, National Outstanding Youth Science Fund Project of National Natural Science Foundation of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Outstanding Youth Science Fund Project of National Natural Science Foundation of China, National Outstanding Youth Science Fund Project of National Natural Science Foundation of China, National Natural Science Foundation of China, National Outstanding Youth Science Fund Project of National Natural Science Foundation of China, National Key Research and Development Program of China, National Key Research and Development Program of China",0.0,China +34025934,MycoTRAP-DB,0.982121785,MycoTRAP-DB,0.982121785,Mycobacterium tuberculosis Resistance Associated,0.798473769,1,http://139.59.12.92,200,,"(13.2257,77.5750)",no_wayback,2021-04-19,"Jamia Hamdard Institute of Molecular Medicine, Jamia Hamdard, New Delhi 110062, India.","Singh P, Jamal S, Ahmed F, Saqib N, Mehra S, Ali W, Roy D, Ehtesham NZ, Hasnain SE",,"Department of Biotechnology, Ministry of Science and Technology, India, Department of 
Health Research, India, Ministry of Science and Technology, Taiwan, Department of Health Research, India",1.0,India +34120586,NUCOME,0.996816218,NUCOME,0.996816218,,0,1,http://compbio-zhanglab.org/NUCOME,406,,,http://web.archive.org/web/20221016233453/http://compbio-zhanglab.org/NUCOME/,2021-06-13,"Institute for Regenerative Medicine, Shanghai East Hospital, Shanghai Key Laboratory of Signaling and Disease Research, Frontier Science Center for Stem Cell Research, School of Life Science and Technology, Tongji University, 1239 Siping Road, Shanghai, 200092, China.","Chen X, Yang H, Liu G, Zhang Y",,"National Key Research and Development Program of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China",0.0,China +34241085,OCELOT,0.996991932,OCELOT,0.996991932,Organic Crystals in Electronic and,0.949127361,1,http://oscar.as.uky.edu,302,,"(37.9887,-84.4777)",http://web.archive.org/web/20220524065750/https://oscar.as.uky.edu/,2021-05-01,"Department of Chemistry and Center for Applied Energy Research, University of Kentucky, Lexington, Kentucky 40506-0055, USA.","Ai Q, Bhat V, Ryno SM, Jarolimek K, Sornberger P, Smith A, Haley MM, Anthony JE, Risko C",,"National Science Foundation, National Science Foundation, National Science Foundation, National Science Foundation Extreme Science and Engineering Discovery Environment",0.0,United States +34332522,PACHIN,0.991595972,PACHIN,0.991595972,Pediatric In-Hospital Cardiac Arrest International Registry,0.979042335,1,http://clinicaltrials.gov/ct2/show/record/NCT04675918?cond=pediatric+cardiac+arrest&draw=2&rank=10,301,,"(38.9896,-77.1538)",no_wayback,2021-07-31,"Pediatric Intensive Care Unit, Gregorio Marañón General University Hospital, Condado de Treviño 9, 28033, Madrid, Spain. 
jimenadelcastillo@yahoo.es.","Del Castillo J, Sanz D, Herrera L, López-Herce J, ",,Instituto de Salud Carlos III,0.0,Spain +34389843,Nabe,0.985496402,Nabe,0.985496402,,0,1,http://nabe.denglab.org,200,,"(22.5231,113.3791)",no_wayback,2021-08-01,"School of Computer Science and Engineering, Central South University, 22 Shaoshan South Road, Changsha 410075, China.","Liu J, Liu S, Liu C, Zhang Y, Pan Y, Wang Z, Wang J, Wen T, Deng L",,"National Natural Science Foundation of China, National Natural Science Foundation of China",2.0,China +34504668,PEN,0.994843125,PEN,0.994843125,Protein-gene Expression Nexus,0.95734026,1,http://combio.snu.ac.kr/pen,"HTTPConnectionPool(host='combio.snu.ac.kr', port=80): Max retries exceeded with url: /pen (Caused by ReadTimeoutError(""HTTPConnectionPool(host='combio.snu.ac.kr', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2021-08-17,"National Cancer Center, 323 Ilsan-ro, Goyang-si, Gyeonggi-do 10408, Republic of Korea.","Hyung D, Baek MJ, Lee J, Cho J, Kim HS, Park C, Cho SY",,,0.0, +34521345,PathFams,0.991846502,PathFams,0.991846502,,0,1,http://pathfams.uwaterloo.ca,301,,"(41.2619,-95.8608)",http://web.archive.org/web/20220702004440/https://pathfams.uwaterloo.ca/,2021-09-14,"Department of Biology, University of Waterloo, Waterloo, Ontario, Canada.","Lobb B, Tremblay BJ, Moreno-Hagelsieb G, Doxey AC",,NSERC,0.0,Canada +34527188,MyoData,0.9587152,MyoData,0.9587152,,0,1,http://myodata.bio.unipd.it,301,,"(45.4080,11.8859)",no_wayback,2021-07-26,"Department of Biology, University of Padova, Via Ugo Bassi 58/b, 35131 Padova, Italy.","Corso D, Chemello F, Alessio E, Urso I, Ferrarese G, Bazzega M, Romualdi C, Lanfranchi G, Sales G, Cagnin S",,"Fondazione Cariplo, Università degli Studi di Padova",0.0,Italy +34630517,NetGenes,0.995349348,NetGenes,0.995349348,,0,1,http://rbc-dsai-iitm.github.io/NetGenes,301,,"(37.7621,-122.3971)",http://web.archive.org/web/20210123162728/https://rbc-dsai-iitm.github.io/NetGenes/,2021-09-23,"Centre 
for Integrative Biology and Systems mEdicine (IBSE), Indian Institute of Technology (IIT) Madras, Chennai, India.","Senthamizhan V, Ravindran B, Raman K",,,0.0,India +21031599,PRO-MINE,0.97720556,PRO-MINE,0.97720556,PROtein Mutations In NEurodegeneration,0.793561556,1,http://bioinfo.hr/pro-mine,"HTTPConnectionPool(host='bioinfo.hr', port=80): Max retries exceeded with url: /pro-mine (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20200204063648/http://bioinfo.hr:80/pro-mine/,2011-01-01,"Department of Molecular Biology, University of Zagreb, Croatia.","Pinto S, Vlahoviček K, Buratti E",,,6.0,Croatia +21366916,PoPoolation,0.903602421,PoPoolation,0.903602421,,0,1,http://www.popoolation.at/pgt,"HTTPConnectionPool(host='www.popoolation.at', port=80): Max retries exceeded with url: /pgt (Caused by ConnectTimeoutError(, 'Connection to www.popoolation.at timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20210922225434/http://www.popoolation.at/pgt/,2011-03-02,"Institut für Populationsgenetik, Vetmeduni Vienna, Veterinärplatz 1, Vienna, Austria.","Pandey RV, Kofler R, Orozco-terWengel P, Nolte V, Schlötterer C",,Austrian Science Fund FWF,8.0,Austria +21418024,PlantPIs,0.9976331,PlantPIs,0.9976331,,0,1,http://www.plantpis.ba.itb.cnr.it,"HTTPConnectionPool(host='www.plantpis.ba.itb.cnr.it', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20220527152310/http://plantpis.ba.itb.cnr.it/,2011-08-01,"Institute of Biomedical Technologies ITB, National Research Council CNR, Via Amendola 122/D, 70126 Bari, Italy.","Consiglio A, Grillo G, Licciulli F, Ceci LR, Liuni S, Losito N, Volpicella M, Gallerani R, De Leo F",,,1.0,Italy +21472892,PORCN,0.596062005,PORCN,0.596062005,,0,1,http://www.lovd.nl/porcn,301,,,no_wayback,2011-06-21,"Department of Clinical Genetics, 
Academic Medical Center, University of Amsterdam, Meibergdreef 9, Amsterdam, The Netherlands.","Lombardi MP, Bulk S, Celli J, Lampe A, Gabbett MT, Ousager LB, van der Smagt JJ, Soller M, Stattin EL, Mannens MA, Smigiel R, Hennekam RC",,European Commission FP7,32.0,Netherlands +21507258,PheMaDB,0.986041546,PheMaDB,0.986041546,Phenotype,0.610215828,1,http://phemadb.sourceforge.net,301,Canada,"(43.6532,-79.3832)",no_wayback,2011-04-20,"Biotechnology, The MITRE Corporation, McLean, VA, USA. wchang@mitre.org","Chang WE, Sarver K, Higgs BW, Read TD, Nolan NM, Chapman CE, Bishop-Lilly KA, Sozhamannan S",,,6.0,United States +21536137,Proteopedia,0.995484591,Proteopedia,0.995484591,,0,1,http://www.proteopedia.org,301,,,http://web.archive.org/web/20090107032056/http://proteopedia.org/,2011-04-23,"Bioinformatics Unit, Biological Services Unit, Weizmann Institute of Science, Rehovot 76100, Israel. jaime.prilusky@weizmann.ac.il","Prilusky J, Hodis E, Canner D, Decatur WA, Oberholser K, Martz E, Berchanski A, Harel M, Sussman JL",,"European Commission VIth Framework Research and Technological Development Program, ‘SPINE2-COMPLEXES’ Project, ‘Teach-SG’ Project",22.0,Israel +21554668,PHENOPSIS,0.950341225,PHENOPSIS,0.950341225,,0,1,http://bioweb.supagro.inra.fr/phenopsis,301,France,"(48.7644,2.18486)",http://web.archive.org/web/20221006022055/https://bioweb.supagro.inra.fr/phenopsis/,2011-05-09,"Laboratoire d'Ecophysiologie des Plantes sous Stress Environnementaux (LEPSE), INRA-AGRO-M, UMR 759, 2 Place Viala, 34060 Montpellier Cedex 1 France.","Fabre J, Dauzat M, Nègre V, Wuyts N, Tireau A, Gennari E, Neveu P, Tisné S, Massonnet C, Hummel I, Granier C",,,31.0,France +21624156,PHARE-KB,0.872125208,PHARE-KB,0.872125208,,0,1,http://purl.bioontology.org/ontology/PHARE,302,United States,"(37.4295,-122.178)",no_wayback,2011-05-17,"LORIA - INRIA Nancy - Grand-Est, Campus Scientifique - BP 239 - 54506 Vandoeuvre-lès-Nancy Cedex, France. 
adrien.coulet@loria.fr.","Coulet A, Garten Y, Dumontier M, Altman RB, Musen MA, Shah NH",,"NLM NIH HHS, NHGRI NIH HHS",14.0,France +21656910,QuAD,0.991928503,QuAD,0.991928503,The Quantitative Assay Database,0.939238191,1,http://proteome.moffitt.org/QUAD,302,,,http://web.archive.org/web/20220616214512/http://proteome.moffitt.org/QUAD/,2011-06-08,"Molecular Oncology and Proteomics, H. Lee Moffitt Cancer Center, Tampa, FL 33612, USA.","Remily-Wood ER, Liu RZ, Xiang Y, Chen Y, Thomas CE, Rajyaguru N, Kaufman LM, Ochoa JE, Hazlehurst L, Pinilla-Ibarz J, Lancet J, Zhang G, Haura E, Shibata D, Yeatman T, Smalley KS, Dalton WS, Huang E, Scott E, Bloom GC, Eschrich SA, Koomen JM",,"The Bankhead-Coley Research Program of the State of Florida, US Army Medical Research and Materiel Command under Award, Institutional Research Grant from the American Cancer Society, NCI NIH HHS, NIH/National Cancer Institute PSOC, National Cancer Institute under Award, National Cancer Institute, NCI NIH HHS, National Functional Genomics Center, Department of Defense, Moffitt Foundation, Bankhead-Coley Cancer Research program of the Florida Department of Health, NCI NIH HHS, The Melanoma Research Foundation, Virginia Johnson and Lawrence Dangott at the Texas A&M University Protein Chemistry Laboratory, NCI NIH HHS, National Cancer Institute, NCI NIH HHS, University of South Florida Chemistry Department, NCI NIH HHS, University of Florida-Moffitt Collaborative Partnership, Moffitt's Hematological Oncology Program, NCI NIH HHS, NCI NIH HHS",38.0,United States +21781326,PromBase,0.990113139,PromBase,0.990113139,,0,1,http://nucleix.mbu.iisc.ernet.in/prombase,301,,,http://web.archive.org/web/20220620050119/http://nucleix.mbu.iisc.ernet.in/prombase/,2011-07-22,"Molecular Biophysics Unit, Indian Institute of Science, Bangalore-560 012, India. 
mb@mbu.iisc.ernet.in.","Rangannan V, Bansal M",,,15.0,India +21786137,PPIRA,0.925762713,PPIRA,0.925762713,,0,1,http://protein.cau.edu.cn/ppira,"HTTPConnectionPool(host='protein.cau.edu.cn', port=80): Max retries exceeded with url: /ppira (Caused by ConnectTimeoutError(, 'Connection to protein.cau.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20170927104430/http://protein.cau.edu.cn:80/ppira/,2011-07-24,"State Key Laboratory of Agrobiotechnology, China Agricultural University, Beijing, 100193, China.","Li ZG, He F, Zhang Z, Peng YL",,,15.0,"China, China" +21801404,PICCOLO,0.993021965,PICCOLO,0.993021965,,0,1,http://www-cryst.bioc.cam.ac.uk/piccolo,"HTTPConnectionPool(host='www-cryst.bioc.cam.ac.uk', port=80): Max retries exceeded with url: /piccolo (Caused by ConnectTimeoutError(, 'Connection to www-cryst.bioc.cam.ac.uk timed out. (connect timeout=5)'))",,,no_wayback,2011-07-29,"Department of Biochemistry, University of Cambridge, Cambridge, CB2 1GA, UK. grbickerton@dundee.ac.uk","Bickerton GR, Higueruelo AP, Blundell TL",,Biotechnology and Biological Sciences Research Council,30.0, +21965557,QlicRice,0.997935176,QlicRice,0.997935176,,0,1,http://nabg.iasri.res.in:8080/qlic-rice,"HTTPConnectionPool(host='nabg.iasri.res.in', port=8080): Max retries exceeded with url: /qlic-rice (Caused by ConnectTimeoutError(, 'Connection to nabg.iasri.res.in timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20170226062220/http://nabg.iasri.res.in:8080/qlic-rice/,2011-09-30,"National Research Centre on Plant Biotechnology, Indian Agricultural Research Institute, New Delhi 110012, India.","Smita S, Lenka SK, Katiyar A, Jaiswal P, Preece J, Bansal KC",,,6.0,India +21980353,pubmed2ensembl,0.975213031,pubmed2ensembl,0.975213031,,0,1,http://www.pubmed2ensembl.org,302,,,http://web.archive.org/web/20130617021143/http://pubmed2ensembl.org/,2011-09-29,"Faculty of Life Sciences, University of Manchester, Manchester, United Kingdom.","Baran J, Gerner M, Haeussler M, Nenadic G, Bergman CM",,Biotechnology and Biological Sciences Research Council,21.0,United Kingdom +21993301,Polbase,0.996193171,Polbase,0.996193171,,0,1,http://polbase.neb.com,"HTTPConnectionPool(host='polbase.neb.com', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to polbase.neb.com timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20221022055317/https://polbase.neb.com/,2011-10-12,"New England Biolabs, 240 County Road, Ipswich, MA, USA.","Langhorst BW, Jack WE, Reha-Krantz L, Nichols NM",,NIGMS NIH HHS,5.0,United States +22009677,PhenoM,0.993650556,PhenoM,0.993650556,Phenomics of yeast Mutants,0.973869509,1,http://phenom.ccbr.utoronto.ca,200,Canada,"(43.6547,-79.3623)",http://web.archive.org/web/20190321005314/http://phenom.ccbr.utoronto.ca:80/,2011-10-18,"Donnelly Centre for Cellular and Biomolecular Research, University of Toronto, 160 College Street, Toronto, ON M5S 3E1, Canada.","Jin K, Li J, Vizeacoumar FS, Li Z, Min R, Zamparo L, Vizeacoumar FJ, Datti A, Andrews B, Boone C, Zhang Z",,"Canadian Institutes of Health Research, Canadian Institutes of Health Research",9.0,Canada +22039152,ProGlycProt,0.997674525,ProGlycProt,0.997674525,,0,1,http://www.proglycprot.org,301,,,http://web.archive.org/web/20220618211734/https://proglycprot.org/,2011-10-28,"Protein Science and Engineering, Institute of Microbial 
Technology, Council of Scientific and Industrial Research, Rajasthan, India.","Bhat AH, Mondal H, Chauhan JS, Raghava GP, Methi A, Rao A",,,14.0,India +22058132,PlantNATsDB,0.995334884,PlantNATsDB,0.995334884,NAT database,0.576344937,1,http://bis.zju.edu.cn/pnatdb,302,,,http://web.archive.org/web/20210612224853/http://bis.zju.edu.cn/pnatdb/,2011-11-03,"Department of Bioinformatics, State Key Laboratory of Plant Physiology and Biochemistry, College of Life Sciences, Zhejiang University, Hangzhou 310058, China.","Chen D, Yuan C, Zhang J, Zhang Z, Bai L, Meng Y, Chen LL, Chen M",,,46.0,China +22067443,PINA,0.992636144,PINA,0.992636144,Protein Interaction Network Analysis,0.964843237,1,http://cbg.garvan.unsw.edu.au/pina,"HTTPConnectionPool(host='cbg.garvan.unsw.edu.au', port=80): Max retries exceeded with url: /pina (Caused by ConnectTimeoutError(, 'Connection to cbg.garvan.unsw.edu.au timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20181023220348/http://cbg.garvan.unsw.edu.au:80/pina/,2011-11-08,"Cancer Research Program, Peter Wills Bioinformatics Centre, Garvan Institute of Medical Research, Darlinghurst, Sydney, NSW 2010, Australia.","Cowley MJ, Pinese M, Kassahn KS, Waddell N, Pearson JV, Grimmond SM, Biankin AV, Hautaniemi S, Wu J",,,180.0,Australia +22080505,PSCDB,0.963445306,PSCDB,0.963445306,Protein Structural Change DataBase,0.849862774,1,http://idp1.force.cs.is.nagoya-u.ac.jp/pscdb,"HTTPConnectionPool(host='idp1.force.cs.is.nagoya-u.ac.jp', port=80): Max retries exceeded with url: /pscdb (Caused by ConnectTimeoutError(, 'Connection to idp1.force.cs.is.nagoya-u.ac.jp timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220615181019/http://idp1.force.cs.is.nagoya-u.ac.jp/pscdb/,2011-11-10,"Graduate School of Information Science, Nagoya University, Furo-cho, Chikusa-ku, Nagoya 464-8601, Japan.","Amemiya T, Koike R, Kidera A, Ota M",,,26.0,Japan +22080512,PlantMetabolomics,0.962467134,PlantMetabolomics,0.962467134,,0,1,http://www.plantmetabolomics.org,200,,,http://web.archive.org/web/20221013125712/http://plantmetabolomics.org/,2011-11-10,"Bioinformatics and Computational Biology Program, Electrical and Computer Engineering Department, Biophysics and Molecular Biology and Virtual Reality Application Center, Iowa State University, Ames, IA 50011, USA.","Bais P, Moon-Quanbeck SM, Nikolau BJ, Dickerson JA",,,9.0,United States +"22080514, 24163105",PolymiRTS,0.997374415,PolymiRTS,0.997374415,polymorphism in microRNA target site,0.918009511,2,http://compbio.uthsc.edu/miRSNP,302,,,http://web.archive.org/web/20220615121851/https://compbio.uthsc.edu/miRSNP/,2013-10-24,"Department of Microbiology, Immunology and Biochemistry, Center for Integrative and Translational Genomics, University of Tennessee Health Science Center, Memphis, TN 38163, USA., Department of Microbiology, Immunology and Biochemistry, University of Tennessee Health Science Center, Memphis, TN 38163, USA and Center for Integrative and Translational Genomics, University of Tennessee Health Science Center, Memphis, TN 38163, USA.","Ziebarth JD, Bhattacharya A, Chen A, Cui Y, Bhattacharya A, Ziebarth JD, Cui Y",", ","NIAID NIH HHS, NIAID NIH HHS, NINR NIH HHS, ",243.0,"United States, United States, United States" +22084198,PLEXdb,0.997321129,PLEXdb,0.997321129,,0,1,http://www.plexdb.org,301,,,http://web.archive.org/web/20221014021342/https://plexdb.org/,2011-11-13,"Virtual Reality Application Center, Crop Genome Informatics Lab, Electrical and Computer Engineering, Bioinformatics and Computational Biology, Iowa State University, Ames, IA 50011, USA.","Dash S, Van Hemert J, 
Hong L, Wise RP, Dickerson JA",,,148.0,United States +22086960,PrimerBank,0.995503724,PrimerBank,0.995503724,,0,1,http://pga.mgh.harvard.edu/primerbank,302,,,http://web.archive.org/web/20221022200253/https://pga.mgh.harvard.edu/primerbank/,2011-11-15,"Department of Radiation Oncology, Washington University School of Medicine, 4511 Forest Park Ave, Saint Louis, MO 63108, USA.","Wang X, Spandidos A, Wang H, Seed B",,"NHLBI NIH HHS, NIGMS NIH HHS",315.0,United States +22096236,ProOpDB,0.997804239,ProOpDB,0.997804239,Prokaryotic Operon DataBase,0.958016768,1,http://operons.ibt.unam.mx/OperonPredictor,301,,,http://web.archive.org/web/20210125232958/http://operons.ibt.unam.mx/OperonPredictor/,2011-11-16,"Centro de Ciencias Aplicadas y Desarrollo Tecnológico, Universidad Nacional Autónoma de México, México, DF, México.","Taboada B, Ciria R, Martinez-Guerrero CE, Merino E",,,108.0, +22102570,ProPortal,0.995003283,ProPortal,0.995003283,,0,1,http://proportal.mit.edu,"HTTPConnectionPool(host='proportal.mit.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190223213142/http://proportal.mit.edu:80/,2011-11-18,"Department of Civil and Environmental Engineering, Massachusetts Institute of Technology, Cambridge, MA 02139, USA.","Kelly L, Huang KH, Ding H, Chisholm SW",,Intramural NIH HHS,20.0,United States +22102581,ProRepeat,0.997152328,ProRepeat,0.997152328,,0,1,http://prorepeat.bioinformatics.nl,301,,,http://web.archive.org/web/20190912213833/http://prorepeat.bioinformatics.nl/,2011-11-18,"Laboratory of Bioinformatics, Wageningen University and Research Centre, PO Box 569, 6700 AN Wageningen, Netherlands.","Luo H, Lin K, David A, Nijveen H, Leunissen JA",,,8.0,Netherlands 
+22110026,Phytozome,0.998088956,Phytozome,0.998088956,,0,1,http://www.phytozome.net,301,,,http://web.archive.org/web/20180820060436/http://www.phytozome.net/,2011-11-22,"US Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA. dmgoodstein@lbl.gov","Goodstein DM, Shu S, Howson R, Neupane R, Hayes RD, Fazo J, Mitros T, Dirks W, Hellsten U, Putnam N, Rokhsar DS",,,1774.0,United States +22110041,ProtChemSI,0.998405278,ProtChemSI,0.998405278,,0,1,http://pcidb.russelllab.org,200,,,no_wayback,2011-11-21,"Cell Networks, BioQuant, University of Heidelberg, Im Neuenheimer Feld 267, 69120 Heidelberg, Germany.","Kalinina OV, Wichmann O, Apic G, Russell RB, Russell RB",,,11.0,Germany +"22121227, 27899586",R-loopDB,0.997868508,R-loopDB,0.997868508,,0,2,http://rloop.bii.a-star.edu.sg,403,,,http://web.archive.org/web/20220606160917/http://rloop.bii.a-star.edu.sg/,2016-11-28,"Department of Genome and Gene Expression Data Analysis, Bioinformatics Institute, Singapore 138671., Department of Genome and Gene Expression Data Analysis, Bioinformatics Institute, Agency for Science, Technology and Research (A*STAR), 30 Biopolis Street, #07-01, 138671, Singapore.","Wongsurawat T, Jenjaroenpun P, Kwoh CK, Kuznetsov V, Jenjaroenpun P, Wongsurawat T, Sutheeworapong S, Kuznetsov VA",", ",", ",70.0,"Singapore, Singapore" +22255115,PHENOMIM,0.989852965,PHENOMIM,0.989852965,,0,1,http://faculty.neu.edu.cn/bmie/han/PhenOMIM,200,China,"(40.0018,116.333)",http://web.archive.org/web/20140722105507/http://faculty.neu.edu.cn/bmie/han/PhenOMIM/,2011-01-01,"Sino-Dutch Biomedical and Information Engineering School, Northeastern University, Shenyang 110003, China. 
han@bmie.neu.edu.cn","van Triest HJ, Chen D, Ji X, Qi S, Li-Ling J",,,2.0,China +22268964,ProBiS-Database,0.990155792,ProBiS-Database,0.990155792,,0,1,http://probis.cmm.ki.si/database,301,,,no_wayback,2012-02-07,"National Institute of Chemistry, Hajdrihova 19, 1000 Ljubljana, Slovenia.","Konc J, Cesnik T, Konc JT, Penca M, Janežič D",,,29.0,Slovenia +22363733,PrionHome,0.987491906,PrionHome,0.987491906,,0,1,http://libaio.biol.mcgill.ca/prion,"HTTPConnectionPool(host='libaio.biol.mcgill.ca', port=80): Max retries exceeded with url: /prion (Caused by ConnectTimeoutError(, 'Connection to libaio.biol.mcgill.ca timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20131227103458/http://libaio.biol.mcgill.ca/prion/,2012-02-20,"Department of Biology, McGill University, Montreal, Quebec, Canada.","Harbi D, Parthiban M, Gendoo DM, Ehsani S, Kumar M, Schmitt-Ulms G, Sowdhamini R, Harrison PM",,"Wellcome Trust, Medical Research Council",19.0,Canada +22424087,PupDB,0.998188615,PupDB,0.998188615,,0,1,http://cwtung.kmu.edu.tw/pupdb,301,,,http://web.archive.org/web/20220616045344/https://cwtung.kmu.edu.tw/pupdb/,2012-03-16,"School of Pharmacy, Kaohsiung Medical University, Kaohsiung 807, Taiwan. cwtung@kmu.edu.tw",Tung CW,,,21.0, +22509333,PolysacDB,0.988787591,PolysacDB,0.988787591,,0,1,http://crdd.osdd.net/raghava/polysacdb,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/polysacdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20211205132420/http://crdd.osdd.net/raghava/polysacdb/,2012-04-11,"Cell Biology and Immunology Division, Institute of Microbial Technology, Chandigarh, India.","Aithal A, Sharma A, Joshi S, Raghava GP, Varshney GC",,,7.0,India +22559792,PKKB,0.895442531,PKKB,0.895442531,PharmacoKinetics Knowledge Base,0.882700513,1,http://cadd.suda.edu.cn/admet,"HTTPConnectionPool(host='cadd.suda.edu.cn', port=80): Max retries exceeded with url: /admet (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170522154951/http://cadd.suda.edu.cn:80/admet/,2012-05-15,"Institute of Functional Nano & Soft Materials-FUNSOM and Jiangsu Key Laboratory for Carbon-Based Functional Materials & Devices, Soochow University, Suzhou, Jiangsu 215123, China.","Cao D, Wang J, Zhou R, Li Y, Yu H, Hou T",,"NIGMS NIH HHS, NIGMS NIH HHS",18.0,China +22589183,PRDB,0.969944358,PRDB,0.969944358,Protein Tandem Repeat DataBase,0.902798961,1,http://bioinfo.montp.cnrs.fr/?r=repeatDB,301,,,http://web.archive.org/web/20130124160255/http://bioinfo.montp.cnrs.fr/?r=repeatDB,2012-05-01,"Centre de Recherches de Biochimie Macromoléculaire UMR 5237, CNRS, University of Montpellier 1 and 2, Montpellier, France.","Jorda J, Baudrand T, Kajava AV",,"Ministère de l'Education Nationale, de la Recherche et de la Technologie",7.0,France +22669905,PSC,0.840825796,PSC,0.840825796,protein surface classification,0.706926028,1,http://pocket.uchicago.edu/psc,"HTTPConnectionPool(host='pocket.uchicago.edu', port=80): Max retries exceeded with url: /psc (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2012-06-04,"Department of Ecology and Evolution, University of Chicago 1101 East 57th Street, Chicago, IL 60637, USA. 
ytseng3@uchicago.edu","Tseng YY, Li WH",,,4.0,United States +22726705,QuartetS-DB,0.972562802,QuartetS-DB,0.972562802,,0,1,http://applications.bioanalysis.org/quartetsdb,302,,,no_wayback,2012-06-22,"United States Department of Defense Biotechnology High Performance Computing Software Applications Institute, Telemedicine and Advanced Technology Research Center, US Army Medical Research and Materiel Command, Fort Detrick, MD 21702, USA.","Yu C, Desai V, Cheng L, Reifman J",,,15.0,"United States, United States" +23055619,PRD,0.985122204,PRD,0.985122204,,0,1,http://pri.hgc.jp,200,,,http://web.archive.org/web/20220516004831/http://pri.hgc.jp/,2012-08-03,"Division of Interactome Medical Sciences, Institute of Medical Science, The University of Tokyo, 4-6-1, Shirokanedai Minato-ku, Tokyo 108-8639, Japan.","Fujimori S, Hino K, Saito A, Miyano S, Miyamoto-Sato E",,,7.0,Japan +23066098,PlantRNA,0.962213755,PlantRNA,0.962213755,,0,1,http://plantrna.ibmp.cnrs.fr,200,,,http://web.archive.org/web/20220709215843/http://plantrna.ibmp.cnrs.fr/,2012-10-12,"Institut de Biologie Moléculaire des Plantes, UPR 2357-CNRS, Université de Strasbourg, 12 rue du Général Zimmer, F-67084 Strasbourg Cedex, France.","Cognat V, Pawlak G, Duchêne AM, Daujat M, Gigant A, Salinas T, Michaud M, Gutmann B, Giegé P, Gobert A, Maréchal-Drouard L",,,31.0,France +23151233,PolySac3DB,0.961255148,PolySac3DB,0.961255148,,0,1,http://polysac3db.cermav.cnrs.fr,302,,,http://web.archive.org/web/20220620010421/http://www.polysac3db.cermav.cnrs.fr/,2012-11-14,"Centre de Recherches sur les Macromolécules Végétales (CERMAV*) Centre National de la Recherche Scientifique, Grenoble Cedex 9, BP 53X, F-38041, France.","Sarkar A, Pérez S",,,9.0,France +"23161682, 29156057",PRGdb,0.998317122,PRGdb,0.998317122,Plant Resistance Genes database,0.982363117,2,http://prgdb.org,301,,,no_wayback,2018-01-01,"Department of Soil, Plant, Environmental and Animal Production Sciences, University of Naples ""Federico II"", Via Università 
100, 80055 Portici, Italy., Sequentia Biotech SL, Calle Comte D'Urgell 240, 08036 Barcelona, Spain.","Sanseverino W, Hermoso A, D'Alessandro R, Vlasova A, Andolfo G, Frusciante L, Lowy E, Roma G, Ercolano MR, Osuna-Cruz CM, Paytuvi-Gallart A, Di Donato A, Sundesha V, Andolfo G, Aiese Cigliano R, Sanseverino W, Ercolano MR",", ",", ",82.0,"Spain, Italy" +23162083,PTID,0.993905127,PTID,0.993905127,Pesticide-Target interaction database,0.957429435,1,http://lilab.ecust.edu.cn/ptid,302,,,http://web.archive.org/web/20180824163501/http://lilab.ecust.edu.cn:80/ptid/,2012-11-18,"School of Information Science and Engineering, Shanghai Key Laboratory of Chemical Biology, Shanghai Key Laboratory of New Drug Design, Institute of Pharmaceuticals and Pesticides, School of Pharmacy, East China University of Science and Technology, Shanghai 200237, China.","Gong J, Liu X, Cao X, Diao Y, Gao D, Li H, Qian X",,,5.0,"China, China" +23172287,PhosPhAt,0.993283868,PhosPhAt,0.993283868,,0,1,http://phosphat.mpimp-golm.mpg.de,301,,,http://web.archive.org/web/20131211071010/http://phosphat.mpimp-golm.mpg.de/,2012-11-20,"Max Planck Institut für molekulare Pflanzenphysiologie, Am Mühlenberg 1, 14476 Golm, Germany.","Zulawski M, Braginets R, Schulze WX",,,62.0,Germany +23180797,Quorumpeps,0.993253469,Quorumpeps,0.993253469,,0,1,http://quorumpeps.ugent.be,307,,,http://web.archive.org/web/20220615141922/https://quorumpeps.ugent.be/,2012-11-24,"Drug Quality and Registration (DruQuaR) group, Department of Pharmaceutical Analysis, Faculty of Pharmaceutical Sciences, Ghent Hospital University, Ghent B-9000, Belgium.","Wynendaele E, Bronselaer A, Nielandt J, D'Hondt M, Stalmans S, Bracke N, Verbeke F, Van De Wiele C, De Tré G, De Spiegeleer B",,,50.0,Belgium +23193263,PrePPI,0.997049689,PrePPI,0.997049689,,0,1,http://bhapp.c2b2.columbia.edu/PrePPI,301,,,http://web.archive.org/web/20220121040618/https://bhapp.c2b2.columbia.edu/PrePPI/,2012-11-27,"Howard Hughes Medical Institute, Department of 
Biochemistry and Molecular Biophysics, Center for Computational Biology and Bioinformatics, Columbia Initiative in Systems Biology, Columbia University, New York, NY 10032, USA.","Zhang QC, Petrey D, Garzón JI, Deng L, Honig B",,"NIGMS NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, Howard Hughes Medical Institute",98.0,United States +23193267,PR(2,0.98287158,PR(2,0.98287158,Protist Ribosomal Reference database,0.980404947,1,http://ssu-rrna.org,200,,,http://web.archive.org/web/20221006144728/https://ssu-rrna.org/,2012-11-27,"CNRS, UMR 7144, Adaptation et Diversité en Milieu Marin, 29682 Roscoff, France. lguillou@sb-roscoff.fr","Guillou L, Bachar D, Audic S, Bass D, Berney C, Bittner L, Boutte C, Burgaud G, de Vargas C, Decelle J, Del Campo J, Dolan JR, Dunthorn M, Edvardsen B, Holzmann M, Kooistra WH, Lara E, Le Bescot N, Logares R, Mahé F, Massana R, Montresor M, Morard R, Not F, Pawlowski J, Probert I, Sauvadet AL, Siano R, Stoeck T, Vaulot D, Zimmermann P, Christen R",,Natural Environment Research Council,332.0,France +"23193284, 25361965",PTMcode,0.961656034,PTMcode,0.961656034,,0,2,http://ptmcode.embl.de,302,,,http://web.archive.org/web/20220709194549/https://ptmcode.embl.de/,2014-10-31,"European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany., European Molecular Biology Laboratory (EMBL), Meyerhofstrasse 1, 69117 Heidelberg, Germany.","Minguez P, Letunic I, Parca L, Bork P, Minguez P, Letunic I, Parca L, Garcia-Alonso L, Dopazo J, Huerta-Cepas J, Bork P",", ",", ",96.0,"Germany, Germany" +23292601,PRIMe,0.993307292,PRIMe,0.993307292,,0,1,http://prime.psc.riken.jp,200,,,http://web.archive.org/web/20220610074854/http://prime.psc.riken.jp/,2013-01-03,"RIKEN Plant Science Center, 1-7-22 Suehiro-cho, Tsurumi-ku, Yokohama, Japan. 
stetsuya@psc.riken.jp","Sakurai T, Yamada Y, Sawada Y, Matsuda F, Akiyama K, Shinozaki K, Hirai MY, Saito K",,,38.0,Japan +23378291,PhenoDB,0.997127712,PhenoDB,0.997127712,Mendelian Inheritance in,0.610693395,1,http://phenodb.net,301,United States,"(34.0479,-118.255)",http://web.archive.org/web/20211216204538/http://phenodb.net/,2013-03-04,"McKusick-Nathans Institute of Genetic Medicine, Johns Hopkins University, Baltimore, MD, USA. ahamosh@jhmi.edu","Hamosh A, Sobreira N, Hoover-Fong J, Sutton VR, Boehm C, Schiettecatte F, Valle D",,"NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS",48.0,United States +23396298,PIPEMicroDB,0.981419305,PIPEMicroDB,0.981419305,PIgeonPEa Microsatellite DataBase,0.70065501,1,http://cabindb.iasri.res.in/pigeonpea,"HTTPConnectionPool(host='cabindb.iasri.res.in', port=80): Max retries exceeded with url: /pigeonpea (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20140730193308/http://cabindb.iasri.res.in:80/pigeonpea/,2013-02-08,"Centre for Agricultural Bioinformatics, Indian Agricultural Statistics Research Institute, Library Avenue, New Delhi-110012, India.","Sarika, Arora V, Iquebal MA, Rai A, Kumar D",,,5.0,India +23396300,pseudoMap,0.987089634,pseudoMap,0.987089634,,0,1,http://pseudomap.mbc.nctu.edu.tw,"HTTPConnectionPool(host='pseudomap.mbc.nctu.edu.tw', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-02-08,"Institute of Bioinformatics and Systems Biology, National Chiao Tung University, Hsin-Chu, Taiwan.","Chan WL, Yang WK, Huang HD, Chang JG",,,3.0, +23426257,Planform,0.994146824,Planform,0.994146824,lanarian,0.698465049,1,http://planform.daniel-lobo.com,302,United States,"(39.2812,-76.7406)",http://web.archive.org/web/20150925144429/http://planform.daniel-lobo.com/,2013-02-19,"Department of Biology, Center for 
Regenerative and Developmental Biology, Tufts University, 200 Boston Avenue, Medford, MA 02155, USA.","Lobo D, Malone TJ, Levin M",,"NIGMS NIH HHS, NIGMS NIH HHS",13.0,United States +23497033,phiBIOTICS,0.983398318,phiBIOTICS,0.983398318,,0,1,http://www.phibiotics.org,200,Slovakia,"(48.2979,17.3547)",http://web.archive.org/web/20220520004114/http://phibiotics.org/,2013-03-06,"Laboratory of Bioinformatics, Institute of Molecular Biology, Slovak Academy of Sciences, Dubravska cesta 21, Bratislava, Slovakia.","Hojckova K, Stano M, Klucar L",,,9.0,Slovakia +23607573,PID-NET,0.746310925,PID-NET,0.746310925,,0,1,http://www.pid-net.org,"HTTPConnectionPool(host='www.pid-net.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20211127211056/http://pid-net.org/,2013-08-01,"Centre of Chronic Immunodeficiency, University Medical Center Freiburg and University of Freiburg, Freiburg, Germany. 
benjamin.gathmann@uniklinik-freiburg.de","Gathmann B, Goldacker S, Klima M, Belohradsky BH, Notheis G, Ehl S, Ritterbusch H, Baumann U, Meyer-Bahlburg A, Witte T, Schmidt R, Borte M, Borte S, Linde R, Schubert R, Bienemann K, Laws HJ, Dueckers G, Roesler J, Rothoeft T, Krüger R, Scharbatke EC, Masjosthusmann K, Wasmuth JC, Moser O, Kaiser P, Groß-Wieltsch U, Classen CF, Horneff G, Reiser V, Binder N, El-Helou SM, Klein C, Grimbacher B, Kindle G",,"Federal Ministry of Education and Research, PPT, Federal Ministry of Education and Research",38.0,Germany +23624946,PMP,0.941905677,PMP,0.941905677,Protein Model,0.727932513,1,http://www.proteinmodelportal.org,302,,,http://web.archive.org/web/20221102135415/https://www.proteinmodelportal.org/,2013-04-26,"Biozentrum University of Basel, Klingelbergstrasse 50-70, 4056 Basel, Switzerland.","Haas J, Roth S, Arnold K, Kiefer F, Schmidt T, Bordoli L, Schwede T",,"NIGMS NIH HHS, NIGMS NIH HHS",128.0,Switzerland +23674503,PhosphoGRID,0.998093545,PhosphoGRID,0.998093545,,0,1,http://www.phosphogrid.org,301,United States,"(32.9473,-96.7028)",http://web.archive.org/web/20221007162618/https://phosphogrid.org/,2013-05-13,"Department of Biochemistry and Molecular Biology, Molecular Epigenetics, Life Sciences Institute, University of British Columbia, 2350 Health Sciences Mall, Vancouver, British Columbia, Canada V6T 1Z3. 
ijs.ubc@gmail.com","Sadowski I, Breitkreutz BJ, Stark C, Su TC, Dahabieh M, Raithatha S, Bernhard W, Oughtred R, Dolinski K, Barreto K, Tyers M",,"CIHR, NCRR NIH HHS, Wellcome Trust, Biotechnology and Biological Sciences Research Council, NIH HHS, NCRR NIH HHS, NIH HHS",59.0,Canada +23688397,PLANEX,0.997026294,PLANEX,0.997026294,PLAnt co-EXpression database,0.942242801,1,http://planex.plantbioinformatics.org,302,Australia,"(-37.9867,145.035)",http://web.archive.org/web/20220307183044/http://planex.plantbioinformatics.org/,2013-05-20,"Department of Plant Biotechnology, Dongguk Univ-Seoul, Seoul 100-715, Korea.","Yim WC, Yu Y, Song K, Jang CS, Lee BM",,,26.0, +23725466,PMTED,0.991327643,PMTED,0.991327643,Plant MiRNA Target Expression Database,0.971420904,1,http://pmted.agrinome.org,"HTTPConnectionPool(host='pmted.agrinome.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to pmted.agrinome.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20180807164050/http://pmted.agrinome.org:80/,2013-06-03,"Key Laboratory of Symbol Computation and Knowledge Engineering of Ministry of Education, College of Computer Science and Technology, Jilin University, Changchun 130012, China.","Sun X, Dong B, Yin L, Zhang R, Du W, Liu D, Shi N, Li A, Liang Y, Mao L",,,14.0,China +23772554,PRIMOS,0.99499613,PRIMOS,0.99499613,Protein Interaction and Molecule Search,0.874199007,1,http://primos.fh-hagenberg.at,"HTTPConnectionPool(host='primos.fh-hagenberg.at', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160224233459/http://primos.fh-hagenberg.at/,2013-06-01,"Division of Molecular Dermatology, Department of Dermatology, Paracelsus Medical University Salzburg, Salzburg, Austria.","Rid R, Strasser W, Siegl D, Frech C, Kommenda M, Kern T, Hintner H, Bauer JW, Önder K",,,4.0,Austria 
+23826978,pico-PLAZA,0.990684529,pico-PLAZA,0.990684529,,0,1,http://bioinformatics.psb.ugent.be/pico-plaza,301,,,no_wayback,2013-07-04,"Department of Plant Systems Biology, VIB, Technologiepark 927, B-9052, Gent, Belgium. klaas.vandepoele@psb.vib-ugent.be","Vandepoele K, Van Bel M, Richard G, Van Landeghem S, Verhelst B, Moreau H, Van de Peer Y, Grimsley N, Piganeau G",,,34.0,Belgium +23911837,PTP-central,0.99302128,PTP-central,0.99302128,,0,1,http://www.PTP-central.org,"HTTPConnectionPool(host='www.ptp-central.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190601103454/http://www.ptp-central.org:80/,2013-07-31,"Goodman Cancer Research Center, McGill University, 1160 Pine Avenue, Montreal H3A 1A3, QC, Canada; Department of Biochemistry, McGill University, Montreal, QC, Canada.","Hatzihristidis T, Liu S, Pryszcz L, Hutchins AP, Gabaldón T, Tremblay ML, Miranda-Saavedra D",,"MLT, JST, Canadian Cancer Society Research Institute, Japan Society for the Promotion of Science, Kishimoto Foundation",10.0,"Canada, Canada" +24092884,PODB,0.986917019,PODB,0.986917019,Plant Organelles Database,0.934489107,1,http://podb.nibb.ac.jp/Organellome,"HTTPConnectionPool(host='podb.nibb.ac.jp', port=80): Max retries exceeded with url: /Organellome (Caused by ConnectTimeoutError(, 'Connection to podb.nibb.ac.jp timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220212012026/http://podb.nibb.ac.jp/Organellome/,2013-10-03,"Department of Cell Biology, National Institute for Basic Biology, Okazaki, 444-8585 Japan.","Mano S, Nakamura T, Kondo M, Miwa T, Nishikawa S, Mimura T, Nagatani A, Nishimura M",,,9.0,Japan +24163250,RADAR--a,0.97970136,RADAR--a,0.97970136,,0,1,http://RNAedit.com,"HTTPConnectionPool(host='rnaedit.com', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to rnaedit.com timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20210126123829/http://rnaedit.com/,2013-10-25,"Department of Genetics, Stanford University, Stanford, CA 94305, USA.","Ramaswami G, Li JB",,"NIGMS NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS",260.0,United States +24198250,POGO-DB,0.998100138,POGO-DB,0.998100138,,0,1,http://pogo.ece.drexel.edu,200,,,http://web.archive.org/web/20221026010753/http://pogo.ece.drexel.edu/,2013-11-05,"School of Biomedical Engineering, Science and Health Systems, Drexel University, 3141 Chestnut Street, Philadelphia, PA 19104, USA, Electrical & Computer Engineering Department, Drexel University, 3141 Chestnut Street, Philadelphia, PA 19104, USA and Rachel & Menachem Mendelovitch Evolutionary Processes of Mutation & Natural Selection Research Laboratory, Department of Genetics, the Ruth and Bruce Rappaport Faculty of Medicine, Technion-Israel Institute of Technology, Haifa 31096, Israel.","Lan Y, Morrison JC, Hershberg R, Rosen GL",,European Research Council,11.0,"Israel, Israel, United States, United States" +24225322,Progenetix,0.996031821,Progenetix,0.996031821,,0,1,http://www.progenetix.org,200,,,http://web.archive.org/web/20221110025241/http://progenetix.org/,2013-11-12,"Institute of Molecular Life Sciences, University of Zürich, CH-8057 Zürich, Switzerland, Swiss Institute of Bioinformatics, University of Zürich, CH-8057 Zürich, Switzerland and Swiss Institute of Bioinformatics, University of Lausanne, CH-1015 Lausanne, Switzerland.","Cai H, Kumar N, Ai N, Gupta S, Rath P, Baudis M",,,20.0,"Switzerland, Switzerland, Switzerland" +24227675,PhosphoNetworks,0.997868598,PhosphoNetworks,0.997868598,,0,1,http://www.phosphonetworks.org,301,United States,"(39.0469,-77.4903)",http://web.archive.org/web/20220719002751/https://phosphonetworks.org/,2013-11-13,"Department of Ophthalmology, Johns Hopkins School of Medicine, Department of Pharmacology and Molecular Sciences, Center for High-Throughput Biology, Johns Hopkins School of Medicine, 
Baltimore, MD 21205, USA, Department of Biology, North Carolina Agricultural and Technical State University, Greensboro, NC 27411, USA and The Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins School of Medicine, Baltimore, MD 21205, USA.","Hu J, Rho HS, Newman RH, Zhang J, Zhu H, Qian J",,"NIDDK NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NCI NIH HHS, NIDDK NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NHGRI NIH HHS, NCRR NIH HHS",64.0,"United States, United States, United States" +24270047,ProfileDB,0.997333407,ProfileDB,0.997333407,,0,1,http://profileDB.-microdiscovery.de,"HTTPConnectionPool(host='profiledb.-microdiscovery.de', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-11-20,"MicroDiscovery GmbH, Marienburger Str. 1, D-10405 Berlin, Germany.","Bauer C, Glintschert A, Schuchhardt J",,,3.0,Germany +24275491,PhylomeDB,0.996954501,PhylomeDB,0.996954501,,0,1,http://phylomedb.org,200,Spain,"(41.4481,2.2032)",no_wayback,2013-11-25,"Bioinformatics and Genomics Programme, Centre for Genomic Regulation (CRG), Dr. Aiguader, 88. 08003 Barcelona, Spain, Universitat Pompeu Fabra (UPF), 08003 Barcelona, Spain and Institució Catalana de Recerca i Estudis Avançats (ICREA), Pg. 
Lluís Companys 23, 08010 Barcelona, Spain.","Huerta-Cepas J, Capella-Gutiérrez S, Pryszcz LP, Marcet-Houben M, Gabaldón T",,European Research Council,132.0,"Spain, Spain, Spain" +24304897,PPD,0.996921718,PPD,0.996921718,Plasma Proteome Database,0.985280633,1,http://www.plasmaproteomedatabase.org,503,,,http://web.archive.org/web/20220615221352/http://plasmaproteomedatabase.org/,2013-12-03,"Institute of Bioinformatics, International Technology Park, Bangalore 560 066, Karnataka, India, Amrita School of Biotechnology, Amrita University, Kollam 690 525, Kerala, India, Centre of Excellence in Bioinformatics, School of Life Sciences, Pondicherry University, Puducherry 605 014, India, Department of Biochemistry and Molecular Biology, Pondicherry University, Puducherry 605014, India, Department of Neurochemistry, National Institute of Mental Health and Neurosciences, Bangalore 560 022, Karnataka, India, Department of Biotechnology, Kuvempu University, Shankaraghatta 577 451, Karnataka, India, Government Medical College, Bhavnagar 364 001, Gujarat, India, Mahatma Gandhi Institute of Medical Sciences, Sevagram, Wardha 442 012, Maharashtra, India, The Department of Environmental Health Sciences, Johns Hopkins Bloomberg School of Public Health, Baltimore, MD 21205, USA, Department of Internal Medicine, Armed Forces Medical College, Pune 411 040, Maharashtra, India, Department of Neurology, National Institute of Mental Health and Neurosciences, Bangalore 560 022, Karnataka, India, Department of Biochemistry, La Trobe Institute for Molecular Science, La Trobe University, Melbourne, Victoria 3084, Australia, McKusick-Nathans Institute of Genetic Medicine, Johns Hopkins University, Baltimore, MD 21205, USA, Department of Biological Chemistry, Johns Hopkins University, Baltimore, MD 21205, USA, Department of Oncology, Johns Hopkins University, Baltimore, MD 21205, USA and Department of Pathology, Johns Hopkins University, Baltimore, MD 21205, USA.","Nanjappa V, Thomas JK, Marimuthu 
A, Muthusamy B, Radhakrishnan A, Sharma R, Ahmad Khan A, Balakrishnan L, Sahasrabuddhe NA, Kumar S, Jhaveri BN, Sheth KV, Kumar Khatana R, Shaw PG, Srikanth SM, Mathur PP, Shankar S, Nagaraja D, Christopher R, Mathivanan S, Raju R, Sirdeshmukh R, Chatterjee A, Simpson RJ, Harsha HC, Pandey A, Prasad TS",,"NIGMS NIH HHS, Wellcome Trust, NCI NIH HHS, NHLBI NIH HHS, NIGMS NIH HHS",142.0,"Australia, India, India, India, India, India, India, India, India, India, India, United States, United States, United States, United States, United States" +24340041,POGs2,0.997185946,POGs2,0.997185946,Putative orthologous Groups 2 Database,0.964314751,1,http://pogs.uoregon.edu,200,,,http://web.archive.org/web/20220503180121/http://pogs.uoregon.edu/,2013-12-10,"Institute of Molecular Biology, University of Oregon, Eugene, Oregon, United States of America.","Tomcal M, Stiffler N, Barkan A",,,8.0,United States +24715219,QAP,0.926871697,QAP,0.926871697,Quail Anatomy Portal,0.80078907,1,http://quail.anatomyportal.org,"HTTPConnectionPool(host='quail.anatomyportal.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to quail.anatomyportal.org timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20200214132151/http://quail.anatomyportal.org:80/,2014-04-07,"School of Biological Sciences, Monash University, Melbourne, Victoria 3800, Australia, The Murdoch Childrens Research Institute, The Royal Children's Hospital, Flemington Road, Parkville, Melbourne, Victoria 3052, Australia, Australian Regenerative Medicine Institute, Monash University, Clayton, Victoria 3800, Australia, Aix Marseille Université, Inserm, GMGF UMR_S 910, 13385 Marseille, France, Instituto Gulbenkian de Ciencia, Rua da Avenida Grande 6, 2780-156 Oeiras, Portugal and Centre for Environmental Biology, Faculdade de Ciencias da Universidade de Lisboa, Campo Grande, 1749-016 Lisbon, Portugal.","Ruparelia AA, Simkin JE, Salgado D, Newgreen DF, Martins GG, Bryson-Richardson RJ",,,0.0,"Australia, Australia, Australia, France, Portugal, Portugal" +24771669,PhylOPDb,0.997332573,PhylOPDb,0.997332573,,0,1,http://g2im.u-clermont1.fr/phylopdb,302,France,"(45.748,4.85)",http://web.archive.org/web/20220303044519/https://g2im.u-clermont1.fr/phylopdb/,2014-04-26,"Clermont Université, Université d'Auvergne, EA 4678 CIDAM, BP 10448, F-63001 Clermont-Ferrand, France, UMR CNRS 6158, ISIMA/LIMOS, Clermont Université, Université Blaise Pascal, F-63173 Aubière, France, CNRS, UMR 6023, LMGE, F-63171 Aubière, France and Clermont Université, CRRI, F-63177 Aubière, France.","Jaziri F, Parisot N, Abid A, Denonfoux J, Ribière C, Gasc C, Boucher D, Brugère JF, Mahul A, Hill DR, Peyretaillade E, Peyret P",,,5.0,"France, France, France, France" +24855436,Polytraits,0.996589065,Polytraits,0.996589065,,0,1,http://polytraits.lifewatchgreece.eu,200,,,http://web.archive.org/web/20220703064702/http://polytraits.lifewatchgreece.eu/,2014-01-17,"National and Kapodestrian University of Athens, Athens, Greece ; Hellenic Centre for Marine Research, Heraklion, Crete, Greece.","Faulwetter S, Markantonatou V, Pavloudi C, Papageorgiou N, Keklikoglou K, Chatzinikolaou E, Pafilis E, 
Chatzigeorgiou G, Vasileiadou K, Dailianis T, Fanini L, Koulouri P, Arvanitidis C",,,6.0,"Greece, Greece" +24857970,PTM-SD,0.924359113,PTM-SD,0.924359113,Posttranslational modification structural database,0.737245253,1,http://www.dsimb.inserm.fr/dsimb_tools/PTM-SD,301,,,http://web.archive.org/web/20221017035351/https://www.dsimb.inserm.fr/dsimb_tools/PTM-SD/,2014-05-24,"INSERM, U 1134, DSIMB, F-75739 Paris, France, Univ Paris Diderot, Sorbonne Paris Cité, UMR-S 1134, F-75739 Paris, France, Institut National de la Transfusion Sanguine (INTS), F-75739 Paris, France and Laboratoire d'Excellence GR-Ex, F-75739 Paris, FranceINSERM, U 1134, DSIMB, F-75739 Paris, France, Univ Paris Diderot, Sorbonne Paris Cité, UMR-S 1134, F-75739 Paris, France, Institut National de la Transfusion Sanguine (INTS), F-75739 Paris, France and Laboratoire d'Excellence GR-Ex, F-75739 Paris, FranceINSERM, U 1134, DSIMB, F-75739 Paris, France, Univ Paris Diderot, Sorbonne Paris Cité, UMR-S 1134, F-75739 Paris, France, Institut National de la Transfusion Sanguine (INTS), F-75739 Paris, France and Laboratoire d'Excellence GR-Ex, F-75739 Paris, FranceINSERM, U 1134, DSIMB, F-75739 Paris, France, Univ Paris Diderot, Sorbonne Paris Cité, UMR-S 1134, F-75739 Paris, France, Institut National de la Transfusion Sanguine (INTS), F-75739 Paris, France and Laboratoire d'Excellence GR-Ex, F-75739 Paris, France pierrick.craveur@inserm.fr.","Craveur P, Rebehmed J, de Brevern AG",,,14.0,"France, France, France, France, France, France, France, France, France, France, France, France, France" +24870500,Phytoseiidae database,0.598889927,,0,Phytoseiidae database,0.598889927,1,http://www.lea.esalq.usp.br/phytoseiidae,301,,,no_wayback,2014-05-15,"Departamento de Entomologia e Acarologia, ESALQ-Universidade de São Paulo, 13418-900 Piracicaba, São Paulo, Brazil.; Email: peterson_demite@yahoo.com.br.","Demite PR, Mcmurtry JA, De Moraes GJ",,,6.0,Brazil +24980131,Plant rDNA,0.623982986,Plant rDNA,0.623982986,Plant rDNA 
database,0.521373582,1,http://www.plantrdnadatabase.com,302,,,http://web.archive.org/web/20220709032019/https://www.plantrdnadatabase.com/,2014-06-30,"Laboratori de Botànica-Unitat associada CSIC, Facultat de Farmàcia, Universitat de Barcelona, Barcelona, 08028 Catalonia, Spain, BioScripts - Centro de Investigación y Desarrollo de Recursos Científicos, Sevilla, 41012 Andalusia, Spain, Institute of Biophysics, Academy of Sciences of the Czech Republic. Brno, CZ-612 65, Czech Republic and Institut Botànic de Barcelona (IBB-CSIC-ICUB). Barcelona, 08038 Catalonia, Spain soniagarcia@ub.edu sphaeromeria@gmail.com.","Garcia S, Gálvez F, Gras A, Kovařík A, Garnatje T",,,8.0,"Spain, Spain, Spain" +25097386,ProADD,0.992834508,ProADD,0.992834508,,0,1,http://bicmku.in/ProADD,301,,,no_wayback,2014-06-30,"Centre of Excellence in Bioinformatics, School of Biotechnology, Madurai Kamaraj University, Madurai -625021,India.","Shobana R, Pandaranayaka EP",,,1.0,India +25125445,PlantCAZyme,0.998365402,PlantCAZyme,0.998365402,,0,1,http://cys.bios.niu.edu/plantcazyme,"HTTPConnectionPool(host='cys.bios.niu.edu', port=80): Max retries exceeded with url: /plantcazyme (Caused by ConnectTimeoutError(, 'Connection to cys.bios.niu.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20190202083046/http://cys.bios.niu.edu:80/plantcazyme/,2014-08-14,"Department of Computer Science and Department of Biological Sciences, Northern Illinois University, DeKalb, IL 60115, USA.","Ekstrom A, Taujale R, McGinn N, Yin Y",,,10.0,United States +25252779,PIP-DB,0.989458632,PIP-DB,0.989458632,Protein Isoelectric Point database,0.94516476,1,http://www.pip-db.org,200,United States,"(42.6898,-84.6427)",http://web.archive.org/web/20220517105900/http://pip-db.org/,2014-09-23,"School of Life and Health Sciences and School of Engineering and Applied Science, University of Aston, Aston Triangle, Birmingham B4 7ET, UK.","Bunkute E, Cummins C, Crofts FJ, Bunce G, Nabney IT, Nabney IT, Flower DR",,,8.0, +25270086,PIGD,0.992457569,PIGD,0.992457569,Poaceae Intronless Genes Database,0.982717186,1,http://pigd.ahau.edu.cn,503,,,http://web.archive.org/web/20201125213243/http://pigd.ahau.edu.cn/,2014-10-01,None,"Yan H, Jiang C, Li X, Sheng L, Dong Q, Peng X, Li Q, Zhao Y, Jiang H, Cheng B",,,13.0, +"25324309, 27987171, 29069403",PLAZA,0.996272564,PLAZA,0.996272564,Comparative Genomic Database,0.86294961,3,http://bioinformatics.psb.ugent.be/plaza,301,,,http://web.archive.org/web/20220914215444/https://bioinformatics.psb.ugent.be/plaza/,2018-01-01,"University of Potsdam, Institute of Biochemistry and Biology, Karl-Liebknecht-Straße 24-25, Haus 20, 14476 Potsdam-Golm, Germany Max-Planck Institute of Molecular Plant Physiology, Am Mühlenberg 1, 14476 Potsdam-Golm, Germany., Department of Plant Systems Biology, VIB, Technologiepark 927, 9052, Ghent, Belgium. 
klaas.vandepoele@psb.vib-ugent.be., Department of Plant Biotechnology and Bioinformatics, Ghent University, 9052 Ghent, Belgium.","Proost S, Van Bel M, Vaneechoutte D, Van de Peer Y, Inzé D, Mueller-Roeber B, Vandepoele K, Vandepoele K, Van Bel M, Diels T, Vancaester E, Kreft L, Botzki A, Van de Peer Y, Coppens F, Vandepoele K",", , ",", , ",296.0,"Belgium, Belgium, Germany, Germany" +25361970,PomBase,0.994939148,PomBase,0.994939148,,0,1,http://www.pombase.org,302,,,http://web.archive.org/web/20221106172822/https://www.pombase.org/,2014-10-31,"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK mcdowall@ebi.ac.uk.","McDowall MD, Harris MA, Lock A, Rutherford K, Staines DM, Bähler J, Kersey PJ, Oliver SG, Wood V",,"Wellcome Trust, Wellcome Trust",55.0, +25378306,Plastid-LCGbase,0.993730698,Plastid-LCGbase,0.993730698,,0,1,http://lcgbase.big.ac.cn/plastid-LCGbase,"HTTPConnectionPool(host='lcgbase.big.ac.cn', port=80): Max retries exceeded with url: /plastid-LCGbase (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-11-05,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, P. R. 
China Stem Cell Laboratory, UCL Cancer Institute, University College London, London WC1E 6BT, UK junyu@big.ac.cn.","Wang D, Yu J",,,2.0,China +25382819,ProKinO,0.995589495,ProKinO,0.995589495,,0,1,http://vulcan.cs.uga.edu/prokino,"HTTPConnectionPool(host='vulcan.cs.uga.edu', port=80): Max retries exceeded with url: /prokino (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20220624022204/http://vulcan.cs.uga.edu/prokino/,2015-02-01,"Institute of Bioinformatics, University of Georgia, Athens, Georgia.","McSkimming DI, Dastgheib S, Talevich E, Narayanan A, Katiyar S, Taylor SS, Kochut K, Kannan N",,"National Science Foundation, American Cancer Society, Georgia Cancer Coalition",23.0,"Georgia, Georgia" +25392411,PyIgClassify,0.995087743,PyIgClassify,0.995087743,,0,1,http://dunbrack2.fccc.edu/pyigclassify,301,,,http://web.archive.org/web/20221017093454/http://dunbrack2.fccc.edu/pyigclassify/,2014-11-11,"Institute for Cancer Research, Fox Chase Cancer Center, 333 Cottman Avenue, Philadelphia, PA 19111, USA Program in Molecular and Cell Biology and Genetics, Drexel University College of Medicine, 245 N. 15th St. 
Philadelphia, PA 19102, USA.","Adolf-Bryfogle J, Xu Q, North B, Lehmann A, Dunbrack RL Jr",,NIGMS NIH HHS,58.0,"United States, United States" +25392416,PubAngioGen,0.991985381,PubAngioGen,0.991985381,,0,1,http://www.megabionet.org/aspd,404,,,http://web.archive.org/web/20171107131043/http://www.megabionet.org:80/aspd/,2014-11-11,"The center for Bioinformatics and Computational Biology, Shanghai Key Laboratory of Regulatory Biology, Institute of Biomedical Sciences and School of Life Sciences, East China Normal University, 500 Dongchuan Road, Shanghai 200241, China.","Li P, Liu Y, Wang H, He Y, Wang X, He Y, Lv F, Chen H, Pang X, Liu M, Shi T, Yi Z",,,4.0,"China, China" +25398903,PNRD,0.991778493,PNRD,0.991778493,plant ncRNA database,0.92003082,1,http://structuralbiology.cau.edu.cn/PNRD,301,,,http://web.archive.org/web/20220121032048/http://structuralbiology.cau.edu.cn/PNRD/,2014-11-14,"State Key Laboratory of Plant Physiology and Biochemistry, College of Biological Sciences, China Agricultural University, Beijing 100193, China.","Yi X, Zhang Z, Ling Y, Xu W, Su Z",,,82.0,"China, China" +25414335,ProteomeScout,0.997024179,ProteomeScout,0.997024179,,0,1,http://proteomescout.wustl.edu,301,,,http://web.archive.org/web/20211024194657/https://proteomescout.wustl.edu/,2014-11-20,"Department of Biomedical Engineering and the Center for Biological Systems Engineering, Washington University, St Louis, MO 63130, USA.","Matlock MK, Holehouse AS, Naegle KM",,,20.0,United States +25414340,PHI-base,0.987774253,PHI-base,0.987774253,Pathogen-Host Interactions database,0.932421378,1,http://www.phi-base.org,200,United Kingdom,"(51.8613,-0.4656)",http://web.archive.org/web/20221018211238/http://www.phi-base.org/,2014-11-20,"Department of Plant Biology and Crop Science, Rothamsted Research, Harpenden, Herts, AL5 2JQ, UK martin.urban@rothamsted.ac.uk.","Urban M, Urban M, Pant R, Raghunath A, Irvine AG, Pedro H, Hammond-Kosack KE",,"Biotechnology and Biological Sciences Research Council, 
Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",99.0, +"25425034, 30371818",piRBase,0.997035384,piRBase,0.997035384,,0,2,http://www.regulatoryrna.org/database/piRNA,"HTTPConnectionPool(host='www.regulatoryrna.org', port=80): Max retries exceeded with url: /database/piRNA (Caused by ConnectTimeoutError(, 'Connection to www.regulatoryrna.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20211021161223/http://www.regulatoryrna.org/database/piRNA/,2019-01-01,"Key Laboratory of the Zoological Systematics and Evolution, Institute of Zoology, Chinese Academy of Sciences, Beijing 100101, China, University of Chinese Academy of Science, Beijing 100049, China, Laboratory of Bioinformatics and Noncoding RNA, Institute of Biophysics, Chinese Academy of Sciences, Beijing 100101, China, College of Life Sciences, Hebei University, Baoding 071002, Hebei, China and College of Plant Protection, Shandong Agricultural University, Tai'an 271018, Shandong, China., Key Laboratory of RNA Biology, Center for Big Data Research in Health, Institute of Biophysics, Chinese Academy of Sciences, Beijing 100101, China.","Zhang P, Si X, Skogerbø G, Wang J, Cui D, Li Y, Sun X, Liu L, Sun B, Chen R, He S, Huang DW, Wang J, Zhang P, Lu Y, Li Y, Zheng Y, Kan Y, Chen R, He S",", ",", National Natural Science Foundation of China, National Key R&D Program of China",129.0,"China, China, China, China, China, China" +25435546,Plant-PrAS,0.983476034,Plant-PrAS,0.983476034,Plant Protein Annotation Suite database,0.848614266,1,http://plant-pras.riken.jp,200,,,http://web.archive.org/web/20220620102706/http://plant-pras.riken.jp/,2014-11-29,"RIKEN Center for Sustainable Resource Science, Yokohama, Kanagawa, 
230-0045 Japan Department of Biotechnology and Life Sciences, Faculty of Technology, Tokyo University of Agriculture and Technology, Koganei, Tokyo, 184-8588 Japan.","Kurotani A, Yamada Y, Shinozaki K, Kuroda Y, Sakurai T",,,10.0,"Japan, Japan" +25505034,PODC,0.992418885,PODC,0.992418885,Center,0.597645581,1,http://bioinf.mind.meiji.ac.jp/podc,"HTTPConnectionPool(host='bioinf.mind.meiji.ac.jp', port=80): Max retries exceeded with url: /podc (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220331015704/http://bioinf.mind.meiji.ac.jp/podc/,2014-12-11,"School of Agriculture, Meiji University, Kawasaki, 214-8571 Japan CREST, JST, Saitama, 332-0012 Japan Tsukuba Division, Mitsubishi Space Software Co., Ltd., Tsukuba, 305-0032 Japan Plant Genetics Laboratory, National Institute of Genetics, Mishima, 411-8540 Japan These authors contributed equally to this work.","Ohyanagi H, Takano T, Terashima S, Kobayashi M, Kanno M, Morimoto K, Kanegae H, Sasaki Y, Saito M, Asano S, Ozaki S, Kudo T, Yokoyama K, Aya K, Suwabe K, Suzuki G, Aoki K, Kubo Y, Watanabe M, Matsuoka M, Yano K",,,25.0,"Japan, Japan, Japan, Japan" +25558364,PREDICTS,0.901059151,PREDICTS,0.901059151,,0,1,http://www.predicts.org.uk,"HTTPConnectionPool(host='www.predicts.org.uk', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220901215853/https://www.predicts.org.uk/,2014-12-02,"Department of Life Sciences, Natural History Museum Cromwell Road, London, SW7 5BD, U.K.","Hudson LN, Newbold T, Contu S, Hill SL, Lysenko I, De Palma A, Phillips HR, Senior RA, Bennett DJ, Booth H, Choimes A, Correia DL, Day J, Echeverría-Londoño S, Garon M, Harrison ML, Ingram DJ, Jung M, Kemp V, Kirkpatrick L, Martin CD, Pan Y, White HJ, Aben J, Abrahamczyk S, Adum GB, Aguilar-Barquero V, Aizen MA, Ancrenaz 
M, Arbeláez-Cortés E, Armbrecht I, Azhar B, Azpiroz AB, Baeten L, Báldi A, Báldi A, Banks JE, Barlow J, Batáry P, Bates AJ, Bayne EM, Beja P, Berg Å, Berry NJ, Bicknell JE, Bihn JH, Böhning-Gaese K, Boekhout T, Boutin C, Bouyer J, Brearley FQ, Brito I, Brunet J, Buczkowski G, Buscardo E, Cabra-García J, Calviño-Cancela M, Cameron SA, Cancello EM, Carrijo TF, Carvalho AL, Castro H, Castro-Luna AA, Cerda R, Cerezo A, Chauvat M, Clarke FM, Cleary DF, Connop SP, D'Aniello B, da Silva PG, Darvill B, Dauber J, Dejean A, Diekötter T, Dominguez-Haydar Y, Dormann CF, Dumont B, Dures SG, Dynesius M, Edenius L, Elek Z, Entling MH, Farwig N, Fayle TM, Felicioli A, Felton AM, Ficetola GF, Filgueiras BK, Fonte SJ, Fraser LH, Fukuda D, Furlani D, Ganzhorn JU, Garden JG, Gheler-Costa C, Giordani P, Giordano S, Gottschalk MS, Goulson D, Gove AD, Grogan J, Hanley ME, Hanson T, Hashim NR, Hawes JE, Hébert C, Helden AJ, Henden JA, Hernández L, Herzog F, Higuera-Diaz D, Hilje B, Horgan FG, Horváth R, Hylander K, Isaacs-Cubides P, Ishitani M, Jacobs CT, Jaramillo VJ, Jauker B, Jonsell M, Jung TS, Kapoor V, Kati V, Katovai E, Kessler M, Knop E, Kolb A, Kőrösi Á, Lachat T, Lantschner V, Le Féon V, LeBuhn G, Légaré JP, Letcher SG, Littlewood NA, López-Quintero CA, Louhaichi M, Lövei GL, Lucas-Borja ME, Luja VH, Maeto K, Magura T, Mallari NA, Marin-Spiotta E, Marshall EJ, Martínez E, Mayfield MM, Mikusinski G, Milder JC, Miller JR, Morales CL, Muchane MN, Muchane M, Naidoo R, Nakamura A, Naoe S, Nates-Parra G, Navarrete Gutierrez DA, Neuschulz EL, Noreika N, Norfolk O, Noriega JA, Nöske NM, O'Dea N, Oduro W, Ofori-Boateng C, Oke CO, Osgathorpe LM, Paritsis J, Parra-H A, Pelegrin N, Peres CA, Persson AS, Petanidou T, Phalan B, Philips TK, Poveda K, Power EF, Presley SJ, Proença V, Quaranta M, Quintero C, Redpath-Downing NA, Reid JL, Reis YT, Ribeiro DB, Richardson BA, Richardson MJ, Robles CA, Römbke J, Romero-Duque LP, Rosselli L, Rossiter SJ, Roulston TH, Rousseau L, Sadler JP, Sáfián S, 
Saldaña-Vázquez RA, Samnegård U, Schüepp C, Schweiger O, Sedlock JL, Shahabuddin G, Sheil D, Silva FA, Silva FA, Slade EM, Smith-Pardo AH, Sodhi NS, Somarriba EJ, Sosa RA, Stout JC, Struebig MJ, Sung YH, Threlfall CG, Tonietto R, Tóthmérész B, Tscharntke T, Turner EC, Tylianakis JM, Vanbergen AJ, Vassilev K, Verboven HA, Vergara CH, Vergara PM, Verhulst J, Walker TR, Wang Y, Watling JI, Wells K, Williams CD, Willig MR, Woinarski JC, Wolf JH, Woodcock BA, Yu DW, Zaitsev AS, Collen B, Ewers RM, Mace GM, Purves DW, Scharlemann JP, Purvis A",,"Hans Rausing PhD Scholarship, Natural Environment Research Council, Biotechnology and Biological Sciences Research Council, Natural Environment Research Council, Natural Environment Research Council, Biotechnology and Biological Sciences Research Council",48.0, +25593348,PlasmoGEM,0.99704349,PlasmoGEM,0.99704349,Plasmodium Genetic Modification,0.977940926,1,http://plasmogem.sanger.ac.uk,301,,,http://web.archive.org/web/20220302141332/https://plasmogem.sanger.ac.uk/,2015-01-01,"Wellcome Trust Sanger Institute, Hinxton Cambridge, CB10 1SA, UK.","Schwach F, Bushell E, Gomes AR, Anar B, Girling G, Herd C, Rayner JC, Billker O",,"Wellcome Trust, Medical Research Council",44.0, +25725063,PreDREM,0.996430159,PreDREM,0.996430159,,0,1,http://server.cs.ucf.edu/predrem,301,,,http://web.archive.org/web/20150517031947/http://server.cs.ucf.edu:80/predrem/,2015-02-27,"Department of Electrical Engineering and Computer Science and Burnett School of Biomedical Science, College of Medicine, University of Central Florida, Orlando, FL 32816, USA.","Zheng Y, Li X, Hu H",,,1.0,United States +25740460,PhytoREF,0.99713105,PhytoREF,0.99713105,,0,1,http://phytoref.fr,302,Germany,"(51.4556,7.01156)",no_wayback,2015-04-06,"CNRS, UMR 7144, Station Biologique de Roscoff, Roscoff, 29680, France.","Decelle J, Romac S, Stern RF, Bendif el M, Zingone A, Audic S, Guiry MD, Guillou L, Tessier D, Le Gall F, Gourvil P, Dos Santos AL, Probert I, Vaulot D, de Vargas C, 
Christen R",,"European Union programs MicroB3, Natural Environment Research Council, Agence Nationale de la Recherche, Investissements d'Avenir, EMBRC-France, MaCuMBA, Natural Environment Research Council, French Government",55.0,France +25911153,ProtoBug,0.979299188,ProtoBug,0.979299188,,0,1,http://www.protobug.cs.huji.ac.il,"HTTPConnectionPool(host='www.protobug.cs.huji.ac.il', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.protobug.cs.huji.ac.il timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220128221058/http://www.protobug.cs.huji.ac.il/,2015-04-24,"School of Computer Science and Engineering and Department of Biological Chemistry, Institute of Life Sciences, The Hebrew University of Jerusalem, Givat Ram Campus, Jerusalem, 91904 Israel.","Rappoport N, Linial M",,,1.0,Israel +25964630,PsyGeNET,0.997668445,PsyGeNET,0.997668445,Psychiatric disorders and Genes association NETwork,0.906647378,1,"http://www.psygenet.org/, http://opendatacommons.org/licenses/odbl/1.0","302, 301",,", ","no_wayback, http://web.archive.org/web/20200708130057/https://opendatacommons.org/licenses/odbl/1.0/",2015-05-11,"Research Group on Integrative Biomedical Informatics, Research Programme on Biomedical Informatics (GRIB), Hospital del Mar Medical Research Institute (IMIM), Department of Experimental and Health Sciences (DCEXS), Universitat Pompeu Fabra (UPF), Neurobiology of Behaviour Research Group (GReNeC), IMIM, DCEXS, UPF and Institute of Neuropsychiatry and Addiction, Parc de Salut Mar, Universitat Autònoma de Barcelona, Barcelona 08003, Spain.","Gutiérrez-Sacristán A, Grosdidier S, Valverde O, Torrens M, Bravo À, Piñero J, Sanz F, Furlong LI",,,29.0,Spain +26043787,PTM-SNP,0.989071167,PTM-SNP,0.989071167,,0,1,http://gcode.kaist.ac.kr/ptmsnp,"HTTPConnectionPool(host='gcode.kaist.ac.kr', port=80): Max retries exceeded with url: /ptmsnp (Caused by ConnectTimeoutError(, 'Connection to gcode.kaist.ac.kr timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20180628045422/http://gcode.kaist.ac.kr:80/ptmsnp/,2015-05-29,None,"Kim Y, Kang C, Min B, Yi GS",,,11.0, +26089836,PrOnto,0.986176848,PrOnto,0.986176848,,0,1,http://tagc.univ-mrs.fr/pronto,"HTTPConnectionPool(host='tagc.univ-mrs.fr', port=80): Max retries exceeded with url: /pronto (Caused by ConnectTimeoutError(, 'Connection to tagc.univ-mrs.fr timed out. (connect timeout=5)'))",,,no_wayback,2015-06-03,"Inserm, UMR_S1090 TAGC Marseille, France ; Aix-Marseille Université, UMR_S1090 TAGC Marseille, France.","Chapple CE, Herrmann C, Brun C",,,7.0,"France, France" +26112452,PlantOrDB,0.997664332,PlantOrDB,0.997664332,,0,1,http://bioinfolab.miamioh.edu/plantordb,"HTTPConnectionPool(host='bioinfolab.miamioh.edu', port=80): Max retries exceeded with url: /plantordb (Caused by ConnectTimeoutError(, 'Connection to bioinfolab.miamioh.edu timed out. (connect timeout=5)'))",,,no_wayback,2015-06-26,"Department of Automation, Xiamen University, Fujian, 361005, China. 
leilioxford@hotmail.com.","Li L, Ji G, Ye C, Shu C, Zhang J, Liang C",,"NIGMS NIH HHS, NIGMS NIH HHS",2.0,China +26117828,PhyloNONCODE,0.804293454,PhyloNONCODE,0.804293454,,0,1,http://www.bioinfo.org/phyloNoncode,301,United States,"(37.4316,-78.6569)",http://web.archive.org/web/20161217060337/http://www.bioinfo.org:80/phyloNoncode/,2015-06-27,"Key Lab of Intelligent Information Processing, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, 100190, China.","Bu D, Luo H, Jiao F, Fang S, Tan C, Liu Z, Zhao Y",,,15.0,China +26200753,Pre_GI,0.997470955,Pre_GI,0.997470955,Predicted Genomic Islands database,0.986275991,1,http://pregi.bi.up.ac.za/index.php,200,,,http://web.archive.org/web/20220308233939/http://pregi.bi.up.ac.za/index.php,2015-06-17,"Bioinformatics and Computational Biology Unit, Department of Biochemistry, University of Pretoria, Pretoria, Gauteng 0002, South Africa.","Pierneef R, Cronje L, Bezuidt O, Reva ON",,,7.0,South Africa +26211629,PLNlncRbase,0.997170389,PLNlncRbase,0.997170389,,0,1,http://bioinformatics.ahau.edu.cn/PLNlncRbase,503,,,no_wayback,2015-07-23,"Department of Biostatistics, School of Science, Anhui Agricultural University, Hefei 230036, China; College of Information and Computer science, Anhui Agricultural University, Hefei 230036, China.","Xuan H, Zhang L, Liu X, Han G, Li J, Li X, Liu A, Liao M, Zhang S",,"National Natural Science Foundation of China, Provincial Quality Engineer Fund of Anhui Education Department, National Natural Science Foundation of China, Biostatistics Discipline Backbone Cultivated Foundation in Anhui Agricultural University, Biology Key Subject Construction of Anhui, National Natural Science Foundation of China, Anhui Agricultural University",24.0,"China, China" +26225242,PhIN,0.994730279,PhIN,0.994730279,protein pharmacology interaction network database,0.937448184,1,http://cadd.pharmacy.nankai.edu.cn/phin,403,,,no_wayback,2015-03-18,"State Key Laboratory of Medicinal Chemical Biology and 
College of Pharmacy, Nankai University Tianjin, China.","Wang Z, Li J, Dang R, Liang L, Lin J",,"National 973 Basic Research, National 973 Basic Research",5.0,China +26251998,PhenomeCentral,0.995991707,PhenomeCentral,0.995991707,,0,1,http://phenomecentral.org,301,Canada,"(43.6583,-79.3902)",http://web.archive.org/web/20220902092842/https://www.phenomecentral.org/,2015-08-31,"Department of Computer Science, University of Toronto, Toronto, Canada.","Buske OJ, Girdea M, Dumitriu S, Gallinger B, Hartley T, Trang H, Misyura A, Friedman T, Beaulieu C, Bone WP, Links AE, Washington NL, Haendel MA, Robinson PN, Boerkoel CF, Adams D, Gahl WA, Boycott KM, Brudno M",,"NSERC/CIHR Collaborative Health Research Project, Hospital for Sick Children, Ontario Research Fund, Canadian Institutes of Health Research, NIH HHS, Genome Canada, Natural Sciences and Engineering Research Council of Canada, Ontario Genomics Institute, Children’s Hospital of Eastern Ontario Foundation, Genome Quebec",59.0,Canada +26400163,PlantDHS,0.996344864,PlantDHS,0.996344864,,0,1,http://plantdhs.org,500,,,http://web.archive.org/web/20220527212527/http://plantdhs.org/,2015-09-22,"Department of Horticulture, University of Wisconsin-Madison, Madison, WI 53706, USA.","Zhang T, Marand AP, Marand AP, Jiang J",,,25.0,United States +26441671,Physiome.jp,0.977062356,Physiome.jp,0.977062356,,0,1,http://physiome.jp,"HTTPConnectionPool(host='physiome.jp', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220622074921/https://www.physiome.jp/,2015-09-09,"Integrated Open Systems Unit, Okinawa Institute of Science and Technology Graduate University Okinawa, Japan.","Asai Y, Abe T, Li L, Oka H, Nomura T, Kitano H",,,4.0,Japan 
+26450949,ProPepper,0.996425271,ProPepper,0.996425271,,0,1,http://propepper.net,200,,,http://web.archive.org/web/20220806022715/http://propepper.net/,2015-10-08,"Applied Genomics Department, MTA Centre for Agricultural Research, Brunszvik u. 2, Martonvásár, 2462, Hungary, juhasz.angela@agrar.mta.hu.","Juhász A, Haraszi R, Maulis C",,,9.0,Hungary +"26578570, 30496475",PlanMine,0.994910002,PlanMine,0.994910002,,0,2,http://planmine.mpi-cbg.de,302,,,no_wayback,2019-01-01,"Max Planck Institute of Molecular Cell Biology and Genetics, Pfotenhauerstrasse 108, 01307 Dresden, Germany., Max Planck Institute for Molecular Cell Biology and Genetics, Pfotenhauerstrasse 108, 01307 Dresden, Germany.","Brandl H, Moon H, Vila-Farré M, Liu SY, Henry I, Rink JC, Rozanski A, Moon H, Brandl H, Martín-Durán JM, Grohme MA, Hüttner K, Bartscherer K, Henry I, Rink JC",", ",", European Research Council",107.0,"Germany, Germany" +26578582,Pseudomonas Genome,0.462780903,Pseudomonas Genome,0.462780903,,0,1,http://www.pseudomonas.com,301,,,http://web.archive.org/web/20221026033553/https://pseudomonas.com/,2015-11-17,"Department of Molecular Biology and Biochemistry, Simon Fraser University, Greater Vancouver, BC V5A 1S6, Canada gwinsor@sfu.ca.","Winsor GL, Griffiths EJ, Lo R, Dhillon BK, Shay JA, Brinkman FS",,Canadian Institutes of Health Research,368.0,Canada +26586809,probeBase,0.996019483,probeBase,0.996019483,,0,1,http://www.probebase.net,301,,,http://web.archive.org/web/20110914041741/http://www.probebase.net:80/,2015-11-19,"Division of Computational Systems Biology, Department of Microbiology and Ecosystem Science, Research Network Chemistry meets Microbiology, University of Vienna, A-1090 Wien, Austria.","Greuter D, Loy A, Horn M, Rattei T",,"Austrian Science Fund FWF, European Research Council",46.0,Austria +"26602691, 33313828",PSORTdb,0.997708738,PSORTdb,0.997708738,,0,2,http://db.psort.org,301,,,http://web.archive.org/web/20221016223333/https://db.psort.org/,2021-01-01,"Department of 
Molecular Biology and Biochemistry, Simon Fraser University, Burnaby, British Columbia, V5A 1S6, Canada., Department of Molecular Biology and Biochemistry, Simon Fraser University, Burnaby, British Columbia V5A 1S6, Canada.","Peabody MA, Laird MR, Vlasschaert C, Lo R, Brinkman FS, Lau WYV, Hoad GR, Jin V, Winsor GL, Madyan A, Gray KL, Laird MR, Lo R, Brinkman FSL",", ",", Canadian Institutes of Health Research, Genome Canada and Genome British Columbia, National Sciences and Engineering Research Council of Canada, CIHR, Frederick Banting and Charles Best Canada Graduate Scholarship",33.0,"Canada, Canada" +26620522,PPIM,0.989891842,PPIM,0.989891842,Protein-Protein Interaction Database for Maize,0.972055906,1,http://comp-sysbio.org/ppim,301,,,http://web.archive.org/web/20220324023509/http://comp-sysbio.org/ppim/,2015-11-30,"Department of Computer Science and Technology, Tongji University, Shanghai 201804, China (G.Z., P.-P.X., J.W., X.-M.Z.);Key Laboratory of Food Safety Research, Institute for Nutritional Sciences (A.W.), and Key Laboratory of Systems Biology (L.C.), Shanghai Institutes for Biological Sciences, Chinese Academy of Sciences, Shanghai 200031, China;Department of Mathematics, Shanghai University, Shanghai 200444, China (X.-J.X.); andMonsanto Company, St. 
Louis, Missouri 63167 (L.L., J.L., Y.C.).","Zhu G, Wu A, Xu XJ, Xiao PP, Lu L, Liu J, Cao Y, Chen L, Wu J, Zhao XM",,,28.0,"China, China, China" +26746174,Pleurochrysome,0.995230675,Pleurochrysome,0.995230675,,0,1,http://bioinf.mind.meiji.ac.jp/phapt,"HTTPConnectionPool(host='bioinf.mind.meiji.ac.jp', port=80): Max retries exceeded with url: /phapt (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170928210942/http://bioinf.mind.meiji.ac.jp:80/phapt/,2016-01-07,"Bioinformatics Laboratory, School of Agriculture, Meiji University, 1-1-1 Higashi-mita, Tama-ku, Kawasaki, Kanagawa, 214-8571 Japan These authors contributed equally to this work. Present address: International Rice Research Institute, DAPO 7777, Metro Manila 1301, Philippines. fujiwara@toyaku.ac.jp kyano@isc.meiji.ac.jp.","Yamamoto N, Kudo T, Fujiwara S, Takatsuka Y, Hirokawa Y, Tsuzuki M, Takano T, Kobayashi M, Suda K, Asamizu E, Yokoyama K, Shibata D, Tabata S, Yano K",,,2.0,"Japan, Philippines" +26759061,Psmir,0.9962219,Psmir,0.9962219,,0,1,http://www.bio-bigdata.com/Psmir,502,,,http://web.archive.org/web/20180114024912/http://www.bio-bigdata.com:80/Psmir/,2016-01-13,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, P. R. 
China.","Meng F, Wang J, Dai E, Yang F, Chen X, Wang S, Yu X, Liu D, Jiang W",,,8.0,China +26887375,PlanTE-MIR DB,0.85333695,PlanTE-MIR DB,0.85333695,,0,1,http://bioinfo-tool.cp.utfpr.edu.br/plantemirdb,301,,,http://web.archive.org/web/20220728184819/http://bioinfo-tool.cp.utfpr.edu.br/plantemirdb/,2016-02-18,"Graduation Program in Genetics and Molecular Biology, Universidade Estadual de Londrina, UEL, Londrina, Brazil.","R Lorenzetti AP, A de Antonio GY, Paschoal AR, Domingues DS",,"Coordenação de Aperfeiçoamento de Pessoal de Nível Superior, Conselho Nacional de Desenvolvimento Científico e Tecnológico, Fundação Araucária",10.0,Brazil +26980519,PhyloPro,0.998065591,PhyloPro,0.998065591,,0,1,http://www.compsysbio.org/phylopro,404,,,http://web.archive.org/web/20220119204720/https://www.compsysbio.org/phylopro/,2016-03-15,"Program in Molecular Structure and Function, Hospital for Sick Children, 21-9830 PGCRL, 686 Bay Street, Toronto, ON M5G 0A4, Canada and graham.cromar@gmail.com.","Cromar GL, Zhao A, Xiong X, Swapna LS, Loughran N, Song H, Parkinson J",,,6.0,Canada +26980520,PolyQ,0.976776481,PolyQ,0.976776481,,0,1,http://lightning.med.monash.edu/polyq2,"HTTPConnectionPool(host='lightning.med.monash.edu', port=80): Max retries exceeded with url: /polyq2 (Caused by ConnectTimeoutError(, 'Connection to lightning.med.monash.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20190308150020/http://lightning.med.monash.edu:80/polyq2/,2016-03-15,Biomedicine Discovery Institute and Department of Biochemistry and Molecular Biology.,"Li C, Nagel J, Androulakis S, Song J, Buckle AM",,,0.0, +27026615,PheKB,0.997518778,PheKB,0.997518778,Phenotype KnowledgeBase,0.822092161,1,http://phekb.org,302,United States,"(36.1417,-86.8008)",http://web.archive.org/web/20221107180033/https://phekb.org/,2016-03-28,"Vanderbilt University Medical Center, Nashville, TN, USA.","Kirby JC, Speltz P, Rasmussen LV, Basford M, Gottesman O, Peissig PL, Pacheco JA, Tromp G, Pathak J, Carrell DS, Ellis SB, Lingren T, Thompson WK, Savova G, Haines J, Roden DM, Harris PA, Denny JC",,"NCATS NIH HHS, NIGMS NIH HHS, NCATS NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS",125.0,United States +27115628,ProTherm,0.994540989,ProTherm,0.994540989,,0,1,http://www.abren.net/protherm,200,,,http://web.archive.org/web/20221006052643/http://www.abren.net/protherm/,2016-01-01,"Department of Biotechnology, Bhupat & Jyoti Mehta School of Biosciences, Indian Institute of Technology Madras, Chennai, 600 036, India. 
gromiha@iitm.ac.in.","Gromiha MM, Anoosha P, Huang LT",,,7.0,India +27153608,ProbOnto,0.962836146,ProbOnto,0.962836146,,0,1,http://probonto.org,301,,,no_wayback,2016-04-03,"EMBL-European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK.","Swat MJ, Grenon P, Wimalaratne S",,,6.0, +27465131,PvTFDB,0.984973252,PvTFDB,0.984973252,,0,1,http://www.multiomics.in/PvTFDB,"HTTPConnectionPool(host='www.multiomics.in', port=80): Max retries exceeded with url: /PvTFDB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190223071800/http://www.multiomics.in:80/PvTFDB/,2016-07-27,"Institute of Biotechnology, PJTSAU, Rajendra Nagar, Hyderabad 500030, India.","Bhawna, Bonthala VS, Gajula MP",,,1.0,India +27478368,PIMADb,0.994888186,PIMADb,0.994888186,protein interactions in huge macromolecular assemblies,0.905600566,1,http://caps.ncbs.res.in/pimadb,301,India,"(12.9634,77.5855)",http://web.archive.org/web/20220615180812/http://caps.ncbs.res.in/pimadb/,2016-07-19,"National Centre for Biological Sciences (TIFR), GKVK Campus, Bangalore, India.; SASTRA University, Tirumalaisamudram, Thanjavur, Tamil Nadu, India.","Mathew OK, Sowdhamini R",,,1.0,"India, India" +27515999,PoplarGene,0.995907903,PoplarGene,0.995907903,,0,1,"http://bioinformatics.caf.ac.cn/PoplarGene, http://124.127.201.25/PoplarGene","HTTPConnectionPool(host='bioinformatics.caf.ac.cn', port=80): Max retries exceeded with url: /PoplarGene (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known')), HTTPConnectionPool(host='124.127.201.25', port=80): Max retries exceeded with url: /PoplarGene (Caused by ConnectTimeoutError(, 'Connection to 124.127.201.25 timed out. 
(connect timeout=5)'))",,", ","http://web.archive.org/web/20191016174850/http://bioinformatics.caf.ac.cn:80/PoplarGene/, no_wayback",2016-08-12,"State Key Laboratory of Tree Genetics and Breeding, Research Institute of Forestry, Chinese Academy of Forestry, Key Laboratory of Tree Breeding and Cultivation, State Forestry Administration, Beijing 100091, China.","Liu Q, Ding C, Chu Y, Chen J, Zhang W, Zhang B, Huang Q, Su X",,,3.0,China +27551106,PPI4DOCK,0.968132639,PPI4DOCK,0.968132639,,0,1,http://biodev.cea.fr/interevol/ppi4dock,301,,,http://web.archive.org/web/20220531090353/http://biodev.cea.fr/interevol/ppi4dock/,2016-08-22,"Institute for Integrative Biology of the Cell (I2BC), IBITECS, CEA, CNRS, Univ Paris-Sud, Université Paris-Saclay, F-91198, Gif-sur-Yvette, France.","Yu J, Guerois R",,,6.0,France +27733507,PMDBase,0.997673035,PMDBase,0.997673035,,0,1,http://www.sesame-bioinfo.org/PMDBase,"HTTPConnectionPool(host='www.sesame-bioinfo.org', port=80): Max retries exceeded with url: /PMDBase (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,http://web.archive.org/web/20190620233924/http://www.sesame-bioinfo.org:80/PMDBase/,2016-10-12,"Key Laboratory of Biology and Genetic Improvement of Oil Crops, Ministry of Agriculture; Oil Crops Research Institute, the Chinese Academy of Agricultural Sciences, Wuhan 430062, China.","Yu J, Dossa K, Wang L, Zhang Y, Wei X, Liao B, Zhang X",,,21.0,China +27789569,PMKB,0.992120802,PMKB,0.992120802,Precision Medicine Knowledge Base,0.967980176,1,http://pmkb.weill.cornell.edu,301,United States,"(40.7652,-73.9588)",http://web.archive.org/web/20221006031312/https://pmkb.weill.cornell.edu/,2017-05-01,Institute for Precision Medicine.,"Huang L, Fernandes H, Zia H, Tavassoli P, Rennert H, Pisapia D, Imielinski M, Sboner A, Rubin MA, Kluk M, Elemento O",,"National Institutes of Health, National Science Foundation, NCI NIH HHS, Hirschl Trust",44.0, 
+27789699,Proteome-pI,0.946856538,Proteome-pI,0.946856538,,0,1,http://isoelectricpointdb.org,200,,,http://web.archive.org/web/20221021091047/http://isoelectricpointdb.org/,2016-10-26,"Quantitative and Computational Biology Group, Max Planck Institute for Biophysical Chemistry, Göttingen, Lower Saxony, 37077, Germany lukasz.kozlowski.lpk@gmail.com.",Kozlowski LP,,,71.0,Germany +27789703,pVOGs,0.934616673,pVOGs,0.934616673,Prokaryotic Virus Orthologous Groups,0.87238429,1,http://dmk-brain.ecn.uiowa.edu/pVOGs,"HTTPConnectionPool(host='dmk-brain.ecn.uiowa.edu', port=80): Max retries exceeded with url: /pVOGs (Caused by ConnectTimeoutError(, 'Connection to dmk-brain.ecn.uiowa.edu timed out. (connect timeout=5)'))",,,no_wayback,2016-10-26,"Department of Biomedical Engineering, College of Engineering, University of Iowa, Iowa City, IA 52242, USA.","Grazziotin AL, Koonin EV, Kristensen DM",,,114.0,United States +"27799469, 31680153",Plant Reactome,0.934845229,Plant Reactome,0.934845229,,0,2,http://plantreactome.gramene.org,301,Canada,"(43.6459,-79.3878)",http://web.archive.org/web/20181009154802/http://plantreactome.gramene.org:80/,2020-01-01,"2082 Cordley Hall, Department of Botany & Plant Pathology, Oregon State University, Corvallis, OR 97331, USA., Department of Botany & Plant Pathology, Oregon State University, Corvallis, OR, USA.","Naithani S, Preece J, D'Eustachio P, Gupta P, Amarasinghe V, Dharmawardhana PD, Wu G, Fabregat A, Elser JL, Weiser J, Keays M, Fuentes AM, Petryszak R, Stein LD, Ware D, Jaiswal P, Naithani S, Gupta P, Preece J, D'Eustachio P, Elser JL, Garg P, Dikeman DA, Kiff J, Cook J, Olson A, Wei S, Tello-Ruiz MK, Mundo AF, Munoz-Pomer A, Mohammed S, Cheng T, Bolton E, Papatheodorou I, Stein L, Ware D, Jaiswal P",", ","NHGRI NIH HHS, National Science Foundation, National Science Foundation, National Science Foundation, National Institute of Food and Agriculture, NHGRI NIH HHS, NHGRI NIH HHS, National Institutes of Health, National Institutes of 
Health",41.0,"United States, United States" +27924044,PlaMoM,0.996463239,PlaMoM,0.996463239,Plant Mobile Macromolecules,0.947892308,1,http://www.systembioinfo.org/plamom,"HTTPConnectionPool(host='www.systembioinfo.org', port=80): Max retries exceeded with url: /plamom (Caused by ConnectTimeoutError(, 'Connection to www.systembioinfo.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20191023221300/http://www.systembioinfo.org:80/plamom/,2016-10-24,"Department of Biology, Hong Kong Baptist University, Kowloon, Hong Kong.","Guan D, Guan D, Yan B, Thieme C, Hua J, Zhu H, Boheler KR, Zhao Z, Kragler F, Xia Y, Zhang S",,,15.0,"Hong Kong, Hong Kong" +28053162,POSTAR,0.967771292,POSTAR,0.967771292,,0,1,http://POSTAR.ncrnalab.org,200,,,http://web.archive.org/web/20221012003225/http://postar.ncrnalab.org/,2016-10-05,"MOE Key Laboratory of Bioinformatics, Center for Synthetic and Systems Biology, Center for Plant Biology and Tsinghua-Peking Joint Center for Life Sciences, School of Life Sciences, Tsinghua University, Beijing 100084, China.","Hu B, Yang YT, Huang Y, Zhu Y, Lu ZJ",,,40.0,China +28096778,PineElm_SSRdb,0.885082662,PineElm_SSRdb,0.885082662,,0,1,http://app.bioelm.com,302,United States,"(47.6229,-122.337)",no_wayback,2016-11-24,"Division of Genomic Resources, ICAR- National Bureau of Plant Genomic Resources, PUSA campus, 110012 New Delhi, India.","Chaudhary S, Mishra BK, Vivek T, Magadum S, Yasin JK",,Indian Council of Agricultural Research,1.0,India +28111365,PlantRGDB,0.995906234,PlantRGDB,0.995906234,Plant Retrocopied Gene DataBase,0.938694141,1,"http://probes.pw.usda.gov/plantrgdb, http://aegilops.wheat.ucdavis.edu/plantrgdb","HTTPConnectionPool(host='probes.pw.usda.gov', port=80): Max retries exceeded with url: /plantrgdb (Caused by ConnectTimeoutError(, 'Connection to probes.pw.usda.gov timed out. 
(connect timeout=5)')), 301",,", ","no_wayback, no_wayback",2017-01-01,"USDA-ARS, Plant Gene Expression Center, Albany, CA, USA.",Wang Y,,,5.0,United States +28158643,PlantExpress,0.993935406,PlantExpress,0.993935406,,0,1,http://plantomics.mind.meiji.ac.jp/PlantExpress,"HTTPConnectionPool(host='plantomics.mind.meiji.ac.jp', port=80): Max retries exceeded with url: /PlantExpress (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190909053838/http://plantomics.mind.meiji.ac.jp:80/PlantExpress/,2017-01-01,"Bioinformatics Laboratory, School of Agriculture, Meiji University, Higashi-mita, Tama-ku, Kawasaki, Kanagawa, Japan.","Kudo T, Terashima S, Takaki Y, Tomita K, Saito M, Kanno M, Yokoyama K, Yano K",,,8.0,Japan +28171531,Pro54DB,0.984204948,Pro54DB,0.984204948,,0,1,http://lin.uestc.edu.cn/database/pro54db,"HTTPConnectionPool(host='lin.uestc.edu.cn', port=80): Max retries exceeded with url: /database/pro54db (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2017-02-01,"Key Laboratory for NeuroInformation of Ministry of Education, School of Life Science and Technology and Center for Informational Biology, University of Electronic Science and Technology of China, Chengdu, China.","Liang ZY, Lai HY, Yang H, Zhang CJ, Yang H, Wei HH, Chen XX, Zhao YW, Su ZD, Li WC, Deng EZ, Tang H, Chen W, Lin H",,,41.0,"China, China" +28203705,Pln24NT,0.996295124,Pln24NT,0.996295124,,0,1,http://bioinformatics.caf.ac.cn/Pln24NT,"HTTPConnectionPool(host='bioinformatics.caf.ac.cn', port=80): Max retries exceeded with url: /Pln24NT (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2017-07-01,"State Key Laboratory of Tree Genetics and Breeding, Research Institute of Forestry, Chinese Academy of Forestry, Key Laboratory of Tree Breeding and Cultivation of 
the State Forestry Administration, Beijing, China.","Liu Q, Ding C, Chu Y, Zhang W, Guo G, Chen J, Su X",,,9.0,China +28365761,PhagesDB,0.997792363,PhagesDB,0.997792363,Actinobacteriophage Database,0.878269814,1,http://phagesdb.org,301,United States,"(40.8229,-74.4592)",http://web.archive.org/web/20221107172632/https://phagesdb.org/,2017-03-01,None,"Russell DA, Hatfull GF",,"NIGMS NIH HHS, National Institutes of Health, NIGMS NIH HHS, Howard Hughes Medical Institute, Howard Hughes Medical Institute, National Institutes of Health",115.0, +28481528,PubChemQC,0.988919353,PubChemQC,0.988919353,,0,1,http://pubchemqc.riken.jp,"HTTPConnectionPool(host='pubchemqc.riken.jp', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to pubchemqc.riken.jp timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220418020930/http://pubchemqc.riken.jp/,2017-05-19,"Advanced Center for Computing and Communication, RIKEN , 2-1 Hirosawa, Wako, Saitama 351-0198 Japan.","Nakata M, Shimazaki T",,Japan Society for the Promotion of Science,22.0,Japan +28498885,PROXiMATE,0.997301996,PROXiMATE,0.997301996,,0,1,http://www.iitm.ac.in/bioinfo/PROXiMATE,302,India,"(12.8996,80.2209)",no_wayback,2017-09-01,"Department of Biotechnology, Bhupat and Jyoti Mehta School of Biosciences, Indian Institute of Technology Madras, Chennai 600036, India.","Jemimah S, Yugandhar K, Michael Gromiha M",,Department of Science and Technology,18.0,India +28529077,PLMD,0.97490716,PLMD,0.97490716,protein lysine modification database,0.703303804,1,http://plmd.biocuckoo.org,200,United States,"(40.2069,-111.642)",http://web.archive.org/web/20221006221830/https://plmd.biocuckoo.org/,2017-05-03,"Key Laboratory of Molecular Biophysics of Ministry of Education, College of Life Science and Technology and the Collaborative Innovation Center for Brain Science, Huazhong University of Science and Technology, Wuhan 430074, China.","Xu H, Zhou J, Lin S, Deng W, Zhang Y, Xue Y",,"Natural 
Science Foundation of China, Natural Science Foundation of China, National Basic Research Program, International Science & Technology Cooperation Program of China",54.0,China +28592293,Plasmobase,0.995240629,Plasmobase,0.995240629,,0,1,http://genome.lcqb.upmc.fr/plasmobase,301,France,"(48.9188,2.5454)",http://web.archive.org/web/20220616191325/http://genome.lcqb.upmc.fr/plasmobase/,2017-06-07,"Laboratoire de Biologie Computationnelle et Quantitative, UMR 7238, IBPS, CNRS, UPMC Univ-Paris 6, Sorbonne Universités, 4 place Jussieu, 75005, Paris, France.","Bernardes J, Vaquero C, Carbone A",,"Equip@Meso - “Investissement d’Avenir” Programme EQUIPEX, Institut Universitaire de France, CALSIMLAB - “Investissements d’Avenir” program",4.0,France +28651001,PpTFDB,0.990379,PpTFDB,0.990379,Pigeonpea Transcription Factors Database,0.94807246,1,http://14.139.229.199/PpTFDB/Home.aspx,"HTTPConnectionPool(host='14.139.229.199', port=80): Max retries exceeded with url: /PpTFDB/Home.aspx (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,no_wayback,2017-06-26,"National Research Centre on Plant Biotechnology, Pusa Campus, New Delhi, India.","Singh A, Sharma AK, Singh NK, Sharma TR",,,4.0,India +28748223,PhenoPlasm,0.98693198,PhenoPlasm,0.98693198,,0,1,http://www.phenoplasm.org,301,United States,"(33.4413,-112.0421)",http://web.archive.org/web/20220606235555/http://phenoplasm.org/,2017-07-24,"Malaria Programme, Wellcome Trust Sanger Institute, Wellcome Genome Campus, Hinxton, Cambridge, UK.","Sanderson T, Rayner JC",,"Wellcome Trust, Wellcome Trust",6.0, +28830355,porcine translational research database,0.834548388,,0,porcine translational research database,0.834548388,1,http://www.ars.usda.gov/Services/docs.htm?docid=6065,301,,,http://web.archive.org/web/20150906142839/http://www.ars.usda.gov/Services/docs.htm?docid=6065,2017-08-22,"United States Department of Agriculture, Agricultural Research Service, Beltsville Human Nutrition 
Research Center, Diet, Genomics and Immunology Laboratory, Beltsville, MD, USA. Harry.Dawson@ars.usda.gov.","Dawson HD, Chen C, Gaynor B, Shao J, Urban JF Jr",,Agricultural Research Service,20.0,"United States, United States" +28862395,PMS_DN,0.993797079,PMS_DN,0.993797079,Phelan-McDermid syndrome data network,0.926657796,1,http://pmsdn.hms.harvard.edu,"HTTPConnectionPool(host='pmsdn.hms.harvard.edu', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='pmsdn.hms.harvard.edu', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2017-09-01,"Department of Biomedical Informatics, Harvard Medical School, Boston, Massachusetts.","Kothari C, Wack M, Hassen-Khodja C, Finan S, Savova G, O'Boyle M, Bliss G, Cornell A, Horn EJ, Davis R, Jacobs J, Kohane I, Avillach P",,"National Institutes of Health, Amazon Web Services, NHGRI NIH HHS, NIH HHS, Patient-Centered Outcomes Research Institute",6.0, +28966675,POGD,0.997819841,POGD,0.997819841,Poaceae orphan genes database,0.986579394,1,http://bioinfo.ahau.edu.cn/pogd,503,,,no_wayback,2017-08-09,"Graduate School, Anhui Agricultural University, Hefei, Anhui 230036, P.R. China.","Yao C, Yan H, Zhang X, Wang R",,,1.0,China +28977518,qPrimerDB,0.992642581,qPrimerDB,0.992642581,,0,1,http://biodb.swu.edu.cn/qprimerdb,301,,,http://web.archive.org/web/20220524055644/https://biodb.swu.edu.cn/qprimerdb/,2018-01-01,"College of Agronomy and Biotechnology, Southwest University, Beibei, Chongqing 400715, China.","Lu K, Li T, He J, Chang W, Zhang R, Liu M, Yu M, Fan Y, Ma J, Sun W, Qu C, Liu L, Li N, Liang Y, Wang R, Qian W, Tang Z, Xu X, Lei B, Zhang K, Li J",,,33.0,China +29069441,PolyA_DB,0.997724459,PolyA_DB,0.997724459,,0,1,http://www.polya-db.org/v3,"HTTPConnectionPool(host='www.polya-db.org', port=80): Max retries exceeded with url: /v3 (Caused by ConnectTimeoutError(, 'Connection to www.polya-db.org timed out. 
(connect timeout=5)'))",,,no_wayback,2018-01-01,"Department of Microbiology, Biochemistry and Molecular Genetics, Rutgers New Jersey Medical School and Rutgers Cancer Institute of New Jersey, Newark, NJ 07103, USA.","Wang R, Nambiar R, Zheng D, Tian B",,NIGMS NIH HHS,61.0,"Jersey, Jersey, United States" +29077937,PICKLES,0.996508896,PICKLES,0.996508896,CRISPR Knockout,0.593691432,1,http://pickles.hart-lab.org,301,,,http://web.archive.org/web/20220526035849/https://pickles.hart-lab.org/,2018-01-01,"Department of Bioinformatics and Computational Biology, The University of Texas MD Anderson Cancer Center, Houston, TX, USA.","Lenoir WF, Lim TL, Hart T",,NCI NIH HHS,37.0,United States +"29106664, 31665479",ProteomicsDB,0.997059524,ProteomicsDB,0.997059524,,0,2,http://www.ProteomicsDB.org,301,,,http://web.archive.org/web/20221109115228/https://www.proteomicsdb.org/,2020-01-01,"Chair of Proteomics and Bioanalytics, Technical University of Munich (TUM), Freising, 85354 Bavaria, Germany., Chair of Proteomics and Bioanalytics, Technical University of Munich (TUM), Freising, Bavaria, Germany.","Schmidt T, Samaras P, Frejno M, Gessulat S, Barnert M, Kienegger H, Krcmar H, Schlegl J, Ehrlich HC, Aiche S, Kuster B, Wilhelm M, Samaras P, Schmidt T, Frejno M, Gessulat S, Reinecke M, Jarzab A, Zecha J, Mergner J, Giansanti P, Ehrlich HC, Aiche S, Rank J, Kienegger H, Krcmar H, Kuster B, Wilhelm M",", ",", German Science Foundation, Federal Ministry of Education and Research, German Science Foundation, German Science Foundation, SAP, Federal Ministry of Education and Research",119.0,"Germany, Germany" +29136200,PRODORIC2,0.997216105,PRODORIC2,0.997216105,,0,1,http://www.prodoric2.de,301,,,http://web.archive.org/web/20210916145552/https://prodoric2.de/,2018-01-01,"Institute of Microbiology and Braunschweig Integrated Centre of Systems Biology (BRICS), Technische Universität Braunschweig, Rebenring 56, Braunschweig D-38106, Germany.","Eckweiler D, Dudek CA, Hartlich J, Brötje D, Jahn 
D",,,17.0,Germany +29223505,PGMD,0.981539965,PGMD,0.981539965,Pakistan Genetic Mutation Database,0.853109753,1,http://www.pakmutation.com,200,United States,"(37.3342,-121.892)",http://web.archive.org/web/20221013234308/http://www.pakmutation.com/,2017-12-07,"Department of Computer Science, University of Science & Technology, Bannu, Pakistan.","Qasim I, Ahmad B, Khan MA, Khan N, Muhammad N, Basit S, Khan S",,,3.0,Pakistan +29370821,PhenoDis,0.995657861,PhenoDis,0.995657861,,0,1,http://mips.helmholtz-muenchen.de/phenodis,302,Germany,"(48.1331,11.3756)",http://web.archive.org/web/20210727083326/https://mips.helmholtz-muenchen.de/phenodis/,2018-01-25,"Technische Universität München, Chair of Genome Oriented Bioinformatics, Center of Life and Food Science, D-85350, Freising-Weihenstephan, Germany.","Adler A, Kirchmeier P, Reinhard J, Brauner B, Dunger I, Fobo G, Frishman G, Montrone C, Mewes HW, Arnold M, Ruepp A",,"Qatar National Research Fund, National Institute of Mental Health, NIA NIH HHS, National Institute on Aging, National Institute on Aging (US), NIMH NIH HHS, NIA NIH HHS, National Institute on Aging, NIA NIH HHS, NIMH NIH HHS",4.0,Germany +29377907,ProtDataTherm,0.960978508,ProtDataTherm,0.960978508,,0,1,http://profiles.bs.ipm.ir/softwares/protdatatherm,"HTTPConnectionPool(host='profiles.bs.ipm.ir', port=80): Max retries exceeded with url: /softwares/protdatatherm (Caused by ConnectTimeoutError(, 'Connection to profiles.bs.ipm.ir timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20200221235636/http://profiles.bs.ipm.ir:80/softwares/protdatatherm/,2018-01-29,"BioMEMS and Bioinspired Microfluidic Laboratory, Department of Mechanical and Manufacturing Engineering, University of Calgary, Calgary, Alberta, Canada.","Pezeshgi Modarres H, Mofrad MR, Sanati-Nezhad A",,Natural Sciences and Engineering Research Council of Canada,5.0,Canada +29425804,Predicted Endogenous Viral Elements,0.975142624,pEVE,0.907541066,Predicted Endogenous Viral Elements,0.975142624,1,http://peve.med.u-tokai.ac.jp,200,,,http://web.archive.org/web/20220803215430/http://peve.med.u-tokai.ac.jp/,2018-02-06,"Department of Molecular Life Science, Tokai University School of Medicine, 143 Shimokasuya, Isehara, Kanagawa 259-1193, Japan.","Kryukov K, Ueda MT, Imanishi T, Nakagawa S",,"Scientific Research on Innovative Areas, Challenging Exploratory Research, Scientific Research on Innovative Areas, Ministry of Education, Culture, Science, Sports, and Technology, Scientific Research on Innovative Areas, Research Activity Start-up",8.0,Japan +29530937,Predicted Arabidopsis Interactome Resource,0.918171836,AIR,0.706697822,Predicted Arabidopsis Interactome Resource,0.918171836,1,http://public.synergylab.cn/pair,302,,,http://web.archive.org/web/20190921142523/http://public.synergylab.cn:80/pair/,2018-03-12,"Institute of Pharmaceutical Biotechnology, Faculty of Medicine, Zhejiang University, Hangzhou, People's Republic of China, 310058.","Yao H, Wang X, Chen P, Hai L, Jin K, Yao L, Mao C, Chen X",,,6.0,China +29575358,ProtaBank,0.99739188,ProtaBank,0.99739188,,0,1,http://protabank.org,301,,,http://web.archive.org/web/20221017041413/https://www.protabank.org/,2018-04-30,"Protabit LLC, 129 N. 
Hill Avenue, Suite 102, Pasadena, California, 91106.","Wang CY, Chang PM, Ary ML, Allen BD, Chica RA, Mayo SL, Olafson BD",,"NIGMS NIH HHS, National Institute of General Medical Sciences of the National Institutes of Health",12.0, +"29662024, 32679723",PKIDB,0.987339139,PKIDB,0.987339139,Protein Kinase Inhibitor Database,0.97000488,2,http://www.icoa.fr/pkidb,302,France,"(46.5874,0.3332)",http://web.archive.org/web/20220121052317/https://www.icoa.fr/pkidb/,2020-07-15,"Institut de Chimie Organique et Analytique (ICOA), UMR CNRS-Université d'Orléans 7311, Université d'Orléans BP 6759, 45067 Orléans CEDEX 2, France. fabrice.carles@univ-orleans.fr., Institut de Chimie Organique et Analytique (ICOA), UMR CNRS-Université d'Orléans 7311, Université d'Orléans BP 6759, 45067 Orléans CEDEX 2, France.","Carles F, Bourg S, Meyer C, Bonnet P, Bournez C, Carles F, Peyrat G, Aci-Sèche S, Bourg S, Meyer C, Bonnet P",", ","Région Centre Val de Loire, Association Nationale de la Recherche et de la Technologie, ",47.0,"France, France" +29939244,PtRFdb,0.996222079,PtRFdb,0.996222079,,0,1,http://www.nipgr.res.in/PtRFdb,"HTTPConnectionPool(host='www.nipgr.res.in', port=80): Max retries exceeded with url: /PtRFdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20181018045553/http://www.nipgr.res.in:80/PtRFdb/,2018-01-01,"Lab #202, National Institute of Plant Genome Research (NIPGR), Aruna Asaf Ali Marg, New Delhi, India.","Gupta N, Singh A, Zahra S, Kumar S",,"National Institute of Plant Genome Research, Department of Biotechnology",14.0,India +30010730,RabGTD,0.996257365,RabGTD,0.996257365,,0,1,http://www.picb.ac.cn/RabGTD,301,,,http://web.archive.org/web/20220617183746/https://www.picb.ac.cn/RabGTD/,2018-01-01,"Key Lab of Computational Biology, CAS-MPG Partner Institute for Computational Biology, Shanghai Institutes for Biological Sciences, Chinese Academy of Sciences, 320 Yueyang Rd., Xuhui 
District, Shanghai 200031, China.","Zhou L, Xiao Q, Bi J, Wang Z, Li Y",,"Youth Innovation Promotion Association CAS, National Key R&D Program of China",7.0,China +30053269,PITDB,0.992001295,PITDB,0.992001295,,0,1,http://pitdb.org,301,United Kingdom,"(51.5074,-0.127758)",http://web.archive.org/web/20200804025603/http://pitdb.org/,2018-01-01,"School of Biological and Chemical Sciences, Queen Mary University of London, Mile End, London E1 4NS, UK.","Saha S, Chatzimichali EA, Matthews DA, Bessant C",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",0.0, +30055873,PhoPepMass,0.996143401,PhoPepMass,0.996143401,,0,1,http://www.scbit.org/phopepmass/index.html,"HTTPConnectionPool(host='www.scbit.org', port=80): Max retries exceeded with url: /phopepmass/index.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,no_wayback,2018-07-19,"School of Life Science and Technology, Shanghai Tech University, Shanghai 201210, China; Shanghai Center for Bioinformation Technology, Shanghai Academy of Science and Technology, Shanghai 201203, China.","Zhang M, Cui H, Chen L, Yu Y, Glocker MO, Xie L",,"Chinese Human Proteome Projects (CNHPP, Chinese Human Proteome Projects (CNHPP, Shanghai Municipal Science and Technology Commission of China, National Hi-Tech Program, National Key Research and Development Program of China",0.0,"China, China" +30101318,PlaNC-TE,0.996604753,PlaNC-TE,0.996604753,,0,1,http://planc-te.cp.utfpr.edu.br,200,Brazil,"(-25.5026,-49.2908)",http://web.archive.org/web/20221017010743/http://planc-te.cp.utfpr.edu.br/,2018-01-01,"Department of Computer Science, Bioinformatics Graduation Program (PPGBIOINFO), Federal University of Technology - Paraná, Cornélio Procópio, PR, Brazil.","Pedro DLF, Lorenzetti APR, Domingues DS, Paschoal AR",,"Pró-Reitoria de Pesquisa e Pós-Graduação, Foundation for 
Research Support of the State of São Paulo, Coordination for the Improvement of Higher Education Personnel, Universidade Federal do Paraná, National Council for Scientific and Technological Development",6.0,Brazil +30127348,PPInS,0.995528519,PPInS,0.995528519,Protein-Protein Interaction Sitesbase,0.959092394,1,http://www.cup.edu.in:99/ppins/home.php,"HTTPConnectionPool(host='www.cup.edu.in', port=99): Max retries exceeded with url: /ppins/home.php (Caused by ConnectTimeoutError(, 'Connection to www.cup.edu.in timed out. (connect timeout=5)'))",,,no_wayback,2018-08-20,"Department of Computational Sciences, School of Basic and Applied Sciences, Central University of Punjab, Bathinda, Punjab, 151 001, India.","Kumar V, Mahato S, Munshi A, Kulharia M",,,2.0,India +30147056,PIMBase,0.997270882,PIMBase,0.997270882,,0,1,http://pimbase.kalis-amts.de,301,Germany,"(51.4556,7.01156)",no_wayback,2018-01-01,"Bioinformatics/Medical Informatics Department, Bielefeld University, Germany.","Friedrichs M, Shoshi A, Kleine M",,,1.0,Germany +30239819,POSTAR2,0.963751674,POSTAR2,0.963751674,,0,1,http://lulab.life.tsinghua.edu.cn/postar,503,,,http://web.archive.org/web/20220820013118/http://lulab.life.tsinghua.edu.cn/POSTAR/,2019-01-01,"MOE Key Laboratory of Bioinformatics, Center for Synthetic and Systems Biology, School of Life Sciences, Tsinghua University, Beijing 100084, China.","Zhu Y, Xu G, Yang YT, Xu Z, Chen X, Shi B, Xie D, Lu ZJ, Wang P",,"National Key Research and Development Plan of China, National Natural Science Foundation of China, National Natural Science Foundation of China",63.0,China +30244175,PTMD,0.985867262,PTMD,0.985867262,,0,1,http://ptmd.biocuckoo.org,200,,,http://web.archive.org/web/20221017085440/https://ptmd.biocuckoo.org/,2018-08-01,"Department of Bioinformatics & Systems Biology, MOE Key Laboratory of Molecular Biophysics, College of Life Science and Technology and the Collaborative Innovation Center for Biomedical Engineering, Huazhong University of 
Science and Technology, Wuhan 430074, China.","Xu H, Wang Y, Lin S, Deng W, Peng D, Cui Q, Xue Y",,"Special Project on Precision Medicine, Natural Science Foundation of China, Special Project on Precision Medicine, Natural Science Foundation of China, Fundamental Research Funds for the Central Universities",32.0,China +30266409,PlaD,0.996840239,PlaD,0.996840239,,0,1,"http://systbio.cau.edu.cn/plad/index.php, http://zzdlab.com/plad/index.php","200, 200","China, China","(40.0018,116.333), (22.5431,114.058)","http://web.archive.org/web/20211025101116/http://systbio.cau.edu.cn/plad/index.php, no_wayback",2018-08-01,"State Key Laboratory of Agrobiotechnology, College of Biological Sciences, China Agricultural University, Beijing 100193, China.","Qi H, Jiang Z, Zhang K, Yang S, He F, Zhang Z",,Beijing Natural Science Foundation,8.0,"China, China" +30307523,PVsiRNAdb,0.97544,PVsiRNAdb,0.97544,,0,1,http://www.nipgr.res.in/PVsiRNAdb,"HTTPConnectionPool(host='www.nipgr.res.in', port=80): Max retries exceeded with url: /PVsiRNAdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2018-01-01,"Bioinformatics Laboratory, National Institute of Plant Genome Research, Aruna Asaf Ali Marg, New Delhi, India.","Gupta N, Zahra S, Singh A, Kumar S",,"National Institute of Plant Genome Research, Council of Scientific and Industrial Research, Department of Biotechnology, Ministry of Science and Technology",4.0,India +30335169,PopHumanScan,0.995905161,PopHumanScan,0.995905161,,0,1,http://pophumanscan.uab.cat,302,,,http://web.archive.org/web/20220818104331/https://pophumanscan.uab.cat/,2019-01-01,"Institut de Biotecnologia i de Biomedicina and Departament de Genètica i de Microbiologia, Universitat Autònoma de Barcelona, 08193 Bellaterra, Barcelona, Spain.","Murga-Moreno J, Coronado-Zamora M, Bodelón A, Barbadilla A, Casillas S",,"Ministerio de Economía y Competitividad, Generalitat de Catalunya, Agència de Gestió 
d’Ajuts Universitaris i de Recerca",9.0,Spain +30357353,piRTarBase,0.99358052,piRTarBase,0.99358052,,0,1,http://cosbi6.ee.ncku.edu.tw/piRTarBase,200,,"(22.9917,120.2148)",http://web.archive.org/web/20220617191943/http://cosbi6.ee.ncku.edu.tw/piRTarBase/,2019-01-01,"Department of Electrical Engineering, National Cheng Kung University, Tainan, Taiwan.","Wu WS, Brown JS, Chen TT, Chu YH, Huang WC, Tu S, Lee HC",,"Ministry of Science of Technology of Taiwan, NIH predoctoral, NIGMS NIH HHS, NIH R00, Ministry of Science of Technology of Taiwan, NIGMS NIH HHS, Ministry of Science of Technology of Taiwan, National Natural Science Foundation of China",23.0, +30380090,PLSDB,0.992310107,PLSDB,0.992310107,,0,1,http://ccb-microbe.cs.uni-saarland.de/plsdb,302,,,http://web.archive.org/web/20220806080355/https://ccb-microbe.cs.uni-saarland.de/plsdb/,2019-01-01,"Chair for Clinical Bioinformatics, Saarland University, Campus Building E2.1, 66123 Saarbruecken, Germany.","Galata V, Fehlmann T, Backes C, Keller A",,Saarland University,97.0,Germany +30380102,qPhos,0.993786156,qPhos,0.993786156,,0,1,http://qphos.cancerbio.info,200,Hong Kong,"(22.2908,114.1501)",http://web.archive.org/web/20221017005343/http://qphos.cancerbio.info/,2019-01-01,"State Key Laboratory of Oncology in South China, Collaborative Innovation Center for Cancer Medicine, Sun Yat-sen University Cancer Center, Guangzhou 510060, China.","Yu K, Zhang Q, Liu Z, Zhao Q, Zhang X, Wang Y, Wang ZX, Jin Y, Li X, Liu ZX, Xu RH",,"Guangdong Introducing Innovative and Entrepreneurial Teams, Science and Technology Program of Guangzhou, Science and Technology Program of Guangzhou, National Key R&D Program of China, Science and Technology Program of Guangdong, National Natural Science Foundation of China, National Natural Science Foundation of China, Natural Science Foundation of Guangdong Province, Science and Technology Program of Guangzhou",15.0,"China, China" +30395277,PlantPAN,0.996323383,PlantPAN,0.996323383,Plant Promoter 
Analysis Navigator,0.927395006,1,http://PlantPAN.itps.ncku.edu.tw,200,,,http://web.archive.org/web/20220729192031/http://plantpan.itps.ncku.edu.tw/,2019-01-01,"Graduate Program in Translational Agricultural Sciences, National Cheng Kung University and Academia Sinica, Taiwan.","Chow CN, Lee TY, Hung YC, Li GZ, Tseng KC, Liu YH, Kuo PL, Zheng HQ, Chang WC",,Ministry of Science and Technology,69.0, +30576486,PhytoTypeDB,0.998071671,PhytoTypeDB,0.998071671,,0,1,http://phytotypedb.bio.unipd.it,200,,,no_wayback,2018-01-01,"Department of Biomedical Sciences, University of Padua, via U. Bassi 58/b, Padua, Italy.","Necci M, Piovesan D, Micheletti D, Paladin L, Cestaro A, Tosatto SCE",,Fondazione Edmund Mach,0.0,Italy +30587128,PSRN,0.994711161,PSRN,0.994711161,plant stress RNA-Seq nexus,0.983671112,1,http://syslab5.nchu.edu.tw/PSRN,"HTTPConnectionPool(host='syslab5.nchu.edu.tw', port=80): Max retries exceeded with url: /PSRN (Caused by ConnectTimeoutError(, 'Connection to syslab5.nchu.edu.tw timed out. 
(connect timeout=5)'))",,,no_wayback,2018-12-27,"Program in Medical Biotechnology, National Chung Hsing University, 145 Xingda Rd., South Dist, Taichung City, 402, Taiwan.","Li JR, Liu CC, Sun CH, Chen YT",,"Ministry of Education, Ministry of Science and Technology, Taiwan, Ministry of Science and Technology, Taiwan",4.0, +30804701,PPEAO,0.983579457,PPEAO,0.983579457,,0,1,http://ppeao.ird.fr,200,,,http://web.archive.org/web/20220302140924/http://www.ppeao.ird.fr/,2019-02-14,"MARBEC, Univ Montpellier, CNRS, Ifremer, IRD, Sète, France MARBEC, Univ Montpellier, CNRS, Ifremer, IRD Sète France.","Simier M, Ecoutin JM, Tito de Morais L",,,0.0,"France, France" +30805645,PKAD,0.989784598,PKAD,0.989784598,,0,1,http://compbio.clemson.edu/pkad,200,,,http://web.archive.org/web/20220620110448/http://compbio.clemson.edu/pkad,2019-01-01,"Computational Biophysics and Bioinformatics, Department of Physics and Astronomy, Clemson University, Clemson, South Carolina, USA.","Pahari S, Sun L, Alexov E",,National Institutes of Health,24.0,United States +30985146,ProteinExplorer,0.992175162,ProteinExplorer,0.992175162,,0,1,http://massive.ucsd.edu/ProteoSAFe/protein_explorer_splash.jsp,200,,,http://web.archive.org/web/20220122210507/https://massive.ucsd.edu/ProteoSAFe/protein_explorer_splash.jsp,2018-10-15,None,"Pullman BS, Wertz J, Carver J, Bandeira N",,"National Institute of General Medical Sciences, NIGMS NIH HHS, NIGMS NIH HHS, Alfred P. Sloan Foundation",9.0, +31103066,Placental Atlas Tool,0.817075777,PAT,0.751513183,Placental Atlas Tool,0.817075777,1,http://pat.nichd.nih.gov,301,United States,"(39.0032,-77.0979)",no_wayback,2019-04-01,"Eunice Kennedy Shriver National Institute of Child Health and Human Development, Pregnancy and Perinatology Branch, Bethesda, MD, 20817, USA. 
Electronic address: ilekisj@nih.gov.","Ilekis JV, Keller M, Shlionskaya A, Ferguson CH, Patel B, Meitiv AL, Gorman B, Mohale A",,"National Institute of Child Health and Human Development, Intramural NIH HHS",1.0,United States +31161204,PRISMOID,0.997668028,PRISMOID,0.997668028,,0,1,http://prismoid.erc.monash.edu,"HTTPConnectionPool(host='prismoid.erc.monash.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200525135153/http://prismoid.erc.monash.edu/,2020-05-01,"Biomedicine Discovery Institute and Department of Biochemistry and Molecular Biology, Monash University, Melbourne, Victoria, Australia.","Li F, Fan C, Marquez-Lago TT, Leier A, Revote J, Jia C, Zhu Y, Smith AI, Webb GI, Liu Q, Wei L, Li J, Song J",,"Informatics Institute of the School of Medicine at UAB, National Health and Medical Research Council, Fundamental Research Funds for the Central Universities, National Health and Medical Research Council, Australian Research Council, Monash Major Inter-Disciplinary Research, Key Research and Development Program of Shaanxi Province, China, Australian Research Council, National Institute of Allergy and Infectious Diseases",6.0,Australia +31228159,PhenoGen,0.95278728,PhenoGen,0.95278728,Rat,0.555455029,1,http://phenogen.org,301,United States,"(45.5235,-122.676)",http://web.archive.org/web/20221025120813/https://phenogen.org/,2019-01-01,"Department of Pharmaceutical Sciences, Skaggs School of Pharmacy and Pharmaceutical Sciences, University of Colorado, Aurora, CO, USA.","Tabakoff B, Smith H, Vanderlinden LA, Hoffman PL, Saba LM",,"NIAAA NIH HHS, NIDA NIH HHS",3.0,United States +31307376,PhenPath,0.98982805,PhenPath,0.98982805,,0,1,http://phenpath.biocomp.unibo.it,302,Italy,"(44.581,11.3595)",no_wayback,2019-07-16,"University of Bologna, FABIT, Via San Donato 15, 40126, Bologna, Italy.","Babbi G, Martelli PL, Casadio 
R",,,5.0,Italy +31337335,PPTdb,0.99392122,PPTdb,0.99392122,Pathogenic Protist Transmembranome database,0.98263188,1,http://pptdb.cgu.edu.tw,200,,,http://web.archive.org/web/20220616030832/http://pptdb.cgu.edu.tw/,2019-07-24,"Department and Graduate Institute of Computer Science and Information Engineering, Chang Gung University, Taoyuan, Taiwan.","Lee CC, Huang PJ, Yeh YM, Chen SY, Chiu CH, Cheng WH, Tang P",,,0.0, +31584089,PhaSepDB,0.996137023,PhaSepDB,0.996137023,,0,1,http://db.phasep.pro,200,United States,"(37.5517,-122.33)",http://web.archive.org/web/20221026024043/http://db.phasep.pro/,2020-01-01,"Department of Biomedical Informatics, Peking University Health Science Center, Beijing 100191, China.","You K, Huang Q, Yu C, Shen B, Sevilla C, Shi M, Hermjakob H, Chen Y, Li T",,"National Key Research and Development Program of China, National Natural Science Foundation of China, National Key Research and Development Program of China, European Bioinformatics Institute, National Natural Science Foundation of China",49.0,China +31598690,ProCarbDB,0.995657146,ProCarbDB,0.995657146,,0,1,http://www.procarbdb.science/procarb,301,,,http://web.archive.org/web/20220620235449/http://www.procarbdb.science/procarb/,2020-01-01,"Department of Biochemistry, University of Cambridge, Tennis Court Road, Cambridge, CB2 1GA, UK.","Copoiu L, Torres PHM, Ascher DB, Blundell TL, Malhotra S",,"Wellcome Trust, Ipsen Bioinnovation Ltd., Cambridge Studentship, Ipsen Bioinnovation Ltd., Wellcome Trust, Medical Research Council, Cystic Fibrosis Trust, National Health and Medical Research Council, Jack Brockhoff Foundation",8.0, +31598699,QTLbase,0.997860312,QTLbase,0.997860312,,0,1,http://mulinlab.org/qtlbase,301,,,http://web.archive.org/web/20221007042933/http://www.mulinlab.org/qtlbase/,2020-01-01,"Department of Pharmacology, Tianjin Key Laboratory of Inflammation Biology, 2011 Collaborative Innovation Center of Tianjin for Medical Epigenetics, School of Basic Medical Sciences, National 
Clinical Research Center for Cancer, Tianjin Medical University Cancer Institute and Hospital, Tianjin Medical University, Tianjin 300070, China.","Zheng Z, Huang D, Wang J, Zhao K, Zhou Y, Guo Z, Zhai S, Xu H, Cui H, Yao H, Wang Z, Yi X, Zhang S, Sham PC, Li MJ",,"National Natural Science Foundation of China, Natural Science Foundation of Tianjin, National Natural Science Foundation of China",27.0,China +31599098,PSMD,0.98416996,PSMD,0.98416996,Pan-Species Microsatellite Database,0.979983436,1,http://big.cdu.edu.cn/psmd,"HTTPConnectionPool(host='big.cdu.edu.cn', port=80): Max retries exceeded with url: /psmd (Caused by ConnectTimeoutError(, 'Connection to big.cdu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220618152328/http://big.cdu.edu.cn/psmd/,2019-10-28,"Institute for Advanced Study, Chengdu University, Chengdu, China.","Du L, Liu Q, Zhao K, Tang J, Zhang X, Yue B, Fan Z",,"National Natural Science Foundation of China, Start-up Fund of Chengdu University",3.0,China +31602478,PmiREN,0.997984946,PmiREN,0.997984946,Plant miRNA Encyclopedia,0.926905349,1,http://www.pmiren.com,302,,,http://web.archive.org/web/20221103182840/https://pmiren.com/,2020-01-01,"Beijing Agro-biotechnology Research Center, Beijing Academy of Agriculture and Forestry Sciences, Beijing 100097, P. R. 
China.","Guo Z, Kuang Z, Wang Y, Zhao Y, Tao Y, Cheng C, Yang J, Lu X, Hao C, Wang T, Cao X, Wei J, Li L, Yang X",,"National Natural Science Foundation of China, Beijing Academy of Agricultural and Forestry Sciences, Beijing Academy of Agricultural and Forestry Sciences",37.0,China +31612325,PlantAFP,0.990319967,PlantAFP,0.990319967,,0,1,http://bioinformatics.cimap.res.in/sharma/PlantAFP,"HTTPConnectionPool(host='bioinformatics.cimap.res.in', port=80): Max retries exceeded with url: /sharma/PlantAFP (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20220615160826/http://bioinformatics.cimap.res.in/sharma/PlantAFP/,2019-10-14,"Biotechnology Division, CSIR-Central Institute of Medicinal and Aromatic Plants, Near Kukrail Picnic Spot, P.O.-CIMAP, Lucknow, Uttar Pradesh, 226 015, India. ks26atul@gmail.com.","Tyagi A, Pankaj V, Singh S, Roy S, Semwal M, Shasany AK, Sharma A",,,3.0,India +31612960,PhaSePro,0.996815085,PhaSePro,0.996815085,,0,1,http://phasepro.elte.hu,301,Hungary,"(47.5636,19.0947)",http://web.archive.org/web/20220928044427/https://phasepro.elte.hu/,2020-01-01,"MTA-ELTE Momentum Bioinformatics Research Group, Department of Biochemistry, Eötvös Loránd University, Budapest H-1117, Hungary.","Mészáros B, Erdős G, Szabó B, Schád É, Tantos Á, Abukhairan R, Horváth T, Murvai N, Kovács OP, Kovács M, Tosatto SCE, Tompa P, Dosztányi Z, Pancsa R",,"New National Excellence Programme, Hungarian National Research, Development, and Innovation Office, Hungarian National Research, Development, and Innovation Office, Hungarian Academy of Sciences, VUB, National Research Council of Science and Technology, Hungarian National Research, Development, and Innovation Office, European Union's Horizon 2020 research and innovation programme",32.0,Hungary 
+31617559,PolyASite,0.985653996,PolyASite,0.985653996,,0,1,http://polyasite.unibas.ch,301,,,http://web.archive.org/web/20221025121008/https://www.polyasite.unibas.ch/,2020-01-01,"Biozentrum, University of Basel, Basel, Switzerland.","Herrmann CJ, Schmidt R, Kanitz A, Artimo P, Gruber AJ, Zavolan M",,"Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation",37.0,Switzerland +31620779,prokaryotic antiviral defense system,0.965218917,PADS,0.94088002,prokaryotic antiviral defense system,0.965218917,1,http://bigd.big.ac.cn/padsarsenal,301,,,no_wayback,2020-01-01,"National Genomics Data Center, Beijing 100101, China.","Zhang Y, Zhang Z, Zhang H, Zhao Y, Zhang Z, Xiao J",,"Chinese Academy of Sciences, National Key Research Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Chinese Academy of Sciences, National Development and Reform Commission",11.0,China +31640808,PGG.SNV,0.993890333,PGG.SNV,0.993890333,,0,1,http://www.pggsnv.org,301,China,"(39.96,116.298)",http://web.archive.org/web/20221012081459/https://www.pggsnv.org/,2019-10-22,"Chinese Academy of Sciences (CAS) Key Laboratory of Computational Biology, Max Planck Independent Research Group on Population Genomics, CAS-MPG Partner Institute for Computational Biology (PICB), Shanghai Institute of Nutrition and Health, Shanghai Institutes for Biological Sciences, University of Chinese Academy of Sciences, CAS, Shanghai, 200031, China.","Zhang C, Gao Y, Ning Z, Lu Y, Zhang X, Liu J, Xie B, Xue Z, Wang X, Yuan K, Ge X, Pan Y, Liu C, Tian L, Wang Y, Lu D, Hoh BP, Xu S",,"Program of Shanghai Academic Research Leader, National Natural Science Foundation of China, National Key Research and Development Program, National Natural Science Foundation of China, National Science Fund for Distinguished Young Scholars, Shanghai Municipal Science and Technology Major Project, Chinese Academy of Sciences President’s International 
Fellowship Initiatives, Key Research Program of Frontier Sciences, UK Royal Society-Newton Advanced Fellowship, National Natural Science Foundation of China, Strategic Priority Research Program, National Natural Science Foundation of China",9.0,China +31642469,PhenoModifier,0.996493459,PhenoModifier,0.996493459,,0,1,http://www.biosino.org/PhenoModifier,403,,,http://web.archive.org/web/20220808201014/https://www.biosino.org/PhenoModifier/,2020-01-01,"Shanghai Children's Hospital, Shanghai Jiao Tong University, Shanghai 200062, China.","Sun H, Guo Y, Lan X, Jia J, Cai X, Zhang G, Xie J, Liang Q, Li Y, Yu G",,"National Key R&D Program of China, National Key R&D Program of China, Zhangjiang Special Project of National Innovation Demonstration Zone, National Key R&D Program of China, National Key R&D Program of China, National Key R&D Program of China, Shanghai Jiao Tong University",6.0,China +31725858,PlantCircNet,0.997203588,PlantCircNet,0.997203588,,0,1,http://bis.zju.edu.cn/plantcircnet/index.php,200,China,"(40.0018,116.333)",http://web.archive.org/web/20220616002410/http://bis.zju.edu.cn/plantcircnet/index.php,2017-01-01,"Department of Bioinformatics, The State Key Laboratory of Plant Physiology and Biochemistry, Institute of Plant Science, College of Life Sciences, Zhejiang University, Hangzhou 310058, China.","Zhang P, Meng X, Chen H, Liu Y, Xue J, Zhou Y, Chen M",,National Natural Science Foundation of China,12.0,China +31738435,PMBD,0.987427726,PMBD,0.987427726,Plastics Microbial Biodegradation Database,0.977427036,1,http://pmbd.genome-mining.cn/home,301,,,http://web.archive.org/web/20220521164652/http://pmbd.genome-mining.cn/home/,2019-01-01,"Department of Biotechnology, College of Life Science, Huazhong University of Science and Technology, Email: 279659072@qq.com.","Gan Z, Zhang H",,,8.0, +31796964,PRP,0.96961385,PRP,0.96961385,Plant Regulomics Portal,0.915672481,1,http://scbb.ihbt.res.in/PRP,302,,,no_wayback,2019-01-01,"Studio of Computational Biology & 
Bioinformatics, Biotechnology Division, CSIR-Institute of Himalayan Bioresource Technology (CSIR-IHBT), Palampur, Kangra, Himachal Pradesh 176061, India.","Panzade G, Gangwar I, Awasthi S, Sharma N, Shankar R",,"Department of Science and Technology/Science and Engineering Research Board, Council of Scientific and Industrial Research",1.0,India +31809863,PsyMuKB,0.996618569,PsyMuKB,0.996618569,NeuroPsychiatric Mutation Knowledge Base,0.886189427,1,http://psymukb.net,"HTTPConnectionPool(host='psymukb.net', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,no_wayback,2019-08-01,"Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine, School of Biomedical Engineering, Shanghai Jiao Tong University, Shanghai 200030, China; Shanghai Key Laboratory of Psychotic Disorders, Shanghai 200030, China; Brain Science and Technology Research Center, Shanghai Jiao Tong University, Shanghai 200030, China. 
Electronic address: nickgnlin@sjtu.edu.cn.","Lin GN, Guo S, Tan X, Wang W, Qian W, Song W, Wang J, Yu S, Wang Z, Cui D, Wang H",,"National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities, Jilin Province, Program for Professor of Special Appointment (Eastern Scholar) at Shanghai Institutions of Higher Learning, National Key R & D Program of China, National Natural Science Foundation of China, Jilin Province, Shanghai Municipal Education Commission",4.0,"China, China, China" +31844049,ProTargetMiner,0.99520582,ProTargetMiner,0.99520582,,0,1,http://protargetminer.genexplain.com,200,,,http://web.archive.org/web/20220528110830/http://protargetminer.genexplain.com/,2019-12-16,"Department of Medical Biochemistry and Biophysics, Karolinska Institutet, 171 77, Stockholm, Sweden.","Saei AA, Beusch CM, Chernobrovkin A, Sabatier P, Zhang B, Tokat ÜG, Stergiou E, Gaetani M, Végvári Á, Zubarev RA",,"Cancerfonden, Cancerfonden",7.0,Sweden +31868683,QPN,0.877652884,QPN,0.877652884,Network,0.656310976,1,http://rpq-qpn.ca/en,301,,,http://web.archive.org/web/20220624100630/https://rpq-qpn.ca/en/,2020-01-01,"Department of Neurology and Neurosurgery, McGill University, Montréal, QC, Canada.","Gan-Or Z, Rao T, Leveille E, Degroot C, Chouinard S, Cicchetti F, Dagher A, Das S, Desautels A, Drouin-Ouellet J, Durcan T, Gagnon JF, Genge A, Karamchandani J, Lafontaine AL, Sun SLW, Langlois M, Levesque M, Melmed C, Panisset M, Parent M, Poline JB, Postuma RB, Pourcher E, Rouleau GA, Sharp M, Monchi O, Dupré N, Fon EA",,"NIMH NIH HHS, CIHR, NIBIB NIH HHS, NIDA NIH HHS",11.0,Canada +31949184,PulmonDB,0.997862995,PulmonDB,0.997862995,,0,1,http://pulmondb.liigh.unam.mx,301,,,http://web.archive.org/web/20220126181754/https://pulmondb.liigh.unam.mx/,2020-01-16,"Laboratorio Internacional de Investigación sobre el Genoma Humano, UNAM, Juriquilla, Mexico.","Villaseñor-Altamirano AB, Moretto M, Maldonado M, Zayas-Del Moral A, Munguía-Reyes A, Romero Y, 
García-Sotelo JS, Aguilar LA, Aldana-Assad O, Engelen K, Selman M, Collado-Vides J, Balderas-Martínez YI, Medina-Rivera A",,"National Autonomous University of Mexico | Dirección General de Asuntos del Personal Académico, Universidad Nacional Autónoma de México, Consejo Nacional de Ciencia y Tecnología, Consejo Nacional de Ciencia y Tecnología, Fundación Miguel Alemán, A.C., National Autonomous University of Mexico | Dirección General de Asuntos del Personal Académico, Universidad Nacional Autónoma de México",6.0,Mexico +32119071,ProCaff,0.978805065,ProCaff,0.978805065,carbohydrate complex binding affinity database,0.665413107,1,http://web.iitm.ac.in/bioinfo2/procaff,302,,,no_wayback,2020-06-01,"Department of Biotechnology, Bhupat and Jyoti Mehta School of Biosciences, Indian Institute of Technology Madras, Chennai 600036, India.","Siva Shanmugam NR, Jino Blessy J, Veluraja K, Michael Gromiha M",,"India and the DST-INSPIRE, Department of Biotechnology, Government of India, Ministry of Human Resource and Development",2.0,India +32190163,PLSD,0.991903663,PLSD,0.991903663,Prospective Lynch Syndrome Database,0.954046357,1,http://www.insight-group.org/variants/databases,301,United Kingdom,"(51.5074,-0.127758)",http://web.archive.org/web/20220709083850/http://www.insight-group.org/variants/databases/,2020-03-14,"Department of Tumour Biology, The Norwegian Radium Hospital, Part of Oslo University Hospital, Oslo, Norway.",Møller P,,,9.0,Norway +32345779,PSCRIdb,0.996715069,PSCRIdb,0.996715069,,0,1,http://bicresources.jcbose.ac.in,200,,,http://web.archive.org/web/20221005234729/http://bicresources.jcbose.ac.in/,2020-01-01,"Division of Bioinformatics, Bose Institute, Kolkata, India.","Banerjee K, Jana T, Ghosh Z, Saha S",,Indian Council of Medical Research,0.0,India 
+32358997,ProNetView-ccRCC,0.936549442,ProNetView-ccRCC,0.936549442,,0,1,http://ccrcc.cptac-network-view.org,200,,,http://web.archive.org/web/20220423091336/http://ccrcc.cptac-network-view.org/,2020-05-27,"Department of Genetics and Genomic Sciences, Icahn School of Medicine at Mount Sinai, New York, NY, 10029, USA.","Kalayci S, Petralia F, Wang P, Gümüş ZH",,"National Cancer Institute, NCI NIH HHS, NCI NIH HHS",1.0,United States +32542363,PvP01,0.951967716,PvP01,0.951967716,,0,1,http://www.scfbio-iitd.res.in/PvP01,301,,,http://web.archive.org/web/20200617063729/http://www.scfbio-iitd.res.in/PvP01/,2020-01-01,"Supercomputing Facility for Bioinformatics & Computational Biology, Indian Institute of Technology Delhi, Hauz Khas, New Delhi, India, 110016.","Singh A, Kaushik R, Chaurasia DK, Singh M, Jayaram B",,,0.0,India +32542382,PRMdb,0.995993018,PRMdb,0.995993018,high-throughput analysis of modified,0.833429269,1,http://www.biosequencing.cn/PRMdb,301,,,http://web.archive.org/web/20200930203359/http://www.biosequencing.cn/PRMdb/,2020-06-01,"College of Life Sciences, Tianjin Key Laboratory of Animal and Plant Resistance, Tianjin Normal University, Tianjin 300387, China.","Ma X, Si F, Liu X, Luan W",,"Tianjin Rice Industrial Technology System of China, National Science Foundation of China",2.0,China +33002111,PyDISH,0.98944056,PyDISH,0.98944056,DIStortion of Heme porphyrin,0.780717987,1,http://pydish.bio.info.hiroshima-cu.ac.jp,301,,,http://web.archive.org/web/20220802123844/https://pydish.bio.info.hiroshima-cu.ac.jp/,2020-10-01,"School of Regional Innovation and Social Design Engineering, Faculty of Engineering, Kitami Institute of Technology, 165 Koen-cho, Kitami, Hokkaido 090-8507, Japan.","Kondo HX, Kanematsu Y, Masumoto G, Takano Y",,"Japan Society for the Promotion of Science, Core Research for Evolutional Science and Technology, Sumitomo Foundation, Japan Society for the Promotion of Science, Ministry of Education, Culture, Sports, Science and 
Technology",1.0,Japan +33003203,QSIdb,0.99794662,QSIdb,0.99794662,quorum sensing interference molecules,0.608476996,1,http://qsidb.lbci.net,200,,,no_wayback,2021-07-01,"School of Chemical Engineering and Technology, Tianjin University, Tianjin, China.","Wu S, Liu C, Feng J, Yang A, Guo F, Qiao J",,"New Century Outstanding Talent Support Program, National Natural Science Foundation of China, National Natural Science Foundation of China, Education Ministry of China, National Key Research and Development Project of China, Creative Research Groups of China, National Natural Science Foundation of China",1.0,China +33010159,PROTAC-DB,0.975279614,PROTAC-DB,0.975279614,,0,1,http://cadd.zju.edu.cn/protacdb,404,,,http://web.archive.org/web/20220904061441/http://cadd.zju.edu.cn/protacdb/,2021-01-01,"Innovation Institute for Artificial Intelligence in Medicine of Zhejiang University, College of Pharmaceutical Sciences, Zhejiang University, Hangzhou 310058, Zhejiang, China.","Weng G, Shen C, Cao D, Gao J, Dong X, He Q, Yang B, Li D, Wu J, Hou T",,"National Natural Science Foundation of China, National Natural Science Foundation of China, Zhejiang Provincial Natural Science Foundation, National Key Research and Development Program of China, Key R&D Program of Zhejiang Province",13.0,China +33104790,PhycoCosm,0.996997952,PhycoCosm,0.996997952,,0,1,http://phycocosm.jgi.doe.gov,301,Canada,"(43.6532,-79.3832)",no_wayback,2021-01-01,"US Department of Energy Joint Genome Institute, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA.","Grigoriev IV, Hayes RD, Calhoun S, Kamel B, Wang A, Ahrendt S, Dusheyko S, Nikitin R, Mondo SJ, Salamov A, Shabalov I, Kuo A",,"U.S. Department of Energy, U.S. 
Department of Energy",13.0,United States +33137192,Plant-ImputeDB,0.997689724,Plant-ImputeDB,0.997689724,,0,1,http://gong_lab.hzau.edu.cn/Plant_imputeDB,"HTTPConnectionPool(host='gong_lab.hzau.edu.cn', port=80): Max retries exceeded with url: /Plant_imputeDB (Caused by ReadTimeoutError(""HTTPConnectionPool(host='gong_lab.hzau.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2021-01-01,"Hubei Key Laboratory of Agricultural Bioinformatics, College of Informatics, Huazhong Agricultural University, Wuhan 430070, P.R. China.","Gao Y, Yang Z, Yang W, Yang Y, Gong J, Yang QY, Niu X",,"Huazhong Agricultural University Scientific & Technological Self - innovation Foundation, Fundamental Research Funds for the Central Universities, National Key Research and Development Plan, China",1.0,China +33186585,PolarProtDb,0.996842086,PolarProtDb,0.996842086,,0,1,http://polarprotdb.enzim.hu,302,,,no_wayback,2020-11-10,"Membrane Protein Bioinformatics Research Group, Institute of Enzymology, Research Centre for Natural Sciences, Magyar Tudósok körútja 2, H-1117 Budapest, Hungary.","Zeke A, Dobson L, Szekeres LI, Langó T, Tusnády GE",,"European Molecular Biology Organization, Magyar Tudományos Akadémia, Hungarian Scientific Research Fund",0.0,Hungary +33196798,Promiscuous,0.892922521,Promiscuous,0.892922521,,0,1,http://bioinformatics.charite.de/promiscuous2,301,,,http://web.archive.org/web/20220930000324/https://bioinformatics.charite.de/promiscuous2/,2021-01-01,"Charité Universitaetsmedizin Berlin, Institute of Physiology, Structural Bioinformatics Group, Berlin 10117, Germany.","Gallo K, Goede A, Eckert A, Moahamed B, Preissner R, Gohlke BO",,,6.0,Germany +"33196841, 34826364",ProThermDB,0.998294175,ProThermDB,0.998294175,,0,2,http://web.iitm.ac.in/bioinfo2/prothermdb/index.html,302,,,http://web.archive.org/web/20221017150933/https://web.iitm.ac.in/bioinfo2/prothermdb/index.html,2021-11-01,"Department of Biotechnology, Bhupat and Jyoti Mehta School of BioSciences, 
Indian Institute of Technology Madras, Chennai 600 036, Tamilnadu, India., Department of Biotechnology, Bhupat and Jyoti Mehta School of BioSciences, Indian Institute of Technology Madras, Chennai, Tamil Nadu, India.","Nikam R, Kulandaisamy A, Harini K, Sharma D, Gromiha MM, Kulandaisamy A, Nikam R, Harini K, Sharma D, Gromiha MM",", ","Indian Institute of Technology Madras, ",19.0,"India, India" +33216897,PRID,0.985948801,PRID,0.985948801,predicted rat interactome database,0.924220908,1,http://rat.biomedtzc.cn,200,,,http://web.archive.org/web/20220430013208/http://rat.biomedtzc.cn/,2020-11-01,"Institute of Big data and Artificial Intelligence in Medicine, School of Electronics & Information Engineering, Taizhou University, 1139 Shifu Avenue, Taizhou, 318000, China.","Tao YT, Ding XB, Jin J, Zhang HB, Guo WP, Ruan L, Yang QL, Chen PC, Yao H, Chen X",,,0.0,China +33245779,PheLiGe,0.99728775,PheLiGe,0.99728775,,0,1,http://phelige.com,301,,"(55.0415,82.9346)",http://web.archive.org/web/20220731092343/https://phelige.com/,2021-01-01,"Theoretical and Applied Functional Genomics Laboratory, Novosibirsk State University, Novosibirsk 630090, Russia.","Shashkova TI, Pakhomov ED, Gorev DD, Karssen LC, Joshi PK, Aulchenko YS",,"PolyKnomics BV, Russian Ministry of Education and Science",4.0, +33330918,piRNA-eQTL,0.997409006,piRNA-eQTL,0.997409006,,0,1,http://njmu-edu.cn:3838/piRNA-eQTL,301,,,http://web.archive.org/web/20220617193229/http://njmu-edu.cn:3838/piRNA-eQTL/,2021-01-01,"Department of Environmental Genomics, Jiangsu Key Laboratory of Cancer Biomarkers, Prevention and Treatment, Collaborative Innovation Center for Cancer Personalized Medicine, Nanjing Medical University, Nanjing, China.","Xin J, Du M, Jiang X, Wu Y, Ben S, Zheng R, Chu H, Li S, Zhang Z, Wang M",,"Jiangsu Higher Education Institutions, National Natural Science Foundation of China",9.0,China 
+33388027,Propedia,0.98778069,Propedia,0.98778069,,0,1,http://bioinfo.dcc.ufmg.br/propedia,301,,,http://web.archive.org/web/20221010214041/http://bioinfo.dcc.ufmg.br/propedia/,2021-01-02,"Laboratory of Bioinformatics and Systems (LBS), Department of Computer Science, Universidade Federal de Minas Gerais, Av Pres. Antônio Carlos, Belo Horizonte, MG, 31720-901, Brazil.","Martins PM, Santos LH, Mariano D, Queiroz FC, Bastos LL, Gomes IS, Fischer PHC, Rocha REO, Silveira SA, de Lima LHF, de Magalhães MTQ, Oliveira MGA, de Melo-Minardi RC",,Coordenação de Aperfeiçoamento de Pessoal de Nível Superior,3.0,Brazil +33539888,Prokaryotic Promoter Database,0.956242067,PPD,0.750178277,Prokaryotic Promoter Database,0.956242067,1,http://lin-group.cn/database/ppd,301,,,http://web.archive.org/web/20220202123712/http://lin-group.cn/database/ppd/,2021-02-02,"Center for Informational Biology, School of Life Science and Technology, University of Electronic Science and Technology of China, Chengdu 610054, China.","Su W, Liu ML, Yang YH, Wang JS, Li SH, Lv H, Dao FY, Yang H, Lin H",,,6.0,"China, China" +33546584,Plantannot,0.995641232,Plantannot,0.995641232,Plant Co-expression Annotation Resource,0.853625529,1,http://www.machado.cnptia.embrapa.br/plantannot,302,,,http://web.archive.org/web/20210315154041/https://www.machado.cnptia.embrapa.br/plantannot,2021-02-05,"Graduate Program in Bioinformatics, Institute of Biological Sciences, Universidade Federal de Minas Gerais, Belo Horizonte, Minas Gerais, 31270-901, Brazil.","José Andrade Viana M, Zerlotini A, de Alvarenga Mudadu M",,Embrapa,0.0,Brazil +33554247,PMI-DB,0.978619933,PMI-DB,0.978619933,,0,1,http://easybioai.com/PMIDB,301,,,no_wayback,2021-09-01,"The State Key Laboratory of Reproductive Regulation and Breeding of Grassland Livestock, School of Life Sciences, Inner Mongolia University, Inner Mongolia, 010010, China.","Zhao T, Liu J, Zeng X, Wang W, Li S, Zang T, Peng J, Yang Y",,"National Key Research and Development Program of 
China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",14.0,"China, Mongolia, Mongolia" +34059664,PSSRD,0.96689924,PSSRD,0.96689924,SSR database,0.615605434,1,http://www.pssrd.info,200,China,"(39.9143,116.3861)",http://web.archive.org/web/20211027222549/http://www.pssrd.info/,2021-06-01,"School of Life Sciences/Library, North China University of Science and Technology, Tangshan, Hebei, 063210, China. songxm@ncst.edu.cn.","Song X, Yang Q, Bai Y, Gong K, Wu T, Yu T, Pei Q, Duan W, Huang Z, Wang Z, Liu Z, Kang X, Zhao W, Ma X",,,0.0,"China, China" +34107869,PINIR,0.995020032,PINIR,0.995020032,Pin-II type PIs Information Resource,0.970687181,1,http://pinir.ncl.res.in,302,India,"(28.4597,77.0282)",http://web.archive.org/web/20220526045110/https://pinir.ncl.res.in/,2021-06-09,"Publication and Science Communication Unit, CSIR-National Chemical Laboratory, Dr. Homi Bhabha Road, Pune, 411008, India.","Yadav NK, Saikhedkar NS, Giri AP",,"Council of Scientific and Industrial Research, India",0.0,India +34111777,PID,0.98083353,PID,0.98083353,Plant Intron Database,0.959512129,1,http://biodb.sdau.edu.cn/PID/index.php,"HTTPConnectionPool(host='biodb.sdau.edu.cn', port=80): Max retries exceeded with url: /PID/index.php (Caused by ConnectTimeoutError(, 'Connection to biodb.sdau.edu.cn timed out. (connect timeout=5)'))",,,no_wayback,2021-06-05,"Agricultural Big-Data Research Center and College of Plant Protection, Shandong Agricultural University, Daizong Road No. 61, Tai'an, Shandong, 271018, China. 
Electronic address: 17863800378@163.com.","Gao Y, Ge F, Zhang R, Yin D, Zhao Y, Tang H, Zhang L, Yang L",,"National Key Research and Development Program of China, National Key Research and Development Program of China, Innovation Team Project for Modern Agricultural Industrious Technology System of Shandong Province, Innovation Team Project for Modern Agricultural Industrious Technology System of Shandong Province",0.0,China +34122478,PSDX,0.995595694,PSDX,0.995595694,trichocarpa Stem Differentiating,0.907250769,1,http://forestry.fafu.edu.cn/db/SDX,"HTTPConnectionPool(host='forestry.fafu.edu.cn', port=80): Max retries exceeded with url: /db/SDX (Caused by ConnectTimeoutError(, 'Connection to forestry.fafu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220310161113/http://forestry.fafu.edu.cn/db/SDX/,2021-05-20,"Basic Forestry and Proteomics Research Center, College of Forestry, Fujian Agriculture and Forestry University, Fuzhou, China.","Wang H, Liu S, Dai X, Yang Y, Luo Y, Gao Y, Liu X, Wei W, Wang H, Xu X, Reddy ASN, Jaiswal P, Li W, Liu B, Gu L",,National Key Research and Development Program of China,0.0,China +34319727,ProBiS-Dock,0.990430673,ProBiS-Dock,0.990430673,,0,1,http://probis-dock-database.insilab.org,200,,,http://web.archive.org/web/20220206200728/http://probis-dock-database.insilab.org/,2021-07-28,"Theory Department, National Institute of Chemistry, Hajdrihova 19, SI-1000 Ljubljana, Slovenia.","Konc J, Lešnik S, Škrlj B, Janežič D",,"Nvidia, Javna Agencija za Raziskovalno Dejavnost RS, Javna Agencija za Raziskovalno Dejavnost RS, Javna Agencija za Raziskovalno Dejavnost RS, Javna Agencija za Raziskovalno Dejavnost RS, Javna Agencija za Raziskovalno Dejavnost RS",2.0,Slovenia +34403192,Plant Metabolic Network,0.702312887,,0,Plant Metabolic Network,0.702312887,1,http://plantcyc.org,301,,,http://web.archive.org/web/20221008212003/https://plantcyc.org/,2021-10-27,"Department of Plant Biology, Carnegie Institution for Science, 
Stanford, California, 94305, USA.","Hawkins C, Ginzburg D, Zhao K, Dwyer W, Xue B, Xu A, Rice S, Cole B, Paley S, Karp P, Rhee SY",,,12.0,United States +34559210,QSDB,0.994050586,QSDB,0.994050586,Quorum Sensing Database,0.967420578,1,http://qsdb.org,403,,,no_wayback,2021-09-24,"Department of Information and Computer Science, University of Konstanz, Universitätsstraße 10, Konstanz, Baden-Württemberg 78464, Germany.","Klein K, Garkov D, Rütschlin S, Böttcher T, Schreiber F",,"Deutsche Forschungsgemeinschaft, EU FP7",0.0,Germany +34716373,PPMdb,0.994303644,PPMdb,0.994303644,PlantPathMarks,0.944572687,1,http://ppmdb.easyomics.org,200,,,http://web.archive.org/web/20221004072924/http://ppmdb.easyomics.org/,2021-10-29,"African Genome Center, Mohammed VI Polytechnic University, Ben Guerir, Morocco.","Mokhtar MM, El Allali A, Hegazy MF, Atia MAM",,The author(s) received no specific funding for this work.,1.0,Morocco +21216747,Rice TOGO Browser,0.827372536,Rice TOGO Browser,0.827372536,,0,1,http://agri-trait.dna.affrc.go.jp,"HTTPConnectionPool(host='agri-trait.dna.affrc.go.jp', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20150908030848/http://agri-trait.dna.affrc.go.jp/,2011-01-06,"Genome Resource Center, National Institute of Agrobiological Sciences, Kannondai 2-1-2, Tsukuba, Ibaraki, 305-8602 Japan. 
nagamura@nias.affrc.go.jp","Nagamura Y, Antonio BA, Sato Y, Miyao A, Namiki N, Yonemaru J, Minami H, Kamatsuki K, Shimura K, Shimizu Y, Hirochika H",,,16.0,Japan +"21398668, 25300483",sc-PDB,0.996898383,sc-PDB,0.996898383,,0,2,http://bioinfo-pharma.u-strasbg.fr/scPDB,301,,,http://web.archive.org/web/20221005074327/http://bioinfo-pharma.u-strasbg.fr/scPDB/,2014-10-09,"Structural Chemogenomics Group, Laboratory of Therapeutic Innovation, UMR7200 CNRS/University of Strasbourg, Faculté de Pharmacie, Illkirch, France., Laboratoire d'innovation thérapeutique, Medalis Drug Discovery Center, UMR7200 CNRS-Université de Strasbourg, F-67400 Illkirch, France.","Meslamani J, Rognan D, Kellenberger E, Desaphy J, Bret G, Rognan D, Kellenberger E",", ",", ",96.0,"France, France" +21464840,RMRIMS,0.633491713,RMRIMS,0.633491713,Research,0.512701094,1,http://biomedinformri.org/calp,"HTTPConnectionPool(host='biomedinformri.org', port=80): Max retries exceeded with url: /calp (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2011-03-02,"BioMedical Informatics Division, Rajendra Memorial Research Institute of Medical Sciences (RMRIMS), Agam kuan, Patna-800007, India.","Dikhit MR, Nathasharma YP, Patel L, Rana SP, Sahoo GC, Das P",,,1.0,India +21472436,SBKB,0.985803445,SBKB,0.985803445,Structural Biology Knowledgebase,0.919640875,1,http://sbkb.org,301,,,http://web.archive.org/web/20220228140735/http://sbkb.org/,2011-04-07,"Department of Chemistry and Chemical Biology, Rutgers, The State University of New Jersey, Piscataway, NJ 08854, USA.","Gabanyi MJ, Adams PD, Arnold K, Bordoli L, Carter LG, Flippen-Andersen J, Gifford L, Haas J, Kouranov A, McLaughlin WA, Micallef DI, Minor W, Shah R, Schwede T, Tao YP, Westbrook JD, Zimmerman M, Berman HM",,"NIGMS NIH HHS, NIGMS NIH HHS",41.0,"Jersey, United States" +21575179,SORGOdb,0.995990634,SORGOdb,0.995990634,Superoxide Reductase 
Gene,0.695761979,1,http://sorgo.genouest.org/index.php,200,,,http://web.archive.org/web/20220616024853/http://sorgo.genouest.org/index.php,2011-05-16,"CNRS UMR 6026, ICM, Equipe Sp@rte, Université de Rennes 1, Campus de Beaulieu, 35042 Rennes, France. celine.lucchetti@univ-rennes1.fr","Lucchetti-Miganeh C, Goudenège D, Thybert D, Salbert G, Barloy-Hubler F",,,12.0,France +21586118,RCGDB,0.987567484,RCGDB,0.987567484,Roche Cancer Genome Database,0.956028092,1,http://rcgdb.bioinf.uni-sb.de/MutomeWeb,"HTTPConnectionPool(host='rcgdb.bioinf.uni-sb.de', port=80): Max retries exceeded with url: /MutomeWeb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20130218041535/http://rcgdb.bioinf.uni-sb.de:80/MutomeWeb/,2011-05-17,"Roche Diagnostics GmbH Pharma Research and Early Development Informatics, Penzberg, Germany. jan.kuentzer@roche.com","Küntzer J, Maisel D, Lenhof HP, Klostermann S, Burtscher H",,,4.0,Germany +21729256,RASOnD,0.995079184,RASOnD,0.995079184,RAS Oncogene Database,0.942642281,1,"http://202.141.47.181/rasond/, http://www.aiims.edu/RAS.html","HTTPConnectionPool(host='202.141.47.181', port=80): Max retries exceeded with url: /rasond/ (Caused by ConnectTimeoutError(, 'Connection to 202.141.47.181 timed out. (connect timeout=5)')), 301",,", ","no_wayback, http://web.archive.org/web/20140722084739/http://www.aiims.edu/RAS.html",2011-07-05,"Department of Biophysics, All India Institute of Medical Sciences, New Delhi, India.","Kulsum U, Singh V, Sharma S, Srinivasan A, Singh TP, Kaur P",,,5.0,"India, India" +21938212,RiDs db,0.977813244,RiDs db,0.977813244,Repeats in diseases database,0.806841683,1,http://115.111.90.196/ridsdb/index.php,"HTTPConnectionPool(host='115.111.90.196', port=80): Max retries exceeded with url: /ridsdb/index.php (Caused by ConnectTimeoutError(, 'Connection to 115.111.90.196 timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20140722092852/http://115.111.90.196/ridsdb/index.php,2011-09-06,"Centre for Cellular and Molecular Biology, Habsiguda, Hyderabad - 500007, Andhra Pradesh, India.","Chaturvedi A, Tiwari S, Jesudasan RA",,,1.0,India +22067445,SCRIPDB,0.997730494,SCRIPDB,0.997730494,,0,1,http://dcv.uhnres.utoronto.ca/SCRIPDB,200,,,http://web.archive.org/web/20220307172457/http://dcv.uhnres.utoronto.ca/SCRIPDB,2011-11-08,"Department of Computer Science, University of Toronto, Toronto, Ontario M5G 1L7, Canada.","Heifets A, Jurisica I",,,10.0,Canada +22075996,SNPeffect,0.994509757,SNPeffect,0.994509757,,0,1,http://snpeffect.switchlab.org,301,,,http://web.archive.org/web/20220802145750/https://snpeffect.switchlab.org/,2011-11-10,"VIB Switch Laboratory, 3000 Leuven, Belgium.","De Baets G, Van Durme J, Reumers J, Maurer-Stroh S, Vanhee P, Dopazo J, Schymkowitz J, Rousseau F",,,117.0,Belgium +22080561,SimpleSearch,0.99120003,SimpleSearch,0.99120003,,0,1,http://www.GABI-Kat.de,302,,,http://web.archive.org/web/20221016200521/https://www.gabi-kat.de/,2011-11-12,"Center for Biotechnology, Bielefeld University, Universitaetsstrasse 25, D-33615 Bielefeld, Germany.","Kleinboelting N, Huep G, Kloetgen A, Viehoever P, Weisshaar B",,,203.0,Germany +22086956,SEQwiki,0.989065051,SEQwiki,0.989065051,SEQanswers,0.585303307,1,"http://SEQanswers.com/, http://wiki.SEQanswers.com","301, HTTPConnectionPool(host='wiki.seqanswers.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,", ","http://web.archive.org/web/20221109032011/https://www.seqanswers.com/, no_wayback",2011-11-15,"School of Life Sciences, The Chinese University of Hong Kong, Shatin, NT, Hong Kong SAR. 
dan.bolser@gmail.com","Li JW, Robison K, Martin M, Sjödin A, Usadel B, Young M, Olivares EC, Bolser DM",,,22.0,"Hong Kong, Hong Kong" +22099701,SA-Motifbase,0.899282444,SA-Motifbase,0.899282444,,0,1,http://bioinfo.cis.nctu.edu.tw/samotifbase,"HTTPConnectionPool(host='bioinfo.cis.nctu.edu.tw', port=80): Max retries exceeded with url: /samotifbase (Caused by ConnectTimeoutError(, 'Connection to bioinfo.cis.nctu.edu.tw timed out. (connect timeout=5)'))",,,no_wayback,2011-11-17,"Department of Computer Science, National Chiao Tung University, 1001 Tashuei Rd., Hsinchu, Taiwan.","Ku SY, Hu YJ",,,3.0, +22120661,SalmonDB,0.996498466,SalmonDB,0.996498466,,0,1,http://genomicasalmones.dim.uchile.cl,200,,,http://web.archive.org/web/20221017001547/http://genomicasalmones.dim.uchile.cl/,2011-11-26,"Laboratory of Bioinformatics and Mathematics of the Genome, Center for Mathematical Modeling (UMI 2807 CNRS) and Center for Genome Regulation (Fondap 15090007), University of Chile, Santiago, Chile.","Di Génova A, Aravena A, Zapata L, González M, Maass A, Iturra P",,,10.0,"Chile, Chile, United States Minor Outlying Islands" +22127861,RNA CoSSMos,0.782637835,RNA CoSSMos,0.782637835,RNA Characterization of Secondary Structure Motifs,0.77531596,1,http://cossmos.slu.edu,"HTTPConnectionPool(host='cossmos.slu.edu', port=80): Max retries exceeded with url: / (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,http://web.archive.org/web/20200226043123/http://cossmos.slu.edu:80/,2011-11-29,"Department of Chemistry, Saint Louis University, Saint Louis, MO 63103, USA.","Vanegas PL, Hudson GA, Davis AR, Kelly SC, Kirkpatrick CC, Znosko BM",,NIGMS NIH HHS,16.0,United States +"22139920, 28110602",SitEx,0.971261322,SitEx,0.971261322,,0,2,http://www-bionet.sscc.ru/sitex,301,,,http://web.archive.org/web/20180107163136/http://www-bionet.sscc.ru:80/sitex/,2017-01-23,"Computer Proteomics Laboratory, Institute of Cytology and Genetics SB 
RAS, 10 Lavrentyeva Avenue, 630090 Novosibirsk, Russia., * Institute of Cytology and Genetics, Siberian Branch of Russian Academy of Sciences, Lavrentyeva 10, Novosibirsk, 630090, Russia.","Medvedeva I, Demenkov P, Kolchanov N, Ivanisenko V, Medvedeva IV, Demenkov PS, Ivanisenko VA",", ","European Commission FP7, Russian Foundation for Basic Research, Russian Science Foundation, Russian Science Foundation",5.0, +22139942,RecountDB,0.995889306,RecountDB,0.995889306,,0,1,http://recountdb.cbrc.jp,"HTTPConnectionPool(host='recountdb.cbrc.jp', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20131110064459/http://recountdb.cbrc.jp:80/,2011-12-01,"Graduate School of Frontier Sciences, University of Tokyo, 5-1-5 Kashiwanoha, Kashiwa 277-8562, Japan.","Wijaya E, Frith MC, Asai K, Horton P",,,0.0,Japan +22140105,ScerTF,0.989538968,ScerTF,0.989538968,,0,1,http://ural.wustl.edu/ScerTF,"HTTPConnectionPool(host='ural.wustl.edu', port=80): Max retries exceeded with url: /ScerTF (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,no_wayback,2011-12-02,"Department of Genetics, Washington University Medical School, St Louis, MO, USA.","Spivak AT, Stormo GD",,"NHGRI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS",45.0,United States +22140107,SNPedia,0.99539566,SNPedia,0.99539566,,0,1,http://www.SNPedia.com,301,,,no_wayback,2011-12-02,"River Road Bio, LLC, 9812 Falls Road #114-237, Potomac, Maryland, MD 20854, USA.","Cariaso M, Lennon G",,,90.0,United States +22171328,seeQTL,0.995602727,seeQTL,0.995602727,,0,1,http://www.bios.unc.edu/research/genomic_software/seeQTL,301,,,http://web.archive.org/web/20180122104200/http://www.bios.unc.edu:80/research/genomic_software/seeQTL/,2011-12-13,"Department of Biostatistics, University of North Carolina, Chapel Hill, NC 27599, USA. 
kxia@bios.unc.edu","Xia K, Shabalin AA, Huang S, Madar V, Zhou YH, Wang W, Zou F, Sun W, Sullivan PF, Wright FA",,"NIMH NIH HHS, NIMH NIH HHS, NIEHS NIH HHS, NIMH NIH HHS, NIMH NIH HHS",90.0,United States +22210871,SNPdbe,0.988785923,SNPdbe,0.988785923,,0,1,http://www.rostlab.org/services/snpdbe,302,,,http://web.archive.org/web/20221020095240/https://www.rostlab.org/services/snpdbe/,2011-12-30,"Technische Universitaet Muenchen, Bioinformatics - I12, Informatik, Boltzmannstrasse 3, Muenchen, Germany. schaefer@rostlab.org","Schaefer C, Meier A, Rost B, Bromberg Y",,,32.0,Germany +22345621,RNA-Seq Atlas,0.958900797,RNA-Seq Atlas,0.958900797,,0,1,http://medicalgenomics.org/rna_seq_atlas,301,,,http://web.archive.org/web/20220524120738/https://medicalgenomics.org/rna_seq_atlas/,2012-02-17,"Department of Medicine I, Johannes Gutenberg University, 55131 Mainz, Germany. kruppm@uni-mainz.de","Krupp M, Marquardt JU, Sahin U, Galle PR, Castle J, Teufel A",,,99.0,Germany +22365971,SITVITWEB,0.995786428,SITVITWEB,0.995786428,,0,1,http://www.pasteur-guadeloupe.fr:8081/SITVIT_ONLINE,302,,,http://web.archive.org/web/20220728171530/http://www.pasteur-guadeloupe.fr:8081/SITVIT_ONLINE/,2012-02-17,"WHO Supranational TB Reference Laboratory, Unité de la Tuberculose et des Mycobactéries, Institut Pasteur de Guadeloupe, Abymes Cedex, Guadeloupe.","Demay C, Liens B, Burguière T, Hill V, Couvin D, Millet J, Mokrousov I, Sola C, Zozio T, Rastogi N",,,244.0,"Guadeloupe, Guadeloupe" +22411954,RNAimmuno,0.986062527,RNAimmuno,0.986062527,,0,1,http://rnaimmuno.ibch.poznan.pl,200,,,http://web.archive.org/web/20220615184548/http://rnaimmuno.ibch.poznan.pl/,2012-03-12,"Laboratory of Cancer Genetics, Institute of Bioorganic Chemistry, Polish Academy of Sciences, 61-704 Poznan, Poland.","Olejniczak M, Galka-Marciniak P, Polak K, Fligier A, Krzyzosiak WJ",,,21.0,Poland +22415763,Rett Networked Database,0.598606253,,0,Rett Networked 
Database,0.598606253,1,http://www.rettdatabasenetwork.org,200,Italy,"(43.4631,11.8783)",http://web.archive.org/web/20220318015441/https://www.rettdatabasenetwork.org/,2012-04-13,"Medical Genetics, Department of Biotechnology, University of Siena, Viale Bracci 2, Siena, Italy.","Grillo E, Villard L, Clarke A, Ben Zeev B, Pineda M, Bahi-Buisson N, Hryniewiecka-Jaworska A, Bienvenu T, Armstrong J, Roche-Martinez A, Mari F, Veneselli E, Russo S, Vignoli A, Pini G, Djuric M, Bisgaard AM, Mejaški Bošnjak V, Polgár N, Cogliati F, Ravn K, Pintaudi M, Melegh B, Craiu D, Djukic A, Renieri A",,Telethon,10.0,Italy +22419780,SEQanswers,0.993383706,SEQanswers,0.993383706,,0,1,http://SEQanswers.com,301,,,http://web.archive.org/web/20221109032011/https://www.seqanswers.com/,2012-03-13,"School of Life Sciences, The Chinese University of Hong Kong, Shatin, NT, Hong Kong SAR.","Li JW, Schmieder R, Ward RM, Delenick J, Olivares EC, Mittelman D",,,30.0,"Hong Kong, Hong Kong" +22544707,SNPnexus,0.9974401,SNPnexus,0.9974401,,0,1,http://www.snp-nexus.org,301,,,http://web.archive.org/web/20221101211916/https://snp-nexus.org/,2012-04-28,"Barts Cancer Institute, Queen Mary University of London, London EC1M 6BQ, UK.","Dayem Ullah AZ, Lemoine NR, Chelala C",,"Cancer Research UK, Cancer Research UK",83.0, +22608002,RCDB,0.995647073,RCDB,0.995647073,Renal Cancer Gene Database,0.962979174,1,http://www.juit.ac.in/attachments/jsr/rcdb/homenew.html,"HTTPConnectionPool(host='www.juit.ac.in', port=80): Max retries exceeded with url: /attachments/jsr/rcdb/homenew.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20221030032628/https://www.juit.ac.in/attachments/jsr/rcdb/homenew.html,2012-05-18,"Department of Biotechnology and Bioinformatics, Jaypee University of Information Technology, 173234, Waknaghat, Solan, Himachal Pradesh, India. 
jayashree_ramana@yahoo.co.in",Ramana J,,,26.0,India +22645600,RiceRBP,0.996419489,RiceRBP,0.996419489,,0,1,http://www.bioinformatics2.wsu.edu/RiceRBP,"HTTPConnectionPool(host='www.bioinformatics2.wsu.edu', port=80): Max retries exceeded with url: /RiceRBP (Caused by ConnectTimeoutError(, 'Connection to www.bioinformatics2.wsu.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20121116074911/http://www.bioinformatics2.wsu.edu/RiceRBP/,2012-05-14,"Institute of Biological Chemistry, Washington State University Pullman, WA, USA.","Doroshenk KA, Crofts AJ, Morris RT, Wyrick JJ, Okita TW",,,11.0,United States +22700939,RNAiAtlas,0.844358683,RNAiAtlas,0.844358683,,0,1,http://www.rnaiatlas.ethz.ch,"HTTPConnectionPool(host='www.rnaiatlas.ethz.ch', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2012-06-14,"Bioquant, University of Heidelberg, Screening, Im Neuenheimer Feld 267, D-69120 Heidelberg, Germany.","Mazur S, Csucs G, Kozak K",,,4.0,Germany +22784567,SigCS base,0.819766919,SigCS base,0.819766919,,0,1,http://sysbio.kribb.re.kr/sigcs,"HTTPConnectionPool(host='sysbio.kribb.re.kr', port=80): Max retries exceeded with url: /sigcs (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20140722110056/http://sysbio.kribb.re.kr/sigcs/,2011-12-14,"Medical Genome Research Center, KRIBB, Daejeon 305-806, Korea.","Park YK, Bang OS, Cha MH, Kim J, Cole JW, Lee D, Kim YJ",,,3.0, +22833525,RedoxDB,0.996839046,RedoxDB,0.996839046,,0,1,http://biocomputer.bio.cuhk.edu.hk/RedoxDB,"HTTPConnectionPool(host='biocomputer.bio.cuhk.edu.hk', port=80): Max retries exceeded with url: /RedoxDB (Caused by ConnectTimeoutError(, 'Connection to biocomputer.bio.cuhk.edu.hk timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20160315122104/http://biocomputer.bio.cuhk.edu.hk/RedoxDB/,2012-07-25,"School of Life Sciences, The Chinese University of Hong Kong, Shatin, New Territories, Hong Kong, China.","Sun MA, Wang Y, Cheng H, Zhang Q, Ge W, Guo D",,,40.0,"China, Hong Kong, Hong Kong" +22965133,RhesusBase,0.997771919,RhesusBase,0.997771919,,0,1,http://www.rhesusbase.org,"HTTPConnectionPool(host='www.rhesusbase.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190414050927/http://www.rhesusbase.org:80/,2012-09-10,"Institute of Molecular Medicine, Peking University, Beijing, China.","Zhang SJ, Liu CJ, Shi M, Kong L, Chen JY, Zhou WZ, Zhu X, Yu P, Wang J, Yang X, Hou N, Ye Z, Zhang R, Xiao R, Zhang X, Li CY",,,23.0,China +"22976082, 30053264",RMDB,0.97517405,RMDB,0.97517405,Mapping DataBase,0.709348813,2,http://rmdb.stanford.edu,301,United States,"(45.5235,-122.676)",no_wayback,2018-01-01,"Department of Biochemistry and Biomedical Informatics Program, Stanford University, Stanford, CA 94305, USA., Department of Biochemistry, Stanford University School of Medicine, Stanford CA 94305, USA.","Cordero P, Lucks JB, Das R, Yesselman JD, Tian S, Liu X, Shi L, Li JB, Das R",", ","NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",53.0,"United States, United States" +23044550,SemMedDB,0.998119652,SemMedDB,0.998119652,,0,1,http://skr3.nlm.nih.gov/SemMedDB,302,,,http://web.archive.org/web/20210923025945/https://skr3.nlm.nih.gov/SemMedDB/,2012-10-08,"Lister Hill National Center for Biomedical Communications, National Library of Medicine, Bethesda, MD 20894, USA. 
kilicogluh@mail.nih.gov","Kilicoglu H, Shin D, Fiszman M, Rosemblat G, Rindflesch TC",,Intramural NIH HHS,99.0,United States +23055621,SalmonellaBase,0.992680967,SalmonellaBase,0.992680967,,0,1,http://www.salmonellabase.com,"HTTPConnectionPool(host='www.salmonellabase.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180107020246/http://salmonellabase.com/,2012-08-03,"Department of Bioinformatics, Sathyabama University, Jeppiaar Nagar, Rajiv Gandhi Salai, Chennai - 600 119.","Pushpa OB, Suresh MX",,,0.0, +23129220,SNObase,0.997782171,SNObase,0.997782171,,0,1,http://www.nitrosation.org,"HTTPConnectionPool(host='www.nitrosation.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20211219131457/http://nitrosation.org/,2012-11-06,"National Laboratory of Biomacromolecules, Institute of Biophysics, Chinese Academy of Sciences, Beijing 100101, China.","Zhang X, Huang B, Zhang L, Zhang Y, Zhao Y, Guo X, Qiao X, Chen C",,,10.0,China +23144556,RKN,0.919945776,RKN,0.919945776,,0,1,http://bioinformatics.towson.edu/RKN,404,,,http://web.archive.org/web/20140722155000/http://bioinformatics.towson.edu/RKN/,2012-10-01,"Department of Computer and Information Sciences, Towson University, 8000 York Road, Towson, MD 21252, USA.","Ismail A, Matthews BF, Alkharouf NW",,,1.0,United States +23155061,RNApathwaysDB,0.971205235,RNApathwaysDB,0.971205235,,0,1,http://iimcb.genesilico.pl/rnapathwaysdb,301,Poland,"(52.2296,21.0067)",no_wayback,2012-11-15,"Laboratory of Bioinformatics and Protein Engineering, International Institute of Molecular and Cell Biology in Warsaw, Trojdena 4, PL-02-109 Warsaw, Poland.","Milanowska K, Mikolajczak K, Lukasik A, Skorupski M, Balcer Z, Machnicka MA, Nowacka M, Rother KM, Bujnicki 
JM",,,4.0,Poland +23161692,SchistoDB,0.998987973,SchistoDB,0.998987973,,0,1,http://SchistoDB.net,301,,,http://web.archive.org/web/20150315041021/http://www.schistodb.net,2012-11-17,"Centro de Excelência em Bioinformática, National Institute for Science and Technology in Tropical Diseases FIOCRUZ-Minas, Belo Horizonte, MG 30190-002, Brazil.","Zerlotini A, Aguiar ER, Yu F, Xu H, Li Y, Young ND, Gasser RB, Protasio AV, Berriman M, Roos DS, Kissinger JC, Oliveira G",,FIC NIH HHS,25.0,Brazil +"23180763, 25392407, 31740968",SEVA-DB,0.994396701,SEVA-DB,0.994396701,Standard European Vector Architecture,0.724981114,3,http://seva.cnb.csic.es,301,,,no_wayback,2020-01-01,"Systems Biology Program, Centro Nacional de Biotecnología (CNB-CSIC), 28049 Cantoblanco-Madrid, Spain., Systems Biology Program, Centro Nacional de Biotecnología (CNB-CSIC), 28049 Cantoblanco-Madrid, Spain., Systems Biology Program, Centro Nacional de Biotecnología CSIC, Campus de la Universidad Autónoma de Madrid, 28049 Spain.","Silva-Rocha R, Martínez-García E, Calles B, Chavarría M, Arce-Rodríguez A, de Las Heras A, Páez-Espino AD, Durante-Rodríguez G, Kim J, Nikel PI, Platero R, de Lorenzo V, Martínez-García E, Aparicio T, Goñi-Moreno A, Fraile S, de Lorenzo V, Martínez-García E, Goñi-Moreno A, Bartley B, McLaughlin J, Sánchez-Sampedro L, Pascual Del Pozo H, Prieto Hernández C, Marletta AS, De Lucrezia D, Sánchez-Fernández G, Fraile S, de Lorenzo V",", , ",", , Engineering and Physical Sciences Research Council, Comunidad de Madrid, Spanish Ministry of Science",306.0,"Spain, Spain, Spain" +23180765,RiceXPro,0.97669971,RiceXPro,0.97669971,,0,1,http://ricexpro.dna.affrc.go.jp,301,Japan,"(35.6916,139.768)",http://web.archive.org/web/20221107054138/https://ricexpro.dna.affrc.go.jp/,2012-11-23,"Genome Resource Unit, Agrogenomics Research Center, National Institute of Agrobiological Sciences, Kannondai 2-1-2, Tsukuba, Ibaraki 305-8602, Japan.","Sato Y, Takehisa H, Kamatsuki K, Minami H, Namiki N, Ikawa H, 
Ohyanagi H, Sugimoto K, Antonio BA, Nagamura Y",,,145.0,Japan +23180784,RiceFREND,0.970509291,RiceFREND,0.970509291,,0,1,http://ricefrend.dna.affrc.go.jp,301,Japan,"(35.6916,139.768)",http://web.archive.org/web/20221110094950/https://ricefrend.dna.affrc.go.jp/,2012-11-24,"Genome Resource Unit, Agrogenomics Research Center, National Institute of Agrobiological Sciences, 2-1-2 Kannondai, Tsukuba, Ibaraki 305-8602, Japan.","Sato Y, Namiki N, Takehisa H, Kamatsuki K, Minami H, Ikawa H, Ohyanagi H, Sugimoto K, Itoh J, Antonio BA, Nagamura Y",,,60.0,Japan +"23180788, 26578591",SomamiR,0.985346377,SomamiR,0.985346377,,0,2,http://compbio.uthsc.edu/SomamiR,302,,,http://web.archive.org/web/20221013234258/https://compbio.uthsc.edu/SomamiR/,2015-11-17,"Department of Microbiology, The University of Tennessee Health Science Center, 858 Madison Avenue, Memphis, TN 38163, USA., Machine Intelligence Unit, Indian Statistical Institute, Kolkata, WB 700108, India anindyamail123@gmail.com.","Bhattacharya A, Ziebarth JD, Cui Y, Bhattacharya A, Cui Y",", ",", ",109.0,"India, United States" +23193278,RGKbase,0.996097326,RGKbase,0.996097326,Rice Genome Knowledgebase,0.938942921,1,http://rgkbase.big.ac.cn/RGKbase,"HTTPConnectionPool(host='rgkbase.big.ac.cn', port=80): Max retries exceeded with url: /RGKbase (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20161015151526/http://rgkbase.big.ac.cn:80/RGKbase/,2012-11-28,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100029, PR China.","Wang D, Xia Y, Li X, Hou L, Yu J",,,13.0,China +"23193283, 24293649",SILVA,0.996267796,SILVA,0.996267796,,0,2,http://www.arb-silva.de,301,,,http://web.archive.org/web/20221024171806/https://www.arb-silva.de/,2013-11-28,"Microbial Genomics and Bioinformatics Research Group, Max Planck Institute for Marine Microbiology, D-28359 Bremen, Germany., 
Microbial Genomics and Bioinformatics Research Group, Max Planck Institute for Marine Microbiology, D-28359 Bremen, Germany, Department of Botany, University of British Columbia, Vancouver V6T 1Z4, Canada, Department of Zoology, University of British Columbia, Vancouver V6T 1Z4, Canada, Ribocon GmbH, D-28359 Bremen, Germany, School of Engineering and Science, Jacobs University Bremen gGmbH, D-28759 Bremen, Germany and Lehrstuhl für Mikrobiologie, Technische Universität München, D-853530 Freising, Germany.","Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO, Yilmaz P, Parfrey LW, Yarza P, Gerken J, Pruesse E, Quast C, Schweer T, Peplies J, Ludwig W, Glöckner FO",", ",", ",7256.0,"Canada, Canada, Germany, Germany, Germany, Germany, Germany" +23193298,SecReT4,0.996587932,SecReT4,0.996587932,,0,1,http://db-mml.sjtu.edu.cn/SecReT4,302,,,http://web.archive.org/web/20221021084418/https://db-mml.sjtu.edu.cn/SecReT4,2012-11-28,"State Key Laboratory of Microbial Metabolism, Shanghai Jiaotong University, Shanghai 200030, China.","Bi D, Liu L, Tai C, Deng Z, Rajakumar K, Ou HY",,,51.0,China +23203869,SIFTS,0.997229233,SIFTS,0.997229233,Structure Integration with Function,0.943572327,1,http://pdbe.org/sifts,301,,,no_wayback,2012-11-29,"Protein Data Bank in Europe, EMBL-EBI, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. 
sameer@ebi.ac.uk","Velankar S, Dana JM, Jacobsen J, van Ginkel G, Gane PJ, Luo J, Oldfield TJ, O'Donovan C, Martin MJ, Kleywegt GJ",,"NHGRI NIH HHS, Wellcome Trust",128.0, +23203982,SINEBase,0.997263968,SINEBase,0.997263968,,0,1,http://sines.eimb.ru,301,,,no_wayback,2012-11-30,"Laboratory of Eukaryotic Genome Evolution, Engelhardt Institute of Molecular Biology, Moscow 119991, Russia.","Vassetzky NS, Kramerov DA",,,62.0, +23220571,SM2miR,0.988238767,SM2miR,0.988238767,,0,1,http://bioinfo.hrbmu.edu.cn/SM2miR,"HTTPConnectionPool(host='bioinfo.hrbmu.edu.cn', port=80): Max retries exceeded with url: /SM2miR (Caused by ConnectTimeoutError(, 'Connection to bioinfo.hrbmu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20190916151349/http://bioinfo.hrbmu.edu.cn:80/SM2miR/,2012-12-05,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.","Liu X, Wang S, Meng F, Wang J, Zhang Y, Dai E, Yu X, Li X, Jiang W",,,74.0,China +23239846,RadishBase,0.997795284,RadishBase,0.997795284,,0,1,http://bioinfo.bti.cornell.edu/radish,301,United States,"(42.4444,-76.4926)",http://web.archive.org/web/20220423094741/http://bioinfo.bti.cornell.edu/radish/,2012-12-13,"Key Laboratory of Horticultural Crop Biology and Germplasm Innovation, Ministry of Agriculture, Institute of Vegetables and Flowers, Chinese Academy of Agricultural Sciences, Beijing, China.","Shen D, Sun H, Huang M, Zheng Y, Li X, Fei Z",,,14.0,China +23299411,RAP-DB,0.992282307,RAP-DB,0.992282307,Rice Annotation Project Database,0.925042021,1,http://rapdb.dna.affrc.go.jp,301,,,http://web.archive.org/web/20221103175316/https://rapdb.dna.affrc.go.jp/,2013-01-07,"Agrogenomics Research Center, National Institute of Agrobiological Sciences, Tsukuba, Ibaraki, Japan.","Sakai H, Lee SS, Tanaka T, Numa H, Kim J, Kawahara Y, Wakimoto H, Yang CC, Iwamoto M, Abe T, Yamada Y, Muto A, Inokuchi H, Ikemura T, Matsumoto T, Sasaki T, Itoh T",,,275.0,Japan 
+23389821,RenalTube,0.996694028,RenalTube,0.996694028,,0,1,http://www.renaltube.com,302,Spain,"(36.7404,-4.0995)",http://web.archive.org/web/20170611115158/http://renaltube.com/,2013-02-07,"Department of Medicine, University of Oviedo, C/Julián Clavería s/n, 33006 Oviedo, Spain. namega@hotmail.com","Mejía N, Santos F, Claverie-Martín F, García-Nieto V, Ariceta G, Castaño L, ",,,8.0,Spain +23423175,SoyProDB,0.943566799,SoyProDB,0.943566799,soybean protein database,0.737450778,1,http://bioinformatics.towson.edu/Soybean_Seed_Proteins_2D_Gel_DB/Home.aspx,200,,,http://web.archive.org/web/20220913065932/http://bioinformatics.towson.edu/Soybean_Seed_Proteins_2D_Gel_DB/Home.aspx,2013-02-06,"Department of computer and information sciences, Towson University, Towson, MD 21252, USA.","Tavakolan M, Alkharouf NW, Khan FH, Natarajan S",,,5.0,United States +23457042,RCPedia,0.993256867,RCPedia,0.993256867,,0,1,http://www.bioinfo.mochsl.org.br/rcpedia,301,,,http://web.archive.org/web/20220813031612/https://www.bioinfo.mochsl.org.br/rcpedia/,2013-03-01,"Centro de Oncologia Molecular, Hospital Sírio-Libanês, São Paulo 01308-060, Brazil.","Navarro FC, Galante PA",,"FIC NIH HHS, FIC NIH HHS",18.0,Brazil +23547897,RegTransBase,0.996879399,RegTransBase,0.996879399,,0,1,http://regtransbase.lbl.gov,"HTTPConnectionPool(host='regtransbase.lbl.gov', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20080703222946/http://regtransbase.lbl.gov/,2013-04-02,"Department of Microbiology, University of California Davis, Davis, CA 95616, USA.","Cipriano MJ, Novichkov PN, Kazakov AE, Rodionov DA, Arkin AP, Gelfand MS, Dubchak I",,,34.0,United States +23660286,RiceSRTFDB,0.988805354,RiceSRTFDB,0.988805354,,0,1,http://www.nipgr.res.in/RiceSRTFDB.html,"HTTPConnectionPool(host='www.nipgr.res.in', port=80): Max retries exceeded with url: /RiceSRTFDB.html (Caused by 
NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20181014224223/http://www.nipgr.res.in:80/RiceSRTFDB.html,2013-05-09,"Functional and Applied Genomics Laboratory, National Institute of Plant Genome Research, Aruna Asaf Ali Marg, New Delhi 110067, India.","Priya P, Jain M",,,14.0,India +23724943,SGWHC,0.941945419,SGWHC,0.941945419,Sex and Gender Women's Health Collaborative,0.814800464,1,http://www.sgwhc.org,301,,,http://web.archive.org/web/20221028164640/https://sgwhc.org/,2013-06-01,"Emergency Medicine, Warren Alpert Medical School of Brown University, Rhode Island Hospital, Providence, RI, USA. amcgregormd@gmail.com.","McGregor AJ, Templeton K, Kleinman MR, Jenkins MR",,,17.0,United States +"24060102, 24175918",RegPrecise,0.986235499,RegPrecise,0.986235499,,0,2,http://regprecise.lbl.gov,301,Canada,"(43.6532,-79.3832)",http://web.archive.org/web/20221016205120/https://regprecise.lbl.gov/,2013-11-01,"Sanford-Burnham Medical Research Institute, 92037 La Jolla, CA, USA. rodionov@burnham.org., Lawrence Berkeley National Laboratory, Berkeley 94710, CA, USA. PSNovichkov@lbl.gov.","Sun EI, Leyn SA, Kazanov MD, Saier MH Jr, Novichkov PS, Rodionov DA, Novichkov PS, Kazakov AE, Ravcheev DA, Leyn SA, Kovaleva GY, Sutormin RA, Kazanov MD, Riehl W, Arkin AP, Dubchak I, Rodionov DA",", ","NIGMS NIH HHS, ",195.0,"United States, United States" +"24136998, 27987168",SoyKB,0.996702418,SoyKB,0.996702418,Soybean Knowledge Base,0.942847088,2,http://soykb.org,301,,,http://web.archive.org/web/20221007081227/https://www.soykb.org/,2017-01-01,"Department of Computer Science, University of Missouri, Columbia, MO 65211, USA, Christopher S. 
Bond Life Sciences Center, University of Missouri, Columbia, MO 65211, USA, National Center for Soybean Biotechnology, University of Missouri, Columbia, MO 65211, USA, Informatics Institute, University of Missouri, Columbia, MO 65211, USA and Division of Plant Sciences, University of Missouri, Columbia, MO 65211, USA., Department of Molecular Microbiology and Immunology, Medical Research Office School of Medicine, Informatics Institute, University of Missouri, 1201 E Rollins St., 271B LSC, Columbia, MO, 65201, USA. joshitr@missouri.edu.","Joshi T, Fitzpatrick MR, Chen S, Liu Y, Zhang H, Endacott RZ, Gaudiello EC, Stacey G, Nguyen HT, Xu D, Joshi T, Wang J, Zhang H, Chen S, Zeng S, Xu B, Xu D",", ",", ",38.0,"United States, United States, United States, United States, United States, United States" +24136999,RiceWiki,0.996466279,RiceWiki,0.996466279,,0,1,http://ricewiki.big.ac.cn,"HTTPConnectionPool(host='ricewiki.big.ac.cn', port=80): Max retries exceeded with url: / (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))",,,http://web.archive.org/web/20210307092229/http://ricewiki.big.ac.cn/,2013-10-16,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China, Research Institute of Subtropical Forestry, Chinese Academy of Forestry, Fuyang, Zhejiang 311400, China, School of Computer Science and Technology, Beijing Institute of Technology, Beijing 100081, China and College of Life Science and Technology, Huazhong Agricultural University, Wuhan 430070, China.","Zhang Z, Sang J, Ma L, Wu G, Wu H, Huang D, Zou D, Liu S, Li A, Hao L, Tian M, Xu C, Wang X, Wu J, Xiao J, Dai L, Chen LL, Hu S, Yu J",,,10.0,"China, China, China, China" +24146757,SIDD,0.992121279,SIDD,0.992121279,,0,1,http://mlg.hit.edu.cn/SIDD,"HTTPConnectionPool(host='mlg.hit.edu.cn', port=80): Max retries exceeded with url: /SIDD (Caused by ConnectTimeoutError(, 
'Connection to mlg.hit.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160317001838/http://mlg.hit.edu.cn/SIDD/,2013-10-11,"Center for Bioinformatics, School of Computer Science and Technology, Harbin Institute of Technology, Harbin, Heilongjiang, China.","Cheng L, Wang G, Li J, Zhang T, Xu P, Wang Y",,NIAAA NIH HHS,22.0,China +24147765,Rice,0.762478769,Rice,0.762478769,,0,1,http://ricedb.plantenergy.uwa.edu.au,"HTTPConnectionPool(host='ricedb.plantenergy.uwa.edu.au', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170812060235/http://ricedb.plantenergy.uwa.edu.au:80/,2013-11-29,"ARC Centre of Excellence in Plant Energy Biology, University of Western Australia, MCS Building M316, 35 Stirling Highway, Crawley, 6009, Western Australia, Australia; Centre for Computational Systems Biology, University of Western Australia, MCS Building M316, 35 Stirling Highway, Crawley, 6009, Western Australia, Australia.","Narsai R, Devenish J, Castleden I, Narsai K, Xu L, Shou H, Whelan J",,,19.0,"Australia, Australia, Australia, Australia, Australia, Australia" +24148649,snOPY,0.990186676,snOPY,0.990186676,snoRNA orthological gene database,0.855695571,1,http://snoopy.med.miyazaki-u.ac.jp,200,,,http://web.archive.org/web/20220616013708/http://snoopy.med.miyazaki-u.ac.jp/,2013-10-23,None,"Yoshihama M, Nakao A, Kenmochi N",,,49.0, +24158836,RAvariome,0.715638518,RAvariome,0.715638518,,0,1,http://hinv.jp/hinv/rav,301,,,http://web.archive.org/web/20140722235311/http://hinv.jp/hinv/rav/,2013-10-23,"Department of Molecular Life Science, Division of Basic Medical Science and Molecular Medicine, Tokai University School of Medicine, 143 Shimokasuya, Isehara, Kanagawa 259-1193, Japan and Data Management and Integration Team, Molecular Profiling Research Center for Drug Discovery, National Institute of Advanced Industrial Science and 
Technology, Koto-ku, Tokyo 135-0064, Japan.","Nagai Y, Imanishi T",,,3.0,"Japan, Japan" +24163098,SMMRNA,0.997671962,SMMRNA,0.997671962,,0,1,http://www.smmrna.org,200,,,http://web.archive.org/web/20220121131303/http://www.smmrna.org/,2013-10-24,"Department of Advanced Protein Science, Institute of Microbial Technology, Chandigarh-160036, India.","Mehta A, Sonam S, Gouri I, Loharch S, Sharma DK, Parkesh R",,,12.0,India +24165881,SIMAP,0.967703819,SIMAP,0.967703819,Similarity Matrix of Proteins,0.952235826,1,http://mips.gsf.de/simap,"HTTPConnectionPool(host='mips.gsf.de', port=80): Max retries exceeded with url: /simap (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20070930154852/http://mips.gsf.de/simap/,2013-10-27,"Terrence Donnelly Centre for Cellular and Biomolecular Research, Kim Lab, University of Toronto, Toronto, ON M5S 3E1, Canada, CUBE-Division of Computational Systems Biology, Department of Microbiology and Ecosystem Science, University of Vienna, 1090 Vienna, Austria and Institute of Bioinformatics and Systems Biology, Helmholtz Zentrum München, Technische Universität München, Wissenschaftszentrum Weihenstephan, 85764 Neuherberg, Germany.","Arnold R, Goldenberg F, Mewes HW, Rattei T",,,10.0,"Austria, Canada, Germany" +24194593,SelenoDB,0.994665325,SelenoDB,0.994665325,,0,1,http://www.selenodb.org,301,,,http://web.archive.org/web/20221016235114/https://www.selenodb.org/,2013-11-04,"Department of Evolutionary Genetics, Max Planck Institute for Evolutionary Anthropology, Leipzig 04103, Germany, Bioinformatics and Genomics Programme, Centre for Genomic Regulation (CRG), Dr. 
Aiguader 88, 08003 Barcelona, Spain, Universitat Pompeu Fabra (UPF), 08003 Barcelona, Spain and Department of Medicine, Division of Genetics, Brigham and Women's Hospital and Harvard Medical School, Boston, MA 02115, USA.","Romagné F, Santesmasses D, White L, Sarangi GK, Mariotti M, Hübler R, Weihmann A, Parra G, Gladyshev VN, Guigó R, Castellano S",,"NIA NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",13.0,"Germany, Spain, Spain, United States" +24203708,SMPDB,0.997425482,SMPDB,0.997425482,Small Molecule Pathway Database,0.985691021,1,http://www.smpdb.ca,200,,,http://web.archive.org/web/20221106065847/https://smpdb.ca/,2013-11-06,"Department of Computing Science, University of Alberta, Edmonton, AB, Canada T6G 2E8, Department of Biological Sciences, University of Alberta, Edmonton, AB, Canada T6G 2E8 and National Institute for Nanotechnology, 11421 Saskatchewan Drive, Edmonton, AB, Canada T6G 2M9.","Jewison T, Su Y, Disfany FM, Liang Y, Knox C, Maciejewski A, Poelzer J, Huynh J, Zhou Y, Arndt D, Djoumbou Y, Liu Y, Deng L, Guo AC, Han B, Pon A, Wilson M, Rafatnia S, Liu P, Wishart DS",,Canadian Institutes of Health Research,124.0,"Canada, Canada, Canada" +24214988,SAbDab,0.997705543,SAbDab,0.997705543,Structural antibody database,0.975666851,1,http://opig.stats.ox.ac.uk/webapps/sabdab,301,,,no_wayback,2013-11-08,"Department of Statistics, University of Oxford, 1 South Parks Road, Oxford OX1 3TG, UK, Informatics, UCB Pharma, 216 Bath Road, Slough SL1 4EN, UK and Roche Pharma Research & Early Development, Roche Diagnostics GmbH, 82377 Penzberg, Germany.","Dunbar J, Krawczyk K, Leem J, Baker T, Fuchs A, Georges G, Shi J, Deane CM",,"Engineering and Physical Sciences Research Council, Engineering and Physical Sciences Research Council",128.0,Germany +24220091,RNA Bricks,0.949595173,RNA 
Bricks,0.949595173,,0,1,http://iimcb.genesilico.pl/rnabricks,301,Poland,"(52.2296,21.0067)",http://web.archive.org/web/20221023070656/https://iimcb.genesilico.pl/rnabricks/,2013-11-12,"International Institute of Molecular and Cell Biology, Trojdena 4, 02-109 Warsaw, Poland, Faculty of Mathematics, Informatics, and Mechanics, University of Warsaw, Banacha 2, 02-097 Warsaw, Poland and Institute of Molecular Biology and Biotechnology, Faculty of Biology, Adam Mickiewicz University, Umultowska 89, 61-614 Poznan, Poland.","Chojnowski G, Walen T, Bujnicki JM",,,32.0,"Poland, Poland, Poland" +24225318,Selectome,0.979073048,Selectome,0.979073048,,0,1,http://selectome.unil.ch,301,,,http://web.archive.org/web/20190122010534/https://selectome.unil.ch/,2013-11-12,"Department of Ecology and Evolution, Biophore, University of Lausanne, CH-1015 Lausanne, Switzerland, Evolutionary Bioinformatics group, SIB Swiss Institute of Bioinformatics, CH-1015 Lausanne, Switzerland, Vital-IT group, SIB Swiss Institute of Bioinformatics, CH-1015 Lausanne, Switzerland, Computational Phylogenetics group, SIB Swiss Institute of Bioinformatics, CH-1015 Lausanne, Switzerland, Division of Biosciences, Institute of Structural and Molecular Biology, University College London, Gower Street, London, WC1E 6BT, UK and Swiss National Supercomputing Centre (CSCS), CH-6900, Lugano, Switzerland.","Moretti S, Laurenczy B, Gharib WH, Castella B, Kuzniar A, Schabauer H, Studer RA, Valle M, Salamin N, Stockinger H, Robinson-Rechavi M",,"Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation",38.0,"Switzerland, Switzerland, Switzerland, Switzerland, Switzerland" +"24271399, 25501940",SFLD,0.998137712,SFLD,0.998137712,Structure-Function Linkage Database,0.984608392,2,http://sfld.rbvi.ucsf.edu,200,,,http://web.archive.org/web/20220830015041/http://sfld.rbvi.ucsf.edu/,2014-12-12,"Department of Bioengineering and Therapeutic Sciences, University of California, San 
Francisco, San Francisco, CA 94158, USA, Universidad Andres Bello, Center for Bioinformatics and Integrative Biology, Facultad de Ciencias Biologicas, Santiago 8370146, Chile, Nodality, Inc., South San Francisco, CA 94080, USA, Department of Electrical and Computer Engineering, College of Engineering, Boston University, Boston, MA 02215, USA, Department of Chemical Engineering, Massachusetts Institute of Technology, Cambridge, MA 02139, USA, Department of Pharmaceutical Chemistry, School of Pharmacy, University of California, San Francisco, San Francisco, CA 94158, USA, Center for Bioinformatics (ZBH), University of Hamburg, Hamburg 20146, Germany, Department of Chemistry and Biochemistry, Montana State University, Bozeman, MT 59717, USA, School of Medicine, University of California, San Francisco, San Francisco, CA 94143, USA, UC Berkeley - UCSF Graduate Program in Bioengineering, University of California, San Francisco, CA 94158 and Berkeley, CA 94720, USA and California Institute for Quantitative Biosciences, University of California, San Francisco, San Francisco, CA 94158, USA., Department of Bioengineering and Therapeutic Sciences, University of California, San Francisco, California.","Akiva E, Brown S, Almonacid DE, Barber AE 2nd, Custer AF, Hicks MA, Huang CC, Lauck F, Mashiyama ST, Meng EC, Mischel D, Morris JH, Ojha S, Schnoes AM, Stryke D, Yunes JM, Ferrin TE, Holliday GL, Babbitt PC, Brown S, Babbitt P",", ","NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCRR NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCRR NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",125.0,"Chile, Germany, United States, United States, United States, United States, United States, United States, United States, United States, United States" +24279809,Rice Oligonucleotide Array Database,0.973416291,ROAD,0.928594371,Rice Oligonucleotide Array Database,0.973416291,1,http://www.ricearray.org,200,United 
States,"(33.4484,-112.074)",http://web.archive.org/web/20221110025551/https://www.ricearray.org/,2012-07-19,"Institute of Bioinformatics, Zhejiang University, Hangzhou, 310058, China. jzhu@zju.edu.cn.","Cao P, Jung KH, Choi D, Hwang D, Zhu J, Ronald PC",,,80.0,China +24280345,RiceCyc,0.990686595,RiceCyc,0.990686595,,0,1,http://www.gramene.org/pathway,301,United States,"(40.7831,-73.9713)",http://web.archive.org/web/20220804093119/https://www.gramene.org/pathway/,2013-05-29,"Department of Botany and Plant Pathology, Oregon State University, 2082-Cordley Hall, Corvallis, OR 97331, USA. jaiswalp@science.oregonstate.edu.","Dharmawardhana P, Ren L, Amarasinghe V, Monaco M, Thomason J, Ravenscroft D, McCouch S, Ware D, Jaiswal P",,,39.0,United States +24288368,RDP,0.996806602,RDP,0.996806602,Ribosomal Database Project,0.967247033,1,http://rdp.cme.msu.edu,200,United States,"(42.7069,-84.4138)",http://web.archive.org/web/20220922100548/http://rdp.cme.msu.edu/,2013-11-27,"Center for Microbial Ecology, Michigan State University, East Lansing, MI 48824, USA, Computer Science and Engineering, Michigan State University, East Lansing, MI 48824, USA, Microbiology and Molecular Genetics, Michigan State University, East Lansing, MI 48824, USA, Biological Sciences, Western Illinois University, Malcomb, IL 61455, USA and Bioscience Division, Los Alamos National Laboratory, Los Alamos, NM 87545, USA.","Cole JR, Wang Q, Fish JA, Chai B, McGarrell DM, Sun Y, Brown CT, Porras-Alfaro A, Kuske CR, Tiedje JM",,"NHLBI NIH HHS, NIDDK NIH HHS, NIEHS NIH HHS",1464.0,"United States, United States, United States, United States, United States" +24323624,SABRE,0.995861411,SABRE,0.995861411,Systematic consolidation of Arabidopsis and,0.888674723,1,http://sabre.epd.brc.riken.jp/SABRE2.html,"HTTPConnectionPool(host='sabre.epd.brc.riken.jp', port=80): Max retries exceeded with url: /SABRE2.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not 
known'))",,,http://web.archive.org/web/20170402020139/http://sabre.epd.brc.riken.jp/SABRE2.html,2013-12-09,"RIKEN BioResource Center, 3-1-1 Koyadai, Tsukuba, Ibaraki, 305-0074 Japan.","Fukami-Kobayashi K, Nakamura Y, Tamura T, Kobayashi M",,,5.0,Japan +24504151,SATuRN,0.765131068,SATuRN,0.765131068,African Treatment Resistance Network,0.749985605,1,http://www.bioafrica.net/regadb,"HTTPConnectionPool(host='www.bioafrica.net', port=80): Max retries exceeded with url: /regadb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200219215128/http://www.bioafrica.net:80/regadb/,2014-02-06,"Africa Centre for Health and Population Studies, School of Laboratory Medicine and Medical Sciences, University of KwaZulu-Natal, Mtubatuba, South Africa, Department of Clinical Research, London School of Hygiene and Tropical Medicine, London, UK, Immunology department, University of Pretoria, Pretoria, South Africa, Infectious Diseases, Internal Medicine, University of the Free State School of Medicine, Bloemfontein, South Africa and Division of Medical Virology, Stellenbosch University, Stellenbosch, South Africa, National Health Laboratory Service Tygerberg, Cape Town, South Africa, Department of Virology, National Health Laboratory Service, University of KwaZulu-Natal, Durban, South Africa, The Brighton Doctoral College, Brighton and Sussex Medical School, UK, Academic Unit of primary Care and Population Science, Division of Social Statistics and Geography, University of Southampton, Southampton, UK, Jembi Health Systems, Cape Town, South Africa, School of Mathematics, Statistics and Computer Science, University of KwaZulu-Natal, Westville, South Africa and Research Department of Infection, University College of London (UCL), London, UK.","Manasa J, Lessells R, Rossouw T, Naidu K, Van Vuuren C, Goedhals D, van Zyl G, Bester A, Skingsley A, Stott K, Danaviah S, Chetty T, Singh L, Moodley P, 
Iwuji C, McGrath N, Seebregts CJ, de Oliveira T",,"Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust",6.0,"South Africa, South Africa, South Africa, South Africa, South Africa, South Africa, South Africa, South Africa" +24531082,SMAL,0.99170814,SMAL,0.99170814,Spontaneous Mutation Accumulation Lines,0.987832281,1,http://cefg.uestc.edu.cn/smal,"HTTPConnectionPool(host='cefg.uestc.edu.cn', port=80): Max retries exceeded with url: /smal (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20150321022936/http://cefg.uestc.edu.cn:80/smal/,2014-02-14,"Center of Bioinformatics, Key Laboratory for NeuroInformation of the Ministry of Education, University of Electronic Science and Technology of China, Chengdu, China.","Wei W, Ning LW, Ye YN, Li SJ, Zhou HQ, Huang J, Guo FB",,,6.0,"China, China" +24618044,SoyFN,0.997227907,SoyFN,0.997227907,,0,1,http://nclab.hit.edu.cn/SoyFN,"HTTPConnectionPool(host='nclab.hit.edu.cn', port=80): Max retries exceeded with url: /SoyFN (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160823181258/http://nclab.hit.edu.cn:80/SoyFN/,2014-03-10,"School of Computer Science and Technology, Harbin Institute of Technology, Harbin 150001, P.R. China and School of Life Science and Technology, Harbin Institute of Technology, Harbin 150001, P.R. 
China.","Xu Y, Guo M, Liu X, Wang C, Liu Y",,,6.0,"China, China" +24712981,SFGD,0.987578392,SFGD,0.987578392,Soybean Functional Genomics Database,0.935117344,1,http://bioinformatics.cau.edu.cn/SFGD,301,,,http://web.archive.org/web/20210525181029/http://bioinformatics.cau.edu.cn/SFGD/,2014-04-08,None,"Yu J, Zhang Z, Wei J, Ling Y, Xu W, Su Z",,,26.0, +24739306,RetrogeneDB,0.99268949,RetrogeneDB,0.99268949,,0,1,http://retrogenedb.amu.edu.pl,200,Poland,"(52.4052,16.9339)",http://web.archive.org/web/20221020092722/http://retrogenedb.amu.edu.pl/,2014-04-16,"Labolatory of Bioinformatics, Faculty of Biology, Adam Mickiewicz University, Poznań, Poland.","Kabza M, Ciomborowska J, Makałowska I",,,16.0,Poland +24771658,RegPhos,0.992758036,RegPhos,0.992758036,,0,1,http://csb.cse.yzu.edu.tw/RegPhos2,404,,,no_wayback,2014-04-25,"Department of Computer Science and Engineering, Yuan Ze University, Taoyuan 320, Taiwan, Institute of Chemistry, Academia Sinica, Taipei 115, Taiwan, Genomics Research Center, Academia Sinica, Taipei 115, Taiwan, Institute of Bioinformatics and Systems Biology, National Chiao Tung University, Hsin-Chu 300, Taiwan and Department of Biological Science and Technology, National Chiao Tung University, Hsin-Chu 300, Taiwan.","Huang KY, Wu HY, Chen YJ, Lu CT, Su MG, Hsieh YC, Tsai CM, Lin KI, Huang HD, Lee TY, Chen YJ",,,20.0, +24803509,RAID,0.7922194,RAID,0.7922194,,0,1,http://www.rna-society.org/raid,301,United States,"(40.2069,-111.642)",http://web.archive.org/web/20210416163312/https://www.rna-society.org/raid/,2014-05-06,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Zhang X, Wu D, Chen L, Li X, Yang J, Fan D, Dong T, Liu M, Tan P, Xu J, Yi Y, Wang Y, Zou H, Hu Y, Fan K, Kang J, Huang Y, Miao Z, Bi M, Jin N, Li K, Li X, Xu J, Wang D",,,36.0,China +24907201,SeaBase,0.991243899,SeaBase,0.991243899,,0,1,http://seabase.core.cli.mbl.edu,"HTTPConnectionPool(host='seabase.core.cli.mbl.edu', port=80): Max 
retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to seabase.core.cli.mbl.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20150425234340/http://seabase.core.cli.mbl.edu:80/,2014-06-06,"*Marine Biological Laboratory, Woods Hole, MA 02543, USA; Department of Molecular and Cellular Biology, Harvard University, Cambridge, MA 02138, USA; Systems & Control Engineering, University of Magna Graecia, 88100 Catanzaro, Italy*Marine Biological Laboratory, Woods Hole, MA 02543, USA; Department of Molecular and Cellular Biology, Harvard University, Cambridge, MA 02138, USA; Systems & Control Engineering, University of Magna Graecia, 88100 Catanzaro, Italy jcsmith@mbl.edu.","Fischer AH, Mozzherin D, Eren AM, Lans KD, Wilson N, Cosentino C, Smith J",,,11.0,"Italy, Italy, United States, United States, United States, United States" +24912499,REGNET,0.995660305,REGNET,0.995660305,,0,1,http://mgrc.kribb.re.kr/regnet,"HTTPConnectionPool(host='mgrc.kribb.re.kr', port=80): Max retries exceeded with url: /regnet (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-06-09,None,"Chi SM, Seo YK, Park YK, Yoon S, Park CY, Kim YS, Kim SY, Nam D",,,2.0, +24939193,RetinoGenetics,0.990538726,RetinoGenetics,0.990538726,,0,1,http://www.retinogenetics.org,200,Hong Kong,"(22.3193,114.1693)",http://web.archive.org/web/20220615181927/http://retinogenetics.org/,2014-06-17,"Institute of Genomic Medicine, Wenzhou Medical University, Wenzhou 325027, China, Division of Ophthalmic Genetics, Laboratory for Stem Cell and Retinal Regeneration, The Eye Hospital of Wenzhou Medical University, Wenzhou 325027, China and The State Key Laboratory Cultivation Base and Key Laboratory of Vision Science, Ministry of Health People's Republic of China, Wenzhou 325027, China.","Ran X, Cai WJ, Huang XF, Liu Q, Lu F, Qu J, Wu J, Jin ZB",,,28.0,"China, China, China, China" 
+24952385,ShrimpGPAT,0.997447491,ShrimpGPAT,0.997447491,Shrimp Gene and Protein Annotation Tool,0.836638801,1,http://shrimpgpat.sc.mahidol.ac.th,"HTTPConnectionPool(host='shrimpgpat.sc.mahidol.ac.th', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-06-21,None,"Korshkari P, Vaiwsri S, Flegel TW, Ngamsuriyaroj S, Sonthayanon B, Prachumwat A",,,2.0, +24991975,sc-PDB-Frag,0.918389161,sc-PDB-Frag,0.918389161,,0,1,http://bioinfo-pharma.u-strasbg.fr/scPDBFrag,301,,,http://web.archive.org/web/20201205015914/http://bioinfo-pharma.u-strasbg.fr/scPDBFrag/,2014-07-17,"Laboratory for Therapeutical Innovation, UMR 7200 Université de Strasbourg/CNRS, MEDALIS Drug Discovery Center, F-67400 Illkirch, France.","Desaphy J, Rognan D",,,8.0,France +25075616,SNP@lincTFBS,0.949685822,SNP@lincTFBS,0.949685822,,0,1,http://bioinfo.hrbmu.edu.cn/SNP_lincTFBS,"HTTPConnectionPool(host='bioinfo.hrbmu.edu.cn', port=80): Max retries exceeded with url: /SNP_lincTFBS (Caused by ConnectTimeoutError(, 'Connection to bioinfo.hrbmu.edu.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20141029095653/http://bioinfo.hrbmu.edu.cn:80/SNP_lincTFBS/,2014-07-30,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.","Ning S, Zhao Z, Ye J, Wang P, Zhi H, Li R, Wang T, Wang J, Wang L, Li X",,,12.0,China +25097385,SCNProDB,0.972986102,SCNProDB,0.972986102,,0,1,http://bioinformatics.towson.edu/Soybean_SCN_proteins_2D_Gel_DB/Gel1.aspx,404,,,no_wayback,2014-06-30,"USDA-ARS, Soybean Genomics and Improvement Laboratory, Beltsville, MD 20705, USA.","Natarajan S, Tavakolan M, Alkharouf NW, Matthews BF",,,2.0,United States +25228593,RADB,0.987689435,RADB,0.987689435,Database of Rheumatoid Arthritis-related Polymorphisms,0.919500697,1,http://www.bioapp.org/RADB,301,China,"(36.061,120.3814)",http://web.archive.org/web/20220615230712/http://www.bioapp.org/RADB/,2014-09-15,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150086, China, Yiwu Hospital, Zhejiang University, Yiwu 322000, China, Genome Analysis Laboratory, Tianjin Institute of Industrial Biotechnology, Chinese Academy of Sciences, Tianjin, 300308, China, Depatment of Pathology, Harbin Medical University, Harbin 150086, China jiangyongshuai@gmail.com.","Zhang R, Luan M, Shang Z, Duan L, Tang G, Shi M, Lv W, Zhu H, Li J, Lv H, Zhang M, Liu G, Chen H, Jiang Y",,,3.0,"China, China, China, China" +25252782,RGED,0.994495869,RGED,0.994495869,Renal Gene Expression Database,0.971099293,1,http://rged.wall-eva.net,200,United States,"(25.7689,-80.1946)",http://web.archive.org/web/20220619214251/http://rged.wall-eva.net/,2014-09-24,"Kidney Institute of CPLA, Division of Nephrology, Changzheng Hospital, Second Military Medical University, 415 Fengyang Road, Shanghai 200003, China.","Zhang Q, Yang B, Chen X, Xu J, Mei C, Mao Z",,,5.0,China 
+25309735,SkateBase,0.982891858,SkateBase,0.982891858,,0,1,http://skatebase.org,200,,,http://web.archive.org/web/20220830074639/http://skatebase.org/,2014-08-12,"Department of Computer and Information Sciences, Center for Bioinformatics and Computational Biology, University of Delaware, Newark, DE, 19711, USA.","Wyffels J, King BL, Vincent J, Chen C, Wu CH, Polson SW",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCRR NIH HHS, NCRR NIH HHS, NCRR NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCRR NIH HHS, NCRR NIH HHS",32.0,United States +25378308,REBASE,0.997635245,REBASE,0.997635245,,0,1,http://rebase.neb.com,403,,,http://web.archive.org/web/20220303154600/http://rebase.neb.com/,2014-11-05,"New England Biolabs, Ipswich, MA 01938, USA roberts@neb.com.","Roberts RJ, Vincze T, Posfai J, Macelis D",,,353.0,United States +25414355,rrnDB,0.994640529,rrnDB,0.994640529,ribosomal RNA operon copy number database,0.776851568,1,http://rrndb.umms.med.umich.edu,301,,,http://web.archive.org/web/20221105172954/https://rrndb.umms.med.umich.edu/,2014-11-20,"Department of Internal Medicine, University of Michigan, Ann Arbor, MI 48109, USA.","Stoddard SF, Smith BJ, Hein R, Roller BR, Schmidt TM",,"PHS HHS, NIGMS NIH HHS",265.0,United States +25428362,SGN,0.983287116,SGN,0.983287116,Sol Genomics Network,0.855045378,1,http://solgenomics.net,301,,,no_wayback,2014-11-26,"Boyce Thompson Institute for Plant Research, Ithaca, NY 14853, USA.","Fernandez-Pozo N, Menda N, Edwards JD, Saha S, Tecle IY, Strickler SR, Bombarely A, Fisher-York T, Pujar A, Foerster H, Yan A, Mueller LA",,,180.0,United States +25480115,Sinbase,0.989667535,Sinbase,0.989667535,,0,1,http://ocri-genomics.org/Sinbase,"HTTPConnectionPool(host='ocri-genomics.org', port=80): Max retries exceeded with url: /Sinbase (Caused by ConnectTimeoutError(, 'Connection to ocri-genomics.org timed out. 
(connect timeout=5)'))",,,no_wayback,2014-12-04,"Oil Crops Research Institute of the Chinese Academy of Agricultural Sciences, Key Laboratory of Biology and Genetic Improvement of Oil Crops, Ministry of Agriculture, PR China These authors contributed equally to this work.","Wang L, Yu J, Li D, Zhang X",,,21.0,China +25489177,RiceQTLPro,0.997575998,RiceQTLPro,0.997575998,,0,1,http://nabic.rda.go.kr/gere/rice/geneticMap,405,,,no_wayback,2014-10-30,"Genomics Division, National Academy of Agricultural Science (NAAS), Jeonju 560-500, Korea.","Kim CK, Seol YJ, Lee DJ, Lee JH, Lee TH, Park DS",,,0.0, +25640659,SecReT6,0.997667551,SecReT6,0.997667551,,0,1,http://db-mml.sjtu.edu.cn/SecReT6,302,,,http://web.archive.org/web/20210619160605/https://db-mml.sjtu.edu.cn/SecReT6/,2015-07-01,"State Key Laboratory for Microbial Metabolism and School of Life Sciences and Biotechnology, Shanghai Jiaotong University, Shanghai, 200030, China.","Li J, Yao Y, Xu HH, Hao L, Deng Z, Rajakumar K, Ou HY",,"Army Research Office, National Natural Science Foundation of China, National Natural Science Foundation of China, Specialized Research Fund for the Doctoral Program of Higher Education, China, 973 program, Ministry of Science and Technology, China, National Natural Science Foundation of China, United States Department of Homeland Security, Sino-UK Higher Education Research Partnership for PhD Studies",72.0,China +25880930,REGULATOR,0.697461545,REGULATOR,0.697461545,,0,1,http://www.bioinformatics.org/regulator,301,United States,"(39.9557,-75.1698)",no_wayback,2015-04-10,"Department of Biological Sciences, Graduate School of Science, Osaka University, Toyonaka, Osaka, 560-0043, Japan. 
wangk@bio.sci.osaka-u.ac.jp.","Wang K, Nishida H",,,6.0,Japan +25881271,Salinity Tolerant Poplar Database,0.957052559,STPD,0.778577745,Salinity Tolerant Poplar Database,0.957052559,1,http://me.lzu.edu.cn/stpd,"HTTPConnectionPool(host='me.lzu.edu.cn', port=80): Max retries exceeded with url: /stpd (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170601035514/http://me.lzu.edu.cn:80/stpd/,2015-03-17,"Molecular Ecology Group, State Key Laboratory of Grassland and Agro-Ecosystems, School of Life Sciences, Lanzhou University, Lanzhou, 730000, Gansu, China. mayazhen1222@126.com.","Ma Y, Xu T, Wan D, Ma T, Shi S, Liu J, Hu Q",,,2.0,China +26026167,RPdb,0.979530334,RPdb,0.979530334,reprogramming database,0.584708124,1,http://bioinformatics.ustc.edu.cn/rpdb,"HTTPConnectionPool(host='bioinformatics.ustc.edu.cn', port=80): Max retries exceeded with url: /rpdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2015-05-29,"School of Information Science and Technology, University of Science and Technology of China, Hefei, AH230027, China.","Shen Y, Gao F, Wang M, Li A",,,1.0,"China, China" +"26097510, 33952332",SANCDB,0.994873464,SANCDB,0.994873464,South African Natural Compounds Database,0.975358397,2,http://sancdb.rubi.ru.ac.za,301,,,http://web.archive.org/web/20220816132518/https://sancdb.rubi.ru.ac.za/,2021-05-05,"Department of Biochemistry and Microbiology, Research Unit in Bioinformatics (RUBi), Rhodes University, Grahamstown, South Africa., Research Unit in Bioinformatics (RUBi), Department of Biochemistry and Microbiology, Rhodes University, Makhanda/Grahamstown, 6140, South Africa.","Hatherley R, Brown DK, Musyoka TM, Penkler DL, Faya N, Lobb KA, Tastan Bishop Ö, Diallo BN, Glenister M, Musyoka TM, Lobb K, Tastan Bishop Ö",", ","NHGRI NIH HHS, Wellcome Trust, Grand Challenges Africa programme, DELGEME - 
Wellcome Trust, H3ABioNet - NIH",32.0,"South Africa, South Africa" +26138588,SmedGD,0.994868353,SmedGD,0.994868353,Schmidtea mediterranea Genome Database,0.978626414,1,http://smedgd.stowers.org,302,,,http://web.archive.org/web/20210119221445/http://smedgd.stowers.org/,2015-07-17,"Stowers Institute for Medical Research, Howard Hughes Medical Institute, Kansas City, Missouri, 64110.","Robb SM, Gotting K, Ross E, Sánchez Alvarado A",,"NIGMS NIH HHS, Howard Hughes Medical Insitute, Howard Hughes Medical Institute, Stowers Institute for Medical Research",70.0, +26322066,ReprOlive,0.997865617,ReprOlive,0.997865617,,0,1,http://reprolive.eez.csic.es,"HTTPConnectionPool(host='reprolive.eez.csic.es', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to reprolive.eez.csic.es timed out. (connect timeout=5)'))",,,no_wayback,2015-08-11,"Department of Biochemistry, Cell and Molecular Biology of Plants, Estación Experimental del Zaidín, Consejo Superior de Investigaciones Científicas Granada, Spain ; Plataforma Andaluza de Bioinformática, Edificio de Bioinnovación, Universidad de Málaga Málaga, Spain.","Carmona R, Zafra A, Seoane P, Castro AJ, Guerrero-Fernández D, Castillo-Castillo T, Medina-García A, Cánovas FM, Aldana-Montes JF, Navas-Delgado I, Alché Jde D, Claros MG",,,17.0,"Spain, Spain" +26323714,RNASeqMetaDB,0.962606609,RNASeqMetaDB,0.962606609,,0,1,http://rnaseqmetadb.ece.tamu.edu,"HTTPConnectionPool(host='rnaseqmetadb.ece.tamu.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170615084647/http://rnaseqmetadb.ece.tamu.edu/,2015-08-30,"Department of Electrical and Computer Engineering & TEES-AgriLife Center for Bioinformatics and Genomic Systems Engineering, Texas A&M University, College Station, TX 77843, USA.","Guo Z, Tzvetkova B, Bassik JM, Bodziak T, Wojnar BM, Qiao W, Obaida MA, Nelson SB, 
Hu BH, Yu P",,"NIDCD NIH HHS, NINDS NIH HHS, NIMH NIH HHS, NIDCD NIH HHS",6.0,United States +26424082,RegNetwork,0.983025074,RegNetwork,0.983025074,,0,1,http://www.regnetworkweb.org,301,United States,"(40.7128,-74.006)",no_wayback,2015-09-30,"Department of Biomedical Engineering, School of Control Science and Engineering, Shandong University, Jinan, Shandong 250061, China and.","Liu ZP, Wu C, Miao H, Wu H",,,105.0,China +"26467481, 28654729, 31665520",SIGNOR,0.967104197,SIGNOR,0.967104197,SIGnaling Network Open Resource,0.931980938,3,http://signor.uniroma2.it,200,,,http://web.archive.org/web/20221016213004/https://signor.uniroma2.it/,2020-01-01,"Department of Biology, University of Rome Tor Vergata, Rome, Italy., Department of Biology, University of Rome Tor Vergata, Rome, Italy., Department of Biology, University of Rome Tor Vergata, Via della Ricerca Scientifica, 00133 Rome, Italy.","Perfetto L, Briganti L, Calderone A, Cerquone Perpetuini A, Iannuccelli M, Langone F, Licata L, Marinkovic M, Mattioni A, Pavlidou T, Peluso D, Petrilli LL, Pirrò S, Posca D, Santonico E, Silvestri A, Spada F, Castagnoli L, Cesareni G, Lo Surdo P, Calderone A, Cesareni G, Perfetto L, Licata L, Lo Surdo P, Iannuccelli M, Palma A, Micarelli E, Perfetto L, Peluso D, Calderone A, Castagnoli L, Cesareni G",", , ","European Research Council, , Italian Association for Cancer Research, ELIXIR-IIB, Italian Association for Cancer Research, Italian Node of the European ELIXIR infrastructure",172.0,"Italy, Italy, Italy" +26478709,RAMS,0.954435706,RAMS,0.954435706,The Register of Antarctic Marine Species,0.92820104,1,http://ipt.biodiversity.aq/resource.do?r=rams,302,Belgium,"(50.842,4.4383)",http://web.archive.org/web/20220708105533/https://ipt.biodiversity.aq/resource.do?r=rams,2015-09-30,"Université Libre de Bruxelles, 50 avenue Franklin Roosevelt, 1000 Brussels, Belgium.","Jossart Q, Moreau C, Agüera A, Broyer CD, Danis B",,,2.0,Belgium 
+26490957,SigMol,0.986595631,SigMol,0.986595631,,0,1,http://bioinfo.imtech.res.in/manojk/sigmol,404,,,no_wayback,2015-10-20,"Bioinformatics Centre, Institute of Microbial Technology, Council of Scientific and Industrial Research (CSIR), Sector 39-A, Chandigarh-160036, India.","Rajput A, Kaur K, Kumar M",,,36.0,India +26503253,rVarBase,0.998465121,rVarBase,0.998465121,,0,1,http://rv.psych.ac.cn,200,,,http://web.archive.org/web/20220617095709/http://rv.psych.ac.cn/,2015-10-25,"Key Laboratory of Mental Health, Institute of Psychology, Chinese Academy of Sciences, Beijing, China.","Guo L, Du Y, Qu S, Wang J",,,29.0,China +26527724,RegulonDB,0.998138189,RegulonDB,0.998138189,,0,1,http://regulondb.ccg.unam.mx,200,Mexico,"(19.2928,-98.9941)",http://web.archive.org/web/20220402160731/http://regulondb.ccg.unam.mx/,2015-11-02,"Programa de Genómica Computacional, Centro de Ciencias Genómicas, Universidad Nacional Autónoma de México, A.P. 565-A, Cuernavaca, Morelos 62100, Mexico.","Gama-Castro S, Salgado H, Santos-Zavaleta A, Ledezma-Tejeida D, Muñiz-Rascado L, García-Sotelo JS, Alquicira-Hernández K, Martínez-Flores I, Pannier L, Castro-Mondragón JA, Medina-Rivera A, Solano-Lira H, Bonavides-Martínez C, Pérez-Rueda E, Alquicira-Hernández S, Porrón-Sotelo L, López-Fuentes A, Hernández-Koutoucheva A, Del Moral-Chávez V, Rinaldi F, Collado-Vides J",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",211.0,Mexico +26527728,SATPdb,0.998526931,SATPdb,0.998526931,,0,1,http://crdd.osdd.net/raghava/satpdb,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/satpdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220528021024/http://crdd.osdd.net/raghava/satpdb/,2015-11-02,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh, India.","Singh S, Chaudhary K, Dhanda SK, Bhalla S, Usmani SS, Gautam A, Tuknait A, Agrawal P, Mathur D, Raghava GP",,,44.0,India +"26527729, 29140531, 30485709",sORFs.org,0.976624405,sORFs.org,0.976624405,,0,3,http://www.sorfs.org,200,,,http://web.archive.org/web/20220616105926/http://www.sorfs.org/,2018-11-28,"Lab of Bioinformatics and Computational Genomics (BioBix), Department of Mathematical Modelling, Statistics and Bioinformatics, Faculty of Bioscience Engineering, Ghent University, 9000 Ghent, Belgium volodimir.olexiouk@ugent.be., Lab of Bioinformatics and Computational Genomics (BioBix), Department of Mathematical Modelling, Statistics and Bioinformatics, Faculty of Bioscience Engineering, Ghent University, 9000 Ghent, Belgium., Department of Mathematical Modelling, Statistics and Bioinformatics, Universiteit Gent Faculteit Bio-Ingenieurswetenschappen, Gent, Belgium.","Olexiouk V, Crappé J, Verbruggen S, Verhegen K, Martens L, Menschaert G, Olexiouk V, Van Criekinge W, Menschaert G, Olexiouk V, Menschaert G",", , ",", , Postdoctoral Fellows of the Research Foundation-Flanders, Fonds Wetenschappelijk Onderzoek",110.0,"Belgium, Belgium, Belgium" +26590403,SBR-Blood,0.985119432,SBR-Blood,0.985119432,hematopoietic Systems Biology Repository,0.887098688,1,http://sbrblood.nhgri.nih.gov,"HTTPConnectionPool(host='sbrblood.nhgri.nih.gov', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220418030214/https://sbrblood.nhgri.nih.gov/,2015-11-20,"National Human Genome Research Institute, National Institutes of Health, Bethesda, MD, USA lichtenbergj@mail.nih.gov.","Lichtenberg J, Heuston EF, Mishra T, Keller CA, Hardison RC, Bodine DM",,,0.0,United 
States +26635394,RBP-Var,0.991189197,RBP-Var,0.991189197,,0,1,http://www.rbp-var.biols.ac.cn,200,,,http://web.archive.org/web/20220517044028/http://www.rbp-var.biols.ac.cn/,2015-12-03,"Beijing Institutes of Life Science, Chinese Academy of Sciences, Beijing 100101, China University of Chinese Academy of Sciences, Beijing 100049, China.","Mao F, Xiao L, Li X, Liang J, Teng H, Cai W, Sun ZS, Sun ZS",,,27.0,"China, China" +26647370,SBMDb,0.995680153,SBMDb,0.995680153,,0,1,http://webapp.cabgrid.res.in/sbmdb,301,,,http://web.archive.org/web/20220221092858/http://webapp.cabgrid.res.in/sbmdb/,2015-12-08,"Centre for Agricultural Bioinformatics, Indian Agricultural Statistics Research Institute, Library Avenue, PUSA, New Delhi 110012, India.","Iquebal MA, Jaiswal S, Angadi UB, Sablok G, Arora V, Kumar S, Rai A, Kumar D",,,3.0,India +26746786,RVS,0.919026732,RVS,0.919026732,,0,1,http://rvs.u.hpc.mssm.edu,"HTTPConnectionPool(host='rvs.u.hpc.mssm.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220620095037/http://rvs.u.hpc.mssm.edu/,2016-01-08,"Department of Genetics and Genomic Sciences, Icahn School of Medicine at Mount Sinai, 1425 Madison Ave, Box 1498, New York, 10029, USA. 
joerg.hakenberg@gmail.com.","Hakenberg J, Cheng WY, Thomas P, Wang YC, Uzilov AV, Chen R",,"Medical Research Council, Medical Research Council",10.0,United States +26800248,SISTR,0.992295837,SISTR,0.992295837,Salmonella In Silico Typing Resource,0.952685045,1,http://lfz.corefacility.ca/sistr-app,301,,,http://web.archive.org/web/20211009005626/https://lfz.corefacility.ca/sistr-app/,2016-01-22,"National Microbiology Laboratory at Guelph, Public Health Agency of Canada, Guelph, Ontario, Canada.","Yoshida CE, Kruczkiewicz P, Laing CR, Lingohr EJ, Gannon VP, Nash JH, Taboada EN",,,171.0,"Canada, Canada" +26818131,siRNAmod,0.993242323,siRNAmod,0.993242323,,0,1,http://crdd.osdd.net/servers/sirnamod,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /servers/sirnamod (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20200813212337/http://crdd.osdd.net:80/servers/sirnamod/,2016-01-28,"Bioinformatics Centre, Institute of Microbial Technology, Council of Scientific and Industrial Research, Sector 39A, Chandigarh-160036, India.","Dar SA, Thakur A, Qureshi A, Kumar M",,,19.0,India +26876983,Regulators of Androgen Action Resource,0.965198304,RAAR,0.942830563,Regulators of Androgen Action Resource,0.965198304,1,http://www.lerner.ccf.org/cancerbio/heemers/RAAR/search,302,United States,"(41.4802,-82.0997)",http://web.archive.org/web/20210923181939/https://www.lerner.ccf.org/cancerbio/heemers/RAAR/search/,2016-02-13,Department of Cancer Genetics heemerh@ccf.org.,"DePriest AD, Fiandalo MV, Schlanger S, Heemers F, Mohler JL, Liu S, Heemers HV",,,10.0, +26912951,RDIS,0.970782856,RDIS,0.970782856,The Rabies Disease Information System,0.871801784,1,http://rabies.mscwbif.org/home.html,"HTTPConnectionPool(host='rabies.mscwbif.org', port=80): Max retries exceeded with url: /home.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 
-2] Name or service not known'))",,,no_wayback,2015-11-30,"Bioinformatics Infrastructure Facility, Department of Microbiology, Maharani׳s Science College for Women, Palace Road, Bangalore - 560001.","Dharmalingam B, Jothi L",,,1.0, +26949480,SNP2Structure,0.994531825,SNP2Structure,0.994531825,,0,1,http://apps.icbi.georgetown.edu/snp2structure,"HTTPConnectionPool(host='apps.icbi.georgetown.edu', port=80): Max retries exceeded with url: /snp2structure (Caused by ConnectTimeoutError(, 'Connection to apps.icbi.georgetown.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20150905121348/https://apps.icbi.georgetown.edu/snp2structure/,2015-09-30,"Department of Oncology, Lombardi Comprehensive Cancer Center, Georgetown University Medical Center, Washington, DC 20007, USA; Innovation Center for Biomedical Informatics, Georgetown University Medical Center, Washington, DC 20007, USA; Department of Biochemistry and Molecular & Cellular Biology, Georgetown University, Washington, DC 20007, USA.","Wang D, Song L, Singh V, Rao S, An L, Madhavan S",,,7.0,"United States, United States, United States" +26975833,REGene,0.984254122,REGene,0.984254122,REgeneration Gene database,0.634848428,1,http://REGene.bioinfo-minzhao.org,406,,,http://web.archive.org/web/20220207130204/http://regene.bioinfo-minzhao.org/,2016-03-15,"School of Engineering, Faculty of Science, Health, Education and Engineering, University of the Sunshine Coast, Maroochydore DC, Queensland, 4558, Australia.","Zhao M, Rotgans B, Wang T, Cummins SF",,,12.0,Australia +27010673,SM-TF,0.909217815,SM-TF,0.909217815,,0,1,http://zoulab.dalton.missouri.edu/SM-TF,301,,,no_wayback,2016-03-24,"Dalton Cardiovascular Research Center, University of Missouri, Columbia, Missouri, 65211.","Xu X, Ma Z, Sun H, Zou X",,"NSF CAREER Award, NIGMS NIH HHS, American Heart Association (Midwest Affiliate), National Institutes of Health",1.0, 
+27097230,SignaFish,0.99351877,SignaFish,0.99351877,Pathway,0.51736623,1,http://signafish.org,302,,,http://web.archive.org/web/20210419021643/http://signafish.org/,2016-04-20,"1 Department of Genetics, Eötvös Loránd University , Budapest, Hungary .","Csályi K, Fazekas D, Kadlecsik T, Türei D, Gul L, Horváth B, Módos D, Demeter A, Pápai N, Lenti K, Csermely P, Vellai T, Korcsmáros T, Varga M",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",3.0,Hungary +27189556,RiboDB,0.997285545,RiboDB,0.997285545,,0,1,http://ribodb.univ-lyon1.fr,200,France,"(45.7663,4.8883)",no_wayback,2016-05-05,"Univ Lyon, Université Lyon 1, CNRS, UMR5558, Laboratoire de Biométrie et Biologie Èvolutive, 43 bd du 11 novembre 1918, F-69622, Villeurbanne, France Technology Research Department, Innovation Unit, bioMérieux SA, Marcy L'Etoile, France.","Jauffrit F, Penel S, Delmotte S, Rey C, de Vienne DM, Gouy M, Charrier JP, Flandrois JP, Brochier-Armanet C",,,9.0,"France, France" +27297221,sHSPdb,0.981707454,sHSPdb,0.981707454,Heat Shock Proteins database,0.912849665,1,http://forge.info.univ-angers.fr,307,,,http://web.archive.org/web/20220122163802/http://forge.info.univ-angers.fr/,2016-06-13,"Université d'Angers, UMR 1345 IRHS, SFR 4207 QUASAV, Angers, France. 
emmanuel.jaspard@univ-angers.fr.","Jaspard E, Hunault G",,"ANR Blanc MITOZEN, University of Angers",11.0,France +27307138,SeriPort,0.92696017,SeriPort,0.92696017,,0,1,http://www.seriport.in,"HTTPConnectionPool(host='www.seriport.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220531064315/http://www6.seriport.in/,2016-06-15,"Bioengineering Research Laboratory, Department of Biosciences and Bioengineering, Indian Institute of Technology Guwahati, Guwahati, Assam 781039, India.","Singh D, Chetia H, Kabiraj D, Sharma S, Kumar A, Sharma P, Deka M, Bora U",,,0.0,India +27352859,SorghumFDB,0.993632734,SorghumFDB,0.993632734,,0,1,http://structuralbiology.cau.edu.cn/sorghum/index.html,200,,,http://web.archive.org/web/20190605051250/http://structuralbiology.cau.edu.cn:80/sorghum/index.html,2016-06-26,"State Key Laboratory of Plant Physiology and Biochemistry, College of Biological Sciences, China Agricultural University, Beijing 100193, China.","Tian T, You Q, Zhang L, Yi X, Yan H, Xu W, Su Z",,,14.0,"China, China" +27377064,RID,0.911651358,RID,0.911651358,Retrovirus Integration Database,0.863141191,1,"http://rid.ncifcrf.gov, http://home.ncifcrf.gov/hivdrp/resources.htm","301, 301",,", ","http://web.archive.org/web/20220526052911/https://rid.ncifcrf.gov/, no_wayback",2016-07-04,"Advanced Biomedical Computing Center, Leidos Biomedical Research, Inc., Frederick National Laboratory for Cancer Research (FNLCR), Frederick, MD, USA. 
shaow@mail.nih.gov.","Shao W, Shan J, Kearney MF, Wu X, Maldarelli F, Mellors JW, Luke B, Coffin JM, Hughes SH",,"CCR NIH HHS, NCI NIH HHS",21.0,United States +27515824,Ricebase,0.991965771,Ricebase,0.991965771,,0,1,http://ricebase.org,301,United States,"(42.4444,-76.4926)",http://web.archive.org/web/20220616031634/https://ricebase.org/,2016-08-10,"Dale Bumpers National Rice Research Center, USDA-ARS, Stuttgart, AR, USA jeremy.edwards@ars.usda.gov.","Edwards JD, Baldo AM, Mueller LA",,,8.0,United States +27543076,RNALocate,0.997115135,RNALocate,0.997115135,,0,1,http://www.rna-society.org/rnalocate,301,United States,"(40.2069,-111.642)",http://web.archive.org/web/20221102060829/http://www.rna-society.org/rnalocate/,2016-08-19,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Zhang T, Tan P, Wang L, Jin N, Li Y, Zhang L, Yang H, Hu Z, Zhang L, Hu C, Li C, Qian K, Zhang C, Huang Y, Li K, Lin H, Wang D",,,77.0,China +27580923,SkeletonGenetics,0.990358194,SkeletonGenetics,0.990358194,,0,1,http://101.200.211.232/skeletongenetics,"HTTPConnectionPool(host='101.200.211.232', port=80): Max retries exceeded with url: /skeletongenetics (Caused by ConnectTimeoutError(, 'Connection to 101.200.211.232 timed out. (connect timeout=5)'))",,,no_wayback,2016-08-31,"Department of Genetics of Dingli Clinical Medical School, Wenzhou Central Hospital, Wenzhou 325000, China.","Chen C, Jiang Y, Xu C, Liu X, Hu L, Xiang Y, Chen Q, Chen D, Li H, Xu X, Tang S",,,4.0,China +27800578,SCEGRAM,0.992693186,SCEGRAM,0.992693186,,0,1,http://www.scenegrammarlab.com/research/scegram-database,301,,,http://web.archive.org/web/20210514012102/https://www.scenegrammarlab.com/research/scegram-database/,2017-10-01,"Scene Grammar Lab, Department of Cognitive Psychology, Goethe University Frankfurt, Theodor-W.-Adorno-Platz 6, 60323, Frankfurt am Main, Germany. 
oehlschlaeger@psych.uni-frankfurt.de.","Öhlschläger S, Võ ML",,Deutsche Forschungsgemeinschaft,8.0,Germany +27899579,SNP2TFBS,0.997906208,SNP2TFBS,0.997906208,,0,1,http://ccg.vital-it.ch/snp2tfbs,301,Switzerland,"(46.5191,6.56676)",http://web.archive.org/web/20180311182825/http://ccg.vital-it.ch:80/snp2tfbs/,2016-11-28,"Swiss Institute for Experimental Cancer Research (ISREC), School of Life Sciences, Swiss Federal Institute of Technology (EPFL), CH-1015 Lausanne, Switzerland.","Kumar S, Ambrosini G, Bucher P",,Swiss National Science Foundation,66.0,Switzerland +27899672,SMR,0.991144598,SMR,0.991144598,SWISS-MODEL Repository,0.87782962,1,http://swissmodel.expasy.org/repository,302,,,http://web.archive.org/web/20220825211854/https://swissmodel.expasy.org/repository/,2016-11-29,"Biozentrum, University of Basel, Klingelbergstrasse 50-70, CH-4056 Basel, Switzerland.","Bienert S, Waterhouse A, de Beer TA, Tauriello G, Studer G, Bordoli L, Schwede T",,,391.0,Switzerland +27940610,RPAN,0.977469802,RPAN,0.977469802,Rice Pan-genome Browser,0.900623749,1,"http://cgm.sjtu.edu.cn/3kricedb/, http://www.rmbreeding.cn/pan3k","302, 302",,", ","http://web.archive.org/web/20221017030238/https://cgm.sjtu.edu.cn/3kricedb, http://web.archive.org/web/20220806204327/https://www.rmbreeding.cn/pan3k",2016-12-10,"Department of Bioinformatics and Biostatistics, School of Life Sciences and Biotechnology, Shanghai Jiao Tong University, Shanghai 200240, China.","Sun C, Hu Z, Zheng T, Lu K, Zhao Y, Wang W, Shi J, Wang C, Lu J, Zhang D, Li Z, Wei C",,,43.0,China +28011754,RenalDB,0.996478856,RenalDB,0.996478856,,0,1,http://renaldb.uni-frankfurt.de,200,Germany,"(50.16,8.6333)",http://web.archive.org/web/20210513194721/http://renaldb.uni-frankfurt.de/,2018-03-01,"Institute of Cardiovascular Regeneration, Centre for Molecular Medicine, Goethe University Frankfurt, Theodor-Stern-Kai 7, Frankfurt am Main, Germany.","Weirick T, Militello G, Ponomareva Y, John D, Döring C, Dimmeler S, Uchida 
S",,,9.0,Germany +28025342,RiceATM,0.990825713,RiceATM,0.990825713,,0,1,http://syslab3.nchu.edu.tw/rice,"HTTPConnectionPool(host='syslab3.nchu.edu.tw', port=80): Max retries exceeded with url: /rice (Caused by ConnectTimeoutError(, 'Connection to syslab3.nchu.edu.tw timed out. (connect timeout=5)'))",,,no_wayback,2016-12-26,Institute of Biomedical Sciences.,"Liu WT, Yang CC, Chen RK, Jwo WS, Wu CW, Ting WY, Shung DP, Liu CC, Chen JJ",,,0.0, +28077569,RAIN,0.991763949,RAIN,0.991763949,RNA-protein Association and Interaction Networks,0.843281314,1,http://rth.dk/resources/rain,301,Denmark,"(55.6482,12.6014)",http://web.archive.org/web/20220618064101/https://rth.dk/resources/rain/,2017-01-10,"Center for Non-coding RNA in Technology and Health, University of Copenhagen, Copenhagen,, Groennegaardsvej 3, DK-1870 Frederiksberg C, Denmark.","Junge A, Refsgaard JC, Garde C, Pan X, Santos A, Alkan F, Anthon C, von Mering C, Workman CT, Jensen LJ, Gorodkin J",,Novo Nordisk Foundation Center for Protein Research,15.0,Denmark +28194231,SkinSensDB,0.995361865,SkinSensDB,0.995361865,,0,1,http://cwtung.kmu.edu.tw/skinsensdb,301,,,no_wayback,2017-01-31,"School of Pharmacy, Kaohsiung Medical University, 100 Shih-Chuan 1st Road, Kaohsiung, 80708 Taiwan.","Wang CC, Lin YC, Wang SS, Shih C, Lin YH, Tung CW",,"National Health Research Institutes of Taiwan, NSYSU-KMU Joint Research Project, Kaohsiung Medical University Research Foundation, Research Center for Environmental Medicine, Ministry of Science and Technology, Taiwan, Ministry of Science and Technology, Taiwan",4.0, +28361715,SheddomeDB,0.996616364,SheddomeDB,0.996616364,,0,1,http://bal.ym.edu.tw/SheddomeDB,"HTTPConnectionPool(host='bal.ym.edu.tw', port=80): Max retries exceeded with url: /SheddomeDB (Caused by ConnectTimeoutError(, 'Connection to bal.ym.edu.tw timed out. 
(connect timeout=5)'))",,,no_wayback,2017-03-14,"Institute of Biomedical Informatics, National Yang Ming University, Taipei, 112, Taiwan.","Tien WS, Chen JH, Wu KP",,,9.0, +28365723,SilkPathDB,0.989426136,SilkPathDB,0.989426136,Pathogen,0.592897296,1,http://silkpathdb.swu.edu.cn,301,,,http://web.archive.org/web/20220815113404/https://silkpathdb.swu.edu.cn/,2017-01-01,"State Key Laboratory of Silkworm Genome Biology, Southwest University, Chongqing 400715, China.","Li T, Pan GQ, Vossbrinck CR, Xu JS, Li CF, Chen J, Long MX, Yang M, Xu XF, Xu C, Debrunner-Vossbrinck BA, Zhou ZY",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",3.0,China +28438161,REFOLDdb,0.993212402,REFOLDdb,0.993212402,,0,1,http://p4d-info.nig.ac.jp/refolddb,503,,,http://web.archive.org/web/20220615213114/http://p4d-info.nig.ac.jp/refolddb/,2017-04-24,"Center for Information Biology, National Institute of Genetics, 1111 Yata Mishima, Shizuoka, 411-8540, Japan.","Mizutani H, Sugawara H, Buckle AM, Sangawa T, Miyazono KI, Ohtsuka J, Nagata K, Shojima T, Nosaki S, Xu Y, Wang D, Hu X, Tanokura M, Yura K",,"Ministry of Education, Culture, Sports, Science and Technology of Japan (MEXT) and the Japan Agency for Medical Research and Development (AMED)",3.0,Japan +28529082,RED,0.91165034,RED,0.91165034,Rice Expression Database,0.904367864,1,http://expression.ic4r.org,200,,,http://web.archive.org/web/20220218140203/http://expression.ic4r.org/,2017-05-04,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China; BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China; University of Chinese Academy of Sciences, Beijing 100049, China.","Xia L, Zou D, Sang J, Xu X, Yin H, Li M, Wu S, Hu S, Hao L, Zhang Z",,"High Technology Research and Development, National Natural Science Foundation of China, Chinese 
Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences",43.0,"China, China, China" +28539606,SesameFG,0.989373446,SesameFG,0.989373446,,0,1,http://ncgr.ac.cn/SesameFG,"HTTPConnectionPool(host='ncgr.ac.cn', port=80): Max retries exceeded with url: /SesameFG (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,no_wayback,2017-05-24,"Key Laboratory of Biology and Genetic Improvement of Oil Crops, Ministry of Agriculture of People's Republic of China, Oilcrops Research Institute, Chinese Academy of Agricultural Sciences, Wuhan, 430062, China.","Wei X, Gong H, Yu J, Liu P, Wang L, Zhang Y, Zhang X",,,11.0,"China, China" +28651544,SalmoBase,0.996645689,SalmoBase,0.996645689,,0,1,http://www.salmobase.org,302,,,http://web.archive.org/web/20221017030214/https://salmobase.org/,2017-06-26,"Centre for Integrative Genetics (CIGENE), Department of Animal and Aquacultural Sciences (IHA), Faculty of Biosciences (BIOVIT), Norwegian University of Life Sciences (NMBU), 1432, Ås, Akershus, Norway. 
jeevan.karloss@nmbu.no.","Samy JKA, Mulugeta TD, Nome T, Sandve SR, Grammes F, Kent MP, Lien S, Våge DI",,Norges Forskningsråd,18.0,Norway +28850115,RefEx,0.944570541,RefEx,0.944570541,Reference Expression dataset,0.787088712,1,http://refex.dbcls.jp,200,,,http://web.archive.org/web/20220725180315/https://refex.dbcls.jp/,2017-08-29,"Database Center for Life Science, Joint Support-Center for Data Science Research, Research Organization of Information and Systems, 1111 Yata, Mishima 411-8540, Japan.","Ono H, Ogasawara O, Okubo K, Bono H",,,23.0,Japan +28862214,RespCanDB,0.997904003,RespCanDB,0.997904003,Respiratory cancer database,0.9777426,1,http://ridb.subdic-bioinformatics-nitrr.in,"HTTPConnectionPool(host='ridb.subdic-bioinformatics-nitrr.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2017-07-01,"Department of Sub-DIC Bioinformatics, National Institute of Technology, Raipur, Chhattisgarh, India.","Choubey J, Choudhari JK, Patel A, Verma MK",,,0.0,India +28964253,RiceMetaSys,0.956623197,RiceMetaSys,0.956623197,,0,1,http://14.139.229.201,200,India,"(28.6542,77.2373)",no_wayback,2017-09-30,"ICAR-National Research Centre on Plant Biotechnology, LBS Building, Pusa Campus, New Delhi, 110012, India.","Sandhu M, Sureshkumar V, Prakash C, Dixit R, Solanke AU, Sharma TR, Mohapatra T, S V AM",,Centre for Agricultural Bioinformatics scheme (CABin): Indian council of Agricultural Research Indian Council of Agricultural Research (IN),4.0,India +28984188,ScaPD,0.998022795,ScaPD,0.998022795,,0,1,http://bioinfo.wilmer.jhu.edu/ScaPD,301,,,http://web.archive.org/web/20220712172331/http://bioinfo.wilmer.jhu.edu/ScaPD/,2017-10-03,"Department of Ophthalmology, Johns Hopkins School of Medicine, Baltimore, MD, USA.","Han X, Wang J, Wang J, Liu S, Hu J, Zhu H, Qian J",,,3.0,United States +29028888,RRDB,0.973243475,RRDB,0.973243475,RNA-RNA docking 
benchmark,0.873065448,1,http://huanglab.phys.hust.edu.cn/RRDbenchmark,301,,,http://web.archive.org/web/20220523101952/http://huanglab.phys.hust.edu.cn/RRDbenchmark/,2018-02-01,"School of Physics, Huazhong University of Science and Technology, Wuhan, Hubei 430074, People's Republic of China.","Yan Y, Huang SY",,"National Natural Science Foundation of China, Huazhong University of Science and Technology",3.0,China +29039006,RBFLDB,0.992151543,RBFLDB,0.992151543,RIKEN Full-length cDNA Database,0.988669369,1,"http://www.brachypodium.org, http://brachy.bmep.riken.jp/ver.1/index.pl","200, HTTPConnectionPool(host='brachy.bmep.riken.jp', port=80): Max retries exceeded with url: /ver.1/index.pl (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",United States,"(38.6706,-90.3705), ","http://web.archive.org/web/20221006203056/https://brachypodium.org/, http://web.archive.org/web/20200926200853/http://brachy.bmep.riken.jp/ver.1/index.pl",2018-01-01,"Cellulose Production Research Team, RIKEN Center for Sustainable Resource Science, #E804 8F East Research Bldg., 1-7-22 Suehiro-cho, Tsurumi-ku, Yokohama, Kanagawa, 230-0045, Japan. 
keiichi.mochida@riken.jp.","Mochida K, Shinozaki K",,,0.0,Japan +29040625,RISE,0.989733219,RISE,0.989733219,,0,1,http://rise.zhanglab.net,200,,,http://web.archive.org/web/20221024022924/http://rise.zhanglab.net/,2018-01-01,"MOE Key Laboratory of Bioinformatics, Center for Synthetic and Systems Biology, Tsinghua-Peking Joint Center for Life Sciences, School of Life Sciences, Tsinghua University, Beijing 100084, China.","Gong J, Shao D, Xu K, Lu Z, Lu ZJ, Yang YT, Zhang QC",,"NHGRI NIH HHS, NHGRI NIH HHS",37.0,China +29045713,SCPortalen,0.995376956,SCPortalen,0.995376956,,0,1,http://single-cell.clst.riken.jp,301,,,http://web.archive.org/web/20210612183659/http://single-cell.clst.riken.jp/,2018-01-01,"Division of Genomic Technologies (DGT), RIKEN Center for Life Science Technologies (CLST), Yokohama, Kanagawa 230-0045, Japan.","Abugessaisa I, Noguchi S, Böttcher M, Hasegawa A, Kouno T, Kato S, Tada Y, Ura H, Abe K, Shin JW, Plessy C, Carninci P, Kasukawa T",,,13.0,Japan +29057095,SalmoNet,0.995892107,SalmoNet,0.995892107,,0,1,http://salmonet.org,200,,,http://web.archive.org/web/20220615233030/http://salmonet.org/,2017-10-18,"Quadram Institute Bioscience, Norwich Research Park, Norwich, NR4 7UA UK.","Métris A, Sudhakar P, Fazekas D, Demeter A, Ari E, Olbei M, Branchu P, Kingsley RA, Baranyi J, Korcsmáros T",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",3.0, +29059366,SBCDDB,0.994983792,SBCDDB,0.994983792,Sleeping Beauty Cancer Driver Database,0.762524348,1,http://sbcddb.moffitt.org,301,,,http://web.archive.org/web/20220615180922/http://sbcddb.moffitt.org/,2018-01-01,"Cancer Research Program, Houston Methodist Research 
Institute, Houston, Texas, USA.","Newberg JY, Mann KM, Mann MB, Jenkins NA, Copeland NG",,NCI NIH HHS,15.0,United States +"29062930, 29156309",SMBP,0.996750757,SMBP,0.996750757,Secondary Metabolite Bioinformatics Portal,0.986019856,2,http://www.secondarymetabolites.org,200,,,http://web.archive.org/web/20221102173233/https://secondarymetabolites.org/,2017-10-01,"The Novo Nordisk Foundation Center for Biosustainability, Technical University of Denmark, Kogle Alle 6, 2970 Hørsholm, Denmark., The Novo Nordisk Foundation Center for Biosustainability, Technical University of Denmark, Dk 2800 Kgs. Lyngby, Denmark; Department of Chemical and Biomolecular Engineering & BioInformatics Research Center, Korea Advanced Institute of Science and Technology (KAIST), Daejeon 34141, Republic of Korea.","Weber T, Kim HU, Kim HU, Blin K, Lee SY, Weber T",", ","NNF Center for Biosustainability, Novo Nordisk Fonden, Novo Nordisk, Novo Nordisk Fonden, National Research Foundation of Korea, Novo Nordisk Fonden, NNF Center for Biosustainability, National Research Foundation of Korea, Ministry of Science, ICT and Future Planning",59.0,"Denmark, Denmark, Denmark, Denmark" +29069402,SEECancer,0.996404111,SEECancer,0.996404111,,0,1,http://biocc.hrbmu.edu.cn/SEECancer,302,,,http://web.archive.org/web/20221017153717/http://biocc.hrbmu.edu.cn/SEECancer/,2018-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150081, China.","Zhang H, Luo S, Zhang X, Liao J, Quan F, Zhao E, Zhou C, Yu F, Yin W, Zhang Y, Xiao Y, Li X",,,6.0,China +29069520,RNArchitecture,0.993474245,RNArchitecture,0.993474245,,0,1,http://iimcb.genesilico.pl/RNArchitecture,301,Poland,"(52.2296,21.0067)",http://web.archive.org/web/20220527124358/https://iimcb.genesilico.pl/RNArchitecture,2018-01-01,"Laboratory of Bioinformatics and Protein Engineering, International Institute of Molecular and Cell Biology in Warsaw, ul. Ks. 
Trojdena 4, PL-02-109 Warsaw, Poland.","Boccaletto P, Magnus M, Almeida C, Zyla A, Astha A, Pluta R, Baginski B, Jankowska E, Dunin-Horkawicz S, Wirecki TK, Boniecki MJ, Stefaniak F, Bujnicki JM",,,10.0,Poland +29155231,RTFAdb,0.981455028,RTFAdb,0.981455028,,0,1,http://tools.ibg.deu.edu.tr/rtfa,301,,,http://web.archive.org/web/20220307212640/http://tools.ibg.deu.edu.tr/rtfa/,2017-11-17,"İzmir International Biomedicine and Genome Institute (iBG-İzmir), Dokuz Eylül University, 35340, İnciraltı, İzmir, Turkey. Electronic address: gokhan.karakulah@deu.edu.tr.",Karakülah G,,,3.0,Turkey +29222504,RNA Structurome Database,0.584489805,,0,RNA Structurome Database,0.584489805,1,http://structurome.bb.iastate.edu,302,,,http://web.archive.org/web/20220616034945/https://structurome.bb.iastate.edu/,2017-12-08,"Roy J. Carver Department of Biochemistry, Biophysics, and Molecular Biology, Iowa State University, 2437 Pammel Drive, Ames, IA, 50011, USA.","Andrews RJ, Baber L, Moss WN",,"NIGMS NIH HHS, NIGMS NIH HHS",20.0,United States +29228298,SeedStor,0.993587971,SeedStor,0.993587971,,0,1,http://www.seedstor.ac.uk,302,,,http://web.archive.org/web/20221017090756/https://www.seedstor.ac.uk/,2018-01-01,"Germplasm Resources Unit, John Innes Centre, Colney Lane, Norwich NR4 7UH, UK.","Horler RSP, Turner AS, Fretter P, Ambrose M",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",1.0, +29309507,SEGreg,0.997626861,SEGreg,0.997626861,SEG regulation database,0.98023203,1,http://bioinfo.life.hust.edu.cn/SEGreg,200,,,no_wayback,2019-07-01,None,"Tang Q, Zhang Q, Lv Y, Miao YR, Guo AY",,National Natural Science Foundation of China,6.0, +29617941,SCRIPT-MAP,0.988087222,SCRIPT-MAP,0.988087222,,0,1,http://www.firmiana.org/responders,"HTTPConnectionPool(host='www.firmiana.org', port=80): Max retries exceeded with url: /responders (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not 
known'))",,,no_wayback,2018-08-01,"State Key Laboratory of Proteomics, Beijing Proteome Research Center, Beijing Institute of Lifeomics, National Center for Protein Sciences (The PHOENIX Center, Beijing), Beijing, China.","Liu W, Wei L, Sun J, Feng J, Guo G, Liang L, Fu T, Liu M, Li K, Huang Y, Zhu W, Zhen B, Wang Y, Ding C, Qin J",,"National High-tech R&D Program of China, National Key Research and Development Program of China, Beijing Natural Science Foundation, International Science & Technology Cooperation Program of China, International Science & Technology Cooperation Program of China, National Natural Science Foundation of China, Shanghai Municipal Science and Technology Major Project, National Natural Science Foundation of China, Beijing Natural Science Foundation, National Program on Key Basic Research Project, National Natural Science Foundation of China, National Natural Science Foundation of China, National Program on Key Basic Research Project, International Science & Technology Cooperation Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China",0.0,China +29625201,saRNAdb,0.994530654,saRNAdb,0.994530654,,0,1,http://bioinfo.imtech.res.in/manojk/sarna,404,,,http://web.archive.org/web/20180610115231/http://bioinfo.imtech.res.in/manojk/sarna/,2018-04-03,"Bioinformatics Centre, Institute of Microbial Technology, Council of Scientific and Industrial Research, Sector 39A, Chandigarh 160036, India.","Dar SA, Kumar M",,,2.0,India +29696033,REDIdb,0.993994772,REDIdb,0.993994772,,0,1,http://srv00.recas.ba.infn.it/redidb/index.html,200,,,http://web.archive.org/web/20220615140541/http://srv00.recas.ba.infn.it/redidb/index.html,2018-04-11,"Institute of Biomembranes, Bioenergetics and Molecular Biotechnologies, Consiglio Nazionale delle Ricerche, Bari, Italy.","Lo Giudice C, Pesole G, Picardi E",,Consiglio Nazionale delle Ricerche,10.0,Italy 
+29733404,SPAR,0.965488374,SPAR,0.965488374,Small RNA-seq Portal for Analysis of sequencing,0.857351972,1,http://www.lisanwanglab.org/SPAR,301,,,no_wayback,2018-07-01,"Penn Neurodegeneration Genomics Center, University of Pennsylvania, Philadelphia, PA 19104, USA.","Kuksa PP, Amlie-Wolf A, Katanic Ž, Valladares O, Wang LS, Leung YY",,"National Institute on Aging, NIA NIH HHS, NIGMS NIH HHS, National Institute on Aging, NIA NIH HHS, National Institute on Aging, National Institute on Aging, National Institute of General Medical Sciences, National Institute on Aging",13.0,United States +29774137,SACPD,0.991069496,SACPD,0.991069496,Saudi anti-human cancer plants database,0.969169753,1,http://teeqrani1.wixsite.com/sapd,301,,,no_wayback,2018-01-01,"Biology and Chemistry Department, University College at Al-Qunfudhah, Umm AL-Qura University, Makkah, Saudi Arabia.",Al-Zahrani AA,,,2.0,Saudi Arabia +29892516,SEVENS,0.997551143,SEVENS,0.997551143,,0,1,"http://sevens.cbrc.jp, http://sevens.chem.aoyama.ac.jp","HTTPConnectionPool(host='sevens.cbrc.jp', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to sevens.cbrc.jp timed out. (connect timeout=5)')), HTTPConnectionPool(host='sevens.chem.aoyama.ac.jp', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to sevens.chem.aoyama.ac.jp timed out. 
(connect timeout=5)'))",,", ","http://web.archive.org/web/20190821231511/http://sevens.cbrc.jp:80/, no_wayback",2018-04-27,"Aoyama Gakuin University, College of Science and Engineering, Sagamihara, Kanagawa 252-5258, Japan.","Ikeda M, Sugihara M, Suwa M",,,2.0,Japan +29931156,RBPMetaDB,0.994099975,RBPMetaDB,0.994099975,,0,1,http://rbpmetadb.yubiolab.org,200,,,http://web.archive.org/web/20221016221850/http://rbpmetadb.yubiolab.org/,2018-01-01,"Department of Electrical and Computer Engineering, Texas A&M University, College Station, TX 77843, USA.","Li J, Deng SP, Vieira J, Thomas J, Costa V, Tseng CS, Ivankovic F, Ciccodicola A, Yu P",,,2.0,United States +29961821,SDADB,0.996021926,SDADB,0.996021926,,0,1,http://sda.denglab.org,200,,,http://web.archive.org/web/20211028143038/http://sda.denglab.org/,2018-01-01,"School of Software, Central South University, Changsha 410075, China.","Zeng C, Zhan W, Deng L",,"National Natural Science Foundation of China, Natural Science Foundation of Hunan Province",3.0,China +29989091,RicyerDB,0.99671818,RicyerDB,0.99671818,Rice Yield-related Database,0.840729028,1,http://server.malab.cn/Ricyer/index.html,200,,,http://web.archive.org/web/20200127083924/http://server.malab.cn:80/Ricyer/index.html,2018-05-22,"School of Aerospace Engineering, Xiamen University, Xiamen, 361001, China.","Jiang J, Xing F, Zeng X, Zou Q",,,10.0,China +30020436,realDB,0.993008554,realDB,0.993008554,,0,1,http://realdb.algaegenome.org,"HTTPConnectionPool(host='realdb.algaegenome.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190402185912/http://realdb.algaegenome.org:80/,2018-01-01,"State Key Laboratory of Ecological Pest Control for Fujian and Taiwan Crops, Key Laboratory of Genetics, Breeding and Multiple Utilization of Corps, Ministry of Education, Fujian Provincial Key Laboratory of Haixia Applied Plant Systems 
Biology, Fujian Agriculture and Forestry University, Fuzhou 350002, China.","Chen F, Zhang J, Chen J, Li X, Dong W, Hu J, Lin M, Liu Y, Li G, Wang Z, Zhang L",,"State Key Laboratory of Ecological Pest Control for Fujian and Taiwan Crops, Shandong Province Natural Science foundation, Fujian Province, National Natural Science Foundation of China",0.0,China +30165538,SKmDB,0.996431947,SKmDB,0.996431947,,0,1,http://sunlab.cpy.cuhk.edu.hk/SKmDB,301,,,http://web.archive.org/web/20211127051935/https://sunlab.cpy.cuhk.edu.hk/SKmDB/,2019-03-01,"Li Ka Shing Institute of Health Sciences, The Chinese University of Hong Kong, Shatin, New Territories, Hong Kong SAR, China.","Yuan J, Zhou J, Wang H, Sun H",,"Focused Innovations Scheme B, Research Grants Council, Hong Kong Special Administrative Region, CUHK direct, Hong Kong Special Administrative Region, CUHK direct, Hong Kong Special Administrative Region, RGC Collaborative Research Fund, Hong Kong Special Administrative Region, CUHK direct, CUHK direct, Hong Kong Special Administrative Region, Hong Kong Special Administrative Region, Hong Kong Special Administrative Region, General Research Funds",1.0,"China, Hong Kong, Hong Kong" +30202870,SequencEnG,0.989631832,SequencEnG,0.989631832,Sequencing Techniques Engine for Genomics,0.744912343,1,http://education.knoweng.org/sequenceng,301,,,http://web.archive.org/web/20221017020334/http://education.knoweng.org/sequenceng/,2019-04-01,"Department of Bioengineering, University of Illinois at Urbana-Champaign, Urbana, IL, USA.","Zhang Y, Manjunath M, Kim Y, Heintz J, Song JS",,"National Institutes of Health, NIGMS NIH HHS",2.0,United States +30321422,RetroRules,0.970715165,RetroRules,0.970715165,,0,1,http://retrorules.org,301,France,"(48.7645,2.16901)",http://web.archive.org/web/20221016223720/https://retrorules.org/,2019-01-01,"Micalis Institute, INRA, AgroParisTech, Université Paris-Saclay, 78350 Jouy-en-Josas, France.","Duigou T, du Lac M, Carbonell P, Faulon JL",,"French National 
Research Agency, Horizon 2020, Biotechnology and Biological Sciences Research Council",18.0,France +30329093,REDfly,0.997037947,REDfly,0.997037947,,0,1,http://redfly.ccr.buffalo.edu,200,,,http://web.archive.org/web/20221017003818/http://redfly.ccr.buffalo.edu/,2019-01-01,"Center for Computational Research, State University of New York at Buffalo, Buffalo, NY 14203, USA.","Rivera J, Keränen SVE, Gallo SM, Halfon MS",,"National Institutes of Health, National Science Foundation, National Science Foundation, NIGMS NIH HHS",14.0,United States +30371817,SEdb,0.99522537,SEdb,0.99522537,comprehensive human super-enhancer database,0.846477594,1,http://www.licpathway.net/sedb,301,,,http://web.archive.org/web/20220817125414/http://licpathway.net/sedb/,2019-01-01,"School of Medical Informatics, Daqing Campus, Harbin Medical University, Daqing 163319, China.","Jiang Y, Qian F, Bai X, Liu Y, Wang Q, Ai B, Han X, Shi S, Zhang J, Li X, Tang Z, Pan Q, Wang Y, Wang F, Li C",,"National Natural Science Foundation of China, Natural Science Foundation of Heilongjiang Province, National Natural Science Foundation of China",63.0,China +30380119,SAGD,0.992830694,SAGD,0.992830694,,0,1,http://bioinfo.life.hust.edu.cn/SAGD,200,,,http://web.archive.org/web/20220615180925/http://bioinfo.life.hust.edu.cn/SAGD,2019-01-01,"Hubei Key Laboratory of Agricultural Bioinformatics, College of Life Science and Technology, Huazhong Agricultural University, Wuhan, Hubei 430070, PR China.","Shi MW, Zhang NA, Shi CP, Liu CJ, Luo ZH, Wang DY, Guo AY, Chen ZX",,"Huazhong Agricultural University, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",5.0,China +"30453895, 32009518",RATEmiRs,0.978475153,RATEmiRs,0.978475153,Rat Atlas of Tissue-specific and Enriched miRNAs,0.862209494,2,http://www.niehs.nih.gov/ratemirs,301,,,no_wayback,2020-02-12,"nan, nan","nan, nan","nan, nan","nan, 
nan",0.0, +30481257,SkeletalVis,0.980601847,SkeletalVis,0.980601847,,0,1,http://phenome.manchester.ac.uk,200,,,http://web.archive.org/web/20220620050634/http://phenome.manchester.ac.uk/,2019-07-01,,,,,0.0, +30593925,SITVIT2,0.996629322,SITVIT2,0.996629322,,0,1,http://www.pasteur-guadeloupe.fr:8081/SITVIT2,302,,,http://web.archive.org/web/20220520160652/http://www.pasteur-guadeloupe.fr:8081/SITVIT2/,2018-12-26,"WHO Supranational TB Reference Laboratory, Unité de la Tuberculose et des Mycobactéries, Institut Pasteur de Guadeloupe, Abymes, Guadeloupe, France. Electronic address: dcouvin@pasteur-guadeloupe.fr.","Couvin D, David A, Zozio T, Rastogi N",,"Programme Opérationnel FEDER-Guadeloupe-Conseil Régional, European Union and Guadeloupe Region, FEDER",43.0,"France, Guadeloupe, Guadeloupe" +30622655,REXdb,0.975555122,REXdb,0.975555122,retrotransposon,0.663883686,1,"http://repeatexplorer-elixir.cerit-sc.cz/)Ã, http://repeatexplorer.org","HTTPConnectionPool(host='repeatexplorer-elixir.cerit-sc.cz', port=80): Max retries exceeded with url: /)%C3%83%C2%A2%C3%82%C2%88%C3%82%C2%9A%C3%83%C2%83%C3%82%C2%89%C3%83%C2%82%C3%82%C2%AC%C3%83%C2%83%C3%82%C2%89%C3%83%C2%A2%C3%82%C2%88%C3%82%C2%9A%C3%83%C2%83%C3%82%C2%87%C3%83%C2%82%C3%82%C2%AC%C3%83%C2%83%C3%82%C2%89%C3%83%C2%A2%C3%82%C2%88%C3%82%C2%9A%C3%83%C2%83%C3%82%C2%89%C3%83%C2%82%C3%82%C2%AC%C3%83%C2%83%C3%82%C2%87%C3%83%C2%A2%C3%82%C2%88%C3%82%C2%9A%C3%83%C2%83%C3%82%C2%87%C3%83%C2%82%C3%82%C2%AC%C3%83%C2%83%C3%82%C2%89 (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')), 200",Czechia,", (48.8805,14.7368)","no_wayback, http://web.archive.org/web/20220903134051/http://repeatexplorer.org/",2019-01-03,,,,,0.0, +30674925,Smooth Muscle Transcriptome Browser,0.98817302,SMTB,0.983972132,Smooth Muscle Transcriptome 
Browser,0.98817302,1,http://med.unr.edu/physio/transcriptome,301,,,http://web.archive.org/web/20220617041704/https://med.unr.edu/physio/transcriptome,2019-01-23,,,,,0.0, +30714194,SDRED,0.985853091,SDRED,0.985853091,Short-chain Dehydrogenases/Reductases Engineering Database,0.874342166,1,http://sdred.biocatnet.de,301,,,no_wayback,2019-02-25,"Institute of Biochemistry and Technical Biochemistry, University of Stuttgart, Stuttgart, Germany.","Gräff M, Buchholz PCF, Stockinger P, Bommarius B, Bommarius AS, Pleiss J",,Deutsche Forschungsgemeinschaft,5.0,Germany +30721533,SiMPLOD,0.995890737,SiMPLOD,0.995890737,Structurally-integrated database for Mutations of PLOD genes,0.912969387,1,http://fornerislab.unipv.it/SiMPLOD,301,,,http://web.archive.org/web/20210920075652/http://fornerislab.unipv.it/SiMPLOD/,2019-03-12,"The Armenise-Harvard Laboratory of Structural Biology, Department of Biology and Biotechnology, University of Pavia, Pavia, Italy.","Scietti L, Campioni M, Forneris F",,"Dipartimenti di Eccellenza Program, Italian Association for Cancer Research (AIRC), My First AIRC Grant, Giovanni Armenise-Harvard Career Development Award, Fondazione Cariplo, Giovanni Armenise-Harvard Foundation, Italian Ministry of Education, University and Research (MIUR), Dept. 
of Biology and Biotechnology, University of Pavia",6.0,Italy +30726866,RiboD,0.977628946,RiboD,0.977628946,,0,1,http://ribod.iiserkol.ac.in,200,India,"(22.518,88.3832)",http://web.archive.org/web/20220727154852/http://ribod.iiserkol.ac.in/,2019-09-01,"Department of Physical Sciences, Indian Institute of Science Education and Research Kolkata, Mohanpur-741246, India.","Mukherjee S, Das Mandal S, Gupta N, Drory-Retwitzer M, Barash D, Sengupta S",,,7.0,India +30794542,RESPIRE,0.993819237,RESPIRE,0.993819237,of Proteins Involved in the Red Blood Cell Environment,0.857962569,1,http://www.dsimb.inserm.fr/respire,301,France,"(43.5312,5.4554)",http://web.archive.org/web/20220303200332/https://www.dsimb.inserm.fr/respire/,2019-02-22,,,,,0.0, +31075273,refTSS,0.954385519,refTSS,0.954385519,,0,1,http://reftss.clst.riken.jp,302,,"(35.4333,139.6500)",no_wayback,2019-05-08,"RIKEN Center for Integrative Medical Sciences, 1-7-22, Suehiro-Cho, Tsurumi-Ku, Yokohama, Kanagawa 230-0045, Japan.","Abugessaisa I, Noguchi S, Hasegawa A, Kondo A, Kawaji H, Carninci P, Kasukawa T",,,9.0,Japan +31337332,Soybean-VCF2Genomes,0.917709383,Soybean-VCF2Genomes,0.917709383,,0,1,http://pgl.gnu.ac.kr/soy_vcf2genome,301,,,http://web.archive.org/web/20220522004727/http://pgl.gnu.ac.kr/soy_vcf2genome/,2019-07-24,,,,,0.0, +31494246,SliceIt,0.996555865,SliceIt,0.996555865,,0,1,http://sliceit.soic.iupui.edu,302,,,http://web.archive.org/web/20220522203447/https://sliceit.soic.iupui.edu/,2019-09-05,,,,,0.0, +31511885,SNP2APA,0.91174376,SNP2APA,0.91174376,,0,1,http://gong_lab.hzau.edu.cn/SNP2APA,"HTTPConnectionPool(host='gong_lab.hzau.edu.cn', port=80): Max retries exceeded with url: /SNP2APA (Caused by ReadTimeoutError(""HTTPConnectionPool(host='gong_lab.hzau.edu.cn', port=80): Read timed out. 
(read timeout=5)""))",,,no_wayback,2020-01-01,,,,,0.0, +31611909,SCDevDB,0.997864783,SCDevDB,0.997864783,,0,1,http://scdevdb.deepomics.org,301,,,http://web.archive.org/web/20220618075412/https://scdevdb.deepomics.org/,2019-09-26,"Department of Computer Science, City University of Hong Kong, Kowloon, Hong Kong.","Wang Z, Feng X, Li SC",,,3.0,"Hong Kong, Hong Kong" +31642484,SilkDB,0.996998668,SilkDB,0.996998668,,0,1,http://silkdb.bioinfotoolkits.net,301,,,no_wayback,2020-01-01,"Biological Science Research Center, Southwest University, Chongqing 400715, China.","Lu F, Wei Z, Luo Y, Guo H, Zhang G, Xia Q, Wang Y",,"National Natural Science Foundation of China, Fundamental Research Funds",18.0,China +31713629,SpatialDB,0.995859206,SpatialDB,0.995859206,,0,1,http://www.spatialomics.org/SpatialDB,301,,,http://web.archive.org/web/20221016213005/http://spatialomics.org/SpatialDB/,2020-01-01,"Center for High Throughput Sequencing, Core Facility for Protein Research, Key Laboratory of RNA Biology, Institute of Biophysics, Chinese Academy of Sciences, Beijing 100101, China.","Fan Z, Chen R, Chen X",,"National Key Research and Development Project, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",11.0,China +31799597,RNAactDrug,0.996623337,RNAactDrug,0.996623337,,0,1,http://bio-bigdata.hrbmu.edu.cn/RNAactDrug,302,China,"(45.7004,126.62)",http://web.archive.org/web/20220617162221/http://bio-bigdata.hrbmu.edu.cn/RNAactDrug/,2020-12-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.","Dong Q, Li F, Xu Y, Xiao J, Xu Y, Shang D, Zhang C, Yang H, Tian Z, Mi K, Li X, Zhang Y",,"National Natural Science Foundation of China, Fundamental Research Funds for the Provincial Universities, National Natural Science Foundation of China, Heilongjiang Postdoctoral Science Foundation, National Key R&D Program of China, 
National Natural Science Foundation of China",6.0,China +31872320,RSRS,0.890815914,RSRS,0.890815914,Rice Stress-Resistant SNP database,0.872037998,1,http://bioinformatics.fafu.edu.cn/RSRS,"HTTPConnectionPool(host='bioinformatics.fafu.edu.cn', port=80): Max retries exceeded with url: /RSRS (Caused by ConnectTimeoutError(, 'Connection to bioinformatics.fafu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20210117093005/http://bioinformatics.fafu.edu.cn/RSRS/,2019-12-23,,,,,0.0, +31906603,RNAInter,0.994603395,RNAInter,0.994603395,,0,1,"http://www.rna-society.org/rnainter/, http://www.rna-society.org/raid","200, 301","United States, United States","(40.2069,-111.642), (40.2069,-111.642)","http://web.archive.org/web/20220804035555/https://www.rna-society.org/rnainter/, http://web.archive.org/web/20210416163312/https://www.rna-society.org/raid/",2020-01-01,,,,,0.0, +31950189,RNA Characterization of Secondary Structure Motifs,0.966837181,CoSSMos,0.890264094,RNA Characterization of Secondary Structure Motifs,0.966837181,1,http://rnacossmos.com,200,United States,"(42.3314,-83.0458)",http://web.archive.org/web/20211028113440/http://rnacossmos.com/,2020-01-01,"Saint Louis University, Department of Chemistry, 3501 Laclede Avenue, St. Louis, MO 63103 USA.","Richardson KE, Kirkpatrick CC, Znosko BM",,National Institutes of Health,1.0,United States +32096105,SANDchild,0.992918432,SANDchild,0.992918432,,0,1,http://psico.fcep.urv.cat/exp/files/SANDchild.xlsx,302,,,http://web.archive.org/web/20220707004006/https://psico.fcep.urv.cat/exp/files/SANDchild.xlsx,2020-10-01,"Instituto Pluridisciplinar, Universidad Complutense de Madrid, Paseo Juan XXIII, 1, 28040, Madrid, Spain. 
sabater93@gmail.com.","Sabater L, Guasch M, Ferré P, Fraga I, Hinojosa JA",,"Ministerio de Ciencia, Innovación y Universidades of Spain, Consellería de Educación, Xunta de Galicia, Consejería de Educación e Investigación, Comunidad de Madrid, Ministerio de Economía y Competitividad of Spain and the Fondo Europeo de Desarrollo Regional",1.0,Spain +32338561,RadAtlas,0.993727684,RadAtlas,0.993727684,,0,1,http://biokb.ncpsb.org/radatlas,301,Hong Kong,"(22.2908,114.1501)",no_wayback,2020-05-12,"State Key Laboratory of Proteomics, Beijing Proteome Research Center, National Center for Protein Sciences-Beijing (PHOENIX Center), Beijing Institute of Lifeomics, Beijing, China.","Xu H, Liu Y, Li Y, Diao L, Xun Z, Zhang Y, Wang Z, Li D",,,2.0,China +32345346,REPIC,0.998654127,REPIC,0.998654127,RNA EPItranscriptome Collection,0.978067458,1,http://repicmod.uchicago.edu/repic,301,United States,"(41.5873,-87.5103)",http://web.archive.org/web/20220815204900/http://repicmod.uchicago.edu/repic/,2020-04-28,"Section of Genetic Medicine, Department of Medicine, The University of Chicago, Chicago, IL, 60637, USA.","Liu S, Zhu A, He C, Chen M",,"National Institute of General Medical Sciences, NIGMS NIH HHS, National Human Genome Research Institute, NCI NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS",19.0,United States +32382747,RSVdb,0.997209489,RSVdb,0.997209489,,0,1,http://taolab.nwafu.edu.cn/rsvdb,301,,,no_wayback,2021-05-01,None,"Yu H, Zhang Y, Sun Q, Gao H, Tao S",,National Natural Science Foundation of China,2.0, +32487193,SCDb,0.972417712,SCDb,0.972417712,,0,1,http://www.stomachcancerdb.org,200,,,http://web.archive.org/web/20220621120941/http://www.stomachcancerdb.org/,2020-06-02,"Department of Gastroenterology, Jing'An District Centre Hospital of Shanghai (Huashan Hospital Fudan University Jing'An Branch), Shanghai, 200040, People's Republic of China.","Gu E, Song W, Liu A, Wang H",,Key Clinical Specialist Construction Programs of Shanghai Municipal Commission of Health and Family 
Planning,3.0,China +32508104,Scop3P,0.996602729,Scop3P,0.996602729,,0,1,http://iomics.ugent.be/scop3p,301,,,no_wayback,2020-06-18,"VIB-UGent Center for Medical Biotechnology, VIB, Ghent 9000, Belgium.","Ramasamy P, Turan D, Tichshenko N, Hulstaert N, Vandermarliere E, Vranken W, Martens L",,"Fonds Wetenschappelijk Onderzoek, Horizon 2020 Framework Programme, Universiteit Gent, Fonds Wetenschappelijk Onderzoek",3.0,Belgium +32608478,RNAWRE,0.990312815,RNAWRE,0.990312815,,0,1,http://rnawre.bio2db.com,200,,,http://web.archive.org/web/20220123155223/http://rnawre.bio2db.com/,2020-01-01,"School of Life Sciences, Center for Genomics and Computational Biology, North China University of Science and Technology, 21 Bohai Road, Caofeidian Xincheng, Tangshan 063009, China.","Nie F, Feng P, Song X, Wu M, Tang Q, Chen W",,"National Nature Scientific Foundation of China, Natural Science Foundation for Distinguished Young Scholar of Hebei Province",6.0,"China, China" +32621601,SAGER,0.995087028,SAGER,0.995087028,Symbiodiniaceae and Algal Genomic Resource Database,0.986229761,1,http://sampgr.org.cn,200,,,http://web.archive.org/web/20220618131256/http://sampgr.org.cn/,2020-01-01,"State Key Laboratory of Marine Environmental Science and College of Ocean and Earth Sciences, Xiamen University, Xiamen 361102, China.","Yu L, Li T, Li L, Lin X, Li H, Liu C, Guo C, Lin S",,"Natural Science Foundation of China, State Key Laboratory of Marine Environmental Science, Marine S&T Fund of Shandong Province for Pilot National Laboratory for Marine Science and Technology",2.0,China +32709339,saponin mass spectrometry database,0.897874147,SMSD,0.893061325,saponin mass spectrometry database,0.897874147,1,"http://47.92.73.208:8082/, http://cpu-smsd.com","200, 200",,", ","no_wayback, http://web.archive.org/web/20211129182936/http://cpu-smsd.com/",2020-06-03,"State Key Laboratory of Natural Medicines, School of Traditional Chinese Pharmacy, China Pharmaceutical University, #639 Longmian Avenue, 
Jiangning District, Nanjing 211198, China.","Huang FQ, Dong X, Yin X, Fan Y, Fan Y, Mao C, Zhou W",,"National Natural Science Foundation of China, Youth Natural Science Foundation, Fundamental Research Funds for the Central Universities",2.0,"China, China" +32785571,SELAdb,0.976938367,SELAdb,0.976938367,das,0.545053124,1,http://intranet.fm.usp.br/sela,301,,,http://web.archive.org/web/20220617002642/https://intranet.fm.usp.br/sela/,2020-08-10,"Disciplina de Endocrinologia e Metabologia, Departamento de Clinica Medica, LIM/42, Hospital das Clinicas HCFMUSP, Faculdade de Medicina, Universidade de Sao Paulo, Sao Paulo, SP, BR.","Lerario AM, Mohan DR, Montenegro LR, Funari MFA, Nishi MY, Narcizo AM, Benedetti AFF, Oba-Shinjo SM, Vitorino AJ, Santos RASXD, Jorge AAL, Onuchic LF, Marie SKN, Mendonca BB",,NIGMS NIH HHS,4.0, +32849839,RIGD,0.99081533,RIGD,0.99081533,Rosaceae Intronless Genes Database,0.965823472,1,http://www.rigdb.cn,200,,,http://web.archive.org/web/20220625225327/http://www.rigdb.cn/,2020-08-07,"School of Life Sciences, Anhui Agricultural University, Hefei, China.","Chen T, Meng D, Liu X, Cheng X, Wang H, Jin Q, Xu X, Cao Y, Cai Y",,"China Postdoctoral Science Foundation, National Natural Science Foundation of China",0.0,China +33010177,SC2disease,0.997551847,SC2disease,0.997551847,,0,1,http://easybioai.com/sc2disease,301,,,http://web.archive.org/web/20220503180122/http://easybioai.com/sc2disease/,2021-01-01,"School of Computer Science, Northwestern Polytechnical University, Xi'an 710072, China.","Zhao T, Lyu S, Lu G, Juan L, Zeng X, Wei Z, Hao J, Peng J",,"National Natural Science Foundation of China, National Natural Science Foundation of China",23.0,China +33021671,RMVar,0.992782235,RMVar,0.992782235,,0,1,http://rmvar.renlab.org,301,United States,"(40.2069,-111.642)",http://web.archive.org/web/20221022052903/https://rmvar.renlab.org/,2021-01-01,"State Key Laboratory of Oncology in South China, Cancer Center, Collaborative Innovation Center for Cancer 
Medicine, School of Life Sciences, Sun Yat-sen University, Guangzhou 510060, China.","Luo X, Li H, Liang J, Zhao Q, Xie Y, Ren J, Zuo Z",,"Guangdong Basic and Applied Basic Research Foundation, Guangdong Basic and Applied Basic Research Foundation, National Natural Science Foundation of China, Program for Guangdong Introducing Innovative and Entrepreneurial Teams, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",28.0,"China, China" +33045745,SilencerDB,0.997530401,SilencerDB,0.997530401,,0,1,http://health.tsinghua.edu.cn/silencerdb,301,,,http://web.archive.org/web/20220125112241/http://health.tsinghua.edu.cn/silencerdb/,2021-01-01,"Ministry of Education Key Laboratory of Bioinformatics, Research Department of Bioinformatics at the Beijing National Research Center for Information Science and Technology, Center for Synthetic and Systems Biology, Department of Automation, Tsinghua University, Beijing 100084, China.","Zeng W, Chen S, Cui X, Chen X, Gao Z, Jiang R",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Natural Science Foundation of China",6.0,China +33068412,RASP,0.992025733,RASP,0.992025733,RNA Atlas of Structure Probing,0.901401392,1,http://rasp.zhanglab.net,200,,,http://web.archive.org/web/20221016232359/http://rasp.zhanglab.net/,2021-01-01,"MOE Key Laboratory of Bioinformatics, Beijing Advanced Innovation Center for Structural Biology & Frontier Research Center for Biological Structure, Center for Synthetic and Systems Biology, Tsinghua-Peking Joint Center for Life Sciences, School of Life Sciences, Tsinghua University, Beijing, 100084, China.","Li P, Zhou X, Xu K, Zhang QC",,"National Natural Science Foundation of China, Chinese Ministry of Science and Technology, 
National Natural Science Foundation of China, National Natural Science Foundation of China",8.0,China +33177514,SAVI,0.993400097,SAVI,0.993400097,Synthetically Accessible Virtual Inventory,0.951206068,1,http://doi.org/10.35115/37n9-5738,301,,,no_wayback,2020-11-11,"Computer-Aided Drug Design Group, Chemical Biology Laboratory, Center for Cancer Research, National Cancer Institute, National Institutes of Health, Frederick, MD, 21702, USA.","Patel H, Ihlenfeldt WD, Judson PN, Moroz YS, Pevzner Y, Peach ML, Delannée V, Tarasova NI, Nicklaus MC",,,3.0,United States +33196814,RBP2GO,0.996937677,RBP2GO,0.996937677,,0,1,http://RBP2GO.DKFZ.de,301,,,http://web.archive.org/web/20220526043511/https://rbp2go.dkfz.de/,2021-01-01,"Division of RNA Biology & Cancer, German Cancer Research Center (DKFZ), 69120 Heidelberg, Germany.","Caudron-Herger M, Jansen RE, Wassmer E, Diederichs S",,"Baden-Württemberg Stiftung, Wilhelm Sander-Stiftung, German Cancer Aid",3.0,Germany +33231322,SoyTD,0.986389339,SoyTD,0.986389339,Soybean transporter database,0.780514371,1,http://artemis.cyverse.org/soykb_dev/SoyTD,301,,,no_wayback,2020-12-14,"Agriculture Biotechnology Department, National Agri-Food Biotechnology Institute (NABI), Mohali, India.","Deshmukh R, Rana N, Liu Y, Zeng S, Agarwal G, Sonah H, Varshney R, Joshi T, Patil GB, Nguyen HT",,"State of Texas' Governor's University Research Initiative (GURI), United Soybean Board, Department of Biotechnology, Ministry of Science and Technology, India",0.0,India +33238002,RecipeDB,0.99755621,RecipeDB,0.99755621,,0,1,http://cosylab.iiitd.edu.in/recipedb,301,India,"(28.6542,77.2373)",http://web.archive.org/web/20221011165416/https://cosylab.iiitd.edu.in/recipedb/,2020-11-01,"Complex Systems Laboratory, Center for Computational Biology, Indraprastha Institute of Information Technology (IIIT-Delhi), New Delhi, India 110020.","Batra D, Diwan N, Upadhyay U, Kalra JS, Sharma T, Sharma AK, Khanna D, Marwah JS, Kalathil S, Singh N, Tuwani R, Bagler 
G",,,1.0,India +33264401,SoyBase,0.984504819,SoyBase,0.984504819,,0,1,http://soybase.org,"HTTPConnectionPool(host='soybase.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,no_wayback,2021-01-01,"USDA-ARS Corn Insects and Crop Genetics Research Unit, Ames, IA, USA.","Brown AV, Conners SI, Huang W, Wilkey AP, Grant D, Weeks NT, Cannon SB, Graham MA, Nelson RT",,U.S. Department of Agriculture,12.0,United States +33416848,SARS CoV-2,0.557417756,SARS CoV-2,0.557417756,,0,1,http://sars3d.com,301,,,http://web.archive.org/web/20221006102050/https://sars3d.com/,2021-03-01,"Department of Biochemistry, at the University of Cambridge, UK.","Alsulami AF, Thomas SE, Jamasb AR, Beaudoin CA, Moghul I, Bannerman B, Copoiu L, Vedithi SC, Torres P, Blundell TL",,"Cystic Fibrosis Trust, Fondation Botnar, Biotechnology and Biological Sciences Research Council, Saudi Arabia Research Council, American Leprosy Missions, Wellcome Trust, Cystic Fibrosis Trust",17.0, +33507271,SinEx,0.876471639,SinEx,0.876471639,,0,1,http://v2.sinex.cl,301,,,no_wayback,2021-01-01,"Center for Bioinformatics and Genome Biology, Fundacion Ciencia & Vida, Zañartu 1482, Ñuñoa Santiago 7780132, Chile.","Jorquera R, González C, Clausen PTLC, Petersen B, Holmes DS",,"Fondo Nacional de Desarrollo Científico y Tecnológico, Programa de Apoyo a Centros con Financiamiento Basal",0.0,Chile +33553941,SARSCOVIDB,0.995992124,SARSCOVIDB,0.995992124,infection database,0.97165823,1,http://sarscovidb.org,301,,,http://web.archive.org/web/20220726071549/https://sarscovidb.org/,2021-01-21,"Programa de Pós-Graduação em Biologia Celular e Molecular, Universidade Federal do Rio Grande do Sul, Avenida Bento Gonçalves 9500, prédio 43431, Porto Alegre, Rio Grande do Sul 91501-970, Brasil.","da Rosa RL, Yang TS, Tureta EF, de Oliveira LRS, Moraes ANS, Tatara JM, Costa RP, Borges JS, Alves CI, Berger M, Guimarães JA, Santi L, 
Beys-da-Silva WO",,Coordena??o de Aperfei?oamento de Pessoal de N?vel Superior,1.0, +33685493,riboCIRC,0.996479809,riboCIRC,0.996479809,,0,1,http://www.ribocirc.com,200,,,http://web.archive.org/web/20220617192053/http://www.ribocirc.com/,2021-03-08,"State Key Laboratory of Ophthalmology, Zhongshan Ophthalmic Center, Sun Yat-sen University, Guangzhou, China.","Li H, Xie M, Wang Y, Yang L, Xie Z, Wang H",,"National Natural Science Foundation of China, National Natural Science Foundation of China",7.0,China +33892308,SAPdb,0.994092166,SAPdb,0.994092166,,0,1,http://webs.iiitd.edu.in/raghava/sapdb,301,,,http://web.archive.org/web/20220314193605/https://webs.iiitd.edu.in/raghava/sapdb/,2021-04-10,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh-160036, India. Electronic address: matdeepika@gmail.com.","Mathur D, Kaur H, Dhall A, Sharma N, Raghava GPS",,Department of Science and Technology,1.0,India +33994075,RHeference,0.994272649,RHeference,0.994272649,,0,1,http://www.rheference.org,302,France,"(47.2145,-1.5512)",http://web.archive.org/web/20220308202020/https://rheference.org/,2021-04-20,"Univ Paris Est Creteil, INSERM, IMRB, Creteil, France; EFS Ile-de-France Créteil, Creteil, France; Laboratory of Excellence GR-Ex, Paris, France.","Floch A, Téletchéa S, Tournamille C, de Brevern AG, Pirenne F",,"Agence nationale de la recherche, Grand Équipement National De Calcul Intensif",0.0,"France, France, France, France" +34014674,SistematX,0.941165566,SistematX,0.941165566,,0,1,http://sistematx.ufpb.br,301,,,no_wayback,2021-05-20,"Laboratory of Cheminformatics, Instituto de Pesquisa em Fármacos e Medicamentos (IPeFarM), Universidade Federal da Paraíba, Campus I, Cidade Universitária, João Pessoa 58051-900, PB, Brazil.","Costa RPO, Lucena LF, Silva LMA, Zocolo GJ, Herrera-Acevedo C, Scotti L, Da-Costa FB, Ionov N, Poroikov V, Muratov EN, Scotti MT",,"Conselho Nacional de Desenvolvimento Cient?fico e Tecnol?gico, National Cancer Institute, Conselho 
Nacional de Desenvolvimento Cient?fico e Tecnol?gico, Government Council on Grants, Russian Federation, NCI NIH HHS",3.0,Brazil +34022814,Rhododendron Plant Genome Database,0.988815002,RPGD,0.988055825,Rhododendron Plant Genome Database,0.988815002,1,http://bioinfor.kib.ac.cn/RPGD,301,,,http://web.archive.org/web/20220514163330/http://bioinfor.kib.ac.cn/RPGD/,2021-05-22,"Germplasm Bank of Wild Species, Kunming Institute of Botany, Chinese Academy of Sciences, Kunming, 650201, China.","Liu N, Zhang L, Zhou Y, Tu M, Wu Z, Gui D, Ma Y, Wang J, Zhang C",,"National Natural Science Foundation of China, Youth Program of National Natural Science Foundation of China, Program of Science and Technology Talents Training in Yunnan province, Construction of International Flower Technology Innovation Center and Industrialization of achievements, Ten Thousand Young Talents Plan of Yunnan",0.0,China +34042771,ReMeDy,0.99511075,ReMeDy,0.99511075,,0,1,http://remedy.mssm.edu,301,United States,"(40.7889,-73.954)",http://web.archive.org/web/20211207060558/https://remedy.mssm.edu/,2021-05-01,"Icahn School of Medicine at Mount Sinai, New York, New York, USA.","Borziak K, Parvanova I, Finkelstein J",,,0.0,United States +34224878,REVA,0.989323854,REVA,0.989323854,,0,1,http://reva.gao-lab.org,200,Hong Kong,"(22.3193,114.1693)",http://web.archive.org/web/20220922051348/http://reva.gao-lab.org/,2021-07-03,"State Key Laboratory of Protein and Plant Gene Research, School of Life Sciences, Biomedical Pioneering Innovation Center (BIOPIC) & Beijing Advanced Innovation Center for Genomics (ICG), Center for Bioinformatics (CBI), Peking University, Beijing 100871, China.","Wang Y, Shi FY, Liang Y, Gao G",,,0.0,China +34496744,RPocket,0.952259183,RPocket,0.952259183,,0,1,http://zhaoserver.com.cn/RPocket/RPocket.html,200,,,no_wayback,2021-09-08,"Department of Physics, Institute of Biophysics, Central China Normal University, Wuhan, 430079, China.","Zhou T, Wang H, Zeng C, Zhao Y",,"National Natural 
Science Foundation of China, national natural science foundation of china",1.0,"China, China" +34514416,SCISSORâ,0.996242762,SCISSORâ,0.996242762,,0,1,http://thecailab.com/scissor,"HTTPConnectionPool(host='thecailab.com', port=80): Max retries exceeded with url: /scissor (Caused by ConnectTimeoutError(, 'Connection to thecailab.com timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220527162307/https://www.thecailab.com/scissor/,2021-09-09,"Department of Environmental Health Sciences, Arnold School of Public Health, University of South Carolina, Columbia, SC 29208, USA.","Cui X, Qin F, Yu X, Xiao F, Cai G",,"NSF, NIGMS, NIGMS NIH HHS, UofSC, NIH",1.0,United States +34844637,recount3,0.966189086,recount3,0.966189086,,0,1,http://rna.recount.bio,200,United States,"(37.7642,-122.3993)",no_wayback,2021-11-29,"Department of Computer Science, Johns Hopkins University, Baltimore, USA.","Wilks C, Zheng SC, Chen FY, Charles R, Solomon B, Ling JP, Imada EL, Zhang D, Joseph L, Leek JT, Jaffe AE, Nellore A, Collado-Torres L, Hansen KD, Langmead B",,"Medical Research Council, NIGMS NIH HHS, national institute of general medical sciences, national institute of general medical sciences, office of advanced cyberinfrastructure, NIGMS NIH HHS, NIGMS NIH HHS, national institute of general medical sciences",14.0,United States +21177659,TMPad,0.989770174,TMPad,0.989770174,TransMembrane Protein Helix-Packing Database,0.981935009,1,http://bio-cluster.iis.sinica.edu.tw/TMPad,"HTTPConnectionPool(host='bio-cluster.iis.sinica.edu.tw', port=80): Max retries exceeded with url: /TMPad (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170201090954/http://bio-cluster.iis.sinica.edu.tw/TMPad/,2011-01-01,"Bioinformatics Laboratory, Institute of Information Science, Academia Sinica, Taipei 115, Taiwan, Republic of China.","Lo A, Cheng CW, Chiu YY, Sung TY, Hsu WL",,,16.0,China 
+21253873,TGED,0.988756279,TGED,0.988756279,Tetrahymena Gene Expression Database,0.987045392,1,http://tged.ihb.ac.cn,"HTTPConnectionPool(host='tged.ihb.ac.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20141222203210/http://tged.ihb.ac.cn:80/,2011-01-21,"Key Laboratory of Aquatic Biodiversity and Conservation, Institute of Hydrobiology, Chinese Academy of Sciences, Wuhan, 430072, China.","Xiong J, Lu X, Lu Y, Zeng H, Yuan D, Feng L, Chang Y, Bowen J, Gorovsky M, Fu C, Miao W",,"NIGMS NIH HHS, NIGMS NIH HHS",19.0,China +21265623,The European Radiobiological Archives,0.877208258,,0,The European Radiobiological Archives,0.877208258,1,http://era.bfs.de,301,,,no_wayback,2011-01-25,"Federal Office for Radiation Protection, Department of Radiation Protection and Health, 85764 Neuherberg, Germany. mbirschwilks@bfs.de","Birschwilks M, Gruenberger M, Adelmann C, Tapio S, Gerber G, Schofield PN, Grosche B",,,8.0,Germany +21292827,SSFA-GPHR,0.962571482,SSFA-GPHR,0.962571482,Structure-Function-Analysis of Glycoprotein Hormone Receptors,0.797694047,1,http://www.ssfa-gphr.de,200,Germany,"(52.5656,13.4242)",http://web.archive.org/web/20220615225351/http://www.ssfa-gphr.de/,2011-02-03,"Leibniz-Institut für Molekulare Pharmakologie, Robert-Rössle-Strasse 10, 13125 Berlin, Germany.","Kreuchwig A, Kleinau G, Kreuchwig F, Worth CL, Krause G",,,20.0,Germany +21383924,TAMI,0.986702323,TAMI,0.986702323,The Antimicrobial Index,0.871445251,1,http://antibiotics.toku-e.com,301,,,http://web.archive.org/web/20221012001443/https://antibiotics.toku-e.com/,2011-01-22,"TOKU-E Company, Research and Development Division, 150 Cecil St. 
#16-00, Singapore 069543.","Amirkia VD, Qiubao P",,,4.0,Singapore +21423723,SWMD,0.968675633,SWMD,0.968675633,Seaweed metabolite database,0.964039514,1,http://www.swmd.co.in,406,,,http://web.archive.org/web/20221102071455/http://swmd.co.in/,2011-01-22,None,"Davis GD, Vasanthi AH",,,21.0, +21495663,TIN,0.984829128,TIN,0.984829128,,0,1,http://mmg.rcsi.ie:8080/tin,"HTTPConnectionPool(host='mmg.rcsi.ie', port=8080): Max retries exceeded with url: /tin (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2011-04-15,"Molecular and Cellular Therapeutics Department, Royal College of Surgeons in Ireland , 123 St. Stephen's Green, Dublin 2, Ireland.","Dorschner KV, Toomey D, Brennan MP, Heinemann T, Duffy FJ, Nolan KB, Cox D, Adamo MF, Chubb AJ",,,2.0,"Ireland, Ireland" +21520336,ThalInd,0.982127786,ThalInd,0.982127786,,0,1,http://ccg.murdoch.edu.au/thalind,"HTTPConnectionPool(host='ccg.murdoch.edu.au', port=80): Max retries exceeded with url: /thalind (Caused by ConnectTimeoutError(, 'Connection to ccg.murdoch.edu.au timed out. 
(connect timeout=5)'))",,,no_wayback,2011-06-23,"Centre for Comparative Genomics, Murdoch University, Perth, Australia.","Sinha S, Black ML, Agarwal S, Das R, Bittles AH, Bellgard M",,,7.0,Australia +21546359,TparvaDB,0.998618424,TparvaDB,0.998618424,,0,1,http://tparvadb.ilri.cgiar.org,"HTTPConnectionPool(host='tparvadb.ilri.cgiar.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20120101153534/http://tparvadb.ilri.cgiar.org:80/,2011-05-04,"Center for Biotechnology and Bioinformatics, University of Nairobi, Nairobi.","Visendi P, Ng'ang'a W, Bulimo W, Bishop R, Ochanda J, de Villiers EP",,,0.0, +"22075997, 24293645, 26590256",STITCH,0.979809999,STITCH,0.979809999,Tool for Interacting Chemicals,0.714140986,3,http://stitch.embl.de,200,Germany,"(49.4071,8.6879)",http://web.archive.org/web/20220618223210/http://stitch.embl.de/,2015-11-20,"Biotechnology Center, TU Dresden, 01062 Dresden, Germany. 
bork@embl.de, Biotechnology Center, TU Dresden, 01062 Dresden, Germany, Institute of Molecular Life Sciences, University of Zurich and Swiss Institute of Bioinformatics, Winterthurerstrasse 190, 8057 Zurich, Switzerland, Novo Nordisk Foundation Center for Protein Research, Faculty of Health Sciences, University of Copenhagen, 2200 Copenhagen N, Denmark, European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany and Max-Delbrück-Centre for Molecular Medicine, Robert-Rössle-Strasse 10, 13092 Berlin, Germany., Institute of Molecular Life Sciences, University of Zurich and Swiss Institute of Bioinformatics, Winterthurerstrasse 190, 8057 Zurich, Switzerland.","Kuhn M, Szklarczyk D, Franceschini A, von Mering C, Jensen LJ, Bork P, Kuhn M, Szklarczyk D, Pletscher-Frankild S, Blicher TH, von Mering C, Jensen LJ, Bork P, Szklarczyk D, Santos A, von Mering C, Jensen LJ, Bork P, Kuhn M",", , ",", Novo Nordisk Foundation Center for Protein Research, Novo Nordisk Foundation Center for Protein Research",694.0,"Switzerland, Switzerland, Germany, Germany, Germany, Germany, Denmark" +"22096228, 24178028, 26433225, 29788229",SubtiWiki,0.997424006,SubtiWiki,0.997424006,,0,4,http://subtiwiki.uni-goettingen.de,200,Germany,"(51.5126,9.9523)",http://web.archive.org/web/20221021144550/http://subtiwiki.uni-goettingen.de/,2018-01-01,"Interfaculty Institute for Genetics and Functional Genomics, Ernst-Moritz-Arndt-University Greifswald, Jahnstr 15, D-17487 Greifswald, Germany., Department of General Microbiology, Institute of Microbiology and Genetics, Georg-August University Göttingen, Grisebachstrasse 8, D-37077 Göttingen, Germany., Department of General Microbiology, Institute of Microbiology and Genetics, Georg-August University Göttingen, Grisebachstr. 8, D-37077 Göttingen, Germany., Department of General Microbiology, Institute of Microbiology and Genetics, Georg-August University Göttingen, Grisebachstr. 
8, D-37077 Göttingen, Germany.","Mäder U, Schmeisky AG, Flórez LA, Stülke J, Michna RH, Commichau FM, Tödter D, Zschiedrich CP, Stülke J, Michna RH, Zhu B, Mäder U, Stülke J, Zhu B, Stülke J",", , , ",", , , ",221.0,"Germany, Germany, Germany, Germany" +"22116064, 31680154",TDR Targets,0.930215985,TDR Targets,0.930215985,,0,2,http://tdrtargets.org,301,,,http://web.archive.org/web/20221108232902/https://tdrtargets.org/,2020-01-01,"Instituto de Investigaciones Biotecnológicas, Universidad de San Martín, San Martín, Buenos Aires, Argentina., Instituto de Investigaciones Biotecnológicas ""Rodolfo Ugalde"" (IIB), Universidad de San Martín, San Martín, B1650HMP, Buenos Aires, Argentina.","Magariños MP, Carmona SJ, Crowther GJ, Ralph SA, Roos DS, Shanmugam D, Van Voorhis WC, Agüero F, Urán Landaburu L, Berenstein AJ, Videla S, Maru P, Shanmugam D, Chernomoretz A, Agüero F",", ","FIC NIH HHS, FIC NIH HHS, National Agency for the Promotion of Science and Technology, Argentina, Argentinian Ministry of Science and Technology, Wellcome Trust, GlaxoSmithKline Argentina, Indo-Argentina Bilateral Cooperation Project",62.0,"Argentina, Argentina" +22121217,Stem Cell Discovery Engine,0.896048009,SCDE,0.878304124,Stem Cell Discovery Engine,0.896048009,1,http://discovery.hsci.harvard.edu,301,United States,"(42.3427,-71.0922)",http://web.archive.org/web/20190215234824/http://discovery.hsci.harvard.edu:80/,2011-11-24,"Department of Biostatistics, HSPH Bioinformatics Core, Harvard School of Public Health, Boston, MA, USA. 
shosui@hsph.harvard.edu","Ho Sui SJ, Begley K, Reilly D, Chapman B, McGovern R, Rocca-Sera P, Maguire E, Altschuler GM, Hansen TA, Sompallae R, Krivtsov A, Shivdasani RA, Armstrong SA, Culhane AC, Correll M, Sansone SA, Hofmann O, Hide W",,"Biotechnology and Biological Sciences Research Council, NCI NIH HHS, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",16.0,United States +22139928,SpliceDisease,0.996948481,SpliceDisease,0.996948481,,0,1,http://cmbi.bjmu.edu.cn/sdisease,"HTTPConnectionPool(host='cmbi.bjmu.edu.cn', port=80): Max retries exceeded with url: /sdisease (Caused by ConnectTimeoutError(, 'Connection to cmbi.bjmu.edu.cn timed out. (connect timeout=5)'))",,,no_wayback,2011-12-01,"Department of Biomedical Informatics, Peking University Health Science Center, MOE Key Laboratory of Molecular Cardiology, Peking University, Beijing 100191, China. wjuan@hsc.pku.edu.cn","Wang J, Zhang J, Li K, Zhao W, Cui Q",,,32.0,China +"22140109, 29220077",TAIR,0.991281509,TAIR,0.991281509,Arabidopsis Information Resource,0.796741247,2,http://arabidopsis.org,301,,,http://web.archive.org/web/20221108143101/https://arabidopsis.org/,2017-12-08,"Department of Plant Biology, Carnegie Institution, 260 Panama St, Stanford, CA 94305, USA., Phoenix Bioinformatics, Fremont, California.","Lamesch P, Berardini TZ, Li D, Swarbreck D, Wilks C, Sasidharan R, Muller R, Dreher K, Alexander DL, Garcia-Hernandez M, Karthikeyan AS, Lee CH, Nelson WD, Ploetz L, Singh S, Wensel A, Huala E, Reiser L, Subramaniam S, Li D, Huala E",", ","NHGRI NIH HHS, Biotechnology and Biological Sciences Research Council, ",966.0,"Panama, United States" +22292669,TranscriptomeBrowser,0.988476872,TranscriptomeBrowser,0.988476872,,0,1,http://tagc.univ-mrs.fr/tbrowser,"HTTPConnectionPool(host='tagc.univ-mrs.fr', port=80): Max retries exceeded with url: /tbrowser (Caused by ConnectTimeoutError(, 'Connection to tagc.univ-mrs.fr timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20181003233342/http://tagc.univ-mrs.fr:80/tbrowser/,2012-01-31,"TAGC UMR_S 928, Inserm, Parc Scientifique de Luminy, Marseille, France.","Lepoivre C, Bergon A, Lopez F, Perumal NB, Nguyen C, Imbert J, Puthier D",,,13.0,France +22397686,SpiroESTdb,0.997175336,SpiroESTdb,0.997175336,,0,1,http://pathod.cdc.go.kr/spiroestdb,503,,,http://web.archive.org/web/20120326100034/http://pathod.cdc.go.kr:80/spiroestdb/,2012-03-08,"Division of Malaria and Parasitic Diseases, Korea National Institute of Health, Osong 363-951, Republic of Korea.","Kim DW, Kim DW, Yoo WG, Nam SH, Lee MR, Yang HW, Park J, Lee K, Lee S, Cho SH, Lee WJ, Park HS, Ju JW",,,4.0, +22434841,TGD,0.996270736,TGD,0.996270736,Tetrahymena Genome Database,0.989497207,1,http://ciliate.org,302,,,http://web.archive.org/web/20100410173839/http://www.ciliate.org/,2012-03-20,"Department of Biology, Bradley University, Peoria, Illinois 61625, USA. nstover@bradley.edu","Stover NA, Punia RS, Bowen MS, Dolins SB, Clark TG",,NCRR NIH HHS,26.0,United States +22493538,Ssa miRNAs DB,0.706260227,Ssa miRNAs DB,0.706260227,miRNAs,0.592867792,1,http://www.molgenv.com/ssa_mirnas_db_home.php,301,,,http://web.archive.org/web/20160130213527/http://molgenv.com/ssa_mirnas_db_home.php,2012-03-31,"Laboratory of Molecular Ecology, Genomics and Evolutionary Studies, Department of Biology, Faculty of Chemistry and Biology, Santiago de Chile University, Chile & Centro de Biotecnología Acuícola (CBA).","Reyes D, Cepeda V, González R, Vidal R",,,3.0,"Chile, Chile" +22701460,SPWP,0.99272126,SPWP,0.99272126,Seed Proteome Web Portal,0.962555528,1,http://www.seed-proteome.com,301,France,"(50.9871,2.12554)",http://web.archive.org/web/20221016204723/https://www.seed-proteome.com/,2012-06-11,"INRA, Jean-Pierre Bourgin Institute (IJPB, UMR1318 INRA-AgroParisTech), Laboratory of Excellence ""Saclay Plant Sciences"" (LabEx SPS); RD10, F-78026 Versailles France.","Galland M, Job D, Rajjou 
L",,,10.0,France +22759918,SSKB,0.872170428,SSKB,0.872170428,ren's Syndrome Knowledge Base,0.860038102,1,http://sskb.umn.edu,"HTTPConnectionPool(host='sskb.umn.edu', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to sskb.umn.edu timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160321203914/http://sskb.umn.edu/,2012-07-03,"Department of Diagnostic and Biological Sciences, University of Minnesota School of Dentistry, Minneapolis, MN 55455, USA. sugorr@umn.edu","Gorr SU, Wennblom TJ, Horvath S, Wong DT, Michie SA",,"NIDCR NIH HHS, NIDCR NIH HHS",3.0,United States +22786849,Transcriptomine,0.981516123,Transcriptomine,0.981516123,,0,1,http://www.nursa.org/transcriptomine,301,,,http://web.archive.org/web/20220807131146/https://www.nursa.org/transcriptomine,2012-07-10,"Department of Molecular and Cellular Biology, Baylor College of Medicine, Houston, Texas 77030, USA.","Ochsner SA, Watkins CM, McOwiti A, Xu X, Darlington YF, Dehart MD, Cooney AJ, Steffen DL, Becnel LB, McKenna NJ",,"NCI NIH HHS, NIDDK NIH HHS",18.0,United States +22807998,SyStemCell,0.995095372,SyStemCell,0.995095372,,0,1,http://lifecenter.sgst.cn/SyStemCell,"HTTPConnectionPool(host='lifecenter.sgst.cn', port=80): Max retries exceeded with url: /SyStemCell (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160730230800/http://lifecenter.sgst.cn:80/SyStemCell/,2012-07-13,"Shanghai Center for Bioinformation Technology, Shanghai, China.","Yu J, Xing X, Zeng L, Sun J, Li W, Sun H, He Y, Li J, Zhang G, Wang C, Li Y, Xie L",,,6.0,China +22821489,SpectraBank,0.972535014,SpectraBank,0.972535014,,0,1,"http://www.spectrabank.org, http://bioinfo.thep.lu.se/speclust.html","302, HTTPConnectionPool(host='bioinfo.thep.lu.se', port=80): Max retries exceeded with url: /speclust.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or 
service not known'))",,", ","no_wayback, http://web.archive.org/web/20140722060638/http://bioinfo.thep.lu.se/speclust.html",2012-07-01,"Department of Analytical Chemistry, Nutrition and Food Science, School of Veterinary Sciences, University of Santiago de Compostela, Lugo, Spain.","Böhme K, Fernández-No IC, Barros-Velázquez J, Gallardo JM, Cañas B, Calo-Mata P",,,24.0,Spain +23071747,TRIP,0.917790532,TRIP,0.917790532,TRansient receptor potential,0.710879376,1,http://www.trpchannel.org,405,,,http://web.archive.org/web/20220802134518/http://trpchannel.org/,2012-10-11,"Department of Physiology and Biomedical Sciences, Seoul National University College of Medicine, Seoul, Korea.","Shin YC, Shin SY, Chun JN, Cho HS, Lim JM, Kim HG, So I, Kwon D, Jeon JH",,,16.0, +23093603,TOPPR,0.997798244,TOPPR,0.997798244,The Online Protein Processing Resource,0.95862323,1,http://iomics.ugent.be/toppr,301,,,http://web.archive.org/web/20160929055346/http://iomics.ugent.be:80/toppr,2012-10-23,"Department of Medical Protein Research, VIB, Ghent University, A. 
Baertsoenkaai 3, B-9000 Ghent, Belgium.","Colaert N, Maddelein D, Impens F, Van Damme P, Plasman K, Helsens K, Hulstaert N, Vandekerckhove J, Gevaert K, Martens L",,,10.0,Belgium +23104376,SwissSidechain,0.996189356,SwissSidechain,0.996189356,,0,1,http://www.swisssidechain.ch,301,France,"(50.6917,3.20157)",http://web.archive.org/web/20220616080154/https://www.swisssidechain.ch/,2012-10-26,"Swiss Institute of Bioinformatics (SIB), Quartier Sorge, Bâtiment Génopode, CH-1015 Lausanne, Switzerland.","Gfeller D, Michielin O, Zoete V",,,44.0,Switzerland +23118479,SpliceAid-F,0.996136576,SpliceAid-F,0.996136576,,0,1,http://www.caspur.it/SpliceAidF,409,,,no_wayback,2012-10-30,"Department of Biosciences, University of Bari, Bari, Italy.","Giulietti M, Piva F, D'Antonio M, D'Onorio De Meo P, Paoletti D, Castrignanò T, D'Erchia AM, Picardi E, Zambelli F, Principato G, Pavesi G, Pesole G",,,66.0,Italy +23118483,Spliceosome,0.603129983,,0,Spliceosome,0.603129983,1,http://spliceosomedb.ucsc.edu,200,United States,"(37.0009,-122.061)",wayback is down,2012-10-30,"Department of Molecular, Cell and Developmental Biology and Center for Molecular Biology of RNA, University of California, 1156 High Street, Santa Cruz, CA 95064, USA.","Cvitkovic I, Jurica MS",,"NIGMS NIH HHS, NIGMS NIH HHS",60.0,United States +23161688,SwissBioisostere,0.985951066,SwissBioisostere,0.985951066,,0,1,http://www.swissbioisostere.ch,200,,,http://web.archive.org/web/20221016222310/http://swissbioisostere.ch/,2012-11-17,"Computational Chemistry, Merck Serono S.A., Chemin des Mines, 9, CH-1202 Geneva.","Wirth M, Zoete V, Michielin O, Sauer WH",,,27.0, +23180783,SwissRegulon,0.99072659,SwissRegulon,0.99072659,,0,1,http://swissregulon.unibas.ch,302,,,http://web.archive.org/web/20221017002339/https://www.swissregulon.unibas.ch/,2012-11-24,"Biozentrum, University of Basel, and Swiss Institute of Bioinformatics, Klingelbergstrasse 50/70, CH-4056 Basel, Switzerland.","Pachkov M, Balwierz PJ, Arnold P, Ozonov E, van 
Nimwegen E",,Swiss National Science Foundation,75.0,Switzerland +"23180787, 25161662",SUBA3,0.984873056,SUBA3,0.984873056,subcellular location database for Arabidopsis proteins,0.961620086,2,http://suba.plantenergy.uwa.edu.au,301,Canada,"(43.6532,-79.3832)",http://web.archive.org/web/20221108060217/https://suba.plantenergy.uwa.edu.au/,2014-08-12,"Centre of Excellence in Computational Systems Biology, The University of Western Australia, Perth, WA 6009, Australia., The Australian Research Council Centre of Excellence in Plant Energy Biology, The University of Western Australia Perth, WA, Australia.","Tanz SK, Castleden I, Hooper CM, Vacher M, Small I, Millar HA, Tanz SK, Castleden I, Hooper CM, Small I, Millar AH",", ",", ",176.0,"Australia, Australia, Australia, Australia" +"23180794, 29087517",TFClass,0.997942924,TFClass,0.997942924,,0,2,http://tfclass.bioinf.med.uni-goettingen.de,200,,,http://web.archive.org/web/20220503180127/http://tfclass.bioinf.med.uni-goettingen.de/,2018-01-01,"Department of Bioinformatics, University Medical Center Göttingen, Georg August University Göttingen, Goldschmidtstr. 1, D-37077 Göttingen, Germany. edgar.wingender@bioinf.med.uni-goettingen.de, Institute of Bioinformatics, University Medical Center Göttingen, Georg August University, D-37077 Göttingen, Germany.","Wingender E, Schoeps T, Dönitz J, Wingender E, Schoeps T, Haubrock M, Krull M, Dönitz J",", ","European Commission FP7, ",115.0,"Germany, Germany" +"23193266, 27899616",TissueNet,0.973481178,TissueNet,0.973481178,,0,2,http://netbio.bgu.ac.il/tissuenet,"HTTPConnectionPool(host='netbio.bgu.ac.il', port=80): Max retries exceeded with url: /tissuenet (Caused by ConnectTimeoutError(, 'Connection to netbio.bgu.ac.il timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220522072723/https://netbio.bgu.ac.il/tissuenet/,2016-11-29,"Department of Clinical Biochemistry, Ben-Gurion University of the Negev, Beer-Sheva 84105, Israel., Department of Clinical Biochemistry & Pharmacology, Faculty of Health Sciences, Ben-Gurion University of the Negev, Beer-Sheva 84105, Israel.","Barshir R, Basha O, Eluk A, Smoly IY, Lan A, Yeger-Lotem E, Basha O, Barshir R, Sharon M, Lerman E, Kirson BF, Hekselman I, Yeger-Lotem E",", ",", ",60.0,"Israel, Israel" +23193286,SpermatogenesisOnline,0.914023519,SpermatogenesisOnline,0.914023519,,0,1,http://mcg.ustc.edu.cn/sdap1/spermgenes,"HTTPConnectionPool(host='mcg.ustc.edu.cn', port=80): Max retries exceeded with url: /sdap1/spermgenes (Caused by ReadTimeoutError(""HTTPConnectionPool(host='mcg.ustc.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20170916182311/http://mcg.ustc.edu.cn:80/sdap1/spermgenes/,2012-11-28,"Hefei National Laboratory for Physical Sciences at Microscale and Department of Life Sciences, University of Science and Technology of China, Hefei 230027, China.","Zhang Y, Zhong L, Xu B, Yang Y, Ban R, Zhu J, Cooke HJ, Hao Q, Shi Q",,,20.0,"China, China" +23197656,TIGRFAMs,0.994315922,TIGRFAMs,0.994315922,,0,1,"http://www.jcvi.org/tigrfams, http://www.jcvi.org/genome-properties","301, 301",,", ","http://web.archive.org/web/20181127120838/https://www.jcvi.org/tigrfams, no_wayback",2012-11-28,"Informatics, J Craig Venter Institute, Rockville, MD 20850, USA. 
haft@jcvi.org","Haft DH, Selengut JD, Richter RA, Harkins D, Basu MK, Beck E",,NHGRI NIH HHS,249.0,United States +"23203871, 25352553, 27924014, 30476243, 33237311",STRING,0.995607018,STRING,0.995607018,,0,5,http://string-db.org,301,,,http://web.archive.org/web/20221101152903/https://string-db.org/,2021-01-01,"nan, Institute of Molecular Life Sciences and Swiss Institute of Bioinformatics, University of Zurich, 8057 Zurich, Switzerland., Institute of Molecular Life Sciences and Swiss Institute of Bioinformatics, University of Zurich, 8057 Zurich, Switzerland., nan, Department of Molecular Life Sciences and Swiss Institute of Bioinformatics, University of Zurich, 8057 Zurich, Switzerland.","nan, Szklarczyk D, Franceschini A, Wyder S, Forslund K, Heller D, Huerta-Cepas J, Simonovic M, Roth A, Santos A, Tsafou KP, Kuhn M, Bork P, Jensen LJ, von Mering C, Szklarczyk D, Morris JH, Cook H, Kuhn M, Wyder S, Simonovic M, Santos A, Doncheva NT, Roth A, Bork P, Jensen LJ, von Mering C, nan, Szklarczyk D, Gable AL, Nastou KC, Lyon D, Kirsch R, Pyysalo S, Doncheva NT, Legeay M, Fang T, Bork P, Jensen LJ, von Mering C","nan, , , nan, ","nan, Novo Nordisk Foundation Center for Protein Research, NIGMS NIH HHS, NIGMS NIH HHS, Novo Nordisk Foundation Center for Protein Research, nan, Academy of Finland, Novo Nordisk Foundation Center for Protein Research, Novo Nordisk Foundation",7432.0,"Switzerland, Switzerland, Switzerland" +"23203875, 29106634",TCMID,0.992202723,TCMID,0.992202723,Traditional Chinese Medicine Integrated Database,0.9660478,2,http://www.megabionet.org/tcmid,301,,,http://web.archive.org/web/20190420165210/http://www.megabionet.org/tcmid/,2018-01-01,"nan, TCM Clinical Basis Institute, Zhejiang Chinese Medicine University, Zhejiang 310000, China.","nan, Huang L, Xie D, Yu Y, Liu H, Shi Y, Shi T, Wen C","nan, ","nan, ",86.0,China 
+23284086,TreeTFDB,0.998396933,TreeTFDB,0.998396933,,0,1,http://treetfdb.bmep.riken.jp/index.pl,"HTTPConnectionPool(host='treetfdb.bmep.riken.jp', port=80): Max retries exceeded with url: /index.pl (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20160216020834/http://treetfdb.bmep.riken.jp:80/index.pl,2013-01-02,,,,,0.0, +23284744,StRAP,0.99467355,StRAP,0.99467355,Stress Response Array Profiler,0.947469604,1,http://strap.nci.nih.gov,301,,,no_wayback,2012-12-17,,,,,0.0, +23314754,STIFDB2,0.995890498,STIFDB2,0.995890498,,0,1,http://caps.ncbs.res.in/stifdb2,301,India,"(12.9634,77.5855)",http://web.archive.org/web/20220827111521/http://caps.ncbs.res.in/stifdb2/,2013-01-10,,,,,0.0, +23406793,T-HOD,0.995550072,T-HOD,0.995550072,Text-mined Hypertension,0.958812918,1,http://bws.iis.sinica.edu.tw/THOD,"HTTPConnectionPool(host='bws.iis.sinica.edu.tw', port=80): Max retries exceeded with url: /THOD (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200223033520/http://bws.iis.sinica.edu.tw:80/THOD/,2013-02-12,,,,,0.0, +23482072,TetraFGD,0.996410549,TetraFGD,0.996410549,Tetrahymena functional genomics database,0.951898682,1,http://tfgd.ihb.ac.cn,200,,,http://web.archive.org/web/20220615145245/http://tfgd.ihb.ac.cn/,2013-03-12,,,,,0.0, +23497177,SymbioGBR,0.994127393,SymbioGBR,0.994127393,,0,1,http://www.SymbioGBR.org,500,,,http://web.archive.org/web/20210411045654/http://www.symbiogbr.org/,2013-03-13,,,,,0.0, +23515433,TIARA,0.992577295,TIARA,0.992577295,Total Integrated Archive of short-Read and Array,0.969094998,1,http://tiara.gmi.ac.kr,"HTTPConnectionPool(host='tiara.gmi.ac.kr', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='tiara.gmi.ac.kr', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220525101758/http://tiara.gmi.ac.kr/,2013-03-20,"Cancer Genomics Branch, Division of Convergence Technology, National Cancer Center, Gyeonggi-do 410-769, Korea.","Hong D, Lee J, Bleazard T, Jung H, Ju YS, Yu SB, Kim S, Park SS, Kim JI, Seo JS",,,4.0, +23547943,TRACER,0.993061364,TRACER,0.993061364,,0,1,http://tracerdatabase.embl.de,302,,,no_wayback,2013-04-02,,,,,0.0, +23550212,switches.ELM,0.973136947,switches.ELM,0.973136947,,0,1,http://switches.elm.eu.org,200,Germany,"(49.4071,8.6879)",http://web.archive.org/web/20221017110452/http://switches.elm.eu.org/,2013-04-02,"Structural and Computational Biology Unit, European Molecular Biology Laboratory (EMBL), Meyerhofstrasse 1, D-69117 Heidelberg, Germany.","Van Roey K, Dinkel H, Weatheritt RJ, Gibson TJ, Davey NE",,,64.0,Germany +23698860,TiPs,0.989953816,TiPs,0.989953816,,0,1,http://biocomputing.it/tips,301,,,http://web.archive.org/web/20140105043442/http://biocomputing.it:80/tips/,2013-05-21,"Department of Physics, Sapienza University, 00185 Rome, Italy.","Lepore R, Tramontano A, Via A",,,1.0,Italy +23766369,TIMBAL,0.996844351,TIMBAL,0.996844351,,0,1,http://www-cryst.bioc.cam.ac.uk/timbal,"HTTPConnectionPool(host='www-cryst.bioc.cam.ac.uk', port=80): Max retries exceeded with url: /timbal (Caused by ConnectTimeoutError(, 'Connection to www-cryst.bioc.cam.ac.uk timed out. (connect timeout=5)'))",,,no_wayback,2013-06-13,"Department of Biochemistry, University of Cambridge, Cambridge CB1 2GA, UK. 
alicia@cryst.bioc.cam.ac.uk","Higueruelo AP, Jubb H, Blundell TL",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",22.0, +23846596,T2D@ZJU,0.931308525,T2D@ZJU,0.931308525,,0,1,http://tcm.zju.edu.cn/t2d,404,,,http://web.archive.org/web/20140722210625/http://tcm.zju.edu.cn/t2d,2013-07-11,"Pharmaceutical Informatics Institute, College of Pharmaceutical Sciences, Zhejiang University, Hangzhou 310058, China.","Yang Z, Yang J, Liu W, Wu L, Xing L, Wang Y, Fan X, Cheng Y",,,10.0,China +24013925,targetHub,0.947265625,targetHub,0.947265625,,0,1,http://app1.bioinformatics.mdanderson.org/tarhub/_design/basic/index.html,302,,,http://web.archive.org/web/20220406031359/https://app1.bioinformatics.mdanderson.org/tarhub/_design/basic/index.html,2013-09-06,"Department of Bioinformatics and Computational Biology, Department of Gynecologic Oncology and Reproductive Medicine and Department of Experimental Therapeutics, The University of Texas M.D. Anderson Cancer Center, Houston, TX 77030, USA.","Manyam G, Ivan C, Calin GA, Calin GA, Coombes KR",,"NCI NIH HHS, NCI NIH HHS",7.0,United States +24066126,SurvExpress,0.972360611,SurvExpress,0.972360611,,0,1,http://bioinformatica.mty.itesm.mx/SurvExpress,"HTTPConnectionPool(host='bioinformatica.mty.itesm.mx', port=80): Max retries exceeded with url: /SurvExpress (Caused by ConnectTimeoutError(, 'Connection to bioinformatica.mty.itesm.mx timed out. 
(connect timeout=5)'))",,,no_wayback,2013-09-16,"Cátedra de Bioinformática, Tecnológico de Monterrey, Monterrey, Nuevo León, México.","Aguirre-Gamboa R, Gomez-Rueda H, Martínez-Ledesma E, Martínez-Torteya A, Chacolla-Huaringa R, Rodriguez-Barrientos A, Tamez-Peña JG, Treviño V",,,393.0, +24194607,TreeFam,0.990964532,TreeFam,0.990964532,,0,1,http://www.treefam.org,200,,,http://web.archive.org/web/20220913003958/http://www.treefam.org/,2013-11-04,"Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SA, UK and European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","Schreiber F, Patricio M, Muffato M, Pignatelli M, Bateman A",,"Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust",73.0, +24203712,TISdb,0.993791044,TISdb,0.993791044,,0,1,http://tisdb.human.cornell.edu,200,,,http://web.archive.org/web/20221022041837/http://tisdb.human.cornell.edu/,2013-11-06,"Division of Nutritional Sciences, Cornell University, Ithaca, NY 14853, USA.","Wan J, Qian SB",,"NIH HHS, NIA NIH HHS",43.0,United States +24223973,SWEETLEAD,0.981075227,SWEETLEAD,0.981075227,,0,1,http://simtk.org/home/sweetlead,301,,,http://web.archive.org/web/20150908040554/https://simtk.org/home/sweetlead,2013-11-01,"Department of Chemistry, Stanford University, Stanford, California, United States of America.","Novick PA, Ortiz OF, Poelman J, Abdulhay AY, Pande VS",,"NCRR NIH HHS, NIGMS NIH HHS, NCATS NIH HHS, NCATS NIH HHS",27.0,United States +"24225317, 26546518",TCDB,0.996765494,TCDB,0.996765494,The Transporter Classification Database,0.959318292,2,http://www.tcdb.org,302,,,http://web.archive.org/web/20221108155811/https://tcdb.org/,2015-11-05,"Department of Molecular Biology, University of California at San Diego, La 
Jolla, CA 92093-0116, USA., Department of Molecular Biology, University of California at San Diego, La Jolla, CA 92093-0116, USA msaier@ucsd.edu.","Saier MH Jr, Reddy VS, Tamang DG, Västermark A, Saier MH Jr, Reddy VS, Tsu BV, Ahmed MS, Li C, Moreno-Hagelsieb G",", ","NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",496.0,"United States, United States" +24271391,SuperPain,0.974839449,SuperPain,0.974839449,,0,1,http://bioinformatics.charite.de/superpain,301,Germany,"(52.4422,13.3217)",http://web.archive.org/web/20210615222111/https://bioinformatics.charite.de/superpain/,2013-11-22,"German Cancer Consortium (DKTK), Lindenberger Weg 80, 13125 Berlin, Germany, Charité - Universitätsmedizin Berlin, Structural Bioinformatics Group, Lindenberger Weg 80, 13125 Berlin, Germany and Dental, Oral and Maxillary Medicine, Charité - Universitätsmedizin Berlin, CC3, Assmannshauser Straße 4-6, 14197 Berlin, Germany.","Gohlke BO, Preissner R, Preissner S",,,5.0,"Germany, Germany, Germany" +24273012,SpliceProt,0.991778851,SpliceProt,0.991778851,,0,1,http://lbbc.inca.gov.br/spliceprot,301,Brazil,"(-22.9201,-43.0811)",no_wayback,2014-02-01,"Bioinformatics Unit, Clinical Research Coordination, Instituto Nacional de Câncer (INCA), Rio de Janeiro, Brazil.","Tavares R, de Miranda Scherer N, Pauletti BA, Araújo E, Folador EL, Espindola G, Ferreira CG, Paes Leme AF, de Oliveira PS, Passetti F",,"Fundação Carlos Chagas Filho de Amparo à Pesquisa do Estado do Rio de Janeiro, Coordenação de Aperfeiçoamento de Pessoal de Nivel Superior, Vice-Presidência de Ensino, Conselho Nacional de Desenvolvimento Científico e Tecnológico, Ministério da Ciência e Tecnologia/Fundo Setorial de Saúde",8.0,Brazil +24334957,Transformer,0.906411409,Transformer,0.906411409,,0,1,http://bioinformatics.charite.de/transformer,301,,,http://web.archive.org/web/20210615072025/https://bioinformatics.charite.de/transformer/,2013-12-10,"Structural Bioinformatics Group, Institute for Physiology & 
ECRC, Charité - Universitätsmedizin Berlin, Lindenberger Weg 80, 13125 Berlin, Germany and Department of Operative and Preventive Dentistry, Charité - Universitätsmedizin Berlin, Assmannshauser Str. 4-6, 14197 Berlin, Germany.","Hoffmann MF, Preissner SC, Nickel J, Dunkel M, Preissner R, Preissner S",,,27.0,"Germany, Germany" +24371150,tasiRNAdb,0.994892418,tasiRNAdb,0.994892418,,0,1,http://bioinfo.jit.edu.cn/tasiRNADatabase,"HTTPConnectionPool(host='bioinfo.jit.edu.cn', port=80): Max retries exceeded with url: /tasiRNADatabase (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190913204624/http://bioinfo.jit.edu.cn:80/tasiRNADatabase/,2013-12-25,"College of Horticulture, Jinling Institute of Technology, Nanjing 210038, China, College of Forest Resources and Environment, Nanjing Forestry University, Nanjing 210037, China and College of Horticulture, Nanjing Agricultural University, Nanjing 210095, China.","Zhang C, Li G, Zhu S, Zhang S, Fang J",,,28.0,"China, China, China" +24408216,tbvar,0.995915771,tbvar,0.995915771,,0,1,http://genome.igib.res.in/tbvar,301,,,http://web.archive.org/web/20220706231516/http://genome.igib.res.in/tbvar/,2014-01-09,"CSIR Open Source Drug Discovery Unit, Anusandhan Bhawan, Delhi 110001, India; Academy of Scientific and Innovative Research (AcSIR), Anusandhan Bhawan, Delhi 110001, India; Department of Biotechnology, Delhi Technological University, Bawana Road, Delhi 110042, India and GN Ramachandran Knowledge Center for Genome Informatics, CSIR Institute of Genomics and Integrative Biology (CSIR-IGIB), Mall Road, Delhi 110007, India.","Joshi KR, Dhiman H, Scaria V",,,12.0,"India, India, India, India" +24466070,TGRD,0.996569216,TGRD,0.996569216,Tomato Genomic Resources Database,0.988132167,1,http://59.163.192.91/tomato2,"HTTPConnectionPool(host='59.163.192.91', port=80): Max retries exceeded with url: /tomato2 (Caused by ConnectTimeoutError(, 
'Connection to 59.163.192.91 timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20140726095957/http://59.163.192.91/tomato2/,2014-01-21,"National Institute of Plant Genome Research, Aruna Asaf Ali Marg, New Delhi, India.","Suresh BV, Roy R, Sahu K, Misra G, Chattopadhyay D",,,21.0,India +24578355,StaphyloBase,0.997347653,StaphyloBase,0.997347653,,0,1,http://staphylococcus.um.edu.my,"HTTPConnectionPool(host='staphylococcus.um.edu.my', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to staphylococcus.um.edu.my timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20161116071553/http://staphylococcus.um.edu.my:80/,2014-02-26,"Genome Informatics Research Laboratory, HIR Building, University of Malaya, 50603 Kuala Lumpur, Malaysia, Department of Software Engineering, Faculty of Computer Science and Information Technology, University of Malaya, 50603 Kuala Lumpur, Malaysia, Department of Oral Biology and Biomedical Sciences, Faculty of Dentistry, University of Malaya, 50603 Kuala Lumpur, Malaysia and Department of Computer System and Technology, Faculty of Computer Science and Information Technology, University of Malaya, 50603 Kuala Lumpur, Malaysia.","Heydari H, Mutha NV, Mahmud MI, Siow CC, Wee WY, Wong GJ, Yazdi AH, Ang MY, Choo SW",,,3.0,"Malaysia, Malaysia, Malaysia, Malaysia" +24675620,TIBS,0.980226338,TIBS,0.980226338,Transcriptome of Irritable Bowel Syndrome,0.862030645,1,http://www.chengfeng.info/tibs_database.html,200,,,http://web.archive.org/web/20220620093620/http://www.chengfeng.info/tibs_database.html,2014-03-24,"Institute of First Clinical Medicine, Nanjing University of Chinese Medicine, Nanjing 210046, Jiangsu, People Republic of China. 
Electronic address: yanjing0513@126.com.","Yan J, Xu Y, Hu B, Alnajm S, Liu L, Lu Y, Sun Z, Cheng F",,,1.0,China +24705204,SysPTM,0.994190574,SysPTM,0.994190574,,0,1,http://lifecenter.sgst.cn/SysPTM,"HTTPConnectionPool(host='lifecenter.sgst.cn', port=80): Max retries exceeded with url: /SysPTM (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20180830182852/http://lifecenter.sgst.cn:80/SysPTM/,2014-04-03,"Key Laboratory of Biomedical Photonics of Ministry of Education, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan 430074, P. R. China, Shanghai Center for Bioinformation Technology, Shanghai Institutes of Biomedicine, Shanghai Academy of Science and Technology, Shanghai 201203, P. R. China, Britton Chance Center for Biomedical Photonics, Wuhan National Laboratory for Optoelectronics, Huazhong University of Science and Technology, Wuhan 430074, P. R. China, Department of Bioinformatics and Biostatistics, Shanghai Jiaotong University, Shanghai 200240, P. R. China, Key Laboratory of Systems Biology, Chinese Academy of Sciences, Shanghai 200031, P. R. 
China and Proteome Center Rostock, Department for Proteome Research, Institute of Immunology, University of Rostock, Rostock 18055, Germany.","Li J, Jia J, Li H, Yu J, Sun H, He Y, Lv D, Yang X, Glocker MO, Ma L, Yang J, Li L, Li W, Zhang G, Liu Q, Li Y, Xie L",,,20.0,"China, China, China, China, China, Germany" +24735618,TCMSP,0.992832065,TCMSP,0.992832065,traditional Chinese medicine systems pharmacology database,0.870840984,1,http://sm.nwsuaf.edu.cn/lsp/tcmsp.php,301,,,http://web.archive.org/web/20140808220324/http://sm.nwsuaf.edu.cn:80/lsp/tcmsp.php,2014-04-16,"Center for Bioinformatics, College of Life Science, Northwest A&F University, Yangling, Shaanxi 712100, China.","Ru J, Li P, Wang J, Zhou W, Li B, Huang C, Li P, Guo Z, Tao W, Yang Y, Xu X, Li Y, Wang Y, Yang L",,,890.0,China +24822057,tRNADB-CE,0.994249242,tRNADB-CE,0.994249242,,0,1,http://trna.ie.niigata-u.ac.jp,200,,,http://web.archive.org/web/20220616050909/http://trna.ie.niigata-u.ac.jp/,2014-05-01,"Graduate School of Science and Technology, Niigata University Niigata, Japan.","Abe T, Inokuchi H, Yamada Y, Muto A, Iwasaki Y, Ikemura T",,,28.0,Japan +24930145,TIPdb,0.994740725,TIPdb,0.994740725,,0,1,http://cwtung.kmu.edu.tw/tipdb,301,,,http://web.archive.org/web/20220615141116/https://cwtung.kmu.edu.tw/tipdb/,2014-06-13,"School of Pharmacy, Kaohsiung Medical University, Kaohsiung 80708, Taiwan, PhD Program in Toxicology, Kaohsiung Medical University, Kaohsiung 80708, Taiwan and National Environmental Health Research Center, National Health Research Institutes, Miaoli County 35053, TaiwanSchool of Pharmacy, Kaohsiung Medical University, Kaohsiung 80708, Taiwan, PhD Program in Toxicology, Kaohsiung Medical University, Kaohsiung 80708, Taiwan and National Environmental Health Research Center, National Health Research Institutes, Miaoli County 35053, TaiwanSchool of Pharmacy, Kaohsiung Medical University, Kaohsiung 80708, Taiwan, PhD Program in Toxicology, Kaohsiung Medical University, Kaohsiung 80708, 
Taiwan and National Environmental Health Research Center, National Health Research Institutes, Miaoli County 35053, Taiwan cwtung@kmu.edu.tw.","Tung CW, Lin YC, Chang HS, Wang CC, Chen IS, Jheng JL, Li JH",,,8.0, +25157689,STATdb,0.997418284,STATdb,0.997418284,,0,1,http://statdb.bic.nus.edu.sg,"HTTPConnectionPool(host='statdb.bic.nus.edu.sg', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to statdb.bic.nus.edu.sg timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20110612063544/http://statdb.bic.nus.edu.sg:80/,2014-08-26,"Department of Biochemistry, Yong Loo Lin School of Medicine, National University of Singapore, Singapore, Singapore.","Patro CP, Khan AM, Tan TW, Fu XY",,,0.0,"Singapore, Singapore, Singapore" +25187689,TRIPATH,0.990485728,TRIPATH,0.990485728,of,0.707948923,1,http://www.gbpuat-cbsh.ac.in/departments/bi/database/tripath,404,,,http://web.archive.org/web/20220617044324/http://www.gbpuat-cbsh.ac.in/departments/bi/database/tripath/,2014-07-22,"Department of Molecular Biology & Genetic Engineering, College of Basic Sciences & Humanities, G.B. 
Pant University of Agriculture & Technology, Pantnagar-263 145 (India).","Garg S, Pandey D, Taj G, Goel A, Kumar A",,,3.0,India +25224438,Tea Metabolome database,0.978793991,TMDB,0.973765627,Tea Metabolome database,0.978793991,1,http://pcsb.ahau.edu.cn:8080/TCDB/index.jsp,500,,,http://web.archive.org/web/20170722135137/http://pcsb.ahau.edu.cn:8080/TCDB/index.jsp,2014-09-16,None,"Yue Y, Chu GX, Liu XS, Tang X, Wang W, Liu GJ, Yang T, Ling TJ, Wang XG, Zhang ZZ, Xia T, Wan XC, Bao GH",,,21.0, +25269378,SPGDB,0.997126877,SPGDB,0.997126877,Streptococcus pneumoniae Genome Database,0.974226971,1,http://pranag.physics.iisc.ernet.in/SPGDB,301,,,http://web.archive.org/web/20220120091010/http://pranag.physics.iisc.ernet.in/SPGDB/,2014-09-28,"Medical & Biological Computing Laboratory, School of Biosciences and Technology, VIT University, Vellore 632 014, India.","Swetha RG, Sekar DK, Devi ED, Ahmed ZZ, Ramaiah S, Anbarasu A, Sekar K",,Indian Council of Medical Research,2.0,India +25300487,Super Natural II,0.813094119,Super Natural II,0.813094119,,0,1,http://bioinformatics.charite.de/supernatural,301,,,http://web.archive.org/web/20220910010500/https://bioinformatics.charite.de/supernatural/,2014-10-09,"Structural Bioinformatics Group, Charite-University Medicine Berlin, Institute of Physiology, Lindenberger Weg 80, 13125 Berlin, Germany Graduate School of Computational Systems Biology, Humboldt-Universität zu Berlin Invalidenstrasse 42, 10115 Berlin, Germany.","Banerjee P, Erehman J, Gohlke BO, Wilhelm T, Preissner R, Dunkel M",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",75.0,"Germany, Germany" +25332401,TopFIND,0.99184984,TopFIND,0.99184984,,0,1,http://clipserve.clip.ubc.ca/topfind,301,,,http://web.archive.org/web/20200220183759/http://clipserve.clip.ubc.ca:80/topfind/,2014-10-20,"Department of Biochemistry and Molecular Biology, University of British Columbia, Vancouver, British Columbia, Canada Department 
of Oral Biological and Medical Sciences, University of British Columbia, Vancouver, British Columbia, Canada Centre for Blood Research, University of British Columbia, Vancouver, British Columbia, Canada Centre for High Throughput Biology, University of British Columbia, Vancouver, British Columbia, Canada.","Fortelny N, Yang S, Pavlidis P, Lange PF, Overall CM",,Canadian Institutes of Health Research,38.0,"Canada, Canada, Canada, Canada" +25378311,tmRNA Website,0.91520524,tmRNA Website,0.91520524,,0,1,http://bioinformatics.sandia.gov/tmrna,302,,,http://web.archive.org/web/20180604123222/http://bioinformatics.sandia.gov:80/tmrna/,2014-11-05,"Sandia National Laboratories, Department of Systems Biology, Livermore, CA 94551, USA.","Hudson CM, Williams KP",,,12.0,United States +25392422,tRFdb,0.998105168,tRFdb,0.998105168,,0,1,http://genome.bioch.virginia.edu/trfdb,301,,,http://web.archive.org/web/20220912074317/http://genome.bioch.virginia.edu/trfdb/,2014-11-11,"Department of Biochemistry and Molecular Genetics, University of Virginia School of Medicine, Charlottesville, VA 22901, USA.","Kumar P, Mudunuri SB, Anaya J, Dutta A",,"NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",102.0,United States +25392424,TOPDB,0.997721473,TOPDB,0.997721473,Topology Data Bank of Transmembrane Proteins,0.98918283,1,http://topdb.enzim.ttk.mta.hu,"HTTPConnectionPool(host='topdb.enzim.ttk.mta.hu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-11-11,"'Momentum' Membrane Protein Bioinformatics Research Group, Institute of Enzymology, RCNS, HAS, Budapest PO Box 7, H-1518, Hungary.","Dobson L, Langó T, Reményi I, Tusnády GE",,,17.0,Hungary +25404137,SuperFly,0.996205926,SuperFly,0.996205926,,0,1,http://superfly.crg.eu,200,Spain,"(41.387,2.1701)",http://web.archive.org/web/20221018055711/http://superfly.crg.eu/,2014-11-17,"EMBL/CRG Research Unit in Systems Biology, Centre 
for Genomic Regulation (CRG), 08003 Barcelona, Spain Bioinformatics Core Facility, Centre for Genomic Regulation (CRG), 08003 Barcelona, Spain.","Cicin-Sain D, Pulido AH, Crombach A, Wotton KR, Jiménez-Guri E, Taly JF, Roma G, Jaeger J",,,5.0,"Spain, Spain" +25405079,Splooce,0.992536426,Splooce,0.992536426,,0,1,http://www.bioinformatics-brazil.org/splooce,404,,,http://web.archive.org/web/20160808004534/http://www.bioinformatics-brazil.org:80/splooce/,2014-11-13,"Institute of Bioinformatics and Biotechnology , Natal , Brazil ; Brain Institute, UFRN , Natal , Brazil.","Kroll JE, de Souza SJ, de Souza GA",,,2.0,"Brazil, Brazil" +25413576,TreeshrewDB,0.991575599,TreeshrewDB,0.991575599,Tree shrew database,0.932710469,1,http://www.treeshrewdb.org,200,,,http://web.archive.org/web/20220303045618/http://www.treeshrewdb.org/,2014-11-21,"1] Key Laboratory of Animal Models and Human Disease Mechanisms of Chinese Academy of Sciences and Yunnan Province, Kunming Institute of Zoology, Kunming, Yunnan 650223, China [2] Kunming College of Life Science, University of Chinese Academy of Sciences, Kunming, Yunnan 650223, China.","Fan Y, Yu D, Yao YG",,,20.0,"China, China" +"25414345, 30445555",SUPERFAMILY,0.993269384,SUPERFAMILY,0.993269384,,0,2,http://supfam.org,301,United Kingdom,"(52.1932,0.1426)",http://web.archive.org/web/20221012022522/https://www.supfam.org/,2019-01-01,"Computer Science, University of Bristol, Bristol, BS8 1UB, UK Matt.Oates@bristol.ac.uk., MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, UK.","Oates ME, Stahlhacke J, Vavoulis DV, Smithers B, Rackham OJ, Sardar AJ, Zaucha J, Thurlby N, Fang H, Gough J, Pandurangan AP, Stahlhacke J, Oates ME, Smithers B, Gough J",", ","Engineering and Physical Sciences Research Council, Biotechnology and Biological Sciences Research Council, Engineering and Physical Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, 
Medical Research Council, Biotechnology and Biological Sciences Research Council",91.0, +25627341,SynBioLGDB,0.994347095,SynBioLGDB,0.994347095,,0,1,http://bioinformatics.ac.cn/synbiolgdb,"HTTPConnectionPool(host='bioinformatics.ac.cn', port=80): Max retries exceeded with url: /synbiolgdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170507195453/http://bioinformatics.ac.cn:80/synbiolgdb/,2015-01-28,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.","Wang L, Qian K, Huang Y, Jin N, Lai H, Zhang T, Li C, Zhang C, Bi X, Wu D, Wang C, Wu H, Tan P, Lu J, Chen L, Li K, Li X, Wang D",,,5.0,China +25792605,TeloPIN,0.992713809,TeloPIN,0.992713809,Telomeric Proteins Interaction Network,0.977430391,1,http://songyanglab.sysu.edu.cn/telopin,"HTTPConnectionPool(host='songyanglab.sysu.edu.cn', port=80): Max retries exceeded with url: /telopin (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170106011713/http://songyanglab.sysu.edu.cn:80/telopin/,2015-03-18,"Key Laboratory of Gene Engineering of the Ministry of Education and State Key Laboratory of Biocontrol, School of Life Sciences, Sun Yat-Sen University, Guangzhou 510006, China, Department of Electronics and Communication Engineering, School of Information Science and Technology, Sun Yat-Sen University, Guangzhou, China; SYSU-CMU Shunde International Joint Research Institute (JRI) Shunde, Guangdong, China; Cell-Based Assay Screening Core, Dan L. 
Duncan Cancer Center, Verna and Marrs Mclean Department of Biochemistry and Molecular Biology, Baylor College of Medicine, One Baylor Plaza, Houston, TX 77030, USA, and Key Laboratory of Reproductive Medicine of Guangdong Province, Guangzhou, China.","Luo Z, Dai Z, Xie X, Feng X, Liu D, Songyang Z, Xiong Y",,,6.0,"China, China, China, China, United States" +25805861,SRD,0.972883582,SRD,0.972883582,Staphylococcal Regulatory RNA Database,0.963276863,1,http://srd.genouest.org,200,France,"(48.8254,2.13054)",http://web.archive.org/web/20220328075326/http://srd.genouest.org/,2015-03-24,"Inserm U835 Biochimie Pharmaceutique, Rennes University, 35043 Rennes, France.","Sassi M, Augagneur Y, Mauro T, Ivain L, Chabelskaya S, Hallier M, Sallou O, Felden B",,Marie Curie International Incoming Fellowship,37.0,France +25932650,TMREC,0.997089624,TMREC,0.997089624,,0,1,http://bioinfo.hrbmu.edu.cn/TMREC,"HTTPConnectionPool(host='bioinfo.hrbmu.edu.cn', port=80): Max retries exceeded with url: /TMREC (Caused by ConnectTimeoutError(, 'Connection to bioinfo.hrbmu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20200128172814/http://bioinfo.hrbmu.edu.cn:80/TMREC/,2015-05-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, 150081, P. R. 
China.","Wang S, Li W, Lian B, Liu X, Zhang Y, Dai E, Yu X, Meng F, Jiang W, Li X",,,11.0,China +25943471,SwissLipids,0.992622912,SwissLipids,0.992622912,,0,1,http://www.swisslipids.org,200,Switzerland,"(46.5163,6.5802)",http://web.archive.org/web/20221017020216/https://www.swisslipids.org/,2015-05-05,"Swiss-Prot Group, SIB Swiss Institute of Bioinformatics, CMU, 1 rue Michel-Servet, CH-1211 Geneva 4, Switzerland.","Aimo L, Liechti R, Hyka-Nouspikel N, Niknejad A, Gleizes A, Götz L, Kuznetsov D, David FP, van der Goot FG, Riezman H, Bougueleret L, Xenarios I, Bridge A",,,44.0,Switzerland +26005672,SPECTRA,0.996715844,SPECTRA,0.996715844,,0,1,http://alpha.dmi.unict.it/spectra,301,,,http://web.archive.org/web/20221016215357/https://alpha.dmi.unict.it/spectra/,2015-05-08,"Department of Computer Science, University of Pisa , Pisa , Italy.","Micale G, Ferro A, Pulvirenti A, Giugno R",,,4.0,Italy +26123534,The Mouse Genomes Project,0.571499872,,0,The Mouse Genomes Project,0.571499872,1,http://www.sanger.ac.uk/resources/mouse/genomes,301,,,http://web.archive.org/web/20151030072359/http://www.sanger.ac.uk/resources/mouse/genomes/,2015-06-30,"Wellcome Trust Sanger Institute, Hinxton, Cambridge, CB10 1HH, UK. da1@sanger.ac.uk.","Adams DJ, Doran AG, Lilue J, Keane TM",,"Biotechnology and Biological Sciences Research Council, Wellcome Trust, Medical Research Council, Cancer Research UK",43.0, +26131021,TaxKB,0.994036555,TaxKB,0.994036555,Taxane knowledge base,0.832966849,1,http://bioinfo.au-kbc.org.in/taxane/Taxkb,301,,,http://web.archive.org/web/20220617195601/http://bioinfo.au-kbc.org.in/taxane/Taxkb/,2015-06-28,"Department of Botany and Microbiology, College of Science, King Saud University, P.O. 
Box 2455, Riyadh, 11451 Saudi Arabia.","Murugan K, Shanmugasamy S, Al-Sohaibani S, Vignesh N, Palanikannan K, Vimala A, Kumar GR",,,3.0,Saudi Arabia +26208906,TANRIC,0.997347057,TANRIC,0.997347057,he Atlas of Noncoding RNAs in Cancer,0.930311513,1,http://bioinformatics.mdanderson.org/main/TANRIC:Overview,302,,,http://web.archive.org/web/20181015203922/http://bioinformatics.mdanderson.org:80/main/TANRIC:Overview,2015-07-24,"Department of Bioinformatics and Computational Biology, The University of Texas MD Anderson Cancer Center, Houston, Texas.","Li J, Han L, Roebuck P, Diao L, Liu L, Yuan Y, Weinstein JN, Liang H",,"NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS",269.0, +26220682,SpirPro,0.993724525,SpirPro,0.993724525,,0,1,http://spirpro.sbi.kmutt.ac.th,"HTTPConnectionPool(host='spirpro.sbi.kmutt.ac.th', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to spirpro.sbi.kmutt.ac.th timed out. (connect timeout=5)'))",,,no_wayback,2015-07-29,"Biochemical Engineering and Pilot Plant Research and Development (BEC) Unit, National Center for Genetic Engineering and Biotechnology, National Science and Technology Development Agency at King Mongkut's University of Technology Thonburi, 49 Soi Thian Thalae 25, Bang Khun Thian Chai Thalae Rd., Tha Kham, Bang Khun Thian, Bangkok, 10150, Thailand. 
jittisak.sen@biotec.or.th.","Senachak J, Cheevadhanarak S, Hongsthong A",,,6.0,Thailand +26249811,SurvCurv,0.998165548,SurvCurv,0.998165548,,0,1,http://www.ebi.ac.uk/thornton-srv/databases/SurvCurv,301,United Kingdom,"(52.1929,0.1256)",http://web.archive.org/web/20220123002001/https://www.ebi.ac.uk/thornton-srv/databases/SurvCurv/,2015-08-06,"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK, Department of Genetics, Evolution and Environment, The Institute of Healthy Ageing, University College London, London WC1E 6BT, UK and.","Ziehm M, Ivanov DK, Bhat A, Partridge L, Thornton JM",,"Wellcome Trust, Wellcome Trust, PEPFAR",4.0, +26339475,SwissPalm,0.997897446,SwissPalm,0.997897446,Protein Palmitoylation database,0.7939366,1,http://swisspalm.epfl.ch,302,,,http://web.archive.org/web/20180326162440/http://swisspalm.epfl.ch:80/,2015-07-16,"Global Health Institute, School of Life Sciences, Ecole Polytechnique Fédérale de Lausanne (EPFL), Lausanne, CH-1015, Switzerland.","Blanc M, David F, Abrami L, Migliozzi D, Armand F, Bürgi J, van der Goot FG",,European Research Council,87.0,Switzerland +26503244,sRNATarBase,0.988428116,sRNATarBase,0.988428116,,0,1,http://ccb1.bmi.ac.cn/srnatarbase,"HTTPConnectionPool(host='ccb1.bmi.ac.cn', port=80): Max retries exceeded with url: /srnatarbase (Caused by ConnectTimeoutError(, 'Connection to ccb1.bmi.ac.cn timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20170208142521/http://ccb1.bmi.ac.cn/srnatarbase/,2015-10-25,"Center of Computational Biology, Beijing Institute of Basic Medical Sciences, Haidian district, Beijing 100850, China.","Wang J, Liu T, Zhao B, Lu Q, Wang Z, Cao Y, Li W",,,21.0,China +26516187,SynLethDB,0.998530388,SynLethDB,0.998530388,,0,1,http://histone.sce.ntu.edu.sg/SynLethDB,"HTTPConnectionPool(host='histone.sce.ntu.edu.sg', port=80): Max retries exceeded with url: /SynLethDB (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20190313051129/http://histone.sce.ntu.edu.sg:80/SynLethDB/,2015-10-29,"School of Computer Engineering, Nanyang Technological University, Singapore 639798, Singapore.","Guo J, Liu H, Zheng J",,"NCRR NIH HHS, NCRR NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS",43.0,"Singapore, Singapore" +26578555,SugarBindDB,0.997786748,SugarBindDB,0.997786748,,0,1,http://sugarbind.expasy.org,301,Switzerland,"(46.5184,6.6436)",http://web.archive.org/web/20220825212720/https://sugarbind.expasy.org/,2015-11-17,"Proteome Informatics Group, SIB Swiss Institute of Bioinformatics, Geneva, Switzerland.","Mariethoz J, Khatib K, Alocci D, Campbell MP, Karlsson NG, Packer NH, Mullen EH, Lisacek F",,Swiss National Science Foundation,20.0,Switzerland +26578594,Super-Enhancer Archive,0.932689333,SEA,0.812459141,Super-Enhancer Archive,0.932689333,1,http://sea.edbc.org,200,United States,"(33.3359,-111.894)",http://web.archive.org/web/20220308180545/http://sea.edbc.org/,2015-11-17,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Wei Y, Zhang S, Shang S, Zhang B, Li S, Wang X, Wang F, Su J, Wu Q, Liu H, Zhang Y",,,52.0,China +26578693,TENOR,0.997442484,TENOR,0.997442484,Transcriptome ENcyclopedia Of 
Rice,0.994375892,1,http://tenor.dna.affrc.go.jp,301,,,http://web.archive.org/web/20221006151610/https://tenor.dna.affrc.go.jp/,2015-11-16,"Agrogenomics Research Center, National Institute of Agrobiological Sciences, 2-1-2 Kannondai, Tsukuba, Ibaraki, 305-8602 Japan.","Kawahara Y, Oono Y, Wakimoto H, Ogata J, Kanamori H, Sasaki H, Mori S, Matsumoto T, Itoh T",,,31.0,Japan +26582922,SureChEMBL,0.998425841,SureChEMBL,0.998425841,,0,1,http://www.surechembl.org,301,United Kingdom,"(52.0851,0.1874)",http://web.archive.org/web/20140827154914/https://www.surechembl.org/,2015-11-17,"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK.","Papadatos G, Davies M, Dedman N, Chambers J, Gaulton A, Siddle J, Koks R, Irvine SA, Pettersson J, Goncharoff N, Hersey A, Overington JP",,"Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust",42.0, +26582925,Start2Fold,0.996267363,Start2Fold,0.996267363,,0,1,http://start2fold.eu,405,,,http://web.archive.org/web/20181026101907/http://start2fold.eu:80/,2015-11-17,"Structural Biology Brussels, Vrije Universiteit Brussel (VUB), Brussels 1050, Belgium Structural Biology Research Center (IB), VIB, Brussels 1050, Belgium rpancsa@mrc-lmb.cam.ac.uk.","Pancsa R, Varadi M, Tompa P, Vranken WF",,,10.0,"Belgium, Belgium" +26852673,ThebaDB,0.965065837,ThebaDB,0.965065837,,0,1,http://thebadb.bioinfo-minzhao.org,406,,,http://web.archive.org/web/20181115203028/http://thebadb.bioinfo-minzhao.org/,2016-02-08,"School of Engineering, Faculty of Science, Health, Education and Engineering, University of the Sunshine Coast, Maroochydore DC, Queensland, 4558, Australia.","Zhao M, Wang T, Adamson KJ, Storey KB, Cummins SF",,,7.0,Australia +26896848,the maize gene families database,0.861386001,MGFD,0.702373147,the maize gene families database,0.861386001,1,http://mgfd.ahau.edu.cn,503,,,no_wayback,2016-02-20,"Key Laboratory of Crop Biology of Anhui Province, Anhui 
Agricultural University, Hefei 230036, China.","Sheng L, Jiang H, Yan H, Li X, Lin Y, Ye H, Cheng B",,,0.0,China +27138013,StreptoBase,0.997655153,StreptoBase,0.997655153,,0,1,http://streptococcus.um.edu.my,"HTTPConnectionPool(host='streptococcus.um.edu.my', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to streptococcus.um.edu.my timed out. (connect timeout=5)'))",,,no_wayback,2016-05-03,"Genome Informatics Research Laboratory, High Impact Research Building (HIR) Building, University of Malaya, 50603 Kuala Lumpur, Malaysia.","Zheng W, Tan TK, Paterson IC, Mutha NV, Siow CC, Tan SY, Old LA, Jakubovics NS, Choo SW",,Universiti Malaya,10.0,Malaysia +27148975,SpinachDB,0.997738004,SpinachDB,0.997738004,,0,1,http://222.73.98.124/spinachdb,"HTTPConnectionPool(host='222.73.98.124', port=80): Max retries exceeded with url: /spinachdb (Caused by ConnectTimeoutError(, 'Connection to 222.73.98.124 timed out. (connect timeout=5)'))",,,no_wayback,2016-05-05,"The Protected Horticulture Institute, Shanghai Academy of Agricultural Sciences, Shanghai, China.","Yang XD, Tan HW, Zhu WM",,"Natural Science Foundation of Shanghai, Shanghai Science and Technology Talents Project, Scientific Research Project in Public Agricultural Industry",4.0,China +27153630,TOPDOM,0.992206335,TOPDOM,0.992206335,,0,1,http://topdom.enzim.hu,200,,,http://web.archive.org/web/20221006064152/http://topdom.enzim.hu/,2016-04-12,"'Momentum' Membrane Protein Bioinformatics Research Group, Institute of Enzymology, RCNS, HAS, Budapest H-1518, Hungary.","Varga J, Dobson L, Tusnády GE",,,0.0,Hungary +27168721,TCCR,0.975648627,TCCR,0.975648627,Thyroid Cancer and Tumor Collaborative Registry,0.948505716,1,http://tccr.unmc.edu,200,,,http://web.archive.org/web/20220418110453/https://tccr.unmc.edu/,2016-05-03,"Eppley Institute for Research in Cancer, University of Nebraska Medical Center, Omaha, NE, USA.; Progenomix, Inc., Omaha, NE, USA.","Shats O, Goldner W, Feng J, Sherman A, 
Smith RB, Sherman S",,NCI NIH HHS,2.0,"United States, United States" +27173523,The Chinchilla Research Resource Database,0.876183919,CRRD,0.799337149,The Chinchilla Research Resource Database,0.876183919,1,http://crrd.mcw.edu,"HTTPConnectionPool(host='crrd.mcw.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220125071954/http://crrd.mcw.edu/,2016-05-12,"Rat Genome Database, Department of Surgery, Medical College of Wisconsin, Milwaukee, WI, USA.","Shimoyama M, Smith JR, De Pons J, Tutaj M, Khampang P, Hong W, Erbe CB, Ehrlich GD, Bakaletz LO, Kerschner JE",,,12.0,United States +27188311,Structure Surfer,0.915590485,Structure Surfer,0.915590485,,0,1,http://tesla.pcbi.upenn.edu/strucuturesurfer,404,,,no_wayback,2016-05-17,"Department of Biology, University of Pennsylvania, 433 S. University Ave., Philadelphia, PA, 19104, USA.","Berkowitz ND, Silverman IM, Childress DM, Kazan H, Wang LS, Gregory BD",,"NIGMS NIH HHS, NIGMS NIH HHS, NIA NIH HHS, National Science Foundation, Marie Curie CIG Grant, National Institute of General Medical Sciences, National Institute of Health",9.0,United States +27337171,TarNet,0.9843418,TarNet,0.9843418,,0,1,http://www.herbbol.org:8001/tarnet,"HTTPConnectionPool(host='www.herbbol.org', port=8001): Max retries exceeded with url: /tarnet (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,no_wayback,2016-06-23,"Beijing Key Laboratory of Innovative Drug Discovery of Traditional Chinese Medicine (Natural Medicine) and Translational Medicine, Institute of Medicinal Plant Development, Peking Union Medical College and Chinese Academy of Medical Sciences, Beijing, China.","Hu R, Ren G, Sun G, Sun X",,"National Major Scientific and Technological Special Project for “Significant New Drug Formulation”, National Major Scientific and Technological Special 
Project for “Significant New Drug Formulation”, National Natural Science Foundation of China",2.0,China +27387194,TBC2health,0.996583238,TBC2health,0.996583238,,0,1,http://camellia.ahau.edu.cn/TBC2health,503,,,no_wayback,2017-09-01,None,"Zhang S, Xuan H, Zhang L, Fu S, Wang Y, Yang H, Tai Y, Song Y, Zhang J, Ho CT, Li S, Wan X",,,6.0, +27402679,SSTAR,0.966379583,SSTAR,0.966379583,Functional Annotation of the Mammalian Genome project,0.852594042,1,http://fantom.gsc.riken.jp/5/sstar,301,,,http://web.archive.org/web/20160807002403/http://fantom.gsc.riken.jp/5/sstar/,2016-07-09,"Division of Genomic Technologies (DGT), RIKEN Center for Life Science Technologies (CLST), Kanagawa 230-0045, Japan.","Abugessaisa I, Shimoji H, Sahin S, Kondo A, Harshbarger J, Lizio M, Hayashizaki Y, Carninci P, , Forrest A, Kasukawa T, Kawaji H",,,32.0,Japan +27412095,SSBD,0.980243762,SSBD,0.980243762,Systems Science of Biological Dynamics database,0.951881438,1,http://ssbd.qbic.riken.jp,200,Japan,"(35.6916,139.768)",http://web.archive.org/web/20221011002017/https://ssbd.qbic.riken.jp/,2016-07-13,"Laboratory for Developmental Dynamics, RIKEN Quantitative Biology Center, Kobe 650-0047, Japan.","Tohsato Y, Ho KH, Kyoda K, Onami S",,,10.0,Japan +27451428,SZDB,0.994548321,SZDB,0.994548321,,0,1,http://www.szdb.org,200,,,http://web.archive.org/web/20221016212109/http://szdb.org/,2017-03-01,"Key Laboratory of Animal Models and Human Disease Mechanisms of the Chinese Academy of Sciences and Yunnan Province, Kunming Institute of Zoology, Kunming, China.","Wu Y, Yao YG, Luo XJ",,,46.0,China +27643925,StemCellCKB,0.995615423,StemCellCKB,0.995615423,,0,1,http://www.cbligand.org/StemCellCKB,301,United States,"(40.4965,-79.9747)",http://web.archive.org/web/20210723185414/https://www.cbligand.org/StemCellCKB/,2016-10-07,"Department of Pharmaceutical Sciences and Computational Chemical Genomics Screening Center, School of Pharmacy; National Center of Excellence for Computational Drug Abuse Research; Drug 
Discovery Institute; Departments of Computational Biology and Structural Biology, School of Medicine, University of Pittsburgh , Pittsburgh, Pennsylvania 15260, United States.","Zhang Y, Wang L, Feng Z, Cheng H, McGuire TF, Ding Y, Cheng T, Gao Y, Xie XQ",,"China Scholarship Council, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, NIDA NIH HHS, National Heart, Lung, and Blood Institute, National Institute on Drug Abuse, NHLBI NIH HHS, NCATS NIH HHS, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Tianjin City",4.0,United States +27733502,SZGR,0.997519851,SZGR,0.997519851,,0,1,http://bioinfo.uth.edu/SZGR,302,,,http://web.archive.org/web/20221017080727/https://bioinfo.uth.edu/SZGR/,2016-10-12,"Center for Precision Health, School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, TX 77030, USA.","Jia P, Han G, Zhao J, Lu P, Zhao Z",,"NLM NIH HHS, NCI NIH HHS",16.0,United States +27749924,SulfAtlas,0.992132127,SulfAtlas,0.992132127,,0,1,http://abims.sb-roscoff.fr/sulfatlas,301,France,"(48.7246,-3.9932)",no_wayback,2016-10-17,"Sorbonne Universités, UPMC Univ Paris 06, CNRS, UMR 8227, Integrative Biology of Marine Models, Station Biologique de Roscoff, CS 90074, Roscoff, Bretagne, France.","Barbeyron T, Brillet-Guéguen L, Carré W, Carrière C, Caron C, Czjzek M, Hoebeke M, Michel G",,Seventh Framework Programme,42.0,France +"27779618, 27779621",studyforrest,0.994259179,studyforrest,0.994259179,,0,2,http://studyforrest.org,301,Germany,"(50.9095,6.40611)",http://web.archive.org/web/20221006173647/https://www.studyforrest.org/,2016-10-25,"Experimental Psychology Lab, Institute of Psychology, Otto-von-Guericke University, Magdeburg D-39016, Germany., Psychoinformatics Lab, Institute of Psychology, Otto-von-Guericke University, 
Magdeburg D-39016, Germany.","Sengupta A, Kaule FR, Guntupalli JS, Hoffmann MB, Häusler C, Stadler J, Hanke M, Hanke M, Adelhöfer N, Kottke D, Iacovella V, Sengupta A, Kaule FR, Nigbur R, Waite AQ, Baumgartner F, Stadler J",", ",", ",38.0,"Germany, Germany" +27789689,TcoF-DB,0.943981087,TcoF-DB,0.943981087,,0,1,http://tcofdb.org,301,,,no_wayback,2016-10-26,"Massey University Auckland, Institute of Natural and Mathematical Sciences, Auckland, New Zealand.","Schmeier S, Alam T, Essack M, Bajic VB",,,32.0,New Zealand +27899608,TFBSbank,0.99741137,TFBSbank,0.99741137,,0,1,http://tfbsbank.co.uk,200,,,http://web.archive.org/web/20220401004710/http://tfbsbank.co.uk/,2016-11-28,"Fitzwilliam College, University of Cambridge, Storey's Way, Cambridge CB3 0DG, UK dc571@cam.ac.uk dschen2016@gmail.com.","Chen D, Jiang S, Ma X, Li F",,,3.0, +27899614,SUBA4,0.997400373,SUBA4,0.997400373,SUBcellular location database for Arabidopsis proteins,0.988059592,1,http://suba.live,301,Canada,"(43.6532,-79.3832)",http://web.archive.org/web/20221018155227/https://suba.live/,2016-11-28,"ARC Centre of Excellence in Plant Energy Biology, The University of Western Australia, Perth, WA 6009, Australia cornelia.hooper@uwa.edu.au.","Hooper CM, Castleden IR, Tanz SK, Aryamanesh N, Millar AH",,,118.0,"Australia, Australia" +27899676,TransportDB,0.979824901,TransportDB,0.979824901,,0,1,http://www.membranetransport.org/transportDB2,301,,,http://web.archive.org/web/20221017033958/http://www.membranetransport.org/transportDB2/,2016-11-28,"Department of Chemistry and Biomolecular Sciences, Macquarie University, NSW 2109, Australia liam.elbourne@mq.edu.au.","Elbourne LD, Tetu SG, Hassan KA, Paulsen IT",,,97.0,Australia +28013278,ThaleMine,0.986150086,ThaleMine,0.986150086,,0,1,"http://apps.araport.org/thalemine/, http://www.araport.org","HTTPConnectionPool(host='apps.araport.org', port=80): Max retries exceeded with url: /thalemine/ (Caused by ConnectTimeoutError(, 'Connection to apps.araport.org timed out. 
(connect timeout=5)')), 301",,", ","no_wayback, http://web.archive.org/web/20221022214625/https://www.araport.org/",2017-01-01,"Plant Genomics, J. Craig Venter Institute, Medical Center Dr, Rockville, MD, USA.","Krishnakumar V, Contrino S, Cheng CY, Belyaeva I, Ferlanti ES, Miller JR, Vaughn MW, Micklem G, Town CD, Chan AP",,Biotechnology and Biological Sciences Research Council,10.0,United States +28111364,TOMATOMICS,0.993693531,TOMATOMICS,0.993693531,,0,1,http://bioinf.mind.meiji.ac.jp/tomatomics,"HTTPConnectionPool(host='bioinf.mind.meiji.ac.jp', port=80): Max retries exceeded with url: /tomatomics (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20161111062507/http://bioinf.mind.meiji.ac.jp:80/tomatomics/,2017-01-01,"Bioinformatics Laboratory, School of Agriculture, Meiji University,Higashi-mita, Tama-ku, Kawasaki, Kanagawa, Japan.","Kudo T, Kobayashi M, Terashima S, Katayama M, Ozaki S, Kanno M, Saito M, Yokoyama K, Ohyanagi H, Aoki K, Kubo Y, Yano K",,,9.0,Japan +28365741,TMPL,0.995930076,TMPL,0.995930076,,0,1,http://www.dsimb.inserm.fr/TMPL,301,,,http://web.archive.org/web/20171205134229/http://www.dsimb.inserm.fr:80/TMPL/,2017-01-01,"Inserm U1134, Paris, France.","Postic G, Ghouzam Y, Etchebest C, Gelly JC",,,2.0,France +28387841,TimeTree,0.989791155,TimeTree,0.989791155,,0,1,http://www.timetree.org,200,,,http://web.archive.org/web/20221023212115/http://timetree.org/,2017-07-01,"Institute for Genomics and Evolutionary Medicine, Temple University, Philadelphia, PA.","Kumar S, Stecher G, Suleski M, Hedges SB",,,705.0, +28420402,SSER,0.930679306,SSER,0.930679306,Species specific essential reactions database,0.902112281,1,http://cefg.uestc.edu.cn/sser,"HTTPConnectionPool(host='cefg.uestc.edu.cn', port=80): Max retries exceeded with url: /sser (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not 
known'))",,,no_wayback,2017-04-19,"Center of Bioinformatics, Key Laboratory for Neuro-Information of Ministry of Education, School of Life Science and Technology, University of Electronic Science and Technology of China, Chengdu, China.","Labena AA, Ye YN, Dong C, Zhang FZ, Guo FB",,"National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities of China, Sichuan Youth Science and Technology Foundation of China",2.0,"China, China" +28588237,TCM-Mesh,0.989953801,TCM-Mesh,0.989953801,,0,1,http://mesh.tcm.microbioinformatics.org,403,,,http://web.archive.org/web/20201202031902/http://mesh.tcm.microbioinformatics.org/,2017-06-06,"Key Laboratory of Molecular Biophysics of the Ministry of Education, Hubei Key Laboratory of Bioinformatics and Molecular-imaging, Department of Bioinformatics and Systems Biology, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei, 430074, China.","Zhang RZ, Yu SJ, Bai H, Ning K",,,65.0,China +28759605,THPdb,0.998677254,THPdb,0.998677254,,0,1,http://crdd.osdd.net/raghava/thpdb,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/thpdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220202145704/http://crdd.osdd.net/raghava/thpdb/,2017-07-31,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh, India.","Usmani SS, Bedi G, Samuel JS, Singh S, Kalra S, Kumar P, Ahuja AA, Sharma M, Gautam A, Raghava GPS",,Open Source Drug Discovery,94.0,India +28807036,T-Time,0.980422139,T-Time,0.980422139,,0,1,http://ttime.mlatlab.org,200,United States,"(33.7931,-117.852)",http://web.archive.org/web/20210801064649/http://ttime.mlatlab.org/,2017-08-15,"Schmid College of Science and Technology, Chapman University, One University Drive, Orange, CA, 92866, USA.","Arbuckle C, Greenberg M, Bergh A, German R, Sirago N, Linstead E",,,0.0,United States +28832569,SweGen,0.992595315,SweGen,0.992595315,,0,1,http://swefreq.nbis.se,301,,,http://web.archive.org/web/20221020162616/https://swefreq.nbis.se/,2017-08-23,"Science for Life Laboratory, Department of Immunology, Genetics and Pathology, Uppsala University, Uppsala, Sweden.","Ameur A, Dahlberg J, Olason P, Vezzi F, Karlsson R, Martin M, Viklund J, Kähäri AK, Lundin P, Che H, Thutkawkorapin J, Eisfeldt J, Lampa S, Dahlberg M, Hagberg J, Jareborg N, Liljedahl U, Jonasson I, Johansson Å, Feuk L, Lundeberg J, Syvänen AC, Lundin S, Nilsson D, Nystedt B, Magnusson PK, Gyllensten U",,European Research Council,73.0,Sweden +28888135,STRSeq,0.99774313,STRSeq,0.99774313,Sequencing Project,0.543064018,1,"http://www.ncbi.nlm.nih.gov/bioproject/380127, http://strider.online","301, 308","Germany, United States","(38.8341,-76.7974), (48.3238,11.7598)","http://web.archive.org/web/20220804215817/http://www.ncbi.nlm.nih.gov/bioproject/380127, http://web.archive.org/web/20221017041135/https://strider.online/",2017-09-01,"U.S. National Institute of Standards and Technology, Biomolecular Measurement Division, 100 Bureau Drive, Gaithersburg, MD 20899, USA. 
Electronic address: katherine.gettings@nist.gov.","Gettings KB, Borsuk LA, Ballard D, Bodner M, Budowle B, Devesse L, King J, Parson W, Phillips C, Vallone PM",,Intramural NIST DOC,16.0,United States +28961249,SWI/SNF Infobase,0.784500964,SWI/SNF Infobase,0.784500964,,0,1,http://scbt.sastra.edu/swisnfdb/index.php,200,,,http://web.archive.org/web/20171216215422/http://scbt.sastra.edu:80/swisnfdb/index.php,2017-09-29,"School of Chemical & Biotechnology, SASTRA University, Tirumalaisamudram, Thanjavur, India.","Mani U, S AS, Goutham R N A, Mohan S S",,Science and Engineering Research Board,10.0,India +28974472,Stress2TF,0.992926578,Stress2TF,0.992926578,,0,1,http://csgenomics.ahau.edu.cn/Stress2TF,503,,,no_wayback,2017-09-30,"College of Information and Computer science, Anhui Agricultural University, Hefei 230036, China.","Zhang X, Yao C, Fu S, Xuan H, Wen S, Liu C, Li F, Liu A, Bi S, Zhang S, Li S",,"National Natural Science Foundation of China, Anhui Province",2.0,China +28985418,SysteMHC,0.961411715,SysteMHC,0.961411715,,0,1,http://systemhcatlas.org,302,United States,"(42.8943,-78.8736)",http://web.archive.org/web/20220802183907/https://systemhcatlas.org/,2018-01-01,"Department of Biology, Institute of Molecular Systems Biology, ETH Zurich, Zurich 8093, Switzerland.","Shao W, Pedrioli PGA, Wolski W, Scurtescu C, Schmid E, Vizcaíno JA, Courcelles M, Schuster H, Kowalewski D, Marino F, Arlehamn CSL, Vaughan K, Peters B, Sette A, Ottenhoff THM, Meijgaarden KE, Nieuwenhuizen N, Kaufmann SHE, Schlapbach R, Castle JC, Nesvizhskii AI, Nielsen M, Deutsch EW, Campbell DS, Moritz RL, Zubarev RA, Ytterberg AJ, Purcell AW, Marcilla M, Paradela A, Wang Q, Costello CE, Ternette N, van Veelen PA, van Els CACM, Heck AJR, de Souza GA, Sollid LM, Admon A, Stevanovic S, Rammensee HG, Thibault P, Perreault C, Bassani-Sternberg M, Aebersold R, Caron E",,"NIGMS NIH HHS, NCI NIH HHS, European Research Council, NIGMS NIH HHS, NIGMS NIH HHS, European Research Council, Swiss National 
Science Foundation, Wellcome Trust",53.0,"Switzerland, Ethiopia" +29036590,TissGDB,0.99716121,TissGDB,0.99716121,Tissue specific Gene DataBase in cancer,0.886347724,1,http://zhaobioinfo.org/TissGDB,301,,,http://web.archive.org/web/20220130052820/http://zhaobioinfo.org/TissGDB/,2018-01-01,"Center for Precision Health, School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, TX 77030, USA.","Kim P, Park A, Han G, Sun H, Jia P, Zhao Z",,"NCI NIH HHS, NLM NIH HHS",20.0,United States +29045725,StemMapper,0.99310112,StemMapper,0.99310112,,0,1,http://stemmapper.sysbiolab.eu,503,,,http://web.archive.org/web/20220520172248/http://stemmapper.sysbiolab.eu/,2018-01-01,"Systems Biology and Bioinformatics Laboratory (SysBioLab), Universidade do Algarve, Faro, 8005-139, Portugal.","Pinto JP, Machado RSR, Magno R, Oliveira DV, Machado S, Andrade RP, Bragança J, Duarte I, Futschik ME",,,14.0,Portugal +29045755,TriForC,0.973249376,TriForC,0.973249376,,0,1,http://bioinformatics.psb.ugent.be/triforc,301,,,http://web.archive.org/web/20220121030554/http://bioinformatics.psb.ugent.be/triforc/,2018-01-01,"Ghent University, Department of Plant Biotechnology and Bioinformatics, 9052 Ghent, Belgium.","Miettinen K, Iñigo S, Kreft L, Pollier J, De Bo C, Botzki A, Coppens F, Bak S, Goossens A",,,7.0,Belgium +29087479,STCRDab,0.99811368,STCRDab,0.99811368,Structural T-cell Receptor Database,0.990144104,1,http://opig.stats.ox.ac.uk/webapps/stcrdab,301,United Kingdom,"(51.7128,-1.2347)",http://web.archive.org/web/20221016215116/https://opig.stats.ox.ac.uk/webapps/stcrdab/,2018-01-01,"Department of Statistics, University of Oxford, 24-29 St Giles, Oxford, OX1 3LB, UK.","Leem J, de Oliveira SHP, Krawczyk K, Deane CM",,Medical Research Council,22.0, +29092939,TCPA,0.990851581,TCPA,0.990851581,The Cancer Proteome Atlas,0.966235052,1,http://tcpaportal.org,302,,,http://web.archive.org/web/20221102173155/https://tcpaportal.org/,2017-11-01,"Department of 
Bioinformatics and Computational Biology, The University of Texas MD Anderson Cancer Center, Houston, Texas.","Li J, Akbani R, Zhao W, Lu Y, Weinstein JN, Mills GB, Liang H",,"NIH, NIH, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NHGRI NIH HHS, NIH, NIH, NIH, NIH, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NIH, MD Anderson Cancer Center, NIH, Cancer Prevention and Research Institute of Texas",34.0, +29106630,TranslatomeDB,0.996909559,TranslatomeDB,0.996909559,,0,1,http://www.translatomedb.net,200,,,http://web.archive.org/web/20221014063533/http://www.translatomedb.net/,2018-01-01,"Key Laboratory of Functional Protein Research of Guangdong Higher Education Institutes, Institute of Life and Health Engineering, Jinan University, Guangzhou 510632, China.","Liu W, Xiang L, Zheng T, Jin J, Zhang G",,,19.0,China +29106651,Target-Pathogen,0.984828025,Target-Pathogen,0.984828025,,0,1,http://target.sbg.qb.fcen.uba.ar/patho,302,,,http://web.archive.org/web/20220617071146/http://target.sbg.qb.fcen.uba.ar/patho/,2018-01-01,"IQUIBICEN-CONICET, Ciudad Universitaria, Pabellón 2, C1428EHA Ciudad de Buenos Aires, Argentina.","Sosa EJ, Burguener G, Lanzarotti E, Defelipe L, Radusky L, Pardo AM, Marti M, Turjanski AG, Fernández Do Porto D",,,20.0,Argentina +29106666,TADB2.0,0.957761908,TADB2.0,0.957761908,,0,1,http://bioinfo-mml.sjtu.edu.cn/TADB2,302,,,http://web.archive.org/web/20220303153918/https://bioinfo-mml.sjtu.edu.cn/TADB2/,2018-01-01,"State Key Laboratory of Microbial Metabolism, Joint International Laboratory on Metabolic & Developmental Sciences, School of Life Sciences & Biotechnology, Shanghai Jiao Tong University, Shanghai, 200030, China.","Xie Y, Wei Y, Shen Y, Li X, Zhou H, Tai C, Deng Z, Ou HY",,,80.0,China +29140469,SuperDRUG2,0.978730261,SuperDRUG2,0.978730261,,0,1,http://cheminfo.charite.de/superdrug2,"HTTPConnectionPool(host='cheminfo.charite.de', port=80): Max retries exceeded with url: /superdrug2 (Caused by 
NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20201204165317/http://cheminfo.charite.de/superdrug2/,2018-01-01,"Structural Bioinformatics Group, Experimental and Clinical Research Center (ECRC), Charité - University Medicine Berlin, Berlin, Germany.","Siramshetty VB, Eckert OA, Gohlke BO, Goede A, Chen Q, Devarakonda P, Preissner S, Preissner R",,,29.0,Germany +29145635,tRex,0.994673848,tRex,0.994673848,,0,1,http://combio.pl/trex)-the,404,,,no_wayback,2018-01-01,"Department of Computational Biology, Faculty of Biology, Institute of Molecular Biology and Biotechnology, Adam Mickiewicz University Poznan, 61-614 Poznan, Poland.","Thompson A, Zielezinski A, Plewka P, Szymanski M, Nuc P, Szweykowska-Kulinska Z, Jarmolowski A, Karlowski WM",,"National Science Centre, National Science Centre",9.0,Poland +29157087,THANATOS,0.99623239,THANATOS,0.99623239,Necrosis,0.979467452,1,http://thanatos.biocuckoo.org,200,,,http://web.archive.org/web/20221016224133/https://thanatos.biocuckoo.org/,2018-01-01,"a Key Laboratory of Molecular Biophysics of Ministry of Education, College of Life Science and Technology and the Collaborative Innovation Center for Biomedical Engineering , Huazhong University of Science and Technology , Wuhan , Hubei 430074 , China.","Deng W, Ma L, Zhang Y, Zhou J, Wang Y, Liu Z, Xue Y",,"National Key R&D Program, National Basic Research Program, International Science & Technology Cooperation Program of China, Natural Science Foundation of China",19.0,China +29218589,StimulStat,0.99712956,StimulStat,0.99712956,,0,1,http://stimul.cognitivestudies.ru,302,,,no_wayback,2018-12-01,"St. Petersburg State University, St. Petersburg, Galernya 58/60, 190000, Russia. 
mail@s-alexeeva.ru.","Alexeeva S, Slioussar N, Chernova D",,Russian Humanitarian Foundation,3.0, +29234333,TaSSRDb,0.996534109,TaSSRDb,0.996534109,Triticum,0.721190155,1,http://webtom.cabgrid.res.in/wheatssr,"HTTPConnectionPool(host='webtom.cabgrid.res.in', port=80): Max retries exceeded with url: /wheatssr (Caused by ConnectTimeoutError(, 'Connection to webtom.cabgrid.res.in timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220331083834/http://webtom.cabgrid.res.in/wheatssr/,2017-11-28,"Centre for Agricultural Bioinformatics, ICAR-Indian Agricultural Statistics Research Institute, New Delhi, India.","Jaiswal S, Sheoran S, Arora V, Angadi UB, Iquebal MA, Raghav N, Aneja B, Kumar D, Singh R, Sharma P, Singh GP, Rai A, Tiwari R, Kumar D",,,4.0,India +29297316,TBEVhostDB,0.947500706,TBEVhostDB,0.947500706,,0,1,http://icg.nsc.ru/TBEVHostDB,301,,,no_wayback,2017-12-28,"Laboratory of Evolutionary Bioinformatics and Theoretical Genetics, The Federal Research Center Institute of Cytology and Genetics of Siberian Branch of the Russian Academy of Sciences, Novosibirsk, 630090, Russia. 
eignat@bionet.nsc.ru.","Ignatieva EV, Igoshin AV, Yudin NS",,,3.0, +29316788,SynBioHub,0.997284293,SynBioHub,0.997284293,,0,1,http://synbiohub.org,301,United States,"(36.677696,-78.37471)",http://web.archive.org/web/20220715172041/https://synbiohub.org/,2018-01-30,"School of Computing, Newcastle University , Newcastle upon Tyne, NE1 7RU, U.K.","McLaughlin JA, Myers CJ, Zundel Z, Mısırlı G, Zhang M, Ofiteru ID, Goñi-Moreno A, Wipat A",,"Google, FUJIFILM Diosynth Biotechnologies U.S.A., Inc., Division of Computing and Communication Foundations, Division of Biological Infrastructure, Engineering and Physical Sciences Research Council, Engineering and Physical Sciences Research Council, Engineering and Physical Sciences Research Council",23.0, +29385404,TOXsIgN,0.996063232,TOXsIgN,0.996063232,,0,1,http://toxsign.genouest.org,302,,,http://web.archive.org/web/20220714160738/https://toxsign.genouest.org/,2018-06-01,"Univ Rennes, Inserm, EHESP, Irset (Institut de recherche en santé, environnement et travail) - UMR_S1085, F-35000 Rennes, France.","Darde TA, Gaudriault P, Beranger R, Lancien C, Caillarec-Joly A, Sallou O, Bonvallot N, Chevrier C, Mazaud-Guittot S, Jégou B, Collin O, Becker E, Rolland AD, Chalmel F",,"French agency for food and safety, Fondation pour la recherche médicale, European Union",7.0,France +29432422,TopicalPdb,0.998002231,TopicalPdb,0.998002231,,0,1,http://crdd.osdd.net/raghava/topicalpdb,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/topicalpdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20211206085134/http://crdd.osdd.net/raghava/topicalpdb/,2018-02-12,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh, India.","Mathur D, Mehta A, Firmal P, Bedi G, Sood C, Gautam A, Raghava GPS",,"Council of Scientific and Industrial Research, Council for Scientific and Industrial Research, Council of Scientific and Industrial Research, Department of Biotechnology, Ministry of Science and Technology",1.0,India +29520288,TBC2target,0.991942734,TBC2target,0.991942734,,0,1,http://camellia.ahau.edu.cn/TBC2target,503,,,no_wayback,2018-02-22,"State Key Laboratory of Tea Plant Biology and Utilization, Institute of Applied Mathematics, Anhui Agricultural University, Hefei, China.","Zhang S, Zhang L, Wang Y, Yang J, Liao M, Bi S, Xie Z, Ho CT, Wan X",,,2.0,China +29617745,TISSUES,0.994719326,TISSUES,0.994719326,,0,1,http://tissues.jensenlab.org,302,,,no_wayback,2018-01-01,"Novo Nordisk Foundation Center for Protein Research, Faculty of Health and Medical Sciences, University of Copenhagen, Copenhagen, Denmark.","Palasca O, Santos A, Stolte C, Gorodkin J, Jensen LJ",,Novo Nordisk Foundation Center for Protein Research,38.0,Denmark +29776332,TelNet,0.987919092,TelNet,0.987919092,,0,1,http://www.cancertelsys.org/telnet,301,,,http://web.archive.org/web/20220405143813/http://www.cancertelsys.org/telnet/,2018-05-18,"Division of Chromatin Networks, German Cancer Research Center (DKFZ) & Bioquant, 69120, Heidelberg, Germany.","Braun DM, Chung I, Kepper N, Deeg KI, Rippe K",,Bundesministerium für Forschung und Technologie,12.0,Germany +30052772,Terabase,0.869710565,Terabase,0.869710565,,0,1,http://tse.idies.jhu.edu,200,,,no_wayback,2019-02-01,"Department of Physics and Astronomy, Johns Hopkins University, Baltimore, MD, USA.","Wilton R, Wheelan SJ, Szalay AS, Salzberg SL",,"NIH, NHLBI NIH HHS, NIH, NHGRI NIH HHS",1.0,United States +30053266,TC3A,0.990756714,TC3A,0.990756714,UTR 
Atlas,0.958735685,1,http://tc3a.org,405,,,http://web.archive.org/web/20221102054700/http://tc3a.org/,2018-01-01,"Division of Biostatistics, Dan L. Duncan Cancer Center and Department of Molecular and Cellular Biology, Baylor College of Medicine, Houston, TX 77030, USA.","Feng X, Li L, Wagner EJ, Li W",,"NCI NIH HHS, NHGRI NIH HHS",23.0,United States +30094004,SuperbaSE,0.992503345,SuperbaSE,0.992503345,,0,1,http://www.krill.le.ac.uk,302,,,no_wayback,2017-06-28,Department of Genetics College of Medicine Biological Sciences and Psychology University of Leicester University Road Leicester UK.,"Hunt BJ, Özkaya Ö, Davies NJ, Gaten E, Seear P, Kyriacou CP, Tarling G, Rosato E",,"Natural Environment Research Council, Natural Environment Research Council, Natural Environment Research Council",2.0, +30119164,TReasure,0.990983844,TReasure,0.990983844,,0,1,http://www.trials-network.org/treasure,301,,,http://web.archive.org/web/20220121153302/https://www.trials-network.org/treasure,2018-08-16,None,"Kalyoncu U, Taşcılar EK, Ertenli Aİ, Dalkılıç HE, Bes C, Küçükşahin O, Kaşifoğlu T, Alpay Kanıtez N, Emmungil H, Kimyon G, Yaşar Bilge NŞ, Akar S, Atagündüz MP, Koca SS, Ateş A, Yazısız V, Terzioğlu E, Ersözlü ED, Tufan MA, Çınar M, Mercan R, Şahin A, Erten Ş, Pehlivan Y, Yılmaz S, Keleşoğlu Dinçer AB, Gerçik Ö, Coşkun BN, Yağız B, Kaymaz Tahra S, Aksoy A, Karadağ Ö, Kılıç L, Kiraz S",,,3.0, +30165582,Traitpedia,0.997839808,Traitpedia,0.997839808,,0,1,http://cbdm-01.zdv.uni-mainz.de,200,,,http://web.archive.org/web/20220620015156/http://cbdm-01.zdv.uni-mainz.de/,2019-03-01,"Faculty of Biology, Institute of Organismic and Molecular Evolution, Johannes Gutenberg University, Mainz, Germany.","Mier P, Andrade-Navarro MA",,Deutsche Forschungsgemeinschaft,0.0,Germany +30184150,TRCirc,0.998382151,TRCirc,0.998382151,,0,1,http://www.licpathway.net/TRCirc,301,,,http://web.archive.org/web/20221030154308/http://www.licpathway.net/TRCirc/,2019-11-01,"School of Medical Informatics, Daqing Campus, 
Harbin Medical University.","Tang Z, Li X, Zhao J, Qian F, Feng C, Li Y, Zhang J, Jiang Y, Yang Y, Wang Q, Li C",,"Natural Science Foundation of Heilongjiang Province, National Natural Science Foundation of China, National Natural Science Foundation of China",21.0, +30306862,Sri Lankan Flora,0.628779441,Sri Lankan Flora,0.628779441,,0,1,http://science.cmb.ac.lk/tools/slflora,301,,,http://web.archive.org/web/20220301225307/https://science.cmb.ac.lk/tools/slflora/,2018-01-01,"Department of Chemistry, Faculty of Science, University of Colombo, Colombo, Sri Lanka.","Rathnayake S, Weerasinghe S",,,0.0,Sri Lanka +30371815,TransmiR,0.97742933,TransmiR,0.97742933,,0,1,http://www.cuilab.cn/transmir,200,,,http://web.archive.org/web/20220803222113/https://www.cuilab.cn/transmir,2019-01-01,"Department of Biomedical Informatics, School of Basic Medical Sciences, Peking University, Beijing 100191, China.","Tong Z, Cui Q, Wang J, Zhou Y",,"National Natural Science Foundation of China, Fundamental Research Funds for Central Universities of China, Special Project on Precision Medicine under the National Key R&D Program",80.0,China +30380087,SymMap,0.99069941,SymMap,0.99069941,,0,1,"http://www.symmap.org/, http://www.bioinfo.org/symmap","200, 301",United States,", (37.4316,-78.6569)","http://web.archive.org/web/20221020030216/http://symmap.org/, no_wayback",2019-01-01,"Beijing University of Chinese Medicine, ChaoYang District, Beijing 100029, China.","Wu Y, Zhang F, Yang K, Fang S, Bu D, Li H, Sun L, Hu H, Gao K, Wang W, Zhou X, Zhao Y, Chen J",,"National Natural Science Foundation of China, National Natural Science Foundation for Young Scholars of China, National Natural Science Foundation of China, National Natural Science Foundation for Young Scholars of China, National Natural Science Foundation for Young Scholars of China, National Key Research and Development Program of China, Institute of Computing Technology, National Key Research and Development Program of China, National 
Key Research and Development Program of China",78.0,China +30380112,Translocatome,0.997318149,Translocatome,0.997318149,,0,1,"http://translocatome.linkgroup.hu, http://comppi.linkgroup.hu","200, 200",,", ","http://web.archive.org/web/20221109183959/http://translocatome.linkgroup.hu/, http://web.archive.org/web/20221016235027/https://comppi.linkgroup.hu/",2019-01-01,"Department of Medical Chemistry, Semmelweis University, Budapest, Hungary.","Mendik P, Dobronyi L, Hári F, Kerepesi C, Maia-Moço L, Buszlai D, Csermely P, Veres DV",,"Ministry of Human Capacities in Hungary, Hungarian National Research Development and Innovation Office, Hungarian Ministry of Human Capacities, Hungarian Ministry of Human Capacities, European Social Fund",6.0,Hungary +30602089,STADIUM,0.983587086,STADIUM,0.983587086,Specific tRNA Adaptive Index Compendium,0.638744718,1,http://stadium.pmrc.re.kr,500,,,http://web.archive.org/web/20220118123301/http://stadium.pmrc.re.kr/,2018-12-28,"Department of Biomedicine & Health Sciences, Graduate School, The Catholic University of Korea, Seoul 06591, Korea.","Yoon J, Chung YJ, Lee M",,"Ministry of Science and ICT, National Research Foundation of Korea, National Research Foundation of Korea, Korea Research Environment Open NETwork",3.0, +30810209,Tetrahymena Comparative Genomics Database,0.969320416,TCGD,0.902827978,Tetrahymena Comparative Genomics Database,0.969320416,1,http://ciliate.ihb.ac.cn,200,,,http://web.archive.org/web/20220501234758/http://ciliate.ihb.ac.cn/,2019-01-01,"Key Laboratory of Aquatic Biodiversity and Conservation, Institute of Hydrobiology, Chinese Academy of Sciences, Wuhan, China.","Yang W, Jiang C, Zhu Y, Chen K, Wang G, Yuan D, Miao W, Xiong J",,"Natural Science Foundation of China, Natural Science Foundation of China, Natural Science Foundation of China, Wuhan Branch, Supercomputing Centre, Chinese Academy of Sciences, China, Youth Innovation Promotion Association, Chinese Academy of Sciences",1.0,China 
+30846808,TACCO,0.997243583,TACCO,0.997243583,Transcriptome Alterations in CanCer Omnibus,0.932588655,1,http://tacco.life.nctu.edu.tw,200,,,http://web.archive.org/web/20220418024303/http://tacco.life.nctu.edu.tw/,2019-03-07,"Molecular Medicine Research Center, Chang Gung University, Taoyuan, Taiwan.","Chou PH, Liao WC, Tsai KW, Chen KC, Yu JS, Chen TW",,"Ministry of Science and Technology, Taiwan, Chang Gung Memorial Hospital, Linkou, Ministry of Science and Technology, Taiwan",6.0, +30871473,TADKB,0.996571302,TADKB,0.996571302,,0,1,http://dna.cs.miami.edu/TADKB,301,,,http://web.archive.org/web/20220616021022/http://dna.cs.miami.edu/TADKB/,2019-03-14,"Department of Computer Science, University of Miami, 1365 Memorial Drive, Coral Gables, FL, 33124-4245, USA.","Liu T, Porter J, Zhao C, Zhu H, Wang N, Sun Z, Mo YY, Wang Z",,"NIGMS NIH HHS, National Institute of General Medical Sciences, National Institute of General Medical Sciences (US)",8.0,United States +30913342,TPIA,0.981803596,TPIA,0.981803596,Tea Plant Information Archive,0.938457757,1,http://tpia.teaplant.org,302,,,http://web.archive.org/web/20210501150721/http://tpia.teaplant.org/,2019-04-11,"State Key Laboratory of Tea Plant Biology and Utilization, Anhui Agricultural University, Hefei, 230036, China.","Xia EH, Li FD, Tong W, Li PH, Wu Q, Zhao HJ, Ge RH, Li RP, Li YY, Zhang ZZ, Wei CL, Wan XC",,"China Postdoctoral Science Foundation, National Natural Science Foundation of China",51.0,China +30994884,starPepDB,0.923639512,starPepDB,0.923639512,,0,1,http://mobiosd-hub.com/starpep,301,United States,"(40.7128,-74.006)",http://web.archive.org/web/20220608074339/http://mobiosd-hub.com/starpep/,2019-11-01,"Departamento de Ciencias de la Computación, Centro de Investigación Científica y de Educación Superior de Ensenada (CICESE), 22860 Ensenada, Mexico.","Aguilera-Mendoza L, Marrero-Ponce Y, Beltran JA, Tellez Ibarra R, Guillen-Ramirez HA, Brizuela CA",,"CONACYT, CONACYT, USFQ",5.0,Mexico 
+31015229,TCEA,0.980338752,TCEA,0.980338752,The Cancer Editome Atlas,0.965569031,1,http://tcea.tmu.edu.tw,200,,,http://web.archive.org/web/20220622000726/http://tcea.tmu.edu.tw/,2019-04-23,"Graduate Institute of Biomedical Informatics, College of Medical Science and Technology, Taipei Medical University, Taipei, Taiwan.","Lin CH, Chen SC",,"Ministry of Science and Technology of Taiwan, Taipei Medical University",8.0, +31120982,TB DEPOT,0.832982928,TB DEPOT,0.832982928,,0,1,http://depot.tbportals.niaid.nih.gov,301,,,http://web.archive.org/web/20220520230147/https://depot.tbportals.niaid.nih.gov/,2019-05-23,"Office of Cyber Infrastructure & Computational Biology, National Institute of Allergy and Infectious Disease, National Institutes of Health, Bethesda, MD, United States of America.","Gabrielian A, Engle E, Harris M, Wollenberg K, Juarez-Espinosa O, Glogowski A, Long A, Patti L, Hurt DE, Rosenthal A, Tartakovsky M",,,2.0,United States +"31171447, 31171447",SynGO,0.996233463,SynGO,0.996233463,,0,1,"http://syngoportal.org, http://geneontology.org","301, 200",United States,"(37.7621,-122.3971), (39.0438,-77.4874)","http://web.archive.org/web/20221006165105/https://syngoportal.org/, http://web.archive.org/web/20221105015500/http://geneontology.org/",2019-06-03,"Department of Functional Genomics, CNCR, VU University and UMC Amsterdam, 1081 HV Amsterdam, the Netherlands; Department of Molecular and Cellular Neurobiology, CNCR, VU University and UMC Amsterdam, 1081 HV Amsterdam, the Netherlands., Department of Functional Genomics, CNCR, VU University and UMC Amsterdam, 1081 HV Amsterdam, the Netherlands; Department of Molecular and Cellular Neurobiology, CNCR, VU University and UMC Amsterdam, 1081 HV Amsterdam, the Netherlands.","Koopmans F, van Nierop P, Andres-Alonso M, Byrnes A, Cijsouw T, Coba MP, Cornelisse LN, Farrell RJ, Goldschmidt HL, Howrigan DP, Hussain NK, Imig C, de Jong APH, Jung H, Kohansalnodehi M, Kramarz B, Lipstein N, Lovering RC, MacGillavry H, 
Mariano V, Mi H, Ninov M, Osumi-Sutherland D, Pielot R, Smalla KH, Tang H, Tashman K, Toonen RFG, Verpelli C, Reig-Viader R, Watanabe K, van Weering J, Achsel T, Ashrafi G, Asi N, Brown TC, De Camilli P, Feuermann M, Foulger RE, Gaudet P, Joglekar A, Kanellopoulos A, Malenka R, Nicoll RA, Pulido C, de Juan-Sanz J, Sheng M, Südhof TC, Tilgner HU, Bagni C, Bayés À, Biederer T, Brose N, Chua JJE, Dieterich DC, Gundelfinger ED, Hoogenraad C, Huganir RL, Jahn R, Kaeser PS, Kim E, Kreutz MR, McPherson PS, Neale BM, O'Connor V, Posthuma D, Ryan TA, Sala C, Feng G, Hyman SE, Thomas PD, Smit AB, Verhage M, Koopmans F, van Nierop P, Andres-Alonso M, Byrnes A, Cijsouw T, Coba MP, Cornelisse LN, Farrell RJ, Goldschmidt HL, Howrigan DP, Hussain NK, Imig C, de Jong APH, Jung H, Kohansalnodehi M, Kramarz B, Lipstein N, Lovering RC, MacGillavry H, Mariano V, Mi H, Ninov M, Osumi-Sutherland D, Pielot R, Smalla KH, Tang H, Tashman K, Toonen RFG, Verpelli C, Reig-Viader R, Watanabe K, van Weering J, Achsel T, Ashrafi G, Asi N, Brown TC, De Camilli P, Feuermann M, Foulger RE, Gaudet P, Joglekar A, Kanellopoulos A, Malenka R, Nicoll RA, Pulido C, de Juan-Sanz J, Sheng M, Südhof TC, Tilgner HU, Bagni C, Bayés À, Biederer T, Brose N, Chua JJE, Dieterich DC, Gundelfinger ED, Hoogenraad C, Huganir RL, Jahn R, Kaeser PS, Kim E, Kreutz MR, McPherson PS, Neale BM, O'Connor V, Posthuma D, Ryan TA, Sala C, Feng G, Hyman SE, Thomas PD, Smit AB, Verhage M",", ","SYNSYS, SYNSYS, DFG, NIDA NIH HHS, NIMH NIH HHS, NINDS NIH HHS, Leibniz Foundation, The Broad Institute of MIT and Harvard, European FP People Marie Curie Action, DFG, German Federal Ministry of Education and Research, NINDS NIH HHS, NINDS NIH HHS, European Union, SYNSYS, EUROSPIN, Ramón y Cajal, EU-JPND, European Research Council, NINDS NIH HHS, NIMH NIH HHS, CERCA Program/Generalitat de Catalunya, FEDER, The Stanley Center for Psychiatric Research, European Union, NIMH NIH HHS, European Union, European Research Council, NIH, SYNSYS, 
SYNSYS, DFG, NIDA NIH HHS, NIMH NIH HHS, NINDS NIH HHS, Leibniz Foundation, The Broad Institute of MIT and Harvard, European FP People Marie Curie Action, DFG, German Federal Ministry of Education and Research, NINDS NIH HHS, NINDS NIH HHS, European Union, SYNSYS, EUROSPIN, Ramón y Cajal, EU-JPND, European Research Council, NINDS NIH HHS, NIMH NIH HHS, CERCA Program/Generalitat de Catalunya, FEDER, The Stanley Center for Psychiatric Research, European Union, NIMH NIH HHS, European Union, European Research Council, NIH",282.0,"Netherlands, Netherlands, Netherlands, Netherlands" +31211398,SpinachBase,0.997433722,SpinachBase,0.997433722,,0,1,http://spinachbase.org,200,United States,"(42.4444,-76.4926)",http://web.archive.org/web/20220718035147/http://spinachbase.org/,2019-01-01,"Boyce Thompson Institute for Plant Research, Ithaca, NY 14853, USA.","Collins K, Zhao K, Jiao C, Xu C, Cai X, Wang X, Ge C, Dai S, Wang Q, Wang Q, Fei Z, Zheng Y",,"Shanghai Engineering Research Center of Plant Germplasm Resources, Development and Collaborative Innovation Center of Shanghai, National Science Foundation, National Science Foundation",9.0,United States +31240309,TCR3d,0.996341427,TCR3d,0.996341427,T cell receptor structural repertoire database,0.668063611,1,http://tcr3d.ibbr.umd.edu,302,,,http://web.archive.org/web/20220809050833/https://tcr3d.ibbr.umd.edu/,2019-12-01,"University of Maryland Institute for Bioscience and Biotechnology Research, Rockville, MD, USA.","Gowthaman R, Pierce BG",,"NIGMS NIH HHS, National Institutes of Health",20.0,United States +31432762,tRic,0.994563282,tRic,0.994563282,,0,1,"http://hanlab.uth.edu/tRic/, http://bioinfo.life.hust.edu.cn/tRic","404, 200",,", ","no_wayback, no_wayback",2019-08-25,"Department of Biochemistry and Molecular Biology, McGovern Medical School at The University of Texas Health Science Center at Houston , Houston, TX, USA.","Zhang Z, Ruan H, Liu CJ, Ye Y, Gong J, Diao L, Guo AY, Han L",,Cancer Prevention and Research Institute of 
Texas,5.0,United States +31490686,TMB,0.958310604,TMB,0.958310604,,0,1,http://dna.engr.latech.edu,301,,,http://web.archive.org/web/20220803212711/https://dna.engr.latech.edu/,2019-09-24,None,"Sun R, Li Z, Bishop TC",,"National Institutes of Health, National Science Foundation",5.0, +31624839,T-psi-C,0.996630514,T-psi-C,0.996630514,,0,1,http://tpsic.igcz.poznan.pl,301,,,no_wayback,2020-01-01,"Institute of Human Genetics, Polish Academy of Sciences, Strzeszynska 32, 60-479, Poznan, Poland.","Sajek MP, Woźniak T, Sprinzl M, Jaruzelska J, Barciszewski J",,Polish Academy of Sciences,8.0,Poland +31672983,SPP,0.947210044,SPP,0.947210044,The Signaling Pathways Project,0.80841283,1,http://www.signalingpathways.org,200,United States,"(45.5235,-122.676)",http://web.archive.org/web/20221017022114/http://www.signalingpathways.org/,2019-10-31,"Department of Molecular and Cellular Biology, Baylor College of Medicine, Houston, Texas, 77030, USA.","Ochsner SA, Abraham D, Martin K, Ding W, McOwiti A, Kankanamge W, Wang Z, Andreano K, Hamilton RA, Chen Y, Hamilton A, Gantner ML, Dehart M, Qu S, Hilsenbeck SG, Becnel LB, Bridges D, Ma'ayan A, Huss JM, Stossi F, Foulds CE, Kralli A, McDonnell DP, McKenna NJ",,"U.S. Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases, U.S. Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases, NIDDK NIH HHS, Cancer Prevention and Research Institute of Texas, U.S. Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases, U.S. Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases, U.S. Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases (National Institute of Diabetes & Digestive & Kidney Diseases), NIDDK NIH HHS, NIDDK NIH HHS, NIDDK NIH HHS, NIDDK NIH HHS, U.S. 
Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases (National Institute of Diabetes & Digestive & Kidney Diseases), U.S. Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases (National Institute of Diabetes & Digestive & Kidney Diseases), U.S. Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases (National Institute of Diabetes & Digestive & Kidney Diseases), Cancer Prevention and Research Institute of Texas (Cancer Prevention Research Institute of Texas)",31.0,United States +31728519,SyntDB,0.997279763,SyntDB,0.997279763,,0,1,http://syntdb.amu.edu.pl,"HTTPConnectionPool(host='syntdb.amu.edu.pl', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20220329135205/http://syntdb.amu.edu.pl/,2020-01-01,"Adam Mickiewicz University in Poznan, Faculty of Biology, Institute of Anthropology, Laboratory of Integrative Genomics, Uniwersytetu Poznańskiego 6, 61-614 Poznan, Poland.","Bryzghalov O, Szcześniak MW, Makałowska I",,"KNOW Poznan RNA Centre, Polish Ministry of Science and Higher Education, National Science Centre",8.0,Poland +31728526,TerrestrialMetagenomeDB,0.959326208,TerrestrialMetagenomeDB,0.959326208,,0,1,http://webapp.ufz.de/tmdb,301,,,http://web.archive.org/web/20221016213121/https://webapp.ufz.de/tmdb/,2020-01-01,"Department of Environmental Microbiology, UFZ-Helmholtz Centre for Environmental Research, Leipzig, Saxony 04318, Germany.","Corrêa FB, Saraiva JP, Stadler PF, da Rocha UN",,Helmholtz Association,3.0,Germany +31831861,TRANSNAP,0.997132599,TRANSNAP,0.997132599,,0,1,http://plantomics.mind.meiji.ac.jp/nashi,"HTTPConnectionPool(host='plantomics.mind.meiji.ac.jp', port=80): Max retries exceeded with url: /nashi (Caused by NewConnectionError(': Failed to 
establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220618084631/http://plantomics.mind.meiji.ac.jp/nashi/,2019-12-12,"School of Agriculture, Meiji University, Kawasaki, 214-8571, Japan.","Koshimizu S, Nakamura Y, Nishitani C, Kobayashi M, Ohyanagi H, Yamamoto T, Yano K",,,2.0,Japan +31838261,TBPP,0.786532521,TBPP,0.786532521,,0,1,http://TBPortals.niaid.nih.gov,302,,,http://web.archive.org/web/20220901080022/https://tbportals.niaid.nih.gov/,2019-12-12,"Office of Cyber Infrastructure & Computational Biology, National Institute of Allergy and Infectious Disease, National Institutes of Health, Bethesda, MD, USA. Electronic address: gabr@niaid.nih.gov.","Gabrielian A, Engle E, Harris M, Wollenberg K, Glogowski A, Long A, Hurt DE, Rosenthal A",,,1.0,United States +31982380,TissueCoCoPUTs,0.951715197,TissueCoCoPUTs,0.951715197,,0,1,http://hive.biochemistry.gwu.edu/review/tissue_codon,301,,,no_wayback,2020-01-23,"Division of Plasma Protein Therapeutics, Office of Tissue and Advanced Therapies, Center for Biologics Evaluation and Research, Food and Drug Administration, Silver Spring, MD, 20993, USA.","Kames J, Alexaki A, Holcomb DD, Santana-Quintero LV, Athey JC, Hamasaki-Katagiri N, Katneni U, Golikov A, Ibla JC, Bar H, Kimchi-Sarfaty C",,U.S. 
Food and Drug Administration,7.0,United States +32027495,tautomeric,0.663365901,,0,tautomeric,0.663365901,1,http://cactus.nci.nih.gov/download/tautomer,301,,,http://web.archive.org/web/20221104204735/https://cactus.nci.nih.gov/download/tautomer/,2020-03-10,"Computer-Aided Drug Design Group, Chemical Biology Laboratory, Center for Cancer Research, National Cancer Institute, NIH, Frederick, Maryland 21702, United States.","Dhaked DK, Guasch L, Nicklaus MC",,"Intramural NIH HHS, National Cancer Institute",3.0,United States +32047897,TRlnc,0.99749732,TRlnc,0.99749732,for human regulatory information of lncRNAs,0.931201637,1,http://bio.licpathway.net/TRlnc,301,,,http://web.archive.org/web/20220923074713/http://bio.licpathway.net/TRlnc/,2021-03-01,None,"Li Y, Li X, Yang Y, Li M, Qian F, Tang Z, Zhao J, Zhang J, Bai X, Jiang Y, Zhou J, Zhang Y, Zhou L, Xie J, Li E, Wang Q, Li C",,"National Natural Science Foundation of China, National Natural Science Foundation of China, Harbin Medical University, National Natural Science Foundation of China, Natural Science Foundation of Heilongjiang Province, National Natural Science Foundation of China",9.0, +32159215,TeaMiD,0.993912682,TeaMiD,0.993912682,,0,1,http://indianteagenome.in:8080/teamid,301,,,no_wayback,2020-01-01,"Indian Council Agricultural Research-National Institute for Plant Biotechnology, Lal Bahadur Sashtri Centre, Indian Agricultural Research Institute, Pusa, New Delhi 110012, India.","Dubey H, Rawal HC, Rohilla M, Lama U, Kumar PM, Bandyopadhyay T, Gogoi M, Singh NK, Mondal TK",,"National Tea Research Foundation, Tea Board, Ministry of Commerce, Govt of India, Kolkata, India",5.0,India +32248093,TE141K1,0.987350821,TE141K1,0.987350821,Text Benchmark for,0.68329291,1,http://daooshee.github.io/TE141K,301,,,http://web.archive.org/web/20220418124010/https://daooshee.github.io/TE141K/,2021-09-02,None,"Yang S, Wang W, Liu J",,"Natural Science Foundation of Beijing Municipality, National Natural Science Foundation of 
China",0.0, +32265943,TOAST,0.984416008,TOAST,0.984416008,Test Of Arabidopsis Space Transcriptome,0.949631318,1,http://astrobiology.botany.wisc.edu/astrobotany-toast,301,,,http://web.archive.org/web/20220614232906/https://astrobiology.botany.wisc.edu/astrobotany-toast/,2020-03-04,"Department of Botany, University of Wisconsin, Madison, WI, United States.","Barker R, Lombardino J, Rasmussen K, Gilroy S",,,7.0,United States +32277449,TransGene Promoters,0.957293236,TGP,0.893220305,TransGene Promoters,0.957293236,1,http://wwwmgs.bionet.nsc.ru/mgs/dbases/tgp/home.html,200,,,http://web.archive.org/web/20220302125828/http://wwwmgs.bionet.nsc.ru/mgs/dbases/tgp/home.html,2020-01-01,"Institute of Cytology and Genetics, Siberian Branch, Russian Academy of Sciences, Novosibirsk, Russia. planta@bionet.nsc.ru.","Smirnova OG, Kochetov AV",,,1.0, +32286817,TeroKit,0.997192562,TeroKit,0.997192562,,0,1,http://terokit.qmclab.com,404,,,http://web.archive.org/web/20220814014320/http://terokit.qmclab.com/,2020-04-20,"Guangdong Provincial Key Laboratory of New Drug Design and Evaluation, School of Pharmaceutical Sciences, Sun Yat-sen University, Guangzhou 510006, People's Republic of China.","Zeng T, Liu Z, Zhuang J, Jiang Y, He W, Diao H, Lv N, Jian Y, Liang D, Qiu Y, Zhang R, Zhang F, Tang X, Wu R",,"Guangdong Natural Science Founds for Distinguished Young Scholars, GDAS' Project of Science and Technology Development, National Natural Science Foundation of China, National Natural Science Foundation of China",7.0,China +32351388,TCMIO,0.994452953,TCMIO,0.994452953,,0,1,http://tcmio.xielab.net,"HTTPConnectionPool(host='tcmio.xielab.net', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20220813235626/http://tcmio.xielab.net/,2020-04-15,"State Key Laboratory of Applied Microbiology Southern China, Guangdong Provincial Key Laboratory of Microbial Culture 
Collection and Application, Guangdong Open Laboratory of Applied Microbiology, Guangdong Institute of Microbiology, Guangdong Academy of Sciences, Guangzhou, China.","Liu Z, Cai C, Du J, Liu B, Cui L, Fan X, Wu Q, Fang J, Xie L",,National Natural Science Foundation of China,10.0,"China, China" +32427908,The Ontario Climate Data Portal,0.631285894,,0,The Ontario Climate Data Portal,0.631285894,1,http://yorku.ca/ocdp,302,,,no_wayback,2020-05-19,"Laboratory of Mathematical Parallel Systems (LAMPS), Department of Mathematics and Statistics, York University, Toronto, ON, M3J 1P3, Canada. huaiping@mathstat.yorku.ca.","Zhu H, Liu J, Zhou X, Chen X, Qiu X, Bello RL, Deng Z",,"Canadian Network for Research and Innovation in Machining Technology, Natural Sciences and Engineering Research Council of Canada",0.0,Canada +32442307,SYNERGxDB,0.997662961,SYNERGxDB,0.997662961,,0,1,http://SYNERGxDB.ca,302,Guinea,"(8.53829,-9.47282)",http://web.archive.org/web/20221102182312/https://www.synergxdb.ca/,2020-07-01,"Princess Margaret Cancer Centre, University Health Network, Toronto, Ontario M5G 0A3, Canada.","Seo H, Tkachuk D, Ho C, Mammoliti A, Rezaie A, Madani Tonekaboni SA, Haibe-Kains B",,"Genome Canada, CIHR, Ontario Institute for Cancer Research, Canadian Institutes of Health Research",6.0,Canada +32620074,TeaCoN,0.997057378,TeaCoN,0.997057378,,0,1,http://teacon.wchoda.com,200,,,http://web.archive.org/web/20220714012936/http://teacon.wchoda.com/,2020-07-03,"School of Information and Computer, Anhui Agricultural University, Hefei, China.","Zhang R, Ma Y, Hu X, Chen Y, He X, Wang P, Chen Q, Ho CT, Wan X, Zhang Y, Zhang S",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program",5.0,China +32696292,TLPdb,0.996521711,TLPdb,0.996521711,,0,1,http://tlpdb.cftri.com,"HTTPConnectionPool(host='tlpdb.cftri.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a 
new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20181002215903/http://tlpdb.cftri.com:80/,2020-08-01,"Department of Protein Chemistry and Technology, CFTRI, Mysore, India.","Thimme Gowda C, Purama SNS, Kammara R",,CSIR-CFTRI,1.0,India +32719467,STAGdb,0.743933082,STAGdb,0.743933082,,0,1,http://coralsnp.science.psu.edu/galaxy,301,United States,"(40.7957,-77.8618)",no_wayback,2020-07-27,"Department of Biology, The Pennsylvania State University, 208 Mueller Laboratory, University Park, PA, 16802, USA.","Kitchen SA, Von Kuster G, Kuntz KLV, Reich HG, Miller W, Griffin S, Fogarty ND, Baums IB",,"National Oceanic and Atmospheric Administration, National Science Foundation",7.0,United States +32727974,TEMPURA,0.997982502,TEMPURA,0.997982502,growth TEMPeratures of Usual and RAre prokaryotes,0.942272703,1,http://togodb.org/db/tempura,200,,,no_wayback,2020-01-01,"International Center for Biotechnology, Osaka University.","Sato Y, Okano K, Kimura H, Honda K",,,4.0, +32761141,SPDB,0.982828304,SPDB,0.982828304,Swine Pathogen Database,0.946259499,1,http://spdatabase.com:2080,"HTTPConnectionPool(host='spdatabase.com', port=2080): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200815175241/http://spdatabase.com:2080/,2020-01-01,"School of Food Science and Engineering, South China University of Technology, 381 Wushan Road, Tianhe District, Guangzhou, Guangdong Province 510641, China.","Wang X, Liu Z, Li X, Li D, Cai J, Yan H",,"Natural Science Foundation of Guangdong Province, Natural Science Foundation of China, ‘Climbing plan’ supported by Guangdong University students’ Special Fund for Scientific and Technological Innovation and Cultivation, National Key Basic Research Program, Guangdong Provincial Key Research and Development Plan Project",0.0,"China, China" 
+32813752,SVAD,0.986849755,SVAD,0.986849755,SCD-associated Variants Annotation Database,0.878880084,1,http://svad.mbc.nctu.edu.tw,"HTTPConnectionPool(host='svad.mbc.nctu.edu.tw', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220621154201/http://www.svad.mbc.nctu.edu.tw/,2020-08-19,"Department of Biological Science and Technology, National Chiao Tung University, Hsinchu, Taiwan, R.O.C.","Huang WC, Huang HT, Chen PY, Wang WC, Ko TM, Shrestha S, Yang CD, Tai CS, Chiew MY, Chou YP, Hu YF, Huang HD",,,0.0, +32829394,TGV,0.91157198,TGV,0.91157198,Genomic,0.657844663,1,http://psd.uohyd.ac.in/tgv,301,,,http://web.archive.org/web/20220420023624/http://psd.uohyd.ac.in/tgv/,2020-12-01,"Repository of Tomato Genomics Resources, Department of Plant Sciences.","Gupta P, Dholaniya PS, Devulapalli S, Tawari NR, Sreelakshmi Y, Sharma R",,"Department of Biotechnology, Research Fellowship of Council of Scientific and Industrial Research",1.0, +32882008,TBDB,0.995206952,TBDB,0.995206952,T-box Riboswitch Annotation Database,0.944775608,1,http://tbdb.io,301,,,http://web.archive.org/web/20221102055810/https://tbdb.io/,2021-01-01,"Department of Genetics, Harvard Medical School, Boston, MA 02115, USA.","Marchand JA, Pierson Smela MD, Jordan THH, Narasimhan K, Church GM, Church GM",,"National Science Foundation, U.S. 
Department of Energy, US Department of Energy",3.0,United States +32934277,StoneMod,0.996549025,StoneMod,0.996549025,kidney stone modulator database,0.977263463,1,http://www.stonemod.org,301,Thailand,"(13.6166,100.5531)",http://web.archive.org/web/20221102055810/https://www.stonemod.org/,2020-09-15,"Medical Proteomics Unit, Office for Research and Development, Faculty of Medicine Siriraj Hospital, Mahidol University, 6th Floor - SiMR Building, 2 Wanglang Road, Bangkoknoi, Bangkok, 10700, Thailand.","Sassanarakkit S, Peerapen P, Thongboonkerd V",,,1.0,Thailand +32976578,TREND-DB,0.993754983,TREND-DB,0.993754983,,0,1,http://shiny.imbei.uni-mainz.de:3838/trend-db,301,,,http://web.archive.org/web/20220120150708/http://shiny.imbei.uni-mainz.de:3838/trend-db/,2021-01-01,"Institute of Medical Biostatistics, Epidemiology and Informatics (IMBEI), University Medical Center Mainz, 55131 Mainz, Germany.","Marini F, Scherzinger D, Danckwardt S",,"Federal Ministry of Education and Research, DFG, DFG, Dr. 
Hella Bühler Stiftung, DFG",4.0,Germany +32976581,STAB,0.990528941,STAB,0.990528941,Spatio-Temporal cell Atlas of the human Brain,0.935739333,1,http://stab.comp-sysbio.org,301,,,http://web.archive.org/web/20220117093551/http://stab.comp-sysbio.org/,2021-01-01,"Institute of Science and Technology for Brain-Inspired Intelligence, Fudan University, Shanghai 200433, China.","Song L, Pan S, Zhang Z, Jia L, Chen WH, Chen WH, Zhao XM",,"Shanghai Science and Technology Innovation Fund, Shanghai Municipal Science and Technology Commission, National Natural Science Foundation of China, National Natural Science Foundation of China",7.0,China +32990749,TCRdb,0.997715533,TCRdb,0.997715533,,0,1,http://bioinfo.life.hust.edu.cn/TCRdb,301,,,http://web.archive.org/web/20220503180124/http://bioinfo.life.hust.edu.cn/TCRdb/,2021-01-01,"Center for Artificial Intelligence Biology, Hubei Bioinformatics & Molecular Imaging Key Laboratory, Key Laboratory of Molecular Biophysics of the Ministry of Education, College of Life Science and Technology, Huazhong University of Science and Technology; Wuhan, 430074, China.","Chen SY, Yue T, Lei Q, Guo AY",,"China Postdoctoral Science Foundation, National Natural Science Foundation of China, National Natural Science Foundation of China, China Postdoctoral Science Foundation, National Natural Science Foundation of China, National Key Research and Development Program of China",1.0,China +33035346,tRFtarget,0.997647464,tRFtarget,0.997647464,,0,1,http://trftarget.net,200,,,http://web.archive.org/web/20220407012047/http://trftarget.net/,2021-01-01,"SJTU-Yale Joint Center for Biostatistics and Data Science, Department of Bioinformatics and Biostatistics, School of Life Sciences and Biotechnology, Shanghai Jiao Tong University, Shanghai 200240, China.","Li N, Shan N, Lu L, Wang Z",,"NIAAA NIH HHS, China Scholarship Council, National Science Foundation, National Science Foundation, NCATS NIH HHS, SJTU-Yale Collaborative Research Seed Fund, National 
Institutes of Health, Neil Shen's SJTU Medical Research Fund",8.0,China +33074314,TransCirc,0.997567892,TransCirc,0.997567892,,0,1,http://www.biosino.org/transcirc,403,,,http://web.archive.org/web/20221018043650/https://www.biosino.org/transcirc/,2021-01-01,"Bio-Med Big Data Center, CAS Key Laboratory of Computational Biology, CAS-MPG Partner Institute for Computational Biology, Shanghai Institute of Nutrition and Health, Chinese Academy of Sciences, Shanghai 200031, China.","Huang W, Ling Y, Zhang S, Xia Q, Cao R, Fan X, Fang Z, Wang Z, Zhang G",,"National Natural Science Foundation of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, Science and Technology Commission of Shanghai Municipality, National Science and Technology Basic Resources Investigation, National Natural Science Foundation of China, Strategic Priority Research Program of Chinese Academy of Sciences",17.0,China +33095862,ThermoMutDB,0.998117328,ThermoMutDB,0.998117328,,0,1,http://biosig.unimelb.edu.au/thermomutdb,308,,,no_wayback,2021-01-01,"Institute of Agricultural Sciences, Universidade Federal dos Vales do Jequitinhonha e Mucuri.","Xavier JS, Nguyen TB, Karmarkar M, Portelli S, Rezende PM, Velloso JPL, Ascher DB, Pires DEV",,"Wellcome Trust, National Health and Medical Research Council, Coordenação de Aperfeiçoamento de Pessoal de Nível Superior, Fundação de Amparo à Pesquisa do Estado de Minas Gerais, Conselho Nacional de Desenvolvimento Científico e Tecnológico, Medical Research Council, Jack Brockhoff Foundation",10.0, +33156327,TCRD,0.885566056,TCRD,0.885566056,Central Resource Database,0.841031889,1,"http://juniper.health.unm.edu/tcrd/, http://pharos.nih.gov","200, 301",,", ","http://web.archive.org/web/20220522150512/http://juniper.health.unm.edu/tcrd/, http://web.archive.org/web/20221101122042/https://pharos.nih.gov/",2021-01-01,"National Center for Advancing Translational 
Science, 9800 Medical Center Drive, Rockville, MD 20850, USA.","Sheils TK, Mathias SL, Kelleher KJ, Siramshetty VB, Nguyen DT, Bologa CG, Jensen LJ, Vidović D, Koleti A, Schürer SC, Waller A, Yang JJ, Holmes J, Bocci G, Southall N, Dharkar P, Mathé E, Simeonov A, Oprea TI",,"National Institutes of Health, Novo Nordisk Foundation Center for Protein Research, NCATS NIH HHS, National Institutes of Health, Novo Nordisk Foundation, Intramural Research Program, Division of Preclinical Innovation, NIH NCATS, NCI NIH HHS",12.0,United States +33179754,TISCH,0.996230185,TISCH,0.996230185,Tumor Immune Single Cell Hub,0.991766587,1,http://tisch.comp-genomics.org,200,,,http://web.archive.org/web/20220820233253/http://tisch.comp-genomics.org/,2021-01-01,"Shanghai Putuo District People's Hospital, School of Life Science and Technology, Tongji University, Shanghai 200060, China.","Sun D, Wang J, Han Y, Dong X, Ge J, Zheng R, Shi X, Wang B, Li Z, Ren P, Sun L, Yan Y, Zhang P, Zhang F, Li T, Wang C",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",114.0,China +33258964,ThRSDB,0.989187896,ThRSDB,0.989187896,Thai Rice Starch Database,0.958283556,1,http://thairicestarch.kku.ac.th,301,,,no_wayback,2020-12-01,"Siriraj Metabolomics and Phenomics Center, Faculty of Medicine Siriraj Hospital, Mahidol University, 2 Wanglang Road, Bangkok Noi, Bangkok 10700, Thailand.","Wanichthanarak K, Thitisaksakul M",,Khon Kaen University,1.0,Thailand +33311384,Tenuipalpidae,0.925811052,Tenuipalpidae,0.925811052,,0,1,http://www.tenuipalpidae.ibilce.unesp.br,301,,,http://web.archive.org/web/20220616024318/https://www.tenuipalpidae.ibilce.unesp.br/,2020-10-29,"Departamento de Zoologia e Botânica, UNESP-Universidade Estadual Paulista, 15054-000, São José do Rio Preto, São Paulo, Brazil.. 
elizeu_unesp@yahoo.com.br.","Castro EB, Mesa NC, Feres RJF, DE Moraes GJ, Ochoa R, Beard JJ, Demite PR",,,0.0,Brazil +33360695,ToxinDB,0.996215522,ToxinDB,0.996215522,,0,1,http://www.rxnfinder.org/toxindb,301,,,no_wayback,2020-12-11,"CAS Key Laboratory of Computational Biology, CAS Key Laboratory of Nutrition, Metabolism and Food Safety, CAS-MPG Partner Institute for Computational Biology, Shanghai Institute of Nutrition and Health, University of Chinese Academy of Sciences, Chinese Academy of Sciences, Shanghai 200031, PR China.","Zhang D, Tian Y, Tian Y, Xing H, Liu S, Zhang H, Ding S, Cai P, Sun D, Zhang T, Hong Y, Dai H, Tu W, Chen J, Wu A, Hu QN",,"National Key Research and Development Program of China, National Natural Science Foundation of China, China Postdoctoral Science Foundation, Natural Science Foundation of Tianjin City, CAS-SAFEA International Partnership Program for Creative Research Teams, Chinese Academy of Science and Technology Service Network Planning",1.0,China +33459764,SWITCHES,0.99647893,SWITCHES,0.99647893,for topologies of,0.699981242,1,http://switches.ncbs.res.in,302,India,"(12.9634,77.5855)",http://web.archive.org/web/20220223195224/https://switches.ncbs.res.in/,2021-01-18,"Neurobiology, National Centre for Biological Sciences, Bangalore, India TIFR.","HarshaRani GV, Moza S, Ramakrishnan N, Bhalla US",,"Department of Biotechnology, Department of Biotechnology, US National Science Foundation, US National Science Foundation",0.0,India +33655207,TMSNP,0.996496975,TMSNP,0.996496975,,0,1,http://lmc.uab.es/tmsnp,302,,,no_wayback,2021-02-23,"Laboratori de Medicina Computacional, Facultat de Medicina, Universitat Autònoma de Barcelona, 08193 Bellaterra, Spain.","Garcia-Recio A, Gómez-Tamayo JC, Reina I, Campillo M, Cordomí A, Olivella M",,"European Regional Development Fund, Ministerio de Ciencia, Innovación y Universidades, ISCIII-Subdirección General de Evaluación, Ministerio de Ciencia, Innovación y Universidades",2.0,Spain 
+33729437,TIE,0.982593015,TIE,0.982593015,Tumor IsomiR Encyclopedia,0.978378109,1,http://isomir.ccr.cancer.gov,301,,,http://web.archive.org/web/20221104175541/http://isomir.ccr.cancer.gov/,2021-03-17,"RNA Mediated Gene Regulation Section, RNA Biology Laboratory, Center for Cancer Research, National Cancer Institute.","Bofill-De Ros X, Luke B, Guthridge R, Mudunuri U, Loss M, Gu S",,"National Institutes of Health, National Cancer Institute",2.0, +33969254,TOMATOMET,0.975067317,TOMATOMET,0.975067317,,0,1,http://metabolites.in/tomato-fruits,302,,,http://web.archive.org/web/20220617140233/http://metabolites.in/tomato-fruits/,2021-04-29,Graduate School of Agriculture Kyoto University Uji Japan.,"Ara T, Sakurai N, Takahashi S, Waki N, Suganuma H, Aizawa K, Matsumura Y, Kawada T, Shibata D",,,0.0,Japan +33985427,TarDB,0.990120292,TarDB,0.990120292,,0,1,http://www.biosequencing.cn/TarDB,301,,,http://web.archive.org/web/20220606075412/http://www.biosequencing.cn/TarDB/,2021-05-13,"College of Life Sciences, Tianjin Key Laboratory of Animal and Plant Resistance, Tianjin Normal University, Tianjin, 300387, China.","Liu J, Liu X, Zhang S, Liang S, Luan W, Ma X",,"National Natural Science Foundation of China, Tianjin Rice Industrial Technology System of China",5.0,China +34000890,TELEMED,0.973653436,TELEMED,0.973653436,telemedicine TELEMED,0.615303091,1,http://telemedicine.cimt.dk,406,,,http://web.archive.org/web/20220525104901/https://telemedicine.cimt.dk/,2021-05-18,"Center for Innovative Medical Technology, Odense University Hospital, Denmark.","Kidholm K, Svendsen IW, Yderstræde K, Ølholm AM, Rayce K, Kjølhede T",,"Novo Nordisk Fonden, Region of Southern Denmark",0.0,Denmark +"34037703, 34113986",TFcancer,0.99685061,TFcancer,0.99685061,,0,2,http://lcbb.swjtu.edu.cn/tfcancer,301,,,http://web.archive.org/web/20201022085845/http://lcbb.swjtu.edu.cn/tfcancer/,2021-05-26,"School of Life Sciences and Engineering, Southwest Jiaotong University, Chengdu, 610031, China., School of 
Life Sciences and Engineering, Southwest Jiaotong University, Chengdu 610031, China.","Huang Q, Tan Z, Li Y, Wang W, Lang M, Li C, Guo Z, Huang Q, Tan Z, Li Y, Wang W, Lang M, Li C, Guo Z",", ",", ",0.0,"China, China" +34154536,TeaAS,0.994054417,TeaAS,0.994054417,plants,0.769802034,1,http://www.teaas.cn/index.php,200,,,http://web.archive.org/web/20220228083958/http://www.teaas.cn/index.php,2021-06-21,"State Key Laboratory of Tea Plant Biology and Utilization, Anhui Agricultural University, West 130 Changjiang Road, Hefei, Anhui, 230036, People's Republic of China.","Mi X, Yue Y, Tang M, An Y, Xie H, Qiao D, Ma Z, Liu S, Wei C",,"the Open Fund of State Key Laboratory of Tea Plant Biology and Utilization, the National Key Research and Development Program of China, the special funds for the tea germplasm resource garden, the National Natural Science Foundation of China, the Base of Introducing Talents for Tea Plant Biology and Quality Chemistry",0.0,China +34154643,TE Hub,0.932434916,TE Hub,0.932434916,,0,1,http://tehub.org,200,,,http://web.archive.org/web/20221024143724/https://tehub.org/,2021-06-21,None,", Elliott TA, Heitkam T, Hubley R, Quesneville H, Suh A, Wheeler TJ",,"National Human Genome Research Institute, NHGRI NIH HHS, NHGRI NIH HHS",1.0, +34244719,TIDB,0.992214262,TIDB,0.992214262,Trained Immunity DataBase,0.935802007,1,http://www.ieom-tm.com/tidb,200,,,no_wayback,2021-07-01,"Department of Environmental Medicine, Tianjin Institute of Environmental and Operational Medicine, No.1 Dali Road, Heping District, Tianjin 300050, China.","Cao Y, Dong Q, Wang D, Liu Y, Zhang P, Yu X, Niu C",,Tianjin Institute of Environmental and Operational Medicine,1.0,China +34273956,TCM-Blast,0.966936181,TCM-Blast,0.966936181,traditional Chinese Medicine Basic,0.723005056,1,http://viroblast.pungentdb.org.cn/TCM-Blast/viroblast.php,200,,,http://web.archive.org/web/20220525075856/http://viroblast.pungentdb.org.cn/TCM-Blast/viroblast.php,2021-07-17,"School of Chinese Materia 
Medica, Beijing University of Chinese Medicine, Yangguang South Avenue, Fangshan District, Beijing, 102488, China. zhaochen@bucm.edu.cn.","Chen Z, Li J, Hou N, Zhang Y, Qiao Y",,"China Postdoctoral Science Foundation, National Natural Science Foundation of China",0.0,China +34407614,Tracking Air Pollution in China,0.963049769,TAP,0.955015858,Tracking Air Pollution in China,0.963049769,1,http://tapdata.org.cn,200,,,http://web.archive.org/web/20220528145922/http://tapdata.org.cn/,2021-08-18,"State Key Joint Laboratory of Environment Simulation and Pollution Control, School of Environment, Tsinghua University, Beijing 100084, China.","Geng G, Xiao Q, Liu S, Liu X, Cheng J, Zheng Y, Xue T, Tong D, Zheng B, Peng Y, Huang X, He K, Zhang Q",,"National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China",3.0,China +34515387,SynWiki,0.996189475,SynWiki,0.996189475,,0,1,http://synwiki.uni-goettingen.de,200,,,http://web.archive.org/web/20210915013232/http://synwiki.uni-goettingen.de/,2021-09-20,"Department of General Microbiology, Göttingen Center for Molecular Biosciences, Georg-August University Göttingen, Göttingen, Germany.","Pedreira T, Elfmann C, Singh N, Stülke J",,,2.0,Germany +34517763,TnCentral,0.997506559,TnCentral,0.997506559,,0,1,"http://tncentral.proteininformationresource.org/, http://tncentral.ncc.unesp.br","301, 301",,", ","http://web.archive.org/web/20220814081328/https://tncentral.proteininformationresource.org/, http://web.archive.org/web/20220803151414/https://tncentral.ncc.unesp.br/",2021-09-14,"Protein Information Resource, Department of Biochemistry and Molecular and Cellular Biology, Georgetown University Medical Center, Washington, DC, USA.","Ross K, Varani AM, Snesrud E, Huang H, Alvarenga DO, Zhang J, Wu C, McGann P, Chandler M",,,3.0,United States 
+34522848,ToppCell,0.97753334,ToppCell,0.97753334,,0,1,http://toppcell.cchmc.org,302,,,no_wayback,2021-09-10,"Division of Biomedical Informatics, Cincinnati Children's Hospital Medical Center, Cincinnati, OH 45229, USA.","Jin K, Bardes EE, Mitelpunkt A, Wang JY, Bhatnagar S, Sengupta S, Krummel DP, Rothenberg ME, Aronow BJ",,"NHLBI NIH HHS, NIMH NIH HHS, NIDDK NIH HHS, National Institutes of Health, NIDDK NIH HHS",0.0,United States +34531327,T1TAdb,0.98944664,T1TAdb,0.98944664,,0,1,http://d-lab.arna.cnrs.fr/t1tadb,"HTTPConnectionPool(host='d-lab.arna.cnrs.fr', port=80): Max retries exceeded with url: /t1tadb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20220122014448/https://d-lab.arna.cnrs.fr/t1tadb,2021-09-16,"University of Bordeaux, CNRS, INSERM, ARNA, UMR 5320, U1212, F-33000 Bordeaux, France.","Tourasse NJ, Darfeuille F",,"Institut National de la Santé et de la Recherche Médicale, Bordeaux University, and Agence Nationale de la Recherche, Bordeaux University, and Agence Nationale de la Recherche, Centre National de la Recherche Scientifique",1.0,France +34534667,TExAs,0.994255507,TExAs,0.994255507,Human Tissue-specific Exposome Atlas,0.91863317,1,http://cb.imsc.res.in/texas,301,,,http://web.archive.org/web/20211223120529/https://cb.imsc.res.in/texas/,2021-09-15,"The Institute of Mathematical Sciences (IMSc), Chennai, 600113, India; Homi Bhabha National Institute (HBNI), Mumbai, 400094, India.","Ravichandran J, Karthikeyan BS, Aparna SR, Samal A",,Max-Planck-Gesellschaft,1.0,"India, India" +34656056,SuperTCM,0.990243733,SuperTCM,0.990243733,,0,1,http://tcm.charite.de/supertcm,301,Germany,"(52.4422,13.3217)",http://web.archive.org/web/20220121051605/https://tcm.charite.de/supertcm/,2021-10-15,"Structural Bioinformatics Group, Institute for Physiology, 10115 Berlin, Germany; China Scholarship Council (CSC), Beijing 100044, China.","Chen Q, Springer L, Gohlke BO, Goede A, 
Dunkel M, Abel R, Gallo K, Preissner S, Eckert A, Seshadri L, Preissner R",,"Deutsche Forschungsgemeinschaft, Deutsche Forschungsgemeinschaft",0.0,"China, China, Germany" +34663591,SysInflam HuDB,0.983198355,SysInflam HuDB,0.983198355,,0,1,http://sepsis.gxbsidra.org/dm3/geneBrowser/list,200,Netherlands,"(52.3667,4.9)",http://web.archive.org/web/20221108031619/http://sepsis.gxbsidra.org/dm3/geneBrowser/list,2021-11-01,"Sidra Medicine, Doha, Qatar.","Toufiq M, Huang SSY, Boughorbel S, Alfaki M, Rinchai D, Saraiva LR, Chaussabel D, Garand M",,,2.0,Qatar +34679164,T-ARDIS,0.996051848,T-ARDIS,0.996051848,Target-Adverse Reaction Database Integrated Search,0.962873194,1,http://www.bioinsilico.org/T-ARDIS,308,Spain,"(41.387,2.1701)",no_wayback,2021-10-01,"Department of Biosciences, U Science Tech, Universitat de Vic-Universitat Central de Catalunya, Carrer Laura 13, Vic, Catalonia 08500, Spain.","Galletti C, Bota PM, Oliva B, Fernandez-Fuentes N",,MINECO,0.0,Spain +35113396,Subcellular Location of Proteins in Arabidopsis,0.81757238,,0,Subcellular Location of Proteins in Arabidopsis,0.81757238,1,"http://suba.live/, http://crop-pal.org","301, 301","Canada, Canada","(43.6532,-79.3832), (43.6532,-79.3832)","http://web.archive.org/web/20221018155227/https://suba.live/, http://web.archive.org/web/20221027204933/https://crop-pal.org/",2021-01-01,"The Centre of Excellence in Plant Energy Biology, The University of Western Australia, Crawley, WA, Australia.","Hooper CM, Castleden IR, Tanz SK, Grasso SV, Millar AH",,,0.0,"Australia, Australia" +21177655,COMBREX,0.983597279,COMBREX,0.983597279,Acids,0.534308016,1,"http://www.oxfordjournals.org/nar/database/a/, http://nar.oxfordjournals.org","301, 403",,", ","http://web.archive.org/web/20220804171609/https://www.oxfordjournals.org/nar/database/a, http://web.archive.org/web/20170107075732/https://nar.oxfordjournals.org/",2011-01-01,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of 
Health, Bethesda, MD 20894, USA. galperin@ncbi.nlm.nih.gov","Galperin MY, Cochrane GR",,Intramural NIH HHS,28.0,United States +21245417,VeryGene,0.988496959,VeryGene,0.988496959,,0,1,http://www.verygene.com,200,United States,"(39.0997,-94.5785)",no_wayback,2011-01-18,"Institute of Genetic Engineering, Southern Medical University, Guangzhou, Guangdong Province, China.","Yang X, Ye Y, Wang G, Huang H, Yu D, Liang S",,,21.0,China +21253872,TSdb,0.996435046,TSdb,0.996435046,,0,1,http://tsdb.cbi.pku.edu.cn,"HTTPConnectionPool(host='tsdb.cbi.pku.edu.cn', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to tsdb.cbi.pku.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20160731075635/http://tsdb.cbi.pku.edu.cn:80/,2011-01-21,"Center for Bioinformatics, National Laboratory of Protein Engineering and Plant Genetic Engineering, College of Life Sciences, Peking University, Beijing, 100871, China.","Zhao M, Chen Y, Qu D, Qu H",,,18.0,China +21276248,ZFNGenome,0.933807492,ZFNGenome,0.933807492,inc Finger,0.642919501,1,"http://bindr.gdcb.iastate.edu/ZFNGenome, http://www.zincfingers.org","302, 200",,", ","no_wayback, http://web.archive.org/web/20221020122554/https://www.zincfingers.org/",2011-01-28,"Department of Genetics, Iowa State University, Ames, IA 50011, USA. 
dreyon@iastate.edu","Reyon D, Kirkpatrick JR, Sander JD, Zhang F, Voytas DF, Joung JK, Dobbs D, Coffman CR",,"NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",15.0,United States +21398669,UniCarb,0.640729487,UniCarb,0.640729487,,0,1,http://www.unicarb-db.org,405,,,no_wayback,2011-03-12,"Department of Medical Biochemistry and Cell Biology, University of Gothenburg, Sweden.","Hayes CA, Karlsson NG, Struwe WB, Lisacek F, Rudd PM, Packer NH, Campbell MP",,,66.0,Sweden +21715385,Bioinformatics Links,0.632332242,Bioinformatics Links,0.632332242,,0,1,http://bioinformatics.ca/links_directory,302,,,http://web.archive.org/web/20170608065028/https://bioinformatics.ca/links_directory/,2011-07-01,"Ontario Institute for Cancer Research, 101 College St., Suite 800, Toronto, Ontario, Canada M5G 0A3.","Brazas MD, Yim DS, Yamada JT, Ouellette BF",,,7.0,Canada +21769196,VPDB,0.997562647,VPDB,0.997562647,Viral Protein Structural Database,0.964751055,1,http://www.vpdb.bicpu.edu.in,"HTTPConnectionPool(host='www.vpdb.bicpu.edu.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20220617044452/http://vpdb.bicpu.edu.in/,2011-07-06,"Centre for Bioinformatics, School of Life Sciences, Pondicherry University, Pondicherry-605014, India.","Sharma OP, Jadhav A, Hussain A, Kumar MS",,,5.0,India +21876203,UPB,0.950364868,UPB,0.950364868,Urinary Protein Biomarker,0.927442985,1,http://122.70.220.102/biomarker,"HTTPConnectionPool(host='122.70.220.102', port=80): Max retries exceeded with url: /biomarker (Caused by ConnectTimeoutError(, 'Connection to 122.70.220.102 timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20170321111041/http://122.70.220.102/biomarker/,2011-08-29,"National Key Laboratory of Medical Molecular Biology, Department of Physiology and Pathophysiology, Peking Union Medical College, 5 Dong Dan San Tiao, Beijing, China.","Shao C, Li M, Li X, Wei L, Zhu L, Yang F, Jia L, Mu Y, Wang J, Guo Z, Zhang D, Yin J, Wang Z, Sun W, Zhang Z, Gao Y",,,33.0,China +21887013,AnimalLectinDb,0.99539414,AnimalLectinDb,0.99539414,,0,1,http://www.research-bioinformatics.in,"HTTPConnectionPool(host='www.research-bioinformatics.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20141218002645/http://research-bioinformatics.in/,2011-04-22,None,"Kumar D, Mittal Y",,,1.0, +"22009731, 26519407",WheatGenome.info,0.941735923,WheatGenome.info,0.941735923,,0,2,http://www.wheatgenome.info,200,,,http://web.archive.org/web/20221017125416/http://www.wheatgenome.info/,2016-01-01,"School of Agriculture and Food Sciences and Australian Centre for Plant Functional Genomics, University of Queensland, Brisbane, QLD 4072, Australia., School of Agriculture and Food Science, University of Queensland, Hartley Teakle Building 83, St. Lucia, QLD, 4072, Australia. k.lai1@uq.edu.au.","Lai K, Berkman PJ, Lorenc MT, Duran C, Smits L, Manoli S, Stiller J, Edwards D, Lai K",", ",", ",18.0,"Australia, Australia" +"22039101, 25740498",Newt-omics,0.960447629,Newt-omics,0.960447629,,0,2,http://newt-omics.mpi-bn.mpg.de,"HTTPConnectionPool(host='newt-omics.mpi-bn.mpg.de', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to newt-omics.mpi-bn.mpg.de timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220122003940/http://newt-omics.mpi-bn.mpg.de/,2011-10-27,"Department of Cardiac Development and Remodelling, Max Planck Institute for Heart and Lung Research, Ludwigstrasse 43, D-61231 Bad Nauheim, Germany., Max-Planck-Institute for Heart and Lung Research, Ludwigstrasse 43, 61231, Bad Nauheim, Germany, Mario.Looso@mpi-bn.mpg.de.","Bruckskotten M, Looso M, Reinhardt R, Braun T, Borchardt T, Looso M, Braun T",", ",", ",11.0,"Germany, Germany" +"22064855, 27899612",YMDB,0.991479576,YMDB,0.991479576,Yeast Metabolome Database,0.972250591,2,http://www.ymdb.ca,200,,,http://web.archive.org/web/20221102101126/http://www.ymdb.ca/,2016-11-28,"Department of Computing Science, Department of Biological Sciences, University of Alberta, Edmonton, AB T6G 2E8, Canada., Departments of Computing Science, University of Alberta, Edmonton, AB T6G 2E8, Canada.","Jewison T, Knox C, Neveu V, Djoumbou Y, Guo AC, Lee J, Liu P, Mandal R, Krishnamurthy R, Sinelnikov I, Wilson M, Wishart DS, Ramirez-Gaona M, Marcu A, Pon A, Guo AC, Sajed T, Wishart NA, Karu N, Djoumbou Feunang Y, Arndt D, Wishart DS",", ","Canadian Institutes of Health Research, CIHR",119.0,"Canada, Canada" +22064863,EcoliWiki,0.997703433,EcoliWiki,0.997703433,,0,1,"http://porteco.org, http://ecoliwiki.net","200, 302",,", ","http://web.archive.org/web/20221105022238/http://porteco.org/, http://web.archive.org/web/20080828091310/http://ecoliwiki.net/",2011-11-07,"Department of Biochemistry and Biophysics, Texas Agrilife Research, Texas A&M University College Station, TX 77843, USA.","McIntosh BK, Renfro DP, Knapp GS, Lairikyengbam CR, Liles NM, Niu L, Supak AM, Venkatraman A, Zweifel AE, Siegele DA, Hu JC",,"NIGMS NIH HHS, NIGMS NIH HHS",21.0,United States +22067444,zfishbook,0.99548614,zfishbook,0.99548614,Zebrafish,0.59932977,1,http://zfishbook.org,301,,,http://web.archive.org/web/20211219152416/http://zfishbook.org/,2011-11-08,"Department of Biochemistry and Molecular 
Biology, Mayo Clinic, Rochester, MN 55905, USA.","Clark KJ, Argue DP, Petzold AM, Ekker SC",,"NIDA NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIDDK NIH HHS, NIGMS NIH HHS, NIDDK NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIDA NIH HHS",12.0,United States +"22067448, 26578559",VFDB,0.998255268,VFDB,0.998255268,virulence factor database,0.889513955,2,http://www.mgc.ac.cn/VFs,301,China,"(39.9175,116.362)",http://web.archive.org/web/20221016084409/http://www.mgc.ac.cn/VFs/,2015-11-17,"State Key Laboratory for Molecular Virology and Genetic Engineering, Institute of Pathogen Biology, Chinese Academy Medical Sciences and Peking Union Medical College, Beijing 100176, China., MOH Key Laboratory of Systems Biology of Pathogens, Institute of Pathogen Biology, Chinese Academy of Medical Sciences and Peking Union Medical College, Beijing 100176, China.","Chen L, Xiong Z, Sun L, Yang J, Jin Q, Chen L, Zheng D, Liu B, Yang J, Jin Q",", ",", ",708.0,"China, China" +"22067451, 24178034",Intrinsically Disordered proteins with Extensive Annotations and Literature,0.993429282,IDEAL,0.991460741,Intrinsically Disordered proteins with Extensive Annotations and Literature,0.993429282,2,http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL,"HTTPConnectionPool(host='www.ideal.force.cs.is.nagoya-u.ac.jp', port=80): Max retries exceeded with url: /IDEAL (Caused by ConnectTimeoutError(, 'Connection to www.ideal.force.cs.is.nagoya-u.ac.jp timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20200502160132/http://www.ideal.force.cs.is.nagoya-u.ac.jp:80/IDEAL/,2011-11-08,"Faculty of Engineering, Maebashi Institute of Technology, Maebashi, Gunma 371-0816, Japan. 
sfukuchi@maebashi-it.ac.jp, Faculty of Engineering, Maebashi Institute of Technology, Maebashi 371-0816, Japan, Graduate School of Information Science, Nagoya University, Nagoya 464-8601, Japan, HOLONICS Corporation, Numazu 411-0803, Japan and Graduate School of Pharmaceutical Sciences, Nagoya University, Nagoya 464-8601, Japan.","Fukuchi S, Sakamoto S, Nobe Y, Murakami SD, Amemiya T, Hosoda K, Koike R, Hiroaki H, Ota M, Fukuchi S, Amemiya T, Sakamoto S, Nobe Y, Hosoda K, Kado Y, Murakami SD, Koike R, Hiroaki H, Ota M",", ",", ",94.0,"Japan, Japan, Japan, Japan, Japan" +"22075990, 23175610, 24285300, 25348401, 26578600, 27899570, 29092072, 30407599, 33231642",MGD,0.99467206,MGD,0.99467206,Mouse Genome Database,0.976606995,9,http://www.informatics.jax.org,200,,,http://web.archive.org/web/20221101073803/http://www.informatics.jax.org/,2021-01-01,"The Jackson Laboratory, Bar Harbor, ME 04609, USA. janan.eppig@jax.org, The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609 USA. carol.bult@jax.org, Bioinformatics and Computational Biology, The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA., The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA janan.eppig@jax.org., The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA carol.bult@jax.org., The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA judith.blake@jax.org., The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA., The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA., The Jackson Laboratory, Bar Harbor, ME, USA.","Eppig JT, Blake JA, Bult CJ, Kadin JA, Richardson JE, , Bult CJ, Eppig JT, Blake JA, Kadin JA, Richardson JE, , Blake JA, Bult CJ, Eppig JT, Kadin JA, Richardson JE, , Eppig JT, Blake JA, Bult CJ, Kadin JA, Richardson JE, , Bult CJ, Eppig JT, Blake JA, Kadin JA, Richardson JE, , Blake JA, Eppig JT, Kadin JA, Richardson JE, Smith CL, Bult CJ, , Smith CL, Blake JA, Kadin JA, Richardson JE, Bult CJ, , Bult CJ, Blake JA, 
Smith CL, Kadin JA, Richardson JE, , Blake JA, Baldarelli R, Kadin JA, Richardson JE, Smith CL, Bult CJ, ",", , , , , , , , ","NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NCI NIH HHS, NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NCI NIH HHS, NHGRI NIH HHS, NCI NIH HHS, NHGRI NIH HHS, NCI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NCI NIH HHS, NHGRI NIH HHS, National Institutes of Health, National Institutes of Health, NHGRI NIH HHS, National Institutes of Health, NHGRI NIH HHS, National Human Genome Research Institute, National Institutes of Health, NHGRI NIH HHS, National Human Genome Research Institute, NHGRI NIH HHS",1181.0,"United States, United States, United States, United States, United States, United States, United States, United States, United States" +"22080546, 26657633, 29190397, 33166387, 23180798",International Nucleotide Sequence Database Collaboration,0.915257774,INSDC,0.902326147,International Nucleotide Sequence Database Collaboration,0.915257774,5,http://www.insdc.org,301,,,http://web.archive.org/web/20221104193531/https://www.insdc.org/,2021-01-01,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 45 Center Drive, Bethesda, MD 20892, USA. mizrachi@ncbi.nlm.nih.gov, European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK cochrane@ebi.ac.uk., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA., Bioinformation and DDBJ Center, National Institute of Genetics, Mishima, Shizuoka 411-8540, Japan., DDBJ Center, National Institute of Genetics, Research Organization for Information and Systems, Yata, Mishima 411-8510, Japan. 
yn@nig.ac.jp","Karsch-Mizrachi I, Nakamura Y, Cochrane G, , Cochrane G, Karsch-Mizrachi I, Takagi T, , Karsch-Mizrachi I, Takagi T, Cochrane G, , Arita M, Karsch-Mizrachi I, Cochrane G, Nakamura Y, Cochrane G, Karsch-Mizrachi I, ",", , , , ","Biotechnology and Biological Sciences Research Council, Intramural NIH HHS, NHGRI NIH HHS, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Intramural NIH HHS, Wellcome Trust, , European Commission Horizon 2020, National Bioscience Database Center, Gordon and Betty Moore Foundation, National Institutes of Health, European Molecular Biology Laboratory, NLM NIH HHS, United Kingdom Biotechnology, Biological Sciences Research Council, Japan Agency for Medical Research and Development, Ministry of Education, Culture, Sports, Science and Technology, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Intramural NIH HHS, Wellcome Trust",294.0,"Japan, Japan, United States, United States" +22080555,UCSC Archaeal Genome Browser,0.798626341,arCOG,0.772361755,UCSC Archaeal Genome Browser,0.798626341,1,http://archaea.ucsc.edu,200,United States,"(36.9921,-122.0577)",http://web.archive.org/web/20220629165143/http://archaea.ucsc.edu/,2011-11-12,"Department of Biomolecular Engineering, University of California, Santa Cruz, 1156 High Street, SOE-2, Santa Cruz, CA 95064, USA.","Chan PP, Holmes AD, Smith AM, Tran D, Lowe TM",,,55.0,United States +22102575,YeTFaSCo,0.996380448,YeTFaSCo,0.996380448,The Yeast Transcription Factor Specificity Compendium,0.867617818,1,http://yetfasco.ccbr.utoronto.ca,200,,,http://web.archive.org/web/20220617230200/http://yetfasco.ccbr.utoronto.ca/,2011-11-18,"Department of Molecular Genetics, Terrence Donnelly Centre for Cellular and Biomolecular Research, University of Toronto, Toronto, Canada.","de Boer CG, Hughes TR",,"Canadian Institutes of Health Research, Canadian Institutes of Health Research",91.0,Canada 
+22102589,UniPathway,0.997355998,UniPathway,0.997355998,,0,1,http://www.unipathway.org,"HTTPConnectionPool(host='www.unipathway.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200804012252/http://www.unipathway.org:80/,2011-11-18,"Swiss-Prot Group, SIB Swiss Institute of Bioinformatics, CMU, 1 rue Michel-Servet, CH-1211 Geneva 4, Switzerland. anne.morgat@isb-sib.ch","Morgat A, Coissac E, Coudert E, Axelsen KB, Keller G, Bairoch A, Bridge A, Bougueleret L, Xenarios I, Viari A",,"European Commission FP7, European Commission FP7",59.0,Switzerland +22123743,PASS2.4,0.720472276,PASS2.4,0.720472276,,0,1,http://caps.ncbs.res.in/pass2,301,,,http://web.archive.org/web/20221021201220/http://caps.ncbs.res.in/pass2/,2011-11-28,"National centre for Biological Sciences, TIFR, GKVK campus, Bangalore 560 065, Karnataka, India.","Gandhimathi A, Nair AG, Sowdhamini R",,,11.0,India +"22135296, 25510499",VectorBase,0.996112347,VectorBase,0.996112347,,0,2,http://www.vectorbase.org,301,United States,"(33.8519,-83.412)",no_wayback,2014-12-15,"European Bioinformatics Institute EMBL, Wellcome Trust Genome Campus, Hinxton CB10 1SD, UK. 
kmegy@ebi.ac.uk, Department of Biological Sciences, University of Notre Dame, Notre Dame, IN 46556, USA.","Megy K, Emrich SJ, Lawson D, Campbell D, Dialynas E, Hughes DS, Koscielny G, Louis C, Maccallum RM, Redmond SN, Sheehan A, Topalis P, Wilson D, , Giraldo-Calderón GI, Emrich SJ, MacCallum RM, Maslen G, Dialynas E, Topalis P, Ho N, Gesing S, , Madey G, Collins FH, Lawson D",", ","PHS HHS, European Commission FP7, PHS HHS, European Commission FP7, European Commission FP7, PHS HHS, PHS HHS, PHS HHS",399.0,United States +"22135297, 29156006, 25416803",TarBase,0.995145559,TarBase,0.995145559,,0,3,http://www.microrna.gr/tarbase,301,,,no_wayback,2018-01-01,"IMIS Institute, 'Athena' Research Center, 11524 Athens, Greece., DIANA-Lab, Department of Electrical & Computer Engineering, University of Thessaly, 382 21 Volos, Greece., DIANA-Lab, Department of Electrical & Computer Engineering, University of Thessaly, 382 21 Volos, Greece Laboratory for Experimental Surgery and Surgical Research 'N.S. Christeas', Medical School of Athens, University of Athens, 11527 Athens, Greece arhatzig@uth.gr dalamag@imis.athena-innovation.gr ivlachos@lessr.eu.","Vergoulis T, Vlachos IS, Alexiou P, Georgakilas G, Maragkakis M, Reczko M, Gerangelos S, Koziris N, Dalamagas T, Hatzigeorgiou AG, Karagkouni D, Paraskevopoulou MD, Chatzopoulos S, Vlachos IS, Tastsoglou S, Kanellos I, Papadimitriou D, Kavakiotis I, Maniou S, Skoufos G, Vergoulis T, Dalamagas T, Hatzigeorgiou AG, Vlachos IS, Paraskevopoulou MD, Karagkouni D, Georgakilas G, Vergoulis T, Kanellos I, Anastasopoulos IL, Maniou S, Karathanou K, Kalfakakou D, Fevgas A, Dalamagas T, Hatzigeorgiou AG",", , ",", , ",1037.0,"Greece, Greece, Greece, Greece" +"22135298, 25514926",PhosphoSitePlus,0.998392642,PhosphoSitePlus,0.998392642,,0,2,http://www.phosphosite.org,403,,,http://web.archive.org/web/20221101212740/https://www.phosphosite.org/,2011-12-01,"Cell Signaling Technology, 3 Trask Lane, Danvers, MA 01923, USA. 
phornbeck@cellsignal.com, Cell Signaling Technology, 3 Trask Lane, Danvers, MA 01923, USA phornbeck@cellsignal.com.","Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M, Hornbeck PV, Zhang B, Murray B, Kornhauser JM, Latham V, Skrzypek E",", ","NIGMS NIH HHS, NCI NIH HHS, NIAAA NIH HHS, NCI NIH HHS, NCRR NIH HHS, NIAAA NIH HHS, NHLBI NIH HHS, NIGMS NIH HHS, NIAAA NIH HHS, NHLBI NIH HHS, NCI NIH HHS, NCI NIH HHS, NIAAA NIH HHS",1972.0,"United States, United States" +"22155609, 27048349",GeneCards,0.995489895,GeneCards,0.995489895,Human Integrated Protein Expression Database,0.944921303,2,http://www.genecards.org,403,,,http://web.archive.org/web/20221101114702/https://www.genecards.org/,2016-04-05,"Department of Molecular Genetics, Weizmann Institute of Science, Rehovot, 76100, Israel. gil.stelzer@weizmann.ac.il, nan","Stelzer G, Dalah I, Stein TI, Satanower Y, Rosen N, Nativ N, Oz-Levi D, Olender T, Belinky F, Bahir I, Krug H, Perco P, Mayer B, Kolker E, Safran M, Lancet D, nan",", nan",", nan",111.0,Israel +22160653,vHoT,0.990057766,vHoT,0.990057766,viral microRNA host target,0.892197204,1,http://dna.korea.ac.kr/vhot,"HTTPConnectionPool(host='dna.korea.ac.kr', port=80): Max retries exceeded with url: /vhot (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2011-12-08,"School of Electrical Engineering, Korea University, #207 Engineering Bldg., Anamdong, Seongbukgu, Seoul 136-713, Korea.","Kim H, Park S, Min H, Yoon S",,,9.0, +"22218860, 24214987",UniHI,0.993736625,UniHI,0.993736625,Unified Human Interactome,0.957470024,2,http://www.unihi.org,503,,,http://web.archive.org/web/20220527064448/http://unihi.org/,2013-11-08,"Charité, Humboldt University, Berlin, Germany., Centre for Molecular and Structural Biomedicine, University of Algarve, Faro, Portugal and Institute for Theoretical Biology, Charité, Humboldt-University, Berlin, Germany.","Chaurasia G, 
Futschik M, Kalathur RK, Pinto JP, Hernández-Prieto MA, Machado RS, Almeida D, Chaurasia G, Futschik ME",", ",", ",38.0,"Germany, Germany, Portugal" +22325123,YADAMP,0.996951342,YADAMP,0.996951342,antimicrobial peptide database,0.960818076,1,http://www.yadamp.unisa.it,302,,,http://web.archive.org/web/20220523194522/http://yadamp.unisa.it/,2012-02-09,"Department of Pharmaceutical and Biomedical Sciences, University of Salerno, Via Ponte don Melillo, 84084 Fisciano, Salerno, Italy. piotto@unisa.it","Piotto SP, Sessa L, Concilio S, Iannelli P",,"MIUR (Italy), University of Salerno (Salerno, Italy)",57.0,Italy +22428748,ZeBase,0.995802045,ZeBase,0.995802045,,0,1,http://zebase.bio.purdue.edu,301,,,http://web.archive.org/web/20220524123648/https://zebase.bio.purdue.edu/,2012-03-01,"Department of Biological Sciences, Purdue University, West Lafayette, Indiana, USA.","Hensley MR, Hassenplug E, McPhail R, Leung YF",,,4.0,United States +22481888,Xylella,0.679942429,Xylella,0.679942429,,0,1,http://www.xylella.lncc.br,200,,,no_wayback,2012-01-01,"Genome and Transposable Elements Laboratory, Departamento de Botânica, Instituto de Biociências, Universidade de São Paulo, São Paulo, SP, Brazil.","Varani AM, Monteiro-Vitorello CB, de Almeida LG, Souza RC, Cunha OL, Lima WC, Civerolo E, Van Sluys MA, Vasconcelos AT",,,9.0,Brazil +22493537,BacterialLectinDb,0.95345632,BacterialLectinDb,0.95345632,,0,1,http://www.research-bioinformatics.in,"HTTPConnectionPool(host='www.research-bioinformatics.in', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20141218002645/http://research-bioinformatics.in/,2012-03-31,None,"Kumar D, Mittal Y",,,0.0, +22523575,TumorHoPe,0.990183711,TumorHoPe,0.990183711,,0,1,http://crdd.osdd.net/raghava/tumorhope,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/tumorhope (Caused by 
ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220528004706/http://crdd.osdd.net/raghava/tumorhope/,2012-04-16,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Sector 39A, Chandigarh, India.","Kapoor P, Singh H, Gautam A, Chaudhary K, Kumar R, Raghava GP",,,50.0,India +"22661982, 28499913",Soybean Proteome Database,0.979806413,SPD,0.971009135,Soybean Proteome Database,0.979806413,2,http://proteome.dc.affrc.go.jp/Soybean,"HTTPConnectionPool(host='proteome.dc.affrc.go.jp', port=80): Max retries exceeded with url: /Soybean (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200217015013/http://proteome.dc.affrc.go.jp:80/Soybean/,2012-05-30,"Tsukuba Division, Mitsubishi Space Software Co., Ltd, Tsukuba, Japan., Institute of Crop Science, National Agriculture and Food Research Organization, Tsukuba 305-8518, Japan; Faculty of Life and Environmental Sciences, University of Tsukuba, Tsukuba 305-8572, Japan. 
Electronic address: komatsu.setsuko.fu@u.tsukuba.ac.jp.","Ohyanagi H, Sakata K, Komatsu S, Komatsu S, Wang X, Yin X, Nanjo Y, Ohyanagi H, Sakata K",", ",", JSPS",25.0,"Japan, Japan, Japan" +22701464,pep2pro,0.995340317,pep2pro,0.995340317,,0,1,"http://www.ebi.ac.uk/pride, http://gator.masc-proteomics.org","301, 302",,", ","http://web.archive.org/web/20221030021940/https://www.ebi.ac.uk/pride/, http://web.archive.org/web/20191003101614/http://gator.masc-proteomics.org:80/",2012-06-11,"Plant Biotechnology, Department of Biology, ETH Zurich, Zurich, Switzerland.","Hirsch-Hoffmann M, Gruissem W, Baerenfaller K",,,16.0,"Switzerland, Ethiopia" +22702248,WikiCell,0.997400999,WikiCell,0.997400999,,0,1,http://www.wikicell.org,301,,,no_wayback,2012-06-01,"CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing, China.","Zhao D, Wu J, Zhou Y, Gong W, Xiao J, Yu J",,,10.0,China +22715306,VPTD,0.991008282,VPTD,0.991008282,vegetable phytochemicals and their target database,0.914648957,1,http://www/vptd.in,302,,,no_wayback,2012-05-31,None,"Kriushnapriya S, Dhinagar K, Malathy S, Mani K",,,1.0, +22735743,VIP DB,0.981878147,VIP DB,0.981878147,Protein domain DataBase,0.806038454,1,http://vipdb.cgu.edu.tw,"HTTPConnectionPool(host='vipdb.cgu.edu.tw', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to vipdb.cgu.edu.tw timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20130418205144/http://vipdb.cgu.edu.tw,2012-06-24,"Molecular Medicine Research Center, Chang Gung University, Taoyuan, Taiwan. 
g39328001@ym.edu.tw","Chen TW, Gan RR, Wu TH, Lin WC, Tang P",,"National Science Council, Academia Sinica",0.0, +22736877,KB,0.750262052,KB,0.750262052,,0,1,"http://kb.phenoscape.org, http://zfin.org","308, 405",,", ","http://web.archive.org/web/20221017035425/https://kb.phenoscape.org/, http://web.archive.org/web/20221102162810/https://zfin.org/",2012-05-21,"Department of Biology, 414 East Clark Street, University of South Dakota, Vermillion, South Dakota, United States of America.","Mabee BP, Balhoff JP, Dahdul WM, Lapp H, Midford PE, Vision TJ, Westerfield M",,"NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS",30.0,United States +22748168,PharmDB,0.995387569,PharmDB,0.995387569,,0,1,"http://www.i-pharm.org/, http://biomart.i-pharm.org","200, 403",,", ","no_wayback, http://web.archive.org/web/20170620133912/http://biomart.i-pharm.org/",2012-07-02,"Medicinal Bioconvergence Research Center, College of Pharmacy, Seoul National University, Seoul, Korea.","Lee HS, Bae T, Lee JH, Kim DG, Oh YS, Jang Y, Kim JT, Lee JJ, Innocenti A, Supuran CT, Chen L, Rho K, Kim S",,,39.0, +"22828716, 28437484",ECG-ViEW,0.982872033,ECG-ViEW,0.982872033,Electrocardiogram Vigilance with Electronic data Warehouse II,0.880449582,2,http://www.ecgview.org,"HTTPConnectionPool(host='www.ecgview.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.ecgview.org timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220625210636/http://ecgview.org/,2012-07-25,"Department of Biomedical Informatics, Ajou University School of Medicine, Suwon, Korea., Department of Biomedical Informatics, Ajou University School of Medicine, Suwon, South Korea.","Park MY, Yoon D, Choi NK, Lee J, Lee K, Lim HS, Park BJ, Kim JH, Park RW, Kim YG, Shin D, Park MY, Lee S, Jeon MS, Yoon D, Park RW",", ",", Korea Health Industry Development Institute, Korea Health Industry Development Institute",16.0, +22881376,UCL LDLR,0.977101341,UCL LDLR,0.977101341,lipoprotein receptor gene familial hypercholesterolemia variant database,0.635834286,1,http://grenada.lumc.nl/LOVD2/UCL-Heart/home.php?select_db=LDLR,404,,,no_wayback,2012-09-01,"British Heart Foundation Laboratories, Centre for Cardiovascular Genetics, Institute of Cardiovascular Sciences, University College London, London, UK.","Usifo E, Leigh SE, Whittall RA, Lench N, Taylor A, Yeats C, Orengo CA, Martin AC, Celli J, Humphries SE",,"British Heart Foundation, British Heart Foundation",108.0, +"23066107, 26590405",TSGene,0.985564858,TSGene,0.985564858,Tumor Suppressor Gene database,0.982487644,2,http://bioinfo.mc.vanderbilt.edu/TSGene,"HTTPConnectionPool(host='bioinfo.mc.vanderbilt.edu', port=80): Max retries exceeded with url: /TSGene (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20160408184553/http://bioinfo.mc.vanderbilt.edu:80/TSGene/,2015-11-20,"Department of Biomedical Informatics, Vanderbilt University School of Medicine, Nashville, TN 37232, USA., School of Engineering, Faculty of Science, Health, Education and Engineering, University of the Sunshine Coast, Maroochydore DC, Queensland 4558, Australia.","Zhao M, Sun J, Zhao Z, Zhao M, Kim P, Mitra R, Zhao J, Zhao Z",", ","NLM NIH HHS, NCI NIH HHS, NLM NIH HHS, NCI NIH HHS, NCI NIH HHS, NCI NIH HHS, NLM NIH HHS",331.0,"Australia, United States" 
+23093589,ValidNESs,0.977603436,ValidNESs,0.977603436,,0,1,http://validness.ym.edu.tw,"HTTPConnectionPool(host='validness.ym.edu.tw', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to validness.ym.edu.tw timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20200226035552/http://validness.ym.edu.tw:80/,2012-10-22,"Department of Life Science, National Taiwan University, Taipei 106, Taiwan.","Fu SC, Huang HC, Horton P, Juan HF",,,41.0, +23110448,YeastIP,0.997585833,YeastIP,0.997585833,,0,1,http://genome.jouy.inra.fr/yeastip,301,,,http://web.archive.org/web/20220120222032/http://genome.jouy.inra.fr/yeastip/,2012-12-17,"INRA, UMR1319 Micalis, CIRM-Levures, Thiverval-Grignon, France; AgroParisTech, UMR Micalis, Thiverval-Grignon, France.","Weiss S, Samson F, Navarro D, Casaregola S",,,25.0,"France, France" +"23110975, 29761459, 34698891",MGI,0.983090103,MGI,0.983090103,Mouse Genome Informatics,0.812098155,3,http://www.informatics.jax.org,200,,,http://web.archive.org/web/20221101073803/http://www.informatics.jax.org/,2021-10-26,"The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA. harold.drabkin@jax.org, Mouse Genome Informatics, The Jackson Laboratory, Bar Harbor, ME, USA., The Jackson Laboratory, Bar Harbor, ME, USA. 
martin.ringwald@jax.org.","Drabkin HJ, Blake JA, , Law M, Shaw DR, Ringwald M, Richardson JE, Baldarelli RM, Blake JA, Kadin JA, Smith C, Bult CJ",", , ","NHGRI NIH HHS, NHGRI NIH HHS, NICHD NIH HHS, NCI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NCI NIH HHS, NICHD NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NICHD NIH HHS, NHGRI NIH HHS, national institute of child health and human development, NHGRI NIH HHS, national human genome research institute, national human genome research institute, NICHD NIH HHS, NHGRI NIH HHS",25.0,"United States, United States, United States" +"23125366, 25313157",Xenbase,0.995507419,Xenbase,0.995507419,Xenopus model organism,0.689142883,2,http://www.xenbase.org,"HTTPConnectionPool(host='www.xenbase.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.xenbase.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220122220332/http://www.xenbase.org/,2014-10-13,"Division of Developmental Biology, Cincinnati Children's Research Foundation, 3333 Burnet Avenue, S3.620, Cincinnati, OH 45229-3039, USA., University of Calgary-Computer Science, Calgary, Alberta, Canada.","James-Zorn C, Ponferrada VG, Jarabek CJ, Burns KA, Segerdell EJ, Lee J, Snyder K, Bhattacharyya B, Karpinka JB, Fortriede J, Bowes JB, Zorn AM, Vize PD, Karpinka JB, Fortriede JD, Burns KA, James-Zorn C, Ponferrada VG, Lee J, Karimi K, Zorn AM, Vize PD",", ","NICHD NIH HHS, NICHD NIH HHS, NICHD NIH HHS, NICHD NIH HHS, NICHD NIH HHS, NICHD NIH HHS",124.0,"Canada, United States" +23134687,YGD,0.857634743,YGD,0.857634743,Yak Genome Database,0.853846967,1,http://me.lzu.edu.cn/yak,"HTTPConnectionPool(host='me.lzu.edu.cn', port=80): Max retries exceeded with url: /yak (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170511142509/http://me.lzu.edu.cn:80/yak/,2012-11-07,"State Key Laboratory of Grassland Agro-ecosystem, 
College of Life Science, Lanzhou University, Lanzhou, China.","Hu Q, Ma T, Wang K, Xu T, Liu J, Qiu Q",,,22.0,China +23161674,Voronoia4RNA,0.995378757,Voronoia4RNA,0.995378757,,0,1,http://proteinformatics.charite.de/voronoia4rna,404,,,no_wayback,2012-11-17,"Charité, Institute of Medical Physics and Biophysics, Proteinformatics Group, Ziegelstr. 7/9, 10117, Berlin, Germany.","Ismer J, Rose AS, Tiemann JK, Goede A, Rother K, Hildebrand PW",,,4.0,Germany +23172286,Worm Developmental Dynamics Database,0.896067631,WDDD,0.479633838,Worm Developmental Dynamics Database,0.896067631,1,http://so.qbic.riken.jp/wddd,301,,,http://web.archive.org/web/20220204051531/http://so.qbic.riken.jp/wddd/,2012-11-20,"Laboratory for Developmental Dynamics, RIKEN Quantitative Biology Center, Kobe 650-0047, Japan.","Kyoda K, Adachi E, Masuda E, Nagai Y, Suzuki Y, Oguro T, Urai M, Arai R, Furukawa M, Shimada K, Kuramochi J, Nagai E, Onami S",,,8.0,Japan +23172288,UUCD,0.997053325,UUCD,0.997053325,,0,1,http://uucd.biocuckoo.org,200,,,http://web.archive.org/web/20221103162012/https://uucd.biocuckoo.org/,2012-11-20,"Department of Biomedical Engineering, College of Life Science and Technology, Huazhong University of Science and Technology, Luoyu Road 1037, Wuhan, Hubei 430074, China.","Gao T, Liu Z, Wang Y, Cheng H, Yang Q, Guo A, Ren J, Xue Y",,,59.0,China +23175606,WholeCellKB,0.995857,WholeCellKB,0.995857,,0,1,http://wholecellkb.stanford.edu,200,,,http://web.archive.org/web/20220513215212/http://wholecellkb.stanford.edu/,2012-11-21,"Graduate Program in Biophysics, Stanford University, 318 Campus Drive West, Stanford, CA 94305, USA.","Karr JR, Sanghvi JC, Macklin DN, Arora A, Covert MW",,"NCCDPHP CDC HHS, NLM NIH HHS, NIH HHS",21.0,United States +23180778,ZInC,0.982900321,ZInC,0.982900321,The Zebrafish Insertion Collection,0.843595777,1,"http://research.nhgri.nih.gov/ZInC/, http://zfin.org","302, 405",,", ","http://web.archive.org/web/20220926023610/https://research.nhgri.nih.gov/zinc/, 
http://web.archive.org/web/20221102162810/https://zfin.org/",2012-11-24,"Genome Technology Branch, National Human Genome Research Institute, National Institutes of Health, Bethesda, MD 20892, USA.","Varshney GK, Huang H, Zhang S, Lu J, Gildea DE, Yang Z, Wolfsberg TG, Lin S, Burgess SM",,"NIDDK NIH HHS, Intramural NIH HHS",15.0,United States +23180786,WormQTL,0.998097301,WormQTL,0.998097301,,0,1,"http://www.wormqtl.org, http://www.rqtl.org","302, 301",,", ","http://web.archive.org/web/20170915003022/http://wormqtl.org/, http://web.archive.org/web/20221007175226/https://rqtl.org/",2012-11-24,"Laboratory of Nematology, Wageningen University, Wageningen 6708 PB, The Netherlands.","Snoek LB, Van der Velde KJ, Arends D, Li Y, Beyer A, Elvin M, Fisher J, Hajnal A, Hengartner MO, Poulin GB, Rodriguez M, Schmid T, Schrimpf S, Xue F, Jansen RC, Kammenga JE, Swertz MA",,,20.0,Netherlands +"23180796, 26602686, 30407520",QTLdb,0.998483792,QTLdb,0.998483792,Animal QTL database,0.808126822,3,http://www.animalgenome.org/QTLdb,302,,,http://web.archive.org/web/20130604100131/http://www.animalgenome.org:80/QTLdb/,2015-11-23,"Department of Animal Science and Center for Integrated Animal Genomics, Iowa State University, 2255 Kildee Hall, Ames, IA 50011, USA. 
zhu@iastate.edu, Department of Animal Science, Iowa State University, 2255 Kildee Hall, Ames, IA 50011, USA zhu@iastate.edu., Department of Animal Science, Iowa State University, 2255 Kildee Hall, Ames, IA 50011, USA.","Hu ZL, Park CA, Wu XL, Reecy JM, Hu ZL, Park CA, Reecy JM, Hu ZL, Park CA, Reecy JM",", , ",", , USDA-AFRI, National Animal Genome Research",409.0,"United States, United States, United States" +23193254,UCNEbase,0.997873008,UCNEbase,0.997873008,,0,1,http://ccg.vital-it.ch/UCNEbase,301,Switzerland,"(46.5191,6.56676)",http://web.archive.org/web/20180317103110/http://ccg.vital-it.ch:80/UCNEbase/,2012-11-27,"Swiss Institute for Experimental Cancer Research (ISREC), School of Life Sciences, Swiss Federal Institute of Technology (EPFL), Lausanne, Switzerland. slavica.dimitrieva@epfl.ch","Dimitrieva S, Bucher P",,Swiss National Science Foundation,59.0,Switzerland +23200141,miRT,0.972974837,miRT,0.972974837,,0,1,http://www.isical.ac.in,302,,,http://web.archive.org/web/20220925040049/https://www.isical.ac.in/,2012-09-29,"Machine Intelligence Unit, Indian Statistical Institute, Kolkata 700 108, India.","Bhattacharyya M, Das M, Bandyopadhyay S",,,18.0,India +"23203880, 25398902, 27899625",YM500,0.994998276,YM500,0.994998276,,0,3,http://ngs.ym.edu.tw/ym500,301,,,http://web.archive.org/web/20220606174937/http://ngs.ym.edu.tw/ym500/,2014-11-14,"Division of Pediatric Neurosurgery, Neurological Institute, Taipei Veterans General Hospital, Taipei 11217, Taiwan., Research Center for Tumor Medical Science, China Medical University, Taichung 40402, Taiwan., Institute of Biomedical Informatics, National Yang-Ming University, Taipei 11221, Taiwan.","Cheng WC, Chung IF, Huang TS, Chang ST, Sun HJ, Tsai CF, Liang ML, Wong TT, Wang HW, Cheng WC, Chung IF, Tsai CF, Huang TS, Chen CY, Wang SC, Chang TY, Sun HJ, Chao JY, Cheng CC, Wu CW, Wang HW, Chung IF, Chang SJ, Chen CY, Liu SH, Li CY, Chan CH, Shih CC, Cheng WC",", , ",", , ",94.0,China 
+23203887,ZiFDB,0.996439636,ZiFDB,0.996439636,Zinc Finger Database,0.955517188,1,http://zifdb.msi.umn.edu,"HTTPConnectionPool(host='zifdb.msi.umn.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2012-11-29,"Department of Genetics, University of Minnesota, 6-160 Jackson Hall, 321 Church Street SE, Minneapolis, MN 55455, USA.","Fu F, Voytas DF",,,10.0,United States +"23203891, 26980515",2P2I,0.99723657,2P2I,0.99723657,,0,2,http://2p2idb.cnrs-mrs.fr,"HTTPConnectionPool(host='2p2idb.cnrs-mrs.fr', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to 2p2idb.cnrs-mrs.fr timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20200226035006/http://2p2idb.cnrs-mrs.fr:80/,2012-11-30,"Laboratory of Integrative Structural and Chemical Biology (iSCB), Centre de Recherche en Cancérologie de Marseille (CRCM), CNRS UMR 7258, INSERM U 1068, Institut Paoli-Calmettes, Marseille, France., Centre de Recherche en Cancérologie de Marseille (CRCM); CNRS, UMR 7258; INSERM U1068; Institut Paoli-Calmettes; Aix-Marseille Université; Marseille 13009, France.","Basse MJ, Betzi S, Bourgeas R, Bouzidi S, Chetrit B, Hamon V, Morelli X, Roche P, Basse MJ, Betzi S, Morelli X, Roche P",", ",", ",91.0,"France, France" +23209562,Wiki-Pi,0.983535866,Wiki-Pi,0.983535866,,0,1,http://severus.dbmi.pitt.edu/wiki-pi,301,,,no_wayback,2012-11-28,"Department of Biomedical Informatics, University of Pittsburgh, Pittsburgh, PA, USA.","Orii N, Ganapathiraju MK",,"NIMH NIH HHS, NIMH NIH HHS",30.0,United States +23219434,VirmugenDB,0.994162917,VirmugenDB,0.994162917,,0,1,http://www.violinet.org/virmugendb,404,,,http://web.archive.org/web/20221006215918/https://violinet.org/virmugendb/,2012-12-06,"College of Literature, Science, and the Arts, University of Michigan, Ann Arbor, MI 48109, USA.","Racz R, Chung M, Xiang Z, He Y",,"NIAID NIH HHS, 
NIH-NIAID, NIAID NIH HHS",13.0,United States +"23226127, 26311606",UCLA Multimodal Connectivity Database,0.900062852,UMCD,0.88225615,UCLA Multimodal Connectivity Database,0.900062852,2,http://umcd.humanconnectomeproject.org,"HTTPConnectionPool(host='umcd.humanconnectomeproject.org', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='umcd.humanconnectomeproject.org', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20211016143408/http://umcd.humanconnectomeproject.org/,2012-11-28,"nan, Memory and Aging Center, Department of Neurology, University of California, San Francisco, CA, USA. Electronic address: jesse.brown@ucsf.edu.","nan, Brown JA, Van Horn JD","nan, ","nan, NIA NIH HHS, NIMH NIH HHS, NINDS NIH HHS",7.0,United States +23314752,UniVIO,0.993773353,UniVIO,0.993773353,Uniformed Viewer for Integrated Omics,0.9183596,1,http://univio.psc.riken.jp,200,Japan,"(35.5047,139.6802)",http://web.archive.org/web/20220228165829/http://univio.psc.riken.jp/,2013-01-10,,,,,0.0, +23411719,YY1TargetDB,0.982896606,YY1TargetDB,0.982896606,Target loci Database,0.670763016,1,http://www.myogenesisdb.org/YY1TargetDB,405,,,no_wayback,2013-02-14,"Li Ka Shing Institute of Health Sciences, The Chinese University of Hong Kong, Shatin, New Territories, Hong Kong SAR, China.","Guo AM, Sun K, Su X, Wang H, Sun H",,,6.0,"China, Hong Kong, Hong Kong" +"23550210, 31308250",cBioPortal,0.992826402,cBioPortal,0.992826402,,0,2,http://cbioportal.org,301,,,http://web.archive.org/web/20221108131925/https://www.cbioportal.org/,2019-07-15,"nan, ‡Department of Biochemistry and Molecular Pharmacology, New York University School of Medicine, New York, NY 10016; §Institute for Systems Genetics, New York University School of Medicine, New York, NY 10016; ¶Sackler Institute of Graduate Biomedical Sciences, New York University School of Medicine, New York, NY 10016.","nan, Wu P, Heins ZJ, Muller JT, Katsnelson L, de Bruijn I, Abeshouse 
AA, Schultz N, Fenyö D, Gao J","nan, ","nan, Leidos, HHS | NIH | National Cancer Institute, HHS | NIH | National Cancer Institute, HHS | NIH | National Cancer Institute, NCI NIH HHS",35.0, +"23568467, 24259431",VIOLIN,0.977441937,VIOLIN,0.977441937,Vaccine Investigation and,0.620449245,2,http://www.violinet.org,301,,,http://web.archive.org/web/20221006142310/https://violinet.org/,2013-11-19,"Department of Microbiology and Immunology, Center for Computational Medicine and Bioinformatics, University of Michigan Medical School, Ann Arbor, MI, USA., nan","He Y, Xiang Z, nan",", nan","NIAID NIH HHS, NIAID NIH HHS, nan",18.0,United States +23584834,KDDB,0.99216032,KDDB,0.99216032,Allele Frequency Net Database,0.929808199,1,"http://www.allelefrequencies.net, http://www.allelefrequencies.net/diseases","200, 301",,", ","http://web.archive.org/web/20221002054844/http://www.allelefrequencies.net/, http://web.archive.org/web/20220621154602/http://www.allelefrequencies.net/diseases/",2013-04-12,"Institute of Integrative Biology, Functional and Comparative Genomics, University of Liverpool, Liverpool, L69 7ZB, UK. L.Takeshita@liverpool.ac.uk","Takeshita LY, Gonzalez-Galarza FF, dos Santos EJ, Maia MH, Rahman MM, Zain SM, Middleton D, Jones AR",,,18.0, +23594715,TUMIR,0.997981846,TUMIR,0.997981846,,0,1,http://www.ncrnalab.com/TUMIR,"HTTPConnectionPool(host='www.ncrnalab.com', port=80): Max retries exceeded with url: /TUMIR (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20140722221315/http://www.ncrnalab.com/TUMIR/,2013-04-17,"Department of Biochemistry, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences (CAMS) & Peking Union Medical College (PUMC), National Laboratory of Medical Molecular Biology, Beijing, 100005, PR China. 
j-yu@ibms.pumc.edu.cn.","Dong L, Luo M, Wang F, Zhang J, Li T, Yu J",,,4.0,China +23734609,Virtually Aligned Matched Molecular Pairs Including Receptor Environment,0.959377536,VAMMPIRE,0.947511351,Virtually Aligned Matched Molecular Pairs Including Receptor Environment,0.959377536,1,http://vammpire.pharmchem.uni-frankfurt.de,"HTTPConnectionPool(host='vammpire.pharmchem.uni-frankfurt.de', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170121131141/http://vammpire.pharmchem.uni-frankfurt.de:80/,2013-06-18,"Institute of Pharmaceutical Chemistry, Goethe-University, Max-von-Laue Strasse 9, Frankfurt D-60438, Germany.","Weber J, Achenbach J, Moser D, Proschak E",,,10.0,Germany +23993619,aFiSh,0.991861537,aFiSh,0.991861537,NFOODS database on fish and shellfish,0.929391642,1,http://www.fao.org/infoods/biodiversity/index_en.stm,301,,,http://web.archive.org/web/20120719082918/http://www.fao.org:80/infoods/biodiversity/index_en.stm,2013-07-11,"FAO, Rome, Italy. 
doris.rittenschober@fao.org","Rittenschober D, Nowak V, Charrondiere UR",,,8.0,Italy +24013926,XDB,0.954009195,XDB,0.954009195,Xeno-glycomics database,0.933108858,1,http://bioinformatics.snu.ac.kr/xdb,"HTTPConnectionPool(host='bioinformatics.snu.ac.kr', port=80): Max retries exceeded with url: /xdb (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-09-06,"School of Chemical and Biological Engineering and Department of Computer Science and Engineering, Seoul National University, Seoul 151-742, Korea Department of Chemical Engineering, Soongsil University, Seoul 156-743, Korea Institute of Molecular Biology and Genetics and Institute of Bioengineering, Seoul National University, Seoul 151-742, Korea.","Park HM, Park JH, Kim YW, Kim KJ, Jeong HJ, Jang KS, Kim BG, Kim YG",,,1.0, +24082050,yApoptosis,0.986070871,yApoptosis,0.986070871,,0,1,http://www.ycelldeath.com/yapoptosis,301,,,http://web.archive.org/web/20220615180842/http://www.ycelldeath.com/yapoptosis/,2013-09-29,"Department of Chemical and Biological Engineering, Chalmers University of Technology, Kemivägen 10, 41296, Gothenburg, Sweden, Department of Mathematics, Chalmers University of Technology, Chalmers tvärgata 3, 41296, Gothenburg, Sweden, Department of Mathematics, University of Gothenburg, Chalmers tvärgata 3, 41296, Gothenburg, Sweden and Fine Chemicals and Biocatalysis Research, BASF SE, GVF/D - A030, 67056 Ludwigshafen, Germany.","Wanichthanarak K, Cvijovic M, Molt A, Petranovic D",,European Research Council,3.0,"Germany, Sweden, Sweden, Sweden" +24163100,uORFdb,0.997731447,uORFdb,0.997731447,,0,1,http://cbdm.mdc-berlin.de/tools/uorfdb,302,Germany,"(52.6277,13.507)",http://web.archive.org/web/20220808150749/https://cbdm.mdc-berlin.de/tools/uorfdb,2013-10-24,"Max Delbrück Center for Molecular Medicine (MDC), Cell Differentiation and Tumorigenesis, Robert-Rössle-Strasse 10, D-13092 Berlin, Germany, Hematology, Oncology and 
Tumor Immunology, Helios Klinikum Berlin-Buch, Schwanebecker Chaussee 50, D-13125 Berlin, Germany, Max Delbrück Center for Molecular Medicine (MDC), Computational Biology and Data Mining, Robert-Rössle-Strasse 10, D-13092 Berlin, Germany and Humoldt-University, Department of Biology, Invalidenstrasse 43, D-10115 Berlin, Germany.","Wethmar K, Barbosa-Silva A, Andrade-Navarro MA, Leutz A",,,46.0,"Germany, Germany, Germany, Germany" +24165882,YeastNet,0.946244299,YeastNet,0.946244299,,0,1,http://www.inetbio.org/yeastnet,301,,,http://web.archive.org/web/20220710070646/http://www.inetbio.org/yeastnet/,2013-10-27,"Department of Biotechnology, College of Life Science and Biotechnology, Yonsei University, Seoul, Korea.","Kim H, Shin J, Kim E, Kim H, Hwang S, Shim JE, Lee I",,,37.0, +24170807,YEASTRACT,0.997687936,YEASTRACT,0.997687936,,0,1,http://www.yeastract.com,200,,,http://web.archive.org/web/20221013144700/http://yeastract.com/,2013-10-28,"Instituto Superior Técnico, Universidade de Lisboa, Av. Rovisco Pais, 1049-001 Lisbon, Portugal; IBB-Institute for Biotechnology and BioEngineering, Centre for Biological and Chemical Engineering, Biological Sciences Research Group, Av. Rovisco Pais, 1049-001 Lisbon, Portugal and INESC-ID, Knowledge Discovery and Bioinformatics Group, R. 
Alves Redol, 9, 1000-029 Lisbon, Portugal.","Teixeira MC, Monteiro PT, Guerreiro JF, Gonçalves JP, Mira NP, dos Santos SC, Cabrito TR, Palma M, Costa C, Francisco AP, Madeira SC, Oliveira AL, Freitas AT, Sá-Correia I",,,134.0,"Portugal, Portugal, Portugal" +24194600,IMPC,0.863769869,IMPC,0.863769869,,0,1,http://www.mousephenotype.org,301,,,http://web.archive.org/web/20221102163444/https://www.mousephenotype.org/,2013-11-04,"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK, Medical Research Council Harwell (Mammalian Genetics Unit and Mary Lyon Centre), Harwell, Oxfordshire OX11 0RD, UK and Mouse Informatics Group, Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA, UK.","Koscielny G, Yaikhom G, Iyer V, Meehan TF, Morgan H, Atienza-Herrero J, Blake A, Chen CK, Easty R, Di Fenza A, Fiegel T, Grifiths M, Horne A, Karp NA, Kurbatova N, Mason JC, Mason JC, Matthews P, Oakley DJ, Qazi A, Regnart J, Retha A, Santos LA, Sneddon DJ, Warren J, Westerberg H, Wilson RJ, Melvin DG, Smedley D, Brown SD, Flicek P, Skarnes WC, Mallon AM, Parkinson H",,"Medical Research Council, NHGRI NIH HHS, Wellcome Trust, NHGRI NIH HHS, Medical Research Council",154.0, +24198712,VASCAN,0.996923745,VASCAN,0.996923745,Database of Vascular Plants of Canada,0.964604948,1,http://data.canadensys.net/vascan,405,,,no_wayback,2013-07-24,"Université de Montréal Biodiversity Centre, 4101 rue Sherbrooke est, H1X2B2, Montreal, Canada.","Desmet P, Brouillet L",,,5.0,Canada +"24243849, 28150236",P(3)DB,0.997485409,P(3)DB,0.997485409,Plant Protein Phosphorylation Database,0.983375771,2,http://p3db.org,301,,,http://web.archive.org/web/20220815011551/https://p3db.org/,2013-11-15,"Department of Computer Science, University of Missouri, Columbia, MO 65211, USA, Bond Life Sciences Center, University of Missouri, Columbia, MO 65211, USA, School of Communication and Information 
Engineering, Shanghai University, Shanghai 200444, People's Republic of China, Department of Biology, Brandeis University, MA 02453, USA, Computational Biology Center, Memorial Sloan-Kettering Cancer Center, New York, NY 10065, USA and Department of Biochemistry, University of Missouri, Columbia, MO 65211, USA., Department of Computer Science and Christopher S. Bond Life Sciences Center, University of Missouri, 1201 Rollins St., Columbia, MO, 65211, USA. qywt5@mail.mizzou.edu.","Yao Q, Ge H, Wu S, Zhang N, Zhang N, Chen W, Xu C, Gao J, Thelen JJ, Xu D, Yao Q, Xu D",", ","NIGMS NIH HHS, NIGMS NIH HHS, ",34.0,"China, United States, United States, United States, United States, United States, United States" +24267744,VTO,0.888742566,VTO,0.888742566,Vertebrate Taxonomy Ontology,0.715373244,1,http://phenoscape.org,301,,,http://web.archive.org/web/20221006155602/https://phenoscape.org/,2013-11-22,"Department of Vertebrate Zoology and Anthropology, California Academy of Sciences, San Francisco, California, USA. 
dblackburn@calacademy.org.","Midford PE, Dececchi TA, Balhoff JP, Dahdul WM, Ibrahim N, Lapp H, Lundberg JG, Mabee PM, Sereno PC, Westerfield M, Vision TJ, Blackburn DC",,"NHGRI NIH HHS, NHGRI NIH HHS",15.0,United States +24285306,PortEco,0.997819364,PortEco,0.997819364,,0,1,http://porteco.org,200,,,http://web.archive.org/web/20221105022238/http://porteco.org/,2013-11-26,"Department of Biochemistry and Biophysics, Texas A&M University, College Station, TX 77843, USA, Department of Genetics, Stanford University, Stanford, CA 94305, USA, Department of Biology, Texas A&M University, College Station, TX, 77843, USA, Artificial Intelligence Center, SRI International, Menlo Park, CA 94025, USA and Deptartment of Preventive Medicine, University of Southern California, Los Angeles, CA 90089, USA.","Hu JC, Sherlock G, Siegele DA, Aleksander SA, Ball CA, Demeter J, Gouni S, Holland TA, Karp PD, Lewis JE, Liles NM, McIntosh BK, Mi H, Muruganujan A, Wymore F, Thomas PD, Altman T",,"NIGMS NIH HHS, NIGMS NIH HHS",14.0,"United States, United States, United States, United States, United States" +"24304889, 26481351, 29165655, 22064864",Expression Atlas,0.897050291,Expression Atlas,0.897050291,,0,4,http://www.ebi.ac.uk/gxa,301,,,http://web.archive.org/web/20131121201608/http://www.ebi.ac.uk:80/gxa/,2018-01-01,"European Molecular Biology Laboratory, European Bioinformatics Institute, EMBL-EBI, Hinxton, CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, EMBL-EBI, Hinxton, UK rpetry@ebi.ac.uk., European Molecular Biology Laboratory, European Bioinformatics Institute, EMBL-EBI, Hinxton, UK., European Bioinformatics Institute, EMBL, Hinxton, UK and Dana-Farber Cancer Institute, Boston, MA, USA.","Petryszak R, Burdett T, Fiorelli B, Fonseca NA, Gonzalez-Porta M, Hastings E, Huber W, Jupp S, Keays M, Kryvych N, McMurry J, Marioni JC, Malone J, Megy K, Rustici G, Tang AY, Taubert J, Williams E, Mannion O, Parkinson HE, Brazma A, Petryszak R, Keays M, 
Tang YA, Fonseca NA, Barrera E, Burdett T, Füllgrabe A, Fuentes AM, Jupp S, Koskinen S, Mannion O, Huerta L, Megy K, Snow C, Williams E, Barzine M, Hastings E, Weisser H, Wright J, Jaiswal P, Huber W, Choudhary J, Parkinson HE, Brazma A, Papatheodorou I, Fonseca NA, Keays M, Tang YA, Barrera E, Bazant W, Burke M, Füllgrabe A, Fuentes AM, George N, Huerta L, Koskinen S, Mohammed S, Geniza M, Preece J, Jaiswal P, Jarnuczak AF, Huber W, Stegle O, Vizcaino JA, Brazma A, Petryszak R, Kapushesky M, Adamusiak T, Burdett T, Culhane A, Farne A, Filippov A, Holloway E, Klebanov A, Kryvych N, Kurbatova N, Kurnosov P, Malone J, Melnichuk O, Petryszak R, Pultsin N, Rustici G, Tikhonov A, Travillian RS, Williams E, Zorin A, Parkinson H, Brazma A",", , , ","Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",747.0,United States +24304891,Virus Variation,0.927988728,Virus Variation,0.927988728,Variation Resource,0.64340649,1,http://www.ncbi.nlm.nih.gov/genomes/VirusVariation,301,United States,"(38.8341,-76.7974)",http://web.archive.org/web/20220404123752/https://www.ncbi.nlm.nih.gov/genomes/VirusVariation/,2013-12-04,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA.","Brister JR, Bao Y, Zhdanov SA, Ostapchuck Y, Chetvernin V, Kiryutin B, Zaslavsky L, Kimelman M, Tatusova TA",,Intramural NIH HHS,31.0,United States +"24318814, 24265223",Electronic Mouse Atlas of Gene Expression,0.988375112,EMAGE,0.985727191,Electronic Mouse Atlas of Gene Expression,0.988375112,2,http://www.emouseatlas.org/emage,302,,,no_wayback,2014-01-01,"MRC Human Genetics Unit, Institute of Genetics and Molecular 
Medicine, University of Edinburgh, Edinburgh, UK., MRC Human Genetics Unit, Institute of Genetics and Molecular Medicine, University of Edinburgh, Western General Hospital EH4 2XU, UK.","Richardson L, Stevenson P, Venkataraman S, Yang Y, Burton N, Rao J, Christiansen JH, Baldock RA, Davidson DR, Richardson L, Venkataraman S, Stevenson P, Yang Y, Moss J, Graham L, Burton N, Hill B, Rao J, Baldock RA, Armit C",", ","Medical Research Council, Wellcome Trust, Medical Research Council",77.0, +24341535,VTCdb,0.994380951,VTCdb,0.994380951,,0,1,http://vtcdb.adelaide.edu.au/Home.aspx,"HTTPConnectionPool(host='vtcdb.adelaide.edu.au', port=80): Max retries exceeded with url: /Home.aspx (Caused by ConnectTimeoutError(, 'Connection to vtcdb.adelaide.edu.au timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220303200649/http://vtcdb.adelaide.edu.au/Home.aspx,2013-12-16,None,"Wong DC, Sweetman C, Drew DP, Ford CM",,,33.0, +24498619,CHBMP,0.899794623,CHBMP,0.899794623,Hemophilia B mutation project,0.751574079,1,http://www.cdc.gov/hemophiliamutations,301,,,no_wayback,2013-08-19,"Division of Blood Disorders, National Center on Birth Defects and Developmental Disabilities, Centers for Disease Control and Prevention Atlanta, Georgia.","Li T, Miller CH, Payne AB, Craig Hooper W",,"Pfizer Inc, CDC Foundation",24.0,Georgia +24578356,Zebrafish GenomeWiki,0.791018561,Zebrafish GenomeWiki,0.791018561,,0,1,http://genome.igib.res.in/twiki,404,,,http://web.archive.org/web/20190310133545/http://genome.igib.res.in:80/twiki/,2014-02-26,"CSIR Institute of Genomics and Integrative Biology (CSIR-IGIB), Mall Road, Delhi 110007, India, Academy of Scientific and Innovative Research (AcSIR), Anusandhan Bhawan, Delhi 110001, India, Acharya Narendra Dev College, Delhi University, Govindpuri, Kalkaji, New Delhi 110019, India, Dr. B. R. 
Ambedkar Center for Biomedical Research, University of Delhi, Delhi 110007, India, Department of Genetics, University of Delhi South Campus, Benito Juarez Road, Dhaula Kuan, New Delhi 110021, India and Mayo Clinic, Rochester, MN, USA.","Singh M, Bhartiya D, Maini J, Sharma M, Singh AR, Kadarkaraisamy S, Rana R, Sabharwal A, Nanda S, Ramachandran A, Mittal A, Kapoor S, Sehgal P, Asad Z, Kaushik K, Vellarikkal SK, Jagga D, Muthuswami M, Chauhan RK, Leonard E, Priyadarshini R, Halimani M, Malhotra S, Patowary A, Vishwakarma H, Joshi P, Bhardwaj V, Bhaumik A, Bhatt B, Jha A, Kumar A, Budakoti P, Lalwani MK, Meli R, Jalali S, Joshi K, Pal K, Dhiman H, Laddha SV, Jadhav V, Singh N, Pandey V, Sachidanandan C, Ekker SC, Klee EW, Scaria V, Sivasubbu S",,,2.0,"India, India, India, India, India, United States" +24599579,UMD-APC,0.840977097,UMD-APC,0.840977097,,0,1,http://www.umd.be/APC,301,France,"(43.2951,5.3861)",http://web.archive.org/web/20220126140513/http://www.umd.be/APC/,2014-04-07,"UMR_S910, INSERM, Marseille, France; AP-HM La Timone, Gastroenterology Department, Marseille, France.","Grandval P, Blayau M, Buisine MP, Coulet F, Maugard C, Pinson S, Remenieras A, Tinat J, Uhrhammer N, Béroud C, Olschwang S",,,11.0,"France, France" +24608172,YTRP,0.992604494,YTRP,0.992604494,Yeast Transcriptional Regulatory Pathway,0.94507375,1,http://cosbi3.ee.ncku.edu.tw/YTRP,404,,,http://web.archive.org/web/20220331152004/http://cosbi3.ee.ncku.edu.tw/YTRP/,2014-03-07,"Department of Electrical Engineering, National Cheng Kung University, Tainan, Taiwan and Institute of Biomedical Informatics, National Yang-Ming University, Taipei, Taiwan.","Yang TH, Wang CC, Wang YC, Wu WS",,,11.0, +24816183,TuberQ,0.99642241,TuberQ,0.99642241,,0,1,http://tuberq.proteinq.com.ar,"HTTPConnectionPool(host='tuberq.proteinq.com.ar', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not 
known'))",,,http://web.archive.org/web/20190519163022/http://tuberq.proteinq.com.ar:80/,2014-05-08,"Departamento de Química Biológica, Facultad de Ciencias Exactas y Naturales, Universidad de Buenos Aires, Pabellón II, Buenos Aires C1428EHA, Argentina, INQUIMAE/UBA-CONICET, Facultad de Ciencias Exactas y Naturales, Universidad de Buenos Aires, Pabellón II, Buenos Aires C1428EHA, Argentina, Department of Physical Chemistry, Faculty of Pharmacy and Institute of Biomedicine (IBUB), University of Barcelona, Campus de l'Alimentació Torribera, Avgda. Prat de la Riba 171, Santa Coloma de Gramenet 08921, Spain, Department of Physical Chemistry, Faculty of Pharmacy and Institute of Biomedicine (IBUB), University of Barcelona, Avgda. Diagonal 643, Barcelona 08028, Spain and Catalan Institution for Research and Advanced Studies (ICREA), Passeig Lluís Companys 23, Barcelona 08010, Spain.","Radusky L, Defelipe LA, Lanzarotti E, Luque J, Barril X, Marti MA, Turjanski AG",,,15.0,"Argentina, Argentina, Spain, Spain, Spain" +24843289,URJC GB,0.922845423,URJC GB,0.922845423,,0,1,http://www.gbif.es:8080/ipt/resource.do?r=germoplasma-urjc,"HTTPConnectionPool(host='www.gbif.es', port=8080): Max retries exceeded with url: /ipt/resource.do?r=germoplasma-urjc (Caused by ConnectTimeoutError(, 'Connection to www.gbif.es timed out. (connect timeout=5)'))",,,no_wayback,2014-03-25,"Área de Biodiversidad y Conservación, Departamento de Biología y Geología, Escuela Superior de Ciencias Experimentales y Tecnología, Universidad Rey Juan Carlos, Tulipán s/n, 28933, Móstoles, Spain.","Alonso P, Iriondo JM",,,0.0,Spain +24889152,TSmiR,0.990324497,TSmiR,0.990324497,,0,1,http://bioeng.swjtu.edu.cn/TSmiR,301,China,"(30.5728,104.067)",http://web.archive.org/web/20150507163800/http://bioeng.swjtu.edu.cn:80/TSmiR/,2014-06-03,"School of Life Sciences and Bioengineering, Southwest Jiaotong University, Chengdu, 610031, P.R. 
China.","Guo Z, Maki M, Ding R, Yang Y, Zhang B, Xiong L",,,96.0,China +25024351,yStreX,0.943920732,yStreX,0.943920732,yeast stress expression database,0.765314773,1,http://www.ystrexdb.com,"HTTPConnectionPool(host='www.ystrexdb.com', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.ystrexdb.com timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220521065301/http://ystrexdb.com/,2014-07-14,"Department of Chemical and Biological Engineering, Chalmers University of Technology, Gothenburg, Sweden and Comparative Genomics Group, Biosciences Division, Oak Ridge National Laboratory, Oak Ridge, TN 37831, USA.","Wanichthanarak K, Nookaew I, Petranovic D",,,8.0,"Sweden, United States" +25025376,tropiTree,0.987223446,tropiTree,0.987223446,,0,1,http://bioinf.hutton.ac.uk/tropiTree,301,United Kingdom,"(56.4707,-3.0312)",no_wayback,2014-07-15,"Cell and Molecular Sciences, James Hutton Institute, Invergowrie, Scotland, United Kingdom.","Russell JR, Hedley PE, Cardle L, Dancey S, Morris J, Booth A, Odee D, Mwaura L, Omondi W, Angaine P, Machua J, Muchugi A, Milne I, Milne I, Kindt R, Jamnadass R, Dawson IK",,Natural Environment Research Council,3.0,United Kingdom +25217587,VirusMentha,0.996016741,VirusMentha,0.996016741,,0,1,http://virusmentha.uniroma2.it,301,,,http://web.archive.org/web/20220319132337/https://virusmentha.uniroma2.it/,2014-09-12,"Department of Biology, University of Rome Tor Vergata, Rome, Italy sinnefa@gmail.com.","Calderone A, Licata L, Cesareni G",,,59.0,Italy +25274736,ViRBase,0.998200536,ViRBase,0.998200536,,0,1,http://www.rna-society.org/virbase,301,,,http://web.archive.org/web/20221019025154/http://www.rna-society.org/virbase/,2014-10-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China Institute of Cardiovascular Sciences and Key Laboratory of Molecular Cardiovascular Sciences, Peking University Health Science Center, Beijing, China.","Li Y, Wang C, Miao 
Z, Bi X, Wu D, Jin N, Wang L, Wu H, Qian K, Li C, Zhang T, Zhang C, Yi Y, Lai H, Hu Y, Cheng L, Leung KS, Li X, Zhang F, Li K, Li X, Wang D",,,45.0,"China, China" +25300491,TrypanoCyc,0.996331513,TrypanoCyc,0.996331513,,0,1,http://www.metexplore.fr/trypanocyc,301,France,"(43.5585,1.6501)",http://web.archive.org/web/20220123082925/http://www.metexplore.fr/trypanocyc/,2014-10-09,"Institut National de la Recherche Agronomique (INRA), UMR1331, TOXALIM (Research Centre in Food Toxicology), Université de Toulouse, Toulouse, France.","Shameer S, Logan-Klumpler FJ, Vinson F, Cottret L, Merlet B, Achcar F, Boshart M, Berriman M, Breitling R, Bringaud F, Bütikofer P, Cattanach AM, Bannerman-Chukualim B, Creek DJ, Crouch K, de Koning HP, Denise H, Ebikeme C, Fairlamb AH, Ferguson MA, Ginger ML, Hertz-Fowler C, Kerkhoven EJ, Mäser P, Michels PA, Nayak A, Nes DW, Nolan DP, Olsen C, Silva-Franco F, Smith TK, Taylor MC, Tielens AG, Urbaniak MD, van Hellemond JJ, Vincent IM, Wilkinson SR, Wyllie S, Opperdoes FR, Barrett MP, Jourdan F",,"Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust",17.0,France +25324314,TTSMI,0.997173548,TTSMI,0.997173548,TTS Mapping and,0.7016765,1,http://ttsmi.bii.a-star.edu.sg,200,,,http://web.archive.org/web/20221110015803/http://ttsmi.bii.a-star.edu.sg/,2014-10-16,"Department of Genome and Gene Expression Data Analysis, Bioinformatics Institute, 138671, Singapore Interdisciplinary Graduate Program in Genetic Engineering, Graduate School, Kasetsart University, Bangkean, Bangkok 10900, Thailand.","Jenjaroenpun P, Chew CS, Yong TP, Choowongkomon K, Thammasorn W, Kuznetsov VA",,,8.0,"Singapore, Thailand" +25348409,Human Disease Ontology,0.669468194,,0,Human Disease 
Ontology,0.669468194,1,http://www.disease-ontology.org,302,,,http://web.archive.org/web/20221101070700/https://disease-ontology.org/,2014-10-27,"Center for Biomedical Informatics and Information Technology, National Cancer Institute, 9609 Medical Center Drive, Rockville, MD 20850, USA.","Kibbe WA, Arze C, Felix V, Mitraka E, Bolton E, Fu G, Mungall CJ, Binder JX, Malone J, Vasant D, Parkinson H, Schriml LM",,"NCRR NIH HHS, NIH HHS, NIH HHS, Wellcome Trust, NHGRI NIH HHS",256.0,United States +25361969,VaDE,0.989877224,VaDE,0.989877224,,0,1,http://bmi-tokai.jp/VaDE,403,,,http://web.archive.org/web/20220617152120/http://bmi-tokai.jp/VaDE/,2014-10-31,"Department of Molecular Life Science, Tokai University School of Medicine, Isehara, Kanagawa 259-1193, Japan.","Nagai Y, Takahashi Y, Imanishi T",,,2.0,Japan +25378322,UniPROBE,0.997834285,UniPROBE,0.997834285,Universal PBM Resource for Oligonucleotide Binding Evaluation,0.908042654,1,http://uniprobe.org,403,,,no_wayback,2014-11-05,"Division of Genetics, Department of Medicine, Brigham and Women's Hospital and Harvard Medical School, Boston, MA 02115, USA Bioinformatics Graduate Program, Northeastern University, Boston, MA 02115, USA.","Hume MA, Barrera LA, Gisselbrecht SS, Bulyk ML",,"NHGRI NIH HHS, NHGRI NIH HHS",131.0,"United States, United States" +"25392405, 23203872",non-human primate reference transcriptome resource,0.920451568,NHPRTR,0.710883155,non-human primate reference transcriptome resource,0.920451568,2,http://nhprtr.org,200,,,http://web.archive.org/web/20220707071003/http://nhprtr.org/,2014-11-11,"Department of Microbiology, University of Washington, Seattle, WA 98109, USA Washington National Primate Research Center, Seattle, WA 98109, USA., Department of Physiology and Biophysics, Weill Cornell Medical College, New York, NY 10065, USA.","Peng X, Thierry-Mieg J, Thierry-Mieg D, Nishida A, Pipes L, Bozinoski M, Thomas MJ, Kelly S, Weiss JM, Raveendran M, Muzny D, Gibbs RA, Rogers J, Schroth GP, Katze MG, 
Mason CE, Pipes L, Li S, Bozinoski M, Palermo R, Peng X, Blood P, Kelly S, Weiss JM, Thierry-Mieg J, Thierry-Mieg D, Zumbo P, Chen R, Schroth GP, Mason CE, Katze MG",", ","NIH HHS, NHGRI NIH HHS, NINDS NIH HHS, PHS HHS, NIH HHS, Intramural NIH HHS, NIH HHS, NIH HHS, NCRR NIH HHS, NIH HHS, NIGMS NIH HHS, NCRR NIH HHS, NINDS NIH HHS, Intramural NIH HHS, NCRR NIH HHS, NCRR NIH HHS, NINDS NIH HHS, NIH HHS",76.0,"United States, United States, United States" +25392406,VirHostNet,0.99125731,VirHostNet,0.99125731,,0,1,http://virhostnet.prabi.fr,301,,,http://web.archive.org/web/20221013125747/https://virhostnet.prabi.fr/,2014-11-11,"PRABI, Rhône Alpes Bioinformatics Center, UCBL, Lyon1, Université de Lyon, Lyon, France.","Guirimand T, Delmotte S, Navratil V",,,74.0,France +25392412,Addgene Repository,0.828912163,Addgene Repository,0.828912163,,0,1,http://www.addgene.org,301,,,http://web.archive.org/web/20221109214822/https://www.addgene.org/,2014-11-11,"Addgene, Cambridge, MA 02139, USA joanne.kamens@addgene.org.",Kamens J,,,27.0,United States +25392418,ValidatorDB,0.997314692,ValidatorDB,0.997314692,,0,1,http://ncbr.muni.cz/ValidatorDB,301,Czechia,"(49.3344,16.4522)",http://web.archive.org/web/20210918202138/http://ncbr.muni.cz/ValidatorDB/,2014-11-11,"CEITEC-Central European Institute of Technology, Masaryk University Brno, Kamenice 5, 625 00 Brno, Czech Republic National Centre for Biomolecular Research, Faculty of Science, Masaryk University, Kotlářská 2, 611 37 Brno, Czech Republic Faculty of Informatics, Masaryk University Brno, Botanická 68a, 602 00 Brno, Czech Republic.","Sehnal D, Svobodová Vařeková R, Pravda L, Ionescu CM, Geidl S, Horský V, Jaiswal D, Wimmerová M, Koča J",,,10.0, +"25414323, 29858801, 33755549",AFND,0.989334464,AFND,0.989334464,Allele Frequency Net Database,0.951628447,3,http://www.allelefrequencies.net,200,,,http://web.archive.org/web/20221002054844/http://www.allelefrequencies.net/,2020-10-21,"Institute of Integrative Biology, University of 
Liverpool, Liverpool, UK Center for Biomedical Research, Faculty of Medicine, Autonomous University of Coahuila, Torreon, Mexico., Department of Molecular Immunobiology, Faculty of Medicine, Centre for Biomedical Research, Autonomous University of Coahuila, Torreón, Coahuila, Mexico. faviel.gonzalez@uadec.edu.mx., Center for Biomedical Research, Faculty of Medicine, Autonomous University of Coahuila, Torreon, Mexico. Electronic address: faviel.gonzalez@uadec.edu.mx.","González-Galarza FF, Takeshita LY, Santos EJ, Kempson F, Maia MH, da Silva AL, Teles e Silva AL, Ghattaoraya GS, Alfirevic A, Jones AR, Middleton D, Gonzalez-Galarza FF, McCabe A, Melo Dos Santos EJ, Takeshita L, Ghattaoraya G, Jones AR, Middleton D, Gonzalez-Galarza FF, McCabe A, Melo Dos Santos EJ, Jones AR, Middleton D",", , ","Epilepsy Research UK, National Institute for Health Research (NIHR), Biotechnology and Biological Sciences Research Council, ",370.0,"Mexico, Mexico, Mexico" +25466819,WATRefNet,0.994038701,WATRefNet,0.994038701,White Adipose Tissue Health Reference Network,0.908846239,1,http://bioclaims.uib.es,301,,,http://web.archive.org/web/20140822181712/http://bioclaims.uib.es:80/,2014-12-03,"Microbiology & Systems Biology, TNO, Zeist, The Netherlands.","Kelder T, Summer G, Caspers M, van Schothorst EM, Keijer J, Duivenvoorde L, Klaus S, Voigt A, Bohnert L, Pico C, Palou A, Bonet ML, Dembinska-Kiec A, Malczewska-Malec M, Kieć-Wilk B, Del Bas JM, Caimari A, Arola L, van Erk M, van Ommen B, Radonjic M",,,8.0,Netherlands +25522035,YNA,0.981317759,YNA,0.981317759,Yeast Nucleosome Atlas,0.915188857,1,http://cosbi3.ee.ncku.edu.tw/yna,301,,,http://web.archive.org/web/20220808070159/http://cosbi3.ee.ncku.edu.tw/yna/,2014-12-08,None,"Hung PC, Yang TH, Liaw HJ, Wu WS",,,4.0, +25538713,VitisCyc,0.997620523,VitisCyc,0.997620523,,0,1,http://pathways.cgrb.oregonstate.edu,302,,,no_wayback,2014-12-09,"Department of Botany and Plant Pathology, Oregon State University Corvallis, OR, USA.","Naithani S, 
Raja R, Waddell EN, Elser J, Gouthu S, Deluc LG, Jaiswal P",,,12.0,United States +25591325,YersiniaBase,0.989195168,YersiniaBase,0.989195168,,0,1,http://yersinia.um.edu.my,"HTTPConnectionPool(host='yersinia.um.edu.my', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to yersinia.um.edu.my timed out. (connect timeout=5)'))",,,no_wayback,2015-01-16,"Genome Informatics Research Laboratory, High Impact Research Building (HIR) Building, University of Malaya, 50603, Kuala Lumpur, Malaysia. shiyangtan@gmail.com.","Tan SY, Dutta A, Jakubovics NS, Ang MY, Siow CC, Mutha NV, Heydari H, Wee WY, Wong GJ, Choo SW",,,4.0,Malaysia +25614757,WallProtDB,0.996273875,WallProtDB,0.996273875,,0,1,http://www.polebio.lrsv.ups-tlse.fr/WallProtDB,301,,,http://web.archive.org/web/20220616070311/https://www.polebio.lrsv.ups-tlse.fr/WallProtDB/,2015-01-16,"Université de Toulouse; UPS; UMR 5546, Laboratoire de Recherche en Sciences Végétales, BP 42617, F-31326 Castanet-Tolosan, France ; CNRS; UMR 5546, BP 42617, F-31326 Castanet-Tolosan, France.","San Clemente H, Jamet E",,,26.0,"France, France" +"25753703, 26286194, 24680503",CSDB,0.980909228,CSDB,0.980909228,Carbohydrate Structure Databases,0.910097814,3,http://csdb.glycoscience.ru,200,,,http://web.archive.org/web/20221005064907/http://csdb.glycoscience.ru/,2015-01-01,"N.D. Zelinsky Institute of Organic Chemistry, Russian Academy of Sciences, Leninsky Prospekt 47, Moscow, 119991, Russia, netbox@toukach.ru., N.D. Zelinsky Institute of Organic Chemistry, Russian Academy of Sciences, Moscow 119991, Russia netbox@toukach.ru., N.D. Zelinsky Institute of Organic Chemistry, Russian Academy of Sciences, Leninsky prospect 47, Moscow 119991, Russia. 
Electronic address: danamad@gmail.com.","Toukach PV, Egorova KS, Toukach PV, Egorova KS, Egorova KS, Toukach PV",", , ",", , ",58.0, +25935546,u-CARE,0.98301971,u-CARE,0.98301971,Comprehensive Antibiotic resistance Repository of Escherichia coli,0.747757421,1,http://www.e-bioinformatics.net/ucare,406,,,http://web.archive.org/web/20221006151648/https://e-bioinformatics.net/ucare/,2015-05-02,"Department of Computational Biology and Bioinformatics, JSBB, SHIATS Allahabad, Uttar Pradesh, India.","Saha SB, Uttam V, Verma V",,,5.0,India +26061870,YDHS,0.994329572,YDHS,0.994329572,,0,1,http://www.semanticgen.net/ydhs,301,,,http://web.archive.org/web/20160527105143/http://www.semanticgen.net/ydhs/,2015-06-10,"Department of Clinical Genetics, Oulu University Hospital, PEDEGO Research Unit, University of Oulu, and Medical Research Center Oulu, Oulu University Hospital and University of Oulu, PO Box 23, FI-90029, Oulu, Finland, timo.tiirikka@student.oulu.fi.","Tiirikka T, Moilanen JS",,Suomen Kulttuurirahasto,1.0,Finland +26065909,zflncRNApedia,0.602053881,zflncRNApedia,0.602053881,,0,1,http://genome.igib.res.in/zflncRNApedia,301,,,http://web.archive.org/web/20220318043839/http://genome.igib.res.in/zflncRNApedia/,2015-06-11,"GN Ramachandran Knowledge Center for Genome Informatics, CSIR-Institute of Genomics and Integrative Biology, Mathura Road, Delhi, 110020, India.","Dhiman H, Kapoor S, Sivadas A, Sivasubbu S, Scaria V",,,12.0,India +26066708,TRRUST,0.997621059,TRRUST,0.997621059,regulatory relationships,0.536692739,1,http://www.grnpedia.org/trrust,301,,"(37.5813,126.9377)",http://web.archive.org/web/20220513175244/https://www.grnpedia.org/trrust/,2015-06-12,"Department of Biotechnology, College of Life Science and Biotechnology, Yonsei University, Seoul, Korea.","Han H, Shim H, Shin D, Shim JE, Ko Y, Shin J, Kim H, Cho A, Kim E, Lee T, Kim H, Kim K, Yang S, Bae D, Yun A, Kim S, Kim CY, Cho HJ, Kang B, Shin S, Lee I",,,123.0, 
+26078228,WFRGdb,0.997528315,WFRGdb,0.997528315,Wood-Formation Related Genes database,0.967793643,1,http://me.lzu.edu.cn/woodformation,"HTTPConnectionPool(host='me.lzu.edu.cn', port=80): Max retries exceeded with url: /woodformation (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2015-06-16,"State Key Laboratory of Grassland and Agro-Ecosystems, School of Life Sciences, Lanzhou University, Lanzhou 730000, Gansu, China.","Xu T, Ma T, Hu Q, Liu J",,,2.0,China +26199991,UCDB,0.994101167,UCDB,0.994101167,Ulcerative Colitis Database,0.981074442,1,http://seiwertlab.uchicago.edu/UCDB,"HTTPConnectionPool(host='seiwertlab.uchicago.edu', port=80): Max retries exceeded with url: /UCDB (Caused by ConnectTimeoutError(, 'Connection to seiwertlab.uchicago.edu timed out. (connect timeout=5)'))",,,no_wayback,2015-08-01,"*Department of Gastroenterology, Ren Ji Hospital, School of Medicine, Shanghai Jiao Tong University, Shanghai Institute of Digestive Disease, Shanghai IBD Research Center, Shanghai, China; †Committee on Immunology, Department of Pathology, The University of Chicago, Chicago, Illinois; ‡Center for Growth, Metabolism and Aging, School of Life Sciences, Sichuan University, Chengdu, China; §Department of Surgery, Sir Run Run Shaw Hospital, School of Medicine, Zhejiang University, Hangzhou, China; and ||Department of Medicine, The University of Chicago, Chicago, Illinois.","Shen J, Mao AP, Zhu MM, Zhao P, Xu JJ, Zuo Z",,,1.0,"China, China, China" +26209309,HMA,0.983201583,HMA,0.983201583,human metabolic atlas,0.734993324,1,http://www.metabolicatlas.org,301,,,http://web.archive.org/web/20221109231110/https://metabolicatlas.org/,2015-07-24,"Department of Biology and Biological Engineering, Chalmers University of Technology, Göteborg 41269, Sweden nielsenj@chalmers.se.","Pornputtapong N, Nookaew I, Nielsen J",,,36.0,Sweden +"26227548, 32621232, 
28891124",DOCKGROUND,0.997901142,DOCKGROUND,0.997901142,,0,3,http://dockground.compbio.ku.edu,200,,,http://web.archive.org/web/20220908233313/http://dockground.compbio.ku.edu/,2020-01-01,"Center for Computational Biology, The University of Kansas, Lawrence, KS, 66047, USA. tatsiana.bylund@gmail.com., Computational Biology Program and Department of Molecular Biosciences, The University of Kansas, Lawrence, KS, USA. pkundro@ku.edu., Center for Computational Biology, The University of Kansas, Lawrence, Kansas, 66045.","Kirys T, Ruvinsky AM, Singla D, Tuzikov AV, Kundrotas PJ, Vakser IA, Kundrotas PJ, Kotthoff I, Choi SW, Copeland MM, Vakser IA, Kundrotas PJ, Anishchenko I, Dauzhenka T, Kotthoff I, Mnevets D, Copeland MM, Vakser IA",", , ","National Institutes of Health, National Science Foundation, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, Directorate for Biological Sciences, NSF, NIH, NIGMS NIH HHS, NSF",24.0,"United States, United States" +26340938,IKMC,0.861618261,IKMC,0.861618261,International Knockout Mouse Consortium,0.697445065,1,http://www.mousephenotype.org,301,,,http://web.archive.org/web/20221102163444/https://www.mousephenotype.org/,2015-09-04,"Stem Cell Engineering, Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SA, UK.","Rosen B, Schick J, Wurst W",,,18.0, +26452372,WaspAtlas,0.996073008,WaspAtlas,0.996073008,,0,1,http://waspatlas.com,302,,,wayback is down,2015-10-09,"Department of Genetics, University of Leicester, University Road, Leicester LE1 7RH, UK.","Davies NJ, Tauber E",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",7.0, +"26476456, 31696234",BiGG,0.950149775,BiGG,0.950149775,,0,2,http://bigg.ucsd.edu,405,,,http://web.archive.org/web/20220918133300/http://bigg.ucsd.edu/,2015-10-17,"Department of Bioengineering, University of California, San Diego, 9500 Gilman Drive, La Jolla, CA 92093, USA., Department of Bioengineering, University of 
California, San Diego, La Jolla, CA 92093, USA.","King ZA, Lu J, Dräger A, Miller P, Federowicz S, Lerman JA, Ebrahim A, Palsson BO, Lewis NE, Norsigian CJ, Pusarla N, McConn JL, Yurkovich JT, Dräger A, Palsson BO, King Z",", ","NNF Center for Biosustainability, Novo Nordisk Fonden, NNF Center for Biosustainability, NICHD NIH HHS, NNF Center for Biosustainability, NNF Center for Biosustainability, Novo Nordisk Fonden",302.0,"United States, United States" +26555441,PharmDB-K,0.986358921,PharmDB-K,0.986358921,,0,1,"http://pharmdb-k.org, http://biomart.i-pharm.org","200, 403",,", ","http://web.archive.org/web/20220405234807/http://pharmdb-k.org/, http://web.archive.org/web/20170620133912/http://biomart.i-pharm.org/",2015-11-10,"Medicinal Bioconvergence Research Center, Seoul National University, Seoul 152-742, Republic of Korea.","Lee JH, Park KM, Han DJ, Bang NY, Kim DH, Na H, Lim S, Kim TB, Kim DG, Kim HJ, Chung Y, Sung SH, Surh YJ, Kim S, Han BW",,,6.0, +26573482,WIDDE,0.997149587,WIDDE,0.997149587,,0,1,http://widde.toulouse.inra.fr,302,,,no_wayback,2015-11-14,"CIRAD, UMR INTERTRYP, F34398, Montpellier, France. 
guilhem.sempere@cirad.fr.","Sempéré G, Moazami-Goudarzi K, Eggen A, Laloë D, Gautier M, Flori L",,"INRA DGA, INRA DGA, INRA DGA, INRA Métaprogramme ACCAF, INRA AIP Bioressources, INRA DGA",25.0,France +26581408,VetBioBase,0.985843897,VetBioBase,0.985843897,,0,1,http://vetbiobase.igbb.msstate.edu,"HTTPConnectionPool(host='vetbiobase.igbb.msstate.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20151228210315/http://vetbiobase.igbb.msstate.edu:80/,2015-11-18,"Department of Basic Sciences, College of Veterinary Medicine, Mississippi State University, Mississippi State, MS 39762 USA, Institute for Genomics, Biocomputing & Biotechnology (IGBB), Mississippi State University, Mississippi State, MS 39762 USA, tbuza@igbb.msstate.edu.","Buza TM, Jack SW, Kirunda H, Khaitsa ML, Lawrence ML, Pruett S, Peterson DG",,,0.0,"United States, United States" +26590254,UET,0.958269477,UET,0.958269477,,0,1,http://mammoth.bcm.tmc.edu/uet,301,United States,"(29.7056,-95.402)",http://web.archive.org/web/20221005222751/http://mammoth.bcm.tmc.edu/uet/,2015-11-20,"Department of Molecular and Human Genetics, Baylor College of Medicine, Houston, TX 77030, USA.","Lua RC, Wilson SJ, Konecki DM, Wilkins AD, Venner E, Morgan DH, Lichtarge O",,"NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NLM NIH HHS, NIGMS NIH HHS",9.0,United States +"26590259, 33221922, 25428374",UCSC Genome Browser,0.851264405,UCSC Genome Browser,0.851264405,UCSC Genome Browser,0.851264405,3,http://genome.ucsc.edu,200,,,no_wayback,2021-01-01,"Genomics Institute, University of California Santa Cruz, Santa Cruz, CA 95064, USA mspeir@soe.ucsc.edu., Genomics Institute, University of California Santa Cruz, Santa Cruz, CA 95064, USA., Center for Biomolecular Science and Engineering, CBSE, UC Santa Cruz, 1156 High Street, Santa Cruz, CA 95064, USA kate@soe.ucsc.edu.","Speir ML, Zweig AS, Rosenbloom 
KR, Raney BJ, Paten B, Nejad P, Lee BT, Learned K, Karolchik D, Hinrichs AS, Heitner S, Harte RA, Haeussler M, Guruvadoo L, Fujita PA, Eisenhart C, Diekhans M, Clawson H, Casper J, Barber GP, Haussler D, Kuhn RM, Kent WJ, Navarro Gonzalez J, Zweig AS, Speir ML, Schmelter D, Rosenbloom KR, Raney BJ, Powell CC, Nassar LR, Maulding ND, Lee CM, Lee BT, Hinrichs AS, Fyfe AC, Fernandes JD, Diekhans M, Clawson H, Casper J, Benet-Pagès A, Barber GP, Haussler D, Kuhn RM, Haeussler M, Kent WJ, Rosenbloom KR, Armstrong J, Barber GP, Casper J, Clawson H, Diekhans M, Dreszer TR, Fujita PA, Guruvadoo L, Haeussler M, Harte RA, Heitner S, Hickey G, Hinrichs AS, Hubley R, Karolchik D, Learned K, Lee BT, Li CH, Miga KH, Nguyen N, Paten B, Raney BJ, Smit AF, Speir ML, Zweig AS, Haussler D, Kuhn RM, Kent WJ",", , ","Howard Hughes Medical Institute, NCI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, National Human Genome Research Institute, Silicon Valley Community Foundation, NHGRI NIH HHS, National Institutes of Health, NHGRI NIH HHS, UCSC Baskin Endowed Chair Funds, NHGRI NIH HHS, Howard Hughes Medical Institute, National Human Genome Research Institute, NIMH NIH HHS, NHGRI NIH HHS, Silicon Valley Community Foundation, National Human Genome Research Institute, California Institute for Regenerative Medicine, NHGRI NIH HHS, National Human Genome Research Institute, Center for Information Technology Research in the Interest of Society, National Human Genome Research Institute, University of California Office of the President Emergency, NHGRI NIH HHS, NCI NIH HHS, NIAID NIH HHS, NCI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, Howard Hughes Medical Institute, NIDCR NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS",932.0,"United States, United States, United States" +"26602694, 25555720",APD3,0.988607168,APD3,0.988607168,The antimicrobial peptide 
database,0.915424158,2,http://aps.unmc.edu/AP,"HTTPConnectionPool(host='aps.unmc.edu', port=80): Max retries exceeded with url: /AP (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20220327220828/https://aps.unmc.edu/AP/,2015-11-23,"Department of Pathology and Microbiology, University of Nebraska Medical Center, 986495 Nebraska Medical Center, Omaha, NE 68198-6495, USA gwang@unmc.edu., Department of Pathology and Microbiology, University of Nebraska Medical Center, 986495 Nebraska Medical Center, Omaha, NE, 68198-6495, USA, gwang@unmc.edu.","Wang G, Li X, Wang Z, Wang G",", ","NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS, NIAID NIH HHS",563.0,"United States, United States" +26644460,VigGS,0.995423436,VigGS,0.995423436,Vigna Genome Server,0.905400942,1,http://viggs.dna.affrc.go.jp,301,,,http://web.archive.org/web/20221017170250/https://viggs.dna.affrc.go.jp/,2015-12-07,"Agrogenomics Research Center, National Institute of Agrobiological Sciences, 2-1-2 Kannondai, Tsukuba, Ibaraki, 305-8602 Japan hirsakai@affrc.go.jp.","Sakai H, Naito K, Takahashi Y, Sato T, Yamamoto T, Muto I, Itoh T, Tomooka N",,,14.0,Japan +"26656949, 23193292",NPIDB,0.997779801,NPIDB,0.997779801,Nucleic acid-Protein Interaction DataBase,0.976899055,2,http://npidb.belozersky.msu.ru,301,,,no_wayback,2015-12-09,"Belozersky Institute of Physico-Chemical Biology, Lomonosov Moscow State University, Moscow 119992, Russia., Department of Mathematical Methods in Biology, Belozersky Institute of Physico-Chemical Biology, Lomonosov Moscow State University, Moscow, Russia.","Zanegina O, Kirsanov D, Baulin E, Karyagina A, Alexeevski A, Spirin S, Kirsanov DD, Zanegina ON, Aksianov EA, Spirin SA, Karyagina AS, Alexeevski AV",", ",", ",33.0, +26705106,WheatExp,0.978051126,WheatExp,0.978051126,,0,1,http://wheat.pw.usda.gov/WheatExp,"HTTPConnectionPool(host='wheat.pw.usda.gov', port=80): Max retries 
exceeded with url: /WheatExp (Caused by ConnectTimeoutError(, 'Connection to wheat.pw.usda.gov timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220805085840/https://wheat.pw.usda.gov/WheatExp/,2015-12-24,"Department of Plant Sciences, University of California, Davis, CA, 95616, USA. sppearce@ucdavis.edu.","Pearce S, Vazquez-Gross H, Herin SY, Hane D, Wang Y, Gu YQ, Dubcovsky J",,"Howard Hughes Medical Institute, National Institute of Food and Agriculture, National Institute of Food and Agriculture, Howard Hughes Medical Institute",42.0,United States +26826444,IPD-IMGT/HLA,0.737059861,IPD-IMGT/HLA,0.737059861,Polymorphism,0.601328015,1,http://www.ebi.ac.uk/ipd/imgt/hla,301,,,http://web.archive.org/web/20220802032045/https://www.ebi.ac.uk/ipd/imgt/hla/,2016-01-27,"Anthony Nolan Research Institute, Royal Free Hospital, Pond Street, Hampstead, London NW3 2QG, UK; UCL Cancer Institute, University College London, Royal Free Campus, Pond Street, Hampstead, London NW3 2QG, UK.","Robinson J, Soormally AR, Hayhurst JD, Marsh SGE",,Cancer Research UK,51.0, +26973684,FragariaCyc,0.99653697,FragariaCyc,0.99653697,,0,1,http://pathways.cgrb.oregonstate.edu,302,,,no_wayback,2016-03-04,"Department of Botany and Plant Pathology, Oregon State University Corvallis, OR, USA.","Naithani S, Partipilo CM, Raja R, Elser JL, Jaiswal P",,Oregon State University,4.0,United States +26989148,Wikidata,0.992722154,Wikidata,0.992722154,,0,1,http://www.wikidata.org,301,,,http://web.archive.org/web/20120918024058/http://www.wikidata.org:80/,2016-03-17,"The Scripps Research Institute, La Jolla, CA, USA bgood@scripps.edu asu@scripps.edu.","Burgstaller-Muehlbacher S, Waagmeester A, Mitraka E, Turner J, Putman T, Leong J, Naik C, Pavlidis P, Schriml L, Good BM, Su AI",,,19.0,United States +27053566,Vaxar,0.983407974,Vaxar,0.983407974,,0,1,http://www.violinet.org/vaxar,404,,,http://web.archive.org/web/20220615200451/https://www.violinet.org/vaxar/,2016-04-01,"Division of Comparative 
Medicine, University of South Florida, Tampa, Florida, USA.","Todd T, Dunn N, Xiang Z, He Y",,"NIAID NIH HHS, NCRR NIH HHS, NIAID NIH HHS, NCRR NIH HHS",1.0,United States +27242032,URSDB,0.997525334,URSDB,0.997525334,Universe of RNA Structures DataBase,0.987289786,1,http://server3.lpm.org.ru/urs,"HTTPConnectionPool(host='server3.lpm.org.ru', port=80): Max retries exceeded with url: /urs (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,http://web.archive.org/web/20220319220031/http://server3.lpm.org.ru/urs/,2016-05-30,"Laboratory of Applied Mathematics, Institute of Mathematical Problems of Biology, Russian Academy of Sciences, Pushchino, Moscow Region 142290, Russia Department of Algorithms and Technology of Programming, Faculty of Innovations and High Technology, Moscow Institute of Physics and Technology (State University), Dolgoprudny, Moscow Region 141700, Russia mroytberg@lpm.org.ru.","Baulin E, Yacovlev V, Khachko D, Spirin S, Roytberg M",,,3.0, +"27242034, 32542109",Abasy,0.987492204,Abasy,0.987492204,acteria systems,0.613150299,2,http://abasy.ccg.unam.mx,301,,,http://web.archive.org/web/20221017050733/https://abasy.ccg.unam.mx/,2016-05-30,"Group of Regulatory Systems Biology, Evolutionary Genomics Program, Universidad Nacional Autónoma De México, Av. Universidad S/N, Col. Chamilpa, Cuernavaca, Morelos 62210, México Undergraduate Program in Genomic Sciences, Center for Genomics Sciences, Universidad Nacional Autónoma De México, Av. Universidad S/N, Col. Chamilpa, Cuernavaca, Morelos 62210, México., Regulatory Systems Biology Research Group, Laboratory of Systems and Synthetic Biology, Center for Genomic Sciences, Universidad Nacional Autónoma de México, Av. Universidad s/n, Col. 
Chamilpa, 62210 Cuernavaca, Morelos, Mexico.","Ibarra-Arellano MA, Campos-González AI, Treviño-Quintanilla LG, Tauch A, Freyre-González JA, Escorcia-Rodríguez JM, Tauch A, Freyre-González JA",", ",", Dirección General de Asuntos del Personal Académico, Universidad Nacional Autónoma de México",12.0,Mexico +27242836,VESPUCCI,0.992614388,VESPUCCI,0.992614388,Vitis Expression Studies Platform Using COLOMBOS Compendia Instances,0.96687065,1,http://vespucci.colombos.fmach.it,"HTTPConnectionPool(host='vespucci.colombos.fmach.it', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to vespucci.colombos.fmach.it timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220617203253/http://vespucci.colombos.fmach.it/,2016-05-10,"Department of Computational Biology, Research and Innovation Center, Fondazione Edmund MachTrento, Italy; Department of Biology, University of PadovaPadova, Italy.","Moretto M, Sonego P, Pilati S, Malacarne G, Costantini L, Grzeskowiak L, Bagagli G, Grando MS, Moser C, Engelen K",,,21.0,"Italy, Italy" +27392072,YCRD,0.992400229,YCRD,0.992400229,Yeast Combinatorial Regulation Database,0.966890701,1,"http://cosbi.ee.ncku.edu.tw/YCRD/, http://cosbi2.ee.ncku.edu.tw/YCRD","301, 404",,", ","http://web.archive.org/web/20180123092114/http://cosbi.ee.ncku.edu.tw:80/YCRD/, http://web.archive.org/web/20200210210329/http://cosbi2.ee.ncku.edu.tw:80/YCRD/",2016-07-08,"Department of Electrical Engineering, National Cheng Kung University, Tainan, Taiwan.","Wu WS, Hsieh YC, Lai FJ",,"Ministry of Science and Technology, Taiwan, National Cheng Kung University",2.0, +27511743,VHLdb,0.995007813,VHLdb,0.995007813,,0,1,http://vhldb.bio.unipd.it,200,,,http://web.archive.org/web/20220620175816/http://vhldb.bio.unipd.it/,2016-08-11,"Department of Biomedical Sciences and CRIBI Biotechnology Center, University of Padova, Viale G. 
Colombo 3, 35121, Padova, Italy.","Tabaro F, Minervini G, Sundus F, Quaglia F, Leonardi E, Piovesan D, Tosatto SC",,,16.0,Italy +27543790,TSCD,0.990743339,TSCD,0.990743339,Tissue-Specific CircRNA Database,0.976619937,1,http://gb.whu.edu.cn/TSCD,403,,,http://web.archive.org/web/20220407075623/http://gb.whu.edu.cn/TSCD/,2017-11-01,None,"Xia S, Feng J, Lei L, Hu J, Xia L, Wang J, Xiang Y, Liu L, Zhong S, Han L, He C",,,132.0, +27634949,Verdant,0.98544848,Verdant,0.98544848,,0,1,http://verdant.iplantcollaborative.org/plastidDB,"HTTPConnectionPool(host='verdant.iplantcollaborative.org', port=80): Max retries exceeded with url: /plastidDB (Caused by ConnectTimeoutError(, 'Connection to verdant.iplantcollaborative.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20200421205055/http://verdant.iplantcollaborative.org/plastidDB/,2016-09-14,"Donald Danforth Plant Science Center, St. Louis, MO 63132, USA.","McKain MR, Hartsock RH, Wohl MM, Kellogg EA",,,22.0,United States +27789692,WERAM,0.997139871,WERAM,0.997139871,,0,1,http://weram.biocuckoo.org,200,,,http://web.archive.org/web/20221019025108/http://weram.biocuckoo.org/,2016-10-26,"Key Laboratory of Molecular Biophysics of Ministry of Education, College of Life Science and Technology and the Collaborative Innovation Center for Brain Science, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China.","Xu Y, Zhang S, Lin S, Guo Y, Deng W, Zhang Y, Xue Y",,,29.0,China +27789702,Cistrome,0.987228572,Cistrome,0.987228572,Browser,0.58698076,1,http://cistrome.org/db,301,,,http://web.archive.org/web/20221029163924/http://cistrome.org/db/,2016-10-26,"Clinical Translational Research Center, Shanghai Pulmonary Hospital, Tongji University, Shanghai 200433, China.","Mei S, Qin Q, Wu Q, Sun H, Zheng R, Zang C, Zhu M, Wu J, Shi X, Taing L, Liu T, Brown M, Meyer CA, Liu XS",,NCI NIH HHS,193.0,China +27813701,UNcleProt,0.993122756,UNcleProt,0.993122756,Universal Nuclear Protein database of 
barley,0.863325749,1,http://barley.gambrinus.ueb.cas.cz,200,Czechia,"(50.2323,15.7874)",no_wayback,2016-11-04,"a Institute of Experimental Botany , Centre of the Region Haná for Biotechnological and Agricultural Research , Olomouc , Czech Republic.","Blavet N, Uřinovská J, Jeřábková H, Chamrád I, Vrána J, Lenobel R, Beinhauer J, Šebela M, Doležel J, Petrovská B",,,5.0, +27899279,WormBase ParaSite,0.946643819,WormBase ParaSite,0.946643819,,0,1,http://parasite.wormbase.org,301,,,http://web.archive.org/web/20161030123945/http://parasite.wormbase.org:80/,2016-11-27,"European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. Electronic address: kevin.howe@wormbase.org.","Howe KL, Bolt BJ, Shafie M, Kersey P, Berriman M",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Medical Research Council, Michael Paulini, Biotechnology and Biological Sciences Research Council",177.0, +27899583,XTalkDB,0.99725914,XTalkDB,0.99725914,,0,1,http://www.xtalkdb.org,302,,,no_wayback,2016-11-28,"Department of Biological Sciences, Virginia Tech, Blacksburg, VA 24061, USA.","Sam SA, Teel J, Tegge AN, Bharadwaj A, Murali TM",,NIGMS NIH HHS,8.0,United States +27924015,TSTMP,0.996672034,TSTMP,0.996672034,,0,1,http://tstmp.enzim.ttk.mta.hu,200,Hungary,"(47.9573,21.7151)",http://web.archive.org/web/20221019174719/http://tstmp.enzim.ttk.mta.hu/,2016-10-18,"Faculty of Chemical Technology and Biotechnology, Budapest University of Technology and Economics, Műegyetem rakpart 3, H1111 Hungary.","Varga J, Dobson L, Reményi I, Tusnády GE",,,4.0,Hungary +"27987179, 24194597",PPDB,0.988334405,PPDB,0.988334405,Plant Promoter Database,0.942410922,2,http://ppdb.agr.gifu-u.ac.jp,301,,,http://web.archive.org/web/20220127095827/https://ppdb.agr.gifu-u.ac.jp/,2017-01-01,"United Graduate School of Agricultural Science, Gifu University, Yanagido 1-1, Gifu City, Gifu, 
501-1193, Japan., The United Graduate School of Agricultural Science, Gifu University, 1-1 Yanagido, Gifu City, Gifu 501-1193 Japan, Faculty of Applied Biological Sciences, Gifu University, 1-1 Yanagido, Gifu City, Gifu 501-1193 Japan, Center for Sustainable Resource Science, RIKEN Yokohama Institute, 1-7-22 Suehiro-cho, Tsurumi-ku, Yokohama, Kanagawa 230-0045 Japan, Graduate School of Arts and Sciences, The University of Tokyo, 3-8-1 Komaba, Meguro-ku, Tokyo 153-8902 Japan, Advanced Science Research Center, Kanazawa University, Takaramachi 13-1. Kanazawa City, Ishikawa 920-0934, Japan, National Institute for Basic Biology, Nishigonaka 38, Myodaiji, Okazaki City, Aichi 444-8585 Japan, Department of Basic Biology, School of Life Science, Graduate University for Advanced Studies, Okazaki 444-8585, Japan, Faculty of Biology, University of Freiburg, Schänzlestrasse 1, D79104 Freiburg, Germany, Plant Cell Biology, Faculty of Biology, University of Marburg, Karl-von-Frisch-Strasse 9, 35043 Marburg, Germany and Graduate School of Environmental Life Science, Kyoto Prefectural University, 1-5 Hangi-cho, Shimogamo, Sakyo-ku, Kyoto 606-8522 Japan.","Kusunoki K, Yamamoto YY, Hieno A, Naznin HA, Hyakumachi M, Sakurai T, Tokizawa M, Koyama H, Sato N, Nishiyama T, Hasebe M, Zimmer AD, Lang D, Reski R, Rensing SA, Obokata J, Yamamoto YY",", ",", ",30.0,"Germany, Germany, Japan, Japan, Japan, Japan, Japan, Japan, Japan, Japan, Japan" +28018331,wgMLST,0.924707353,wgMLST,0.924707353,,0,1,http://wgmlst.imst.nsysu.edu.tw,200,,,http://web.archive.org/web/20220618103625/http://wgmlst.imst.nsysu.edu.tw/,2016-12-15,"Central Regional Laboratory, Center for Diagnostics and Vaccine Development, Centers for Disease Control Taichung, Taiwan.","Liu YY, Chen CC, Chiou CS",,,8.0, +28025349,viruSITE,0.993772805,viruSITE,0.993772805,,0,1,http://www.virusite.org,200,Slovakia,"(48.2979,17.3547)",http://web.archive.org/web/20220518182853/http://www.virusite.org/,2016-12-26,"Laboratory of 
Bioinformatics, Institute of Molecular Biology, Slovak Academy of Sciences, Bratislava, Slovakia.","Stano M, Beke G, Klucar L",,,18.0,Slovakia +28158179,TrypsNetDB,0.998068571,TrypsNetDB,0.998068571,,0,1,http://trypsNetDB.org,302,United States,"(39.0469,-77.4903)",no_wayback,2017-02-03,"Institute of Parasitology, McGill University, Ste. Anne de Bellevue, Quebec, Canada.","Gazestani VH, Yip CW, Nikpour N, Berghuis N, Salavati R",,"Natural Sciences and Engineering Research Council of Canada (NSERC), Canadian Institutes of Health Research",4.0,Canada +28365718,VerSeDa,0.998133659,VerSeDa,0.998133659,vertebrate secretome database,0.940622234,1,http://genomics.cicbiogune.es/VerSeDa/index.php,200,Spain,"(43.3126,-1.9745)",http://web.archive.org/web/20221016235345/http://genomics.cicbiogune.es/VerSeDa/index.php,2017-01-01,"Genome Analysis Platform, CIC bioGUNE & CIBERehd, Bizkaia Technology Park, 48160 Derio, Spain.","Cortazar AR, Oguiza JA, Aransay AM, Lavín JL",,,4.0,Spain +28605766,GeneHancer,0.987699628,GeneHancer,0.987699628,,0,1,http://www.genecards.org,403,,,http://web.archive.org/web/20221101114702/https://www.genecards.org/,2017-01-01,"Department of Molecular Genetics, Weizmann Institute of Science, Rehovot 7610001, Israel.","Fishilevich S, Nudel R, Rappaport N, Hadar R, Plaschkes I, Iny Stein T, Rosen N, Kohn A, Twik M, Safran M, Lancet D, Cohen D",,,265.0,Israel +"29069501, 33179747",jMorp,0.996074796,jMorp,0.996074796,Japanese Multi Omics Reference Panel,0.883684933,2,http://jmorp.megabank.tohoku.ac.jp,302,,,http://web.archive.org/web/20221102173834/https://jmorp.megabank.tohoku.ac.jp/,2021-01-01,"Tohoku Medical Megabank Organization, Tohoku University, 2-1 Seiryo-machi, Aoba-ku, Miyagi 980-8575, Japan., Tohoku Medical Megabank Organization, Tohoku University, 2-1 Seiryo-machi, Aoba-ku, Sendai, Miyagi 980-8573, Japan.","Tadaka S, Saigusa D, Motoike IN, Inoue J, Aoki Y, Shirota M, Koshiba S, Yamamoto M, Kinoshita K, Tadaka S, Hishinuma E, Komaki S, Motoike IN, 
Kawashima J, Saigusa D, Inoue J, Takayama J, Okamura Y, Aoki Y, Shirota M, Otsuki A, Katsuoka F, Shimizu A, Tamiya G, Koshiba S, Sasaki M, Yamamoto M, Kinoshita K",", ",", Ministry of Education, Culture, Sports, Science and Technology, AMED, Japan Agency for Medical Research and Development, Japan Agency for Medical Research and Development, AMED, Japan Agency for Medical Research and Development, AMED, Japan Agency for Medical Research and Development",48.0,"Japan, Japan" +29112736,VarCards,0.997416556,VarCards,0.997416556,,0,1,http://varcards.biols.ac.cn,200,,,http://web.archive.org/web/20220519084432/http://varcards.biols.ac.cn/,2018-01-01,"Institute of Genomic Medicine, Wenzhou Medical University, Wenzhou, Zhejiang 325025, China.","Li J, Shi L, Zhang K, Zhang Y, Hu S, Zhao T, Teng H, Li X, Jiang Y, Ji L, Sun Z, Sun Z",,,58.0,China +29197720,ZikaBase,0.957984567,ZikaBase,0.957984567,,0,1,http://test5.bicpu.edu.in,"HTTPConnectionPool(host='test5.bicpu.edu.in', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='test5.bicpu.edu.in', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20190701165545/http://test5.bicpu.edu.in:80/,2017-12-01,"Centre for Bioinformatics, Pondicherry University, RV Nagar, Kalapet, Puducherry 605014, India.","Gurumayum S, Brahma R, Naorem LD, Muthaiyan M, Gopal J, Venkatesan A",,,6.0,India +29201145,Obesity and Co-morbid Disease Database,0.974759728,OCDD,0.967281461,Obesity and Co-morbid Disease Database,0.974759728,1,http://www.isical.ac.in,302,,,http://web.archive.org/web/20220925040049/https://www.isical.ac.in/,2017-11-21,"Machine Intelligence Unit, Indian Statistical Institute, 203 B.T. 
Road, Kolkata, 700108 India.","Ray I, Bhattacharya A, De RK",,,2.0,India +29308007,VirusDB,0.994696617,VirusDB,0.994696617,,0,1,http://yaulab.math.tsinghua.edu.cn/VirusDB,301,,,http://web.archive.org/web/20220713034030/http://yaulab.math.tsinghua.edu.cn/VirusDB/,2017-12-17,"Department of Mathematical Sciences, Tsinghua University, Beijing, China.","Dong R, Zheng H, Tian K, Yau SC, Mao W, Yu W, Yin C, Yu C, He RL, Yang J, Yau SS",,,3.0,China +29351734,EOGD,0.995608449,EOGD,0.995608449,octocarinatus Genome Database,0.981783918,1,http://ciliates.ihb.ac.cn/database/home,301,,,http://web.archive.org/web/20220122081909/http://ciliates.ihb.ac.cn/database/home/,2018-01-19,"Key Laboratory of Chemical Biology and Molecular Engineering of Ministry of Education, Institute of Biotechnology, Shanxi University, Taiyuan, 030006, China.","Wang RL, Miao W, Wang W, Xiong J, Liang AH",,"Natural Science Foundation of China, National Natural Science Foundation of China, Youth Innovation Promotion Association of the Chinese Academy of Sciences, National Natural Science Foundation of China, National Natural Science Foundation of China",5.0,China +29624889,PhyMet2,0.994540513,PhyMet2,0.994540513,,0,1,http://metanogen.biotech.uni.wroc.pl,200,,,http://web.archive.org/web/20220815084055/http://metanogen.biotech.uni.wroc.pl/,2018-06-01,"Department of Genomics, Faculty of Biotechnology, University of Wrocław, Wrocław, Poland.","Michał B, Gagat P, Jabłoński S, Chilimoniuk J, Gaworski M, Mackiewicz P, Marcin Ł",,"Narodowe Centrum Nauki, Krajowy Naukowy Osrodek Wiodacy, Narodowe Centrum Nauki, Narodowe Centrum Nauki",7.0,Poland +"29652620, 33080015, 30329036",GeneLab,0.978558898,GeneLab,0.978558898,,0,3,http://genelab.nasa.gov,301,,,http://web.archive.org/web/20221010105137/https://genelab.nasa.gov/,2021-01-01,"a   Wyle Labs, NASA Ames Research Center, Moffett Field, California, 94035., USRA/NASA Ames Research Center, Moffett Field, CA 94035, USA., Space Biosciences Division, USRA/NASA Ames 
Research Center, Moffett Field, CA, USA.","Beheshti A, Miller J, Kidane Y, Berrios D, Gebre SG, Costes SV, Berrios DC, Galazka J, Grigorev K, Gebre S, Costes SV, Ray S, Gebre S, Fogle H, Berrios DC, Tran PB, Galazka JM, Costes SV",", , ",", National Aeronautics and Space Administration, NASA, National Aeronautics and Space Administration, GeneLab Project, SLPSRA, Division of Space Life and Physical Sciences Research and Applications, Ames Research Center, NASA’s Space Biology Program",36.0,"United States, United States" +29718389,PUG-REST,0.995439017,PUG-REST,0.995439017,,0,1,http://pubchem.ncbi.nlm.nih.gov,301,,,http://web.archive.org/web/20221110171543/https://pubchem.ncbi.nlm.nih.gov/,2018-07-01,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Department of Health and Human Services, Bethesda, MD 20894, USA.","Kim S, Thiessen PA, Cheng T, Yu B, Bolton EE",,,21.0,United States +30048518,YARG,0.993278623,YARG,0.993278623,Yeast Arsenic-Related Genes,0.992494863,1,http://cosbi4.ee.ncku.edu.tw/YARG,200,,,http://web.archive.org/web/20180123092422/http://cosbi4.ee.ncku.edu.tw:80/YARG/,2018-07-26,"Department of Earth Sciences, National Cheng Kung University, Tainan, Taiwan.","Rathod J, Tu HP, Chang YI, Chu YH, Tseng YY, Jean JS, Wu WS",,"Ministry of Science and Technology, Taiwan, Ministry of Science and Technology, Taiwan",2.0, +30208844,WoM,0.97369504,WoM,0.97369504,,0,1,http://webofmicrobes.org,405,,,http://web.archive.org/web/20220622010102/http://www.webofmicrobes.org/,2018-09-12,"Environmental Genomics and Systems Biology, Lawrence Berkeley National Laboratory, M/S 100PFG100, 1 Cyclotron Road, Berkeley, CA, 94720, USA.","Kosina SM, Greiner AM, Lau RK, Jenkins S, Baran R, Bowen BP, Northen TR",,U.S. 
Department of Energy,9.0,United States +30217145,PPGD,0.992584169,PPGD,0.992584169,persalinus genome database,0.981605089,1,http://ciliates.ihb.ac.cn/database/home,301,,,http://web.archive.org/web/20220122081909/http://ciliates.ihb.ac.cn/database/home/,2018-09-14,"Key Laboratory of Aquatic Biodiversity and Conservation, Institute of Hydrobiology, Chinese Academy of Sciences, Wuhan, 430072, China.","Wei W, Chen K, Miao W, Yang W, Xiong J",,"National Natural Science Foundation of China, National Natural Science Foundation of China, Knowledge Innovation Program of the Chinese Academy of Sciences, Youth Innovation Promotion Association of the Chinese Academy of Sciences",1.0,China +30223042,TSNAdb,0.982816732,TSNAdb,0.982816732,tumor-specific neoantigen database,0.929707067,1,http://biopharm.zju.edu.cn/tsnadb,301,China,"(40.0018,116.333)",http://web.archive.org/web/20220615165329/http://biopharm.zju.edu.cn/tsnadb/,2018-08-01,"Institute of Drug Metabolism and Pharmaceutical Analysis and Zhejiang Provincial Key Laboratory of Anti-Cancer Drug Research, College of Pharmaceutical Sciences, Zhejiang University, Hangzhou 310058, China.","Wu J, Zhao W, Zhou B, Su Z, Gu X, Zhou Z, Chen S",,"National Key Research and Development Program of China, National Natural Science Foundation of China, Fundamental Research Funds for the Central Universities of China",26.0,China +30239928,UniLectin3D,0.978693748,UniLectin3D,0.978693748,,0,1,http://www.unilectin.eu/unilectin3D,302,France,"(50.9871,2.12554)",http://web.archive.org/web/20220618211343/https://www.unilectin.eu/unilectin3D/,2019-01-01,"Univ. 
Grenoble Alpes, CNRS, CERMAV, 38000 Grenoble, France.","Bonnardel F, Mariethoz J, Salentin S, Robin X, Schroeder M, Perez S, Lisacek F, Imberty A",,"State Secretariat for Education, Research and Innovation",25.0,France +"30265627, 33313778",VIPERdb,0.994908571,VIPERdb,0.994908571,Particle ExploreR data base,0.844930937,2,http://viperdb.scripps.edu,301,,,no_wayback,2021-01-01,"Department of Integrative Structural and Computational Biology, The Scripps Research Institute, La Jolla, California 92037, USA; email: reddyv@scripps.edu., Department of Integrative Structural and Computational Biology, The Scripps Research Institute, La Jolla, CA 92037, USA.","Ho PT, Montiel-Garcia DJ, Wong JJ, Carrillo-Tripp M, Brooks CL 3rd, Johnson JE, Reddy VS, Montiel-Garcia D, Santoyo-Rivera N, Ho P, Carrillo-Tripp M, Iii CLB, Johnson JE, Reddy VS",", ",", NIH, NIAID NIH HHS",15.0,"United States, United States" +30365026,Victors,0.988581896,Victors,0.988581896,,0,1,http://www.phidias.us/victors,404,,,http://web.archive.org/web/20220308092006/http://www.phidias.us/victors/,2019-01-01,"Unit for Laboratory Animal Medicine, Department of Microbiology and Immunology, and Center for Computational Medicine and Bioinformatics, University of Michigan Medical School, Ann Arbor, MI 48109, USA.","Sayers S, Li L, Ong E, Deng S, Fu G, Lin Y, Yang B, Zhang S, Fa Z, Zhao B, Xiang Z, Li Y, Zhao XM, Olszewski MA, Chen L, He Y",,"National Natural Science Foundation of China, National Natural Science Foundation of China, NIAID NIH HHS, NIH-NIAID, Chinese Academy of Sciences, Shanghai Pujiang Program, VA Research Career Scientist, BLRD VA, National Program on Key Basic Research Project of China, VA Merit, BLRD VA, Innovation Program of Shanghai Municipal Education Commission, National Program on Key Basic Research Project of China, National Natural Science Foundation of China",35.0,United States 
+30371820,UNITE,0.997397006,UNITE,0.997397006,,0,1,http://unite.ut.ee,301,Estonia,"(58.369,26.7466)",http://web.archive.org/web/20221101151147/https://unite.ut.ee/,2019-01-01,"University of Gothenburg, Department of Biological and Environmental Sciences, Gothenburg Global Biodiversity Centre, Box 461, 405 30 Gothenburg, Sweden.","Nilsson RH, Larsson KH, Taylor AFS, Bengtsson-Palme J, Jeppesen TS, Schigel D, Kennedy P, Picard K, Glöckner FO, Tedersoo L, Saar I, Kõljalg U, Abarenkov K",,"Alfred P. Sloan Foundation, Swedish Research Council of Environment, Agricultural Sciences, and Spatial Planning",338.0,Sweden +30371824,ViBrism,0.936161578,ViBrism,0.936161578,,0,1,http://vibrism.neuroinf.jp,301,Japan,"(35.6837,139.6805)",http://web.archive.org/web/20220709230109/https://vibrism.neuroinf.jp/,2019-01-01,"RIKEN Center for Advanced Photonics, Wako, Saitama 351-0198, Japan.","Morita M, Shimokawa K, Nishimura M, Nakamura S, Tsujimura Y, Takemoto S, Tawara T, Yokota H, Wemler S, Miyamoto D, Ikeno H, Sato A, Furuichi T, Kobayashi N, Okumura Y, Yamaguchi Y, Okamura-Oho Y",,"Japan Society for the Promotion of Science, Japan Society for the Promotion of Science, Japan Society for the Promotion of Science, Japan Society for the Promotion of Science, Japan Society for the Promotion of Science, Japan Society for the Promotion of Science",1.0,Japan +30395310,Vesiclepedia,0.996926308,Vesiclepedia,0.996926308,,0,1,http://www.microvesicles.org,200,Australia,"(-37.6154,145.0186)",http://web.archive.org/web/20220624003822/http://microvesicles.org/,2019-01-01,"Department of Biochemistry and Genetics, La Trobe Institute for Molecular Science, La Trobe University, Bundoora, Victoria 3086, Australia.","Pathan M, Fonseka P, Chitti SV, Kang T, Sanwlani R, Van Deun J, Hendrix A, Mathivanan S",,"National Health and Medical Research Council, Australian Research Council DP, Australian Research Council FT",171.0,Australia 
+30398663,CATH,0.967545748,CATH,0.967545748,,0,1,http://www.cathdb.info,200,,,no_wayback,2019-01-01,"Structural and Molecular Biology, University College London WC1E 6BT, UK.","Sillitoe I, Dawson N, Lewis TE, Das S, Lees JG, Ashford P, Tolulope A, Scholes HM, Senatorov I, Bujan A, Ceballos Rodriguez-Conde F, Dowling B, Thornton J, Orengo CA",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council",53.0, +30407009,VIETHERB,0.988221526,VIETHERB,0.988221526,,0,1,http://vietherb.com.vn,301,,"(10.8017,106.646)",http://web.archive.org/web/20220617133241/https://vietherb.com.vn/,2018-12-03,"Computational Biology Center , International University-VNU , Ho Chi Minh City 700000 , Vietnam.","Nguyen-Vo TH, Le T, Pham D, Nguyen T, Le P, Nguyen A, Nguyen T, Nguyen TN, Nguyen V, Do H, Trinh K, Duong HT, Le L",,"U.S. Department of Defense, U.S. 
Department of Defense",10.0, +30462313,Cistrome DB,0.963168994,Cistrome DB,0.963168994,Cistrome Data Browser,0.840564919,1,"http://cistrome.org/db, http://dbtoolkit.cistrome.org","301, 200",,", ","http://web.archive.org/web/20221029163924/http://cistrome.org/db/, http://web.archive.org/web/20221009062753/http://dbtoolkit.cistrome.org/",2019-01-01,"Shanghai Key Laboratory of Tuberculosis, Clinical Translational Research Center, Shanghai Pulmonary Hospital, School of Life Sciences and Technology, Tongji University, Shanghai 200092, China.","Zheng R, Wan C, Mei S, Qin Q, Wu Q, Sun H, Chen CH, Brown M, Zhang X, Meyer CA, Liu XS",,"NHGRI NIH HHS, NCI NIH HHS, National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Institutes of Health of US, National Institutes of Health of US",168.0,China +30465539,VCGDB,0.950244635,VCGDB,0.950244635,Virtual Chinese Genome Database,0.920584655,1,"http://bigd.big.ac.cn/vcg/, http://bigd.big.ac.cn/gvm","301, 301","China, China","(39.96,116.298), (39.96,116.298)","http://web.archive.org/web/20201031223321/http://bigd.big.ac.cn/vcg/, http://web.archive.org/web/20211020110435/http://bigd.big.ac.cn/gvm/",2018-11-01,"BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Song S, Teng XF, Xiao JF",,,0.0,China +30546860,YaTCM,0.994157434,YaTCM,0.994157434,,0,1,http://cadd.pharmacy.nankai.edu.cn/yatcm/home,403,,,http://web.archive.org/web/20220121050628/http://cadd.pharmacy.nankai.edu.cn/yatcm/home,2018-11-23,"State Key Laboratory of Medicinal Chemical Biology, College of Pharmacy and Tianjin Key Laboratory of Molecular Drug Research, Nankai University, Haihe Education Park, 38 Tongyan Road, Tianjin 300353, China.","Li B, Ma C, Zhao X, Hu Z, Du T, Xu X, Wang Z, Lin J",,National Key R&D Program of China,15.0,China 
+30593617,ViPR,0.996145308,ViPR,0.996145308,Virus Pathogen Resource,0.91619873,1,http://www.viprbrc.org/brc/home.spg?decorator=flavi_hcv,301,United States,"(41.713,-87.9608)",no_wayback,2019-01-01,"J. Craig Venter Institute, La Jolla, CA, USA.","Zhang Y, Zmasek C, Sun G, Larsen CN, Scheuermann RH",,NIAID NIH HHS,2.0,United States +30601939,UbiHub,0.998238921,UbiHub,0.998238921,,0,1,http://ubihub.thesgc.org,301,Canada,"(43.6532,-79.3832)",no_wayback,2019-08-01,"Structural Genomics Consortium, University of Toronto, Toronto, ON, Canada.","Liu L, Damerell DR, Koukouflis L, Tong Y, Marsden BD, Schapira M",,"Pfizer, SGC, Merck KGaA, Structural Genomics Consortium, Wellcome Trust, Ontario Genomics Institute, EU/EFPIA, São Paulo Research Foundation-FAPESP, Innovative Medicines Initiative, The Wellcome, Wellcome Trust, ULTRA-DD, Boehringer Ingelheim, Takeda, Bayer Pharma AG, Canada Foundation for Innovation, Janssen, AbbVie, Eshelman Institute for Innovation, Novartis Pharma AG, Ontario Ministry of Research, Innovation and Science (MRIS)",5.0,Canada +30614601,UVEOGENE,0.992076588,UVEOGENE,0.992076588,,0,1,"http://databases.lovd.nl/shared/genes, http://www.uvogene.com","301, 200",Netherlands,"(52.17,4.4728), ","http://web.archive.org/web/20221020164717/https://databases.lovd.nl/shared/genes, http://web.archive.org/web/20220621125256/http://www.uvogene.com/",2019-01-16,"The First Affiliated Hospital of Chongqing Medical University, Chongqing Key Laboratory of Ophthalmology, and Chongqing Eye Institute, Chongqing, People's Republic of China.","Wang Q, Su G, Tan X, Deng J, Du L, Huang X, Lv M, Yi S, Hou S, Kijlstra A, Yang P",,,5.0,China +30887928,VaxiJen,0.989937842,VaxiJen,0.989937842,,0,1,http://www.ddg-pharmfac.net/vaxi,404,,,no_wayback,2019-01-01,"Faculty of Pharmacy, Medical University of Sofia, Sofia, Bulgaria.","Zaharieva N, Dimitrov I, Flower DR, Doytchinova I",,,13.0,Bulgaria 
+31021279,UVGD,0.993201971,UVGD,0.993201971,,0,1,http://biokb.ncpsb.org/UVGD,301,,,no_wayback,2019-05-13,"a State Key Laboratory of Proteomics, Beijing Proteome Research Center, National Center for Protein Sciences-Beijing (PHOENIX Center), Beijing Institute of Lifeomics , Beijing , China.","Xu H, Wang Y, Diao L, Wang X, Zhang Y, Zhu J, Liu J, Yao J, Liu Z, Li Y, He F, Wang Z, Liu Y, Li D",,"Beijing Nova Program, Innovation Project, National Natural Science Foundation of China, State Key Laboratory of Proteomics, National Key Research and Development Program, State Key Laboratory of Proteomics",0.0,China +31169974,GDBMedChem,0.997045815,GDBMedChem,0.997045815,Chemistry,0.490931183,1,http://gdb.unibe.ch,301,,,http://web.archive.org/web/20221005232107/https://gdb.unibe.ch/,2019-06-06,"Department of Chemistry and Biochemistry, University of Bern, Freiestrasse 3, 3012, Bern, Switzerland.","Awale M, Sirockin F, Stiefl N, Reymond JL",,,5.0,Switzerland +31245720,VPGD,0.986272037,VPGD,0.986272037,and,0.526194394,1,http://vigs.noble.org,200,,,http://web.archive.org/web/20220617113505/https://vigs.noble.org/,2018-04-23,Noble Research Institute Ardmore Oklahoma.,"Senthil-Kumar M, Wang M, Chang J, Ramegowda V, Del Pozo O, Liu Y, Doraiswamy V, Lee HK, Ryu CM, Wang K, Xu P, Van Eck J, Chakravarthy S, Dinesh-Kumar SP, Martin GB, Mysore KS",,"Noble Research Institute, National Science Foundation",6.0, +31263870,ValTrendsDB,0.964570105,ValTrendsDB,0.964570105,,0,1,http://ncbr.muni.cz/ValTrendsDB,301,Czechia,"(49.3344,16.4522)",http://web.archive.org/web/20220303130428/http://ncbr.muni.cz/ValTrendsDB/,2019-12-01,"National Centre for Biomolecular Research, Faculty of Science, Masaryk University, Brno, Czech Republic.","Horský V, Bendová V, Toušek D, Koča J, Svobodová R",,"European Union’s Horizon 2020, European Union’s Horizon 2020, European Regional Development Fund, European Regional Development Fund, CEITEC 2020, Grant Agency of Masaryk University, Ministry of Education, Youth and 
Sports of the Czech Republic",1.0, +31274965,vMS-Share,0.993173659,vMS-Share,0.993173659,Visual Mass-Spec Share,0.818651617,1,http://vmsshare.nist.gov,301,,,http://web.archive.org/web/20220418013636/https://vmsshare.nist.gov/,2019-01-01,"Chemical Sciences Division, National Institute of Standards and Technology, Gaithersburg, Maryland 20899-8380, United States.","Blonder N, Orsburn BC, Blonder J, Gonzalez CA",,,1.0,United States +31283070,VIPdb,0.984589517,VIPdb,0.984589517,genetic Variant Impact Predictor Database,0.94663018,1,http://genomeinterpretation.org/vipdb,404,,,http://web.archive.org/web/20201125080959/https://genomeinterpretation.org/vipDB,2019-08-17,"Department of Plant and Microbial Biology, University of California, Berkeley, California.","Hu Z, Yu C, Furutsuki M, Andreoletti G, Ly M, Hoskins R, Adhikari AN, Brenner SE",,"NHGRI NIH HHS, Foundation for the National Institutes of Health, Tata Consultancy Services, NHGRI NIH HHS, NIH HHS",11.0, +31501752,ABCD,0.989302754,ABCD,0.989302754,Alzheimer's disease Biomarkers Comprehensive Database,0.986439314,1,http://www.bioinfoindia.org/abcd,301,,,http://web.archive.org/web/20190917014412/http://bioinfoindia.org:80/abcd/,2019-09-03,"Department of Biotechnology and Bioinformatics, Jaypee University of Information Technology, Waknaghat, Solan, Himachal Pradesh 173234 India.","Kumar A, Bansal A, Singh TR",,,1.0,India +31504823,WALTZ-DB,0.969778025,WALTZ-DB,0.969778025,,0,1,http://waltzdb.switchlab.org,200,,,wayback is down,2020-01-01,"VIB Center for Brain & Disease Research, Switch Laboratory, Leuven, 3000, Belgium.","Louros N, Konstantoulea K, De Vleeschouwer M, Ramakers M, Schymkowitz J, Rousseau F",,"Research Flanders Post-doctoral Fellowship, European Research Council, European Research Council",22.0,Belgium +31512145,ZIKAVID,0.995752037,ZIKAVID,0.995752037,ZIKA Virus Infection Database,0.853707194,1,http://zikavid.org,301,,,http://web.archive.org/web/20220725085452/https://zikavid.org/,2019-09-11,"Faculdade 
de Farmácia, Universidade Federal do Rio Grande do Sul, Av. Ipiranga, 2752 suit 709, Porto Alegre, RS, Brazil.","Rosa RL, Santi L, Berger M, Tureta EF, Quincozes-Santos A, Souza DO, Guimarães JA, Beys-da-Silva WO",,CAPES/MCTI/CNPq,2.0,Brazil +31588507,VDJdb,0.996914685,VDJdb,0.996914685,,0,1,http://vdjdb.cdr3.net,301,Czechia,"(49.2031,16.5352)",http://web.archive.org/web/20220905074811/https://vdjdb.cdr3.net/,2020-01-01,"Pirogov Russian Medical State University, Moscow, Russia.","Bagaev DV, Vroomans RMA, Samir J, Stervbo U, Rius C, Dolton G, Greenshields-Watson A, Attaf M, Egorov ES, Zvyagin IV, Babel N, Cole DK, Godkin AJ, Sewell AK, Kesmir C, Chudakov DM, Luciani F, Shugay M",,Russian Science Foundation,72.0, +31598695,MirGeneDB,0.99370259,MirGeneDB,0.99370259,,0,1,http://mirgenedb.org,302,,,http://web.archive.org/web/20221027001206/https://mirgenedb.org/,2020-01-01,"Science for Life Laboratory, Department of Molecular Biosciences, The Wenner-Gren Institute, Stockholm University, Stockholm, Sweden.","Fromm B, Domanska D, Høye E, Ovchinnikov V, Kang W, Aparicio-Puerta E, Johansen M, Flatmark K, Mathelier A, Hovig E, Hackenberg M, Friedländer MR, Peterson KJ",,"Russian Science Foundation, University of Nottingham, NASA-Ames, Southern and Eastern Norway Regional Health Authority, Dartmouth College, National Science Foundation, Swedish Research Council, Norwegian Research Council, Southern and Eastern Norway Regional Health Authority",71.0,Sweden +31598702,VISDB,0.994756818,VISDB,0.994756818,Viral,0.535345972,1,http://bioinfo.uth.edu/VISDB,302,United States,"(29.8834,-95.4553)",no_wayback,2020-01-01,"Center for Precision Health, School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, TX 77030, USA.","Tang D, Li B, Xu T, Hu R, Tan D, Song X, Jia P, Zhao Z",,"Cancer Prevention and Research Institute of Texas, National Institutes of Health, National Institutes of Health, NLM NIH HHS, NIDA NIH HHS, Cancer Prevention and 
Research Institute of Texas",12.0,United States +"31647096, 28053165",proGenomes2,0.9957847,proGenomes2,0.9957847,,0,2,http://progenomes.embl.de,301,,,http://web.archive.org/web/20220712072358/https://progenomes.embl.de/,2020-01-01,"Department of Medical Microbiology, Academic Medical Centre, University of Amsterdam, Amsterdam, The Netherlands., Structural and Computational Biology Unit, European Molecular Biology Laboratory, 69117 Heidelberg, Germany.","Mende DR, Letunic I, Maistrenko OM, Schmidt TSB, Milanese A, Paoli L, Hernández-Plaza A, Orakov AN, Forslund SK, Sunagawa S, Zeller G, Huerta-Cepas J, Coelho LP, Bork P, Mende DR, Letunic I, Huerta-Cepas J, Li SS, Forslund K, Sunagawa S, Bork P",", ","Fondo Social Europeo, Heidelberg Center for Human Bioinformatics, Consejería de Educación, Juventud y Deporte de la Comunidad de Madrid, ETH Zürich, Fudan University, Shanghai Municipal Science and Technology, Ministerio de Ciencia, Innovación y Universidades, European Molecular Biology Laboratory, Helmut Horten Foundation, ZHANGJIANG LAB, European Research Council, Horizon 2020, European Research Council, European Research Council, European Research Council",61.0,"Germany, Netherlands" +31680168,TSEA-DB,0.995504111,TSEA-DB,0.995504111,Tissue-Specific Enrichment Analysis DataBase,0.810658699,1,http://bioinfo.uth.edu/TSEADB,302,,,http://web.archive.org/web/20221016212949/https://bioinfo.uth.edu/TSEADB/,2020-01-01,"Center for Precision Health, School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, TX 77030, USA.","Jia P, Dai Y, Hu R, Pei G, Manuel AM, Zhao Z",,"Cancer Prevention and Research Institute of Texas, NLM NIH HHS, National Institutes of Health",10.0,United States +"31696235, 26582919",MGnify,0.997318149,MGnify,0.997318149,,0,2,http://www.ebi.ac.uk/metagenomics,301,,,http://web.archive.org/web/20221102065119/https://www.ebi.ac.uk/metagenomics/,2020-01-01,"European Molecular Biology Laboratory, European 
Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridgeshire, CB10 1SD, UK.","Mitchell AL, Almeida A, Beracochea M, Boland M, Burgin J, Cochrane G, Crusoe MR, Kale V, Potter SC, Richardson LJ, Sakharova E, Scheremetjew M, Korobeynikov A, Shlemov A, Kunyavskaya O, Lapidus A, Finn RD, Mitchell A, Bucchini F, Cochrane G, Denise H, ten Hoopen P, Fraser M, Pesseat S, Potter S, Scheremetjew M, Sterk P, Finn RD",", ","Horizon 2020, Biotechnology and Biological Sciences Research Council, Biotechnology and Biosciences Research Council, Russian Fund for Basic Research, Horizon 2020, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, European Molecular Biology Laboratory, Biotechnology and Biosciences Research Council, Biotechnology and Biosciences Research Council, ELIXIR, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",164.0, +31705629,VariCarta,0.995876968,VariCarta,0.995876968,,0,1,http://varicarta.msl.ubc.ca,302,Canada,"(49.2643,-123.0961)",no_wayback,2019-11-09,"Michael Smith Laboratories, UBC, Vancouver, British Columbia, Canada.","Belmadani M, Jacobson M, Holmes N, Phan M, Nguyen T, Pavlidis P, Rogic S",,Simons Foundation,9.0,Canada +31837751,VetCOT,0.961757398,VetCOT,0.961757398,Trauma Registry,0.660841862,1,"http://vetcot.org/index.php/home/identification-and-verification-process/, http://vetcot.org/index.php/home/registry-use-materials","406, 406",,", ","http://web.archive.org/web/20220316113043/http://vetcot.org/index.php/home/identification-and-verification-process/, http://web.archive.org/web/20220419183456/http://vetcot.org/index.php/home/registry-use-materials/",2019-09-25,"Colorado State University, College of 
Veterinary Medicine and Biomedical Sciences, Fort Collins, CO, USA. Electronic address: KHall.Wilke@colostate.edu.",Hall K,,"National Institutes of Health, National Center for Advancing Translational Sciences",1.0,United States +31931895,TwinsMX,0.978481114,TwinsMX,0.978481114,,0,1,http://twinsmxofficial.unam.mx,301,,,http://web.archive.org/web/20221103124935/https://twinsmxofficial.unam.mx/,2019-12-01,"Laboratorio Internacional de Investigación sobre el Genoma Humano, Universidad Nacional Autónoma de México, Querétaro, México.","Leon-Apodaca AV, Chiu-Han E, Ortega-Mora I, Román-López TV, Caballero-Sánchez U, Aldana-Assad O, Campos AI, Cuellar-Partida G, Ruiz-Contreras AE, Alcauter S, Rentería ME, Medina-Rivera A",,,1.0, +"32055857, 26607947",SymGenDB,0.998103142,SymGenDB,0.998103142,Symbiotic Genomes Database,0.95245223,2,http://symbiogenomesdb.uv.es,200,,,http://web.archive.org/web/20221017153227/http://symbiogenomesdb.uv.es/,2020-01-01,"Evolutionary Systems Biology of Symbionts, Institute for Integrative Systems Biology (I2SysBio), Universitat de València, Paterna, València, Spain., Institut Cavanilles de Biodiversitat i Biologia Evolutiva, Universitat de València, Calle Catedrático José Beltrán 2, 46980 Paterna, València, Spain.","Reyes-Prieto M, Vargas-Chávez C, Llabrés M, Palmer P, Latorre A, Moya A, Reyes-Prieto M, Vargas-Chávez C, Latorre A, Moya A",", ","National Board of Science and Technology of México, ",6.0,"Spain, Spain" +32117874,GDB17,0.992214759,GDB17,0.992214759,,0,1,"http://gdb.unibe.ch, http://faerun.gdb.tools","301, 200",,", ","http://web.archive.org/web/20221005232107/https://gdb.unibe.ch/, http://web.archive.org/web/20220615102734/http://faerun.gdb.tools/",2020-02-04,"Department of Chemistry and Biochemistry, University of Bern, Bern, Switzerland.","Bühlmann S, Reymond JL",,Swiss National Science Foundation,7.0,Switzerland 
+32168374,uORFlight,0.997313261,uORFlight,0.997313261,,0,1,http://uorflight.whu.edu.cn,"HTTPConnectionPool(host='uorflight.whu.edu.cn', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to uorflight.whu.edu.cn timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220616024301/http://uorflight.whu.edu.cn/,2020-01-01,"State Key Laboratory of Hybrid Rice, Institute for Advanced Studies (IAS), Wuhan University, Wuhan, Hubei 430072, China.","Niu R, Zhou Y, Zhang Y, Mou R, Tang Z, Wang Z, Zhou G, Guo S, Yuan M, Xu G",,"National Natural Science Foundation of China, Wuhan University",8.0,China +32221380,WeiBI,0.996359855,WeiBI,0.996359855,WeiBiologicalInteractions,0.583205014,1,http://weislab.com/WeiDOCK/?page=PKPD,200,,,no_wayback,2020-03-27,"Wuxi School of Medicine, Jiangnan University, Wuxi, China. amanbioinfo@sjtu.edu.cn.","Kaushik AC, Mehmood A, Dai X, Wei DQ",,"Shanghai Jiao Tong University, Ministry of Science and Technology of the People&apos;s Republic of China, National Natural Science Foundation of China",1.0,China +32349124,VirusCircBase,0.996753037,VirusCircBase,0.996753037,,0,1,http://www.computationalbiology.cn/ViruscircBase/home.html,200,,,http://web.archive.org/web/20220601033446/http://www.computationalbiology.cn/ViruscircBase/home.html,2021-03-01,None,"Cai Z, Fan Y, Zhang Z, Lu C, Zhu Z, Jiang T, Shan T, Peng Y",,"Hunan Provincial Natural Science Foundation of China, Chinese Academy of Medical Sciences, National Key Plan for Scientific Research and Development of China, National Natural Science Foundation of China",13.0, +32380213,TrypInDB,0.996800661,TrypInDB,0.996800661,,0,1,http://trypindb.biomedinformri.com,200,United States,"(40.7357,-74.1724)",no_wayback,2020-05-05,"Department of Statistics / Bioinformatics Centre, Rajendra Memorial Research Institute of Medical Science, Indian Council for Medical Research, Agamkuan, Patna, Bihar, 800007, India. 
Electronic address: saravanan.vij@icmr.gov.in.","Vijayakumar S, Ranjan R, Kumar R, Das P",,Indian Council of Medical Research,1.0,India +32386298,MGP Portal,0.925392497,MGP Portal,0.925392497,Mnemiopsis Genome Project Portal,0.882159429,1,http://research.nhgri.nih.gov/mnemiopsis,302,,,http://web.archive.org/web/20220926023607/https://research.nhgri.nih.gov/mnemiopsis/,2020-01-01,"Computational and Statistical Genomics Branch, National Human Genome Research Institute, National Institutes of Health, Bethesda, MD, 20892, USA.","Moreland RT, Nguyen AD, Ryan JF, Baxevanis AD",,"National Institutes of Health, National Science Foundation, National Human Genome Research Institute",3.0,United States +32392583,WER target gene database,0.948100317,M6A2Target,0.923408449,WER target gene database,0.948100317,1,http://m6a2target.canceromics.org,302,,,no_wayback,2021-05-01,None,"Deng S, Zhang H, Zhu K, Li X, Ye Y, Li R, Liu X, Lin D, Zuo Z, Zheng J",,,24.0, +32431267,WorldWide Antimalarial Resistance Network,0.951693782,WWARN,0.904815594,WorldWide Antimalarial Resistance Network,0.951693782,1,http://www.wwarn.org/tools-resources/literature-reviews/wwarn-clinical-trials-publication-library,302,,,http://web.archive.org/web/20220127114000/https://www.wwarn.org/tools-resources/literature-reviews/wwarn-clinical-trials-publication-library,2020-05-07,"1WorldWide Antimalarial Resistance Network (WWARN), Oxford, United Kingdom.","Takata J, Sondo P, Humphreys GS, Burrow R, Maguire B, Hossain MS, Das D, Commons RJ, Price RN, Guerin PJ",,,4.0,United Kingdom +32454857,UNaProd,0.996470153,UNaProd,0.996470153,,0,1,http://jafarilab.com/unaprod,301,Germany,"(50.475,12.365)",http://web.archive.org/web/20210725081038/http://jafarilab.com/unaprod/,2020-05-13,"Department of Traditional Medicine, School of Persian Medicine, Tehran University of Medical Sciences, Tehran, Iran.","Naghizadeh A, Hamzeheian D, Akbari S, Mohammadi F, Otoufat T, Asgari S, Zarei A, Noroozi S, Nasiri N, Salamat M, Karbalaei R, 
Mirzaie M, Rezaeizadeh H, Karimi M, Jafari M",,,5.0, +32509450,VIRdb,0.994836926,VIRdb,0.994836926,Vitiligo Information Resource,0.84693079,1,http://www.vitiligoinfores.com,"HTTPConnectionPool(host='www.vitiligoinfores.com', port=80): Max retries exceeded with url: / (Caused by ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')))",,,http://web.archive.org/web/20220403094220/https://vitiligoinfores.com/,2020-05-21,"Amity Institute of Biotechnology, Amity University, Uttar Pradesh, India.","Srivastava P, Choudhury A, Talwar M, Mohanty S, Narad P, Sengupta A",,,1.0,India +32512488,TTRMDB,0.990589499,TTRMDB,0.990589499,Transthyretin mutant database,0.962966555,1,http://vit.ac.in/ttrmdb,307,India,"(12.9202,79.1306)",no_wayback,2020-05-25,"Bioinformatics Lab, Department of Biotechnology, School of Bio Sciences and Technology, Vellore Institute of Technology (Deemed to be University), Vellore 632014, Tamil Nadu, India.","Srinivasan E, Natarajan N, Rajasekaran R",,CSIR,1.0,India +32548865,UK Immunological Toolbox,0.771031559,UK Immunological Toolbox,0.771031559,,0,1,http://www.immunologicaltoolbox.co.uk,301,Canada,"(43.6532,-79.3832)",no_wayback,2020-07-29,"The Pirbright Institute, Woking, UK.","Mwangi W, Maccari G, Hope JC, Entrican G, Hammond JA",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, European Union Horizon 2020, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, BMGF, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Core Capability Grant awarded to the Roslin Institute., Biotechnology and Biological Sciences Research Council, SG RESAS Strategic Research Programme, UKRI-BBSRC/SG/BioRad, Biotechnology and Biological Sciences 
Research Council",3.0, +"32550548, 34530999",VitiVar,0.99736613,VitiVar,0.99736613,,0,2,http://vitivar.igib.res.in,301,,,http://web.archive.org/web/20211202213551/https://vitivar.igib.res.in/,2019-05-11,"CSIR-Institute of Genomics and Integrative Biology, Mathura Road, Delhi, India., CSIR-Institute of Genomics and Integrative Biology, Mathura Road, Delhi, India; Academy of Scientific and Innovative Research, CSIR Campus, CSIR Road, Chennai, India.","Gupta I, Narang A, Singh P, Manchanda V, Khanna S, , Mukerji M, Natarajan VT, Dash D, Gupta I, Narang A, Singh P, Manchanda V, Khanna S, , Mukerji M, Natarajan VT, Dash D",", ",", ",1.0,"India, India, India" +32738156,YIR,0.990129635,YIR,0.990129635,yeast interactome resource,0.962886065,1,http://yeast.biomedtzc.cn,200,,,http://web.archive.org/web/20220523234121/http://yeast.biomedtzc.cn/,2020-08-11,"Institute of Big Data and Artificial Intelligence in Medicine, School of Electronics and Information Engineering, Taizhou University, Taizhou, China.","Jin J, Tao YT, Ding XB, Guo WP, Ruan L, Yang QL, Chen PC, Yao H, Zhang HB, Chen X",,"National Natural Science Foundation of China, National Natural Science Foundation of China",0.0,China +32931381,ZenoFishDb,0.99299258,ZenoFishDb,0.99299258,,0,1,http://konulab.shinyapps.io/zenofishdb,301,,,no_wayback,2020-09-15,"Department of Molecular Biology and Genetics, Bilkent University, Ankara, Turkey.","Targen S, Kaya T, Avci ME, Gunes D, Keskus AG, Konu O",,,1.0,Turkey +33045721,ViruSurf,0.998281777,ViruSurf,0.998281777,,0,1,"http://gmql.eu/virusurf/, http://gmql.eu/virusurf_gisaid","302, 301","Italy, Italy","(44.4861,11.261), (44.4861,11.261)","no_wayback, no_wayback",2021-01-01,"Dipartimento di Elettronica, Informazione e Bioingegneria, Politecnico di Milano, Via Ponzio 34/5, 20133 Milano, Italy.","Canakoglu A, Pinoli P, Bernasconi A, Alfonsi T, Melidis DP, Ceri S",,"European Research Council, H2020 European Institute of Innovation and Technology",17.0,Italy 
+33068436,tsRBase,0.994211316,tsRBase,0.994211316,,0,1,http://www.tsrbase.org,200,,,http://web.archive.org/web/20220814060428/http://tsrbase.org/,2021-01-01,"Laboratory of Molecular Oncology, Frontiers Science Center for Disease-related Molecular Network, State Key Laboratory of Biotherapy and Cancer Center, West China Hospital, Sichuan University, Chengdu 610064, China.","Zuo Y, Zhu L, Guo Z, Liu W, Zhang J, Zeng Z, Wu Q, Cheng J, Fu X, Jin Y, Zhao Y, Peng Y",,"Science and Technology Program of Sichuan Province, National Key Research and Development Program of China, Science and Technology Program of Sichuan Province, National Natural Science Foundation of China, National Key Research and Development Program of China, National Natural Science Foundation of China, Sichuan University, Sichuan University",6.0,"China, China" +33094321,VPTMdb,0.996834993,VPTMdb,0.996834993,viral posttranslational modification database,0.67337137,1,http://vptmdb.com:8787/VPTMdb,301,,,http://web.archive.org/web/20211020231725/http://vptmdb.com:8787/VPTMdb/,2021-07-01,"State Key Laboratory of Integrated Management of Pest Insects and Rodents, Institute of Zoology, Chinese Academy of Sciences.","Xiang Y, Zou Q, Zhao L",,"Natural Science Foundation of China, Natural Science Foundation of China, Youth Innovation Promotion Association CAS, Natural Science Foundation of China, National Youth Talent Support Program of China",2.0, +33095866,VARAdb,0.997256637,VARAdb,0.997256637,comprehensive variation annotation database for human,0.841432224,1,http://www.licpathway.net/VARAdb,301,Hong Kong,"(22.2908,114.1501)",http://web.archive.org/web/20221017002443/http://licpathway.net/VARAdb/,2021-01-01,"School of Medical Informatics, Daqing Campus, Harbin Medical University. 
Daqing 163319, China.","Pan Q, Liu YJ, Bai XF, Han XL, Jiang Y, Ai B, Shi SS, Wang F, Xu MC, Wang YZ, Zhao J, Chen JX, Zhang J, Li XC, Zhu J, Zhang GR, Wang QY, Li CQ",,"Fundamental Research Funds, Natural Science Fundation for Distinguished Young Scholars of Heilongjiang Province of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Harbin Medical University",6.0,China +33174598,UniLectin,0.981569886,UniLectin,0.981569886,,0,1,http://www.unilectin.eu,302,France,"(50.9871,2.12554)",http://web.archive.org/web/20221108112721/https://www.unilectin.eu/,2021-01-01,"Univ. Grenoble Alpes, CNRS, CERMAV, 38000 Grenoble, France.","Bonnardel F, Mariethoz J, Pérez S, Imberty A, Lisacek F",,"Alliance Campus Rhodanien, Labex ARCANE, ANR",5.0,France +"33175170, 32102777, 33704069",2019nCoVR,0.997189482,2019nCoVR,0.997189482,2019 Novel Coronavirus Resource,0.953625798,3,http://bigd.big.ac.cn,301,,,no_wayback,2021-01-01,"None, China National Center for Bioinformation & National Genomics Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China; CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China; University of Chinese Academy of Sciences, Beijing 100049, China., China National Center for Bioinformation, Beijing 100101, China; National Genomics Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China; CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China; University of Chinese Academy of Sciences, Beijing 100049, China.",", Zhao WM, Song SH, Chen ML, Zou D, Ma LN, Ma YK, Li RJ, Hao LL, Li CP, Tian DM, Tang BX, Wang YQ, Zhu JW, Chen HX, Zhang Z, Xue YB, Bao YM, Song S, Ma L, Zou D, Tian D, Li C, Zhu J, Chen M, Wang A, Ma Y, Li M, Teng X, Cui Y, Duan G, Zhang M, Jin T, Shi C, Du Z, Zhang Y, Liu C, 
Li R, Zeng J, Hao L, Jiang S, Chen H, Han D, Xiao J, Zhang Z, Zhao W, Xue Y, Bao Y",", , ","Chinese Academy of Sciences, National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Key Research Program of the Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, China Postdoctoral Science Foundation, UK Royal Society-Newton Advanced Fellowship, Chinese Academy of Sciences, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China, China Postdoctoral Science Foundation, National Natural Science Foundation of China, National Natural Science Foundation of China, Key Research Program of Frontier Sciences of the Chinese Academy of Sciences, National Key Research and Development Program, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China, Chinese Academy of Sciences, China Postdoctoral Science Foundation, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Chinese Academy of Sciences, Chinese Academy of Sciences, Genomics Data Center Construction of Chinese 
Academy of Sciences, Key Technology Talent Program of the Chinese Academy of Sciences, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Chinese Academy of Sciences, National Natural Science Foundation of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key Research and Development Program of China, Chinese Academy of Sciences, National Science and Technology Basic Resources Investigation, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Zhangjiang special project of national innovation demonstration zone, National Key Research and Development Program of China, National Key Research and Development Program of China, Ministry of Science and Technology, National Key Research and Development Program of China, National Key Research and Development Program of China, Fundamental Research Funds for the Central Universities, National Natural Science Foundation of China, National Natural Science Foundation of China, Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences, National Key Research and Development Program of China, National Key Research and Development Program of China, National Key 
Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Chinese Academy of Sciences, Chinese Academy of Sciences, K. C. Wong Education Foundation, , Chinese Academy of Sciences, Chinese Academy of Sciences, Alliance of International Science Organizations, Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences, National Key R&D Program of China, National Key R&D Program of China, National Key R&D Program of China, National Key R&D Program of China, KC Wong Education Foundation, Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences",189.0,"China, China, China, China, China, China, China, China, China" +33181826,WGVD,0.993578374,WGVD,0.993578374,Wheat Genome Variation Database,0.973856658,1,http://animal.nwsuaf.edu.cn/code/index.php/Wheat,301,,,no_wayback,2020-01-01,"State Key Laboratory of Crop Stress Biology for Arid Areas and College of Plant Protection, Northwest A&F University, 3 Taicheng Rd, Yangling 712100, Shaanxi, China.","Wang J, Fu W, Wang R, Hu D, Cheng H, Zhao J, Jiang Y, Kang Z",,"China Agriculture Research System, National Key R&D Program of China, Natural Science Basic Research Plan in Shaanxi Province of China",0.0,China +33211851,WikiPathways,0.994370461,WikiPathways,0.994370461,,0,1,http://www.wikipathways.org,301,,,http://web.archive.org/web/20090107012003/http://wikipathways.org/,2021-01-01,"Department of Bioinformatics - BiGCaT, NUTRIM, Maastricht University, 6229 ER Maastricht, the Netherlands.","Martens M, Ammar A, Riutta A, Waagmeester A, Slenter DN, Hanspers K, A Miller R, Digles D, Lopes EN, Ehrhart F, Dupuis LJ, Winckers LA, Coort SL, Willighagen EL, Evelo CT, Pico AR, Kutmon M",,"Horizon 2020, Horizon 2020, Horizon 2020, National Institute of General Medical Sciences, National Institute of General Medical Sciences, NIGMS NIH HHS, Horizon 2020, Horizon 2020, ZonMw, NIGMS NIH HHS",136.0,Netherlands 
+33216899,WCSdb,0.997819245,WCSdb,0.997819245,Wild Coffee Species database,0.988351196,1,http://publish.plantnet-project.org/project/wildcofdb_en,200,,,http://web.archive.org/web/20201125062310/http://publish.plantnet-project.org/project/wildcofdb_en,2020-11-01,"Institut de Recherche pour le Développement, UMR DIADE, Université de Montpellier, 911 Avenue Agropolis, 34394 Montpellier, France.","Guyot R, Hamon P, Couturon E, Raharimalala N, Rakotomalala JJ, Lakkanna S, Sabatier S, Affouard A, Bonnet P",,Agropolis Fondation,1.0,France +"33245777, 29106613",3DIV,0.995887399,3DIV,0.995887399,3D-genome Interaction Viewer and database,0.817636555,2,http://3div.kr,"HTTPConnectionPool(host='3div.kr', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to 3div.kr timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20221103105901/http://3div.kr/,2021-01-01,"Department of Biological Sciences, Korea Advanced Institute of Science and Technology (KAIST), Daejeon 34141, Korea., nan","Kim K, Jang I, Kim M, Choi J, Kim MS, Lee B, Jung I, nan",", nan","National Research Foundation in the Republic of Korea, National Research Foundation in the Republic of Korea, National Research Foundation in the Republic of Korea, nan",5.0, +33367605,Virxicon,0.996595085,Virxicon,0.996595085,,0,1,http://virxicon.cs.put.poznan.pl,301,,,http://web.archive.org/web/20221017132351/https://virxicon.cs.put.poznan.pl/,2020-12-26,"Institute of Computing Science and European Centre for Bioinformatics and Genomics, Poznan University of Technology, Poznan, 60-965, Poland.","Kudla M, Gutowska K, Synak J, Weber M, Bohnsack KS, Lukasiak P, Villmann T, Blazewicz J, Szachniuk M",,"Institute of Bioorganic Chemistry, European Social Fund, National Science Centre, Poland, Polish Academy of Sciences, National Science Centre, Poland",1.0,Poland +33594411,Viral Host Range database,0.955648327,ViralHostRangeDB,0.884413302,Viral Host Range 
database,0.955648327,1,"http://viralhostrangedb.pasteur.cloud, http://gitlab.pasteur.fr/hub/viralhostrangedb","302, 302","France, France","(48.8323,2.4075), (48.8323,2.4075)","no_wayback, http://web.archive.org/web/20220522194036/https://gitlab.pasteur.fr/hub/viralhostrangedb",2021-02-17,"Bacteriophage, Bacterium, Host Laboratory, Department of Microbiology, Institut Pasteur, Paris, F-75015, France.","Lamy-Besnier Q, Brancotte B, Ménager H, Debarbieux L",,,3.0,France +33756618,WCO-Lite,0.967464912,WCO-Lite,0.967464912,orld Catalogue of Opiliones,0.777871438,1,http://wcolite.com,200,,,http://web.archive.org/web/20220920172617/https://wcolite.com/,2021-01-15,"Departamento de Invertebrados, Museu Nacional/UFRJ, Quinta da Boa Vista, São Cristóvão, 20.940-040, Rio de Janeiro-RJ, Brazil.. adrianok@gmail.com.","Kury AB, Mendes AC, Cardoso L, Kury MS, Granado AA, Yoder MJ, Kury IS",,,1.0,Brazil +33811468,virusMS,0.953528702,virusMS,0.953528702,,0,1,http://virusms.erc.monash.edu,301,,,no_wayback,2021-04-14,"Department of Biochemistry and Molecular Biology and Infection and Immunity Program, Monash Biomedicine Discovery Institute, Monash University, Clayton, Victoria, Australia.","Li C, Revote J, Ramarathinam SH, Chung SZ, Croft NP, Scull KE, Huang Z, Ayala R, Braun A, Mifsud NA, Illing PT, Faridi P, Purcell AW",,,1.0,Australia +33993461,TUPDB,0.994650185,TUPDB,0.994650185,Target-Unrelated Peptide Data Bank,0.885041343,1,http://i.uestc.edu.cn/tupdb,"HTTPConnectionPool(host='i.uestc.edu.cn', port=80): Max retries exceeded with url: /tupdb (Caused by ConnectTimeoutError(, 'Connection to i.uestc.edu.cn timed out. (connect timeout=5)'))",,,no_wayback,2021-05-16,"School of Medicine, Guizhou University, Guiyang, 550025, China. 
bfhe@gzu.edu.cn.","He B, Yang S, Long J, Chen X, Zhang Q, Gao H, Chen H, Huang J",,"Guizhou Science and Technology Department, National Natural Science Foundation of China, Guizhou University, Guizhou Science and Technology Department, National Natural Science Foundation of China, Guizhou Science and Technology Department, Guizhou University, China Postdoctoral Science Foundation, China Postdoctoral Science Foundation, National Natural Science Foundation of China",1.0,China +34285772,Yeast Phosphoinositide-Binding Proteins,0.98663541,YPIBP,0.980853081,Yeast Phosphoinositide-Binding Proteins,0.98663541,1,http://cosbi7.ee.ncku.edu.tw/YPIBP,301,,,no_wayback,2021-06-24,"Department of Earth Sciences, College of Sciences, National Cheng Kung University, Tainan 701, Taiwan.","Rathod J, Yen HC, Liang B, Tseng YY, Chen CS, Wu WS",,Taiwan Ministry of Science and Technology,0.0, +34601118,Viral Putative G-quadruplex,0.673970756,,0,Viral Putative G-quadruplex,0.673970756,1,http://jsjds.hzau.edu.cn/MBPC/ViPGD/index.php/home/index,"HTTPConnectionPool(host='jsjds.hzau.edu.cn', port=80): Max retries exceeded with url: /MBPC/ViPGD/index.php/home/index (Caused by ConnectTimeoutError(, 'Connection to jsjds.hzau.edu.cn timed out. 
(connect timeout=5)'))",,,no_wayback,2021-09-30,"State Key Laboratory of Agricultural Microbiology, College of Veterinary Medicine, Huazhong Agricultural University, Wuhan, Hubei 430070, China; Hubei Hongshan Laboratory, Huazhong Agricultural University, Wuhan 430070, China; Interdisciplinary Sciences Institute, Huazhong Agricultural University, Wuhan 430070, China; College of Science, Huazhong Agricultural University, Wuhan 430070, China.","Li Z, Qian SH, Wang F, Mohamed HI, Yang G, Chen ZX, Wei D",,National Natural Science Foundation of China,1.0,"China, China, China, China" +34848704,Water dropwortDB,0.96038208,Water dropwortDB,0.96038208,,0,1,http://apiaceae.njau.edu.cn/waterdropwortdb,"HTTPConnectionPool(host='apiaceae.njau.edu.cn', port=80): Max retries exceeded with url: /waterdropwortdb (Caused by ReadTimeoutError(""HTTPConnectionPool(host='apiaceae.njau.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20210418203235/http://apiaceae.njau.edu.cn/waterdropwortdb,2021-12-01,"State Key Laboratory of Crop Genetics and Germplasm Enhancement, Ministry of Agriculture and Rural Affairs Key Laboratory of Biology and Germplasm Enhancement of Horticultural Crops in East China, College of Horticulture, Nanjing Agricultural University, 1 Weigang, 210095, Nanjing, China.","Liu JX, Jiang Q, Tao JP, Feng K, Li T, Duan AQ, Wang H, Xu ZS, Liu H, Xiong AS",,,4.0,"China, China" +"20931385, 24217918, 28713666, 29165610, 33170273, 22374386",Gramene,0.975419879,Gramene,0.975419879,Gramene Database,0.524727866,6,http://www.gramene.org,301,,,no_wayback,2021-01-01,"Department of Botany and Plant Pathology, Oregon State University, Corvallis, OR, USA., nan, nan, nan, Cold Spring Harbor Laboratory, Cold Spring Harbor, NY 11724, USA., nan","Jaiswal P, nan, nan, nan, Tello-Ruiz MK, Naithani S, Gupta P, Olson A, Wei S, Preece J, Jiao Y, Wang B, Chougule K, Garg P, Elser J, Kumari S, Kumar V, Contreras-Moreira B, Naamati G, George N, Cook J, Bolser D, 
D'Eustachio P, Stein LD, Gupta A, Xu W, Regala J, Papatheodorou I, Kersey PJ, Flicek P, Taylor C, Jaiswal P, Ware D, nan",", nan, nan, nan, , nan",", nan, nan, nan, United States Department of Agriculture, United States Department of Agriculture, National Science Foundation, European Molecular Biology Laboratory, Open Targets, United States Department of Agriculture, Biotechnology and Biological Sciences Research Council, Wellcome Trust, National Science Foundation, NHGRI NIH HHS, National Institutes of Health, Wellcome Trust, NHGRI NIH HHS, Ontario Research Fund, United Kingdom Biotechnology and Biosciences Research Council, NSF, EU, EMBL, nan",53.0,"United States, United States" +"21063943, 23203882, 30395289, 26527722",PRIDE,0.993364791,PRIDE,0.993364791,Proteomics Identifications Database,0.96215572,4,http://www.ebi.ac.uk/pride,301,,,no_wayback,2019-01-01,"European Molecular Biology Laboratory, European Bioinformatics Institute, Cambridge, UK., nan, nan, nan","Vizcaíno JA, Reisinger F, Côté R, Martens L, nan, nan, nan",", nan, nan, nan",", nan, nan, nan",9.0, +"21296746, 21321022, 21478484, 23255149, 23603846, 23794737, 23881287, 25355511, 27602200, 27736745, 29761460, 31713623, 34741192, 27009807",RGD,0.984376073,RGD,0.984376073,Rat Genome Database,0.897326604,14,http://rgd.mcw.edu,302,,,http://web.archive.org/web/20221101143649/https://rgd.mcw.edu/,2021-11-05,"Rat Genome Database, Human and Molecular Genetics Center, Medical College of Wisconsin, 8701 Watertown Plank Road, Milwaukee, WI 53226, USA. shimoyama@mcw.edu, Human and Molecular Genetics Center, Medical College of Wisconsin, 8701 Watertown Plank Rd, Milwaukee, WI 53226-3548, USA. slaulederkind@mcw.edu, Human and Molecular Genetics Center, Medical College of Wisconsin, WI, 53226-3548, USA. 
vpetri@mcw.edu, Human and Molecular Genetics Center, Medical College of Wisconsin, Milwaukee, Wisconsin, USA., Human and Molecular Genetics Center, Medical College of Wisconsin, Human and Molecular Genetics Center, 8701 Watertown Plank Rd, Milwaukee, WI 53226-3548, USA. slaulederkind@mcw.edu, Rat Genome Database, Human and Molecular Genetics Center, Medical College of Wisconsin, 8701 Watertown Plank Road, Milwaukee, WI 53226, USA. sjwang@mcw.edu., Human and Molecular Genetics Center, Medical College of Wisconsin, Milwaukee, Wisconsin;, Human and Molecular Genetics Center, Medical College of Wisconsin, Milwaukee, WI 53226, USA Department of Surgery, Medical College of Wisconsin, Milwaukee, WI 53226, USA shimoyama@mcw.edu., nan, Medical College of Wisconsin, Department of Surgery, Milwaukee, WI 53226, USA shimoyama@mcw.edu., Department of Biomedical Engineering, Medical College of Wisconsin and Marquette University, Milwaukee, WI, USA. slaulede@mcw.edu., Rat Genome Database, Department of Biomedical Engineering, Medical College of Wisconsin, Milwaukee, WI 53226, USA., Department of Biomedical Engineering, The Rat Genome Database, Medical College of Wisconsin, Milwaukee, WI, USA., Medical College of Wisconsin, Human and Molecular Genetics Center Department of Surgery, Medical College of Wisconsin, Milwaukee, WI, USA gthayman@mcw.edu.","Shimoyama M, Smith JR, Hayman T, Laulederkind S, Lowry T, Nigam R, Petri V, Wang SJ, Dwinell M, Jacob H, , Laulederkind SJ, Shimoyama M, Hayman GT, Lowry TF, Nigam R, Petri V, Smith JR, Wang SJ, de Pons J, Kowalski G, Liu W, Rood W, Munzenmaier DH, Dwinell MR, Twigger SN, Jacob HJ, , Petri V, Shimoyama M, Hayman GT, Smith JR, Tutaj M, de Pons J, Dwinell MR, Munzenmaier DH, Twigger SN, Jacob HJ, , Laulederkind SJ, Hayman GT, Wang SJ, Lowry TF, Nigam R, Petri V, Smith JR, Dwinell MR, Jacob HJ, Shimoyama M, Laulederkind SJ, Liu W, Smith JR, Hayman GT, Wang SJ, Nigam R, Petri V, Lowry TF, de Pons J, Dwinell MR, Shimoyama M, Wang SJ, 
Laulederkind SJ, Hayman GT, Smith JR, Petri V, Lowry TF, Nigam R, Dwinell MR, Worthey EA, Munzenmaier DH, Shimoyama M, Jacob HJ, Nigam R, Laulederkind SJ, Hayman GT, Smith JR, Wang SJ, Lowry TF, Petri V, De Pons J, Tutaj M, Liu W, Jayaraman P, Munzenmaier DH, Worthey EA, Dwinell MR, Shimoyama M, Jacob HJ, Shimoyama M, De Pons J, Hayman GT, Laulederkind SJ, Liu W, Nigam R, Petri V, Smith JR, Tutaj M, Wang SJ, Worthey E, Dwinell M, Jacob H, nan, Shimoyama M, Laulederkind SJ, De Pons J, Nigam R, Smith JR, Tutaj M, Petri V, Hayman GT, Wang SJ, Ghiasvand O, Thota J, Dwinell MR, Laulederkind SJF, Hayman GT, Wang SJ, Smith JR, Petri V, Hoffman MJ, De Pons J, Tutaj MA, Ghiasvand O, Tutaj M, Thota J, Dwinell MR, Shimoyama M, Smith JR, Hayman GT, Wang SJ, Laulederkind SJF, Hoffman MJ, Kaldunski ML, Tutaj M, Thota J, Nalabolu HS, Ellanki SLR, Tutaj MA, De Pons JL, Kwitek AE, Dwinell MR, Shimoyama ME, Kaldunski ML, Smith JR, Hayman GT, Brodie K, De Pons JL, Demos WM, Gibson AC, Hill ML, Hoffman MJ, Lamers L, Laulederkind SJF, Nalabolu HS, Thorat K, Thota J, Tutaj M, Tutaj MA, Vedi M, Wang SJ, Zacher S, Dwinell MR, Kwitek AE, Hayman GT, Laulederkind SJ, Smith JR, Wang SJ, Petri V, Nigam R, Tutaj M, De Pons J, Dwinell MR, Shimoyama M",", , , , , , , , nan, , , , , ","NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, , NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, nan, NHLBI NIH HHS, NHGRI NIH HHS, NHLBI NIH HHS, NHLBI NIH HHS, National Institutes of Health, National Heart, Lung, and Blood Institute, NHLBI NIH HHS, NHGRI NIH HHS, National Heart, Lung, and Blood Institute, National Human Genome Research Institute, ",274.0,"United States, United States, United States, United States, United States, United States, United States, United States, United States, United States, United States, United States, United States" +"21447597, 33237286, 
22102590, 23161681, 24253303, 25348405, 26519399, 30395287",UniProtKB,0.997420847,UniProtKB,0.997420847,Universal Protein Resource,0.747594476,8,http://www.uniprot.org,301,,,http://web.archive.org/web/20221104172213/https://www.uniprot.org/,2021-01-01,"nan, None, The EMBL Outstation, The European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK., The EMBL Outstation, The European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK., None, None, Swiss Institute of Bioinformatics, Centre Medical Universitaire, rue Michel Servet 1, CH-1211, Geneva 4, Switzerland. emmanuel.boutet@isb-sib.ch., None","nan, , , , , , Boutet E, Lieberherr D, Tognolli M, Schneider M, Bansal P, Bridge AJ, Poux S, Bougueleret L, Xenarios I, ","nan, , , , , , , ","nan, National Institute of General Medical Sciences, Open Targets, National Cancer Institute, NHGRI NIH HHS, National Institute of Allergy and Infectious Diseases, NIGMS NIH HHS, European Molecular Biology Laboratory, National Eye Institute, National Heart, Lung, and Blood Institute, Biotechnology and Biological Sciences Research Council, Swiss Federal Government, Biotechnology and Biological Sciences Research Council, National Institute of General Medical Sciences, National Human Genome Research Institute, National Institute of Diabetes and Digestive and Kidney Diseases, National Institutes of Health, Biotechnology and Biological Sciences Research Council, NIGMS NIH HHS, British Heart Foundation, NHGRI NIH HHS, NHGRI NIH HHS, NCRR NIH HHS, NIGMS NIH HHS, Medical Research Council, NIGMS NIH HHS, NHGRI NIH HHS, NLM NIH HHS, European Commission FP7, European Commission FP7, European Commission FP7, British Heart Foundation, NIGMS NIH HHS, NLM NIH HHS, NLM NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, British Heart Foundation, British Heart Foundation, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, 
British Heart Foundation, NLM NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NLM NIH HHS, Parkinson's UK, NHGRI NIH HHS, Wellcome Trust, British Heart Foundation, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NLM NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, Biotechnology and Biological Sciences Research Council, National Institute of General Medical Sciences, National Institute of General Medical Sciences, NIGMS NIH HHS, National Institute of General Medical Sciences, National Institutes of Health, National Human Genome Research Institute, National Human Genome Research Institute, NIGMS NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NCATS NIH HHS, NIGMS NIH HHS, British Heart Foundation",8087.0,Switzerland +21520333,LOVD,0.994573355,LOVD,0.994573355,Leiden Open-source Variation Database,0.861171469,1,http://www.LOVD.nl,301,,,http://web.archive.org/web/20221108132410/https://www.lovd.nl/,2011-02-22,"Center of Human and Clinical Genetics, Department of Human Genetics, Leiden University Medical Center, Leiden, Nederland.","Fokkema IF, Taschner PE, Schaafsma GC, Celli J, Laros JF, den Dunnen JT",,European Commission FP7,498.0, +21904429,MGDB,0.991700888,MGDB,0.991700888,Unlabelled Mycobacteriophage genome database,0.943340257,1,http://mpgdb.ibioinformatics.org/mpgdb.php,"HTTPConnectionPool(host='mpgdb.ibioinformatics.org', port=80): Max retries exceeded with url: /mpgdb.php (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20140722091559/http://mpgdb.ibioinformatics.org/mpgdb.php,2011-08-02,"Department of Bacteriology, Tuberculosis Research Centre (Indian Council of Medical Research), Mayor VR Ramanathan road, Chetput, Chennai - 600 031, India.","Joseph J, Rajendran V, Hassan S, Kumar V",,,3.0,India +"22064862, 24185697, 
27738138",CGD,0.991008719,CGD,0.991008719,Candida Genome Database,0.961281765,3,http://www.candidagenome.org,200,United States,"(45.5235,-122.676)",http://web.archive.org/web/20221108212053/http://www.candidagenome.org/,2016-10-13,"Department of Genetics, Stanford University Medical School, Stanford, CA 94305-5120, USA., Department of Genetics, Stanford University Medical School, Stanford, CA 94305-5120, USA., Department of Genetics, Stanford University Medical School, Stanford, CA 94305-5120, USA.","Inglis DO, Arnaud MB, Binkley J, Shah P, Skrzypek MS, Wymore F, Binkley G, Miyasato SR, Simison M, Sherlock G, Binkley J, Arnaud MB, Inglis DO, Skrzypek MS, Shah P, Wymore F, Binkley G, Miyasato SR, Simison M, Sherlock G, Skrzypek MS, Binkley J, Binkley G, Miyasato SR, Simison M, Sherlock G",", , ","NIDCR NIH HHS, NIDCR NIH HHS, NIDCR NIH HHS",312.0,"United States, United States, United States" +"22067447, 24163254",Ensembl Genomes,0.818894243,Ensembl Genomes,0.818894243,,0,2,http://www.ensemblgenomes.org,301,United Kingdom,"(52.1929,0.1256)",http://web.archive.org/web/20221106052852/https://ensemblgenomes.org/,2013-10-25,"Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. 
pkersey@ebi.ac.uk, The European Molecular Biology Laboratory, The European Bioinformatics Institute, The Wellcome Trust Genome Campus, Hinxton, Cambridgeshire, CB10 1SD, UK, Wellcome Trust Sanger Centre, The Wellcome Trust Genome Campus, Hinxton, Cambridgeshire, CB10 1SA, UK, Cold Spring Harbor Laboratory, 1 Bungtown Rd, Cold Spring Harbor, NY 11724, USA and USDA-ARS, Cornell University, Ithaca, NY, 14853, USA.","Kersey PJ, Staines DM, Lawson D, Kulesha E, Derwent P, Humphrey JC, Hughes DS, Keenan S, Kerhornou A, Koscielny G, Langridge N, McDowall MD, Megy K, Maheswari U, Nuhn M, Paulini M, Pedro H, Toneva I, Wilson D, Yates A, Birney E, Kersey PJ, Allen JE, Christensen M, Davis P, Falin LJ, Grabmueller C, Hughes DS, Humphrey J, Kerhornou A, Khobova J, Langridge N, McDowall MD, Maheswari U, Maslen G, Nuhn M, Ong CK, Paulini M, Paulini M, Pedro H, Toneva I, Tuli MA, Walts B, Williams G, Wilson D, Youens-Clark K, Monaco MK, Stein J, Wei X, Ware D, Bolser DM, Howe KL, Kulesha E, Lawson D, Staines DM",", ","Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust",234.0,"United States, United States" +"22102583, 29136208, 31696236",MPD,0.996881326,MPD,0.996881326,Mouse Phenome 
Database,0.988143757,3,http://phenome.jax.org,301,,,http://web.archive.org/web/20221107120839/https://phenome.jax.org/,2020-01-01,"The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA., The Jackson Laboratory, Bar Harbor, Maine 04609, USA., The Jackson Laboratory, Bar Harbor, Maine, ME 04609, USA.","Maddatu TP, Grubb SC, Bult CJ, Bogue MA, Bogue MA, Grubb SC, Walton DO, Philip VM, Kolishovski G, Stearns T, Dunn MH, Skelly DA, Kadakkuzha B, TeHennepe G, Kunde-Ramamoorthy G, Chesler EJ, Bogue MA, Philip VM, Walton DO, Grubb SC, Dunn MH, Kolishovski G, Emerson J, Mukherjee G, Stearns T, He H, Sinha V, Kadakkuzha B, Kunde-Ramamoorthy G, Chesler EJ",", , ","NHLBI NIH HHS, NIA NIH HHS, NIA NIH HHS, NIMH NIH HHS, NIA NIH HHS, NIA NIH HHS, NIDA NIH HHS, NIDA NIH HHS, NHGRI NIH HHS, NIDA NIH HHS, NIDA NIH HHS, NCI NIH HHS, NIDA NIH HHS, National Institutes of Health, National Institutes of Health, NIDA NIH HHS, National Institute on Drug Abuse, NIA NIH HHS, National Institute on Aging, National Institutes of Health, NIDA NIH HHS, National Institutes of Health",88.0,"United States, United States, United States" +"22110037, 23487186, 24265222, 27252399, 29140510, 32128557, 26631132",SGD,0.98170195,SGD,0.98170195,Saccharomyces Genome Database,0.977382439,7,http://www.yeastgenome.org,301,,,http://web.archive.org/web/20221101065827/https://www.yeastgenome.org/,2020-01-01,"Department of Genetics, Stanford University, Stanford, CA 94305-5120, USA. 
cherry@stanford.edu, Department of Genetics, Stanford University, Stanford, CA, USA., Department of Genetics, Stanford University School of Medicine, Stanford, CA 94305, USA., Department of Genetics, Stanford University, Stanford, CA, USA., Department of Genetics, Stanford University, Stanford, CA, 94305-5120 USA., Department of Genetics, Stanford University, 3165 Porter Drive, Palo Alto, CA 94304, USA., Department of Genetics, Stanford University School of Medicine, Stanford, California 94305-5120.","Cherry JM, Hong EL, Amundsen C, Balakrishnan R, Binkley G, Chan ET, Christie KR, Costanzo MC, Dwight SS, Engel SR, Fisk DG, Hirschman JE, Hitz BC, Karra K, Krieger CJ, Miyasato SR, Nash RS, Park J, Skrzypek MS, Simison M, Weng S, Wong ED, Engel SR, Cherry JM, Costanzo MC, Engel SR, Wong ED, Lloyd P, Karra K, Chan ET, Weng S, Paskov KM, Roe GR, Binkley G, Hitz BC, Cherry JM, Song G, Balakrishnan R, Binkley G, Costanzo MC, Dalusag K, Demeter J, Engel S, Hellerstedt ST, Karra K, Hitz BC, Nash RS, Paskov K, Sheppard T, Skrzypek M, Weng S, Wong E, Michael Cherry J, Skrzypek MS, Nash RS, Wong ED, MacPherson KA, Hellerstedt ST, Engel SR, Karra K, Weng S, Sheppard TK, Binkley G, Simison M, Miyasato SR, Cherry JM, Nash RS, Weng S, Karra K, Wong ED, Engel SR, Cherry JM, , Cherry JM",", , , , , , ","NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, , NHGRI NIH HHS, National Institutes of Health, NHGRI, NHGRI, NHGRI NIH HHS",957.0,"United States, United States, United States, United States, United States, United States" +"22135294, 33196801, 24285305, 26586799, 29140524",NONCODE,0.997087181,NONCODE,0.997087181,,0,5,http://www.noncode.org,200,,,http://web.archive.org/web/20220523201319/http://noncode.org/,2021-01-01,"Bioinformatics Research Group, Key Laboratory of Intelligent Information Processing, Advanced Computer Research Center, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, PR China., Key Laboratory of 
Intelligent Information Processing, Advanced Computer Research Center, Institute of Computing Technology, Chinese Academy of Sciences, Beijing 100190, China., Bioinformatics Research Group, Advanced Computing Research Laboratory, Institute of Computing Technology, Chinese Academy of Sciences, Beijing 100190, China, University of Chinese Academy of Sciences, Beijing 100049, China, Laboratory of Noncoding RNA, Institute of Biophysics, Chinese Academy of Sciences, Beijing 100101, China and Taicang Institute of Life Sciences Information, Suzhou 215400, China., School of Medicine, MOE Key Laboratory of Bioinformatics and Bioinformatics Division, Center for Synthetic and System Biology, TNLIST/Department of Automation, Tsinghua University, Beijing 100084, China., Key Laboratory of Intelligent Information Processing, Advanced Computer Research Center, Institute of Computing Technology, Chinese Academy of Sciences, Beijing 100190, China.","Bu D, Yu K, Sun S, Xie C, Skogerbø G, Miao R, Xiao H, Liao Q, Luo H, Zhao G, Zhao H, Liu Z, Liu C, Chen R, Zhao Y, Zhao L, Wang J, Li Y, Song T, Wu Y, Fang S, Bu D, Li H, Sun L, Pei D, Zheng Y, Huang J, Xu M, Chen R, Zhao Y, He S, Xie C, Yuan J, Li H, Li M, Zhao G, Bu D, Zhu W, Wu W, Chen R, Zhao Y, Zhao Y, Li H, Fang S, Kang Y, Wu W, Hao Y, Li Z, Bu D, Sun N, Zhang MQ, Chen R, Fang S, Zhang L, Guo J, Niu Y, Wu Y, Li H, Zhao L, Li X, Teng X, Sun X, Sun L, Zhang MQ, Chen R, Zhao Y",", , , , ",", National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program, National Natural Science Foundation of China, China Postdoctoral Innovative Talent Foundation, China Postdoctoral Science Foundation, National Natural Science Foundation of China, Chinese Academy of Sciences, National Key Research and Development Program, Institute of Computing Technology, National Key Research and Development Program, National Key Research and 
Development Program, Chinese Academy of Sciences, Natural Science Foundation for Young Scholars of China, Chinese Academy of Sciences, National Key Research and Development Program, Chinese Academy of Sciences, , , ",977.0,"China, China, China, China, China, China, China, China" +22139919,Nematode.net,0.99133563,Nematode.net,0.99133563,,0,1,http://nematode.net,"HTTPConnectionPool(host='nematode.net', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to nematode.net timed out. (connect timeout=5)'))",,,no_wayback,2011-12-01,"The Genome Institute, Washington University School of Medicine, St Louis, MO 63108, USA.","Martin J, Abubucker S, Heizer E, Taylor CM, Mitreva M",,NIAID NIH HHS,33.0,United States +"22140110, 24198245, 26400175, 27899599, 30371825, 33151290, 28346087",PubChem,0.990066409,PubChem,0.990066409,,0,7,http://pubchem.ncbi.nlm.nih.gov,301,,,http://web.archive.org/web/20221110171543/https://pubchem.ncbi.nlm.nih.gov/,2021-01-01,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA. 
ywang@ncbi.nlm.nih.gov, National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Department of Health and Human Services, Bethesda, MD 20894, USA., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA ywang@ncbi.nlm.nih.gov., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Department of Health and Human Services, Bethesda, MD 20894, USA., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Department of Health and Human Services, Bethesda, MD, 20894, USA., 1 National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, USA.","Wang Y, Xiao J, Suzek TO, Zhang J, Wang J, Zhou Z, Han L, Karapetyan K, Dracheva S, Shoemaker BA, Bolton E, Gindulyte A, Bryant SH, Wang Y, Suzek T, Zhang J, Wang J, He S, Cheng T, Shoemaker BA, Gindulyte A, Bryant SH, Kim S, Thiessen PA, Bolton EE, Chen J, Fu G, Gindulyte A, Han L, He J, He S, Shoemaker BA, Wang J, Yu B, Zhang J, Bryant SH, Wang Y, Bryant SH, Cheng T, Wang J, Gindulyte A, Shoemaker BA, Thiessen PA, He S, Zhang J, Kim S, Chen J, Cheng T, Gindulyte A, He J, He S, Li Q, Shoemaker BA, Thiessen PA, Yu B, Zaslavsky L, Zhang J, Bolton EE, Kim S, Chen J, Cheng T, Gindulyte A, He J, He S, Li Q, Shoemaker BA, Thiessen PA, Yu B, Zaslavsky L, Zhang J, Bolton EE, Wang Y, Cheng T, Bryant SH",", , , , , , ","Intramural NIH HHS, Intramural NIH HHS, Intramural NIH HHS, , National Institutes of Health, National Institutes of Health, NLM NIH HHS, National Library of Medicine, National Institutes of Health",2740.0,"United States, United States, United States, United States, United States, United States, United 
States" +22140215,RAC,0.979894936,RAC,0.979894936,Repository of Antibiotic resistance Cassettes,0.828436719,1,http://www2.chi.unsw.edu.au/rac,"HTTPConnectionPool(host='www2.chi.unsw.edu.au', port=80): Max retries exceeded with url: /rac (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20140722104155/http://www2.chi.unsw.edu.au/rac/,2011-12-02,"Centre for Health Informatics, Australian Institute of Health Innovation, University of New South Wales, Australia. guyt@unsw.edu.au","Tsafnat G, Copty J, Partridge SR",,,24.0,Australia +22238270,BESC,0.902424991,BESC,0.902424991,,0,1,http://besckb.ornl.gov,"HTTPConnectionPool(host='besckb.ornl.gov', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20161117233959/http://besckb.ornl.gov/,2012-01-11,"BioEnergy Science Center, Oak Ridge National Laboratory, Oak Ridge, TN, USA. syedmh@ornl.gov","Syed MH, Karpinets TV, Parang M, Leuze MR, Park BH, Hyatt D, Brown SD, Moulton S, Galloway MD, Uberbacher EC",,,4.0,United States +22434840,Aptamer Base,0.971290752,Aptamer Base,0.971290752,,0,1,http://aptamer.freebase.com,404,,,http://web.archive.org/web/20120717200348/http://aptamer.freebase.com:80/,2012-03-20,"Department of Biology, Carleton University, Ottawa, ON, Canada. jctoledo@connect.carleton.ca","Cruz-Toledo J, McKeague M, Zhang X, Giamberardino A, McConnell E, Francis T, DeRosa MC, Dumontier M",,,24.0,Canada +22564364,PharmGKB,0.992674232,PharmGKB,0.992674232,,0,1,http://bioai4core.fulton.asu.edu/snpshot,"HTTPConnectionPool(host='bioai4core.fulton.asu.edu', port=80): Max retries exceeded with url: /snpshot (Caused by ConnectTimeoutError(, 'Connection to bioai4core.fulton.asu.edu timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20100707040528/http://bioai4core.fulton.asu.edu/snpshot/,2012-04-30,"Computer Science Department, Arizona State University, 699 S Mill Ave., Tempe, AZ 85281, USA. jorg.hakenberg@roche.com","Hakenberg J, Voronov D, Nguyên VH, Liang S, Anwar S, Lumpkin B, Leaman R, Tari L, Baral C",,"National Science Foundation, Science Foundation Arizona, Arizona State University, Fulbright International Student Program Russia",27.0,United States +"22753137, 21702733",ALSoD,0.907861531,ALSoD,0.907861531,Online Database,0.633407295,2,http://alsod.iop.kcl.ac.uk,301,United Kingdom,"(51.5115,-0.115997)",no_wayback,2012-07-16,"Department of Clinical Neuroscience, King's College London, Institute of Psychiatry, London, UK., Neuropsychiatric Genetics Group, Department of Vertebrate Genomics, Max Planck Institute for Molecular Genetics, Berlin, Germany.","Abel O, Powell JF, Andersen PM, Al-Chalabi A, Lill CM, Abel O, Bertram L, Al-Chalabi A",", ","European Commission FP7, European Commission FP7",183.0,Germany +"23074187, 27899582, 28838067, 33170210, 30407545, 26097180",ZFIN,0.994089166,ZFIN,0.994089166,Zebrafish Model Organism Database,0.985861821,6,http://zfin.org,405,,,no_wayback,2021-01-01,"ZFIN, 5291 University of Oregon, Eugene, OR 97403-5291, USA. 
dhowe@zfin.org, The Institute of Neuroscience, University of Oregon, Eugene, OR 97403-1254, USA dhowe@zfin.org., Zebrafish Model Organism Database, University of Oregon, Eugene, Oregon., The Institute of Neuroscience, University of Oregon, Eugene, OR 97403-1254, USA., The Institute of Neuroscience, University of Oregon, Eugene, OR 97403-1254, USA., ZFIN, 5291 University of Oregon, Eugene, Oregon.","Howe DG, Bradford YM, Conlin T, Eagle AE, Fashena D, Frazer K, Knight J, Mani P, Martin R, Moxon SA, Paddock H, Pich C, Ramachandran S, Ruef BJ, Ruzicka L, Schaper K, Shao X, Singer A, Sprunger B, Van Slyke CE, Westerfield M, Howe DG, Bradford YM, Eagle A, Fashena D, Frazer K, Kalita P, Mani P, Martin R, Moxon ST, Paddock H, Pich C, Ramachandran S, Ruzicka L, Schaper K, Shao X, Singer A, Toro S, Van Slyke C, Westerfield M, Bradford YM, Toro S, Ramachandran S, Ruzicka L, Howe DG, Eagle A, Kalita P, Martin R, Taylor Moxon SA, Schaper K, Westerfield M, Howe DG, Ramachandran S, Bradford YM, Fashena D, Toro S, Eagle A, Frazer K, Kalita P, Mani P, Martin R, Moxon ST, Paddock H, Pich C, Ruzicka L, Schaper K, Shao X, Singer A, Van Slyke CE, Westerfield M, Ruzicka L, Howe DG, Ramachandran S, Toro S, Van Slyke CE, Bradford YM, Eagle A, Fashena D, Frazer K, Kalita P, Mani P, Martin R, Moxon ST, Paddock H, Pich C, Schaper K, Shao X, Singer A, Westerfield M, Ruzicka L, Bradford YM, Frazer K, Howe DG, Paddock H, Ramachandran S, Singer A, Toro S, Van Slyke CE, Eagle AE, Fashena D, Kalita P, Knight J, Mani P, Martin R, Moxon SA, Pich C, Schaper K, Shao X, Westerfield M",", , , , , ","NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, National Institutes of Health, National Institutes of Health, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, National Institutes of Health, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, National Human Genome Research Institute of the National 
Institutes of Health",291.0,"United States, United States, United States, United States" +"23093600, 25326323, 27651457, 29846728, 30247620, 33068428, 29351546",CTD,0.997465014,CTD,0.997465014,Comparative Toxicogenomics Database,0.988830974,7,http://ctdbase.org,200,United States,"(35.779,-78.6778)",no_wayback,2021-01-01,"Department of Biology, North Carolina State University, Raleigh, NC 27695-7617, USA. apdavis3@ncsu.edu, Department of Biological Sciences, North Carolina State University, Raleigh, NC 27695-7617, USA apdavis3@ncsu.edu., Department of Biological Sciences, North Carolina State University, Raleigh, NC 27695, USA apdavis3@ncsu.edu., Department of Biological Sciences., Department of Biological Sciences, North Carolina State University, Raleigh, NC 27695, USA., Department of Biological Sciences, North Carolina State University, Raleigh, NC 27695, USA., Department of Biological Sciences, North Carolina State University, Raleigh, North Carolina, USA.","Davis AP, Murphy CG, Johnson R, Lay JM, Lennon-Hopkins K, Saraceni-Richards C, Sciaky D, King BL, Rosenstein MC, Wiegers TC, Mattingly CJ, Davis AP, Grondin CJ, Lennon-Hopkins K, Saraceni-Richards C, Sciaky D, King BL, Wiegers TC, Mattingly CJ, Davis AP, Grondin CJ, Johnson RJ, Sciaky D, King BL, McMorran R, Wiegers J, Wiegers TC, Mattingly CJ, Davis AP, Wiegers TC, Wiegers J, Johnson RJ, Sciaky D, Grondin CJ, Mattingly CJ, Davis AP, Grondin CJ, Johnson RJ, Sciaky D, McMorran R, Wiegers J, Wiegers TC, Mattingly CJ, Davis AP, Grondin CJ, Johnson RJ, Sciaky D, Wiegers J, Wiegers TC, Mattingly CJ, Grondin CJ, Davis AP, Wiegers TC, Wiegers JA, Mattingly CJ",", , , , , , ","NIEHS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS, NIGMS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS, NIGMS NIH HHS, NIEHS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS, National Institute of Environmental Health Sciences, NIEHS NIH HHS, National Institute of Environmental 
Health Sciences, National Institutes of Health, National Institute of Environmental Health Sciences, NIEHS NIH HHS, NIEHS NIH HHS, National Institute of Environmental Health Sciences, National Institute of Environmental Health Sciences, National Institute of Environmental Health Sciences, NIEHS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS, National Institute of Environmental Health Sciences, National Institute of Environmental Health Sciences, National Institute of Environmental Health Sciences, NIEHS NIH HHS, NIEHS NIH HHS, National Institutes of Health, NIEHS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS, NIEHS NIH HHS",1015.0,"United States, United States, United States, United States, United States, United States" +"23180793, 25414341, 29858800, 31641782, 25048120",IPD,0.978224158,IPD,0.978224158,The Immuno Polymorphism Database,0.944300856,5,http://www.ebi.ac.uk/ipd,301,,,no_wayback,2019-10-22,"Anthony Nolan Research Institute, Royal Free Hospital, Pond Street, Hampstead, London NW3 2QG, UK., Anthony Nolan Research Institute, Hampstead, London, NW3 2QG, UK UCL Cancer Institute, University College London, Hampstead, London, NW3 2QG, UK., Anthony Nolan Research Institute, London, UK., The Pirbright Institute, Pirbright, Woking, Surrey, UK., Anthony Nolan Research Institute, Royal Free Hospital, Pond Street, Hampstead, London, NW3 2QG, UK.","Robinson J, Halliwell JA, McWilliam H, Lopez R, Marsh SG, Robinson J, Halliwell JA, Hayhurst JD, Flicek P, Parham P, Marsh SG, Abraham JP, Barker DJ, Robinson J, Maccari G, Marsh SGE, Maccari G, Robinson J, Hammond JA, Marsh SGE, Robinson J, Halliwell JA, Marsh SG",", , , , ","PHS HHS, PHS HHS, Cancer Research UK, NCI NIH HHS, Cancer Research UK, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, 
",1118.0, +23193253,LAMP,0.989832759,LAMP,0.989832759,Library of Apicomplexan Metabolic Pathways,0.983664009,1,http://www.llamp.net,"HTTPConnectionPool(host='www.llamp.net', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to www.llamp.net timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20220709150508/https://www.llamp.net/,2012-11-27,"Department of Functional and Comparative Genomics, Institute of Integrative Biology, University of Liverpool, Biosciences Building, Crown Street, Liverpool L69 7ZB, UK. ashanmu@liv.ac.uk","Shanmugasundram A, Gonzalez-Galarza FF, Wastling JM, Vasieva O, Jones AR",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",30.0, +23203886,PlantsDB,0.96487546,PlantsDB,0.96487546,Element Database,0.734058082,1,http://mips.helmholtz-muenchen.de/plant/genomes.jsp,302,,,http://web.archive.org/web/20141210132131/http://mips.helmholtz-muenchen.de:80/plant/genomes.jsp,2012-11-29,"Munich Information Center for Protein Sequences/Institute of Bioinformatics and Systems Biology, Helmholtz Center Munich-German Research Center for Environmental Health, 85764 Neuherberg, Germany.","Nussbaumer T, Martis MM, Roessner SK, Pfeifer M, Bader KC, Sharma S, Gundlach H, Spannagl M",,,89.0,Germany +"23203987, 24316576, 26888907, 26896847, 29155950, 30407521, 31691826, 26578574, 29092050, 31598706, 22086963",Ensembl,0.994191885,Ensembl,0.994191885,,0,11,http://www.ensembl.org,302,United Kingdom,"(52.1929,0.1256)",http://web.archive.org/web/20200611051847/http://ensembl.org/,2020-01-01,"European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton Cambridge CB10 1SD, UK. 
flicek@ebi.ac.uk, European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SD and Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SA, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK zerbino@ebi.ac.uk., nan, European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., nan, The European Molecular Biology Laboratory, The European Bioinformatics Institute, The Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton Cambridge CB10 1SD, UK. 
flicek@ebi.ac.uk","Flicek P, Ahmed I, Amode MR, Barrell D, Beal K, Brent S, Carvalho-Silva D, Clapham P, Coates G, Fairley S, Fitzgerald S, Gil L, García-Girón C, Gordon L, Hourlier T, Hunt S, Juettemann T, Kähäri AK, Keenan S, Komorowska M, Kulesha E, Longden I, Maurel T, McLaren WM, Muffato M, Nag R, Overduin B, Pignatelli M, Pritchard B, Pritchard E, Riat HS, Ritchie GR, Ruffier M, Schuster M, Sheppard D, Sobral D, Taylor K, Thormann A, Trevanion S, White S, Wilder SP, Aken BL, Birney E, Cunningham F, Dunham I, Harrow J, Herrero J, Hubbard TJ, Johnson N, Kinsella R, Parker A, Spudich G, Yates A, Yates A, Zadissa A, Searle SM, Flicek P, Amode MR, Barrell D, Beal K, Billis K, Brent S, Carvalho-Silva D, Clapham P, Coates G, Fitzgerald S, Gil L, Girón CG, Gordon L, Hourlier T, Hunt S, Johnson N, Juettemann T, Kähäri AK, Keenan S, Kulesha E, Martin FJ, Maurel T, McLaren WM, Murphy DN, Nag R, Overduin B, Pignatelli M, Pritchard B, Pritchard E, Riat HS, Ruffier M, Sheppard D, Taylor K, Thormann A, Trevanion SJ, Vullo A, Wilder SP, Wilson M, Zadissa A, Aken BL, Birney E, Cunningham F, Harrow J, Herrero J, Hubbard TJ, Kinsella R, Muffato M, Parker A, Spudich G, Yates A, Yates A, Zerbino DR, Searle SM, Zerbino DR, Johnson N, Juetteman T, Sheppard D, Wilder SP, Lavidas I, Nuhn M, Perry E, Raffaillac-Desfosses Q, Sobral D, Keefe D, Gräf S, Ahmed I, Kinsella R, Pritchard B, Brent S, Amode R, Parker A, Trevanion S, Birney E, Dunham I, Flicek P, nan, Zerbino DR, Achuthan P, Akanni W, Amode MR, Barrell D, Bhai J, Billis K, Cummins C, Gall A, Girón CG, Gil L, Gordon L, Haggerty L, Haskell E, Hourlier T, Izuogu OG, Janacek SH, Juettemann T, To JK, Laird MR, Lavidas I, Liu Z, Loveland JE, Maurel T, McLaren W, Moore B, Mudge J, Murphy DN, Newman V, Nuhn M, Ogeh D, Ong CK, Parker A, Patricio M, Riat HS, Schuilenburg H, Sheppard D, Sparrow H, Taylor K, Thormann A, Vullo A, Walts B, Zadissa A, Frankish A, Hunt SE, Kostadima M, Langridge N, Martin FJ, Muffato M, Perry E, Ruffier M, 
Staines DM, Trevanion SJ, Aken BL, Cunningham F, Yates A, Flicek P, Cunningham F, Achuthan P, Akanni W, Allen J, Amode MR, Armean IM, Bennett R, Bhai J, Billis K, Boddu S, Cummins C, Davidson C, Dodiya KJ, Gall A, Girón CG, Gil L, Grego T, Haggerty L, Haskell E, Hourlier T, Izuogu OG, Janacek SH, Juettemann T, Kay M, Laird MR, Lavidas I, Liu Z, Loveland JE, Marugán JC, Maurel T, McMahon AC, Moore B, Morales J, Mudge JM, Nuhn M, Ogeh D, Parker A, Parton A, Patricio M, Abdul Salam AI, Schmitt BM, Schuilenburg H, Sheppard D, Sparrow H, Stapleton E, Szuba M, Taylor K, Threadgold G, Thormann A, Vullo A, Walts B, Winterbottom A, Zadissa A, Chakiachvili M, Frankish A, Hunt SE, Kostadima M, Langridge N, Martin FJ, Muffato M, Perry E, Ruffier M, Staines DM, Trevanion SJ, Aken BL, Yates AD, Zerbino DR, Flicek P, Yates AD, Achuthan P, Akanni W, Allen J, Allen J, Alvarez-Jarreta J, Amode MR, Armean IM, Azov AG, Bennett R, Bhai J, Billis K, Boddu S, Marugán JC, Cummins C, Davidson C, Dodiya K, Fatima R, Gall A, Giron CG, Gil L, Grego T, Haggerty L, Haskell E, Hourlier T, Izuogu OG, Janacek SH, Juettemann T, Kay M, Lavidas I, Le T, Lemos D, Martinez JG, Maurel T, McDowall M, McMahon A, Mohanan S, Moore B, Nuhn M, Oheh DN, Parker A, Parton A, Patricio M, Sakthivel MP, Abdul Salam AI, Schmitt BM, Schuilenburg H, Sheppard D, Sycheva M, Szuba M, Taylor K, Thormann A, Threadgold G, Vullo A, Walts B, Winterbottom A, Zadissa A, Chakiachvili M, Flint B, Frankish A, Hunt SE, IIsley G, Kostadima M, Langridge N, Loveland JE, Martin FJ, Morales J, Mudge JM, Muffato M, Perry E, Ruffier M, Trevanion SJ, Cunningham F, Howe KL, Zerbino DR, Flicek P, nan, Kersey PJ, Allen JE, Allot A, Barba M, Boddu S, Bolt BJ, Carvalho-Silva D, Christensen M, Davis P, Grabmueller C, Kumar N, Liu Z, Maurel T, Moore B, McDowall MD, Maheswari U, Naamati G, Newman V, Ong CK, Paulini M, Pedro H, Perry E, Russell M, Sparrow H, Tapanari E, Taylor K, Vullo A, Williams G, Zadissia A, Olson A, Stein J, Wei S, Tello-Ruiz 
M, Ware D, Luciani A, Potter S, Finn RD, Urban M, Hammond-Kosack KE, Bolser DM, De Silva N, Howe KL, Langridge N, Maslen G, Staines DM, Yates A, Howe KL, Contreras-Moreira B, De Silva N, Maslen G, Akanni W, Allen J, Alvarez-Jarreta J, Barba M, Bolser DM, Cambell L, Carbajo M, Chakiachvili M, Christensen M, Cummins C, Cuzick A, Davis P, Fexova S, Gall A, George N, Gil L, Gupta P, Hammond-Kosack KE, Haskell E, Hunt SE, Jaiswal P, Janacek SH, Kersey PJ, Langridge N, Maheswari U, Maurel T, McDowall MD, Moore B, Muffato M, Naamati G, Naithani S, Olson A, Papatheodorou I, Patricio M, Paulini M, Pedro H, Perry E, Preece J, Rosello M, Russell M, Sitnik V, Staines DM, Stein J, Tello-Ruiz MK, Trevanion SJ, Urban M, Wei S, Ware D, Williams G, Yates AD, Flicek P, Flicek P, Amode MR, Barrell D, Beal K, Brent S, Carvalho-Silva D, Clapham P, Coates G, Fairley S, Fitzgerald S, Gil L, Gordon L, Hendrix M, Hourlier T, Johnson N, Kähäri AK, Keefe D, Keenan S, Kinsella R, Komorowska M, Koscielny G, Kulesha E, Larsson P, Longden I, McLaren W, Muffato M, Overduin B, Pignatelli M, Pritchard B, Riat HS, Ritchie GR, Ruffier M, Schuster M, Sobral D, Tang YA, Taylor K, Trevanion S, Vandrovcova J, White S, Wilson M, Wilder SP, Aken BL, Birney E, Cunningham F, Dunham I, Durbin R, Fernández-Suarez XM, Harrow J, Herrero J, Hubbard TJ, Parker A, Proctor G, Spudich G, Vogel J, Yates A, Yates A, Zadissa A, Searle SM",", , , nan, , , , nan, , , ","Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, NHGRI NIH HHS, Wellcome Trust, NHGRI NIH HHS, NHGRI NIH HHS, NCRR NIH HHS, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, NICHD NIH HHS, Wellcome Trust, Biotechnology and Biological Sciences 
Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, NICHD NIH HHS, NCRR NIH HHS, NHGRI NIH HHS, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Wellcome Trust, nan, NHGRI NIH HHS, Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Wellcome Trust, NHGRI NIH HHS, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Wellcome Trust, National Institutes of Health, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Wellcome Trust, NHGRI NIH HHS, National Institutes of Health, Wellcome Trust, Wellcome Trust, National Human Genome Research Institute, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, National Human Genome Research Institute, Wellcome Trust, National Institutes of Health, Horizon 2020, European Molecular Biology Laboratory, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, Wellcome Trust, Wellcome Trust, NHGRI NIH HHS, nan, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research 
Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, UK Biosciences and Biotechnology Research Council, UK Biosciences and Biotechnology Research Council, UK Biosciences and Biotechnology Research Council, National Science Foundation, European Union's Horizon 2020 Research and Innovation Programme, European Union's Horizon 2020 Research and Innovation Programme, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, National Institute of Allergy and Infectious Diseases, Medical Research Council, National Human Genome Research Institute, Biotechnology and Biological Sciences Research Council, National Institutes of Health, Bill and Melinda Gates Foundation, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Wellcome Trust, Biotechnology and Biological Sciences Research Council, United States Department of Agriculture, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, UK Biosciences and Biotechnology Research Council, Biotechnology and Biological Sciences Research Council, UK Biosciences and Biotechnology Research Council, NHGRI NIH HHS, NHGRI NIH HHS, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, European Commission FP7, NHGRI NIH HHS, Wellcome Trust, European Commission FP7, European Commission FP7, European Commission FP7",4906.0, +"23245209, 30304474",HGNC,0.797803879,HGNC,0.797803879,,0,2,http://www.genenames.org,301,,,http://web.archive.org/web/20221102074142/https://www.genenames.org/,2019-01-01,"nan, HUGO Gene Nomenclature Committee, European Molecular Biology Laboratory, European 
Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK.","nan, Braschi B, Denny P, Gray K, Jones T, Seal R, Tweedie S, Yates B, Bruford E","nan, ","nan, National Human Genome Research Institute, NHGRI NIH HHS, Wellcome Trust",156.0, +"23245335, 23197657",H-InvDB,0.99724789,H-InvDB,0.99724789,H-Invitational Database,0.770428264,2,"http://hinv.jp/, http://hinv.jp/hinv/h-epd","200, 301",,", ","http://web.archive.org/web/20220308074820/http://hinv.jp/, no_wayback",2012-12-17,"Biomedicinal Information Research Center, National Institute of Advanced Industrial Science and Technology, Tokyo, Japan. t.imanishi@aist.go.jp, Integrated Database and Systems Biology Team, Biomedicinal Information Research Center, National Institute of Advanced Industrial Science and Technology, Aomi 2-4-7, Koto-ku, Tokyo 135-0064, Japan.","Imanishi T, Nagai Y, Habara T, Yamasaki C, Takeda J, Mikami S, Bando Y, Tojo H, Nishimura T, Takeda J, Yamasaki C, Murakami K, Nagai Y, Sera M, Hara Y, Obi N, Habara T, Gojobori T, Imanishi T",", ",", ",17.0,"Japan, Japan" +"23292603, 22123792",KNApSAcK,0.885880351,KNApSAcK,0.885880351,,0,2,"http://kanaya.naist.jp/KNApSAcK_Family/, http://kanaya.naist.jp/knapsack3d","302, 302",,", ","http://web.archive.org/web/20190326172932/http://kanaya.naist.jp:80/KNApSAcK_Family/, no_wayback",2013-01-03,"Department of Life Science and Informatics, Maebashi Institute of Technology, 460-1 Kamisadori-machi, Maebashi-City, Gunma, Japan. 
skanaya@gtc.naist.jp, Graduate School of Information Science, Nara Institute of Science and Technology, 8916-5 Takayama-cho, Ikoma-shi, Nara, 630-0192 Japan.","Nakamura K, Shimura N, Otabe Y, Hirai-Morita A, Nakamura Y, Ono N, Ul-Amin MA, Kanaya S, Afendi FM, Okada T, Yamazaki M, Hirai-Morita A, Nakamura Y, Nakamura K, Ikeda S, Takahashi H, Altaf-Ul-Amin M, Darusman LK, Saito K, Kanaya S",", ",", ",208.0,"Japan, Japan" +"23331499, 23715991",SignaLink,0.985477149,SignaLink,0.985477149,,0,2,http://SignaLink.org,200,United Kingdom,"(52.578,1.0889)",http://web.archive.org/web/20221023183126/http://signalink.org/,2013-01-18,"Department of Genetics, Eötvös Loránd University, Pázmány P, s, 1C, H-1117, Budapest, Hungary., Department of Genetics, Eötvös Loránd University, Budapest, Hungary.","Fazekas D, Koltai M, Türei D, Módos D, Pálfy M, Dúl Z, Zsákai L, Szalay-Bekő M, Lenti K, Farkas IJ, Vellai T, Csermely P, Korcsmáros T, Pálfy M, Farkas IJ, Vellai T, Korcsmáros T",", ",", ",100.0,"Hungary, Hungary" +23696674,CGD,0.991376301,CGD,0.991376301,Clinical Genomic Database,0.881686285,1,http://research.nhgri.nih.gov/CGD,302,United States,"(39.0827,-77.1253)",http://web.archive.org/web/20221019170931/https://research.nhgri.nih.gov/CGD/,2013-05-21,"Medical Genetics Branch, National Human Genome Research Institute, National Institutes of Health, Bethesda, MD 20892, USA. solomonb@mail.nih.gov","Solomon BD, Nguyen AD, Bear KA, Wolfsberg TG",,Intramural NIH HHS,56.0,United States +23730305,SGR,0.993947387,SGR,0.993947387,Systems Genetics Resource,0.927626471,1,http://systems.genetics.ucla.edu,"HTTPConnectionPool(host='systems.genetics.ucla.edu', port=80): Max retries exceeded with url: / (Caused by ReadTimeoutError(""HTTPConnectionPool(host='systems.genetics.ucla.edu', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20200922164145/https://systems.genetics.ucla.edu/,2013-05-20,,,,,0.0, +"23734660, 33772585, 22681406",IEDB,0.993056357,IEDB,0.993056357,Immune Epitope Database and Analysis Resource,0.933946027,3,http://www.iedb.org,200,,,http://web.archive.org/web/20221101172524/https://www.iedb.org/,2021-03-01,"nan, Center for Infectious Disease and Vaccine Research, La Jolla Institute for Immunology, 9420 Athena Circle La Jolla, CA 92037, USA., La Jolla Institute for Allergy & Immunology, La Jolla, CA 92037, USA. nsalimi@liai.org","nan, Edwards L, Jackson R, Overton JA, Vita R, Blazeska N, Peters B, Sette A, Salimi N, Fleri W, Peters B, Sette A","nan, , ","nan, National Institute of Allergy and Infectious Diseases, ",21.0,"United States, United States" +23825543,LAMP,0.990910769,LAMP,0.990910769,,0,1,http://biotechlab.fudan.edu.cn/database/lamp,500,,,http://web.archive.org/web/20220617011457/http://biotechlab.fudan.edu.cn/database/lamp/,2013-06-18,"State Key Laboratory of Genetic Engineering, School of Life Sciences, Fudan University, Shanghai, China ; Shanghai High-Tech Bioengineering Co., Ltd, Shanghai, China.","Zhao X, Wu H, Lu H, Li G, Huang Q",,,116.0,"China, China" +"23868073, 33290554, 26578592, 27899595, 30407594, 23193289",PANTHER,0.996814907,PANTHER,0.996814907,Protein Analysis Through Evolutionary Relationships,0.987256037,6,http://www.pantherdb.org,200,,,http://web.archive.org/web/20221102173146/http://pantherdb.org/,2021-01-01,"nan, Division of Bioinformatics, Department of Preventive Medicine, Keck School of Medicine, University of Southern California, Los Angeles, CA 90033, USA., nan, Division of Bioinformatics, Department of Preventive Medicine, Keck School of Medicine of USC, University of Southern California, Los Angeles, CA 90033, USA huaiyumi@usc.edu., Division of Bioinformatics, Department of Preventive Medicine, Keck School of Medicine of USC, University of Southern California, Los Angeles, CA 90033, USA., 
nan","nan, Mi H, Ebert D, Muruganujan A, Mills C, Albou LP, Mushayamaha T, Thomas PD, nan, Mi H, Huang X, Muruganujan A, Tang H, Mills C, Kang D, Thomas PD, Mi H, Muruganujan A, Ebert D, Huang X, Thomas PD, nan","nan, , nan, , , nan","nan, NCI NIH HHS, National Cancer Institute, NHGRI NIH HHS, National Science Foundation, National Science Foundation, National Institutes of Health, National Science Foundation, nan, NIBIB NIH HHS, National Institutes of Health, National Science Foundation, NHGRI NIH HHS, nan",2058.0,"United States, United States, United States" +24007337,GRASP,0.996128678,GRASP,0.996128678,Genomic Resource Access for Stoichioproteomics,0.944524201,1,http://www.graspdb.net,"HTTPConnectionPool(host='www.graspdb.net', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2013-09-04,,,,,0.0, +"24163255, 29059374",PED,0.995105187,PED,0.995105187,Pancreatic Expression Database,0.927990806,2,http://www.pancreasexpression.org,301,,,http://web.archive.org/web/20210621200758/http://pancreasexpression.org/,2018-01-01,"Centre for Molecular Oncology, Barts Cancer Institute, Queen Mary University of London, Charterhouse Square, London EC1M 6BQ, UK and Molecular GI-Onkologie (MGO), University of Bochum, Germany., Bioinformatics Unit, Centre for Molecular Oncology, Barts Cancer Institute, Queen Mary University London, London EC1M 6BQ, UK.","Dayem Ullah AZ, Cutts RJ, Ghetia M, Gadaleta E, Hahn SA, Crnogorac-Jurcevic T, Lemoine NR, Chelala C, Marzec J, Dayem Ullah AZ, Pirrò S, Gadaleta E, Crnogorac-Jurcevic T, Lemoine NR, Kocher HM, Chelala C",", ","Cancer Research UK, Cancer Research UK, Cancer Research UK",28.0,Germany +24174537,DGV,0.99448659,DGV,0.99448659,Database of Genomic Variants,0.923672388,1,http://dgv.tcag.ca,200,,,http://web.archive.org/web/20220626001827/http://dgv.tcag.ca/,2013-10-29,"The Centre for Applied Genomics, Peter Gilgan Centre for 
Research and Learning, The Hospital for Sick Children, 686 Bay Street, Toronto, Ontario M5G 0A4, Canada, Department of Immunology, Genetics and Pathology, Science for Life Laboratory, Uppsala University, Uppsala SE-751 08, Sweden and Department of Molecular Genetics, University of Toronto, Toronto, Ontario M5S 1A8, Canada.","MacDonald JR, Ziman R, Yuen RK, Feuk L, Scherer SW",,Canadian Institutes of Health Research,557.0,"Canada, Canada, Sweden" +24185695,NDB,0.992138386,NDB,0.992138386,Nucleic Acid Database,0.922201002,1,http://ndbserver.rutgers.edu,200,,,no_wayback,2013-10-31,"Department of Chemistry and Chemical Biology, Center for Integrative Proteomics Research, Rutgers, the State University of New Jersey, 174 Frelinghuysen Road, Piscataway, NJ 08854-8076, USA, Department of Chemistry and Center for Biomolecular Sciences, Bowling Green State University, Bowling Green, OH 43403, USA and Department of Mathematics and Statistics, Bowling Green State University, Bowling Green, OH 43403, USA.","Coimbatore Narayanan B, Westbrook J, Ghosh S, Petrov AI, Sweeney B, Zirbel CL, Leontis NB, Berman HM",,NIGMS NIH HHS,69.0,"Jersey, United States, United States, United States" +"24194602, 25477381, 26578571, 29040613, 30357349, 33156332, 27924010",DDBJ,0.986539379,DDBJ,0.986539379,DNA Data Bank of Japan,0.896418548,7,http://www.ddbj.nig.ac.jp,301,,,no_wayback,2021-01-01,"DDBJ Center, National Institute of Genetics, Yata 1111, Mishima, Shizuoka 411-8540, Japan and National Bioscience Database Center, Japan Science and Technology Agency, Tokyo 102-8666, Japan., DDBJ Center, National Institute of Genetics, Shizuoka 411-8540, Japan., nan, DDBJ Center, National Institute of Genetics, Shizuoka 411-8540, Japan., DDBJ Center, National Institute of Genetics, Shizuoka 411-8540, Japan., Bioinformation and DDBJ Center, National Institute of Genetics, Mishima, Shizuoka 411-8540, Japan., DDBJ Center, National Institute of Genetics, Shizuoka 411-8540, Japan jmashima@nig.ac.jp.","Kosuge T, 
Mashima J, Kodama Y, Fujisawa T, Kaminuma E, Ogasawara O, Okubo K, Takagi T, Nakamura Y, Kodama Y, Mashima J, Kosuge T, Katayama T, Fujisawa T, Kaminuma E, Ogasawara O, Okubo K, Takagi T, Nakamura Y, nan, Kodama Y, Mashima J, Kosuge T, Kaminuma E, Ogasawara O, Okubo K, Nakamura Y, Takagi T, Kodama Y, Mashima J, Kosuge T, Ogasawara O, Fukuda A, Kodama Y, Mashima J, Fujisawa T, Ogasawara O, Mashima J, Kodama Y, Fujisawa T, Katayama T, Okuda Y, Kaminuma E, Ogasawara O, Okubo K, Nakamura Y, Takagi T",", , nan, , , , ",", , nan, , Japan Science and Technology Agency, Technology of Japan, Japan Science and Technology Agency, Ministry of Education, Culture, Sports, Science and Technology, Japan Agency for Medical Research and Development, ",123.0,"Japan, Japan, Japan, Japan, Japan, Japan, Japan, Japan" +"24214965, 27899562, 30398643, 25883136, 21936816",ChEMBL,0.997582018,ChEMBL,0.997582018,,0,5,http://www.ebi.ac.uk/chembl,301,United Kingdom,"(52.1929,0.1256)",http://web.archive.org/web/20221107193125/https://www.ebi.ac.uk/chembl/,2019-01-01,"European Molecular Biology Laboratory European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK., nan, Computational Chemical Biology, EMBL-EBI, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK.","Bento AP, Gaulton A, Hersey A, Bellis LJ, Chambers J, Davies M, Krüger FA, Light Y, Mak L, McGlinchey S, Nowotka M, Papadatos G, Santos R, Overington JP, Gaulton A, Hersey A, Nowotka M, Bento AP, Chambers J, Mendez D, Mutowo P, Atkinson F, Bellis LJ, Cibrián-Uhalte E, Davies M, Dedman N, Karlsson A, Magariños MP, Overington JP, Papadatos G, Smit I, Leach AR, Mendez D, Gaulton A, Bento AP, Chambers J, De Veij M, 
Félix E, Magariños MP, Mosquera JF, Mutowo P, Nowotka M, Gordillo-Marañón M, Hunter F, Junco L, Mugumbate G, Rodriguez-Lopez M, Atkinson F, Bosc N, Radoux CJ, Segura-Cabrera A, Hersey A, Leach AR, nan, Bellis LJ, Akhtar R, Al-Lazikani B, Atkinson F, Bento AP, Chambers J, Davies M, Gaulton A, Hersey A, Ikeda K, Krüger FA, Light Y, McGlinchey S, Santos R, Stauch B, Overington JP",", , , nan, ","Engineering and Physical Sciences Research Council, Wellcome Trust, Wellcome Trust, Wellcome Trust, NCI NIH HHS, Wellcome Trust, Innovative Medicines Initiative Joint Undertaking, European Union Seventh Framework Programme, European Union Seventh Framework Programme, Wellcome Trust, Wellcome Trust, National Institutes of Health, nan, Wellcome Trust",1422.0, +"24225323, 31667520, 25273106",PATRIC,0.997595549,PATRIC,0.997595549,Center,0.560106337,3,http://www.patricbrc.org,301,,,http://web.archive.org/web/20220901051154/http://www.patricbrc.org/,2020-01-01,"Virginia Bioinformatics Institute, Virginia Tech, Blacksburg, VA 24060, USA, Computation Institute, University of Chicago, Chicago, IL 60637, USA, Mathematics and Computer Science Division, Argonne National Laboratory, Argonne, IL 60637, USA, Grado Department of Industrial & Systems Engineering, Virginia Tech, Blacksburg, VA 24060, USA, Department of Microbiology and Immunology, University of Maryland School of Medicine, Baltimore, MD 21201, USA, Fellowship for Interpretation of Genomes, Burr Ridge, IL 60527, USA, Computing, Environment, and Life Sciences, Argonne National Laboratory, Argonne, IL 60637, USA and Nestlé Institute of Health Sciences SA, Campus EPFL, Quartier de L'innovation, Lausanne, Switzerland., University of Chicago Consortium for Advanced Science and Engineering, University of Chicago, Chicago, IL 60637, USA., Virginia Bioinformatics Institute, Virginia Tech, Blacksburg, VA 24061, USA.","Wattam AR, Abraham D, Dalay O, Disz TL, Driscoll T, Gabbard JL, Gillespie JJ, Gough R, Hix D, Kenyon R, Machi D, Mao C, 
Nordberg EK, Olson R, Overbeek R, Pusch GD, Shukla M, Schulman J, Stevens RL, Sullivan DE, Vonstein V, Warren A, Will R, Wilson MJ, Yoo HS, Zhang C, Zhang Y, Sobral BW, Davis JJ, Wattam AR, Aziz RK, Brettin T, Butler R, Butler RM, Chlenski P, Conrad N, Dickerman A, Dietrich EM, Gabbard JL, Gerdes S, Guard A, Kenyon RW, Machi D, Mao C, Murphy-Olson D, Nguyen M, Nordberg EK, Olsen GJ, Olson RD, Overbeek JC, Overbeek R, Parrello B, Pusch GD, Shukla M, Thomas C, VanOeffelen M, Vonstein V, Warren AS, Xia F, Xie D, Yoo H, Stevens R, Mao C, Abraham D, Wattam AR, Wilson MJ, Shukla M, Yoo HS, Sobral BW",", , ","PHS HHS, National Institute of Allergy and Infectious Diseases, NIAID NIH HHS, NIAID NIH HHS",772.0,"Switzerland, United States, United States, United States, United States, United States, United States, United States, United States, United States" +"24234449, 26935103, 22127867",FlyBase,0.9871732,FlyBase,0.9871732,,0,3,http://flybase.org,403,,,no_wayback,2016-03-01,"The Biological Laboratories, Harvard University, 16 Divinity Avenue, Cambridge, MA 02138, USA and Department of Genetics, University of Cambridge, Downing Street, Cambridge CB2 3EH, UK., Department of Genetics, University of Cambridge, Downing Street, Cambridge CB2 3EH, UK gm119@cam.ac.uk., Department of Genetics, University of Cambridge, Downing Street, Cambridge CB2 3EH, UK.","St Pierre SE, Ponting L, Stefancsik R, McQuilton P, , Millburn GH, Crosby MA, Gramates LS, Tweedie S, , McQuilton P, St Pierre SE, Thurmond J, ",", , ","Medical Research Council, NHGRI NIH HHS, NHGRI NIH HHS, Medical Research Council, NHGRI NIH HHS, Medical Research Council, NIGMS NIH HHS, NHGRI NIH HHS",518.0,United States +"24259432, 26553804, 29112715, 33270901, 24316578, 25510495, 22121212",RefSeq,0.988386571,RefSeq,0.988386571,Prokaryotic Genome Annotation Pipeline,0.812073847,7,http://www.ncbi.nlm.nih.gov/refseq,301,,,http://web.archive.org/web/20221110022853/https://www.ncbi.nlm.nih.gov/refseq/,2021-01-01,"National Center 
for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Building 38A, 8600 Rockville Pike, Bethesda, MD 20894, USA., nan, National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 45 Center Drive, Bethesda, MD 20892-6511, USA., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bldg. 38A 8600 Rockville Pike, Bethesda, MD 20894, USA., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Building 38A 8600 Rockville Pike, Bethesda, MD 20894, USA. tatiana@ncbi.nlm.nih.gov., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Building 38A, 8600 Rockville Pike, Bethesda, MD 20894, USA. pruitt@ncbi.nlm.nih.gov","Pruitt KD, Brown GR, Hiatt SM, Thibaud-Nissen F, Astashyn A, Ermolaeva O, Farrell CM, Hart J, Landrum MJ, McGarvey KM, Murphy MR, O'Leary NA, Pujar S, Rajput B, Rangwala SH, Riddick LD, Shkeda A, Sun H, Tamez P, Tully RE, Wallin C, Webb D, Weber J, Wu W, DiCuccio M, Kitts P, Maglott DR, Murphy TD, Ostell JM, O'Leary NA, Wright MW, Brister JR, Ciufo S, Haddad D, McVeigh R, Rajput B, Robbertse B, Smith-White B, Ako-Adjei D, Astashyn A, Badretdin A, Bao Y, Blinkova O, Brover V, Chetvernin V, Choi J, Cox E, Ermolaeva O, Farrell CM, Goldfarb T, Gupta T, Haft D, Hatcher E, Hlavina W, Joardar VS, Kodali VK, Li W, Maglott D, Masterson P, McGarvey KM, Murphy MR, O'Neill K, Pujar S, Rangwala SH, Rausch D, Riddick LD, Schoch C, Shkeda A, Storz SS, Sun H, Thibaud-Nissen F, Tolstoy I, Tully RE, Vatsan AR, Wallin C, Webb D, Wu W, Landrum MJ, Kimchi A, Tatusova T, DiCuccio M, Kitts P, Murphy TD, Pruitt KD, nan, Li W, O'Neill KR, Haft DH, DiCuccio M, Chetvernin V, Badretdin A, 
Coulouris G, Chitsaz F, Derbyshire MK, Durkin AS, Gonzales NR, Gwadz M, Lanczycki CJ, Song JS, Thanki N, Wang J, Yamashita RA, Yang M, Zheng C, Marchler-Bauer A, Thibaud-Nissen F, Tatusova T, Ciufo S, Fedorov B, O'Neill K, Tolstoy I, Tatusova T, Ciufo S, Federhen S, Fedorov B, McVeigh R, O'Neill K, Tolstoy I, Zaslavsky L, Pruitt KD, Tatusova T, Brown GR, Maglott DR",", , nan, , , , ","Wellcome Trust, Intramural NIH HHS, Intramural NIH HHS, nan, National Institutes of Health, Intramural NIH HHS, Intramural NIH HHS, Intramural NIH HHS",3379.0,"United States, United States, United States, United States, United States, United States" +"24293654, 23110173",SEED,0.908952594,SEED,0.908952594,,0,2,"http://pubseed.theseed.org/, http://rast.nmpdr.org","301, 301",,", ","http://web.archive.org/web/20221013053327/https://pubseed.theseed.org/, http://web.archive.org/web/20221107151847/https://rast.nmpdr.org/",2013-11-29,"Fellowship for Interpretation of Genomes, Burr Ridge, IL 60527, USA, Mathematics and Computer Science Division, Argonne National Laboratory, Argonne, IL 60439, USA, Computation Institute, University of Chicago, Chicago, IL 60637, USA, Department of Microbiology, University of Illinois at Urbana-Champaign, Urbana, IL 61801, USA, Department of Computer Science, San Diego State University, San Diego, CA 92182, USA, Virginia Bioinformatics Institute, Virginia Tech, Blacksburg, VA 24060, USA, Computing, Environment and Life Sciences, Argonne National Laboratory, Argonne, IL 60439, USA and Department of Computer Science, University of Chicago, Chicago, IL 60637, USA., Computation Institute, University of Chicago, Chicago, Illinois, United States of America. 
ramy.aziz@gmail.com","Overbeek R, Olson R, Pusch GD, Olsen GJ, Davis JJ, Disz T, Edwards RA, Gerdes S, Parrello B, Shukla M, Vonstein V, Wattam AR, Xia F, Stevens R, Aziz RK, Devoid S, Disz T, Edwards RA, Henry CS, Olsen GJ, Olson R, Overbeek R, Parrello B, Pusch GD, Stevens RL, Vonstein V, Xia F",", ","PHS HHS, PHS HHS, NIAID NIH HHS",1974.0,"United States, United States, United States, United States, United States, United States, United States, United States, United States" +"24304899, 27914894",SCOPe,0.985311985,SCOPe,0.985311985,Structural Classification of Proteins-extended,0.87680452,2,http://scop.berkeley.edu,200,,,http://web.archive.org/web/20221109035924/https://scop.berkeley.edu/,2016-11-30,"Physical Biosciences Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA and Department of Plant and Microbial Biology, University of California, Berkeley, CA 94720, USA., Environmental Genomics and Systems Biology Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA; Molecular Biophysics and Integrated Bioimaging Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA. Electronic address: scope@compbio.berkeley.edu.","Fox NK, Brenner SE, Chandonia JM, Chandonia JM, Fox NK, Brenner SE",", ","NIGMS NIH HHS, NIGMS NIH HHS, National Institutes of Health, NIGMS NIH HHS",306.0,"United States, United States, United States, United States" +"24319143, 22135289",MMDB,0.99548922,MMDB,0.99548922,Molecular Modeling Database,0.961676583,2,http://www.ncbi.nlm.nih.gov/Structure,301,,,no_wayback,2013-12-06,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Building 38 A, Room 8N805, 8600 Rockville Pike, Bethesda, MD 20894, USA., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bldg 38 A, Room 8N805, 8600 Rockville Pike, Bethesda, MD 20894, USA. 
madej@ncbi.nlm.nih.gov","Madej T, Lanczycki CJ, Zhang D, Thiessen PA, Geer RC, Marchler-Bauer A, Bryant SH, Madej T, Addess KJ, Fong JH, Geer LY, Geer RC, Lanczycki CJ, Liu C, Lu S, Marchler-Bauer A, Panchenko AR, Chen J, Thiessen PA, Wang Y, Zhang D, Bryant SH",", ","Intramural NIH HHS, Intramural NIH HHS",162.0,"United States, United States" +"24350770, 25404128, 24910945",Model Organism Protein Expression Database,0.981790811,MOPED,0.971850872,Model Organism Protein Expression Database,0.981790811,3,http://moped.proteinspire.org,"HTTPConnectionPool(host='moped.proteinspire.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-11-17,"None, Bioinformatics and High-Throughput Analysis Laboratory, Center for Developmental Therapeutics, Seattle Children's Research Institute, Seattle, WA, USA 98101 High-Throughput Analysis Core, Seattle Children's Research Institute, Seattle, WA, USA 98101 CDO Analytics, Seattle Children's, Seattle, WA, USA 98101 Data-Enabled Life Sciences Alliance (DELSA Global), Seattle, WA, USA 98101., 1 Bioinformatics and High-Throughput Analysis Laboratory, Center for Developmental Therapeutics, Seattle Children's Research Institute , Seattle, Washington.","Higdon R, Stewart E, Stanberry L, Haynes W, Choiniere J, Montague E, Anderson N, Yandl G, Janko I, Broomall W, Fishilevich S, Lancet D, Kolker N, Kolker E, Montague E, Janko I, Stanberry L, Lee E, Choiniere J, Anderson N, Stewart E, Broomall W, Higdon R, Kolker N, Kolker E, Montague E, Stanberry L, Higdon R, Janko I, Lee E, Anderson N, Choiniere J, Stewart E, Yandl G, Broomall W, Kolker N, Kolker E",", , ","NIDDK NIH HHS, NIDDK NIH HHS, NIDDK NIH HHS, NIDDK NIH HHS, , NIDDK NIH HHS, NIDDK NIH HHS",50.0,"United States, United States, United States, United States" 
+24356117,dbSNP,0.992551982,dbSNP,0.992551982,,0,1,http://www.ncbi.nlm.nih.gov/projects/SNP,301,,,http://web.archive.org/web/20221101145313/https://www.ncbi.nlm.nih.gov/projects/SNP/,2013-12-12,"Department of Pharmacology, Penn State College of Medicine, Pennsylvania State University, Hershey, Pa., USA.","Tekin I, Vrana KE",,NIGMS NIH HHS,1.0,United States +24364888,SGR,0.919245839,SGR,0.919245839,,0,1,http://bioinformatics.towson.edu/strawberry/Default.aspx,200,,,http://web.archive.org/web/20220618084643/http://bioinformatics.towson.edu/strawberry/default.aspx,2013-12-23,None,"Darwish O, Slovin JP, Kang C, Hollender CA, Geretz A, Houston S, Liu Z, Alkharouf NW",,,23.0, +"25214827, 21672956",BioPortal,0.977196991,BioPortal,0.977196991,,0,2,http://sparql.bioontology.org,200,United States,"(37.4295,-122.178)",http://web.archive.org/web/20220912183533/http://sparql.bioontology.org/,2013-01-01,"Stanford Center for Biomedical Informatics Research Stanford University, US., Stanford Center for Biomedical Informatics Research, Stanford University, Stanford, CA 94305, USA. 
whetzel@stanford.edu","Salvadores M, Alexander PR, Musen MA, Noy NF, Whetzel PL, Noy NF, Shah NH, Alexander PR, Nyulas C, Tudorache T, Musen MA",", ","NHGRI NIH HHS, NHGRI NIH HHS",252.0,United States +"25262351, 30204897, 22080564",AnimalTFDB,0.989919841,AnimalTFDB,0.989919841,Animal Transcription Factor DataBase,0.923694839,3,http://bioinfo.life.hust.edu.cn/AnimalTFDB,200,,,http://web.archive.org/web/20220609015213/http://bioinfo.life.hust.edu.cn/AnimalTFDB,2019-01-01,"Department of Biomedical Engineering, Key Laboratory of Molecular Biophysics of the Ministry of Education, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, PR China., Department of Bioinformatics and Systems Biology, Key Laboratory of Molecular Biophysics of the Ministry of Education, Hubei Bioinformatics and Molecular Imaging Key Laboratory, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, PR China., Hubei Bioinformatics & Molecular Imaging Key Laboratory, Department of Systems Biology, College of Life Science, Huazhong University of Science and Technology Wenhua College, Wuhan 430074, China.","Zhang HM, Liu T, Liu CJ, Song S, Zhang X, Liu W, Jia H, Xue Y, Guo AY, Hu H, Miao YR, Jia LH, Yu QY, Zhang Q, Guo AY, Zhang HM, Chen H, Liu W, Liu H, Gong J, Wang H, Guo AY",", , ",", National Key Research and Development Program of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, ",488.0,"China, China, China" +25332396,BCCTBbp,0.998296082,BCCTBbp,0.998296082,Breast Cancer Campaign Tissue Bank,0.761125972,1,http://bioinformatics.breastcancertissue,"HTTPConnectionPool(host='bioinformatics.breastcancertissue', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not 
known'))",,,no_wayback,2014-10-20,"Centre for Molecular Oncology, Barts Cancer Institute, Queen Mary University of London, Charterhouse Square, London EC1M 6BQ, UK.","Cutts RJ, Guerra-Assunção JA, Gadaleta E, Dayem Ullah AZ, Chelala C",,"Cancer Research UK, Cancer Research UK",4.0, +"25352543, 27794554, 30395267, 33106848",RNAcentral,0.993771434,RNAcentral,0.993771434,,0,4,http://rnacentral.org,301,,,http://web.archive.org/web/20221103162110/https://rnacentral.org/,2021-01-01,"None, None, None, None",", Petrov AI, Kay SJE, Gibson R, Kulesha E, Staines D, Bruford EA, Wright MW, Burge S, Finn RD, Kersey PJ, Cochrane G, Bateman A, Griffiths-Jones S, Harrow J, Chan PP, Lowe TM, Zwieb CW, Wower J, Williams KP, Hudson CM, Gutell R, Clark MB, Dinger M, Quek XC, Bujnicki JM, Chua NH, Liu J, Wang H, Skogerbø G, Zhao Y, Chen R, Zhu W, Cole JR, Chai B, Huang HD, Huang HY, Cherry JM, Hatzigeorgiou A, Pruitt KD, , Petrov AI, Kay SJE, Kalvari I, Howe KL, Gray KA, Bruford EA, Kersey PJ, Cochrane G, Finn RD, Bateman A, Kozomara A, Griffiths-Jones S, Frankish A, Zwieb CW, Lau BY, Williams KP, Chan PP, Lowe TM, Cannone JJ, Gutell R, Machnicka MA, Bujnicki JM, Yoshihama M, Kenmochi N, Chai B, Cole JR, Szymanski M, Karlowski WM, Wood V, Huala E, Berardini TZ, Zhao Y, Chen R, Zhu W, Paraskevopoulou MD, Vlachos IS, Hatzigeorgiou AG, Ma L, Zhang Z, Puetz J, Stadler PF, McDonald D, Basu S, Fey P, Engel SR, Cherry JM, Volders PJ, Mestdagh P, Wower J, Clark MB, Quek XC, Dinger ME, , ",", , , ","Biotechnology and Biological Sciences Research Council, Wellcome Trust, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, Intramural NIH HHS, NHGRI NIH HHS, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, Wellcome Trust, Alzheimers Research UK, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, British Heart Foundation, NHGRI 
NIH HHS, Biotechnology and Biological Sciences Research Council, NHLBI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, Alzheimers Research UK, NHGRI NIH HHS, National Institutes of Health, National Institutes of Health, NLM NIH HHS, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, NHGRI NIH HHS, Charles University, NHGRI NIH HHS, Wellcome Trust, NHGRI NIH HHS, Wellcome Trust, Biotechnology and Biological Sciences Research Council",239.0, +25352545,BDB,0.792936504,BDB,0.792936504,,0,1,http://swift.cmbi.ru.nl/gv/facilities,"HTTPConnectionPool(host='swift.cmbi.ru.nl', port=80): Max retries exceeded with url: /gv/facilities (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-10-28,"Centre for Molecular and Biomolecular Informatics, CMBI, Radboud university medical center, Geert Grooteplein Zuid 26-28 6525 GA Nijmegen, The Netherlands.","Touw WG, Baakman C, Black J, te Beek TA, Krieger E, Joosten RP, Vriend G",,Dutch Research Council (NWO),235.0,Netherlands +"25355513, 22135288",Dr.VIS,0.991928021,Dr.VIS,0.991928021,,0,2,http://www.bioinfo.org/drvis,301,United States,"(37.4316,-78.6569)",no_wayback,2014-10-29,"Department of Liver Surgery, Peking Union Medical College Hospital, Chinese Academy of Medical Sciences and Peking Union Medical College (CAMS & PUMC), Beijing, China., School of Life Sciences and Technology, Tongji University, 1239 Siping Road, Shanghai, China.","Yang X, Li M, Liu Q, Zhang Y, Qian J, Wan X, Wang A, Zhang H, Zhu C, Lu X, Mao Y, Sang X, Zhao H, Zhao Y, Zhang X, Zhao X, Liu Q, Cai Q, Li Y, Xu C, Li Y, Li Z, Zhang X",", ",", ",18.0,"China, China" +"25404129, 22135290",PoSSuM,0.947280467,PoSSuM,0.947280467,PoSSuM drug search,0.722460806,2,"http://possum.cbrc.jp/PoSSuM/, http://possum.cbrc.jp/PoSSuM/drug_search","301, 301",,", ","http://web.archive.org/web/20220503180122/http://possum.cbrc.jp/PoSSuM/, no_wayback",2014-11-17,"Laboratory of Bioinformatics, National 
Institute of Biomedical Innovation (NIBIO), 7-6-8 Saito-Asagi, Ibaraki, Osaka 567-0085, Japan Computational Biology Research Center (CBRC), National Institute of Advanced Industrial Science and Technology (AIST), 2-4-7 Aomi, Koto-ku, Tokyo 135-0064, Japan k-tomii@aist.go.jp., Department of Computational Biology, Graduate School of Frontier Sciences, The University of Tokyo, 5-1-5 Kashiwanoha, Kashiwa, Chiba 277-8568, Japan.","Ito J, Ikeda K, Yamada K, Mizuguchi K, Tomii K, Ito J, Tabei Y, Shimizu K, Tsuda K, Tomii K",", ",", ",34.0,"Japan, Japan, Japan" +"25414350, 23193287",GenBank,0.986147463,GenBank,0.986147463,,0,2,http://www.ncbi.nlm.nih.gov/genbank,301,United States,"(38.8341,-76.7974)",http://web.archive.org/web/20221110022916/https://www.ncbi.nlm.nih.gov/genbank/,2014-11-20,"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Building 38A, 8600 Rockville Pike, Bethesda, MD 20894, USA., National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Building 38A, 8600 Rockville Pike, Bethesda, MD 20894, USA.","Benson DA, Clark K, Karsch-Mizrachi I, Lipman DJ, Ostell J, Sayers EW, Benson DA, Cavanaugh M, Clark K, Karsch-Mizrachi I, Lipman DJ, Ostell J, Sayers EW",", ","Intramural NIH HHS, Intramural NIH HHS",1272.0,"United States, United States" +"25425035, 22439011",microPIR,0.981774867,microPIR,0.981774867,promoter interaction resource,0.824124861,2,http://www4a.biotec.or.th/micropir2,404,,,http://web.archive.org/web/20200213112936/http://www4a.biotec.or.th:80/micropir2/,2014-11-25,"Genome Technology Research Unit, National Center for Genetic Engineering and Biotechnology (BIOTEC), National Science and Technology Development Agency (NSTDA), Pathum Thani, Thailand jittima.pir@biotec.or.th., Genome Institute, National Center for Genetic Engineering and Biotechnology, Pathumthani, Thailand. 
jittima.pir@biotec.or.th","Piriyapongsa J, Bootchai C, Ngamphiw C, Tongsima S, Piriyapongsa J, Bootchai C, Ngamphiw C, Tongsima S",", ",", ",31.0,"Thailand, Thailand" +"25428361, 24931982",GRASP,0.99731338,GRASP,0.99731338,Genome-Wide Repository of Associations between SNPs and Phenotypes,0.985747047,2,http://apps.nhlbi.nih.gov/Grasp/Overview.aspx,"HTTPConnectionPool(host='apps.nhlbi.nih.gov', port=80): Max retries exceeded with url: /Grasp/Overview.aspx (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2014-11-26,"Cardiovascular Epidemiology and Human Genomics Branch, National Heart, Lung, and Blood Institute, The Framingham Heart Study, Framingham, MA 01702, USA., Cardiovascular Epidemiology and Human Genomics Branch, National Heart, Lung and Blood Institute, The Framingham Heart Study, Framingham, MA 01702, University of Massachusetts Medical School, Worcester, MA 01655 and Division of Cardiology, Massachusetts General Hospital, Boston, MA 02114, USACardiovascular Epidemiology and Human Genomics Branch, National Heart, Lung and Blood Institute, The Framingham Heart Study, Framingham, MA 01702, University of Massachusetts Medical School, Worcester, MA 01655 and Division of Cardiology, Massachusetts General Hospital, Boston, MA 02114, USA.","Eicher JD, Landowski C, Stackhouse B, Sloan A, Chen W, Jensen N, Lien JP, Leslie R, Johnson AD, Leslie R, O'Donnell CJ, Johnson AD",", ","NIGMS NIH HHS, Intramural NIH HHS, Intramural NIH HHS",239.0,"United States, United States" +25635527,ARN,0.983717382,ARN,0.983717382,Autophagy Regulatory Network,0.98153131,1,http://autophagy-regulation.org,200,United States,"(32.7797,-96.8022)",http://web.archive.org/web/20221016224227/http://autophagy-regulation.org/,2015-01-01,"a Department of Genetics ; Eötvös Loránd University ; Budapest , Hungary.","Türei D, Földvári-Nagy L, Fazekas D, Módos D, Kubisch J, Kadlecsik T, Demeter A, Lenti K, Csermely P, Vellai T, 
Korcsmáros T",,"Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",48.0,Hungary +"25754864, 21622642",mycoCLAP,0.783426881,mycoCLAP,0.783426881,Characterized,0.764837186,2,http://mycoclap.fungalgenomics.ca,302,Canada,"(45.4724,-73.6141)",no_wayback,2015-03-08,"Centre for Structural and Functional Genomics, Department of Computer Science and Software Engineering, Department of Chemistry and Biochemistry, and Department of Biology Concordia University, Montréal, QC, USA., Centre for Structural and Functional Genomics, Concordia University, Montreal QC H4B 1R6, Canada.","Strasser K, McDonnell E, Nyaga C, Wu M, Wu S, Almeida H, Meurs MJ, Kosseim L, Powlowski J, Butler G, Tsang A, Murphy C, Powlowski J, Wu M, Butler G, Tsang A",", ",", ",43.0,"Canada, United States" +26110276,BGD,0.959574401,BGD,0.959574401,Genome Database,0.728102009,1,http://donglab.ecnu.edu.cn/databases/BatGenome,"HTTPConnectionPool(host='donglab.ecnu.edu.cn', port=80): Max retries exceeded with url: /databases/BatGenome (Caused by ReadTimeoutError(""HTTPConnectionPool(host='donglab.ecnu.edu.cn', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2015-06-25,"Institute of Molecular Ecology and Evolution, SKLEC & IECR, East China Normal University, Shanghai, China.","Fang J, Wang X, Mu S, Zhang S, Dong D",,,2.0,"China, China" +"26141515, 22013167",mirEX,0.982228279,mirEX,0.982228279,,0,2,http://www.combio.pl/mirex,"HTTPConnectionPool(host='www.combio.pl', port=80): Max retries exceeded with url: /mirex (Caused by ReadTimeoutError(""HTTPConnectionPool(host='www.combio.pl', port=80): Read timed out. (read timeout=5)""))",,,no_wayback,2015-06-16,"Department of Computational Biology, Institute of Molecular Biology and Biotechnology, Faculty of Biology, Adam Mickiewicz University, Umultowska 89, 61-614, Poznan, Poland. 
andrzejz@amu.edu.pl., Department of Gene Expression, Faculty of Biology, Institute of Molecular Biology and Biotechnology, Adam Mickiewicz University, Umultowska 89, 61-614 Poznan, Poland.","Zielezinski A, Dolata J, Alaba S, Kruszka K, Pacak A, Swida-Barteczka A, Knop K, Stepien A, Bielewicz D, Pietrykowska H, Sierocka I, Sobkowiak L, Lakomiak A, Jarmolowski A, Szweykowska-Kulinska Z, Karlowski WM, Bielewicz D, Dolata J, Zielezinski A, Alaba S, Szarzynska B, Szczesniak MW, Jarmolowski A, Szweykowska-Kulinska Z, Karlowski WM",", ","European Regional Development Fund, Narodowe Centrum Nauki, Narodowe Centrum Nauki, Foundation For Polish Science, Narodowe Centrum Nauki, Foundation For Polish Science, Narodowe Centrum Nauki, KNOW Poznan RNA Centre, Narodowe Centrum Nauki, Narodowe Centrum Nauki, Narodowe Centrum Nauki, Narodowe Centrum Nauki, Narodowe Centrum Nauki, Foundation For Polish Science, ",51.0,"Poland, Poland" +26424083,MGDB,0.996766806,MGDB,0.996766806,Melanoma Gene Database,0.976466978,1,http://bioinfo.ahu.edu.cn:8080/Melanoma/index.jsp,302,,,no_wayback,2015-09-30,"Institute of Health Sciences, School of Computer Science and Technology.","Zhang D, Zhu R, Zhang H, Zheng CH, Xia J",,,5.0, +"26432828, 26519406, 30407532, 30407532, 30407532, 30407532, 30407532",MaizeGDB,0.995507419,MaizeGDB,0.995507419,and,0.607942879,3,http://www.maizegdb.org,301,,,http://web.archive.org/web/20221107171901/https://www.maizegdb.org/,2019-01-01,"USDA-ARS Corn Insects and Crop Genetics Research Unit, Iowa State University, Ames, IA 50011, USA Department of Computer Science, Iowa State University, Ames, IA 50011, USA carson.andorf@ars.usda.gov., Maize Genetics and Genomics Database, USDA-ARS, Corn Insects and Crop Genetics Research Unit, Iowa State University, Ames, IA, 50011, USA. 
lisaharper@me.com., USDA-ARS Corn Insects and Crop Genetics Research Unit, Ames, IA 50011, USA., USDA-ARS Corn Insects and Crop Genetics Research Unit, Ames, IA 50011, USA., USDA-ARS Corn Insects and Crop Genetics Research Unit, Ames, IA 50011, USA., USDA-ARS Corn Insects and Crop Genetics Research Unit, Ames, IA 50011, USA., USDA-ARS Corn Insects and Crop Genetics Research Unit, Ames, IA 50011, USA.","Andorf CM, Cannon EK, Portwood JL 2nd, Gardiner JM, Harper LC, Schaeffer ML, Braun BL, Campbell DA, Vinnakota AG, Sribalusu VV, Huerta M, Cho KT, Wimalanathan K, Richter JD, Mauch ED, Rao BS, Birkett SM, Sen TZ, Lawrence-Dill CJ, Harper L, Gardiner J, Andorf C, Lawrence CJ, Portwood JL 2nd, Woodhouse MR, Cannon EK, Gardiner JM, Harper LC, Schaeffer ML, Walsh JR, Sen TZ, Cho KT, Schott DA, Braun BL, Dietze M, Dunfee B, Elsik CG, Manchanda N, Coe E, Sachs M, Stinard P, Tolbert J, Zimmerman S, Andorf CM, Portwood JL 2nd, Woodhouse MR, Cannon EK, Gardiner JM, Harper LC, Schaeffer ML, Walsh JR, Sen TZ, Cho KT, Schott DA, Braun BL, Dietze M, Dunfee B, Elsik CG, Manchanda N, Coe E, Sachs M, Stinard P, Tolbert J, Zimmerman S, Andorf CM, Portwood JL 2nd, Woodhouse MR, Cannon EK, Gardiner JM, Harper LC, Schaeffer ML, Walsh JR, Sen TZ, Cho KT, Schott DA, Braun BL, Dietze M, Dunfee B, Elsik CG, Manchanda N, Coe E, Sachs M, Stinard P, Tolbert J, Zimmerman S, Andorf CM, Portwood JL 2nd, Woodhouse MR, Cannon EK, Gardiner JM, Harper LC, Schaeffer ML, Walsh JR, Sen TZ, Cho KT, Schott DA, Braun BL, Dietze M, Dunfee B, Elsik CG, Manchanda N, Coe E, Sachs M, Stinard P, Tolbert J, Zimmerman S, Andorf CM, Portwood JL 2nd, Woodhouse MR, Cannon EK, Gardiner JM, Harper LC, Schaeffer ML, Walsh JR, Sen TZ, Cho KT, Schott DA, Braun BL, Dietze M, Dunfee B, Elsik CG, Manchanda N, Coe E, Sachs M, Stinard P, Tolbert J, Zimmerman S, Andorf CM",", , , , , , ",", , United States Department of Agriculture, United States Department of Agriculture, United States Department of Agriculture, United States 
Department of Agriculture, United States Department of Agriculture",439.0,"United States, United States, United States, United States, United States, United States, United States, United States" +"26434508, 21989406, 24009883",ExoCarta,0.995963633,ExoCarta,0.995963633,,0,3,"http://www.exocarta.org, http://www.funrich.org","200, 200","Australia, Australia","(-37.6154,145.0186), (-37.6154,145.0186)","http://web.archive.org/web/20221017013236/http://www.exocarta.org/, http://web.archive.org/web/20220619231521/http://www.funrich.org/",2015-10-03,"Department of Biochemistry and Genetics, La Trobe Institute for Molecular Science, La Trobe University, Melbourne, VIC 3086, Australia. Electronic address: S.Keerthikumar@latrobe.edu.au., Department of Biochemistry, La Trobe Institute for Molecular Science, La Trobe University, Melbourne, Victoria 3086, Australia. Richard.Simpson@latrobe.edu.au, Department of Biochemistry, La Trobe Institute for Molecular Science, La Trobe University, Melbourne, Victoria, Australia.","Keerthikumar S, Chisanga D, Ariyaratne D, Al Saffar H, Anand S, Zhao K, Samuel M, Pathan M, Jois M, Chilamkurti N, Gangoda L, Mathivanan S, Mathivanan S, Fahner CJ, Reid GE, Simpson RJ, Simpson RJ, Kalra H, Mathivanan S",", , ","NIDA NIH HHS, Australian Research Council Discovery, Australian Research Council Discovery, NIDA NIH HHS, Australian Research Council, National Institutes of Health, , ",1086.0,"Australia, Australia, Australia" +26503249,BDB,0.991443157,BDB,0.991443157,Biopanning Data Bank,0.795208553,1,http://immunet.cn/bdb,200,,,http://web.archive.org/web/20220320115704/http://immunet.cn/bdb/,2015-10-25,"Center of Bioinformatics (COBI), Key Laboratory for NeuroInformation of Ministry of Education, University of Electronic Science and Technology of China, Chengdu 610054, China.","He B, Chai G, Duan Y, Yan Z, Qiu L, Zhang H, Liu Z, He Q, Han K, Ru B, Guo FB, Ding H, Lin H, Wang X, Rao N, Zhou P, Huang J",,,31.0,"China, China" +"26516186, 24608173, 
22080562",NCG,0.929202914,NCG,0.929202914,Network of Cancer Genes,0.920580149,3,http://ncg.kcl.ac.uk,200,,,no_wayback,2015-10-29,"Division of Cancer Studies, King's College London, London SE11UL, UK., Department of Experimental Oncology, European Institute of Oncology, IFOM-IEO Campus, Via Adamello 16, 20139 Milan, Italy and Division of Cancer Studies, King's College London, London SE1 1UL, UK., Department of Experimental Oncology, European Institute of Oncology, IFOM-IEO Campus, Via Adamello 16, 20139 Milan, Italy.","An O, Dall'Olio GM, Mourikis TP, Ciccarelli FD, An O, Pendino V, D'Antonio M, Ratti E, Gentilini M, Ciccarelli FD, D'Antonio M, Pendino V, Sinha S, Ciccarelli FD",", , ",", , ",102.0,"Italy, Italy" +"26519400, 33125081, 24214961, 26476454, 27899662, 30321428, 21882442, 23192552, 22080510",KEGG,0.995484889,KEGG,0.995484889,Kyoto Encyclopedia of Genes and Genomes,0.887442343,9,http://www.kegg.jp,301,,,http://web.archive.org/web/20221110015441/https://www.kegg.jp/,2021-01-01,"Institute for Chemical Research, Kyoto University, Uji, Kyoto, 611-0011, Japan. kanehisa@kuicr.kyoto-u.ac.jp., Institute for Chemical Research, Kyoto University, Uji, Kyoto 611-0011, Japan., Institute for Chemical Research, Kyoto University, Uji, Kyoto 611-0011, Japan and Life Science Solutions Department, Fujitsu Kyushu Systems Ltd., Sawara-ku, Fukuoka 814-8589, Japan., Institute for Chemical Research, Kyoto University, Uji, Kyoto 611-0011, Japan kanehisa@kuicr.kyoto-u.ac.jp., Institute for Chemical Research, Kyoto University, Uji, Kyoto 611-0011, Japan kanehisa@kuicr.kyoto-u.ac.jp., Institute for Chemical Research, Kyoto University, Uji, Kyoto 611-0011, Japan., None, Bioinformatics Center, Institute for Chemical Research, Kyoto University, Uji, Japan. kanehisa@kuicr.kyoto-u.ac.jp, Bioinformatics Center, Institute for Chemical Research, Kyoto University, Uji, Kyoto 611-0011, Japan. 
kanehisa@kuicr.kyoto-u.ac.jp","Kanehisa M, Kanehisa M, Furumichi M, Sato Y, Ishiguro-Watanabe M, Tanabe M, Kanehisa M, Goto S, Sato Y, Kawashima M, Furumichi M, Tanabe M, Kanehisa M, Sato Y, Kawashima M, Furumichi M, Tanabe M, Kanehisa M, Furumichi M, Tanabe M, Sato Y, Morishima K, Kanehisa M, Sato Y, Furumichi M, Morishima K, Tanabe M, Kanehisa M, Limviphuvadh V, Tanabe M, Kanehisa M, Kanehisa M, Goto S, Sato Y, Furumichi M, Tanabe M",", , , , , , , , ",", National Bioscience Database Center, , , , Japan Science and Technology Agency, , , ",10213.0,"Japan, Japan, Japan, Japan, Japan, Japan, Japan, Japan, Japan" +26519466,IC4R,0.995473579,IC4R,0.995473579,Information Commons for Rice,0.858307824,1,http://ic4r.org,200,,,http://web.archive.org/web/20221024112216/http://ic4r.org/,2015-10-30,None,", Hao L, Zhang H, Zhang Z, Hu S, Xue Y",,,23.0, +26527721,PlantsDB,0.988374114,PlantsDB,0.988374114,Plant Genome and Systems Biology,0.978462391,1,"http://pgsb.helmholtz-muenchen.de/plant/index.jsp, http://transplantdb.eu","302, 301",,", ","http://web.archive.org/web/20210928002759/https://pgsb.helmholtz-muenchen.de/plant/index.jsp, http://web.archive.org/web/20220307192648/http://www.transplantdb.eu/",2015-11-02,"Plant Genome and Systems Biology, Helmholtz Center Munich - German Research Center for Environmental Health, 85764 Neuherberg, Germany manuel.spannagl@helmholtz-muenchen.de.","Spannagl M, Nussbaumer T, Bader KC, Martis MM, Seidel M, Kugler KG, Gundlach H, Mayer KF",,,31.0,Germany +"26578581, 33151284, 24888447",DBAASP,0.996422017,DBAASP,0.996422017,Database of Antimicrobial Activity and Structure of Peptides,0.945471898,3,http://dbaasp.org,302,United States,"(39.0438,-77.4874)",no_wayback,2021-01-01,"Ivane Beritashvili Center of Experimental Biomedicine, Tbilisi 0160, Georgia m.pirtskhalava@lifescience.org.ge., Ivane Beritashvili Center of Experimental Biomedicine, Tbilisi 0160, Georgia., Laboratory of Bioinformatics, Ivane Beritashvili Center of Experimental 
Biomedicine, Tbilisi, Georgia.","Pirtskhalava M, Gabrielian A, Cruz P, Griggs HL, Squires RB, Hurt DE, Grigolava M, Chubinidze M, Gogoladze G, Vishnepolsky B, Alekseyev V, Rosenthal A, Tartakovsky M, Pirtskhalava M, Amstrong AA, Grigolava M, Chubinidze M, Alimbarashvili E, Vishnepolsky B, Gabrielian A, Rosenthal A, Hurt DE, Tartakovsky M, Gogoladze G, Grigolava M, Vishnepolsky B, Chubinidze M, Duroux P, Lefranc MP, Pirtskhalava M",", , ","PHS HHS, NIAID NIH HHS, International Science and Technology Center, NIAID NIH HHS, National Institute of Allergy and Infectious Diseases, National Institute of Allergy and Infectious Diseases, International Science and Technology Center",134.0,"Georgia, Georgia, Georgia" +"26578587, 23175613",BioGPS,0.973907888,BioGPS,0.973907888,,0,2,http://biogps.org,200,United States,"(37.3394,-121.895)",no_wayback,2015-11-17,"Department of Molecular and Experimental Medicine, The Scripps Research Institute, La Jolla, CA 92037, USA asu@scripps.edu., Department of Molecular and Experimental Medicine, The Scripps Research Institute, La Jolla, CA 92037, USA. 
cwu@scripps.edu","Wu C, Jin X, Tsueng G, Afrasiabi C, Su AI, Wu C, Macleod I, Su AI",", ","NIGMS NIH HHS, NIGMS NIH HHS, NCATS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",371.0,"United States, United States" +"26586801, 23175603",HOCOMOCO,0.995230079,HOCOMOCO,0.995230079,Homo sapiens comprehensive model collection,0.977648824,2,"http://hocomoco.autosome.ru, http://www.cbrc.kaust.edu.sa/hocomoco10","301, 302",,", ","http://web.archive.org/web/20170829224406/http://hocomoco.autosome.ru:80/, http://web.archive.org/web/20170718144753/http://www.cbrc.kaust.edu.sa:80/hocomoco10/",2015-11-19,"Engelhardt Institute of Molecular Biology, Russian Academy of Sciences, 119991, GSP-1, Vavilova 32, Moscow, Russia Vavilov Institute of General Genetics, Russian Academy of Sciences, 119991, GSP-1, Gubkina 3, Moscow, Russia ivan.kulakovskiy@gmail.com., Laboratory of Bioinformatics and Systems Biology, Engelhardt Institute of Molecular Biology, Russian Academy of Sciences, Vavilov Street 32, Moscow 119991, GSP-1, Russia. ivan.kulakovskiy@gmail.com","Kulakovskiy IV, Vorontsov IE, Yevshin IS, Soboleva AV, Kasianov AS, Ashoor H, Ba-Alawi W, Bajic VB, Medvedeva YA, Kolpakov FA, Makeev VJ, Kulakovskiy IV, Medvedeva YA, Schaefer U, Kasianov AS, Vorontsov IE, Bajic VB, Makeev VJ",", ",", ",219.0, +"26586805, 24214998",COLOMBOS,0.998174131,COLOMBOS,0.998174131,,0,2,http://colombos.net,"HTTPConnectionPool(host='colombos.net', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to colombos.net timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20220331050803/http://www.colombos.net/,2015-11-19,"Department of Computational Biology, Research and Innovation Center, Fondazione Edmund Mach, San Michele all'Adige, Trento (TN) 38010, Italy Department of Biology, University of Padova, Padova (PD) 35121, Italy., Department of Mathematics and Computer Science, University of Antwerp, B-2020 Antwerp, Belgium, Biomedical Informatics Research Center Antwerp (biomina), University of Antwerp/Antwerp University Hospital, B-2650 Edegem, Belgium, Department of Computational Biology, Research and Innovation Center, Fondazione Edmund Mach, San Michele all'Adige, Trento (TN) 38010, Italy, Department of Microbial and Molecular Sciences, KU Leuven, Leuven B-3001, Belgium, Centro de Ciencias Genómicas, Universidad Nacional Autónoma de México, Cuernavaca, Morelos 62210, Mexico, Department of Plant Biotechnology and Bioinformatics, Ghent University, Gent 9052, Belgium and Department of Information Technology, IMinds, Ghent University, Gent 9052, Belgium.","Moretto M, Sonego P, Dierckxsens N, Brilli M, Bianco L, Ledezma-Tejeida D, Gama-Castro S, Galardini M, Romualdi C, Laukens K, Collado-Vides J, Meysman P, Engelen K, Meysman P, Sonego P, Bianco L, Fu Q, Ledezma-Tejeida D, Gama-Castro S, Liebens V, Michiels J, Laukens K, Marchal K, Collado-Vides J, Engelen K",", ",", ",52.0,"Belgium, Belgium, Belgium, Belgium, Belgium, Italy, Italy, Italy, Mexico" +"26589635, 21995777",BRAD,0.977056324,BRAD,0.977056324,Brassica database,0.716363907,2,http://brassicadb.org/brad,301,United States,"(37.7823,-122.391)",http://web.archive.org/web/20220615143608/https://brassicadb.org/brad/,2015-11-20,"Institute of Vegetables and Flowers, Chinese Academy of Agricultural Sciences, Beijing 100081, China., Institute of Vegetables and Flowers, Chinese Academy of Agricultural Sciences, Beijing 100081, China.","Wang X, Wu J, Liang J, Cheng F, Wang X, Cheng F, Liu S, Wu J, Fang L, Sun S, Liu B, Li P, Hua W, 
Wang X",", ",", European Commission FP7",265.0,"China, China" +"26590263, 24270788",Lynx,0.996176362,Lynx,0.996176362,,0,2,http://lynx.ci.uchicago.edu,302,,,http://web.archive.org/web/20211223120529/http://lynx.ci.uchicago.edu/,2015-11-20,"Department of Human Genetics, University of Chicago, 920 E. 58th Street, Chicago, IL 60637, USA Computation Institute, University of Chicago, 5735 S. Ellis Avenue, Chicago, IL 60637, USA sulakhe@uchicago.edu., Computation Institute, the University of Chicago, Chicago, IL 60637, USA, Department of Human Genetics, the University of Chicago, Chicago, IL 60637, USA, Department of Computer Science, Illinois Institute of Technology, Chicago, IL 60616, USA and Toyota Technological Institute at Chicago, Chicago, IL 60637, USA.","Sulakhe D, Xie B, Taylor A, D'Souza M, Balasubramanian S, Hashemifar S, White S, Dave UJ, Agam G, Xu J, Wang S, Gilliam TC, Maltsev N, Sulakhe D, Balasubramanian S, Xie B, Feng B, Taylor A, Wang S, Berrocal E, Dave U, Xu J, Börnigen D, Gilliam TC, Maltsev N",", ","NCATS NIH HHS, NIMH NIH HHS, NINDS NIH HHS, NIGMS NIH HHS, NINDS NIH HHS, NIGMS NIH HHS, NIMH NIH HHS, NIMH NIH HHS",19.0,"United States, United States, United States, United States, United States, United States" +"26612867, 33436076, 23203985",Dfam,0.994080424,Dfam,0.994080424,,0,3,http://dfam.org,301,,,http://web.archive.org/web/20221102110817/https://dfam.org/,2021-01-12,"Institute for Systems Biology, Seattle, WA 98109, USA rhubley@systemsbiology.org., Institute for Systems Biology, Seattle, WA, 98109, USA. jessica.storer@isbscience.org., HHMI Janelia Farm Research Campus, Ashburn, VA 20147, USA. 
wheelert@janelia.hhmi.org","Hubley R, Finn RD, Clements J, Eddy SR, Jones TA, Bao W, Smit AF, Wheeler TJ, Storer J, Hubley R, Rosen J, Wheeler TJ, Smit AF, Wheeler TJ, Clements J, Eddy SR, Hubley R, Jones TA, Jurka J, Smit AF, Finn RD",", , ","NHGRI NIH HHS, NHGRI NIH HHS, Howard Hughes Medical Institute, NLM NIH HHS, NHGRI NIH HHS, National Human Genome Research Institute, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, National Human Genome Research Institute, Howard Hughes Medical Institute, NHGRI NIH HHS, NLM NIH HHS",408.0,"United States, United States, United States" +"26673716, 30357350, 33125078, 24288371",Pfam,0.990840673,Pfam,0.990840673,the protein families database,0.763704188,4,http://pfam.xfam.org,200,,,http://web.archive.org/web/20221107172204/http://pfam.xfam.org/,2021-01-01,"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK rdf@ebi.ac.uk., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton CB10 1SD, UK., HHMI Janelia Farm Research Campus, 19700 Helix Drive, Ashburn, VA 20147 USA, European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK, Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA, UK, MRC Functional Genomics Unit, Department of Physiology, Anatomy and Genetics, University of Oxford, Oxford, OX1 3QX, UK, Institute of Biotechnology and Department of Biological and Environmental Sciences, University of Helsinki, PO Box 56 (Viikinkaari 5), 00014 Helsinki, Finland and Stockholm Bioinformatics Center, Swedish eScience Research Center, Department of Biochemistry and Biophysics, Science for Life Laboratory, Stockholm 
University, PO Box 1031, SE-17121 Solna, Sweden.","Finn RD, Coggill P, Eberhardt RY, Eddy SR, Mistry J, Mitchell AL, Potter SC, Punta M, Qureshi M, Sangrador-Vegas A, Salazar GA, Tate J, Bateman A, El-Gebali S, Mistry J, Bateman A, Eddy SR, Luciani A, Potter SC, Qureshi M, Richardson LJ, Salazar GA, Smart A, Sonnhammer ELL, Hirsh L, Paladin L, Piovesan D, Tosatto SCE, Finn RD, Mistry J, Chuguransky S, Williams L, Qureshi M, Salazar GA, Sonnhammer ELL, Tosatto SCE, Paladin L, Raj S, Richardson LJ, Finn RD, Bateman A, Finn RD, Bateman A, Clements J, Coggill P, Eberhardt RY, Eddy SR, Heger A, Hetherington K, Holm L, Mistry J, Sonnhammer EL, Tate J, Punta M",", , , ","Wellcome Trust, Howard Hughes Medical Institute, Wellcome Trust, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, Horizon 2020, Biotechnology and Biological Sciences Research Council, Biotechnology and Biosciences Research Council, European Cooperation in Science and Technology, National Institutes of Health, Wellcome Trust, European Molecular Biology Laboratory Core Funds, Open Targets, European Union's Horizon 2020 MSCA-RISE action, Biotechnology and Biological Sciences Research Council, Wellcome Trust, Howard Hughes Medical Institute, Wellcome Trust, Biotechnology and Biological Sciences Research Council",7562.0,"Finland, Sweden, United States" +"26719120, 21258066",TOMATOMA,0.9822613,TOMATOMA,0.9822613,,0,2,http://tomatoma.nbrp.jp,301,,,http://web.archive.org/web/20221025072638/https://tomatoma.nbrp.jp/,2015-12-30,"Gene Research Center, Faculty of Life and Environmental Sciences, University of Tsukuba, Tsukuba, 305-8572 Japan ezura@gene.tsukuba.ac.jp., Graduate School of Life and Environmental Sciences, University of Tsukuba, Tsukuba, 305-8572 Japan.","Shikata M, Hoshikawa K, Ariizumi T, Fukuda N, Yamazaki Y, Ezura H, Saito T, Ariizumi T, Okabe Y, Asamizu E, Hiwasa-Tanase K, Fukuda N, Mizoguchi T, Yamazaki Y, Aoki K, Ezura H",", ",", ",108.0,"Japan, Japan" 
+26822210,ASDB,0.991850942,ASDB,0.991850942,Annotated Scaffold Database,0.81746671,1,http://www.rcdd.org.cn/asdb/with,"HTTPConnectionPool(host='www.rcdd.org.cn', port=80): Max retries exceeded with url: /asdb/with (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,no_wayback,2016-01-28,"Research Center for Drug Discovery, School of Pharmaceutical Sciences, Sun Yat-sen University, Guangzhou 510006, China.","Liu Z, Ding P, Yan X, Zheng M, Zhou H, Xu Y, Du Y, Gu Q, Xu J",,,2.0,China +26868053,CLD,0.986547867,CLD,0.986547867,Corvids Literature Database,0.931742728,1,http://www.corvids.de/cld,301,,,no_wayback,2016-02-11,"Botanic Garden and Botanical Museum Berlin-Dahlem, Freie Universität Berlin, Koenigin-Luise-Str. 6-8, Berlin 14195, Germany Section Ornithology, Zoological Research Museum Alexander Koenig, Centre for Taxonomy and Evolutionary Research, Adenauerallee 160, Bonn 53113, Germany g.droege@bgbm.org.","Droege G, Töpfer T",,,1.0,"Germany, Germany" +"26953092, 24174543",Hemolytik,0.997379065,Hemolytik,0.997379065,,0,2,http://crdd.osdd.net/raghava/hemopi,"HTTPConnectionPool(host='crdd.osdd.net', port=80): Max retries exceeded with url: /raghava/hemopi (Caused by ReadTimeoutError(""HTTPConnectionPool(host='crdd.osdd.net', port=80): Read timed out. 
(read timeout=5)""))",,,http://web.archive.org/web/20220406031221/http://crdd.osdd.net/raghava/hemopi/,2016-03-08,"Bioinformatics Centre, CSIR-Institute of Microbial Technology, Sector 39A, Chandigarh, India., Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh 160036, India and Cell Biology and Immunology Division, CSIR-Institute of Microbial Technology, Chandigarh 160036, India.","Chaudhary K, Kumar R, Singh S, Tuknait A, Gautam A, Mathur D, Anand P, Varshney GC, Raghava GP, Gautam A, Chaudhary K, Singh S, Joshi A, Anand P, Tuknait A, Mathur D, Varshney GC, Raghava GP",", ",", ",85.0,"India, India, India" +"26989145, 21408081",TargetMine,0.979291916,TargetMine,0.979291916,,0,2,http://targetmine.mizuguchilab.org,302,Japan,"(34.372,135.3098)",http://web.archive.org/web/20220716140704/https://targetmine.mizuguchilab.org/,2016-03-17,"National Institutes of Biomedical Innovation, Health and Nutrition, 7-6-8 Saito-Asagi, Ibaraki, Osaka 567-0085, Japan kenji@nibiohn.go.jp., National Institute of Biomedical Innovation, Saito-Asagi, Ibaraki, Osaka, Japan.","Chen YA, Tripathi LP, Mizuguchi K, Chen YA, Tripathi LP, Mizuguchi K",", ",", ",91.0,"Japan, Japan" +"27114493, 24234444",CollecTF,0.998359561,CollecTF,0.998359561,,0,2,http://www.collectf.org,302,,,no_wayback,2016-04-25,"Department of Biological Sciences, University of Maryland Baltimore County (UMBC), 1000 Hilltop Circle, Baltimore, MD, 21250, USA., Department of Biological Sciences, University of Maryland Baltimore County (UMBC), 1000 Hilltop Circle, Baltimore, MD 21250, USA.","Kılıç S, Sagitova DM, Wolfish S, Bely B, Courtot M, Ciufo S, Tatusova T, O'Donovan C, Chibucos MC, Martin MJ, Erill I, Kiliç S, White ER, Sagitova DM, Cornish JP, Erill I",", ",", ",55.0,"United States, United States" +27375595,DGV,0.96955502,DGV,0.96955502,Dengue Genographic Viewer,0.616353422,1,http://gph.niid.go.jp/geograph/dengue/content/genomemap,302,Japan,"(35.6627,139.5295)",no_wayback,2016-06-07,"Pathogen 
Genomics Center, National Institute of Infectious Diseases Tokyo, Japan.","Yamashita A, Sakamoto T, Sekizuka T, Kato K, Takasaki T, Kuroda M",,,6.0,Japan +27402678,dbSNP,0.988259852,dbSNP,0.988259852,,0,1,http://www.actrec.gov.in/pi-webpages/AmitDutt/TMCSNP/TMCSNPdp.html,200,India,"(20.7405,78.6062)",http://web.archive.org/web/20220721233244/http://www.actrec.gov.in/pi-webpages/AmitDutt/TMCSNP/TMCSNPdp.html,2016-07-09,"Integrated Genomics Laboratory, Advanced Centre for Treatment Research Education in Cancer (ACTREC).","Upadhyay P, Gardi N, Desai S, Sahoo B, Singh A, Togar T, Iyer P, Prasad R, Chandrani P, Gupta S, Dutt A",,,5.0, +27603020,DIANA-TarBase,0.983838618,DIANA-TarBase,0.983838618,,0,1,"http://www.microrna.gr/tarbase, http://www.microrna.gr","301, 200","Greece, Greece","(37.9011,23.8727), (37.9011,23.8727)","no_wayback, http://web.archive.org/web/20220615003911/http://microrna.gr/",2016-09-07,"DIANA-Lab, Department of Electrical & Computer Engineering, University of Thessaly, Volos, Greece.","Paraskevopoulou MD, Vlachos IS, Hatzigeorgiou AG",,,21.0,Greece +"27606777, 23143270",ConsensusPathDB,0.997402191,ConsensusPathDB,0.997402191,,0,2,http://consensuspathdb.org,301,,,http://web.archive.org/web/20220520134428/http://www.consensuspathdb.org/,2016-09-08,"Department of Computational Molecular Biology, Max Planck Institute for Molecular Genetics, Berlin, Germany., Department of Vertebrate Genomics, Max Planck Institute for Molecular Genetics, Ihnestrasse 63-73, 14195 Berlin, Germany. kamburov@molgen.mpg.de","Herwig R, Hardt C, Lienhard M, Kamburov A, Kamburov A, Stelzl U, Lehrach H, Herwig R",", ",", ",572.0,"Germany, Germany" +27616775,PGD,0.98055391,PGD,0.98055391,Pangolin Genome Database,0.940320601,1,http://pangolin-genome.um.edu.my,"HTTPConnectionPool(host='pangolin-genome.um.edu.my', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to pangolin-genome.um.edu.my timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20160724064914/http://pangolin-genome.um.edu.my:80/,2016-09-11,"Genome Informatics Research Laboratory, Centre for Research in Biotechnology for Agriculture (CEBAR), High Impact Research Building, University of Malaya, 50603 Kuala Lumpur, Malaysia Department of Oral and Craniofacial Sciences, Faculty of Dentistry, University of Malaya, 50603 Kuala Lumpur, Malaysia.","Tan TK, Tan KY, Hari R, Mohamed Yusoff A, Wong GJ, Siow CC, Mutha NV, Rayko M, Komissarov A, Dobrynin P, Krasheninnikova K, Tamazian G, Paterson IC, Warren WC, Johnson WE, O'Brien SJ, Choo SW",,,2.0,"Malaysia, Malaysia" +"27650316, 25030112",DBSecSys,0.992282927,DBSecSys,0.992282927,of Burkholderia malleiSecretion Systems,0.793931067,2,http://dbsecsys.bhsai.org,302,United States,"(39.494,-77.4608)",no_wayback,2016-09-20,"Department of Defense Biotechnology High Performance Computing Software Applications Institute, Telemedicine and Advanced Technology Research Center, U.S. Army Medical Research and Materiel Command, Fort Detrick, MD 21702, USA., None","Memišević V, Kumar K, Zavaljevski N, DeShazer D, Wallqvist A, Reifman J, Memišević V, Kumar K, Cheng L, Zavaljevski N, DeShazer D, Wallqvist A, Reifman J",", ","U.S. Medical Research and Materiel Command, Defense Threat Reduction Agency, ",11.0,United States +"27694206, 24137012",GGBN,0.978382245,GGBN,0.978382245,Global Genome Biodiversity Network,0.861103143,2,http://terms.tdwg.org/wiki/GGBN_Data_Standard,"HTTPConnectionPool(host='terms.tdwg.org', port=80): Max retries exceeded with url: /wiki/GGBN_Data_Standard (Caused by ReadTimeoutError(""HTTPConnectionPool(host='terms.tdwg.org', port=80): Read timed out. (read timeout=5)""))",,,http://web.archive.org/web/20220305193011/http://terms.tdwg.org/wiki/GGBN_Data_Standard,2016-10-02,"Botanic Garden and Botanical Museum Berlin-Dahlem, Freie Universität Berlin, Königin-Luise-Str. 
6-8, Berlin 14195, Germany g.droege@bgbm.org., Botanic Garden and Botanical Museum Berlin-Dahlem, Freie Universität Berlin, Berlin 14195, Germany, National Museum of Natural History Smithsonian Institution, Washington DC 20013, USA, Zoological Research Museum Alexander Koenig, Bonn 53113, Germany, Wildlife & Environment Society of South Africa, Pretoria 0001, South Africa, National Herbarium of Victoria, Royal Botanic Gardens Melbourne, South Yarra, VIC 3141, Australia, Molecular Systematics Section, Jodrell Laboratory, Royal Botanic Gardens, Kew, Richmond, Surrey TW9 3DS, UK, Systematic Botany, Justus-Liebig-Universität, Giessen 35392, Germany, Global Biodiversity Information Facility (GBIF), Copenhagen Ø DK-2100, Denmark, Department of Zoology, The Natural History Museum, London SW7 5BD, UK, Natural History Museum of Denmark, Copenhagen K DK-1307, Denmark and Smithsonian Tropical Research Institute, Balboa Ancon, Unit 0948, Panama.","Droege G, Barker K, Seberg O, Coddington J, Benson E, Berendsohn WG, Bunk B, Butler C, Cawsey EM, Deck J, Döring M, Flemons P, Gemeinholzer B, Güntsch A, Hollowell T, Kelbert P, Kostadinov I, Kottmann R, Lawlor RT, Lyal C, Mackenzie-Dodds J, Meyer C, Mulcahy D, Nussbeck SY, O'Tuama É, Orrell T, Petersen G, Robertson T, Söhngen C, Whitacre J, Wieczorek J, Yilmaz P, Zetzsche H, Zhang Y, Zhou X, Droege G, Barker K, Astrin JJ, Bartels P, Butler C, Cantrill D, Coddington J, Forest F, Gemeinholzer B, Hobern D, Mackenzie-Dodds J, Ó Tuama É, Petersen G, Sanjur O, Schindel D, Seberg O",", ",", ",39.0,"Australia, Germany, Germany, Germany, Germany, Denmark, Denmark, Denmark, Panama, United States, South Africa, South Africa" +"27742820, 23180792",PIECE,0.997208118,PIECE,0.997208118,Plant Intron Exon Comparison and Evolution,0.978025717,2,"http://probes.pw.usda.gov/piece, http://aegilops.wheat.ucdavis.edu/piece","HTTPConnectionPool(host='probes.pw.usda.gov', port=80): Max retries exceeded with url: /piece (Caused by ConnectTimeoutError(, 
'Connection to probes.pw.usda.gov timed out. (connect timeout=5)')), 301",United States,", (38.2401,-122.0397)","http://web.archive.org/web/20220808075611/https://probes.pw.usda.gov/piece/, no_wayback",2016-10-13,"USDA-ARS, Western Regional Research Center, Crop Improvement and Genetics Research Unit, Albany, CA 94710, USA., USDA-Agriculture Research Service, Western Regional Research Center, Albany, CA 94710, USA.","Wang Y, Xu L, Thilmony R, You FM, Gu YQ, Coleman-Derr D, Wang Y, You FM, Lazo GR, Luo MC, Thilmony R, Gordon S, Kianian SF, Gu YQ",", ",", ",39.0,"United States, United States" +27789693,3DSNP,0.99662596,3DSNP,0.99662596,,0,1,http://biotech.bmi.ac.cn/3dsnp,"HTTPConnectionPool(host='biotech.bmi.ac.cn', port=80): Max retries exceeded with url: /3dsnp (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20170318003526/http://biotech.bmi.ac.cn/3dsnp/,2016-10-26,"Beijing Institute of Radiation Medicine, State Key Laboratory of Proteomics, Beijing 100850, China.","Lu Y, Quan C, Chen H, Bo X, Zhang C",,,36.0,China +27789706,CGDB,0.998253147,CGDB,0.998253147,of circadian genes in eukaryotes,0.924593337,1,http://cgdb.biocuckoo.org,200,United States,"(40.2069,-111.642)",http://web.archive.org/web/20221017054430/https://cgdb.biocuckoo.org/,2016-10-26,"Key Laboratory of Molecular Biophysics of Ministry of Education, College of Life Science and Technology and the Collaborative Innovation Center for Brain Science, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China.","Li S, Shui K, Zhang Y, Lv Y, Deng W, Ullah S, Zhang L, Xue Y",,,21.0,China +"27794040, 30357420, 33152092, 22135293, 25348402",GOLD,0.951975763,GOLD,0.951975763,Genomes OnLine Database,0.946679926,5,http://gold.jgi.doe.gov,301,Canada,"(43.6532,-79.3832)",http://web.archive.org/web/20221102110448/https://gold.jgi.doe.gov/,2021-01-01,"Prokaryotic Super Program, DOE Joint Genome Institute, Walnut 
Creek, 94598 CA, USA., Prokaryotic Super Program, DOE Joint Genome Institute, Walnut Creek, CA 94598, USA., DOE Joint Genome Institute, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA., Department of Energy Joint Genome Institute, Microbial Genomics and Metagenomics Program, 2800 Mitchell Drive, Walnut Creek, CA, USA., Prokaryotic Super Program, DOE Joint Genome Institute, Walnut Creek, CA 94598, USA tbreddy@lbl.gov.","Mukherjee S, Stamatis D, Bertsch J, Ovchinnikova G, Verezemska O, Isbandi M, Thomas AD, Ali R, Sharma K, Kyrpides NC, Reddy TB, Mukherjee S, Stamatis D, Bertsch J, Ovchinnikova G, Katta HY, Mojica A, Chen IA, Kyrpides NC, Reddy T, Mukherjee S, Stamatis D, Bertsch J, Ovchinnikova G, Sundaramurthi JC, Lee J, Kandimalla M, Chen IA, Kyrpides NC, Reddy TBK, Pagani I, Liolios K, Jansson J, Chen IM, Smirnova T, Nosrat B, Markowitz VM, Kyrpides NC, Reddy TB, Thomas AD, Stamatis D, Bertsch J, Isbandi M, Jansson J, Mallajosyula J, Pagani I, Lobos EA, Kyrpides NC",", , , , ",", Department of Energy Joint Genome Institute, U.S. Department of Energy, U.S. Department of Energy, U.S. Department of Energy, U.S. Department of Energy, NHGRI NIH HHS, ",603.0,"United States, United States, United States, United States, United States" +"27794042, 23193259, 25428375",RCSB PDB,0.994570589,RCSB PDB,0.994570589,Data Bank,0.701618314,3,http://rcsb.org,301,,,http://web.archive.org/web/20221109171247/https://www.rcsb.org/,2016-10-27,"RCSB Protein Data Bank, San Diego Supercomputer Center, University of California, San Diego, La Jolla, CA 92093, USA pwrose@ucsd.edu., San Diego Supercomputer Center, University of California San Diego, La Jolla, CA 92093-0743, USA. 
pwrose@ucsd.edu, RCSB Protein Data Bank, San Diego Supercomputer Center, University of California San Diego, La Jolla, CA 92093, USA pwrose@ucsd.edu.","Rose PW, Prlić A, Altunkaya A, Bi C, Bradley AR, Christie CH, Costanzo LD, Duarte JM, Dutta S, Feng Z, Green RK, Goodsell DS, Hudson B, Kalro T, Lowe R, Peisach E, Randle C, Rose AS, Shao C, Tao YP, Valasatava Y, Voigt M, Westbrook JD, Woo J, Yang H, Young JY, Zardecki C, Berman HM, Burley SK, Rose PW, Bi C, Bluhm WF, Christie CH, Dimitropoulos D, Dutta S, Green RK, Goodsell DS, Prlic A, Quesada M, Quinn GB, Ramos AG, Westbrook JD, Young J, Zardecki C, Berman HM, Bourne PE, Rose PW, Prlić A, Bi C, Bluhm WF, Christie CH, Dutta S, Green RK, Goodsell DS, Westbrook JD, Woo J, Young J, Zardecki C, Berman HM, Bourne PE, Burley SK",", , ","NCI NIH HHS, , ",770.0,"United States, United States, United States" +"27794045, 30407557, 33211864, 25723102",FANTOM5,0.996881783,FANTOM5,0.996881783,Functional ANnoTation Of the Mammalian genome,0.984211731,4,http://fantom.gsc.riken.jp,301,Japan,"(35.5047,139.6802)",http://web.archive.org/web/20221101073929/https://fantom.gsc.riken.jp/,2016-10-27,"Division of Genomic Technologies (DGT), RIKEN Center for Life Science Technologie, 1-7-22 Suehiro-cho, Tsurumi-ku, Yokohama, Kanagawa 230-0045, Japan., RIKEN Center for Integrative Medical Sciences, Yokohama, Kanagawa 230-0045, Japan., RIKEN Center for Integrative Medical Sciences, Yokohama, Kanagawa, Japan., nan","Lizio M, Harshbarger J, Abugessaisa I, Noguchi S, Kondo A, Severin J, Mungall C, Arenillas D, Mathelier A, Medvedeva YA, Lennartsson A, Drabløs F, Ramilowski JA, Rackham O, Gough J, Andersson R, Sandelin A, Ienasescu H, Ono H, Bono H, Hayashizaki Y, Carninci P, Forrest AR, Kasukawa T, Kawaji H, Lizio M, Abugessaisa I, Noguchi S, Kondo A, Hasegawa A, Hon CC, de Hoon M, Severin J, Oki S, Hayashizaki Y, Carninci P, Kasukawa T, Kawaji H, Abugessaisa I, Ramilowski JA, Lizio M, Severin J, Hasegawa A, Harshbarger J, Kondo A, Noguchi S, 
Yip CW, Ooi JLC, Tagami M, Hori F, Agrawal S, Hon CC, Cardon M, Ikeda S, Ono H, Bono H, Kato M, Hashimoto K, Bonetti A, Kato M, Kobayashi N, Shin J, de Hoon M, Hayashizaki Y, Carninci P, Kawaji H, Kasukawa T, nan",", , , nan","Novo Nordisk Fonden, NHGRI NIH HHS, Lundbeck Foundation, Biotechnology and Biological Sciences Research Council, Japan Society for the Promotion of Science, Japan Society for the Promotion of Science, MEXT, ROIS-DS-JOINT 2019, RIKEN Center for Life Science Technology, JSPS, RIKEN, nan",138.0,"Japan, Japan, Japan" +"27899580, 33196836, 23180791, 30395283, 25428351",OrthoDB,0.997169971,OrthoDB,0.997169971,,0,5,http://orthodb.org,301,,,no_wayback,2021-01-01,"Department of Genetic Medicine and Development, University of Geneva Medical School, rue Michel-Servet 1, 1211 Geneva, Switzerland, and Swiss Institute of Bioinformatics, rue Michel-Servet 1, 1211 Geneva, Switzerland evgeny.zdobnov@unige.ch., Department of Genetic Medicine and Development, University of Geneva Medical School, rue Michel-Servet 1, 1211 Geneva, Switzerland, and Swiss Institute of Bioinformatics, rue Michel-Servet 1, 1211 Geneva, Switzerland., Department of Genetic Medicine and Development, University of Geneva Medical School, 1211 Geneva, Switzerland., Department of Genetic Medicine and Development, University of Geneva Medical School, rue Michel-Servet 1, 1211 Geneva, Switzerland., Department of Genetic Medicine and Development, University of Geneva Medical School, rue Michel-Servet 1, 1211 Geneva, Switzerland Swiss Institute of Bioinformatics, rue Michel-Servet 1, 1211 Geneva, Switzerland evgenia.kriventseva@unige.ch.","Zdobnov EM, Tegenfeldt F, Kuznetsov D, Waterhouse RM, Simão FA, Ioannidis P, Seppey M, Loetscher A, Kriventseva EV, Zdobnov EM, Kuznetsov D, Tegenfeldt F, Manni M, Berkeley M, Kriventseva EV, Waterhouse RM, Tegenfeldt F, Li J, Zdobnov EM, Kriventseva EV, Kriventseva EV, Kuznetsov D, Tegenfeldt F, Manni M, Dias R, Simão FA, Zdobnov EM, Kriventseva EV, 
Tegenfeldt F, Petty TJ, Waterhouse RM, Simão FA, Pozdnyakov IA, Ioannidis P, Zdobnov EM",", , , , ","Swiss National Science Foundation, Swiss Institute of Bioinformatics SERI, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation, São Paulo Research Foundation, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation",803.0,"Switzerland, Switzerland, Switzerland, Switzerland, Switzerland, Switzerland, Switzerland, Switzerland" +"27899581, 22139932",HmtDB,0.998390019,HmtDB,0.998390019,,0,2,http://www.hmtdb.uniba.it,301,,,no_wayback,2016-11-28,"Department of Biosciences, Biotechnology and Biopharmaceutics, University of Bari, 70126 Bari, Italy., Dipartimento di Biochimica e Biologia Molecolare E Quagliariello, Università degli studi di Bari, Bari 70126, Italy.","Clima R, Preste R, Calabrese C, Diroma MA, Santorsola M, Scioscia G, Simone D, Shen L, Gasparre G, Attimonelli M, Rubino F, Piredda R, Calabrese FM, Simone D, Lang M, Calabrese C, Petruzzella V, Tommaseo-Ponzetta M, Gasparre G, Attimonelli M",", ","Worldwide Cancer Research, ",81.0,"Italy, Italy" +"27899596, 31747015, 23143107, 25414346",ChiTaRS,0.9914096,ChiTaRS,0.9914096,,0,4,http://chitars.md.biu.ac.il,200,,,http://web.archive.org/web/20221013103131/https://chitars.md.biu.ac.il/,2020-01-01,"Faculty of Medicine in Galilee, Bar-Ilan University, Henrietta Szold 8, Safed 13195, Israel., Laboratory of Cancer Genomics and Biocomputing of Complex Diseases, The Azrieli Faculty of Medicine, Bar-Ilan University, Safed 1311502, Israel., Structural Biology and BioComputing Program, Spanish National Cancer Research Centre (CNIO), Madrid 28029, Spain., Structural Biology and BioComputing Program, Spanish National Cancer Research Centre (CNIO), Madrid 28029, Spain.","Gorohovski A, Tagore S, Palande V, Malka A, Raviv-Shay D, Frenkel-Morgenstern M, Balamurali 
D, Gorohovski A, Detroja R, Palande V, Raviv-Shay D, Frenkel-Morgenstern M, Frenkel-Morgenstern M, Gorohovski A, Lacroix V, Rogers M, Ibanez K, Boullosa C, Andres Leon E, Ben-Hur A, Valencia A, Frenkel-Morgenstern M, Gorohovski A, Vucenovic D, Maestre L, Valencia A",", , , ",", Israel Cancer Association, Israel Innovation Authority, NHGRI NIH HHS, NHGRI NIH HHS",84.0,"Spain, Spain, Israel, Israel" +27899622,UniProt,0.995713353,UniProt,0.995713353,,0,1,"http://sparql.uniprot.org/, http://www.uniprot.org","301, 301",,", ","http://web.archive.org/web/20221103014232/https://sparql.uniprot.org/, http://web.archive.org/web/20221104172213/https://www.uniprot.org/",2016-11-29,None,,,"NIGMS NIH HHS, Biotechnology and Biological Sciences Research Council, Parkinson's UK, NHGRI NIH HHS, NHGRI NIH HHS, British Heart Foundation, NIGMS NIH HHS, Wellcome Trust, NIGMS NIH HHS, NIGMS NIH HHS",1864.0, +"27899668, 24275496",CyanoBase,0.946007907,CyanoBase,0.946007907,,0,2,http://genome.microbedb.jp/cyanobase,200,Japan,"(35.6916,139.768)",http://web.archive.org/web/20221101072649/http://genome.microbedb.jp/cyanobase/,2016-11-29,"Center for Information Biology, National Institute of Genetics, Research Organization of Information and Systems, Yata, Mishima 411-8540, Japan., Center for Information Biology, National Institute of Genetics, Research Organization of Information and Systems, Yata, Mishima 411-8540, Japan, Database Center for Life Science, Research Organization of Information and Systems, 2-11-16 Yayoi, Bunkyo-ku, Tokyo 113-0032, Japan, Faculty of Life Sciences, Kyoto Sangyo University, Motoyama, Kamigamo, Kita-Ku, Kyoto 603-8555, Japan and Kazusa DNA Research Institute, 2-6-7 Kazusa-Kamatari, Kisarazu 292-0818, Japan.","Fujisawa T, Narikawa R, Maeda SI, Watanabe S, Kanesaki Y, Kobayashi K, Nomata J, Hanaoka M, Watanabe M, Ehira S, Suzuki E, Awai K, Nakamura Y, Fujisawa T, Okamoto S, Katayama T, Nakao M, Yoshimura H, Kajiya-Kanegae H, Yamamoto S, Yano C, Yanaka Y, Maita H, 
Kaneko T, Tabata S, Nakamura Y",", ",", ",77.0,"Japan, Japan, Japan, Japan, Japan" +"27924039, 22067456",FlyRNAi,0.996583045,FlyRNAi,0.996583045,Drosophila RNAi screening,0.770914784,2,http://fgr.hms.harvard.edu,301,United States,"(39.0438,-77.4874)",http://web.archive.org/web/20221017122217/https://fgr.hms.harvard.edu/,2016-10-23,"Department of Genetics, Harvard Medical School, 77 Avenue Louis Pasteur, Boston, MA 02115, USA., Department of Genetics, Harvard Medical School, Boston, MA 02115, USA.","Hu Y, Comjean A, Roesel C, Vinayagam A, Flockhart I, Zirin J, Perkins L, Perrimon N, Mohr SE, Flockhart IT, Booker M, Hu Y, McElvany B, Gilly Q, Mathey-Prevot B, Perrimon N, Mohr SE",", ","NCRR NIH HHS, Howard Hughes Medical Institute, NIGMS NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NIH HHS, NIH HHS, NIAID NIH HHS, NIGMS NIH HHS",53.0,"United States, United States" +"27924042, 24174544",PlantTFDB,0.997673213,PlantTFDB,0.997673213,,0,2,"http://planttfdb.cbi.pku.edu.cn/, http://plantregmap.cbi.pku.edu.cn","200, 200","China, China","(39.9917,116.316), (39.9917,116.316)","http://web.archive.org/web/20221031234058/http://planttfdb.cbi.pku.edu.cn/, http://web.archive.org/web/20220920173133/http://plantregmap.cbi.pku.edu.cn/",2016-10-24,"State Key Laboratory of Protein and Plant Gene Research, College of Life Sciences, Center for Bioinformatics, Beijing 100871, P.R. China., State Key Laboratory of Protein and Plant Gene Research, College of Life Sciences and Center for Bioinformatics, Peking University, Beijing 100871, P.R. 
China.","Jin J, Tian F, Yang DC, Meng YQ, Kong L, Luo J, Gao G, Jin J, Zhang H, Kong L, Gao G, Luo J",", ",", ",916.0,"China, China" +"27982098, 27982098, 27982098",ARN,0.983553469,ARN,0.983553469,Adipogenesis Regulation Network,0.879501736,1,http://210.27.80.93/arn,200,China,"(40.0018,116.333)",http://web.archive.org/web/20220802101804/http://210.27.80.93/arn/,2016-12-16,"National Beef Cattle Improvement Center, College of Animal Science and Technology, Northwest A&F University, Yangling, China., National Beef Cattle Improvement Center, College of Animal Science and Technology, Northwest A&F University, Yangling, China., National Beef Cattle Improvement Center, College of Animal Science and Technology, Northwest A&F University, Yangling, China.","Huang Y, Wang L, Zan LS, Huang Y, Wang L, Zan LS, Huang Y, Wang L, Zan LS",", , ",", , ",3.0,"China, China, China" +"27987164, 24363285",PGDBj,0.985787213,PGDBj,0.985787213,Plant Genome DataBase Japan,0.963867758,2,http://pgdbj,"HTTPConnectionPool(host='pgdbj', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2017-01-01,"Department of Genome Informatics, Graduate School of Medicine, Osaka University, 1-1 Yamadaoka, Suita, Osaka, 565-0871, Japan., Department of Plant Genome Research, Kazusa DNA Research Institute, 2-6-7 Kazusa-kamatari, Kisarazu, Chiba, 292-0818 Japan.","Nakaya A, Ichihara H, Asamizu E, Shirasawa S, Nakamura Y, Tabata S, Hirakawa H, Asamizu E, Ichihara H, Nakaya A, Nakamura Y, Hirakawa H, Ishii T, Tamura T, Fukami-Kobayashi K, Nakajima Y, Tabata S",", ",", ",10.0,"Japan, Japan" +"27987169, 23161680",TropGeneDB,0.997986138,TropGeneDB,0.997986138,,0,2,http://tropgenedb.cirad.fr,302,,,http://web.archive.org/web/20160120061533/http://tropgenedb.cirad.fr/,2017-01-01,"UMR Amélioration Génétique et Adaptation des Plantes Méditerranéennes et Tropicales (AGAP), CIRAD, TA A-108/03, Avenue Agropolis, 
34398, Montpellier, France. manuel.ruiz@cirad.fr., CIRAD, UMR AGAP, F-34398 Montpellier, France. chantal.hamelin@cirad.fr","Ruiz M, Sempéré G, Hamelin C, Hamelin C, Sempere G, Jouffe V, Ruiz M",", ",", ",9.0,"France, France" +"28011601, 33242091, 30759212",CSDB_GT,0.99439846,CSDB_GT,0.99439846,Carbohydrate Structure Glycosyltransferase DatabaseÂ,0.911011142,3,http://csdb.glycoscience.ru/gt.html,200,,"(55.7483,37.6171)",http://web.archive.org/web/20210508134408/http://csdb.glycoscience.ru/gt.html,2021-06-01,"N.D. Zelinsky Institute of Organic Chemistry, Russian Academy of Sciences, Leninsky prospect 47, Moscow, Russia., Laboratory of Metal-Complex and Nano-Scale Catalysts, N.D. Zelinsky Institute of Organic Chemistry, Russian Academy of Sciences, Leninsky prospect 47, Moscow 119991, Russia., N.D. Zelinsky Institute of Organic Chemistry, Russian Academy of Sciences, Leninsky Prospect 47, Moscow, Russia.","Egorova KS, Toukach PV, Egorova KS, Smirnova NS, Toukach PV, Egorova KS, Knirel YA, Toukach PV",", , ","Russian Science Foundation, Russian Science Foundation, Russian Science Foundation",13.0, +"28053161, 21917859",GETPrime,0.99608171,GETPrime,0.99608171,,0,2,http://bbcftools.epfl.ch/getprime,302,Switzerland,"(46.5191,6.56676)",http://web.archive.org/web/20180516195909/http://bbcftools.epfl.ch:80/getprime,2016-10-07,"Bioinformatics and Biostatistics Core Facility, School of Life Sciences, Ecole Polytechnique Fédérale de Lausanne (EPFL), CH-1015 Lausanne, Switzerland., Institute of Bio-engineering, School of Life Sciences, Laboratory of Systems Biology and Genetics, Lausanne, Switzerland.","David FP, Rougemont J, Deplancke B, Gubelmann C, Gattiker A, Massouras A, Hens K, David F, Decouttere F, Rougemont J, Deplancke B",", ","Swiss National Science Foundation, ",37.0,"Switzerland, Switzerland" +28104956,MPDB,0.995024717,MPDB,0.995024717,Molecular Pathways Brain 
Database,0.991229546,1,http://pranag.physics.iisc.ernet.in/mpdb,301,,,http://web.archive.org/web/20220617044702/http://pranag.physics.iisc.ernet.in/mpdb/,2016-04-10,"Centre of Excellence in Bioinformatics, School of Biotechnology, Madurai Kamaraj University, Madurai - 625021.","Vigneshwari GM, Ramamoorthy S, Muralikrishnan A, Srivastava P, Pathania M, Krishnaswamy S",,,0.0, +"28111366, 23324169",Orchidstra,0.991553485,Orchidstra,0.991553485,,0,2,http://orchidstra2.abrc.sinica.edu.tw,301,,,no_wayback,2017-01-01,"Agricultural Biotechnology Research Center, Academia Sinica, Nankang, Taipei, Taiwan., Agricultural Biotechnology Research Center, Academia Sinica, Taipei, Taiwan.","Chao YT, Yen SH, Yeh JH, Chen WC, Shih MC, Su CL, Chao YT, Yen SH, Chen CY, Chen WC, Chang YC, Shih MC",", ",", ",50.0, +"28150237, 24234447, 21898825",UniCarbKB,0.998428583,UniCarbKB,0.998428583,,0,3,"http://unicarbkb.org, http://confluence.unicarbkb.org","200, 405",Australia,"(-37.8136,144.972), ","http://web.archive.org/web/20220809003113/http://www.unicarbkb.org/, no_wayback",2017-01-01,"Department of Chemistry and Biomolecular Sciences, Research Drive, Building E8C, Macquarie University, North Ryde, Sydney, 2109, NSW, Australia., Biomolecular Frontiers Research Centre, Macquarie University, North Ryde, NSW 2109, Australia, Proteome Informatics Group, Swiss Institute of Bioinformatics, Geneva, Switzerland, Swiss-Prot Group, Swiss Institute of Bioinformatics, Geneva, Switzerland, Department of Bioinformatics, Faculty of Engineering, Soka University, 1-236 Tangi-machi, Hachioji, Tokyo, Japan and Section of Biology, Faculty of Sciences, University of Geneva, Switzerland., Biomolecular Frontiers Research Centre, Macquarie University, Sydney, NSW, Australia.","Campbell MP, Peterson RA, Gasteiger E, Mariethoz J, Lisacek F, Packer NH, Campbell MP, Peterson R, Mariethoz J, Gasteiger E, Akune Y, Aoki-Kinoshita KF, Lisacek F, Packer NH, Campbell MP, Hayes CA, Struwe WB, Wilkins MR, Aoki-Kinoshita KF, 
Harvey DJ, Rudd PM, Kolarich D, Lisacek F, Karlsson NG, Packer NH",", , ","NHGRI NIH HHS, Swiss National Science Foundation, ",107.0,"Australia, Australia, Australia, Switzerland, Switzerland, Switzerland, Japan" +28160322,ATLAS,0.976716459,ATLAS,0.976716459,Altered TCR Ligand Affinities and Structures,0.972670598,1,http://zlab.umassmed.edu/atlas/web,301,,,no_wayback,2017-02-16,"Program in Bioinformatics and Integrative Biology, University of Massachusetts Medical School, Worcester, Massachusetts, 01605.","Borrman T, Cimons J, Cosiano M, Purcaro M, Pierce BG, Baker BM, Weng Z",,"National Institutes of Health, NIGMS NIH HHS, National Institutes of Health, NIGMS NIH HHS",22.0, +28293068,MMDB,0.978604794,MMDB,0.978604794,Magnaporthe oryzae Microsatellite Database,0.971214314,1,http://14.139.229.199/home.aspx,"HTTPConnectionPool(host='14.139.229.199', port=80): Max retries exceeded with url: /home.aspx (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 113] No route to host'))",,,no_wayback,2016-11-29,"National Research Centre on Plant Biotechnology, IARI, New Delhi 110012, India; Department of Bioscience and Biotechnology, Banasthali University, Tonk, Rajasthan 304 022, India.","Singh PK, Singh A, Pawar DV, Devanna BN, Singh J, Sharma V, Sharma TR",,,0.0,"India, India" +"28775335, 23330984",NuBBEDB,0.996761322,NuBBEDB,0.996761322,of Natural Products Database,0.788312316,2,http://nubbe.iq.unesp.br/portal/nubbedb.html,200,,,http://web.archive.org/web/20220121233511/http://nubbe.iq.unesp.br/portal/nubbedb.html,2017-08-03,"Nuclei of Bioassays, Biosynthesis and Ecophysiology of Natural Products (NuBBE), Department of Organic Chemistry, Institute of Chemistry, Sao Paulo State University - UNESP, 14800-060, Araraquara, SP, Brazil., Núcleo de Bioensaios, Biossíntese e Ecofisiologia de Produtos Naturais (NuBBE), Departamento de Química Orgânica, Instituto de Química, UNESP - Univ. 
Estadual Paulista, 14801-970, Araraquara-SP, Brazil.","Pilon AC, Valli M, Dametto AC, Pinto MEF, Freire RT, Castro-Gamboa I, Andricopulo AD, Bolzani VS, Valli M, dos Santos RN, Figueira LD, Nakajima CH, Castro-Gamboa I, Andricopulo AD, Bolzani VS",", ",", ",75.0,"Brazil, Brazil" +"28943872, 28077565",MAHMI,0.988386333,MAHMI,0.988386333,Mechanism of Action of the Human Microbiome,0.950187612,2,http://www.mahmi.org,"HTTPConnectionPool(host='www.mahmi.org', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20181230155500/http://mahmi.org/,2017-09-08,"Department of Microbiology and Biochemistry of Dairy Products, Instituto de Productos Lácteos de Asturias, Consejo Superior de Investigaciones CientíficasVillaviciosa, Spain., ESEI - Department of Computer Science, University of Vigo, Edificio Politécnico, Campus Universitario As Lagoas s/n 32004, Ourense, Spain.","Hidalgo-Cantabrana C, Moro-García MA, Blanco-Míguez A, Fdez-Riverola F, Lourenço A, Alonso-Arias R, Sánchez B, Blanco-Míguez A, Gutiérrez-Jácome A, Fdez-Riverola F, Lourenço A, Sánchez B",", ","Fundación Científica Asociación Española Contra el Cáncer, Ministerio de Economía y Competitividad, ",24.0,"Spain, Spain" +28981707,EPD,0.979538023,EPD,0.979538023,Encyclopedia of Proteome Dynamics,0.85972634,1,http://peptracker.com/epd,301,,,http://web.archive.org/web/20141221055709/http://peptracker.com/epd/,2018-01-01,"Centre for Gene Regulation and Expression, School of Life Sciences, University of Dundee, Dow St, Dundee DD1 5EH, UK.","Brenes A, Afzal V, Kent R, Lamond AI",,"Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust",9.0, +"29036351, 23094086",miRandola,0.990178049,miRandola,0.990178049,extracellular circulating 
microRNAs,0.69080558,2,http://mirandola.iit.cnr.it,200,Italy,"(43.7089,10.4087)",http://web.archive.org/web/20220615140122/http://mirandola.iit.cnr.it/,2018-01-01,"Disease Systems Biology, Novo Nordisk Foundation Center for Protein Research, Faculty of Health and Medical Sciences, University of Copenhagen, Copenhagen, 2200, Denmark., Department of Clinical and Molecular Biomedicine, University of Catania, Catania, Italy.","Russo F, Di Bella S, Vannini F, Berti G, Scoyni F, Cook HV, Santos A, Nigita G, Bonnici V, Laganà A, Geraci F, Pulvirenti A, Giugno R, De Masi F, Belling K, Jensen LJ, Brunak S, Pellegrini M, Ferro A, Russo F, Di Bella S, Nigita G, Macca V, Laganà A, Giugno R, Pulvirenti A, Ferro A",", ","Novo Nordisk Foundation Center for Protein Research, Novo Nordisk Foundation Center for Protein Research, ",128.0,"Denmark, Italy" +"29036529, 22786784",ITSoneDB,0.997155786,ITSoneDB,0.997155786,,0,2,http://itsonedb.cloud.ba.infn.it,200,,,http://web.archive.org/web/20220903134056/http://itsonedb.cloud.ba.infn.it/,2018-01-01,"Institute of Biomembranes, Bioenergetics and Molecular Biotechnologies, Consiglio Nazionale delle Ricerche, Bari 70126, Italy., Institute of Biomembranes and Bioenergetics, National Research Council, Bari, Italy.","Santamaria M, Fosso B, Licciulli F, Balech B, Larini I, Grillo G, De Caro G, Liuni S, Pesole G, Santamaria M, Fosso B, Consiglio A, De Caro G, Grillo G, Licciulli F, Liuni S, Marzano M, Alonso-Alemany D, Valiente G, Pesole G",", ",", ",58.0,"Italy, Italy" +29040681,SMART,0.996265411,SMART,0.996265411,Simple Modular Architecture Research Tool,0.979393108,1,http://smart.embl.de,200,Germany,"(49.4071,8.6879)",http://web.archive.org/web/20221006015847/https://smart.embl.de/,2018-01-01,"biobyte solutions GmbH, Bothestr 142, 69126 Heidelberg, Germany.","Letunic I, Bork P",,,552.0,Germany +"29040692, 26464443",RMBase,0.992835402,RMBase,0.992835402,RNA Modification 
Base,0.927216482,2,http://rna.sysu.edu.cn/rmbase,302,,,http://web.archive.org/web/20220802091049/https://rna.sysu.edu.cn/rmbase/,2018-01-01,"Key Laboratory of Gene Engineering of the Ministry of Education, Sun Yat-sen University, Guangzhou 510275, PR China., Key Laboratory of Gene Engineering of the Ministry of Education, Sun Yat-sen University, Guangzhou 510275, P. R. China State Key Laboratory for Biocontrol, Sun Yat-sen University, Guangzhou 510275, P. R. China.","Xuan JJ, Sun WJ, Lin PH, Zhou KR, Liu S, Zheng LL, Qu LH, Yang JH, Sun WJ, Li JH, Liu S, Wu J, Zhou H, Qu LH, Yang JH",", ",", ",192.0,"China, China, China" +"29059334, 22102576",MetaCyc,0.994340897,MetaCyc,0.994340897,,0,2,"http://MetaCyc.org, http://BioCyc.org","302, 302",,", ","http://web.archive.org/web/20221105200830/https://metacyc.org/, http://web.archive.org/web/20221109033703/https://www.biocyc.org/",2018-01-01,"SRI International, 333 Ravenswood, Menlo Park, CA 94025, USA., SRI International, 333 Ravenswood, Menlo Park, CA 94025, USA, USA.","Caspi R, Billington R, Fulcher CA, Keseler IM, Kothari A, Krummenacker M, Latendresse M, Midford PE, Ong Q, Ong WK, Paley S, Subhraveti P, Karp PD, Caspi R, Altman T, Dreher K, Fulcher CA, Subhraveti P, Keseler IM, Kothari A, Krummenacker M, Latendresse M, Mueller LA, Ong Q, Paley S, Pujar A, Shearer AG, Travers M, Weerasinghe D, Zhang P, Karp PD",", ","NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",562.0,"United States, United States, United States" +"29069475, 23161672",APPRIS,0.998272896,APPRIS,0.998272896,of,0.716509938,2,http://appris-tools.org,"HTTPConnectionPool(host='appris-tools.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to appris-tools.org timed out. 
(connect timeout=5)'))",,,no_wayback,2018-01-01,"Spanish National Bioinformatics Institute (INB), Spanish National Cancer Research Centre (CNIO), Madrid 28029, Spain., Spanish National Bioinformatics Institute (INB), Madrid 28029, Spain.","Rodriguez JM, Rodriguez-Rivas J, Di Domenico T, Vázquez J, Valencia A, Tress ML, Rodriguez JM, Maietta P, Ezkurdia I, Pietrelli A, Wesselink JJ, Lopez G, Valencia A, Tress ML",", ","NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS",137.0,"Spain, Spain" +"29077884, 28025347",HEDD,0.985895932,HEDD,0.985895932,Human Enhancer Disease Database,0.981465423,2,http://zdzlab.einstein.yu.edu/1/hedd.php,404,,,http://web.archive.org/web/20201024184821/http://zdzlab.einstein.yu.edu/1/hedd.php,2018-01-01,"Department of Genetics, Albert Einstein College of Medicine, Bronx, NY, USA., Department of Bioscience, School of Life Science, Jilin Normal University, Siping, China qiyunfeng911@outlook.com.","Wang Z, Zhang Q, Zhang W, Lin JR, Cai Y, Mitra J, Zhang ZD, Qi Y, Wang D, Wang D, Jin T, Yang L, Wu H, Li Y, Zhao J, Du F, Song M, Wang R",", ","NHGRI NIH HHS, ",33.0,"China, United States" +"29077939, 25332392",lncRNASNP,0.996575296,lncRNASNP,0.996575296,,0,2,http://bioinfo.life.hust.edu.cn/lncRNASNP2,200,China,"(30.513,114.42)",http://web.archive.org/web/20220615043714/http://bioinfo.life.hust.edu.cn/lncRNASNP2,2018-01-01,"Department of Bioinformatics and Systems Biology, Key Laboratory of Molecular Biophysics of the Ministry of Education, Hubei Bioinformatics and Molecular Imaging Key Laboratory, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, PR China., Department of Epidemiology and Biostatistics, School of Public Health, Tongji Medical College, Huazhong University of Science and Technology, Wuhan, Hubei 430030, PR China Department of Biomedical Engineering, Key Laboratory of Molecular Biophysics of the Ministry of Education, College of Life Science and Technology, Huazhong University of 
Science and Technology, Wuhan, Hubei 430074, PR China.","Miao YR, Liu W, Zhang Q, Guo AY, Gong J, Liu W, Zhang J, Miao X, Guo AY",", ",", ",191.0,"China, China, China" +"29092055, 22102587",SABIO-RK,0.997967561,SABIO-RK,0.997967561,,0,2,http://sabiork.h-its.org,301,,,http://web.archive.org/web/20221107174004/https://sabiork.h-its.org/,2018-01-01,"Scientific Databases and Visualization Group, Heidelberg Institute for Theoretical Studies (HITS gGmbH), Schloss-Wolfsbrunnenweg 35, 69118 Heidelberg, Germany., Scientific Databases and Visualization Group, Heidelberg Institute for Theoretical Studies, gGmbH, Schloss-Wolfsbrunnenweg 35, 69118 Heidelberg, Germany. Ulrike.Wittig@h-its.org","Wittig U, Rey M, Weidemann A, Kania R, Müller W, Wittig U, Kania R, Golebiewski M, Rey M, Shi L, Jong L, Algaa E, Weidemann A, Sauer-Danzwith H, Mir S, Krebs O, Bittkowski M, Wetsch E, Rojas I, Müller W",", ",", ",112.0,"Germany, Germany" +"29112718, 29927072, 33211869, 23125362",Rfam,0.996679723,Rfam,0.996679723,,0,4,http://rfam.org,301,,,http://web.archive.org/web/20221016215908/https://rfam.org/,2021-01-01,"European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge, United Kingdom., European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, UK., Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SA, UK. 
sb30@sanger.ac.uk","Kalvari I, Argasinska J, Quinones-Olvera N, Nawrocki EP, Rivas E, Eddy SR, Bateman A, Finn RD, Petrov AI, Kalvari I, Nawrocki EP, Argasinska J, Quinones-Olvera N, Finn RD, Bateman A, Petrov AI, Kalvari I, Nawrocki EP, Ontiveros-Palacios N, Argasinska J, Lamkiewicz K, Marz M, Griffiths-Jones S, Toffano-Nioche C, Gautheret D, Weinberg Z, Rivas E, Eddy SR, Finn RD, Bateman A, Petrov AI, Burge SW, Daub J, Eberhardt R, Tate J, Barquist L, Nawrocki EP, Eddy SR, Gardner PP, Bateman A",", , , ","NHGRI NIH HHS, Biotechnology and Biological Sciences Research Council, Intramural NIH HHS, Biotechnology and Biological Sciences Research Council, National Human Genome Research Institute, NIH, Carl Zeiss Foundation, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Horizon 2020, DFG, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, NLM NIH HHS, Wellcome Trust, Howard Hughes Medical Institute",1009.0,United Kingdom +"29126295, 33245761, 25324305",MoonProt,0.995451093,MoonProt,0.995451093,,0,3,http://moonlightingproteins.org,200,,,no_wayback,2021-01-01,"Department of Bioengineering, University of Illinois at Chicago, Chicago, IL 60607, USA., Department of Bioengineering, University of Illinois at Chicago, Chicago, IL 60607, USA., Department of Bioengineering, University of Illinois at Chicago, Chicago, IL 60607, USA.","Chen C, Zabad S, Liu H, Wang W, Jeffery C, Chen C, Liu H, Zabad S, Rivera N, Rowin E, Hassan M, Gomez De Jesus SM, Llinás Santos PS, Kravchenko K, Mikhova M, Ketterer S, Shen A, Shen S, Navas E, Horan B, Raudsepp J, Jeffery C, Mani M, Chen C, Amblee V, Liu H, Mathur T, Zwicke G, Zabad S, Patel B, Thakkar J, Jeffery CJ",", , ",", UIC, ",126.0,"United States, United States, United States" +"29140525, 
24285297",rSNPBase,0.998359084,rSNPBase,0.998359084,,0,2,http://rsnp3.psych.ac.cn,200,China,"(39.9042,116.407)",http://web.archive.org/web/20220618061652/http://rsnp3.psych.ac.cn/,2018-01-01,"CAS Key Laboratory of Mental Health, Institute of Psychology, Chinese Academy of Sciences, Beijing 100101, China., Key Laboratory of Mental Health, Institute of Psychology, Chinese Academy of Sciences, 16 Lincui Road, Chaoyang District, Beijing 100101, China and University of Chinese Academy of Sciences, 19A Yuquan Road, Beijing, 100049, China.","Guo L, Wang J, Guo L, Du Y, Chang S, Zhang K, Wang J",", ",", ",74.0,"China, China, China" +"29145643, 32920969, 22086950, 23584835, 24157837, 26527717",MEROPS,0.980729818,MEROPS,0.980729818,,0,6,http://www.ebi.ac.uk/merops,301,,,no_wayback,2020-10-03,"EMBL European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Hinxton, Cambridge, UK., The Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SA, UK. ndr@sanger.ac.uk, Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA, UK. 
neil.rawlings@ebi.ac.uk, The Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SA, UK and Proteins and Protein Families, EMBO European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK., The Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire, CB10 1SA, UK EMBO European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire, CB10 1SD, UK ndr@sanger.ac.uk ndr@ebi.ac.uk.","Rawlings ND, Barrett AJ, Thomas PD, Huang X, Bateman A, Finn RD, Rawlings ND, Bateman A, Rawlings ND, Barrett AJ, Bateman A, Rawlings ND, Rawlings ND, Waller M, Barrett AJ, Bateman A, Rawlings ND, Barrett AJ, Finn R",", , , , , ","NHGRI NIH HHS, Biotechnology and Biological Sciences Research Council, Wellcome Trust, , Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust, Wellcome Trust",1575.0, +"29161430, 25294826",MethBank,0.993737161,MethBank,0.993737161,,0,2,http://bigd.big.ac.cn/methbank,301,,,http://web.archive.org/web/20211021201057/http://bigd.big.ac.cn/methbank/,2018-01-01,"BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China., CAS Key Laboratory of Genome Sciences and Information, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Li R, Liang F, Li M, Zou D, Sun S, Zhao Y, Zhao W, Bao Y, Xiao J, Zhang Z, Zou D, Sun S, Li R, Liu J, Zhang J, Zhang Z",", ",", ",32.0,"China, China" +"29165593, 33539890, 22110034, 24185702",FunCoup,0.996186793,FunCoup,0.996186793,,0,4,http://funcoup.sbc.su.se,301,,,http://web.archive.org/web/20130905074532/http://funcoup.sbc.su.se:80/,2021-02-02,"Stockholm Bioinformatics Center, Department of Biochemistry and Biophysics, Stockholm University, Science for Life Laboratory, Box 1031, 17121 Solna, Sweden., Department of Biochemistry and Biophysics, Stockholm University, Science for Life Laboratory, Box 1031, 17121 Solna, Sweden., 
School of Biotechnology, Royal Institute of Technology, Science for Life Laboratory, Box 1031, SE-17121 Solna, Sweden., Stockholm Bioinformatics Centre, Science for Life Laboratory, Box 1031, Solna SE-17121, Sweden, Department of Biochemistry and Biophysics, Stockholm University and Swedish eScience Research Center.","Ogris C, Guala D, Sonnhammer ELL, Persson E, Castresana-Aguirre M, Buzzao D, Guala D, Sonnhammer ELL, Alexeyenko A, Schmitt T, Tjärnberg A, Guala D, Frings O, Sonnhammer EL, Schmitt T, Ogris C, Sonnhammer EL",", , , ",", Vetenskapsrådet, , ",127.0,"Sweden, Sweden, Sweden, Sweden" +29533231,wwPDB,0.983193755,wwPDB,0.983193755,Worldwide,0.737224817,1,http://validate.wwpdb.org,302,United States,"(32.8844,-117.234)",no_wayback,2018-03-02,"Protein Data Bank in Europe, European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, Hinxton, Cambridge CB10 1SD, England.","Smart OS, Horský V, Gore S, Svobodová Vařeková R, Bendová V, Kleywegt GJ, Velankar S",,"EMBL-EBI core funding, Wellcome Trust, Wellcome Trust, European Union, Ministry of Education, Youth and Sports of the Czech Republic, Wellcome Trust, Wellcome Trust",6.0, +"29761469, 26578564",HGD,0.997601748,HGD,0.997601748,Hymenoptera Genome Database,0.993258144,2,http://hymenopteragenome.org,301,,,no_wayback,2018-01-01,"Division of Animal Sciences, University of Missouri, Columbia, MO, USA. 
elsikc@missouri.edu., Division of Animal Sciences, University of Missouri, Columbia, MO 65211, USA Division of Plant Sciences, University of Missouri, Columbia, MO 65211, USA MU Informatics Institute, University of Missouri, Columbia, MO 65211, USA elsikc@missouri.edu.","Elsik CG, Tayal A, Unni DR, Burns GW, Hagen DE, Elsik CG, Tayal A, Diesh CM, Unni DR, Emery ML, Nguyen HN, Hagen DE",", ",", ",64.0,"United States, United States, United States, United States" +29917040,MPD,0.986540794,MPD,0.986540794,pathogen,0.605002284,1,http://data.mypathogen.org,"HTTPConnectionPool(host='data.mypathogen.org', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(, 'Connection to data.mypathogen.org timed out. (connect timeout=5)'))",,,http://web.archive.org/web/20221012023702/http://data.mypathogen.org/,2018-01-01,"State Key Laboratory for Infectious Disease Prevention and Control, National Institute for Communicable Disease Control and Prevention, Chinese Center for Disease Control and Prevention, Beijing 102206, China.","Zhang T, Miao J, Han N, Qiang Y, Zhang W",,"Priority Project on Infectious Disease Control and Prevention, Priority Project on Infectious Disease Control and Prevention, Priority Project on Infectious Disease Control and Prevention, National Natural Science Foundation of China, Priority Project on Infectious Disease Control and Prevention",3.0,China +29934697,CLD,0.992355183,CLD,0.992355183,Chinese Lexical Database,0.949212909,1,http://www.chineselexicaldatabase.com,200,,,no_wayback,2018-12-01,"Eberhard Karl's Universität Tübingen, Tübingen, Germany. 
ching-chu.sun@uni-tuebingen.de.","Sun CC, Hendrix P, Ma J, Baayen RH",,,8.0,Germany +"30020414, 22859501",SKEMPI,0.998165965,SKEMPI,0.998165965,,0,2,http://life.bsc.es/pid/skempi2,301,Spain,"(41.387,2.1701)",http://web.archive.org/web/20220710031032/https://life.bsc.es/pid/skempi2,2019-02-01,"Institute of Biotechnology, Life Sciences Center, Vilnius University, Vilnius, Lithuania., Joint BSC-IRB Research Program in Computational Biology, Life Science Department, Barcelona Supercomputing Center, Barcelona, Spain.","Jankauskaite J, Jiménez-García B, Dapkunas J, Fernández-Recio J, Moal IH, Moal IH, Fernández-Recio J",", ","European Commission, Biotechnology and Biological Sciences Research Council, Spanish Ministry of Economy and Competitiveness, Biotechnology and Biological Sciences Research Council, Interreg POCTEFA, Future Leader Fellowship, MINECO, European Molecular Biology Laboratory, ",178.0,"Spain, Lithuania" +"30084000, 21996254, 27139435, 21959865",NetPath,0.996446371,NetPath,0.996446371,,0,4,"http://www.netpath.org, http://www.netpath.org/pathways?path_id=NetPath_172","503, 503",,", ","no_wayback, no_wayback",2018-08-06,"Department of Biotechnology, University of Kashmir, Srinagar, 190006, India., McKusick-Nathans Institute of Genetic Medicine, Johns Hopkins University School of Medicine, Baltimore, MD 21205, USA. 
pandey@jhmi.edu., Institute of Bioinformatics, International Technology Park, Whitefield, Bangalore, 560066, India., Institute of Bioinformatics, International Tech Park, Bangalore, India.","Bhat SA, Gurtoo S, Deolankar SC, Fazili KM, Advani J, Shetty R, Prasad TSK, Andrabi S, Subbannayya Y, Telikicherla D, Ambekar A, Palapetta SM, Dwivedi SB, Raju R, Sharma J, Prasad TsK, Ramachandra Y, Mohan SS, Maharudraiah J, Mukherjee S, Pandey A, Subbannayya T, Variar P, Advani J, Nair B, Shankar S, Gowda H, Saussez S, Chatterjee A, Prasad TS, Raju R, Nanjappa V, Balakrishnan L, Radhakrishnan A, Thomas JK, Sharma J, Tian M, Palapetta SM, Subbannayya T, Sekhar NR, Muthusamy B, Goel R, Subbannayya Y, Telikicherla D, Bhattacharjee M, Pinto SM, Syed N, Srikanth MS, Sathe GJ, Ahmad S, Chavan SN, Kumar GS, Marimuthu A, Prasad TS, Harsha HC, Rahiman BA, Ohara O, Bader GD, Sujatha Mohan S, Schiemann WP, Pandey A",", , , ",", , Department of Biotechnology , Ministry of Science and Technology, ",41.0,"India, India, India, United States" +"30152276, 21584190",HypoDB,0.997157693,HypoDB,0.997157693,,0,2,http://www.bioclues.org/hypo2,301,,,http://web.archive.org/web/20211027103511/http://www.bioclues.org/hypo2/,2018-01-01,"Bioclues.org, Kukatpally, Hyderabad 500072, India., None","Sundararajan VS, Malik G, Ijaq J, Kumar A, Das PS, P R S, Nair AS, Dhar PK, Suravajhala P, Adinarayana KP, Sravani TS, Hareesh C",", ",", ",1.0,India +30245835,PGD,0.995591462,PGD,0.995591462,Pineapple Genomics Database,0.916814101,1,http://pineapple.angiosperms.org/pineapple/html/index.html,"HTTPConnectionPool(host='pineapple.angiosperms.org', port=80): Max retries exceeded with url: /pineapple/html/index.html (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20210224233743/http://pineapple.angiosperms.org/pineapple/html/index.html,2018-09-17,"1Center for Genomics and Biotechnology, Haixia Institute of Science and 
Technology, Fujian Provincial Key Laboratory of Haixia Applied Plant Systems Biology, Fujian Agriculture and Forestry University, 350002 Fuzhou, China.","Xu H, Yu Q, Shi Y, Hua X, Tang H, Yang L, Ming R, Zhang J",,,8.0,China +"30272209, 27789701, 22135291, 25332395",Rhea,0.982805073,Rhea,0.982805073,Entities of,0.594281514,4,"http://www.rhea-db.org, http://sparql.rhea-db.org/sparql","301, 301",,", ","http://web.archive.org/web/20221107173955/https://www.rhea-db.org/, no_wayback",2019-01-01,"Swiss-Prot Group, SIB Swiss Institute of Bioinformatics, CMU, 1 rue Michel-Servet, CH-1211 Geneva 4, Switzerland., Swiss-Prot Group, SIB Swiss Institute of Bioinformatics, CMU, 1 rue Michel-Servet, CH-1211 Geneva 4, Switzerland Anne.Morgat@sib.swiss., Chemoinformatics and Metabolism Team, European Bioinformatics Institute, Hinxton, Cambridge CB10 1SD, UK. rafael.alcantara@ebi.ac.uk, Swiss-Prot Group, SIB Swiss Institute of Bioinformatics, Geneva, CH-1206, Switzerland Genoscope-LABGeM, CEA, Evry, F-91057, France anne.morgat@isb-sib.ch.","Lombardot T, Morgat A, Axelsen KB, Aimo L, Hyka-Nouspikel N, Niknejad A, Ignatchenko A, Xenarios I, Coudert E, Redaschi N, Bridge A, Morgat A, Lombardot T, Axelsen KB, Aimo L, Niknejad A, Hyka-Nouspikel N, Coudert E, Pozzato M, Pagni M, Moretti S, Rosanoff S, Onwubiko J, Bougueleret L, Xenarios I, Redaschi N, Bridge A, Alcántara R, Axelsen KB, Morgat A, Belda E, Coudert E, Bridge A, Cao H, de Matos P, Ennis M, Turner S, Owen G, Bougueleret L, Xenarios I, Steinbeck C, Morgat A, Axelsen KB, Lombardot T, Alcántara R, Aimo L, Zerara M, Niknejad A, Belda E, Hyka-Nouspikel N, Coudert E, Redaschi N, Bougueleret L, Steinbeck C, Xenarios I, Bridge A",", , , ","State Secretariat for Education, Research and Innovation, , European Commission FP7, European Commission FP7, ",112.0,"Switzerland, Switzerland, Switzerland, France" +"30285109, 31942978, 23175614",LncRNADisease,0.963101439,LncRNADisease,0.963101439,non-coding RNA disease 
database,0.798822011,3,http://www.rnanut.net/lncrnadisease,301,,,http://web.archive.org/web/20220520125605/http://www.rnanut.net/lncrnadisease/,2020-01-01,"Shanghai Key Laboratory of Regulatory Biology, Institute of Biomedical Sciences, School of Life Sciences, East China Normal University, Shanghai 200241, China., Department of Biomedical Informatics, Department of Physiology and Pathophysiology, Center for Noncoding RNA Medicine, MOE Key Lab of Cardiovascular Sciences, School of Basic Medical Sciences, Peking University, 38 Xueyuan Rd, Beijing, 100191, China., Department of Biomedical Informatics, School of Basic Medical Sciences, Peking University, China.","Bao Z, Yang Z, Huang Z, Zhou Y, Cui Q, Dong D, Jia K, Gao Y, Shi J, Zhou Y, Zhou Y, Cui Q, Chen G, Wang Z, Wang D, Qiu C, Liu M, Chen X, Zhang Q, Yan G, Cui Q",", , ","National Natural Science Foundation of China, Special Project on Precision Medicine under the National Key R&D Program, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Key R&D Program, ",612.0,"China, China, China, China" +"30329070, 25392410",RaftProt,0.996259689,RaftProt,0.996259689,mammalian lipid raft proteome database,0.892292529,2,"http://lipid-raft-database.di.uq.edu.au/, http://raftprot.org","200, 301",Australia,"(-27.4975,152.9989), ","http://web.archive.org/web/20221017080901/http://lipid-raft-database.di.uq.edu.au/, http://web.archive.org/web/20221011174931/https://www.raftprot.org/",2019-01-01,"The University of Queensland Diamantina Institute, Faculty of Medicine, The University of Queensland, Translational Research Institute, Brisbane, QLD 4012, Australia., The University of Queensland Diamantina Institute, The University of Queensland, Translational Research Institute, Brisbane, QLD, Australia.","Mohamed A, Shah AD, Chen D, Hill MM, Shah A, Chen D, Boda AR, Foster LJ, Davis MJ, Hill MM",", ","Australian Research Council, Australian 
Research Council, National Breast Cancer Foundation",21.0,"Australia, Australia" +"30335166, 26433228",RPFdb,0.996379256,RPFdb,0.996379256,,0,2,"http://www.rpfdb.org, http://sysbio.sysu.edu.cn/rpfdb","301, HTTPConnectionPool(host='sysbio.sysu.edu.cn', port=80): Max retries exceeded with url: /rpfdb (Caused by ConnectTimeoutError(, 'Connection to sysbio.sysu.edu.cn timed out. (connect timeout=5)'))",,", ","no_wayback, no_wayback",2019-01-01,"State Key Laboratory of Ophthalmology, Zhongshan Ophthalmic Center, Sun Yat-sen University, Guangzhou 510060, China., State Key Laboratory of Ophthalmology, Zhongshan Ophthalmic Center, Sun Yat-sen University, Guangzhou 510060, China Scientific Center for Precision Medicine, Sun Yat-sen University, Guangzhou 510000, China.","Wang H, Yang L, Wang Y, Chen L, Li H, Xie Z, Xie SQ, Nie P, Wang Y, Wang H, Li H, Yang Z, Liu Y, Ren J, Xie Z",", ","National Natural Science Foundation of China, National Natural Science Foundation of China, NIAID NIH HHS",41.0,"China, China, China" +"30357347, 24247530",GDR,0.979074816,GDR,0.979074816,Genome Database for Rosaceae,0.827349126,2,http://www.rosaceae.org,302,,,http://web.archive.org/web/20221101075122/https://www.rosaceae.org/,2019-01-01,"Department of Horticulture, Washington State University, Pullman, WA 99164-6414, USA., Washington State University Tree Fruit Research and Extension Center, 1100 N. 
Western Ave, Wenatchee, WA 98801; Department of Horticulture, Washington State University, Johnson Hall, Pullman WA 99164 and Department of Computer Science, Saginaw Valley State University, University Center, MI 48710, USA.","Jung S, Lee T, Cheng CH, Buble K, Zheng P, Yu J, Humann J, Ficklin SP, Gasic K, Scott K, Frank M, Ru S, Hough H, Evans K, Peace C, Olmstead M, DeVetter LW, McFerson J, Coe M, Wegrzyn JL, Staton ME, Abbott AG, Main D, Evans K, Jung S, Lee T, Brutcher L, Cho I, Peace C, Main D",", ","National Institute of Food and Agriculture, National Institute of Food and Agriculture, ",74.0,"United States, United States" +"30357384, 21940398",BitterDB,0.998117387,BitterDB,0.998117387,,0,2,http://bitterdb.agri.huji.ac.il,301,,,no_wayback,2019-01-01,"The Institute of Biochemistry, Food and Nutrition, The Robert H Smith Faculty of Agriculture, Food and Environment, The Hebrew University, 76100 Rehovot, Israel., The Robert H Smith Faculty of Agriculture, Food and Environment, The Institute of Biochemistry, Food Science and Nutrition, The Hebrew University of Jerusalem, Rehovot, Israel.","Dagan-Wiener A, Di Pizio A, Nissim I, Bahia MS, Dubovski N, Margulis E, Niv MY, Wiener A, Shudler M, Levit A, Niv MY",", ","Israel Science Foundation, Israel Science Foundation, ",104.0,"Israel, Israel" +"30364951, 23093601",DoriC,0.991857886,DoriC,0.991857886,,0,2,"http://tubic.org/doric/, http://tubic.tju.edu.cn/doric","301, 308",China,", (40.0018,116.333)","http://web.archive.org/web/20220123185242/https://tubic.org/doric, no_wayback",2019-01-01,"Department of Physics, School of Science, Tianjin University, Tianjin 300072, China., Department of Physics, Tianjin University, Tianjin 300072, China. 
fgao@tju.edu.cn","Luo H, Gao F, Gao F, Luo H, Zhang CT",", ","National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, ",120.0,"China, China" +30364952,PED,0.992514034,PED,0.992514034,Plant Editosome Database,0.983514047,1,http://bigd.big.ac.cn/ped,301,,,http://web.archive.org/web/20210515052156/https://bigd.big.ac.cn/ped/,2019-01-01,"BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Li M, Xia L, Zhang Y, Niu G, Li M, Wang P, Zhang Y, Sang J, Zou D, Hu S, Hao L, Zhang Z",,"National Key Research & Development Program of China, 13th Five-year Informatization Plan of Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences, National Programs for High Technology Research and Development, Youth Innovation Promotion Association of Chinese Academy of Science, International Partnership Program of the Chinese Academy of Sciences, National Programs for High Technology Research and Development, National Natural Science Foundation of China",13.0,China +"30364956, 24194601",HMDD,0.997899234,HMDD,0.997899234,Human microRNA Disease Database,0.991232157,2,http://www.cuilab.cn/hmdd,200,,,no_wayback,2019-01-01,"Department of Biomedical Informatics, Department of Physiology and Pathophysiology, Center for Noncoding RNA Medicine, MOE Key Lab of Cardiovascular Sciences, School of Basic Medical Sciences, Peking University, 38 Xueyuan Rd, Beijing 100191, China., Department of Cell Biology, School of Basic Medical Sciences, Peking University, 38 Xueyuan Road, Beijing 100191, China, Department of Biomedical Informatics, School of Basic Medical Sciences, Peking University, 38 Xueyuan Road, Beijing 100191, China, Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China and MOE Key Lab of Cardiovascular Sciences, Peking 
University, 38 Xueyuan Road, Beijing 100191, China.","Huang Z, Shi J, Gao Y, Cui C, Zhang S, Li J, Zhou Y, Cui Q, Li Y, Qiu C, Tu J, Geng B, Yang J, Jiang T, Cui Q",", ","National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Fundamental Research Funds for Central Universities of China, Special Project on Precision Medicine under the National Key R&D Program, ",521.0,"China, China, China, China, China" +"30371849, 23042674, 25378313",LNCipedia,0.997421205,LNCipedia,0.997421205,,0,3,http://lncipedia.org,301,,,http://web.archive.org/web/20221016231932/https://lncipedia.org/,2019-01-01,"Cancer Research Institute Ghent (CRIG), 9000 Ghent, Belgium., Center for Medical Genetics, Ghent University, 9000 Ghent, Belgium., Center for Medical Genetics, Ghent University, Ghent 9000, Belgium.","Volders PJ, Anckaert J, Verheggen K, Nuytens J, Martens L, Mestdagh P, Vandesompele J, Volders PJ, Helsens K, Wang X, Menten B, Martens L, Gevaert K, Vandesompele J, Mestdagh P, Volders PJ, Verheggen K, Menschaert G, Vandepoele K, Martens L, Vandesompele J, Mestdagh P",", , ","Ghent University, NIGMS NIH HHS, NIGMS NIH HHS, ",586.0,"Belgium, Belgium, Belgium" +"30380072, 25399422",LncRNA2Target,0.983380377,LncRNA2Target,0.983380377,,0,2,http://123.59.132.21/lncrna2target,"HTTPConnectionPool(host='123.59.132.21', port=80): Max retries exceeded with url: /lncrna2target (Caused by ConnectTimeoutError(, 'Connection to 123.59.132.21 timed out. 
(connect timeout=5)'))",,,no_wayback,2019-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China., School of Life Science and Technology, Harbin Institute of Technology, Harbin, Heilongjiang 150001, China.","Cheng L, Wang P, Tian R, Wang S, Guo Q, Luo M, Zhou W, Liu G, Jiang H, Jiang Q, Jiang Q, Wang J, Wu X, Ma R, Zhang T, Jin S, Han Z, Tan R, Peng J, Liu G, Li Y, Wang Y",", ","National Nature Science Foundation of China, National Science and Technology Major Project of China, National Nature Science Foundation of China, Natural Science Foundation of Heilongjiang Province, National Science and Technology Major Project of China, National Nature Science Foundation of China, National Nature Science Foundation of China, ",165.0,"China, China" +"30407529, 24265224, 22096232",BioSamples,0.893052816,BioSamples,0.893052816,BioSample Database,0.469798426,3,"http://www.ebi.ac.uk/biosamples, http://www.ebi.ac.uk/about/terms-of-use","301, 301","United Kingdom, United Kingdom","(52.1929,0.1256), (52.1929,0.1256)","no_wayback, http://web.archive.org/web/20221031210429/https://www.ebi.ac.uk/about/terms-of-use/",2019-01-01,"EMBL-EBI, Wellcome Genome Campus, Hinxton CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK., EMBL-EBI, the European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK.","Courtot M, Cherubin L, Faulconbridge A, Vaughan D, Green M, Richardson D, Harrison P, Whetzel PL, Parkinson H, Burdett T, Faulconbridge A, Burdett T, Brandizi M, Gostev M, Pereira R, Vasant D, Sarkans U, Brazma A, Parkinson H, Gostev M, Faulconbridge A, Brandizi M, Fernandez-Banet J, Sarkans U, Brazma A, Parkinson H",", , ","Medical Research Council, Medical Research Council, European Bank, Medical Research Council, Wellcome Trust, Wellcome Trust, Medical Research Council, Wellcome Trust, 
Biotechnology and Biological Sciences Research Council, European Commission FP7, European Commission FP7, Biotechnology and Biological Sciences Research Council",83.0, +"30419167, 33759118, 24451008",ZINClick,0.996718585,ZINClick,0.996718585,,0,3,http://www.ZINClick.org,200,,,no_wayback,2021-01-01,"Dipartimento di Scienze del Farmaco , Università degli Studi del Piemonte Orientale ""A. Avogadro"" , Largo Donegani 2 , 28100 Novara , Italy., Dipartimento di Scienze del Farmaco, Università degli Studi del Piemonte Orientale ""A. Avogadro"", Novara, Italy. alberto.massarotti@uniupo.it., Dipartimento di Scienze del Farmaco, Università degli Studi del Piemonte Orientale , ""A. Avogadro"", Largo Donegani 2, 28100 Novara, Italy.","Levré D, Arcisto C, Mercalli V, Massarotti A, Massarotti A, Massarotti A, Brunco A, Sorba G, Tron GC",", , ","Fondazione Cariplo, , ",9.0,"Italy, Italy, Italy" +"30423142, 24275495",miRBase,0.997651339,miRBase,0.997651339,,0,2,http://mirbase.org,301,United Kingdom,"(53.4696,-2.23662)",http://web.archive.org/web/20221107060446/https://www.mirbase.org/,2019-01-01,"School of Biological Sciences, Faculty of Biology, Medicine and Health, University of Manchester, Manchester M13 9PT, UK., Faculty of Life Sciences, University of Manchester, Manchester, M13 9PT, UK.","Kozomara A, Birgaoanu M, Griffiths-Jones S, Kozomara A, Griffiths-Jones S",", ","Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",3795.0, +"30445619, 33231677, 27924024",GTRD,0.998025298,GTRD,0.998025298,Gene Transcription Regulation Database,0.985109007,3,http://gtrd.biouml.org,200,,,http://web.archive.org/web/20221017050151/http://gtrd.biouml.org/,2021-01-01,"BIOSOFT.RU, LLC, Novosibirsk 630090, Russian Federation., BIOSOFT.RU, LLC, Novosibirsk 630090, Russian Federation., BIOSOFT.RU, LLC, Novosibirsk 630058, Russian Federation.","Yevshin I, Sharipov R, Kolmykov S, 
Kondrakhin Y, Kolpakov F, Kolmykov S, Yevshin I, Kulyashov M, Sharipov R, Kondrakhin Y, Makeev VJ, Kulakovskiy IV, Kel A, Kolpakov F, Yevshin I, Sharipov R, Valeev T, Kel A, Kolpakov F",", , ","Russian Foundation for Basic Research, Ministry of Science and Higher Education of the Russian Federation, Russian Science Foundation, Russian Science Foundation, ",182.0,"Russian Federation, Russian Federation, Russian Federation" +"30476227, 28077563, 25428363, 27980099",BioGRID,0.997427076,BioGRID,0.997427076,Biological General Repository for Interaction Datasets,0.975577229,4,"http://thebiogrid.org, http://orcs.thebiogrid.org","301, 301",United States,"(32.9473,-96.7028), (32.9482,-96.7297)","http://web.archive.org/web/20221101115316/https://thebiogrid.org/, no_wayback",2019-01-01,"nan, National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD20894, USA., Institute for Research in Immunology and Cancer, Université de Montréal, Montréal, Quebec H3C 3J7, Canada., Institute for Research in Immunology and Cancer, Université de Montréal, Montréal, Quebec H3T 1J4, Canada.","nan, Islamaj Dogan R, Kim S, Chatr-Aryamontri A, Chang CS, Oughtred R, Rust J, Wilbur WJ, Comeau DC, Dolinski K, Tyers M, Chatr-Aryamontri A, Breitkreutz BJ, Oughtred R, Boucher L, Heinicke S, Chen D, Stark C, Breitkreutz A, Kolas N, O'Donnell L, Reguly T, Nixon J, Ramage L, Winter A, Sellam A, Chang C, Hirschman J, Theesfeld C, Rust J, Livstone MS, Dolinski K, Tyers M, Chatr-Aryamontri A, Oughtred R, Boucher L, Rust J, Chang C, Kolas NK, O'Donnell L, Oster S, Theesfeld C, Sellam A, Stark C, Breitkreutz BJ, Dolinski K, Tyers M","nan, , , ","nan, Biotechnology and Biological Sciences Research Council, NIH HHS, Biotechnology and Biological Sciences Research Council, NHLBI NIH HHS, NIH HHS, NIH HHS, NHLBI NIH HHS, Biotechnology and Biological Sciences Research Council, NIH HHS, NIH HHS, NHLBI NIH HHS",946.0,"Canada, Canada, United States" 
+"30668832, 26553799",DASHR,0.997241557,DASHR,0.997241557,Database of small human noncoding RNAs,0.921087686,2,http://lisanwanglab.org/DASHRv2,301,United States,"(39.0438,-77.4874)",no_wayback,2019-03-01,"Penn Neurodegeneration Genomics Center, Department of Pathology and Laboratory Medicine., Department of Pathology and Laboratory Medicine, University of Pennsylvania, Philadelphia, PA 19104, USA Penn Institute for Biomedical Informatics, University of Pennsylvania, Philadelphia, PA 19104, USA.","Kuksa PP, Amlie-Wolf A, Katanić Ž, Valladares O, Wang LS, Leung YY, Leung YY, Kuksa PP, Amlie-Wolf A, Valladares O, Ungar LH, Kannan S, Gregory BD, Wang LS",", ","National Institute of General Medical Sciences, NIGMS NIH HHS, NIA NIH HHS, National Institute on Aging, National Institute on Aging, National Institute on Aging, National Institute on Aging, NIA NIH HHS, National Institute on Aging, NIA NIH HHS, NIA NIH HHS, NIA NIH HHS, NIA NIH HHS, NIGMS NIH HHS, NIA NIH HHS, NIGMS NIH HHS, NIA NIH HHS, NIA NIH HHS, NIA NIH HHS",66.0,"United States, United States" +"30689843, 22139939",ccPDB,0.997259557,ccPDB,0.997259557,,0,2,http://webs.iiitd.edu.in/raghava/ccpdb,301,India,"(28.6542,77.2373)",http://web.archive.org/web/20220119133001/https://webs.iiitd.edu.in/raghava/ccpdb/,2019-01-01,"Bioinformatics Center, CSIR-Institute of Microbial Technology, India., Bioinformatics Centre, Institute of Microbial Technology, Chandigarh, India.","Agrawal P, Patiyal S, Kumar R, Kumar V, Singh H, Raghav PK, Raghava GPS, Singh H, Chauhan JS, Gromiha MM, , Raghava GP",", ","Department of Science and Technology, Government of India, ",18.0,"India, India" +"30963485, 28049134",miRandb,0.993180573,miRandb,0.993180573,miRNA algorithmic network database,0.859572917,2,http://miRandb.ir,"HTTPConnectionPool(host='mirandb.ir', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not 
known'))",,,http://web.archive.org/web/20220516175332/http://mirandb.ir/,2019-01-01,"Bioinformatics Research Center, Mashhad University of Medical Sciences, Mashhad, Razavi Khorasan, Iran. aghaeibh@mums.ac.ir., Mashhad University of Medical Sciences, Faculty of Medicine, Department of Medical Biotechnology, Mashhad, Razavi Khorasan, Iran.","Aghaee-Bakhtiari SH, Aghaee-Bakhtiari SH, Arefian E, Lau P",", ",", ",4.0, +"31161214, 25348404",WDSPdb,0.994846165,WDSPdb,0.994846165,,0,2,"http://www.wdspdb.com/wdsp/, http://wu.scbb.pkusz.edu.cn/wdsp","200, HTTPConnectionPool(host='wu.scbb.pkusz.edu.cn', port=80): Max retries exceeded with url: /wdsp (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",China,"(22.5431,114.058), ","http://web.archive.org/web/20220726220902/http://www.wdspdb.com/wdsp/, http://web.archive.org/web/20181019003515/http://wu.scbb.pkusz.edu.cn:80/wdsp/",2019-11-01,"Lab of Computational Chemistry and Drug Design, State Key Laboratory of Chemical Oncogenomics, Peking University Shenzhen Graduate School, Shenzhen 518055, China., Lab of Computational Chemistry and Drug Design, Laboratory of Chemical Genomics, Peking University Shenzhen Graduate School, Shenzhen, 518055, P. R. 
China.","Ma J, An K, Zhou JB, Wu NS, Wang Y, Ye ZQ, Wu YD, Wang Y, Hu XJ, Zou XD, Wu XH, Ye ZQ, Wu YD",", ","Shenzhen Basic Research Program, Shenzhen Basic Research Program, National Natural Science Foundation of China, ",56.0,"China, China" +"31231515, 31559014, 31290545, 29527288, 27158451, 27158452",GXB,0.902687609,GXB,0.902687609,,0,6,http://vri1.gxbsidra.org/dm3/geneBrowser/list,200,Netherlands,"(52.3667,4.9)",http://web.archive.org/web/20220916102330/http://vri1.gxbsidra.org/dm3/geneBrowser/list,2019-03-13,"Systems Biology and Immunology Department, Sidra Medicine, Doha, Qatar., Systems Biology and Immunology, Sidra Medicine, Doha, Qatar., Sidra Medicine, Al Gharrafa Street Ar-Rayyan, Doha, Qatar., Tumor Biology, Immunology and Therapy section, Sidra Medical and Research Center, Doha, Qatar., Systems Biology Department, Sidra Medical and Research Center, Doha, Qatar., Systems Biology Department, Sidra Medical and Research Center, Doha, Qatar.","Bougarn S, Boughorbel S, Chaussabel D, Marr N, Bougarn S, Boughorbel S, Chaussabel D, Marr N, Huang SSY, Al Ali F, Boughorbel S, Toufiq M, Chaussabel D, Garand M, Roelands J, Decock J, Boughorbel S, Rinchai D, Maccalli C, Ceccarelli M, Black M, Print C, Chou J, Presnell S, Quinn C, Jithesh P, Syed N, Al Bader SBJ, Bedri S, Wang E, Marincola FM, Chaussabel D, Kuppen P, Miller LD, Bedognetti D, Hendrickx W, Rinchai D, Boughorbel S, Presnell S, Quinn C, Chaussabel D, Rinchai D, Boughorbel S, Presnell S, Quinn C, Chaussabel D",", , , , , ",", , Qatar National Research Fund, Qatar Foundation, , , ",24.0,"Qatar, Qatar, Qatar, Qatar, Qatar, Qatar" +31410491,ABCD,0.981157601,ABCD,0.981157601,for AntiBodies Chemically Defined,0.948119296,1,http://web.expasy.org/abcd,301,Switzerland,"(46.5184,6.6436)",no_wayback,2020-01-01,"Geneva Antibody Facility, Faculty of Medicine, University of Geneva, CH-1211 Geneva, Switzerland.","Lima WC, Gasteiger E, Marcatili P, Duek P, Bairoch A, Cosson P",,"Swiss National Science Foundation, Swiss 
National Science Foundation, ProCare Foundation",10.0,Switzerland +31500643,OPA1,0.989455819,OPA1,0.989455819,,0,1,http://www.lovd.nl/OPA1,301,,,no_wayback,2019-09-10,"Département d'Ophtalmologie, Centre Hospitalier Universitaire d'Angers, Angers, France.","Le Roux B, Lenaers G, Zanlonghi X, Amati-Bonneau P, Chabrun F, Foulonneau T, Caignard A, Leruez S, Gohier P, Procaccio V, Milea D, den Dunnen JT, Reynier P, Ferré M",,,16.0,France +"31524988, 30329098",LncBook,0.997623146,LncBook,0.997623146,,0,2,"http://lncrna.big.ac.cn/index.php/Main_Page, http://bigd.big.ac.cn/lncbook","301, 301",,", ","no_wayback, no_wayback",2019-09-01,"BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing, China., BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing 100101, China.","Ma L, Cao J, Liu L, Li Z, Shireen H, Pervaiz N, Batool F, Raza RZ, Zou D, Bao Y, Abbasi AA, Zhang Z, Ma L, Cao J, Liu L, Du Q, Li Z, Zou D, Bajic VB, Zhang Z",", ",", National Key Research and Development Program of China, King Abdullah University of Science and Technology, Chinese Academy of Sciences, National Key Research and Development Program of China, Chinese Academy of Sciences, Chinese Academy of Sciences, Chinese Academy of Sciences, National Natural Science Foundation of China",67.0,"China, China" +"31599330, 27987180, 24185698",LSD,0.993650754,LSD,0.993650754,leaf senescence database,0.934000885,3,http://bigd.big.ac.cn/lsd,301,,,no_wayback,2020-01-01,"Beijing Advanced Innovation Center for Tree Breeding by Molecular Design, Beijing Forestry University, Beijing 100083, China., State Key Laboratory of Protein and Plant Gene Research, College of Life Sciences, and Peking-Tsinghua Center for Life Sciences, Peking University, Beijing, 100871, China., College of Life Sciences, Peking University, Beijing 100871, People's Republic of China, The Key Laboratory of Protein and Plant Gene Research, Peking University, Beijing 100871, People's Republic 
of China, Center for Life Sciences, Peking University, Beijing 100871, People's Republic of China and Center for Bioinformatics, Peking University, Beijing 100871, People's Republic of China.","Li Z, Zhang Y, Zou D, Zhao Y, Wang HL, Zhang Y, Xia X, Luo J, Guo H, Zhang Z, Li Z, Zhao Y, Liu X, Jiang Z, Peng J, Jin J, Guo H, Luo J, Li Z, Zhao Y, Liu X, Peng J, Guo H, Luo J",", , ","Informatization Plan of Chinese Academy of Sciences, China Postdoctoral Science Foundation, China Postdoctoral Science Foundation, National Natural Science Foundation of China, Chinese Academy of Sciences, Chinese Academy of Sciences, National Key Research and Development Program of China, National Natural Science Foundation of China, Chinese Academy of Sciences, National Natural Science Foundation of China, , ",58.0,"China, China, China, China, China, China" +31608375,GSAD,0.995496213,GSAD,0.995496213,Genome Size in Asteraceae Database,0.987456696,1,http://www.asteraceaegenomesize.com,302,,,no_wayback,2019-01-01,"Institut Botànic de Barcelona (IBB, CSIC-ICUB), Passeig del migdia s/n, 08038 Barcelona, Catalonia, Spain.","Vitales D, Fernández P, Garnatje T, Garcia S",,"Ramón y Caja, Dirección General de Investigación Científica y Técnica, Generalitat de Catalunya, Dirección General de Investigación Científica y Técnica, Dirección General de Investigación Científica y Técnica",4.0,Spain +"31642470, 22067452, 24194605, 29069413",WormBase,0.99719429,WormBase,0.99719429,,0,4,http://wormbase.org,301,,,http://web.archive.org/web/20221109004545/https://wormbase.org/,2020-01-01,"Informatics and Bio-computing Platform, Ontario Institute for Cancer Research, Toronto, ON M5G0A3, Canada., Division of Biology 156-29, California Institute of Technology, Pasadena, CA 91125, USA. 
kyook@wormbase.org, Informatics and Bio-computing Platform, Ontario Institute for Cancer Research, Toronto, ON M5G0A3, Canada, Genome Sequencing Center, Washington University, School of Medicine, St Louis, MO 63108, USA, Division of Biology and Biological Engineering 156-29, California Institute of Technology, Pasadena, CA 91125, USA, European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK, Department of Genetics Campus, Washington University School of Medicine, St. Louis, MO 63110, USA, Genetics Unit, Department of Biochemistry, University of Oxford, Oxford OX1 3QU, UK, Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA, UK and Howard Hughes Medical Institute, California Institute of Technology, Pasadena, CA 91125, USA., Division of Biology and Biological Engineering 156-29, California Institute of Technology, Pasadena, CA 91125, USA.","Harris TW, Arnaboldi V, Cain S, Chan J, Chen WJ, Cho J, Davis P, Gao S, Grove CA, Kishore R, Lee RYN, Muller HM, Nakamura C, Nuin P, Paulini M, Raciti D, Rodgers FH, Russell M, Schindelman G, Auken KV, Wang Q, Williams G, Wright AJ, Yook K, Howe KL, Schedl T, Stein L, Sternberg PW, Yook K, Harris TW, Bieri T, Cabunoc A, Chan J, Chen WJ, Davis P, de la Cruz N, Duong A, Fang R, Ganesan U, Grove C, Howe K, Kadam S, Kishore R, Lee R, Li Y, Muller HM, Nakamura C, Nash B, Ozersky P, Paulini M, Raciti D, Rangarajan A, Schindelman G, Shi X, Schwarz EM, Ann Tuli M, Van Auken K, Wang D, Wang X, Williams G, Hodgkin J, Berriman M, Durbin R, Kersey P, Spieth J, Stein L, Sternberg PW, Harris TW, Baran J, Bieri T, Cabunoc A, Chan J, Chen WJ, Davis P, Done J, Grove C, Howe K, Kishore R, Lee R, Li Y, Muller HM, Nakamura C, Ozersky P, Paulini M, Raciti D, Schindelman G, Tuli MA, Van Auken K, Wang D, Wang X, Williams G, Wong JD, Yook K, Schedl T, Hodgkin J, Berriman M, Kersey P, Spieth J, Stein L, Sternberg PW, Lee RYN, Howe KL, 
Harris TW, Arnaboldi V, Cain S, Chan J, Chen WJ, Davis P, Gao S, Grove C, Kishore R, Muller HM, Nakamura C, Nuin P, Paulini M, Raciti D, Rodgers F, Russell M, Schindelman G, Tuli MA, Van Auken K, Wang Q, Williams G, Wright A, Yook K, Berriman M, Kersey P, Schedl T, Stein L, Sternberg PW",", , , ","National Human Genome Research Institute, Biotechnology and Biological Sciences Research Council, Medical Research Council, NHGRI NIH HHS, Howard Hughes Medical Institute, NHGRI NIH HHS, NHGRI NIH HHS, Medical Research Council, Medical Research Council, NHGRI NIH HHS, Medical Research Council, NHGRI NIH HHS, Medical Research Council, NHGRI NIH HHS, NHGRI NIH HHS, Howard Hughes Medical Institute, NHGRI NIH HHS, NLM NIH HHS, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, Medical Research Council",435.0,"Canada, Canada, United States, United States, United States, United States, United States, United States" +"31642487, 27924043",AraPheno,0.992314994,AraPheno,0.992314994,,0,2,"http://arapheno.1001genomes.org, http://aragwas.1001genomes.org","302, 302",,", ","http://web.archive.org/web/20221104145444/https://arapheno.1001genomes.org/, http://web.archive.org/web/20221016200523/https://aragwas.1001genomes.org/",2020-01-01,"Machine Learning and Computational Biology Lab, Department of Biosystems Science and Engineering, ETH Zürich, Basel, Switzerland., Gregor Mendel Institute of Molecular Plant Biology, Vienna, Austria.","Togninalli M, Seren Ü, Freudenthal JA, Monroe JG, Meng D, Nordborg M, Weigel D, Borgwardt K, Korte A, Grimm DG, Seren Ü, Grimm D, Fitz J, Weigel D, Nordborg M, Borgwardt K, Korte A",", ","NSF, ",44.0,"Austria, Switzerland, Ethiopia" +"31647100, 26481361",BGD,0.996976018,BGD,0.996976018,Bovine Genome Database,0.979248871,2,http://bovinegenome.org,301,United States,"(30.2493,-81.5268)",http://web.archive.org/web/20210610054206/http://www.bovinegenome.org./,2020-01-01,"Division of Animal Sciences, University of Missouri, Columbia, MO 65211, 
USA., nan","Shamimuzzaman M, Le Tourneau JJ, Unni DR, Diesh CM, Triant DA, Walsh AT, Tayal A, Conant GC, Hagen DE, Elsik CG, nan",", nan","Seventh Framework Programme, National Institute of Food and Agriculture, nan",8.0,United States +"31647101, 26590260, 29126174, 24304892",miRTarBase,0.9938097,miRTarBase,0.9938097,,0,4,http://miRTarBase.cuhk.edu.cn,301,China,"(22.5559,114.0577)",no_wayback,2020-01-01,"School of Life and Health Sciences, The Chinese University of Hong Kong, Shenzhen, Longgang District, Shenzhen, Guangdong Province 518172, China., nan, Institute of Bioinformatics and Systems Biology, National Chiao Tung University, Hsinchu, 300, Taiwan., Institute of Bioinformatics and Systems Biology, National Chiao Tung University, Hsinchu 300, Taiwan, Institute of Genomics and Bioinformatics, National Chung Hsing University, Taichung 402, Taiwan, Department of Computer Science and Engineering, National Chung Hsing University, Taichung 402, Taiwan, Department of Biological Science and Technology, National Chiao Tung University, Hsinchu 300, Taiwan, Molecular Bioinformatics Center, National Chiao Tung University, Hsinchu 300, Taiwan, Graduate Department of Clinical Pharmacy, Taipei Medical University, Taipei 110, Taiwan, Institute of Molecular Medicine and Bioengineering, National Chiao Tung University, Hsinchu 300, Taiwan, Graduate Institute of Biomedical Informatics, Taipei Medical University, Taipei 110, Taiwan, Department of Obstetrics and Gynecology, Hsinchu Mackay Memorial Hospital, Hsinchu 300, Taiwan, Mackay Medicine, Nursing and Management College, Taipei 112, Taiwan, Department of Medicine, Mackay Medical College, New Taipei City 252, Taiwan, and Department of Biomedical Science and Environmental Biology, Kaohsiung Medical University, Kaohsiung 807, Taiwan.","Huang HY, Lin YC, Li J, Huang KY, Shrestha S, Hong HC, Tang Y, Chen YG, Jin CN, Yu Y, Xu JT, Li YM, Cai XX, Zhou ZY, Chen XH, Pei YY, Hu L, Su JJ, Cui SD, Wang F, Xie YY, Ding SY, Luo MF, Chou CH, 
Chang NW, Chen KW, Cheng YH, Wan XH, Hsu WL, Lee TY, Wei FX, Huang HD, nan, Chou CH, Shrestha S, Yang CD, Chang NW, Lin YL, Liao KW, Huang WC, Sun TH, Tu SJ, Lee WH, Chiew MY, Tai CS, Wei TY, Tsai TR, Huang HT, Wang CY, Wu HY, Ho SY, Chen PR, Chuang CH, Hsieh PJ, Wu YS, Chen WL, Li MJ, Wu YC, Huang XY, Ng FL, Buddhakosai W, Huang PC, Lan KC, Huang CY, Weng SL, Cheng YN, Liang C, Hsu WL, Huang HD, Hsu SD, Tseng YT, Shrestha S, Lin YL, Khaleel A, Chou CH, Chu CF, Huang HY, Lin CM, Ho SY, Jian TY, Lin FM, Chang TH, Weng SL, Liao KW, Liao IE, Liu CC, Huang HD",", nan, , ","Warshel Institute for Computational Biology, Shenzhen Ganghong Group Co., nan, , ",1704.0,"China, Hong Kong" +"31665425, 24214955",TFBSshape,0.972820997,TFBSshape,0.972820997,,0,2,http://tfbsshape.usc.edu,302,,,http://web.archive.org/web/20220418014019/https://tfbsshape.usc.edu/,2020-01-01,"Quantitative and Computational Biology, Departments of Biological Sciences, Chemistry, Physics & Astronomy, and Computer Science, University of Southern California, Los Angeles, CA 90089, USA., Molecular and Computational Biology Program, University of Southern California, Los Angeles, CA 90089, USA, Department of Biology, Technion - Israel Institute of Technology, Technion City, Haifa 32000, Israel, Centre for Molecular Medicine and Therapeutics, University of British Columbia, Vancouver, BC, Canada and Institute for Genome Sciences & Policy, Duke University, Durham, NC 27708, USA.","Chiu TP, Xin B, Markarian N, Wang Y, Rohs R, Yang L, Zhou T, Dror I, Mathelier A, Wasserman WW, Gordân R, Rohs R",", ","NIGMS NIH HHS, USC-Taiwan Postdoctoral Fellowship, National Institutes of Health, NIGMS NIH HHS, Rose Hills Foundation, NHGRI NIH HHS, National Institutes of Health, Human Frontier Science Program, National Institutes of Health, NHGRI NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS",82.0,"Canada, Israel, Israel, United States, United States, United States" +"31665441, 23650175, 
27789705",CARD,0.996455212,CARD,0.996455212,Comprehensive Antibiotic Resistance Database,0.990965346,3,http://card.mcmaster.ca,301,Canada,"(43.2025,-79.9016)",http://web.archive.org/web/20221108223814/https://card.mcmaster.ca/,2020-01-01,"David Braley Centre for Antibiotic Discovery, McMaster University, Hamilton, Ontario, L8S 4K1, Canada., nan, M.G. DeGroote Institute for Infectious Disease Research, Department of Biochemistry and Biomedical Sciences, DeGroote School of Medicine, McMaster University, Hamilton, Ontario L8S 4K1, Canada.","Alcock BP, Raphenya AR, Lau TTY, Tsang KK, Bouchard M, Edalatmand A, Huynh W, Nguyen AV, Cheng AA, Liu S, Min SY, Miroshnichenko A, Tran HK, Werfalli RE, Nasir JA, Oloni M, Speicher DJ, Florescu A, Singh B, Faltyn M, Hernandez-Koutoucheva A, Sharma AN, Bordeleau E, Pawlowski AC, Zubyk HL, Dooley D, Griffiths E, Maguire F, Winsor GL, Beiko RG, Brinkman FSL, Hsiao WWL, Domselaar GV, McArthur AG, nan, Jia B, Raphenya AR, Alcock B, Waglechner N, Guo P, Tsang KK, Lago BA, Dave BM, Pereira S, Sharma AN, Doshi S, Courtot M, Lo R, Williams LE, Frye JG, Elsayegh T, Sardar D, Westman EL, Pawlowski AC, Johnson TA, Brinkman FS, Wright GD, McArthur AG",", nan, ","Genome Canada, Cisco Research Chair in Bioinformatics, Cisco Systems, Canadian Institutes of Health Research, Ontario Graduate Scholarship, nan, CIHR, Wellcome Trust",1421.0,"Canada, Canada" +"31665499, 29126285",ReMap,0.989697456,ReMap,0.989697456,,0,2,http://remap.univ-amu.fr,308,France,"(43.2951,5.3861)",http://web.archive.org/web/20221017005522/https://remap.univ-amu.fr/,2020-01-01,"Aix Marseille Univ, INSERM, TAGC, Marseille, France., INSERM, UMR1090 TAGC, Marseille F-13288, France.","Chèneby J, Ménétrier Z, Mestdagh M, Rosnet T, Douida A, Rhalloussi W, Bergon A, Lopez F, Ballester B, Chèneby J, Gheorghe M, Artufel M, Mathelier A, Ballester B",", ","French Ministry of Higher Education and Research, Institut National de la Santé et de la Recherche Médicale, ",129.0,"France, France" 
+"31670377, 24217916",NPInter,0.993997335,NPInter,0.993997335,,0,2,http://bigdata.ibp.ac.cn/npinter,302,,,wayback is down,2020-01-01,"Key Laboratory of RNA Biology, Center for Big Data Research in Health, Institute of Biophysics, Chinese Academy of Sciences, Beijing 100101, China., Laboratory of Noncoding RNA, Institute of Biophysics, Chinese Academy of Sciences, Beijing 100101, China, University of Chinese Academy of Sciences, Beijing 100049, China and Bioinformatics Research Group, Advanced Computing Research Laboratory, Institute of Computing Technology, Chinese Academy of Sciences, Beijing 100190, China.","Teng X, Chen X, Xue H, Tang Y, Zhang P, Kang Q, Hao Y, Chen R, Zhao Y, He S, Yuan J, Wu W, Xie C, Zhao G, Zhao Y, Chen R",", ","National Key R&D Program of China, National Natural Science Foundation of China, Chinese Academy of Sciences, National Key R&D Program of China, National Natural Science Foundation of China, ",99.0,"China, China, China, China" +"31680157, 27899563",ChimerDB,0.995721757,ChimerDB,0.995721757,,0,2,http://www.kobic.re.kr/chimerdb,301,,"(36.638,127.4838)",http://web.archive.org/web/20220817050312/http://www.kobic.re.kr/chimerdb/,2020-01-01,"Department of Bio-Information Science, Ewha Womans University, Seoul 03760, Republic of Korea., Department of Bio-Information Science, Ewha Womans University, Seoul 03760, Republic of Korea.","Jang YE, Jang I, Kim S, Cho S, Kim D, Kim K, Kim J, Hwang J, Kim S, Kim J, Kang J, Lee B, Lee S, Lee M, Lee K, Yu N, Jang I, Choi I, Kim P, Jang YE, Kim B, Kim S, Lee B, Kang J, Lee S",", ","National Research Foundation of Korea, KRIBB, National Research Foundation of Korea, ",54.0, +"31680159, 23193273, 27899657, 25378343",EPD,0.994761258,EPD,0.994761258,Eukaryotic Promoter Database,0.983197996,4,http://epd.epfl.ch,302,,,http://web.archive.org/web/20221109202152/https://epd.epfl.ch/,2020-01-01,"Swiss Institute of Bioinformatics (SIB), CH-1015 Lausanne, Switzerland., Swiss Institute of Bioinformatics (SIB), 
CH-1015 Lausanne, Switzerland., Swiss Institute of Bioinformatics (SIB), CH-1015 Lausanne, Switzerland rene.dreos@epfl.ch., Swiss Institute of Bioinformatics (SIB), CH-1015 Lausanne, Switzerland.","Meylan P, Dreos R, Ambrosini G, Groux R, Bucher P, Dreos R, Ambrosini G, Cavin Périer R, Bucher P, Dreos R, Ambrosini G, Groux R, Cavin Périer R, Bucher P, Dreos R, Ambrosini G, Périer RC, Bucher P",", , , ","Swiss Government, Swiss National Science Foundation, Swiss National Science Foundation, Swiss National Science Foundation",250.0,"Switzerland, Switzerland, Switzerland, Switzerland" +"31691799, 29082924",Norine,0.987097681,Norine,0.987097681,,0,2,http://bioinfo.cristal.univ-lille.fr/norine,302,,,no_wayback,2020-01-01,"Univ. Lille, CNRS, Centrale Lille, UMR 9189 - CRIStAL - Centre de Recherche en Informatique Signal et Automatique de Lille, F-59000 Lille, France., Univ Lille, CNRS, Centrale Lille, UMR 9189 - CRIStAL - Centre de Recherche en Informatique Signal et Automatique de Lille, F-59000 Lille, France.","Flissi A, Ricart E, Campart C, Chevalier M, Dufresne Y, Michalik J, Jacques P, Flahaut C, Lisacek F, Leclère V, Pupin M, Pupin M, Esmaeel Q, Flissi A, Dufresne Y, Jacques P, Leclère V",", ",", University of Lille 1, Inria-Lille Nord Europe, bilille plateform",32.0,"France, France" +"31691815, 31802127, 32486891, 29145629, 22012987, 24243840, 26087747",Reactome,0.799830675,Reactome,0.799830675,,0,7,http://reactome.org,301,United States,"(39.0438,-77.4874)",http://web.archive.org/web/20221108115924/https://reactome.org/,2020-06-02,"Ontario Institute for Cancer Research, Toronto, ON M5G0A3, Canada., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Cambridge, UK., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome 
Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK., Ontario Institute for Cancer Research, Toronto, ON, M5G0A3, Canada., European Bioinformatics Institute (EMBL-EBI), European Molecular Biology Laboratory, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK, Ontario Institute for Cancer Research, Toronto, ON M5G0A3, Canada, College of Pharmacy and Health Sciences, St. John's University, Queens, NY 11439, USA, NYU School of Medicine, New York, NY 10016, USA, Cold Spring Harbor Laboratory, Cold Spring Harbor, NY 11724, USA and Department of Molecular Genetics, University of Toronto, Toronto, ON M5S 1A1, Canada., Ontario Institute for Cancer Research, Toronto, Ontario, Canada.","Jassal B, Matthews L, Viteri G, Gong C, Lorente P, Fabregat A, Sidiropoulos K, Cook J, Gillespie M, Haw R, Loney F, May B, Milacic M, Rothfels K, Sevilla C, Shamovsky V, Shorser S, Varusai T, Weiser J, Wu G, Stein L, Hermjakob H, D'Eustachio P, Viteri G, Matthews L, Varusai T, Gillespie M, Milacic M, Cook J, Weiser J, Shorser S, Sidiropoulos K, Fabregat A, Haw R, Wu G, Stein L, D'Eustachio P, Hermjakob H, Varusai TM, Jupe S, Sevilla C, Matthews L, Gillespie M, Stein L, Wu G, D'Eustachio P, Metzakopian E, Hermjakob H, Fabregat A, Jupe S, Matthews L, Sidiropoulos K, Gillespie M, Garapati P, Haw R, Jassal B, Korninger F, May B, Milacic M, Roca CD, Rothfels K, Sevilla C, Shamovsky V, Shorser S, Varusai T, Viteri G, Weiser J, Wu G, Stein L, Hermjakob H, D'Eustachio P, Haw RA, Croft D, Yung CK, Ndegwa N, D'Eustachio P, Hermjakob H, Stein LD, Croft D, Mundo AF, Haw R, Milacic M, Weiser J, Wu G, Caudy M, Garapati P, Gillespie M, Kamdar MR, Jassal B, Jupe S, Matthews L, May B, Palatnik S, Rothfels K, Shamovsky V, Song H, Williams M, Birney E, Hermjakob H, Stein L, D'Eustachio P, McKay SJ, Weiser J",", , , , , , ","NCI NIH HHS, NHGRI NIH HHS, National Institutes of Health, National Institutes of Health, NHGRI NIH HHS, European Bioinformatics Institute, , NHGRI NIH HHS, NHGRI NIH HHS, 
European Bioinformatics Institute, Foundation for the National Institutes of Health, Medical Research Council, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS",2591.0,"Canada, Canada, Canada, Canada, Canada, United States, United States, United States" +31724711,SCOP,0.993221243,SCOP,0.993221243,Structural Classification of Proteins,0.824369984,1,http://scop.mrc-lmb.cam.ac.uk,200,,,no_wayback,2020-01-01,"MRC Laboratory of Molecular Biology, Francis Crick Avenue, Cambridge CB2 0QH, UK.","Andreeva A, Kulesha E, Gough J, Murzin AG",,"Medical Research Council, Biotechnology and Biological Sciences Research Council, Medical Research Council",44.0, +"31740966, 27515742",EnhancerAtlas,0.995407641,EnhancerAtlas,0.995407641,,0,2,http://www.enhanceratlas.org/indexv2.php,200,United States,"(39.3248,-76.6062)",no_wayback,2020-01-01,"The Wilmer Eye Institute, Johns Hopkins School of Medicine, Baltimore, MD 21231, USA., The Wilmer Eye Institute, Johns Hopkins School of Medicine, Baltimore, MD 21205, USA.","Gao T, Qian J, Gao T, He B, Liu S, Zhu H, Tan K, Qian J",", ","National Institutes of Health, National Institutes of Health, National Institutes of Health, National Institutes of Health, NEI NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NEI NIH HHS, NIGMS NIH HHS",159.0,"United States, United States" +"31979981, 27043825",CPAD,0.964463413,CPAD,0.964463413,Curated Protein Aggregation Database,0.924321791,2,http://web.iitm.ac.in/bioinfo2/cpad2/index.html,302,India,"(12.8996,80.2209)",no_wayback,2020-01-24,"Protein Bioinformatics Lab, Department of Biotechnology, Bhupat and Jyoti Mehta School of Biosciences, Indian Institute of Technology Madras, Chennai, India., Center for Advanced Studies in Crystallography and Biophysics and Bioinformatics Infrastructure Facility, University of Madras, Chennai, 600025, India.","Rawat P, Prabakaran R, Sakthivel R, Mary Thangakani A, Kumar S, Gromiha MM, Thangakani AM, Nagarajan R, Kumar S, 
Sakthivel R, Velmurugan D, Gromiha MM",", ",", ",18.0,"India, India" +"32016318, 22903802",VariBench,0.997050226,VariBench,0.997050226,,0,2,"http://structure.bmc.lu.se/VariBench/, http://structure.bmc.lu.se/VariBench","200, 301",Sweden,"(55.7037,13.1946), ","http://web.archive.org/web/20220704152214/http://structure.bmc.lu.se/VariBench/, http://web.archive.org/web/20220704152214/http://structure.bmc.lu.se/VariBench/",2020-01-01,"Department of Experimental Medical Science, BMC B13, Lund University, SE-22 184 Lund, Sweden., Institute of Biomedical Technology, University of Tampere, Tampere, Finland.","Sarkar A, Yang Y, Vihinen M, Sasidharan Nair P, Vihinen M",", ","Vetenskapsrådet, National Natural Science Foundation of China, Swedish Cancer Society, ",77.0,"Finland, Sweden" +"32248568, 27924022",FINDbase,0.996567488,FINDbase,0.996567488,,0,2,http://www.findbase.org,301,,,http://web.archive.org/web/20220819211510/https://findbase.org/,2020-04-14,"Department of Computer Engineering and Informatics, Faculty of Engineering, University of Patras, Patras, Greece., University of Patras, Faculty of Engineering, Department of Computer Engineering and Informatics, GR-26504, Patras, Greece.","Kounelis F, Kanterakis A, Kanavos A, Pandi MT, Kordou Z, Manusama O, Vonitsanos G, Katsila T, Tsermpini EE, Lauschke VM, Koromina M, van der Spek PJ, Patrinos GP, Viennas E, Komianou A, Mizzi C, Stojiljkovic M, Mitropoulou C, Muilu J, Vihinen M, Grypioti P, Papadaki S, Pavlidis C, Zukic B, Katsila T, van der Spek PJ, Pavlovic S, Tzimas G, Patrinos GP",", ","European Commission, European Commission, General Secretariat for Research and Technology, ",11.0,"Greece, Greece" +32257241,CGDB,0.994118055,CGDB,0.994118055,Coriander Genomics Database,0.98100695,1,http://cgdb.bio2db.com,200,,,no_wayback,2020-04-01,"1Center for Genomics and Biocomputing/College of Life Sciences, North China University of Science and Technology, Tangshan, Hebei 063210 China.","Song X, Nie F, Chen W, Ma X, Gong K, Yang 
Q, Wang J, Li N, Sun P, Pei Q, Yu T, Hu J, Li X, Wu T, Feng S, Li XQ, Wang X",,China-Hebei 100 Scholars Supporting Project to,7.0,"China, China" +"32472030, 25841437",dbPSP,0.99289383,dbPSP,0.99289383,prokaryotes,0.651427448,2,http://dbpsp.biocuckoo.cn,200,,,http://web.archive.org/web/20220619105238/http://dbpsp.biocuckoo.cn/,2020-05-29,"Key Laboratory of Molecular Biophysics of Ministry of Education, Hubei Bioinformatics and Molecular Imaging Key Laboratory, Center for Artificial Intelligence Biology, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei, 430074, China., School of Life Sciences, University of Science and Technology of China, Hefei 230027, China, Department of Biomedical Engineering, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China, and State Key Laboratory of Biocontrol, School of Life Sciences, School of Advanced Computing, Sun Yat-sen University, Guangzhou 510275, China School of Life Sciences, University of Science and Technology of China, Hefei 230027, China, Department of Biomedical Engineering, College of Life Science and Technology, Huazhong University of Science and Technology, Wuhan, Hubei 430074, China, and State Key Laboratory of Biocontrol, School of Life Sciences, School of Advanced Computing, Sun Yat-sen University, Guangzhou 510275, China.","Shi Y, Zhang Y, Lin S, Wang C, Zhou J, Peng D, Xue Y, Pan Z, Wang B, Zhang Y, Wang Y, Ullah S, Jian R, Liu Z, Xue Y",", ",", ",14.0,"China, China, China, China, China, China, China, China, China" +"32558264, 28296894, 27450113",PDB,0.993884663,PDB,0.993884663,Worldwide Protein Data Bank organization,0.829005563,3,http://pdb101.rcsb.org,301,,,http://web.archive.org/web/20221102193052/https://pdb101.rcsb.org/,2020-06-17,"Research Collaboratory for Structural Bioinformatics Protein Data Bank, Rutgers, The State University of New Jersey, Piscataway, New Jersey, USA., Bioinformatics and 
Medical Informatics, San Diego State University, San Diego, California, United States of America., Research Collaboratory for Structural Bioinformatics Protein Data Bank, Department of Chemistry and Chemical Biology, Center for Integrative Proteomics Research, Institute for Quantitative Biomedicine, Rutgers, The State University of New Jersey, 174 Frelinghuysen Road, Piscataway, NJ 08854, USA. Electronic address: berman@rcsb.rutgers.edu.","Goodsell DS, Zardecki C, Berman HM, Burley SK, Bhattacharya R, Rose PW, Burley SK, Prlić A, Berman HM, Burley SK, Kleywegt GJ, Markley JL, Nakamura H, Velankar S",", , ","NIGMS NIH HHS, United States Department of Energy, National Science Foundation, National Science Foundation, National Institutes of Health, National Science Foundation, NIGMS, EU, Biotechnology and Biological Sciences Research Council, MRC, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, NIGMS NIH HHS, Wellcome Trust, Biotechnology and Biological Sciences Research Council, NIH, Biotechnology and Biological Sciences Research Council, DOE, Biotechnology and Biological Sciences Research Council, Medical Research Council, NIGMS NIH HHS, NSF",41.0,"Jersey, Jersey, Jersey, United States, United States, United States" +"32728249, 31713622, 29126249, 23193274",ENCODE,0.986879021,ENCODE,0.986879021,The Encyclopedia of DNA Elements,0.854699457,4,"http://www.encodeproject.org, http://screen.encodeproject.org","301, 200",United States,"(45.5235,-122.676), ","http://web.archive.org/web/20221109210422/https://www.encodeproject.org/, no_wayback",2020-07-29,"None, Department of Genetics, Stanford University, Stanford, CA 94305-5477, USA., Department of Genetics, Stanford University, Stanford, CA 94305-5120, USA., Center for Biomolecular Science and Engineering, School of Engineering, 
University of California Santa Cruz (UCSC), Santa Cruz, CA 95064, USA. kate@soe.ucsc.edu",", Moore JE, Purcaro MJ, Pratt HE, Epstein CB, Shoresh N, Adrian J, Kawli T, Davis CA, Dobin A, Kaul R, Halow J, Van Nostrand EL, Freese P, Gorkin DU, Shen Y, He Y, Mackiewicz M, Pauli-Behn F, Williams BA, Mortazavi A, Keller CA, Zhang XO, Elhajjajy SI, Huey J, Dickel DE, Snetkova V, Wei X, Wang X, Rivera-Mulia JC, Rozowsky J, Zhang J, Chhetri SB, Zhang J, Victorsen A, White KP, Visel A, Yeo GW, Burge CB, Lécuyer E, Gilbert DM, Dekker J, Rinn J, Mendenhall EM, Ecker JR, Kellis M, Klein RJ, Noble WS, Kundaje A, Guigó R, Farnham PJ, Cherry JM, Myers RM, Ren B, Graveley BR, Gerstein MB, Pennacchio LA, Snyder MP, Bernstein BE, Wold B, Hardison RC, Gingeras TR, Stamatoyannopoulos JA, Weng Z, Luo Y, Hitz BC, Gabdank I, Hilton JA, Kagda MS, Lam B, Myers Z, Sud P, Jou J, Lin K, Baymuradov UK, Graham K, Litton C, Miyasato SR, Strattan JS, Jolanki O, Lee JW, Tanaka FY, Adenekan P, O'Neill E, Cherry JM, Davis CA, Hitz BC, Sloan CA, Chan ET, Davidson JM, Gabdank I, Hilton JA, Jain K, Baymuradov UK, Narayanan AK, Onate KC, Graham K, Miyasato SR, Dreszer TR, Strattan JS, Jolanki O, Tanaka FY, Cherry JM, Rosenbloom KR, Sloan CA, Malladi VS, Dreszer TR, Learned K, Kirkup VM, Wong MC, Maddren M, Fang R, Heitner SG, Lee BT, Barber GP, Harte RA, Diekhans M, Long JC, Wilder SP, Zweig AS, Karolchik D, Kuhn RM, Haussler D, Kent WJ",", , , ","Biotechnology and Biological Sciences Research Council, NIDDK NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, Medical Research Council, NCI NIH HHS, NCI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NCI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NIDDK NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, Biotechnology and Biological Sciences Research Council, NIDDK NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, 
NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, National Institutes of Health, National Human Genome Research Institute, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, Wellcome Trust, NHGRI NIH HHS, Howard Hughes Medical Institute",1412.0,"United States, United States, United States" +32786900,BCE,0.897705595,BCE,0.897705595,bioactive conformational,0.562582284,1,http://mmb.irbbarcelona.org/BCE,301,Spain,"(41.387,2.1701)",no_wayback,2020-09-01,"Institute for Research in Biomedicine (IRB Barcelona). The Barcelona Institute of Science and Technology (BIST). Barcelona 08028, Spain.","Zivanovic S, Bayarri G, Colizzi F, Moreno D, Gelpí JL, Soliva R, Hospital A, Orozco M",,"H2020 Marie Sklodowska-Curie Actions, Generalitat de Catalunya, Centre for Industrial Technological Development, Fundaci?n Bot?n, Instituto Nacional de Bioinform?tica, Ministerio de Ciencia e Innovaci?n",2.0,Spain +"32898258, 25708775",EXPath,0.997518301,EXPath,0.997518301,,0,2,http://EXPath.itps.ncku.edu.tw,200,,"(22.9917,120.2148)",http://web.archive.org/web/20220615135601/http://expath.itps.ncku.edu.tw/,2020-10-01,"Department of Life Sciences, National Cheng Kung University, Tainan 701, Taiwan., None","Tseng KC, Li GZ, Hung YC, Chow CN, Wu NY, Chien YY, Zheng HQ, Lee TY, Kuo PL, Chang SB, Chang WC, Chien CH, Chow CN, Wu NY, Chiang-Hsieh YF, Hou PF, Chang WC",", ","Academia Sinica, Ministry of Science and Technology, Ministry of Science and Technology, Innovative Translational Agricultural Research Program, ",19.0, +33021634,NDB,0.968252078,NDB,0.968252078,Nucleome Data Bank,0.928795207,1,http://ndb.rice.edu,302,,,wayback is down,2021-01-01,"Center for Theoretical Biological Physics, Rice University, Houston, TX 77005, USA.","Contessoto VG, Cheng RR, Hajitaheri A, Dodero-Rojas E, Mello MF, Lieberman-Aiden E, Wolynes PG, Di Pierro M, Onuchic JN",,"Fundação de Amparo ã Pesquisa do Estado de São Paulo, Welch Foundation, Fundação de Amparo ã Pesquisa do Estado de São Paulo, USDA 
Agriculture and Food Research Initiative, National Science Foundation, NHGRI NIH HHS, Welch Foundation, National Science Foundation, Welch Foundation, NIH, Robert A. Welch Postdoctoral Fellowship, Cancer Prevention and Research Institute of Texas",8.0,United States +"33051671, 26615197, 23193280",StreptomeDB,0.996224344,StreptomeDB,0.996224344,,0,3,http://www.pharmbioinf.uni-freiburg.de/streptomedb,301,Germany,"(47.9908,7.8578)",no_wayback,2021-01-01,"Institute of Pharmaceutical Sciences, Albert-Ludwigs-Universität Freiburg, Hermann-Herder-Straße 9, D-79104 Freiburg, Germany., Pharmaceutical Bioinformatics, Institute of Pharmaceutical Sciences, Albert-Ludwigs-University, Hermann-Herder-Strasse 9, Freiburg 79104, Germany., Pharmaceutical Bioinformatics, Institute of Pharmaceutical Sciences, Albert-Ludwigs-University, D-79104 Freiburg, Germany.","Moumbock AFA, Gao M, Qaseem A, Li J, Kirchner PA, Ndingkokhar B, Bekono BD, Simoben CV, Babiaka SB, Malange YI, Sauter F, Zierep P, Ntie-Kang F, Günther S, Klementz D, Döring K, Lucas X, Telukunta KK, Erxleben A, Deubel D, Erber A, Santillana I, Thomas OS, Bechthold A, Günther S, Lucas X, Senger C, Erxleben A, Grüning BA, Döring K, Mosch J, Flemming S, Günther S",", , ","Baden-Württemberg Foundation, German Academic Exchange Service, German Academic Exchange Service, German Academic Exchange Service, German Research Foundation, China Scholarship Council, , ",89.0,"Germany, Germany, Germany" +"33079992, 23476021",PLncDB,0.997522697,PLncDB,0.997522697,Plant long non-coding RNA database,0.98458527,2,http://plncdb.tobaccodb.org,301,,"(34.7578,113.6486)",no_wayback,2021-01-01,"China Tobacco Gene Research Center, Zhengzhou Tobacco Research Institute of CNTC, Zhengzhou 450001, China., Laboratory of Plant Molecular Biology, Rockefeller University, New York, NY 10065, USA.","Jin J, Lu P, Xu Y, Li Z, Yu S, Liu J, Wang H, Chua NH, Cao P, Jin J, Liu J, Wang H, Wong L, Chua NH",", ","Zhengzhou Tobacco Research Institute, China 
Association for Science and Technology, Zhengzhou Tobacco Research Institute, Zhengzhou Tobacco Research Institute, National Research Foundation of Singapore, NIGMS NIH HHS, NIGMS NIH HHS",99.0,"China, China, United States" +"33084874, 27799467, 22075992",OGEE,0.996816039,OGEE,0.996816039,,0,3,http://v3.ogee.info,301,,,no_wayback,2021-01-01,"Key Laboratory of Molecular Biophysics of the Ministry of Education, Hubei Key Laboratory of Bioinformatics and Molecular-imaging, Center for Artificial Biology, Department of Bioinformatics and Systems Biology, College of Life Science and Technology, Huazhong University of Science and Technology (HUST), 430074 Wuhan, Hubei, China., Key Laboratory of Molecular Biophysics of the Ministry of Education, Hubei Key Laboratory of Bioinformatics and Molecular-imaging, Department of Bioinformatics and Systems Biology, College of Life Science and Technology, Huazhong University of Science and Technology (HUST), 430074 Wuhan, Hubei, China weihuachen@hust.edu.cn., European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany.","Gurumayum S, Jiang P, Hao X, Campos TL, Young ND, Korhonen PK, Gasser RB, Bork P, Zhao XM, He LJ, Chen WH, Chen WH, Lu G, Chen X, Zhao XM, Bork P, Chen WH, Minguez P, Lercher MJ, Bork P",", , ","Australian Research Council, Shanghai Municipal Science and Technology, National Key Research and Development Program of China, Natural Science Foundation of Shanghai, NHMRC, National Key Research and Development Program of China, , ",169.0,"China, China, Germany" +"33084889, 26496949",KLIFS,0.998473823,KLIFS,0.998473823,inase-L,0.635523836,2,http://klifs.net,301,,,http://web.archive.org/web/20220818173712/https://klifs.net/,2021-01-01,"Division of Medicinal Chemistry, Amsterdam Institute for Molecules, Medicines and Systems (AIMMS), Vrije Universiteit Amsterdam, De Boelelaan 1108, 1081 HZ Amsterdam, The Netherlands., Division of Medicinal Chemistry, Amsterdam Institute for Molecules, Medicines and 
Systems (AIMMS), Vrije Universiteit Amsterdam, Amsterdam, 1081 HV, The Netherlands.","Kanev GK, de Graaf C, Westerman BA, de Esch IJP, Kooistra AJ, Kooistra AJ, Kanev GK, van Linden OP, Leurs R, de Esch IJ, de Graaf C",", ","The Dutch Cancer Society, Cancer Center Amsterdam, Brain Tumour Charity, The Dutch Cancer Society, The Brain Tumour Charity, Dutch Research Council (NWO)",57.0,"Netherlands, Netherlands" +"33104772, 24163257",GXD,0.998189569,GXD,0.998189569,Expression Database,0.871181101,2,"http://www.ebi.ac.uk/arrayexpress/, http://www.ncbi.nlm.nih.gov/geo","301, 301",,", ","http://web.archive.org/web/20220930031306/https://www.ebi.ac.uk/arrayexpress/, http://web.archive.org/web/20221103131023/https://www.ncbi.nlm.nih.gov/geo/",2021-01-01,"The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA., The Jackson Laboratory, 600 Main Street, Bar Harbor, ME 04609, USA.","Baldarelli RM, Smith CM, Finger JH, Hayamizu TF, McCright IJ, Xu J, Shaw DR, Beal JS, Blodgett O, Campbell J, Corbani LE, Frost PJ, Giannatto SC, Miers DB, Kadin JA, Richardson JE, Ringwald M, Smith CM, Finger JH, Hayamizu TF, McCright IJ, Xu J, Berghout J, Campbell J, Corbani LE, Forthofer KL, Frost PJ, Miers D, Shaw DR, Stone KR, Eppig JT, Kadin JA, Richardson JE, Ringwald M",", ","NICHD NIH HHS, National Institutes of Health, NICHD NIH HHS, NCI NIH HHS, NICHD NIH HHS",72.0,"United States, United States" +"33104797, 27587585",REDIportal,0.996106863,REDIportal,0.996106863,,0,2,http://srv00.recas.ba.infn.it/atlas/index.html,200,,,http://web.archive.org/web/20221019031151/http://srv00.recas.ba.infn.it/atlas/index.html,2021-01-01,"Department of Biosciences, Biotechnologies and Biopharmaceutics (DBBB), University of Bari, Via Orabona 4, 70125 Bari, Italy., Department of Biosciences, Biotechnology and Biopharmaceutics, University of Bari, Via Orabona 4, 70126 Bari, Italy ernesto.picardi@uniba.it.","Mansi L, Tangaro MA, Lo Giudice C, Flati T, Kopel E, Schaffer AA, Castrignanò T, Chillemi G, 
Pesole G, Picardi E, Picardi E, D'Erchia AM, Lo Giudice C, Pesole G",", ","PRACE call 18, Elixir ITA, PRACE call 15, CCR NIH HHS, NIMH NIH HHS, NHLBI NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIMH NIH HHS, NIDA NIH HHS, NCI NIH HHS",129.0,"Italy, Italy" +"33112702, 25233092",LncRBase,0.9981637,LncRBase,0.9981637,,0,2,http://dibresources.jcbose.ac.in/zhumur/lncrbase2,301,,,http://web.archive.org/web/20220124115109/http://dibresources.jcbose.ac.in/zhumur/lncrbase2/,2020-10-28,"Division of Bioinformatics, Bose Institute, Kolkata, India., Bioinformatics Centre, Bose Institute, Kolkata, India.","Das T, Deb A, Parida S, Mondal S, Khatua S, Ghosh Z, Chakraborty S, Deb A, Maji RK, Saha S, Ghosh Z",", ","Council of Scientific and Industrial Research, Science and Engineering Research Board, ",35.0,"India, India" +"33137183, 27799466",IMG/VR,0.986604303,IMG/VR,0.986604303,,0,2,"http://img.jgi.doe.gov/vr, http://genome.jgi.doe.gov/portal/IMG_VR","301, 301",,", ","http://web.archive.org/web/20220503180121/https://img.jgi.doe.gov/vr/, no_wayback",2021-01-01,"DOE Joint Genome Institute, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA., Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA.","Roux S, Páez-Espino D, Chen IA, Palaniappan K, Ratner A, Chu K, Reddy TBK, Nayfach S, Schulz F, Call L, Neches RY, Woyke T, Ivanova NN, Eloe-Fadrosh EA, Kyrpides NC, Paez-Espino D, Chen IA, Palaniappan K, Ratner A, Chu K, Szeto E, Pillay M, Huang J, Markowitz VM, Nielsen T, Huntemann M, K Reddy TB, Pavlopoulos GA, Sullivan MB, Campbell BJ, Chen F, McMahon K, Hallam SJ, Denef V, Cavicchioli R, Caffrey SM, Streit WR, Webster J, Handley KM, Salekdeh GH, Tsesmetzis N, Setubal JC, Pope PB, Liu WT, Rivers AR, Ivanova NN, Kyrpides NC",", ","U.S. 
Department of Energy, ",105.0,"United States, United States" +"33175131, 22144203",deepBase,0.993842721,deepBase,0.993842721,,0,2,http://rna.sysu.edu.cn/deepbase3/index.html,302,China,"(40.0018,116.333)",http://web.archive.org/web/20220615144253/https://rna.sysu.edu.cn/deepbase3/index.html,2021-01-01,"MOE Key Laboratory of Gene Function and Regulation, State Key Laboratory for Biocontrol, The Fifth Affiliated Hospital, Sun Yat-sen University, Guangzhou 510275, China., Key Laboratory of Gene Engineering of the Ministry of Education, State Key Laboratory for Biocontrol, Sun Yat-sen University, Guangzhou, P.R. China.","Xie F, Liu S, Wang J, Xuan J, Zhang X, Qu L, Zheng L, Yang J, Yang JH, Qu LH",", ","Guangzhou city, Guangzhou city, National Natural Science Foundation of China, National Natural Science Foundation of China, National Key Research and Development Program of China, National Key Research and Development Program of China, National Natural Science Foundation of China, Youth science and technology innovation talent of guangdong TeZhi, Fundamental Research Funds for the Central Universities, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Guangdong Province Key Laboratory of Computational Science, Fundamental Research Funds for the Central Universities, Guangdong Province Computational Science Innovative Research Team, Science and Technology New Star in ZhuJiang Guangzhou city, Guangdong Province, Guangdong Province, National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, ",22.0,"China, China" +"33186582, 26586798",CPPsite,0.992566049,CPPsite,0.992566049,,0,2,http://webs.iiitd.edu.in/raghava/cppsite,301,India,"(28.6542,77.2373)",no_wayback,2020-11-10,"Department of Hepatitis and AIDS, Pasteur Institute of Iran, Tehran, Iran; Iranian Comprehensive Hemophilia Care Center, Tehran, Iran., 
Bioinformatics Centre, CSIR-Institute of Microbial Technology, Chandigarh 160036, India.","Kardani K, Bolhassani A, Agrawal P, Bhalla S, Usmani SS, Singh S, Chaudhary K, Raghava GP, Gautam A",", ",", ",87.0,India +"33206959, 24234451, 22121220",IntAct,0.997128367,IntAct,0.997128367,,0,3,http://www.ebi.ac.uk/intact,301,,,http://web.archive.org/web/20221030125040/https://www.ebi.ac.uk/intact/,2020-01-01,"European Molecular Biology Laboratory, Wellcome Genome Campus, European Bioinformatics Institute (EMBL-EBI), Hinxton, CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK, School of Animal and Comparative Biomedical Sciences, The University of Arizona, Tucson, AZ 85721-0036, USA, Swiss-Prot Group, SIB Swiss Institute of Bioinformatics, CMU, 1 Rue Michel-Servet, CH-1211 Geneva 4, Switzerland, Department of Biology, University of Rome Tor Vergata, Rome 00133, Italy, Ontario Cancer Institute, the Campbell Family Institute for Cancer Research, and Techna Institute, University Health Network, Toronto, Ontario M5G 0A3, Canada, Cardiovascular Gene Annotation Initiative, Centre for Cardiovascular Genetics, Institute of Cardiovascular Science, University College London, London WC1E 6BT, UK, Centre for Microbial Diseases and Immunity Research, University of British Columbia, Vancouver, British Columbia V6T 1Z4 Canada, Mechanobiology Institute, National University of Singapore, T-Lab #05-01, 5A Engineering Drive 1, Singapore 117411, Singapore, Department of Cancer Research and Molecular Medicine, Norwegian University of Science and Technology, 7489 Trondheim, Norway, Research Institute IRCSS ""Fondazione Santa Lucia"", Rome 00179, Italy, Molecular Connections Pvt. 
Ltd., Bangalore 560 004, India, Institut de Biologie et Chimie des Protéines, Unité Mixte de Recherche 5086, Centre National de la Recherche Scientifique, Université Lyon 1, Lyon, France and Structural and Computational Biology Unit, European Molecular Biology Laboratory (EMBL), Meyerhofstrasse 1, D-69117 Heidelberg, Germany., EMBL Outstation, European Bioinformatics Institute, Wellcome Trust Genome Campus Hinxton, Cambridge CB10 1SD, UK.","Perfetto L, Pastrello C, Del-Toro N, Duesbury M, Iannuccelli M, Kotlyar M, Licata L, Meldal B, Panneerselvam K, Panni S, Rahimzadeh N, Ricard-Blum S, Salwinski L, Shrivastava A, Cesareni G, Pellegrini M, Orchard S, Jurisica I, Hermjakob H, Porras P, Orchard S, Ammari M, Aranda B, Breuza L, Briganti L, Broackes-Carter F, Campbell NH, Chavali G, Chen C, del-Toro N, Duesbury M, Dumousseau M, Galeota E, Hinz U, Iannuccelli M, Jagannathan S, Jimenez R, Khadake J, Lagreid A, Licata L, Lovering RC, Meldal B, Melidoni AN, Milagros M, Peluso D, Perfetto L, Porras P, Raghunath A, Ricard-Blum S, Roechert B, Stutz A, Tognolli M, van Roey K, Cesareni G, Hermjakob H, Kerrien S, Aranda B, Breuza L, Bridge A, Broackes-Carter F, Chen C, Duesbury M, Dumousseau M, Feuermann M, Hinz U, Jandrasits C, Jimenez RC, Khadake J, Mahadevan U, Masson P, Pedruzzi I, Pfeiffenberger E, Porras P, Raghunath A, Roechert B, Orchard S, Hermjakob H",", , ","Institute Français de la Bioinformatique, Associazione Italiana per la Ricerca sul Cancro, European Molecular Biology Laboratory, EMBL core funding, Open Targets, Fondation pour la Recherche Médicale, National Institute of General Medical Sciences, National Institute of General Medical Sciences, European Research Council, Wellcome Trust, National Heart, Lung, and Blood Institute, National Human Genome Research Institute, National Institute of Diabetes and Digestive and Kidney Diseases, Natural Sciences and Engineering Research Council of Canada, Ontario Research Fund, National Institute on Aging, Buchan 
Foundation, International Business Machines Corporation, Canada Foundation for Innovation, National Cancer Institute, National Eye Institute, European Research Council, National Institute of Allergy and Infectious Diseases, National Institute of Mental Health, European Research Council, Telethon, NHLBI NIH HHS, British Heart Foundation, European Commission FP7, European Commission FP7, European Commission FP7",1305.0,"Canada, Canada, Switzerland, Germany, France, India, Italy, Italy, Norway, Singapore, Singapore, Singapore, United States" +"33211879, 23193272, 25361974, 30357387",ArrayExpress,0.994881332,ArrayExpress,0.994881332,,0,4,"http://www.ebi.ac.uk/arrayexpress, http://www.ebi.ac.uk/biostudies","301, 301","United Kingdom, United Kingdom","(52.1929,0.1256), (52.1929,0.1256)","http://web.archive.org/web/20220930031306/https://www.ebi.ac.uk/arrayexpress/, no_wayback",2021-01-01,"European Molecular Biology Laboratory, European Bioinformatics Institute, EMBL-EBI, Wellcome Trust Genome Campus, Hinxton CB10 1SD, UK., Functional Genomics Team, EMBL-EBI, Wellcome Trust Genome Campus, Hinxton CB10 1SD, UK. 
gabry@ebi.ac.uk, European Molecular Biology Laboratory, European Bioinformatics Institute, EMBL-EBI, Wellcome Trust Genome Campus, Hinxton, CB10 1SD, UK., European Molecular Biology Laboratory, European Bioinformatics Institute, EMBL-EBI, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK.","Sarkans U, Füllgrabe A, Ali A, Athar A, Behrangi E, Diaz N, Fexova S, George N, Iqbal H, Kurri S, Munoz J, Rada J, Papatheodorou I, Brazma A, Rustici G, Kolesnikov N, Brandizi M, Burdett T, Dylag M, Emam I, Farne A, Hastings E, Ison J, Keays M, Kurbatova N, Malone J, Mani R, Mupo A, Pedro Pereira R, Pilicheva E, Rung J, Sharma A, Tang YA, Ternent T, Tikhonov A, Welter D, Williams E, Brazma A, Parkinson H, Sarkans U, Kolesnikov N, Hastings E, Keays M, Melnichuk O, Tang YA, Williams E, Dylag M, Kurbatova N, Brandizi M, Burdett T, Megy K, Pilicheva E, Rustici G, Tikhonov A, Parkinson H, Petryszak R, Sarkans U, Brazma A, Athar A, Füllgrabe A, George N, Iqbal H, Huerta L, Ali A, Snow C, Fonseca NA, Petryszak R, Papatheodorou I, Sarkans U, Brazma A",", , , ","Wellcome Trust, NHGRI NIH HHS, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, Wellcome Trust, European Molecular Biology Laboratory, National Science Foundation of USA",836.0, +"33219685, 26481356, 30407549",Lnc2Cancer,0.99007107,Lnc2Cancer,0.99007107,,0,3,"http://www.bio-bigdata.net/lnc2cancer, http://bio-bigdata.hrbmu.edu.cn/lnc2cancer","502, 302",,", ","http://web.archive.org/web/20210516075443/http://www.bio-bigdata.net/lnc2cancer/, http://web.archive.org/web/20221013085339/http://bio-bigdata.hrbmu.edu.cn/lnc2cancer/",2021-01-01,"College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China., College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China., College of Bioinformatics Science and Technology, Harbin Medical University, Harbin 150081, China.","Gao Y, Shang S, Guo 
S, Li X, Zhou H, Liu H, Sun Y, Wang J, Wang P, Zhi H, Li X, Ning S, Zhang Y, Ning S, Zhang J, Wang P, Zhi H, Wang J, Liu Y, Gao Y, Guo M, Yue M, Wang L, Li X, Gao Y, Wang P, Wang Y, Ma X, Zhi H, Zhou D, Li X, Fang Y, Shen W, Xu Y, Shang S, Wang L, Wang L, Ning S, Li X",", , ","National Natural Science Foundation of China, National Natural Science Foundation of China, National Natural Science Foundation of China, Natural Science Foundation, Heilongjiang Touyan Innovation, National Key Research and Development Program of China, , National Natural Science Foundation of China, National Natural Science Foundation of China, Innovative Talents of Science and Technology Research, National Natural Science Foundation of China, National Program on Key Basic Research, Innovative Talents of Science and Technology Research, National Natural Science Foundation of China, Harbin Medical University, National Natural Science Foundation of China",247.0,"China, China, China" +"33237313, 24311564, 27899671",RepeatsDB,0.994790673,RepeatsDB,0.994790673,,0,3,http://repeatsdb.org,301,,,http://web.archive.org/web/20221019173758/https://repeatsdb.org/,2021-01-01,"Dept. of Biomedical Sciences, University of Padua, Via Ugo Bassi 58/B, Padua 35121, Italy., Department of Biomedical Sciences, University of Padua, 35131 Padova, Italy, Department of Biological Chemistry, Universidad de Buenos Aires, Buenos Aires C1428EGA, Argentina, Department of Information Engineering, University of Padua, 35121 Padova, Italy, Department of Biosciences, COMSATS Institute of Information Technology, Sahiwal, Pakistan, Centre de Recherches de Biochimie Macromoléculaire, CNRS, 34293 Montpellier Cedex 5, France and Institut de Biologie Computationnelle, 34293 Montpellier Cedex 5, France., Dept. 
of Biomedical Sciences, University of Padua, 35121 Padova, Italy.","Paladin L, Bevilacqua M, Errigo S, Piovesan D, Mičetić I, Necci M, Monzon AM, Fabre ML, Lopez JL, Nilsson JF, Rios J, Menna PL, Cabrera M, Buitron MG, Kulik MG, Fernandez-Alberti S, Fornasari MS, Parisi G, Lagares A, Hirsh L, Andrade-Navarro MA, Kajava AV, Tosatto SCE, Di Domenico T, Potenza E, Walsh I, Parra RG, Giollo M, Minervini G, Piovesan D, Ihsan A, Ferrari C, Kajava AV, Tosatto SC, Paladin L, Hirsh L, Piovesan D, Andrade-Navarro MA, Kajava AV, Tosatto SC",", , ","Marie Skłodowska-Curie, Wellcome Trust, ",47.0,"Argentina, France, France, Italy, Italy, Italy, Italy, Pakistan" +"33237329, 22661649, 25361972, 32696355",MobiDB,0.997653842,MobiDB,0.997653842,,0,4,http://mobidb.org,301,,,no_wayback,2021-01-01,"Dept. of Biomedical Sciences, University of Padua, Via Ugo Bassi 58/B, Padua 35121, Italy., Department of Biology, University of Padova, Viale G. Colombo 3, 35131 Padova, Italy., Department of Biomedical Sciences, University of Padua, 35131 Padova, Italy., Department of Biomedical Sciences, University of Padua, Padua, Italy.","Piovesan D, Necci M, Escobedo N, Monzon AM, Hatos A, Mičetić I, Quaglia F, Paladin L, Ramasamy P, Dosztányi Z, Vranken WF, Davey NE, Parisi G, Fuxreiter M, Tosatto SCE, Di Domenico T, Walsh I, Martin AJ, Tosatto SC, Potenza E, Di Domenico T, Walsh I, Tosatto SC, Monzon AM, Hatos A, Necci M, Piovesan D, Tosatto SCE",", , , ","Italian Ministry of University and Research, ANPCyT, Universidad Nacional de Quilmes, Cancer Research UK, Marie Skłodowska-Curie, Cancer Research UK, Horizon 2020, Research Foundation Flanders, , , ",221.0,"Italy, Italy, Italy, Italy" +"33261662, 21520341, 23843252, 26555599",dbNSFP,0.998201787,dbNSFP,0.998201787,,0,4,http://database.liulab.science/dbNSFP,200,United States,"(28.2141,-82.1539)",http://web.archive.org/web/20220528154723/http://database.liulab.science/dbNSFP,2020-12-02,"USF Genomics & College of Public Health, University of South 
Florida, Tampa, FL, USA. xiaomingliu@usf.edu., nan, Human Genetics Center, School of Public Health, University of Texas Health Science Center at Houston, Houston, Texas 77030, USA. Xiaoming.Liu@uth.tmc.edu, Human Genetics Center, School of Public Health, The University of Texas Health Science Center at Houston, Houston, Texas.","Liu X, Li C, Mou C, Dong Y, Tu Y, nan, Liu X, Jian X, Boerwinkle E, Liu X, Wu C, Li C, Boerwinkle E",", nan, , ","National Human Genome Research Institute, NHGRI NIH HHS, nan, NHLBI NIH HHS, NHLBI NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, Medical Research Council, Medical Research Council, NHLBI NIH HHS, NHLBI NIH HHS, NHGRI NIH HHS",956.0,"United States, United States" +"33270889, 25398901",MethHC,0.997448325,MethHC,0.997448325,,0,2,http://awi.cuhk.edu.cn/Ã,301,,,no_wayback,2021-01-01,"School of Life and Health Sciences, The Chinese University of Hong Kong, Shenzhen, Longgang District, Shenzhen, Guangdong Province 518172, China., Department of Biological Science and Technology, National Chiao Tung University, Hsin-Chu 300, Taiwan Institute of Bioinformatics and Systems Biology, National Chiao Tung University, Hsin-Chu 300, Taiwan.","Huang HY, Li J, Tang Y, Huang YX, Chen YG, Xie YY, Zhou ZY, Chen XY, Ding SY, Luo MF, Jin CN, Zhao LS, Xu JT, Zhou Y, Lin YC, Hong HC, Zuo HL, Hu SY, Xu PY, Li X, Huang HD, Huang WY, Hsu SD, Huang HY, Sun YM, Chou CH, Weng SL, Huang HD",", ","Warshel Institute for Computational Biology, Shenzhen Ganghong Group Co., Ltd., ",175.0,"China, Hong Kong" +"33270898, 29155946, 30664776",GPCRdb,0.993975937,GPCRdb,0.993975937,,0,3,http://gpcrdb.org,301,Germany,"(50.1188,8.6843)",no_wayback,2021-01-01,"Department of Drug Design and Pharmacology, University of Copenhagen, Universitetsparken 2, 2100 Copenhagen, Denmark., Department of Drug Design and Pharmacology, University of Copenhagen, Universitetsparken 2, DK-2100, Copenhagen, Denmark., Department of Drug Design and Pharmacology, University of Copenhagen, Copenhagen, 
Denmark. christian.munk@sund.ku.dk.","Kooistra AJ, Mordalski S, Pándy-Szekeres G, Esguerra M, Mamyrbekov A, Munk C, Keserű GM, Gloriam DE, Pándy-Szekeres G, Munk C, Tsonkov TM, Mordalski S, Harpsøe K, Hauser AS, Bojarski AJ, Gloriam DE, Munk C, Mutt E, Isberg V, Nikolajsen LF, Bibbe JM, Flock T, Hanson MA, Stevens RC, Deupi X, Gloriam DE",", , ","Polish National Science Center, Independent Research Fund Denmark, Novo Nordisk Fonden, Novo Nordisk Foundation, Horizon 2020, Lundbeck Foundation, Alfred Benzon Foundation, National Research, Development and Innovation Office, Hungary, Lundbeck Foundation, Lundbeck Foundation, European Research Council, European Research Council, Swiss National Science Foundation, Lundbeck Foundation, Lundbeck Foundation",250.0,"Denmark, Denmark, Denmark" +"33290552, 27899567, 30395331, 23161678",GOC,0.951027632,GOC,0.951027632,Ontology resource,0.794861913,4,http://geneontology.org,200,,,http://web.archive.org/web/20221105015500/http://geneontology.org/,2021-01-01,"None, None, None, None",", , , , Blake JA, Dolan M, Drabkin H, Hill DP, Li N, Sitnikov D, Bridges S, Burgess S, Buza T, McCarthy F, Peddinti D, Pillai L, Carbon S, Dietze H, Ireland A, Lewis SE, Mungall CJ, Gaudet P, Chrisholm RL, Fey P, Kibbe WA, Basu S, Siegele DA, McIntosh BK, Renfro DP, Zweifel AE, Hu JC, Brown NH, Tweedie S, Alam-Faruque Y, Apweiler R, Auchinchloss A, Axelsen K, Bely B, Blatter M-, Bonilla C, Bouguerleret L, Boutet E, Breuza L, Bridge A, Chan WM, Chavali G, Coudert E, Dimmer E, Estreicher A, Famiglietti L, Feuermann M, Gos A, Gruaz-Gumowski N, Hieta R, Hinz C, Hulo C, Huntley R, James J, Jungo F, Keller G, Laiho K, Legge D, Lemercier P, Lieberherr D, Magrane M, Martin MJ, Masson P, Mutowo-Muellenet P, O'Donovan C, Pedruzzi I, Pichler K, Poggioli D, Porras Millán P, Poux S, Rivoire C, Roechert B, Sawford T, Schneider M, Stutz A, Sundaram S, Tognolli M, Xenarios I, Foulgar R, Lomax J, Roncaglia P, Khodiyar VK, Lovering RC, Talmud PJ, Chibucos M, Giglio MG, 
Chang H-, Hunter S, McAnulla C, Mitchell A, Sangrador A, Stephan R, Harris MA, Oliver SG, Rutherford K, Wood V, Bahler J, Lock A, Kersey PJ, McDowall DM, Staines DM, Dwinell M, Shimoyama M, Laulederkind S, Hayman T, Wang S-, Petri V, Lowry T, D'Eustachio P, Matthews L, Balakrishnan R, Binkley G, Cherry JM, Costanzo MC, Dwight SS, Engel SR, Fisk DG, Hitz BC, Hong EL, Karra K, Miyasato SR, Nash RS, Park J, Skrzypek MS, Weng S, Wong ED, Berardini TZ, Huala E, Mi H, Thomas PD, Chan J, Kishore R, Sternberg P, Van Auken K, Howe D, Westerfield M",", , , ","The Francis Crick Institute, National Science Foundation, National Human Genome Research Institute, National Human Genome Research Institute, National Institute of Allergy and Infectious Diseases, National Institute of General Medical Sciences, Wellcome Trust, Biotechnology and Biological Sciences Research Council, NIGMS NIH HHS, NIGMS NIH HHS, NHGRI NIH HHS, Swiss Federal Government, Biotechnology and Biological Sciences Research Council, NHGRI NIH HHS, National Heart, Lung, and Blood Institute, National Science Foundation, National Institute of General Medical Sciences, NHGRI NIH HHS, NHGRI NIH HHS, National Institute for Health Research University College London Hospitals Biomedical Research Centre, Wellcome Trust, NIGMS NIH HHS, Alzheimer's Research UK, National Institutes of Health, National Human Genome Research Institute, National Human Genome Research Institute, NHGRI NIH HHS, NHGRI NIH HHS, National Human Genome Research Institute, National Human Genome Research Institute, Biotechnology and Biological Sciences Research Council, National Human Genome Research Institute, National Human Genome Research Institute, National Science Foundation, NIGMS NIH HHS, National Institute of General Medical Sciences, National Institute of General Medical Sciences, EMBL, National Eye Institute, Research Council of Norway, Biotechnology and Biological Sciences Research Council, National Human Genome Research Institute, Medical 
Research Council, NHGRI NIH HHS, NHLBI NIH HHS, National Human Genome Research Institute, National Institutes of Health, National Human Genome Research Institute, National Human Genome Research Institute, Wellcome Trust, Alzheimers Research UK, Biotechnology and Biological Sciences Research Council, Ensemble Effort for the Knowledge Commons, National Science Foundation, Medical Research Council, European Molecular Biology Laboratory, European Molecular Biology Laboratory, NHGRI NIH HHS, NCI NIH HHS, Wellcome Trust, National Science Foundation, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, NIGMS NIH HHS, NHGRI NIH HHS, NHGRI NIH HHS, Gene Regulation, National Institute of Diabetes and Digestive and Kidney Diseases, Alzheimers Research UK, NIGMS NIH HHS, Wellcome Trust, NHGRI NIH HHS, Medical Research Council, British Heart Foundation, Parkinson's UK, NIGMS NIH HHS, NCATS NIH HHS, Alzheimers Research UK, NHGRI NIH HHS, Wellcome Trust, National Science Foundation, Parkinson's UK, National Institute of General Medical Sciences, British Heart Foundation, National Institute of General Medical Sciences, NHGRI NIH HHS, The Francis Crick Institute, Research Council of Norway, Biotechnology and Biological Sciences Research Council, Gene Regulation Ensemble Effort for the Knowledge Commons, National Institute of General Medical Sciences, NHGRI NIH HHS, NHGRI NIH HHS, National Human Genome Research Institute, NHGRI NIH HHS, NHGRI NIH HHS, National Science Foundation, National Institute of General Medical Sciences, National Human Genome Research Institute, National Human Genome Research Institute, National Human Genome Research Institute, National Human Genome Research Institute, Wellcome Trust, Wellcome Trust, Alzheimers Research UK, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council, National Heart, Lung, and Blood Institute, NIGMS NIH HHS, National 
Institute of General Medical Sciences, National Human Genome Research Institute, NHGRI NIH HHS, National Human Genome Research Institute, Biotechnology and Biological Sciences Research Council, Parkinson's UK, National Institutes of Health, University College London, National Human Genome Research Institute, National Human Genome Research Institute, Wellcome Trust, Biotechnology and Biological Sciences Research Council, Medical Research Council, Medical Research Council, National Institutes of Health, National Human Genome Research Institute, Wellcome Trust, NHGRI NIH HHS, NHGRI NIH HHS, NHLBI NIH HHS, Wellcome Trust, NHGRI NIH HHS, NCATS NIH HHS, Wellcome Trust, British Heart Foundation, British Heart Foundation, Medical Research Council, NHGRI NIH HHS",3269.0, +"33305318, 34252246",PED,0.985365828,PED,0.985365828,Protein Ensemble Database,0.779749259,2,http://proteinensemble.org,301,,,http://web.archive.org/web/20221016205116/https://proteinensemble.org/,2021-07-01,"VIB-VUB Center for Structural Biology, Flanders Institute for Biotechnology, Brussels 1050, Belgium., Department of Biomedical Sciences, University of Padova, Padova, Italy.","Lazar T, Martínez-Pérez E, Quaglia F, Hatos A, Chemes LB, Iserte JA, Méndez NA, Garrone NA, Saldaño TE, Marchetti J, Rueda AJV, Bernadó P, Blackledge M, Cordeiro TN, Fagerberg E, Forman-Kay JD, Fornasari MS, Gibson TJ, Gomes GW, Gradinaru CC, Head-Gordon T, Jensen MR, Lemke EA, Longhi S, Marino-Buslje C, Minervini G, Mittag T, Monzon AM, Pappu RV, Parisi G, Ricard-Blum S, Ruff KM, Salladini E, Skepö M, Svergun D, Vallet SD, Varadi M, Tompa P, Tosatto SCE, Piovesan D, Quaglia F, Lazar T, Hatos A, Tompa P, Piovesan D, Tosatto SCE",", ","National Institutes of Health, NIGMS NIH HHS, Horizon 2020, Agence Nationale de la Recherche, Fondation pour la Recherche Médicale, Hungarian Scientific Research Fund, German Ministry of Science and Education, Vrije Universiteit Brussel, Horizon 2020, Hungarian Scientific Research Fund, National 
Agency for the Promotion of Science and Technology, Italian Ministry of University and Research, LBC, Universidad Nacional de Quilmes, Natural Sciences and Engineering Research Council of Canada, ",25.0,"Belgium, Italy" +"33306787, 23521697",MetaADEDB,0.991302609,MetaADEDB,0.991302609,,0,2,http://lmmd.ecust.edu.cn/metaadedb,301,,,http://web.archive.org/web/20210109082637/http://lmmd.ecust.edu.cn/metaadedb/,2021-08-01,"Shanghai Key Laboratory of New Drug Design, School of Pharmacy, East China University of Science and Technology, Shanghai 200237, China., Shanghai Key Laboratory of New Drug Design, School of Pharmacy, East China University of Science and Technology, 130 Meilong Road, Shanghai 200237, China.","Yu Z, Wu Z, Li W, Liu G, Tang Y, Cheng F, Li W, Wang X, Zhou Y, Wu Z, Shen J, Tang Y",", ","National Key Research and Development Program of China, China Postdoctoral Science Foundation, National Key Research and Development Program of China, Shanghai Sailing Program, Shanghai Post-doctoral Excellence Program, National Natural Science Foundation of China, National Natural Science Foundation of China, ",54.0,"China, China, China, China" +"33306800, 24209780",CEG,0.956548989,CEG,0.956548989,Clusters of Essential Genes,0.888208412,2,http://cefg.uestc.cn/ceg,"HTTPConnectionPool(host='cefg.uestc.cn', port=80): Max retries exceeded with url: /ceg (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,no_wayback,2020-12-01,"School of Life Science and Technology, Center for Informational Biology, University of Electronic Science and Technology of China, Chengdu 610054, China., None","Liu S, Wang SX, Liu W, Wang C, Zhang FZ, Ye YN, Wu CS, Zheng WX, Rao N, Guo FB, Ye YN, Hua ZG, Huang J, Rao N, Guo FB",", ","the national key research and development program, Beijing Natural Science Foundation, National Natural Science Foundation of China, ",26.0,"China, China" +"33326073, 
32406920",AnnoLnc,0.998637438,AnnoLnc,0.998637438,,0,2,http://annolnc1.gao-lab.org,200,Hong Kong,"(22.3193,114.1693)",no_wayback,2021-01-01,"Biomedical Pioneering Innovation Center (BIOPIC), Beijing Advanced Innovation Center for Genomics (ICG), Center for Bioinformatics (CBI), Peking University, Beijing, China., School of Life Sciences, Biomedical Pioneering Innovation Center (BIOPIC) & Beijing Advanced Innovation Center for Genomics (ICG), Center for Bioinformatics (CBI) and State Key Laboratory of Protein and Plant Gene Research, Peking University, Beijing 100871, China.","Yang DC, Ke L, Ding Y, Gao G, Ke L, Yang DC, Wang Y, Ding Y, Gao G",", ",", China 863 Program, State Key Laboratory of Protein and Plant Gene Research, National Key Research and Development Program, Beijing Advanced Innovation Center for Genomics, National Program for Support of Top-notch Young Professionals",11.0,"China, China" +33588073,ASDB,0.989679317,ASDB,0.989679317,sinensis omics database,0.906412411,1,http://asdb.jungleran.com,503,,,no_wayback,2021-02-12,"College of Life Sciences, Chongqing Normal University, Shapingba, Chongqing 401331, China. Electronic address: zhangyj@cqnu.edu.cn.","Zhang YJ, Lan Y, Chen B",,,0.0,China +33643383,APRegNet,0.974425733,APRegNet,0.974425733,,0,1,http://lms.snu.edu.in/APRegNet,"HTTPConnectionPool(host='lms.snu.edu.in', port=80): Max retries exceeded with url: /APRegNet (Caused by ConnectTimeoutError(, 'Connection to lms.snu.edu.in timed out. 
(connect timeout=5)'))",,,http://web.archive.org/web/20221005200844/https://lms.snu.edu.in/APRegNet/,2021-02-12,"Department of Life Sciences, Shiv Nadar University, Gautam Buddha Nagar, India.","Sharma R, Upadhyay S, Bhattacharya S, Singh A",,,1.0,India +"33661371, 26578696",OryzaGenome,0.958561838,OryzaGenome,0.958561838,,0,2,"http://viewer.shigen.info/oryzagenome21detail/index.xhtml, http://shigen.nig.ac.jp/rice/oryzabase","200, 302",,", ","http://web.archive.org/web/20211205134247/http://viewer.shigen.info/oryzagenome21detail/index.xhtml, http://web.archive.org/web/20220910023509/https://shigen.nig.ac.jp/rice/oryzabase/",2021-03-04,"Department of Agricultural and Environmental Biology, Graduate School of Agricultural and Life Science, The University of Tokyo, Bunkyo 1-1-1, Tokyo, 113-8657, Japan., Plant Genetics Laboratory, National Institute of Genetics, Mishima, Japan Bioinformatics Laboratory, Meiji University, Kawasaki, Japan Tsukuba Division, Mitsubishi Space Software Co., Ltd., Tsukuba, Japan Present address: Computational Bioscience Research Center, King Abdullah University of Science and Technology, Thuwal 23955-6900, Kingdom of Saudi Arabia.","Kajiya-Kanegae H, Ohyanagi H, Ebata T, Tanizawa Y, Onogi A, Sawada Y, Hirai MY, Wang ZX, Han B, Toyoda A, Fujiyama A, Iwata H, Tsuda K, Suzuki T, Nosaka-Takahashi M, Nonomura KI, Nakamura Y, Kawamoto S, Kurata N, Sato Y, Ohyanagi H, Ebata T, Huang X, Gong H, Fujita M, Mochizuki T, Toyoda A, Fujiyama A, Kaminuma E, Nakamura Y, Feng Q, Wang ZX, Han B, Kurata N",", ","Japan Agency for Medical Research and Development, ",15.0,"Japan, Japan, Japan, Japan, Saudi Arabia" +"33693667, 27114492",UbiNet,0.996777952,UbiNet,0.996777952,,0,2,http://awi.cuhk.edu.cn,301,China,"(22.5559,114.0577)",no_wayback,2021-03-01,"School of Life and Health Sciences, The Chinese University of Hong Kong, Shenzhen, Guangdong 518172, P.R.China., Department of Computer Science and Engineering, Yuan Ze University, Taoyuan, 320, Taiwan University 
of Information and Communication Technology, Thai Nguyen University, Vietnam and.","Li Z, Chen S, Jhong JH, Pang Y, Huang KY, Li S, Lee TY, Nguyen VN, Huang KY, Weng JT, Lai KR, Lee TY",", ","National Natural Science Foundation of China, Warshel Institute of Computational Biology, ",17.0,"China, Hong Kong" +33724838,UniProtKB,0.99742651,UniProtKB,0.99742651,,0,1,http://macpepdb.mpc.rub.de,301,,,http://web.archive.org/web/20210317083202/https://macpepdb.mpc.rub.de/,2021-03-16,"Ruhr University Bochum, Medical Faculty, Medizinisches Proteom-Center, 44801 Bochum, Germany.","Uszkoreit J, Winkelhardt D, Barkovits K, Wulf M, Roocke S, Marcus K, Eisenacher M",,"Ministerium f?r Kultur und Wissenschaft des Landes Nordrhein-Westfalen, Bundesministerium f?r Bildung und Forschung",1.0,Germany +"33835459, 23118484, 29106616",MODOMICS,0.997817814,MODOMICS,0.997817814,Modification Pathways Database,0.882010448,3,http://www.genesilico.pl/modomics,301,,,http://web.archive.org/web/20221102101148/https://genesilico.pl/modomics/,2021-01-01,"Laboratory of Bioinformatics and Protein Engineering, International Institute of Molecular and Cell Biology in Warsaw, Warsaw, Poland. pboccaletto@genesilico.pl., Laboratory of Bioinformatics and Protein Engineering, International Institute of Molecular and Cell Biology, ul. Ks. Trojdena 4, PL-02-109 Warsaw, Poland., Laboratory of Bioinformatics and Protein Engineering, International Institute of Molecular and Cell Biology in Warsaw, ul. Ks. 
Trojdena 4, PL-02-109 Warsaw, Poland.","Boccaletto P, Bagiński B, Machnicka MA, Milanowska K, Osman Oglou O, Purta E, Kurkowska M, Olchowik A, Januszewski W, Kalinowski S, Dunin-Horkawicz S, Rother KM, Helm M, Bujnicki JM, Grosjean H, Boccaletto P, Machnicka MA, Purta E, Piatkowski P, Baginski B, Wirecki TK, de Crécy-Lagard V, Ross R, Limbach PA, Kotter A, Helm M, Bujnicki JM",", , ",", , NIGMS NIH HHS, NIGMS NIH HHS",1190.0,"Poland, Poland, Poland" +"33835460, 25378335",MeT-DB,0.995145404,MeT-DB,0.995145404,MethylTranscriptome DataBase,0.968732161,2,http://compgenomics.utsa.edu/MeTDB,301,,,http://web.archive.org/web/20220818022503/http://compgenomics.utsa.edu/MeTDB/,2021-01-01,"School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, Jiangsu, China., School of Information and Electrical Engineering, China University of Mining and Technology, Xuzhou, Jiangsu 221116, China.","Liu H, Ma J, Meng J, Zhang L, Liu H, Flores MA, Meng J, Zhang L, Zhao X, Rao MK, Chen Y, Huang Y",", ",", NIMHD NIH HHS, NIMHD NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, NCI NIH HHS, NCATS NIH HHS",33.0,"China, China, China, China" +"33849445, 28280852",TANTIGEN,0.998069942,TANTIGEN,0.998069942,adb,0.65444386,2,http://projects.met-hilab.org/tadb,301,United States,"(36.677696,-78.37471)",no_wayback,2021-04-14,"Metropolitan College, Boston University, Boston, USA. 
guanglan@bu.edu., Department of Bio and Health Informatics, Technical University of Denmark, Lyngby, 2800, Denmark.","Zhang G, Chitkushev L, Olsen LR, Keskin DB, Brusic V, Olsen LR, Tongchusak S, Lin H, Reinherz EL, Brusic V, Zhang GL",", ","Division of Cancer Epidemiology and Genetics, National Cancer Institute, NCI NIH HHS, National Cancer Institute, National Institutes of Health, National Institutes of Health, Det Frie Forskningsråd",22.0,"Denmark, Denmark, United States" +34144671,BCGene,0.99187088,BCGene,0.99187088,brain cancer gene database,0.777056694,1,http://soft.bioinfo-minzhao.org/bcgene,406,,,http://web.archive.org/web/20221017151036/https://soft.bioinfo-minzhao.org/bcgene/,2021-06-18,"School of Science, Technology and Engineering, University of the Sunshine Coast, Maroochydore DC, Sippy Downs, Queensland, 4558, Australia.","Zhao M, Liu Y, Ding G, Qu D, Qu H",,,0.0,Australia +34303324,PDB,0.981093069,PDB,0.981093069,Data Bank,0.642776877,1,http://pitgroup.org/amyloid,301,,,http://web.archive.org/web/20220623051303/https://pitgroup.org/amyloid/,2021-07-26,"PIT Bioinformatics Group, Eötvös University, Budapest H-1117, Hungary.","Takács K, Grolmusz V",,,0.0,Hungary +"34314492, 26476458, 33125071",GlyTouCan,0.994329453,GlyTouCan,0.994329453,,0,3,http://data.glygen.org,301,United States,"(39.0448,-77.6042)",http://web.archive.org/web/20221017104628/https://data.glygen.org/,2021-12-01,"The Department of Biochemistry and Molecular Biology, George Washington University Medical Center, 2300 I St NW, Washington DC 20052, USA., Faculty of Science and Engineering, Soka University, Tokyo 192-8577, Japan Glycoscience and Glycotechnology Research Group, AIST, Ibaraki 305-8568, Japan kkiyoko@soka.ac.jp., Graduate School of Science and Engineering, Soka University, Tokyo 192-8577, Japan.","Navelkar R, Owen G, Mutherkrishnan V, Thiessen P, Cheng T, Bolton E, Edwards N, Tiemeyer M, Campbell MP, Martin M, Vora J, Kahsay R, Mazumder R, Aoki-Kinoshita K, Agravat S, Aoki NP, 
Arpinar S, Cummings RD, Fujita A, Fujita N, Hart GM, Haslam SM, Kawasaki T, Matsubara M, Moreman KW, Okuda S, Pierce M, Ranzinger R, Shikanai T, Shinmachi D, Solovieva E, Suzuki Y, Tsuchiya S, Yamada I, York WS, Zaia J, Narimatsu H, Fujita A, Aoki NP, Shinmachi D, Matsubara M, Tsuchiya S, Shiota M, Ono T, Yamada I, Aoki-Kinoshita KF",", , ","NIGMS NIH HHS, NIH HHS, NLM NIH HHS, NIGMS NIH HHS, NIGMS NIH HHS, Biotechnology and Biological Sciences Research Council, NIGMS NIH HHS, Biotechnology and Biological Sciences Research Council, NIGMS NIH HHS, National Bioscience Database Center, National Institutes of Health, Japan Science and Technology Agency",50.0,"Japan, Japan, Japan, United States" +"34344425, 34344425, 34344425, 34344425, 34344425, 34344425, 34344425, 34344425, 34344425, 34344425, 34344425, 34344425, 26744602",SorGSD,0.9974702,SorGSD,0.9974702,Sorghum Genome SNP Database,0.930164422,2,http://ngdc.cncb.ac.cn/sorgsd,301,,,http://web.archive.org/web/20220523183244/https://ngdc.cncb.ac.cn/sorgsd/,2021-08-03,"Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, 
Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, Beijing, 100093, China., Genomics and Molecular Breeding of Biofuel Crops, Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences, 100093 Beijing, China.","Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Liu Y, Wang Z, Wu X, Zhu J, Luo H, Tian D, Li C, Luo J, Zhao W, Hao H, Jing HC, Luo H, Zhao W, Wang Y, Xia Y, Wu X, Zhang L, Tang B, Zhu J, Fang L, Du Z, Bekele WA, Tai S, Jordan DR, Godwin ID, Snowdon RJ, Mace ES, Jing HC, Luo J",", , , , , , , , , , , , ","National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the 
People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, Ministry of Science and Technology of the People's Republic of China, Ministry of Science and Technology of the People's Republic of China, National Natural Science Foundation of China, National Science & Technology Support Program, Sino-Africa Centre 
of CAS International Outreach Initiatives, National Science & Technology Support Program, National Natural Science Foundation of China",29.0,"China, China, China, China, China, China, China, China, China, China, China, China, China" +34362451,MPDB,0.990668297,MPDB,0.990668297,,0,1,http://www.medicinalplantbd.com,301,,,http://web.archive.org/web/20220618132803/https://www.medicinalplantbd.com/,2021-08-06,"Department of Biochemistry and Molecular Biology, Tejgaon College, National University of Bangladesh, Gazipur, 1704, Bangladesh.","Hussain N, Chanda R, Abir RA, Mou MA, Hasan MK, Ashraf MA",,,0.0,"Bangladesh, Bangladesh" +"34366563, 30774152",DSLD,0.953341067,DSLD,0.953341067,Dietary Supplement Label Database,0.795074418,2,http://dsld.nlm.nih.gov/dsld,"HTTPConnectionPool(host='dsld.nlm.nih.gov', port=80): Max retries exceeded with url: /dsld (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))",,,http://web.archive.org/web/20200407050828/https://dsld.nlm.nih.gov/dsld/,2021-06-25,"Office of Dietary Supplements National Institutes of Health, U.S. Department of Health and Human Services, 6100 Executive Blvd Bethesda MD USA 20892., Assistant Professor at the Uniformed Services University.","Saldanha LG, Dwyer JT, Bailen RA, Scott JM, Lindsey AT, Costello RB, Deuster PA",", ","Intramural NIH HHS, Office of Dietary Supplements, U.S. 
Department of Health and Human Services, National Institutes of Health, Intramural NIH HHS",3.0,United States +"34384382, 21245031",OrchidBase,0.997832179,OrchidBase,0.997832179,,0,2,http://orchidbase.itps.ncku.edu.tw,200,,,http://web.archive.org/web/20220418024736/http://orchidbase.itps.ncku.edu.tw/,2021-08-12,"Orchid Research and Development Center, National Cheng Kung University, Tainan, 70101, Taiwan., Department of Engineering Science, National Cheng Kung University, Tainan 701, Taiwan.","Hsiao YY, Fu CH, Ho SY, Li CI, Chen YY, Wu WL, Wang JS, Zhang DY, Hu WQ, Yu X, Sun WH, Zhou Z, Liu KW, Huang L, Lan SR, Chen HH, Wu WS, Liu ZJ, Tsai WC, Fu CH, Chen YW, Hsiao YY, Pan ZJ, Liu ZJ, Huang YM, Tsai WC, Chen HH",", ","Ministry of Science and Technology, Taiwan, ",47.0, +34387941,PharmGKB,0.997299695,PharmGKB,0.997299695,Pharmacogenomics Knowledgebase,0.730918014,1,http://www.pharmgkb.org,301,,,http://web.archive.org/web/20221108154156/https://www.pharmgkb.org/,2021-08-01,"Departments of Biomedical Data Science and Medicine (BMIR), Stanford University, Stanford, California.","Gong L, Whirl-Carrillo M, Klein TE",,"NHGRI NIH HHS, National Institute of Child Health and Human Development, National Institutes of Health, National Human Genome Research Institute, NIH, NICHD NIH HHS, NHGRI",2.0, +34529321,antimicrobial peptide database,0.933922029,APD,0.899997145,antimicrobial peptide database,0.933922029,1,http://aps.unmc.edu,"HTTPConnectionPool(host='aps.unmc.edu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused'))",,,http://web.archive.org/web/20221023193318/https://aps.unmc.edu/,2021-09-24,"Department of Pathology and Microbiology, College of Medicine, University of Nebraska Medical Center, 985900 Nebraska Medical Center, Omaha, Nebraska, USA.","Wang G, Zietz CM, Mudgapalli A, Wang S, Wang Z",,"NIAID NIH HHS, NIAID NIH HHS, NIGMS NIH HHS, NIAID NIH HHS, National Institute of 
General Medical Sciences",4.0,United States +"34536568, 28137767",SmProt,0.989004731,SmProt,0.989004731,small proteins database,0.90862302,2,http://bigdata.ibp.ac.cn/SmProt,301,China,"(39.9042,116.407)",no_wayback,2021-08-01,"College of Life Sciences, University of Chinese Academy of Sciences, Beijing 100049, China; Key Laboratory of RNA Biology, Center for Big Data Research in Health, Institute of Biophysics, Chinese Academy of Sciences, Beijing 100101, China., Key Laboratory of RNA Biology, Institute of Biophysics, Chinese Academy of Sciences, Beijing, China.","Li Y, Zhou H, Chen X, Zheng Y, Kang Q, Hao D, Zhang L, Song T, Luo H, Hao Y, Chen R, Zhang P, He S, Hao Y, Zhang L, Niu Y, Cai T, Luo J, He S, Zhang B, Zhang D, Qin Y, Yang F, Chen R",", ",", ",45.0,"China, China, China" +34556150,AprGPD,0.988473594,AprGPD,0.988473594,apricot,0.7814821,1,http://apricotgpd.com,200,China,"(22.5431,114.058)",http://web.archive.org/web/20211205174638/https://apricotgpd.com/,2021-09-23,"State Key Laboratory of Tree Genetics and Breeding, Non-Timber Forest Research and Development Center, Chinese Academy of Forestry, Zhengzhou, China.","Chen C, Liu H, Gou N, Huang M, Xu W, Zhu X, Yin M, Bai H, Wang L, Wuyun TN",,"the National Key R&D Program of China, the National Key R&D Program of China, the Fundamental Research Funds for the Central Non-profit Research Institution of Chinese Academy of Forestry",0.0,China +34648133,SMART,0.978860319,SMART,0.978860319,,0,1,http://smart.omicstudio.cloud,200,China,"(30.2813,120.12)",http://web.archive.org/web/20220419121949/http://smart.omicstudio.cloud/,2021-10-14,"State Key Laboratory of Crop Stress Biology for Arid Areas, Center of Bioinformatics, College of Life Sciences, Northwest A&F University, Yangling, Shaanxi, China.","Lei B, Song M, Li X, Dang X, Qin R, Zhu S, An X, Liu Q, Yao X, Nie Y, Ma C",,"Projects of Youth Technology New Star of Shaanxi Province, the Hundred Talents Program of Shaanxi Province of China",0.0,China +"34655133, 
30217829",OAS,0.866781175,OAS,0.866781175,Antibody Space,0.754744515,2,http://opig.stats.ox.ac.uk/webapps/oas,301,,,http://web.archive.org/web/20220525025511/http://opig.stats.ox.ac.uk/webapps/oas/,2021-10-29,"Department of Statistics, University of Oxford, Oxford, UK., Department of Statistics, University of Oxford, Oxford OX1 3LB, United Kingdom; and.","Olsen TH, Boyles F, Deane CM, Kovaltsuk A, Leem J, Kelm S, Snowden J, Deane CM, Krawczyk K",", ","Engineering and Physical Sciences Research Council, Engineering and Physical Sciences Research Council, Biotechnology and Biological Sciences Research Council, Biotechnology and Biological Sciences Research Council",47.0,United Kingdom +"34965192, 24997126",piRNAQuest,0.996235311,piRNAQuest,0.996235311,,0,2,http://dibresources.jcbose.ac.in/zhumur/pirnaquest2,301,India,"(20.2706,85.8334)",http://web.archive.org/web/20220130223046/http://dibresources.jcbose.ac.in/zhumur/pirnaquest2/,2021-12-31,"Division of Bioinformatics, Bose Institute, Kolkata, India., None","Ghosh B, Sarkar A, Mondal S, Bhattacharya N, Khatua S, Ghosh Z, Sarkar A, Maji RK, Saha S, Ghosh Z",", ","DST, India, supported by the, ",27.0,India diff --git a/data/manual_classifications.csv b/data/manual_classifications.csv new file mode 100644 index 0000000..40a2c39 --- /dev/null +++ b/data/manual_classifications.csv @@ -0,0 +1,1635 @@ +id,title,abstract,checked_by,kes_check,hji_check,curation_sum,number_of_checks,curation_score,kes_notes,hji_notes +28791657,MEGALEX: A megastudy of visual and auditory word recognition.,"Using the megastudy approach, we report a new database (MEGALEX) of visual and auditory lexical decision times and accuracy rates for tens of thousands of words. We collected visual lexical decision data for 28,466 French words and the same number of pseudowords, and auditory lexical decision data for 17,876 French words and the same number of pseudowords (synthesized tokens were used for the auditory modality). 
This constitutes the first large-scale database for auditory lexical decision, and the first database to enable a direct comparison of word recognition in different modalities. Different regression analyses were conducted to illustrate potential ways to exploit this megastudy database. First, we compared the proportions of variance accounted for by five word frequency measures. Second, we conducted item-level regression analyses to examine the relative importance of the lexical variables influencing performance in the different modalities (visual and auditory). Finally, we compared the similarities and differences between the two modalities. All data are freely available on our website ( https://sedufau.shinyapps.io/megalex/ ) and are searchable at www.lexique.org , inside the Open Lexique search engine.","hji,kes",1,1,2,2,1,visual and lexical db,not life sci; reassessed and re-scored - re-score based on broaded def of life sci +28917032,CFS MATLAB toolbox: An experiment builder for continuous flash suppression (CFS) task.,"CFS toolbox is an open-source collection of MATLAB functions that utilizes PsychToolbox-3 (PTB-3). It is designed to allow a researcher to create and run continuous flash suppression experiments using a variety of experimental parameters (i.e., stimulus types and locations, noise characteristics, and experiment window settings). In a CFS experiment, one of the eyes at a time is presented with a dynamically changing noise pattern, while the other eye is concurrently presented with a static target stimulus, such as a Gabor patch. Due to the strong interocular suppression created by the dominant noise pattern mask, the target stimulus is rendered invisible for an extended duration. Very little knowledge of MATLAB is required for using the toolbox; experiments are generated by modifying csv files with the required parameters, and result data are output to text files for further analysis. 
The open-source code is available on the project page under a Creative Commons License ( http://www.mikkonuutinen.arkku.net/CFS_toolbox/ and https://bitbucket.org/mikkonuutinen/cfs_toolbox ).","hji,kes",0,0,0,2,0,software,NA +28961763,psygenet2r: a R/Bioconductor package for the analysis of psychiatric disease genes.,"

Motivation

Psychiatric disorders have a great impact on morbidity and mortality. Genotype-phenotype resources for psychiatric diseases are key to enable the translation of research findings to a better care of patients. PsyGeNET is a knowledge resource on psychiatric diseases and their genes, developed by text mining and curated by domain experts.

Results

We present psygenet2r, an R package that contains a variety of functions for leveraging PsyGeNET database and facilitating its analysis and interpretation. The package offers different types of queries to the database along with variety of analysis and visualization tools, including the study of the anatomical structures in which the genes are expressed and gaining insight of gene's molecular function. Psygenet2r is especially suited for network medicine analysis of psychiatric disorders.

Availability and implementation

The package is implemented in R and is available under MIT license from Bioconductor (http://bioconductor.org/packages/release/bioc/html/psygenet2r.html).

Contact

juanr.gonzalez@isglobal.org or laura.furlong@upf.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +28961912,Enabling reproducible real-time quantitative PCR research: the RDML package.,"

Motivation

Reproducibility, a cornerstone of research, requires defined data formats, which include the setup and output of experiments. The real-time PCR data markup language (RDML) is a recommended standard of the minimum information for publication of quantitative real-time PCR experiments guidelines. Despite the popularity of the RDML format for analysis of quantitative PCR data, handling of RDML files is not yet widely supported in all PCR curve analysis softwares.

Results

This study describes the open-source RDML package for the statistical computing language R. RDML is compatible with RDML versions ≤ 1.2 and provides functionality to (i) import RDML data; (ii) extract sample information (e.g. targets and concentration); (iii) transform data to various formats of the R environment; (iv) generate human-readable run summaries; and (v) to create RDML files from user data. In addition, RDML offers a graphical user interface to read, edit and create RDML files.

Availability and implementation

https://cran.r-project.org/package=RDML. rdmlEdit server http://shtest.evrogen.net/rdmlEdit/. Documentation: http://kablag.github.io/RDML/.

Contact

k.blag@yandex.ru.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +29036410,SPRINT: an SNP-free toolkit for identifying RNA editing sites.,"

Motivation

RNA editing generates post-transcriptional sequence alterations. Detection of RNA editing sites (RESs) typically requires the filtering of SNVs called from RNA-seq data using an SNP database, an obstacle that is difficult to overcome for most organisms.

Results

Here, we present a novel method named SPRINT that identifies RESs without the need to filter out SNPs. SPRINT also integrates the detection of hyper RESs from remapped reads, and has been fully automated to any RNA-seq data with reference genome sequence available. We have rigorously validated SPRINT's effectiveness in detecting RESs using RNA-seq data of samples in which genes encoding RNA editing enzymes are knock down or over-expressed, and have also demonstrated its superiority over current methods. We have applied SPRINT to investigate RNA editing across tissues and species, and also in the development of mouse embryonic central nervous system. A web resource (http://sprint.tianlab.cn) of RESs identified by SPRINT has been constructed.

Availability and implementation

The software and related data are available at http://sprint.tianlab.cn.

Contact

weidong.tian@fudan.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +29315360,CGManalyzer: an R package for analyzing continuous glucose monitoring studies.,"

Summary

The R package CGManalyzer contains functions for analyzing data from a continuous glucose monitoring (CGM) study. It covers a wide and comprehensive range of data analysis methods including reading a series of datasets, obtaining summary statistics of glucose levels, plotting data, transforming the time stamp format, fixing missing values, evaluating the mean of daily difference and continuous overlapping net glycemic action, calculating multiscale sample entropy, conducting pairwise comparison, displaying results using various plots including a new type of plot called an antenna plot, etc. This package has been developed from our work in directly analyzing data from various CGM devices such as the FreeStyle Libre, Glutalor, Dexcom and Medtronic CGM. Thus, this package should greatly facilitate the analysis of various CGM studies.

Availability and implementation

The package for Windows is available from CRAN: http://cran.r-project.org/mirrors.html. The source file CGManalyzer_1.0.tar.gz is available in the Supplementary Material and at the website of Zhang's lab https://quantitativelab.fhs.umac.mo/analytic-tool/.

Contact

douglaszhang@umac.mo.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +29514181,PWMScan: a fast tool for scanning entire genomes with a position-specific weight matrix.,"

Summary

Transcription factors regulate gene expression by binding to specific short DNA sequences of 5-20 bp to regulate the rate of transcription of genetic information from DNA to messenger RNA. We present PWMScan, a fast web-based tool to scan server-resident genomes for matches to a user-supplied PWM or transcription factor binding site model from a public database.

Availability and implementation

The web server and source code are available at http://ccg.vital-it.ch/pwmscan and https://sourceforge.net/projects/pwmscan, respectively.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +29539190,"MSeqDR mvTool: A mitochondrial DNA Web and API resource for comprehensive variant annotation, universal nomenclature collation, and reference genome conversion.","Accurate mitochondrial DNA (mtDNA) variant annotation is essential for the clinical diagnosis of diverse human diseases. Substantial challenges to this process include the inconsistency in mtDNA nomenclatures, the existence of multiple reference genomes, and a lack of reference population frequency data. Clinicians need a simple bioinformatics tool that is user-friendly, and bioinformaticians need a powerful informatics resource for programmatic usage. Here, we report the development and functionality of the MSeqDR mtDNA Variant Tool set (mvTool), a one-stop mtDNA variant annotation and analysis Web service. mvTool is built upon the MSeqDR infrastructure (https://mseqdr.org), with contributions of expert curated data from MITOMAP (https://www.mitomap.org) and HmtDB (https://www.hmtdb.uniba.it/hmdb). mvTool supports all mtDNA nomenclatures, converts variants to standard rCRS- and HGVS-based nomenclatures, and annotates novel mtDNA variants. Besides generic annotations from dbNSFP and Variant Effect Predictor (VEP), mvTool provides allele frequencies in more than 47,000 germline mitogenomes, and disease and pathogenicity classifications from MSeqDR, Mitomap, HmtDB and ClinVar (Landrum et al., 2013). mvTools also provides mtDNA somatic variants annotations. ""mvTool API"" is implemented for programmatic access using inputs in VCF, HGVS, or classical mtDNA variant nomenclatures. The results are reported as hyperlinked html tables, JSON, Excel, and VCF formats. 
MSeqDR mvTool is freely accessible at https://mseqdr.org/mvtool.php.","hji,kes",0,0,0,2,0,software,NA +29718389,An update on PUG-REST: RESTful interface for programmatic access to PubChem.,"PubChem (https://pubchem.ncbi.nlm.nih.gov) is one of the largest open chemical information resources available. It currently receives millions of unique users per month on average, serving as a key resource for many research fields such as cheminformatics, chemical biology, medicinal chemistry, and drug discovery. PubChem provides multiple programmatic access routes to its data and services. One of them is PUG-REST, a Representational State Transfer (REST)-like web service interface to PubChem. On average, PUG-REST receives more than a million requests per day from tens of thousands of unique users. The present paper provides an update on PUG-REST since our previous paper published in 2015. This includes access to new kinds of data (e.g. concise bioactivity data, table of contents headings, etc.), full implementation of synchronous fast structure search, support for assay data retrieval using accession identifiers in response to the deprecation of NCBI's GI numbers, data exchange between PUG-REST and NCBI's E-Utilities through the List Gateway, implementation of dynamic traffic control through throttling, and enhanced usage policies. In addition, example Perl scripts are provided, which the user can easily modify, run, or translate into another scripting language.","hji,kes",1,1,2,2,1,description of an interface to PubChem,"no notes; reassessed and re-scored - too strictly read ""service"" before - URL and name match to pubchem itself" +29718411,BUSCA: an integrative web server to predict subcellular localization of proteins.,"Here, we present BUSCA (http://busca.biocomp.unibo.it), a novel web server that integrates different computational tools for predicting protein subcellular localization. 
BUSCA combines methods for identifying signal and transit peptides (DeepSig and TPpred3), GPI-anchors (PredGPI) and transmembrane domains (ENSEMBLE3.0 and BetAware) with tools for discriminating subcellular localization of both globular and membrane proteins (BaCelLo, MemLoci and SChloro). Outcomes from the different tools are processed and integrated for annotating subcellular localization of both eukaryotic and bacterial protein sequences. We benchmark BUSCA against protein targets derived from recent CAFA experiments and other specific data sets, reporting performance at the state-of-the-art. BUSCA scores better than all other evaluated methods on 2732 targets from CAFA2, with a F1 value equal to 0.49 and among the best methods when predicting targets from CAFA3. We propose BUSCA as an integrated and accurate resource for the annotation of protein subcellular localization.","hji,kes",0,0,0,2,0,0,NA +29721311,Gene Unprediction with Spurio: A tool to identify spurious protein sequences.,"We now have access to the sequences of tens of millions of proteins. These protein sequences are essential for modern molecular biology and computational biology. The vast majority of protein sequences are derived from gene prediction tools and have no experimental supporting evidence for their translation. Despite the increasing accuracy of gene prediction tools there likely exists a large number of spurious protein predictions in the sequence databases. We have developed the Spurio tool to help identify spurious protein predictions in prokaryotes. Spurio searches the query protein sequence against a prokaryotic nucleotide database using tblastn and identifies homologous sequences. The tblastn matches are used to score the query sequence's likelihood of being a spurious protein prediction using a Gaussian process model. The most informative feature is the appearance of stop codons within the presumed translation of homologous DNA sequences. 
Benchmarking shows that the Spurio tool is able to distinguish spurious from true proteins. However, transposon proteins are prone to be predicted as spurious because of the frequency of degraded homologs found in the DNA sequence databases. Our initial experiments suggest that less than 1% of the proteins in the UniProtKB sequence database are likely to be spurious and that Spurio is able to identify over 60 times more spurious proteins than the AntiFam resource. The Spurio software and source code is available under an MIT license at the following URL: https://bitbucket.org/bateman-group/spurio.","hji,kes",0,0,0,2,0,software,NA +29726919,canvasDesigner: a versatile interactive high-resolution scientific multi-panel visualization toolkit.,"Summary:We present a bioinformatics and systems biology visualization toolkit harmonizing real time interactive exploring and analyzing of big data, full-fledged customizing of look-n-feel and producing multi-panel publication-ready figures in PDF format simultaneously. Availability and implementation:Source code and detailed user guides are available at http://canvasxpress.org, https://baohongz.github.io/canvasDesigner and https://baohongz.github.io/canvasDesigner/demo_video.html. Supplementary information:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +30107777,BALCONY: an R package for MSA and functional compartments of protein variability analysis.,"

Background

Here, we present an R package for entropy/variability analysis that facilitates prompt and convenient data extraction, manipulation and visualization of protein features from multiple sequence alignments. BALCONY can work with residues dispersed across a protein sequence and map them on the corresponding alignment of homologous protein sequences. Additionally, it provides several entropy and variability scores that indicate the conservation of each residue.

Results

Our package allows the user to visualize evolutionary variability by locating the positions most likely to vary and to assess mutation candidates in protein engineering.

Conclusion

In comparison to other R packages BALCONY allows conservation/variability analysis in context of protein structure with linkage of the appropriate metrics with physicochemical features of user choice.

Availability

CRAN project page: https://cran.r-project.org/package=BALCONY and our website: http://www.tunnelinggroup.pl/software/ for major platforms: Linux/Unix, Windows and Mac OS X.","hji,kes",0,0,0,2,0,software,NA +30222561,Large-Scale Study of Perceptual Video Quality.,"The great variations of videographic skills in videography, camera designs, compression and processing protocols, communication and bandwidth environments, and displays leads to an enormous variety of video impairments. Current noreference (NR) video quality models are unable to handle this diversity of distortions. This is true in part because available video quality assessment databases contain very limited content, fixed resolutions, were captured using a small number of camera devices by a few videographers and have been subjected to a modest number of distortions. As such, these databases fail to adequately represent real world videos, which contain very different kinds of content obtained under highly diverse imaging conditions and are subject to authentic, complex and often commingled distortions that are difficult or impossible to simulate. As a result, NR video quality predictors tested on real-world video data often perform poorly. Towards advancing NR video quality prediction, we have constructed a largescale video quality assessment database containing 585 videos of unique content, captured by a large number of users, with wide ranges of levels of complex, authentic distortions. We collected a large number of subjective video quality scores via crowdsourcing. A total of 4776 unique participants took part in the study, yielding more than 205000 opinion scores, resulting in an average of 240 recorded human opinions per video. We demonstrate the value of the new resource, which we call the LIVE Video Quality Challenge Database (LIVE-VQC for short), by conducting a comparison of leading NR video quality predictors on it. 
This study is the largest video quality assessment study ever conducted along several key dimensions: number of unique contents, capture devices, distortion types and combinations of distortions, study participants, and recorded subjective scores. The database is available for download on this link: http://live.ece.utexas.edu/research/LIVEVQC/index.html.","hji,kes",0,0,0,2,0,"database, but not biological",not life sci +30261835,Sequence homology in eukaryotes (SHOE): interactive visual tool for promoter analysis.,"

Background

Microarray and DNA-sequencing based technologies continue to produce enormous amounts of data on gene expression. This data has great potential to illuminate our understanding of biology and medicine, but the data alone is of limited value without computational tools to allow human investigators to visualize and interpret it in the context of their problem of interest.

Results

We created a web server called SHOE that provides an interactive, visual presentation of the available evidence of transcriptional regulation and gene co-expression to facilitate its exploration and interpretation. SHOE predicts the likely transcription factor binding sites in orthologous promoters of humans, mice, and rats using the combined information of 1) transcription factor binding preferences (position-specific scoring matrix (PSSM) libraries such as Transfac32, Jaspar, HOCOMOCO, ChIP-seq, SELEX, PBM, and iPS-reprogramming factor), 2) evolutionary conservation of putative binding sites in orthologous promoters, and 3) co-expression tendencies of gene pairs based on 1,714 normal human cells selected from the Gene Expression Omnibus Database.

Conclusion

SHOE enables users to explore potential interactions between transcription factors and target genes via multiple data views, discover transcription factor binding motifs on top of gene co-expression, and visualize genes as a network of gene and transcription factors on its native gadget GeneViz, the CellDesigner pathway analyzer, and the Reactome database to search the pathways involved. As we demonstrate here when using the CREB1 and Nf-κB datasets, SHOE can reliably identify experimentally verified interactions and predict plausible novel ones, yielding new biological insights into the gene regulatory mechanisms involved. SHOE comes with a manual describing how to run it on a local PC or via the Garuda platform ( www.garuda-alliance.org ), where it joins other popular gadgets such as the CellDesigner pathway analyzer and the Reactome database, as part of analysis workflows to meet the growing needs of molecular biologists and medical researchers. SHOE is available from the following URL http://ec2-54-150-223-65.ap-northeast-1.compute.amazonaws.com A video demonstration of SHOE can be found here: https://www.youtube.com/watch?v=qARinNb9NtE.","hji,kes",0,0,0,2,0,uses a database,NA +30445657,"The European Bioinformatics Institute in 2018: tools, infrastructure and training.","The European Bioinformatics Institute (https://www.ebi.ac.uk/) archives, curates and analyses life sciences data produced by researchers throughout the world, and makes these data available for re-use globally (https://www.ebi.ac.uk/). Data volumes continue to grow exponentially: total raw storage capacity now exceeds 160 petabytes, and we manage these increasing data flows while maintaining the quality of our services. This year we have improved the efficiency of our computational infrastructure and doubled the bandwidth of our connection to the worldwide web. 
We report two new data resources, the Single Cell Expression Atlas (https://www.ebi.ac.uk/gxa/sc/), which is a component of the Expression Atlas; and the PDBe-Knowledgebase (https://www.ebi.ac.uk/pdbe/pdbe-kb), which collates functional annotations and predictions for structure data in the Protein Data Bank. Additionally, Europe PMC (http://europepmc.org/) has added preprint abstracts to its search results, supplementing results from peer-reviewed publications. EMBL-EBI maintains over 150 analytical bioinformatics tools that complement our data resources. We make these tools available for users through a web interface as well as programmatically using application programming interfaces, whilst ensuring the latest versions are available for our users. Our training team, with support from all of our staff, continued to provide on-site, off-site and web-based training opportunities for thousands of researchers worldwide this year.","hji,kes",1,1,2,2,1,2 data resources: Single cell Expression Atlas and PDBe-Knowledgebase,EBI not a data resource in and of itself; reassessed and re-scored - describes distinct resources in abstract +30535135,WIlsON: Web-based Interactive Omics VisualizatioN.,"

Motivation

High throughput (HT) screens in the omics field are typically analyzed by automated pipelines that generate static visualizations and comprehensive spreadsheet data for scientists. However, exploratory and hypothesis driven data analysis are key aspects of the understanding of biological systems, both generating extensive need for customized and dynamic visualization.

Results

Here we describe WIlsON, an interactive workbench for analysis and visualization of multi-omics data. It is primarily intended to empower screening platforms to offer access to pre-calculated HT screen results to the non-computational scientist. Facilitated by an open file format, WIlsON supports all types of omics screens, serves results via a web-based dashboard, and enables end users to perform analyses and generate publication-ready plots.

Availability and implementation

We implemented WIlsON in R with a focus on extensibility using the modular Shiny and Plotly frameworks. A demo of the interactive workbench without limitations may be accessed at http://loosolab.mpi-bn.mpg.de. A standalone Docker container as well as the source code of WIlsON are freely available from our Docker hub https://hub.docker. com/r/loosolab/wilson, CRAN https://cran.r-project.org/web/packages/wilson/, and GitHub repository https://github.molgen.mpg.de/loosolab/wilson-apps, respectively.","hji,kes",0,0,0,2,0,software,NA +30535305,PopViz: a webserver for visualizing minor allele frequencies and damage prediction scores of human genetic variations.,"

Summary

Next-generation sequencing (NGS) generates large amounts of genomic data and reveals about 20 000 genetic coding variants per individual studied. Several mutation damage prediction scores are available to prioritize variants, but there is currently no application to help investigators to determine the relevance of the candidate genes and variants quickly and visually from population genetics data and deleteriousness scores. Here, we present PopViz, a user-friendly, rapid, interactive, mobile-compatible webserver providing a gene-centric visualization of the variants of any human gene, with (i) population-specific minor allele frequencies from the gnomAD population genetic database; (ii) mutation damage prediction scores from CADD, EIGEN and LINSIGHT and (iii) amino-acid positions and protein domains. This application will be particularly useful in investigations of NGS data for new disease-causing genes and variants, by reinforcing or rejecting the plausibility of the candidate genes, and by selecting and prioritizing, the candidate variants for experimental testing.

Availability and implementation

PopViz webserver is freely accessible from http://shiva.rockefeller.edu/PopViz/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +30596886,Multiview: a software package for multiview pattern recognition methods.,"

Summary

Multiview datasets are the norm in bioinformatics, often under the label multi-omics. Multiview data are gathered from several experiments, measurements or feature sets available for the same subjects. Recent studies in pattern recognition have shown the advantage of using multiview methods of clustering and dimensionality reduction; however, none of these methods are readily available to the extent of our knowledge. Multiview extensions of four well-known pattern recognition methods are proposed here. Three multiview dimensionality reduction methods: multiview t-distributed stochastic neighbour embedding, multiview multidimensional scaling and multiview minimum curvilinearity embedding, as well as a multiview spectral clustering method. Often they produce better results than their single-view counterparts, tested here on four multiview datasets.

Availability and implementation

R package at the B2SLab site: http://b2slab.upc.edu/software-and-tutorials/ and Python package: https://pypi.python.org/pypi/multiview.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +30666329,BINOPtimal: a web tool for optimal chiral phosphoric acid catalyst selection.,"A catalyst selection program, BINOPtimal, has been developed. This interactive web tool selects the best performing chiral phosphoric acid catalysts from analysis of the starting materials, imine and nucleophile, on the basis of rules derived from the transformations within its database. This procedure has been applied to an example transformation demonstrating the potential to assist reaction design. The tool is available at www-mmm.ch.cam.ac.uk.","hji,kes",0,0,0,2,0,software,NA +30793168,The TMCrys server for supporting crystallization of transmembrane proteins.,"

Motivation

Due to their special properties, the structures of transmembrane proteins are extremely hard to determine. Several methods exist to predict the propensity of successful completion of the structure determination process. However, available predictors incorporate data of any kind of proteins, hence they can hardly differentiate between crystallizable and non-crystallizable membrane proteins.

Results

We implemented a web server to simplify running TMCrys prediction method that was developed specifically to separate crystallizable and non-crystallizable membrane proteins.

Availability and implementation

http://tmcrys.enzim.ttk.mta.hu.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +30916462,MolTarPred: A web tool for comprehensive target prediction with reliability estimation.,"Molecular target prediction can provide a starting point to understand the efficacy and side effects of phenotypic screening hits. Unfortunately, the vast majority of in silico target prediction methods are not available as web tools. Furthermore, these are limited in the number of targets that can be predicted, do not estimate which target predictions are more reliable and/or lack comprehensive retrospective validations. We present MolTarPred ( http://moltarpred.marseille.inserm.fr/), a user-friendly web tool for predicting protein targets of small organic compounds. It is powered by a large knowledge base comprising 607,659 compounds and 4,553 macromolecular targets collected from the ChEMBL database. In about 1 min, the predicted targets for the supplied molecule will be listed in a table. The chemical structures of the query molecule and the most similar compounds annotated with the predicted target will also be shown to permit visual inspection and comparison. Practical examples of the use of MolTarPred are showcased. MolTarPred is a new resource for scientists that require a more complete knowledge of the polypharmacology of a molecule. The introduction of a reliability score constitutes an attractive functionality of MolTarPred, as it permits focusing experimental confirmatory tests on the most reliable predictions, which leads to higher prospective hit rates.","hji,kes",0,0,0,2,0,software,NA +31012755,"Integration of wheelchair service provision education: current situation, facilitators and barriers for academic rehabilitation programs worldwide.","Purpose: An estimated 75 million people with disabilities need wheelchairs globally, of whom 5-15% have one. Access to an appropriate wheelchair requires rehabilitation professionals trained to provide wheelchair service. 
One aim of the International Society of Wheelchair Professionals (ISWP) is to promote and facilitate the integration of wheelchair service provision education into academic rehabilitation programs worldwide. To inform the development of integration strategies, the purpose of this study was to develop an in-depth global portrait of the wheelchair service provision education offered in academic rehabilitation programs, the process of its integration and the associated facilitators and barriers.Method: Semi-structured qualitative interviews were conducted with a purposive sample of 14 representatives from academic rehabilitation programs (i.e., occupational therapy, physical therapy, and prosthetics and orthotics) in 11 countries, including low, middle and upper resourced settings.Findings: Thematic data analyses identified three overarching themes. The first theme, ""impact of context"", portrays factors related to local population needs, governance and supply chain of equipment and service delivery. The second theme, ""current and planned wheelchair education"", describes the content, pedagogic approach, student evaluation and feedback process. The third theme, ""integration process"", details five states of this process.Conclusions: This study describes in-depth the wheelchair service provision education across academic rehabilitation programs and resource settings, illustrating the context-dependent nature of its integration. This understanding may assist the global community of educators in preparing future rehabilitation professionals to better serve wheelchair users. 
This work has informed the development of ISWP's Seating and Mobility Academic Resource Toolkit (http://smart.wheelchairnetwork.org/).Implications for RehabilitationThe Dynamics of Context-Dependent Integration of Wheelchair Service Provision Education in Curricula model, depicting the findings of this study, may help to inform key stakeholders (i.e., academic institutions, health care providers and policy makers) about potential barriers and facilitators to the implementation of adequate wheelchair service provision education in the curricula of academic rehabilitation program.Study findings may lead to creative strategies, such as the expansion of ISWP's Seating and Mobility Academic Resource Toolkit (SMART; http://smart.wheelchairnetwork.org/), that may enable academic rehabilitation programs to be a part of the solution to strengthening rehabilitation systems worldwide, through appropriately trained rehabilitation professionals in wheelchair service provision.","hji,kes",0,0,0,2,0,NA,NA +31073610,"The barcode, UMI, set format and BUStools.","

Summary

We introduce the Barcode-UMI-Set format (BUS) for representing pseudoalignments of reads from single-cell RNA-seq experiments. The format can be used with all single-cell RNA-seq technologies, and we show that BUS files can be efficiently generated. BUStools is a suite of tools for working with BUS files and facilitates rapid quantification and analysis of single-cell RNA-seq data. The BUS format therefore makes possible the development of modular, technology-specific and robust workflows for single-cell RNA-seq analysis.

Availability and implementation

http://BUStools.github.io/ and http://pachterlab.github.io/kallisto/singlecell.html.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +31076763,DOGMA: a web server for proteome and transcriptome quality assessment.,"Even in the era of next generation sequencing, in which bioinformatics tools abound, annotating transcriptomes and proteomes remains a challenge. This can have major implications for the reliability of studies based on these datasets. Therefore, quality assessment represents a crucial step prior to downstream analyses on novel transcriptomes and proteomes. DOGMA allows such a quality assessment to be carried out. The data of interest are evaluated based on a comparison with a core set of conserved protein domains and domain arrangements. Depending on the studied species, DOGMA offers precomputed core sets for different phylogenetic clades. We now developed a web server for the DOGMA software, offering a user-friendly, simple to use interface. Additionally, the server provides a graphical representation of the analysis results and their placement in comparison to publicly available data. The server is freely available under https://domainworld-services.uni-muenster.de/dogma/. Additionally, for large scale analyses the software can be downloaded free of charge from https://domainworld.uni-muenster.de.","hji,kes",0,0,0,2,0,software,NA +31106379,Drug ReposER: a web server for predicting similar amino acid arrangements to known drug binding interfaces for potential drug repositioning.,"A common drug repositioning strategy is the re-application of an existing drug to address alternative targets. A crucial aspect to enable such repurposing is that the drug's binding site on the original target is similar to that on the alternative target. Based on the assumption that proteins with similar binding sites may bind to similar drugs, the 3D substructure similarity data can be used to identify similar sites in other proteins that are not known targets. 
The Drug ReposER (DRug REPOSitioning Exploration Resource) web server is designed to identify potential targets for drug repurposing based on sub-structural similarity to the binding interfaces of known drug binding sites. The application has pre-computed amino acid arrangements from protein structures in the Protein Data Bank that are similar to the 3D arrangements of known drug binding sites thus allowing users to explore them as alternative targets. Users can annotate new structures for sites that are similarly arranged to the residues found in known drug binding interfaces. The search results are presented as mappings of matched sidechain superpositions. The results of the searches can be visualized using an integrated NGL viewer. The Drug ReposER server has no access restrictions and is available at http://mfrlab.org/drugreposer/.","hji,kes",0,0,0,2,0,software,NA +31114875,GEPIA2: an enhanced web server for large-scale expression profiling and interactive analysis.,"Introduced in 2017, the GEPIA (Gene Expression Profiling Interactive Analysis) web server has been a valuable and highly cited resource for gene expression analysis based on tumor and normal samples from the TCGA and the GTEx databases. Here, we present GEPIA2, an updated and enhanced version to provide insights with higher resolution and more functionalities. Featuring 198 619 isoforms and 84 cancer subtypes, GEPIA2 has extended gene expression quantification from the gene level to the transcript level, and supports analysis of a specific cancer subtype, and comparison between subtypes. In addition, GEPIA2 has adopted new analysis techniques of gene signature quantification inspired by single-cell sequencing studies, and provides customized analysis where users can upload their own RNA-seq data and compare them with TCGA and GTEx samples. We also offer an API for batch process and easy retrieval of the analysis results. 
The updated web server is publicly accessible at http://gepia2.cancer-pku.cn/.","hji,kes",0,0,0,2,0,software,NA +31114876,"BEERE: a web server for biomedical entity expansion, ranking and explorations.","BEERE (Biomedical Entity Expansion, Ranking and Explorations) is a new web-based data analysis tool to help biomedical researchers characterize any input list of genes/proteins, biomedical terms or their combinations, i.e. 'biomedical entities', in the context of existing literature. Specifically, BEERE first aims to help users examine the credibility of known entity-to-entity associative or semantic relationships supported by database or literature references from the user input of a gene/term list. Then, it will help users uncover the relative importance of each entity-a gene or a term-within the user input by computing the ranking scores of all entities. At last, it will help users hypothesize new gene functions or genotype-phenotype associations by an interactive visual interface of constructed global entity relationship network. The output from BEERE includes: a list of the original entities matched with known relationships in databases; any expanded entities that may be generated from the analysis; the ranks and ranking scores reported with statistical significance for each entity; and an interactive graphical display of the gene or term network within data provenance annotations that link to external data sources. The web server is free and open to all users with no login requirement and can be accessed at http://discovery.informatics.uab.edu/beere/.","hji,kes",0,0,0,2,0,software,NA +31139843,Erratum to: Stroke in patients with prosthetic valve endocarditis : Single-center cohort study in China.,Erratum to:Herz 2019 https://doi.org/10.1007/s00059-019-4809-4 The original article has been corrected. The data given in Table 4 for patient No. 13 were wrong. 
Please find here the corrected Table.The authors and publisher apologize for this ….,"hji,kes",0,0,0,2,0,NA,NA +31165321,I-PINE web server: an integrative probabilistic NMR assignment system for proteins.,"Various methods for understanding the structural and dynamic properties of proteins rely on the analysis of their NMR chemical shifts. These methods require the initial assignment of NMR signals to particular atoms in the sequence of the protein, a step that can be very time-consuming. The probabilistic interaction network of evidence (PINE) algorithm for automated assignment of backbone and side chain chemical shifts utilizes a Bayesian probabilistic network model that analyzes sequence data and peak lists from multiple NMR experiments. PINE, which is one of the most popular and reliable automated chemical shift assignment algorithms, has been available to the protein NMR community for longer than a decade. We announce here a new web server version of PINE, called Integrative PINE (I-PINE), which supports more types of NMR experiments than PINE (including three-dimensional nuclear Overhauser enhancement and four-dimensional J-coupling experiments) along with more comprehensive visualization of chemical shift based analysis of protein structure and dynamics. The I-PINE server is freely accessible at http://i-pine.nmrfam.wisc.edu . Help pages and tutorial including browser capability are available at: http://i-pine.nmrfam.wisc.edu/instruction.html . Sample data that can be used for testing the web server are available at: http://i-pine.nmrfam.wisc.edu/examples.html .","hji,kes",0,0,0,2,0,software,NA +31182652,Practice Resource for Forensic Training in General Psychiatry Residency Programs.,"Full Document: Alonso-Katzowitz JS, Cardasis W, Cerny-Suelzer CA, et al: Practice Resource for Forensic Training in General Psychiatry Residency Programs. Journal of the American Academy of Psychiatry and the Law Online Supplement 2019, 47 (1). 
Available at: http://www.jaapl.org/content/47/1_Supplement.","hji,kes",0,0,0,2,0,abstract seems to be downloaded wrong,NA +31220119,Interlaboratory study to validate a STR profiling method for intraspecies identification of mouse cell lines.,"The Consortium for Mouse Cell Line Authentication was formed to validate Short Tandem Repeat (STR) markers for intraspecies identification of mouse cell lines. The STR profiling method is a multiplex polymerase chain reaction (PCR) assay comprised of primers targeting 19 mouse STR markers and two human STR markers (for interspecies contamination screening). The goals of the Consortium were to perform an interlaboratory study to-(1) validate the mouse STR markers to uniquely identify mouse cell lines (intraspecies identification), (2) to provide a public database of mouse cell lines with the National Institute of Standards and Technology (NIST)-validated mouse STR profiles, and (3) to publish the results of the interlaboratory study. The interlaboratory study was an international effort that consisted of 12 participating laboratories representing institutions from academia, industry, biological resource centers, and government. The study was based on 50 of the most commonly used mouse cell lines obtained from the American Type Culture Collection (ATCC). Of the 50 mouse cell lines, 18 had unique STR profiles that were 100% concordant (match) among all Consortium laboratory members, and the remaining 32 cell lines had discordance that was resolved readily and led to improvement of the assay. The discordance was due to low signal and interpretation issues involving artifacts and genotyping errors. Although the total number of discordant STR profiles was relatively high in this study, the percent of labs agreeing on allele calls among the discordant samples was above 92%. 
The STR profiles, including electropherogram images, for NIST-validated mouse cell lines will be published on the NCBI BioSample Database (https://www.ncbi.nlm.nih.gov/biosample/). Overall, the interlaboratory study showed that the multiplex PCR method using 18 of the 19 mouse STR markers is capable of discriminating at the intraspecies level between mouse cell lines. Further studies are ongoing to refine the assay including (1) development of an allelic ladder for improving the accuracy of allele calling and (2) integration of stutter filters to identify true stutter.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +31318409,PTM-Logo: a program for generation of sequence logos based on position-specific background amino-acid probabilities.,"

Summary

Identification of the amino-acid motifs in proteins that are targeted for post-translational modifications (PTMs) is of great importance in understanding regulatory networks. Information about targeted motifs can be derived from mass spectrometry data that identify peptides containing specific PTMs such as phosphorylation, ubiquitylation and acetylation. Comparison of input data against a standardized 'background' set allows identification of over- and under-represented amino acids surrounding the modified site. Conventionally, calculation of targeted motifs assumes a random background distribution of amino acids surrounding the modified position. However, we show that probabilities of amino acids depend on (i) the type of the modification and (ii) their positions relative to the modified site. Thus, software that identifies such over- and under-represented amino acids should make appropriate adjustments for these effects. Here we present a new program, PTM-Logo, that generates representations of these amino acid preferences ('logos') based on position-specific amino-acid probability backgrounds calculated either from user-input data or curated databases.

Availability and implementation

PTM-Logo is freely available online at http://sysbio.chula.ac.th/PTMLogo/ or https://hpcwebapps.cit.nih.gov/PTMLogo/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31329242,Genetic association testing using the GENESIS R/Bioconductor package.,"

Summary

The Genomic Data Storage (GDS) format provides efficient storage and retrieval of genotypes measured by microarrays and sequencing. We developed GENESIS to perform various single- and aggregate-variant association tests using genotype data stored in GDS format. GENESIS implements highly flexible mixed models, allowing for different link functions, multiple variance components and phenotypic heteroskedasticity. GENESIS integrates cohesively with other R/Bioconductor packages to build a complete genomic analysis workflow entirely within the R environment.

Availability and implementation

https://bioconductor.org/packages/GENESIS; vignettes included.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31372596,Accelerating structure-function mapping using the ViVa webtool to mine natural variation.,"Thousands of sequenced genomes are now publicly available capturing a significant amount of natural variation within plant species; yet, much of these data remain inaccessible to researchers without significant bioinformatics experience. Here, we present a webtool called ViVa (Visualizing Variation) which aims to empower any researcher to take advantage of the amazing genetic resource collected in the Arabidopsis thaliana 1001 Genomes Project (http://1001genomes.org). ViVa facilitates data mining on the gene, gene family, or gene network level. To test the utility and accessibility of ViVa, we assembled a team with a range of expertise within biology and bioinformatics to analyze the natural variation within the well-studied nuclear auxin signaling pathway. Our analysis has provided further confirmation of existing knowledge and has also helped generate new hypotheses regarding this well-studied pathway. These results highlight how natural variation could be used to generate and test hypotheses about less-studied gene families and networks, especially when paired with biochemical and genetic characterization. ViVa is also readily extensible to databases of interspecific genetic variation in plants as well as other organisms, such as the 3,000 Rice Genomes Project ( http://snp-seek.irri.org/) and human genetic variation ( https://www.ncbi.nlm.nih.gov/clinvar/).","hji,kes",0,0,0,2,0,software,a tool and not descriptive of resource +31444973,CLASTR: The Cellosaurus STR similarity search tool - A precious help for cell line authentication.,"Despite an increased awareness of the problematic of cell line cross-contamination and misidentification, it remains nowadays a major source of erroneous experimental results in biomedical research. 
To prevent it, researchers are expected to frequently test the authenticity of the cell lines they are working on. STR profiling was selected as the international reference method to perform cell line authentication. While the experimental protocols and manipulations for generating a STR profile are well described, the available tools and workflows to analyze such data are lacking. The Cellosaurus knowledge resource aimed to improve the situation by compiling all the publicly available STR profiles from the literature and other databases. As a result, it grew to become the largest database in terms of human STR profiles, with 6,474 distinct cell lines having an associated STR profile (release July 31, 2019). Here we present CLASTR, the Cellosaurus STR similarity search tool enabling users to compare one or more STR profiles with those available in the Cellosaurus cell line knowledge resource. It aims to help researchers in the process of cell line authentication by providing numerous functionalities. The tool is publicly accessible on the SIB ExPASy server (https://web.expasy.org/cellosaurus-str-search) and its source code is available on GitHub under the GPL-3.0 license.","hji,kes",0,0,0,2,0,"software, but specific to searching a db",NA +31501868,Diurnal.plant.tools: Comparative Transcriptomic and Co-expression Analyses of Diurnal Gene Expression of the Archaeplastida Kingdom.,"Almost all organisms coordinate some aspects of their biology through the diurnal cycle. Photosynthetic organisms, and plants especially, have established complex programs that coordinate physiological, metabolic and developmental processes with the changing light. The diurnal regulation of the underlying transcriptional processes is observed when groups of functionally related genes (gene modules) are expressed at a specific time of the day. 
However, studying the diurnal regulation of these gene modules in the plant kingdom was hampered by the large amount of data required for the analyses. To meet this need, we used gene expression data from 17 diurnal studies spanning the whole Archaeplastida kingdom (Plantae kingdom in the broad sense) to make an online diurnal database. We have equipped the database with tools that allow user-friendly cross-species comparisons of gene expression profiles, entire co-expression networks, co-expressed clusters (involved in specific biological processes), time-specific gene expression and others. We exemplify how these tools can be used by studying three important biological questions: (i) the evolution of cell division, (ii) the diurnal control of gene modules in algae and (iii) the conservation of diurnally controlled modules across species. The database is freely available at http://diurnal.plant.tools.","hji,kes",1,1,2,2,1,database and software tools,no notes; reassessed and re-scored - seems to be value add +31504168,CROSSalive: a web server for predicting the in vivo structure of RNA molecules.,"MOTIVATION:RNA structure is difficult to predict in vivo due to interactions with enzymes and other molecules. Here we introduce CROSSalive, an algorithm to predict the single- and double-stranded regions of RNAs in vivo using predictions of protein interactions. RESULTS:Trained on icSHAPE data in presence (m6a+) and absence of N6 methyladenosine modification (m6a-), CROSSalive achieves cross-validation accuracies between 0.70 and 0.88 in identifying high-confidence single- and double-stranded regions. The algorithm was applied to the long non-coding RNA Xist (17 900 nt, not present in the training) and shows an Area under the ROC curve of 0.83 in predicting structured regions. AVAILABILITY AND IMPLEMENTATION:CROSSalive webserver is freely accessible at http://service.tartaglialab.com/new_submission/crossalive. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31589313,WASPS: web-assisted symbolic plasmid synteny server.,"

Motivation

Comparative plasmid genome analyses require complex tools, the manipulation of large numbers of sequences and constitute a daunting task for the wet bench experimentalist. Dedicated plasmid databases are sparse, only comprise bacterial plasmids and provide exclusively access to sequence similarity searches.

Results

We have developed Web-Assisted Symbolic Plasmid Synteny (WASPS), a web service granting protein and DNA sequence similarity searches against a database comprising all completely sequenced natural plasmids from bacterial, archaeal and eukaryal origin. This database pre-calculates orthologous protein clustering and enables WASPS to generate fully resolved plasmid synteny maps in real time using internal and user-provided DNA sequences.

Availability and implementation

WASPS queries befit all current browsers such as Firefox, Edge or Safari while the best functionality is achieved with Chrome. Internet Explorer is not supported. WASPS is freely accessible at https://archaea.i2bc.paris-saclay.fr/wasps/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,"database and software tools; reassessed, data not directly available","no notes; reassessed and still no - sounds like a service only to me, BLAST against a curated set of genomes" +31598083,Construction of a core collection of eggplant (Solanum melongena L.) based on genome-wide SNP and SSR genotypes.,"A core collection of eggplant (Solanum melongena L.) was developed based on a dataset of genome-wide 831 SNP and 50 SSR genotypes analyzed in 893 accessions of eggplant genetic resources collected in the NARO Genebank using the Core Hunter II program. The 893 accessions were collected worldwide, mainly Asia. Genetic variation and population structure among the 893 eggplant accessions were characterized. The genetic diversity of the Asian accessions, especially the South Asian and Southeast Asian accessions, forming the center of diversity in eggplant, was higher than that of the other regions. The resulting core collection, World Eggplant Core (WEC) collection consisted of 100 accessions basically collected from the high genetic diversity countries. Based on the results of the cluster and STRUCTURE analyses with SNP genotypes, the WEC collection was divided into four clusters (S1-S4). Each cluster corresponds to a geographical group as below, S1; the European, American and African countries, S2; the East Asian countries, S3; the Southeast Asian countries, S4; the South Asian and Southeast Asian countries. 
The genotype and phenotype data of the WEC collection are available from the VegMarks database (https://vegmarks.nivot.affrc.go.jp/resource/), and seed samples are available from the NARO Genebank (https://www.gene.affrc.go.jp/databases-core_collections.php).","hji,kes",0,0,0,2,0,database is external and contains other data,not descriptive of resource +31647521,gep2pep: a Bioconductor package for the creation and analysis of pathway-based expression profiles.,"Summary: Pathway-based expression profiles allow for high-level interpretation of transcriptomic data and systematic comparison of dysregulated cellular programs. We have previously demonstrated the efficacy of pathway-based approaches with two different applications: the Drug Set Enrichment Analysis and the Gene2drug analysis. Here we present a software tool that allows to easily convert gene-based profiles to pathway-based profiles and analyze them within the popular R framework. We also provide pre-computed profiles derived from the original Connectivity Map and its next generation release, i.e. the LINCS database.

Availability and implementation: the tool is implemented as the R/Bioconductor package gep2pep and can be freely downloaded from https://bioconductor.org/packages/gep2pep.

Supplementary information: Supplementary data are available at http://dsea.tigem.it/lincs.","hji,kes",0,0,0,2,0,software,NA +31664036,Prenatal hypoxia-induced epigenomic and transcriptomic reprogramming in rat fetal and adult offspring hearts.,"The molecular mechanism of antenatal hypoxia impacting on fetal heart development and elevated risk of heart disease of adult offspring is poorly understood. We present a dataset integrating DNA methylome and transcriptome analyses of antenatal hypoxia affecting rat fetal and adult offspring hearts to understand hypoxia-mediated epigenomic reprogramming of the heart development. We showed that antenatal hypoxia not only induced DNA methylomic and transcriptomic changes in the fetal hearts, but also had a delayed and lasting effect on the adult offspring hearts. Of interest, antenatal hypoxia induced opposite changes in DNA methylation patterns in fetal and adult hearts, with a hypermethylation in the fetus and a hypomethylation in the adult. An extensive preprocessing, quality assessment, and downstream data analyses were performed on the genomic dataset so that the research community may take advantage of the public resource. 
These dataset could be exploited as a comprehensive resource for understanding fetal hypoxia-mediated epigenetic reprogramming in the heart development and further developmental programming of heart vulnerability to disease later in life.Figshare doi: https://doi.org/10.6084/m9.figshare.9948572.","hji,kes",0,0,0,2,0,NA,NA +31681951,GenCLiP 3: mining human genes' functions and regulatory networks from PubMed based on co-occurrences and natural language processing.,"SUMMARY:We present a web server, GenCLiP 3, which is an updated version of GenCLiP 2.0 to enhance analysis of human gene functions and regulatory networks, with the following improvements: i) accurate recognition of molecular interactions with polarity and directionality from the entire PubMed database; ii) support for Boolean search to customize multiple-term search and to quickly retrieve function related genes; iii) strengthened association between gene and keyword by a new scoring method; and iv) daily updates following literature release at PubMed FTP. AVAILABILITY:The server is freely available for academic use at: http://ci.smu.edu.cn/genclip3/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31688940,Tripal MapViewer: A tool for interactive visualization and comparison of genetic maps.,"Tripal is an open-source, resource-efficient toolkit for construction of genomic, genetic and breeding databases. It facilitates development of biological websites by providing tools to integrate and display biological data using the generic database schema, Chado, together with Drupal, a popular website creation and content management system. Tripal MapViewer is a new interactive tool for visualizing genetic map data. 
Developed as a Tripal replacement for Comparative Map Viewer (CMap), it enables visualization of entire maps or linkage groups and features such as molecular markers, quantitative trait loci (QTLs) and heritable phenotypic markers. It also provides graphical comparison of maps sharing the same markers as well as dot plot and correspondence matrices. MapViewer integrates directly with the Tripal application programming interface framework, improving data searching capability and providing a more seamless experience for site visitors. The Tripal MapViewer interface can be integrated in any Tripal map page and linked from any Tripal page for markers, QTLs, heritable morphological markers or genes. Configuration of the display is available through a control panel and the administration interface. The administration interface also allows configuration of the custom database query for building materialized views, providing better performance and flexibility in the way data is stored in the Chado database schema. MapViewer is implemented with the D3.js technology and is currently being used at the Genome Database for Rosaceae (https://www.rosaceae.org), CottonGen (https://www.cottongen.org), Citrus Genome Database (https://citrusgenomedb.org), Vaccinium Genome Database (https://www.vaccinium.org) and Cool Season Food Legume Database (https://www.coolseasonfoodlegume.org). It is also currently in development on the Hardwood Genomics Web (https://hardwoodgenomics.org) and TreeGenes (https://treegenesdb.org). Database URL: https://gitlab.com/mainlabwsu/tripal_map.","hji,kes",0,0,0,2,0,software,NA +31697319,Critical evaluation of web-based prediction tools for human protein subcellular localization.,"Human protein subcellular localization has an important research value in biological processes, also in elucidating protein functions and identifying drug targets. 
Over the past decade, a number of protein subcellular localization prediction tools have been designed and made freely available online. The purpose of this paper is to summarize the progress of research on the subcellular localization of human proteins in recent years, including commonly used data sets proposed by the predecessors and the performance of all selected prediction tools against the same benchmark data set. We carry out a systematic evaluation of several publicly available subcellular localization prediction methods on various benchmark data sets. Among them, we find that mLASSO-Hum and pLoc-mHum provide a statistically significant improvement in performance, as measured by the value of accuracy, relative to the other methods. Meanwhile, we build a new data set using the latest version of Uniprot database and construct a new GO-based prediction method HumLoc-LBCI in this paper. Then, we test all selected prediction tools on the new data set. Finally, we discuss the possible development directions of human protein subcellular localization. Availability: The codes and data are available from http://www.lbci.cn/syn/.","hji,kes",0,0,0,2,0,NA,NA +31742321,KofamKOALA: KEGG Ortholog assignment based on profile HMM and adaptive score threshold.,"

Summary

KofamKOALA is a web server to assign KEGG Orthologs (KOs) to protein sequences by homology search against a database of profile hidden Markov models (KOfam) with pre-computed adaptive score thresholds. KofamKOALA is faster than existing KO assignment tools with its accuracy being comparable to the best performing tools. Function annotation by KofamKOALA helps linking genes to KEGG resources such as the KEGG pathway maps and facilitates molecular network reconstruction.

Availability and implementation

KofamKOALA, KofamScan and KOfam are freely available from GenomeNet (https://www.genome.jp/tools/kofamkoala/).

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31782763,genBaRcode: a comprehensive R-package for genetic barcode analysis.,"MOTIVATION:Genetic barcodes have been established as an efficient method to trace clonal progeny of uniquely labeled cells by introducing artificial genetic sequences into the corresponding genomes. The assessment of those sequences relies on next generation sequencing and the subsequent analysis aiming to identify sequences of interest and correctly quantifying their abundance. RESULTS:We developed the genBaRcode package as a toolbox combining the flexibility of digesting next generation sequencing reads with or without a sophisticated barcode structure, with a variety of error-correction approaches and the availability of several types of visualization routines. Furthermore, a graphical user interface was incorporated to allow also less experienced R users package-based analyses. Finally, the provided tool is intended to bridge the gap between generating and analyzing barcode data and thereby supporting the establishment of standardized and reproducible analysis strategies. AVAILABILITY AND IMPLEMENTATION:The genBaRcode package is available at CRAN (https://cran.r-project.org/package=genBaRcode).","hji,kes",0,0,0,2,0,software,NA +31806443,Identification of adverse outcome pathway related to high-density polyethylene microplastics exposure: Caenorhabditis elegans transcription factor RNAi screening and zebrafish study.,"To gain insight into the human health implications of microplastics, in this study, we investigated the possible mechanisms affecting the toxicity of high-density polyethylene (HDPE) in the nematode Caenorhabditis elegans using RNAi screening and a bioinformatics-based unbiased approach. The candidate pathways identified from C. 
elegans study were also confirmed using vertebrate model, zebrafish, Danio rerio and human relevance was then inferred using Comparative Toxicogenomics Database (CTD) analysis. Prior to evaluating the toxicity, label-free Raman mapping was conducted to investigate whether or not the organisms could uptake HDPE. C. elegans transcription factor RNAi screening results showed that the nucleotide excision repair (NER) and transforming growth factor-beta (TGF-β) signaling pathways were significantly associated with HDPE exposure, which was also confirmed in zebrafish model. Gene-disease interaction analysis using the CTD revealed the possible human health implications of microplastics. Finally, based on this finding, related AOPs were identified from AOP Wiki (http://aopwiki.org), which are ""Peroxisome proliferator-activated receptors γ inactivation leading to lung fibrosis"" and ""AFB1: Mutagenic Mode-of-Action leading to Hepatocellular Carcinoma"". Further studies are needed for the validation of these AOPs with various microplastics.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +31813964,Pathway Tools version 23.0 update: software for pathway/genome informatics and systems biology.,"

Motivation

Biological systems function through dynamic interactions among genes and their products, regulatory circuits and metabolic networks. Our development of the Pathway Tools software was motivated by the need to construct biological knowledge resources that combine these many types of data, and that enable users to find and comprehend data of interest as quickly as possible through query and visualization tools. Further, we sought to support the development of metabolic flux models from pathway databases, and to use pathway information to leverage the interpretation of high-throughput data sets.

Results

In the past 4 years we have enhanced the already extensive Pathway Tools software in several respects. It can now support metabolic-model execution through the Web, it provides a more accurate gap filler for metabolic models; it supports development of models for organism communities distributed across a spatial grid; and model results may be visualized graphically. Pathway Tools supports several new omics-data analysis tools including the Omics Dashboard, multi-pathway diagrams called pathway collages, a pathway-covering algorithm for metabolomics data analysis and an algorithm for generating mechanistic explanations of multi-omics data. We have also improved the core pathway/genome databases management capabilities of the software, providing new multi-organism search tools for organism communities, improved graphics rendering, faster performance and re-designed gene and metabolite pages.

Availability

The software is free for academic use; a fee is required for commercial use. See http://pathwaytools.com.

Contact

pkarp@ai.sri.com.

Supplementary information

Supplementary data are available at Briefings in Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31821408,psSubpathway: a software package for flexible identification of phenotype-specific subpathways in cancer progression.,"SUMMARY:Subpathways, which are defined as local gene subregions within a biological pathway, have been reported to be associated with the occurrence and development of cancer. The recent subpathway identification tools generally identify differentially expressed subpathways between normal and cancer samples. psSubpathway is a novel systems biology R-based software package that enables flexible identification of phenotype-specific subpathways in a cancer dataset with multiple categories (such as multiple subtypes and developmental stages of cancer). The operation modes include extraction of subpathways from pathway networks, inference with subpathway activities in the context of gene expression data, identification of subtype-specific subpathways, identification of dynamic-changed subpathways associated with the cancer developmental stage and visualization of subpathway activities of samples in different phenotypes. Its capabilities enable psSubpathway to find specific abnormal subpathways in the datasets with multi-phenotype categories and to fill the gaps in the recent tools. psSubpathway may identify more specific biomarkers to facilitate the development of tailored treatment for patients with cancer. AVAILABILITY AND IMPLEMENTATION:The package is implemented in R and available under GPL-2 license from the CRAN website (https://cran.r-project.org/web/packages/psSubpathway/). 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31829207,Genepanel.iobio - an easy to use web tool for generating disease- and phenotype-associated gene lists.,"When ordering genetic testing or triaging candidate variants in exome and genome sequencing studies, it is critical to generate and test a comprehensive list of candidate genes that succinctly describe the complete and objective phenotypic features of disease. Significant efforts have been made to curate gene:disease associations both in academic research and commercial genetic testing laboratory settings. However, many of these valuable resources exist as islands and must be used independently, generating static, single-resource gene:disease association lists. Here we describe genepanel.iobio (https://genepanel.iobio.io) an easy to use, free and open-source web tool for generating disease- and phenotype-associated gene lists from multiple gene:disease association resources, including the NCBI Genetic Testing Registry (GTR), Phenolyzer, and the Human Phenotype Ontology (HPO). We demonstrate the utility of genepanel.iobio by applying it to complex, rare and undiagnosed disease cases that had reached a diagnostic conclusion. We find that genepanel.iobio is able to correctly prioritize the gene containing the diagnostic variant in roughly half of these challenging cases. Importantly, each component resource contributed diagnostic value, showing the benefits of this aggregate approach. 
We expect genepanel.iobio will improve the ease and diagnostic value of generating gene:disease association lists for genetic test ordering and whole genome or exome sequencing variant prioritization.","hji,kes",0,0,0,2,0,software,NA +31830251,FFLtool: a web server for transcription factor and miRNA feed forward loop analysis in human.,"SUMMARY:Transcription factors (TFs) and microRNAs (miRNAs) are two kinds of important regulators for transcriptional and post-transcriptional regulations. Understanding cross-talks between the two regulators and their targets is critical to reveal complex molecular regulatory mechanisms. Here, we developed FFLtool, a web server for detecting potential feed forward loop (FFL) of TF-miRNA-target regulation in human. In FFLtool, we integrated comprehensive regulations of TF-target and miRNA-target, and developed two functional modules: (i) The 'FFL Analysis' module can detect potential FFLs and internal regulatory networks in a user-defined gene set. FFLtool also provides three levels of evidence to illustrate the reliability for each FFL and enrichment functions for co-target genes of the same TF and miRNA; (ii) The 'Browse FFLs' module displays FFLs comprised of differentially or specifically expressed TFs and miRNAs and their target genes in cancers. FFLtool is a valuable resource for investigating gene expression regulation and mechanism study in biological processes and diseases. AVAILABILITY AND IMPLEMENTATION:FFLtool is available on http://bioinfo.life.hust.edu.cn/FFLtool/. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31834361,MLDSP-GUI: an alignment-free standalone tool with an interactive graphical user interface for DNA sequence comparison and analysis.,"SUMMARY:Machine Learning with Digital Signal Processing and Graphical User Interface (MLDSP-GUI) is an open-source, alignment-free, ultrafast, computationally lightweight, and standalone software tool with an interactive GUI for comparison and analysis of DNA sequences. MLDSP-GUI is a general-purpose tool that can be used for a variety of applications such as taxonomic classification, disease classification, virus subtype classification, evolutionary analyses, among others. AVAILABILITY AND IMPLEMENTATION:MLDSP-GUI is open-source, cross-platform compatible, and is available under the terms of the Creative Commons Attribution 4.0 International license (http://creativecommons.org/licenses/by/4.0/). The executable and dataset files are available at https://sourceforge.net/projects/mldsp-gui/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31860075,iMIRAGE: an R package to impute microRNA expression using protein-coding genes.,"

Summary

MicroRNAs (miRNAs) are critical post-transcriptional regulators of gene expression. Due to challenges in accurate profiling of small RNAs, a vast majority of public transcriptome datasets lack reliable miRNA profiles. However, the biological consequence of miRNA activity in the form of altered protein-coding gene (PCG) expression can be captured using machine-learning algorithms. Here, we present iMIRAGE (imputed miRNA activity from gene expression), a convenient tool to predict miRNA expression using PCG expression of the test datasets. The iMIRAGE package provides an integrated workflow for normalization and transformation of miRNA and PCG expression data, along with the option to utilize predicted miRNA targets to impute miRNA activity from independent test PCG datasets.

Availability and implementation

The iMIRAGE package for R, along with package documentation and vignette, is available at https://aritronath.github.io/iMIRAGE/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31874630,wtest: an integrated R package for genetic epistasis testing.,"

Background

With the increasing amount of high-throughput genomic sequencing data, there is a growing demand for a robust and flexible tool to perform interaction analysis. The identification of SNP-SNP, SNP-CpG, and higher order interactions helps explain the genetic etiology of human diseases, yet genome-wide analysis for interactions has been very challenging, due to the computational burden and a lack of statistical power in most datasets.

Results

The wtest R package performs association testing for main effects, pairwise and high order interactions in genome-wide association study data, and cis-regulation of SNP and CpG sites in genome-wide and epigenome-wide data. The software includes a number of post-test diagnostic and analysis functions and offers an integrated toolset for genetic epistasis testing.

Conclusions

The wtest is an efficient and powerful statistical tool for integrated genetic epistasis testing. The package is available in CRAN: https://CRAN.R-project.org/package=wtest.","hji,kes",0,0,0,2,0,software,NA +31882993,ShinyGO: a graphical gene-set enrichment tool for animals and plants.,"

Motivation

Gene lists are routinely produced from various omic studies. Enrichment analysis can link these gene lists with underlying molecular pathways and functional categories such as gene ontology (GO) and other databases.

Results

To complement existing tools, we developed ShinyGO based on a large annotation database derived from Ensembl and STRING-db for 59 plant, 256 animal, 115 archeal and 1678 bacterial species. ShinyGO's novel features include graphical visualization of enrichment results and gene characteristics, and application program interface access to KEGG and STRING for the retrieval of pathway diagrams and protein-protein interaction networks. ShinyGO is an intuitive, graphical web application that can help researchers gain actionable insights from gene-sets.

Availability and implementation

http://ge-lab.org/go/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31883004,qgg: an R package for large-scale quantitative genetic analyses.,"SUMMARY:Here, we present the R package qgg, which provides an environment for large-scale genetic analyses of quantitative traits and diseases. The qgg package provides an infrastructure for efficient processing of large-scale genetic data and functions for estimating genetic parameters, and performing single and multiple marker association analyses and genomic-based predictions of phenotypes. AVAILABILITY AND IMPLEMENTATION:The qgg package is freely available. For the latest updates, user guides and example scripts, consult the main page http://psoerensen.github.io/qgg. The current release is available from CRAN (https://CRAN.R-project.org/package=qgg) for all major operating systems. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31926012,LigRMSD: a web server for automatic structure matching and RMSD calculations among identical and similar compounds in protein-ligand docking.,"MOTIVATION:Root mean square deviation (RMSD) is one of the most useful and straightforward features for structural comparison between different conformations of the same molecule. Commonly, protein-ligand docking programs have included some utilities that allow the calculation of this value; however, they only work efficiently when exists a complete atom label equivalence between the evaluated conformations. RESULTS:We present LigRMSD, a free web-server for the automatic matching and RMSD calculations among identical or similar chemical compounds. This server allows the user to submit only a pair of identical or similar molecules or dataset of similar compounds to compare their three-dimensional conformations. AVAILABILITY AND IMPLEMENTATION:LigRMSD can be freely accessed at https://ligrmsd.appsbio.utalca.cl. 
SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31930403,The Protein Imager: a full-featured online molecular viewer interface with server-side HQ-rendering capabilities.,"SUMMARY:Molecular viewers' long learning curve is hindering researchers in approaching the field of structural biology for the first time. Herein, we present 'The Protein Imager', a lightweight, powerful and easy-to-use interface as a next-gen online molecular viewer. Furthermore, the interface is linked to an automated server-side rendering system able to generate publication-quality molecular illustrations. The Protein Imager interface has been designed for easy usage for beginners and experts in the field alike. The interface allows the preparation of very complex molecular views maintaining a high level of responsiveness even on mobile devices. AVAILABILITY AND IMPLEMENTATION:The Protein Imager interface is freely available online at https://3dproteinimaging.com/protein-imager. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31950986,3D-Cell-Annotator: an open-source active surface tool for single-cell segmentation in 3D microscopy images.,"SUMMARY:Segmentation of single cells in microscopy images is one of the major challenges in computational biology. It is the first step of most bioimage analysis tasks, and essential to create training sets for more advanced deep learning approaches. Here, we propose 3D-Cell-Annotator to solve this task using 3D active surfaces together with shape descriptors as prior information in a semi-automated fashion. The software uses the convenient 3D interface of the widely used Medical Imaging Interaction Toolkit (MITK). Results on 3D biological structures (e.g. spheroids, organoids and embryos) show that the precision of the segmentation reaches the level of a human expert. 
AVAILABILITY AND IMPLEMENTATION:3D-Cell-Annotator is implemented in CUDA/C++ as a patch for the segmentation module of MITK. The 3D-Cell-Annotator enabled MITK distribution can be downloaded at: www.3D-cell-annotator.org. It works under Windows 64-bit systems and recent Linux distributions even on a consumer level laptop with a CUDA-enabled video card using recent NVIDIA drivers. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +31991337,FrogAncestryCalc: A standalone batch likelihood computation tool for ancestry inference panels catalogued in FROG-kb.,"The web-based application, FROG-kb (the Forensic Resource/Reference on Genetics-knowledge base, https://frog.med.yale.edu) supports the use of Single Nucleotide Polymorphisms (SNPs) for individual identification and ancestry inference in a forensic setting. The primary functionality provided by FROG-kb on the web is computation of relative likelihoods of populations being the origin of an individual, utilizing the underlying reference population allele frequency data curated and organized in ALFRED, the ALlele FREquency Database (https://alfred.med.yale.edu/). Here we present a downloadable stand-alone tool, FrogAncestryCalc that can simultaneously compute population likelihoods for multiple individuals for a selected panel of SNPs. The program calculates for a given Ancestry Inference (AI) panel the probability of each individual's genotype profile arising in each of the reference populations. Five of the AI panels catalogued in FROG-kb are implemented in this version of FrogAncestryCalc.","hji,kes",0,0,0,2,0,references other db,abstract refs dbs but the paper itself is for a tool +32006275,Comparative Protein Structure Analysis with Bio3D-Web.,"Bio3D-web is an online application for the interactive analysis of sequence-structure-dynamics relationships in user-defined protein structure sets. 
Major functionality includes structure database searching, sequence and structure conservation assessment, inter-conformer relationship mapping and clustering with principal component analysis (PCA), and flexibility prediction and comparison with ensemble normal mode analysis (eNMA). Collectively these methods allow users to start with a single sequence or structure and characterize the structural, conformational, and internal dynamic properties of homologous proteins for which there are high-resolution structures available. Functionality is also provided for the generation of custom PDF, Word, and HTML analysis reports detailing all user-specified analysis settings and corresponding results. Bio3D-web is available at http://thegrantlab.org/bio3d/webapps , as a Docker image https://hub.docker.com/r/bio3d/bio3d-web/ , or downloadable source code https://bitbucket.org/Grantlab/bio3d-web .","hji,kes",0,0,0,2,0,software,NA +32022785,Therapeutic Drug Monitoring of Asparaginase: Intra-individual Variability and Predictivity in Children With Acute Lymphoblastic Leukemia Treated With PEG-Asparaginase in the AIEOP-BFM Acute Lymphoblastic Leukemia 2009 Study.,"

Background

Therapeutic drug monitoring (TDM) can identify patients with subtherapeutic asparaginase (ASNase) activity [silent inactivation (SI)] and prospectively guide therapeutic adaptation. However, limited intra-individual variability is a precondition for targeted dosing and the diagnosis of SI.

Methods

In the AIEOP-BFM acute lymphoblastic leukemia (ALL) 2009 trial, 2771 children with ALL were included and underwent ASNase-TDM in a central laboratory in Münster. Two biweekly administrations of pegylated ASNase during induction and a third dose during reinduction or the high-risk block, which was administered several weeks later, were monitored. We calculated (1) the incidence of SI; and (2) the predictivity of SI for SI after the subsequent administration. ASNase activities monitored during induction were categorized into percentiles at the respective sampling time points. These percentiles were used to calculate the intra-individual range of percentiles as a surrogate for intrapatient variability and to evaluate the predictivity of ASNase activity for the subsequent administration.

Results

The overall incidence of SI was low (4.9%). The positive predictive value of SI identified by one sample was ≤21%. Confirmation of SI by a second sample indicated a high positive predictive value of 100% for biweekly administrations, but not for administration more than 17 weeks later. Sampling and/or documentation errors were risks for misdiagnosis of SI. High intra-individual variability in ASNase activities, with ranges of percentiles over more than 2 quartiles and low predictivity, was observed in approximately 25% of the patients. These patients were likely to fail dose individualization based on TDM data.

Conclusions

To use TDM as a basis for clinical decisions, standardized clinical procedures are required and high intra-individual variability should be taken into account. Details of the treatment are available in the European Clinical Trials Database at https://www.clinicaltrialsregister.eu/ctr-search/trial/2007-004270-43/DE.","hji,kes",0,0,0,2,0,NA,NA +32058000,TbCAPs: A toolbox for co-activation pattern analysis.,"Functional magnetic resonance imaging provides rich spatio-temporal data of human brain activity during task and rest. Many recent efforts have focussed on characterising dynamics of brain activity. One notable instance is co-activation pattern (CAP) analysis, a frame-wise analytical approach that disentangles the different functional brain networks interacting with a user-defined seed region. While promising applications in various clinical settings have been demonstrated, there is not yet any centralised, publicly accessible resource to facilitate the deployment of the technique. Here, we release a working version of TbCAPs, a new toolbox for CAP analysis, which includes all steps of the analytical pipeline, introduces new methodological developments that build on already existing concepts, and enables a facilitated inspection of CAPs and resulting metrics of brain dynamics. The toolbox is available on a public academic repository at https://c4science.ch/source/CAP_Toolbox.git. In addition, to illustrate the feasibility and usefulness of our pipeline, we describe an application to the study of human cognition. 
CAPs are constructed from resting-state fMRI using as seed the right dorsolateral prefrontal cortex, and, in a separate sample, we successfully predict a behavioural measure of continuous attentional performance from the metrics of CAP dynamics (R = 0.59).","hji,kes",0,0,0,2,0,software,a little questionable as life sci too +32077475,NERDD: a web portal providing access to in silico tools for drug discovery.,"SUMMARY:The New E-Resource for Drug Discovery (NERDD) is a quickly expanding web portal focused on the provision of peer-reviewed in silico tools for drug discovery. NERDD currently hosts tools for predicting the sites of metabolism (FAME) and metabolites (GLORY) of small organic molecules, for flagging compounds that are likely to interfere with biological assays (Hit Dexter), and for identifying natural products and natural product derivatives in large compound collections (NP-Scout). Several additional models and components are currently in development. AVAILABILITY AND IMPLEMENTATION:The NERDD web server is available at https://nerdd.zbh.uni-hamburg.de. Most tools are also available as software packages for local installation.","hji,kes",0,0,0,2,0,NA,"yikes - it's a site with tools on it, not like bio.tools which is a catelogue of tools" +32091591,6mA-Finder: a novel online tool for predicting DNA N6-methyladenine sites in genomes.,"

Motivation

DNA N6-methyladenine (6mA) has recently been found as an essential epigenetic modification, playing its roles in a variety of cellular processes. The abnormal status of DNA 6mA modification has been reported in cancer and other disease. The annotation of 6mA marks in genome is the first crucial step to explore the underlying molecular mechanisms including its regulatory roles.

Results

We present a novel online DNA 6mA site tool, 6mA-Finder, by incorporating seven sequence-derived information and three physicochemical-based features through recursive feature elimination strategy. Our multiple cross-validations indicate the promising accuracy and robustness of our model. 6mA-Finder outperforms its peer tools in general and species-specific 6mA site prediction, suggesting it can provide a useful resource for further experimental investigation of DNA 6mA modification.

Availability and implementation

https://bioinfo.uth.edu/6mA_Finder.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32100154,An Arts on Prescription Programme: Perspectives of the Cultural Institutions.,"Research on Arts on Prescription (AoP) programmes are on the increase and the participants' positive mental health outcomes are well-documented. However, there is insufficient research that considers the participating cultural institutions' perspectives. A qualitative focus group interview was conducted with the participating culture institutions in an AoP project in Denmark. Representatives from seven cultural institutions participated in the interview. The data was transcribed and analysed using Braun and Clark's (Qual Res Psychol 3(77):77-101. https://doi.org/10.1191/1478088706qp063oa , 2006) thematic approach. The cultural institutions were positive about the interdisciplinary collaboration with the Center for Mental Health and benefited from working with groups of people with mental health problems. They considered the collaboration to have encouraged skills development by working with groups that they did not regularly engaged with. If cultural institutions are to engage with the mental health wellbeing agenda then policy-driven initiatives can support collaborations that involve groups of people with mental health problems.","hji,kes",0,0,0,2,0,NA,not life sci +32154836,iATC-FRAKEL: a simple multi-label web server for recognizing anatomical therapeutic chemical classes of drugs with their fingerprints only.,"MOTIVATION:Anatomical therapeutic chemical (ATC) classification system is very important for drug utilization and studies. Correct prediction of the 14 classes in the first level for given drugs is an essential problem for the study on such system. Several multi-label classifiers have been proposed in this regard. However, only two of them provided the web servers and their performance was not very high. 
On the other hand, although some rest classifiers can provide better performance, they were built based on some prior knowledge on drugs, such as information of chemical-chemical interaction and chemical ontology, leading to limited applications. Furthermore, provided codes of these classifiers are almost inaccessible for pharmacologists. RESULTS:In this study, we built a simple web server, namely iATC-FRAKEL. This web server only required the SMILES format of drugs as input and extracted their fingerprints for making prediction. The performance of the iATC-FRAKEL was much higher than all existing web servers and was comparable to the best multi-label classifier but had much wider applications. Such web server can be visited at http://cie.shmtu.edu.cn/iatc/index. AVAILABILITY AND IMPLEMENTATION:The web server is available at http://cie.shmtu.edu.cn/iatc/index. CONTACT:chen_lei1@163.com. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32156760,"Cohort profile: social well-being and determinants of health study (SWADES), Kerala, India.","

Purpose

In response to the need for more advanced and longitudinal data concerning chronic diseases, behavioural risk factors and social support systems in India, the SWADES (Social Well-being and Determinants of Health Study) was established.

Participants

At baseline, 997 adults aged 30 years and over, living in the semi-urban area were interviewed in their home.

Findings to date

Data collected included self-reports of demographic details, health, depression, morbid conditions and healthcare utilisation, risk factors (physical, behavioural and social) of chronic diseases, common mental disorders, out-of-pocket expenditure, social support network, social cohesion, disability, education and wealth. Objective data for hypertension, diabetes and cognitive function were also collected.

Future plans

The first annual follow-up interviews were completed in 2019; the subsequent annual follow-up will be conducted until 2030. The SWADES data are held at the International Centre for Consortium Research in Social Care (ICRS), Rajagiri College of Social Science, Kerala, India. Procedures for data access, information on collaborations, publications and other details can be found at (http://icrs.in).","hji,kes",0,0,0,2,0,health and demographics data,project site and not life sci +32170928,primirTSS: an R package for identifying cell-specific microRNA transcription start sites.,"SUMMARY:The R/Bioconductor package primirTSS is a fast and convenient tool that allows implementation of the analytical method to identify transcription start sites of microRNAs by integrating ChIP-seq data of H3K4me3 and Pol II. It further ensures the precision by employing the conservation score and sequence features. The tool showed a good performance when using H3K4me3 or Pol II Chip-seq data alone as input, which brings convenience to applications where multiple datasets are hard to acquire. This flexible package is provided with both R-programming interfaces as well as graphical web interfaces. AVAILABILITY AND IMPLEMENTATION:primirTSS is available at: http://bioconductor.org/packages/primirTSS. The documentation of the package including an accompanying tutorial was deposited at: https://bioconductor.org/packages/release/bioc/vignettes/primirTSS/inst/doc/primirTSS.html. CONTACT:jwang@nju.edu.cn. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32176258,M2IA: a web server for microbiome and metabolome integrative analysis.,"MOTIVATION:Microbiome-metabolome association studies have experienced exponential growth for an in-depth understanding of the impact of microbiota on human health over the last decade. 
However, analyzing the resulting multi-omics data and their correlations remains a significant challenge due to the lack of a comprehensive computational tool that can facilitate data integration and interpretation. In this study, an automated microbiome and metabolome integrative analysis pipeline (M2IA) has been developed to meet the urgent needs for tools that can effectively integrate microbiome and metabolome data to derive biological insights. RESULTS:M2IA streamlines the integrative data analysis between metabolome and microbiome, from data preprocessing, univariate and multivariate statistical analyses, advanced functional analysis for biological interpretation, to a summary report. The functionality of M2IA was demonstrated using TwinsUK cohort datasets consisting of 1116 fecal metabolites and 16s rRNA microbiome from 786 individuals. Moreover, two important metabolic pathways, i.e. benzoate degradation and phosphotransferase system, were identified to be closely associated with obesity. AVAILABILITY AND IMPLEMENTATION:M2IA is public available at http://m2ia.met-bioinformatics.cn. CONTACT:yanni617@zju.edu.cn or fjf68@zju.edu.cn. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32219387,debCAM: a bioconductor R package for fully unsupervised deconvolution of complex tissues.,"

Summary

We develop a fully unsupervised deconvolution method to dissect complex tissues into molecularly distinctive tissue or cell subtypes based on bulk expression profiles. We implement an R package, deconvolution by Convex Analysis of Mixtures (debCAM) that can automatically detect tissue/cell-specific markers, determine the number of constituent subtypes, calculate subtype proportions in individual samples and estimate tissue/cell-specific expression profiles. We demonstrate the performance and biomedical utility of debCAM on gene expression, methylation, proteomics and imaging data. With enhanced data preprocessing and prior knowledge incorporation, debCAM software tool will allow biologists to perform a more comprehensive and unbiased characterization of tissue remodeling in many biomedical contexts.

Availability and implementation

http://bioconductor.org/packages/debCAM.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32241255,EpiMOLAS: an intuitive web-based framework for genome-wide DNA methylation analysis.,"

Background

DNA methylation is a crucial epigenomic mechanism in various biological processes. Using whole-genome bisulfite sequencing (WGBS) technology, methylated cytosine sites can be revealed at the single nucleotide level. However, the WGBS data analysis process is usually complicated and challenging.

Results

To alleviate the associated difficulties, we integrated the WGBS data processing steps and downstream analysis into a two-phase approach. First, we set up the required tools in Galaxy and developed workflows to calculate the methylation level from raw WGBS data and generate a methylation status summary, the mtable. This computation environment is wrapped into the Docker container image DocMethyl, which allows users to rapidly deploy an executable environment without tedious software installation and library dependency problems. Next, the mtable files were uploaded to the web server EpiMOLAS_web to link with the gene annotation databases that enable rapid data retrieval and analyses.

Conclusion

To our knowledge, the EpiMOLAS framework, consisting of DocMethyl and EpiMOLAS_web, is the first approach to include containerization technology and a web-based system for WGBS data analysis from raw data processing to downstream analysis. EpiMOLAS will help users cope with their WGBS data and also conduct reproducible analyses of publicly available data, thereby gaining insights into the mechanisms underlying complex biological phenomenon. The Galaxy Docker image DocMethyl is available at https://hub.docker.com/r/lsbnb/docmethyl/. EpiMOLAS_web is publicly accessible at http://symbiosis.iis.sinica.edu.tw/epimolas/.","hji,kes",0,0,0,2,0,software,NA +32243271,Identification of key genes and pathways associated with topotecan treatment using multiple bioinformatics tools.,"

Background

The goal of this study is to determine critical genes and pathways associated with topotecan using publicly accessible bioinformatics tools.

Methods

Topotecan signatures were downloaded from the Library of Integrated Network-Based Cellular Signatures (LINCS) database (http://www.ilincs.org/ilincs/). Differentially expressed genes (DEGs) were defined as genes that appeared at least three times with p values <0.05 and a fold change of ≥50% (|log2FC| ≥ 0.58). Hub genes were identified by evaluating the following parameters using a protein-protein interaction network: node degrees, betweenness, and eigenfactor scores. Hub genes and the top-40 DEGs by |log2FC| were used to generate a Venn diagram, and key genes were identified. Functional and pathway enrichment analysis was performed using the Kyoto Encyclopedia of Genes and Genomes (KEGG) databases. Information on ovarian cancer patients derived from The Cancer Genome Atlas (TCGA) database was analyzed, and the effect of topotecan on the protein expression was examined by Western blotting.

Results

Eleven topotecan signatures were downloaded, and 65 upregulated and 87 downregulated DEGs were identified. Twenty-one hub genes were identified. We identified eight key genes as upregulated genes, including NFKBIA, IKBKB, GADD45A, CDKN1A, and HIST2H2BE, while EZH2, CDC20, and CDK7 were identified as downregulated genes, which play critical roles in the cell cycle and carcinogenesis in KEGG analysis. In the TCGA analysis, the CDKN1A+/EZH2- group had the longest median survival, while the CDKN1A-/EZH2+ group had the shortest median survival. Topotecan-treated murine ovarian (MOSEC), colorectal (CT26), and lung (LLC) cancer cell lines displayed upregulated CDKN1A encoding p21 and downregulated Ezh2.

Conclusion

Using publicly accessible bioinformatics tools, we evaluated key genes and pathways related to topotecan and examined the key genes using the TCGA database and in vitro studies.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +32270255,Major depressive disorder and cardiometabolic diseases: a bidirectional Mendelian randomisation study.,"AIMS/HYPOTHESIS:Observational studies have shown a bidirectional association between major depressive disorder (MDD) and cardiometabolic diseases. We conducted a two-sample bidirectional Mendelian randomisation (MR) study to assess the causal associations of MDD with type 2 diabetes, coronary artery disease (CAD) and heart failure and vice versa. METHODS:We extracted summary-level data for MDD, type 2 diabetes, CAD and heart failure from corresponding published large genome-wide association studies of individuals mainly of European-descent. In total, 96 SNPs for MDD, 202 SNPs for type 2 diabetes, 44 SNPs for CAD and 12 SNPs for heart failure were proposed as instrumental variables at the genome-wide significance level (p < 5 × 10-8). The random-effects inverse-variance weighted method was used for the main analyses. RESULTS:Genetic liability to MDD was significantly associated with type 2 diabetes and CAD at the Bonferroni-corrected significance level. The ORs of type 2 diabetes and CAD were respectively 1.26 (95% CI 1.10, 1.43; p = 6 × 10-4) and 1.16 (95% CI 1.05, 1.29; p = 0.0047) per one-unit increase in loge odds of MDD. There was a suggestive association between MDD and heart failure (OR 1.11 [95% CI 1.01, 1.21]; p = 0.033). We found limited evidence supporting causal effects of cardiometabolic diseases on MDD risk in the reverse MR analyses. CONCLUSIONS/INTERPRETATION:The present study strengthened the evidence that MDD is a potential risk factor for type 2 diabetes and CAD. Whether MDD is causally related to heart failure needs further study. 
DATA AVAILABILITY:All data included in this study were uploaded as supplements and are also publicly available through published GWASs and open GWAS datasets (UK Biobank, 23andMe and Psychiatric Genomics: https://datashare.is.ed.ac.uk/handle/10283/3203; DIAGRAM: http://diagram-consortium.org/downloads.html; CARDIoGRAMplusCD4: www.cardiogramplusc4d.org/; HERMES: http://www.kp4cd.org/datasets/mi). Graphical abstract.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +32271863,gplas: a comprehensive tool for plasmid analysis using short-read graphs.,"

Summary

Plasmids can horizontally transmit genetic traits, enabling rapid bacterial adaptation to new environments and hosts. Short-read whole-genome sequencing data are often applied to large-scale bacterial comparative genomics projects but the reconstruction of plasmids from these data is facing severe limitations, such as the inability to distinguish plasmids from each other in a bacterial genome. We developed gplas, a new approach to reliably separate plasmid contigs into discrete components using sequence composition, coverage, assembly graph information and network partitioning based on a pruned network of plasmid unitigs. Gplas facilitates the analysis of large numbers of bacterial isolates and allows a detailed analysis of plasmid epidemiology based solely on short-read sequence data.

Availability and implementation

Gplas is written in R, Bash and uses a Snakemake pipeline as a workflow management system. Gplas is available under the GNU General Public License v3.0 at https://gitlab.com/sirarredondo/gplas.git.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32271876,MODifieR: an Ensemble R Package for Inference of Disease Modules from Transcriptomics Networks.,"

Motivation

Complex diseases are due to the dense interactions of many disease-associated factors that dysregulate genes that in turn form the so-called disease modules, which have shown to be a powerful concept for understanding pathological mechanisms. There exist many disease module inference methods that rely on somewhat different assumptions, but there is still no gold standard or best-performing method. Hence, there is a need for combining these methods to generate robust disease modules.

Results

We developed MODule IdentiFIER (MODifieR), an ensemble R package of nine disease module inference methods from transcriptomics networks. MODifieR uses standardized input and output allowing the possibility to combine individual modules generated from these methods into more robust disease-specific modules, contributing to a better understanding of complex diseases.

Availability and implementation

MODifieR is available under the GNU GPL license and can be freely downloaded from https://gitlab.com/Gustafsson-lab/MODifieR and as a Docker image from https://hub.docker.com/r/ddeweerd/modifier.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32286627,rMAPS2: an update of the RNA map analysis and plotting server for alternative splicing regulation.,"The rMAPS2 (RNA Map Analysis and Plotting Server 2) web server, freely available at http://rmaps.cecsresearch.org/, has provided the high-throughput sequencing data research community with curated tools for the identification of RNA binding protein sites. rMAPS2 analyzes differential alternative splicing or CLIP peak data obtained from high-throughput sequencing data analysis tools like MISO, rMATS, Piranha, PIPE-CLIP and PARalyzer, and then, graphically displays enriched RNA-binding protein target sites. The initial release of rMAPS focused only on the most common alternative splicing event, skipped exon or exon skipping. However, there was a high demand for the analysis of other major types of alternative splicing events, especially for retained intron events since this is the most common type of alternative splicing in plants, such as Arabidopsis thaliana. Here, we expanded the implementation of rMAPS2 to facilitate analyses for all five major types of alternative splicing events: skipped exon, mutually exclusive exons, alternative 5' splice site, alternative 3' splice site and retained intron. In addition, by employing multi-threading, rMAPS2 has vastly improved the user experience with significant reductions in running time, ∼3.5 min for the analysis of all five major alternative splicing types at once.","hji,kes",0,0,0,2,0,software,NA +32291734,"The Auditory English Lexicon Project: A multi-talker, multi-region psycholinguistic database of 10,170 spoken words and nonwords.","The Auditory English Lexicon Project (AELP) is a multi-talker, multi-region psycholinguistic database of 10,170 spoken words and 10,170 spoken nonwords. 
Six tokens of each stimulus were recorded as 44.1-kHz, 16-bit, mono WAV files by native speakers of American, British, and Singapore English, with one from each gender. Intelligibility norms, as determined by average identification scores and confidence ratings from between 15 and 20 responses per token, were obtained from 561 participants. Auditory lexical decision accuracies and latencies, with between 25 and 36 responses per token, were obtained from 438 participants. The database also includes a variety of lexico-semantic variables and structural indices for the words and nonwords, as well as participants' individual difference measures such as age, gender, language background, and proficiency. Taken together, there are a total of 122,040 sound files and over 4 million behavioral data points in the AELP. We describe some of the characteristics of this database. This resource is freely available from a website ( https://inetapps.nus.edu.sg/aelp/ ) hosted by the Department of Psychology at the National University of Singapore.","hji,kes",1,1,2,2,1,auditory and lexical db,not life sci; reassessed and re-scored - based on broaded def of life sci +32299846,Pilot Study of Return of Genetic Results to Patients in Adult Nephrology.,"

Background and objectives

Actionable genetic findings have implications for care of patients with kidney disease, and genetic testing is an emerging tool in nephrology practice. However, there are scarce data regarding best practices for return of results and clinical application of actionable genetic findings for kidney patients.

Design, setting, participants, & measurements

We developed a return of results workflow in collaborations with clinicians for the retrospective recontact of adult nephrology patients who had been recruited into a biobank research study for exome sequencing and were identified to have medically actionable genetic findings.

Results

Using this workflow, we attempted to recontact a diverse pilot cohort of 104 nephrology research participants with actionable genetic findings, encompassing 34 different monogenic etiologies of nephropathy and five single-gene disorders recommended by the American College of Medical Genetics and Genomics for return as medically actionable secondary findings. We successfully recontacted 64 (62%) participants and returned results to 41 (39%) individuals. In each case, the genetic diagnosis had meaningful implications for the patients' nephrology care. Through implementation efforts and qualitative interviews with providers, we identified over 20 key challenges associated with returning results to study participants, and found that physician knowledge gaps in genomics was a recurrent theme. We iteratively addressed these challenges to yield an optimized workflow, which included standardized consultation notes with tailored management recommendations, monthly educational conferences on core topics in genomics, and a curated list of expert clinicians for patients requiring extranephrologic referrals.

Conclusions

Developing the infrastructure to support return of genetic results in nephrology was resource-intensive, but presented potential opportunities for improving patient care.

Podcast

This article contains a podcast at https://www.asn-online.org/media/podcast/CJASN/2020_04_16_12481019.mp3.","hji,kes",0,0,0,2,0,NA,podcast +32312909,Optimizing Resources in Children's Surgical Care: An Update on the American College of Surgeons' Verification Program.,"Surgical procedures are performed in the United States in a wide variety of clinical settings and with variation in clinical outcomes. In May 2012, the Task Force for Children's Surgical Care, an ad hoc multidisciplinary group comprising physicians representing specialties relevant to pediatric perioperative care, was convened to generate recommendations to optimize the delivery of children's surgical care. This group generated a white paper detailing the consensus opinions of the involved experts. Following these initial recommendations, the American College of Surgeons (ACS), Children's Hospital Association, and Task Force for Children's Surgical Care, with input from all related perioperative specialties, developed and published specific and detailed resource and quality standards designed to improve children's surgical care (https://www.facs.org/quality-programs/childrens-surgery/childrens-surgery-verification). In 2015, with the endorsement of the American Academy of Pediatrics (https://pediatrics.aappublications.org/content/135/6/e1538), the ACS established a pilot verification program. In January 2017, after completion of the pilot program, the ACS Children's Surgery Verification Quality Improvement Program was officially launched. Verified sites are listed on the program Web site at https://www.facs.org/quality-programs/childrens-surgery/childrens-surgery-verification/centers, and more than 150 are interested in verification. This report provides an update on the ACS Children's Surgery Verification Quality Improvement Program as it continues to evolve.","hji,kes",0,0,0,2,0,NA,program - medical +32321166,APAlyzer: a bioinformatics package for analysis of alternative polyadenylation isoforms.,"

Summary

Most eukaryotic genes produce alternative polyadenylation (APA) isoforms. APA is dynamically regulated under different growth and differentiation conditions. Here, we present a bioinformatics package, named APAlyzer, for examining 3'UTR APA, intronic APA and gene expression changes using RNA-seq data and annotated polyadenylation sites in the PolyA_DB database. Using APAlyzer and data from the GTEx database, we present APA profiles across human tissues.

Availability and implementation

APAlyzer is freely available at https://bioconductor.org/packages/release/bioc/html/APAlyzer.html as an R/Bioconductor package.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32324845,alona: a web server for single-cell RNA-seq analysis.,"

Summary

Single-cell RNA sequencing (scRNA-seq) is a technology to measure gene expression in single cells. It has enabled discovery of new cell types and established cell type atlases of tissues and organs. The widespread adoption of scRNA-seq has created a need for user-friendly software for data analysis. We have developed a web server, alona that incorporates several of the most popular single-cell analysis algorithms into a flexible pipeline. alona can perform quality filtering, normalization, batch correction, clustering, cell type annotation and differential gene expression analysis. Data are visualized in the web browser using an interface based on JavaScript, allowing the user to query genes of interest and visualize the cluster structure. alona accepts a compressed gene expression matrix and identifies cell clusters with a graph-based clustering strategy. Cell types are identified from a comprehensive collection of marker genes or by specifying a custom set of marker genes.

Availability and implementation

The service runs at https://alona.panglaodb.se and the Python package can be downloaded from https://oscar-franzen.github.io/adobo/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32330167,geoBoundaries: A global database of political administrative boundaries.,"We present the geoBoundaries Global Administrative Database (geoBoundaries): an online, open license resource of the geographic boundaries of political administrative divisions (i.e., state, county). Contrasted to other resources geoBoundaries (1) provides detailed information on the legal open license for every boundary in the repository, and (2) focuses on provisioning highly precise boundary data to support accurate, replicable scientific inquiry. Further, all data is released in a structured form, allowing for the integration of geoBoundaries with large-scale computational workflows. Our database has records for every country around the world, with up to 5 levels of administrative hierarchy. The database is accessible at http://www.geoboundaries.org, and a static version is archived on the Harvard Dataverse.","hji,kes",0,0,0,2,0,non-bio,not life sci +32343964,Longitudinal Competence Programs for Basic Point-of-Care Ultrasound in Critical Care: A Systematic Review.,"

Background

Competence in point-of-care ultrasound (PoCUS) is widely recommended by several critical care societies. Despite numerous introductory short courses, very few doctors attain PoCUS competence because of the challenges in establishing longitudinal competence programs.

Research question

To evaluate the methodologic quality of the literature on basic PoCUS competence processes in critical care.

Study design and methods

A systematic review to identify manuscripts meeting predefined inclusion criteria was performed using three medical databases (PubMed, OVID Embase, and Web of Science); using extra references from original articles, review articles, and expert panel guidelines; and by directly contacting authors for further information if required. The objectives, domains, and inclusion and exclusion criteria of the review were determined during discussions between experienced PoCUS educators. Data extraction and analyses were performed independently by three reviewers.

Results

Of the 5,408 abstracts extracted, 42 met the inclusion criteria for longitudinal PoCUS competence. Each study was described along four broad categories: general information, study design, and trainee characteristics; description of introductory course; description of longitudinal competence program; and grading of overall methodologic quality on a 4-point Likert scale. Thirty-nine studies (92.9%) were from a single center. Most studies lacked important details on study methodology such as prior ultrasound experience, pre- and postcourse tests, models for hands-on sessions, ratio of instructors to trainees, competence assessment criteria, number of scans performed by individual trainees, and formative and summative assessments. The studies were rated as follows: poor = 19 (45.2%), average = 15 (35.7%), good = 4 (9.5%), and excellent = 4 (9.5%).

Interpretation

Ther is very little high-quality evidence on PoCUS competence. To help frame policy guidelines to improve PoCUS education, there is a need for well-designed longitudinal studies on PoCUS competence.

Trial registry

PROSPERO database; No.: CRD42018094033; URL: https://www.crd.york.ac.uk/PROSPERO/.","hji,kes",0,0,0,2,0,NA,NA +32347334,Retrograde intrarenal surgery of renal stones: a critical multi-aspect evaluation of the outcomes by the Turkish Academy of Urology Prospective Study Group (ACUP Study).,"

Aims

To outline and evaluate the incidence, management and follow-up of the residual fragments (RFs) following retrograde intrarenal surgery (RIRS) of renal stones by the Turkish Academy of Urology Prospective Study Group (ACUP Study).

Methods

Following the ethical committee approval, 15 centers providing data regarding the incidence, management, and follow-up of RFs after RIRS were included and all relevant information was recorded into the same electronic database program ( https://acup.uroturk.org.tr/ ) created by Turkish Urology Academy for Residual Stone Study.

Results

A total of 1112 cases underwent RIRS for renal calculi and RFs were observed in 276 cases (24.8%). Of all the parameters evaluated, our results demonstrated no statistically significant relation between preoperative DJ stenting and the presence of RFs (χ2 (1) = 158.418; p = 0.099). RFs were significantly higher in patients treated with UAS (82 patients, 29.3%) during the procedure compared to the cases who did not receive UAS (194 patients, 23.3%) (χ2 (1) = 3.999; p = 0.046). The mean period for a secondary intervention after RIRS was 28.39 (± 12.52) days. Regarding the procedures applied for RF removal, re-RIRS was the most commonly performed approach (56%).

Conclusions

Despite the reported safe and successful outcomes, the incidence of RFs is higher, after the RIRS procedure particularly in cases with relatively larger calculi. Such cases need to be followed in a close manner and although a second flexible ureteroscopy is the treatment of choice for fragment removal in the majority of these patients, shock wave lithotripsy and percutaneous nephrolithotomy may also be preferred in selected cases.","hji,kes",0,0,0,2,0,NA,clinical; not descriptive of resource +32363380,CeRNASeek: an R package for identification and analysis of ceRNA regulation.,"Competitive endogenous RNA (ceRNA) represents a novel layer of gene regulation that controls both physiological and pathological processes. However, there is still lack of computational tools for quickly identifying ceRNA regulation. To address this problem, we presented an R-package, CeRNASeek, which allows identifying and analyzing ceRNA-ceRNA interactions by integration of multiple-omics data. CeRNASeek integrates six widely used computational methods to identify ceRNA-ceRNA interactions, including two global and four context-specific ceRNA regulation prediction methods. In addition, it provides several downstream analyses for predicted ceRNA-ceRNA pairs, including regulatory network analysis, functional annotation and survival analysis. With examples of cancer-related ceRNA prioritization and cancer subtyping, we demonstrate that CeRNASeek is a valuable tool for investigating the function of ceRNAs in complex diseases. In summary, CeRNASeek provides a comprehensive and efficient tool for identifying and analysis of ceRNA regulation. 
The package is available on the Comprehensive R Archive Network (CRAN) at https://CRAN.R-project.org/package=CeRNASeek.","hji,kes",0,0,0,2,0,software,NA +32367143,"Not just a research method: If used with caution, can job-exposure matrices be a useful tool in the practice of occupational medicine and public health?","The recent editorial by Dr Susan Peters ""Although a valuable method in occupational epidemiology, job-exposure matrices are no magic fix"" ably describes the strengths and limitations of job-exposure matrix (JEM) approaches in occupational epidemiology research (1). In addition to their use in research, we would like to add that JEM may also be of use in compensation and surveillance efforts in occupational health. JEM could assist the compensation process by supporting the assessment of relevant exposures related to specific health conditions (2). The potential usefulness of a JEM as a decision tool for compensation of work-related musculoskeletal disorders has been examined (3). Because occupational diseases are often under-recognized, another practical application is using a JEM to screen for occupational exposures as part of health surveillance. Use of JEM to screen for asbestos and wood dust exposure in the clinical setting has shown promising results (4-6). By summarizing multiple exposures at a job level (7), JEM may also assist policy-makers in setting priorities for hazards and controls at work, as well as occupational practitioners to target prevention efforts and direct the conduct of more precise exposure measures to particular jobs. Sharing JEM across different countries may be useful in providing estimates of exposures across larger populations to calculate global burden of disease related to occupational exposure. The JEMINI (JEM InterNatIonal) initiative was launched to explore the possibility of developing international JEM that could be used across countries (8). 
Beginning with physical (biomechanical) exposures, this open group has started homogenizing job coding systems and comparing some available JEM. Estimating differences in the level of exposure between countries will require much more work, without guaranteed success. As Peters mentioned, many limitations exist in the use of JEM. Users of JEM must consider the source of exposure data - expert assessments, data collected from individual workers, or environmental sampling. The coding of occupations is time consuming and can introduce error (9), and more testing of and comparison with automated job coding systems is needed (10). JEM reflect an ""average"" level of exposure within a job at the expense of individual variation. At population level, JEM can offer a useful estimate of exposures. If used at an individual level in a clinical or compensation setting, JEM cannot replace the professionals involved in exposure assessment but may help them focus their action more effectively on complex situations that require their expertise. In conclusion, these JEM developed for research might also be used as a public health tool, provided that their limitations are properly taken into account. References 1. Peters S. Although a valuable method in occupational epidemiology, job-exposure matrices are no magic fix. Scand J Work Environ Health 2020;46:2314. https://doi.org/10.5271/sjweh.3894 2. Kerbrat J, Descatha A. (The recognition of health consequences of difficult working conditions in France and its evaluation with the use of a job-exposure matrix). Arch Mal Prof Environ. 2018;79:493500. https://doi.org/10.1016/j.admp.2017.12.001 3. Fadel M, Valter R, Quignette A, Descatha A. Usefulness of a job-exposure matrix « MADE » as a decision tool for compensation of work-related musculoskeletal disorders. Eur J Public Health 2019;29:86870. https://doi.org/10.1093/eurpub/cky274 4. Lorentz E, Despreaux T, Quignette A, Chinet T, Descatha A. 
(Screening of occupational exposure to asbestos and silica by job-exposure matrix among patients with lung cancer and mesothelioma). Rev Mal Respir. 2019;36:108895. https://doi.org/10.1016/j.rmr.2019.08.006 5. Imbernon E, Goldberg M, Spyckerell Y, Steinmetz J, Bonenfant S, Fournier B. (Use of a job-exposure matrix for the screening of occupational exposure to asbestos). Rev Epidemiol Sante Publique 2004;52:717. https://doi.org/10.1016/S0398-7620(04)99018-9 6. Carton M, Bonnaud S, Nachtigal M, Serrano A, Carole C, Bonenfant S, et al. Post-retirement surveillance of workers exposed to asbestos or wood dust: first results of the French national SPIRALE Program. Epidemiol Prev. 2011;35:31523. 7. Guéguen A, Goldberg M, Bonenfant S, Martin JC. Using a representative sample of workers for constructing the SUMEX French general population based job-exposure matrix. Occup Environ Med. 2004;61:58693. https://doi.org/10.1136/oem.2003.010660 8. Descatha A, Evanoff BA, Andersen JH, Fadel M, Ngabirano L, Leclerc A, et al. JEMINI (Job Exposure Matrix InterNatIonal) Initiative: a Utopian Possibility for Helping Occupational Exposure Assessment All Around the World? J Occup Environ Med. 2019;61:e3201. https://doi.org/10.1097/JOM.0000000000001631 9. Petersen SB, Flachs EM, Svendsen SW, Marott JL, Budtz-Jørgensen E, Hansen J, et al. Influence of errors in job codes on job exposure matrix-based exposure assessment in the register-based occupational cohort DOC*X. Scand J Work Environ Health 2020;46:25967. https://doi.org/10.5271/sjweh.3857 10. Buckner-Petty S, Dale AM, Evanoff BA. Efficiency of autocoding programs for converting job descriptors into standard occupational classification (SOC) codes. Am J Ind Med. 2019;62:5968. 
https://doi.org/10.1002/ajim.22928.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +32383755,ARIAweb: a server for automated NMR structure calculation.,"Nuclear magnetic resonance (NMR) spectroscopy is a method of choice to study the dynamics and determine the atomic structure of macromolecules in solution. The standalone program ARIA (Ambiguous Restraints for Iterative Assignment) for automated assignment of nuclear Overhauser enhancement (NOE) data and structure calculation is well established in the NMR community. To ultimately provide a perfectly transparent and easy to use service, we designed an online user interface to ARIA with additional functionalities. Data conversion, structure calculation setup and execution, followed by interactive visualization of the generated 3D structures are all integrated in ARIAweb and freely accessible at https://ariaweb.pasteur.fr.","hji,kes",0,0,0,2,0,software,questionable for life sci but is for macromolecules +32394182,LexOPS: An R package and user interface for the controlled generation of word stimuli.,"LexOPS is an R package and user interface designed to facilitate the generation of word stimuli for use in research. Notably, the tool permits the generation of suitably controlled word lists for any user-specified factorial design and can be adapted for use with any language. It features an intuitive graphical user interface, including the visualization of both the distributions within and relationships among variables of interest. An inbuilt database of English words is also provided, including a range of lexical variables commonly used in psycholinguistic research. This article introduces LexOPS, outlining the features of the package and detailing the sources of the inbuilt dataset. We also report a validation analysis, showing that, in comparison to stimuli of existing studies, stimuli optimized with LexOPS generally demonstrate greater constraint and consistency in variable manipulation and control. 
Current instructions for installing and using LexOPS are available at https://JackEdTaylor.github.io/LexOPSdocs/ .","hji,kes",0,0,0,2,0,software,also linguistics +32421805,IRIS3: integrated cell-type-specific regulon inference server from single-cell RNA-Seq.,"A group of genes controlled as a unit, usually by the same repressor or activator gene, is known as a regulon. The ability to identify active regulons within a specific cell type, i.e., cell-type-specific regulons (CTSR), provides an extraordinary opportunity to pinpoint crucial regulators and target genes responsible for complex diseases. However, the identification of CTSRs from single-cell RNA-Seq (scRNA-Seq) data is computationally challenging. We introduce IRIS3, the first-of-its-kind web server for CTSR inference from scRNA-Seq data for human and mouse. IRIS3 is an easy-to-use server empowered by over 20 functionalities to support comprehensive interpretations and graphical visualizations of identified CTSRs. CTSR data can be used to reliably characterize and distinguish the corresponding cell type from others and can be combined with other computational or experimental analyses for biomedical studies. CTSRs can, therefore, aid in the discovery of major regulatory mechanisms and allow reliable constructions of global transcriptional regulation networks encoded in a specific cell type. The broader impact of IRIS3 includes, but is not limited to, investigation of complex diseases hierarchies and heterogeneity, causal gene regulatory network construction, and drug development. IRIS3 is freely accessible from https://bmbl.bmi.osumc.edu/iris3/ with no login requirement.","hji,kes",0,0,0,2,0,software,submit jobs +32421835,InterPred: a webtool to predict chemical autofluorescence and luminescence interference.,"High-throughput screening (HTS) research programs for drug development or chemical hazard assessment are designed to screen thousands of molecules across hundreds of biological targets or pathways. 
Most HTS platforms use fluorescence and luminescence technologies, representing more than 70% of the assays in the US Tox21 research consortium. These technologies are subject to interferent signals largely explained by chemicals interacting with light spectrum. This phenomenon results in up to 5-10% of false positive results, depending on the chemical library used. Here, we present the InterPred webserver (version 1.0), a platform to predict such interference chemicals based on the first large-scale chemical screening effort to directly characterize chemical-assay interference, using assays in the Tox21 portfolio specifically designed to measure autofluorescence and luciferase inhibition. InterPred combines 17 quantitative structure activity relationship (QSAR) models built using optimized machine learning techniques and allows users to predict the probability that a new chemical will interfere with different combinations of cellular and technology conditions. InterPred models have been applied to the entire Distributed Structure-Searchable Toxicity (DSSTox) Database (∼800,000 chemicals). The InterPred webserver is available at https://sandbox.ntp.niehs.nih.gov/interferences/.","hji,kes",0,0,0,2,0,software,NA +32427333,webPSN v2.0: a webserver to infer fingerprints of structural communication in biomacromolecules.,"A mixed Protein Structure Network (PSN) and Elastic Network Model-Normal Mode Analysis (ENM-NMA)-based strategy (i.e. PSN-ENM) was developed to investigate structural communication in bio-macromolecules. Protein Structure Graphs (PSGs) are computed on a single structure, whereas information on system dynamics is supplied by ENM-NMA. The approach was implemented in a webserver (webPSN), which was significantly updated herein. The webserver now handles both proteins and nucleic acids and relies on an internal upgradable database of network parameters for ions and small molecules in all PDB structures. 
Apart from the radical restyle of the server and some changes in the calculation setup, other major novelties concern the possibility to: a) compute the differences in nodes, links, and communication pathways between two structures (i.e. network difference) and b) infer links, hubs, communities, and metapaths from consensus networks computed on a number of structures. These new features are useful to identify commonalties and differences between two different functional states of the same system or structural-communication signatures in homologous or analogous systems. The output analysis relies on 3D-representations, interactive tables and graphs, also available for download. Speed and accuracy make this server suitable to comparatively investigate structural communication in large sets of bio-macromolecular systems. URL: http://webpsn.hpc.unimore.it.","hji,kes",0,0,0,2,0,software,NA +32459325,PaCRISPR: a server for predicting and visualizing anti-CRISPR proteins.,"Anti-CRISPRs are widespread amongst bacteriophage and promote bacteriophage infection by inactivating the bacterial host's CRISPR-Cas defence system. Identifying and characterizing anti-CRISPR proteins opens an avenue to explore and control CRISPR-Cas machineries for the development of new CRISPR-Cas based biotechnological and therapeutic tools. Past studies have identified anti-CRISPRs in several model phage genomes, but a challenge exists to comprehensively screen for anti-CRISPRs accurately and efficiently from genome and metagenome sequence data. Here, we have developed an ensemble learning based predictor, PaCRISPR, to accurately identify anti-CRISPRs from protein datasets derived from genome and metagenome sequencing projects. PaCRISPR employs different types of feature recognition united within an ensemble framework. 
Extensive cross-validation and independent tests show that PaCRISPR achieves a significantly more accurate performance compared with homology-based baseline predictors and an existing toolkit. The performance of PaCRISPR was further validated in discovering anti-CRISPRs that were not part of the training for PaCRISPR, but which were recently demonstrated to function as anti-CRISPRs for phage infections. Data visualization on anti-CRISPR relationships, highlighting sequence similarity and phylogenetic considerations, is part of the output from the PaCRISPR toolkit, which is freely available at http://pacrispr.erc.monash.edu/.","hji,kes",0,0,0,2,0,software,no but a nice visualization dashboard +32467965,AOP4EUpest: mapping of pesticides in adverse outcome pathways using a text mining tool.,"

Motivation

Exposure to pesticides may lead to adverse health effects in human populations, in particular vulnerable groups. The main long-term health concerns are neurodevelopmental disorders, carcinogenicity as well as endocrine disruption possibly leading to reproductive and metabolic disorders. Adverse outcome pathways (AOP) consist in linear representations of mechanistic perturbations at different levels of the biological organization. Although AOPs are chemical-agnostic, they can provide a better understanding of the Mode of Action of pesticides and can support a rational identification of effect markers.

Results

With the increasing amount of scientific literature and the development of biological databases, investigation of putative links between pesticides, from various chemical groups and AOPs using the biological events present in the AOP-Wiki database is now feasible. To identify co-occurrence between a specific pesticide and a biological event in scientific abstracts from the PubMed database, we used an updated version of the artificial intelligence-based AOP-helpFinder tool. This allowed us to decipher multiple links between the studied substances and molecular initiating events, key events and adverse outcomes. These results were collected, structured and presented in a web application named AOP4EUpest that can support regulatory assessment of the prioritized pesticides and trigger new epidemiological and experimental studies.

Availability and implementation

http://www.biomedicale.parisdescartes.fr/aop4EUpest/home.php.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +32478594,"IMABASE: A new set of 313 colourised line drawings standardised in French for name agreement, image agreement, conceptual familiarity, age-of-acquisition, and imageability.","We provide normative data for a new set of 313 colourised line drawings. The drawings were standardised on name agreement (N = 60 participants), image agreement (N = 34), conceptual familiarity (N = 36), age of acquisition (N = 35), and imageability (N = 35). Objective visual complexity measures are given for the pictures, and objective word frequencies are provided for the modal names of the drawings. Reliability measures for the collected norms are very high. There are high levels of agreement between the names given by the participants and the drawings and comparative analyses indicate that the distribution of name agreement scores is very similar in both our own database and the MultiPic database (Duñabeitia et al., 2018). A novel ""picture-choice task"" used to assess name-image agreement (N = 30) reveals that the great majority of the IMABASE pictures that are also present in MultiPic are rated as providing better pictorial representations of the corresponding concepts. Finally, most of the correlations are comparable with those reported in other normative studies on colourised drawings. The whole set of pictures is freely available from https://leadserv.u-bourgogne.fr/~lead/imabase/ and the norms are available as Supplementary Material.","hji,kes",0,0,0,2,0,NA,not life sci +32479601,CReSCENT: CanceR Single Cell ExpressioN Toolkit.,"CReSCENT: CanceR Single Cell ExpressioN Toolkit (https://crescent.cloud), is an intuitive and scalable web portal incorporating a containerized pipeline execution engine for standardized analysis of single-cell RNA sequencing (scRNA-seq) data. 
While scRNA-seq data for tumour specimens are readily generated, subsequent analysis requires high-performance computing infrastructure and user expertise to build analysis pipelines and tailor interpretation for cancer biology. CReSCENT uses public data sets and preconfigured pipelines that are accessible to computational biology non-experts and are user-editable to allow optimization, comparison, and reanalysis for specific experiments. Users can also upload their own scRNA-seq data for analysis and results can be kept private or shared with other users.","hji,kes",0,0,0,2,0,software,NA +32491175,Tox21BodyMap: a webtool to map chemical effects on the human body.,"To support rapid chemical toxicity assessment and mechanistic hypothesis generation, here we present an intuitive webtool allowing a user to identify target organs in the human body where a substance is estimated to be more likely to produce effects. This tool, called Tox21BodyMap, incorporates results of 9,270 chemicals tested in the United States federal Tox21 research consortium in 971 high-throughput screening (HTS) assays whose targets were mapped onto human organs using organ-specific gene expression data. Via Tox21BodyMap's interactive tools, users can visualize chemical target specificity by organ system, and implement different filtering criteria by changing gene expression thresholds and activity concentration parameters. Dynamic network representations, data tables, and plots with comprehensive activity summaries across all Tox21 HTS assay targets provide an overall picture of chemical bioactivity. Tox21BodyMap webserver is available at https://sandbox.ntp.niehs.nih.gov/bodymap/.","hji,kes",0,0,0,2,0,software,Chuck Check - discussion - webserver - N +32496546,SNPnexus: a web server for functional annotation of human genome sequence variation (2020 update).,"SNPnexus is a web-based annotation tool for the analysis and interpretation of both known and novel sequencing variations. 
Since its last release, SNPnexus has received continual updates to expand the range and depth of annotations provided. SNPnexus has undergone a complete overhaul of the underlying infrastructure to accommodate faster computational times. The scope for data annotation has been substantially expanded to enhance biological interpretations of queried variants. This includes the addition of pathway analysis for the identification of enriched biological pathways and molecular processes. We have further expanded the range of user directed annotation fields available for the study of cancer sequencing data. These new additions facilitate investigations into cancer driver variants and targetable molecular alterations within input datasets. New user directed filtering options have been coupled with the addition of interactive graphical and visualization tools. These improvements streamline the analysis of variants derived from large sequencing datasets for the identification of biologically and clinically significant subsets in the data. SNPnexus is the most comprehensible web-based application currently available and these new set of updates ensures that it remains a state-of-the-art tool for researchers. SNPnexus is freely available at https://www.snp-nexus.org.","hji,kes",0,0,0,2,0,software,NA +32510568,CCLA: an accurate method and web server for cancer cell line authentication using gene expression profiles.,"Cancer cell lines (CCLs) as important model systems play critical roles in cancer research. The misidentification and contamination of CCLs are serious problems, leading to unreliable results and waste of resources. Current methods for CCL authentication are mainly based on the CCL-specific genetic polymorphism, whereas no method is available for CCL authentication using gene expression profiles. 
Here, we developed a novel method and homonymic web server (CCLA, Cancer Cell Line Authentication, http://bioinfo.life.hust.edu.cn/web/CCLA/) to authenticate 1291 human CCLs of 28 tissues using gene expression profiles. CCLA showed an excellent speed advantage and high accuracy for CCL authentication, a top 1 accuracy of 96.58 or 92.15% (top 3 accuracy of 100 or 95.11%) for microarray or RNA-Seq validation data (719 samples, 461 CCLs), respectively. To the best of our knowledge, CCLA is the first approach to authenticate CCLs using gene expression data. Users can freely and conveniently authenticate CCLs using gene expression profiles or NCBI GEO accession on CCLA website.","hji,kes",0,0,0,2,0,software,NA +32548865,The UK Veterinary Immunological Toolbox Website: promoting vaccine research by facilitating communication and removing reagent barriers.,"Using the best animal models to study immune responses against specific pathogens or vaccines can dramatically accelerate our understanding. Veterinary species are well studied, particularly livestock, to reduce their disease burden. They have also proven to be powerful models, especially for zoonotic pathogens and novel vaccination strategies. A prerequisite for any model selection is having the right quality and range of species-specific immunological reagents. To help promote the widest possible use of veterinary species, an open access website (https://www.immunologicaltoolbox.co.uk) has been created as a central community annotated hub for veterinary immunological reagents. The website is also the portal into services offered by the UK Immunological Toolbox project that includes antibody generation, sequencing and recombinant expression. The funding for this effort is linked into sustainable sources, but ultimate success relies on community engagement to continually increase the quality and quantity of information. 
It is hoped that as more users and reagent owners engage, it will become an essential resource for researchers, veterinarians and clinicians alike by removing barriers that prevent the use of the most informative animal models.","hji,kes",1,1,2,2,1,NA,no notes; reassessed and re-scored - re-score based on broaded def of life sci - a db of reagents I guess +32569358,COVID-19 TestNorm: A tool to normalize COVID-19 testing names to LOINC codes.,"Large observational data networks that leverage routine clinical practice data in electronic health records (EHRs) are critical resources for research on coronavirus disease 2019 (COVID-19). Data normalization is a key challenge for the secondary use of EHRs for COVID-19 research across institutions. In this study, we addressed the challenge of automating the normalization of COVID-19 diagnostic tests, which are critical data elements, but for which controlled terminology terms were published after clinical implementation. We developed a simple but effective rule-based tool called COVID-19 TestNorm to automatically normalize local COVID-19 testing names to standard LOINC (Logical Observation Identifiers Names and Codes) codes. COVID-19 TestNorm was developed and evaluated using 568 test names collected from 8 healthcare systems. Our results show that it could achieve an accuracy of 97.4% on an independent test set. COVID-19 TestNorm is available as an open-source package for developers and as an online Web application for end users (https://clamp.uth.edu/covid/loinc.php). We believe that it will be a useful tool to support secondary use of EHRs for research on COVID-19.","hji,kes",0,0,0,2,0,NA,NA +32589734,BeadNet: deep learning-based bead detection and counting in low-resolution microscopy images.,"

Motivation

An automated counting of beads is required for many high-throughput experiments such as studying mimicked bacterial invasion processes. However, state-of-the-art algorithms under- or overestimate the number of beads in low-resolution images. In addition, expert knowledge is needed to adjust parameters.

Results

In combination with our image labeling tool, BeadNet enables biologists to easily annotate and process their data reducing the expertise required in many existing image analysis pipelines. BeadNet outperforms state-of-the-art-algorithms in terms of missing, added and total amount of beads.

Availability and implementation

BeadNet (software, code and dataset) is available at https://bitbucket.org/t_scherr/beadnet. The image labeling tool is available at https://bitbucket.org/abartschat/imagelabelingtool.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32592631,"A study on the structure, mechanism, and biochemistry of kanamycin B dioxygenase (KanJ)-an enzyme with a broad range of substrates.","Kanamycin A is an aminoglycoside antibiotic isolated from Streptomyces kanamyceticus and used against a wide spectrum of bacteria, including Mycobacterium tuberculosis. Biosynthesis of kanamycin involves an oxidative deamination step catalyzed by kanamycin B dioxygenase (KanJ), thereby the C2' position of kanamycin B is transformed into a keto group upon release of ammonia. Here, we present for the first time, structural models of KanJ with several ligands, which along with the results of ITC binding assays and HPLC activity tests explain substrate specificity of the enzyme. The large size of the binding pocket suggests that KanJ can accept a broad range of substrates, which was confirmed by activity tests. Specificity of the enzyme with respect to its substrate is determined by the hydrogen bond interactions between the methylamino group of the antibiotic and highly conserved Asp134 and Cys150 as well as between hydroxyl groups of the substrate and Asn120 and Gln80. Upon antibiotic binding, the C terminus loop is significantly rearranged and Gln80 and Asn120, which are directly involved in substrate recognition, change their conformations. Based on reaction energy profiles obtained by density functional theory (DFT) simulations, we propose a mechanism of ketone formation involving the reactive FeIV = O and proceeding either via OH rebound, which yields a hemiaminal intermediate or by abstraction of two hydrogen atoms, which leads to an imine species. At acidic pH, the latter involves a lower barrier than the OH rebound, whereas at basic pH, the barrier leading to an imine vanishes completely. DATABASES: Structural data are available in PDB database under the accession numbers: 6S0R, 6S0T, 6S0U, 6S0W, 6S0V, 6S0S. 
Diffraction images are available at the Integrated Resource for Reproducibility in Macromolecular Crystallography at http://proteindiffraction.org under DOIs: 10.18430/m36s0t, 10.18430/m36s0u, 10.18430/m36s0r, 10.18430/m36s0s, 10.18430/m36s0v, 10.18430/m36s0w. A data set collection of computational results is available in the Mendeley Data database under DOI: 10.17632/sbyzssjmp3.1 and in the ioChem-BD database under DOI: 10.19061/iochem-bd-4-18.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +32614400,iPromoter-BnCNN: a novel branched CNN-based predictor for identifying and classifying sigma promoters.,"

Motivation

Promoter is a short region of DNA which is responsible for initiating transcription of specific genes. Development of computational tools for automatic identification of promoters is in high demand. According to the difference of functions, promoters can be of different types. Promoters may have both intra- and interclass variation and similarity in terms of consensus sequences. Accurate classification of various types of sigma promoters still remains a challenge.

Results

We present iPromoter-BnCNN for identification and accurate classification of six types of promoters-σ24,σ28,σ32,σ38,σ54,σ70. It is a CNN-based classifier which combines local features related to monomer nucleotide sequence, trimer nucleotide sequence, dimer structural properties and trimer structural properties through the use of parallel branching. We conducted experiments on a benchmark dataset and compared with six state-of-the-art tools to show our supremacy on 5-fold cross-validation. Moreover, we tested our classifier on an independent test dataset.

Availability and implementation

Our proposed tool iPromoter-BnCNN web server is freely available at http://103.109.52.8/iPromoter-BnCNN. The runnable source code can be found https://colab.research.google.com/drive/1yWWh7BXhsm8U4PODgPqlQRy23QGjF2DZ.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32615035,mycoCSM: Using Graph-Based Signatures to Identify Safe Potent Hits against Mycobacteria.,"Development of new potent, safe drugs to treat Mycobacteria has proven to be challenging, with limited hit rates of initial screens restricting subsequent development efforts. Despite significant efforts and the evolution of quantitative structure-activity relationship as well as machine learning-based models for computationally predicting molecule bioactivity, there is an unmet need for efficient and reliable methods for identifying biologically active compounds against Mycobacterium that are also safe for humans. Here we developed mycoCSM, a graph-based signature approach to rapidly identify compounds likely to be active against bacteria from the genus Mycobacterium, or against specific Mycobacteria species. mycoCSM was trained and validated on eight organism-specific and for the first time a general Mycobacteria data set, achieving correlation coefficients of up to 0.89 on cross-validation and 0.88 on independent blind tests, when predicting bioactivity in terms of minimum inhibitory concentration. In addition, we also developed a predictor to identify those compounds likely to penetrate in necrotic tuberculosis foci, which achieved a correlation coefficient of 0.75. Together with a built-in estimator of the maximum tolerated dose in humans, we believe this method will provide a valuable resource to enrich screening libraries with potent, safe molecules. 
To provide simple guidance in the selection of libraries with favorable anti-Mycobacteria properties, we made mycoCSM freely available online at http://biosig.unimelb.edu.au/myco_csm.","hji,kes",0,0,0,2,0,NA,NA +32626907,Comments on a recent case-control study of malignant mesothelioma of the pericardium and the tunica vaginalis testis.,"As the first case-control study of malignant mesothelioma of the pericardium and the tunica vaginalis testis (mTVT), the paper by Marinaccio et al (1) is potentially an important epidemiologic contribution. A careful review of the paper, however, raises a number of methodological issues. Any case-control study can be viewed as being nested within a conceptual cohort, with controls being sampled from the at-risk cohort as cases arise over time. This view of case-control studies leads to the concept of incidence-density sampling of controls (eg, 2, 3). For Marinaccio et al (1) this would mean that, as cases were registered over the study period, each would be matched to an individual control or set of controls of the same gender, age, and region of the country (since asbestos exposure varies by time and region [4]). For example, if a case were 50 years old in 1995, then any matched control should be close to age 50 in 1995 and of the same gender and from the same region as the case. Matching for age in this fashion automatically results in matching for year of birth, which is essential in this context because birth-cohort effects are determinants of asbestos exposure and mesothelioma incidence (eg, 5-8). If Marinaccio et al (1) used this scheme for age-matching, one would expect to see similar distributions of cases (table 1) and controls (table S3 in the supplemental material) by period of birth. Among males, however, the distributions of mesothelioma cases (whether pericardial or mTVT) and controls by period of birth are clearly different (P<0.001). 
Among females, the distributions of cases of pericardial mesothelioma and controls by birth year are less dissimilar (P≈0.05). Thus, the female cases of pericardial mesothelioma are better matched to controls on year of birth than are male cases of either mTVT or pericardial mesothelioma. We note also that the distributions of male and female controls by year of birth are distinctly different (P<0.002), whereas the birth-year distributions of cases of mesothelioma by site and gender are not (P≈0.8). In the Marinaccio et al (1) sensitivity analysis restricted to subjects born before 1950, the distributions of cases and controls by period of birth remain significantly different. Therefore, based on the reported evidence, cases and controls were not matched on birth cohort, thereby possibly biasing the results. Similarly, bias may result from the lack of matching on geographic region; while cases were registered from across Italy, controls were selected from only six regions. Although a sensitivity analysis restricted cases and controls to those from only the six regions, a comparison of tables S1 and S3 indicates that the regional distribution of controls is different from that of person-time observed; that is, the controls do not appear to be representative of the underlying population at risk by region. The second major issue of concern has to do with ascertainment of asbestos exposure. Information on exposure for the cases was presumably obtained at the time of registration. The two sets of controls, obtained from previously unpublished case-control studies, were interviewed during 2014-2015 and 2014-2016; that is, many years after the exposure for most cases was ascertained (1993-2015). Few other details of the control groups are provided, except that participation by one set of controls was <50%, raising additional concerns about selection bias. For details on the second set of controls, Marinaccio et al (1) reference a paper by Brandi et al (9). 
On review of that paper, however, we found no description of the control group, only references to three earlier papers. Marinaccio et al (1) present analyses only with both sets of controls combined; to evaluate potential sources of bias from the use of different sets of controls, they should also report results using each set of controls separately. The authors also did not detail their methods of exposure classification. For example, what does probable or possible exposure mean? The authors should at least present separate analyses of definite occupational exposure. Eighty cases of mTVT were registered, but only 68 were included in the analyses. Information on the 12 omitted cases (eg, age, year of birth, and region) would be helpful. Marinaccio et al (1) did not provide clear information on what occupations and/or industries they considered as exposed to asbestos. In an earlier study, Marinaccio et al (10) remarked on the absence of pericardial mesothelioma and mTVT in industries with the highest exposures to asbestos, saying, ""[t]he absence of exposures in the shipbuilding, railway and asbestos-cement industries … for all the 67 pericardial and testicular cases is noteworthy but not easy to interpret."" By contrast, Marinaccio et al (1) stated, ""[t]he economic sectors more frequently associated with asbestos exposure were construction, steel mills, metal-working industry, textile industry and agriculture."" The possibility of exposure in the ""agriculture economic sector"" was not mentioned in Marinaccio et al (10) and appears not to have been considered in previous epidemiologic studies in Italy. In general, epidemiologic studies indicate that farmers and agricultural workers are not at increased risk of developing mesothelioma (eg, 11-17). 
The fact that few, if any, cases of mTVT and pericardial mesothelioma occurred in industries traditionally associated with high asbestos exposure raises the possibility that the results of Marinaccio et al (1) are attributable to deficiencies in study design, very possibly bias in the selection of controls, and deficiencies in exposure assessment and classification as described above, leading to a spurious association of occupational exposure with mTVT and male pericardial mesothelioma. Conflict of interest This research has received no outside funding. All authors are employees of Exponent, Inc., an international scientific and engineering consulting company. All authors have worked as both consulting and testifying experts in litigation matters related to asbestos exposure and asbestos-related disease. References 1. Marinaccio A, Consonni D, Mensi C, Mirabelli D, Migliore E, Magnani C et al.; ReNaM Working Group. Association between asbestos exposure and pericardial and tunica vaginalis testis malignant mesothelioma: a case-control study and epidemiological remarks. Scand J Work Environ Health. 2020;46(6):609-617. https://doi.org/10.5271/sjweh.3895. 2. Rothman KJ, Greenland S, Lash TL. Modern Epidemiology. 2008; Philadelphia: Wolters Kluwer/Lippincott Williams & Wilkins. 3. Richardson DB. An incidence density sampling program for nested case-control analyses. Occup Environ Med 2004 Dec;61(12):e59. https://doi.org/10.1136/oem.2004.014472. 4. Marinaccio A, Binazzi A, Marzio DD, Scarselli A, Verardo M, Mirabelli D et al.; ReNaM Working Group. Pleural malignant mesothelioma epidemic: incidence, modalities of asbestos exposure and occupations involved from the Italian National Register. Int J Cancer 2012 May;130(9):2146-54. https://doi.org/10.1002/ijc.26229. 5. La Vecchia C, Decarli A, Peto J, Levi F, Tomei F, Negri E. An age, period and cohort analysis of pleural cancer mortality in Europe. Eur J Cancer Prev 2000 Jun;9(3):179-84. 
https://doi.org/10.1097/00008469-200006000-00005. 6. Price B, Ware A. Mesothelioma trends in the United States: an update based on Surveillance, Epidemiology, and End Results Program data for 1973 through 2003. Am J Epidemiol 2004 Jan;159(2):107-12. https://doi.org/10.1093/aje/kwh025. 7. Moolgavkar SH, Meza R, Turim J. Pleural and peritoneal mesotheliomas in SEER: age effects and temporal trends, 1973-2005. Cancer Causes Control 2009 Aug;20(6):935-44. https://doi.org/10.1007/s10552-009-9328-9. 8. Moolgavkar SH, Chang ET, Mezei G, Mowat FS. Chapter 3. Epidemiology of mesothelioma. In Testa JR. Asbestos and mesothelioma; 2017. pp. 43-72. Cham, Switzerland: Springer International Publishing. 9. Brandi G, Di Girolamo S, Farioli A, de Rosa F, Curti S, Pinna AD et al. Asbestos: a hidden player behind the cholangiocarcinoma increase? Findings from a case-control analysis. Cancer Causes Control 2013 May;24(5):911-8. https://doi.org/10.1007/s10552-013-0167-3. 10. Marinaccio A, Binazzi A, Di Marzio D, Scarselli A, Verardo M, Mirabelli D et al. Incidence of extrapleural malignant mesothelioma and asbestos exposure, from the Italian national register. Occup Environ Med 2010 Nov;67(11):760-5. https://doi.org/10.1136/oem.2009.051466. 11. Teschke K, Morgan MS, Checkoway H, Franklin G, Spinelli JJ, van Belle G et al. Mesothelioma surveillance to locate sources of exposure to asbestos. Can J Public Health 1997 May-Jun;88(3):163-8. https://doi.org/10.1007/BF03403881. 12. Bouchardy C, Schüler G, Minder C, Hotz P, Bousquet A, Levi F et al. Cancer risk by occupation and socioeconomic group among men--a study by the Association of Swiss Cancer Registries. Scand J Work Environ Health 2002;28(1 Suppl 1):1-88. 13. Hemminki K, Li X. Time trends and occupational risk factors for pleural mesothelioma in Sweden. J Occup Environ Med 2003a Apr;45(4):456-61. https://doi.org/10.1097/01.jom.0000058341.05741.7e. 14. Hemminki K, Li X. 
Time trends and occupational risk factors for peritoneal mesothelioma in Sweden. J Occup Environ Med 2003b Apr;45(4):451-5. https://doi.org/10.1097/01.jom.0000052960.59271.d4. 15. Pukkala E, Martinsen JI, Lynge E, Gunnarsdottir HK, Sparén P, Tryggvadottir L et al. Occupation and cancer - follow-up of 15 million people in five Nordic countries. Acta Oncol 2009;48(5):646-790. https://doi.org/10.1080/02841860902913546. 16. Rolland P, Gramond C, Berron H, Ducamp S, Imbernon E, Goldberg M et al. Mesotheliome pleural: Professions et secteurs d'activite a risque chez les hommes [Pleural mesothelioma: Professions and occupational areas at risk among humans]. 2005; Institut de Veille Sanitaire, Departement Sante Travai, Saint-Maurice, France. 17. Rolland P, Gramond C, Lacourt A, Astoul P, Chamming's S, Ducamp S et al. PNSM Study Group. Occupations and industries in France at high risk for pleural mesothelioma: A population-based case-control study (1998-2002). Am J Ind Med 2010 Dec;53(12):1207-19. https://doi.org/10.1002/ajim.20895.","hji,kes",0,0,0,2,0,NA,comment +32632821,Efficient System Wide Metabolic Pathway Comparisons in Multiple Microbes Using Genome to KEGG Orthology (G2KO) Pipeline Tool.,"Comparison of system-wide metabolic pathways among microbes provides valuable insights of organisms' metabolic capabilities that can further assist in rationally screening organisms in silico for various applications. In this work, we present a much needed, efficient and user-friendly Genome to KEGG Orthology (G2KO) pipeline tool that facilitates efficient comparison of system wide metabolic networks of multiple organisms simultaneously. The optimized strategy primarily involves automatic retrieval of the KEGG Orthology (KO) identifiers of user defined organisms from the KEGG database followed by overlaying and visualization of the metabolic genes using the KEGG Mapper reconstruct pathway tool. 
We demonstrate the applicability of G2KO via two case studies in which we processed 24,314 genes across 15 organisms, mapped on to 530 reference pathways in KEGG, while focusing on pathways of interest. First, an in-silico designing of synthetic microbial consortia towards bioprocessing of cellulose to valuable products by comparing the cellulose degradation and fermentative pathways of microbes was undertaken. Second, we comprehensively compared the amino acid biosynthetic pathways of multiple microbes and demonstrated the potential of G2KO as an efficient tool for metabolic studies. We envisage the tool will find immensely useful to the metabolic engineers as well as systems biologists. The tool's web-server, along with tutorial is publicly available at https://faculty.iitmandi.ac.in/~shyam/tools/g2ko/g2ko.cgi . Also, standalone tool can be downloaded freely from https://sourceforge.net/projects/g2ko/ , and from the supplementary.","hji,kes",0,0,0,2,0,NA,NA +32657416,MutCombinator: identification of mutated peptides allowing combinatorial mutations using nucleotide-based graph search.,"

Motivation

Proteogenomics has proven its utility by integrating genomics and proteomics. Typical approaches use data from next-generation sequencing to infer proteins expressed. A sample-specific protein sequence database is often adopted to identify novel peptides from matched mass spectrometry-based proteomics; nevertheless, there is no software that can practically identify all possible forms of mutated peptides suggested by various genomic information sources.

Results

We propose MutCombinator, which enables us to practically identify mutated peptides from tandem mass spectra allowing combinatorial mutations during the database search. It uses an upgraded version of a variant graph, keeping track of frame information. The variant graph is indexed by nine nucleotides for fast access. Using MutCombinator, we could identify more mutated peptides than previous methods, because combinations of point mutations are considered and also because it can be practically applied together with a large mutation database such as COSMIC. Furthermore, MutCombinator supports in-frame search for coding regions and three-frame search for non-coding regions.

Availability and implementation

https://prix.hanyang.ac.kr/download/mutcombinator.jsp.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32657595,The First Inherited Retinal Disease Registry in Iran: Research Protocol and Results of a Pilot Study.,"

Background

To describe the protocol for developing a national inherited retinal disease (IRD) registry in Iran and present its initial report.

Methods

This community-based participatory research was approved by the Ministry of Health and Medical Education of Iran in 2016. To provide the minimum data set (MDS), several focus group meetings were held. The final MDS was handed over to an engineering team to develop a web-based software. In the pilot phase, the software was set up in two referral centers in Iran. Final IRD diagnosis was made based on clinical manifestations and genetic findings. Ultimately, patient registration was done based on all clinical and non-clinical manifestations.

Results

Initially, a total of 151 data elements were approved with Delphi technique. The registry software went live at www. IRDReg.org based on DHIS2 open source license agreement since February 2016. So far, a total of 1001 patients have been registered with a mean age of 32.41±15.60 years (range, 3 months to 74 years). The majority of the registered patients had retinitis pigmentosa (42%, 95% CI: 38.9% to 45%). Genetic testing was done for approximately 20% of the registered individuals.

Conclusion

Our study shows successful web-based software design and data collection as a proof of concept for the first IRD registry in Iran. Multicenter integration of the IRD registry in medical centers throughout the country is well underway as planned. These data will assist researchers to rapidly access information about the distribution and genetic patterns of this disease.","hji,kes",0,0,0,2,0,clinical data,registry; reassessed and still no - clinical registry +32689928,SpectralTAD: an R package for defining a hierarchy of topologically associated domains using spectral clustering.,"

Background

The three-dimensional (3D) structure of the genome plays a crucial role in gene expression regulation. Chromatin conformation capture technologies (Hi-C) have revealed that the genome is organized in a hierarchy of topologically associated domains (TADs), sub-TADs, and chromatin loops. Identifying such hierarchical structures is a critical step in understanding genome regulation. Existing tools for TAD calling are frequently sensitive to biases in Hi-C data, depend on tunable parameters, and are computationally inefficient.

Methods

To address these challenges, we developed a novel sliding window-based spectral clustering framework that uses gaps between consecutive eigenvectors for TAD boundary identification.

Results

Our method, implemented in an R package, SpectralTAD, detects hierarchical, biologically relevant TADs, has automatic parameter selection, is robust to sequencing depth, resolution, and sparsity of Hi-C data. SpectralTAD outperforms four state-of-the-art TAD callers in simulated and experimental settings. We demonstrate that TAD boundaries shared among multiple levels of the TAD hierarchy were more enriched in classical boundary marks and more conserved across cell lines and tissues. In contrast, boundaries of TADs that cannot be split into sub-TADs showed less enrichment and conservation, suggesting their more dynamic role in genome regulation.

Conclusion

SpectralTAD is available on Bioconductor, http://bioconductor.org/packages/SpectralTAD/ .","hji,kes",0,0,0,2,0,software,NA +32692801,"COVID-19 Docking Server: a meta server for docking small molecules, peptides and antibodies against potential targets of COVID-19.","

Motivation

The coronavirus disease 2019 (COVID-19) caused by a new type of coronavirus has been emerging from China and led to thousands of death globally since December 2019. Despite many groups have engaged in studying the newly emerged virus and searching for the treatment of COVID-19, the understanding of the COVID-19 target-ligand interactions represents a key challenge. Herein, we introduce COVID-19 Docking Server, a web server that predicts the binding modes between COVID-19 targets and the ligands including small molecules, peptides and antibodies.

Results

Structures of proteins involved in the virus life cycle were collected or constructed based on the homologs of coronavirus, and prepared ready for docking. The meta-platform provides a free and interactive tool for the prediction of COVID-19 target-ligand interactions and following drug discovery for COVID-19.

Availability and implementation

http://ncov.schanglab.org.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32696040,Metasubtract: an R-package to analytically produce leave-one-out meta-analysis GWAS summary statistics.,"

Summary

statistics from a meta-analysis of genome-wide association studies (meta-GWAS) can be used for many follow-up analyses. One valuable application is the creation of polygenic scores. However, if polygenic scores are calculated in a validation cohort that was part of the meta-GWAS consortium, this cohort is not independent and analyses will therefore yield inflated results. The R package 'MetaSubtract' was developed to subtract the results of the validation cohort from meta-GWAS summary statistics analytically. The statistical formulas for a meta-analysis were inverted to compute corrected summary statistics of a meta-GWAS leaving one (or more) cohort(s) out. These formulas have been implemented in MetaSubtract for different meta-analyses methods (fixed effects inverse variance or square root sample size weighted z-score) accounting for no, single or double genomic control correction. Results obtained by MetaSubtract correlate very well to those calculated using the traditional way, i.e. by performing a meta-analysis leaving out the validation cohort. In conclusion, MetaSubtract allows researchers to compute meta-GWAS summary statistics that are independent of the GWAS results of the validation cohort without requiring access to the cohort level GWAS results of the corresponding meta-GWAS consortium.

Availability and implementation

https://cran.r-project.org/web/packages/MetaSubtract.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32699131,Febrile Illness Evaluation in a Broad Range of Endemicities (FIEBRE): protocol for a multisite prospective observational study of the causes of fever in Africa and Asia.,"

Introduction

Fever commonly leads to healthcare seeking and hospital admission in sub-Saharan Africa and Asia. There is only limited guidance for clinicians managing non-malarial fevers, which often results in inappropriate treatment for patients. Furthermore, there is little evidence for estimates of disease burden, or to guide empirical therapy, control measures, resource allocation, prioritisation of clinical diagnostics or antimicrobial stewardship. The Febrile Illness Evaluation in a Broad Range of Endemicities (FIEBRE) study seeks to address these information gaps.

Methods and analysis

FIEBRE investigates febrile illness in paediatric and adult outpatients and inpatients using standardised clinical, laboratory and social science protocols over a minimum 12-month period at five sites in sub-Saharan Africa and Southeastern and Southern Asia. Patients presenting with fever are enrolled and provide clinical data, pharyngeal swabs and a venous blood sample; selected participants also provide a urine sample. Laboratory assessments target infections that are treatable and/or preventable. Selected point-of-care tests, as well as blood and urine cultures and antimicrobial susceptibility testing, are performed on site. On day 28, patients provide a second venous blood sample for serology and information on clinical outcome. Further diagnostic assays are performed at international reference laboratories. Blood and pharyngeal samples from matched community controls enable calculation of AFs, and surveys of treatment seeking allow estimation of the incidence of common infections. Additional assays detect markers that may differentiate bacterial from non-bacterial causes of illness and/or prognosticate illness severity. Social science research on antimicrobial use will inform future recommendations for fever case management. Residual samples from participants are stored for future use.

Ethics and dissemination

Ethics approval was obtained from all relevant institutional and national committees; written informed consent is obtained from all participants or parents/guardians. Final results will be shared with participating communities, and in open-access journals and other scientific fora. Study documents are available online (https://doi.org/10.17037/PUBS.04652739).","hji,kes",0,0,0,2,0,NA,NA +32702108,treeheatr: an R package for interpretable decision tree visualizations.,"

Summary

treeheatr is an R package for creating interpretable decision tree visualizations with the data represented as a heatmap at the tree's leaf nodes. The integrated presentation of the tree structure along with an overview of the data efficiently illustrates how the tree nodes split up the feature space and how well the tree model performs. This visualization can also be examined in depth to uncover the correlation structure in the data and importance of each feature in predicting the outcome. Implemented in an easily installed package with a detailed vignette, treeheatr can be a useful teaching tool to enhance students' understanding of a simple decision tree model before diving into more complex tree-based machine learning methods.

Availability and implementation

The treeheatr package is freely available under the permissive MIT license at https://trang1618.github.io/treeheatr and https://cran.r-project.org/package=treeheatr. It comes with a detailed vignette that is automatically built with GitHub Actions continuous integration.","hji,kes",0,0,0,2,0,software,NA +32717044,Rtpca: an R package for differential thermal proximity coaggregation analysis.,

Summary

Rtpca is an R package implementing methods for inferring protein-protein interactions (PPIs) based on thermal proteome profiling experiments of a single condition or in a differential setting via an approach called thermal proximity coaggregation. It offers user-friendly tools to explore datasets for their PPI predictive performance and easily integrates with available R packages.

Availability and implementation

Rtpca is available from Bioconductor (https://bioconductor.org/packages/Rtpca).

Supplementary information

Supplementary data are available at Bioinformatics online.,"hji,kes",0,0,0,2,0,software,NA +32734663,The Bio3D packages for structural bioinformatics.,"Bio3D is a family of R packages for the analysis of biomolecular sequence, structure, and dynamics. Major functionality includes biomolecular database searching and retrieval, sequence and structure conservation analysis, ensemble normal mode analysis, protein structure and correlation network analysis, principal component, and related multivariate analysis methods. Here, we review recent package developments, including a new underlying segregation into separate packages for distinct analysis, and introduce a new method for structure analysis named ensemble difference distance matrix analysis (eDDM). The eDDM approach calculates and compares atomic distance matrices across large sets of homologous atomic structures to help identify the residue wise determinants underlying specific functional processes. An eDDM workflow is detailed along with an example application to a large protein family. As a new member of the Bio3D family, the Bio3D-eddm package supports both experimental and theoretical simulation-generated structures, is integrated with other methods for dissecting sequence-structure-function relationships, and can be used in a highly automated and reproducible manner. Bio3D is distributed as an integrated set of platform independent open source R packages available from: http://thegrantlab.org/bio3d/.","hji,kes",0,0,0,2,0,software,NA +32742675,Network-based protein structural classification.,"Experimental determination of protein function is resource-consuming. As an alternative, computational prediction of protein function has received attention. In this context, protein structural classification (PSC) can help, by allowing for determining structural classes of currently unclassified proteins based on their features, and then relying on the fact that proteins with similar structures have similar functions. 
Existing PSC approaches rely on sequence-based or direct three-dimensional (3D) structure-based protein features. By contrast, we first model 3D structures of proteins as protein structure networks (PSNs). Then, we use network-based features for PSC. We propose the use of graphlets, state-of-the-art features in many research areas of network science, in the task of PSC. Moreover, because graphlets can deal only with unweighted PSNs, and because accounting for edge weights when constructing PSNs could improve PSC accuracy, we also propose a deep learning framework that automatically learns network features from weighted PSNs. When evaluated on a large set of approximately 9400 CATH and approximately 12 800 SCOP protein domains (spanning 36 PSN sets), the best of our proposed approaches are superior to existing PSC approaches in terms of accuracy, with comparable running times. Our data and code are available at https://doi.org/10.5281/zenodo.3787922.","hji,kes",0,0,0,2,0,NA,NA +32749460,Development of an online tool for linking behavior change techniques and mechanisms of action based on triangulation of findings from literature synthesis and expert consensus.,"Researchers, practitioners, and policymakers develop interventions to change behavior based on their understanding of how behavior change techniques (BCTs) impact the determinants of behavior. A transparent, systematic, and accessible method of linking BCTs with the processes through which they change behavior (i.e., their mechanisms of action [MoAs]) would advance the understanding of intervention effects and improve theory and intervention development. The purpose of this study is to triangulate evidence for hypothesized BCT-MoA links obtained in two previous studies and present the results in an interactive, online tool. Two previous studies generated evidence on links between 56 BCTs and 26 MoAs based on their frequency in literature synthesis and on expert consensus. 
Concordance between the findings of the two studies was examined using multilevel modeling. Uncertainties and differences between the two studies were reconciled by 16 behavior change experts using consensus development methods. The resulting evidence was used to generate an online tool. The two studies showed concordance for 25 of the 26 MoAs and agreement for 37 links and for 460 ""nonlinks."" A further 55 links were resolved by consensus (total of 92 [37 + 55] hypothesized BCT-MoA links). Full data on 1,456 possible links was incorporated into the online interactive Theory and Technique Tool (https://theoryandtechniquetool.humanbehaviourchange.org/). This triangulation of two distinct sources of evidence provides guidance on how BCTs may affect the mechanisms that change behavior and is available as a resource for behavior change intervention designers, researchers and theorists, supporting intervention design, research synthesis, and collaborative research.","hji,kes",0,0,0,2,0,NA,not life sci - questable if underlying data is available +32753773,hubViz: A Novel Tool for Hub-centric Visualization.,"Visualization algorithms have been widely used for intuitive interrogation of genomic data and popularly used tools include MDS, t-SNE, and UMAP. However, these algorithms are not tuned for the visualization of binary data and none of them consider the hubness of observations for the visualization. In order to address these limitations, here we propose hubViz, a novel tool for hub-centric visualization of binary data. We evaluated the performance of hubViz with its application to the gene expression data measured in multiple brain regions of rats exposed to cocaine, the single-cell RNA-seq data of peripheral blood mononuclear cells treated with interferon beta, and the literature mining data to investigate relationships among diseases. We further evaluated the performance of hubViz using simulation studies. 
We showed that hubViz provides effective visual inspection by locating the hub in the center and the contrasting elements in the opposite sides around the center. We believe that hubViz and its software can be powerful tools that can improve visualizations of various genomic data. The hubViz is implemented as an R package hubviz, which is publicly available at https://dongjunchung.github.io/hubviz/.","hji,kes",0,0,0,2,0,software,NA +32773643,Genomic variance of Open Reading Frames (ORFs) and Spike protein in severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).,"

Background

The outbreak of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has caused severe pneumonia at December 2019. Since then, it has been wildly spread from Wuhan, China, to Asia, European, and United States to become the pandemic worldwide. Now coronavirus disease 2019 were globally diagnosed over 3 084 740 cases with mortality of 212 561 toll. Current reports variants are found in SARS-CoV-2, majoring in functional ribonucleic acid (RNA) to transcribe into structural proteins as transmembrane spike (S) glycoprotein and the nucleocapsid (N) protein holds the virus RNA genome; the envelope (E) and membrane (M) alone with spike protein form viral envelope. The nonstructural RNA genome includes ORF1ab, ORF3, ORF6, 7a, 8, and ORF10 with highly conserved information for genome synthesis and replication in ORF1ab.

Methods

We apply genomic alignment analysis to observe SARS-CoV-2 sequences from GenBank (http://www.ncbi.nim.nih.gov/genebank/): MN 908947 (China, C1); MN985325 (United States: WA, UW); MN996527 (China, C2); MT007544 (Australia: Victoria, A1); MT027064 (United States: CA, UC); MT039890 (South Korea, K1); MT066175 (Taiwan, T1); MT066176 (Taiwan, T2); LC528232 (Japan, J1); and LC528233 (Japan, J2) and Global Initiative on Sharing All Influenza Data database (https://www.gisaid.org). We adopt Multiple Sequence Alignments web from Clustalw (https://www.genome.jp/tools-bin/clustalw) and Geneious web (https://www.geneious.com.

Results

We analyze database by genome alignment search for nonstructural ORFs and structural E, M, N, and S proteins. Mutations in ORF1ab, ORF3, and ORF6 are observed; specific variants in spike region are detected.

Conclusion

We perform genomic analysis and comparative multiple sequence of SARS-CoV-2. Large scaling sequence alignments trace to localize and catch different mutant strains in United possibly to transmit severe deadly threat to humans. Studies about the biological symptom of SARS-CoV-2 in clinic animal and humans will be applied and manipulated to find mechanisms and shield the light for understanding the origin of pandemic crisis.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +32777814,BSAseq: an interactive and integrated web-based workflow for identification of causal mutations in bulked F2 populations.,"

Summary

With the advance of next-generation sequencing technologies and reductions in the costs of these techniques, bulked segregant analysis (BSA) has become not only a powerful tool for mapping quantitative trait loci but also a useful way to identify causal gene mutations underlying phenotypes of interest. However, due to the presence of background mutations and errors in sequencing, genotyping, and reference assembly, it is often difficult to distinguish true causal mutations from background mutations. In this study, we developed the BSAseq workflow, which includes an automated bioinformatics analysis pipeline with a probabilistic model for estimating the linked region (the region linked to the causal mutation) and an interactive Shiny web application for visualizing the results. We deeply sequenced a sorghum male-sterile parental line (ms8) to capture the majority of background mutations in our bulked F2 data. We applied the workflow to 11 bulked sorghum F2 populations and 1 rice F2 population and identified the true causal mutation in each population. The workflow is intuitive and straightforward, facilitating its adoption by users without bioinformatics analysis skills. We anticipate that the BSAseq workflow will be broadly applicable to the identification of causal mutations for many phenotypes of interest.

Availability and implementation

BSAseq is freely available on https://www.sciapps.org/page/bsa.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +32778890,SurvivalMeth: a web server to investigate the effect of DNA methylation-related functional elements on prognosis.,"Aberrant DNA methylation is a fundamental characterization of epigenetics for carcinogenesis. Abnormality of DNA methylation-related functional elements (DMFEs) may lead to dysfunction of regulatory genes in the progression of cancers, contributing to prognosis of many cancers. There is an urgent need to construct a tool to comprehensively assess the impact of DMFEs on prognosis. Therefore, we developed SurvivalMeth (http://bio-bigdata.hrbmu.edu.cn/survivalmeth) to explore the prognosis-related DMFEs, which documented many kinds of DMFEs, including 309,465 CpG island-related elements, 104,748 transcript-related elements, 77,634 repeat elements, as well as cell-type specific 1,689,653 super enhancers (SE) and 1,304,902 CTCF binding regions for analysis. SurvivalMeth is a convenient tool which collected DNA methylation profiles of 36 cancers and allowed users to query their genes of interest in different datasets for prognosis. Furthermore, SurvivalMeth not only integrated different combinations, including single DMFE, multiple DMFEs, SEs and clinical data, to perform survival analysis on preupload data but also allowed for uploading customized DNA methylation profile of DMFEs from various diseases to analyze. SurvivalMeth provided a comprehensive resource and automated analysis for prognostic DMFEs, including DMFE methylation level, correlation analysis, clinical analysis, differential analysis, DMFE annotation, survival-related detailed result and visualization of survival analysis. 
In summary, we believe that SurvivalMeth will facilitate prognostic research of DMFEs in diverse cancers.","hji,kes",0,0,0,2,0,software,NA +32778891,MolAICal: a soft tool for 3D drug design of protein targets by artificial intelligence and classical algorithm.,"Deep learning is an important branch of artificial intelligence that has been successfully applied into medicine and two-dimensional ligand design. The three-dimensional (3D) ligand generation in the 3D pocket of protein target is an interesting and challenging issue for drug design by deep learning. Here, the MolAICal software is introduced to supply a way for generating 3D drugs in the 3D pocket of protein targets by combining with merits of deep learning model and classical algorithm. The MolAICal software mainly contains two modules for 3D drug design. In the first module of MolAICal, it employs the genetic algorithm, deep learning model trained by FDA-approved drug fragments and Vinardo score fitting on the basis of PDBbind database for drug design. In the second module, it uses deep learning generative model trained by drug-like molecules of ZINC database and molecular docking invoked by Autodock Vina automatically. Besides, the Lipinski's rule of five, Pan-assay interference compounds (PAINS), synthetic accessibility (SA) and other user-defined rules are introduced for filtering out unwanted ligands in MolAICal. To show the drug design modules of MolAICal, the membrane protein glucagon receptor and non-membrane protein SARS-CoV-2 main protease are chosen as the investigative drug targets. The results show MolAICal can generate the various and novel ligands with good binding scores and appropriate XLOGP values. We believe that MolAICal can use the advantages of deep learning model and classical programming for designing 3D drugs in protein pocket. 
MolAICal is freely for any nonprofit purpose and accessible at https://molaical.github.io.","hji,kes",0,0,0,2,0,software,NA +32805048,mixtureS: a novel tool for bacterial strain genome reconstruction from reads.,"

Motivation

It is essential to study bacterial strains in environmental samples. Existing methods and tools often depend on known strains or known variations, cannot work on individual samples, not reliable, or not easy to use, etc. It is thus important to develop more user-friendly tools that can identify bacterial strains more accurately.

Results

We developed a new tool called mixtureS that can de novo identify bacterial strains from shotgun reads of a clonal or metagenomic sample, without prior knowledge about the strains and their variations. Tested on 243 simulated datasets and 195 experimental datasets, mixtureS reliably identified the strains, their numbers and their abundance. Compared with three tools, mixtureS showed better performance in almost all simulated datasets and the vast majority of experimental datasets.

Availability and implementation

The source code and tool mixtureS is available at http://www.cs.ucf.edu/~xiaoman/mixtureS/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32810207,MELODI Presto: a fast and agile tool to explore semantic triples derived from biomedical literature.,"

Summary

The field of literature-based discovery is growing in step with the volume of literature being produced. From modern natural language processing algorithms to high quality entity tagging, the methods and their impact are developing rapidly. One annotation object that arises from these approaches, the subject-predicate-object triple, is proving to be very useful in representing knowledge. We have implemented efficient search methods and an application programming interface, to create fast and convenient functions to utilize triples extracted from the biomedical literature by SemMedDB. By refining these data, we have identified a set of triples that focus on the mechanistic aspects of the literature, and provide simple methods to explore both enriched triples from single queries, and overlapping triples across two query lists.

Availability and implementation

https://melodi-presto.mrcieu.ac.uk/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32839519,Serum albumin as a predictor of neurological recovery after spinal cord injury: a replication study.,"

Study design

This was a secondary analysis on an observational cohort study.

Objective

To determine if serum albumin significantly associates with long-term neurological outcome (i.e., 1-year post-injury) in a contemporary cohort of individuals with spinal cord injury.

Setting

Six rehabilitation centers across the United States.

Methods

A secondary analysis of neurological outcomes and serum albumin concentrations was performed on data from the Spinal Cord Injury Rehabilitation study. Data was accessed from the Archive of Data on Disability to Enable Policy and research (ADDEP). The primary analysis applied unbiased recursive partitioning to examine the relationship between serum albumin, injury severity, and long-term outcomes. The analysis is accessible via https://rpubs.com/AnhKhoaVo/586028 .

Results

Serum albumin concentration was significantly associated with lower extremity motor scores (LEMS) and American Spinal Injury Association Impairment Scale (AIS) grade at admission to rehabilitation. Serum albumin concentrations alone were also significantly associated with change of LEMS and marked recovery (improvement of at least 2 AIS grades and/or recovery to walking) at 1-year post injury. However, after adjusting for admission to rehabilitation LEMS and AIS grade, serum albumin was not significant.

Conclusion

The current study partially confirms our previous observations that serum albumin concentrations are associated with neurological outcome after spinal cord injury. As a crude prognostic biomarker, serum albumin concentration could be useful in cases where injury severity cannot be accurately assessed.","hji,kes",0,0,0,2,0,NA,NA +32840559,SOLQC: Synthetic Oligo Library Quality Control tool.,"

Motivation

Recent years have seen a growing number and an expanding scope of studies using synthetic oligo libraries for a range of applications in synthetic biology. As experiments are growing by numbers and complexity, analysis tools can facilitate quality control and support better assessment and inference.

Results

We present a novel analysis tool, called SOLQC, which enables fast and comprehensive analysis of synthetic oligo libraries, based on NGS analysis performed by the user. SOLQC provides statistical information such as the distribution of variant representation, different error rates and their dependence on sequence or library properties. SOLQC produces graphical reports from the analysis, in a flexible format. We demonstrate SOLQC by analyzing literature libraries. We also discuss the potential benefits and relevance of the different components of the analysis.

Availability and implementation

SOLQC is a free software for non-commercial use, available at https://app.gitbook.com/@yoav-orlev/s/solqc/. For commercial use please contact the authors.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32840574,KORP-PL: a coarse-grained knowledge-based scoring function for protein-ligand interactions.,"

Motivation

Despite the progress made in studying protein-ligand interactions and the widespread application of docking and affinity prediction tools, improving their precision and efficiency still remains a challenge. Computational approaches based on the scoring of docking conformations with statistical potentials constitute a popular alternative to more accurate but costly physics-based thermodynamic sampling methods. In this context, a minimalist and fast sidechain-free knowledge-based potential with a high docking and screening power can be very useful when screening a big number of putative docking conformations.

Results

Here, we present a novel coarse-grained potential defined by a 3D joint probability distribution function that only depends on the pairwise orientation and position between protein backbone and ligand atoms. Despite its extreme simplicity, our approach yields very competitive results with the state-of-the-art scoring functions, especially in docking and screening tasks. For example, we observed a twofold improvement in the median 5% enrichment factor on the DUD-E benchmark compared to Autodock Vina results. Moreover, our results prove that a coarse sidechain-free potential is sufficient for a very successful docking pose prediction.

Availability and implementation

The standalone version of KORP-PL with the corresponding tests and benchmarks are available at https://team.inria.fr/nano-d/korp-pl/ and https://chaconlab.org/modeling/korp-pl.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32871007,SARS2020: an integrated platform for identification of novel coronavirus by a consensus sequence-function model.,"

Motivation

The 2019 novel coronavirus outbreak has significantly affected global health and society. Thus, predicting biological function from pathogen sequence is crucial and urgently needed. However, little work has been conducted to identify viruses by the enzymes that they encode, and which are key to pathogen propagation.

Results

We built a comprehensive scientific resource, SARS2020, which integrates coronavirus-related research, genomic sequences and results of anti-viral drug trials. In addition, we built a consensus sequence-catalytic function model from which we identified the novel coronavirus as encoding the same proteinase as the severe acute respiratory syndrome virus. This data-driven sequence-based strategy will enable rapid identification of agents responsible for future epidemics.

Availability and implementation

SARS2020 is available at http://design.rxnfinder.org/sars2020/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,diverse coronavirus data resource,Chuck Check - discussion - tool - integration but not enough value add - N +32884207,From sequence analysis of DPP-4 to molecular docking based searching of its inhibitors.,"Literature data suggests that Dipeptidyl peptidase-4 (DPP-4) is a potential target for type 2 Diabetes Mellitus. Therefore, it is of interest to identify new DPP-4 inhibitors using molecular docking analysis. We document compounds such as STOCK1N-98884, STOCK1N-98881, and STOCK1N-98866 with optimal binding features with DPP-4 from the ligand database at https://www.ibscreen.com/ for further consideration.","hji,kes",0,0,0,2,0,mentions database,not descriptive of resource +32889703,Six- and Twelve-Month Follow-up Results of a Cluster Randomized Controlled Trial of a CBT-Based Group Course.,"In the current study, we examined the durability of intervention gains over a 6- and 12-month follow-up period after the implementation of a CBT-based group intervention ""Adolescent Coping with Depression Course"" (ACDC) for adolescents with subclinical or mild-to-moderate depression. Data were collected from 228 youth, 133 of whom were allocated to the 14-week ACDC intervention and 95 to the usual care (UC) control condition. Analyses for the main outcome variable of depressive symptoms were performed using a random effects repeated measures piecewise growth model to estimate trajectory shape over time on an intention-to-treat basis. Results revealed that the reduction in depressive symptoms achieved during the intervention phase continued across the follow-up period for both ACDC and UC (i.e., depressive symptoms showed a significantly decreasing trend in both groups in intervention and follow-up phases); however, no differential effects between conditions were found during the follow-up phase. 
The direct and indirect effects of the intervention on the other outcome variables' follow-up results were also presented. ISRCTN registry ISRCTN19700389. Registered 6 October 2015. https://doi.org/10.1186/ISRCTN19700389 . Full Protocol: https://doi.org/10.1186/s12888-016-0954-y.","hji,kes",0,0,0,2,0,NA,NA +32926121,CHESPA/CHESCA-SPARKY: automated NMR data analysis plugins for SPARKY to map protein allostery.,"

Motivation

Correlated Nuclear Magnetic Resonance (NMR) chemical shift changes identified through the CHEmical Shift Projection Analysis (CHESPA) and CHEmical Shift Covariance Analysis (CHESCA) reveal pathways of allosteric transitions in biological macromolecules. To address the need for an automated platform that implements CHESPA and CHESCA and integrates them with other NMR analysis software packages, we introduce here integrated plugins for NMRFAM-SPARKY that implement the seamless detection and visualization of allosteric networks.

Availability and implementation

CHESCA-SPARKY and CHESPA-SPARKY are available in the latest version of NMRFAM-SPARKY from the National Magnetic Resonance Facility at Madison (http://pine.nmrfam.wisc.edu/download_packages.html), the NMRbox Project (https://nmrbox.org) and to subscribers to the SBGrid (https://sbgrid.org). The assigned spectra involved in this study and tutorial videos using this dataset are available at https://sites.google.com/view/chescachespa-sparky.

Supplementary information

Supplementary data are available at Bioinformatics Online.","hji,kes",0,0,0,2,0,software,hard one - mention of dataset but the resources are packages - N +32926411,Quantitative SEM characterisation of ceramic target prior and after magnetron sputtering: a case study of aluminium zinc oxide.,"Till now electron microscopy techniques have not been used to evaluate the plasma-target interactions undergone during the magnetron sputtering process. The destructive nature of this interaction severely alters the target microstructure. Utilising quantitative microscopy techniques can shed light on the complex plasma and solid-state processes involved which can ultimately lead to improved functional thin film deposition. As a representative functional material, aluminium-doped-zinc oxide (AZO) is an upcoming alternative to conventional transparent electrode wherein the process optimisation is of great importance. In this paper, we evaluate the pre- and post-sputter field emission scanning electron microscopy (FESEM) data for ceramic AZO target fabricated at three final sintering temperatures (1100°C, 1200°C and 1300°C). In all cases, grain boundaries are merged in addition to a visible reduction in the secondary phases which makes segmentation-based image analysis challenging. Through surface statistics (i.e. fractal dimension, autocorrelation length, texture aspect ratio and entropy) as a function of magnification we can quantify the electron microscopy image of the microstructure. We show that the plasma-microstructure interaction leads to an increase in autocorrelation length, texture aspect ratio and entropy for the optimum AZO ceramic sputtering target sintered at 1200°C. Furthermore, a maximum reduction in fractal dimension span (as determined by exponential regression) is also observed for 1200°C. In addition to the evaluation of plasma effects on sintering, our approach can provide a window towards understanding the underlying thin film growth mechanisms. 
We believe that this technique can be applied to the defect characterisation of a wide range of polycrystalline ceramic sputtering targets (e.g. ITO, CZTS, GAZO and so on) with the ultimate goal of improving the magnetron sputtering process and the resulting functional thin film. LAY DESCRIPTION: Magnetron sputtering allows scientists to make functional thin films on the order of the nanoscale. In this technique, atoms are plucked from a 'target' then placed onto a substrate forming a thin nanometric film: all thanks to magnets, a special power supply and the fourth state of matter (plasma). Understanding what is going on and how to make a 'good' thin film is important for making better light emitting diodes, solar cells and light sensors. Scientists use electron microscopy to see what is going on in the microstructure of the sputtered thin films to fine tune the sputtering recipe. Here, for the first time, we have applied electron microscopy to see the surface of the microstructure before and after magnetron sputtering. This will help us understanding the plasma-microstructure interaction allowing us to make more informed decisions when fine-tuning the sputtering process to get improved thin films. This is a case study of aluminium-doped zinc oxide (AZO) target that could potentially replace indium tin oxide (ITO), which is widely used as a transparent electrode in devices involving light and electricity. In this case, improved characteristics would be lower electrical resistivity and higher transmission of light. We show that it is possible to use a mathematical description (e.g. the fractal dimension) of the scanning electron microscopy picture to show a link between the target surface and the functional properties. 
Simple explanation of fractal dimensions by Sixty Symbols ○ https://www.youtube.com/watch?v=cmBljeC79Ls Experimental demonstration of magnetron sputtering by The Thought Emporium ○ https://www.youtube.com/watch?v=Cyu7etM-0Ko Introductory video on magnetron sputtering by Applied Science ○ https://www.youtube.com/watch?v=9OEz_e9C4KM Demonstration of AZO target fabrication and sputtering by Pradhyut Rajjkumar ○ https://www.youtube.com/watch?v=kTLaTJfNX3c Simple explanation of a DIY SEM by Applied Science ○ https://www.youtube.com/watch?v=VdjYVF4a6iU.","hji,kes",0,0,0,2,0,NA,NA +32931552,AFid: a tool for automated identification and exclusion of autofluorescent objects from microscopy images.,"

Motivation

Autofluorescence is a long-standing problem that has hindered the analysis of images of tissues acquired by fluorescence microscopy. Current approaches to mitigate autofluorescence in tissue are lab-based and involve either chemical treatment of sections or specialized instrumentation and software to 'unmix' autofluorescent signals. Importantly, these approaches are pre-emptive and there are currently no methods to deal with autofluorescence in acquired fluorescence microscopy images.

Results

To address this, we developed Autofluorescence Identifier (AFid). AFid identifies autofluorescent pixels as discrete objects in multi-channel images post-acquisition. These objects can then be tagged for exclusion from downstream analysis. We validated AFid using images of FFPE human colorectal tissue stained for common immune markers. Further, we demonstrate its utility for image analysis where its implementation allows the accurate measurement of HIV-Dendritic cell interactions in a colorectal explant model of HIV transmission. Therefore, AFid represents a major leap forward in the extraction of useful data from images plagued by autofluorescence by offering an approach that is easily incorporated into existing workflows and that can be used with various samples, staining panels and image acquisition methods. We have implemented AFid in ImageJ, Matlab and R to accommodate the diverse image analysis community.

Availability and implementation

AFid software is available at https://ellispatrick.github.io/AFid.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32963084,CRISpy-Pop: A Web Tool for Designing CRISPR/Cas9-Driven Genetic Modifications in Diverse Populations.,"CRISPR/Cas9 is a powerful tool for editing genomes, but design decisions are generally made with respect to a single reference genome. With population genomic data becoming available for an increasing number of model organisms, researchers are interested in manipulating multiple strains and lines. CRISpy-pop is a web application that generates and filters guide RNA sequences for CRISPR/Cas9 genome editing for diverse yeast and bacterial strains. The current implementation designs and predicts the activity of guide RNAs against more than 1000 Saccharomyces cerevisiae genomes, including 167 strains frequently used in bioenergy research. Zymomonas mobilis, an increasingly popular bacterial bioenergy research model, is also supported. CRISpy-pop is available as a web application (https://CRISpy-pop.glbrc.org/) with an intuitive graphical user interface. CRISpy-pop also cross-references the human genome to allow users to avoid the selection of guide RNAs with potential biosafety concerns. Additionally, CRISpy-pop predicts the strain coverage of each guide RNA within the supported strain sets, which aids in functional population genetic studies. Finally, we validate how CRISpy-pop can accurately predict the activity of guide RNAs across strains using population genomic data.","hji,kes",0,0,0,2,0,software,NA +32963995,Shorter time to clinical decision in work-related asthma using a digital tool.,"PEF curves are a useful but cumbersome tool in diagnosing work-related asthma. Using a digital spirometer and smartphone app, time to clinical decision could be shortened by 6-7 weeks. Physician's time spent analysing PEF data is also shortened. 
https://bit.ly/3d5SY78.","hji,kes",0,0,0,2,0,NA,NA +32973479,Nutil: A Pre- and Post-processing Toolbox for Histological Rodent Brain Section Images.,"With recent technological advances in microscopy and image acquisition of tissue sections, further developments of tools are required for viewing, transforming, and analyzing the ever-increasing amounts of high-resolution data produced. In the field of neuroscience, histological images of whole rodent brain sections are commonly used for investigating brain connections as well as cellular and molecular organization in the normal and diseased brain, but present a problem for the typical neuroscientist with no or limited programming experience in terms of the pre- and post-processing steps needed for analysis. To meet this need we have designed Nutil, an open access and stand-alone executable software that enables automated transformations, post-processing, and analyses of 2D section images using multi-core processing (OpenMP). The software is written in C++ for efficiency, and provides the user with a clean and easy graphical user interface for specifying the input and output parameters. Nutil currently contains four separate tools: (1) A transformation toolchain named ""Transform"" that allows for rotation, mirroring and scaling, resizing, and renaming of very large tiled tiff images. (2) ""TiffCreator"" enables the generation of tiled TIFF images from other image formats such as PNG and JPEG. (3) A ""Resize"" tool completes the preprocessing toolset and allows downscaling of PNG and JPEG images with output in PNG format. (4) The fourth tool is a post-processing method called ""Quantifier"" that enables the quantification of segmented objects in the context of regions defined by brain atlas maps generated with the QuickNII software based on a 3D reference atlas (mouse or rat). 
The output consists of a set of report files, point cloud coordinate files for visualization in reference atlas space, and reference atlas images superimposed with color-coded objects. The Nutil software is made available by the Human Brain Project (https://www.humanbrainproject.eu) at https://www.nitrc.org/projects/nutil/.","hji,kes",0,0,0,2,0,software,references a database - NITR +32976679,Spage2vec: Unsupervised representation of localized spatial gene expression signatures.,"Investigations of spatial cellular composition of tissue architectures revealed by multiplexed in situ RNA detection often rely on inaccurate cell segmentation or prior biological knowledge from complementary single-cell sequencing experiments. Here, we present spage2vec, an unsupervised segmentation-free approach for decrypting the spatial transcriptomic heterogeneity of complex tissues at subcellular resolution. Spage2vec represents the spatial transcriptomic landscape of tissue samples as a graph and leverages a powerful machine learning graph representation technique to create a lower dimensional representation of local spatial gene expression. We apply spage2vec to mouse brain data from three different in situ transcriptomic assays and to a spatial gene expression dataset consisting of hundreds of individual cells. We show that learned representations encode meaningful biological spatial information of re-occurring localized gene expression signatures involved in cellular and subcellular processes. DATABASE: Spatial gene expression data are available in Zenodo database at https://doi.org/10.5281/zenodo.3897401. 
Source code for reproducing analysis results and figures is available in Zenodo database at http://www.doi.org/10.5281/zenodo.4030404.","hji,kes",0,0,0,2,0,mentions database,zenodo +32981359,Twenty-Four-Hour Urinary Sodium and Potassium Excretion and Their Associations With Blood Pressure Among Adults in China: Baseline Survey of Action on Salt China.,"This study aimed to assess current level of sodium and potassium intake and their associations with blood pressure (BP) using the 24-hour urinary data in a large sample of China. Data from participants aged 18 to 75 years were collected as the baseline survey of Action on Salt China in 2018. Of 5454 adults, 5353 completed 24-hour urine collection. The average sodium, potassium excretion, and sodium-to-potassium molar ratio were 4318.1±1814.1 mg/d (equivalent to 11.0±4.6 g/d of salt), 1573.7±627.1 mg/d, and 5.0±2.1, respectively. After adjusting for potential confounding factors and correcting for regression dilution, each 1000-mg increase in sodium excretion was associated with increased systolic BP (1.32 mm Hg [95% CI, 0.92-1.81]) and diastolic BP (0.34 mm Hg [95% CI, 0.09-0.60]). Each 1000-mg increase in potassium excretion was inversely associated with systolic BP (-3.19 mm Hg [95% CI, -4.38 to -2.20]) and diastolic BP (-1.56 mm Hg [95% CI, -2.29 to -0.90]). Each unit increase in sodium-to-potassium molar ratio was associated with an increase of systolic BP by 1.21 mm Hg (95% CI, 0.91-1.60) and diastolic BP by 0.44 mm Hg (95% CI, 0.24-0.64). The relationships between sodium and BP mostly increase with the rise of BP quantiles. Potassium shows the opposite trend. The current sodium intake in Chinese adults remains high and potassium intake is low. Sodium and sodium-to-potassium ratio were positively associated with BP, whereas potassium was inversely associated with BP. Registration- URL: https://tinyurl.com/vdr8rpr; Unique identifier: ChiCTR1800017553. 
URL: https://tinyurl.com/w8c7x3w; Unique identifier: ChiCTR1800016804. URL: https://tinyurl.com/s3ajldw; Unique identifier: ChiCTR1800018119.","hji,kes",0,0,0,2,0,NA,NA +32997734,Fijiyama: a registration tool for 3D multimodal time-lapse imaging.,"

Summary

The increasing interest of animal and plant research communities for biomedical 3D imaging devices results in the emergence of new topics. The anatomy, structure and function of tissues can be observed non-destructively in time-lapse multimodal imaging experiments by combining the outputs of imaging devices such as X-ray CT and MRI scans. However, living samples cannot remain in these devices for a long period. Manual positioning and natural growth of the living samples induce variations in the shape, position and orientation in the acquired images that require a preprocessing step of 3D registration prior to analyses. This registration step becomes more complex when combining observations from devices that highlight various tissue structures. Identifying image invariants over modalities is challenging and can result in intractable problems. Fijiyama, a Fiji plugin built upon biomedical registration algorithms, is aimed at non-specialists to facilitate automatic alignment of 3D images acquired either at successive times and/or with different imaging systems. Its versatility was assessed on four case studies combining multimodal and time series data, spanning from micro to macro scales.

Availability and implementation

Fijiyama is an open source software (GPL license) implemented in Java. The plugin is available through the official Fiji release. An extensive documentation is available at the official page: https://imagej.github.io/Fijiyama.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +32997753,BEST: a Shiny/R web-based application to easily retrieve cross-related enzyme functional parameters and information from BRENDA.,"

Motivation

BRENDA is the largest enzyme functional database, containing information of 84 000 experimentally characterized enzyme entries. This database is an invaluable resource for researchers in the biological field, which classifies enzyme-related information in categories that are very useful to obtain specific functional and protein engineering information for enzyme families. However, the BRENDA web interface, the most used by researchers with a non-informatic background, does not allow the user to cross-reference data from different categories or sub-categories in the database. Obtaining information in an easy and fast way, in a friendly web interface, without the necessity to have a deep informatics knowledge, will facilitate and improve research in the enzymology and protein engineering field.

Results

We developed the Brenda Easy Search Tool (BEST), an interactive Shiny/R application that enables querying the BRENDA database for complex cross-tabulated characteristics, and retrieving enzyme-related parameters and information readily and efficiently, which can be used for the study of enzyme function or as an input for other bioinformatics tools.

Availability and implementation

BEST and its tutorial are freely available from https://pesb2.cl/best/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,interface to another database,NA +33006570,Automating the Generation of Antimicrobial Resistance Surveillance Reports: Proof-of-Concept Study Involving Seven Hospitals in Seven Countries.,"

Background

Reporting cumulative antimicrobial susceptibility testing data on a regular basis is crucial to inform antimicrobial resistance (AMR) action plans at local, national, and global levels. However, analyzing data and generating a report are time consuming and often require trained personnel.

Objective

This study aimed to develop and test an application that can support a local hospital to analyze routinely collected electronic data independently and generate AMR surveillance reports rapidly.

Methods

An offline application to generate standardized AMR surveillance reports from routinely available microbiology and hospital data files was written in the R programming language (R Project for Statistical Computing). The application can be run by double clicking on the application file without any further user input. The data analysis procedure and report content were developed based on the recommendations of the World Health Organization Global Antimicrobial Resistance Surveillance System (WHO GLASS). The application was tested on Microsoft Windows 10 and 7 using open access example data sets. We then independently tested the application in seven hospitals in Cambodia, Lao People's Democratic Republic, Myanmar, Nepal, Thailand, the United Kingdom, and Vietnam.

Results

We developed the AutoMated tool for Antimicrobial resistance Surveillance System (AMASS), which can support clinical microbiology laboratories to analyze their microbiology and hospital data files (in CSV or Excel format) onsite and promptly generate AMR surveillance reports (in PDF and CSV formats). The data files could be those exported from WHONET or other laboratory information systems. The automatically generated reports contain only summary data without patient identifiers. The AMASS application is downloadable from https://www.amass.website/. The participating hospitals tested the application and deposited their AMR surveillance reports in an open access data repository.

Conclusions

The AMASS is a useful tool to support the generation and sharing of AMR surveillance reports.","hji,kes",0,0,0,2,0,software,NA +33009809,Detection of mobile genetic elements associated with antibiotic resistance in Salmonella enterica using a newly developed web tool: MobileElementFinder.,"

Objectives

Antimicrobial resistance (AMR) in clinically relevant bacteria is a growing threat to public health globally. In these bacteria, antimicrobial resistance genes are often associated with mobile genetic elements (MGEs), which promote their mobility, enabling them to rapidly spread throughout a bacterial community.

Methods

The tool MobileElementFinder was developed to enable rapid detection of MGEs and their genetic context in assembled sequence data. MGEs are detected based on sequence similarity to a database of 4452 known elements augmented with annotation of resistance genes, virulence factors and detection of plasmids.

Results

MobileElementFinder was applied to analyse the mobilome of 1725 sequenced Salmonella enterica isolates of animal origin from Denmark, Germany and the USA. We found that the MGEs were seemingly conserved according to multilocus ST and not restricted to either the host or the country of origin. Moreover, we identified putative translocatable units for specific aminoglycoside, sulphonamide and tetracycline genes. Several putative composite transposons were predicted that could mobilize, among others, AMR, metal resistance and phosphodiesterase genes associated with macrophage survivability. This is, to our knowledge, the first time the phosphodiesterase-like pdeL has been found to be potentially mobilized into S. enterica.

Conclusions

MobileElementFinder is a powerful tool to study the epidemiology of MGEs in a large number of genome sequences and to determine the potential for genomic plasticity of bacteria. This web service provides a convenient method of detecting MGEs in assembled sequence data. MobileElementFinder can be accessed at https://cge.cbs.dtu.dk/services/MobileElementFinder/.","hji,kes",0,0,0,2,0,software,NA +33010165,"Newt: a comprehensive web-based tool for viewing, constructing and analyzing biological maps.","

Motivation

Visualization of cellular processes and pathways is a key recurring requirement for effective biological data analysis. There is a considerable need for sophisticated web-based pathway viewers and editors operating with widely accepted standard formats, using the latest visualization techniques and libraries.

Results

We developed a web-based tool named Newt for viewing, constructing and analyzing biological maps in standard formats such as SBGN, SBML and SIF.

Availability and implementation

Newt's source code is publicly available on GitHub and freely distributed under the GNU LGPL. Ample documentation on Newt can be found on http://newteditor.org and on YouTube.","hji,kes",0,0,0,2,0,software,NA +33016192,Curated gene expression dataset of differentiating 3T3-L1 adipocytes under pharmacological and genetic perturbations.,"The 3T3-L1 cell line is used as an adipocyte differentiation model for the analysis of genes specifically expressed during the differentiation course. This cell model has several applications in obesity and insulin resistance research. We built a data resource to model gene expression of differentiating and mature adipocytes in response to several drugs and gene manipulations. We surveyed the literature survey for microarray datasets of differentiating 3T3-L1 cell line sampled at one or more time points under genetic or pharmacological perturbations. Data and metadata were obtained from the gene expression omnibus. The metadata were manually curated using unified language across the studies. Probe intensities were mapped and collapsed to genes using a reproducible pipeline. Samples were classified into none, genetically or pharmacologically modified. In addition to the clean datasets, two aggregated sets were further homogenized for illustration purposes. The curated datasets are available as an R/Bioconductor experimental data package curatedAdipoArray. The package documents the source code of the data collection, curation and processing. Finally, we used a subset of the data to effectively remove batch effects and reproduce biological observations. Database URL https://bioconductor.org/packages/curatedAdipoArray.","hji,kes",0,0,0,2,0,"curated dataset; reassessed, no bioconductor",described as a data resource but a bioconductor package; reassessed and still no - not including bioconductor packages +33016997,ADACT: a tool for analysing (dis)similarity among nucleotide and protein sequences using minimal and relative absent words.,"

Motivation

Researchers and practitioners use a number of popular sequence comparison tools that use many alignment-based techniques. Due to high time and space complexity and length-related restrictions, researchers often seek alignment-free tools. Recently, some interesting ideas, namely, Minimal Absent Words (MAW) and Relative Absent Words (RAW), have received much interest among the scientific community as distance measures that can give us alignment-free alternatives. This drives us to structure a framework for analysing biological sequences in an alignment-free manner.

Results

In this application note, we present Alignment-free Dissimilarity Analysis & Comparison Tool (ADACT), a simple web-based tool that computes the analogy among sequences using a varied number of indexes through the distance matrix, species relation list and phylogenetic tree. This tool basically combines absent word (MAW or RAW) computation, dissimilarity measures, species relationship and thus brings all required software in one platform for the ease of researchers and practitioners alike in the field of bioinformatics. We have also developed a restful API.

Availability and implementation

ADACT has been hosted at http://research.buet.ac.bd/ADACT/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,API but not a data resource +33027504,PredHPI: an integrated web server platform for the detection and visualization of host-pathogen interactions using sequence-based methods.,"

Motivation

Understanding the mechanisms underlying infectious diseases is fundamental to develop prevention strategies. Host-pathogen interactions (HPIs) are actively studied worldwide to find potential genomic targets for the development of novel drugs, vaccines and other therapeutics. Determining which proteins are involved in the interaction system behind an infectious process is the first step to develop an efficient disease control strategy. Very few computational methods have been implemented as web services to infer novel HPIs, and there is not a single framework which combines several of those approaches to produce and visualize a comprehensive analysis of HPIs.

Results

Here, we introduce PredHPI, a powerful framework that integrates both the detection and visualization of interaction networks in a single web service, facilitating the apprehension of model and non-model host-pathogen systems to aid the biologists in building hypotheses and designing appropriate experiments. PredHPI is built on high-performance computing resources on the backend capable of handling proteome-scale sequence data from both the host as well as pathogen. Data are displayed in an information-rich and interactive visualization, which can be further customized with user-defined layouts. We believe PredHPI will serve as an invaluable resource to diverse experimental biologists and will help advance the research in the understanding of complex infectious diseases.

Availability and implementation

PredHPI tool is freely available at http://bioinfo.usu.edu/PredHPI/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33031509,Automated download and clean-up of family-specific databases for kmer-based virus identification.,"

Summary

Here, we present an automated pipeline for Download Of NCBI Entries (DONE) and continuous updating of a local sequence database based on user-specified queries. The database can be created with either protein or nucleotide sequences containing all entries or complete genomes only. The pipeline can automatically clean the database by removing entries with matches to a database of user-specified sequence contaminants. The default contamination entries include sequences from the UniVec database of plasmids, marker genes and sequencing adapters from NCBI, an E.coli genome, rRNA sequences, vectors and satellite sequences. Furthermore, duplicates are removed and the database is automatically screened for sequences from green fluorescent protein, luciferase and antibiotic resistance genes that might be present in some GenBank viral entries, and could lead to false positives in virus identification. For utilizing the database, we present a useful opportunity for dealing with possible human contamination. We show the applicability of DONE by downloading a virus database comprising 37 virus families. We observed an average increase of 16 776 new entries downloaded per month for the 37 families. In addition, we demonstrate the utility of a custom database compared to a standard reference database for classifying both simulated and real sequence data.

Availability and implementation

The DONE pipeline for downloading and cleaning is deposited in a publicly available repository (https://bitbucket.org/genomicepidemiology/done/src/master/).

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,interface to another database,for cleaning databases not a database itself +33039623,The Developmental Chronnecto-Genomics (Dev-CoG) study: A multimodal study on the developing brain.,"Brain development has largely been studied through unimodal analysis of neuroimaging data, providing independent results for structural and functional data. However, structure clearly impacts function and vice versa, pointing to the need for performing multimodal data collection and analysis to improve our understanding of brain development, and to further inform models of typical and atypical brain development across the lifespan. Ultimately, such models should also incorporate genetic and epigenetic mechanisms underlying brain structure and function, although currently this area is poorly specified. To this end, we are reporting here a multi-site, multi-modal dataset that captures cognitive function, brain structure and function, and genetic and epigenetic measures to better quantify the factors that influence brain development in children originally aged 9-14 years. Data collection for the Developmental Chronnecto-Genomics (Dev-CoG) study (http://devcog.mrn.org/) includes cognitive, emotional, and social performance scales, structural and functional MRI, diffusion MRI, magnetoencephalography (MEG), and saliva collection for DNA analysis of single nucleotide polymorphisms (SNPs) and DNA methylation patterns. Across two sites (The Mind Research Network and the University of Nebraska Medical Center), data from over 200 participants were collected and these children were re-tested annually for at least 3 years. The data collection protocol, sample demographics, and data quality measures for the dataset are presented here. 
The sample will be made freely available through the collaborative informatics and neuroimaging suite (COINS) database at the conclusion of the study.","hji,kes",0,0,0,2,0,NA,Chuck Check - project site - N +33051283,Gender-transformative programming with men and boys to improve sexual and reproductive health and rights: a systematic review of intervention studies.,"

Background

Global health organisations advocate gender-transformative programming (which challenges gender inequalities) with men and boys to improve sexual and reproductive health and rights (SRHR) for all. We systematically review evidence for this approach.

Methods

We previously reported an evidence-and-gap map (http://srhr.org/masculinities/wbincome/) and systematic review of reviews of experimental intervention studies engaging men/boys in SRHR, identified through a Campbell Collaboration published protocol (https://doi.org/10.1002/CL2.203) without language restrictions between January 2007 and July 2018. Records for the current review of intervention studies were retrieved from those systematic reviews containing one or more gender-transformative intervention studies engaging men/boys. Data were extracted for intervention studies relating to each of the World Health Organization (WHO) SRHR outcomes. Promising programming characteristics, as well as underused strategies, were analysed with reference to the WHO definition of gender-transformative programming and an established behaviour change model, the COM-B model. Risk of bias was assessed using Cochrane Risk of Bias tools, RoB V.2.0 and Risk of Bias In Non-randomised Studies of Interventions.

Findings

From 509 eligible records, we synthesised 68 studies comprising 36 randomised controlled trials, n=56 417 participants, and 32 quasi-experimental studies, n=25 554 participants. Promising programming characteristics include: multicomponent activities of education, persuasion, modelling and enablement; multilevel programming that mobilises wider communities; targeting both men and women; and programmes of longer duration than three months. Six of the seven interventions evaluated more than once show efficacy. However, we identified a significant risk of bias in the overall available evidence. Important gaps in evidence relate to safe abortion and SRHR during disease outbreaks.

Conclusion

It is widely acknowledged by global organisations that the question is no longer whether to include boys and men in SRHR but how to do so in ways that promote gender equality and health for all and are scientifically rigorous. This paper provides an evidence base to take this agenda for programming and research forward.","hji,kes",0,0,0,2,0,NA,NA +33054771,smORFunction: a tool for predicting functions of small open reading frames and microproteins.,"

Background

Small open reading frame (smORF) is open reading frame with a length of less than 100 codons. Microproteins, translated from smORFs, have been found to participate in a variety of biological processes such as muscle formation and contraction, cell proliferation, and immune activation. Although previous studies have collected and annotated a large abundance of smORFs, functions of the vast majority of smORFs are still unknown. It is thus increasingly important to develop computational methods to annotate the functions of these smORFs.

Results

In this study, we collected 617,462 unique smORFs from three studies. The expression of smORF RNAs was estimated by reannotated microarray probes. Using a speed-optimized correlation algorism, the functions of smORFs were predicted by their correlated genes with known functional annotations. After applying our method to 5 known microproteins from literatures, our method successfully predicted their functions. Further validation from the UniProt database showed that at least one function of 202 out of 270 microproteins was predicted.

Conclusions

We developed a method, smORFunction, to provide function predictions of smORFs/microproteins in at most 265 models generated from 173 datasets, including 48 tissues/cells, 82 diseases (and normal). The tool can be available at https://www.cuilab.cn/smorfunction .","hji,kes",0,0,0,2,0,software,NA +33057676,IDseq-An open source cloud-based pipeline and analysis service for metagenomic pathogen detection and monitoring.,"

Background

Metagenomic next-generation sequencing (mNGS) has enabled the rapid, unbiased detection and identification of microbes without pathogen-specific reagents, culturing, or a priori knowledge of the microbial landscape. mNGS data analysis requires a series of computationally intensive processing steps to accurately determine the microbial composition of a sample. Existing mNGS data analysis tools typically require bioinformatics expertise and access to local server-class hardware resources. For many research laboratories, this presents an obstacle, especially in resource-limited environments.

Findings

We present IDseq, an open source cloud-based metagenomics pipeline and service for global pathogen detection and monitoring (https://idseq.net). The IDseq Portal accepts raw mNGS data, performs host and quality filtration steps, then executes an assembly-based alignment pipeline, which results in the assignment of reads and contigs to taxonomic categories. The taxonomic relative abundances are reported and visualized in an easy-to-use web application to facilitate data interpretation and hypothesis generation. Furthermore, IDseq supports environmental background model generation and automatic internal spike-in control recognition, providing statistics that are critical for data interpretation. IDseq was designed with the specific intent of detecting novel pathogens. Here, we benchmark novel virus detection capability using both synthetically evolved viral sequences and real-world samples, including IDseq analysis of a nasopharyngeal swab sample acquired and processed locally in Cambodia from a tourist from Wuhan, China, infected with the recently emergent SARS-CoV-2.

Conclusion

The IDseq Portal reduces the barrier to entry for mNGS data analysis and enables bench scientists, clinicians, and bioinformaticians to gain insight from mNGS datasets for both known and novel pathogens.","hji,kes",0,0,0,2,0,software,NA +33063234,"Social Validity of the Strengthening Families Program in Northeastern Brazil: the Voices of Parents, Adolescents, and Facilitators.","In 2013, Brazil's Ministry of Health adopted the Strengthening Families Program (SFP 10-14), developed internationally for preventing drug abuse by enhancing family bonds. The social validity of the objectives, procedures, and perceived impacts of the program were investigated for participants and facilitators in northeastern Brazil. Focus groups with parents/guardians (N = 199), adolescents (N = 111), and facilitators (N = 100) were implemented. Content analysis revealed that the program's objectives were considered socially relevant and that there was a positive short-term perceived impact on family cohesion, authoritative parenting style, adolescent life skills, and the facilitators' professional capacity. The parents/guardians and adolescents presented a positive perception of the appropriateness of the program's methodology, while facilitators indicated the need to adapt it to vulnerable families and improve its implementation conditions. Future studies may benefit from these findings when developing similarly viable and scalable interventions in low-resource settings. Brazilian Trial Register RBR-7q9xh5. Registered 5 August 2017, http://www.ensaiosclinicos.gov.br/rg/RBR-7q9xh5/.","hji,kes",0,0,0,2,0,NA,NA +33064576,"Triclocarban, Triclosan, Bromochlorophene, Chlorophene, and Climbazole Effects on Nuclear Receptors: An in Silico and in Vitro Study.","

Background

Endocrine-disrupting chemicals can interfere with hormonal homeostasis and have adverse effects for both humans and the environment. Their identification is increasingly difficult due to lack of adequate toxicological tests. This difficulty is particularly problematic for cosmetic ingredients, because in vivo testing is now banned completely in the European Union.

Objectives

The aim was to identify candidate preservatives as endocrine disruptors by in silico methods and to confirm endocrine receptors' activities through nuclear receptors in vitro.

Methods

We screened preservatives listed in Annex V in the European Union Regulation on cosmetic products to predict their binding to nuclear receptors using the Endocrine Disruptome and VirtualToxLab™ version 5.8 in silico tools. Five candidate preservatives were further evaluated for androgen receptor (AR), estrogen receptor (ERα), glucocorticoid receptor (GR), and thyroid receptor (TR) agonist and antagonist activities in cell-based luciferase reporter assays in vitro in AR-EcoScreen, hERα-HeLa-9903, MDA-kb2, and GH3.TRE-Luc cell lines. Additionally, assays to test for false positives were used (nonspecific luciferase gene induction and luciferase inhibition).

Results

Triclocarban had agonist activity on AR and ERα at 1μM and antagonist activity on GR at 5μM and TR at 1μM. Triclosan showed antagonist effects on AR, ERα, GR at 10μM and TR at 5μM, and bromochlorophene at 1μM (AR and TR) and at 10μM (ERα and GR). AR antagonist activity of chlorophene was observed [inhibitory concentration at 50% (IC50) IC50=2.4μM], as for its substantial ERα agonist at >5μM and TR antagonist activity at 10μM. Climbazole showed AR antagonist (IC50=13.6μM), ERα agonist at >10μM, and TR antagonist activity at 10μM.

Discussion

These data support the concerns of regulatory authorities about the endocrine-disrupting potential of preservatives. These data also define the need to further determine their effects on the endocrine system and the need to reassess the risks they pose to human health and the environment. https://doi.org/10.1289/EHP6596.","hji,kes",0,0,0,2,0,NA,NA +33067342,OpenPepXL: An Open-Source Tool for Sensitive Identification of Cross-Linked Peptides in XL-MS.,"Cross-linking MS (XL-MS) has been recognized as an effective source of information about protein structures and interactions. In contrast to regular peptide identification, XL-MS has to deal with a quadratic search space, where peptides from every protein could potentially be cross-linked to any other protein. To cope with this search space, most tools apply different heuristics for search space reduction. We introduce a new open-source XL-MS database search algorithm, OpenPepXL, which offers increased sensitivity compared with other tools. OpenPepXL searches the full search space of an XL-MS experiment without using heuristics to reduce it. Because of efficient data structures and built-in parallelization OpenPepXL achieves excellent runtimes and can also be deployed on large compute clusters and cloud services while maintaining a slim memory footprint. We compared OpenPepXL to several other commonly used tools for identification of noncleavable labeled and label-free cross-linkers on a diverse set of XL-MS experiments. In our first comparison, we used a data set from a fraction of a cell lysate with a protein database of 128 targets and 128 decoys. At 5% FDR, OpenPepXL finds from 7% to over 50% more unique residue pairs (URPs) than other tools. On data sets with available high-resolution structures for cross-link validation OpenPepXL reports from 7% to over 40% more structurally validated URPs than other tools. 
Additionally, we used a synthetic peptide data set that allows objective validation of cross-links without relying on structural information and found that OpenPepXL reports at least 12% more validated URPs than other tools. It has been built as part of the OpenMS suite of tools and supports Windows, macOS, and Linux operating systems. OpenPepXL also supports the MzIdentML 1.2 format for XL-MS identification results. It is freely available under a three-clause BSD license at https://openms.org/openpepxl.","hji,kes",0,0,0,2,0,software,NA +33080021,NAMS webserver: coding potential assessment and functional annotation of plant transcripts.,"Recent advances in transcriptomics have uncovered lots of novel transcripts in plants. To annotate such transcripts, dissecting their coding potential is a critical step. Computational approaches have been proven fruitful in this task; however, most current tools are designed/optimized for mammals and only a few of them have been tested on a limited number of plant species. In this work, we present NAMS webserver, which contains a novel coding potential classifier, NAMS, specifically optimized for plants. We have evaluated the performance of NAMS using a comprehensive dataset containing more than 3 million transcripts from various plant species, where NAMS demonstrates high accuracy and remarkable performance improvements over state-of-the-art software. Moreover, our webserver also furnishes functional annotations, aiming to provide users informative clues to the functions of their transcripts. Considering that most plant species are poorly characterized, our NAMS webserver could serve as a valuable resource to facilitate the transcriptomic studies. 
The webserver with testing dataset is freely available at http://sunlab.cpy.cuhk.edu.hk/NAMS/.","hji,kes",0,0,0,2,0,software,NA +33083503,The babyPose dataset.,"The database here described contains data relevant to preterm infants' movement acquired in neonatal intensive care units (NICUs). The data consists of 16 depth videos recorded during the actual clinical practice. Each video consists of 1000 frames (i.e., 100s). The dataset was acquired at the NICU of the Salesi Hospital, Ancona (Italy). Each frame was annotated with the limb-joint location. Twelve joints were annotated, i.e., left and right shoulder, elbow, wrist, hip, knee and ankle. The database is freely accessible at http://doi.org/10.5281/zenodo.3891404. This dataset represents a unique resource for artificial intelligence researchers that want to develop algorithms to provide healthcare professionals working in NICUs with decision support. Hence, the babyPose dataset is the first annotated dataset of depth images relevant to preterm infants' movement analysis.","hji,kes",1,0,1,2,0.5,manually annotated dataset,clinical data; reassessed and still no - clinical data +33086240,The Caregiver Experience After Stroke in a COVID-19 Environment: A Qualitative Study in Inpatient Rehabilitation.,"

Background and purpose

Stroke is the leading cause of disability in the United States. Two-thirds of stroke survivors will require caregiver assistance. Evidence suggests the mental health of caregivers is closely related to patients' health outcomes. The timing of this study intersected with the beginning of the coronavirus disease-2019 (COVID-19) pandemic that required strict social distancing and hospital visitor policy changes. This study aims to answer the primary research question: What is the level and nature of stress experienced by caregivers of persons with newly-acquired stroke in the inpatient rehabilitation setting and how has the COVID-19 pandemic impacted the caregiver experience?

Methods

Recruitment occurred from a single inpatient rehabilitation facility. Participants were administered the Perceived Stress Scale and then completed qualitative semistructured interviews. The investigators used a phenomenological, iterative approach to collect and analyze qualitative data. The data were independently coded and categorized before consolidated into primary themes and subthemes.

Results

Eleven informal caregivers' perspectives generated 13 subthemes across 4 primary thematic categories: COVID-19 impact, concerns of the caregiver, coping strategies, and important aspects of the caregiver experience. COVID-19 social distancing necessitated new visitor policies, which presented additional challenges for caregivers.

Discussion and conclusions

Caregiver attendance at therapy sessions and frequent, direct communication between staff and caregivers improved caregiver readiness for family member discharge following inpatient rehabilitation. This study shared perspectives from a distinctive time during the COVID-19 pandemic. If visitation for multiple therapy sessions is prohibited, we recommend taking alternative measures to keep the caregiver involved in the plan of care.Video Abstract available for more insights from the authors (see the Video, Supplemental Digital Content 1, available at: http://links.lww.com/JNPT/A326).","hji,kes",0,0,0,2,0,NA,NA +33087719,Density functional theory-based electric field gradient database.,"The deviation of the electron density around the nuclei from spherical symmetry determines the electric field gradient (EFG), which can be measured by various types of spectroscopy. Nuclear Quadrupole Resonance (NQR) is particularly sensitive to the EFG. The EFGs, and by implication NQR frequencies, vary dramatically across materials. Consequently, searching for NQR spectral lines in previously uninvestigated materials represents a major challenge. Calculated EFGs can significantly aid at the search's inception. To facilitate this task, we have applied high-throughput density functional theory calculations to predict EFGs for 15187 materials in the JARVIS-DFT database. This database, which will include EFG as a standard entry, is continuously increasing. Given the large scope of the database, it is impractical to verify each calculation. However, we assess accuracy by singling out cases for which reliable experimental information is readily available and compare them to the calculations. We further present a statistical analysis of the results. 
The database and tools associated with our work are made publicly available by JARVIS-DFT ( https://www.ctcms.nist.gov/~knc6/JVASP.html ) and NIST-JARVIS API ( http://jarvis.nist.gov/ ).","hji,kes",0,0,0,2,0,not bio,Chuck Check - project site - N / I thought because not life sci +33095870,Database resources of the National Center for Biotechnology Information.,"The National Center for Biotechnology Information (NCBI) provides a large suite of online resources for biological information and data, including the GenBank® nucleic acid sequence database and the PubMed® database of citations and abstracts published in life science journals. The Entrez system provides search and retrieval operations for most of these data from 34 distinct databases. The E-utilities serve as the programming interface for the Entrez system. Custom implementations of the BLAST program provide sequence-based searching of many specialized datasets. New resources released in the past year include a new PubMed interface and NCBI datasets. Additional resources that were updated in the past year include PMC, Bookshelf, Genome Data Viewer, SRA, ClinVar, dbSNP, dbVar, Pathogen Detection, BLAST, Primer-BLAST, IgBLAST, iCn3D and PubChem. All of these resources can be accessed through the NCBI home page at https://www.ncbi.nlm.nih.gov.","hji,kes",0,0,0,2,0,"description of interfaces and new NCBI data resources; reassessed, not details for any given resource",like EBI - NCBI is not a data resource in and of itself; reassessed and still no - abstract does not describe or have urls for distinct resources +33098409,SolupHred: a server to predict the pH-dependent aggregation of intrinsically disordered proteins.,"

Summary

Polypeptides are exposed to changing environmental conditions that modulate their intrinsic aggregation propensities. Intrinsically disordered proteins (IDPs) constitutively expose their aggregation determinants to the solvent, thus being especially sensitive to its fluctuations. However, solvent conditions are often disregarded in computational aggregation predictors. We recently developed a phenomenological model to predict IDPs' solubility as a function of the solution pH, which is based on the assumption that both protein lipophilicity and charge depend on this parameter. The model anticipated solubility changes in different IDPs accurately. In this application note, we present SolupHred, a web-based interface that implements the aforementioned theoretical framework into a predictive tool able to compute IDPs aggregation propensities as a function of pH. SolupHred is the first dedicated software for the prediction of pH-dependent protein aggregation.

Availability and implementation

The SolupHred web server is freely available for academic users at: https://ppmclab.pythonanywhere.com/SolupHred. It is platform-independent and does not require previous registration.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33098441,Corona Immunitas: study protocol of a nationwide program of SARS-CoV-2 seroprevalence and seroepidemiologic studies in Switzerland.,"

Objectives

Seroprevalence studies to assess the spread of SARS-CoV-2 infection in the general population and subgroups are key for evaluating mitigation and vaccination policies and for understanding the spread of the disease both on the national level and for comparison with the international community.

Methods

Corona Immunitas is a research program of coordinated, population-based, seroprevalence studies implemented by Swiss School of Public Health (SSPH+). Over 28,340 participants, randomly selected and age-stratified, with some regional specificities will be included. Additional studies in vulnerable and highly exposed subpopulations are also planned. The studies will assess population immunological status during the pandemic.

Results

Phase one (first wave of pandemic) estimates from Geneva showed a steady increase in seroprevalence up to 10.8% (95% CI 8.2-13.9, n = 775) by May 9, 2020. Since June, Zurich, Lausanne, Basel City/Land, Ticino, and Fribourg recruited a total of 5973 participants for phase two thus far.

Conclusions

Corona Immunitas will generate reliable, comparable, and high-quality serological and epidemiological data with extensive coverage of Switzerland and of several subpopulations, informing health policies and decision making in both economic and societal sectors. ISRCTN Registry: https://www.isrctn.com/ISRCTN18181860 .","hji,kes",0,0,0,2,0,NA,NA +33110585,Rapid response to the COVID-19 pandemic: Vietnam government's experience and preliminary success.,"

Background

The COVID-19 pandemic has hit all corners of the world, challenging governments to act promptly in controlling the spread of the pandemic. Due to limited resources and inferior technological capacities, developing countries including Vietnam have faced many challenges in combating the pandemic. Since the first cases were detected on 23 January 2020, Vietnam has undergone a 3-month fierce battle to control the outbreak with stringent measures from the government to mitigate the adverse impacts. In this study, we aim to give insights into the Vietnamese government's progress during the first three months of the outbreak. Additionally, we relatively compare Vietnam's response with that of other Southeast Asia countries to deliver a clear and comprehensive view on disease control strategies.

Methods

The data on the number of COVID-19 confirmed and recovered cases in Vietnam was obtained from the Dashboard for COVID-19 statistics of the Ministry of Health (https://ncov.vncdc.gov.vn/). The review on Vietnam's country-level responses was conducted by searching for relevant government documents issued on the online database 'Vietnam Laws Repository' (https://thuvienphapluat.vn/en/index.aspx), with the grey literature on Google and relevant official websites. A stringency index of government policies and the countries' respective numbers of confirmed cases of nine Southeast Asian countries were adapted from the Oxford COVID-19 Government Response Tracker (https://www.bsg.ox.ac.uk/research/research-projects/coronavirus-government-response-tracker). All data was updated as of 24 April 2020.

Results

Preliminary positive results have been achieved given that the nation confirmed no new community-transmitted cases since 16 April and zero COVID-19 - related deaths throughout the 3-month pandemic period. To date, the pandemic has been successfully controlled thanks to the Vietnamese government's prompt, proactive and decisive responses including mobilization of the health care systems, security forces, economic policies, along with a creative and effective communication campaign corresponding with crucial milestones of the epidemic's progression.

Conclusions

Vietnam could be one of the role models in pandemic control for low-resource settings. As the pandemic is still ongoing in an unpredictable trajectory, disease control measures should continue to be put in place in the foreseeable short term.","hji,kes",0,0,0,2,0,NA,NA +33116744,Identification of Novel Therapeutic Molecular Targets in Inflammatory Bowel Disease by Using Genetic Databases.,"

Purpose

Utilization of genetic databases to identify genes involved in ulcerative colitis (UC), Crohn's disease (CD), and their extra-intestinal manifestations.

Methods

Protein coding genes involved in ulcerative colitis (3783 genes), Crohn's disease (3980 genes), uveitis (1043 genes), arthritis (5583 genes), primary sclerosing cholangitis (PSC) (1313 genes), and pyoderma gangrenosum (119 genes) were categorized using four genetic databases. These include Genecards: The Human Gene Database (www.genecards.org), DisGeNET (https://www.disgenet.org/), The Comparative Toxicogenomics Database (http://ctdbase.org/) and the Universal Protein Resource (https://www.uniprot.org/). NDex, Network Data Exchange (http://www.ndexbio.org/), was then utilized for mapping a unique signal pathway from the identified shared genes involved in the above disease processes.

Results

We have detected a unique array of 20 genes with the highest probability of overlay in UC, CD, uveitis, arthritis, pyoderma gangrenosum, and PSC. Figure 1 represents the interactome of these 20 protein coding genes. Of note, unique immune modulators in different disease processes are also noted. Interleukin-25 (IL-25) and monensin-resistant homolog 2 (MON-2) are only noted in UC, CD, pyoderma gangrenosum, and arthritis. Arachidonate 5-lipoxygenase (ALOX5) is involved in UC, CD, and arthritis. SLCO1B3 is exclusively involved with pyoderma gangrenosum, UC, and CD. As expected, TNF involvement is noted in CD, UC, PSC, and arthritis. Table 1 depicts the detailed result.

Conclusion

Our work has identified a distinctive set of genes involved in IBD and its associated extra-intestinal disease processes. These genes play crucial roles in mechanisms of immune response, inflammation, and apoptosis and further our understanding of this complex disease process. We postulate that these genes play a critical role at intersecting pathways involved in inflammatory bowel disease, and these novel molecules, their upstream and downstream effectors, are potential targets for future therapeutic agents.","hji,kes",0,0,0,2,0,references other db,not descriptive of resource +33135733,Maximum likelihood reconstruction of ancestral networks by integer linear programming.,"

Motivation

The study of the evolutionary history of biological networks enables deep functional understanding of various bio-molecular processes. Network growth models, such as the Duplication-Mutation with Complementarity (DMC) model, provide a principled approach to characterizing the evolution of protein-protein interactions (PPIs) based on duplication and divergence. Current methods for model-based ancestral network reconstruction primarily use greedy heuristics and yield sub-optimal solutions.

Results

We present a new Integer Linear Programming (ILP) solution for maximum likelihood reconstruction of ancestral PPI networks using the DMC model. We prove the correctness of our solution that is designed to find the optimal solution. It can also use efficient heuristics from general-purpose ILP solvers to obtain multiple optimal and near-optimal solutions that may be useful in many applications. Experiments on synthetic data show that our ILP obtains solutions with higher likelihood than those from previous methods, and is robust to noise and model mismatch. We evaluate our algorithm on two real PPI networks, with proteins from the families of bZIP transcription factors and the Commander complex. On both the networks, solutions from our ILP have higher likelihood and are in better agreement with independent biological evidence from other studies.

Availability and implementation

A Python implementation is available at https://bitbucket.org/cdal/network-reconstruction.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +33135764,BiasAway: command-line and web server to generate nucleotide composition-matched DNA background sequences.,"

Motivation

Accurate motif enrichment analyses depend on the choice of background DNA sequences used, which should ideally match the sequence composition of the foreground sequences. It is important to avoid false positive enrichment due to sequence biases in the genome, such as GC-bias. Therefore, relying on an appropriate set of background sequences is crucial for enrichment analysis.

Results

We developed BiasAway, a command line tool and its dedicated easy-to-use web server to generate synthetic sequences matching any k-mer nucleotide composition or select genomic DNA sequences matching the mononucleotide composition of the foreground sequences through four different models. For genomic sequences, we provide precomputed partitions of genomes from nine species with five different bin sizes to generate appropriate genomic background sequences.

Availability and implementation

BiasAway source code is freely available from Bitbucket (https://bitbucket.org/CBGR/biasaway) and can be easily installed using bioconda or pip. The web server is available at https://biasaway.uio.no and a detailed documentation is available at https://biasaway.readthedocs.io.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33137644,PlacentaCellEnrich: A tool to characterize gene sets using placenta cell-specific gene enrichment analysis.,"Single-cell RNA-Sequencing (scRNA-Seq) has improved our understanding of individual cell types in the human placenta. However, placental scRNA-Seq data is not readily accessible when trying to understand how expression patterns in model systems correspond to those from first trimester human placenta. Therefore, we developed PlacentaCellEnrich, a tool that takes a gene set as input, and then reports if the input set is enriched for genes with placenta cell-specific expression patterns, based on human placenta scRNA-Seq data. The PlacentaCellEnrich tool is freely available at https://placentacellenrich.gdcb.iastate.edu/ for non-profit academic use under the MIT license.","hji,kes",0,0,0,2,0,software,NA +33164522,Comprehensive Study on Molecular Supervised Learning with Graph Neural Networks.,"This work considers strategies to develop accurate and reliable graph neural networks (GNNs) for molecular property predictions. Prediction performance of GNNs is highly sensitive to the change in various parameters due to the inherent challenges in molecular machine learning, such as a deficient amount of data samples and bias in data distribution. Comparative studies with well-designed experiments are thus important to clearly understand which GNNs are powerful for molecular supervised learning. Our work presents a number of ablation studies along with a guideline to train and utilize GNNs for both molecular regression and classification tasks. First, we validate that using both atomic and bond meta-information improves the prediction performance in the regression task. Second, we find that the graph isomorphism hypothesis proposed by [Xu, K.; et al How powerful are graph neural networks? 2018, arXiv:1810.00826. arXiv.org e-Print archive. 
https://arxiv.org/abs/1810.00826] is valid for the regression task. Surprisingly, however, the findings above do not hold for the classification tasks. Beyond the study on model architectures, we test various regularization methods and Bayesian learning algorithms to find the best strategy to achieve a reliable classification system. We demonstrate that regularization methods penalizing predictive entropy might not give well-calibrated probability estimation, even though they work well in other domains, and Bayesian learning methods are capable of developing reliable prediction systems. Furthermore, we argue the importance of Bayesian learning in virtual screening by showing that well-calibrated probability estimation may lead to a higher success rate.","hji,kes",0,0,0,2,0,NA,NA +33166149,Insights from the First Phosphopeptide Challenge of the MS Resource Pillar of the HUPO Human Proteome Project.,"Mass spectrometry has greatly improved the analysis of phosphorylation events in complex biological systems and on a large scale. Despite considerable progress, the correct identification of phosphorylated sites, their quantification, and their interpretation regarding physiological relevance remain challenging. The MS Resource Pillar of the Human Proteome Organization (HUPO) Human Proteome Project (HPP) initiated the Phosphopeptide Challenge as a resource to help the community evaluate methods, learn procedures and data analysis routines, and establish their own workflows by comparing results obtained from a standard set of 94 phosphopeptides (serine, threonine, tyrosine) and their nonphosphorylated counterparts mixed at different ratios in a neat sample and a yeast background. Participants analyzed both samples with their method(s) of choice to report the identification and site localization of these peptides, determine their relative abundances, and enrich for the phosphorylated peptides in the yeast background. 
We discuss the results from 22 laboratories that used a range of different methods, instruments, and analysis software. We reanalyzed submitted data with a single software pipeline and highlight the successes and challenges in correct phosphosite localization. All of the data from this collaborative endeavor are shared as a resource to encourage the development of even better methods and tools for diverse phosphoproteomic applications. All submitted data and search results were uploaded to MassIVE (https://massive.ucsd.edu/) as data set MSV000085932 with ProteomeXchange identifier PXD020801.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +33175170,"Database Resources of the National Genomics Data Center, China National Center for Bioinformation in 2021.","The National Genomics Data Center (NGDC), part of the China National Center for Bioinformation (CNCB), provides a suite of database resources to support worldwide research activities in both academia and industry. With the explosive growth of multi-omics data, CNCB-NGDC is continually expanding, updating and enriching its core database resources through big data deposition, integration and translation. In the past year, considerable efforts have been devoted to 2019nCoVR, a newly established resource providing a global landscape of SARS-CoV-2 genomic sequences, variants, and haplotypes, as well as Aging Atlas, BrainBase, GTDB (Glycosyltransferases Database), LncExpDB, and TransCirc (Translation potential for circular RNAs). Meanwhile, a series of resources have been updated and improved, including BioProject, BioSample, GWH (Genome Warehouse), GVM (Genome Variation Map), GEN (Gene Expression Nebulas) as well as several biodiversity and plant resources. Particularly, BIG Search, a scalable, one-stop, cross-database search engine, has been significantly updated by providing easy access to a large number of internal and external biological resources from CNCB-NGDC, our partners, EBI and NCBI. 
All of these resources along with their services are publicly accessible at https://bigd.big.ac.cn.","hji,kes",1,0,1,2,0.5,description of several databases and biodata resources,like EBI - NGDC is not a data resource in and of itself; reassessed and still no - abstract does not describe or have urls for distinct resources +33180722,"A Benchmark for Studying Diabetic Retinopathy: Segmentation, Grading, and Transferability.","People with diabetes are at risk of developing an eye disease called diabetic retinopathy (DR). This disease occurs when high blood glucose levels cause damage to blood vessels in the retina. Computer-aided DR diagnosis has become a promising tool for the early detection and severity grading of DR, due to the great success of deep learning. However, most current DR diagnosis systems do not achieve satisfactory performance or interpretability for ophthalmologists, due to the lack of training data with consistent and fine-grained annotations. To address this problem, we construct a large fine-grained annotated DR dataset containing 2,842 images (FGADR). Specifically, this dataset has 1,842 images with pixel-level DR-related lesion annotations, and 1,000 images with image-level labels graded by six board-certified ophthalmologists with intra-rater consistency. The proposed dataset will enable extensive studies on DR diagnosis. Further, we establish three benchmark tasks for evaluation: 1. DR lesion segmentation; 2. DR grading by joint classification and segmentation; 3. Transfer learning for ocular multi-disease identification. Moreover, a novel inductive transfer learning method is introduced for the third task. Extensive experiments using different state-of-the-art methods are conducted on our FGADR dataset, which can serve as baselines for future research. 
Our dataset will be released in https://csyizhou.github.io/FGADR/.","hji,kes",0,0,0,2,0,NA,NA +33185649,"A system-level analysis of patient disease trajectories based on clinical, phenotypic and molecular similarities.","

Motivation

Incorporating the temporal dimension into multimorbidity studies has shown to be crucial for achieving a better understanding of the disease associations. Furthermore, due to the multifactorial nature of human disease, exploring disease associations from different perspectives can provide a holistic view to support the study of their aetiology.

Results

In this work, a temporal systems-medicine approach is proposed for identifying time-dependent multimorbidity patterns from patient disease trajectories, by integrating data from electronic health records with genetic and phenotypic information. Specifically, the disease trajectories are clustered using an unsupervised algorithm based on dynamic time warping and three disease similarity metrics: clinical, genetic and phenotypic. An evaluation method is also presented for quantitatively assessing, in the different disease spaces, both the cluster homogeneity and the respective similarities between the associated diseases within individual trajectories. The latter can facilitate exploring the origin(s) in the identified disease patterns. The proposed integrative methodology can be applied to any longitudinal cohort and disease of interest. In this article, prostate cancer is selected as a use case of medical interest to demonstrate, for the first time, the identification of temporal disease multimorbidities in different disease spaces.

Availability and implementation

https://gitlab.com/agiannoula/diseasetrajectories.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +33185687,CGPE: An integrated online server for Cancer Gene and Pathway Exploration.,"

Summary

Cancer Gene and Pathway Explorer (CGPE) is developed to guide biological and clinical researchers, especially those with limited informatics and programming skills, performing preliminary cancer related biomedical research using transcriptional data and publications. CGPE enables three user-friendly online analytical and visualization modules without requiring any local deployment. The GenePub HotIndex applies natural language processing, statistics, and association discovery to provide analytical results on gene-specific PubMed publications, including gene-specific research trends, cancer types correlations, top-related genes, and the WordCloud of publication profiles. The OnlineGSEA enables Gene Set Enrichment Analysis (GSEA) and results visualizations through an easy-to-follow interface for public or in-house transcriptional datasets, integrating the GSEA algorithm and preprocessed public TCGA and GEO datasets. The preprocessed datasets ensure gene sets analysis with appropriate pathway alternation and gene signatures. The CellLine Search presents evidence-based guidance for cell line selections with combined information on cell line dependency, gene expressions, and pathway activity maps, which are valuable knowledge to have before conducting gene-related experiments. In a nutshell, the CGPE webserver provides a user-friendly, visual, intuitive, and informative bioinformatics tool that allows biomedical researchers to perform efficient analyses and preliminary studies on in-house and publicly available bioinformatics data.

Availability and implementation

The webserver is freely available online at https://cgpe.soic.iupui.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,analysis server +33204420,"Risk factors, clinical outcomes and predictors of stroke mortality in Sierra Leoneans: A retrospective hospital cohort study.","

Background

Stroke data from Sierra Leone is limited, despite the increase in global burden of the disease. The aim of this study was to assess the risk factors, clinical outcomes and predictors of stroke mortality at a tertiary hospital in Freetown, Sierra Leone.

Methods

This retrospective cohort study was conducted on stroke patients admitted at the Connaught Teaching Hospital between 1st January to December 31, 2018. Clinical data related to stroke, with variables including patients' demographics, stroke subtype, vascular risk factors, modified Rankin Scale (mRS), and outcomes were documented. In-hospital mortality, associated risk factors and predictors of stroke were determined. The study was approved by the Sierra Leone Ethics and Scientific Review Committee. It was registered under Research Registry https://www.researchregistry.com/browse-the-registry#home/ with the unique identifying number researchregistry6009.

Result

We studied 178 (95 male and 83 female) patients. The mean age was 59.8 ± 14.0 years, median was 58.1 years (ranging: 29-88 years). The commonest risk factors were hypertension (84.3%), tobacco smoking (35.9%) and alcohol (31.4%). Ischemic stroke confirmed by CT scan was 76.3%. In-hospital mortality was 34.8% and at discharge, mean modified Rankin Score (mRS) was 3.89 ± 1.62. The independent predictors for stroke mortality were: hypertension [AOR = 2.2; C.I 95%: (1.32-3.80), p = 0.001], previous stroke [AOR = 2.31; C.I 95%: (1.43-5.74), p = 0.001], GCS < 8 [AOR = 6.06; C.I 95%: (3.17-12.79), p < 0.001], clinical diagnosis in the absence of imaging [AOR = 3.11; C.I 95%: (2.1-9.87), p = 0.001], hemorrhagic stroke [AOR = 2.96; C.I 95%: (1.96-9.54), p < 0.001], and aspiration pneumonia [AOR = 3.03; C.I 95%: (1.44-6.36), p = 0.001]. Women had poorer outcome than men.

Conclusion

This study highlights a high stroke mortality in a resource limited hospital, with some stroke patients having difficulties in accessing Computer Tomogram (CT) scan services. It illustrates the need to establish a stroke care setting to improve the quality of stroke care.","hji,kes",0,0,0,2,0,NA,NA +33212503,DeepBL: a deep learning-based approach for in silico discovery of beta-lactamases.,"Beta-lactamases (BLs) are enzymes localized in the periplasmic space of bacterial pathogens, where they confer resistance to beta-lactam antibiotics. Experimental identification of BLs is costly yet crucial to understand beta-lactam resistance mechanisms. To address this issue, we present DeepBL, a deep learning-based approach by incorporating sequence-derived features to enable high-throughput prediction of BLs. Specifically, DeepBL is implemented based on the Small VGGNet architecture and the TensorFlow deep learning library. Furthermore, the performance of DeepBL models is investigated in relation to the sequence redundancy level and negative sample selection in the benchmark dataset. The models are trained on datasets of varying sequence redundancy thresholds, and the model performance is evaluated by extensive benchmarking tests. Using the optimized DeepBL model, we perform proteome-wide screening for all reviewed bacterium protein sequences available from the UniProt database. These results are freely accessible at the DeepBL webserver at http://deepbl.erc.monash.edu.au/.","hji,kes",0,0,0,2,0,software,testing datasets only +33222322,Analysis of mutational signatures with yet another package for signature analysis.,"Different mutational processes leave characteristic patterns of somatic mutations in the genome that can be identified as mutational signatures. 
Determining the contributions of mutational signatures to cancer genomes allows not only to reconstruct the etiology of somatic mutations, but can also be used for improved tumor classification and support therapeutic decisions. We here present the R package yet another package for signature analysis (YAPSA) to deconvolute the contributions of mutational signatures to tumor genomes. YAPSA provides in-built collections from the COSMIC and PCAWG SNV signature sets as well as the PCAWG Indel signatures and employs signature-specific cutoffs to increase sensitivity and specificity. Furthermore, YAPSA allows to determine 95% confidence intervals for signature exposures, to perform constrained stratified signature analyses to obtain enrichment and depletion patterns of the identified signatures and, when applied to whole exome sequencing data, to correct for the triplet content of individual target capture kits. With this functionality, YAPSA has proved to be a valuable tool for analysis of mutational signatures in molecular tumor boards in a precision oncology context. YAPSA is available at R/Bioconductor (http://bioconductor.org/packages/3.12/bioc/html/YAPSA.html).","hji,kes",0,0,0,2,0,"software, love the name",NA +33245775,The European Bioinformatics Institute: empowering cooperation in response to a global health crisis.,"The European Bioinformatics Institute (EMBL-EBI; https://www.ebi.ac.uk/) provides freely available data and bioinformatics services to the scientific community, alongside its research activity and training provision. The 2020 COVID-19 pandemic has brought to the forefront a need for the scientific community to work even more cooperatively to effectively tackle a global health crisis. EMBL-EBI has been able to build on its position to contribute to the fight against COVID-19 in a number of ways. 
Firstly, EMBL-EBI has used its infrastructure, expertise and network of international collaborations to help build the European COVID-19 Data Platform (https://www.covid19dataportal.org/), which brings together COVID-19 biomolecular data and connects it to researchers, clinicians and public health professionals. By September 2020, the COVID-19 Data Platform has integrated in excess of 170 000 COVID-19 biomolecular data and literature records, collected through a number of EMBL-EBI resources. Secondly, EMBL-EBI has strived to continue its support of the life science communities through the crisis, with updated Training provision and improved service provision throughout its resources. The COVID-19 pandemic has highlighted the importance of EMBL-EBI's core principles, including international cooperation, resource sharing and central data brokering, and has further empowered scientific cooperation.","hji,kes",1,0,1,2,0.5,COVID-19 Data Platform,EBI again; reassessed and still no - abstract does not describe or have urls for distinct resources +33251154,Vaginal Microbiota and Cytokine Microenvironment in HPV Clearance/Persistence in Women Surgically Treated for Cervical Intraepithelial Neoplasia: An Observational Prospective Study.,"High-risk human papillomaviruses (hrHPVs) are causally related to cervical intraepithelial neoplasia (CIN) and subsequent cervical cancer (CC). The vaginal microbiome has been suggested to play a role in the development of CC, but the effect of conservative surgical treatment on the microbiome and hrHPV elimination has not been elucidated. In this study, we aimed to characterize the vaginal microbiome and inflammatory chemokine profile in 85 women treated for CIN2-CIN3 lesions, before and after surgical CIN removal. The results showed, as expected, a high prevalence of dysbiotic microbiomes and vaginal pro-inflammatory cytokines in the CIN cohort, correlated with disease severity, at the basal level. 
By contrast, surgical CIN removal induced significant vaginal microbiome variations, and specific microbiome/cytokine profiles were associated with hrHPV clearance/persistence at 6-month follow-up. hrHPV-cleared patients, in fact, showed a specific increase of L. crispatus and decrease of dysbiosis and inflammatory cytokines compared to hrHPV-persistent patients. These data highlight the crosstalk between HPV and the local microbiome, and suggest that vaginal microbiome modulation might represent a novel approach to modifying the natural history of hrHPV-related CC. Study registration n. ISRCTN34437150 (https://www.isrctn.com/ISRCTN34437150).","hji,kes",0,0,0,2,0,NA,NA +33254015,ENNAACT is a novel tool which employs neural networks for anticancer activity classification for therapeutic peptides.,"The prevalence of cancer as a threat to human life, responsible for 9.6 million deaths worldwide in 2018, motivates the search for new anticancer agents. While many options are currently available for treatment, these are often expensive and impact the human body unfavourably. Anticancer peptides represent a promising emerging field of anticancer therapeutics, which are characterized by favourable toxicity profile. The development of accurate in silico methods for anticancer peptide prediction is of paramount importance, as the amount of available sequence data is growing each year. This study leverages advances in machine learning research to produce a novel sequence-based deep neural network classifier for anticancer peptide activity. The classifier achieves performance comparable to the best-in-class, with a cross-validated accuracy of 98.3%, Matthews correlation coefficient of 0.91 and an Area Under the Curve of 0.95. 
This innovative classifier is available as a web server at https://research.timmons.eu/ennaact, facilitating in silico screening and design of new anticancer peptide chemotherapeutics by the research community.","hji,kes",0,0,0,2,0,software,NA +33272133,Incidence and Outcome of Aneurysmal Subarachnoid Hemorrhage: The Swiss Study on Subarachnoid Hemorrhage (Swiss SOS).,"

Background and purpose

The purpose of this study was to assess nationwide incidence and outcomes of aneurysmal subarachnoid hemorrhage (aSAH). The Swiss SOS (Swiss Study on Subarachnoid Hemorrhage) was established in 2008 and offers the unique opportunity to provide this data from the point of care on a nationwide level.

Methods

All patients with confirmed aneurysmal subarachnoid hemorrhage admitted between January 1, 2009 and December 31, 2014, within Switzerland were recorded in a prospective registry. Incidence rates were calculated based on time-matched population data. Admission parameters and outcomes at discharge and at 1 year were recorded.

Results

We recorded data of 1787 consecutive patients. The incidence of aneurysmal subarachnoid hemorrhage in Switzerland was 3.7 per 100 000 persons/y. The number of female patients was 1170 (65.5%). With a follow-up rate of 91.3% at 1 year, 1042 patients (58.8%) led an independent life according to the modified Rankin Scale (0-2). About 1 in 10 patients survived in a dependent state (modified Rankin Scale, 3-5; n=185; 10.4%). Case fatality was 20.1% (n=356) at discharge and 22.1% (n=391) after 1 year.

Conclusions

The current incidence of aneurysmal subarachnoid hemorrhage in Switzerland is lower than expected and an indication of a global trend toward decreasing admissions for ruptured intracranial aneurysms. Registration: URL: https://www.clinicaltrials.gov. Unique identifier: NCT03245866.","hji,kes",0,0,0,2,0,NA,NA +33283531,Association of Processed Meats and Alcohol Consumption with Renal Cell Carcinoma: A Worldwide Population-Based Study.,"The link between diet and renal cell carcinoma (RCC) is still unclear. The purpose of this study was to evaluate the association of diet with RCC's incidence and mortality rates worldwide. We conducted an ecological study including 170 countries, whose data on age-standardized (AS) incidence and mortality rates of RCC, dietary factors, and potentially confounding factors such as obesity, insufficient physical activity, tobacco smoking, hypertension, diabetes, and human development index (HDI) were collected and available on May 2020 from the Global Cancer Observatory, the Global Dietary Database, the Global Health Observatory data repository, the Diabetes Atlas 9th edition and the Human Development Report 2019. Univariable and multivariable linear regression analyses were performed to determine the association of dietary factors with incidence and mortality rates of RCC adjusted for the effects of population age and potentially confounding factors. Intake of processed meats and consumption of alcohol were both positively associated with AS incidence rates of RCC (β = 0.11, P < 0.001 and β = 0.1, P = 0.044, respectively). We suggest that high consumption of processed meats and/or alcohol is a risk factor for RCC. However, they were not associated with mortality. 
Further research is needed at an individual level.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2020.1856388.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +33294943,"SUMMER, a shiny utility for metabolomics and multiomics exploratory research.","

Introduction

Cellular metabolites are generated by a complex network of biochemical reactions. This makes interpreting changes in metabolites exceptionally challenging.

Objectives

To develop a computational tool that integrates multiomics data at the level of reactions.

Methods

Changes in metabolic reactions are modeled with input from transcriptomics/proteomics measurements of enzymes and metabolomic measurements of metabolites.

Results

We developed SUMMER, which identified more relevant signals, key metabolic reactions, and relevant underlying biological pathways in a real-world case study.

Conclusion

SUMMER performs integrative analysis for data interpretation and exploration. SUMMER is freely accessible at http://summer.salk.edu and the code is available at https://bitbucket.org/salkigc/summer .","hji,kes",0,0,0,2,0,software,NA +33297866,Magnetic Resonance Imaging-Guided Thrombolysis (0.6 mg/kg) Was Beneficial for Unknown Onset Stroke Above a Certain Core Size: THAWS RCT Substudy.,"

Background and purpose

We determined to identify patients with unknown onset stroke who could have favorable 90-day outcomes after low-dose thrombolysis from the THAWS (Thrombolysis for Acute Wake-Up and Unclear-Onset Strokes With Alteplase at 0.6 mg/kg) database.

Methods

This was a subanalysis of an investigator-initiated, multicenter, randomized, open-label, blinded-end point trial. Patients with stroke with a time last-known-well >4.5 hours who showed a mismatch between diffusion-weighted imaging (DWI) and fluid-attenuated inversion recovery were randomly assigned (1:1) to receive alteplase at 0.6 mg/kg intravenously or standard medical treatment. The patients were dichotomized by ischemic core size or National Institutes of Health Stroke Scale score, and the effects of assigned treatments were compared in each group. The efficacy outcome was favorable outcome at 90 days, defined as a modified Rankin Scale score of 0 to 1.

Results

The median DWI-Alberta Stroke Program Early CT Score (ASPECTS) was 9, and the median ischemic core volume was 2.5 mL. Both favorable outcome (47.1% versus 48.3%) and any intracranial hemorrhage (26% versus 14%) at 22 to 36 hours were comparable between the 68 thrombolyzed patients and the 58 control patients. There was a significant treatment-by-cohort interaction for favorable outcome between dichotomized patients by ASPECTS on DWI (P=0.026) and core volume (P=0.035). Favorable outcome was more common in the alteplase group than in the control group in patients with DWI-ASPECTS 5 to 8 (RR, 4.75 [95% CI, 1.33-30.2]), although not in patients with DWI-ASPECTS 9 to 10. Favorable outcome tended to be more common in the alteplase group than in the control group in patients with core volume >6.4 mL (RR, 6.15 [95% CI, 0.87-43.64]), although not in patients with volume ≤6.4 mL. The frequency of any intracranial hemorrhage did not differ significantly between the 2 treatment groups in any dichotomized patients.

Conclusions

Patients developing unknown onset stroke with DWI-ASPECTS 5 to 8 showed favorable outcomes more commonly after low-dose thrombolysis than after standard treatment. Registration: URL: https://www.clinicaltrials.gov; Unique Identifier: NCT02002325. URL: https://www.umin.ac.jp/ctr; Unique Identifier: UMIN000011630.","hji,kes",0,0,0,2,0,NA,NA +33297942,ideal: an R/Bioconductor package for interactive differential expression analysis.,"

Background

RNA sequencing (RNA-seq) is an ever increasingly popular tool for transcriptome profiling. A key point to make the best use of the available data is to provide software tools that are easy to use but still provide flexibility and transparency in the adopted methods. Despite the availability of many packages focused on detecting differential expression, a method to streamline this type of bioinformatics analysis in a comprehensive, accessible, and reproducible way is lacking.

Results

We developed the ideal software package, which serves as a web application for interactive and reproducible RNA-seq analysis, while producing a wealth of visualizations to facilitate data interpretation. ideal is implemented in R using the Shiny framework, and is fully integrated with the existing core structures of the Bioconductor project. Users can perform the essential steps of the differential expression analysis workflow in an assisted way, and generate a broad spectrum of publication-ready outputs, including diagnostic and summary visualizations in each module, all the way down to functional analysis. ideal also offers the possibility to seamlessly generate a full HTML report for storing and sharing results together with code for reproducibility.

Conclusion

ideal is distributed as an R package in the Bioconductor project ( http://bioconductor.org/packages/ideal/ ), and provides a solution for performing interactive and reproducible analyses of summarized RNA-seq expression data, empowering researchers with many different profiles (life scientists, clinicians, but also experienced bioinformaticians) to make the ideal use of the data at hand.","hji,kes",0,0,0,2,0,software,NA +33313640,dittoSeq: Universal User-Friendly Single-Cell and Bulk RNA Sequencing Visualization Toolkit.,"

Summary

A visualization suite for major forms of bulk and single-cell RNAseq data in R. dittoSeq is color blindness-friendly by default, robustly documented to power ease-of-use, and allows highly customizable generation of both daily-use and publication-quality figures.

Availability and implementation

dittoSeq is an R package available through Bioconductor via an open source MIT license.

Supplementary information

Supplementary Code and figures are available at Bioinformatics online. Full vignettes are available through Bioconductor, https://bioconductor.org/packages/dittoSeq/, and github, github.com/dtm2451/dittoSeq/.","hji,kes",0,0,0,2,0,software,NA +33315308,Protein Sequence Analysis Using the MPI Bioinformatics Toolkit.,"The MPI Bioinformatics Toolkit (https://toolkit.tuebingen.mpg.de) provides interactive access to a wide range of the best-performing bioinformatics tools and databases, including the state-of-the-art protein sequence comparison methods HHblits and HHpred. The Toolkit currently includes 35 external and in-house tools, covering functionalities such as sequence similarity searching, prediction of sequence features, and sequence classification. Due to this breadth of functionality, the tight interconnection of its constituent tools, and its ease of use, the Toolkit has become an important resource for biomedical research and for teaching protein sequence analysis to students in the life sciences. In this article, we provide detailed information on utilizing the three most widely accessed tools within the Toolkit: HHpred for the detection of homologs, HHpred in conjunction with MODELLER for structure prediction and homology modeling, and CLANS for the visualization of relationships in large sequence datasets. © 2020 The Authors. Basic Protocol 1: Sequence similarity searching using HHpred Alternate Protocol: Pairwise sequence comparison using HHpred Support Protocol: Building a custom multiple sequence alignment using PSI-BLAST and forwarding it as input to HHpred Basic Protocol 2: Calculation of homology models using HHpred and MODELLER Basic Protocol 3: Cluster analysis using CLANS.","hji,kes",0,0,0,2,0,NA,NA +33316147,Older Adults' Engagement in Technology-Mediated Self-Monitoring of Diet: A Mixed-Method Study.,"

Purpose

This feasibility study explored older adults' use of a nutrition app called Appetitus (https://apps.apple.com/us/app/appetitt/id1001936854?ign-mpt=uo%3D2; https://play.google.com/store/apps/details?id=no.nr.appetitt&hl=e) and addressed their engagement in technology-mediated self-monitoring of diet. Undernutrition is a significant challenge among older adults and is associated with poorer health experiences. Digital health for self-monitoring of diet has the potential to increase awareness of personal nutrition, and the scarcity of research reporting older adults' ability and willingness to engage in technology-mediated dietary self-monitoring warranted this study.

Design and methods

An explorative mixed-methods design combining descriptive analysis of log data with qualitative analysis of interviews with Appetitus users was implemented.

Findings

Twenty-five older adults self-monitored their diet using Appetitus over an 8-week trial period. Eighty percent of the participants used the app regularly in the trial period. The most engaged users recorded their food consumption daily for 8 weeks. Personal interest in nutrition and commitment to the project facilitated regular use of Appetitus. Poor health and the perception that using a nutrition app lacked personal relevance contributed to irregular self-monitoring. For inexperienced technology users, participation in this project became a springboard to using tablet technology and the Internet beyond the Appetitus app.

Conclusions

The majority of the participants regularly used Appetitus for self-monitoring of diet; they found the tablet technology and Appetitus app easy to use.

Clinical relevance

Older adults are able and willing to use self-monitoring tools. Nutrition apps can empower older adults to make better informed decisions about their diet. Patients' self-monitoring can provide valuable and detailed health-related information to healthcare professionals and mediate patient-centered care practices.","hji,kes",0,0,0,2,0,NA,NA +33326008,Collecting and managing taxonomic data with NCBI-taxonomist.,"

Summary

We present NCBI-taxonomist - a command-line tool written in Python that collects and manages taxonomic data from the National Center for Biotechnology Information (NCBI). NCBI-taxonomist does not depend on a pre-downloaded taxonomic database but can store data locally. NCBI-taxonomist has six commands to map, collect, extract, resolve, import and group taxonomic data that can be linked together to create powerful analytical pipelines. Because many life science databases use the same taxonomic information, the data managed by NCBI-taxonomist is not limited to NCBI and can be used to find data linked to taxonomic information present in other scientific databases.

Availability and implementation

NCBI-taxonomist is implemented in Python 3 (≥3.8) and available at https://gitlab.com/janpb/ncbi-taxonomist and via PyPi (https://pypi.org/project/ncbi-taxonomist/), as a Docker container (https://gitlab.com/janpb/ncbi-taxonomist/container_registry/) and Singularity (v3.5.3) image (https://cloud.sylabs.io/library/jpb/ncbi-taxonomist). NCBI-taxonomist is licensed under the GPLv3.

Supplementary information

https://ncbi-taxonomist.readthedocs.io/en/latest/.","hji,kes",0,0,0,2,0,software,NA +33330622,ANCA: A Web Server for Amino Acid Networks Construction and Analysis.,"Amino acid network (AAN) models empower us to gain insights into protein structures and functions by describing a protein 3D structure as a graph, where nodes represent residues and edges as amino acid interactions. Here, we present the ANCA, an interactive Web server for Amino Acids Network Construction and Analysis based on a single structure or a set of structures from the Protein Data Bank. The main purpose of ANCA is to provide a portal for three types of an environment-dependent residue contact energy (ERCE)-based network model, including amino acid contact energy network (AACEN), node-weighted amino acid contact energy network (NACEN), and edge-weighted amino acid contact energy network (EACEN). For comparison, the C-alpha distance-based network model is also included, which can be extended to protein-DNA/RNA complexes. Then, the analyses of different types of AANs were performed and compared from node, edge, and network levels. The network and corresponding structure can be visualized directly in the browser. The ANCA enables researchers to investigate diverse concerns in the framework of AAN, such as the interpretation of allosteric regulation and functional residues. The ANCA portal, together with an extensive help, is available at http://sysbio.suda.edu.cn/anca/.","hji,kes",0,0,0,2,0,software,NA +33338203,MuscleAtlasExplorer: a web service for studying gene expression in human skeletal muscle.,"MuscleAtlasExplorer is a freely available web application that allows for the exploration of gene expression data from human skeletal muscle. It draws from an extensive publicly available dataset of 1654 skeletal muscle expression microarray samples. 
Detailed, manually curated, patient phenotype data, with information such as age, sex, BMI and disease status, are combined with skeletal muscle gene expression to provide insights into gene function in skeletal muscle. It aims to facilitate easy exploration of the data using powerful data visualization functions, while allowing for sample selection, in-depth inspection and further analysis using external tools. Availability: MuscleAtlasExplorer is available at https://mae.crc.med.lu.se/mae2 (username 'muscle' and password 'explorer' pre-publication).","hji,kes",1,0,1,2,0.5,not sure if it is a biodata resource in and of itself,"no notes; reassessed and no still - does talk about value add done to data, but still seems mostly like a web server" +33342295,Markedness and implicational relationships in phonological development: A cross-linguistic investigation.,"

Purpose

The complexity approach to speech disorders, based on the theoretical notion of phonological markedness, has been gaining interest over the last decade. In a nutshell, this approach suggests that the acquisition of phonologically marked units (e.g. complex onsets) implies the acquisition of less marked ones (e.g. singleton onsets). However, because the notion of markedness is, itself, subject to controversies, we need to constrain what types of implications can be generalised among language learners, within and across languages.

Method

We report on longitudinal data from one phonologically-disordered and five typically-developing children documented across four different languages (English, French, German, Portuguese), using data from the PhonBank database (https://phonbank.talkbank.org). Using the Phon software program (https://www.phon.ca), we systematically analysed each longitudinal study for consonants in singleton onsets and codas as well as in onset clusters.

Result

The implicational relationships supported by our study involve units of similar types (e.g. relations between different segmental categories), while relationships that involve different types of units or processes cannot be generalised across learners.

Conclusion

A better understanding of implicational relationships makes the complexity approach more predictive of developmental patterns of phonology and related phonological disorders.","hji,kes",0,0,0,2,0,mentions database,not life sci +33346815,FireProtASR: A Web Server for Fully Automated Ancestral Sequence Reconstruction.,"There is a great interest in increasing proteins' stability to widen their usability in numerous biomedical and biotechnological applications. However, native proteins cannot usually withstand the harsh industrial environment, since they are evolved to function under mild conditions. Ancestral sequence reconstruction is a well-established method for deducing the evolutionary history of genes. Besides its applicability to discover the most probable evolutionary ancestors of the modern proteins, ancestral sequence reconstruction has proven to be a useful approach for the design of highly stable proteins. Recently, several computational tools were developed, which make the ancestral reconstruction algorithms accessible to the community, while leaving the most crucial steps of the preparation of the input data on users' side. FireProtASR aims to overcome this obstacle by constructing a fully automated workflow, allowing even the unexperienced users to obtain ancestral sequences based on a sequence query as the only input. 
FireProtASR is complemented with an interactive, easy-to-use web interface and is freely available at https://loschmidt.chemi.muni.cz/fireprotasr/.","hji,kes",0,0,0,2,0,software,NA +33348264,Combining in vivo pathohistological and redox status analysis with in silico toxicogenomic study to explore the phthalates and bisphenol A mixture-induced testicular toxicity.,"The aim of this study was to: (i) determine and compare the capacity of bis (2 -ethylhexyl) phthalate (DEHP), dibutyl phthalate (DBP), bisphenol A (BPA), and their mixture to produce testicular toxicity after the subacute exposure; (ii) explore the mechanisms behind the observed changes using in silico toxicogenomic approach. Male rats were randomly split into groups (n = 6): (1) Control (corn oil); (2) DEHP (50 mg/kg b.w./day); (3) DBP (50 mg/kg b.w./day); (4) BPA (25 mg/kg b.w./day); and (5) MIX (50 mg/kg b.w./day DEHP + 50 mg/kg b.w/day DBP + 25 mg/kg b.w./day BPA). Animals were sacrificed after 28 days of oral exposure, testes were extracted and prepared for histological assessments under the light microscope (haematoxylin and eosin staining) and redox status analysis. The Comparative Toxicogenomics Database (CTD; http://CTD.mdibl.org), Cytoscape software (https://cytoscape.org) and ToppGene Suite (https://toppgene.cchmc.org) were used for data-mining. Present pathohistological study has demonstrated more pronounced testicular toxicity of the MIX group (desquamated germinal epithelium cells, enlarged cells with hyperchromatic nuclei, multinucleated cell forms and intracytoplasmic vacuoles) in comparison with the single substances, while effects on redox status parameters were either more prominent, or present only in the MIX group. In silico investigation revealed 20 genes linked to male reproductive disorders, affected by all three investigated substances. 
Effects on metabolism, AhR pathway, apoptosis and oxidative stress could be singled out as the most probable mechanisms involved in the subacute DEHP, DBP and BPA mixture testicular toxicity, while the effect on oxidative stress parameters was confirmed by in vivo experiment.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +33354878,Industry payments to hospitalist physicians: a 5-year analysis of the Open Payments programme from 2014 to 2018.,We analysed Open Payments programme data (https://openpaymentsdata.cms.gov) on industry-to-physician payments to hospitalists for the years 2014 to 2018. Payments to hospitalists increased by 106.5% from 2014 to 2018 with food and beverage (38.5%) and compensation for services other than consulting (24.3%) being the highest-paid categories. Industry payment to hospitalists was highly skewed with top 10 hospitalists receiving more than 30% of the total payments during the study period. The most common drugs associated with payments were anticoagulant medications (apixaban and rivaroxaban). Industry seems to be spending a significant amount of money to increase awareness of medications among hospitalists. Identification of these trends and potential motives of industry spending is critical to address any potential physician bias.,"hji,kes",0,0,0,2,0,NA,not life sci +33361083,Development and external validation of a COVID-19 mortality risk prediction algorithm: a multicentre retrospective cohort study.,"

Objective

This study aimed to develop and externally validate a COVID-19 mortality risk prediction algorithm.

Design

Retrospective cohort study.

Setting

Five designated tertiary hospitals for COVID-19 in Hubei province, China.

Participants

We routinely collected medical data of 1364 confirmed adult patients with COVID-19 between 8 January and 19 March 2020. Among them, 1088 patients from two designated hospitals in Wuhan were used to develop the prognostic model, and 276 patients from three hospitals outside Wuhan were used for external validation. All patients were followed up for a maximal of 60 days after the diagnosis of COVID-19.

Methods

The model discrimination was assessed by the area under the receiver operating characteristic curve (AUC) and Somers' D test, and calibration was examined by the calibration plot. Decision curve analysis was conducted.

Main outcome measures

The primary outcome was all-cause mortality within 60 days after the diagnosis of COVID-19.

Results

The full model included seven predictors of age, respiratory failure, white cell count, lymphocytes, platelets, D-dimer and lactate dehydrogenase. The simple model contained five indicators of age, respiratory failure, coronary heart disease, renal failure and heart failure. After cross-validation, the AUC statistics based on derivation cohort were 0.96 (95% CI, 0.96 to 0.97) for the full model and 0.92 (95% CI, 0.89 to 0.95) for the simple model. The AUC statistics based on the external validation cohort were 0.97 (95% CI, 0.96 to 0.98) for the full model and 0.88 (95% CI, 0.80 to 0.96) for the simple model. Good calibration accuracy of these two models was found in the derivation and validation cohort.

Conclusion

The prediction models showed good model performance in identifying patients with COVID-19 with a high risk of death in 60 days. It may be useful for acute risk classification.

Web calculator

We provided a freely accessible web calculator (https://www.whuyijia.com/).","hji,kes",0,0,0,2,0,NA,NA +33362409,Bayesian differential programming for robust systems identification under uncertainty.,"This paper presents a machine learning framework for Bayesian systems identification from noisy, sparse and irregular observations of nonlinear dynamical systems. The proposed method takes advantage of recent developments in differentiable programming to propagate gradient information through ordinary differential equation solvers and perform Bayesian inference with respect to unknown model parameters using Hamiltonian Monte Carlo sampling. This allows an efficient inference of the posterior distributions over plausible models with quantified uncertainty, while the use of sparsity-promoting priors enables the discovery of interpretable and parsimonious representations for the underlying latent dynamics. A series of numerical studies is presented to demonstrate the effectiveness of the proposed methods, including nonlinear oscillators, predator-prey systems and examples from systems biology. Taken together, our findings put forth a flexible and robust workflow for data-driven model discovery under uncertainty. All codes and data accompanying this article are available at https://bit.ly/34FOJMj.","hji,kes",0,0,0,2,0,software,NA +33381797,ChemGenerator: a web server for generating potential ligands for specific targets.,"In drug discovery, one of the most important tasks is to find novel and biologically active molecules. Given that only a tip of iceberg of drugs was founded in nearly one-century's experimental exploration, it shows great significance to use in silico methods to expand chemical database and profile drug-target linkages. In this study, a web server named ChemGenerator was proposed to generate novel activates for specific targets based on users' input. 
The ChemGenerator relies on an autoencoder-based algorithm of Recurrent Neural Networks with Long Short-Term Memory by training of 7 million of molecular Simplified Molecular-Input Line-Entry System as the basic model, and further develops target guided generation by transfer learning. As results, ChemGenerator gains lower loss (<0.01) than existing reference model (0.2~0.4) and shows good performance in the case of Epidermal Growth Factor Receptor. Meanwhile, ChemGenerator is now freely accessible to the public by http://smiles.tcmobile.org. In proportion to endless molecular enumeration and time-consuming expensive experiments, this work demonstrates an efficient alternative way for the first virtual screening in drug discovery.","hji,kes",0,0,0,2,0,software,NA +33386221,Update of the AMSER National Medical Student Curriculum.,"Since the first steps of creating the Alliance of Medical Student Educators in Radiology (AMSER) curriculum 20 years ago, dramatic advances in medical imaging, patient care, and medical education have occurred necessitating an update of this valuable resource. The 2020 update of the AMSER curriculum aims to address as many of these changes while providing a succinct resource that will hopefully remain useful for years to come. The updated AMSER curriculum document is freely available for download via the AMSER website at https://www.aur.org/en/affinity-groups/amser/curriculum.","hji,kes",0,0,0,2,0,NA,NA +33395075,Development and Validation of an Automatic Image-Recognition Endoscopic Report Generation System: A Multicenter Study.,"

Introduction

Conventional gastrointestinal (GI) endoscopy reports written by physicians are time consuming and might have obvious heterogeneity or omissions, impairing the efficiency and multicenter consultation potential. We aimed to develop and validate an image recognition-based structured report generation system (ISRGS) through a multicenter database and to assess its diagnostic performance.

Methods

First, we developed and evaluated an ISRGS combining real-time video capture, site identification, lesion detection, subcharacteristics analysis, and structured report generation. White light and chromoendoscopy images from patients with GI lesions were eligible for study inclusion. A total of 46,987 images from 9 tertiary hospitals were used to train, validate, and multicenter test (6:2:2). Moreover, 5,699 images were prospectively enrolled from Qilu Hospital of Shandong University to further assess the system in a prospective test set. The primary outcome was the diagnosis performance of GI lesions in multicenter and prospective tests.

Results

The overall accuracy in identifying early esophageal cancer, early gastric cancer, early colorectal cancer, esophageal varices, reflux esophagitis, Barrett's esophagus, chronic atrophic gastritis, gastric ulcer, colorectal polyp, and ulcerative colitis was 0.8841 (95% confidence interval, 0.8775-0.8904) and 0.8965 (0.8883-0.9041) in multicenter and prospective tests, respectively. The accuracy of cecum and upper GI site identification were 0.9978 (0.9969-0.9984) and 0.8513 (0.8399-0.8620), respectively. The accuracy of staining discrimination was 0.9489 (0.9396-0.9568). The relative error of size measurement was 4.04% (range 0.75%-7.39%).

Discussion

ISRGS is a reliable computer-aided endoscopic report generation system that might assist endoscopists working at various hospital levels to generate standardized and accurate endoscopy reports (http://links.lww.com/CTG/A485).","hji,kes",0,0,0,2,0,NA,NA +33407073,SPServer: split-statistical potentials for the analysis of protein structures and protein-protein interactions.,"

Background

Statistical potentials, also named knowledge-based potentials, are scoring functions derived from empirical data that can be used to evaluate the quality of protein folds and protein-protein interaction (PPI) structures. In previous works we decomposed the statistical potentials in different terms, named Split-Statistical Potentials, accounting for the type of amino acid pairs, their hydrophobicity, solvent accessibility and type of secondary structure. These potentials have been successfully used to identify near-native structures in protein structure prediction, rank protein docking poses, and predict PPI binding affinities.

Results

Here, we present the SPServer, a web server that applies the Split-Statistical Potentials to analyze protein folds and protein interfaces. SPServer provides global scores as well as residue/residue-pair profiles presented as score plots and maps. This level of detail allows users to: (1) identify potentially problematic regions on protein structures; (2) identify disrupting amino acid pairs in protein interfaces; and (3) compare and analyze the quality of tertiary and quaternary structural models.

Conclusions

While there are many web servers that provide scoring functions to assess the quality of either protein folds or PPI structures, SPServer integrates both aspects in a unique easy-to-use web server. Moreover, the server permits to locally assess the quality of the structures and interfaces at a residue level and provides tools to compare the local assessment between structures. SERVER ADDRESS: https://sbi.upf.edu/spserver/ .","hji,kes",0,0,0,2,0,NA,NA +33416829,"ProteomeExpert: a docker image based web-server for exploring, modeling, visualizing, and mining quantitative proteomic data sets.","

‚ÄÇ

The rapid progresses of high throughput sequencing technology-based omics and mass spectrometry (MS)-based proteomics such as data-independent acquisition (DIA) and its penetration to clinical studies have generated increasing number of proteomic data sets containing 100 s-1000s samples. To analyze these quantitative proteomic data sets and other -omics data sets more efficiently and conveniently, we present a web server-based software tool ProteomeExpert implemented in Docker, which offers various analysis tools for experimental design, data mining, interpretation, and visualization of quantitative proteomic data sets. ProteomeExpert can be deployed on an operating system with Docker installed or with R language environment.

Availability and implementation

The Docker image of ProteomeExpert is freely available from https://hub.docker.com/r/lifeinfo/proteomeexpert. The source code of ProteomeExpert is also openly accessible at http://www.github.com/lifeinfo/ProteomeExpert/. In addition, a demo server is provided at https://proteomic.shinyapps.io/peserver/.

Supplementary information

SUPPLEMENTARY DATA ARE AVAILABLE AT BIOINFORMATICS ONLINE.","hji,kes",0,0,0,2,0,software,NA +33416854,GWASinspector: comprehensive quality control of genome-wide association study results.,"

Summary

Quality control (QC) of genome wide association study (GWAS) result files has become increasingly difficult due to advances in genomic technology. The main challenges include continuous increases in the number of polymorphic genetic variants contained in recent GWASs and reference panels, the rising number of cohorts participating in a GWAS consortium, and inclusion of new variant types. Here, we present GWASinspector, a flexible R package for comprehensive QC of GWAS results. This package is compatible with recent imputation reference panels, handles insertion/deletion and multi-allelic variants, provides extensive QC reports and efficiently processes big data files. Reference panels covering three human genome builds (NCBI36, GRCh37 and GRCh38) are available. GWASinspector has a user friendly design and allows easy set-up of the QC pipeline through a configuration file. In addition to checking and reporting on individual files, it can be used in preparation of a meta-analysis by testing for systemic differences between studies and generating cleaned, harmonized GWAS files. Comparison with existing GWAS QC tools shows that the main advantages of GWASinspector are its ability to more effectively deal with insertion/deletion and multi-allelic variants and its relatively low memory use.

Availability and implementation

Our package is available at The Comprehensive R Archive Network (CRAN): https://CRAN.R-project.org/package=GWASinspector. Reference datasets and a detailed tutorial can be found at the package website at http://gwasinspector.com/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +33416864,SoluProt: Prediction of Soluble Protein Expression in Escherichia coli.,"

Motivation

Poor protein solubility hinders the production of many therapeutic and industrially useful proteins. Experimental efforts to increase solubility are plagued by low success rates and often reduce biological activity. Computational prediction of protein expressibility and solubility in Escherichia coli using only sequence information could reduce the cost of experimental studies by enabling prioritisation of highly soluble proteins.

Results

A new tool for sequence-based prediction of soluble protein expression in Escherichia coli, SoluProt, was created using the gradient boosting machine technique with the TargetTrack database as a training set. When evaluated against a balanced independent test set derived from the NESG database, SoluProt's accuracy of 58.5% and AUC of 0.62 exceeded those of a suite of alternative solubility prediction tools. There is also evidence that it could significantly increase the success rate of experimental protein studies. SoluProt is freely available as a standalone program and a user-friendly webserver at https://loschmidt.chemi.muni.cz/soluprot/.

Availability and implementation

https://loschmidt.chemi.muni.cz/soluprot/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33418450,Discovery of new enzymatic functions and metabolic pathways using genomic enzymology web tools.,"The continuing expansion of protein and genome sequence databases is an opportunity to identify novel enzymes with biotechnological applications. Whether applied to enzymology, chemical biology, systems biology, and microbiology, database mining must be 'user-friendly' so that experimentalists can devise focused strategies to discover the in vitro activities and in vivo functions of uncharacterized enzymes. We developed a suite of genomic enzymology tools (https://efi.igb.illinois.edu/) to (1) generate sequence similarity networks (SSNs) for exploration of sequence-function space in protein families (EFI-EST) and (2) provide genome context for members of protein families (EFI-GNT). Integrated analysis of this complementary information allows to generate testable hypotheses about new functions. After a brief overview of EFI-EST and EFI-GNT, we describe applications that illustrate their use.","hji,kes",0,0,0,2,0,NA,NA +33431029,"The ChemicalToolbox: reproducible, user-friendly cheminformatics analysis on the Galaxy platform.","Here, we introduce the ChemicalToolbox, a publicly available web server for performing cheminformatics analysis. The ChemicalToolbox provides an intuitive, graphical interface for common tools for downloading, filtering, visualizing and simulating small molecules and proteins. The ChemicalToolbox is based on Galaxy, an open-source web-based platform which enables accessible and reproducible data analysis. There is already an active Galaxy cheminformatics community using and developing tools. 
Based on their work, we provide four example workflows which illustrate the capabilities of the ChemicalToolbox, covering assembly of a compound library, hole filling, protein-ligand docking, and construction of a quantitative structure-activity relationship (QSAR) model. These workflows may be modified and combined flexibly, together with the many other tools available, to fit the needs of a particular project. The ChemicalToolbox is hosted on the European Galaxy server and may be accessed via https://cheminformatics.usegalaxy.eu .","hji,kes",0,0,0,2,0,NA,NA +33435732,Rationale and Design of the SAFE-PAD Study.,"

Background

Recent evidence from randomized controlled trials has raised concerns about the long-term safety of paclitaxel-coated peripheral devices used for femoropopliteal artery revascularization. In response to a call for more real-world data on the safety of these devices, the SAFE-PAD study (Safety Assessment of Femoropopliteal Endovascular treatment with Paclitaxel-coated Devices) was designed with input from the Food and Drug Administration to provide a long-term, comprehensive evaluation of the mortality risk associated with paclitaxel-coated devices among Medicare beneficiaries.

Methods and results

SAFE-PAD is an observational cohort study of fee-for-service Medicare beneficiaries that underwent femoropopliteal artery revascularization with either a drug-coated device or nondrug-coated device from 2015 through 2018. All patients age 66 years or older who underwent revascularization will be identified using a combination of International Classification of Diseases, Tenth Revision procedural codes, Current Procedural Terminology codes, and Healthcare Common Procedure Coding System C-codes. The safety end point of all-cause death will be updated semiannually and continued until the median duration of follow-up surpasses 5 years. Sub-group analyses will be conducted by device type, patient characteristics, and procedural setting. Registration: The SAFE-PAD study has been registered on URL: https://www.clinicaltrials.gov; Unique identifier: NCT04496544.

Conclusions

The SAFE-PAD study will evaluate the long-term safety of drug-coated devices compared with nondrug-coated devices for femoropopliteal artery revascularization among a broad, real-world population of patients with peripheral artery disease.","hji,kes",0,0,0,2,0,NA,NA +33438548,Texture Analysis in the Evaluation of Covid-19 Pneumonia in Chest X-Ray Images: a Proof of Concept Study.,"

Background

One of the most challenging aspects related to Covid-19 is to establish the presence of infection in early phase of the disease. Texture analysis might be an additional tool for the evaluation of Chest X-ray in patients with clinical suspicion of Covid-19 related pneumonia.

Objective

To evaluate the diagnostic performance of texture analysis and machine learning models for the diagnosis of Covid-19 interstitial pneumonia in Chest X-ray images.

Methods

Chest X-ray images were accessed from a publicly available repository (https://www.kaggle.com/tawsifurrahman/covid19-radiography-database). Lung areas were manually segmented using a polygonal regions of interest covering both lung areas, using MaZda, a freely available software for texture analysis. A total of 308 features per ROI was extracted. One hundred-ten Covid-19 Chest X-ray images were selected for the final analysis.

Results

Six models, namely NB, GLM, DL, GBT, ANN and PLS-DA were selected and ensembled. According to Youden's index, the Covid-19 Ensemble Machine Learning Score showing the highest Area Under the Curve (0.971±0.015) was 132.57. Assuming this cut-off the Ensemble model performance was estimated evaluating both true and false positive/negative, resulting in 91.8% accuracy with 93% sensitivity and 90% specificity. Moving the cut-off value to -100, although the accuracy resulted lower (90.6%), the Ensemble Machine Learning showed 100% sensitivity, with 80% specificity.

Conclusion

Texture analysis of Chest X-ray images and machine learning algorithms may help in differentiating patients with Covid-19 pneumonia. Despite several limitations, this study can lay ground for future researches in this field and help developing more rapid and accurate screening tools for these patients.","hji,kes",0,0,0,2,0,NA,NA +33441366,Protocol for a qualitative study to identify strategies to optimise hospital ePrescribing systems.,"

Introduction

Electronic prescribing (ePrescribing) is a key area of development and investment in the UK and across the developed world. ePrescribing is widely understood as a vehicle for tackling medication-related safety concerns, improving care quality and making more efficient use of health resources. Nevertheless, implementation of an electronic health record does not itself ensure benefits for prescribing are maximised. We examine the process of optimisation of ePrescribing systems using case studies to provide policy recommendations based on the experiences of digitally mature hospital sites.

Methods and analysis

Qualitative interviews within six digitally mature sites will be carried out. The aim is to capture successful optimisation of electronic prescribing (ePrescribing) in particular health systems and hospitals. We have identified hospital sites in the UK and in three other developed countries. We used a combination of literature reviews and advice from experts at Optimising ePrescribing in Hospitals (eP Opt) Project round-table events. Sites were purposively selected based on geographical area, innovative work in ePrescribing/electronic health (eHealth) and potential transferability of practices to the UK setting. Interviews will be recorded and transcribed and transcripts coded thematically using NVivo software. Relevant policy and governance documents will be analysed, where available. Planned site visits were suspended due to the COVID-19 pandemic.

Ethics and dissemination

The Usher Research Ethics Group granted approval for this study. Results will be disseminated via peer-reviewed journals in medical informatics and expert round-table events, lay member meetings and the ePrescribing Toolkit (http://www.eprescribingtoolkit.com/)-an online resource supporting National Health Service (NHS) hospitals through the ePrescribing process.","hji,kes",0,0,0,2,0,NA,NA +33444218,Predicting dementia diagnosis from cognitive footprints in electronic health records: a case-control study protocol.,"

Introduction

Dementia is a group of disabling disorders that can be devastating for persons living with it and for their families. Data-informed decision-making strategies to identify individuals at high risk of dementia are essential to facilitate large-scale prevention and early intervention. This population-based case-control study aims to develop and validate a clinical algorithm for predicting dementia diagnosis, based on the cognitive footprint in personal and medical history.

Methods and analysis

We will use territory-wide electronic health records from the Clinical Data Analysis and Reporting System (CDARS) in Hong Kong between 1 January 2001 and 31 December 2018. All individuals who were at least 65 years old by the end of 2018 will be identified from CDARS. A random sample of control individuals who did not receive any diagnosis of dementia will be matched with those who did receive such a diagnosis by age, gender and index date with 1:1 ratio. Exposure to potential protective/risk factors will be included in both conventional logistic regression and machine-learning models. Established risk factors of interest will include diabetes mellitus, midlife hypertension, midlife obesity, depression, head injuries and low education. Exploratory risk factors will include vascular disease, infectious disease and medication. The prediction accuracy of several state-of-the-art machine-learning algorithms will be compared.

Ethics and dissemination

This study was approved by Institutional Review Board of The University of Hong Kong/Hospital Authority Hong Kong West Cluster (UW 18-225). Patients' records are anonymised to protect privacy. Study results will be disseminated through peer-reviewed publications. Codes of the resulted dementia risk prediction algorithm will be made publicly available at the website of the Tools to Inform Policy: Chinese Communities' Action in Response to Dementia project (https://www.tip-card.hku.hk/).","hji,kes",0,0,0,2,0,NA,NA +33452079,Advancing the Patient EXperience (APEX) in COPD Registry: Study Design and Strengths.,"The Advancing the Patient Experience (APEX) in Chronic Obstructive Pulmonary Disease (COPD) registry (https://www.apexcopd.org/) is the first primary care health system-based COPD registry in the United States. While its ultimate goal is to improve the care of patients diagnosed with COPD, the registry is also designed to describe real-life experiences of people with COPD, track key outcomes longitudinally, and assess the effectiveness of interventions. It will retrospectively and prospectively collect information from 3000 patients enrolled in 5 health care organizations. Information will be obtained from electronic health records, and from extended annual and brief questionnaires completed by patients before clinic visits. Core variables to be collected into the APEX COPD registry were agreed on by Delphi consensus and fall into 3 domains: demographics, COPD monitoring, and treatment. 
Main strengths of the registry include: 1) its size and scope (in terms of patient numbers, geographic spread and use of multiple information sources including patient-reported information); 2) collection of variables which are clinically relevant and practical to collect within primary care; 3) use of electronic data capture systems to ensure high-quality data and minimization of data-entry requirements; 4) inclusion of clinical, database development, management and communication experts; 5) regular sharing of key findings, both at international/national congresses and in peer-reviewed publications; and 6) a robust organizational structure to ensure continuance of the registry, and that research outputs are ethical, relevant and continue to bring value to both patients and physicians.","hji,kes",0,0,0,2,0,NA,Chuck Check - med records out of scope +33455583,Establishment and application of information resource of mutant mice in RIKEN BioResource Research Center.,"Online databases are crucial infrastructures to facilitate the wide effective and efficient use of mouse mutant resources in life sciences. The number and types of mouse resources have been rapidly growing due to the development of genetic modification technology with associated information of genomic sequence and phenotypes. Therefore, data integration technologies to improve the findability, accessibility, interoperability, and reusability of mouse strain data becomes essential for mouse strain repositories. In 2020, the RIKEN BioResource Research Center released an integrated database of bioresources including, experimental mouse strains, Arabidopsis thaliana as a laboratory plant, cell lines, microorganisms, and genetic materials using Resource Description Framework-related technologies. The integrated database shows multiple advanced features for the dissemination of bioresource information. 
The current version of our online catalog of mouse strains which functions as a part of the integrated database of bioresources is available from search bars on the page of the Center ( https://brc.riken.jp ) and the Experimental Animal Division ( https://mus.brc.riken.jp/ ) websites. The BioResource Research Center also released a genomic variation database of mouse strains established in Japan and Western Europe, MoG+ ( https://molossinus.brc.riken.jp/mogplus/ ), and a database for phenotype-phenotype associations across the mouse phenome using data from the International Mouse Phenotyping Platform. In this review, we describe features of current version of databases related to mouse strain resources in RIKEN BioResource Research Center and discuss future views.","hji,kes",1,1,2,2,1,description of several databases and biodata resources,"like EBI - Riken is not a data resource in and of itself; reassessed and re-scored - does have one URL and name of a distinct resources, tricky abstract" +33459772,SubtypeDrug: a software package for prioritization of candidate cancer subtype-specific drugs.,"

Summary

Cancer can be classified into various subtypes by its molecular, histological, or clinical characteristics. Discovering cancer-subtype-specific drugs is a crucial step in personalized medicine. SubtypeDrug is a system biology R-based software package that enables the prioritization of subtype-specific drugs based on cancer expression data from samples of many subtypes. This provides a novel approach to identify the subtype-specific drug by considering biological functions regulated by drugs at the subpathway level. The operation modes include extraction of subpathways from biological pathways, identification of dysregulated subpathways induced by each drug, inference of sample-specific subpathway activity profiles, evaluation of drug-disease reverse association at the subpathways level, identification of cancer-subtype-specific drugs through subtype sample set enrichment analysis, and visualization of the results. Its capabilities enable SubtypeDrug to find subtype-specific drugs, which will fill the gaps in the recent tools which only identify the drugs for a particular cancer type. SubtypeDrug may help to facilitate the development of tailored treatment for patients with cancer.

Availability and implementation

The package is implemented in R and available under GPL-2 license from the CRAN website (https://CRAN.R-project.org/package=SubtypeDrug).

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33464891,TopSuite Web Server: A Meta-Suite for Deep-Learning-Based Protein Structure and Quality Prediction.,"Proteins carry out the most fundamental processes of life such as cellular metabolism, regulation, and communication. Understanding these processes at a molecular level requires knowledge of their three-dimensional structures. Experimental techniques such as X-ray crystallography, NMR spectroscopy, and cryogenic electron microscopy can resolve protein structures but are costly and time-consuming and do not work for all proteins. Computational protein structure prediction tries to overcome these problems by predicting the structure of a new protein using existing protein structures as a resource. Here we present TopSuite, a web server for protein model quality assessment (TopScore) and template-based protein structure prediction (TopModel). TopScore provides meta-predictions for global and residue-wise model quality estimation using deep neural networks. TopModel predicts protein structures using a top-down consensus approach to aid the template selection and subsequently uses TopScore to refine and assess the predicted structures. The TopSuite Web server is freely available at https://cpclab.uni-duesseldorf.de/topsuite/.","hji,kes",0,0,0,2,0,software,NA +33471079,Bios2cor: an R package integrating dynamic and evolutionary correlations to identify functionally important residues in proteins.,"

Summary

Both dynamic correlations in protein sidechain motions during molecular dynamics (MD) simulations and evolutionary correlations in multiple sequence alignments (MSA) of homologous proteins may reveal functionally important residues. We developed the R package Bios2cor that provides a unique framework to investigate and, possibly, integrate both analyses. Bios2cor starts with an MSA or a MD trajectory and computes correlation/covariation scores between positions in the MSA or between sidechain dihedral angles or rotamers in the MD trajectory. In addition, Bios2cor provides a variety of tools for the analysis, the visualization and the interpretation of the data.

Availability

The R package Bios2cor is available from the Comprehensive R Archive Network, at http://cran.r-project.org/web/packages/Bios2cor/index.html.","hji,kes",0,0,0,2,0,software,NA +33477091,Chronic wounds multimodal image database.,"A multimodal wound image database was created to allow fast development of computer-aided approaches for wound healing monitoring. The developed system with parallel camera optical axes enables multimodal images: photo, thermal, stereo, and depth map of the wound area to be acquired. As a result of using this system a multimodal database of chronic wound images is introduced. It contains 188 image sets of photographs, thermal images, and 3D meshes of the surfaces of chronic wounds acquired during 79 patient visits. Manual wound outlines delineated by an expert are also included in the dataset. All images of each case are additionally coregistered, and both numerical registration parameters and the transformed images are covered in the database. The presented database is publicly available for the research community at https://chronicwounddatabase.eu. That is the first publicly available database for evaluation and comparison of new image-based algorithms in the wound healing monitoring process with coregistered photographs, thermal maps, and 3D models of the wound area. Easily available database of coregistered multimodal data with the raw data set allows faster development of algorithms devoted to wound healing analysis and monitoring.","hji,kes",1,0,1,2,0.5,NA,medical - out of scope; reassessed and still no - clinical data +33486066,"Making science computable: Developing code systems for statistics, study design, and risk of bias.","The COVID-19 crisis led a group of scientific and informatics experts to accelerate development of an infrastructure for electronic data exchange for the identification, processing, and reporting of scientific findings. 
The Fast Healthcare Interoperability Resources (FHIR®) standard which is overcoming the interoperability problems in health information exchange was extended to evidence-based medicine (EBM) knowledge with the EBMonFHIR project. A 13-step Code System Development Protocol was created in September 2020 to support global development of terminologies for exchange of scientific evidence. For Step 1, we assembled expert working groups with 55 people from 26 countries by October 2020. For Step 2, we identified 23 commonly used tools and systems for which the first version of code systems will be developed. For Step 3, a total of 368 non-redundant concepts were drafted to become display terms for four code systems (Statistic Type, Statistic Model, Study Design, Risk of Bias). Steps 4 through 13 will guide ongoing development and maintenance of these terminologies for scientific exchange. When completed, the code systems will facilitate identifying, processing, and reporting research results and the reliability of those results. More efficient and detailed scientific communication will reduce cost and burden and improve health outcomes, quality of life, and patient, caregiver, and healthcare professional satisfaction. We hope the achievements reached thus far will outlive COVID-19 and provide an infrastructure to make science computable for future generations. Anyone may join the effort at https://www.gps.health/covid19_knowledge_accelerator.html.","hji,kes",0,0,0,2,0,NA,Chuck Check - no +33493161,PASA: Proteomic analysis of serum antibodies web server.,"

Motivation

A comprehensive characterization of the humoral response towards a specific antigen requires quantification of the B-cell receptor repertoire by next-generation sequencing (BCR-Seq), as well as the analysis of serum antibodies against this antigen, using proteomics. The proteomic analysis is challenging since it necessitates the mapping of antigen-specific peptides to individual B-cell clones.

Results

The PASA web server provides a robust computational platform for the analysis and integration of data obtained from proteomics of serum antibodies. PASA maps peptides derived from antibodies raised against a specific antigen to corresponding antibody sequences. It then analyzes and integrates proteomics and BCR-Seq data, thus providing a comprehensive characterization of the humoral response. The PASA web server is freely available at https://pasa.tau.ac.il and open to all users without a login requirement.","hji,kes",0,0,0,2,0,software,NA +33516897,A Hilbert-based method for processing respiratory timeseries.,"In this technical note, we introduce a new method for estimating changes in respiratory volume per unit time (RVT) from respiratory bellows recordings. By using techniques from the electrophysiological literature, in particular the Hilbert transform, we show how we can better characterise breathing rhythms, with the goal of improving physiological noise correction in functional magnetic resonance imaging (fMRI). Specifically, our approach leads to a representation with higher time resolution and better captures atypical breathing events than current peak-based RVT estimators. Finally, we demonstrate that this leads to an increase in the amount of respiration-related variance removed from fMRI data when used as part of a typical preprocessing pipeline. Our implementation is publicly available as part of the PhysIO package, which is distributed as part of the open-source TAPAS toolbox (https://translationalneuromodeling.org/tapas).","hji,kes",0,0,0,2,0,NA,NA +33529135,Disruption of the Atrophy-based Functional Network in Multiple Sclerosis Is Associated with Clinical Disability: Validation of a Meta-Analytic Model in Resting-State Functional MRI.,"Background In multiple sclerosis (MS), gray matter (GM) atrophy exhibits a specific pattern, which correlates strongly with clinical disability. 
However, the mechanism of regional specificity in GM atrophy remains largely unknown. Recently, the network degeneration hypothesis (NDH) was quantitatively defined (using coordinate-based meta-analysis) as the atrophy-based functional network (AFN) model, which posits that localized GM atrophy in MS is mediated by functional networks. Purpose To test the NDH in MS in a data-driven manner using the AFN model to direct analyses in an independent test sample. Materials and Methods Model fit testing was conducted with structural equation modeling, which is based on the computation of semipartial correlations. Model verification was performed in coordinate-based data of healthy control participants from the BrainMap database (https://www.brainmap.org). Model validation was conducted in prospectively acquired resting-state functional MRI in participants with relapsing-remitting MS who were recruited between September 2018 and January 2019. Correlation analyses of model fit indices and volumetric measures with Expanded Disability Status Scale (EDSS) scores and disease duration were performed. Results Model verification of healthy control participants included 80 194 coordinates from 9035 experiments. Model verification in healthy control data resulted in excellent model fit (root mean square error of approximation, 0.037; 90% CI: 0.036, 0.039). Twenty participants (mean age, 36 years ± 9 [standard deviation]; 12 women) with relapsing-remitting MS were evaluated. Model validation in resting-state functional MRI in participants with MS resulted in deviation from optimal model fit (root mean square error of approximation, 0.071; 90% CI: 0.070, 0.072), which correlated with EDSS scores (r = 0.68; P = .002). Conclusion The atrophy-based functional network model predicts functional network disruption in multiple sclerosis (MS), thereby supporting the network degeneration hypothesis. 
On resting-state functional MRI scans, reduced functional network integrity in participants with MS had a strong positive correlation with clinical disability. © RSNA, 2021 Online supplemental material is available for this article.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +33529731,Deep learning for the radiographic diagnosis of proximal femur fractures: Limitations and programming issues.,"

Introduction

Radiology is one of the domains where artificial intelligence (AI) yields encouraging results, with diagnostic accuracy that approaches that of experienced radiologists and physicians. Diagnostic errors in traumatology are rare but can have serious functional consequences. Using AI as a radiological diagnostic aid may be beneficial in the emergency room. Thus, an effective, low-cost software that helps with making radiographic diagnoses would be a relevant tool for current clinical practice, although this concept has rarely been evaluated in orthopedics for proximal femur fractures (PFF). This led us to conduct a prospective study with the goals of: 1) programming deep learning software to help make the diagnosis of PFF on radiographs and 2) to evaluate its performance.

Hypothesis

It is possible to program an effective deep learning software to help make the diagnosis of PFF based on a limited number of radiographs.

Methods

Our database consisted of 1309 radiographs: 963 had a PFF, while 346 did not. The sample size was increased 8-fold (resulting in 10,472 radiographs) using a validated technique. Each radiograph was evaluated by an orthopedic surgeon using RectLabel™ software (https://rectlabel.com), by differentiating between healthy and fractured zones. Fractures were classified according to the AO system. The deep learning algorithm was programmed on Tensorflow™ software (Google Brain, Santa Clara, Ca, USA, tensorflow.org). In all, 9425 annotated radiographs (90%) were used for the training phase and 1074 (10%) for the test phase.

Results

The sensitivity of the algorithm was 61% for femoral neck fractures and 67% for trochanteric fractures. The specificity was 67% and 69%, the positive predictive value was 55% and 56%, while the negative predictive value was 74% and 78%, respectively.

Conclusion

Our results are not good enough for our algorithm to be used in current clinical practice. Programming of deep learning software with sufficient diagnostic accuracy can only be done with several tens of thousands of radiographs, or by using transfer learning.

Level of evidence

III; Diagnostic studies, Study of nonconsecutive patients, without consistently applied reference ""gold"" standard.","hji,kes",0,0,0,2,0,NA,NA +33532481,Newly developed multiple-breath washout reference equations from the CHILD Cohort Study: implications of poorly fitting equations.,"Using inappropriate reference equations would provide incorrect estimate of z-scores, which would cause misdiagnosis. Appropriate representative normative reference data must be available to correctly interpret individual lung function results. https://bit.ly/3dcNZ5p.","hji,kes",0,0,0,2,0,NA,NA +33532841,BAGET 2.0: an updated web tool for the effortless retrieval of prokaryotic gene context and sequence.,"

Motivation

The retrieval of a single gene sequence and context from completely sequenced bacterial and archaeal genomes constitutes an intimidating task for the wet bench biologist. Existing web-based genome browsers are either too complex for routine use or only provide a subset of the available prokaryotic genomes.

Results

We have developed BAGET 2.0 (Bacterial and Archaeal Gene Exploration Tool), an updated web service granting access in just three mouse clicks to the sequence and synteny of any gene from completely sequenced bacteria and archaea. User-provided annotated genomes can be processed as well. BAGET 2.0 relies on a local database updated on a daily basis.

Availability and implementation

BAGET 2.0 befits all current browsers such as Chrome, Firefox, Edge, Opera and Safari. Internet Explorer 11 is supported. BAGET 2.0 is freely accessible at https://archaea.i2bc.paris-saclay.fr/baget/.","hji,kes",0,0,0,2,0,Uses underlying database that is still being updated,"questionable but retrieval ""exploration"" - no download" +33539179,Prenatal Exposure to Nitrate from Drinking Water and Markers of Fetal Growth Restriction: A Population-Based Study of Nearly One Million Danish-Born Children.,"

Background

High levels of nitrate (NO3-) in drinking water cause methemoglobinemia in infants; however, few studies have examined the potential effects of low-level exposure on fetal growth, and the results have been inconsistent.

Objectives

We sought to assess the association between maternal exposure to nitrate in drinking water during pregnancy and offspring size at birth in a nationwide study of full-term (≥37 wk gestation) live-born singletons.

Methods

We estimated maternal nitrate exposure for 898,206 births in Denmark during 1991-2011 by linkage of individual home address(es) with nitrate data from the national monitoring database. Maternal address during pregnancy, infant size at birth [i.e., birth weight, low birth weight (LBW), body length, and birth head circumference] and covariates were compiled from the Danish Civil Registration System, the Danish Medical Birth Register, and The Integrated Database for Longitudinal Labor Market Research. Linear and logistic models with generalized estimating equations were used to account for multiple births to an individual. Nitrate exposure was modeled using five categories and as a log-transformed continuous variable.

Results

There was evidence of a decreasing trend in models for term birth weight using categorical or continuous measures of exposure. Modeling exposure continuously, a difference of -9.71 g (95% confidence interval: -14.60, -4.81) was predicted at 25 mg/L (half the value of the European Union drinking water standard) compared with 0 mg/L NO3-. Body length also decreased as nitrate concentrations increased in categorical and continuous models. There was little evidence of an association between NO3- and head circumference or LBW.

Discussion

Although the estimated effects were small, our findings for live singleton births to Danish-born parents suggest that maternal intake of nitrate from drinking water may reduce term birth weight and length, which are markers of intrauterine growth. However, there was little evidence for an association between nitrate and head circumference or LBW. Future studies in other populations and with data on dietary sources of nitrate are encouraged to confirm or refute these findings. https://doi.org/10.1289/EHP7331.","hji,kes",0,0,0,2,0,NA,NA +33557754,Visual4DTracker: a tool to interact with 3D + t image stacks.,"

Background

Biological phenomena usually evolves over time and recent advances in high-throughput microscopy have made possible to collect multiple 3D images over time, generating [Formula: see text] (or 4D) datasets. To extract useful information there is the need to extract spatial and temporal data on the particles that are in the images, but particle tracking and feature extraction need some kind of assistance.

Results

This manuscript introduces our new freely downloadable toolbox, the Visual4DTracker. It is a MATLAB package implementing several useful functionalities to navigate, analyse and proof-read the track of each particle detected in any [Formula: see text] stack. Furthermore, it allows users to proof-read and to evaluate the traces with respect to a given gold standard. The Visual4DTracker toolbox permits the users to visualize and save all the generated results through a user-friendly graphical user interface. This tool has been successfully used in three applicative examples. The first processes synthetic data to show all the software functionalities. The second shows how to process a 4D image stack showing the time-lapse growth of Drosophila cells in an embryo. The third example presents the quantitative analysis of insulin granules in living beta-cells, showing that such particles have two main dynamics that coexist inside the cells.

Conclusions

Visual4DTracker is a software package for MATLAB to visualize, handle and manually track [Formula: see text] stacks of microscopy images containing objects such cells, granules, etc.. With its unique set of functions, it remarkably permits the user to analyze and proof-read 4D data in a friendly 3D fashion. The tool is freely available at https://drive.google.com/drive/folders/19AEn0TqP-2B8Z10kOavEAopTUxsKUV73?usp=sharing.","hji,kes",0,0,0,2,0,software,in google drive +33563213,lncEvo: automated identification and conservation study of long noncoding RNAs.,"

Background

Long noncoding RNAs represent a large class of transcripts with two common features: they exceed an arbitrary length threshold of 200 nt and are assumed to not encode proteins. Although a growing body of evidence indicates that the vast majority of lncRNAs are potentially nonfunctional, hundreds of them have already been revealed to perform essential gene regulatory functions or to be linked to a number of cellular processes, including those associated with the etiology of human diseases. To better understand the biology of lncRNAs, it is essential to perform a more in-depth study of their evolution. In contrast to protein-encoding transcripts, however, they do not show the strong sequence conservation that usually results from purifying selection; therefore, software that is typically used to resolve the evolutionary relationships of protein-encoding genes and transcripts is not applicable to the study of lncRNAs.

Results

To tackle this issue, we developed lncEvo, a computational pipeline that consists of three modules: (1) transcriptome assembly from RNA-Seq data, (2) prediction of lncRNAs, and (3) conservation study-a genome-wide comparison of lncRNA transcriptomes between two species of interest, including search for orthologs. Importantly, one can choose to apply lncEvo solely for transcriptome assembly or lncRNA prediction, without calling the conservation-related part.

Conclusions

lncEvo is an all-in-one tool built with the Nextflow framework, utilizing state-of-the-art software and algorithms with customizable trade-offs between speed and sensitivity, ease of use and built-in reporting functionalities. The source code of the pipeline is freely available for academic and nonacademic use under the MIT license at https://gitlab.com/spirit678/lncrna_conservation_nf .","hji,kes",0,0,0,2,0,software,NA +33564394,gprofiler2 -- an R package for gene list functional enrichment analysis and namespace conversion toolset g:Profiler.,"g:Profiler ( https://biit.cs.ut.ee/gprofiler) is a widely used gene list functional profiling and namespace conversion toolset that has been contributing to reproducible biological data analysis already since 2007. Here we introduce the accompanying R package, gprofiler2, developed to facilitate programmatic access to g:Profiler computations and databases via REST API. The gprofiler2 package provides an easy-to-use functionality that enables researchers to incorporate functional enrichment analysis into automated analysis pipelines written in R. The package also implements interactive visualisation methods to help to interpret the enrichment results and to illustrate them for publications. In addition, gprofiler2 gives access to the versatile gene/protein identifier conversion functionality in g:Profiler enabling to map between hundreds of different identifier types or orthologous species. The gprofiler2 package is freely available at the CRAN repository.","hji,kes",0,0,0,2,0,software,NA +33568057,GalaxyTrakr: a distributed analysis tool for public health whole genome sequence data accessible to non-bioinformaticians.,"

Background

Processing and analyzing whole genome sequencing (WGS) is computationally intense: a single Illumina MiSeq WGS run produces ~ 1 million 250-base-pair reads for each of 24 samples. This poses significant obstacles for smaller laboratories, or laboratories not affiliated with larger projects, which may not have dedicated bioinformatics staff or computing power to effectively use genomic data to protect public health. Building on the success of the cloud-based Galaxy bioinformatics platform ( http://galaxyproject.org ), already known for its user-friendliness and powerful WGS analytical tools, the Center for Food Safety and Applied Nutrition (CFSAN) at the U.S. Food and Drug Administration (FDA) created a customized 'instance' of the Galaxy environment, called GalaxyTrakr ( https://www.galaxytrakr.org ), for use by laboratory scientists performing food-safety regulatory research. The goal was to enable laboratories outside of the FDA internal network to (1) perform quality assessments of sequence data, (2) identify links between clinical isolates and positive food/environmental samples, including those at the National Center for Biotechnology Information sequence read archive ( https://www.ncbi.nlm.nih.gov/sra/ ), and (3) explore new methodologies such as metagenomics. GalaxyTrakr hosts a variety of free and adaptable tools and provides the data storage and computing power to run the tools. These tools support coordinated analytic methods and consistent interpretation of results across laboratories. Users can create and share tools for their specific needs and use sequence data generated locally and elsewhere.

Results

In its first full year (2018), GalaxyTrakr processed over 85,000 jobs and went from 25 to 250 users, representing 53 different public and state health laboratories, academic institutions, international health laboratories, and federal organizations. By mid-2020, it has grown to 600 registered users and processed over 450,000 analytical jobs. To illustrate how laboratories are making use of this resource, we describe how six institutions use GalaxyTrakr to quickly analyze and review their data. Instructions for participating in GalaxyTrakr are provided.

Conclusions

GalaxyTrakr advances food safety by providing reliable and harmonized WGS analyses for public health laboratories and promoting collaboration across laboratories with differing resources. Anticipated enhancements to this resource will include workflows for additional foodborne pathogens, viruses, and parasites, as well as new tools and services.","hji,kes",0,0,0,2,0,software,Chuck Check - no +33579190,A graph-based algorithm for detecting rigid domains in protein structures.,"

Background

Conformational transitions are implicated in the biological function of many proteins. Structural changes in proteins can be described approximately as the relative movement of rigid domains against each other. Despite previous efforts, there is a need to develop new domain segmentation algorithms that are capable of analysing the entire structure database efficiently and do not require the choice of protein-dependent tuning parameters such as the number of rigid domains.

Results

We develop a graph-based method for detecting rigid domains in proteins. Structural information from multiple conformational states is represented by a graph whose nodes correspond to amino acids. Graph clustering algorithms allow us to reduce the graph and run the Viterbi algorithm on the associated line graph to obtain a segmentation of the input structures into rigid domains. In contrast to many alternative methods, our approach does not require knowledge about the number of rigid domains. Moreover, we identified default values for the algorithmic parameters that are suitable for a large number of conformational ensembles. We test our algorithm on examples from the DynDom database and illustrate our method on various challenging systems whose structural transitions have been studied extensively.

Conclusions

The results strongly suggest that our graph-based algorithm forms a novel framework to characterize structural transitions in proteins via detecting their rigid domains. The web server is available at http://azifi.tz.agrar.uni-goettingen.de/webservice/ .","hji,kes",0,0,0,2,0,software,NA +33590861,Twelve years of SAMtools and BCFtools.,"

Background

SAMtools and BCFtools are widely used programs for processing and analysing high-throughput sequencing data. They include tools for file format conversion and manipulation, sorting, querying, statistics, variant calling, and effect analysis amongst other methods.

Findings

The first version appeared online 12 years ago and has been maintained and further developed ever since, with many new features and improvements added over the years. The SAMtools and BCFtools packages represent a unique collection of tools that have been used in numerous other software projects and countless genomic pipelines.

Conclusion

Both SAMtools and BCFtools are freely available on GitHub under the permissive MIT licence, free for both non-commercial and commercial use. Both packages have been installed >1 million times via Bioconda. The source code and documentation are available from https://www.htslib.org.","hji,kes",0,0,0,2,0,NA,NA +33592504,TAP 1.0: A robust immunoinformatic tool for the prediction of tumor T-cell antigens based on AAindex properties.,"Immunotherapy is a research area with great potential in drug discovery for cancer treatment. Because of the capacity of tumor antigens to activate the immune response and promote the destruction of tumor cells, they are considered excellent immunotherapeutic drugs. In this work, we evaluated fifteen machine learning algorithms for the classification of tumor antigens. For this purpose, we build robust datasets, carefully selected from the TANTIGEN and IEDB databases. The feature computation of all antigens in this study was performed by developing a script written in Python 3.8, which allowed the calculation of 544 physicochemical and biochemical properties extracted from the AAindex database. All classifiers were subjected to the training, 10-fold cross-validation, and testing on an independent dataset. The results of this study showed that the quadratic discriminant classifier presented the best performance measures over the independent dataset, accuracy = 0.7384, AUC = 0.817, recall = 0.676, precision = 0.7857, F1 = 0.713, kappa = 0.4764, and Matthews correlation coefficient = 0.4834, outperforming common machine learning classifiers used in the bioinformatics area. We believe that our prediction model could be of great importance in the field of cancer immunotherapy for the search of potential tumor antigens. 
Taking all aspects mentioned before, we developed an immunoinformatic tool called TAP 1.0 with a friendly interface for tumor antigens prediction, available at https://tapredictor.herokuapp.com/.","hji,kes",0,0,0,2,0,software,extracted data - used for methods +33593821,Therapeutic Targeting of DGKA-Mediated Macropinocytosis Leads to Phospholipid Reprogramming in Tuberous Sclerosis Complex.,"Lymphangioleiomyomatosis is a rare destructive lung disease affecting primarily women and is the primary lung manifestation of tuberous sclerosis complex (TSC). In lymphangioleiomyomatosis, biallelic loss of TSC1/2 leads to hyperactivation of mTORC1 and inhibition of autophagy. To determine how the metabolic vulnerabilities of TSC2-deficient cells can be targeted, we performed a high-throughput screen utilizing the ""Repurposing"" library at the Broad Institute of MIT and Harvard (Cambridge, MA), with or without the autophagy inhibitor chloroquine. Ritanserin, an inhibitor of diacylglycerol kinase alpha (DGKA), was identified as a selective inhibitor of proliferation of Tsc2-/- mouse embryonic fibroblasts (MEF), with no impact on Tsc2+/+ MEFs. DGKA is a lipid kinase that metabolizes diacylglycerol to phosphatidic acid, a key component of plasma membranes. Phosphatidic acid levels were increased 5-fold in Tsc2-/- MEFs compared with Tsc2+/+ MEFs, and treatment of Tsc2-/- MEFs with ritanserin led to depletion of phosphatidic acid as well as rewiring of phospholipid metabolism. Macropinocytosis is known to be upregulated in TSC2-deficient cells. Ritanserin decreased macropinocytic uptake of albumin, limited the number of lysosomes, and reduced lysosomal activity in Tsc2-/- MEFs. In a mouse model of TSC, ritanserin treatment decreased cyst frequency and volume, and in a mouse model of lymphangioleiomyomatosis, genetic downregulation of DGKA prevented alveolar destruction and airspace enlargement. 
Collectively, these data indicate that DGKA supports macropinocytosis in TSC2-deficient cells to maintain phospholipid homeostasis and promote proliferation. Targeting macropinocytosis with ritanserin may represent a novel therapeutic approach for the treatment of TSC and lymphangioleiomyomatosis. SIGNIFICANCE: This study identifies macropinocytosis and phospholipid metabolism as novel mechanisms of metabolic homeostasis in mTORC1-hyperactive cells and suggest ritanserin as a novel therapeutic strategy for use in mTORC1-hyperactive tumors, including pancreatic cancer. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/8/2086/F1.large.jpg.","hji,kes",0,0,0,2,0,NA,NA +33619466,An interactive tool to forecast US hospital needs in the coronavirus 2019 pandemic.,"

Objective

We developed an application (https://rush-covid19.herokuapp.com/) to aid US hospitals in planning their response to the ongoing Coronavirus Disease 2019 (COVID-19) pandemic.

Materials and methods

Our application forecasts hospital visits, admits, discharges, and needs for hospital beds, ventilators, and personal protective equipment by coupling COVID-19 predictions to models of time lags, patient carry-over, and length-of-stay. Users can choose from 7 COVID-19 models, customize 23 parameters, examine trends in testing and hospitalization, and download forecast data.

Results

Our application accurately predicts the spread of COVID-19 across states and territories. Its hospital-level forecasts are in continuous use by our home institution and others.

Discussion

Our application is versatile, easy-to-use, and can help hospitals plan their response to the changing dynamics of COVID-19, while providing a platform for deeper study.

Conclusion

Empowering healthcare responses to COVID-19 is as crucial as understanding the epidemiology of the disease. Our application will continue to evolve to meet this need.","hji,kes",0,0,0,2,0,software,out of scope +33622334,Identification of major depressive disorder disease-related genes and functional pathways based on system dynamic changes of network connectivity.,"

Background

Major depressive disorder (MDD) is a leading psychiatric disorder that involves complex abnormal biological functions and neural networks. This study aimed to compare the changes in the network connectivity of different brain tissues under different pathological conditions, analyzed the biological pathways and genes that are significantly related to disease progression, and further predicted the potential therapeutic drug targets.

Methods

Expression of differentially expressed genes (DEGs) were analyzed with postmortem cingulate cortex (ACC) and prefrontal cortex (PFC) mRNA expression profile datasets downloaded from the Gene Expression Omnibus (GEO) database, including 76 MDD patients and 76 healthy subjects in ACC and 63 MDD patients and 63 healthy subjects in PFC. The co-expression network construction was based on system network analysis. The function of the genes was annotated by Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway analysis. Human Protein Reference Database (HPRD, http://www.hprd.org/ ) was used for gene interaction relationship mapping.

Results

We filtered 586 DEGs in ACC and 616 DEGs in PFC for further analysis. By constructing the co-expression network, we found that the gene connectivity was significantly reduced under disease conditions (P = 0.04 in PFC and P = 1.227e-09 in ACC). Crosstalk analysis showed that CD19, PTDSS2 and NDST2 were significantly differentially expressed in ACC and PFC of MDD patients. Among them, CD19 and PTDSS2 have been targeted by several drugs in the Drugbank database. KEGG pathway analysis demonstrated that the function of CD19 and PTDSS2 were enriched with the pathway of Glycerophospholipid metabolism and T cell receptor signaling pathway.

Conclusion

Co-expression network and tissue comparing analysis can identify signaling pathways and cross talk genes related to MDD, which may provide novel insight for understanding the molecular mechanisms of MDD.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +33630831,A plasmid DNA-launched SARS-CoV-2 reverse genetics system and coronavirus toolkit for COVID-19 research.,"The recent emergence of Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2), the underlying cause of Coronavirus Disease 2019 (COVID-19), has led to a worldwide pandemic causing substantial morbidity, mortality, and economic devastation. In response, many laboratories have redirected attention to SARS-CoV-2, meaning there is an urgent need for tools that can be used in laboratories unaccustomed to working with coronaviruses. Here we report a range of tools for SARS-CoV-2 research. First, we describe a facile single plasmid SARS-CoV-2 reverse genetics system that is simple to genetically manipulate and can be used to rescue infectious virus through transient transfection (without in vitro transcription or additional expression plasmids). The rescue system is accompanied by our panel of SARS-CoV-2 antibodies (against nearly every viral protein), SARS-CoV-2 clinical isolates, and SARS-CoV-2 permissive cell lines, which are all openly available to the scientific community. Using these tools, we demonstrate here that the controversial ORF10 protein is expressed in infected cells. Furthermore, we show that the promising repurposed antiviral activity of apilimod is dependent on TMPRSS2 expression. 
Altogether, our SARS-CoV-2 toolkit, which can be directly accessed via our website at https://mrcppu-covid.bio/, constitutes a resource with considerable potential to advance COVID-19 vaccine design, drug testing, and discovery science.","hji,kes",0,0,0,2,0,software,NA +33633365,ArchR is a scalable software package for integrative single-cell chromatin accessibility analysis.,"The advent of single-cell chromatin accessibility profiling has accelerated the ability to map gene regulatory landscapes but has outpaced the development of scalable software to rapidly extract biological meaning from these data. Here we present a software suite for single-cell analysis of regulatory chromatin in R (ArchR; https://www.archrproject.com/ ) that enables fast and comprehensive analysis of single-cell chromatin accessibility data. ArchR provides an intuitive, user-focused interface for complex single-cell analyses, including doublet removal, single-cell clustering and cell type identification, unified peak set generation, cellular trajectory identification, DNA element-to-gene linkage, transcription factor footprinting, mRNA expression level prediction from chromatin accessibility and multi-omic integration with single-cell RNA sequencing (scRNA-seq). Enabling the analysis of over 1.2 million single cells within 8 h on a standard Unix laptop, ArchR is a comprehensive software suite for end-to-end analysis of single-cell chromatin accessibility that will accelerate the understanding of gene regulation at the resolution of individual cells.","hji,kes",0,0,0,2,0,software,NA +33633572,TOXPANEL: A Gene-Set Analysis Tool to Assess Liver and Kidney Injuries.,"Gene-set analysis is commonly used to identify trends in gene expression when cells, tissues, organs, or organisms are subjected to conditions that differ from those within the normal physiological range. However, tools for gene-set analysis to assess liver and kidney injury responses are less common. 
Furthermore, most websites for gene-set analysis lack the option for users to customize their gene-set database. Here, we present the ToxPanel website, which allows users to perform gene-set analysis to assess liver and kidney injuries using activation scores based on gene-expression fold-change values. The results are graphically presented to assess constituent injury phenotypes (histopathology), with interactive result tables that identify the main contributing genes to a given signal. In addition, ToxPanel offers the flexibility to analyze any set of custom genes based on gene fold-change values. ToxPanel is publically available online at https://toxpanel.bhsai.org. ToxPanel allows users to access our previously developed liver and kidney injury gene sets, which we have shown in previous work to yield robust results that correlate with the degree of injury. Users can also test and validate their customized gene sets using the ToxPanel website.","hji,kes",0,0,0,2,0,unclear on data availability,NA +33634706,Autonomic Alterations After Pulmonary Vein Isolation in the CIRCA-DOSE (Cryoballoon vs Irrigated Radiofrequency Catheter Ablation) Study.,"Background The natural history of autonomic alterations following catheter ablation of drug-refractory paroxysmal atrial fibrillation is poorly defined, largely because of the historical reliance on non-invasive intermittent rhythm monitoring for outcome ascertainment. Methods and Results The study included 346 patients with drug-refractory paroxysmal atrial fibrillation undergoing pulmonary vein isolation using contemporary advanced-generation ablation technologies. All patients underwent insertion of a Reveal LINQ (Medtronic) implantable cardiac monitor before ablation. The implantable cardiac monitor continuously recorded physical activity, heart rate variability (measured as the SD of the average normal-to-normal), daytime heart rate, and nighttime heart rate. 
Longitudinal autonomic data in the 2-month period leading up to the date of ablation were compared with the period from 91 to 365 days following ablation. Following ablation there was a significant decrease in SD of the average normal-to-normal (mean difference versus baseline of 19.3 ms; range, 12.9-25.7; P<0.0001), and significant increases in daytime and nighttime heart rates (mean difference versus baseline of 9.6 bpm; range, 7.4-11.8; P<0.0001, and 7.4 bpm; range, 5.4-9.3; P<0.0001, respectively). Patients free of arrhythmia recurrence had significantly faster daytime (11±11 versus 8±12 bpm, P=0.001) and nighttime heart rates (8±9 versus 6±8 bpm, P=0.049), but no difference in SD of the average normal-to-normal (P=0.09) compared with those with atrial fibrillation recurrence. Ablation technology and cryoablation duration did not influence these autonomic nervous system effects. Conclusions Pulmonary vein isolation results in significant sustained changes in the heart rate parameters related to autonomic function. These changes are correlated with procedural outcome and are independent of the ablation technology used. Registration URL: https://www.clinicaltrials.gov; Unique identifier: NCT01913522.","hji,kes",0,0,0,2,0,NA,NA +33638346,HeteroGGM: an R package for Gaussian graphical model-based heterogeneity analysis.,"

Summary

Heterogeneity is a hallmark of many complex human diseases, and unsupervised heterogeneity analysis has been extensively conducted using high-throughput molecular measurements and histopathological imaging features. ""Classic"" heterogeneity analysis has been based on simple statistics such as mean, variance, and correlation. Network-based analysis takes interconnections as well as individual variable properties into consideration and can be more informative. Several Gaussian graphical model (GGM)-based heterogeneity analysis techniques have been developed, but friendly and portable software is still lacking. To facilitate more extensive usage, we develop the R package HeteroGGM, which conducts GGM-based heterogeneity analysis using the advanced penalization techniques, can provide informative summary and graphical presentation, and is efficient and friendly.

Availability

The package is available at https://CRAN.R-project.org/package=HeteroGGM.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33652081,An international core outcome set for evaluating interventions to improve informed consent to clinical trials: The ELICIT Study.,"

Objective

To develop a core outcome set for the evaluation of interventions that aim to improve how people make decisions about whether to participate in randomized controlled trials (of healthcare interventions), the ELICIT Study.

Study design

International mixed-method study involving a systematic review of existing outcomes, semi-structured interviews, an online Delphi survey, and a face-to-face consensus meeting.

Results

The literature review and stakeholder interviews (n = 25) initially identified 1045 reported outcomes that were grouped into 40 individually distinct outcomes. These 40 outcomes were scored for importance in two rounds of an online Delphi survey (n = 79), with 18 people attending the consensus meeting. Consensus was reached on 12 core outcomes: therapeutic misconception; comfort with decision; authenticity of decision; communication about the trial; empowerment; sense of altruism; equipoise; knowledge; salience of questions; understanding, how helpful the process was for decision making; and trial attrition.

Conclusion

The ELICIT core outcome set is the first internationally agreed minimum set of outcomes deemed essential to be measured in all future studies evaluating interventions to improve decisions about participating in an randomized controlled trial. Use of the ELICIT core set will ensure that results from these trials are comparable and relevant to all stakeholders.

Registration

COMET database - http://www.comet-initiative.org/Studies/Details/595.","hji,kes",0,0,0,2,0,NA,out of scope +33656920,Human Colonization with Extended-Spectrum Beta-Lactamase-Producing E. coli in Relation to Animal and Environmental Exposures in Bangladesh: An Observational One Health Study.,"

Background

Human exposure to intensively farmed livestock is a potential risk for transmission of antibiotic-resistant bacteria (ARB) but few studies have assessed the relative role of animal vs. environmental sources of ARB in low-resource community settings.

Objectives

We conducted an observational study to compare ARB colonization and antibiotic-resistant gene prevalence and abundance in humans with high or low exposure to poultry in rural households, commercial poultry farms, and urban markets in Bangladesh.

Methods

Extended-spectrum β-lactamase (ESBL)-producing and carbapenem-resistant E. coli were quantified in feces from adults with high or low poultry exposure (n=100, respectively), poultry (n=200), drinking water (n=120), and wastewater (n=120) from 40 rural households, 40 poultry farms, and 40 urban markets.

Results

ESBL-producing E. coli (ESBL-EC) prevalence was 67.5% (95% CI: 61.0, 74.0) in samples from adults, 68.0% (95% CI: 61.5, 74.5) in samples from poultry, and 92.5% (95% CI: 87.7, 97.3) in wastewater samples. Carbapenem-resistant E. coli prevalence was high in market wastewaters [30% (95% CI: 15.0, 45.0)] but low in humans (1%) and poultry (1%). Human, poultry, and wastewater isolates shared common resistance genes: blaCTX-M-1, qnr, and blaTEM. Human colonization was not significantly associated with exposure to poultry or setting (rural, farm, or market). Ninety-five percent of commercial poultry farms routinely administered antibiotics. Susceptibility tests were significantly different in household vs. farm and market poultry isolates for four of seven antibiotic classes. In human isolates, there were no differences except aminoglycoside resistance (16.4% high vs. 4.4% low exposure, p=0.02). Urban market wastewaters and poultry samples had significantly higher concentrations of ESBL-EC (p<0.001) and blaCTX-M-1 (p<0.001) compared with samples from farms and rural households.

Discussion

ESBL-EC colonization was high in humans but not significantly associated with exposure to poultry. Bidirectional transmission of antibiotic resistance is likely between humans, poultry, and the environment in these community settings, underlining the importance of One Health mitigation strategies. https://doi.org/10.1289/EHP7670.","hji,kes",0,0,0,2,0,NA,NA +33677064,Primary Coenzyme Q deficiencies: A literature review and online platform of clinical features to uncover genotype-phenotype correlations.,"Primary Coenzyme Q (CoQ) deficiencies are clinically heterogeneous conditions and lack clear genotype-phenotype correlations, complicating diagnosis and prognostic assessment. Here we present a compilation of all the symptoms and patients with primary CoQ deficiency described in the literature so far and analyse the most common clinical manifestations associated with pathogenic variants identified in the different COQ genes. In addition, we identified new associations between the age of onset of symptoms and different pathogenic variants, which could help to a better diagnosis and guided treatment. To make these results useable for clinicians, we created an online platform (https://coenzymeQbiology.github.io/clinic-CoQ-deficiency) about clinical manifestations of primary CoQ deficiency that will be periodically updated to incorporate new information published in the literature. Since CoQ primary deficiency is a rare disease, the available data are still limited, but as new patients are added over time, this tool could become a key resource for a more efficient diagnosis of this pathology.","hji,kes",0,0,0,2,0,NA,out of scope +33677478,Bali-Phy version 3: Model-based co-estimation of alignment and phylogeny.,"

Summary

We describe improvements to BAli-Phy, a Markov chain Monte Carlo (MCMC) program that jointly estimates phylogeny, alignment, and other parameters from unaligned sequence data. Version 3 is substantially faster for large trees, and implements covarion models, additional codon models, and other new models. It implements ancestral state reconstruction, allows prior selection for all model parameters, and can also analyze multiple genes simultaneously.

Availability

Software is available for download at http://www.bali-phy.org. C++ source code is freely available on Github under the GPL2 License.","hji,kes",0,0,0,2,0,NA,NA +33684246,Development of machine learning model algorithm for prediction of 5-year soft tissue myxoid liposarcoma survival.,"

Background

Predicting survival in myxoid liposarcoma (MLS) patients is very challenging given its propensity to metastasize and the controversial role of adjuvant therapy. The purpose of this study was to develop a machine-learning algorithm for the prediction of survival at five years for patients with MLS and externally validate it using our institutional cohort.

Methods

Two databases, the surveillance, epidemiology, and end results program (SEER) database and an institutional database, were used in this study. Five machine learning models were created based on the SEER database and performance was rated using the TRIPOD criteria. The model that performed best on the SEER data was again tested on our institutional database.

Results

The net-elastic penalized logistic regression model was the best according to our performance indicators. This model had an area under the curve (AUC) of 0.85 when compared to the SEER testing data and an AUC of 0.76 when tested against institutional database. An application to use this calculator is available at https://sorg-apps.shinyapps.io/myxoid_liposarcoma/.

Conclusion

MLS is a soft-tissue sarcoma with adjunct treatment options that are, in part, decided by prognostic survival. We developed the first machine-learning predictive algorithm specifically for MLS using the SEER registry that retained performance during external validation with institutional data.","hji,kes",0,0,0,2,0,NA,NA +33686532,Discovery of potential biomarkers in acute kidney injury by ultra-high-performance liquid chromatography-tandem quadrupole time-of-flight mass spectrometry (UPLC-Q/TOF-MS).,"

Objective

The LC-MS/MS-based non-targeted metabolomics method was used to differentially screen serum and urine metabolites of acute kidney injury (AKI) patients and healthy people, to explore potential biomarkers of AKI and analyze related pathways, and explain the potential mechanism and biological significance of AKI.

Methods

The serum and urine samples from 30 AKI patients and 20 healthy people were selected to conduct a non-targeted metabolomics study by ultra-high-performance liquid chromatography-tandem quadrupole time-of-flight mass spectrometry (UPLC-Q/TOF-MS). The differential metabolites between the two groups were searched by the human metabolome (HMDB) database ( https://hmdb.ca/ ) and the related pathways of these potential biomarkers were identified by searching the Kyoto encyclopedia of genes and genomes (KEGG) database ( https://www.kegg.jp/ ). The total metabolic pathways were analyzed by the MS Peaks to Pathways module of MetaboAnalyst ( https://www.metaboanalyst.ca/ ).

Results

Multivariate data analysis found that serum and urine metabolism in AKI patients was significantly different from healthy people. We found three metabolites in urine (2-S-glutathionyl glutathione acetate, 5-L-Glutamyl-taurine, and L-Phosphoarginine) contributing to the separation of AKI patients from healthy people, and major metabolic pathways associated with these potential biomarkers including cytochrome P450 metabolism, arginine, and proline metabolism.

Conclusion

2-S-glutathionyl glutathione acetate, 5-L-Glutamyl-taurine, and L-Phosphoarginine were associated with AKI patients, which could be selected as potential biomarkers to predicate AKI disease.","hji,kes",0,0,0,2,0,NA,not descriptive of resources +33689356,XlinkCyNET: A Cytoscape Application for Visualization of Protein Interaction Networks Based on Cross-Linking Mass Spectrometry Identifications.,"Software tools that allow the visualization and analysis of protein interaction networks are essential for studies in systems biology. One of the most popular network visualization tools in biology is Cytoscape, which offers a great selection of plug-ins for the interpretation of network data. Chemical cross-linking coupled to mass spectrometry (XL-MS) is an increasingly important source for protein interaction data; however, to date, no Cytoscape tools are available to analyze XL-MS results. In light of the suitability of the Cytoscape platform and to expand its toolbox, here we introduce XlinkCyNET, an open-source Cytoscape Java plug-in for exploring large-scale XL-MS-based protein interaction networks. XlinkCyNET offers the rapid and easy visualization of intra- and interprotein cross-links in a rectangular-bar style as well as on the 3D structure, allowing the interrogation of protein interaction networks at the residue level. XlinkCyNET is freely available from the Cytoscape App Store (http://apps.cytoscape.org/apps/xlinkcynet) and at the Liu lab webpage (https://www.theliulab.com/software/xlinkcynet).","hji,kes",0,0,0,2,0,software,NA +33719338,Construction of circRNA-Based ceRNA Network to Reveal the Role of circRNAs in the Progression and Prognosis of Hepatocellular Carcinoma.,"

Background

Circular RNAs (circRNAs) are now under hot discussion as novel promising biomarkers for patients with hepatocellular carcinoma (HCC). The purpose of our study is to identify several competing endogenous RNA (ceRNA) networks related to the prognosis and progression of HCC and to further investigate the mechanism of their influence on tumor progression.

Methods

First, we obtained gene expression data related to liver cancer from The Cancer Genome Atlas (TCGA) database (http://www.portal.gdc.cancer.gov/), including microRNA (miRNA) sequence, RNA sequence, and clinical information. A co-expression network was constructed through the Weighted Correlation Network Analysis (WGCNA) software package in R software. The differentially expressed messenger RNAs (DEmRNAs) in the key module were analyzed with the Database for Annotation Visualization and Integrated Discovery (DAVID) (https://david.ncifcrf.gov/summary.jsp) to perform functional enrichment analysis including Kyoto Encyclopedia of Genes and Genomes (KEGG) and Gene Ontology (GO). The data of miRNA expression and clinical information downloaded from TCGA were utilized for survival analysis to detach the prognostic value of the DEmiRNAs of the key module.

Results

The 201 differentially expressed miRNAs (DEmiRNAs) and 3,783 DEmRNAs were preliminarily identified through differential expression analysis. The co-expression networks of DEmiRNAs and DEmRNAs were constructed with WGCNA. Further analysis confirmed four miRNAs in the most significant module (blue module) were associated with the overall survival (OS) of patients with liver cancer, including hsa-miR-92b-3p, hsa-miR-122-3p, hsa-miR-139-5p, and hsa-miR-7850-5p. DAVID was used for functional enrichment analysis of 286 co-expressed mRNAs. The GO analysis results showed that the top enriched GO terms were oxidation-reduction process, extracellular exosome, and iron ion binding. In KEGG pathway analysis, the top three enriched terms included metabolic pathways, fatty acid degradation, and valine, leucine, and isoleucine degradation. In addition, we intersected the miRNA-mRNA interaction prediction results with the differentially expressed and prognostic mRNAs. We found that hsa-miR-92b-3p can be related to CPEB3 and ACADL. By overlapping the data of predicted circRNAs by circBank and differentially expressed circRNAs of GSE94508, we screened has_circ_0077210 as the upstream regulatory molecule of hsa-miR-92b-3p. Hsa_circ_0077210/hsa-miR-92b-3p/cytoplasmic polyadenylation element binding protein-3 (CPEB3) and acyl-Coenzyme A dehydrogenase, long chain (ACADL) were validated in HCC tissue.

Conclusion

Our research provides a mechanistic elucidation of the unknown ceRNA regulatory network in HCC. Hsa_circ_0077210 might serve a momentous therapeutic role to restrain the occurrence and development of HCC.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +33728932,Metabolic Dyslipidemia and Cardiovascular Outcomes in Type 2 Diabetes Mellitus: Findings From the Look AHEAD Study.,"Background Metabolic dyslipidemia (high triglyceride) and low high-density lipoprotein cholesterol (HDL-C) is highly prevalent in type 2 diabetes mellitus (T2DM). The extent to which diabetes mellitus-related abnormalities in the triglyceride-HDL-C profile associates with cardiovascular disease (CVD) risk is incompletely understood. We evaluated the associations of triglyceride and HDL-C status with CVD outcomes in individuals with T2DM. Methods and Results We analyzed data from 4199 overweight/obese adults with T2DM free of CVD with available data on triglyceride and HDL-C at baseline (2001-2004) in the Look AHEAD (Action for Health in Diabetes) study. We used Cox proportional models to estimate hazard ratios (HRs) and 95% CIs of: (1) composite CVD outcome (myocardial infarction, stroke, hospitalization for angina, and/or death from cardiovascular causes); (2) coronary artery disease events; and (3) cerebrovascular accidents (stroke). Of the 4199 participants, 62% (n=2600) were women, with a mean age of 58 years (SD, 7), and 40% (n=1659) had metabolic dyslipidemia at baseline. Over a median follow-up of 9.5 years (interquartile range, 8.7-10.3), 500 participants experienced the composite CVD outcome, 396 experienced coronary artery disease events, and 100 experienced stroke. Low HDL-C was associated with higher hazards of the composite CVD outcome (HR, 1.36; 95% CI, 1.12-1.64 [P=0.002]) and coronary artery disease events (HR, 1.46; 95% CI, 1.18-1.81 [P=0.001]) but not stroke (HR, 1.38; 95% CI, 0.90-2.11 [P=0.140]). 
Compared with patients with normal triglyceride and normal HDL, participants with metabolic dyslipidemia had higher risks of the composite CVD outcome (HR, 1.30; 95% CI, 1.03-1.63 [P=0.025]) and coronary artery disease events (HR, 1.48; 95% CI, 1.14-1.93 [P=0.003]) but not stroke (HR, 1.23; 95% CI, 0.74-2.05 [P=0.420]). Conclusions In a large sample of overweight/obese individuals with T2DM, metabolic dyslipidemia was associated with higher risks of CVD outcomes. Our findings highlight the necessity to account for metabolic dyslipidemia in CVD risk stratification among patients with T2DM. Registration URL: https://www.lookaheadtrial.org; Unique identifier: NCT00017953.","hji,kes",0,0,0,2,0,NA,NA +33729791,Epigenetic Target Profiler: A Web Server to Predict Epigenetic Targets of Small Molecules.,"The identification of protein targets of small molecules is essential for drug discovery. With the increasing amount of chemogenomic data in the public domain, multiple ligand-based models for target prediction have emerged. However, these models are generally biased by the number of known ligands for different targets, which involves an under-representation of epigenetic targets, and despite the increasing importance of epigenetic targets in drug discovery, there are no open tools for epigenetic target prediction. In this work, we introduce Epigenetic Target Profiler (ETP), a freely accessible and easy-to-use web application for the prediction of epigenetic targets of small molecules. For a query compound, ETP predicts its bioactivity profile over a panel of 55 different epigenetic targets. To that aim, ETP uses a consensus model based on two binary classification models for each target, relying on support vector machines and built on molecular fingerprints of different design. 
A distance-to-model parameter related to the reliability of the predictions is included to facilitate their interpretability and assist in the identification of small molecules with potential epigenetic activity. Epigenetic Target Profiler is freely available at http://www.epigenetictargetprofiler.com.","hji,kes",0,0,0,2,0,software,NA +33735471,Update of the CLRP eye plaque brachytherapy database for photon-emitting sources.,"

Purpose

To update and extend the Carleton Laboratory for Radiotherapy Physics (CLRP) Eye Plaque (EP) dosimetry database for low-energy photon-emitting brachytherapy sources using egs_brachy, an open-source EGSnrc application. The previous database, CLRP_EPv1, contained datasets for the Collaborative Ocular Melanoma Study (COMS) plaques (10-22 mm diameter) with 103 Pd or 125 I seeds (BrachyDose-computed, 2008). The new database, CLRP_EPv2, consists of newly calculated three-dimensional (3D) dose distributions for 17 plaques [eight COMS, five Eckert & Ziegler BEBIG, and four others representative of models used worldwide] for 103 Pd, 125 I, and 131 Cs seeds.

Acquisition and validation methods

Plaque models are developed with egs_brachy, based on published/manufacturer dimensions and material data. The BEBIG plaques (modeled for the first time) are identical in dimensions to COMS plaques but differ in elemental composition and/or density. Previously benchmarked seed models are used. Eye plaques and seeds are simulated at the center of full-scatter water phantoms, scoring in (0.05 cm)3 voxels spanning the eye for scenarios: (a) ""HOMO"": simulated TG43 conditions; (b) ""HETERO"": eye plaques and seeds fully modeled; (c) ""HETsi"" (BEBIG only): one seed is active at a time with other seed geometries present but not emitting photons (inactive); summation over all i seeds in a plaque then yields ""HETsum"" (includes interseed effects). For validation, doses are compared to those from CLRP_EPv1 and published data.

Data format and access

Data are available at https://physics.carleton.ca/clrp/eye_plaque_v2, http://doi.org/10.22215/clrp/EPv2. The data consist of 3D dose distributions (text-based EGSnrc ""3ddose"" file format) and graphical presentations of the comparisons to previously published data.

Potential applications

The CLRP_EPv2 database provides accurate reference 3D dose distributions to advance ocular brachytherapy dose evaluations. The fully-benchmarked eye plaque models will be freely distributed with egs_brachy, supporting adoption of model-based dose evaluations as recommended by TG-129, TG-186, and TG-221.","hji,kes",1,0,1,2,0.5,NA,out of scope; reassessed and still no - clinical data +33750020,KnetMiner: a comprehensive approach for supporting evidence-based gene discovery and complex trait analysis across species.,"The generation of new ideas and scientific hypotheses is often the result of extensive literature and database searches, but, with the growing wealth of public and private knowledge, the process of searching diverse and interconnected data to generate new insights into genes, gene networks, traits and diseases is becoming both more complex and more time-consuming. To guide this technically challenging data integration task and to make gene discovery and hypotheses generation easier for researchers, we have developed a comprehensive software package called KnetMiner which is open-source and containerized for easy use. KnetMiner is an integrated, intelligent, interactive gene and gene network discovery platform that supports scientists explore and understand the biological stories of complex traits and diseases across species. It features fast algorithms for generating rich interactive gene networks and prioritizing candidate genes based on knowledge mining approaches. KnetMiner is used in many plant science institutions and has been adopted by several plant breeding organizations to accelerate gene discovery. The software is generic and customizable and can therefore be readily applied to new species and data types; for example, it has been applied to pest insects and fungal pathogens; and most recently repurposed to support COVID-19 research. 
Here, we give an overview of the main approaches behind KnetMiner and we report plant-centric case studies for identifying genes, gene networks and trait relationships in Triticum aestivum (bread wheat), as well as, an evidence-based approach to rank candidate genes under a large Arabidopsis thaliana QTL. KnetMiner is available at: https://knetminer.org.","hji,kes",0,0,0,2,0,software,NA +33753737,Fault2SHA Central Apennines database and structuring active fault data for seismic hazard assessment.,"We present a database of field data for active faults in the central Apennines, Italy, including trace, fault and main fault locations with activity and location certainties, and slip-rate, slip-vector and surface geometry data. As advances occur in our capability to create more detailed fault-based hazard models, depending on the availability of primary data and observations, it is desirable that such data can be organized in a way that is easily understood and incorporated into present and future models. The database structure presented herein aims to assist this process. We recommend stating what observations have led to different location and activity certainty and presenting slip-rate data with point location coordinates of where the data were collected with the time periods over which they were calculated. Such data reporting allows more complete uncertainty analyses in hazard and risk modelling. The data and maps are available as kmz, kml, and geopackage files with the data presented in spreadsheet files and the map coordinates as txt files. The files are available at: https://doi.org/10.1594/PANGAEA.922582 .","hji,kes",0,0,0,2,0,not bio,"not descriptive of resources, specific dataset" +33759252,MS Amanda 2.0: Advancements in the standalone implementation.,"

Rationale

Database search engines are the preferred method to identify peptides in mass spectrometry data. However, valuable software is in this context not only defined by a powerful algorithm to separate correct from false identifications, but also by constant maintenance and continuous improvements.

Methods

In 2014, we presented our peptide identification algorithm MS Amanda, showing its suitability for identifying peptides in high-resolution tandem mass spectrometry data and its ability to outperform widely used tools to identify peptides. Since then, we have continuously worked on improvements to enhance its usability and to support new trends and developments in this fast-growing field, while keeping the original scoring algorithm to assess the quality of a peptide spectrum match unchanged.

Results

We present the outcome of these efforts, MS Amanda 2.0, a faster and more flexible standalone version with the original scoring algorithm. The new implementation has led to a 3-5× speedup, is able to handle new ion types and supports standard data formats. We also show that MS Amanda 2.0 works best when using only the most common ion types in a particular search instead of all possible ion types.

Conclusions

MS Amanda is available free of charge from https://ms.imp.ac.at/index.php?action=msamanda.","hji,kes",0,0,0,2,0,software,NA +33763309,"pmparser and PMDB: resources for large-scale, open studies of the biomedical literature.","PubMed is an invaluable resource for the biomedical community. Although PubMed is freely available, the existing API is not designed for large-scale analyses and the XML structure of the underlying data is inconvenient for complex queries. We developed an R package called pmparser to convert the data in PubMed to a relational database. Our implementation of the database, called PMDB, currently contains data on over 31 million PubMed Identifiers (PMIDs) and is updated regularly. Together, pmparser and PMDB can enable large-scale, reproducible, and transparent analyses of the biomedical literature. pmparser is licensed under GPL-2 and available at https://pmparser.hugheylab.org. PMDB is available in both PostgreSQL (DOI 10.5281/zenodo.4008109) and Google BigQuery (https://console.cloud.google.com/bigquery?project=pmdb-bq&d=pmdb).","hji,kes",0,0,0,2,0,"remix of other data resource; reassessed, and still no. There is value add by making the data parsable, but is not a resource itself",database itself is downloadable via zenodo or Google BigQuery - not a distinct resource; reassessed and still no - not a distinct data resource +33766657,An early-morning gene network controlled by phytochromes and cryptochromes regulates photomorphogenesis pathways in Arabidopsis.,"Light perception at dawn plays a key role in coordinating multiple molecular processes and in entraining the plant circadian clock. The Arabidopsis mutant lacking the main photoreceptors, however, still shows clock entrainment, indicating that the integration of light into the morning transcriptome is not well understood. In this study, we performed a high-resolution RNA-sequencing time-series experiment, sampling every 2 min beginning at dawn. 
In parallel experiments, we perturbed temperature, the circadian clock, photoreceptor signaling, and chloroplast-derived light signaling. We used these data to infer a gene network that describes the gene expression dynamics after light stimulus in the morning, and then validated key edges. By sampling time points at high density, we are able to identify three light- and temperature-sensitive bursts of transcription factor activity, one of which lasts for only about 8 min. Phytochrome and cryptochrome mutants cause a delay in the transcriptional bursts at dawn, and completely remove a burst of expression in key photomorphogenesis genes (HY5 and BBX family). Our complete network is available online (http://www-users.york.ac.uk/~de656/dawnBurst/dawnBurst.html). Taken together, our results show that phytochrome and cryptochrome signaling is required for fine-tuning the dawn transcriptional response to light, but separate pathways can robustly activate much of the program in their absence.","hji,kes",0,0,0,2,0,NA,NA +33768085,"Open, High-Resolution EI+ Spectral Library of Anthropogenic Compounds.","To address the lack of high-resolution electron ionisation mass spectral libraries (HR-[EI+]-MS) for environmental chemicals, a retention-indexed HR-[EI+]-MS library has been constructed following analysis of authentic compounds via GC-Orbitrap MS. The library is freely provided alongside a compound database of predicted physicochemical properties. Currently, the library contains over 350 compounds from 56 compound classes and includes a range of legacy and emerging contaminants. 
The RECETOX Exposome HR-[EI+]-MS library expands the number of freely available resources for use in full-scan chemical exposure studies and is available at: https://doi.org/10.5281/zenodo.4471217.","hji,kes",1,0,1,2,0.5,NA,no notes; reassessed and still no - not a distinct resource (Zenodo) +33770801,Progression of Postural Asymmetry in Young Adults With Cerebral Palsy Who Are Not Walking: An Exploratory Study.,"

Purpose

Young adults with cerebral palsy (CP) who are not walking are at risk of developing or increasing musculoskeletal asymmetries affecting the rib cage, spine, pelvis, and hips. This longitudinal study aimed to explore postural change using the Goldsmith Indices of Body Symmetry (GIofBS) over an 18-month period in adults with CP who are not walking.

Methods

Demographic and medical data were accessed from participant's history. Posture was recorded using the GIofBS to collect data during an 18-month period following skeletal maturity.

Results

All participants had postural asymmetry at study onset with evidence of minimal change in some GIofBS outcome measures and fluctuations in other outcomes over 18 months.

Conclusions

Physical therapists may use the GIofBS across the lifespan to screen for deterioration in musculoskeletal status or in assessing longer-term outcomes of interventions impacting posture in this complex population.

Video abstract

For more insights from the authors, see Supplemental Digital Content 1, available at: http://links.lww.com/PPT/A314.","hji,kes",0,0,0,2,0,NA,NA +33772584,Bioconductor toolchain for reproducible bioinformatics pipelines using Rcwl and RcwlPipelines.,"

Summary

The Common Workflow Language (CWL) is used to provide portable and reproducible data analysis workflows across different tools and computing environments. We have developed Rcwl, an R interface to CWL, to provide easier development, use, and maintenance of CWL pipelines from within R. We have also collected more than 100 pre-built tools and pipelines in RcwlPipelines, ready to be queried and used by researchers in their own analysis. A single-cell RNA sequencing preprocessing pipeline demonstrates use of the software.

Availability

Project website: https://rcwl.org (Rcwl: https://bioconductor.org/packages/Rcwl; RcwlPipelines: https://bioconductor.org/packages/RcwlPipelines).

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33772596,FUNGI: Fusion Gene Integration Toolset.,

Motivation

Fusion genes are both useful cancer biomarkers and important drug targets. Finding relevant fusion genes is challenging due to genomic instability resulting in a high number of passenger events. To reveal and prioritize relevant gene fusion events we have developed FUNGI (FUsionN Gene Identification toolset) that uses an ensemble of fusion detection algorithms with prioritization and visualization modules.

Results

We applied FUNGI to an ovarian cancer dataset of 107 tumor samples from 36 patients. Ten out of 11 detected and prioritized fusion genes were validated. Many of detected fusion genes affect the PI3K-AKT pathway with potential role in treatment resistance.

Availability

FUNGI and its documentation are available at https://bitbucket.org/alejandra_cervera/fungi as standalone or from Anduril at https://www.anduril.org.

Supplementary information

Supplementary data are available at Bioinformatics online.,"hji,kes",0,0,0,2,0,software,NA +33777034,TCRMatch: Predicting T-Cell Receptor Specificity Based on Sequence Similarity to Previously Characterized Receptors.,"The adaptive immune system in vertebrates has evolved to recognize non-self antigens, such as proteins expressed by infectious agents and mutated cancer cells. T cells play an important role in antigen recognition by expressing a diverse repertoire of antigen-specific receptors, which bind epitopes to mount targeted immune responses. Recent advances in high-throughput sequencing have enabled the routine generation of T-cell receptor (TCR) repertoire data. Identifying the specific epitopes targeted by different TCRs in these data would be valuable. To accomplish that, we took advantage of the ever-increasing number of TCRs with known epitope specificity curated in the Immune Epitope Database (IEDB) since 2004. We compared seven metrics of sequence similarity to determine their power to predict if two TCRs have the same epitope specificity. We found that a comprehensive k-mer matching approach produced the best results, which we have implemented into TCRMatch, an openly accessible tool (http://tools.iedb.org/tcrmatch/) that takes TCR β-chain CDR3 sequences as an input, identifies TCRs with a match in the IEDB, and reports the specificity of each match. 
We anticipate that this tool will provide new insights into T cell responses captured in receptor repertoire and single cell sequencing experiments and will facilitate the development of new strategies for monitoring and treatment of infectious, allergic, and autoimmune diseases, as well as cancer.","hji,kes",0,0,0,2,0,software,not descriptive of resource +33793824,Analysis of a photosynthetic cyanobacterium rich in internal membrane systems via gradient profiling by sequencing (Grad-seq).,"Although regulatory small RNAs have been reported in photosynthetic cyanobacteria, the lack of clear RNA chaperones involved in their regulation poses a conundrum. Here, we analyzed the full complement of cellular RNAs and proteins using gradient profiling by sequencing (Grad-seq) in Synechocystis 6803. Complexes with overlapping subunits such as the CpcG1-type versus the CpcL-type phycobilisomes or the PsaK1 versus PsaK2 photosystem I pre(complexes) could be distinguished, supporting the high quality of this approach. Clustering of the in-gradient distribution profiles followed by several additional criteria yielded a short list of potential RNA chaperones that include an YlxR homolog and a cyanobacterial homolog of the KhpA/B complex. The data suggest previously undetected complexes between accessory proteins and CRISPR-Cas systems, such as a Csx1-Csm6 ribonucleolytic defense complex. Moreover, the exclusive association of either RpoZ or 6S RNA with the core RNA polymerase complex and the existence of a reservoir of inactive sigma-antisigma complexes is suggested. The Synechocystis Grad-seq resource is available online at https://sunshine.biologie.uni-freiburg.de/GradSeqExplorer/ providing a comprehensive resource for the functional assignment of RNA-protein complexes and multisubunit protein complexes in a photosynthetic organism.","hji,kes",0,0,0,2,0,NA,analysis only? 
+33794377,Identifying Novel Drug Targets by iDTPnd: A Case Study of Kinase Inhibitors.,"Current FDA-approved kinase inhibitors cause diverse adverse effects, some of which are due to the mechanism-independent effects of these drugs. Identifying these mechanism-independent interactions could improve drug safety and support drug repurposing. We have developed iDTPnd (integrated Drug Target Predictor with negative dataset), a computational approach for large-scale discovery of novel targets for known drugs. For a given drug, we construct a positive and a negative structural signature that captures the weakly conserved structural features of drug binding sites. To facilitate assessment of unintended targets, iDTPnd also provides a docking-based interaction score and its statistical significance. We were able to confirm the interaction of sorafenib, imatinib, dasatinib, sunitinib, and pazopanib with their known targets at a sensitivity and specificity of 52% and 55%, respectively. We have validated 10 predicted novel targets by using in vitro experiments. Our results suggest that proteins other than kinases, such as nuclear receptors, cytochrome P450, or MHC Class I molecules can also be physiologically relevant targets of kinase inhibitors. Our method is general and broadly applicable for the identification of protein-small molecule interactions, when sufficient drug-target 3D data are available. The code for constructing the structural signature is available at https://sfb.kaust.edu.sa/Documents/iDTP.zip.","hji,kes",0,0,0,2,0,NA,NA +33823115,A Nationwide Study Examining Deafness Among Hospitalized Adults.,"Background It is unknown whether hospital outcomes differ among nonspeaking deaf patients compared to those without this disability. Objective This article aims to compare clinical outcomes and utilization data among patients with and without deafness. Design This study used a retrospective cohort study. 
Setting and Participants The participants included Nationwide Inpatient Sample, year 2017, hospitalized adults with and without diagnostic codes related to deafness and inability to speak. Method Multiple logistic and linear regression were used to compare in-hospital outcomes. Results Thirty million four hundred one thousand one hundred seventeen adults were hospitalized, and 7,180 had deafness and inability to speak related coding. Patients with deafness were older (mean age ± SEM: 59.2 ± 0.51 vs. 57.9 ± 0.09 years, p = .01), and less likely female (47.0% vs. 57.7%, p < .01) compared to controls. Those with deafness had more comorbidities compared to the controls (Charlson comorbidity score ≥ 3: 31.2% vs. 27.8%, p < .01). Mortality was higher among deaf versus controls (3.6% vs. 2.2%; p < .01); this translated into higher adjusted odds of mortality (adjusted odds ratio = 1.7. [confidence interval (CI) 1.3-2.4]; p = .01). Deaf patients had lower odds of being discharged home compared to controls {aOR} = 0.6, (CI) 0.55-0.73]; p < .01. Length of stay was longer (adjusted mean difference = 1.5 days CI [0.7-2.3]; p < .01) and hospital charges were higher, but not significantly so (adjusted mean difference = $4,193 CI [-$1,935-$10,322]; p = .18) in patients with deafness. Conclusions Hospitalized nonspeaking deaf patients had higher mortality and longer hospital stays compared to those without this condition. These results suggest that specialized attention may be warranted when deaf patients are admitted to our hospitals in hopes of reducing disparities in outcomes. Supplemental Material https://doi.org/10.23641/asha.14336663.","hji,kes",0,0,0,2,0,NA,NA +33823117,Exploring Parental Perspectives of Childhood Speech and Language Disorders Across 10 Countries: A Pilot Qualitative Study.,"Purpose Although researchers have explored parental perspectives of childhood speech and language disorders, most studies have been conducted in English-speaking countries. 
Little is known about parental experiences across countries, where procedures of language screening and services for language disorders differ. The authors participated in the COST 1 Action network IS1406, ""Enhancing Children's Oral Language Skills Across Europe and Beyond,"" which provided an opportunity to conduct cross-country qualitative interviews with parents. The aim of this pilot study was to explore ways in which parents construed and described speech and language disorders across countries. Method Semistructured qualitative interviews were conducted with parents from 10 families in 10 different countries. The data were analyzed using thematic analysis. Findings The overall theme was ""acknowledging parental expertise."" The parents described, in detail, ways in which their children's speech and language (dis)abilities had an impact on the children's everyday life. Three subthemes were identified: impairment, disability, and changes over time. Conclusions The findings suggest that, across a range of countries, parents demonstrated contextualized understandings of their children's speech and language (dis)abilities, along with the everyday functional implications of the disorders. Hence, despite not holding professional knowledge about language disorders, the voices, views, understandings, and personal experiences of parents in relation to their child's disorder should be listened to when planning therapy services. Supplemental Material https://doi.org/10.23641/asha.14109881.","hji,kes",0,0,0,2,0,NA,NA +33823122,Long-Term Outcomes in Intensive Care Unit Patients with Delirium: A Population-Based Cohort Study.,"

Rationale

Delirium is common in the intensive care unit (ICU) and portends worse ICU and hospital outcomes. The effect of delirium in the ICU on post-hospital discharge mortality and health resource utilization is less well known.

Objectives

To estimate mortality and health resource utilization 2.5-years post-hospital discharge in critically ill patients admitted to ICU.

Methods

Population-based, propensity-score matched, retrospective cohort study of adult patients admitted to one of fourteen medical-surgical ICUs from January 1, 2014 to June 30, 2016. Delirium was measured by the 8-point Intensive Care Delirium Screening Checklist (ICDSC). The primary outcome was mortality. The secondary outcome was a composite measure of subsequent emergency department visits, hospital readmission, or mortality.

Measurements and main results

There were 5,936 propensity score matched patients with and without incident delirium who survived to hospital discharge. Delirium was associated with increased mortality 0 to 30-days post-hospital discharge [Hazard Ratio (HR): 1.44 (95%CI: 1.08-1.92)]. There was no significant difference in mortality more than 30 days post-hospital discharge (delirium: 3.9%, no delirium: 2.6%). There was a persistent increased risk of emergency department visits, hospital readmissions, or mortality post-hospital discharge [HR: 1.12 (95%CI: 1.07-1.17)] throughout the study period.

Conclusions

ICU delirium is associated with increased mortality 0 to 30 days post-hospital discharge. This article is open access and distributed under the terms of the Creative Commons Attribution Non-Commercial No Derivatives License 4.0 (http://creativecommons.org/licenses/by-nc-nd/4.0/).","hji,kes",0,0,0,2,0,NA,NA +33826413,"Rare, Protein-Altering Variants in AS3MT and Arsenic Metabolism Efficiency: A Multi-Population Association Study.","

Background

Common genetic variation in the arsenic methyltransferase (AS3MT) gene region is known to be associated with arsenic metabolism efficiency (AME), measured as the percentage of dimethylarsinic acid (DMA%) in the urine. Rare, protein-altering variants in AS3MT could have even larger effects on AME, but their contribution to AME has not been investigated.

Objectives

We estimated the impact of rare, protein-coding variation in AS3MT on AME using a multi-population approach to facilitate the discovery of population-specific and shared causal rare variants.

Methods

We generated targeted DNA sequencing data for the coding regions of AS3MT for three arsenic-exposed cohorts with existing data on arsenic species measured in urine: Health Effects of Arsenic Longitudinal Study (HEALS, n=2,434), Strong Heart Study (SHS, n=868), and New Hampshire Skin Cancer Study (NHSCS, n=666). We assessed the collective effects of rare (allele frequency <1%), protein-altering AS3MT variants on DMA%, using multiple approaches, including a test of the association between rare allele carrier status (yes/no) and DMA% using linear regression (adjusted for common variants in 10q24.32 region, age, sex, and population structure).

Results

We identified 23 carriers of rare-protein-altering AS3MT variant across all cohorts (13 in HEALS and 5 in both SHS and NHSCS), including 6 carriers of predicted loss-of-function variants. DMA% was 6-10% lower in carriers compared with noncarriers in HEALS [β=-9.4 (95% CI: -13.9, -4.8)], SHS [β=-6.9 (95% CI: -13.6, -0.2)], and NHSCS [β=-8.7 (95% CI: -15.6, -2.2)]. In meta-analyses across cohorts, DMA% was 8.7% lower in carriers [β=-8.7 (95% CI: -11.9, -5.4)].

Discussion

Rare, protein-altering variants in AS3MT were associated with lower mean DMA%, an indicator of reduced AME. Although a small percentage of the population (0.5-0.7%) carry these variants, they are associated with a 6-10% decrease in DMA% that is consistent across multiple ancestral and environmental backgrounds. https://doi.org/10.1289/EHP8152.","hji,kes",0,0,0,2,0,NA,NA +33837660,Upregulation of ZHX2 predicts poor prognosis and is correlated with immune infiltration in gastric cancer.,"The transcriptional repressor zinc finger homeobox 2 (ZHX2) is reported to regulate tumor progression in several human cancers, although little is known about its role in gastric cancer (GC). In the present study, we investigated the expression of ZHX2 and its relationship with the clinicopathological characteristics and prognosis of GC patients, and we also examined the effect of ZHX2 overexpression in GC cell lines. We used UALCAN (http://ualcan.path.uab.edu) and the Tumor Immune Estimation Resource (http://cistrome.org/TIMER) to examine ZHX2 mRNA expression, and also used Kaplan-Meier Plotter (https://kmplot.com) to determine whether ZHX2 expression was related to GC prognosis. Expression of ZHX2 protein was detected using immunohistochemical staining assays. Cell proliferation was evaluated using a cell counting kit-8 and colony formation assays, whereas apoptosis was examined by flow cytometry. Wound healing and transwell assays were used to detect cell migration and invasion. We also performed Gene Set Enrichment Analysis (https://www.gsea-msigdb.org) and used The Cancer Genome Atlas database (https://www.genome.gov/Funded-Programs-Projects/Cancer-Genome-Atlas) to examine the correlation of ZHX2 with immune infiltration. We report that ZHX2 is highly expressed in GC tissues and is significantly associated with clinical characteristics. Upregulation of ZHX2 predicted poor prognosis in GC. 
Furthermore, ZHX2 overexpression can promote the proliferation, invasion and migration, but inhibit apoptosis, of GC cells. High expression of ZHX2 in GC is correlated with the presence of infiltrating immune cells, including B cells, CD4+ T cells, macrophages and dendritic cells. Our data suggest that high expression of ZHX2 in GC predicts poor prognosis. In addition, ZHX2 may promote malignant behaviors of GC cells, and immune infiltration might be related to the oncogenic role of ZHX2 in GC.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +33840048,Rank-preserving biclustering algorithm: a case study on miRNA breast cancer.,"Effective biomarkers aid in the early diagnosis and monitoring of breast cancer and thus play an important role in the treatment of patients suffering from the disease. Growing evidence indicates that alteration of expression levels of miRNA is one of the principal causes of cancer. We analyze breast cancer miRNA data to discover a list of biclusters as well as breast cancer miRNA biomarkers which can help to understand better this critical disease and take important clinical decisions for treatment and diagnosis. In this paper, we propose a pattern-based parallel biclustering algorithm termed Rank-Preserving Biclustering (RPBic). The key strategy is to identify rank-preserved rows under a subset of columns based on a modified version of all substrings common subsequence (ALCS) framework. To illustrate the effectiveness of the RPBic algorithm, we consider synthetic datasets and show that RPBic outperforms relevant biclustering algorithms in terms of relevance and recovery. For breast cancer data, we identify 68 biclusters and establish that they have strong clinical characteristics among the samples. The differentially co-expressed miRNAs are found to be involved in KEGG cancer related pathways. 
Moreover, we identify frequency-based biomarkers (hsa-miR-410, hsa-miR-483-5p) and network-based biomarkers (hsa-miR-454, hsa-miR-137) which we validate to have strong connectivity with breast cancer. The source code and the datasets used can be found at http://agnigarh.tezu.ernet.in/~rosy8/Bioinformatics_RPBic_Data.rar . Graphical Abstract.","hji,kes",0,0,0,2,0,NA,NA +33850871,Development and validation of a prediction model for lung adenocarcinoma based on RNA-binding protein.,"

Background

RNA-binding proteins (RBPs) have been found to participate in the development and progression of cancer. This present study aimed to construct a RBP-based prognostic prediction model for lung adenocarcinoma (LUAD).

Methods

RNA sequencing data and corresponding clinical information were acquired from The Cancer Genome Atlas (TCGA) and served as a training set. The prediction model was validated using the dataset in Gene Expression Omnibus (GEO) databases. Univariate and multivariate Cox regression analyses were conducted to identify the RBPs associated with survival. R software (http://www.r-project.org) was used for analysis in this study.

Results

Nine hub prognostic RBPs (CIRBP, DARS2, DDX24, GAPDH, LARP6, SNRPE, WDR3, ZC3H12C, ZC3H12D) were identified by univariate Cox regression analysis and multivariate Cox regression analysis. Using a risk score based on the nine-hub RBP model, we separated the LUAD patients into a low-risk group and a high-risk group. The outcomes revealed that patients in the high-risk group had poorer survival than those in the low-risk group. This signature was validated in the GEO database. Further study revealed that the risk score can be an independent prognostic biomarker for LUAD. A nomogram based on the nine hub RBPs was built to quantitatively predict the prognosis of LUAD patients.

Conclusions

Our nine-gene signature model could be used as a marker to predict the prognosis of LUAD and has potential for use in treatment individualization.","hji,kes",0,0,0,2,0,NA,NA +33854526,GeenaR: A Web Tool for Reproducible MALDI-TOF Analysis.,"Mass spectrometry is a widely applied technology with a strong impact in the proteomics field. MALDI-TOF is a combined technology in mass spectrometry with many applications in characterizing biological samples from different sources, such as the identification of cancer biomarkers, the detection of food frauds, the identification of doping substances in athletes' fluids, and so on. The massive quantity of data, in the form of mass spectra, are often biased and altered by different sources of noise. Therefore, extracting the most relevant features that characterize the samples is often challenging and requires combining several computational methods. Here, we present GeenaR, a novel web tool that provides a complete workflow for pre-processing, analyzing, visualizing, and comparing MALDI-TOF mass spectra. GeenaR is user-friendly, provides many different functionalities for the analysis of the mass spectra, and supports reproducible research since it produces a human-readable report that contains function parameters, results, and the code used for processing the mass spectra. First, we illustrate the features available in GeenaR. Then, we describe its internal structure. Finally, we prove its capabilities in analyzing oncological datasets by presenting two case studies related to ovarian cancer and colorectal cancer. GeenaR is available at http://proteomics.hsanmartino.it/geenar/.","hji,kes",0,0,0,2,0,software,NA +33855981,Competence assessment of the clinical tutor: a multicentric observational study.,"

Background and aim of the study

In the international literature there are no validated tools which investigate clinical tutors' skills. The main objective of the study has been to describe the clinical nurse tutor's skills, required to properly train nursing students during their educational path.

Methods

In this observational study a non-probability sampling has been used. The study was led in two centers: the AOUPR of Parma and the AUSL of Parma, after obtaining the favorable opinion from the Ethics Committee of the Northern Emilia Large Section. The data have been collected by using a structured and self-given survey that investigated three areas. Each item has a 4-point Likert scale, in which 1 indicates ""for nothing"" and 4 ""very much"". The data have been analyzed with the statistical software IBM SPSS v.26 ® and with the open-source statistical software Jamovi v.1.6.9 (https://www.jamovi.org.). The number of factors in the original model was reduced using several established research steps and then evaluated for data quality and construct validity using principal component analysis and confimatory factor analysis.

Results

Among 397 administered questionnaires, only 300, which were considered valid, have been filled. The psychometric properties of the investigation tool turned out to be good in all the areas analyzed with a Cronbach alpha higher than 0.70. The extensive process resulted in a version with 4 factors.

Conclusions

Nurses' answers have allowed to draw the required profile of the clinical tutors in the different organizational contexts. The results can target possible training proposals to create opportunities for the clinical tutors.","hji,kes",0,0,0,2,0,NA,NA +33858322,ChIP-BIT2: a software tool to detect weak binding events using a Bayesian integration approach.,"

Background

ChIP-seq combines chromatin immunoprecipitation assays with sequencing and identifies genome-wide binding sites for DNA binding proteins. While many binding sites have strong ChIP-seq 'peak' observations and are well captured, there are still regions bound by proteins weakly, with a relatively low ChIP-seq signal enrichment. These weak binding sites, especially those at promoters and enhancers, are functionally important because they also regulate nearby gene expression. Yet, it remains a challenge to accurately identify weak binding sites in ChIP-seq data due to the ambiguity in differentiating these weak binding sites from the amplified background DNAs.

Results

ChIP-BIT2 ( http://sourceforge.net/projects/chipbitc/ ) is a software package for ChIP-seq peak detection. ChIP-BIT2 employs a mixture model integrating protein and control ChIP-seq data and predicts strong or weak protein binding sites at promoters, enhancers, or other genomic locations. For binding sites at gene promoters, ChIP-BIT2 simultaneously predicts their target genes. ChIP-BIT2 has been validated on benchmark regions and tested using large-scale ENCODE ChIP-seq data, demonstrating its high accuracy and wide applicability.

Conclusion

ChIP-BIT2 is an efficient ChIP-seq peak caller. It provides a better lens to examine weak binding sites and can refine or extend the existing binding site collection, providing additional regulatory regions for decoding the mechanism of gene expression regulation.","hji,kes",0,0,0,2,0,software,NA +33863373,IrGO: Iranian traditional medicine General Ontology and knowledge base.,"

Background

Iranian traditional medicine, also known as Persian Medicine, is a holistic school of medicine with a long prolific history. It describes numerous concepts and the relationships between them. However, no unified language system has been proposed for the concepts of this medicine up to the present time. Considering the extensive terminology in the numerous textbooks written by the scholars over centuries, comprehending the totality of concepts is obviously a very challenging task. To resolve this issue, overcome the obstacles, and code the concepts in a reusable manner, constructing an ontology of the concepts of Iranian traditional medicine seems a necessity.

Construction and content

Makhzan al-Advieh, an encyclopedia of materia medica compiled by Mohammad Hossein Aghili Khorasani, was selected as the resource to create an ontology of the concepts used to describe medicinal substances. The steps followed to accomplish this task included (1) compiling the list of classes via examination of textbooks, and text mining the resource followed by manual review to ensure comprehensiveness of extracted terms; (2) arranging the classes in a taxonomy; (3) determining object and data properties; (4) specifying annotation properties including ID, labels (English and Persian), alternative terms, and definitions (English and Persian); (5) ontology evaluation. The ontology was created using Protégé with adherence to the principles of ontology development provided by the Open Biological and Biomedical Ontology (OBO) foundry.

Utility and discussion

The ontology was finalized with inclusion of 3521 classes, 15 properties, and 20,903 axioms in the Iranian traditional medicine General Ontology (IrGO) database, freely available at http://ir-go.net/ . An indented list and an interactive graph view using WebVOWL were used to visualize the ontology. All classes were linked to their instances in UNaProd database to create a knowledge base of ITM materia medica.

Conclusion

We constructed an ontology-based knowledge base of ITM concepts in the domain of materia medica to help offer a shared and common understanding of this concept, enable reuse of the knowledge, and make the assumptions explicit. This ontology will aid Persian medicine practitioners in clinical decision-making to select drugs. Extending IrGO will bridge the gap between traditional and conventional schools of medicine, helping guide future research in the process of drug discovery.","hji,kes",0,0,0,2,0,NA,out of scope +33877858,Developing a Flexible National Wastewater Surveillance System for COVID-19 and Beyond.,"

Background

Wastewater testing offers a cost-effective strategy for measuring population disease prevalence and health behaviors. For COVID-19, wastewater surveillance addresses testing gaps and provides an early warning for outbreaks. As U.S. federal agencies build a National Wastewater Surveillance System around the pandemic, thinking through ways to develop flexible frameworks for wastewater sampling, testing, and reporting can avoid unnecessary system overhauls for future infectious disease, chronic disease, and drug epidemics.

Objectives

We discuss ways to transform a historically academic exercise into a tool for epidemic response. We generalize lessons learned by a global network of wastewater researchers around validation and implementation for COVID-19 and opioids while also drawing on our experience with wastewater-based epidemiology in the United States.

Discussion

Sustainable wastewater surveillance requires coordination between health and safety officials, utilities, labs, and researchers. Adapting sampling frequency, type, and location to threat level, community vulnerability, biomarker properties, and decisions that wastewater data will inform can increase the practical value of the data. Marketplace instabilities, coupled with a fragmented testing landscape due to specialization, may require officials to engage multiple labs to test for known and unknown threats. Government funding can stabilize the market, balancing commercial pressures with public good, and incentivize data sharing. When reporting results, standardizing metrics and contextualizing wastewater data with health resource data can provide insights into a community's vulnerability and identify strategies to prevent health care systems from being overwhelmed. If wastewater data will inform policy decisions for an entire community, comparing characteristics of the wastewater treatment plant's service population to those of the larger community can help determine whether the wastewater data are generalizable. Ethical protocols may be needed to protect privacy and avoid stigmatization. With data-driven approaches to sample collection, analysis, and interpretation, officials can use wastewater surveillance for adaptive resource allocation, pandemic management, and program evaluation. https://doi.org/10.1289/EHP8572.","hji,kes",0,0,0,2,0,NA,NA +33882120,APICURON: a database to credit and acknowledge the work of biocurators.,"APICURON is an open and freely accessible resource that tracks and credits the work of biocurators across multiple participating knowledgebases. Biocuration is essential to extract knowledge from research data and make it available in a structured and standardized way to the scientific community. However, processing biological data-mainly from literature-requires a huge effort that is difficult to attribute and quantify. 
APICURON collects biocuration events from third-party resources and aggregates this information, spotlighting biocurator contributions. APICURON promotes biocurator engagement implementing gamification concepts like badges, medals and leaderboards and at the same time provides a monitoring service for registered resources and for biocurators themselves. APICURON adopts a data model that is flexible enough to represent and track the majority of biocuration activities. Biocurators are identified through their Open Researcher and Contributor ID. The definition of curation events, scoring systems and rules for assigning badges and medals are resource-specific and easily customizable. Registered resources can transfer curation activities on the fly through a secure and robust Application Programming Interface (API). Here, we show how simple and effective it is to connect a resource to APICURON, describing the DisProt database of intrinsically disordered proteins as a use case. We believe APICURON will provide biological knowledgebases with a service to recognize and credit the effort of their biocurators, monitor their activity and promote curator engagement. Database URL: https://apicuron.org.","hji,kes",0,0,0,2,0,NA,not life sci data +33882829,Gene-set distance analysis (GSDA): a powerful tool for gene-set association analysis.,"

Background

Identifying sets of related genes (gene sets) that are empirically associated with a treatment or phenotype often yields valuable biological insights. Several methods effectively identify gene sets in which individual genes have simple monotonic relationships with categorical, quantitative, or censored event-time variables. Some distance-based methods, such as distance correlations, may detect complex non-monotone associations of a gene-set with a quantitative variable that elude other methods. However, the distance correlations have yet to be generalized to associate gene-sets with categorical and censored event-time endpoints. Also, there is a need to determine which genes empirically drive the significance of an association of a gene set with an endpoint.

Results

We develop gene-set distance analysis (GSDA) by generalizing distance correlations to evaluate the association of a gene set with categorical and censored event-time variables. We also develop a backward elimination procedure to identify a subset of genes that empirically drive significant associations. In simulation studies, GSDA more effectively identified complex non-monotone gene-set associations than did six other published methods. In the analysis of a pediatric acute myeloid leukemia (AML) data set, GSDA was the only method to discover that event-free survival (EFS) was associated with the 56-gene AML pathway gene-set, narrow that result down to 5 genes, and confirm the association of those 5 genes with EFS in a separate validation cohort. These results indicate that GSDA effectively identifies and characterizes complex non-monotonic gene-set associations that are missed by other methods.

Conclusion

GSDA is a powerful and flexible method to detect gene-set association with categorical, quantitative, or censored event-time variables, especially to detect complex non-monotonic gene-set associations. Available at https://CRAN.R-project.org/package=GSDA .","hji,kes",0,0,0,2,0,software,NA +33890543,The latest development of the DELAD project for sharing corpora of speech disorders.,"Corpora of speech of individuals with communication disorders (CSD) are invaluable resources for education and research, but they are costly and hard to build and difficult to share for various reasons. DELAD, which means 'shared' in Swedish, is a project initiated by Professors Nicole Müller and Martin Ball in 2015 that aims to address this issue by establishing a platform for researchers to share datasets of speech disorders with interested audiences. To date four workshops have been held, where selected participants, covering various expertise including researchers in clinical phonetics and linguistics, speech and language therapy, infrastructure specialists, and ethics and legal specialists, participated to discuss relevant issues in setting up such an archive. Positive and steady progress has been made since 2015, including refurbishing the DELAD website (http://delad.net/) with information and application forms for researchers to join and share their datasets and linking with the CLARIN K-Centre for Atypical Communication Expertise (https://ace.ruhosting.nl/) where CSD can be hosted and accessed through the CLARIN B-Centres, The Language Archive (https://tla.mpi.nl/tools/tla-tools/) and TalkBank (https://talkbank.org/). The latest workshop, which was funded by CLARIN (Common Language Resources and Technology Infrastructure) was held as an online event in January 2021 on topics including Data Protection Impact Assessments, reviewing changes in ethics perspectives in academia on sharing CSD, and voice conversion as a mean to pseudonomise speech. 
This paper reports the latest progress of DELAD and discusses the directions for further advance of the initiative, with information on how researchers can contribute to the repository.","hji,kes",0,0,0,2,0,"databases are external, this provides an interface",not life sci +33901273,Protlego: A Python package for the analysis and design of chimeric proteins.,"

Motivation

Duplication and recombination of protein fragments have led to the highly diverse protein space that we observe today. By mimicking this natural process, the design of protein chimeras via fragment recombination has proven experimentally successful and has opened a new era for the design of customizable proteins. The in-silico building of structural models for these chimeric proteins, however, remains a manual task that requires a considerable degree of expertise and is not amenable for high-throughput studies. Energetic and structural analysis of the designed proteins often require the use of several tools, each with their unique technical difficulties and available in different programming languages or web servers.

Results

We implemented a Python package that enables automated, high-throughput design of chimeras and their structural analysis. First, it fetches evolutionarily conserved fragments from a built-in database (also available at fuzzle.uni-bayreuth.de). These relationships can then be represented via networks or further selected for chimera construction via recombination. Designed chimeras or natural proteins are then scored and minimised with the Charmm and Amber forcefields and their diverse structural features can be analysed at ease. Here, we showcase Protlego's pipeline by exploring the relationships between the P-loop and Rossmann superfolds, building and characterising their offspring chimeras. We believe that Protlego provides a powerful new tool for the protein design community.

Availability and implementation

Protlego runs on the Linux platform and is freely available at (https://hoecker-lab.github.io/protlego/) with tutorials and documentation and runs on Linux OS.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33903179,Effectiveness of intravenous r-tPA versus UK for acute ischaemic stroke: a nationwide prospective Chinese registry study.,"

Background

Intravenous recombinant tissue plasminogen activator (r-tPA) and urokinase (UK) are both recommended for the treatment of acute ischaemic stroke (AIS) in China, but with few comparative outcome data being available. We aimed to compare the outcomes of these two thrombolytic agents for the treatment of patients within 4.5 hours of onset of AIS in routine clinical practice in China.

Methods

A pre-planned, prospective, nationwide, multicentre, real-world registry of consecutive patients with AIS (age ≥18 years) who received r-tPA or UK within 4.5 hours of symptom onset according to local decision-making and guideline recommendations during 2017-2019. The primary effectiveness outcome was the proportion of patients with an excellent functional outcome (defined by modified Rankin scale scores 0 to 1) at 90 days. The key safety endpoint was symptomatic intracranial haemorrhage according to standard definitions. Multivariable logistic regression was used for comparative analysis, with adjustment according to propensity scores to ensure balance in baseline characteristics.

Results

Overall, 4130 patients with AIS were registered but 320 had incomplete or missing data, leaving 3810 with available data for analysis of whom 2666 received r-tPA (median dose 0.88 (IQR 0.78-0.90) mg/kg) and 1144 received UK (1.71 (1.43-2.00)×104 international unit per kilogram). There were several significant intergroup differences in patient characteristics: r-tPA patients were more educated, had less history of stroke, lower systolic blood pressure, greater neurological impairment and shorter treatment times from symptom onset than UK patients. However, in adjusted analysis, the frequency of excellent outcome (OR 1.18, 95% CI 1.00 to 1.40, p=0.052) and symptomatic intracranial haemorrhage (OR 0.70, 95% CI 0.33 to 1.47, p=0.344) were similar between groups.

Conclusions

UK may be as effective and carry a similar safety profile as r-tPA in treating mild to moderate AIS within guidelines in China. REGISTRATION: http://www.clinicaltrials.gov. unique identifier: NCT02854592.","hji,kes",0,0,0,2,0,NA,NA +33906374,Genetic Causes of Cardiomyopathy in Children: First Results From the Pediatric Cardiomyopathy Genes Study.,"Background Pediatric cardiomyopathy is a genetically heterogeneous disease with substantial morbidity and mortality. Current guidelines recommend genetic testing in children with hypertrophic, dilated, or restrictive cardiomyopathy, but practice variations exist. Robust data on clinical testing practices and diagnostic yield in children are lacking. This study aimed to identify the genetic causes of cardiomyopathy in children and to investigate clinical genetic testing practices. Methods and Results Children with familial or idiopathic cardiomyopathy were enrolled from 14 institutions in North America. Probands underwent exome sequencing. Rare sequence variants in 37 known cardiomyopathy genes were assessed for pathogenicity using consensus clinical interpretation guidelines. Of the 152 enrolled probands, 41% had a family history of cardiomyopathy. Of 81 (53%) who had undergone clinical genetic testing for cardiomyopathy before enrollment, 39 (48%) had a positive result. Genetic testing rates varied from 0% to 97% between sites. A positive family history and hypertrophic cardiomyopathy subtype were associated with increased likelihood of genetic testing (P=0.005 and P=0.03, respectively). A molecular cause was identified in an additional 21% of the 63 children who did not undergo clinical testing, with positive results identified in both familial and idiopathic cases and across all phenotypic subtypes. Conclusions A definitive molecular genetic diagnosis can be made in a substantial proportion of children for whom the cause and heritable nature of their cardiomyopathy was previously unknown. 
Practice variations in genetic testing are great and should be reduced. Improvements can be made in comprehensive cardiac screening and predictive genetic testing in first-degree relatives. Overall, our results support use of routine genetic testing in cases of both familial and idiopathic cardiomyopathy. Registration URL: https://www.clinicaltrials.gov; Unique identifier: NCT01873963.","hji,kes",0,0,0,2,0,NA,NA +33907838,Demetra Application: An integrated genotype analysis web server for clinical genomics in endometriosis.,"Demetra Application is a holistic integrated and scalable bioinformatics web-based tool designed to assist medical experts and researchers in the process of diagnosing endometriosis. The application identifies the most prominent gene variants and single nucleotide polymorphisms (SNPs) causing endometriosis using the genomic data provided for the patient by a medical expert. The present study analyzed >28.000 endometriosis-related publications using data mining and semantic techniques aimed towards extracting the endometriosis-related genes and SNPs. The extracted knowledge was filtered, evaluated, annotated, classified, and stored in the Demetra Application Database (DAD). Moreover, an updated gene regulatory network with the genes implements in endometriosis was established. This was followed by the design and development of the Demetra Application, in which the generated datasets and results were included. The application was tested and presented herein with whole-exome sequencing data from seven related patients with endometriosis. Endometriosis-related SNPs and variants identified in genome-wide association studies (GWAS), whole-genome (WGS), whole-exome (WES), or targeted sequencing information were classified, annotated and analyzed in a consolidated patient profile with clinical significance information. 
Probable genes associated with the patient's genomic profile were visualized using several graphs, including chromosome ideograms, statistic bars and regulatory networks through data mining studies with relative publications, in an effort to obtain a representative number of the most credible candidate genes and biological pathways associated with endometriosis. An evaluation analysis was performed on seven patients from a three-generation family with endometriosis. All the recognized gene variants that were previously considered to be associated with endometriosis were properly identified in the output profile per patient, and by comparing the results, novel findings emerged. This novel and accessible webserver tool of endometriosis to assist medical experts in the clinical genomics and precision medicine procedure is available at http://geneticslab.aua.gr/.","hji,kes",0,0,0,2,0,NA,all applications +33939828,PE-Designer and PE-Analyzer: web-based design and analysis tools for CRISPR prime editing.,"Prime editing technology is capable of generating targeted insertions, deletions, and base conversions. However, the process of designing prime editing guide RNAs (pegRNAs), which contain a primer binding site and a reverse-transcription template at the 3' end, is more complex than that for the single guide RNAs used with CRISPR nucleases or base editors. Furthermore, the assessment of high-throughput sequencing data after prime editors (PEs) have been employed should consider the unique feature of PEs; thus, pre-existing assessment tools cannot directly be adopted for PEs. Here, we present two user-friendly web-based tools for PEs, named PE-Designer and PE-Analyzer. PE-Designer, a dedicated tool for pegRNA selection, provides all possible target sequences, pegRNA extension sequences, and nicking guide RNA sequences together with useful information, and displays the results in an interactive image. 
PE-Analyzer, a dedicated tool for PE outcome analysis, accepts high-throughput sequencing data, summarizes mutation-related information in a table, and provides interactive graphs. PE-Analyzer was mainly written using JavaScript so that it can analyze several data sets without requiring that huge sequencing data (>100MB) be uploaded to the server, reducing analysis time and increasing personal security. PE-Designer and PE-Analyzer are freely available at http://www.rgenome.net/pe-designer/ and http://www.rgenome.net/pe-analyzer/ without a login process.","hji,kes",0,0,0,2,0,software,NA +33941415,Carotid Ultrasound Boundary Study (CUBS): An Open Multicenter Analysis of Computerized Intima-Media Thickness Measurement Systems and Their Clinical Impact.,"Common carotid intima-media thickness (CIMT) is a commonly used marker for atherosclerosis and is often computed in carotid ultrasound images. An analysis of different computerized techniques for CIMT measurement and their clinical impacts on the same patient data set is lacking. Here we compared and assessed five computerized CIMT algorithms against three expert analysts' manual measurements on a data set of 1088 patients from two centers. Inter- and intra-observer variability was assessed, and the computerized CIMT values were compared with those manually obtained. The CIMT measurements were used to assess the correlation with clinical parameters, cardiovascular event prediction through a generalized linear model and the Kaplan-Meier hazard ratio. CIMT measurements obtained with a skilled analyst's segmentation and the computerized segmentation were comparable in statistical analyses, suggesting they can be used interchangeably for CIMT quantification and clinical outcome investigation. 
To facilitate future studies, the entire data set used is made publicly available for the community at http://dx.doi.org/10.17632/fpv535fss7.1.","hji,kes",0,0,0,2,0,curated dataset; reassessed I would still want to say yes based on abstract. But Heidi's point of Mendeley is valid,"mendely; reassessed and still no - in Mendely data, not a distinct data resource" +33942461,The hop downy mildew pathogen Pseudoperonospora humuli.,"Pseudoperonospora humuli is an obligate biotrophic oomycete that causes downy mildew, one of the most devastating diseases of cultivated hop, Humulus lupulus. Downy mildew occurs in all production areas of the crop in the Northern Hemisphere and Argentina. The pathogen overwinters in hop crowns and roots, and causes considerable crop loss. Downy mildew is managed by sanitation practices, planting of resistant cultivars, and fungicide applications. However, the scarcity of sources of host resistance and fungicide resistance in pathogen populations complicates disease management. This review summarizes the current knowledge on the symptoms of the disease, life cycle, virulence factors, and management of hop downy mildew, including various forecasting systems available in the world. Additionally, recent developments in genomics and effector discovery, and the future prospects of using such resources in successful disease management are also discussed.

Taxonomy

Class: Oomycota; Order: Peronosporales; Family: Peronosporaceae; Genus: Pseudoperonospora; Species: Pseudoperonospora humuli.

Disease symptoms

The disease is characterized by systemically infected chlorotic shoots called ""spikes"". Leaf symptoms and signs include angular chlorotic lesions and profuse sporulation on the abaxial side of the leaf. Under severe disease pressure, dark brown discolouration or lesions are observed on cones. Infected crowns have brown to black streaks when cut open. Cultivars highly susceptible to crown rot may die at this phase of the disease cycle without producing shoots. However, foliar symptoms may not be present on plants with systemically infected root systems.

Infection process

Pathogen mycelium overwinters in buds and crowns, and emerges on infected shoots in spring. Profuse sporulation occurs on infected tissues and sporangia are released and dispersed by air currents. Under favourable conditions, sporangia germinate and produce biflagellate zoospores that infect healthy tissue, thus perpetuating the infection cycle. Though oospores are produced in infected tissues, their role in the infection cycle is not defined.

Control

Downy mildew on hop is managed by a combination of sanitation practices and timely fungicide applications. Forecasting systems are used to time fungicide applications for successful management of the disease. USEFUL WEBSITES: https://content.ces.ncsu.edu/hop-downy-mildew (North Carolina State University disease factsheet), https://www.canr.msu.edu/resources/michigan-hop-management-guide (Michigan Hop Management Guide), http://uspest.org/risk/models (Oregon State University Integrated Plant Protection Center degree-day model for hop downy mildew), https://www.usahops.org/cabinet/data/Field-Guide.pdf (Field Guide for Integrated Pest Management in Hops).","hji,kes",0,0,0,2,0,NA,NA +33950258,LipidSuite: interactive web server for lipidomics differential and enrichment analysis.,"Advances in mass spectrometry enabled high throughput profiling of lipids but differential analysis and biological interpretation of lipidomics datasets remains challenging. To overcome this barrier, we present LipidSuite, an end-to-end differential lipidomics data analysis server. LipidSuite offers a step-by-step workflow for preprocessing, exploration, differential analysis and enrichment analysis of untargeted and targeted lipidomics. Three lipidomics data formats are accepted for upload: mwTab file from Metabolomics Workbench, Skyline CSV Export, and a numerical matrix. Experimental variables to be used in analysis are uploaded in a separate file. Conventional lipid names are automatically parsed to enable lipid class and chain length analyses. Users can interactively explore data, choose subsets based on sample types or lipid classes or characteristics, and conduct univariate, multivariate and unsupervised analyses. For complex experimental designs and clinical cohorts, LipidSuite offers confounding variables adjustment. Finally, data tables and plots can be both interactively viewed or downloaded for publication or reports. 
Overall, we anticipate this free, user-friendly webserver to facilitate differential lipidomics data analysis and re-analysis, and fully harness biological interpretation from lipidomics datasets. LipidSuite is freely available at http://suite.lipidr.org.","hji,kes",0,0,0,2,0,software,NA +33953926,Rapid response to emerging biomedical challenges and threats.,"As part of the global mobilization to combat the present pandemic, almost 100 000 COVID-19-related papers have been published and nearly a thousand models of macromolecules encoded by SARS-CoV-2 have been deposited in the Protein Data Bank within less than a year. The avalanche of new structural data has given rise to multiple resources dedicated to assessing the correctness and quality of structural data and models. Here, an approach to evaluate the massive amounts of such data using the resource https://covid19.bioreproducibility.org is described, which offers a template that could be used in large-scale initiatives undertaken in response to future biomedical crises. Broader use of the described methodology could considerably curtail information noise and significantly improve the reproducibility of biomedical research.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +33956141,CeLaVi: an interactive cell lineage visualization tool.,"Recent innovations in genetics and imaging are providing the means to reconstruct cell lineages, either by tracking cell divisions using live microscopy, or by deducing the history of cells using molecular recorders. A cell lineage on its own, however, is simply a description of cell divisions as branching events. A major goal of current research is to integrate this description of cell relationships with information about the spatial distribution and identities of the cells those divisions produce. Visualizing, interpreting and exploring these complex data in an intuitive manner requires the development of new tools. 
Here we present CeLaVi, a web-based visualization tool that allows users to navigate and interact with a representation of cell lineages, whilst simultaneously visualizing the spatial distribution, identities and properties of cells. CeLaVi's principal functions include the ability to explore and manipulate the cell lineage tree; to visualise the spatial distribution of cell clones at different depths of the tree; to colour cells in the 3D viewer based on lineage relationships; to visualise various cell qualities on the 3D viewer (e.g. gene expression, cell type) and to annotate selected cells/clones. All these capabilities are demonstrated with four different example data sets. CeLaVi is available at http://www.celavi.pro.","hji,kes",0,0,0,2,0,software,NA +33961050,GEInter: an R package for robust gene-environment interaction analysis.,"

Summary

For understanding complex diseases, gene-environment (G-E) interactions have important implications beyond main G and E effects. Most of the existing analysis approaches and software packages cannot accommodate data contamination/long-tailed distribution. We develop GEInter, a comprehensive R package tailored to robust G-E interaction analysis. For both marginal and joint analysis, for data without and with missingness, for continuous and censored survival responses, it comprehensively conducts identification, estimation, visualization, and prediction. It can fill an important gap in the existing literature and enjoy broad applicability.

Availability and implementation

https://cran.r-project.org/web/packages/GEInter/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33961960,Do not attempt cardiopulmonary resuscitation (DNACPR) decisions in people admitted with suspected COVID-19: Secondary analysis of the PRIEST observational cohort study.,"

Aims

We aimed to describe the characteristics and outcomes of adults admitted to hospital with suspected COVID-19 according to their DNACPR decisions, and identify factors associated with DNACPR decisions.

Methods

We undertook a secondary analysis of 13,977 adults admitted to hospital with suspected COVID-19 and included in the Pandemic Respiratory Infection Emergency System Triage (PRIEST) study. We recorded presenting characteristics and outcomes (death or organ support) up to 30 days. We categorised patients as early DNACPR (before or on the day of admission) or late/no DNACPR (no DNACPR or occurring after the day of admission). We undertook descriptive analysis comparing these groups and multivariable analysis to identify independent predictors of early DNACPR.

Results

We excluded 1249 with missing DNACPR data, and identified 3929/12748 (31%) with an early DNACPR decision. They had higher mortality (40.7% v 13.1%) and lower use of any organ support (11.6% v 15.7%), but received a range of organ support interventions, with some being used at rates comparable to those with late or no DNACPR (e.g. non-invasive ventilation 4.4% v 3.5%). On multivariable analysis, older age (p < 0.001), active malignancy (p < 0.001), chronic lung disease (p < 0.001), limited performance status (p < 0.001), and abnormal physiological variables were associated with increased recording of early DNACPR. Asian ethnicity was associated with reduced recording of early DNACPR (p = 0.001).

Conclusions

Early DNACPR decisions were associated with recognised predictors of adverse outcome, and were inversely associated with Asian ethnicity. Most people with an early DNACPR decision survived to 30 days and many received potentially life-saving interventions.

Registration

ISRCTN registry, ISRCTN28342533, http://www.isrctn.com/ISRCTN28342533.","hji,kes",0,0,0,2,0,NA,NA +33963730,Correlates of serum IGF-1 in young children with moderate acute malnutrition: a cross-sectional study in Burkina Faso.,"

Background

Serum insulin-like growth factor 1 (sIGF-1) is an important growth factor in childhood. However, studies on sIGF-1 among children from low-income countries are few, and the role of body composition is unknown.

Objectives

To assess the associations of anthropometry, body composition, inflammation, and breastfeeding with sIGF-1 among children with moderate acute malnutrition (MAM).

Methods

A cross-sectional study based on admission data from 6- to 23-mo-old children with MAM participating in a nutrition intervention trial (Treatfood) in Burkina Faso. Linear regression analysis was used to identify correlates of sIGF-1.

Results

Among 1546 children, the median (IQR) sIGF-1 was 12 (8.2-18.3) ng/mL. sIGF-1 was highest at 6 mo, with a nadir ∼10-11 mo, and higher in girls than boys. Length-for-age z score (LAZ), weight-for-length z score (WLZ), and midupper arm circumference were positively associated with sIGF-1 (P ≤ 0.001). Fat-free mass (FFM) was also positively associated, as sIGF-1 increased 1.5 (95% CI: 0.5, 2.5) ng/mL for each 1-kg increase in FFM. However, the association disappeared after adjustment for height. Elevated serum C-reactive protein and α1-acid glycoprotein were negatively associated with sIGF-1 (P ≤ 0.001), as was fever (P < 0.001) but not a positive malaria test per se (P = 0.15). Children never breastfed had lower sIGF-1 (-5.1; 95% CI: -9.8, -0.3).

Conclusions

LAZ and WLZ were positively and inflammation negatively associated with sIGF-1. As all children were moderately malnourished and many had inflammation, this probably explains the very low median sIGF-1. The association of FFM with sIGF-1 was fully explained by height. There was a marked age pattern, with a nadir in late infancy, confirming findings from smaller studies from well-nourished populations. There is a need for prospective studies to disentangle the role of sIGF-1 in growth and health. This trial was registered at https://www.isrctn.com as ISRCTN42569496.","hji,kes",0,0,0,2,0,NA,NA +33963869,ProteoSign v2: a faster and evolved user-friendly online tool for statistical analyses of differential proteomics.,"Bottom-up proteomics analyses have been proved over the last years to be a powerful tool in the characterization of the proteome and are crucial for understanding cellular and organism behaviour. Through differential proteomic analysis researchers can shed light on groups of proteins or individual proteins that play key roles in certain, normal or pathological conditions. However, several tools for the analysis of such complex datasets are powerful, but hard-to-use with steep learning curves. In addition, some other tools are easy to use, but are weak in terms of analytical power. Previously, we have introduced ProteoSign, a powerful, yet user-friendly open-source online platform for protein differential expression/abundance analysis designed with the end-proteomics user in mind. Part of Proteosign's power stems from the utilization of the well-established Linear Models For Microarray Data (LIMMA) methodology. Here, we present a substantial upgrade of this computational resource, called ProteoSign v2, where we introduce major improvements, also based on user feedback. 
The new version offers more plot options, supports additional experimental designs, analyzes updated input datasets and performs a gene enrichment analysis of the differentially expressed proteins. We also introduce the deployment of the Docker technology and significantly increase the speed of a full analysis. ProteoSign v2 is available at http://bioinformatics.med.uoc.gr/ProteoSign.","hji,kes",0,0,0,2,0,software,NA +33964156,ptm: an R package for the study of methionine sulfoxidation and other post-translational modifications.,"

Summary

Methionine sulfoxidation is a post-translational modification playing important roles in cell signaling. Herein, we present ptm, an R package for the study of this modification. However, since many of the analyses applied to methionine modification can be extended to other modifications, the package can be useful to thoroughly analyze post-translational modifications in general. Thus, within a single software environment ptm can integrate information from up to 11 databases covering 9 modifications. Different functions can work coordinately to form pipelines allowing the programmatic analysis of thousands of proteins. Alternatively, the user can simultaneously perform different analyses on the same protein of interest, combining the results in a single output. The flexibility of ptm makes it a suitable tool to address site- and protein-centric hypotheses related to post-translational modifications. Accompanying the package we maintain a web page containing extended documentation and examples of the tasks that can be performed with ptm.

Availability and implementation

ptm is implemented in R. Release versions are available via CRAN and work on all major operating systems. The development version is maintained at https://bitbucket.org/jcaledo/ptm. Extended documentation can be found at https://metositeptm.com.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33964537,Children's understanding of epilepsy: A qualitative study.,"

Purpose

To use a qualitative research approach to determine children's understandings of epilepsy and their epilepsy treatment.

Methods

Children aged 7-16 years with physician-confirmed active epilepsy (i.e., having had an epileptic seizure in the past year and or currently taking antiepileptic drugs (AEDs), and not known to have an intellectual disability, were invited to participate. Children had semi-structured interviews separately on two occasions. Between the first and second interviews, an observation of a routine epilepsy clinic appointment of individual children was conducted, and was then discussed during the second interview. Participatory research tools were used in both child interviews to facilitate discussions. Interviews were audio recorded and transcribed, pseudonymized and entered into NVivo (version 12, QSR International). Data were analyzed using a thematic approach.

Results

Twenty-three children of mean age 10.1 years (range 8-14), mean duration of epilepsy of 4.6 years (range 2-10) were enrolled. Twelve were 12 female; 7 had focal, 14 had generalized, and 2 had combined epilepsy; 20 were on monotherapy; and 16 had tried previous AEDs. All had an initial (first) interview; 20 were observed during a clinic appointment and had a second interview. Five broad themes emerged: understanding of epilepsy; understanding of seizures; understanding of medication; understanding of children's role in clinical appointments; influences on children's understanding. Children spoke about what epilepsy meant by describing the physical sensations of having a seizure or through the act of taking medication. Children described the role they had, or felt they should have, but reported challenges in being meaningfully involved in clinical appointments. While healthcare professionals were initial information nodes, epilepsy information from parents appeared to be more significant for children.

Conclusions

The perspectives of children with epilepsy are valuable for clinicians to understand; assumptions should not be made that children's views can be accessed via parents. Clinicians need to be constantly aware of children's views and ways of understanding and communicating about their epilepsy. To support this, the research - drawing on children's words, meanings, and stories - was used to inform an easily accessible, gender-neutral, animation about epilepsy that provides information about the condition, seizures, and medication (https://youtu.be/MO7xXL2ZXP8).","hji,kes",0,0,0,2,0,NA,NA +33968360,Dataset: local government mask orders preceding statewide orders by US states.,"We present a database listing local government mask orders for COVID-19 that were enacted between April and September, 2020, prior to the date that the governors issued statewide mask wearing mandates. We obtained data from a Google search of web pages of local and national commercial and public broadcasters and newspapers, and of the orders themselves. In the database, we present data identifying the county, municipality or tribal council, date of the order, and the source's internet address. In the 34 states with statewide orders, local governments in 21 of these states issued mandates in 218 municipalities, 155 counties, and 1 tribal council. The dataset can be accessed from https://doi.org/10.7939/DVN/NDFEHK.","hji,kes",0,0,0,2,0,not bio,Chuck Check - no +33968730,The Immune-Related Gene HCST as a Novel Biomarker for the Diagnosis and Prognosis of Clear Cell Renal Cell Carcinoma.,"Clear cell renal cell carcinoma (ccRCC) is the most common type of kidney tumor worldwide. Analysis of The Cancer Genome Atlas (TCGA) and Gene Expression Omnibus (GEO) databases showed that the immune-related gene (IRG) hematopoietic cell signal transducer (HCST) could provide guidance for the diagnosis, prognosis, and treatment of ccRCC. 
The RNA-seq data of ccRCC tissues were extracted from two databases: TCGA (https://www.cancer.gov/about-nci/organization/ccg/research/structural-genomics/tcga) and GEO (https://www.ncbi.nlm.nih.gov/geo/). Corresponding clinical information was downloaded from TCGA. Immune-related gene data were extracted from the IMMPORT website (https://www.immport.org/). Differential analysis with R software (https://www.r-project.org/) was used to obtain a prognosis model of ccRCC IRGs. The differences were combined with the clinical data to assess the usefulness of the HCST as a prognostic biomarker. Based on data obtained from the Oncomine (https://www.oncomine.org/), Human Protein Atlas (https://www.proteinatlas.org/), and PubMed (https://pubmed.ncbi.nlm.nih.gov/) databases, the expression levels of the HCST in ccRCC, clinical-pathological indicators of relevance, and influence on prognosis were analyzed. Regulation of the HCST gene in ccRCC was assessed by gene set enrichment analysis (GSEA). In TCGA/GEO databases, the high HCST expression in tumor tissues was significantly correlated to the TMN stage, tumor grade, invasion depth, and lymphatic metastasis (p < 0.05). The overall survival (OS) of patients with high HCST gene expression was significantly lower than that of patients with low HCST gene expression (p < 0.001). Multivariate Cox regression analysis suggested that the HCST expression level [hazard ratio (HR) = 1.630, 95% confidence interval (CI) = 1.042-2.552], tumor cell grade (HR = 1.829, 95% CI = 1.115-3.001), and distant metastasis (HR = 2.634, 95%, CI = 1.562-4.442) were independent risk factors affecting the OS of ccRCC patients (all, p < 0.05). The GSEA study showed that there was significant enrichment in cell adhesion, tumorigenesis, and immune and inflammatory responses in HCST high expression samples. 
Hematopoietic cell signal transducer expression was closely associated with the levels of infiltrating immune cells around ccRCC tissues, especially dendritic cells (DCs). In conclusion, the present study suggested that the HCST was interrelated to the clinicopathology and poor prognosis of ccRCC. High HCST expression was also closely correlated with the levels of tumor-infiltrating immune cells, especially DCs.","hji,kes",0,0,0,2,0,NA,not descpritive of resource +33970805,Medicinal plant compounds as promising inhibitors of coronavirus (COVID-19) main protease: an in silico study.,"The novel Coronavirus (COVID-19) has spread rapidly across the globe and has involved more than 215 countries and territories. Due to a lack of effective therapy or vaccine, urgent and concerted efforts are needed to identify therapeutic targets and medications. COVID-19 main protease represents a major target for drug treatment to inhibit viral function. The present study sought to evaluate medicinal plant compounds as potential inhibitors of the COVID-19 main protease using molecular docking and molecular dynamic analysis. The PDB files of COVID-19 main protease and some medicinal plant compounds were retrieved from the Protein Data Bank (http://www.rcsb.org) and Pubchem server, respectively. The Gromacs software was used for simulation studies, and molecular docking analysis was done using Autodock 4.2. The COVID-19 main protease simulation, compared with some phytochemicals docked to the COVID-19 main protease, were analyzed. Glabridin, catechin, and fisetin had the greatest tendency to interact with the COVID-19 main protease by hydrogen and hydrophobic interactions. Docking of these phytochemicals to COVID-19 main protease led to an increase in the radius of gyration (Rg), decrease in the Root mean square fluctuation (RMSF), and induced variation in COVID-19 main protease secondary structure. 
The high tendency interaction of glabridin, catechin, and fisetin to COVID-19 main protease induced conformational changes on this enzyme. These interactions can lead to enzyme inhibition. This simulated study indicates that these phytochemicals may be considered as potent inhibitors of the viral protease; however, more investigations are required to explore their potential medicinal use.Communicated by Ramaswamy H. Sarma.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +33970835,"""Just Engage in It or Not, You Get Out What You Put In"": Student and Staff Experiences of Feedback and Feedforward in Workplace-Based Learning Environments.","Feedback is central to student learning in the veterinary workplace. Feedforward, a related concept, is used to describe the way information about a student's performance may be used to improve their future performance. Feedback and feedforward practices are diverse, with varied student and staff understandings of the nature and purpose of feedback (feedback literacy). This study compared the practices of feedback and feedforward in a range of programs in one institution during student transitions from the classroom to workplace-based learning environments. The study adopted a broad inter-professional approach to include health care programs as well as social work and theater and performance studies. Profession-specific focus groups were conducted with contribution from 28 students and 31 staff from five different professions. Thematic analysis revealed that students and staff shared an understanding of the feedback and feedforward concepts, and both groups recognized the importance of emotional and relational aspects of the process. Students and staff across all professions recognized the impact of time constraints on the feedback process, although this was particularly highlighted in the health science professions. 
Social work and theater and performance studies students demonstrated a more nuanced understanding of the emotional and relational aspects of feedback and feedforward. Overall, the approach highlights similarities and differences in practices and experiences in different workplace contexts, creating opportunities for cross-disciplinary learning, which may have relevance more widely in higher education programs with workplace-based elements. The study underpinned the development of the LeapForward feedback training resource (https://bilt.online/the-leapforward-project/).","hji,kes",0,0,0,2,0,NA,not life sci +33972020,REP2: A Web Server to Detect Common Tandem Repeats in Protein Sequences.,"Ensembles of tandem repeats (TRs) in protein sequences expand rapidly to form domains well suited for interactions with proteins. For this reason, they are relatively frequent. Some TRs have known structures and therefore it is advantageous to predict their presence in a protein sequence. However, since most TRs diverge quickly, their detection by classical sequence comparison algorithms is not very accurate. Previously, we developed a method and a web server that used curated profiles and thresholds for the detection of 11 common TRs. Here we present a new web server (REP2) that allows the analysis of TRs in both individual and aligned sequences. We provide currently precomputed analyses for a selection of 78 UniProt reference proteomes. We illustrate how these data can be used to study the evolution of TRs using comparative genomics. REP2 can be accessed at http://cbdm-01.zdv.uni-mainz.de/~munoz/rep/.","hji,kes",0,0,0,2,0,software,NA +33980298,IDSM ChemWebRDF: SPARQLing small-molecule datasets.,"The Resource Description Framework (RDF), together with well-defined ontologies, significantly increases data interoperability and usability. The SPARQL query language was introduced to retrieve requested RDF data and to explore links between them. 
Among other useful features, SPARQL supports federated queries that combine multiple independent data source endpoints. This allows users to obtain insights that are not possible using only a single data source. Owing to all of these useful features, many biological and chemical databases present their data in RDF, and support SPARQL querying. In our project, we primary focused on PubChem, ChEMBL and ChEBI small-molecule datasets. These datasets are already being exported to RDF by their creators. However, none of them has an official and currently supported SPARQL endpoint. This omission makes it difficult to construct complex or federated queries that could access all of the datasets, thus underutilising the main advantage of the availability of RDF data. Our goal is to address this gap by integrating the datasets into one database called the Integrated Database of Small Molecules (IDSM) that will be accessible through a SPARQL endpoint. Beyond that, we will also focus on increasing mutual interoperability of the datasets. To realise the endpoint, we decided to implement an in-house developed SPARQL engine based on the PostgreSQL relational database for data storage. In our approach, data are stored in the traditional relational form, and the SPARQL engine translates incoming SPARQL queries into equivalent SQL queries. An important feature of the engine is that it optimises the resulting SQL queries. Together with optimisations performed by PostgreSQL, this allows efficient evaluations of SPARQL queries. The endpoint provides not only querying in the dataset, but also the compound substructure and similarity search supported by our Sachem project. Although the endpoint is accessible from an internet browser, it is mainly intended to be used for programmatic access by other services, for example as a part of federated queries. For regular users, we offer a rich web application called ChemWebRDF using the endpoint. 
The application is publicly available at https://idsm.elixir-czech.cz/chemweb/ .","hji,kes",0,0,0,2,0,interface to another database,NA +33981815,A spatiotemporal dataset for integrated assessment and modelling of crop-livestock integration with the MAELIA simulation platform.,"The general purpose of the primary and secondary data available in this article is to support an integrated assessment of scenarios of crop-livestock integration at the territorial level i.e. of exchanges between arable and livestock farms. The data is a result of a research collaboration between the scientist from INRAE, agricultural advisers from Chamber of Agriculture of Pays de la Loire (CRAPL) and a collective of five arable and two livestock farmers located in the district of Pays de Pouzauges (Vendée department, western France). All participants formed part of the DiverIMPACTS project (https://www.diverimpacts.net/) that aims to achieve the full potential of diversification of cropping systems for improved productivity, delivery of ecosystem services and resource-efficient and sustainable value chains in Europe. The first dataset corresponds to the inputs of MAELIA (http://maelia-platform.inra.fr/), a spatial agent-based simulation platform that was used to support an iterative design and assessment of scenarios to redesign cropping systems. The second dataset corresponds to the outputs of MAELIA simulations and the associated indicators at the farm, group and territory level. The data comprise multiple shape and csv files characterizing the edaphic-climatic heterogeneity of the territory and cropping systems, farmers' crop management rules (IF-THEN rules) and general information about the farms (e.g. crops, agricultural equipment, average crop yields). Data is reported for the baseline situation and three exchange scenarios containing different innovative cropping systems co-designed by scientists, agricultural advisers and the farmers. 
The data presented here can be found in the Portail Data INRA repository (https://doi.org/10.15454/3ZTCF5) and were used in the research article ""Fostering local crop-livestock integration via legume exchanges using an innovative integrated assessment and modelling approach: MAELIA"" [1].","hji,kes",0,0,0,2,0,references other db,NA +33982946,"Clinical Impact of Rectal Hyposensitivity: A Cross-Sectional Study of 2,876 Patients With Refractory Functional Constipation.","

Introduction

Normal bowel function requires intact sensory pathways. Diminished rectal sensation (rectal hyposensitivity [RH]) is associated with constipation, although its clinical importance remains unclear.

Methods

Consecutive patients (aged 18-80) attending a tertiary center (2004-2016) for investigation of refractory functional constipation (Rome IV core criteria defined, applied post hoc) were included. Patients completed a clinical symptom questionnaire and underwent anorectal physiologic investigations, including rectal sensory testing (balloon distension) to determine 3 well-established sensory thresholds. Multivariate regression analyses were performed to evaluate associations between RH, symptomology, and allied physiologic investigations.

Results

Of 2,876 patients meeting inclusion criteria, 722 (25%) had RH based on ≥1 elevated sensory thresholds (0: n = 2,154 [74.9%]; 1: n = 327 [11.4%]; 2: n = 209 [7.3%]; and 3: n = 186 [6.5%]). A linear relationship existed between increasing number of elevated sensory thresholds and constipation severity (Cleveland Clinic constipation score: mean difference per threshold [95% confidence interval] 0.69 [0.48-0.90]; P < 0.001). Several symptoms were significantly (P < 0.05) associated with RH including: infrequent defecation (odds ratio 1.29 [1.17-1.42]), painful evacuation (1.15 [1.05-1.27]), prolonged toileting (1.14 [1.05-1.24]), and digitation or enema use (1.18 [1.08-1.30]). On defecography, a ""functional"" evacuation disorder was also associated with RH (1.37 [1.25-1.50], P < 0.001), as was megarectum (2.52 [2.08-3.05], P < 0.001).

Discussion

RH occurs in 25% of patients with refractory functional constipation. Increased number of elevated sensory thresholds is associated with more severe constipation phenotype. These data, in the largest study to date, provide for the first time evidence to show that RH is a major pathophysiologic mechanism in constipation, with recognized clinical impact (http://links.lww.com/AJG/B765).(Equation is included in full-text article.).","hji,kes",0,0,0,2,0,NA,NA +33983414,CNVfilteR: an R/bioconductor package to identify false positives produced by germline NGS CNV detection tools.,"

‚ÄÇ

Germline copy-number variants (CNVs) are relevant mutations for multiple genetics fields, such as the study of hereditary diseases. However, available benchmarks show that all next-generation sequencing (NGS) CNV calling tools produce false positives. We developed CNVfilteR, an R package that uses the single nucleotide variant calls usually obtained in germline NGS pipelines to identify those false positives. The package can detect both false deletions and false duplications. We evaluated CNVfilteR performance on callsets generated by 13 CNV calling tools on 3 whole-genome sequencing and 541 panel samples, showing a decrease of up to 44.8% in false positives and consistent F1-score increase. Using CNVfilteR to detect false-positive calls can improve the overall performance of existing CNV calling pipelines.

Availability

CNVfilteR is released under Artistic-2.0 License. Source code and documentation are freely available at Bioconductor (http://www.bioconductor.org/packages/CNVfilteR).

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33985433,IPD 2.0: To derive insights from an evolving SARS-CoV-2 genome.,"

Background

Rapid analysis of SARS-CoV-2 genomic data plays a crucial role in surveillance and adoption of measures in controlling spread of Covid-19. Fast, inclusive and adaptive methods are required for the heterogenous SARS-CoV-2 sequence data generated at an unprecedented rate.

Results

We present an updated version of the SARS-CoV-2 analysis module of our automated computational pipeline, Infectious Pathogen Detector (IPD) 2.0, to perform genomic analysis to understand the variability and dynamics of the virus. It adopts the recent clade nomenclature and demonstrates the clade prediction accuracy of 92.8%. IPD 2.0 also contains a SARS-CoV-2 updater module, allowing automatic upgrading of the variant database using genome sequences from GISAID. As a proof of principle, analyzing 208,911 SARS-CoV-2 genome sequences, we generate an extensive database of 2.58 million sample-wise variants. A comparative account of lineage-specific mutations in the newer SARS-CoV-2 strains emerging in the UK, South Africa and Brazil and data reported from India identify overlapping and lineages specific acquired mutations suggesting a repetitive convergent and adaptive evolution.

Conclusions

A novel and dynamic feature of the SARS-CoV-2 module of IPD 2.0 makes it a contemporary tool to analyze the diverse and growing genomic strains of the virus and serve as a vital tool to help facilitate rapid genomic surveillance in a population to identify variants involved in breakthrough infections. IPD 2.0 is freely available from http://www.actrec.gov.in/pi-webpages/AmitDutt/IPD/IPD.html and the web-application is available at http://ipd.actrec.gov.in/ipdweb/ .","hji,kes",0,0,0,2,0,software,NA +33993215,TimiRGeN: R/Bioconductor package for time series microRNA-mRNA integration and analysis.,"

Motivation

The analysis of longitudinal datasets and construction of gene regulatory networks provide a valuable means to disentangle the complexity of microRNA-mRNA interactions. However, there are no computational tools that can integrate, conduct functional analysis and generate detailed networks from longitudinal microRNA-mRNA datasets.

Results

We present TimiRGeN, an R package that uses time point based differential expression results to identify miRNA-mRNA interactions influencing signalling pathways of interest. miRNA-mRNA interactions can be visualised in R or exported to PathVisio or Cytoscape. The output can be used for hypothesis generation and directing in vitro or further in silico work such as gene regulatory network construction.

Availability and implementation

TimiRGeN is available for download on Bioconductor (https://bioconductor.org/packages/TimiRGeN) and requires R v4.0.2 or newer and BiocManager v3.12 or newer.

Supplementary information

Supplementary data is available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +33997237,Comparative proteomics of Brucella melitensis is a useful toolbox for developing prophylactic interventions in a One-Health context.,"Brucellosis caused by Brucella melitensis is a zoonosis frequently reported in the Mediterranean and Middle-East regions and responsible for important economic losses and reduced animal welfare. To date, current strategies applied to control or eradicate the disease relies on diagnostic tests that suffer from limited specificity in non-vaccinated animals; while prophylactic measures, when applied, use a live attenuated bacterial strain characterized by residual virulence on adult pregnant animals and difficulties in distinguishing vaccinated from infected animals. To overcome these issues, studies are desired to elucidate the bacterial biology and the pathogenetic mechanisms of both the vaccinal strain and the pathogenic strains. Proteomics has a potential in tackling issues of One-Health concern; here, we employed label-free shotgun proteomics to investigate the protein repertoire of the vaccinal strain B. melitensis Rev.1 and compare it with the proteome of the Brucella melitensis 16 M, a reference strain representative of B. melitensis field strains. Comparative proteomics profiling underlines common and diverging traits between the two strains. Common features suggest the potential biochemical routes responsible for the residual virulence of the vaccinal strain, whilst the diverging traits are suggestive biochemical signatures to be further investigated to provide an optimized diagnostic capable of discriminating the vaccinated from infected animals. 
The data presented in this study are openly available in PRIDE data repository at https://www.ebi.ac.uk/pride/, reference number PXD022472.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +33999180,OpenAnnotate: a web server to annotate the chromatin accessibility of genomic regions.,"Chromatin accessibility, as a powerful marker of active DNA regulatory elements, provides valuable information for understanding regulatory mechanisms. The revolution in high-throughput methods has accumulated massive chromatin accessibility profiles in public repositories. Nevertheless, utilization of these data is hampered by cumbersome collection, time-consuming processing, and manual chromatin accessibility (openness) annotation of genomic regions. To fill this gap, we developed OpenAnnotate (http://health.tsinghua.edu.cn/openannotate/) as the first web server for efficiently annotating openness of massive genomic regions across various biosample types, tissues, and biological systems. In addition to the annotation resource from 2729 comprehensive profiles of 614 biosample types of human and mouse, OpenAnnotate provides user-friendly functionalities, ultra-efficient calculation, real-time browsing, intuitive visualization, and elaborate application notebooks. We show its unique advantages compared to existing databases and toolkits by effectively revealing cell type-specificity, identifying regulatory elements and 3D chromatin contacts, deciphering gene functional relationships, inferring functions of transcription factors, and unprecedentedly promoting single-cell data analyses. 
We anticipate OpenAnnotate will provide a promising avenue for researchers to construct a more holistic perspective to understand regulatory mechanisms.","hji,kes",0,0,0,2,0,software,NA +33999189,ProteoVision: web server for advanced visualization of ribosomal proteins.,"ProteoVision is a web server designed to explore protein structure and evolution through simultaneous visualization of multiple sequence alignments, topology diagrams and 3D structures. Starting with a multiple sequence alignment, ProteoVision computes conservation scores and a variety of physicochemical properties and simultaneously maps and visualizes alignments and other data on multiple levels of representation. The web server calculates and displays frequencies of amino acids. ProteoVision is optimized for ribosomal proteins but is applicable to analysis of any protein. ProteoVision handles internally generated and user uploaded alignments and connects them with a selected structure, found in the PDB or uploaded by the user. It can generate de novo topology diagrams from three-dimensional structures. All displayed data is interactive and can be saved in various formats as publication quality images or external datasets or PyMol Scripts. ProteoVision enables detailed study of protein fragments defined by Evolutionary Classification of protein Domains (ECOD) classification. ProteoVision is available at http://proteovision.chemistry.gatech.edu/.","hji,kes",0,0,0,2,0,software,NA +34000199,A Deep Learning Enhanced Novel Software Tool for Laryngeal Dynamics Analysis.,"Purpose High-speed videoendoscopy (HSV) is an emerging, but barely used, endoscopy technique in the clinic to assess and diagnose voice disorders because of the lack of dedicated software to analyze the data. HSV allows to quantify the vocal fold oscillations by segmenting the glottal area. 
This challenging task has been tackled by various studies; however, the proposed approaches are mostly limited and not suitable for daily clinical routine. Method We developed a user-friendly software in C# that allows the editing, motion correction, segmentation, and quantitative analysis of HSV data. We further provide pretrained deep neural networks for fully automatic glottis segmentation. Results We freely provide our software Glottis Analysis Tools (GAT). Using GAT, we provide a general threshold-based region growing platform that enables the user to analyze data from various sources, such as in vivo recordings, ex vivo recordings, and high-speed footage of artificial vocal folds. Additionally, especially for in vivo recordings, we provide three robust neural networks at various speed and quality settings to allow a fully automatic glottis segmentation needed for application by untrained personnel. GAT further evaluates video and audio data in parallel and is able to extract various features from the video data, among others the glottal area waveform, that is, the changing glottal area over time. In total, GAT provides 79 unique quantitative analysis parameters for video- and audio-based signals. Many of these parameters have already been shown to reflect voice disorders, highlighting the clinical importance and usefulness of the GAT software. Conclusion GAT is a unique tool to process HSV and audio data to determine quantitative, clinically relevant parameters for research, diagnosis, and treatment of laryngeal disorders. Supplemental Material https://doi.org/10.23641/asha.14575533.","hji,kes",0,0,0,2,0,software,NA +34000890,The hospital telemedicine TELEMED database: Providing information on evidence-based telemedicine services to hospital managers and healthcare professionals.,"

Background

Increased use of telemedicine in the healthcare system is a political goal in Denmark. Although the number of hospital patients using interventions such as the video consultation has increased in recent years only a small proportion of the outpatient and inpatient visits involve telemedicine. The TELEMED database (https://telemedicine.cimt.dk/) has been launched at the Center for Innovative Medical Technology in Denmark to ensure that hospital managers and healthcare professionals have access to information about telemedicine services and their effectiveness. This article describes the development and the content of the TELEMED database.

Methods

A structured literature search was made in the PubMed Database for randomised controlled trials or observational studies with a control group that investigated the effect of telemedicine interventions for hospital patients. Data were extracted from each article on the clinical effectiveness, patient perceptions, economic effects and implementation challenges. As the database should only provide inspiration to healthcare professionals regarding possibilities for use of telemedicine, the risk of bias in the studies was not assessed.

Results

The literature search resulted in 2825 hits. Based on full text assessment, 331 articles were included for data extraction and assessment. These articles present telemedicine services used in 22 different medical specialities. Forty-eight percent of the studies found a positive, statistically significant clinical effect, while 47% showed no statistically significant difference. In 48% of the studies, patients' experiences were examined and of these 68% found positive patient experiences. Fifty-four percent of the articles included information on the economic effects and, of these, 51% found reduction in healthcare utilization. In the majority of studies between two and four types of implementation challenges were found.Conclusions and recommendations: The TELEMED database provides an easily accessible overview of existing evidence-based telemedicine services for use by hospital managers and health professionals, who whish to to implement telemedicine. The database is freely available and expected to be continuously improved and broadened over time.","hji,kes",0,0,0,2,0,"remotely related to bio, but worth considering?",out of scope +34001434,TaxonKit: A practical and efficient NCBI taxonomy toolkit.,"The National Center for Biotechnology Information (NCBI) Taxonomy is widely applied in biomedical and ecological studies. Typical demands include querying taxonomy identifier (TaxIds) by taxonomy names, querying complete taxonomic lineages by TaxIds, listing descendants of given TaxIds, and others. However, existed tools are either limited in functionalities or inefficient in terms of runtime. In this work, we present TaxonKit, a command-line toolkit for comprehensive and efficient manipulation of NCBI Taxonomy data. TaxonKit comprises seven core subcommands providing functions, including TaxIds querying, listing, filtering, lineage retrieving and reformatting, lowest common ancestor computation, and TaxIds change tracking. 
The practical functions, competitive processing performance, scalability with different scales of datasets and good accessibility could facilitate taxonomy data manipulations. TaxonKit provides free access under the permissive MIT license on GitHub, Brewsci, and Bioconda. The documents are also available at https://bioinf.shenwei.me/taxonkit/.","hji,kes",0,0,0,2,0,software,NA +34004273,Klebsiella MALDI TypeR: a web-based tool for Klebsiella identification based on MALDI-TOF mass spectrometry.,"Klebsiella pathogens affect human and animal health and are widely distributed in the environment. Among these, the Klebsiella pneumoniae species complex, which includes seven phylogroups, is an important cause of community and hospital infections. The Klebsiella oxytoca species complex also causes hospital infections and antibiotic-associated haemorrhagic colitis. The unsuitability of currently used clinical microbiology methods to distinguish species within each of these species complexes leads to high rates of misidentifications that are masking the true clinical significance and potential epidemiological specificities of individual species. We developed a web-based tool, Klebsiella MALDI TypeR, a platform-independent and user-friendly application that enables uploading MALDI-TOF mass spectrometry data in order to identify Klebsiella isolates at the species complex and phylogroup levels. The tool, available at https://maldityper.pasteur.fr/, leverages a database of previously identified biomarkers that are specific for species complexes, individual phylogroups, or related phylogroups. We obtained 84%-100% identification accuracy depending on phylogroup. Identification results are obtained in a few seconds from batches of uploaded spectral data. Klebsiella MALDI TypeR enables fast and reliable identification of Klebsiella strains that are often misidentified with standard microbiological methods. 
This web-based identification tool may be extended in the future to other human bacterial pathogens.","hji,kes",0,0,0,2,0,software,NA +34006627,DOE JGI Metagenome Workflow.,"The DOE Joint Genome Institute (JGI) Metagenome Workflow performs metagenome data processing, including assembly; structural, functional, and taxonomic annotation; and binning of metagenomic data sets that are subsequently included into the Integrated Microbial Genomes and Microbiomes (IMG/M) (I.-M. A. Chen, K. Chu, K. Palaniappan, A. Ratner, et al., Nucleic Acids Res, 49:D751-D763, 2021, https://doi.org/10.1093/nar/gkaa939) comparative analysis system and provided for download via the JGI data portal (https://genome.jgi.doe.gov/portal/). This workflow scales to run on thousands of metagenome samples per year, which can vary by the complexity of microbial communities and sequencing depth. Here, we describe the different tools, databases, and parameters used at different steps of the workflow to help with the interpretation of metagenome data available in IMG and to enable researchers to apply this workflow to their own data. We use 20 publicly available sediment metagenomes to illustrate the computing requirements for the different steps and highlight the typical results of data processing. The workflow modules for read filtering and metagenome assembly are available as a workflow description language (WDL) file (https://code.jgi.doe.gov/BFoster/jgi_meta_wdl). The workflow modules for annotation and binning are provided as a service to the user community at https://img.jgi.doe.gov/submit and require filling out the project and associated metadata descriptions in the Genomes OnLine Database (GOLD) (S. Mukherjee, D. Stamatis, J. Bertsch, G. Ovchinnikova, et al., Nucleic Acids Res, 49:D723-D733, 2021, https://doi.org/10.1093/nar/gkaa983).IMPORTANCE The DOE JGI Metagenome Workflow is designed for processing metagenomic data sets starting from Illumina fastq files. 
It performs data preprocessing, error correction, assembly, structural and functional annotation, and binning. The results of processing are provided in several standard formats, such as fasta and gff, and can be used for subsequent integration into the Integrated Microbial Genomes and Microbiomes (IMG/M) system where they can be compared to a comprehensive set of publicly available metagenomes. As of 30 July 2020, 7,155 JGI metagenomes have been processed by the DOE JGI Metagenome Workflow. Here, we present a metagenome workflow developed at the JGI that generates rich data in standard formats and has been optimized for downstream analyses ranging from assessment of the functional and taxonomic composition of microbial communities to genome-resolved metagenomics and the identification and characterization of novel taxa. This workflow is currently being used to analyze thousands of metagenomic data sets in a consistent and standardized manner.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +34007299,Ear Acupressure for Allergic Rhinitis: A Systematic Review and Meta-Analysis of Randomized Controlled Trials.,"

Background

The treatment effects and safety of ear acupressure (EAP) for patients with allergic rhinitis (AR) have yet to be clarified.

Objective

To evaluate the effects and safety of EAP in AR patients.

Design

Systematic review of published studies.

Methods

A total of 24 English and Chinese databases (PubMed, EMBASE (Excerpta Medical Database), Cochrane Central Register of Controlled Trials, CINAHL, Informit, ScienceDirect, LILACS (Latin American and Caribbean Health Sciences), ProQuest, AMED, Blackwell Synergy, PsycINFO, Panteleimon, AcuBriefs, KoreaMed, IndMed, Ingenta, mRCT, ISI Web of Knowledge, ERIC, VIP Information (http://www.cqvip.com), China National Knowledge Infrastructure (http://www.cnki.net), Cochrane Library, Chinese Cochrane Centre Controlled Trials Register Platform, and Wanfang Chinese Digital Periodical and Conference Database) were searched from their respective inceptions to August 2020 to collect randomized controlled trials of ear acupressure for allergic rhinitis. We performed literature inclusion, data extraction, and trial quality evaluations. Methodological quality was assessed according to the Cochrane Handbook. Revman5.3 was used for all analyses.

Results

A total of 203 trials were identified and eleven studies involved 1094 participants aged 3-70 years. EAP was better than control group interventions in terms of effectiveness (risk ratio (RR): 0.51; 95% confidence interval (CI): 0.36-0.70; P < 0.0001). EAP was superior to sham EAP in terms of improvement of the total nasal symptom score (RR: -0.50; 95% CI: -0.96-0.05; P = 0.03), sneezing score (RR: -0.36; 95% CI: -0.59-0.12; P = 0.003), global QoL score (RR: 0.42; 95% CI: 0.04-0.08; P = 0.03), and eye symptom score (RR: -0.36; 95% CI: -0.67-0.05; P = 0.02).

Conclusions

Despite the positive results, it is premature to confirm the efficacy of EAP for treating AR. More high-quality studies are needed to confirm safety and efficacy.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +34009014,DNA Methylation in Babies Born to Nonsmoking Mothers Exposed to Secondhand Smoke during Pregnancy: An Epigenome-Wide Association Study.,"

Background

Maternal smoking during pregnancy is related to altered DNA methylation in infant umbilical cord blood. The extent to which low levels of smoke exposure among nonsmoking pregnant women relates to offspring DNA methylation is unknown.

Objective

This study sought to evaluate relationships between maternal prenatal plasma cotinine levels and DNA methylation in umbilical cord blood in newborns using the Infinium HumanMethylation 450K BeadChip.

Methods

Participants from the Newborn Epigenetics Study cohort who reported not smoking during pregnancy had verified low levels of cotinine from maternal prenatal plasma (0 ng/mL to <4 ng/mL), and offspring epigenetic data from umbilical cord blood were included in this study (n=79). Multivariable linear regression models were fit to the data, controlling for cell proportions, age, race, education, and parity. Estimates represent changes in response to any 1-ng/mL unit increase in exposure.

Results

Multivariable linear regression models yielded 29,049 CpGs that were differentially methylated in relation to increases in cotinine at a 5% false discovery rate. Top CpGs were within or near genes involved in neuronal functioning (PRKG1, DLGAP2, BSG), carcinogenesis (FHIT, HSPC157) and inflammation (AGER). Kyoto Encyclopedia of Genes and Genomes (KEGG) analyses suggest cotinine was related to methylation of gene pathways controlling neuronal signaling, metabolic regulation, cell signaling and regulation, and cancer. Further, enhancers associated with transcription start sites were enriched in altered CpGs. Using an independent sample from the same study population (n=115), bisulfite pyrosequencing was performed with infant cord blood DNA for two genes within our top 20 hits (AGER and PRKG1). Results from pyrosequencing replicated epigenome results for PRKG1 (cg17079497, estimate=-1.09, standard error (SE)=0.45, p=0.018) but not for AGER (cg09199225; estimate=-0.16, SE=0.21, p=0.44).

Discussion

Secondhand smoke exposure among nonsmoking women may alter DNA methylation in regions involved in development, carcinogenesis, and neuronal functioning. These novel findings suggest that even low levels of smoke exposure during pregnancy may be sufficient to alter DNA methylation in distinct sites of mixed umbilical cord blood leukocytes in pathways that are known to be altered in cord blood from pregnant active smokers. https://doi.org/10.1289/EHP8099.","hji,kes",0,0,0,2,0,NA,NA +34009297,powerEQTL: An R package and shiny application for sample size and power calculation of bulk tissue and single-cell eQTL analysis.,"

Summary

Genome-wide association studies (GWAS) have revealed thousands of genetic loci for common diseases. One of the main challenges in the post-GWAS era is to understand the causality of the genetic variants. Expression quantitative trait locus (eQTL) analysis is an effective way to address this question by examining the relationship between gene expression and genetic variation in a sufficiently powered cohort. However, it is frequently a challenge to determine the sample size at which a variant with a specific allele frequency will be detected to associate with gene expression with sufficient power. This is a particularly difficult task for single-cell RNAseq studies. Therefore, a user-friendly tool to estimate statistical power for eQTL analyses in both bulk tissue and single-cell data is needed. Here, we presented an R package called powerEQTL with flexible functions to estimate power, minimal sample size, or detectable minor allele frequency for both bulk tissue and single-cell eQTL analysis. A user-friendly, program-free web application is also provided, allowing users to calculate and visualize the parameters interactively.

Availability and implementation

The powerEQTL R package source code and online tutorial are freely available at CRAN: https://cran.r-project.org/web/packages/powerEQTL/. The R shiny application is publicly hosted at https://bwhbioinfo.shinyapps.io/powerEQTL/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +34012710,A Comparison of Methods for Studying the Tumor Microenvironment's Spatial Heterogeneity in Digital Pathology Specimens.,"

Background

The tumor microenvironment is highly heterogeneous, and it is understood to affect tumor progression and patient outcome. A number of studies have reported the prognostic significance of tumor-infiltrating lymphocytes and tumor budding in colorectal cancer (CRC). However, the significance of the intratumoral heterogeneity present in the spatial distribution of these features within the tumor immune microenvironment (TIME) has not been previously reported. Evaluating this intratumoral heterogeneity may aid the understanding of the TIME's effect on patient prognosis as well as identify novel aggressive phenotypes which can be further investigated as potential targets for new treatment.

Methods

In this study, we propose and apply two spatial statistical methodologies for the evaluation of the intratumor heterogeneity present in the distribution of CD3 + and CD8 + lymphocytes and tumor buds (TB) in 232 Stage II CRC cases. Getis-Ord hotspot analysis was applied to quantify the cold and hotspots, defined as regions with a significantly low or high number of each feature of interest, respectively. A novel spatial heatmap methodology for the quantification of the cold and hotspots of each feature of interest, which took into account both the interpatient heterogeneity and the intratumor heterogeneity, was further developed.

Results

Resultant data from each analysis, characterizing the spatial intratumor heterogeneity of lymphocytes and TBs were used for the development of two new highly prognostic risk models.

Conclusions

Our results highlight the value of applying spatial statistics for the assessment of the intratumor heterogeneity. Both Getis-Ord hotspot and our proposed spatial heatmap analysis are broadly applicable across other tissue types as well as other features of interest.

Availability

The code underpinning this publication can be accessed at https://doi.org/10.17630/c2306fe9-66e2-4442-ad89-f986220053e2.","hji,kes",0,0,0,2,0,NA,NA +34013639,In silico prediction of drug-induced ototoxicity using machine learning and deep learning methods.,"Drug-induced ototoxicity has become a serious global problem, because of leading to deafness in hundreds of thousands of people every year. It always results from exposure to drugs or environmental chemicals that cause the impairment and degeneration of the inner ear. Herein, we focused on the in silico modeling of drug-induced ototoxicity of chemicals. We collected 1,102 ototoxic medications and 1,705 non-ototoxic drugs. Based on the data set, a series of computational models were developed with different traditional machine learning and deep learning algorithms implemented on an online chemical database and modeling environment. Six ML models performed best on 5-fold cross-validation and test set. A consensus model was developed with the best individual models. These models were further validated with an external validation. The consensus model showed best predictive ability, with high accuracy of 0.95 on test set and 0.90 on validation set. The consensus model and the data sets used for model development are available at https://ochem.eu/model/46566321. Besides, 16 structural alerts responsible for drug-induced ototoxicity were identified. We hope the results could provide meaningful knowledge and useful tools for ototoxicity evaluation in drug discovery and environmental risk assessment.","hji,kes",0,0,0,2,0,NA,dataset for the model only +34018555,MesKit: a tool kit for dissecting cancer evolution of multi-region tumor biopsies through somatic alterations.,"

Background

Multi-region sequencing (MRS) has been widely used to analyze intra-tumor heterogeneity (ITH) and cancer evolution. However, comprehensive analysis of mutational data from MRS is still challenging, necessitating complicated integration of a plethora of computational and statistical approaches.

Findings

Here, we present MesKit, an R/Bioconductor package that can assist in characterizing genetic ITH and tracing the evolutionary history of tumors based on somatic alterations detected by MRS. MesKit provides a wide range of analysis and visualization modules, including ITH evaluation, metastatic route inference, and mutational signature identification. In addition, MesKit implements an auto-layout algorithm to generate phylogenetic trees based on somatic mutations. The application of MesKit for 2 reported MRS datasets of hepatocellular carcinoma and colorectal cancer identified known heterogeneous features and evolutionary patterns, together with potential driver events during cancer evolution.

Conclusions

In summary, MesKit is useful for interpreting ITH and tracing evolutionary trajectory based on MRS data. MesKit is implemented in R and available at https://bioconductor.org/packages/MesKit under the GPL v3 license.","hji,kes",0,0,0,2,0,NA,NA +34019643,eVITTA: a web-based visualization and inference toolbox for transcriptome analysis.,"Transcriptome profiling is essential for gene regulation studies in development and disease. Current web-based tools enable functional characterization of transcriptome data, but most are restricted to applying gene-list-based methods to single datasets, inefficient in leveraging up-to-date and species-specific information, and limited in their visualization options. Additionally, there is no systematic way to explore data stored in the largest transcriptome repository, NCBI GEO. To fill these gaps, we have developed eVITTA (easy Visualization and Inference Toolbox for Transcriptome Analysis; https://tau.cmmt.ubc.ca/eVITTA/). eVITTA provides modules for analysis and exploration of studies published in NCBI GEO (easyGEO), detailed molecular- and systems-level functional profiling (easyGSEA), and customizable comparisons among experimental groups (easyVizR). We tested eVITTA on transcriptomes of SARS-CoV-2 infected human nasopharyngeal swab samples, and identified a downregulation of olfactory signal transducers, in line with the clinical presentation of anosmia in COVID-19 patients. We also analyzed transcriptomes of Caenorhabditis elegans worms with disrupted S-adenosylmethionine metabolism, confirming activation of innate immune responses and feedback induction of one-carbon cycle genes. 
Collectively, eVITTA streamlines complex computational workflows into an accessible interface, thus filling the gap of an end-to-end platform capable of capturing both broad and granular changes in human and model organism transcriptomes.","hji,kes",0,0,0,2,0,software,NA +34019656,Amino Acid Interactions (INTAA) web server v2.0: a single service for computation of energetics and conservation in biomolecular 3D structures.,"Interactions among amino acid residues are the principal contributor to the stability of the three-dimensional structure of a protein. The Amino Acid Interactions (INTAA) web server (https://bioinfo.uochb.cas.cz/INTAA/) has established itself as a unique computational resource, which enables users to calculate the contribution of individual residues in a biomolecular structure to its total energy using a molecular mechanical scoring function. In this update, we describe major additions to the web server which help solidify its position as a robust, comprehensive resource for biomolecular structure analysis. Importantly, a new continuum solvation model was introduced, allowing more accurate representation of electrostatic interactions in aqueous media. In addition, a low-overhead pipeline for the estimation of evolutionary conservation in protein chains has been added. New visualization options were introduced as well, allowing users to easily switch between and interrelate the energetic and evolutionary views of the investigated structures.","hji,kes",0,0,0,2,0,software,NA +34019657,ProteinTools: a toolkit to analyze protein structures.,"The experimental characterization and computational prediction of protein structures has become increasingly rapid and precise. However, the analysis of protein structures often requires researchers to use several software packages or web servers, which complicates matters. 
To provide long-established structural analyses in a modern, easy-to-use interface, we implemented ProteinTools, a web server toolkit for protein structure analysis. ProteinTools gathers four applications so far, namely the identification of hydrophobic clusters, hydrogen bond networks, salt bridges, and contact maps. In all cases, the input data is a PDB identifier or an uploaded structure, whereas the output is an interactive dynamic web interface. Thanks to the modular nature of ProteinTools, the addition of new applications will become an easy task. Given the current need to have these tools in a single, fast, and interpretable interface, we believe that ProteinTools will become an essential toolkit for the wider protein research community. The web server is available at https://proteintools.uni-bayreuth.de.","hji,kes",0,0,0,2,0,software,NA +34019776,Conducting a Virtual Study With Special Considerations for Working With Persons With Aphasia.,"Purpose The use of technology (e.g., telehealth) in clinical settings has rapidly increased, and its use in research settings continues to grow. The aim of this report is to present one potential solution to a clinical issue that of virtual and remote assessment for the purposes of spoken language research in persons with aphasia (PWA). To do so, we report detailed methods for conducting a multitimepoint (test-retest) virtual paradigm, assessing lifestyle, physiological, cognitive, and linguistic factors in persons with and without aphasia. Method Procedures for virtual assessment are detailed in a sample of adults with no brain damage (N = 24) and PWA (N = 25) on a test-retest paradigm (data collection approximately 10 ± 3 days apart). 
This report provides practical information about pre-assessment (e.g., recruitment, scheduling), assessment (e.g., aphasia-friendly consent presentation, investigator fidelity), and postassessment (e.g., data storage, quality check) procedures for human behavior research using a virtual platform. Results Preliminary study data are provided, indicating high retention rates, high rates of data acquisition, and feasibility. Common technological troubles and solutions are discussed, and solutions are offered. The results suggest that our pre-assessment, assessment, and postassessment procedures contributed to the success of our study. Conclusions We provide a practical methodology for conducting a multitimepoint study, with considerations for PWA, adding to the body of research on telehealth in clinical populations. Future studies should continue to evaluate telemethodology, which may be core for diversifying studies, improving study retention, and enrolling larger sample sizes. Supplemental Material https://doi.org/10.23641/asha.14608101.","hji,kes",0,0,0,2,0,NA,NA +34020445,Effect on gut microbiota of a 1-y lifestyle intervention with Mediterranean diet compared with energy-reduced Mediterranean diet and physical activity promotion: PREDIMED-Plus Study.,"

Background

The Mediterranean diet is a well-recognized healthy diet that has shown to induce positive changes in gut microbiota. Lifestyle changes such as diet along with physical activity could aid in weight loss and improve cardiovascular risk factors.

Objectives

To investigate the effect of an intensive lifestyle weight loss intervention on gut microbiota.

Methods

This is a substudy of the PREDIMED-Plus (Prevención con Dieta Mediterránea-Plus), a randomized controlled trial conducted in overweight/obese men and women (aged 55-75 y) with metabolic syndrome. The intervention group (IG) underwent an intensive weight loss lifestyle intervention based on an energy-restricted Mediterranean diet (MedDiet) and physical activity promotion, and the control group (CG) underwent a non-energy-restricted MedDiet for 1 y. Anthropometric, biochemical, and gut microbial 16S rRNA sequencing data were analyzed at baseline (n = 362) and 1-y follow-up (n = 343).

Results

IG participants had a weight loss of 4.2 (IQR, -6.8, -2.5) kg compared with 0.2 (IQR, -2.1, 1.4) kg in the CG (P < 0.001). Reductions in BMI, fasting glucose, glycated hemoglobin, and triglycerides and an increase in HDL cholesterol were greater in IG than in CG participants (P < 0.05). We observed a decrease in Butyricicoccus, Haemophilus, Ruminiclostridium 5, and Eubacterium hallii in the IG compared with the CG. Many genera shifted in the same direction within both intervention groups, indicating an overall effect of the MedDiet. Decreases in Haemophilus, Coprococcus 3, and few other genera were associated with a decrease in adiposity parameters in both intervention groups. Changes in Lachnospiraceae NK4A136 were positively associated with changes in MedDiet adherence.

Conclusions

Weight loss induced by an energy-restricted MedDiet and physical activity induce changes in gut microbiota. The role of MedDiet-induced changes on the host might be via short-chain fatty acid producing bacteria, whereas with energy restriction, these changes might be modulated with other mechanisms, which need to be explored in future studies. This trial was registered at http://www.isrctn.com/ISRCTN89898870 as ISRCT 89898870.","hji,kes",0,0,0,2,0,NA,NA +34020552,Genome-wide discovery of pre-miRNAs: comparison of recent approaches based on machine learning.,"

Motivation

The genome-wide discovery of microRNAs (miRNAs) involves identifying sequences having the highest chance of being a novel miRNA precursor (pre-miRNA), within all the possible sequences in a complete genome. The known pre-miRNAs are usually just a few in comparison to the millions of candidates that have to be analyzed. This is of particular interest in non-model species and recently sequenced genomes, where the challenge is to find potential pre-miRNAs only from the sequenced genome. The task is unfeasible without the help of computational methods, such as deep learning. However, it is still very difficult to find an accurate predictor, with a low false positive rate in this genome-wide context. Although there are many available tools, these have not been tested in realistic conditions, with sequences from whole genomes and the high class imbalance inherent to such data.

Results

In this work, we review six recent methods for tackling this problem with machine learning. We compare the models in five genome-wide datasets: Arabidopsis thaliana, Caenorhabditis elegans, Anopheles gambiae, Drosophila melanogaster, Homo sapiens. The models have been designed for the pre-miRNAs prediction task, where there is a class of interest that is significantly underrepresented (the known pre-miRNAs) with respect to a very large number of unlabeled samples. It was found that for the smaller genomes and smaller imbalances, all methods perform in a similar way. However, for larger datasets such as the H. sapiens genome, it was found that deep learning approaches using raw information from the sequences reached the best scores, achieving low numbers of false positives.

Availability

The source code to reproduce these results is in: http://sourceforge.net/projects/sourcesinc/files/gwmirna Additionally, the datasets are freely available in: https://sourceforge.net/projects/sourcesinc/files/mirdata.","hji,kes",0,0,0,2,0,NA,NA +34023895,LigAdvisor: a versatile and user-friendly web-platform for drug design.,"Although several tools facilitating in silico drug design are available, their results are usually difficult to integrate with publicly available information or require further processing to be fully exploited. The rational design of multi-target ligands (polypharmacology) and the repositioning of known drugs towards unmet therapeutic needs (drug repurposing) have raised increasing attention in drug discovery, although they usually require careful planning of tailored drug design strategies. Computational tools and data-driven approaches can help to reveal novel valuable opportunities in these contexts, as they enable to efficiently mine publicly available chemical, biological, clinical, and disease-related data. Based on these premises, we developed LigAdvisor, a data-driven webserver which integrates information reported in DrugBank, Protein Data Bank, UniProt, Clinical Trials and Therapeutic Target Database into an intuitive platform, to facilitate drug discovery tasks as drug repurposing, polypharmacology, target fishing and profiling. As designed, LigAdvisor enables easy integration of similarity estimation results with clinical data, thereby allowing a more efficient exploitation of information in different drug discovery contexts. Users can also develop customizable drug design tasks on their own molecules, by means of ligand- and target-based search modes, and download their results. 
LigAdvisor is publicly available at https://ligadvisor.unimore.it/.","hji,kes",0,0,0,2,0,draws from several data sources,Chuck Check - no +34023906,MyCLADE: a multi-source domain annotation server for sequence functional exploration.,"The ever-increasing number of genomic and metagenomic sequences accumulating in our databases requires accurate approaches to explore their content against specific domain targets. MyCLADE is a user-friendly webserver designed for targeted functional profiling of genomic and metagenomic sequences based on a database of a few million probabilistic models of Pfam domains. It uses the MetaCLADE multi-source domain annotation strategy, modelling domains based on multiple probabilistic profiles. MyCLADE takes a list of protein sequences and possibly a target set of domains/clans as input and, for each sequence, it provides a domain architecture built from the targeted domains or from all Pfam domains. It is linked to the Pfam and QuickGO databases in multiple ways for easy retrieval of domain and clan information. E-value, bit-score, domain-dependent probability scores and logos representing the match of the model with the sequence are provided to help the user to assess the quality of each annotation. Availability and implementation: MyCLADE is freely available at http://www.lcqb.upmc.fr/myclade.","hji,kes",0,0,0,2,0,software,NA +34025412,The Genus Eriosema (Fabaceae): From the Ethnopharmacology to an Evidence-Based Phytotherapeutic Perspective?,"The genus Eriosema (Fabaceae) includes approximately 150 species widely distributed across tropical and subtropical regions of the world (Africa, Neotropics, Asia and Australia). Throughout these regions, several species are used since centuries in different traditional medicinal systems, while others are used as food or food supplement. 
The present review attempts to critically summarize current information concerning the uses, phytochemistry and pharmacology of the Eriosema genus and to evaluate the therapeutic potential. The information published in English and French (up to September 2020) on ethnopharmacology or traditional uses, chemistry, pharmacology and toxicology of Eriosema genus was collected from electronic databases [SciFinder, PubMed, Google, Google Scholar, Scopus, Web of Science, Prelude Medicinal Plants-http://www.ethnopharmacologia.org/recherche-dans-prelude/?plant, The Plant List (http://www.theplantlist.org/), POWO (http://powo.science.kew.org/) and IUCN Red List Categories (https://www.iucnredlist.org/)], conference proceedings, books, M.Sc. and Ph.D. dissertations. The information retrieved on the ethnomedicinal indications of Eriosema genus allowed to list 25 species (∼16.6% of the genus). The majority of uses is recorded from Africa. Phytochemical analyses of 8 species led to the identification and/or isolation of 107 compounds, with flavonoids (69.2%), chromones (7.5%) and benzoic acid derivatives (3.7%) as the main chemical classes. Pharmacological investigations with crude extracts and isolated compounds showed a broad range of activities including aphrodisiac, estrogenic, anti-osteoporosis, hypolipidemic, anti-diabetic, anti-diarrheal, anti-microbial, anti-oxidant, anthelmintic, anti-cancer, and acetylcholinesterase inhibitory activities. Despite the low number of Eriosema species tested, there is convincing evidence in vitro and in vivo studies validating some traditional and ethnobotanical uses. However, the utility of several of the described uses has not yet been confirmed in pharmacological studies. 
Reviewed data could serve as a reference tool and preliminary information for advanced research on Eriosema species.","hji,kes",0,0,0,2,0,NA,not descriptive of the resource +34034357,Talking about post-injury sexual functioning: The views of people with spinal cord injuries-A qualitative interview study.,"

Aim

This study aimed to explore perceptions of people with spinal cord injuries regarding the information they received during their rehabilitation programme on post-injury sexual functioning.

Background

Spinal cord injury is a traumatic, life-altering event that is associated with loss of motor and sensory function and sexual impairment. Existing evidence suggests that sexual issues are poorly handled during the rehabilitation phase of the patient's journey.

Design

A descriptive qualitative design was utilized in this study.

Methods

Twenty-nine people with spinal cord injury participated in qualitative in-depth interviews between November 2017 and April 2018, and data were analysed using the Burnard (1991, https://doi.org/10.1016/0260-6917(91)90009-y) thematic analysis framework.

Results

Some participants indicated they were sexually inactive prior to their spinal cord injury. They testified that they had not received information on post-injury sexual functioning. Many participants who received post-injury information on sexual functioning reported dissatisfaction with the content and timing of this information.

Conclusion

Personal conversations between spinal cord injured patients and dedicated members of the interdisciplinary health team can enhance the quality of rehabilitation care and patients' satisfaction with rehabilitation care. Nurses are central clinicians in the rehabilitation programme of spinal cord injured patients and should engage in individually designed conversations about post-injury sexual functioning.","hji,kes",0,0,0,2,0,NA,NA +34037798,CPA: a web-based platform for consensus pathway analysis and interactive visualization.,"In molecular biology and genetics, there is a large gap between the ease of data collection and our ability to extract knowledge from these data. Contributing to this gap is the fact that living organisms are complex systems whose emerging phenotypes are the results of multiple complex interactions taking place on various pathways. This demands powerful yet user-friendly pathway analysis tools to translate the now abundant high-throughput data into a better understanding of the underlying biological phenomena. Here we introduce Consensus Pathway Analysis (CPA), a web-based platform that allows researchers to (i) perform pathway analysis using eight established methods (GSEA, GSA, FGSEA, PADOG, Impact Analysis, ORA/Webgestalt, KS-test, Wilcox-test), (ii) perform meta-analysis of multiple datasets, (iii) combine methods and datasets to accurately identify the impacted pathways underlying the studied condition and (iv) interactively explore impacted pathways, and browse relationships between pathways and genes. The platform supports three types of input: (i) a list of differentially expressed genes, (ii) genes and fold changes and (iii) an expression matrix. It also allows users to import data from NCBI GEO. 
The CPA platform currently supports the analysis of multiple organisms using KEGG and Gene Ontology, and it is freely available at http://cpa.tinnguyen-lab.com.","hji,kes",0,0,0,2,0,software,NA +34038112,CyProduct: A Software Tool for Accurately Predicting the Byproducts of Human Cytochrome P450 Metabolism.,"In silico metabolism prediction is a cheminformatic task of autonomously predicting the set of metabolic byproducts produced from a specified molecule and a set of enzymes or reactions. Here, we describe a novel machine learned in silico cytochrome P450 (CYP450) metabolism prediction suite, called CyProduct, that accurately predicts metabolic byproducts for a specified molecule and a human CYP450 isoform. It includes three modules: (1) CypReact, a tool that predicts if the query compound reacts with a given CYP450 enzyme, (2) CypBoM, a tool that accurately predicts the ""bond site"" of the reaction (i.e., which specific bonds within the query molecule react with the CYP isoform), and (3) MetaboGen, a tool that generates the metabolic byproducts based on CypBoM's bond-site prediction. CyProduct predicts metabolic biotransformation products for each of the nine most important human CYP450 enzymes. CypBoM uses an important new concept called ""bond of metabolism"" (BoM), which extends the traditional ""site of metabolism"" (SoM) by specifying the information about the set of chemical bonds that is modified or formed in a metabolic reaction (rather than the specific atom). We created a BoM database for 1845 CYP450-mediated Phase I reactions, then used this to train the CypBoM Predictor to predict the reactive bond locations on substrate molecules. CypBoM Predictor's cross-validated Jaccard score for reactive bond prediction ranged from 0.380 to 0.452 over the nine CYP450 enzymes. 
Over variants of a test set of 68 known CYP450 substrates and 30 nonreactants, CyProduct outperformed the other packages, including ADMET Predictor, BioTransformer, and GLORY, by an average of 200% (with respect to Jaccard score) in terms of predicting metabolites. The CyProduct suite and the data sets are freely available at https://bitbucket.org/wishartlab/cyproduct/src/master/.","hji,kes",0,0,0,2,0,software,NA +34038548,Estimage: a webserver hub for the computation of methylation age.,"Methylage is an epigenetic marker of biological age that exploits the correlation between the methylation state of specific CG dinucleotides (CpGs) and chronological age (in years), gestational age (in weeks), cellular age (in cell cycles or as telomere length, in kilobases). Using DNA methylation data, methylage is measurable via the so called epigenetic clocks. Importantly, alterations of the correlation between methylage and age (age acceleration or deceleration) have been stably associated with pathological states and occur long before clinical signs of diseases become overt, making epigenetic clocks a potentially disruptive tool in preventive, diagnostic and also in forensic applications. Nevertheless, methylage dependency from CpGs selection, mathematical modelling, tissue specificity and age range, still makes the potential of this biomarker limited. In order to enhance model comparisons, interchange, availability, robustness and standardization, we organized a selected set of clocks within a hub webservice, EstimAge (Estimate of methylation Age, http://estimage.iac.rm.cnr.it), which intuitively and informatively enables quick identification, computation and comparison of available clocks, with the support of standard statistics.","hji,kes",0,0,0,2,0,software,NA +34039904,Low Referral Rates for Genetic Assessment of Patients with Multiple Adenomas in United Kingdom Bowel Cancer Screening Programs.,"

Background

Approximately one in twenty cases of colorectal cancer are caused by monogenic syndromes. Published guidelines recommend that patients with ten or more adenomas be referred for genetic testing, based on evidence that colorectal cancer risk is associated with adenoma multiplicity.

Objective

The aim of this study was to determine adherence to guidelines on referral for genetic screening in patients with ten or more adenomas.

Design

A cross-sectional study was performed of prospectively collected data from the United Kingdom Bowel Cancer Screening Program between May 2007 and June 2018. Only histologically confirmed adenomas were included. Clinicopathological data were recorded from patient records and referrals to clinical genetics services were ascertained.

Setting

Data were obtained from three centers in London, United Kingdom.

Patients

A total of 17,450 subjects underwent colonoscopy following an abnormal fecal occult blood test.

Main outcome measures

We quantified patients with ten or more adenomas and the proportion referred for genetic screening.

Results

The adenoma detection rate was 50.6% amongst 17,450 patients who underwent colonoscopy (8,831 had one or more adenomas). 347 patients (2.0%) had 10 or more adenomas. Patients with 10 or more adenomas were more likely to be male than those with less than 10 adenomas (76.9% vs. 53.4%; p<0.0001). A family history was collected in 37.8% of the multiple adenoma population. Of 347 patients with 10 or more adenomas, 28 (8.1%) were referred for genetic assessment.

Limitations

All three screening centers were in a single city. No genetic outcome data were available to permit analysis of actual rates of inherited cancer syndromes in this population.

Conclusions

In this study, almost one in fifty patients had ten or more adenomas. Despite guidelines advising genetic testing in this group, referral rates are low. A referral pathway and management strategies should be established to address this patient population. See Video Abstract at http://links.lww.com/DCR/B630.","hji,kes",0,0,0,2,0,NA,NA +34040621,Easymap: A User-Friendly Software Package for Rapid Mapping-by-Sequencing of Point Mutations and Large Insertions.,"Mapping-by-sequencing strategies combine next-generation sequencing (NGS) with classical linkage analysis, allowing rapid identification of the causal mutations of the phenotypes exhibited by mutants isolated in a genetic screen. Computer programs that analyze NGS data obtained from a mapping population of individuals derived from a mutant of interest to identify a causal mutation are available; however, the installation and usage of such programs requires bioinformatic skills, modifying or combining pieces of existing software, or purchasing licenses. To ease this process, we developed Easymap, an open-source program that simplifies the data analysis workflows from raw NGS reads to candidate mutations. Easymap can perform bulked segregant mapping of point mutations induced by ethyl methanesulfonate (EMS) with DNA-seq or RNA-seq datasets, as well as tagged-sequence mapping for large insertions, such as transposons or T-DNAs. The mapping analyses implemented in Easymap have been validated with experimental and simulated datasets from different plant and animal model species. Easymap was designed to be accessible to all users regardless of their bioinformatics skills by implementing a user-friendly graphical interface, a simple universal installation script, and detailed mapping reports, including informative images and complementary data for assessment of the mapping results. 
Easymap is available at http://genetics.edu.umh.es/resources/easymap; its Quickstart Installation Guide details the recommended procedure for installation.","hji,kes",0,0,0,2,0,software,NA +34047888,"Advancing the Psychometric Study of Human Life History Indicators : K Does Not Measure Life History Speed, but Theory and Evidence Suggest It Deserves Further Attention.","In this article we attend to recent critiques of psychometric applications of life history (LH) theory to variance among humans and develop theory to advance the study of latent LH constructs. We then reanalyze data (n = 4,244) previously examined by Richardson et al. (Evolutionary Psychology, 15(1), 2017, https://doi.org/10.1177/1474704916666840 to determine whether (a) previously reported evidence of multidimensionality is robust to the modeling approach employed and (b) the structure of LH indicators is invariant by sex. Findings provide further evidence that a single LH dimension is implausible and that researchers should cease interpreting K-factor scores as empirical proxies for LH speed. In contrast to the original study, we detected a small inverse correlation between mating competition and Super-K that is consistent with a trade-off. Tests of measurement invariance across the sexes revealed evidence of metric invariance (i.e., equivalence of factor loadings), consistent with the theory that K is a proximate cause of its indicators; however, evidence of partial scalar invariance suggests use of scores likely introduces bias when the sexes are compared. We discuss limitations and identify approaches that researchers may use to further evaluate the validity of the K-factor and other applications of LH to human variation.","hji,kes",0,0,0,2,0,NA,NA +34048578,GalaxyHeteromer: protein heterodimer structure prediction by template-based and ab initio docking.,"Protein-protein interactions play crucial roles in diverse biological processes, including various disease progressions. 
Atomistic structural details of protein-protein interactions may provide important information that can facilitate the design of therapeutic agents. GalaxyHeteromer is a freely available automatic web server (http://galaxy.seoklab.org/heteromer) that predicts protein heterodimer complex structures from two subunit protein sequences or structures. When subunit structures are unavailable, they are predicted by template- or distance-prediction-based modelling methods. Heterodimer complex structures can be predicted by both template-based and ab initio docking, depending on the template's availability. Structural templates are detected from the protein structure database based on both the sequence and structure similarities. The templates for heterodimers may be selected from monomer and homo-oligomer structures, as well as from hetero-oligomers, owing to the evolutionary relationships of heterodimers with domains of monomers or subunits of homo-oligomers. In addition, the server employs one of the best ab initio docking methods when heterodimer templates are unavailable. The multiple heterodimer structure models and the associated scores, which are provided by the web server, may be further examined by user to test or develop functional hypotheses or to design new functional molecules.","hji,kes",0,0,0,2,0,NA,NA +34050758,GEPIA2021: integrating multiple deconvolution-based analysis into GEPIA.,"In 2017, we released GEPIA (Gene Expression Profiling Interactive Analysis) webserver to facilitate the widely used analyses based on the bulk gene expression datasets in the TCGA and the GTEx projects, providing the biologists and clinicians with a handy tool to perform comprehensive and complex data mining tasks. 
Recently, the deconvolution tools have led to revolutionary trends to resolve bulk RNA datasets at cell type-level resolution, interrogating the characteristics of different cell types in cancer and controlled cohorts became an important strategy to investigate the biological questions. Thus, we present GEPIA2021, a standalone extension of GEPIA, allowing users to perform multiple interactive analysis based on the deconvolution results, including cell type-level proportion comparison, correlation analysis, differential expression, and survival analysis. With GEPIA2021, experimental biologists could easily explore the large TCGA and GTEx datasets and validate their hypotheses in an enhanced resolution. GEPIA2021 is publicly accessible at http://gepia2021.cancer-pku.cn/.","hji,kes",0,0,0,2,0,NA,NA +34058399,BA-plotteR - A web tool for generating Bland-Altman plots and constructing limits of agreement.,"Investigators use Bland-Altman plot (Limits of Agreement plot) to compare two methods measuring the same continuous variable to determine interchangeability or agreement of the methods. The method has evolved to deal with heteroscedastic data and fixed or proportional biases (or both). Although an ordinary Bland-Altman plot can be readily made with various software applications, there is no free, open-source application that is dedicated to producing Bland-Altman plots and constructing limits of agreement for data that do not meet the assumptions of a simple comparison. To fill this gap, we created BA-plotteR, a web-based, open-source, freeware tool created in Shiny/R that is dedicated to creating Bland-Altman plots. We validated the tool using 20 datasets with various data distributions by comparing the output from the tool against manually derived results. The webtool handles data that requires a more complex analysis than is commonly available through commercial statistical programs. 
Moreover, the automated analysis of the data distribution will guide users and help them to correctly plot and analyse their data. The tool agreed perfectly with manually constructed plots. The Bland-Altman graphing tool provides clinical researchers with a tool that correctly analyzes and graphs studies involved in method comparisons. The tool can be accessed here: https://huygens.science.uva.nl/BA-plotteR.","hji,kes",0,0,0,2,0,software,NA +34058973,GLEANER: a web server for GermLine cycle Expression ANalysis and Epigenetic Roadmap visualization.,"

Background

Germline cells are important carriers of genetic and epigenetic information transmitted across generations in mammals. During the mammalian germline cell development cycle (i.e., the germline cycle), cell potency changes cyclically, accompanied by dynamic transcriptional changes and epigenetic reprogramming. Recently, to understand these dynamic and regulatory mechanisms, multiomic analyses, including transcriptomic and epigenomic analyses of DNA methylation, chromatin accessibility and histone modifications of germline cells, have been performed for different stages in human and mouse germline cycles. However, the long time span of the germline cycle and material scarcity of germline cells have largely limited the understanding of these dynamic characteristic changes. A tool that integrates the existing multiomics data and visualizes the overall continuous dynamic trends in the germline cycle can partially overcome such limitations.

Results

Here, we present GLEANER, a web server for GermLine cycle Expression ANalysis and Epigenetics Roadmap visualization. GLEANER provides a comprehensive collection of the transcriptome, DNA methylome, chromatin accessibility, and H3K4me3, H3K27me3, and H3K9me3 histone modification characteristics in human and mouse germline cycles. For each input gene, GLEANER shows the integrative analysis results of its transcriptional and epigenetic features, the genes with correlated transcriptional changes, and the overall continuous dynamic trends in the germline cycle. We further used two case studies to demonstrate the detailed functionality of GLEANER and highlighted that it can provide valuable clues to the epigenetic regulation mechanisms in the genetic and epigenetic information transmitted during the germline cycle.

Conclusions

To the best of our knowledge, GLEANER is the first web server dedicated to the analysis and visualization of multiomics data related to the mammalian germline cycle. GLEANER is freely available at http://compbio-zhanglab.org/GLEANER .","hji,kes",0,0,0,2,0,software,NA +34061414,COVID-19 spreading across world correlates with C677T allele of the methylenetetrahydrofolate reductase (MTHFR) gene prevalence.,"

Background

Homocysteine assessment has been proposed as a potential predictive biomarker for the severity of COVID-19 infection. The purpose of this review was to analyze the correlation between the prevalence of MTHFR C677 T gene polymorphism and COVID-19 incidence and mortality worldwide.

Methods

Data regarding MTHFR C677 T gene mutation were obtained from the interrogation of the Genome Aggregation Database (genomAD), which is publicly available from the web""https://gnomad.broadinstitute.org."" COVID-19 cases, including prevalence and mortality, were obtained from""https://www.worldometers.info/coronavirus"" 27 August 2020.

Results

There is a clear trend toward the worldwide prevalence of MTHFR 677 T and COVID-19 incidence and mortality. The prevalence of MTHFR 677 T allele in the Latino population, and the incidence and mortality for COVID-19 was higher for this ethnic group than that reported for most other populations globally. Statistical analysis showed a relatively strong correlation between C677 T and death from coronavirus.

Conclusions

Genetic polymorphism of MTHFR C677 T may modulate the incidence and severity of COVID-19 pandemic infection.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +34061826,"Galaxy-ML: An accessible, reproducible, and scalable machine learning toolkit for biomedicine.","Supervised machine learning is an essential but difficult to use approach in biomedical data analysis. The Galaxy-ML toolkit (https://galaxyproject.org/community/machine-learning/) makes supervised machine learning more accessible to biomedical scientists by enabling them to perform end-to-end reproducible machine learning analyses at large scale using only a web browser. Galaxy-ML extends Galaxy (https://galaxyproject.org), a biomedical computational workbench used by tens of thousands of scientists across the world, with a suite of tools for all aspects of supervised machine learning.","hji,kes",0,0,0,2,0,software,not descriptive of resource +34074827,Influence of Sedation on the Detection Rate of Early Cancer and Precancerous Lesions During Diagnostic Upper Gastrointestinal Endoscopies: A Multicenter Retrospective Study.,"

Introduction

The influence of sedation on the endoscopic detection rate of upper gastrointestinal (UGI) early cancer (EC) and precancerous lesions, including high-grade intraepithelial neoplasia (HGIN) and low-grade intraepithelial neoplasia, has not been assessed. The aim of this research is to assess whether the use of sedation can help improve the detection rate of UGI EC and precancerous lesions. The second objective is to evaluate its potential influencing factors.

Methods

The study includes 432,202 patients from a multicenter database from January 2012 to July 2019. Information on endoscopic findings and histology biopsies was obtained from endoscopy quality-control system. Associations of sedation with the detection rate of EC and precancerous lesions were assessed.

Results

The sedation group has a higher detection rate of UGI EC and HGIN compared with the no-sedation group, whereas the detection rate of low-grade intraepithelial neoplasia was similar between the 2 groups. There were more cases examined by using staining, image enhancement, or magnifying techniques in the sedation group (P < 0.001). And, the mean observation time was also longer in the sedation group (P < 0.001). The type 0-IIb esophageal HGIN and EC cases were significantly increased in the sedation group. No significant difference was detected on lesion subtypes for gastric HGIN and EC according to the Paris classification. More gastric HGIN and EC were detected at gastric body in the sedation group (P = 0.001).

Discussion

Sedation may improve the endoscopic detection rate of EC and HGIN in the UGI tract probably through enhancing the use of accessary endoscopic techniques, prolonging observation time, and taking more biopsies in different locations (see Visual Abstract, Supplementary Digital Content 2, http://links.lww.com/AJG/B926).","hji,kes",0,0,0,2,0,NA,NA +34076892,The PREPARE for Your Care program increases advance care planning engagement among diverse older adults with cancer.,"

Background

Advance care planning (ACP) is low among older adults with cancer. In a secondary analysis of randomized trial data, the authors compared the efficacy of the PREPARE for Your Care (PREPARE) website plus an easy-to-read advance directive (AD) with an AD only among older adults with and without cancer.

Methods

Safety net, primary care patients in San Francisco were included if they were 55 years old or older, were English- or Spanish-speaking, and had 2 or more chronic conditions. The authors determined cancer diagnoses by using International Classification of Diseases, Ninth Revision/Tenth Revision codes. The primary outcome was new ACP documentation in the medical record at 15 months; the secondary outcomes were self-reported ACP engagement, ease of use, satisfaction, and depression/anxiety. The authors used mixed effects logistic and linear regression adjusted for prior ACP, health literacy, and clinician, including a cancer interaction term.

Results

Of 986 participants, 220 (22%) had cancer. The mean age was 63 years (SD, 6 years), 61% were women, 81% were of a minority race/ethnicity, 45% were Spanish-speaking, 39% had limited health literacy, and 27% had prior ACP. New ACP documentation was higher in the PREPARE arm versus the AD-only arm among participants with cancer (62% vs 43%; P = .01) and without cancer (38% vs 28%; P = .01), as was ACP engagement in both arms (P < .001), with no interactions by cancer. Ease of use and satisfaction were high, and depression/anxiety was low, with no differences by study arm or by cancer/no cancer.

Conclusions

PREPARE plus an easy-to-read AD increased ACP documentation and engagement among diverse older adults with cancer more than an AD alone, with no increase in depression or anxiety between study arms or by cancer. PREPARE may help to decrease ACP disparities among patients with cancer.

Lay summary

Advance care planning (ACP) is the process of sharing values, goals, and preferences for medical care, but engagement in ACP is low among older adults with cancer. Among 986 English- and Spanish-speaking older adults from a safety net hospital, an interactive, multimedia, web-based ACP program (PREPARE for Your Care at https://prepareforyourcare.org/) plus an easy-to-read advance directive increased ACP documentation and engagement more than an advance directive alone. There were no differences in this increase in ACP between older adults with cancer and older adults without cancer. Also, engaging in ACP did not result in increased depression or anxiety.","hji,kes",0,0,0,2,0,NA,NA +34078106,Impact of Age and Alberta Stroke Program Early Computed Tomography Score 0 to 5 on Mechanical Thrombectomy Outcomes: Analysis From the STRATIS Registry.,"

Background and purpose

This study investigates clinical outcomes after mechanical thrombectomy in adult patients with baseline Alberta Stroke Program Early CT Score (ASPECTS) of 0 to 5.

Methods

We included data from the STRATIS Registry (Systematic Evaluation of Patients Treated With Neurothrombectomy Devices for Acute Ischemic Stroke) from patients who underwent mechanical thrombectomy within 8 hours of symptom onset and had available ASPECTS data adjudicated by an independent core laboratory. Angiographic and clinical outcomes were collected, including successful reperfusion (modified Thrombolysis in Cerebral Infarction ≥2b), functional independence (modified Rankin Scale score 0-2), 90-day mortality, and symptomatic intracranial hemorrhage at 24 hours. Outcomes were stratified by ASPECTS scores and age.

Results

Of the 984 patients enrolled, 763 had available ASPECTS data. Of these patients, 57 had ASPECTS of 0 to 5 with a median age of 63 years (interquartile range, 28-100), whereas 706 patients had ASPECTS of 6 to 10 with a median age of 70 years of age (interquartile range, 19-100). Ten patients had ASPECTS of 0 to 3 and 47 patients had ASPECTS of 4 to 5 at baseline. Successful reperfusion was achieved in 85.5% (47/55) in the ASPECTS of 0 to 5 group. Functional independence was achieved in 28.8% (15/52) in the ASPECTS of 0 to 5 versus 59.7% (388/650) in the 6 to 10 group (P<0.001). Mortality rates were 30.8% (16/52) in the ASPECTS of 0 to 5 and 13.4% (87/650) in the 6 to 10 group (P<0.001). sICH rates were 7.0% (4/57) in the ASPECTS of 0 to 5 and 0.9% (6/682) in the 6 to 10 group (P<0.001). No patients aged >75 years with ASPECTS of 0 to 5 (0/12) achieved functional independence versus 44.8% (13/29) of those age ≤65 (P=0.005).

Conclusions

Patients <65 years of age with large core infarction (ASPECTS 0-5) have better rates of functional independence and lower rates of mortality compared with patients >75 years of age. Registration: URL: https://www.clinicaltrials.gov; Unique identifier: NCT02239640.","hji,kes",0,0,0,2,0,NA,NA +34078215,"COVID-19 pandemic: SARS-CoV-2 specific vaccines and challenges, protection via BCG trained immunity, and clinical trials.","Introduction: The coronavirus disease 2019 (COVID-19) pandemic continues to spread worldwide and vaccination remains the most effective approach to control COVID-19. Currently, at least ten COVID-19 vaccines have been authorized under emergency authorization. However, these vaccines still face many challenges.Areas covered: This study reviews the concept and mechanisms of trained immunity induced by the Bacille Calmette Guérin (BCG) vaccine and identifies questions that should be answered before the BCG vaccine could be used to combat COVID-19 pandemic. Moreover, we present for the first time the details of current BCG vaccine clinical trials, which are underway in various countries, to assess its effectiveness in combating the COVID-19 pandemic. Finally, we discuss the challenges of COVID-19 vaccines and opportunities for the BCG vaccine. The literature was found by searching the PubMed (https://pubmed.ncbi.nlm.nih.gov/), Web of Science (https://www.webofknowledge.com), Embase (https://www.embase.com), and CNKI (https://www.cnki.net/) databases. The date was set as the default parameter for each database.Expert opinion: The advantages of the BCG vaccine can compensate for the shortcomings of other COVID-19 vaccines. 
If the efficacy of the BCG vaccine against COVID-19 is confirmed by these clinical trials, the BCG vaccine may be essential to resolve the challenges faced by COVID-19 vaccines.","hji,kes",0,0,0,2,0,mentions database,not descriptive of resource +34081107,PDBe Aggregated API: Programmatic access to an integrative knowledge graph of molecular structure data.,"

Summary

The PDBe aggregated API is an open-access and open-source RESTful API that provides programmatic access to a wealth of macromolecular structural data and their functional and biophysical annotations through 80+ API endpoints. The API is powered by the PDBe graph database (https://pdbe.org/graph-schema), an open-access integrative knowledge graph that can be used as a discovery tool to answer complex biological questions.

Availability and implementation

The PDBe aggregated API provides up-to-date access to the PDBe graph database, which has weekly releases with the latest data from the Protein Data Bank, integrated with updated annotations from UniProt, Pfam, CATH, SCOP and the PDBe-KB partner resources. The complete list of all the available API endpoints and their descriptions are available at https://pdbe.org/graph-api. The source code of the Python 3.6+ API application is publicly available at https://gitlab.ebi.ac.uk/pdbe-kb/services/pdbe-graph-api.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,interface to another database,tool for resource; not descriptive of resource +34085565,Patterns of Nutrient Intake in Relation to Gastric Cancer: A Case Control Study.,"Gastric Cancer (GC) is the most common cancer among Iranian men. We conducted a case-control study to investigate the association between patterns of nutrient intake and the risk of GC in Iran. We enrolled 178 GC patients and 271 controls matched for age and sex. We collected dietary intakes using a validated diet history questionnaire. We performed factor analysis on 28 nutrients using multivariate logistic regression models on tertiles of factor scores and estimated odds ratios (OR) and 95% confidence intervals (95% CI). We identified three nutrient patterns. The first pattern included pantothenic acid, riboflavin, zinc, animal protein, and calcium. Selenium, thiamin, carbohydrate, vegetable protein, niacin and low intake of vitamin E loaded the second pattern, and the third pattern was abundant in fiber, carotene, vitamin C and A. We found no significant association between GC and any of the dietary patterns. However, in the first patterns, men in the highest tertile had significantly higher odds of GC than the lowest (OR = 2.15, 95% CI: 1.13-4.09, p trend = 0.02). A dietary pattern loaded by animal products may increase the risk of GC among Iranian men. Larger studies are required to approve these findings in overall and in different subgroups.Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1931697.","hji,kes",0,0,0,2,0,NA,NA +34090324,Reproducible and accessible analysis of transposon insertion sequencing in Galaxy for qualitative essentiality analyses.,"

Background

Significant progress has been made in advancing and standardizing tools for human genomic and biomedical research. Yet, the field of next-generation sequencing (NGS) analysis for microorganisms (including multiple pathogens) remains fragmented, lacks accessible and reusable tools, is hindered by local computational resource limitations, and does not offer widely accepted standards. One such ""problem areas"" is the analysis of Transposon Insertion Sequencing (TIS) data. TIS allows probing of almost the entire genome of a microorganism by introducing random insertions of transposon-derived constructs. The impact of the insertions on the survival and growth under specific conditions provides precise information about genes affecting specific phenotypic characteristics. A wide array of tools has been developed to analyze TIS data. Among the variety of options available, it is often difficult to identify which one can provide a reliable and reproducible analysis.

Results

Here we sought to understand the challenges and propose reliable practices for the analysis of TIS experiments. Using data from two recent TIS studies, we have developed a series of workflows that include multiple tools for data de-multiplexing, promoter sequence identification, transposon flank alignment, and read count repartition across the genome. Particular attention was paid to quality control procedures, such as determining the optimal tool parameters for the analysis and removal of contamination.

Conclusions

Our work provides an assessment of the currently available tools for TIS data analysis. It offers ready to use workflows that can be invoked by anyone in the world using our public Galaxy platform ( https://usegalaxy.org ). To lower the entry barriers, we have also developed interactive tutorials explaining details of TIS data analysis procedures at https://bit.ly/gxy-tis .","hji,kes",0,0,0,2,0,NA,not descriptive of resource +34092780,"Transcultural Adaptation of Tibetan Nursing Trainees: A Case Study of ""9+3"" Vocational Technical Students in Sichuan Province, China.","BACKGROUND Nursing education is an important part of the ""9+3"" vocational education program led by Sichuan Province. In the internship stage, nursing students of Tibetan ethnicity may have problems of intercultural adaptation in the process of getting along with patients, which may affect the effective nursing outcome. The purpose of this study was to clarify the current situation of transcultural adaptation of Tibetan trainee nurses and to provide more theoretical support and guidance. MATERIAL AND METHODS We collected 237 valid survey questionnaires, based on Ward's acculturation process model, from a total of 363 Tibetan trainee nurses in the ""9+3"" free vocational education program in Chengdu, Luzhou, and Nanchong of Sichuan Province. The SPSSAU project (2020), an online application software retrieved from https://www.spssau.com, was used for data coding and archiving. RESULTS The results of questionnaire and data analysis showed that the overall level of transcultural adaptation of Tibetan trainee nurses was that the number of people with poor adaptation was slightly higher than those with good adaptation, and most Tibetan trainee nurses were in the middle level. Meanwhile, sociocultural adaptation was better than psychological adaptation. 
There were no statistically significant differences among the 4 grouping variables: gender, student home region, the city where the internship hospital was located, and whether they were from a single-child family or not. CONCLUSIONS The results revealed that there was still transcultural maladjustment among Tibetan nurses in the internship stage, and the psychological maladjustment was more obvious than the sociocultural maladjustment. We provide countermeasures and suggestions to solve the problems of transcultural adaptation reflected in the research.","hji,kes",0,0,0,2,0,NA,NA +34096110,'What matters to you?'-a qualitative study on the views of nursing home residents with dementia regarding the health care they receive.,"

Aims and objective

This study's aim is to examine what matters to nursing home residents with dementia by exploring their perceptions of nursing home health care through the conceptual lens of person-centred care.

Background

Dementia is a major contributor to nursing home placement. To understand the meaning of living with dementia, the inclusion of persons with dementia in research studies is essential.

Methods

In total, 35 in-depth qualitative interviews were conducted with people who have dementia and live in nursing homes. A thematic analysis was applied to analyse the data. Checklist for qualitative studies: Consolidated Criteria for Reporting Qualitative Research (COREQ) https://www.equator-network.org/reporting-guidelines/coreq/ RESULTS: The analysis revealed one overarching theme with four sub-themes. Different matchings of person-centred care and routines in health care being the overarching theme. The four sub-themes were as follows: (a) understanding of the interplay between disabilities and ageing; (b) participating based on one's own preferences and needs; (c) incongruence between the person with dementia's preferences and needs and health-care support; and (d) working conditions: the relationship between residents and health-care providers. Despite the substantive focus of researchers on person-centred care and the positive impact on the nursing home health care of those who receive it, the results showed that nursing home residents still want more person-centred care.

Conclusions

The results indicate that the incongruence between general routines and individual preferences and needs, as well as the demand to operationalise the person-centred dimensions of health-care behaviour in nursing homes, must be resolved. Health care in nursing homes must focus on enabling residents to participate in daily activities and sustain their personhood and sense of self.

Relevance to clinical practice

Based on the residents' statements, the results contribute to the fields of dementia education, health-care provision and policy-making and may be used to achieve person-centredness and governance.","hji,kes",0,0,0,2,0,NA,NA +34096320,Treated HIV Infection and Progression of Carotid Atherosclerosis in Rural Uganda: A Prospective Observational Cohort Study.,"Background Although ≈70% of the world's population of people living with HIV reside in sub-Saharan Africa, there are minimal prospective data on the contributions of HIV infection to atherosclerosis in the region. Methods and Results We conducted a prospective observational cohort study of people living with HIV on antiretroviral therapy >40 years of age in rural Uganda, along with population-based comparators not infected with HIV. We collected data on cardiovascular disease risk factors and carotid ultrasound measurements annually. We fitted linear mixed effects models, adjusted for cardiovascular disease risk factors, to estimate the association between HIV serostatus and progression of carotid intima media thickness (cIMT). We enrolled 155 people living with HIV and 154 individuals not infected with HIV and collected cIMT images at 1045 visits during a median of 4 annual visits per participant (interquartile range 3-4, range 1-5). Age (median 50.9 years) and sex (49% female) were similar by HIV serostatus. At enrollment, there was no difference in mean cIMT by HIV serostatus (0.665 versus 0.680 mm, P=0.15). In multivariable models, increasing age, blood pressure, and non-high-density lipoprotein cholesterol were associated with greater cIMT (P<0.05), however change in cIMT per year was also no different by HIV serostatus (0.004 mm/year for HIV negative [95% CI, 0.001-0.007 mm], 0.006 mm/year for people living with HIV [95% CI, 0.003-0.008 mm], HIV×time interaction P=0.25). Conclusions In rural Uganda, treated HIV infection was not associated with faster cIMT progression. 
These results do not support classification of treated HIV infection as a risk factor for subclinical atherosclerosis progression in rural sub-Saharan Africa. Registration URL: https://www.ClinicalTrials.gov; Unique identifier: NCT02445079.","hji,kes",0,0,0,2,0,NA,NA +34109245,Lung volume reduction in real clinical practice.,Pragmatic studies and capturing routine care clinical data in registration databases are important to further guide and optimise treatments in the future https://bit.ly/3el1lh7.,"hji,kes",0,0,0,2,0,NA,NA +34111367,"ActionHealthNYC: Effectiveness of a Health Care Access Program for the Uninsured, 2016‒2017.","Objectives. To evaluate the effectiveness of a novel health care access program (ActionHealthNYC) for uninsured immigrants. Methods. The evaluation was conducted as a randomized controlled trial in New York City from May 2016 through June 2017. Using baseline and follow-up survey data, we assessed health care access, patient experience, and health status. Results.At baseline, 25% of participants had a regular source of care; two thirds had visited a doctor in the past year and reported 2.5 visits in the past 12 months, on average. Nine to 12 months later, intervention participants were 1.2 times more likely to report having a primary care provider (58% vs 46%), were 1.2 times more likely to have seen a doctor in the past 9 months (91% vs 77%), and had 1.5 times more health care visits (4.1 vs 2.9) compared with control participants. Conclusions. ActionHealthNYC increased health care access among program participants. Public Health Implications. State and local policymakers should build on the progress that has been made over the last decade to expand and improve access to health care for uninsured immigrants. (Am J Public Health. Published online ahead of print June 10, 2021: e1-e10. 
https://doi.org/10.2105/AJPH.2021.306271).","hji,kes",0,0,0,2,0,NA,NA +34122055,Impact of Total Epinephrine Dose on Long Term Neurological Outcome for Cardiac Arrest Patients: A Cohort Study.,"Introduction: Although epinephrine is universally acknowledged to increase return of spontaneous circulation (ROSC) after cardiac arrest, its balanced effects on later outcomes remain uncertain, causing potential harm during post-resuscitation phase. Recent studies have questioned the efficacy and potential deleterious effects of epinephrine on long-term survival and neurological outcomes, despite that the adverse relationship between epinephrine dose and outcome can be partially biased by longer CPR duration and underlying comorbidities. This study explored the long-term effect of epinephrine when used in a cohort of patients that underwent cardiac arrest during cardiopulmonary resuscitation. Methods: The data were originally collected from a retrospective institutional database from January 2007 to December 2015 and are now available on Dryad (via: https://doi.org/10.5061/dryad.qv6fp83). Use of epinephrine was coded by dose (<2 mg, 2 mg, 3-4 mg, ≥5 mg). A favorable neurological outcome was defined using a Cerebral Performance Category (CPC) 1 or 2. The association between epinephrine dosing and 3-months neurological outcome was analyzed by univariate analysis and multivariate logistic regression. Results: Univariate and multivariate analysis demonstrated a negative association between total epinephrine dose and neurological outcome. Of the 373 eligible patients, 92 received less than 2 mg of epinephrine, 60 received 2 mg, 97 received 3-4 mg and 124 received more than 5 mg. 
Compared to patients who received less than 2 mg of epinephrine, the adjusted odds ratio (OR) of a favorable neurological outcome was 0.8 (95% confidence interval [CI]: 0.38-1.68) for 2 mg of epinephrine, 0.43 (95% confidence interval [CI]: 0.21-0.89) for 3-4 mg of epinephrine and 0.40 (95% confidence interval [CI]: 0.17-0.96) for more than 5 mg of epinephrine. Conclusion: In this cohort of patients who achieved ROSC, total epinephrine dosing during resuscitation was associated with a worse neurological outcome three months after cardiac arrest, after adjusting other confounding factors. Further researches are needed to investigate the long-term effect of epinephrine on cardiac arrest patients.","hji,kes",0,0,0,2,0,NA,NA +34122523,Development and Validation of a Hypoxia-Related Signature for Predicting Survival Outcomes in Patients With Bladder Cancer.,"

Objectives

This study aimed to develop and validate a hypoxia signature for predicting survival outcomes in patients with bladder cancer.

Methods

We downloaded the RNA sequence and the clinicopathologic data of the patients with bladder cancer from The Cancer Genome Atlas (TCGA) (https://portal.gdc.cancer.gov/repository?facetTab=files) and the Gene Expression Omnibus (GEO) (https://www.ncbi.nlm.nih.gov/geo/) databases. Hypoxia genes were retrieved from the Molecular Signatures Database (https://www.gsea-msigdb.org/gsea/msigdb/index.jsp). Differentially expressed hypoxia-related genes were screened by univariate Cox regression analysis and Lasso regression analysis. Then, the selected genes constituted the hypoxia signature and were included in multivariate Cox regression to generate the risk scores. After that, we evaluate the predictive performance of this signature by multiple receiver operating characteristic (ROC) curves. The CIBERSORT tool was applied to investigate the relationship between the hypoxia signature and the immune cell infiltration, and the maftool was used to summarize and analyze the mutational data. Gene-set enrichment analysis (GSEA) was used to investigate the related signaling pathways of differentially expressed genes in both risk groups. Furthermore, we developed a model and presented it with a nomogram to predict survival outcomes in patients with bladder cancer.

Results

Eight genes (AKAP12, ALDOB, CASP6, DTNA, HS3ST1, JUN, KDELR3, and STC1) were included in the hypoxia signature. The patients with higher risk scores showed worse overall survival time than the ones with lower risk scores in the training set (TCGA) and two external validation sets (GSE13507 and GSE32548). Immune infiltration analysis showed that two types of immune cells (M0 and M1 macrophages) had a significant infiltration in the high-risk group. Tumor mutation burden (TMB) analysis showed that the risk scores between the wild types and the mutation types of TP53, MUC16, RB1, and FGFR3 were significantly different. Gene-Set Enrichment Analysis (GSEA) showed that immune or cancer-associated pathways belonged to the high-risk groups and metabolism-related signal pathways were enriched into the low-risk group. Finally, we constructed a predictive model with risk score, age, and stage and validated its performance in GEO datasets.

Conclusion

We successfully constructed and validated a novel hypoxia signature in bladder cancer, which could accurately predict patients' prognosis.","hji,kes",0,0,0,2,0,NA,NA +34122663,Context aware benchmarking and tuning of a TByte-scale air quality database and web service.,"We present context-aware benchmarking and performance engineering of a mature TByte-scale air quality database system which was created by the Tropospheric Ozone Assessment Report (TOAR) and contains one of the world's largest collections of near-surface air quality measurements. A special feature of our data service https://join.fz-juelich.de is on-demand processing of several air quality metrics directly from the TOAR database. As a service that is used by more than 350 users of the international air quality research community, our web service must be easily accessible and functionally flexible, while delivering good performance. The current on-demand calculations of air quality metrics outside the database together with the necessary transfer of large volume raw data are identified as the major performance bottleneck. In this study, we therefore explore and benchmark in-database approaches for the statistical processing, which results in performance enhancements of up to 32%.","hji,kes",0,0,0,2,0,"describes a new feature to a biodata(?) resource. It is a very specific feature, and not sure if it is life sciences either.",not life sci +34124628,Uncovering transmission patterns of COVID-19 outbreaks: A region-wide comprehensive retrospective study in Hong Kong.,"

Background

Given the dynamism and heterogeneity of COVID-19 transmission patterns, determining the most effective yet timely strategies for specific regions remains a severe challenge for public health decision-makers.

Methods

In this work, we proposed a spatiotemporal connectivity analysis method for discovering transmission patterns across geographic locations and age-groups throughout different COVID-19 outbreak phases. First, we constructed the transmission networks of the confirmed cases during different phases by considering the spatiotemporal connectivity of any two cases. Then, for each case and those cases immediately pointed from it, we characterized the corresponding cross-district/population transmission pattern by counting their district-to-district and age-to-age occurrences. By summating the cross-district/population transmission patterns of all cases during a given period, we obtained the aggregated cross-district and cross-population transmission patterns.

Findings

We conducted a region-wide comprehensive retrospective study in Hong Kong based on the complete data report of COVID-19 cases, covering all 18 districts between January 23, 2020, and January 8, 2021 (https://data.gov.hk/en-data/dataset/hk-dh-chpsebcddr-novel-infectious-agent). The spatiotemporal connectivity analysis clearly unveiled the quantitative differences among various outbreak waves in their transmission scales, durations, and patterns. Moreover, for the statistically similar waves, their cross-district/population transmission patterns could be quite different (e.g., the cross-district transmission of the fourth wave was more diverse than that of the third wave, while the transmission over age-groups of the fourth wave was more concentrated than that of the third wave). At an overall level, super-spreader individuals (highly connected cases in the transmission networks) were usually concentrated in only a few districts (2 out of 18 in our study) or age-groups (3 out of 11 in our study).

Interpretation

With the discovered cross-district or cross-population transmission patterns, all of the waves of COVID-19 outbreaks in Hong Kong can be systematically scrutinized. Among all districts, quite a few (e.g., the Yau Tsim Mong district) were instrumental in spreading the virus throughout the pandemic. Aside from being exceptionally densely populated, these districts were also social-economic centers. With a variety of situated public venues, such as restaurants and singing/dancing clubs, these districts played host to all kinds of social gathering events, thereby providing opportunities for widespread and rapid transmission of the virus. Thus, these districts should be given the highest priority when deploying district-specific social distancing or intervention strategies, such as lockdown and stringent mandatory coronavirus testing for identifying and obstructing the chain of transmission. We also observed that most of the reported cases and the highly connected cases were middle-aged and elderly people (40- to 69-year-olds). People in these age-groups were active in various public places and social activities, and thus had high chances of being infected by or infecting others.

Funding

General research fund of the Hong Kong research grants council.","hji,kes",0,0,0,2,0,NA,NA +34126844,Incidence and prevalence of psychogenic nonepileptic seizures (functional seizures): a systematic review and an analytical study.,"

Aim

Psychogenic nonepileptic seizures (PNES) or functional seizures are universal phenomena. However, data on their epidemiology is limited. The aim of the current study was to review the literature on the epidemiology of PNES and to provide analytical estimates of its incidence and prevalence based on the direct data that are available from previous studies on PNES.

Methods

The methods of this work had two parts: (1) MEDLINE, PsycINFO, and Scopus from inception to 19 October 2019 were systematically searched. (2) The analytical study of the incidence and prevalence of PNES was performed, based on the following data from previous studies: incidence of PNES, duration of PNES before making a diagnosis, outcome and mortality of PNES.

Results

The search strategy yielded five articles; three were on the incidence and two on the prevalence. In the analytical part of the study, the incidence of PNES was calculated to be 3.1 (95% Confidence Interval: 1.1-5.1) per 100,000 population per year. The calculated prevalence rate of PNES in 2019 was 108.5 (95% Confidence Interval: 39.2-177.8) per 100,000 population, in the USA.

Conclusion

While, the generalizability of these calculated incidence and prevalence rates to other places in the world is limited, they give us a reasonable hint that PNES is a common condition and the prevalence is much more than that it was thought before.Supplemental data for this article is available online at https://doi.org/10.1080/00207454.2021.1942870.","hji,kes",0,0,0,2,0,NA,NA +34132752,"SANS serif: alignment-free, whole-genome based phylogenetic reconstruction.","

Summary

SANS serif is a novel software for alignment-free, whole-genome based phylogeny estimation that follows a pangenomic approach to efficiently calculate a set of splits in a phylogenetic tree or network.

Availability and implementation

Implemented in C++ and supported on Linux, MacOS, and Windows. The source code is freely available for download at https://gitlab.ub.uni-bielefeld.de/gi/sans.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,software,NA +34132767,VoroContacts: a tool for the analysis of interatomic contacts in macromolecular structures.,"

Summary

VoroContacts is a versatile tool for computing and analyzing contact surface areas (CSAs) and solvent accessible surface areas (SASAs) for 3D structures of proteins, nucleic acids and their complexes at the atomic resolution. CSAs and SASAs are derived using Voronoi tessellation of 3D structure, represented as a collection of atomic balls. VoroContacts web server features a highly configurable query interface, which enables on-the-fly analysis of contacts for selected set of atoms and allows filtering interatomic contacts by their type, surface areas, distance between contacting atoms and sequence separation between contacting residues. The VoroContacts functionality is also implemented as part of the standalone Voronota package, enabling batch processing.

Availability and implementation

https://bioinformatics.lt/wtsam/vorocontacts.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +34133393,Evaluation of Bowel Function after Surgical Treatment for Intestinal Endometriosis: A Prospective Study.,"

Background

Defecation symptoms related to intestinal deep infiltrative endometriosis are considered to be caused by anatomical and functional disorders, and are likely linked to the course of the disease and surgical treatment.

Objective

The primary aim of this study was to assess bowel function before and after intestinal deep infiltrative endometriosis surgery. Secondarily, we sought to correlate defecatory symptoms with preoperative risk factors.

Design/settings

This is a single center prospective cohort study, using the Low Anterior Resection Syndrome Score to evaluate bowel function 4 weeks before, as well as at 6 months and one year after surgery. Wilcoxon signed rank test and logistic multiple regression analyses were performed to compare preoperative and postoperative scores. For all of the comparisons, the level of significance was set at <0.05.

Patients

Thirty-seven adult female patients who underwent intestinal resection for deep infiltrative endometriosis between 2015 and 2017 were included.

Main outcome measures

The primary outcome was bowel function appraisement in deep infiltrative endometriosis intestinal surgery.

Results

During the preoperative evaluation, 48.6% of patients reported Low Anterior Resection Syndrome Score ≥21. This group presented a mean score of 17.9 ± 13.7, with a median of 20 and a range of 5 - 30. After one year, the mean score was decreased to 9.6 ± 11.1, with a median of 4 and a range of 0 - 22. A significant difference was detected when comparing the post- and preoperative scores (p=0.0006). It was also reported improvements in defecatory symptoms such as reduced fecal incontinence for flatus (p= 0.004) and liquid stools (p=0.014). The clustering of stools (p=0.005) and fecal urgency (p=0.001) also improved one year after surgery. The preoperative multiple logistic regression showed that dyschezia was the only independent variable associated with bowel symptoms.

Limitations

Despite it is a well-documented prospective study the data presented has a relatively small population.

Conclusions

This study provides evidence that intestinal deep infiltrative endometriosis surgery improves bowel function and has a positive impact on evacuation symptoms. See Video Abstract at http://links.lww.com/DCR/B534 .","hji,kes",0,0,0,2,0,NA,NA +34136652,Mixology: a tool for calculating required masses and volumes for laboratory solutions.,"Laboratory work often requires making up solutions with defined concentrations of various components. Mixology is a tool we have created to simplify calculation of the masses and volumes required to obtain particular concentrations. It operates with many kinds of volumetric, mass and concentration units, including conversion between molarity- and mass-based concentrations using molecular masses retrieved from the Chemical Entities of Biological Interest (ChEBI) database. Mixology can be accessed at https://mixology.science.","hji,kes",0,0,0,2,0,software,NA +34142845,"First report of Xanthomonas campestris pv. campestris as the causal agent of necrotic leaf spot in Phaseolus vulgaris at Puebla, Mexico.","Beans are the most cultivated legume in the world. In Mexico, it is the second most important crop after corn (FAO 2020; SIAP 2020). Bean plants ""Flor de Mayo M38"" variety were affected by a foliar disease during the agricultural cycle 2019 in Puebla-Mexico (19°02'46.6"" LN and 98°05'15.6"" LO). Necrotic V- shaped lesions were observed on the margins of the leaves surrounded by yellow halos followed by foliar necrosis, affecting 40% of the crop. In Mexico this variety of cultivars is in great demand for local consumption and generates income in foreign currency (Castellanos et al. 1997). Sampling was carried out on 50 plants ""Flor de Mayo M38"" variety, with necrotic leaf symptoms from ten plots of one hectare. Samples were cut into pieces (5 mm), disinfested with 1% hypochlorite 3 min, and washed with sterile distilled water. 
Subsequently, samples were dried on sterile paper and placed on Petri plates containing yeast extract calcium carbonate dextrose agar (YDC) medium and kept at 36°C for 3 days. Colonies of ten typical bacteria isolated from all symptomatic plants were Gram (-), small and uniform in size with rounded edges, yellow, convex with entire borders and mucoid appearance on YDC. Bacteria did not grow on 0.1% triphenyl tetrazolium chloride amended casamino acid, peptone, and glucose medium (CPG). Biochemical tests showed that isolates did not reduce nitrate to nitrites, had positive catalase and starch hydrolysis, while the Kovac oxidase test was negative (Schaad and White 1974). Genus identity of the representative isolate Xcf1-APJR, was confirmed by 16S rRNA encoding gene partial sequencing, using universal primers 518F (5'-CCAGCAGCCGCGGTAATACG-3') and 800R (5'-TACCAGGGTATCTAATCC-3') (Halim et al. 2020). BLASTn alignments against the nucleotide collection were 100% identical to Xanthomonas sequences including Xanthomonas campestris pv. campestris strains NZ_AP019684.1, CP025750.1, and MN108237.1. The 1,418 bp sequence was deposited in the GenBank database under accession number MT645246. The identification of species/pathovar was accomplished by serological methods using a polyclonal antiserum specific for X. campestris pv. campestris (Popovic ́ et al. 2013) with the DAS-ELISA commercial kit (catalog number 07122C/096, LOEWE Biochemica GmbH, Germany). The pathogenicity test was carried out on 50 healthy bean plants from the ""Flor de Mayo M38"" variety. Bacterial culture incubated at 28°C for 48 h in YDC medium was used to prepare the bacterial suspension (108 CFU mL-1). The first two lower leaves of 30-day-old plants were inoculated by sprinkling. Ten plants sprayed with sterile distilled water were used as negative control. All plants were kept for 20 days in greenhouse at 18-26°C and relative humidity of 60%. 
After seven days, chlorotic lesions developed on all inoculated plants that became necrotic from 14 days after inoculation (dai). Necrotic leaf spots merged at 14 dai to form necrotic areas of more than 20 mm in diameter, reaching total necrosis of the leaf tissue at 20 dai and were similar to the symptoms observed in the field. Koch's postulates were confirmed by the reisolation of Xcf1-APJR strain, which presented the same colony morphology, partial sequence, and polyclonal specific detection. This is the first report of this pathogen causing necrotic leaf spot in beans from the ""Flor de Mayo M38"" variety in Puebla-Mexico. The author(s) declare no conflict of interest. References: FAO. 2020. FAOSTAT. Food and Agriculture Data. http://www.fao.org/faostat/en/#home/. SIAP. 2020. Atlas Agroalimentario. https://www.gob.mx/siap/. Castellanos, J. Z., et al. 1997. Arch. Latinoam. Nutr. 47:163. Schaad, N. W., and White, W. C. 1974. Phytopathology. 64:876. https://doi.org/10.1094/Phyto-64-876 Halim, R. A., et al. 2020. HAYATI J. Biosciences. 27:215. https://doi.org/10.4308/hjb.27.3.215 Popovic ́, T., et al. 2013. Plant Dis. 97:418. https://doi.org/10.1094/PDIS-05-12-0506-PDN.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +34147079,ORFik: a comprehensive R toolkit for the analysis of translation.,"

Background

With the rapid growth in the use of high-throughput methods for characterizing translation and the continued expansion of multi-omics, there is a need for back-end functions and streamlined tools for processing, analyzing, and characterizing data produced by these assays.

Results

Here, we introduce ORFik, a user-friendly R/Bioconductor API and toolbox for studying translation and its regulation. It extends GenomicRanges from the genome to the transcriptome and implements a framework that integrates data from several sources. ORFik streamlines the steps to process, analyze, and visualize the different steps of translation with a particular focus on initiation and elongation. It accepts high-throughput sequencing data from ribosome profiling to quantify ribosome elongation or RCP-seq/TCP-seq to also quantify ribosome scanning. In addition, ORFik can use CAGE data to accurately determine 5'UTRs and RNA-seq for determining translation relative to RNA abundance. ORFik supports and calculates over 30 different translation-related features and metrics from the literature and can annotate translated regions such as proteins or upstream open reading frames (uORFs). As a use-case, we demonstrate using ORFik to rapidly annotate the dynamics of 5' UTRs across different tissues, detect their uORFs, and characterize their scanning and translation in the downstream protein-coding regions.

Conclusion

In summary, ORFik introduces hundreds of tested, documented and optimized methods. ORFik is designed to be easily customizable, enabling users to create complete workflows from raw data to publication-ready figures for several types of sequencing data. Finally, by improving speed and scope of many core Bioconductor functions, ORFik offers enhancement benefiting the entire Bioconductor environment.

Availability

http://bioconductor.org/packages/ORFik .","hji,kes",0,0,0,2,0,software,NA +34153027,pyTFM: A tool for traction force and monolayer stress microscopy.,"Cellular force generation and force transmission are of fundamental importance for numerous biological processes and can be studied with the methods of Traction Force Microscopy (TFM) and Monolayer Stress Microscopy. Traction Force Microscopy and Monolayer Stress Microscopy solve the inverse problem of reconstructing cell-matrix tractions and inter- and intra-cellular stresses from the measured cell force-induced deformations of an adhesive substrate with known elasticity. Although several laboratories have developed software for Traction Force Microscopy and Monolayer Stress Microscopy computations, there is currently no software package available that allows non-expert users to perform a full evaluation of such experiments. Here we present pyTFM, a tool to perform Traction Force Microscopy and Monolayer Stress Microscopy on cell patches and cell layers grown in a 2-dimensional environment. pyTFM was optimized for ease-of-use; it is open-source and well documented (hosted at https://pytfm.readthedocs.io/) including usage examples and explanations of the theoretical background. pyTFM can be used as a standalone Python package or as an add-on to the image annotation tool ClickPoints. In combination with the ClickPoints environment, pyTFM allows the user to set all necessary analysis parameters, select regions of interest, examine the input data and intermediary results, and calculate a wide range of parameters describing forces, stresses, and their distribution. 
In this work, we also thoroughly analyze the accuracy and performance of the Traction Force Microscopy and Monolayer Stress Microscopy algorithms of pyTFM using synthetic and experimental data from epithelial cell patches.","hji,kes",0,0,0,2,0,software,NA +34164199,"Construction, validation and, visualization of a web-based nomogram for predicting the overall survival and cancer-specific survival of leiomyosarcoma patients with lung metastasis.","

Background

This study sought to assess the prognostic factors for leiomyosarcoma (LMS) patients with lung metastasis and construct web-based nomograms to predict overall survival (OS) and cancer-specific survival (CSS).

Method

Patients diagnosed with LMS combined with lung metastasis between 2010 and 2016 were identified in the Surveillance, Epidemiology, and End Results (SEER) database. The patients were randomly divided into a training set and a testing set. The X-tile analysis provides the best age and tumor size cut-off point, and changes continuous variables into categorical variables. The independent prognostic factors were determined by Cox regression analysis, and 2 nomograms were established. Receiver operating characteristic curves and calibration curves were used to evaluate the nomograms. Based on the nomograms, 2 web-based nomograms were established.

Results

Two hundred and twenty-eight cases were included in the OS nomogram construction, and were randomly divided into a training set (n=160) and a validation set (n=68). Age, T stage, bone metastasis, surgery, chemotherapy, marital status, tumor size, and tumor site were found to be correlated with OS. One hundred and eighty-three cases were enrolled in the CSS nomogram construction, and randomly divided into a training set (n=129) and a validation set (n=54). Age, bone metastasis, surgery, chemotherapy, tumor size, and tumor site were found to be correlated with CSS. Two nomograms were established to predict OS and CSS. In the training set, the areas under the curve of the nomogram for predicting 1-, 2-, and 3-year OS were 0.783, 0.830, and 0.832, respectively, and those for predicting 1-, 2-, and 3-year CSS were 0.889, 0.777, and 0.884, respectively. Two web-based nomograms were established to predict OS (https://wenn23.shinyapps.io/lmslmosapp/), and CSS (https://wenn23.shinyapps.io/lmslmcssapp/).

Conclusion

The developed web-based nomogram is a useful tool for accurately analyzing the prognosis of LMS patients with lung metastasis, and could help clinical doctors to make personalized clinical decisions.","hji,kes",0,0,0,2,0,NA,NA +34164647,DUI: the drug use insights web server.,"

Motivation

Substance abuse constitutes one of the major contemporary health epidemics. Recently, the use of social media platforms has garnered interest as a novel source of data for drug addiction epidemiology. Often however, the language used in such forums comprises slang and jargon. Currently, there are no publicly available resources to automatically analyse the esoteric language-use in the social media drug-use sub-culture. This lacunae introduces critical challenges for interpreting, sensemaking and modeling of addiction epidemiology using social media.

Results

Drug-Use Insights (DUI) is a public and open-source web application to address the aforementioned deficiency. DUI is underlined by a hierarchical taxonomy encompassing 108 different addiction related categories consisting of over 9,000 terms, where each category encompasses a set of semantically related terms. These categories and terms were established by utilizing thematic analysis in conjunction with term embeddings generated from 7,472,545 Reddit posts made by 1,402,017 redditors. Given post(s) from social media forums such as Reddit and Twitter, DUI can be used foremost to identify constituent terms related to drug use. Furthermore, the DUI categories and integrated visualization tools can be leveraged for semantic- and exploratory analysis. To the best of our knowledge, DUI utilizes the largest number of substance use and recovery social media posts used in a study and represents the first significant online taxonomy of drug abuse terminology.

Availability

The DUI web server and source code are available at: http://haddock9.sfsu.edu/insight/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +34165340,Application of Text Mining in Risk Assessment of Chemical Mixtures: A Case Study of Polycyclic Aromatic Hydrocarbons (PAHs).,"

Background

Cancer risk assessment of complex exposures, such as exposure to mixtures of polycyclic aromatic hydrocarbons (PAHs), is challenging due to the diverse biological activities of these compounds. With the help of text mining (TM), we have developed TM tools-the latest iteration of the Cancer Risk Assessment using Biomedical literature tool (CRAB3) and a Cancer Hallmarks Analytics Tool (CHAT)-that could be useful for automatic literature analyses in cancer risk assessment and research. Although CRAB3 analyses are based on carcinogenic modes of action (MOAs) and cover almost all the key characteristics of carcinogens, CHAT evaluates literature according to the hallmarks of cancer referring to the alterations in cellular behavior that characterize the cancer cell.

Objectives

The objective was to evaluate the usefulness of these tools to support cancer risk assessment by performing a case study of 22 European Union and U.S. Environmental Protection Agency priority PAHs and diesel exhaust and a case study of PAH interactions with silica.

Methods

We analyzed PubMed literature, comprising 57,498 references concerning priority PAHs and complex PAH mixtures, using CRAB3 and CHAT.

Results

CRAB3 analyses correctly identified similarities and differences in genotoxic and nongenotoxic MOAs of the 22 priority PAHs and grouped them according to their known carcinogenic potential. CHAT had the same capacity and complemented the CRAB output when comparing, for example, benzo[a]pyrene and dibenzo[a,l]pyrene. Both CRAB3 and CHAT analyses highlighted potentially interacting mechanisms within and across complex PAH mixtures and mechanisms of possible importance for interactions with silica.

Conclusion

These data suggest that our TM approach can be useful in the hazard identification of PAHs and mixtures including PAHs. The tools can assist in grouping chemicals and identifying similarities and differences in carcinogenic MOAs and their interactions. https://doi.org/10.1289/EHP6702.","hji,kes",0,0,0,2,0,NA,NA +34165986,BlackSheep: A Bioconductor and Bioconda Package for Differential Extreme Value Analysis.,"Unbiased assays such as shotgun proteomics and RNA-seq provide high-resolution molecular characterization of tumors. These assays measure molecules with highly varied distributions, making interpretation and hypothesis testing challenging. Samples with the most extreme measurements for a molecule can reveal the most interesting biological insights yet are often excluded from analysis. Furthermore, rare disease subtypes are, by definition, underrepresented in cancer cohorts. To provide a strategy for identifying molecules aberrantly enriched in small sample cohorts, we present BlackSheep, a package for nonparametric description and differential analysis of genome-wide data, available from Bioconductor (https://www.bioconductor.org/packages/release/bioc/html/blacksheepr.html) and Bioconda (https://bioconda.github.io/recipes/blksheep/README.html). BlackSheep is a complementary tool to other differential expression analysis methods, which is particularly useful when analyzing small subgroups in a larger cohort.","hji,kes",0,0,0,2,0,software,NA +34174821,"Construction of a high-density linkage map and graphical representation of the arrangement of transcriptome-based unigene markers on the chromosomes of onion, Allium cepa L.","

Background

Genomic information for Allium cepa L. is limited as it is heterozygous and its genome is very large. To elucidate potential SNP markers obtained by NGS, we used a complete set of A. fistulosum L.-A. cepa monosomic addition lines (MALs) and doubled haploids (DHs). These were the parental lines of an A. cepa mapping population for transcriptome-based SNP genotyping.

Results

We mapped the transcriptome sequence reads from a series of A. fistulosum-A. cepa MALs onto the unigene sequence of the doubled haploid shallot A. cepa Aggregatum group (DHA) and compared the MAL genotype call for parental bunching onion and shallot transcriptome mapping data. We identified SNP sites with at least four reads on 25,462 unigenes. They were anchored on eight A. cepa chromosomes. A single SNP site was identified on 3,278 unigenes and multiple SNPs were identified on 22,184 unigenes. The chromosome marker information was made public via the web database Allium TDB ( http://alliumtdb.kazusa.or.jp/ ). To apply transcriptome based genotyping approach for genetic mapping, we gathered RNA sequence data from 96 lines of a DHA × doubled haploid bulb onion A. cepa common onion group (DHC) mapping population. After selecting co-dominant SNP sites, 16,872 SNPs were identified in 5,339 unigenes. Of these, at least two SNPs with identical genotypes were found in 1,435 unigenes. We developed a linkage map using genotype information from these unigenes. All unigene markers mapped onto the eight chromosomes and graphical genotyping was conducted based on the unigene order information. Another 2,963 unigenes were allocated onto the eight chromosomes. To confirm the accuracy of this transcriptome-based genetic linkage map, conventional PCR-based markers were used for linkage analysis. All SNP - and PCR-based markers were mapped onto the expected linkage groups and no inconsistency was found among these chromosomal locations.

Conclusions

Effective transcriptome analysis with unique Allium resources successfully associated numerous chromosome markers with unigene information and a high-density A. cepa linkage map. The information on these unigene markers is valuable in genome sequencing and useful trait detection in Allium.","hji,kes",0,0,0,2,0,references other db,not descriptive of resource +34175254,Robot-assisted Versus Open Radical Cystectomy in Bladder Cancer: An Economic Evaluation Alongside a Multicentre Comparative Effectiveness Study.,"

Background

Open radical cystectomy (ORC) is regarded as the standard treatment for muscle-invasive bladder cancer, but robot-assisted radical cystectomy (RARC) is increasingly used in practice. A recent study showed that RARC resulted in slightly fewer minor but slightly more major complications, although the difference was not statistically significant. Some differences were found in secondary outcomes favouring either RARC or ORC. RARC use is expected to increase in coming years, which fuels the debate about whether RARC provides value for money.

Objective

To assess the cost-effectiveness of RARC compared to ORC in bladder cancer.

Design, setting, and participants

This economic evaluation was performed alongside a prospective multicentre comparative effectiveness study. We included 348 bladder cancer patients (ORC, n = 168; RARC, n = 180) from 19 Dutch hospitals.

Outcome measurements and statistical analysis

Over 1 yr, we assessed the incremental cost per quality-adjusted life year (QALY) gained from both healthcare and societal perspectives. We used single imputation nested in the bootstrap percentile method to assess missing data and uncertainty, and inverse probability of treatment weighting to control for potential bias. Deterministic sensitivity analyses were performed to explore the impact of various parameters on the cost difference.

Results and limitations

The mean healthcare cost per patient was €17 141 (95% confidence interval [CI] €15 791-€18 720) for ORC and €21 266 (95% CI €19 163-€23 650) for RARC. The mean societal cost per patient was €18 926 (95% CI €17 431-€22 642) for ORC and €24 896 (95% CI €21 925-€31 888) for RARC. On average, RARC patients gained 0.79 QALYs (95% CI 0.74-0.85) compared to 0.81 QALYs (95% CI 0.77-0.85) for ORC patients, resulting in a mean QALY difference of -0.02 (95% CI -0.05 to 0.02). Using a cost-effectiveness threshold of €80 000, RARC was cost-effective in 0.6% and 0.2% of the replications for the healthcare and societal perspectives, respectively.

Conclusions

RARC shows no difference in terms of QALYs, but is more expensive than ORC. Hence, RARC does not seem to provide value for money in comparison to ORC.

Patient summary

This study assessed the relation between costs and effects of robot-assisted surgery compared to open surgery for removal of the bladder in 348 Dutch patients with bladder cancer. We found that after 1 year, the two approaches were similarly effective according to a measure called quality-adjusted life years, but robot-assisted surgery was much more expensive. This trial was prospectively registered in the Netherlands Trial Register as NTR5362 (https://www.trialregister.nl/trial/5214).","hji,kes",0,0,0,2,0,NA,NA +34179956,The DNA methylation haplotype (mHap) format and mHapTools.,"

Summary

Bisulfite sequencing (BS-seq) is currently the gold standard for measuring genome-wide DNA methylation profiles at single-nucleotide resolution. Most analyses focus on mean CpG methylation and ignore methylation states on the same DNA fragments [DNA methylation haplotypes (mHaps)]. Here, we propose mHap, a simple DNA mHap format for storing DNA BS-seq data. This format reduces the size of a BAM file by 40- to 140-fold while retaining complete read-level CpG methylation information. It is also compatible with the Tabix tool for fast and random access. We implemented a command-line tool, mHapTools, for converting BAM/SAM files from existing platforms to mHap files as well as post-processing DNA methylation data in mHap format. With this tool, we processed all publicly available human reduced representation bisulfite sequencing data and provided these data as a comprehensive mHap database.

Availability and implementation

https://jiantaoshi.github.io/mHap/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,"database of reformatted previous data; reassessed, the data are fine, resource is not since githubio","no notes; reassessed and no still - in github.io, format, tool, not a data resource in and of itself" +34180600,Primary antibiotic resistance of Helicobacter pylori isolates is twofold more frequent in HIV-positive than HIV-negative individuals: A descriptive observational study.,"The antimicrobial susceptibility of Helicobacter pylori strains isolated from HIV-positive individuals is not well characterized. This study aimed to measure the prevalence and long-term trends associated with primary H. pylori antibiotic resistance, evaluate correlations with antibiotic consumption, and compare predictors for H. pylori antibiotic resistance between HIV-positive and HIV-negative individuals. In this longitudinal registry study, we evaluated consecutive adults with and without HIV infection, naïve to H. pylori treatment, who underwent upper gastrointestinal endoscopy and had a positive H. pylori culture, with susceptibility testing available, between 2004 and 2015. Outpatient antibiotic consumption data were based on nationwide aggregated numbers. H. pylori was isolated from gastric biopsies of 3008/8321 patients, 181/477 (37.9%) were HIV-positive and 2827/7844 (36.0%) HIV-negative. Overall cohort mean prevalence of H. pylori primary antibiotic resistance was 11.1% for clarithromycin, 17.8% levofloxacin, and 39.4% metronidazole. The prevalence of H. pylori primary resistance was significantly higher for these three drugs in HIV-positive individuals across the study period. Linear regression showed that the prevalence of clarithromycin and levofloxacin resistance correlated with the country aggregate daily dose consumption of macrolides and quinolones, respectively. Multivariable regression analysis showed that HIV infection is a strong independent risk factor for multiple H. 
pylori antibiotic resistance. In summary, HIV infection is a risk factor for carrying multi-resistant H. pylori strains and this is correlated with antibiotic consumption. Empirical therapies should be avoided in HIV-positive individuals. These data highlight the need to implement ongoing monitoring of H. pylori antimicrobial susceptibility among HIV-positive individuals. The study is registered at ISRCTN registry, number 13466428: https://www.isrctn.com/ISRCTN13466428.","hji,kes",0,0,0,2,0,NA,NA +34181220,"The First Nations Food, Nutrition and Environment Study (2008-2018)-rationale, design, methods and lessons learned.","

Objective

To describe the rationale, the participatory nature of the methodology, and the lessons learned during the First Nations Food, Nutrition and Environment Study (FNFNES), a community-based participatory research project implemented in eight Assembly of First Nations regions, which includes the entirety of Canada south of the 60th parallel.

Methods

FNFNES respected the First Nations principles of Ownership, Control, Access and Possession (OCAP®) ( https://fnigc.ca/ocap ). A random sampling strategy based on an ecosystem framework comprising 11 ecozones was adopted to collect representative nutritional and environmental health results for all First Nations adults living on-reserve south of the 60th parallel. Data collection occurred during the fall months from 2008 to 2016. Respective First Nations were involved in the planning and implementation of data collection for the five principal components: household interviews, tap water sampling for metals, surface water sampling for pharmaceuticals, hair sampling for mercury, and traditional food sampling for contaminants.

Results

A total of 6487 adults from 92 First Nations participated in the Study (participation rate 78%). A higher percentage of females (66%) participated than males (34%). The average age of males and females was similar (44 and 45 years, respectively). This study offers a novel body of coherent and regionally representative evidence on the human dimension of the ongoing environmental degradation affecting First Nations.

Conclusion

FNFNES serves as a good example of participatory research. We encourage public health professionals to develop policy and programs building on the participatory dimension of the research as well as on its results. The information collected by the FNFNES is also important for community empowerment, environmental stewardship and the general promotion of good health by and for First Nations peoples in Canada.","hji,kes",0,0,0,2,0,NA,NA +34181547,An Approach of Epistasis Detection Using Integer Linear Programming Optimizing Bayesian Network.,"Proposing a more effective and accurate epistatic loci detection method in large-scale genomic data has important research significance. Bayesian network (BN) has been widely used in constructing the network of SNPs and phenotype traits and thus to mine epistatic loci. In this work, we transform the problem of learning Bayesian network into the optimization of integer linear programming (ILP). We use the algorithms of branch-and-bound and cutting planes to get the global optimal Bayesian network (ILPBN), and thus to get epistatic loci influencing specific phenotype traits. In order to handle large-scale of SNP loci and further to improve efficiency, we use the method of optimizing Markov blanket to reduce the number of candidate parent nodes for each node. In addition, we use -BIC that is suitable for processing the epistatis mining to calculate the BN score. We use four properties of BN decomposable scoring functions to further reduce the number of candidate parent sets for each node. Finally, we compare ILPBN with several popular epistasis mining algorithms by using simulated and real Age-related macular disease (AMD) dataset. Experiment results show that ILPBN has better epistasis detection accuracy, F1-score and false positive rate in premise of ensuring the efficiency. 
Availability: http://122.205.95.139/ILPBN/.","hji,kes",0,0,0,2,0,NA,NA +34188160,NOD: a web server to predict New use of Old Drugs to facilitate drug repurposing.,"Computational methods accelerate the drug repurposing pipelines that are a quicker and cost-effective alternative to discovering new molecules. However, there is a paucity of web servers to conduct fast, focussed, and customized investigations for identifying new uses of old drugs. We present the NOD web server, which has the mentioned characteristics. NOD uses a sensitive sequence-guided approach to identify close and distant homologs of a protein of interest. NOD then exploits this evolutionary information to suggest potential compounds from the DrugBank database that can be repurposed against the input protein. NOD also allows expansion of the chemical space of the potential candidates through similarity searches. We have validated the performance of NOD against available experimental and/or clinical reports. In 65.6% of the investigated cases in a control study, NOD is able to identify drugs more effectively than the searches made in DrugBank. NOD is freely-available at http://pauling.mbu.iisc.ac.in/NOD/NOD/ .","hji,kes",0,0,0,2,0,NA,NA +34191783,A Web-Based Deep Learning Model for Automated Diagnosis of Otoscopic Images.,"

Objectives

To develop a multiclass-classifier deep learning model and website for distinguishing tympanic membrane (TM) pathologies based on otoscopic images.

Methods

An otoscopic image database developed by utilizing publicly available online images and open databases was assessed by convolutional neural network (CNN) models including ResNet-50, Inception-V3, Inception-Resnet-V2, and MobileNetV2. Training and testing were conducted with a 75:25 breakdown. Area under the curve of receiver operating characteristics (AUC-ROC), accuracy, sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) were used to compare different CNN models' performances in classifying TM images.

Results

Our database included 400 images, organized into normal (n = 196) and abnormal classes (n = 204), including acute otitis media (n = 116), otitis externa (n = 44), chronic suppurative otitis media (n = 23), and cerumen impaction (n = 21). For binary classification between normal versus abnormal TM, the best performing model had average AUC-ROC of 0.902 (MobileNetV2), followed by 0.745 (Inception-Resnet-V2), 0.731 (ResNet-50), and 0.636 (Inception-V3). Accuracy ranged between 0.73-0.77, sensitivity 0.72-0.88, specificity 0.58-0.84, PPV 0.68-0.81, and NPV 0.73-0.83. Macro-AUC-ROC for MobileNetV2 based multiclass-classifier was 0.91, with accuracy of 66%. Binary and multiclass-classifier models based on MobileNetV2 were loaded onto a publicly accessible and user-friendly website (https://headneckml.com/tympanic). This allows the readership to upload TM images for real-time predictions using the developed algorithms.

Conclusions

Novel CNN algorithms were developed with high AUC-ROCs for differentiating between various TM pathologies. This was further deployed as a proof-of-concept publicly accessible website for real-time predictions.","hji,kes",0,0,0,2,0,software,out of scope +34192666,"Open science datasets from PREVENT-AD, a longitudinal cohort of pre-symptomatic Alzheimer's disease.","To move Alzheimer Disease (AD) research forward it is essential to collect data from large cohorts, but also make such data available to the global research community. We describe the creation of an open science dataset from the PREVENT-AD (PResymptomatic EValuation of Experimental or Novel Treatments for AD) cohort, composed of cognitively unimpaired older individuals with a parental or multiple-sibling history of AD. From 2011 to 2017, 386 participants were enrolled (mean age 63 years old ± 5) for sustained investigation among whom 349 have retrospectively agreed to share their data openly. Repositories are findable through the unified interface of the Canadian Open Neuroscience Platform and contain up to five years of longitudinal imaging data, cerebral fluid biochemistry, neurosensory capacities, cognitive, genetic, and medical information. Imaging data can be accessed openly at https://openpreventad.loris.ca while most of the other information, sensitive by nature, is accessible by qualified researchers at https://registeredpreventad.loris.ca. In addition to being a living resource for continued data acquisition, PREVENT-AD offers opportunities to facilitate understanding of AD pathogenesis.","hji,kes",1,0,1,2,0.5,"not open access, but no paywall. Clinical in nature",out of scope; reassessed and still no - clinical data +34192711,A United States Rectal Cancer Consortium Study of Inferior Mesenteric Artery versus Superior Rectal Artery Ligation: How High Do We Need to Go?,"

Background

The optimal level of pedicle ligation during proctectomy for rectal cancer, either at the origin of the inferior mesenteric artery or the superior rectal artery, is still debated.

Objective

The objective was to determine if superior rectal artery ligation portends equivalent technical or oncologic outcomes.

Design

This was a retrospective analysis of a rectal cancer database (2007-2017).

Settings

The study was conducted at six tertiary referral centers in the United States (Emory University, University of Michigan, University of Pittsburgh Medical Center, The Ohio State University Wexner Medical Center, Vanderbilt University Medical Center, and Washington University School of Medicine in St. Louis).

Patients

Patients with primary, non-metastatic rectal cancer who underwent low anterior resection or abdominoperineal resection were included.

Main outcome measures

Anastomotic leak, lymph node harvest, locoregional recurrence-free survival, recurrence-free survival, and overall survival were measured.

Results

Of 877 patients, 86% (n=755) received an inferior mesenteric artery ligation while 14% (n=122) received a superior rectal artery ligation. 12%, 33%, 24%, and 31% were pathologic stage 0, I, II, and III, respectively. Median follow-up was 31 months. Superior rectal artery ligation was associated with a similar anastomotic leak rate compared to inferior mesenteric artery ligation (9vs8%, p=1.0). Median number of lymph nodes removed was identical (15vs15, p=0.38). On multivariable analysis accounting for relevant clinicopathologic factors, superior rectal artery ligation was not associated with increased anastomotic leak rate, worse lymph node harvest, or worse locoregional recurrence-free survival, recurrence-free survival, or overall survival (all p>0.1).

Limitations

This was a retrospective design.

Conclusions

Compared to inferior mesenteric artery ligation, superior rectal artery ligation is not associated with either worse technical or oncologic outcomes. Given the potential risks of inadequate blood flow to the proximal limb of the anastomosis and autonomic nerve injury, we advocate for increased utilization of superior rectal artery ligation. See Video Abstract at http://links.lww.com/DCR/B646.","hji,kes",0,0,0,2,0,NA,NA +34194678,WADDAICA: A webserver for aiding protein drug design by artificial intelligence and classical algorithm.,"Artificial intelligence can train the related known drug data into deep learning models for drug design, while classical algorithms can design drugs through established and predefined procedures. Both deep learning and classical algorithms have their merits for drug design. Here, the webserver WADDAICA is built to employ the advantage of deep learning model and classical algorithms for drug design. The WADDAICA mainly contains two modules. In the first module, WADDAICA provides deep learning models for scaffold hopping of compounds to modify or design new novel drugs. The deep learning model which is used in WADDAICA shows a good scoring power based on the PDBbind database. In the second module, WADDAICA supplies functions for modifying or designing new novel drugs by classical algorithms. WADDAICA shows better Pearson and Spearman correlations of binding affinity than Autodock Vina that is considered to have the best scoring power. Besides, WADDAICA supplies a friendly and convenient web interface for users to submit drug design jobs. We believe that WADDAICA is a useful and effective tool to help researchers to modify or design novel drugs by deep learning models and classical algorithms. 
WADDAICA is free and accessible at https://bqflab.github.io or https://heisenberg.ucam.edu:5000.","hji,kes",0,0,0,2,0,software,NA +34196957,elaborator: A Novel App for Insights into Laboratory Data of Clinical Trials.,"In clinical studies there are huge numbers of laboratory parameters available that are measured at several visits for several treatment groups. The status quo for presenting laboratory data in clinical trials consists in generating large numbers of tables and data listings. Such tables and listings are required for submissions to health authorities. However, reviewing laboratory data presented in the form of tables and listings is a lengthy and tedious process. Thus, to enable efficient exploration of laboratory data we developed elaborator, a comprehensive and easy-to-use interactive browser-based application. The elaborator app comprises three analyses types for addressing different questions, for example about changes in laboratory values that frequently occur, treatment-related changes and changes beyond the normal ranges. In this way, the app can be used by study teams for identifying safety signals in a clinical trial as well as for generating hypotheses that are further inspected with detailed analyses and possibly data from other sources. The elaborator app is implemented in the statistical software R. The R package elaborator can be obtained from https://cran.r-project.org/package=elaborator . Patients' laboratory data need to be extracted from the clinical database and pre-processed locally for feeding into the app. For exploring data by means of the elaborator, the user needs some familiarity with R but no programming knowledge is required.","hji,kes",0,0,0,2,0,software,NA +34210730,Health literacy among pregnant women in a lifestyle intervention trial: protocol for an explorative study on the role of health literacy in the perinatal health service setting.,"

Introduction

Pregnancy is a vulnerable period that affects long-term health of pregnant women and their unborn infants. Health literacy plays a crucial role in promoting healthy behaviour and thereby maintaining good health. This study explores the role of health literacy in the GeMuKi (acronym for 'Gemeinsam Gesund: Vorsorge plus für Mutter und Kind'-Strengthening health promotion: enhanced check-up visits for mother and child) Project. It will assess the ability of the GeMuKi lifestyle intervention to positively affect health literacy levels through active participation in preventive counselling. The study also explores associations between health literacy, health outcomes, health service use and effectiveness of the intervention.

Methods and analysis

The GeMuKi trial has a hybrid effectiveness-implementation design and is carried out in routine prenatal health service settings in Germany. Women (n=1860) are recruited by their gynaecologist during routine check-up visits before 12 weeks of gestation. Trained healthcare providers carry out counselling using motivational interviewing techniques to positively affect health literacy and lifestyle-related risk factors. Healthcare providers (gynaecologists and midwives) and women jointly agree on Specific, Measurable, Achievable Reasonable, Time-Bound goals. Women will be invited to fill in questionnaires at two time points (at recruitment and 37th-40th week of gestation) using an app. Health literacy is measured using the German version of the Health Literacy Survey-16 and the Brief Health Literacy Screener. Lifestyle is measured with questions on physical activity, nutrition, alcohol and drug use. Health outcomes of both mother and child, including gestational weight gain (GWG) will be documented at each routine visit. Health service use will be assessed using social health insurance claims data. Data analyses will be conducted using IBM SPSS Statistics, version 26.0. These include descriptive statistics, tests and regression models. A mediation model will be conducted to answer the question whether health behaviour mediates the association between health literacy and GWG.

Ethics and dissemination

The study was approved by the University Hospital of Cologne Research Ethics Committee (ID: 18-163) and the State Chamber of Physicians in Baden-Wuerttemberg (ID: B-F-2018-100). Study results will be disseminated through (poster) presentations at conferences, publications in peer-reviewed journals and press releases.

Trial registration

German Clinical Trials Register (DRKS00013173). Registered pre-results, 3rd of January 2019, https://www.drks.de.","hji,kes",0,0,0,2,0,NA,NA +34211562,Research on Diagnosis Prediction of Traditional Chinese Medicine Diseases Based on Improved Bayesian Combination Model.,"Traditional Chinese Medicine (TCM) clinical intelligent decision-making assistance has been a research hotspot in recent years. However, the recommendations of TCM disease diagnosis based on the current symptoms are difficult to achieve a good accuracy rate because of the ambiguity of the names of TCM diseases. The medical record data downloaded from ancient and modern medical records cloud platform developed by the Institute of Medical Information on TCM of the Chinese Academy of Chinese Medical Sciences (CACMC) and the practice guidelines data in the TCM clinical decision supporting system were utilized as the corpus. Based on the empirical analysis, a variety of improved Naïve Bayes algorithms are presented. The research findings show that the Naïve Bayes algorithm with main symptom weighted and equal probability has achieved better results, with an accuracy rate of 84.2%, which is 15.2% higher than the 69% of the classic Naïve Bayes algorithm (without prior probability). The performance of the Naïve Bayes classifier is greatly improved, and it has certain clinical practicability. The model is currently available at http://tcmcdsmvc.yiankb.com/.","hji,kes",0,0,0,2,0,NA,NA +34213323,pdCSM-cancer: Using Graph-Based Signatures to Identify Small Molecules with Anticancer Properties.,"The development of new, effective, and safe drugs to treat cancer remains a challenging and time-consuming task due to limited hit rates, restraining subsequent development efforts. 
Despite the impressive progress of quantitative structure-activity relationship and machine learning-based models that have been developed to predict molecule pharmacodynamics and bioactivity, they have had mixed success at identifying compounds with anticancer properties against multiple cell lines. Here, we have developed a novel predictive tool, pdCSM-cancer, which uses a graph-based signature representation of the chemical structure of a small molecule in order to accurately predict molecules likely to be active against one or multiple cancer cell lines. pdCSM-cancer represents the most comprehensive anticancer bioactivity prediction platform developed till date, comprising trained and validated models on experimental data of the growth inhibition concentration (GI50%) effects, including over 18,000 compounds, on 9 tumor types and 74 distinct cancer cell lines. Across 10-fold cross-validation, it achieved Pearson's correlation coefficients of up to 0.74 and comparable performance of up to 0.67 across independent, non-redundant blind tests. Leveraging the insights from these cell line-specific models, we developed a generic predictive model to identify molecules active in at least 60 cell lines. Our final model achieved an area under the receiver operating characteristic curve (AUC) of up to 0.94 on 10-fold cross-validation and up to 0.94 on independent non-redundant blind tests, outperforming alternative approaches. We believe that our predictive tool will provide a valuable resource to optimizing and enriching screening libraries for the identification of effective and safe anticancer molecules. 
To provide a simple and integrated platform to rapidly screen for potential biologically active molecules with favorable anticancer properties, we made pdCSM-cancer freely available online at http://biosig.unimelb.edu.au/pdcsm_cancer.","hji,kes",0,0,0,2,0,software,data to train and test and the models +34217324,An easy-to-operate web-based calculator for predicting the progression of chronic kidney disease.,"

Background

This study aimed to establish and validate an easy-to-operate novel scoring system based on simple and readily available clinical indices for predicting the progression of chronic kidney disease (CKD).

Methods

We retrospectively evaluated 1045 eligible CKD patients from a publicly available database. Factors included in the model were determined by univariate and multiple Cox proportional hazard analyses based on the training set.

Results

Independent prognostic factors including etiology, hemoglobin level, creatinine level, proteinuria, and urinary protein/creatinine ratio were determined and contained in the model. The model showed good calibration and discrimination. The area under the curve (AUC) values generated to predict 1-, 2-, and 3-year progression-free survival in the training set were 0.947, 0.931, and 0.939, respectively. In the validation set, the model still revealed excellent calibration and discrimination, and the AUC values generated to predict 1-, 2-, and 3-year progression-free survival were 0.948, 0.933, and 0.915, respectively. In addition, decision curve analysis demonstrated that the model was clinically beneficial. Moreover, to visualize the prediction results, we established a web-based calculator ( https://ncutool.shinyapps.io/CKDprogression/ ).

Conclusion

An easy-to-operate model based on five relevant factors was developed and validated as a conventional tool to assist doctors with clinical decision-making and personalized treatment.","hji,kes",0,0,0,2,0,software,NA +34224351,Deep Learning for Ultrasound Image Formation: CUBDL Evaluation Framework & Open Datasets.,"Deep learning for ultrasound image formation is rapidly garnering research support and attention, quickly rising as the latest frontier in ultrasound image formation, with much promise to balance both image quality and display speed. Despite this promise, one challenge with identifying optimal solutions is the absence of unified evaluation methods and datasets that are not specific to a single research group. This paper introduces the largest known international database of ultrasound channel data and describes associated evaluation methods that were initially developed for the Challenge on Ultrasound Beamforming with Deep Learning (CUBDL), which was offered as a component of the 2020 IEEE International Ultrasonics Symposium. We summarize the challenge results and present qualitative and quantitative assessments using both the initially closed CUBDL evaluation test dataset (which was crowd-sourced from multiple groups around the world) and additional in vivo breast ultrasound data contributed after the challenge was completed. As an example quantitative assessment, single plane wave images from the CUBDL Task 1 dataset produced a mean generalized contrast-to-noise ratio (gCNR) of 0.67 and a mean lateral resolution of 0.42 mm when formed with delay-and-sum beamforming, compared to a mean gCNR as high as 0.81 and a mean lateral resolution as low as 0.32 mm when formed with networks submitted by the challenge winners. We also describe contributed CUBDL data that may be used for training of future networks. The compiled database includes a total of 576 image acquisition sequences. 
We additionally introduce a neural network-based global sound speed estimator implementation that was necessary to fairly evaluate results obtained with this international database. The integration of CUBDL evaluation methods, evaluation code, network weights from the challenge winners, and all datasets described herein are publicly available (visit https://cubdl.jhu.edu for details).","hji,kes",1,1,2,2,1,Available model training data,training and test data; reassessed and re-scored - does have data +34235237,"Whole genome sequence data of Bacillus australimaris strain B28A, isolated from Marine Water in India.","Bacillus genus members are dominant in the Eastern Arabian Sea and are known for producing many industrial enzymes. Bacillus australimaris B28A, isolated from seawater, had an enzymatic activity. Here, the whole genome sequence of Bacillus australimaris B28A is reported. The 3,766,107-bp genome, with a GC content of 41.6%, comprised 3936 protein-coding genes, seven ribosomal RNA, and 75 transfer RNA. Several bioactive secondary metabolite genes in the genome, including surfactin, lichenysin, bacillibactin, bacilysin, paenilamicin, fengycin, and carotenoid, were identified using antiSMASH. The 1396 proteins were predicted using RAST, including asparaginase enzyme: an anticancer enzyme. Sequence data have been deposited in the DDBJ/ENA/GenBank database under the accession number JAGQFH000000000. The version described in this paper is JAGQFH000000000.1. The BioProject ID in the GenBank database is PRJNA670955. The raw data is publicly available at ""https://www.ncbi.nlm.nih.gov/sra/SRR14203888"".","hji,kes",0,0,0,2,0,references other db,not descriptive of resource +34236262,"The prognosis of glioblastoma: a large, multifactorial study.","

Objective

Glioblastoma is the most common and fatal primary brain tumor in adults. Even with maximal resection and a series of postoperative adjuvant treatments, the median overall survival (OS) of glioblastoma patients remains approximately 15 months. The Huashan Hospital glioma bank contains more than 2000 glioma tissue samples with long-term follow-up data; almost half of these samples are from glioblastoma patients. Several large glioma databases with long-term follow-up data have reported outcomes of glioblastoma patients from countries other than China. We investigated the prognosis of glioblastoma patients in China and compared the survival outcomes among patients from different databases.

Methods

The data for 967 glioblastoma patients who underwent surgery at Huashan Hospital and had long-term follow-up records were obtained from our glioma registry (diagnosed from 29 March 2010, through 7 June 2017). Patients were eligible for inclusion if they underwent surgical resection for newly diagnosed glioblastomas and had available data of survival and personal information. Data of 778 glioblastoma patients were collected from three separate online databases (448 patients from The Cancer Genome Atlas (TCGA, https://cancergenome.nih.gov), 191 from REpository for Molecular BRAin Neoplasia DaTa (REMBRANDT) database (GSE108476) and 132 from data set GSE16011, hereafter called the French database). We compared the prognosis of glioblastoma patients from records among the different databases and the changes in survival outcomes of glioblastoma patients from Huashan Hospital over an 8-year period.

Results

The median OS of glioblastoma patients was 16.3 (95% CI: 15.4-17.2) months for Huashan Hospital, 13.8 (95% CI: 12.9-14.9) months for TCGA, 19.3 (95% CI: 17.0-20.0) months for the REMBRANDT database, and 9.1 months for the French database. The median OS of glioblastoma patients from Huashan Hospital improved from 15.6 (2010-2013, 95% CI: 14.4-16.6) months to 18.2 (2014-2017, 95% CI: 15.8-20.6) months over the study period (2010-2017). In addition, the prognosis of glioblastoma patients with total resection was significantly better than that of glioblastoma patients with sub-total resection or biopsy.

Conclusions

Our study confirms that treatment centered around maximal surgical resection brought survival benefits to glioblastoma patients after adjusting to validated prognostic factors. In addition, an improvement in prognosis was observed among glioblastoma patients from Huashan Hospital over the course of our study. We attributed it to the adoption of a new standard of neurosurgical treatment on the basis of neurosurgical multimodal technologies. Even though the prognosis of glioblastoma patients remains poor, gradual progress is being made.","hji,kes",0,0,0,2,0,NA,not descriptive of resource +29724163,EuGI: a novel resource for studying genomic islands to facilitate horizontal gene transfer detection in eukaryotes.,"BACKGROUND:Genomic islands (GIs) are inserts of foreign DNA that have potentially arisen through horizontal gene transfer (HGT). There are evidences that GIs can contribute significantly to the evolution of prokaryotes. The acquisition of GIs through HGT in eukaryotes has, however, been largely unexplored. In this study, the previously developed GI prediction tool, SeqWord Gene Island Sniffer (SWGIS), is modified to predict GIs in eukaryotic chromosomes. Artificial simulations are used to estimate ratios of predicting false positive and false negative GIs by inserting GIs into different test chromosomes and performing the SWGIS v2.0 algorithm. Using SWGIS v2.0, GIs are then identified in 36 fungal, 22 protozoan and 8 invertebrate genomes. RESULTS:SWGIS v2.0 predicts GIs in large eukaryotic chromosomes based on the atypical nucleotide composition of these regions. Averages for predicting false negative and false positive GIs were 20.1% and 11.01% respectively. A total of 10,550 GIs were identified in 66 eukaryotic species with 5299 of these GIs coding for at least one functional protein. 
The EuGI web-resource, freely accessible at http://eugi.bi.up.ac.za , was developed that allows browsing the database created from identified GIs and genes within GIs through an interactive and visual interface. CONCLUSIONS:SWGIS v2.0 along with the EuGI database, which houses GIs identified in 66 different eukaryotic species, and the EuGI web-resource, provide the first comprehensive resource for studying HGT in eukaryotes.","hji,kes",0,1,1,2,0.5,software,no notes; reassessed and still yes - software and includes a data resource +29788290,BAGEL4: a user-friendly web server to thoroughly mine RiPPs and bacteriocins.,"Interest in secondary metabolites such as RiPPs (ribosomally synthesized and posttranslationally modified peptides) is increasing worldwide. To facilitate the research in this field we have updated our mining web server. BAGEL4 is faster than its predecessor and is now fully independent from ORF-calling. Gene clusters of interest are discovered using the core-peptide database and/or through HMM motifs that are present in associated context genes. The databases used for mining have been updated and extended with literature references and links to UniProt and NCBI. Additionally, we have included automated promoter and terminator prediction and the option to upload RNA expression data, which can be displayed along with the identified clusters. Further improvements include the annotation of the context genes, which is now based on a fast blast against the prokaryote part of the UniRef90 database, and the improved web-BLAST feature that dynamically loads structural data such as internal cross-linking from UniProt. Overall BAGEL4 provides the user with more information through a user-friendly web-interface which simplifies data evaluation. 
BAGEL4 is freely accessible at http://bagel4.molgenrug.nl.","hji,kes",0,0,0,2,0,software,no notes; reassessed and re-scored - server only it seems +29846656,The BaMM web server for de-novo motif discovery and regulatory sequence analysis.,"The BaMM web server offers four tools: (i) de-novo discovery of enriched motifs in a set of nucleotide sequences, (ii) scanning a set of nucleotide sequences with motifs to find motif occurrences, (iii) searching with an input motif for similar motifs in our BaMM database with motifs for >1000 transcription factors, trained from the GTRD ChIP-seq database and (iv) browsing and keyword searching the motif database. In contrast to most other servers, we represent sequence motifs not by position weight matrices (PWMs) but by Bayesian Markov Models (BaMMs) of order 4, which we showed previously to perform substantially better in ROC analyses than PWMs or first order models. To address the inadequacy of P- and E-values as measures of motif quality, we introduce the AvRec score, the average recall over the TP-to-FP ratio between 1 and 100. The BaMM server is freely accessible without registration at https://bammmotif.mpibpc.mpg.de.","hji,kes",0,0,0,2,0,software,no notes; reassessed and re-scored - server only it seems +29899596,Cyanobacterial diversity held in microbial biological resource centers as a biotechnological asset: the case study of the newly established LEGE culture collection.,"Cyanobacteria are a well-known source of bioproducts which renders culturable strains a valuable resource for biotechnology purposes. We describe here the establishment of a cyanobacterial culture collection (CC) and present the first version of the strain catalog and its online database (http://lege.ciimar.up.pt/). The LEGE CC holds 386 strains, mainly collected in coastal (48%), estuarine (11%), and fresh (34%) water bodies, for the most part from Portugal (84%). 
By following the most recent taxonomic classification, LEGE CC strains were classified into at least 46 genera from six orders (41% belong to the Synechococcales), several of them are unique among the phylogenetic diversity of the cyanobacteria. For all strains, primary data were obtained and secondary data were surveyed and reviewed, which can be reached through the strain sheets either in the catalog or in the online database. An overview on the notable biodiversity of LEGE CC strains is showcased, including a searchable phylogenetic tree and images for all strains. With this work, 80% of the LEGE CC strains have now their 16S rRNA gene sequences deposited in GenBank. Also, based in primary data, it is demonstrated that several LEGE CC strains are a promising source of extracellular polymeric substances (EPS). Through a review of previously published data, it is exposed that LEGE CC strains have the potential or actual capacity to produce a variety of biotechnologically interesting compounds, including common cyanotoxins or unprecedented bioactive molecules. Phylogenetic diversity of LEGE CC strains does not entirely reflect chemodiversity. Further bioprospecting should, therefore, account for strain specificity of the valuable cyanobacterial holdings of LEGE CC.","hji,kes",1,1,2,2,1,NA,no notes; reassessed and still yes - includes a data resource +29997612,CDG: An Online Server for Detecting Biologically Closest Disease-Causing Genes and its Application to Primary Immunodeficiency.,"High-throughput genomic technologies yield about 20,000 variants in the protein-coding exome of each individual. A commonly used approach to select candidate disease-causing variants is to test whether the associated gene has been previously reported to be disease-causing. In the absence of known disease-causing genes, it can be challenging to associate candidate genes with specific genetic diseases. 
To facilitate the discovery of novel gene-disease associations, we determined the putative biologically closest known genes and their associated diseases for 13,005 human genes not currently reported to be disease-associated. We used these data to construct the closest disease-causing genes (CDG) server, which can be used to infer the closest genes with an associated disease for a user-defined list of genes or diseases. We demonstrate the utility of the CDG server in five immunodeficiency patient exomes across different diseases and modes of inheritance, where CDG dramatically reduced the number of candidate genes to be evaluated. This resource will be a considerable asset for ascertaining the potential relevance of genetic variants found in patient exomes to specific diseases of interest. The CDG database and online server are freely available to non-commercial users at: http://lab.rockefeller.edu/casanova/CDG.","hji,kes",1,1,2,2,1,software; data resource too,no notes; reassessed and still yes - software and includes a data resource +30219844,TCellXTalk facilitates the detection of co-modified peptides for the study of protein post-translational modification cross-talk in T cells.,"

Motivation

Protein function is regulated by post-translational modifications (PTMs) that may act individually or interact with others in a phenomenon termed PTM cross-talk. Multiple databases have been dedicated to PTMs, including recent initiatives oriented towards the in silico prediction of PTM interactions. The study of PTM cross-talk ultimately requires experimental evidence about whether certain PTMs coexist in a single protein molecule. However, available resources do not assist researchers in the experimental detection of co-modified peptides.

Results

Herein, we present TCellXTalk, a comprehensive database of phosphorylation, ubiquitination and acetylation sites in human T cells that supports the experimental detection of co-modified peptides using targeted or directed mass spectrometry. We demonstrate the efficacy of TCellXTalk and the strategy presented here in a proof of concept experiment that enabled the identification and quantification of 15 co-modified (phosphorylated and ubiquitinated) peptides from CD3 proteins of the T-cell receptor complex. To our knowledge, these are the first co-modified peptide sequences described in this widely studied cell type. Furthermore, quantitative data showed distinct dynamics for co-modified peptides upon T cell activation, demonstrating differential regulation of co-occurring PTMs in this biological context. Overall, TCellXTalk facilitates the experimental detection of co-modified peptides in human T cells and puts forward a novel and generic strategy for the study of PTM cross-talk.

Availability and implementation

TCellXTalk is available at https://www.tcellxtalk.org. Source Code is available at https://bitbucket.org/lp-csic-uab/tcellxtalk.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,1,1,2,0.5,NA,"mostly a server but has an associated database - available as excel spreadsheets (though error - they do not download); reassessed and still yes - references the distinct resource in the abstract, tricky though" +30355619,The NCI Transcriptional Pharmacodynamics Workbench: A Tool to Examine Dynamic Expression Profiling of Therapeutic Response in the NCI-60 Cell Line Panel.,": The intracellular effects and overall efficacies of anticancer therapies can vary significantly by tumor type. To identify patterns of drug-induced gene modulation that occur in different cancer cell types, we measured gene-expression changes across the NCI-60 cell line panel after exposure to 15 anticancer agents. The results were integrated into a combined database and set of interactive analysis tools, designated the NCI Transcriptional Pharmacodynamics Workbench (NCI TPW), that allows exploration of gene-expression modulation by molecular pathway, drug target, and association with drug sensitivity. We identified common transcriptional responses across agents and cell types and uncovered gene-expression changes associated with drug sensitivity. We also demonstrated the value of this tool for investigating clinically relevant molecular hypotheses and identifying candidate biomarkers of drug activity. The NCI TPW, publicly available at https://tpwb.nci.nih.gov, provides a comprehensive resource to facilitate understanding of tumor cell characteristics that define sensitivity to commonly used anticancer drugs. 
SIGNIFICANCE: The NCI Transcriptional Pharmacodynamics Workbench represents the most extensive compilation to date of directly measured longitudinal transcriptional responses to anticancer agents across a thoroughly characterized ensemble of cancer cell lines.","hji,kes",0,1,1,2,0.5,software,no notes; reassessed and still yes - software and includes a data resource but iffy +30445541,"SIFTS: updated Structure Integration with Function, Taxonomy and Sequences resource allows 40-fold increase in coverage of structure-based annotations for proteins.","The Structure Integration with Function, Taxonomy and Sequences resource (SIFTS; http://pdbe.org/sifts/) was established in 2002 and continues to operate as a collaboration between the Protein Data Bank in Europe (PDBe; http://pdbe.org) and the UniProt Knowledgebase (UniProtKB; http://uniprot.org). The resource is instrumental in the transfer of annotations between protein structure and protein sequence resources through provision of up-to-date residue-level mappings between entries from the PDB and from UniProtKB. SIFTS also incorporates residue-level annotations from other biological resources, currently comprising the NCBI taxonomy database, IntEnz, GO, Pfam, InterPro, SCOP, CATH, PubMed, Ensembl, Homologene and automatic Pfam domain assignments based on HMM profiles. The recently released implementation of SIFTS includes support for multiple cross-references for proteins in the PDB, allowing mappings to UniProtKB isoforms and UniRef90 cluster members. 
This development makes structure data in the PDB readily available to over 1.8 million UniProtKB accessions.","hji,kes",0,1,1,2,0.5,"closely ties 2 resources, but is not its own","no notes; reassessed and still yes - very iffy though, I think there is value add data there though can't tell for sure without looking" +30715167,The global dissemination of bacterial infections necessitates the study of reverse genomic epidemiology.,"Whole genome sequencing (WGS) has revolutionized the genotyping of bacterial pathogens and is expected to become the new gold standard for tracing the transmissions of bacterial infectious diseases for public health purposes. Traditional genomic epidemiology often uses WGS as a verification tool, namely, when a common source or epidemiological link is suspected, the collected isolates are sequenced for the determination of clonal relationships. However, increasingly frequent international travel and food transportation, and the associated potential for the cross-border transmission of bacterial pathogens, often lead to an absence of information on bacterial transmission routes. Here we introduce the concept of 'reverse genomic epidemiology', i.e. when isolates are inspected by genome comparisons to be sufficiently similar to one another, they are assumed to be a consequence of infection from a common source. Through BacWGSTdb (http://bacdb.org/BacWGSTdb/), a database we have developed for bacterial genome typing and source tracking, we have found that almost the entire analyzed 20 bacterial species exhibit the phenomenon of cross-border clonal dissemination. Five networks were further identified in which isolates sharing nearly identical genomes were collected from at least five different countries. Three of these have been documented as real infectious disease outbreaks, therefore demonstrating the feasibility and authority of reverse genomic epidemiology. 
Our survey and proposed strategy would be of potential value in establishing a global surveillance system for tracing bacterial transmissions and outbreaks; the related database and techniques require urgent standardization.","hji,kes",1,1,2,2,1,an update on the usefulness of their data resource,NA +30715201,"GLAD: GLycan Array Dashboard, a visual analytics tool for glycan microarrays.","MOTIVATION:Traditional glycan microarray data is typically presented as excel files with limited visualization and interactivity. Thus, comparisons and analysis of glycan array data have been difficult, and there is need for a tool to facilitate data mining of glycan array data. RESULTS:GLAD (GLycan Array Dashboard) is a web-based tool to visualize, analyze, present and mine glycan microarray data. GLAD allows users to input multiple data files to create comparisons. GLAD extends the capability of the microarray data to produce more comparative visualizations in the form of grouped bar charts, heatmaps, calendar heatmaps, force graphs and correlation maps in order to analyze broad sets of samples. Additionally, it allows users to filter, sort and normalize the data and view glycan structures in an interactive manner, to facilitate faster visual data mining. AVAILABILITY AND IMPLEMENTATION:GLAD is freely available for use on the Web at https://glycotoolkit.com/Tools/GLAD/ with all major modern browsers (Edge, Firefox, Chrome, Safari). SUPPLEMENTARY INFORMATION:Full documentation and video tutorials for GLAD can be found on https://glycotoolkit.com/GLAD.","hji,kes",0,0,0,2,0,software,no notes; reassessed and re-scored - server only it seems +30874795,mirtronDB: a mirtron knowledge base.,"

Motivation

Mirtrons arise from short introns with atypical cleavage by using the splicing mechanism. In the current literature, there is no repository centralizing and organizing the data available to the public. To fill this gap, we developed mirtronDB, the first knowledge database dedicated to mirtron, and it is available at http://mirtrondb.cp.utfpr.edu.br/. MirtronDB currently contains a total of 1407 mirtron precursors and 2426 mirtron mature sequences in 18 species.

Results

Through a user-friendly interface, users can now browse and search mirtrons by organism, organism group, type and name. MirtronDB is a specialized resource that provides free and user-friendly access to knowledge on mirtron data.

Availability and implementation

MirtronDB is available at http://mirtrondb.cp.utfpr.edu.br/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +30967549,iFISH is a publically available resource enabling versatile DNA FISH to study genome architecture.,"DNA fluorescence in situ hybridization (DNA FISH) is a powerful method to study chromosomal organization in single cells. At present, there is a lack of free resources of DNA FISH probes and probe design tools which can be readily applied. Here, we describe iFISH, an open-source repository currently comprising 380 DNA FISH probes targeting multiple loci on the human autosomes and chromosome X, as well as a genome-wide database of optimally designed oligonucleotides and a freely accessible web interface ( http://ifish4u.org ) that can be used to design DNA FISH probes. We individually validate 153 probes and take advantage of our probe repository to quantify the extent of intermingling between multiple heterologous chromosome pairs, showing a much higher extent of intermingling in human embryonic stem cells compared to fibroblasts. In conclusion, iFISH is a versatile and expandable resource, which can greatly facilitate the use of DNA FISH in research and diagnostics.","hji,kes",1,1,2,2,1,software; reassessed and yes,no notes; reassessed and still yes - repository of probes +31197322,MetOSite: an integrated resource for the study of methionine residues sulfoxidation.,"

Motivation

The oxidation of protein-bound methionine to form methionine sulfoxide has traditionally been regarded as an oxidative damage. However, growing evidences support the view of this reversible reaction also as a regulatory post-translational modification. Thus, the oxidation of methionine residues has been reported to have multiple and varied implications for protein function. However, despite the importance of this modification and the abundance of reports, all these data are scattered in the literature. No database/resource on methionine sulfoxidation exists currently. Since this information is useful to gain further insights into the redox regulation of cellular proteins, we have created a primary database of experimentally confirmed sulfoxidation sites.

Results

MetOSite currently contains 7242 methionine sulfoxide sites found in 3562 different proteins from 23 species, with Homo sapiens, Arabidopsis thaliana and Bacillus cereus as the main contributors. Each collected site has been classified according to the effect of its sulfoxidation on the biological properties of the modified protein. Thus, MetOSite documents cases where the sulfoxidation of methionine leads to (i) gain of activity, (ii) loss of activity, (iii) increased protein-protein interaction susceptibility, (iv) decreased protein-protein interaction susceptibility, (v) changes in protein stability and (vi) changes in subcellular location.

Availability and implementation

MetOSite is available at https://metosite.uma.es.","hji,kes",1,1,2,2,1,NA,NA +31307376,PhenPath: a tool for characterizing biological functions underlying different phenotypes.,"

Background

Many diseases are associated with complex patterns of symptoms and phenotypic manifestations. Parsimonious explanations aim at reconciling the multiplicity of phenotypic traits with the perturbation of one or few biological functions. For this, it is necessary to characterize human phenotypes at the molecular and functional levels, by exploiting gene annotations and known relations among genes, diseases and phenotypes. This characterization makes it possible to implement tools for retrieving functions shared among phenotypes, co-occurring in the same patient and facilitating the formulation of hypotheses about the molecular causes of the disease.

Results

We introduce PhenPath, a new resource consisting of two parts: PhenPathDB and PhenPathTOOL. The former is a database collecting the human genes associated with the phenotypes described in Human Phenotype Ontology (HPO) and OMIM Clinical Synopses. Phenotypes are then associated with biological functions and pathways by means of NET-GE, a network-based method for functional enrichment of sets of genes. The present version considers only phenotypes related to diseases. PhenPathDB collects information for 18 OMIM Clinical synopses and 7137 HPO phenotypes, related to 4292 diseases and 3446 genes. Enrichment of Gene Ontology annotations endows some 87.7, 86.9 and 73.6% of HPO phenotypes with Biological Process, Molecular Function and Cellular Component terms, respectively. Furthermore, 58.8 and 77.8% of HPO phenotypes are also enriched for KEGG and Reactome pathways, respectively. Based on PhenPathDB, PhenPathTOOL analyzes user-defined sets of phenotypes retrieving diseases, genes and functional terms which they share. This information can provide clues for interpreting the co-occurrence of phenotypes in a patient.

Conclusions

The resource allows finding molecular features useful to investigate diseases characterized by multiple phenotypes, and by this, it can help researchers and physicians in identifying molecular mechanisms and biological functions underlying the concomitant manifestation of phenotypes. The resource is freely available at http://phenpath.biocomp.unibo.it .","hji,kes",1,1,2,2,1,NA,NA +31429284,One Thousand and One Software for Proteomics: Tales of the Toolmakers of Science.,"Proteomics is a highly dynamic field driven by frequent introduction of new technological approaches, leading to high demand for new software tools and the concurrent development of many methods for data analysis, processing, and storage. The rapidly changing landscape of proteomics software makes finding a tool fit for a particular purpose a significant challenge. The comparison of software and the selection of tools capable to perform a certain operation on a given type of data rely on their detailed annotation using well-defined descriptors. However, finding accurate information including tool input/output capabilities can be challenging and often heavily depends on manual curation efforts. This is further hampered by a rather low half-life of most of the tools, thus demanding the maintenance of a resource with updated information about the tools. We present here our approach to curate a collection of 189 software tools with detailed information about their functional capabilities. We furthermore describe our efforts to reach out to the proteomics community for their engagement, which further increased the catalog to >750 tools being about 70% of the estimated number of 1097 tools existing for proteomics data analysis. 
Descriptions of all annotated tools are available at https://proteomics.bio.tools.","hji,kes",1,1,2,2,1,"meta-resource, but of software not data",Chuck Check - discussion - catalogue of protocols a la Europe PMC - Y +31602484,VDJbase: an adaptive immune receptor genotype and haplotype database.,"VDJbase is a publicly available database that offers easy searching of data describing the complete sets of gene sequences (genotypes and haplotypes) inferred from adaptive immune receptor repertoire sequencing datasets. VDJbase is designed to act as a resource that will allow the scientific community to explore the genetic variability of the immunoglobulin (Ig) and T cell receptor (TR) gene loci. It can also assist in the investigation of Ig- and TR-related genetic predispositions to diseases. Our database includes web-based query and online tools to assist in visualization and analysis of the genotype and haplotype data. It enables users to detect those alleles and genes that are significantly over-represented in a particular population, in terms of genotype, haplotype and gene expression. The database website can be freely accessed at https://www.vdjbase.org/, and no login is required. The data and code use creative common licenses and are freely downloadable from https://bitbucket.org/account/user/yaarilab/projects/GPHP.","hji,kes",1,1,2,2,1,NA,NA +31641782,The IPD Project: a centralised resource for the study of polymorphism in genes of the immune system.,"The Immuno Polymorphism Database (IPD), https://www.ebi.ac.uk/ipd/, is a set of specialist databases that enable the study of polymorphic genes which function as part of the vertebrate immune system. The major focus is on the hyperpolymorphic major histocompatibility complex (MHC) genes and the killer-cell immunoglobulin-like receptor (KIR) genes, by providing the official repository and primary source of sequence data. 
Databases are centred around humans as well as animals important for food security, for companionship and as disease models. The IPD project works with specialist groups or nomenclature committees who provide and manually curate individual sections before they are submitted for online publication. To reflect the recent advance of allele sequencing technologies and the increasing demands of novel tools for the analysis of genomic variation, the IPD project is undergoing a progressive redesign and reorganisation. In this review, recent updates and future developments are discussed, with a focus on the core concepts to better future-proof the project.","hji,kes",1,1,2,2,1,NA,NA +31642470,WormBase: a modern Model Organism Information Resource.,"WormBase (https://wormbase.org/) is a mature Model Organism Information Resource supporting researchers using the nematode Caenorhabditis elegans as a model system for studies across a broad range of basic biological processes. Toward this mission, WormBase efforts are arranged in three primary facets: curation, user interface and architecture. In this update, we describe progress in each of these three areas. In particular, we discuss the status of literature curation and recently added data, detail new features of the web interface and options for users wishing to conduct data mining workflows, and discuss our efforts to build a robust and scalable architecture by leveraging commercial cloud offerings. We conclude with a description of WormBase's role as a founding member of the nascent Alliance of Genome Resources.","hji,kes",1,1,2,2,1,NA,NA +31648087,AdditiveChem: A comprehensive bioinformatics knowledge-base for food additive chemicals.,"Food additives are considered to be the catalysts and headstones of the modern food industry, affecting every step of food production, processing, and storage. 
The urgent need for a comprehensive curation of food additives, including their molecular structures, biological activities, and precise toxicological evaluations, prompted the creation of the AdditiveChem database (http://www.rxnfinder.org/additivechem/). This database has curated >9064 types of food additives, along with their molecular structure, chemical and physical properties, absorption, distribution, metabolism, excretion and toxicity properties, biosynthesis and biodegradation methods, usage specifications, toxicological and risk assessment data, and targets in the human body from 16 databases to construct an efficient search platform for in silico preliminary evaluations. AdditiveChem database will enable an exploration of the relationship between the structure and function of food additives.","hji,kes",1,1,2,2,1,NA,NA +31665416,IMG-ABC v.5.0: an update to the IMG/Atlas of Biosynthetic Gene Clusters Knowledgebase.,"Microbial secondary metabolism is a reservoir of bioactive compounds of immense biotechnological and biomedical potential. The biosynthetic machinery responsible for the production of these secondary metabolites (SMs) (also called natural products) is often encoded by collocated groups of genes called biosynthetic gene clusters (BGCs). High-throughput genome sequencing of both isolates and metagenomic samples combined with the development of specialized computational workflows is enabling systematic identification of BGCs and the discovery of novel SMs. In order to advance exploration of microbial secondary metabolism and its diversity, we developed the largest publicly available database of predicted BGCs combined with experimentally verified BGCs, the Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters (IMG-ABC) (https://img.jgi.doe.gov/abc-public). 
Here we describe the first major content update of the IMG-ABC knowledgebase, since its initial release in 2015, refreshing the BGC prediction pipeline with the latest version of antiSMASH (v5) as well as presenting the data in the context of underlying environmental metadata sourced from GOLD (https://gold.jgi.doe.gov/). This update has greatly improved the quality and expanded the types of predicted BGCs compared to the previous version.","hji,kes",1,1,2,2,1,NA,NA +31665503,BBCancer: an expression atlas of blood-based biomarkers in the early diagnosis of cancers.,"The early detection of cancer holds the key to combat and control the increasing global burden of cancer morbidity and mortality. Blood-based screenings using circulating DNAs (ctDNAs), circulating RNA (ctRNAs), circulating tumor cells (CTCs) and extracellular vesicles (EVs) have shown promising prospects in the early detection of cancer. Recent high-throughput gene expression profiling of blood samples from cancer patients has provided a valuable resource for developing new biomarkers for the early detection of cancer. However, a well-organized online repository for these blood-based high-throughput gene expression data is still not available. Here, we present BBCancer (http://bbcancer.renlab.org/), a web-accessible and comprehensive open resource for providing the expression landscape of six types of RNAs, including messenger RNAs (mRNAs), long noncoding RNAs (lncRNAs), microRNAs (miRNAs), circular RNAs (circRNAs), tRNA-derived fragments (tRFRNAs) and Piwi-interacting RNAs (piRNAs) in blood samples, including plasma, CTCs and EVs, from cancer patients with various cancer types. Currently, BBCancer contains expression data of the six RNA types from 5040 normal and tumor blood samples across 15 cancer types. 
We believe this database will serve as a powerful platform for developing blood biomarkers.","hji,kes",1,1,2,2,1,NA,NA +31831730,"The odonate phenotypic database, a new open data resource for comparative studies of an old insect order.","We present The Odonate Phenotypic Database (OPD): an online data resource of dragonfly and damselfly phenotypes (Insecta: Odonata). Odonata is a relatively small insect order that currently consists of about 6400 species belonging to 32 families. The database consists of multiple morphological, life-history and behavioral traits, and biogeographical information collected from literature sources. We see taxon-specific phenotypic databases from Odonata and other organismal groups as becoming an increasing valuable resource in comparative studies. Our database has phenotypic records for 1011 of all 6400 known odonate species. The database is accessible at http://www.odonatephenotypicdatabase.org/, and a static version with an information file about the variables in the database is archived at Dryad.","hji,kes",1,1,2,2,1,NA,NA +31874631,"JCDB: a comprehensive knowledge base for Jatropha curcas, an emerging model for woody energy plants.","

Background

Jatropha curcas is an oil-bearing plant, and has seeds with high oil content (~40%). Several advantages, such as easy genetic transformation and short generation duration, have led to the emergence of J. curcas as a model for woody energy plants. With the development of high-throughput sequencing, the genome of Jatropha curcas has been sequenced by different groups and a mass of transcriptome data was released. How to integrate and analyze these omics data is crucial for functional genomics research on J. curcas.

Results

By establishing pipelines for processing novel gene identification, gene function annotation, and gene network construction, we systematically integrated and analyzed a series of J. curcas transcriptome data. Based on these data, we constructed a J. curcas database (JCDB), which not only includes general gene information, gene functional annotation, gene interaction networks, and gene expression matrices but also provides tools for browsing, searching, and downloading data, as well as online BLAST, the JBrowse genome browser, ID conversion, heatmaps, and gene network analysis tools.

Conclusions

JCDB is the most comprehensive and well annotated knowledge base for J. curcas. We believe it will make a valuable contribution to the functional genomics study of J. curcas. The database is accessible at http://jcdb.xtbg.ac.cn.","hji,kes",1,1,2,2,1,NA,NA +31942979,Phenotype-genotype network construction and characterization: a case study of cardiovascular diseases and associated non-coding RNAs.,"The phenotype-genotype relationship is a key for personalized and precision medicine for complex diseases. To unravel the complexity of the clinical phenotype-genotype network, we used cardiovascular diseases (CVDs) and associated non-coding RNAs (ncRNAs) (i.e. miRNAs, long ncRNAs, etc.) as the case for the study of CVDs at a systems or network level. We first integrated a database of CVDs and ncRNAs (CVDncR, http://sysbio.org.cn/cvdncr/) to construct CVD-ncRNA networks and annotate their clinical associations. To characterize the networks, we then separated the miRNAs into two groups, i.e. universal miRNAs associated with at least two types of CVDs and specific miRNAs related only to one type of CVD. Our analyses indicated two interesting patterns in these CVD-ncRNA networks. First, scale-free features were present within both CVD-miRNA and CVD-lncRNA networks; second, universal miRNAs were more likely to be CVDs biomarkers. These results were confirmed by computational functional analyses. The findings offer theoretical guidance for decoding CVD-ncRNA associations and will facilitate the screening of CVD ncRNA biomarkers. Database URL: http://sysbio.org.cn/cvdncr/.","hji,kes",1,1,2,2,1,NA,NA +31982380,TissueCoCoPUTs: Novel Human Tissue-Specific Codon and Codon-Pair Usage Tables Based on Differential Tissue Gene Expression.,"Protein expression in multicellular organisms varies widely across tissues. Codon usage in the transcriptome of each tissue is derived from genomic codon usage and the relative expression level of each gene. 
We created a comprehensive computational resource that houses tissue-specific codon, codon-pair, and dinucleotide usage data for 51 Homo sapiens tissues (TissueCoCoPUTs: https://hive.biochemistry.gwu.edu/review/tissue_codon), using transcriptome data from the Broad Institute Genotype-Tissue Expression (GTEx) portal. Distances between tissue-specific codon and codon-pair frequencies were used to generate a dendrogram based on the unique patterns of codon and codon-pair usage in each tissue that are clearly distinct from the genomic distribution. This novel resource may be useful in unraveling the relationship between codon usage and tRNA abundance, which could be critical in determining translation kinetics and efficiency across tissues. Areas of investigation such as biotherapeutic development, tissue-specific genetic engineering, and genetic disease prediction will greatly benefit from this resource.","hji,kes",1,1,2,2,1,NA,mostly a tool but the data is available for download (and download works) +32009518,RATEmiRs: the rat atlas of tissue-specific and enriched miRNAs for discerning baseline expression exclusivity of candidate biomarkers.,"MicroRNAs (miRNAs) are small RNAs that regulate mRNA expression and have been targeted as biomarkers of organ damage and disease. To explore the utility of miRNAs to assess injury to specific tissues, a tissue atlas of miRNA abundance was constructed. The Rat Atlas of Tissue-specific and Enriched miRNAs (RATEmiRs) catalogues miRNA sequencing data from 21 and 23 tissues in male and female Sprague-Dawley rats, respectively. RATEmiRs identifies tissue-enriched (TE), tissue-specific (TS), or organ-specific (OS) miRNAs via comparisons of one or more tissue or organ vs others. We provide a brief overview of RATEmiRs and present how to use it to detect miRNA expression abundance of candidate biomarkers as well as to compare the expression of miRNAs between rat and human. 
The database is available at https://www.niehs.nih.gov/ratemirs/.","hji,kes",1,1,2,2,1,NA,NA +32090261,NipahVR: a resource of multi-targeted putative therapeutics and epitopes for the Nipah virus.,"Nipah virus (NiV) is an emerging and priority pathogen from the Paramyxoviridae family with a high fatality rate. It causes various diseases such as respiratory ailments and encephalitis and poses a great threat to humans and livestock. Despite various efforts, there is no approved antiviral treatment available. Therefore, to expedite and assist the research, we have developed an integrative resource NipahVR (http://bioinfo.imtech.res.in/manojk/nipahvr/) for the multi-targeted putative therapeutics and epitopes for NiV. It is structured into different sections, i.e. genomes, codon usage, phylogenomics, molecular diagnostic primers, therapeutics (siRNAs, sgRNAs, miRNAs) and vaccine epitopes (B-cell, CTL, MHC-I and -II binders). Most decisively, potentially efficient therapeutic regimens targeting different NiV proteins and genes were anticipated and projected. We hope this computational resource would be helpful in developing combating strategies against this deadly pathogen. Database URL: http://bioinfo.imtech.res.in/manojk/nipahvr/.","hji,kes",1,1,2,2,1,NA,super simple - its does aggregate and present data in a structured way +32103267,Predicted Drosophila Interactome Resource and web tool for functional interpretation of differentially expressed genes.,"Drosophila melanogaster is a well-established model organism that is widely used in genetic studies. This species enjoys the availability of a wide range of research tools, well-annotated reference databases and highly similar gene circuitry to other insects. To facilitate molecular mechanism studies in Drosophila, we present the Predicted Drosophila Interactome Resource (PDIR), a database of high-quality predicted functional gene interactions. 
These interactions were inferred from evidence in 10 public databases providing information for functional gene interactions from diverse perspectives. The current version of PDIR includes 102 835 putative functional associations with balanced sensitivity and specificity, which are expected to cover 22.56% of all Drosophila protein interactions. This set of functional interactions is a good reference for hypothesis formulation in molecular mechanism studies. At the same time, these interactions also serve as a high-quality reference interactome for gene set linkage analysis (GSLA), which is a web tool for the interpretation of the potential functional impacts of a set of changed genes observed in transcriptomics analyses. In a case study, we show that the PDIR/GSLA system was able to produce a more comprehensive and concise interpretation of the collective functional impact of multiple simultaneously changed genes compared with the widely used gene set annotation tools, including PANTHER and David. PDIR and its associated GSLA service can be accessed at http://drosophila.biomedtzc.cn.","hji,kes",1,1,2,2,1,NA,NA +32117995,Nc2Eye: A Curated ncRNAomics Knowledgebase for Bridging Basic and Clinical Research in Eye Diseases.,"Eye diseases (EDs) represent a group of disorders affecting the visual system, most of which can lead to visual impairment and blindness. Accumulating evidence reveals that non-coding RNAs (ncRNAs) are closely associated with a wide variety of EDs. However, abundant associations between ncRNAs and EDs are scattered across the published literature, obstructing a global view of ncRNA-ED associations. A public resource of high-quality manually curated ncRNAomics knowledge associated with EDs remains unavailable. To address this gap, we thus developed Nc2Eye (http://nc2eye.bio-data.cn/), which is the first knowledgebase dedicated to providing a comprehensive ncRNAomics resource for bridging basic and clinical research in EDs. 
Through a comprehensive review of more than 2400 published papers, Nc2Eye catalogs 7088 manually curated ncRNA-ED associations involving 4363 ncRNAs across eight species. We also provide detailed descriptions and annotation information for each ncRNA-disease association such as ncRNA categories, experimental methods, expression pattern and related clinical drugs. To further expand the pathogenic ncRNAs, we also collected more than 90 high-throughput EDs-related transcriptome datasets. Furthermore, a user-friendly interface was constructed for convenient and flexible data browsing, querying, and retrieving. We believe that Nc2Eye is a timely and valuable knowledgebase for significantly improving and useful for discovery of new diagnostic and therapeutic biomarkers.","hji,kes",1,1,2,2,1,NA,NA +32315389,HotSpot3D web server: an integrated resource for mutation analysis in protein 3D structures.,"

Motivation

HotSpot3D is a widely used software for identifying mutation hotspots on the 3D structures of proteins. To further assist users, we developed a new HotSpot3D web server to make this software more versatile, convenient and interactive.

Results

The HotSpot3D web server performs data pre-processing, clustering, visualization and log-viewing on one stop. Users can interactively explore each cluster and easily re-visualize the mutational clusters within browsers. We also provide a database that allows users to search and visualize proximal mutations from 33 cancers in the Cancer Genome Atlas.

Availability and implementation

http://niulab.scgrid.cn/HotSpot3D/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +32337573,MPTherm: database for membrane protein thermodynamics for understanding folding and stability.,"The functions of membrane proteins (MPs) are attributed to their structure and stability. Factors influencing the stability of MPs differ from globular proteins due to the presence of membrane spanning regions. Thermodynamic data of MPs aid to understand the relationship among their structure, stability and function. Although a wealth of experimental data on thermodynamics of MPs are reported in the literature, there is no database available explicitly for MPs. In this work, we have developed a database for MP thermodynamics, MPTherm, which contains more than 7000 thermodynamic data from about 320 MPs. Each entry contains protein sequence and structural information, membrane topology, experimental conditions, thermodynamic parameters such as melting temperature, free energy, enthalpy etc. and literature information. MPTherm assists users to retrieve the data by using different search and display options. We have also provided the sequence and structure visualization as well as cross-links to UniProt and PDB databases. MPTherm database is freely available at http://www.iitm.ac.in/bioinfo/mptherm/. It is implemented in HTML, PHP, MySQL and JavaScript, and supports the latest versions of major browsers, such as Firefox, Chrome and Opera. 
MPTherm would serve as an effective resource for understanding the stability of MPs, development of prediction tools and identifying drug targets for diseases associated with MPs.","hji,kes",1,1,2,2,1,NA,NA +32358997,ProNetView-ccRCC: A Web-Based Portal to Interactively Explore Clear Cell Renal Cell Carcinoma Proteogenomics Networks.,"To better understand the molecular basis of cancer, the NCI's Clinical Proteomics Tumor Analysis Consortium (CPTAC) has been performing comprehensive large-scale proteogenomic characterizations of multiple cancer types. Gene and protein regulatory networks are subsequently being derived based on these proteogenomic profiles, which serve as tools to gain systems-level understanding of the molecular regulatory factories underlying these diseases. On the other hand, it remains a challenge to effectively visualize and navigate the resulting network models, which capture higher order structures in the proteogenomic profiles. There is a pressing need to have a new open community resource tool for intuitive visual exploration, interpretation, and communication of these gene/protein regulatory networks by the cancer research community. In this work, ProNetView-ccRCC (http://ccrcc.cptac-network-view.org/), an interactive web-based network exploration portal for investigating phosphopeptide co-expression network inferred based on the CPTAC clear cell renal cell carcinoma (ccRCC) phosphoproteomics data is introduced. ProNetView-ccRCC enables quick, user-intuitive visual interactions with the ccRCC tumor phosphoprotein co-expression network comprised of 3614 genes, as well as 30 functional pathway-enriched network modules. 
Users can interact with the network portal and can conveniently query for association between abundance of each phosphopeptide in the network and clinical variables such as tumor grade.","hji,kes",1,1,2,2,1,NA,has data use agreement / data portal https://cptac-data-portal.georgetown.edu/cptac/study/disclaimer?accNum=S044 +32382747,RSVdb: a comprehensive database of transcriptome RNA structure.,"RNA fulfills a crucial regulatory role in cells by folding into a complex RNA structure. To date, a chemical compound, dimethyl sulfate (DMS), has been developed to probe the RNA structure at the transcriptome level effectively. We proposed a database, RSVdb (https://taolab.nwafu.edu.cn/rsvdb/), for the browsing and visualization of transcriptome RNA structures. RSVdb, including 626 225 RNAs with validated DMS reactivity from 178 samples in eight species, supports four main functions: information retrieval, research overview, structure prediction and resource download. Users can search for species, studies, transcripts and genes of interest; browse the quality control of sequencing data and statistical charts of RNA structure information; preview and perform online prediction of RNA structures in silico and under DMS restraint of different experimental treatments and download RNA structure data for species and studies. Together, RSVdb provides a reference for RNA structure and will support future research on the function of RNA structure at the transcriptome level.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +32392583,"M6A2Target: a comprehensive database for targets of m6A writers, erasers and readers.","N6-methyladenosine (m6A) is the most abundant posttranscriptional modification in mammalian mRNA molecules and has a crucial function in the regulation of many fundamental biological processes. 
The m6A modification is a dynamic and reversible process regulated by a series of writers, erasers and readers (WERs). Different WERs might have different functions, and even the same WER might function differently in different conditions, which are mostly due to different downstream genes being targeted by the WERs. Therefore, identification of the targets of WERs is particularly important for elucidating this dynamic modification. However, there is still no public repository to host the known targets of WERs. Therefore, we developed the m6A WER target gene database (m6A2Target) to provide a comprehensive resource of the targets of m6A WERs. M6A2Target provides a user-friendly interface to present WER targets in two different modules: 'Validated Targets', referred to as WER targets identified from low-throughput studies, and 'Potential Targets', including WER targets analyzed from high-throughput studies. Compared to other existing m6A-associated databases, m6A2Target is the first specific resource for m6A WER target genes. M6A2Target is freely accessible at http://m6a2target.canceromics.org.","hji,kes",1,1,2,2,1,NA,NA +32415965,Palantir: a springboard for the analysis of secondary metabolite gene clusters in large-scale genome mining projects.,"

Summary

To support small and large-scale genome mining projects, we present Post-processing Analysis tooLbox for ANTIsmash Reports (Palantir), a dedicated software suite for handling and refining secondary metabolite biosynthetic gene cluster (BGC) data annotated with the popular antiSMASH pipeline. Palantir provides new functionalities building on NRPS/PKS predictions from antiSMASH, such as improved BGC annotation, module delineation and easy access to sub-sequences at different levels (cluster, gene, module and domain). Moreover, it can parse user-provided antiSMASH reports and reformat them for direct use or storage in a relational database.

Availability and implementation

Palantir is released both as a Perl API available on CPAN (https://metacpan.org/release/Bio-Palantir) and as a web application (http://palantir.uliege.be). As a practical use case, the web interface also features a database built from the mining of 1616 cyanobacterial genomes, of which 1488 were predicted to encode at least one BGC.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +32433469,Construction of a web-based nanomaterial database by big data curation and modeling friendly nanostructure annotations.,"Modern nanotechnology research has generated numerous experimental data for various nanomaterials. However, the few nanomaterial databases available are not suitable for modeling studies due to the way they are curated. Here, we report the construction of a large nanomaterial database containing annotated nanostructures suited for modeling research. The database, which is publicly available through http://www.pubvinas.com/, contains 705 unique nanomaterials covering 11 material types. Each nanomaterial has up to six physicochemical properties and/or bioactivities, resulting in more than ten endpoints in the database. All the nanostructures are annotated and transformed into protein data bank files, which are downloadable by researchers worldwide. Furthermore, the nanostructure annotation procedure generates 2142 nanodescriptors for all nanomaterials for machine learning purposes, which are also available through the portal. This database provides a public resource for data-driven nanoinformatics modeling research aimed at rational nanomaterial design and other areas of modern computational nanotechnology.","hji,kes",1,1,2,2,1,NA,NA +32436932,miRactDB characterizes miRNA-gene relation switch between normal and cancer tissues across pan-cancer.,"It has been increasingly accepted that microRNA (miRNA) can both activate and suppress gene expression, directly or indirectly, under particular circumstances. Yet, a systematic study on the switch in their interaction pattern between activation and suppression and between normal and cancer conditions based on multi-omics evidences is not available. 
We built miRactDB, a database for miRNA-gene interaction, at https://ccsm.uth.edu/miRactDB, to provide a versatile resource and platform for annotation and interpretation of miRNA-gene relations. We conducted a comprehensive investigation on miRNA-gene interactions and their biological implications across tissue types in both tumour and normal conditions, based on TCGA, CCLE and GTEx databases. We particularly explored the genetic and epigenetic mechanisms potentially contributing to the positive correlation, including identification of miRNA binding sites in the gene coding sequence (CDS) and promoter regions of partner genes. Integrative analysis based on this resource revealed that top-ranked genes derived from TCGA tumour and adjacent normal samples share an overwhelming part of biological processes, which are quite different than those from CCLE and GTEx. The most active miRNAs predicted to target CDS and promoter regions are largely overlapped. These findings corroborate that adjacent normal tissues might have undergone significant molecular transformations towards oncogenesis before phenotypic and histological change; and there probably exists a small yet critical set of miRNAs that profoundly influence various cancer hallmark processes. miRactDB provides a unique resource for the cancer and genomics communities to screen, prioritize and rationalize their candidates of miRNA-gene interactions, in both normal and cancer scenarios.","hji,kes",1,1,2,2,1,NA,NA +32493955,WilsonGen a comprehensive clinically annotated genomic variant resource for Wilson's Disease.,"Wilson disease (WD) is one of the most prevalent genetic diseases with an estimated global carrier frequency of 1 in 90 and a prevalence of 1 in 30,000. 
The disease owes its genesis to Kinnier Wilson who described the disease, and is caused by accumulation of Copper (Cu) in various organs including the liver, central nervous system, cornea, kidney, joints and cardiac muscle which contribute to the characteristic clinical features of WD. A number of studies have reported genetic variants in the ATP7B gene from diverse ethnic and geographical origins. The recent advent of next-generation sequencing approaches has also enabled the discovery of a large number of novel variants in the gene associated with the disease. Previous attempts have been made to compile the knowledgebase and spectrum of genetic variants from across the multitude of publications, but have been limited by the utility due to the significant differences in approaches used to qualify pathogenicity of variants in each of the publications. The recent formulation of guidelines and algorithms for assessment of the pathogenicity of variants jointly put forward by the American College of Medical Genetics and the Association of Molecular Pathologists (ACMG & AMP) has provided a framework for evidence based and systematic assessment of pathogenicity of variants. In this paper, we describe a comprehensive resource of genetic variants in ATP7B gene manually curated from literature and data resources and systematically annotated using the ACMG & AMP guidelines for assessing pathogenicity. The resource therefore serves as a central point for clinicians and geneticists working on WD and to the best of our knowledge is the most comprehensive and only clinically annotated resource for WD. The resource is available at URL http://clingen.igib.res.in/WilsonGen/. We compiled a total of 3662 genetic variants from publications and databases associated with WD. Of these variants compiled, a total of 1458 were found to be unique entries. This is the largest WD database comprising 656 pathogenic/likely pathogenic variants reported classified according to ACMG & AMP guidelines. 
We also mapped all the pathogenic variants corresponding to ATP7B protein from literature and other databases. In addition, geographical origin and distribution of ATP7B pathogenic variants reported are also mapped in the database.","hji,kes",1,1,2,2,1,NA,clincal data but genomic so okay +32496513,gutMEGA: a database of the human gut MEtaGenome Atlas.,"The gut microbiota plays important roles in human health through regulating both physiological homeostasis and disease emergence. The accumulation of metagenomic sequencing studies enables us to better understand the temporal and spatial variations of the gut microbiota under different physiological and pathological conditions. However, it is inconvenient for scientists to query and retrieve published data; thus, a comprehensive resource for the quantitative gut metagenome is urgently needed. In this study, we developed gut MEtaGenome Atlas (gutMEGA), a well-annotated comprehensive database, to curate and host published quantitative gut microbiota datasets from Homo sapiens. By carefully curating the gut microbiota composition, phenotypes and experimental information, gutMEGA finally integrated 59 132 quantification events for 6457 taxa at seven different levels (kingdom, phylum, class, order, family, genus and species) under 776 conditions. Moreover, with various browsing and search functions, gutMEGA provides a fast and simple way for users to obtain the relative abundances of intestinal microbes among phenotypes. Overall, gutMEGA is a convenient and comprehensive resource for gut metagenome research, which can be freely accessed at http://gutmega.omicsbio.info.","hji,kes",1,1,2,2,1,NA,NA +32507889,"The articles.ELM resource: simplifying access to protein linear motif literature by annotation, text-mining and classification.","Modern biology produces data at a staggering rate. Yet, much of these biological data is still isolated in the text, figures, tables and supplementary materials of articles. 
As a result, biological information created at great expense is significantly underutilised. The protein motif biology field does not have sufficient resources to curate the corpus of motif-related literature and, to date, only a fraction of the available articles have been curated. In this study, we develop a set of tools and a web resource, 'articles.ELM', to rapidly identify the motif literature articles pertinent to a researcher's interest. At the core of the resource is a manually curated set of about 8000 motif-related articles. These articles are automatically annotated with a range of relevant biological data allowing in-depth search functionality. Machine-learning article classification is used to group articles based on their similarity to manually curated motif classes in the Eukaryotic Linear Motif resource. Articles can also be manually classified within the resource. The 'articles.ELM' resource permits the rapid and accurate discovery of relevant motif articles thereby improving the visibility of motif literature and simplifying the recovery of valuable biological insights sequestered within scientific articles. Consequently, this web resource removes a critical bottleneck in scientific productivity for the motif biology field. Database URL: http://slim.icr.ac.uk/articles/.","hji,kes",1,1,2,2,1,NA,NA +32510549,DenvInD: dengue virus inhibitors database for clinical and molecular research.,"Dengue virus (DENV) researchers often face challenges with the highly time-consuming process of collecting and curating information on known inhibitors during the standard drug discovery process. To this end, however, required collective information is not yet available on a single platform. Hence, we have developed the DenvInD database for experimentally validated DENV inhibitors against its known targets presently hosted at https://webs.iiitd.edu.in/raghava/denvind/. This database provides comprehensive information, i.e. 
PubChem IDs, SMILES, IC50, EC50, CC50, and wherever available Ki values of the 484 compounds in vitro validated as inhibitors against respective drug targets of DENV. Also, the DenvInD database has been linked to the user-friendly web-based interface and accessibility features, such as simple search, advanced search and data browsing. All the required data curation was conducted manually from the reported scientific literature and PubChem. The collected information was then organized into the DenvInD database using sequence query language under user interface by hypertext markup language. DenvInD is the first useful repository of its kind which would augment the DENV drug discovery research by providing essential information on known DENV inhibitors for molecular docking, computational screening, pharmacophore modeling and quantitative structure-activity relationship modeling.","hji,kes",1,1,2,2,1,NA,NA +32539086,ForageGrassBase: molecular resource for the forage grass meadow fescue (Festuca pratensis Huds.).,"Meadow fescue (Festuca pratensis Huds.) is one of the most important forage grasses in temperate regions. It is a diploid (2n = 14) outbreeding species that belongs to the genus Festuca. Together with Lolium perenne, they are the most important genera of forage grasses. Meadow fescue has very high quality of yield with good winter survival and persistency. However, extensive genomic resources for meadow fescue have not become available so far. To address this lack of comprehensive publicly available datasets, we have developed functionally annotated draft genome sequences of two meadow fescue genotypes, 'HF7/2' and 'B14/16', and constructed the platform ForageGrassBase, available at http://foragegrass.org/, for data visualization, download and querying. This is the first open-access platform that provides extensive genomic resources related to this forage grass species. 
The current database provides the most up-to-date draft genome sequence along with structural and functional annotations for genes that can be accessed using Genome Browser (GBrowse), along with comparative genomic alignments to Arabidopsis, L. perenne, barley, rice, Brachypodium and maize genomes. We have integrated homologous search tool BLAST also for the users to analyze their data. Combined, GBrowse, BLAST and downloadable data gives a user-friendly access to meadow fescue genomic resources. To our knowledge, ForageGrassBase is the first genome database dedicated to forage grasses. The current forage grass database provides valuable resources for a range of research fields related to meadow fescue and other forage crop species, as well as for plant research communities in general. The genome database can be accessed at http://foragegrass.org.","hji,kes",1,1,2,2,1,NA,NA +32591816,ExoBCD: a comprehensive database for exosomal biomarker discovery in breast cancer.,"Effective and safe implementation of precision oncology for breast cancer is a vital strategy to improve patient outcomes, which relies on the application of reliable biomarkers. As 'liquid biopsy' and novel resource for biomarkers, exosomes provide a promising avenue for the diagnosis and treatment of breast cancer. Although several exosome-related databases have been developed, there is still lacking of an integrated database for exosome-based biomarker discovery. To this end, a comprehensive database ExoBCD (https://exobcd.liumwei.org) was constructed with the combination of robust analysis of four high-throughput datasets, transcriptome validation of 1191 TCGA cases and manual mining of 950 studies. In ExoBCD, approximately 20 900 annotation entries were integrated from 25 external sources and 306 exosomal molecules (49 potential biomarkers and 257 biologically interesting molecules). The latter could be divided into 3 molecule types, including 121 mRNAs, 172 miRNAs and 13 lncRNAs. 
Thus, the well-linked information about molecular characters, experimental biology, gene expression patterns, overall survival, functional evidence, tumour stage and clinical use were fully integrated. As a data-driven and literature-based paradigm proposed of biomarker discovery, this study also demonstrated the corroborative analysis and identified 36 promising molecules, as well as the most promising prognostic biomarkers, IGF1R and FRS2. Taken together, ExoBCD is the first well-corroborated knowledge base for exosomal studies of breast cancer. It not only lays a foundation for subsequent studies but also strengthens the studies of probing molecular mechanisms, discovering biomarkers and developing meaningful clinical use.","hji,kes",1,1,2,2,1,NA,NA +32597467,A comprehensive integrated drug similarity resource for in-silico drug repositioning and beyond.,"Drug similarity studies are driven by the hypothesis that similar drugs should display similar therapeutic actions and thus can potentially treat a similar constellation of diseases. Drug-drug similarity has been derived by variety of direct and indirect sources of evidence and frequently shown high predictive power in discovering validated repositioning candidates as well as other in-silico drug development applications. Yet, existing resources either have limited coverage or rely on an individual source of evidence, overlooking the wealth and diversity of drug-related data sources. Hence, there has been an unmet need for a comprehensive resource integrating diverse drug-related information to derive multi-evidenced drug-drug similarities. We addressed this resource gap by compiling heterogenous information for an exhaustive set of small-molecule drugs (total of 10 367 in the current version) and systematically integrated multiple sources of evidence to derive a multi-modal drug-drug similarity network. 
The resulting database, 'DrugSimDB' currently includes 238 635 drug pairs with significant aggregated similarity, complemented with an interactive user-friendly web interface (http://vafaeelab.com/drugSimDB.html), which not only enables database ease of access, search, filtration and export, but also provides a variety of complementary information on queried drugs and interactions. The integration approach can flexibly incorporate further drug information into the similarity network, providing an easily extendable platform. The database compilation and construction source-code has been well-documented and semi-automated for any-time upgrade to account for new drugs and up-to-date drug information.","hji,kes",1,1,2,2,1,NA,NA +32608479,CHDGKB: a knowledgebase for systematic understanding of genetic variations associated with non-syndromic congenital heart disease.,"Congenital heart disease (CHD) is one of the most common birth defects, with complex genetic and environmental etiologies. The reports of genetic variation associated with CHD have increased dramatically in recent years due to the revolutionary development of molecular technology. However, CHD is a heterogeneous disease, and its genetic origins remain inconclusive in most patients. Here we present a database of genetic variations for non-syndromic CHD (NS-CHD). By manually literature extraction and analyses, 5345 NS-CHD-associated genetic variations were collected, curated and stored in the public online database. The objective of our database is to provide the most comprehensive updates on NS-CHD genetic research and to aid systematic analyses of pathogenesis of NS-CHD in molecular level and the correlation between NS-CHD genotypes and phenotypes. 
Database URL: http://www.sysbio.org.cn/CHDGKB/.","hji,kes",1,1,2,2,1,NA,NA +32621232,Dockground Tool for Development and Benchmarking of Protein Docking Procedures.,"Databases of protein-protein complexes are essential for the development of protein modeling/docking techniques. Such databases provide a knowledge base for docking algorithms, intermolecular potentials, search procedures, scoring functions, and refinement protocols. Development of docking techniques requires systematic validation of the modeling protocols on carefully curated benchmark sets of complexes. We present a description and a guide to the DOCKGROUND resource ( http://dockground.compbio.ku.edu ) for structural modeling of protein interactions. The resource integrates various datasets of protein complexes and other data for the development and testing of protein docking techniques. The sets include bound complexes, experimentally determined unbound, simulated unbound, model-model complexes, and docking decoys. The datasets are available to the user community through a Web interface.","hji,kes",1,1,2,2,1,Available model training data,NA +32632099,Database of literature derived cellular measurements from the murine basal ganglia.,"Quantitative measurements and descriptive statistics of different cellular elements in the brain are typically published in journal articles as text, tables, and example figures, and represent an important basis for the creation of biologically constrained computational models, design of intervention studies, and comparison of subject groups. Such data can be challenging to extract from publications and difficult to normalise and compare across studies, and few studies have so far attempted to integrate quantitative information available in journal articles. We here present a database of quantitative information about cellular parameters in the frequently studied murine basal ganglia. 
The database holds a curated and normalised selection of currently available data collected from the literature and public repositories, providing the most comprehensive collection of quantitative neuroanatomical data from the basal ganglia to date. The database is shared as a downloadable resource from the EBRAINS Knowledge Graph (https://kg.ebrains.eu), together with a workflow that allows interested researchers to update and expand the database with data from future reports.","hji,kes",1,1,2,2,1,NA,Chuck Check - yes +32639365,A transcriptional toolbox for exploring peripheral neuroimmune interactions.,"

Abstract

Correct communication between immune cells and peripheral neurons is crucial for the protection of our bodies. Its breakdown is observed in many common, often painful conditions, including arthritis, neuropathies, and inflammatory bowel or bladder disease. Here, we have characterised the immune response in a mouse model of neuropathic pain using flow cytometry and cell-type-specific RNA sequencing (RNA-seq). We found few striking sex differences, but a very persistent inflammatory response, with increased numbers of monocytes and macrophages up to 3 1/2 months after the initial injury. This raises the question of whether the commonly used categorisation of pain into ""inflammatory"" and ""neuropathic"" is one that is mechanistically appropriate. Finally, we collated our data with other published RNA-seq data sets on neurons, macrophages, and Schwann cells in naive and nerve injury states. The result is a practical web-based tool for the transcriptional data mining of peripheral neuroimmune interactions. http://rna-seq-browser.herokuapp.com/.","hji,kes",1,1,2,2,1,NA,essentially a knowledgebase - all data can be downloaded in excel format +32702093,"AciDB 1.0: a database of acidophilic organisms, their genomic information and associated metadata.","

Motivation

There are about 600 available genome sequences of acidophilic organisms (grow at a pH < 5) from the three domains of the Tree of Life. Information about acidophiles is scattered over many heterogeneous sites making it extraordinarily difficult to link physiological traits with genomic data. We were motivated to generate a curated, searchable database to address this problem.

Results

AciDB 1.0 is a curated database of sequenced acidophiles that enables researchers to execute complex queries linking genomic features to growth data, environmental descriptions and taxonomic information.

Availability and implementation

AciDB 1.0 is freely available online at: http://AciDB.cl. The source code is released under an MIT license at: https://gitlab.com/Hawkline451/acidb/.","hji,kes",1,1,2,2,1,NA,NA +32728249,Expanded encyclopaedias of DNA elements in the human and mouse genomes.,"The human and mouse genomes contain instructions that specify RNAs and proteins and govern the timing, magnitude, and cellular context of their production. To better delineate these elements, phase III of the Encyclopedia of DNA Elements (ENCODE) Project has expanded analysis of the cell and tissue repertoires of RNA transcription, chromatin structure and modification, DNA methylation, chromatin looping, and occupancy by transcription factors and RNA-binding proteins. Here we summarize these efforts, which have produced 5,992 new experimental datasets, including systematic determinations across mouse fetal development. All data are available through the ENCODE data portal (https://www.encodeproject.org), including phase II ENCODE1 and Roadmap Epigenomics2 data. We have developed a registry of 926,535 human and 339,815 mouse candidate cis-regulatory elements, covering 7.9 and 3.4% of their respective genomes, by integrating selected datatypes associated with gene regulation, and constructed a web-based server (SCREEN; http://screen.encodeproject.org) to provide flexible, user-defined access to this resource. Collectively, the ENCODE data and registry provide an expansive resource for the scientific community to build a better understanding of the organization and function of the human and mouse genomes.","hji,kes",1,1,2,2,1,NA,NA +32738156,Predicted yeast interactome and network-based interpretation of transcriptionally changed genes.,"Saccharomyces cerevisiae, budding yeast, is a widely used model organism and research tool in genetics studies. 
Many efforts have been directed at constructing a high-quality comprehensive molecular interaction network to elucidate the design logic of the gene circuitries in this classic model organism. In this work, we present the yeast interactome resource (YIR), which includes 22,238 putative functional gene interactions inferred from functional gene association data integrated from 10 databases focusing on diverse functional perspectives. These putative functional gene interactions are expected to cover 18.84% of yeast protein interactions, and 38.49% may represent protein interactions. Based on the YIR, a gene set linkage analysis (GSLA) web tool was developed to annotate the potential functional impacts of a set of transcriptionally changed genes. In a case study, we show that the YIR/GSLA system produced more extensive and concise annotations compared with widely used gene set annotation tools, including PANTHER and DAVID. Both YIR and GSLA are accessible through the website http://yeast.biomedtzc.cn.","hji,kes",1,1,2,2,1,NA,NA +32761142,"NCBI Taxonomy: a comprehensive update on curation, resources and tools.","The National Center for Biotechnology Information (NCBI) Taxonomy includes organism names and classifications for every sequence in the nucleotide and protein sequence databases of the International Nucleotide Sequence Database Collaboration. Since the last review of this resource in 2012, it has undergone several improvements. Most notable is the shift from a single SQL database to a series of linked databases tied to a framework of data called NameBank. This means that relations among data elements can be adjusted in more detail, resulting in expanded annotation of synonyms, the ability to flag names with specific nomenclatural properties, enhanced tracking of publications tied to names and improved annotation of scientific authorities and types. 
Additionally, practices utilized by NCBI Taxonomy curators specific to major taxonomic groups are described, terms peculiar to NCBI Taxonomy are explained, external resources are acknowledged and updates to tools and other resources are documented. Database URL: https://www.ncbi.nlm.nih.gov/taxonomy.","hji,kes",1,1,2,2,1,update of existing resource,NA +32766766,LncR2metasta: a manually curated database for experimentally supported lncRNAs during various cancer metastatic events.,"Mounting evidence has shown the involvement of long non-coding RNAs (lncRNAs) during various cancer metastatic events (abbreviated as CMEs, e.g. cancer cell invasion, intravasation, extravasation, proliferation, etc.) that may cooperatively facilitate malignant tumor spread and cause massive patient deaths. The study of lncRNA-CME associations might help understand lncRNA functions in metastasis and present reliable biomarkers for early dissemination detection and optimized treatment. Therefore, we developed a database named 'lncR2metasta' by manually compiling experimentally supported lncRNAs during various CMEs from existing studies. LncR2metasta documents 1238 associations between 304 lncRNAs and 39 CMEs across 54 human cancer subtypes. Each entry of lncR2metasta contains detailed information on a lncRNA-CME association, including lncRNA symbol, a specific CME, brief description of the association, lncRNA category, lncRNA Entrez or Ensembl ID, lncRNA genomic location and strand, lncRNA experiment, lncRNA expression pattern, detection method, target gene (or pathway) of lncRNA, lncRNA regulatory role on a CME, cancer name and the literature reference. An easy-to-use web interface was deployed in lncR2metasta for its users to easily browse, search and download as well as to submit novel lncRNA-CME associations. LncR2metasta will be a useful resource in cancer research community. 
It is freely available at http://lncR2metasta.wchoda.com.","hji,kes",1,1,2,2,1,NA,NA +32786695,Systemic In Silico Screening in Drug Discovery for Coronavirus Disease (COVID-19) with an Online Interactive Web Server.,"The emergence of the new coronavirus (nCoV-19) has impacted human health on a global scale, while the interaction between the virus and the host is the foundation of the disease. The viral genome codes a cluster of proteins, each with a unique function in the event of host invasion or viral development. Under the current adverse situation, we employ virtual screening tools in searching for drugs and natural products which have been already deposited in DrugBank in an attempt to accelerate the drug discovery process. This study provides an initial evaluation of current drug candidates from various reports using our systemic in silico drug screening based on structures of viral proteins and human ACE2 receptor. Additionally, we have built an interactive online platform (https://shennongproject.ai/) for browsing these results with the visual display of a small molecule docked on its potential target protein, without installing any specialized structural software. With continuous maintenance and incorporation of data from laboratory work, it may serve not only as the assessment tool for the new drug discovery but also an educational web site for the public.","hji,kes",0,0,0,2,0,software,no notes; reassessed and re-scored - server only it seems +32792559,"ACDC, a global database of amphibian cytochrome-b sequences using reproducible curation for GenBank records.","Genetic data are a crucial and exponentially growing resource across all biological sciences, yet curated databases are scarce. The widespread occurrence of sequence and (meta)data errors in public repositories calls for comprehensive improvements of curation protocols leading to robust research and downstream analyses. 
We collated and curated all available GenBank cytochrome-b sequences for amphibians, a benchmark marker in this globally declining vertebrate clade. The Amphibia's Curated Database of Cytochrome-b (ACDC) consists of 36,514 sequences representing 2,309 species from 398 genera (median = 2 with 50% interquartile ranges of 1-7 species/genus). We updated the taxonomic identity of >4,800 sequences (ca. 13%) and found 2,359 (6%) conflicting sequences with 84% of the errors originating from taxonomic misidentifications. The database (accessible at https://doi.org/10.6084/m9.figshare.9944759 ) also includes an R script to replicate our study for other loci and taxonomic groups. We provide recommendations to improve genetic-data quality in public repositories and flag species for which there is a need for taxonomic refinement in the face of increased rate of amphibian extinctions in the Anthropocene.","hji,kes",1,1,2,2,1,NA,NA +32829394,Reanalysis of genome sequences of tomato accessions and its wild relatives: development of Tomato Genomic Variation (TGV) database integrating SNPs and INDELs polymorphisms.,"

Motivation

Facilitated by technological advances and expeditious decrease in the sequencing costs, whole-genome sequencing is increasingly implemented to uncover variations in cultivars/accessions of many crop plants. In tomato (Solanum lycopersicum), the availability of the genome sequence, followed by the resequencing of tomato cultivars and its wild relatives, has provided a prodigious resource for the improvement of traits. A high-quality genome resequencing of 84 tomato accessions and wild relatives generated a dataset that can be used as a resource to identify agronomically important alleles across the genome. Converting this dataset into a searchable database, including information about the influence of single-nucleotide polymorphisms (SNPs) on protein function, provides valuable information about the genetic variations. The database will assist in searching for functional variants of a gene for introgression into tomato cultivars.

Results

A recent release of better-quality tomato genome reference assembly SL3.0, and new annotation ITAG3.2 of SL3.0, dropped 3857 genes, added 4900 novel genes and updated 20 766 genes. Using the above version, we remapped the data from the tomato lines resequenced under the '100 tomato genome resequencing project' on new tomato genome assembly SL3.0 and made an online searchable Tomato Genomic Variations (TGVs) database. The TGV contains information about SNPs and insertion/deletion events and expands it by functional annotation of variants with new ITAG3.2 using SIFT4G software. This database with search function assists in inferring the influence of SNPs on the function of a target gene. This database can be used for selecting SNPs, which can be potentially deployed for improving tomato traits.

Availability and implementation

TGV is freely available at http://psd.uohyd.ac.in/tgv.","hji,kes",1,1,2,2,1,NA,NA +32871004,"CaNDis: a web server for investigation of causal relationships between diseases, drugs and drug targets.","

Motivation

Causal biological interaction networks represent cellular regulatory pathways. Their fusion with other biological data enables insights into disease mechanisms and novel opportunities for drug discovery.

Results

We developed Causal Network of Diseases (CaNDis), a web server for the exploration of a human causal interaction network, which we expanded with data on diseases and FDA-approved drugs, on the basis of which we constructed a disease-disease network in which the links represent the similarity between diseases. We show how CaNDis can be used to identify candidate genes with known and novel roles in disease co-occurrence and drug-drug interactions.

Availability and implementation

CaNDis is freely available to academic users at http://candis.ijs.si and http://candis.insilab.org.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,1,1,2,0.5,software,"Chuck Check - yes; reassessed and still yes - iffy though," +32890396,CoV3D: a database of high resolution coronavirus protein structures.,"SARS-CoV-2, the etiologic agent of COVID-19, exemplifies the general threat to global health posed by coronaviruses. The urgent need for effective vaccines and therapies is leading to a rapid rise in the number of high resolution structures of SARS-CoV-2 proteins that collectively reveal a map of virus vulnerabilities. To assist structure-based design of vaccines and therapeutics against SARS-CoV-2 and other coronaviruses, we have developed CoV3D, a database and resource for coronavirus protein structures, which is updated on a weekly basis. CoV3D provides users with comprehensive sets of structures of coronavirus proteins and their complexes with antibodies, receptors, and small molecules. Integrated molecular viewers allow users to visualize structures of the spike glycoprotein, which is the major target of neutralizing antibodies and vaccine design efforts, as well as sets of spike-antibody complexes, spike sequence variability, and known polymorphisms. In order to aid structure-based design and analysis of the spike glycoprotein, CoV3D permits visualization and download of spike structures with modeled N-glycosylation at known glycan sites, and contains structure-based classification of spike conformations, generated by unsupervised clustering. 
CoV3D can serve the research community as a centralized reference and resource for spike and other coronavirus protein structures, and is available at: https://cov3d.ibbr.umd.edu.","hji,kes",1,1,2,2,1,NA,NA +32897080,METATRYP v 2.0: Metaproteomic Least Common Ancestor Analysis for Taxonomic Inference Using Specialized Sequence Assemblies-Standalone Software and Web Servers for Marine Microorganisms and Coronaviruses.,"We present METATRYP version 2 software that identifies shared peptides across the predicted proteomes of organisms within environmental metaproteomics studies to enable accurate taxonomic attribution of peptides during protein inference. Improvements include ingestion of complex sequence assembly data categories (metagenomic and metatranscriptomic assemblies, single cell amplified genomes, and metagenome assembled genomes), prediction of the least common ancestor (LCA) for a peptide shared across multiple organisms, increased performance through updates to the backend architecture, and development of a web portal (https://metatryp.whoi.edu). Major expansion of the marine METATRYP database with predicted proteomes from environmental sequencing confirms a low occurrence of shared tryptic peptides among disparate marine microorganisms, implying tractability for targeted metaproteomics. METATRYP was designed to facilitate ocean metaproteomics and has been integrated into the Ocean Protein Portal (https://oceanproteinportal.org); however, it can be readily applied to other domains. We describe the rapid deployment of a coronavirus-specific web portal (https://metatryp-coronavirus.whoi.edu/) to aid in use of proteomics on coronavirus research during the ongoing pandemic. 
A coronavirus-focused METATRYP database identified potential SARS-CoV-2 peptide biomarkers and indicated very few shared tryptic peptides between SARS-CoV-2 and other disparate taxa analyzed, sharing <1% peptides with taxa outside of the betacoronavirus group, establishing that taxonomic specificity is achievable using tryptic peptide-based proteomic diagnostic approaches.","hji,kes",1,1,2,2,1,NA,NA +32911083,MosaicBase: A Knowledgebase of Postzygotic Mosaic Variants in Noncancer Disease-related and Healthy Human Individuals.,"Mosaic variants resulting from postzygotic mutations are prevalent in the human genome and play important roles in human diseases. However, except for cancer-related variants, there is no collection of postzygotic mosaic variants in noncancer disease-related and healthy individuals. Here, we present MosaicBase, a comprehensive database that includes 6698 mosaic variants related to 266 noncancer diseases and 27,991 mosaic variants identified in 422 healthy individuals. Genomic and phenotypic information of each variant was manually extracted and curated from 383 publications. MosaicBase supports the query of variants with Online Mendelian Inheritance in Man (OMIM) entries, genomic coordinates, gene symbols, or Entrez IDs. We also provide an integrated genome browser for users to easily access mosaic variants and their related annotations for any genomic region. By analyzing the variants collected in MosaicBase, we find that mosaic variants that directly contribute to disease phenotype show features distinct from those of variants in individuals with mild or no phenotypes, in terms of their genomic distribution, mutation signatures, and fraction of mutant cells. MosaicBase will not only assist clinicians in genetic counseling and diagnosis but also provide a useful resource to understand the genomic baseline of postzygotic mutations in the general human population. 
MosaicBase is publicly available at http://mosaicbase.com/ or http://49.4.21.8:8000.","hji,kes",1,1,2,2,1,NA,NA +32915954,"PanGPCR: predictions for multiple targets, repurposing and side effects.","

Summary

Drug discovery targeting G protein-coupled receptors (GPCRs), the largest known class of therapeutic targets, is challenging. To facilitate the rapid discovery and development of GPCR drugs, we built a system, PanGPCR, to predict multiple potential GPCR targets and their expression locations in the tissues, side effects and possible repurposing of GPCR drugs. With PanGPCR, the compound of interest is docked to a library of 36 experimentally determined crystal structures comprising of 46 docking sites for human GPCRs, and a ranked list is generated from the docking studies to assess all GPCRs and their binding affinities. Users can determine a given compound's GPCR targets and its repurposing potential accordingly. Moreover, potential side effects collected from the SIDER (Side-Effect Resource) database and mapped to 45 tissues and organs are provided by linking predicted off-targets and their expressed sequence tag profiles. With PanGPCR, multiple targets, repurposing potential and side effects can be determined by simply uploading a small ligand.

Availability and implementation

PanGPCR is freely accessible at https://gpcrpanel.cmdm.tw/index.html.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,1,1,2,0.5,software,"Chuck Check - discussion - also includes database - Y; reassessed and still yes, iffy though" +32934277,StoneMod: a database for kidney stone modulatory proteins with experimental evidence.,"Better understanding of molecular mechanisms for kidney stone formation is required to improve management of kidney stone disease with better therapeutic outcome. Recent kidney stone research has indicated critical roles of a group of proteins, namely 'stone modulators', in promotion or inhibition of the stone formation. Nevertheless, such information is currently dispersed and difficult to obtain. Herein, we present the kidney stone modulator database (StoneMod), which is a curated resource by obtaining necessary information of such stone modulatory proteins, which can act as stone promoters or inhibitors, with experimental evidence from previously published studies. Currently, the StoneMod database contains 10, 16, 13, 8 modulatory proteins that affect calcium oxalate crystallization, crystal growth, crystal aggregation, and crystal adhesion on renal tubular cells, respectively. Informative details of each modulatory protein and PubMed links to the published articles are provided. Additionally, hyperlinks to other protein/gene databases (e.g., UniProtKB, Swiss-Prot, Human Protein Atlas, PeptideAtlas, and Ensembl) are made available for the users to obtain additional in-depth information of each protein. Moreover, this database provides a user-friendly web interface, in which the users can freely access to the information and/or submit their data to deposit or update. Database URL: https://www.stonemod.org .","hji,kes",1,1,2,2,1,NA,NA +32941628,"IDDB: a comprehensive resource featuring genes, variants and characteristics associated with infertility.","Infertility is a complex multifactorial disease that affects up to 10% of couples across the world. 
However, many mechanisms of infertility remain unclear due to the lack of studies based on systematic knowledge, leading to ineffective treatment and/or transmission of genetic defects to offspring. Here, we developed an infertility disease database to provide a comprehensive resource featuring various factors involved in infertility. Features in the current IDDB version were manually curated as follows: (i) a total of 307 infertility-associated genes in human and 1348 genes associated with reproductive disorder in 9 model organisms; (ii) a total of 202 chromosomal abnormalities leading to human infertility, including aneuploidies and structural variants; and (iii) a total of 2078 pathogenic variants from infertility patients' samples across 60 different diseases causing infertility. Additionally, the characteristics of clinically diagnosed infertility patients (i.e. causative variants, laboratory indexes and clinical manifestations) were collected. To the best of our knowledge, the IDDB is the first infertility database serving as a systematic resource for biologists to decipher infertility mechanisms and for clinicians to achieve better diagnosis/treatment of patients from disease phenotype to genetic factors. The IDDB is freely available at http://mdl.shsmu.edu.cn/IDDB/.","hji,kes",1,1,2,2,1,NA,specifically has genes +32943659,The landscape of microsatellites in the enset (Ensete ventricosum) genome and web-based marker resource development.,"Ensete ventricosum (Musaceae, enset) is an Ethiopian food security crop. To realize the potential of enset for rural livelihoods, further knowledge of enset diversity, genetics and genomics is required to support breeding programs and conservation. This study was conducted to explore the enset genome to develop molecular markers, genomics resources, and characterize enset landraces while giving insight into the organization of the genome. 
We identified 233 microsatellites (simple sequence repeats, SSRs) per Mbp in the enset genome, representing 0.28% of the genome. Mono- and di-nucleotide repeats motifs were found in a higher proportion than other classes of SSR-motifs. In total, 154,586 non-redundant enset microsatellite markers (EMM) were identified and 40 selected for primer development. Marker validation by PCR and low-cost agarose gel electrophoresis revealed that 92.5% were polymorphic, showing a high PIC (Polymorphism Information Content; 0.87) and expected heterozygosity (He = 0.79-0.82). In silico analysis of genomes of closely related species showed 46.86% of the markers were transferable among enset species and 1.90% were transferable to Musa. The SSRs are robust (with basic PCR methods and agarose gel electrophoresis), informative, and applicable in measuring enset diversity, genotyping, selection and potentially breeding. Enset SSRs are available in a web-based database at https://enset-project.org/EnMom@base.html (or https://enset.aau.edu.et/index.html , downloadable from Figshare).","hji,kes",1,1,2,2,1,NA,NA +32976589,DIGGER: exploring the functional role of alternative splicing in protein interactions.,"Alternative splicing plays a major role in regulating the functional repertoire of the proteome. However, isoform-specific effects to protein-protein interactions (PPIs) are usually overlooked, making it impossible to judge the functional role of individual exons on a systems biology level. We overcome this barrier by integrating protein-protein interactions, domain-domain interactions and residue-level interactions information to lift exon expression analysis to a network level. 
Our user-friendly database DIGGER is available at https://exbio.wzw.tum.de/digger and allows users to seamlessly switch between isoform and exon-centric views of the interactome and to extract sub-networks of relevant isoforms, making it an essential resource for studying mechanistic consequences of alternative splicing.","hji,kes",1,1,2,2,1,NA,NA +32986834,"The ModelSEED Biochemistry Database for the integration of metabolic annotations and the reconstruction, comparison and analysis of metabolic models for plants, fungi and microbes.","For over 10 years, ModelSEED has been a primary resource for the construction of draft genome-scale metabolic models based on annotated microbial or plant genomes. Now being released, the biochemistry database serves as the foundation of biochemical data underlying ModelSEED and KBase. The biochemistry database embodies several properties that, taken together, distinguish it from other published biochemistry resources by: (i) including compartmentalization, transport reactions, charged molecules and proton balancing on reactions; (ii) being extensible by the user community, with all data stored in GitHub; and (iii) design as a biochemical 'Rosetta Stone' to facilitate comparison and integration of annotations from many different tools and databases. The database was constructed by combining chemical data from many resources, applying standard transformations, identifying redundancies and computing thermodynamic properties. The ModelSEED biochemistry is continually tested using flux balance analysis to ensure the biochemical network is modeling-ready and capable of simulating diverse phenotypes. Ontologies can be designed to aid in comparing and reconciling metabolic reconstructions that differ in how they represent various metabolic pathways. 
ModelSEED now includes 33,978 compounds and 36,645 reactions, available as a set of extensible files on GitHub, and available to search at https://modelseed.org/biochem and KBase.","hji,kes",1,1,2,2,1,NA,NA +32990749,TCRdb: a comprehensive database for T-cell receptor sequences with powerful search function.,"T cells and the T-cell receptor (TCR) repertoire play pivotal roles in immune response and immunotherapy. TCR sequencing (TCR-Seq) technology has enabled accurate profiling TCR repertoire and currently a large number of TCR-Seq data are available in public. Based on the urgent need to effectively re-use these data, we developed TCRdb, a comprehensive human TCR sequences database, by a uniform pipeline to characterize TCR sequences on TCR-Seq data. TCRdb contains more than 277 million highly reliable TCR sequences from over 8265 TCR-Seq samples across hundreds of tissues/clinical conditions/cell types. The unique features of TCRdb include: (i) comprehensive and reliable sequences for TCR repertoire in different samples generated by a strict and uniform pipeline of TCRdb; (ii) powerful search function, allowing users to identify their interested TCR sequences in different conditions; (iii) categorized sample metadata, enabling comparison of TCRs in different sample types; (iv) interactive data visualization charts, describing the TCR repertoire in TCR diversity, length distribution and V-J gene utilization. The TCRdb database is freely available at http://bioinfo.life.hust.edu.cn/TCRdb/ and will be a useful resource in the research and application community of T cell immunology.","hji,kes",1,1,2,2,1,NA,NA +33010163,cncRNAdb: a manually curated resource of experimentally supported RNAs with both protein-coding and noncoding function.,"RNA endowed with both protein-coding and noncoding functions is referred to as 'dual-function RNA', 'binary functional RNA (bifunctional RNA)' or 'cncRNA (coding and noncoding RNA)'. 
Recently, an increasing number of cncRNAs have been identified, including both translated ncRNAs (ncRNAs with coding functions) and untranslated mRNAs (mRNAs with noncoding functions). However, an appropriate database for storing and organizing cncRNAs is still lacking. Here, we developed cncRNAdb, a manually curated database of experimentally supported cncRNAs, which aims to provide a resource for efficient manipulation, browsing and analysis of cncRNAs. The current version of cncRNAdb documents about 2600 manually curated entries of cncRNA functions with experimental evidence, involving more than 2,000 RNAs (including over 1300 translated ncRNAs and over 600 untranslated mRNAs) across over 20 species. In summary, we believe that cncRNAdb will help elucidate the functions and mechanisms of cncRNAs and develop new prediction methods. The database is available at http://www.rna-society.org/cncrnadb/.","hji,kes",1,1,2,2,1,NA,NA +33010176,CancerImmunityQTL: a database to systematically evaluate the impact of genetic variants on immune infiltration in human cancer.,"Tumor-infiltrating immune cells as integral component of the tumor microenvironment are associated with tumor progress, prognosis and responses to immunotherapy. Genetic variants have been demonstrated to impact tumor-infiltrating, underscoring the heritable character of immune landscape. Therefore, identification of immunity quantitative trait loci (immunQTLs), which evaluate the effect of genetic variants on immune cells infiltration, might present a critical step toward fully understanding the contribution of genetic variants in tumor development. Although emerging studies have demonstrated the determinants of germline variants on immune infiltration, no database has yet been developed to systematically analyze immunQTLs across multiple cancer types. 
Using genotype data from TCGA database and immune cell fractions estimated by CIBERSORT, we developed a computational pipeline to identify immunQTLs in 33 cancer types. A total of 913 immunQTLs across different cancer types were identified. Among them, 5 immunQTLs are associated with patient overall survival. Furthermore, by integrating immunQTLs with GWAS data, we identified 527 immunQTLs overlapping with known GWAS linkage disequilibrium regions. Finally, we constructed a user-friendly database, CancerImmunityQTL (http://www.cancerimmunityqtl-hust.com/) for users to browse, search and download data of interest. This database provides an informative resource to understand the germline determinants of immune infiltration in human cancer and benefit from personalized cancer immunotherapy.","hji,kes",1,1,2,2,1,NA,Chuck Check - yes +33020484,"lncRNAKB, a knowledgebase of tissue-specific functional annotation and trait association of long noncoding RNA.","Long non-coding RNA Knowledgebase (lncRNAKB) is an integrated resource for exploring lncRNA biology in the context of tissue-specificity and disease association. A systematic integration of annotations from six independent databases resulted in 77,199 human lncRNA (224,286 transcripts). The user-friendly knowledgebase covers a comprehensive breadth and depth of lncRNA annotation. lncRNAKB is a compendium of expression patterns, derived from analysis of RNA-seq data in thousands of samples across 31 solid human normal tissues (GTEx). Thousands of co-expression modules identified via network analysis and pathway enrichment to delineate lncRNA function are also accessible. Millions of expression quantitative trait loci (cis-eQTL) computed using whole genome sequence genotype data (GTEx) can be downloaded at lncRNAKB that also includes tissue-specificity, phylogenetic conservation and coding potential scores. Tissue-specific lncRNA-trait associations encompassing 323 GWAS (UK Biobank) are also provided. 
LncRNAKB is accessible at http://www.lncrnakb.org/ , and the data are freely available through Open Science Framework ( https://doi.org/10.17605/OSF.IO/RU4D2 ).","hji,kes",1,1,2,2,1,NA,NA +33021634,The Nucleome Data Bank: web-based resources to simulate and analyze the three-dimensional genome.,"We introduce the Nucleome Data Bank (NDB), a web-based platform to simulate and analyze the three-dimensional (3D) organization of genomes. The NDB enables physics-based simulation of chromosomal structural dynamics through the MEGABASE + MiChroM computational pipeline. The input of the pipeline consists of epigenetic information sourced from the Encode database; the output consists of the trajectories of chromosomal motions that accurately predict Hi-C and fluorescence in situ hybridization data, as well as multiple observations of chromosomal dynamics in vivo. As an intermediate step, users can also generate chromosomal sub-compartment annotations directly from the same epigenetic input, without the use of any DNA-DNA proximity ligation data. Additionally, the NDB freely hosts both experimental and computational structural genomics data. Besides being able to perform their own genome simulations and download the hosted data, users can also analyze and visualize the same data through custom-designed web-based tools. In particular, the one-dimensional genetic and epigenetic data can be overlaid onto accurate 3D structures of chromosomes, to study the spatial distribution of genetic and epigenetic features. The NDB aims to be a shared resource to biologists, biophysicists and all genome scientists. The NDB is available at https://ndb.rice.edu.","hji,kes",1,1,2,2,1,"seems like software, not a data resource. But also seems you may be able to obtain the underlying data. 
Not sure if those underlying data are from their own resource?; reassessed value add data resource",no notes; reassessed and still yes - software and includes a data resource +33045721,ViruSurf: an integrated database to investigate viral sequences.,"ViruSurf, available at http://gmql.eu/virusurf/, is a large public database of viral sequences and integrated and curated metadata from heterogeneous sources (RefSeq, GenBank, COG-UK and NMDC); it also exposes computed nucleotide and amino acid variants, called from original sequences. A GISAID-specific ViruSurf database, available at http://gmql.eu/virusurf_gisaid/, offers a subset of these functionalities. Given the current pandemic outbreak, SARS-CoV-2 data are collected from the four sources; but ViruSurf contains other virus species harmful to humans, including SARS-CoV, MERS-CoV, Ebola and Dengue. The database is centered on sequences, described from their biological, technological and organizational dimensions. In addition, the analytical dimension characterizes the sequence in terms of its annotations and variants. The web interface enables expressing complex search queries in a simple way; arbitrary search queries can freely combine conditions on attributes from the four dimensions, extracting the resulting sequences. Several example queries on the database confirm and possibly improve results from recent research papers; results can be recomputed over time and upon selected populations. Effective search over large and curated sequence data may enable faster responses to future threats that could arise from new viruses.","hji,kes",1,1,2,2,1,NA,NA +33045737,INTEDE: interactome of drug-metabolizing enzymes.,"Drug-metabolizing enzymes (DMEs) are critical determinant of drug safety and efficacy, and the interactome of DMEs has attracted extensive attention. 
There are 3 major interaction types in an interactome: microbiome-DME interaction (MICBIO), xenobiotics-DME interaction (XEOTIC) and host protein-DME interaction (HOSPPI). The interaction data of each type are essential for drug metabolism, and the collective consideration of multiple types has implication for the future practice of precision medicine. However, no database was designed to systematically provide the data of all types of DME interactions. Here, a database of the Interactome of Drug-Metabolizing Enzymes (INTEDE) was therefore constructed to offer these interaction data. First, 1047 unique DMEs (448 host and 599 microbial) were confirmed, for the first time, using their metabolizing drugs. Second, for these newly confirmed DMEs, all types of their interactions (3359 MICBIOs between 225 microbial species and 185 DMEs; 47 778 XEOTICs between 4150 xenobiotics and 501 DMEs; 7849 HOSPPIs between 565 human proteins and 566 DMEs) were comprehensively collected and then provided, which enabled the crosstalk analysis among multiple types. Because of the huge amount of accumulated data, the INTEDE made it possible to generalize key features for revealing disease etiology and optimizing clinical treatment. INTEDE is freely accessible at: https://idrblab.org/intede/.","hji,kes",1,1,2,2,1,NA,NA +33046717,Protein ontology on the semantic web for knowledge discovery.,"The Protein Ontology (PRO) provides an ontological representation of protein-related entities, ranging from protein families to proteoforms to complexes. Protein Ontology Linked Open Data (LOD) exposes, shares, and connects knowledge about protein-related entities on the Semantic Web using Resource Description Framework (RDF), thus enabling integration with other Linked Open Data for biological knowledge discovery. For example, proteins (or variants thereof) can be retrieved on the basis of specific disease associations. 
As a community resource, we strive to follow the Findability, Accessibility, Interoperability, and Reusability (FAIR) principles, disseminate regular updates of our data, support multiple methods for accessing, querying and downloading data in various formats, and provide documentation both for scientists and programmers. PRO Linked Open Data can be browsed via faceted browser interface and queried using SPARQL via YASGUI. RDF data dumps are also available for download. Additionally, we developed RESTful APIs to support programmatic data access. We also provide W3C HCLS specification compliant metadata description for our data. The PRO Linked Open Data is available at https://lod.proconsortium.org/ .","hji,kes",1,1,2,2,1,NA,NA +33051688,dbGuide: a database of functionally validated guide RNAs for genome editing in human and mouse cells.,"With the technology's accessibility and ease of use, CRISPR has been employed widely in many different organisms and experimental settings. As a result, thousands of publications have used CRISPR to make specific genetic perturbations, establishing in itself a resource of validated guide RNA sequences. While numerous computational tools to assist in the design and identification of candidate guide RNAs exist, these are still just at best predictions and generally, researchers inevitably will test multiple sequences for functional activity. Here, we present dbGuide (https://sgrnascorer.cancer.gov/dbguide), a database of functionally validated guide RNA sequences for CRISPR/Cas9-based knockout in human and mouse. Our database not only contains computationally determined candidate guide RNA sequences, but of even greater value, over 4000 sequences which have been functionally validated either through direct amplicon sequencing or manual curation of literature from over 1000 publications. 
Finally, our established framework will allow for continual addition of newly published and experimentally validated guide RNA sequences for CRISPR/Cas9-based knockout as well as incorporation of sequences from different gene editing systems, additional species and other types of site-specific functionalities such as base editing, gene activation, repression and epigenetic modification.","hji,kes",1,1,2,2,1,NA,NA +33053178,"DNAmoreDB, a database of DNAzymes.","Deoxyribozymes, DNA enzymes or simply DNAzymes are single-stranded oligo-deoxyribonucleotide molecules that, like proteins and ribozymes, possess the ability to perform catalysis. Although DNAzymes have not yet been found in living organisms, they have been isolated in the laboratory through in vitro selection. The selected DNAzyme sequences have the ability to catalyze a broad range of chemical reactions, utilizing DNA, RNA, peptides or small organic compounds as substrates. DNAmoreDB is a comprehensive database resource for DNAzymes that collects and organizes the following types of information: sequences, conditions of the selection procedure, catalyzed reactions, kinetic parameters, substrates, cofactors, structural information whenever available, and literature references. Currently, DNAmoreDB contains information about DNAzymes that catalyze 20 different reactions. We included a submission form for new data, a REST-based API system that allows users to retrieve the database contents in a machine-readable format, and keyword and BLASTN search features. The database is publicly available at https://www.genesilico.pl/DNAmoreDB/.","hji,kes",1,1,2,2,1,NA,NA +33068420,Chewie Nomenclature Server (chewie-NS): a deployable nomenclature server for easy sharing of core and whole genome MLST schemas.,"Chewie Nomenclature Server (chewie-NS, https://chewbbaca.online/) allows users to share genome-based gene-by-gene typing schemas and to maintain a common nomenclature, simplifying the comparison of results. 
The combination between local analyses and a public repository of allelic data strikes a balance between potential confidentiality issues and the need to compare results. The possibility of deploying private instances of chewie-NS facilitates the creation of nomenclature servers with a restricted user base to allow compliance with the strictest data policies. Chewie-NS allows users to easily share their own schemas and to explore publicly available schemas, including informative statistics on schemas and loci presented in interactive charts and tables. Users can retrieve all the information necessary to run a schema locally or all the alleles identified at a particular locus. The integration with the chewBBACA suite enables users to directly upload new schemas to chewie-NS, download existing schemas and synchronize local and remote schemas from chewBBACA command line version, allowing an easier integration into high-throughput analysis pipelines. The same REST API linking chewie-NS and the chewBBACA suite supports the interaction of other interfaces or pipelines with the databases available at chewie-NS, facilitating the reusability of the stored data.","hji,kes",1,1,2,2,1,"software; reassessed, allows for sharing of data","basically a database of schemas; reassessed and still yes - there is data there, I think" +33068428,Comparative Toxicogenomics Database (CTD): update 2021.,"The public Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is an innovative digital ecosystem that relates toxicological information for chemicals, genes, phenotypes, diseases, and exposures to advance understanding about human health. Literature-based, manually curated interactions are integrated to create a knowledgebase that harmonizes cross-species heterogeneous data for chemical exposures and their biological repercussions. 
In this biennial update, we report a 20% increase in CTD curated content and now provide 45 million toxicogenomic relationships for over 16 300 chemicals, 51 300 genes, 5500 phenotypes, 7200 diseases and 163 000 exposure events, from 600 comparative species. Furthermore, we increase the functionality of chemical-phenotype content with new data-tabs on CTD Disease pages (to help fill in knowledge gaps for environmental health) and new phenotype search parameters (for Batch Query and Venn analysis tools). As well, we introduce new CTD Anatomy pages that allow users to uniquely explore and analyze chemical-phenotype interactions from an anatomical perspective. Finally, we have enhanced CTD Chemical pages with new literature-based chemical synonyms (to improve querying) and added 1600 amino acid-based compounds (to increase chemical landscape). Together, these updates continue to augment CTD as a powerful resource for generating testable hypotheses about the etiologies and molecular mechanisms underlying environmentally influenced diseases.","hji,kes",1,1,2,2,1,NA,NA +33068433,CovalentInDB: a comprehensive database facilitating the discovery of covalent inhibitors.,"Inhibitors that form covalent bonds with their targets have traditionally been considered highly adventurous due to their potential off-target effects and toxicity concerns. However, with the clinical validation and approval of many covalent inhibitors during the past decade, design and discovery of novel covalent inhibitors have attracted increasing attention. A large amount of scattered experimental data for covalent inhibitors have been reported, but a resource by integrating the experimental information for covalent inhibitor discovery is still lacking. In this study, we presented Covalent Inhibitor Database (CovalentInDB), the largest online database that provides the structural information and experimental data for covalent inhibitors. 
CovalentInDB contains 4511 covalent inhibitors (including 68 approved drugs) with 57 different reactive warheads for 280 protein targets. The crystal structures of some of the proteins bound with a covalent inhibitor are provided to visualize the protein-ligand interactions around the binding site. Each covalent inhibitor is annotated with the structure, warhead, experimental bioactivity, physicochemical properties, etc. Moreover, CovalentInDB provides the covalent reaction mechanism and the corresponding experimental verification methods for each inhibitor towards its target. High-quality datasets are downloadable for users to evaluate and develop computational methods for covalent drug design. CovalentInDB is freely accessible at http://cadd.zju.edu.cn/cidb/.","hji,kes",1,1,2,2,1,NA,NA +33074314,TransCirc: an interactive database for translatable circular RNAs based on multi-omics evidence.,"TransCirc (https://www.biosino.org/transcirc/) is a specialized database that provide comprehensive evidences supporting the translation potential of circular RNAs (circRNAs). This database was generated by integrating various direct and indirect evidences to predict coding potential of each human circRNA and the putative translation products. Seven types of evidences for circRNA translation were included: (i) ribosome/polysome binding evidences supporting the occupancy of ribosomes onto circRNAs; (ii) experimentally mapped translation initiation sites on circRNAs; (iii) internal ribosome entry site on circRNAs; (iv) published N-6-methyladenosine modification data in circRNA that promote translation initiation; (v) lengths of the circRNA specific open reading frames; (vi) sequence composition scores from a machine learning prediction of all potential open reading frames; (vii) mass spectrometry data that directly support the circRNA encoded peptides across back-splice junctions. 
TransCirc provides a user-friendly searching/browsing interface and independent lines of evidences to predict how likely a circRNA can be translated. In addition, several flexible tools have been developed to aid retrieval and analysis of the data. TransCirc can serve as an important resource for investigating the translation capacity of circRNAs and the potential circRNA-encoded peptides, and can be expanded to include new evidences or additional species in the future.","hji,kes",1,1,2,2,1,NA,NA +33074547,"Usage of the Sea Urchin Hemicentrotus pulcherrimus Database, HpBase.","HpBase ( http://cell-innovation.nig.ac.jp/Hpul/ ) is a database that provides genome and transcriptome resources of the sea urchin Hemicentrotus pulcherrimus. In addition to downloading the bulk data, several analysis tools for resource use are available: gene search, homology search, and genome browsing. HpBase also discloses the protocols for biological experiments using H. pulcherrimus that have been accumulated so far. Therefore, HpBase can assist efficient use of genome resources for researchers from various fields-evolutionary, developmental, and cell biology. In this chapter we present an overview and usage of tools in HpBase.","hji,kes",1,1,2,2,1,NA,NA +33076954,Predicted functional interactome of Caenorhabditis elegans and a web tool for the functional interpretation of differentially expressed genes.,"

Background

The nematode worm, Caenorhabditis elegans, is a saprophytic species that has been emerging as a standard model organism since the early 1960s. This species is useful in numerous fields, including developmental biology, neurobiology, and ageing. A high-quality comprehensive molecular interaction network is needed to facilitate molecular mechanism studies in C. elegans.

Results

We present the predicted functional interactome of Caenorhabditis elegans (FIC), which integrates functional association data from 10 public databases to infer functional gene interactions on diverse functional perspectives. In this work, FIC includes 108,550 putative functional associations with balanced sensitivity and specificity, which are expected to cover 21.42% of all C. elegans protein interactions, and 29.25% of these associations may represent protein interactions. Based on FIC, we developed a gene set linkage analysis (GSLA) web tool to interpret potential functional impacts from a set of differentially expressed genes observed in transcriptome analyses.

Conclusion

We present the predicted C. elegans interactome database FIC, which is a high-quality database of predicted functional interactions among genes. The functional interactions in FIC serve as a good reference interactome for GSLA to annotate differentially expressed genes for their potential functional impacts. In a case study, the FIC/GSLA system shows more comprehensive and concise annotations compared to other widely used gene set annotation tools, including PANTHER and DAVID. FIC and its associated GSLA are available at the website http://worm.biomedtzc.cn .","hji,kes",1,1,2,2,1,NA,NA +33079988,The Dark Kinase Knowledgebase: an online compendium of knowledge and experimental results of understudied kinases.,"Kinases form the backbone of numerous cell signaling pathways, with their dysfunction similarly implicated in multiple pathologies. Further facilitated by their druggability, kinases are a major focus of therapeutic development efforts in diseases such as cancer, infectious disease and autoimmune disorders. While their importance is clear, the role or biological function of nearly one-third of kinases is largely unknown. Here, we describe a data resource, the Dark Kinase Knowledgebase (DKK; https://darkkinome.org), that is specifically focused on providing data and reagents for these understudied kinases to the broader research community. Supported through NIH's Illuminating the Druggable Genome (IDG) Program, the DKK is focused on data and knowledge generation for 162 poorly studied or 'dark' kinases. Types of data provided through the DKK include parallel reaction monitoring (PRM) peptides for quantitative proteomics, protein interactions, NanoBRET reagents, and kinase-specific compounds. Higher-level data is similarly being generated and consolidated such as tissue gene expression profiles and, longer-term, functional relationships derived through perturbation studies. 
Associated web tools that help investigators interrogate both internal and external data are also provided through the site. As an evolving resource, the DKK seeks to continually support and enhance knowledge on these potentially high-impact druggable targets.","hji,kes",1,1,2,2,1,NA,NA +33079992,PLncDB V2.0: a comprehensive encyclopedia of plant long noncoding RNAs.,"Long noncoding RNAs (lncRNAs) are transcripts longer than 200 nucleotides with little or no protein coding potential. The expanding list of lncRNAs and accumulating evidence of their functions in plants have necessitated the creation of a comprehensive database for lncRNA research. However, currently available plant lncRNA databases have some deficiencies, including the lack of lncRNA data from some model plants, uneven annotation standards, a lack of visualization for expression patterns, and the absence of epigenetic information. To overcome these problems, we upgraded our Plant Long noncoding RNA Database (PLncDB, http://plncdb.tobaccodb.org/), which was based on a uniform annotation pipeline. PLncDB V2.0 currently contains 1 246 372 lncRNAs for 80 plant species based on 13 834 RNA-Seq datasets, integrating lncRNA information from four other resources including EVLncRNAs, RNAcentral and etc. Expression patterns and epigenetic signals can be visualized using multiple tools (JBrowse, eFP Browser and EPexplorer). Targets and regulatory networks for lncRNAs are also provided for function exploration. In addition, PLncDB V2.0 is hierarchical and user-friendly and has five built-in search engines. 
We believe PLncDB V2.0 is useful for the plant lncRNA community and data mining studies and provides a comprehensive resource for data-driven lncRNA research in plants.","hji,kes",1,1,2,2,1,NA,NA +33080028,Peryton: a manual collection of experimentally supported microbe-disease associations.,"We present Peryton (https://dianalab.e-ce.uth.gr/peryton/), a database of experimentally supported microbe-disease associations. Its first version constitutes a novel resource hosting more than 7900 entries linking 43 diseases with 1396 microorganisms. Peryton's content is exclusively sustained by manual curation of biomedical articles. Diseases and microorganisms are provided in a systematic, standardized manner using reference resources to create database dictionaries. Information about the experimental design, study cohorts and the applied high- or low-throughput techniques is meticulously annotated and catered to users. Several functionalities are provided to enhance user experience and enable ingenious use of Peryton. One or more microorganisms and/or diseases can be queried at the same time. Advanced filtering options and direct text-based filtering of results enable refinement of returned information and the conducting of tailored queries suitable to different research questions. Peryton also provides interactive visualizations to effectively capture different aspects of its content and results can be directly downloaded for local storage and downstream analyses. Peryton will serve as a valuable source, enabling scientists of microbe-related disease fields to form novel hypotheses but, equally importantly, to assist in cross-validation of findings.","hji,kes",1,1,2,2,1,NA,NA +33084889,KLIFS: an overhaul after the first 5 years of supporting kinase research.,"Kinases are a prime target of drug development efforts with >60 drug approvals in the past two decades. Due to the research into this protein family, a wealth of data has been accumulated that keeps on growing. 
KLIFS-Kinase-Ligand Interaction Fingerprints and Structures-is a structural database focusing on how kinase inhibitors interact with their targets. The aim of KLIFS is to support (structure-based) kinase research through the systematic collection, annotation, and processing of kinase structures. Now, 5 years after releasing the initial KLIFS website, the database has undergone a complete overhaul with a new website, new logo, and new functionalities. In this article, we start by looking back at how KLIFS has been used by the research community, followed by a description of the renewed KLIFS, and conclude with showcasing the functionalities of KLIFS. Major changes include the integration of approved drugs and inhibitors in clinical trials, extension of the coverage to atypical kinases, and a RESTful API for programmatic access. KLIFS is available at the new domain https://klifs.net.","hji,kes",1,1,2,2,1,NA,NA +33084904,DualSeqDB: the host-pathogen dual RNA sequencing database for infection processes.,"Despite antibiotic resistance being a matter of growing concern worldwide, the bacterial mechanisms of pathogenesis remain underexplored, restraining our ability to develop new antimicrobials. The rise of high-throughput sequencing technology has made available a massive amount of transcriptomic data that could help elucidate the mechanisms underlying bacterial infection. Here, we introduce the DualSeqDB database, a resource that helps the identification of gene transcriptional changes in both pathogenic bacteria and their natural hosts upon infection. DualSeqDB comprises nearly 300 000 entries from eight different studies, with information on bacterial and host differential gene expression under in vivo and in vitro conditions. Expression data values were calculated entirely from raw data and analyzed through a standardized pipeline to ensure consistency between different studies. 
It includes information on seven different strains of pathogenic bacteria and a variety of cell types and tissues in Homo sapiens, Mus musculus and Macaca fascicularis at different time points. We envisage that DualSeqDB can help the research community in the systematic characterization of genes involved in host infection and help the development and tailoring of new molecules against infectious diseases. DualSeqDB is freely available at http://www.tartaglialab.com/dualseq.","hji,kes",1,1,2,2,1,NA,NA +33084905,MeDAS: a Metazoan Developmental Alternative Splicing database.,"Alternative splicing is widespread throughout eukaryotic genomes and greatly increases transcriptomic diversity. Many alternative isoforms have functional roles in developmental processes and are precisely temporally regulated. To facilitate the study of alternative splicing in a developmental context, we created MeDAS, a Metazoan Developmental Alternative Splicing database. MeDAS is an added-value resource that re-analyses publicly archived RNA-seq libraries to provide quantitative data on alternative splicing events as they vary across the time course of development. It has broad temporal and taxonomic scope and is intended to assist the user in identifying trends in alternative splicing throughout development. To create MeDAS, we re-analysed a curated set of 2232 Illumina polyA+ RNA-seq libraries that chart detailed time courses of embryonic and post-natal development across 18 species with a taxonomic range spanning the major metazoan lineages from Caenorhabditis elegans to human. MeDAS is freely available at https://das.chenlulab.com both as raw data tables and as an interactive browser allowing searches by species, tissue, or genomic feature (gene, transcript or exon ID and sequence). 
Results will provide details on alternative splicing events identified for the queried feature and can be visualised at the gene-, transcript- and exon-level as time courses of expression and inclusion levels, respectively.","hji,kes",1,1,2,2,1,NA,NA +33086069,SCLC-CellMiner: A Resource for Small Cell Lung Cancer Cell Line Genomics and Pharmacology Based on Genomic Signatures.,"CellMiner-SCLC (https://discover.nci.nih.gov/SclcCellMinerCDB/) integrates drug sensitivity and genomic data, including high-resolution methylome and transcriptome from 118 patient-derived small cell lung cancer (SCLC) cell lines, providing a resource for research into this ""recalcitrant cancer."" We demonstrate the reproducibility and stability of data from multiple sources and validate the SCLC consensus nomenclature on the basis of expression of master transcription factors NEUROD1, ASCL1, POU2F3, and YAP1. Our analyses reveal transcription networks linking SCLC subtypes with MYC and its paralogs and the NOTCH and HIPPO pathways. SCLC subsets express specific surface markers, providing potential opportunities for antibody-based targeted therapies. YAP1-driven SCLCs are notable for differential expression of the NOTCH pathway, epithelial-mesenchymal transition (EMT), and antigen-presenting machinery (APM) genes and sensitivity to mTOR and AKT inhibitors. These analyses provide insights into SCLC biology and a framework for future investigations into subtype-specific SCLC vulnerabilities.","hji,kes",1,1,2,2,1,NA,NA +33095860,CNCDatabase: a database of non-coding cancer drivers.,"Most mutations in cancer genomes occur in the non-coding regions with unknown impact on tumor development. Although the increase in the number of cancer whole-genome sequences has revealed numerous putative non-coding cancer drivers, their information is dispersed across multiple studies making it difficult to understand their roles in tumorigenesis of different cancer types. 
We have developed CNCDatabase, Cornell Non-coding Cancer driver Database (https://cncdatabase.med.cornell.edu/) that contains detailed information about predicted non-coding drivers at gene promoters, 5' and 3' UTRs (untranslated regions), enhancers, CTCF insulators and non-coding RNAs. CNCDatabase documents 1111 protein-coding genes and 90 non-coding RNAs with reported drivers in their non-coding regions from 32 cancer types by computational predictions of positive selection using whole-genome sequences; differential gene expression in samples with and without mutations; or another set of experimental validations including luciferase reporter assays and genome editing. The database can be easily modified and scaled as lists of non-coding drivers are revised in the community with larger whole-genome sequencing studies, CRISPR screens and further experimental validations. Overall, CNCDatabase provides a helpful resource for researchers to explore the pathological role of non-coding alterations in human cancers.","hji,kes",1,1,2,2,1,NA,NA +33095866,VARAdb: a comprehensive variation annotation database for human.,"With the study of human diseases and biological processes increasing, a large number of non-coding variants have been identified and facilitated. The rapid accumulation of genetic and epigenomic information has resulted in an urgent need to collect and process data to explore the regulation of non-coding variants. Here, we developed a comprehensive variation annotation database for human (VARAdb, http://www.licpathway.net/VARAdb/), which specifically considers non-coding variants. VARAdb provides annotation information for 577,283,813 variations and novel variants, prioritizes variations based on scores using nine annotation categories, and supports pathway downstream analysis. 
Importantly, VARAdb integrates a large amount of genetic and epigenomic data into five annotation sections, which include 'Variation information', 'Regulatory information', 'Related genes', 'Chromatin accessibility' and 'Chromatin interaction'. The detailed annotation information consists of motif changes, risk SNPs, LD SNPs, eQTLs, clinical variant-drug-gene pairs, sequence conservation, somatic mutations, enhancers, super enhancers, promoters, transcription factors, chromatin states, histone modifications, chromatin accessibility regions and chromatin interactions. This database is a user-friendly interface to query, browse and visualize variations and related annotation information. VARAdb is a useful resource for selecting potential functional variations and interpreting their effects on human diseases and biological processes.","hji,kes",1,1,2,2,1,NA,NA +33095885,IndiGenomes: a comprehensive resource of genetic variants from over 1000 Indian genomes.,"With the advent of next-generation sequencing, large-scale initiatives for mining whole genomes and exomes have been employed to better understand global or population-level genetic architecture. India encompasses more than 17% of the world population with extensive genetic diversity, but is under-represented in the global sequencing datasets. This gave us the impetus to perform and analyze the whole genome sequencing of 1029 healthy Indian individuals under the pilot phase of the 'IndiGen' program. We generated a compendium of 55,898,122 single allelic genetic variants from geographically distinct Indian genomes and calculated the allele frequency, allele count, allele number, along with the number of heterozygous or homozygous individuals. In the present study, these variants were systematically annotated using publicly available population databases and can be accessed through a browsable online database named as 'IndiGenomes' http://clingen.igib.res.in/indigen/. 
The IndiGenomes database will help clinicians and researchers in exploring the genetic component underlying medical conditions. Till date, this is the most comprehensive genetic variant resource for the Indian population and is made freely available for academic utility. The resource has also been accessed extensively by the worldwide community since it's launch.","hji,kes",1,1,2,2,1,NA,NA +33104772,The mouse Gene Expression Database (GXD): 2021 update.,"The Gene Expression Database (GXD; www.informatics.jax.org/expression.shtml) is an extensive and well-curated community resource of mouse developmental gene expression information. For many years, GXD has collected and integrated data from RNA in situ hybridization, immunohistochemistry, RT-PCR, northern blot, and western blot experiments through curation of the scientific literature and by collaborations with large-scale expression projects. Since our last report in 2019, we have continued to acquire these classical types of expression data; developed a searchable index of RNA-Seq and microarray experiments that allows users to quickly and reliably find specific mouse expression studies in ArrayExpress (https://www.ebi.ac.uk/arrayexpress/) and GEO (https://www.ncbi.nlm.nih.gov/geo/); and expanded GXD to include RNA-Seq data. Uniformly processed RNA-Seq data are imported from the EBI Expression Atlas and then integrated with the other types of expression data in GXD, and with the genetic, functional, phenotypic and disease-related information in Mouse Genome Informatics (MGI). This integration has made the RNA-Seq data accessible via GXD's enhanced searching and filtering capabilities. 
Further, we have embedded the Morpheus heat map utility into the GXD user interface to provide additional tools for display and analysis of RNA-Seq data, including heat map visualization, sorting, filtering, hierarchical clustering, nearest neighbors analysis and visual enrichment.","hji,kes",1,1,2,2,1,NA,NA +33106848,"RNAcentral 2021: secondary structure integration, improved sequence search and new member databases.","RNAcentral is a comprehensive database of non-coding RNA (ncRNA) sequences that provides a single access point to 44 RNA resources and >18 million ncRNA sequences from a wide range of organisms and RNA types. RNAcentral now also includes secondary (2D) structure information for >13 million sequences, making RNAcentral the world's largest RNA 2D structure database. The 2D diagrams are displayed using R2DT, a new 2D structure visualization method that uses consistent, reproducible and recognizable layouts for related RNAs. The sequence similarity search has been updated with a faster interface featuring facets for filtering search results by RNA type, organism, source database or any keyword. This sequence search tool is available as a reusable web component, and has been integrated into several RNAcentral member databases, including Rfam, miRBase and snoDB. To allow for a more fine-grained assignment of RNA types and subtypes, all RNAcentral sequences have been annotated with Sequence Ontology terms. The RNAcentral database continues to grow and provide a central data resource for the RNA community. RNAcentral is freely available at https://rnacentral.org.","hji,kes",1,1,2,2,1,NA,NA +33112702,LncRBase V.2: an updated resource for multispecies lncRNAs and ClinicLSNP hosting genetic variants in lncRNAs for cancer patients.,"The recent discovery of long non-coding RNA as a regulatory molecule in the cellular system has altered the concept of the functional aptitude of the genome. 
Since our publication of the first version of LncRBase in 2014, there has been an enormous increase in the number of annotated lncRNAs of multiple species other than Human and Mouse. LncRBase V.2 hosts information of 549,648 lncRNAs corresponding to six additional species besides Human and Mouse, viz. Rat, Fruitfly, Zebrafish, Chicken, Cow and C.elegans. It provides additional distinct features such as (i) Transcription Factor Binding Site (TFBS) in the lncRNA promoter region, (ii) sub-cellular localization pattern of lncRNAs (iii) lnc-pri-miRNAs (iv) Possible small open reading frames (sORFs) within lncRNA. (v) Manually curated information of interacting target molecules and disease association of lncRNA genes (vi) Distribution of lncRNAs across multiple tissues of all species. Moreover, we have hosted ClinicLSNP within LncRBase V.2. ClinicLSNP has a comprehensive catalogue of lncRNA variants present within breast, ovarian, and cervical cancer inferred from 561 RNA-Seq data corresponding to these cancers. Further, we have checked whether these lncRNA variants overlap with (i)Repeat elements,(ii)CGI, (iii)TFBS within lncRNA loci (iv)SNP localization in trait-associated Linkage Disequilibrium(LD) region, (v)predicted the potentially pathogenic variants and (vi)effect of SNP on lncRNA secondary structure. Overall, LncRBaseV.2 is a user-friendly database to survey, search and retrieve information about multi-species lncRNAs. Further, ClinicLSNP will serve as a useful resource for cancer specific lncRNA variants and their related information. The database is freely accessible and available at http://dibresources.jcbose.ac.in/zhumur/lncrbase2/.","hji,kes",1,1,2,2,1,NA,NA +33119751,The MemMoRF database for recognizing disordered protein regions interacting with cellular membranes.,"Protein and lipid membrane interactions play fundamental roles in a large number of cellular processes (e.g. signalling, vesicle trafficking, or viral invasion). 
A growing number of examples indicate that such interactions can also rely on intrinsically disordered protein regions (IDRs), which can form specific reversible interactions not only with proteins but also with lipids. We named IDRs involved in such membrane lipid-induced disorder-to-order transition as MemMoRFs, in an analogy to IDRs exhibiting disorder-to-order transition upon interaction with protein partners termed Molecular Recognition Features (MoRFs). Currently, both the experimental detection and computational characterization of MemMoRFs are challenging, and information about these regions are scattered in the literature. To facilitate the related investigations we generated a comprehensive database of experimentally validated MemMoRFs based on manual curation of literature and structural data. To characterize the dynamics of MemMoRFs, secondary structure propensity and flexibility calculated from nuclear magnetic resonance chemical shifts were incorporated into the database. These data were supplemented by inclusion of sentences from papers, functional data and disease-related information. The MemMoRF database can be accessed via a user-friendly interface at https://memmorf.hegelab.org, potentially providing a central resource for the characterization of disordered regions in transmembrane and membrane-associated proteins.","hji,kes",1,1,2,2,1,NA,NA +33125078,Pfam: The protein families database in 2021.,"The Pfam database is a widely used resource for classifying protein sequences into families and domains. Since Pfam was last described in this journal, over 350 new families have been added in Pfam 33.1 and numerous improvements have been made to existing entries. To facilitate research on COVID-19, we have revised the Pfam entries that cover the SARS-CoV-2 proteome, and built new entries for regions that were not covered by Pfam. 
We have reintroduced Pfam-B which provides an automatically generated supplement to Pfam and contains 136 730 novel clusters of sequences that are not yet matched by a Pfam family. The new Pfam-B is based on a clustering by the MMseqs2 software. We have compared all of the regions in the RepeatsDB to those in Pfam and have started to use the results to build and refine Pfam repeat families. Pfam is freely available for browsing and download at http://pfam.xfam.org/.","hji,kes",1,1,2,2,1,NA,NA +33125081,KEGG: integrating viruses and cellular organisms.,"KEGG (https://www.kegg.jp/) is a manually curated resource integrating eighteen databases categorized into systems, genomic, chemical and health information. It also provides KEGG mapping tools, which enable understanding of cellular and organism-level functions from genome sequences and other molecular datasets. KEGG mapping is a predictive method of reconstructing molecular network systems from molecular building blocks based on the concept of functional orthologs. Since the introduction of the KEGG NETWORK database, various diseases have been associated with network variants, which are perturbed molecular networks caused by human gene variants, viruses, other pathogens and environmental factors. The network variation maps are created as aligned sets of related networks showing, for example, how different viruses inhibit or activate specific cellular signaling pathways. The KEGG pathway maps are now integrated with network variation maps in the NETWORK database, as well as with conserved functional units of KEGG modules and reaction modules in the MODULE database. 
The KO database for functional orthologs continues to be improved and virus KOs are being expanded for better understanding of virus-cell interactions and for enabling prediction of viral perturbations.","hji,kes",1,1,2,2,1,NA,NA +33125652,Navigating the Global Protein-Protein Interaction Landscape Using iRefWeb.,"iRefWeb is a resource that provides web interface to a large collection of protein-protein interactions aggregated from major primary databases. The underlying data-consolidation process, called iRefIndex, implements a rigorous methodology of identifying redundant protein sequences and integrating disparate data records that reference the same peptide sequences, despite many potential differences in data identifiers across various source databases. iRefWeb offers a unified user interface to all interaction records and associated information collected by iRefIndex, in addition to a number of data filters and visual features that present the supporting evidence. Users of iRefWeb can explore the consolidated landscape of protein-protein interactions, establish the provenance and reliability of each data record, and compare annotations performed by different data curator teams. The iRefWeb portal is freely available at http://wodaklab.org/iRefWeb .","hji,kes",0,1,1,2,0.5,not a data source on its own,no notes; reassessed and still yes - software and includes a data resource +33137173,Global Substance Registration System: consistent scientific descriptions for substances related to health.,"The US Food and Drug Administration (FDA) and the National Center for Advancing Translational Sciences (NCATS) have collaborated to publish rigorous scientific descriptions of substances relevant to regulated products. The FDA has adopted the global ISO 11238 data standard for the identification of substances in medicinal products and has populated a database to organize the agency's regulatory submissions and marketed products data. 
NCATS has worked with FDA to develop the Global Substance Registration System (GSRS) and produce a non-proprietary version of the database for public benefit. In 2019, more than half of all new drugs in clinical development were proteins, nucleic acid therapeutics, polymer products, structurally diverse natural products or cellular therapies. While multiple databases of small molecule chemical structures are available, this resource is unique in its application of regulatory standards for the identification of medicinal substances and its robust support for other substances in addition to small molecules. This public, manually curated dataset provides unique ingredient identifiers (UNIIs) and detailed descriptions for over 100 000 substances that are particularly relevant to medicine and translational research. The dataset can be accessed and queried at https://gsrs.ncats.nih.gov/app/substances.","hji,kes",1,1,2,2,1,NA,NA +33137183,IMG/VR v3: an integrated ecological and evolutionary framework for interrogating genomes of uncultivated viruses.,"Viruses are integral components of all ecosystems and microbiomes on Earth. Through pervasive infections of their cellular hosts, viruses can reshape microbial community structure and drive global nutrient cycling. Over the past decade, viral sequences identified from genomes and metagenomes have provided an unprecedented view of viral genome diversity in nature. Since 2016, the IMG/VR database has provided access to the largest collection of viral sequences obtained from (meta)genomes. Here, we present the third version of IMG/VR, composed of 18 373 cultivated and 2 314 329 uncultivated viral genomes (UViGs), nearly tripling the total number of sequences compared to the previous version. These clustered into 935 362 viral Operational Taxonomic Units (vOTUs), including 188 930 with two or more members. 
UViGs in IMG/VR are now reported as single viral contigs, integrated proviruses or genome bins, and are annotated with a new standardized pipeline including genome quality estimation using CheckV, taxonomic classification reflecting the latest ICTV update, and expanded host taxonomy prediction. The new IMG/VR interface enables users to efficiently browse, search, and select UViGs based on genome features and/or sequence similarity. IMG/VR v3 is available at https://img.jgi.doe.gov/vr, and the underlying data are available to download at https://genome.jgi.doe.gov/portal/IMG_VR.","hji,kes",1,1,2,2,1,NA,NA +33137190,Ensembl 2021.,"The Ensembl project (https://www.ensembl.org) annotates genomes and disseminates genomic data for vertebrate species. We create detailed and comprehensive annotation of gene structures, regulatory elements and variants, and enable comparative genomics by inferring the evolutionary history of genes and genomes. Our integrated genomic data are made available in a variety of ways, including genome browsers, search interfaces, specialist tools such as the Ensembl Variant Effect Predictor, download files and programmatic interfaces. Here, we present recent Ensembl developments including two new website portals. Ensembl Rapid Release (http://rapid.ensembl.org) is designed to provide core tools and services for genomes as soon as possible and has been deployed to support large biodiversity sequencing projects. Our SARS-CoV-2 genome browser (https://covid-19.ensembl.org) integrates our own annotation with publicly available genomic data from numerous sources to facilitate the use of genomics in the international scientific response to the COVID-19 pandemic. We also report on other updates to our annotation resources, tools and services. 
All Ensembl data and software are freely available without restriction.","hji,kes",1,1,2,2,1,NA,NA +33137192,Plant-ImputeDB: an integrated multiple plant reference panel database for genotype imputation.,"Genotype imputation is a process that estimates missing genotypes in terms of the haplotypes and genotypes in a reference panel. It can effectively increase the density of single nucleotide polymorphisms (SNPs), boost the power to identify genetic association and promote the combination of genetic studies. However, there has been a lack of high-quality reference panels for most plants, which greatly hinders the application of genotype imputation. Here, we developed Plant-ImputeDB (http://gong_lab.hzau.edu.cn/Plant_imputeDB/), a comprehensive database with reference panels of 12 plant species for online genotype imputation, SNP and block search and free download. By integrating genotype data and whole-genome resequencing data of plants from various studies and databases, the current Plant-ImputeDB provides high-quality reference panels of 12 plant species, including ∼69.9 million SNPs from 34 244 samples. It also provides an easy-to-use online tool with the option of two popular tools specifically designed for genotype imputation. In addition, Plant-ImputeDB accepts submissions of different types of genomic variations, and provides free and open access to all publicly available data in support of related research worldwide. In general, Plant-ImputeDB may serve as an important resource for plant genotype imputation and greatly facilitate the research on plant genetic research.","hji,kes",1,1,2,2,1,NA,NA +33147626,CellTalkDB: a manually curated database of ligand-receptor interactions in humans and mice.,"Cell-cell communications in multicellular organisms generally involve secreted ligand-receptor (LR) interactions, which is vital for various biological phenomena. 
Recent advancements in single-cell RNA sequencing (scRNA-seq) have effectively resolved cellular phenotypic heterogeneity and the cell-type composition of complex tissues, facilitating the systematic investigation of cell-cell communications at single-cell resolution. However, assessment of chemical-signal-dependent cell-cell communication through scRNA-seq relies heavily on prior knowledge of LR interaction pairs. We constructed CellTalkDB (http://tcm.zju.edu.cn/celltalkdb), a manually curated comprehensive database of LR interaction pairs in humans and mice comprising 3398 human LR pairs and 2033 mouse LR pairs, through text mining and manual verification of known protein-protein interactions using the STRING database, with literature-supported evidence for each pair. Compared with SingleCellSignalR, the largest LR-pair resource, CellTalkDB includes not only 2033 mouse LR pairs but also 377 additional human LR pairs. In conclusion, the data on human and mouse LR pairs contained in CellTalkDB could help to further the inference and understanding of the LR-interaction-based cell-cell communications, which might provide new insights into the mechanism underlying biological processes.","hji,kes",1,1,2,2,1,NA,NA +33151287,DrugCentral 2021 supports drug discovery and repositioning.,"DrugCentral is a public resource (http://drugcentral.org) that serves the scientific community by providing up-to-date drug information, as described in previous papers. The current release includes 109 newly approved (October 2018 through March 2020) active pharmaceutical ingredients in the US, Europe, Japan and other countries; and two molecular entities (e.g. mefuparib) of interest for COVID19. 
New additions include a set of pharmacokinetic properties for ∼1000 drugs, and a sex-based separation of side effects, processed from FAERS (FDA Adverse Event Reporting System); as well as a drug repositioning prioritization scheme based on the market availability and intellectual property rights forFDA approved drugs. In the context of the COVID19 pandemic, we also incorporated REDIAL-2020, a machine learning platform that estimates anti-SARS-CoV-2 activities, as well as the 'drugs in news' feature offers a brief enumeration of the most interesting drugs at the present moment. The full database dump and data files are available for download from the DrugCentral web portal.","hji,kes",1,1,2,2,1,NA,NA +33151298,GRNdb: decoding the gene regulatory networks in diverse human and mouse conditions.,"Gene regulatory networks (GRNs) formed by transcription factors (TFs) and their downstream target genes play essential roles in gene expression regulation. Moreover, GRNs can be dynamic changing across different conditions, which are crucial for understanding the underlying mechanisms of disease pathogenesis. However, no existing database provides comprehensive GRN information for various human and mouse normal tissues and diseases at the single-cell level. Based on the known TF-target relationships and the large-scale single-cell RNA-seq data collected from public databases as well as the bulk data of The Cancer Genome Atlas and the Genotype-Tissue Expression project, we systematically predicted the GRNs of 184 different physiological and pathological conditions of human and mouse involving >633 000 cells and >27 700 bulk samples. We further developed GRNdb, a freely accessible and user-friendly database (http://www.grndb.com/) for searching, comparing, browsing, visualizing, and downloading the predicted information of 77 746 GRNs, 19 687 841 TF-target pairs, and related binding motifs at single-cell/bulk resolution. 
GRNdb also allows users to explore the gene expression profile, correlations, and the associations between expression levels and the patient survival of diverse cancers. Overall, GRNdb provides a valuable and timely resource to the scientific community to elucidate the functions and mechanisms of gene expression regulation in various conditions.","hji,kes",1,1,2,2,1,NA,NA +33152070,Genenames.org: the HGNC and VGNC resources in 2021.,"The HUGO Gene Nomenclature Committee (HGNC) based at EMBL's European Bioinformatics Institute (EMBL-EBI) assigns unique symbols and names to human genes. There are over 42,000 approved gene symbols in our current database of which over 19 000 are for protein-coding genes. While we still update placeholder and problematic symbols, we are working towards stabilizing symbols where possible; over 2000 symbols for disease associated genes are now marked as stable in our symbol reports. All of our data is available at the HGNC website https://www.genenames.org. The Vertebrate Gene Nomenclature Committee (VGNC) was established to assign standardized nomenclature in line with human for vertebrate species lacking their own nomenclature committee. In addition to the previous VGNC core species of chimpanzee, cow, horse and dog, we now name genes in cat, macaque and pig. Gene groups have been added to VGNC and currently include two complex families: olfactory receptors (ORs) and cytochrome P450s (CYPs). In collaboration with specialists we have also named CYPs in species beyond our core set. All VGNC data is available at https://vertebrate.genenames.org/. 
This article provides an overview of our online data and resources, focusing on updates over the last two years.","hji,kes",1,1,2,2,1,NA,nomenclature database +33156326,MetaNetX/MNXref: unified namespace for metabolites and biochemical reactions in the context of metabolic models.,"MetaNetX/MNXref is a reconciliation of metabolites and biochemical reactions providing cross-links between major public biochemistry and Genome-Scale Metabolic Network (GSMN) databases. The new release brings several improvements with respect to the quality of the reconciliation, with particular attention dedicated to preserving the intrinsic properties of GSMN models. The MetaNetX website (https://www.metanetx.org/) provides access to the full database and online services. A major improvement is for mapping of user-provided GSMNs to MXNref, which now provides diagnostic messages about model content. In addition to the website and flat files, the resource can now be accessed through a SPARQL endpoint (https://rdf.metanetx.org).","hji,kes",1,1,2,2,1,NA,NA +33156327,TCRD and Pharos 2021: mining the human proteome for disease biology.,"In 2014, the National Institutes of Health (NIH) initiated the Illuminating the Druggable Genome (IDG) program to identify and improve our understanding of poorly characterized proteins that can potentially be modulated using small molecules or biologics. Two resources produced from these efforts are: The Target Central Resource Database (TCRD) (http://juniper.health.unm.edu/tcrd/) and Pharos (https://pharos.nih.gov/), a web interface to browse the TCRD. The ultimate goal of these resources is to highlight and facilitate research into currently understudied proteins, by aggregating a multitude of data sources, and ranking targets based on the amount of data available, and presenting data in machine learning ready format. 
Since the 2017 release, both TCRD and Pharos have produced two major releases, which have incorporated or expanded an additional 25 data sources. Recently incorporated data types include human and viral-human protein-protein interactions, protein-disease and protein-phenotype associations, and drug-induced gene signatures, among others. These aggregated data have enabled us to generate new visualizations and content sections in Pharos, in order to empower users to find new areas of study in the druggable genome.","hji,kes",1,1,2,2,1,NA,NA +33156332,DDBJ update: streamlining submission and access of human data.,"The Bioinformation and DDBJ Center (DDBJ Center, https://www.ddbj.nig.ac.jp) provides databases that capture, preserve and disseminate diverse biological data to support research in the life sciences. This center collects nucleotide sequences with annotations, raw sequencing data, and alignment information from high-throughput sequencing platforms, and study and sample information, in collaboration with the National Center for Biotechnology Information (NCBI) and the European Bioinformatics Institute (EBI). This collaborative framework is known as the International Nucleotide Sequence Database Collaboration (INSDC). In collaboration with the National Bioscience Database Center (NBDC), the DDBJ Center also provides a controlled-access database, the Japanese Genotype-phenotype Archive (JGA), which archives and distributes human genotype and phenotype data, requiring authorized access. The NBDC formulates guidelines and policies for sharing human data and reviews data submission and use applications. To streamline all of the processes at NBDC and JGA, we have integrated the two systems by introducing a unified login platform with a group structure in September 2020. In addition to the public databases, the DDBJ Center provides a computer resource, the NIG supercomputer, for domestic researchers to analyze large-scale genomic data. 
This report describes updates to the services of the DDBJ Center, focusing on the NBDC and JGA system enhancements.","hji,kes",1,1,2,2,1,NA,NA +33156333,The InterPro protein families and domains database: 20 years on.,"The InterPro database (https://www.ebi.ac.uk/interpro/) provides an integrative classification of protein sequences into families, and identifies functionally important domains and conserved sites. InterProScan is the underlying software that allows protein and nucleic acid sequences to be searched against InterPro's signatures. Signatures are predictive models which describe protein families, domains or sites, and are provided by multiple databases. InterPro combines signatures representing equivalent families, domains or sites, and provides additional information such as descriptions, literature references and Gene Ontology (GO) terms, to produce a comprehensive resource for protein classification. Founded in 1999, InterPro has become one of the most widely used resources for protein family annotation. Here, we report the status of InterPro (version 81.0) in its 20th year of operation, and its associated software, including updates to database content, the release of a new website and REST API, and performance improvements in InterProScan.","hji,kes",1,1,2,2,1,NA,NA +33166388,LegumeIP V3: from models to crops-an integrative gene discovery platform for translational genomics in legumes.,"Legumes have contributed to human health, sustainable food and feed production worldwide for centuries. The study of model legumes has played vital roles in deciphering key genes, pathways, and networks regulating biological mechanisms and agronomic traits. Along with emerging breeding technology such as genome editing, translation of the knowledge gained from model plants to crops is in high demand. 
The updated database (V3) was redesigned for translational genomics targeting the discovery of novel key genes in less-studied non-model legume crops by referring to the knowledge gained in model legumes. The database contains genomic data for all 22 included species, and transcriptomic data covering thousands of RNA-seq samples mostly from model species. The rich biological data and analytic tools for gene expression and pathway analyses can be used to decipher critical genes, pathways, and networks in model legumes. The integrated comparative genomic functions further facilitate the translation of this knowledge to legume crops. Therefore, the database will be a valuable resource to identify important genes regulating specific biological mechanisms or agronomic traits in the non-model yet economically significant legume crops. LegumeIP V3 is available free to the public at https://plantgrn.noble.org/LegumeIP. Access to the database does not require login, registration, or password.","hji,kes",1,1,2,2,1,NA,NA +33170210,The Zebrafish Information Network: major gene page and home page updates.,"The Zebrafish Information Network (ZFIN) (https://zfin.org/) is the database for the model organism, zebrafish (Danio rerio). ZFIN expertly curates, organizes, and provides a wide array of zebrafish genetic and genomic data, including genes, alleles, transgenic lines, gene expression, gene function, mutant phenotypes, orthology, human disease models, gene and mutant nomenclature, and reagents. New features at ZFIN include major updates to the home page and the gene page, the two most used pages at ZFIN. 
Data including disease models, phenotypes, expression, mutants and gene function continue to be contributed to The Alliance of Genome Resources for integration with similar data from other model organisms.","hji,kes",1,1,2,2,1,NA,NA +33174598,"LectomeXplore, an update of UniLectin for the discovery of carbohydrate-binding proteins based on a new lectin classification.","Lectins are non-covalent glycan-binding proteins mediating cellular interactions but their annotation in newly sequenced organisms is lacking. The limited size of functional domains and the low level of sequence similarity challenge usual bioinformatics tools. The identification of lectin domains in proteomes requires the manual curation of sequence alignments based on structural folds. A new lectin classification is proposed. It is built on three levels: (i) 35 lectin domain folds, (ii) 109 classes of lectins sharing at least 20% sequence similarity and (iii) 350 families of lectins sharing at least 70% sequence similarity. This information is compiled in the UniLectin platform that includes the previously described UniLectin3D database of curated lectin 3D structures. Since its first release, UniLectin3D has been updated with 485 additional 3D structures. The database is now complemented by two additional modules: PropLec containing predicted β-propeller lectins and LectomeXplore including predicted lectins from sequences of the NBCI-nr and UniProt for every curated lectin class. UniLectin is accessible at https://www.unilectin.eu/.","hji,kes",1,1,2,2,1,NA,NA +33174605,"OMA orthology in 2021: website overhaul, conserved isoforms, ancestral gene order and more.","OMA is an established resource to elucidate evolutionary relationships among genes from currently 2326 genomes covering all domains of life. OMA provides pairwise and groupwise orthologs, functional annotations, local and global gene order conservation (synteny) information, among many other functions. 
This update paper describes the reorganisation of the database into gene-, group- and genome-centric pages. Other new and improved features are detailed, such as reporting of the evolutionarily best conserved isoforms of alternatively spliced genes, the inferred local order of ancestral genes, phylogenetic profiling, better cross-references, fast genome mapping, semantic data sharing via RDF, as well as a special coronavirus OMA with 119 viruses from the Nidovirales order, including SARS-CoV-2, the agent of the COVID-19 pandemic. We conclude with improvements to the documentation of the resource through primers, tutorials and short videos. OMA is accessible at https://omabrowser.org.","hji,kes",1,1,2,2,1,NA,NA +33175872,High density genotype storage for plant breeding in the Chado schema of Breedbase.,"Modern breeding programs routinely use genome-wide information for selecting individuals to advance. The large volumes of genotypic information required present a challenge for data storage and query efficiency. Major use cases require genotyping data to be linked with trait phenotyping data. In contrast to phenotyping data that are often stored in relational database schemas, next-generation genotyping data are traditionally stored in non-relational storage systems due to their extremely large scope. This study presents a novel data model implemented in Breedbase (https://breedbase.org/) for uniting relational phenotyping data and non-relational genotyping data within the open-source PostgreSQL database engine. Breedbase is an open-source, web-database designed to manage all of a breeder's informatics needs: management of field experiments, phenotypic and genotypic data collection and storage, and statistical analyses. The genotyping data is stored in a PostgreSQL data-type known as binary JavaScript Object Notation (JSONb), where the JSON structures closely follow the Variant Call Format (VCF) data model. 
The Breedbase genotyping data model can handle different ploidy levels, structural variants, and any genotype encoded in VCF. JSONb is both compressed and indexed, resulting in a space and time efficient system. Furthermore, file caching maximizes data retrieval performance. Integration of all breeding data within the Chado database schema retains referential integrity that may be lost when genotyping and phenotyping data are stored in separate systems. Benchmarking demonstrates that the system is fast enough for computation of a genomic relationship matrix (GRM) and genome wide association study (GWAS) for datasets involving 1,325 diploid Zea mays, 314 triploid Musa acuminata, and 924 diploid Manihot esculenta samples genotyped with 955,690, 142,119, and 287,952 genotype-by-sequencing (GBS) markers, respectively.","hji,kes",1,1,2,2,1,NA,NA +33179747,jMorp updates in 2020: large enhancement of multi-omics data resources on the general Japanese population.,"In the Tohoku Medical Megabank project, genome and omics analyses of participants in two cohort studies were performed. A part of the data is available at the Japanese Multi Omics Reference Panel (jMorp; https://jmorp.megabank.tohoku.ac.jp) as a web-based database, as reported in our previous manuscript published in Nucleic Acid Research in 2018. At that time, jMorp mainly consisted of metabolome data; however, now genome, methylome, and transcriptome data have been integrated in addition to the enhancement of the number of samples for the metabolome data. For genomic data, jMorp provides a Japanese reference sequence obtained using de novo assembly of sequences from three Japanese individuals and allele frequencies obtained using whole-genome sequencing of 8,380 Japanese individuals. 
In addition, the omics data include methylome and transcriptome data from ∼300 samples and distribution of concentrations of more than 755 metabolites obtained using high-throughput nuclear magnetic resonance and high-sensitivity mass spectrometry. In summary, jMorp now provides four different kinds of omics data (genome, methylome, transcriptome, and metabolome), with a user-friendly web interface. This will be a useful scientific data resource on the general population for the discovery of disease biomarkers and personalized disease prevention and early diagnosis.","hji,kes",1,1,2,2,1,NA,NA +33186585,PolarProtDb: A Database of Transmembrane and Secreted Proteins showing Apical-Basal Polarity.,"Most cells in multicellular organisms are somehow asymmetric, polarized: maintaining separate membrane domains. Typical examples are the epithelial cells (apical-basal polarization), neurons (dendritic-axonal domains), or migratory cells (with a leading and a trailing edge). Here we present the most comprehensive database containing experimentally verified mammalian proteins that display polarized sorting or secretion, focusing on epithelial polarity. In addition to the source cells or tissues, homology-based inferences and transmembrane topology (if applicable) are all provided. PolarProtDb also offers a detailed interface displaying all information that may be relevant for trafficking: including post-translational modifications (glycosylations and phosphorylations), known or predicted short linear motifs conserved across orthologs, as well as potential interaction partners. Data on polarized sorting has so far been scattered across myriads of publications, hence difficult to access. This information can help researchers in several areas, such as scanning for potential entry points of viral agents like COVID-19. PolarProtDb shall be a useful resource to design future experiments as well as for comparative analyses. 
The database is available at http://polarprotdb.enzim.hu.","hji,kes",1,1,2,2,1,NA,NA +33196830,GenBank.,"GenBank® (https://www.ncbi.nlm.nih.gov/genbank/) is a comprehensive, public database that contains 9.9 trillion base pairs from over 2.1 billion nucleotide sequences for 478 000 formally described species. Daily data exchange with the European Nucleotide Archive and the DNA Data Bank of Japan ensures worldwide coverage. Recent updates include new resources for data from the SARS-CoV-2 virus, updates to the NCBI Submission Portal and associated submission wizards for dengue and SARS-CoV-2 viruses, new taxonomy queries for viruses and prokaryotes, and simplified submission processes for EST and GSS sequences.","hji,kes",1,1,2,2,1,NA,NA +33211864,FANTOM enters 20th year: expansion of transcriptomic atlases and functional annotation of non-coding RNAs.,"The Functional ANnoTation Of the Mammalian genome (FANTOM) Consortium has continued to provide extensive resources in the pursuit of understanding the transcriptome, and transcriptional regulation, of mammalian genomes for the last 20 years. To share these resources with the research community, the FANTOM web-interfaces and databases are being regularly updated, enhanced and expanded with new data types. In recent years, the FANTOM Consortium's efforts have been mainly focused on creating new non-coding RNA datasets and resources. The existing FANTOM5 human and mouse miRNA atlas was supplemented with rat, dog, and chicken datasets. The sixth (latest) edition of the FANTOM project was launched to assess the function of human long non-coding RNAs (lncRNAs). From its creation until 2020, FANTOM6 has contributed to the research community a large dataset generated from the knock-down of 285 lncRNAs in human dermal fibroblasts; this is followed with extensive expression profiling and cellular phenotyping. 
Other updates to the FANTOM resource includes the reprocessing of the miRNA and promoter atlases of human, mouse and chicken with the latest reference genome assemblies. To facilitate the use and accessibility of all above resources we further enhanced FANTOM data viewers and web interfaces. The updated FANTOM web resource is publicly available at https://fantom.gsc.riken.jp/.","hji,kes",1,1,2,2,1,NA,NA +33211869,"Rfam 14: expanded coverage of metagenomic, viral and microRNA families.","Rfam is a database of RNA families where each of the 3444 families is represented by a multiple sequence alignment of known RNA sequences and a covariance model that can be used to search for additional members of the family. Recent developments have involved expert collaborations to improve the quality and coverage of Rfam data, focusing on microRNAs, viral and bacterial RNAs. We have completed the first phase of synchronising microRNA families in Rfam and miRBase, creating 356 new Rfam families and updating 40. We established a procedure for comprehensive annotation of viral RNA families starting with Flavivirus and Coronaviridae RNAs. We have also increased the coverage of bacterial and metagenome-based RNA families from the ZWD database. These developments have enabled a significant growth of the database, with the addition of 759 new families in Rfam 14. To facilitate further community contribution to Rfam, expert users are now able to build and submit new families using the newly developed Rfam Cloud family curation system. New Rfam website features include a new sequence similarity search powered by RNAcentral, as well as search and visualisation of families with pseudoknots. 
Rfam is freely available at https://rfam.org.","hji,kes",1,1,2,2,1,NA,NA +33211879,From ArrayExpress to BioStudies.,"ArrayExpress (https://www.ebi.ac.uk/arrayexpress) is an archive of functional genomics data at EMBL-EBI, established in 2002, initially as an archive for publication-related microarray data and was later extended to accept sequencing-based data. Over the last decade an increasing share of biological experiments involve multiple technologies assaying different biological modalities, such as epigenetics, and RNA and protein expression, and thus the BioStudies database (https://www.ebi.ac.uk/biostudies) was established to deal with such multimodal data. Its central concept is a study, which typically is associated with a publication. BioStudies stores metadata describing the study, provides links to the relevant databases, such as European Nucleotide Archive (ENA), as well as hosts the types of data for which specialized databases do not exist. With BioStudies now fully functional, we are able to further harmonize the archival data infrastructure at EMBL-EBI, and ArrayExpress is being migrated to BioStudies. In future, all functional genomics data will be archived at BioStudies. The process will be seamless for the users, who will continue to submit data using the online tool Annotare and will be able to query and download data largely in the same manner as before. Nevertheless, some technical aspects, particularly programmatic access, will change. This update guides the users through these changes.","hji,kes",1,1,2,2,1,NA,NA +33211880,"BRENDA, the ELIXIR core data resource in 2021: new developments and updates.","The BRENDA enzyme database (https://www.brenda-enzymes.org), established in 1987, has evolved into the main collection of functional enzyme and metabolism data. In 2018, BRENDA was selected as an ELIXIR Core Data Resource. 
BRENDA provides reliable data, continuous curation and updates of classified enzymes, and the integration of newly discovered enzymes. The main part contains >5 million data for ∼90 000 enzymes from ∼13 000 organisms, manually extracted from ∼157 000 primary literature references, combined with information of text and data mining, data integration, and prediction algorithms. Supplements comprise disease-related data, protein sequences, 3D structures, genome annotations, ligand information, taxonomic, bibliographic, and kinetic data. BRENDA offers an easy access to enzyme information from quick to advanced searches, text- and structured-based queries for enzyme-ligand interactions, word maps, and visualization of enzyme data. The BRENDA Pathway Maps are completely revised and updated for an enhanced interactive and intuitive usability. The new design of the Enzyme Summary Page provides an improved access to each individual enzyme. A new protein structure 3D viewer was integrated. The prediction of the intracellular localization of eukaryotic enzymes has been implemented. The new EnzymeDetector combines BRENDA enzyme annotations with protein and genome databases for the detection of eukaryotic and prokaryotic enzymes.","hji,kes",1,1,2,2,1,NA,NA +33216893,"DPL: a comprehensive database on sequences, structures, sources and functions of peptide ligands.","DPL (http://www.peptide-ligand.cn/) is a comprehensive database of peptide ligand (DPL). DPL1.0 holds 1044 peptide ligand entries and provides references for the study of the polypeptide platform. The data were collected from PubMed-NCBI, PDB, APD3, CAMPR3, etc. The lengths of the base sequences are varied from 3 to78. DPL database has 923 linear peptides and 88 cyclic peptides. The functions of peptides collected by DPL are very wide. 
It includes 540 entries of antiviral peptides (including SARS-CoV-2), 55 entries of signal peptides, 48 entries of protease inhibitors, 45 entries of anti-hypertension, 37 entries of anticancer peptides, etc. There are 270 different kinds of peptide targets. All peptides in DPL have clear binding targets. Most of the peptides and receptors have 3D structures experimentally verified or predicted by CYCLOPS, I-TASSER and SWISS-MODEL. With the rapid development of the COVID-2019 epidemic, this database also collects the research progress of peptides against coronavirus. In conclusion, DPL is a unique resource, which allows users easily to explore the targets, different structures as well as properties of peptides.","hji,kes",1,1,2,2,1,NA,NA +33216899,WCSdb: a database of wild Coffea species.,"Coffee is a beverage enjoyed by millions of people worldwide and an important commodity for millions of people. Beside the two cultivated species (Coffea arabica and Coffea canephora), the 139 wild coffee species/taxa belonging to the Coffea genus are largely unknown to coffee scientists and breeders although these species may be crucial for future coffee crop development to face climate changes. Here we present the Wild Coffee Species database (WCSdb) hosted by Pl@ntNet platform (http://publish.plantnet-project.org/project/wildcofdb_en), providing information for 141 coffee species/taxa, for which 84 contain a photo gallery and 82 contain sequencing data (genotyping-by-sequencing, chloroplast or whole genome sequences). The objective of this database is to better understand and characterize the species (identification, morphology, biochemical compounds, genetic diversity and sequence data) in order to better protect and promote them.

Database url

http://publish.plantnet-project.org/project/wildcofdb_en.","hji,kes",1,1,2,2,1,NA,NA +33219674,canSAR: update to the cancer translational research and drug discovery knowledgebase.,"canSAR (http://cansar.icr.ac.uk) is the largest, public, freely available, integrative translational research and drug discovery knowledgebase for oncology. canSAR integrates vast multidisciplinary data from across genomic, protein, pharmacological, drug and chemical data with structural biology, protein networks and more. It also provides unique data, curation and annotation and crucially, AI-informed target assessment for drug discovery. canSAR is widely used internationally by academia and industry. Here we describe significant developments and enhancements to the data, web interface and infrastructure of canSAR in the form of the new implementation of the system: canSARblack. We demonstrate new functionality in aiding translation hypothesis generation and experimental design, and show how canSAR can be adapted and utilised outside oncology.","hji,kes",1,1,2,2,1,NA,NA +33219686,LnCeCell: a comprehensive database of predicted lncRNA-associated ceRNA networks at single-cell resolution.,"Within the tumour microenvironment, cells exhibit different behaviours driven by fine-tuning of gene regulation. Identification of cellular-specific gene regulatory networks will deepen the understanding of disease pathology at single-cell resolution and contribute to the development of precision medicine. Here, we describe a database, LnCeCell (http://www.bio-bigdata.net/LnCeCell/ or http://bio-bigdata.hrbmu.edu.cn/LnCeCell/), which aims to document cellular-specific long non-coding RNA (lncRNA)-associated competing endogenous RNA (ceRNA) networks for personalised characterisation of diseases based on the 'One Cell, One World' theory. 
LnCeCell is curated with cellular-specific ceRNA regulations from >94 000 cells across 25 types of cancers and provides >9000 experimentally supported lncRNA biomarkers, associated with tumour metastasis, recurrence, prognosis, circulation, drug resistance, etc. For each cell, LnCeCell illustrates a global map of ceRNA sub-cellular locations, which have been manually curated from the literature and related data sources, and portrays a functional state atlas for a single cancer cell. LnCeCell also provides several flexible tools to infer ceRNA functions based on a specific cellular background. LnCeCell serves as an important resource for investigating the gene regulatory networks within a single cell and can help researchers understand the regulatory mechanisms underlying complex microbial ecosystems and individual phenotypes.","hji,kes",1,1,2,2,1,NA,NA +33231677,GTRD: an integrated view of transcription regulation.,"The Gene Transcription Regulation Database (GTRD; http://gtrd.biouml.org/) contains uniformly annotated and processed NGS data related to gene transcription regulation: ChIP-seq, ChIP-exo, DNase-seq, MNase-seq, ATAC-seq and RNA-seq. With the latest release, the database has reached a new level of data integration. All cell types (cell lines and tissues) presented in the GTRD were arranged into a dictionary and linked with different ontologies (BRENDA, Cell Ontology, Uberon, Cellosaurus and Experimental Factor Ontology) and with related experiments in specialized databases on transcription regulation (FANTOM5, ENCODE and GTEx). 
The updated version of the GTRD provides an integrated view of transcription regulation through a dedicated web interface with advanced browsing and search capabilities, an integrated genome browser, and table reports by cell types, transcription factors, and genes of interest.","hji,kes",1,1,2,2,1,NA,NA +33237286,UniProt: the universal protein knowledgebase in 2021.,"The aim of the UniProt Knowledgebase is to provide users with a comprehensive, high-quality and freely accessible set of protein sequences annotated with functional information. In this article, we describe significant updates that we have made over the last two years to the resource. The number of sequences in UniProtKB has risen to approximately 190 million, despite continued work to reduce sequence redundancy at the proteome level. We have adopted new methods of assessing proteome completeness and quality. We continue to extract detailed annotations from the literature to add to reviewed entries and supplement these in unreviewed entries with annotations provided by automated systems such as the newly implemented Association-Rule-Based Annotator (ARBA). We have developed a credit-based publication submission interface to allow the community to contribute publications and annotations to UniProt entries. We describe how UniProtKB responded to the COVID-19 pandemic through expert curation of relevant entries that were rapidly made available to the research community through a dedicated portal. UniProt resources are available under a CC-BY (4.0) license via the web at https://www.uniprot.org/.","hji,kes",1,1,2,2,1,NA,NA +33237311,"The STRING database in 2021: customizable protein-protein networks, and functional characterization of user-uploaded gene/measurement sets.","Cellular life depends on a complex web of functional associations between biomolecules. Among these associations, protein-protein interactions are particularly important due to their versatility, specificity and adaptability. 
The STRING database aims to integrate all known and predicted associations between proteins, including both physical interactions as well as functional associations. To achieve this, STRING collects and scores evidence from a number of sources: (i) automated text mining of the scientific literature, (ii) databases of interaction experiments and annotated complexes/pathways, (iii) computational interaction predictions from co-expression and from conserved genomic context and (iv) systematic transfers of interaction evidence from one organism to another. STRING aims for wide coverage; the upcoming version 11.5 of the resource will contain more than 14 000 organisms. In this update paper, we describe changes to the text-mining system, a new scoring-mode for physical interactions, as well as extensive user interface features for customizing, extending and sharing protein networks. In addition, we describe how to query STRING with genome-wide, experimental data, including the automated detection of enriched functionalities and potential biases in the user's query data. The STRING resource is available online, at https://string-db.org/.","hji,kes",1,1,2,2,1,NA,NA +33237313,RepeatsDB in 2021: improved data and extended classification for protein tandem repeat structures.,"The RepeatsDB database (URL: https://repeatsdb.org/) provides annotations and classification for protein tandem repeat structures from the Protein Data Bank (PDB). Protein tandem repeats are ubiquitous in all branches of the tree of life. The accumulation of solved repeat structures provides new possibilities for classification and detection, but also increasing the need for annotation. Here we present RepeatsDB 3.0, which addresses these challenges and presents an extended classification scheme. 
The major conceptual change compared to the previous version is the hierarchical classification combining top levels based solely on structural similarity (Class > Topology > Fold) with two new levels (Clan > Family) requiring sequence similarity and describing repeat motifs in collaboration with Pfam. Data growth has been addressed with improved mechanisms for browsing the classification hierarchy. A new UniProt-centric view unifies the increasingly frequent annotation of structures from identical or similar sequences. This update of RepeatsDB aligns with our commitment to develop a resource that extracts, organizes and distributes specialized information on tandem repeat protein structures.","hji,kes",1,1,2,2,1,NA,NA +33238002,RecipeDB: a resource for exploring recipes.,"Cooking is the act of turning nature into the culture, which has enabled the advent of the omnivorous human diet. The cultural wisdom of processing raw ingredients into delicious dishes is embodied in their cuisines. Recipes thus are the cultural capsules that encode elaborate cooking protocols for evoking sensory satiation as well as providing nourishment. As we stand on the verge of an epidemic of diet-linked disorders, it is eminently important to investigate the culinary correlates of recipes to probe their association with sensory responses as well as consequences for nutrition and health. RecipeDB (https://cosylab.iiitd.edu.in/recipedb) is a structured compilation of recipes, ingredients and nutrition profiles interlinked with flavor profiles and health associations. 
The repertoire comprises of meticulous integration of 118 171 recipes from cuisines across the globe (6 continents, 26 geocultural regions and 74 countries), cooked using 268 processes (heat, cook, boil, simmer, bake, etc.), by blending over 20 262 diverse ingredients, which are further linked to their flavor molecules (FlavorDB), nutritional profiles (US Department of Agriculture) and empirical records of disease associations obtained from MEDLINE (DietRx). This resource is aimed at facilitating scientific explorations of the culinary space (recipe, ingredient, cooking processes/techniques, dietary styles, etc.) linked to taste (flavor profile) and health (nutrition and disease associations) attributes seeking for divergent applications. Database URL: https://cosylab.iiitd.edu.in/recipedb.","hji,kes",0,1,1,2,0.5,"not life-sciences; reassessed and keeping as no just because of questionable life sci, but it is tricky",read abstract more carefully - flavor proviles/molecules; reassessed and still yes - life sci data included +33245777,3DIV update for 2021: a comprehensive resource of 3D genome and 3D cancer genome.,"Three-dimensional (3D) genome organization is tightly coupled with gene regulation in various biological processes and diseases. In cancer, various types of large-scale genomic rearrangements can disrupt the 3D genome, leading to oncogenic gene expression. However, unraveling the pathogenicity of the 3D cancer genome remains a challenge since closer examinations have been greatly limited due to the lack of appropriate tools specialized for disorganized higher-order chromatin structure. Here, we updated a 3D-genome Interaction Viewer and database named 3DIV by uniformly processing ∼230 billion raw Hi-C reads to expand our contents to the 3D cancer genome. 
The updates of 3DIV are listed as follows: (i) the collection of 401 samples including 220 cancer cell line/tumor Hi-C data, 153 normal cell line/tissue Hi-C data, and 28 promoter capture Hi-C data, (ii) the live interactive manipulation of the 3D cancer genome to simulate the impact of structural variations and (iii) the reconstruction of Hi-C contact maps by user-defined chromosome order to investigate the 3D genome of the complex genomic rearrangement. In summary, the updated 3DIV will be the most comprehensive resource to explore the gene regulatory effects of both the normal and cancer 3D genome. '3DIV' is freely available at http://3div.kr.","hji,kes",1,1,2,2,1,NA,NA +33247931,"KiMoSys 2.0: an upgraded database for submitting, storing and accessing experimental data for kinetic modeling.","The KiMoSys (https://kimosys.org), launched in 2014, is a public repository of published experimental data, which contains concentration data of metabolites, protein abundances and flux data. It offers a web-based interface and upload facility to share data, making it accessible in structured formats, while also integrating associated kinetic models related to the data. In addition, it also supplies tools to simplify the construction process of ODE (Ordinary Differential Equations)-based models of metabolic networks. In this release, we present an update of KiMoSys with new data and several new features, including (i) an improved web interface, (ii) a new multi-filter mechanism, (iii) introduction of data visualization tools, (iv) the addition of downloadable data in machine-readable formats, (v) an improved data submission tool, (vi) the integration of a kinetic model simulation environment and (vii) the introduction of a unique persistent identifier system. We believe that this new version will improve its role as a valuable resource for the systems biology community. 
Database URL: www.kimosys.org.","hji,kes",1,1,2,2,1,NA,NA +33252190,GRIN database: A unified and manually curated repertoire of GRIN variants.,"Glutamatergic neurotransmission is crucial for brain development, wiring neuronal function, and synaptic plasticity mechanisms. Recent genetic studies showed the existence of autosomal dominant de novo GRIN gene variants associated with GRIN-related disorders (GRDs), a rare pediatric neurological disorder caused by N-methyl- d-aspartate receptor (NMDAR) dysfunction. Notwithstanding, GRIN variants identification is exponentially growing and their clinical, genetic, and functional annotations remain highly fragmented, representing a bottleneck in GRD patient's stratification. To shorten the gap between GRIN variant identification and patient stratification, we present the GRIN database (GRINdb), a publicly available, nonredundant, updated, and curated database gathering all available genetic, functional, and clinical data from more than 4000 GRIN variants. The manually curated GRINdb outputs on a web server, allowing query and retrieval of reported GRIN variants, and thus representing a fast and reliable bioinformatics resource for molecular clinical advice. Furthermore, the comprehensive mapping of GRIN variants' genetic and clinical information along NMDAR structure revealed important differences in GRIN variants' pathogenicity and clinical phenotypes, shedding light on GRIN-specific fingerprints. Overall, the GRINdb and web server is a resource for molecular stratification of GRIN variants, delivering clinical and investigational insights into GRDs. GRINdb is accessible at http://lmc.uab.es/grindb.","hji,kes",1,1,2,2,1,NA,NA +33262341,"HuskinDB, a database for skin permeation of xenobiotics.","Skin permeation is an essential biological property of small organic compounds our body is exposed to, such as drugs in topic formulations, cosmetics, and environmental toxins. 
Despite the limited availability of experimental data, there is a lack of systematic analysis and structure. We present a novel resource on skin permeation data that collects all measurements available in the literature and systematically structures experimental conditions. Besides the skin permeation value kp, it includes experimental protocols such as skin source site, skin layer used, preparation technique, storage conditions, as well as test conditions such as temperature, pH as well as the type of donor and acceptor solution. It is important to include these parameters in the assessment of the skin permeation data. In addition, we provide an analysis of physicochemical properties and chemical space coverage, laying the basis for applicability domain determination of insights drawn from the collected data points. The database is freely accessible under https://huskindb.drug-design.de or https://doi.org/10.7303/syn21998881 .","hji,kes",1,1,2,2,1,NA,NA +33270111,GENCODE 2021.,"The GENCODE project annotates human and mouse genes and transcripts supported by experimental data with high accuracy, providing a foundational resource that supports genome biology and clinical genomics. GENCODE annotation processes make use of primary data and bioinformatic tools and analysis generated both within the consortium and externally to support the creation of transcript structures and the determination of their function. 
Here, we present improvements to our annotation infrastructure, bioinformatics tools, and analysis, and the advances they support in the annotation of the human and mouse genomes including: the completion of first pass manual annotation for the mouse reference genome; targeted improvements to the annotation of genes associated with SARS-CoV-2 infection; collaborative projects to achieve convergence across reference annotation databases for the annotation of human and mouse protein-coding genes; and the first GENCODE manually supervised automated annotation of lncRNAs. Our annotation is accessible via Ensembl, the UCSC Genome Browser and https://www.gencodegenes.org.","hji,kes",1,1,2,2,1,NA,NA +33275967,ncRNAVar: A Manually Curated Database for Identification of Noncoding RNA Variants Associated with Human Diseases.,"While variants of noncoding RNAs (ncRNAs) have been experimentally validated as a new class of biomarkers and drug targets, the discovery and interpretation of relationships between ncRNA variants and human diseases become important and challenging. Here we present ncRNAVar (http://www.liwzlab.cn/ncrnavar/), the first database that provides association data between validated ncRNA variants and human diseases through manual curation on 2650 publications and computational annotation. ncRNAVar contains 4565 associations between 711 human disease phenotypes and 3112 variants from 2597 ncRNAs. Each association was reviewed by professional curators, incorporated with valuable annotation and cross references, and designated with an association score by our refined score model. ncRNAVar offers web applications including association prioritization, network visualization, and relationship mapping. 
ncRNAVar, presenting a landscape of ncRNA variants in human diseases and a useful resource for subsequent software development, will improve our insight of relationships between ncRNA variants and human health.","hji,kes",1,1,2,2,1,NA,NA +33279968,"Gene Circuit Explorer (GeneEx): an interactive web-app for visualizing, simulating and analyzing gene regulatory circuits.","

Summary

GeneEx is an interactive web-app that uses an ODE-based mathematical modeling approach to simulate, visualize and analyze gene regulatory circuits (GRCs) for an explicit kinetic parameter set or for a large ensemble of random parameter sets. GeneEx offers users the freedom to modify many aspects of the simulation such as the parameter ranges, the levels of gene expression noise and the GRC network topology itself. This degree of flexibility allows users to explore a variety of hypotheses by providing insight into the number and stability of attractors for a given GRC. Moreover, users have the option to upload, and subsequently compare, experimental gene expression data to simulated data generated from the analysis of a built or uploaded custom circuit. Finally, GeneEx offers a curated database that contains circuit motifs and known biological GRCs to facilitate further inquiry into these. Overall, GeneEx enables users to investigate the effects of parameter variation, stochasticity and/or topological changes on gene expression for GRCs using a systems-biology approach.

Availability and implementation

GeneEx is available at https://geneex.jax.org. This web-app is released under the MIT license and is free and open to all users and there is no mandatory login requirement.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,software; reassessed and offers a curated database,no notes; reassessed and still yes - says includes a curated database +33290552,The Gene Ontology resource: enriching a GOld mine.,"The Gene Ontology Consortium (GOC) provides the most comprehensive resource currently available for computable knowledge regarding the functions of genes and gene products. Here, we report the advances of the consortium over the past two years. The new GO-CAM annotation framework was notably improved, and we formalized the model with a computational schema to check and validate the rapidly increasing repository of 2838 GO-CAMs. In addition, we describe the impacts of several collaborations to refine GO and report a 10% increase in the number of GO annotations, a 25% increase in annotated gene products, and over 9,400 new scientific articles annotated. As the project matures, we continue our efforts to review older annotations in light of newer findings, and, to maintain consistency with other ontologies. As a result, 20 000 annotations derived from experimental data were reviewed, corresponding to 2.5% of experimental GO annotations. The website (http://geneontology.org) was redesigned for quick access to documentation, downloads and tools. To maintain an accurate resource and support traceability and reproducibility, we have made available a historical archive covering the past 15 years of GO data with a consistent format and file structure for both the ontology and annotations.","hji,kes",1,1,2,2,1,NA,NA +33290554,"PANTHER version 16: a revised family classification, tree-based classification tool, enhancer regions and extensive API.","PANTHER (Protein Analysis Through Evolutionary Relationships, http://www.pantherdb.org) is a resource for the evolutionary and functional classification of protein-coding genes from all domains of life. 
The evolutionary classification is based on a library of over 15,000 phylogenetic trees, and the functional classifications include Gene Ontology terms and pathways. Here, we analyze the current coverage of genes from genomes in different taxonomic groups, so that users can better understand what to expect when analyzing a gene list using PANTHER tools. We also describe extensive improvements to PANTHER made in the past two years. The PANTHER Protein Class ontology has been completely refactored, and 6101 PANTHER families have been manually assigned to a Protein Class, providing a high level classification of protein families and their genes. Users can access the TreeGrafter tool to add their own protein sequences to the reference phylogenetic trees in PANTHER, to infer evolutionary context as well as fine-grained annotations. We have added human enhancer-gene links that associate non-coding regions with the annotated human genes in PANTHER. We have also expanded the available services for programmatic access to PANTHER tools and data via application programming interfaces (APIs). Other improvements include additional plant genomes and an updated PANTHER GO-slim.","hji,kes",1,1,2,2,1,NA,NA +33294866,Pancreatlas: Applying an Adaptable Framework to Map the Human Pancreas in Health and Disease.,"Human tissue phenotyping generates complex spatial information from numerous imaging modalities, yet images typically become static figures for publication, and original data and metadata are rarely available. While comprehensive image maps exist for some organs, most resources have limited support for multiplexed imaging or have non-intuitive user interfaces. Therefore, we built a Pancreatlas resource that integrates several technologies into a unique interface, allowing users to access richly annotated web pages, drill down to individual images, and deeply explore data online. 
The current version of Pancreatlas contains over 800 unique images acquired by whole-slide scanning, confocal microscopy, and imaging mass cytometry, and is available at https://www.pancreatlas.org. To create this human pancreas-specific biological imaging resource, we developed a React-based web application and Python-based application programming interface, collectively called Flexible Framework for Integrating and Navigating Data (FFIND), which can be adapted beyond Pancreatlas to meet countless imaging or other structured data-management needs.","hji,kes",1,1,2,2,1,NA,NA +33301927,Phylogenomic study and classification of mitochondrial DNA through virtual genomic fingerprints.,"In the present study, we evaluated the ability of the Virtual Analysis Method for Phylogenomic fingerprint Estimation (VAMPhyRE) toolkit to classify human mitochondrial DNA (mtDNA) haplogroups. In total, 357 random mtDNA sequences were obtained from different haplogroups, based on the classification of PhyloTree. Additionally, we included a control group of five sequences (Pan paniscus, Pan troglodytes, Homo sapiens neanderthalensis, Yoruba15, and the revised Cambridge reference sequence). VAMPhyRE employs a virtual hybridization technique, using probes that specifically bind to their complementary sequences in the genome. We used 65,536 probes of 8 nucleotides to identify potential sites where hybridization occurs between the mtDNA and the specific probe, forming different heteroduplexes and thus, creating a unique and specific genomic fingerprint for each sequence. Genomic fingerprints were compared, and a table of distances was calculated to obtain a mitochondrial phylogenomic tree with the macrohaplogroups, L, N, M, and R, and their corresponding haplogroups, according to universal nomenclature. 
The results obtained suggest an accuracy of 97.25% for the distribution of the 357 mtDNA sequences in the four macrohaplogroups and their corresponding haplogroups when compared with other mtDNA classification tools that require reference sequences and do not offer an analysis based on an evolutionary approach. These data are available online at http://biomedbiotec.encb.ipn.mx/VAMPhyRE/.","hji,kes",0,0,0,2,0,NA,"no notes; reassessed and re-scored - seems to be software though it does say ""data are available"", iffy" +33306802,NPBS database: a chemical data resource with relational data between natural products and biological sources.,"NPBS (Natural Products & Biological Sources) database is a chemical data resource with relational data between natural products and biological sources, manually curated from literatures of natural product researches. The relational data link a specific species and all the natural products derived from it and contrarily link a specific natural product and all the biological sources. The biological sources cover diverse species of plant, bacterial, fungal and marine organisms; the natural molecules have proper chemical structure data and computable molecular properties and all the relational data have corresponding references. NPBS database provides a wider choice of biological sources and can be used for dereplication to prevent re-isolation and re-characterization of already known natural products. Database URL: http://www.organchem.csdb.cn/scdb/NPBS.","hji,kes",1,1,2,2,1,NA,NA +33330918,Systematic evaluation of the effects of genetic variants on PIWI-interacting RNA expression across 33 cancer types.,"PIWI-interacting RNAs (piRNAs) are an emerging class of non-coding RNAs involved in tumorigenesis. Expression quantitative trait locus (eQTL) analysis has been demonstrated to help reveal the genetic mechanism of single nucleotide polymorphisms (SNPs) in cancer etiology. 
However, there are no databases that have been constructed to provide an eQTL analysis between SNPs and piRNA expression. In this study, we collected genotyping and piRNA expression data for 10 997 samples across 33 cancer types from The Cancer Genome Atlas (TCGA). Using linear regression cis-eQTL analysis with adjustment of appropriate covariates, we identified millions of SNP-piRNA pairs in tumor (76 924 831) and normal (24 431 061) tissues. Further, we performed differential expression and survival analyses, and linked the eQTLs to genome-wide association study (GWAS) data to comprehensively decipher the functional roles of identified cis-piRNA eQTLs. Finally, we developed a user-friendly database, piRNA-eQTL (http://njmu-edu.cn:3838/piRNA-eQTL/), to help users query, browse and download corresponding eQTL results. In summary, piRNA-eQTL could serve as an important resource to assist the research community in understanding the roles of genetic variants and piRNAs in the development of cancers.","hji,kes",1,1,2,2,1,NA,NA +33367605,Virxicon: A Lexicon Of Viral Sequences.,"

Motivation

Viruses are the most abundant biological entities and constitute a large reservoir of genetic diversity. In recent years, knowledge about them has increased significantly as a result of dynamic development in life sciences and rapid technological progress. This knowledge is scattered across various data repositories, making a comprehensive analysis of viral data difficult.

Results

In response to the need for gathering a comprehensive knowledge of viruses and viral sequences, we developed Virxicon, a lexicon of all experimentally-acquired sequences for RNA and DNA viruses. The ability to quickly obtain data for entire viral groups, searching sequences by levels of taxonomic hierarchy-according to the Baltimore classification and ICTV taxonomy-and tracking the distribution of viral data and its growth over time are unique features of our database compared to the other tools.

Availability

Virxicon is a publicly available resource, updated weekly. It has an intuitive web interface and can be freely accessed at http://virxicon.cs.put.poznan.pl/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,Chuck Check - yes +33382886,Creating a Metabolic Syndrome Research Resource using the National Health and Nutrition Examination Survey.,"Metabolic syndrome (MetS) is multifaceted. Risk factors include visceral adiposity, dyslipidemia, hyperglycemia, hypertension and environmental stimuli. MetS leads to an increased risk of cardiovascular disease, type 2 diabetes and stroke. Comparative studies, however, have identified heterogeneity in the pathology of MetS across groups though the etiology of these differences has yet to be elucidated. The Metabolic Syndrome Research Resource (MetSRR) described in this report is a curated database that provides access to MetS-associated biological and ancillary data and pools current and potential biomarkers of MetS extracted from relevant National Health and Nutrition Examination Survey (NHANES) data from 1999-2016. Each potential biomarker was selected following the review of over 100 peer-reviewed articles. MetSRR includes 28 demographics, survey and known MetS-related variables, including 9 curated categorical variables and 42 potentially novel biomarkers. All measures are captured from over 90 000 individuals. This biocuration effort provides increased access to curated MetS-related data and will serve as a hypothesis-generating tool to aid in novel biomarker discovery. In addition, MetSRR provides the ability to generate and export ethnic group-/race-, sex- and age-specific curated datasets, thus broadening participation in research efforts to identify clinically evaluative MetS biomarkers for disparate populations. Although there are other databases, such as BioM2MetDisease, designed to explore metabolic diseases through analysis of miRNAs and disease phenotypes, MetSRR is the only MetS-specific database designed to explore etiology of MetS across groups, through the biocuration of demographic, biological samples and biometric data. 
Database URL: http://www.healthdisparityinformatics.com/MetSRR.","hji,kes",1,1,2,2,1,NA,NA +33391542,Network- and systems-based re-engineering of dendritic cells with non-coding RNAs for cancer immunotherapy.,"Dendritic cells (DCs) are professional antigen-presenting cells that induce and regulate adaptive immunity by presenting antigens to T cells. Due to their coordinative role in adaptive immune responses, DCs have been used as cell-based therapeutic vaccination against cancer. The capacity of DCs to induce a therapeutic immune response can be enhanced by re-wiring of cellular signalling pathways with microRNAs (miRNAs). Methods: Since the activation and maturation of DCs is controlled by an interconnected signalling network, we deploy an approach that combines RNA sequencing data and systems biology methods to delineate miRNA-based strategies that enhance DC-elicited immune responses. Results: Through RNA sequencing of IKKβ-matured DCs that are currently being tested in a clinical trial on therapeutic anti-cancer vaccination, we identified 44 differentially expressed miRNAs. According to a network analysis, most of these miRNAs regulate targets that are linked to immune pathways, such as cytokine and interleukin signalling. We employed a network topology-oriented scoring model to rank the miRNAs, analysed their impact on immunogenic potency of DCs, and identified dozens of promising miRNA candidates, with miR-15a and miR-16 as the top ones. The results of our analysis are presented in a database that constitutes a tool to identify DC-relevant miRNA-gene interactions with therapeutic potential (https://www.synmirapy.net/dc-optimization). Conclusions: Our approach enables the systematic analysis and identification of functional miRNA-gene interactions that can be experimentally tested for improving DC immunogenic potency.","hji,kes",1,1,2,2,1,NA,NA +33399824,BnaGVD: A genomic variation database of rapeseed (Brassica napus).,"Rapeseed (Brassica napus L.) 
is a typical polyploid crop and one of the most important oilseed crops worldwide. With the rapid progress on high-throughput sequencing technologies and the reduction of sequencing cost, large-scale genomic data of a specific crop have become available. However, raw sequence data are mostly deposited in the sequence read archive of the National Center of Biotechnology Information (NCBI) and the European Nucleotide Archive (ENA), which is freely accessible to all researchers. Extensive tools for practical purposes should be developed to efficiently utilize these large raw data. Here, we report a web-based rapeseed genomic variation database (BnaGVD, http://rapeseed.biocloud.net/home) from which genomic variations, such as single nucleotide polymorphisms (SNPs) and insertions/deletions (InDels) across a world-wide collection of rapeseed accessions, can be referred. The current release of the BnaGVD contains 34,591,899 high-quality SNPs and 12,281,923 high-quality InDels and provides search tools to retrieve genomic variations and gene annotations across 1,007 accessions of worldwide rapeseed germplasm. We implement a variety of built-in tools (e.g., BnaGWAS, BnaPCA, and BnaStructure) to help users perform in-depth analyses. We recommend this web resource for accelerating studies on the functional genomics and screening of molecular markers for rapeseed breeding.","hji,kes",1,1,2,2,1,NA,NA +33401309,ADeditome provides the genomic landscape of A-to-I RNA editing in Alzheimer's disease.,"A-to-I RNA editing, contributing to nearly 90% of all editing events in human, has been reported to involve in the pathogenesis of Alzheimer's disease (AD) due to its roles in brain development and immune regulation, such as the deficient editing of GluA2 Q/R related to cell death and memory loss. Currently, there are urgent needs for the systematic annotations of A-to-I RNA editing events in AD. 
Here, we built ADeditome, the annotation database of A-to-I RNA editing in AD available at https://ccsm.uth.edu/ADeditome, aiming to provide a resource and reference for functional annotation of A-to-I RNA editing in AD to identify therapeutically targetable genes in an individual. We detected 1676 363 editing sites in 1524 samples across nine brain regions from ROSMAP, MayoRNAseq and MSBB. For these editing events, we performed multiple functional annotations including identification of specific and disease stage associated editing events and the influence of editing events on gene expression, protein recoding, alternative splicing and miRNA regulation for all the genes, especially for AD-related genes in order to explore the pathology of AD. Combing all the analysis results, we found 108 010 and 26 168 editing events which may promote or inhibit AD progression, respectively. We also found 5582 brain region-specific editing events with potentially dual roles in AD across different brain regions. ADeditome will be a unique resource for AD and drug research communities to identify therapeutically targetable editing events. Significance: ADeditome is the first comprehensive resource of the functional genomics of individual A-to-I RNA editing events in AD, which will be useful for many researchers in the fields of AD pathology, precision medicine, and therapeutic researches.","hji,kes",1,1,2,2,1,NA,NA +33416858,The iPPI-DB initiative: A Community-centered database of Protein-Protein Interaction modulators.,"

Motivation

One avenue to address the paucity of clinically testable targets is to reinvestigate the druggable genome by tackling complicated types of targets such as Protein-Protein Interactions (PPIs). Given the challenge to target those interfaces with small chemical compounds, it has become clear that learning from successful examples of PPI modulation is a powerful strategy. Freely-accessible databases of PPI modulators that provide the community with tractable chemical and pharmacological data, as well as powerful tools to query them, are therefore essential to stimulate new drug discovery projects on PPI targets.

Results

Here, we present the new version iPPI-DB, our manually curated database of PPI modulators. In this completely redesigned version of the database, we introduce a new web interface relying on crowdsourcing for the maintenance of the database. This interface was created to enable community contributions, whereby external experts can suggest new database entries. Moreover, the data model, the graphical interface, and the tools to query the database have been completely modernized and improved. We added new PPI modulators, new PPI targets, and extended our focus to stabilizers of PPIs as well.

Availability and implementation

The iPPI-DB server is available at https://ippidb.pasteur.fr The source code for this server is available at https://gitlab.pasteur.fr/ippidb/ippidb-web/ and is distributed under GPL licence (http://www.gnu.org/licences/gpl). Queries can be shared through persistent links according to the FAIR data standards. Data can be downloaded from the website as csv files.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +33436076,"The Dfam community resource of transposable element families, sequence models, and genome annotations.","Dfam is an open access database of repetitive DNA families, sequence models, and genome annotations. The 3.0-3.3 releases of Dfam ( https://dfam.org ) represent an evolution from a proof-of-principle collection of transposable element families in model organisms into a community resource for a broad range of species, and for both curated and uncurated datasets. In addition, releases since Dfam 3.0 provide auxiliary consensus sequence models, transposable element protein alignments, and a formalized classification system to support the growing diversity of organisms represented in the resource. The latest release includes 266,740 new de novo generated transposable element families from 336 species contributed by the EBI. This expansion demonstrates the utility of many of Dfam's new features and provides insight into the long term challenges ahead for improving de novo generated transposable element datasets.","hji,kes",1,1,2,2,1,NA,NA +33459764,SWITCHES: Searchable web interface for topologies of CHEmical switches.,"

 

Bistable biochemical switches are key motifs in cellular state decisions and long-term storage of cellular 'memory'. There are a few known biological switches that have been well characterized, however these examples are insufficient for systematic surveys of properties of these important systems. Here we present a resource of all possible bistable biochemical reaction networks with up to 6 reactions between 3 molecules, and 3 reactions between 4 molecules. Over 35,000 reaction topologies were constructed by identifying unique combinations of reactions between a fixed number of molecules. Then, these topologies were populated with rates within a biologically realistic range. The Searchable Web Interface for Topologies of CHEmical Switches (SWITCHES, https://switches.ncbs.res.in) provides a bistability and parameter analysis of over 7 million models from this systematic survey of chemical reaction space. This database will be useful for theoreticians interested in analyzing stability in chemical systems and also experimentalists for creating robust synthetic biological switches.

Availability and implementation

Freely available on the web at https://switches.ncbs.res.in. Website implemented in PHP, MariaDB, Graphviz, and Apache, with all major browsers supported.","hji,kes",1,1,2,2,1,NA,NA +33461215,Ligand-based approach for predicting drug targets and for virtual screening against COVID-19.,"Discovering efficient drugs and identifying target proteins are still an unmet but urgent need for curing coronavirus disease 2019 (COVID-19). Protein structure-based docking is a widely applied approach for discovering active compounds against drug targets and for predicting potential targets of active compounds. However, this approach has its inherent deficiency caused by e.g. various different conformations with largely varied binding pockets adopted by proteins, or the lack of true target proteins in the database. This deficiency may result in false negative results. As a complementary approach to the protein structure-based platform for COVID-19, termed as D3Docking in our previous work, we developed in this study a ligand-based method, named D3Similarity, which is based on the molecular similarity evaluation between the submitted molecule(s) and those in an active compound database. The database is constituted by all the reported bioactive molecules against the coronaviruses, viz., severe acute respiratory syndrome coronavirus (SARS), Middle East respiratory syndrome coronavirus (MERS), severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2), human betacoronavirus 2c EMC/2012 (HCoV-EMC), human CoV 229E (HCoV-229E) and feline infectious peritonitis virus (FIPV), some of which have target or mechanism information but some do not. Based on the two-dimensional (2D) and three-dimensional (3D) similarity evaluation of molecular structures, virtual screening and target prediction could be performed according to similarity ranking results. 
With two examples, we demonstrated the reliability and efficiency of D3Similarity by using 2D × 3D value as score for drug discovery and target prediction against COVID-19. The database, which will be updated regularly, is available free of charge at https://www.d3pharma.com/D3Targets-2019-nCoV/D3Similarity/index.php.","hji,kes",1,1,2,2,1,NA,NA +33480398,DrugRepV: a compendium of repurposed drugs and chemicals targeting epidemic and pandemic viruses.,"Viruses are responsible for causing various epidemics and pandemics with a high mortality rate e.g. ongoing SARS-CoronaVirus-2 crisis. The discovery of novel antivirals remains a challenge but drug repurposing is emerging as a potential solution to develop antivirals in a cost-effective manner. In this regard, we collated the information of repurposed drugs tested for antiviral activity from literature and presented it in the form of a user-friendly web server named 'DrugRepV'. The database contains 8485 entries (3448 unique) with biological, chemical, clinical and structural information of 23 viruses responsible to cause epidemics/pandemics. The database harbors browse and search options to explore the repurposed drug entries. The data can be explored by some important fields like drugs, viruses, drug targets, clinical trials, assays, etc. For summarizing the data, we provide overall statistics of the repurposed candidates. To make the database more informative, it is hyperlinked to various external repositories like DrugBank, PubChem, NCBI-Taxonomy, Clinicaltrials.gov, World Health Organization and many more. 'DrugRepV' database (https://bioinfo.imtech.res.in/manojk/drugrepv/) would be highly useful to the research community working to develop antivirals.","hji,kes",1,1,2,2,1,NA,NA +33485793,cgMLST@Taiwan: A web service platform for Vibrio cholerae cgMLST profiling and global strain tracking.,"

Background

Cholera, a rapidly dehydrating diarrheal disease caused by toxigenic Vibrio cholerae, is a leading cause of morbidity and mortality in some regions of the world. Core genome multilocus sequence typing (cgMLST) is a promising approach in generating genetic fingerprints from whole-genome sequencing (WGS) data for strain comparison among laboratories.

Methods

We constructed a V. cholerae core gene allele database using an in-house developed computational pipeline, a database with cgMLST profiles converted from genomic sequences from the National Center for Biotechnology Information, and built a REST-based web accessible via the Internet.

Results

We built a web service platform-cgMLST@Taiwan and installed a V. cholerae allele database, a cgMLST profile database, and computational tools for generating V. cholerae cgMLST profiles (based on 3,017 core genes), performing rapid global strain tracking, and clustering analysis of cgMLST profiles. This web-based platform provides services to researchers, public health microbiologists, and physicians who use WGS data for the investigation of cholera outbreaks and tracking of V. cholerae strain transmission across countries and geographic regions. The cgMLST@Taiwan is accessible at http://rdvd.cdc.gov.tw/cgMLST.","hji,kes",1,1,2,2,1,NA,NA +33502607,Missense3D-DB web catalogue: an atom-based analysis and repository of 4M human protein-coding genetic variants.,"The interpretation of human genetic variation is one of the greatest challenges of modern genetics. New approaches are urgently needed to prioritize variants, especially those that are rare or lack a definitive clinical interpretation. We examined 10,136,597 human missense genetic variants from GnomAD, ClinVar and UniProt. We were able to perform large-scale atom-based mapping and phenotype interpretation of 3,960,015 of these variants onto 18,874 experimental and 84,818 in house predicted three-dimensional coordinates of the human proteome. We demonstrate that 14% of amino acid substitutions from the GnomAD database that could be structurally analysed are predicted to affect protein structure (n = 568,548, of which 566,439 rare or extremely rare) and may, therefore, have a yet unknown disease-causing effect. The same is true for 19.0% (n = 6266) of variants of unknown clinical significance or conflicting interpretation reported in the ClinVar database. The results of the structural analysis are available in the dedicated web catalogue Missense3D-DB ( http://missense3d.bc.ic.ac.uk/ ). 
For each of the 4 M variants, the results of the structural analysis are presented in a friendly concise format that can be included in clinical genetic reports. A detailed report of the structural analysis is also available for the non-experts in structural biology. Population frequency and predictions from SIFT and PolyPhen are included for a more comprehensive variant interpretation. This is the first large-scale atom-based structural interpretation of human genetic variation and offers geneticists and the biomedical community a new approach to genetic variant interpretation.","hji,kes",1,1,2,2,1,NA,NA +33507270,InSexBase: an annotated genomic resource of sex chromosomes and sex-biased genes in insects.,"Sex determination and the regulation of sexual dimorphism are among the most fascinating topics in modern biology. As the most species-rich group of sexually reproducing organisms on Earth, insects have multiple sex determination systems. Though sex chromosomes and sex-biased genes are well-studied in dozens of insects, their gene sequences are scattered in various databases. Moreover, a shortage of annotation hinders the deep mining of these data. Here, we collected the chromosome-level sex chromosome data of 49 insect species, including 34 X chromosomes, 15 Z chromosomes, 5 W chromosomes and 2 Y chromosomes. We also obtained Y-linked contigs of four insects species-Anopheles gambiae, Drosophila innubila, Drosophila yakuba and Tribolium castaneum. The unannotated chromosome-level sex chromosomes were annotated using a standard pipeline, yielding a total of 123 030 protein-coding genes, 2 159 427 repeat sequences, 894 miRNAs, 1574 rRNAs, 5105 tRNAs, 395 snoRNAs (small nucleolar RNA), 54 snRNAs (small nuclear RNA) and 5959 other ncRNAs (non-coding RNA). In addition, 36 781 sex-biased genes were identified by analyzing 62 RNA-seq (RNA sequencing) datasets. 
Together with 5707 sex-biased genes from the Drosophila genus collected from the Sex-Associated Gene Database, we obtained a total of 42 488 sex-biased genes from 13 insect species. All these data were deposited into InSexBase, a new user-friendly database of insect sex chromosomes and sex-biased genes. Database URL: http://www.insect-genome.com/Sexdb/.","hji,kes",1,1,2,2,1,NA,NA +33511767,FAWMine: An integrated database and analysis platform for fall armyworm genomics.,"Fall armyworm (Spodoptera frugiperda), a native insect species in the Americas, is rapidly becoming a major agricultural pest worldwide and is causing great damage to corn, rice, soybeans, and other crops. To control this pest, scientists have accumulated a great deal of high-throughput data of fall armyworm, and nine versions of its genomes and transcriptomes have been published. However, easily accessing and performing integrated analysis of these omics data sets is challenging. Here, we developed the Fall Armyworm Genome Database (FAWMine, http://159.226.67.243:8080/fawmine/) to maintain genome sequences, structural and functional annotations, transcriptomes, co-expression, protein interactions, homologs, pathways, and single-nucleotide variations. FAWMine provides a powerful framework that helps users to perform flexible and customized searching, present integrated data sets using diverse visualization methods, output results tables in a range of file formats, analyze candidate gene lists using multiple widgets, and query data available in other InterMine systems. Additionally, stand-alone JBrowse and BLAST services are also established, allowing the users to visualize RNA-Seq data and search genome and annotated gene sequences. Altogether, FAWMine is a useful tool for querying, visualizing, and analyzing compiled data sets rapidly and efficiently. 
FAWMine will be continually updated to function as a community resource for fall armyworm genomics and pest control research.","hji,kes",1,1,2,2,1,NA,NA +33514395,My personal mutanome: a computational genomic medicine platform for searching network perturbing alleles linking genotype to phenotype.,"Massive genome sequencing data have inspired new challenges in personalized treatments and facilitated oncological drug discovery. We present a comprehensive database, My Personal Mutanome (MPM), for accelerating the development of precision cancer medicine protocols. MPM contains 490,245 mutations from over 10,800 tumor exomes across 33 cancer types in The Cancer Genome Atlas mapped to 94,563 structure-resolved/predicted protein-protein interaction interfaces (""edgetic"") and 311,022 functional sites (""nodetic""), including ligand-protein binding sites and 8 types of protein posttranslational modifications. In total, 8884 survival results and 1,271,132 drug responses are obtained for these mapped interactions. MPM is available at https://mutanome.lerner.ccf.org .","hji,kes",1,1,2,2,1,NA,NA +33515030,HVIDB: a comprehensive database for human-virus protein-protein interactions.,"While leading to millions of people's deaths every year the treatment of viral infectious diseases remains a huge public health challenge.Therefore, an in-depth understanding of human-virus protein-protein interactions (PPIs) as the molecular interface between a virus and its host cell is of paramount importance to obtain new insights into the pathogenesis of viral infections and development of antiviral therapeutic treatments. However, current human-virus PPI database resources are incomplete, lack annotation and usually do not provide the opportunity to computationally predict human-virus PPIs. 
Here, we present the Human-Virus Interaction DataBase (HVIDB, http://zzdlab.com/hvidb/) that provides comprehensively annotated human-virus PPI data as well as seamlessly integrates online PPI prediction tools. Currently, HVIDB highlights 48 643 experimentally verified human-virus PPIs covering 35 virus families, 6633 virally targeted host complexes, 3572 host dependency/restriction factors as well as 911 experimentally verified/predicted 3D complex structures of human-virus PPIs. Furthermore, our database resource provides tissue-specific expression profiles of 6790 human genes that are targeted by viruses and 129 Gene Expression Omnibus series of differentially expressed genes post-viral infections. Based on these multifaceted and annotated data, our database allows the users to easily obtain reliable information about PPIs of various human viruses and conduct an in-depth analysis of their inherent biological significance. In particular, HVIDB also integrates well-performing machine learning models to predict interactions between the human host and viral proteins that are based on (i) sequence embedding techniques, (ii) interolog mapping and (iii) domain-domain interaction inference. We anticipate that HVIDB will serve as a one-stop knowledge base to further guide hypothesis-driven experimental efforts to investigate human-virus relationships.","hji,kes",1,1,2,2,1,NA,NA +33539887,ADDRESS: A Database of Disease-associated Human Variants Incorporating Protein Structure and Folding Stabilities.,"Numerous human diseases are caused by mutations in genomic sequences. Since amino acid changes affect protein function through mechanisms often predictable from protein structure, the integration of structural and sequence data enables us to estimate with greater accuracy whether and how a given mutation will lead to disease. Publicly available annotated databases enable hypothesis assessment and benchmarking of prediction tools. 
However, the results are often presented as summary statistics or black box predictors, without providing full descriptive information. We developed a new semi-manually curated human variant database presenting information on the protein contact-map, sequence-to-structure mapping, amino acid identity change, and stability prediction for the popular UniProt database. We found that the profiles of pathogenic and benign missense polymorphisms can be effectively deduced using decision trees and comparative analyses based on the presented dataset. The database is made publicly available through https://zhanglab.ccmb.med.umich.edu/ADDRESS.","hji,kes",1,1,2,2,1,NA,NA +33546584,Plant Co-expression Annotation Resource: a web server for identifying targets for genetically modified crop breeding pipelines.,"The development of genetically modified crops (GM) includes the discovery of candidate genes through bioinformatics analysis using genomics data, gene expression, and others. Proteins of unknown function (PUFs) are interesting targets for GM crops breeding pipelines for the novelty associated with such targets and also to avoid copyright protection. One method of inferring the putative function of PUFs is by relating them to factors of interest such as abiotic stresses using orthology and co-expression networks, in a guilt-by-association manner. In this regard, we have downloaded, analyzed, and processed genomics data of 53 angiosperms, totaling 1,862,010 genes and 2,332,974 RNA. Diamond and InterproScan were used to discover 72,266 PUFs for all organisms. RNA-seq datasets related to abiotic stresses were downloaded from NCBI/GEO. The RNA-seq data was used as input to the LSTrAP software to construct co-expression networks. LSTrAP also created clusters of transcripts with correlated expression, whose members are more probably related to the molecular mechanisms associated with abiotic stresses in the plants. 
Orthologous groups were created (OrhtoMCL) using all 2,332,974 proteins in order to associate PUFs to abiotic stress-related clusters of co-expression and therefore infer their function in a guilt-by-association manner. A freely available web resource named ""Plant Co-expression Annotation Resource"" ( https://www.machado.cnptia.embrapa.br/plantannot ), Plantannot, was created to provide indexed queries to search for PUF putatively associated with abiotic stresses. The web interface also allows browsing, querying, and retrieving of public genomics data from 53 plants. We hope Plantannot to be useful for researchers trying to obtain novel GM crops resistant to climate change hazards.","hji,kes",1,1,2,2,1,NA,value add +33547946,Web resource on available DNA variant tests for hereditary diseases and genetic predispositions in dogs and cats: An Update.,"Vast progress has been made in the clinical diagnosis and molecular basis of hereditary diseases and genetic predisposition in companion animals. The purpose of this report is to provide an update on the availability of DNA testing for hereditary diseases and genetic predispositions in dogs and cats utilizing the WSAVA-PennGen DNA Testing Database web resource (URL: http://research.vet.upenn.edu/WSAVA-LabSearch ). Information on hereditary diseases, DNA tests, genetic testing laboratories and afflicted breeds added to the web-based WSAVA-PennGen DNA Testing Database was gathered. Following verification through original research and clinical studies, searching various databases on hereditary diseases in dogs and cats, and contacting laboratories offering DNA tests, the data were compared to the resource reported on in 2013. The number of molecularly defined Mendelian inherited diseases and variants in companion animals listed in the WSAVA-PennGen DNA Testing Database in 2020 drastically increased by 112% and 141%, respectively. The number of DNA variant tests offered by each laboratory has also doubled for dogs and cats. 
While the overall number of laboratories has only slightly increased from 43 to 47, the number of larger corporate laboratories increased, while academic laboratories have declined. In addition, there are now several laboratories that are offering breed-specific or all-breed panel tests rather than single-DNA tests for dogs and cats. This unique regularly updated searchable web-based database allows veterinary clinicians, breeders and pet owners to readily find available DNA tests, laboratories performing these DNA tests worldwide, and canine and feline breeds afflicted and also serves as a valuable resource for comparative geneticists.","hji,kes",1,1,2,2,1,NA,NA +33553941,SARSCOVIDB-A New Platform for the Analysis of the Molecular Impact of SARS-CoV-2 Viral Infection.,"The COVID-19 pandemic caused by the new coronavirus (SARS-CoV-2) has become a global emergency issue for public health. This threat has led to an acceleration in related research and, consequently, an unprecedented volume of clinical and experimental data that include changes in gene expression resulting from infection. The SARS-CoV-2 infection database (SARSCOVIDB: https://sarscovidb.org/) was created to mitigate the difficulties related to this scenario. The SARSCOVIDB is an online platform that aims to integrate all differential gene expression data, at messenger RNA and protein levels, helping to speed up analysis and research on the molecular impact of COVID-19. The database can be searched from different experimental perspectives and presents all related information from published data, such as viral strains, hosts, methodological approaches (proteomics or transcriptomics), genes/proteins, and samples (clinical or experimental). All information was taken from 24 articles related to analyses of differential gene expression out of 5,554 COVID-19/SARS-CoV-2-related articles published so far. 
The database features 12,535 genes whose expression has been identified as altered due to SARS-CoV-2 infection. Thus, the SARSCOVIDB is a new resource to support the health workers and the scientific community in understanding the pathogenesis and molecular impact caused by SARS-CoV-2.","hji,kes",1,1,2,2,1,NA,Chuck Check - yes +33581334,OGP: A Repository of Experimentally Characterized O-Glycoproteins to Facilitate Studies on O-Glycosylation.,"Numerous studies on cancer, biopharmaceuticals, and clinical trials have necessitated comprehensive and precise analysis of protein O-glycosylation. However, the lack of updated and convenient databases deters the storage of and reference to emerging O-glycoprotein data. To resolve this issue, an O-glycoprotein repository named OGP was established in this work. It was constructed with a collection of O-glycoprotein data from different sources. OGP contains 9354 O-glycosylation sites and 11,633 site-specific O-glycans mapping to 2133 O-glycoproteins, and it is the largest O-glycoprotein repository thus far. Based on the recorded O-glycosylation sites, an O-glycosylation site prediction tool was developed. Moreover, an OGP-based website is already available (http://www.oglyp.org/). The website comprises four specially designed and user-friendly modules: statistical analysis, database search, site prediction, and data submission. The first version of OGP repository and the website allow users to obtain various O-glycoprotein-related information, such as protein accession numbers, O-glycosylation sites, glycopeptide sequences, site-specific glycan structures, experimental methods, and potential O-glycosylation sites. O-glycosylation data mining can be performed efficiently on this website, which will greatly facilitate related studies. 
In addition, the database is accessible from OGP website (http://www.oglyp.org/download.php).","hji,kes",1,1,2,2,1,NA,NA +33594411,"Viral Host Range database, an online tool for recording, analyzing and disseminating virus-host interactions.","

Motivation

Viruses are ubiquitous in the living world, and their ability to infect more than one host defines their host range. However, information about which virus infects which host, and about which host is infected by which virus, is not readily available.

Results

We developed a web-based tool called the Viral Host Range database to record, analyze and disseminate experimental host range data for viruses infecting archaea, bacteria and eukaryotes.

Availability

The ViralHostRangeDB application is available from https://viralhostrangedb.pasteur.cloud. Its source code is freely available from the Gitlab hub of Institut Pasteur (https://gitlab.pasteur.fr/hub/viralhostrangedb).","hji,kes",1,1,2,2,1,NA,NA +33599248,bc-GenExMiner 4.5: new mining module computes breast cancer differential gene expression analyses.,"'Breast cancer gene-expression miner' (bc-GenExMiner) is a breast cancer-associated web portal (http://bcgenex.ico.unicancer.fr). Here, we describe the development of a new statistical mining module, which permits several differential gene expression analyses, i.e. 'Expression' module. Sixty-two breast cancer cohorts and one healthy breast cohort with their corresponding clinicopathological information are included in bc-GenExMiner v4.5 version. Analyses are based on microarray or RNAseq transcriptomic data. Thirty-nine differential gene expression analyses, grouped into 13 categories, according to clinicopathological and molecular characteristics ('Targeted' and 'Exhaustive') and gene expression ('Customized'), have been developed. Output results are visualized in four forms of plots. This new statistical mining module offers, among other things, the possibility to compare gene expression in healthy (cancer-free), tumour-adjacent and tumour tissues at once and in three triple-negative breast cancer subtypes (i.e. C1: molecular apocrine tumours; C2: basal-like tumours infiltrated by immune suppressive cells and C3: basal-like tumours triggering an ineffective immune response). Several validation tests showed that bioinformatics process did not alter the pathobiological information contained in the source data. In this work, we developed and demonstrated that bc-GenExMiner 'Expression' module can be used for exploratory and validation purposes. 
Database URL: http://bcgenex.ico.unicancer.fr.","hji,kes",1,1,2,2,1,NA,NA +33600011,MutSpliceDB: A database of splice sites variants with RNA-seq based evidence on effects on splicing.,"Splice site variants may lead to transcript alterations, causing exons inclusion, exclusion, truncation, or intron retention. Interpreting the consequences of a specific splice site variant is not straightforward, especially if the variant is located outside of the canonical splice sites. We developed MutSpliceDB: https://brb.nci.nih.gov/splicing, a public resource to facilitate the interpretation of splice sites variants effects on splicing based on manually reviewed RNA-seq BAM files from samples with splice site variants.","hji,kes",1,1,2,2,1,NA,minimal but can view full dataset et +33618727,A clinical trials corpus annotated with UMLS entities to enhance the access to evidence-based medicine.,"

Background

The large volume of medical literature makes it difficult for healthcare professionals to keep abreast of the latest studies that support Evidence-Based Medicine. Natural language processing enhances the access to relevant information, and gold standard corpora are required to improve systems. To contribute with a new dataset for this domain, we collected the Clinical Trials for Evidence-Based Medicine in Spanish (CT-EBM-SP) corpus.

Methods

We annotated 1200 texts about clinical trials with entities from the Unified Medical Language System semantic groups: anatomy (ANAT), pharmacological and chemical substances (CHEM), pathologies (DISO), and lab tests, diagnostic or therapeutic procedures (PROC). We doubly annotated 10% of the corpus and measured inter-annotator agreement (IAA) using F-measure. As use case, we run medical entity recognition experiments with neural network models.

Results

This resource contains 500 abstracts of journal articles about clinical trials and 700 announcements of trial protocols (292 173 tokens). We annotated 46 699 entities (13.98% are nested entities). Regarding IAA agreement, we obtained an average F-measure of 85.65% (±4.79, strict match) and 93.94% (±3.31, relaxed match). In the use case experiments, we achieved recognition results ranging from 80.28% (±00.99) to 86.74% (±00.19) of average F-measure.

Conclusions

Our results show that this resource is adequate for experiments with state-of-the-art approaches to biomedical named entity recognition. It is freely distributed at: http://www.lllf.uam.es/ESP/nlpmedterm_en.html . The methods are generalizable to other languages with similar available sources.","hji,kes",0,1,1,2,0.5,no notes; reassessed and I will stick with no. Text data is questionable life-sci. But even the underlying data are clinical.,"questionable - not really a database, but data is available and downloadable; reassessed and still yes, provides annotated corpus (text data)" +33643383,Abiotic Stress-Responsive miRNA and Transcription Factor-Mediated Gene Regulatory Network in Oryza sativa: Construction and Structural Measure Study.,"Climate changes and environmental stresses have a consequential association with crop plant growth and yield, meaning it is necessary to cultivate crops that have tolerance toward the changing climate and environmental disturbances such as water stress, temperature fluctuation, and salt toxicity. Recent studies have shown that trans-acting regulatory elements, including microRNAs (miRNAs) and transcription factors (TFs), are emerging as promising tools for engineering naive improved crop varieties with tolerance for multiple environmental stresses and enhanced quality as well as yield. However, the interwoven complex regulatory function of TFs and miRNAs at transcriptional and post-transcriptional levels is unexplored in Oryza sativa. To this end, we have constructed a multiple abiotic stress responsive TF-miRNA-gene regulatory network for O. sativa using a transcriptome and degradome sequencing data meta-analysis approach. The theoretical network approach has shown the networks to be dense, scale-free, and small-world, which makes the network stable. They are also invariant to scale change where an efficient, quick transmission of biological signals occurs within the network on extrinsic hindrance. 
The analysis also deciphered the existence of communities (cluster of TF, miRNA, and genes) working together to help plants in acclimatizing to multiple stresses. It highlighted that genes, TFs, and miRNAs shared by multiple stress conditions that work as hubs or bottlenecks for signal propagation, for example, during the interaction between stress-responsive genes (TFs/miRNAs/other genes) and genes involved in floral development pathways under multiple environmental stresses. This study further highlights how the fine-tuning feedback mechanism works for balancing stress tolerance and how timely flowering enable crops to survive in adverse conditions. This study developed the abiotic stress-responsive regulatory network, APRegNet database (http://lms.snu.edu.in/APRegNet), which may help researchers studying the roles of miRNAs and TFs. Furthermore, it advances current understanding of multiple abiotic stress tolerance mechanisms.","hji,kes",1,1,2,2,1,NA,NA +33655207,TMSNP: a web server to predict pathogenesis of missense mutations in the transmembrane region of membrane proteins.,"The massive amount of data generated from genome sequencing brings tons of newly identified mutations, whose pathogenic/non-pathogenic effects need to be evaluated. This has given rise to several mutation predictor tools that, in general, do not consider the specificities of the various protein groups. We aimed to develop a predictor tool dedicated to membrane proteins, under the premise that their specific structural features and environment would give different responses to mutations compared to globular proteins. For this purpose, we created TMSNP, a database that currently contains information from 2624 pathogenic and 196 705 non-pathogenic reported mutations located in the transmembrane region of membrane proteins. 
By computing various conservation parameters on these mutations in combination with annotations, we trained a machine-learning model able to classify mutations as pathogenic or not. TMSNP (freely available at http://lmc.uab.es/tmsnp/) improves considerably the prediction power of commonly used mutation predictors trained with globular proteins.","hji,kes",1,1,2,2,1,"software; reassess, resulted in database",no notes; reassessed and still yes - includes a data resource as the outcome of a program +33683565,"Health and longevity studies in C. elegans: the ""healthy worm database"" reveals strengths, weaknesses and gaps of test compound-based studies.","Several biogerontology databases exist that focus on genetic or gene expression data linked to health as well as survival, subsequent to compound treatments or genetic manipulations in animal models. However, none of these has yet collected experimental results of compound-related health changes. Since quality of life is often regarded as more valuable than length of life, we aim to fill this gap with the ""Healthy Worm Database"" ( http://healthy-worm-database.eu ). Literature describing health-related compound studies in the aging model Caenorhabditis elegans was screened, and data for 440 compounds collected. The database considers 189 publications describing 89 different phenotypes measured in 2995 different conditions. Besides enabling a targeted search for promising compounds for further investigations, this database also offers insights into the research field of studies on healthy aging based on a frequently used model organism. Some weaknesses of C. elegans-based aging studies, like underrepresented phenotypes, especially concerning cognitive functions, as well as the convenience-based use of young worms as the starting point for compound treatment or phenotype measurement are discussed. 
In conclusion, the database provides an anchor for the search for compounds affecting health, with a link to public databases, and it further highlights some potential shortcomings in current aging research.","hji,kes",1,1,2,2,1,NA,NA +33685383,lncRNADetector: a bioinformatics pipeline for long non-coding RNA identification and MAPslnc: a repository of medicinal and aromatic plant lncRNAs.,"Long non-coding RNAs (lncRNAs) are an emerging class of non-coding RNAs and potent regulatory elements in the living cells. High throughput RNA sequencing analyses have generated a tremendous amount of transcript sequence data. A large proportion of these transcript sequences does not code for proteins and are known as non-coding RNAs. Among them, lncRNAs are a unique class of transcripts longer than 200 nucleotides with diverse biological functions and regulatory mechanisms. Recent emerging studies and next-generation sequencing technologies show a substantial amount of lncRNAs within the plant genome, which are yet to be identified. The computational identification of lncRNAs from these transcripts is a challenging task due to the involvement of a series of filtering steps. We have developed lncRNADetector, a bioinformatics pipeline for the identification of novel lncRNAs, especially from medicinal and aromatic plant (MAP) species. The lncRNADetector has been utilized to analyse and identify more than 88,459 lncRNAs from 21 species of MAPs. To provide a knowledge resource for the plant research community towards elucidating the diversity of biological roles of lncRNAs, the information generated about MAP lncRNAs (post-filtering steps) through lncRNADetector has been stored and organized in MAPslnc database (MAPslnc, https://lncrnapipe.cimap.res.in). 
The lncRNADetector web server and MAPslnc database have been developed in order to facilitate researchers for accurate identification of lncRNAs from the next-generation sequencing data of different organisms for downstream studies. To the best of our knowledge no such MAPslnc database is available till date.","hji,kes",1,1,2,2,1,NA,NA +33685493,riboCIRC: a comprehensive database of translatable circRNAs.,"riboCIRC is a translatome data-oriented circRNA database specifically designed for hosting, exploring, analyzing, and visualizing translatable circRNAs from multi-species. The database provides a comprehensive repository of computationally predicted ribosome-associated circRNAs; a manually curated collection of experimentally verified translated circRNAs; an evaluation of cross-species conservation of translatable circRNAs; a systematic de novo annotation of putative circRNA-encoded peptides, including sequence, structure, and function; and a genome browser to visualize the context-specific occupant footprints of circRNAs. It represents a valuable resource for the circRNA research community and is publicly available at http://www.ribocirc.com .","hji,kes",1,1,2,2,1,NA,NA +33705530,Development of a biomarker database toward performing disease classification and finding disease interrelations.,"A biomarker is a measurable indicator of a disease or abnormal state of a body that plays an important role in disease diagnosis, prognosis and treatment. The biomarker has become a significant topic due to its versatile usage in the medical field and in rapid detection of the presence or severity of some diseases. The volume of biomarker data is rapidly increasing and the identified data are scattered. To provide comprehensive information, the explosively growing data need to be recorded in a single platform. There is no open-source freely available comprehensive online biomarker database. 
To fulfill this purpose, we have developed a human biomarker database as part of the KNApSAcK family databases which contain a vast quantity of information on the relationships between biomarkers and diseases. We have classified the diseases into 18 disease classes, mostly according to the National Center for Biotechnology Information definitions. Apart from this database development, we also have performed disease classification by separately using protein and metabolite biomarkers based on the network clustering algorithm DPClusO and hierarchical clustering. Finally, we reached a conclusion about the relationships among the disease classes. The human biomarker database can be accessed online and the inter-disease relationships may be helpful in understanding the molecular mechanisms of diseases. To our knowledge, this is one of the first approaches to classify diseases based on biomarkers. Database URL: http://www.knapsackfamily.com/Biomarker/top.php.","hji,kes",1,1,2,2,1,NA,NA +33735949,OverCOVID: an integrative web portal for SARS-CoV-2 bioinformatics resources.,"Outbreaks of COVID-19 caused by the novel coronavirus SARS-CoV-2 is still a threat to global human health. In order to understand the biology of SARS-CoV-2 and developing drug against COVID-19, a vast amount of genomic, proteomic, interatomic, and clinical data is being generated, and the bioinformatics researchers produced databases, webservers and tools to gather those publicly available data and provide an opportunity of analyzing such data. However, these bioinformatics resources are scattered and researchers need to find them from different resources discretely. To facilitate researchers in finding the resources in one frame, we have developed an integrated web portal called OverCOVID (http://bis.zju.edu.cn/overcovid/). The publicly available webservers, databases and tools associated with SARS-CoV-2 have been incorporated in the resource page. 
In addition, a network view of the resources is provided to display the scope of the research. Other information like SARS-CoV-2 strains is visualized and various layers of interaction resources is listed in distinct pages of the web portal. As an integrative web portal, the OverCOVID will help the scientist to search the resources and accelerate the clinical research of SARS-CoV-2.","hji,kes",1,1,2,2,1,NA,tricky - portal of resources a la knowledgembase +33749993,Integrated intra- and intercellular signaling knowledge for multicellular omics analysis.,"Molecular knowledge of biological processes is a cornerstone in omics data analysis. Applied to single-cell data, such analyses provide mechanistic insights into individual cells and their interactions. However, knowledge of intercellular communication is scarce, scattered across resources, and not linked to intracellular processes. To address this gap, we combined over 100 resources covering interactions and roles of proteins in inter- and intracellular signaling, as well as transcriptional and post-transcriptional regulation. We added protein complex information and annotations on function, localization, and role in diseases for each protein. The resource is available for human, and via homology translation for mouse and rat. The data are accessible via OmniPath's web service (https://omnipathdb.org/), a Cytoscape plug-in, and packages in R/Bioconductor and Python, providing access options for computational and experimental scientists. We created workflows with tutorials to facilitate the analysis of cell-cell interactions and affected downstream intracellular signaling processes. OmniPath provides a single access point to knowledge spanning intra- and intercellular processes for data analysis, as we demonstrate in applications studying SARS-CoV-2 infection and ulcerative colitis.","hji,kes",1,1,2,2,1,NA,NA +33757430,ATAV: a comprehensive platform for population-scale genomic analyses.,"

Background

A common approach for sequencing studies is to do joint-calling and store variants of all samples in a single file. If new samples are continually added or controls are re-used for several studies, the cost and time required to perform joint-calling for each analysis can become prohibitive.

Results

We present ATAV, an analysis platform for large-scale whole-exome and whole-genome sequencing projects. ATAV stores variant and per site coverage data for all samples in a centralized database, which is efficiently queried by ATAV to support diagnostic analyses for trios and singletons, as well as rare-variant collapsing analyses for finding disease associations in complex diseases. Runtime logs ensure full reproducibility and the modularized ATAV framework makes it extensible to continuous development. Besides helping with the identification of disease-causing variants for a range of diseases, ATAV has also enabled the discovery of disease-genes by rare-variant collapsing on datasets containing more than 20,000 samples. Analyses to date have been performed on data of more than 110,000 individuals demonstrating the scalability of the framework. To allow users to easily access variant-level data directly from the database, we provide a web-based interface, the ATAV data browser ( http://atavdb.org/ ). Through this browser, summary-level data for more than 40,000 samples can be queried by the general public representing a mix of cases and controls of diverse ancestries. Users have access to phenotype categories of variant carriers, as well as predicted ancestry, gender, and quality metrics. In contrast to many other platforms, the data browser is able to show data of newly-added samples in real-time and therefore evolves rapidly as more and more samples are sequenced.

Conclusions

Through ATAV, users have public access to one of the largest variant databases for patients sequenced at a tertiary care center and can look up any genes or variants of interest. Additionally, since the entire code is freely available on GitHub, ATAV can easily be deployed by other groups that wish to build their own platform, database, and user interface.","hji,kes",1,1,2,2,1,NA,NA +33769951,A Comprehensive Map of mRNAs and Their Isoforms across All 14 Renal Tubule Segments of Mouse.,"

Background

The repertoire of protein expression along the renal tubule depends both on regulation of transcription and regulation of alternative splicing that can generate multiple proteins from a single gene.

Methods

A full-length, small-sample RNA-seq protocol profiled transcriptomes for all 14 renal tubule segments microdissected from mouse kidneys.

Results

This study identified >34,000 transcripts, including 3709 that were expressed in a segment-specific manner. All data are provided as an online resource (https://esbl.nhlbi.nih.gov/MRECA/Nephron/). Many of the genes expressed in unique patterns along the renal tubule were solute carriers, transcription factors, or G protein-coupled receptors that account for segment-specific function. Mapping the distribution of transcripts associated with Wnk-SPAK-PKA signaling, renin-angiotensin-aldosterone signaling, and cystic diseases of the kidney illustrated the applications of the online resource. The method allowed full-length mapping of RNA-seq reads, which facilitated comprehensive, unbiased characterization of alternative exon usage along the renal tubule, including known isoforms of Cldn10, Kcnj1 (ROMK), Slc12a1 (NKCC2), Wnk1, Stk39 (SPAK), and Slc14a2 (UT-A urea transporter). It also identified many novel isoforms with segment-specific distribution. These included variants associated with altered protein structure (Slc9a8, Khk, Tsc22d1, and Scoc), and variants that may affect untranslated, regulatory regions of transcripts (Pth1r, Pkar1a, and Dab2).

Conclusions

Full-length, unbiased sequencing of transcripts identified gene-expression patterns along the mouse renal tubule. The data, provided as an online resource, include both quantitative and qualitative differences in transcripts. Identification of alternative splicing along the renal tubule may prove critical to understanding renal physiology and pathophysiology.","hji,kes",1,1,2,2,1,NA,NA +33772585,An immunologically friendly classification of non-peptidic ligands.,"The Immune Epitope Database (IEDB) freely provides experimental data regarding immune epitopes to the scientific public. The main users of the IEDB are immunologists who can easily use our web interface to search for peptidic epitopes via their simple single-letter codes. For example, 'A' stands for 'alanine'. Similarly, users can easily navigate the IEDB's simplified NCBI taxonomy hierarchy to locate proteins from specific organisms. However, some epitopes are non-peptidic, such as carbohydrates, lipids, chemicals and drugs, and it is more challenging to consistently name them and search upon, making access to their data more problematic for immunologists. Therefore, we set out to improve access to non-peptidic epitope data in the IEDB through the simplification of the non-peptidic hierarchy used in our search interfaces. Here, we present these efforts and their outcomes. Database URL: http://www.iedb.org/.","hji,kes",1,1,2,2,1,NA,NA +33776770,An Open Access Database of Licensed Cancer Drugs.,"A global, comprehensive and open access listing of approved anticancer drugs does not currently exist. Partial information is available from multiple sources, including regulatory authorities, national formularies and scientific agencies. Many such data sources include drugs used in oncology for supportive care, diagnostic or other non-antineoplastic uses. 
We describe a methodology to combine and cleanse relevant data from multiple sources to produce an open access database of drugs licensed specifically for therapeutic antineoplastic purposes. The resulting list is provided as an open access database, (http://www.redo-project.org/cancer-drugs-db/), so that it may be used by researchers as input for further research projects, for example literature-based text mining for drug repurposing.","hji,kes",1,1,2,2,1,NA,NA +33780471,MCPdb: The bacterial microcompartment database.,"Bacterial microcompartments are organelle-like structures composed entirely of proteins. They have evolved to carry out several distinct and specialized metabolic functions in a wide variety of bacteria. Their outer shell is constructed from thousands of tessellating protein subunits, encapsulating enzymes that carry out the internal metabolic reactions. The shell proteins are varied, with single, tandem and permuted versions of the PF00936 protein family domain comprising the primary structural component of their polyhedral architecture, which is reminiscent of a viral capsid. While considerable amounts of structural and biophysical data have been generated in the last 15 years, the existing functionalities of current resources have limited our ability to rapidly understand the functional and structural properties of microcompartments (MCPs) and their diversity. In order to make the remarkable structural features of bacterial microcompartments accessible to a broad community of scientists and non-specialists, we developed MCPdb: The Bacterial Microcompartment Database (https://mcpdb.mbi.ucla.edu/). MCPdb is a comprehensive resource that categorizes and organizes known microcompartment protein structures and their larger assemblies. 
To emphasize the critical roles symmetric assembly and architecture play in microcompartment function, each structure in the MCPdb is validated and annotated with respect to: (1) its predicted natural assembly state (2) tertiary structure and topology and (3) the metabolic compartment type from which it derives. The current database includes 163 structures and is available to the public with the anticipation that it will serve as a growing resource for scientists interested in understanding protein-based metabolic organelles in bacteria.","hji,kes",1,1,2,2,1,NA,NA +33784373,Bioinformatics tools developed to support BioCompute Objects.,"Developments in high-throughput sequencing (HTS) result in an exponential increase in the amount of data generated by sequencing experiments, an increase in the complexity of bioinformatics analysis reporting and an increase in the types of data generated. These increases in volume, diversity and complexity of the data generated and their analysis expose the necessity of a structured and standardized reporting template. BioCompute Objects (BCOs) provide the requisite support for communication of HTS data analysis that includes support for workflow, as well as data, curation, accessibility and reproducibility of communication. BCOs standardize how researchers report provenance and the established verification and validation protocols used in workflows while also being robust enough to convey content integration or curation in knowledge bases. BCOs that encapsulate tools, platforms, datasets and workflows are FAIR (findable, accessible, interoperable and reusable) compliant. Providing operational workflow and data information facilitates interoperability between platforms and incorporation of future dataset within an HTS analysis for use within industrial, academic and regulatory settings. 
Cloud-based platforms, including High-performance Integrated Virtual Environment (HIVE), Cancer Genomics Cloud (CGC) and Galaxy, support BCO generation for users. Given the 100K+ userbase between these platforms, BioCompute can be leveraged for workflow documentation. In this paper, we report the availability of platform-dependent and platform-independent BCO tools: HIVE BCO App, CGC BCO App, Galaxy BCO API Extension and BCO Portal. Community engagement was utilized to evaluate tool efficacy. We demonstrate that these tools further advance BCO creation from text editing approaches used in earlier releases of the standard. Moreover, we demonstrate that integrating BCO generation within existing analysis platforms greatly streamlines BCO creation while capturing granular workflow details. We also demonstrate that the BCO tools described in the paper provide an approach to solve the long-standing challenge of standardizing workflow descriptions that are both human and machine readable while accommodating manual and automated curation with evidence tagging. Database URL: https://www.biocomputeobject.org/resources.","hji,kes",1,1,2,2,1,the underlaying data are workflow descriptions.kind of meta related to life-sciences.,NA +33813885,MolluscDB: a genome and transcriptome database for molluscs.,"As sequencing becomes more accessible and affordable, the analysis of genomic and transcriptomic data has become a cornerstone of many research initiatives. Communities with a focus on particular taxa or ecosystems need solutions capable of aggregating genomic resources and serving them in a standardized and analysis-friendly manner. Taxon-focussed resources can be more flexible in addressing the needs of a research community than can universal or general databases. Here, we present MolluscDB, a genome and transcriptome database for molluscs. 
MolluscDB offers a rich ecosystem of tools, including an Ensembl browser, a BLAST server for homology searches and an HTTP server from which any dataset present in the database can be downloaded. To demonstrate the utility of the database and verify the quality of its data, we imported data from assembled genomes and transcriptomes of 22 species, estimated the phylogeny of Mollusca using single-copy orthologues, explored patterns of gene family size change and interrogated the data for biomineralization-associated enzymes and shell matrix proteins. MolluscDB provides an easy-to-use and openly accessible data resource for the research community. This article is part of the Theo Murphy meeting issue 'Molluscan genomics: broad insights and future directions for a neglected phylum'.","hji,kes",1,1,2,2,1,NA,NA +33843105,Sequence and evolutionary analysis of bacterial ribosomal S1 proteins.,"The multi-domain bacterial S1 protein is the largest and most functionally important ribosomal protein of the 30S subunit, which interacts with both mRNA and proteins. The family of ribosomal S1 proteins differs in the classical sense from a protein with tandem repeats and has a ""bead-on-string"" organization, where each repeat is folded into a globular domain. Based on our recent data, the study of evolutionary relationships for the bacterial phyla will provide evidence for one of the proposed theories of the evolutionary development of proteins with structural repeats: from multiple repeats of assembles to single repeats, or vice versa. In this comparative analysis of 1333 S1 sequences that were identified in 24 different phyla, we demonstrate how such phyla can form independently/dependently during evolution. To the best of our knowledge, this work is the first study of the evolutionary history of bacterial ribosomal S1 proteins. 
The collected and structured data can be useful to computer biologists as a resource for determining percent identity, amino acid composition and logo motifs, as well as dN/dS ratio in bacterial S1 protein. The obtained research data indicate that the evolutionary development of bacterial ribosomal S1 proteins evolved from multiple assemblies to single repeat. The presented data are integrated into the server, which can be accessed at http://oka.protres.ru:4200.","hji,kes",1,1,2,2,1,NA,NA +33849075,iNetModels 2.0: an interactive visualization and database of multi-omics data.,"It is essential to reveal the associations between various omics data for a comprehensive understanding of the altered biological process in human wellness and disease. To date, very few studies have focused on collecting and exhibiting multi-omics associations in a single database. Here, we present iNetModels, an interactive database and visualization platform of Multi-Omics Biological Networks (MOBNs). This platform describes the associations between the clinical chemistry, anthropometric parameters, plasma proteomics, plasma metabolomics, as well as metagenomics for oral and gut microbiome obtained from the same individuals. Moreover, iNetModels includes tissue- and cancer-specific Gene Co-expression Networks (GCNs) for exploring the connections between the specific genes. This platform allows the user to interactively explore a single feature's association with other omics data and customize its particular context (e.g. male/female specific). The users can also register their data for sharing and visualization of the MOBNs and GCNs. Moreover, iNetModels allows users who do not have a bioinformatics background to facilitate human wellness and disease research. 
iNetModels can be accessed freely at https://inetmodels.com without any limitation.","hji,kes",1,1,2,2,1,NA,NA +33849445,TANTIGEN 2.0: a knowledge base of tumor T cell antigens and epitopes.,"We previously developed TANTIGEN, a comprehensive online database cataloging more than 1000 T cell epitopes and HLA ligands from 292 tumor antigens. In TANTIGEN 2.0, we significantly expanded coverage in both immune response targets (T cell epitopes and HLA ligands) and tumor antigens. It catalogs 4,296 antigen variants from 403 unique tumor antigens and more than 1500 T cell epitopes and HLA ligands. We also included neoantigens, a class of tumor antigens generated through mutations resulting in new amino acid sequences in tumor antigens. TANTIGEN 2.0 contains validated TCR sequences specific for cognate T cell epitopes and tumor antigen gene/mRNA/protein expression information in major human cancers extracted by Human Pathology Atlas. TANTIGEN 2.0 is a rich data resource for tumor antigens and their associated epitopes and neoepitopes. It hosts a set of tailored data analytics tools tightly integrated with the data to form meaningful analysis workflows. It is freely available at http://projects.met-hilab.org/tadb .","hji,kes",1,1,2,2,1,NA,NA +33858332,Predicting tumor response to drugs based on gene-expression biomarkers of sensitivity learned from cancer cell lines.,"

Background

Human cancer cell line profiling and drug sensitivity studies provide valuable information about the therapeutic potential of drugs and their possible mechanisms of action. The goal of those studies is to translate the findings from in vitro studies of cancer cell lines into in vivo therapeutic relevance and, eventually, patients' care. Tremendous progress has been made.

Results

In this work, we built predictive models for 453 drugs using data on gene expression and drug sensitivity (IC50) from cancer cell lines. We identified many known drug-gene interactions and uncovered several potentially novel drug-gene associations. Importantly, we further applied these predictive models to ~ 17,000 bulk RNA-seq samples from The Cancer Genome Atlas (TCGA) and the Genotype-Tissue Expression (GTEx) database to predict drug sensitivity for both normal and tumor tissues. We created a web site for users to visualize and download our predicted data ( https://manticore.niehs.nih.gov/cancerRxTissue ). Using trametinib as an example, we showed that our approach can faithfully recapitulate the known tumor specificity of the drug.

Conclusions

We demonstrated that our approach can predict drugs that 1) are tumor-type specific; 2) elicit higher sensitivity from tumor compared to corresponding normal tissue; 3) elicit differential sensitivity across breast cancer subtypes. If validated, our prediction could have relevance for preclinical drug testing and in phase I clinical design.","hji,kes",1,1,2,2,1,NA,NA +33858848,HLA Ligand Atlas: a benign reference of HLA-presented peptides to improve T-cell-based cancer immunotherapy.,"

Background

The human leucocyte antigen (HLA) complex controls adaptive immunity by presenting defined fractions of the intracellular and extracellular protein content to immune cells. Understanding the benign HLA ligand repertoire is a prerequisite to define safe T-cell-based immunotherapies against cancer. Due to the poor availability of benign tissues, if available, normal tissue adjacent to the tumor has been used as a benign surrogate when defining tumor-associated antigens. However, this comparison has proven to be insufficient and even resulted in lethal outcomes. In order to match the tumor immunopeptidome with an equivalent counterpart, we created the HLA Ligand Atlas, the first extensive collection of paired HLA-I and HLA-II immunopeptidomes from 227 benign human tissue samples. This dataset facilitates a balanced comparison between tumor and benign tissues on HLA ligand level.

Methods

Human tissue samples were obtained from 16 subjects at autopsy, five thymus samples and two ovary samples originating from living donors. HLA ligands were isolated via immunoaffinity purification and analyzed in over 1200 liquid chromatography mass spectrometry runs. Experimentally and computationally reproducible protocols were employed for data acquisition and processing.

Results

The initial release covers 51 HLA-I and 86 HLA-II allotypes presenting 90,428 HLA-I- and 142,625 HLA-II ligands. The HLA allotypes are representative for the world population. We observe that immunopeptidomes differ considerably between tissues and individuals on source protein and HLA-ligand level. Moreover, we discover 1407 HLA-I ligands from non-canonical genomic regions. Such peptides were previously described in tumors, peripheral blood mononuclear cells (PBMCs), healthy lung tissues and cell lines. In a case study in glioblastoma, we show that potential on-target off-tumor adverse events in immunotherapy can be avoided by comparing tumor immunopeptidomes to the provided multi-tissue reference.

Conclusion

Given that T-cell-based immunotherapies, such as CAR-T cells, affinity-enhanced T cell transfer, cancer vaccines and immune checkpoint inhibition, have significant side effects, the HLA Ligand Atlas is the first step toward defining tumor-associated targets with an improved safety profile. The resource provides insights into basic and applied immune-associated questions in the context of cancer immunotherapy, infection, transplantation, allergy and autoimmunity. It is publicly available and can be browsed in an easy-to-use web interface at https://hla-ligand-atlas.org .","hji,kes",1,1,2,2,1,NA,NA +33876217,CanDriS: posterior profiling of cancer-driving sites based on two-component evolutionary model.,"Current cancer genomics databases have accumulated millions of somatic mutations that remain to be further explored. Due to the over-excess mutations unrelated to cancer, the great challenge is to identify somatic mutations that are cancer-driven. Under the notion that carcinogenesis is a form of somatic-cell evolution, we developed a two-component mixture model: while the ground component corresponds to passenger mutations, the rapidly evolving component corresponds to driver mutations. Then, we implemented an empirical Bayesian procedure to calculate the posterior probability of a site being cancer-driven. Based on these, we developed a software CanDriS (Cancer Driver Sites) to profile the potential cancer-driving sites for thousands of tumor samples from the Cancer Genome Atlas and International Cancer Genome Consortium across tumor types and pan-cancer level. As a result, we identified that approximately 1% of the sites have posterior probabilities larger than 0.90 and listed potential cancer-wide and cancer-specific driver mutations. 
By comprehensively profiling all potential cancer-driving sites, CanDriS greatly enhances our ability to refine our knowledge of the genetic basis of cancer and might guide clinical medication in the upcoming era of precision medicine. The results were displayed in a database CandrisDB (http://biopharm.zju.edu.cn/candrisdb/).","hji,kes",1,1,2,2,1,NA,NA +33882119,BC-TFdb: a database of transcription factor drivers in breast cancer.,"Transcription factors (TFs) are DNA-binding proteins, which regulate many essential biological functions. In several cancer types, TF function is altered by various direct mechanisms, including gene amplification or deletion, point mutations, chromosomal translocations, expression alterations, as well as indirectly by non-coding DNA mutations influencing the binding of the TF. TFs are also actively involved in breast cancer (BC) initiation and progression. Herein, we have developed an open-access database, BC-TFdb (Breast Cancer Transcription Factors database), of curated, non-redundant TF involved in BC. The database provides BC driver TFs related information including genomic sequences, proteomic sequences, structural data, pathway information, mutations information, DNA binding residues, survival and therapeutic resources. The database will be a useful platform for researchers to obtain BC-related TF-specific information. High-quality datasets are downloadable for users to evaluate and develop computational methods for drug designing against BC. Database URL: https://www.dqweilab-sjtu.com/index.php.","hji,kes",1,1,2,2,1,NA,NA +33898816,FermFooDb: A database of bioactive peptides derived from fermented foods.,"Globally fermented foods are in demands due to their functional and nutritional benefits. These foods are sources of probiotic organisms and bioactive peptides, various amino acids, enzymes etc. that provides numerous health benefits. 
FermFooDb (https://webs.iiitd.edu.in/raghava/fermfoodb/) is a manually curated database of bioactive peptides derived from wide range of foods that maintain comprehensive information about peptides and process of fermentation. This database comprises of 2205 entries with following major fields, peptide sequence, Mass and IC50, food source, functional activity, fermentation conditions, starter culture, testing conditions of sequences in vitro or in vivo, type of model and method of analysis. The bioactive peptides in our database have wide range of therapeutic potentials that includes antihypertensive, ACE-inhibitory, antioxidant, antimicrobial, immunomodulatory and cholesterol lowering peptides. These bioactive peptides were derived from different types of fermented foods that include milk, cheese, yogurt, wheat and rice. Numerous, web-based tools have been integrated to retrieve data, peptide mapping of proteins, similarity search and multiple-sequence alignment. This database will be useful for the food industry and researchers to explore full therapeutic potential of fermented foods from specific cultures.","hji,kes",1,1,2,2,1,NA,NA +33905618,The PhenX Toolkit: Establishing Standard Measures for COVID-19 Research.,"The PhenX (consensus measures for Phenotypes and eXposures) Toolkit (https://www.phenxtoolkit.org/) is a publicly available, web-based catalog of recommended, well-established measurement protocols of phenotypes and exposures. The goal of PhenX is to facilitate the use of standard measures, enhance data interoperability, and promote collaborative and translational research. PhenX is driven by the scientific community and historically has depended on working groups of experts to recommend measures for release in the PhenX Toolkit. The urgent need for recommended, standard measures for COVID-19 research triggered the development of a ""rapid release"" process for releasing new content in the PhenX Toolkit. 
Initially, PhenX collaborated with the National Institutes of Health (NIH) Office of Behavioral and Social Sciences Research, the National Human Genome Research Institute, and the NIH Disaster Research Response (DR2) program to create a library of COVID-19 measurement protocols. With additional support from NIH, PhenX adapted crowdsourcing techniques to accelerate prioritization and recommendation of protocols for release in the PhenX Toolkit. Prioritized COVID-19-specific protocols were used to anchor and define specialty collections of protocols that were subject to review and approval by the PhenX Steering Committee. In addition to the COVID-19-specific protocols, the specialty collections include existing, well-established PhenX protocols, use of which will further enhance data interoperability and cross-study analysis. The COVID-19 specialty collections are Behaviors and Risks; Ethnicity, Race and Demographics; History, Treatment and Outcomes; Information Resources; Psychosocial and Mental Health; and Socioeconomic. The development and usage of PhenX COVID-19 specialty collections are described in this article. © 2021 The Authors. Basic Protocol: Selecting COVID-19 protocols.","hji,kes",1,1,2,2,1,"collection of protocols, again possibly too meta related to life science",Chuck Check - yes +33906563,M6ADD: a comprehensive database of m6A modifications in diseases.,"N6-methyladenosine (m6A) modification is an important regulatory factor affecting diseases, including multiple cancers and it is a developing direction for targeted disease therapy. Here, we present the M6ADD (m6A-diseases database) database, a public data resource containing manually curated data on potential m6A-disease associations for which some experimental evidence is available; the related high-throughput sequencing data are also provided and analysed by using different computational methods. 
To give researchers a tool to query the m6A modification data, the M6ADD was designed as a web-based comprehensive resource focusing on the collection, storage and online analysis of m6A modifications, aimed at exploring the associations between m6A modification and gene disorders and diseases. The M6ADD includes 222 experimentally confirmed m6A-disease associations, involving 59 diseases from a review of more than 2000 published papers. The M6ADD also includes 409,229 m6A-disease associations obtained by computational and statistical methods from 30 high-throughput sequencing datasets. In addition, we provide data on 5239 potential m6A regulatory proteins related to 24 cancers based on network analysis prediction methods. In addition, we have developed a tool to explore the function of m6A-modified genes through the protein-protein interaction networks. The M6ADD can be accessed at http://m6add.edbc.org/.","hji,kes",1,1,2,2,1,NA,NA +33929905,Risk-Based Chemical Ranking and Generating a Prioritized Human Exposome Database.,"

Background

Due to the ubiquitous use of chemicals in modern society, humans are increasingly exposed to thousands of chemicals that contribute to a major portion of the human exposome. Should a comprehensive and risk-based human exposome database be created, it would be conducive to the rapid progress of human exposomics research. In addition, once a xenobiotic is biotransformed with distinct half-lives upon exposure, monitoring the parent compounds alone may not reflect the actual human exposure. To address these questions, a comprehensive and risk-prioritized human exposome database is needed.

Objectives

Our objective was to set up a comprehensive risk-prioritized human exposome database including physicochemical properties as well as risk prediction and develop a graphical user interface (GUI) that has the ability to conduct searches for content associated with chemicals in our database.

Methods

We built a comprehensive risk-prioritized human exposome database by text mining and database fusion. Subsequently, chemicals were prioritized by integrating exposure level obtained from the Systematic Empirical Evaluation of Models with toxicity data predicted by the Toxicity Estimation Software Tool and the Toxicological Priority Index calculated from the ToxCast database. The biotransformation half-lives (HLBs) of all the chemicals were assessed using the Iterative Fragment Selection approach and biotransformation products were predicted using the previously developed BioTransformer machine-learning method.

Results

We compiled a human exposome database of >20,000 chemicals, prioritized 13,441 chemicals based on probabilistic hazard quotient and 7,770 chemicals based on risk index, and provided a predicted biotransformation metabolite database of >95,000 metabolites. In addition, a user-interactive Java software (Oracle)-based search GUI was generated to enable open access to this new resource.

Discussion

Our database can be used to guide chemical management and enhance scientific understanding to rapidly and effectively prioritize chemicals for comprehensive biomonitoring in epidemiological investigations. https://doi.org/10.1289/EHP7722.","hji,kes",1,1,2,2,1,NA,NA +33942874,Human IRES Atlas: an integrative platform for studying IRES-driven translational regulation in humans.,"It is now known that cap-independent translation initiation facilitated by internal ribosome entry sites (IRESs) is vital in selective cellular protein synthesis under stress and different physiological conditions. However, three problems make it hard to understand transcriptome-wide cellular IRES-mediated translation initiation mechanisms: (i) complex interplay between IRESs and other translation initiation-related information, (ii) reliability issue of in silico cellular IRES investigation and (iii) labor-intensive in vivo IRES identification. In this research, we constructed the Human IRES Atlas database for a comprehensive understanding of cellular IRESs in humans. First, currently available and suitable IRES prediction tools (IRESfinder, PatSearch and IRESpy) were used to obtain transcriptome-wide human IRESs. Then, we collected eight genres of translation initiation-related features to help study the potential molecular mechanisms of each of the putative IRESs. Three functional tests (conservation, structural RNA-protein scores and conditional translation efficiency) were devised to evaluate the functionality of the identified putative IRESs. Moreover, an easy-to-use interface and an IRES-translation initiation interaction map for each gene transcript were implemented to help understand the interactions between IRESs and translation initiation-related features. Researchers can easily search/browse an IRES of interest using the web interface and deduce testable mechanism hypotheses of human IRES-driven translation initiation based on the integrated results. 
In summary, Human IRES Atlas integrates putative IRES elements and translation initiation-related experiments for better usage of these data and deduction of mechanism hypotheses. Database URL: http://cobishss0.im.nuk.edu.tw/Human_IRES_Atlas/.","hji,kes",1,1,2,2,1,NA,NA +33950201,Trips-Viz: an environment for the analysis of public and user-generated ribosome profiling data.,"Trips-Viz (https://trips.ucc.ie/) is an interactive platform for the analysis and visualization of ribosome profiling (Ribo-Seq) and shotgun RNA sequencing (RNA-seq) data. This includes publicly available and user generated data, hence Trips-Viz can be classified as a database and as a server. As a database it provides access to many processed Ribo-Seq and RNA-seq data aligned to reference transcriptomes which has been expanded considerably since its inception. Here, we focus on the server functionality of Trips-viz which also has been greatly improved. Trips-viz now enables visualisation of proteomics data from a large number of processed mass spectrometry datasets. It can be used to support translation inferred from Ribo-Seq data. Users are now able to upload a custom reference transcriptome as well as data types other than Ribo-Seq/RNA-Seq. Incorporating custom data has been streamlined with RiboGalaxy (https://ribogalaxy.ucc.ie/) integration. The other new functionality is the rapid detection of translated open reading frames (ORFs) through a simple easy to use interface. The analysis of differential expression has been also improved via integration of DESeq2 and Anota2seq in addition to a number of other improvements of existing Trips-viz features.","hji,kes",1,1,2,2,1,NA,NA +33952332,SANCDB: an update on South African natural compounds and their readily available analogs.,"

Background

South African Natural Compounds Database (SANCDB; https://sancdb.rubi.ru.ac.za/ ) is the sole and a fully referenced database of natural chemical compounds of South African biodiversity. It is freely available, and since its inception in 2015, the database has become an important resource to several studies. Its content has been: used as training data for machine learning models; incorporated to larger databases; and utilized in drug discovery studies for hit identifications.

Description

Here, we report the updated version of SANCDB. The new version includes 412 additional compounds that have been reported since 2015, giving a total of 1012 compounds in the database. Further, although natural products (NPs) are an important source of unique scaffolds, they have a major drawback due to their complex structure resulting in low synthetic feasibility in the laboratory. With this in mind, SANCDB is, now, updated to provide direct links to commercially available analogs from two major chemical databases namely Mcule and MolPort. To our knowledge, this feature is not available in other NP databases. Additionally, for easier access to information by users, the database and website interface were updated. The compounds are now downloadable in many different chemical formats.

Conclusions

The drug discovery process relies heavily on NPs due to their unique chemical organization. This has inspired the establishment of numerous NP chemical databases. With the emergence of newer chemoinformatic technologies, existing chemical databases require constant updates to facilitate information accessibility and integration by users. Besides increasing the NPs compound content, the updated SANCDB allows users to access the individual compounds (if available) or their analogs from commercial databases seamlessly.","hji,kes",1,1,2,2,1,NA,NA +33959747,Updates to HCOP: the HGNC comparison of orthology predictions tool.,"Multiple resources currently exist that predict orthologous relationships between genes. These resources differ both in the methodologies used and in the species they make predictions for. The HGNC Comparison of Orthology Predictions (HCOP) search tool integrates and displays data from multiple ortholog prediction resources for a specified human gene or set of genes. An indication of the reliability of a prediction is provided by the number of resources that support it. HCOP was originally designed to show orthology predictions between human and mouse but has been expanded to include data from a current total of 20 selected vertebrate and model organism species. The HCOP pipeline used to fetch and integrate the information from the disparate ortholog and nomenclature data resources has recently been rewritten, both to enable the inclusion of new data and to take advantage of modern web technologies. Data from HCOP are used extensively in our work naming genes as the Vertebrate Gene Nomenclature Committee (https://vertebrate.genenames.org).","hji,kes",0,1,1,2,0.5,software,data available; reassessed and still yes - kinda odd data but I think it counts +33970229,ChemHub: a knowledgebase of functional chemicals for synthetic biology studies.,"

Summary

The field of synthetic biology lacks a comprehensive knowledgebase for selecting synthetic target molecules according to their functions, economic applications, and known biosynthetic pathways. We implemented ChemHub, a knowledgebase containing >90,000 chemicals and their functions, along with related biosynthesis information for these chemicals that was manually extracted from >600,000 published studies by more than 100 people over the past 10 years.

Availability and implementation

Multiple algorithms were implemented to enable biosynthetic pathway design and precursor discovery, which can support investigation of the biosynthetic potential of these functional chemicals. ChemHub is freely available at: http://www.rxnfinder.org/chemhub/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +33973408,"hu.MAP 2.0: integration of over 15,000 proteomic experiments builds a global compendium of human multiprotein assemblies.","A general principle of biology is the self-assembly of proteins into functional complexes. Characterizing their composition is, therefore, required for our understanding of cellular functions. Unfortunately, we lack knowledge of the comprehensive set of identities of protein complexes in human cells. To address this gap, we developed a machine learning framework to identify protein complexes in over 15,000 mass spectrometry experiments which resulted in the identification of nearly 7,000 physical assemblies. We show our resource, hu.MAP 2.0, is more accurate and comprehensive than previous state of the art high-throughput protein complex resources and gives rise to many new hypotheses, including for 274 completely uncharacterized proteins. Further, we identify 253 promiscuous proteins that participate in multiple complexes pointing to possible moonlighting roles. We have made hu.MAP 2.0 easily searchable in a web interface (http://humap2.proteincomplexes.org/), which will be a valuable resource for researchers across a broad range of interests including systems biology, structural biology, and molecular explanations of disease.","hji,kes",1,1,2,2,1,NA,NA +33984507,HisPhosSite: A comprehensive database of histidine phosphorylated proteins and sites.,"Histidine phosphorylation is critically important in a variety of cellular processes including signal transduction, cell cycle, proliferation, differentiation, and apoptosis. It is estimated to account for 6% of all phosphorylated amino acids. However, due to the acid lability of the PN bond, the study of pHis lags far behind that of pSer, pThr, and pTyr. Recently, the development and use of pHis-specific antibodies and methodologies have led to a resurgence in the study of histidine phosphorylation. 
Although a considerable number of pHis proteins and sites have been discovered, most of them have not been manually curated and integrated to any databases. There is a lack of a data repository for pHis, and such work is expected to help further systemic studies of pHis. Thus, we present a comprehensive resource database of histidine phosphorylation (HisPhosSite) by curating experimentally validated pHis proteins and sites and compiling putative pHis sites with ortholog search. HisPhosSite contains 776 verified pHis sites and 2702 verified pHis proteins in 38 eukaryotic and prokaryotic species and 15,378 putative pHis sites and 10,816 putative pHis proteins in 1366 species. HisPhosSite provides rich annotations of pHis sites and proteins and multiple search engines (including motif search and BLAST search) for users to locate pHis sites of interest. HisPhosSite is available at http://reprod.njmu.edu.cn/hisphossite. SIGNIFICANCE: Histidine phosphorylation is involved in a variety of cellular processes as well as cancers, and it has been proved to be more common than previously thought. The HisPhosSite database was developed to collect pHis data from published literatures with experimental evidences. Unification of the identified pHis proteins and sites will give researchers an informative resource for histidine phosphorylation. HisPhosSite has a user-friendly interface with multiple search engines for users to locate pHis sites of interest. In addition, the database provides rich structural and functional annotations. HisPhosSite will help future studies and elucidation of the functions of histidine phosphorylation.","hji,kes",1,1,2,2,1,NA,NA +33985427,TarDB: an online database for plant miRNA targets and miRNA-triggered phased siRNAs.,"

Background

In plants, microRNAs (miRNAs) are pivotal regulators of plant development and stress responses. Different computational tools and web servers have been developed for plant miRNA target prediction; however, in silico prediction normally contains false positive results. In addition, many plant miRNA target prediction servers lack information for miRNA-triggered phased small interfering RNAs (phasiRNAs). Creating a comprehensive and relatively high-confidence plant miRNA target database is much needed.

Results

Here, we report TarDB, an online database that collects three categories of relatively high-confidence plant miRNA targets: (i) cross-species conserved miRNA targets; (ii) degradome/PARE (Parallel Analysis of RNA Ends) sequencing supported miRNA targets; (iii) miRNA-triggered phasiRNA loci. TarDB provides a user-friendly interface that enables users to easily search, browse and retrieve miRNA targets and miRNA initiated phasiRNAs in a broad variety of plants. TarDB has a comprehensive collection of reliable plant miRNA targets containing previously unreported miRNA targets and miRNA-triggered phasiRNAs even in the well-studied model species. Most of these novel miRNA targets are relevant to lineage-specific or species-specific miRNAs. TarDB data is freely available at http://www.biosequencing.cn/TarDB .

Conclusions

In summary, TarDB serves as a useful web resource for exploring relatively high-confidence miRNA targets and miRNA-triggered phasiRNAs in plants.","hji,kes",1,1,2,2,1,NA,NA +33985559,TGIF-DB: terse genomics interface for developing botany.,"

Objectives

Pearl millet (Pennisetum glaucum) is a staple cereal crop for semi-arid regions. Its whole genome sequence and deduced putative gene sequences are available. However, the functions of many pearl millet genes are unknown. Situations are similar for other crop species such as garden asparagus (Asparagus officinalis), chickpea (Cicer arietinum) and Tartary buckwheat (Fagopyrum tataricum). The objective of the data presented here was to improve functional annotations of genes of pearl millet, garden asparagus, chickpea and Tartary buckwheat with gene annotations of model plants, to systematically provide such annotations as well as their sequences on a website, and thereby to promote genomics for those crops.

Data description

Sequences of genomes and transcripts of pearl millet, garden asparagus, chickpea and Tartary buckwheat were downloaded from a public database. These transcripts were associated with functional annotations of their Arabidopsis thaliana and rice (Oryza sativa) counterparts identified by BLASTX. Conserved domains in protein sequences of those species were identified by the HMMER scan with the Pfam database. The resulting data was deposited in the figshare repository and can be browsed on the Terse Genomics Interface for Developing Botany (TGIF-DB) website ( http://webpark2116.sakura.ne.jp/rlgpr/ ).","hji,kes",1,1,2,2,1,NA,figshare and website +33993461,TUPDB: Target-Unrelated Peptide Data Bank.,"The isolation of target-unrelated peptides (TUPs) through biopanning remains as a major problem of phage display selection experiments. These TUPs do not have any actual affinity toward targets of interest, which tend to be mistakenly identified as target-binding peptides. Therefore, an information portal for storing TUP data is urgently needed. Here, we present a TUP data bank (TUPDB), which is a comprehensive, manually curated database of approximately 73 experimentally verified TUPs and 1963 potential TUPs collected from TUPScan, the BDB database, and public research articles. The TUPScan tool has been integrated in TUPDB to facilitate TUP analysis. We believe that TUPDB can help identify and remove TUPs in future reports in the biopanning community. The database is of great importance to improving the quality of phage display-based epitope mapping and promoting the development of vaccines, diagnostics, and therapeutics. The TUPDB database is available at http://i.uestc.edu.cn/tupdb .","hji,kes",1,1,2,2,1,NA,NA +33994075,A Review of the Literature Organized Into a New Database: RHeference.,"Hundreds of articles containing heterogeneous data describe D variants or add to the knowledge of known alleles. 
Data can be difficult to find despite existing online blood group resources and genetic and literature databases. We have developed a modern, elaborate database for D variants, thanks to an extensive literature search with meticulous curation of 387 peer-reviewed articles and 80 abstracts from major conferences and other sources. RHeference contains entries for 710 RHD alleles, 11 RHCE alleles, 30 phenotype descriptions (preventing data loss from historical sources), 35 partly characterized alleles, 3 haplotypes, and 16 miscellaneous entries. The entries include molecular, phenotypic, serological, alloimmunization, haplotype, geographical, and other data, detailed for each source. The main characteristics are summarized for each entry. The sources for all information are included and easily accessible through doi and PMID links. Overall, the database contains more than 10,000 individual pieces of data. We have set up the database architecture based on our previous expertise on database setup and biocuration for other topics, using modern technologies such as the Django framework, BioPython, Bootstrap, and Jquery. This architecture allows an easy access to data and enables simple and complex queries: combining multiple mutations, keywords, or any of the characteristics included in the database. RHeference provides a complement to existing resources and will continue to grow as our knowledge expands and new articles are published. The database url is http://www.rheference.org/.","hji,kes",1,1,2,2,1,NA,NA +33995920,ExVe: The knowledge base of orthologous proteins identified in fungal extracellular vesicles.,"Extracellular vesicles (EVs) are double-membrane particles associated with intercellular communication. Since the discovery of EV production in the fungus Cryptococcus neoformans, the importance of EV release in its physiology and pathogenicity has been investigated. To date, few studies have investigated the proteomic content of EVs from multiple fungal species. 
Our main objective was to use an orthology approach to compare proteins identified by EV shotgun proteomics in 8 pathogenic and 1 nonpathogenic species. Using protein information from the UniProt and FungiDB databases, we integrated data for 11,433 hits in fungal EVs with an orthology perspective, resulting in 3,834 different orthologous groups. OG6_100083 (Hsp70 Pfam domain) was the unique orthologous group that was identified for all fungal species. Proteins with this protein domain are associated with the stress response, survival and morphological changes in different fungal species. Although no pathogenic orthologous group was found, we identified 5 orthologous groups exclusive to S. cerevisiae. Using the criteria of at least 7 pathogenic fungi to define a cluster, we detected the 4 unique pathogenic orthologous groups. Taken together, our data suggest that Hsp70-related proteins might play a key role in fungal EVs, regardless of the pathogenic status. Using an orthology approach, we identified at least 4 protein domains that could be novel therapeutic targets against pathogenic fungi. Our results were compiled in the herein described ExVe database, which is publicly available at http://exve.icc.fiocruz.br.","hji,kes",1,1,2,2,1,NA,NA +34010390,Integration of 1:1 orthology maps and updated datasets into Echinobase.,"Echinobase (https://echinobase.org) is a central online platform that generates, manages and hosts genomic data relevant to echinoderm research. While the resource primarily serves the echinoderm research community, the recent release of an excellent quality genome for the frequently studied purple sea urchin (Strongylocentrotus purpuratus genome, v5.0) has provided an opportunity to adapt to the needs of a broader research community across other model systems. 
To this end, establishing pipelines to identify orthologous genes between echinoderms and other species has become a priority in many contexts including nomenclature, linking to data in other model organisms, and in internal functionality where data gathered in one hosted species can be associated with genes in other hosted echinoderms. This paper describes the orthology pipelines currently employed by Echinobase and how orthology data are processed to yield 1:1 ortholog mappings between a variety of echinoderms and other model taxa. We also describe functions of interest that have recently been included on the resource, including an updated developmental time course for S.purpuratus, and additional tracks for genome browsing. These data enhancements will increase the accessibility of the resource to non-echinoderm researchers and simultaneously expand the data quality and quantity available to core Echinobase users. Database URL: https://echinobase.org.","hji,kes",1,1,2,2,1,NA,NA +34012763,Benchmarking mass spectrometry based proteomics algorithms using a simulated database.,"Protein sequencing algorithms process data from a variety of instruments that has been generated under diverse experimental conditions. Currently there is no way to predict the accuracy of an algorithm for a given data set. Most of the published algorithms and associated software has been evaluated on limited number of experimental data sets. However, these performance evaluations do not cover the complete search space the algorithmand the software might encounter in real-world. To this end, we present a database of simulated spectra that can be used to benchmark any spectra to peptide search engine. We demonstrate the usability of this database by bench marking two popular peptide sequencing engines. We show wide variation in the accuracy of peptide deductions and a complete quality profile of a given algorithm can be useful for practitioners and algorithm developers. 
All benchmarking data is available at https://users.cs.fiu.edu/~fsaeed/Benchmark.html.","hji,kes",1,1,2,2,1,NA,tricky - resource of benchmarking data +34016708,Analyzing the vast coronavirus literature with CoronaCentral.,"The SARS-CoV-2 pandemic has caused a surge in research exploring all aspects of the virus and its effects on human health. The overwhelming publication rate means that researchers are unable to keep abreast of the literature. To ameliorate this, we present the CoronaCentral resource that uses machine learning to process the research literature on SARS-CoV-2 together with SARS-CoV and MERS-CoV. We categorize the literature into useful topics and article types and enable analysis of the contents, pace, and emphasis of research during the crisis with integration of Altmetric data. These topics include therapeutics, disease forecasting, as well as growing areas such as ""long COVID"" and studies of inequality. This resource, available at https://coronacentral.ai, is updated daily.","hji,kes",1,1,2,2,1,"meta-resource, of literature","knowledge base - ai created, data available" +34017945,Identity and compatibility of reference genome resources.,"Genome analysis relies on reference data like sequences, feature annotations, and aligner indexes. These data can be found in many versions from many sources, making it challenging to identify and assess compatibility among them. For example, how can you determine which indexes are derived from identical raw sequence files, or which annotations share a compatible coordinate system? Here, we describe a novel approach to establish identity and compatibility of reference genome resources. We approach this with three advances: first, we derive unique identifiers for each resource; second, we record parent-child relationships among resources; and third, we describe recursive identifiers that determine identity as well as compatibility of coordinate systems and sequence names. 
These advances facilitate portability, reproducibility, and re-use of genome reference data. Available at https://refgenie.databio.org.","hji,kes",1,1,2,2,1,"meta-resource, data about data",NA +34019655,KEA3: improved kinase enrichment analysis via data integration.,"Phosphoproteomics and proteomics experiments capture a global snapshot of the cellular signaling network, but these methods do not directly measure kinase state. Kinase Enrichment Analysis 3 (KEA3) is a webserver application that infers overrepresentation of upstream kinases whose putative substrates are in a user-inputted list of proteins. KEA3 can be applied to analyze data from phosphoproteomics and proteomics studies to predict the upstream kinases responsible for observed differential phosphorylations. The KEA3 background database contains measured and predicted kinase-substrate interactions (KSI), kinase-protein interactions (KPI), and interactions supported by co-expression and co-occurrence data. To benchmark the performance of KEA3, we examined whether KEA3 can predict the perturbed kinase from single-kinase perturbation followed by gene expression experiments, and phosphoproteomics data collected from kinase-targeting small molecules. We show that integrating KSIs and KPIs across data sources to produce a composite ranking improves the recovery of the expected kinase. The KEA3 webserver is available at https://maayanlab.cloud/kea3.","hji,kes",0,0,0,2,0,software,no notes; reassessed and re-scored - has brought together but abstract does make it sound like the data isn't necessarily available (eg background database) +34020544,Ori-Finder 3: a web server for genome-wide prediction of replication origins in Saccharomyces cerevisiae.,"DNA replication is a fundamental process in all organisms; this event initiates at sites termed origins of replication. The characteristics of eukaryotic replication origins are best understood in Saccharomyces cerevisiae. 
For this species, origin prediction algorithms or web servers have been developed based on the sequence features of autonomously replicating sequences (ARSs). However, their performances are far from satisfactory. By utilizing the Z-curve methodology, we present a novel pipeline, Ori-Finder 3, for the computational prediction of replication origins in S. cerevisiae at the genome-wide level based solely on DNA sequences. The ARS exhibiting both an AT-rich stretch and ARS consensus sequence element can be predicted at the single-nucleotide level. For the identified ARSs in the S. cerevisiae reference genome, 83 and 60% of the top 100 and top 300 predictions matched the known ARS records, respectively. Based on Ori-Finder 3, we subsequently built a database of the predicted ARSs identified in more than a hundred S. cerevisiae genomes. Consequently, we developed a user-friendly web server including the ARS prediction pipeline and the predicted ARSs database, which can be freely accessed at http://tubic.tju.edu.cn/Ori-Finder3.","hji,kes",1,1,2,2,1,NA,NA +34022814,The Rhododendron Plant Genome Database (RPGD): a comprehensive online omics database for Rhododendron.,"

Background

The genus Rhododendron L. has been widely cultivated for hundreds of years around the world. Members of this genus are known for great ornamental and medicinal value. Owing to advances in sequencing technology, genomes and transcriptomes of members of the Rhododendron genus have been sequenced and published by various laboratories. With increasing amounts of omics data available, a centralized platform is necessary for effective storage, analysis, and integration of these large-scale datasets to ensure consistency, independence, and maintainability.

Results

Here, we report our development of the Rhododendron Plant Genome Database (RPGD; http://bioinfor.kib.ac.cn/RPGD/ ), which represents the first comprehensive database of Rhododendron genomics information. It includes large amounts of omics data, including genome sequence assemblies for R. delavayi, R. williamsianum, and R. simsii, gene expression profiles derived from public RNA-Seq data, functional annotations, gene families, transcription factor identification, gene homology, simple sequence repeats, and chloroplast genome. Additionally, many useful tools, including BLAST, JBrowse, Orthologous Groups, Genome Synteny Browser, Flanking Sequence Finder, Expression Heatmap, and Batch Download were integrated into the platform.

Conclusions

RPGD is designed to be a comprehensive and helpful platform for all Rhododendron researchers. We believe that RPGD will be an indispensable hub for Rhododendron studies.","hji,kes",1,1,2,2,1,NA,NA +34023905,"AnnotSV and knotAnnotSV: a web server for human structural variations annotations, ranking and analysis.","With the dramatic increase of pangenomic analysis, Human geneticists have generated large amount of genomic data including millions of small variants (SNV/indel) but also thousands of structural variations (SV) mainly from next-generation sequencing and array-based techniques. While the identification of the complete SV repertoire of a patient is getting possible, the interpretation of each SV remains challenging. To help identifying human pathogenic SV, we have developed a web server dedicated to their annotation and ranking (AnnotSV) as well as their visualization and interpretation (knotAnnotSV) freely available at the following address: https://www.lbgi.fr/AnnotSV/. A large amount of annotations from >20 sources is integrated in our web server including among others genes, haploinsufficiency, triplosensitivity, regulatory elements, known pathogenic or benign genomic regions, phenotypic data. An ACMG/ClinGen compliant prioritization module allows the scoring and the ranking of SV into 5 SV classes from pathogenic to benign. Finally, the visualization interface displays the annotated SV in an interactive way including popups, search fields, filtering options, advanced colouring to highlight pathogenic SV and hyperlinks to the UCSC genome browser or other public databases. 
This web server is designed for diagnostic and research analysis by providing important resources to the user.","hji,kes",1,1,2,2,1,unclear how or if users can access the underlying data,NA +34025933,AddictGene: An integrated knowledge base for differentially expressed genes associated with addictive substance.,"Addiction, a disorder of maladaptive brain plasticity, is associated with changes in numerous gene expressions. Nowadays, high-throughput sequencing data on addictive substance-induced gene expression have become widely available. A resource for comprehensive annotation of genes that show differential expression in response to commonly abused substances is necessary. So, we developed AddictGene by integrating gene expression, gene-gene interaction, gene-drug interaction and epigenetic regulatory annotation for over 70,156 items of differentially expressed genes associated with 7 commonly abused substances, including alcohol, nicotine, cocaine, morphine, heroin, methamphetamine, and amphetamine, across three species (human, mouse, rat). We also collected 1,141 addiction-related experimentally validated genes by techniques such as RT-PCR, northern blot and in situ hybridization. 
The easy-to-use web interface of AddictGene (http://159.226.67.237/sun/addictgedb/) allows users to search and browse multidimensional data on DEGs of their interest: 1) detailed gene-specific information extracted from the original studies; 2) basic information about the specific gene extracted from NCBI; 3) SNP associated with substance dependence and other psychiatry disorders; 4) expression alteration of specific gene in other psychiatric disorders; 5) expression patterns of interested gene across 31 primary and 54 secondary human tissues; 6) functional annotation of interested gene; 7) epigenetic regulators involved in the alteration of specific genes, including histone modifications and DNA methylation; 8) protein-protein interaction for functional linkage with interested gene; 9) drug-gene interaction for potential druggability. AddictGene offers a valuable repository for researchers to study the molecular mechanisms underlying addiction, and might provide valuable insights into potential therapies for drug abuse and relapse.","hji,kes",1,1,2,2,1,NA,NA +34025934,Computational modeling and bioinformatic analyses of functional mutations in drug target genes in Mycobacterium tuberculosis.,"Tuberculosis (TB) continues to be the leading cause of deaths due to its persistent drug resistance and the consequent ineffectiveness of anti-TB treatment. Recent years witnessed huge amount of sequencing data, revealing mutations responsible for drug resistance. However, the lack of an up-to-date repository remains a barrier towards utilization of these data and identifying major mutations-associated with resistance. Amongst all mutations, non-synonymous mutations alter the amino acid sequence of a protein and have a much greater effect on pathogenicity. Hence, this type of gene mutation is of prime interest of the present study. 
The purpose of this study is to develop an updated database comprising almost all reported substitutions within the Mycobacterium tuberculosis (M.tb) drug target genes rpoB, inhA, katG, pncA, gyrA and gyrB. Various bioinformatics prediction tools were used to assess the structural and biophysical impacts of the resistance causing non-synonymous single nucleotide polymorphisms (nsSNPs) at the molecular level. This was followed by evaluating the impact of these mutations on binding affinity of the drugs to target proteins. We have developed a comprehensive online resource named MycoTRAP-DB (Mycobacterium tuberculosis Resistance Associated Polymorphisms Database) that connects mutations in genes with their structural, functional and pathogenic implications on protein. This database is accessible at http://139.59.12.92. This integrated platform would enable comprehensive analysis and prioritization of SNPs for the development of improved diagnostics and antimycobacterial medications. Moreover, our study puts forward secondary mutations that can be important for prognostic assessments of drug-resistance mechanism and actionable anti-TB drugs.","hji,kes",1,1,2,2,1,NA,NA +34029142,Landscape of GPCR expression along the mouse nephron.,"Kidney transport and other renal functions are regulated by multiple G protein-coupled receptors (GPCRs) expressed along the renal tubule. The rapid, recent appearance of comprehensive unbiased gene expression data in the various renal tubule segments, chiefly RNA sequencing and protein mass spectrometry data, has provided a means of identifying patterns of GPCR expression along the renal tubule. To allow for comprehensive mapping, we first curated a comprehensive list of GPCRs in the genomes of mice, rats, and humans (https://hpcwebapps.cit.nih.gov/ESBL/Database/GPCRs/) using multiple online data sources. 
We used this list to mine segment-specific and cell type-specific expression data from RNA-sequencing studies in microdissected mouse tubule segments to identify GPCRs that are selectively expressed in discrete tubule segments. Comparisons of these mapped mouse GPCRs with other omics datasets as well as functional data from isolated perfused tubule and micropuncture studies confirmed patterns of expression for well-known receptors and identified poorly studied GPCRs that are likely to play roles in the regulation of renal tubule function. Thus, we provide data resources for GPCR expression across the renal tubule, highlighting both well-known GPCRs and understudied receptors to provide guidance for future studies.","hji,kes",1,1,2,2,1,NA,NA +34032471,The Human Salivary Proteome Wiki: A Community-Driven Research Platform.,"Saliva has become an attractive body fluid for on-site, remote, and real-time monitoring of oral and systemic health. At the same time, the scientific community needs a saliva-centered information platform that keeps pace with the rapid accumulation of new data and knowledge by annotating, refining, and updating the salivary proteome catalog. We developed the Human Salivary Proteome (HSP) Wiki as a public data platform for researching and retrieving custom-curated data and knowledge on the saliva proteome. The HSP Wiki is dynamically compiled and updated based on published saliva proteome studies and up-to-date protein reference records. It integrates a wide range of available information by funneling in data from established external protein, genome, transcriptome, and glycome databases. In addition, the HSP Wiki incorporates data from human disease-related studies. Users can explore the proteome of saliva simply by browsing the database, querying the available data, performing comparisons of data sets, and annotating existing protein entries using a simple, intuitive interface. 
The annotation process includes both user feedback and curator committee review to ensure the quality and validity of each entry. Here, we present the first overview of features and functions the HSP Wiki offers. As a saliva proteome-centric, publicly accessible database, the HSP Wiki will advance the knowledge of saliva composition and function in health and disease for users across a wide range of disciplines. As a community-based data- and knowledgebase, the HSP Wiki will serve as a worldwide platform to exchange salivary proteome information, inspire novel research ideas, and foster cross-discipline collaborations. The HSP Wiki will pave the way for harnessing the full potential of the salivary proteome for diagnosis, risk prediction, therapy of oral and systemic diseases, and preparedness for emerging infectious diseases.Database URL: https://salivaryproteome.nidcr.nih.gov/.","hji,kes",1,1,2,2,1,NA,NA +34034817,PhenCards: a data resource linking human phenotype information to biomedical knowledge.,"We present PhenCards ( https://phencards.org ), a database and web server intended as a one-stop shop for previously disconnected biomedical knowledge related to human clinical phenotypes. Users can query human phenotype terms or clinical notes. PhenCards obtains relevant disease/phenotype prevalence and co-occurrence, drug, procedural, pathway, literature, grant, and collaborator data. PhenCards recommends the most probable genetic diseases and candidate genes based on phenotype terms from clinical notes. 
PhenCards facilitates exploration of phenotype, e.g., which drugs cause or are prescribed for patient symptoms, which genes likely cause specific symptoms, and which comorbidities co-occur with phenotypes.","hji,kes",1,1,2,2,1,NA,NA +34038028,Using the PhenX Toolkit to Select Standard Measurement Protocols for Your Research Study.,"The goals of PhenX (consensus measures for Phenotypes and eXposures) are to promote the use of standard measurement protocols and to help investigators identify opportunities for collaborative research and cross-study analysis, thus increasing the impact of individual studies. The PhenX Toolkit (https://www.phenxtoolkit.org/) offers high-quality, well-established measurement protocols to assess phenotypes and exposures in studies with human participants. The Toolkit contains protocols representing 29 research domains and 6 specialty collections of protocols that add depth to the Toolkit in specific research areas (e.g., COVID-19, Social Determinants of Health [SDoH], Blood Sciences Research [BSR], Mental Health Research [MHR], Tobacco Regulatory Research [TRR], and Substance Abuse and Addiction [SAA]). Protocols are recommended for inclusion in the PhenX Toolkit by Working Groups of domain experts using a consensus process that includes input from the scientific community. For each PhenX protocol, the Toolkit provides a detailed description, the rationale for inclusion, and supporting documentation. Users can browse protocols in the Toolkit, search the Toolkit using keywords, or use Browse Protocols Tree to identify protocols of interest. The PhenX Toolkit provides data dictionaries compatible with the database of Genotypes and Phenotypes (dbGaP), Research Electronic Data Capture (REDCap) data submission compatibility, and data collection worksheets to help investigators incorporate PhenX protocols into their study design. The PhenX Toolkit provides resources to help users identify published studies that used PhenX protocols. 
© 2021 The Authors. Current Protocols published by Wiley Periodicals LLC. Basic Protocol: Using the PhenX Toolkit to support or extend study design.","hji,kes",0,1,1,2,0.5,data are measurement protocols related to life-sciences,"no notes; reassessed and still yes - under a broad definition, it is a collection of data, iffy" +34042771,Introducing a Platform for Integrating and Sharing Stem Cell Research Data.,"Advancements in regenerative medicine have highlighted the need for increased standardization and sharing of stem cell products to help drive these innovative interventions toward public availability and to increase collaboration in the scientific community. Although numerous attempts and numerous databases have been made to store this data, there is still a lack of a platform that incorporates heterogeneous stem cell information into a harmonized project-based framework. The aim of the platform described in this study, ReMeDy, is to provide an intelligent informatics solution which integrates diverse stem cell product characteristics with study subject and omics information. In the resulting platform, heterogeneous data is validated using predefined ontologies and stored in a relational database. In this initial feasibility study, testing of the ReMeDy functionality was performed using published, publically-available induced pluripotent stem cell projects conducted in in vitro, preclinical and intervention evaluations. It demonstrated the robustness of ReMeDy for storing diverse iPSC data, by seamlessly harmonizing diverse common data elements, and the potential utility of this platform for driving knowledge generation from the aggregation of this shared data. Next steps include increasing the number of curated projects by developing a crowdsourcing framework for data upload and an automated pipeline for metadata abstraction. 
The database is publically accessible at https://remedy.mssm.edu/.","hji,kes",1,1,2,2,1,NA,NA +34046592,A map of the SARS-CoV-2 RNA structurome.,"SARS-CoV-2 has exploded throughout the human population. To facilitate efforts to gain insights into SARS-CoV-2 biology and to target the virus therapeutically, it is essential to have a roadmap of likely functional regions embedded in its RNA genome. In this report, we used a bioinformatics approach, ScanFold, to deduce the local RNA structural landscape of the SARS-CoV-2 genome with the highest likelihood of being functional. We recapitulate previously-known elements of RNA structure and provide a model for the folding of an essential frameshift signal. Our results find that SARS-CoV-2 is greatly enriched in unusually stable and likely evolutionarily ordered RNA structure, which provides a large reservoir of potential drug targets for RNA-binding small molecules. Results are enhanced via the re-analyses of publicly-available genome-wide biochemical structure probing datasets that are broadly in agreement with our models. Additionally, ScanFold was updated to incorporate experimental data as constraints in the analysis to facilitate comparisons between ScanFold and other RNA modelling approaches. Ultimately, ScanFold was able to identify eight highly structured/conserved motifs in SARS-CoV-2 that agree with experimental data, without explicitly using these data. All results are made available via a public database (the RNAStructuromeDB: https://structurome.bb.iastate.edu/sars-cov-2) and model comparisons are readily viewable at https://structurome.bb.iastate.edu/sars-cov-2-global-model-comparisons.","hji,kes",1,1,2,2,1,NA,NA +34048545,"dbGENVOC: database of GENomic Variants of Oral Cancer, with special reference to India.","Oral cancer is highly prevalent in India and is the most frequent cancer type among Indian males. It is also very common in southeast Asia. 
India has participated in the International Cancer Genome Consortium (ICGC) and some national initiatives to generate large-scale genomic data on oral cancer patients and analyze to identify associations and systematically catalog the associated variants. We have now created an open, web-accessible database of these variants found significantly associated with Indian oral cancer patients, with a user-friendly interface to enable easy mining. We have value added to this database by including relevant data collated from various sources on other global populations, thereby providing opportunities of comparative geographical and/or ethnic analyses. Currently, no other database of similar nature is available on oral cancer. We have developed Database of GENomic Variants of Oral Cancer, a browsable online database framework for storage, retrieval and analysis of large-scale data on genomic variants and make it freely accessible to the scientific community. Presently, the web-accessible database allows potential users to mine data on ∼24 million clinically relevant somatic and germline variants derived from exomes (n = 100) and whole genomes (n = 5) of Indian oral cancer patients; all generated by us. Variant data from The Cancer Genome Atlas and data manually curated from peer-reviewed publications were also incorporated into the database for comparative analyses. It allows users to query the database by a single gene, multiple genes, multiple variant sites, genomic region, patient ID and pathway identities. Database URL: http://research.nibmg.ac.in/dbcares/dbgenvoc/.","hji,kes",1,1,2,2,1,NA,NA +34048547,emiRIT: a text-mining-based resource for microRNA information.,"microRNAs (miRNAs) are essential gene regulators, and their dysregulation often leads to diseases. Easy access to miRNA information is crucial for interpreting generated experimental data, connecting facts across publications and developing new hypotheses built on previous knowledge. 
Here, we present extracting miRNA Information from Text (emiRIT), a text-mining-based resource, which presents miRNA information mined from the literature through a user-friendly interface. We collected 149,233 miRNA-PubMed ID pairs from Medline between January 1997 and May 2020. emiRIT currently contains 'miRNA-gene regulation' (69,152 relations), 'miRNA-disease (cancer)' (12,300 relations), 'miRNA-biological process and pathways' (23,390 relations) and circulatory 'miRNAs in extracellular locations' (3782 relations). Biological entities and their relation to miRNAs were extracted from Medline abstracts using publicly available and in-house developed text-mining tools, and the entities were normalized to facilitate querying and integration. We built a database and an interface to store and access the integrated data, respectively. We provide an up-to-date and user-friendly resource to facilitate access to comprehensive miRNA information from the literature on a large scale, enabling users to navigate through different roles of miRNA and examine them in a context specific to their information needs. To assess our resource's information coverage, we have conducted two case studies focusing on the target and differential expression information of miRNAs in the context of cancer and a third case study to assess the usage of emiRIT in the curation of miRNA information. Database URL: https://research.bioinformatics.udel.edu/emirit/.","hji,kes",1,1,2,2,1,NA,Chuck Check - yes +34048576,The COVID-19 Data Portal: accelerating SARS-CoV-2 and COVID-19 research through rapid open access data sharing.,"The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) pandemic will be remembered as one of the defining events of the 21st century. The rapid global outbreak has had significant impacts on human society and is already responsible for millions of deaths. 
Understanding and tackling the impact of the virus has required a worldwide mobilisation and coordination of scientific research. The COVID-19 Data Portal (https://www.covid19dataportal.org/) was first released as part of the European COVID-19 Data Platform, on April 20th 2020 to facilitate rapid and open data sharing and analysis, to accelerate global SARS-CoV-2 and COVID-19 research. The COVID-19 Data Portal has fortnightly feature releases to continue to add new data types, search options, visualisations and improvements based on user feedback and research. The open datasets and intuitive suite of search, identification and download services, represent a truly FAIR (Findable, Accessible, Interoperable and Reusable) resource that enables researchers to easily identify and quickly obtain the key datasets needed for their COVID-19 research.","hji,kes",1,1,2,2,1,NA,NA +34052284,miREV: An Online Database and Tool to Uncover Potential Reference RNAs and Biomarkers in Small-RNA Sequencing Data Sets from Extracellular Vesicles Enriched Samples.,"Extracellular vesicles (EVs) are nano-sized, membrane-enclosed vesicles released by cells for intercellular communication. EVs are involved in pathological processes and miRNAs in EVs have gained interest as easily accessible biomolecules in liquid biopsies for diagnostic purposes. To validate potential miRNA biomarker, transcriptome analyses must be carried out to detect suitable reference miRNAs. miREV is a database with over 400 miRNA sequencing data sets and helps the researcher to find suitable reference miRNAs for their individual experimental setup. The researcher can put together a specific sample set in miREV, which is similar to his own experimental concept in order to find the most suitable references. This allows to run validation experiments without having to carry out a complex and costly transcriptome analysis priorly. Additional read count tables of each generated sample set are downloadable for further analysis. 
miREV is freely available at https://www.physio.wzw.tum.de/mirev/.","hji,kes",1,1,2,2,1,NA,NA +34080131,"Preclinical Western Blot in the Era of Digital Transformation and Reproducible Research, an Eastern Perspective.","The current research is an interdisciplinary endeavor to develop a necessary tool in preclinical protein studies of diseases or disorders through western blotting. In the era of digital transformation and open access principles, an interactive cloud-based database called East-West Blot ( https://rancs-lab.shinyapps.io/WesternBlots ) is designed and developed. The online interactive subject-specific database built on the R shiny platform facilitates a systematic literature search on the specific subject matter, here set to western blot studies of protein regulation in the preclinical model of TBI. The tool summarizes the existing publicly available knowledge through a data visualization technique and easy access to the critical data elements and links to the study itself. The application compiled a relational database of PubMed-indexed western blot studies labeled under HHS public access, reporting downstream protein regulations presented by fluid percussion injury model of traumatic brain injury. 
The promises of the developed tool include progressing toward implementing the principles of 3Rs (replacement, reduction, and refinement) for humane experiments, cultivating the prerequisites of reproducible research in terms of reporting characteristics, paving the ways for a more collaborative experimental design in basic science, and rendering an up-to-date and summarized perspective of current publicly available knowledge.","hji,kes",1,1,2,2,1,NA,Chuck Check- yes +34081565,Biomedical Entity Explorer: A Web Server for Biomedical Entity Exploration.,"Biomedical Entity Explorer (BEE) is a web server that can search for biomedical entities from a database of six biomedical entity types (gene, miRNA, drug, disease, single nucleotide polymorphism [SNP], pathway) and their gene associations. The search results can be explored using intersections, unions, and negations. BEE has integrated biomedical entities from 16 databases (Ensemble, PharmGKB, Genetic Home Reference, Tarbase, Mirbase, NCI Thesaurus, DisGeNET, Linked life data, UMLS, GSEA MsigDB, Reactome, KEGG, Gene Ontology, HGVD, SNPedia, and dbSNP) based on their gene associations and built a database with their synonyms, descriptions, and links containing individual details. Users can enter the keyword of one or more entities and select the type of entity for which they want to know the relationship for and by using set operations such as union, negation, and intersection, they can navigate the search results more clearly. We believe that BEE will not only be useful for biologists querying for complex associations between entities, but can also be a good starting point for general users searching for biomedical entities. 
BEE is accessible at (http://bike-bee.snu.ac.kr).","hji,kes",1,1,2,2,1,NA,knowlegbase +34085038,"EyeDiseases: an integrated resource for dedicating to genetic variants, gene expression and epigenetic factors of human eye diseases.","Eye diseases are remarkably common and encompass a large and diverse range of morbidities that affect different components of the visual system and visual function. With advances in omics technology of eye disorders, genome-scale datasets have been rapidly accumulated in genetics and epigenetics field. However, the efficient collection and comprehensive analysis of different kinds of omics data are lacking. Herein, we developed EyeDiseases (https://eyediseases.bio-data.cn/), the first database for multi-omics data integration and interpretation of human eyes diseases. It contains 1344 disease-associated genes with genetic variation, 1774 transcription files of bulk cell expression and single-cell RNA-seq, 105 epigenomics data across 185 kinds of human eye diseases. Using EyeDiseases, we investigated SARS-CoV-2 potential tropism in eye infection and found that the SARS-CoV-2 entry factors, ACE2 and TMPRSS2 are highly correlated with cornea and keratoconus, suggest that ocular surface cells are susceptible to infection by SARS-CoV-2. Additionally, integrating analysis of Age-related macular degeneration (AMD) GWAS loci and co-expression data revealed 9 associated genes involved in HIF-1 signaling pathway and voltage-gate potassium channel complex. 
The EyeDiseases provides a valuable resource for accelerating the discovery and validation of candidate loci and genes contributed to the molecular diagnosis and therapeutic vulnerabilities with various eyes diseases.","hji,kes",1,1,2,2,1,NA,NA +34104972,eSkip-Finder: a machine learning-based web application and database to identify the optimal sequences of antisense oligonucleotides for exon skipping.,"Exon skipping using antisense oligonucleotides (ASOs) has recently proven to be a powerful tool for mRNA splicing modulation. Several exon-skipping ASOs have been approved to treat genetic diseases worldwide. However, a significant challenge is the difficulty in selecting an optimal sequence for exon skipping. The efficacy of ASOs is often unpredictable, because of the numerous factors involved in exon skipping. To address this gap, we have developed a computational method using machine-learning algorithms that factors in many parameters as well as experimental data to design highly effective ASOs for exon skipping. eSkip-Finder (https://eskip-finder.org) is the first web-based resource for helping researchers identify effective exon skipping ASOs. eSkip-Finder features two sections: (i) a predictor of the exon skipping efficacy of novel ASOs and (ii) a database of exon skipping ASOs. The predictor facilitates rapid analysis of a given set of exon/intron sequences and ASO lengths to identify effective ASOs for exon skipping based on a machine learning model trained by experimental data. We confirmed that predictions correlated well with in vitro skipping efficacy of sequences that were not included in the training data. The database enables users to search for ASOs using queries such as gene name, species, and exon number.","hji,kes",1,1,2,2,1,NA,NA +34107869,PINIR: a comprehensive information resource for Pin-II type protease inhibitors.,"

Background

Serine protease inhibitors belonging to the Potato type-II Inhibitor family Protease Inhibitors (Pin-II type PIs) are essential plant defense molecules. They are characterized by multiple inhibitory repeat domains, conserved disulfide bond pattern, and a tripeptide reactive center loop. These features of Pin-II type PIs make them potential molecules for protein engineering and designing inhibitors for agricultural and therapeutic applications. However, the diversity in these PIs remains unexplored due to the lack of annotated protein sequences and their functional attributes in the available databases.

Results

We have developed a database, PINIR (Pin-II type PIs Information Resource), by systematic collection and manual annotation of 415 Pin-II type PI protein sequences. For each PI, the number and position for signature sequences are specified: 695 domains, 75 linkers, 63 reactive center loops, and 10 disulfide bond patterns are identified and mapped. Database analysis revealed novel subcategories of PIs, species-correlated occurrence of inhibitory domains, reactive center loops, and disulfide bond patterns. By analyzing linker regions, we predict that alternative processing at linker regions could generate PI variants in the Solanaceae family.

Conclusion

PINIR ( https://pinir.ncl.res.in ) provides a web interface for browsing and analyzing the protein sequences of Pin-II type PIs. Information about signature sequences, spatio-temporal expression, biochemical properties, gene sequences, and literature references are provided. Analysis of PINIR depicts conserved species-specific features of Pin-II type PI protein sequences. Diversity in the sequence of inhibitory domains and reactive loops directs potential applications to engineer Pin-II type PIs. The PINIR database will serve as a comprehensive information resource for further research into Pin-II type PIs.","hji,kes",1,1,2,2,1,NA,NA +34113986,Tfcancer: a manually curated database of transcription factors associated with human cancers.,"

Summary

Transcription factors (TFs) are critical regulation elements and its dysregulation can lead to a variety of cancers. However, currently, there are no such online resources for large-scale collection, storage and analysis of TF-cancer associations in those cancers. To fill this gap, we present a database called TFcancer (http://lcbb.swjtu.edu.cn/tfcancer/), which contains 3136 experimentally supported associations between 364 TFs and 33 TCGA cancers by manually curating more than 1800 literature. TFcancer mainly concentrates on four aspects: TF expression, molecular alteration, regulatory relationships between TFs and target genes, and biological processes and signaling pathways of TFs in cancers. TFcancer not only provides a user-friendly interface for browsing and searching but also allows flexible data downloading and user data submitting. It is believed that TFcancer is a helpful and valuable resource for researchers who seek to understand the functions and molecular mechanisms of TFs involved in human cancers.

Availability and implementation

The TFcancer are freely available at http://lcbb.swjtu.edu.cn/tfcancer/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +34120586,NUCOME: A comprehensive database of nucleosome organization referenced landscapes in mammalian genomes.,"

Background

Nucleosome organization is involved in many regulatory activities in various organisms. However, studies integrating nucleosome organization in mammalian genomes are very limited mainly due to the lack of comprehensive data quality control (QC) assessment and uneven data quality of public data sets.

Results

The NUCOME is a database focused on filtering qualified nucleosome organization referenced landscapes covering various cell types in human and mouse based on QC metrics. The filtering strategy guarantees the quality of nucleosome organization referenced landscapes and exempts users from redundant data set selection and processing. The NUCOME database provides standardized, qualified data source and informative nucleosome organization features at a whole-genome scale and on the level of individual loci.

Conclusions

The NUCOME provides valuable data resources for integrative analyses focus on nucleosome organization. The NUCOME is freely available at http://compbio-zhanglab.org/NUCOME .","hji,kes",1,1,2,2,1,NA,NA +34125008,"The ""second wave"" of the COVID-19 pandemic in the Arctic: regional and temporal dynamics.","This article focuses on the ""second wave"" of the COVID-19 pandemic in the Arctic and examines spatiotemporal patterns between July 2020 and January 2021. We analyse available COVID-19 data at the regional (subnational) level to elucidate patterns and typology of Arctic regions with respect to the COVID-19 pandemic. This article builds upon our previous research that examined the early phase of the COVID-19 pandemic between February and July 2020. The pandemic's ""second wave"" observed in the Arctic between September 2020 and January 2021 was severe in terms of COVID-19 infections and fatalities, having particularly strong impacts in Alaska, Northern Russia and Northern Sweden. Based on the spatiotemporal patterns of the ""second wave"" dynamics, we identified 5 types of the pandemic across regions: Shockwaves (Iceland, Faroe Islands, Northern Norway, and Northern Finland), Protracted Waves (Northern Sweden), Tidal Waves (Northern Russia), Tsunami Waves (Alaska), and Isolated Splashes (Northern Canada and Greenland). Although data limitations and gaps persist, monitoring of COVID-19 is critical for developing a proper understanding of the pandemic in order to develop informed and effective responses to the current crisis and possible future pandemics in the Arctic. 
Data used in this paper are available at https://arctic.uni.edu/arctic-covid-19.","hji,kes",0,0,0,2,0,NA,"vis/dashboard; assessed and re-scored - says data available but abstract is otherwise describing viz, tricky abstract" +34127402,Immu-Mela: An open resource for exploring immunotherapy-related multidimensional genomic profiles in melanoma.,"There are increasing studies aimed to reveal genomic hallmarks predictive of immune checkpoint blockade (ICB) treatment response, which generated a large number of data and provided an unprecedented opportunity to identify response-related features and evaluate their robustness across cohorts. However, those valuable data sets are not easily accessible to the research community. To take full advantage of existing large-scale immuno-genomic profiles, we developed Immu-Mela (http://bioinfo.vanderbilt.edu/database/Immu-Mela/), a multidimensional immuno-genomic portal that provides interactive exploration of associations between ICB responsiveness and multi-omics features in melanoma, including genetic, transcriptomics, immune cells, and single-cell populations. Immu-Mela also enables integrative analysis of any two genomic features. We demonstrated the value of Immu-Mela by identifying known and novel genomic features associated with ICB response. In addition, Immu-Mela allows users to upload their data sets (unrestricted to any cancer types) and co-analyze with existing data to identify and validate signatures of interest. Immu-Mela reduces barriers between researchers and complex genomic data, facilitating discoveries in cancer immunotherapy.","hji,kes",1,1,2,2,1,NA,NA +34147352,MPSBase: Comprehensive repository of differentially expressed genes for mucopolysaccharidoses.,"Mucopolysaccharidoses (MPS) are lysosomal storage diseases (LSDs) caused by the deficiency of enzymes essential for the metabolism of extracellular matrix components called glycosaminoglycans (GAGs). 
To understand the physiopathology and alterations due to the lysosomal accumulation resulting from enzymatic deficiencies and their secondary outcomes can improve the diagnosis and treatment of rare genetic diseases. This work presents a database for differentially expressed genes from different public MPS data. We developed our database, including 13 studies previously deposited in the GEO (https://www.ncbi.nlm.nih.gov/geo/). The website is hosted in the UFRGS data processing center (CPD) and is available at . The site was constructed in PHP, and the analyses were performed in R. The organisms represented by the datasets are Canis lupus familiaris, Homo sapiens, Mus musculus, and Rattus norvegicus. The user can search for the differentially expressed genes and ontologies by species, MPS type, or tissue type. For each comparison, a heatmap with the 50 top differentially expressed genes is available as well as dot plots for the 30 top ontologies divided by biological process, cellular component, KEGG pathways, and molecular function. This data is also fully available in tables. There are 54 possible comparisons involving about 5000 to 10,000 genes each. This website is the only specific database for MPS with filtering and presenting their results in a one-click approach to the best of our knowledge. The development of such analytical and automated strategies accessible to health professionals is essential for fostering MPS research. The MPSBase is a web user-friendly, comprehensive repository of differentially expressed genes and ontologies regarding the MPS data.","hji,kes",1,1,2,2,1,NA,NA +34156446,MetamORF: a repository of unique short open reading frames identified by both experimental and computational approaches for gene and metagene analyses.,"The development of high-throughput technologies revealed the existence of non-canonical short open reading frames (sORFs) on most eukaryotic ribonucleic acids. 
They are ubiquitous genetic elements conserved across species and suspected to be involved in numerous cellular processes. MetamORF (https://metamorf.hb.univ-amu.fr/) aims to provide a repository of unique sORFs identified in the human and mouse genomes with both experimental and computational approaches. By gathering publicly available sORF data, normalizing them and summarizing redundant information, we were able to identify a total of 1 162 675 unique sORFs. Despite the usual characterization of ORFs as short, upstream or downstream, there is currently no clear consensus regarding the definition of these categories. Thus, the data have been reprocessed using a normalized nomenclature. MetamORF enables new analyses at locus, gene, transcript and ORF levels, which should offer the possibility to address new questions regarding sORF functions in the future. The repository is available through an user-friendly web interface, allowing easy browsing, visualization, filtering over multiple criteria and export possibilities. sORFs can be searched starting from a gene, a transcript and an ORF ID, looking in a genome area or browsing the whole repository for a species. The database content has also been made available through track hubs at UCSC Genome Browser. Finally, we demonstrated an enrichment of genes harboring upstream ORFs among genes expressed in response to reticular stress. Database URL https://metamorf.hb.univ-amu.fr/.","hji,kes",1,1,2,2,1,NA,NA +34164644,HFBD: a biomarker knowledge database for heart failure heterogeneity and personalized applications.,"

Motivation

Heart failure (HF) is a cardiovascular disease with a high incidence around the world. Accumulating studies have focused on the identification of biomarkers for HF precision medicine. To understand the HF heterogeneity and provide biomarker information for the personalized diagnosis and treatment of HF, a knowledge database collecting the distributed and multiple-level biomarker information is necessary.

Results

In this study, the HF biomarker knowledge database (HFBD) was established by manually collecting the data and knowledge from literature in PubMed. HFBD contains 2618 records and 868 HF biomarkers (731 single and 137 combined) extracted from 1237 original articles. The biomarkers were classified into proteins, RNAs, DNAs, and the others at molecular, image, cellular and physiological levels. The biomarkers were annotated with biological, clinical and article information as well as the experimental methods used for the biomarker discovery. With its user-friendly interface, this knowledge database provides a unique resource for the systematic understanding of HF heterogeneity and personalized diagnosis and treatment of HF in the era of precision medicine.

Availability

The platform is openly available at http://sysbio.org.cn/HFBD/.","hji,kes",1,1,2,2,1,NA,NA +34167460,A comprehensive database for integrated analysis of omics data in autoimmune diseases.,"

Background

Autoimmune diseases are heterogeneous pathologies with difficult diagnosis and few therapeutic options. In the last decade, several omics studies have provided significant insights into the molecular mechanisms of these diseases. Nevertheless, data from different cohorts and pathologies are stored independently in public repositories and a unified resource is imperative to assist researchers in this field.

Results

Here, we present Autoimmune Diseases Explorer ( https://adex.genyo.es ), a database that integrates 82 curated transcriptomics and methylation studies covering 5609 samples for some of the most common autoimmune diseases. The database provides, in an easy-to-use environment, advanced data analysis and statistical methods for exploring omics datasets, including meta-analysis, differential expression or pathway analysis.

Conclusions

This is the first omics database focused on autoimmune diseases. This resource incorporates homogeneously processed data to facilitate integrative analyses among studies.","hji,kes",1,1,2,2,1,NA,NA +34169314,KMDATA: a curated database of reconstructed individual patient-level data from 153 oncology clinical trials.,"We created a database of reconstructed patient-level data from published clinical trials that includes multiple time-to-event outcomes such as overall survival and progression-free survival. Outcomes were extracted from Kaplan-Meier (KM) curves reported in 153 oncology Phase III clinical trial publications identified through a PubMed search of clinical trials in breast, lung, prostate and colorectal cancer, published between 2014 and 2016. For each trial that met our search criteria, we curated study-level information and digitized all reported KM curves with the software Digitizelt. We then used the digitized KM survival curves to estimate (possibly censored) patient-level time-to-event outcomes. Collections of time-to-event datasets from completed trials can be used to support the choice of appropriate trial designs for future clinical studies. Patient-level data allow investigators to tailor clinical trial designs to diseases and classes of treatments. Patient-level data also allow investigators to estimate the operating characteristics (e.g. power and type I error rate) of candidate statistical designs and methods. Database URL: https://10.6084/m9.figshare.14642247.v1.","hji,kes",1,1,2,2,1,NA,NA +34174131,CanVaS: Documenting the genetic variation spectrum of Greek cancer patients.,"National genetic variation registries vastly increase the level of detail for the relevant population, while directly affecting patient management. Herein, we report CanVaS, a Cancer Variation reSource aiming to document the genetic variation of cancer patients in Greece. 
CanVaS comprises germline genetic data from 7,363 Greek individuals with a personal and/or family history of malignancy. The data set incorporates approximately 24,000 functionally annotated rare variants in 97 established or suspected cancer susceptibility genes. For each variant, allele frequency for the Greek population, interpretation for clinical significance, anonymized family and segregation information, as well as phenotypic traits of the carriers, are included. Moreover, information on the geographic distribution of the variants across the country is provided, enabling the study of Greek population isolates. Direct comparisons between Greek (sub)populations with relevant genetic resources are supported, allowing fine-grain localized adjustment of guidelines and clinical decision-making. Most importantly, anonymized data are available for download, while the Leiden Open Variation Database schema is adopted, enabling integration/interconnection with central resources. CanVaS could become a stepping-stone for a countrywide effort to characterize the cancer genetic variation landscape, concurrently supporting national and international cancer research. The database can be accessed at: http://ithaka.rrp.demokritos.gr/CanVaS.","hji,kes",1,1,2,2,1,NA,NA +34174819,UniBind: maps of high-confidence direct TF-DNA interactions across nine species.,"

Background

Transcription factors (TFs) bind specifically to TF binding sites (TFBSs) at cis-regulatory regions to control transcription. It is critical to locate these TF-DNA interactions to understand transcriptional regulation. Efforts to predict bona fide TFBSs benefit from the availability of experimental data mapping DNA binding regions of TFs (chromatin immunoprecipitation followed by sequencing - ChIP-seq).

Results

In this study, we processed ~ 10,000 public ChIP-seq datasets from nine species to provide high-quality TFBS predictions. After quality control, it culminated with the prediction of ~ 56 million TFBSs with experimental and computational support for direct TF-DNA interactions for 644 TFs in > 1000 cell lines and tissues. These TFBSs were used to predict > 197,000 cis-regulatory modules representing clusters of binding events in the corresponding genomes. The high-quality of the TFBSs was reinforced by their evolutionary conservation, enrichment at active cis-regulatory regions, and capacity to predict combinatorial binding of TFs. Further, we confirmed that the cell type and tissue specificity of enhancer activity was correlated with the number of TFs with binding sites predicted in these regions. All the data is provided to the community through the UniBind database that can be accessed through its web-interface ( https://unibind.uio.no/ ), a dedicated RESTful API, and as genomic tracks. Finally, we provide an enrichment tool, available as a web-service and an R package, for users to find TFs with enriched TFBSs in a set of provided genomic regions.

Conclusions

UniBind is the first resource of its kind, providing the largest collection of high-confidence direct TF-DNA interactions in nine species.","hji,kes",1,1,2,2,1,NA,NA +34175476,Genome Warehouse: A Public Repository Housing Genome-scale Data.,"The Genome Warehouse (GWH) is a public repository housing genome assembly data for a wide range of species and delivering a series of web services for genome data submission, storage, release, and sharing. As one of the core resources in the National Genomics Data Center (NGDC), part of the China National Center for Bioinformation (CNCB, https://ngdc.cncb.ac.cn), GWH accepts both full genome and partial genome (chloroplast, mitochondrion, and plasmid) sequences with different assembly levels, as well as an update of existing genome assemblies. For each assembly, GWH collects detailed genome-related metadata of biological project, biological sample, and genome assembly, in addition to genome sequence and annotation. To archive high-quality genome sequences and annotations, GWH is equipped with a uniform and standardized procedure for quality control. Besides basic browse and search functionalities, all released genome sequences and annotations can be visualized with JBrowse. By May 21, 2021, GWH has received 19,124 direct submissions covering a diversity of 1108 species and has released 8772 of them. Collectively, GWH serves as an important resource for genome-scale data management and provides free and publicly accessible data to support research activities throughout the world. GWH is publicly accessible at https://ngdc.cncb.ac.cn/gwh.","hji,kes",1,1,2,2,1,NA,NA +34178036,RHIVDB: A Freely Accessible Database of HIV Amino Acid Sequences and Clinical Data of Infected Patients.,"Human immunodeficiency virus (HIV) infection remains one of the most severe problems for humanity, particularly due to the development of HIV resistance. 
To evaluate an association between viral sequence data and drug combinations and to estimate an effect of a particular drug combination on the treatment results, collection of the most representative drug combinations used to cure HIV and the biological data on amino acid sequences of HIV proteins is essential. We have created a new, freely available web database containing 1,651 amino acid sequences of HIV structural proteins [reverse transcriptase (RT), protease (PR), integrase (IN), and envelope protein (ENV)], treatment history information, and CD4+ cell count and viral load data available by the user's query. Additionally, the biological data on new HIV sequences and treatment data can be stored in the database by any user followed by an expert's verification. The database is available on the web at http://www.way2drug.com/rhivdb.","hji,kes",1,1,2,2,1,NA,NA +34214659,An inferred functional impact map of genetic variants in rice.,"Interpreting the functional impacts of genetic variants (GVs) is an important challenge for functional genomic studies in crops and next-generation breeding. Currently, studies in rice (Oryza sativa) have mainly focused on the identification of GVs, while the functional annotation of GVs has not yet been systematically carried out. Here we present a functional impact map of GVs in rice. We curated haplotype information of 17,397,026 GVs from sequencing data of 4,726 rice accessions. We quantitatively evaluated the effects of missense mutations in coding regions in each haplotype based on the conservation of amino acid residues and obtained the effects of 918,848 non-redundant missense GVs. We also generated high-quality chromatin accessibility (CA) data from six representative rice tissues and used these data to train deep convolutional neural network models to predict the impacts of 5,067,405 GVs for CA in regulatory regions. 
We characterized the functional properties and tissue specificity of the effects of GVs and found that large-effect GVs in coding and regulatory regions might be subject to selection in different directions. We finally demonstrated how the functional impact map could be used to prioritize the causal variants in mapping populations. This impact map will be a useful resource for accelerating gene cloning and functional studies in rice and can be freely queried in RiceVarMap V2.0 (http://ricevarmap.ncpgr.cn).","hji,kes",1,1,2,2,1,NA,NA +34220930,"The FAANG Data Portal: Global, Open-Access, ""FAIR"", and Richly Validated Genotype to Phenotype Data for High-Quality Functional Annotation of Animal Genomes.","The Functional Annotation of ANimal Genomes (FAANG) project is a worldwide coordinated action creating high-quality functional annotation of farmed and companion animal genomes. The generation of a rich genome-to-phenome resource and supporting informatic infrastructure advances the scope of comparative genomics and furthers the understanding of functional elements. The project also provides terrestrial and aquatic animal agriculture community powerful resources for supporting improvements to farmed animal production, disease resistance, and genetic diversity. The FAANG Data Portal (https://data.faang.org) ensures Findable, Accessible, Interoperable and Reusable (FAIR) open access to the wealth of sample, sequencing, and analysis data produced by an ever-growing number of FAANG consortia. It is developed and maintained by the FAANG Data Coordination Centre (DCC) at the European Molecular Biology Laboratory's European Bioinformatics Institute (EMBL-EBI). FAANG projects produce a standardised set of multi-omic assays with resulting data placed into a range of specialised open data archives. 
To ensure this data is easily findable and accessible by the community, the portal automatically identifies and collates all submitted FAANG data into a single easily searchable resource. The Data Portal supports direct download from the multiple underlying archives to enable seamless access to all FAANG data from within the portal itself. The portal provides a range of predefined filters, powerful predictive search, and a catalogue of sampling and analysis protocols and automatically identifies publications associated with any dataset. To ensure all FAANG data submissions are high-quality, the portal includes powerful contextual metadata validation and data submissions brokering to the underlying EMBL-EBI archives. The portal will incorporate extensive new technical infrastructure to effectively deliver and standardise FAANG's shift to single-cellomics, cell atlases, pangenomes, and novel phenotypic prediction models. The Data Portal plays a key role for FAANG by supporting high-quality functional annotation of animal genomes, through open FAIR sharing of data, complete with standardised rich metadata. Future Data Portal features developed by the DCC will support new technological developments for continued improvement for FAANG projects.","hji,kes",1,1,2,2,1,NA,NA +34225788,A global overview of genetically interpretable multimorbidities among common diseases in the UK Biobank.,"

Background

Multimorbidities greatly increase the global health burdens, but the landscapes of their genetic risks have not been systematically investigated.

Methods

We used the hospital inpatient data of 385,335 patients in the UK Biobank to investigate the multimorbid relations among 439 common diseases. Post-GWAS analyses were performed to identify multimorbidity shared genetic risks at the genomic loci, network, as well as overall genetic architecture levels. We conducted network decomposition for the networks of genetically interpretable multimorbidities to detect the hub diseases and the involved molecules and functions in each module.

Results

In total, 11,285 multimorbidities among 439 common diseases were identified, and 46% of them were genetically interpretable at the loci, network, or overall genetic architecture levels. Multimorbidities affecting the same and different physiological systems displayed different patterns of the shared genetic components, with the former more likely to share loci-level genetic components while the latter more likely to share network-level genetic components. Moreover, both the loci- and network-level genetic components shared by multimorbidities converged on cell immunity, protein metabolism, and gene silencing. Furthermore, we found that the genetically interpretable multimorbidities tend to form network modules, mediated by hub diseases and featuring physiological categories. Finally, we showcased how hub diseases mediating the multimorbidity modules could help provide useful insights for the genetic contributors of multimorbidities.

Conclusions

Our results provide a systematic resource for understanding the genetic predispositions of multimorbidities and indicate that hub diseases and converged molecules and functions may be the key for treating multimorbidities. We have created an online database that facilitates researchers and physicians to browse, search, or download these multimorbidities ( https://multimorbidity.comp-sysbio.org ).","hji,kes",1,1,2,2,1,NA,NA +34013642,Efficient study design to estimate population means with multiple measurement instruments.,"Outcomes from studies assessing exposure often use multiple measurements. In previous work, using a model first proposed by Buonoccorsi (1991), we showed that combining direct (eg, biomarkers) and indirect (eg, self-report) measurements provides a more accurate picture of true exposure than estimates obtained when using a single type of measurement. In this article, we propose a tool for efficient design of studies that include both direct and indirect measurements of a relevant outcome. Based on data from a pilot or preliminary study, the tool, which is available online as a shiny app at https://michalbitan.shinyapps.io/shinyApp/, can be used to compute: (1) the sample size required for a statistical power analysis, while optimizing the percent of participants who should provide direct measures of exposure (biomarkers) in addition to the indirect (self-report) measures provided by all participants; (2) the ideal number of replicates; and (3) the allocation of resources to intervention and control arms. In addition we show how to examine the sensitivity of results to underlying assumptions. We illustrate our analysis using studies of tobacco smoke exposure and nutrition. 
In these examples, a near-optimal allocation of the resources can be found even if the assumptions are not precise.","hji,kes",0,0,0,2,0,software,NA +34015821,ProtCHOIR: a tool for proteome-scale generation of homo-oligomers.,"The rapid developments in gene sequencing technologies achieved in the recent decades, along with the expansion of knowledge on the three-dimensional structures of proteins, have enabled the construction of proteome-scale databases of protein models such as the Genome3D and ModBase. Nevertheless, although gene products are usually expressed as individual polypeptide chains, most biological processes are associated with either transient or stable oligomerisation. In the PDB databank, for example, ~40% of the deposited structures contain at least one homo-oligomeric interface. Unfortunately, databases of protein models are generally devoid of multimeric structures. To tackle this particular issue, we have developed ProtCHOIR, a tool that is able to generate homo-oligomeric structures in an automated fashion, providing detailed information for the input protein and output complex. ProtCHOIR requires input of either a sequence or a protomeric structure that is queried against a pre-constructed local database of homo-oligomeric structures, then extensively analyzed using well-established tools such as PSI-Blast, MAFFT, PISA and Molprobity. Finally, MODELLER is employed to achieve the construction of the homo-oligomers. The output complex is thoroughly analyzed taking into account its stereochemical quality, interfacial stabilities, hydrophobicity and conservation profile. All these data are then summarized in a user-friendly HTML report that can be saved or printed as a PDF file. The software is easily parallelizable and also outputs a comma-separated file with summary statistics that can straightforwardly be concatenated as a spreadsheet-like document for large-scale data analyses. 
As a proof-of-concept, we built oligomeric models for the Mabellini Mycobacterium abscessus structural proteome database. ProtCHOIR can be run as a web-service and the code can be obtained free-of-charge at http://lmdm.biof.ufrj.br/protchoir.","hji,kes",0,0,0,2,0,software,submit job +26483767,Clinical utilization of genomics data produced by the international Pseudomonas aeruginosa consortium.,"The International Pseudomonas aeruginosa Consortium is sequencing over 1000 genomes and building an analysis pipeline for the study of Pseudomonas genome evolution, antibiotic resistance and virulence genes. Metadata, including genomic and phenotypic data for each isolate of the collection, are available through the International Pseudomonas Consortium Database (http://ipcd.ibis.ulaval.ca/). Here, we present our strategy and the results that emerged from the analysis of the first 389 genomes. With as yet unmatched resolution, our results confirm that P. aeruginosa strains can be divided into three major groups that are further divided into subgroups, some not previously reported in the literature. We also provide the first snapshot of P. aeruginosa strain diversity with respect to antibiotic resistance. Our approach will allow us to draw potential links between environmental strains and those implicated in human and animal infections, understand how patients become infected and how the infection evolves over time as well as identify prognostic markers for better evidence-based decisions on patient care.","hji,kes",1,1,2,2,1,NA,NA +26995712,Rare disease relations through common genes and protein interactions.,"ODCs (Orphan Disease Connections), available at http://csbg.cnb.csic.es/odcs, is a novel resource to explore potential molecular relations between rare diseases. These molecular relations have been established through the integration of disease susceptibility genes and human protein-protein interactions. 
The database currently contains 54,941 relations between 3032 diseases.","hji,kes",1,1,2,2,1,NA,NA +21255607,"Aromatic-Aromatic Interactions Database, A(2)ID: an analysis of aromatic π-networks in proteins.","The geometrical arrangement of the aromatic rings of phenylalanine, tyrosine, tryptophan and histidine has been analyzed at a database level using the X-ray crystal structure of proteins from PDB in order to find out the aromatic-aromatic (p-p) networks in proteins and to understand how these aromatic rings are connected with each-other in a specific p-p network. A stringent examination of the 7848 proteins indicates that close to 89% of the proteins have occurrence of at least a network of 2p or a higher p-p network. The occurrence of p-p networks in various protein superfamilies based on SCOP, CATH and EC classifiers has also been probed in the present work. In general, we find that multidomain and membrane proteins as well as lyases show a more number of these networks. Analysis of the distribution of angle between planes of two proximal aromatic rings () distribution indicates that at a larger cutoff distance (between centroid of two aromatic rings), above 5, C-Hp interactions (T-shaped orientation) are more prevalent, while p-p interactions (stacked orientation) are more prevalent at a smaller cutoff distance. The connectivity patterns of p-p networks propose strong propensity of finding arrangement of aromatic residues as clusters rather than linear arrangement. We have also made a public domain database """"Aromatic-Aromatic Interactions Database"""" (A(2)ID) comprising of all types of p-p networks and their connectivity pattern present in proteins. It can be accessed by url http://203.199.182.73/gnsmmg/databases/aidb/aidb.html.","hji,kes",1,1,2,2,1,NA,NA +21398668,sc-PDB: a database for identifying variations and multiplicity of 'druggable' binding sites in proteins.,"

Background

The sc-PDB database is an annotated archive of druggable binding sites extracted from the Protein Data Bank. It contains all-atoms coordinates for 8166 protein-ligand complexes, chosen for their geometrical and physico-chemical properties. The sc-PDB provides a functional annotation for proteins, a chemical description for ligands and the detailed intermolecular interactions for complexes. The sc-PDB now includes a hierarchical classification of all the binding sites within a functional class.

Method

The sc-PDB entries were first clustered according to the protein name indifferent of the species. For each cluster, we identified dissimilar sites (e.g. catalytic and allosteric sites of an enzyme). SCOPE AND APPLICATIONS: The classification of sc-PDB targets by binding site diversity was intended to facilitate chemogenomics approaches to drug design. In ligand-based approaches, it avoids comparing ligands that do not share the same binding site. In structure-based approaches, it permits to quantitatively evaluate the diversity of the binding site definition (variations in size, sequence and/or structure).

Availability

The sc-PDB database is freely available at: http://bioinfo-pharma.u-strasbg.fr/scPDB.","hji,kes",1,1,2,2,1,NA,NA +21624162,Developing a kidney and urinary pathway knowledge base.,"

Background

Chronic renal disease is a global health problem. The identification of suitable biomarkers could facilitate early detection and diagnosis and allow better understanding of the underlying pathology. One of the challenges in meeting this goal is the necessary integration of experimental results from multiple biological levels for further analysis by data mining. Data integration in the life science is still a struggle, and many groups are looking to the benefits promised by the Semantic Web for data integration.

Results

We present a Semantic Web approach to developing a knowledge base that integrates data from high-throughput experiments on kidney and urine. A specialised KUP ontology is used to tie the various layers together, whilst background knowledge from external databases is incorporated by conversion into RDF. Using SPARQL as a query mechanism, we are able to query for proteins expressed in urine and place these back into the context of genes expressed in regions of the kidney.

Conclusions

The KUPKB gives KUP biologists the means to ask queries across many resources in order to aggregate knowledge that is necessary for answering biological questions. The Semantic Web technologies we use, together with the background knowledge from the domain's ontologies, allows both rapid conversion and integration of this knowledge base. The KUPKB is still relatively small, but questions remain about scalability, maintenance and availability of the knowledge itself.

Availability

The KUPKB may be accessed via http://www.e-lico.eu/kupkb.","hji,kes",1,1,2,2,1,NA,read this +21656910,A database of reaction monitoring mass spectrometry assays for elucidating therapeutic response in cancer.,"

Purpose

The Quantitative Assay Database (QuAD), http://proteome.moffitt.org/QUAD/, facilitates widespread implementation of quantitative mass spectrometry in cancer biology and clinical research through sharing of methods and reagents for monitoring protein expression and modification.

Experimental design

Liquid chromatography coupled to multiple reaction monitoring (LC-MRM) mass spectrometry assays are developed using SDS-PAGE fractionated lysates from cancer cell lines. Pathway maps created using GeneGO Metacore provide the biological relationships between proteins and illustrate concepts for multiplexed analysis; each protein can be selected to examine assay development at the protein and peptide levels.

Results

The coupling of SDS-PAGE and multiple reaction monitoring mass spectrometry screening has been used to detect 876 peptides from 218 cancer-related proteins in model systems including colon, lung, melanoma, leukemias, and myeloma, which has led to the development of 95 quantitative assays including stable-isotope-labeled peptide standards. Methods are published online and peptide standards are made available to the research community. Protein expression measurements for heat shock proteins, including a comparison with ELISA and monitoring response to the HSP90 inhibitor, 17-(dimethylaminoethylamino)-17-demethoxygeldanamycin (17-DMAG), are used to illustrate the components of the QuAD and its potential utility.

Conclusions and clinical relevance

This resource enables quantitative assessment of protein components of signaling pathways and biological processes and holds promise for systematic investigation of treatment responses in cancer.","hji,kes",1,1,2,2,1,tentative 1; Does this fit our description of biodata?,NA +21769196,VPDB: Viral Protein Structural Database.,"

Unlabelled

Viral Protein Database is an interactive database for three dimensional viral proteins. Our aim is to provide a comprehensive resource to the community of structural virology, with an emphasis on the description of derived data from structural biology. Currently, VPDB includes 1,670 viral protein structures from >277 viruses with more than 465 virus strains. The whole database can be easily accessed through the user convenience text search. Interactivity has been enhanced by using Jmol, WebMol and Strap to visualize the viral protein molecular structure.

Availability

The database is available for free at http://www.vpdb.bicpu.edu.in.","hji,kes",1,1,2,2,1,NA,NA +21821666,NeuroPedia: neuropeptide database and spectral library.,"

Summary

Neuropeptides are essential for cell-cell communication in neurological and endocrine physiological processes in health and disease. While many neuropeptides have been identified in previous studies, the resulting data has not been structured to facilitate further analysis by tandem mass spectrometry (MS/MS), the main technology for high-throughput neuropeptide identification. Many neuropeptides are difficult to identify when searching MS/MS spectra against large protein databases because of their atypical lengths (e.g. shorter/longer than common tryptic peptides) and lack of tryptic residues to facilitate peptide ionization/fragmentation. NeuroPedia is a neuropeptide encyclopedia of peptide sequences (including genomic and taxonomic information) and spectral libraries of identified MS/MS spectra of homolog neuropeptides from multiple species. Searching neuropeptide MS/MS data against known NeuroPedia sequences will improve the sensitivity of database search tools. Moreover, the availability of neuropeptide spectral libraries will also enable the utilization of spectral library search tools, which are known to further improve the sensitivity of peptide identification. These will also reinforce the confidence in peptide identifications by enabling visual comparisons between new and previously identified neuropeptide MS/MS spectra.

Availability

http://proteomics.ucsd.edu/Software/NeuroPedia.html

Contact

bandeira@ucsd.edu

Supplementary information

Supplementary materials are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +22058129,DistiLD Database: diseases and traits in linkage disequilibrium blocks.,"Genome-wide association studies (GWAS) have identified thousands of single nucleotide polymorphisms (SNPs) associated with the risk of hundreds of diseases. However, there is currently no database that enables non-specialists to answer the following simple questions: which SNPs associated with diseases are in linkage disequilibrium (LD) with a gene of interest? Which chromosomal regions have been associated with a given disease, and which are the potentially causal genes in each region? To answer these questions, we use data from the HapMap Project to partition each chromosome into so-called LD blocks, so that SNPs in LD with each other are preferentially in the same block, whereas SNPs not in LD are in different blocks. By projecting SNPs and genes onto LD blocks, the DistiLD database aims to increase usage of existing GWAS results by making it easy to query and visualize disease-associated SNPs and genes in their chromosomal context. The database is available at http://distild.jensenlab.org/.","hji,kes",1,1,2,2,1,NA,NA +22064864,Gene Expression Atlas update--a value-added database of microarray and sequencing-based functional genomics experiments.,"Gene Expression Atlas (http://www.ebi.ac.uk/gxa) is an added-value database providing information about gene expression in different cell types, organism parts, developmental stages, disease states, sample treatments and other biological/experimental conditions. The content of this database derives from curation, re-annotation and statistical analysis of selected data from the ArrayExpress Archive and the European Nucleotide Archive. A simple interface allows the user to query for differential gene expression either by gene names or attributes or by biological conditions, e.g. diseases, organism parts or cell types. 
Since our previous report we made 20 monthly releases and, as of Release 11.08 (August 2011), the database supports 19 species, which contains expression data measured for 19,014 biological conditions in 136,551 assays from 5598 independent studies.","hji,kes",1,1,2,2,1,NA,NA +22067443,PINA v2.0: mining interactome modules.,"The Protein Interaction Network Analysis (PINA) platform is a comprehensive web resource, which includes a database of unified protein-protein interaction data integrated from six manually curated public databases, and a set of built-in tools for network construction, filtering, analysis and visualization. The second version of PINA enhances its utility for studies of protein interactions at a network level, by including multiple collections of interaction modules identified by different clustering approaches from the whole network of protein interactions ('interactome') for six model organisms. All identified modules are fully annotated by enriched Gene Ontology terms, KEGG pathways, Pfam domains and the chemical and genetic perturbations collection from MSigDB. Moreover, a new tool is provided for module enrichment analysis in addition to simple query function. The interactome data are also available on the web site for further bioinformatics analysis. PINA is freely accessible at http://cbg.garvan.unsw.edu.au/pina/.","hji,kes",1,1,2,2,1,NA,NA +22067445,"SCRIPDB: a portal for easy access to syntheses, chemicals and reactions in patents.","The patent literature is a rich catalog of biologically relevant chemicals; many public and commercial molecular databases contain the structures disclosed in patent claims. However, patents are an equally rich source of metadata about bioactive molecules, including mechanism of action, disease class, homologous experimental series, structural alternatives, or the synthetic pathways used to produce molecules of interest. 
Unfortunately, this metadata is discarded when chemical structures are deposited separately in databases. SCRIPDB is a chemical structure database designed to make this metadata accessible. SCRIPDB provides the full original patent text, reactions and relationships described within any individual patent, in addition to the molecular files common to structural databases. We discuss how such information is valuable in medical text mining, chemical image analysis, reaction extraction and in silico pharmaceutical lead optimization. SCRIPDB may be searched by exact chemical structure, substructure or molecular similarity and the results may be restricted to patents describing synthetic routes. SCRIPDB is available at http://dcv.uhnres.utoronto.ca/SCRIPDB.","hji,kes",1,1,2,2,1,NA,NA +22067451,IDEAL: Intrinsically Disordered proteins with Extensive Annotations and Literature.,"IDEAL, Intrinsically Disordered proteins with Extensive Annotations and Literature (http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL/), is a collection of knowledge on experimentally verified intrinsically disordered proteins. IDEAL contains manual annotations by curators on intrinsically disordered regions, interaction regions to other molecules, post-translational modification sites, references and structural domain assignments. In particular, IDEAL explicitly describes protean segments that can be transformed from a disordered state to an ordered state. Since in most cases they can act as molecular recognition elements upon binding of partner proteins, IDEAL provides a data resource for functional regions of intrinsically disordered proteins. 
The information in IDEAL is provided on a user-friendly graphical view and in a computer-friendly XML format.","hji,kes",1,1,2,2,1,NA,NA +22080559,"The Aspergillus Genome Database (AspGD): recent developments in comprehensive multispecies curation, comparative genomics and community resources.","The Aspergillus Genome Database (AspGD; http://www.aspgd.org) is a freely available, web-based resource for researchers studying fungi of the genus Aspergillus, which includes organisms of clinical, agricultural and industrial importance. AspGD curators have now completed comprehensive review of the entire published literature about Aspergillus nidulans and Aspergillus fumigatus, and this annotation is provided with streamlined, ortholog-based navigation of the multispecies information. AspGD facilitates comparative genomics by providing a full-featured genomics viewer, as well as matched and standardized sets of genomic information for the sequenced aspergilli. AspGD also provides resources to foster interaction and dissemination of community information and resources. We welcome and encourage feedback at aspergillus-curator@lists.stanford.edu.","hji,kes",1,1,2,2,1,NA,NA +22086951,The UCSC Genome Browser database: extensions and updates 2011.,"The University of California Santa Cruz Genome Browser (http://genome.ucsc.edu) offers online public access to a growing database of genomic sequence and annotations for a wide variety of organisms. The Browser is an integrated tool set for visualizing, comparing, analyzing and sharing both publicly available and user-generated genomic data sets. In the past year, the local database has been updated with four new species assemblies, and we anticipate another four will be released by the end of 2011. Further, a large number of annotation tracks have been either added, updated by contributors, or remapped to the latest human reference genome. 
Among these are new phenotype and disease annotations, UCSC genes, and a major dbSNP update, which required new visualization methods. Growing beyond the local database, this year we have introduced 'track data hubs', which allow the Genome Browser to provide access to remotely located sets of annotations. This feature is designed to significantly extend the number and variety of annotation tracks that are publicly available for visualization and analysis from within our site. We have also introduced several usability features including track search and a context-sensitive menu of options available with a right-click anywhere on the Browser's image.","hji,kes",1,1,2,2,1,NA,NA +22086963,Ensembl 2012.,"The Ensembl project (http://www.ensembl.org) provides genome resources for chordate genomes with a particular focus on human genome data as well as data for key model organisms such as mouse, rat and zebrafish. Five additional species were added in the last year including gibbon (Nomascus leucogenys) and Tasmanian devil (Sarcophilus harrisii) bringing the total number of supported species to 61 as of Ensembl release 64 (September 2011). Of these, 55 species appear on the main Ensembl website and six species are provided on the Ensembl preview site (Pre!Ensembl; http://pre.ensembl.org) with preliminary support. The past year has also seen improvements across the project.","hji,kes",1,1,2,2,1,NA,NA +22102583,Mouse Phenome Database (MPD).,"The Mouse Phenome Project was launched a decade ago to complement mouse genome sequencing efforts by promoting new phenotyping initiatives under standardized conditions and collecting the data in a central public database, the Mouse Phenome Database (MPD; http://phenome.jax.org). MPD houses a wealth of strain characteristics data to facilitate the use of the laboratory mouse in translational research for human health and disease, helping alleviate problems involving experimentation in humans that cannot be done practically or ethically. 
Data sets are voluntarily contributed by researchers from a variety of institutions and settings, or in some cases, retrieved by MPD staff from public sources. MPD maintains a growing collection of standardized reference data that assists investigators in selecting mouse strains for research applications; houses treatment/control data for drug studies and other interventions; offers a standardized platform for discovering genotype-phenotype relationships; and provides tools for hypothesis testing. MPD improvements and updates since our last NAR report are presented, including the addition of new tools and features to facilitate navigation and data mining as well as the acquisition of new data (phenotypic, genotypic and gene expression).","hji,kes",1,1,2,2,1,NA,NA +22102771,EuDBase: An online resource for automated EST analysis pipeline (ESTFrontier) and database for red seaweed Eucheuma denticulatum.,"Functional genomics has proven to be an efficient tool in identifying genes involved in various biological functions. However the availability of commercially important seaweed Eucheuma denticulatum functional resources is still limited. EuDBase is the first seaweed online repository that provides integrated access to ESTs of Eucheuma denticulatum generated from samples collected from Kudat and Semporna in Sabah, Malaysia. The database stored 10,031 ESTs that are clustered and assembled into 2,275 unique transcripts (UT) and 955 singletons. Raw data were automatically processed using ESTFrontier, an in-house automated EST analysis pipeline. Data was collected in MySQL database. Web interface is implemented using PHP and it allows browsing and querying EuDBase through search engine. Data is searchable via BLAST hit, domain search, Gene Ontology or KEGG Pathway. A user-friendly interface allows the identification of sequences either using a simple text query or similarity search. The development of EuDBase is initiated to store, manage and analyze the E. 
denticulatum ESTs and to provide accumulative digital resources for the use of global scientific community. EuDBase is freely available from http://www.inbiosis.ukm.my/eudbase/.","hji,kes",1,1,2,2,1,NA,NA +22121220,The IntAct molecular interaction database in 2012.,"IntAct is an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions. Two levels of curation are now available within the database, with both IMEx-level annotation and less detailed MIMIx-compatible entries currently supported. As from September 2011, IntAct contains approximately 275,000 curated binary interaction evidences from over 5000 publications. The IntAct website has been improved to enhance the search process and in particular the graphical display of the results. New data download formats are also available, which will facilitate the inclusion of IntAct's data in the Semantic Web. IntAct is an active contributor to the IMEx consortium (http://www.imexconsortium.org). IntAct source code and data are freely available at http://www.ebi.ac.uk/intact.","hji,kes",1,1,2,2,1,NA,NA +22135298,PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse.,"PhosphoSitePlus (http://www.phosphosite.org) is an open, comprehensive, manually curated and interactive resource for studying experimentally observed post-translational modifications, primarily of human and mouse proteins. It encompasses 1,30,000 non-redundant modification sites, primarily phosphorylation, ubiquitinylation and acetylation. The interface is designed for clarity and ease of navigation. From the home page, users can launch simple or complex searches and browse high-throughput data sets by disease, tissue or cell line. 
Searches can be restricted by specific treatments, protein types, domains, cellular components, disease, cell types, cell lines, tissue and sequences or motifs. A few clicks of the mouse will take users to substrate pages or protein pages with sites, sequences, domain diagrams and molecular visualization of side-chains known to be modified; to site pages with information about how the modified site relates to the functions of specific proteins and cellular processes and to curated information pages summarizing the details from one record. PyMOL and Chimera scripts that colorize reactive groups on residues that are modified can be downloaded. Features designed to facilitate proteomic analyses include downloads of modification sites, kinase-substrate data sets, sequence logo generators, a Cytoscape plugin and BioPAX download to enable pathway visualization of the kinase-substrate interactions in PhosphoSitePlus.","hji,kes",1,1,2,2,1,NA,NA +22139934,Cube-DB: detection of functional divergence in human protein families.,"Cube-DB is a database of pre-evaluated results for detection of functional divergence in human/vertebrate protein families. The analysis is organized around the nomenclature associated with the human proteins, but based on all currently available vertebrate genomes. Using full genomes enables us, through a mutual-best-hit strategy, to construct comparable taxonomical samples for all paralogues under consideration. Functional specialization is scored on the residue level according to two models of behavior after divergence: heterotachy and homotachy. In the first case, the positions on the protein sequence are scored highly if they are conserved in the reference group of orthologs, and overlap poorly with the residue type choice in the paralogs groups (such positions will also be termed functional determinants). The second model additionally requires conservation within each group of paralogs (functional discriminants). 
The scoring functions are phylogeny independent, but sensitive to the residue type similarity. The results are presented as a table of per-residue scores, and mapped onto related structure (when available) via browser-embedded visualization tool. They can also be downloaded as a spreadsheet table, and sessions for two additional molecular visualization tools. The database interface is available at http://epsf.bmad.bii.a-star.edu.sg/cube/db/html/home.html.","hji,kes",1,1,2,2,1,NA,NA +22230935,Database for crude drugs and Kampo medicine.,"A wiki-based repository for crude drugs and Kampo medicine is introduced. It provides taxonomic and chemical information for 158 crude drugs and 348 prescriptions of the traditional Kampo medicine in Japan, which is a variation of ancient Chinese medicine. The system is built on MediaWiki with extensions for inline page search and for sending user-input elements to the server. These functions together realize implementation of word checks and data integration at the user-level. In this scheme, any user can participate in creating an integrated database with controlled vocabularies on the wiki system. Our implementation and data are accessible at http://metabolomics.jp/wiki/.","hji,kes",1,1,2,2,1,NA,NA +22276777,miRdSNP: a database of disease-associated SNPs and microRNA target sites on 3'UTRs of human genes.,"

Background

Single nucleotide polymorphisms (SNPs) can lead to the susceptibility and onset of diseases through their effects on gene expression at the posttranscriptional level. Recent findings indicate that SNPs could create, destroy, or modify the efficiency of miRNA binding to the 3'UTR of a gene, resulting in gene dysregulation. With the rapidly growing number of published disease-associated SNPs (dSNPs), there is a strong need for resources specifically recording dSNPs on the 3'UTRs and their nucleotide distance from miRNA target sites. We present here miRdSNP, a database incorporating three important areas of dSNPs, miRNA target sites, and diseases.

Description

miRdSNP provides a unique database of dSNPs on the 3'UTRs of human genes manually curated from PubMed. The current release includes 786 dSNP-disease associations for 630 unique dSNPs and 204 disease types. miRdSNP annotates genes with experimentally confirmed targeting by miRNAs and indexes miRNA target sites predicted by TargetScan and PicTar as well as potential miRNA target sites newly generated by dSNPs. A robust web interface and search tools are provided for studying the proximity of miRNA binding sites to dSNPs in relation to human diseases. Searches can be dynamically filtered by gene name, miRBase ID, target prediction algorithm, disease, and any nucleotide distance between dSNPs and miRNA target sites. Results can be viewed at the sequence level showing the annotated locations for miRNA target sites and dSNPs on the entire 3'UTR sequences. The integration of dSNPs with the UCSC Genome browser is also supported.

Conclusion

miRdSNP provides a comprehensive data source of dSNPs and robust tools for exploring their distance from miRNA target sites on the 3'UTRs of human genes. miRdSNP enables researchers to further explore the molecular mechanism of gene dysregulation for dSNPs at posttranscriptional level. miRdSNP is freely available on the web at http://mirdsnp.ccr.buffalo.edu.","hji,kes",1,1,2,2,1,NA,NA +22334387,The androgen receptor gene mutations database: 2012 update.,"The current version of the androgen receptor gene (AR) mutations database is described. A major change to the database is that the nomenclature and numbering scheme now conforms to all Human Genome Variation Society norms. The total number of reported mutations has risen from 605 to 1,029 since 2004. The database now contains a number of mutations that are associated with prostate cancer (CaP) treatment regimens, while the number of AR mutations found in CaP tissues has more than doubled from 76 to 159. In addition, in a number of androgen insensitivity syndrome (AIS) and CaP cases, multiple mutations have been found within the same tissue samples. For the first time, we report on a disconnect within the AIS phenotype-genotype relationship among our own patient database, in that over 40% of our patients with a classic complete AIS or partial AIS phenotypes did not appear to have a mutation in their AR gene. The implications of this phenomenon on future locus-specific mutation database (LSDB) development are discussed, together with the concept that mutations can be associated with both loss- and gain-of-function, and the effect of multiple AR mutations within individuals. 
The database is available on the internet (http://androgendb.mcgill.ca), and a web-based LSDB with the variants using the Leiden Open Variation Database platform is available at http://www.lovd.nl/AR.","hji,kes",1,1,2,2,1,NA,NA +22345505,ANAP: an integrated knowledge base for Arabidopsis protein interaction network analysis.,"Protein interactions are fundamental to the molecular processes occurring within an organism and can be utilized in network biology to help organize, simplify, and understand biological complexity. Currently, there are more than 10 publicly available Arabidopsis (Arabidopsis thaliana) protein interaction databases. However, there are limitations with these databases, including different types of interaction evidence, a lack of defined standards for protein identifiers, differing levels of information, and, critically, a lack of integration between them. In this paper, we present an interactive bioinformatics Web tool, ANAP (Arabidopsis Network Analysis Pipeline), which serves to effectively integrate the different data sets and maximize access to available data. ANAP has been developed for Arabidopsis protein interaction integration and network-based study to facilitate functional protein network analysis. ANAP integrates 11 Arabidopsis protein interaction databases, comprising 201,699 unique protein interaction pairs, 15,208 identifiers (including 11,931 The Arabidopsis Information Resource Arabidopsis Genome Initiative codes), 89 interaction detection methods, 73 species that interact with Arabidopsis, and 6,161 references. ANAP can be used as a knowledge base for constructing protein interaction networks based on user input and supports both direct and indirect interaction analysis. It has an intuitive graphical interface allowing easy network visualization and provides extensive detailed evidence for each interaction. 
In addition, ANAP displays the gene and protein annotation in the generated interactive network with links to The Arabidopsis Information Resource, the AtGenExpress Visualization Tool, the Arabidopsis 1,001 Genomes GBrowse, the Protein Knowledgebase, the Kyoto Encyclopedia of Genes and Genomes, and the Ensembl Genome Browser to significantly aid functional network analysis. The tool is available open access at http://gmdd.shgmo.org/Computational-Biology/ANAP.","hji,kes",1,1,2,2,1,NA,NA +22369658,DetoxiProt: an integrated database for detoxification proteins.,"

Background

Detoxification proteins are a class of proteins for degradation and/or elimination of endogenous and exogenous toxins or medicines, as well as reactive oxygen species (ROS) produced by these materials. Most of these proteins are generated as a response to the stimulation of toxins or medicines. They are essential for the clearance of harmful substances and for maintenance of physiological balance in organisms. Thus, it is important to collect and integrate information on detoxification proteins.

Results

To store, retrieve and analyze the information related to their features and functions, we developed the DetoxiProt, a comprehensive database for annotation of these proteins. This database provides detailed introductions about different classes of the detoxification proteins. Extensive annotations of these proteins, including sequences, structures, features, inducers, inhibitors, substrates, chromosomal location, functional domains as well as physiological-biochemical properties were generated. Furthermore, pre-computed BLAST results, multiple sequence alignments and evolutionary trees for detoxification proteins are also provided for evolutionary study of conserved function and pathways. The current version of DetoxiProt contains 5956 protein entries distributed in 628 organisms. An easy to use web interface was designed, so that annotations about each detoxification protein can be retrieved by browsing with a specific method or by searching with different criteria.

Conclusions

DetoxiProt provides an effective and efficient way of accessing the detoxification protein sequences and other high-quality information. This database would be a valuable source for toxicologists, pharmacologists and medicinal chemists. DetoxiProt database is freely available at http://lifecenter.sgst.cn/detoxiprot/.","hji,kes",1,1,2,2,1,NA,NA +22784567,SigCS base: an integrated genetic information resource for human cerebral stroke.,"

Background

To understand how stroke risk factors mechanistically contribute to stroke, the genetic components regulating each risk factor need to be integrated and evaluated with respect to biological function and through pathway-based algorithms. This resource will provide information to researchers studying the molecular and genetic causes of stroke in terms of genomic variants, genes, and pathways.

Methods

Reported genetic variants, gene structure, phenotypes, and literature information regarding stroke were collected and extracted from publicly available databases describing variants, genome, proteome, functional annotation, and disease subtypes. Stroke related candidate pathways and etiologic genes that participate significantly in risk were analyzed in terms of canonical pathways in public biological pathway databases. These efforts resulted in a relational database of genetic signals of cerebral stroke, SigCS base, which implements an effective web retrieval system.

Results

The current version of SigCS base documents 1943 non-redundant genes with 11472 genetic variants and 165 non-redundant pathways. The web retrieval system of SigCS base consists of two principal search flows, including: 1) a gene-based variant search using gene table browsing or a keyword search, and, 2) a pathway-based variant search using pathway table browsing. SigCS base is freely accessible at http://sysbio.kribb.re.kr/sigcs.

Conclusions

SigCS base is an effective tool that can assist researchers in the identification of the genetic factors associated with stroke by utilizing existing literature information, selecting candidate genes and variants for experimental studies, and examining the pathways that contribute to the pathophysiological mechanisms of stroke.","hji,kes",1,1,2,2,1,NA,NA +22836712,A comparative cellular and molecular biology of longevity database.,"Discovering key cellular and molecular traits that promote longevity is a major goal of aging and longevity research. One experimental strategy is to determine which traits have been selected during the evolution of longevity in naturally long-lived animal species. This comparative approach has been applied to lifespan research for nearly four decades, yielding hundreds of datasets describing aspects of cell and molecular biology hypothesized to relate to animal longevity. Here, we introduce a Comparative Cellular and Molecular Biology of Longevity Database, available at ( http://genomics.brocku.ca/ccmbl/ ), as a compendium of comparative cell and molecular data presented in the context of longevity. This open access database will facilitate the meta-analysis of amalgamated datasets using standardized maximum lifespan (MLSP) data (from AnAge). The first edition contains over 800 data records describing experimental measurements of cellular stress resistance, reactive oxygen species metabolism, membrane composition, protein homeostasis, and genome homeostasis as they relate to vertebrate species MLSP. 
The purpose of this review is to introduce the database and briefly demonstrate its use in the meta-analysis of combined datasets.","hji,kes",1,1,2,2,1,NA,NA +22912585,Dissecting the gene network of dietary restriction to identify evolutionarily conserved pathways and new functional genes.,"Dietary restriction (DR), limiting nutrient intake from diet without causing malnutrition, delays the aging process and extends lifespan in multiple organisms. The conserved life-extending effect of DR suggests the involvement of fundamental mechanisms, although these remain a subject of debate. To help decipher the life-extending mechanisms of DR, we first compiled a list of genes that if genetically altered disrupt or prevent the life-extending effects of DR. We called these DR-essential genes and identified more than 100 in model organisms such as yeast, worms, flies, and mice. In order for other researchers to benefit from this first curated list of genes essential for DR, we established an online database called GenDR (http://genomics.senescence.info/diet/). To dissect the interactions of DR-essential genes and discover the underlying lifespan-extending mechanisms, we then used a variety of network and systems biology approaches to analyze the gene network of DR. We show that DR-essential genes are more conserved at the molecular level and have more molecular interactions than expected by chance. Furthermore, we employed a guilt-by-association method to predict novel DR-essential genes. In budding yeast, we predicted nine genes related to vacuolar functions; we show experimentally that mutations deleting eight of those genes prevent the life-extending effects of DR. Three of these mutants (OPT2, FRE6, and RCR2) had extended lifespan under ad libitum, indicating that the lack of further longevity under DR is not caused by a general compromise of fitness. These results demonstrate how network analyses of DR using GenDR can be used to make phenotypically relevant predictions. 
Moreover, gene-regulatory circuits reveal that the DR-induced transcriptional signature in yeast involves nutrient-sensing, stress responses and meiotic transcription factors. Finally, comparing the influence of gene expression changes during DR on the interactomes of multiple organisms led us to suggest that DR commonly suppresses translation, while stimulating an ancient reproduction-related process.","hji,kes",1,1,2,2,1,tentative 1; Cannot be contributed to,NA +23084601,PESNPdb: a comprehensive database of SNPs studied in association with pre-eclampsia.,"Pre-eclampsia is a pregnancy specific disorder that can be life threatening for mother and child. Multiple studies have been carried out in an attempt to identify SNPs that contribute to the genetic susceptibility of the disease. Here we describe PESNPdb (http://bejerano.stanford.edu/pesnpdb), a database aimed at centralizing SNP and study details investigated in association with pre-eclampsia. We also describe a Placenta Disorders ontology that utilizes information from PESNPdb. The main focus of PESNPdb is to help researchers study the genetic complexity of pre-eclampsia through a user-friendly interface that encourages community participation.","hji,kes",1,1,2,2,1,NA,NA +23093601,DoriC 5.0: an updated database of oriC regions in both bacterial and archaeal genomes.,"Replication of chromosomes is one of the central events in the cell cycle. Chromosome replication begins at specific sites, called origins of replication (oriCs), for all three domains of life. However, the origins of replication still remain unknown in a considerably large number of bacterial and archaeal genomes completely sequenced so far. The availability of increasing complete bacterial and archaeal genomes has created challenges and opportunities for identification of their oriCs in silico, as well as in vivo. 
Based on the Z-curve theory, we have developed a web-based system Ori-Finder to predict oriCs in bacterial genomes with high accuracy and reliability by taking advantage of comparative genomics, and the predicted oriC regions have been organized into an online database DoriC, which is publicly available at http://tubic.tju.edu.cn/doric/ since 2007. Five years after we constructed DoriC, the database has significant advances over the number of bacterial genomes, increasing about 4-fold. Additionally, oriC regions in archaeal genomes identified by in vivo experiments, as well as in silico analyses, have also been added to the database. Consequently, the latest release of DoriC contains oriCs for >1500 bacterial genomes and 81 archaeal genomes, respectively.","hji,kes",1,1,2,2,1,NA,NA +23118484,MODOMICS: a database of RNA modification pathways--2013 update.,"MODOMICS is a database of RNA modifications that provides comprehensive information concerning the chemical structures of modified ribonucleosides, their biosynthetic pathways, RNA-modifying enzymes and location of modified residues in RNA sequences. In the current database version, accessible at http://modomics.genesilico.pl, we included new features: a census of human and yeast snoRNAs involved in RNA-guided RNA modification, a new section covering the 5'-end capping process, and a catalogue of 'building blocks' for chemical synthesis of a large variety of modified nucleosides. The MODOMICS collections of RNA modifications, RNA-modifying enzymes and modified RNAs have been also updated. A number of newly identified modified ribonucleosides and more than one hundred functionally and structurally characterized proteins from various organisms have been added. In the RNA sequences section, snRNAs and snoRNAs with experimentally mapped modified nucleosides have been added and the current collection of rRNA and tRNA sequences has been substantially enlarged. 
To facilitate literature searches, each record in MODOMICS has been cross-referenced to other databases and to selected key publications. New options for database searching and querying have been implemented, including a BLAST search of protein sequences and a PARALIGN search of the collected nucleic acid sequences.","hji,kes",1,1,2,2,1,NA,NA +23143106,EcoCyc: fusing model organism databases with systems biology.,"EcoCyc (http://EcoCyc.org) is a model organism database built on the genome sequence of Escherichia coli K-12 MG1655. Expert manual curation of the functions of individual E. coli gene products in EcoCyc has been based on information found in the experimental literature for E. coli K-12-derived strains. Updates to EcoCyc content continue to improve the comprehensive picture of E. coli biology. The utility of EcoCyc is enhanced by new tools available on the EcoCyc web site, and the development of EcoCyc as a teaching tool is increasing the impact of the knowledge collected in EcoCyc.","hji,kes",1,1,2,2,1,NA,NA +23151233,PolySac3DB: an annotated data base of 3 dimensional structures of polysaccharides.,"

Background

Polysaccharides are ubiquitously present in the living world. Their structural versatility makes them important and interesting components in numerous biological and technological processes ranging from structural stabilization to a variety of immunologically important molecular recognition events. The knowledge of polysaccharide three-dimensional (3D) structure is important in studying carbohydrate-mediated host-pathogen interactions, interactions with other bio-macromolecules, drug design and vaccine development as well as material science applications or production of bio-ethanol.

Description

PolySac3DB is an annotated database that contains the 3D structural information of 157 polysaccharide entries that have been collected from an extensive screening of scientific literature. They have been systematically organized using standard names in the field of carbohydrate research into 18 categories representing polysaccharide families. Structure-related information includes the saccharides making up the repeat unit(s) and their glycosidic linkages, the expanded 3D representation of the repeat unit, unit cell dimensions and space group, helix type, diffraction diagram(s) (when applicable), experimental and/or simulation methods used for structure description, link to the abstract of the publication, reference and the atomic coordinate files for visualization and download. The database is accompanied by a user-friendly graphical user interface (GUI). It features interactive displays of polysaccharide structures and customized search options for beginners and experts, respectively. The site also serves as an information portal for polysaccharide structure determination techniques. The web-interface also references external links where other carbohydrate-related resources are available.

Conclusion

PolySac3DB is established to maintain information on the detailed 3D structures of polysaccharides. All the data and features are available via the web-interface utilizing the search engine and can be accessed at http://polysac3db.cermav.cnrs.fr.","hji,kes",1,1,2,2,1,NA,NA +23161672,APPRIS: annotation of principal and alternative splice isoforms.,"Here, we present APPRIS (http://appris.bioinfo.cnio.es), a database that houses annotations of human splice isoforms. APPRIS has been designed to provide value to manual annotations of the human genome by adding reliable protein structural and functional data and information from cross-species conservation. The visual representation of the annotations provided by APPRIS for each gene allows annotators and researchers alike to easily identify functional changes brought about by splicing events. In addition to collecting, integrating and analyzing reliable predictions of the effect of splicing events, APPRIS also selects a single reference sequence for each gene, here termed the principal isoform, based on the annotations of structure, function and conservation for each transcript. APPRIS identifies a principal isoform for 85% of the protein-coding genes in the GENCODE 7 release for ENSEMBL. Analysis of the APPRIS data shows that at least 70% of the alternative (non-principal) variants would lose important functional or structural information relative to the principal isoform.","hji,kes",1,1,2,2,1,NA,NA +23178820,MicrobPad MD: microbial pathogen diagnostic methods database.,"Medical pathogens induce infections, illnesses and sometimes serious medical conditions in the infected hosts. Diagnosis of these pathogens is important for proper treatment and investigation of pathogenesis processes. Molecular techniques have been developed for facilitating accurate, sensitive and low-cost diagnosis of these pathogens. Based on these techniques, diagnostic devices have been developed for a number of pathogens. 
More devices are needed for comprehensive coverage of medical pathogens. To facilitate the development of these devices, a database with integrated information about diagnostic methods, targets, and primers/probes for the known bacterial, fungal and viral pathogens is needed. We developed the microbial pathogen diagnostic methods database MicrobPad MD (http://bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp or http://pha-bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp) to provide comprehensive information about the molecular diagnostic techniques, targets, primers/probes, detection procedures and conditions, and tested diagnostic accuracies and limit of diagnosis for 314 bacterial, fungal and viral species from 61 genera. While available, additional information such as pathogen strains and hosts, tissue distribution or habitats, cultivation methods, biochemical characteristics, virulence factors, morphology, diseases, symptoms, treatment and prevention methods are provided. Our Database covers 242 gene targets, 700 primers/probes, 340 virulence factors, and 261 diseases. Cross-links to the NCBI genome and SwissProt/UniProt databases are provided.","hji,kes",1,1,2,2,1,NA,NA +23180799,PGDD: a database of gene and genome duplication in plants.,"Genome duplication (GD) has permanently shaped the architecture and function of many higher eukaryotic genomes. The angiosperms (flowering plants) are outstanding models in which to elucidate consequences of GD for higher eukaryotes, owing to their propensity for chromosomal duplication or even triplication in a few cases. Duplicated genome structures often require both intra- and inter-genome alignments to unravel their evolutionary history, also providing the means to deduce both obvious and otherwise-cryptic orthology, paralogy and other relationships among genes. The burgeoning sets of angiosperm genome sequences provide the foundation for a host of investigations into the functional and evolutionary consequences of gene and GD. 
To provide genome alignments from a single resource based on uniform standards that have been validated by empirical studies, we built the Plant Genome Duplication Database (PGDD; freely available at http://chibba.agtec.uga.edu/duplication/), a web service providing synteny information in terms of colinearity between chromosomes. At present, PGDD contains data for 26 plants including bryophytes and chlorophyta, as well as angiosperms with draft genome sequences. In addition to the inclusion of new genomes as they become available, we are preparing new functions to enhance PGDD.","hji,kes",1,1,2,2,1,NA,NA +23209799,The duplicated genes database: identification and functional annotation of co-localised duplicated genes across genomes.,"

Background

There has been a surge in studies linking genome structure and gene expression, with special focus on duplicated genes. Although initially duplicated from the same sequence, duplicated genes can diverge strongly over evolution and take on different functions or regulated expression. However, information on the function and expression of duplicated genes remains sparse. Identifying groups of duplicated genes in different genomes and characterizing their expression and function would therefore be of great interest to the research community. The 'Duplicated Genes Database' (DGD) was developed for this purpose.

Methodology

Nine species were included in the DGD. For each species, BLAST analyses were conducted on peptide sequences corresponding to the genes mapped on the same chromosome. Groups of duplicated genes were defined based on these pairwise BLAST comparisons and the genomic location of the genes. For each group, Pearson correlations between gene expression data and semantic similarities between functional GO annotations were also computed when the relevant information was available.

Conclusions

The Duplicated Gene Database provides a list of co-localised and duplicated genes for several species with the available gene co-expression level and semantic similarity value of functional annotation. Adding these data to the groups of duplicated genes provides biological information that can prove useful to gene expression analyses. The Duplicated Gene Database can be freely accessed through the DGD website at http://dgd.genouest.org.","hji,kes",1,1,2,2,1,NA,NA +23406793,"T-HOD: a literature-based candidate gene database for hypertension, obesity and diabetes.","Researchers are finding it more and more difficult to follow the changing status of disease candidate genes due to the exponential increase in gene mapping studies. The Text-mined Hypertension, Obesity and Diabetes candidate gene database (T-HOD) is developed to help trace existing research on three kinds of cardiovascular diseases: hypertension, obesity and diabetes, with the last disease categorized into Type 1 and Type 2, by regularly and semiautomatically extracting HOD-related genes from newly published literature. Currently, there are 837, 835 and 821 candidate genes recorded in T-HOD for hypertension, obesity and diabetes, respectively. T-HOD employed the state-of-art text-mining technologies, including a gene/disease identification system and a disease-gene relation extraction system, which can be used to affirm the association of genes with three diseases and provide more evidence for further studies. The primary inputs of T-HOD are the three kinds of diseases, and the output is a list of disease-related genes that can be ranked based on their number of appearance, protein-protein interactions and single-nucleotide polymorphisms. Unlike manually constructed disease gene databases, the content of T-HOD is regularly updated by our text-mining system and verified by domain experts. The interface of T-HOD facilitates easy browsing for users and allows T-HOD curators to verify data efficiently. 
We believe that T-HOD can help life scientists in search for more disease candidate genes in a less time- and effort-consuming manner. Database URL: http://bws.iis.sinica.edu.tw/THOD.","hji,kes",1,1,2,2,1,NA,NA +23411718,The Eimeria transcript DB: an integrated resource for annotated transcripts of protozoan parasites of the genus Eimeria.,"Parasites of the genus Eimeria infect a wide range of vertebrate hosts, including chickens. We have recently reported a comparative analysis of the transcriptomes of Eimeria acervulina, Eimeria maxima and Eimeria tenella, integrating ORESTES data produced by our group and publicly available Expressed Sequence Tags (ESTs). All cDNA reads have been assembled, and the reconstructed transcripts have been submitted to a comprehensive functional annotation pipeline. Additional studies included orthology assignment across apicomplexan parasites and clustering analyses of gene expression profiles among different developmental stages of the parasites. To make all this body of information publicly available, we constructed the Eimeria Transcript Database (EimeriaTDB), a web repository that provides access to sequence data, annotation and comparative analyses. Here, we describe the web interface, available sequence data sets and query tools implemented on the site. The main goal of this work is to offer a public repository of sequence and functional annotation data of reconstructed transcripts of parasites of the genus Eimeria. We believe that EimeriaTDB will represent a valuable and complementary resource for the Eimeria scientific community and for those researchers interested in comparative genomics of apicomplexan parasites. Database URL: http://www.coccidia.icb.usp.br/eimeriatdb/","hji,kes",1,1,2,2,1,NA,NA +23599502,INstruct: a database of high-quality 3D structurally resolved protein interactome networks.,"

Unlabelled

INstruct is a database of high-quality, 3D, structurally resolved protein interactome networks in human and six model organisms. INstruct combines the scale of available high-quality binary protein interaction data with the specificity of atomic-resolution structural information derived from co-crystal evidence using a tested interaction interface inference method. Its web interface is designed to allow for flexible search based on standard and organism-specific protein and gene-naming conventions, visualization of protein architecture highlighting interaction interfaces and viewing and downloading custom 3D structurally resolved interactome datasets.

Availability

INstruct is freely available on the web at http://instruct.yulab.org with all major browsers supported.","hji,kes",1,1,2,2,1,NA,NA +23633602,Integrated database of information from structural genomics experiments.,"Information from structural genomics experiments at the RIKEN SPring-8 Center, Japan has been compiled and published as an integrated database. The contents of the database are (i) experimental data from nine species of bacteria that cover a large variety of protein molecules in terms of both evolution and properties (http://database.riken.jp/db/bacpedia), (ii) experimental data from mutant proteins that were designed systematically to study the influence of mutations on the diffraction quality of protein crystals (http://database.riken.jp/db/bacpedia) and (iii) experimental data from heavy-atom-labelled proteins from the heavy-atom database HATODAS (http://database.riken.jp/db/hatodas). The database integration adopts the semantic web, which is suitable for data reuse and automatic processing, thereby allowing batch downloads of full data and data reconstruction to produce new databases. In addition, to enhance the use of data (i) and (ii) by general researchers in biosciences, a comprehensible user interface, Bacpedia (http://bacpedia.harima.riken.jp), has been developed.","hji,kes",1,1,2,2,1,NA,NA +23721660,HIM-herbal ingredients in-vivo metabolism database.,"

Background

Herbal medicine has long been viewed as a valuable asset for potential new drug discovery, and herbal ingredients' metabolites, especially the in vivo metabolites, were often found to gain better pharmacological, pharmacokinetic and even better safety profiles compared to their parent compounds. However, this herbal metabolite information is still scattered and waiting to be collected.

Description

The HIM database contains the most comprehensive manually collected in-vivo metabolism information available so far for herbal active ingredients, as well as their corresponding bioactivity, organ and/or tissue distribution, toxicity, ADME and clinical research profiles. Currently, HIM contains 361 ingredients and 1104 corresponding in-vivo metabolites from 673 reputable herbs. Tools for structural similarity, substructure search and Lipinski's Rule of Five are also provided. Various links were made to PubChem, PubMed, TCM-ID (Traditional Chinese Medicine Information database) and HIT (Herbal ingredients' targets databases).

Conclusions

A curated database HIM is set up for the in vivo metabolites information of the active ingredients for Chinese herbs, together with their corresponding bioactivity, toxicity and ADME profile. HIM is freely accessible to academic researchers at http://www.bioinformatics.org.cn/.","hji,kes",1,1,2,2,1,NA,NA +23936191,HSC-explorer: a curated database for hematopoietic stem cells.,"HSC-Explorer (http://mips.helmholtz-muenchen.de/HSC/) is a publicly available, integrative database containing detailed information about the early steps of hematopoiesis. The resource aims at providing fast and easy access to relevant information, in particular to the complex network of interacting cell types and molecules, from the wealth of publications in the field through visualization interfaces. It provides structured information on more than 7000 experimentally validated interactions between molecules, bioprocesses and environmental factors. Information is manually derived by critical reading of the scientific literature from expert annotators. Hematopoiesis-relevant interactions are accompanied with context information such as model organisms and experimental methods for enabling assessment of reliability and relevance of experimental results. Usage of established vocabularies facilitates downstream bioinformatics applications and to convert the results into complex networks. Several predefined datasets (Selected topics) offer insights into stem cell behavior, the stem cell niche and signaling processes supporting hematopoietic stem cell maintenance. 
HSC-Explorer provides a versatile web-based resource for scientists entering the field of hematopoiesis enabling users to inspect the associated biological processes through interactive graphical presentation.","hji,kes",1,1,2,2,1,NA,NA +24009897,EVpedia: an integrated database of high-throughput data for systemic analyses of extracellular vesicles.,"Secretion of extracellular vesicles is a general cellular activity that spans the range from simple unicellular organisms (e.g. archaea; Gram-positive and Gram-negative bacteria) to complex multicellular ones, suggesting that this extracellular vesicle-mediated communication is evolutionarily conserved. Extracellular vesicles are spherical bilayered proteolipids with a mean diameter of 20-1,000 nm, which are known to contain various bioactive molecules including proteins, lipids, and nucleic acids. Here, we present EVpedia, which is an integrated database of high-throughput datasets from prokaryotic and eukaryotic extracellular vesicles. EVpedia provides high-throughput datasets of vesicular components (proteins, mRNAs, miRNAs, and lipids) present on prokaryotic, non-mammalian eukaryotic, and mammalian extracellular vesicles. In addition, EVpedia also provides an array of tools, such as the search and browse of vesicular components, Gene Ontology enrichment analysis, network analysis of vesicular proteins and mRNAs, and a comparison of vesicular datasets by ortholog identification. Moreover, publications on extracellular vesicle studies are listed in the database. 
This free web-based database of EVpedia (http://evpedia.info) might serve as a fundamental repository to stimulate the advancement of extracellular vesicle studies and to elucidate the novel functions of these complex extracellular organelles.","hji,kes",1,1,2,2,1,NA,NA +24163255,The pancreatic expression database: recent extensions and updates.,"The Pancreatic Expression Database (PED, http://www.pancreasexpression.org) is the only device currently available for mining of pancreatic cancer literature data. It brings together the largest collection of multidimensional pancreatic data from the literature including genomic, proteomic, microRNA, methylomic and transcriptomic profiles. PED allows the user to ask specific questions on the observed levels of deregulation among a broad range of specimen/experimental types including healthy/patient tissue and body fluid specimens, cell lines and murine models as well as related treatments/drugs data. Here we provide an update to PED, which has been previously featured in the Database issue of this journal. Briefly, PED data content has been substantially increased and expanded to cover methylomics studies. We introduced an extensive controlled vocabulary that records specific details on the samples and added data from large-scale meta-analysis studies. The web interface has been improved/redesigned with a quick search option to rapidly extract information about a gene/protein of interest and an upload option allowing users to add their own data to PED. We added a user guide and implemented integrated graphical tools to overlay and visualize retrieved information. Interoperability with biomart-compatible data sets was significantly improved to allow integrative queries with pancreatic cancer data.","hji,kes",1,1,2,2,1,NA,NA +24185698,LSD 2.0: an update of the leaf senescence database.,"This manuscript describes an update of the leaf senescence database (LSD) previously featured in the 2011 NAR Database Issue. 
LSD provides comprehensive information concerning senescence-associated genes (SAGs) and their corresponding mutants. We have made extensive annotations for these SAGs through both manual and computational approaches. Recently, we updated LSD to a new version LSD 2.0 (http://www.eplantsenescence.org/), which contains 5356 genes and 322 mutants from 44 species, an extension from the previous version containing 1145 genes and 154 mutants from 21 species. In the current version, we also included several new features: (i) Primer sequences retrieved based on experimental evidence or designed for high-throughput analysis were added; (ii) More than 100 images of Arabidopsis SAG mutants were added; (iii) Arabidopsis seed information obtained from The Arabidopsis Information Resource (TAIR) was integrated; (iv) Subcellular localization information of SAGs in Arabidopsis mined from literature or generated from the SUBA3 program was presented; (v) Quantitative Trait Loci information was added with links to the original database and (vi) New options such as primer and miRNA search for database query were implemented. The updated database will be a valuable and informative resource for basic research of leaf senescence and for the manipulation of traits of agronomically important plants.","hji,kes",1,1,2,2,1,NA,NA +24203703,"CottonGen: a genomics, genetics and breeding database for cotton research.","CottonGen (http://www.cottongen.org) is a curated and integrated web-based relational database providing access to publicly available genomic, genetic and breeding data for cotton. CottonGen supercedes CottonDB and the Cotton Marker Database, with enhanced tools for easier data sharing, mining, visualization and data retrieval of cotton research data. CottonGen contains annotated whole genome sequences, unigenes from expressed sequence tags (ESTs), markers, trait loci, genetic maps, genes, taxonomy, germplasm, publications and communication resources for the cotton community. 
Annotated whole genome sequences of Gossypium raimondii are available with aligned genetic markers and transcripts. These whole genome data can be accessed through genome pages, search tools and GBrowse, a popular genome browser. Most of the published cotton genetic maps can be viewed and compared using CMap, a comparative map viewer, and are searchable via map search tools. Search tools also exist for markers, quantitative trait loci (QTLs), germplasm, publications and trait evaluation data. CottonGen also provides online analysis tools such as NCBI BLAST and Batch BLAST.","hji,kes",1,1,2,2,1,NA,NA +24234451,The MIntAct project--IntAct as a common curation platform for 11 molecular interaction databases.,"IntAct (freely available at http://www.ebi.ac.uk/intact) is an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions. IntAct has developed a sophisticated web-based curation tool, capable of supporting both IMEx- and MIMIx-level curation. This tool is now utilized by multiple additional curation teams, all of whom annotate data directly into the IntAct database. Members of the IntAct team supply appropriate levels of training, perform quality control on entries and take responsibility for long-term data maintenance. Recently, the MINT and IntAct databases decided to merge their separate efforts to make optimal use of limited developer resources and maximize the curation output. All data manually curated by the MINT curators have been moved into the IntAct database at EMBL-EBI and are merged with the existing IntAct dataset. 
Both IntAct and MINT are active contributors to the IMEx consortium (http://www.imexconsortium.org).","hji,kes",1,1,2,2,1,NA,NA +24243842,LPSN--list of prokaryotic names with standing in nomenclature.,"The List of Prokaryotic Names with Standing in Nomenclature (LPSN; http://www.bacterio.net) is a database that lists the names of prokaryotes (Bacteria and Archaea) that have been validly published in the International Journal of Systematic and Evolutionary Microbiology directly or by inclusion in a Validation List, under the Rules of International Code of Nomenclature of Bacteria. Currently there are 15 974 taxa listed. In addition, LPSN has an up-to-date classification of prokaryotes and information on prokaryotic nomenclature and culture collections.","hji,kes",1,1,2,2,1,NA,NA +24270047,ProfileDB: a resource for proteomics and cross-omics biomarker discovery.,"The increasing size and complexity of high-throughput datasets pose a growing challenge for researchers. Often very different (cross-omics) techniques with individual data analysis pipelines are employed making a unified biomarker discovery strategy and a direct comparison of different experiments difficult and time consuming. Here we present the comprehensive web-based application ProfileDB. The application is designed to integrate data from different high-throughput 'omics' data types (Transcriptomics, Proteomics, Metabolomics) with clinical parameters and prior knowledge on pathways and ontologies. Beyond data storage, ProfileDB provides a set of dedicated tools for study inspection and data visualization. The user can gain insights into a complex experiment with just a few mouse clicks. We will demonstrate the application by presenting typical use cases for the identification of proteomics biomarkers. All presented analyses can be reproduced using the public ProfileDB web server. 
The ProfileDB application is available by standard browser (Firefox 18+, Internet Explorer Version 9+) technology via http://profileDB.microdiscovery.de/ (login and password: profileDB). The installation contains several public datasets including different cross-'omics' experiments. This article is part of a Special Issue entitled: Biomarkers: A Proteomic Challenge.","hji,kes",1,1,2,2,1,NA,NA +24273012,SpliceProt: a protein sequence repository of predicted human splice variants.,"The mechanism of alternative splicing in the transcriptome may increase the proteome diversity in eukaryotes. In proteomics, several studies aim to use protein sequence repositories to annotate MS experiments or to detect differentially expressed proteins. However, the available protein sequence repositories are not designed to fully detect protein isoforms derived from mRNA splice variants. To foster knowledge for the field, here we introduce SpliceProt, a new protein sequence repository of transcriptome experimental data used to investigate for putative splice variants in human proteomes. Current version of SpliceProt contains 159719 non-redundant putative polypeptide sequences. The assessment of the potential of SpliceProt in detecting new protein isoforms resulting from alternative splicing was performed by using publicly available proteomics data. We detected 173 peptides hypothetically derived from splice variants, of which 54 are not present in UniprotKB/TrEMBL sequence repository. In comparison to other protein sequence repositories, SpliceProt contains a greater number of unique peptides and is able to detect more splice variants. Therefore, SpliceProt provides a solution for the annotation of proteomics experiments regarding splice isoforms. 
The repository files containing the translated sequences of the predicted splice variants and a visualization tool are freely available at http://lbbc.inca.gov.br/spliceprot.","hji,kes",1,1,2,2,1,NA,NA +24304892,miRTarBase update 2014: an information resource for experimentally validated miRNA-target interactions.,"MicroRNAs (miRNAs) are small non-coding RNA molecules capable of negatively regulating gene expression to control many cellular mechanisms. The miRTarBase database (http://mirtarbase.mbc.nctu.edu.tw/) provides the most current and comprehensive information of experimentally validated miRNA-target interactions. The database was launched in 2010 with data sources for >100 published studies in the identification of miRNA targets, molecular networks of miRNA targets and systems biology, and the current release (2013, version 4) includes significant expansions and enhancements over the initial release (2010, version 1). This article reports the current status of and recent improvements to the database, including (i) a 14-fold increase to miRNA-target interaction entries, (ii) a miRNA-target network, (iii) expression profile of miRNA and its target gene, (iv) miRNA target-associated diseases and (v) additional utilities including an upgrade reminder and an error reporting/user feedback system.","hji,kes",1,1,2,2,1,NA,NA +24334957,The Transformer database: biotransformation of xenobiotics.,"As the number of prescribed drugs is constantly rising, drug-drug interactions are an important issue. The simultaneous administration of several drugs can cause severe adverse effects based on interactions with the same metabolizing enzyme(s). The Transformer database (http://bioinformatics.charite.de/transformer) contains integrated information on the three phases of biotransformation (modification, conjugation and excretion) of 3000 drugs and >350 relevant food ingredients (e.g. grapefruit juice) and herbs, which are catalyzed by 400 proteins. 
A total of 100,000 interactions were found through text mining and manual validation. The 3D structures of 200 relevant proteins are included. The database enables users to search for drugs with a visual display of known interactions with phase I (Cytochrome P450) and phase II enzymes, transporters, food and herbs. For each interaction, PubMed references are given. To detect mutual impairments of drugs, the drug-cocktail tool displays interactions between selected drugs. By choosing the indication for a drug, the tool offers suggestions for alternative medications to avoid metabolic conflicts. Drug interactions can also be visualized in an interactive network view. Additionally, prodrugs, including their mechanisms of activation, and further information on enzymes of biotransformation, including 3D models, can be viewed.","hji,kes",1,1,2,2,1,NA,NA +24428888,OncomiRdbB: a comprehensive database of microRNAs and their targets in breast cancer.,"

Background

Given the estimate that 30% of our genes are controlled by microRNAs, it is essential that we understand the precise relationship between microRNAs and their targets. OncomiRs are microRNAs (miRNAs) that have been frequently shown to be deregulated in cancer. However, although several oncomiRs have been identified and characterized, there is as yet no comprehensive compilation of this data, which has rendered it underutilized by cancer biologists. There is therefore an unmet need to generate bioinformatic platforms to speed the identification of novel therapeutic targets.

Description

We describe here OncomiRdbB, a comprehensive database of oncomiRs mined from different existing databases for mouse and humans along with novel oncomiRs that we have validated in human breast cancer samples. The database also lists their respective predicted targets, identified using miRanda, along with their IDs, sequences, chromosome location and detailed description. This database facilitates querying by search strings including microRNA name, sequence, accession number, target genes and organisms. The microRNA networks and their hubs with respective targets at 3'UTR, 5'UTR and exons of different pathway genes were also deciphered using the 'R' algorithm.

Conclusion

OncomiRdbB is a comprehensive and integrated database of oncomiRs and their targets in breast cancer with multiple query options which will help enhance both understanding of the biology of breast cancer and the development of new and innovative microRNA based diagnostic tools and targets of therapeutic significance. OncomiRdbB is freely available for download through the URL link http://tdb.ccmb.res.in/OncomiRdbB/index.htm.","hji,kes",1,1,2,2,1,NA,NA +24466021,CoryneBase: Corynebacterium genomic resources and analysis tools at your fingertips.,"Corynebacteria are used for a wide variety of industrial purposes but some species are associated with human diseases. With increasing number of corynebacterial genomes having been sequenced, comparative analysis of these strains may provide better understanding of their biology, phylogeny, virulence and taxonomy that may lead to the discoveries of beneficial industrial strains or contribute to better management of diseases. To facilitate the ongoing research of corynebacteria, a specialized central repository and analysis platform for the corynebacterial research community is needed to host the fast-growing amount of genomic data and facilitate the analysis of these data. Here we present CoryneBase, a genomic database for Corynebacterium with diverse functionality for the analysis of genomes aimed to provide: (1) annotated genome sequences of Corynebacterium where 165,918 coding sequences and 4,180 RNAs can be found in 27 species; (2) access to comprehensive Corynebacterium data through the use of advanced web technologies for interactive web interfaces; and (3) advanced bioinformatic analysis tools consisting of standard BLAST for homology search, VFDB BLAST for sequence homology search against the Virulence Factor Database (VFDB), Pairwise Genome Comparison (PGC) tool for comparative genomic analysis, and a newly designed Pathogenomics Profiling Tool (PathoProT) for comparative pathogenomic analysis. 
CoryneBase offers the access of a range of Corynebacterium genomic resources as well as analysis tools for comparative genomics and pathogenomics. It is publicly available at http://corynebacterium.um.edu.my/.","hji,kes",1,1,2,2,1,NA,NA +24558441,Human transporter database: comprehensive knowledge and discovery tools in the human transporter genes.,"Transporters are essential in homeostatic exchange of endogenous and exogenous substances at the systematic, organic, cellular, and subcellular levels. Gene mutations of transporters are often related to pharmacogenetics traits. Recent developments in high throughput technologies on genomics, transcriptomics and proteomics allow in depth studies of transporter genes in normal cellular processes and diverse disease conditions. The flood of high throughput data have resulted in urgent need for an updated knowledgebase with curated, organized, and annotated human transporters in an easily accessible way. Using a pipeline with the combination of automated keywords query, sequence similarity search and manual curation on transporters, we collected 1,555 human non-redundant transporter genes to develop the Human Transporter Database (HTD) (http://htd.cbi.pku.edu.cn). Based on the extensive annotations, global properties of the transporter genes were illustrated, such as expression patterns and polymorphisms in relationships with their ligands. We noted that the human transporters were enriched in many fundamental biological processes such as oxidative phosphorylation and cardiac muscle contraction, and significantly associated with Mendelian and complex diseases such as epilepsy and sudden infant death syndrome. 
Overall, HTD provides a well-organized interface to facilitate research communities to search detailed molecular and genetic information of transporters for development of personalized medicine.","hji,kes",1,1,2,2,1,NA,NA +24651967,OncomiRDB: a database for the experimentally verified oncogenic and tumor-suppressive microRNAs.,"

Summary

MicroRNAs (miRNAs), a class of small regulatory RNAs, play important roles in cancer initiation, progression and therapy. MiRNAs are found to regulate diverse cancer-related processes by targeting a large set of oncogenic and tumor-suppressive genes. To establish a high-confidence reference resource for studying the miRNA-regulated target genes and cellular processes in cancer, we manually curated 2259 entries of cancer-related miRNA regulations with direct experimental evidence from ~9000 abstracts, covering more than 300 miRNAs and 829 target genes across 25 cancer tissues. A web-based portal named oncomiRDB, which provides both graphical and text-based interfaces, was developed for easily browsing and searching all the annotations. It should be a useful resource for both the computational analysis and experimental study on miRNA regulatory networks and functions in cancer.

Availability and implementation

http://bioinfo.au.tsinghua.edu.cn/oncomirdb/

Contact

jgu@tsinghua.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +24712981,SFGD: a comprehensive platform for mining functional information from soybean transcriptome data and its use in identifying acyl-lipid metabolism pathways.,"

Background

Soybean (Glycine max L.) is one of the world's most important leguminous crops producing high-quality protein and oil. Increasing the relative oil concentration in soybean seeds is many researchers' goal, but a complete analysis platform of functional annotation for the genes involved in the soybean acyl-lipid pathway is still lacking. Following the success of soybean whole-genome sequencing, functional annotation has become a major challenge for the scientific community. Whole-genome transcriptome analysis is a powerful way to predict genes with biological functions. It is essential to build a comprehensive analysis platform for integrating soybean whole-genome sequencing data, the available transcriptome data and protein information. This platform could also be used to identify acyl-lipid metabolism pathways.

Description

In this study, we describe our construction of the Soybean Functional Genomics Database (SFGD) using Generic Genome Browser (Gbrowse) as the core platform. We integrated microarray expression profiling with 255 samples from 14 groups' experiments and mRNA-seq data with 30 samples from four groups' experiments, including spatial and temporal transcriptome data for different soybean development stages and environmental stresses. The SFGD includes a gene co-expression regulatory network containing 23,267 genes and 1873 miRNA-target pairs, and a group of acyl-lipid pathways containing 221 enzymes and more than 1550 genes. The SFGD also provides some key analysis tools, i.e. BLAST search, expression pattern search and cis-element significance analysis, as well as gene ontology information search and single nucleotide polymorphism display.

Conclusion

The SFGD is a comprehensive database integrating genome and transcriptome data, and also for soybean acyl-lipid metabolism pathways. It provides useful toolboxes for biologists to improve the accuracy and robustness of soybean functional genomics analysis, further improving understanding of gene regulatory networks for effective crop improvement. The SFGD is publically accessible at http://bioinformatics.cau.edu.cn/SFGD/, with all data available for downloading.","hji,kes",1,1,2,2,1,NA,NA +24855436,Polytraits: A database on biological traits of marine polychaetes.,"The study of ecosystem functioning - the role which organisms play in an ecosystem - is becoming increasingly important in marine ecological research. The functional structure of a community can be represented by a set of functional traits assigned to behavioural, reproductive and morphological characteristics. The collection of these traits from the literature is however a laborious and time-consuming process, and gaps of knowledge and restricted availability of literature are a common problem. Trait data are not yet readily being shared by research communities, and even if they are, a lack of trait data repositories and standards for data formats leads to the publication of trait information in forms which cannot be processed by computers. This paper describes Polytraits (http://polytraits.lifewatchgreece.eu), a database on biological traits of marine polychaetes (bristle worms, Polychaeta: Annelida). At present, the database contains almost 20,000 records on morphological, behavioural and reproductive characteristics of more than 1,000 marine polychaete species, all referenced by literature sources. 
All data can be freely accessed through the project website in different ways and formats, both human-readable and machine-readable, and have been submitted to the Encyclopedia of Life for archival and integration with trait information from other sources.","hji,kes",1,1,2,2,1,NA,NA +24870500,Phytoseiidae database: a website for taxonomic and distributional information on phytoseiid mites (Acari).,"This paper announces a database on the taxonomy and distribution of mites of the family Phytoseiidae Berlese, which is available online at http://www.lea.esalq.usp.br/phytoseiidae/. Synthesis of species diversity per genus, subfamily and country are given. Information about use of the database is provided.","hji,kes",1,1,2,2,1,NA,NA +24907201,SeaBase: a multispecies transcriptomic resource and platform for gene network inference.,"Marine and aquatic animals are extraordinarily useful as models for identifying mechanisms of development and evolution, regeneration, resistance to cancer, longevity and symbiosis, among many other areas of research. This is due to the great diversity of these organisms and their wide-ranging capabilities. Genomics tools are essential for taking advantage of these """"free lessons"""" of nature. However, genomics and transcriptomics are challenging in emerging model systems. Here, we present SeaBase, a tool for helping to meet these needs. Specifically, SeaBase provides a platform for sharing and searching transcriptome data. More importantly, SeaBase will support a growing number of tools for inferring gene network mechanisms. The first dataset available on SeaBase is a developmental transcriptomic profile of the sea anemone Nematostella vectensis (Anthozoa, Cnidaria). Additional datasets are currently being prepared and we are aiming to expand SeaBase to include user-supplied data for any number of marine and aquatic organisms, thereby supporting many potentially new models for gene network studies. 
SeaBase can be accessed online at: http://seabase.core.cli.mbl.edu.","hji,kes",1,1,2,2,1,NA,NA +25098325,MediaDB: a database of microbial growth conditions in defined media.,"Isolating pure microbial cultures and cultivating them in the laboratory on defined media is used to more fully characterize the metabolism and physiology of organisms. However, identifying an appropriate growth medium for a novel isolate remains a challenging task. Even organisms with sequenced and annotated genomes can be difficult to grow, despite our ability to build genome-scale metabolic networks that connect genomic data with metabolic function. The scientific literature is scattered with information about defined growth media used successfully for cultivating a wide variety of organisms, but to date there exists no centralized repository to inform efforts to cultivate less characterized organisms by bridging the gap between genomic data and compound composition for growth media. Here we present MediaDB, a manually curated database of defined media that have been used for cultivating organisms with sequenced genomes, with an emphasis on organisms with metabolic network models. The database is accessible online, can be queried by keyword searches or downloaded in its entirety, and can generate exportable individual media formulation files. The data assembled in MediaDB facilitate comparative studies of organism growth media, serve as a starting point for formulating novel growth media, and contribute to formulating media for in silico investigation of metabolic networks. MediaDB is freely available for public use at https://mediadb.systemsbiology.net.","hji,kes",1,1,2,2,1,NA,NA +25178289,Native Pig and Chicken Breed Database: NPCDB.,"Indigenous (native) breeds of livestock have higher disease resistance and adaptation to the environment due to high genetic diversity. 
Even though their extinction rate is accelerated due to the increase of commercial breeds, natural disaster, and civil war, there is a lack of well-established databases for the native breeds. Thus, we constructed the native pig and chicken breed database (NPCDB) which integrates available information on the breeds from around the world. It is a nonprofit public database aimed to provide information on the genetic resources of indigenous pig and chicken breeds for their conservation. The NPCDB (http://npcdb.snu.ac.kr/) provides the phenotypic information and population size of each breed as well as its specific habitat. In addition, it provides information on the distribution of genetic resources across the country. The database will contribute to understanding of the breed's characteristics such as disease resistance and adaptation to environmental changes as well as the conservation of indigenous genetic resources.","hji,kes",1,1,2,2,1,tentative 1; Is this biodata?,NA +25234927,circBase: a database for circular RNAs.,"Recently, several laboratories have reported thousands of circular RNAs (circRNAs) in animals. Numerous circRNAs are highly stable and have specific spatiotemporal expression patterns. Even though a function for circRNAs is unknown, these features make circRNAs an interesting class of RNAs as possible biomarkers and for further research. We developed a database and website, """"circBase,"""" where merged and unified data sets of circRNAs and the evidence supporting their expression can be accessed, downloaded, and browsed within the genomic context. circBase also provides scripts to identify known and novel circRNAs in sequencing data. The database is freely accessible through the web server at http://www.circbase.org/.","hji,kes",1,1,2,2,1,NA,NA +25267795,CarrotDB: a genomic and transcriptomic database for carrot.,"Carrot (Daucus carota L.) 
is an economically important vegetable worldwide and is the largest source of carotenoids and provitamin A in the human diet. Given the importance of this vegetable to humans, research and breeding communities on carrot should obtain useful genomic and transcriptomic information. The first whole-genome sequences of 'DC-27' carrot were de novo assembled and analyzed. Transcriptomic sequences of 14 carrot genotypes were downloaded from the Sequence Read Archive (SRA) database of National Center for Biotechnology Information (NCBI) and mapped to the whole-genome sequence before assembly. Based on these data sets, the first Web-based genomic and transcriptomic database for D. carota (CarrotDB) was developed (database homepage: http://apiaceae.njau.edu.cn/carrotdb). CarrotDB offers the tools of Genome Map and Basic Local Alignment Search Tool. Using these tools, users can search certain target genes and simple sequence repeats along with designed primers of 'DC-27'. Assembled transcriptomic sequences along with fragments per kilobase of transcript sequence per millions base pairs sequenced information (FPKM) information of 14 carrot genotypes are also provided. Users can download de novo assembled whole-genome sequences, putative gene sequences and putative protein sequences of 'DC-27'. Users can also download transcriptome sequence assemblies of 14 carrot genotypes along with their FPKM information. A total of 2826 transcription factor (TF) genes classified into 57 families were identified in the entire genome sequences. These TF genes were embedded in CarrotDB as an interface. The 'GERMPLASM' part of CarrotDB also offers taproot photos of 45 carrot genotypes and a table containing accession numbers, names, countries of origin and colors of cortex, phloem and xylem parts of taproots corresponding to each carrot genotype. CarrotDB will be continuously updated with new information. 
Database URL: http://apiaceae.njau.edu.cn/carrotdb/","hji,kes",1,1,2,2,1,NA,NA +25392413,The coffee genome hub: a resource for coffee genomes.,"The whole genome sequence of Coffea canephora, the perennial diploid species known as Robusta, has been recently released. In the context of the C. canephora genome sequencing project and to support post-genomics efforts, we developed the Coffee Genome Hub (http://coffee-genome.org/), an integrative genome information system that allows centralized access to genomics and genetics data and analysis tools to facilitate translational and applied research in coffee. We provide the complete genome sequence of C. canephora along with gene structure, gene product information, metabolism, gene families, transcriptomics, syntenic blocks, genetic markers and genetic maps. The hub relies on generic software (e.g. GMOD tools) for easy querying, visualizing and downloading research data. It includes a Genome Browser enhanced by a Community Annotation System, enabling the improvement of automatic gene annotation through an annotation editor. In addition, the hub aims at developing interoperability among other existing South Green tools managing coffee data (phylogenomics resources, SNPs) and/or supporting data analyses with the Galaxy workflow manager.","hji,kes",1,1,2,2,1,NA,value add +25404137,SuperFly: a comparative database for quantified spatio-temporal gene expression patterns in early dipteran embryos.,"We present SuperFly (http://superfly.crg.eu), a relational database for quantified spatio-temporal expression data of segmentation genes during early development in different species of dipteran insects (flies, midges and mosquitoes). SuperFly has a special focus on emerging non-drosophilid model systems. The database currently includes data of high spatio-temporal resolution for three species: the vinegar fly Drosophila melanogaster, the scuttle fly Megaselia abdita and the moth midge Clogmia albipunctata. 
At this point, SuperFly covers up to 9 genes and 16 time points per species, with a total of 1823 individual embryos. It provides an intuitive web interface, enabling the user to query and access original embryo images, quantified expression profiles, extracted positions of expression boundaries and integrated datasets, plus metadata and intermediate processing steps. SuperFly is a valuable new resource for the quantitative comparative study of gene expression patterns across dipteran species. Moreover, it provides an interesting test set for systems biologists interested in fitting mathematical gene network models to data. Both of these aspects are essential ingredients for progress toward a more quantitative and mechanistic understanding of developmental evolution.","hji,kes",1,1,2,2,1,NA,NA +25551368,PD_NGSAtlas: a reference database combining next-generation sequencing epigenomic and transcriptomic data for psychiatric disorders.,"

Background

Psychiatric disorders such as schizophrenia (SZ) and bipolar disorder (BP) are projected to lead the global disease burden within the next decade. Several lines of evidence suggest that epigenetic- or genetic-mediated dysfunction is frequently present in these disorders. To date, the inheritance patterns have been complicated by the problem of integrating epigenomic and transcriptomic factors that have yet to be elucidated. Therefore, there is a need to build a comprehensive database for storing epigenomic and transcriptomic data relating to psychiatric disorders.

Description

We have developed the PD_NGSAtlas, which focuses on the efficient storage of epigenomic and transcriptomic data based on next-generation sequencing and on the quantitative analyses of epigenetic and transcriptional alterations involved in psychiatric disorders. The current release of the PD_NGSAtlas contains 43 DNA methylation profiles and 37 transcription profiles detected by MeDIP-Seq and RNA-Seq, respectively, in two distinct brain regions and peripheral blood of SZ, BP and non-psychiatric controls. In addition to these data that were generated in-house, we have included, and will continue to include, published DNA methylation and gene expression data from other research groups, with a focus on psychiatric disorders. A flexible query engine has been developed for the acquisition of methylation profiles and transcription profiles for special genes or genomic regions of interest of the selected samples. Furthermore, the PD_NGSAtlas offers online tools for identifying aberrantly methylated and expressed events involved in psychiatric disorders. A genome browser has been developed to provide integrative and detailed views of multidimensional data in a given genomic context, which can help researchers understand molecular mechanisms from epigenetic and transcriptional perspectives. Moreover, users can download the methylation and transcription data for further analyses.

Conclusions

The PD_NGSAtlas aims to provide storage of epigenomic and transcriptomic data as well as quantitative analyses of epigenetic and transcriptional alterations involved in psychiatric disorders. The PD_NGSAtlas will be a valuable data resource and will enable researchers to investigate the pathophysiology and aetiology of disease in detail. The database is available at http://bioinfo.hrbmu.edu.cn/pd_ngsatlas/.","hji,kes",1,1,2,2,1,NA,NA +25632258,Araneae Sloveniae: a national spider species checklist.,"The research of the spider fauna of Slovenia dates back to the very beginning of binomial nomenclature, and has gone through more and less prolific phases with authors concentrating on taxonomy, faunistics, ecology and zoogeographic reviews. Although the body of published works is remarkable for a small nation, the faunistic data has remained too scattered for a thorough understanding of regional biotic diversity, for comparative and ecological research, and for informed conservation purposes. A national checklist is long overdue. Here, a critical review of all published records in any language is provided. The species list currently comprises 738 species, is published online at http://www.bioportal.si/katalog/araneae.php under the title Araneae Sloveniae, and will be updated in due course. This tool will fill the void in cataloguing regional spider faunas and will facilitate further araneological research in central and southern Europe.","hji,kes",1,1,2,2,1,tentative 1; Does this fit our description of biodata?,basically a curated listing of spiders +25922515,The Fossil Calibration Database-A New Resource for Divergence Dating.,"Fossils provide the principal basis for temporal calibrations, which are critical to the accuracy of divergence dating analyses. Translating fossil data into minimum and maximum bounds for calibrations is the most important-often least appreciated-step of divergence dating. 
Properly justified calibrations require the synthesis of phylogenetic, paleontological, and geological evidence and can be difficult for nonspecialists to formulate. The dynamic nature of the fossil record (e.g., new discoveries, taxonomic revisions, updates of global or local stratigraphy) requires that calibration data be updated continually lest they become obsolete. Here, we announce the Fossil Calibration Database (http://fossilcalibrations.org), a new open-access resource providing vetted fossil calibrations to the scientific community. Calibrations accessioned into this database are based on individual fossil specimens and follow best practices for phylogenetic justification and geochronological constraint. The associated Fossil Calibration Series, a calibration-themed publication series at Palaeontologia Electronica, will serve as a key pipeline for peer-reviewed calibrations to enter the database.","hji,kes",1,1,2,2,1,NA,NA +26061870,Human Chromosome Y and Haplogroups; introducing YDHS Database.,"

Background

As the high throughput sequencing efforts generate more biological information, scientists from different disciplines are interpreting the polymorphisms that make us unique. In addition, there is an increasing trend in general public to research their own genealogy, find distant relatives and to know more about their biological background. Commercial vendors are providing analyses of mitochondrial and Y-chromosomal markers for such purposes. Clearly, an easy-to-use free interface to the existing data on the identified variants would be in the interest of general public and professionals less familiar with the field. Here we introduce a novel metadatabase YDHS that aims to provide such an interface for Y-chromosomal DNA (Y-DNA) haplogroups and sequence variants.

Methods

The database uses ISOGG Y-DNA tree as the source of mutations and haplogroups and by using genomic positions of the mutations the database links them to genes and other biological entities. YDHS contains analysis tools for deeper Y-SNP analysis.

Results

YDHS addresses the shortage of Y-DNA related databases. We have tested our database using a set of different cases from literature ranging from infertility to autism. The database is at http://www.semanticgen.net/ydhs

Conclusions

Y-chromosomal DNA (Y-DNA) haplogroups and sequence variants have not been in the scientific limelight, excluding certain specialized fields like forensics, mainly because there is not much freely available information or it is scattered in different sources. However, as we have demonstrated Y-SNPs do play a role in various cases on the haplogroup level and it is possible to create a free Y-DNA dedicated bioinformatics resource.","hji,kes",1,1,2,2,1,NA,NA +26138588,SmedGD 2.0: The Schmidtea mediterranea genome database.,"Planarians have emerged as excellent models for the study of key biological processes such as stem cell function and regulation, axial polarity specification, regeneration, and tissue homeostasis among others. The most widely used organism for these studies is the free-living flatworm Schmidtea mediterranea. In 2007, the Schmidtea mediterranea Genome Database (SmedGD) was first released to provide a much needed resource for the small, but growing planarian community. SmedGD 1.0 has been a depository for genome sequence, a draft assembly, and related experimental data (e.g., RNAi phenotypes, in situ hybridization images, and differential gene expression results). We report here a comprehensive update to SmedGD (SmedGD 2.0) that aims to expand its role as an interactive community resource. The new database includes more recent, and up-to-date transcription data, provides tools that enhance interconnectivity between different genome assemblies and transcriptomes, including next-generation assemblies for both the sexual and asexual biotypes of S. mediterranea. SmedGD 2.0 (http://smedgd.stowers.org) not only provides significantly improved gene annotations, but also tools for data sharing, attributes that will help both the planarian and biomedical communities to more efficiently mine the genomics and transcriptomics of S. mediterranea.","hji,kes",1,1,2,2,1,NA,NA +26243198,miRegulome: a knowledge-base of miRNA regulomics and analysis.,"

Unlabelled

miRNAs regulate post transcriptional gene expression by targeting multiple mRNAs and hence can modulate multiple signalling pathways, biological processes, and patho-physiologies. Therefore, understanding of miRNA regulatory networks is essential in order to modulate the functions of a miRNA. The focus of several existing databases is to provide information on specific aspects of miRNA regulation. However, an integrated resource on the miRNA regulome is currently not available to facilitate the exploration and understanding of miRNA regulomics. miRegulome attempts to bridge this gap. The current version of miRegulome v1.0 provides details on the entire regulatory modules of miRNAs altered in response to chemical treatments and transcription factors, based on validated data manually curated from published literature. Modules of miRegulome (upstream regulators, downstream targets, miRNA regulated pathways, functions, diseases, etc) are hyperlinked to an appropriate external resource and are displayed visually to provide a comprehensive understanding. Four analysis tools are incorporated to identify relationships among different modules based on user specified datasets. miRegulome and its tools are helpful in understanding the biology of miRNAs and will also facilitate the discovery of biomarkers and therapeutics. With added features in upcoming releases, miRegulome will be an essential resource to the scientific community.

Availability

http://bnet.egr.vcu.edu/miRegulome.","hji,kes",1,1,2,2,1,NA,NA +26321999,Speech error and tip of the tongue diary for mobile devices.,"Collections of various types of speech errors have increased our understanding of the acquisition, production, and perception of language. Although such collections of naturally occurring language errors are invaluable for a number of reasons, the process of collecting various types of speech errors presents many challenges to the researcher interested in building such a collection, among them a significant investment of time and effort to obtain a sufficient number of examples to enable statistical analysis. Here we describe a freely accessible website http://spedi.ku.edu that helps users document slips of the tongue, slips of the ear, and tip of the tongue states that they experience firsthand or observe in others. The documented errors are amassed, and made available for other users to analyze, thereby distributing the time and effort involved in collecting errors across a large number of individuals instead of saddling the lone researcher, and facilitating distribution of the collection to other researchers. This approach also addresses some issues related to data curation that hampered previous error collections, and enables the collection to continue to grow over a longer period of time than previous collections. Finally, this web-based tool creates an opportunity for language scientists to engage in outreach efforts to increase the understanding of language disorders and research in the general public.","hji,kes",1,1,2,2,1,tentative 1; Is this biodata?,NA +26322998,CTDB: An Integrated Chickpea Transcriptome Database for Functional and Applied Genomics.,"Chickpea is an important grain legume used as a rich source of protein in human diet. 
The narrow genetic diversity and limited availability of genomic resources are the major constraints in implementing breeding strategies and biotechnological interventions for genetic enhancement of chickpea. We developed an integrated Chickpea Transcriptome Database (CTDB), which provides the comprehensive web interface for visualization and easy retrieval of transcriptome data in chickpea. The database features many tools for similarity search, functional annotation (putative function, PFAM domain and gene ontology) search and comparative gene expression analysis. The current release of CTDB (v2.0) hosts transcriptome datasets with high quality functional annotation from cultivated (desi and kabuli types) and wild chickpea. A catalog of transcription factor families and their expression profiles in chickpea are available in the database. The gene expression data have been integrated to study the expression profiles of chickpea transcripts in major tissues/organs and various stages of flower development. The utilities, such as similarity search, ortholog identification and comparative gene expression have also been implemented in the database to facilitate comparative genomic studies among different legumes and Arabidopsis. Furthermore, the CTDB represents a resource for the discovery of functional molecular markers (microsatellites and single nucleotide polymorphisms) between different chickpea types. We anticipate that integrated information content of this database will accelerate the functional and applied genomic research for improvement of chickpea. The CTDB web service is freely available at http://nipgr.res.in/ctdb.html.","hji,kes",1,1,2,2,1,NA,NA +26387108,The Protein Ensemble Database.,"The scientific community's major conceptual notion of structural biology has recently shifted in emphasis from the classical structure-function paradigm due to the emergence of intrinsically disordered proteins (IDPs). 
As opposed to their folded cousins, these proteins are defined by the lack of a stable 3D fold and a high degree of inherent structural heterogeneity that is closely tied to their function. Due to their flexible nature, solution techniques such as small-angle X-ray scattering (SAXS), nuclear magnetic resonance (NMR) spectroscopy and fluorescence resonance energy transfer (FRET) are particularly well-suited for characterizing their biophysical properties. Computationally derived structural ensembles based on such experimental measurements provide models of the conformational sampling displayed by these proteins, and they may offer valuable insights into the functional consequences of inherent flexibility. The Protein Ensemble Database (http://pedb.vib.be) is the first openly accessible, manually curated online resource storing the ensemble models, protocols used during the calculation procedure, and underlying primary experimental data derived from SAXS and/or NMR measurements. By making this previously inaccessible data freely available to researchers, this novel resource is expected to promote the development of more advanced modelling methodologies, facilitate the design of standardized calculation protocols, and consequently lead to a better understanding of how function arises from the disordered state.","hji,kes",1,1,2,2,1,NA,NA +26432833,iPPI-DB: an online database of modulators of protein-protein interactions.,"In order to boost the identification of low-molecular-weight drugs on protein-protein interactions (PPI), it is essential to properly collect and annotate experimental data about successful examples. This provides the scientific community with the necessary information to derive trends about privileged physicochemical properties and chemotypes that maximize the likelihood of promoting a given chemical probe to the most advanced stages of development. 
To this end we have developed iPPI-DB (freely accessible at http://www.ippidb.cdithem.fr), a database that contains the structure, some physicochemical characteristics, the pharmacological data and the profile of the PPI targets of several hundreds modulators of protein-protein interactions. iPPI-DB is accessible through a web application and can be queried according to two general approaches: using physicochemical/pharmacological criteria; or by chemical similarity to a user-defined structure input. In both cases the results are displayed as a sortable and exportable datasheet with links to external databases such as Uniprot, PubMed. Furthermore each compound in the table has a link to an individual ID card that contains its physicochemical and pharmacological profile derived from iPPI-DB data. This includes information about its binding data, ligand and lipophilic efficiencies, location in the PPI chemical space, and importantly similarity with known drugs, and links to external databases like PubChem, and ChEMBL.","hji,kes",1,1,2,2,1,NA,NA +26515641,Mouse polyQ database: a new online resource for research using mouse models of neurodegenerative diseases.,"

Background

The polyglutamine (polyQ) family of disorders comprises 9 genetic diseases, including several types of ataxia and Huntington disease. Approximately two decades of investigation and the creation of more than 130 mouse models of polyQ disorders have revealed many similarities between these diseases. The disorders share common mutation types, neurological characteristics and certain aspects of pathogenesis, including morphological and physiological neuronal alterations. All of the diseases still remain incurable.

Description

The large volume of information collected as a result of the investigation of polyQ models currently represents a great potential for searching, comparing and translating pathogenesis and therapeutic information between diseases. Therefore, we generated a public database comprising the polyQ mouse models, phenotypes and therapeutic interventions tested in vivo. The database is available at http://conyza.man.poznan.pl/ .

Conclusion

The use of the database in the field of polyQ diseases may accelerate research on these and other neurodegenerative diseases and provide new perspectives for future investigation.","hji,kes",1,1,2,2,1,NA,NA +26553798,InterRNA: a database of base interactions in RNA structures.,"A major component of RNA structure stabilization are the hydrogen bonded interactions between the base residues. The importance and biological relevance for large clusters of base interactions can be much more easily investigated when their occurrences have been systematically detected, catalogued and compared. In this paper, we describe the database InterRNA (INTERactions in RNA structures database-http://mfrlab.org/interrna/) that contains records of known RNA 3D motifs as well as records for clusters of bases that are interconnected by hydrogen bonds. The contents of the database were compiled from RNA structural annotations carried out by the NASSAM (http://mfrlab.org/grafss/nassam) and COGNAC (http://mfrlab.org/grafss/cognac) computer programs. An analysis of the database content and comparisons with the existing corpus of knowledge regarding RNA 3D motifs clearly show that InterRNA is able to provide an extension of the annotations for known motifs as well as able to provide novel interactions for further investigations.","hji,kes",1,1,2,2,1,NA,NA +26582915,piRNA cluster database: a web resource for piRNA producing loci.,"Piwi proteins and their guiding small RNAs, termed Piwi-interacting (pi-) RNAs, are essential for silencing of transposons in the germline of animals. A substantial fraction of piRNAs originates from genomic loci termed piRNA clusters and sequences encoded in these piRNA clusters determine putative targets for the Piwi/piRNA system. 
In the past decade, studies of piRNA transcriptomes in different species revealed additional roles for piRNAs beyond transposon silencing, reflecting the astonishing plasticity of the Piwi/piRNA system along different phylogenetic branches. Moreover, piRNA transcriptomes can change drastically during development and vary across different tissues.Since piRNA clusters crucially shape piRNA profiles, analysis of these loci is imperative for a thorough understanding of functional and evolutionary aspects of the piRNA pathway. But despite the ever-growing amount of available piRNA sequence data, we know little about the factors that determine differential regulation of piRNA clusters, nor the evolutionary events that cause their gain or loss.In order to facilitate addressing these subjects, we established a user-friendly piRNA cluster database (http://www.smallrnagroup-mainz.de/piRNAclusterDB.html) that provides comprehensive data on piRNA clusters in multiple species, tissues and developmental stages based on small RNA sequence data deposited at NCBI's Sequence Read Archive (SRA).","hji,kes",1,1,2,2,1,NA,NA +26582919,EBI metagenomics in 2016--an expanding and evolving resource for the analysis and archiving of metagenomic data.,"EBI metagenomics (https://www.ebi.ac.uk/metagenomics/) is a freely available hub for the analysis and archiving of metagenomic and metatranscriptomic data. Over the last 2 years, the resource has undergone rapid growth, with an increase of over five-fold in the number of processed samples and consequently represents one of the largest resources of analysed shotgun metagenomes. Here, we report the status of the resource in 2016 and give an overview of new developments. In particular, we describe updates to data content, a complete overhaul of the analysis pipeline, streamlining of data presentation via the website and the development of a new web based tool to compare functional analyses of sequence runs within a study. 
We also highlight two of the higher profile projects that have been analysed using the resource in the last year: the oceanographic projects Ocean Sampling Day and Tara Oceans.","hji,kes",1,1,2,2,1,NA,knowledgebase +26590405,TSGene 2.0: an updated literature-based knowledgebase for tumor suppressor genes.,"Tumor suppressor genes (TSGs) are a major type of gatekeeper genes in the cell growth. A knowledgebase with the systematic collection and curation of TSGs in multiple cancer types is critically important for further studying their biological functions as well as for developing therapeutic strategies. Since its development in 2012, the Tumor Suppressor Gene database (TSGene), has become a popular resource in the cancer research community. Here, we reported the TSGene version 2.0, which has substantial updates of contents (e.g. up-to-date literature and pan-cancer genomic data collection and curation), data types (noncoding RNAs and protein-coding genes) and content accessibility. Specifically, the current TSGene 2.0 contains 1217 human TSGs (1018 protein-coding and 199 non-coding genes) curated from over 9000 articles. Additionally, TSGene 2.0 provides thousands of expression and mutation patterns derived from pan-cancer data of The Cancer Genome Atlas. A new web interface is available at http://bioinfo.mc.vanderbilt.edu/TSGene/. Systematic analyses of 199 non-coding TSGs provide numerous cancer-specific non-coding mutational events for further screening and clinical use. Intriguingly, we identified 49 protein-coding TSGs that were consistently down-regulated in 11 cancer types. In summary, TSGene 2.0, which is the only available database for TSGs, provides the most updated TSGs and their features in pan-cancer.","hji,kes",1,1,2,2,1,NA,NA +26612867,The Dfam database of repetitive DNA families.,"Repetitive DNA, especially that due to transposable elements (TEs), makes up a large fraction of many genomes. 
Dfam is an open access database of families of repetitive DNA elements, in which each family is represented by a multiple sequence alignment and a profile hidden Markov model (HMM). The initial release of Dfam, featured in the 2013 NAR Database Issue, contained 1143 families of repetitive elements found in humans, and was used to produce more than 100 Mb of additional annotation of TE-derived regions in the human genome, with improved speed. Here, we describe recent advances, most notably expansion to 4150 total families including a comprehensive set of known repeat families from four new organisms (mouse, zebrafish, fly and nematode). We describe improvements to coverage, and to our methods for identifying and reducing false annotation. We also describe updates to the website interface. The Dfam website has moved to http://dfam.org. Seed alignments, profile HMMs, hit lists and other underlying data are available for download.","hji,kes",1,1,2,2,1,NA,NA +26644461,ALCOdb: Gene Coexpression Database for Microalgae.,"In the era of energy and food shortage, microalgae have gained much attention as promising sources of biofuels and food ingredients. However, only a small fraction of microalgal genes have been functionally characterized. Here, we have developed the Algae Gene Coexpression database (ALCOdb; http://alcodb.jp), which provides gene coexpression information to survey gene modules for a function of interest. ALCOdb currently supports two model algae: the green alga Chlamydomonas reinhardtii and the red alga Cyanidioschyzon merolae. Users can retrieve coexpression information for genes of interest through three unique data pages: (i) Coexpressed Gene List; (ii) Gene Information; and (iii) Coexpressed Gene Network. In addition to the basal coexpression information, ALCOdb also provides several advanced functionalities such as an expression profile viewer and a differentially expressed gene search tool. 
Using these user interfaces, we demonstrated that our gene coexpression data have the potential to detect functionally related genes and are useful in extrapolating the biological roles of uncharacterized genes. ALCOdb will facilitate molecular and biochemical studies of microalgal biological phenomena, such as lipid metabolism and organelle development, and promote the evolutionary understanding of plant cellular systems.","hji,kes",1,1,2,2,1,NA,NA +26657893,"HRGRN: A Graph Search-Empowered Integrative Database of Arabidopsis Signaling Transduction, Metabolism and Gene Regulation Networks.","The biological networks controlling plant signal transduction, metabolism and gene regulation are composed of not only tens of thousands of genes, compounds, proteins and RNAs but also the complicated interactions and co-ordination among them. These networks play critical roles in many fundamental mechanisms, such as plant growth, development and environmental response. Although much is known about these complex interactions, the knowledge and data are currently scattered throughout the published literature, publicly available high-throughput data sets and third-party databases. Many 'unknown' yet important interactions among genes need to be mined and established through extensive computational analysis. However, exploring these complex biological interactions at the network level from existing heterogeneous resources remains challenging and time-consuming for biologists. Here, we introduce HRGRN, a graph search-empowered integrative database of Arabidopsis signal transduction, metabolism and gene regulatory networks. HRGRN utilizes Neo4j, which is a highly scalable graph database management system, to host large-scale biological interactions among genes, proteins, compounds and small RNAs that were either validated experimentally or predicted computationally. 
The associated biological pathway information was also specially marked for the interactions that are involved in the pathway to facilitate the investigation of cross-talk between pathways. Furthermore, HRGRN integrates a series of graph path search algorithms to discover novel relationships among genes, compounds, RNAs and even pathways from heterogeneous biological interaction data that could be missed by traditional SQL database search methods. Users can also build subnetworks based on known interactions. The outcomes are visualized with rich text, figures and interactive network graphs on web pages. The HRGRN database is freely available at http://plantgrn.noble.org/hrgrn/.","hji,kes",1,1,2,2,1,NA,NA +26777304,dbEM: A database of epigenetic modifiers curated from cancerous and normal genomes.,"We have developed a database called dbEM (database of Epigenetic Modifiers) to maintain the genomic information of about 167 epigenetic modifiers/proteins, which are considered as potential cancer targets. In dbEM, modifiers are classified on functional basis and comprise of 48 histone methyl transferases, 33 chromatin remodelers and 31 histone demethylases. dbEM maintains the genomic information like mutations, copy number variation and gene expression in thousands of tumor samples, cancer cell lines and healthy samples. This information is obtained from public resources viz. COSMIC, CCLE and 1000-genome project. Gene essentiality data retrieved from COLT database further highlights the importance of various epigenetic proteins for cancer survival. We have also reported the sequence profiles, tertiary structures and post-translational modifications of these epigenetic proteins in cancer. It also contains information of 54 drug molecules against different epigenetic proteins. A wide range of tools have been integrated in dbEM e.g. Search, BLAST, Alignment and Profile based prediction. 
In our analysis, we found that epigenetic proteins DNMT3A, HDAC2, KDM6A, and TET2 are highly mutated in variety of cancers. We are confident that dbEM will be very useful in cancer research particularly in the field of epigenetic proteins based cancer therapeutics. This database is available for public at URL: http://crdd.osdd.net/raghava/dbem.","hji,kes",1,1,2,2,1,NA,NA +27037912,GAMDB: a web resource to connect microRNAs with autophagy in gerontology.,"

Objectives

MicroRNAs (miRNAs) are endogenous ~23 nucleotides (nt) RNAs, regulating gene expression by pairing to the mRNAs of protein-coding genes to direct their post-transcriptional repression. Both in normal and aberrant activities, miRNAs contribute to a recurring paradigm of cellular behaviors in pathological settings, especially in gerontology. Autophagy, a multi-step lysosomal degradation process with function to degrade long-lived proteins and damaged organelles, has significant impact on gerontology. Thus, elucidating how miRNAs participate in autophagy may enlarge the scope of miRNA in autophagy and facilitate researches in gerontology.

Materials and methods

Herein, based upon the published studies, predicted targets and gerontology-related diseases, we constructed a web resource named Gerontology-Autophagic-MicroRNA Database (GAMDB) (http://gamdb.liu-lab.com/index.php), which contained 836 autophagy-related miRNAs, 197 targeted genes/proteins and 56 aging-related diseases such as Parkinson' disease, Alzheimer's disease and Huntington's disease.

Results and conclusion

We made use of large amounts of data to elucidate the intricate relationships between microRNA-regulated autophagic mechanisms and gerontology. This database will facilitate better understanding of autophagy regulation network in gerontology and thus promoting gerontology-related therapy in the future.","hji,kes",1,1,2,2,1,NA,NA +27188311,A comprehensive database of high-throughput sequencing-based RNA secondary structure probing data (Structure Surfer).,"

Background

RNA molecules fold into complex three-dimensional shapes, guided by the pattern of hydrogen bonding between nucleotides. This pattern of base pairing, known as RNA secondary structure, is critical to their cellular function. Recently several diverse methods have been developed to assay RNA secondary structure on a transcriptome-wide scale using high-throughput sequencing. Each approach has its own strengths and caveats, however there is no widely available tool for visualizing and comparing the results from these varied methods.

Methods

To address this, we have developed Structure Surfer, a database and visualization tool for inspecting RNA secondary structure in six transcriptome-wide data sets from human and mouse ( http://tesla.pcbi.upenn.edu/strucuturesurfer/ ). The data sets were generated using four different high-throughput sequencing based methods. Each one was analyzed with a scoring pipeline specific to its experimental design. Users of Structure Surfer have the ability to query individual loci as well as detect trends across multiple sites.

Results

Here, we describe the included data sets and their differences. We illustrate the database's function by examining known structural elements and we explore example use cases in which combined data is used to detect structural trends.

Conclusions

In total, Structure Surfer provides an easy-to-use database and visualization interface for allowing users to interrogate the currently available transcriptome-wide RNA secondary structure information for mammals.","hji,kes",1,1,2,2,1,NA,NA +27199454,Comprehensive database of human E3 ubiquitin ligases: application to aquaporin-2 regulation.,"Aquaporin-2 (AQP2) is regulated in part via vasopressin-mediated changes in protein half-life that are in turn dependent on AQP2 ubiquitination. Here we addressed the question, """"What E3 ubiquitin ligase is most likely to be responsible for AQP2 ubiquitination?"""" using large-scale data integration based on Bayes' rule. The first step was to bioinformatically identify all E3 ligase genes coded by the human genome. The 377 E3 ubiquitin ligases identified in the human genome, consisting predominant of HECT, RING, and U-box proteins, have been used to create a publically accessible and downloadable online database (https://hpcwebapps.cit.nih.gov/ESBL/Database/E3-ligases/). We also curated a second database of E3 ligase accessory proteins that included BTB domain proteins, cullins, SOCS-box proteins, and F-box proteins. Using Bayes' theorem to integrate information from multiple large-scale proteomic and transcriptomic datasets, we ranked these 377 E3 ligases with respect to their probability of interaction with AQP2. Application of Bayes' rule identified the E3 ligases most likely to interact with AQP2 as (in order of probability): NEDD4 and NEDD4L (tied for first), AMFR, STUB1, ITCH, ZFPL1. Significantly, the two E3 ligases tied for top rank have also been studied extensively in the reductionist literature as regulatory proteins in renal tubule epithelia. 
The concordance of conclusions from reductionist and systems-level data provides strong motivation for further studies of the roles of NEDD4 and NEDD4L in the regulation of AQP2 protein turnover.","hji,kes",1,1,2,2,1,NA,NA +27242836,VESPUCCI: Exploring Patterns of Gene Expression in Grapevine.,"Large-scale transcriptional studies aim to decipher the dynamic cellular responses to a stimulus, like different environmental conditions. In the era of high-throughput omics biology, the most used technologies for these purposes are microarray and RNA-Seq, whose data are usually required to be deposited in public repositories upon publication. Such repositories have the enormous potential to provide a comprehensive view of how different experimental conditions lead to expression changes, by comparing gene expression across all possible measured conditions. Unfortunately, this task is greatly impaired by differences among experimental platforms that make direct comparisons difficult. In this paper, we present the Vitis Expression Studies Platform Using COLOMBOS Compendia Instances (VESPUCCI), a gene expression compendium for grapevine which was built by adapting an approach originally developed for bacteria, and show how it can be used to investigate complex gene expression patterns. We integrated nearly all publicly available microarray and RNA-Seq expression data: 1608 gene expression samples from 10 different technological platforms. Each sample has been manually annotated using a controlled vocabulary developed ad hoc to ensure both human readability and computational tractability. Expression data in the compendium can be visually explored using several tools provided by the web interface or can be programmatically accessed using the REST interface. 
VESPUCCI is freely accessible at http://vespucci.colombos.fmach.it.","hji,kes",1,1,2,2,1,NA,NA +27451428,SZDB: A Database for Schizophrenia Genetic Research.,"Schizophrenia (SZ) is a debilitating brain disorder with a complex genetic architecture. Genetic studies, especially recent genome-wide association studies (GWAS), have identified multiple variants (loci) conferring risk to SZ. However, how to efficiently extract meaningful biological information from bulk genetic findings of SZ remains a major challenge. There is a pressing need to integrate multiple layers of data from various sources, eg, genetic findings from GWAS, copy number variations (CNVs), association and linkage studies, gene expression, protein-protein interaction (PPI), co-expression, expression quantitative trait loci (eQTL), and Encyclopedia of DNA Elements (ENCODE) data, to provide a comprehensive resource to facilitate the translation of genetic findings into SZ molecular diagnosis and mechanism study. Here we developed the SZDB database (http://www.szdb.org/), a comprehensive resource for SZ research. SZ genetic data, gene expression data, network-based data, brain eQTL data, and SNP function annotation information were systematically extracted, curated and deposited in SZDB. In-depth analyses and systematic integration were performed to identify top prioritized SZ genes and enriched pathways. Multiple types of data from various layers of SZ research were systematically integrated and deposited in SZDB. In-depth data analyses and integration identified top prioritized SZ genes and enriched pathways. We further showed that genes implicated in SZ are highly co-expressed in human brain and proteins encoded by the prioritized SZ risk genes are significantly interacted. The user-friendly SZDB provides high-confidence candidate variants and genes for further functional characterization. 
More important, SZDB provides convenient online tools for data search and browse, data integration, and customized data analyses.","hji,kes",1,1,2,2,1,tentative 1; Clinical type data. Is it biodata?,NA +27484196,iLIR database: A web resource for LIR motif-containing proteins in eukaryotes.,"Atg8-family proteins are the best-studied proteins of the core autophagic machinery. They are essential for the elongation and closure of the phagophore into a proper autophagosome. Moreover, Atg8-family proteins are associated with the phagophore from the initiation of the autophagic process to, or just prior to, the fusion between autophagosomes with lysosomes. In addition to their implication in autophagosome biogenesis, they are crucial for selective autophagy through their ability to interact with selective autophagy receptor proteins necessary for the specific targeting of substrates for autophagic degradation. In the past few years it has been revealed that Atg8-interacting proteins include not only receptors but also components of the core autophagic machinery, proteins associated with vesicles and their transport, and specific proteins that are selectively degraded by autophagy. Atg8-interacting proteins contain a short linear LC3-interacting region/LC3 recognition sequence/Atg8-interacting motif (LIR/LRS/AIM) motif which is responsible for their interaction with Atg8-family proteins. These proteins are referred to as LIR-containing proteins (LIRCPs). So far, many experimental efforts have been carried out to identify new LIRCPs, leading to the characterization of some of them in the past 10years. Given the need for the identification of LIRCPs in various organisms, we developed the iLIR database ( https://ilir.warwick.ac.uk ) as a freely available web resource, listing all the putative canonical LIRCPs identified in silico in the proteomes of 8 model organisms using the iLIR server, combined with a Gene Ontology (GO) term analysis. 
Additionally, a curated text-mining analysis of the literature permitted us to identify novel putative LICRPs in mammals that have not previously been associated with autophagy.","hji,kes",1,1,2,2,1,NA,value add and available +27794041,NGSmethDB 2017: enhanced methylomes and differential methylation.,"The 2017 update of NGSmethDB stores whole genome methylomes generated from short-read data sets obtained by bisulfite sequencing (WGBS) technology. To generate high-quality methylomes, stringent quality controls were integrated with third-part software, adding also a two-step mapping process to exploit the advantages of the new genome assembly models. The samples were all profiled under constant parameter settings, thus enabling comparative downstream analyses. Besides a significant increase in the number of samples, NGSmethDB now includes two additional data-types, which are a valuable resource for the discovery of methylation epigenetic biomarkers: (i) differentially methylated single-cytosines; and (ii) methylation segments (i.e. genome regions of homogeneous methylation). The NGSmethDB back-end is now based on MongoDB, a NoSQL hierarchical database using JSON-formatted documents and dynamic schemas, thus accelerating sample comparative analyses. Besides conventional database dumps, track hubs were implemented, which improved database access, visualization in genome browsers and comparative analyses to third-part annotations. In addition, the database can be also accessed through a RESTful API. Lastly, a Python client and a multiplatform virtual machine allow for program-driven access from user desktop. This way, private methylation data can be compared to NGSmethDB without the need to upload them to public servers. 
Database website: http://bioinfo2.ugr.es/NGSmethDB.","hji,kes",1,1,2,2,1,NA,NA +27841751,A public database of macromolecular diffraction experiments.,"The low reproducibility of published experimental results in many scientific disciplines has recently garnered negative attention in scientific journals and the general media. Public transparency, including the availability of `raw' experimental data, will help to address growing concerns regarding scientific integrity. Macromolecular X-ray crystallography has led the way in requiring the public dissemination of atomic coordinates and a wealth of experimental data, making the field one of the most reproducible in the biological sciences. However, there remains no mandate for public disclosure of the original diffraction data. The Integrated Resource for Reproducibility in Macromolecular Crystallography (IRRMC) has been developed to archive raw data from diffraction experiments and, equally importantly, to provide related metadata. Currently, the database of our resource contains data from 2920 macromolecular diffraction experiments (5767 data sets), accounting for around 3% of all depositions in the Protein Data Bank (PDB), with their corresponding partially curated metadata. IRRMC utilizes distributed storage implemented using a federated architecture of many independent storage servers, which provides both scalability and sustainability. The resource, which is accessible via the web portal at http://www.proteindiffraction.org, can be searched using various criteria. All data are available for unrestricted access and download. The resource serves as a proof of concept and demonstrates the feasibility of archiving raw diffraction data and associated metadata from X-ray crystallographic studies of biological macromolecules. 
The goal is to expand this resource and include data sets that failed to yield X-ray structures in order to facilitate collaborative efforts that will improve protein structure-determination methods and to ensure the availability of `orphan' data left behind for various reasons by individual investigators and/or extinct structural genomics projects.","hji,kes",1,1,2,2,1,NA,NA +27899625,YM500v3: a database for small RNA sequencing in human cancer research.,"We previously presented the YM500 database, which contains >8000 small RNA sequencing (smRNA-seq) data sets and integrated analysis results for various cancer miRNome studies. In the updated YM500v3 database (http://ngs.ym.edu.tw/ym500/) presented herein, we not only focus on miRNAs but also on other functional small non-coding RNAs (sncRNAs), such as PIWI-interacting RNAs (piRNAs), tRNA-derived fragments (tRFs), small nuclear RNAs (snRNAs) and small nucleolar RNAs (snoRNAs). There is growing knowledge of the role of sncRNAs in gene regulation and tumorigenesis. We have also incorporated >10 000 cancer-related RNA-seq and >3000 more smRNA-seq data sets into the YM500v3 database. Furthermore, there are two main new sections, 'Survival' and 'Cancer', in this updated version. The 'Survival' section provides the survival analysis results in all cancer types or in a user-defined group of samples for a specific sncRNA. The 'Cancer' section provides the results of differential expression analyses, miRNA-gene interactions and cancer miRNA-related pathways. In the 'Expression' section, sncRNA expression profiles across cancer and sample types are newly provided. 
Cancer-related sncRNAs hold potential for both biotech applications and basic research.","hji,kes",1,1,2,2,1,NA,NA +27980519,ContaMiner and ContaBase: a webserver and database for early identification of unwantedly crystallized protein contaminants.,"Solving the phase problem in protein X-ray crystallography relies heavily on the identity of the crystallized protein, especially when molecular replacement (MR) methods are used. Yet, it is not uncommon that a contaminant crystallizes instead of the protein of interest. Such contaminants may be proteins from the expression host organism, protein fusion tags or proteins added during the purification steps. Many contaminants co-purify easily, crystallize and give good diffraction data. Identification of contaminant crystals may take time, since the presence of the contaminant is unexpected and its identity unknown. A webserver (ContaMiner) and a contaminant database (ContaBase) have been established, to allow fast MR-based screening of crystallographic data against currently 62 known contaminants. The web-based ContaMiner (available at http://strube.cbrc.kaust.edu.sa/contaminer/) currently produces results in 5 min to 4 h. The program is also available in a github repository and can be installed locally. ContaMiner enables screening of novel crystals at synchrotron beamlines, and it would be valuable as a routine safety check for 'crystallization and preliminary X-ray analysis' publications. Thus, in addition to potentially saving X-ray crystallographers much time and effort, ContaMiner might considerably lower the risk of publishing erroneous data.","hji,kes",1,1,2,2,1,NA,NA +28090394,GExplore 1.4: An expanded web interface for queries on Caenorhabditis elegans protein and gene function.,"Genetic high-throughput experiments often result in hundreds or thousands of genes satisfying certain experimental conditions. Grouping and prioritizing a large number of genes for further analysis can be a time-consuming challenge. 
In 2009 we developed a web-based user interface, GExplore, to assist with large-scale data-mining related to gene function in Caenorhabditis elegans. The underlying database contained information about Caenorhabditis elegans genes and proteins including domain organization of the proteins, phenotypic descriptions, expression data and Gene Ontology Consortium annotations. These data enable users to quickly obtain an overview of biological and biochemical functions of a large number of genes at once. Since its inception the underlying database has been updated and expanded significantly. Here we describe the current version of GExplore 1.4, documenting the changes since the original release. GExplore 1.4 now contains information about the domain organization of the proteomes of 9 nematode species, can display the location of Caenorhabditis elegans mutations with respect to the domain organization of the proteins, and includes stage-specific RNAseq gene expression data generated by the modENCODE project. The underlying database has been reorganized to facilitate independent updates of the different parts of the database and to allow the addition of novel data sets in the future. The web interface is available under http://genome.sfu.ca/gexplore.","hji,kes",1,1,2,2,1,NA,NA +28149703,Expanding our understanding of the trade in marine aquarium animals.,"The trade of live marine animals for home and public aquaria has grown into a major global industry. Millions of marine fishes and invertebrates are removed from coral reefs and associated habitats each year. The majority are imported into the United States, with the remainder sent to Europe, Japan, and a handful of other countries. Despite the recent growth and diversification of the aquarium trade, to date, data collection is not mandatory, and hence comprehensive information on species volume and diversity is lacking. This lack of information makes it impossible to study trade pathways. 
Without species-specific volume and diversity data, it is unclear how importing and exporting governments can oversee this industry effectively or how sustainability should be encouraged. To expand our knowledge and understanding of the trade, and to effectively communicate this new understanding, we introduce the publically-available Marine Aquarium Biodiversity and Trade Flow online database (https://www.aquariumtradedata.org/). This tool was created to communicate the volume and diversity of marine fishes and/or invertebrates imported into the US over three complete years (2008, 2009, and 2011) and three partial years (2000, 2004, 2005). To create this tool, invoices pertaining to shipments of live marine fishes and invertebrates were scanned and analyzed for species name, species quantities, country of origin, port of entry, and city of import destination. Here we focus on the analysis of the later three years of data and also produce an estimate for the entirety of 2000, 2004, and 2005. The three-year aggregate totals (2008, 2009, 2011) indicate that just under 2,300 fish and 725 invertebrate species were imported into the US cumulatively, although just under 1,800 fish and 550 invertebrate species were traded annually. Overall, the total number of live marine animals decreased between 2008 and 2011. In 2008, 2009, and 2011, the total number of individual fish (8.2, 7.3, and 6.9 million individuals) and invertebrates (4.2, 3.7, and 3.6 million individuals) assessed by analyzing the invoice data are roughly 60% of the total volumes recorded through the Law Enforcement Management Information System (LEMIS) dataset. Using these complete years, we back-calculated the number of individuals of both fishes and invertebrates imported in 2000, 2004, and 2005. These estimates (9.3, 10.8, and 11.2 million individual fish per year) were consistent with the three years of complete data. 
We also use these data to understand the global trade in two species (Banggai cardinalfish, Pterapogon kauderni, and orange clownfish, Amphiprion ocellaris / percula) recently considered for Endangered Species Act listing. Aquariumtradedata.org can help create more effective management plans for the traded species, and ideally could be implemented at key trade ports to better assess the global trade of aquatic wildlife.","hji,kes",1,1,2,2,1,NA,NA +28203233,Exo-miRExplorer: A Comprehensive Resource for Exploring and Comparatively Analyzing Exogenous MicroRNAs.,"MicroRNAs (miRNAs) are small regulatory RNAs that play important roles in animals, plants, and viruses. Deep-sequencing technology has been widely adopted in miRNA investigations. However, it is still a big mystery why nearly all sequencing data contain miRNA sequences from exogenous species, called exo-miRNAs. In this study, we developed a novel platform, exo-miRExplorer, for mining and identifying exo-miRNAs from high-throughput small RNA sequencing experiments which originated from tissues and cell lines of multiple organisms. Thousands of exo-miRNAs are characterized with their expression abundance, the RNA families, original organisms and the sequencing platforms presented in exo-miRExplorer. Subsequently, we used exo-miRExplorer to perform further analysis. Comparative analysis of the exo-miRNAs between different sequencing datasets revealed significant correlation of exo-miRNAs between experiments in the same study. The plant-derived exo-miRNAs analysis provided robust evidence for non-diet source of exo-miRNAs. Virus-derived exo-miRNA analysis showed that pathogen RNAs could transfer to host cells and exist in deep-sequencing result at abundance level. In conclusion, exo-miRExplorer provides users with an integrative resource to facilitate detection and analysis of exo-miRNAs. 
exo-miRExplorer is available at the following URL: http://rna.sysu.edu.cn/exomiRDB/.","hji,kes",1,1,2,2,1,NA,NA +28387199,GSA: Genome Sequence Archive.,"With the rapid development of sequencing technologies towards higher throughput and lower cost, sequence data are generated at an unprecedentedly explosive rate. To provide an efficient and easy-to-use platform for managing huge sequence data, here we present Genome Sequence Archive (GSA; http://bigd.big.ac.cn/gsa or http://gsa.big.ac.cn), a data repository for archiving raw sequence data. In compliance with data standards and structures of the International Nucleotide Sequence Database Collaboration (INSDC), GSA adopts four data objects (BioProject, BioSample, Experiment, and Run) for data organization, accepts raw sequence reads produced by a variety of sequencing platforms, stores both sequence reads and metadata submitted from all over the world, and makes all these data publicly available to worldwide scientific communities. In the era of big data, GSA is not only an important complement to existing INSDC members by alleviating the increasing burdens of handling sequence data deluge, but also takes the significant responsibility for global big data archive and provides free unrestricted access to all publicly available data in support of research activities throughout the world.","hji,kes",1,1,2,2,1,NA,NA +28413782,APMicroDB: A microsatellite database of Acyrthosiphon pisum.,"Pea aphids represent a complex genetic system that could be used for QTL analysis, genetic diversity and population genetics studies. Here, we described the development of first microsatellite repeat database of the pea aphid (APMicroDB), accessible at """"http://deepaklab.com/aphidmicrodb"""". We identified 3,40,233 SSRs using MIcroSAtellite (MISA) tool that was distributed in 14,067 (out of 23,924) scaffold of the pea aphid. We observed 89.53% simple repeats of which 73.41% were mono-nucleotide, followed by di-nucleotide repeats. 
This database stored information about the repeats kind, GC content, motif type (mono - hexa), genomic location etc. We have also incorporated the primer information derived from Primer3 software of the 2504bp flanking region of the identified marker. Blast tool is also provided for searching the user query sequence for identified marker and their primers. This work has an immense use for scientific community working in the field of agricultural pest management, QTL mapping, and host-pathogen interaction analysis.","hji,kes",1,1,2,2,1,NA,NA +28539606,SesameFG: an integrated database for the functional genomics of sesame.,"Sesame (Sesamum indicum L.) has high oil content, a small diploid genome and a short growth period, making it an attractive species for genetic studies on oilseed crops. With the advancement of next-generation sequencing technology, genomics and functional genomics research of sesame has developed quickly in the last few years, and large amounts of data have been generated. However, these results are distributed in many different publications, and there is a lack of integration. To promote functional genomics research of sesame, we collected genetic information combined with comprehensive phenotypic information and integrated them in the web-based database named SesameFG. The current version of SesameFG contains phenotypic information on agronomic traits of 705 sesame accessions, de novo assembled genomes of three sesame varieties, massive numbers of identified SNPs, gene expression profiles of five tissues, gene families, candidate genes for the important agronomic traits and genomic-SSR markers. All phenotypic and genotypic information in SesameFG is available for online queries and can be downloaded freely. SesameFG provides useful search functions and data mining tools, including Genome Browser and local BLAST services. SesameFG is freely accessible at http://ncgr.ac.cn/SesameFG/. 
SesameFG provides valuable resources and tools for functional genomics research and the molecular breeding of sesame.","hji,kes",1,1,2,2,1,NA,NA +28748223,PhenoPlasm: a database of disruption phenotypes for malaria parasite genes.,"Two decades after the first Plasmodium transfection, attempts have been made to disrupt more than 3,151 genes in malaria parasites, across five Plasmodium species. While results from rodent malaria transfections have been curated and systematised, empowering large-scale analysis, phenotypic data from human malaria parasite transfections currently exists as individual reports scattered across the literature. To facilitate systematic analysis of published experimental genetic data across Plasmodium species, we have built PhenoPlasm ( http://www.phenoplasm.org), a database of phenotypes generated by transfection experiments in all Plasmodium parasites. The site provides a simple interface linking citation-backed Plasmodium reverse-genetic phenotypes to gene IDs. The database has been populated with phenotypic data on 367 P. falciparum genes, curated from 176 individual publications, as well as existing data on rodent Plasmodium species from RMgmDB and PlasmoGEM. This is the first time that all available data on P. falciparum transfection experiments has been brought together in a single place. These data are presented using ortholog mapping to allow a researcher interested in a gene in one species to see results across other Plasmodium species. The collaborative nature of the database enables any researcher to add new phenotypes as they are discovered. 
As an example of database utility, we use the currently available datasets to identify RAP (RNA-binding domain abundant in Apicomplexa)-domain containing proteins as crucial to parasite survival.","hji,kes",1,1,2,2,1,NA,NA +28850115,"RefEx, a reference gene expression dataset as a web tool for the functional analysis of genes.","Gene expression data are exponentially accumulating; thus, the functional annotation of such sequence data from metadata is urgently required. However, life scientists have difficulty utilizing the available data due to its sheer magnitude and complicated access. We have developed a web tool for browsing reference gene expression pattern of mammalian tissues and cell lines measured using different methods, which should facilitate the reuse of the precious data archived in several public databases. The web tool is called Reference Expression dataset (RefEx), and RefEx allows users to search by the gene name, various types of IDs, chromosomal regions in genetic maps, gene family based on InterPro, gene expression patterns, or biological categories based on Gene Ontology. RefEx also provides information about genes with tissue-specific expression, and the relative gene expression values are shown as choropleth maps on 3D human body images from BodyParts3D. Combined with the newly incorporated Functional Annotation of Mammals (FANTOM) dataset, RefEx provides insight regarding the functional interpretation of unfamiliar genes. RefEx is publicly available at http://refex.dbcls.jp/.","hji,kes",1,1,2,2,1,NA,NA +28904183,"The TB Portals: an Open-Access, Web-Based Platform for Global Drug-Resistant-Tuberculosis Data Sharing and Analysis.","The TB Portals program is an international consortium of physicians, radiologists, and microbiologists from countries with a heavy burden of drug-resistant tuberculosis working with data scientists and information technology professionals. 
Together, we have built the TB Portals, a repository of socioeconomic/geographic, clinical, laboratory, radiological, and genomic data from patient cases of drug-resistant tuberculosis backed by shareable, physical samples. Currently, there are 1,299 total cases from five country sites (Azerbaijan, Belarus, Moldova, Georgia, and Romania), 976 (75.1%) of which are multidrug or extensively drug resistant and 38.2%, 51.9%, and 36.3% of which contain X-ray, computed tomography (CT) scan, and genomic data, respectively. The top Mycobacterium tuberculosis lineages represented among collected samples are Beijing, T1, and H3, and single nucleotide polymorphisms (SNPs) that confer resistance to isoniazid, rifampin, ofloxacin, and moxifloxacin occur the most frequently. These data and samples have promoted drug discovery efforts and research into genomics and quantitative image analysis to improve diagnostics while also serving as a valuable resource for researchers and clinical providers. The TB Portals database and associated projects are continually growing, and we invite new partners and collaborations to our initiative. The TB Portals data and their associated analytical and statistical tools are freely available at https://tbportals.niaid.nih.gov/.","hji,kes",1,1,2,2,1,NA,iffy +28985416,EVLncRNAs: a manually curated database for long non-coding RNAs validated by low-throughput experiments.,"Long non-coding RNAs (lncRNAs) play important functional roles in various biological processes. Early databases were utilized to deposit all lncRNA candidates produced by high-throughput experimental and/or computational techniques to facilitate classification, assessment and validation. As more lncRNAs are validated by low-throughput experiments, several databases were established for experimentally validated lncRNAs. However, these databases are small in scale (with a few hundreds of lncRNAs only) and specific in their focuses (plants, diseases or interactions). 
Thus, it is highly desirable to have a comprehensive dataset for experimentally validated lncRNAs as a central repository for all of their structures, functions and phenotypes. Here, we established EVLncRNAs by curating lncRNAs validated by low-throughput experiments (up to 1 May 2016) and integrating specific databases (lncRNAdb, LncRANDisease, Lnc2Cancer and PLNIncRBase) with additional functional and disease-specific information not covered previously. The current version of EVLncRNAs contains 1543 lncRNAs from 77 species that is 2.9 times larger than the current largest database for experimentally validated lncRNAs. Seventy-four percent lncRNA entries are partially or completely new, comparing to all existing experimentally validated databases. The established database allows users to browse, search and download as well as to submit experimentally validated lncRNAs. The database is available at http://biophy.dzu.edu.cn/EVLncRNAs.","hji,kes",1,1,2,2,1,NA,NA +29036719,ChannelsDB: database of biomacromolecular tunnels and pores.,"ChannelsDB (http://ncbr.muni.cz/ChannelsDB) is a database providing information about the positions, geometry and physicochemical properties of channels (pores and tunnels) found within biomacromolecular structures deposited in the Protein Data Bank. Channels were deposited from two sources; from literature using manual deposition and from a software tool automatically detecting tunnels leading to the enzymatic active sites and selected cofactors, and transmembrane pores. The database stores information about geometrical features (e.g. length and radius profile along a channel) and physicochemical properties involving polarity, hydrophobicity, hydropathy, charge and mutability. The stored data are interlinked with available UniProt annotation data mapping known mutation effects to channel-lining residues. All structures with channels are displayed in a clear interactive manner, further facilitating data manipulation and interpretation. 
As such, ChannelsDB provides an invaluable resource for research related to deciphering the biological function of biomacromolecular channels.","hji,kes",1,1,2,2,1,NA,NA +29059366,SBCDDB: Sleeping Beauty Cancer Driver Database for gene discovery in mouse models of human cancers.,"Large-scale oncogenomic studies have identified few frequently mutated cancer drivers and hundreds of infrequently mutated drivers. Defining the biological context for rare driving events is fundamentally important to increasing our understanding of the druggable pathways in cancer. Sleeping Beauty (SB) insertional mutagenesis is a powerful gene discovery tool used to model human cancers in mice. Our lab and others have published a number of studies that identify cancer drivers from these models using various statistical and computational approaches. Here, we have integrated SB data from primary tumor models into an analysis and reporting framework, the Sleeping Beauty Cancer Driver DataBase (SBCDDB, http://sbcddb.moffitt.org), which identifies drivers in individual tumors or tumor populations. Unique to this effort, the SBCDDB utilizes a single, scalable, statistical analysis method that enables data to be grouped by different biological properties. This allows for SB drivers to be evaluated (and re-evaluated) under different contexts. The SBCDDB provides visual representations highlighting the spatial attributes of transposon mutagenesis and couples this functionality with analysis of gene sets, enabling users to interrogate relationships between drivers. 
The SBCDDB is a powerful resource for comparative oncogenomic analyses with human cancer genomics datasets for driver prioritization.","hji,kes",1,1,2,2,1,NA,NA +29077896,mirTrans: a resource of transcriptional regulation on microRNAs for human cell lines.,"The cell-specific information of transcriptional regulation on microRNAs (miRNAs) is crucial to the precise understanding of gene regulations in various physiological and pathological processes existed in different tissues and cell types. The database, mirTrans, provides comprehensive information about cell-specific transcription of miRNAs including the transcriptional start sites (TSSs) of miRNAs, transcription factor (TF) to miRNA regulations and miRNA promoter sequences. mirTrans also maps the experimental H3K4me3 and DHS (DNase-I hypersensitive site) marks within miRNA promoters and expressed sequence tags (ESTs) within transcribed regions. The current version of database covers 35 259 TSSs and over 2.3 million TF-miRNA regulations for 1513 miRNAs in a total of 54 human cell lines. These cell lines span most of the biological systems, including circulatory system, digestive system and nervous system. Information for both the intragenic miRNAs and intergenic miRNAs is offered. Particularly, the quality of miRNA TSSs and TF-miRNA regulations is evaluated by literature curation. 23 447 TSS records and 2148 TF-miRNA regulations are supported by special experiments as a result of literature curation. EST coverage is also used to evaluate the accuracy of miRNA TSSs. 
Interface of mirTrans is friendly designed and convenient to make downloads (http://mcube.nju.edu.cn/jwang/lab/soft/mirtrans/ or http://120.27.239.192/mirtrans/).","hji,kes",1,1,2,2,1,NA,NA +29092050,Ensembl Genomes 2018: an integrated omics infrastructure for non-vertebrate species.,"Ensembl Genomes (http://www.ensemblgenomes.org) is an integrating resource for genome-scale data from non-vertebrate species, complementing the resources for vertebrate genomics developed in the Ensembl project (http://www.ensembl.org). Together, the two resources provide a consistent set of programmatic and interactive interfaces to a rich range of data including genome sequence, gene models, transcript sequence, genetic variation, and comparative analysis. This paper provides an update to the previous publications about the resource, with a focus on recent developments and expansions. These include the incorporation of almost 20 000 additional genome sequences and over 35 000 tracks of RNA-Seq data, which have been aligned to genomic sequence and made available for visualization. Other advances since 2015 include the release of the database in Resource Description Framework (RDF) format, a large increase in community-derived curation, a new high-performance protein sequence search, additional cross-references, improved annotation of non-protein-coding genes, and the launch of pre-release and archival sites. 
Collectively, these changes are part of a continuing response to the increasing quantity of publicly-available genome-scale data, and the consequent need to archive, integrate, annotate and disseminate these using automated, scalable methods.","hji,kes",1,1,2,2,1,NA,NA +29092072,Mouse Genome Database (MGD)-2018: knowledgebase for the laboratory mouse.,"The Mouse Genome Database (MGD; http://www.informatics.jax.org) is the key community mouse database which supports basic, translational and computational research by providing integrated data on the genetics, genomics, and biology of the laboratory mouse. MGD serves as the source for biological reference data sets related to mouse genes, gene functions, phenotypes and disease models with an increasing emphasis on the association of these data to human biology and disease. We report here on recent enhancements to this resource, including improved access to mouse disease model and human phenotype data and enhanced relationships of mouse models to human disease.","hji,kes",1,1,2,2,1,NA,NA +29121237,Human Ageing Genomic Resources: new and updated databases.,"In spite of a growing body of research and data, human ageing remains a poorly understood process. Over 10 years ago we developed the Human Ageing Genomic Resources (HAGR), a collection of databases and tools for studying the biology and genetics of ageing. Here, we present HAGR's main functionalities, highlighting new additions and improvements. 
HAGR consists of six core databases: (i) the GenAge database of ageing-related genes, in turn composed of a dataset of >300 human ageing-related genes and a dataset with >2000 genes associated with ageing or longevity in model organisms; (ii) the AnAge database of animal ageing and longevity, featuring >4000 species; (iii) the GenDR database with >200 genes associated with the life-extending effects of dietary restriction; (iv) the LongevityMap database of human genetic association studies of longevity with >500 entries; (v) the DrugAge database with >400 ageing or longevity-associated drugs or compounds; (vi) the CellAge database with >200 genes associated with cell senescence. All our databases are manually curated by experts and regularly updated to ensure a high quality data. Cross-links across our databases and to external resources help researchers locate and integrate relevant information. HAGR is freely available online (http://genomics.senescence.info/).","hji,kes",1,1,2,2,1,NA,NA +29126285,ReMap 2018: an updated atlas of regulatory regions from an integrative analysis of DNA-binding ChIP-seq experiments.,"With this latest release of ReMap (http://remap.cisreg.eu), we present a unique collection of regulatory regions in human, as a result of a large-scale integrative analysis of ChIP-seq experiments for hundreds of transcriptional regulators (TRs) such as transcription factors, transcriptional co-activators and chromatin regulators. In 2015, we introduced the ReMap database to capture the genome regulatory space by integrating public ChIP-seq datasets, covering 237 TRs across 13 million (M) peaks. In this release, we have extended this catalog to constitute a unique collection of regulatory regions. Specifically, we have collected, analyzed and retained after quality control a total of 2829 ChIP-seq datasets available from public sources, covering a total of 485 TRs with a catalog of 80M peaks. 
Additionally, the updated database includes new search features for TR names as well as aliases, including cell line names and the ability to navigate the data directly within genome browsers via public track hubs. Finally, full access to this catalog is available online together with a TR binding enrichment analysis tool. ReMap 2018 provides a significant update of the ReMap database, providing an in depth view of the complexity of the regulatory landscape in human.","hji,kes",1,1,2,2,1,NA,NA +29126312,MeT-DB V2.0: elucidating context-specific functions of N6-methyl-adenosine methyltranscriptome.,"Methyltranscriptome is an exciting new area that studies the mechanisms and functions of methylation in transcripts. A knowledge base with the systematic collection and curation of context specific transcriptome-wide methylations is critical for elucidating their biological functions as well as for developing bioinformatics tools. Since its inception in 2014, the Met-DB (Liu, H., Flores, M.A., Meng, J., Zhang, L., Zhao, X., Rao, M.K., Chen, Y. and Huang, Y. (2015) MeT-DB: a database of transcriptome methylation in mammalian cells. Nucleic Acids Res., 43, D197-D203), has become an important resource for methyltranscriptome, especially in the N6-methyl-adenosine (m6A) research community. Here, we report Met-DB v2.0, the significantly improved second version of Met-DB, which is entirely redesigned to focus more on elucidating context-specific m6A functions. Met-DB v2.0 has a major increase in context-specific m6A peaks and single-base sites predicted from 185 samples for 7 species from 26 independent studies. Moreover, it is also integrated with a new database for targets of m6A readers, erasers and writers and expanded with more collections of functional data. The redesigned Met-DB v2.0 web interface and genome browser provide more friendly, powerful, and informative ways to query and visualize the data. 
More importantly, MeT-DB v2.0 offers for the first time a series of tools specifically designed for understanding m6A functions. Met-DB V2.0 will be a valuable resource for m6A methyltranscriptome research. The Met-DB V2.0 database is available at http://compgenomics.utsa.edu/MeTDB/ and http://www.xjtlu.edu.cn/metdb2.","hji,kes",1,1,2,2,1,NA,NA +29145629,The Reactome Pathway Knowledgebase.,"The Reactome Knowledgebase (https://reactome.org) provides molecular details of signal transduction, transport, DNA replication, metabolism, and other cellular processes as an ordered network of molecular transformations-an extended version of a classic metabolic map, in a single consistent data model. Reactome functions both as an archive of biological processes and as a tool for discovering unexpected functional relationships in data such as gene expression profiles or somatic mutation catalogues from tumor cells. To support the continued brisk growth in the size and complexity of Reactome, we have implemented a graph database, improved performance of data analysis tools, and designed new data structures and strategies to boost diagram viewer performance. To make our website more accessible to human users, we have improved pathway display and navigation by implementing interactive Enhanced High Level Diagrams (EHLDs) with an associated icon library, and subpathway highlighting and zooming, in a simplified and reorganized web site with adaptive design. To encourage re-use of our content, we have enabled export of pathway diagrams as 'PowerPoint' files.","hji,kes",1,1,2,2,1,NA,NA +29155231,RTFAdb: A database of computationally predicted associations between retrotransposons and transcription factors in the human and mouse genomes.,"In recent years, retrotransposons have gained increasing attention as a source of binding motifs for transcription factors (TFs). 
Despite the substantial roles of these mobile genetic elements in the regulation of gene expression, a comprehensive resource enabling the investigation of retrotransposon species that are bound by TFs is still lacking. Herein, I introduce for the first time a novel database called RTFAdb, which allows exploring computationally predicted associations between retrotransposons and TFs in diverse cell lines and tissues of human and mouse. My database, using over 3.000 TF ChIP-seq binding profiles collected from human and mouse samples, makes possible searching more than 1.500 retrotransposon species in the binding sites of a total of 596 TFs. RTFAdb is freely available at http://tools.ibg.deu.edu.tr/rtfa/ and has the potential to offer novel insights into mammalian transcriptional networks by providing an additional layer of information regarding the regulatory roles of retrotransposons.","hji,kes",1,1,2,2,1,NA,NA +29209336,DRDB: An Online Date Palm Genomic Resource Database.,"Background: Date palm (Phoenix dactylifera L.) is a cultivated woody plant with agricultural and economic importance in many countries around the world. With the advantages of next generation sequencing technologies, genome sequences for many date palm cultivars have been released recently. Short sequence repeat (SSR) and single nucleotide polymorphism (SNP) can be identified from these genomic data, and have been proven to be very useful biomarkers in plant genome analysis and breeding. Results: Here, we first improved the date palm genome assembly using 130X of HiSeq data generated in our lab. Then 246,445 SSRs (214,901 SSRs and 31,544 compound SSRs) were annotated in this genome assembly; among the SSRs, mononucleotide SSRs (58.92%) were the most abundant, followed by di- (29.92%), tri- (8.14%), tetra- (2.47%), penta- (0.36%), and hexa-nucleotide SSRs (0.19%). The high-quality PCR primer pairs were designed for most (174,497; 70.81% out of total) SSRs. 
We also annotated 6,375,806 SNPs with raw read depth=3 in 90% cultivars. To further reduce false positive SNPs, we only kept 5,572,650 (87.40% out of total) SNPs with at least 20% cultivars support for downstream analyses. The high-quality PCR primer pairs were also obtained for 4,177,778 (65.53%) SNPs. We reconstructed the phylogenetic relationships among the 62 cultivars using these variants and found that they can be divided into three clusters, namely North Africa, Egypt - Sudan, and Middle East - South Asian, with Egypt - Sudan being the admixture of North Africa and Middle East - South Asian cultivars; we further confirmed these clusters using principal component analysis. Moreover, 34,346 SSRs and 4,177,778 SNPs with PCR primers were assigned to shared cultivars for cultivar classification and diversity analysis. All these SSRs, SNPs and their classification are available in our database, and can be used for cultivar identification, comparison, and molecular breeding. Conclusion:DRDB is a comprehensive genomic resource database of date palm. It can serve as a bioinformatics platform for date palm genomics, genetics, and molecular breeding. DRDB is freely available at http://drdb.big.ac.cn/home.","hji,kes",1,1,2,2,1,NA,NA +29315358,HTT-DB: new features and updates.,"Horizontal Transfer (HT) of genetic material between species is a common phenomenon among Bacteria and Archaea species and several databases are available for information retrieval and data mining. However, little attention has been given to this phenomenon among eukaryotic species mainly due to the lower proportion of these events. In the last years, a vertiginous amount of new HT events involving eukaryotic species was reported in the literature, highlighting the need of a common repository to keep the scientific community up to date and describe overall trends. 
Recently, we published the first HT database focused on HT of transposable elements among eukaryotes: the Horizontal Transposon Transfer DataBase (http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase/). Here, we present new features and updates of this unique database: (i) its expansion to include virus-host exchange of genetic material, which we called Horizontal Virus Transfer (HVT) and (ii) the availability of a web server for HT detection, where we implemented the online version of vertical and horizontal inheritance consistence analysis (VHICA), an R package developed for HT detection. These improvements will help researchers to navigate through known HVT cases, take data-informed decision and export figures based on keywords searches. Moreover, the availability of the VHICA as an online tool will make this software easily reachable even for researchers with no or little computation knowledge as well as foster our capability to detect new HT events in a wide variety of taxa. (Database URL: http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase/).","hji,kes",1,1,2,2,1,NA,NA +29401218,MutHTP: mutations in human transmembrane proteins.,"Motivation:Existing sources of experimental mutation data do not consider the structural environment of amino acid substitutions and distinguish between soluble and membrane proteins. They also suffer from a number of further limitations, including data redundancy, lack of disease classification, incompatible information content, and ambiguous annotations (e.g. the same mutation being annotated as disease and benign). Results:We have developed a novel database, MutHTP, which contains information on 183 395 disease-associated and 17 827 neutral mutations in human transmembrane proteins. For each mutation site MutHTP provides a description of its location with respect to the membrane protein topology, structural environment (if available) and functional features. 
Comprehensive visualization, search, display and download options are available. Availability and implementation:The database is publicly available at http://www.iitm.ac.in/bioinfo/MutHTP/. The website is implemented using HTML, PHP and javascript and supports recent versions of all major browsers, such as Firefox, Chrome and Opera. Supplementary information:Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +29455297,Pan European Phenological database (PEP725): a single point of access for European data.,"The Pan European Phenology (PEP) project is a European infrastructure to promote and facilitate phenological research, education, and environmental monitoring. The main objective is to maintain and develop a Pan European Phenological database (PEP725) with an open, unrestricted data access for science and education. PEP725 is the successor of the database developed through the COST action 725 """"Establishing a European phenological data platform for climatological applications"""" working as a single access point for European-wide plant phenological data. So far, 32 European meteorological services and project partners from across Europe have joined and supplied data collected by volunteers from 1868 to the present for the PEP725 database. Most of the partners actively provide data on a regular basis. The database presently holds almost 12 million records, about 46 growing stages and 265 plant species (including cultivars), and can be accessed via http://www.pep725.eu/ . Users of the PEP725 database have studied a diversity of topics ranging from climate change impact, plant physiological question, phenological modeling, and remote sensing of vegetation to ecosystem productivity.","hji,kes",1,1,2,2,1,NA,NA +29617941,A reference peptide database for proteome quantification based on experimental mass spectrum response curves.,"

Motivation

Mass spectrometry (MS) based quantification of proteins/peptides has become a powerful tool in biological research with high sensitivity and throughput. The accuracy of quantification, however, has been problematic as not all peptides are suitable for quantification. Several methods and tools have been developed to identify peptides that response well in mass spectrometry and they are mainly based on predictive models, and rarely consider the linearity of the response curve, limiting the accuracy and applicability of the methods. An alternative solution is to select empirically superior peptides that offer satisfactory MS response intensity and linearity in a wide dynamic range of peptide concentration.

Results

We constructed a reference database for proteome quantification based on experimental mass spectrum response curves. The intensity and dynamic range of over 2 647 773 transitions from 121 318 peptides were obtained from a set of dilution experiments, covering 11 040 gene products. These transitions and peptides were evaluated and presented in a database named SCRIPT-MAP. We showed that the best-responder (BR) peptide approach for quantification based on SCRIPT-MAP database is robust, repeatable and accurate in proteome-scale protein quantification. This study provides a reference database as well as a peptides/transitions selection method for quantitative proteomics.

Availability and implementation

SCRIPT-MAP database is available at http://www.firmiana.org/responders/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +29662024,"PKIDB: A Curated, Annotated and Updated Database of Protein Kinase Inhibitors in Clinical Trials.","The number of protein kinase inhibitors (PKIs) approved worldwide continues to grow steadily, with 39 drugs approved in the period between 2001 and January 2018. PKIs on the market have been the subject of many reviews, and structure-property relationships specific to this class of drugs have been inferred. However, the large number of PKIs under development is often overlooked. In this paper, we present PKIDB (Protein Kinase Inhibitor Database), a monthly-updated database gathering approved PKIs as well as PKIs currently in clinical trials. The database compiles currently 180 inhibitors ranging from phase 0 to 4 clinical trials along with annotations extracted from seven public resources. The distribution and property ranges of standard physicochemical properties are presented. They can be used as filters to better prioritize compound selection for future screening campaigns. Interestingly, more than one-third of the kinase inhibitors violate at least one Lipinski's rule. A Principal Component Analysis (PCA) reveals that Type-II inhibitors are mapped to a distinct chemical space as compared to orally administrated drugs as well as to other types of kinase inhibitors. Using a Principal Moment of Inertia (PMI) analysis, we show that PKIs under development tend to explore new shape territories as compared to approved PKIs. In order to facilitate the analysis of the protein space, the kinome tree has been annotated with all protein kinases being targeted by PKIs. Finally, we analyzed the pipeline of the pharmaceutical companies having PKIs on the market or still under development. We hope that this work will assist researchers in the kinase field in identifying and designing the next generation of kinase inhibitors for still untargeted kinases. 
The PKIDB database is freely accessible from a website at http://www.icoa.fr/pkidb and can be easily browsed through a user-friendly spreadsheet-like interface.","hji,kes",1,1,2,2,1,NA,NA +29743053,PDXliver: a database of liver cancer patient derived xenograft mouse models.,"

Background

Liver cancer is the second leading cause of cancer-related deaths and characterized by heterogeneity and drug resistance. Patient-derived xenograft (PDX) models have been widely used in cancer research because they reproduce the characteristics of original tumors. However, the current studies of liver cancer PDX mice are scattered and the number of available PDX models are too small to represent the heterogeneity of liver cancer patients. To improve this situation and to complement available PDX models related resources, here we constructed a comprehensive database, PDXliver, to integrate and analyze liver cancer PDX models.

Description

Currently, PDXliver contains 116 PDX models from Chinese liver cancer patients, 51 of them were established by the in-house PDX platform and others were curated from the public literatures. These models are annotated with complete information, including clinical characteristics of patients, genome-wide expression profiles, germline variations, somatic mutations and copy number alterations. Analysis of expression subtypes and mutated genes show that PDXliver represents the diversity of human patients. Another feature of PDXliver is storing drug response data of PDX mice, which makes it possible to explore the association between molecular profiles and drug sensitivity. All data can be accessed via the Browse and Search pages. Additionally, two tools are provided to interactively visualize the omics data of selected PDXs or to compare two groupsof PDXs.

Conclusion

As far as we known, PDXliver is the first public database of liver cancer PDX models. We hope that this comprehensive resource will accelerate the utility of PDX models and facilitate liver cancer research. The PDXliver database is freely available online at: http://www.picb.ac.cn/PDXliver/.","hji,kes",1,1,2,2,1,NA,liver cancer - too clinical? +29761459,Mouse Genome Informatics (MGI) Is the International Resource for Information on the Laboratory Mouse.,"Mouse Genome Informatics (MGI, http://www.informatics.jax.org/ ) web resources provide free access to meticulously curated information about the laboratory mouse. MGI's primary goal is to help researchers investigate the genetic foundations of human diseases by translating information from mouse phenotypes and disease models studies to human systems. MGI provides comprehensive phenotypes for over 50,000 mutant alleles in mice and provides experimental model descriptions for over 1500 human diseases. Curated data from scientific publications are integrated with those from high-throughput phenotyping and gene expression centers. Data are standardized using defined, hierarchical vocabularies such as the Mammalian Phenotype (MP) Ontology, Mouse Developmental Anatomy and the Gene Ontologies (GO). This chapter introduces you to Gene and Allele Detail pages and provides step-by-step instructions for simple searches and those that take advantage of the breadth of MGI data integration.","hji,kes",1,1,2,2,1,NA,NA +29890119,FlyXCDB-A Resource for Drosophila Cell Surface and Secreted Proteins and Their Extracellular Domains.,"Genomes of metazoan organisms possess a large number of genes encoding cell surface and secreted (CSS) proteins that carry out crucial functions in cell adhesion and communication, signal transduction, extracellular matrix establishment, nutrient digestion and uptake, immunity, and developmental processes. 
We developed the FlyXCDB database (http://prodata.swmed.edu/FlyXCDB) that provides a comprehensive resource to investigate extracellular (XC) domains in CSS proteins of Drosophila melanogaster, the most studied insect model organism in various aspects of animal biology. More than 300 Drosophila XC domains were discovered in Drosophila CSS proteins encoded by over 2500 genes through analyses of computational predictions of signal peptide, transmembrane (TM) segment, and GPI-anchor signal sequence, profile-based sequence similarity searches, gene ontology, and literature. These domains were classified into six classes mainly based on their molecular functions, including protein-protein interactions (class P), signaling molecules (class S), binding of non-protein molecules or groups (class B), enzyme homologs (class E), enzyme regulation and inhibition (class R), and unknown molecular function (class U). Main cellular functions such as cell adhesion, cell signaling, and extracellular matrix composition were described for the most abundant domains in each functional class. We assigned cell membrane topology categories (E, secreted; S, type I/III single-pass TM; T, type II single-pass TM; M, multi-pass TM; and G, GPI-anchored) to the products of genes with XC domains and investigated their regulation by mechanisms such as alternative splicing and stop codon readthrough.","hji,kes",1,1,2,2,1,NA,NA +29897484,ILDgenDB: integrated genetic knowledge resource for interstitial lung diseases (ILDs).,"Interstitial lung diseases (ILDs) are a diverse group of ~200 acute and chronic pulmonary disorders that are characterized by variable amounts of inflammation, fibrosis and architectural distortion with substantial morbidity and mortality. Inaccurate and delayed diagnoses increase the risk, especially in developing countries. Studies have indicated the significant roles of genetic elements in ILDs pathogenesis. 
Therefore, the first genetic knowledge resource, ILDgenDB, has been developed with an objective to provide ILDs genetic data and their integrated analyses for the better understanding of disease pathogenesis and identification of diagnostics-based biomarkers. This resource contains literature-curated disease candidate genes (DCGs) enriched with various regulatory elements that have been generated using an integrated bioinformatics workflow of databases searches, literature-mining and DCGs-microRNA (miRNAs)-single nucleotide polymorphisms (SNPs) association analyses. To provide statistical significance to disease-gene association, ILD-specificity index and hypergeomatric test scores were also incorporated. Association analyses of miRNAs, SNPs and pathways responsible for the pathogenesis of different sub-classes of ILDs were also incorporated. Manually verified 299 DCGs and their significant associations with 1932 SNPs, 2966 miRNAs and 9170 miR-polymorphisms were also provided. Furthermore, 216 literature-mined and proposed biomarkers were identified. The ILDgenDB resource provides user-friendly browsing and extensive query-based information retrieval systems. Additionally, this resource also facilitates graphical view of predicted DCGs-SNPs/miRNAs and literature associated DCGs-ILDs interactions for each ILD to facilitate efficient data interpretation. Outcomes of analyses suggested the significant involvement of immune system and defense mechanisms in ILDs pathogenesis. This resource may potentially facilitate genetic-based disease monitoring and diagnosis.Database URL: http://14.139.240.55/ildgendb/index.php.","hji,kes",1,1,2,2,1,NA,NA +29961819,dbLGL: an online leukemia gene and literature database for the retrospective comparison of adult and childhood leukemia genetics with literature evidence.,"Leukemia is a group of cancers with increased numbers of immature or abnormal leucocytes that originated in the bone marrow and other blood-forming organs. 
The development of differentially diagnostic biomarkers for different subtypes largely depends on understanding the biological pathways and regulatory mechanisms associated with leukemia-implicated genes. Unfortunately, the leukemia-implicated genes that have been identified thus far are scattered among thousands of published studies, and no systematic summary of the differences between adult and childhood leukemia exists with regard to the causative genetic mutations and genetic mechanisms of the various subtypes. In this study, we performed a systematic literature review of those susceptibility genes reported in small-scale experiments and built an online gene database containing a total of 1805 leukemia-associated genes, available at http://soft.bioinfo-minzhao.org/lgl/. Our comparison of genes from the four primary subtypes and between adult and childhood cases identified a number of potential genes related to patient survival. These curated genes can satisfy a growing demand for further integrating genomics screening for leukemia-associated low-frequency mutated genes.Database URL: http://soft.bioinfo-minzhao.org/lgl/.","hji,kes",1,1,2,2,1,NA,NA +30150996,AromaDb: A Database of Medicinal and Aromatic Plant's Aroma Molecules With Phytochemistry and Therapeutic Potentials.,"In traditional, herbal medicine, and aromatherapy, use of essential oils and their aroma compounds have been known since long, for the management of various human diseases. The essential oil is a mixture of highly complex, naturally occurring volatile aroma compounds synthesized by medicinal and aromatic plants as secondary metabolites. Essential oils widely used in pharmaceutical, cosmetic, sanitary, food industry and agriculture for their antibacterial, antiviral, antifungal, antiparasitic, insecticidal, anticancer, neuroprotective, psychophysiological, and anti-aging activities. 
Moreover, volatile aroma compounds comprise a chemically diverse class of low molecular weight organic compounds with significant vapor pressure. However, aroma compounds produced by plants, mainly attract pollinators, seed dispersers and provide defense against pests or pathogens. However, in humans, about 300 active olfactory receptor genes are involved to detect thousands of different aroma compounds and modulates expression of different metabolic genes regulating human psychophysiological activity, brain function, pharmacological signaling, and therapeutic potential. Keeping in mind this importance, present database, namely, AromaDb (http://bioinfo.cimap.res.in/aromadb/) covers information of plant varieties/chemotypes, essential oils, chemical constituents, GC-MS profile, yield variations due to agro-morphological parameters, trade data, aroma compounds, fragrance type, and bioactivity details. The database includes 1,321 aroma chemical structures, bioactivities of essential oil/aroma compounds, 357 fragrance type, 166 commercially used plants, and their high yielding 148 varieties/chemotypes. Also includes calculated cheminformatics properties related to identification, physico-chemical properties, pharmacokinetics, toxicological, and ecological information. Also comprises interacted human genes affecting various diseases related cell signaling pathways correlating the use of aromatherapy. This database could be a useful resource to the plant's growers/producers, an aroma/fragrance industrialist, health professionals, and researchers exploring the potential of essential oils and aroma compounds in the development of novel formulations against human diseases.","hji,kes",1,1,2,2,1,NA,NA +30152276,HYPO: A Database of Human Hypothetical Proteins.,"

Background

There are genes whose function remains obscure as they may not have similarities to known regions in the genome. Such known 'unknown' genes constituting the Open Reading Frames (ORF) that remain in the epigenome are termed as orphan genes and the proteins encoded by them but having no experimental evidence of translation are termed as 'Hypothetical Proteins' (HPs).

Objectives

We have enhanced our former database of Hypothetical Proteins (HP) in human (HypoDB) with added annotation, application programming interfaces and descriptive features. The database hosts 1000+ manually curated records of the known 'unknown' regions in the human genome. The new updated version of HypoDB with functionalities (Blast, Match) is freely accessible at http://www.bioclues.org/hypo2.

Methods

The total collection of HPs were checked using experimentally validated sets (from Swiss-Prot) or non-experimentally validated set (TrEMBL) or the complete set (UniProtKB). The database was designed with java at the core backend, integrated with databases, viz. EMBL, PIR, HPRD and those including descriptors for structural databases, interaction and association databases.

Results

The HypoDB constituted Application Programming Interfaces (API) for implicitly searching resources linking them to other databases like NCBI Link-out in addition to multiple search capabilities along with advanced searches using integrated bio-tools, viz. Match and BLAST were incorporated.

Conclusion

The HypoDB is perhaps the only open-source HP database with a range of tools for common bioinformatics retrievals and serves as a standby reference to researchers who are interested in finding candidate sequences for their potential experimental work.","hji,kes",1,1,2,2,1,NA,NA +30223042,TSNAdb: A Database for Tumor-specific Neoantigens from Immunogenomics Data Analysis.,"Tumor-specific neoantigens have attracted much attention since they can be used as biomarkers to predict therapeutic effects of immune checkpoint blockade therapy and as potential targets for cancer immunotherapy. In this study, we developed a comprehensive tumor-specific neoantigen database (TSNAdb v1.0), based on pan-cancer immunogenomic analyses of somatic mutation data and human leukocyte antigen (HLA) allele information for 16 tumor types with 7748 tumor samples from The Cancer Genome Atlas (TCGA) and The Cancer Immunome Atlas (TCIA). We predicted binding affinities between mutant/wild-type peptides and HLA class I molecules by NetMHCpan v2.8/v4.0, and presented detailed information of 3,707,562/1,146,961 potential neoantigens generated by somatic mutations of all tumor samples. Moreover, we employed recurrent mutations in combination with highly frequent HLA alleles to predict potential shared neoantigens across tumor patients, which would facilitate the discovery of putative targets for neoantigen-based cancer immunotherapy. TSNAdb is freely available at http://biopharm.zju.edu.cn/tsnadb.","hji,kes",1,1,2,2,1,NA,NA +30244175,PTMD: A Database of Human Disease-associated Post-translational Modifications.,"Various posttranslational modifications (PTMs) participate in nearly all aspects of biological processes by regulating protein functions, and aberrant states of PTMs are frequently implicated in human diseases. Therefore, an integral resource of PTM-disease associations (PDAs) would be a great help for both academic research and clinical use. 
In this work, we reported PTMD, a well-curated database containing PTMs that are associated with human diseases. We manually collected 1950 known PDAs in 749 proteins for 23 types of PTMs and 275 types of diseases from the literature. Database analyses show that phosphorylation has the largest number of disease associations, whereas neurologic diseases have the largest number of PTM associations. We classified all known PDAs into six classes according to the PTM status in diseases and demonstrated that the upregulation and presence of PTM events account for a predominant proportion of disease-associated PTM events. By reconstructing a disease-gene network, we observed that breast cancers have the largest number of associated PTMs and AKT1 has the largest number of PTMs connected to diseases. Finally, the PTMD database was developed with detailed annotations and can be a useful resource for further analyzing the relations between PTMs and human diseases. PTMD is freely accessible at http://ptmd.biocuckoo.org.","hji,kes",1,1,2,2,1,NA,NA +30268934,GAAD: A Gene and Autoimmiune Disease Association Database.,"Autoimmune diseases (ADs) arise from an abnormal immune response of the body against substances and tissues normally present in the body. More than a hundred of ADs have been described in the literature so far. Although their etiology remains largely unclear, various types of ADs tend to share more associated genes with other types of ADs than with non-AD types. Here we present GAAD, a gene and AD association database. In GAAD, we collected 44,762 associations between 49 ADs and 4249 genes from public databases and MEDLINE documents. We manually verified the associations to ensure the quality and credibility. We reconstructed and recapitulated the relationships among ADs using their shared genes, which further validated the quality of our data. 
We also provided a list of significantly co-occurring gene pairs among ADs; with embedded tools, users can query gene co-occurrences and construct customized co-occurrence network with genes of interest. To make GAAD more straightforward to experimental biologists and medical scientists, we extracted additional information describing the associations through text mining, including the putative diagnostic value of the associations, type and position of gene polymorphisms, expression changes of implicated genes, as well as the phenotypical consequences, and grouped the associations accordingly. GAAD is freely available at http://gaad.medgenius.info.","hji,kes",1,1,2,2,1,NA,NA +30371822,MatrixDB: integration of new data with a focus on glycosaminoglycan interactions.,"MatrixDB (http://matrixdb.univ-lyon1.fr/) is an interaction database focused on biomolecular interactions established by extracellular matrix (ECM) proteins and glycosaminoglycans (GAGs). It is an active member of the International Molecular Exchange (IMEx) consortium (https://www.imexconsortium.org/). It has adopted the HUPO Proteomics Standards Initiative standards for annotating and exchanging interaction data, either at the MIMIx (The Minimum Information about a Molecular Interaction eXperiment) or IMEx level. The following items related to GAGs have been added in the updated version of MatrixDB: (i) cross-references of GAG sequences to the GlyTouCan database, (ii) representation of GAG sequences in different formats (IUPAC and GlycoCT) and as SNFG (Symbol Nomenclature For Glycans) images and (iii) the GAG Builder online tool to build 3D models of GAG sequences from GlycoCT codes. The database schema has been improved to represent n-ary experiments. 
Gene expression data, imported from Expression Atlas (https://www.ebi.ac.uk/gxa/home), quantitative ECM proteomic datasets (http://matrisomeproject.mit.edu/ecm-atlas), and a new visualization tool of the 3D structures of biomolecules, based on the PDB Component Library and LiteMol, have also been added. A new advanced query interface now allows users to mine MatrixDB data using combinations of criteria, in order to build specific interaction networks related to diseases, biological processes, molecular functions or publications.","hji,kes",1,1,2,2,1,NA,NA +30380112,Translocatome: a novel resource for the analysis of protein translocation between cellular organelles.,"Here we present Translocatome, the first dedicated database of human translocating proteins (URL: http://translocatome.linkgroup.hu). The core of the Translocatome database is the manually curated data set of 213 human translocating proteins listing the source of their experimental validation, several details of their translocation mechanism, their local compartmentalized interactome, as well as their involvement in signalling pathways and disease development. In addition, using the well-established and widely used gradient boosting machine learning tool, XGBoost, Translocatome provides translocation probability values for 13066 human proteins identifying 1133 and 3268 high- and low-confidence translocating proteins, respectively. The database has user-friendly search options with a UniProt autocomplete quick search and advanced search for proteins filtered by their localization, UniProt identifiers, translocation likelihood or data complexity. Download options of search results, manually curated and predicted translocating protein sets are available on its website. The update of the database is helped by its manual curation framework and connection to the previously published ComPPI compartmentalized protein-protein interaction database(http://comppi.linkgroup.hu). 
As shown by the application examples of merlin (NF2) and tumor protein 63 (TP63) Translocatome allows a better comprehension of protein translocation as a systems biology phenomenon and can be used as a discovery-tool in the protein translocation field.","hji,kes",1,1,2,2,1,NA,NA +30652085,PDB_Amyloid: an extended live amyloid structure list from the PDB.,"The Protein Data Bank (PDB) contains more than 135000 entries at present. From these, relatively few amyloid structures can be identified, since amyloids are insoluble in water. Therefore, most amyloid structures deposited in the PDB are in the form of solid state NMR data. Based on the geometric analysis of these deposited structures, we have prepared an automatically updated web server, which generates a list of the deposited amyloid structures, and also entries of globular proteins that have amyloid-like substructures of given size and characteristics. We have found that by applying only appropriately selected geometric conditions, it is possible to identify deposited amyloid structures and a number of globular proteins with amyloid-like substructures. We have analyzed these globular proteins and have found proof in the literature that many of them form amyloids more easily than many other globular proteins. Our results relate to the method of Stankovic etal. [Stankovic I etal. (2017) IPSI BgD Tran Int Res 13, 47-51], who applied a hybrid textual-search and geometric approach for finding amyloids in the PDB. If one intends to identify a subset of the PDB for certain applications, the identification algorithm needs to be re-run periodically, since in 2017 on average 30 new entries per day were deposited in the data bank. 
Our web server is updated regularly and automatically, and the identified amyloid and partial amyloid structures can be viewed or their list can be downloaded from the following website https://pitgroup.org/amyloid.","hji,kes",1,1,2,2,1,NA,based on PDB though +30674925,"Smooth Muscle Transcriptome Browser: offering genome-wide references and expression profiles of transcripts expressed in intestinal SMC, ICC, and PDGFRα+ cells.","Transcriptome data on the quantitative numbers of transcriptional variants expressed in primary cells offer essential clues into specific cellular functions and biological processes. We have previously collected transcriptomes from primary smooth muscle cells (SMC), interstitial cells of Cajal (ICC), and PDGFRa+ cells (fibroblast-like cells) isolated from murine jejunal and colonic smooth muscle and/or mucosal tissues as well as transcriptomes from the associated tissues (jejunal smooth muscle, colonic smooth muscle, and colonic mucosa). In this study, we have built the Smooth Muscle Transcriptome Browser (SMTB), https://med.unr.edu/physio/transcriptome , a web-based, graphical user interface that offers genetic references and expression profiles of all transcripts expressed at both the cellular (SMC, ICC, and PDGFRa+ cells) and tissue level (smooth muscle and mucosal tissue). This browser brings new insights into the cellular and biological functions of the cell types in gastrointestinal smooth muscle biology.","hji,kes",1,1,2,2,1,NA,iffy +30715274,APID database: redefining protein-protein interaction experimental evidences and binary interactomes.,"The collection and integration of all the known protein-protein physical interactions within a proteome framework are critical to allow proper exploration of the protein interaction networks that drive biological processes in cells at molecular level. 
APID Interactomes is a public resource of biological data (http://apid.dep.usal.es) that provides a comprehensive and curated collection of `protein interactomes' for more than 1100 organisms, including 30 species with more than 500 interactions, derived from the integration of experimentally detected protein-to-protein physical interactions (PPIs). We have performed an update of APID database including a redefinition of several key properties of the PPIs to provide a more precise data integration and to avoid false duplicated records. This includes the unification of all the PPIs from five primary databases of molecular interactions (BioGRID, DIP, HPRD, IntAct and MINT), plus the information from two original systematic sources of human data and from experimentally resolved 3D structures (i.e. PDBs, Protein Data Bank files, where more than two distinct proteins have been identified). Thus, APID provides PPIs reported in published research articles (with traceable PMIDs) and detected by valid experimental interaction methods that give evidences about such protein interactions (following the `ontology and controlled vocabulary': www.ebi.ac.uk/ols/ontologies/mi; developed by `HUPO PSI-MI'). Within this data mining framework, all interaction detection methods have been grouped into two main types: (i) `binary' physical direct detection methods and (ii) `indirect' methods. As a result of these redefinitions, APID provides unified protein interactomes including the specific `experimental evidences' that support each PPI, indicating whether the interactions can be considered `binary' (i.e. 
supported by at least one binary detection method) or not.","hji,kes",1,1,2,2,1,NA,NA +30760842,Development and validation of whole genome-wide and genic microsatellite markers in oil palm (Elaeis guineensis Jacq.): First microsatellite database (OpSatdb).,"The availability of large expressed sequence tag (EST) and whole genome databases of oil palm enabled the development of a data base of microsatellite markers. For this purpose, an EST database consisting of 40,979 EST sequences spanning 27 Mb and a chromosome-wise whole genome databases were downloaded. A total of 3,950 primer pairs were identified and developed from EST sequences. The tri and tetra nucleotide repeat motifs were most prevalent (each 24.75%) followed by di-nucleotide repeat motifs. Whole genome-wide analysis found a total of 245,654 SSR repeats across the 16 chromosomes of oil palm, of which 38,717 were compound microsatellite repeats. A web application, OpSatdb, the first microsatellite database of oil palm, was developed using the PHP and MySQL database ( https://ssr.icar.gov.in/index.php ). It is a simple and systematic web-based search engine for searching SSRs based on repeat motif type, repeat type, and primer details. High synteny was observed between oil palm and rice genomes. The mapping of ESTs having SSRs by Blast2GO resulted in the identification of 19.2% sequences with gene ontology (GO) annotations. Randomly, a set of ten genic SSRs and five genomic SSRs were used for validation and genetic diversity on 100 genotypes belonging to the world oil palm genetic resources. The grouping pattern was observed to be broadly in accordance with the geographical origin of the genotypes. 
The identified genic and genome-wide SSRs can be effectively useful for various genomic applications of oil palm, such as genetic diversity, linkage map construction, mapping of QTLs, marker-assisted selection, and comparative population studies.","hji,kes",1,1,2,2,1,NA,NA +30994884,Graph-based data integration from bioactive peptide databases of pharmaceutical interest: toward an organized collection enabling visual network analysis.,"

Motivation

Bioactive peptides have gained great attention in the academy and pharmaceutical industry since they play an important role in human health. However, the increasing number of bioactive peptide databases is causing the problem of data redundancy and duplicated efforts. Even worse is the fact that the available data is non-standardized and often dirty with data entry errors. Therefore, there is a need for a unified view that enables a more comprehensive analysis of the information on this topic residing at different sites.

Results

After collecting web pages from a large variety of bioactive peptide databases, we organized the web content into an integrated graph database (starPepDB) that holds a total of 71310 nodes and 348505 relationships. In this graph structure, there are 45120 nodes representing peptides, and the rest of the nodes are connected to peptides for describing metadata. Additionally, to facilitate a better understanding of the integrated data, a software tool (starPep toolbox) has been developed for supporting visual network analysis in a user-friendly way; providing several functionalities such as peptide retrieval and filtering, network construction and visualization, interactive exploration and exporting data options.

Availability and implementation

Both starPepDB and starPep toolbox are freely available at http://mobiosd-hub.com/starpep/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +31016417,DNAmod: the DNA modification database.,"Covalent DNA modifications, such as 5-methylcytosine (5mC), are increasingly the focus of numerous research programs. In eukaryotes, both 5mC and 5-hydroxymethylcytosine (5hmC) are now recognized as stable epigenetic marks, with diverse functions. Bacteria, archaea, and viruses contain various other modified DNA nucleobases. Numerous databases describe RNA and histone modifications, but no database specifically catalogues DNA modifications, despite their broad importance in epigenetic regulation. To address this need, we have developed DNAmod: the DNA modification database. DNAmod is an open-source database ( https://dnamod.hoffmanlab.org ) that catalogues DNA modifications and provides a single source to learn about their properties. DNAmod provides a web interface to easily browse and search through these modifications. The database annotates the chemical properties and structures of all curated modified DNA bases, and a much larger list of candidate chemical entities. DNAmod includes manual annotations of available sequencing methods, descriptions of their occurrence in nature, and provides existing and suggested nomenclature. DNAmod enables researchers to rapidly review previous work, select mapping techniques, and track recent developments concerning modified bases of interest.","hji,kes",1,1,2,2,1,NA,NA +31034103,Functional analysis tools for post-translational modification: a post-translational modification database for analysis of proteins and metabolic pathways.,"Post-translational modifications (PTMs) are critical regulators of protein function, and nearly 200 different types of PTM have been identified. 
Advances in high-resolution mass spectrometry have led to the identification of an unprecedented number of PTM sites in numerous organisms, potentially facilitating a more complete understanding of how PTMs regulate cellular behavior. While databases have been created to house the resulting data, most of these resources focus on individual types of PTM, do not consider quantitative PTM analyses or do not provide tools for the visualization and analysis of PTM data. Here, we describe the Functional Analysis Tools for Post-Translational Modifications (FAT-PTM) database (https://bioinformatics.cse.unr.edu/fat-ptm/), which currently supports eight different types of PTM and over 49000 PTM sites identified in large-scale proteomic surveys of the model organism Arabidopsis thaliana. The FAT-PTM database currently supports tools to visualize protein-centric PTM networks, quantitative phosphorylation site data from over 10 different quantitative phosphoproteomic studies, PTM information displayed in protein-centric metabolic pathways and groups of proteins that are co-modified by multiple PTMs. Overall, the FAT-PTM database provides users with a robust platform to share and visualize experimentally supported PTM data, develop hypotheses related to target proteins or identify emergent patterns in PTM data for signaling and metabolic pathways.","hji,kes",1,1,2,2,1,NA,NA +31096089,"DrugR+: A comprehensive relational database for drug repurposing, combination therapy, and replacement therapy.","Drug repurposing or repositioning, which introduces new applications of the existing drugs, is an emerging field in drug discovery scope. To enhance the success rate of the research and development (R&D) process in a cost- and time-effective manner, a number of pharmaceutical companies worldwide have made tremendous investments. Besides, many researchers have proposed various methods and databases for the repurposing of various drugs. 
However, there is not a proper and well-organized database available. To this end, for the first time, we developed a new database based on DrugBank and KEGG data, which is named """"DrugR+"""". Our developed database provides some advantages relative to the DrugBank, and its interface supplies new capabilities for both single and synthetic repositioning of drugs. Moreover, it includes four new datasets which can be used for predicting drug-target interactions using supervised machine learning methods. As a case study, we introduced novel applications of some drugs and discussed the obtained results. A comparison of several machine learning methods on the generated datasets has also been reported in the Supplementary File. Having included several normalized tables, DrugR+has been organized to provide key information on data structures for the repurposing and combining applications of drugs. It provides the SQL query capability for professional users and an appropriate method with different options for unprofessional users. Additionally, DrugR+consists of repurposing service that accepts a drug and proposes a list of potential drugs for some usages. Taken all, DrugR+ is a free web-based database and accessible using (http://www.drugr.ir), which can be updated through a map-reduce parallel processing method to provide the most relevant information.","hji,kes",1,1,2,2,1,NA,NA +31110280,"CancerMine: a literature-mined resource for drivers, oncogenes and tumor suppressors in cancer.","Tumors from individuals with cancer are frequently genetically profiled for information about the driving forces behind the disease. We present the CancerMine resource, a text-mined and routinely updated database of drivers, oncogenes and tumor suppressors in different types of cancer. 
All data are available online ( http://bionlp.bcgsc.ca/cancermine ) and downloadable under a Creative Commons Zero license for ease of use.","hji,kes",1,1,2,2,1,NA,NA +31160594,"PathoPhenoDB, linking human pathogens to their phenotypes in support of infectious disease research.","Understanding the relationship between the pathophysiology of infectious disease, the biology of the causative agent and the development of therapeutic and diagnostic approaches is dependent on the synthesis of a wide range of types of information. Provision of a comprehensive and integrated disease phenotype knowledgebase has the potential to provide novel and orthogonal sources of information for the understanding of infectious agent pathogenesis, and support for research on disease mechanisms. We have developed PathoPhenoDB, a database containing pathogen-to-phenotype associations. PathoPhenoDB relies on manual curation of pathogen-disease relations, on ontology-based text mining as well as manual curation to associate host disease phenotypes with infectious agents. Using Semantic Web technologies, PathoPhenoDB also links to knowledge about drug resistance mechanisms and drugs used in the treatment of infectious diseases. PathoPhenoDB is accessible at http://patho.phenomebrowser.net/ , and the data are freely available through a public SPARQL endpoint.","hji,kes",1,1,2,2,1,NA,NA +31284879,ImtRDB: a database and software for mitochondrial imperfect interspersed repeats annotation.,"

Background

Mitochondria is a powerhouse of all eukaryotic cells that have its own circular DNA (mtDNA) encoding various RNAs and proteins. Somatic perturbations of mtDNA are accumulating with age thus it is of great importance to uncover the main sources of mtDNA instability. Recent analyses demonstrated that somatic mtDNA deletions depend on imperfect repeats of various nature between distant mtDNA segments. However, till now there are no comprehensive databases annotating all types of imperfect repeats in numerous species with sequenced complete mitochondrial genome as well as there are no algorithms capable to call all types of imperfect repeats in circular mtDNA.

Results

We implemented naive algorithm of pattern recognition by analogy to standard dot-plot construction procedures allowing us to find both perfect and imperfect repeats of four main types: direct, inverted, mirror and complementary. Our algorithm is adapted to specific characteristics of mtDNA such as circularity and an excess of short repeats - it calls imperfect repeats starting from the length of 10 b.p. We constructed interactive web available database ImtRDB depositing perfect and imperfect repeats positions in mtDNAs of more than 3500 Vertebrate species. Additional tools, such as visualization of repeats within a genome, comparison of repeat densities among different genomes and a possibility to download all results make this database useful for many biologists. Our first analyses of the database demonstrated that mtDNA imperfect repeats (i) are usually short; (ii) associated with unfolded DNA structures; (iii) four types of repeats positively correlate with each other forming two equivalent pairs: direct and mirror versus inverted and complementary, with identical nucleotide content and similar distribution between species; (iv) abundance of repeats is negatively associated with GC content; (v) dinucleotides GC versus CG are overrepresented on light chain of mtDNA covered by repeats.

Conclusions

ImtRDB is available at http://bioinfodbs.kantiana.ru/ImtRDB/ . It is accompanied by the software calling all types of interspersed repeats with different level of degeneracy in circular DNA. This database and software can become a very useful tool in various areas of mitochondrial and chloroplast DNA research.","hji,kes",1,1,2,2,1,NA,NA +31416842,Curatopes Melanoma: A Database of Predicted T-cell Epitopes from Overly Expressed Proteins in Metastatic Cutaneous Melanoma.,"Therapeutic anticancer vaccination has been adapted as an immunotherapy in several solid tumors. However, the selection of promising candidates from the total quantity of possible epitopes poses a challenge to clinicians and bioinformaticians alike, and very few epitopes have been tested in experimental or clinical settings to validate their efficacy. Here, we present a comprehensive database of predicted nonmutated peptide epitopes derived from genes that are overly expressed in a group of 32 melanoma biopsies compared with healthy tissues and that were filtered against expression in a curated list of survival-critical tissues. We hypothesize that these """"self-tolerant"""" epitopes have two desirable properties: they do not depend on mutations, being immediately applicable to a large patient collective, and they potentially cause fewer autoimmune reactions. To support epitope selection, we provide an aggregated score of expected therapeutic efficiency as a shortlist mechanism. The database has applications in facilitating epitope selection and trial design and is freely accessible at https://www.curatopes.com. 
SIGNIFICANCE: A database is presented that predicts and scores antitumor T-cell epitopes, with a focus on tolerability and avoidance of severe autoimmunity, offering a supplementary epitope set for further investigation in immunotherapy.","hji,kes",1,1,2,2,1,NA,NA +31490686,TMB Library of Nucleosome Simulations.,"Nucleosomes are the fundamental building blocks of chromatin, the biomaterial that houses the genome in all higher organisms. A nucleosome consists of 145-147 base pairs of DNA wrapped 1.7 times around eight histones. Given a four-letter code (A, C, G, T), there are approximately 4^147 or 10^88 oligonucleotides that can form a nucleosome. Comparative, rather than comprehensive, studies are required. Here we introduce the TMB Library of nucleosome simulations and present a meta-analysis of over 20 μs of all atom molecular dynamics simulations representing 518 different realizations of the nucleosome. The TMB Library serves as a reference for future comparative, on-demand simulations of nucleosomes and a demonstration of iBIOMES Lite as a tool for managing a laboratory's simulation library. For every simulation, dewatered trajectories, RMSD, and DNA helical parameter data are provided through iBIOMES Lite in a Web browser and a file browser format. A novel view of nucleosomal DNA emerges from our meta-analysis of the TMB Library. DNA conformation is restricted to a specific left-handed superhelix, but the range of conformations observed for individual bases and base pairs is not more restricted nor more highly deformed than DNA free in solution. With the exception of Roll, mean DNA helical parameter values obtained from simulations of nucleosomes are largely within the range of thermal motion of DNA free in solution. The library provides evidence of DNA kinking in the nucleosome and clearly demonstrates the effects of DNA sequence on the gross structure and dynamics of nucleosomes. 
These effects and mispositioning of the 601 super strong nucleosome positioning sequence can be detected in short simulations (10 ns). Collectively, the results provide a basis for comparative simulation studies of nucleosomes and extend our understanding of the binding of proteins and drugs to nucleosomal DNA. The TMB Library can be found at http://dna.engr.latech.edu/~tmbshare/ .","hji,kes",1,1,2,2,1,NA,NA +31642469,PhenoModifier: a genetic modifier database for elucidating the genetic basis of human phenotypic variation.,"From clinical observations to large-scale sequencing studies, the phenotypic impact of genetic modifiers is evident. To better understand the full spectrum of the genetic contribution to human disease, concerted efforts are needed to construct a useful modifier resource for interpreting the information from sequencing data. Here, we present the PhenoModifier (https://www.biosino.org/PhenoModifier), a manually curated database that provides a comprehensive overview of human genetic modifiers. By manually curating over ten thousand published articles, 3078 records of modifier information were entered into the current version of PhenoModifier, related to 288 different disorders, 2126 genetic modifier variants and 843 distinct modifier genes. To help users probe further into the mechanism of their interested modifier genes, we extended the yeast genetic interaction data and yeast quantitative trait loci to the human and we also integrated GWAS data into the PhenoModifier to assist users in evaluating all possible phenotypes associated with a modifier allele. As the first comprehensive resource of human genetic modifiers, PhenoModifier provides a more complete spectrum of genetic factors contributing to human phenotypic variation. 
The portal has a broad scientific and clinical scope, spanning activities relevant to variant interpretation for research purposes as well as clinical decision making.","hji,kes",1,1,2,2,1,NA,NA +31647100,Bovine Genome Database: new annotation tools for a new reference genome.,"The Bovine Genome Database (BGD) (http://bovinegenome.org) has been the key community bovine genomics database for more than a decade. To accommodate the increasing amount and complexity of bovine genomics data, BGD continues to advance its practices in data acquisition, curation, integration and efficient data retrieval. BGD provides tools for genome browsing (JBrowse), genome annotation (Apollo), data mining (BovineMine) and sequence database searching (BLAST). To augment the BGD genome annotation capabilities, we have developed a new Apollo plug-in, called the Locus-Specific Alternate Assembly (LSAA) tool, which enables users to identify and report potential genome assembly errors and structural variants. BGD now hosts both the newest bovine reference genome assembly, ARS-UCD1.2, as well as the previous reference genome, UMD3.1.1, with cross-genome navigation and queries supported in JBrowse and BovineMine, respectively. Other notable enhancements to BovineMine include the incorporation of genomes and gene annotation datasets for non-bovine ruminant species (goat and sheep), support for multiple assemblies per organism in the Regions Search tool, integration of additional ontologies and development of many new template queries. 
To better serve the research community, we continue to focus on improving existing tools, developing new tools, adding new datasets and encouraging researchers to use these resources.","hji,kes",1,1,2,2,1,NA,NA +31664080,dendPoint: a web resource for dendrimer pharmacokinetics investigation and prediction.,"Nanomedicine development currently suffers from a lack of efficient tools to predict pharmacokinetic behavior without relying upon testing in large numbers of animals, impacting success rates and development costs. This work presents dendPoint, the first in silico model to predict the intravenous pharmacokinetics of dendrimers, a commonly explored drug vector, based on physicochemical properties. We have manually curated the largest relational database of dendrimer pharmacokinetic parameters and their structural/physicochemical properties. This was used to develop a machine learning-based model capable of accurately predicting pharmacokinetic parameters, including half-life, clearance, volume of distribution and dose recovered in the liver and urine. dendPoint successfully predicts dendrimer pharmacokinetic properties, achieving correlations of up to r = 0.83 and Q2 up to 0.68. dendPoint is freely available as a user-friendly web-service and database at http://biosig.unimelb.edu.au/dendpoint . This platform is ultimately expected to be used to guide dendrimer construct design and refinement prior to embarking on more time consuming and expensive in vivo testing.","hji,kes",1,1,2,2,1,NA,NA +31780665,A database of high-resolution MS/MS spectra for lichen metabolites.,"While analytical techniques in natural products research massively shifted to liquid chromatography-mass spectrometry, lichen chemistry remains reliant on limited analytical methods, Thin Layer Chromatography being the gold standard. 
To meet the modern standards of metabolomics within lichenochemistry, we announce the publication of an open access MS/MS library with 250 metabolites, coined LDB for Lichen DataBase, providing a comprehensive coverage of lichen chemodiversity. These were donated by the Berlin Garden and Botanical Museum from the collection of Siegfried Huneck to be analyzed by LC-MS/MS. Spectra at individual collision energies were submitted to MetaboLights (https://www.ebi.ac.uk/metabolights/MTBLS999) while merged spectra were uploaded to the GNPS platform (CCMSLIB00004751209 to CCMSLIB00004751517). Technical validation was achieved by dereplicating three lichen extracts using a Molecular Networking approach, revealing the detection of eleven unique molecules that would have been missed without LDB implementation to the GNPS. From a chemist's viewpoint, this database should help streamlining the isolation of formerly unreported metabolites. From a taxonomist perspective, the LDB offers a versatile tool for the chemical profiling of newly reported species.","hji,kes",1,1,2,2,1,NA,NA +32119071,ProCaff: protein-carbohydrate complex binding affinity database.,"MOTIVATION:Protein-carbohydrate interactions perform several cellular and biological functions and their structure and function are mainly dictated by their binding affinity. Although plenty of experimental data on binding affinity are available, there is no reliable and comprehensive database in the literature. RESULTS:We have developed a database on binding affinity of protein-carbohydrate complexes, ProCaff, which contains 3122 entries on dissociation constant (Kd), Gibbs free energy change (ΔG), experimental conditions, sequence, structure and literature information. Additional features include the options to search, display, visualization, download and upload the data. AVAILABILITY AND IMPLEMENTATION:The database is freely available at http://web.iitm.ac.in/bioinfo2/procaff/. 
The website is implemented using HTML and PHP and supports recent versions of major browsers such as Chrome, Firefox, IE10 and Opera. CONTACT:gromiha@iitm.ac.in. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,NA,NA +32159215,TeaMiD: a comprehensive database of simple sequence repeat markers of tea.,"Tea is a highly cross-pollinated, woody, perennial tree. High heterozygosity combined with a long gestational period makes conventional breeding a cumbersome process. Therefore, marker-assisted breeding is a better alternative approach when compared with conventional breeding. Considering the large genome size of tea (~3Gb), information about simple sequence repeat (SSR) is scanty. Thus, we have taken advantage of the recently published tea genomes to identify large numbers of SSR markers in the tea. Besides the genomic sequences, we identified SSRs from the other publicly available sequences such as RNA-seq, GSS, ESTs and organelle genomes (chloroplasts and mitochondrial) and also searched published literature to catalog validated set of tea SSR markers. The complete exercise yielded a total of 935 547 SSRs. Out of the total, 82 SSRs were selected for validation among a diverse set of tea genotypes. Six primers (each with four to six alleles, an average of five alleles per locus) out of the total 27 polymorphic primers were used for a diversity analysis in 36 tea genotypes with mean polymorphic information content of 0.61-0.76. Finally, using all the information generated in this study, we have developed a user-friendly database (TeaMiD; http://indianteagenome.in:8080/teamid/) that hosts SSR from all the six resources including three nuclear genomes of tea and transcriptome sequences of 17 Camellia wild species. 
Database URL: http://indianteagenome.in:8080/teamid/.","hji,kes",1,1,2,2,1,NA,NA +32219412,Circad: a comprehensive manually curated resource of circular RNA associated with diseases.,"Circular RNAs (circRNAs) are unique transcript isoforms characterized by back splicing of exon ends to form a covalently closed loop or circular conformation. These transcript isoforms are now known to be expressed in a variety of organisms across the kingdoms of life. Recent studies have shown the role of circRNAs in a number of diseases and increasing evidence points to their potential application as biomarkers in these diseases. We have created a comprehensive manually curated database of circular RNAs associated with diseases. This database is available at URL http://clingen.igib.res.in/circad/. The Database lists more than 1300 circRNAs associated with 150 diseases and mapping to 113 International Statistical Classification of Diseases (ICD) codes with evidence of association linked to published literature. The database is unique in many ways. Firstly, it provides ready-to-use primers to work with, in order to use circRNAs as biomarkers or to perform functional studies. It additionally lists the assay and PCR primer details including experimentally validated ones as a ready reference to researchers along with fold change and statistical significance. It also provides standard disease nomenclature as per the ICD codes. To the best of our knowledge, circad is the most comprehensive and updated database of disease associated circular RNAs.Availability: http://clingen.igib.res.in/circad/.","hji,kes",1,1,2,2,1,NA,NA +32277449,Choice of the Promoter for Tissue and Developmental Stage-Specific Gene Expression.,"Transgenic technologies belong to important tools of reverse genetics and biotechnology in plants. Targeted genetic modifications can reveal functions of genes of interest, change metabolic and regulatory pathways, or result in accumulation of valuable proteins or metabolites. 
However, to be efficient in targeted genetic modification, the chimeric gene construct should be designed properly. In particular, the promoters used to control transgene expression need to be carefully chosen. Most promoters in widely used vectors belong to strong and constitutively expressed variants. However, in many cases transgene expression has to be restricted to certain tissue, stage of development, or response to some internal or external stimuli. In turn, a large variety of tissue-specific promoters have been studied and information on their characteristics may be recovered from the literature. An appropriate promoter may be selected and used in genetic construct to optimize the transgene transcription pattern. We have previously designed the TGP database (TransGene Promoters, http://wwwmgs.bionet.nsc.ru/mgs/dbases/tgp/home.html ) collecting information from the publications in this field. Here we review the wide range of noncanonical tissue-specific and developmentally regulated promoters that might be used for transgene expression control.","hji,kes",1,1,2,2,1,NA,iffy +32404014,PDB-2-PBv3.0: An updated protein block database.,"Our protein block (PB) sequence database PDB-2-PBv1.0 provides PB sequences and dihedral angles for 74,297 protein structures comprising of 103,252 protein chains of Protein Data Bank (PDB) as on 2011. Since there are a lot of practical applications of PB and also as the size of PDB database increases, it becomes necessary to provide the PB sequences for all PDB protein structures. The current updated PDB-2-PBv3.0 contains PB sequences for 147,602 PDB structures comprising of 400,355 protein chains as on October 2019. When compared to our previous version PDB-2-PBv1.0, the current PDB-2-PBv3.0 contains 2- and 4-fold increase in the number of protein structures and chains, respectively. Notably, it provides PB information for any protein chain, regardless of the missing atom records of protein structure data in PDB. 
It includes protein interaction information with DNA and RNA along with their corresponding functional classes from Nucleic Acid Database (NDB) and PDB. Now, the updated version allows the user to download multiple PB records by parameter search and/or by a given list. This database is freely accessible at http://bioinfo.bdu.ac.in/pb3.","hji,kes",1,1,2,2,1,NA,NA +32422927,HDVdb: A Comprehensive Hepatitis D Virus Database.,"Hepatitis D virus (HDV) causes the most severe form of viral hepatitis, which may rapidly progress to liver cirrhosis and hepatocellular carcinoma (HCC). It has been estimated that 15-20 million people worldwide are suffering from the chronic HDV infection. Currently, no effective therapies are available to treat acute or chronic HDV infection. The remarkable sequence variability of the HDV genome, particularly within the hypervariable region has resulted in the provisional classification of eight major genotypes and various subtypes. We have developed a specialized database, HDVdb (http://hdvdb.bio.wzw.tum.de/), which contains a collection of partial and complete HDV genomic sequences obtained from the GenBank and from our own patient cohort. HDVdb enables the researchers to investigate the genetic variability of all available HDV sequences, correlation of genotypes to epidemiology and pathogenesis. Additionally, it will contribute in understanding the drug resistant mutations and develop effective vaccines against HDV infection. 
The database can be accessed through a web interface that allows for static and dynamic queries and offers integrated generic and specialized sequence analysis tools, such as annotation, genotyping, primer prediction, and phylogenetic analyses.","hji,kes",1,1,2,2,1,NA,NA +32647128,"IDEAL, the Infectious Diseases of East African Livestock project open access database and biobank.","The Infectious Diseases of East African Livestock (IDEAL) project was a longitudinal cohort study of calf health which was conducted in Western Kenya between 2007-2010. A total of 548 East African shorthorn zebu calves were recruited at birth and followed at least every 5 weeks during the first year of life. Comprehensive clinical and epidemiological data, blood and tissue samples were collected at every visit. These samples were screened for over 100 different pathogens or infectious exposures, using a range of diagnostic methods. This manuscript describes this comprehensive dataset and bio-repository, and how to access it through a single online site ( http://data.ctlgh.org/ideal/ ). This provides extensive filtering and searching capabilities. These data are useful to illustrate outcomes of multiple infections on health, investigate patterns of morbidity and mortality due to parasite infections, and to study genotypic determinants of immunity and disease.","hji,kes",1,1,2,2,1,NA,NA +32858223,hTFtarget: A Comprehensive Database for Regulations of Human Transcription Factors and Their Targets.,"Transcription factors (TFs) as key regulators play crucial roles in biological processes. The identification of TF-target regulatory relationships is a key step for revealing functions of TFs and their regulations on gene expression. The accumulated data of chromatin immunoprecipitation sequencing (ChIP-seq) provide great opportunities to discover the TF-target regulations across different conditions. 
In this study, we constructed a database named hTFtarget, which integrated huge human TF target resources (7190 ChIP-seq samples of 659 TFs and high-confidence binding sites of 699 TFs) and epigenetic modification information to predict accurate TF-target regulations. hTFtarget offers the following functions for users to explore TF-target regulations: (1) browse or search general targets of a query TF across datasets; (2) browse TF-target regulations for a query TF in a specific dataset or tissue; (3) search potential TFs for a given target gene or non-coding RNA; (4) investigate co-association between TFs in cell lines; (5) explore potential co-regulations for given target genes or TFs; (6) predict candidate TF binding sites on given DNA sequences; (7) visualize ChIP-seq peaks for different TFs and conditions in a genome browser. hTFtarget provides a comprehensive, reliable and user-friendly resource for exploring human TF-target regulations, which will be very useful for a wide range of users in the TF and gene expression regulation community. hTFtarget is available at http://bioinfo.life.hust.edu.cn/hTFtarget.","hji,kes",1,1,2,2,1,NA,NA +32931381,ZenoFishDb v1.1: A Database for Xenotransplantation Studies in Zebrafish.,"Rapidly accumulating literature has proven feasibility of the zebrafish xenograft models in cancer research. Nevertheless, online databases for searching the current zebrafish xenograft literature are in great demand. Herein, we have developed a manually curated database, called ZenoFishDb v1.1 (https://konulab.shinyapps.io/zenofishdb), based on R Shiny platform aiming to provide searchable information on ever increasing collection of zebrafish studies for cancer cell line transplantation and patient-derived xenografts (PDXs). ZenoFishDb v1.1 user interface contains four modules: DataTable, Visualization, PDX Details, and PDX Charts. 
The DataTable and Visualization pages represent xenograft study details, including injected cell lines, PDX injections, molecular modifications of cell lines, zebrafish strains, as well as technical aspects of the xenotransplantation procedures in table, bar, and/or pie chart formats. The PDX Details module provides comprehensive information on the patient details in table format and can be searched and visualized. Overall, ZenoFishDb v1.1 enables researchers to effectively search, list, and visualize different technical and biological attributes of zebrafish xenotransplantation studies particularly focusing on the new trends that make use of reporters, RNA interference, overexpression, or mutant gene constructs of transplanted cancer cells, stem cells, and PDXs, as well as distinguished host modifications.","hji,kes",1,1,2,2,1,NA,NA +33008298,Database: web application for visualization of the cumulated RNAseq data against the salicylic acid (SA) and methyl jasmonate (MeJA) treatment of Arabidopsis thaliana.,"

Background

Plants have adapted to survive under adverse conditions or exploit favorable conditions in response to their environment as sessile creatures. In a way of plant adaptation, plant hormones have been evolved to efficiently use limited resources. Plant hormones including auxin, jasmonic acid, salicylic acid, and ethylene have been studied to reveal their role in plant adaptation against their environment by phenotypic observation with experimental design such as mutation on hormone receptors and treatment / non-treatment of plant hormones along with other environmental conditions. With the development of Next Generation Sequencing (NGS) technology, it became possible to score the total gene expression of the sampled plants and estimate the degree of effect of plant hormones in gene expression. This allowed us to infer the signaling pathway through plant hormones, which greatly stimulated the study of functional genomics using mutants. Due to the continued development of NGS technology and analytical techniques, many plant hormone-related studies have produced and accumulated NGS-based data, especially RNAseq data have been stored in the sequence read archive represented by NCBI, EBI, and DDBJ.

Description

Here, hormone treatment RNAseq data of Arabidopsis (Col0), wild-type genotype, were collected with mock, SA, and MeJA treatments. The genes affected by hormones were identified through a machine learning approach. The degree of expression of the affected gene was quantified, visualized in boxplot using d3 (data-driven-document), and the database was built by Django.

Conclusion

Using this database, we created a web application ( http://pgl.gnu.ac.kr/hormoneDB/ ) that lists hormone-related or hormone-affected genes and visualizes the boxplot of the gene expression of selected genes. This web application eventually aids the functional genomics researchers who want to gather the cases of the gene responses by the hormones.","hji,kes",1,1,2,2,1,NA,NA +33103271,The NanDeSyn database for Nannochloropsis systems and synthetic biology.,"Nannochloropsis species, unicellular industrial oleaginous microalgae, are model organisms for microalgal systems and synthetic biology. To facilitate community-based annotation and mining of the rapidly accumulating functional genomics resources, we have initiated an international consortium and present a comprehensive multi-omics resource database named Nannochloropsis Design and Synthesis (NanDeSyn; http://nandesyn.single-cell.cn). Via the Tripal toolkit, it features user-friendly interfaces hosting genomic resources with gene annotations and transcriptomic and proteomic data for six Nannochloropsis species, including two updated genomes of Nannochloropsis oceanica IMET1 and Nannochloropsis salina CCMP1776. Toolboxes for search, Blast, synteny view, enrichment analysis, metabolic pathway analysis, a genome browser, etc. are also included. In addition, functional validation of genes is indicated based on phenotypes of mutants and relevant bibliography. Furthermore, epigenomic resources are also incorporated, especially for sequencing of small RNAs including microRNAs and circular RNAs. 
Such comprehensive and integrated landscapes of Nannochloropsis genomics and epigenomics will promote and accelerate community efforts in systems and synthetic biology of these industrially important microalgae.","hji,kes",1,1,2,2,1,NA,NA +33137185,iCSDB: an integrated database of CRISPR screens.,"High-throughput screening based on CRISPR-Cas9 libraries has become an attractive and powerful technique to identify target genes for functional studies. However, accessibility of public data is limited due to the lack of user-friendly utilities and up-to-date resources covering experiments from third parties. Here, we describe iCSDB, an integrated database of CRISPR screening experiments using human cell lines. We compiled two major sources of CRISPR-Cas9 screening: the DepMap portal and BioGRID ORCS. DepMap portal itself is an integrated database that includes three large-scale projects of CRISPR screening. We additionally aggregated CRISPR screens from BioGRID ORCS that is a collection of screening results from PubMed articles. Currently, iCSDB contains 1375 genome-wide screens across 976 human cell lines, covering 28 tissues and 70 cancer types. Importantly, the batch effects from different CRISPR libraries were removed and the screening scores were converted into a single metric to estimate the knockout efficiency. Clinical and molecular information were also integrated to help users to select cell lines of interest readily. Furthermore, we have implemented various interactive tools and viewers to facilitate users to choose, examine and compare the screen results both at the gene and guide RNA levels. iCSDB is available at https://www.kobic.re.kr/icsdb/.","hji,kes",1,1,2,2,1,NA,NA +33166383,FireProtDB: database of manually curated protein stability data.,"The majority of naturally occurring proteins have evolved to function under mild conditions inside the living organisms. 
One of the critical obstacles for the use of proteins in biotechnological applications is their insufficient stability at elevated temperatures or in the presence of salts. Since experimental screening for stabilizing mutations is typically laborious and expensive, in silico predictors are often used for narrowing down the mutational landscape. The recent advances in machine learning and artificial intelligence further facilitate the development of such computational tools. However, the accuracy of these predictors strongly depends on the quality and amount of data used for training and testing, which have often been reported as the current bottleneck of the approach. To address this problem, we present a novel database of experimental thermostability data for single-point mutants FireProtDB. The database combines the published datasets, data extracted manually from the recent literature, and the data collected in our laboratory. Its user interface is designed to facilitate both types of the expected use: (i) the interactive explorations of individual entries on the level of a protein or mutation and (ii) the construction of highly customized and machine learning-friendly datasets using advanced searching and filtering. The database is freely available at https://loschmidt.chemi.muni.cz/fireprotdb.","hji,kes",1,1,2,2,1,NA,NA +33313828,PSORTdb 4.0: expanded and redesigned bacterial and archaeal protein subcellular localization database incorporating new secondary localizations.,"Protein subcellular localization (SCL) is important for understanding protein function, genome annotation, and aids identification of potential cell surface diagnostic markers, drug targets, or vaccine components. PSORTdb comprises ePSORTdb, a manually curated database of experimentally verified protein SCLs, and cPSORTdb, a pre-computed database of PSORTb-predicted SCLs for NCBI's RefSeq deduced bacterial and archaeal proteomes. We now report PSORTdb 4.0 (http://db.psort.org/). 
It features a website refresh, in particular a more user-friendly database search. It also addresses the need to uniquely identify proteins from NCBI genomes now that GI numbers have been retired. It further expands both ePSORTdb and cPSORTdb, including additional data about novel secondary localizations, such as proteins found in bacterial outer membrane vesicles. Protein predictions in cPSORTdb have increased along with the number of available microbial genomes, from approximately 13 million when PSORTdb 3.0 was released, to over 66 million currently. Now, analyses of both complete and draft genomes are included. This expanded database will be of wide use to researchers developing SCL predictors or studying diverse microbes, including medically, agriculturally and industrially important species that have both classic or atypical cell envelope structures or vesicles.","hji,kes",1,1,2,2,1,NA,NA +33511845,FMODB: The World's First Database of Quantum Mechanical Calculations for Biomacromolecules Based on the Fragment Molecular Orbital Method.,"We developed the world's first web-based public database for the storage, management, and sharing of fragment molecular orbital (FMO) calculation data sets describing the complex interactions between biomacromolecules, named FMO Database (https://drugdesign.riken.jp/FMODB/). Each entry in the database contains relevant background information on how the data was compiled as well as the total energy of each molecular system and interfragment interaction energy (IFIE) and pair interaction energy decomposition analysis (PIEDA) values. Currently, the database contains more than 13600 FMO calculation data sets, and a comprehensive search function implemented at the front-end. The procedure for selecting target proteins, preprocessing the experimental structures, construction of the database, and details of the database front-end were described. 
Then, we demonstrated a use of the FMODB by comparing IFIE value distributions of hydrogen bond, ion-pair, and XH/p interactions obtained by FMO method to those by molecular mechanics approach. From the comparison, the statistical analysis of the data provided standard reference values for the three types of interactions that will be useful for determining whether each interaction in a given system is relatively strong or weak compared to the interactions contained within the data in the FMODB. In the final part, we demonstrate the use of the database to examine the contribution of halogen atoms to the binding affinity between human cathepsin L and its inhibitors. We found that the electrostatic term derived by PIEDA greatly correlated with the binding affinities of the halogen containing cathepsin L inhibitors, indicating the importance of QM calculation for quantitative analysis of halogen interactions. Thus, the FMO calculation data in FMODB will be useful for conducting statistical analyses to drug discovery, for conducting molecular recognition studies in structural biology, and for other studies involving quantum mechanics-based interactions.","hji,kes",1,1,2,2,1,NA,NA +33704069,"The Global Landscape of SARS-CoV-2 Genomes, Variants, and Haplotypes in 2019nCoVR.","On January 22, 2020, China National Center for Bioinformation (CNCB) released the 2019 Novel Coronavirus Resource (2019nCoVR), an open-access information resource for the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). 2019nCoVR features a comprehensive integration of sequence and clinical information for all publicly available SARS-CoV-2 isolates, which are manually curated with value-added annotations and quality evaluated by an automated in-house pipeline. Of particular note, 2019nCoVR offers systematic analyses to generate a dynamic landscape of SARS-CoV-2 genomic variations at a global scale. 
It provides all identified variants and their detailed statistics for each virus isolate, and congregates the quality score, functional annotation, and population frequency for each variant. Spatiotemporal change for each variant can be visualized and historical viral haplotype network maps for the course of the outbreak are also generated based on all complete and high-quality genomes available. Moreover, 2019nCoVR provides a full collection of SARS-CoV-2 relevant literature on the coronavirus disease 2019 (COVID-19), including published papers from PubMed as well as preprints from services such as bioRxiv and medRxiv through Europe PMC. Furthermore, by linking with relevant databases in CNCB, 2019nCoVR offers data submission services for raw sequence reads and assembled genomes, and data sharing with NCBI. Collectively, SARS-CoV-2 is updated daily to collect the latest information on genome sequences, variants, haplotypes, and literature for a timely reflection, making 2019nCoVR a valuable resource for the global research community. 2019nCoVR is accessible at https://bigd.big.ac.cn/ncov/.","hji,kes",1,1,2,2,1,NA,NA +33787872,Drugmonizome and Drugmonizome-ML: integration and abstraction of small molecule attributes for drug enrichment analysis and machine learning.,"Understanding the underlying molecular and structural similarities between seemingly heterogeneous sets of drugs can aid in identifying drug repurposing opportunities and assist in the discovery of novel properties of preclinical small molecules. A wealth of information about drug and small molecule structure, targets, indications and side effects; induced gene expression signatures; and other attributes are publicly available through web-based tools, databases and repositories. By processing, abstracting and aggregating information from these resources into drug set libraries, knowledge about novel properties of drugs and small molecules can be systematically imputed with machine learning. 
In addition, drug set libraries can be used as the underlying database for drug set enrichment analysis. Here, we present Drugmonizome, a database with a search engine for querying annotated sets of drugs and small molecules for performing drug set enrichment analysis. Utilizing the data within Drugmonizome, we also developed Drugmonizome-ML. Drugmonizome-ML enables users to construct customized machine learning pipelines using the drug set libraries from Drugmonizome. To demonstrate the utility of Drugmonizome, drug sets from 12 independent SARS-CoV-2 in vitro screens were subjected to consensus enrichment analysis. Despite the low overlap among these 12 independent in vitro screens, we identified common biological processes critical for blocking viral replication. To demonstrate Drugmonizome-ML, we constructed a machine learning pipeline to predict whether approved and preclinical drugs may induce peripheral neuropathy as a potential side effect. Overall, the Drugmonizome and Drugmonizome-ML resources provide rich and diverse knowledge about drugs and small molecules for direct systems pharmacology applications. Database URL: https://maayanlab.cloud/drugmonizome/.","hji,kes",1,1,2,2,1,NA,NA +33798715,COnVIDa: COVID-19 multidisciplinary data collection and dashboard.,"Since the first reported case in Wuhan in late 2019, COVID-19 has rapidly spread worldwide, dramatically impacting the lives of millions of citizens. To deal with the severe crisis resulting from the pandemic, worldwide institutions have been forced to make decisions that profoundly affect the socio-economic realm. In this sense, researchers from diverse knowledge areas are investigating the behavior of the disease in a rush against time. In both cases, the lack of reliable data has been an obstacle to carry out such tasks with accuracy. 
To tackle this challenge, COnVIDa (https://convida.inf.um.es) has been designed and developed as a user-friendly tool that easily gathers rigorous multidisciplinary data related to the COVID-19 pandemic from different data sources. In particular, the pandemic expansion is analyzed with variables of health nature, but also social ones, mobility, etc. Besides, COnVIDa permits to smoothly join such data, compare and download them for further analysis. Due to the open-science nature of the project, COnVIDa is easily extensible to any other region of the planet. In this way, COnVIDa becomes a data facilitator for decision-making processes, as well as a catalyst for new scientific researches related to this pandemic.","hji,kes",1,1,2,2,1,NA,NA +34177338,MassBase: A large-scaled depository of mass spectrometry datasets for metabolome analysis.,"Depository of low-molecular-weight compounds or metabolites detected in various organisms in a non-targeted manner is indispensable for metabolomics research. Due to the diverse chemical compounds, various mass spectrometry (MS) setups with state-of-the-art technologies have been used. Over the past two decades, we have analyzed various biological samples by using gas chromatography-mass spectrometry, liquid chromatography-mass spectrometry, or capillary electrophoresis-mass spectrometry, and archived the datasets in the depository MassBase (http://webs2.kazusa.or.jp/massbase/). As the format of MS datasets depends on the MS setup used, we converted each raw binary dataset of the mass chromatogram to text file format, and thereafter, information of the chromatograph peak was extracted in the text file from the converted file. In total, the depository comprises 46,493 datasets, of which 38,750 belong to the plant species and 7,743 are authentic or mixed chemicals as well as other sources (microorganisms, animals, and foods), as on August 1, 2020. All files in the depository can be downloaded in bulk from the website. 
Mass chromatograms of 90 plant species obtained by LC-Fourier transform ion cyclotron resonance MS or Orbitrap MS, which detect the ionized molecules with high accuracy allowing speculation of chemical compositions, were converted to text files by the software PowerGet, and the chemical annotation of each peak was added. The processed datasets were deposited in the annotation database KomicMarket2 (http://webs2.kazusa.or.jp/km2/). The archives provide fundamental resources for comparative metabolomics and functional genomics, which may result in deeper understanding of living organisms.","hji,kes",1,1,2,2,1,NA,NA +IND601142821,MulSatDB: a first online database for mulberry microsatellites,"KEY MESSAGE : Simple sequence repeat motifs were mined from the genome and EST sequences of Morus notabilis and archived in MulSatDB. Bioinformatics tools were integrated with the database for the analysis of genomic datasets. Mulberry is a crop of economic importance in sericulture, which shapes the lives of millions of rural people among different Eurasian and Latin American countries. Limited availability of genomic resources has constrained the molecular breeding efforts in mulberry, a poorly studied crop. Microsatellite or simple sequence repeat (SSR) has revolutionized the plant breeding and is used in linkage mapping, association studies, diversity, and parentage analysis, etc. Recent availability of mulberry whole genome assembly provided an opportunity for the development of mulberry-specific DNA markers. In this study, we mined a total of 217,312 microsatellites from whole genome and 961 microsatellites from EST sequences of Morus notabilis. Mono-repeats were predominant among both whole genome and EST sequences. The SSR containing EST sequences were functionally annotated, and SSRs mined from whole genome were mapped on chromosomes of the phylogenetically related genusFragaria vesca, to aid the selection of markers based on the function and location. 
All the mined markers were archived in the mulberry microsatellite database (MulSatDB), and the markers can be retrieved based on different criteria like marker location, repeat kind, motif type and size. Primer3plus and CMap tools are integrated with the database to design primers for PCR amplification and to visualize markers on F. vesca chromosomes, respectively. A blast tool is also integrated to collate new markers with the database. MulSatDB is the first and complete destination for mulberry researchers to browse SSR markers, design primers, and locate markers on strawberry chromosomes. MulSatDB is freely accessible at http://btismysore.in/mulsatdb .","hji,kes",1,1,2,2,1,NA,NA +IND606040020,Global database of plants with root‐symbiotic nitrogen fixation: NodDB,"Plants associated with symbiotic N-fixing bacteria play important roles in early successional, riparian and semi-dry ecosystems. These so-called N-fixing plants are widely used for reclamation of disturbed vegetation and improvement of soil fertility in agroforestry. Yet, available information about plants that are capable of establishing nodulation is fragmented and somewhat outdated. This article introduces the NodDB database of N-fixing plants based on morphological and phylogenetic evidence (available at https://doi.org/10.15156/bio/587469) and discusses plant groups with conflicting reports and interpretation, such as certain legume clades and the Zygophyllaceae family. During angiosperm evolution, N-fixing plants became common in the fabid rather than in the nitrogen-fixing clade. The global GBIF plant species distribution data indicated that N-fixing plants tend to be relatively more diverse in savanna and semi-desert biomes. 
The compiled and re-interpreted information about N-fixing plants enables accurate analyses of biogeography and community ecology of biological N fixation.","hji,kes",1,1,2,2,1,NA,NA +21249531,The DIADEM data sets: representative light microscopy images of neuronal morphology to advance automation of digital reconstructions.,"The comprehensive characterization of neuronal morphology requires tracing extensive axonal and dendritic arbors imaged with light microscopy into digital reconstructions. Considerable effort is ongoing to automate this greatly labor-intensive and currently rate-determining process. Experimental data in the form of manually traced digital reconstructions and corresponding image stacks play a vital role in developing increasingly more powerful reconstruction algorithms. The DIADEM challenge (short for DIgital reconstruction of Axonal and DEndritic Morphology) successfully stimulated progress in this area by utilizing six data set collections from different animal species, brain regions, neuron types, and visualization methods. The original research projects that provided these data are representative of the diverse scientific questions addressed in this field. At the same time, these data provide a benchmark for the types of demands automated software must meet to achieve the quality of manual reconstructions while minimizing human involvement. The DIADEM data underwent extensive curation, including quality control, metadata annotation, and format standardization, to focus the challenge on the most substantial technical obstacles. This data set package is now freely released ( http://diademchallenge.org ) to train, test, and aid development of automated reconstruction algorithms.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - data sets +21303543,An EST-based analysis identifies new genes and reveals distinctive gene expression features of Coffea arabica and Coffea canephora.,"

Background

Coffee is one of the world's most important crops; it is consumed worldwide and plays a significant role in the economy of producing countries. Coffea arabica and C. canephora are responsible for 70 and 30% of commercial production, respectively. C. arabica is an allotetraploid from a recent hybridization of the diploid species, C. canephora and C. eugenioides. C. arabica has lower genetic diversity and results in a higher quality beverage than C. canephora. Research initiatives have been launched to produce genomic and transcriptomic data about Coffea spp. as a strategy to improve breeding efficiency.

Results

Assembling the expressed sequence tags (ESTs) of C. arabica and C. canephora produced by the Brazilian Coffee Genome Project and the Nestl-Cornell Consortium revealed 32,007 clusters of C. arabica and 16,665 clusters of C. canephora. We detected different GC3 profiles between these species that are related to their genome structure and mating system. BLAST analysis revealed similarities between coffee and grape (Vitis vinifera) genes. Using KA/KS analysis, we identified coffee genes under purifying and positive selection. Protein domain and gene ontology analyses suggested differences between Coffea spp. data, mainly in relation to complex sugar synthases and nucleotide binding proteins. OrthoMCL was used to identify specific and prevalent coffee protein families when compared to five other plant species. Among the interesting families annotated are new cystatins, glycine-rich proteins and RALF-like peptides. Hierarchical clustering was used to independently group C. arabica and C. canephora expression clusters according to expression data extracted from EST libraries, resulting in the identification of differentially expressed genes. Based on these results, we emphasize gene annotation and discuss plant defenses, abiotic stress and cup quality-related functional categories.

Conclusion

We present the first comprehensive genome-wide transcript profile study of C. arabica and C. canephora, which can be freely assessed by the scientific community at http://www.lge.ibi.unicamp.br/coffea. Our data reveal the presence of species-specific/prevalent genes in coffee that may help to explain particular characteristics of these two crops. The identification of differentially expressed transcripts offers a starting point for the correlation between gene expression profiles and Coffea spp. developmental traits, providing valuable insights for coffee breeding and biotechnology, especially concerning sugar metabolism and stress tolerance.","hji,kes",0,1,1,2,0.5,NA,"iffy - project website but aggregated and value add, looks like it'd be in risk if the data aren't elsewhere; reassessed and still yes - data there" +21398672,"Histogram-based DNA analysis for the visualization of chromosome, genome and species information.","

Motivation

We describe a novel approach to explore DNA nucleotide sequence data, aiming to produce high-level categorical and structural information about the underlying chromosomes, genomes and species.

Results

The article starts by analyzing chromosomal data through histograms using fixed length DNA sequences. After creating the DNA-related histograms, a correlation between pairs of histograms is computed, producing a global correlation matrix. These data are then used as input to several data processing methods for information extraction and tabular/graphical output generation. A set of 18 species is processed and the extensive results reveal that the proposed method is able to generate significant and diversified outputs, in good accordance with current scientific knowledge in domains such as genomics and phylogenetics.

Availability and implementation

Source code freely available for download at http://www4.dei.isep.ipp.pt/etc/dnapaper2010, implemented in Free Pascal and UNIX scripting tools. Study input data available online for download at University of California at Santa Cruz Genome Bioinformatics, http://hgdownload.cse.ucsc.edu/downloads.html.","hji,kes",0,1,1,2,0.5,NA,iffy; reassessed and still yes - there is data there but it is iffy +21411447,Using computational predictions to improve literature-based Gene Ontology annotations: a feasibility study.,"Annotation using Gene Ontology (GO) terms is one of the most important ways in which biological information about specific gene products can be expressed in a searchable, computable form that may be compared across genomes and organisms. Because literature-based GO annotations are often used to propagate functional predictions between related proteins, their accuracy is critically important. We present a strategy that employs a comparison of literature-based annotations with computational predictions to identify and prioritize genes whose annotations need review. Using this method, we show that comparison of manually assigned 'unknown' annotations in the Saccharomyces Genome Database (SGD) with InterPro-based predictions can identify annotations that need to be updated. A survey of literature-based annotations and computational predictions made by the Gene Ontology Annotation (GOA) project at the European Bioinformatics Institute (EBI) across several other databases shows that this comparison strategy could be used to maintain and improve the quality of GO annotations for other organisms besides yeast. The survey also shows that although GOA-assigned predictions are the most comprehensive source of functional information for many genomes, a large proportion of genes in a variety of different organisms entirely lack these predictions but do have manual annotations. 
This underscores the critical need for manually performed, literature-based curation to provide functional information about genes that are outside the scope of widely used computational methods. Thus, the combination of manual and computational methods is essential to provide the most accurate and complete functional annotation of a genome. Database URL: http://www.yeastgenome.org.","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - includes a data resource +21464845,An ANN model for the identification of deleterious nsSNPs in tumor suppressor genes.,"

Unlabelled

Human genetic variations primarily result from single nucleotide polymorphisms (SNPs) that occurs approximately every 1000 bases in the overall human population. The non-synonymous SNPs (nsSNPs), lead to amino acid changes in the protein product may account for nearly half of the known genetic variations linked to inherited human diseases and cancer. One of the main problems of medical genetics today is to identify nsSNPs that underlie disease-related phenotypes in humans. An attempt was made to develop a new approach to predict such nsSNPs. This would enhance our understanding of genetic diseases and helps to predict the disease. We detect nsSNPs and all possible and reliable alleles by ANN, a soft computing model using potential SNP information. Reliable nsSNPs are identified, based on the reconstructed alleles and on sequence redundancy. The model gives good results with mean specificity (95.85&), sensitivity (97.40&) and accuracy (96.25&). Our results indicate that ANNs can serve as a useful method to analyze quantitative effect of nsSNPs on protein function and would be useful for large-scale analysis of genomic nsSNP data.

Availability

The database is available for free at http://www.snp.mirworks.in.","hji,kes",0,0,0,2,0,NA,"no notes; reassessed and re-scored - says database available but abstract is entirely about the ANN method, iffy" +21523935,ncRNA consensus secondary structure derivation using grammar strings.,"Many noncoding RNAs (ncRNAs) function through both their sequences and secondary structures. Thus, secondary structure derivation is an important issue in today's RNA research. The state-of-the-art structure annotation tools are based on comparative analysis, which derives consensus structure of homologous ncRNAs. Despite promising results from existing ncRNA aligning and consensus structure derivation tools, there is a need for more efficient and accurate ncRNA secondary structure modeling and alignment methods. In this work, we introduce a consensus structure derivation approach based on grammar string, a novel ncRNA secondary structure representation that encodes an ncRNA's sequence and secondary structure in the parameter space of a context-free grammar (CFG) and a full RNA grammar including pseudoknots. Being a string defined on a special alphabet constructed from a grammar, grammar string converts ncRNA alignment into sequence alignment. We derive consensus secondary structures from hundreds of ncRNA families from BraliBase 2.1 and 25 families containing pseudoknots using grammar string alignment. Our experiments have shown that grammar string-based structure derivation competes favorably in consensus structure quality with Murlet and RNASampler. Source code and experimental data are available at http://www.cse.msu.edu/~yannisun/grammar-string.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - says data available but abstract emphasizes method +21569303,Meta-analysis of heterogeneous Down Syndrome data reveals consistent genome-wide dosage effects related to neurological processes.,"

Background

Down syndrome (DS; trisomy 21) is the most common genetic cause of mental retardation in the human population and key molecular networks dysregulated in DS are still unknown. Many different experimental techniques have been applied to analyse the effects of dosage imbalance at the molecular and phenotypical level, however, currently no integrative approach exists that attempts to extract the common information.

Results

We have performed a statistical meta-analysis from 45 heterogeneous publicly available DS data sets in order to identify consistent dosage effects from these studies. We identified 324 genes with significant genome-wide dosage effects, including well investigated genes like SOD1, APP, RUNX1 and DYRK1A as well as a large proportion of novel genes (N = 62). Furthermore, we characterized these genes using gene ontology, molecular interactions and promoter sequence analysis. In order to judge relevance of the 324 genes for more general cerebral pathologies we used independent publicly available microarray data from brain studies not related with DS and identified a subset of 79 genes with potential impact for neurocognitive processes. All results have been made available through a web server under http://ds-geneminer.molgen.mpg.de/.

Conclusions

Our study represents a comprehensive integrative analysis of heterogeneous data including genome-wide transcript levels in the domain of trisomy 21. The detected dosage effects build a resource for further studies of DS pathology and the development of new therapies.","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - abstract emphasizes the data to me +21643562,Toxicity tests aiming to protect Brazilian aquatic systems: current status and implications for management.,"The current status of toxicological tests performed with Brazilian native species was evaluated through a survey of the scientific data available in the literature. The information gathered was processed and an electronic toxicology database (http://www.inct-ta.furg.br/bd_toxicologico.php) was generated. This database provides valuable information for researchers to select sensitive and tolerant aquatic species to a large variety of aquatic pollutants. Furthermore, the toxicology database allows researchers to select species representative of an ecosystem of interest. Analysis of the toxicology database showed that ecotoxicological assays have significantly improved in Brazil over the last decade, in spite of the still relatively low number of tests performed and the restricted number of native species tested. This is because most of the research is developed in a few laboratories concentrated in certain regions of Brazil, especially in Southern and Southeast regions. Considering the extremely rich biodiversity and the large variety of aquatic ecosystems in Brazil, this finding points to the urgent need for the development of ecotoxicological studies with other groups of aquatic animals, such as insects, foraminifera, cnidarians, worms, amphibians, among others. This would help to derive more realistic water quality criteria (WQC) values, which would better protect the different aquatic ecosystems in Brazil. 
Finally, the toxicology database generated presents solid and science based information, which can encourage and drive the Environmental Regulatory Agencies in Brazil to derive WQC based on native species. In this context, the present paper discusses the historical evolution of ecotoxicological studies in Brazil, and how they have contributed to the improvement of the Brazilian Federal and Regional regulations for environment.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +21646342,"CSpritz: accurate prediction of protein disorder segments with annotation for homology, secondary structure and linear motifs.","CSpritz is a web server for the prediction of intrinsic protein disorder. It is a combination of previous Spritz with two novel orthogonal systems developed by our group (Punch and ESpritz). Punch is based on sequence and structural templates trained with support vector machines. ESpritz is an efficient single sequence method based on bidirectional recursive neural networks. Spritz was extended to filter predictions based on structural homologues. After extensive testing, predictions are combined by averaging their probabilities. The CSpritz website can elaborate single or multiple predictions for either short or long disorder. The server provides a global output page, for download and simultaneous statistics of all predictions. Links are provided to each individual protein where the amino acid sequence and disorder prediction are displayed along with statistics for the individual protein. As a novel feature, CSpritz provides information about structural homologues as well as secondary structure and short functional linear motifs in each disordered segment. Benchmarking was performed on the very recent CASP9 data, where CSpritz would have ranked consistently well with a Sw measure of 49.27 and AUC of 0.828. 
The server, together with help and methods pages including examples, are freely available at URL: http://protein.bio.unipd.it/cspritz/.","hji,kes",0,0,0,2,0,NA,iffy; reassessed and re-scored - no data +21786137,Prediction of protein-protein interactions between Ralstonia solanacearum and Arabidopsis thaliana.,"Ralstonia solanacearum is a devastating bacterial pathogen that has an unusually wide host range. R. solanacearum, together with Arabidopsis thaliana, has become a model system for studying the molecular basis of plant-pathogen interactions. Protein-protein interactions (PPIs) play a critical role in the infection process, and some PPIs can initiate a plant defense response. However, experimental investigations have rarely addressed such PPIs. Using two computational methods, the interolog and the domain-based methods, we predicted 3,074 potential PPIs between 119 R. solanacearum and 1,442 A. thaliana proteins. Interestingly, we found that the potential pathogen-targeted proteins are more important in the A. thaliana PPI network. To facilitate further studies, all predicted PPI data were compiled into a database server called PPIRA (http://protein.cau.edu.cn/ppira/). We hope that our work will provide new insights for future research addressing the pathogenesis of R. solanacearum.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +21854555,A first-generation integrated tammar wallaby map and its use in creating a tammar wallaby first-generation virtual genome map.,"

Background

The limited (2X) coverage of the tammar wallaby (Macropus eugenii) genome sequence dataset currently presents a challenge for assembly and anchoring onto chromosomes. To provide a framework for this assembly, it would be a great advantage to have a dense map of the tammar wallaby genome. However, only limited mapping data are available for this non-model species, comprising a physical map and a linkage map.

Results

We combined all available tammar wallaby mapping data to create a tammar wallaby integrated map, using the Location DataBase (LDB) strategy. This first-generation integrated map combines all available information from the second-generation tammar wallaby linkage map with 148 loci, and extensive FISH mapping data for 492 loci, especially for genes likely to be located at the ends of wallaby chromosomes or at evolutionary breakpoints inferred from comparative information. For loci whose positions are only approximately known, their location in the integrated map was refined on the basis of comparative information from opossum (Monodelphis domestica) and human. Interpolation of segments from the opossum and human assemblies into the integrated map enabled the subsequent construction of a tammar wallaby first-generation virtual genome map, which comprises 14336 markers, including 13783 genes recruited from opossum and human assemblies. Both maps are freely available at http://compldb.angis.org.au.

Conclusions

The first-generation integrated map and the first-generation virtual genome map provide a backbone for the chromosome assembly of the tammar wallaby genome sequence. For example, 78% of the 10257 gene-scaffolds in the Ensembl annotation of the tammar wallaby genome sequence (including 10522 protein-coding genes) can now be given a chromosome location in the tammar wallaby virtual genome map.","hji,kes",0,1,1,2,0.5,NA,"iffy - data from elsewhere, but integrated, may count then, but not available so will fall out later; reassessed and still yes - data, though iffy" +21880147,An integrative approach to ortholog prediction for disease-focused and other functional studies.,"

Background

Mapping of orthologous genes among species serves an important role in functional genomics by allowing researchers to develop hypotheses about gene function in one species based on what is known about the functions of orthologs in other species. Several tools for predicting orthologous gene relationships are available. However, these tools can give different results and identification of predicted orthologs is not always straightforward.

Results

We report a simple but effective tool, the Drosophila RNAi Screening Center Integrative Ortholog Prediction Tool (DIOPT; http://www.flyrnai.org/diopt), for rapid identification of orthologs. DIOPT integrates existing approaches, facilitating rapid identification of orthologs among human, mouse, zebrafish, C. elegans, Drosophila, and S. cerevisiae. As compared to individual tools, DIOPT shows increased sensitivity with only a modest decrease in specificity. Moreover, the flexibility built into the DIOPT graphical user interface allows researchers with different goals to appropriately 'cast a wide net' or limit results to highest confidence predictions. DIOPT also displays protein and domain alignments, including percent amino acid identity, for predicted ortholog pairs. This helps users identify the most appropriate matches among multiple possible orthologs. To facilitate using model organisms for functional analysis of human disease-associated genes, we used DIOPT to predict high-confidence orthologs of disease genes in Online Mendelian Inheritance in Man (OMIM) and genes in genome-wide association study (GWAS) data sets. The results are accessible through the DIOPT diseases and traits query tool (DIOPT-DIST; http://www.flyrnai.org/diopt-dist).

Conclusions

DIOPT and DIOPT-DIST are useful resources for researchers working with model organisms, especially those who are interested in exploiting model organisms such as Drosophila to study the functions of human disease genes.","hji,kes",0,1,1,2,0.5,NA,"no notes; reassessed and still yes - includes a data resource, it seems" +21929785,The representation of protein complexes in the Protein Ontology (PRO).,"

Background

Representing species-specific proteins and protein complexes in ontologies that are both human- and machine-readable facilitates the retrieval, analysis, and interpretation of genome-scale data sets. Although existing protein-centric informatics resources provide the biomedical research community with well-curated compendia of protein sequence and structure, these resources lack formal ontological representations of the relationships among the proteins themselves. The Protein Ontology (PRO) Consortium is filling this informatics resource gap by developing ontological representations and relationships among proteins and their variants and modified forms. Because proteins are often functional only as members of stable protein complexes, the PRO Consortium, in collaboration with existing protein and pathway databases, has launched a new initiative to implement logical and consistent representation of protein complexes.

Description

We describe here how the PRO Consortium is meeting the challenge of representing species-specific protein complexes, how protein complex representation in PRO supports annotation of protein complexes and comparative biology, and how PRO is being integrated into existing community bioinformatics resources. The PRO resource is accessible at http://pir.georgetown.edu/pro/.

Conclusion

PRO is a unique database resource for species-specific protein complexes. PRO facilitates robust annotation of variations in composition and function contexts for protein complexes within and between species.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +21943375,"Reptilian-transcriptome v1.0, a glimpse in the brain transcriptome of five divergent Sauropsida lineages and the phylogenetic position of turtles.","

Background

Reptiles are largely under-represented in comparative genomics despite the fact that they are substantially more diverse in many respects than mammals. Given the high divergence of reptiles from classical model species, next-generation sequencing of their transcriptomes is an approach of choice for gene identification and annotation.

Results

Here, we use 454 technology to sequence the brain transcriptome of four divergent reptilian and one reference avian species: the Nile crocodile, the corn snake, the bearded dragon, the red-eared turtle, and the chicken. Using an in-house pipeline for recursive similarity searches of >3,000,000 reads against multiple databases from 7 reference vertebrates, we compile a reptilian comparative transcriptomics dataset, with homology assignment for 20,000 to 31,000 transcripts per species and a cumulated non-redundant sequence length of 248.6 Mbases. Our approach identifies the majority (87%) of chicken brain transcripts and about 50% of de novo assembled reptilian transcripts. In addition to 57,502 microsatellite loci, we identify thousands of SNP and indel polymorphisms for population genetic and linkage analyses. We also build very large multiple alignments for Sauropsida and mammals (two million residues per species) and perform extensive phylogenetic analyses suggesting that turtles are not basal living reptiles but are rather associated with Archosaurians, hence, potentially answering a long-standing question in the phylogeny of Amniotes.

Conclusions

The reptilian transcriptome (freely available at http://www.reptilian-transcriptomes.org) should prove a useful new resource as reptiles are becoming important new models for comparative genomics, ecology, and evolutionary developmental genetics.","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - data (transcriptome) available +21994220,FlyExpress: visual mining of spatiotemporal patterns for genes and publications in Drosophila embryogenesis.,"

Summary

Images containing spatial expression patterns illuminate the roles of different genes during embryogenesis. In order to generate initial clues to regulatory interactions, biologists frequently need to know the set of genes expressed at the same time at specific locations in a developing embryo, as well as related research publications. However, text-based mining of image annotations and research articles cannot produce all relevant results, because the primary data are images that exist as graphical objects. We have developed a unique knowledge base (FlyExpress) to facilitate visual mining of images from Drosophila melanogaster embryogenesis. By clicking on specific locations in pictures of fly embryos from different stages of development and different visual projections, users can produce a list of genes and publications instantly. In FlyExpress, each queryable embryo picture is a heat-map that captures the expression patterns of more than 4500 genes and more than 2600 published articles. In addition, one can view spatial patterns for particular genes over time as well as find other genes with similar expression patterns at a given developmental stage. Therefore, FlyExpress is a unique tool for mining spatiotemporal expression patterns in a format readily accessible to the scientific community.

Availability

http://www.flyexpress.net

Contact

s.kumar@asu.edu.","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - knowledgebase +22102885,"Novel SSR markers from BAC-end sequences, DArT arrays and a comprehensive genetic map with 1,291 marker loci for chickpea (Cicer arietinum L.).","Chickpea (Cicer arietinum L.) is the third most important cool season food legume, cultivated in arid and semi-arid regions of the world. The goal of this study was to develop novel molecular markers such as microsatellite or simple sequence repeat (SSR) markers from bacterial artificial chromosome (BAC)-end sequences (BESs) and diversity arrays technology (DArT) markers, and to construct a high-density genetic map based on recombinant inbred line (RIL) population ICC 4958 (C. arietinum) × PI 489777 (C. reticulatum). A BAC-library comprising 55,680 clones was constructed and 46,270 BESs were generated. Mining of these BESs provided 6,845 SSRs, and primer pairs were designed for 1,344 SSRs. In parallel, DArT arrays with ca. 15,000 clones were developed, and 5,397 clones were found polymorphic among 94 genotypes tested. Screening of newly developed BES-SSR markers and DArT arrays on the parental genotypes of the RIL mapping population showed polymorphism with 253 BES-SSR markers and 675 DArT markers. Segregation data obtained for these polymorphic markers and 494 markers data compiled from published reports or collaborators were used for constructing the genetic map. As a result, a comprehensive genetic map comprising 1,291 markers on eight linkage groups (LGs) spanning a total of 845.56 cM distance was developed (http://cmap.icrisat.ac.in/cmap/sm/cp/thudi/). The number of markers per linkage group ranged from 68 (LG 8) to 218 (LG 3) with an average inter-marker distance of 0.65 cM. 
While the developed resource of molecular markers will be useful for genetic diversity, genetic mapping and molecular breeding applications, the comprehensive genetic map with integrated BES-SSR markers will facilitate its anchoring to the physical map (under construction) to accelerate map-based cloning of genes in chickpea and comparative genome evolution studies in legumes.","hji,kes",0,1,1,2,0.5,NA,"no notes; reassessed and still yes - data, I think" +22210604,Discovery and mapping of a new expressed sequence tag-single nucleotide polymorphism and simple sequence repeat panel for large-scale genetic studies and breeding of Theobroma cacao L.,"Theobroma cacao is an economically important tree of several tropical countries. Its genetic improvement is essential to provide protection against major diseases and improve chocolate quality. We discovered and mapped new expressed sequence tag-single nucleotide polymorphism (EST-SNP) and simple sequence repeat (SSR) markers and constructed a high-density genetic map. By screening 149 650 ESTs, 5246 SNPs were detected in silico, of which 1536 corresponded to genes with a putative function, while 851 had a clear polymorphic pattern across a collection of genetic resources. In addition, 409 new SSR markers were detected on the Criollo genome. Lastly, 681 new EST-SNPs and 163 new SSRs were added to the pre-existing 418 co-dominant markers to construct a large consensus genetic map. This high-density map and the set of new genetic markers identified in this study are a milestone in cocoa genomics and for marker-assisted breeding. The data are available at http://tropgenedb.cirad.fr.","hji,kes",0,1,1,2,0.5,NA,"no notes; reassessed and still yes - data, I think" +22303453,Catalog of microRNA seed polymorphisms in vertebrates.,"MicroRNAs (miRNAs) are a class of non-coding RNA that plays an important role in posttranscriptional regulation of mRNA. 
Evidence has shown that miRNA gene variability might interfere with its function resulting in phenotypic variation and disease susceptibility. A major role in miRNA target recognition is ascribed to complementarity with the miRNA seed region that can be affected by polymorphisms. In the present study, we developed an online tool for the detection of miRNA polymorphisms (miRNA SNiPer) in vertebrates (http://www.integratomics-time.com/miRNA-SNiPer) and generated a catalog of miRNA seed region polymorphisms (miR-seed-SNPs) consisting of 149 SNPs in six species. Although a majority of detected polymorphisms were due to point mutations, two consecutive nucleotide substitutions (double nucleotide polymorphisms, DNPs) were also identified in nine miRNAs. We determined that miR-SNPs are frequently located within the quantitative trait loci (QTL), chromosome fragile sites, and cancer susceptibility loci, indicating their potential role in the genetic control of various complex traits. To test this further, we performed an association analysis between the mmu-miR-717 seed SNP rs30372501, which is polymorphic in a large number of standard inbred strains, and all phenotypic traits in these strains deposited in the Mouse Phenome Database. Analysis showed a significant association between the mmu-miR-717 seed SNP and a diverse array of traits including behavior, blood-clinical chemistry, body weight size and growth, and immune system suggesting that seed SNPs can indeed have major pleiotropic effects. 
The bioinformatics analyses, data and tools developed in the present study can serve researchers as a starting point in testing more targeted hypotheses and designing experiments using optimal species or strains for further mechanistic studies.","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - includes data +22440904,Quantitative proteomics identifies vasopressin-responsive nuclear proteins in collecting duct cells.,"Vasopressin controls transport in the renal collecting duct, in part, by regulating transcription. This complex process, which can involve translocation and/or modification of transcriptional regulators, is not completely understood. Here, we applied a method for large-scale profiling of nuclear proteins to quantify vasopressin-induced changes in the nuclear proteome of cortical collecting duct (mpkCCD) cells. Using stable isotope labeling and tandem mass spectrometry, we quantified 3987 nuclear proteins and identified significant changes in the abundance of 65, including previously established targets of vasopressin signaling in the collecting duct. Vasopressin-induced changes in the abundance of the transcription factors JunB, Elf3, Gatad2b, and Hmbox1; transcriptional co-regulators Ctnnb1 (β-catenin) and Crebbp; subunits of the Mediator complex; E3 ubiquitin ligase Nedd4; nuclear transport regulator RanGap1; and several proteins associated with tight junctions and adherens junctions. Bioinformatic analysis showed that many of the quantified transcription factors have putative binding sites in the 5'-flanking regions of genes coding for the channel proteins Aqp2, Aqp3, Scnn1b (ENaCβ), and Scnn1g (ENaCγ), which are known targets of vasopressin. Immunoblotting demonstrated that the increase in β-catenin in nuclear fractions was accompanied by an even larger increase in its phosphorylated form (pSer552). 
The findings provide a new online database resource for nuclear proteomics (http://helixweb.nih.gov/ESBL/Database/mNPD/) and generate new hypotheses regarding vasopressin-mediated transcriptional regulation in the collecting duct.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +22506599,SPAdes: a new genome assembly algorithm and its applications to single-cell sequencing.,"The lion's share of bacteria in various environments cannot be cloned in the laboratory and thus cannot be sequenced using existing technologies. A major goal of single-cell genomics is to complement gene-centric metagenomic data with whole-genome assemblies of uncultivated organisms. Assembly of single-cell data is challenging because of highly non-uniform read coverage as well as elevated levels of sequencing errors and chimeric reads. We describe SPAdes, a new assembler for both single-cell and standard (multicell) assembly, and demonstrate that it improves on the recently released E+V-SC assembler (specialized for single-cell data) and on popular assemblers Velvet and SoapDeNovo (for multicell data). SPAdes generates single-cell assemblies, providing information about genomes of uncultivatable bacteria that vastly exceeds what may be obtained via traditional metagenomics studies. SPAdes is available online ( http://bioinf.spbau.ru/spades ). It is distributed as open source software.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - server only it seems +22600737,ProBiS-2012: web server and web services for detection of structurally similar binding sites in proteins.,"The ProBiS web server is a web server for detection of structurally similar binding sites in the PDB and for local pairwise alignment of protein structures. 
In this article, we present a new version of the ProBiS web server that is 10 times faster than earlier versions, due to the efficient parallelization of the ProBiS algorithm, which now allows significantly faster comparison of a protein query against the PDB and reduces the calculation time for scanning the entire PDB from hours to minutes. It also features new web services, and an improved user interface. In addition, the new web server is united with the ProBiS-Database and thus provides instant access to pre-calculated protein similarity profiles for over 29 000 non-redundant protein structures. The ProBiS web server is particularly adept at detection of secondary binding sites in proteins. It is freely available at http://probis.cmm.ki.si/old-version, and the new ProBiS web server is at http://probis.cmm.ki.si.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - server only it seems +22669907,Cscan: finding common regulators of a set of genes by using a collection of genome-wide ChIP-seq datasets.,"The regulation of transcription of eukaryotic genes is a very complex process, which involves interactions between transcription factors (TFs) and DNA, as well as other epigenetic factors like histone modifications, DNA methylation, and so on, which nowadays can be studied and characterized with techniques like ChIP-Seq. Cscan is a web resource that includes a large collection of genome-wide ChIP-Seq experiments performed on TFs, histone modifications, RNA polymerases and others. Enriched peak regions from the ChIP-Seq experiments are crossed with the genomic coordinates of a set of input genes, to identify which of the experiments present a statistically significant number of peaks within the input genes' loci. The input can be a cluster of co-expressed genes, or any other set of genes sharing a common regulatory profile. Users can thus single out which TFs are likely to be common regulators of the genes, and their respective correlations. 
Also, by examining results on promoter activation, transcription, histone modifications, polymerase binding and so on, users can investigate the effect of the TFs (activation or repression of transcription) as well as of the cell or tissue specificity of the genes' regulation and expression. The web interface is free for use, and there is no login requirement. Available at: http://www.beaconlab.it/cscan.","hji,kes",0,0,0,2,0,NA,iffy; reassessed and re-scored - input mentioned - does seem to be a server +22759420,Comparative evaluation of set-level techniques in predictive classification of gene expression samples.,"

Background

Analysis of gene expression data in terms of a priori-defined gene sets has recently received significant attention as this approach typically yields more compact and interpretable results than those produced by traditional methods that rely on individual genes. The set-level strategy can also be adopted with similar benefits in predictive classification tasks accomplished with machine learning algorithms. Initial studies into the predictive performance of set-level classifiers have yielded rather controversial results. The goal of this study is to provide a more conclusive evaluation by testing various components of the set-level framework within a large collection of machine learning experiments.

Results

Genuine curated gene sets constitute better features for classification than sets assembled without biological relevance. For identifying the best gene sets for classification, the Global test outperforms the gene-set methods GSEA and SAM-GS as well as two generic feature selection methods. To aggregate expressions of genes into a feature value, the singular value decomposition (SVD) method as well as the SetSig technique improve on simple arithmetic averaging. Set-level classifiers learned with 10 features constituted by the Global test slightly outperform baseline gene-level classifiers learned with all original data features although they are slightly less accurate than gene-level classifiers learned with a prior feature-selection step.

Conclusion

Set-level classifiers do not boost predictive accuracy, however, they do achieve competitive accuracy if learned with the right combination of ingredients.

Availability

Open-source, publicly available software was used for classifier learning and testing. The gene expression datasets and the gene set database used are also publicly available. The full tabulation of experimental results is available at http://ida.felk.cvut.cz/CESLT.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - software and includes a data resource +22759586,IDDI: integrated domain-domain interaction and protein interaction analysis system.,"

Background

Deciphering protein-protein interaction (PPI) in domain level enriches valuable information about binding mechanism and functional role of interacting proteins. The 3D structures of complex proteins are reliable source of domain-domain interaction (DDI) but the number of proven structures is very limited. Several resources for the computationally predicted DDI have been generated but they are scattered in various places and their prediction show erratic performances. A well-organized PPI and DDI analysis system integrating these data with fair scoring system is necessary.

Method

We integrated three structure-based DDI datasets and twenty computationally predicted DDI datasets and constructed an interaction analysis system, named IDDI, which enables to browse protein and domain interactions with their relationships. To integrate heterogeneous DDI information, a novel scoring scheme is introduced to determine the reliability of DDI by considering the prediction scores of each DDI and the confidence levels of each prediction method in the datasets, and independencies between predicted datasets. In addition, we connected this DDI information to the comprehensive PPI information and developed a unified interface for the interaction analysis exploring interaction networks at both protein and domain level.

Result

IDDI provides 204,705 DDIs among total 7,351 Pfam domains in the current version. The result presents that total number of DDIs is increased eight times more than that of previous studies. Due to the increment of data, 50.4% of PPIs could be correlated with DDIs which is more than twice of previous resources. Newly designed scoring scheme outperformed the previous system in its accuracy too. User interface of IDDI system provides interactive investigation of proteins and domains in interactions with interconnected way. A specific example is presented to show the efficiency of the systems to acquire the comprehensive information of target protein with PPI and DDI relationships. IDDI is freely available at http://pcode.kaist.ac.kr/iddi/.","hji,kes",0,1,1,2,0.5,NA,new data; reassessed and still yes - new data created and made available +22800569,Modeling of folds and folding pathways for some protein families of (α + β)- and (α/β)-classes.,"In this paper, updated structural trees for α/β-proteins containing five- and seven-segment (α/β)-motifs are represented. Novel structural motifs occurring in some families of (α + β)- and (α/β)-proteins are also characterized. Databases of these proteins have been compiled from the Protein Data Bank (PDB) and Structural Classification of Proteins (SCOP) and the corresponding structural trees have been constructed. The classification of these proteins has been developed and organized as an extension of the PCBOST database, which is available at http://strees.protres.ru . 
In total, the updated Protein Classification Based on Structural Trees database contains 11 structural trees, 106 levels, 635 folds, 4911 proteins and domains, and 14,202 PDB entries.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +22804825,HuPho: the human phosphatase portal.,"Phosphatases and kinases contribute to the regulation of protein phosphorylation homeostasis in the cell. Phosphorylation is a key post-translational modification underlying the regulation of many cellular processes. Thus, a comprehensive picture of phosphatase function and the identification of their target substrates would aid a systematic approach to a mechanistic description of cell signalling. Here we present a website designed to facilitate the retrieval of information about human protein phosphatases. To this end we developed a search engine to recover and integrate information annotated in several publicly available web resources. In addition we present a text-mining-assisted annotation effort aimed at extracting phosphatase related data reported in the scientific literature. The HuPho (human phosphatases) website can be accessed at http://hupho.uniroma2.it.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +22829725,Bacterial genome mapper: A comparative bacterial genome mapping tool.,"

Unlabelled

Recently, next generation sequencing (NGS) technologies have led to a revolutionary increase in sequencing speed and costefficacy. Consequently, a vast number of contigs from many recently sequenced bacterial genomes remain to be accurately mapped and annotated, requiring the development of more convenient bioinformatics programs. In this paper, we present a newly developed web-based bioinformatics program, Bacterial Genome Mapper, which is suitable for mapping and annotating contigs that have been assembled from bacterial genome sequence raw data. By constructing a multiple alignment map between target contig sequences and two reference bacterial genome sequences, this program also provides very useful comparative genomics analysis of draft bacterial genomes.

Availability

The database is available for free at http://mbgm.kribb.re.kr.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - calls it a db but seems to be about method +22923291,EFICAz2.5: application of a high-precision enzyme function predictor to 396 proteomes.,"

Unlabelled

High-quality enzyme function annotation is essential for understanding the biochemistry, metabolism and disease processes of organisms. Previously, we developed a multi-component high-precision enzyme function predictor, EFICAz(2) (enzyme function inference by a combined approach). Here, we present an updated improved version, EFICAz(2.5), that is trained on a significantly larger data set of enzyme sequences and PROSITE patterns. We also present the results of the application of EFICAz(2.5) to the enzyme reannotation of 396 genomes cataloged in the ENSEMBL database.

Availability

The EFICAz(2.5) server and database is freely available with a use-friendly interface at http://cssb.biology.gatech.edu/EFICAz2.5.","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - software and includes a data resource +22961451,Identifying cancer highly-expressed membrane receptors for targeted drug delivery.,"Currently, the accompanying side effects of anti-cancer drugs owing to incorrect delivery to normal tissues should be reduced. We present a database (MRTDD) with identified cancer highly-expressed membrane receptors (CHMRs) which can be used in targeted drug delivery. To evaluate the probability of occurrence of incorrect delivery, we calculate tissue index for each CHMR and expect to identify good candidates. The information provided includes: (1) genomic annotations; (2) gene expression profiles of membrane receptors in cancer tissue vs. corresponding normal tissue, normal tissues of body and cancer cell-lines; (3) available antibody services of manufacturers. MRTDD is available at http://mrtdd.mbc.nctu.edu.tw/.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +22962312,"DECIPHER: web-based, community resource for clinical interpretation of rare variants in developmental disorders.","Patients with developmental disorders often harbour sub-microscopic deletions or duplications that lead to a disruption of normal gene expression or perturbation in the copy number of dosage-sensitive genes. Clinical interpretation for such patients in isolation is hindered by the rarity and novelty of such disorders. The DECIPHER project (https://decipher.sanger.ac.uk) was established in 2004 as an accessible online repository of genomic and associated phenotypic data with the primary goal of aiding the clinical interpretation of rare copy-number variants (CNVs). 
DECIPHER integrates information from a variety of bioinformatics resources and uses visualization tools to identify potential disease genes within a CNV. A two-tier access system permits clinicians and clinical scientists to maintain confidential linked anonymous records of phenotypes and CNVs for their patients that, with informed consent, can subsequently be shared with the wider clinical genetics and research communities. Advances in next-generation sequencing technologies are making it practical and affordable to sequence the whole exome/genome of patients who display features suggestive of a genetic disorder. This approach enables the identification of smaller intragenic mutations including single-nucleotide variants that are not accessible even with high-resolution genomic array analysis. This article briefly summarizes the current status and achievements of the DECIPHER project and looks ahead to the opportunities and challenges of jointly analysing structural and sequence variation in the human genome.","hji,kes",0,1,1,2,0.5,NA,"no notes; reassessed and still yes - includes a data resource, clinical but genetic, I think it counts" +23061897,"Sifting through genomes with iterative-sequence clustering produces a large, phylogenetically diverse protein-family resource.","

Background

New computational resources are needed to manage the increasing volume of biological data from genome sequencing projects. One fundamental challenge is the ability to maintain a complete and current catalog of protein diversity. We developed a new approach for the identification of protein families that focuses on the rapid discovery of homologous protein sequences.

Results

We implemented fully automated and high-throughput procedures to de novo cluster proteins into families based upon global alignment similarity. Our approach employs an iterative clustering strategy in which homologs of known families are sifted out of the search for new families. The resulting reduction in computational complexity enables us to rapidly identify novel protein families found in new genomes and to perform efficient, automated updates that keep pace with genome sequencing. We refer to protein families identified through this approach as """"Sifting Families,"""" or SFams. Our analysis of ~10.5 million protein sequences from 2,928 genomes identified 436,360 SFams, many of which are not represented in other protein family databases. We validated the quality of SFam clustering through statistical as well as network topology-based analyses.

Conclusions

We describe the rapid identification of SFams and demonstrate how they can be used to annotate genomes and metagenomes. The SFam database catalogs protein-family quality metrics, multiple sequence alignments, hidden Markov models, and phylogenetic trees. Our source code and database are publicly available and will be subject to frequent updates (http://edhar.genomecenter.ucdavis.edu/sifting_families/).","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - includes a data resource +23095498,"The human """"magnesome"""": detecting magnesium binding sites on human proteins.","

Background

Magnesium research is increasing in molecular medicine due to the relevance of this ion in several important biological processes and associated molecular pathogeneses. It is still difficult to predict from the protein covalent structure whether a human chain is or not involved in magnesium binding. This is mainly due to little information on the structural characteristics of magnesium binding sites in proteins and protein complexes. Magnesium binding features, differently from those of other divalent cations such as calcium and zinc, are elusive. Here we address a question that is relevant in protein annotation: how many human proteins can bind Mg2+? Our analysis is performed taking advantage of the recently implemented Bologna Annotation Resource (BAR-PLUS), a non hierarchical clustering method that relies on the pair wise sequence comparison of about 14 millions proteins from over 300.000 species and their grouping into clusters where annotation can safely be inherited after statistical validation.

Results

After cluster assignment of the latest version of the human proteome, the total number of human proteins for which we can assign putative Mg binding sites is 3,751. Among these proteins, 2,688 inherit annotation directly from human templates and 1,063 inherit annotation from templates of other organisms. Protein structures are highly conserved inside a given cluster. Transfer of structural properties is possible after alignment of a given sequence with the protein structures that characterise a given cluster as obtained with a Hidden Markov Model (HMM) based procedure. Interestingly a set of 370 human sequences inherit Mg2+ binding sites from templates sharing less than 30% sequence identity with the template.

Conclusion

We describe and deliver the """"human magnesome"""", a set of proteins of the human proteome that inherit putative binding of magnesium ions. With our BAR-hMG, 251 clusters including 1,341 magnesium binding protein structures corresponding to 387 sequences are sufficient to annotate some 13,689 residues in 3,751 human sequences as """"magnesium binding"""". Protein structures act therefore as three dimensional seeds for structural and functional annotation of human sequences. The data base collects specifically all the human proteins that can be annotated according to our procedure as """"magnesium binding"""", the corresponding structures and BAR+ clusters from where they derive the annotation (http://bar.biocomp.unibo.it/mg).","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +23195311,Accessing and mining data from large-scale mouse phenotyping projects.,"Comprehensive phenotyping through the International Mouse Phenotyping Consortium (IMPC)-www.mousephenotype.org-will reveal the pleiotropic functions of every gene in the mouse genome and uncover the wider role of genetic loci within diverse biological systems. The informatics challenge will be to develop an infrastructure to acquire the diverse and complex data sets generated from broad-based phenotyping and disseminate these data in an integrated manner to the scientific community. 
We describe here the current methodologies implemented to capture and disseminate these data, and plans within the Knockout Mouse Phenotyping Project (KOMP2) (http://commonfund.nih.gov/KOMP2/)-funded informatics consortium to scale these implementations to manage the surge in data from the IMPC.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - consortium description +23433959,Identification of candidate transcription factor binding sites in the cattle genome.,"A resource that provides candidate transcription factor binding sites (TFBSs) does not currently exist for cattle. Such data is necessary, as predicted sites may serve as excellent starting locations for future omics studies to develop transcriptional regulation hypotheses. In order to generate this resource, we employed a phylogenetic footprinting approach-using sequence conservation across cattle, human and dog-and position-specific scoring matrices to identify 379,333 putative TFBSs upstream of nearly 8000 Mammalian Gene Collection (MGC) annotated genes within the cattle genome. Comparisons of our predictions to known binding site loci within the PCK1, ACTA1 and G6PC promoter regions revealed 75% sensitivity for our method of discovery. Additionally, we intersected our predictions with known cattle SNP variants in dbSNP and on the Illumina BovineHD 770k and Bos 1 SNP chips, finding 7534, 444 and 346 overlaps, respectively. Due to our stringent filtering criteria, these results represent high quality predictions of putative TFBSs within the cattle genome. 
All binding site predictions are freely available at http://bfgl.anri.barc.usda.gov/BovineTFBS/ or http://199.133.54.77/BovineTFBS.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes data +23704925,Comprehensive genomic characterization of cutaneous malignant melanoma cell lines derived from metastatic lesions by whole-exome sequencing and SNP array profiling.,"Cutaneous malignant melanoma is the most fatal skin cancer and although improved comprehension of its pathogenic pathways allowed to realize some effective molecular targeted therapies, novel targets and drugs are still needed. Aiming to add genetic information potentially useful for novel targets discovery, we performed an extensive genomic characterization by whole-exome sequencing and SNP array profiling of six cutaneous melanoma cell lines derived from metastatic patients. We obtained a total of 3,325 novel coding single nucleotide variants, including 2,172 non-synonymous variants. We catalogued the coding mutations according to Sanger COSMIC database and to a manually curated list including genes involved in melanoma pathways identified by mining recent literature. Besides confirming the presence of known melanoma driver mutations (BRAF(V600E), NRAS(Q61R) ), we identified novel mutated genes involved in signalling pathways crucial for melanoma pathogenesis and already addressed by current targeted therapies (such as MAPK and glutamate pathways). We also identified mutations in four genes (MUC19, PAICS, RBMXL1, KIF23) never reported in melanoma, which might deserve further investigations. All data are available to the entire research community in our Melanoma Exome Database (at https://155.253.6.64/MExDB/). 
In summary, these cell lines are valuable biological tools to improve the genetic comprehension of this complex cancer disease and to study functional relevance of individual mutational events, and these findings could provide insights potentially useful for identification of novel therapeutic targets for cutaneous malignant melanoma.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +23729657,"The non-obese diabetic mouse sequence, annotation and variation resource: an aid for investigating type 1 diabetes.","Model organisms are becoming increasingly important for the study of complex diseases such as type 1 diabetes (T1D). The non-obese diabetic (NOD) mouse is an experimental model for T1D having been bred to develop the disease spontaneously in a process that is similar to humans. Genetic analysis of the NOD mouse has identified around 50 disease loci, which have the nomenclature Idd for insulin-dependent diabetes, distributed across at least 11 different chromosomes. In total, 21 Idd regions across 6 chromosomes, that are major contributors to T1D susceptibility or resistance, were selected for finished sequencing and annotation at the Wellcome Trust Sanger Institute. Here we describe the generation of 40.4 mega base-pairs of finished sequence from 289 bacterial artificial chromosomes for the NOD mouse. Manual annotation has identified 738 genes in the diabetes sensitive NOD mouse and 765 genes in homologous regions of the diabetes resistant C57BL/6J reference mouse across 19 candidate Idd regions. This has allowed us to call variation consequences between homologous exonic sequences for all annotated regions in the two mouse strains. 
We demonstrate the importance of this resource further by illustrating the technical difficulties that regions of inter-strain structural variation between the NOD mouse and the C57BL/6J reference mouse can cause for current next generation sequencing and assembly techniques. Furthermore, we have established that the variation rate in the Idd regions is 2.3 times higher than the mean found for the whole genome assembly for the NOD/ShiLtJ genome, which we suggest reflects the fact that positive selection for functional variation in immune genes is beneficial in regard to host defence. In summary, we provide an important resource, which aids the analysis of potential causative genes involved in T1D susceptibility. Database URLs: http://www.sanger.ac.uk/resources/mouse/nod/; http://vega-previous.sanger.ac.uk/info/data/mouse_regions.html#Idd","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +23730305,The systems genetics resource: a web application to mine global data for complex disease traits.,"The Systems Genetics Resource (SGR) (http://systems.genetics.ucla.edu) is a new open-access web application and database that contains genotypes and clinical and intermediate phenotypes from both human and mouse studies. The mouse data include studies using crosses between specific inbred strains and studies using the Hybrid Mouse Diversity Panel. SGR is designed to assist researchers studying genes and pathways contributing to complex disease traits, including obesity, diabetes, atherosclerosis, heart failure, osteoporosis, and lipoprotein metabolism. Over the next few years, we hope to add data relevant to deafness, addiction, hepatic steatosis, toxin responses, and vascular injury. The intermediate phenotypes include expression array data for a variety of tissues and cultured cells, metabolite levels, and protein levels. 
Pre-computed tables of genetic loci controlling intermediate and clinical phenotypes, as well as phenotype correlations, are accessed via a user-friendly web interface. The web site includes detailed protocols for all of the studies. Data from published studies are freely available; unpublished studies have restricted access during their embargo period.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +23734622,Biotea: RDFizing PubMed Central in support for the paper as an interface to the Web of Data.,"

Background

The World Wide Web has become a dissemination platform for scientific and non-scientific publications. However, most of the information remains locked up in discrete documents that are not always interconnected or machine-readable. The connectivity tissue provided by RDF technology has not yet been widely used to support the generation of self-describing, machine-readable documents.

Results

In this paper, we present our approach to the generation of self-describing machine-readable scholarly documents. We understand the scientific document as an entry point and interface to the Web of Data. We have semantically processed the full-text, open-access subset of PubMed Central. Our RDF model and resulting dataset make extensive use of existing ontologies and semantic enrichment services. We expose our model, services, prototype, and datasets at http://biotea.idiginfo.org/

Conclusions

The semantic processing of biomedical literature presented in this paper embeds documents within the Web of Data and facilitates the execution of concept-based queries against the entire digital library. Our approach delivers a flexible and adaptable set of tools for metadata enrichment and semantic processing of biomedical documents. Our model delivers a semantically rich and highly interconnected dataset with self-describing content so that software can make effective use of it.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - software and includes a data resource +23828786,Towards building a disease-phenotype knowledge base: extracting disease-manifestation relationship from literature.,"

Motivation

Systems approaches to studying phenotypic relationships among diseases are emerging as an active area of research for both novel disease gene discovery and drug repurposing. Currently, systematic study of disease phenotypic relationships on a phenome-wide scale is limited because large-scale machine-understandable disease-phenotype relationship knowledge bases are often unavailable. Here, we present an automatic approach to extract disease-manifestation (D-M) pairs (one specific type of disease-phenotype relationship) from the wide body of published biomedical literature.

Data and methods

Our method leverages external knowledge and limits the amount of human effort required. For the text corpus, we used 119 085 682 MEDLINE sentences (21 354 075 citations). First, we used D-M pairs from existing biomedical ontologies as prior knowledge to automatically discover D-M-specific syntactic patterns. We then extracted additional pairs from MEDLINE using the learned patterns. Finally, we analysed correlations between disease manifestations and disease-associated genes and drugs to demonstrate the potential of this newly created knowledge base in disease gene discovery and drug repurposing.

Results

In total, we extracted 121 359 unique D-M pairs with a high precision of 0.924. Among the extracted pairs, 120 419 (99.2%) have not been captured in existing structured knowledge sources. We have shown that disease manifestations correlate positively with both disease-associated genes and drug treatments.

Conclusions

The main contribution of our study is the creation of a large-scale and accurate D-M phenotype relationship knowledge base. This unique knowledge base, when combined with existing phenotypic, genetic and proteomic datasets, can have profound implications in our deeper understanding of disease etiology and in rapid drug repurposing.

Availability

http://nlp.case.edu/public/data/DMPatternUMLS/","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +23868073,Large-scale gene function analysis with the PANTHER classification system.,"The PANTHER (protein annotation through evolutionary relationship) classification system (http://www.pantherdb.org/) is a comprehensive system that combines gene function, ontology, pathways and statistical analysis tools that enable biologists to analyze large-scale, genome-wide data from sequencing, proteomics or gene expression experiments. The system is built with 82 complete genomes organized into gene families and subfamilies, and their evolutionary relationships are captured in phylogenetic trees, multiple sequence alignments and statistical models (hidden Markov models or HMMs). Genes are classified according to their function in several different ways: families and subfamilies are annotated with ontology terms (Gene Ontology (GO) and PANTHER protein class), and sequences are assigned to PANTHER pathways. The PANTHER website includes a suite of tools that enable users to browse and query gene functions, and to analyze large-scale experimental data with a number of statistical tests. It is widely used by bench scientists, bioinformaticians, computer scientists and systems biologists. In the 2013 release of PANTHER (v.8.0), in addition to an update of the data content, we redesigned the website interface to improve both user experience and the system's analytical capability. 
This protocol provides a detailed description of how to analyze genome-wide experimental data with the PANTHER classification system.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +23989082,PhenDisco: phenotype discovery system for the database of genotypes and phenotypes.,"The database of genotypes and phenotypes (dbGaP) developed by the National Center for Biotechnology Information (NCBI) is a resource that contains information on various genome-wide association studies (GWAS) and is currently available via NCBI's dbGaP Entrez interface. The database is an important resource, providing GWAS data that can be used for new exploratory research or cross-study validation by authorized users. However, finding studies relevant to a particular phenotype of interest is challenging, as phenotype information is presented in a non-standardized way. To address this issue, we developed PhenDisco (phenotype discoverer), a new information retrieval system for dbGaP. PhenDisco consists of two main components: (1) text processing tools that standardize phenotype variables and study metadata, and (2) information retrieval tools that support queries from users and return ranked results. In a preliminary comparison involving 18 search scenarios, PhenDisco showed promising performance for both unranked and ranked search comparisons with dbGaP's search engine Entrez. The system can be accessed at http://pfindr.net.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - software +24060102,Comparative genomics of metabolic capacities of regulons controlled by cis-regulatory RNA motifs in bacteria.,"

Background

In silico comparative genomics approaches have been efficiently used for functional prediction and reconstruction of metabolic and regulatory networks. Riboswitches are metabolite-sensing structures often found in bacterial mRNA leaders controlling gene expression on transcriptional or translational levels. An increasing number of riboswitches and other cis-regulatory RNAs have been recently classified into numerous RNA families in the Rfam database. High conservation of these RNA motifs provides a unique advantage for their genomic identification and comparative analysis.

Results

A comparative genomics approach implemented in the RegPredict tool was used for reconstruction and functional annotation of regulons controlled by RNAs from 43 Rfam families in diverse taxonomic groups of Bacteria. The inferred regulons include ~5200 cis-regulatory RNAs and more than 12000 target genes in 255 microbial genomes. All predicted RNA-regulated genes were classified into specific and overall functional categories. Analysis of taxonomic distribution of these categories allowed us to establish major functional preferences for each analyzed cis-regulatory RNA motif family. Overall, most RNA motif regulons showed predictable functional content in accordance with their experimentally established effector ligands. Our results suggest that some RNA motifs (including thiamin pyrophosphate and cobalamin riboswitches that control the cofactor metabolism) are widespread and likely originated from the last common ancestor of all bacteria. However, many more analyzed RNA motifs are restricted to a narrow taxonomic group of bacteria and likely represent more recent evolutionary innovations.

Conclusions

The reconstructed regulatory networks for major known RNA motifs substantially expand the existing knowledge of transcriptional regulation in bacteria. The inferred regulons can be used for genetic experiments, functional annotations of genes, metabolic reconstruction and evolutionary analysis. The obtained genome-wide collection of reference RNA motif regulons is available in the RegPrecise database (http://regprecise.lbl.gov/).","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +24087878,OpenMSI: a high-performance web-based platform for mass spectrometry imaging.,"Mass spectrometry imaging (MSI) enables researchers to directly probe endogenous molecules directly within the architecture of the biological matrix. Unfortunately, efficient access, management, and analysis of the data generated by MSI approaches remain major challenges to this rapidly developing field. Despite the availability of numerous dedicated file formats and software packages, it is a widely held viewpoint that the biggest challenge is simply opening, sharing, and analyzing a file without loss of information. Here we present OpenMSI, a software framework and platform that addresses these challenges via an advanced, high-performance, extensible file format and Web API for remote data access (http://openmsi.nersc.gov). The OpenMSI file format supports storage of raw MSI data, metadata, and derived analyses in a single, self-describing format based on HDF5 and is supported by a large range of analysis software (e.g., Matlab and R) and programming languages (e.g., C++, Fortran, and Python). Careful optimization of the storage layout of MSI data sets using chunking, compression, and data replication accelerates common, selective data access operations while minimizing data storage requirements and are critical enablers of rapid data I/O. 
The OpenMSI file format has shown to provide >2000-fold improvement for image access operations, enabling spectrum and image retrieval in less than 0.3 s across the Internet even for 50 GB MSI data sets. To make remote high-performance compute resources accessible for analysis and to facilitate data sharing and collaboration, we describe an easy-to-use yet powerful Web API, enabling fast and convenient access to MSI data, metadata, and derived analysis results stored remotely to facilitate high-performance data analysis and enable implementation of Web based data sharing, visualization, and analysis.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - software +24165883,IMG 4 version of the integrated microbial genomes comparative analysis system.,"The Integrated Microbial Genomes (IMG) data warehouse integrates genomes from all three domains of life, as well as plasmids, viruses and genome fragments. IMG provides tools for analyzing and reviewing the structural and functional annotations of genomes in a comparative context. IMG's data content and analytical capabilities have increased continuously since its first version released in 2005. Since the last report published in the 2012 NAR Database Issue, IMG's annotation and data integration pipelines have evolved while new tools have been added for recording and analyzing single cell genomes, RNA Seq and biosynthetic cluster data. 
Different IMG datamarts provide support for the analysis of publicly available genomes (IMG/W: http://img.jgi.doe.gov/w), expert review of genome annotations (IMG/ER: http://img.jgi.doe.gov/er) and teaching and training in the area of microbial genome analysis (IMG/EDU: http://img.jgi.doe.gov/edu).","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +24178034,IDEAL in 2014 illustrates interaction networks composed of intrinsically disordered proteins and their binding partners.,"IDEAL (Intrinsically Disordered proteins with Extensive Annotations and Literature, http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL/) is a collection of intrinsically disordered proteins (IDPs) that cannot adopt stable globular structures under physiological conditions. Since its previous publication in 2012, the number of entries in IDEAL has almost tripled (120 to 340). In addition to the increase in quantity, the quality of IDEAL has been significantly improved. The new IDEAL incorporates the interactions of IDPs and their binding partners more explicitly, and illustrates the protein-protein interaction (PPI) networks and the structures of protein complexes. Redundant experimental data are arranged based on the clustering of Protein Data Bank entries, and similar sequences with the same binding mode are grouped. As a result, the new IDEAL presents more concise and informative experimental data. Nuclear magnetic resonance (NMR) disorder is annotated in a systematic manner, by identifying the regions with large deviations among the NMR models. The ordered/disordered and new domain predictions by DICHOT are available, as well as the domain assignments by HMMER. Some examples of the PPI networks and the highly deviated regions derived from NMR models will be described, together with other advances. 
These enhancements will facilitate deeper understanding of IDPs, in terms of their flexibility, plasticity and promiscuity.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +24270792,Gene3D: Multi-domain annotations for protein sequence and comparative genome analysis.,"Gene3D (http://gene3d.biochem.ucl.ac.uk) is a database of protein domain structure annotations for protein sequences. Domains are predicted using a library of profile HMMs from 2738 CATH superfamilies. Gene3D assigns domain annotations to Ensembl and UniProt sequence sets including >6000 cellular genomes and >20 million unique protein sequences. This represents an increase of 45% in the number of protein sequences since our last publication. Thanks to improvements in the underlying data and pipeline, we see large increases in the domain coverage of sequences. We have expanded this coverage by integrating Pfam and SUPERFAMILY domain annotations, and we now resolve domain overlaps to provide highly comprehensive composite multi-domain architectures. To make these data more accessible for comparative genome analyses, we have developed novel search algorithms for searching genomes to identify related multi-domain architectures. In addition to providing domain family annotations, we have now developed a pipeline for 3D homology modelling of domains in Gene3D. This has been applied to the human genome and will be rolled out to other major organisms over the next year.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +24319143,MMDB and VAST+: tracking structural similarities between macromolecular complexes.,"The computational detection of similarities between protein 3D structures has become an indispensable tool for the detection of homologous relationships, the classification of protein families and functional inference. 
Consequently, numerous algorithms have been developed that facilitate structure comparison, including rapid searches against a steadily growing collection of protein structures. To this end, NCBI's Molecular Modeling Database (MMDB), which is based on the Protein Data Bank (PDB), maintains a comprehensive and up-to-date archive of protein structure similarities computed with the Vector Alignment Search Tool (VAST). These similarities have been recorded on the level of single proteins and protein domains, comprising in excess of 1.5 billion pairwise alignments. Here we present VAST+, an extension to the existing VAST service, which summarizes and presents structural similarity on the level of biological assemblies or macromolecular complexes. VAST+ simplifies structure neighboring results and shows, for macromolecular complexes tracked in MMDB, lists of similar complexes ranked by the extent of similarity. VAST+ replaces the previous VAST service as the default presentation of structure neighboring data in NCBI's Entrez query and retrieval system. MMDB and VAST+ can be accessed via http://www.ncbi.nlm.nih.gov/Structure.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +24372041,miRror2.0: a platform for assessing the joint action of microRNAs in cell regulation.,"microRNAs (miRNAs) are short, noncoding RNAs that negatively regulate the levels of mRNA post-transcriptionally. Recent experiments revealed thousands of mRNA-miRNA pairs in which multiple miRNAs may bind the same transcript. These results raised the notion of miRNAs teamwork for a wide range of cellular context. miRror2.0 utilizes the miRNA-target predictions from over a dozen programs and resources and unifies them under a common statistical basis. The platform, called miRror2.0, considers the combinatorial regulation by miRNAs in different tissues, cell lines and under a broad range of conditions. 
A flexible setting permits the selection of the preferred combination of miRNA-target prediction resources as well as the statistical parameters for the analysis. miRror2.0 covers six major model organisms including human and mouse. Importantly, the system is capable of analyzing hundreds of genes that were subjected to miRNAs' regulation. Activating miRror2.0 by introducing thousands of genes from miRNA overexpression experiments successfully identified the objective miRNAs. The output from miRror2.0 is a list of genes that is optimally regulated by a defined set of miRNAs. A symmetric application of miRror2.0 starts with a set of miRNAs, and the system then seeks the preferred set of genes that are regulated by that miRNA composition. The results from miRror2.0 are empowered by an iterative procedure called PSI-miRror. PSI-miRror tests the robustness of miRror2.0 prediction. It allows a refinement of the initial list of genes in view of the miRNAs that optimally regulate this list. We present miRror2.0 as a valuable resource for supporting cellular experimentalists that seek recovery of combinatorial regulation by miRNAs from noisy experimental data. miRror2.0 is available at http://www.mirrorsuite.cs.huji.ac.il .","hji,kes",0,0,0,2,0,NA,iffy; reassessed and re-scored - software +24464816,PRIMSIPLR: prediction of inner-membrane situated pore-lining residues for alpha-helical transmembrane proteins.,"Transmembrane proteins such as transporters and channels mediate the passage of inorganic and organic substances across biological membranes through their central pore. Pore-lining residues (PLRs) that make direct contacts to the substrates have a crucial impact on the function of the protein and, hence, their identification is a key step in mechanistic studies. Here, we established a nonredundant data set containing the three-dimensional (3D) structures of 90 a-helical transmembrane proteins and annotated the PLRs of these proteins by a pore identification software. 
A support vector machine was then trained to distinguish PLRs from other residues based on the protein sequence alone. Using sixfold cross-validation, our best performing predictor gave a Matthews's correlation coefficient of 0.41 with an accuracy of 0.86, sensitivity of 0.61, and specificity of 0.89, respectively. We provide a novel software tool that will aid biomedical scientists working on transmembrane proteins with unknown 3D structures. Both standalone version and web service are freely available from the URL http://service.bioinformatik.uni-saarland.de/PRIMSIPLR/.","hji,kes",0,0,0,2,0,NA,seems to be both the tool and value add data; reassessed and re-scored - emphasize on the method +24467687,Non-synonymous variations in cancer and their effects on the human proteome: workflow for NGS data biocuration and proteome-wide analysis of TCGA data.,"

Background

Next-generation sequencing (NGS) technologies have resulted in petabytes of scattered data, decentralized in archives, databases and sometimes in isolated hard-disks which are inaccessible for browsing and analysis. It is expected that curated secondary databases will help organize some of this Big Data thereby allowing users better navigate, search and compute on it.

Results

To address the above challenge, we have implemented a NGS biocuration workflow and are analyzing short read sequences and associated metadata from cancer patients to better understand the human variome. Curation of variation and other related information from control (normal tissue) and case (tumor) samples will provide comprehensive background information that can be used in genomic medicine research and application studies. Our approach includes a CloudBioLinux Virtual Machine which is used upstream of an integrated High-performance Integrated Virtual Environment (HIVE) that encapsulates Curated Short Read archive (CSR) and a proteome-wide variation effect analysis tool (SNVDis). As a proof-of-concept, we have curated and analyzed control and case breast cancer datasets from the NCI cancer genomics program - The Cancer Genome Atlas (TCGA). Our efforts include reviewing and recording in CSR available clinical information on patients, mapping of the reads to the reference followed by identification of non-synonymous Single Nucleotide Variations (nsSNVs) and integrating the data with tools that allow analysis of effect nsSNVs on the human proteome. Furthermore, we have also developed a novel phylogenetic analysis algorithm that uses SNV positions and can be used to classify the patient population. The workflow described here lays the foundation for analysis of short read sequence data to identify rare and novel SNVs that are not present in dbSNP and therefore provides a more comprehensive understanding of the human variome. Variation results for single genes as well as the entire study are available from the CSR website (http://hive.biochemistry.gwu.edu/dna.cgi?cmd=csr).

Conclusions

Availability of thousands of sequenced samples from patients provides a rich repository of sequence information that can be utilized to identify individual level SNVs and their effect on the human proteome beyond what the dbSNP database provides.","hji,kes",0,1,1,2,0.5,NA,database but password protected; reassessed and still yes - includes a data resource +24580755,"CDSbank: taxonomy-aware extraction, selection, renaming and formatting of protein-coding DNA or amino acid sequences.","

Background

Protein-coding DNA sequences and their corresponding amino acid sequences are routinely used to study relationships between sequence, structure, function, and evolution. The rapidly growing size of sequence databases increases the power of such comparative analyses but it makes it more challenging to prepare high quality sequence data sets with control over redundancy, quality, completeness, formatting, and labeling. Software tools for some individual steps in this process exist but manual intervention remains a common and time consuming necessity.

Description

CDSbank is a database that stores both the protein-coding DNA sequence (CDS) and amino acid sequence for each protein annotated in Genbank. CDSbank also stores Genbank feature annotation, a flag to indicate incomplete 5' and 3' ends, full taxonomic data, and a heuristic to rank the scientific interest of each species. This rich information allows fully automated data set preparation with a level of sophistication that aims to meet or exceed manual processing. Defaults ensure ease of use for typical scenarios while allowing great flexibility when needed. Access is via a free web server at http://hazeslab.med.ualberta.ca/CDSbank/.

Conclusions

CDSbank presents a user-friendly web server to download, filter, format, and name large sequence data sets. Common usage scenarios can be accessed via pre-programmed default choices, while optional sections give full control over the processing pipeline. Particular strengths are: extract protein-coding DNA sequences just as easily as amino acid sequences, full access to taxonomy for labeling and filtering, awareness of incomplete sequences, and the ability to take one protein sequence and extract all synonymous CDS or identical protein sequences in other species. Finally, CDSbank can also create labeled property files to, for instance, annotate or re-label phylogenetic trees.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +24655548,Analysis of growth factor signaling in genetically diverse breast cancer lines.,"

Background

Soluble growth factors present in the microenvironment play a major role in tumor development, invasion, metastasis, and responsiveness to targeted therapies. While the biochemistry of growth factor-dependent signal transduction has been studied extensively in individual cell types, relatively little systematic data are available across genetically diverse cell lines.

Results

We describe a quantitative and comparative dataset focused on immediate-early signaling that regulates the AKT (AKT1/2/3) and ERK (MAPK1/3) pathways in a canonical panel of well-characterized breast cancer lines. We also provide interactive web-based tools to facilitate follow-on analysis of the data. Our findings show that breast cancers are diverse with respect to ligand sensitivity and signaling biochemistry. Surprisingly, triple negative breast cancers (TNBCs; which express low levels of ErbB2, progesterone and estrogen receptors) are the most broadly responsive to growth factors and HER2amp cancers (which overexpress ErbB2) the least. The ratio of ERK to AKT activation varies with ligand and subtype, with a systematic bias in favor of ERK in hormone receptor positive (HR+) cells. The factors that correlate with growth factor responsiveness depend on whether fold-change or absolute activity is considered the key biological variable, and they differ between ERK and AKT pathways.

Conclusions

Responses to growth factors are highly diverse across breast cancer cell lines, even within the same subtype. A simple four-part heuristic suggests that diversity arises from variation in receptor abundance, an ERK/AKT bias that depends on ligand identity, a set of factors common to all receptors that varies in abundance or activity with cell line, and an """"indirect negative regulation"""" by ErbB2. This analysis sets the stage for the development of a mechanistic and predictive model of growth factor signaling in diverse cancer lines. Interactive tools for looking up these results and downloading raw data are available at http://lincs.hms.harvard.edu/niepel-bmcbiol-2014/.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",project website though; reassessed and still yes - raw data available +24678734,Research resource: EPSLiM: ensemble predictor for short linear motifs in nuclear hormone receptors.,"Nuclear receptors (NRs) are a superfamily of transcription factors central to regulating many biological processes, including cell growth, death, metabolism, and immune responses. NR-mediated gene expression can be modulated by coactivators and corepressors through direct physical interaction or protein complexes with functional domains in NRs. One class of these domains includes short linear motifs (SLiMs), which facilitate protein-protein interactions, phosphorylation, and ligand binding primarily in the intrinsically disordered regions (IDRs) of proteins. Across all proteins, the number of known SLiMs is limited due to the difficulty in studying IDRs experimentally. Computational tools provide a systematic and data-driven approach for predicting functional motifs that can be used to prioritize experimental efforts. Accordingly, several tools have been developed based on sequence conservation or biophysical features; however, discrepancies in predictions make it difficult to determine the true candidate SLiMs. 
In this work, we present the ensemble predictor for short linear motifs (EPSLiM), a novel strategy to prioritize the residues that are most likely to be SLiMs in IDRs. EPSLiM applies a generalized linear model to integrate predictions from individual methodologies. We show that EPSLiM outperforms individual predictors, and we apply our method to NRs. The androgen receptor is an example with an N-terminal domain of 559 disordered amino acids that contains several validated SLiMs important for transcriptional activation. We use the androgen receptor to illustrate the predictive performance of EPSLiM and make the results of all human and mouse NRs publically available through the web service http://epslim.bwh.harvard.edu.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data set","resulting data is what's available; reassessed and still yes - tricky abstract, it's the data that's available" +24727366,"'In silico expression analysis', a novel PathoPlant web tool to identify abiotic and biotic stress conditions associated with specific cis-regulatory sequences.","Using bioinformatics, putative cis-regulatory sequences can be easily identified using pattern recognition programs on promoters of specific gene sets. The abundance of predicted cis-sequences is a major challenge to associate these sequences with a possible function in gene expression regulation. To identify a possible function of the predicted cis-sequences, a novel web tool designated 'in silico expression analysis' was developed that correlates submitted cis-sequences with gene expression data from Arabidopsis thaliana. The web tool identifies the A. thaliana genes harbouring the sequence in a defined promoter region and compares the expression of these genes with microarray data. The result is a hierarchy of abiotic and biotic stress conditions to which these genes are most likely responsive. 
When testing the performance of the web tool, known cis-regulatory sequences were submitted to the 'in silico expression analysis' resulting in the correct identification of the associated stress conditions. When using a recently identified novel elicitor-responsive sequence, a WT-box (CGACTTTT), the 'in silico expression analysis' predicts that genes harbouring this sequence in their promoter are most likely Botrytis cinerea induced. Consistent with this prediction, the strongest induction of a reporter gene harbouring this sequence in the promoter is observed with B. cinerea in transgenic A. thaliana. DATABASE URL: http://www.pathoplant.de/expression_analysis.php.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes database, but that is not emphasized","no notes; reassessed and still yes - data, I think - iffy" +24852251,StemCellNet: an interactive platform for network-oriented investigations in stem cell biology.,"Stem cells are characterized by their potential for self-renewal and their capacity to differentiate into mature cells. These two key features emerge through the interplay of various factors within complex molecular networks. To provide researchers with a dedicated tool to investigate these networks, we have developed StemCellNet, a versatile web server for interactive network analysis and visualization. It rapidly generates focused networks based on a large collection of physical and regulatory interactions identified in human and murine stem cells. The StemCellNet web-interface has various easy-to-use tools for selection and prioritization of network components, as well as for integration of expression data provided by the user. As a unique feature, the networks generated can be screened against a compendium of stemness-associated genes. StemCellNet can also indicate novel candidate genes by evaluating their connectivity patterns. 
Finally, an optional dataset of generic interactions, which provides large coverage of the human and mouse proteome, extends the versatility of StemCellNet to other biomedical research areas in which stem cells play important roles, such as in degenerative diseases or cancer. The StemCellNet web server is freely accessible at http://stemcellnet.sysbiolab.eu.","hji,kes",0,1,1,2,0.5,NA,iffy; reassessed and still yes - there is data there but it is iffy +24885229,A comprehensive assessment of the transcriptome of cork oak (Quercus suber) through EST sequencing.,"

Background

Cork oak (Quercus suber) is one of the rare trees with the ability to produce cork, a material widely used to make wine bottle stoppers, flooring and insulation materials, among many other uses. The molecular mechanisms of cork formation are still poorly understood, in great part due to the difficulty in studying a species with a long life-cycle and for which there is scarce molecular/genomic information. Cork oak forests are of great ecological importance and represent a major economic and social resource in Southern Europe and Northern Africa. However, global warming is threatening the cork oak forests by imposing thermal, hydric and many types of novel biotic stresses. Despite the economic and social value of the Q. suber species, few genomic resources have been developed, useful for biotechnological applications and improved forest management.

Results

We generated in excess of 7 million sequence reads, by pyrosequencing 21 normalized cDNA libraries derived from multiple Q. suber tissues and organs, developmental stages and physiological conditions. We deployed a stringent sequence processing and assembly pipeline that resulted in the identification of ~159,000 unigenes. These were annotated according to their similarity to known plant genes, to known Interpro domains, GO classes and E.C. numbers. The phylogenetic extent of this ESTs set was investigated, and we found that cork oak revealed a significant new gene space that is not covered by other model species or EST sequencing projects. The raw data, as well as the full annotated assembly, are now available to the community in a dedicated web portal at http://www.corkoakdb.org.

Conclusions

This genomic resource represents the first trancriptome study in a cork producing species. It can be explored to develop new tools and approaches to understand stress responses and developmental processes in forest trees, as well as the molecular cascades underlying cork differentiation and disease response.","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - data available +25119676,Improvements to pairwise sequence comparison (PASC): a genome-based web tool for virus classification.,"The number of viral genome sequences in the public databases is increasing dramatically, and these sequences are playing an important role in virus classification. Pairwise sequence comparison is a sequence-based virus classification method. A program using this method calculates the pairwise identities of virus sequences within a virus family and displays their distribution, and visual analysis helps to determine demarcations at different taxonomic levels such as strain, species, genus and subfamily. Subsequent comparison of new sequences against existing ones allows viruses from which the new sequences were derived to be classified. Although this method cannot be used as the only criterion for virus classification in some cases, it is a quantitative method and has many advantages over conventional virus classification methods. It has been applied to several virus families, and there is an increasing interest in using this method for other virus families/groups. The Pairwise Sequence Comparison (PASC) classification tool was created at the National Center for Biotechnology Information. The tool's database stores pairwise identities for complete genomes/segments of 56 virus families/groups. Data in the system are updated every day to reflect changes in virus taxonomy and additions of new virus sequences to the public database. The web interface of the tool ( http://www.ncbi.nlm.nih.gov/sutils/pasc/ ) makes it easy to navigate and perform analyses. 
Multiple new viral genome sequences can be tested simultaneously with this system to suggest the taxonomic position of virus isolates in a specific family. PASC eliminates potential discrepancies in the results caused by different algorithms and/or different data used by researchers.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - data available +25179504,Differential motif enrichment analysis of paired ChIP-seq experiments.,"

Background

Motif enrichment analysis of transcription factor ChIP-seq data can help identify transcription factors that cooperate or compete. Previously, little attention has been given to comparative motif enrichment analysis of pairs of ChIP-seq experiments, where the binding of the same transcription factor is assayed under different conditions. Such comparative analysis could potentially identify the distinct regulatory partners/competitors of the assayed transcription factor under different conditions or at different stages of development.

Results

We describe a new methodology for identifying sequence motifs that are differentially enriched in one set of DNA or RNA sequences relative to another set, and apply it to paired ChIP-seq experiments. We show that, using paired ChIP-seq data for a single transcription factor, differential motif enrichment analysis identifies all the known key transcription factors involved in the transformation of non-cancerous immortalized breast cells (MCF10A-ER-Src cells) into cancer stem cells whereas non-differential motif enrichment analysis does not. We also show that differential motif enrichment analysis identifies regulatory motifs that are significantly enriched at constrained locations within the bound promoters, and that these motifs are not identified by non-differential motif enrichment analysis. Our methodology differs from other approaches in that it leverages both comparative enrichment and positional enrichment of motifs in ChIP-seq peak regions or in the promoters of genes bound by the transcription factor.

Conclusions

We show that differential motif enrichment analysis of paired ChIP-seq experiments offers biological insights not available from non-differential analysis. In contrast to previous approaches, our method detects motifs that are enriched in a constrained region in one set of sequences, but not enriched in the same region in the comparative set. We have enhanced the web-based CentriMo algorithm to allow it to perform the constrained differential motif enrichment analysis described in this paper, and CentriMo's on-line interface (http://meme.ebi.edu.au) provides dozens of databases of DNA- and RNA-binding motifs from a full range of organisms. All data and output files presented here are available at http://research.imb.uq.edu.au/t.bailey/supplementary\_data/Lesluyes2014.","hji,kes",0,0,0,2,0,NA,"no notes; reassessed and re-scored - data as supp files, not really a resouce" +25404128,"Beyond protein expression, MOPED goes multi-omics.","MOPED (Multi-Omics Profiling Expression Database; http://moped.proteinspire.org) has transitioned from solely a protein expression database to a multi-omics resource for human and model organisms. Through a web-based interface, MOPED presents consistently processed data for gene, protein and pathway expression. To improve data quality, consistency and use, MOPED includes metadata detailing experimental design and analysis methods. The multi-omics data are integrated through direct links between genes and proteins and further connected to pathways and experiments. MOPED now contains over 5 million records, information for approximately 75,000 genes and 50,000 proteins from four organisms (human, mouse, worm, yeast). These records correspond to 670 unique combinations of experiment, condition, localization and tissue. MOPED includes the following new features: pathway expression, Pathway Details pages, experimental metadata checklists, experiment summary statistics and more advanced searching tools. 
Advanced searching enables querying for genes, proteins, experiments, pathways and keywords of interest. The system is enhanced with visualizations for comparing across different data types. In the future MOPED will expand the number of organisms, increase integration with pathways and provide connections to disease.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +25428349,"OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders.","Online Mendelian Inheritance in Man, OMIM(), is a comprehensive, authoritative and timely research resource of curated descriptions of human genes and phenotypes and the relationships between them. The new official website for OMIM, OMIM.org (http://omim.org), was launched in January 2011. OMIM is based on the published peer-reviewed biomedical literature and is used by overlapping and diverse communities of clinicians, molecular biologists and genome scientists, as well as by students and teachers of these disciplines. Genes and phenotypes are described in separate entries and are given unique, stable six-digit identifiers (MIM numbers). OMIM entries have a structured free-text format that provides the flexibility necessary to describe the complex and nuanced relationships between genes and genetic phenotypes in an efficient manner. OMIM also has a derivative table of genes and genetic phenotypes, the Morbid Map. OMIM.org has enhanced search capabilities such as genome coordinate searching and thesaurus-enhanced search term options. Phenotypic series have been created to facilitate viewing genetic heterogeneity of phenotypes. Clinical synopsis features are enhanced with UMLS, Human Phenotype Ontology and Elements of Morphology terms and image links. All OMIM data are available for FTP download and through an API. 
MIMmatch is a novel outreach feature to disseminate updates and encourage collaboration.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +25540186,Knowledge-based modeling of peptides at protein interfaces: PiPreD.,"

Motivation

Protein-protein interactions (PPIs) underpin virtually all cellular processes both in health and disease. Modulating the interaction between proteins by means of small (chemical) agents is therefore a promising route for future novel therapeutic interventions. In this context, peptides are gaining momentum as emerging agents for the modulation of PPIs.

Results

We reported a novel computational, structure and knowledge-based approach to model orthosteric peptides to target PPIs: PiPreD. PiPreD relies on a precompiled and bespoken library of structural motifs, iMotifs, extracted from protein complexes and a fast structural modeling algorithm driven by the location of native chemical groups on the interface of the protein target named anchor residues. PiPreD comprehensive and systematically samples the entire interface deriving peptide conformations best suited for the given region on the protein interface. PiPreD complements the existing technologies and provides new solutions for the disruption of selected interactions.

Availability and implementation

Database and accessory scripts and programs are available upon request to the authors or at http://www.bioinsilico.org/PIPRED.

Contact

narcis.fernandez@gmail.com.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - data but on request only +25629077,Genetic variability of microRNA regulome in human.,"MicroRNAs are currently being extensively studied due to their important role as post-transcriptional regulators. During miRNA biogenesis, precursors undergo two cleavage steps performed by Drosha-DGCR8 (Microprocessor) cleaving of pri-miRNA to produce pre-miRNA and Dicer-mediated cleaving to create mature miRNA. Genetic variants within human miRNA regulome have been shown to influence miRNA expression, target interaction and to affect the phenotype. In this study, we reviewed the literature, existing bioinformatics tools and catalogs associated with polymorphic miRNA regulome, and organized them into four categories: (1) polymorphisms located within miRNA genes (miR-SNPs), (2) transcription factor-binding sites/miRNA regulatory regions (miR-rSNPs), (3) miRNA target sites (miR-TS-SNPs), and 4. miRNA silencing machinery (miR-SM-SNPs). Since the miR-SM-SNPs have not been systematically studied yet, we have collected polymorphisms associated with miRNA silencing machinery. We have developed two catalogs containing genetic variability within: (1) genes encoding three main catalytic components of the silencing machinery, DROSHA, DGCR8, and DICER1; (2) miRNA genes itself, overlapping Drosha and Dicer cleavage sites. 
The developed resource of polymorphisms is available online (http://www.integratomics-time.com/miRNA-regulome) and will be useful for further functional studies and development of biomarkers associated with diseases and phenotypic traits.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +25907632,The KM-parkin-DB: A Sub-set MutationView Database Specialized for PARK2 (PARKIN) Variants.,"We previously isolated PARKIN (PARK2) as a gene responsible for a unique sort of Parkinson disease, namely Autosomal Recessive Juvenile Parkinsonism (ARJP). In this study, we surveyed all the available literature describing PARK2 gene/Parkin protein mutations found in Parkinson disease patients. Only carefully evaluated data were deposited in the graphical database MutationView (http://mutview.dmb.med.keio.ac.jp) to construct KM-parkin-DB, an independent sub-set database. Forty-four articles were selected for data curation regarding clinical information such as ethnic origins, manifested symptoms, onset age, and hereditary patterns as well as mutation details including base changes and zygosity. A total of 366 cases were collected from 39 ethnic origins and 96 pathogenic mutations were found. PARK2 gene mutations were found also in some general Parkinson disease patients. The majority (63%) of mutations in PARK2 were restricted to two particular domains (UBL and RING1) of the Parkin protein. In these domains, two major mutations, a large deletion (DelEx3) and a point mutation (p.Arg275Trp), were located.","hji,kes",0,1,1,2,0.5,NA,"no notes; reassessed and still yes - includes a data resource, but tricky. Independent sub database" +26043787,Detection and analysis of disease-associated single nucleotide polymorphism influencing post-translational modification.,"Post-translational modification (PTM) plays a crucial role in biological functions and corresponding disease developments. 
Discovering disease-associated non-synonymous SNPs (nsSNPs) altering PTM sites can help to estimate the various PTM candidates involved in diseases, therefore, an integrated analysis between SNPs, PTMs and diseases is necessary. However, only a few types of PTMs affected by nsSNPs have been studied without considering disease-association until now. In this study, we developed a new database called PTM-SNP which contains a comprehensive collection of human nsSNPs that affect PTM sites, together with disease information. Total 179,325 PTM-SNPs were collected by aligning missense SNPs and stop-gain SNPs on PTM sites (position 0) or their flanking region (position -7 to 7). Disease-associated SNPs from GWAS catalogs were also matched with detected PTM-SNP to find disease associated PTM-SNPs. Our result shows PTM-SNPs are highly associated with diseases, compared with other nsSNP sites and functional classes including near gene, intron and so on. PTM-SNP can provide an insight about discovering important PTMs involved in the diseases easily through the web site. PTM-SNP is freely available at http://gcode.kaist.ac.kr/ptmsnp.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +26109357,Gene Model Annotations for Drosophila melanogaster: Impact of High-Throughput Data.,"We report the current status of the FlyBase annotated gene set for Drosophila melanogaster and highlight improvements based on high-throughput data. The FlyBase annotated gene set consists entirely of manually annotated gene models, with the exception of some classes of small non-coding RNAs. All gene models have been reviewed using evidence from high-throughput datasets, primarily from the modENCODE project. These datasets include RNA-Seq coverage data, RNA-Seq junction data, transcription start site profiles, and translation stop-codon read-through predictions. 
New annotation guidelines were developed to take into account the use of the high-throughput data. We describe how this flood of new data was incorporated into thousands of new and revised annotations. FlyBase has adopted a philosophy of excluding low-confidence and low-frequency data from gene model annotations; we also do not attempt to represent all possible permutations for complex and modularly organized genes. This has allowed us to produce a high-confidence, manageable gene annotation dataset that is available at FlyBase (http://flybase.org). Interesting aspects of new annotations include new genes (coding, non-coding, and antisense), many genes with alternative transcripts with very long 3' UTRs (up to 15-18 kb), and a stunning mismatch in the number of male-specific genes (approximately 13% of all annotated gene models) vs. female-specific genes (less than 1%). The number of identified pseudogenes and mutations in the sequenced strain also increased significantly. We discuss remaining challenges, for instance, identification of functional small polypeptides and detection of alternative translation starts.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +26148193,Basic Emotions in the Nencki Affective Word List (NAWL BE): New Method of Classifying Emotional Stimuli.,"The Nencki Affective Word List (NAWL) has recently been introduced as a standardized database of Polish words suitable for studying various aspects of language and emotions. Though the NAWL was originally based on the most commonly used dimensional approach, it is not the only way of studying emotions. Another framework is based on discrete emotional categories. Since the two perspectives are recognized as complementary, the aim of the present study was to supplement the NAWL database by the addition of categories corresponding to basic emotions. 
Thus, 2902 Polish words from the NAWL were presented to 265 subjects, who were instructed to rate them according to the intensity of each of the five basic emotions: happiness, anger, sadness, fear and disgust. The general characteristics of the present word database, as well as the relationships between the studied variables are shown to be consistent with typical patterns found in previous studies using similar databases for different languages. Here we present the Basic Emotions in the Nencki Affective Word List (NAWL BE) as a database of verbal material suitable for highly controlled experimental research. To make the NAWL more convenient to use, we introduce a comprehensive method of classifying stimuli to basic emotion categories. We discuss the advantages of our method in comparison to other methods of classification. Additionally, we provide an interactive online tool (http://exp.lobi.nencki.gov.pl/nawl-analysis) to help researchers browse and interactively generate classes of stimuli to meet their specific requirements.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - does ref a db but url is to a tool +26227548,Simulated unbound structures for benchmarking of protein docking in the DOCKGROUND resource.,"

Background

Proteins play an important role in biological processes in living organisms. Many protein functions are based on interaction with other proteins. The structural information is important for adequate description of these interactions. Sets of protein structures determined in both bound and unbound states are essential for benchmarking of the docking procedures. However, the number of such proteins in PDB is relatively small. A radical expansion of such sets is possible if the unbound structures are computationally simulated.

Results

The DOCKGROUND public resource provides data to improve our understanding of protein-protein interactions and to assist in the development of better tools for structural modeling of protein complexes, such as docking algorithms and scoring functions. A large set of simulated unbound protein structures was generated from the bound structures. The modeling protocol was based on 1 ns Langevin dynamics simulation. The simulated structures were validated on the ensemble of experimentally determined unbound and bound structures. The set is intended for large scale benchmarking of docking algorithms and scoring functions.

Conclusions

A radical expansion of the unbound protein docking benchmark set was achieved by simulating the unbound structures. The simulated unbound structures were selected according to criteria from systematic comparison of experimentally determined bound and unbound structures. The set is publicly available at http://dockground.compbio.ku.edu.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +26578587,BioGPS: building your own mash-up of gene annotations and expression profiles.,"BioGPS (http://biogps.org) is a centralized gene-annotation portal that enables researchers to access distributed gene annotation resources. This article focuses on the updates to BioGPS since our last paper (2013 database issue). The unique features of BioGPS, compared to those of other gene portals, are its community extensibility and user customizability. Users contribute the gene-specific resources accessible from BioGPS ('plugins'), which helps ensure that the resource collection is always up-to-date and that it will continue expanding over time (since the 2013 paper, 162 resources have been added, for a 34% increase in the number of resources available). BioGPS users can create their own collections of relevant plugins and save them as customized gene-report pages or 'layouts' (since the 2013 paper, 488 user-created layouts have been added, for a 22% increase in the number of layouts). In addition, we recently updated the most popular plugin, the 'Gene expression/activity chart', to include ~ 6000 datasets (from ~ 2000 datasets) and we enhanced user interactivity. 
We also added a new 'gene list' feature that allows users to save query results for future reference.","hji,kes",0,1,1,2,0.5,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +26852673,Multi-tissue transcriptomics for construction of a comprehensive gene resource for the terrestrial snail Theba pisana.,"The land snail Theba pisana is native to the Mediterranean region but has become one of the most abundant invasive species worldwide. Here, we present three transcriptomes of this agriculture pest derived from three tissues: the central nervous system, hepatopancreas (digestive gland), and foot muscle. Sequencing of the three tissues produced 339,479,092 high quality reads and a global de novo assembly generated a total of 250,848 unique transcripts (unigenes). BLAST analysis mapped 52,590 unigenes to NCBI non-redundant protein databases and further functional analysis annotated 21,849 unigenes with gene ontology. We report that T. pisana transcripts have representatives in all functional classes and a comparison of differentially expressed transcripts amongst all three tissues demonstrates enormous differences in their potential metabolic activities. The genes differentially expressed include those with sequence similarity to those genes associated with multiple bacterial diseases and neurological diseases. To provide a valuable resource that will assist functional genomics study, we have implemented a user-friendly web interface, ThebaDB (http://thebadb.bioinfo-minzhao.org/). 
This online database allows for complex text queries, sequence searches, and data browsing by enriched functional terms and KEGG mapping.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +26927478,A polymer dataset for accelerated property prediction and design.,"Emerging computation- and data-driven approaches are particularly useful for rationally designing materials with targeted properties. Generally, these approaches rely on identifying structure-property relationships by learning from a dataset of sufficiently large number of relevant materials. The learned information can then be used to predict the properties of materials not already in the dataset, thus accelerating the materials design. Herein, we develop a dataset of 1,073 polymers and related materials and make it available at http://khazana.uconn.edu/. This dataset is uniformly prepared using first-principles calculations with structures obtained either from other sources or by using structure search methods. Because the immediate target of this work is to assist the design of high dielectric constant polymers, it is initially designed to include the optimized structures, atomization energies, band gaps, and dielectric constants. It will be progressively expanded by accumulating new materials and including additional properties calculated for the optimized structures provided.","hji,kes",0,0,0,2,0,no notes; reassessed and not life sci,no notes; reassessed and re-scored - db but not life sci ?! +27102089,Gene-set activity toolbox (GAT): A platform for microarray-based cancer diagnosis using an integrative gene-set analysis approach.,"Cancer is a complex disease that cannot be diagnosed reliably using only single gene expression analysis. 
Using gene-set analysis on high throughput gene expression profiling controlled by various environmental factors is a commonly adopted technique used by the cancer research community. This work develops a comprehensive gene expression analysis tool (gene-set activity toolbox: (GAT)) that is implemented with data retriever, traditional data pre-processing, several gene-set analysis methods, network visualization and data mining tools. The gene-set analysis methods are used to identify subsets of phenotype-relevant genes that will be used to build a classification model. To evaluate GAT performance, we performed a cross-dataset validation study on three common cancers namely colorectal, breast and lung cancers. The results show that GAT can be used to build a reasonable disease diagnostic model and the predicted markers have biological relevance. GAT can be accessed from http://gat.sit.kmutt.ac.th where GAT's java library for gene-set analysis, simple classification and a database with three cancer benchmark datasets can be downloaded.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",benchmark datasets; reassessed and still yes - software and data says data is downloadable +27131357,"PHYLOViZ Online: web-based tool for visualization, phylogenetic inference, analysis and sharing of minimum spanning trees.","High-throughput sequencing methods generated allele and single nucleotide polymorphism information for thousands of bacterial strains that are publicly available in online repositories and created the possibility of generating similar information for hundreds to thousands of strains more in a single study. Minimum spanning tree analysis of allelic data offers a scalable and reproducible methodological alternative to traditional phylogenetic inference approaches, useful in epidemiological investigations and population studies of bacterial pathogens. 
PHYLOViZ Online was developed to allow users to do these analyses without software installation and to enable easy accessing and sharing of data and analyses results from any Internet enabled computer. PHYLOViZ Online also offers a RESTful API for programmatic access to data and algorithms, allowing it to be seamlessly integrated into any third party web service or software. PHYLOViZ Online is freely available at https://online.phyloviz.net.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - software +27350137,Analysis of Transitional and Turbulent Flow Through the FDA Benchmark Nozzle Model Using Laser Doppler Velocimetry.,"Transitional and turbulent flow through a simplified medical device model is analyzed as part of the FDA's Critical Path Initiative, designed to improve the process of bringing medical products to market. Computational predictions are often used in the development of devices and reliable in vitro data is needed to validate computational results, particularly estimations of the Reynolds stresses that could play a role in damaging blood elements. The high spatial resolution of laser Doppler velocimetry (LDV) is used to collect two component velocity data within the FDA benchmark nozzle model. Two flow conditions are used to produce flow encompassing laminar, transitional, and turbulent regimes, and viscous stresses, principal Reynolds stresses, and turbulence intensities are calculated from the measured LDV velocities. Axial velocities and viscous stresses are compared to data from a prior inter-laboratory study conducted with particle image velocimetry. Large velocity gradients are observed near the wall in the nozzle throat and in the jet shear layer located in the expansion downstream of the throat, with axial velocity changing as much as 4.5m/s over 200m. 
Additionally, maximum Reynolds shear stresses of 1000-2000Pa are calculated in the high shear regions, which are an order of magnitude higher than the peak viscous shear stresses (<100Pa). It is important to consider the effects of both viscous and turbulent stresses when simulating flow through medical devices. Reynolds stresses above commonly accepted hemolysis thresholds are measured in the nozzle model, indicating that hemolysis may occur under certain flow conditions. As such, the presented turbulence quantities from LDV, which are also available for download at https://fdacfd.nci.nih.gov/ , provide an ideal validation test for computational simulations that seek to characterize the flow field and to predict hemolysis within the FDA nozzle geometry.","hji,kes",0,1,1,2,0.5,NA,iffy; reassessed and still yes - there is data there but it is iffy +27450113,The archiving and dissemination of biological structure data.,"The global Protein Data Bank (PDB) was the first open-access digital archive in biology. The history and evolution of the PDB are described, together with the ways in which molecular structural biology data and information are collected, curated, validated, archived, and disseminated by the members of the Worldwide Protein Data Bank organization (wwPDB; http://wwpdb.org). Particular emphasis is placed on the role of community in establishing the standards and policies by which the PDB archive is managed day-to-day.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +27509041,MMpI: A WideRange of Available Compounds of Matrix Metalloproteinase Inhibitors.,"Matrix metalloproteinases (MMPs) are a family of zinc-dependent proteinases involved in the regulation of the extracellular signaling and structural matrix environment of cells and tissues. MMPs are considered as promising targets for the treatment of many diseases. 
Therefore, creation of database on the inhibitors of MMP would definitely accelerate the research activities in this area due to its implication in above-mentioned diseases and associated limitations in the first and second generation inhibitors. In this communication, we report the development of a new MMpI database which provides resourceful information for all researchers working in this field. It is a web-accessible, unique resource that contains detailed information on the inhibitors of MMP including small molecules, peptides and MMP Drug Leads. The database contains entries of ~3000 inhibitors including ~72 MMP Drug Leads and ~73 peptide based inhibitors. This database provides the detailed molecular and structural details which are necessary for the drug discovery and development. The MMpI database contains physical properties, 2D and 3D structures (mol2 and pdb format files) of inhibitors of MMP. Other data fields are hyperlinked to PubChem, ChEMBL, BindingDB, DrugBank, PDB, MEROPS and PubMed. The database has extensive searching facility with MMpI ID, IUPAC name, chemical structure and with the title of research article. The MMP inhibitors provided in MMpI database are optimized using Python-based Hierarchical Environment for Integrated Xtallography (Phenix) software. MMpI Database is unique and it is the only public database that contains and provides the complete information on the inhibitors of MMP. Database URL: http://clri.res.in/subramanian/databases/mmpi/index.php.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +27551106,PPI4DOCK: large scale assessment of the use of homology models in free docking over more than 1000 realistic targets.,"

Motivation

Protein-protein docking methods are of great importance for understanding interactomes at the structural level. It has become increasingly appealing to use not only experimental structures but also homology models of unbound subunits as input for docking simulations. So far we are missing a large scale assessment of the success of rigid-body free docking methods on homology models.

Results

We explored how we could benefit from comparative modelling of unbound subunits to expand docking benchmark datasets. Starting from a collection of 3157 non-redundant, high X-ray resolution heterodimers, we developed the PPI4DOCK benchmark containing 1417 docking targets based on unbound homology models. Rigid-body docking by Zdock showed that for 1208 cases (85.2%), at least one correct decoy was generated, emphasizing the efficiency of rigid-body docking in generating correct assemblies. Overall, the PPI4DOCK benchmark contains a large set of realistic cases and provides new ground for assessing docking and scoring methodologies.

Availability and implementation

Benchmark sets can be downloaded from http://biodev.cea.fr/interevol/ppi4dock/ CONTACT: guerois@cea.frSupplementary information: Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",benchmarking data available; reassessed and still yes - data available +27554092,Training and evaluation corpora for the extraction of causal relationships encoded in biological expression language (BEL).,"Success in extracting biological relationships is mainly dependent on the complexity of the task as well as the availability of high-quality training data. Here, we describe the new corpora in the systems biology modeling language BEL for training and testing biological relationship extraction systems that we prepared for the BioCreative V BEL track. BEL was designed to capture relationships not only between proteins or chemicals, but also complex events such as biological processes or disease states. A BEL nanopub is the smallest unit of information and represents a biological relationship with its provenance. In BEL relationships (called BEL statements), the entities are normalized to defined namespaces mainly derived from public repositories, such as sequence databases, MeSH or publicly available ontologies. In the BEL nanopubs, the BEL statements are associated with citation information and supportive evidence such as a text excerpt. To enable the training of extraction tools, we prepared BEL resources and made them available to the community. We selected a subset of these resources focusing on a reduced set of namespaces, namely, human and mouse genes, ChEBI chemicals, MeSH diseases and GO biological processes, as well as relationship types 'increases' and 'decreases'. The published training corpus contains 11 000 BEL statements from over 6000 supportive text excerpts. For method evaluation, we selected and re-annotated two smaller subcorpora containing 100 text excerpts. 
For this re-annotation, the inter-annotator agreement was measured by the BEL track evaluation environment and resulted in a maximal F-score of 91.18% for full statement agreement. In addition, for a set of 100 BEL statements, we do not only provide the gold standard expert annotations, but also text excerpts pre-selected by two automated systems. Those text excerpts were evaluated and manually annotated as true or false supportive in the course of the BioCreative V BEL track task.Database URL: http://wiki.openbel.org/display/BIOC/Datasets.","hji,kes",0,1,1,2,0.5,NA,value add and available; reassessed and still yes - data available +27779621,"A studyforrest extension, simultaneous fMRI and eye gaze recordings during prolonged natural stimulation.","Here we present an update of the studyforrest (http://studyforrest.org) dataset that complements the previously released functional magnetic resonance imaging (fMRI) data for natural language processing with a new two-hour 3 Tesla fMRI acquisition while 15 of the original participants were shown an audio-visual version of the stimulus motion picture. We demonstrate with two validation analyses that these new data support modeling specific properties of the complex natural stimulus, as well as a substantial within-subject BOLD response congruency in brain areas related to the processing of auditory inputs, speech, and narrative when compared to the existing fMRI data for audio-only stimulation. 
In addition, we provide participants' eye gaze location as recorded simultaneously with fMRI, and an additional sample of 15 control participants whose eye gaze trajectories for the entire movie were recorded in a lab setting-to enable studies on attentional processes and comparative investigations on the potential impact of the stimulation setting on these processes.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - data set +27899584,CATH: an expanded resource to predict protein function through structure and sequence.,"The latest version of the CATH-Gene3D protein structure classification database has recently been released (version 4.1, http://www.cathdb.info). The resource comprises over 300 000 domain structures and over 53 million protein domains classified into 2737 homologous superfamilies, doubling the number of predicted protein domains in the previous version. The daily-updated CATH-B, which contains our very latest domain assignment data, provides putative classifications for over 100 000 additional protein domains. This article describes developments to the CATH-Gene3D resource over the last two years since the publication in 2015, including: significant increases to our structural and sequence coverage; expansion of the functional families in CATH; building a support vector machine (SVM) to automatically assign domains to superfamilies; improved search facilities to return alignments of query sequences against multiple sequence alignments; the redesign of the web pages and download site.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +28053162,POSTAR: a platform for exploring post-transcriptional regulation coordinated by RNA-binding proteins.,"We present POSTAR (http://POSTAR.ncrnalab.org), a resource of POST-trAnscriptional Regulation coordinated by RNA-binding proteins (RBPs). 
Precise characterization of post-transcriptional regulatory maps has accelerated dramatically in the past few years. Based on new studies and resources, POSTAR supplies the largest collection of experimentally probed (~23 million) and computationally predicted (approximately 117 million) RBP binding sites in the human and mouse transcriptomes. POSTAR annotates every transcript and its RBP binding sites using extensive information regarding various molecular regulatory events (e.g., splicing, editing, and modification), RNA secondary structures, disease-associated variants, and gene expression and function. Moreover, POSTAR provides a friendly, multi-mode, integrated search interface, which helps users to connect multiple RBP binding sites with post-transcriptional regulatory events, phenotypes, and diseases. Based on our platform, we were able to obtain novel insights into post-transcriptional regulation, such as the putative association between CPSF6 binding, RNA structural domains, and Li-Fraumeni syndrome SNPs. In summary, POSTAR represents an early effort to systematically annotate post-transcriptional regulatory maps and explore the putative roles of RBPs in human diseases.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource","no notes; reassessed and still yes - includes a data resource, collection of binding sites" +28633399,The interfacial character of antibody paratopes: analysis of antibody-antigen structures.,"

Summary

In this study, computational methods are applied to investigate the general properties of antigen engaging residues of a paratope from a non-redundant dataset of 403 antibody-antigen complexes to dissect the contribution of hydrogen bonds, hydrophobic, van der Waals contacts and ionic interactions, as well as role of water molecules in the antigen-antibody interface. Consistent with previous reports using smaller datasets, we found that Tyr, Trp, Ser, Asn, Asp, Thr, Arg, Gly, His contribute substantially to the interactions between antibody and antigen. Furthermore, antibody-antigen interactions can be mediated by interfacial waters. However, there is no reported comprehensive analysis for a large number of structured waters that engage in higher ordered structures at the antibody-antigen interface. From our dataset, we have found the presence of interfacial waters in 242 complexes. We present evidence that suggests a compelling role of these interfacial waters in interactions of antibodies with a range of antigens differing in shape complementarity. Finally, we carry out 296 835 pairwise 3D structure comparisons of 771 structures of contact residues of antibodies with their interfacial water molecules from our dataset using CLICK method. A heuristic clustering algorithm is used to obtain unique structural similarities, and found to separate into 368 different clusters. These clusters are used to identify structural motifs of contact residues of antibodies for epitope binding.

Availability and implementation

This clustering database of contact residues is freely accessible at http://mspc.bii.a-star.edu.sg/minhn/pclick.html.

Contact

minhn@bii.a-star.edu.sg, chandra@bii.a-star.edu.sg or zhong_pingyu@immunol.a-star.edu.sg.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource of cluster residues - output of model +28827280,Systematic and Quantitative Assessment of Hydrogen Peroxide Reactivity With Cysteines Across Human Proteomes.,"Protein cysteinyl residues are the mediators of hydrogen peroxide (H2O2)-dependent redox signaling. However, site-specific mapping of the selectivity and dynamics of these redox reactions in cells poses a major analytical challenge. Here we describe a chemoproteomic platform to systematically and quantitatively analyze the reactivity of thousands of cysteines toward H2O2 in human cells. We identified >900 H2O2-sensitive cysteines, which are defined as the H2O2-dependent redoxome. Although redox sites associated with antioxidative and metabolic functions are consistent, most of the H2O2-dependent redoxome varies dramatically between different cells. Structural analyses reveal that H2O2-sensitive cysteines are less conserved than their redox-insensitive counterparts and display distinct sequence motifs, structural features, and potential for crosstalk with lysine modifications. Notably, our chemoproteomic platform also provides an opportunity to predict oxidation-triggered protein conformational changes. The data are freely accessible as a resource at http://redox.ncpsb.org/OXID/.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +28968848,A comprehensive assessment of long intrinsic protein disorder from the DisProt database.,"

Motivation

Intrinsic disorder (ID), i.e. the lack of a unique folded conformation at physiological conditions, is a common feature for many proteins, which requires specialized biochemical experiments that are not high-throughput. Missing X-ray residues from the PDB have been widely used as a proxy for ID when developing computational methods. This may lead to a systematic bias, where predictors deviate from biologically relevant ID. Large benchmarking sets on experimentally validated ID are scarce. Recently, the DisProt database has been renewed and expanded to include manually curated ID annotations for several hundred new proteins. This provides a large benchmark set which has not yet been used for training ID predictors.

Results

Here, we describe the first systematic benchmarking of ID predictors on the new DisProt dataset. In contrast to previous assessments based on missing X-ray data, this dataset contains mostly long ID regions and a significant amount of fully ID proteins. The benchmarking shows that ID predictors work quite well on the new dataset, especially for long ID segments. However, a large fraction of ID still goes virtually undetected and the ranking of methods is different than for PDB data. In particular, many predictors appear to confound ID and regions outside X-ray structures. This suggests that the ID prediction methods capture different flavors of disorder and can benefit from highly accurate curated examples.

Availability and implementation

The raw data used for the evaluation are available from URL: http://www.disprot.org/assessment/.

Contact

silvio.tosatto@unipd.it.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,1,1,2,0.5,no notes; reassessed and still no - these are results from an existing (and updated) dataset. Paper is not about the data resource.,no notes; reassessed and still yes - data set +29039006,Information Resources for Functional Genomics Studies in Brachypodium distachyon.,"Online tools and databases play an essential role in the promotion of functional genomics studies. Several resources for information regarding Brachypodium distachyon (Brachypodium) are available on the Web. In this chapter, we focus on recently published resources for Brachypodium research. The Brachypodium.org website ( http://www.brachypodium.org /) is an information portal that provides links to various genomic resources regarding Brachypodium, including genome annotation and re-sequencing datasets of accessions. RIKEN Full-length cDNA Database (RBFLDB, http://brachy.bmep.riken.jp/ver.1/index.pl ) is a web-accessible database that provides information of Brachypodium full-length cDNAs (FLcDNAs) collected in RIKEN and updated gene structures of Brachypodium based on the FLcDNA sequences as well as results of comparative analyses with available sequence resources for Triticeae crops, wheat, and barley. We introduce the functionalities and availability of these important information resources. Furthermore, we also present brief descriptions of useful online tools that facilitate Brachypodium functional genomics studies.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource","no notes; reassessed and still yes - includes a data resource, possibly two - one a knowledgebase and another data resource" +29155427,PDB-wide identification of biological assemblies from conserved quaternary structure geometry.,"Protein structures are key to understanding biomolecular mechanisms and diseases, yet their interpretation is hampered by limited knowledge of their biologically relevant quaternary structure (QS). 
A critical challenge in inferring QS information from crystallographic data is distinguishing biological interfaces from fortuitous crystal-packing contacts. Here, we tackled this problem by developing strategies for aligning and comparing QS states across both homologs and data repositories. QS conservation across homologs proved remarkably strong at predicting biological relevance and is implemented in two methods, QSalign and anti-QSalign, for annotating homo-oligomers and monomers, respectively. QS conservation across repositories is implemented in QSbio (http://www.QSbio.org), which approaches the accuracy of manual curation and allowed us to predict >100,000 QS states across the Protein Data Bank. Based on this high-quality data set, we analyzed pairs of structurally conserved interfaces, and this analysis revealed a striking plasticity whereby evolutionary distant interfaces maintain similar interaction geometries through widely divergent chemical properties.","hji,kes",0,0,0,2,0,NA,iffy; reassessed and re-scored - not clear from abstract what is available +29161266,Knowledge-based prediction of protein backbone conformation using a structural alphabet.,"Libraries of structural prototypes that abstract protein local structures are known as structural alphabets and have proven to be very useful in various aspects of protein structure analyses and predictions. One such library, Protein Blocks, is composed of 16 standard 5-residues long structural prototypes. This form of analyzing proteins involves drafting its structure as a string of Protein Blocks. Predicting the local structure of a protein in terms of protein blocks is the general objective of this work. A new approach, PB-kPRED is proposed towards this aim. 
It involves (i) organizing the structural knowledge in the form of a database of pentapeptide fragments extracted from all protein structures in the PDB and (ii) applying a knowledge-based algorithm that does not rely on any secondary structure predictions and/or sequence alignment profiles, to scan this database and predict most probable backbone conformations for the protein local structures. Though PB-kPRED uses the structural information from homologues in preference, if available. The predictions were evaluated rigorously on 15,544 query proteins representing a non-redundant subset of the PDB filtered at 30% sequence identity cut-off. We have shown that the kPRED method was able to achieve mean accuracies ranging from 40.8% to 66.3% depending on the availability of homologues. The impact of the different strategies for scanning the database on the prediction was evaluated and is discussed. Our results highlight the usefulness of the method in the context of proteins without any known structural homologues. A scoring function that gives a good estimate of the accuracy of prediction was further developed. This score estimates very well the accuracy of the algorithm (R2 of 0.82). An online version of the tool is provided freely for non-commercial usage at http://www.bo-protscience.fr/kpred/.","hji,kes",0,0,0,2,0,NA,"value add and available; reassessed and re-scored, seems to focus on method only" +29220450,CTD2 Dashboard: a searchable web interface to connect validated results from the Cancer Target Discovery and Development Network.,

Database url

https://ctd2-dashboard.nci.nih.gov/.,"hji,kes",0,0,0,2,0,NA,"value add; reassessed and re-scored, abstract error" +29296922,Standard measures for sickle cell disease research: the PhenX Toolkit sickle cell disease collections.,"Standard measures and common data elements for sickle cell disease (SCD) will improve the data quality and comparability necessary for cross-study analyses and the development of guidelines that support effective treatments and interventions. In 2014, the National Institutes of Health, National Heart, Lung, and Blood Institute (NHLBI) funded an Administrative Supplement to the PhenX Toolkit (consensus measures for Phenotypes and eXposures; https://www.phenxtoolkit.org/) to identify common measures to promote data comparability across SCD research. An 11-member Sickle Cell Disease Research and Scientific Panel provided guidance to the project, establishing a core collection of SCD-related measures and defining the scope of 2 specialty collections: (1) cardiovascular, pulmonary, and renal complications, and (2) neurology, quality-of-life, and health services. For each specialty collection, a working group of SCD experts selected high-priority measures using a consensus process that included scientific community input. The SCD measures were released into the Toolkit in August 2015. The 25 measures included in the core collection are recommended for use by all NHLBI-funded investigators performing human-subject SCD research. The 10 neurology, quality-of-life, and health services measures and 14 cardiovascular, pulmonary, and renal measures are recommended for use within these specialized research areas. For SCD and other researchers, PhenX measures will promote collaborations with clinicians and patients, facilitate cross-study analysis, accelerate translational research, and lead to greater understanding of SCD phenotypes and epigenetics. 
For clinicians, using PhenX measures will help elucidate the etiology, progression, and treatment of SCD, leading to improved patient care and quality of life.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - not a collection of data it seems +29351546,Accessing an Expanded Exposure Science Module at the Comparative Toxicogenomics Database.,"SUMMARY:The Comparative Toxicogenomics Database (CTD; http://ctdbase.org) is a free resource that provides manually curated information on chemical, gene, phenotype, and disease relationships to advance understanding of the effect of environmental exposures on human health. Four core content areas are independently curated: chemical-gene interactions, chemical-disease and gene-disease associations, chemical-phenotype interactions, and environmental exposure data (e.g., effects of chemical stressors on humans). Since releasing exposure data in 2015, we have vastly increased our coverage of chemicals and disease/phenotype outcomes; greatly expanded access to exposure content; added search capability by stressors, cohorts, population demographics, and measured outcomes; and created user-specified displays of content. These enhancements aim to facilitate human studies by allowing comparisons among experimental parameters and across studies involving specified chemicals, populations, or outcomes. Integration of data among CTD's four content areas and external data sets, such as Gene Ontology annotations and pathway information, links exposure data with over 1.8 million chemical-gene, chemical-disease and gene-disease interactions. Our analysis tools reveal direct and inferred relationships among the data and provide opportunities to generate predictive connections between environmental exposures and population-level health outcomes. 
https://doi.org/10.1289/EHP2873.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +29855811,Procura-PALavras (P-PAL): A Web-based interface for a new European Portuguese lexical database.,"In this article, we present Procura-PALavras (P-PAL), a Web-based interface for a new European Portuguese (EP) lexical database. Based on a contemporary printed corpus of over 227 million words, P-PAL provides a broad range of word attributes and statistics, including several measures of word frequency (e.g., raw counts, per-million word frequency, logarithmic Zipf scale), morpho-syntactic information (e.g., parts of speech [PoSs], grammatical gender and number, dominant PoS, and frequency and relative frequency of the dominant PoS), as well as several lexical and sublexical orthographic (e.g., number of letters; consonant-vowel orthographic structure; density and frequency of orthographic neighbors; orthographic Levenshtein distance; orthographic uniqueness point; orthographic syllabification; and trigram, bigram, and letter type and token frequencies), and phonological measures (e.g., pronunciation, number of phonemes, stress, density and frequency of phonological neighbors, transposed and phonographic neighbors, syllabification, and biphone and phone type and token frequencies) for ~53,000 lemmatized and ~208,000 nonlemmatized EP word forms. To obtain these metrics, researchers can choose between two word queries in the application: (i) analyze words previously selected for specific attributes and/or lexical and sublexical characteristics, or (ii) generate word lists that meet word requirements defined by the user in the menu of analyses. For the measures it provides and the flexibility it allows, P-PAL will be a key resource to support research in all cognitive areas that use EP verbal stimuli. 
P-PAL is freely available at http://p-pal.di.uminho.pt/tools .","hji,kes",1,1,2,2,1,no notes; reassessed and questionably life sci,no notes; reassessed and still yes - includes a data resource +29990104,A Robust 3D-2D Interactive Tool for Scene Segmentation and Annotation.,"Recent advances of 3D acquisition devices have enabled large-scale acquisition of 3D scene data. Such data, if completely and well annotated, can serve as useful ingredients for a wide spectrum of computer vision and graphics works such as data-driven modeling and scene understanding, object detection and recognition. However, annotating a vast amount of 3D scene data remains challenging due to the lack of an effective tool and/or the complexity of 3D scenes (e.g. clutter, varying illumination conditions). This paper aims to build a robust annotation tool that effectively and conveniently enables the segmentation and annotation of massive 3D data. Our tool works by coupling 2D and 3D information via an interactive framework, through which users can provide high-level semantic annotation for objects. We have experimented our tool and found that a typical indoor scene could be well segmented and annotated in less than 30 minutes by using the tool, as opposed to a few hours if done manually. Along with the tool, we created a dataset of over a hundred 3D scenes associated with complete annotations using our tool. Both the tool and dataset will be available at http://scenenn.net.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data set",no notes; reassessed and still yes - data set +29990255,Meta-Path Methods for Prioritizing Candidate Disease miRNAs.,"MicroRNAs (miRNAs) play critical roles in regulating gene expression at post-transcriptional levels. Numerous experimental studies indicate that alterations and dysregulations in miRNAs are associated with important complex diseases, especially cancers. 
Predicting potential miRNA-disease association is beneficial not only to explore the pathogenesis of diseases, but also to understand biological processes. In this work, we propose two methods that can effectively predict potential miRNA-disease associations using our reconstructed miRNA and disease similarity networks, which are based on the latest experimental data. We reconstruct a miRNA functional similarity network using the following biological information: the miRNA family information, miRNA cluster information, experimentally valid miRNA-target association and disease-miRNA information. We also reconstruct a disease similarity network using disease functional information and disease semantic information. We present Katz with specific weights and Katz with machine learning, on the comprehensive heterogeneous network. These methods, which achieve corresponding AUC values of 0.897 and 0.919, exhibit performance superior to the existing methods. Comprehensive data networks and reasonable considerations guarantee the high performance of our methods. Contrary to several methods, which cannot work in such situations, the proposed methods also predict associations for diseases without any known related miRNAs. A web service for the download and prediction of relationships between diseases and miRNAs is available at http://lab.malab.cn/soft/MDPredict/.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",iffy; reassessed and still yes - it seems that it's the outputs/data that's available +30102334,"Cell membrane proteins with high N-glycosylation, high expression and multiple interaction partners are preferred by mammalian viruses as receptors.","

Motivation

Receptor mediated entry is the first step for viral infection. However, the question of how viruses select receptors remains unanswered.

Results

Here, by manually curating a high-quality database of 268 pairs of mammalian virus-host receptor interaction, which included 128 unique viral species or sub-species and 119 virus receptors, we found the viral receptors are structurally and functionally diverse, yet they had several common features when compared to other cell membrane proteins: more protein domains, higher level of N-glycosylation, higher ratio of self-interaction and more interaction partners, and higher expression in most tissues of the host. This study could deepen our understanding of virus-receptor interaction.

Availability and implementation

The database of mammalian virus-host receptor interaction is available at http://www.computationalbiology.cn: 5000/viralReceptor.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +30137226,ImaGEO: integrative gene expression meta-analysis from GEO database.,"SUMMARY:The Gene Expression Omnibus (GEO) database provides an invaluable resource of publicly available gene expression data that can be integrated and analyzed to derive new hypothesis and knowledge. In this context, gene expression meta-analysis (geMAs) is increasingly used in several fields to improve study reproducibility and discovering robust biomarkers. Nevertheless, integrating data is not straightforward without bioinformatics expertise. Here, we present ImaGEO, a web tool for geMAs that implements a complete and comprehensive meta-analysis workflow starting from GEO dataset identifiers. The application integrates GEO datasets, applies different meta-analysis techniques and provides functional analysis results in an easy-to-use environment. ImaGEO is a powerful and useful resource that allows researchers to integrate and perform meta-analysis of GEO datasets to lead robust findings for biomarker discovery studies. AVAILABILITY AND IMPLEMENTATION:ImaGEO is accessible at http://bioinfo.genyo.es/imageo/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +30248878,The IADN data visualization tool.,"Data on atmospheric levels of toxic pollutants in samples collected near the Great Lakes are now readily available online to scientists, researchers, and the public on a website called IADN Data Viz (https://iadnviz.iu.edu/). These data come from the Integrated Atmospheric Deposition Network (IADN), a long term monitoring program run by the U.S. 
Environmental Protection Agency (US EPA).","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - data but seems to be viz only +30407550,"Human Disease Ontology 2018 update: classification, content and workflow expansion.","The Human Disease Ontology (DO) (http://www.disease-ontology.org), database has undergone significant expansion in the past three years. The DO disease classification includes specific formal semantic rules to express meaningful disease models and has expanded from a single asserted classification to include multiple-inferred mechanistic disease classifications, thus providing novel perspectives on related diseases. Expansion of disease terms, alternative anatomy, cell type and genetic disease classifications and workflow automation highlight the updates for the DO since 2015. The enhanced breadth and depth of the DO's knowledgebase has expanded the DO's utility for exploring the multi-etiology of human disease, thus improving the capture and communication of health-related data across biomedical databases, bioinformatics tools, genomic and cancer resources and demonstrated by a 6.6 growth in DO's user community since 2015. The DO's continual integration of human disease knowledge, evidenced by the more than 200 SVN/GitHub releases/revisions, since previously reported in our DO 2015 NAR paper, includes the addition of 2650 new disease terms, a 30% increase of textual definitions, and an expanding suite of disease classification hierarchies constructed through defined logical axioms.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +30714210,Functional Evolution of Proteins.,"The functional evolution of proteins advances through gene duplication followed by functional drift, whereas molecular evolution occurs through random mutational events. 
Over time, protein active-site structures or functional epitopes remain highly conserved, which enables relationships to be inferred between distant orthologs or paralogs. In this study, we present the first functional clustering and evolutionary analysis of the RCSB Protein Data Bank (RCSB PDB) based on similarities between active-site structures. All of the ligand-bound proteins within the RCSB PDB were scored using our Comparison of Protein Active-site Structures (CPASS) software and database (http://cpass.unl.edu/). Principal component analysis was then used to identify 4431 representative structures to construct a phylogenetic tree based on the CPASS comparative scores (http://itol.embl.de/shared/jcatazaro). The resulting phylogenetic tree identified a sequential, step-wise evolution of protein active-sites and provides novel insights into the emergence of protein function or changes in substrate specificity based on subtle changes in geometry and amino acid composition.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - don't think the data is there - it only refs other resources +30963486,Sfold Tools for MicroRNA Target Prediction.,"Computational prediction of miRNA binding sites on target mRNAs facilitates experimental investigation of miRNA functions. In this chapter, we describe STarMir and STarMirDB, two application modules of the Sfold RNA package. STarMir is a Web server for performing miRNA binding site predictions for mRNA and target sequences submitted by users. STarMirDB is a database of precomputed transcriptome-scale predictions. Both STarMir and STarMirDB provide comprehensive sequence, thermodynamic, and target structure features, a logistic probability as a measure of confidence for each predicted site, and a publication-quality diagram of the predicted miRNA-target hybrid. In addition, STarMir now offers a new quantitative score to address combined regulatory effects of multiple seed and seedless sites. 
This score provides a quantitative measure of the overall regulatory effects of both seed and seedless sites on the target. STarMir and STarMirDB are freely available to all through the Sfold Web application server at http://sfold.wadsworth.org .","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - software and includes a data resource +31142855,Multi-omics of the gut microbial ecosystem in inflammatory bowel diseases.,"Inflammatory bowel diseases, which include Crohn's disease and ulcerative colitis, affect several million individuals worldwide. Crohn's disease and ulcerative colitis are complex diseases that are heterogeneous at the clinical, immunological, molecular, genetic, and microbial levels. Individual contributing factors have been the focus of extensive research. As part of the Integrative Human Microbiome Project (HMP2 or iHMP), we followed 132 subjects for one year each to generate integrated longitudinal molecular profiles of host and microbial activity during disease (up to 24 time points each; in total 2,965 stool, biopsy, and blood specimens). Here we present the results, which provide a comprehensive view of functional dysbiosis in the gut microbiome during inflammatory bowel disease activity. We demonstrate a characteristic increase in facultative anaerobes at the expense of obligate anaerobes, as well as molecular disruptions in microbial transcription (for example, among clostridia), metabolite pools (acylcarnitines, bile acids, and short-chain fatty acids), and levels of antibodies in host serum. Periods of disease activity were also marked by increases in temporal variability, with characteristic taxonomic, functional, and biochemical shifts. Finally, integrative analysis identified microbial, biochemical, and host factors central to this dysregulation. 
The study's infrastructure resources, results, and data, which are available through the Inflammatory Bowel Disease Multi'omics Database ( http://ibdmdb.org ), provide the most comprehensive description to date of host and microbial activities in inflammatory bowel diseases.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +31201317,Multi omics analysis of fibrotic kidneys in two mouse models.,"Kidney fibrosis represents an urgent unmet clinical need due to the lack of effective therapies and an inadequate understanding of the molecular pathogenesis. We have generated a comprehensive and combined multi-omics dataset (proteomics, mRNA and small RNA transcriptomics) of fibrotic kidneys that is searchable through a user-friendly web application: http://hbcreports.med.harvard.edu/fmm/ . Two commonly used mouse models were utilized: a reversible chemical-induced injury model (folic acid (FA) induced nephropathy) and an irreversible surgically-induced fibrosis model (unilateral ureteral obstruction (UUO)). mRNA and small RNA sequencing, as well as 10-plex tandem mass tag (TMT) proteomics were performed with kidney samples from different time points over the course of fibrosis development. The bioinformatics workflow used to process, technically validate, and combine the single omics data will be described. 
In summary, we present temporal multi-omics data from fibrotic mouse kidneys that are accessible through an interrogation tool (Mouse Kidney Fibromics browser) to provide a searchable transcriptome and proteome for kidney fibrosis researchers.","hji,kes",1,1,2,2,1,"tentative 1; Very small, is it a biodata resource?; reassessed, we have included similar datasets",no notes; reassessed and still yes - data set available +31220804,Investigation and development of maize fused network analysis with multi-omics.,"Maize is a critically important staple crop in the whole world, which has contributed to both economic security and food in planting areas. The main target for researchers and breeding is the improvement of maize quality and yield. The use of computational biology methods combined with multi-omics for selecting biomolecules of interest for maize breeding has been receiving more attention. Moreover, the rapid growth of high-throughput sequencing data provides the opportunity to explore biomolecules of interest at the molecular level in maize. Furthermore, we constructed weighted networks for each of the omics and then integrated them into a final fused weighted network based on a nonlinear combination method. We also analyzed the final fused network and mined the orphan nodes, some of which were shown to be transcription factors that played a key role in maize development. This study could help to improve maize production via insights at the multi-omics level and provide a new perspective for maize researchers. All related data have been released at http://lab.malab.cn/~jj/maize.htm.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource","no notes; reassessed and still yes - includes data, though not a named resouces so" +31432427,Essential Features and Use Cases of the Cerebrospinal Fluid Proteome Resource (CSF-PR).,"Every year, a large number of published studies present biomarkers for various neurological disorders. 
Many of these studies are based on mass spectrometry proteomics data and describe comparison of the abundance of proteins in cerebrospinal fluid between two or more disease groups. As the number of such studies is growing, it is no longer straightforward to obtain an overview of which specific proteins are increased or decreased between the numerous relevant diseases and their many subcategories, or to see the larger picture or trends between related diseases. To alleviate this situation, we therefore mined the literature for mass spectrometry-based proteomics studies including quantitative protein data from cerebrospinal fluid of patients with multiple sclerosis, Alzheimer's disease, and Parkinson's disease and organized the extracted data in the Cerebrospinal Fluid Proteome Resource (CSF-PR). CSF-PR is freely available online at http://probe.uib.no/csf-pr , is highly interactive, and allows for easy navigation, visualization, and export of the published scientific data. This chapter will guide the user through some of the most important features of the tool and show examples of the suggested use cases.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource, although kind of clinical underlying data?",no notes; reassessed and still yes - includes a data resource +31459550,InterSpin: Integrated Supportive Webtools for Low- and High-Field NMR Analyses Toward Molecular Complexity.,"InterSpin (http://dmar.riken.jp/interspin/) comprises integrated, supportive, and freely accessible preprocessing webtools and a database to advance signal assignment in low- and high-field NMR analyses of molecular complexities ranging from small molecules to macromolecules for food, material, and environmental applications. 
To support handling of the broad spectra obtained from solid-state NMR or low-field benchtop NMR, we have developed and evaluated two preprocessing tools: sensitivity improvement with spectral integration, which enhances the signal-to-noise ratio by spectral integration, and peaks separation, which separates overlapping peaks by several algorithms, such as non-negative sparse coding. In addition, the InterSpin Laboratory Information Management System (SpinLIMS) database stores numerous standard spectra ranging from small molecules to macromolecules in solid and solution states (dissolved in polar/nonpolar solvents), and can be searched under various conditions using the following molecular assignment tools. SpinMacro supports easy assignment of macromolecules in natural mixtures via solid-state 13C peaks and dimethyl sulfoxide-dissolved 1H-13C correlation peaks. InterAnalysis improves the accuracy of molecular assignment by integrated analysis of 1H-13C correlation peaks and 1H-J correlation peaks of small molecules dissolved in D2O or deuterated methanol, which supports easy narrowing down of metabolite candidates. Finally, by enabling database interoperability, SpinLIMS's client software will ultimately support scientific discovery by facilitating sharing and reusing of NMR data.","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - includes a data resource though not entirely clear how available underlying data is +31640730,NARD: whole-genome reference panel of 1779 Northeast Asians improves imputation accuracy of rare and low-frequency variants.,"Here, we present the Northeast Asian Reference Database (NARD), including whole-genome sequencing data of 1779 individuals from Korea, Mongolia, Japan, China, and Hong Kong. NARD provides the genetic diversity of Korean (n= 850) and Mongolian (n= 384) ancestries that were not present in the 1000 Genomes Project Phase 3 (1KGP3). 
We combined and re-phased the genotypes from NARD and 1KGP3 to construct a union set of haplotypes. This approach established a robust imputation reference panel for Northeast Asians, which yields the greatest imputation accuracy of rare and low-frequency variants compared with the existing panels. NARD imputation panel is available at https://nard.macrogen.com/ .","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +31706268,Genome-wide prediction and prioritization of human aging genes by data fusion: a machine learning approach.,"BACKGROUND:Machine learning can effectively nominate novel genes for various research purposes in the laboratory. On a genome-wide scale, we implemented multiple databases and algorithms to predict and prioritize the human aging genes (PPHAGE). RESULTS:We fused data from 11 databases, and used Nave Bayes classifier and positive unlabeled learning (PUL) methods, NB, Spy, and Rocchio-SVM, to rank human genes in respect with their implication in aging. The PUL methods enabled us to identify a list of negative (non-aging) genes to use alongside the seed (known age-related) genes in the ranking process. Comparison of the PUL algorithms revealed that none of the methods for identifying a negative sample were advantageous over other methods, and their simultaneous use in a form of fusion was critical for obtaining optimal results (PPHAGE is publicly available at https://cbb.ut.ac.ir/pphage). CONCLUSION:We predict and prioritize over 3,000 candidate age-related genes in human, based on significant ranking scores. The identified candidate genes are associated with pathways, ontologies, and diseases that are linked to aging, such as cancer and diabetes. Our data offer a platform for future experimental research on the genetic and biological aspects of aging. 
Additionally, we demonstrate that fusion of PUL methods and data sources can be successfully used for aging and disease candidate gene prioritization.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +31950241,Evaluating genetic causes of azoospermia: What can we learn from a complex cellular structure and single-cell transcriptomics of the human testis?,"Azoospermia is a condition defined as the absence of spermatozoa in the ejaculate, but the testicular phenotype of men with azoospermia may be very variable, ranging from full spermatogenesis, through arrested maturation of germ cells at different stages, to completely degenerated tissue with ghost tubules. Hence, information regarding the cell-type-specific expression patterns is needed to prioritise potential pathogenic variants that contribute to the pathogenesis of azoospermia. Thanks to technological advances within next-generation sequencing, it is now possible to obtain detailed cell-type-specific expression patterns in the testis by single-cell RNA sequencing. However, to interpret single-cell RNA sequencing data properly, substantial knowledge of the highly sophisticated data processing and visualisation methods is needed. Here we review the complex cellular structure of the human testis in different types of azoospermia and outline how known genetic alterations affect the pathology of the testis. We combined the currently available single-cell RNA sequencing datasets originating from the human testis into one dataset covering 62,751 testicular cells, each with a median of 2637 transcripts quantified. We show what effects the most common data-processing steps have, and how different visualisation methods can be used. Furthermore, we calculated expression patterns in pseudotime, and show how splicing rates can be used to determine the velocity of differentiation during spermatogenesis. 
With the combined dataset we show expression patterns and network analysis of genes known to be involved in the pathogenesis of azoospermia. Finally, we provide the combined dataset as an interactive online resource where expression of genes and different visualisation methods can be explored ( https://testis.cells.ucsc.edu/ ).","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - data but seems to be viz only +32081774,Discovery and development of safe-in-man broad-spectrum antiviral agents.,"Viral diseases are one of the leading causes of morbidity and mortality in the world. Virus-specific vaccines and antiviral drugs are the most powerful tools to combat viral diseases. However, broad-spectrum antiviral agents (BSAAs, i.e. compounds targeting viruses belonging to two or more viral families) could provide additional protection of the general population from emerging and re-emerging viral diseases, reinforcing the arsenal of available antiviral options. Here, we review discovery and development of BSAAs and summarize the information on 120 safe-in-man agents in a freely accessible database (https://drugvirus.info/). Future and ongoing pre-clinical and clinical studies will increase the number of BSAAs, expand the spectrum of their indications, and identify drug combinations for treatment of emerging and re-emerging viral infections as well as co-infections.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource although maybe clinical?","no notes; reassessed and still yes - includes data resource, though name not very clear" +32392296,CoCoCoNet: conserved and comparative co-expression across a diverse set of species.,"Co-expression analysis has provided insight into gene function in organisms from Arabidopsis to zebrafish. 
Comparison across species has the potential to enrich these results, for example by prioritizing among candidate human disease genes based on their network properties or by finding alternative model systems where their co-expression is conserved. Here, we present CoCoCoNet as a tool for identifying conserved gene modules and comparing co-expression networks. CoCoCoNet is a resource for both data and methods, providing gold standard networks and sophisticated tools for on-the-fly comparative analyses across 14 species. We show how CoCoCoNet can be used in two use cases. In the first, we demonstrate deep conservation of a nucleolus gene module across very divergent organisms, and in the second, we show how the heterogeneity of autism mechanisms in humans can be broken down by functional groups and translated to model organisms. CoCoCoNet is free to use and available to all at https://milton.cshl.edu/CoCoCoNet, with data and R scripts available at ftp://milton.cshl.edu/data.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - software and includes a data resource +32449511,A strategy for large-scale comparison of evolutionary- and reaction-based classifications of enzyme function.,"Determining the molecular function of enzymes discovered by genome sequencing represents a primary foundation for understanding many aspects of biology. Historically, classification of enzyme reactions has used the enzyme nomenclature system developed to describe the overall reactions performed by biochemically characterized enzymes, irrespective of their associated sequences. In contrast, functional classification and assignment for the millions of protein sequences of unknown function now available is largely done in two computational steps, first by similarity-based assignment of newly obtained sequences to homologous groups, followed by transferring to them the known functions of similar biochemically characterized homologs. 
Due to the fundamental differences in their etiologies and practice, `how' these chemistry- and evolution-centric functional classification systems relate to each other has been difficult to explore on a large scale. To investigate this issue in a new way, we integrated two published ontologies that had previously described each of these classification systems independently. The resulting infrastructure was then used to compare the functional assignments obtained from each classification system for the well-studied and functionally diverse enolase superfamily. Mapping these function assignments to protein structure and reaction similarity networks shows a profound and complex disconnect between the homology- and chemistry-based classification systems. This conclusion mirrors previous observations suggesting that except for closely related sequences, facile annotation transfer from small numbers of characterized enzymes to the huge number uncharacterized homologs to which they are related is problematic. Our extension of these comparisons to large enzyme superfamilies in a computationally intelligent manner provides a foundation for new directions in protein function prediction for the huge proportion of sequences of unknown function represented in major databases. Interactive sequence, reaction, substrate and product similarity networks computed for this work for the enolase and two other superfamilies are freely available for download from the Structure Function Linkage Database Archive (http://sfld.rbvi.ucsf.edu).","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - includes a data resource +32614398,Using AnABlast for intergenic sORF prediction in the Caenorhabditis elegans genome.,"

Motivation

Short bioactive peptides encoded by small open reading frames (sORFs) play important roles in eukaryotes. Bioinformatics prediction of ORFs is an early step in a genome sequence analysis, but sORFs encoding short peptides, often using non-AUG initiation codons, are not easily discriminated from false ORFs occurring by chance.

Results

AnABlast is a computational tool designed to highlight putative protein-coding regions in genomic DNA sequences. This protein-coding finder is independent of ORF length and reading frame shifts, thus making of AnABlast a potentially useful tool to predict sORFs. Using this algorithm, here, we report the identification of 82 putative new intergenic sORFs in the Caenorhabditis elegans genome. Sequence similarity, motif presence, expression data and RNA interference experiments support that the underlined sORFs likely encode functional peptides, encouraging the use of AnABlast as a new approach for the accurate prediction of intergenic sORFs in annotated eukaryotic genomes.

Availability and implementation

AnABlast is freely available at http://www.bioinfocabd.upo.es/ab/. The C.elegans genome browser with AnABlast results, annotated genes and all data used in this study is available at http://www.bioinfocabd.upo.es/celegans.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - includes a data resource +32765587,ABC-GWAS: Functional Annotation of Estrogen Receptor-Positive Breast Cancer Genetic Variants.,"Over the past decade, hundreds of genome-wide association studies (GWAS) have implicated genetic variants in various diseases, including cancer. However, only a few of these variants have been functionally characterized to date, mainly because the majority of the variants reside in non-coding regions of the human genome with unknown function. A comprehensive functional annotation of the candidate variants is thus necessary to fill the gap between the correlative findings of GWAS and the development of therapeutic strategies. By integrating large-scale multi-omics datasets such as the Cancer Genome Atlas (TCGA) and the Encyclopedia of DNA Elements (ENCODE), we performed multivariate linear regression analysis of expression quantitative trait loci, sequence permutation test of transcription factor binding perturbation, and modeling of three-dimensional chromatin interactions to analyze the potential molecular functions of 2,813 single nucleotide variants in 93 genomic loci associated with estrogen receptor-positive breast cancer. To facilitate rapid progress in functional genomics of breast cancer, we have created """"Analysis of Breast Cancer GWAS"""" (ABC-GWAS), an interactive database of functional annotation of estrogen receptor-positive breast cancer GWAS variants. Our resource includes expression quantitative trait loci, long-range chromatin interaction predictions, and transcription factor binding motif analyses to prioritize putative target genes, causal variants, and transcription factors. An embedded genome browser also facilitates convenient visualization of the GWAS loci in genomic and epigenomic context. 
ABC-GWAS provides an interactive visual summary of comprehensive functional characterization of estrogen receptor-positive breast cancer variants. The web resource will be useful to both computational and experimental biologists who wish to generate and test their hypotheses regarding the genetic susceptibility, etiology, and carcinogenesis of breast cancer. ABC-GWAS can also be used as a user-friendly educational resource for teaching functional genomics. ABC-GWAS is available at http://education.knoweng.org/abc-gwas/.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +32976910,Galaxy InteractoMIX: An Integrated Computational Platform for the Study of Protein-Protein Interaction Data.,"Protein interactions play a crucial role among the different functions of a cell and are central to our understanding of cellular processes both in health and disease. Here we present Galaxy InteractoMIX (http://galaxy.interactomix.com), a platform composed of 13 different computational tools each addressing specific aspects of the study of protein-protein interactions, ranging from large-scale cross-species protein-wide interactomes to atomic resolution level of protein complexes. Galaxy InteractoMIX provides an intuitive interface where users can retrieve consolidated interactomics data distributed across several databases or uncover links between diseases and genes by analyzing the interactomes underlying these diseases. The platform makes possible large-scale prediction and curation protein interactions using the conservation of motifs, interology, or presence or absence of key sequence signatures. The range of structure-based tools includes modeling and analysis of protein complexes, delineation of interfaces and the modeling of peptides acting as inhibitors of protein-protein interactions. 
Galaxy InteractoMIX includes a range of ready-to-use workflows to run complex analyses requiring minimal intervention by users. The potential range of applications of the platform covers different aspects of life science, biomedicine, biotechnology and drug discovery where protein associations are studied.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - tools only it seems +33177514,"SAVI, in silico generation of billions of easily synthesizable compounds through expert-system type rules.","We have made available a database of over 1 billion compounds predicted to be easily synthesizable, called Synthetically Accessible Virtual Inventory (SAVI). They have been created by a set of transforms based on an adaptation and extension of the CHMTRN/PATRAN programming languages describing chemical synthesis expert knowledge, which originally stem from the LHASA project. The chemoinformatics toolkit CACTVS was used to apply a total of 53 transforms to about 150,000 readily available building blocks (enamine.net). Only single-step, two-reactant syntheses were calculated for this database even though the technology can execute multi-step reactions. The possibility to incorporate scoring systems in CHMTRN allowed us to subdivide the database of 1.75 billion compounds in sets according to their predicted synthesizability, with the most-synthesizable class comprising 1.09 billion synthetic products. Properties calculated for all SAVI products show that the database should be well-suited for drug discovery. It is being made publicly available for free download from https://doi.org/10.35115/37n9-5738.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource; questionably bio",no notes; reassessed and still yes - includes a data resource +33181824,CircR2Cancer: a manually curated database of associations between circRNAs and cancers.,"Accumulating evidences have shown that the deregulation of circRNA has close association with many human cancers. 
However, these experimental verified circRNA-cancer associations are not collected in any database. Here, we develop a manually curated database (circR2Cancer) that provides experimentally supported associations between circRNAs and cancers. The current version of the circR2Cancer contains 1439 associations between 1135 circRNAs and 82 cancers by extracting data from existing literatures and databases. In addition, circR2Cancer contains the information of cancer exacted from Disease Ontology and basic biological information of circRNAs from circBase. At the same time, circR2Cancer provides a simple and friendly interface for users to conveniently browse, search and download the data. It will be a useful and valuable resource for researchers to understanding the regulation mechanism of circRNA in cancers.

Database url

http://www.biobdlab.cn:8000.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +33391232,Creation of an Online Platform for Identification of Microorganisms: Peak Picking or Full-Spectrum Analysis.,"Identification of microorganisms by MALDI-TOF mass spectrometry is a very efficient method with high throughput, speed, and accuracy. However, it is significantly limited by the absence of a universal database of reference mass spectra. This problem can be solved by creating an Internet platform for open databases of protein spectra of microorganisms. Choosing the optimal mathematical apparatus is the pivotal issue for this task. In our previous study we proposed the geometric approach for processing mass spectrometry data, which represented a mass spectrum as a vector in a multidimensional Euclidean space. This algorithm was implemented in a Jacob4 stand-alone package. We demonstrated its efficiency in delimiting two closely related species of the Bacillus pumilus group. In this study, the geometric approach was realized as R scripts which allowed us to design a Web-based application. We also studied the possibility of using full spectra analysis (FSA) without calculating mass peaks (PPA), which is the logical development of the method. We used 74 microbial strains from the collections of ICiG SB RAS, UNIQEM, IEGM, KMM, and VGM as the models. We demonstrated that the algorithms based on peak-picking and analysis of complete data have accuracy no less than that of Biotyper 3.1 software. We proposed a method for calculating cut-off thresholds based on averaged intraspecific distances. 
The resulting database, raw data, and the set of R scripts are available online at https://icg-test.mydisk.nsc.ru/s/qj6cfZg57g6qwzN.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - software and includes a data resource +33620450,VoroCNN: Deep convolutional neural network built on 3D Voronoi tessellation of protein structures.,"

Motivation

Effective use of evolutionary information has recently led to tremendous progress in computational prediction of three-dimensional (3D) structures of proteins and their complexes. Despite the progress, the accuracy of predicted structures tends to vary considerably from case to case. Since the utility of computational models depends on their accuracy, reliable estimates of deviation between predicted and native structures are of utmost importance.

Results

For the first time, we present a deep convolutional neural network (CNN) constructed on a Voronoi tessellation of 3D molecular structures. Despite the irregular data domain, our data representation allows us to efficiently introduce both convolution and pooling operations and train the network in an end-to-end fashion without precomputed descriptors. The resultant model, VoroCNN, predicts local qualities of 3D protein folds. The prediction results are competitive to state of the art and superior to the previous 3D CNN architectures built for the same task. We also discuss practical applications of VoroCNN, for example, in recognition of protein binding interfaces.

Availability

The model, data, and evaluation tests are available at https://team.inria.fr/nano-d/software/vorocnn/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - model +33789926,A Novel Three-Choice Touchscreen Task to Examine Spatial Attention and Orienting Responses in Rodents.,"Mammalian orienting behavior consists of coordinated movements of the eyes, head, pinnae, vibrissae, or body to attend to an external stimulus. The present study aimed to develop a novel operant task using a touch-screen system to measure spatial attention. In this task, rats were trained to nose-poke a light stimulus presented in one of three locations. The stimulus was presented more frequently in the center location to develop spatial attention bias toward the center stimulus. Changes in orienting responses were detected by measuring the animals' response accuracy and latency to stimuli at the lateral locations, following reversible unilateral chemogenetic inactivation of the superior colliculus (SC). Additionally, spontaneous turning and rotation behavior was measured using an open-field test (OFT). Our results show that right SC inactivation significantly increased the whole body turn angle in the OFT, in line with previous literature that indicated an ipsiversive orientating bias and the presence of contralateral neglect following unilateral SC lesions. In the touch screen orienting task, unilateral SC inactivation significantly increased bias toward the ipsilateral side, as measured by response frequency in various experimental conditions, and a very large left-shift of a respective psychometric function. Our results demonstrate that this novel touchscreen task is able to detect changes in spatial attention and orienting responses because of e.g. 
experimental manipulations or injury with very high sensitivity, while taking advantage of the touch screen technology that allows for high transferability of the task between labs and for open-source data sharing through https://www.mousebytes.ca.","hji,kes",0,0,0,2,0,NA,"yes, not well conveyed in the abstract; reassessed and re-scored - not clear if they created or used a data resource" +33996891,Universal Architectural Concepts Underlying Protein Folding Patterns.,"What is the architectural """"basis set"""" of the observed universe of protein structures? Using information-theoretic inference, we answer this question with a dictionary of 1,493 substructures-called concepts-typically at a subdomain level, based on an unbiased subset of known protein structures. Each concept represents a topologically conserved assembly of helices and strands that make contact. Any protein structure can be dissected into instances of concepts from this dictionary. We dissected the Protein Data Bank and completely inventoried all the concept instances. This yields many insights, including correlations between concepts and catalytic activities or binding sites, useful for rational drug design; local amino-acid sequence-structure correlations, useful for ab initio structure prediction methods; and information supporting the recognition and exploration of evolutionary relationships, useful for structural studies. An interactive site, Proodic, at http://lcb.infotech.monash.edu.au/prosodic (click), provides access to and navigation of the entire dictionary of concepts and their usages, and all associated information. 
This report is part of a continuing programme with the goal of elucidating fundamental principles of protein architecture, in the spirit of the work of Cyrus Chothia.","hji,kes",0,1,1,2,0.5,NA,no notes; reassessed and still yes - its a collection but iffy +34015823,COVID-19 biomarkers and their overlap with comorbidities in a disease biomarker data model.,"In response to the COVID-19 outbreak, scientists and medical researchers are capturing a wide range of host responses, symptoms and lingering postrecovery problems within the human population. These variable clinical manifestations suggest differences in influential factors, such as innate and adaptive host immunity, existing or underlying health conditions, comorbidities, genetics and other factors-compounding the complexity of COVID-19 pathobiology and potential biomarkers associated with the disease, as they become available. The heterogeneous data pose challenges for efficient extrapolation of information into clinical applications. We have curated 145 COVID-19 biomarkers by developing a novel cross-cutting disease biomarker data model that allows integration and evaluation of biomarkers in patients with comorbidities. Most biomarkers are related to the immune (SAA, TNF- and IP-10) or coagulation (D-dimer, antithrombin and VWF) cascades, suggesting complex vascular pathobiology of the disease. Furthermore, we observe commonality with established cancer biomarkers (ACE2, IL-6, IL-4 and IL-2) as well as biomarkers for metabolic syndrome and diabetes (CRP, NLR and LDL). 
We explore these trends as we put forth a COVID-19 biomarker resource (https://data.oncomx.org/covid19) that will help researchers and diagnosticians alike.","hji,kes",0,0,0,2,0,NA,no notes; reassessed and re-scored - not clear if model and data +IND607223097,Characterization of transcriptomes from sexual and asexual lineages of a New Zealand snail (Potamopyrgus antipodarum),"Understanding the evolution and maintenance of sexual reproduction is one of the central challenges of evolutionary biology, yet we know very little about how sex influences molecular evolution. The New Zealand freshwater snail Potamopyrgus antipodarum is ideally suited to address this knowledge gap because obligately sexual individuals often coexist with multiple independently derived obligately asexual lineages. This unusual situation allows direct comparisons both between sexual and asexual P. antipodarum and across populations that differ in the relative frequency of sexual individuals. As such, P. antipodarum has received a great deal of attention as a model system for the maintenance of sex in nature and is also used as a model for environmental toxicology and biological invasions. Molecular genetic resources for P. antipodarum will thus be useful to investigators in a variety of biological fields. We used 454 sequencing of cDNA libraries to generate transcriptomes from two sexual and two asexual P. antipodarum lineages. A de novo assembly of 116.7Mb of sequence reads produced 41396 contigs, and sequence similarity-based Gene Ontology annotations were obtained for 3740 contigs. We detected 408315 SNP loci and 7315 microsatellite loci, which together represent the first genome-scale resource available for P. antipodarum. 
Raw 454 read sequences, contig sequences, annotation data and polymorphism data are publicly available in a searchable online database and for download at http://www.biology.uiowa.edu/neiman/transcriptome.php.","hji,kes",1,1,2,2,1,"no notes; reassessed and yes, includes data resource",no notes; reassessed and still yes - includes a data resource +PMC7775396,Creation of an Online Platform for Identification of Microorganisms: Peak Picking or Full-Spectrum Analysis,"Identification of microorganisms by MALDI-TOF mass spectrometry is a very efficient method with high throughput, speed, and accuracy. However, it is significantly limited by the absence of a universal database of reference mass spectra. This problem can be solved by creating an Internet platform for open databases of protein spectra of microorganisms. Choosing the optimal mathematical apparatus is the pivotal issue for this task. In our previous study we proposed the geometric approach for processing mass spectrometry data, which represented a mass spectrum as a vector in a multidimensional Euclidean space. This algorithm was implemented in a Jacob4 stand-alone package. We demonstrated its efficiency in delimiting two closely related species of the Bacillus pumilus group. In this study, the geometric approach was realized as R scripts which allowed us to design a Web-based application. We also studied the possibility of using full spectra analysis (FSA) without calculating mass peaks (PPA), which is the logical development of the method. We used 74 microbial strains from the collections of ICiG SB RAS, UNIQEM, IEGM, KMM, and VGM as the models. We demonstrated that the algorithms based on peak-picking and analysis of complete data have accuracy no less than that of Biotyper 3.1 software. We proposed a method for calculating cut-off thresholds based on averaged intraspecific distances. 
The resulting database, raw data, and the set of R scripts are available online at https://icg-test.mydisk.nsc.ru/s/qj6cfZg57g6qwzN.","hji,kes",0,1,1,2,0.5,NA,"no notes; reassessed and still yes - includes data, though not a named resouces so" +22075991,The Gene Wiki in 2011: community intelligence applied to human gene annotation.,"The Gene Wiki is an open-access and openly editable collection of Wikipedia articles about human genes. Initiated in 2008, it has grown to include articles about more than 10,000 genes that, collectively, contain more than 1.4 million words of gene-centric text with extensive citations back to the primary scientific literature. This growing body of useful, gene-centric content is the result of the work of thousands of individuals throughout the scientific community. Here, we describe recent improvements to the automated system that keeps the structured data presented on Gene Wiki articles in sync with the data from trusted primary databases. We also describe the expanding contents, editors and users of the Gene Wiki. Finally, we introduce a new automated system, called WikiTrust, which can effectively compute the quality of Wikipedia articles, including Gene Wiki articles, at the word level. 
All articles in the Gene Wiki can be freely accessed and edited at Wikipedia, and additional links and information can be found at the project's Wikipedia portal page: http://en.wikipedia.org/wiki/Portal:Gene_Wiki.","hji,kes",1,1,2,2,1,"no notes; reassessed and still yes, it is a wiki which is questionable though",no notes; reassessed and re-scored - probably counts as a data resouce as Gene Wiki +25946867,The linked human imprintome v1.0: over 120 genes confirmed as imprinted impose a major review on previous censuses.,"The whole set of human imprinted genes, termed imprintome, is here analysed by means of a reasonable, valid application of the Semantic Web and Linked Data approaches to a few structured datasets in order to provide a comprehensive collection of imprinted genes in the human genome. Thus, we have stored, organised, filtered, and analysed massive amounts of existing data on human imprinted genes towards compiling, structuring and linking data to comprise a sharing resource for genome and epigenome interrogated studies. Our datasets of linked data are the actual research outcome of this human imprintome analysis because as genomics become more and more data intensive, due to huge amounts of biological data, so does our needs for more structured data to be easier mined and shared. We present the resulting first version of the Linked Human Imprintome as a project within Linked Open Data (LOD) initiative (http://lod-cloud.net/) through Data Hub (http:// thedatahub.org/en/dataset/a-draft-version-of-the-linked-human-imprintome).","hji,kes",0,0,0,2,0,"no notes; reassessed and no, it is generic",no notes; reassessed and still no - data hub is a generic resource +25348213,DIANA--algorithmic improvements for analysis of data-independent acquisition MS data.,"

Motivation

Data independent acquisition mass spectrometry has emerged as a reproducible and sensitive alternative in quantitative proteomics, where parsing the highly complex tandem mass spectra requires dedicated algorithms. Recently, targeted data extraction was proposed as a novel analysis strategy for this type of data, but it is important to further develop these concepts to provide quality-controlled, interference-adjusted and sensitive peptide quantification.

Results

We here present the algorithm DIANA and the classifier PyProphet, which are based on new probabilistic sub-scores to classify the chromatographic peaks in targeted data-independent acquisition data analysis. The algorithm is capable of providing accurate quantitative values and increased recall at a controlled false discovery rate, in a complex gold standard dataset. Importantly, we further demonstrate increased confidence gained by the use of two complementary data-independent acquisition targeted analysis algorithms, as well as increased numbers of quantified peptide precursors in complex biological samples.

Availability and implementation

DIANA is implemented in scala and python and available as open source (Apache 2.0 license) or pre-compiled binaries from http://quantitativeproteomics.org/diana. PyProphet can be installed from PyPi (https://pypi.python.org/pypi/pyprophet).

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +26098815,From Ramachandran Maps to Tertiary Structures of Proteins.,"Sequence to structure of proteins is an unsolved problem. A possible coarse grained resolution to this entails specification of all the torsional (F, ) angles along the backbone of the polypeptide chain. The Ramachandran map quite elegantly depicts the allowed conformational (F, ) space of proteins which is still very large for the purposes of accurate structure generation. We have divided the allowed (F, ) space in Ramachandran maps into 27 distinct conformations sufficient to regenerate a structure to within 5 from the native, at least for small proteins, thus reducing the structure prediction problem to a specification of an alphanumeric string, i.e., the amino acid sequence together with one of the 27 conformations preferred by each amino acid residue. This still theoretically results in 27(n) conformations for a protein comprising """"n"""" amino acids. We then investigated the spatial correlations at the two-residue (dipeptide) and three-residue (tripeptide) levels in what may be described as higher order Ramachandran maps, with the premise that the allowed conformational space starts to shrink as we introduce neighborhood effects. We found, for instance, for a tripeptide which potentially can exist in any of the 27(3) """"allowed"""" conformations, three-fourths of these conformations are redundant to the 95% confidence level, suggesting sequence context dependent preferred conformations. We then created a look-up table of preferred conformations at the tripeptide level and correlated them with energetically favorable conformations. We found in particular that Boltzmann probabilities calculated from van der Waals energies for each conformation of tripeptides correlate well with the observed populations in the structural database (the average correlation coefficient is ~0.8). 
An alpha-numeric string and hence the tertiary structure can be generated for any sequence from the look-up table within minutes on a single processor and to a higher level of accuracy if secondary structure can be specified. We tested the methodology on 100 small proteins, and in 90% of the cases, a structure within 5 is recovered. We thus believe that the method presented here provides the missing link between Ramachandran maps and tertiary structures of proteins. A Web server to convert a tertiary structure to an alphanumeric string and to predict the tertiary structure from the sequence of a protein using the above methodology is created and made freely accessible at http://www.scfbio-iitd.res.in/software/proteomics/rm2ts.jsp.","hji,kes",0,0,0,2,0,NA,NA +33737684,"Proteomic blood profiling in mild, severe and critical COVID-19 patients.","The recent SARS-CoV-2 pandemic manifests itself as a mild respiratory tract infection in most individuals, leading to COVID-19 disease. However, in some infected individuals, this can progress to severe pneumonia and acute respiratory distress syndrome (ARDS), leading to multi-organ failure and death. This study explores the proteomic differences between mild, severe, and critical COVID-19 positive patients to further understand the disease progression, identify proteins associated with disease severity, and identify potential therapeutic targets. Blood protein profiling was performed on 59 COVID-19 mild (n = 26), severe (n = 9) or critical (n = 24) cases and 28 controls using the OLINK inflammation, autoimmune, cardiovascular and neurology panels. Differential expression analysis was performed within and between disease groups to generate nine different analyses. From the 368 proteins measured per individual, more than 75% were observed to be significantly perturbed in COVID-19 cases. Six proteins (IL6, CKAP4, Gal-9, IL-1ra, LILRB4 and PD-L1) were identified to be associated with disease severity. 
The results have been made readily available through an interactive web-based application for instant data exploration and visualization, and can be accessed at https://phidatalab-shiny.rosalind.kcl.ac.uk/COVID19/ . Our results demonstrate that dynamic changes in blood proteins associated with disease severity can potentially be used as early biomarkers to monitor disease severity in COVID-19 and serve as potential therapeutic targets.","hji,kes",0,0,0,2,0,NA,NA +21208982,Tabix: fast retrieval of sequence features from generic TAB-delimited files.,"

Unlabelled

Tabix is the first generic tool that indexes position sorted files in TAB-delimited formats such as GFF, BED, PSL, SAM and SQL export, and quickly retrieves features overlapping specified regions. Tabix features include few seek function calls per query, data compression with gzip compatibility and direct FTP/HTTP access. Tabix is implemented as a free command-line tool as well as a library in C, Java, Perl and Python. It is particularly useful for manually examining local genomic features on the command line and enables genome viewers to support huge data files and remote custom tracks over networks.

Availability and implementation

http://samtools.sourceforge.net.","hji,kes",0,0,0,2,0,NA,NA +21208984,Model selection in Bayesian segmentation of multiple DNA alignments.,"

Motivation

The analysis of multiple sequence alignments is allowing researchers to glean valuable insights into evolution, as well as identify genomic regions that may be functional, or discover novel classes of functional elements. Understanding the distribution of conservation levels that constitutes the evolutionary landscape is crucial to distinguishing functional regions from non-functional. Recent evidence suggests that a binary classification of evolutionary rates is inappropriate for this purpose and finds only highly conserved functional elements. Given that the distribution of evolutionary rates is multi-modal, determining the number of modes is of paramount concern. Through simulation, we evaluate the performance of a number of information criterion approaches derived from MCMC simulations in determining the dimension of a model.

Results

We utilize a deviance information criterion (DIC) approximation that is more robust than the approximations from other information criteria, and show our information criteria approximations do not produce superfluous modes when estimating conservation distributions under a variety of circumstances. We analyse the distribution of conservation for a multiple alignment comprising four primate species and mouse, and repeat this on two additional multiple alignments of similar species. We find evidence of six distinct classes of evolutionary rates that appear to be robust to the species used.

Availability

Source code and data are available at http://dl.dropbox.com/u/477240/changept.zip.","hji,kes",0,0,0,2,0,NA,data available but on dropbox +21226895,Shape-based peak identification for ChIP-Seq.,"

Background

The identification of binding targets for proteins using ChIP-Seq has gained popularity as an alternative to ChIP-chip. Sequencing can, in principle, eliminate artifacts associated with microarrays, and cheap sequencing offers the ability to sequence deeply and obtain a comprehensive survey of binding. A number of algorithms have been developed to call """"peaks"""" representing bound regions from mapped reads. Most current algorithms incorporate multiple heuristics, and despite much work it remains difficult to accurately determine individual peaks corresponding to distinct binding events.

Results

Our method for identifying statistically significant peaks from read coverage is inspired by the notion of persistence in topological data analysis and provides a non-parametric approach that is statistically sound and robust to noise in experiments. Specifically, our method reduces the peak calling problem to the study of tree-based statistics derived from the data. We validate our approach using previously published data and show that it can discover previously missed regions.

Conclusions

The difficulty in accurately calling peaks for ChIP-Seq data is partly due to the difficulty in defining peaks, and we demonstrate a novel method that improves on the accuracy of previous methods in resolving peaks. Our introduction of a robust statistical test based on ideas from topological data analysis is also novel. Our methods are implemented in a program called T-PIC (Tree shape Peak Identification for ChIP-Seq) is available at http://bio.math.berkeley.edu/tpic/.","hji,kes",0,0,0,2,0,NA,NA +21244646,easyDAS: automatic creation of DAS servers.,"

Background

The Distributed Annotation System (DAS) has proven to be a successful way to publish and share biological data. Although there are more than 750 active registered servers from around 50 organizations, setting up a DAS server comprises a fair amount of work, making it difficult for many research groups to share their biological annotations. Given the clear advantage that the generalized sharing of relevant biological data is for the research community it would be desirable to facilitate the sharing process.

Results

Here we present easyDAS, a web-based system enabling anyone to publish biological annotations with just some clicks. The system, available at http://www.ebi.ac.uk/panda-srv/easydas is capable of reading different standard data file formats, process the data and create a new publicly available DAS source in a completely automated way. The created sources are hosted on the EBI systems and can take advantage of its high storage capacity and network connection, freeing the data provider from any network management work. easyDAS is an open source project under the GNU LGPL license.

Conclusions

easyDAS is an automated DAS source creation system which can help many researchers in sharing their biological data, potentially increasing the amount of relevant biological data available to the scientific community.","hji,kes",0,0,0,2,0,NA,NA +21256977,AskHERMES: An online question answering system for complex clinical questions.,"

Objective

Clinical questions are often long and complex and take many forms. We have built a clinical question answering system named AskHERMES to perform robust semantic analysis on complex clinical questions and output question-focused extractive summaries as answers.

Design

This paper describes the system architecture and a preliminary evaluation of AskHERMES, which implements innovative approaches in question analysis, summarization, and answer presentation. Five types of resources were indexed in this system: MEDLINE abstracts, PubMed Central full-text articles, eMedicine documents, clinical guidelines and Wikipedia articles.

Measurement

We compared the AskHERMES system with Google (Google and Google Scholar) and UpToDate and asked physicians to score the three systems by ease of use, quality of answer, time spent, and overall performance.

Results

AskHERMES allows physicians to enter a question in a natural way with minimal query formulation and allows physicians to efficiently navigate among all the answer sentences to quickly meet their information needs. In contrast, physicians need to formulate queries to search for information in Google and UpToDate. The development of the AskHERMES system is still at an early stage, and the knowledge resource is limited compared with Google or UpToDate. Nevertheless, the evaluation results show that AskHERMES' performance is comparable to the other systems. In particular, when answering complex clinical questions, it demonstrates the potential to outperform both Google and UpToDate systems.

Conclusions

AskHERMES, available at http://www.AskHERMES.org, has the potential to help physicians practice evidence-based medicine and improve the quality of patient care.","hji,kes",0,0,0,2,0,NA,NA +21266443,An efficient hierarchical generalized linear mixed model for pathway analysis of genome-wide association studies.,"

Motivation

In genome-wide association studies (GWAS) of complex diseases, genetic variants having real but weak associations often fail to be detected at the stringent genome-wide significance level. Pathway analysis, which tests disease association with combined association signals from a group of variants in the same pathway, has become increasingly popular. However, because of the complexities in genetic data and the large sample sizes in typical GWAS, pathway analysis remains to be challenging. We propose a new statistical model for pathway analysis of GWAS. This model includes a fixed effects component that models mean disease association for a group of genes, and a random effects component that models how each gene's association with disease varies about the gene group mean, thus belongs to the class of mixed effects models.

Results

The proposed model is computationally efficient and uses only summary statistics. In addition, it corrects for the presence of overlapping genes and linkage disequilibrium (LD). Via simulated and real GWAS data, we showed our model improved power over currently available pathway analysis methods while preserving type I error rate. Furthermore, using the WTCCC Type 1 Diabetes (T1D) dataset, we demonstrated mixed model analysis identified meaningful biological processes that agreed well with previous reports on T1D. Therefore, the proposed methodology provides an efficient statistical modeling framework for systems analysis of GWAS.

Availability

The software code for mixed models analysis is freely available at http://biostat.mc.vanderbilt.edu/LilyWang.","hji,kes",0,0,0,2,0,NA,NA +21278116,"Predicting breed composition using breed frequencies of 50,000 markers from the US Meat Animal Research Center 2,000 Bull Project.","Knowledge of breed composition can be useful in multiple aspects of cattle production, and can be critical for analyzing the results of whole genome-wide association studies currently being conducted around the world. We examine the feasibility and accuracy of using genotype data from the most prevalent bovine genome-wide association studies platform, the Illumina BovineSNP50 array (Illumina Inc., San Diego, CA), to estimate breed composition for individual breeds of cattle. First, allele frequencies (of Illumina-defined allele B) of SNP on the array for each of 16 beef cattle breeds were defined by genotyping a large set of more than 2,000 bulls selected in cooperation with the respective breed associations to be representative of their breed. With these breed-specific allele frequencies, the breed compositions of approximately 2,000 two-, three-, and four-way cross (of 8 breeds) cattle produced at the US Meat Animal Research Center were predicted by using a simple multiple regression technique or Mendel (http://www.genetics.ucla.edu/software/mendel) and their genotypes from the Illumina BovineSNP50 array, and were then compared with pedigree-based estimates of breed composition. The accuracy of marker-based breed composition estimates was 89% when using either estimation method for all breeds except Angus and Red Angus (averaged 79%), based on comparing estimates with pedigree-based average breed composition. Accuracy increased to approximately 88% when these 2 breeds were combined into an aggregate Angus group. 
Additionally, we used a subset of these markers, approximately 3,000 that populate the Illumina Bovine3K (Illumina Inc.), to see whether breed composition could be estimated with similar accuracy when using this reduced panel of SNP makers. When breed composition was estimated using only SNP in common with the Bovine 3K array, accuracy was slightly reduced to 83%. These results suggest that SNP data from these arrays could be used to estimate breed composition in most US beef cattle in situations where pedigree is not known (e.g., multiple-sire natural service matings, non-source-verified animals in feedlots or at slaughter). This approach can aid analyses that depend on knowledge of breed composition, including identification and adjustment of breed-based population stratification, when performing genome-wide association studies on populations with incomplete pedigrees. In addition, SNP-based breed composition estimates may facilitate fitting cow germplasm to the environment, managing cattle in the feedlot, and tracing disease cases back to the geographic region or farm of origin.","hji,kes",0,0,0,2,0,NA,NA +21330288,Sensitive gene fusion detection using ambiguously mapping RNA-Seq read pairs.,"

Motivation

Paired-end whole transcriptome sequencing provides evidence for fusion transcripts. However, due to the repetitiveness of the transcriptome, many reads have multiple high-quality mappings. Previous methods to find gene fusions either ignored these reads or required additional longer single reads. This can obscure up to 30% of fusions and unnecessarily discards much of the data.

Results

We present a method for using paired-end reads to find fusion transcripts without requiring unique mappings or additional single read sequencing. Using simulated data and data from tumors and cell lines, we show that our method can find fusions with ambiguously mapping read pairs without generating numerous spurious fusions from the many mapping locations.

Availability

A C++ and Python implementation of the method demonstrated in this article is available at http://exon.ucsd.edu/ShortFuse.

Contact

mckinsel@ucsd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +21345871,BiC: a web server for calculating bimodality of coexpression between gene and protein networks.,"

Unlabelled

Bimodal patterns of expression have recently been shown to be useful not only in prioritizing genes that distinguish phenotypes, but also in prioritizing network models that correlate with proteomic evidence. In particular, subgroups of strongly coexpressed gene pairs result in an increased variance of the correlation distribution. This variance, a measure of association between sets of genes (or proteins), can be summarized as the bimodality of coexpression (BiC). We developed an online tool to calculate the BiC for user-defined gene lists and associated mRNA expression data. BiC is a comprehensive application that provides researchers with the ability to analyze both publicly available and user-collected array data.

Availability

The freely available web service and the documentation can be accessed at http://gurkan.case.edu/software.

Contact

gurkan@case.edu.","hji,kes",0,0,0,2,0,NA,NA +21371586,"Preparation of protein samples for NMR structure, function, and small-molecule screening studies.","In this chapter, we concentrate on the production of high-quality protein samples for nuclear magnetic resonance (NMR) studies. In particular, we provide an in-depth description of recent advances in the production of NMR samples and their synergistic use with recent advancements in NMR hardware. We describe the protein production platform of the Northeast Structural Genomics Consortium and outline our high-throughput strategies for producing high-quality protein samples for NMR studies. Our strategy is based on the cloning, expression, and purification of 6-His-tagged proteins using T7-based Escherichia coli systems and isotope enrichment in minimal media. We describe 96-well ligation-independent cloning and analytical expression systems, parallel preparative scale fermentation, and high-throughput purification protocols. The 6-His affinity tag allows for a similar two-step purification procedure implemented in a parallel high-throughput fashion that routinely results in purity levels sufficient for NMR studies (>97% homogeneity). Using this platform, the protein open reading frames of over 17,500 different targeted proteins (or domains) have been cloned as over 28,000 constructs. Nearly 5000 of these proteins have been purified to homogeneity in tens of milligram quantities (see Summary Statistics, http://nesg.org/statistics.html), resulting in more than 950 new protein structures, including more than 400 NMR structures, deposited in the Protein Data Bank. The Northeast Structural Genomics Consortium pipeline has been effective in producing protein samples of both prokaryotic and eukaryotic origin. 
Although this chapter describes our entire pipeline for producing isotope-enriched protein samples, it focuses on the major updates introduced during the last 5 years (Phase 2 of the National Institute of General Medical Sciences Protein Structure Initiative). Our advanced automated and/or parallel cloning, expression, purification, and biophysical screening technologies are suitable for implementation in a large individual laboratory or by a small group of collaborating investigators for structural biology, functional proteomics, ligand screening, and structural genomics research.","hji,kes",0,0,0,2,0,NA,NA +21417267,Data-driven high-throughput prediction of the 3-D structure of small molecules: review and progress.,"Accurate prediction of the 3-D structure of small molecules is essential in order to understand their physical, chemical, and biological properties, including how they interact with other molecules. Here, we survey the field of high-throughput methods for 3-D structure prediction and set up new target specifications for the next generation of methods. We then introduce COSMOS, a novel data-driven prediction method that utilizes libraries of fragment and torsion angle parameters. We illustrate COSMOS using parameters extracted from the Cambridge Structural Database (CSD) by analyzing their distribution and then evaluating the system's performance in terms of speed, coverage, and accuracy. Results show that COSMOS represents a significant improvement when compared to state-of-the-art prediction methods, particularly in terms of coverage of complex molecular structures, including metal-organics. COSMOS can predict structures for 96.4% of the molecules in the CSD (99.6% organic, 94.6% metal-organic), whereas the widely used commercial method CORINA predicts structures for 68.5% (98.5% organic, 51.6% metal-organic). 
On the common subset of molecules predicted by both methods, COSMOS makes predictions with an average speed per molecule of 0.15 s (0.10 s organic, 0.21 s metal-organic) and an average rmsd of 1.57 (1.26 organic, 1.90 metal-organic), and CORINA makes predictions with an average speed per molecule of 0.13s (0.18s organic, 0.08s metal-organic) and an average rmsd of 1.60 (1.13 organic, 2.11 metal-organic). COSMOS is available through the ChemDB chemoinformatics Web portal at http://cdb.ics.uci.edu/ .","hji,kes",0,0,0,2,0,NA,NA +21435986,A potential causal association mining algorithm for screening adverse drug reactions in postmarketing surveillance.,"Early detection of unknown adverse drug reactions (ADRs) in postmarketing surveillance saves lives and prevents harmful consequences. We propose a novel data mining approach to signaling potential ADRs from electronic health databases. More specifically, we introduce potential causal association rules (PCARs) to represent the potential causal relationship between a drug and ICD-9 (CDC. (2010). International Classification of Diseases, Ninth Revision (ICD-9). [Online]. Available: http://www.cdc.gov/nchs/icd/icd9.html) coded signs or symptoms representing potential ADRs. Due to the infrequent nature of ADRs, the existing frequency-based data mining methods cannot effectively discover PCARs. We introduce a new interestingness measure, potential causal leverage, to quantify the degree of association of a PCAR. This measure is based on the computational, experience-based fuzzy recognition-primed decision (RPD) model that we developed previously (Y. Ji, R. M. Massanari, J. Ager, J. Yen, R. E. Miller, and H. Ying, """"A fuzzy logic-based computational recognition-primed decision model,"""" Inf. Sci., vol. 177, pp. 4338-4353, 2007) on the basis of the well-known, psychology-originated qualitative RPD model (G. A. 
Klein, """"A recognition-primed decision making model of rapid decision making,"""" in Decision Making in Action: Models and Methods, 1993, pp. 138-147). The potential causal leverage assesses the strength of the association of a drug-symptom pair given a collection of patient cases. To test our data mining approach, we retrieved electronic medical data for 16,206 patients treated by one or more than eight drugs of our interest at the Veterans Affairs Medical Center in Detroit between 2007 and 2009. We selected enalapril as the target drug for this ADR signal generation study. We used our algorithm to preliminarily evaluate the associations between enalapril and all the ICD-9 codes associated with it. The experimental results indicate that our approach has a potential to better signal potential ADRs than risk ratio and leverage, two traditional frequency-based measures. Among the top 50 signal pairs (i.e., enalapril versus symptoms) ranked by the potential causal-leverage measure, the physicians on the project determined that eight of them probably represent true causal associations.","hji,kes",0,0,0,2,0,NA,NA +21467251,Patient-specific radiation dose and cancer risk for pediatric chest CT.,"

Purpose

To estimate patient-specific radiation dose and cancer risk for pediatric chest computed tomography (CT) and to evaluate factors affecting dose and risk, including patient size, patient age, and scanning parameters.

Materials and methods

The institutional review board approved this study and waived informed consent. This study was HIPAA compliant. The study included 30 patients (0-16 years old), for whom full-body computer models were recently created from clinical CT data. A validated Monte Carlo program was used to estimate organ dose from eight chest protocols, representing clinically relevant combinations of bow tie filter, collimation, pitch, and tube potential. Organ dose was used to calculate effective dose and risk index (an index of total cancer incidence risk). The dose and risk estimates before and after normalization by volume-weighted CT dose index (CTDI(vol)) or dose-length product (DLP) were correlated with patient size and age. The effect of each scanning parameter was studied.

Results

Organ dose normalized by tube current-time product or CTDI(vol) decreased exponentially with increasing average chest diameter. Effective dose normalized by tube current-time product or DLP decreased exponentially with increasing chest diameter. Chest diameter was a stronger predictor of dose than weight and total scan length. Risk index normalized by tube current-time product or DLP decreased exponentially with both chest diameter and age. When normalized by DLP, effective dose and risk index were independent of collimation, pitch, and tube potential (<10% variation).

Conclusion

The correlations of dose and risk with patient size and age can be used to estimate patient-specific dose and risk. They can further guide the design and optimization of pediatric chest CT protocols.

Supplemental material

http://radiology.rsna.org/lookup/suppl/doi:10.1148/radiol.11101900/-/DC1.","hji,kes",0,0,0,2,0,NA,NA +21467568,Increasing power of genome-wide association studies by collecting additional single-nucleotide polymorphisms.,"Genome-wide association studies (GWASs) have been effectively identifying the genomic regions associated with a disease trait. In a typical GWAS, an informative subset of the single-nucleotide polymorphisms (SNPs), called tag SNPs, is genotyped in case/control individuals. Once the tag SNP statistics are computed, the genomic regions that are in linkage disequilibrium (LD) with the most significantly associated tag SNPs are believed to contain the causal polymorphisms. However, such LD regions are often large and contain many additional polymorphisms. Following up all the SNPs included in these regions is costly and infeasible for biological validation. In this article we address how to characterize these regions cost effectively with the goal of providing investigators a clear direction for biological validation. We introduce a follow-up study approach for identifying all untyped associated SNPs by selecting additional SNPs, called follow-up SNPs, from the associated regions and genotyping them in the original case/control individuals. We introduce a novel SNP selection method with the goal of maximizing the number of associated SNPs among the chosen follow-up SNPs. We show how the observed statistics of the original tag SNPs and human genetic variation reference data such as the HapMap Project can be utilized to identify the follow-up SNPs. We use simulated and real association studies based on the HapMap data and the Wellcome Trust Case Control Consortium to demonstrate that our method shows superior performance to the correlation- and distance-based traditional follow-up SNP selection approaches. 
Our method is publicly available at http://genetics.cs.ucla.edu/followupSNPs.","hji,kes",0,0,0,2,0,NA,NA +21493653,A robust and accurate binning algorithm for metagenomic sequences with arbitrary species abundance ratio.,"

Motivation

With the rapid development of next-generation sequencing techniques, metagenomics, also known as environmental genomics, has emerged as an exciting research area that enables us to analyze the microbial environment in which we live. An important step for metagenomic data analysis is the identification and taxonomic characterization of DNA fragments (reads or contigs) resulting from sequencing a sample of mixed species. This step is referred to as 'binning'. Binning algorithms that are based on sequence similarity and sequence composition markers rely heavily on the reference genomes of known microorganisms or phylogenetic markers. Due to the limited availability of reference genomes and the bias and low availability of markers, these algorithms may not be applicable in all cases. Unsupervised binning algorithms which can handle fragments from unknown species provide an alternative approach. However, existing unsupervised binning algorithms only work on datasets either with balanced species abundance ratios or rather different abundance ratios, but not both.

Results

In this article, we present MetaCluster 3.0, an integrated binning method based on the unsupervised top--down separation and bottom--up merging strategy, which can bin metagenomic fragments of species with very balanced abundance ratios (say 1:1) to very different abundance ratios (e.g. 1:24) with consistently higher accuracy than existing methods.

Availability

MetaCluster 3.0 can be downloaded at http://i.cs.hku.hk/~alse/MetaCluster/.","hji,kes",0,0,0,2,0,NA,NA +21496265,SNP-based pathway enrichment analysis for genome-wide association studies.,"

Background

Recently we have witnessed a surge of interest in using genome-wide association studies (GWAS) to discover the genetic basis of complex diseases. Many genetic variations, mostly in the form of single nucleotide polymorphisms (SNPs), have been identified in a wide spectrum of diseases, including diabetes, cancer, and psychiatric diseases. A common theme arising from these studies is that the genetic variations discovered by GWAS can only explain a small fraction of the genetic risks associated with the complex diseases. New strategies and statistical approaches are needed to address this lack of explanation. One such approach is the pathway analysis, which considers the genetic variations underlying a biological pathway, rather than separately as in the traditional GWAS studies. A critical challenge in the pathway analysis is how to combine evidences of association over multiple SNPs within a gene and multiple genes within a pathway. Most current methods choose the most significant SNP from each gene as a representative, ignoring the joint action of multiple SNPs within a gene. This approach leads to preferential identification of genes with a greater number of SNPs.

Results

We describe a SNP-based pathway enrichment method for GWAS studies. The method consists of the following two main steps: 1) for a given pathway, using an adaptive truncated product statistic to identify all representative (potentially more than one) SNPs of each gene, calculating the average number of representative SNPs for the genes, then re-selecting the representative SNPs of genes in the pathway based on this number; and 2) ranking all selected SNPs by the significance of their statistical association with a trait of interest, and testing if the set of SNPs from a particular pathway is significantly enriched with high ranks using a weighted Kolmogorov-Smirnov test. We applied our method to two large genetically distinct GWAS data sets of schizophrenia, one from European-American (EA) and the other from African-American (AA). In the EA data set, we found 22 pathways with nominal P-value less than or equal to 0.001 and corresponding false discovery rate (FDR) less than 5%. In the AA data set, we found 11 pathways by controlling the same nominal P-value and FDR threshold. Interestingly, 8 of these pathways overlap with those found in the EA sample. We have implemented our method in a JAVA software package, called SNP Set Enrichment Analysis (SSEA), which contains a user-friendly interface and is freely available at http://cbcl.ics.uci.edu/SSEA.

Conclusions

The SNP-based pathway enrichment method described here offers a new alternative approach for analysing GWAS data. By applying it to schizophrenia GWAS studies, we show that our method is able to identify statistically significant pathways, and importantly, pathways that can be replicated in large genetically distinct samples.","hji,kes",0,0,0,2,0,NA,NA +21546392,Boulder ALignment Editor (ALE): a web-based RNA alignment tool.,"

Summary

The explosion of interest in non-coding RNAs, together with improvements in RNA X-ray crystallography, has led to a rapid increase in RNA structures at atomic resolution from 847 in 2005 to 1900 in 2010. The success of whole-genome sequencing has led to an explosive growth of unaligned homologous sequences. Consequently, there is a compelling and urgent need for user-friendly tools for producing structure-informed RNA alignments. Most alignment software considers the primary sequence alone; some specialized alignment software can also include Watson-Crick base pairs, but none adequately addresses the needs introduced by the rapid influx of both sequence and structural data. Therefore, we have developed the Boulder ALignment Editor (ALE), which is a web-based RNA alignment editor, designed for editing and assessing alignments using structural information. Some features of BoulderALE include the annotation and evaluation of an alignment based on isostericity of Watson-Crick and non-Watson-Crick base pairs, along with the collapsing (horizontally and vertically) of the alignment, while maintaining the ability to edit the alignment.

Availability

http://www.microbio.me/boulderale.","hji,kes",0,0,0,2,0,NA,NA +21546400,Mixture models for analysis of the taxonomic composition of metagenomes.,"

Motivation

Inferring the taxonomic profile of a microbial community from a large collection of anonymous DNA sequencing reads is a challenging task in metagenomics. Because existing methods for taxonomic profiling of metagenomes are all based on the assignment of fragmentary sequences to phylogenetic categories, the accuracy of results largely depends on fragment length. This dependence complicates comparative analysis of data originating from different sequencing platforms or resulting from different preprocessing pipelines.

Results

We here introduce a new method for taxonomic profiling based on mixture modeling of the overall oligonucleotide distribution of a sample. Our results indicate that the mixture-based profiles compare well with taxonomic profiles obtained with other methods. However, in contrast to the existing methods, our approach shows a nearly constant profiling accuracy across all kinds of read lengths and it operates at an unrivaled speed.

Availability

A platform-independent implementation of the mixture modeling approach is available in terms of a MATLAB/Octave toolbox at http://gobics.de/peter/taxy. In addition, a prototypical implementation within an easy-to-use interactive tool for Windows can be downloaded.","hji,kes",0,0,0,2,0,NA,NA +21554765,Enhancing genome assemblies by integrating non-sequence based data.,"

Introduction

Many genome projects were underway before the advent of high-throughput sequencing and have thus been supported by a wealth of genome information from other technologies. Such information frequently takes the form of linkage and physical maps, both of which can provide a substantial amount of data useful in de novo sequencing projects. Furthermore, the recent abundance of genome resources enables the use of conserved synteny maps identified in related species to further enhance genome assemblies.

Methods

The tammar wallaby (Macropus eugenii) is a model marsupial mammal with a low coverage genome. However, we have access to extensive comparative maps containing over 14,000 markers constructed through the physical mapping of conserved loci, chromosome painting and comprehensive linkage maps. Using a custom Bioperl pipeline, information from the maps was aligned to assembled tammar wallaby contigs using BLAT. This data was used to construct pseudo paired-end libraries with intervals ranging from 5-10 MB. We then used Bambus (a program designed to scaffold eukaryotic genomes by ordering and orienting contigs through the use of paired-end data) to scaffold our libraries. To determine how map data compares to sequence based approaches to enhance assemblies, we repeated the experiment using a 0.5 coverage of unique reads from 4 KB and 8 KB Illumina paired-end libraries. Finally, we combined both the sequence and non-sequence-based data to determine how a combined approach could further enhance the quality of the low coverage de novo reconstruction of the tammar wallaby genome.

Results

Using the map data alone, we were able order 2.2% of the initial contigs into scaffolds, and increase the N50 scaffold size to 39 KB (36 KB in the original assembly). Using only the 0.5 paired-end sequence based data, 53% of the initial contigs were assigned to scaffolds. Combining both data sets resulted in a further 2% increase in the number of initial contigs integrated into a scaffold (55% total) but a 35% increase in N50 scaffold size over the use of sequence-based data alone.

Conclusions

We provide a relatively simple pipeline utilizing existing bioinformatics tools to integrate map data into a genome assembly which is available at http://www.mcb.uconn.edu/fac.php?name=paska. While the map data only contributed minimally to assigning the initial contigs to scaffolds in the new assembly, it greatly increased the N50 size. This process added structure to our low coverage assembly, greatly increasing its utility in further analyses.","hji,kes",0,0,0,2,0,NA,NA +21566560,"Summary of notifiable diseases: United States, 2009.","The Summary of Notifiable Diseases--- United States, 2009 contains the official statistics, in tabular and graphic form, for the reported occurrence of nationally notifiable infectious diseases in the United States for 2009. Unless otherwise noted, the data are final totals for 2009 reported as of June 30, 2010. These statistics are collected and compiled from reports sent by state health departments and territories to the National Notifiable Diseases Surveillance System (NNDSS), which is operated by CDC in collaboration with the Council of State and Territorial Epidemiologists (CSTE). The Summary is available at http://www.cdc.gov/mmwr/summary.html. This site also includes Summary publications from previous years.","hji,kes",0,0,0,2,0,NA,NA +21590677,Evaluation of the positional difference between two common geocoding methods.,"Geocoding, the process of matching addresses to geographic coordinates, is a necessary first step when using geographical information systems (GIS) technology. However, different geocoding methodologies can result in different geographic coordinates. The objective of this study was to compare the positional (i.e. longitude/latitude) difference between two common geocoding methods, i.e. ArcGIS (Environmental System Research Institute, Redlands, CA, USA) and Batchgeo (freely available online at http://www.batchgeo.com). 
Address data came from the YMCA-Harvard After School Food and Fitness Project, an obesity prevention intervention involving children aged 5-11 years and their families participating in YMCA-administered, after-school programmes located in four geographically diverse metropolitan areas in the USA. Our analyses include baseline addresses (n = 748) collected from the parents of the children in the after school sites. Addresses were first geocoded to the street level and assigned longitude and latitude coordinates with ArcGIS, version 9.3, then the same addresses were geocoded with Batchgeo. For this analysis, the ArcGIS minimum match score was 80. The resulting geocodes were projected into state plane coordinates, and the difference in longitude and latitude coordinates were calculated in meters between the two methods for all data points in each of the four metropolitan areas. We also quantified the descriptions of the geocoding accuracy provided by Batchgeo with the match scores from ArcGIS. We found a 94% match rate (n = 705), 2% (n = 18) were tied and 3% (n = 25) were unmatched using ArcGIS. Forty-eight addresses (6.4%) were not matched in ArcGIS with a match score =80 (therefore only 700 addresses were included in our positional difference analysis). Six hundred thirteen (87.6%) of these addresses had a match score of 100. Batchgeo yielded a 100% match rate for the addresses that ArcGIS geocoded. The median for longitude and latitude coordinates for all the data was just over 25 m. Overall, the range for longitude was 0.04-12,911.8 m, and the range for latitude was 0.02-37,766.6 m. Comparisons show minimal differences in the median and minimum values, while there were slightly larger differences in the maximum values. The majority (>75%) of the geographic differences were within 50 m of each other; mostly <25 m from each other (about 49%). Only about 4% overall were =400 m apart. 
We also found geographic differences in the proportion of addresses that fell within certain meter ranges. The match-score range associated with the Batchgeo accuracy level """"approximate"""" (least accurate) was 84-100 (mean = 92), while the """"rooftop"""" Batchgeo accuracy level (most accurate) delivered a mean of 98.9 but the range was the same. Although future research should compare the positional difference of Batchgeo to criterion measures of longitude/latitude (e.g. with global positioning system measurement), this study suggests that Batchgeo is a good, free-of-charge option to geocode addresses.","hji,kes",0,0,0,2,0,NA,NA +21600674,[EAU Guidelines on Urinary Incontinence].,"

Context

The first European Association of Urology (EAU) guidelines on incontinence were published in 2001. These guidelines were periodically updated in past years.

Objective

The aim of this paper is to present a summary of the 2009 update of the EAU guidelines on urinary incontinence (UI).

Evidence acquisition

The EAU working panel was part of the 4th International Consultation on Incontinence (ICI) and, with permission of the ICI, extracted the relevant data. The methodology of the 4th ICI was a comprehensive literature review by international experts and consensus formation. In addition, level of evidence was rated according to a modified Oxford system and grades of recommendation were given accordingly.

Evidence summary

A full version of the EAU guidelines on urinary incontinence is available as a printed document (extended and short form) and as a CD-ROM from the EAU office or online from the EAU Web site (http://www.uroweb.org/guidelines/online-guidelines/). The extent and invasiveness of assessment of UI depends on severity and/or complexity of symptoms and clinical signs and is different for men, women, frail older persons, children, and patients with neuropathy. At the level of initial management, basic diagnostic tests are applied to exclude an underlying disease or condition such as urinary tract infection. Treatment is mostly conservative (lifestyle interventions, physiotherapy, physical therapy, pharmacotherapy) and is of an empirical nature. At the level of specialised management (when primary therapy failed, diagnosis is unclear, or symptoms and/or signs are complex/severe),more elaborate assessment is generally required, including imaging, endoscopy, and urodynamics. Treatment options include invasive interventions and surgery.

Conclusions

Treatment options for UI are rapidly expanding. These EAU guidelines provide ratings of the evidence (guided by evidence-based medicine) and graded recommendations for the appropriate assessment and according treatment options and put them into clinical perspective.","hji,kes",0,0,0,2,0,NA,NA +21602510,"GProX, a user-friendly platform for bioinformatics analysis and visualization of quantitative proteomics data.","Recent technological advances have made it possible to identify and quantify thousands of proteins in a single proteomics experiment. As a result of these developments, the analysis of data has become the bottleneck of proteomics experiment. To provide the proteomics community with a user-friendly platform for comprehensive analysis, inspection and visualization of quantitative proteomics data we developed the Graphical Proteomics Data Explorer (GProX)(1). The program requires no special bioinformatics training, as all functions of GProX are accessible within its graphical user-friendly interface which will be intuitive to most users. Basic features facilitate the uncomplicated management and organization of large data sets and complex experimental setups as well as the inspection and graphical plotting of quantitative data. These are complemented by readily available high-level analysis options such as database querying, clustering based on abundance ratios, feature enrichment tests for e.g. GO terms and pathway analysis tools. A number of plotting options for visualization of quantitative proteomics data is available and most analysis functions in GProX create customizable high quality graphical displays in both vector and bitmap formats. The generic import requirements allow data originating from essentially all mass spectrometry platforms, quantitation strategies and software to be analyzed in the program. 
GProX represents a powerful approach to proteomics data analysis providing proteomics experimenters with a toolbox for bioinformatics analysis of quantitative proteomics data. The program is released as open-source and can be freely downloaded from the project webpage at http://gprox.sourceforge.net.","hji,kes",0,0,0,2,0,NA,NA +21609440,ParaHaplo 3.0: A program package for imputation and a haplotype-based whole-genome association study using hybrid parallel computing.,"

Background

Use of missing genotype imputations and haplotype reconstructions are valuable in genome-wide association studies (GWASs). By modeling the patterns of linkage disequilibrium in a reference panel, genotypes not directly measured in the study samples can be imputed and used for GWASs. Since millions of single nucleotide polymorphisms need to be imputed in a GWAS, faster methods for genotype imputation and haplotype reconstruction are required.

Results

We developed a program package for parallel computation of genotype imputation and haplotype reconstruction. Our program package, ParaHaplo 3.0, is intended for use in workstation clusters using the Intel Message Passing Interface. We compared the performance of ParaHaplo 3.0 on the Japanese in Tokyo, Japan and Han Chinese in Beijing, and Chinese in the HapMap dataset. A parallel version of ParaHaplo 3.0 can conduct genotype imputation 20 times faster than a non-parallel version of ParaHaplo.

Conclusions

ParaHaplo 3.0 is an invaluable tool for conducting haplotype-based GWASs. The need for faster genotype imputation and haplotype reconstruction using parallel computing will become increasingly important as the data sizes of such projects continue to increase. ParaHaplo executable binaries and program sources are available at http://en.sourceforge.jp/projects/parallelgwas/releases/.","hji,kes",0,0,0,2,0,NA,NA +21624157,How orthogonal are the OBO Foundry ontologies?,"

Background

Ontologies in biomedicine facilitate information integration, data exchange, search and query of biomedical data, and other critical knowledge-intensive tasks. The OBO Foundry is a collaborative effort to establish a set of principles for ontology development with the eventual goal of creating a set of interoperable reference ontologies in the domain of biomedicine. One of the key requirements to achieve this goal is to ensure that ontology developers reuse term definitions that others have already created rather than create their own definitions, thereby making the ontologies orthogonal.

Methods

We used a simple lexical algorithm to analyze the extent to which the set of OBO Foundry candidate ontologies identified from September 2009 to September 2010 conforms to this vision. Specifically, we analyzed (1) the level of explicit term reuse in this set of ontologies, (2) the level of overlap, where two ontologies define similar terms independently, and (3) how the levels of reuse and overlap changed during the course of this year.

Results

We found that 30% of the ontologies reuse terms from other Foundry candidates and 96% of the candidate ontologies contain terms that overlap with terms from the other ontologies. We found that while term reuse increased among the ontologies between September 2009 and September 2010, the level of overlap among the ontologies remained relatively constant. Additionally, we analyzed the six ontologies announced as OBO Foundry members on March 5, 2010, and identified that the level of overlap was extremely low, but, notably, so was the level of term reuse.

Conclusions

We have created a prototype web application that allows OBO Foundry ontology developers to see which classes from their ontologies overlap with classes from other ontologies in the OBO Foundry (http://obomap.bioontology.org). From our analysis, we conclude that while the OBO Foundry has made significant progress toward orthogonality during the period of this study through increased adoption of explicit term reuse, a large amount of overlap remains among these ontologies. Furthermore, the characteristics of the identified overlap, such as the terms it comprises and its distribution among the ontologies, indicate that the achieving orthogonality will be exceptionally difficult, if not impossible.","hji,kes",0,0,0,2,0,NA,for analysis only it seems +21685100,Piecewise linear approximation of protein structures using the principle of minimum message length.,"

Unlabelled

Simple and concise representations of protein-folding patterns provide powerful abstractions for visualizations, comparisons, classifications, searching and aligning structural data. Structures are often abstracted by replacing standard secondary structural features-that is, helices and strands of sheet-by vectors or linear segments. Relying solely on standard secondary structure may result in a significant loss of structural information. Further, traditional methods of simplification crucially depend on the consistency and accuracy of external methods to assign secondary structures to protein coordinate data. Although many methods exist automatically to identify secondary structure, the impreciseness of definitions, along with errors and inconsistencies in experimental structure data, drastically limit their applicability to generate reliable simplified representations, especially for structural comparison. This article introduces a mathematically rigorous algorithm to delineate protein structure using the elegant statistical and inductive inference framework of minimum message length (MML). Our method generates consistent and statistically robust piecewise linear explanations of protein coordinate data, resulting in a powerful and concise representation of the structure. The delineation is completely independent of the approaches of using hydrogen-bonding patterns or inspecting local substructural geometry that the current methods use. Indeed, as is common with applications of the MML criterion, this method is free of parameters and thresholds, in striking contrast to the existing programs which are often beset by them. The analysis of results over a large number of proteins suggests that the method produces consistent delineation of structures that encompasses, among others, the segments corresponding to standard secondary structure.

Availability

http://www.csse.monash.edu.au/~karun/pmml.","hji,kes",0,0,0,2,0,NA,NA +21703007,PlantPhos: using maximal dependence decomposition to identify plant phosphorylation sites with substrate site specificity.,"

Background

Protein phosphorylation catalyzed by kinases plays crucial regulatory roles in intracellular signal transduction. Due to the difficulty in performing high-throughput mass spectrometry-based experiment, there is a desire to predict phosphorylation sites using computational methods. However, previous studies regarding in silico prediction of plant phosphorylation sites lack the consideration of kinase-specific phosphorylation data. Thus, we are motivated to propose a new method that investigates different substrate specificities in plant phosphorylation sites.

Results

Experimentally verified phosphorylation data were extracted from TAIR9-a protein database containing 3006 phosphorylation data from the plant species Arabidopsis thaliana. In an attempt to investigate the various substrate motifs in plant phosphorylation, maximal dependence decomposition (MDD) is employed to cluster a large set of phosphorylation data into subgroups containing significantly conserved motifs. Profile hidden Markov model (HMM) is then applied to learn a predictive model for each subgroup. Cross-validation evaluation on the MDD-clustered HMMs yields an average accuracy of 82.4% for serine, 78.6% for threonine, and 89.0% for tyrosine models. Moreover, independent test results using Arabidopsis thaliana phosphorylation data from UniProtKB/Swiss-Prot show that the proposed models are able to correctly predict 81.4% phosphoserine, 77.1% phosphothreonine, and 83.7% phosphotyrosine sites. Interestingly, several MDD-clustered subgroups are observed to have similar amino acid conservation with the substrate motifs of well-known kinases from Phospho.ELM-a database containing kinase-specific phosphorylation data from multiple organisms.

Conclusions

This work presents a novel method for identifying plant phosphorylation sites with various substrate motifs. Based on cross-validation and independent testing, results show that the MDD-clustered models outperform models trained without using MDD. The proposed method has been implemented as a web-based plant phosphorylation prediction tool, PlantPhos http://csb.cse.yzu.edu.tw/PlantPhos/. Additionally, two case studies have been demonstrated to further evaluate the effectiveness of PlantPhos.","hji,kes",0,0,0,2,0,NA,NA +21708002,A mutation degree model for the identification of transcriptional regulatory elements.,"

Background

Current approaches for identifying transcriptional regulatory elements are mainly via the combination of two properties, the evolutionary conservation and the overrepresentation of functional elements in the promoters of co-regulated genes. Despite the development of many motif detection algorithms, the discovery of conserved motifs in a wide range of phylogenetically related promoters is still a challenge, especially for the short motifs embedded in distantly related gene promoters or very closely related promoters, or in the situation that there are not enough orthologous genes available.

Results

A mutation degree model is proposed and a new word counting method is developed for the identification of transcriptional regulatory elements from a set of co-expressed genes. The new method comprises two parts: 1) identifying overrepresented oligo-nucleotides in promoters of co-expressed genes, 2) estimating the conservation of the oligo-nucleotides in promoters of phylogenetically related genes by the mutation degree model. Compared with the performance of other algorithms, our method shows the advantages of low false positive rate and higher specificity, especially the robustness to noisy data. Applying the method to co-expressed gene sets from Arabidopsis, most of known cis-elements were successfully detected. The tool and example are available at http://mcube.nju.edu.cn/jwang/lab/soft/ocw/OCW.html.

Conclusions

The mutation degree model proposed in this paper is adapted to phylogenetic data of different qualities, and to a wide range of evolutionary distances. The new word-counting method based on this model has the advantage of better performance in detecting short sequence of cis-elements from co-expressed genes of eukaryotes and is robust to less complete phylogenetic data.","hji,kes",0,0,0,2,0,NA,NA +21712249,"CytoscapeRPC: a plugin to create, modify and query Cytoscape networks from scripting languages.","

Summary

CytoscapeRPC is a plugin for Cytoscape which allows users to create, query and modify Cytoscape networks from any programming language which supports XML-RPC. This enables them to access Cytoscape functionality and visualize their data interactively without leaving the programming environment with which they are familiar.

Availability

Install through the Cytoscape plugin manager or visit the web page: http://wiki.nbic.nl/index.php/CytoscapeRPC for the user tutorial and download.

Contact

j.j.bot@tudelft.nl; j.j.bot@tudelft.nl.","hji,kes",0,0,0,2,0,NA,NA +21742634,PREDA: an R-package to identify regional variations in genomic data.,"

Summary

Chromosomal patterns of genomic signals represent molecular fingerprints that may reveal how the local structural organization of a genome impacts the functional control mechanisms. Thus, the integrative analysis of multiple sources of genomic data and information deepens the resolution and enhances the interpretation of stand-alone high-throughput data. In this note, we present PREDA (Position RElated Data Analysis), an R package for detecting regional variations in genomics data. PREDA identifies relevant chromosomal patterns in high-throughput data using a smoothing approach that accounts for distance and density variability of genomics features. Custom-designed data structures allow efficiently managing diverse signals in different genomes. A variety of smoothing functions and statistics empower flexible and robust workflows. The modularity of package design allows an easy deployment of custom analytical pipelines. Tabular and graphical representations facilitate downstream biological interpretation of results.

Availability

PREDA is available in Bioconductor and at http://www.xlab.unimo.it/PREDA.

Contact

silvio.bicciato@unimore.it

Supplementary information

Supplementary information is available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +21750706,GenExp: an interactive web-based genomic DAS client with client-side data rendering.,"

Background

The Distributed Annotation System (DAS) offers a standard protocol for sharing and integrating annotations on biological sequences. There are more than 1000 DAS sources available and the number is steadily increasing. Clients are an essential part of the DAS system and integrate data from several independent sources in order to create a useful representation to the user. While web-based DAS clients exist, most of them do not have direct interaction capabilities such as dragging and zooming with the mouse.

Results

Here we present GenExp, a web based and fully interactive visual DAS client. GenExp is a genome oriented DAS client capable of creating informative representations of genomic data zooming out from base level to complete chromosomes. It proposes a novel approach to genomic data rendering and uses the latest HTML5 web technologies to create the data representation inside the client browser. Thanks to client-side rendering most position changes do not need a network request to the server and so responses to zooming and panning are almost immediate. In GenExp it is possible to explore the genome intuitively moving it with the mouse just like geographical map applications. Additionally, in GenExp it is possible to have more than one data viewer at the same time and to save the current state of the application to revisit it later on.

Conclusions

GenExp is a new interactive web-based client for DAS and addresses some of the short-comings of the existing clients. It uses client-side data rendering techniques resulting in easier genome browsing and exploration. GenExp is open source under the GPL license and it is freely available at http://gralggen.lsi.upc.edu/recerca/genexp.","hji,kes",0,0,0,2,0,NA,NA +21751374,MassChroQ: a versatile tool for mass spectrometry quantification.,"Recently, many software tools have been developed to perform quantification in LC-MS analyses. However, most of them are specific to either a quantification strategy (e.g. label-free or isotopic labelling) or a mass-spectrometry system (e.g. high or low resolution). In this context, we have developed MassChroQ (Mass Chromatogram Quantification), a versatile software that performs LC-MS data alignment and peptide quantification by peak area integration on extracted ion chromatograms. MassChroQ is suitable for quantification with or without labelling and is not limited to high-resolution systems. Peptides of interest (for example all the identified peptides) can be determined automatically, or manually by providing targeted m/z and retention time values. It can handle large experiments that include protein or peptide fractionation (as SDS-PAGE, 2-D LC). It is fully configurable. Every processing step is traceable, the produced data are in open standard formats and its modularity allows easy integration into proteomic pipelines. The output results are ready for use in statistical analyses. Evaluation of MassChroQ on complex label-free data obtained from low and high-resolution mass spectrometers showed low CVs for technical reproducibility (1.4%) and high coefficients of correlation to protein quantity (0.98). 
MassChroQ is freely available under the GNU General Public Licence v3.0 at http://pappso.inra.fr/bioinfo/masschroq/.","hji,kes",0,0,0,2,0,NA,NA +21752802,"RxnFinder: biochemical reaction search engines using molecular structures, molecular fragments and reaction similarity.","

Summary

Biochemical reactions play a key role to help sustain life and allow cells to grow. RxnFinder was developed to search biochemical reactions from KEGG reaction database using three search criteria: molecular structures, molecular fragments and reaction similarity. RxnFinder is helpful to get reference reactions for biosynthesis and xenobiotics metabolism.

Availability

RxnFinder is freely available via: http://sdd.whu.edu.cn/rxnfinder.

Contact

qnhu@whu.edu.cn.","hji,kes",0,0,0,2,0,NA,NA +21756356,Statistical mutation calling from sequenced overlapping DNA pools in TILLING experiments.,"

Background

TILLING (Targeting induced local lesions IN genomes) is an efficient reverse genetics approach for detecting induced mutations in pools of individuals. Combined with the high-throughput of next-generation sequencing technologies, and the resolving power of overlapping pool design, TILLING provides an efficient and economical platform for functional genomics across thousands of organisms.

Results

We propose a probabilistic method for calling TILLING-induced mutations, and their carriers, from high throughput sequencing data of overlapping population pools, where each individual occurs in two pools. We assign a probability score to each sequence position by applying Bayes' Theorem to a simplified binomial model of sequencing error and expected mutations, taking into account the coverage level. We test the performance of our method on variable quality, high-throughput sequences from wheat and rice mutagenized populations.

Conclusions

We show that our method effectively discovers mutations in large populations with sensitivity of 92.5% and specificity of 99.8%. It also outperforms existing SNP detection methods in detecting real mutations, especially at higher levels of coverage variability across sequenced pools, and in lower quality short reads sequence data. The implementation of our method is available from: http://www.cs.ucdavis.edu/filkov/CAMBa/.","hji,kes",0,0,0,2,0,NA,NA +21779367,High-resolution quantification of focal adhesion spatiotemporal dynamics in living cells.,"Focal adhesions (FAs) are macromolecular complexes that provide a linkage between the cell and its external environment. In a motile cell, focal adhesions change size and position to govern cell migration, through the dynamic processes of assembly and disassembly. To better understand the dynamic regulation of focal adhesions, we have developed an analysis system for the automated detection, tracking, and data extraction of these structures in living cells. This analysis system was used to quantify the dynamics of fluorescently tagged Paxillin and FAK in NIH 3T3 fibroblasts followed via Total Internal Reflection Fluorescence Microscopy (TIRF). High content time series included the size, shape, intensity, and position of every adhesion present in a living cell. These properties were followed over time, revealing adhesion lifetime and turnover rates, and segregation of properties into distinct zones. As a proof-of-concept, we show how a single point mutation in Paxillin at the Jun-kinase phosphorylation site Serine 178 changes FA size, distribution, and rate of assembly. This study provides a detailed, quantitative picture of FA spatiotemporal dynamics as well as a set of tools and methodologies for advancing our understanding of how focal adhesions are dynamically regulated in living cells. 
A full, open-source software implementation of this pipeline is provided at http://gomezlab.bme.unc.edu/tools.","hji,kes",0,0,0,2,0,NA,NA +21789500,NeuroNames: an ontology for the BrainInfo portal to neuroscience on the web.,"BrainInfo ( http://braininfo.org ) is a growing portal to neuroscientific information on the Web. It is indexed by NeuroNames, an ontology designed to compensate for ambiguities in neuroanatomical nomenclature. The 20-year old ontology continues to evolve toward the ideal of recognizing all names of neuroanatomical entities and accommodating all structural concepts about which neuroscientists communicate, including multiple concepts of entities for which neuroanatomists have yet to determine the best or 'true' conceptualization. To make the definitions of structural concepts unambiguous and terminologically consistent we created a 'default vocabulary' of unique structure names selected from existing terminology. We selected standard names by criteria designed to maximize practicality for use in verbal communication as well as computerized knowledge management. The ontology of NeuroNames accommodates synonyms and homonyms of the standard terms in many languages. It defines complex structures as models composed of primary structures, which are defined in unambiguous operational terms. NeuroNames currently relates more than 16,000 names in eight languages to some 2,500 neuroanatomical concepts. The ontology is maintained in a relational database with three core tables: Names, Concepts and Models. BrainInfo uses NeuroNames to index information by structure, to interpret users' queries and to clarify terminology on remote web pages. NeuroNames is a resource vocabulary of the NLM's Unified Medical Language System (UMLS, 2011) and the basis for the brain regions component of NIFSTD (NeuroLex, 2011). 
The current version has been downloaded to hundreds of laboratories for indexing data and linking to BrainInfo, which attracts some 400 visitors/day, downloading 2,000 pages/day.","hji,kes",0,0,0,2,0,NA,NA +21791534,A dynamic programming algorithm for identification of triplex-forming sequences.,"

Motivation

Current methods for identification of potential triplex-forming sequences in genomes and similar sequence sets rely primarily on detecting homopurine and homopyrimidine tracts. Procedures capable of detecting sequences supporting imperfect, but structurally feasible intramolecular triplex structures are needed for better sequence analysis.

Results

We modified an algorithm for detection of approximate palindromes, so as to account for the special nature of triplex DNA structures. From available literature, we conclude that approximate triplexes tolerate two classes of errors. One, analogical to mismatches in duplex DNA, involves nucleotides in triplets that do not readily form Hoogsteen bonds. The other class involves geometrically incompatible neighboring triplets hindering proper alignment of strands for optimal hydrogen bonding and stacking. We tested the statistical properties of the algorithm, as well as its correctness when confronted with known triplex sequences. The proposed algorithm satisfactorily detects sequences with intramolecular triplex-forming potential. Its complexity is directly comparable to palindrome searching.

Availability

Our implementation of the algorithm is available at http://www.fi.muni.cz/lexa/triplex as source code and a web-based search tool. The source code compiles into a library providing searching capability to other programs, as well as into a stand-alone command-line application based on this library.

Contact

lexa@fi.muni.cz

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +21798033,Visualizing meta-features in proteomic maps.,"

Background

The steps of a high-throughput proteomics experiment include the separation, differential expression and mass spectrometry-based identification of proteins. However, the last and more challenging step is inferring the biological role of the identified proteins through their association with interaction networks, biological pathways, analysis of the effect of post-translational modifications, and other protein-related information.

Results

In this paper, we present an integrative visualization methodology that allows combining experimentally produced proteomic features with protein meta-features, typically coming from meta-analysis tools and databases, in synthetic Proteomic Feature Maps. Using three proteomics analysis scenarios, we show that the proposed visualization approach is effective in filtering, navigating and interacting with the proteomics data in order to address visually challenging biological questions. The novelty of our approach lies in the ease of integration of any user-defined proteomic features in easy-to-comprehend visual representations that resemble the familiar 2D-gel images, and can be adapted to the user's needs. The main capabilities of the developed VIP software, which implements the presented visualization methodology, are also highlighted and discussed.

Conclusions

By using this visualization and the associated VIP software, researchers can explore a complex heterogeneous proteomics dataset from different perspectives in order to address visually important biological queries and formulate new hypotheses for further investigation. VIP is freely available at http://pelopas.uop.gr/~egian/VIP/index.html.","hji,kes",0,0,0,2,0,NA,NA +21828087,"OrganismTagger: detection, normalization and grounding of organism entities in biomedical documents.","

Motivation

Semantic tagging of organism mentions in full-text articles is an important part of literature mining and semantic enrichment solutions. Tagged organism mentions also play a pivotal role in disambiguating other entities in a text, such as proteins. A high-precision organism tagging system must be able to detect the numerous forms of organism mentions, including common names as well as the traditional taxonomic groups: genus, species and strains. In addition, such a system must resolve abbreviations and acronyms, assign the scientific name and if possible link the detected mention to the NCBI Taxonomy database for further semantic queries and literature navigation.

Results

We present the OrganismTagger, a hybrid rule-based/machine learning system to extract organism mentions from the literature. It includes tools for automatically generating lexical and ontological resources from a copy of the NCBI Taxonomy database, thereby facilitating system updates by end users. Its novel ontology-based resources can also be reused in other semantic mining and linked data tasks. Each detected organism mention is normalized to a canonical name through the resolution of acronyms and abbreviations and subsequently grounded with an NCBI Taxonomy database ID. In particular, our system combines a novel machine-learning approach with rule-based and lexical methods for detecting strain mentions in documents. On our manually annotated OT corpus, the OrganismTagger achieves a precision of 95%, a recall of 94% and a grounding accuracy of 97.5%. On the manually annotated corpus of Linnaeus-100, the results show a precision of 99%, recall of 97% and grounding accuracy of 97.4%.

Availability

The OrganismTagger, including supporting tools, resources, training data and manual annotations, as well as end user and developer documentation, is freely available under an open-source license at http://www.semanticsoftware.info/organism-tagger.

Contact

witte@semanticsoftware.info.","hji,kes",0,0,0,2,0,NA,NA +21846404,Prediction of conformational B-cell epitopes from 3D structures by random forests with a distance-based feature.,"

Background

Antigen-antibody interactions are key events in immune system, which provide important clues to the immune processes and responses. In Antigen-antibody interactions, the specific sites on the antigens that are directly bound by the B-cell produced antibodies are well known as B-cell epitopes. The identification of epitopes is a hot topic in bioinformatics because of their potential use in the epitope-based drug design. Although most B-cell epitopes are discontinuous (or conformational), insufficient effort has been put into the conformational epitope prediction, and the performance of existing methods is far from satisfaction.

Results

In order to develop the high-accuracy model, we focus on some possible aspects concerning the prediction performance, including the impact of interior residues, different contributions of adjacent residues, and the imbalanced data which contain much more non-epitope residues than epitope residues. In order to address above issues, we take following strategies. Firstly, a concept of 'thick surface patch' instead of 'surface patch' is introduced to describe the local spatial context of each surface residue, which considers the impact of interior residue. The comparison between the thick surface patch and the surface patch shows that interior residues contribute to the recognition of epitopes. Secondly, statistical significance of the distance distribution difference between non-epitope patches and epitope patches is observed, thus an adjacent residue distance feature is presented, which reflects the unequal contributions of adjacent residues to the location of binding sites. Thirdly, a bootstrapping and voting procedure is adopted to deal with the imbalanced dataset. Based on the above ideas, we propose a new method to identify the B-cell conformational epitopes from 3D structures by combining conventional features and the proposed feature, and the random forest (RF) algorithm is used as the classification engine. The experiments show that our method can predict conformational B-cell epitopes with high accuracy. Evaluated by leave-one-out cross validation (LOOCV), our method achieves the mean AUC value of 0.633 for the benchmark bound dataset, and the mean AUC value of 0.654 for the benchmark unbound dataset. When compared with the state-of-the-art prediction models in the independent test, our method demonstrates comparable or better performance.

Conclusions

Our method is demonstrated to be effective for the prediction of conformational epitopes. Based on the study, we develop a tool to predict the conformational epitopes from 3D structures, available at http://code.google.com/p/my-project-bpredictor/downloads/list.","hji,kes",0,0,0,2,0,NA,NA +21857117,Eu-Detect: an algorithm for detecting eukaryotic sequences in metagenomic data sets.,"Physical partitioning techniques are routinely employed (during sample preparation stage) for segregating the prokaryotic and eukaryotic fractions of metagenomic samples. In spite of these efforts, several metagenomic studies focusing on bacterial and archaeal populations have reported the presence of contaminating eukaryotic sequences in metagenomic data sets. Contaminating sequences originate not only from genomes of micro-eukaryotic species but also from genomes of (higher) eukaryotic host cells. The latter scenario usually occurs in the case of host-associated metagenomes. Identification and removal of contaminating sequences is important, since these sequences not only impact estimates of microbial diversity but also affect the accuracy of several downstream analyses. Currently, the computational techniques used for identifying contaminating eukaryotic sequences, being alignment based, are slow, inefficient, and require huge computing resources. In this article, we present Eu-Detect, an alignment-free algorithm that can rapidly identify eukaryotic sequences contaminating metagenomic data sets. Validation results indicate that on a desktop with modest hardware specifications, the Eu-Detect algorithm is able to rapidly segregate DNA sequence fragments of prokaryotic and eukaryotic origin, with high sensitivity. A Web server for the Eu-Detect algorithm is available at http://metagenomics.atc.tcs.com/Eu-Detect/.","hji,kes",0,0,0,2,0,NA,NA +21873327,Finding recurrent copy number alterations preserving within-sample homogeneity.,"

Motivation

Copy number alterations (CNAs) represent an important component of genetic variation and play a significant role in many human diseases. Development of array comparative genomic hybridization (aCGH) technology has made it possible to identify CNAs. Identification of recurrent CNAs represents the first fundamental step to provide a list of genomic regions which form the basis for further biological investigations. The main problem in recurrent CNAs discovery is related to the need to distinguish between functional changes and random events without pathological relevance. Within-sample homogeneity represents a common feature of copy number profile in cancer, so it can be used as additional source of information to increase the accuracy of the results. Although several algorithms aimed at the identification of recurrent CNAs have been proposed, no attempt of a comprehensive comparison of different approaches has yet been published.

Results

We propose a new approach, called Genomic Analysis of Important Alterations (GAIA), to find recurrent CNAs where a statistical hypothesis framework is extended to take into account within-sample homogeneity. Statistical significance and within-sample homogeneity are combined into an iterative procedure to extract the regions that likely are involved in functional changes. Results show that GAIA represents a valid alternative to other proposed approaches. In addition, we perform an accurate comparison by using two real aCGH datasets and a carefully planned simulation study.

Availability

GAIA has been implemented as R/Bioconductor package. It can be downloaded from the following page http://bioinformatics.biogem.it/download/gaia.

Contact

ceccarelli@unisannio.it; morganella@unisannio.it.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +21896882,Functional network construction in Arabidopsis using rule-based machine learning on large-scale data sets.,"The meta-analysis of large-scale postgenomics data sets within public databases promises to provide important novel biological knowledge. Statistical approaches including correlation analyses in coexpression studies of gene expression have emerged as tools to elucidate gene function using these data sets. Here, we present a powerful and novel alternative methodology to computationally identify functional relationships between genes from microarray data sets using rule-based machine learning. This approach, termed """"coprediction,"""" is based on the collective ability of groups of genes co-occurring within rules to accurately predict the developmental outcome of a biological system. We demonstrate the utility of coprediction as a powerful analytical tool using publicly available microarray data generated exclusively from Arabidopsis thaliana seeds to compute a functional gene interaction network, termed Seed Co-Prediction Network (SCoPNet). SCoPNet predicts functional associations between genes acting in the same developmental and signal transduction pathways irrespective of the similarity in their respective gene expression patterns. Using SCoPNet, we identified four novel regulators of seed germination (ALTERED SEED GERMINATION5, 6, 7, and 8), and predicted interactions at the level of transcript abundance between these novel and previously described factors influencing Arabidopsis seed germination. An online Web tool to query SCoPNet has been developed as a community resource to dissect seed biology and is available at http://www.vseed.nottingham.ac.uk/.","hji,kes",0,0,0,2,0,NA,NA +21903624,Finding stable local optimal RNA secondary structures.,"

Motivation

Many RNAs, such as riboswitches, can fold into multiple alternate structures and perform different biological functions. These biologically functional structures usually have low free energies in their local energy landscapes and are very stable such that they cannot easily jump out of the current states and fold into other stable conformations. The conformational space of feasible RNA secondary structures is prohibitively large, and accurate prediction of functional structure conformations is challenging. Because the stability of an RNA secondary structure is determined predominantly by energetically favorable helical regions (stacks), we propose to use configurations of putative stacks to represent RNA secondary structures. By considering a reduced conformational space of local optimal stack configurations instead of all feasible RNA structures, we first present an algorithm for enumerating all possible local optimal stack configurations. In addition, we present a fast heuristic algorithm for approximating energy barriers encountered during folding pathways between each pair of local optimal stack configurations and finding all the stable local optimal structures.

Results

Benchmark tests have been conducted on several RNA riboswitches, whose alternate secondary structures have been experimentally verified. The benchmark results show that our method can successfully predict the native 'on' and 'off' secondary structures, and better rank them compared with other state-of-art approaches.

Availability

The software is freely available and can be downloaded at http://genome.ucf.edu/RNASLOpt.

Contact

shzhang@eecs.ucf.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +21903632,Protein-protein binding affinity prediction on a diverse set of structures.,"

Motivation

Accurate binding free energy functions for protein-protein interactions are imperative for a wide range of purposes. Their construction is predicated upon ascertaining the factors that influence binding and their relative importance. A recent benchmark of binding affinities has allowed, for the first time, the evaluation and construction of binding free energy models using a diverse set of complexes, and a systematic assessment of our ability to model the energetics of conformational changes.

Results

We construct a large set of molecular descriptors using commonly available tools, introducing the use of energetic factors associated with conformational changes and disorder to order transitions, as well as features calculated on structural ensembles. The descriptors are used to train and test a binding free energy model using a consensus of four machine learning algorithms, whose performance constitutes a significant improvement over the other state of the art empirical free energy functions tested. The internal workings of the learners show how the descriptors are used, illuminating the determinants of protein-protein binding.

Availability

The molecular descriptor set and descriptor values for all complexes are available in the Supplementary Material. A web server for the learners and coordinates for the bound and unbound structures can be accessed from the website: http://bmm.cancerresearchuk.org/~Affinity.

Contact

paul.bates@cancer.org.uk.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +21985277,A model-based circular binary segmentation algorithm for the analysis of array CGH data.,"

Background

Circular Binary Segmentation (CBS) is a permutation-based algorithm for array Comparative Genomic Hybridization (aCGH) data analysis. CBS accurately segments data by detecting change-points using a maximal-t test; but extensive computational burden is involved for evaluating the significance of change-points using permutations. A recent implementation utilizing a hybrid method and early stopping rules (hybrid CBS) to improve the performance in speed was subsequently proposed. However, a time analysis revealed that a major portion of computation time of the hybrid CBS was still spent on permutation. In addition, what the hybrid method provides is an approximation of the significance upper bound or lower bound, not an approximation of the significance of change-points itself.

Results

We developed a novel model-based algorithm, extreme-value based CBS (eCBS), which limits permutations and provides robust results without loss of accuracy. Thousands of aCGH data under null hypothesis were simulated in advance based on a variety of non-normal assumptions, and the corresponding maximal-t distribution was modeled by the Generalized Extreme Value (GEV) distribution. The modeling results, which associate characteristics of aCGH data to the GEV parameters, constitute lookup tables (eXtreme model). Using the eXtreme model, the significance of change-points could be evaluated in a constant time complexity through a table lookup process.

Conclusions

A novel algorithm, eCBS, was developed in this study. The current implementation of eCBS consistently outperforms the hybrid CBS 4× to 20× in computation time without loss of accuracy. Source codes, supplementary materials, supplementary figures, and supplementary tables can be found at http://ntumaps.cgm.ntu.edu.tw/eCBSsupplementary.","hji,kes",0,0,0,2,0,NA,NA +21988420,"Pain, analgesia and genetics.","

Objectives

In the clinical setting, there is marked intersubject variability in the intensity of pain reported by patients with apparently similar pain states, as well as widely differing analgesic dosing requirements between individuals to produce satisfactory pain relief with tolerable side-effects. Genetic and environmental factors as well as their interaction are implicated, and these are discussed in this review.

Key findings

Pioneering work undertaken in mice more than a decade ago, showed a strong genetic contribution to levels of nociception/hypersensitivity as well as levels of antinociception produced by commonly available analgesic agents. To date more than 300 candidate 'pain' genes have been identified as potentially contributing to heritable differences in pain sensitivity and analgesic responsiveness in animals and humans, with this information available in a publicly accessible database http://www.jbldesign.com/jmogil/enter.html. Since then, many genetic association studies have been conducted in humans to investigate the possibility that single nucleotide polymorphisms (SNPs) in an individual gene may explain drug inefficacy or excessive toxicity experienced by a small subset of the whole population who have the rare allele for a particular SNP.

Summary

Despite the fact that SNPs in more than 20 genes that affect pain sensitivity or contribute to interindividual variability in responses to analgesic medications have been identified in the human genome, much of the data is conflicting. Apart from deficiencies in the design and conduct of human genetic association studies, recent research from other fields has implicated epigenetic mechanisms that facilitate dynamic gene-environment communication, as a possible explanation.","hji,kes",0,0,0,2,0,NA,not about the resource +21998153,Fast scaffolding with small independent mixed integer programs.,"

Motivation

Assembling genomes from short read data has become increasingly popular, but the problem remains computationally challenging especially for larger genomes. We study the scaffolding phase of sequence assembly where preassembled contigs are ordered based on mate pair data.

Results

We present MIP Scaffolder that divides the scaffolding problem into smaller subproblems and solves these with mixed integer programming. The scaffolding problem can be represented as a graph and the biconnected components of this graph can be solved independently. We present a technique for restricting the size of these subproblems so that they can be solved accurately with mixed integer programming. We compare MIP Scaffolder to two state of the art methods, SOPRA and SSPACE. MIP Scaffolder is fast and produces better or as good scaffolds as its competitors on large genomes.

Availability

The source code of MIP Scaffolder is freely available at http://www.cs.helsinki.fi/u/lmsalmel/mip-scaffolder/.

Contact

leena.salmela@cs.helsinki.fi.","hji,kes",0,0,0,2,0,NA,NA +21998156,Extraction of data deposition statements from the literature: a method for automatically tracking research results.,"

Motivation

Research in the biomedical domain can have a major impact through open sharing of the data produced. For this reason, it is important to be able to identify instances of data production and deposition for potential re-use. Herein, we report on the automatic identification of data deposition statements in research articles.

Results

We apply machine learning algorithms to sentences extracted from full-text articles in PubMed Central in order to automatically determine whether a given article contains a data deposition statement, and retrieve the specific statements. With an Support Vector Machine classifier using conditional random field determined deposition features, articles containing deposition statements are correctly identified with 81% F-measure. An error analysis shows that almost half of the articles classified as containing a deposition statement by our method but not by the gold standard do indeed contain a deposition statement. In addition, our system was used to process articles in PubMed Central, predicting that a total of 52 932 articles report data deposition, many of which are not currently included in the Secondary Source Identifier [si] field for MEDLINE citations.

Availability

All annotated datasets described in this study are freely available from the NLM/NCBI website at http://www.ncbi.nlm.nih.gov/CBBresearch/Fellows/Neveol/DepositionDataSets.zip

Contact

aurelie.neveol@nih.gov; john.wilbur@nih.gov; zhiyong.lu@nih.gov

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,zip +22014078,"Inputs to quality: supervision, management, and community involvement in health facilities in Egypt in 2004.","

Background

As low- and middle-income countries experience economic development, ensuring quality of health care delivery is a central component of health reform. Nevertheless, health reforms in low- and middle-income countries have focused more on access to services rather than the quality of these services, and reporting on quality has been limited. In the present study, we sought to examine the prevalence and regional variation in key management practices in Egyptian health facilities within three domains: supervision of the facility from the Ministry of Health and Population (MOHP), managerial processes, and patient and community involvement in care.

Methods

We conducted a cross-sectional analysis of data from 559 facilities surveyed with the Egyptian Service Provision Assessment (ESPA) survey in 2004, the most recent such survey in Egypt. We registered on the Measure Demographic and Health Survey (DHS) website http://legacy.measuredhs.com/login.cfm to gain access to the survey data. From the ESPA sampled 559 MOHP facilities, we excluded a total of 79 facilities because they did not offer facility-based 24-hour care or have at least one physician working in the facility, resulting in a final sample of 480 facilities. The final sample included 76 general service hospitals, 307 rural health units, and 97 maternal and child health and urban health units (MCH/urban units). We used standard frequency analyses to describe facility characteristics and tested the statistical significance of regional differences using chi-square statistics.

Results

Nearly all facilities reported having external supervision within the 6 months preceding the interview. In contrast, key facility-level managerial processes, such as having routine and documented management meetings and applying quality assurance approaches, were uncommon. Involvement of communities and patients was also reported in a minority of facilities. Hospitals and health units located in Urban Egypt compared with more rural parts of Egypt were significantly more likely to have management committees that met at least monthly, to keep official records of the meetings, and to have an approach for reviewing quality assurance activities.

Conclusions

Although the data precede the recent reform efforts of the MOHP, they provide a baseline against which future progress can be measured. Targeted efforts to improve facility-level management are critical to supporting quality improvement initiatives directed at improving the quality of health care throughout the country.","hji,kes",0,0,0,2,0,NA,NA +22014236,An integrated workflow for robust alignment and simplified quantitative analysis of NMR spectrometry data.,"

Background

Nuclear magnetic resonance spectroscopy (NMR) is a powerful technique to reveal and compare quantitative metabolic profiles of biological tissues. However, chemical and physical sample variations make the analysis of the data challenging, and typically require the application of a number of preprocessing steps prior to data interpretation. For example, noise reduction, normalization, baseline correction, peak picking, spectrum alignment and statistical analysis are indispensable components in any NMR analysis pipeline.

Results

We introduce a novel suite of informatics tools for the quantitative analysis of NMR metabolomic profile data. The core of the processing cascade is a novel peak alignment algorithm, called hierarchical Cluster-based Peak Alignment (CluPA). The algorithm aligns a target spectrum to the reference spectrum in a top-down fashion by building a hierarchical cluster tree from peak lists of reference and target spectra and then dividing the spectra into smaller segments based on the most distant clusters of the tree. To reduce the computational time to estimate the spectral misalignment, the method makes use of Fast Fourier Transformation (FFT) cross-correlation. Since the method returns a high-quality alignment, we can propose a simple methodology to study the variability of the NMR spectra. For each aligned NMR data point the ratio of the between-group and within-group sum of squares (BW-ratio) is calculated to quantify the difference in variability between and within predefined groups of NMR spectra. This differential analysis is related to the calculation of the F-statistic or a one-way ANOVA, but without distributional assumptions. Statistical inference based on the BW-ratio is achieved by bootstrapping the null distribution from the experimental data.

Conclusions

The workflow performance was evaluated using a previously published dataset. Correlation maps, spectral and grey scale plots show clear improvements in comparison to other methods, and the down-to-earth quantitative analysis works well for the CluPA-aligned spectra. The whole workflow is embedded into a modular and statistically sound framework that is implemented as an R package called """"speaq"""" (""""spectrum alignment and quantitation""""), which is freely available from http://code.google.com/p/speaq/.","hji,kes",0,0,0,2,0,NA,NA +22053077,AMPA: an automated web server for prediction of protein antimicrobial regions.,"

Summary

AMPA is a web application for assessing the antimicrobial domains of proteins, with a focus on the design on new antimicrobial drugs. The application provides fast discovery of antimicrobial patterns in proteins that can be used to develop new peptide-based drugs against pathogens. Results are shown in a user-friendly graphical interface and can be downloaded as raw data for later examination.

Availability

AMPA is freely available on the web at http://tcoffee.crg.cat/apps/ampa. The source code is also available in the web.

Contact

marc.torrent@upf.edu; david.andreu@upf.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,no new data or value add? +22088842,Dragon PolyA Spotter: predictor of poly(A) motifs within human genomic DNA sequences.,"

Motivation

Recognition of poly(A) signals in mRNA is relatively straightforward due to the presence of easily recognizable polyadenylic acid tail. However, the task of identifying poly(A) motifs in the primary genomic DNA sequence that correspond to poly(A) signals in mRNA is a far more challenging problem. Recognition of poly(A) signals is important for better gene annotation and understanding of the gene regulation mechanisms. In this work, we present one such poly(A) motif prediction method based on properties of human genomic DNA sequence surrounding a poly(A) motif. These properties include thermodynamic, physico-chemical and statistical characteristics. For predictions, we developed Artificial Neural Network and Random Forest models. These models are trained to recognize 12 most common poly(A) motifs in human DNA. Our predictors are available as a free web-based tool accessible at http://cbrc.kaust.edu.sa/dps. Compared with other reported predictors, our models achieve higher sensitivity and specificity and furthermore provide a consistent level of accuracy for 12 poly(A) motif variants.

Contact

vladimir.bajic@kaust.edu.sa

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +22135418,Metscape 2 bioinformatics tool for the analysis and visualization of metabolomics and gene expression data.,"

Motivation

Metabolomics is a rapidly evolving field that holds promise to provide insights into genotype-phenotype relationships in cancers, diabetes and other complex diseases. One of the major informatics challenges is providing tools that link metabolite data with other types of high-throughput molecular data (e.g. transcriptomics, proteomics), and incorporate prior knowledge of pathways and molecular interactions.

Results

We describe a new, substantially redesigned version of our tool Metscape that allows users to enter experimental data for metabolites, genes and pathways and display them in the context of relevant metabolic networks. Metscape 2 uses an internal relational database that integrates data from KEGG and EHMN databases. The new version of the tool allows users to identify enriched pathways from expression profiling data, build and analyze the networks of genes and metabolites, and visualize changes in the gene/metabolite data. We demonstrate the applications of Metscape to annotate molecular pathways for human and mouse metabolites implicated in the pathogenesis of sepsis-induced acute lung injury, for the analysis of gene expression and metabolite data from pancreatic ductal adenocarcinoma, and for identification of the candidate metabolites involved in cancer and inflammation.

Availability

Metscape is part of the National Institutes of Health-supported National Center for Integrative Biomedical Informatics (NCIBI) suite of tools, freely available at http://metscape.ncibi.org. It can be downloaded from http://cytoscape.org or installed via Cytoscape plugin manager.

Contact

metscape-help@umich.edu; akarnovs@umich.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +22161415,Surgical orbital decompression for thyroid eye disease.,"

Background

Orbital decompression is an established procedure for the management of exophthalmos and visual rehabilitation from optic neuropathy in cases of thyroid eye disease. Numerous procedures for removal of orbital bony wall, fat or a combination of these for a variety of indications in different stages of the disease have been well reported in the medical literature. However, the relative effectiveness and safety of these procedures in relation to the various indications remains unclear.

Objectives

To review current published evidence for the effectiveness of surgical orbital decompression for disfiguring proptosis in adult thyroid eye disease and summarise information on possible complications and the quality of life from the studies identified.

Search methods

We searched CENTRAL (which contains the Cochrane Eyes and Vision Group Trials Register) (The Cochrane Library 2011, Issue 10), MEDLINE (January 1950 to October 2011), EMBASE (January 1980 to October 2011), the metaRegister of Controlled Trials (mRCT) (www.controlled-trials.com) and ClinicalTrials.gov (http://clinicaltrials.gov). There were no date or language restrictions in the electronic searches for trials. The electronic databases were last searched on 6 October 2011. We searched oculoplastic textbooks, conference proceedings from the European and American Society of Ophthalmic Plastic and Reconstructive Surgery (ESOPRS, ASOPRS), European Ophthalmological Society (SOE), the Association for Research in Vision and Ophthalmology (ARVO) and American Academy of Ophthalmology (AAO) for the years 2000 to 2009 to identify relevant data. We attempted to contact researchers who are active in this field for information about further published or unpublished studies.

Selection criteria

We included randomised controlled trials (RCTs) with no restriction on date or language comparing two or more surgical methods for orbital decompression with removal of bony wall, orbital fat or a combination of both for disfiguring proptosis or comparison of surgical techniques with any form of medical decompression.

Data collection and analysis

Each review author independently assessed study abstracts identified from the electronic and manual searches. Author analysis was then compared and full papers for appropriate studies were obtained according to the inclusion criteria. Disagreements between the authors were resolved by discussion.

Main results

We identified two randomised trials eligible for inclusion in the review. There was significant variability between the trials for interventions, methodology and outcome measures and therefore meta-analysis was not performed. One study suggested that the transantral approach and endoscopic transnasal technique had similar effects in reducing exophthalmos but that the endoscopic approach may be safer, relating to fewer complications. This study had short-term follow-up and lacked information on our primary outcome (success or failure of treatment). The second study provided evidence that intravenous steroids may be superior to primary surgical decompression in the management of compressive optic neuropathy requiring less secondary surgical procedures, although it relates more frequently to transient side effects. This study was weakened by a small sample size. Until more credible evidence is available recommendations as to best treatment cannot be reliably made.

Authors' conclusions

A single study showed that the transantral approach for orbital decompression was related to more complications than the endoscopic transnasal technique which is preferred by Ear, Nose and Throat (ENT) surgeons, usually as an adjunctive procedure. Intravenous steroids were reported in a single trial to be the most efficient intervention for dysthyroid optic neuropathy. The majority of published literature on orbital decompression for thyroid eye disease consists of retrospective, cohort, or case series studies. Although these provide useful descriptive information, clarification is required to show the relative effectiveness of each intervention for various indications. The two RCTs reviewed are not robust enough to provide credible evidence to our understanding of current decompressive surgery and to support recommendations for clinical practice. There is evidence from currently available uncontrolled studies that removal of the medial and lateral wall (balanced decompression) with or without fat removal may be the most effective surgical method related to only a few complications. There is a clear need for randomised studies evaluating the balanced two-wall, three-wall and orbital fat decompression techniques. Comparison with other surgical techniques for orbital decompression or with immunosuppression in cases of compressive optic neuropathy would also be important. These studies should primarily address the reduction of exophthalmos, disease severity, complication rates, quality of life and cost of the intervention.","hji,kes",0,0,0,2,0,NA,NA +22182607,Culture independent survey of the microbiota of the glassy-winged sharpshooter (Homalodisca vitripennis) using 454 pyrosequencing.,"The glassy-winged sharpshooter, Homalodisca vitripennis (Germar), is an invasive pest that has spread across the southern and western United States. H. 
vitripennis is highly polyphagous and voracious, feeding on at least 100 plant species and consuming up to 100 times its weight in xylem fluid daily. The insect is a vector of the phytopathogen Xylella fastidiosa (Wells), which is the causative agent of Pierce's disease in grapevines. To evaluate the microbial flora associated with H. vitripennis, total DNA extracts from hemolymph, alimentary canal excretions, and whole insect bodies were subjected to 16S rDNA pyrosequencing using the bTEFAP methodology and the resulting sequences (370-520 bp in length) were compared with a curated high quality 16S database derived from GenBank http://www.ncbi.nlm.nih.gov. Species from the genera Wolbachia, Delftia (formerly Pseudomonas), Pectobacterium, Moraxella, Serratia, Bacillus, and many others were detected and a comprehensive picture of the microbiome associated with H. vitripennis was established. Some of the bacteria identified in this report are initial discoveries; providing a breadth of knowledge to the microbial flora of this insect pest can serve as a reservoir of information for developing biological control strategies.","hji,kes",0,0,0,2,0,NA,references other data resource +22226708,"Detection, annotation and visualization of alternative splicing from RNA-Seq data with SplicingViewer.","Alternative splicing is a crucial mechanism by which diverse gene products can be generated from a limited number of genes, and is thought to be involved in complex orchestration of eukaryotic gene expression. Next-generation sequencing technologies, with reduced time and cost, provide unprecedented opportunities for deep interrogation of alternative splicing at the genome-wide scale. In this study, an integrated software SplicingViewer has been developed for unambiguous detection, annotation and visualization of splice junctions and alternative splicing events from RNA-Seq data. 
Specifically, it allows easy identification and characterization of splice junctions, and holds a versatile computational pipeline for in-depth annotation and classification of alternative splicing with different patterns. Moreover, it provides a user-friendly environment in which an alternative splicing landscape can be displayed in a straightforward and flexible manner. In conclusion, SplicingViewer can be widely used for studying alternative splicing easily and efficiently. SplicingViewer can be freely accessed at http://bioinformatics.zj.cn/splicingviewer.","hji,kes",0,0,0,2,0,NA,NA +22253280,BamView: visualizing and interpretation of next-generation sequencing read alignments.,"

Unlabelled

So-called next-generation sequencing (NGS) has provided the ability to sequence on a massive scale at low cost, enabling biologists to perform powerful experiments and gain insight into biological processes. BamView has been developed to visualize and analyse sequence reads from NGS platforms, which have been aligned to a reference sequence. It is a desktop application for browsing the aligned or mapped reads [Ruffalo, M, LaFramboise, T, Koyuturk, M. Comparative analysis of algorithms for next-generation sequencing read alignment. Bioinformatics 2011;27:2790-6] at different levels of magnification, from nucleotide level, where the base qualities can be seen, to genome or chromosome level where overall coverage is shown. To enable in-depth investigation of NGS data, various views are provided that can be configured to highlight interesting aspects of the data. Multiple read alignment files can be overlaid to compare results from different experiments, and filters can be applied to facilitate the interpretation of the aligned reads. As well as being a standalone application it can be used as an integrated part of the Artemis genome browser, BamView allows the user to study NGS data in the context of the sequence and annotation of the reference genome. Single nucleotide polymorphism (SNP) density and candidate SNP sites can be highlighted and investigated, and read-pair information can be used to discover large structural insertions and deletions. The application will also calculate simple analyses of the read mapping, including reporting the read counts and reads per kilobase per million mapped reads (RPKM) for genes selected by the user.

Availability

BamView and Artemis are freely available software. These can be downloaded from their home pages: http://bamview.sourceforge.net/; http://www.sanger.ac.uk/resources/software/artemis/. Requirements: Java 1.6 or higher.","hji,kes",0,0,0,2,0,NA,NA +22257670,AnnTools: a comprehensive and versatile annotation toolkit for genomic variants.,"

Unlabelled

AnnTools is a versatile bioinformatics application designed for comprehensive annotation of a full spectrum of human genome variation: novel and known single-nucleotide substitutions (SNP/SNV), short insertions/deletions (INDEL) and structural variants/copy number variation (SV/CNV). The variants are interpreted by interrogating data compiled from 15 constantly updated sources. In addition to detailed functional characterization of the coding variants, AnnTools searches for overlaps with regulatory elements, disease/trait associated loci, known segmental duplications and artifact prone regions, thereby offering an integrated and comprehensive analysis of genomic data. The tool conveniently accepts user-provided tracks for custom annotation and offers flexibility in input data formats. The output is generated in the universal Variant Call Format. High annotation speed makes AnnTools suitable for high-throughput sequencing facilities, while a low-memory footprint and modest CPU requirements allow it to operate on a personal computer. The application is freely available for public use; the package includes installation scripts and a set of helper tools.

Availability

http://anntools.sourceforge.net/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +22280360,The EnzymeTracker: an open-source laboratory information management system for sample tracking.,"

Background

In many laboratories, researchers store experimental data on their own workstation using spreadsheets. However, this approach poses a number of problems, ranging from sharing issues to inefficient data-mining. Standard spreadsheets are also error-prone, as data do not undergo any validation process. To overcome spreadsheets inherent limitations, a number of proprietary systems have been developed, which laboratories need to pay expensive license fees for. Those costs are usually prohibitive for most laboratories and prevent scientists from benefiting from more sophisticated data management systems.

Results

In this paper, we propose the EnzymeTracker, a web-based laboratory information management system for sample tracking, as an open-source and flexible alternative that aims at facilitating entry, mining and sharing of experimental biological data. The EnzymeTracker features online spreadsheets and tools for monitoring numerous experiments conducted by several collaborators to identify and characterize samples. It also provides libraries of shared data such as protocols, and administration tools for data access control using OpenID and user/team management. Our system relies on a database management system for efficient data indexing and management and a user-friendly AJAX interface that can be accessed over the Internet. The EnzymeTracker facilitates data entry by dynamically suggesting entries and providing smart data-mining tools to effectively retrieve data. Our system features a number of tools to visualize and annotate experimental data, and export highly customizable reports. It also supports QR matrix barcoding to facilitate sample tracking.

Conclusions

The EnzymeTracker was designed to be easy to use and offers many benefits over spreadsheets, thus presenting the characteristics required to facilitate acceptance by the scientific community. It has been successfully used for 20 months on a daily basis by over 50 scientists. The EnzymeTracker is freely available online at http://cubique.fungalgenomics.ca/enzymedb/index.html under the GNU GPLv3 license.","hji,kes",0,0,0,2,0,NA,NA +22303397,Accurate microRNA Target Prediction Using Detailed Binding Site Accessibility and Machine Learning on Proteomics Data.,"MicroRNAs (miRNAs) are a class of small regulatory genes regulating gene expression by targeting messenger RNA. Though computational methods for miRNA target prediction are the prevailing means to analyze their function, they still miss a large fraction of the targeted genes and additionally predict a large number of false positives. Here we introduce a novel algorithm called DIANA-microT-ANN which combines multiple novel target site features through an artificial neural network (ANN) and is trained using recently published high-throughput data measuring the change of protein levels after miRNA overexpression, providing positive and negative targeting examples. The features characterizing each miRNA recognition element include binding structure, conservation level, and a specific profile of structural accessibility. The ANN is trained to integrate the features of each recognition element along the 3'untranslated region into a targeting score, reproducing the relative repression fold change of the protein. Tested on two different sets the algorithm outperforms other widely used algorithms and also predicts a significant number of unique and reliable targets not predicted by the other methods. For 542 human miRNAs DIANA-microT-ANN predicts 120000 targets not provided by TargetScan 5.0. 
The algorithm is freely available at http://microrna.gr/microT-ANN.","hji,kes",0,0,0,2,0,NA,NA +22417303,Infrequent p53 gene mutation but UV gradient-like p53 protein positivity in keloids.,"Keloids are characterized by extreme fibroblastic overgrowth of unknown pathogenesis after skin injury. Previous studies, mostly in non-Caucasian populations, suggest that p53 mutations may be involved. To substantiate this, we performed DNA sequence analysis of exons 4-8 of the p53 gene and immunohistochemical staining of p53 protein in archived keloidal tissue samples from 23 Caucasian patients. In contrast to previous reports, we found mutated p53 in keloidal tissue in a minority of cases (2/23; 12%). The G allele frequency and C allele frequency at the p53 polymorphic codon 72 were 0.72 (33/46) and 0.28 (13/46), respectively, in our study, a finding that was similar to the 0.77 (184/240) vs. 0.23 (56/240) (P = 0.4580; chi-squared test) observed in the Hap Map data of a European population but statistically significantly different from the 0.43 (547/1258) vs. 0.57 (711/1258) (P = 0.0002; chi-squared test) observed in the 1000 Genome project [Database of Single Nucleotide Polymorphisms (dbSNP). Bethesda (MD): National Center for Biotechnology Information, National Library of Medicine. dbSNP accession:rs1042522, (dbSNP Build ID: 132). Available from: (http://www.ncbi.nlm.nih.gov/SNP/] a difference most likely due to the different genetic background of the populations enrolled. However, one-third of the keloidal samples showed lesional nuclear p53 staining with a UV penetration gradient-like positivity (P = 0.0084). Staining with an anti-cyclobutane pyrimidine dimer antibody revealed the total absence of short-term photoproducts in the epidermis as well as keloidal tissue. Furthermore, all fibroblasts expressing p53 stained negative for Ki-67, indicating that these cells were in a quiescent stage and p53 upregulation did not contribute to keloidal proliferation. 
We conclude that p53 plays no major role in the pathogenesis of keloids in the Caucasian population.","hji,kes",0,0,0,2,0,NA,references other data resource +22426492,Unsupervised pattern discovery in human chromatin structure through genomic segmentation.,"We trained Segway, a dynamic Bayesian network method, simultaneously on chromatin data from multiple experiments, including positions of histone modifications, transcription-factor binding and open chromatin, all derived from a human chronic myeloid leukemia cell line. In an unsupervised fashion, we identified patterns associated with transcription start sites, gene ends, enhancers, transcriptional regulator CTCF-binding regions and repressed regions. Software and genome browser tracks are at http://noble.gs.washington.edu/proj/segway/.","hji,kes",0,0,0,2,0,NA,NA +22434533,Sann: solvent accessibility prediction of proteins by nearest neighbor method.,"We present a method to predict the solvent accessibility of proteins which is based on a nearest neighbor method applied to the sequence profiles. Using the method, continuous real-value prediction as well as two-state and three-state discrete predictions can be obtained. The method utilizes the z-score value of the distance measure in the feature vector space to estimate the relative contribution among the k-nearest neighbors for prediction of the discrete and continuous solvent accessibility. The Solvent accessibility database is constructed from 5717 proteins extracted from PISCES culling server with the cutoff of 25% sequence identities. 
Using optimal parameters, the prediction accuracies (for discrete predictions) of 78.38% (two-state prediction with the threshold of 25%), 65.1% (three-state prediction with the thresholds of 9 and 36%), and the Pearson correlation coefficient (between the predicted and true RSA's for continuous prediction) of 0.676 are achieved. An independent benchmark test was performed with the CASP8 targets where we find that the proposed method outperforms existing methods. The prediction accuracies are 80.89% (for two state prediction with the threshold of 25%), 67.58% (three-state prediction), and the Pearson correlation coefficient of 0.727 (for continuous prediction) with mean absolute error of 0.148. We have also investigated the effect of increasing database sizes on the prediction accuracy, where additional improvement in the accuracy is observed as the database size increases. The SANN web server is available at http://lee.kias.re.kr/~newton/sann/.","hji,kes",0,0,0,2,0,NA,not found - will be weeded out later even if included +22435961,Healthcare continuity from hospital to territory in Lombardy: TELEMACO project.,"

Objectives

To verify implementation and use of TELEMACO (TELEMedicina Ai piccoli COmuni lombardi; http://www.telemaco.regione.lombardia.it/), which provides specialized continuity of care with innovative healthcare services in remote areas of the Lombardy region of Italy; to design a network in the territory for sharing of continuity-of-care programs; and to allow the relevant health authorities to collect cost data to establish a model for sustainable pricing for implementing these services.

Methods

TELEMACO provides home-based telemanagement services for patients with chronic heart failure and chronic obstructive pulmonary disease (COPD), as well as second-opinion teleconsultations in cardiology, dermatology, diabetology, and pulmonology for general practitioners and second-opinion teleconsultations on digital images in cases of traumatic brain injury and stroke. A total of 2 service centers, 10 cardiology and pneumology departments, 30 specialists, 176 general practitioners, 40 nurses, 2 emergency departments, and 2 consultant hospitals were involved.

Results

A total of 166 patients with chronic heart failure and 474 patients with COPD were enrolled. There were 4830, 51, and 44 second-opinion teleconsultations for cardiologic, dermatologic, and diabetic conditions, respectively. There were 147 second-opinion teleconsultations on digital images, 68 for stroke, and 79 for traumatic brain injury. Implementation of TELEMACO introduced innovations in working methods and provided evidence to the health authorities for allocating funds for such services.

Conclusions

TELEMACO provided evidence that there is a growing need for home management of patients using telemedicine, a common and efficacious approach that can ensure care continuity, especially in chronic diseases.","hji,kes",0,0,0,2,0,NA,NA +22446067,Paint4Net: COBRA Toolbox extension for visualization of stoichiometric models of metabolism.,"A visual analysis of reconstructions and large stoichiometric models with elastic change of the visualization scope and representation methods becomes increasingly important due to the rapidly growing size and number of available reconstructions. The Paint4Net is a novel COBRA Toolbox extension for automatic generation of a hypergraph layout of defined scope with the steady state rates of reaction fluxes of stoichiometric models. Directionalities and fluxes of reactions are constantly represented in the visualization while detailed information about reaction (ID, name and synonyms, and formula) and metabolite (ID, name and synonyms, and charged formula) appears placing the cursor on the item of interest. Additionally Paint4Net functionality can be used to: (1) get lists of involved metabolites and dead end metabolites of the visualized part of the network, (2) exclude (filter) particular metabolites from representation, (3) find isolated parts of a network and (4) find running cycles when all the substrates are cut down. Layout pictures can be saved in various formats and easily distributed. The Paint4Net is open source software under the GPL v3 license. Relevant documentation and sample data is available at http://www.biosystems.lv/paint4net. The Paint4Net works on MATLAB starting from version of 2009.","hji,kes",0,0,0,2,0,NA,NA +22454131,Revealing mammalian evolutionary relationships by comparative analysis of gene clusters.,"Many software tools for comparative analysis of genomic sequence data have been released in recent decades. 
Despite this, it remains challenging to determine evolutionary relationships in gene clusters due to their complex histories involving duplications, deletions, inversions, and conversions. One concept describing these relationships is orthology. Orthologs derive from a common ancestor by speciation, in contrast to paralogs, which derive from duplication. Discriminating orthologs from paralogs is a necessary step in most multispecies sequence analyses, but doing so accurately is impeded by the occurrence of gene conversion events. We propose a refined method of orthology assignment based on two paradigms for interpreting its definition: by genomic context or by sequence content. X-orthology (based on context) traces orthology resulting from speciation and duplication only, while N-orthology (based on content) includes the influence of conversion events. We developed a computational method for automatically mapping both types of orthology on a per-nucleotide basis in gene cluster regions studied by comparative sequencing, and we make this mapping accessible by visualizing the output. All of these steps are incorporated into our newly extended CHAP 2 package. We evaluate our method using both simulated data and real gene clusters (including the well-characterized alpha-globin and beta-globin clusters). We also illustrate use of CHAP 2 by analyzing four more loci: CCL (chemokine ligand), IFN (interferon), CYP2abf (part of cytochrome P450 family 2), and KIR (killer cell immunoglobulin-like receptors). These new methods facilitate and extend our understanding of evolution at these and other loci by adding automated accurate evolutionary inference to the biologist's toolkit. 
The CHAP 2 package is freely available from http://www.bx.psu.edu/miller_lab.","hji,kes",0,0,0,2,0,NA,NA +22459672,"Meeting report: the Schizophrenia International Research Society (SIRS) South America Conference (August 5-7, 2011).","On August 5-7, 2011, Sao Paulo was home to the first regional meeting of the Schizophrenia International Research Society (SIRS). Over 400 people from many countries attended the activities and contributed with around 200 submissions for oral and poster presentations. This article summarizes the data presented during the meeting, with an emphasis on the plenary talks and sessions for short oral presentations. For information on the poster presentations, readers are referred to the special issue of Revista de Psiquiatria Clinica (Brazil) dedicated to the conference (available at: http://www.hcnet.usp.br/ipq/revista/vol38/s1/).","hji,kes",0,0,0,2,0,NA,NA +22479706,Network-based functional enrichment.,"

Background

Many methods have been developed to infer and reason about molecular interaction networks. These approaches often yield networks with hundreds or thousands of nodes and up to an order of magnitude more edges. It is often desirable to summarize the biological information in such networks. A very common approach is to use gene function enrichment analysis for this task. A major drawback of this method is that it ignores information about the edges in the network being analyzed, i.e., it treats the network simply as a set of genes. In this paper, we introduce a novel method for functional enrichment that explicitly takes network interactions into account.

Results

Our approach naturally generalizes Fisher's exact test, a gene set-based technique. Given a function of interest, we compute the subgraph of the network induced by genes annotated to this function. We use the sequence of sizes of the connected components of this sub-network to estimate its connectivity. We estimate the statistical significance of the connectivity empirically by a permutation test. We present three applications of our method: i) determine which functions are enriched in a given network, ii) given a network and an interesting subnetwork of genes within that network, determine which functions are enriched in the sub-network, and iii) given two networks, determine the functions for which the connectivity improves when we merge the second network into the first. Through these applications, we show that our approach is a natural alternative to network clustering algorithms.

Conclusions

We presented a novel approach to functional enrichment that takes into account the pairwise relationships among genes annotated by a particular function. Each of the three applications discovers highly relevant functions. We used our methods to study biological data from three different organisms. Our results demonstrate the wide applicability of our methods. Our algorithms are implemented in C++ and are freely available under the GNU General Public License at our supplementary website. Additionally, all our input data and results are available at http://bioinformatics.cs.vt.edu/~murali/supplements/2011-incob-nbe/.","hji,kes",0,0,0,2,0,NA,iffy - input data available +22609187,EXP-PAC: providing comparative analysis and storage of next generation gene expression data.,"Microarrays and more recently RNA sequencing has led to an increase in available gene expression data. How to manage and store this data is becoming a key issue. In response we have developed EXP-PAC, a web based software package for storage, management and analysis of gene expression and sequence data. Unique to this package is SQL based querying of gene expression data sets, distributed normalization of raw gene expression data and analysis of gene expression data across experiments and species. This package has been populated with lactation data in the international milk genomic consortium web portal (http://milkgenomics.org/). Source code is also available which can be hosted on a Windows, Linux or Mac APACHE server connected to a private or public network (http://mamsap.it.deakin.edu.au/~pcc/Release/EXP_PAC.html).","hji,kes",0,0,0,2,0,NA,NA +22637737,Proliferative and nonproliferative lesions of the rat and mouse central and peripheral nervous systems.,"Harmonization of diagnostic nomenclature used in the pathology analysis of tissues from rodent toxicity studies will enhance the comparability and consistency of data sets from different laboratories worldwide. 
The INHAND Project (International Harmonization of Nomenclature and Diagnostic Criteria for Lesions in Rats and Mice) is a joint initiative of four major societies of toxicologic pathology to develop a globally recognized nomenclature for proliferative and nonproliferative lesions in rodents. This article recommends standardized terms for classifying changes observed in tissues of the mouse and rat central (CNS) and peripheral (PNS) nervous systems. Sources of material include academic, government, and industrial histopathology databases from around the world. Covered lesions include frequent, spontaneous, and aging-related changes as well as principal toxicant-induced findings. Common artifacts that might be confused with genuine lesions are also illustrated. The neural nomenclature presented in this document is also available electronically on the Internet at the goRENI website (http://www.goreni.org/).","hji,kes",0,0,0,2,0,NA,NA +22638579,SteinerNet: a web server for integrating 'omic' data to discover hidden components of response pathways.,"High-throughput technologies including transcriptional profiling, proteomics and reverse genetics screens provide detailed molecular descriptions of cellular responses to perturbations. However, it is difficult to integrate these diverse data to reconstruct biologically meaningful signaling networks. Previously, we have established a framework for integrating transcriptional, proteomic and interactome data by searching for the solution to the prize-collecting Steiner tree problem. Here, we present a web server, SteinerNet, to make this method available in a user-friendly format for a broad range of users with data from any species. At a minimum, a user only needs to provide a set of experimentally detected proteins and/or genes and the server will search for connections among these data from the provided interactomes for yeast, human, mouse, Drosophila melanogaster and Caenorhabditis elegans. 
More advanced users can upload their own interactome data as well. The server provides interactive visualization of the resulting optimal network and downloadable files detailing the analysis and results. We believe that SteinerNet will be useful for researchers who would like to integrate their high-throughput data for a specific condition or cellular response and to find biologically meaningful pathways. SteinerNet is accessible at http://fraenkel.mit.edu/steinernet.","hji,kes",0,0,0,2,0,NA,NA +22638583,"Seq2Logo: a method for construction and visualization of amino acid binding motifs and sequence profiles including sequence weighting, pseudo counts and two-sided representation of amino acid enrichment and depletion.","Seq2Logo is a web-based sequence logo generator. Sequence logos are a graphical representation of the information content stored in a multiple sequence alignment (MSA) and provide a compact and highly intuitive representation of the position-specific amino acid composition of binding motifs, active sites, etc. in biological sequences. Accurate generation of sequence logos is often compromised by sequence redundancy and low number of observations. Moreover, most methods available for sequence logo generation focus on displaying the position-specific enrichment of amino acids, discarding the equally valuable information related to amino acid depletion. Seq2logo aims at resolving these issues allowing the user to include sequence weighting to correct for data redundancy, pseudo counts to correct for low number of observations and different logotype representations each capturing different aspects related to amino acid enrichment and depletion. Besides allowing input in the format of peptides and MSA, Seq2Logo accepts input as Blast sequence profiles, providing easy access for non-expert end-users to characterize and identify functionally conserved/variable amino acids in any given protein of interest. 
The output from the server is a sequence logo and a PSSM. Seq2Logo is available at http://www.cbs.dtu.dk/biotools/Seq2Logo (14 May 2012, date last accessed).","hji,kes",0,0,0,2,0,NA,NA +22645098,A reference microsatellite kit to assess for genetic diversity of Sorghum bicolor (Poaceae).,"

Premise of the study

Discrepancies in terms of genotyping data are frequently observed when comparing simple sequence repeat (SSR) data sets across genotyping technologies and laboratories. This technical concern introduces biases that hamper any synthetic studies or comparison of genetic diversity between collections. To prevent this for Sorghum bicolor, we developed a control kit of 48 SSR markers.

Methods and results

One hundred seventeen markers were selected along the genome to provide coverage across the length of all 10 sorghum linkage groups. They were tested for polymorphism and reproducibility across two laboratories (Centre de Cooperation Internationale en Recherche Agronomique pour le Developpement [CIRAD], France, and International Crops Research Institute for the Semi-Arid Tropics [ICRISAT], India) using two commonly used genotyping technologies (polyacrylamide gel-based technology with LI-COR sequencing machines and capillary systems with ABI sequencing apparatus) with DNA samples from a diverse set of 48 S. bicolor accessions.

Conclusions

A kit for diversity analysis (http://sat.cirad.fr/sat/sorghum_SSR_kit/) was developed. It contains information on 48 technically robust sorghum microsatellite markers and 10 DNA controls. It can further be used to calibrate sorghum SSR genotyping data acquired with different technologies and compare those to genetic diversity references.","hji,kes",0,0,0,2,0,NA,NA +22645320,Integrative analysis of gene and miRNA expression profiles with transcription factor-miRNA feed-forward loops identifies regulators in human cancers.,"We describe here a novel method for integrating gene and miRNA expression profiles in cancer using feed-forward loops (FFLs) consisting of transcription factors (TFs), miRNAs and their common target genes. The dChip-GemiNI (Gene and miRNA Network-based Integration) method statistically ranks computationally predicted FFLs by their explanatory power to account for differential gene and miRNA expression between two biological conditions such as normal and cancer. GemiNI integrates not only gene and miRNA expression data but also computationally derived information about TF-target gene and miRNA-mRNA interactions. Literature validation shows that the integrated modeling of expression data and FFLs better identifies cancer-related TFs and miRNAs compared to existing approaches. We have utilized GemiNI for analyzing six data sets of solid cancers (liver, kidney, prostate, lung and germ cell) and found that top-ranked FFLs account for ~20% of transcriptome changes between normal and cancer. We have identified common FFL regulators across multiple cancer types, such as known FFLs consisting of MYC and miR-15/miR-17 families, and novel FFLs consisting of ARNT, CREB1 and their miRNA partners. 
The results and analysis web server are available at http://www.canevolve.org/dChip-GemiNi.","hji,kes",0,0,0,2,0,NA,NA +22659403,Determining pair distance distribution function from SAXS data using parametric functionals.,"Small angle X-ray scattering (SAXS) experiments are widely applied in structural biology. The SAXS experiments yield one-dimensional profile that needs further analysis to reveal structural information. The pair distance distribution function (PDDF), P(r), can provide molecular structures more intuitively, and it can be used to guide ab initio model reconstructions, making it a critical step to derive P(r) from experimental SAXS profiles. To calculate the P(r) curves, a new method based on a specially designed parametric functional form is developed, and implemented in pregxs. This method is tested against both synthetic and experimental data, the estimated P(r) functions are in good agreement with correct or known P(r). The method can also predict the molecular size. In summary, the pregxs method is robust and accurate in P(r) determination from SAXS profiles. The pregxs source code and an online server are available at http://www.sastbx.als.lbl.gov.","hji,kes",0,0,0,2,0,NA,NA +22672126,"Trends in midwife-attended births in the United States, 1989-2009.","

Introduction

Data on attendance at birth by midwives in the United States have been available on the national level since 1989. Rates of certified nurse-midwife (CNM)-attended births more than doubled between 1989 (3.3% of all births) and 2002 (7.7%) and have remained steady since. This article examines trends in midwife-attended births from 1989 to 2009.

Methods

The data in this report are based on records gathered as part of the US National Standard Certificate of Live Birth from a public use Web site, Vital Stats (http://www.cdc.gov/nchs/VitalStats.htm), that allows users to create and download specialized tables.

Results

Between 2007 and 2009, the proportion of all births attended by CNMs increased by 4% from 7.3% of all births to 7.6% and a total of 313,516. This represents a decline in total births attended by CNMs from 2008 but a higher proportion of all births because total US births dropped at a faster rate. The proportion of vaginal births attended by CNMs reached an all-time high of 11.4% in 2009. There were strong regional patterns to the distribution of CNM-attended births. Births attended by """"other midwives"""" rose to 21,787 or 0.5% of all US births, and the total proportion of all births attended by midwives reached an all-time high of 8.1%. The race/ethnicity of mothers attended by CNMs has shifted over the years. In 1990, CNMs attended a disproportionately high number of births to non-white mothers, whereas in 2009, the profile of CNM births mirrors the national distribution in race/ethnicity.

Discussion

Midwife-attended births in the United States are increasing. The geographic patterns in the distribution of midwife-attended births warrant further study.","hji,kes",0,0,0,2,0,NA,NA +22672254,The gastrointestinal electrical mapping suite (GEMS): software for analyzing and visualizing high-resolution (multi-electrode) recordings in spatiotemporal detail.,"

Background

Gastrointestinal contractions are controlled by an underlying bioelectrical activity. High-resolution spatiotemporal electrical mapping has become an important advance for investigating gastrointestinal electrical behaviors in health and motility disorders. However, research progress has been constrained by the low efficiency of the data analysis tasks. This work introduces a new efficient software package: GEMS (Gastrointestinal Electrical Mapping Suite), for analyzing and visualizing high-resolution multi-electrode gastrointestinal mapping data in spatiotemporal detail.

Results

GEMS incorporates a number of new and previously validated automated analytical and visualization methods into a coherent framework coupled to an intuitive and user-friendly graphical user interface. GEMS is implemented using MATLAB, which combines sophisticated mathematical operations and GUI compatibility. Recorded slow wave data can be filtered via a range of inbuilt techniques, efficiently analyzed via automated event-detection and cycle clustering algorithms, and high quality isochronal activation maps, velocity field maps, amplitude maps, frequency (time interval) maps and data animations can be rapidly generated. Normal and dysrhythmic activities can be analyzed, including initiation and conduction abnormalities. The software is distributed free to academics via a community user website and forum (http://sites.google.com/site/gimappingsuite).

Conclusions

This software allows for the rapid analysis and generation of critical results from gastrointestinal high-resolution electrical mapping data, including quantitative analysis and graphical outputs for qualitative analysis. The software is designed to be used by non-experts in data and signal processing, and is intended to be used by clinical researchers as well as physiologists and bioengineers. The use and distribution of this software package will greatly accelerate efforts to improve the understanding of the causes and clinical consequences of gastrointestinal electrical disorders, through high-resolution electrical mapping.","hji,kes",0,0,0,2,0,NA,NA +22689643,MoNetFamily: a web server to infer homologous modules and module-module interaction networks in vertebrates.,"A module is a fundamental unit forming with highly connected proteins and performs a certain kind of biological functions. Modules and module-module interaction (MMI) network are essential for understanding cellular processes and functions. The MoNetFamily web server can identify the modules, homologous modules (called module family) and MMI networks across multiple species for the query protein(s). This server first finds module candidates of the query by using BLASTP to search the module template database (1785 experimental and 1252 structural templates). MoNetFamily then infers the homologous modules of the selected module candidate using protein-protein interaction (PPI) families. According to homologous modules and PPIs, we statistically calculated MMIs and MMI networks across multiple species. For each module candidate, MoNetFamily identifies its neighboring modules and their MMIs in module networks of Homo sapiens, Mus musculus and Danio rerio. Finally, MoNetFamily shows the conserved proteins, PPI profiles and functional annotations of the module family. Our results indicate that the server can be useful for MMI network (e.g. 1818 modules and 9678 MMIs in H. 
sapiens) visualizations and query annotations using module families and neighboring modules. We believe that the server is able to provide valuable insights to determine homologous modules and MMI networks across multiple species for studying module evolution and cellular processes. The MoNetFamily sever is available at http://monetfamily.life.nctu.edu.tw.","hji,kes",0,0,0,2,0,NA,NA +22705213,How predictable is the position of third molars over time?,"

Purpose

The purpose of this study was to review contemporaneous longitudinal studies focused on changes in the position of third molars.

Materials and methods

A systematic search of the National Library of Medicine (PubMed, http://www.pubmed.gov) and the Cochrane Central Register of Controlled Trials (http://www.mrw.interscience.wiley.com/cochrane) was conducted to identify eligible articles. The inclusion criteria were 1) longitudinal assessment (retrospective or prospective); 2) published in English; and 3) full text available online or at the University of North Carolina Health Sciences Library.

Results

Five studies met the inclusion criteria. The status of third molars with respect to eruption/angulation was operationalized in multiple ways, making any comparison of the frequency of changes in position difficult. The major findings of each study are reviewed.

Conclusions

Few longitudinal data exist on the changes over time of impacted third molars. Impacted teeth that remain static, with no changes in position or angulation over time, are rare.","hji,kes",0,0,0,2,0,NA,NA +22710135,TWARIT: an extremely rapid and efficient approach for phylogenetic classification of metagenomic sequences.,"Phylogenetic assignment of individual sequence reads to their respective taxa, referred to as 'taxonomic binning', constitutes a key step of metagenomic analysis. Existing binning methods have limitations either with respect to time or accuracy/specificity of binning. Given these limitations, development of a method that can bin vast amounts of metagenomic sequence data in a rapid, efficient and computationally inexpensive manner can profoundly influence metagenomic analysis in computational resource poor settings. We introduce TWARIT, a hybrid binning algorithm, that employs a combination of short-read alignment and composition-based signature sorting approaches to achieve rapid binning rates without compromising on binning accuracy and specificity. TWARIT is validated with simulated and real-world metagenomes and the results demonstrate significantly lower overall binning times compared to that of existing methods. Furthermore, the binning accuracy and specificity of TWARIT are observed to be comparable/superior to them. A web server implementing TWARIT algorithm is available at http://metagenomics.atc.tcs.com/Twarit/","hji,kes",0,0,0,2,0,NA,NA +22711791,Bluues server: electrostatic properties of wild-type and mutated protein structures.,"

Motivation

Electrostatic calculations are an important tool for deciphering many functional mechanisms in proteins. Generalized Born (GB) models offer a fast and convenient computational approximation over other implicit solvent-based electrostatic models. Here we present a novel GB-based web server, using the program Bluues, to calculate numerous electrostatic features including pKa-values and surface potentials. The output is organized allowing both experts and beginners to rapidly sift the data. A novel feature of the Bluues server is that it explicitly allows to find electrostatic differences between wild-type and mutant structures.

Availability

The Bluues server, examples and extensive help files are available for non-commercial use at URL: http://protein.bio.unipd.it/bluues/.","hji,kes",0,0,0,2,0,NA,NA +22711792,Bellerophontes: an RNA-Seq data analysis framework for chimeric transcripts discovery based on accurate fusion model.,"

Motivation

Next-generation sequencing technology allows the detection of genomic structural variations, novel genes and transcript isoforms from the analysis of high-throughput data. In this work, we propose a new framework for the detection of fusion transcripts through short paired-end reads which integrates splicing-driven alignment and abundance estimation analysis, producing a more accurate set of reads supporting the junction discovery and taking into account also not annotated transcripts. Bellerophontes performs a selection of putative junctions on the basis of a match to an accurate gene fusion model.

Results

We report the fusion genes discovered by the proposed framework on experimentally validated biological samples of chronic myelogenous leukemia (CML) and on public NCBI datasets, for which Bellerophontes is able to detect the exact junction sequence. With respect to state-of-art approaches, Bellerophontes detects the same experimentally validated fusions, however, it is more selective on the total number of detected fusions and provides a more accurate set of spanning reads supporting the junctions. We finally report the fusions involving non-annotated transcripts found in CML samples.

Availability and implementation

Bellerophontes JAVA/Perl/Bash software implementation is free and available at http://eda.polito.it/bellerophontes/.","hji,kes",0,0,0,2,0,NA,NA +22759425,Computing the protein binding sites.,"

Background

Identifying the location of binding sites on proteins is of fundamental importance for a wide range of applications including molecular docking, de novo drug design, structure identification and comparison of functional sites. Structural genomic projects are beginning to produce protein structures with unknown functions. Therefore, efficient methods are required if all these structures are to be properly annotated. Lots of methods for finding binding sites involve 3D structure comparison. Here we design a method to find protein binding sites by direct comparison of protein 3D structures.

Results

We have developed an efficient heuristic approach for finding similar binding sites from the surface of given proteins. Our approach consists of three steps: local sequence alignment, protein surface detection, and 3D structures comparison. We implement the algorithm and produce a software package that works well in practice. When comparing a complete protein with all complete protein structures in the PDB database, experiments show that the average recall value of our approach is 82% and the average precision value of our approach is also significantly better than the existing approaches.

Conclusions

Our program has much higher recall values than those existing programs. Experiments show that all the existing approaches have recall values less than 50%. This implies that more than 50% of real binding sites cannot be reported by those existing approaches. The software package is available at http://sites.google.com/site/guofeics/bsfinder.","hji,kes",0,0,0,2,0,NA,NA +22780965,ngLOC: software and web server for predicting protein subcellular localization in prokaryotes and eukaryotes.,"

Background

Understanding protein subcellular localization is a necessary component toward understanding the overall function of a protein. Numerous computational methods have been published over the past decade, with varying degrees of success. Despite the large number of published methods in this area, only a small fraction of them are available for researchers to use in their own studies. Of those that are available, many are limited by predicting only a small number of organelles in the cell. Additionally, the majority of methods predict only a single location for a sequence, even though it is known that a large fraction of the proteins in eukaryotic species shuttle between locations to carry out their function.

Findings

We present a software package and a web server for predicting the subcellular localization of protein sequences based on the ngLOC method. ngLOC is an n-gram-based Bayesian classifier that predicts subcellular localization of proteins both in prokaryotes and eukaryotes. The overall prediction accuracy varies from 89.8% to 91.4% across species. This program can predict 11 distinct locations each in plant and animal species. ngLOC also predicts 4 and 5 distinct locations on gram-positive and gram-negative bacterial datasets, respectively.

Conclusions

ngLOC is a generic method that can be trained by data from a variety of species or classes for predicting protein subcellular localization. The standalone software is freely available for academic use under GNU GPL, and the ngLOC web server is also accessible at http://ngloc.unmc.edu.","hji,kes",0,0,0,2,0,NA,NA +22796958,A regression model for estimating DNA copy number applied to capture sequencing data.,"

Motivation

Target enrichment, also referred to as DNA capture, provides an effective way to focus sequencing efforts on a genomic region of interest. Capture data are typically used to detect single-nucleotide variants. It can also be used to detect copy number alterations, which is particularly useful in the context of cancer, where such changes occur frequently. In copy number analysis, it is a common practice to determine log-ratios between test and control samples, but this approach results in a loss of information as it disregards the total coverage or intensity at a locus.

Results

We modeled the coverage or intensity of the test sample as a linear function of the control sample. This regression approach is able to deal with regions that are completely deleted, which are problematic for methods that use log-ratios. To demonstrate the utility of our approach, we used capture data to determine copy number for a set of 600 genes in a panel of nine breast cancer cell lines. We found high concordance between our results and those generated using a single-nucleotide polymorphsim genotyping platform. When we compared our results with other log-ratio-based methods, including ExomeCNV, we found that our approach produced better overall correlation with SNP data.

Availability

The algorithm is implemented in C and R and the code can be downloaded from http://bioinformatics.nki.nl/ocs/

Contact

l.wessels@nki.nl

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +22815359,Quantifying uniformity of mapped reads.,"

Unlabelled

We describe a tool for quantifying the uniformity of mapped reads in high-throughput sequencing experiments. Our statistic directly measures the uniformity of both read position and fragment length, and we explain how to compute a P-value that can be used to quantify biases arising from experimental protocols and mapping procedures. Our method is useful for comparing different protocols in experiments such as RNA-Seq.

Availability and implementation

We provide a freely available and open source python script that can be used to analyze raw read data or reads mapped to transcripts in BAM format at http://www.math.miami.edu/~vhower/ReadSpy.html.","hji,kes",0,0,0,2,0,NA,NA +22824207,ACPYPE - AnteChamber PYthon Parser interfacE.,"

Background

ACPYPE (or AnteChamber PYthon Parser interfacE) is a wrapper script around the ANTECHAMBER software that simplifies the generation of small molecule topologies and parameters for a variety of molecular dynamics programmes like GROMACS, CHARMM and CNS. It is written in the Python programming language and was developed as a tool for interfacing with other Python based applications such as the CCPN software suite (for NMR data analysis) and ARIA (for structure calculations from NMR data). ACPYPE is open source code, under GNU GPL v3, and is available as a stand-alone application at http://www.ccpn.ac.uk/acpype and as a web portal application at http://webapps.ccpn.ac.uk/acpype.

Findings

We verified the topologies generated by ACPYPE in three ways: by comparing with default AMBER topologies for standard amino acids; by generating and verifying topologies for a large set of ligands from the PDB; and by recalculating the structures for 5 protein-ligand complexes from the PDB.

Conclusions

ACPYPE is a tool that simplifies the automatic generation of topology and parameters in different formats for different molecular mechanics programmes, including calculation of partial charges, while being object oriented for integration with other applications.","hji,kes",0,0,0,2,0,NA,NA +22847931,Deep architectures for protein contact map prediction.,"

Motivation

Residue-residue contact prediction is important for protein structure prediction and other applications. However, the accuracy of current contact predictors often barely exceeds 20% on long-range contacts, falling short of the level required for ab initio structure prediction.

Results

Here, we develop a novel machine learning approach for contact map prediction using three steps of increasing resolution. First, we use 2D recursive neural networks to predict coarse contacts and orientations between secondary structure elements. Second, we use an energy-based method to align secondary structure elements and predict contact probabilities between residues in contacting alpha-helices or strands. Third, we use a deep neural network architecture to organize and progressively refine the prediction of contacts, integrating information over both space and time. We train the architecture on a large set of non-redundant proteins and test it on a large set of non-homologous domains, as well as on the set of protein domains used for contact prediction in the two most recent CASP8 and CASP9 experiments. For long-range contacts, the accuracy of the new CMAPpro predictor is close to 30%, a significant increase over existing approaches.

Availability

CMAPpro is available as part of the SCRATCH suite at http://scratch.proteomics.ics.uci.edu/.

Contact

pfbaldi@uci.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +22848493,"qPMS7: a fast algorithm for finding (ℓ, d)-motifs in DNA and protein sequences.","Detection of rare events happening in a set of DNA/protein sequences could lead to new biological discoveries. One kind of such rare events is the presence of patterns called motifs in DNA/protein sequences. Finding motifs is a challenging problem since the general version of motif search has been proven to be intractable. Motifs discovery is an important problem in biology. For example, it is useful in the detection of transcription factor binding sites and transcriptional regulatory elements that are very crucial in understanding gene function, human disease, drug design, etc. Many versions of the motif search problem have been proposed in the literature. One such is the (l, d)-motif search (or Planted Motif Search (PMS)). A generalized version of the PMS problem, namely, Quorum Planted Motif Search (qPMS), is shown to accurately model motifs in real data. However, solving the qPMS problem is an extremely difficult task because a special case of it, the PMS Problem, is already NP-hard, which means that any algorithm solving it can be expected to take exponential time in the worse case scenario. In this paper, we propose a novel algorithm named qPMS7 that tackles the qPMS problem on real data as well as challenging instances. Experimental results show that our Algorithm qPMS7 is on an average 5 times faster than the state-of-art algorithm. The executable program of Algorithm qPMS7 is freely available on the web at http://pms.engr.uconn.edu/downloads/qPMS7.zip. 
Our online motif discovery tools that use Algorithm qPMS7 are freely available at http://pms.engr.uconn.edu or http://motifsearch.com.","hji,kes",0,0,0,2,0,NA,NA +22859915,Assessing drug target association using semantic linked data.,"The rapidly increasing amount of public data in chemistry and biology provides new opportunities for large-scale data mining for drug discovery. Systematic integration of these heterogeneous sets and provision of algorithms to data mine the integrated sets would permit investigation of complex mechanisms of action of drugs. In this work we integrated and annotated data from public datasets relating to drugs, chemical compounds, protein targets, diseases, side effects and pathways, building a semantic linked network consisting of over 290,000 nodes and 720,000 edges. We developed a statistical model to assess the association of drug target pairs based on their relation with other linked objects. Validation experiments demonstrate the model can correctly identify known direct drug target pairs with high precision. Indirect drug target pairs (for example drugs which change gene expression level) are also identified but not as strongly as direct pairs. We further calculated the association scores for 157 drugs from 10 disease areas against 1683 human targets, and measured their similarity using a [Formula: see text] score matrix. The similarity network indicates that drugs from the same disease area tend to cluster together in ways that are not captured by structural similarity, with several potential new drug pairings being identified. This work thus provides a novel, validated alternative to existing drug target prediction algorithms. The web service is freely available at: http://chem2bio2rdf.org/slap.","hji,kes",0,0,0,2,0,NA,not available +22861649,A re-evaluation of 9-HODE activity at TRPV1 channels in comparison with anandamide: enantioselectivity and effects at other TRP channels and in sensory neurons.,"

Background and purpose

Two oxidation products of linoleic acid, 9- and 13-hydroxy-octadecadienoic acids (HODEs), have recently been suggested to act as endovanilloids, that is, endogenous agonists of transient receptor potential vanilloid-1 (TRPV1) channels, thereby contributing to inflammatory hyperalgesia in rats. However, HODE activity at rat TRPV1 in comparison with the best established endovanilloid, anandamide, and its enantioselectivity and selectivity towards other TRP channels that are also abundant in sensory neurons have never been investigated.

Experimental approach

We studied the effect of 9(R)-HODE, 9(S)-HODE, (+/-)13-HODE, 15(S)-hydroxyanandamide and anandamide on [Ca(2+) ](i) in HEK-293 cells stably expressing the rat or human recombinant TRPV1, or rat recombinant TRPV2, TRPA1 or TRPM8, and also the effect of 9(S)-HODE in rat dorsal root ganglion (DRG) neurons by calcium imaging.

Key results

Anandamide and 15(S)-hydroxyanandamide were the most potent endovanilloids at human TRPV1, whereas 9(S)-HODE was approximately threefold less efficacious and 75- and 3-fold less potent, respectively, and did not perform much better at rat TRPV1. The 9(R)-HODE and (+/-)13-HODE were almost inactive at TRPV1. Unlike anandamide and 15(S)-hydroxyanandamide, all HODEs were very weak at desensitizing TRPV1 to the action of capsaicin, but activated rat TRPV2 [only (+/-)13-HODE] and rat TRPA1, and antagonized rat TRPM8, at concentrations higher than those required to activate TRPV1. Finally, 9(S)-HODE elevated [Ca(2+) ](i) in DRG neurons almost exclusively in capsaicin-sensitive cells but only at concentrations between 25 and 100 M.

Conclusions and implications

The present data suggest that HODEs are less important endovanilloids than anandamide.

Linked articles

This article is part of a themed section on Cannabinoids. To view the other articles in this section visit http://dx.doi.org/10.1111/bph.2012.167.issue-8.","hji,kes",0,0,0,2,0,NA,NA +22915736,Plant B vitamin pathways and their compartmentation: a guide for the perplexed.,"The B vitamins and the cofactors derived from them are essential for life. B vitamin synthesis in plants is consequently as crucial to plants themselves as it is to humans and animals, whose B vitamin nutrition depends largely on plants. The synthesis and salvage pathways for the seven plant B vitamins are now broadly known, but certain enzymes and many transporters have yet to be identified, and the subcellular locations of various reactions are unclear. Although very substantial, what is not known about plant B vitamin pathways is regrettably difficult to discern from the literature or from biochemical pathway databases. Nor do databases accurately represent all that is known about B vitamin pathways-above all their compartmentation-because the facts are scattered throughout the literature, and thus hard to piece together. These problems (i) deter discoveries because newcomers to B vitamins cannot see which mysteries still need solving; and (ii) impede metabolic reconstruction and modelling of B vitamin pathways because genes for reactions or transport steps are missing. This review therefore takes a fresh approach to capture current knowledge of B vitamin pathways in plants. The synthesis pathways, key salvage routes, and their subcellular compartmentation are surveyed in depth, and encoded in the SEED database (http://pubseed.theseed.org/seedviewer.cgi?page=PlantGateway) for Arabidopsis and maize. The review itself and the encoded pathways specifically identify enigmatic or missing reactions, enzymes, and transporters. 
The SEED-encoded B vitamin pathway collection is a publicly available, expertly curated, one-stop resource for metabolic reconstruction and modeling.","hji,kes",0,0,0,2,0,NA,NA +22954625,Performance reproducibility index for classification.,"

Motivation

A common practice in biomarker discovery is to decide whether a large laboratory experiment should be carried out based on the results of a preliminary study on a small set of specimens. Consideration of the efficacy of this approach motivates the introduction of a probabilistic measure, for whether a classifier showing promising results in a small-sample preliminary study will perform similarly on a large independent sample. Given the error estimate from the preliminary study, if the probability of reproducible error is low, then there is really no purpose in substantially allocating more resources to a large follow-on study. Indeed, if the probability of the preliminary study providing likely reproducible results is small, then why even perform the preliminary study?

Results

This article introduces a reproducibility index for classification, measuring the probability that a sufficiently small error estimate on a small sample will motivate a large follow-on study. We provide a simulation study based on synthetic distribution models that possess known intrinsic classification difficulties and emulate real-world scenarios. We also set up similar simulations on four real datasets to show the consistency of results. The reproducibility indices for different distributional models, real datasets and classification schemes are empirically calculated. The effects of reporting and multiple-rule biases on the reproducibility index are also analyzed.

Availability

We have implemented in C code the synthetic data distribution model, classification rules, feature selection routine and error estimation methods. The source code is available at http://gsp.tamu.edu/Publications/supplementary/yousefi12a/.","hji,kes",0,0,0,2,0,NA,NA +22962342,GREVE: Genomic Recurrent Event ViEwer to assist the identification of patterns across individual cancer samples.,"

Summary

GREVE has been developed to assist with the identification of recurrent genomic aberrations across cancer samples. The exact characterization of such aberrations remains a challenge despite the availability of increasing amount of data, from SNParray to next-generation sequencing. Furthermore, genomic aberrations in cancer are especially difficult to handle because they are, by nature, unique to the patients. However, their recurrence in specific regions of the genome has been shown to reflect their relevance in the development of tumors. GREVE makes use of previously characterized events to identify such regions and focus any further analysis.

Availability

GREVE is available through a web interface and open-source application (http://www.well.ox.ac.uk/GREVE).","hji,kes",0,0,0,2,0,NA,NA +22998891,Patterns of medication initiation in newly diagnosed diabetes mellitus: quality and cost implications.,"Currently, 25 million Americans are known to have diabetes, with an additional 7 million cases believed to be undiagnosed. It is estimated that direct and indirect costs of diabetes top $200 billion. Due to the significant health and financial burdens associated with diabetes, it is imperative that this disease be treated quickly and aggressively. In 2009, the American Diabetes Association and the European Association for the Study of Diabetes developed a consensus statement regarding the treatment of type 2 diabetes, citing lifestyle modification and metformin as the preferred first line therapies. In this study, the authors looked at prescription claims data for adults who were newly initiated on oral hypoglycemic monotherapy between January 1, 2006, and December 31, 2008, to determine if initiation patterns changed over time, to evaluate how well the treatment guidelines were being followed, and to assess the economic consequences of prescribing patterns by drug class for both patients and insurers. The results showed that over the course of the study period the proportion of patients initially treated with metformin increased, whereas those receiving sulfonylureas as first-line therapy decreased. Thiazolidinediones experienced the greatest decrease, falling from 20% to 8%, while prescriptions for dipeptidyl peptidase-4 inhibitors increase from 0-7%. Over a 6-month period, patients taking metformin or sulfonylureas paid approximately $38 to $40 in co-pays while insurance paid about $77. Patients taking other agents paid approximately $130 in co-pays and insurance paid over $500. 
The authors concluded that based its cost and safety profile, metformin should be the first line drug therapy for patients with newly diagnosed type 2 diabetes. This CME multimedia activity, which is part of a 2-part multimedia activity on the management and treatment of diabetes, contains a video presentation and is available through the website of The American Journal of Medicine at http://amjmed.com/content/multimedia. Click on """"Patterns of Medication Initiation in Newly Diagnosed Diabetes Mellitus: Quality and Cost Implications"""" to access this part of the multimedia program.","hji,kes",0,0,0,2,0,NA,NA +23003214,Bulk superconductivity in bismuth oxysulfide Bi4O4S3.,"A very recent report on the observation of superconductivity in Bi(4)O(4)S(3) [Mizuguchi, Y.; http://arxiv.org/abs/1207.3145] could potentially reignite the search for superconductivity in a broad range of layered sulfides. We report here the synthesis of Bi(4)O(4)S(3) at 500 C by a vacuum encapsulation technique and its basic characterizations. The as-synthesized Bi(4)O(4)S(3) was contaminated with small amounts of Bi(2)S(3) and Bi impurities. The majority phase was found to be tetragonal (space group I4/mmm) with lattice parameters a = 3.9697(2) and c = 41.3520(1) . Both AC and DC magnetization measurements confirmed that Bi(4)O(4)S(3) is a bulk superconductor with a superconducting transition temperature (T(c)) of 4.4 K. Isothermal magnetization (M-H) measurements indicated closed loops with clear signatures of flux pinning and irreversible behavior. The lower critical field (H(c1)) at 2 K for the new superconductor was found to be ~15 Oe. Magnetotransport measurements showed a broadening of the resistivity () and a decrease in T(c) ( = 0) with increasing magnetic field. The extrapolated upper critical field H(c2)(0) was ~31 kOe with a corresponding Ginzburg-Landau coherence length of ~100 . In the normal state, the ~ T(2) dependence was not indicated. 
Hall resistivity data showed a nonlinear magnetic field dependence. Our magnetization and electrical transport measurements substantiate the appearance of bulk superconductivity in as-synthesized Bi(4)O(4)S(3). On the other hand, Bi heat-treated at the same temperature is not superconducting, thus excluding the possibility of impurity-driven superconductivity in the newly discovered superconductor Bi(4)O(4)S(3).","hji,kes",0,0,0,2,0,NA,NA +23023983,Adding unaligned sequences into an existing alignment using MAFFT and LAST.,"

Unlabelled

Two methods to add unaligned sequences into an existing multiple sequence alignment have been implemented as the '--add' and '--addfragments' options in the MAFFT package. The former option is a basic one and applicable only to full-length sequences, whereas the latter option is applicable even when the unaligned sequences are short and fragmentary. These methods internally infer the phylogenetic relationship among the sequences in the existing alignment and the phylogenetic positions of unaligned sequences. Benchmarks based on two independent simulations consistently suggest that the """"--addfragments"""" option outperforms recent methods, PaPaRa and PAGAN, in accuracy for difficult problems and that these three methods appropriately handle easy problems.

Availability

http://mafft.cbrc.jp/alignment/software/

Contact

katoh@ifrec.osaka-u.ac.jp

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +23023984,InterMine: a flexible data warehouse system for the integration and analysis of heterogeneous biological data.,"

Summary

InterMine is an open-source data warehouse system that facilitates the building of databases with complex data integration requirements and a need for a fast customizable query facility. Using InterMine, large biological databases can be created from a range of heterogeneous data sources, and the extensible data model allows for easy integration of new data types. The analysis tools include a flexible query builder, genomic region search and a library of 'widgets' performing various statistical analyses. The results can be exported in many commonly used formats. InterMine is a fully extensible framework where developers can add new tools and functionality. Additionally, there is a comprehensive set of web services, for which client libraries are provided in five commonly used programming languages.

Availability

Freely available from http://www.intermine.org under the LGPL license.

Contact

g.micklem@gen.cam.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,for creating databases… +23028969,Transcriptome tomography for brain analysis in the web-accessible anatomical space.,"Increased information on the encoded mammalian genome is expected to facilitate an integrated understanding of complex anatomical structure and function based on the knowledge of gene products. Determination of gene expression-anatomy associations is crucial for this understanding. To elicit the association in the three-dimensional (3D) space, we introduce a novel technique for comprehensive mapping of endogenous gene expression into a web-accessible standard space: Transcriptome Tomography. The technique is based on conjugation of sequential tissue-block sectioning, all fractions of which are used for molecular measurements of gene expression densities, and the block- face imaging, which are used for 3D reconstruction of the fractions. To generate a 3D map, tissues are serially sectioned in each of three orthogonal planes and the expression density data are mapped using a tomographic technique. This rapid and unbiased mapping technique using a relatively small number of original data points allows researchers to create their own expression maps in the broad anatomical context of the space. In the first instance we generated a dataset of 36,000 maps, reconstructed from data of 61 fractions measured with microarray, covering the whole mouse brain (ViBrism: http://vibrism.riken.jp/3dviewer/ex/index.html) in one month. After computational estimation of the mapping accuracy we validated the dataset against existing data with respect to the expression location and density. To demonstrate the relevance of the framework, we showed disease related expression of Huntington's disease gene and Bdnf. 
Our tomographic approach is applicable to analysis of any biological molecules derived from frozen tissues, organs and whole embryos, and the maps are spatially isotropic and well suited to the analysis in the standard space (e.g. Waxholm Space for brain-atlas databases). This will facilitate research creating and using open-standards for a molecular-based understanding of complex structures; and will contribute to new insights into a broad range of biological and medical questions.","hji,kes",0,0,0,2,0,NA,NA +23088694,Timing is everything: when to consult palliative care.,"

Purpose

Consults promote additional perspectives and help with complex patient management. As the population ages and healthcare demands increase, providers are consulting palliative care (PC). Nurse practitioners (NPs) should understand when to consult PC.

Data sources

Information was obtained from an extensive search of the scientific literature to include Pallimed (http://www.pallimed.org/) and the author's clinical experience.

Conclusions

Based on the 2009 Clinical Practice Guidelines for Quality PC developed from the Hospice and PC Coalition, PC should be consulted at diagnosis. These findings have also been validated in a landmark randomized controlled trial by Temel et al. (2010). The goals of PC are to alleviate suffering and promote quality of life for people with illnesses. PC accepts and incorporates hospice philosophies, but is distinct.

Implications for practice

Many professional organizations are incorporating PC into their specialties and guidelines. In addition to incorporating PC at diagnosis, PC access needs to be improved. New and experienced NPs may appreciate collaborating with PC specialists. Such conversations and relationships will likely offer practical and supportive guidance to both patients and NPs. Overall, the future for PC is promising.","hji,kes",0,0,0,2,0,NA,clinical +23118487,CellLineNavigator: a workbench for cancer cell line analysis.,"The CellLineNavigator database, freely available at http://www.medicalgenomics.org/celllinenavigator, is a web-based workbench for large scale comparisons of a large collection of diverse cell lines. It aims to support experimental design in the fields of genomics, systems biology and translational biomedical research. Currently, this compendium holds genome wide expression profiles of 317 different cancer cell lines, categorized into 57 different pathological states and 28 individual tissues. To enlarge the scope of CellLineNavigator, the database was furthermore closely linked to commonly used bioinformatics databases and knowledge repositories. To ensure easy data access and search ability, a simple data and an intuitive querying interface were implemented. It allows the user to explore and filter gene expression, focusing on pathological or physiological conditions. For a more complex search, the advanced query interface may be used to query for (i) differentially expressed genes; (ii) pathological or physiological conditions; or (iii) gene names or functional attributes, such as Kyoto Encyclopaedia of Genes and Genomes pathway maps. These queries may also be combined. 
Finally, CellLineNavigator allows additional advanced analysis of differentially regulated genes by a direct link to the Database for Annotation, Visualization and Integrated Discovery (DAVID) Bioinformatics Resources.","hji,kes",0,0,0,2,0,NA,NA +23140436,A possible strategy against head and neck cancer: in silico investigation of three-in-one inhibitors.,"Overexpression of epidermal growth factor receptor (EGFR), Her2, and uroporphyrinogen decarboxylase (UROD) occurs in a variety of malignant tumor tissues. UROD has potential to modulate tumor response of radiotherapy for head and neck cancer, and EGFR and Her2 are common drug targets for the treatment of head and neck cancer. This study attempts to find a possible lead compound backbone from TCM Database@Taiwan ( http://tcm.cmu.edu.tw/ ) for EGFR, Her2, and UROD proteins against head and neck cancer using computational techniques. Possible traditional Chinese medicine (TCM) lead compounds had potential binding affinities with EGFR, Her2, and UROD proteins. The candidates formed stable interactions with residues Arg803, Thr854 in EGFR, residues Thr862, Asp863 in Her2 protein, and residues Arg37, Arg41 in UROD protein, which are key residues in the binding or catalytic domain of EGFR, Her2, and UROD proteins. Thus, the TCM candidates indicated a possible molecule backbone for evolving potential inhibitors for three drug target proteins against head and neck cancer.","hji,kes",0,0,0,2,0,NA,NA +23153116,Unipept: tryptic peptide-based biodiversity analysis of metaproteome samples.,"The Unipept web application (http://unipept.ugent.be) supports biodiversity analysis of large and complex metaproteome samples using tryptic peptide information obtained from shotgun MS/MS experiments. Its underlying index structure is designed to quickly retrieve all occurrences of a tryptic peptide in UniProtKB records. 
Taxon-specificity of the tryptic peptide is successively derived from these occurrences using a novel lowest common ancestor approach that is robust against taxonomic misarrangements, misidentifications, and inaccuracies. Not taking into account this identification noise would otherwise result in drastic loss of information. Dynamic treemaps visualize the biodiversity of metaproteome samples, which eases the exploration of samples with highly complex compositions. The potential of Unipept to gain novel insights into the biodiversity of a sample is evaluated by reanalyzing publicly available metaproteome data sets taken from the bacterial phyllosphere and the human gut.","hji,kes",0,0,0,2,0,NA,NA +23153250,"MTMDAT-HADDOCK: high-throughput, protein complex structure modeling based on limited proteolysis and mass spectrometry.","

Background

MTMDAT is a program designed to facilitate analysis of mass spectrometry data of proteins and biomolecular complexes that are probed structurally by limited proteolysis. This approach can provide information about stable fragments of multidomain proteins, yield tertiary and quaternary structure data, and help determine the origin of stability changes at the amino acid residue level. Here, we introduce a pipeline between MTMDAT and HADDOCK, that facilitates protein-protein complex structure probing in a high-throughput and highly automated fashion.

Results

A new feature of MTMDAT allows for the direct identification of residues that are involved in complex formation by comparing the mass spectra of bound and unbound proteins after proteolysis. If 3D structures of the unbound components are available, this data can be used to define restraints for data-driven docking to calculate a model of the complex. We describe here a new implementation of MTMDAT, which includes a pipeline to the data-driven docking program HADDOCK, thus streamlining the entire procedure. This addition, together with usability improvements in MTMDAT, enables high-throughput modeling of protein complexes from mass spectrometry data. The algorithm has been validated by using the protein-protein interaction between the ubiquitin-binding domain of proteasome component Rpn13 and ubiquitin. The resulting structural model, based on restraints extracted by MTMDAT from limited proteolysis and modeled by HADDOCK, was compared to the published NMR structure, which relied on twelve unambiguous intermolecular NOE interactions. The MTMDAT-HADDOCK structure was of similar quality to structures generated using only chemical shift perturbation data derived by NMR titration experiments.

Conclusions

The new MTMDAT-HADDOCK pipeline enables direct high-throughput modeling of protein complexes from mass spectrometry data. MTMDAT-HADDOCK can be downloaded from http://www.ifm.liu.se/chemistry/molbiotech/maria_sunnerhagens_group/mtmdat/together with the manual and example files. The program is free for academic/non-commercial purposes.","hji,kes",0,0,0,2,0,NA,NA +23181585,A Monte Carlo-based framework enhances the discovery and interpretation of regulatory sequence motifs.,"

Background

Discovery of functionally significant short, statistically overrepresented subsequence patterns (motifs) in a set of sequences is a challenging problem in bioinformatics. Oftentimes, not all sequences in the set contain a motif. These non-motif-containing sequences complicate the algorithmic discovery of motifs. Filtering the non-motif-containing sequences from the larger set of sequences while simultaneously determining the identity of the motif is, therefore, desirable and a non-trivial problem in motif discovery research.

Results

We describe MotifCatcher, a framework that extends the sensitivity of existing motif-finding tools by employing random sampling to effectively remove non-motif-containing sequences from the motif search. We developed two implementations of our algorithm; each built around a commonly used motif-finding tool, and applied our algorithm to three diverse chromatin immunoprecipitation (ChIP) data sets. In each case, the motif finder with the MotifCatcher extension demonstrated improved sensitivity over the motif finder alone. Our approach organizes candidate functionally significant discovered motifs into a tree, which allowed us to make additional insights. In all cases, we were able to support our findings with experimental work from the literature.

Conclusions

Our framework demonstrates that additional processing at the sequence entry level can significantly improve the performance of existing motif-finding tools. For each biological data set tested, we were able to propose novel biological hypotheses supported by experimental work from the literature. Specifically, in Escherichia coli, we suggested binding site motifs for 6 non-traditional LexA protein binding sites; in Saccharomyces cerevisiae, we hypothesize 2 disparate mechanisms for novel binding sites of the Cse4p protein; and in Halobacterium sp. NRC-1, we discoverd subtle differences in a general transcription factor (GTF) binding site motif across several data sets. We suggest that small differences in our discovered motif could confer specificity for one or more homologous GTF proteins. We offer a free implementation of the MotifCatcher software package at http://www.bme.ucdavis.edu/facciotti/resources_data/software/.","hji,kes",0,0,0,2,0,NA,NA +23192052,Hierarchical shrinkage priors and model fitting for high-dimensional generalized linear models.,"Abstract Genetic and other scientific studies routinely generate very many predictor variables, which can be naturally grouped, with predictors in the same groups being highly correlated. It is desirable to incorporate the hierarchical structure of the predictor variables into generalized linear models for simultaneous variable selection and coefficient estimation. We propose two prior distributions: hierarchical Cauchy and double-exponential distributions, on coefficients in generalized linear models. The hierarchical priors include both variable-specific and group-specific tuning parameters, thereby not only adopting different shrinkage for different coefficients and different groups but also providing a way to pool the information within groups. 
We fit generalized linear models with the proposed hierarchical priors by incorporating flexible expectation-maximization (EM) algorithms into the standard iteratively weighted least squares as implemented in the general statistical package R. The methods are illustrated with data from an experiment to identify genetic polymorphisms for survival of mice following infection with Listeria monocytogenes. The performance of the proposed procedures is further assessed via simulation studies. The methods are implemented in a freely available R package BhGLM (http://www.ssg.uab.edu/bhglm/).","hji,kes",0,0,0,2,0,NA,NA +23217202,Protein Nano-Object Integrator (ProNOI) for generating atomic style objects for molecular modeling.,"

Background

With the progress of nanotechnology, one frequently has to model biological macromolecules simultaneously with nano-objects. However, the atomic structures of the nano objects are typically not available or they are solid state entities. Because of that, the researchers have to investigate such nano systems by generating models of the nano objects in a manner that the existing software be able to carry the simulations. In addition, it should allow generating composite objects with complex shape by combining basic geometrical figures and embedding biological macromolecules within the system.

Results

Here we report the Protein Nano-Object Integrator (ProNOI) which allows for generating atomic-style geometrical objects with user desired shape and dimensions. Unlimited number of objects can be created and combined with biological macromolecules in Protein Data Bank (PDB) format file. Once the objects are generated, the users can use sliders to manipulate their shape, dimension and absolute position. In addition, the software offers the option to charge the objects with either specified surface or volumetric charge density and to model them with user-desired dielectric constants. According to the user preference, the biological macromolecule atoms can be assigned charges and radii according to four different force fields: Amber, Charmm, OPLS and PARSE. The biological macromolecules and the atomic-style objects are exported as a position, charge and radius (PQR) file, or if a default dielectric constant distribution is not selected, it is exported as a position, charge, radius and epsilon (PQRE) file. As illustration of the capabilities of the ProNOI, we created a composite object in a shape of a robot, aptly named the Clemson Robot, whose parts are charged with various volumetric charge densities and holds the barnase-barstar protein complex in its hand.

Conclusions

The Protein Nano-Object Integrator (ProNOI) is a convenient tool for generating atomic-style nano shapes in conjunction with biological macromolecule(s). Charges and radii on the macromolecule atoms and the atoms in the shapes are assigned according to the user's preferences allowing various scenarios of modeling. The default output file is in PQR (PQRE) format which is readable by almost any software available in biophysical field. It can be downloaded from: http://compbio.clemson.edu/downloadDir/ProNO_integrator.tar.gz.","hji,kes",0,0,0,2,0,NA,NA +23244467,"COEUS: """"semantic web in a box"""" for biomedical applications.","

Unlabelled

Background

As the """"omics"""" revolution unfolds, the growth in data quantity and diversity is bringing about the need for pioneering bioinformatics software, capable of significantly improving the research workflow. To cope with these computer science demands, biomedical software engineers are adopting emerging semantic web technologies that better suit the life sciences domain. The latter's complex relationships are easily mapped into semantic web graphs, enabling a superior understanding of collected knowledge. Despite increased awareness of semantic web technologies in bioinformatics, their use is still limited.

Results

COEUS is a new semantic web framework, aiming at a streamlined application development cycle and following a """"semantic web in a box"""" approach. The framework provides a single package including advanced data integration and triplification tools, base ontologies, a web-oriented engine and a flexible exploration API. Resources can be integrated from heterogeneous sources, including CSV and XML files or SQL and SPARQL query results, and mapped directly to one or more ontologies. Advanced interoperability features include REST services, a SPARQL endpoint and LinkedData publication. These enable the creation of multiple applications for web, desktop or mobile environments, and empower a new knowledge federation layer.

Conclusions

The platform, targeted at biomedical application developers, provides a complete skeleton ready for rapid application deployment, enhancing the creation of new semantic information systems. COEUS is available as open source at http://bioinformatics.ua.pt/coeus/.","hji,kes",0,0,0,2,0,NA,NA +23271269,Inference of gene regulatory networks from genome-wide knockout fitness data.,"

Motivation

Genome-wide fitness is an emerging type of high-throughput biological data generated for individual organisms by creating libraries of knockouts, subjecting them to broad ranges of environmental conditions, and measuring the resulting clone-specific fitnesses. Since fitness is an organism-scale measure of gene regulatory network behaviour, it may offer certain advantages when insights into such phenotypical and functional features are of primary interest over individual gene expression. Previous works have shown that genome-wide fitness data can be used to uncover novel gene regulatory interactions, when compared with results of more conventional gene expression analysis. Yet, to date, few algorithms have been proposed for systematically using genome-wide mutant fitness data for gene regulatory network inference.

Results

In this article, we describe a model and propose an inference algorithm for using fitness data from knockout libraries to identify underlying gene regulatory networks. Unlike most prior methods, the presented approach captures not only structural, but also dynamical and non-linear nature of biomolecular systems involved. A state-space model with non-linear basis is used for dynamically describing gene regulatory networks. Network structure is then elucidated by estimating unknown model parameters. Unscented Kalman filter is used to cope with the non-linearities introduced in the model, which also enables the algorithm to run in on-line mode for practical use. Here, we demonstrate that the algorithm provides satisfying results for both synthetic data as well as empirical measurements of GAL network in yeast Saccharomyces cerevisiae and TyrR-LiuR network in bacteria Shewanella oneidensis.

Availability

MATLAB code and datasets are available to download at http://www.duke.edu/~lw174/Fitness.zip and http://genomics.lbl.gov/supplemental/fitness-bioinf/","hji,kes",0,0,0,2,0,NA,zip file +23281802,BM-Map: an efficient software package for accurately allocating multireads of RNA-sequencing data.,"

Background

RNA sequencing (RNA-seq) has become a major tool for biomedical research. A key step in analyzing RNA-seq data is to infer the origin of short reads in the source genome, and for this purpose, many read alignment/mapping software programs have been developed. Usually, the majority of mappable reads can be mapped to one unambiguous genomic location, and these reads are called unique reads. However, a considerable proportion of mappable reads can be aligned to more than one genomic location with the same or similar fidelities, and they are called """"multireads"""". Allocating these multireads is challenging but critical for interpreting RNA-seq data. We recently developed a Bayesian stochastic model that allocates multireads more accurately than alternative methods (Ji et al. Biometrics 2011).

Results

In order to serve a greater biological community, we have implemented this method in a stand-alone, efficient, and user-friendly software package, BM-Map. BM-Map takes SAM (Sequence Alignment/Map), the most popular read alignment format, as the standard input; then based on the Bayesian model, it calculates mapping probabilities of multireads for competing genomic loci; and BM-Map generates the output by adding mapping probabilities to the original SAM file so that users can easily perform downstream analyses. The program is available in three common operating systems, Linux, Mac and PC. Moreover, we have built a dedicated website, http://bioinformatics.mdanderson.org/main/BM-Map, which includes free downloads, detailed tutorials and illustration examples.

Conclusions

We have developed a stand-alone, efficient, and user-friendly software package for accurately allocating multireads, which is an important addition to our previous methodology paper. We believe that this bioinformatics tool will greatly help RNA-seq and related applications reach their full potential in life science research.","hji,kes",0,0,0,2,0,NA,NA +23303509,ALE: a generic assembly likelihood evaluation framework for assessing the accuracy of genome and metagenome assemblies.,"

Motivation

Researchers need general purpose methods for objectively evaluating the accuracy of single and metagenome assemblies and for automatically detecting any errors they may contain. Current methods do not fully meet this need because they require a reference, only consider one of the many aspects of assembly quality or lack statistical justification, and none are designed to evaluate metagenome assemblies.

Results

In this article, we present an Assembly Likelihood Evaluation (ALE) framework that overcomes these limitations, systematically evaluating the accuracy of an assembly in a reference-independent manner using rigorous statistical methods. This framework is comprehensive, and integrates read quality, mate pair orientation and insert length (for paired-end reads), sequencing coverage, read alignment and k-mer frequency. ALE pinpoints synthetic errors in both single and metagenomic assemblies, including single-base errors, insertions/deletions, genome rearrangements and chimeric assemblies presented in metagenomes. At the genome level with real-world data, ALE identifies three large misassemblies from the Spirochaeta smaragdinae finished genome, which were all independently validated by Pacific Biosciences sequencing. At the single-base level with Illumina data, ALE recovers 215 of 222 (97%) single nucleotide variants in a training set from a GC-rich Rhodobacter sphaeroides genome. Using real Pacific Biosciences data, ALE identifies 12 of 12 synthetic errors in a Lambda Phage genome, surpassing even Pacific Biosciences' own variant caller, EviCons. In summary, the ALE framework provides a comprehensive, reference-independent and statistically rigorous measure of single genome and metagenome assembly accuracy, which can be used to identify misassemblies or to optimize the assembly process.

Availability

ALE is released as open source software under the UoI/NCSA license at http://www.alescore.org. It is implemented in C and Python.","hji,kes",0,0,0,2,0,NA,NA +23334680,Getting the word out about treating borderline personality disorder: an online information resource.,"This column reviews the need for greater awareness among mental health clinicians regarding evidence-based treatments for borderline personality disorder (BPD) and describes an online resource that has been developed to partially address this need. This resource is the Borderline Personality Disorder Clinician Resource Centre, which can be accessed at http://www.treatingBPD.ca.","hji,kes",0,0,0,2,0,NA,clinical +23337681,Medical mentoring via the evolving world wide web.,"

Objectives

Mentoring, for physicians and surgeons in training, is advocated as an essential adjunct in work-based learning, providing support in career and non-career related issues. The World Wide Web (WWW) has evolved, as a technology, to become more interactive and person centric, tailoring itself to the individual needs of the user. This changing technology may open new avenues to foster mentoring in medicine. DESIGN, SYSTEMATIC REVIEW, MAIN OUTCOME MEASURES: A search of the MEDLINE database from 1950 to 2012 using the PubMed interface, combined with manual cross-referencing was performed using the following strategy: (""""mentors""""[MeSH Terms] OR """"mentors""""[All Fields] OR """"mentor""""[All Fields]) AND (""""internet""""[MeSH Terms] OR """"internet""""[All Fields]) AND (""""medicine""""[MeSH Terms] OR """"medicine""""[All Fields]) AND (""""humans""""[MeSH Terms] AND English[lang]). Abstracts were screened for relevance (UJ) to the topic; eligibility for inclusion was simply on screening for relevance to online mentoring and web-based technologies.

Results

Forty-five papers were found, of which 16 were relevant. All studies were observational in nature. To date, all medical mentoring applications utilizing the World Wide Web have enjoyed some success limited by Web 1.0 and 2.0 technologies.

Conclusions

With the evolution of the WWW through 1.0, 2.0 and 3.0 generations, the potential for meaningful tele- and distance mentoring has greatly improved. Some engagement has been made with these technological advancements, however further work is required to fully realize the potential of these technologies.","hji,kes",0,0,0,2,0,NA,NA +23342084,Early growth response 3 (Egr3) is highly over-expressed in non-relapsing prostate cancer but not in relapsing prostate cancer.,"Members of the early growth response (EGR) family of transcription factors play diverse functions in response to many cellular stimuli, including growth, stress, and inflammation. Egr3 has gone relatively unstudied, but here through use of the SPECS (Strategic Partners for the Evaluation of Predictive Signatures of Prostate Cancer) Affymetrix whole genome gene expression database we report that Egr3 mRNA is significantly over-expressed in prostate cancer compared to normal prostate tissue (5-fold). The Human Protein Atlas (http://www.proteinatlas.org), a database of tissue microarrays labeled with antibodies against over 11,000 human proteins, was utilized to quantify Egr3 protein expression in normal prostate and prostate cancer patients. In agreement with the SPECS data, we found that Egr3 protein is significantly increased in prostate cancer. The SPECS database has the benefit of extensive clinical follow up for the prostate cancer patients. Analysis of Egr3 mRNA expression in relation to the relapse status reveals that Egr3 mRNA expression is increased in tumor cells of non-relapsed samples (n = 63) compared to normal prostate cells, but is significantly lower in relapsed samples (n = 38) compared to non-relapse. The observations were confirmed using an independent data set. A list of genes correlating with this unique expression pattern was determined. These Egr3-correlated genes were enriched with Egr binding sites in their promoters. 
The gene list contains inflammatory genes such as IL-6, IL-8, IL1 and COX-2, which have extensive connections to prostate cancer.","hji,kes",0,0,0,2,0,NA,NA +23362108,Epigenetic regulation of the X-linked tumour suppressors BEX1 and LDOC1 in oral squamous cell carcinoma.,"The strong associations between oral squamous cell carcinoma (OSCC) and dietary habits such as alcohol consumption (A), betel quid chewing (B) and cigarette smoking (C) and its predominance in men have been well documented; however, systemic analysis of OSCC is limited. Our study applied high-throughput screening methods to identify causative epigenetic targets in a cohort of men with ABC-associated OSCC. We identified BEX1 and LDOC1 as two epigenetically silenced X-linked tumour suppressors and demonstrated a functional link between the transcription of BEX1 and LDOC1 and promoter hypermethylation. Methylation of the BEX1 and LDOC1 promoters was associated significantly (p < 0.0001) with OSCC and were detected in 75% (42/56) and 89% (50/56) of the samples, respectively. We observed concordant increases in the methylation of both genes in 71% (40/56) of the tumours, and potent in vitro and in vivo growth inhibitory effects in OSCC cells ectopically expressing BEX1 and/or LDOC1. Restored expression of BEX1 and LDOC1 suppressed the nuclear factor-B (NF-B) signalling pathway, which is the most frequently hyperactivated signalling pathway in OSCC. This suppression might result from decreased p50 and p65 expression. These findings suggest that silencing of BEX1 and LDOC1 by promoter hypermethylation might represent a critical event in the molecular pathogenesis of OSCC and account for the oncogenic effects of ABC exposure and the male predominance of OSCC occurrence. 
Microarray data are available in the Gene Expression Omnibus (GEO; http://www.ncbi.nlm.nih.gov/geo/)","hji,kes",0,0,0,2,0,NA,data deposited as referenced +23368677,Identifying cross-category relations in gene ontology and constructing genome-specific term association networks.,"

Background

Gene Ontology (GO) has been widely used in biological databases, annotation projects, and computational analyses. Although the three GO categories are structured as independent ontologies, the biological relationships across the categories are not negligible for biological reasoning and knowledge integration. However, the existing cross-category ontology term similarity measures are either developed by utilizing the GO data only or based on manually curated term name similarities, ignoring the fact that GO is evolving quickly and the gene annotations are far from complete.

Results

In this paper we introduce a new cross-category similarity measurement called CroGO by incorporating genome-specific gene co-function network data. The performance study showed that our measurement outperforms the existing algorithms. We also generated genome-specific term association networks for yeast and human. An enrichment based test showed our networks are better than those generated by the other measures.

Conclusions

The genome-specific term association networks constructed using CroGO provided a platform to enable a more consistent use of GO. In the networks, the frequently occurred MF-centered hub indicates that a molecular function may be shared by different genes in multiple biological processes, or a set of genes with the same functions may participate in distinct biological processes. And common subgraphs in multiple organisms also revealed conserved GO term relationships. Software and data are available online at http://www.msu.edu/~jinchen/CroGO.","hji,kes",0,0,0,2,0,NA,iffy +23375235,Learning the local Bayesian network structure around the ZNF217 oncogene in breast tumours.,"In this study, we discuss and apply a novel and efficient algorithm for learning a local Bayesian network model in the vicinity of the ZNF217 oncogene from breast cancer microarray data without having to decide in advance which genes have to be included in the learning process. ZNF217 is a candidate oncogene located at 20q13, a chromosomal region frequently amplified in breast and ovarian cancer, and correlated with shorter patient survival in these cancers. To properly address the difficulties in managing complex gene interactions given our limited sample, statistical significance of edge strengths was evaluated using bootstrapping and the less reliable edges were pruned to increase the network robustness. We found that 13 out of the 35 genes associated with deregulated ZNF217 expression in breast tumours have been previously associated with survival and/or prognosis in cancers. Identifying genes involved in lipid metabolism opens new fields of investigation to decipher the molecular mechanisms driven by the ZNF217 oncogene. Moreover, nine of the 13 genes have already been identified as putative ZNF217 targets by independent biological studies. 
We therefore suggest that the algorithms for inferring local BNs are valuable data mining tools for unraveling complex mechanisms of biological pathways from expression data. The source code is available at http://www710.univ-lyon1.fr/~aaussem/Software.html.","hji,kes",0,0,0,2,0,NA,NA +23377977,Discovery of microRNA regulatory networks by integrating multidimensional high-throughput data.,"MicroRNAs (miRNAs) are endogenous non-coding RNAs (ncRNAs) of approximately 22 nt that regulate the expression of a large fraction of genes by targeting messenger RNAs (mRNAs). However, determining the biologically significant targets of miRNAs is an ongoing challenge. In this chapter, we describe how to identify miRNA-target interactions and miRNA regulatory networks from high-throughput deep sequencing, CLIP-Seq (HITS-CLIP, PAR-CLIP) and degradome sequencing data using starBase platforms. In starBase, several web-based and stand-alone computational tools were developed to discover Argonaute (Ago) binding and cleavage sites, miRNA-target interactions, perform enrichment analysis of miRNA target genes in Gene Ontology (GO) categories and biological pathways, and identify combinatorial effects between Ago and other RNA-binding proteins (RBPs). Investigating target pathways of miRNAs in human CLIP-Seq data, we found that many cancer-associated miRNAs modulate cancer pathways. Performing an enrichment analysis of genes targeted by highly expressed miRNAs in the mouse brain showed that many miRNAs are involved in cancer-associated MAPK signaling and glioma pathways, as well as neuron-associated neurotrophin signaling and axon guidance pathways. Moreover, thousands of combinatorial binding sites between Ago and RBPs were identified from CLIP-Seq data suggesting RBPs and miRNAs coordinately regulate mRNA transcripts. 
As a means of comprehensively integrating CLIP-Seq and Degradome-Seq data, the starBase platform is expected to identify clinically relevant miRNA-target regulatory relationships, and reveal multi-dimensional post-transcriptional regulatory networks involving miRNAs and RBPs. starBase is available at http://starbase.sysu.edu.cn/ .","hji,kes",0,0,0,2,0,NA,NA +23394478,Multiple consensus trees: a method to separate divergent genes.,"

Background

It is generally admitted that the species tree cannot be inferred from the genetic sequences of a single gene because the evolution of different genes, and thus the gene tree topologies, may vary substantially. Gene trees can differ, for example, because of horizontal transfer events or because some of them correspond to paralogous instead of orthologous sequences. A variety of methods has been proposed to tackle the problem of the reconciliation of gene trees in order to reconstruct a species tree. When the taxa in all the trees are identical, the problem can be stated as a consensus tree problem.

Results

In this paper we define a new method for deciding whether a unique consensus tree or multiple consensus trees can best represent a set of given phylogenetic trees. If the given trees are all congruent, they should be compatible into a single consensus tree. Otherwise, several consensus trees corresponding to divergent genetic patterns can be identified. We introduce a method optimizing the generalized score, over a set of tree partitions in order to decide whether the given set of gene trees is homogeneous or not.

Conclusions

The proposed method has been validated with simulated data (random trees organized in three topological groups) as well as with real data (bootstrap trees, homogeneous set of trees, and a set of non homogeneous gene trees of 30 E. Coli strains; it is worth noting that some of the latter genes underwent horizontal gene transfers). A computer program, MCT - Multiple Consensus Trees, written in C was made freely available for the research community (it can be downloaded from http://bioinformatics.lif.univ-mrs.fr/consensus/index.html). It handles trees in a standard Newick format, builds three hierarchies corresponding to RF and QS similarities between trees and the greedy ascending algorithm. The generalized score values of all tree partitions are computed.","hji,kes",0,0,0,2,0,NA,NA +23413437,GalaxyGemini: a web server for protein homo-oligomer structure prediction based on similarity.,"

Summary

A large number of proteins function as homo-oligomers; therefore, predicting homo-oligomeric structure of proteins is of primary importance for understanding protein function at the molecular level. Here, we introduce a web server for prediction of protein homo-oligomer structure. The server takes a protein monomer structure as input and predicts its homo-oligomer structure from oligomer templates selected based on sequence and tertiary/quaternary structure similarity. Using protein model structures as input, the server shows clear improvement over the best methods of CASP9 in predicting oligomeric structures from amino acid sequences.

Availability

http://galaxy.seoklab.org/gemini.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +23419168,CD44/CD24 immunophenotypes on clinicopathologic features of salivary glands malignant neoplasms.,"

Background

Salivary glands malignant neoplasms (SGMNs) account for 3-6% of head and neck cancers and 0.3% of all cancers. Tumor cells that express CD44 and CD24 exhibit a stem-cell-like behavior. CD44 is the binding site for hyaluronic acid, and CD24 is a receptor that interacts with P-selectin to induce metastasis and tumor progression. The present study aims to evaluate the expression of CD44 and CD24 on SGMNs and correlated these data with several clinicopathologic features.

Methods

Immunohistochemical stains for CD44 and CD24 were performed on tissue microarrays containing SGMN samples from 69 patients. The CD44, CD24 and CD44/CD24 expression phenotypes were correlated to patient clinicopathologic features and outcome.

Results

CD44 expression was associated with the primary site of neoplasm (p = 0.046). CD24 was associated with clinical stage III/IV (p = 0.008), T stage (p = 0,27) and lymph node (p = 0,001). The CD44/CD24 profiles were associated with the primary site of injury (p = 0.005), lymph node (p = 0.011) and T stage (p = 0.023). Univariate analysis showed a significant relationship between clinical staging and disease- free survival (p = 0.009), and the overall survival presents relation with male gender (p = 0.011) and metastasis (p = 0.027).

Conclusion

In summary, our investigation confirms that the clinical stage, in accordance with the literature, is the main prognostic factor for SGMN. Additionally, we have presented some evidence that the analysis of isolated CD44 and CD24 immunoexpression or the two combined markers could give prognostic information associated to clinicopathologic features in SGMN.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1284611098470676.","hji,kes",0,0,0,2,0,NA,NA +23493323,BRANCH: boosting RNA-Seq assemblies with partial or related genomic sequences.,"

Motivation

De novo transcriptome assemblies of RNA-Seq data are important for genomics applications of unsequenced organisms. Owing to the complexity and often incomplete representation of transcripts in sequencing libraries, the assembly of high-quality transcriptomes can be challenging. However, with the rapidly growing number of sequenced genomes, it is now feasible to improve RNA-Seq assemblies by guiding them with genomic sequences.

Results

This study introduces BRANCH, an algorithm designed for improving de novo transcriptome assemblies by using genomic information that can be partial or complete genome sequences from the same or a related organism. Its input includes assembled RNA reads (transfrags), genomic sequences (e.g. contigs) and the RNA reads themselves. It uses a customized version of BLAT to align the transfrags and RNA reads to the genomic sequences. After identifying exons from the alignments, it defines a directed acyclic graph and maps the transfrags to paths on the graph. It then joins and extends the transfrags by applying an algorithm that solves a combinatorial optimization problem, called the Minimum weight Minimum Path Cover with given Paths. In performance tests on real data from Caenorhabditis elegans and Saccharomyces cerevisiae, assisted by genomic contigs from the same species, BRANCH improved the sensitivity and precision of transfrags generated by Velvet/Oases or Trinity by 5.1-56.7% and 0.3-10.5%, respectively. These improvements added 3.8-74.1% complete transcripts and 8.3-3.8% proteins to the initial assembly. Similar improvements were achieved when guiding the BRANCH processing of a transcriptome assembly from a more complex organism (mouse) with genomic sequences from a related species (rat).

Availability

The BRANCH software can be downloaded for free from this site: http://manuals.bioinformatics.ucr.edu/home/branch.

Contact

thomas.girke@ucr.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +23504016,Genomic reconstruction of the transcriptional regulatory network in Bacillus subtilis.,"The adaptation of microorganisms to their environment is controlled by complex transcriptional regulatory networks (TRNs), which are still only partially understood even for model species. Genome scale annotation of regulatory features of genes and TRN reconstruction are challenging tasks of microbial genomics. We used the knowledge-driven comparative-genomics approach implemented in the RegPredict Web server to infer TRN in the model Gram-positive bacterium Bacillus subtilis and 10 related Bacillales species. For transcription factor (TF) regulons, we combined the available information from the DBTBS database and the literature with bioinformatics tools, allowing inference of TF binding sites (TFBSs), comparative analysis of the genomic context of predicted TFBSs, functional assignment of target genes, and effector prediction. For RNA regulons, we used known RNA regulatory motifs collected in the Rfam database to scan genomes and analyze the genomic context of new RNA sites. The inferred TRN in B. subtilis comprises regulons for 129 TFs and 24 regulatory RNA families. First, we analyzed 66 TF regulons with previously known TFBSs in B. subtilis and projected them to other Bacillales genomes, resulting in refinement of TFBS motifs and identification of novel regulon members. Second, we inferred motifs and described regulons for 28 experimentally studied TFs with previously unknown TFBSs. Third, we discovered novel motifs and reconstructed regulons for 36 previously uncharacterized TFs. 
The inferred collection of regulons is available in the RegPrecise database (http://regprecise.lbl.gov/) and can be used in genetic experiments, metabolic modeling, and evolutionary analysis.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +23504705,Accurate prediction of hot spot residues through physicochemical characteristics of amino acid sequences.,"Hot spot residues of proteins are fundamental interface residues that help proteins perform their functions. Detecting hot spots by experimental methods is costly and time-consuming. Sequential and structural information has been widely used in the computational prediction of hot spots. However, structural information is not always available. In this article, we investigated the problem of identifying hot spots using only physicochemical characteristics extracted from amino acid sequences. We first extracted 132 relatively independent physicochemical features from a set of the 544 properties in AAindex1, an amino acid index database. Each feature was utilized to train a classification model with a novel encoding schema for hot spot prediction by the IBk algorithm, an extension of the K-nearest neighbor algorithm. The combinations of the individual classifiers were explored and the classifiers that appeared frequently in the top performing combinations were selected. The hot spot predictor was built based on an ensemble of these classifiers and to work in a voting manner. Experimental results demonstrated that our method effectively exploited the feature space and allowed flexible weights of features for different queries. On the commonly used hot spot benchmark sets, our method significantly outperformed other machine learning algorithms and state-of-the-art hot spot predictors. The program is available at http://sfb.kaust.edu.sa/pages/software.aspx.","hji,kes",0,0,0,2,0,NA,NA +23524031,Acute diagnostic biomarkers for spinal cord injury: review of the literature and preliminary research report.,"

Objective

Many efforts have been made to create new diagnostic technologies for use in the diagnosis of central nervous system injury. However, there is still no consensus for the use of biomarkers in clinical acute spinal cord injury (SCI). The aims of this review are (1) to evaluate the current status of neurochemical biomarkers and (2) to discuss their potential acute diagnostic role in SCI by reviewing the literature.

Methods

PubMed (http://www.ncbi.nlm.nih.gov/pubmed) was searched up to 2012 to identify publications concerning diagnostic biomarkers in SCI. To support more knowledge, we also checked secondary references in the primarily retrieved literature.

Results

Neurofilaments, cleaved-Tau, microtubule-associated protein 2, myelin basic protein, neuron-specific enolase, S100, and glial fibrillary acidic protein were identified as structural protein biomarkers in SCI by this review process. We could not find reports relating ubiquitin C-terminal hydrolase-L1 and a-II spectrin breakdown products, which are widely researched in other central nervous system injuries. Therefore, we present our preliminary data relating to these two biomarkers. Some of biomarkers showed promising results for SCI diagnosis and outcome prediction; however, there were unresolved issues relating to accuracy and their accessibility.

Conclusion

Currently, there still are not many reports focused on diagnostic biomarkers in SCI. This fact warranted the need for greater efforts to innovate sensitive and reliable biomarkers for SCI.","hji,kes",0,0,0,2,0,NA,not about the resource +23531787,EDGE-pro: Estimated Degree of Gene Expression in Prokaryotic Genomes.,"

Background

The expression levels of bacterial genes can be measured directly using next-generation sequencing (NGS) methods, offering much greater sensitivity and accuracy than earlier, microarray-based methods. Most bioinformatics software for estimating levels of gene expression from NGS data has been designed for eukaryotic genomes, with algorithms focusing particularly on detection of splicing patterns. These methods do not perform well on bacterial genomes.

Results

Here we describe the first software system designed explicitly for quantifying the degree of gene expression in bacteria and other prokaryotes. EDGE-pro (Estimated Degree of Gene Expression in PROkaryotes) processes the raw data from an RNA-seq experiment on a bacterial or archaeal species and produces estimates of the expression levels for each gene in these gene-dense genomes.

Software

The EDGE-pro tool is implemented as a pipeline of C++ and Perl programs and is freely available as open-source code at http://www.genomics.jhu.edu/software/EDGE/index.shtml.","hji,kes",0,0,0,2,0,NA,NA +23587428,The Th17/Treg balance and the expression of related cytokines in Uygur cervical cancer patients.,"

Background

The fine balance of Th17/Treg is crucial for maintenance of immune homeostasis. The objective of this study was to investigate the balance of Th17/Treg and the expression of related cytokines in Uighur cervical cancer patients.

Methods

Peripheral blood was collected from 65 cases of cervical cancer patients, 42 cases of cervical CIN patients and 40 healthy people. Flow cytometry was used to detect the percentages of T cell subsets, including CD3+ T cells, CD4+ T cells, CD8+ T cells, Treg cells and Th17 cells. ELISA assay was conducted to detect expression levels of TGF-, IL-6, IL-10, IL-17, IL-23 and IFN-.

Results

There were no significant difference in the levels of CD3+ T cells, CD4+ T cells, CD8+ T cells, and the ratio of CD4+/CD8+ among the cervical cancer group, the CIN group and the healthy control group. However, compared with the healthy control group, the percentages of CD4+ CD25+ Treg, CD4+CD25+CD127- Treg, CD4+IL17+ Th17, CD4+CD25+Foxp3+, CD4+CD25- Foxp3+, CD8+CD25+CD127-Treg and CD8+CD25+Foxp3 were significantly higher in the cervical cancer group and the CIN group. Similar results were also found in the Th17/Treg ratio and the related cytokines. There was no significant difference between the cervical cancer group and the CIN group. Additionally, Th17 cell levels were positively correlated with IL-6, IL-23 and IL-17. Also, Treg cell levels were positively correlated with TGF-, IL-10 and IL-6. Contrarily, Treg cell levels and IFN- were negatively correlated.

Conclusions

Our data indicated that the Th17/Treg balance was broken in peripheral blood of cervical cancer patients. Analysis of Th17/Treg balance may have a significant implication in diagnosing cervical cancer.

Virtual slides

The virtual slide for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/1813823795931511.","hji,kes",0,0,0,2,0,NA,NA +23626918,AnsNGS: An Annotation System to Sequence Variations of Next Generation Sequencing Data for Disease-Related Phenotypes.,"

Objectives

Next-generation sequencing (NGS) data in the identification of disease-causing genes provides a promising opportunity in the diagnosis of disease. Beyond the previous efforts for NGS data alignment, variant detection, and visualization, developing a comprehensive annotation system supported by multiple layers of disease phenotype-related databases is essential for deciphering the human genome. To satisfy the impending need to decipher the human genome, it is essential to develop a comprehensive annotation system supported by multiple layers of disease phenotype-related databases.

Methods

AnsNGS (Annotation system of sequence variations for next-generation sequencing data) is a tool for contextualizing variants related to diseases and examining their functional consequences. The AnsNGS integrates a variety of annotation databases to attain multiple levels of annotation.

Results

The AnsNGS assigns biological functions to variants, and provides gene (or disease)-centric queries for finding disease-causing variants. The AnsNGS also connects those genes harbouring variants and the corresponding expression probes for downstream analysis using expression microarrays. Here, we demonstrate its ability to identify disease-related variants in the human genome.

Conclusions

The AnsNGS can give a key insight into which of these variants is already known to be involved in a disease-related phenotype or located in or near a known regulatory site. The AnsNGS is available free of charge to academic users and can be obtained from http://snubi.org/software/AnsNGS/.","hji,kes",0,0,0,2,0,NA,NA +23633579,OpenStructure: an integrated software framework for computational structural biology.,"Research projects in structural biology increasingly rely on combinations of heterogeneous sources of information, e.g. evolutionary information from multiple sequence alignments, experimental evidence in the form of density maps and proximity constraints from proteomics experiments. The OpenStructure software framework, which allows the seamless integration of information of different origin, has previously been introduced. The software consists of C++ libraries which are fully accessible from the Python programming language. Additionally, the framework provides a sophisticated graphics module that interactively displays molecular structures and density maps in three dimensions. In this work, the latest developments in the OpenStructure framework are outlined. The extensive capabilities of the framework will be illustrated using short code examples that show how information from molecular-structure coordinates can be combined with sequence data and/or density maps. The framework has been released under the LGPL version 3 license and is available for download from http://www.openstructure.org.","hji,kes",0,0,0,2,0,NA,NA +23637070,glyXalign: high-throughput migration time alignment preprocessing of electrophoretic data retrieved via multiplexed capillary gel electrophoresis with laser-induced fluorescence detection-based glycoprofiling.,"Glycomics has become a rapidly emerging field and monitoring of protein glycosylation is needed to ensure quality and consistency during production processes of biologicals such as therapeutic antibodies or vaccines. 
Glycoanalysis via multiplexed CGE with LIF detection (xCGE-LIF) represents a powerful technique featuring high resolution, high sensitivity as well as high-throughput performance. However, sample data retrieved from this method exhibit challenges for downstream computational analysis due to intersample migration time shifts as well as stretching and compression of electropherograms. Here, we present glyXalign, a freely available and easy-to-use software package to automatically correct for distortions in xCGE-LIF based glycan data. We demonstrate its ability to outperform conventional algorithms such as dynamic time warping and correlation optimized warping in terms of processing time and alignment accuracy for high-resolution datasets. Built upon a set of rapid algorithms, the tool includes an intuitive graphical user interface and allows full control over all parameters. Additionally, it visualizes the alignment process and enables the user to readjust misaligned results. Software and documentation are available at http://www.glyxera.com.","hji,kes",0,0,0,2,0,NA,NA +23664230,Application of the Bayesian approach for derivation of PDFs for concentration ratio values.,"Concentration ratios (CRs) are used to derive activity concentrations in wild plants and animals. Usually, compilations of CR values encompass a wide range of element-organism combinations, extracted from different studies with statistical information reported at varying degrees of detail. To produce a more robust estimation of distribution parameters, data from different studies are normally pooled using classical statistical methods. However, there is inherent subjectivity involved in pooling CR data in the sense that there is a tacit assumption that the CRs under any arbitrarily defined biota category belong to the same population. Here, Bayesian inference has been introduced as an alternative way of making estimates of distribution parameters of CRs. 
This approach, in contrast to classical methods, is more flexible and also allows us to define the various assumptions required, when combining data, in a more explicit manner. Taking selected data from the recently compiled wildlife transfer database (http://www.wildlifetransferdatabase.org/) as a working example, attempts are made to refine the pooling approaches previously used and to consider situations when empirical data are limited.","hji,kes",0,0,0,2,0,NA,but useful ref +23685432,DRIMust: a web server for discovering rank imbalanced motifs using suffix trees.,"Cellular regulation mechanisms that involve proteins and other active molecules interacting with specific targets often involve the recognition of sequence patterns. Short sequence elements on DNA, RNA and proteins play a central role in mediating such molecular recognition events. Studies that focus on measuring and investigating sequence-based recognition processes make use of statistical and computational tools that support the identification and understanding of sequence motifs. We present a new web application, named DRIMust, freely accessible through the website http://drimust.technion.ac.il for de novo motif discovery services. The DRIMust algorithm is based on the minimum hypergeometric statistical framework and uses suffix trees for an efficient enumeration of motif candidates. DRIMust takes as input ranked lists of sequences in FASTA format and returns motifs that are over-represented at the top of the list, where the determination of the threshold that defines top is data driven. The resulting motifs are presented individually with an accurate P-value indication and as a Position Specific Scoring Matrix. Comparing DRIMust with other state-of-the-art tools demonstrated significant advantage to DRIMust, both in result accuracy and in short running times. 
Overall, DRIMust is unique in combining efficient search on large ranked lists with rigorous P-value assessment for the detected motifs.","hji,kes",0,0,0,2,0,NA,NA +23685613,Prediction of clustered RNA-binding protein motif sites in the mammalian genome.,"Sequence-specific interactions of RNA-binding proteins (RBPs) with their target transcripts are essential for post-transcriptional gene expression regulation in mammals. However, accurate prediction of RBP motif sites has been difficult because many RBPs recognize short and degenerate sequences. Here we describe a hidden Markov model (HMM)-based algorithm mCarts to predict clustered functional RBP-binding sites by effectively integrating the number and spacing of individual motif sites, their accessibility in local RNA secondary structures and cross-species conservation. This algorithm learns and quantifies rules of these features, taking advantage of a large number of in vivo RBP-binding sites obtained from cross-linking and immunoprecipitation data. We applied this algorithm to study two representative RBP families, Nova and Mbnl, which regulate tissue-specific alternative splicing through interacting with clustered YCAY and YGCY elements, respectively, and predicted their binding sites in the mouse transcriptome. Despite the low information content in individual motif elements, our algorithm made specific predictions for successful experimental validation. Analysis of predicted sites also revealed cases of extensive and distal RBP-binding sites important for splicing regulation. This algorithm can be readily applied to other RBPs to infer their RNA-regulatory networks. The software is freely available at http://zhanglab.c2b2.columbia.edu/index.php/MCarts.","hji,kes",0,0,0,2,0,NA,NA +23703214,PiDNA: Predicting protein-DNA interactions with structural models.,"Predicting binding sites of a transcription factor in the genome is an important, but challenging, issue in studying gene regulation. 
In the past decade, a large number of protein-DNA co-crystallized structures available in the Protein Data Bank have facilitated the understanding of interacting mechanisms between transcription factors and their binding sites. Recent studies have shown that both physics-based and knowledge-based potential functions can be applied to protein-DNA complex structures to deliver position weight matrices (PWMs) that are consistent with the experimental data. To further use the available structural models, the proposed Web server, PiDNA, aims at first constructing reliable PWMs by applying an atomic-level knowledge-based scoring function on numerous in silico mutated complex structures, and then using the PWM constructed by the structure models with small energy changes to predict the interaction between proteins and DNA sequences. With PiDNA, the users can easily predict the relative preference of all the DNA sequences with limited mutations from the native sequence co-crystallized in the model in a single run. More predictions on sequences with unlimited mutations can be realized by additional requests or file uploading. Three types of information can be downloaded after prediction: (i) the ranked list of mutated sequences, (ii) the PWM constructed by the favourable mutated structures, and (iii) any mutated protein-DNA complex structure models specified by the user. This study first shows that the constructed PWMs are similar to the annotated PWMs collected from databases or literature. Second, the prediction accuracy of PiDNA in detecting relatively high-specificity sites is evaluated by comparing the ranked lists against in vitro experiments from protein-binding microarrays. Finally, PiDNA is shown to be able to select the experimentally validated binding sites from 10,000 random sites with high accuracy. 
With PiDNA, the users can design biological experiments based on the predicted sequence specificity and/or request mutated structure models for further protein design. As well, it is expected that PiDNA can be incorporated with chromatin immunoprecipitation data to refine large-scale inference of in vivo protein-DNA interactions. PiDNA is available at: http://dna.bime.ntu.edu.tw/pidna.","hji,kes",0,0,0,2,0,NA,no data - a webserver +23732274,AuthorReward: increasing community curation in biological knowledge wikis through automated authorship quantification.,"

Summary

Community curation-harnessing community intelligence in knowledge curation, bears great promise in dealing with the flood of biological knowledge. To exploit the full potential of the scientific community for knowledge curation, multiple biological wikis (bio-wikis) have been built to date. However, none of them have achieved a substantial impact on knowledge curation. One of the major limitations in bio-wikis is insufficient community participation, which is intrinsically because of lack of explicit authorship and thus no credit for community curation. To increase community curation in bio-wikis, here we develop AuthorReward, an extension to MediaWiki, to reward community-curated efforts in knowledge curation. AuthorReward quantifies researchers' contributions by properly factoring both edit quantity and quality and yields automated explicit authorship according to their quantitative contributions. AuthorReward provides bio-wikis with an authorship metric, helpful to increase community participation in bio-wikis and to achieve community curation of massive biological knowledge.

Availability

http://cbb.big.ac.cn/software.

Contact

zhangzhang@big.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +23732275,Relating genes to function: identifying enriched transcription factors using the ENCODE ChIP-Seq significance tool.,"

Motivation

Biological analysis has shifted from identifying genes and transcripts to mapping these genes and transcripts to biological functions. The ENCODE Project has generated hundreds of ChIP-Seq experiments spanning multiple transcription factors and cell lines for public use, but tools for a biomedical scientist to analyze these data are either non-existent or tailored to narrow biological questions. We present the ENCODE ChIP-Seq Significance Tool, a flexible web application leveraging public ENCODE data to identify enriched transcription factors in a gene or transcript list for comparative analyses.

Implementation

The ENCODE ChIP-Seq Significance Tool is written in JavaScript on the client side and has been tested on Google Chrome, Apple Safari and Mozilla Firefox browsers. Server-side scripts are written in PHP and leverage R and a MySQL database. The tool is available at http://encodeqt.stanford.edu.

Contact

abutte@stanford.edu

Supplementary information

Supplementary material is available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,iffy +23737449,antiSMASH 2.0--a versatile platform for genome mining of secondary metabolite producers.,"Microbial secondary metabolites are a potent source of antibiotics and other pharmaceuticals. Genome mining of their biosynthetic gene clusters has become a key method to accelerate their identification and characterization. In 2011, we developed antiSMASH, a web-based analysis platform that automates this process. Here, we present the highly improved antiSMASH 2.0 release, available at http://antismash.secondarymetabolites.org/. For the new version, antiSMASH was entirely re-designed using a plug-and-play concept that allows easy integration of novel predictor or output modules. antiSMASH 2.0 now supports input of multiple related sequences simultaneously (multi-FASTA/GenBank/EMBL), which allows the analysis of draft genomes comprising multiple contigs. Moreover, direct analysis of protein sequences is now possible. antiSMASH 2.0 has also been equipped with the capacity to detect additional classes of secondary metabolites, including oligosaccharide antibiotics, phenazines, thiopeptides, homo-serine lactones, phosphonates and furans. The algorithm for predicting the core structure of the cluster end product is now also covering lantipeptides, in addition to polyketides and non-ribosomal peptides. The antiSMASH ClusterBlast functionality has been extended to identify sub-clusters involved in the biosynthesis of specific chemical building blocks. 
The new features currently make antiSMASH 2.0 the most comprehensive resource for identifying and analyzing novel secondary metabolite biosynthetic pathways in microorganisms.","hji,kes",0,0,0,2,0,NA,NA +23761447,ResponseNet2.0: Revealing signaling and regulatory pathways connecting your proteins and genes--now with human data.,"Genome sequencing and transcriptomic profiling are two widely used approaches for the identification of human disease pathways. However, each approach typically provides a limited view of disease pathways: Genome sequencing can identify disease-related mutations but rarely reveals their mode-of-action, while transcriptomic assays do not reveal the series of events that lead to the transcriptomic change. ResponseNet is an integrative network-optimization approach that we developed to fill these gaps by highlighting major signaling and regulatory molecular interaction paths that connect disease-related mutations and genes. The ResponseNet web-server provides a user-friendly interface to ResponseNet. Specifically, users can upload weighted lists of proteins and genes and obtain a sparse, weighted, molecular interaction subnetwork connecting them, that is biased toward regulatory and signaling pathways. ResponseNet2.0 enhances the functionality of the ResponseNet web-server in two important ways. First, it supports analysis of human data by offering a human interactome composed of proteins, genes and micro-RNAs. Second, it offers a new informative view of the output, including a randomization analysis, to help users assess the biological relevance of the output subnetwork. ResponseNet2.0 is available at http://netbio.bgu.ac.il/respnet .","hji,kes",0,0,0,2,0,NA,NA +23771137,Secondary structure and domain architecture of the 23S and 5S rRNAs.,"We present a de novo re-determination of the secondary (2) structure and domain architecture of the 23S and 5S rRNAs, using 3D structures, determined by X-ray diffraction, as input. 
In the traditional 2 structure, the center of the 23S rRNA is an extended single strand, which in 3D is seen to be compact and double helical. Accurately assigning nucleotides to helices compels a revision of the 23S rRNA 2 structure. Unlike the traditional 2 structure, the revised 2 structure of the 23S rRNA shows architectural similarity with the 16S rRNA. The revised 2 structure also reveals a clear relationship with the 3D structure and is generalizable to rRNAs of other species from all three domains of life. The 2 structure revision required us to reconsider the domain architecture. We partitioned the 23S rRNA into domains through analysis of molecular interactions, calculations of 2D folding propensities and compactness. The best domain model for the 23S rRNA contains seven domains, not six as previously ascribed. Domain 0 forms the core of the 23S rRNA, to which the other six domains are rooted. Editable 2 structures mapped with various data are provided (http://apollo.chemistry.gatech.edu/RibosomeGallery).","hji,kes",0,0,0,2,0,NA,kinda iffy +23812995,Compressive genomics for protein databases.,"

Motivation

The exponential growth of protein sequence databases has increasingly made the fundamental question of searching for homologs a computational bottleneck. The amount of unique data, however, is not growing nearly as fast; we can exploit this fact to greatly accelerate homology search. Acceleration of programs in the popular PSI/DELTA-BLAST family of tools will not only speed-up homology search directly but also the huge collection of other current programs that primarily interact with large protein databases via precisely these tools.

Results

We introduce a suite of homology search tools, powered by compressively accelerated protein BLAST (CaBLASTP), which are significantly faster than and comparably accurate with all known state-of-the-art tools, including HHblits, DELTA-BLAST and PSI-BLAST. Further, our tools are implemented in a manner that allows direct substitution into existing analysis pipelines. The key idea is that we introduce a local similarity-based compression scheme that allows us to operate directly on the compressed data. Importantly, CaBLASTP's runtime scales almost linearly in the amount of unique data, as opposed to current BLASTP variants, which scale linearly in the size of the full protein database being searched. Our compressive algorithms will speed-up many tasks, such as protein structure prediction and orthology mapping, which rely heavily on homology search.

Availability

CaBLASTP is available under the GNU Public License at http://cablastp.csail.mit.edu/

Contact

bab@mit.edu.","hji,kes",0,0,0,2,0,NA,NA +23813003,Using state machines to model the Ion Torrent sequencing process and to improve read error rates.,"

Motivation

The importance of fast and affordable DNA sequencing methods for current day life sciences, medicine and biotechnology is hard to overstate. A major player is Ion Torrent, a pyrosequencing-like technology which produces flowgrams--sequences of incorporation values--which are converted into nucleotide sequences by a base-calling algorithm. Because of its exploitation of ubiquitous semiconductor technology and innovation in chemistry, Ion Torrent has been gaining popularity since its debut in 2011. Despite the advantages, however, Ion Torrent read accuracy remains a significant concern.

Results

We present FlowgramFixer, a new algorithm for converting flowgrams into reads. Our key observation is that the incorporation signals of neighboring flows, even after normalization and phase correction, carry considerable mutual information and are important in making the correct base-call. We therefore propose that base-calling of flowgrams should be done on a read-wide level, rather than one flow at a time. We show that this can be done in linear-time by combining a state machine with a Viterbi algorithm to find the nucleotide sequence that maximizes the likelihood of the observed flowgram. FlowgramFixer is applicable to any flowgram-based sequencing platform. We demonstrate FlowgramFixer's superior performance on Ion Torrent Escherichia coli data, with a 4.8% improvement in the number of high-quality mapped reads and a 7.1% improvement in the number of uniquely mappable reads.

Availability

Binaries and source code of FlowgramFixer are freely available at: http://www.cs.tau.ac.il/~davidgo5/flowgramfixer.html.","hji,kes",0,0,0,2,0,NA,NA +23823315,"CluGene: A Bioinformatics Framework for the Identification of Co-Localized, Co-Expressed and Co-Regulated Genes Aimed at the Investigation of Transcriptional Regulatory Networks from High-Throughput Expression Data.","The full understanding of the mechanisms underlying transcriptional regulatory networks requires unravelling of complex causal relationships. Genome high-throughput technologies produce a huge amount of information pertaining gene expression and regulation; however, the complexity of the available data is often overwhelming and tools are needed to extract and organize the relevant information. This work starts from the assumption that the observation of co-occurrent events (in particular co-localization, co-expression and co-regulation) may provide a powerful starting point to begin unravelling transcriptional regulatory networks. Co-expressed genes often imply shared functional pathways; co-expressed and functionally related genes are often co-localized, too; moreover, co-expressed and co-localized genes are also potential targets for co-regulation; finally, co-regulation seems more frequent for genes mapped to proximal chromosome regions. Despite the recognized importance of analysing co-occurrent events, no bioinformatics solution allowing the simultaneous analysis of co-expression, co-localization and co-regulation is currently available. Our work resulted in developing and valuating CluGene, a software providing tools to analyze multiple types of co-occurrences within a single interactive environment allowing the interactive investigation of combined co-expression, co-localization and co-regulation of genes. The use of CluGene will enhance the power of testing hypothesis and experimental approaches aimed at unravelling transcriptional regulatory networks. 
The software is freely available at http://bioinfolab.unipg.it/.","hji,kes",0,0,0,2,0,NA,NA +23824634,Validating a Coarse-Grained Potential Energy Function through Protein Loop Modelling.,"Coarse-grained (CG) methods for sampling protein conformational space have the potential to increase computational efficiency by reducing the degrees of freedom. The gain in computational efficiency of CG methods often comes at the expense of non-protein like local conformational features. This could cause problems when transitioning to full atom models in a hierarchical framework. Here, a CG potential energy function was validated by applying it to the problem of loop prediction. A novel method to sample the conformational space of backbone atoms was benchmarked using a standard test set consisting of 351 distinct loops. This method used a sequence-independent CG potential energy function representing the protein using [Formula: see text]-carbon positions only and sampling conformations with a Monte Carlo simulated annealing based protocol. Backbone atoms were added using a method previously described and then gradient minimised in the Rosetta force field. Despite the CG potential energy function being sequence-independent, the method performed similarly to methods that explicitly use either fragments of known protein backbones with similar sequences or residue-specific [Formula: see text]/[Formula: see text]-maps to restrict the search space. The method was also able to predict with sub-Angstrom accuracy two out of seven loops from recently solved crystal structures of proteins with low sequence and structure similarity to previously deposited structures in the PDB. 
The ability to sample realistic loop conformations directly from a potential energy function enables the incorporation of additional geometric restraints and the use of more advanced sampling methods in a way that is not possible to do easily with fragment replacement methods and also enable multi-scale simulations for protein design and protein structure prediction. These restraints could be derived from experimental data or could be design restraints in the case of computational protein design. C++ source code is available for download from http://www.sbg.bio.ic.ac.uk/phyre2/PD2/.","hji,kes",0,0,0,2,0,NA,NA +23829391,An untargeted metabolomic workflow to improve structural characterization of metabolites.,"Mass spectrometry-based metabolomics relies on MS(2) data for structural characterization of metabolites. To obtain the high-quality MS(2) data necessary to support metabolite identifications, ions of interest must be purely isolated for fragmentation. Here, we show that metabolomic MS(2) data are frequently characterized by contaminating ions that prevent structural identification. Although using narrow-isolation windows can minimize contaminating MS(2) fragments, even narrow windows are not always selective enough, and they can complicate data analysis by removing isotopic patterns from MS(2) spectra. Moreover, narrow windows can significantly reduce sensitivity. In this work, we introduce a novel, two-part approach for performing metabolomic identifications that addresses these issues. First, we collect MS(2) scans with less stringent isolation settings to obtain improved sensitivity at the expense of specificity. Then, by evaluating MS(2) fragment intensities as a function of retention time and precursor mass targeted for MS(2) analysis, we obtain deconvolved MS(2) spectra that are consistent with pure standards and can therefore be used for metabolite identification. 
The value of our approach is highlighted with metabolic extracts from brain, liver, astrocytes, as well as nerve tissue, and performance is evaluated by using pure metabolite standards in combination with simulations based on raw MS(2) data from the METLIN metabolite database. A R package implementing the algorithms used in our workflow is available on our laboratory website ( http://pattilab.wustl.edu/decoms2.php ).","hji,kes",0,0,0,2,0,NA,NA +23851377,CoLIde: a bioinformatics tool for CO-expression-based small RNA Loci Identification using high-throughput sequencing data.,"Small RNAs (sRNAs) are 20-25 nt non-coding RNAs that act as guides for the highly sequence-specific regulatory mechanism known as RNA silencing. Due to the recent increase in sequencing depth, a highly complex and diverse population of sRNAs in both plants and animals has been revealed. However, the exponential increase in sequencing data has also made the identification of individual sRNA transcripts corresponding to biological units (sRNA loci) more challenging when based exclusively on the genomic location of the constituent sRNAs, hindering existing approaches to identify sRNA loci. To infer the location of significant biological units, we propose an approach for sRNA loci detection called CoLIde (Co-expression based sRNA Loci Identification) that combines genomic location with the analysis of other information such as variation in expression levels (expression pattern) and size class distribution. For CoLIde, we define a locus as a union of regions sharing the same pattern and located in close proximity on the genome. Biological relevance, detected through the analysis of size class distribution, is also calculated for each locus. CoLIde can be applied on ordered (e.g., time-dependent) or un-ordered (e.g., organ, mutant) series of samples both with or without biological/technical replicates. 
The method reliably identifies known types of loci and shows improved performance on sequencing data from both plants (e.g., A. thaliana, S. lycopersicum) and animals (e.g., D. melanogaster) when compared with existing locus detection techniques. CoLIde is available for use within the UEA Small RNA Workbench which can be downloaded from: http://srna-workbench.cmp.uea.ac.uk.","hji,kes",0,0,0,2,0,NA,NA +23853063,Exhaustively characterizing feasible logic models of a signaling network using Answer Set Programming.,"

Motivation

Logic modeling is a useful tool to study signal transduction across multiple pathways. Logic models can be generated by training a network containing the prior knowledge to phospho-proteomics data. The training can be performed using stochastic optimization procedures, but these are unable to guarantee a global optimum or to report the complete family of feasible models. This, however, is essential to provide precise insight in the mechanisms underlying signal transduction and generate reliable predictions.

Results

We propose the use of Answer Set Programming to explore exhaustively the space of feasible logic models. Toward this end, we have developed caspo, an open-source Python package that provides a powerful platform to learn and characterize logic models by leveraging the rich modeling language and solving technologies of Answer Set Programming. We illustrate the usefulness of caspo by revisiting a model of pro-growth and inflammatory pathways in liver cells. We show that, if experimental error is taken into account, there are thousands (11 700) of models compatible with the data. Despite the large number, we can extract structural features from the models, such as links that are always (or never) present or modules that appear in a mutual exclusive fashion. To further characterize this family of models, we investigate the input-output behavior of the models. We find 91 behaviors across the 11 700 models and we suggest new experiments to discriminate among them. Our results underscore the importance of characterizing in a global and exhaustive manner the family of feasible models, with important implications for experimental design.

Availability

caspo is freely available for download (license GPLv3) and as a web service at http://caspo.genouest.org/.

Supplementary information

Supplementary materials are available at Bioinformatics online.

Contact

santiago.videla@irisa.fr.","hji,kes",0,0,0,2,0,NA,NA +23930024,PROcEED: Probabilistic reverse dosimetry approaches for estimating exposure distributions.,"

Unlabelled

As increasing amounts of biomonitoring survey data become available, a new discipline focused on converting such data into estimates of chemical exposures has developed. Reverse dosimetry uses a pharmacokinetic model along with measured biomarker concentrations to determine the plausible exposure concentrations-- a critical step to incorporate ground-truthing experimental data into a distribution of probable exposures that reduces model uncertainty and variability. At the population level, probabilistic reverse dosimetry can utilize a distribution of measured biomarker concentrations to identify the most likely exposure concentrations (or intake doses) experienced by the study participants. PROcEED is software that provides access to probabilistic reverse dosimetry approaches for estimating exposure distributions via a simple user interface.

Availability

PROcEED along with installation instructions is freely available for download from http://www.epa.gov/heasd/products/proceed/proceed.html.","hji,kes",0,0,0,2,0,NA,NA +23945724,"Efficacy and safety of new complementary feeding guidelines with an emphasis on red meat consumption: a randomized trial in Bogota, Colombia.","

Background

Iron deficiency and poor linear growth are common in infants from deprived socioeconomic backgrounds and may be associated with inadequate complementary feeding (CF) practices.

Objective

We tested the hypothesis that new CF guidelines emphasizing meat as a source of iron and zinc would improve linear growth, iron, and zinc status in infants living in poor socioeconomic circumstances in Bogota, Colombia.

Design

A total of 85 term infants who were exclusively breastfed for ≥4 mo were randomly assigned at 6 mo of age to a control group [CG (n = 43); current advice] or intervention group (new guidelines group [NGG (n = 42); with counseling to 1) continue breastfeeding, 2) offer red meat ≥3 d/wk, and 3) offer fruit and vegetables daily]). Main outcomes were 1) linear growth from 6 to 12 mo of age; 2) hemoglobin, hematocrit, iron [serum ferritin (SF)], and zinc status at 12 mo of age; and 3) meat intake at 12 mo of age (by using a food-frequency questionnaire).

Results

A total of 38 infants/group provided data at 12 mo of age. NGG infants had significantly higher red meat intake (mean ± SD: 5.4 ± 1.8 compared with 3.5 ± 1.7 d/wk at 12 mo of age; P < 0.001), higher hemoglobin and hematocrit at 12 mo of age, and a significantly greater increase in hemoglobin (mean ± SD change: 0.41 ± 0.8 compared with -0.13 ± 1.0; P = 0.01) and hematocrit (1.04 ± 2.2 compared with -0.15 ± 2.4; P = 0.03) from 6 to 12 mo of age than those in CG infants. There were no significant differences in linear growth from 6 to 12 mo of age or in SF or zinc.

Conclusions

The new guidelines showed efficacy with higher red meat intake and positive effects on hemoglobin and hematocrit. The intervention was acceptable and affordable for most mothers. These preliminary results suggest that the intervention merits investigation in a larger cohort with longer-term follow-up. This trial was registered at http://isrctn.org as ISRCTN57733004.","hji,kes",0,0,0,2,0,NA,NA +23956306,Incorporating prior knowledge into Gene Network Study.,"

Motivation

A major goal in genomic research is to identify genes that may jointly influence a biological response. From many years of intensive biomedical research, a large body of biological knowledge, or pathway information, has accumulated in available databases. There is a strong interest in leveraging these pathways to improve the statistical power and interpretability in studying gene networks associated with complex phenotypes. This prior information is a valuable complement to large-scale genomic data such as gene expression data generated from microarrays. However, it is a non-trivial task to effectively integrate available biological knowledge into gene expression data when reconstructing gene networks.

Results

In this article, we developed and applied a Lasso method from a Bayesian perspective, a method we call prior Lasso (pLasso), for the reconstruction of gene networks. In this method, we partition edges between genes into two subsets: one subset of edges is present in known pathways, whereas the other has no prior information associated. Our method assigns different prior distributions to each subset according to a modified Bayesian information criterion that incorporates prior knowledge on both the network structure and the pathway information. Simulation studies have indicated that the method is more effective in recovering the underlying network than a traditional Lasso method that does not use the prior information. We applied pLasso to microarray gene expression datasets, where we used information from the Pathway Commons (PC) and the Kyoto Encyclopedia of Genes and Genomes (KEGG) as prior information for the network reconstruction, and successfully identified network hub genes associated with clinical outcome in cancer patients.

Availability

The source code is available at http://nba.uth.tmc.edu/homepage/liu/pLasso.","hji,kes",0,0,0,2,0,NA,NA +23975194,A modular framework for gene set analysis integrating multilevel omics data.,"Modern high-throughput methods allow the investigation of biological functions across multiple 'omics' levels. Levels include mRNA and protein expression profiling as well as additional knowledge on, for example, DNA methylation and microRNA regulation. The reason for this interest in multi-omics is that actual cellular responses to different conditions are best explained mechanistically when taking all omics levels into account. To map gene products to their biological functions, public ontologies like Gene Ontology are commonly used. Many methods have been developed to identify terms in an ontology, overrepresented within a set of genes. However, these methods are not able to appropriately deal with any combination of several data types. Here, we propose a new method to analyse integrated data across multiple omics-levels to simultaneously assess their biological meaning. We developed a model-based Bayesian method for inferring interpretable term probabilities in a modular framework. Our Multi-level ONtology Analysis (MONA) algorithm performed significantly better than conventional analyses of individual levels and yields best results even for sophisticated models including mRNA fine-tuning by microRNAs. The MONA framework is flexible enough to allow for different underlying regulatory motifs or ontologies. It is ready-to-use for applied researchers and is available as a standalone application from http://icb.helmholtz-muenchen.de/mona.","hji,kes",0,0,0,2,0,NA,NA +23981350,Inferring nucleosome positions with their histone mark annotation from ChIP data.,"

Motivation

The nucleosome is the basic repeating unit of chromatin. It contains two copies each of the four core histones H2A, H2B, H3 and H4 and about 147 bp of DNA. The residues of the histone proteins are subject to numerous post-translational modifications, such as methylation or acetylation. Chromatin immunoprecipitation followed by sequencing (ChIP-seq) is a technique that provides genome-wide occupancy data of these modified histone proteins, and it requires appropriate computational methods.

Results

We present NucHunter, an algorithm that uses the data from ChIP-seq experiments directed against many histone modifications to infer positioned nucleosomes. NucHunter annotates each of these nucleosomes with the intensities of the histone modifications. We demonstrate that these annotations can be used to infer nucleosomal states with distinct correlations to underlying genomic features and chromatin-related processes, such as transcriptional start sites, enhancers, elongation by RNA polymerase II and chromatin-mediated repression. Thus, NucHunter is a versatile tool that can be used to predict positioned nucleosomes from a panel of histone modification ChIP-seq experiments and infer distinct histone modification patterns associated to different chromatin states.

Availability

The software is available at http://epigen.molgen.mpg.de/nuchunter/.","hji,kes",0,0,0,2,0,NA,NA +24023630,PhagoSight: an open-source MATLAB® package for the analysis of fluorescent neutrophil and macrophage migration in a zebrafish model.,"Neutrophil migration in zebrafish larvae is increasingly used as a model to study the response of these leukocytes to different determinants of the cellular inflammatory response. However, it remains challenging to extract comprehensive information describing the behaviour of neutrophils from the multi-dimensional data sets acquired with widefield or confocal microscopes. Here, we describe PhagoSight, an open-source software package for the segmentation, tracking and visualisation of migrating phagocytes in three dimensions. The algorithms in PhagoSight extract a large number of measurements that summarise the behaviour of neutrophils, but that could potentially be applied to any moving fluorescent cells. To derive a useful panel of variables quantifying aspects of neutrophil migratory behaviour, and to demonstrate the utility of PhagoSight, we evaluated changes in the volume of migrating neutrophils. Cell volume increased as neutrophils migrated towards the wound region of injured zebrafish. PhagoSight is openly available as MATLAB m-files under the GNU General Public License. Synthetic data sets and a comprehensive user manual are available from http://www.phagosight.org.","hji,kes",0,0,0,2,0,NA,NA +24048357,miREval 2.0: a web tool for simple microRNA prediction in genome sequences.,"

Result

We have developed miREval 2.0, an online tool that can simultaneously search up to 100 sequences for novel microRNAs (miRNAs) in multiple organisms. miREval 2.0 uses multiple published in silico approaches to detect miRNAs in sequences of interest. This tool can be used to discover miRNAs from DNA sequences or to validate candidates from sequencing data.

Availability

http://mimirna.centenary.org.au/mireval/.","hji,kes",0,0,0,2,0,NA,NA +24058397,A consistency-based feature selection method allied with linear SVMs for HIV-1 protease cleavage site prediction.,"

Background

Predicting type-1 Human Immunodeficiency Virus (HIV-1) protease cleavage site in protein molecules and determining its specificity is an important task which has attracted considerable attention in the research community. Achievements in this area are expected to result in effective drug design (especially for HIV-1 protease inhibitors) against this life-threatening virus. However, some drawbacks (like the shortage of the available training data and the high dimensionality of the feature space) turn this task into a difficult classification problem. Thus, various machine learning techniques, and specifically several classification methods have been proposed in order to increase the accuracy of the classification model. In addition, for several classification problems, which are characterized by having few samples and many features, selecting the most relevant features is a major factor for increasing classification accuracy.

Results

We propose for HIV-1 data a consistency-based feature selection approach in conjunction with recursive feature elimination of support vector machines (SVMs). We used various classifiers for evaluating the results obtained from the feature selection process. We further demonstrated the effectiveness of our proposed method by comparing it with a state-of-the-art feature selection method applied on HIV-1 data, and we evaluated the reported results based on attributes which have been selected from different combinations.

Conclusion

Applying feature selection on training data before realizing the classification task seems to be a reasonable data-mining process when working with types of data similar to HIV-1. On HIV-1 data, some feature selection or extraction operations in conjunction with different classifiers have been tested and noteworthy outcomes have been reported. These facts motivate for the work presented in this paper.

Software availability

The software is available at http://ozyer.etu.edu.tr/c-fs-svm.rar. The software can be downloaded at esnag.etu.edu.tr/software/hiv_cleavage_site_prediction.rar; you will find a readme file which explains how to set the software in order to work.","hji,kes",0,0,0,2,0,NA,NA +24067102,DaGO-Fun: tool for Gene Ontology-based functional analysis using term information content measures.,"

Background

The use of Gene Ontology (GO) data in protein analyses have largely contributed to the improved outcomes of these analyses. Several GO semantic similarity measures have been proposed in recent years and provide tools that allow the integration of biological knowledge embedded in the GO structure into different biological analyses. There is a need for a unified tool that provides the scientific community with the opportunity to explore these different GO similarity measure approaches and their biological applications.

Results

We have developed DaGO-Fun, an online tool available at http://web.cbio.uct.ac.za/ITGOM, which incorporates many different GO similarity measures for exploring, analyzing and comparing GO terms and proteins within the context of GO. It uses GO data and UniProt proteins with their GO annotations as provided by the Gene Ontology Annotation (GOA) project to precompute GO term information content (IC), enabling rapid response to user queries.

Conclusions

The DaGO-Fun online tool presents the advantage of integrating all the relevant IC-based GO similarity measures, including topology- and annotation-based approaches to facilitate effective exploration of these measures, thus enabling users to choose the most relevant approach for their application. Furthermore, this tool includes several biological applications related to GO semantic similarity scores, including the retrieval of genes based on their GO annotations, the clustering of functionally related genes within a set, and term enrichment analysis.","hji,kes",0,0,0,2,0,NA,analysis only I think… +24108511,Antibiotics for the prophylaxis of bacterial endocarditis in dentistry.,"

Background

Infective endocarditis is a severe infection arising in the lining of the chambers of the heart with a high mortality rate. Many dental procedures cause bacteraemia and it was believed that this may lead to bacterial endocarditis (BE) in a few people. Guidelines in many countries have recommended that prior to invasive dental procedures antibiotics are administered to people at high risk of endocarditis. However, recent guidance by the National Institute for Health and Care Excellence (NICE) in England and Wales has recommended that antibiotics are not required.

Objectives

To determine whether prophylactic antibiotic administration, compared to no such administration or placebo, before invasive dental procedures in people at risk or at high risk of bacterial endocarditis influences mortality, serious illness or the incidence of endocarditis.

Search methods

The following electronic databases were searched: the Cochrane Oral Health Group's Trials Register (to 21 January 2013), the Cochrane Central Register of Controlled Trials (CENTRAL) (The Cochrane Library 2012, Issue 12), MEDLINE via OVID (1946 to 21 January 2013) and EMBASE via OVID (1980 to 21 January 2013). We searched for ongoing trials in the US National Institutes of Health Trials Register (http://clinicaltrials.gov) and the metaRegister of Controlled Trials (http://www.controlled-trials.com/mrct/). No restrictions were placed on the language or date of publication when searching the electronic databases.

Selection criteria

Due to the low incidence of BE it was anticipated that few if any trials would be located. For this reason, cohort and case-control studies were included where suitably matched control or comparison groups had been studied. The intervention was the administration of antibiotic, compared to no such administration, before a dental procedure in people with an increased risk of BE. Cohort studies would need to follow those individuals at increased risk and assess outcomes following any invasive dental procedures, grouping by whether prophylaxis was received or not. Included case-control studies would need to match people who had developed endocarditis (and who were known to be at increased risk before undergoing an invasive dental procedure preceding the onset of endocarditis) with those at similar risk but who had not developed endocarditis. Outcomes of interest were mortality or serious adverse events requiring hospital admission; development of endocarditis following any dental procedure in a defined time period; development of endocarditis due to other non-dental causes; any recorded adverse events to the antibiotics; and cost implications of the antibiotic provision for the care of those patients who developed endocarditis.

Data collection and analysis

Two review authors independently selected studies for inclusion then assessed risk of bias and extracted data from the included study.

Main results

No randomised controlled trials (RCTs), controlled clinical trials (CCTs) or cohort studies were included. One case-control study met the inclusion criteria. It collected all the cases of endocarditis in the Netherlands over two years, finding a total of 24 people who developed endocarditis within 180 days of an invasive dental procedure, definitely requiring prophylaxis according to current guidelines, and who were at increased risk of endocarditis due to a pre-existing cardiac problem. This study included participants who died because of the endocarditis (using proxies). Controls attended local cardiology outpatient clinics for similar cardiac problems, had undergone an invasive dental procedure within the past 180 days, and were matched by age with the cases. No significant effect of penicillin prophylaxis on the incidence of endocarditis could be seen. No data were found on other outcomes.

Authors' conclusions

There remains no evidence about whether antibiotic prophylaxis is effective or ineffective against bacterial endocarditis in people at risk who are about to undergo an invasive dental procedure. It is not clear whether the potential harms and costs of antibiotic administration outweigh any beneficial effect. Ethically, practitioners need to discuss the potential benefits and harms of antibiotic prophylaxis with their patients before a decision is made about administration.","hji,kes",0,0,0,2,0,NA,NA +24127838,"Penile vibratory stimulation in the recovery of urinary continence and erectile function after nerve-sparing radical prostatectomy: a randomized, controlled trial.","

Objective

To examine the effect of penile vibratory stimulation (PVS) in the preservation and restoration of erectile function and urinary continence in conjunction with nerve-sparing radical prostatectomy (RP).

Patients and methods

The present study was conducted between July 2010 and March 2013 as a randomized prospective trial at two university hospitals. Eligible participants were continent men with an International Index of Erectile Function-5 (IIEF-5) score of at least 18, scheduled to undergo nerve-sparing RP. Patients were randomized to a PVS group or a control group. Patients in the PVS group were instructed in using a PVS device (FERTI CARE® vibrator). Stimulation was performed at the frenulum once daily by the patients in their own homes for at least 1 week before surgery. After catheter removal, daily PVS was re-initiated for a period of 6 weeks. Participants were evaluated at 3, 6 and 12 months after surgery with the IIEF-5 questionnaire and questions regarding urinary bother. Patients using up to one pad daily for security reasons only were considered continent. The study was registered at http://clinicaltrials.gov/ (NCT01067261).

Results

Data from 68 patients were available for analyses (30 patients randomized to PVS and 38 patients randomized to the control group). The IIEF-5 score was highest in the PVS group at all time points after surgery with a median score of 18 vs 7.5 in the control group at 12 months (P = 0.09), but the difference only reached borderline significance. At 12 months, 16/30 (53%) patients in the PVS group had reached an IIEF-5 score of at least 18, while this was the case for 12/38 (32%) patients in the control group (P = 0.07). There were no significant differences in the proportions of continent patients between groups at 3, 6 or 12 months. At 12 months 90% of the PVS patients were continent, while 94.7% of the control patients were continent (P = 0.46).

Conclusion

The present study did not document a significant effect of PVS. However, the method proved to be acceptable for most patients and there was a trend towards better erectile function with PVS. More studies are needed to explore this possible effect further.","hji,kes",0,0,0,2,0,NA,NA +24170398,T-Coffee: Tree-based consistency objective function for alignment evaluation.,"T-Coffee, for Tree-based consistency objective function for alignment evaluation, is a versatile multiple sequence alignment (MSA) method suitable for aligning virtually any type of biological sequences. T-Coffee provides more than a simple sequence aligner; rather it is a framework in which alternative alignment methods and/or extra information (i.e., structural, evolutionary, or experimental information) can be combined to reach more accurate and more meaningful MSAs. T-Coffee can be used either by running input data via the Web server ( http://tcoffee.crg.cat/apps/tcoffee/index.html ) or by downloading the T-Coffee package. Here, we present how the package can be used in its command line mode to carry out the most common tasks and multiply align proteins, DNA, and RNA sequences. This chapter particularly emphasizes on the description of T-Coffee special flavors also called """"modes,"""" designed to address particular biological problems.","hji,kes",0,0,0,2,0,NA,NA +24170408,PROMALS3D: multiple protein sequence alignment enhanced with evolutionary and three-dimensional structural information.,"Multiple sequence alignment (MSA) is an essential tool with many applications in bioinformatics and computational biology. Accurate MSA construction for divergent proteins remains a difficult computational task. The constantly increasing protein sequences and structures in public databases could be used to improve alignment quality. PROMALS3D is a tool for protein MSA construction enhanced with additional evolutionary and structural information from database searches. 
PROMALS3D automatically identifies homologs from sequence and structure databases for input proteins, derives structure-based constraints from alignments of three-dimensional structures, and combines them with sequence-based constraints of profile-profile alignments in a consistency-based framework to construct high-quality multiple sequence alignments. PROMALS3D output is a consensus alignment enriched with sequence and structural information about input proteins and their homologs. PROMALS3D Web server and package are available at http://prodata.swmed.edu/PROMALS3D.","hji,kes",0,0,0,2,0,NA,NA +24206655,Association of a polymorphism in PON-1 gene with steroid-induced osteonecrosis of femoral head in Chinese Han population.,"

Background

Treatment with steroids covers a wide spectrum of diseases in clinic. However, some users are suffering from serious side effects of steroid administration, while we enjoy the benefit it brings about. Osteonecrosis of the femoral head (ONFH) is a troublesome one among them. Recent studies have demonstrated that lipid metabolism disorder may play a vital role in pathogenesis of ONFH and mutation of the paraoxonase-1 (PON-1) gene may be involved in the occurrence of this disease. However, the relationship between polymorphisms of PON-1 and ONFH has not been thoroughly studied. The aim of this study was to determine whether PON-1 polymorphisms are associated with steroid-induced ONFH through a cohort study among Chinese Han population.

Methods

This trial applied a case-control scheme to compare the clinical data including PON-1 SNP among 94 patients and 106 control subjects to analyze the association between SNP and risk of steroid-induced ONFH. Time of Flight Mass Spectrometer is utilized for genotyping and the result was analyzed in multivariate analysis models.

Results

According to polymorphism test of rs662, its SNP was significantly associated with the risk of ONFH in overdominant analysis model [P value: 0.022; odds ratio (OR): 0.39]. However, genotype frequencies of rs662 of PON-1 gene between case and control group showed no differences (P > 0.05).

Conclusions

Our data suggest for the first time that SNP (rs662) of the PON-1 gene was associated with the risk of steroid-induced ONFH. In addition, PAI-1 SNPs may play an important role in pathogenesis of ONFH.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticphatology.diagnomx.eu/vs/1501829501107336.","hji,kes",0,0,0,2,0,NA,NA +24250114,SNPAAMapper: An efficient genome-wide SNP variant analysis pipeline for next-generation sequencing data.,"

Unlabelled

Many NGS analysis tools focusing on read alignment and variant calling functions for exome sequencing data have been developed in recent years. However, publicly available tools dealing with the downstream analysis of genome-wide variants are fewer and have limited functionality. We developed SNPAAMapper, a novel variant analysis pipeline that can effectively classify variants by region (e.g. CDS, UTRs, intron, upstream, downstream), predict amino acid change type (e.g. synonymous, non-synonymous mutation), and prioritize mutation effects (e.g. CDS versus UTRs). Additional functionality afforded by our pipeline includes: checking variants at exon/intron junctions, customized homozygosity and allele frequency cutoff parameters, and annotation of known variants with dbSNP information, listing original and mutated amino acid sequences containing variants. The final result is reported in a spreadsheet format table containing all variant associated information and prioritized amino acids effects for investigators to examine.

Availability

Perl scripts and required input files are available on the web at http://www.ccmb.med.umich.edu/ccdu/SNPAAMapper.","hji,kes",0,0,0,2,0,NA,NA +24259518,Quantitative proteomic profiling reveals novel region-specific markers in the adult mouse brain.,"Despite major advances in neuroscience, a comprehensive understanding of the structural and functional components of the adult brain compartments remains to be fully elucidated at a quantitative molecular level. Indeed, over half of the soluble- and membrane-annotated proteins are currently unmapped within online digital brain atlases. In this study, two complementary approaches were used to assess the unique repertoire of proteins enriched within select regions of the adult mouse CNS, including the brain stem, cerebellum, and remaining brain hemispheres. Of the 1200 proteins visualized by 2D-DIGE, approximately 150 (including cytosolic and membrane proteins) were found to exhibit statistically significant changes in relative abundance thus representing putative region-specific brain markers. In addition to using a high-precision (18) O-labeling strategy for the quantitative LC-MS/MS mapping of membrane proteins isolated from myelin-enriched fractions, we have identified over 1000 proteins that have yet to be described in any other mammalian myelin proteome. A comparison of our myelin proteome was made to an existing transcriptome database containing mRNA abundance profiles during oligodendrocyte differentiation and has confirmed statistically significant abundance changes for ~500 of these newly mapped proteins, thus revealing new roles in oligodendrocyte and myelin biology. These data offer a resource for the neuroscience community studying the molecular basis for specialized neuronal activities in the CNS and myelin-related disorders. 
The MS proteomics data associated with this manuscript have been deposited to the ProteomeXchange Consortium with the dataset identifier PXD000327 (http://proteomecentral.proteomexchange.org/dataset/PXD000327).","hji,kes",0,0,0,2,0,NA,data deposited as referenced +24300438,ISRNA: an integrative online toolkit for short reads from high-throughput sequencing data.,"

Unlabelled

Integrative Short Reads NAvigator (ISRNA) is an online toolkit for analyzing high-throughput small RNA sequencing data. Besides the high-speed genome mapping function, ISRNA provides statistics for genomic location, length distribution and nucleotide composition bias analysis of sequence reads. Number of reads mapped to known microRNAs and other classes of short non-coding RNAs, coverage of short reads on genes, expression abundance of sequence reads as well as some other analysis functions are also supported. The versatile search functions enable users to select sequence reads according to their sub-sequences, expression abundance, genomic location, relationship to genes, etc. A specialized genome browser is integrated to visualize the genomic distribution of short reads. ISRNA also supports management and comparison among multiple datasets.

Availability

ISRNA is implemented in Java/C++/Perl/MySQL and can be freely accessed at http://omicslab.genetics.ac.cn/ISRNA/.","hji,kes",0,0,0,2,0,NA,not available +24307635,Comparative proteomics reveals key proteins recruited at the nucleoid of Deinococcus after irradiation-induced DNA damage.,"The nucleoids of radiation-resistant Deinococcus species show a high degree of compaction maintained after ionizing irradiation. We identified proteins recruited after irradiation in nucleoids of Deinococcus radiodurans and Deinococcus deserti by means of comparative proteomics. Proteins in nucleoid-enriched fractions from unirradiated and irradiated Deinococcus were identified and semiquantified by shotgun proteomics. The ssDNA-binding protein SSB, DNA gyrase subunits GyrA and GyrB, DNA topoisomerase I, RecA recombinase, UvrA excinuclease, RecQ helicase, DdrA, DdrB, and DdrD proteins were found in significantly higher amounts in irradiated nucleoids of both Deinococcus species. We observed, by immunofluorescence microscopy, the subcellular localization of these proteins in D. radiodurans, showing for the first time the recruitment of the DdrD protein into the D. radiodurans nucleoid. We specifically followed the kinetics of recruitment of RecA, DdrA, and DdrD to the nucleoid after irradiation. Remarkably, RecA proteins formed irregular filament-like structures 1 h after irradiation, before being redistributed throughout the cells by 3 h post-irradiation. Comparable dynamics of DdrD localization were observed, suggesting a possible functional interaction between RecA and DdrD. Several proteins involved in nucleotide synthesis were also seen in higher quantities in the nucleoids of irradiated cells, indicative of the existence of a mechanism for orchestrating the presence of proteins involved in DNA metabolism in nucleoids in response to massive DNA damage. 
All MS data have been deposited in the ProteomeXchange with identifier PXD000196 (http://proteomecentral.proteomexchange.org/dataset/PXD000196).","hji,kes",0,0,0,2,0,NA,data deposited as referenced +24312116,The power of regional heritability analysis for rare and common variant detection: simulations and application to eye biometrical traits.,"Genome-wide association studies (GWAS) have provided valuable insights into the genetic basis of complex traits. However, they have explained relatively little trait heritability. Recently, we proposed a new analytical approach called regional heritability mapping (RHM) that captures more of the missing genetic variation. This method is applicable both to related and unrelated populations. Here, we demonstrate the power of RHM in comparison with single-SNP GWAS and gene-based association approaches under a wide range of scenarios with variable numbers of quantitative trait loci (QTL) with common and rare causal variants in a narrow genomic region. Simulations based on real genotype data were performed to assess power to capture QTL variance, and we demonstrate that RHM has greater power to detect rare variants and/or multiple alleles in a region than other approaches. In addition, we show that RHM can capture more accurately the QTL variance, when it is caused by multiple independent effects and/or rare variants. We applied RHM to analyze three biometrical eye traits for which single-SNP GWAS have been published or performed to evaluate the effectiveness of this method in real data analysis and detected some additional loci which were not detected by other GWAS methods. RHM has the potential to explain some of missing heritability by capturing variance caused by QTL with low MAF and multiple independent QTL in a region, not captured by other GWAS methods. 
RHM analyses can be implemented using the software REACTA (http://www.epcc.ed.ac.uk/projects-portfolio/reacta).","hji,kes",0,0,0,2,0,NA,NA +24313344,"Protannotator: a semiautomated pipeline for chromosome-wise functional annotation of the """"missing"""" human proteome.","The chromosome-centric human proteome project (C-HPP) aims to define the complete set of proteins encoded in each human chromosome. The neXtProt database (September 2013) lists 20,128 proteins for the human proteome, of which 3831 human proteins (~19%) are considered """"missing"""" according to the standard metrics table (released September 27, 2013). In support of the C-HPP initiative, we have extended the annotation strategy developed for human chromosome 7 """"missing"""" proteins into a semiautomated pipeline to functionally annotate the """"missing"""" human proteome. This pipeline integrates a suite of bioinformatics analysis and annotation software tools to identify homologues and map putative functional signatures, gene ontology, and biochemical pathways. From sequential BLAST searches, we have primarily identified homologues from reviewed nonhuman mammalian proteins with protein evidence for 1271 (33.2%) """"missing"""" proteins, followed by 703 (18.4%) homologues from reviewed nonhuman mammalian proteins and subsequently 564 (14.7%) homologues from reviewed human proteins. Functional annotations for 1945 (50.8%) """"missing"""" proteins were also determined. To accelerate the identification of """"missing"""" proteins from proteomics studies, we generated proteotypic peptides in silico. Matching these proteotypic peptides to ENCODE proteogenomic data resulted in proteomic evidence for 107 (2.8%) of the 3831 """"missing"""" proteins, while evidence from a recent membrane proteomic study supported the existence for another 15 """"missing"""" proteins. 
The chromosome-wise functional annotation of all """"missing"""" proteins is freely available to the scientific community through our web server (http://biolinfo.org/protannotator).","hji,kes",0,0,0,2,0,NA,NA +24334622,Metagenomic frameworks for monitoring antibiotic resistance in aquatic environments.,"

Background

High-throughput genomic technologies offer new approaches for environmental health monitoring, including metagenomic surveillance of antibiotic resistance determinants (ARDs). Although natural environments serve as reservoirs for antibiotic resistance genes that can be transferred to pathogenic and human commensal bacteria, monitoring of these determinants has been infrequent and incomplete. Furthermore, surveillance efforts have not been integrated into public health decision making.

Objectives

We used a metagenomic epidemiology-based approach to develop an ARD index that quantifies antibiotic resistance potential, and we analyzed this index for common modal patterns across environmental samples. We also explored how metagenomic data such as this index could be conceptually framed within an early risk management context.

Methods

We analyzed 25 published data sets from shotgun pyrosequencing projects. The samples consisted of microbial community DNA collected from marine and freshwater environments across a gradient of human impact. We used principal component analysis to identify index patterns across samples.

Results

We observed significant differences in the overall index and index subcategory levels when comparing ecosystems more proximal versus distal to human impact. The selection of different sequence similarity thresholds strongly influenced the index measurements. Unique index subcategory modes distinguished the different metagenomes.

Conclusions

Broad-scale screening of ARD potential using this index revealed utility for framing environmental health monitoring and surveillance. This approach holds promise as a screening tool for establishing baseline ARD levels that can be used to inform and prioritize decision making regarding management of ARD sources and human exposure routes.

Citation

Port JA, Cullen AC, Wallace JC, Smith MN, Faustman EM. 2014. Metagenomic frameworks for monitoring antibiotic resistance in aquatic environments. Environ Health Perspect 122:222–228; http://dx.doi.org/10.1289/ehp.1307009","hji,kes",0,0,0,2,0,NA,NA +24339543,Phenotypic spectrum in uniparental disomy: Low incidence or lack of study?,"

Context

Alterations in the human chromosomal complement are expressed phenotypically ranging from (i) normal, via (ii) frequent fetal loss in otherwise normal person, to (iii) sub-clinical to severe mental retardation and dysmorphism in live births. A subtle and microscopically undetectable chromosomal alteration is uniparental disomy (UPD), which is known to be associated with distinct birth defects as per the chromosome involved and parental origin. UPD can be evident due to imprinted genes and/or activation of recessive mutations.

Aims

The present study comprises of data mining of published UPD cases with a focus on associated phenotypes. The goal was to identify non-random and recurrent associations between UPD and various genetic conditions, which can possibly indicate the presence of new imprinted genes.

Settings and design

Data mining was carried out using the homepage """"http://www.fish.uniklinikum-jena.de/UPD.html."""", an online catalog of published cases with UPD.

Materials and methods

The UPD cases having normal karyotype and with or without clinical findings were selected to analyze the associated phenotypes for each chromosome, maternal or paternal involved in UPD.

Results

Our results revealed many genetic conditions (other than the known UPD syndromes) to be associated with UPD. Even in cases of bad obstetric history as well as normal individuals chance detection of UPD has been reported.

Conclusions

The role of UPD in human genetic disorders needs to be studied by involving larger cohorts of individuals with birth defects as well as normal population. The genetic conditions were scrutinized in terms of inheritance patterns; majority of these were autosomal recessive indicating the role of UPD as an underlying mechanism.","hji,kes",0,0,0,2,0,NA,NA +24367574,Switch of sensitivity dynamics revealed with DyGloSA toolbox for dynamical global sensitivity analysis as an early warning for system's critical transition.,"Systems with bifurcations may experience abrupt irreversible and often unwanted shifts in their performance, called critical transitions. For many systems like climate, economy, ecosystems it is highly desirable to identify indicators serving as early warnings of such regime shifts. Several statistical measures were recently proposed as early warnings of critical transitions including increased variance, autocorrelation and skewness of experimental or model-generated data. The lack of automatized tool for model-based prediction of critical transitions led to designing DyGloSA - a MATLAB toolbox for dynamical global parameter sensitivity analysis (GPSA) of ordinary differential equations models. We suggest that the switch in dynamics of parameter sensitivities revealed by our toolbox is an early warning that a system is approaching a critical transition. We illustrate the efficiency of our toolbox by analyzing several models with bifurcations and predicting the time periods when systems can still avoid going to a critical transition by manipulating certain parameter values, which is not detectable with the existing SA techniques. DyGloSA is based on the SBToolbox2 and contains functions, which compute dynamically the global sensitivity indices of the system by applying four main GPSA methods: eFAST, Sobol's ANOVA, PRCC and WALS. It includes parallelized versions of the functions enabling significant reduction of the computational time (up to 12 times). 
DyGloSA is freely available as a set of MATLAB scripts at http://bio.uni.lu/systems_biology/software/dyglosa. It requires installation of MATLAB (versions R2008b or later) and the Systems Biology Toolbox2 available at www.sbtoolbox2.org. DyGloSA can be run on Windows and Linux systems, -32 and -64 bits.","hji,kes",0,0,0,2,0,NA,NA +24373114,'Beyond Milestones': a randomised controlled trial evaluating an innovative digital resource teaching quality observation of normal child development.,"

Aims

The study aimed to create and evaluate the educational effectiveness of a digital resource instructing paediatric trainees in a systematic approach to critical and quality observation of normal child development.

Methods

A digital educational resource was developed utilising the skills of an expert developmental paediatrician who was videoed assessing normal early child development at a series of critical stages. Videos illustrated aspects of language, sophistication of play and socialisation, cognition, and motor progress. Expert commentary, teaching text and summaries were used. A randomised controlled trial evaluated the resource. Paediatric trainees were recruited from The Sydney Children's Hospitals Network. Outcome measures were repeated at three time points (pre-teaching, immediate-post and 1 month) and included self-rated attitudes, knowledge of markers of development and observational expertise. Qualitative data on teaching usefulness were obtained through open-ended questions.

Results

Fifty-six paediatric trainees (registrar 79%, women 82%; mean age 31 years) completed the pre-assessment, 46 the immediate-post and 45 the 1-month follow-up (20% attrition). Compared with the Control group, the Teaching group scored higher over time on markers of development (P = 0.006), observational expertise (P < 0.0001), confidence (P = 0.035) and satisfaction (P < 0.0001). Teaching participants valued the video and expert commentary and reported improvement in confidence and understanding and acquiring a more structured approach.

Conclusions

The 'Beyond Milestones' free online resource for medical professionals improves knowledge, increases confidence and is useful, providing a structured approach to developmental assessment. The techniques taught can be applied to every paediatric consultation.","hji,kes",0,0,0,2,0,NA,clinical +24479843,Quantitative structure-property relationship modeling: a valuable support in high-throughput screening quality control.,"Evaluation of important pharmacokinetic properties such as hydrophobicity by high-throughput screening (HTS) methods is a major issue in drug discovery. In this paper, we present measurements of the chromatographic hydrophobicity index (CHI) on a subset of the French chemical library Chimiothèque Nationale (CN). The data were used in quantitative structure-property relationship (QSPR) modeling in order to annotate the CN. An algorithm is proposed to detect problematic molecules with large prediction errors, called outliers. In order to find an explanation for these large discrepancies between predicted and experimental values, these compounds were reanalyzed experimentally. As the first selected outliers indeed had experimental problems, including hydrolysis or sheer absence of expected structure, we herewith propose the use of QSPR as a support tool for quality control of screening data and encourage cooperation between experimental and theoretical teams to improve results. The corrected data were used to produce a model, which is freely available on our web server at http://infochim.u-strasbg.fr/webserv/VSEngine.html .","hji,kes",0,0,0,2,0,NA,NA +24507755,Systematic characterization of small RNAome during zebrafish early developmental stages.,"

Background

During early vertebrate development, various small non-coding RNAs (sRNAs) such as MicroRNAs (miRNAs) and Piwi-interacting RNAs (piRNAs) are dynamically expressed for orchestrating the maternal-to-zygotic transition (MZT). Systematic analysis of expression profiles of zebrafish small RNAome will be greatly helpful for understanding the sRNA regulation during embryonic development.

Results

We first determined the expression profiles of sRNAs during eight distinct stages of early zebrafish development by sRNA-seq technology. Integrative analyses with a new computational platform of CSZ (characterization of small RNAome for zebrafish) demonstrated an sRNA class transition from piRNAs to miRNAs as development proceeds. We observed that both the abundance and diversity of miRNAs are gradually increased, while the abundance is enhanced more dramatically than the diversity during development. However, although both the abundance and diversity of piRNAs are gradually decreased, the diversity was firstly increased then rapidly decreased. To evaluate the computational accuracy, the expression levels of four known miRNAs were experimentally validated. We also predicted 25 potentially novel miRNAs, whereas two candidates were verified by Northern blots.

Conclusions

Taken together, our analyses revealed the piRNA to miRNA transition as a conserved mechanism in zebrafish, although two different types of sRNAs exhibit distinct expression dynamics in abundance and diversity, respectively. Our study not only generated a better understanding for sRNA regulations in early zebrafish development, but also provided a useful platform for analyzing sRNA-seq data. The CSZ was implemented in Perl and freely downloadable at: http://csz.biocuckoo.org.","hji,kes",0,0,0,2,0,NA,NA +24524735,QSAR modeling of imbalanced high-throughput screening data in PubChem.,"Many of the structures in PubChem are annotated with activities determined in high-throughput screening (HTS) assays. Because of the nature of these assays, the activity data are typically strongly imbalanced, with a small number of active compounds contrasting with a very large number of inactive compounds. We have used several such imbalanced PubChem HTS assays to test and develop strategies to efficiently build robust QSAR models from imbalanced data sets. Different descriptor types [Quantitative Neighborhoods of Atoms (QNA) and """"biological"""" descriptors] were used to generate a variety of QSAR models in the program GUSAR. The models obtained were compared using external test and validation sets. We also report on our efforts to incorporate the most predictive of our models in the publicly available NCI/CADD Group Web services ( http://cactus.nci.nih.gov/chemical/apps/cap).","hji,kes",0,0,0,2,0,NA,NA +24564522,Incorporating substrate sequence motifs and spatial amino acid composition to identify kinase-specific phosphorylation sites on protein three-dimensional structures.,"

Background

Protein phosphorylation catalyzed by kinases plays crucial regulatory roles in cellular processes. Given the high-throughput mass spectrometry-based experiments, the desire to annotate the catalytic kinases for in vivo phosphorylation sites has motivated. Thus, a variety of computational methods have been developed for performing a large-scale prediction of kinase-specific phosphorylation sites. However, most of the proposed methods solely rely on the local amino acid sequences surrounding the phosphorylation sites. An increasing number of three-dimensional structures make it possible to physically investigate the structural environment of phosphorylation sites.

Results

In this work, all of the experimental phosphorylation sites are mapped to the protein entries of Protein Data Bank by sequence identity. It resulted in a total of 4508 phosphorylation sites containing the protein three-dimensional (3D) structures. To identify phosphorylation sites on protein 3D structures, this work incorporates support vector machines (SVMs) with the information of linear motifs and spatial amino acid composition, which is determined for each kinase group by calculating the relative frequencies of 20 amino acid types within a specific radial distance from central phosphorylated amino acid residue. After the cross-validation evaluation, most of the kinase-specific models trained with the consideration of structural information outperform the models considering only the sequence information. Furthermore, the independent testing set which is not included in training set has demonstrated that the proposed method could provide a comparable performance to other popular tools.

Conclusion

The proposed method is shown to be capable of predicting kinase-specific phosphorylation sites on 3D structures and has been implemented as a web server which is freely accessible at http://csb.cse.yzu.edu.tw/PhosK3D/. Due to the difficulty of identifying the kinase-specific phosphorylation sites with similar sequenced motifs, this work also integrates the 3D structural information to improve the cross classifying specificity.","hji,kes",0,0,0,2,0,NA,not found +24573477,3D-SURFER 2.0: web platform for real-time search and characterization of protein surfaces.,"The increasing number of uncharacterized protein structures necessitates the development of computational approaches for function annotation using the protein tertiary structures. Protein structure database search is the basis of any structure-based functional elucidation of proteins. 3D-SURFER is a web platform for real-time protein surface comparison of a given protein structure against the entire PDB using 3D Zernike descriptors. It can smoothly navigate the protein structure space in real-time from one query structure to another. A major new feature of Release 2.0 is the ability to compare the protein surface of a single chain, a single domain, or a single complex against databases of protein chains, domains, complexes, or a combination of all three in the latest PDB. Additionally, two types of protein structures can now be compared: all-atom-surface and backbone-atom-surface. The server can also accept a batch job for a large number of database searches. Pockets in protein surfaces can be identified by VisGrid and LIGSITE (csc) . The server is available at http://kiharalab.org/3d-surfer/.","hji,kes",0,0,0,2,0,NA,NA +24574118,Canto: an online tool for community literature curation.,"

Motivation

Detailed curation of published molecular data is essential for any model organism database. Community curation enables researchers to contribute data from their papers directly to databases, supplementing the activity of professional curators and improving coverage of a growing body of literature. We have developed Canto, a web-based tool that provides an intuitive curation interface for both curators and researchers, to support community curation in the fission yeast database, PomBase. Canto supports curation using OBO ontologies, and can be easily configured for use with any species.

Availability

Canto code and documentation are available under an Open Source license from http://curation.pombase.org/. Canto is a component of the Generic Model Organism Database (GMOD) project (http://www.gmod.org/).","hji,kes",0,0,0,2,0,NA,not about the resource +24588959,Analysis of BRAF(V600E) mutation and DNA methylation improves the diagnostics of thyroid fine needle aspiration biopsies.,"

Background

Thyroid nodules with indeterminate cytological features on fine needle aspiration biopsy specimens (FNABs) have a ~20% risk of thyroid cancer. BRAF(V600E) mutation and DNA methylation are useful markers to distinguish malignant thyroid neoplasm from benign. The aim of this study was to determine whether combined detection of BRAF(V600E) mutation and methylation markers on FNABs could improve the diagnostic accuracy of thyroid cancer.

Methods

Using pyrosequencing and quantitative methylation-specific PCR (Q-MSP) methods, FNABs from 79 and 38 patients with thyroid nodules in training and test groups, respectively, were analyzed for BRAF(V600E) mutation and gene methylation.

Results

BRAF(V600E) mutation was found in 30/42 (71.4%) and 14/20 (70%) FNABs in training and test groups, respectively. All BRAF(V600E)-positive samples were histologically diagnosed as papillary thyroid cancer (PTC) after thyroidectomy. As expected, BRAF mutation was not found in all benign nodules. Moreover, we demonstrated that the five genes, including CALCA, DAPK1, TIMP3, RAR-beta and RASSF1A, were aberrantly methylated in FNABs. Of them, methylation level of DAPK1 in PTCs was significantly higher than that in benign samples (P <0.0001). Conversely, methylation level of RASSF1A in PTCs was significantly lower than that in benign samples (P =0.003). Notably, compared with BRAF mutation testing alone, combined detection of BRAF mutation and methylation markers increased the diagnostic sensitivity and accuracy of PTC with excellent specificity.

Conclusion

Our data have demonstrated that combine analysis of BRAF mutation and DNA methylation markers on FNABs may be a useful strategy to facilitate the diagnosis of malignant thyroid neoplasm, particularly PTC.

Virtual slides

The virtual slide(s) for this article can be found here: http://www.diagnosticpathology.diagnomx.eu/vs/6080878071149177.","hji,kes",0,0,0,2,0,NA,NA +24611578,Variation in surgical quality measure adherence within hospital referral regions: do publicly reported surgical quality measures distinguish among hospitals that patients are likely to compare?,"

Objective

To determine whether surgical quality measures that Medicare publicly reports provide a basis for patients to choose a hospital from within their geographic region.

Data source

The Department of Health and Human Services' public reporting website, http://www.medicare.gov/hospitalcompare.

Study design

We identified hospitals (n=2,953) reporting adherence rates to the quality measures intended to reduce surgical site infections (Surgical Care Improvement Project, 1-3) in 2012. We defined regions within which patients were likely to compare hospitals using the hospital referral regions (HRRs) from the Dartmouth Atlas of Health Care Project. We described distributions of reported SCIP adherence within each HRR, including medians, interquartile ranges (IQRs), skewness, and outliers.

Principal findings

Ninety-seven percent of HRRs had median SCIP-1 scores ≥95 percent. In 93 percent of HRRs, half of the hospitals in the HRR were within 5 percent of the median hospital's score. In 62 percent of HRRs, hospitals were skewed toward the higher rates (negative skewness). Seven percent of HRRs demonstrated positive skewness. Only 1 percent had a positive outlier. SCIP-2 and SCIP-3 demonstrated similar distributions.

Conclusions

Publicly reported quality measures for surgical site infection prevention do not distinguish the majority of hospitals that patients are likely to choose from when selecting a surgical provider. More studies are needed to improve public reporting's ability to positively impact patient decision making.","hji,kes",0,0,0,2,0,NA,NA +24700317,Fast alignment-free sequence comparison using spaced-word frequencies.,"

Motivation

Alignment-free methods for sequence comparison are increasingly used for genome analysis and phylogeny reconstruction; they circumvent various difficulties of traditional alignment-based approaches. In particular, alignment-free methods are much faster than pairwise or multiple alignments. They are, however, less accurate than methods based on sequence alignment. Most alignment-free approaches work by comparing the word composition of sequences. A well-known problem with these methods is that neighbouring word matches are far from independent.

Results

To reduce the statistical dependency between adjacent word matches, we propose to use 'spaced words', defined by patterns of 'match' and 'don't care' positions, for alignment-free sequence comparison. We describe a fast implementation of this approach using recursive hashing and bit operations, and we show that further improvements can be achieved by using multiple patterns instead of single patterns. To evaluate our approach, we use spaced-word frequencies as a basis for fast phylogeny reconstruction. Using real-world and simulated sequence data, we demonstrate that our multiple-pattern approach produces better phylogenies than approaches relying on contiguous words.

Availability and implementation

Our program is freely available at http://spaced.gobics.de/.","hji,kes",0,0,0,2,0,NA,NA +24737859,"The influence of dynein processivity control, MAPs, and microtubule ends on directional movement of a localising mRNA.","Many cellular constituents travel along microtubules in association with multiple copies of motor proteins. How the activity of these motors is regulated during cargo sorting is poorly understood. In this study, we address this issue using a novel in vitro assay for the motility of localising Drosophila mRNAs bound to native dynein-dynactin complexes. High precision tracking reveals that individual RNPs within a population undergo either diffusive, or highly processive, minus end-directed movements along microtubules. RNA localisation signals stimulate the processive movements, with regulation of dynein-dynactin's activity rather than its total copy number per RNP, responsible for this effect. Our data support a novel mechanism for multi-motor translocation based on the regulation of dynein processivity by discrete cargo-associated features. Studying the in vitro responses of RNPs to microtubule-associated proteins (MAPs) and microtubule ends provides insights into how an RNA population could navigate the cytoskeletal network and become anchored at its destination in cells. DOI: http://dx.doi.org/10.7554/eLife.01596.001.","hji,kes",0,0,0,2,0,NA,NA +24799331,Allergen cross-reactivity in allergic rhinitis and oral-allergy syndrome: a bioinformatic protein sequence analysis.,"

Background

Clinical allergy cross-reactivity that is seen with related inhalant allergens or between unrelated inhalant allergens and foods in oral allergy syndrome (OAS) remains poorly understood. The goal of this study is to determine whether clinical cross-reactivity can be identified from primary protein sequences in allergy epitopes and food proteins.

Methods

High-throughput analysis was performed by assembling all known allergy epitopes within the Immune Epitope Database (IEDB; http://www.iedb.org) for 5 common species from 5 inhalant allergen subclasses and comparing their protein sequences to each other, as well as to sequences of intact proteins from known cross-reactive foods in the European Molecular Biology Laboratory-European Bioinformatics Institute (EMBL-EBI) protein database (http://www.uniprot.org) that have been implicated in OAS. Computational methods were employed to allow for exact matching, gaps, and similar amino acids using multiple algorithms. A phylogenetic tree was created to determine evolutionary relationships between cross-reactive epitopes in OAS.

Results

Twenty-three common inhalant allergens had 4429 unique epitopes; the 19 foods implicated in OAS had 9497 protein sequences. The Basic Local Alignment Search Tool (BLAST) algorithm identified interclass and intraclass sequence similarities for the 5 inhalant allergy classes with high similarity for mites, grasses, and trees. Analysis of OAS proteins identified 104 matches to inhalant allergy epitopes that are known to cross-react. The phylogenetic tree displayed relationships that mostly followed organism phylogeny.

Conclusion

Use of primary protein sequences was successful in explaining clinical allergy cross-reactivity. Clinical correlation is needed for use of these epitopes as diagnostic or therapeutic entities for patients with cross-reactive allergic disease.","hji,kes",0,0,0,2,0,NA,not about the resource +24801556,Large eddy simulation of transitional flow in an idealized stenotic blood vessel: evaluation of subgrid scale models.,"In the present study, we performed large eddy simulation (LES) of axisymmetric, and 75% stenosed, eccentric arterial models with steady inflow conditions at a Reynolds number of 1000. The results obtained are compared with the direct numerical simulation (DNS) data (Varghese et al., 2007, """"Direct Numerical Simulation of Stenotic Flows. Part 1. Steady Flow,"""" J. Fluid Mech., 582, pp. 253-280). An inhouse code (WenoHemo) employing high-order numerical methods for spatial and temporal terms, along with a 2nd order accurate ghost point immersed boundary method (IBM) (Mark, and Vanwachem, 2008, """"Derivation and Validation of a Novel Implicit Second-Order Accurate Immersed Boundary Method,"""" J. Comput. Phys., 227(13), pp. 6660-6680) for enforcing boundary conditions on curved geometries is used for simulations. Three subgrid scale (SGS) models, namely, the classical Smagorinsky model (Smagorinsky, 1963, """"General Circulation Experiments With the Primitive Equations,"""" Mon. Weather Rev., 91(10), pp. 99-164), recently developed Vreman model (Vreman, 2004, """"An Eddy-Viscosity Subgrid-Scale Model for Turbulent Shear Flow: Algebraic Theory and Applications,"""" Phys. Fluids, 16(10), pp. 3670-3681), and the Sigma model (Nicoud et al., 2011, """"Using Singular Values to Build a Subgrid-Scale Model for Large Eddy Simulations,"""" Phys. Fluids, 23(8), 085106) are evaluated in the present study. 
Evaluation of SGS models suggests that the classical constant coefficient Smagorinsky model gives best agreement with the DNS data, whereas the Vreman and Sigma models predict an early transition to turbulence in the poststenotic region. Supplementary simulations are performed using Open source field operation and manipulation (OpenFOAM) (""""OpenFOAM,"""" http://www.openfoam.org/) solver and the results are inline with those obtained with WenoHemo.","hji,kes",0,0,0,2,0,NA,NA +24812336,fastGapFill: efficient gap filling in metabolic networks.,"

Motivation

Genome-scale metabolic reconstructions summarize current knowledge about a target organism in a structured manner and as such highlight missing information. Such gaps can be filled algorithmically. Scalability limitations of available algorithms for gap filling hinder their application to compartmentalized reconstructions.

Results

We present fastGapFill, a computationally efficient tractable extension to the COBRA toolbox that permits the identification of candidate missing knowledge from a universal biochemical reaction database (e.g. Kyoto Encyclopedia of Genes and Genomes) for a given (compartmentalized) metabolic reconstruction. The stoichiometric consistency of the universal reaction database and of the metabolic reconstruction can be tested for permitting the computation of biologically more relevant solutions. We demonstrate the efficiency and scalability of fastGapFill on a range of metabolic reconstructions.

Availability and implementation

fastGapFill is freely available from http://thielelab.eu.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +24821260,Transatlantic peer-to-peer learning: an initial feasibility analysis.,"

Introduction

Peer-to-peer learning is a well-established learning modality, which has been shown to improve learning outcomes, with positive implications for clinical practice. The purpose of this pilot study was to explore the feasibility of linking students from North America and Europe with a peer-to-peer learning approach.

Methods

Face and content validity studies were completed on the previously designed and validated online repository http://www.pilgrimshospital.com. Four medical students from the University of Toronto, Canada, were paired with four students from University College Cork, Ireland. Each student was invited to upload two pieces of information learned from a senior colleague that day. Each student was asked to review the information uploaded by their partner, editing with references if needed. Quantitative and qualitative evaluations of the e-peer system were conducted.

Results

Over the study period, the system recorded a total of 10 079 individual page views. Questionnaires completed by participants demonstrated that 6/8 found the system either """"very easy"""" or """"easy"""" to use, whereas all found that the system promoted evidenced-based and self-directed learning. Structured interviews revealed 3 main themes: The Peer Connection, Trust in Data Veracity, and Aid to Clinical Learning.

Conclusion

This pilot study demonstrates it is feasible to link students from separate continents in a community of peer-to-peer learning. This is viewed positively by students and enhances evidenced-based learning, and the aspect of peer connectivity was important to participating students. Such an approach encourages peer cooperation and has the potential to disseminate key clinical learning experiences widely.","hji,kes",0,0,0,2,0,NA,out of scope +24843014,APOBEC3A deaminates transiently exposed single-strand DNA during LINE-1 retrotransposition.,"Long INterspersed Element-1 (LINE-1 or L1) retrotransposition poses a mutagenic threat to human genomes. Human cells have therefore evolved strategies to regulate L1 retrotransposition. The APOBEC3 (A3) gene family consists of seven enzymes that catalyze deamination of cytidine nucleotides to uridine nucleotides (C-to-U) in single-strand DNA substrates. Among these enzymes, APOBEC3A (A3A) is the most potent inhibitor of L1 retrotransposition in cultured cell assays. However, previous characterization of L1 retrotransposition events generated in the presence of A3A did not yield evidence of deamination. Thus, the molecular mechanism by which A3A inhibits L1 retrotransposition has remained enigmatic. Here, we have used in vitro and in vivo assays to demonstrate that A3A can inhibit L1 retrotransposition by deaminating transiently exposed single-strand DNA that arises during the process of L1 integration. These data provide a mechanistic explanation of how the A3A cytidine deaminase protein can inhibit L1 retrotransposition.DOI: http://dx.doi.org/10.7554/eLife.02008.001.","hji,kes",0,0,0,2,0,NA,NA +24849626,The inhibitory microcircuit of the substantia nigra provides feedback gain control of the basal ganglia output.,"Dysfunction of the basal ganglia produces severe deficits in the timing, initiation, and vigor of movement. These diverse impairments suggest a control system gone awry. 
In engineered systems, feedback is critical for control. By contrast, models of the basal ganglia highlight feedforward circuitry and ignore intrinsic feedback circuits. In this study, we show that feedback via axon collaterals of substantia nigra projection neurons control the gain of the basal ganglia output. Through a combination of physiology, optogenetics, anatomy, and circuit mapping, we elaborate a general circuit mechanism for gain control in a microcircuit lacking interneurons. Our data suggest that diverse tonic firing rates, weak unitary connections and a spatially diffuse collateral circuit with distinct topography and kinetics from feedforward input is sufficient to implement divisive feedback inhibition. The importance of feedback for engineered systems implies that the intranigral microcircuit, despite its absence from canonical models, could be essential to basal ganglia function. DOI: http://dx.doi.org/10.7554/eLife.02397.001.","hji,kes",0,0,0,2,0,NA,NA +24861624,Enhancing UCSF Chimera through web services.,"Integrating access to web services with desktop applications allows for an expanded set of application features, including performing computationally intensive tasks and convenient searches of databases. We describe how we have enhanced UCSF Chimera (http://www.rbvi.ucsf.edu/chimera/), a program for the interactive visualization and analysis of molecular structures and related data, through the addition of several web services (http://www.rbvi.ucsf.edu/chimera/docs/webservices.html). By streamlining access to web services, including the entire job submission, monitoring and retrieval process, Chimera makes it simpler for users to focus on their science projects rather than data manipulation. Chimera uses Opal, a toolkit for wrapping scientific applications as web services, to provide scalable and transparent access to several popular software packages. 
We illustrate Chimera's use of web services with an example workflow that interleaves use of these services with interactive manipulation of molecular sequences and structures, and we provide an example Python program to demonstrate how easily Opal-based web services can be accessed from within an application. Web server availability: http://webservices.rbvi.ucsf.edu/opal2/dashboard?command=serviceList.","hji,kes",0,0,0,2,0,NA,NA +24861626,A web tool for the design and management of panels of genes for targeted enrichment and massive sequencing for clinical applications.,"Disease targeted sequencing is gaining importance as a powerful and cost-effective application of high throughput sequencing technologies to the diagnosis. However, the lack of proper tools to process the data hinders its extensive adoption. Here we present TEAM, an intuitive and easy-to-use web tool that fills the gap between the predicted mutations and the final diagnostic in targeted enrichment sequencing analysis. The tool searches for known diagnostic mutations, corresponding to a disease panel, among the predicted patient's variants. Diagnostic variants for the disease are taken from four databases of disease-related variants (HGMD-public, HUMSAVAR, ClinVar and COSMIC.) If no primary diagnostic variant is found, then a list of secondary findings that can help to establish a diagnostic is produced. TEAM also provides with an interface for the definition of and customization of panels, by means of which, genes and mutations can be added or discarded to adjust panel definitions. TEAM is freely available at: http://team.babelomics.org.","hji,kes",0,0,0,2,0,NA,NA +24889386,Imported Plasmodium vivax malaria ex Pakistan.,"

Background

According to WHO, 1.5 million cases of malaria are reported annually in Pakistan. Malaria distribution in Pakistan is heterogeneous, and some areas, including Punjab, are considered at low risk for malaria. The aim of this study is to describe the trend of imported malaria cases from Pakistan reported to the international surveillance systems from 2005 to 2012.

Methods

Clinics reporting malaria cases acquired after a stay in Pakistan between January 1, 2005, and December 31, 2012, were identified from the GeoSentinel (http://www.geosentinel.org) and EuroTravNet (http://www.Eurotravnet.eu) networks. Demographic and travel-related information was retrieved from the database and further information such as areas of destination within Pakistan was obtained directly from the reporting sites. Standard linear regression models were used to assess the statistical significance of the time trend.

Results

From January 2005 to December 2012, a total of 63 cases of malaria acquired in Pakistan were retrieved in six countries over three continents. A statistically significant increasing trend in imported Plasmodium vivax malaria cases acquired in Pakistan, particularly for those exposed in Punjab, was observed over time (p = 0.006).

Conclusions

Our observation may herald a variation in malaria incidence in the Punjab province of Pakistan. This is in contrast with the previously described decreasing incidence of malaria in travelers to the Indian subcontinent, and with reports that describe Punjab as a low risk area for malaria. Nevertheless, this event is considered plausible by international organizations. This has potential implications for changes in chemoprophylaxis options and reinforces the need for increased surveillance, also considering the risk of introduction of autochthonous P. vivax malaria in areas where competent vectors are present, such as Europe.","hji,kes",0,0,0,2,0,NA,references other data resource +24965847,H-DROP: an SVM based helical domain linker predictor trained with features optimized by combining random forest and stepwise selection.,"Domain linker prediction is attracting much interest as it can help identifying novel domains suitable for high throughput proteomics analysis. Here, we report H-DROP, an SVM-based Helical Domain linker pRediction using OPtimal features. H-DROP is, to the best of our knowledge, the first predictor for specifically and effectively identifying helical linkers. This was made possible first because a large training dataset became available from IS-Dom, and second because we selected a small number of optimal features from a huge number of potential ones. The training helical linker dataset, which included 261 helical linkers, was constructed by detecting helical residues at the boundary regions of two independent structural domains listed in our previously reported IS-Dom dataset. 45 optimal feature candidates were selected from 3,000 features by random forest, which were further reduced to 26 optimal features by stepwise selection. The prediction sensitivity and precision of H-DROP were 35.2 and 38.8%, respectively. 
These values were over 10.7% higher than those of control methods including our previously developed DROP, which is a coil linker predictor, and PPRODO, which is trained with un-differentiated domain boundary sequences. Overall, these results indicated that helical linkers can be predicted from sequence information alone by using a strictly curated training data set for helical linkers and carefully selected set of optimal features. H-DROP is available at http://domserv.lab.tuat.ac.jp.","hji,kes",0,0,0,2,0,NA,iffy +25022716,Impact of prophylactic central neck dissection on oncologic outcomes of papillary thyroid carcinoma: a review.,"Prophylactic neck dissection (PND) for papillary thyroid carcinoma (PTC) is controversial. Our aim was to assess current levels of evidence (LE) according to the Oxford Centre for Evidence-based Medicine ( http://www.cebm.net/?O=1025 ) regarding the oncologic benefits of PND. Data were analyzed via MEDLINE keywords: PTC, differentiated thyroid carcinoma, PND, central lymph node metastases, central compartment, recurrence-free survival. There was conflicting evidence regarding the rate of reoperation for recurrence, with some studies showing a lower rate after PND with increased recurrence-free survival and a higher rate of undetectable pre- and post-ablation thyroglobulin levels (LE 4), whereas other studies did not show a difference (LE 4). Only one study (LE 4) showed improved disease-specific survival with PND. PND may improve recurrence-free survival, although this is supported by only a low LE. Current recommendations can only be based on low-level evidence.","hji,kes",0,0,0,2,0,NA,NA +25051568,A flexible pinhole camera model for coherent nonuniform sampling.,"The flexible pinhole camera (FPC) allows flexible modulation of the sampling rate over the field of view. The FPC is defined by a viewpoint and a map specifying the sampling locations on the image plane. 
The map is constructed from known regions of interest with interactive and automatic approaches. The FPC provides inexpensive 3D projection that allows rendering complex datasets quickly, in feed-forward fashion, by projection followed by rasterization. The FPC supports many types of data, including image, height field, geometry, and volume data. The resulting image is a coherent nonuniform sampling (CoNUS) of the dataset that matches the local variation of the dataset's importance. CoNUS images have been successfully implemented for remote visualization, focus-plus-context visualization, and acceleration of expensive rendering effects such as surface geometric detail and specular reflection. A video explaining and demonstrating the FPC is at http://youtu.be/kvFe5XjOPNM.","hji,kes",0,0,0,2,0,NA,NA +25128977,Accurate de novo and transmitted indel detection in exome-capture data using microassembly.,"We present an open-source algorithm, Scalpel (http://scalpel.sourceforge.net/), which combines mapping and assembly for sensitive and specific discovery of insertions and deletions (indels) in exome-capture data. A detailed repeat analysis coupled with a self-tuning k-mer strategy allows Scalpel to outperform other state-of-the-art approaches for indel discovery, particularly in regions containing near-perfect repeats. We analyzed 593 families from the Simons Simplex Collection and demonstrated Scalpel's power to detect long (>=30 bp) transmitted events and enrichment for de novo likely gene-disrupting indels in autistic children.","hji,kes",0,0,0,2,0,NA,NA +25143288,GlycoPattern: a web platform for glycan array mining.,"

Unlabelled

GlycoPattern is a Web-based bioinformatics resource to support the analysis of glycan array data for the Consortium for Functional Glycomics. This resource includes algorithms and tools to discover structural motifs, a heatmap visualization to compare multiple experiments, hierarchical clustering of Glycan Binding Proteins with respect to their binding motifs and a structural search feature on the experimental data.

Availability and implementation

GlycoPattern is freely available on the Web at http://glycopattern.emory.edu with all major browsers supported.","hji,kes",0,0,0,2,0,NA,NA +25150030,Is the Alma Ata vision of comprehensive primary health care viable? Findings from an international project.,"

Background

The 4-year (2007-2011) Revitalizing Health for All international research program (http://www.globalhealthequity.ca/projects/proj_revitalizing/index.shtml) supported 20 research teams located in 15 low- and middle-income countries to explore the strengths and weaknesses of comprehensive primary health care (CPHC) initiatives at their local or national levels. Teams were organized in a triad comprised of a senior researcher, a new researcher, and a 'research user' from government, health services, or other organizations with the authority or capacity to apply the research findings. Multiple regional and global team capacity-enhancement meetings were organized to refine methods and to discuss and assess cross-case findings.

Objective

Most research projects used mixed methods, incorporating analyses of qualitative data (interviews and focus groups), secondary data, and key policy and program documents. Some incorporated historical case study analyses, and a few undertook new surveys. The synthesis of findings in this report was derived through qualitative analysis of final project reports undertaken by three different reviewers.

Results

Evidence of comprehensiveness (defined in this research program as efforts to improve equity in access, community empowerment and participation, social and environmental health determinants, and intersectoral action) was found in many of the cases.

Conclusions

Despite the important contextual differences amongst the different country studies, the similarity of many of their findings, often generated using mixed methods, attests to certain transferable health systems characteristics to create and sustain CPHC practices. These include: 1. Well-trained and supported community health workers (CHWs) able to work effectively with marginalized communities; 2. Effective mechanisms for community participation, both informal (through participation in projects and programs, and meaningful consultation) and formal (through program management structures); 3. Co-partnership models in program and policy development (in which financial and knowledge supports from governments or institutions are provided to communities, which retain decision-making powers in program design and implementation); 4. Support for community advocacy and engagement in health and social systems decision making. These characteristics, in turn, require a political context that supports state responsibilities for redistributive health and social protection measures.","hji,kes",0,0,0,2,0,NA,NA +25161233,TEMPI: probabilistic modeling time-evolving differential PPI networks with multiPle information.,"

Motivation

Time-evolving differential protein-protein interaction (PPI) networks are essential to understand serial activation of differentially regulated (up- or downregulated) cellular processes (DRPs) and their interplays over time. Despite developments in the network inference, current methods are still limited in identifying temporal transition of structures of PPI networks, DRPs associated with the structural transition and the interplays among the DRPs over time.

Results

Here, we present a probabilistic model for estimating Time-Evolving differential PPI networks with MultiPle Information (TEMPI). This model describes probabilistic relationships among network structures, time-course gene expression data and Gene Ontology biological processes (GOBPs). By maximizing the likelihood of the probabilistic model, TEMPI estimates jointly the time-evolving differential PPI networks (TDNs) describing temporal transition of PPI network structures together with serial activation of DRPs associated with transiting networks. This joint estimation enables us to interpret the TDNs in terms of temporal transition of the DRPs. To demonstrate the utility of TEMPI, we applied it to two time-course datasets. TEMPI identified the TDNs that correctly delineated temporal transition of DRPs and time-dependent associations between the DRPs. These TDNs provide hypotheses for mechanisms underlying serial activation of key DRPs and their temporal associations.

Availability and implementation

Source code and sample data files are available at http://sbm.postech.ac.kr/tempi/sources.zip.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,sample data only +25161236,Effects of small particle numbers on long-term behaviour in discrete biochemical systems.,"

Motivation

The functioning of many biological processes depends on the appearance of only a small number of a single molecular species. Additionally, the observation of molecular crowding leads to the insight that even a high number of copies of species do not guarantee their interaction. How single particles contribute to stabilizing biological systems is not well understood yet. Hence, we aim at determining the influence of single molecules on the long-term behaviour of biological systems, i.e. whether they can reach a steady state.

Results

We provide theoretical considerations and a tool to analyse Systems Biology Markup Language models for the possibility to stabilize because of the described effects. The theory is an extension of chemical organization theory, which we called discrete chemical organization theory. Furthermore we scanned the BioModels Database for the occurrence of discrete chemical organizations. To exemplify our method, we describe an application to the Template model of the mitotic spindle assembly checkpoint mechanism.

Availability and implementation

http://www.biosys.uni-jena.de/Services.html.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +25173705,SeqControl: process control for DNA sequencing.,"As high-throughput sequencing continues to increase in speed and throughput, routine clinical and industrial application draws closer. These 'production' settings will require enhanced quality monitoring and quality control to optimize output and reduce costs. We developed SeqControl, a framework for predicting sequencing quality and coverage using a set of 15 metrics describing overall coverage, coverage distribution, basewise coverage and basewise quality. Using whole-genome sequences of 27 prostate cancers and 26 normal references, we derived multivariate models that predict sequencing quality and depth. SeqControl robustly predicted how much sequencing was required to reach a given coverage depth (area under the curve (AUC) = 0.993), accurately classified clinically relevant formalin-fixed, paraffin-embedded samples, and made predictions from as little as one-eighth of a sequencing lane (AUC = 0.967). These techniques can be immediately incorporated into existing sequencing pipelines to monitor data quality in real time. SeqControl is available at http://labs.oicr.on.ca/Boutros-lab/software/SeqControl/.","hji,kes",0,0,0,2,0,NA,NA +25204235,"The 5th National Audit Project (NAP5) on accidental awareness during general anaesthesia: protocol, methods and analysis of data.","Accidental awareness during general anaesthesia with recall is a potentially distressing complication of general anaesthesia that can lead to psychological harm. The 5th National Audit Project was designed to investigate the reported incidence, predisposing factors, causality and impact of accidental awareness. A nationwide network of local co-ordinators across all UK and Irish public hospitals reported all new patient reports of accidental awareness to a central database, using a system of monthly anonymised reporting over a calendar year. 
The database collected the details of the reported event, anaesthetic and surgical technique, and any sequelae. These reports were categorised into main types by a multidisciplinary panel, using a formalised process of analysis. The main categories of accidental awareness were: certain or probable; possible; during sedation; on or from the intensive care unit; could not be determined; unlikely; drug errors; and statement only. The degree of evidence to support the categorisation was also defined for each report. Patient experience and sequelae were categorised using current tools or modifications of such. The 5th National Audit Project methodology may be used to assess new reports of accidental awareness during general anaesthesia in a standardised manner, especially for the development of an ongoing database of case reporting. This paper is a shortened version describing the protocols, methods and data analysis from 5th National Audit Project - the full report can be found at http://www.nationalauditprojects.org.uk/NAP5_home#pt.","hji,kes",0,0,0,2,0,NA,NA +25260792,Dynamic evolution of clonal epialleles revealed by methclone.,"We describe methclone, a novel method to identify epigenetic loci that harbor large changes in the clonality of their epialleles (epigenetic alleles). Methclone efficiently analyzes genome-wide DNA methylation sequencing data. We quantify the changes using a composition entropy difference calculation and also introduce a new measure of global clonality shift, loci with epiallele shift per million loci covered, which enables comparisons between different samples to gauge overall epiallelic dynamics. Finally, we demonstrate the utility of methclone in capturing functional epiallele shifts in leukemia patients from diagnosis to relapse. 
Methclone is open-source and freely available at https://code.google.com/p/methclone.","hji,kes",0,0,0,2,0,NA,NA +25286919,Computing autocatalytic sets to unravel inconsistencies in metabolic network reconstructions.,"

Motivation

Genome-scale metabolic network reconstructions have been established as a powerful tool for the prediction of cellular phenotypes and metabolic capabilities of organisms. In recent years, the number of network reconstructions has been constantly increasing, mostly because of the availability of novel (semi-)automated procedures, which enabled the reconstruction of metabolic models based on individual genomes and their annotation. The resulting models are widely used in numerous applications. However, the accuracy and predictive power of network reconstructions are commonly limited by inherent inconsistencies and gaps.

Results

Here we present a novel method to validate metabolic network reconstructions based on the concept of autocatalytic sets. Autocatalytic sets correspond to collections of metabolites that, besides enzymes and a growth medium, are required to produce all biomass components in a metabolic model. These autocatalytic sets are well-conserved across all domains of life, and their identification in specific genome-scale reconstructions allows us to draw conclusions about potential inconsistencies in these models. The method is capable of detecting inconsistencies, which are neglected by other gap-finding methods. We tested our method on the Model SEED, which is the largest repository for automatically generated genome-scale network reconstructions. In this way, we were able to identify a significant number of missing pathways in several of these reconstructions. Hence, the method we report represents a powerful tool to identify inconsistencies in large-scale metabolic networks.

Availability and implementation

The method is available as source code on http://users.minet.uni-jena.de/~m3kach/ASBIG/ASBIG.zip.

Contact

christoph.kaleta@uni-jena.de

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +25322794,A novel algorithm for the precise calculation of the maximal information coefficient.,"Measuring associations is an important scientific task. A novel measurement method maximal information coefficient (MIC) was proposed to identify a broad class of associations. As foreseen by its authors, MIC implementation algorithm ApproxMaxMI is not always convergent to real MIC values. An algorithm called SG (Simulated annealing and Genetic) was developed to facilitate the optimal calculation of MIC, and the convergence of SG was proved based on Markov theory. When run on fruit fly data set including 1,000,000 pairs of gene expression profiles, the mean squared difference between SG and the exhaustive algorithm is 0.00075499, compared with 0.1834 in the case of ApproxMaxMI. The software SGMIC and its manual are freely available at http://lxy.depart.hebust.edu.cn/SGMIC/SGMIC.htm.","hji,kes",0,0,0,2,0,NA,NA +25329667,Accurate assignment of significance to neuropeptide identifications using Monte Carlo k-permuted decoy databases.,"In support of accurate neuropeptide identification in mass spectrometry experiments, novel Monte Carlo permutation testing was used to compute significance values. Testing was based on k-permuted decoy databases, where k denotes the number of permutations. These databases were integrated with a range of peptide identification indicators from three popular open-source database search software (OMSSA, Crux, and X! Tandem) to assess the statistical significance of neuropeptide spectra matches. Significance p-values were computed as the fraction of the sequences in the database with match indicator value better than or equal to the true target spectra. When applied to a test-bed of all known manually annotated mouse neuropeptides, permutation tests with k-permuted decoy databases identified up to 100% of the neuropeptides at p-value < 10(-5). 
The permutation test p-values using hyperscore (X! Tandem), E-value (OMSSA) and Sp score (Crux) match indicators outperformed all other match indicators. The robust performance to detect peptides of the intuitive indicator """"number of matched ions between the experimental and theoretical spectra"""" highlights the importance of considering this indicator when the p-value was borderline significant. Our findings suggest permutation decoy databases of size 1x10(5) are adequate to accurately detect neuropeptides and this can be exploited to increase the speed of the search. The straightforward Monte Carlo permutation testing (comparable to a zero order Markov model) can be easily combined with existing peptide identification software to enable accurate and effective neuropeptide detection. The source code is available at http://stagbeetle.animal.uiuc.edu/pepshop/MSMSpermutationtesting.","hji,kes",0,0,0,2,0,NA,NA +25340248,"Head Injury: Triage, Assessment, Investigation and Early Management of Head Injury in Children, Young People and Adults","For the purposes of this guideline, head injury is defined as any trauma to the head other than superficial injuries to the face. Head injury is the commonest cause of death and disability in people aged 1-40 years in the UK. Data for head injury are recorded in the Hospital Episode Statistics (http://www.hscic.gov.uk/hes). Each year, 1.4 million people attend emergency departments in England and Wales with a recent head injury. Between 33% and 50% of these are children aged under 15 years. Annually, about 200,000 people are admitted to hospital with head injury. Of these, one-fifth have features suggesting skull fracture or have evidence of brain damage. Most patients recover without specific or specialist intervention, but others experience long-term disability or even die from the effects of complications that could potentially be minimised or avoided with early detection and appropriate treatment. 
The incidence of death from head injury is low, with as few as 0.2% of all patients attending emergency departments with a head injury dying as a result of this injury. Ninety five per cent of people who have sustained a head injury present with a normal or minimally impaired conscious level (Glasgow Coma Scale [GCS] greater than 12) but the majority of fatal outcomes are in the moderate (GCS 9-12) or severe (GCS 8 or less) head injury groups, which account for only 5% of attenders. Therefore, emergency departments see a large number of patients with minor or mild head injuries and need to identify the very small number who will go on to have serious acute intracranial complications. It is estimated that 25-30% of children aged under 2 years who are hospitalised with head injury have an abusive head injury. This guideline has updated some of the terminology used in relation to safeguarding children and vulnerable adults. The previous head injury guideline produced by NICE in 2003 (NICE clinical guideline 4) and updated in 2007 (NICE clinical guideline 56) resulted in CT scanning replacing skull radiography as the primary imaging modality for assessing head injury. It also led to an increase in the proportion of people with severe head injury having their care managed in specialist centres. This has been associated with a decline in fatality among patients with severe head injury. This update is needed because of the continuing importance of up-to-date evidence-based guidance on the initial assessment and early management of head injury. Appropriate guidance can enable early detection and treatment of life-threatening brain injury, where present, but also early discharge of patients with negligible risk of brain injury. It can therefore save lives while at the same time preventing needless crowding in emergency departments and observation wards. Further key NHS changes have driven the scope of this update. 
These include the introduction in 2012 of regional trauma networks with major trauma triage tools within NHS England; the extension of indications for anticoagulation therapy; the expanding use of biomarkers to guide emergent clinical management in other conditions, such as chest pain; and the establishment of local safeguarding boards. The last of these addresses the requirement for front-line clinical staff to assess not only the severity of the head injury but also why it occurred.","hji,kes",0,0,0,2,0,NA,clinical +25341068,"Udock, the interactive docking entertainment system.","Protein-protein interactions play a crucial role in biological processes. Protein docking calculations' goal is to predict, given two proteins of known structures, the associate conformation of the corresponding complex. Here, we present a new interactive protein docking system, Udock, that makes use of users' cognitive capabilities added up. In Udock, the users tackle simplified representations of protein structures and explore protein-protein interfaces' conformational space using a gamified interactive docking system with on the fly scoring. We assumed that if given appropriate tools, a naive user's cognitive capabilities could provide relevant data for (1) the prediction of correct interfaces in binary protein complexes and (2) the identification of the experimental partner in interaction among a set of decoys. To explore this approach experimentally, we conducted a preliminary two week long playtest where the registered users could perform a cross-docking on a dataset comprising 4 binary protein complexes. The users explored almost all the surface of the proteins that were available in the dataset but favored certain regions that seemed more attractive as potential docking spots. These favored regions were located inside or nearby the experimental binding interface for 5 out of the 8 proteins in the dataset. 
For most of them, the best scores were obtained with the experimental partner. The alpha version of Udock is freely accessible at http://udock.fr.","hji,kes",0,0,0,2,0,NA,NA +25344501,iDoComp: a compression scheme for assembled genomes.,"

Motivation

With the release of the latest next-generation sequencing (NGS) machine, the HiSeq X by Illumina, the cost of sequencing a Human has dropped to a mere $4000. Thus we are approaching a milestone in the sequencing history, known as the $1000 genome era, where the sequencing of individuals is affordable, opening the doors to effective personalized medicine. Massive generation of genomic data, including assembled genomes, is expected in the following years. There is crucial need for compression of genomes guaranteed of performing well simultaneously on different species, from simple bacteria to humans, which will ease their transmission, dissemination and analysis. Further, most of the new genomes to be compressed will correspond to individuals of a species from which a reference already exists on the database. Thus, it is natural to propose compression schemes that assume and exploit the availability of such references.

Results

We propose iDoComp, a compressor of assembled genomes presented in FASTA format that compresses an individual genome using a reference genome for both the compression and the decompression. In terms of compression efficiency, iDoComp outperforms previously proposed algorithms in most of the studied cases, with comparable or better running time. For example, we observe compression gains of up to 60% in several cases, including H.sapiens data, when comparing with the best compression performance among the previously proposed algorithms.

Availability

iDoComp is written in C and can be downloaded from: http://www.stanford.edu/~iochoa/iDoComp.html (We also provide a full explanation on how to run the program and an example with all the necessary files to run it.).","hji,kes",0,0,0,2,0,NA,NA +25383185,CyKEGGParser: tailoring KEGG pathways to fit into systems biology analysis workflows.,"The KEGG pathway database is a widely accepted source for biomolecular pathway maps. In this paper we present the CyKEGGParser app ( http://apps.cytoscape.org/apps/cykeggparser) for Cytoscape 3 that allows manipulation with KEGG pathway maps. Along with basic functionalities for pathway retrieval, visualization and export in KGML and BioPAX formats, the app provides unique features for computer-assisted adjustment of inconsistencies in KEGG pathway KGML files and generation of tissue- and protein-protein interaction specific pathways. We demonstrate that using biological context-specific KEGG pathways created with CyKEGGParser makes systems biology analysis more sensitive and appropriate compared to original pathways.","hji,kes",0,0,0,2,0,NA,NA +25414366,Biological Dynamics Markup Language (BDML): an open format for representing quantitative biological dynamics data.,"

Motivation

Recent progress in live-cell imaging and modeling techniques has resulted in generation of a large amount of quantitative data (from experimental measurements and computer simulations) on spatiotemporal dynamics of biological objects such as molecules, cells and organisms. Although many research groups have independently dedicated their efforts to developing software tools for visualizing and analyzing these data, these tools are often not compatible with each other because of different data formats.

Results

We developed an open unified format, Biological Dynamics Markup Language (BDML; current version: 0.2), which provides a basic framework for representing quantitative biological dynamics data for objects ranging from molecules to cells to organisms. BDML is based on Extensible Markup Language (XML). Its advantages are machine and human readability and extensibility. BDML will improve the efficiency of development and evaluation of software tools for data visualization and analysis.

Availability and implementation

A specification and a schema file for BDML are freely available online at http://ssbd.qbic.riken.jp/bdml/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +25417090,Knowledge-Based Personal Health System to empower outpatients of diabetes mellitus by means of P4 Medicine.,"Diabetes Mellitus (DM) affects hundreds of millions of people worldwide and it imposes a large economic burden on healthcare systems. We present a web patient empowering system (PHSP4) that ensures continuous monitoring and assessment of the health state of patients with DM (type I and II). PHSP4 is a Knowledge-Based Personal Health System (PHS) which follows the trend of P4 Medicine (Personalized, Predictive, Preventive, and Participative). It provides messages to outpatients and clinicians about the achievement of objectives, follow-up, and treatments adjusted to the patient condition. Additionally, it calculates a four-component risk vector of the associated pathologies with DM: Nephropathy, Diabetic retinopathy, Diabetic foot, and Cardiovascular event. The core of the system is a Rule-Based System which Knowledge Base is composed by a set of rules implementing the recommendations of the American Diabetes Association (ADA) (American Diabetes Association: http://www.diabetes.org/ ) clinical guideline. The PHSP4 is designed to be standardized and to facilitate its interoperability by means of terminologies (SNOMED-CT [The International Health Terminology Standards Development Organization: http://www.ihtsdo.org/snomed-ct/ ] and UCUM [The Unified Code for Units of Measure: http://unitsofmeasure.org/ ]), standardized clinical documents (HL7 CDA R2 [Health Level Seven International: http://www.hl7.org/index.cfm ]) for managing Electronic Health Record (EHR). We have evaluated the functionality of the system and its users' acceptance of the system using simulated and real data, and a questionnaire based in the Technology Acceptance Model methodology (TAM). 
Finally results show the reliability of the system and the high acceptance of clinicians.","hji,kes",0,0,0,2,0,NA,NA +25429060,A bio-inspired computing model for ovarian carcinoma classification and oncogene detection.,"

Motivation

Ovarian cancer is the fifth leading cause of cancer deaths in women in the western world for 2013. In ovarian cancer, benign tumors turn malignant, but the point of transition is difficult to predict and diagnose. The 5-year survival rate of all types of ovarian cancer is 44%, but this can be improved to 92% if the cancer is found and treated before it spreads beyond the ovary. However, only 15% of all ovarian cancers are found at this early stage. Therefore, the ability to automatically identify and diagnose ovarian cancer precisely and efficiently as the tissue changes from benign to invasive is important for clinical treatment and for increasing the cure rate. This study proposes a new ovarian carcinoma classification model using two algorithms: a novel discretization of food sources for an artificial bee colony (DfABC), and a support vector machine (SVM). For the first time in the literature, oncogene detection using this method is also investigated.

Results

A novel bio-inspired computing model and hybrid algorithms combining DfABC and SVM was applied to ovarian carcinoma and oncogene classification. This study used the human ovarian cDNA expression database to collect 41 patient samples and 9600 genes in each pathological stage. Feature selection methods were used to detect and extract 15 notable oncogenes. We then used the DfABC-SVM model to examine these 15 oncogenes, dividing them into eight different classifications according to their gene expressions of various pathological stages. The average accuracy of the eight classification experiments was 94.76%. This research also found some oncogenes that had not been discovered or indicated in previous scientific studies. The main contribution of this research is the proof that these newly discovered oncogenes are highly related to ovarian or other cancers.

Availability and implementation

http://mht.mis.nchu.edu.tw/moodle/course/view.php?id=7.","hji,kes",0,0,0,2,0,NA,NA +25434742,GenePainter v. 2.0 resolves the taxonomic distribution of intron positions.,"

Unlabelled

Conserved intron positions in eukaryotic genes can be used to reconstruct phylogenetic trees, to resolve ambiguous subfamily relationships in protein families and to infer the history of gene families. This version of GenePainter facilitates working with large datasets through options to select specific subsets for analysis and visualization, and through providing exhaustive statistics. GenePainter's application in phylogenetic analyses is considerably extended by the newly implemented integration of the exon-intron pattern conservation with phylogenetic trees.

Availability and implementation

The software along with detailed documentation is available at http://www.motorprotein.de/genepainter and as Supplementary Material.

Contact

mako@nmr.mpibpc.mpg.de

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +25435486,"Commentary on """"spectral characterization of the binding and conformational changes of serum albumins upon interaction with an anticancer drug, anastrozole"""".","The manuscript by R. Punith and J. Seetharamappa (http://dx.doi.org/10.1016/j.saa.201202.038) presents the interaction between serum albumin from human (HAS) and from bovine (BSA) with a drug called Anastrozole (AZ). The drug is on the market for treating patients with breast cancer after surgery and for metastasis in women. The study utilizes various spectroscopic techniques such as; fluorescence, synchronous fluorescence, 3D fluorescence measurements, FTIR, CD and UV. Although there are some relatively minor comments on the paper, the main point that needs to be reviewed by the authors is the result of FTIR measurements. Based on the data provided in the text (there is no figure), the protein sample is not in its native state, which makes the data inconvenient to be used in drawing conclusions. Authors are kindly requested to take another look at the FTIR experiments.","hji,kes",0,0,0,2,0,NA,NA +25475896,Proteomic analysis of three Borrelia burgdorferi sensu lato native species and disseminating clones: relevance for Lyme vaccine design.,"Lyme borreliosis is the most important vector-borne disease in the Northern hemisphere. It is caused by Borrelia burgdorferi sensu lato bacteria transmitted to humans by the bite of hard ticks, Ixodes spp. Although antibiotic treatments are efficient in the early stage of the infection, a significant number of patients develop disseminated manifestations (articular, neurological, and cutaneous) due to unnoticed or absence of erythema migrans, or to inappropriate treatment. Vaccine could be an efficient approach to decrease Lyme disease incidence. 
We have developed a proteomic approach based on a one dimensional gel electrophoresis followed by LC-MS/MS strategy to identify new vaccine candidates. We analyzed a disseminating clone and the associated wild-type strain for each major pathogenic Borrelia species: B. burgdorferi sensu stricto, B. garinii, and B. afzelii. We identified specific proteins and common proteins to the disseminating clones of the three main species. In parallel, we used a spectral counting strategy to identify upregulated proteins common to the clones. Finally, 40 proteins were found that could potentially be involved in bacterial virulence and of interest in the development of a new vaccine. We selected the three proteins specifically detected in the disseminating clones of the three Borrelia species and checked by RT-PCR whether they are expressed in mouse skin upon B. burgdorferi ss inoculation. Interestingly, BB0566 appears as a potential vaccine candidate. All MS data have been deposited in the ProteomeXchange with identifier PXD000876 (http://proteomecentral.proteomexchange.org/dataset/PXD000876).","hji,kes",0,0,0,2,0,NA,data deposited as referenced +25480679,Marky: a tool supporting annotation consistency in multi-user and iterative document annotation projects.,"

Background and objectives

Document annotation is a key task in the development of Text Mining methods and applications. High quality annotated corpora are invaluable, but their preparation requires a considerable amount of resources and time. Although the existing annotation tools offer good user interaction interfaces to domain experts, project management and quality control abilities are still limited. Therefore, the current work introduces Marky, a new Web-based document annotation tool equipped to manage multi-user and iterative projects, and to evaluate annotation quality throughout the project life cycle.

Methods

At the core, Marky is a Web application based on the open source CakePHP framework. User interface relies on HTML5 and CSS3 technologies. Rangy library assists in browser-independent implementation of common DOM range and selection tasks, and Ajax and JQuery technologies are used to enhance user-system interaction.

Results

Marky grants solid management of inter- and intra-annotator work. Most notably, its annotation tracking system supports systematic and on-demand agreement analysis and annotation amendment. Each annotator may work over documents as usual, but all the annotations made are saved by the tracking system and may be further compared. So, the project administrator is able to evaluate annotation consistency among annotators and across rounds of annotation, while annotators are able to reject or amend subsets of annotations made in previous rounds. As a side effect, the tracking system minimises resource and time consumption.

Conclusions

Marky is a novel environment for managing multi-user and iterative document annotation projects. Compared to other tools, Marky offers a similar visually intuitive annotation experience while providing unique means to minimise annotation effort and enforce annotation quality, and therefore corpus consistency. Marky is freely available for non-commercial use at http://sing.ei.uvigo.es/marky.","hji,kes",0,0,0,2,0,NA,NA +25495213,Genotype harmonizer: automatic strand alignment and format conversion for genotype data integration.,"

Background

To gain statistical power or to allow fine mapping, researchers typically want to pool data before meta-analyses or genotype imputation. However, the necessary harmonization of genetic datasets is currently error-prone because of many different file formats and lack of clarity about which genomic strand is used as reference.

Findings

Genotype Harmonizer (GH) is a command-line tool to harmonize genetic datasets by automatically solving issues concerning genomic strand and file format. GH solves the unknown strand issue by aligning ambiguous A/T and G/C SNPs to a specified reference, using linkage disequilibrium patterns without prior knowledge of the used strands. GH supports many common GWAS/NGS genotype formats including PLINK, binary PLINK, VCF, SHAPEIT2 & Oxford GEN. GH is implemented in Java and a large part of the functionality can also be used as Java 'Genotype-IO' API. All software is open source under license LGPLv3 and available from http://www.molgenis.org/systemsgenetics.

Conclusions

GH can be used to harmonize genetic datasets across different file formats and can be easily integrated as a step in routine meta-analysis and imputation pipelines.","hji,kes",0,0,0,2,0,NA,NA +25541969,Scholarly context not found: one in five articles suffers from reference rot.,"The emergence of the web has fundamentally affected most aspects of information communication, including scholarly communication. The immediacy that characterizes publishing information to the web, as well as accessing it, allows for a dramatic increase in the speed of dissemination of scholarly knowledge. But, the transition from a paper-based to a web-based scholarly communication system also poses challenges. In this paper, we focus on reference rot, the combination of link rot and content drift to which references to web resources included in Science, Technology, and Medicine (STM) articles are subject. We investigate the extent to which reference rot impacts the ability to revisit the web context that surrounds STM articles some time after their publication. We do so on the basis of a vast collection of articles from three corpora that span publication years 1997 to 2012. For over one million references to web resources extracted from over 3.5 million articles, we determine whether the HTTP URI is still responsive on the live web and whether web archives contain an archived snapshot representative of the state the referenced resource had at the time it was referenced. We observe that the fraction of articles containing references to web resources is growing steadily over time. We find one out of five STM articles suffering from reference rot, meaning it is impossible to revisit the web context that surrounds them some time after their publication. When only considering STM articles that contain references to web resources, this fraction increases to seven out of ten. 
We suggest that, in order to safeguard the long-term integrity of the web-based scholarly record, robust solutions to combat the reference rot problem are required. In conclusion, we provide a brief insight into the directions that are explored with this regard in the context of the Hiberlink project.","hji,kes",0,0,0,2,0,NA,NA +25555998,Fiber estimation and tractography in diffusion MRI: development of simulated brain images and comparison of multi-fiber analysis methods at clinical b-values.,"Advances in diffusion-weighted magnetic resonance imaging (DW-MRI) have led to many alternative diffusion sampling strategies and analysis methodologies. A common objective among methods is estimation of white matter fiber orientations within each voxel, as doing so permits in-vivo fiber-tracking and the ability to study brain connectivity and networks. Knowledge of how DW-MRI sampling schemes affect fiber estimation accuracy, tractography and the ability to recover complex white-matter pathways, differences between results due to choice of analysis method, and which method(s) perform optimally for specific data sets, all remain important problems, especially as tractography-based studies become common. In this work, we begin to address these concerns by developing sets of simulated diffusion-weighted brain images which we then use to quantitatively evaluate the performance of six DW-MRI analysis methods in terms of estimated fiber orientation accuracy, false-positive (spurious) and false-negative (missing) fiber rates, and fiber-tracking. 
The analysis methods studied are: 1) a two-compartment """"ball and stick"""" model (BSM) (Behrens et al., 2003); 2) a non-negativity constrained spherical deconvolution (CSD) approach (Tournier et al., 2007); 3) analytical q-ball imaging (QBI) (Descoteaux et al., 2007); 4) q-ball imaging with Funk-Radon and Cosine Transform (FRACT) (Haldar and Leahy, 2013); 5) q-ball imaging within constant solid angle (CSA) (Aganj et al., 2010); and 6) a generalized Fourier transform approach known as generalized q-sampling imaging (GQI) (Yeh et al., 2010). We investigate these methods using 20, 30, 40, 60, 90 and 120 evenly distributed q-space samples of a single shell, and focus on a signal-to-noise ratio (SNR = 18) and diffusion-weighting (b = 1000 s/mm(2)) common to clinical studies. We found that the BSM and CSD methods consistently yielded the least fiber orientation error and simultaneously greatest detection rate of fibers. Fiber detection rate was found to be the most distinguishing characteristic between the methods, and a significant factor for complete recovery of tractography through complex white-matter pathways. For example, while all methods recovered similar tractography of prominent white matter pathways of limited fiber crossing, CSD (which had the highest fiber detection rate, especially for voxels containing three fibers) recovered the greatest number of fibers and largest fraction of correct tractography for complex three-fiber crossing regions. The synthetic data sets, ground-truth, and tools for quantitative evaluation are publically available on the NITRC website as the project """"Simulated DW-MRI Brain Data Sets for Quantitative Evaluation of Estimated Fiber Orientations"""" at http://www.nitrc.org/projects/sim_dwi_brain.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +25561511,Genomic classifier ColoPrint predicts recurrence in stage II colorectal cancer patients more accurately than clinical factors.,"

Background

Approximately 20% of patients with stage II colorectal cancer will experience a relapse. Current clinical-pathologic stratification factors do not allow clear identification of these high-risk patients. ColoPrint (Agendia, Amsterdam, The Netherlands, http://www.agendia.com) is a gene expression classifier that distinguishes patients with low or high risk of disease relapse.

Methods

ColoPrint was developed using whole-genome expression data and validated in several independent validation cohorts. Stage II patients from these studies were pooled (n = 416), and ColoPrint was compared with clinical risk factors described in the National Comprehensive Cancer Network (NCCN) 2013 Guidelines for Colon Cancer. Median follow-up was 81 months. Most patients (70%) did not receive adjuvant chemotherapy. Risk of relapse (ROR) was defined as survival until first event of recurrence or death from cancer.

Results

In the pooled stage II data set, ColoPrint identified 63% of patients as low risk with a 5-year ROR of 10%, whereas high-risk patients (37%) had a 5-year ROR of 21%, with a hazard ratio (HR) of 2.16 (p = .004). This remained significant in a multivariate model that included number of lymph nodes retrieved and microsatellite instability. In the T3 microsatellite-stable subgroup (n = 301), ColoPrint classified 59% of patients as low risk with a 5-year ROR of 9.9%. High-risk patients (31%) had a 22.4% ROR (HR: 2.41; p = .005). In contrast, the NCCN clinical high-risk factors were unable to distinguish high- and low-risk patients (15% vs. 13% ROR; p = .55).

Conclusion

ColoPrint significantly improved prognostic accuracy independent of microsatellite status or clinical variables, facilitating the identification of patients at higher risk who might be considered for additional treatment.","hji,kes",0,0,0,2,0,NA,NA +25638814,Factor graph analysis of live cell-imaging data reveals mechanisms of cell fate decisions.,"

Motivation

Cell fate decisions have a strong stochastic component. The identification of the underlying mechanisms therefore requires a rigorous statistical analysis of large ensembles of single cells that were tracked and phenotyped over time.

Results

We introduce a probabilistic framework for testing elementary hypotheses on dynamic cell behavior using time-lapse cell-imaging data. Factor graphs, probabilistic graphical models, are used to properly account for cell lineage and cell phenotype information. Our model is applied to time-lapse movies of murine granulocyte-macrophage progenitor (GMP) cells. It decides between competing hypotheses on the mechanisms of their differentiation. Our results theoretically substantiate previous experimental observations that lineage instruction, not selection is the cause for the differentiation of GMP cells into mature monocytes or neutrophil granulocytes.

Availability and implementation

The Matlab source code is available at http://treschgroup.de/Genealogies.html.","hji,kes",0,0,0,2,0,NA,NA +25643357,Figure-associated text summarization and evaluation.,"Biomedical literature incorporates millions of figures, which are a rich and important knowledge resource for biomedical researchers. Scientists need access to the figures and the knowledge they represent in order to validate research findings and to generate new hypotheses. By themselves, these figures are nearly always incomprehensible to both humans and machines and their associated texts are therefore essential for full comprehension. The associated text of a figure, however, is scattered throughout its full-text article and contains redundant information content. In this paper, we report the continued development and evaluation of several figure summarization systems, the FigSum+ systems, that automatically identify associated texts, remove redundant information, and generate a text summary for every figure in an article. Using a set of 94 annotated figures selected from 19 different journals, we conducted an intrinsic evaluation of FigSum+. We evaluate the performance by precision, recall, F1, and ROUGE scores. The best FigSum+ system is based on an unsupervised method, achieving F1 score of 0.66 and ROUGE-1 score of 0.97. The annotated data is available at figshare.com (http://figshare.com/articles/Figure_Associated_Text_Summarization_and_Evaluation/858903).","hji,kes",0,0,0,2,0,NA,NA +25687422,Mega2: validated data-reformatting for linkage and association analyses.,"BACKGROUND:In a typical study of the genetics of a complex human disease, many different analysis programs are used, to test for linkage and association. This requires extensive and careful data reformatting, as many of these analysis programs use differing input formats. Writing scripts to facilitate this can be tedious, time-consuming, and error-prone. 
To address these issues, the open source Mega2 data reformatting program provides validated and tested data conversions from several commonly-used input formats to many output formats. RESULTS:Mega2, the Manipulation Environment for Genetic Analysis, facilitates the creation of analysis-ready datasets from data gathered as part of a genetic study. It transparently allows users to process genetic data for family-based or case/control studies accurately and efficiently. In addition to data validation checks, Mega2 provides analysis setup capabilities for a broad choice of commonly-used genetic analysis programs. First released in 2000, Mega2 has recently been significantly improved in a number of ways. We have rewritten it in C++ and have reduced its memory requirements. Mega2 now can read input files in LINKAGE, PLINK, and VCF/BCF formats, as well as its own specialized annotated format. It supports conversion to many commonly-used formats including SOLAR, PLINK, Merlin, Mendel, SimWalk2, Cranefoot, IQLS, FBAT, MORGAN, BEAGLE, Eigenstrat, Structure, and PLINK/SEQ. When controlled by a batch file, Mega2 can be used non-interactively in data reformatting pipelines. Support for genetic data from several other species besides humans has been added. CONCLUSIONS:By providing tested and validated data reformatting, Mega2 facilitates more accurate and extensive analyses of genetic data, avoiding the need to write, debug, and maintain one's own custom data reformatting scripts. Mega2 is freely available at https://watson.hgen.pitt.edu/register/.","hji,kes",0,0,0,2,0,NA,NA +25725090,SimSeq: a nonparametric approach to simulation of RNA-sequence datasets.,"

Motivation

RNA sequencing analysis methods are often derived by relying on hypothetical parametric models for read counts that are not likely to be precisely satisfied in practice. Methods are often tested by analyzing data that have been simulated according to the assumed model. This testing strategy can result in an overly optimistic view of the performance of an RNA-seq analysis method.

Results

We develop a data-based simulation algorithm for RNA-seq data. The vector of read counts simulated for a given experimental unit has a joint distribution that closely matches the distribution of a source RNA-seq dataset provided by the user. We conduct simulation experiments based on the negative binomial distribution and our proposed nonparametric simulation algorithm. We compare performance between the two simulation experiments over a small subset of statistical methods for RNA-seq analysis available in the literature. We use as a benchmark the ability of a method to control the false discovery rate. Not surprisingly, methods based on parametric modeling assumptions seem to perform better with respect to false discovery rate control when data are simulated from parametric models rather than using our more realistic nonparametric simulation strategy.

Availability and implementation

The nonparametric simulation algorithm developed in this article is implemented in the R package SimSeq, which is freely available under the GNU General Public License (version 2 or later) from the Comprehensive R Archive Network (http://cran.rproject.org/).

Contact

sgbenidt@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +25725497,IVA: accurate de novo assembly of RNA virus genomes.,"

Motivation

An accurate genome assembly from short read sequencing data is critical for downstream analysis, for example allowing investigation of variants within a sequenced population. However, assembling sequencing data from virus samples, especially RNA viruses, into a genome sequence is challenging due to the combination of viral population diversity and extremely uneven read depth caused by amplification bias in the inevitable reverse transcription and polymerase chain reaction amplification process of current methods.

Results

We developed a new de novo assembler called IVA (Iterative Virus Assembler) designed specifically for read pairs sequenced at highly variable depth from RNA virus samples. We tested IVA on datasets from 140 sequenced samples from human immunodeficiency virus-1 or influenza-virus-infected people and demonstrated that IVA outperforms all other virus de novo assemblers.

Availability and implementation

The software runs under Linux, has the GPLv3 licence and is freely available from http://sanger-pathogens.github.io/iva","hji,kes",0,0,0,2,0,NA,NA +25765651,Analysis of strand-specific RNA-seq data using machine learning reveals the structures of transcription units in Clostridium thermocellum.,"Identification of transcription units (TUs) encoded in a bacterial genome is essential to elucidation of transcriptional regulation of the organism. To gain a detailed understanding of the dynamically composed TU structures, we have used four strand-specific RNA-seq (ssRNA-seq) datasets collected under two experimental conditions to derive the genomic TU organization of Clostridium thermocellum using a machine-learning approach. Our method accurately predicted the genomic boundaries of individual TUs based on two sets of parameters measuring the RNA-seq expression patterns across the genome: expression-level continuity and variance. A total of 2590 distinct TUs are predicted based on the four RNA-seq datasets. Among the predicted TUs, 44% have multiple genes. We assessed our prediction method on an independent set of RNA-seq data with longer reads. The evaluation confirmed the high quality of the predicted TUs. Functional enrichment analyses on a selected subset of the predicted TUs revealed interesting biology. To demonstrate the generality of the prediction method, we have also applied the method to RNA-seq data collected on Escherichia coli and achieved high prediction accuracies. The TU prediction program named SeqTU is publicly available at https://code.google.com/p/seqtu/. We expect that the predicted TUs can serve as the baseline information for studying transcriptional and post-transcriptional regulation in C. thermocellum and other bacteria.","hji,kes",0,0,0,2,0,NA,NA +25767303,Global differential expression of genes located in the Down Syndrome Critical Region in normal human brain.,"

Background

The information of gene expression obtained from databases, have made possible the extraction and analysis of data related with several molecular processes involving not only in brain homeostasis but its disruption in some neuropathologies; principally in Down syndrome and the Alzheimer disease.

Objective

To correlate the levels of transcription of 19 genes located in the Down Syndrome Critical Region (DSCR) with their expression in several substructures of normal human brain.

Methods

There were obtained expression profiles of 19 DSCR genes in 42 brain substructures, from gene expression values available at the database of the human brain of the Brain Atlas of the Allen Institute for Brain Sciences"""", (http://human.brain-map.org/). The co-expression patterns of DSCR genes in brain were calculated by using multivariate statistical methods.

Results

Highest levels of gene expression were registered at caudate nucleus, nucleus accumbens and putamen among central areas of cerebral cortex. Increased expression levels of RCAN1 that encode by a protein involved in signal transduction process of the CNS were recorded for PCP4 that participates in the binding to calmodulin and TTC3; a protein that is associated with differentiation of neurons. That previously identified brain structures play a crucial role in the learning process, in different class of memory and in motor skills.

Conclusion

The precise regulation of DSCR gene expression is crucial to maintain the brain homeostasis, especially in those areas with high levels of gene expression associated with a remarkable process of learning and cognition.","hji,kes",0,0,0,2,0,NA,not about the resource +25805426,Real-Time Motion Capture Toolbox (RTMocap): an open-source code for recording 3-D motion kinematics to study action-effect anticipations during motor and social interactions.,"We present here a toolbox for the real-time motion capture of biological movements that runs in the cross-platform MATLAB environment (The MathWorks, Inc., Natick, MA). It provides instantaneous processing of the 3-D movement coordinates of up to 20 markers at a single instant. Available functions include (1) the setting of reference positions, areas, and trajectories of interest; (2) recording of the 3-D coordinates for each marker over the trial duration; and (3) the detection of events to use as triggers for external reinforcers (e.g., lights, sounds, or odors). Through fast online communication between the hardware controller and RTMocap, automatic trial selection is possible by means of either a preset or an adaptive criterion. Rapid preprocessing of signals is also provided, which includes artifact rejection, filtering, spline interpolation, and averaging. A key example is detailed, and three typical variations are developed (1) to provide a clear understanding of the importance of real-time control for 3-D motion in cognitive sciences and (2) to present users with simple lines of code that can be used as starting points for customizing experiments using the simple MATLAB syntax. RTMocap is freely available (http://sites.google.com/site/RTMocap/) under the GNU public license for noncommercial use and open-source development, together with sample data and extensive documentation.","hji,kes",0,0,0,2,0,NA,NA +25833981,Community-based management of severe acute malnutrition in India: new evidence from Bihar.,"

Background

An estimated one-third of the world's children who are wasted live in India. In Bihar state, of children <5 y old, 27.1% are wasted and 8.3% have severe acute malnutrition (SAM). In 2009, Medecins Sans Frontieres (MSF) initiated a community-based management of acute malnutrition (CMAM) program for children aged 6-59 mo with SAM.

Objective

In this report, we describe the characteristics and outcomes of 8274 children treated between February 2009 and September 2011.

Design

Between February 2009 and June 2010, the program admitted children with a weight-for-height z score (WHZ) <-3 SD and/or midupper arm circumference (MUAC) <110 mm and discharged those who reached a WHZ >-2 SDs and MUAC >110 mm. These variables changed in July 2010 to admission on the basis of an MUAC <115 mm and discharge at an MUAC >=120 mm. Uncomplicated SAM cases were treated as outpatients in the community by using a WHO-standard, ready-to-use, therapeutic lipid-based paste produced in India; complicated cases were treated as inpatients by using F75/F100 WHO-standard milk until they could complete treatment in the community.

Results

A total of 8274 children were admitted including 5149 girls (62.2%), 6613 children aged 6-23 mo (79.9%), and 87.3% children who belonged to Scheduled Caste, Scheduled Tribe, or Other Backward Caste families or households. Of 3873 children admitted under the old criteria, 41 children (1.1%) died, 2069 children (53.4%) were discharged as cured, and 1485 children (38.3%) defaulted. Of 4401 children admitted under the new criteria, 36 children (0.8%) died, 2526 children (57.4%) were discharged as cured, and 1591 children (36.2%) defaulted. For children discharged as cured, the mean (SD) weight gain and length of stay were 4.7 +/- 3.1 and 5.1 +/- 3.7 g kg(-1) d(-1) and 8.7 +/- 6.1 and 7.3 +/- 5.6 wk under the old and new criteria, respectively (P < 0.01). After adjustment, significant risk factors for default were as follows: no community referral for admission, more severe wasting on admission, younger age, and a long commute for treatment.

Conclusions

To our knowledge, this is the first conventional CMAM program in India and has achieved low mortality and high cure rates in nondefaulting children. The new admission criteria lower the threshold for severity with the result that more children are included who are at lower risk of death and have a smaller WHZ deficit to correct than do children identified by the old criteria. This study was registered as a retrospective observational analysis of routine program data at http://www.isrctn.com as ISRCTN13980582.","hji,kes",0,0,0,2,0,NA,NA +25837579,Maximum-Likelihood Phylogenetic Inference with Selection on Protein Folding Stability.,"Despite intense work, incorporating constraints on protein native structures into the mathematical models of molecular evolution remains difficult, because most models and programs assume that protein sites evolve independently, whereas protein stability is maintained by interactions between sites. Here, we address this problem by developing a new mean-field substitution model that generates independent site-specific amino acid distributions with constraints on the stability of the native state against both unfolding and misfolding. The model depends on a background distribution of amino acids and one selection parameter that we fix maximizing the likelihood of the observed protein sequence. The analytic solution of the model shows that the main determinant of the site-specific distributions is the number of native contacts of the site and that the most variable sites are those with an intermediate number of native contacts. The mean-field models obtained, taking into account misfolded conformations, yield larger likelihood than models that only consider the native state, because their average hydrophobicity is more realistic, and they produce on the average stable sequences for most proteins. We evaluated the mean-field model with respect to empirical substitution models on 12 test data sets of different protein families. 
In all cases, the observed site-specific sequence profiles presented smaller Kullback-Leibler divergence from the mean-field distributions than from the empirical substitution model. Next, we obtained substitution rates combining the mean-field frequencies with an empirical substitution model. The resulting mean-field substitution model assigns larger likelihood than the empirical model to all studied families when we consider sequences with identity larger than 0.35, plausibly a condition that enforces conservation of the native structure across the family. We found that the mean-field model performs better than other structurally constrained models with similar or higher complexity. With respect to the much more complex model recently developed by Bordner and Mittelmann, which takes into account pairwise terms in the amino acid distributions and also optimizes the exchangeability matrix, our model performed worse for data with small sequence divergence but better for data with larger sequence divergence. The mean-field model has been implemented into the computer program Prot_Evol that is freely available at http://ub.cbm.uam.es/software/Prot_Evol.php.","hji,kes",0,0,0,2,0,NA,NA +25840970,Global Transcriptional Changes Following Statin Treatment in Breast Cancer.,"

Background

Statins purportedly exert antitumoral effects, but the underlying mechanisms are currently not fully elucidated. The aim of this study was to explore potential statin-induced effects on global gene expression profiles in primary breast cancer.

Experimental design

This window-of-opportunity phase II trial enrolled 50 newly diagnosed breast cancer patients prescribed atorvastatin (80 mg/day) for 2 weeks presurgically. Pre- and posttreatment tumor samples were analyzed using Significance Analysis of Microarrays (SAM) to identify differentially expressed genes. Similarly, SAM and gene ontology analyses were applied to gene expression data derived from atorvastatin-treated breast cancer cell lines (MCF7, BT474, SKBR3, and MDAMB231) comparing treated and untreated cells. The Systematic Motif Analysis Retrieval Tool (SMART) was used to identify enriched transcription factor-binding sites. Literature Vector Analysis (LitVAn) identified gene module functionality, and pathway analysis was performed using GeneGo Pathways Software (MetaCore; https://portal.genego.com/).

Results

Comparative analysis of gene expression profiles in paired clinical samples revealed 407 significantly differentially expressed genes (FDR = 0); 32 upregulated and 375 downregulated genes. Restricted filtration (fold change =1.49) resulted in 21 upregulated and 46 downregulated genes. Significantly upregulated genes included DUSP1, RHOB1, GADD45B, and RGS1. Pooled results from gene ontology, LitVAn and SMART analyses identified statin-induced effects on the apoptotic and MAPK pathways among others. Comparative analyses of gene expression profiles in breast cancer cell lines showed significant upregulation of the mevalonate and proapoptotic pathways following atorvastatin treatment.

Conclusions

We report potential statin-induced changes in global tumor gene expression profiles, indicating MAPK pathway inhibition and proapoptotic events.","hji,kes",0,0,0,2,0,NA,references other data resource +25855375,Alterations of Functional Connectivity Among Resting-State Networks in Hypothyroidism.,"Hypothyroidism affects brain functioning as suggested by various neuroimaging studies. The primary focus of the present study was to examine whether hypothyroidism would impact connectivity among resting-state networks (RSNs) using resting-state functional magnetic resonance imaging (rsfMRI). Twenty-two patients with hypothyroidism and 22 healthy controls were recruited and scanned using rsfMRI. The data were analysed using independent component analysis and a dual regression approach that was applied on five RSNs that were identified using fsl software (http://fsl.fmrib.ox.ac.uk). Hypothyroid patients showed significantly decreased functional connectivity in the regions of the right frontoparietal network (frontal pole), the medial visual network (lateral occipital gyrus, precuneus cortex and cuneus) and the motor network (precentral gyrus, postcentral gyrus, precuneus cortex, paracingulate gyrus, cingulate gyrus and supramarginal gyrus) compared to healthy controls. The reduced functional connectivity in the right frontoparietal network, the medial visual network and the motor network suggests neurocognitive alterations in hypothyroid patients in the corresponding functions. However, the study would be further continued to investigate the effects of thyroxine treatment and correlation with neurocognitive scores. The findings of the present study provide further interesting insights into our understanding of the action of thyroid hormone on the adult human brain.","hji,kes",0,0,0,2,0,NA,NA +25855811,SANSparallel: interactive homology search against Uniprot.,"Proteins evolve by mutations and natural selection. 
The network of sequence similarities is a rich source for mining homologous relationships that inform on protein structure and function. There are many servers available to browse the network of homology relationships but one has to wait up to a minute for results. The SANSparallel webserver provides protein sequence database searches with immediate response and professional alignment visualization by third-party software. The output is a list, pairwise alignment or stacked alignment of sequence-similar proteins from Uniprot, UniRef90/50, Swissprot or Protein Data Bank. The stacked alignments are viewed in Jalview or as sequence logos. The database search uses the suffix array neighborhood search (SANS) method, which has been re-implemented as a client-server, improved and parallelized. The method is extremely fast and as sensitive as BLAST above 50% sequence identity. Benchmarks show that the method is highly competitive compared to previously published fast database search programs: UBLAST, DIAMOND, LAST, LAMBDA, RAPSEARCH2 and BLAT. The web server can be accessed interactively or programmatically at http://ekhidna2.biocenter.helsinki.fi/cgi-bin/sans/sans.cgi. It can be used to make protein functional annotation pipelines more efficient, and it is useful in interactive exploration of the detailed evidence supporting the annotation of particular proteins of interest.","hji,kes",0,0,0,2,0,NA,NA +25861968,MetaSV: an accurate and integrative structural-variant caller for next generation sequencing.,"

Unlabelled

Structural variations (SVs) are large genomic rearrangements that vary significantly in size, making them challenging to detect with the relatively short reads from next-generation sequencing (NGS). Different SV detection methods have been developed; however, each is limited to specific kinds of SVs with varying accuracy and resolution. Previous works have attempted to combine different methods, but they still suffer from poor accuracy particularly for insertions. We propose MetaSV, an integrated SV caller which leverages multiple orthogonal SV signals for high accuracy and resolution. MetaSV proceeds by merging SVs from multiple tools for all types of SVs. It also analyzes soft-clipped reads from alignment to detect insertions accurately since existing tools underestimate insertion SVs. Local assembly in combination with dynamic programming is used to improve breakpoint resolution. Paired-end and coverage information is used to predict SV genotypes. Using simulation and experimental data, we demonstrate the effectiveness of MetaSV across various SV types and sizes.

Availability and implementation

Code in Python is at http://bioinform.github.io/metasv/.

Contact

rd@bina.com

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +25865072,Radiopacity for Contemporary Luting Cements Using Digital Radiography under Various Exposure Conditions.,"

Purpose

This study examined the radiopacity of contemporary luting cements using direct digital radiography under a range of exposure conditions.

Materials and methods

Disc specimens (N = 80, n = 10 per group, 5 mm 1 mm) were prepared from 8 resin-based luting cements (BisCem Clearfil SA Luting, Duolink, Maxcem Elite Multilink Speed, Panavia F 2.0, RelyX Unicem Clicker, V-link). The specimens were radiographed using a charge-coupled device sensor along with an 11-step aluminum step wedge (1.5-mm incremental steps) and 1-mm-thick tooth cut using five tube voltage/exposure time setups (60 kVp, 0.10/0.08 seconds; 70 kVp, 0.10/0.08/0.06 seconds) at 4 mA and 30 cm. The radiopacity of the specimens was compared with that of the aluminum step wedge and human enamel and dentin using NIH ImageJ software (available at http://rsb.info.nih.gov/ij/). A linear regression model for the aluminum step wedge was constructed, and the data were analyzed by ANOVA and Duncan post hoc test.

Results

Maxcem Elite (5.142 to 5.441) showed the highest radiopacity of all materials, followed in order by Multilink Speed (3.731 to 3.396) and V-link (2.763 to 3.103). The radiopacity of Panavia F 2.0 (2.025 to 2.429), BisCem (1.825 to 2.218), Clearfil SA Luting (1.692 to 2.145), Duolink (1.707 to 1.993), and RelyX Unicem Clicker (1.586 to 1.979) were between enamel (2.117 to 2.330) and dentin (1.302 to 1.685). The radiopacity of 70 kVp conditions was higher than that of the 60 kVp conditions.

Conclusions

The radiopacities of the tested luting materials were greater than those of dentin or aluminum, satisfying the criteria of the International Organization for Standardization, and they differed significantly from each other in the exposure setups.","hji,kes",0,0,0,2,0,NA,NA +25871442,Monitoring matrix metalloproteinase activity at the epidermal-dermal interface by SILAC-iTRAQ-TAILS.,"Secreted proteases act on interstitial tissue secretomes released from multiple cell types. Thus, substrate proteins might be part of higher molecular complexes constituted by many proteins with diverse and potentially unknown cellular origin. In cell culture, these may be reconstituted by mixing native secretomes from different cell types prior to incubation with a test protease. Although current degradomics techniques could identify novel substrate proteins in these complexes, all information on the cellular origin is lost. To address this limitation, we combined iTRAQ-based terminal amine isotopic labeling of substrates (iTRAQ-TAILS) with SILAC to assign proteins to a specific cell type by MS1- and their cleavage by MS2-based quantification in the same experiment. We demonstrate the power of our newly established workflow by monitoring matrix metalloproteinase (MMP) 10 dependent cleavages in mixtures from light-labeled keratinocyte and heavy-labeled fibroblast secretomes. This analysis correctly assigned extracellular matrix components, such as laminins and collagens, to their respective cellular origins and revealed their processing in an MMP10-dependent manner. Hence, our newly devised degradomics workflow facilitates deeper insight into protease activity in complex intercellular compartments such as the epidermal-dermal interface by integrating multiple modes of quantification with positional proteomics. 
All MS data have been deposited in the ProteomeXchange with identifier PXD001643 (http://proteomecentral.proteomexchange.org/dataset/PXD001643).","hji,kes",0,0,0,2,0,NA,data deposited as referenced +25882789,ReMo-SNPs: a new software tool for identification of polymorphisms in regions and motifs genome-wide.,"Studies of complex genetic diseases have revealed many risk factors of small effect, but the combined amount of heritability explained is still low. Genome-wide association studies are often underpowered to identify true effects because of the very large number of parallel tests. There is, therefore, a great need to generate data sets that are enriched for those markers that have an increased a priori chance of being functional, such as markers in genomic regions involved in gene regulation. ReMo-SNPs is a computational program developed to aid researchers in the process of selecting functional SNPs for association analyses in user-specified regions and/or motifs genome-wide. The useful feature of automatic selection of genotyped markers in the user-provided material makes the output data ready to be used in a following association study. In this article we describe the program and its functions. We also validate the program by including an example study on three different transcription factors and results from an association study on two psychiatric phenotypes. The flexibility of the ReMo-SNPs program enables the user to study any region or sequence of interest, without limitation to transcription factor binding regions and motifs. The program is freely available at: http://www.neuro.ki.se/ReMo-SNPs/.","hji,kes",0,0,0,2,0,NA,NA +25886978,Accurate prediction of RNA nucleotide interactions with backbone k-tree model.,"

Motivation

Given the importance of non-coding RNAs to cellular regulatory functions, it would be highly desirable to have accurate computational prediction of RNA 3D structure, a task which remains challenging. Even for a short RNA sequence, the space of tertiary conformations is immense; existing methods to identify native-like conformations mostly resort to random sampling of conformations to achieve computational feasibility. However, native conformations may not be examined and prediction accuracy may be compromised due to sampling. State-of-the-art methods have yet to deliver satisfactory predictions for RNAs of length beyond 50 nucleotides.

Results

This paper presents a method to tackle a key step in the RNA 3D structure prediction problem, the prediction of the nucleotide interactions that constitute the desired 3D structure. The research is based on a novel graph model, called a backbone k-tree, to tightly constrain the nucleotide interaction relationships considered for RNA 3D structures. It is shown that the new model makes it possible to efficiently predict the optimal set of nucleotide interactions (including the non-canonical interactions in all recently revealed families) from the query sequence along with known or predicted canonical basepairs. The preliminary results indicate that in most cases the new method can predict with a high accuracy the nucleotide interactions that constitute the 3D structure of the query sequence. It thus provides a useful tool for the accurate prediction of RNA 3D structure.

Availability and implementation

The source package for BkTree is available at http://rna-informatics.uga.edu/index.php?f=software&p=BkTree.

Contact

lding@uga.edu or cai@cs.uga.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +25887233,Kernel approaches for differential expression analysis of mass spectrometry-based metabolomics data.,"

Background

Data generated from metabolomics experiments are different from other types of """"-omics"""" data. For example, a common phenomenon in mass spectrometry (MS)-based metabolomics data is that the data matrix frequently contains missing values, which complicates some quantitative analyses. One way to tackle this problem is to treat them as absent. Hence there are two types of information that are available in metabolomics data: presence/absence of a metabolite and a quantitative value of the abundance level of a metabolite if it is present. Combining these two layers of information poses challenges to the application of traditional statistical approaches in differential expression analysis.

Results

In this article, we propose a novel kernel-based score test for the metabolomics differential expression analysis. In order to simultaneously capture both the continuous pattern and discrete pattern in metabolomics data, two new kinds of kernels are designed. One is the distance-based kernel and the other is the stratified kernel. While we initially describe the procedures in the case of single-metabolite analysis, we extend the methods to handle metabolite sets as well.

Conclusions

Evaluation based on both simulated data and real data from a liver cancer metabolomics study indicates that our kernel method has a better performance than some existing alternatives. An implementation of the proposed kernel method in the R statistical computing environment is available at http://works.bepress.com/debashis_ghosh/60/ .","hji,kes",0,0,0,2,0,NA,NA +25925569,NGL Viewer: a web application for molecular visualization.,"The NGL Viewer (http://proteinformatics.charite.de/ngl) is a web application for the visualization of macromolecular structures. By fully adopting capabilities of modern web browsers, such as WebGL, for molecular graphics, the viewer can interactively display large molecular complexes and is also unaffected by the retirement of third-party plug-ins like Flash and Java Applets. Generally, the web application offers comprehensive molecular visualization through a graphical user interface so that life scientists can easily access and profit from available structural data. It supports common structural file-formats (e.g. PDB, mmCIF) and a variety of molecular representations (e.g. 'cartoon, spacefill, licorice'). Moreover, the viewer can be embedded in other web sites to provide specialized visualizations of entries in structural databases or results of structure-related calculations.","hji,kes",0,0,0,2,0,NA,NA +25931517,RVD2: an ultra-sensitive variant detection model for low-depth heterogeneous next-generation sequencing data.,"

Motivation

Next-generation sequencing technology is increasingly being used for clinical diagnostic tests. Clinical samples are often genomically heterogeneous due to low sample purity or the presence of genetic subpopulations. Therefore, a variant calling algorithm for calling low-frequency polymorphisms in heterogeneous samples is needed.

Results

We present a novel variant calling algorithm that uses a hierarchical Bayesian model to estimate allele frequency and call variants in heterogeneous samples. We show that our algorithm improves upon current classifiers and has higher sensitivity and specificity over a wide range of median read depth and minor allele fraction. We apply our model and identify 15 mutated loci in the PAXP1 gene in a matched clinical breast ductal carcinoma tumor sample; two of which are likely loss-of-heterozygosity events.

Availability and implementation

http://genomics.wpi.edu/rvd2/.

Contact

pjflaherty@wpi.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +25952609,Heterozygous genome assembly via binary classification of homologous sequence.,"

Background

Genome assemblers to date have predominantly targeted haploid reference reconstruction from homozygous data. When applied to diploid genome assembly, these assemblers perform poorly, owing to the violation of assumptions during both the contigging and scaffolding phases. Effective tools to overcome these problems are in growing demand. Increasing parameter stringency during contigging is an effective solution to obtaining haplotype-specific contigs; however, effective algorithms for scaffolding such contigs are lacking.

Methods

We present a stand-alone scaffolding algorithm, ScaffoldScaffolder, designed specifically for scaffolding diploid genomes. The algorithm identifies homologous sequences as found in """"bubble"""" structures in scaffold graphs. Machine learning classification is used to then classify sequences in partial bubbles as homologous or non-homologous sequences prior to reconstructing haplotype-specific scaffolds. We define four new metrics for assessing diploid scaffolding accuracy: contig sequencing depth, contig homogeneity, phase group homogeneity, and heterogeneity between phase groups.

Results

We demonstrate the viability of using bubbles to identify heterozygous homologous contigs, which we term homolotigs. We show that machine learning classification trained on these homolotig pairs can be used effectively for identifying homologous sequences elsewhere in the data with high precision (assuming error-free reads).

Conclusion

More work is required to comparatively analyze this approach on real data with various parameters and classifiers against other diploid genome assembly methods. However, the initial results of ScaffoldScaffolder supply validity to the idea of employing machine learning in the difficult task of diploid genome assembly. Software is available at http://bioresearch.byu.edu/scaffoldscaffolder.","hji,kes",0,0,0,2,0,NA,NA +25966491,Social Science Collaboration with Environmental Health.,"

Background

Social science research has been central in documenting and analyzing community discovery of environmental exposure and consequential processes. Collaboration with environmental health science through team projects has advanced and improved our understanding of environmental health and justice.

Objective

We sought to identify diverse methods and topics in which social scientists have expanded environmental health understandings at multiple levels, to examine how transdisciplinary environmental health research fosters better science, and to learn how these partnerships have been able to flourish because of the support from National Institute of Environmental Health Sciences (NIEHS).

Methods

We analyzed various types of social science research to investigate how social science contributes to environmental health. We also examined NIEHS programs that foster social science. In addition, we developed a case study of a community-based participation research project in Akwesasne in order to demonstrate how social science has enhanced environmental health science.

Results

Social science has informed environmental health science through ethnographic studies of contaminated communities, analysis of spatial distribution of environmental injustice, psychological experience of contamination, social construction of risk and risk perception, and social impacts of disasters. Social science-environmental health team science has altered the way scientists traditionally explore exposure by pressing for cumulative exposure approaches and providing research data for policy applications.

Conclusions

A transdisciplinary approach for environmental health practice has emerged that engages the social sciences to paint a full picture of the consequences of contamination so that policy makers, regulators, public health officials, and other stakeholders can better ameliorate impacts and prevent future exposure.

Citation

Hoover E, Renauld M, Edelstein MR, Brown P. 2015. Social science collaboration with environmental health. Environ Health Perspect 123:1100-1106; http://dx.doi.org/10.1289/ehp.1409283.","hji,kes",0,0,0,2,0,NA,NA +25987413,A Scalable Approach for Protein False Discovery Rate Estimation in Large Proteomic Data Sets.,"Calculating the number of confidently identified proteins and estimating false discovery rate (FDR) is a challenge when analyzing very large proteomic data sets such as entire human proteomes. Biological and technical heterogeneity in proteomic experiments further add to the challenge and there are strong differences in opinion regarding the conceptual validity of a protein FDR and no consensus regarding the methodology for protein FDR determination. There are also limitations inherent to the widely used classic target-decoy strategy that particularly show when analyzing very large data sets and that lead to a strong over-representation of decoy identifications. In this study, we investigated the merits of the classic, as well as a novel target-decoy-based protein FDR estimation approach, taking advantage of a heterogeneous data collection comprised of ~19,000 LC-MS/MS runs deposited in ProteomicsDB (https://www.proteomicsdb.org). The """"picked"""" protein FDR approach treats target and decoy sequences of the same protein as a pair rather than as individual entities and chooses either the target or the decoy sequence depending on which receives the highest score. We investigated the performance of this approach in combination with q-value based peptide scoring to normalize sample-, instrument-, and search engine-specific differences. The """"picked"""" target-decoy strategy performed best when protein scoring was based on the best peptide q-value for each protein yielding a stable number of true positive protein identifications over a wide range of q-value thresholds. 
We show that this simple and unbiased strategy eliminates a conceptual issue in the commonly used """"classic"""" protein FDR approach that causes overprediction of false-positive protein identification in large data sets. The approach scales from small to very large data sets without losing performance, consistently increases the number of true-positive protein identifications and is readily implemented in proteomics analysis software.","hji,kes",0,0,0,2,0,NA,NA +25990735,"MyProteinNet: build up-to-date protein interaction networks for organisms, tissues and user-defined contexts.","The identification of the molecular pathways active in specific contexts, such as disease states or drug responses, often requires an extensive view of the potential interactions between a subset of proteins. This view is not easily obtained: it requires the integration of context-specific protein list or expression data with up-to-date data of protein interactions that are typically spread across multiple databases. The MyProteinNet web server allows users to easily create such context-sensitive protein interaction networks. Users can automatically gather and consolidate data from up to 11 different databases to create a generic protein interaction network (interactome). They can score the interactions based on reliability and filter them by user-defined contexts including molecular expression and protein annotation. The output of MyProteinNet includes the generic and filtered interactome files, together with a summary of their network attributes. MyProteinNet is particularly geared toward building human tissue interactomes, by maintaining tissue expression profiles from multiple resources. The ability of MyProteinNet to facilitate the construction of up-to-date, context-specific interactomes and its applicability to 11 different organisms and to tens of human tissues, make it a powerful tool in meaningful analysis of protein networks. 
MyProteinNet is available at http://netbio.bgu.ac.il/myproteinnet.","hji,kes",0,0,0,2,0,NA,intergration but data not uqnie/no value add +26051252,Light-weight reference-based compression of FASTQ data.,"

Background

The exponential growth of next generation sequencing (NGS) data has posed big challenges to data storage, management and archive. Data compression is one of the effective solutions, where reference-based compression strategies can typically achieve superior compression ratios compared to the ones not relying on any reference.

Results

This paper presents a lossless light-weight reference-based compression algorithm namely LW-FQZip to compress FASTQ data. The three components of any given input, i.e., metadata, short reads and quality score strings, are first parsed into three data streams in which the redundancy information are identified and eliminated independently. Particularly, well-designed incremental and run-length-limited encoding schemes are utilized to compress the metadata and quality score streams, respectively. To handle the short reads, LW-FQZip uses a novel light-weight mapping model to fast map them against external reference sequence(s) and produce concise alignment results for storage. The three processed data streams are then packed together with some general purpose compression algorithms like LZMA. LW-FQZip was evaluated on eight real-world NGS data sets and achieved compression ratios in the range of 0.111-0.201. This is comparable or superior to other state-of-the-art lossless NGS data compression algorithms.

Conclusions

LW-FQZip is a program that enables efficient lossless FASTQ data compression. It contributes to the state of art applications for NGS data storage and transmission. LW-FQZip is freely available online at: http://csse.szu.edu.cn/staff/zhuzx/LWFQZip.","hji,kes",0,0,0,2,0,NA,NA +26063840,EXIMS: an improved data analysis pipeline based on a new peak picking method for EXploring Imaging Mass Spectrometry data.,"

Motivation

Matrix Assisted Laser Desorption Ionization-Imaging Mass Spectrometry (MALDI-IMS) in 'omics' data acquisition generates detailed information about the spatial distribution of molecules in a given biological sample. Various data processing methods have been developed for exploring the resultant high volume data. However, most of these methods process data in the spectral domain and do not make the most of the important spatial information available through this technology. Therefore, we propose a novel streamlined data analysis pipeline specifically developed for MALDI-IMS data utilizing significant spatial information for identifying hidden significant molecular distribution patterns in these complex datasets.

Methods

The proposed unsupervised algorithm uses Sliding Window Normalization (SWN) and a new spatial distribution based peak picking method developed based on Gray level Co-Occurrence (GCO) matrices followed by clustering of biomolecules. We also use gist descriptors and an improved version of GCO matrices to extract features from molecular images and minimum medoid distance to automatically estimate the number of possible groups.

Results

We evaluated our algorithm using a new MALDI-IMS metabolomics dataset of a plant (Eucalypt) leaf. The algorithm revealed hidden significant molecular distribution patterns in the dataset, which the current Component Analysis and Segmentation Map based approaches failed to extract. We further demonstrate the performance of our peak picking method over other traditional approaches by using a publicly available MALDI-IMS proteomics dataset of a rat brain. Although SWN did not show any significant improvement as compared with using no normalization, the visual assessment showed an improvement as compared to using the median normalization.

Availability and implementation

The source code and sample data are freely available at http://exims.sourceforge.net/.

Contact

awgcdw@student.unimelb.edu.au or chalini_w@live.com

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +26072509,IgRepertoireConstructor: a novel algorithm for antibody repertoire construction and immunoproteogenomics analysis.,"

Unlabelled

The analysis of concentrations of circulating antibodies in serum (antibody repertoire) is a fundamental, yet poorly studied, problem in immunoinformatics. The two current approaches to the analysis of antibody repertoires [next generation sequencing (NGS) and mass spectrometry (MS)] present difficult computational challenges since antibodies are not directly encoded in the germline but are extensively diversified by somatic recombination and hypermutations. Therefore, the protein database required for the interpretation of spectra from circulating antibodies is custom for each individual. Although such a database can be constructed via NGS, the reads generated by NGS are error-prone and even a single nucleotide error precludes identification of a peptide by the standard proteomics tools. Here, we present the IgRepertoireConstructor algorithm that performs error-correction of immunosequencing reads and uses mass spectra to validate the constructed antibody repertoires.

Availability and implementation

IgRepertoireConstructor is open source and freely available as a C++ and Python program running on all Unix-compatible platforms. The source code is available from http://bioinf.spbau.ru/igtools.

Contact

ppevzner@ucsd.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +26072510,Reconstruction of clonal trees and tumor composition from multi-sample sequencing data.,"

Motivation

DNA sequencing of multiple samples from the same tumor provides data to analyze the process of clonal evolution in the population of cells that give rise to a tumor.

Results

We formalize the problem of reconstructing the clonal evolution of a tumor using single-nucleotide mutations as the variant allele frequency (VAF) factorization problem. We derive a combinatorial characterization of the solutions to this problem and show that the problem is NP-complete. We derive an integer linear programming solution to the VAF factorization problem in the case of error-free data and extend this solution to real data with a probabilistic model for errors. The resulting AncesTree algorithm is better able to identify ancestral relationships between individual mutations than existing approaches, particularly in ultra-deep sequencing data when high read counts for mutations yield high confidence VAFs.

Availability and implementation

An implementation of AncesTree is available at: http://compbio.cs.brown.edu/software.","hji,kes",0,0,0,2,0,NA,NA +26072515,Inferring orthologous gene regulatory networks using interspecies data fusion.,"

Motivation

The ability to jointly learn gene regulatory networks (GRNs) in, or leverage GRNs between related species would allow the vast amount of legacy data obtained in model organisms to inform the GRNs of more complex, or economically or medically relevant counterparts. Examples include transferring information from Arabidopsis thaliana into related crop species for food security purposes, or from mice into humans for medical applications. Here we develop two related Bayesian approaches to network inference that allow GRNs to be jointly inferred in, or leveraged between, several related species: in one framework, network information is directly propagated between species; in the second hierarchical approach, network information is propagated via an unobserved 'hypernetwork'. In both frameworks, information about network similarity is captured via graph kernels, with the networks additionally informed by species-specific time series gene expression data, when available, using Gaussian processes to model the dynamics of gene expression.

Results

Results on in silico benchmarks demonstrate that joint inference, and leveraging of known networks between species, offers better accuracy than standalone inference. The direct propagation of network information via the non-hierarchical framework is more appropriate when there are relatively few species, while the hierarchical approach is better suited when there are many species. Both methods are robust to small amounts of mislabelling of orthologues. Finally, the use of Saccharomyces cerevisiae data and networks to inform inference of networks in the budding yeast Schizosaccharomyces pombe predicts a novel role in cell cycle regulation for Gas1 (SPAC19B12.02c), a 1,3-beta-glucanosyltransferase.

Availability and implementation

MATLAB code is available from http://go.warwick.ac.uk/systemsbiology/software/.","hji,kes",0,0,0,2,0,NA,NA +26078289,"Breast Cancer and Occupation: The Need for Action: APHA Policy Statement Number 20146, Issued November 18, 2014.","Breast cancer is the most prevalent cancer among women in the United States and other countries, making it a major public health concern. Despite significant scientific evidence about its known or suspected causes, research and prevention measures to identify and eliminate occupational and other environmental hazards and risk factors for breast cancer remain largely overlooked. As a result, hazards continue unabated for women generally, especially those who work outside the home. The science linking breast cancer and occupation in particular is growing. Researchers have identified commonly used chemicals that induce breast tumors in test animals. Animal studies link chemicals that mimic reproductive hormones to elevated breast cancer rates. Other animal and human studies link chemical exposures to increased breast cancer rates, including two recent investigations focused on occupational hazards. But the latter are the exception. Studies that attempt to identify and characterize workplace agents linked to breast cancer, as well as intervention studies focusing on the use of less toxic processes and substances, are limited. In what might be construed as a case of gender and social class bias, many research and funding agencies have ignored or downplayed the role of occupational studies despite their relevance to prevention efforts. Action required starts with making a national priority of promoting and supporting research on occupational and other environmental causes of breast cancer. Other public health actions include hazard surveillance and primary prevention activities such as reductions in the use of toxic materials, informed substitution, and green chemistry efforts. 
The original document is accessible at the APHA website, http://www.apha.org/policies-and-advocacy/public-health-policy-statements/policy-database/2015/01/07/14/55/breast-cancer-and-occupation.","hji,kes",0,0,0,2,0,NA,NA +26095248,"Advocating for Deployed Women Veterans' Health Differences, Difficulties, and Disparities.","

Problem

The preceding article presented a glimpse of deployed women veterans, their military culture, and their experiences in the Global War on Terror (Iraq and Afghanistan) to assist civilian nurses to gain significant rapport and provide important culturally sensitive care.

Methods

Pertinent literary sources were reviewed to gather applicable data about the problem.

Findings

A confirmatory answer from the assessment question of """"Have you served in the military?"""" and the use of the Military Health History Pocket Card for Clinicians (available at http://www.va.gov.oaa/pocketcard) will assist with revealing possible health risks from the increased amounts of military men and women veterans seeking (and/or returning to) a variety of community-based health services. This article about deployed women veterans examines their specific health differences (e.g., research literature, post-traumatic stress disorder, and military sexual trauma), difficulties (e.g., reproductive, gynecologic, urinary, suicide), and gender disparities (varied treatment patterns).

Conclusion

Understanding these gender situations, civilian nurses can better advocate with increasing evidence-based decisions that their physical and behavioral responses were different from their male counterparts. Continual assessment, knowledgeable care, ongoing literature review, interdisciplinary health team development, and the presence of resourceful community agencies should be a significant part of their holistic care. Conard Armstrong.","hji,kes",0,0,0,2,0,NA,NA +26106259,MITOSCISSOR: A Useful Tool for Auto-Assembly of Mitogenomic Datasets in the Evolutionary Analysis of Fishes.,"As a result of the development of rapid and efficient sequencing technologies, complete sequences of numerous mitochondrial genomes are now available. Mitochondrial genomes have been widely used to evaluate relationships between species in several fields, including evolutionary and population genetics, as well as in forensic identification and in the study of mitochondrial diseases in humans. However, the creation of mitochondrial genomes is extremely time consuming. In this paper, we present a new tool, MITOSCISSOR, which is a rapid method for parsing and formatting dozens of complete mitochondrial genome sequences. With the aid of MITOSCISSOR, complete mitochondrial genome sequences of 103 species from Tetraodontiformes (a difficult-to-classify order of fish) were easily parsed and formatted. It typically takes several days to produce similar results when relying upon manual editing. This tool could open the .gb file of Genbank directly and help us to use existing mitogenomic data. In the present study, we established the first clear and robust molecular phylogeny of 103 tetraodontiform fishes, a goal that has long eluded ichthyologists. MITOSCISSOR greatly increases the efficiency with which DNA data files can be parsed and annotated, and thus has the potential to greatly facilitate evolutionary analysis using mitogenomic data. 
This software is freely available for noncommercial users at http://www.filedropper.com/mitoscissor.","hji,kes",0,0,0,2,0,NA,NA +26115255,Estimation of the environmental dam-offspring correlation in beef cattle.,"A long standing controversy in animal breeding is related to the strong negative estimates of the direct-maternal genetic correlation obtained when fitting data on maternally influenced traits. In this article, we focused on a model that introduces a new correlation parameter among dam-offspring records. The extant theory allows estimation of the parameter when dams have at most a single offspring. Our goal was to develop an inferential procedure in a more general setting. To do so, we applied a Bayesian approach and we showed that the estimation could be accomplished by introducing a Markov chain Monte Carlo (MCMC) step embedded into a regular Gibbs sampler program. The procedure was implemented by means of an MCMC algorithm known as the Griddy-Gibbs sampler, and a Fortran 90 library was created to accomplish the task. The computer program is available from http://www.agro.uba.ar/catedras/mg_animal/software/RDBLK. With this tool at hand, we applied the inferential procedure to weaning weight records on beef cattle calves from an Argentinean Hereford herd, and we estimated the marginal distribution of the environmental dam-offspring correlation parameter. The distribution was unimodal and symmetric with a mean value of -0.14 (0.03) and a 95% high posterior density interval between -0.20 and -0.07, indicating that the model placed a huge mass on negative values of the parameter. Noticeably, the magnitude of the direct-maternal genetic correlation diminished from -0.61 to -0.37 with respect to the standard maternal animal model. 
This result reinforces the idea that environmental covariances among dam-offspring records may bias the estimate of the direct-maternal genetic correlation.","hji,kes",0,0,0,2,0,NA,NA +26130741,Cohort Profile Update: Australian Longitudinal Study on Women's Health.,"In 1996 the Australian Longitudinal Study on Women's Health recruited a nationally representative sample of more than 40,000 women in three age cohorts, born in 1973-78, 1946-51 and 1921-26. At least six waves of 3-yearly surveys have been completed. Although the focus remains on factors affecting the health and well-being of women and their access to and use of health services across urban, rural and remote areas of Australia, the study has now been considerably expanded by linkage to other health data sets. For most women who have ever participated in the study, linked records are now available for: government-subsidized non-hospital services (e.g. all general practitioner visits); pharmaceutical prescriptions filled; national death index, including codes for multiple causes of death; aged care assessments and services; cancer registries; and, for most states and territories, hospital admissions and perinatal data. Additionally, a large cohort of women born in 1989-95 have been recruited. The data are available to approved collaborators, with more than 780 researchers using the data so far. Full details of the study materials and data access procedures are available at [http://www.alswh.org.au/].","hji,kes",0,0,0,2,0,NA,health data +26133389,Quest for Orthologs Entails Quest for Tree of Life: In Search of the Gene Stream.,"Quest for Orthologs (QfO) is a community effort with the goal to improve and benchmark orthology predictions. 
As quality assessment assumes prior knowledge on species phylogenies, we investigated the congruency between existing species trees by comparing the relationships of 147 QfO reference organisms from six Tree of Life (ToL)/species tree projects: The National Center for Biotechnology Information (NCBI) taxonomy, Opentree of Life, the sequenced species/species ToL, the 16S ribosomal RNA (rRNA) database, and trees published by Ciccarelli et al. (Ciccarelli FD, et al. 2006. Toward automatic reconstruction of a highly resolved tree of life. Science 311:1283-1287) and by Huerta-Cepas et al. (Huerta-Cepas J, Marcet-Houben M, Gabaldon T. 2014. A nested phylogenetic reconstruction approach provides scalable resolution in the eukaryotic Tree Of Life. PeerJ PrePrints 2:223) Our study reveals that each species tree suggests a different phylogeny: 87 of the 146 (60%) possible splits of a dichotomous and rooted tree are congruent, while all other splits are incongruent in at least one of the species trees. Topological differences are observed not only at deep speciation events, but also within younger clades, such as Hominidae, Rodentia, Laurasiatheria, or rosids. The evolutionary relationships of 27 archaea and bacteria are highly inconsistent. By assessing 458,108 gene trees from 65 genomes, we show that consistent species topologies are more often supported by gene phylogenies than contradicting ones. The largest concordant species tree includes 77 of the QfO reference organisms at the most. Results are summarized in the form of a consensus ToL (http://swisstree.vital-it.ch/species_tree) that can serve different benchmarking purposes.","hji,kes",0,0,0,2,0,NA,summarized data +26150785,The PREP pipeline: standardized preprocessing for large-scale EEG analysis.,"The technology to collect brain imaging and physiological measures has become portable and ubiquitous, opening the possibility of large-scale analysis of real-world human imaging. 
By its nature, such data is large and complex, making automated processing essential. This paper shows how lack of attention to the very early stages of an EEG preprocessing pipeline can reduce the signal-to-noise ratio and introduce unwanted artifacts into the data, particularly for computations done in single precision. We demonstrate that ordinary average referencing improves the signal-to-noise ratio, but that noisy channels can contaminate the results. We also show that identification of noisy channels depends on the reference and examine the complex interaction of filtering, noisy channel identification, and referencing. We introduce a multi-stage robust referencing scheme to deal with the noisy channel-reference interaction. We propose a standardized early-stage EEG processing pipeline (PREP) and discuss the application of the pipeline to more than 600 EEG datasets. The pipeline includes an automatically generated report for each dataset processed. Users can download the PREP pipeline as a freely available MATLAB library from http://eegstudy.org/prepcode.","hji,kes",0,0,0,2,0,NA,NA +26204236,MI-PVT: A Tool for Visualizing the Chromosome-Centric Human Proteome.,"We have developed the web-based Michigan Proteome Visualization Tool (MI-PVT) to visualize and compare protein expression and isoform-level function across human chromosomes and tissues (http://guanlab.ccmb.med.umich.edu/mipvt). As proof of principle, we have populated the tool with Human Proteome Map (HPM) data. We were able to observe many biologically interesting features. From the vantage point of our chromosome 17 team, for example, we found more than 300 proteins from chromosome 17 expressed in each of the 30 tissues and cell types studied, with the highest number of expressed proteins being 685 in testis. 
Comparisons of expression levels across tissues showed low numbers of proteins expressed in esophagus, but esophagus had 12 cytoskeletal proteins coded on chromosome 17 with very high expression (>1000 spectral counts). This customized MI-PVT should be helpful for biologists to browse and study specific proteins and protein data sets across tissues and chromosomes. Users can upload any data of interest in MI-PVT for visualization. Our aim is to integrate extensive mass-spectrometric proteomic data into the tool to facilitate finding chromosome-centric protein expression and correlation across tissues.","hji,kes",0,0,0,2,0,NA,NA +26226130,Node sampling for protein complex estimation in bait-prey graphs.,"In cellular biology, node-and-edge graph or """"network"""" data collection often uses bait-prey technologies such as co-immunoprecipitation (CoIP). Bait-prey technologies assay relationships or """"interactions"""" between protein pairs, with CoIP specifically measuring protein complex co-membership. Analyses of CoIP data frequently focus on estimating protein complex membership. Due to budgetary and other constraints, exhaustive assay of the entire network using CoIP is not always possible. We describe a stratified sampling scheme to select baits for CoIP experiments when protein complex estimation is the main goal. Expanding upon the classic framework in which nodes represent proteins and edges represent pairwise interactions, we define generalized nodes as sets of adjacent nodes with identical adjacency outside the set and use these as strata from which to select the next set of baits. Strata are redefined at each round of sampling to incorporate accumulating data. This scheme maintains user-specified quality thresholds for protein complex estimates and, relative to simple random sampling, leads to a marked increase in the number of correctly estimated complexes at each round of sampling. 
The R package seqSample contains all source code and is available at http://vault.northwestern.edu/~dms877/Rpacks/.","hji,kes",0,0,0,2,0,NA,NA +26236573,"SOCR data dashboard: an integrated big data archive mashing medicare, labor, census and econometric information.","

Introduction

Intuitive formulation of informative and computationally-efficient queries on big and complex datasets present a number of challenges. As data collection is increasingly streamlined and ubiquitous, data exploration, discovery and analytics get considerably harder. Exploratory querying of heterogeneous and multi-source information is both difficult and necessary to advance our knowledge about the world around us.

Research design

We developed a mechanism to integrate dispersed multi-source data and service the mashed information via human and machine interfaces in a secure, scalable manner. This process facilitates the exploration of subtle associations between variables, population strata, or clusters of data elements, which may be opaque to standard independent inspection of the individual sources. This new platform includes a device agnostic tool (Dashboard webapp, http://socr.umich.edu/HTML5/Dashboard/) for graphical querying, navigating and exploring the multivariate associations in complex heterogeneous datasets.

Results

The paper illustrates this core functionality and service-oriented infrastructure using healthcare data (e.g., US data from the 2010 Census, Demographic and Economic surveys, Bureau of Labor Statistics, and Center for Medicare Services) as well as Parkinson's Disease neuroimaging data. Both the back-end data archive and the front-end dashboard interfaces are continuously expanded to include additional data elements and new ways to customize the human and machine interactions.

Conclusions

A client-side data import utility allows for easy and intuitive integration of user-supplied datasets. This completely open-science framework may be used for exploratory analytics, confirmatory analyses, meta-analyses, and education and training purposes in a wide variety of fields.","hji,kes",0,0,0,2,0,NA,out of scope +26241356,In the pursuit of a semantic similarity metric based on UMLS annotations for articles in PubMed Central Open Access.,"

Motivation

Although full-text articles are provided by the publishers in electronic formats, it remains a challenge to find related work beyond the title and abstract context. Identifying related articles based on their abstract is indeed a good starting point; this process is straightforward and does not consume as many resources as full-text based similarity would require. However, further analyses may require in-depth understanding of the full content. Two articles with highly related abstracts can be substantially different regarding the full content. How similarity differs when considering title-and-abstract versus full-text and which semantic similarity metric provides better results when dealing with full-text articles are the main issues addressed in this manuscript.

Methods

We have benchmarked three similarity metrics - BM25, PMRA, and Cosine, in order to determine which one performs best when using concept-based annotations on full-text documents. We also evaluated variations in similarity values based on title-and-abstract against those relying on full-text. Our test dataset comprises the Genomics track article collection from the 2005 Text Retrieval Conference. Initially, we used an entity recognition software to semantically annotate titles and abstracts as well as full-text with concepts defined in the Unified Medical Language System (UMLS). For each article, we created a document profile, i.e., a set of identified concepts, term frequency, and inverse document frequency; we then applied various similarity metrics to those document profiles. We considered correlation, precision, recall, and F1 in order to determine which similarity metric performs best with concept-based annotations. For those full-text articles available in PubMed Central Open Access (PMC-OA), we also performed dispersion analyses in order to understand how similarity varies when considering full-text articles.

Results

We have found that the PubMed Related Articles similarity metric is the most suitable for full-text articles annotated with UMLS concepts. For similarity values above 0.8, all metrics exhibited an F1 around 0.2 and a recall around 0.1; BM25 showed the highest precision close to 1; in all cases the concept-based metrics performed better than the word-stem-based one. Our experiments show that similarity values vary when considering only title-and-abstract versus full-text similarity. Therefore, analyses based on full-text become useful when a given research requires going beyond title and abstract, particularly regarding connectivity across articles.

Availability

Visualization available at ljgarcia.github.io/semsim.benchmark/, data available at http://dx.doi.org/10.5281/zenodo.13323.","hji,kes",0,0,0,2,0,NA,data available at Zenodo +26271256,DeDaL: Cytoscape 3 app for producing and morphing data-driven and structure-driven network layouts.,"

Background

Visualization and analysis of molecular profiling data together with biological networks are able to provide new mechanistic insights into biological functions. Currently, it is possible to visualize high-throughput data on top of pre-defined network layouts, but they are not always adapted to a given data analysis task. A network layout based simultaneously on the network structure and the associated multidimensional data might be advantageous for data visualization and analysis in some cases.

Results

We developed a Cytoscape app, which allows constructing biological network layouts based on the data from molecular profiles imported as values of node attributes. DeDaL is a Cytoscape 3 app, which uses linear and non-linear algorithms of dimension reduction to produce data-driven network layouts based on multidimensional data (typically gene expression). DeDaL implements several data pre-processing and layout post-processing steps such as continuous morphing between two arbitrary network layouts and aligning one network layout with respect to another one by rotating and mirroring. The combination of all these functionalities facilitates the creation of insightful network layouts representing both structural network features and correlation patterns in multivariate data. We demonstrate the added value of applying DeDaL in several practical applications, including an example of a large protein-protein interaction network.

Conclusions

DeDaL is a convenient tool for applying data dimensionality reduction methods and for designing insightful data displays based on data-driven layouts of biological networks, built within Cytoscape environment. DeDaL is freely available for downloading at http://bioinfo-out.curie.fr/projects/dedal/.","hji,kes",0,0,0,2,0,NA,NA +26314578,MGFM: a novel tool for detection of tissue and cell specific marker genes from microarray gene expression data.,"

Background

Identification of marker genes associated with a specific tissue/cell type is a fundamental challenge in genetic and cell research. Marker genes are of great importance for determining cell identity, and for understanding tissue specific gene function and the molecular mechanisms underlying complex diseases.

Results

We have developed a new bioinformatics tool called MGFM (Marker Gene Finder in Microarray data) to predict marker genes from microarray gene expression data. Marker genes are identified through the grouping of samples of the same type with similar marker gene expression levels. We verified our approach using two microarray data sets from the NCBI's Gene Expression Omnibus public repository encompassing samples for similar sets of five human tissues (brain, heart, kidney, liver, and lung). Comparison with another tool for tissue-specific gene identification and validation with literature-derived established tissue markers established functionality, accuracy and simplicity of our tool. Furthermore, top ranked marker genes were experimentally validated by reverse transcriptase-polymerase chain reaction (RT-PCR). The sets of predicted marker genes associated with the five selected tissues comprised well-known genes of particular importance in these tissues. The tool is freely available from the Bioconductor web site, and it is also provided as an online application integrated into the CellFinder platform ( http://cellfinder.org/analysis/marker ).

Conclusions

MGFM is a useful tool to predict tissue/cell type marker genes using microarray gene expression data. The implementation of the tool as an R-package as well as an application within CellFinder facilitates its use.","hji,kes",0,0,0,2,0,NA,NA +26318525,[MEG]PLS: A pipeline for MEG data analysis and partial least squares statistics.,"The emphasis of modern neurobiological theories has recently shifted from the independent function of brain areas to their interactions in the context of whole-brain networks. As a result, neuroimaging methods and analyses have also increasingly focused on network discovery. Magnetoencephalography (MEG) is a neuroimaging modality that captures neural activity with a high degree of temporal specificity, providing detailed, time varying maps of neural activity. Partial least squares (PLS) analysis is a multivariate framework that can be used to isolate distributed spatiotemporal patterns of neural activity that differentiate groups or cognitive tasks, to relate neural activity to behavior, and to capture large-scale network interactions. Here we introduce [MEG]PLS, a MATLAB-based platform that streamlines MEG data preprocessing, source reconstruction and PLS analysis in a single unified framework. [MEG]PLS facilitates MRI preprocessing, including segmentation and coregistration, MEG preprocessing, including filtering, epoching, and artifact correction, MEG sensor analysis, in both time and frequency domains, MEG source analysis, including multiple head models and beamforming algorithms, and combines these with a suite of PLS analyses. The pipeline is open-source and modular, utilizing functions from FieldTrip (Donders, NL), AFNI (NIMH, USA), SPM8 (UCL, UK) and PLScmd (Baycrest, CAN), which are extensively supported and continually developed by their respective communities. [MEG]PLS is flexible, providing both a graphical user interface and command-line options, depending on the needs of the user. 
A visualization suite allows multiple types of data and analyses to be displayed and includes 4-D montage functionality. [MEG]PLS is freely available under the GNU public license (http://meg-pls.weebly.com).","hji,kes",0,0,0,2,0,NA,NA +26353838,SHAPE directed RNA folding.,"

Summary

Chemical mapping experiments allow for nucleotide resolution assessment of RNA structure. We demonstrate that different strategies of integrating probing data with thermodynamics-based RNA secondary structure prediction algorithms can be implemented by means of soft constraints. This amounts to incorporating suitable pseudo-energies into the standard energy model for RNA secondary structures. As a showcase application for this new feature of the ViennaRNA Package we compare three distinct, previously published strategies to utilize SHAPE reactivities for structure prediction. The new tool is benchmarked on a set of RNAs with known reference structure.

Availability and implementation

The capability for SHAPE directed RNA folding is part of the upcoming release of the ViennaRNA Package 2.2, for which a preliminary release is already freely available at http://www.tbi.univie.ac.at/RNA.

Contact

michael.wolfinger@univie.ac.at

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +26353840,Reveel: large-scale population genotyping using low-coverage sequencing data.,"

Motivation

Population low-coverage whole-genome sequencing is rapidly emerging as a prominent approach for discovering genomic variation and genotyping a cohort. This approach combines substantially lower cost than full-coverage sequencing with whole-genome discovery of low-allele frequency variants, to an extent that is not possible with array genotyping or exome sequencing. However, a challenging computational problem arises of jointly discovering variants and genotyping the entire cohort. Variant discovery and genotyping are relatively straightforward tasks on a single individual that has been sequenced at high coverage, because the inference decomposes into the independent genotyping of each genomic position for which a sufficient number of confidently mapped reads are available. However, in low-coverage population sequencing, the joint inference requires leveraging the complex linkage disequilibrium (LD) patterns in the cohort to compensate for sparse and missing data in each individual. The potentially massive computation time for such inference, as well as the missing data that confound low-frequency allele discovery, need to be overcome for this approach to become practical.

Results

Here, we present Reveel, a novel method for single nucleotide variant calling and genotyping of large cohorts that have been sequenced at low coverage. Reveel introduces a novel technique for leveraging LD that deviates from previous Markov-based models, and which is aimed at computational efficiency as well as accuracy in capturing LD patterns present in rare haplotypes. We evaluate Reveel's performance through extensive simulations as well as real data from the 1000 Genomes Project, and show that it achieves higher accuracy in low-frequency allele discovery and substantially lower computation cost than previous state-of-the-art methods.

Availability and implementation

http://reveel.stanford.edu/

Contact

serafim@cs.stanford.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +26394400,Validation of the RRE-90 Scale to Predict Stroke Risk after Transient Symptoms with Infarction: A Prospective Cohort Study.,"

Background and purpose

The risk of stroke after a transient ischemic attack (TIA) for patients with a positive diffusion-weighted image (DWI), i.e., transient symptoms with infarction (TSI), is much higher than for those with a negative DWI. The aim of this study was to validate the predictive value of a web-based recurrence risk estimator (RRE; http://www.nmr.mgh.harvard.edu/RRE/) of TSI.

Methods

Data from the prospective hospital-based TIA database of the First Affiliated Hospital of Zhengzhou University were analyzed. The RRE and ABCD2 scores were calculated within 7 days of symptom onset. The predictive outcome was ischemic stroke occurrence at 90 days. The receiver-operating characteristics curves were plotted, and the predictive value of the two models was assessed by computing the C statistics.

Results

A total of 221 eligible patients were prospectively enrolled, of whom 46 (20.81%) experienced a stroke within 90 days. The 90-day stroke risk in high-risk TSI patients (RRE ≥4) was 3.406-fold greater than in those at low risk (P <0.001). The C statistic of RRE (0.681; 95% confidence interval [CI], 0.592-0.771) was statistically higher than that of ABCD2 score (0.546; 95% CI, 0.454-0.638; Z = 2.115; P = 0.0344) at 90 days.

Conclusion

The RRE score had a higher predictive value than the ABCD2 score for assessing the 90-day risk of stroke after TSI.","hji,kes",0,0,0,2,0,NA,NA +26395772,APTANI: a computational tool to select aptamers through sequence-structure motif analysis of HT-SELEX data.,"

Motivation

Aptamers are synthetic nucleic acid molecules that can bind biological targets in virtue of both their sequence and three-dimensional structure. Aptamers are selected using SELEX, Systematic Evolution of Ligands by EXponential enrichment, a technique that exploits aptamer-target binding affinity. The SELEX procedure, coupled with high-throughput sequencing (HT-SELEX), creates billions of random sequences capable of binding different epitopes on specific targets. Since this technique produces enormous amounts of data, computational analysis represents a critical step to screen and select the most biologically relevant sequences.

Results

Here, we present APTANI, a computational tool to identify target-specific aptamers from HT-SELEX data and secondary structure information. APTANI builds on AptaMotif algorithm, originally implemented to analyze SELEX data; extends the applicability of AptaMotif to HT-SELEX data and introduces new functionalities, as the possibility to identify binding motifs, to cluster aptamer families or to compare output results from different HT-SELEX cycles. Tabular and graphical representations facilitate the downstream biological interpretation of results.

Availability and implementation

APTANI is available at http://aptani.unimore.it.

Contact

silvio.bicciato@unimore.it

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +26398339,Small Supernumerary Marker Chromosomes in Human Infertility.,"Small supernumerary marker chromosomes (sSMC) are structurally abnormal chromosomes that cannot be unambiguously identified by banding cytogenetics. The objective of this study was to provide an overview of sSMC frequency and characterization in a context of infertility and to review the literature describing sSMC in relation with male and female infertility. Therefore, a systematic literature review on sSMC associated with infertility was conducted by means of a PubMed literature and a sSMC database (http://ssmc-tl.com/sSMC.html) search. A total of 234 patients with infertility were identified as carriers of sSMC. All chromosomes, except chromosomes 10, 19 and the X, were involved in sSMC, and in 72% the sSMC originated from acrocentric chromosomes. Euchromatic imbalances were caused by the presence of sSMC in 30% of the cases. Putative genes have been identified in only 1.2% of sSMC associated with infertility. The implication of sSMC in infertility could be due to a partial trisomy of some genes but also to mechanical effects perturbing meiosis. Further precise molecular and interphase-architecture studies on sSMC are needed in the future to characterize the relationship between this chromosomal anomaly and human infertility.","hji,kes",0,0,0,2,0,NA,NA +26435838,Improving preparedness of medical students and junior doctors to manage patients with diabetes.,"

Objective

New medical graduates are the front-line staff in many hospital settings and manage patients with diabetes frequently. Prescribing is an area of concern for junior doctors, however, with insulin prescribing reported as a particular weakness. This study aimed to produce an educational intervention which aimed to improve preparedness to manage patients with diabetes and evaluate it using a mixed methods approach.

Research design and methods

An e-resource (http://www.diabetesscenariosforjuniordoctors.co.uk) was created to contain commonplace and authentic diabetes decision-making scenarios. -32 junior doctors (n=20) and year 5 students (n=12) in South West England worked through the scenarios while 'thinking aloud' and then undertook a semistructured interview. Qualitative data were transcribed verbatim and analyzed thematically. Participant confidence to manage patients with diabetes before, immediately after, and 6 weeks after the educational intervention was also measured using a self-rating scale.

Results

Participants reported that patients with diabetes were daunting to manage because of the wide array of insulin products, their lack of confidence with chronic disease management and the difficulty of applying theory to practice. The e-resource was described as authentic, practical, and appropriate for the target audience. Junior doctors' self-rated confidence to manage patients with diabetes increased from 4.7 (of 10) before using the e-resource, to 6.4 immediately afterwards, and 6.8 6 weeks later. Medical students' confidence increased from 5.1 before, to 6.4 immediately afterwards, and 6.4 6 weeks later.

Conclusions

Providing opportunities to work with authentic scenarios in a safe environment can help to ameliorate junior doctors' lack of confidence to manage patients with diabetes.","hji,kes",0,0,0,2,0,NA,clinical +26447265,"Pressure UlceR Programme Of reSEarch (PURPOSE): using mixed methods (systematic reviews, prospective cohort, case study, consensus and psychometrics) to identify patient and organisational risk, develop a risk assessment tool and patient-reported outcome Quality of Life and Health Utility measures","

Background

The Pressure UlceR Programme Of reSEarch (PURPOSE) consisted of two themes. Theme 1 focused on improving our understanding of individuals and organisational risk factors and on improving the quality of risk assessments (work packages 1–3) and theme 2 focused on developing patient-reported outcome measures (work packages 4 and 5).

Methods

The programme comprised 21 individual pieces of work. Pain: (1) multicentre pain prevalence study in acute hospitals, (2) multicentre pain prevalence study in community localities incorporating (3) a comparison of case-finding methods, and (4) multicentre, prospective cohort study. Severe pressure ulcers: (5) retrospective case study, (6) patient involvement workshop with the Pressure Ulcer Research Service User Network for the UK (PURSUN UK) and (7) development of root cause analysis methodology. Risk assessment: (8) systematic review, (9) consensus study, (10) conceptual framework development and theoretical causal pathway, (11) design and pretesting of draft Risk Assessment Framework and (12) field test to assess reliability, validity, data completeness and clinical usability. Quality of life: (13) conceptual framework development (systematic review, patient interviews), (14 and 15) provisional instrument development, with items generated from patient interviews [from (1) above] two systematic reviews and experts, (16) pretesting of the provisional Pressure Ulcer Quality of Life (PU-QOL) instrument using mixed methods, (17) field test 1 including (18) optimal mode of administration substudy and item reduction with testing of scale formation, acceptability, scaling assumptions, reliability and validity, and (19) field test 2 – final psychometric evaluation to test scale targeting, item response categories, item fit, response bias, acceptability, scaling assumptions, reliability and validity. Cost–utility: (20) time trade-off task valuations of health states derived from selected PU-QOL items, and (21) validation of the items selected and psychometric properties of the new Pressure Ulcer Quality of Life Utility Index (PUQOL-UI).

Key findings

Pain: prevalence studies – hospital and community patients experience both pressure area-related and pressure ulcer pain; pain cohort study indicates that pain is independently predictive of category 2 (and above) pressure ulcer development. Severe pressure ulcers: these were more likely to develop in contexts in which clinicians failed to listen to patients/carers or recognise/respond to high risk or the presence of an existing pressure ulcer and services were not effectively co-ordinated; service users found the interactive workshop format valuable; including novel components (interviews with patients and carers) in root cause analysis improves the quality of the insights captured. Risk assessment: we developed a Pressure Ulcer Risk Assessment Framework, the PURPOSE-T, incorporating the Minimum Data Set, a screening stage, a full assessment stage, use of colour to support decision-making, and decision pathways that make a clear distinction between patients with an existing pressure ulcer(s) (or scarring from previous ulcers) who require secondary prevention and treatment and those at risk who require primary prevention (http://medhealth.leeds.ac.uk/accesspurposet). Quality of life: the final PU-QOL instrument consists of 10 scales to measure pain, exudate, odour, sleep, vitality, mobility/movement, daily activities, emotional well-being, self-consciousness and appearance, and participation (http://medhealth.leeds.ac.uk/puqol-ques). Cost–utility: seven items were selected from the PU-QOL instrument for inclusion in the PUQOL-UI (http://medhealth.leeds.ac.uk/puqol-ui); secondary study analysis indicated that item selection for the PUQOL-UI was appropriate and that the index was acceptable to patients and had adequate levels of validity.

Conclusions

The PURPOSE programme has provided important insights for pressure ulcer prevention and treatment and involvement of service users in research and development, with implications for patient and public involvement, clinical practice, quality/safety/health service management and research including replication of the pain risk factor study, work exploring best practice settings, the impact of including skin status as an indicator for escalation of preventative interventions, further psychometric evaluation of PU-QOL and PUQOL-UI and the measurement of disease attribution.

Funding

The National Institute for Health Research Programme Grants for Applied Research programme.","hji,kes",0,0,0,2,0,NA,NA +26455800,ConTemplate Suggests Possible Alternative Conformations for a Query Protein of Known Structure.,"Protein function involves conformational changes, but often, for a given protein, only some of these conformations are known. The missing conformations could be predicted using the wealth of data in the PDB. Most PDB proteins have multiple structures, and proteins sharing one similar conformation often share others as well. The ConTemplate web server (http://bental.tau.ac.il/contemplate) exploits these observations to suggest conformations for a query protein with at least one known conformation (or model thereof). We demonstrate ConTemplate on a ribose-binding protein that undergoes significant conformational changes upon substrate binding. Querying ConTemplate with the ligand-free (or bound) structure of the protein produces the ligand-bound (or free) conformation with a root-mean-square deviation of 1.7 Å (or 2.2 Å); the models are derived from conformations of other sugar-binding proteins, sharing approximately 30% sequence identity with the query. The calculation also suggests intermediate conformations and a pathway between the bound and free conformations.","hji,kes",0,0,0,2,0,NA,NA +26464967,neuTube 1.0: A New Design for Efficient Neuron Reconstruction Software Based on the SWC Format.,"Brain circuit mapping requires digital reconstruction of neuronal morphologies in complicated networks. Despite recent advances in automatic algorithms, reconstruction of neuronal structures is still a bottleneck in circuit mapping due to a lack of appropriate software for both efficient reconstruction and user-friendly editing. 
Here we present a new software design based on the SWC format, a standardized neuromorphometric format that has been widely used for analyzing neuronal morphologies or sharing neuron reconstructions via online archives such as NeuroMorpho.org. We have also implemented the design in our open-source software called neuTube 1.0. As specified by the design, the software is equipped with parallel 2D and 3D visualization and intuitive neuron tracing/editing functions, allowing the user to efficiently reconstruct neurons from fluorescence image data and edit standard neuron structure files produced by any other reconstruction software. We show the advantages of neuTube 1.0 by comparing it to two other software tools, namely Neuromantic and Neurostudio. The software is available for free at http://www.neutracing.com, which also hosts complete software documentation and video tutorials.","hji,kes",0,0,0,2,0,NA,NA +26477251,Spin labeling and Double Electron-Electron Resonance (DEER) to Deconstruct Conformational Ensembles of HIV Protease.,"An understanding of macromolecular conformational equilibrium in biological systems is oftentimes essential to understand function, dysfunction, and disease. For the past few years, our lab has been utilizing site-directed spin labeling (SDSL), coupled with electron paramagnetic resonance (EPR) spectroscopy, to characterize the conformational ensemble and ligand-induced conformational shifts of HIV-1 protease (HIV-1PR). The biomedical importance of characterizing the fractional occupancy of states within the conformational ensemble critically impacts our hypothesis of a conformational selection mechanism of drug-resistance evolution in HIV-1PR. The purpose of the following chapter is to give a timeline perspective of our SDSL EPR approach to characterizing conformational sampling of HIV-1PR. 
We provide detailed instructions for the procedure utilized in analyzing distance profiles for HIV-1PR obtained from pulsed electron-electron double resonance (PELDOR). Specifically, we employ a version of PELDOR known as double electron-electron resonance (DEER). Data are processed with the software package """"DeerAnalysis"""" (http://www.epr.ethz.ch/software), which implements Tikhonov regularization (TKR), to generate a distance profile from electron spin-echo amplitude modulations. We assign meaning to resultant distance profiles based upon a conformational sampling model, which is described herein. The TKR distance profiles are reconstructed with a linear combination of Gaussian functions, which is then statistically analyzed. In general, DEER has proven powerful for observing structural ensembles in proteins and, more recently, nucleic acids. Our goal is to present our advances in order to aid readers in similar applications.","hji,kes",0,0,0,2,0,NA,NA +26484228,Whole transcriptome microarrays identify long non-coding RNAs associated with cardiac hypertrophy.,"Long non-coding RNAs (lncRNAs) have recently emerged as a novel group of non-coding RNAs able to regulate gene expression. While their role in cardiac disease is only starting to be understood, their involvement in cardiac hypertrophy is poorly known. We studied the association between lncRNAs and left ventricular hypertrophy using whole transcriptome microarrays. Wild-type mice and mice overexpressing the adenosine A2A receptor were subjected to transverse aortic constriction (TAC) to induce left ventricular hypertrophy. Expression profiles of lncRNAs in the heart were characterized using genome-wide microarrays. An analytical pipeline was specifically developed to extract lncRNA data from microarrays. We identified 2 lncRNAs up-regulated and 3 lncRNAs down-regulated in the hearts of A2A-receptor overexpressing-mice subjected to TAC compared to wild-type mice. 
Differential expression of these 2 lncRNAs was validated by quantitative PCR. Complete microarray dataset is available at Gene Expression Omnibus (GEO) database (http://www.ncbi.nlm.nih.gov/geo/) under the accession number GSE45423. Here, we describe in details the experimental design, microarray performance and analysis.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +26537179,BRANE Cut: biologically-related a priori network enhancement with graph cuts for gene regulatory network inference.,"

Background

Inferring gene networks from high-throughput data constitutes an important step in the discovery of relevant regulatory relationships in organism cells. Despite the large number of available Gene Regulatory Network inference methods, the problem remains challenging: the underdetermination in the space of possible solutions requires additional constraints that incorporate a priori information on gene interactions.

Methods

Weighting all possible pairwise gene relationships by a probability of edge presence, we formulate the regulatory network inference as a discrete variational problem on graphs. We enforce biologically plausible coupling between groups and types of genes by minimizing an edge labeling functional coding for a priori structures. The optimization is carried out with Graph cuts, an approach popular in image processing and computer vision. We compare the inferred regulatory networks to results achieved by the mutual-information-based Context Likelihood of Relatedness (CLR) method and by the state-of-the-art GENIE3, winner of the DREAM4 multifactorial challenge.

Results

Our BRANE Cut approach infers more accurately the five DREAM4 in silico networks (with improvements from 6% to 11%). On a real Escherichia coli compendium, an improvement of 11.8% compared to CLR and 3% compared to GENIE3 is obtained in terms of Area Under Precision-Recall curve. Up to 48 additional verified interactions are obtained over GENIE3 for a given precision. On this dataset involving 4345 genes, our method achieves a performance similar to that of GENIE3, while being more than seven times faster. The BRANE Cut code is available at: http://www-syscom.univ-mlv.fr/~pirayre/Codes-GRN-BRANE-cut.html.

Conclusions

BRANE Cut is a weighted graph thresholding method. Using biologically sound penalties and data-driven parameters, it improves three state-of-the art GRN inference methods. It is applicable as a generic network inference post-processing, due to its computational efficiency.","hji,kes",0,0,0,2,0,NA,NA +26568623,MIEC-SVM: automated pipeline for protein peptide/ligand interaction prediction.,"

Motivation

MIEC-SVM is a structure-based method for predicting protein recognition specificity. Here, we present an automated MIEC-SVM pipeline providing an integrated and user-friendly workflow for construction and application of the MIEC-SVM models. This pipeline can handle standard amino acids and those with post-translational modifications (PTMs) or small molecules. Moreover, multi-threading and support to Sun Grid Engine (SGE) are implemented to significantly boost the computational efficiency.

Availability and implementation

The program is available at http://wanglab.ucsd.edu/MIEC-SVM CONTACT: wei-wang@ucsd.edu

Supplementary information

Supplementary data available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +26581084,Transcriptator: An Automated Computational Pipeline to Annotate Assembled Reads and Identify Non Coding RNA.,"RNA-seq is a new tool to measure RNA transcript counts, using high-throughput sequencing at an extraordinary accuracy. It provides quantitative means to explore the transcriptome of an organism of interest. However, interpreting this extremely large data into biological knowledge is a problem, and biologist-friendly tools are lacking. In our lab, we developed Transcriptator, a web application based on a computational Python pipeline with a user-friendly Java interface. This pipeline uses the web services available for BLAST (Basis Local Search Alignment Tool), QuickGO and DAVID (Database for Annotation, Visualization and Integrated Discovery) tools. It offers a report on statistical analysis of functional and Gene Ontology (GO) annotation's enrichment. It helps users to identify enriched biological themes, particularly GO terms, pathways, domains, gene/proteins features and protein-protein interactions related informations. It clusters the transcripts based on functional annotations and generates a tabular report for functional and gene ontology annotations for each submitted transcript to the web server. The implementation of QuickGo web-services in our pipeline enable the users to carry out GO-Slim analysis, whereas the integration of PORTRAIT (Prediction of transcriptomic non coding RNA (ncRNA) by ab initio methods) helps to identify the non coding RNAs and their regulatory role in transcriptome. In summary, Transcriptator is a useful software for both NGS and array data. It helps the users to characterize the de-novo assembled reads, obtained from NGS experiments for non-referenced organisms, while it also performs the functional enrichment analysis of differentially expressed transcripts/genes for both RNA-seq and micro-array experiments. 
It generates easy to read tables and interactive charts for better understanding of the data. The pipeline is modular in nature, and provides an opportunity to add new plugins in the future. Web application is freely available at: http://www-labgtp.na.icar.cnr.it/Transcriptator.","hji,kes",0,0,0,2,0,NA,NA +26626453,Fast dimension reduction and integrative clustering of multi-omics data using low-rank approximation: application to cancer molecular classification.,"

Background

One major goal of large-scale cancer omics study is to identify molecular subtypes for more accurate cancer diagnoses and treatments. To deal with high-dimensional cancer multi-omics data, a promising strategy is to find an effective low-dimensional subspace of the original data and then cluster cancer samples in the reduced subspace. However, due to data-type diversity and big data volume, few methods can integratively and efficiently find the principal low-dimensional manifold of the high-dimensional cancer multi-omics data.

Results

In this study, we proposed a novel low-rank approximation based integrative probabilistic model to fast find the shared principal subspace across multiple data types: the convexity of the low-rank regularized likelihood function of the probabilistic model ensures efficient and stable model fitting. Candidate molecular subtypes can be identified by unsupervised clustering hundreds of cancer samples in the reduced low-dimensional subspace. On testing datasets, our method LRAcluster (low-rank approximation based multi-omics data clustering) runs much faster with better clustering performances than the existing method. Then, we applied LRAcluster on large-scale cancer multi-omics data from TCGA. The pan-cancer analysis results show that the cancers of different tissue origins are generally grouped as independent clusters, except squamous-like carcinomas. While the single cancer type analysis suggests that the omics data have different subtyping abilities for different cancer types.

Conclusions

LRAcluster is a very useful method for fast dimension reduction and unsupervised clustering of large-scale multi-omics data. LRAcluster is implemented in R and freely available via http://bioinfo.au.tsinghua.edu.cn/software/lracluster/ .","hji,kes",0,0,0,2,0,NA,NA +26628858,Characterizing Cancer-Specific Networks by Integrating TCGA Data.,"The Cancer Genome Atlas (TCGA) generates comprehensive genomic data for thousands of patients over more than 20 cancer types. TCGA data are typically whole-genome measurements of multiple genomic features, such as DNA copy numbers, DNA methylation, and gene expression, providing unique opportunities for investigating cancer mechanism from multiple molecular and regulatory layers. We propose a Bayesian graphical model to systemically integrate multi-platform TCGA data for inference of the interactions between different genomic features either within a gene or between multiple genes. The presence or absence of edges in the graph indicates the presence or absence of conditional dependence between genomic features. The inference is restricted to genes within a known biological network, but can be extended to any sets of genes. Applying the model to the same genes using patient samples in two different cancer types, we identify network components that are common as well as different between cancer types. The examples and codes are available at https://www.ma.utexas.edu/users/yxu/software.html.","hji,kes",0,0,0,2,0,NA,NA +26631838,"Relative Prognostic and Predictive Value of Gene Signature and Histologic Grade in Estrogen Receptor-Positive, HER2-Negative Breast Cancer.","

Background

In estrogen receptor (ER)-positive, human epidermal growth factor receptor 2 (HER2)-negative breast cancer, first-generation genomic signatures serve predominately as prognostic biomarkers and secondarily as predictors of response to chemotherapy. We compared both the prognostic and predictive value of histologic grades and genomic markers.

Methods

We retrieved publicly available cDNA microarray data from 1373 primary ER(+)/HER2(-) breast cancers and developed a genomic signature simulated from Recurrence Online (http://www.recurrenceonline.com/) to calculate the recurrence score and risk using predefined sets of genes in the cDNA microarray. We then compared the prognostic and predictive information provided by histologic grade and genomic signature.

Results

Based on genomic signatures, 55%, 28%, and 17% of breast cancers were classified as low, intermediate, and high risk, respectively, whereas the histologic grades were I, II, and III in 22%, 59%, and 19% of breast cancers, respectively. Univariate analysis in the untreated cohort revealed that both histologic grade (overall P = .007) and genomic signature (P < .001) could predict prognosis. Results were similar using the genomic signature, with pathologic complete response rates of 4.6%, 5.7%, and 16.5% for low-, intermediate-, and high-risk cancers, respectively. Neither biomarker was statistically significant in multivariate analysis for predictive response to neoadjuvant chemotherapy (NAC).

Conclusion

Genomic signature was better at identifying low-risk cases compared to histologic grade alone, but both markers had similar predictive values for NAC response. Better predictive biomarkers for NAC response are still needed.","hji,kes",0,0,0,2,0,NA,references other data resource +26635139,An integrative somatic mutation analysis to identify pathways linked with survival outcomes across 19 cancer types.,"

Motivation

Identification of altered pathways that are clinically relevant across human cancers is a key challenge in cancer genomics. Precise identification and understanding of these altered pathways may provide novel insights into patient stratification, therapeutic strategies and the development of new drugs. However, a challenge remains in accurately identifying pathways altered by somatic mutations across human cancers, due to the diverse mutation spectrum. We developed an innovative approach to integrate somatic mutation data with gene networks and pathways, in order to identify pathways altered by somatic mutations across cancers.

Results

We applied our approach to The Cancer Genome Atlas (TCGA) dataset of somatic mutations in 4790 cancer patients with 19 different types of tumors. Our analysis identified cancer-type-specific altered pathways enriched with known cancer-relevant genes and targets of currently available drugs. To investigate the clinical significance of these altered pathways, we performed consensus clustering for patient stratification using member genes in the altered pathways coupled with gene expression datasets from 4870 patients from TCGA, and multiple independent cohorts confirmed that the altered pathways could be used to stratify patients into subgroups with significantly different clinical outcomes. Of particular significance, certain patient subpopulations with poor prognosis were identified because they had specific altered pathways for which there are available targeted therapies. These findings could be used to tailor and intensify therapy in these patients, for whom current therapy is suboptimal.

Availability and implementation

The code is available at: http://www.taehyunlab.org

Contact

jhcheong@yuhs.ac or taehyun.hwang@utsouthwestern.edu or taehyun.cs@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +26672762,CyREST: Turbocharging Cytoscape Access for External Tools via a RESTful API.,"As bioinformatic workflows become increasingly complex and involve multiple specialized tools, so does the difficulty of reliably reproducing those workflows. Cytoscape is a critical workflow component for executing network visualization, analysis, and publishing tasks, but it can be operated only manually via a point-and-click user interface. Consequently, Cytoscape-oriented tasks are laborious and often error prone, especially with multistep protocols involving many networks. In this paper, we present the new cyREST Cytoscape app and accompanying harmonization libraries. Together, they improve workflow reproducibility and researcher productivity by enabling popular languages (e.g., Python and R, JavaScript, and C#) and tools (e.g., IPython/Jupyter Notebook and RStudio) to directly define and query networks, and perform network analysis, layouts and renderings. We describe cyREST's API and overall construction, and present Python- and R-based examples that illustrate how Cytoscape can be integrated into large scale data analysis pipelines. cyREST is available in the Cytoscape app store (http://apps.cytoscape.org) where it has been downloaded over 1900 times since its release in late 2014.","hji,kes",0,0,0,2,0,NA,NA +26677962,CG2AA: backmapping protein coarse-grained structures.,"

Unlabelled

Coarse grain (CG) models allow long-scale simulations with a much lower computational cost than that of all-atom simulations. However, the absence of atomistic detail impedes the analysis of specific atomic interactions that are determinant in most interesting biomolecular processes. In order to study these phenomena, it is necessary to reconstruct the atomistic structure from the CG representation. This structure can be analyzed by itself or be used as an onset for atomistic molecular dynamics simulations. In this work, we present a computer program that accurately reconstructs the atomistic structure from a CG model for proteins, using a simple geometrical algorithm.

Availability and implementation

The software is free and available online at http://www.ic.fcen.uba.ar/cg2aa/cg2aa.py

Supplementary information

Supplementary data are available at Bioinformatics online.

Contact

lula@qi.fcen.uba.ar.","hji,kes",0,0,0,2,0,NA,NA +26678663,EC: an efficient error correction algorithm for short reads.,"

Background

In highly parallel next-generation sequencing (NGS) techniques millions to billions of short reads are produced from a genomic sequence in a single run. Due to the limitation of the NGS technologies, there could be errors in the reads. The error rate of the reads can be reduced with trimming and by correcting the erroneous bases of the reads. It helps to achieve high quality data and the computational complexity of many biological applications will be greatly reduced if the reads are first corrected. We have developed a novel error correction algorithm called EC and compared it with four other state-of-the-art algorithms using both real and simulated sequencing reads.

Results

We have done extensive and rigorous experiments that reveal that EC is indeed an effective, scalable, and efficient error correction tool. Real reads that we have employed in our performance evaluation are Illumina-generated short reads of various lengths. Six experimental datasets we have utilized are taken from sequence and read archive (SRA) at NCBI. The simulated reads are obtained by picking substrings from random positions of reference genomes. To introduce errors, some of the bases of the simulated reads are changed to other bases with some probabilities.

Conclusions

Error correction is a vital problem in biology especially for NGS data. In this paper we present a novel algorithm, called Error Corrector (EC), for correcting substitution errors in biological sequencing reads. We plan to investigate the possibility of employing the techniques introduced in this research paper to handle insertion and deletion errors also.

Software availability

The implementation is freely available for non-commercial purposes. It can be downloaded from: http://engr.uconn.edu/~rajasek/EC.zip.","hji,kes",0,0,0,2,0,NA,NA +26679168,Mineotaur: a tool for high-content microscopy screen sharing and visual analytics.,"High-throughput/high-content microscopy-based screens are powerful tools for functional genomics, yielding intracellular information down to the level of single-cells for thousands of genotypic conditions. However, accessing their data requires specialized knowledge and most often that data is no longer analyzed after initial publication. We describe Mineotaur ( http://www.mineotaur.org ), a open-source, downloadable web application that allows easy online sharing and interactive visualisation of large screen datasets, facilitating their dissemination and further analysis, and enhancing their impact.","hji,kes",0,0,0,2,0,NA,iffy +26690490,"PEPstrMOD: structure prediction of peptides containing natural, non-natural and modified residues.","

Background

In the past, many methods have been developed for peptide tertiary structure prediction but they are limited to peptides having natural amino acids. This study describes a method PEPstrMOD, which is an updated version of PEPstr, developed specifically for predicting the structure of peptides containing natural and non-natural/modified residues.

Results

PEPstrMOD integrates Forcefield_NCAA and Forcefield_PTM force field libraries to handle 147 non-natural residues and 32 types of post-translational modifications respectively by performing molecular dynamics using AMBER. AMBER was also used to handle other modifications like peptide cyclization, use of D-amino acids and capping of terminal residues. In addition, GROMACS was used to implement 210 non-natural side-chains in peptides using SwissSideChain force field library. We evaluated the performance of PEPstrMOD on three datasets generated from Protein Data Bank; i) ModPep dataset contains 501 non-natural peptides, ii) ModPep16, a subset of ModPep, and iii) CyclicPep contains 34 cyclic peptides. We achieved backbone Root Mean Square Deviation between the actual and predicted structure of peptides in the range of 3.81-4.05 angstroms.

Conclusions

In summary, the method PEPstrMOD has been developed that predicts the structure of modified peptide from the sequence/structure given as input. We validated the PEPstrMOD application using a dataset of peptides having non-natural/modified residues. PEPstrMOD offers unique advantages that allow the users to predict the structures of peptides having i) natural residues, ii) non-naturally modified residues, iii) terminal modifications, iv) post-translational modifications, v) D-amino acids, and also allows extended simulation of predicted peptides. This will help the researchers to have prior structural information of modified peptides to further design the peptides for desired therapeutic property. PEPstrMOD is freely available at http://osddlinux.osdd.net/raghava/pepstrmod/.","hji,kes",0,0,0,2,0,NA,NA +26691201,An ultra-high-density map as a community resource for discerning the genetic basis of quantitative traits in maize.,"

Background

To safeguard the food supply for the growing human population, it is important to understand and exploit the genetic basis of quantitative traits. Next-generation sequencing technology performs advantageously and effectively in genetic mapping and genome analysis of diverse genetic resources. Hence, we combined re-sequencing technology and a bin map strategy to construct an ultra-high-density bin map with thousands of bin markers to precisely map a quantitative trait locus.

Results

In this study, we generated a linkage map containing 1,151,856 high quality SNPs between Mo17 and B73, which were verified in the maize intermated B73 x Mo17 (IBM) Syn10 population. This resource is an excellent complement to existing maize genetic maps available in an online database (iPlant, http://data.maizecode.org/maize/qtl/syn10/ ). Moreover, in this population combined with the IBM Syn4 RIL population, we detected 135 QTLs for flowering time and plant height traits across the two populations. Eighteen known functional genes and twenty-five candidate genes for flowering time and plant height trait were fine-mapped into a 2.21-4.96Mb interval. Map expansion and segregation distortion were also analyzed, and evidence for inadvertent selection of early flowering time in the process of mapping population development was observed. Furthermore, an updated integrated map with 1,151,856 high-quality SNPs, 2,916 traditional markers and 6,618 bin markers was constructed. The data were deposited into the iPlant Discovery Environment (DE), which provides a fundamental resource of genetic data for the maize genetic research community.

Conclusions

Our findings provide basic essential genetic data for the maize genetic research community. An updated IBM Syn10 population and a reliable, verified high-quality SNP set between Mo17 and B73 will aid in future molecular breeding efforts.","hji,kes",0,0,0,2,0,NA,NA +26716705,Correlated confocal and super-resolution imaging by VividSTORM.,"Single-molecule localization microscopy (SMLM) is rapidly gaining popularity in the life sciences as an efficient approach to visualize molecular distribution with nanoscale precision. However, it has been challenging to obtain and analyze such data within a cellular context in tissue preparations. Here we describe a 5-d tissue processing and immunostaining procedure that is optimized for SMLM, and we provide example applications to fixed mouse brain, heart and kidney tissues. We then describe how to perform correlated confocal and 3D-superresolution imaging on these sections, which allows the visualization of nanoscale protein localization within labeled subcellular compartments of identified target cells in a few minutes. Finally, we describe the use of VividSTORM (http://katonalab.hu/index.php/vividstorm), an open-source software for correlated confocal and SMLM image analysis, which facilitates the measurement of molecular abundance, clustering, internalization, surface density and intermolecular distances in a cell-specific and subcellular compartment-restricted manner. The protocol requires only basic skills in tissue staining and microscopy.","hji,kes",0,0,0,2,0,NA,NA +26738481,"TRAPLINE: a standardized and automated pipeline for RNA sequencing data analysis, evaluation and annotation.","

Background

Technical advances in Next Generation Sequencing (NGS) provide a means to acquire deeper insights into cellular functions. The lack of standardized and automated methodologies poses a challenge for the analysis and interpretation of RNA sequencing data. We critically compare and evaluate state-of-the-art bioinformatics approaches and present a workflow that integrates the best performing data analysis, data evaluation and annotation methods in a Transparent, Reproducible and Automated PipeLINE (TRAPLINE) for RNA sequencing data processing (suitable for Illumina, SOLiD and Solexa).

Results

Comparative transcriptomics analyses with TRAPLINE result in a set of differentially expressed genes, their corresponding protein-protein interactions, splice variants, promoter activity, predicted miRNA-target interactions and files for single nucleotide polymorphism (SNP) calling. The obtained results are combined into a single file for downstream analysis such as network construction. We demonstrate the value of the proposed pipeline by characterizing the transcriptome of our recently described stem cell derived antibiotic selected cardiac bodies ('aCaBs').

Conclusion

TRAPLINE supports NGS-based research by providing a workflow that requires no bioinformatics skills, decreases the processing time of the analysis and works in the cloud. The pipeline is implemented in the biomedical research platform Galaxy and is freely accessible via www.sbi.uni-rostock.de/RNAseqTRAPLINE or the specific Galaxy manual page (https://usegalaxy.org/u/mwolfien/p/trapline---manual).","hji,kes",0,0,0,2,0,NA,NA +26753561,iMiRNA-SSF: Improving the Identification of MicroRNA Precursors by Combining Negative Sets with Different Distributions.,"The identification of microRNA precursors (pre-miRNAs) helps in understanding regulator in biological processes. The performance of computational predictors depends on their training sets, in which the negative sets play an important role. In this regard, we investigated the influence of benchmark datasets on the predictive performance of computational predictors in the field of miRNA identification, and found that the negative samples have significant impact on the predictive results of various methods. We constructed a new benchmark set with different data distributions of negative samples. Trained with this high quality benchmark dataset, a new computational predictor called iMiRNA-SSF was proposed, which employed various features extracted from RNA sequences. Experimental results showed that iMiRNA-SSF outperforms three state-of-the-art computational methods. For practical applications, a web-server of iMiRNA-SSF was established at the website http://bioinformatics.hitsz.edu.cn/iMiRNA-SSF/.","hji,kes",0,0,0,2,0,NA,NA +26756459,Intranasal or transdermal nicotine for the treatment of postoperative pain.,"

Background

Acute pain frequently occurs after surgical procedures. Nicotine has been explored as an adjunctive medication for management of postoperative pain.

Objectives

To assess the effect of transdermal or intranasal nicotine administration on postoperative pain, opioid analgesic use, and opioid-related adverse events.

Search methods

We searched MEDLINE (1966 to 20 March 2014), the Cochrane Central Register of Controlled Trials (CENTRAL; 2014, Issue 3), EMBASE (1980 to 20 March 2014), and also databases of ongoing trials (www.controlled-trials.com/ and http://clinicaltrials.gov/). We re-ran the search on 28 April 2015. We will assess the one study of interest when we update the review.

Selection criteria

We included randomized, placebo-controlled clinical trials that evaluated the effects of perioperative (pre-, intra-, or postoperative) administration of nicotine on postoperative pain, opioid use, and opioid-related adverse events. We excluded all other studies.

Data collection and analysis

Two authors independently screened all titles and abstracts for eligibility and documented reasons for exclusion. In case of disagreement, a third author decided on the inclusion or exclusion of a trial report. When additional information was needed in order to decide if a trial should be included, one of the authors contacted the corresponding author of the trial in question.

Main results

Nine trials (666 participants) evaluated nicotine for postoperative pain. Nicotine may reduce postoperative pain scores at 24 hours by a small amount compared with placebo (eight trials, mean difference -0.88 on a 0 to 10 scale, 95% confidence interval (CI) -1.58 to -0.18; low quality evidence). The effect on pain at one hour and 12 hours postoperatively was less certain (very low quality evidence). Statistical heterogeneity was substantial and not adequately explained by stratification of trials according to type of surgical procedure, smoking status, mode of nicotine administration, timing of administration, or assessed risk of bias. Excluding one trial at high risk of bias resulted in similar findings. The effect of nicotine on postoperative opioid use was uncertain due to small number of participants in the studies. Nicotine probably increases the risk of postoperative nausea (seven trials, RR 1.24, 95% CI 1.03 to 1.50; moderate quality evidence). Three trials assessed sedation but the effect is very uncertain due to the very low quality of evidence. We found no evidence that nicotine increased the risk of vomiting (seven studies, risk difference (RD) 0.03, 95% CI -0.04 to 0.09; low quality evidence). The results from one single small trial were insufficient to establish whether nicotine led to an earlier hospital discharge (very low quality evidence).

Authors' conclusions

Based on evidence of generally low quality, nicotine may reduce postoperative pain at 24 hours compared with placebo, but the effects were relatively small (less than 1 point on a 10 point pain scale) and there was substantial heterogeneity in the results of our analyses. Nicotine does not appear to reduce postoperative use of opioids or opioid-related adverse events but probably increases the risk of nausea. More research is needed to determine the effectiveness of nicotine for postoperative pain and to understand the optimal timing, dose, and method of delivery of nicotine.","hji,kes",0,0,0,2,0,NA,NA +26782957,A randomized clinical trial of neoadjuvant chemotherapy versus neoadjuvant chemoradiotherapy for cancer of the oesophagus or gastro-oesophageal junction.,"

Background

Neoadjuvant therapy improves long-term survival after oesophagectomy, treating oesophageal cancer, but the evidence to date is insufficient to determine which of the two main neoadjuvant therapy types, chemotherapy (nCT) or chemoradiotherapy (nCRT), is more beneficial. We aimed to compare the effects of nCT with those of nCRT.

Patients and methods

This multicentre trial, which was conducted in Sweden and Norway, recruited 181 patients with carcinoma of the oesophagus or the gastro-oesophageal junction who were candidates for curative-intended treatment. The primary end point was histological complete response after neoadjuvant treatment, which has been shown to be correlated with increased long-term survival. Study participants were randomized to nCT or nCRT, followed by surgery with two-field lymphadenectomy. Three cycles of platin/5-fluorouracil were administered in both arms, whereas 40 Gy of concomitant radiotherapy was added in the nCRT arm.

Results

The trial met the primary end point, histological complete response being achieved in 28% after nCRT versus 9% after nCT (P = 0.002). Lymph-node metastases were observed in 62% in the nCT group versus 35% in the nCRT group (P = 0.001). The R0 resection rate was 87% after nCRT and 74% after nCT (P = 0.04). There was no difference in overall survival between the treatment arms.

Conclusion

The addition of radiotherapy to neoadjuvant chemotherapy results in higher histological complete response rate, higher R0 resection rate, and a lower frequency of lymph-node metastases, without significantly affecting survival.

Clinicaltrialsgov

NCT01362127 (https://clinicaltrials.gov; The full study protocol was registered in the Clinical Trials Database).","hji,kes",0,0,0,2,0,NA,NA +26803160,Alloscore: a method for predicting allosteric ligand-protein interactions.,"

Unlabelled

Allosteric ligands have increasingly gained attention as potential therapeutic agents due to their higher target selectivity and lower toxicity compared with classic orthosteric ligands. Despite the great interest in the development of allosteric drugs as a new tactic in drug discovery, the understanding of the ligand-protein interactions underlying allosteric binding represents a key challenge. Herein, we introduce Alloscore, a web server that predicts the binding affinities of allosteric ligand-protein interactions. This method exhibits prominent performance in describing allosteric binding and could be useful in allosteric virtual screening and the structural optimization of allosteric agonists/antagonists.

Availability and implementation

The Alloscore server and tutorials are freely available at http://mdl.shsmu.edu.cn/alloscore

Contact

jian.zhang@sjtu.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +26810761,Forty-eight novel mutations causing biotinidase deficiency.,"Biotinidase deficiency is an autosomal recessively inherited disorder that results in the inability to recycle the vitamin biotin and is characterized by neurological and cutaneous symptoms. The symptoms can be ameliorated or prevented by administering pharmacological doses of biotin. Since 2008, approximately 300 samples have been submitted to ARUP's Molecular Sequencing Laboratory for biotinidase mutation analysis. Of these, 48 novel alterations in the biotinidase gene have been identified. Correlating the individual's serum enzymatic activity with the genotype, we have been able to determine the effect of the novel alteration on enzyme activity and, thereby, determine its likelihood of being pathogenic in 44 of these individuals. The novel mutations and uncertain alterations have been added to the database established by ARUP (http://arup.utah.edu/database/BTD/BTD_welcome.phps) to help clinicians make decisions about management and to better counsel their patients based on their genotypes.","hji,kes",0,0,0,2,0,NA,not about the resource +26833341,Integrative analysis for identifying joint modular patterns of gene-expression and drug-response data.,"

Motivation

The underlying relationship between genomic factors and the response of diverse cancer drugs still remains unclear. A number of studies showed that the heterogeneous responses to anticancer treatments of patients were partly associated with their specific changes in gene expression and somatic alterations. The emerging large-scale pharmacogenomic data provide us valuable opportunities to improve existing therapies or to guide early-phase clinical trials of compounds under development. However, how to identify the underlying combinatorial patterns among pharmacogenomics data are still a challenging issue.

Results

In this study, we adopted a sparse network-regularized partial least square (SNPLS) method to identify joint modular patterns using large-scale pairwise gene-expression and drug-response data. We incorporated a molecular network to the (sparse) partial least square model to improve the module accuracy via a network-based penalty. We first demonstrated the effectiveness of SNPLS using a set of simulation data and compared it with two typical methods. Further, we applied it to gene expression profiles for 13 321 genes and pharmacological profiles for 98 anticancer drugs across 641 cancer cell lines consisting of diverse types of human cancers. We identified 20 gene-drug co-modules, each of which consists of 30 cell lines, 137 genes and 2 drugs on average. The majority of identified co-modules have significantly functional implications and coordinated gene-drug associations. The modular analysis here provided us new insights into the molecular mechanisms of how drugs act and suggested new drug targets for therapy of certain types of cancers.

Availability and implementation

A matlab package of SNPLS is available at http://page.amss.ac.cn/shihua.zhang/

Contact

zsh@amss.ac.cn

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +26893301,Clustering Genes of Common Evolutionary History.,"Phylogenetic inference can potentially result in a more accurate tree using data from multiple loci. However, if the loci are incongruent-due to events such as incomplete lineage sorting or horizontal gene transfer-it can be misleading to infer a single tree. To address this, many previous contributions have taken a mechanistic approach, by modeling specific processes. Alternatively, one can cluster loci without assuming how these incongruencies might arise. Such """"process-agnostic"""" approaches typically infer a tree for each locus and cluster these. There are, however, many possible combinations of tree distance and clustering methods; their comparative performance in the context of tree incongruence is largely unknown. Furthermore, because standard model selection criteria such as AIC cannot be applied to problems with a variable number of topologies, the issue of inferring the optimal number of clusters is poorly understood. Here, we perform a large-scale simulation study of phylogenetic distances and clustering methods to infer loci of common evolutionary history. We observe that the best-performing combinations are distances accounting for branch lengths followed by spectral clustering or Ward's method. We also introduce two statistical tests to infer the optimal number of clusters and show that they strongly outperform the silhouette criterion, a general-purpose heuristic. We illustrate the usefulness of the approach by 1) identifying errors in a previous phylogenetic analysis of yeast species and 2) identifying topological incongruence among newly sequenced loci of the globeflower fly genus Chiastocheta We release treeCl, a new program to cluster genes of common evolutionary history (http://git.io/treeCl).","hji,kes",0,0,0,2,0,NA,NA +26908244,CERAPP: Collaborative Estrogen Receptor Activity Prediction Project.,"

Background

Humans are exposed to thousands of man-made chemicals in the environment. Some chemicals mimic natural endocrine hormones and, thus, have the potential to be endocrine disruptors. Most of these chemicals have never been tested for their ability to interact with the estrogen receptor (ER). Risk assessors need tools to prioritize chemicals for evaluation in costly in vivo tests, for instance, within the U.S. EPA Endocrine Disruptor Screening Program.

Objectives

We describe a large-scale modeling project called CERAPP (Collaborative Estrogen Receptor Activity Prediction Project) and demonstrate the efficacy of using predictive computational models trained on high-throughput screening data to evaluate thousands of chemicals for ER-related activity and prioritize them for further testing.

Methods

CERAPP combined multiple models developed in collaboration with 17 groups in the United States and Europe to predict ER activity of a common set of 32,464 chemical structures. Quantitative structure-activity relationship models and docking approaches were employed, mostly using a common training set of 1,677 chemical structures provided by the U.S. EPA, to build a total of 40 categorical and 8 continuous models for binding, agonist, and antagonist ER activity. All predictions were evaluated on a set of 7,522 chemicals curated from the literature. To overcome the limitations of single models, a consensus was built by weighting models on scores based on their evaluated accuracies.

Results

Individual model scores ranged from 0.69 to 0.85, showing high prediction reliabilities. Out of the 32,464 chemicals, the consensus model predicted 4,001 chemicals (12.3%) as high priority actives and 6,742 potential actives (20.8%) to be considered for further testing.

Conclusion

This project demonstrated the possibility to screen large libraries of chemicals using a consensus of different in silico approaches. This concept will be applied in future projects related to other end points.

Citation

Mansouri K, Abdelaziz A, Rybacka A, Roncaglioni A, Tropsha A, Varnek A, Zakharov A, Worth A, Richard AM, Grulke CM, Trisciuzzi D, Fourches D, Horvath D, Benfenati E, Muratov E, Wedebye EB, Grisoni F, Mangiatordi GF, Incisivo GM, Hong H, Ng HW, Tetko IV, Balabin I, Kancherla J, Shen J, Burton J, Nicklaus M, Cassotti M, Nikolov NG, Nicolotti O, Andersson PL, Zang Q, Politi R, Beger RD, Todeschini R, Huang R, Farag S, Rosenberg SA, Slavov S, Hu X, Judson RS. 2016.

Cerapp

Collaborative Estrogen Receptor Activity Prediction Project. Environ Health Perspect 124:1023-1033; http://dx.doi.org/10.1289/ehp.1510267.","hji,kes",0,0,0,2,0,NA,NA +26910751,MDI-GPU: accelerating integrative modelling for genomic-scale data using GP-GPU computing.,"The integration of multi-dimensional datasets remains a key challenge in systems biology and genomic medicine. Modern high-throughput technologies generate a broad array of different data types, providing distinct--but often complementary--information. However, the large amount of data adds burden to any inference task. Flexible Bayesian methods may reduce the necessity for strong modelling assumptions, but can also increase the computational burden. We present an improved implementation of a Bayesian correlated clustering algorithm, that permits integrated clustering to be routinely performed across multiple datasets, each with tens of thousands of items. By exploiting GPU based computation, we are able to improve runtime performance of the algorithm by almost four orders of magnitude. This permits analysis across genomic-scale data sets, greatly expanding the range of applications over those originally possible. MDI is available here: http://www2.warwick.ac.uk/fac/sci/systemsbiology/research/software/.","hji,kes",0,0,0,2,0,NA,NA +26981408,De novo transcriptome assembly of two contrasting pumpkin cultivars.,"Cucurbita pepo (squash, pumpkin, gourd), a worldwide-cultivated vegetable of American origin, is extremely variable in fruit characteristics. However, the information associated with genes and genetic markers for pumpkin is very limited. In order to identify new genes and to develop genetic markers, we performed a transcriptome analysis (RNA-Seq) of two contrasting pumpkin cultivars. Leaves and female flowers of cultivars, 'Big Moose' with large round fruits and 'Munchkin' with small round fruits, were harvested for total RNA extraction. 
We obtained a total of 6GB (Big Moose; http://www.ncbi.nlm.nih.gov/Traces/sra/?run=SRR3056882) and 5GB (Munchkin; http://www.ncbi.nlm.nih.gov/Traces/sra/?run=SRR3056883) sequence data (NCBI SRA database SRX1502732 and SRX1502735, respectively), which correspond to 18,055,786 and 14,824,292 150-base reads. After quality assessment, the clean sequences were 17,995,932 and 14,774,486 respectively. The numbers of total transcripts for 'Big Moose' and 'Munchkin' were 84,727 and 68,051, respectively. TransDecoder identified possible coding regions in assembled transcripts. This study provides transcriptome data for two contrasting pumpkin cultivars, which might be useful for genetic marker development and comparative transcriptome analyses.","hji,kes",0,0,0,2,0,NA,references other data resource +27030549,The advantage of laser-capture microdissection over whole tissue analysis in proteomic profiling studies.,"Laser-capture microdissection (LCM) offers a reliable cell population enrichment tool and has been successfully coupled to MS analysis. Despite this, most proteomic studies employ whole tissue lysate (WTL) analysis in the discovery of disease biomarkers and in profiling analyses. Furthermore, the influence of tissue heterogeneity in WTL analysis, nor its impact in biomarker discovery studies have been completely elucidated. In order to address this, we compared previously obtained high resolution MS data from a cohort of 38 breast cancer tissues, of which both LCM enriched tumor epithelial cells and WTL samples were analyzed. Label-free quantification (LFQ) analysis through MaxQuant software showed a significantly higher number of identified and quantified proteins in LCM enriched samples (3404) compared to WTLs (2837). Furthermore, WTL samples displayed a higher amount of missing data compared to LCM both at peptide and protein levels (p-value < 0.001). 
2D analysis on co-expressed proteins revealed discrepant expression of immune system and lipid metabolisms related proteins between LCM and WTL samples. We hereby show that LCM better dissected the biology of breast tumor epithelial cells, possibly due to lower interference from surrounding tissues and highly abundant proteins. All data have been deposited in the ProteomeXchange with the dataset identifier PXD002381 (http://proteomecentral.proteomexchange.org/dataset/PXD002381).","hji,kes",0,0,0,2,0,NA,data deposited as referenced +27041235,"A dietary polyphenol resveratrol acts to provide neuroprotection in recurrent stroke models by regulating AMPK and SIRT1 signaling, thereby reducing energy requirements during ischemia.","The above article from European Journal of Neuroscience, published online on 5 March 2013 in Wiley Online Library (http://onlinelibrary.wiley.com/doi/10.1111/ejn.12162/full), has been retracted by agreement between the Editors-in-Chief, Paul Bolam and John Foxe, the authors and John Wiley & Sons Ltd. The retraction has been agreed as Dr Phillip Barber has informed the publisher that he had seen neither the original data nor any version of the manuscript, and had not been involved in the work reported. A subsequent Institutional investigation found evidence of misconduct on the part of the submitting author. Reference Wang, L.-M., Wang, Y.-J., Cui, M., Luo, W.-J., Wang, X.-J., Barber, P.A. & Chen, Z.-Y. (2013) A dietary polyphenol resveratrol acts to provide neuroprotection in recurrent stroke models by regulating AMPK and SIRT1 signaling, thereby reducing energy requirements during ischemia.","hji,kes",0,0,0,2,0,NA,NA +27076459,Inferring microRNA-disease associations by random walk on a heterogeneous network with multiple data sources.,"Since the discovery of the regulatory function of microRNA (miRNA), increased attention has focused on identifying the relationship between miRNA and disease. 
It has been suggested that computational methods are an efficient way to identify potential disease-related miRNAs for further confirmation using biological experiments. In this paper, we first highlighted three limitations commonly associated with previous computational methods. To resolve these limitations, we established disease similarity subnetwork and miRNA similarity subnetwork by integrating multiple data sources, where the disease similarity is composed of disease semantic similarity and disease functional similarity, and the miRNA similarity is calculated using the miRNA-target gene and miRNA-lncRNA (long non-coding RNA) associations. Then, a heterogeneous network was constructed by connecting the disease similarity subnetwork and the miRNA similarity subnetwork using the known miRNA-disease associations. We extended random walk with restart to predict miRNA-disease associations in the heterogeneous network. The leave-one-out cross-validation achieved an average area under the curve (AUC) of 0.8049 across 341 diseases and 476 miRNAs. For five-fold cross-validation, our method achieved an AUC from 0.7970 to 0.9249 for 15 human diseases. Case studies further demonstrated the feasibility of our method to discover potential miRNA-disease associations. An online service for prediction is freely available at http://ifmda.aliapp.com.","hji,kes",0,0,0,2,0,NA,NA +27092486,Profilings of MicroRNAs in the Liver of Common Carp (Cyprinus carpio) Infected with Flavobacterium columnare.,"MicroRNAs (miRNAs) play important roles in regulation of many biological processes in eukaryotes, including pathogen infection and host interactions. Flavobacterium columnare (FC) infection can cause great economic loss of common carp (Cyprinus carpio) which is one of the most important cultured fish in the world. However, miRNAs in response to FC infection in common carp has not been characterized. 
To identify specific miRNAs involved in common carp infected with FC, we performed microRNA sequencing using livers of common carp infected with and without FC. A total of 698 miRNAs were identified, including 142 which were identified and deposited in the miRbase database (Available online: http://www.mirbase.org/) and 556 had only predicted miRNAs. Among the deposited miRNAs, eight miRNAs were first identified in common carp. Thirty of the 698 miRNAs were differentially expressed miRNAs (DIE-miRNAs) between the FC infected and control samples. From the DIE-miRNAs, seven were selected randomly and their expression profiles were confirmed to be consistent with the microRNA sequencing results using RT-PCR and qRT-PCR. In addition, a total of 27,363 target genes of the 30 DIE-miRNAs were predicted. The target genes were enriched in five Kyoto Encyclopedia of Genes and Genomes (KEGG) pathways, including focal adhesion, extracellular matrix (ECM)-receptor interaction, erythroblastic leukemia viral oncogene homolog (ErbB) signaling pathway, regulation of actin cytoskeleton, and adherent junction. The miRNA expression profile of the liver of common carp infected with FC will pave the way for the development of effective strategies to fight against FC infection.","hji,kes",0,0,0,2,0,NA,NA +27102804,Using expected sequence features to improve basecalling accuracy of amplicon pyrosequencing data.,"

Background

Amplicon pyrosequencing targets a known genetic region and thus inherently produces reads highly anticipated to have certain features, such as conserved nucleotide sequence, and in the case of protein coding DNA, an open reading frame. Pyrosequencing errors, consisting mainly of nucleotide insertions and deletions, are on the other hand likely to disrupt open reading frames. Such an inverse relationship between errors and expectation based on prior knowledge can be used advantageously to guide the process known as basecalling, i.e. the inference of nucleotide sequence from raw sequencing data.

Results

The new basecalling method described here, named Multipass, implements a probabilistic framework for working with the raw flowgrams obtained by pyrosequencing. For each sequence variant Multipass calculates the likelihood and nucleotide sequence of several most likely sequences given the flowgram data. This probabilistic approach enables integration of basecalling into a larger model where other parameters can be incorporated, such as the likelihood for observing a full-length open reading frame at the targeted region. We apply the method to 454 amplicon pyrosequencing data obtained from a malaria virulence gene family, where Multipass generates 20 % more error-free sequences than current state of the art methods, and provides sequence characteristics that allow generation of a set of high confidence error-free sequences.

Conclusions

This novel method can be used to increase accuracy of existing and future amplicon sequencing data, particularly where extensive prior knowledge is available about the obtained sequences, for example in analysis of the immunoglobulin VDJ region where Multipass can be combined with a model for the known recombining germline genes. Multipass is available for Roche 454 data at http://www.cbs.dtu.dk/services/MultiPass-1.0 , and the concept can potentially be implemented for other sequencing technologies as well.","hji,kes",0,0,0,2,0,NA,NA +27113639,Vapocoolants (cold spray) for pain treatment during intravenous cannulation.,"

Background

Intravenous cannulation is a painful procedure that can provoke anxiety and stress. Injecting local anaesthetic can provide analgesia at the time of cannulation, but it is a painful procedure. Topical anaesthetic creams take between 30 and 90 minutes to produce an effect. A quicker acting analgesic allows more timely investigation and treatment. Vapocoolants have been used in this setting, but studies have reported mixed results.

Objectives

To determine effects of vapocoolants on pain associated with intravenous cannulation in adults and children. To explore variables that might affect the performance of vapocoolants, including time required for application, distance from the skin when applied and time to cannulation. To look at adverse effects associated with the use of vapocoolants.

Search methods

We searched the Cochrane Central Register of Controlled Trials (CENTRAL), MEDLINE, EMBASE, Latin American Caribbean Health Sciences Literature (LILACS), the Cumulative Index to Nursing and Allied Health Literature (CINAHL), the Institute for Scientific Information (ISI) Web of Science and the http://clinicaltrials.gov/, http://www.controlled-trials.com/ and http://www.trialscentral.org/ databases to 1 May 2015. We applied no language restrictions. We also scanned the reference lists of included papers.

Selection criteria

We included all blinded and unblinded randomized controlled trials (RTCs) comparing any vapocoolant with placebo or control to reduce pain during intravenous cannulation in adults and children.

Data collection and analysis

Three review authors independently assessed trial quality and extracted data, contacted study authors for additional information and assessed included studies for risk of bias. We collected and analysed data for the primary outcome of pain during cannulation, and for the secondary outcomes of pain associated with application of the vapocoolant, first attempt success rate of intravenous cannulation, adverse events and participant satisfaction. We performed subgroup analyses for the primary outcome to examine differences based on age of participant, type of vapocoolant used, application time of vapocoolant and clinical situation (emergency vs elective). We used random-effects model meta-analysis in RevMan 5.3 and assessed heterogeneity between trial results by examining forest plots and calculating the I(2) statistic.

Main results

We found nine suitable studies of 1070 participants and included them in the qualitative analyses. We included eight studies of 848 participants in the meta-analysis for the primary outcome (pain during intravenous cannulation). Use of vapocoolants resulted in a reduction in pain scores as measured by a linear 100 mm visual analogue scale (VAS 100) compared with controls (difference between means -12.5 mm, 95% confidence interval (CI) -18.7 to -6.4 mm; moderate-quality evidence). We could not include in the meta-analysis one study, which showed no effects of the intervention.Use of vapocoolants resulted in increased pain scores at the time of application as measured by a VAS 100 compared with controls (difference between means 6.3 mm, 95% CI 2.2 to 10.3 mm; four studies, 461 participants; high-quality evidence) and led to no difference in first attempt success compared with controls (risk ratio (RR) 1.00, 95% CI 0.94 to 1.06; six studies, 812 participants; moderate-quality evidence). We documented eight minor adverse events reported in 279 vapocoolant participants (risk difference (RD) 0.03, 95% CI 0 to 0.05; five studies, 551 participants; low quality-evidence).The overall risk of bias of individual studies ranged from low to high, with high risk of bias for performance and detection bias in four studies. Sensitivity analysis showed that exclusion of studies at high or unclear risk of bias did not materially alter the results of this review.

Authors' conclusions

Moderate-quality evidence indicates that use of a vapocoolant immediately before intravenous cannulation reduces pain during the procedure. Use of vapocoolant does not increase the difficulty of cannulation nor cause serious adverse effects but is associated with mild discomfort during application.","hji,kes",0,0,0,2,0,NA,NA +27115029,Longitudinal data on cortical thickness before and after working memory training.,"The data and supplementary information provided in this article relate to our research article """"Task complexity and location specific changes of cortical thickness in executive and salience networks after working memory training"""" (Metzler-Baddeley et al., 2016) [1]. We provide cortical thickness and subcortical volume data derived from parieto-frontal cortical regions and the basal ganglia with the FreeSurfer longitudinal analyses stream (http://surfer.nmr.mgh.harvard.edu [2]) before and after Cogmed working memory training (Cogmed and Cogmed Working Memory Training, 2012) [3]. 
This article also provides supplementary information to the research article, i.e., within-group comparisons between baseline and outcome cortical thickness and subcortical volume measures, between-group tests of performance changes in cognitive benchmark tests (www.cambridgebrainsciences.com [4]), correlation analyses between performance changes in benchmark tests and training-related structural changes, correlation analyses between the time spent training and structural changes, a scatterplot of the relationship between cortical thickness measures derived from the occipital lobe as control region and the chronological order of the MRI sessions to assess potential scanner drift effects and a post-hoc vertex-wise whole brain analysis with FreeSurfer Qdec (https://surfer.nmr.mgh.harvard.edu/fswiki/Qdec [5]).","hji,kes",0,0,0,2,0,NA,NA +27152837,Evaluation of OASIS QSAR Models Using ToxCast™ in Vitro Estrogen and Androgen Receptor Binding Data and Application in an Integrated Endocrine Screening Approach.,"

Background

Integrative testing strategies (ITSs) for potential endocrine activity can use tiered in silico and in vitro models. Each component of an ITS should be thoroughly assessed.

Objectives

We used the data from three in vitro ToxCast binding assays to assess OASIS, a quantitative structure-activity relationship (QSAR) platform covering both estrogen receptor (ER) and androgen receptor (AR) binding. For stronger binders (described here as AC50 < 1 M), we also examined the relationship of QSAR predictions of ER or AR binding to the results from 18 ER and 10 AR transactivation assays, 72 ER-binding reference compounds, and the in vivo uterotrophic assay.

Methods

NovaScreen binding assay data for ER (human, bovine, and mouse) and AR (human, chimpanzee, and rat) were used to assess the sensitivity, specificity, concordance, and applicability domain of two OASIS QSAR models. The binding strength relative to the QSAR-predicted binding strength was examined for the ER data. The relationship of QSAR predictions of binding to transactivation- and pathway-based assays, as well as to in vivo uterotrophic responses, was examined.

Results

The QSAR models had both high sensitivity (> 75%) and specificity (> 86%) for ER as well as both high sensitivity (92-100%) and specificity (70-81%) for AR. For compounds within the domains of the ER and AR QSAR models that bound with AC50 < 1 M, the QSAR models accurately predicted the binding for the parent compounds. The parent compounds were active in all transactivation assays where metabolism was incorporated and, except for those compounds known to require metabolism to manifest activity, all assay platforms where metabolism was not incorporated. Compounds in-domain and predicted to bind by the ER QSAR model that were positive in ToxCast ER binding at AC50 < 1 M were active in the uterotrophic assay.

Conclusions

We used the extensive ToxCast HTS binding data set to show that OASIS ER and AR QSAR models had high sensitivity and specificity when compounds were in-domain of the models. Based on this research, we recommend a tiered screening approach wherein a) QSAR is used to identify compounds in-domain of the ER or AR binding models and predicted to bind; b) those compounds are screened in vitro to assess binding potency; and c) the stronger binders (AC50 < 1 M) are screened in vivo. This scheme prioritizes compounds for integrative testing and risk assessment. Importantly, compounds that are not in-domain, that are predicted either not to bind or to bind weakly, that are not active in in vitro, that require metabolism to manifest activity, or for which in vivo AR testing is in order, need to be assessed differently.

Citation

Bhhatarai B, Wilson DM, Price PS, Marty S, Parks AK, Carney E. 2016. Evaluation of OASIS QSAR models using ToxCast in vitro estrogen and androgen receptor binding data and application in an integrated endocrine screening approach. Environ Health Perspect 124:1453-1461; http://dx.doi.org/10.1289/EHP184.","hji,kes",0,0,0,2,0,NA,NA +27153703,Phasing for medical sequencing using rare variants and large haplotype reference panels.,"

Motivation

There is growing recognition that estimating haplotypes from high coverage sequencing of single samples in clinical settings is an important problem. At the same time very large datasets consisting of tens and hundreds of thousands of high-coverage sequenced samples will soon be available. We describe a method that takes advantage of these huge human genetic variation resources and rare variant sharing patterns to estimate haplotypes on single sequenced samples. Sharing rare variants between two individuals is more likely to arise from a recent common ancestor and, hence, also more likely to indicate similar shared haplotypes over a substantial flanking region of sequence.

Results

Our method exploits this idea to select a small set of highly informative copying states within a Hidden Markov Model (HMM) phasing algorithm. Using rare variants in this way allows us to avoid iterative MCMC methods to infer haplotypes. Compared to other approaches that do not explicitly use rare variants we obtain significant gains in phasing accuracy, less variation over phasing runs and improvements in speed. For example, using a reference panel of 7420 haplotypes from the UK10K project, we are able to reduce switch error rates by up to 50% when phasing samples sequenced at high-coverage. In addition, a single step rephasing of the UK10K panel, using rare variant information, has a downstream impact on phasing performance. These results represent a proof of concept that rare variant sharing patterns can be utilized to phase large high-coverage sequencing studies such as the 100 000 Genomes Project dataset.

Availability and implementation

A webserver that includes an implementation of this new method and allows phasing of high-coverage clinical samples is available at https://phasingserver.stats.ox.ac.uk/

Contact

marchini@stats.ox.ac.uk

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +27164621,Prioritizing Chemicals for Risk Assessment Using Chemoinformatics: Examples from the IARC Monographs on Pesticides.,"

Background

Identifying cancer hazards is the first step towards cancer prevention. The International Agency for Research on Cancer (IARC) Monographs Programme, which has evaluated nearly 1,000 agents for their carcinogenic potential since 1971, typically selects agents for hazard identification on the basis of public nominations, expert advice, published data on carcinogenicity, and public health importance.

Objectives

Here, we present a novel and complementary strategy for identifying agents for hazard evaluation using chemoinformatics, database integration, and automated text mining.

Discussion

To inform selection among a broad range of pesticides nominated for evaluation, we identified and screened nearly 6,000 relevant chemical structures, after which we systematically compiled information on 980 pesticides, creating network maps that allowed cluster visualization by chemical similarity, pesticide class, and publicly available information concerning cancer epidemiology, cancer bioassays, and carcinogenic mechanisms. For the IARC Monograph meetings that took place in March and June 2015, this approach supported high-priority evaluation of glyphosate, malathion, parathion, tetrachlorvinphos, diazinon, p,p'-dichlorodiphenyltrichloroethane (DDT), lindane, and 2,4-dichlorophenoxyacetic acid (2,4-D).

Conclusions

This systematic approach, accounting for chemical similarity and overlaying multiple data sources, can be used by risk assessors as well as by researchers to systematize, inform, and increase efficiency in selecting and prioritizing agents for hazard identification, risk assessment, regulation, or further investigation. This approach could be extended to an array of outcomes and agents, including occupational carcinogens, drugs, and foods. Citation: Guha N, Guyton KZ, Loomis D, Barupal DK. 2016. Prioritizing chemicals for risk assessment using chemoinformatics: examples from the IARC Monographs on Pesticides. Environ Health Perspect 124:1823-1829; http://dx.doi.org/10.1289/EHP186.","hji,kes",0,0,0,2,0,NA,NA +27187006,Targeting vascular (endothelial) dysfunction.,"Cardiovascular diseases are major contributors to global deaths and disability-adjusted life years, with hypertension a significant risk factor for all causes of death. The endothelium that lines the inner wall of the vasculature regulates essential haemostatic functions, such as vascular tone, circulation of blood cells, inflammation and platelet activity. Endothelial dysfunction is an early predictor of atherosclerosis and future cardiovascular events. We review the prognostic value of obtaining measurements of endothelial function, the clinical techniques for its determination, the mechanisms leading to endothelial dysfunction and the therapeutic treatment of endothelial dysfunction. Since vascular oxidative stress and inflammation are major determinants of endothelial function, we have also addressed current antioxidant and anti-inflammatory therapies. In the light of recent data that dispute the prognostic value of endothelial function in healthy human cohorts, we also discuss alternative diagnostic parameters such as vascular stiffness index and intima/media thickness ratio. 
We also suggest that assessing vascular function, including that of smooth muscle and even perivascular adipose tissue, may be an appropriate parameter for clinical investigations.

Linked articles

This article is part of a themed section on Redox Biology and Oxidative Stress in Health and Disease. To view the other articles in this section visit http://onlinelibrary.wiley.com/doi/10.1111/bph.v174.12/issuetoc.","hji,kes",0,0,0,2,0,NA,NA +27207881,DIANA-mirExTra v2.0: Uncovering microRNAs and transcription factors with crucial roles in NGS expression data.,"Differential expression analysis (DEA) is one of the main instruments utilized for revealing molecular mechanisms in pathological and physiological conditions. DIANA-mirExTra v2.0 (http://www.microrna.gr/mirextrav2) performs a combined DEA of mRNAs and microRNAs (miRNAs) to uncover miRNAs and transcription factors (TFs) playing important regulatory roles between two investigated states. The web server uses as input miRNA/RNA-Seq read count data sets that can be uploaded for analysis. Users can combine their data with 350 small-RNA-Seq and 65 RNA-Seq in-house analyzed libraries which are provided by DIANA-mirExTra v2.0.The web server utilizes miRNA:mRNA, TF:mRNA and TF:miRNA interactions derived from extensive experimental data sets. More than 450 000 miRNA interactions and 2 000 000 TF binding sites from specific or high-throughput techniques have been incorporated, while accurate miRNA TSS annotation is obtained from microTSS experimental/in silico framework. These comprehensive data sets enable users to perform analyses based solely on experimentally supported information and to uncover central regulators within sequencing data: miRNAs controlling mRNAs and TFs regulating mRNA or miRNA expression. The server also supports predicted miRNA:gene interactions from DIANA-microT-CDS for 4 species (human, mouse, nematode and fruit fly). DIANA-mirExTra v2.0 has an intuitive user interface and is freely available to all users without any login requirement.","hji,kes",0,0,0,2,0,NA,data not available - analysis only +27207943,Complex heatmaps reveal patterns and correlations in multidimensional genomic data.,"

Unlabelled

Parallel heatmaps with carefully designed annotation graphics are powerful for efficient visualization of patterns and relationships among high dimensional genomic data. Here we present the ComplexHeatmap package that provides rich functionalities for customizing heatmaps, arranging multiple parallel heatmaps and including user-defined annotation graphics. We demonstrate the power of ComplexHeatmap to easily reveal patterns and correlations among multiple sources of information with four real-world datasets.

Availability and implementation

The ComplexHeatmap package and documentation are freely available from the Bioconductor project: http://www.bioconductor.org/packages/devel/bioc/html/ComplexHeatmap.html

Contact

m.schlesner@dkfz.de

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +27240256,Data-driven hypothesis weighting increases detection power in genome-scale multiple testing.,"Hypothesis weighting improves the power of large-scale multiple testing. We describe independent hypothesis weighting (IHW), a method that assigns weights using covariates independent of the P-values under the null hypothesis but informative of each test's power or prior probability of the null hypothesis (http://www.bioconductor.org/packages/IHW). IHW increases power while controlling the false discovery rate and is a practical approach to discovering associations in genomics, high-throughput biology and other large data sets.","hji,kes",0,0,0,2,0,NA,NA +27268407,Tracking medical genetic literature through machine learning.,"There has been remarkable progress in identifying the causes of genetic conditions as well as understanding how changes in specific genes cause disease. Though difficult (and often superficial) to parse, an interesting tension involves emphasis on basic research aimed to dissect normal and abnormal biology versus more clearly clinical and therapeutic investigations. To examine one facet of this question and to better understand progress in Mendelian-related research, we developed an algorithm that classifies medical literature into three categories (Basic, Clinical, and Management) and conducted a retrospective analysis. We built a supervised machine learning classification model using the Azure Machine Learning (ML) Platform and analyzed the literature (1970-2014) from NCBI's Entrez Gene2Pubmed Database (http://www.ncbi.nlm.nih.gov/gene) using genes from the NHGRI's Clinical Genomics Database (http://research.nhgri.nih.gov/CGD/). We applied our model to 376,738 articles: 288,639 (76.6%) were classified as Basic, 54,178 (14.4%) as Clinical, and 24,569 (6.5%) as Management. The average classification accuracy was 92.2%. 
The rate of Clinical publication was significantly higher than Basic or Management. The rate of publication of article types differed significantly when divided into key eras: Human Genome Project (HGP) planning phase (1984-1990); HGP launch (1990) to publication (2001); following HGP completion to the """"Next Generation"""" advent (2009); the era following 2009. In conclusion, in addition to the findings regarding the pace and focus of genetic progress, our algorithm produced a database that can be used in a variety of contexts including automating the identification of management-related literature.","hji,kes",0,0,0,2,0,NA,NA +27296980,Unbiased probabilistic taxonomic classification for DNA barcoding.,"

Motivation

When targeted to a barcoding region, high-throughput sequencing can be used to identify species or operational taxonomical units from environmental samples, and thus to study the diversity and structure of species communities. Although there are many methods which provide confidence scores for assigning taxonomic affiliations, it is not straightforward to translate these values to unbiased probabilities. We present a probabilistic method for taxonomical classification (PROTAX) of DNA sequences. Given a pre-defined taxonomical tree structure that is partially populated by reference sequences, PROTAX decomposes the probability of one to the set of all possible outcomes. PROTAX accounts for species that are present in the taxonomy but that do not have reference sequences, the possibility of unknown taxonomical units, as well as mislabeled reference sequences. PROTAX is based on a statistical multinomial regression model, and it can utilize any kind of sequence similarity measures or the outputs of other classifiers as predictors.

Results

We demonstrate the performance of PROTAX by using as predictors the output from BLAST, the phylogenetic classification software TIPP, and the RDP classifier. We show that PROTAX improves the predictions of the baseline implementations of TIPP and RDP classifiers, and that it is able to combine complementary information provided by BLAST and TIPP, resulting in accurate and unbiased classifications even with very challenging cases such as 50% mislabeling of reference sequences.

Availability and implementation

Perl/R implementation of PROTAX is available at http://www.helsinki.fi/science/metapop/Software.htm

Contact

panu.somervuo@helsinki.fi

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +27307608,Convolutional neural network architectures for predicting DNA-protein binding.,"

Motivation

Convolutional neural networks (CNN) have outperformed conventional methods in modeling the sequence specificity of DNA-protein binding. Yet inappropriate CNN architectures can yield poorer performance than simpler models. Thus an in-depth understanding of how to match CNN architecture to a given task is needed to fully harness the power of CNNs for computational biology applications.

Results

We present a systematic exploration of CNN architectures for predicting DNA sequence binding using a large compendium of transcription factor datasets. We identify the best-performing architectures by varying CNN width, depth and pooling designs. We find that adding convolutional kernels to a network is important for motif-based tasks. We show the benefits of CNNs in learning rich higher-order sequence features, such as secondary motifs and local sequence context, by comparing network performance on multiple modeling tasks ranging in difficulty. We also demonstrate how careful construction of sequence benchmark datasets, using approaches that control potentially confounding effects like positional or motif strength bias, is critical in making fair comparisons between competing methods. We explore how to establish the sufficiency of training data for these learning tasks, and we have created a flexible cloud-based framework that permits the rapid exploration of alternative neural network architectures for problems in computational biology.

Availability and implementation

All the models analyzed are available at http://cnn.csail.mit.edu

Contact

gifford@mit.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +27307636,DFLpred: High-throughput prediction of disordered flexible linker regions in protein sequences.,"

Motivation

Disordered flexible linkers (DFLs) are disordered regions that serve as flexible linkers/spacers in multi-domain proteins or between structured constituents in domains. They are different from flexible linkers/residues because they are disordered and longer. Availability of experimentally annotated DFLs provides an opportunity to build high-throughput computational predictors of these regions from protein sequences. To date, there are no computational methods that directly predict DFLs and they can be found only indirectly by filtering predicted flexible residues with predictions of disorder.

Results

We conceptualized, developed and empirically assessed a first-of-its-kind sequence-based predictor of DFLs, DFLpred. This method outputs propensity to form DFLs for each residue in the input sequence. DFLpred uses a small set of empirically selected features that quantify propensities to form certain secondary structures, disordered regions and structured regions, which are processed by a fast linear model. Our high-throughput predictor can be used on the whole-proteome scale; it needs <1 h to predict entire proteome on a single CPU. When assessed on an independent test dataset with low sequence-identity proteins, it secures area under the receiver operating characteristic curve equal 0.715 and outperforms existing alternatives that include methods for the prediction of flexible linkers, flexible residues, intrinsically disordered residues and various combinations of these methods. Prediction on the complete human proteome reveals that about 10% of proteins have a large content of over 30% DFL residues. We also estimate that about 6000 DFL regions are long with =30 consecutive residues.

Availability and implementation

http://biomine.ece.ualberta.ca/DFLpred/

Contact

lkurgan@vcu.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +27307645,BioASF: a framework for automatically generating executable pathway models specified in BioPAX.,"

Motivation

Biological pathways play a key role in most cellular functions. To better understand these functions, diverse computational and cell biology researchers use biological pathway data for various analysis and modeling purposes. For specifying these biological pathways, a community of researchers has defined BioPAX and provided various tools for creating, validating and visualizing BioPAX models. However, a generic software framework for simulating BioPAX models is missing. Here, we attempt to fill this gap by introducing a generic simulation framework for BioPAX. The framework explicitly separates the execution model from the model structure as provided by BioPAX, with the advantage that the modelling process becomes more reproducible and intrinsically more modular; this ensures natural biological constraints are satisfied upon execution. The framework is based on the principles of discrete event systems and multi-agent systems, and is capable of automatically generating a hierarchical multi-agent system for a given BioPAX model.

Results

To demonstrate the applicability of the framework, we simulated two types of biological network models: a gene regulatory network modeling the haematopoietic stem cell regulators and a signal transduction network modeling the Wnt/-catenin signaling pathway. We observed that the results of the simulations performed using our framework were entirely consistent with the simulation results reported by the researchers who developed the original models in a proprietary language.

Availability and implementation

The framework, implemented in Java, is open source and its source code, documentation and tutorial are available at http://www.ibi.vu.nl/programs/BioASF CONTACT: j.heringa@vu.nl.","hji,kes",0,0,0,2,0,NA,NA +27334475,PEP_scaffolder: using (homologous) proteins to scaffold genomes.,"

Motivation

Recovering the gene structures is one of the important goals of genome assembly. In low-quality assemblies, and even some high-quality assemblies, certain gene regions are still incomplete; thus, novel scaffolding approaches are required to complete gene regions.

Results

We developed an efficient and fast genome scaffolding method called PEP_scaffolder, using proteins to scaffold genomes. The pipeline aims to recover protein-coding gene structures. We tested the method on human contigs; using human UniProt proteins as guides, the improvement on N50 size was 17% increase with an accuracy of ~97%. PEP_scaffolder improved the proportion of fully covered proteins among all proteins, which was close to the proportion in the finished genome. The method provided a high accuracy of 91% using orthologs of distant species. Tested on simulated fly contigs, PEP_scaffolder outperformed other scaffolders, with the shortest running time and the highest accuracy.

Availability and implementation

The software is freely available at http://www.fishbrowser.org/software/PEP_scaffolder/ CONTACT: lijt@cafs.ac.cn. Supplementary information: Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +27340894,Feasibility of Deploying Inhaler Sensors to Identify the Impacts of Environmental Triggers and Built Environment Factors on Asthma Short-Acting Bronchodilator Use.,"

Background

Epidemiological asthma research has relied upon self-reported symptoms or healthcare utilization data, and used the residential address as the primary location for exposure. These data sources can be temporally limited, spatially aggregated, subjective, and burdensome for the patient to collect.

Objectives

First, we aimed to test the feasibility of collecting rescue inhaler use data in space-time using electronic sensors. Second, we aimed to evaluate whether these data have the potential to identify environmental triggers and built environment factors associated with rescue inhaler use and to determine whether these findings would be consistent with the existing literature.

Methods

We utilized zero-truncated negative binomial models to identify triggers associated with inhaler use, and implemented three sensitivity analyses to validate our findings.

Results

Electronic sensors fitted on metered dose inhalers tracked 5,660 rescue inhaler use events in space and time for 140 participants from 13 June 2012 to 28 February 2014. We found that the inhaler sensors were feasible in passively collecting objective rescue inhaler use data. We identified several environmental triggers with a positive and significant association with inhaler use, including: AQI, PM10, weed pollen, and mold. Conversely, the spatial distribution of tree cover demonstrated a negative and significant association with inhaler use.

Conclusions

Utilizing a sensor to capture the signal of rescue inhaler use in space-time offered a passive and objective signal of asthma activity. This approach enabled detailed analyses to identify environmental triggers and built environment factors that are associated with asthma symptoms beyond the residential address. The application of these new technologies has the potential to improve our surveillance and understanding of asthma. Citation: Su JG, Barrett MA, Henderson K, Humblet O, Smith T, Sublett JW, Nesbitt L, Hogg C, Van Sickle D, Sublett JL. 2017. Feasibility of deploying inhaler sensors to identify the impacts of environmental triggers and built environment factors on asthma short-acting bronchodilator use. Environ Health Perspect 125:254-261; http://dx.doi.org/10.1289/EHP266.","hji,kes",0,0,0,2,0,NA,NA +27378294,ChAsE: chromatin analysis and exploration tool.,": We present ChAsE, a cross-platform desktop application developed for interactive visualization, exploration and clustering of epigenomic data such as ChIP-seq experiments. ChAsE is designed and developed in close collaboration with several groups of biologists and bioinformaticians with a focus on usability and interactivity. Data can be analyzed through k-means clustering, specifying presence or absence of signal in epigenetic data and performing set operations between clusters. Results can be explored in an interactive heat map and profile plot interface and exported for downstream analysis or as high quality figures suitable for publications.

Availability and implementation

Software, source code (MIT License), data and video tutorials available at http://chase.cs.univie.ac.at CONTACT: : mkarimi@brc.ubc.ca or torsten.moeller@univie.ac.atSupplementary information: Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +27378299,Icarus: visualizer for de novo assembly evaluation.,": Data visualization plays an increasingly important role in NGS data analysis. With advances in both sequencing and computational technologies, it has become a new bottleneck in genomics studies. Indeed, evaluation of de novo genome assemblies is one of the areas that can benefit from the visualization. However, even though multiple quality assessment methods are now available, existing visualization tools are hardly suitable for this purpose. Here, we present Icarus-a novel genome visualizer for accurate assessment and analysis of genomic draft assemblies, which is based on the tool QUAST. Icarus can be used in studies where a related reference genome is available, as well as for non-model organisms. The tool is available online and as a standalone application.

Availability and implementation

http://cab.spbu.ru/software/icarus CONTACT: aleksey.gurevich@spbu.ruSupplementary information: Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +27387388,Identification and Validation of HCC-specific Gene Transcriptional Signature for Tumor Antigen Discovery.,"A novel two-step bioinformatics strategy was applied for identification of signatures with therapeutic implications in hepatitis-associated HCC. Transcriptional profiles from HBV- and HCV-associated HCC samples were compared with non-tumor liver controls. Resulting HCC modulated genes were subsequently compared with different non-tumor tissue samples. Two related signatures were identified, namely """"HCC-associated"""" and """"HCC-specific"""". Expression data were validated by RNA-Seq analysis carried out on unrelated HCC samples and protein expression was confirmed according to The Human Protein Atlas"""" (http://proteinatlas.org/), a public repository of immunohistochemistry data. Among all, aldo-keto reductase family 1 member B10, and IGF2 mRNA-binding protein 3 were found strictly HCC-specific with no expression in 18/20 normal tissues. Target peptides for vaccine design were predicted for both proteins associated with the most prevalent HLA-class I and II alleles. The described novel strategy showed to be feasible for identification of HCC-specific proteins as highly potential target for HCC immunotherapy.","hji,kes",0,0,0,2,0,NA,references other data resource +27397138,Dynamic Bayesian Network for Accurate Detection of Peptides from Tandem Mass Spectra.,"A central problem in mass spectrometry analysis involves identifying, for each observed tandem mass spectrum, the corresponding generating peptide. We present a dynamic Bayesian network (DBN) toolkit that addresses this problem by using a machine learning approach. 
At the heart of this toolkit is a DBN for Rapid Identification (DRIP), which can be trained from collections of high-confidence peptide-spectrum matches (PSMs). DRIP's score function considers fragment ion matches using Gaussians rather than fixed fragment-ion tolerances and also finds the optimal alignment between the theoretical and observed spectrum by considering all possible alignments, up to a threshold that is controlled using a beam-pruning algorithm. This function not only yields state-of-the art database search accuracy but also can be used to generate features that significantly boost the performance of the Percolator postprocessor. The DRIP software is built upon a general purpose DBN toolkit (GMTK), thereby allowing a wide variety of options for user-specific inference tasks as well as facilitating easy modifications to the DRIP model in future work. DRIP is implemented in Python and C++ and is available under Apache license at http://melodi-lab.github.io/dripToolkit .","hji,kes",0,0,0,2,0,NA,NA +27399597,Development of a Korean Fracture Risk Score (KFRS) for Predicting Osteoporotic Fracture Risk: Analysis of Data from the Korean National Health Insurance Service.,"

Background

Asian-specific prediction models for estimating individual risk of osteoporotic fractures are rare. We developed a Korean fracture risk prediction model using clinical risk factors and assessed validity of the final model.

Methods

A total of 718,306 Korean men and women aged 50-90 years were followed for 7 years in a national system-based cohort study. In total, 50% of the subjects were assigned randomly to the development dataset and 50% were assigned to the validation dataset. Clinical risk factors for osteoporotic fracture were assessed at the biennial health check. Data on osteoporotic fractures during the follow-up period were identified by ICD-10 codes and the nationwide database of the National Health Insurance Service (NHIS).

Results

During the follow-up period, 19,840 osteoporotic fractures were reported (4,889 in men and 14,951 in women) in the development dataset. The assessment tool called the Korean Fracture Risk Score (KFRS) is comprised of a set of nine variables, including age, body mass index, recent fragility fracture, current smoking, high alcohol intake, lack of regular exercise, recent use of oral glucocorticoid, rheumatoid arthritis, and other causes of secondary osteoporosis. The KFRS predicted osteoporotic fractures over the 7 years. This score was validated using an independent dataset. A close relationship with overall fracture rate was observed when we compared the mean predicted scores after applying the KFRS with the observed risks after 7 years within each 10th of predicted risk.

Conclusion

We developed a Korean specific prediction model for osteoporotic fractures. The KFRS was able to predict risk of fracture in the primary population without bone mineral density testing and is therefore suitable for use in both clinical setting and self-assessment. The website is available at http://www.nhis.or.kr.","hji,kes",0,0,0,2,0,NA,clinical +27408916,"Glyco-centric lectin magnetic bead array (LeMBA) - proteomics dataset of human serum samples from healthy, Barrett׳s esophagus and esophageal adenocarcinoma individuals.","This data article describes serum glycoprotein biomarker discovery and qualification datasets generated using lectin magnetic bead array (LeMBA) - mass spectrometry techniques, """"Serum glycoprotein biomarker discovery and qualification pipeline reveals novel diagnostic biomarker candidates for esophageal adenocarcinoma"""" [1]. Serum samples collected from healthy, metaplastic Barretts esophagus (BE) and esophageal adenocarcinoma (EAC) individuals were profiled for glycoprotein subsets via differential lectin binding. The biomarker discovery proteomics dataset consisting of 20 individual lectin pull-downs for 29 serum samples with a spiked-in internal standard chicken ovalbumin protein has been deposited in the PRIDE partner repository of the ProteomeXchange Consortium with the data set identifier PRIDE: PXD002442. Annotated MS/MS spectra for the peptide identifications can be viewed using MS-Viewer () using search key """"jn7qafftux"""". The qualification dataset contained 6-lectin pulldown-coupled multiple reaction monitoring-mass spectrometry (MRM-MS) data for 41 protein candidates, from 60 serum samples. This dataset is available as a supplemental files with the original publication [1].","hji,kes",0,0,0,2,0,NA,data deposited as referenced +27430689,[Association between polymorphism in Vav3 genes and risk of primary prostatic cancer in Chinese Han population].,"

Objective

To study the associations between genetic variations of Vav3 gene and prostate cancer susceptibility.

Methods

Data were collected in a hospital-based case-control study of 1 015 prostate cancer cases and 1 068 cancer-free controls recruited between 2008 and 2012. Based on the online databases NCBI dbSNP (http://www.ncbi.nlm.nih.gov/projects/SNP) and SNPinfo (http://snpinfo.niehs.nih.gov/snpfunc.htm), functional single nucleotide polymorphisms (SNPs) of Vav3 were screened and genotyped, and their associations with risk of prostate cancer were assessed using logistic regression analysis. Furthermore, the associations between SNPs of Vav3 and some clinicopathological parameters were evaluated.

Results

Among the two SNPs investigated, only Vav3 rs12410676 G>A was associated with decreased prostate cancer risk [additive model, OR=0.80 (0.69-0.93), P=0.003; dominant model, OR=0.81 (0.68-0.97), P=0.022; recessive model, OR=0.54 (0.36-0.82), P=0.004]. The combined effect of Vav3 rs8676 G>A and rs12410676 G>A was found as a decreased prostate cancer risk along with the increased variant alleles (P<0.05). Specifically, participants carrying Vav3 rs12410676 AA/AG genotypes were more likely to be at lower prostate cancer risk, compared with participants carrying GG genotypes, in groups of BMI=25 kg/m(2,) smoking, Gleason>7(4+ 3), and higher invasive prostate cancer. Finally, some positive findings were evidently significant with false positive report probability values at different prior probability levels (0.25, 0.1 and 0.01).

Conclusion

Vav3 SNPs may contribute to the risk of prostate cancer in Eastern Chinese men, but the effect is weak and needs further validation by larger, multicenter and ethnic-based studies.","hji,kes",0,0,0,2,0,NA,NA +27447888,MBMC: An Effective Markov Chain Approach for Binning Metagenomic Reads from Environmental Shotgun Sequencing Projects.,"Metagenomics is a next-generation omics field currently impacting postgenomic life sciences and medicine. Binning metagenomic reads is essential for the understanding of microbial function, compositions, and interactions in given environments. Despite the existence of dozens of computational methods for metagenomic read binning, it is still very challenging to bin reads. This is especially true for reads from unknown species, from species with similar abundance, and/or from low-abundance species in environmental samples. In this study, we developed a novel taxonomy-dependent and alignment-free approach called MBMC (Metagenomic Binning by Markov Chains). Different from all existing methods, MBMC bins reads by measuring the similarity of reads to the trained Markov chains for different taxa instead of directly comparing reads with known genomic sequences. By testing on more than 24 simulated and experimental datasets with species of similar abundance, species of low abundance, and/or unknown species, we report here that MBMC reliably grouped reads from different species into separate bins. Compared with four existing approaches, we demonstrated that the performance of MBMC was comparable with existing approaches when binning reads from sequenced species, and superior to existing approaches when binning reads from unknown species. MBMC is a pivotal tool for binning metagenomic reads in the current era of Big Data and postgenomic integrative biology. 
The MBMC software can be freely downloaded at http://hulab.ucf.edu/research/projects/metagenomics/MBMC.html .","hji,kes",0,0,0,2,0,NA,NA +27456943,Current Trends of Lung Cancer Surgery and Demographic and Social Factors Related to Changes in the Trends of Lung Cancer Surgery: An Analysis of the National Database from 2010 to 2014.,"

Purpose

We investigated current trends in lung cancer surgery and identified demographic and social factors related to changes in these trends.

Materials and methods

We estimated the incidence of lung cancer surgery using a procedure code-based approach provided by the Health Insurance Review and Assessment Service (http://opendata.hira.or.kr). The population data were obtained every year from 2010 to 2014 from the Korean Statistical Information Service (http://kosis.kr/). The annual percent change (APC) and statistical significance were calculated using the Joinpoint software.

Results

From January 2010 to December 2014, 25,687 patients underwent 25,921 lung cancer surgeries, which increased by 45.1% from 2010 to 2014. The crude incidence rate of lung cancer surgery in each year increased significantly (APC, 9.5; p < 0.05). The male-to-female ratio decreased from 2.1 to 1.6 (APC, -6.3; p < 0.05). The incidence increased in the age group of ≥ 70 years for both sexes (male: APC, 3.7; p < 0.05; female: APC, 5.96; p < 0.05). Furthermore, the proportion of female patients aged ≥ 65 years increased (APC, 7.2; p < 0.05), while that of male patients aged < 65 years decreased (APC, -3.9; p < 0.05). The proportions of segmentectomies (APC, 17.8; p < 0.05) and lobectomies (APC, 7.5; p < 0.05) increased, while the proportion of pneumonectomies decreased (APC, -6.3; p < 0.05). Finally, the proportion of patients undergoing surgery in Seoul increased (APC, 1.1; p < 0.05), while the proportion in other areas decreased (APC, -1.5; p < 0.05).

Conclusion

An increase in the use of lung cancer surgery in elderly patients and female patients, and a decrease in the proportion of patients requiring extensive pulmonary resection were identified. Furthermore, centralization of lung cancer surgery was noted.","hji,kes",0,0,0,2,0,NA,NA +27460614,Classifying Schizophrenia Using Multimodal Multivariate Pattern Recognition Analysis: Evaluating the Impact of Individual Clinical Profiles on the Neurodiagnostic Performance.,"Previous studies have shown that structural brain changes are among the best-studied candidate markers for schizophrenia (SZ) along with functional connectivity (FC) alterations of resting-state (RS) patterns. This study aimed to investigate effects of clinical and sociodemographic variables on the classification by applying multivariate pattern analysis (MVPA) to both gray matter (GM) volume and FC measures in patients with SZ and healthy controls (HC). RS and structural magnetic resonance imaging data (sMRI) from 74 HC and 71 SZ patients were obtained from a Mind Research Network COBRE dataset available via COINS (http://coins.mrn.org/dx). We used a MVPA framework using support-vector machines embedded in a repeated, nested cross-validation to generate a multi-modal diagnostic system and evaluate its generalizability. The dependence of neurodiagnostic performance on clinical and sociodemographic variables was evaluated. The RS classifier showed a slightly higher accuracy (70.5%) compared to the structural classifier (69.7%). The combination of sMRI and RS outperformed single MRI modalities classification by reaching 75% accuracy. The RS based moderator analysis revealed that the neurodiagnostic performance was driven by older SZ patients with an earlier illness onset and more pronounced negative symptoms. In contrast, there was no linear relationship between the clinical variables and neuroanatomically derived group membership measures. 
This study achieved higher accuracy distinguishing HC from SZ patients by fusing 2 imaging modalities. In addition the results of RS based moderator analysis showed that age of patients, as well as their age at the illness onset were the most important clinical features.","hji,kes",0,0,0,2,0,NA,NA +27466777,QSAR Modelling of Rat Acute Toxicity on the Basis of PASS Prediction.,"The method for QSAR modelling of rat acute toxicity based on the combination of QNA (Quantitative Neighbourhoods of Atoms) descriptors, PASS (Prediction of Activity Spectra for Substances) predictions and self-consistent regression (SCR) is presented. PASS predicted biological activity profiles are used as independent input variables for QSAR modelling with SCR. QSAR models were developed using LD50 values for compounds tested on rats with four types of administration (oral, intravenous, intraperitoneal, subcutaneous). The proposed method was evaluated on the set of compounds tested for acute rat toxicity with oral administration (7286 compounds) used for testing the known QSAR methods in T.E.S.T. 3.0 program (U.S. EPA). The several other sets of compounds tested for acute rat toxicity by different routes of administration selected from SYMYX MDL Toxicity Database were used too. The method was compared with the results of prediction of acute rodent toxicity for noncongeneric sets obtained by ACD/Labs Inc. The test sets were predicted with regards to the applicability domain. Comparison of accuracy for QSAR models obtained separately using QNA descriptors, PASS predictions, nearest neighbours' assessment with consensus models clearly demonstrated the benefits of consensus prediction. 
Free available web-service for prediction of LD50 values of rat acute toxicity was developed: http://www.pharmaexpert.ru/GUSAR/AcuToxPredict/.","hji,kes",0,0,0,2,0,NA,NA +27502039,Fascin Is Critical for the Maintenance of Breast Cancer Stem Cell Pool Predominantly via the Activation of the Notch Self-Renewal Pathway.,"An emerging dogma shows that tumors are initiated and maintained by a subpopulation of cancer cells that hijack some stem cell features and thus referred to as """"cancer stem cells"""" (CSCs). The exact mechanism that regulates the maintenance of CSC pool remains largely unknown. Fascin is an actin-bundling protein that we have previously demonstrated to be a major regulator of breast cancer chemoresistance and metastasis, two cardinal features of CSCs. Here, we manipulated fascin expression in breast cancer cell lines and used several in vitro and in vivo approaches to examine the relationship between fascin expression and breast CSCs. Fascin knockdown significantly reduced stem cell-like phenotype (CD44hi /CD24lo and ALDH+ ) and reversal of epithelial to mesenchymal transition. Interestingly, expression of the embryonic stem cell transcriptional factors (Oct4, Nanog, Sox2, and Klf4) was significantly reduced when fascin expression was down-regulated. Functionally, fascin-knockdown cells were less competent in forming colonies and tumorspheres, consistent with lower basal self-renewal activity and higher susceptibility to chemotherapy. Fascin effect on CSC chemoresistance and self-renewability was associated with Notch signaling. Activation of Notch induced the relevant downstream targets predominantly in the fascin-positive cells. Limiting-dilution xenotransplantation assay showed higher frequency of tumor-initiating cells in the fascin-positive group. 
Collectively, our data demonstrated fascin as a critical regulator of breast CSC pool at least partially via activation of the Notch self-renewal signaling pathway and modification of the expression embryonic transcriptional factors. Targeting fascin may halt CSCs and thus presents a novel therapeutic approach for effective treatment of breast cancer. Stem Cells 2016;34:2799-2813 Video Highlight: https://youtu.be/GxS4fJ_Ow-o.","hji,kes",0,0,0,2,0,NA,NA +27507827,Glycomics for Microbes and Microbiologists.,"The recent article """"Lectin-Glycan Interaction Network-Based Identification of Host Receptors of Microbial Pathogenic Adhesins"""" by Ielasi et al. describes a new development in microbial carbohydrate analysis [Ielasi FS, Alioscha-Perez M, Donohue D, Claes S, Sahli H, Schols D, Willaert RG, mBio 7(4):e00584-16, 2016, http://dx.doi.org/10.1128/mbio.00584-16]. Specific carbohydrate ligands have been identified from the patterns of lectin binding to oligosaccharides printed on a chip. The new technique links the output to a comprehensive glycan database and offers a number of data visualization options. The graphs highlight the occurrence of potential ligands, organized by organism, tissue, and patterns of association with disease states. The analysis has successfully predicted novel glycoprotein ligands for microbial lectins, including an interaction of E.coli FimH with HIV gp120.","hji,kes",0,0,0,2,0,NA,NA +27512621,Weighted K-means support vector machine for cancer prediction.,"To date, the support vector machine (SVM) has been widely applied to diverse bio-medical fields to address disease subtype identification and pathogenicity of genetic variants. In this paper, I propose the weighted K-means support vector machine (wKM-SVM) and weighted support vector machine (wSVM), for which I allow the SVM to impose weights to the loss term. Besides, I demonstrate the numerical relations between the objective function of the SVM and weights. 
Motivated by general ensemble techniques, which are known to improve accuracy, I directly adopt the boosting algorithm to the newly proposed weighted KM-SVM (and wSVM). For predictive performance, a range of simulation studies demonstrate that the weighted KM-SVM (and wSVM) with boosting outperforms the standard KM-SVM (and SVM) including but not limited to many popular classification rules. I applied the proposed methods to simulated data and two large-scale real applications in the TCGA pan-cancer methylation data of breast and kidney cancer. In conclusion, the weighted KM-SVM (and wSVM) increases accuracy of the classification model, and will facilitate disease diagnosis and clinical treatment decisions to benefit patients. A software package (wSVM) is publicly available at the R-project webpage (https://www.r-project.org).","hji,kes",0,0,0,2,0,NA,NA +27531102,iRSpot-EL: identify recombination spots with an ensemble learning approach.,"

Motivation

Coexisting in a DNA system, meiosis and recombination are two indispensable aspects for cell reproduction and growth. With the avalanche of genome sequences emerging in the post-genomic age, it is an urgent challenge to acquire the information of DNA recombination spots because it can timely provide very useful insights into the mechanism of meiotic recombination and the process of genome evolution.

Results

To address such a challenge, we have developed a predictor, called iRSpot-EL, by fusing different modes of pseudo K-tuple nucleotide composition and mode of dinucleotide-based auto-cross covariance into an ensemble classifier of clustering approach. Five-fold cross tests on a widely used benchmark dataset have indicated that the new predictor remarkably outperforms its existing counterparts. Particularly, far beyond their reach, the new predictor can be easily used to conduct the genome-wide analysis and the results obtained are quite consistent with the experimental map.

Availability and implementation

For the convenience of most experimental scientists, a user-friendly web-server for iRSpot-EL has been established at http://bioinformatics.hitsz.edu.cn/iRSpot-EL/, by which users can easily obtain their desired results without the need to go through the complicated mathematical equations involved.

Contact

bliu@gordonlifescience.org or bliu@insun.hit.edu.cnSupplementary information: Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +27547793,Time-course proteomics dataset monitoring HeLa cells subjected to DTT induced endoplasmic reticulum stress.,"The data described here provide an analysis of the dynamic response of HeLa cell proteome to dithiothreitol (DTT) inducing stress of the endoplasmic reticulum (ER). During ER stress, accumulation of misfolded and unfolded proteins in the lumen of the ER initiates the Unfolded Protein Response (UPR), resulting in a large-scale redistribution of proteins. We used label-free mass spectrometry to monitor the proteomic changes of HeLa cells during a 30-h time course, monitoring eight time points (0, 0.5, 1, 2, 8, 16, 24, and 30h). The data are associated with the research article """"Differential dynamics of the mammalian mRNA and protein expression response to misfolding stress"""" [1], which discusses a core dataset of 1237 proteins. Here, we present the extended dataset of 2131 proteins. The raw mass spectrometry data and the analysis results have been deposited to the ProteomeXchange Consortium (http://proteomecentral.proteomexchange.org) via the PRIDE partner repository with the dataset identifier PRIDE: PXD002039.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +27559155,Fast-SNP: a fast matrix pre-processing algorithm for efficient loopless flux optimization of metabolic models.,"

Motivation

Computation of steady-state flux solutions in large metabolic models is routinely performed using flux balance analysis based on a simple LP (Linear Programming) formulation. A minimal requirement for thermodynamic feasibility of the flux solution is the absence of internal loops, which are enforced using 'loopless constraints'. The resulting loopless flux problem is a substantially harder MILP (Mixed Integer Linear Programming) problem, which is computationally expensive for large metabolic models.

Results

We developed a pre-processing algorithm that significantly reduces the size of the original loopless problem into an easier and equivalent MILP problem. The pre-processing step employs a fast matrix sparsification algorithm—Fast-sparse null-space pursuit (SNP)—inspired by recent results on SNP. By finding a reduced feasible 'loop-law' matrix subject to known directionalities, Fast-SNP considerably improves the computational efficiency in several metabolic models running different loopless optimization problems. Furthermore, analysis of the topology encoded in the reduced loop matrix enabled identification of key directional constraints for the potential permanent elimination of infeasible loops in the underlying model. Overall, Fast-SNP is an effective and simple algorithm for efficient formulation of loop-law constraints, making loopless flux optimization feasible and numerically tractable at large scale.

Availability and implementation

Source code for MATLAB including examples is freely available for download at http://www.aibn.uq.edu.au/cssb-resources under Software. Optimization uses Gurobi, CPLEX or GLPK (the latter is included with the algorithm).

Contact

lars.nielsen@uq.edu.au. Supplementary information: Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +27596593,Order restricted inference for oscillatory systems for detecting rhythmic signals.,"

Motivation

Many biological processes, such as cell cycle, circadian clock, menstrual cycles, are governed by oscillatory systems consisting of numerous components that exhibit rhythmic patterns over time. It is not always easy to identify such rhythmic components. For example, it is a challenging problem to identify circadian genes in a given tissue using time-course gene expression data. There is a great potential for misclassifying non-rhythmic as rhythmic genes and vice versa. This has been a problem of considerable interest in recent years. In this article we develop a constrained inference based methodology called Order Restricted Inference for Oscillatory Systems (ORIOS) to detect rhythmic signals. Instead of using mathematical functions (e.g. sinusoidal) to describe shape of rhythmic signals, ORIOS uses mathematical inequalities. Consequently, it is robust and not limited by the biologist's choice of the mathematical model. We studied the performance of ORIOS using simulated as well as real data obtained from mouse liver, pituitary gland and data from NIH3T3, U2OS cell lines. Our results suggest that, for a broad collection of patterns of gene expression, ORIOS has substantially higher power to detect true rhythmic genes in comparison to some popular methods, while also declaring substantially fewer non-rhythmic genes as rhythmic.

Availability and implementation

A user friendly code implemented in R language can be downloaded from http://www.niehs.nih.gov/research/atniehs/labs/bb/staff/peddada/index.cfm CONTACT: peddada@niehs.nih.gov.","hji,kes",0,0,0,2,0,NA,NA +27597880,Functional networks inference from rule-based machine learning models.,"

Background

Functional networks play an important role in the analysis of biological processes and systems. The inference of these networks from high-throughput (-omics) data is an area of intense research. So far, the similarity-based inference paradigm (e.g. gene co-expression) has been the most popular approach. It assumes a functional relationship between genes which are expressed at similar levels across different samples. An alternative to this paradigm is the inference of relationships from the structure of machine learning models. These models are able to capture complex relationships between variables, that often are different/complementary to the similarity-based methods.

Results

We propose a protocol to infer functional networks from machine learning models, called FuNeL. It assumes, that genes used together within a rule-based machine learning model to classify the samples, might also be functionally related at a biological level. The protocol is first tested on synthetic datasets and then evaluated on a test suite of 8 real-world datasets related to human cancer. The networks inferred from the real-world data are compared against gene co-expression networks of equal size, generated with 3 different methods. The comparison is performed from two different points of view. We analyse the enriched biological terms in the set of network nodes and the relationships between known disease-associated genes in a context of the network topology. The comparison confirms both the biological relevance and the complementary character of the knowledge captured by the FuNeL networks in relation to similarity-based methods and demonstrates its potential to identify known disease associations as core elements of the network. Finally, using a prostate cancer dataset as a case study, we confirm that the biological knowledge captured by our method is relevant to the disease and consistent with the specialised literature and with an independent dataset not used in the inference process.

Availability

The implementation of our network inference protocol is available at: http://ico2s.org/software/funel.html.","hji,kes",0,0,0,2,0,NA,NA +27604408,ExonImpact: Prioritizing Pathogenic Alternative Splicing Events.,"Alternative splicing (AS) is a closely regulated process that allows a single gene to encode multiple protein isoforms, thereby contributing to the diversity of the proteome. Dysregulation of the splicing process has been found to be associated with many inherited diseases. However, among the pathogenic AS events, there are numerous """"passenger"""" events whose inclusion or exclusion does not lead to significant changes with respect to protein function. In this study, we evaluate the secondary and tertiary structural features of proteins associated with disease-causing and neutral AS events, and show that several structural features are strongly associated with the pathological impact of exon inclusion. We further develop a machine-learning-based computational model, ExonImpact, for prioritizing and evaluating the functional consequences of hitherto uncharacterized AS events. We evaluated our model using several strategies including cross-validation, and data from the Gene-Tissue Expression (GTEx) and ClinVar databases. ExonImpact is freely available at http://watson.compbio.iupui.edu/ExonImpact.","hji,kes",0,0,0,2,0,NA,NA +27624719,An Excel Spreadsheet Model for States and Districts to Assess the Cost-Benefit of School Nursing Services.,"This paper describes a user-friendly, Excel spreadsheet model and two data collection instruments constructed by the authors to help states and districts perform cost-benefit analyses of school nursing services delivered by full-time school nurses. 
Prior to applying the model, states or districts need to collect data using two forms: """"Daily Nurse Data Collection Form"""" and the """"Teacher Survey."""" The former is used to record daily nursing activities, including number of student health encounters, number of medications administered, number of student early dismissals, and number of medical procedures performed. The latter is used to obtain estimates for the time teachers spend addressing student health issues. Once inputs are entered in the model, outputs are automatically calculated, including program costs, total benefits, net benefits, and benefit-cost ratio. The spreadsheet model, data collection tools, and instructions are available at the NASN website ( http://www.nasn.org/The/CostBenefitAnalysis ).","hji,kes",0,0,0,2,0,NA,fascinating +27660521,AxIOM: Amphipod crustaceans from insular Posidonia oceanica seagrass meadows.,"

Background

The Neptune grass, Posidonia oceanica (L.) Delile, 1813, is the most widespread seagrass of the Mediterranean Sea. This foundation species forms large meadows that, through habitat and trophic services, act as biodiversity hotspots. In Neptune grass meadows, amphipod crustaceans are one of the dominant groups of vagile invertebrates, forming an abundant and diverse taxocenosis. They are key ecological components of the complex, pivotal, yet critically endangered Neptune grass ecosystems. Nevertheless, comprehensive qualitative and quantitative data about amphipod fauna found in Mediterranean Neptune grass meadows remain scarce, especially in insular locations.

New information

Here, we provide in-depth metadata about AxIOM, a sample-based dataset published on the GBIF portal. AxIOM is based on an extensive and spatially hierarchized sampling design with multiple years, seasons, day periods, and methods. Samples were taken along the coasts of Calvi Bay (Corsica, France) and of the Tavolara-Punta Coda Cavallo Marine Protected Area (Sardinia, Italy). In total, AxIOM contains 187 samples documenting occurrence (1775 records) and abundance (10720 specimens) of amphipod crustaceans belonging to 72 species spanning 29 families. The dataset is available at http://ipt.biodiversity.be/resource?r=axiom.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +27660889,MINFIT: A Spreadsheet-Based Tool for Parameter Estimation in an Equilibrium Speciation Software Program.,"Determination of equilibrium constants describing chemical reactions in the aqueous phase and at solid-water interface relies on inverse modeling and parameter estimation. Although there are existing tools available, the steep learning curve prevents the wider community of environmental engineers and chemists to adopt those tools. Stemming from classical chemical equilibrium codes, MINEQL+ has been one of the most widely used chemical equilibrium software programs. We developed a spreadsheet-based tool, which we are calling MINFIT, that interacts with MINEQL+ to perform parameter estimations that optimize model fits to experimental data sets. MINFIT enables automatic and convenient screening of a large number of parameter sets toward the optimal solutions by calling MINEQL+ to perform iterative forward calculations following either exhaustive equidistant grid search or randomized search algorithms. The combined use of the two algorithms can securely guide the searches for the global optima. We developed interactive interfaces so that the optimization processes are transparent. 
Benchmark examples including both aqueous and surface complexation problems illustrate the parameter estimation and associated sensitivity analysis. MINFIT is accessible at http://minfit.strikingly.com .","hji,kes",0,0,0,2,0,NA,NA +27752427,Possibilities of using the German Federal States' permanent soil monitoring program for the monitoring of potential effects of genetically modified organisms (GMO).,"

Background

In the Directive 2001/18/EC on the deliberate release of genetically modified organisms (GMO) into the environment, a monitoring of potential risks is prescribed after their deliberate release or placing on the market. Experience and data of already existing monitoring networks should be included. The present paper summarizes the major findings of a project funded by the Federal Agency for Nature Conservation (Nutzungsmöglichkeiten der Boden-Dauerbeobachtung der Länder für das Monitoring der Umweltwirkungen gentechnisch veränderter Pflanzen. BfN Skripten, Bonn-Bad Godesberg 369, 2014). The full report in German language can be accessed on http://www.bfn.de and is available as Additional file 1. The aim of the project was to check if it is possible to use the German permanent soil monitoring program (PSM) for the monitoring of GMO. Soil organism communities are highly diverse and relevant with respect to the sustainability of soil functions. They are exposed to GMO material directly by feeding or indirectly through food chain interactions. Other impacts are possible due to their close association to soil particles.

Results

The PSM program can be considered as representative with regard to different soil types and ecoregions in Germany, but not for all habitat types relevant for soil organisms. Nevertheless, it is suitable as a basic grid for monitoring the potential effects of GMO on soil invertebrates.

Conclusions

PSM sites should be used to derive reference values, i.e. range of abundance and presence of different relevant species of soil organisms. Based on these references, it is possible to derive threshold values to define the limit of acceptable change or impact. Therefore, a minimum set of sites and minimum set of standardized methods are needed, i.e. characterization of each site, sampling of selected soil organism groups, adequate adaptation of methods for the purpose of monitoring of potential effects of GMO. Finally, and probably most demanding, it is needed to develop a harmonized evaluation concept.","hji,kes",0,0,0,2,0,NA,NA +27796840,An IoT-cloud Based Wearable ECG Monitoring System for Smart Healthcare.,"Public healthcare has been paid an increasing attention given the exponential growth human population and medical expenses. It is well known that an effective health monitoring system can detect abnormalities of health conditions in time and make diagnoses according to the gleaned data. As a vital approach to diagnose heart diseases, ECG monitoring is widely studied and applied. However, nearly all existing portable ECG monitoring systems cannot work without a mobile application, which is responsible for data collection and display. In this paper, we propose a new method for ECG monitoring based on Internet-of-Things (IoT) techniques. ECG data are gathered using a wearable monitoring node and are transmitted directly to the IoT cloud using Wi-Fi. Both the HTTP and MQTT protocols are employed in the IoT cloud in order to provide visual and timely ECG data to users. Nearly all smart terminals with a web browser can acquire ECG data conveniently, which has greatly alleviated the cross-platform issue. Experiments are carried out on healthy volunteers in order to verify the reliability of the entire system. 
Experimental results reveal that the proposed system is reliable in collecting and displaying real-time ECG data, which can aid in the primary diagnosis of certain heart diseases.","hji,kes",0,0,0,2,0,NA,NA +27878809,Telephone-assisted self-help for parents of children with attention-deficit/hyperactivity disorder who have residual functional impairment despite methylphenidate treatment: a randomized controlled trial.,"

Background

Self-help parenting interventions have been shown to be effective in the management of children with attention-deficit/hyperactivity disorder (ADHD) and may be useful when there are barriers to face-to-face therapist-led parent trainings. Previous studies indicate that behavioral interventions might be a useful adjunct to medication in children with residual ADHD symptoms, and regarding comorbid oppositional symptoms and multiple domains of functional impairment. In the present study, we examined whether a telephone-assisted self-help (TASH) parenting behavioral intervention (written materials plus telephone counseling) enhanced the effects of methylphenidate treatment in children with ADHD.

Methods

In this randomized controlled trial, parents of 103 school-aged children with ADHD and residual functional impairment despite methylphenidate treatment were randomly assigned to either the enhancement group, which received the TASH intervention as adjunct to routine clinical care (including continued medication), or to the active control group, which received routine clinical care only (including continued medication). Parent-completed outcome measures at baseline and at 12 months (postassessment) included functional impairment, ADHD symptoms, oppositional defiant disorder (ODD) symptoms, parenting behavior, and parental satisfaction with the intervention (ClinicalTrials.gov: NCT01660425; URL: https://clinicaltrials.gov/ct2/show/NCT01660425).

Results

Intention-to-treat analyses of covariance (ANCOVAs), which controlled for baseline data, revealed significant and moderate intervention effects for ODD symptoms and negative parenting behavior at the postassessment, whereas per-protocol analyses additionally showed significant and moderate effects on functional impairment (primary outcome). Parents expressed high satisfaction with the program.

Conclusions

The TASH program enhances effects of methylphenidate treatment in families who complete the intervention. The discontinuation rate of about 30% and comparison between completing and discontinuing families suggest that the program may be more suitable for families with a higher educational level and fewer additional stresses.","hji,kes",0,0,0,2,0,NA,NA +27886717,RUbioSeq+: A multiplatform application that executes parallelized pipelines to analyse next-generation sequencing data.,"

Background and objective

To facilitate routine analysis and to improve the reproducibility of the results, next-generation sequencing (NGS) analysis requires intuitive, efficient and integrated data processing pipelines.

Methods

We have selected well-established software to construct a suite of automated and parallelized workflows to analyse NGS data for DNA-seq (single-nucleotide variants (SNVs) and indels), CNA-seq, bisulfite-seq and ChIP-seq experiments.

Results

Here, we present RUbioSeq+, an updated and extended version of RUbioSeq, a multiplatform application that incorporates a suite of automated and parallelized workflows to analyse NGS data. This new version includes: (i) an interactive graphical user interface (GUI) that facilitates its use by both biomedical researchers and bioinformaticians, (ii) a new pipeline for ChIP-seq experiments, (iii) pair-wise comparisons (case-control analyses) for DNA-seq experiments, (iv) and improvements in the parallelized and multithreaded execution options. Results generated by our software have been experimentally validated and accepted for publication.

Conclusions

RUbioSeq+ is free and open to all users at http://rubioseq.bioinfo.cnio.es/.","hji,kes",0,0,0,2,0,NA,NA +27895719,DRABAL: novel method to mine large high-throughput screening assays using Bayesian active learning.,"

Background

Mining high-throughput screening (HTS) assays is key for enhancing decisions in the area of drug repositioning and drug discovery. However, many challenges are encountered in the process of developing suitable and accurate methods for extracting useful information from these assays. Virtual screening and a wide variety of databases, methods and solutions proposed to-date, did not completely overcome these challenges. This study is based on a multi-label classification (MLC) technique for modeling correlations between several HTS assays, meaning that a single prediction represents a subset of assigned correlated labels instead of one label. Thus, the devised method provides an increased probability for more accurate predictions of compounds that were not tested in particular assays.

Results

Here we present DRABAL, a novel MLC solution that incorporates structure learning of a Bayesian network as a step to model dependency between the HTS assays. In this study, DRABAL was used to process more than 1.4 million interactions of over 400,000 compounds and analyze the existing relationships between five large HTS assays from the PubChem BioAssay Database. Compared to different MLC methods, DRABAL significantly improves the F1Score by about 22%, on average. We further illustrated usefulness and utility of DRABAL through screening FDA approved drugs and reported ones that have a high probability to interact with several targets, thus enabling drug-multi-target repositioning. Specifically DRABAL suggests the Thiabendazole drug as a common activator of the NCP1 and Rab-9A proteins, both of which are designed to identify treatment modalities for the Niemann-Pick type C disease.

Conclusion

We developed a novel MLC solution based on a Bayesian active learning framework to overcome the challenge of lacking fully labeled training data and exploit actual dependencies between the HTS assays. The solution is motivated by the need to model dependencies between existing experimental confirmatory HTS assays and improve prediction performance. We have pursued extensive experiments over several HTS assays and have shown the advantages of DRABAL. The datasets and programs can be downloaded from https://figshare.com/articles/DRABAL/3309562.Graphical abstract.","hji,kes",0,0,0,2,0,NA,NA +27896026,jicbioimage: a tool for automated and reproducible bioimage analysis.,"There has been steady improvement in methods for capturing bioimages. However analysing these images still remains a challenge. The Python programming language provides a powerful and flexible environment for scientific computation. It has a wide range of supporting libraries for image processing but lacks native support for common bioimage formats, and requires specific code to be written to ensure that suitable audit trails are generated and analyses are reproducible. Here we describe the development of a Python tool that: (1) allows users to quickly view and explore microscopy data; (2) generate reproducible analyses, encoding a complete history of image transformations from raw data to final result; and (3) scale up analyses from initial exploration to high throughput processing pipelines, with a minimal amount of extra effort. The tool, jicbioimage, is open source and freely available online at http://jicbioimage.readthedocs.io.","hji,kes",0,0,0,2,0,NA,NA +27976886,MIB: Metal Ion-Binding Site Prediction and Docking Server.,"The structure of a protein determines its biological function(s) and its interactions with other factors; the binding regions tend to be conserved in sequence and structure, and the interacting residues involved are usually in close 3D space. 
The Protein Data Bank currently contains more than 110000 protein structures, approximately one-third of which contain metal ions. Identifying and characterizing metal ion-binding sites is thus essential for investigating a protein's function(s) and interactions. However, experimental approaches are time-consuming and costly. The web server reported here was built to predict metal ion-binding residues and to generate the predicted metal ion-bound 3D structure. Binding templates have been constructed for regions that bind 12 types of metal ion-binding residues have been used to construct binding templates. The templates include residues within 3.5 Å of the metal ion, and the fragment transformation method was used for structural comparison between query proteins and templates without any data training. Through the adjustment of scoring functions, which are based on the similarity of structure and binding residues. Twelve kinds of metal ions (Ca2+, Cu2+, Fe3+, Mg2+, Mn2+, Zn2+, Cd2+, Fe2+, Ni2+, Hg2+, Co2+, and Cu+) binding residues prediction are supported. MIB also provides the metal ions docking after prediction. The MIB server is available at http://bioinfo.cmu.edu.tw/MIB/ .","hji,kes",0,0,0,2,0,NA,NA +27993775,GENIUS: web server to predict local gene networks and key genes for biological functions.,"

Summary

GENIUS is a user-friendly web server that uses a novel machine learning algorithm to infer functional gene networks focused on specific genes and experimental conditions that are relevant to biological functions of interest. These functions may have different levels of complexity, from specific biological processes to complex traits that involve several interacting processes. GENIUS also enriches the network with new genes related to the biological function of interest, with accuracies comparable to highly discriminative Support Vector Machine methods.

Availability and implementation

GENIUS currently supports eight model organisms and is freely available for public use at http://networks.bio.puc.cl/genius .

Contact

genius.psbl@gmail.com.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28035026,InMoDe: tools for learning and visualizing intra-motif dependencies of DNA binding sites.,"

Summary

Recent studies have shown that the traditional position weight matrix model is often insufficient for modeling transcription factor binding sites, as intra-motif dependencies play a significant role for an accurate description of binding motifs. Here, we present the Java application InMoDe, a collection of tools for learning, leveraging and visualizing such dependencies of putative higher order. The distinguishing feature of InMoDe is a robust model selection from a class of parsimonious models, taking into account dependencies only if justified by the data while choosing for simplicity otherwise.

Availability and implementation

InMoDe is implemented in Java and is available as command line application, as application with a graphical user-interface, and as an integration into Galaxy on the project website at http://www.jstacs.de/index.php/InMoDe .

Contact

ralf.eggeling@cs.helsinki.fi.","hji,kes",0,0,0,2,0,NA,NA +28039165,PBIT: Pipeline Builder for Identification of drug Targets for infectious diseases.,"

Summary

PBIT (Pipeline Builder for Identification of drug Targets) is an online webserver that has been developed for screening of microbial proteomes for critical features of human drug targets such as being non-homologous to human proteome as well as the human gut microbiota, essential for the pathogen's survival, participation in pathogen-specific pathways etc. The tool has been validated by analyzing 57 putative targets of Candida albicans documented in literature. PBIT integrates various in silico approaches known for drug target identification and will facilitate high-throughput prediction of drug targets for infectious diseases, including multi-pathogenic infections.

Availability and implementation

PBIT is freely accessible at http://www.pbit.bicnirrh.res.in/ .

Contact

thomass@nirrh.res.in.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28066963,Phenotiki: an open software and hardware platform for affordable and easy image-based phenotyping of rosette-shaped plants.,"Phenotyping is important to understand plant biology, but current solutions are costly, not versatile or are difficult to deploy. To solve this problem, we present Phenotiki, an affordable system for plant phenotyping that, relying on off-the-shelf parts, provides an easy to install and maintain platform, offering an out-of-box experience for a well-established phenotyping need: imaging rosette-shaped plants. The accompanying software (with available source code) processes data originating from our device seamlessly and automatically. Our software relies on machine learning to devise robust algorithms, and includes an automated leaf count obtained from 2D images without the need of depth (3D). Our affordable device (~200) can be deployed in growth chambers or greenhouse to acquire optical 2D images of approximately up to 60 adult Arabidopsis rosettes concurrently. Data from the device are processed remotely on a workstation or via a cloud application (based on CyVerse). In this paper, we present a proof-of-concept validation experiment on top-view images of 24 Arabidopsis plants in a combination of genotypes that has not been compared previously. Phenotypic analysis with respect to morphology, growth, color and leaf count has not been performed comprehensively before now. We confirm the findings of others on some of the extracted traits, showing that we can phenotype at reduced cost. We also perform extensive validations with external measurements and with higher fidelity equipment, and find no loss in statistical accuracy when we use the affordable setting that we propose. 
Device set-up instructions and analysis software are publicly available ( http://phenotiki.com).","hji,kes",0,0,0,2,0,NA,NA +28086253,[Vision Loss after Silicone Oil Surgery].,"Silicone oil is an intraocular tamponade that is essential for the treatment of complicated retinal detachment. As a long-term tamponade, it improves retinal reattachment and visual outcome. Unexpectedly, surgery with silicone oil tamponade may result in irreversible visual loss of unknown origin. In this report, we provide a general overview of unexplained visual loss after surgery with silicone oil. The frequency of such reports has increased continuously in recent years. The German Retina Society - supported by Retinanet (http://retina-net.uni-koeln.de) - has initiated data collection to gather information about such cases, in cooperation with Cologne University Eye Hospital. Ophthalmologists can provide data about cases of unexplained visual loss anonymously via the """"Cologne Clinical Trials Centre"""" or via augenklinik-silikonoel@uk-koeln.de.","hji,kes",0,0,0,2,0,NA,clinical +28130236,H(O)TA: estimation of DNA methylation and hydroxylation levels and efficiencies from time course data.,"

Motivation

Methylation and hydroxylation of cytosines to form 5-methylcytosine (5mC) and 5-hydroxymethylcytosine (5hmC) belong to the most important epigenetic modifications and their vital role in the regulation of gene expression has been widely recognized. Recent experimental techniques allow to infer methylation and hydroxylation levels at CpG dinucleotides but require a sophisticated statistical analysis to achieve accurate estimates.

Results

We present H(O)TA, a software tool based on a stochastic modeling approach, which simultaneously analyzes time course data from hairpin bisulfite sequencing and hairpin oxidative bisulfite sequencing.

Availability and implementation

https://mosi.uni-saarland.de/HOTA.

Contact

charalampos.kyriakopoulos@uni-saarland.de or verena.wolf@uni-saarland.de.","hji,kes",0,0,0,2,0,NA,NA +28130241,Classification of RNA structure change by 'gazing' at experimental data.,"

Motivation

Mutations (or Single Nucleotide Variants) in folded RiboNucleic Acid structures that cause local or global conformational change are riboSNitches. Predicting riboSNitches is challenging, as it requires making two, albeit related, structure predictions. The data most often used to experimentally validate riboSNitch predictions is Selective 2' Hydroxyl Acylation by Primer Extension, or SHAPE. Experimentally establishing a riboSNitch requires the quantitative comparison of two SHAPE traces: wild-type (WT) and mutant. Historically, SHAPE data was collected on electropherograms and change in structure was evaluated by 'gel gazing.' SHAPE data is now routinely collected with next generation sequencing and/or capillary sequencers. We aim to establish a classifier capable of simulating human 'gazing' by identifying features of the SHAPE profile that human experts agree 'looks' like a riboSNitch.

Results

We find strong quantitative agreement between experts when RNA scientists 'gaze' at SHAPE data and identify riboSNitches. We identify dynamic time warping and seven other features predictive of the human consensus. The classSNitch classifier reported here accurately reproduces human consensus for 167 mutant/WT comparisons with an Area Under the Curve (AUC) above 0.8. When we analyze 2019 mutant traces for 17 different RNAs, we find that features of the WT SHAPE reactivity allow us to improve thermodynamic structure predictions of riboSNitches. This is significant, as accurate RNA structural analysis and prediction is likely to become an important aspect of precision medicine.

Availability and implementation

The classSNitch R package is freely available at http://classsnitch.r-forge.r-project.org .

Contact

alain@email.unc.edu.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28137292,Correcting for cell-type effects in DNA methylation studies: reference-based method outperforms latent variable approaches in empirical studies.,"Based on an extensive simulation study, McGregor and colleagues recently recommended the use of surrogate variable analysis (SVA) to control for the confounding effects of cell-type heterogeneity in DNA methylation association studies in scenarios where no cell-type proportions are available. As their recommendation was mainly based on simulated data, we sought to replicate findings in two large-scale empirical studies. In our empirical data, SVA did not fully correct for cell-type effects, its performance was somewhat unstable, and it carried a risk of missing true signals caused by removing variation that might be linked to actual disease processes. By contrast, a reference-based correction method performed well and did not show these limitations. A disadvantage of this approach is that if reference methylomes are not (publicly) available, they will need to be generated once for a small set of samples. However, given the notable risk we observed for cell-type confounding, we argue that, to avoid introducing false-positive findings into the literature, it could be well worth making this investment.Please see related Correspondence article: https://genomebiology.biomedcentral.com/articles/10/1186/s13059-017-1149-7 and related Research article: https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0935-y.","hji,kes",0,0,0,2,0,NA,NA +28172348,ARResT/Interrogate: an interactive immunoprofiler for IG/TR NGS data.,"

Motivation

The study of immunoglobulins and T cell receptors using next-generation sequencing has finally allowed exploring immune repertoires and responses in their immense variability and complexity. Unsurprisingly, their analysis and interpretation is a highly convoluted task.

Results

We thus implemented ARResT/Interrogate, a web-based, interactive application. It can organize and filter large amounts of immunogenetic data by numerous criteria, calculate several relevant statistics, and present results in the form of multiple interconnected visualizations.

Availability and implementation

ARResT/Interrogate is implemented primarily in R, and is freely available at http://bat.infspire.org/arrest/interrogate/

Contact

nikos.darzentas@gmail.com

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28172495,LDAP: a web server for lncRNA-disease association prediction.,"

Motivation

Increasing evidences have demonstrated that long noncoding RNAs (lncRNAs) play important roles in many human diseases. Therefore, predicting novel lncRNA-disease associations would contribute to dissect the complex mechanisms of disease pathogenesis. Some computational methods have been developed to infer lncRNA-disease associations. However, most of these methods infer lncRNA-disease associations only based on single data resource.

Results

In this paper, we propose a new computational method to predict lncRNA-disease associations by integrating multiple biological data resources. Then, we implement this method as a web server for lncRNA-disease association prediction (LDAP). The input of the LDAP server is the lncRNA sequence. The LDAP predicts potential lncRNA-disease associations by using a bagging SVM classifier based on lncRNA similarity and disease similarity.

Availability and implementation

The web server is available at http://bioinformatics.csu.edu.cn/ldap

Contact

jxwang@mail.csu.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28172632,SEQualyzer: interactive tool for quality control and exploratory analysis of high-throughput RNA structural profiling data.,"

Summary

To serve numerous functional roles, RNA must fold into specific structures. Determining these structures is thus of paramount importance. The recent advent of high-throughput sequencing-based structure profiling experiments has provided important insights into RNA structure and widened the scope of RNA studies. However, as a broad range of approaches continues to emerge, a universal framework is needed to quantitatively ensure consistent and high-quality data. We present SEQualyzer, a visual and interactive application that makes it easy and efficient to gauge data quality, screen for transcripts with high-quality information and identify discordant replicates in structure profiling experiments. Our methods rely on features common to a wide range of protocols and can serve as standards for quality control and analyses.

Availability and implementation

SEQualyzer is written in R, is platform-independent, and is freely available at http://bme.ucdavis.edu/aviranlab/SEQualyzer.

Contact

saviran@ucdavis.edu

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28192776,"CORAL and Nano-QFAR: Quantitative feature - Activity relationships (QFAR) for bioavailability of nanoparticles (ZnO, CuO, Co3O4, and TiO2).","Quantitative feature - activity relationships (QFAR) approach was applied to prediction of bioavailability of metal oxide nanoparticles. ZnO, CuO, Co3O4, and TiO2 nanoxides were considered. The computational model for bioavailability of investigated species is asserted. The model was calculated using the Monte Carlo method. The CORAL free software (http://www.insilico.eu/coral) was used in this study. The developed model was tested by application of three different splits of data into the training and validation sets. So-called, quasi-SMILES are used to represent the conditions of action of metal oxide nanoparticles. A new paradigm of building up predictive models of endpoints related to nanomaterials is suggested. The paradigm is the following """"An endpoint is a mathematical function of available eclectic data (conditions)"""". Recently, the paradigm has been checked up with endpoints related to metal oxide nanoparticles, fullerenes, and multi-walled carbon-nanotubes.","hji,kes",0,0,0,2,0,NA,NA +28238542,Aberrant expression of cell cycle and material metabolism related genes contributes to hepatocellular carcinoma occurrence.,"This study aims to deepen our understanding of the molecular mechanism underlying the occurrence of hepatocellular carcinoma (HCC). We first downloaded a gene expression profile dataset GSE29721 (10 HCC and 10 control samples) from Gene Expression Omnibus database (http://www.ncbi.nlm.nih.gov/geo/). Differentially expressed genes (DEGs) were identified by the paired t-test using limma package. Pathway and functional enrichment analyses were performed with DAVID tools. Transcription factors were annotated with TRANSFAC database and tumor associated genes (TAGs) were annotated with TAG and TSGene databases. 
Protein-protein interaction (PPI) network was conducted using STRING online tool and function module was further identified with BioNet package. Totally, 527 up-regulated DEGs and 587 down-regulated DEGs were identified. GO functional and KEGG pathway enrichment analyses showed that the up-regulated DEGs were mainly related to cell division and cell cycle, while the down-regulated DEGs were largely related to material metabolism, especially secondary metabolism. Proteins encoded by DEGs CDK1, BUB1, CDC20, NCAPG, NDC80, CDCA8, MAD2L1, CCNB1, CCNA2 and BIRC5 were hub genes with high degrees in the PPI network; further module analysis detected a subnetwork consisting of 55 proteins, such as CYP2B6, ACAA1, BHMT and ALDH2. Taken together, aberrant expression of cell cycle related genes (e.g., CDK1, CCNA2, CCNB1, BUB1, MAD2L1 and CDC20) and material metabolism related genes (e.g., CYP2B6, ACAA1, BHMT and ALDH2) may contribute to HCC occurrence.","hji,kes",0,0,0,2,0,NA,references other data resource +28254065,"biomechZoo: An open-source toolbox for the processing, analysis, and visualization of biomechanical movement data.","It is common for biomechanics data sets to contain numerous dependent variables recorded over time, for many subjects, groups, and/or conditions. These data often require standard sorting, processing, and analysis operations to be performed in order to answer research questions. Visualization of these data is also crucial. This manuscript presents biomechZoo, an open-source toolbox that provides tools and graphical user interfaces to help users achieve these goals. The aims of this manuscript are to (1) introduce the main features of the toolbox, including a virtual three-dimensional environment to animate motion data (Director), a data plotting suite (Ensembler), and functions for the computation of three-dimensional lower-limb joint angles, moments, and power and (2) compare these computations to those of an existing validated system. 
To these ends, the steps required to process and analyze a sample data set via the toolbox are outlined. The data set comprises three-dimensional marker, ground reaction force (GRF), joint kinematic, and joint kinetic data of subjects performing straight walking and 90 turning manoeuvres. Joint kinematics and kinetics processed within the toolbox were found to be similar to outputs from a commercial system. The biomechZoo toolbox represents the work of several years and multiple contributors to provide a flexible platform to examine time-series data sets typical in the movement sciences. The toolbox has previously been used to process and analyse walking, running, and ice hockey data sets, and can integrate existing routines, such as the KineMat toolbox, for additional analyses. The toolbox can help researchers and clinicians new to programming or biomechanics to process and analyze their data through a customizable workflow, while advanced users are encouraged to contribute additional functionality to the project. Students may benefit from using biomechZoo as a learning and research tool. It is hoped that the toolbox can play a role in advancing research in the movement sciences. The biomechZoo m-files, sample data, and help repositories are available online (http://www.biomechzoo.com) under the Apache 2.0 License. The toolbox is supported for Matlab (r2014b or newer, The Mathworks Inc., Natick, USA) for Windows (Microsoft Corp., Redmond, USA) and Mac OS (Apple Inc., Cupertino, USA).","hji,kes",0,0,0,2,0,NA,NA +28263273,Hospital Administration and Nursing Leadership in Disasters: An Exploratory Study Using Concept Mapping.,"Strong leadership is critical in disaster situations when """"patient surge"""" challenges a hospital's capacity to respond and normally acceptable patterns of care are disrupted. Activation of the emergency operations plan triggers an incident command system structure for leadership decision making. 
Yet, implementation of the emergency operations plan and incident command system protocols is ultimately subject to nursing and hospital leadership at the service- and unit level. The results of these service-/unit-based leadership decisions have the potential to directly impact staff and patient safety, quality of care, and ultimately, patient outcomes. Despite the critical nature of these events, nurse leaders and administrators receive little education regarding leadership and decision making during disaster events. The purpose of this study is to identify essential competencies of nursing and hospital administrators' leadership during disaster events. An integrative mixed-methods design combining qualitative and quantitative approaches to data collection and analysis was used. Five focus groups were conducted with nurse leaders and hospital administrators at a large urban hospital in the Northeastern United States in a collaborative group process to generate relevant leadership competencies. Concept Systems Incorporated was used to sort, prioritize, and analyze the data (http://conceptsystemsinc.com/). The results suggest that participants' institutional knowledge (of existing resources, communications, processes) and prior disaster experience increase leadership competence.","hji,kes",0,0,0,2,0,NA,NA +28409825,Targeting inflammation to reduce cardiovascular disease risk: a realistic clinical prospect?,"Data from basic science experiments is overwhelmingly supportive of the causal role of immune-inflammatory response(s) at the core of atherosclerosis, and therefore, the theoretical potential to manipulate the inflammatory response to prevent cardiovascular events. However, extrapolation to humans requires care and we still lack definitive evidence to show that interfering in immune-inflammatory processes may safely lessen clinical atherosclerosis. 
In this review, we discuss key therapeutic targets in the treatment of vascular inflammation, placing basic research in a wider clinical perspective, as well as identifying outstanding questions.

Linked articles

This article is part of a themed section on Targeting Inflammation to Reduce Cardiovascular Disease Risk. To view the other articles in this section visit http://onlinelibrary.wiley.com/doi/10.1111/bph.v174.22/issuetoc and http://onlinelibrary.wiley.com/doi/10.1111/bcp.v82.4/issuetoc.","hji,kes",0,0,0,2,0,NA,NA +28426817,Evaluation of machine learning algorithms and structural features for optimal MRI-based diagnostic prediction in psychosis.,"A relatively large number of studies have investigated the power of structural magnetic resonance imaging (sMRI) data to discriminate patients with schizophrenia from healthy controls. However, very few of them have also included patients with bipolar disorder, allowing the clinically relevant discrimination between both psychotic diagnostics. To assess the efficacy of sMRI data for diagnostic prediction in psychosis we objectively evaluated the discriminative power of a wide range of commonly used machine learning algorithms (ridge, lasso, elastic net and L0 norm regularized logistic regressions, a support vector classifier, regularized discriminant analysis, random forests and a Gaussian process classifier) on main sMRI features including grey and white matter voxel-based morphometry (VBM), vertex-based cortical thickness and volume, region of interest volumetric measures and wavelet-based morphometry (WBM) maps. All possible combinations of algorithms and data features were considered in pairwise classifications of matched samples of healthy controls (N = 127), patients with schizophrenia (N = 128) and patients with bipolar disorder (N = 128). Results show that the selection of feature type is important, with grey matter VBM (without data reduction) delivering the best diagnostic prediction rates (averaging over classifiers: schizophrenia vs. healthy 75%, bipolar disorder vs. healthy 63% and schizophrenia vs. bipolar disorder 62%) whereas algorithms usually yielded very similar results. 
Indeed, those grey matter VBM accuracy rates were not even improved by combining all feature types in a single prediction model. Further multi-class classifications considering the three groups simultaneously made evident a lack of predictive power for the bipolar group, probably due to its intermediate anatomical features, located between those observed in healthy controls and those found in patients with schizophrenia. Finally, we provide MRIPredict (https://www.nitrc.org/projects/mripredict/), a free tool for SPM, FSL and R, to easily carry out voxelwise predictions based on VBM images.","hji,kes",0,0,0,2,0,NA,NA +28444590,Transcripts of pectin-degrading enzymes and isolation of complete cDNA sequence of a pectate lyase gene induced by coffee white stem borer (Xylotrechus quadripes) in the bark tissue of Coffea canephora (robusta coffee).,"Of the two commercially cultivated coffee (Coffea) species, C. arabica (arabica) is highly susceptible and C. canephora (robusta) is highly resistant to the insect pest Xylotrechus quadripes (Coleoptera: Cerambycidae), commonly known as coffee white stem borer (CWSB). We constructed a forward-subtracted cDNA library by Suppression Subtractive Hybridization (SSH) from robusta bark tissue for profiling genes induced by CWSB infestation. Among the 265 unigenes of the SSH EST library, 7 unigenes (5 contigs and 2 singletons) matching different pectin-degrading enzymes were discovered. These ESTs matched one pectate lyase, three polygalacturonases, and one pectin acetylesterase gene. Quantitative real-time PCR (qRT-PCR) revealed that CWSB infestation strongly induces the pectate lyase gene at 72h. Complete cDNA sequence of the pectate lyase gene was obtained through 3' and 5' RACE reactions. It was a 1595bp long sequence that included full CDS and both UTRs. Against C. 
canephora genome sequences in Coffee Genome Hub database ( http://coffee-genome.org/ ), it had 22 matches to different pectate lyase genes mapped on 9 of the 11 pseudochromosomes, the top match being Cc07_g00190 Pectate lyase. In NCBI database, it matched pectate lyase sequences of several plants. Apart from C. canephora, the closest pectate lyase matches were from Sesamum indicum and Nicotiana tabacum. The pectinolytic enzymes discovered here are thought to play a role in the production of oligogalacturonides (OGs) which act as Damage-Associated Molecular Pattern (DAMP) signals eliciting innate immunity in plants. The pectate lyase gene, induced by CWSB infestation, along with other endogenous pectinolytic enzymes and CWSB-specific elicitors, may be involved in triggering basal defense responses to protect the CWSB-damaged tissue against pathogens, as well as to contain CWSB in robusta.","hji,kes",0,0,0,2,0,NA,NA +28449110,veqtl-mapper: variance association mapping for molecular phenotypes.,"

Motivation

Genetic loci associated with the variance of phenotypic traits have been of recent interest as they can be signatures of genetic interactions, gene by environment interactions, parent of origin effects and canalization. We present a fast efficient tool to map loci affecting variance of gene expression and other molecular phenotypes in cis. Results: Applied to the publicly available Geuvadis gene expression dataset, we identify 816 loci associated with variance of gene expression using an additive model, and 32 showing differences in variance between homozygous and heterozygous alleles, signatures of parent of origin effects.

Availability and implementation

Documentation and links to source code and binaries for linux can be found at https://funpopgen.github.io/veqm/ .

Contact

andrew.brown@unige.ch.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28453624,Inferring Rates and Length-Distributions of Indels Using Approximate Bayesian Computation.,"The most common evolutionary events at the molecular level are single-base substitutions, as well as insertions and deletions (indels) of short DNA segments. A large body of research has been devoted to develop probabilistic substitution models and to infer their parameters using likelihood and Bayesian approaches. In contrast, relatively little has been done to model indel dynamics, probably due to the difficulty in writing explicit likelihood functions. Here, we contribute to the effort of modeling indel dynamics by presenting SpartaABC, an approximate Bayesian computation (ABC) approach to infer indel parameters from sequence data (either aligned or unaligned). SpartaABC circumvents the need to use an explicit likelihood function by extracting summary statistics from simulated sequences. First, summary statistics are extracted from the input sequence data. Second, SpartaABC samples indel parameters from a prior distribution and uses them to simulate sequences. Third, it computes summary statistics from the simulated sets of sequences. By computing a distance between the summary statistics extracted from the input and each simulation, SpartaABC can provide an approximation to the posterior distribution of indel parameters as well as point estimates. We study the performance of our methodology and show that it provides accurate estimates of indel parameters in simulations. We next demonstrate the utility of SpartaABC by studying the impact of alignment errors on the inference of positive selection. A C ++ program implementing SpartaABC is freely available in http://spartaabc.tau.ac.il.","hji,kes",0,0,0,2,0,NA,NA +28462382,GATA4 Is Sufficient to Establish Jejunal Versus Ileal Identity in the Small Intestine.,"

Background & aims

Patterning of the small intestinal epithelium along its cephalocaudal axis establishes three functionally distinct regions: duodenum, jejunum, and ileum. Efficient nutrient assimilation and growth depend on the proper spatial patterning of specialized digestive and absorptive functions performed by duodenal, jejunal, and ileal enterocytes. When enterocyte function is disrupted by disease or injury, intestinal failure can occur. One approach to alleviate intestinal failure would be to restore lost enterocyte functions. The molecular mechanisms determining regionally defined enterocyte functions, however, are poorly delineated. We previously showed that GATA binding protein 4 (GATA4) is essential to define jejunal enterocytes. The goal of this study was to test the hypothesis that GATA4 is sufficient to confer jejunal identity within the intestinal epithelium.

Methods

To test this hypothesis, we generated a novel Gata4 conditional knock-in mouse line and expressed GATA4 in the ileum, where it is absent.

Results

We found that GATA4-expressing ileum lost ileal identity. The global gene expression profile of GATA4-expressing ileal epithelium aligned more closely with jejunum and duodenum rather than ileum. Focusing on jejunal vs ileal identity, we defined sets of jejunal and ileal genes likely to be regulated directly by GATA4 to suppress ileal identity and promote jejunal identity. Furthermore, our study implicates GATA4 as a transcriptional repressor of fibroblast growth factor 15 (Fgf15), which encodes an enterokine that has been implicated in an increasing number of human diseases.

Conclusions

Overall, this study refines our understanding of an important GATA4-dependent molecular mechanism to pattern the intestinal epithelium along its cephalocaudal axis by elaborating on GATA4's function as a crucial dominant molecular determinant of jejunal enterocyte identity. Microarray data from this study have been deposited into NCBI Gene Expression Omnibus (http://www.ncbi.nlm.nih.gov/geo) and are accessible through GEO series accession number GSE75870.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +28472230,OMSim: a simulator for optical map data.,"

Motivation

The Bionano Genomics platform allows for the optical detection of short sequence patterns in very long DNA molecules (up to 2.5 Mbp). Molecules with overlapping patterns can be assembled to generate a consensus optical map of the entire genome. In turn, these optical maps can be used to validate or improve de novo genome assembly projects or to detect large-scale structural variation in genomes. Simulated optical map data can assist in the development and benchmarking of tools that operate on those data, such as alignment and assembly software. Additionally, it can help to optimize the experimental setup for a genome of interest. Such a simulator is currently not available.

Results

We have developed a simulator, OMSim, that produces synthetic optical map data that mimics real Bionano Genomics data. These simulated data have been tested for compatibility with the Bionano Genomics Irys software system and the Irys-scaffolding scripts. OMSim is capable of handling very large genomes (over 30 Gbp) with high throughput and low memory requirements.

Availability and implementation

The Python simulation tool and a cross-platform graphical user interface are available as open source software under the GNU GPL v2 license ( http://www.bioinformatics.intec.ugent.be/omsim ).

Contact

jan.fostier@ugent.be.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28472263,A deep learning framework for improving long-range residue-residue contact prediction using a hierarchical strategy.,"

Motivation

Residue-residue contacts are of great value for protein structure prediction, since contact information, especially from those long-range residue pairs, can significantly reduce the complexity of conformational sampling for protein structure prediction in practice. Despite progresses in the past decade on protein targets with abundant homologous sequences, accurate contact prediction for proteins with limited sequence information is still far from satisfaction. Methodologies for these hard targets still need further improvement.

Results

We presented a computational program DeepConPred, which includes a pipeline of two novel deep-learning-based methods (DeepCCon and DeepRCon) as well as a contact refinement step, to improve the prediction of long-range residue contacts from primary sequences. When compared with previous prediction approaches, our framework employed an effective scheme to identify optimal and important features for contact prediction, and was only trained with coevolutionary information derived from a limited number of homologous sequences to ensure robustness and usefulness for hard targets. Independent tests showed that 59.33%/49.97%, 64.39%/54.01% and 70.00%/59.81% of the top L/5, top L/10 and top 5 predictions were correct for CASP10/CASP11 proteins, respectively. In general, our algorithm ranked as one of the best methods for CASP targets.

Availability and implementation

All source data and codes are available at http://166.111.152.91/Downloads.html .

Contact

hgong@tsinghua.edu.cn or zengjy321@tsinghua.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28490512,Preconceptional and gestational weight trajectories and risk of delivering a small-for-gestational-age baby in rural Gambia.,"Background: Maternal nutritional status is a key determinant of small for gestational age (SGA), but some knowledge gaps remain, particularly regarding the role of the energy balance entering pregnancy.Objective: We investigated how preconceptional and gestational weight trajectories (summarized by individual-level traits) are associated with SGA risk in rural Gambia.Design: The sample comprised 670 women in a trial with serial weight data (7310 observations) that were available before and during pregnancy. Individual trajectories from 6 mo before conception to 30 wk of gestation were produced with the use of multilevel modeling. Summary traits were expressed as weight z scores [weight z score at 3 mo preconception (zwt-3 mo), weight z score at conception, weight z score at 3 mo postconception, weight z score at 7 mo postconception (zwt+7 mo), and conditional measures that represented the change from the preceding time] and were related to SGA risk with the use of Poisson regression with confounder adjustment; linear splines were used to account for nonlinearity.Results: Maternal weight at each time point had a consistent nonlinear relation with SGA risk. For example, the zwt-3 mo estimate was stronger in women with values =0.5 (RR: 0.736; 95% CI: 0.594, 0.910) than in women with values >0.5 (RR: 0.920; 95% CI: 0.682, 1.241). The former group had the highest observed SGA prevalence. Focusing on weight change, only conditional zwt+7 mo was associated with SGA and only in women with values >-0.5 (RR: 0.579; 95% CI: 0.463, 0.724).Conclusions: Protection against delivering an SGA neonate offered by greater preconceptional or gestational weight may be most pronounced in more undernourished and vulnerable women. 
Independent of this possibility, greater second- and third-trimester weight gain beyond a threshold may be protective. This trial was registered at http://www.isrctn.com/ as ISRCTN49285450.","hji,kes",0,0,0,2,0,NA,NA +28530006,Kindergarten/Elementary School Teachers and Web-based Oral Health-Related Resources: An Exploration.,"

Purpose

The percentage of U.S. children with poor oral health continues to be high. Kindergarten/elementary school educators could play an important role in teaching students about oral health promotion. The objectives were to assess which oral health-related web-based resources teachers consider most helpful and how teachers' attitudes, knowledge, and behavioural intentions concerning oral health-related teaching change between before and after having access to a resource website.

Materials and methods

Web-based survey data were collected from 95 kindergarten/elementary school educators before and after they accessed a website with oral health-related information for teachers (web-link: http://media.dent.umich.edu/teachoralhealth/index.html).

Results

Most teachers accessed lesson plans about 'Teeth and smiling' (90%) and 'Taking care of your teeth' (88%) and the fewest accessed information about 'Nutrition and health' (42%) and 'Information for parents' (39%). On average, all materials were perceived as useful (5-point scale with 5='very useful', range=3.80 to 4.04). Responses to the question on how important dental health is for a child's ability to learn improved significantly from before to after the educational intervention (5-point scale with 5='very important', 3.78 vs 4.44). Knowledge increased and behavioural intentions improved as well. The percentage of teachers who reported that they had included oral health-related material in the past was 47% and the percentage who intended to include it in the future was 65% (p<0.001).

Conclusions

Providing kindergarten/elementary school educators with web-based resource materials improves their attitudes, increases their knowledge and leads to positive behavioural intentions concerning educating their students about oral health.","hji,kes",0,0,0,2,0,NA,NA +28537071,CLMSVault: A Software Suite for Protein Cross-Linking Mass-Spectrometry Data Analysis and Visualization.,"Protein cross-linking mass spectrometry (CL-MS) enables the sensitive detection of protein interactions and the inference of protein complex topology. The detection of chemical cross-links between protein residues can identify intra- and interprotein contact sites or provide physical constraints for molecular modeling of protein structure. Recent innovations in cross-linker design, sample preparation, mass spectrometry, and software tools have significantly improved CL-MS approaches. Although a number of algorithms now exist for the identification of cross-linked peptides from mass spectral data, a dearth of user-friendly analysis tools represent a practical bottleneck to the broad adoption of the approach. To facilitate the analysis of CL-MS data, we developed CLMSVault, a software suite designed to leverage existing CL-MS algorithms and provide intuitive and flexible tools for cross-platform data interpretation. CLMSVault stores and combines complementary information obtained from different cross-linkers and search algorithms. CLMSVault provides filtering, comparison, and visualization tools to support CL-MS analyses and includes a workflow for label-free quantification of cross-linked peptides. An embedded 3D viewer enables the visualization of quantitative data and the mapping of cross-linked sites onto PDB structural models. We demonstrate the application of CLMSVault for the analysis of a noncovalent Cdc34-ubiquitin protein complex cross-linked under different conditions. 
CLMSVault is open-source software (available at https://gitlab.com/courcelm/clmsvault.git ), and a live demo is available at http://democlmsvault.tyerslab.com/ .","hji,kes",0,0,0,2,0,NA,NA +28579402,MIDAS: Mining differentially activated subpaths of KEGG pathways from multi-class RNA-seq data.,"Pathway based analysis of high throughput transcriptome data is a widely used approach to investigate biological mechanisms. Since a pathway consists of multiple functions, the recent approach is to determine condition specific sub-pathways or subpaths. However, there are several challenges. First, few existing methods utilize explicit gene expression information from RNA-seq. More importantly, subpath activity is usually an average of statistical scores, e.g., correlations, of edges in a candidate subpath, which fails to reflect gene expression quantity information. In addition, none of existing methods can handle multiple phenotypes. To address these technical problems, we designed and implemented an algorithm, MIDAS, that determines condition specific subpaths, each of which has different activities across multiple phenotypes. MIDAS utilizes gene expression quantity information fully and the network centrality information to determine condition specific subpaths. To test performance of our tool, we used TCGA breast cancer RNA-seq gene expression profiles with five molecular subtypes. 36 differentially activate subpaths were determined. The utility of our method, MIDAS, was demonstrated in four ways. All 36 subpaths are well supported by the literature information. Subsequently, we showed that these subpaths had a good discriminant power for five cancer subtype classification and also had a prognostic power in terms of survival analysis. Finally, in a performance comparison of MIDAS to a recent subpath prediction method, PATHOME, our method identified more subpaths and much more genes that are well supported by the literature information.

Availability

http://biohealth.snu.ac.kr/software/MIDAS/.","hji,kes",0,0,0,2,0,NA,NA +28590671,Genetics of the Connectome and the ENIGMA Project,"Here we give an overview of a worldwide effort, called the ENIGMA Consortium (http://enigma.ini.usc.edu), which unites scientists worldwide to determine how variants in our genetic code influence the brain, and how 12 major diseases affect the brain worldwide. At the time of writing, ENIGMA involves over 500 scientists from 185 institutions worldwide, working together on around 30 projects to discover factors that may help or harm the brain. By pooling genome-wide genomic data and brain imaging from over 33,000 people, ENIGMA has been able to identify single-nucleotide differences in the genome that are associated with differences in human brain structure and function. Given the broad interest in brain connectivity and the factors that affect it, we outline some tactics adopted by ENIGMA to discover specific genes that affect the brain; then we describe how ENIGMA is extending these methods to discover genetic influences on brain connectivity.","hji,kes",0,0,0,2,0,NA,NA +28597489,"Development of a web-based application and multicountry analysis framework for assessing interdicted infections and cost-utility of screening donated blood for HIV, HCV and HBV.","

Background and objectives

Most countries test donations for HIV, HCV and HBV using serology with or without nucleic acid testing (NAT). Cost-utility analyses provide information on the relative value of different screening options. The aim of this project was to develop an open access risk assessment and cost-utility analysis web-tool for assessing HIV, HCV and HBV screening options (http://www.isbtweb.org/working-parties/transfusion-transmitted-infectious-diseases/). An analysis for six countries (Brazil, Ghana, the Netherlands, South Africa, Thailand and USA) was conducted.

Materials and methods

Four strategies; (1) antibody assays (Abs) for HIV and HCV + HBsAg, (2) antibody assays that include antigens for HIV and HCV (Combo) + HBsAg, (3) NAT in minipools of variable size (MP NAT) and (4) individual donation (ID) NAT can be evaluated using the tool. Country-specific data on donors, donation testing results, recipient outcomes and costs are entered using the online interface. Results obtained include the number infections interdicted using each screening options, and the (incremental and average) cost-utility of the options.

Results

In each of the six countries evaluated, the use of antibody assays is cost effective or even cost saving. NAT has varying cost-utility depending on the setting, and where adopted, the incremental cost-utility exceeds any previously defined or proposed threshold in each country.

Conclusion

The web-tool allows an assessment of infectious units interdicted and value for money of different testing strategies. Regardless of gross national income (GNI) per capita, countries appear willing to dedicate healthcare resources to blood supply safety in excess of that for other sectors of health care.","hji,kes",0,0,0,2,0,NA,NA +28605774,GenomeHubs: simple containerized setup of a custom Ensembl database and web server for any species.,"

Database url

http://GenomeHubs.org.As the generation and use of genomic datasets is becoming increasingly common in all areas of biology, the need for resources to collate, analyse and present data from one or more genome projects is becoming more pressing. The Ensembl platform is a powerful tool to make genome data and cross-species analyses easily accessible through a web interface and a comprehensive application programming interface. Here we introduce GenomeHubs, which provide a containerized environment to facilitate the setup and hosting of custom Ensembl genome browsers. This simplifies mirroring of existing content and import of new genomic data into the Ensembl database schema. GenomeHubs also provide a set of analysis containers to decorate imported genomes with results of standard analyses and functional annotations and support export to flat files, including EMBL format for submission of assemblies and annotations to International Nucleotide Sequence Database Collaboration.","hji,kes",0,0,0,2,0,NA,NA +28610126,"Taxonomic description and 3D modelling of a new species of myzostomid (Annelida, Myzostomida) associated with black corals from Madagascar.","Eenymeenymyzostoma nigrocorallium n. sp. is the first species of myzostomid worm associated with black corals to be described. Endoparasitic specimens of E. nigrocorallium were found associated with three species of antipatharians on the Great Reef of Toliara. Individuals inhabit the gastrovascular ducts of their hosts and evidence of infestation is, most of the time, not visible externally. Phylogenetic analyses based on 18S rDNA, 16S rDNA and COI data indicate a close relation to Eenymeenymyzostoma cirripedium, the only other species of the genus. The morphology of E. nigrocorallium is very unusual compared to that of the more conventional E. cirripedium. The new species has five pairs of extremely reduced parapodia located on the body margin and no introvert, cirri or lateral organs. 
Individuals are hermaphroditic, with the male and female gonads both being located dorsally in the trunk. It also has a highly developed parenchymo-muscular layer on the ventral side, and the digestive system lies in the middle part of the trunk. A three-dimensional digital model of this worm's body plan has been constructed whereby the external morphology and in toto views of the observed organ systems (nervous, digestive and reproductive) can be viewed on-screen: http://doi.org/10.13140/RG.2.2.17911.21923.","hji,kes",0,0,0,2,0,NA,NA +28612849,"Psyllids, It's What's on the Inside That Counts: Community Cross Talk Facilitates Prophage Interactions.","Despite the availability of massive microbial community data sets (e.g., metagenomes), there is still a lack of knowledge on what molecular mechanisms facilitate cross talk between microbes and prophage within a community context. A study published in mSphere by Jain and colleagues (M. Jain, L. A. Fleites, and D. W. Gabriel, mSphere 2:e00171-17, 2017, https://doi.org/10.1128/mSphereDirect.00171-17) reports on an intriguing new twist of how a prophage of the bacterium """"Candidatus Liberibacter asiaticus"""" may have its lytic cycle suppressed partly because of a protein that is expressed by a cooccurring bacterium, Wolbachia. Both of these microbes coexist along with other microbial tenants inside their sap-feeding insect host, a psyllid. Although these results are still preliminary and alternative hypotheses need to be tested, these results suggest an interesting new dimension on how regulation of microbial genomes occurs in a community context.","hji,kes",0,0,0,2,0,NA,NA +28633385,TIminer: NGS data mining pipeline for cancer immunology and immunotherapy.,"

Summary

Recently, a number of powerful computational tools for dissecting tumor-immune cell interactions from next-generation sequencing data have been developed. However, the assembly of analytical pipelines and execution of multi-step workflows are laborious and involve a large number of intermediate steps with many dependencies and parameter settings. Here we present TIminer, an easy-to-use computational pipeline for mining tumor-immune cell interactions from next-generation sequencing data. TIminer enables integrative immunogenomic analyses, including: human leukocyte antigens typing, neoantigen prediction, characterization of immune infiltrates and quantification of tumor immunogenicity.

Availability and implementation

TIminer is freely available at http://icbi.i-med.ac.at/software/timiner/timiner.shtml.

Contact

zlatko.trajanoski@i-med.ac.at.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28666322,SVM-dependent pairwise HMM: an application to protein pairwise alignments.,"

Motivation

Methods able to provide reliable protein alignments are crucial for many bioinformatics applications. In the last years many different algorithms have been developed and various kinds of information, from sequence conservation to secondary structure, have been used to improve the alignment performances. This is especially relevant for proteins with highly divergent sequences. However, recent works suggest that different features may have different importance in diverse protein classes and it would be an advantage to have more customizable approaches, capable to deal with different alignment definitions.

Results

Here we present Rigapollo, a highly flexible pairwise alignment method based on a pairwise HMM-SVM that can use any type of information to build alignments. Rigapollo lets the user decide the optimal features to align their protein class of interest. It outperforms current state of the art methods on two well-known benchmark datasets when aligning highly divergent sequences.

Availability and implementation

A Python implementation of the algorithm is available at http://ibsquare.be/rigapollo.

Contact

wim.vranken@vub.be.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +28698795,Semi-automated Modular Program Constructor for physiological modeling: Building cell and organ models.,"The Modular Program Constructor (MPC) is an open-source Java based modeling utility, built upon JSim's Mathematical Modeling Language (MML) ( http://www.physiome.org/jsim/) that uses directives embedded in model code to construct larger, more complicated models quickly and with less error than manually combining models. A major obstacle in writing complex models for physiological processes is the large amount of time it takes to model the myriad processes taking place simultaneously in cells, tissues, and organs. MPC replaces this task with code-generating algorithms that take model code from several different existing models and produce model code for a new JSim model. This is particularly useful during multi-scale model development where many variants are to be configured and tested against data. MPC encodes and preserves information about how a model is built from its simpler model modules, allowing the researcher to quickly substitute or update modules for hypothesis testing. MPC is implemented in Java and requires JSim to use its output. MPC source code and documentation are available at http://www.physiome.org/software/MPC/.","hji,kes",0,0,0,2,0,NA,NA +28711973,PIWI-interacting RNAs as novel regulators of pancreatic beta cell function.,"

Aims/hypothesis

P-element induced Wimpy testis (PIWI)-interacting RNAs (piRNAs) are small non-coding RNAs that interact with PIWI proteins and guide them to silence transposable elements. They are abundantly expressed in germline cells and play key roles in spermatogenesis. There is mounting evidence that piRNAs are also present in somatic cells, where they may accomplish additional regulatory tasks. The aim of this study was to identify the piRNAs expressed in pancreatic islets and to determine whether they are involved in the control of beta cell activities.

Methods

piRNA profiling of rat pancreatic islets was performed by microarray analysis. The functions of piRNAs were investigated by silencing the two main Piwi genes or by modulating the level of selected piRNAs in islet cells.

Results

We detected about 18,000 piRNAs in rat pancreatic islets, many of which were differentially expressed throughout islet postnatal development. Moreover, we identified changes in the level of several piRNAs in the islets of Goto-Kakizaki rats, a well-established animal model of type 2 diabetes. Silencing of Piwil2 or Piwil4 genes in adult rat islets caused a reduction in the level of several piRNAs and resulted in defective insulin secretion and increased resistance of the cells to cytokine-induced cell death. Furthermore, overexpression in the islets of control animals of two piRNAs that are upregulated in diabetic rats led to a selective defect in glucose-induced insulin release.

Conclusions/interpretation

Our results provide evidence for a role of PIWI proteins and their associated piRNAs in the control of beta cell functions, and suggest a possible involvement in the development of type 2 diabetes.

Data availability

Data have been deposited in Gene Expression Omnibus repository under the accession number GSE93792. Data can be accessed via the following link: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?token=ojklueugdzehpkv&acc=GSE93792.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +28723463,The Global Prevalence of Infections in Urology (GPUI) Study: A Worldwide Surveillance Study in Urology Patients.,"The GPIU study is currently the only study registering health care-associated urogenital tract infections, especially in urology patients, in an ongoing surveillance protocol that can help to deliver data on adequate empirical antibiotic therapy in hospitalised urology patients according to guideline recommendations. The annual GPIU study will continue to be performed in November of each year under the URL http://gpiu.esiu.org/.","hji,kes",0,0,0,2,0,NA,NA +28783153,PECAN: library-free peptide detection for data-independent acquisition tandem mass spectrometry data.,"Data-independent acquisition (DIA) is an emerging mass spectrometry (MS)-based technique for unbiased and reproducible measurement of protein mixtures. DIA tandem mass spectrometry spectra are often highly multiplexed, containing product ions from multiple cofragmenting precursors. Detecting peptides directly from DIA data is therefore challenging; most DIA data analyses require spectral libraries. Here we present PECAN (http://pecan.maccosslab.org), a library-free, peptide-centric tool that robustly and accurately detects peptides directly from DIA data. PECAN reports evidence of detection based on product ion scoring, which enables detection of low-abundance analytes with poor precursor ion signal. We demonstrate the chromatographic peak picking accuracy and peptide detection capability of PECAN, and we further validate its detection with data-dependent acquisition and targeted analyses. 
Lastly, we used PECAN to build a plasma proteome library from DIA data and to query known sequence variants.","hji,kes",0,0,0,2,0,NA,NA +28787666,ERas is constitutively expressed in full term placenta of pregnant cows.,"ERas is a new gene recently found in mouse embryonic stem (ES) cells and localized on the X chromosome. It plays a role in mouse ES cell survival and is constitutively active without any mutations. It was also found to be responsible for the maintenance of quiescence of the hepatic stellate cells (HSCs), liver-resident mesenchymal stem cells, the activation of which results in liver fibrosis. This gene was not present in human ES cells. ERas was found to be activated in a significant population of human gastric cancer, where ERAS may play a crucial role in gastric cancer cell survival and metastases to liver via down-regulation of E-cadherin. ERas gene has been found to be expressed both in ES cells and adult tissues of cynomolgus monkey. Cynomolgus ERAS did not promote cell proliferation or induce tumor formation. ERAS was also detected in normal and neoplastic urothelium of the urinary bladder in cattle, where bovine ERAS formed a constitutive complex with platelet derived growth factor receptor (PDGFR) resulting in the activation of AKT signaling. Here, molecular and morphological findings of ERAS in the full term placenta of pregnant cows have been investigated for the first time. ERAS was studied by reverse transcriptase PCR (RT-PCR). Alignment of the sequence detects a 100% identity with all transcript variant bovine ERas mRNAs, present in the GenBank database (http://www.ncbi.nlm.nih.gov). Furthermore, ERAS was detected by Western blot and investigated by real time PCR that revealed an amount of ERAS more than ERAS found in normal bovine urothelium but less than ERAS present in the liver. 
Immunohistochemical examination revealed the presence of ERAS protein both at the level of plasma membrane and in cytoplasm of epithelial cells lining caruncular crypts and in trophoblasts of villi. An evident ERAS immunoreactivity was also seen throughout the chorionic and uterine gland epithelium. Although this is not a functional study and further investigations will be warranted, it is conceivable that ERAS may have pleiotropic effects in the placenta, some of which, like normal urothelial cells, might lead to activation of AKT pathway. We speculate that ERAS may play a key role in cellular processes such as cell differentiation and movement. Accordingly, we believe it may be an important factor involved in trophoblast invasiveness via AKT signaling pathway. Therefore, ERas gene is a functional gene which contributes to homeostasis of bovine placenta.","hji,kes",0,0,0,2,0,NA,references other data resource +28812439,Evaluating disease prediction models using a cohort whose covariate distribution differs from that of the target population.,"Personal predictive models for disease development play important roles in chronic disease prevention. The performance of these models is evaluated by applying them to the baseline covariates of participants in external cohort studies, with model predictions compared to subjects' subsequent disease incidence. However, the covariate distribution among participants in a validation cohort may differ from that of the population for which the model will be used. Since estimates of predictive model performance depend on the distribution of covariates among the subjects to which it is applied, such differences can cause misleading estimates of model performance in the target population. We propose a method for addressing this problem by weighting the cohort subjects to make their covariate distribution better match that of the target population. 
Simulations show that the method provides accurate estimates of model performance in the target population, while un-weighted estimates may not. We illustrate the method by applying it to evaluate an ovarian cancer prediction model targeted to US women, using cohort data from participants in the California Teachers Study. The methods can be implemented using open-source code for public use as the R-package RMAP (Risk Model Assessment Package) available at http://stanford.edu/~ggong/rmap/ .","hji,kes",0,0,0,2,0,NA,NA +28817627,Analysing researchers' outreach efforts and the association with publication metrics: A case study of Kudos.,"With the growth of scholarly collaboration networks and social communication platforms, members of the scholarly community are experimenting with their approach to disseminating research outputs, in an effort to increase their audience and outreach. However, from a researcher's point of view, it is difficult to determine whether efforts to make work more visible are worthwhile (in terms of the association with publication metrics) and within that, difficult to assess which platform or network is most effective for sharing work and connecting to a wider audience. We undertook a case study of Kudos (https://www.growkudos.com), a web-based service that claims to help researchers increase the outreach of their publications, to examine the most effective tools for sharing publications online, and to investigate which actions are associated with improved metrics. We extracted a dataset from Kudos of 830,565 unique publications claimed by authors, for which 20,775 had actions taken to explain or share via Kudos, and for 4,867 of these full text download data from publishers was available. Findings show that researchers are most likely to share their work on Facebook, but links shared on Twitter are more likely to be clicked on. 
A Mann-Whitney U test revealed that a treatment group (publications having actions in Kudos) had a significantly higher median average of 149 full text downloads (23.1% more) per publication as compared to a control group (having no actions in Kudos) with a median average of 121 full text downloads per publication. These findings suggest that performing actions on publications, such as sharing, explaining, or enriching, could help to increase the number of full text downloads of a publication.","hji,kes",0,0,0,2,0,NA,not about the resource +28880980,Progressive Tinnitus Management Level 3 Skills Education: A 5-Year Clinical Retrospective.,"

Purpose

The primary purpose of this study was to determine whether progressive tinnitus management Level 3 skills education workshops conducted at the Bay Pines and Boston Veterans Affairs hospitals result in consistent use of the presented tinnitus management strategies by patients 1-5 years after completing the workshops.

Method

In fiscal year (FY) 2015, the tinnitus workshop follow-up form was mailed to all veterans who completed the Level 3 workshops between FY 2010 and FY 2014. Data were compiled to determine which, if any, of the skills taught in the workshops were being used 1-5 years after completion of the workshops and the impact on quality-of-life indicators.

Results

All self-management skills were being utilized up to 5 years postcompletion; therapeutic sound was utilized the most. The majority of patients reported an improved ability to manage reactions to tinnitus and improved quality-of-life indicators. Over 90% of patients from both sites recommended the program to others with tinnitus.

Conclusion

The self-management skills taught in the progressive tinnitus management Level 3 workshops are sustained over time even when limited resources prevent the full complement of workshops or the involvement of mental health services. The workshops can also be successfully implemented through remote delivery via videoconferencing (telehealth).

Supplemental materials

https://doi.org/10.23641/asha.5370883.","hji,kes",0,0,0,2,0,NA,NA +28886603,Review of Epidemiological Studies of Drinking-Water Turbidity in Relation to Acute Gastrointestinal Illness.,"

Background

Turbidity has been used as an indicator of microbiological contamination of drinking water in time-series studies attempting to discern the presence of waterborne gastrointestinal illness; however, the utility of turbidity as a proxy exposure measure has been questioned.

Objectives

We conducted a review of epidemiological studies of the association between turbidity of drinking-water supplies and incidence of acute gastrointestinal illness (AGI), including a synthesis of the overall weight of evidence. Our goal was to evaluate the potential for causal inference from the studies.

Methods

We identified 14 studies on the topic (distinct by region, time period and/or population). We evaluated each study with regard to modeling approaches, potential biases, and the strength of evidence. We also considered consistencies and differences in the collective results.

Discussion

Positive associations between drinking-water turbidity and AGI incidence were found in different cities and time periods, and with both unfiltered and filtered supplies. There was some evidence for a stronger association at higher turbidity levels. The studies appeared to adequately adjust for confounding. There was fair consistency in the notable lags between turbidity measurement and AGI identification, which fell between 6 and 10 d in many studies.

Conclusions

The observed associations suggest a detectable incidence of waterborne AGI from drinking water in the systems and time periods studied. However, some discrepant results indicate that the association may be context specific. Combining turbidity with seasonal and climatic factors, additional water quality measures, and treatment data may enhance predictive modeling in future studies. https://doi.org/10.1289/EHP1090.","hji,kes",0,0,0,2,0,NA,NA +28918286,Identifying combinatorial biomarkers by association rule mining in the CAMD Alzheimer's database.,"The concept of combinatorial biomarkers was conceived when it was noticed that simple biomarkers are often inadequate for recognizing and characterizing complex diseases. Here we present an algorithmic search method for complex biomarkers which may predict or indicate Alzheimer's disease (AD) and other kinds of dementia. We show that our method is universal since it can describe any Boolean function for biomarker discovery. We applied data mining techniques that are capable to uncover implication-like logical schemes with detailed quality scoring. The new SCARF program was applied for the Tucson, Arizona based Critical Path Institute's CAMD database, containing laboratory and cognitive test data for 5821 patients from the placebo arm of clinical trials of large pharmaceutical companies, and consequently, the data is much more reliable than numerous other databases for dementia. The results of our study on this larger than 5800-patient cohort suggest beneficial effects of high B12 vitamin level, negative effects of high sodium levels or high AST (aspartate aminotransferase) liver enzyme levels to cognition. As an example for a more complex and quite surprising rule: Low or normal blood glucose level with either low cholesterol or high serum sodium would also increase the probability of bad cognition with a 3.7 multiplier. 
The source code of the new SCARF program is publicly available at http://pitgroup.org/static/scarf.zip.","hji,kes",0,0,0,2,0,NA,NA +28967123,"Antidepressant-like effects of BU10119, a novel buprenorphine analogue with mixed κ/μ receptor antagonist properties, in mice.","

Background and purpose

The κ receptor antagonists have potential for treating neuropsychiatric disorders. We have investigated the in vivo pharmacology of a novel buprenorphine analogue, BU10119, for the first time.

Experimental approach

To determine the opioid pharmacology of BU10119 (0.3-3mgkg-1 , i.p.) in vivo, the warm-water tail-withdrawal assay was applied in adult male CD1 mice. A range of behavioural paradigms was used to investigate the locomotor effects, rewarding properties and antidepressant or anxiolytic potential of BU10119. Additional groups of mice were exposed to a single (1×2h) or repeated restraint stress (3× daily 2h) to determine the ability of BU10119 to block stress-induced analgesia.

Key results

BU10119 alone was without any antinociceptive activity. BU10119 (1mgkg-1 ) was able to block U50,488, buprenorphine and morphine-induced antinociception. The antagonist effects of BU10119 in the tail-withdrawal assay reversed between 24 and 48h. BU10119 was without significant locomotor or rewarding effects. BU10119 (1mgkg-1 ) significantly reduced the latency to feed in the novelty-induced hypophagia task and reduced immobility time in the forced swim test, compared to saline-treated animals. There were no significant effects of BU10119 in either the elevated plus maze or the light-dark box. Both acute and repeated restraint stress-induced analgesia were blocked by pretreatment with BU10119 (1mgkg-1 ). Parallel stress-induced increases in plasma corticosterone were not affected.

Conclusions and implications

BU10119 is a mixed κ/μ receptor antagonist with relatively short-duration antagonist activity. Based on these preclinical data, BU10119 has therapeutic potential for the treatment of depression and other stress-induced conditions.

Linked articles

This article is part of a themed section on Emerging Areas of Opioid Pharmacology. To view the other articles in this section visit http://onlinelibrary.wiley.com/doi/10.1111/bph.v175.14/issuetoc.","hji,kes",0,0,0,2,0,NA,NA +28974379,Structural-functional diversity of the natural oligopeptides.,"Natural oligopeptides may regulate nearly all vital processes. To date, the chemical structures of many oligopeptides have been identified from >2000 organisms representing all the biological kingdoms. We have considered a number of mathematical (sequence length), chemical, physical, and biological features of an array of natural oligopeptides on the basis of the oligopeptide EROP-Moscow database (http://erop.inbi.ras.ru, 15,351 entries) data. There is the substantial difference of these substances from polypeptide molecules of proteins according to their physicochemical characteristics. These characteristics may be critical for understanding the molecular mechanisms of the action of oligopeptides that lead to the development of physiological effects.","hji,kes",0,0,0,2,0,NA,not about the resource +28981577,'Multi-omic' data analysis using O-miner.,"Innovations in -omics technologies have driven advances in biomedical research. However, integrating and analysing the large volumes of data generated from different high-throughput -omics technologies remain a significant challenge to basic and clinical scientists without bioinformatics skills or access to bioinformatics support. To address this demand, we have significantly updated our previous O-miner analytical suite, to incorporate several new features and data types to provide an efficient and easy-to-use Web tool for the automated analysis of data from '-omics' technologies. 
Created from a biologist's perspective, this tool allows for the automated analysis of large and complex transcriptomic, genomic and methylomic data sets, together with biological/clinical information, to identify significantly altered pathways and prioritize novel biomarkers/targets for biological validation. Our resource can be used to analyse both in-house data and the huge amount of publicly available information from array and sequencing platforms. Multiple data sets can be easily combined, allowing for meta-analyses. Here, we describe the analytical pipelines currently available in O-miner and present examples of use to demonstrate its utility and relevance in maximizing research output. O-miner Web server is free to use and is available at http://www.o-miner.org.","hji,kes",0,0,0,2,0,NA,NA +28985876,The readability of psychosocial wellness patient resources: improving surgical outcomes.,"

Background

Patient education is increasingly accessed with online resources and is essential for patient satisfaction and clinical outcomes. The average American adult reads at a seventh grade level, and the National Institute of Health (NIH) and the American Medical Association (AMA) recommend that information be written at a sixth-grade reading level. Health literacy plays an important role in the disease course and outcomes of all patients, including those with depression and likely other psychiatric disorders, although this is an area in need of further study. The purpose of this study was to collect and analyze written, online mental health resources on the Veterans Health Administration (VA) website, and other websites, using readability assessment instruments.

Methods

An internet search was performed to identify written patient education information regarding mental health from the VA (the VA Mental Health Website) and top-rated psychiatric hospitals. Seven mental health topics were included in the analysis: generalized anxiety disorder, bipolar, major depressive disorder, posttraumatic stress disorder, schizophrenia, substance abuse, and suicide. Readability analyses were performed using the Gunning Fog Index, the Flesch-Kincaid Grade Level, the Coleman-Liau Index, the SMOG Readability Formula, and the Automated Readability Index. These scores were then combined into a Readability Consensus score. A two-tailed t-test was used to compare the mean values, and statistical significance was set at P<0.05.

Results

Twelve of the best hospitals for psychiatry 2016-2017 were identified. Nine had educational material. Six of the nine cited the same resource, The StayWell Company, LLC (StayWell Company, LLC; Yardley, PA), for at least one of the mental health topics analyzed. The VA mental health website (http://www.mentalhealth.va.gov) had a significantly higher readability consensus than six of the top psychiatric hospitals (P<0.05, P=0.0067, P=0.019, P=0.041, P=0.0093, P=0.0054, and P=0.0093). The overall average readability consensus for mental health information on all websites analyzed was 9.52.

Conclusions

Online resources for mental health disorders are more complex than recommended by the NIH and AMA. Efforts to improve readability of mental health and psychosocial wellness resources could benefit patient understanding and outcomes, especially in patients with lower literacy. Surgical outcomes are correlated with patient mental health and psychosocial wellness and thus can be improved with more appropriate levels of readability of psychosocial wellness resources.","hji,kes",0,0,0,2,0,NA,NA +28991473,INTerface Builder: A Fast Protein-Protein Interface Reconstruction Tool.,"INTerface Builder (INTBuilder) is a fast, easy-to-use program to compute protein-protein interfaces. It is designed to retrieve interfaces from molecular docking software outputs in an empirically determined linear complexity. INTBuilder directly reads the output formats of popular docking programs like ATTRACT, HEX, MAXDo, and ZDOCK, as well as a more generic format and Protein Data Bank (PDB) files. It identifies interacting surfaces at both residue and atom resolutions. INTerface Builder is an open source software written in C and freely available for noncommercial use (CeCILL license) at https://www.lcqb.upmc.fr/INTBuilder .","hji,kes",0,0,0,2,0,NA,NA +29028266,CircPro: an integrated tool for the identification of circRNAs with protein-coding potential.,"

Summary

Circular RNAs (circRNAs), a novel class of endogenous RNAs, are widespread in eukaryotic cells. Emerging roles in diverse biological processes suggest that circRNA is a promising key player in RNA world. Most circRNAs are generated through back-splicing of pre-mRNAs, forming a covalently closed loop structure with no 5' caps or 3' polyadenylated tails. In addition, most circRNAs were not associated with translating ribosomes, therefore, circRNAs were deemed to be noncoding. However, the latest research findings revealed that some circRNAs could generate proteins in vivo, which expands the landscape of transcriptome and proteome. To gain insights into the new area of circRNA translation, we introduce an integrated tool capable of detecting circRNAs with protein-coding potential from high-throughput sequencing data.

Availability and implementation

CircPro is available at http://bis.zju.edu.cn/CircPro.

Contact

mchen@zju.edu.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +29029172,DnaSP 6: DNA Sequence Polymorphism Analysis of Large Data Sets.,"We present version 6 of the DNA Sequence Polymorphism (DnaSP) software, a new version of the popular tool for performing exhaustive population genetic analyses on multiple sequence alignments. This major upgrade incorporates novel functionalities to analyze large data sets, such as those generated by high-throughput sequencing technologies. Among other features, DnaSP 6 implements: 1) modules for reading and analyzing data from genomic partitioning methods, such as RADseq or hybrid enrichment approaches, 2) faster methods scalable for high-throughput sequencing data, and 3) summary statistics for the analysis of multi-locus population genetics data. Furthermore, DnaSP 6 includes novel modules to perform single- and multi-locus coalescent simulations under a wide range of demographic scenarios. The DnaSP 6 program, with extensive documentation, is freely available at http://www.ub.edu/dnasp.","hji,kes",0,0,0,2,0,NA,NA +29036270,On expert curation and scalability: UniProtKB/Swiss-Prot as a case study.,"Motivation:Biological knowledgebases, such as UniProtKB/Swiss-Prot, constitute an essential component of daily scientific research by offering distilled, summarized and computable knowledge extracted from the literature by expert curators. While knowledgebases play an increasingly important role in the scientific community, their ability to keep up with the growth of biomedical literature is under scrutiny. Using UniProtKB/Swiss-Prot as a case study, we address this concern via multiple literature triage approaches. Results:With the assistance of the PubTator text-mining tool, we tagged more than 10 000 articles to assess the ratio of papers relevant for curation. 
We first show that curators read and evaluate many more papers than they curate, and that measuring the number of curated publications is insufficient to provide a complete picture as demonstrated by the fact that 8000-10 000 papers are curated in UniProt each year while curators evaluate 50 000-70 000 papers per year. We show that 90% of the papers in PubMed are out of the scope of UniProt, that a maximum of 2-3% of the papers indexed in PubMed each year are relevant for UniProt curation, and that, despite appearances, expert curation in UniProt is scalable. Availability and implementation:UniProt is freely available at http://www.uniprot.org/. Contact:sylvain.poux@sib.swiss. Supplementary information:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +29043042,feedr and animalnexus.ca: A paired R package and user-friendly Web application for transforming and visualizing animal movement data from static stations.,"Radio frequency identification (RFID) provides a simple and inexpensive approach for examining the movements of tagged animals, which can provide information on species behavior and ecology, such as habitat/resource use and social interactions. In addition, tracking animal movements is appealing to naturalists, citizen scientists, and the general public and thus represents a tool for public engagement in science and science education. Although a useful tool, the large amount of data collected using RFID may quickly become overwhelming. Here, we present an R package (feedr) we have developed for loading, transforming, and visualizing time-stamped, georeferenced data, such as RFID data collected from static logger stations. Using our package, data can be transformed from raw RFID data to visits, presence (regular detections by a logger over time), movements between loggers, displacements, and activity patterns. 
In addition, we provide several conversion functions to allow users to format data for use in functions from other complementary R packages. Data can also be visualized through static or interactive maps or as animations over time. To increase accessibility, data can be transformed and visualized either through R directly, or through the companion site: http://animalnexus.ca, an online, user-friendly, R-based Shiny Web application. This system can be used by professional and citizen scientists alike to view and study animal movements. We have designed this package to be flexible and to be able to handle data collected from other stationary sources (e.g., hair traps, static very high frequency (VHF) telemetry loggers, observations of marked individuals in colonies or staging sites), and we hope this framework will become a meeting point for science, education, and community awareness of the movements of animals. We aim to inspire citizen engagement while simultaneously enabling robust scientific analysis.","hji,kes",0,0,0,2,0,NA,NA +29058722,Deciphering lipid structures based on platform-independent decision rules.,"We achieve automated and reliable annotation of lipid species and their molecular structures in high-throughput data from chromatography-coupled tandem mass spectrometry using decision rule sets embedded in Lipid Data Analyzer (LDA; http://genome.tugraz.at/lda2). Using various low- and high-resolution mass spectrometry instruments with several collision energies, we proved the method's platform independence. We propose that the software's reliability, flexibility, and ability to identify novel lipid molecular species may now render current state-of-the-art lipid libraries obsolete.","hji,kes",0,0,0,2,0,NA,NA +29069297,SeqBox: RNAseq/ChIPseq reproducible analysis on a consumer game computer.,"

Summary

Short reads sequencing technology has been used for more than a decade now. However, the analysis of RNAseq and ChIPseq data is still computational demanding and the simple access to raw data does not guarantee results reproducibility between laboratories. To address these two aspects, we developed SeqBox, a cheap, efficient and reproducible RNAseq/ChIPseq hardware/software solution based on NUC6I7KYK mini-PC (an Intel consumer game computer with a fast processor and a high performance SSD disk), and Docker container platform. In SeqBox the analysis of RNAseq and ChIPseq data is supported by a friendly GUI. This allows access to fast and reproducible analysis also to scientists with/without scripting experience.

Availability and implementation

Docker container images, docker4seq package and the GUI are available at http://www.bioinformatica.unito.it/reproducibile.bioinformatics.html.

Contact

beccuti@di.unito.it.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +29118973,The 2017 Bioinformatics Open Source Conference (BOSC).,"The Bioinformatics Open Source Conference (BOSC) is a meeting organized by the Open Bioinformatics Foundation (OBF), a non-profit group dedicated to promoting the practice and philosophy of Open Source software development and Open Science within the biological research community. The 18th annual BOSC ( http://www.open-bio.org/wiki/BOSC_2017) took place in Prague, Czech Republic in July 2017. The conference brought together nearly 250 bioinformatics researchers, developers and users of open source software to interact and share ideas about standards, bioinformatics software development, open and reproducible science, and this year's theme, open data. As in previous years, the conference was preceded by a two-day collaborative coding event open to the bioinformatics community, called the OBF Codefest.","hji,kes",0,0,0,2,0,NA,NA +29149877,Key bottlenecks to the provision of safe obstetric anaesthesia in low- income countries; a cross-sectional survey of 64 hospitals in Uganda.,"

Background

Despite recent advances in surgery and anaesthesia which significantly improve safety, many health facilities in low-and middle-income countries (LMICs) remain chronically under-resourced with inability to cope effectively with serious obstetric complications (Knight et al., PLoS One 8:e63846, 2013). As a result many of these countries still have unacceptably high maternal and neonatal mortality rates. Recent data at the national referral hospitals in East Africa reported that none of the national referral hospitals met the World Federation of Societies of Anesthesiologists (WFSA) international standards required to provide safe obstetric anaesthesia (Epiu I: Challenges of Anesthesia in Low-and Middle-Income Countries. WFSA; 2014 http://wfsa.newsweaver.com/Newsletter/p8c8ta4ri7a1wsacct9y3u?a=2&p=47730565&t=27996496 ). In spite of this evidence, factors contributing to maternal mortality related to anaesthesia in LMICs and the magnitude of these issues have not been comprehensively studied. We therefore set out to assess regional referral, district, private for profit and private not-for profit hospitals in Uganda.

Methods

We conducted a cross-sectional survey at 64 government and private hospitals in Uganda using pre-set questionnaires to the anaesthetists and hospital directors. Access to the minimum requirements for safe obstetric anaesthesia according to WFSA guidelines were also checked using a checklist for operating and recovery rooms.

Results

Response rate was 100% following personal interviews of anaesthetists, and hospital directors. Only 3 of the 64 (5%) of the hospitals had all requirements available to meet the WFSA International guidelines for safe anaesthesia. Additionally, 54/64 (84%) did not have a trained physician anaesthetist and 5/64 (8%) had no trained providers for anaesthesia at all. Frequent shortages of drugs were reported for regional/neuroaxial anaesthesia, and other essential drugs were often lacking such as antacids and antihypertensives. We noted that many of the anaesthesia machines present were obsolete models without functional safety alarms and/or mechanical ventilators. Continuous ECG was only available in 3/64 (5%) of hospitals.

Conclusion

We conclude that there is a significant lack of essential equipment for the delivery of safe anaesthesia across this region. This is compounded by the shortage of trained providers and inadequate supervision. It is therefore essential to strengthen anaesthesia services by addressing these specific deficiencies. This will include improved training of associate clinicians, training more physician anaesthetists and providing the basic equipment required to provide safe and effective care. These services are key components of comprehensive emergency obstetric care and anaesthetists are crucial in managing critically ill mothers and ensuring good surgical outcomes.","hji,kes",0,0,0,2,0,NA,NA +29162558,Surgical Interventions for the Treatment of Supracondylar Humerus Fractures in Children: Protocol of a Systematic Review.,"BACKGROUND:The treatment of supracondylar humerus fracture in children (SHFC) is associated with complications such as functional deficit, residual deformity, and iatrogenic neurological damage. The standard treatment is closed reduction and percutaneous Kirschner wire fixation with different configurations. Despite this fact, there is still no consensus on the most effective technique for the treatment of these fractures. OBJECTIVE:The aim of this systematic review will be to evaluate the effect of surgical interventions on the treatment of Gartland type II and III SHFC by assessing function, complications, and error as primary outcomes. Clinical outcomes such as range of motion and pain and radiographic outcomes will also be judged. METHODS:A systematic review of randomized controlled trials or quasi-randomized controlled trials evaluating the surgical treatment of SHFC will be carried out in the Cochrane Central Register of Controlled Trials, PubMed, Literatura Latino-Americana e do Caribe em Cincias da Sade, and Excerpta Medica Database. The search will also occur at ongoing and recently completed clinical trials in selected databases. 
Data management and extraction will be performed using a data withdrawal form and by analyzing the following: study method characteristics, participant characteristics, intervention characteristics, results, methodological domains, and risk of bias. To assess the risk of bias of the included trials, the Cochrane Risk of Bias Tool will be used. Dichotomous outcome data will be analyzed as risk ratios, and continuous outcome data will be expressed as mean differences, both with 95% confidence intervals. Also, whenever possible, subgroup analysis, sensitivity analysis, and assessment of heterogeneity will be performed. RESULTS:Following the publication of this protocol, searches will be run and included studies will be deeply analyzed. We hope to obtain final results in the next few months and have the final paper published by the end of 2018. This study was funded by a government-based noncommercial agency, Fundao de Amparo Pesquisa do Estado de So Paulo (FAPESP). CONCLUSIONS:This study may provide surgical treatment effects evidence for SHFC. The results will assist clinical practice by demonstrating the effectiveness and potential complications of these interventions and might serve as a reference for future clinical trials on the topic. TRIAL REGISTRATION:PROSPERO CRD42014009304; https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=9304 (Archived by WebCite at http://www.webcitation.org/6usiDHzD7).","hji,kes",0,0,0,2,0,NA,NA +29168754,A Bioinformatic Pipeline for Monitoring of the Mutational Stability of Viral Drug Targets with Deep-Sequencing Technology.,"The efficient development of antiviral drugs, including efficient antiviral small interfering RNAs (siRNAs), requires continuous monitoring of the strict correspondence between a drug and the related highly variable viral DNA/RNA target(s). Deep sequencing is able to provide an assessment of both the general target conservation and the frequency of particular mutations in the different target sites. 
The aim of this study was to develop a reliable bioinformatic pipeline for the analysis of millions of short, deep sequencing reads corresponding to selected highly variable viral sequences that are drug target(s). The suggested bioinformatic pipeline combines the available programs and the ad hoc scripts based on an original algorithm of the search for the conserved targets in the deep sequencing data. We also present the statistical criteria for the threshold of reliable mutation detection and for the assessment of variations between corresponding data sets. These criteria are robust against the possible sequencing errors in the reads. As an example, the bioinformatic pipeline is applied to the study of the conservation of RNA interference (RNAi) targets in human immunodeficiency virus 1 (HIV-1) subtype A. The developed pipeline is freely available to download at the website http://virmut.eimb.ru/. Brief comments and comparisons between VirMut and other pipelines are also presented.","hji,kes",0,0,0,2,0,NA,NA +29178193,Proteomic Characterization of Caenorhabditis elegans Larval Development.,"The nematode Caenorhabditis elegans is widely used as a model organism to study cell and developmental biology. Quantitative proteomics of C. elegans is still in its infancy and, so far, most studies have been performed on adult worm samples. Here, we used quantitative mass spectrometry to characterize protein level changes across the four larval developmental stages (L1-L4) of C. elegans. In total, we identified 4130 proteins, and quantified 1541 proteins that were present across all four stages in three biological replicates from independent experiments. Using hierarchical clustering and functional ontological analyses, we identified 21 clusters containing proteins with similar protein profiles across the four stages, and highlighted the most overrepresented biological functions in each of these protein clusters. 
In addition, we used the dataset to identify putative larval stage-specific proteins in each individual developmental stage, as well as in the early and late developmental stages. In summary, this dataset provides system-wide analysis of protein level changes across the four C. elegans larval developmental stages, which serves as a useful resource for the C. elegans research community. MS data were deposited in ProteomeXchange (http://proteomecentral.proteomexchange.org) via the PRIDE partner repository with the primary accession identifier PXD006676.","hji,kes",0,0,0,2,0,NA,NA +29202050,One Size Doesn't Fit All: Measuring Individual Privacy in Aggregate Genomic Data.,"Even in the aggregate, genomic data can reveal sensitive information about individuals. We present a new model-based measure, PrivMAF, that provides provable privacy guarantees for aggregate data (namely minor allele frequencies) obtained from genomic studies. Unlike many previous measures that have been designed to measure the total privacy lost by all participants in a study, PrivMAF gives an individual privacy measure for each participant in the study, not just an average measure. These individual measures can then be combined to measure the worst case privacy loss in the study. Our measure also allows us to quantify the privacy gains achieved by perturbing the data, either by adding noise or binning. Our findings demonstrate that both perturbation approaches offer significant privacy gains. Moreover, we see that these privacy gains can be achieved while minimizing perturbation (and thus maximizing the utility) relative to stricter notions of privacy, such as differential privacy. We test PrivMAF using genotype data from the Wellcome Trust Case Control Consortium, providing a more nuanced understanding of the privacy risks involved in an actual genome-wide association studies. 
Interestingly, our analysis demonstrates that the privacy implications of releasing MAFs from a study can differ greatly from individual to individual. An implementation of our method is available at http://privmaf.csail.mit.edu.","hji,kes",0,0,0,2,0,NA,NA +29224730,A validated calculator to estimate risk of cesarean after an induction of labor with an unfavorable cervix.,"

Background

Induction of labor occurs in >20% of pregnancies, which equates to approximately 1 million women undergoing an induction in the United States annually. Regardless of how common inductions are, our ability to predict induction success is limited. Although multiple risk factors for a failed induction have been identified, risk factors alone are not enough to quantify an actual risk of cesarean for an individual woman undergoing a cesarean.

Objective

The objective of this study was to derive and validate a prediction model for cesarean after induction with an unfavorable cervix and to create a Web-based calculator to assist in patient counseling.

Study design

Derivation and validation of a prediction model for cesarean delivery after induction was performed as part of a planned secondary analysis of a large randomized trial. A predictive model for cesarean delivery was derived using multivariable logistic regression from a large randomized trial on induction methods (n= 491) that took place from 2013 through 2015 at an academic institution. Full-term (>=37 weeks) women carrying a singleton gestation with intact membranes and an unfavorable cervix (Bishop score <=6 and dilation <=2 cm) undergoing an induction were included in this trial. Both nulliparous and multiparous women were included. Women with a prior cesarean were excluded. Refinement of the prediction model was performed using an observational cohort of women from the same institution who underwent an induction (n= 364) during the trial period. An external validation was performed utilizing a publicly available database (Consortium for Safe Labor) that includes information for >200,000 deliveries from 19 hospitals across the United States from 2002 through 2008. After applying the same inclusion and exclusion criteria utilized in the derivation cohort, a total of 8466 women remained for analysis. The discriminative power of each model was assessed using a bootstrap, bias-corrected area under the curve.

Results

The cesarean delivery rates in the derivation and external validation groups were: 27.7% (n= 136/491) and 26.4% (n= 2235/8466). In multivariable modeling, nulliparity, gestation age >=40 weeks, body mass index at delivery, modified Bishop score, and height were significantly associated with cesarean. A nomogram and calculator were created and found to have an area under the curve in the external validation cohort of 0.73 (95% confidence interval, 0.72-0.74).

Conclusion

A nomogram and user-friendly Web-based calculator that incorporates 5 variables known at the start of induction has been developed and validated. It can be found at: http://www.uphs.upenn.edu/obgyn/labor-induction-calculator/. This calculator can be used to augment patient counseling for women undergoing an induction with an unfavorable cervix.","hji,kes",0,0,0,2,0,NA,NA +29240889,GeoBoost: accelerating research involving the geospatial metadata of virus GenBank records.,"Summary:GeoBoost is a command-line software package developed to address sparse or incomplete metadata in GenBank sequence records that relate to the location of the infected host (LOIH) of viruses. Given a set of GenBank accession numbers corresponding to virus GenBank records, GeoBoost extracts, integrates and normalizes geographic information reflecting the LOIH of the viruses using integrated information from GenBank metadata and related full-text publications. In addition, to facilitate probabilistic geospatial modeling, GeoBoost assigns probability scores for each possible LOIH. Availability and implementation:Binaries and resources required for running GeoBoost are packed into a single zipped file and freely available for download at https://tinyurl.com/geoboost. A video tutorial is included to help users quickly and easily install and run the software. The software is implemented in Java 1.8, and supported on MS Windows and Linux platforms. Contact:gragon@upenn.edu. Supplementary information:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +29253072,K2 and K2*: efficient alignment-free sequence similarity measurement based on Kendall statistics.,"

Motivation

Alignment-free sequence comparison methods can compute the pairwise similarity between a huge number of sequences much faster than sequence-alignment based methods.

Results

We propose a new non-parametric alignment-free sequence comparison method, called K2, based on the Kendall statistics. Comparing to the other state-of-the-art alignment-free comparison methods, K2 demonstrates competitive performance in generating the phylogenetic tree, in evaluating functionally related regulatory sequences, and in computing the edit distance (similarity/dissimilarity) between sequences. Furthermore, the K2 approach is much faster than the other methods. An improved method, K2*, is also proposed, which is able to determine the appropriate algorithmic parameter (length) automatically, without first considering different values. Comparative analysis with the state-of-the-art alignment-free sequence similarity methods demonstrates the superiority of the proposed approaches, especially with increasing sequence length, or increasing dataset sizes.

Availability and implementation

The K2 and K2* approaches are implemented in the R language as a package and is freely available for open access (http://community.wvu.edu/daadjeroh/projects/K2/K2_1.0.tar.gz).

Contact

yueljiang@163.com.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +29270816,"Arctic berry extracts target the gut-liver axis to alleviate metabolic endotoxaemia, insulin resistance and hepatic steatosis in diet-induced obese mice.","

Aims/hypothesis

There is growing evidence that fruit polyphenols exert beneficial effects on the metabolic syndrome, but the underlying mechanisms remain poorly understood. In the present study, we aimed to analyse the effects of polyphenolic extracts from five types of Arctic berries in a model of diet-induced obesity.

Methods

Male C57BL/6J mice were fed a high-fat/high-sucrose (HFHS) diet and orally treated with extracts of bog blueberry (BBE), cloudberry (CLE), crowberry (CRE), alpine bearberry (ABE), lingonberry (LGE) or vehicle (HFHS) for 8 weeks. An additional group of standard-chow-fed, vehicle-treated mice was included as a reference control for diet-induced obesity. OGTTs and insulin tolerance tests were conducted, and both plasma insulin and C-peptide were assessed throughout the OGTT. Quantitative PCR, western blot analysis and ELISAs were used to assess enterohepatic immunometabolic features. Faecal DNA was extracted and 16S rRNA gene-based analysis was used to profile the gut microbiota.

Results

Treatment with CLE, ABE and LGE, but not with BBE or CRE, prevented both fasting hyperinsulinaemia (mean +/- SEM [pmol/l]: chow 67.2 +/- 12.3, HFHS 153.9 +/- 19.3, BBE 114.4 +/- 14.3, CLE 82.5 +/- 13.0, CRE 152.3 +/- 24.4, ABE 90.6 +/- 18.0, LGE 95.4 +/- 10.5) and postprandial hyperinsulinaemia (mean +/- SEM AUC [pmol/l min]: chow 14.3 +/- 1.4, HFHS 31.4 +/- 3.1, BBE 27.2 +/- 4.0, CLE 17.7 +/- 2.2, CRE 32.6 +/- 6.3, ABE 22.7 +/- 18.0, LGE 23.9 +/- 2.5). None of the berry extracts affected C-peptide levels or body weight gain. Levels of hepatic serine phosphorylated Akt were 1.6-, 1.5- and 1.2-fold higher with CLE, ABE and LGE treatment, respectively, and hepatic carcinoembryonic antigen-related cell adhesion molecule (CEACAM)-1 tyrosine phosphorylation was 0.6-, 0.7- and 0.9-fold increased in these mice vs vehicle-treated, HFHS-fed mice. These changes were associated with reduced liver triacylglycerol deposition, lower circulating endotoxins, alleviated hepatic and intestinal inflammation, and major gut microbial alterations (e.g. bloom of Akkermansia muciniphila, Turicibacter and Oscillibacter) in CLE-, ABE- and LGE-treated mice.

Conclusions/interpretation

Our findings reveal novel mechanisms by which polyphenolic extracts from ABE, LGE and especially CLE target the gut-liver axis to protect diet-induced obese mice against metabolic endotoxaemia, insulin resistance and hepatic steatosis, which importantly improves hepatic insulin clearance. These results support the potential benefits of these Arctic berries and their integration into health programmes to help attenuate obesity-related chronic inflammation and metabolic disorders.

Data availability

All raw sequences have been deposited in the public European Nucleotide Archive server under accession number PRJEB19783 ( https://www.ebi.ac.uk/ena/data/view/PRJEB19783 ).","hji,kes",0,0,0,2,0,NA,deposited data +29280994,PRAPI: post-transcriptional regulation analysis pipeline for Iso-Seq.,"

Summary

The single-molecule real-time (SMRT) isoform sequencing (Iso-Seq) based on Pacific Bioscience (PacBio) platform has received increasing attention for its ability to explore full-length isoforms. Thus, comprehensive tools for Iso-Seq bioinformatics analysis are extremely useful. Here, we present a one-stop solution for Iso-Seq analysis, called PRAPI to analyze alternative transcription initiation (ATI), alternative splicing (AS), alternative cleavage and polyadenylation (APA), natural antisense transcripts (NAT), and circular RNAs (circRNAs) comprehensively. PRAPI is capable of combining Iso-Seq full-length isoforms with short read data, such as RNA-Seq or polyadenylation site sequencing (PAS-seq) for differential expression analysis of NAT, AS, APA and circRNAs. Furthermore, PRAPI can annotate new genes and correct mis-annotated genes when gene annotation is available. Finally, PRAPI generates high-quality vector graphics to visualize and highlight the Iso-Seq results.

Availability and implementation

The Dockerfile of PRAPI is available at http://www.bioinfor.org/tool/PRAPI.

Contact

lfgu@fafu.edu.cn.","hji,kes",0,0,0,2,0,NA,NA +29297283,R3D-BLAST2: an improved search tool for similar RNA 3D substructures.,"BACKGROUND:RNA molecules have been known to play a variety of significant roles in cells. In principle, the functions of RNAs are largely determined by their three-dimensional (3D) structures. As more and more RNA 3D structures are available in the Protein Data Bank (PDB), a bioinformatics tool, which is able to rapidly and accurately search the PDB database for similar RNA 3D structures or substructures, is helpful to understand the structural and functional relationships of RNAs. RESULTS:Since its first release in 2011, R3D-BLAST has become a useful tool for searching the PDB database for similar RNA 3D structures and substructures. It was implemented by a structural-alphabet (SA)-based method, which utilizes an SA with 23 structural letters to encode RNA 3D structures into one-dimensional (1D) structural sequences and applies BLAST to the resulting structural sequences for searching similar substructures of RNAs. In this study, we have upgraded R3D-BLAST to develop a new web server named R3D-BLAST2 based on a higher quality SA newly constructed from a representative and sufficiently non-redundant list of RNA 3D structures. In addition, we have modified the kernel program in R3D-BLAST2 so that it can accept an RNA structure in the mmCIF format as an input. The results of our experiments on a benchmark dataset have demonstrated that R3D-BLAST2 indeed performs very well in comparison to its earlier version R3D-BLAST and other similar tools RNA FRABASE, FASTR3D and RAG-3D by searching a larger number of RNA 3D substructures resembling those of the input RNA. CONCLUSIONS:R3D-BLAST2 is a valuable BLAST-like search tool that can more accurately scan the PDB database for similar RNA 3D substructures. 
It is publicly available at http://genome.cs.nthu.edu.tw/R3D-BLAST2/ .","hji,kes",0,0,0,2,0,NA,NA +29309632,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on Surgical Resection for the Treatment of Patients With Vestibular Schwannomas.,"QUESTION 1:What surgical approaches for vestibular schwannomas (VS) are best for complete resection and facial nerve (FN) preservation when serviceable hearing is present? RECOMMENDATION:There is insufficient evidence to support the superiority of either the middle fossa (MF) or the retrosigmoid (RS) approach for complete VS resection and FN preservation when serviceable hearing is present. QUESTION 2:Which surgical approach (RS or translabyrinthine [TL]) for VS is best for complete resection and FN preservation when serviceable hearing is not present? RECOMMENDATION:There is insufficient evidence to support the superiority of either the RS or the TL approach for complete VS resection and FN preservation when serviceable hearing is not present. QUESTION 3:Does VS size matter for facial and vestibulocochlear nerve preservation with surgical resection? RECOMMENDATION:Level 3: Patients with larger VS tumor size should be counseled about the greater than average risk of loss of serviceable hearing. QUESTION 4:Should small intracanalicular tumors (<1.5 cm) be surgically resected? RECOMMENDATION:There are insufficient data to support a firm recommendation that surgery be the primary treatment for this subclass of VSs. QUESTION 5:Is hearing preservation routinely possible with VS surgical resection when serviceable hearing is present? RECOMMENDATION:Level 3: Hearing preservation surgery via the MF or the RS approach may be attempted in patients with small tumor size (<1.5 cm) and good preoperative hearing. QUESTION 6:When should surgical resection be the initial treatment in patients with neurofibromatosis type 2 (NF2)? 
RECOMMENDATION:There is insufficient evidence that surgical resection should be the initial treatment in patients with NF2. QUESTION 7:Does a multidisciplinary team, consisting of neurosurgery and neurotology, provides the best outcomes of complete resection and facial/vestibulocochlear nerve preservation for patients undergoing resection of VSs? RECOMMENDATION:There is insufficient evidence to support stating that a multidisciplinary team, usually consisting of a neurosurgeon and a neurotologist, provides superior outcomes compared to either subspecialist working alone. QUESTION 8:Does a subtotal surgical resection of a VS followed by stereotactic radiosurgery (SRS) to the residual tumor provide comparable hearing and FN preservation to patients who undergo a complete surgical resection? RECOMMENDATION:There is insufficient evidence to support subtotal resection (STR) followed by SRS provides comparable hearing and FN preservation to patients who undergo a complete surgical resection. QUESTION 9:Does surgical resection of VS treat preoperative balance problems more effectively than SRS? RECOMMENDATION:There is insufficient evidence to support either surgical resection or SRS for treatment of preoperative balance problems. QUESTION 10:Does surgical resection of VS treat preoperative trigeminal neuralgia more effectively than SRS? RECOMMENDATION:Level 3: Surgical resection of VSs may be used to better relieve symptoms of trigeminal neuralgia than SRS. QUESTION 11:Is surgical resection of VSs more difficult (associated with higher facial neuropathies and STR rates) after initial treatment with SRS? RECOMMENDATION:Level 3: If microsurgical resection is necessary after SRS, it is recommended that patients be counseled that there is an increased likelihood of a STR and decreased FN function. 
The full guideline can be found at: https://www.cns.org/guidelines/guidelines-management-patients-vestibular-schwannoma/chapter_8.","hji,kes",0,0,0,2,0,NA,NA +29387738,Ocean currents and acoustic backscatter data from shipboard ADCP measurements at three North Atlantic seamounts between 2004 and 2015.,"Seamounts are amongst the most common physiographic structures of the deep-ocean landscape, but remoteness and geographic complexity have limited the systematic collection of integrated and multidisciplinary data in the past. Consequently, important aspects of seamount ecology and dynamics remain poorly studied. We present a data collection of ocean currents and raw acoustic backscatter from shipboard Acoustic Doppler Current Profiler (ADCP) measurements during six cruises between 2004 and 2015 in the tropical and subtropical Northeast Atlantic to narrow this gap. Measurements were conducted at seamount locations between the island of Madeira and the Portuguese mainland (Ampre, Seine Seamount), as well as east of the Cape Verde archipelago (Senghor Seamount). The dataset includes two-minute ensemble averaged continuous velocity and backscatter profiles, supplemented by spatially gridded maps for each velocity component, error velocity and local bathymetry. The dataset is freely available from the digital data library PANGAEA at https://doi.pangaea.de/10.1594/PANGAEA.883193.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +29425325,Selenzyme: enzyme selection tool for pathway design.,"Summary:Synthetic biology applies the principles of engineering to biology in order to create biological functionalities not seen before in nature. One of the most exciting applications of synthetic biology is the design of new organisms with the ability to produce valuable chemicals including pharmaceuticals and biomaterials in a greener; sustainable fashion. 
Selecting the right enzymes to catalyze each reaction step in order to produce a desired target compound is, however, not trivial. Here, we present Selenzyme, a free online enzyme selection tool for metabolic pathway design. The user is guided through several decision steps in order to shortlist the best candidates for a given pathway step. The tool graphically presents key information about enzymes based on existing databases and tools such as: similarity of sequences and of catalyzed reactions; phylogenetic distance between source organism and intended host species; multiple alignment highlighting conserved regions, predicted catalytic site, and active regions and relevant properties such as predicted solubility and transmembrane regions. Selenzyme provides bespoke sequence selection for automated workflows in biofoundries. Availability and implementation:The tool is integrated as part of the pathway design stage into the design-build-test-learn SYNBIOCHEM pipeline. The Selenzyme web server is available at http://selenzyme.synbiochem.co.uk. Supplementary information:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,"decision tree, selector" +29512488,S2P: A software tool to quickly carry out reproducible biomedical research projects involving 2D-gel and MALDI-TOF MS protein data.,"

Background and objective

2D-gel electrophoresis is widely used in combination with MALDI-TOF mass spectrometry in order to analyze the proteome of biological samples. For instance, it can be used to discover proteins that are differentially expressed between two groups (e.g. two disease conditions, case vs. control, etc.) thus obtaining a set of potential biomarkers. This procedure requires a great deal of data processing in order to prepare data for analysis or to merge and integrate data from different sources. This kind of work is usually done manually (e.g. copying and pasting data into spreadsheet files), which is highly time consuming and distracts the researcher from other important, core tasks. Moreover, engaging in a repetitive process in a non-automated, handling-based manner is prone to error, thus threatening reliability and reproducibility. The objective of this paper is to present S2P, an open source software to overcome these drawbacks.

Methods

S2P is implemented in Java on top of the AIBench framework, and relies on well-established open source libraries to accomplish different tasks.

Results

S2P is an AIBench based desktop multiplatform application, specifically aimed to process 2D-gel and MALDI-mass spectrometry protein identification-based data in a computer-aided, reproducible manner. Different case studies are presented in order to show the usefulness of S2P.

Conclusions

S2P is open source and free to all users at http://www.sing-group.org/s2p. Through its user-friendly GUI interface, S2P dramatically reduces the time that researchers need to invest in order to prepare data for analysis.","hji,kes",0,0,0,2,0,NA,NA +29519778,Outcome of Critically ill Patients Undergoing Mandatory Insulin Therapy Compared to Usual Care Insulin Therapy: Protocol for a Pilot Randomized Controlled Trial.,"BACKGROUND:Observational and interventional studies in patients with both acute medical conditions and long-standing diabetes have shown that improved blood glucose control confers a survival advantage or reduces complication rates. Policies of """"tight"""" glycaemic control were rapidly adopted by many general intensive care units (ICUs) worldwide in the mid 00's, even though the results of the studies were not generalizable to mixed medical/surgical ICUs with different intravenous feeding policies. OBJECTIVE:The primary objective of the study is to assess the safety of mandatory insulin infusion in critically ill patients in a general ICU setting. METHODS:This protocol summarizes the rationale and design of a randomized, controlled, single-center trial investigating the effect of mandatory insulin therapy versus usual care insulin therapy for those patients admitted for a stay of longer than 48 hours. In total, 109 critically ill adults predicted to stay in intensive care for longer than 48 hours consented. The primary outcome is to determine the safety of mandatory insulin therapy in critically ill patients using the number of episodes of hypoglycaemia and hypokalaemia per unit length of stay in intensive care. Secondary outcomes include the duration of mechanical ventilation, duration of ICU and hospital stay, hospital mortality, and measures of renal, hepatic, and haematological dysfunction. RESULTS:The project was funded in 2005 and enrolment was completed 2007. 
Data analysis is currently underway and the first results are expected to be submitted for publication in 2018. CONCLUSIONS:This protocol for a randomized controlled trial investigating the effect of mandatory insulin therapy should provide an answer to a key question for the management of patients in the ICU and ultimately improving outcome. TRIAL REGISTRATION:International Standard Randomized Controlled Trial Number ISRCTN00550641; http://www.isrctn.com/ISRCTN00550641 (Archived at WebCite: http://www.webcitation.org/6xk8NXxNv).","hji,kes",0,0,0,2,0,NA,NA +29524011,HIVprotI: an integrated web based platform for prediction and design of HIV proteins inhibitors.,"A number of anti-retroviral drugs are being used for treating Human Immunodeficiency Virus (HIV) infection. Due to emergence of drug resistant strains, there is a constant quest to discover more effective anti-HIV compounds. In this endeavor, computational tools have proven useful in accelerating drug discovery. Although methods were published to design a class of compounds against a specific HIV protein, but an integrated web server for the same is lacking. Therefore, we have developed support vector machine based regression models using experimentally validated data from ChEMBL repository. Quantitative structure activity relationship based features were selected for predicting inhibition activity of a compound against HIV proteins namely protease (PR), reverse transcriptase (RT) and integrase (IN). The models presented a maximum Pearson correlation coefficient of 0.78, 0.76, 0.74 and 0.76, 0.68, 0.72 during tenfold cross-validation on IC50 and percent inhibition datasets of PR, RT, IN respectively. These models performed equally well on the independent datasets. Chemical space mapping, applicability domain analyses and other statistical tests further support robustness of the predictive models. 
Currently, we have identified a number of chemical descriptors that are imperative in predicting the compound inhibition potential. HIVprotI platform ( http://bioinfo.imtech.res.in/manojk/hivproti ) would be useful in virtual screening of inhibitors as well as designing of new molecules against the important HIV proteins for therapeutics development.","hji,kes",0,0,0,2,0,NA,data not unique +29527200,Whole Genome Sequence Analysis of CTX-M-15 Producing Klebsiella Isolates Allowed Dissecting a Polyclonal Outbreak Scenario.,"Extended-spectrum beta-lactamase (ESBL) producing Klebsiella pneumoniae pose an important threat of infection with increased morbidity and mortality, especially for immunocompromised patients. Here, we use the rise of multidrug-resistant K. pneumoniae in a German neurorehabilitation center from April 2015 to April 2016 to dissect the benefit of whole genome sequencing (WGS) for outbreak analyses. In total, 53 isolates were obtained from 52 patients and examined using WGS. Two independent analysis strategies (reference-based and -free) revealed the same distinct clusters of two CTX-M-15 producing K. pneumoniae clones (ST15, n = 31; ST405, n = 7) and one CTX-M-15 producing Klebsiella quasipneumoniae strain (ST414, n = 8). Additionally, we determined sequence variations associated with antimicrobial resistance phenotypes in single isolates expressing carbapenem and colistin resistance, respectively. For rapid detection of the major K. pneumoniae outbreak clone (ST15), a selective triplex PCR was deduced from WGS data of the major outbreak strain and K. pneumoniae genome data deposited in central databases. 
Moreover, we introduce two novel open-source applications supporting reference genome selection (refRank; https://gitlab.com/s.fuchs/refRank) and alignment-based SNP-filtering (SNPfilter; https://gitlab.com/s.fuchs/snpfilter) in NGS analyses.","hji,kes",0,0,0,2,0,NA,NA +29530061,Stearoyl-CoA desaturase-1 promotes colorectal cancer metastasis in response to glucose by suppressing PTEN.,"BACKGROUND:Diabetic patients have a higher risk factor for colorectal cancer (CRC) metastasis. Stearoyl-CoA desaturase 1 (SCD1), the main enzyme responsible for producing monounsaturated fatty acids(MUFA) from saturated fatty acids, is frequently deregulated in both diabetes and CRC. The function and mechanism of SCD1 in metastasis of CRC and its relevance to glucose remains largely unknown. METHODS:SCD1 expression levels were analyzed in human CRC tissues and the Cancer Browser database ( https://genome-cancer.ucsc.edu/ ). CRC cell lines stably transfected with SCD1 shRNAs or vector were established to investigate the role of SCD1 in modulating migration and invasion of CRC cells. A glucose concentration gradient was set to investigate regulation of SCD1 in CRC relevant to diabetic conditions. RESULTS:The clinical data analysis showed high expression of SCD1 in CRC tissues with a negative correlation with the prognosis of CRC. In vitro experiments revealed that SCD1 increased CRC progression through promoting epithelial-mesenchymal transition (EMT). Lipidomic analysis demonstrated that SCD1 increased MUFA levels and MUFA administration could rescue migration and invasion defect of CRC cells induced by SCD1 knockdown. Furthermore, SCD1-mediated progression of CRC was promoted by carbohydrate response-element binding protein (ChREBP) in response to high glucose. Mechanistically, hyperglycemia-SCD1-MUFA induced CRC cell migration and invasion by regulating PTEN. 
CONCLUSIONS:Our findings show that SCD1 promotes metastasis of CRC cells through MUFA production and suppressing PTEN in response to glucose, which may be a novel mechanism for diabetes-induced CRC metastasis.","hji,kes",0,0,0,2,0,NA,NA +29544540,"A graphical user interface for RAId, a knowledge integrated proteomics analysis suite with accurate statistics.","

Objective

RAId is a software package that has been actively developed for the past 10 years for computationally and visually analyzing MS/MS data. Founded on rigorous statistical methods, RAId's core program computes accurate E-values for peptides and proteins identified during database searches. Making this robust tool readily accessible for the proteomics community by developing a graphical user interface (GUI) is our main goal here.

Results

We have constructed a graphical user interface to facilitate the use of RAId on users' local machines. Written in Java, RAId_GUI not only makes easy executions of RAId but also provides tools for data/spectra visualization, MS-product analysis, molecular isotopic distribution analysis, and graphing the retrieval versus the proportion of false discoveries. The results viewer displays and allows the users to download the analyses results. Both the knowledge-integrated organismal databases and the code package (containing source code, the graphical user interface, and a user manual) are available for download at https://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads/raid.html .","hji,kes",0,0,0,2,0,NA,NA +29572201,"Investigating the Impact of Hearing Aid Use and Auditory Training on Cognition, Depressive Symptoms, and Social Interaction in Adults With Hearing Loss: Protocol for a Crossover Trial.","BACKGROUND:Sensorineural hearing loss is the most common sensory deficit among older adults. Some of the psychosocial consequences of this condition include difficulty in understanding speech, depression, and social isolation. Studies have shown that older adults with hearing loss show some age-related cognitive decline. Hearing aids have been proven as successful interventions to alleviate sensorineural hearing loss. In addition to hearing aid use, the positive effects of auditory training-formal listening activities designed to optimize speech perception-are now being documented among adults with hearing loss who use hearing aids, especially new hearing aid users. Auditory training has also been shown to produce prolonged cognitive performance improvements. However, there is still little evidence to support the benefits of simultaneous hearing aid use and individualized face-to-face auditory training on cognitive performance in adults with hearing loss. 
OBJECTIVE:This study will investigate whether using hearing aids for the first time will improve the impact of individualized face-to-face auditory training on cognition, depression, and social interaction for adults with sensorineural hearing loss. The rationale for this study is based on the hypothesis that, in adults with sensorineural hearing loss, using hearing aids for the first time in combination with individualized face-to-face auditory training will be more effective for improving cognition, depressive symptoms, and social interaction rather than auditory training on its own. METHODS:This is a crossover trial targeting 40 men and women between 50 and 90 years of age with either mild or moderate symmetric sensorineural hearing loss. Consented, willing participants will be recruited from either an independent living accommodation or via a community database to undergo a 6-month intensive face-to-face auditory training program (active control). Participants will be assigned in random order to receive hearing aid (intervention) for either the first 3 or last 3 months of the 6-month auditory training program. Each participant will be tested at baseline, 3, and 6 months using a neuropsychological battery of computer-based cognitive assessments, together with a depression symptom instrument and a social interaction measure. The primary outcome will be cognitive performance with regard to spatial working memory. Secondary outcome measures include other cognition performance measures, depressive symptoms, social interaction, and hearing satisfaction. RESULTS:Data analysis is currently under way and the first results are expected to be submitted for publication in June 2018. CONCLUSIONS:Results from the study will inform strategies for aural rehabilitation, hearing aid delivery, and future hearing loss intervention trials. 
TRIAL REGISTRATION:ClinicalTrials.gov NCT03112850; https://clinicaltrials.gov/ct2/show/NCT03112850 (Archived by WebCite at http://www.webcitation.org/6xz12fD0B).","hji,kes",0,0,0,2,0,NA,NA +29592813,Cancer Genome Interpreter annotates the biological and clinical relevance of tumor alterations.,"While tumor genome sequencing has become widely available in clinical and research settings, the interpretation of tumor somatic variants remains an important bottleneck. Here we present the Cancer Genome Interpreter, a versatile platform that automates the interpretation of newly sequenced cancer genomes, annotating the potential of alterations detected in tumors to act as drivers and their possible effect on treatment response. The results are organized in different levels of evidence according to current knowledge, which we envision can support a broad range of oncology use cases. The resource is publicly available at http://www.cancergenomeinterpreter.org .","hji,kes",0,0,0,2,0,NA,NA +29620239,A functional polymorphism at miR‑491‑5p binding site in the 3'UTR of MMP9 gene confers increased risk for pressure ulcers after hip fracture.,"The roles of matrix metalloproteinase (MMP)9 in the control of pressure ulcers(PU) after hip fracture as well as how the rs1056629 in MMP9 3'UTR compromises the interaction between MMP9 and miR-491 were explored. Online miRNA database (http://www.bioguo.org) was utilized to explore gene polymorphism in MMP9 3'UTR that might break the interaction between MMP9 and miRNA. Luciferase assay was utilized to confirm the miRNA targeted MMP9. Real-time PCR, westernblot analysis and immunohistochemistry were carried out to understand the roles of MMP9 in PU as well as how rs1056629 in MMP9 3'UTR compromises the interaction between MMP9 and miR-491. 
rs1056629 in MMP9 3'UTR that compromised the interaction between MMP9 and four miRNAs including miR-194-3p, miR-491, miR-1915-3p and miR-941, and only miR-491 among miR-194-3p, miR-491, miR-1915-3p and miR-941 decreased luciferase activity of wild-type MMP9 3'UTR, and luciferase activities of mutant-3 and mutant-4 MMP9 3'UTR in miR-491 overexpressing cells was comparable with scramble control. miR-194-3p, miR-491, miR-1915-3p and miR-941 levels in PU group was comparable with healthy control, and miR-194-3p, miR-491, miR-1915-3p and miR-941 in subjects carrying AA genotype was similar with those in AC and CC groups. MMP9 mRNA and protein, and histology score in subjects with PU were much higher, and were also much higher in AA group. Only miR-491 mimic among miR-194-3p, miR-491, miR-1915-3p and miR-941 mimics downregulated the MMP9 level, and only miR-491 inhibitor among miR-194-3p, miR-491, miR-1915-3p and miR-941 inhibitors upregulated the MMP9 level. Our study indicated that rs1056629 polymorphism could be a novel biomarker for predicting the occurrence of PU after a hip fracture.","hji,kes",0,0,0,2,0,NA,references other data resource +29630775,PR-10 proteins as potential mediators of melatonin-cytokinin cross-talk in plants: crystallographic studies of LlPR-10.2B isoform from yellow lupine.,"LlPR-10.2B, a Pathogenesis-related class 10 (PR-10) protein from yellow lupine (Lupinus luteus) was crystallized in complex with melatonin, an emerging important plant regulator and antioxidant. The structure reveals two molecules of melatonin bound in the internal cavity of the protein, plus a very well-defined electron density near the cavity entrance, corresponding to an unknown ligand molecule comprised of two flat rings, which is most likely a product of melatonin transformation. 
In a separate LlPR-10.2B co-crystallization experiment with an equimolar mixture of melatonin and trans-zeatin, which is a cytokinin phytohormone well recognized as a PR-10-binding partner, a quaternary 1 : 1 : 1 : 1 complex was formed, in which one of the melatonin-binding sites has been substituted with trans-zeatin, whereas the binding of melatonin at the second binding site and binding of the unknown ligand are undisturbed. This unusual complex, when compared with the previously described PR-10/trans-zeatin complexes and with the emerging structural information about melatonin binding by PR-10 proteins, provides intriguing insights into the role of PR-10 proteins in phytohormone regulation in plants, especially with the involvement of melatonin, and implicates the PR-10 proteins as low-affinity melatonin binders under the conditions of elevated melatonin concentration. DATABASES:Atomic coordinates and processed structure factors corresponding to the final models of the LlPR-10.2B/melatonin and LlPR-10.2B/melatonin + trans-zeatin complexes have been deposited with the Protein Data Bank (PDB) under the accession codes 5MXB and 5MXW. The corresponding raw X-ray diffraction images have been deposited in the RepOD Repository at the Interdisciplinary Centre for Mathematical and Computational Modelling (ICM) of the University of Warsaw, Poland, and are available for download with the following Digital Object Identifiers (DOI): https://doi.org/10.18150/repod.9923638 and https://doi.org/10.18150/repod.6621013.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +29701446,Prenatal Organophosphate Pesticide Exposure and Traits Related to Autism Spectrum Disorders in a Population Living in Proximity to Agriculture.,"

Background

Prenatal exposure to organophosphate (OP) pesticides has been linked with poorer neurodevelopment and behaviors related to autism spectrum disorders (ASD) in previous studies, including in the Center for Health Assessment of Mothers and Children of Salinas (CHAMACOS) study, a birth cohort living in the agricultural Salinas Valley in California.

Objectives

To investigate the association of prenatal exposure to OP pesticides with traits related to ASD, in childhood and adolescents in CHAMACOS.

Methods

We assessed OP exposure during pregnancy with measurements of dialkyl phosphates (DAP) metabolites in urine, and residential proximity to OP use during pregnancy using California's Pesticide Use Reporting (PUR) data and estimated associations with ASD-related traits using linear regression models. We measured traits reported by parents and teachers as well as the child's performance on tests that evaluate the ability to use facial expressions to recognize the mental state of others at 7, 10 1/2, and 14 years of age.

Results

Prenatal DAPs were associated with poorer parent and teacher reported social behavior [e.g., a 10-fold DAP increase was associated with a 2.7-point increase (95% confidence interval (CI): 0.9, 4.5) in parent-reported Social Responsiveness Scale, Version 2, T-scores at age 14]. We did not find clear evidence of associations between residential proximity to OP use during pregnancy and ASD-related traits.

Conclusions

These findings contribute mixed evidence linking OP pesticide exposures with traits related to developmental disorders like ASD. Subtle pesticide-related effects on ASD-related traits among a population with ubiquitous exposure could result in a rise in cases of clinically diagnosed disorders like ASD. https://doi.org/10.1289/EHP2580.","hji,kes",0,0,0,2,0,NA,NA +29703719,Sialic Acid Blockade Suppresses Tumor Growth by Enhancing T-cell-Mediated Tumor Immunity.,"Sialic acid sugars on the surface of cancer cells have emerged as potent immune modulators that contribute to the immunosuppressive microenvironment and tumor immune evasion. However, the mechanisms by which these sugars modulate antitumor immunity as well as therapeutic strategies directed against them are limited. Here we report that intratumoral injections with a sialic acid mimetic Ac53FaxNeu5Ac block tumor sialic acid expression in vivo and suppress tumor growth in multiple tumor models. Sialic acid blockade had a major impact on the immune cell composition of the tumor, enhancing tumor-infiltrating natural killer cell and CD8+ T-cell numbers while reducing regulatory T-cell and myeloid regulatory cell numbers. Sialic acid blockade enhanced cytotoxic CD8+ T-cell-mediated killing of tumor cells in part by facilitating antigen-specific T-cell-tumor cell clustering. Sialic acid blockade also synergized with adoptive transfer of tumor-specific CD8+ T cells in vivo and enhanced CpG immune adjuvant therapy by increasing dendritic cell activation and subsequent CD8+ T-cell responses. 
Collectively, these data emphasize the crucial role of sialic acids in tumor immune evasion and provide proof of concept that sialic acid blockade creates an immune-permissive tumor microenvironment for CD8+ T-cell-mediated tumor immunity, either as single treatment or in combination with other immune-based intervention strategies.Significance: Sialic acid sugars function as important modulators of the immunosuppressive tumor microenvironment that limit potent antitumor immunity.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/78/13/3574/F1.large.jpg Cancer Res; 78(13); 3574-88. 2018 AACR.","hji,kes",0,0,0,2,0,NA,NA +29729661,"A Longitudinal Study Examining Changes in Street Connectivity, Land Use, and Density of Dwellings and Walking for Transport in Brisbane, Australia.","

Background

Societies face the challenge of keeping people active as they age. Walkable neighborhoods have been associated with physical activity, but more rigorous analytical approaches are needed.

Objectives

We used longitudinal data from adult residents of Brisbane, Australia (40-65 years of age at baseline) to estimate effects of changes in neighborhood characteristics over a 6-y period on the likelihood of walking for transport.

Methods

Analyses included 2,789-9,747 How Areas Influence Health and Activity (HABITAT) cohort participants from 200 neighborhoods at baseline (2007) who completed up to three follow-up questionnaires (through 2013). Principal components analysis was used to derive a proxy measure of walkability preference. Environmental predictors were changes in street connectivity, residential density, and land use mix within a one-kilometer network buffer. Associations with any walking and minutes of walking were estimated using logistic and linear regression, including random effects models adjusted for time-varying confounders and a measure of walkability preference, and fixed effects models of changes in individuals to eliminate confounding by time-invariant characteristics.

Results

Any walking for transport (vs. none) was increased in association with an increase in street connectivity (+10 intersections, fixed effects OR=1.19; 95% confidence interval (CI): 1.07, 1.32), residential density (+5 dwellings/hectare, OR=1.10; 95% CI: 1.05, 1.15), and land-use mix (10% increase, OR=1.12; 95% CI: 1.00, 1.26). Associations with minutes of walking were positive based on random effects models, but null for fixed effects models. The association between land-use mix and any walking appeared to be limited to participants in the highest tertile of increased street connectivity (fixed effects OR=1.17; 95% CI: 0.99, 1.35 for a 1-unit increase in land-use mix; interaction p-value=0.05).

Conclusions

Increases in street connectivity, residential density, and land-use heterogeneity were associated with walking for transport among middle-age residents of Brisbane, Australia. https://doi.org/10.1289/EHP2080.","hji,kes",0,0,0,2,0,NA,NA +29738769,Neutrophils infiltrating pancreatic ductal adenocarcinoma indicate higher malignancy and worse prognosis.,"CD177 is considered to represent neutrophils. We analyzed mRNA expression level of CD177 and clinical follow-up survey of PDAC to estimate overall survival (OS) from Gene Expression Omnibus (GEO) dataset (GSE21501, containing samples from 102 PDAC patients) by R2 platform (http://r2.amc.nl). We also analyzed correlated genes of CD177 by Gene Ontology (GO) and Kyoto Encyclopedia of Genes and Genomes (KEGG) analysis to predict the potential relationship between neutrophils and prognosis of PDAC. We then performed hematoxylin and eosin (H&E) staining and immunohistochemical staining of surgical specimens to verify infiltration of neutrophils in PDAC tissues. After analyzing mRNA expression data and clinical follow-up survey provided in the GEO dataset (GSE21501, containing samples from 102 PDAC patients) and clinicopathological data of 23 PDAC patients, we demonstrated that CD177 was correlated with poor prognosis. The univariate Kaplan-Meier survival analysis revealed that OS was inversely associated with increased expression of CD177 (P=0.012). Expression of phosphodiesterase (PDE)4D was positively related to CD177 in gene correlation analysis (R=0.413, P<0.001) by R2 platform. H&E staining and immunohistochemistry of CD177 in 23 PDAC surgical samples showed accumulation of neutrophils in the stroma and blood vessels around the cancer cells. In addition, immunohistochemical staining showed that CD177 was highly expressed in the stroma and blood vessels around tumor tissues of PDAC, which was similar to H&E staining. 
Expression of CD177 can be used to represent infiltration of neutrophils, which may have potential prognostic value in PDAC.","hji,kes",0,0,0,2,0,NA,NA +29743730,Evaluating soil moisture retrievals from ESA's SMOS and NASA's SMAP brightness temperature datasets.,"Two satellites are currently monitoring surface soil moisture (SM) using L-band observations: SMOS (Soil Moisture and Ocean Salinity), a joint ESA (European Space Agency), CNES (Centre national d'tudes spatiales), and CDTI (the Spanish government agency with responsibility for space) satellite launched on November 2, 2009 and SMAP (Soil Moisture Active Passive), a National Aeronautics and Space Administration (NASA) satellite successfully launched in January 2015. In this study, we used a multilinear regression approach to retrieve SM from SMAP data to create a global dataset of SM, which is consistent with SM data retrieved from SMOS. This was achieved by calibrating coefficients of the regression model using the CATDS (Centre Aval de Traitement des Donnes) SMOS Level 3 SM and the horizontally and vertically polarized brightness temperatures (TB) at 40 incidence angle, over the 2013 - 2014 period. Next, this model was applied to SMAP L3 TB data from Apr 2015 to Jul 2016. The retrieved SM from SMAP (referred to here as SMAP_Reg) was compared to: (i) the operational SMAP L3 SM (SMAP_SCA), retrieved using the baseline Single Channel retrieval Algorithm (SCA); and (ii) the operational SMOSL3 SM, derived from the multiangular inversion of the L-MEB model (L-MEB algorithm) (SMOSL3). This inter-comparison was made against in situ soil moisture measurements from more than 400 sites spread over the globe, which are used here as a reference soil moisture dataset. 
The in situ observations were obtained from the International Soil Moisture Network (ISMN; https://ismn.geo.tuwien.ac.at/) in North of America (PBO_H2O, SCAN, SNOTEL, iRON, and USCRN), in Australia (Oznet), Africa (DAHRA), and in Europe (REMEDHUS, SMOSMANIA, FMI, and RSMN). The agreement was analyzed in terms of four classical statistical criteria: Root Mean Squared Error (RMSE), Bias, Unbiased RMSE (UnbRMSE), and correlation coefficient (R). Results of the comparison of these various products with in situ observations show that the performance of both SMAP products i.e. SMAP_SCA and SMAP_Reg is similar and marginally better to that of the SMOSL3 product particularly over the PBO_H2O, SCAN, and USCRN sites. However, SMOSL3 SM was closer to the in situ observations over the DAHRA and Oznet sites. We found that the correlation between all three datasets and in situ measurements is best (R > 0.80) over the Oznet sites and worst (R = 0.58) over the SNOTEL sites for SMAP_SCA and over the DAHRA and SMOSMANIA sites (R= 0.51 and R= 0.45 for SMAP_Reg and SMOSL3, respectively). The Bias values showed that all products are generally dry, except over RSMN, DAHRA, and Oznet (and FMI for SMAP_SCA). Finally, our analysis provided interesting insights that can be useful to improve the consistency between SMAP and SMOS datasets.","hji,kes",0,0,0,2,0,NA,NA +29746699,RepeatsDB-lite: a web server for unit annotation of tandem repeat proteins.,"RepeatsDB-lite (http://protein.bio.unipd.it/repeatsdb-lite) is a web server for the prediction of repetitive structural elements and units in tandem repeat (TR) proteins. TRs are a widespread but poorly annotated class of non-globular proteins carrying heterogeneous functions. RepeatsDB-lite extends the prediction to all TR types and strongly improves the performance both in terms of computational time and accuracy over previous methods, with precision above 95% for solenoid structures. 
The algorithm exploits an improved TR unit library derived from the RepeatsDB database to perform an iterative structural search and assignment. The web interface provides tools for analyzing the evolutionary relationships between units and manually refine the prediction by changing unit positions and protein classification. An all-against-all structure-based sequence similarity matrix is calculated and visualized in real-time for every user edit. Reviewed predictions can be submitted to RepeatsDB for review and inclusion.","hji,kes",0,0,0,2,0,NA,"affliated with a DB, but a server for it…" +29753646,Network Visualization and Analysis of Spatially Aware Gene Expression Data with InsituNet.,"In situ sequencing methods generate spatially resolved RNA localization and expression data at an almost single-cell resolution. Few methods, however, currently exist to analyze and visualize the complex data that is produced, which can encode the localization and expression of a million or more individual transcripts in a tissue section. Here, we present InsituNet, an application that converts in situ sequencing data into interactive network-based visualizations, where each unique transcript is a node in the network and edges represent the spatial co-expression relationships between transcripts. InsituNet is available as an app for the Cytoscape platform at http://apps.cytoscape.org/apps/insitunet. InsituNet enables the analysis of the relationships that exist between these transcripts and can uncover how spatial co-expression profiles change in different regions of the tissue or across different tissue sections.","hji,kes",0,0,0,2,0,NA,NA +29757429,AlloFinder: a strategy for allosteric modulator discovery and allosterome analyses.,"Allostery tweaks innumerable biological processes and plays a fundamental role in human disease and drug discovery. 
Exploration of allostery has thus been regarded as a crucial requirement for research on biological mechanisms and the development of novel therapeutics. Here, based on our previously developed allosteric data and methods, we present an interactive platform called AlloFinder that identifies potential endogenous or exogenous allosteric modulators and their involvement in human allosterome. AlloFinder automatically amalgamates allosteric site identification, allosteric screening and allosteric scoring evaluation of modulator-protein complexes to identify allosteric modulators, followed by allosterome mapping analyses of predicted allosteric sites and modulators in human proteome. This web server exhibits prominent performance in the reemergence of allosteric metabolites and exogenous allosteric modulators in known allosteric proteins. Specifically, AlloFinder enables identification of allosteric metabolites for metabolic enzymes and screening of potential allosteric compounds for disease-related targets. Significantly, the feasibility of AlloFinder to discover allosteric modulators was tested in a real case of signal transduction and activation of transcription 3 (STAT3) and validated by mutagenesis and functional experiments. Collectively, AlloFinder is expected to contribute to exploration of the mechanisms of allosteric regulation between metabolites and metabolic enzymes, and to accelerate allosteric drug discovery. The AlloFinder web server is freely available to all users at http://mdl.shsmu.edu.cn/ALF/.","hji,kes",0,0,0,2,0,NA,NA +29762724,"AAI-profiler: fast proteome-wide exploratory analysis reveals taxonomic identity, misclassification and contamination.","We present AAI-profiler, a web server for exploratory analysis and quality control in comparative genomics. AAI-profiler summarizes proteome-wide sequence search results to identify novel species, assess the need for taxonomic reclassification and detect multi-isolate and contaminated samples. 
AAI-profiler visualises results using a scatterplot that shows the Average Amino-acid Identity (AAI) from the query proteome to all similar species in the sequence database. Taxonomic groups are indicated by colour and marker styles, making outliers easy to spot. AAI-profiler uses SANSparallel to perform high-performance homology searches, making proteome-wide analysis possible. We demonstrate the efficacy of AAI-profiler in the discovery of a close relationship between two bacterial symbionts of an omnivorous pirate bug (Orius) and a thrip (Frankliniella occidentalis), an important pest in agriculture. The symbionts represent novel species within the genus Rosenbergiella so far described only in floral nectar. AAI-profiler is easy to use, the analysis presented only required two mouse clicks and was completed in a few minutes. AAI-profiler is available at http://ekhidna2.biocenter.helsinki.fi/AAI.","hji,kes",0,0,0,2,0,NA,NA +29775639,Surgical Management of Lower Urinary Tract Symptoms Attributed to Benign Prostatic Hyperplasia: AUA Guideline.,"PURPOSE:Male lower urinary tract symptoms (LUTS) secondary to benign prostatic hyperplasia (BPH) is common in men and can have negative effects on quality of life (QoL). It is the hope that this Guideline becomes a reference on the effective evidence-based surgical management of LUTS/BPH. MATERIALS AND METHODS:The evidence team searched Ovid MEDLINE, the Cochrane Library, and the Agency for Healthcare Research and Quality (AHRQ) database to identify studies indexed between January 2007 and September 2017. When sufficient evidence existed, the body of evidence was assigned a strength rating of A (high), B (moderate), or C (low) for support of Strong, Moderate, or Conditional Recommendations. In the absence of sufficient evidence, additional information is provided as Clinical Principles and Expert Opinions (table 1 in supplementary unabridged guideline, http://jurology.com/). 
RESULTS:This Guideline provides updated, evidence-based recommendations regarding management of LUTS/BPH utilizing surgery and minimally invasive surgical therapies; additional statements are made regarding diagnostic and pre-operative tests. Clinical statements are made in comparison to what is generally accepted as the gold standard (i.e. transurethral resection of the prostate [TURP]-monopolar and/or bipolar). This guideline is designed to be used in conjunction with the associated treatment algorithm. CONCLUSIONS:The prevalence and the severity of LUTS increases as men age and is an important diagnosis in the healthcare of patients and the welfare of society. This document will undergo additional literature reviews and updating as the knowledge regarding current treatments and future surgical options continues to expand.","hji,kes",0,0,0,2,0,NA,NA +29785409,De novo genome and transcriptome resources of the Adzuki bean borer Ostrinia scapulalis (Lepidoptera: Crambidae).,"We present a draft genome assembly with a de novo prediction and automated functional annotation of coding genes, and a reference transcriptome of the Adzuki bean borer, Ostrinia scapulalis, based on RNA sequencing of various tissues and developmental stages. The genome assembly spans 419Mb, has a GC content of 37.4% and includes 26,120 predicted coding genes. The reference transcriptome holds 33,080 unigenes and contains a high proportion of a set of genes conserved in eukaryotes and arthropods, used as quality assessment of the reconstructed transcripts. The new genomic and transcriptomic data presented here significantly enrich the public sequence databases for the Crambidae and Lepidoptera, and represent useful resources for future researches related to the evolution and the adaptation of phytophagous moths. 
The genome and transcriptome assemblies have been deposited and made accessible via a NCBI BioProject (id PRJNA390510) and the LepidoDB database (http://bipaa.genouest.org/sp/ostrinia_scapulalis/).","hji,kes",0,0,0,2,0,NA,data deposited as referenced +29804401,[Analysis of significant microRNA associated with chronic thromboembolic pulmonary hypertension].,"Objective: To find key microRNA (miR) associated with chronic thromboembolic pulmonary hypertension (CTEPH). Methods: Affymetrix miR microarray data and GSE56914 data downloaded from GEO database (http: //www.ncbi.nlm.nih.gov/geo/) were obtained and integrated. The microarray data were obtained from peripheral blood samples of CTEPH patients and the matched control. Differentially expressed miRs were screened. Target genes of these miRs were searched. Then, functional enrichment analyses for these miRs were performed. After that, disease network including miRs, target genes and pathways was constructed. Results: Five important miRs including hsa-miR-885-5p, hsa-miR-501-5p, hsa-miR-615-3p, hsa-miR-610, and hsa-miR-346 were identified. Furthermore, hsa-miR-885-5p and hsa-miR-501-5p were significantly enriched in cell cycle pathway. Hsa-miR-615-3p was involved in cytokine-cytokine receptor interaction, axon guidance, focal adhesion and cell cycle pathway. Hsa-miR-610 was significantly enriched in focal adhesion pathway, and hsa-miR-346 was involved in cytokine-cytokine receptor interaction, axon guidance, and focal adhesion pathway. Conclusions: Hsa-miR-885-5p, hsa-miR-501-5p, hsa-miR-615-3p, hsa-miR-610 and hsa-miR-346 are important miRs for the development of CTEPH.","hji,kes",0,0,0,2,0,NA,NA +29806693,Neuronal calcineurin transcriptional targets parallel changes observed in Alzheimer disease brain.,"Synaptic dysfunction and loss are core pathological features in Alzheimer disease (AD). 
In the vicinity of amyloid-beta plaques in animal models, synaptic toxicity occurs and is associated with chronic activation of the phosphatase calcineurin (CN). Indeed, pharmacological inhibition of CN blocks amyloid-beta synaptotoxicity. We therefore hypothesized that CN-mediated transcriptional changes may contribute to AD neuropathology and tested this by examining the impact of CN over-expression on neuronal gene expression in vivo. We found dramatic transcriptional down-regulation, especially of synaptic mRNAs, in neurons chronically exposed to CN activation. Importantly, the transcriptional profile parallels the changes in human AD tissue. Bioinformatics analyses suggest that both nuclear factor of activated T cells and numerous microRNAs may all be impacted by CN, and parallel findings are observed in AD. These data and analyses support the hypothesis that at least part of the synaptic failure characterizing AD may result from aberrant CN activation leading to down-regulation of synaptic genes, potentially via activation of specific transcription factors and expression of repressive microRNAs.

Open practices

Open Science: This manuscript was awarded with the Open Materials Badge. For more information see: https://cos.io/our-services/open-science-badges/ Read the Editorial Highlight for this article on page 8.","hji,kes",0,0,0,2,0,NA,NA +29843602,"Combining RNA-seq data and homology-based gene prediction for plants, animals and fungi.","

Background

Genome annotation is of key importance in many research questions. The identification of protein-coding genes is often based on transcriptome sequencing data, ab-initio or homology-based prediction. Recently, it was demonstrated that intron position conservation improves homology-based gene prediction, and that experimental data improves ab-initio gene prediction.

Results

Here, we present an extension of the gene prediction program GeMoMa that utilizes amino acid sequence conservation, intron position conservation and optionally RNA-seq data for homology-based gene prediction. We show on published benchmark data for plants, animals and fungi that GeMoMa performs better than the gene prediction programs BRAKER1, MAKER2, and CodingQuarry, and purely RNA-seq-based pipelines for transcript identification. In addition, we demonstrate that using multiple reference organisms may help to further improve the performance of GeMoMa. Finally, we apply GeMoMa to four nematode species and to the recently published barley reference genome indicating that current annotations of protein-coding genes may be refined using GeMoMa predictions.

Conclusions

GeMoMa might be of great utility for annotating newly sequenced genomes but also for finding homologs of a specific gene or gene family. GeMoMa has been published under GNU GPL3 and is freely available at http://www.jstacs.de/index.php/GeMoMa .","hji,kes",0,0,0,2,0,NA,NA +29956198,Prognostic Impact of Extracapsular Lymph Node Invasion on Survival in Non-small-Cell Lung Cancer: A Systematic Review and Meta-analysis.,"The extracapsular tumor extension (ECE) of nodal metastasis is an important prognostic factor in different types of malignancies. However, there is a lack of recent data in patients with non-small-cell lung cancer (NSCLC). In addition, the TNM staging system does not include ECE status as a prognostic factor. This systematic review and meta-analysis has been conducted to summarize and pool existing data to determine the prognostic role of ECE in patients with lymph node-positive NSCLC. Two authors performed an independent search in PubMed using a predefined keyword list, without language restrictions with publication date since 1990. Prospective or retrospective studies reporting data on prognostic parameters in subjects with NSCLC with positive ECE or with only intracapsular lymph node metastasis were retrieved. Data were summarized using risk ratios (RR) for the survival with 95% confidence intervals (CI). The data was analyzed using Mix 2 (ref: Bax L: MIX 2.0 - Professional software for meta-analysis in Excel. Version 2.015. BiostatXL, 2016. https://www.meta-analysis-made-easy.com ). There 2,105 studies were reviewed. Five studies covering a total of 828 subjects met the inclusion criteria and were included in the meta-analysis. Two hundred and ninety-eight (35.9%) patients were categorized as ECE+, of whom 54 (18.1%) survived at the end of follow-up. In the ECE-negative group, 257 patients (48.4%) survived by the end of follow-up. 
Thus, ECE status is associated with a significantly decreased survival rate: pooled RR 0.45 (95% CI 0.35-0.59), Q (4)=4.06, P value=0.39, and I 2=68.00% (95 CI 0.00-79.55%). In conclusion, ECE has a significant impact on survival in NSCLC patients and should be considered in diagnostic and therapeutic decisions in addition to the current TNM staging. Postoperative radiotherapy may be an option in ECE-positive pN1 NSCLC patients.","hji,kes",0,0,0,2,0,NA,NA +29983488,"LSAT: Liliaceae Simple Sequences Analysis Tool, a web server.","LSAT is a web-based microsatellite SSR marker designer tool specific for the Liliaceae family. It is developed using HTML, CSS, PHP, Perl and Java scripts. It works without extra add-ons on standard browsers. LSAT provides SSR primer designing service using the web interface. It helps in SSR mining and primer design. LSAT is user friendly with customizable search parameters producing visual output having download options. The current version of LSAT is backed by two data sets, namely, lily EST (Expressed Sequence Tag) from NCBI and lily nr (non redundant) with 4,099 and 216,768 unigenes, respectively. LSAT will be updated regularly upon availability of additional data (either EST and/or transcriptome) on Liliaceae.

Availability

LSAT is available for free at http://210.110.86.160/Lsat/Lsat.html.","hji,kes",0,0,0,2,0,NA,data no unique +29983907,Rapid onset of action and reduced nasal hyperreactivity: new targets in allergic rhinitis management.,"

Background

This article summarizes a EUFOREA symposium, presented during the European Rhinology Research Forum in Brussels (9-10 November 2017; https://www.rhinologyresearch.eu/) which focused on novel pathways and therapeutic approaches in allergic rhinitis (AR).

Main body

AR remains under-diagnosed, under-estimated and under-treated. A key component in understanding the AR landscape has been the realization of a significant mismatch between how physicians instruct AR patients to manage their disease and what AR patients actually do in real life. Data from the Allergy Diary (developed by MACVIA ARIA) showed that AR patients take their medication prn, rapidly switch treatments, often experience poor control, use multiple therapies and stop treatment when symptoms are controlled. Better control of AR may be achievable by using an AR treatment which has a rapid onset of action and which effectively targets breakthrough symptoms. Indeed, AR patients report complete symptom relief, lack of breakthrough symptoms, rapid onset of action, safety and use on an 'as needed' basis as key targets for new nasal sprays. MP-AzeFlu comprises intranasal azelastine and fluticasone propionate (FP) in a novel formulation delivered in a single device. It is the first AR treatment to break the 5 min onset of action threshold and provides clinically relevant symptom relief in 15 min, much faster than that noted for FP + oral loratadine. MP-AzeFlu also significantly reduces nasal hyperresponsiveness (NHR) which may be responsible for the breakthrough symptoms frequently reported by AR patients. Mechanisms underlying MP-AzeFlu's effect include inhibition of mast cell degranulation, stabilization of the mucosal barrier, synergistic inhibition of inflammatory cell recruitment and a unique desensitization of sensory neurons expressing the transient receptor potential A1 and V1 channels.

Conclusion

With the most rapid onset of action and onset of clinically-relevant effect of any AR medication currently available, and proven efficacy in the treatment of NHR, MP-AzeFlu is an AR treatment which provides what patients want, and fits how patients manage their AR in real life.","hji,kes",0,0,0,2,0,NA,NA +29985974,Noise peak filtering in multi-dimensional NMR spectra using convolutional neural networks.,"

Motivation

Multi-dimensional NMR spectra are generally used for NMR signal assignment and structure analysis. There are several programs that can achieve highly automated NMR signal assignments and structure analysis. On the other hand, NMR spectra tend to have a large number of noise peaks even for data acquired with good sample and machine conditions, and it is still difficult to eliminate these noise peaks.

Results

We have developed a method to eliminate noise peaks using convolutional neural networks, implemented in the program package Filt_Robot. The filtering accuracy of Filt_Robot was around 90-95% when applied to 2D and 3D NMR spectra, and the numbers of resulting non-noise peaks were close to those in corresponding manually prepared peaks lists. The filtering can strongly enhance automated NMR spectra analysis.

Availability and implementation

The full package of the program, documents and example data are available from http://bmrbdep.pdbj.org/en/nmr_tool_box/Filt_Robot.html.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +29993817,EBWS: Essential Bioinformatics Web Services for Sequence Analyses.,"The Essential Bioinformatics Web Services (EBWS) are implemented on a new PHP-based server that provides useful tools for analyses of DNA, RNA, and protein sequences applying a user-friendly interface. Nine Web-based applets are currently available on the Web server. They include reverse complementary DNA and random DNA/RNA/peptide oligomer generators, a pattern sequence searcher, a DNA restriction cutter, a prokaryotic ORF finder, a random DNA/RNA mutation generator. It also includes calculators of melting temperature (TM) of DNA/DNA, RNA/RNA, and DNA/RNA hybrids, a guide RNA (gRNA) generator for the CRISPR/Cas9 system and an annealing temperature calculator for multiplex PCR. The pattern-searching applet has no limitations in the number of motif inputs and applies a toolbox of Regex quantifiers that can be used for defining complex sequence queries of RNA, DNA, and protein sequences. The DNA enzyme digestion program utilizes a large database of 1502 restriction enzymes. The gRNA generator has a database of 25 bacterial genomes searchable for gRNA target sequences and has an option for searching in any genome sequence given by the user. All programs are permanently available online at http://penchovsky.atwebpages.com/applications.php without any restrictions.","hji,kes",0,0,0,2,0,NA,NA +30026590,Adult energy requirements predicted from doubly labeled water.,"

Background

Estimating energy requirements forms an integral part of developing diet and activity interventions. Current estimates often rely on a product of physical activity level (PAL) and a resting metabolic rate (RMR) prediction. PAL estimates, however, typically depend on subjective self-reported activity or a clinician's best guess. Energy-requirement models that do not depend on an input of PAL may provide an attractive alternative.

Methods

Total daily energy expenditure (TEE) measured by doubly labeled water (DLW) and a metabolic chamber from 119 subjects obtained from a database of pre-intervention measurements measured at Pennington Biomedical Research Center were used to develop a metabolic ward and free-living models that predict energy requirements. Graded models, including different combinations of input variables consisting of age, height, weight, waist circumference, body composition, and the resting metabolic rate were developed. The newly developed models were validated and compared to three independent databases.

Results

Sixty-four different linear and nonlinear regression models were developed. The adjusted R2 for models predicting free-living energy requirements ranged from 0.65 with covariates of age, height, and weight to 0.74 in models that included body composition and RMR. Independent validation R2 between actual and predicted TEE varied greatly across studies and between genders with higher coefficients of determination, lower bias, slopes closer to 1, and intercepts closer to zero, associated with inclusion of body composition and RMR covariates. The models were programmed into a user-friendly web-based app available at: http://www.pbrc.edu/research-and-faculty/calculators/energy-requirements/ (Video Demo for Reviewers at: https://www.youtube.com/watch?v=5UKjJeQdODQ ) CONCLUSIONS: Energy-requirement equations that do not require knowledge of activity levels and include all available input variables can provide more accurate baseline estimates. The models are clinically accessible through the web-based application.","hji,kes",0,0,0,2,0,NA,NA +30052762,WAVES: a web application for versatile enhanced bioinformatic services.,"

Summary

WAVES is a web application dedicated to bioinformatic tool integration. It provides an efficient way to implement a service for any bioinformatic software. Such services are automatically made available in three ways: web pages, web forms to include in remote websites and a RESTful web services single application programing interface to access remotely from applications. In order to fulfill the service's computational needs, WAVES can perform computation on various resources and environments, such as Galaxy instances.

Availability and implementation

WAVES was developed with Django, a Python-based web framework. It was designed as a reusable web application. It is fully portable, as only a Python installation is required to run Django. It is licensed under GNU General Public License. Source code, documentation with examples and demo are available from http://www.atgc-montpellier.fr/waves/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +30124903,A case study evaluating the portability of an executable computable phenotype algorithm across multiple institutions and electronic health record environments.,"Electronic health record (EHR) algorithms for defining patient cohorts are commonly shared as free-text descriptions that require human intervention both to interpret and implement. We developed the Phenotype Execution and Modeling Architecture (PhEMA, http://projectphema.org) to author and execute standardized computable phenotype algorithms. With PhEMA, we converted an algorithm for benign prostatic hyperplasia, developed for the electronic Medical Records and Genomics network (eMERGE), into a standards-based computable format. Eight sites (7 within eMERGE) received the computable algorithm, and 6 successfully executed it against local data warehouses and/or i2b2 instances. Blinded random chart review of cases selected by the computable algorithm shows PPV =90%, and 3 out of 5 sites had >90% overlap of selected cases when comparing the computable algorithm to their original eMERGE implementation. This case study demonstrates potential use of PhEMA computable representations to automate phenotyping across different EHR systems, but also highlights some ongoing challenges.","hji,kes",0,0,0,2,0,NA,NA +30154154,TIP: A Web Server for Resolving Tumor Immunophenotype Profiling.,": Systematically tracking the tumor immunophenotype is required to understand the mechanisms of cancer immunity and improve clinical benefit of cancer immunotherapy. However, progress in current research is hindered by the lack of comprehensive immune activity resources and easy-to-use tools for biologists, clinicians, and researchers to conveniently evaluate immune activity during the """"cancer-immunity cycle."""" We developed a user-friendly one-stop shop web tool called TIP to comprehensively resolve tumor immunophenotype. 
TIP has the capability to rapidly analyze and intuitively visualize the activity of anticancer immunity and the extent of tumor-infiltrating immune cells across the seven-step cancer-immunity cycle. Also, we precalculated the pan-cancer immunophenotype for 11,373 samples from 33 The Cancer Genome Atlas human cancers that allow users to obtain and compare immunophenotype of pan-cancer samples. We expect TIP to be useful in a large number of emerging cancer immunity studies and development of effective immunotherapy biomarkers. TIP is freely available for use at http://biocc.hrbmu.edu.cn/TIP/. SIGNIFICANCE: TIP is a one-stop shop platform that can help biologists, clinicians, and researchers conveniently evaluate anticancer immune activity with their own gene expression data.See related commentary by Hirano, p. 6536.","hji,kes",0,0,0,2,0,NA,NA +30160308,The Effect of Endothelial Cells on UVB-induced DNA Damage and Transformation of Keratinocytes In 3D Polycaprolactone Scaffold Co-culture System.,"Nitric oxide ( NO ) plays an important role in the regulation of redox balance in keratinocytes post-UVB exposure. Since endothelial cells releases NO for a prolonged time post-UVB, we determined whether human umbilical vein endothelial cells (HUVEC) could have an effect on UVB-induced DNA damage and transformation of their adjacent keratinocytes (HaCaT) using a 3D cell co-culturing system. Our data show that the levels of DNA breaks and/or cyclobutane pyrimidine dimer (CPD) along with H2AX are higher in the co-cultured than in the mono-cultured keratinocytes post-UVB. The NO level in the co-cultured cells is increased approximately 3-fold more than in mono-cultured HaCaT cells within 1-hour post-UVB but then is reduced quickly in co-cultured HaCaT cells comparing to mono-cultured cells from 6 to 24 h post-UVB. However, the peroxynitrite (ONOO- ) level is higher in the co-cultured than in the mono-cultured HaCaT cells in whole period post-UVB. 
Furthermore, while expression level of inducible nitric oxide synthase (iNOS) is increased, the ratio of coupled/uncoupled eNOS is reduced in co-cultured HaCaT cells compared to mono-cultured HaCaT cells. Finally, the co-cultured cells have a significantly increased transformation efficiency after repeating UVB exposure compared to mono-culture HaCaT cells. Our results suggest that endothelial cells could enhance NO /ONOO- imbalance and promote transformation of adjacent keratinocytes.","hji,kes",0,0,0,2,0,NA,NA +30161123,Interactive implementations of thermodynamics-based RNA structure and RNA-RNA interaction prediction approaches for example-driven teaching.,"The investigation of RNA-based regulation of cellular processes is becoming an increasingly important part of biological or medical research. For the analysis of this type of data, RNA-related prediction tools are integrated into many pipelines and workflows. In order to correctly apply and tune these programs, the user has to have a precise understanding of their limitations and concepts. Within this manuscript, we provide the mathematical foundations and extract the algorithmic ideas that are core to state-of-the-art RNA structure and RNA-RNA interaction prediction algorithms. To allow the reader to change and adapt the algorithms or to play with different inputs, we provide an open-source web interface to JavaScript implementations and visualizations of each algorithm. The conceptual, teaching-focused presentation enables a high-level survey of the approaches, while providing sufficient details for understanding important concepts. This is boosted by the simple generation and study of examples using the web interface available at http://rna.informatik.uni-freiburg.de/Teaching/. 
In combination, we provide a valuable resource for teaching, learning, and understanding the discussed prediction tools and thus enable a more informed analysis of RNA-related effects.","hji,kes",0,0,0,2,0,NA,NA +30185806,Machine learning identified an Alzheimer's disease-related FDG-PET pattern which is also expressed in Lewy body dementia and Parkinson's disease dementia.,"Utilizing the publicly available neuroimaging database enabled by Alzheimer's disease Neuroimaging Initiative (ADNI; http://adni.loni.usc.edu/ ), we have compared the performance of automated classification algorithms that differentiate AD vs. normal subjects using Positron Emission Tomography (PET) with fluorodeoxyglucose (FDG). General linear model, scaled subprofile modeling and support vector machines were examined. Among the tested classification methods, support vector machine with Iterative Single Data Algorithm produced the best performance, i.e., sensitivity (0.84) specificity (0.95), by 10-fold cross-validation. We have applied the same classification algorithm to four different datasets from ADNI, Health Science Centre (Winnipeg, Canada), Dong-A University Hospital (Busan, S. Korea) and Asan Medical Centre (Seoul, S. Korea). Our data analyses confirmed that the support vector machine with Iterative Single Data Algorithm showed the best performance in prediction of future development of AD from the prodromal stage (mild cognitive impairment), and that it was also sensitive to other types of dementia such as Parkinson's Disease Dementia and Dementia with Lewy Bodies, and that perfusion imaging using single photon emission computed tomography may achieve a similar accuracy to that of FDG-PET.","hji,kes",0,0,0,2,0,NA,clinical +30200994,TAP: a targeted clinical genomics pipeline for detecting transcript variants using RNA-seq data.,"

Background

RNA-seq is a powerful and cost-effective technology for molecular diagnostics of cancer and other diseases, and it can reach its full potential when coupled with validated clinical-grade informatics tools. Despite recent advances in long-read sequencing, transcriptome assembly of short reads remains a useful and cost-effective methodology for unveiling transcript-level rearrangements and novel isoforms. One of the major concerns for adopting the proven de novo assembly approach for RNA-seq data in clinical settings has been the analysis turnaround time. To address this concern, we have developed a targeted approach to expedite assembly and analysis of RNA-seq data.

Results

Here we present our Targeted Assembly Pipeline (TAP), which consists of four stages: 1) alignment-free gene-level classification of RNA-seq reads using BioBloomTools, 2) de novo assembly of individual targets using Trans-ABySS, 3) alignment of assembled contigs to the reference genome and transcriptome with GMAP and BWA and 4) structural and splicing variant detection using PAVFinder. We show that PAVFinder is a robust gene fusion detection tool when compared to established methods such as Tophat-Fusion and deFuse on simulated data of 448 events. Using the Leucegene acute myeloid leukemia (AML) RNA-seq data and a set of 580 COSMIC target genes, TAP identified a wide range of hallmark molecular anomalies including gene fusions, tandem duplications, insertions and deletions in agreement with published literature results. Moreover, also in this dataset, TAP captured AML-specific splicing variants such as skipped exons and novel splice sites reported in studies elsewhere. Running time of TAP on 100-150 million read pairs and a 580-gene set is one to 2 hours on a 48-core machine.

Conclusions

We demonstrated that TAP is a fast and robust RNA-seq variant detection pipeline that is potentially amenable to clinical applications. TAP is available at http://www.bcgsc.ca/platform/bioinfo/software/pavfinder.","hji,kes",0,0,0,2,0,NA,NA +30202060,Terminal exon characterization with TECtool reveals an abundance of cell-specific isoforms.,"Sequencing of RNA 3' ends has uncovered numerous sites that do not correspond to the termination sites of known transcripts. Through their 3' untranslated regions, protein-coding RNAs interact with RNA-binding proteins and microRNAs, which regulate many properties, including RNA stability and subcellular localization. We developed the terminal exon characterization (TEC) tool ( http://tectool.unibas.ch ), which can be used with RNA-sequencing data from any species for which a genome annotation that includes sites of RNA cleavage and polyadenylation is available. We discovered hundreds of previously unknown isoforms and cell-type-specific terminal exons in human cells. Ribosome profiling data revealed that many of these isoforms were translated. By applying TECtool to single-cell sequencing data, we found that the newly identified isoforms were expressed in subpopulations of cells. Thus, TECtool enables the identification of previously unknown isoforms in well-studied cell systems and in rare cell types.","hji,kes",0,0,0,2,0,NA,NA +30203078,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Evaluation and Treatment of Patients With Thoracolumbar Spine Trauma: Prophylaxis and Treatment of Thromboembolic Events.,"

Question 1

Does routine screening for deep venous thrombosis prevent pulmonary embolism (or venous thromboembolism (VTE)-associated morbidity and mortality) in patients with thoracic and lumbar fractures?

Recommendation 1

There is insufficient evidence to recommend for or against routine screening for deep venous thrombosis in preventing pulmonary embolism (or VTE-associated morbidity and mortality) in patients with thoracic and lumbar fractures. Strength of Recommendation: Grade Insufficient.

Question 2

For patients with thoracic and lumbar fractures, is one regimen of VTE prophylaxis superior to others with respect to prevention of pulmonary embolism (or VTE-associated morbidity and mortality)?

Recommendation 2

There is insufficient evidence to recommend a specific regimen of VTE prophylaxis to prevent pulmonary embolism (or VTE-associated morbidity and mortality) in patients with thoracic and lumbar fractures. Strength of Recommendation: Grade Insufficient.

Question 3

Is there a specific treatment regimen for documented VTE that provides fewer complications than other treatments in patients with thoracic and lumbar fractures?

Recommendation 3

There is insufficient evidence to recommend for or against a specific treatment regimen for documented VTE that would provide fewer complications than other treatments in patients with thoracic and lumbar fractures. Strength of Recommendation: Grade Insufficient.

Recommendation 4

Based on published data from pooled (cervical and thoracolumbar) spinal cord injury populations, the use of thromboprophylaxis is recommended to reduce the risk of VTE events in patients with thoracic and lumbar fractures. Consensus Statement by the Workgroup The full version of the guideline can be reviewed at: https://www.cns.org/guideline-chapters/congress-neurological-surgeons-systematic-review-evidence-based-guidelines/chapter_7.","hji,kes",0,0,0,2,0,NA,NA +30219490,Changing labour market conditions during the 'great recession' and mental health in Scotland 2007-2011: an example using the Scottish Longitudinal Study and data for local areas in Scotland.,"This paper reports research exploring how trends in local labour market conditions during the period 2007-2011 (early stages of the 'great recession') relate to reported mental illness for individuals. It contributes to research on spatio-temporal variation in the wider determinants of health, exploring how the lifecourse of places relates to socio-geographical inequalities in health outcomes for individuals. This study also contributes to the renewed research focus on the links between labour market trends and population health, prompted by the recent global economic recession. We report research using the Scottish Longitudinal Study (SLS), a 5.3% representative sample of the Scottish population, derived from census data (https://sls.lscs.ac.uk/). In Scotland, (2011) census data include self-reported mental health. SLS data were combined with non-disclosive information from other sources, including spatio-temporal trends in labour market conditions (calculated using trajectory modelling) in the 32 local authority areas in Scotland. We show that, for groups of local authorities in Scotland over the period 2007-2011, trends in employment varied. 
These geographically variable trends in employment rates were associated with inequalities in self-reported mental health across the country, after controlling for a number of other individual and neighbourhood risk factors. For residents of regions that had experienced relatively high and stable levels of employment the odds ratio for reporting a mental illness was significantly lower than for the 'reference group', living in areas with persistently low employment rates. In areas where employment declined markedly from higher levels, the odds ratio was similar to the reference group. The findings emphasise how changes in local economic conditions may influence people's health and wellbeing independently of their own employment status. We conclude that, during the recent recession, the economic life course of places across Scotland has been associated with individual mental health outcomes.","hji,kes",0,0,0,2,0,NA,NA +30239574,A clustering linear combination approach to jointly analyze multiple phenotypes for GWAS.,"

Summary

There is an increasing interest in joint analysis of multiple phenotypes for genome-wide association studies (GWASs) based on the following reasons. First, cohorts usually collect multiple phenotypes and complex diseases are usually measured by multiple correlated intermediate phenotypes. Second, jointly analyzing multiple phenotypes may increase statistical power for detecting genetic variants associated with complex diseases. Third, there is increasing evidence showing that pleiotropy is a widespread phenomenon in complex diseases. In this paper, we develop a clustering linear combination (CLC) method to jointly analyze multiple phenotypes for GWASs. In the CLC method, we first cluster individual statistics into positively correlated clusters and then, combine the individual statistics linearly within each cluster and combine the between-cluster terms in a quadratic form. CLC is not only robust to different signs of the means of individual statistics, but also reduce the degrees of freedom of the test statistic. We also theoretically prove that if we can cluster the individual statistics correctly, CLC is the most powerful test among all tests with certain quadratic forms. Our simulation results show that CLC is either the most powerful test or has similar power to the most powerful test among the tests we compared, and CLC is much more powerful than other tests when effect sizes align with inferred clusters. We also evaluate the performance of CLC through a real case study.

Availability and implementation

R code for implementing our method is available at http://www.math.mtu.edu/~shuzhang/software.html.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +30299485,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guidelines on the Evaluation and Treatment of Patients With Thoracolumbar Spine Trauma: Novel Surgical Strategies.,"

Background

Treatment of thoracolumbar burst fractures has traditionally involved spinal instrumentation with fusion performed with standard open surgical techniques. Novel surgical strategies, including instrumentation without fusion and percutaneous instrumentation alone, have been considered less invasive and more efficient treatments.

Objective

To review the current literature and determine the role of fusion in instrumented fixation, as well as the role of percutaneous instrumentation, in the treatment of patients with thoracolumbar burst fractures.

Methods

The task force members identified search terms/parameters and a medical librarian implemented the literature search, consistent with the literature search protocol (see Appendix I), using the National Library of Medicine PubMed database and the Cochrane Library for the period from January 1, 1946 to March 31, 2015.

Results

A total of 906 articles were identified and 38 were selected for full-text review. Of these articles, 12 articles met criteria for inclusion in this systematic review.

Conclusion

There is grade A evidence for the omission of fusion in instrumented fixation for thoracolumbar burst fractures. There is grade B evidence that percutaneous instrumentation is as effective as open instrumentation for thoracolumbar burst fractures.

Question

Does the addition of arthrodesis to instrumented fixation improve outcomes in patients with thoracic and lumbar burst fractures?

Recommendation

It is recommended that in the surgical treatment of patients with thoracolumbar burst fractures, surgeons should understand that the addition of arthrodesis to instrumented stabilization has not been shown to impact clinical or radiological outcomes, and adds to increased blood loss and operative time. Strength of Recommendation: Grade A.

Question

How does the use of minimally invasive techniques (including percutaneous instrumentation) affect outcomes in patients undergoing surgery for thoracic and lumbar fractures compared to conventional open techniques?

Recommendation

Stabilization using both open and percutaneous pedicle screws may be considered in the treatment of thoracolumbar burst fractures as the evidence suggests equivalent clinical outcomes. Strength of Recommendation: Grade B The full version of the guideline can be reviewed at: https://www.cns.org/guideline-chapters/congress-neurological-surgeons-systematic-review-evidence-based-guidelines/chapter_12.","hji,kes",0,0,0,2,0,NA,NA +30302823,Overlapping clustering of gene expression data using penalized weighted normalized cut.,"Clustering has been widely conducted in the analysis of gene expression data. For complex diseases, it has played an important role in identifying unknown functions of genes, serving as the basis of other analysis, and others. A common limitation of most existing clustering approaches is to assume that genes are separated into disjoint clusters. As genes often have multiple functions and thus can belong to more than one functional cluster, the disjoint clustering results can be unsatisfactory. In addition, due to the small sample sizes of genetic profiling studies and other factors, there may not be sufficient evidence to confirm the specific functions of some genes and cluster them definitively into disjoint clusters. In this study, we develop an effective overlapping clustering approach, which takes account into the multiplicity of gene functions and lack of certainty in practical analysis. A penalized weighted normalized cut (PWNCut) criterion is proposed based on the NCut technique and an L 2 norm constraint. It outperforms multiple competitors in simulation. The analysis of the cancer genome atlas (TCGA) data on breast cancer and cervical cancer leads to biologically sensible findings which differ from those using the alternatives. 
To facilitate implementation, we develop the function pwncut in the R package NCutYX.","hji,kes",0,0,0,2,0,NA,NA +30314257,A first preliminary study of the shallow water sponge fauna from Cyprus Island (Eastern Mediterranean).,"Currently, more than 8,500 valid sponge species are reported in the World Porifera Database (http://www.marinespecies.org/porifera/) (van Soest et al. 2018). The Mediterranean Sea sponge fauna, counting almost 700 species, is one of the best documented in the world (Pronzato 2003; Pansini et al. 2011; van Soest et al. 2018) but the eastern part of the basin is by far less studied, in comparison with other Mediterranean areas (Pansini et al. 2000; Voultsiadou Vafidis 2004; Topaloglu Evcen 2014). A small number of species, mainly belonging to the cosmopolitan genus Spongia (Dictyoceratida), are commonly used as bath sponges. Aim of this work is to provide further information on Cyprus Island sponges in general and on species that had commercial importance in the past.","hji,kes",0,0,0,2,0,NA,not about the resource +30367593,On the impact of uncertain gene tree rooting on duplication-transfer-loss reconciliation.,"

Background

Duplication-Transfer-Loss (DTL) reconciliation is a powerful and increasingly popular technique for studying the evolution of microbial gene families. DTL reconciliation requires the use of rooted gene trees to perform the reconciliation with the species tree, and the standard technique for rooting gene trees is to assign a root that results in the minimum reconciliation cost across all rootings of that gene tree. However, even though it is well understood that many gene trees have multiple optimal roots, only a single optimal root is randomly chosen to create the rooted gene tree and perform the reconciliation. This remains an important overlooked and unaddressed problem in DTL reconciliation, leading to incorrect evolutionary inferences. In this work, we perform an in-depth analysis of the impact of uncertain gene tree rooting on the computed DTL reconciliation and provide the first computational tools to quantify and negate the impact of gene tree rooting uncertainty on DTL reconciliation.

Results

Our analysis of a large data set of over 4500 gene families from 100 species shows that a large fraction of gene trees have multiple optimal rootings, that these multiple roots often, but not always, appear closely clustered together in the same region of the gene tree, that many aspects of the reconciliation remain conserved across the multiple rootings, that gene tree error has a profound impact on the prevalence and structure of multiple optimal rootings, and that there are specific interesting patterns in the reconciliation of those gene trees that have multiple optimal roots.

Conclusions

Our results show that unrooted gene trees can be meaningfully reconciled and high-quality evolutionary information can be obtained from them even after accounting for multiple optimal rootings. In addition, the techniques and tools introduced in this paper make it possible to systematically avoid incorrect evolutionary inferences caused by incorrect or uncertain gene tree rooting. These tools have been implemented in the phylogenetic reconciliation software package RANGER-DTL 2.0, freely available from http://compbio.engr.uconn.edu/software/RANGER-DTL/ .","hji,kes",0,0,0,2,0,NA,NA +30376034,VarSome: the human genomic variant search engine.,"

Summary

VarSome.com is a search engine, aggregator and impact analysis tool for human genetic variation and a community-driven project aiming at sharing global expertise on human variants.

Availability and implementation

VarSome is freely available at http://varsome.com.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +30387741,A Note on GRegNetSim: A Tool for the Discrete Simulation and Analysis of Genetic Regulatory Networks.,"Discrete simulations of genetic regulatory networks were used to study subsystems of yeast successfully. However, implementations of existing models underlying these simulations do not support a graphic interface, and require computations necessary to analyze their results to be done manually. Furthermore, differences between existing models suggest that an enriched model, encompassing both existing models, is needed. We developed a software tool, GRegNetSim, that allows the end-user to describe genetic regulatory networks graphically. The user can specify various transition functions at different nodes of the network, supporting, for example, threshold and gradient effects, and then apply the network to a variety of inputs. GRegNetSim displays the relationship between the inputs and the mode of behavior of the network in a graphic form that is easy to interpret. Furthermore, it can automatically extract statistical data necessary to analyze the simulations. The discrete simulations performed by GRegNetSim can be used to elucidate and predict the behavior, structure and properties of genetic regulatory networks in a unified manner. GRegNetSim is implemented as a Cytoscape App. Installation files, examples and source code, along with a detailed user guide, are freely available at https://sites.google.com/site/gregnetsim/.","hji,kes",0,0,0,2,0,NA,NA +30411228,Steady Flow in a Patient-Averaged Inferior Vena Cava-Part I: Particle Image Velocimetry Measurements at Rest and Exercise Conditions.,"

Purpose

Although many previous computational fluid dynamics (CFD) studies have investigated the hemodynamics in the inferior vena cava (IVC), few studies have compared computational predictions to experimental data, and only qualitative comparisons have been made. Herein, we provide particle image velocimetry (PIV) measurements of flow in a patient-averaged IVC geometry under idealized conditions typical of those used in the preclinical evaluation of IVC filters.

Methods

Measurements are acquired under rest and exercise flow rate conditions in an optically transparent model fabricated using 3D printing. To ensure that boundary conditions are well-defined and to make follow-on CFD validation studies more convenient, fully-developed flow is provided at the inlets (i.e., the iliac veins) by extending them with straight rigid tubing longer than the estimated entrance lengths. Velocity measurements are then obtained at the downstream end of the tubing to confirm Poiseuille inflow boundary conditions.

Results

Measurements in the infrarenal IVC reveal that flow profiles are blunter in the sagittal plane (minor axis) than in the coronal plane (major axis). Peak in-plane velocity magnitudes are 4.9cm/s and 27cm/s under the rest and exercise conditions, respectively. Flow profiles are less parabolic and exhibit more inflection points at the higher flow rate. Bimodal velocity peaks are also observed in the sagittal plane at the elevated flow condition.

Conclusions

The IVC geometry, boundary conditions, and infrarenal velocity measurements are provided for download on a free and publicly accessible repositoryat https://doi.org/10.6084/m9.figshare.7198703 . These data will facilitate future CFD validation studies of idealized, in vitro IVC hemodynamics and of similar laminar flows in vascular geometries.","hji,kes",0,0,0,2,0,NA,NA +30422398,Structural basis for protein phosphatase 1 recruitment by glycogen-targeting subunits.,"The rate-limiting enzymes in glycogen metabolism are subject to regulation by reversible phosphorylation. The glycogen-targeted protein phosphatase 1 (PP1) holoenzyme catalyzes their dephosphorylation. It is composed of a catalytic subunit (PP1C) and a glycogen-targeting subunit (G subunit). To date, seven G subunits have been identified. They all contain an RVxF PP1C-binding motif. The interactions between this motif in the skeletal muscle-specific GM and PP1C have been revealed by structural studies. However, whether elements outside of this motif contribute to the interaction with PP1C is not clear. In this study, we found that residues next to the RVxF motif in GM also mediate interactions to PP1C and revealed the mechanism of the interaction by structural studies. Sequence analysis revealed that the PP1C-binding region in GM is highly conserved among G subunits. Consistently, we found that the equivalent region in the liver-enriched GL adopts a similar structure upon binding PP1C. Dephosphorylation experiments indicated that this region and the glycogen-binding region in GM cooperate to stimulate PP1C's activity toward glycogen-associated substrates. DATABASES: The structure factors and coordinates for the PP1Ca-GM (1-99) and PP1Ca-GL (31-105) complexes have been deposited into the Protein Data Bank (http://www.pdb.org), with the accession codes 5ZQV and 5ZT0, respectively.","hji,kes",0,0,0,2,0,NA,NA +30423080,DREAM-Yara: an exact read mapper for very large databases with short update time.,"

Motivation

Mapping-based approaches have become limited in their application to very large sets of references since computing an FM-index for very large databases (e.g. >10 GB) has become a bottleneck. This affects many analyses that need such index as an essential step for approximate matching of the NGS reads to reference databases. For instance, in typical metagenomics analysis, the size of the reference sequences has become prohibitive to compute a single full-text index on standard machines. Even on large memory machines, computing such index takes about 1 day of computing time. As a result, updates of indices are rarely performed. Hence, it is desirable to create an alternative way of indexing while preserving fast search times.

Results

To solve the index construction and update problem we propose the DREAM (Dynamic seaRchablE pArallel coMpressed index) framework and provide an implementation. The main contributions are the introduction of an approximate search distributor via a novel use of Bloom filters. We combine several Bloom filters to form an interleaved Bloom filter and use this new data structure to quickly exclude reads for parts of the databases where they cannot match. This allows us to keep the databases in several indices which can be easily rebuilt if parts are updated while maintaining a fast search time. The second main contribution is an implementation of DREAM-Yara a distributed version of a fully sensitive read mapper under the DREAM framework.

Availability and implementation

https://gitlab.com/pirovc/dream_yara/.","hji,kes",0,0,0,2,0,NA,NA +30457571,Generation and quality control of lipidomics data for the alzheimer's disease neuroimaging initiative cohort.,"Alzheimer's disease (AD) is a major public health priority with a large socioeconomic burden and complex etiology. The Alzheimer Disease Metabolomics Consortium (ADMC) and the Alzheimer Disease Neuroimaging Initiative (ADNI) aim to gain new biological insights in the disease etiology. We report here an untargeted lipidomics of serum specimens of 806 subjects within the ADNI1 cohort (188 AD, 392 mild cognitive impairment and 226 cognitively normal subjects) along with 83 quality control samples. Lipids were detected and measured using an ultra-high-performance liquid chromatography quadruple/time-of-flight mass spectrometry (UHPLC-QTOF MS) instrument operated in both negative and positive electrospray ionization modes. The dataset includes a total 513 unique lipid species out of which 341 are known lipids. For over 95% of the detected lipids, a relative standard deviation of better than 20% was achieved in the quality control samples, indicating high technical reproducibility. Association modeling of this dataset and available clinical, metabolomics and drug-use data will provide novel insights into the AD etiology. These datasets are available at the ADNI repository at http://adni.loni.usc.edu/.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +30463894,Predicting Antimicrobial Resistance and Associated Genomic Features from Whole-Genome Sequencing.,"Thanks to the genomics revolution, thousands of strain-specific whole-genome sequences are now accessible for a wide range of pathogenic bacteria. This availability enables big data informatics approaches to be used to study the spread and acquisition of antimicrobial resistance (AMR). In this issue of the Journal of Clinical Microbiology, Nguyen et al. (M. Nguyen, S. W. Long, P. F. McDermott, R. J. Olsen, R. Olson, R. L. 
Stevens, G. H. Tyson, S. Zhao, and J. J. Davis, J Clin Microbiol 57:e01260-18, 2019, https://doi.org/10.1128/JCM.01260-18) report the results obtained with their machine learning models based on whole-genome sequencing data to predict the MICs of antibiotics for 5,728 nontyphoidal Salmonella genomes collected over 15 years in the United States. Their major finding demonstrates that MICs can be predicted with an average accuracy of 95% within 1 2-fold dilution step (confidence interval, 95% to 95%), an average very major error rate of 2.7%, and an average major error rate of 0.1%. Importantly, these models predict MICs with no a priori information about the underlying gene content or resistance phenotypes of the strains, enabling the possibility to identify AMR determinants and rapidly diagnose and prioritize antibiotic use directly from the organism sequence. Employing such tools to diagnose and limit the spread of resistance-conferring mechanisms could help ameliorate the looming antibiotic resistance crisis.","hji,kes",0,0,0,2,0,NA,NA +30467523,gganatogram: An R package for modular visualisation of anatograms and tissues based on ggplot2.,"Displaying data onto anatomical structures is a convenient technique to quickly observe tissue related information. However, drawing tissues is a complex task that requires both expertise in anatomy and the arts. While web based applications exist for displaying gene expression on anatograms, other non-genetic disciplines lack similar tools. Moreover, web based tools often lack the modularity associated with packages in programming languages, such as R. Here I present gganatogram, an R package used to plot modular species anatograms based on a combination of the graphical grammar of ggplot2 and the publicly available anatograms from the Expression Atlas. This combination allows for quick and easy, modular, and reproducible generation of anatograms. 
Using only one command and a data frame with tissue name, group, colour, and value, this tool enables the user to visualise specific human and mouse tissues with desired colours, grouped by a variable, or displaying a desired value, such as gene-expression, pharmacokinetics, or bacterial load across selected tissues. gganatogram consists of 5 highly annotated organisms, male/female human/mouse, and a cell anatogram. It further consists of 24 other less annotated organisms from the animal and plant kingdom. I hope that this tool will be useful by the wider community in biological sciences. Community members are welcome to submit additional anatograms, which can be incorporated into the package. A stable version gganatogram has been deposited to neuroconductor, and a development version can be found on github/jespermaag/gganatogram. An interactive shiny app of gganatogram can be found on https://jespermaag.shinyapps.io/gganatogram/, which allows for non-R users to create anatograms.","hji,kes",0,0,0,2,0,NA,NA +30474154,Estimating cross-population genetic correlations of causal effect sizes.,"Recent studies have examined the genetic correlations of single-nucleotide polymorphism (SNP) effect sizes across pairs of populations to better understand the genetic architectures of complex traits. These studies have estimated g , the cross-population correlation of joint-fit effect sizes at genotyped SNPs. However, the value of g depends both on the cross-population correlation of true causal effect sizes ( b ) and on the similarity in linkage disequilibrium (LD) patterns in the two populations, which drive tagging effects. Here, we derive the value of the ratio g / b as a function of LD in each population. By applying existing methods to obtain estimates of g , we can use this ratio to estimate b . 
Our estimates of b were equal to 0.55 ( SE = 0.14) between Europeans and East Asians averaged across nine traits in the Genetic Epidemiology Research on Adult Health and Aging data set, 0.54 ( SE = 0.18) between Europeans and South Asians averaged across 13 traits in the UK Biobank data set, and 0.48 ( SE = 0.06) and 0.65 ( SE = 0.09) between Europeans and East Asians in summary statistic data sets for type 2 diabetes and rheumatoid arthritis, respectively. These results implicate substantially different causal genetic architectures across continental populations.","hji,kes",0,0,0,2,0,NA,NA +30476000,Integrate multiple traits to detect novel trait-gene association using GWAS summary data with an adaptive test approach.,"

Motivation

Genetics hold great promise to precision medicine by tailoring treatment to the individual patient based on their genetic profiles. Toward this goal, many large-scale genome-wide association studies (GWAS) have been performed in the last decade to identify genetic variants associated with various traits and diseases. They have successfully identified tens of thousands of disease-related variants. However they have explained only a small proportion of the overall trait heritability for most traits and are of very limited clinical use. This is partly owing to the small effect sizes of most genetic variants, and the common practice of testing association between one trait and one genetic variant at a time in most GWAS, even when multiple related traits are often measured for each individual. Increasing evidence suggests that many genetic variants can influence multiple traits simultaneously, and we can gain more power by testing association of multiple traits simultaneously. It is appealing to develop novel multi-trait association test methods that need only GWAS summary data, since it is generally very hard to access the individual-level GWAS phenotype and genotype data.

Results

Many existing GWAS summary data-based association test methods have relied on ad hoc approach or crude Monte Carlo approximation. In this article, we develop rigorous statistical methods for efficient and powerful multi-trait association test. We develop robust and efficient methods to accurately estimate the marginal trait correlation matrix using only GWAS summary data. We construct the principal component (PC)-based association test from the summary statistics. PC-based test has optimal power when the underlying multi-trait signal can be captured by the first PC, and otherwise it will have suboptimal performance. We develop an adaptive test by optimally weighting the PC-based test and the omnibus chi-square test to achieve robust performance under various scenarios. We develop efficient numerical algorithms to compute the analytical P-values for all the proposed tests without the need of Monte Carlo sampling. We illustrate the utility of proposed methods through application to the GWAS meta-analysis summary data for multiple lipids and glycemic traits. We identify multiple novel loci that were missed by individual trait-based association test.

Availability and implementation

All the proposed methods are implemented in an R package available at http://www.github.com/baolinwu/MTAR. The developed R programs are extremely efficient: it takes less than 2 min to compute the list of genome-wide significant single nucleotide polymorphisms (SNPs) for all proposed multi-trait tests for the lipids GWAS summary data with 2.5 million SNPs on a single Linux desktop.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +30505900,Morphological and molecular datasets for Kaempferia species.,"This study compared morphological and molecular data for identification of Kaempferia species. Each species was deposited in Institute of Bioscience (IBS), Universiti Putra Malaysia (UPM) as voucher specimens and ITS sequences of each species deposited in NCBI (https://www.ncbi.nlm.nih.gov/) as GenBank accessions. DNA was extracted using a modified CTAB method and PCR amplification was completed using Internal Transcribed Spacer (ITS4 and ITS5) markers. PCR amplification of products were viewed under gel electrophoresis. Sequencing was performed and sequence characteristics of ITS rDNA in Kaempferia is shown. Qualitative and qualitative scoring of morphological characters and measuring techniques for Kaempferia species are included. In addition, a brief review of molecular markers used in phylogenetic studies of Zingiberaceae is included in this dataset.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +30560246,Glioma and Alzheimer's Disease.,"Background:Cancer mortality and Alzheimer's disease (AD) mortality increase with age, but some studies have shown an inverse relationship of the two diseases, that is, older persons with cancer have a reduced risk of AD and vice versa. However, other analyses suggest that AD and brain tumor might be positively correlated. Objective:In the current study, we wished to determine the relationship of AD mortality to malignant brain tumor mortality in US states and counties. Methods:Data and maps of malignant brain tumor mortality and Alzheimer's disease mortality (1999-2016) are from the CDC Wonder tool (https://wonder.cdc.gov/cmf-icd10.html). Data on malignant brain tumor types and their frequencies are from the Surveillance, Epidemiology, and End Results Program (SEER, https://seer.cancer.gov). 
Data on the genetics of lower grade glioma are from the TCGA Lower Grade Glioma (LGG) dataset in TCGA (The Cancer Genome Atlas). Results:SEER data indicate that astrocytomas make up 58.2% of malignant brain tumors in patients 65 and older; glioblastoma and anaplastic astrocytoma make up 41.6%. We found a significant positive correlation between AD mortality rate and malignant brain tumor mortality rate 1999-2016 in persons age 65 and older in A) 1,101 US counties, p < 0.001 and B) 50 US states, p < 0.001. Conclusion:Adult malignant brain tumors may share some environmental risks with AD. Malignant brain tumors and AD also have some genes in common: TREM2, SPI1, CD33, and INPP5D. The interaction of environment and genetics is complex and overlaps in malignant brain tumors and AD.","hji,kes",0,0,0,2,0,NA,references other data resource +30567473,Cloud-BS: A MapReduce-based bisulfite sequencing aligner on cloud.,"In recent years, there have been many studies utilizing DNA methylome data to answer fundamental biological questions. Bisulfite sequencing (BS-seq) has enabled measurement of a genome-wide absolute level of DNA methylation at single-nucleotide resolution. However, due to the ambiguity introduced by bisulfite-treatment, the aligning process especially in large-scale epigenetic research is still considered a huge burden. We present Cloud-BS, an efficient BS-seq aligner designed for parallel execution on a distributed environment. Utilizing Apache Hadoop framework, Cloud-BS splits sequencing reads into multiple blocks and transfers them to distributed nodes. By designing each aligning procedure into separate map and reducing tasks while an internal key-value structure is optimized based on the MapReduce programming model, the algorithm significantly improves alignment performance without sacrificing mapping accuracy. In addition, Cloud-BS minimizes the innate burden of configuring a distributed environment by providing a pre-configured cloud image. 
Cloud-BS shows significantly improved bisulfite alignment performance compared to other existing BS-seq aligners. We believe our algorithm facilitates large-scale methylome data analysis. The algorithm is freely available at https://paryoja.github.io/Cloud-BS/ .","hji,kes",0,0,0,2,0,NA,NA +30572878,"Geographical mobility of UK trainee doctors, from family home to first job: a national cohort study.","

Background

The UK faces geographical variation in the recruitment of doctors. Understanding where medical graduates choose to go for training is important because doctors are more likely to consider practicing in areas where they completed postgraduate training. The wider literature also suggests that there is a relationship between origin and background, and where doctors wish to train/work. Thus, the purpose of this paper is to investigate the geographical mobility of UK medical graduates from different socio-economic groups in terms of where they wish to spend their first years of postgraduate training.

Methods

This was an observational study of Foundation Programme (FP) doctors who graduated from 33 UK medical schools between 2012 and 2014. Data was accessed via the UK medical education database (UKMED: https://www.ukmed.ac.uk/ ). Chi-square tests were used to examine the relationships between doctor's sociodemographic characteristics and the dependent variable, average driving time from parental home to foundation school/region. Generalised Linear Mixed Models (GLMM) were used to estimate the effects of those factors in combination against the outcome measure.

Results

The majority of doctors prefer to train at foundation schools that are reasonably close to the family home. Those who attended state-funded schools, from non-white ethnic groups and/or from lower socio-economic groups were significantly more likely to choose foundation schools nearer their parental home. Doctors from disadvantaged backgrounds (as determined by entitlement to free school meals, OR = 1.29, p= 0.003 and no parental degree, OR = 1.34, p< 0.001) were associated with higher odds of selecting foundation schools that were closer to the parental home.

Conclusion

The data suggests that recruiting medical students from lower socioeconomic groups and those who originate from under-recruiting areas may be at least part of the solution to filling training posts in these areas. This has obvious implications for the widening access agenda, and equitable distribution of health services.","hji,kes",0,0,0,2,0,NA,NA +30578582,Sex differences in the circulatory responses to an isocapnic cold pressor test.,"

New findings

What is the central question of this study? Do sex differences exist in the cardiorespiratory responses to an isocapnic cold pressor test (CPT)? What is the main finding and its importance? During the CPT, there were no sex differences in the respiratory response; however, females demonstrated a reduced mean arterial pressure and reduced dilatation of the common carotid artery. Given that the CPT is predictive of future cardiovascular events, these data have clinical implications for improving the utility of the CPT to determine cardiovascular health risk. Sex differences should be taken into consideration when conducting and interpreting a CPT.

Abstract

The cold pressor test (CPT) elicits a transient increase in sympathetic nervous activity, minute ventilation ( V E ), mean arterial pressure (MAP) and common carotid artery (CCA) diameter in healthy individuals. Although the extent of dilatation of the CCA in response to the CPT has been used as a clinical indicator of cardiovascular health status, the potential sex differences have yet to be explored. In response to a CPT, we hypothesized that elevations in V E and MAP and dilatation of the CCA would be attenuated in females compared with males. In 20 young, healthy participants (10 females), we measured the respiratory, cardiovascular and CCA responses during a CPT, which consisted of a 3min immersion of the right foot into 0-1 ice water. Blood pressure (via finger photoplethysmography), heart rate (via electrocardiogram) and CCA diameter and velocity (via Duplex ultrasound) were simultaneously recorded immediately before and during the CPT. During the CPT, while controlling end-tidal gases to baseline values, the main findings were as follows: (i) no sex differences were present in absolute or relative changes in V E (P=0.801 and P=0.179, respectively); (ii) the relative MAP and CCA diameter response were reduced in females by 51 and 55%, respectively (P=0.008 and P=0.029versus males, respectively); and (iii) the relative MAP responses was positively correlated with the dilatation of the CCA in males (r=0.42, P=0.019), in females (r=0.43, P=0.019) and in males and females combined (r=0.55, P<0.001). Given that the CPT is used as a clinical tool to assess cardiovascular health status, sex differences should be considered in future studies.","hji,kes",0,0,0,2,0,NA,NA +30591010,DLAD4U: deriving and prioritizing disease lists from PubMed literature.,"

Background

Due to recent technology advancements, disease related knowledge is growing rapidly. It becomes nontrivial to go through all published literature to identify associations between human diseases and genetic, environmental, and life style factors, disease symptoms, and treatment strategies. Here we report DLAD4U (Disease List Automatically Derived For You), an efficient, accurate and easy-to-use disease search engine based on PubMed literature.

Results

DLAD4U uses the eSearch and eFetch APIs from the National Center for Biotechnology Information (NCBI) to find publications related to a query and to identify diseases from the retrieved publications. The hypergeometric test was used to prioritize identified diseases for displaying to users. DLAD4U accepts any valid queries for PubMed, and the output results include a ranked disease list, information associated with each disease, chronologically-ordered supporting publications, a summary of the run, and links for file export. DLAD4U outperformed other disease search engines in our comparative evaluation using selected genes and drugs as query terms and manually curated data as """"gold standard"""". For 100 genes that are associated with only one disease in the gold standard, the Mean Average Precision (MAP) measure from DLAD4U was 0.77, which clearly outperformed other tools. For 10 genes that are associated with multiple diseases in the gold standard, the mean precision, recall and F-measure scores from DLAD4U were always higher than those from other tools. The superior performance of DLAD4U was further confirmed using 100 drugs as queries, with an MAP of 0.90.

Conclusions

DLAD4U is a new, intuitive disease search engine that takes advantage of existing resources at NCBI to provide computational efficiency and uses statistical analyses to ensure accuracy. DLAD4U is publicly available at http://dlad4u.zhang-lab.org .","hji,kes",0,0,0,2,0,NA,NA +30611208,Development and validation of a risk score to predict mortality during TB treatment in patients with TB-diabetes comorbidity.,"

Background

Making an accurate prognosis for mortality during tuberculosis (TB) treatment in TB-diabetes (TB-DM) comorbid patients remains a challenge for health professionals, especially in low TB prevalent populations, due to the lack of a standardized prognostic model.

Methods

Using de-identified data from TB-DM patients from Texas, who received TB treatment and had a treatment outcome of completed treatment or died before completion, reported to the National TB Surveillance System from January 2010-December 2016, we developed and internally validated a mortality scoring system, based on the regression coefficients.

Results

Of 1227 included TB-DM patients, 112 (9.1%) died during treatment. The score used nine characteristics routinely collected by most TB programs. Patients were divided into three groups based on their score: low-risk (< 12 points), medium-risk (12-21 points) and high-risk (>=22 points). The model had good performance (with an area under the receiver operating characteristic (ROC) curve of 0.83 in development and 0.82 in validation), and good calibration. A practical mobile calculator app was also created ( https://oaa.app.link/Isqia5rN6K ).

Conclusion

Using demographic and clinical characteristics which are available from most TB programs at the patient's initial visits, our simple scoring system had good performance and may be a practical clinical tool for TB health professionals in identifying TB-DM comorbid patients with a high mortality risk.","hji,kes",0,0,0,2,0,NA,NA +30662564,Artificial intelligence-based decision-making for age-related macular degeneration.,"Artificial intelligence (AI) based on convolutional neural networks (CNNs) has a great potential to enhance medical workflow and improve health care quality. Of particular interest is practical implementation of such AI-based software as a cloud-based tool aimed for telemedicine, the practice of providing medical care from a distance using electronic interfaces. Methods: In this study, we used a dataset of labeled 35,900 optical coherence tomography (OCT) images obtained from age-related macular degeneration (AMD) patients and used them to train three types of CNNs to perform AMD diagnosis. Results: Here, we present an AI- and cloud-based telemedicine interaction tool for diagnosis and proposed treatment of AMD. Through deep learning process based on the analysis of preprocessed optical coherence tomography (OCT) imaging data, our AI-based system achieved the same image discrimination rate as that of retinal specialists in our hospital. The AI platform's detection accuracy was generally higher than 90% and was significantly superior (p < 0.001) to that of medical students (69.4% and 68.9%) and equal (p = 0.99) to that of retinal specialists (92.73% and 91.90%). Furthermore, it provided appropriate treatment recommendations comparable to those of retinal specialists. Conclusions: We therefore developed a website for realistic cloud computing based on this AI platform, available at https://www.ym.edu.tw/~AI-OCT/. Patients can upload their OCT images to the website to verify whether they have AMD and require treatment. 
Using an AI-based cloud service represents a real solution for medical imaging diagnostics and telemedicine.","hji,kes",0,0,0,2,0,NA,NA +30687361,PolyMorphPredict: A Universal Web-Tool for Rapid Polymorphic Microsatellite Marker Discovery From Whole Genome and Transcriptome Data.,"Microsatellites are ubiquitously distributed, polymorphic repeat sequence valuable for association, selection, population structure and identification. They can be mined by genomic library, probe hybridization and sequencing of selected clones. Such approach has many limitations like biased hybridization and selection of larger repeats. In silico mining of polymorphic markers using data of various genotypes can be rapid and economical. Available tools lack in some or other aspects like: targeted user defined primer generation, polymorphism discovery using multiple sequence, size and number limits of input sequence, no option for primer generation and e-PCR evaluation, transferability, lack of complete automation and user-friendliness. They also lack the provision to evaluate published primers in e-PCR mode to generate additional allelic data using re-sequenced data of various genotypes for judicious utilization of previously generated data. We developed the tool (PolyMorphPredict) using Perl, R, Java and launched at Apache which is available at http://webtom.cabgrid.res.in/polypred/. It mines microsatellite loci and computes primers from genome/transcriptome data of any species. It can perform e-PCR using published primers for polymorphism discovery and across species transferability of microsatellite loci. Present tool has been evaluated using five species of different genome size having 21 genotypes. Though server is equipped with genomic data of three species for test run with gel simulation, but can be used for any species. Further, polymorphism predictability has been validated using in silico and in vitro PCR of four rice genotypes. 
This tool can accelerate the in silico microsatellite polymorphism discovery in re-sequencing projects of any species of plant and animal for their diversity estimation along with variety/breed identification, population structure, MAS, QTL and gene discovery, traceability, parentage testing, fungal diagnostics and genome finishing.","hji,kes",0,0,0,2,0,NA,NA +30701134,"Prognostic values of GMPS, PR, CD40, and p21 in ovarian cancer.","Early detection and prediction of prognosis and treatment responses are all the keys in improving survival of ovarian cancer patients. This study profiled an ovarian cancer progression model to identify prognostic biomarkers for ovarian cancer patients. Mouse ovarian surface epithelial cells (MOSECs) can undergo spontaneous malignant transformation in vitro cell culture. These were used as a model of ovarian cancer progression for alterations in gene expression and signaling detected using the Illumina HiSeq2000 Next-Generation Sequencing platform and bioinformatical analyses. The differential expression of four selected genes was identified using the gene expression profiling interaction analysis (http://gepia.cancer-pku.cn/) and then associated with survival in ovarian cancer patients using the Cancer Genome Atlas dataset and the online Kaplan-Meier Plotter (http://www.kmplot.com) data. The data showed 263 aberrantly expressed genes, including 182 up-regulated and 81 down-regulated genes between the early and late stages of tumor progression in MOSECs. The bioinformatic data revealed four genes (i.e., guanosine 5'-monophosphate synthase (GMPS), progesterone receptor (PR), CD40, and p21 (cyclin-dependent kinase inhibitor 1A)) to play an important role in ovarian cancer progression. Furthermore, the Cancer Genome Atlas dataset validated the differential expression of these four genes, which were associated with prognosis in ovarian cancer patients. 
In conclusion, this study profiled differentially expressed genes using the ovarian cancer progression model and identified four (i.e., GMPS, PR, CD40, and p21) as prognostic markers for ovarian cancer patients. Future studies of prospective patients could further verify the clinical usefulness of this four-gene signature.","hji,kes",0,0,0,2,0,NA,NA +30708525,First Report of Impatiens Downy Mildew Outbreaks Caused by Plasmopara obducens Throughout the Hawai'ian Islands.,"Downy mildew of impatiens (Impatiens walleriana Hook.f.) was first reported from the continental United States in 2004. In 2011 to 2012, severe and widespread outbreaks were documented across the United States mainland, resulting in considerable economic losses. On May 5, 2013, downy mildew disease symptoms were observed from I. walleriana 'Super Elfin' at a retail nursery in Mililani, on the Hawai'ian island of Oahu. Throughout May and June 2013, additional sightings of the disease were documented from the islands of Oahu, Kauai, Maui, and Hawai'i from nurseries, home gardens, and botanical park and landscape plantings. Symptoms of infected plants initially showed downward leaf curl, followed by a stippled chlorotic appearance on the adaxial leaf surfaces. Abaxial leaf surfaces were covered with a layer of white mycelia. Affected plants exhibited defoliation, flower drop, and stem rot as the disease progressed. Based on morphological and molecular data, the organism was identified as Plasmopara obducens (J. Schrt.) J. Schrt. Microscopic observation disclosed coenocytic mycelium and hyaline, thin-walled, tree-like (monopodial branches), straight, 94.0 to 300.0 3.2 to 10.8 m sporangiophores. Ovoid, hyaline sporangia measuring 11.0 to 14.6 12.2 to 16.2 (average 13.2 14.7) m were borne on sterigma tips of rigid branchlets (8.0 to 15.0 m) at right angle to the main axis of the sporangiophores (1,3). 
Molecular identification of the pathogen was conducted by removing hyphae from the surface of three heavily infected leaves using sterile tweezers, then extracting DNA using the QIAGEN Plant DNA kit (QIAGEN, Gaithersburg, MD). The nuclear rDNA internal transcribed spacer was sequenced from each of the three samples bidirectionally from Illustra EXOStar (GE Healthcare, Piscataway, NJ) purified amplicon generated from primers ITS1-O and LR-0R (4). Resultant sequences (GenBank KF366378 to 80) shared 99 to 100% nucleotide identity with P. obducens accession DQ665666 (4). A voucher specimen (BPI892676) was deposited in the U.S. National Fungus Collections, Beltsville, MD. Pathogenicity tests were performed by spraying 6-week-old impatiens plants (I. walleriana var. Super Elfin) grown singly in 4-inch pots with a suspension of 1 104 P. obducens sporangia/ml until runoff using a handheld atomizer. Control plants were sprayed with distilled water. The plants were kept in high humidity by covering with black plastic bags for 48 h at 20C, and then maintained in the greenhouse (night/day temperature of 20/24C). The first symptoms (downward curling and chlorotic stippling of leaves) and sporulation of the pathogen on under-leaf surfaces of the inoculated plants appeared at 10 days and 21 days after inoculation, respectively. Control plants remained healthy. Morphological features and measurements matched those of the original inoculum, thus fulfilling Koch's postulates. To our knowledge, this is the first report of downy mildew on I. walleriana in Hawai'i (2). The disease appears to be widespread throughout the islands and is likely to cause considerable losses in Hawai'ian landscapes and production settings. References: (1) O. Constantinescu. Mycologia 83:473, 1991. (2) D. F. Farr and A. Y. Rossman. Systematic Mycology and Microbiology Laboratory, ARS, USDA. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ July 16, 2013. (3) P. A. Saccardo. 
Syllogue Fungorum 7:242, 1888. (4) M. Thines. Fungal Genet Biol 44:199, 2007.","hji,kes",0,0,0,2,0,NA,NA +30714121,The histidine decarboxylase model of tic pathophysiology: a new focus on the histamine H3 receptor.,"Histamine dysregulation was implicated as a rare cause of Tourette syndrome and other tic disorders a decade ago by a landmark genetic study in a high density family pedigree, which implicated a hypomorphic mutation in the histidine decarboxylase (Hdc) gene as a rare but high penetrance genetic cause. Studies in Hdc knockout (KO) mice have confirmed that this mutation causes tic-relevant behavioural and neurochemical abnormalities that parallel what is seen in patients and thus validate the KO as a potentially informative model of tic pathophysiology. Recent studies have focused on the potential role of the histamine H3 receptor in this model, and by association in tic disorders and related neuropsychiatric conditions. The H3 receptor is up-regulated in the striatum in Hdc KO mice. As the H3 receptor has constitutive activity in the absence of ligand, this receptor up-regulation may have significant cellular effects despite the absence of neurotransmitter histamine in these mice. Activation in vivo of H3 receptors in wild type mice regulates signalling in striatal medium spiny neurons (MSNs) that interacts non-linearly with dopamine receptor signalling. Baseline signalling alterations in MSNs in Hdc KO mice resemble those seen after H3 receptor agonist treatment in wild type animals. H3 receptor agonist treatment in the KOs further accentuates most of these signalling abnormalities and produces behavioural stereotypy. Together, these data suggest the intriguing hypothesis that constitutive signalling by up-regulated H3 receptors explains many of the molecular and behavioural abnormalities seen in these animals. LINKED ARTICLES: This article is part of a themed section on New Uses for 21st Century. 
To view the other articles in this section visit http://onlinelibrary.wiley.com/doi/10.1111/bph.v177.3/issuetoc.","hji,kes",0,0,0,2,0,NA,NA +30722515,"First Report of Globisporangium ultimum Causing Pythium Damping-Off on Aleppo Pine in Algeria, Africa, and the Mediterranean Region.","Globisporangium ultimum (Trow) Uzuhashi, Tojo & Kakish. (syn. Pythium ultimum Trow, syn. P. ultimum Trow var. ultimum) is a known oomycetal species from Pythium s.l. causing damping-off and/or root rot on a great variety of plants throughout the world, including some pine species (Pinus L.) and conifers (2,3). Aleppo pine (Pinus halepensis Mill.) is a common native forest tree in the Mediterranean region. Pre- and post-emergence damping-off disease symptoms were observed during 2008 and 2009 in four forest nurseries from northwestern Algeria (Relizane, Sidi Belabes, and Tlemcen departments). This disease occurred under cool conditions, and Aleppo pines were significantly affected, reducing seedling emergence. Disinfected segments, about 5 mm in length, from decayed root and collar, were cultured on CMA at 25C. This oomycetal species was identified based on the species description in Pythium keys (3,4). For the molecular identification, PCR was used to amplify the ITS region of Pythium isolates. It was amplified with the flanking primers ITS1 and ITS4, and these products were directly sequenced. Sequence data were compared to known sequences deposited in the NCBI non redundant database to confirm morphological identification. A BLAST search identified U3CR, U7CR, U1RT, U2CR, U4CR, U14CR, U7RT, and U17RT isolates (GenBank Accession Nos. JX191921, 22, 27, 29, 31, and 33 to 35, respectively) as G. ultimum based on 100% similarity with corresponding sequence of the reference isolate no. UZ056 MAFF240024 (AB468781) (3). Phytopathogenicity testing was conducted in a petri dish and pot experiment. 
In the petri dish experiment, a 3 mm diameter plug was transferred from a 7-day-old CMA colony to the center of a CMA petri dish, with three replicates per isolate, and three control plates were inoculated with sterile agar plugs. After 72 h, 10 Aleppo pine seeds were placed equally spaced to 1 cm from the edge of each plug. After 7 days at 22C in the dark, germination inhibition (46.1 to 87.6%) and root growth inhibition (62.3 to 92.2%) were calculated. In the control plates, germination failure (13.4%) and root length (27.7 cm) were observed. For the pot experiment, inocula were produced by adding a 5 mm diameter plug from a 7-day-old CMA culture to a previously sterilized 500 ml flask containing 237.5 g sand, 12.5 g cornmeal, and 80 ml SDW. Nine-day-old inoculum was mixed with sterile soil at a rate of 1:3 (v:v). Inoculum was transferred to 500 ml pot, and 10 Aleppo pine seeds were planted, with three replicates per isolate, and three control pots were used. After 2 weeks, all of the isolates tested caused typical symptoms of Aleppo pine Pythium damping-off, the percentage of inoculated plants that became infected was 36.6 to 83.3%. In the control pots, no infected plants were observed. To our knowledge, this is the first report of G. ultimum causing damping-off on Aleppo pine in Algeria, Africa, and the Mediterranean Region. Before, Aleppo pine damping-off caused by G. ultimum was reported in Australia (1). References: (1) R. P. Cook and A. J. Dub. Host-pathogen index of plant diseases in South Australia. SADA, Melbourne, Australia, 1989. (2) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory. ARS, USDA, Bestville, MD. Retrieved from http://nt.ars-grin.gov/fungaldatabases/ , June 24, 2012. (3) S. Uzuhashi et al. Mycoscience 51:337, 2010. (4) A. J. van der Plaats-Niterink. Stud. Mycol. 
21:1, 1981.","hji,kes",0,0,0,2,0,NA,NA +30722630,First Report of Powdery Mildew Caused by Erysiphe platani on Sycamore (Platanus occidentalis) in South Korea.,"Platanus occidentalis L. (sycamore) is an important shade tree distributed throughout the Northern Hemisphere and in South Korea. It has been widely used as an ornamental tree, especially in urban regions and by roadsides. The average rate of roadside planting throughout South Korea covers about 5.7% (up to 38% in Seoul), equivalent to 0.36 million trees. In early July 2012, after a rainy spell in summer, an outbreak of powdery mildew on sycamore was first observed on roadside trees in Gwangju, a southern province of South Korea. A more extensive nationwide survey revealed no powdery mildew in northern or central regions of South Korea. The disease has spread rapidly within Gwangju, even though fungicide applications were carried out after the rainy spell. Major symptoms included white, superficial mycelia, grey to brown lesions on the surface of the leaves due to the presence of a hyperparasite (tentatively identified as Ampelomyces sp.), a slight chlorosis, and severe leaf distortion followed by defoliation. Conidiophores were produced singly, straight, and unbranched, with lengths of 35.2 to 315.2 m (average 170.4 m). Conidia were ellipsoid or doliiform, ranging in size from 34.9 to 47.4 m (average 38.2 m) long 16.5 to 26.8 m (average 23.9 m) wide. Primary conidia had a truncate base and rounded apex; secondary conidia had both a truncate base and apex. The conidial outer surface had a reticulated wrinkling. Cleistothecia (i.e., sexual spore structures) were not found during the survey, which extended from July to October. These characteristics and the host species match those of Microsphaera platani (syn. Erysiphe platani), which was described on P. occidentalis in Washington State (2). Fungal rDNA was amplified using primers ITS1 and LR5F (4) for one sample (EML-PLA1, GenBank JX485651). 
BLASTn searches of GenBank revealed high sequence identity to E. platani (99.5% to JQ365943 and 99.3% to JQ365940). Recently, Liang et al. (3) reported the first occurrence of powdery mildew by E. platani on P. orientalis in China based only on its morphology. Thus, in this study, author could only use ITS sequence data from the United States and Europe to characterize the isolate. To date, nine records of powdery mildews of Platanus spp. have been reported worldwide: on P. hispanica from Brazil, Japan, Hungary, and Slovakia; P. orientalis from Israel; P. racemosa from the United States; P. acerifolia from the United Kingdom and Germany; and Platanus sp. from Argentina and Australia (1). Interestingly, the hyperparasite, Ampelomyces sp., was found with E. platani, suggesting that there may be some level of biocontrol in nature. Pathogenicity was confirmed by gently pressing diseased leaves onto six leaves of healthy sycamore plants in the field in September. The treated leaves were sealed in sterilized vinyl pack to maintain humid condition for 2 days. Similar symptoms were observed on the inoculated leaves 10 days after inoculation. Koch's postulates were fulfilled by re-observing the fungal pathogen. To our knowledge, this is the first report of powdery mildew caused by E. platani on sycamore in South Korea. References: (1) D. F. Farr and A. Y. Rossman. Fungal Databases, Systematic Mycology and Microbiology Laboratory, ARS, USDA. http://nt.ars-grin.gov/fungaldatabases/ , 2012. (2) D. A. Glawe. Plant Health Progress, doi:10.1094/PHP-2003-0818-01-HN, 2003. (3) C. Liang et al. Plant Pathol. 57:375, 2008. (4) T. J White et al., pp. 315-322 in: PCR Protocols: A Guide to Methods and Applications. M. A. Innis et al., ed. 
Academic Press, New York, 1990.","hji,kes",0,0,0,2,0,NA,NA +30783000,Doravirine and the Potential for CYP3A-Mediated Drug-Drug Interactions.,"Identifying and understanding potential drug-drug interactions (DDIs) are vital for the treatment of human immunodeficiency virus type 1 (HIV-1) infection. This article discusses DDIs between doravirine, a nonnucleoside reverse transcriptase inhibitor (NNRTI), and cytochrome P450 3A (CYP3A) substrates and drugs that modulate CYP3A activity. Consistent with previously published in vitro data and DDI trials with the CYP3A substrates midazolam and atorvastatin, doravirine did not have any meaningful impact on the pharmacokinetics of the CYP3A substrates ethinyl estradiol and levonorgestrel. Coadministration of doravirine with CYP3A inhibitors (ritonavir or ketoconazole) increased doravirine exposure approximately 3-fold. However, these increases were not considered clinically meaningful. Conversely, previously published trials showed that coadministered CYP3A inducers (rifampin and rifabutin) decreased doravirine exposure by 88% and 50%, respectively (K. L. Yee, S. G. Khalilieh, R. I. Sanchez, R. Liu, et al., Clin Drug Investig 37:659-667, 2017 [https://doi.org/10.1007/s40261-017-0513-4]; S. G. Khalilieh, K. L. Yee, R. I. Sanchez, R. Liu, et al., J Clin Pharmacol 58:1044-1052, 2018 [https://doi.org/10.1002/jcph.1103]), while doravirine exposure following prior efavirenz administration led to an initial reduction in doravirine exposure of 62%, but the reduction became less pronounced with time (K. L. Yee, R. I. Sanchez, P. Auger, R. Liu, et al., Antimicrob Agents Chemother 61:e01757-16, 2017 [https://doi.org/10.1128/AAC.01757-16]). Overall, the coadministration of doravirine with CYP3A inhibitors and substrates is, therefore, supported by these data together with efficacy and safety data from clinical trials, while coadministration with strong CYP3A inducers, such as rifampin, cannot be recommended. 
Concomitant dosing with rifabutin (a CYP3A inducer less potent than rifampin) is acceptable if doravirine dosing is adjusted from once to twice daily; however, the effect of other moderate inducers on doravirine pharmacokinetics is unknown.","hji,kes",0,0,0,2,0,NA,NA +30832735,Ultrasound Elastography supplement assessing nodal status of magnetic resonance imaging staged cervical N0 patients with nasopharyngeal carcinoma.,"

Background

To determine whether ultrasound elastography can distinguish reactive or metastatic small lymph nodes (sLN) of magnetic resonance imaging (MRI) staged cervical N0 patients with nasopharyngeal carcinoma (NPC).

Methods

A pilot study was performed involving the diagnostic performances of conventional high-frequency ultrasound (CHFU) and/or shear wave elastography (SWE) for predicting metastases in sLN of MRI-staged N0 NPC patients with reference to the histologically-proven ultrasound guided core needle biopsy (US-CNB). The diagnosis of CHFU was based on the superficial lymph node ultrasonic criteria with the five-point-scale (FPS). The mean (Emean), minimum (Emin) and maximum (Emax) of the elasticity indices were measured by SWE at the stiffest part of the sLN in kilopascal. Diagnostic performances were analyzed using a receiver operating curve (ROC) on a per-node basis. The authenticity of this article has been validated by uploading the key raw data onto the Research Data Deposit public platform ( http://www.researchdata.org.cn ), with the approval RDD number as RDDA2017000447.

Results

All 113 cervical sLN of 49 MRI-staged cervical N0 NPC patients underwent evaluation of CHFU and SWE; 38 sLN (FPS < 2) were regarded as benign, which were excluded from subsequent analysis due to none biopsy-proven. And 75 indeterminate sLN (FPS >= 2) were referred to US-CNB and revealed 15 (20%) metastases. All SWE elastic indices were significantly higher in malignant sLNs than in benign sLNs (p< 0.05). Moreover, Emax exhibited the highest diagnostic value (AUC: 0.733 +/- 0.067, p= 0.005) with excellent measurement reproducibility (ICC: 0.786; 95%CI: 0.684, 0.864). CHFU plus SWE was superior to CHFU or SWE alone for predicting metastases in sLN of MRI-staged N0 patients with NPC (p< 0.001).

Conclusions

CHFU plus SWE is an optional non-invasive modality to supplement MRI in assessing cervical nodal status of patients with NPC.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +30865284,PleioNet: a web-based visualization tool for exploring pleiotropy across complex traits.,"

Summary

Pleiotropy plays an important role in furthering our understanding of the shared genetic architecture of different human diseases and traits. However, exploring and visualizing pleiotropic information with currently publicly available tools is limiting and challenging. To aid researchers in constructing and digesting pleiotropic networks, we present PleioNet, a web-based visualization tool for exploring this information across human diseases and traits. This program provides an intuitive and interactive web interface that seamlessly integrates large database queries with visualizations that enable users to quickly explore complex high-dimensional pleiotropic information. PleioNet works on all modern computer and mobile web browsers, making pleiotropic information readily available to a broad range of researchers and clinicians with diverse technical backgrounds. We expect that PleioNet will be an important tool for studying the underlying pleiotropic connections among human diseases and traits.

Availability and implementation

PleioNet is hosted on Google cloud and freely available at http://www.pleionet.com/.","hji,kes",0,0,0,2,0,NA,NA +30867992,Lightweight data management with dtool.,"The explosion in volumes and types of data has led to substantial challenges in data management. These challenges are often faced by front-line researchers who are already dealing with rapidly changing technologies and have limited time to devote to data management. There are good high-level guidelines for managing and processing scientific data. However, there is a lack of simple, practical tools to implement these guidelines. This is particularly problematic in a highly distributed research environment where needs differ substantially from group to group and centralised solutions are difficult to implement and storage technologies change rapidly. To meet these challenges we have developed dtool, a command line tool for managing data. The tool packages data and metadata into a unified whole, which we call a dataset. The dataset provides consistency checking and the ability to access metadata for both the whole dataset and individual files. The tool can store these datasets on several different storage systems, including a traditional file system, object store (S3 and Azure) and iRODS. It includes an application programming interface that can be used to incorporate it into existing pipelines and workflows. The tool has provided substantial process, cost, and peace-of-mind benefits to our data management practices and we want to share these benefits. The tool is open source and available freely online at http://dtool.readthedocs.io.","hji,kes",0,0,0,2,0,NA,NA +30871603,RnBeads 2.0: comprehensive analysis of DNA methylation data.,"DNA methylation is a widely investigated epigenetic mark with important roles in development and disease. High-throughput assays enable genome-scale DNA methylation analysis in large numbers of samples. 
Here, we describe a new version of our RnBeads software-an R/Bioconductor package that implements start-to-finish analysis workflows for Infinium microarrays and various types of bisulfite sequencing. RnBeads 2.0 ( https://rnbeads.org/ ) provides additional data types and analysis methods, new functionality for interpreting DNA methylation differences, improved usability with a novel graphical user interface, and better use of computational resources. We demonstrate RnBeads 2.0 in four re-runnable use cases focusing on cell differentiation and cancer.","hji,kes",0,0,0,2,0,NA,NA +30918038,Relationship between sociodemographic factors and specialty destination of UK trainee doctors: a national cohort study.,"OBJECTIVES:Many countries are driving forward policies to widen the socioeconomic profile of medical students and to train more medical students for certain specialties. However, little is known about how socioeconomic origin relates to specialty choice. Nor is there a good understanding of the relationship between academic performance and specialty choice. To address these gaps, our aim was to identify the relationship between socioeconomic background, academic performance and accepted offers into specialty training. DESIGN:Longitudinal, cohort study using data from the UK Medical Education Database (https://www.ukmed.ac.uk/). PARTICIPANTS:6065 (60% females) UK doctors who accepted offers to a specialty training (residency) post after completing the 2-year generic foundation programme (UK Foundation Programme) between 2012 and 2014. MAIN OUTCOME MEASURES:χ² tests were used to examine the relationships between sociodemographic characteristics, academic ability and the dependent variable, specialty choice. Multiple data imputation was used to address the issue of missing data. Multinomial regression was employed to test the independent variables in predicting the likelihood of choosing a given specialty. 
RESULTS:Participants pursuing careers in more competitive specialties had significantly higher academic scores than colleagues pursuing less competitive ones. After controlling for the presence of multiple factors, trainees who came from families where no parent was educated to a degree level had statistically significant lower odds of choosing careers in medical specialties relative to general practice (OR=0.78, 95% CI, 0.67 to 0.92). Students who entered medical school as school leavers, compared with mature students, had odds 1.2 times higher (95% CI, 1.04 to 1.56) of choosing surgical specialties than general practice. CONCLUSIONS:The data indicate a direct association between trainees' sociodemographic characteristics, academic ability and career choices. The findings can be used by medical school, training boards and workforce planners to inform recruitment and retention strategies.","hji,kes",0,0,0,2,0,NA,NA +30967119,Large-scale 3D chromatin reconstruction from chromosomal contacts.,"

Background

Recent advances in genome analysis have established that chromatin has preferred 3D conformations, which bring distant loci into contact. Identifying these contacts is important for us to understand possible interactions between these loci. This has motivated the creation of the Hi-C technology, which detects long-range chromosomal interactions. Distance geometry-based algorithms, such as ChromSDE and ShRec3D, have been able to utilize Hi-C data to infer 3D chromosomal structures. However, these algorithms, being matrix-based, are space- and time-consuming on very large datasets. A human genome of 100 kilobase resolution would involve ~30,000 loci, requiring gigabytes just in storing the matrices.

Results

We propose a succinct representation of the distance matrices which tremendously reduces the space requirement. We give a complete solution, called SuperRec, for the inference of chromosomal structures from Hi-C data, through iterative solving the large-scale weighted multidimensional scaling problem.

Conclusions

SuperRec runs faster than earlier systems without compromising on result accuracy. The SuperRec package can be obtained from http://www.cs.cityu.edu.hk/~shuaicli/SuperRec .","hji,kes",0,0,0,2,0,NA,NA +30979697,Visualizing Patterns in Pediatric and Adult Hospital Care.,"

Objectives

We aimed to design a graphical tool for understanding and effectively communicating the complex differences between pediatric and adult hospital care systems.

Patients and methods

We analyzed the most recent hospital administrative data sets for inpatient admission and emergency department visits from 7 US states (2014: Arkansas, Florida, Kentucky, Maryland, Massachusetts, and New York; 2011: California). Probabilities of care completion (Pcc) were calculated for pediatric (<18 years old) and adult conditions in all acute-care hospitals in each state. Using the Pcc, we constructed interactive heatmap visualizations for direct comparison of pediatric and adult hospital care systems.

Results

On average, across the 7 states, 70.6% of all hospitals had Pcc >0.5 for more than half of all adult conditions, whereas <14.9% of hospitals had Pcc >0.1 for half of pediatric conditions. Visualizations revealed wide variation among states with clearly apparent institutional dependencies and condition-specific gaps (full interactive versions are available at https://goo.gl/5t8vAw).

Conclusions

The functional disparities between pediatric and adult hospital care systems are substantial, and condition-specific differences should be considered in reimbursement strategies, disaster planning, network adequacy determinations, and public health planning.","hji,kes",0,0,0,2,0,NA,NA +30989232,Long-term Clinical Effectiveness of Ustekinumab in Patients with Crohn's Disease Who Failed Biologic Therapies: A National Cohort Study.,"

Background

Ustekinumab [UST] was recently approved in Europe for the treatment of moderate to severe Crohn's disease [CD]. Long-term real-world data are currently scarce for CD patients previously exposed to several biologics.

Methods

This is an observational, national, retrospective multicentre study. Patients received intravenous UST ~6 mg/kg at baseline, with 90 mg subcutaneously thereafter every 8 weeks. Response and remission rates were assessed at Weeks 8, 16, and 52.

Results

Data from 152 patients were analysed. All patients were exposed to at least one anti-TNFα agent, and 69.7% were exposed to even two anti-TNFα and vedolizumab. After 1 year, 42.1% and 25.7% of patients had experienced clinical response and clinical remission, respectively, and 38.8% and 24.3% had achieved steroid-free clinical response and remission, respectively; 38.8% of patients discontinued therapy during the 12 months of follow-up. Colonic location was predictive of clinical response at 1 year, and low body mass index [BMI] at baseline was a negative predictor of clinical remission. Resolution of arthralgia was associated with clinical response over time. De novo arthralgia was reported by 17.9% of patients at Week 8 and 13.5% of patients at Week 52. No impact of UST on arthralgia was observed in patients with concomitant ankylosing spondylitis [n = 17]. Other adverse events were reported in 7.2% of patients.

Conclusions

This real-world cohort study confirms the effectiveness of UST in CD patients previously exposed to several biologics. Ustekinumab was well tolerated with respect to adverse events.

Podcast

This article has an associated podcast which can be accessed at https://academic.oup.com/ecco-jcc/pages/podcast.","hji,kes",0,0,0,2,0,NA,NA +31001324,BayesPI-BAR2: A New Python Package for Predicting Functional Non-coding Mutations in Cancer Patient Cohorts.,"Most of somatic mutations in cancer occur outside of gene coding regions. These mutations may disrupt the gene regulation by affecting protein-DNA interaction. A study of these disruptions is important in understanding tumorigenesis. However, current computational tools process DNA sequence variants individually, when predicting the effect on protein-DNA binding. Thus, it is a daunting task to identify functional regulatory disturbances among thousands of mutations in a patient. Previously, we have reported and validated a pipeline for identifying functional non-coding somatic mutations in cancer patient cohorts, by integrating diverse information such as gene expression, spatial distribution of the mutations, and a biophysical model for estimating protein binding affinity. Here, we present a new user-friendly Python package BayesPI-BAR2 based on the proposed pipeline for integrative whole-genome sequence analysis. This may be the first prediction package that considers information from both multiple mutations and multiple patients. It is evaluated in follicular lymphoma and skin cancer patients, by focusing on sequence variants in gene promoter regions. BayesPI-BAR2 is a useful tool for predicting functional non-coding mutations in whole genome sequencing data: it allows identification of novel transcription factors (TFs) whose binding is altered by non-coding mutations in cancer. BayesPI-BAR2 program can analyze multiple datasets of genome-wide mutations at once and generate concise, easily interpretable reports for potentially affected gene regulatory sites. 
The package is freely available at http://folk.uio.no/junbaiw/BayesPI-BAR2/.","hji,kes",0,0,0,2,0,NA,NA +31028388,SEanalysis: a web tool for super-enhancer associated regulatory analysis.,"Super-enhancers (SEs) have prominent roles in biological and pathological processes through their unique transcriptional regulatory capability. To date, several SE databases have been developed by us and others. However, these existing databases do not provide downstream or upstream regulatory analyses of SEs. Pathways, transcription factors (TFs), SEs, and SE-associated genes form complex regulatory networks. Therefore, we designed a novel web server, SEanalysis, which provides comprehensive SE-associated regulatory network analyses. SEanalysis characterizes SE-associated genes, TFs binding to target SEs, and their upstream pathways. The current version of SEanalysis contains more than 330 000 SEs from more than 540 types of cells/tissues, 5042 TF ChIP-seq data generated from these cells/tissues, DNA-binding sequence motifs for ~700 human TFs and 2880 pathways from 10 databases. SEanalysis supports searching by either SEs, samples, TFs, pathways or genes. The complex regulatory networks formed by these factors can be interactively visualized. In addition, we developed a customizable genome browser containing >6000 customizable tracks for visualization. The server is freely available at http://licpathway.net/SEanalysis.","hji,kes",0,0,0,2,0,NA,NA +31028400,"Updated MS²PIP web server delivers fast and accurate MS² peak intensity prediction for multiple fragmentation methods, instruments and labeling techniques.","MS²PIP is a data-driven tool that accurately predicts peak intensities for a given peptide's fragmentation mass spectrum. Since the release of the MS²PIP web server in 2015, we have brought significant updates to both the tool and the web server. 
In addition to the original models for CID and HCD fragmentation, we have added specialized models for the TripleTOF 5600+ mass spectrometer, for TMT-labeled peptides, for iTRAQ-labeled peptides, and for iTRAQ-labeled phosphopeptides. Because the fragmentation pattern is heavily altered in each of these cases, these additional models greatly improve the prediction accuracy for their corresponding data types. We have also substantially reduced the computational resources required to run MS²PIP, and have completely rebuilt the web server, which now allows predictions of up to 100 000 peptide sequences in a single request. The MS²PIP web server is freely available at https://iomics.ugent.be/ms2pip/.","hji,kes",0,0,0,2,0,NA,NA +31031918,Future temperature and salinity do not exert selection pressure on cyst germination of a toxic phytoplankton species.,"Environmental conditions regulate the germination of phytoplankton resting stages. While some factors lead to synchronous germination, others stimulate germination of only a small fraction of the resting stages. This suggests that habitat filters may act on the germination level and thus affect selection of blooming strains. Benthic """"seed banks"""" of the toxic dinoflagellate Alexandrium ostenfeldii from the Baltic Sea are genetically and phenotypically diverse, indicating a high potential for adaptation by selection on standing genetic variation. Here, we experimentally tested the role of climate-related salinity and temperature as selection filters during germination and subsequent establishment of A. ostenfeldii strains. A representative resting cyst population was isolated from sediment samples, and germination and reciprocal transplantation experiments were carried out, including four treatments: Average present day germination conditions and three potential future conditions: high temperature, low salinity, and high temperature in combination with low salinity. 
We found that the final germination success of A. ostenfeldii resting cysts was unaffected by temperature and salinity in the range tested. A high germination success of more than 80% in all treatments indicates that strains are not selected by temperature and salinity during germination, but selection becomes more important shortly after germination, in the vegetative stage of the life cycle. Moreover, strains were not adapted to germination conditions. Instead, highly plastic responses occurred after transplantation and significantly higher growth rates were observed at higher temperature. High variability of strain-specific responses has probably masked the overall effect of the treatments, highlighting the importance of testing the effect of environmental factors on many strains. It is likely that A. ostenfeldii populations can persist in the future, because suitable strains, which are able to germinate and grow well at potential future climate conditions, are part of the highly diverse cyst population.

Open research badges

This article has earned an Open Data Badge for making publicly available the digitally-shareable data necessary to reproduce the reported results. The data is available at https://doi.org/10.5061/dryad.c8c83nr.","hji,kes",0,0,0,2,0,NA,dryad +31059668,Surgical Management of Lower Urinary Tract Symptoms Attributed to Benign Prostatic Hyperplasia: AUA Guideline Amendment 2019.,"

Purpose

Male lower urinary tract symptoms (LUTS) secondary to benign prostatic hyperplasia (BPH) is common in men and can have negative effects on quality of life (QoL). It is the hope that this Guideline becomes a reference for effective evidence-based surgical management of LUTS/BPH.

Materials and methods

The evidence team searched Ovid MEDLINE, the Cochrane Library, and the Agency for Healthcare Research and Quality database to identify studies indexed between January 2007-September 2017. Following initial publication, this guideline was amended in 2019 and reflects relevant literature published through January 2019. When sufficient evidence existed, the body of evidence was assigned a strength rating of A (high), B (moderate), or C (low) for support of Strong, Moderate, or Conditional Recommendations. In the absence of sufficient evidence, additional information is provided as Clinical Principles and Expert Opinions (table 1 in supplementary unabridged guideline, https://www.jurology.com).

Results

This Guideline provides evidence-based recommendations regarding management of LUTS/BPH utilizing surgery and minimally invasive surgical therapies (MIST). Additional statements are made regarding diagnostic and pre-operative tests. Clinical statements are made in comparison to what is generally accepted as the gold standard (i.e. transurethral resection of the prostate [TURP] monopolar and/or bipolar). This guideline is designed to be used in conjunction with the associated treatment algorithm (see figure).[Figure: see text]Conclusions:The prevalence and the severity of LUTS increases as men age and is an important diagnosis in the healthcare of patients and the welfare of society. This document will undergo updating as knowledge regarding treatments and future surgical options continues to expand.","hji,kes",0,0,0,2,0,NA,NA +31093959,Sex differences in the hypothalamic-pituitary-adrenal axis: An obstacle to antidepressant drug development?,"Hypothalamic-pituitary-adrenal (HPA) axis dysfunction has long been implicated in the pathophysiology of depression, and HPA axis-based compounds have served as potential new therapeutic targets, but with no success. This review details sex differences from animal and human studies in the function of HPA axis elements (glucocorticoids, corticotropin releasing factor, and vasopressin) and related compounds tested as candidate antidepressants. We propose that sex differences contribute to the failure of novel HPA axis-based drugs in clinical trials. Compounds studied preclinically in males were tested in clinical trials that recruited more, if not exclusively, women, and did not control, but rather adjusted, for potential sex differences. Indeed, clinical trials of antidepressants are usually not stratified by sex or other important factors, although preclinical and epidemiological data support such stratification. 
In conclusion, we suggest that clinical testing of HPA axis-related compounds creates an opportunity for targeted, personalized antidepressant treatments based on sex. LINKED ARTICLES: This article is part of a themed section on The Importance of Sex Differences in Pharmacology Research. To view the other articles in this section visit http://onlinelibrary.wiley.com/doi/10.1111/bph.v176.21/issuetoc.","hji,kes",0,0,0,2,0,NA,NA +31095319,PopNetD3-A Network-Based Web Resource for Exploring Population Structure.,"We present PopNetD3, a web tool that provides an integrated approach for the network-based visualization of population structure based on the PopNet clustering framework. Users first submit a tab-delimited file that defines diversity of SNPs across the genome which is subsequently processed by the PopNet backend to define patterns of conservation at the chromosome level. The resulting population structure is visualized through a dedicated D3-based tool, allowing users to interactively examine chromosomal regions predicted to share ancestry. We illustrate the capabilities of PopNetD3 through an analysis of 16 strains of Neisseria gonorrhoeae. PopNetD3 is capable of processing population data sets consisting of hundreds of individuals and is publicly available online at: http://compsysbio.org/popnetd3 Last Accessed: May 17, 2019.","hji,kes",0,0,0,2,0,NA,NA +31099384,NGSEP3: accurate variant calling across species and sequencing protocols.,"

Motivation

Accurate detection, genotyping and downstream analysis of genomic variants from high-throughput sequencing data are fundamental features in modern production pipelines for genetic-based diagnosis in medicine or genomic selection in plant and animal breeding. Our research group maintains the Next-Generation Sequencing Experience Platform (NGSEP) as a precise, efficient and easy-to-use software solution for these features.

Results

Understanding that incorrect alignments around short tandem repeats are an important source of genotyping errors, we implemented in NGSEP new algorithms for realignment and haplotype clustering of reads spanning indels and short tandem repeats. We performed extensive benchmark experiments comparing NGSEP to state-of-the-art software using real data from three sequencing protocols and four species with different distributions of repetitive elements. NGSEP consistently shows comparative accuracy and better efficiency compared to the existing solutions. We expect that this work will contribute to the continuous improvement of quality in variant calling needed for modern applications in medicine and agriculture.

Availability and implementation

NGSEP is available as open source software at http://ngsep.sf.net.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +31112088,Using Artificial Intelligence to Revise ACR TI-RADS Risk Stratification of Thyroid Nodules: Diagnostic Accuracy and Utility.,"Background Risk stratification systems for thyroid nodules are often complicated and affected by low specificity. Continual improvement of these systems is necessary to reduce the number of unnecessary thyroid biopsies. Purpose To use artificial intelligence (AI) to optimize the American College of Radiology (ACR) Thyroid Imaging Reporting and Data System (TI-RADS). Materials and Methods A total of 1425 biopsy-proven thyroid nodules from 1264 consecutive patients (1026 women; mean age, 52.9 years [range, 18-93 years]) were evaluated retrospectively. Expert readers assigned points based on five ACR TI-RADS categories (composition, echogenicity, shape, margin, echogenic foci), and a genetic AI algorithm was applied to a training set (1325 nodules). Point and pathologic data were used to create an optimized scoring system (hereafter, AI TI-RADS). Performance of the systems was compared by using a test set of the final 100 nodules with interpretations from the expert reader, eight nonexpert readers, and an expert panel. Initial performance of AI TI-RADS was calculated by using a test for differences between binomial proportions. Additional comparisons across readers were conducted by using bootstrapping; diagnostic performance was assessed by using area under the receiver operating curve. Results AI TI-RADS assigned new point values for eight ACR TI-RADS features. Six features were assigned zero points, which simplified categorization. By using expert reader data, the diagnostic performance of ACR TI-RADS and AI TI-RADS was area under the receiver operating curve of 0.91 and 0.93, respectively. For the same expert, specificity of AI TI-RADS (65%, 55 of 85) was higher (P < .001) than that of ACR TI-RADS (47%, 40 of 85). 
For the eight nonexpert radiologists, mean specificity for AI TI-RADS (55%) was also higher (P < .001) than that of ACR TI-RADS (48%). An interactive AI TI-RADS calculator can be viewed at http://deckard.duhs.duke.edu/~ai-ti-rads . Conclusion An artificial intelligence-optimized Thyroid Imaging Reporting and Data System (TI-RADS) validates the American College of Radiology TI-RADS while slightly improving specificity and maintaining sensitivity. Additionally, it simplifies feature assignments, which may improve ease of use. RSNA, 2019 Online supplemental material is available for this article.","hji,kes",0,0,0,2,0,NA,NA +31112286,Urinary tract infection prevention after midurethral slings in pelvic floor reconstructive surgery: A systematic review and meta-analysis.,"INTRODUCTION:Synthetic midurethral slings are the most common procedures currently performed for stress urinary incontinence in women. Infection is a frequent complication of urogynecologic surgery. We performed a systematic review and meta-analysis to identify interventions that successfully prevent infections, including urinary tract infection (UTI) and/or bacteriuria, compared with no intervention, in women undergoing midurethral sling surgery with or without concomitant pelvic reconstructive procedures for prolapse. MATERIAL AND METHODS:The primary outcome was the development of any infection post-midurethral sling placement in women. MEDLINE, Embase, CINAHL and the Cochrane Library were searched for comparative studies from inception to July 2017, with no language restrictions. We used search terms related to midurethral sling, infections and infection-reduction interventions. Two independent reviewers abstracted data and assessed study quality. Pooled effect size estimates were calculated. We conducted meta-analysis of eligible studies. A protocol for this review has been registered and can be accessed online (http://hdl.handle.net/2429/64731). 
RESULTS:We identified seven eligible studies of infection risk-reducing interventions; all focused on UTIs. Only one study assessed preoperative antibiotics with midurethral sling alone and was halted early because of low UTI rates. All other studies (three randomized control trials and three observational studies) examined whether postoperative antibiotics decrease UTI/bacteriuria rates after midurethral sling with or without reconstructive procedures for pelvic organ prolapse and using bladder catheterization postoperatively. Due to considerable clinical heterogeneity, we only combined four studies for meta-analysis. Postoperative oral prophylactic nitrofurantoin showed no significant benefit in reducing UTI/bacteriuria in women post-midurethral sling with or without concomitant reconstructive pelvic surgery and the need for bladder catheterization, when compared with the reference group (pooled relative risk 0.73, 95% confidence interval [CI] 0.42-1.25). CONCLUSIONS:Based on the best available evidence, postoperative oral nitrofurantoin is not effective at reducing UTI/bacteriuria rates in catheterized women after midurethral sling with or without concomitant pelvic reconstructive surgery for prolapse. For midurethral sling alone, preoperative antibiotic prophylaxis may not be needed for UTI prevention.","hji,kes",0,0,0,2,0,NA,NA +31150060,Augmented Interval List: a novel data structure for efficient genomic interval search.,"

Motivation

Genomic data is frequently stored as segments or intervals. Because this data type is so common, interval-based comparisons are fundamental to genomic analysis. As the volume of available genomic data grows, developing efficient and scalable methods for searching interval data is necessary.

Results

We present a new data structure, the Augmented Interval List (AIList), to enumerate intersections between a query interval q and an interval set R. An AIList is constructed by first sorting R as a list by the interval start coordinate, then decomposing it into a few approximately flattened components (sublists), and then augmenting each sublist with the running maximum interval end. The query time for AIList is O(log₂N + n + m), where n is the number of overlaps between R and q, N is the number of intervals in the set R and m is the average number of extra comparisons required to find the n overlaps. Tested on real genomic interval datasets, AIList code runs 5-18 times faster than standard high-performance code based on augmented interval-trees, nested containment lists or R-trees (BEDTools). For large datasets, the memory-usage for AIList is 4-60% of other methods. The AIList data structure, therefore, provides a significantly improved fundamental operation for highly scalable genomic data analysis.

Availability and implementation

An implementation of the AIList data structure with both construction and search algorithms is available at http://ailist.databio.org.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +31161212,RNA-align: quick and accurate alignment of RNA 3D structures based on size-independent TM-scoreRNA.,"

Motivation

Comparison of RNA 3D structures can be used to infer functional relationship of RNA molecules. Most of the current RNA structure alignment programs are built on size-dependent scales, which complicate the interpretation of structure and functional relations. Meanwhile, the low speed prevents the programs from being applied to large-scale RNA structural database search.

Results

We developed an open-source algorithm, RNA-align, for RNA 3D structure alignment which has the structure similarity scaled by a size-independent and statistically interpretable scoring metric. Large-scale benchmark tests show that RNA-align significantly outperforms other state-of-the-art programs in both alignment accuracy and running speed. The major advantage of RNA-align lies at the quick convergence of the heuristic alignment iterations and the coarse-grained secondary structure assignment, both of which are crucial to the speed and accuracy of RNA structure alignments.

Availability and implementation

https://zhanglab.ccmb.med.umich.edu/RNA-align/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +31168931,Proteome Dataset of Qualea grandiflora Mart. (Vochysiaceae) by LC-MS/MS Label-Free Identification in Response to Aluminum.,"This dataset brief is about the descriptive proteome of Qualea grandiflora plants by label free mass spectrometry (LC-MS/MS). Q. grandiflora is a plant that accumulates aluminum (Al) in high quantities and requires it for growth and development. Although quite relevant for the understanding of Al effects on plants, the proteome of Q. grandiflora has not been studied yet. Therefore, the current proteome analysis identifies a total of 2010 proteins. Furthermore, the identified Q. grandiflora root proteins are associated with several crucial molecular functions, biological processes, and cellular sites. Hence, the proteome analysis of Q. grandiflora will contribute to unravel how plants evolved to cope with high levels of Al in soils. All data can be accessed at the Centre for Computational Mass Spectrometry - MassIVE MSV000082284 - https://massive.ucsd.edu/ProteoSAFe/dataset.jsp?task=adb9647282a5421a9cffe3124c060f46.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +31173064,RAISS: robust and accurate imputation from summary statistics.,"

Motivation

Multi-trait analyses using public summary statistics from genome-wide association studies (GWASs) are becoming increasingly popular. A constraint of multi-trait methods is that they require complete summary data for all traits. Although methods for the imputation of summary statistics exist, they lack precision for genetic variants with small effect size. This is benign for univariate analyses where only variants with large effect size are selected a posteriori. However, it can lead to strong p-value inflation in multi-trait testing. Here we present a new approach that improves the existing imputation methods and reaches a precision suitable for multi-trait analyses.

Results

We fine-tuned parameters to obtain a very high accuracy imputation from summary statistics. We demonstrate this accuracy for variants of all effect sizes on real data of 28 GWAS. We implemented the resulting methodology in a python package specially designed to efficiently impute multiple GWAS in parallel.

Availability and implementation

The python package is available at: https://gitlab.pasteur.fr/statistical-genetics/raiss, its accompanying documentation is accessible here http://statistical-genetics.pages.pasteur.fr/raiss/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +31179198,An Emergent Space for Distributed Data with Hidden Internal Order through Manifold Learning.,"Manifold-learning techniques are routinely used in mining complex spatiotemporal data to extract useful, parsimonious data representations/parametrizations; these are, in turn, useful in nonlinear model identification tasks. We focus here on the case of time series data that can ultimately be modelled as a spatially distributed system (e.g. a partial differential equation, PDE), but where we do not know the space in which this PDE should be formulated. Hence, even the spatial coordinates for the distributed system themselves need to be identified - to """"emerge from""""-the data mining process. We will first validate this """"emergent space"""" reconstruction for time series sampled without space labels in known PDEs; this brings up the issue of observability of physical space from temporal observation data, and the transition from spatially resolved to lumped (order-parameter-based) representations by tuning the scale of the data mining kernels. We will then present actual emergent space """"discovery"""" illustrations. Our illustrative examples include chimera states (states of coexisting coherent and incoherent dynamics), and chaotic as well as quasiperiodic spatiotemporal dynamics, arising in partial differential equations and/or in heterogeneous networks. We also discuss how data-driven """"spatial"""" coordinates can be extracted in ways invariant to the nature of the measuring instrument. Such gauge-invariant data mining can go beyond the fusion of heterogeneous observations of the same system, to the possible matching of apparently different systems. For an older version of this article, including other examples, see https://arxiv.org/abs/1708.05406.","hji,kes",0,0,0,2,0,NA,NA +31199465,Positive multistate protein design.,"

Motivation

Structure-based computational protein design (CPD) plays a critical role in advancing the field of protein engineering. Using an all-atom energy function, CPD tries to identify amino acid sequences that fold into a target structure and ultimately perform a desired function. The usual approach considers a single rigid backbone as a target, which ignores backbone flexibility. Multistate design (MSD) allows instead to consider several backbone states simultaneously, defining challenging computational problems.

Results

We introduce efficient reductions of positive MSD problems to Cost Function Networks with two different fitness definitions and implement them in the Pompd (Positive Multistate Protein design) software. Pompd is able to identify guaranteed optimal sequences of positive multistate full protein redesign problems and exhaustively enumerate suboptimal sequences close to the MSD optimum. Applied to nuclear magnetic resonance and back-rubbed X-ray structures, we observe that the average energy fitness provides the best sequence recovery. Our method outperforms state-of-the-art guaranteed computational design approaches by orders of magnitude and can solve MSD problems with sizes previously unreachable with guaranteed algorithms.

Availability and implementation

https://forgemia.inra.fr/thomas.schiex/pompd as documented Open Source.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +31230988,Baseline characteristics did not identify people with low back pain who respond best to a Movement System Impairment-Based classification treatment.,"

Study design

Secondary analysis of data from a randomized controlled trial.

Background

Treatment based on the Movement System Impairment-Based classification for chronic low back pain results in the same benefit when compared to other forms of exercise. It is possible that participant's characteristics measured at baseline can identify people with chronic low back pain who would respond best to a treatment based on the Movement System Impairment model.

Objectives

To assess if specific characteristics of people with chronic low back pain measured at baseline can modify the effects of a treatment based on the Movement System Impairment model on pain and disability.

Methods

Four variables assessed at baseline that could potentially modify the treatment effects of the treatment based on the Movement System Impairment model were selected (age, educational status, physical activity status and STarT back tool classification). Separate univariate models were used to investigate a possible modifier treatment effect of baseline participant's characteristics on pain and disability after the treatment. Findings of interaction values above 1 point for the outcome mean pain intensity or above 3 points for disability (Roland Morris questionnaire) were considered clinically relevant.

Results

Linear regression analyses for the outcomes of pain and disability did not show interaction values considered clinically relevant for age, educational status, physical activity status and STarT back tool classification.

Conclusion

Age, educational status, physical activity status and STarT back tool classification did not modify the effects of an 8-week treatment based on the Movement System Impairment model in patients with chronic low back pain. Registered at www.clinicaltrials.gov: NCT02221609 (https://clinicaltrials.gov/ct2/show/NCT02221609).","hji,kes",0,0,0,2,0,NA,NA +31236209,The research crisis in American institutions of complementary and integrative health: one proposed solution for chiropractic profession.,"A crisis confronts the Complementary and Integrative Health (CIH) teaching institutions in the US. Research infrastructure is needed to build and sustain productive research programs and retain their own research faculty. In most health professions, this infrastructure is largely built through research grants. In CIH, most educational institutions are funded through student tuition, which has historically also had to be the source for building their research programs. Only a limited number of these institutions have emerged as National Institute of Health (NIH) grant-funded programs. As a result, the American chiropractic institutions have seen a retrenchment in the number of active research programs. In addition, although research training programs e.g., NIH's K awards are available for CIH researchers, these programs generally result in these researchers leaving their institutions and depriving future CIH practitioners of the benefit of being trained in a culture of research. 
One proposed solution is to leverage the substantial research infrastructure and long history of collaboration available at the RAND Corporation (https://www.rand.org) This article presents the proposed five components of the RAND Center for Collaborative CIH Research and the steps required to bring it to being: 1) the CIH Research Network - an online resource and collaborative site for CIH researchers; 2) the CIH Research Advisory Board - the governing body for the Center selected by its members; 3) the RAND CIH Interest Group - a group of RAND researchers with an interest in and who could provide support to CIH research; 4) CIH Researcher Training - access to existing RAND research training as well as the potential for the Center to provide a research training home for those with training grants; and 5) CIH RAND Partnership for Research - a mentorship program to support successful CIH research. By necessity the first step in the Center's creation would be a meeting between the heads of interested CIH institutions to work out the details and to obtain buy-in. The future success of CIH-directed research on CIH will require a pooling of talent and resources across institutions; something that the American chiropractic institutions have not yet been able to achieve. This article discusses one possible solution.","hji,kes",0,0,0,2,0,NA,NA +31250882,Isoform function prediction based on bi-random walks on a heterogeneous network.,"MOTIVATION:Alternative splicing contributes to the functional diversity of protein species and the proteoforms translated from alternatively spliced isoforms of a gene actually execute the biological functions. Computationally predicting the functions of genes has been studied for decades. However, how to distinguish the functional annotations of isoforms, whose annotations are essential for understanding developmental abnormalities and cancers, is rarely explored. 
The main bottleneck is that functional annotations of isoforms are generally unavailable and functional genomic databases universally store the functional annotations at the gene level. RESULTS:We propose IsoFun to accomplish Isoform Function prediction based on bi-random walks on a heterogeneous network. IsoFun firstly constructs an isoform functional association network based on the expression profiles of isoforms derived from multiple RNA-seq datasets. Next, IsoFun uses the available Gene Ontology annotations of genes, gene-gene interactions and the relations between genes and isoforms to construct a heterogeneous network. After this, IsoFun performs a tailored bi-random walk on the heterogeneous network to predict the association between GO terms and isoforms, thus accomplishing the prediction of GO annotations of isoforms. Experimental results show that IsoFun significantly outperforms the state-of-the-art algorithms and improves the area under the receiver-operating curve (AUROC) and the area under the precision-recall curve (AUPRC) by 17% and 44% at the gene-level, respectively. We further validated the performance of IsoFun on the genes ADAM15 and BCL2L1. IsoFun accurately differentiates the functions of respective isoforms of these two genes. AVAILABILITY AND IMPLEMENTATION:The code of IsoFun is available at http://mlda.swu.edu.cn/codes.php? name=IsoFun. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +31290545,A curated collection of transcriptome datasets to investigate the molecular mechanisms of immunoglobulin E-mediated atopic diseases.,"Prevalence of allergies has reached ~20% of population in developed countries and sensitization rate to one or more allergens among school age children are approaching 50%. However, the combination of the complexity of atopic allergy susceptibility/development and environmental factors has made identification of gene biomarkers challenging. 
The amount of publicly accessible transcriptomic data presents an unprecedented opportunity for mechanistic discoveries and validation of complex disease signatures across studies. However, this necessitates structured methodologies and visual tools for the interpretation of results. Here, we present a curated collection of transcriptomic datasets relevant to immunoglobin E-mediated atopic diseases (ranging from allergies to primary immunodeficiencies). Thirty-three datasets from the Gene Expression Omnibus, encompassing 1860 transcriptome profiles, were made available on the Gene Expression Browser (GXB), an online and open-source web application that allows for the query, visualization and annotation of metadata. The thematic compositions, disease categories, sample number and platforms of the collection are described. Ranked gene lists and sample grouping are used to facilitate data visualization/interpretation and are available online via GXB (http://ige.gxbsidra.org/dm3/geneBrowser/list). Dataset validation using associated publications showed good concordance in GXB gene expression trend and fold-change.","hji,kes",0,0,0,2,0,NA,iffy +31296218,Evaluating single-subject study methods for personal transcriptomic interpretations to advance precision medicine.,"

Background

Gene expression profiling has benefited medicine by providing clinically relevant insights at the molecular candidate and systems levels. However, to adopt a more 'precision' approach that integrates individual variability including 'omics data into risk assessments, diagnoses, and therapeutic decision making, whole transcriptome expression needs to be interpreted meaningfully for single subjects. We propose an """"all-against-one"""" framework that uses biological replicates in isogenic conditions for testing differentially expressed genes (DEGs) in a single subject (ss) in the absence of an appropriate external reference standard or replicates. To evaluate our proposed """"all-against-one"""" framework, we construct reference standards (RSs) with five conventional replicate-anchored analyses (NOISeq, DEGseq, edgeR, DESeq, DESeq2) and the remainder were treated separately as single-subject sample pairs for ss analyses (without replicates).

Results

Eight ss methods (NOISeq, DEGseq, edgeR, mixture model, DESeq, DESeq2, iDEG, and ensemble) for identifying genes with differential expression were compared in Yeast (parental line versus snf2 deletion mutant; n= 42/condition) and a MCF7 breast-cancer cell line (baseline versus stimulated with estradiol; n= 7/condition). Receiver-operator characteristic (ROC) and precision-recall plots were determined for eight ss methods against each of the five RSs in both datasets. Consistent with prior analyses of these data, ~ 50% and ~ 15% DEGs were obtained in Yeast and MCF7 datasets respectively, regardless of the RSs method. NOISeq, edgeR, and DESeq were the most concordant for creating a RS. Single-subject versions of NOISeq, DEGseq, and an ensemble learner achieved the best median ROC-area-under-the-curve to compare two transcriptomes without replicates regardless of the RS method and dataset (> 90% in Yeast, > 0.75 in MCF7). Further, distinct specific single-subject methods perform better according to different proportions of DEGs.

Conclusions

The """"all-against-one"""" framework provides an honest evaluation framework for single-subject DEG studies since these methods are evaluated, by design, against reference standards produced by unrelated DEG methods. The ss-ensemble method was the only one to reliably produce higher accuracies in all conditions tested in this conservative evaluation framework. However, single-subject methods for identifying DEGs from paired samples need improvement, as no method performed with precision> 90% and obtained moderate levels of recall. http://www.lussiergroup.org/publications/EnsembleBiomarker.","hji,kes",0,0,0,2,0,NA,NA +31301205,The Generation of a Comprehensive Spectral Library for the Analysis of the Guinea Pig Proteome by SWATH-MS.,"Advances in liquid chromatography-mass spectrometry have facilitated the incorporation of proteomic studies to many biology experimental workflows. Data-independent acquisition platforms, such as sequential window acquisition of all theoretical mass spectra (SWATH-MS), offer several advantages for label-free quantitative assessment of complex proteomes over data-dependent acquisition (DDA) approaches. However, SWATH data interpretation requires spectral libraries as a detailed reference resource. The guinea pig (Cavia porcellus) is an excellent experimental model for translation to many aspects of human physiology and disease, yet there is limited experimental information regarding its proteome. To overcome this knowledge gap, a comprehensive spectral library of the guinea pig proteome is generated. Homogenates and tryptic digests are prepared from 16 tissues and subjected to >200 DDA runs. Analysis of >250000 peptide-spectrum matches resulted in a library of 73594 peptides from 7666 proteins. Library validation is provided by i) analyzing externally derived SWATH files (https://doi.org/10.1016/j.jprot.2018.03.023) and comparing peptide intensity quantifications; ii) merging of externally derived data to the base library. 
This furnishes the research community with a comprehensive proteomic resource that will facilitate future molecular-phenotypic studies using (re-engaging) the guinea pig as an experimental model of relevance to human biology. The spectral library and raw data are freely accessible in the MassIVE repository (MSV000083199).","hji,kes",0,0,0,2,0,NA,data deposited as referenced +31307061,PRSice-2: Polygenic Risk Score software for biobank-scale data.,"

Background

Polygenic risk score (PRS) analyses have become an integral part of biomedical research, exploited to gain insights into shared aetiology among traits, to control for genomic profile in experimental studies, and to strengthen causal inference, among a range of applications. Substantial efforts are now devoted to biobank projects to collect large genetic and phenotypic data, providing unprecedented opportunity for genetic discovery and applications. To process the large-scale data provided by such biobank resources, highly efficient and scalable methods and software are required.

Results

Here we introduce PRSice-2, an efficient and scalable software program for automating and simplifying PRS analyses on large-scale data. PRSice-2 handles both genotyped and imputed data, provides empirical association P-values free from inflation due to overfitting, supports different inheritance models, and can evaluate multiple continuous and binary target traits simultaneously. We demonstrate that PRSice-2 is dramatically faster and more memory-efficient than PRSice-1 and alternative PRS software, LDpred and lassosum, while having comparable predictive power.

Conclusion

PRSice-2's combination of efficiency and power will be increasingly important as data sizes grow and as the applications of PRS become more sophisticated, e.g., when incorporated into high-dimensional or gene set-based analyses. PRSice-2 is written in C++, with an R script for plotting, and is freely available for download from http://PRSice.info.","hji,kes",0,0,0,2,0,NA,NA +31333564,Googling Service Boundaries for Endovascular Clot Retrieval (ECR) Hub Hospitals in Metropolitan Sydney.,"Background and Purpose: Endovascular clot retrieval (ECR) has revolutionized acute stroke therapy but is expensive to run and staff with accredited interventional neuroradiologists 24/7; consequently, it is only feasible for each metropolitan city to have a minimum number of hubs that is adequate to service the population. This method is applied to search the minimum number of hospitals to be designated as ECR hubs in Sydney as well as the population at risk of stroke reachable within 30 min. Methods: Traveling time from the centroids of each suburbs to five ECR capable hubs [Royal Prince Alfred/RPA, Prince of Wales/POW, Royal North Shore/RNS, Liverpool/LH and Westmead/WH]. This step was performed using ggmap package in R to interface with Google Map application program interface (API). Next, we calculate the percentage of suburbs within each catchment in which traveling time to the ECR hub is <30 min. This step was performed for all possible combination of ECR hubs. The maps are available at https://gntem3.shinyapps.io/ambsydney/. The population at risk of stroke was estimated using stroke incident studies in Melbourne and Adelaide. Results: The best 3-hospital combinations are LPH/WH/RNS (82.3, 45.7, and 79.7% of suburbs reachable within 30 min or 187 of 226 suburbs) follow by RPA/LPH/RNS (100.0, 80.9, and 73.1% of suburbs) and LPH/POW/RNS (83.3, 90.7, and 76.6% of suburbs). The best 4-hospital model is LPH/WH/POW/RNS (84.2%, 91.1%, 90.7%, 77.8%). 
In the 5-hospital model, ECR is available for 191 suburbs within 30 min: LPH (83%), RPA (100%), WH (90.2%), RNS (72.7%), POW (88.9%). Based on 3-hospital model and 15% of patient eligible for ECR, the expected number of cases to be handled by each hospital is 465. This number drops down to 374 if a 4-hospital model is preferred. Conclusions: The simulation studies supported a minimum of 4 ECR hubs servicing Sydney. This model provides data on number of suburbs and population at risk of stroke that can reach these hubs within 30 min.","hji,kes",0,0,0,2,0,NA,NA +31339576,Transferrin and H-ferritin involvement in brain iron acquisition during postnatal development: impact of sex and genotype.,"Iron delivery to the developing brain is essential for energy and metabolic support needed for processes such as myelination and neuronal development. Iron deficiency, especially in the developing brain, can result in a number of long-term neurological deficits that persist into adulthood. There is considerable debate that excess access to iron during development may result in iron overload in the brain and subsequently predispose individuals to age-related neurodegenerative diseases. There is a significant gap in knowledge regarding how the brain acquires iron during development and how biological variables such as development, genetics, and sex impact brain iron status. In this study, we used a mouse model expressing a mutant form of the iron homeostatic regulator protein HFE, (Hfe H63D), the most common gene variant in Caucasians, to determine impact of the mutation on brain iron uptake. Iron uptake was assessed using 59 Fe bound to either transferrin or H-ferritin as the iron carrier proteins. We demonstrate that at postnatal day 22, mutant mice brains take up greater amounts of iron compared with wildtype. 
Moreover, we introduce H-ferritin as a key protein in brain iron transport during development and identify a sex and genotype effect demonstrating female mutant mice take up more iron by transferrin, whereas male mutant mice take up more iron from H-ferritin at PND22. Furthermore, we begin to elucidate the mechanism for uptake using immunohistochemistry to profile the regional distribution and temporal expression of transferrin receptor and T-cell immunoglobulin and mucin domain 2, the latter is the receptor for H-ferritin. These data demonstrate that sex and genotype have significant effects on iron uptake and that regional receptor expression may play a large role in the uptake patterns during development. Open Science: This manuscript was awarded with the Open Materials Badge For more information see: https://cos.io/our-services/open-science-badges/ Cover Image for this issue: doi: 10.1111/jnc.14731.","hji,kes",0,0,0,2,0,NA,NA +31368353,OSkirc: a web tool for identifying prognostic biomarkers in kidney renal clear cell carcinoma.,"Aim: To develop a free and quick analysis online tool that allows users to easily investigate the prognostic potencies of interesting genes in kidney renal clear cell carcinoma (KIRC). Patients & methods: A total of 629 KIRC cases with gene expression profiling data and clinical follow-up information are collected from public Gene Expression Omnibus and The Cancer Genome Atlas databases. Results: One web application called Online consensus Survival analysis for KIRC (OSkirc) that can be used for exploring the prognostic implications of interesting genes in KIRC was constructed. By OSkirc, users could simply input the gene symbol to receive the Kaplan-Meier survival plot with hazard ratio and log-rank p-value. 
Conclusion: OSkirc is extremely valuable for basic and translational researchers to screen and validate the prognostic potencies of genes for KIRC, publicly accessible at http://bioinfo.henu.edu.cn/KIRC/KIRCList.jsp.","hji,kes",0,0,0,2,0,NA,no separate data +31368479,Bayesian estimation of genetic regulatory effects in high-throughput reporter assays.,"

Motivation

High-throughput reporter assays dramatically improve our ability to assign function to noncoding genetic variants, by measuring allelic effects on gene expression in the controlled setting of a reporter gene. Unlike genetic association tests, such assays are not confounded by linkage disequilibrium when loci are independently assayed. These methods can thus improve the identification of causal disease mutations. While work continues on improving experimental aspects of these assays, less effort has gone into developing methods for assessing the statistical significance of assay results, particularly in the case of rare variants captured from patient DNA.

Results

We describe a Bayesian hierarchical model, called Bayesian Inference of Regulatory Differences, which integrates prior information and explicitly accounts for variability between experimental replicates. The model produces substantially more accurate predictions than existing methods when allele frequencies are low, which is of clear advantage in the search for disease-causing variants in DNA captured from patient cohorts. Using the model, we demonstrate a clear tradeoff between variant sequencing coverage and numbers of biological replicates, and we show that the use of additional biological replicates decreases variance in estimates of effect size, due to the properties of the Poisson-binomial distribution. We also provide a power and sample size calculator, which facilitates decision making in experimental design parameters.

Availability and implementation

The software is freely available from www.geneprediction.org/bird. The experimental design web tool can be accessed at http://67.159.92.22:8080.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +31400221,Alternating EM algorithm for a bilinear model in isoform quantification from RNA-seq data.,"

Motivation

Estimation of isoform-level gene expression from RNA-seq data depends on simplifying assumptions, such as uniform read distribution, that are easily violated in real data. Such violations typically lead to biased estimates. Most existing methods provide bias correction step(s), which is based on biological considerations-such as GC content-and applied in single samples separately. The main problem is that not all biases are known.

Results

We have developed a novel method called XAEM based on a more flexible and robust statistical model. Existing methods are essentially based on a linear model Xβ, where the design matrix X is known and is computed based on the simplifying assumptions. In contrast XAEM considers Xβ as a bilinear model with both X and β unknown. Joint estimation of X and β is made possible by a simultaneous analysis of multi-sample RNA-seq data. Compared to existing methods, XAEM automatically performs empirical correction of potentially unknown biases. We use an alternating expectation-maximization (AEM) algorithm, alternating between estimation of X and β. For speed XAEM utilizes quasi-mapping for read alignment, thus leading to a fast algorithm. Overall XAEM performs favorably compared to recent advanced methods. For simulated datasets, XAEM obtains higher accuracy for multiple-isoform genes. In a differential-expression analysis of a real single-cell RNA-seq dataset, XAEM achieves substantially better rediscovery rates in independent validation sets.

Availability and implementation

The method and pipeline are implemented as a tool and freely available for use at http://fafner.meb.ki.se/biostatwiki/xaem/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +31431912,Data from quantitative serum proteomic analysis after laparoscopic gastric plication.,"Bariatric surgery is an effective treatment for morbid obesity with a sustained weight loss and improvements in metabolic syndrome. We present a label free quantitative shotgun proteomics approach to analyze the serum proteome of obese people who underwent Laparoscopic Gastric Plication (LGP) as a new bariatric surgery. Pre-surgery serum samples of obese individuals were compared with the serum of the same subjects 1-2 months post-surgery (T1) and 4-5 months post-surgery (T2). The data provide a list of 224 quantifiable proteins with at least two unique peptides that were quantifiable in at least 70% of samples. Gene ontology biological processes and molecular functions of differentially regulated proteins between pre- and post-surgery samples were investigated using WebGestalt online tool. In addition, molecular networks of differentially abundant proteins were determined through Ingenuity Pathway Analysis (IPA) software. This report is related to the research article entitled """"Serum proteome changes and accelerated reduction of fat mass after Laparoscopic Gastric Plication in morbidly obese patients"""" (Savedoroudi etal. [1]). Proteomics data have been deposited to the ProteomeXchange Consortium (http://proteomecentral.proteomexchange.org) via the PRIDE partner repository through the identifier PXD010528.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +31439548,Meflin-Positive Cancer-Associated Fibroblasts Inhibit Pancreatic Carcinogenesis.,"Cancer-associated fibroblasts (CAF) constitute a major component of the tumor microenvironment. 
Recent observations in genetically engineered mouse models and clinical studies have suggested that there may exist at least two functionally different populations of CAFs, that is, cancer-promoting CAFs (pCAF) and cancer-restraining CAFs (rCAF). Although various pCAF markers have been identified, the identity of rCAFs remains unknown because of the lack of rCAF-specific marker(s). In this study, we found that Meflin, a glycosylphosphatidylinositol-anchored protein that is a marker of mesenchymal stromal/stem cells and maintains their undifferentiated state, is expressed by pancreatic stellate cells that are a source of CAFs in pancreatic ductal adenocarcinoma (PDAC). In situ hybridization analysis of 71 human PDAC tissues revealed that the infiltration of Meflin-positive CAFs correlated with favorable patient outcome. Consistent herewith, Meflin deficiency led to significant tumor progression with poorly differentiated histology in a PDAC mouse model. Similarly, genetic ablation of Meflin-positive CAFs resulted in poor differentiation of tumors in a syngeneic transplantation model. Conversely, delivery of a Meflin-expressing lentivirus into the tumor stroma or overexpression of Meflin in CAFs suppressed the growth of xenograft tumors. Lineage tracing revealed that Meflin-positive cells gave rise to a-smooth muscle actin-positive CAFs that are positive or negative for Meflin, suggesting a mechanism for generating CAF heterogeneity. Meflin deficiency or low expression resulted in straightened stromal collagen fibers, which represent a signature for aggressive tumors, in mouse or human PDAC tissues, respectively. Together, the data suggest that Meflin is a marker of rCAFs that suppress PDAC progression. 
SIGNIFICANCE: Meflin marks and functionally contributes to a subset of cancer-associated fibroblasts that exert antitumoral effects.Graphical Abstract: http://cancerres.aacrjournals.org/content/canres/79/20/5367/F1.large.jpg.","hji,kes",0,0,0,2,0,NA,NA +31448721,Renal length and volume prediction in healthy children.,"

Introduction

Little information is available regarding the evaluation of renal volume in healthy Latin-American children of different ages. The objective of this work was to establish a predictive model of renal size (volume and length) and develop a web-based calculator.

Materials and methods

A selective and representative sample was obtained randomly from the database of healthy children living in Resistencia city, Chaco, Argentina: a) the National Health Program for children under 6 years old; b)school children until 18 years old (primary and middle education). Renal dimensions were obtained by ultrasonography via a single experienced operator at the indicated site (schools or primary health care centers). Renal volume was calculated using Dinkel's formula. A multiple linear regression model was applied using potential predictors. The final model was implemented in a free web-based application.

Results

Random selection was made from the database to include 882 subjects with ages between 0.03 and 230.63 months. The data was divided into two sets (one for training and the other for model testing). The training set (423) included 212 (50%) females. Significant predictors included age, height, current weight and birth weight, and the interaction between age and present weight. Using the test dataset, both renal volume and length root mean square errors were 5.06 cm3 and 0.59 cm.

Conclusion

The prediction model was accurate and allowed for the development of a freely-available web app: Renal size prediction (https://porbm28.shinyapps.io/RenalVolume/). Once the models are validated by additional studies, the app could be a useful tool to predict renal volume and length in pediatric clinical practice.","hji,kes",0,0,0,2,0,NA,NA +31453287,"Dataset of the frequency patterns of publications annotated to human protein-coding genes, their protein products and genetic relevance.","We present data concerning the distribution of scientific publications for human protein-coding genes together with their protein products and genetic relevance. We annotated the gene2pubmed dataset Maglott et al., 2007 provided by the NCBI (National Center for Biotechnology Information) with publication years, genetic metadata corresponding to Online Mendelian Inheritance in Man (OMIM) Hamosh et al., 2005 entries and the frequency of their appearance in Genome-Wide Association Studies (GWAS) Buniello et al., 2019 provided by the European Bioinformatics Institute (EBI) using the KNIME Analytics Platform Berthold et al., 2008. The results of this data integration process comprise two datasets: 1) A dataset containing information on all human protein-coding genes that can be used to analyse the number of scientific publications in context of the potential disease relevance of the individual genes. 2) A table with the annual and cumulated number of PubMed entries. For further interpretation of the data presented in this article, please see the research article 'Target 2035 - probing the human proteome' by Carter et al. 
https://doi.org/10.1016/j.drudis.2019.06.020 Carter et al., 2019.","hji,kes",0,0,0,2,0,NA,about data creation but data URL not available +31508533,Magnesium supplementation therapy to prevent cisplatin-induced acute nephrotoxicity in pediatric cancer: A protocol for a randomized phase 2 trial.,"Although cisplatin is one of the most effective agents against various pediatric cancers, it is sometimes difficult to manage due to its dose-limiting nephrotoxicity. Magnesium sulfate (Mg) showed a kidney-protective effect against cisplatin-induced nephrotoxicity (CIN) by regulating renal platinum accumulation both in vitro and in vivo, and the body of clinical data demonstrating the efficacy of this drug in adult cancer patients is increasing.In this open, multicenter, phase-2, randomized trial, patients under age 18 years who are scheduled to receive cisplatin-containing chemotherapy will be enrolled and randomly allocated either to an Mg supplementation arm in even-numbered chemotherapy courses (arm AB) or to another arm in odd-numbered courses (arm BA), with a 1:1 allocation. Analysis objects will be reconstructed into two groups depending on whether the chemotherapy course has Mg supplementation (group B) or not (group A). The primary endpoint is the proportion of chemotherapy courses resulting in elevated serum creatinine equal to or greater than 50% of the prechemotherapy value. For the secondary endpoints, various parameters for measuring kidney function, such as serum cystatin-C, B2M, L-FABP, NGAL, and urinary NAG in the two groups will be compared. A sample size based on alpha=5% and 80% power requires at least 40 samples per group (ideally, 60 samples per group).If Mg demonstrates efficacy, a phase-3 study to confirm the prophylactic effect of Mg supplementation in both acute and chronic CIN will be developed using novel and better biomarkers. 
Trial registration:UMIN-CTR (http://www.umin.ac.jp/icdr/index.html) Identifier UMIN000029215.","hji,kes",0,0,0,2,0,NA,NA +31573931,Development of Ovarian Tissue Autograft to Restore Ovarian Function: Protocol for a French Multicenter Cohort Study.,"BACKGROUND:Sterility is a major late effect of radiotherapy and chemotherapy treatments. Iatrogenic sterility is often permanent and greatly impacts long-term quality of life. Ovarian tissue cryopreservation (OTC) performed before gonadotoxic treatments with subsequent autograft is a method of fertility preservation available for girls and women. Its application in prepubertal girls is of particular value as it is the only possible approach in this patient group. In addition, it does not require a delay in cancer therapy and no ovarian stimulation is needed. OBJECTIVE:The primary aim of this protocol is to help increase the implementation of ovarian tissue autografting in France. Knowledge is still lacking regarding the efficacy of ovarian transplantation in restoring ovarian function and regarding the safety of this procedure, especially the risk of cancer cell reseeding in certain types of cancer. A secondary aim of this study is to generate data to improve our understanding of these two essential aspects. METHODS:The DATOR (Development of Ovarian Tissue Autograft in Order to Restore Ovarian Function) study is ongoing in 17 university hospitals. The DATOR protocol includes the autograft of ovarian cortex fragments. Candidates are identified from an observational prospective cohort (called the Prospective Cohort of Patients Candidates for Ovarian Tissue Autograft [PERIDATOR]) of patients who have undergone OTC. Enrollment in the study is initiated at the patient's request and must be validated by the center's multidisciplinary team and by the study steering committee. The DATOR study begins with a total medical checkup. Ovarian tissue qualification and residual disease detection, if required, are performed. 
RESULTS:The study is ongoing. Currently, 38 patients have provided informed consent and have been entered into the DATOR study. Graft has been performed for 34 of these patients. An interim analysis was conducted on the first 25 patients for whom the period of at least 1 year posttransplantation was achieved. Out of these 25 patients, 11 women succeeded in becoming pregnant (pregnancy rate=44% [11/25]; delivery rate=40% [10/25]). Among these, 6 women conceived twice, and 1 pregnancy led to a miscarriage. CONCLUSIONS:Our preliminary analysis appears to be coherent with the accumulating body of evidence indicating the potential utility of ovarian tissue autograft for patients with premature ovarian failure. All these elements justify the pursuit of our study. TRIAL REGISTRATION:ClinicalTrials.gov NCT02846064; https://clinicaltrials.gov/ct2/show/NCT02846064. INTERNATIONAL REGISTERED REPORT IDENTIFIER (IRRID):DERR1-10.2196/12944.","hji,kes",0,0,0,2,0,NA,NA +31618061,Lexical-Level Predictors of Reading Comprehension in Third Grade: Is Spelling a Unique Contributor?,"Purpose Considerable research effort has focused on understanding reading comprehension and reading comprehension difficulties. The purpose of this correlational study was to add to the small but growing body of literature on the role that spelling may play in reading comprehension, by investigating the full range of lexical-level literacy skills and whether spelling makes a unique contribution. This study also explored whether these relations vary with the spelling scoring metric. Method Data were collected from 63 children attending Grade 3 in a Midwestern state. In addition to measuring reading comprehension, word recognition, and vocabulary, 4 spelling scoring metrics were examined: the number of words spelled correctly, the number of correct letter sequences (CLS), and Spelling Sensitivity Scores for elements and for words. 
Results All spelling metrics were significantly correlated with reading comprehension. Results of hierarchical regressions showed that spelling was a significant, unique predictor of reading comprehension when the CLS metric was used. The scoring metrics were differentially related to reading comprehension. Metrics that gave credit based on orthographic precision only (number of words spelled correctly and CLS) were more highly related to reading comprehension than metrics that scored not only on orthographic accuracy but also on phonological and morphological accuracy (Spelling Sensitivity Scores for elements and for words). Conclusion These results indicate that spelling is related to reading comprehension and have theoretical and clinical implications for the use of spelling assessment. Supplemental Material https://doi.org/10.23641/asha.9947216.","hji,kes",0,0,0,2,0,NA,NA +31633588,Widespread Pain Is Associated with Increased Risk of No Clinical Improvement After TKA in Women.,"

Background

When conservative treatments do not work, TKA may be the best option for patients with knee osteoarthritis, although a relatively large proportion of individuals do not have clinically important improvement after TKA. Evidence also suggests that women are less likely to benefit from TKA than men, but the reasons are unclear. Widespread pain disproportionately affects women and has been associated with worse outcomes after joint arthroplasty, yet it is unknown if the effect of widespread pain on TKA outcomes differs by patient gender.

Questions/purposes

(1) Does the association between widespread pain and no clinically important improvement in osteoarthritis-related pain and disability 2 years after TKA differ between men and women? (2) Does the use of pain medications 2 years after TKA differ between those with widespread pain and those without widespread pain before surgery?

Methods

Osteoarthritis Initiative (https://nda.nih.gov/oai/) study participants were followed annually from March 2005 until October 2015. Participants who underwent TKA up to the 7-year follow-up visit with pain/disability assessment at the protocol-planned visit before TKA and at the second planned annual visit after surgery were included in the analysis. Among 4796 study participants, 391 had a confirmed TKA, including 315 with pain/disability assessment at the protocol-planned visit before TKA. Overall, 95% of participants (298) had the required follow-up assessment; 5% (17) did not have follow-up data. Widespread pain was defined based on the modified American College of Rheumatology criteria. Symptoms were assessed using the WOMAC pain (range 0 to 20; higher score, more pain) and disability (range 0 to 68; higher score, more disability) scores, and the Knee Injury and Osteoarthritis Outcome Score for pain (range 0 to 100; higher score, less pain). Improvements in pain and disability were classified based on improvement from established clinically important differences (decrease in WOMAC pain = 1.5; decrease in WOMAC disability = 6.0; increase in Knee Injury and Osteoarthritis Outcome Score for pain = 9). At baseline, more women presented with widespread pain than men (45% [84 of 184] versus 32% [36 of 114]). Probability and the relative risk (RR) of no clinically important improvement were estimated using a logistic regression analysis in which participants with widespread pain and those without were compared. The analyses were done for men and women separately, then adjusted for depression and baseline outcome scores.

Results

Among women, preoperative widespread pain was associated with an increased risk of no clinically important improvement 2 years after TKA, based on WOMAC pain scores (13.5% versus 4.6%; RR 2.93 [95% CI 1.18 to 7.30]; p = 0.02) and the Knee Injury and Osteoarthritis Outcome Score for pain (16.5% versus 4.9%; RR 3.39 [95% CI 1.34 to 8.59]; p = 0.02). Given the lower and upper limits of the confidence intervals, our data are compatible with a broad range of disparate associations between widespread pain and lack of clinically important improvement in WOMAC pain scores (RR 0.77 [95% CI 0.22 to 2.70]; p = 0.68) and the Knee Injury and Osteoarthritis Outcome Score for pain (RR 1.37 [95% CI 0.47 to 4.00]; p = 0.57) among men, as well as clinically important improvement in WOMAC disability scores among men (RR 0.72 [95% CI 0.20 to 2.55]; p = 0.61) and women (RR 1.98 [95% CI 0.92 to 4.26]; p = 0.08). Participants presenting with widespread pain before TKA were more likely than those without widespread pain to use medication for symptoms of knee osteoarthritis most days for at least 1 month 2 years after TKA (51% [61 of 120] versus 32% [57 of 178]; mean difference, 18.8 [95% CI 7.3 to 30.1]; p < 0.01).

Conclusions

Widespread pain before TKA was associated with an increased risk of no clinically important improvement in knee pain 2 years postoperatively among women. Because of the small number of men with widespread pain in the sample, the results for men were inconclusive. In clinical practice, screening TKA candidates for widespread pain may be useful, and expectations of surgical outcomes may need to be tempered if patients have a concurrent diagnosis of widespread pain. Future studies should include more men with widespread pain and investigate if treatment of widespread pain before or concurrent with TKA surgery may improve surgical outcomes.

Level of evidence

Level III, therapeutic study.","hji,kes",0,0,0,2,0,NA,NA +31656094,"Genetic Susceptibility, Dietary Protein Intake, and Changes of Blood Pressure: The POUNDS Lost Trial.","High blood pressure (BP) is closely related to obesity, and weight loss lowers BP. Evidence has shown considerable interpersonal variation of changes in BP among people experiencing weight loss, and such variation might be partly determined by genetic factors. We assessed the changes in systolic and diastolic BP (SBP/DBP) among 692 participants randomly assigned to 1 of 4 diets varying in macronutrient content for 2 years. Two separate polygenic scores (SBP/DBP-PGS derived from 52/50 single nucleotide polymorphisms) were built for each participant based on 66 BP-associated single nucleotide polymorphisms. During a 2-year intervention, participants in the bottom versus upper tertile of SBP/DBP-PGS had a greater decrease in SBP (SBP at 6, 12, and 24 months: -3.84 versus -1.61, -4.76 versus -2.75, -2.49 versus -1.63; P=0.001) or in DBP (DBP at 6, 12, and 24 months: -3.09 versus -1.34, -2.69 versus -1.44, -1.82 versus -0.53; P<0.001). We also found gene-diet interaction on changes in SBP from baseline to 24 months (Pinteraction=0.009). Among participants assigned to a high-protein diet, those with a lower SBP-polygenic scores had greater decreases in SBP at months 6 (P=0.018), months 12 (P=0.007), and months 24 (P=0.089); while no significant difference was observed across the SBP-polygenic scores tertile groups among those assigned to an average-protein diet (all P values >0.05). Our data indicate that genetic susceptibility may affect BP changes in response to weight-loss diet interventions, and protein intake may modify the genetic associations with changes in BP. This trial was registered at URL: http://www.clinicaltrials.gov. 
Unique identifier: NCT00072995.","hji,kes",0,0,0,2,0,NA,NA +31720340,"Draft genome sequence data of Cercospora kikuchii, a causal agent of Cercospora leaf blight and purple seed stain of soybeans.","Cercospora kikuchii (Tak. Matsumoto & Tomoy.) M.W. Gardner 1927 is an ascomycete fungal pathogen that causes Cercospora leaf blight and purple seed stain on soybean. Here, we report the first draft genome sequence and assembly of this pathogen. The C. kikuchii strain ARG_18_001 was isolated from soybean purple seed collected from San Pedro, Buenos Aires, Argentina, during the 2018 harvest. The genome was sequenced using a 2 × 150 bp paired-end method by Illumina NovaSeq 6000. The C. kikuchii protein-coding genes were predicted using FunGAP (Fungal Genome Annotation Pipeline). The draft genome assembly was 33.1 Mb in size with a GC-content of 53%. The gene prediction resulted in 14,856 gene models/14,721 protein coding genes. Genomic data of C. kikuchii presented here will be a useful resource for future studies of this pathosystem. The data can be accessed at GenBank under the accession number VTAY00000000 https://www.ncbi.nlm.nih.gov/nuccore/VTAY00000000.","hji,kes",0,0,0,2,0,NA,NA +31730280,Temperature dependence of NMR chemical shifts: Tracking and statistical analysis.,"Isotropic chemical shifts measured by solution nuclear magnetic resonance (NMR) spectroscopy offer extensive insights into protein structure and dynamics. Temperature dependences add a valuable dimension; notably, the temperature dependences of amide proton chemical shifts are valuable probes of hydrogen bonding, temperature-dependent loss of structure, and exchange between distinct protein conformations. Accordingly, their uses include structural analysis of both folded and disordered proteins, and determination of the effects of mutations, binding, or solution conditions on protein energetics. 
Fundamentally, these temperature dependences result from changes in the local magnetic environments of nuclei, but correlations with global thermodynamic parameters measured via calorimetric methods have been observed. Although the temperature dependences of amide proton and nitrogen chemical shifts are often well approximated by a linear model, deviations from linearity are also observed and may be interpreted as evidence of fast exchange between distinct conformational states. Here, we describe computational methods, accessible via the Shift-T web server, including an automated tracking algorithm that propagates initial (single temperature) 1 H15 N cross peak assignments to spectra collected over a range of temperatures. Amide proton and nitrogen temperature coefficients (slopes determined by fitting chemical shift vs. temperature data to a linear model) are subsequently calculated. Also included are methods for the detection of systematic, statistically significant deviation from linearity (curvature) in the temperature dependences of amide proton chemical shifts. The use and utility of these methods are illustrated by example, and the Shift-T web server is freely available at http://meieringlab.uwaterloo.ca/shiftt.","hji,kes",0,0,0,2,0,NA,NA +31769676,ROBOKOP KG and KGB: Integrated Knowledge Graphs from Federated Sources.,"A proliferation of data sources has led to the notional existence of an implicit Knowledge Graph (KG) that contains vast amounts of biological knowledge contributed by distributed Application Programming Interfaces (APIs). However, challenges arise when integrating data across multiple APIs due to incompatible semantic types, identifier schemes, and data formats. We present ROBOKOP KG ( http://robokopkg.renci.org ), which is a KG that was initially built to support the open biomedical question-answering application, ROBOKOP (Reasoning Over Biomedical Objects linked in Knowledge-Oriented Pathways) ( http://robokop.renci.org ). 
Additionally, we present the ROBOKOP Knowledge Graph Builder (KGB), which constructs the KG and provides an extensible framework to handle graph query over and integration of federated data sources.","hji,kes",0,0,0,2,0,NA,NA +31780760,"PedMap: a pediatric diseases map generated from clinical big data from Hangzhou, China.","Epidemiological knowledge of pediatric diseases may improve professionals' understanding of the pathophysiology of and risk factors for diseases and is also crucial for decision making related to workforce and resource planning in pediatric departments. In this study, a pediatric disease epidemiology knowledgebase called PedMap (http://pedmap.nbscn.org) was constructed from the clinical data from 5 447 202 outpatient visits of 2 189 868 unique patients at a children's hospital (Hangzhou, China) from 2013 to 2016. The top 100 most-reported pediatric diseases were identified and visualized. These common pediatric diseases were clustered into 4 age groups and 4 seasons. The prevalence, age distribution and co-occurrence diseases for each disease were also visualized. Furthermore, an online prediction tool based on Gaussian regression models was developed to predict pediatric disease incidence based on weather information. PedMap is the first comprehensive epidemiological resource to show the full view of age-related, seasonal, climate-related variations in and co-occurrence patterns of pediatric diseases.","hji,kes",0,0,0,2,0,NA,clinical +31784425,Single-Cell Gene Expression Analyses Reveal Distinct Self-Renewing and Proliferating Subsets in the Leukemia Stem Cell Compartment in Acute Myeloid Leukemia.,"Standard chemotherapy for acute myeloid leukemia (AML) targets proliferative cells and efficiently induces complete remission; however, many patients relapse and die of their disease. Relapse is caused by leukemia stem cells (LSC), the cells with self-renewal capacity. 
Self-renewal and proliferation are separate functions in normal hematopoietic stem cells (HSC) in steady-state conditions. If these functions are also separate functions in LSCs, then antiproliferative therapies may fail to target self-renewal, allowing for relapse. We investigated whether proliferation and self-renewal are separate functions in LSCs as they often are in HSCs. Distinct transcriptional profiles within LSCs of Mll-AF9/NRASG12V murine AML were identified using single-cell RNA sequencing. Single-cell qPCR revealed that these genes were also differentially expressed in primary human LSCs and normal human HSPCs. A smaller subset of these genes was upregulated in LSCs relative to HSPCs; this subset of genes constitutes """"LSC-specific"""" genes in human AML. To assess the differences between these profiles, we identified cell surface markers, CD69 and CD36, whose genes were differentially expressed between these profiles. In vivo mouse reconstitution assays revealed that only CD69High LSCs were capable of self-renewal and were poorly proliferative. In contrast, CD36High LSCs were unable to transplant leukemia but were highly proliferative. These data demonstrate that the transcriptional foundations of self-renewal and proliferation are distinct in LSCs as they often are in normal stem cells and suggest that therapeutic strategies that target self-renewal, in addition to proliferation, are critical to prevent relapse and improve survival in AML. SIGNIFICANCE: These findings define and functionally validate a self-renewal gene profile of leukemia stem cells at the single-cell level and demonstrate that self-renewal and proliferation are distinct in AML. GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/80/3/458/F1.large.jpg.","hji,kes",0,0,0,2,0,NA,NA +31793066,Practical recommendations of the EAU-ESPU guidelines committee for monosymptomatic enuresis-Bedwetting.,"

Background and aims

The objective of this update of the EAU-ESPU guidelines recommendations for nocturnal enuresis was to review the recent published literature of studies, reviews, guidelines regarding the etiology, diagnosis and treatment options of nocturnal enuresis and transform the information into a practical recommendation strategy for the general practitioner, pediatrician, pediatric urologist and urologist.

Material and methods

Since 2012 a monthly literature search using Scopus was performed and the relevant literature was reviewed and prospectively registered on the European Urology bedwetting enuresis resource center (http://bedwetting.europeanurology.com/). In addition, guideline papers and statements of the European Society for Paediatric Urology (ESPU), the European Association of Urology (EAU), the National Institute for Health and Care Excellence (NICE) and the International Children Continence Society (ICCS) were used to update the knowledge and evidence resulting in this practical recommendation strategy. Recommendations have been discussed and agreed within the working group of the EAU-ESPU guidelines committee members.

Results

The recommendations focus to place the child and his family in a control position. Pragmatic analysis is made of the bedwetting problem by collecting voiding and drinking habits during the day, measuring nighttime urine production and identification of possible risk factors such as high-volume evening drinking, nighttime overactive bladder, behavioral or psychological problems or sleep disordered breathing. A questionnaire will help to identify those risk factors.

Conclusion

Motivation of the child is important for success. Continuous involvement of the child and the family in the treatment will improve treatment compliance, success and patient satisfaction.","hji,kes",0,0,0,2,0,NA,NA +31815677,Effectiveness of Smartphone App-Based Interactive Management on Glycemic Control in Chinese Patients With Poorly Controlled Diabetes: Randomized Controlled Trial.,"BACKGROUND:In recent years, the rapid development of mobile medical technology has provided multiple ways for the long-term management of chronic diseases, especially diabetes. As a new type of management model, smartphone apps are global, convenient, cheap, and interactive. Although apps were proved to be more effective at glycemic control, compared with traditional computer- and Web-based telemedicine technologies, how to gain a further and sustained improvement is still being explored. OBJECTIVE:The objective of this study was to investigate the effectiveness of an app-based interactive management model by a professional health care team on glycemic control in Chinese patients with poorly controlled diabetes. METHODS:This study was a 6-month long, single-center, prospective randomized controlled trial. A total of 276 type 1 or type 2 diabetes patients were enrolled and randomized to the control group (group A), app self-management group (group B), and app interactive management group (group C) in a 1:1:1 ratio. The primary outcome was the change in glycated hemoglobin (HbA1c) level. Missing data were handled by multiple imputation. RESULTS:At months 3 and 6, all 3 groups showed significant decreases in HbA1c levels (all P<.05). Patients in the app interactive management group had a significantly lower HbA1c level than those in the app self-management group at 6 months (P=.04). The average HbA1c reduction in the app interactive management group was larger than that in the app self-management and control groups at both months 3 and 6 (all P<.05). 
However, no differences in HbA1c reduction were observed between the app self-management and control groups at both months 3 and 6 (both P>.05). Multivariate line regression analyses also showed that the app interactive management group was associated with the larger reduction of HbA1c compared with groups A and B at both months 3 and 6 (all P>.05). In addition, the app interactive management group had better control of triglyceride and high-density lipoprotein cholesterol levels at both months 3 and 6 compared with baseline (both P<.05). CONCLUSIONS:In Chinese patients with poorly controlled diabetes, it was difficult to achieve long-term effective glucose improvement by using app self-management alone, but combining it with interactive management can help achieve rapid and sustained glycemic control. TRIAL REGISTRATION:ClinicalTrials.gov NCT02589730; https://clinicaltrials.gov/ct2/show/NCT02589730.","hji,kes",0,0,0,2,0,NA,NA +31833063,Influence of mental stress and environmental toxins on circadian clocks: Implications for redox regulation of the heart and cardioprotection.,"Risk factors in the environment such as air pollution and mental stress contribute to the development of chronic non-communicable disease. Air pollution was identified as the leading health risk factor in the physical environment, followed by water pollution, soil pollution/heavy metals/chemicals and occupational exposures, however neglecting the non-chemical environmental health risk factors (e.g. mental stress and noise). Epidemiological data suggest that environmental risk factors are associated with higher risk for cardiovascular, metabolic and mental diseases, including hypertension, heart failure, myocardial infarction, diabetes, arrhythmia, stroke, depression and anxiety disorders. 
We provide an overview on the impact of the external exposome comprising risk factors/exposures on cardiovascular health with a focus on dysregulation of stress hormones, mitochondrial function, redox balance and inflammation with special emphasis on the circadian clock. Finally, we assess the impact of circadian clock dysregulation on cardiovascular health and the potential of environment-specific preventive strategies or """"chrono"""" therapy for cardioprotection. LINKED ARTICLES: This article is part of a themed issue on Risk factors, comorbidities, and comedications in cardioprotection. To view the other articles in this section visit http://onlinelibrary.wiley.com/doi/10.1111/bph.v177.23/issuetoc.","hji,kes",0,0,0,2,0,NA,NA +31848453,PRIMEval: Optimization and screening of multiplex oligonucleotide assays.,"The development of multiplex polymerase chain reaction and microarray assays is challenging due to primer dimer formation, unspecific hybridization events, the generation of unspecific by-products, primer depletion, and thus lower amplification efficiencies. We have developed a software workflow with three underlying algorithms that differ in their use case and specificity, allowing the complete in silico evaluation of such assays on user-derived data sets. We experimentally evaluated the method for the prediction of oligonucleotide hybridization events including resulting products and probes, self-dimers, cross-dimers and hairpins at different experimental conditions. The developed method allows explaining the observed artefacts through in silico WGS data and thermodynamic predictions. PRIMEval is available publicly at https://primeval.ait.ac.at.","hji,kes",0,0,0,2,0,NA,NA +31861980,BISR-RNAseq: an efficient and scalable RNAseq analysis workflow with interactive report generation.,"

Background

RNA sequencing has become an increasingly affordable way to profile gene expression patterns. Here we introduce a workflow implementing several open-source softwares that can be run on a high performance computing environment.

Results

Developed as a tool by the Bioinformatics Shared Resource Group (BISR) at the Ohio State University, we have applied the pipeline to a few publicly available RNAseq datasets downloaded from GEO in order to demonstrate the feasibility of this workflow. Source code is available here: workflow: https://code.bmi.osumc.edu/gadepalli.3/BISR-RNAseq-ICIBM2019 and shiny: https://code.bmi.osumc.edu/gadepalli.3/BISR_RNASeq_ICIBM19. Example dataset is demonstrated here: https://dataportal.bmi.osumc.edu/RNA_Seq/.

Conclusion

The workflow allows for the analysis (alignment, QC, gene-wise counts generation) of raw RNAseq data and seamless integration of quality analysis and differential expression results into a configurable R shiny web application.","hji,kes",0,0,0,2,0,NA,NA +31873725,Causal network perturbations for instance-specific analysis of single cell and disease samples.,"

Motivation

Complex diseases involve perturbation in multiple pathways and a major challenge in clinical genomics is characterizing pathway perturbations in individual samples. This can lead to patient-specific identification of the underlying mechanism of disease thereby improving diagnosis and personalizing treatment. Existing methods rely on external databases to quantify pathway activity scores. This ignores the data dependencies and that pathways are incomplete or condition-specific.

Results

ssNPA is a new approach for subtyping samples based on deregulation of their gene networks. ssNPA learns a causal graph directly from control data. Sample-specific network neighborhood deregulation is quantified via the error incurred in predicting the expression of each gene from its Markov blanket. We evaluate the performance of ssNPA on liver development single-cell RNA-seq data, where the correct cell timing is recovered; and two TCGA datasets, where ssNPA patient clusters have significant survival differences. In all analyses ssNPA consistently outperforms alternative methods, highlighting the advantage of network-based approaches.

Availability and implementation

http://www.benoslab.pitt.edu/Software/ssnpa/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +31874601,CSA: a web service for the complete process of ChIP-Seq analysis.,"

Background

Chromatin immunoprecipitation sequencing (ChIP-seq) is a technology that combines chromatin immunoprecipitation (ChIP) with next generation of sequencing technology (NGS) to analyze protein interactions with DNA. At present, most ChIP-seq analysis tools adopt the command line, which lacks user-friendly interfaces. Although some web services with graphical interfaces have been developed for ChIP-seq analysis, these sites cannot provide a comprehensive analysis of ChIP-seq from raw data to downstream analysis.

Results

In this study, we develop a web service for the whole process of ChIP-Seq Analysis (CSA), which covers mapping, quality control, peak calling, and downstream analysis. In addition, CSA provides a customization function for users to define their own workflows. And the visualization of mapping, peak calling, motif finding, and pathway analysis results are also provided in CSA. For the different types of ChIP-seq datasets, CSA can provide the corresponding tool to perform the analysis. Moreover, CSA can detect differences in ChIP signals between ChIP samples and controls to identify absolute binding sites.

Conclusions

The two case studies demonstrate the effectiveness of CSA, which can complete the whole procedure of ChIP-seq analysis. CSA provides a web interface for users, and implements the visualization of every analysis step. The website of CSA is available at http://CompuBio.csu.edu.cn.","hji,kes",0,0,0,2,0,NA,NA +31886876,Prognostic model for multiple myeloma progression integrating gene expression and clinical features.,"BACKGROUND:Multiple myeloma (MM) is a hematological cancer caused by abnormal accumulation of monoclonal plasma cells in bone marrow. With the increase in treatment options, risk-adapted therapy is becoming more and more important. Survival analysis is commonly applied to study progression or other events of interest and stratify the risk of patients. RESULTS:In this study, we present the current state-of-the-art model for MM prognosis and the molecular biomarker set for stratification: the winning algorithm in the 2017 Multiple Myeloma DREAM Challenge, Sub-Challenge 3. Specifically, we built a non-parametric complete hazard ranking model to map the right-censored data into a linear space, where commonplace machine learning techniques, such as Gaussian process regression and random forests, can play their roles. Our model integrated both the gene expression profile and clinical features to predict the progression of MM. Compared with conventional models, such as Cox model and random survival forests, our model achieved higher accuracy in 3 within-cohort predictions. In addition, it showed robust predictive power in cross-cohort validations. Key molecular signatures related to MM progression were identified from our model, which may function as the core determinants of MM progression and provide important guidance for future research and clinical practice. Functional enrichment analysis and mammalian gene-gene interaction network revealed crucial biological processes and pathways involved in MM progression. 
The model is dockerized and publicly available at https://www.synapse.org/#!Synapse:syn11459638. Both data and reproducible code are included in the docker. CONCLUSIONS:We present the current state-of-the-art prognostic model for MM integrating gene expression and clinical features validated in an independent test set.","hji,kes",0,0,0,2,0,NA,iffy ...data and code available… but doesn't say benchmarking or any special about the data available +31913588,Measuring and optimising the efficiency of community hospital inpatient care for older people: the MoCHA mixed-methods study,"

Background

Community hospitals are small hospitals providing local inpatient and outpatient services. National surveys report that inpatient rehabilitation for older people is a core function but there are large differences in key performance measures. We have investigated these variations in community hospital ward performance.

Objectives

(1) To measure the relative performance of community hospital wards (studies 1 and 2); (2) to identify characteristics of community hospital wards that optimise performance (studies 1 and 3); (3) to develop a web-based interactive toolkit that supports operational changes to optimise ward performance (study 4); (4) to investigate the impact of community hospital wards on secondary care use (study 5); and (5) to investigate associations between short-term community (intermediate care) services and secondary care utilisation (study 5).

Methods

Study 1 – we used national data to conduct econometric estimations using stochastic frontier analysis in which a cost function was modelled using significant predictors of community hospital ward costs. Study 2 – a national postal survey was developed to collect data from a larger sample of community hospitals. Study 3 – three ethnographic case studies were performed to provide insight into less tangible aspects of community hospital ward care. Study 4 – a web-based interactive toolkit was developed by integrating the econometrics (study 1) and case study (study 3) findings. Study 5 – regression analyses were conducted using data from the Atlas of Variation Map 61 (rate of emergency admissions to hospital for people aged ≥ 75 years with a length of stay of < 24 hours) and the National Audit of Intermediate Care.

Results

Community hospital ward efficiency is comparable with the NHS acute hospital sector (mean cost efficiency 0.83, range 0.72–0.92). The rank order of community hospital ward efficiencies was distinguished to facilitate learning across the sector. On average, if all community hospital wards were operating in line with the highest cost efficiency, savings of 17% (or £47M per year) could be achieved (price year 2013/14) for our sample of 101 wards. Significant economies of scale were found: a 1% rise in output was associated with an average 0.85% increase in costs. We were unable to obtain a larger community hospital sample because of the low response rate to our national survey. The case studies identified how rehabilitation was delivered through collaborative, interdisciplinary working; interprofessional communication; and meaningful patient and family engagement. We also developed insight into patients' recovery trajectories and care transitions. The web-based interactive toolkit was established [http://mocha.nhsbenchmarking.nhs.uk/ (accessed 9 September 2019)]. The crisis response team type of intermediate care, but not community hospitals, had a statistically significant negative association with emergency admissions.

Limitations

The econometric analyses were based on cross-sectional data and were also limited by missing data. The low response rate to our national survey means that we cannot extrapolate reliably from our community hospital sample.

Conclusions

The results suggest that significant community hospital ward savings may be realised by improving modifiable performance factors that might be augmented further by economies of scale.

Future work

How less efficient hospitals might reduce costs and sustain quality requires further research.

Funding

This project was funded by the National Institute for Health Research (NIHR) Health Services and Delivery Research programme and will be published in full in Health Services and Delivery Research; Vol. 8, No. 1. See the NIHR Journals Library website for further project information.","hji,kes",0,0,0,2,0,NA,NA +31921518,A transcriptomic study of probenecid on injured spinal cords in mice.,"Background:Recent studies have found that probenecid has neuroprotective and reparative effects on central nervous system injuries. However, its effect on genome-wide transcription in acute spinal cord injury (SCI) remains unknown. In the present study, RNA sequencing (RNA-Seq) is used to analyze the effect of probenecid on the local expression of gene transcription 8 h after spinal injury. Methods:An Infinite Horizon impactor was used to perform contusive SCI in mice. The SCI model was made by using a rod (1.3 mm diameter) with a force of 50 Kdynes. Sham-operated mice only received a laminectomy without contusive injury. The injured mice were randomly assigned into either the control (SCI_C) or probenecid injection (SCI_P) group. In the latter group, the probenecid drug was intraperitoneally injected (0.5 mg/kg) immediately following injury. Eight hours after the injury or laminectomy, the spinal cords were removed from the mice in both groups. The total RNAs were extracted and purified for library preparation and transcriptome sequencing. Differential gene expressions (DEGs) of the three groups-sham, SCI_C and SCI_P-were analyzed using a DESeq software. Gene Ontology (GO) and Kyoto Encyclopedia of Genes and Genomes (KEGG) enrichment analysis of DEGs were performed using a GOseq R package and KOBAS software. Real-time quantitative reverse-transcriptase polymerase chain reaction was used to validate RNA-Seq results. Results:RNA-Seq showed that, compared to the SCI_C group, the number of DEGs was 641 in the SCI_P group (286 upregulated and 355 downregulated). 
According to GO analysis, DEGs were most enriched in extracellular matrix (ECM), collagen trimer, protein bounding and sequence specific DNA binding. KEGG analysis showed that the most enriched pathways included: cell adhesion molecules, Leukocyte transendothelial migration, ECM-receptor interactions, PI3K-Akt signaling pathways, hematopoietic cell lineages, focal adhesions, the Rap1 signaling pathway, etc. The sequence data have been deposited into the Sequence Read Archive (https://www.ncbi.nlm.nih.gov/sra/PRJNA554464).","hji,kes",0,0,0,2,0,NA,data deposited as referenced +32022843,FTIP: an accurate and efficient method for global protein surface comparison.,"

Motivation

Global protein surface comparison (GPSC) studies have been limited compared to other research works on protein structure alignment/comparison due to lack of real applications associated with GPSC. However, the technology advances in cryo-electron tomography (CET) have made methods to identify proteins from their surface shapes extremely useful.

Results

In this study, we developed a new method called Farthest point sampling (FPS)-enhanced Triangulation-based Iterative-closest-Point (ICP) (FTIP) for GPSC. We applied it to protein classification using only surface shape information. Our method first extracts a set of feature points from protein surfaces using FPS and then uses a triangulation-based efficient ICP algorithm to align the feature points of the two proteins to be compared. Tested on a benchmark dataset with 2329 proteins using nearest-neighbor classification, FTIP outperformed the state-of-the-art method for GPSC based on 3D Zernike descriptors. Using real and simulated cryo-EM data, we show that FTIP could be applied in the future to address problems in protein identification in CET experiments.

Availability and implementation

Programs/scripts we developed/used in the study are available at http://ani.stat.fsu.edu/~yuan/index.fld/FTIP.tar.bz2.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +32025087,Reassessing Southern Ocean Air-Sea CO2 Flux Estimates With the Addition of Biogeochemical Float Observations.,"New estimates of pCO2 from profiling floats deployed by the Southern Ocean Carbon and Climate Observations and Modeling (SOCCOM) project have demonstrated the importance of wintertime outgassing south of the Polar Front, challenging the accepted magnitude of Southern Ocean carbon uptake (Gray et al., 2018, https://doi:10.1029/2018GL078013). Here, we put 3.5 years of SOCCOM observations into broader context with the global surface carbon dioxide database (Surface Ocean CO2 Atlas, SOCAT) by using the two interpolation methods currently used to assess the ocean models in the Global Carbon Budget (Le Quéré et al., 2018, https://doi:10.5194/essd-10-2141-2018) to create a ship-only, a float-weighted, and a combined estimate of Southern Ocean carbon fluxes (<35°S). In our ship-only estimate, we calculate a mean uptake of -1.14 ± 0.19 Pg C/yr for 2015-2017, consistent with prior studies. The float-weighted estimate yields a significantly lower Southern Ocean uptake of -0.35 ± 0.19 Pg C/yr. Subsampling of high-resolution ocean biogeochemical process models indicates that some of the differences between float and ship-only estimates of the Southern Ocean carbon flux can be explained by spatial and temporal sampling differences. The combined ship and float estimate minimizes the root-mean-square pCO2 difference between the mapped product and both data sets, giving a new Southern Ocean uptake of -0.75 ± 0.22 Pg C/yr, though with uncertainties that overlap the ship-only estimate. 
An atmospheric inversion reveals that a shift of this magnitude in the contemporary Southern Ocean carbon flux must be compensated for by ocean or land sinks within the Southern Hemisphere.","hji,kes",0,0,0,2,0,NA,NA +32043185,Metabolic alterations in immune cells associate with progression to type 1 diabetes.,"

Aims/hypothesis

Previous metabolomics studies suggest that type 1 diabetes is preceded by specific metabolic disturbances. The aim of this study was to investigate whether distinct metabolic patterns occur in peripheral blood mononuclear cells (PBMCs) of children who later develop pancreatic beta cell autoimmunity or overt type 1 diabetes.

Methods

In a longitudinal cohort setting, PBMC metabolomic analysis was applied in children who (1) progressed to type 1 diabetes (PT1D, n = 34), (2) seroconverted to ≥1 islet autoantibody without progressing to type 1 diabetes (P1Ab, n = 27) or (3) remained autoantibody negative during follow-up (CTRL, n = 10).

Results

During the first year of life, levels of most lipids and polar metabolites were lower in the PT1D and P1Ab groups compared with the CTRL group. Pathway over-representation analysis suggested alanine, aspartate, glutamate, glycerophospholipid and sphingolipid metabolism were over-represented in PT1D. Genome-scale metabolic models of PBMCs during type 1 diabetes progression were developed by using publicly available transcriptomics data and constrained with metabolomics data from our study. Metabolic modelling confirmed altered ceramide pathways, known to play an important role in immune regulation, as specifically associated with type 1 diabetes progression.

Conclusions/interpretation

Our data suggest that systemic dysregulation of lipid metabolism, as observed in plasma, may impact the metabolism and function of immune cells during progression to overt type 1 diabetes.

Data availability

The GEMs for PBMCs have been submitted to BioModels (www.ebi.ac.uk/biomodels/), under accession number MODEL1905270001. The metabolomics datasets and the clinical metadata generated in this study were submitted to MetaboLights (https://www.ebi.ac.uk/metabolights/), under accession number MTBLS1015.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +32078592,"Characteristics and Health Status of Informal Unpaid Caregivers - 44 States, District of Columbia, and Puerto Rico, 2015-2017.","In 2015, an estimated 17.7 million U.S. persons were informal caregivers who provided substantial services through in-home, unpaid assistance to their family members and friends (1). Caregiving can have many benefits, such as enhancing the bond between caregiver and recipient, but it can also place an emotional and physical strain on caregivers, leading to higher rates of depression, lower quality of life, and poorer overall health (2). As the U.S. population continues to age (3), the need for informal caregivers will likely increase. However, little nationally representative information on prevalence of caregivers is available. This study examined demographic characteristics and health status of informal caregivers from 44 states,* the District of Columbia (DC), and Puerto Rico, based on data from the Behavioral Risk Factor Surveillance System (BRFSS) collected during 2015-2017. Overall, approximately one in five adults reported that they had provided care to a family member or friend in the preceding 30 days. Fifty-eight percent of caregivers were women, and a majority were non-Hispanic white, with at least some college education, and married or living with a partner. Across all states, 19.2% of caregivers reported being in fair or poor health, although significant state-to-state variation occurred. Caregivers provide important support to family members, friends, and the health care system and might compromise their own health to provide this support (1,2). 
Better understanding of caregivers and the challenges they face could inform implementation of improvements in support systems that could enhance not only the health of the caregiver, but that of the care recipient as well. For example, additional data regarding demographics at the state level might aid in more effective planning and support of caregivers with evidence-based programs and assistance (https://www.cdc.gov/aging/publications/features/caring-for-yourself.html).","hji,kes",0,0,0,2,0,NA,public health +32110491,DISNET: a framework for extracting phenotypic disease information from public sources.,"Background:Within the global endeavour of improving population health, one major challenge is the identification and integration of medical knowledge spread through several information sources. The creation of a comprehensive dataset of diseases and their clinical manifestations based on information from public sources is an interesting approach that allows one not only to complement and merge medical knowledge but also to increase it and thereby to interconnect existing data and analyse and relate diseases to each other. In this paper, we present DISNET (http://disnet.ctb.upm.es/), a web-based system designed to periodically extract the knowledge from signs and symptoms retrieved from medical databases, and to enable the creation of customisable disease networks. Methods:We here present the main features of the DISNET system. We describe how information on diseases and their phenotypic manifestations is extracted from Wikipedia and PubMed websites; specifically, texts from these sources are processed through a combination of text mining and natural language processing techniques. Results:We further present the validation of our system on Wikipedia and PubMed texts, obtaining the relevant accuracy. The final output includes the creation of a comprehensive symptoms-disease dataset, shared (free access) through the system's API. 
We finally describe, with some simple use cases, how a user can interact with it and extract information that could be used for subsequent analyses. Discussion:DISNET allows retrieving knowledge about the signs, symptoms and diagnostic tests associated with a disease. It is not limited to a specific category (all the categories that the selected sources of information offer us) and clinical diagnosis terms. It further allows to track the evolution of those terms through time, being thus an opportunity to analyse and observe the progress of human knowledge on diseases. We further discussed the validation of the system, suggesting that it is good enough to be used to extract diseases and diagnostically-relevant terms. At the same time, the evaluation also revealed that improvements could be introduced to enhance the system's reliability.","hji,kes",0,0,0,2,0,NA,"good, but too clinical I think" +32123714,Estimation of upper and lower bounds of Gini coefficient by fuzzy data.,"The data presented in this paper are used to examine the uncertainty in macroeconomic variables and their impact on the Gini coefficient. Annual data for the period 2017 - 1996 are taken from the Bank of Iran website https://www.cbi.ir. We used fuzzy regression with symmetric coefficients to calculate upper and lower bound data of Gini coefficient. Estimated data at this stage can be a very useful guide for policymakers, on the other hand, it is a benchmark for evaluating the effectiveness of government policies. The reason for using fuzzy regression to estimate data on Gini coefficients is the extra flexibility of this model.","hji,kes",0,0,0,2,0,NA,NA +32130122,Feasibility of an Electronic Health Tool to Promote Physical Activity in Primary Care: Pilot Cluster Randomized Controlled Trial.,"BACKGROUND:Physical inactivity is associated with increased health risks. 
Primary care providers (PCPs) are well positioned to support increased physical activity (PA) levels through screening and provision of PA prescriptions. However, PCP counseling on PA is not common. OBJECTIVE:This study aimed to assess the feasibility of implementing an electronic health (eHealth) tool to support PA counseling by PCPs and estimate intervention effectiveness on patients' PA levels. METHODS:A pragmatic pilot study was conducted using a stepped wedge cluster randomized trial design. The study was conducted at a single primary care clinic, with 4 pre-existing PCP teams. Adult patients who had a periodic health review (PHR) scheduled during the study period were invited to participate. The eHealth tool involved an electronic survey sent to participants before their PHR via an email or a tablet; data were used to automatically produce tailored resources and a PA prescription in the electronic medical record of participants in the intervention arm. Participants assigned to the control arm received usual care from their PCP. Feasibility was assessed by the proportion of completed surveys and patient-reported acceptability and fidelity measures. The primary effectiveness outcome was patient-reported PA at 4 months post-PHR, measured as metabolic equivalent of task (MET) minutes per week. Secondary outcomes assessed determinants of PA, including self-efficacy and intention to change based on the Health Action Process Approach behavior change theory. RESULTS:A total of 1028 patients receiving care from 34 PCPs were invited to participate and 530 (51.55%) consented (intervention [n=296] and control [n=234]). Of the participants who completed a process evaluation, almost half (88/178, 49.4%) stated they received a PA prescription, with only 42 receiving the full intervention including tailored resources from their PCP. 
A cluster-level linear regression analysis yielded a non-statistically significant positive difference in MET-minutes reported per week at follow-up between intervention and control conditions (mean difference 1027; 95% CI -155 to 2209; P=.09). No statistically significant differences were observed for secondary outcomes. CONCLUSIONS:Our results suggest that it is feasible to build an eHealth tool that screens and provides tailored resources for PA in a primary care setting but suboptimal intervention fidelity suggests greater work must be done to address PCP barriers to resource distribution. Participant responses to the primary effectiveness outcome (MET-minutes) were highly variable, reflecting a need for more robust measures of PA in future trials to address limitations in patient-reported data. TRIAL REGISTRATION:ClinicalTrials.gov NCT03181295; https://clinicaltrials.gov/ct2/show/NCT03181295.","hji,kes",0,0,0,2,0,NA,NA +32130186,A 21-Day School-Based Toothbrushing Intervention in Children Aged 6 to 9 Years in Indonesia and Nigeria: Protocol for a Two-Arm Superiority Randomized Controlled Trial.,"BACKGROUND:The World Health Organization reports that dental cavities affect 60% to 90% of children globally. FDI World Dental Federation and Unilever Oral Care have developed public health programs to improve brushing habits over their 12-year partnership. The last of these (phase III) named Brush Day & Night aimed to educate children on brushing twice daily with a fluoride toothpaste and gave useful information for a new project, phase IV. The 21-day Brush Day & Night program is an intense education activity designed to establish the habit of brushing day and night with a fluoride toothpaste. The program involves daily brushing instruction and includes free toothpaste and toothbrushes. OBJECTIVE:The main objective of the study is to evaluate the impact of a 21-day school program on children's oral health. 
As a secondary objective, we aim to evaluate the impact on the knowledge, behavior, toothbrushing habits, and quality of life in school children aged 6 to 9 years after a 21-day school program and compare with baseline and a control group as measured by the self-reported questionnaires issued to children (in particular, the self-reported brushing frequency and positive responses on fluoridated toothpaste use). The enduring nature of the program will be determined by the inclusion of 8- and 24-week time points. METHODS:The study is a 2-arm superiority randomized controlled trial. Clusters in this study are infant and junior schools in Indonesia and Nigeria. The study aims to recruit 20 schools with children aged 6 to 9 years in each country. At baseline, children in both intervention and control schools will answer a questionnaire and have their clinical oral health assessed using the Simplified Oral Hygiene Index (OHI) and Decayed Missing and Filled Teeth index. Children in the intervention schools will then take part in a structured 21-day Brush Day & Night intervention. Children in the control schools will be provided with free toothpaste and toothbrushes but will not receive the 21-day intervention. The questionnaires and OHI assessments are repeated after the 21-day program is completed and again 8 weeks later and 24 weeks later for all participating children. Parents/carers/guardians of all children will sign the informed consent and complete questionnaires on their own experience and attitudes toward oral health and toothbrushing routine at each of the four times points (baseline, 21 days, 8 weeks, and 24 weeks). The study will be conducted by the national dental associations of Indonesia and Nigeria and was approved by the ethics committees of both countries. RESULTS:The study is ongoing. 
Recruitment of schools started in Indonesia in February 2018 and in Nigeria in April 2018 for the first part of the study, which concluded in Indonesia in September 2018 and in Nigeria in November 2018. The second part of the study (the second half of the schools) started in November 2018 in Indonesia and December 2018 in Nigeria. CONCLUSIONS:We expect to collect all the data during 2019 and publish findings from the study by March 2020. TRIAL REGISTRATION:ClinicalTrials.gov NCT04001296; https://tinyurl.com/selxraa. INTERNATIONAL REGISTERED REPORT IDENTIFIER (IRRID):DERR1-10.2196/14156.","hji,kes",0,0,0,2,0,NA,NA +32139710,"Classification models for Invasive Ductal Carcinoma Progression, based on gene expression data-trained supervised machine learning.","Early detection of breast cancer and its correct stage determination are important for prognosis and rendering appropriate personalized clinical treatment to breast cancer patients. However, despite considerable efforts and progress, there is a need to identify the specific genomic factors responsible for, or accompanying Invasive Ductal Carcinoma (IDC) progression stages, which can aid the determination of the correct cancer stages. We have developed two-class machine-learning classification models to differentiate the early and late stages of IDC. The prediction models are trained with RNA-seq gene expression profiles representing different IDC stages of 610 patients, obtained from The Cancer Genome Atlas (TCGA). Different supervised learning algorithms were trained and evaluated with an enriched model learning, facilitated by different feature selection methods. We also developed a machine-learning classifier trained on the same datasets with training sets reduced data corresponding to IDC driver genes. Based on these two classifiers, we have developed a web-server Duct-BRCA-CSP to predict early stage from late stages of IDC based on input RNA-seq gene expression profiles. 
The analysis conducted by us also enables deeper insights into the stage-dependent molecular events accompanying IDC progression. The server is publicly available at http://bioinfo.icgeb.res.in/duct-BRCA-CSP.","hji,kes",0,0,0,2,0,NA,NA +32150354,Visualizing Human Protein-Protein Interactions and Subcellular Localizations on Cell Images Through CellMap.,"Visualizing protein data remains a challenging and stimulating task. Useful and intuitive visualization tools may help advance biomolecular and medical research; unintuitive tools may bar important breakthroughs. This protocol describes two use cases for the CellMap (http://cellmap.protein.properties) web tool. The tool allows researchers to visualize human protein-protein interaction data constrained by protein subcellular localizations. In the simplest form, proteins are visualized on cell images that also show protein-protein interactions (PPIs) through lines (edges) connecting the proteins across the compartments. At a glance, this simultaneously highlights spatial constraints that proteins are subject to in their physical environment and visualizes PPIs against these localizations. Visualizing two realities helps in decluttering the protein interaction visualization from """"hairball"""" phenomena that arise when single proteins or groups thereof interact with hundreds of partners. 2019 The Authors. Basic Protocol 1: Visualizing proteins and their interactions on cell images Basic Protocol 2: Displaying all interaction partners for a protein.","hji,kes",0,0,0,2,0,NA,NA +32166213,Machine learning with force-field inspired descriptors for materials: fast screening and mapping energy landscape.,"We present a complete set of chemo-structural descriptors to significantly extend the applicability of machine-learning (ML) in material screening and mapping energy landscape for multicomponent systems. 
These new descriptors allow differentiating between structural prototypes, which is not possible using the commonly used chemical-only descriptors. Specifically, we demonstrate that the combination of pairwise radial, nearest neighbor, bond-angle, dihedral-angle and core-charge distributions plays an important role in predicting formation energies, bandgaps, static refractive indices, magnetic properties, and modulus of elasticity for three-dimensional (3D) materials as well as exfoliation energies of two-dimensional (2D) layered materials. The training data consists of 24549 bulk and 616 monolayer materials taken from JARVIS-DFT database. We obtained very accurate ML models using gradient boosting algorithm. Then we use the trained models to discover exfoliable 2D-layered materials satisfying specific property requirements. Additionally, we integrate our formation energy ML model with a genetic algorithm for structure search to verify if the ML model reproduces the DFT convex hull. This verification establishes a more stringent evaluation metric for the ML model than what commonly used in data sciences. Our learnt model is publicly available on the JARVIS-ML website (https://www.ctcms.nist.gov/jarvisml) property predictions of generalized materials.","hji,kes",0,0,0,2,0,NA,NA +32175316,RF-PseU: A Random Forest Predictor for RNA Pseudouridine Sites.,"One of the ubiquitous chemical modifications in RNA, pseudouridine modification is crucial for various cellular biological and physiological processes. To gain more insight into the functional mechanisms involved, it is of fundamental importance to precisely identify pseudouridine sites in RNA. Several useful machine learning approaches have become available recently, with the increasing progress of next-generation sequencing technology; however, existing methods cannot predict sites with high accuracy. Thus, a more accurate predictor is required. 
In this study, a random forest-based predictor named RF-PseU is proposed for prediction of pseudouridylation sites. To optimize feature representation and obtain a better model, the light gradient boosting machine algorithm and incremental feature selection strategy were used to select the optimum feature space vector for training the random forest model RF-PseU. Compared with previous state-of-the-art predictors, the results on the same benchmark data sets of three species demonstrate that RF-PseU performs better overall. The integrated average leave-one-out cross-validation and independent testing accuracy scores were 71.4% and 74.7%, respectively, representing increments of 3.63% and 4.77% versus the best existing predictor. Moreover, the final RF-PseU model for prediction was built on leave-one-out cross-validation and provides a reliable and robust tool for identifying pseudouridine sites. A web server with a user-friendly interface is accessible at http://148.70.81.170:10228/rfpseu.","hji,kes",0,0,0,2,0,NA,NA +32186404,"Uterine Patterning, Endometrial Gland Development, and Implantation Failure in Mice Exposed Neonatally to Genistein.","BACKGROUND:Embryo implantation relies on precise hormonal regulation, associated gene expression changes, and appropriate female reproductive tract tissue architecture. Female mice exposed neonatally to the phytoestrogen genistein (GEN) at doses similar to those in infants consuming soy-based infant formulas are infertile due in part to uterine implantation defects. OBJECTIVES:Our goal was to determine the mechanisms by which neonatal GEN exposure causes implantation defects. METHODS:Female mice were exposed to GEN on postnatal days (PND)1-5 and uterine tissues collected on PND5, PND22-26, and during pregnancy. 
Analysis of tissue weights, morphology, and gene expression was performed using standard histology, confocal imaging with three-dimensional analysis, real-time reverse transcription polymerase chain reaction (real-time RT-PCR), and microarrays. The response of ovariectomized adults to 17-estradiol (E2) and artificial decidualization were measured. Leukemia inhibitory factor (LIF) injections were given intraperitoneally and implantation sites visualized. Gene expression patterns were compared with curated data sets to identify upstream regulators. RESULTS:GEN-exposed mice exhibited reduced uterine weight gain in response to E2 treatment or artificial decidualization compared with controls; however, expression of select hormone responsive genes remained similar between the two groups. Uteri from pregnant GEN-exposed mice were posteriorized and had reduced glandular epithelium. Implantation failure was not rescued by LIF administration. Microarray analysis of GEN-exposed uteri during early pregnancy revealed significant overlap with several conditional uterine knockout mouse models, including Foxa2, Wnt4, and Sox17. These models exhibit reduced endometrial glands, features of posteriorization and implantation failure. Expression of Foxa2, Wnt4, and Sox17, as well as genes important for neonatal uterine differentiation (Wnt7a, Hoxa10, and Msx2), were severely disrupted on PND5 in GEN-exposed mice. DISCUSSION:Our findings suggest that neonatal GEN exposure in mice disrupts expression of genes important for uterine development, causing posteriorization and diminished gland function during pregnancy that contribute to implantation failure. These findings could have implications for women who consumed soy-based formulas as infants. 
https://doi.org/10.1289/EHP6336.","hji,kes",0,0,0,2,0,NA,NA +32214380,Stochastic simulation and statistical inference platform for visualization and estimation of transcriptional kinetics.,"Recent advances in single-molecule fluorescent imaging have enabled quantitative measurements of transcription at a single gene copy, yet an accurate understanding of transcriptional kinetics is still lacking due to the difficulty of solving detailed biophysical models. Here we introduce a stochastic simulation and statistical inference platform for modeling detailed transcriptional kinetics in prokaryotic systems, which has not been solved analytically. The model includes stochastic two-state gene activation, mRNA synthesis initiation and stepwise elongation, release to the cytoplasm, and stepwise co-transcriptional degradation. Using the Gillespie algorithm, the platform simulates nascent and mature mRNA kinetics of a single gene copy and predicts fluorescent signals measurable by time-lapse single-cell mRNA imaging, for different experimental conditions. To approach the inverse problem of estimating the kinetic parameters of the model from experimental data, we develop a heuristic optimization method based on the genetic algorithm and the empirical distribution of mRNA generated by simulation. As a demonstration, we show that the optimization algorithm can successfully recover the transcriptional kinetics of simulated and experimental gene expression data. The platform is available as a MATLAB software package at https://data.caltech.edu/records/1287.","hji,kes",0,0,0,2,0,NA,NA +32266474,Conjunctive reward-place coding properties of dorsal distal CA1 hippocampus cells.,"Autonomous motivated spatial navigation in animals or robots requires the association between spatial location and value. Hippocampal place cells are involved in goal-directed spatial navigation and the consolidation of spatial memories. Recently, Gauthier and Tank (Neuron 99(1):179-193, 2018. 
https://doi.org/10.1016/j.neuron.2018.06.008) have identified a subpopulation of hippocampal cells selectively activated in relation to rewarded goals. However, the relationship between these cells' spiking activity and goal representation remains elusive. We analyzed data from experiments in which rats underwent five consecutive tasks in which reward locations and spatial context were manipulated. We found CA1 populations with properties continuously ranging from place cells to reward cells. Specifically, we found typical place cells insensitive to reward locations, reward cells that only fired at correct rewarded feeders in each task regardless of context, and """"hybrid cells"""" that responded to spatial locations and change of reward locations. Reward cells responded mostly to the reward delivery rather than to its expectation. In addition, we found a small group of neurons that transitioned between place and reward cells properties within the 5-task session. We conclude that some pyramidal cells (if not all) integrate both spatial and reward inputs to various degrees. These results provide insights into the integrative coding properties of CA1 pyramidal cells, focusing on their abilities to carry both spatial and reward information in a mixed and plastic manner. This conjunctive coding property prompts a re-thinking of current computational models of spatial navigation in which hippocampal spatial and subcortical value representations are independent.","hji,kes",0,0,0,2,0,NA,NA +32271149,Development and Modification of a Mobile Health Program to Promote Postpartum Weight Loss in Women at Elevated Risk for Cardiometabolic Disease: Single-Arm Pilot Study.,"BACKGROUND:Pregnancy complications in combination with postpartum weight retention lead to significant risks of cardiometabolic disease and obesity. The majority of traditional face-to-face interventions have not been effective in postpartum women. 
Mobile technology enables the active engagement of postpartum women to promote lifestyle changes to prevent chronic diseases. OBJECTIVE:We sought to employ an interactive, user-centered, and participatory method of development, evaluation, and iteration to design and optimize the mobile health (mHealth) Fit After Baby program. METHODS:For the initial development, a multidisciplinary team integrated evidence-based approaches for health behavior, diet and physical activity, and user-centered design and engagement. We implemented an iterative feedback and design process via 3 month-long beta pilots in which postpartum women with cardiometabolic risk factors participated in the program and provided weekly and ongoing feedback. We also conducted two group interviews using a structured interview guide to gather additional feedback. Qualitative data were recorded, transcribed, and analyzed using established qualitative methods. Modifications based on feedback were integrated into successive versions of the app. RESULTS:We conducted three pilot testing rounds with a total of 26 women. Feedback from each pilot cohort informed changes to the functionality and content of the app, and then a subsequent pilot group participated in the program. We optimized the program in response to feedback through three iterations leading to a final version. CONCLUSIONS:This study demonstrates the feasibility of using an interactive, user-centered, participatory method of rapid, iterative design and evaluation to develop and optimize a mHealth intervention program for postpartum women. TRIAL REGISTRATION:ClinicalTrials.gov NCT02384226; https://www.clinicaltrials.gov/ct2/show/NCT02384226.","hji,kes",0,0,0,2,0,NA,NA +32281541,Effects of the ACT OUT! 
Social Issue Theater Program on Social-Emotional Competence and Bullying in Youth and Adolescents: Protocol for a Cluster Randomized Controlled Trial.,"BACKGROUND:Students in the United States spend a meaningful portion of their developmental lives in school. In recent years, researchers and educators have begun to focus explicitly on social and emotional learning (SEL) in the school setting. Initial evidence from meta-analyses suggests that curricula designed to promote SEL likely produce benefits in terms of social-emotional competence (SEC) and numerous related behavioral and affective outcomes. At the same time, there are often barriers to implementing such curricula as intended, and some researchers have questioned the strength of the evaluation data from SEL programs. As part of the effort to improve programming in SEL, this paper describes the protocol for a cluster randomized trial of the ACT OUT! Social Issue Theater program, a brief psychodramatic intervention to build SEC and reduce bullying behavior in students. OBJECTIVE:The objective of this trial is to examine if a short dose of interactive psychodrama can affect SEC metrics and bullying experiences in schoolchildren in either the short (2-week) or medium (6-month) term. METHODS:The ACT OUT! trial is a cluster randomized superiority trial with 2 parallel groups. The unit of measurement is the student, and the unit of randomization is the classroom. For each grade (fourth, seventh, and 10th), an even number of classrooms will be selected from each school-half will be assigned to the intervention arm and half will be assigned to the control arm. The intervention will consist of 3 moderated psychodramatic performances by trained actors, and the control condition will be the usual school day. Outcome data will be collected at baseline (preintervention), 2-week postintervention (short term), and 6-month postintervention (medium term). 
Outcomes will include social-emotional competency; self-reported bullying and experiences of being bullied; receptivity to the program; and school-level data on truancy, absenteeism, and referrals to school displinary action for bullying. A power analysis adjusted for clustering effect, design effect, and potential attrition yielded a need for approximately 1594 students, consisting of an estimated 80 classrooms split evenly into intervention and control arms. RESULTS:This study was funded in June 2019; approved by the Indiana University Institutional review board on September 17, 2019; began subject recruitment on November 5, 2019; and prospectively registered with ClinicalTrials.gov. CONCLUSIONS:Many states have issued recommendations for the integration of SEL into schools. The proposed study uses a rigorous methodology to determine if the ACT OUT! psychodramatic intervention is a cost-effective means of bolstering SEC and reducing bullying incidence in schools. TRIAL REGISTRATION:ClinicalTrials.gov NCT04097496; https://clinicaltrials.gov/ct2/show/NCT04097496. INTERNATIONAL REGISTERED REPORT IDENTIFIER (IRRID):PRR1-10.2196/17900.","hji,kes",0,0,0,2,0,NA,NA +32293391,Evaluation of a computer-assisted multi-professional intervention to address lifestyle-related risk factors for overweight and obesity in expecting mothers and their infants: protocol for an effectiveness-implementation hybrid study.,"BACKGROUND:The first 1000 days after conception are a critical period to encourage lifestyle changes to reduce the risk of childhood obesity and early programming of chronic diseases. A healthy lifestyle during pregnancy is also crucial to avoid high post-partum weight retention. Currently, lifestyle changes are not consistently discussed during routine health services in Germany. The objective of this study is to evaluate a novel computer-assisted lifestyle intervention embedded in prenatal visits and infant check-ups. 
The intervention seeks to reduce lifestyle-related risk factors for overweight and obesity among expecting mothers and their infants. METHODS:The study is designed as a hybrid effectiveness-implementation trial to simultaneously collect data on the effectiveness and implementation of the lifestyle intervention. The trial will take place in eight regions of the German state Baden-Wuerttemberg. Region were matched using propensity score matching. Expecting mothers (n = 1860) will be recruited before 12 weeks of gestation through gynecological practices and followed for 18 months. During 11 routine prenatal visits and infant check-ups gynecologists, midwives and pediatricians provide lifestyle counseling using Motivational Interviewing techniques. The primary outcome measure is the proportion of expecting mothers with gestational weight gain within the recommended range. To understand the process of implementation (focus group) interviews will be conducted with providers and participants of the lifestyle intervention. Additionally, an analysis of administrative data and documents will be carried out. An economic analysis will provide insights into cost and consequences compared to routine health services. DISCUSSION:Findings of this study will add to the evidence on lifestyle interventions to reduce risk for overweight and obesity commenced during pregnancy. Insights gained will contribute to the prevention of early programming of chronic disease. Study results regarding implementation fidelity, adoption, reach and cost-effectiveness of the lifestyle intervention will inform decisions about scale up and public funding. TRIAL REGISTRATION:German Clinical Trials Register (DRKS00013173). 
Registered 3rd of January 2019, https://www.drks.de.","hji,kes",0,0,0,2,0,NA,NA +32297095,Using Complier Average Causal Effect Estimation to Examine Student Outcomes of the PAX Good Behavior Game When Integrated with the PATHS Curriculum.,"A growing body of research has documented a link between variation in implementation dosage and outcomes associated with preventive interventions. Complier Average Causal Effect (CACE; Jo in J Educ Behav Stat 27:385-409, 2002) analysis allows for estimating program impacts in light of variation in implementation. This study reports intent-to-treat (ITT) and CACE findings from a randomized controlled trial (RCT) testing the impacts of the universal PAX Good Behavior Game (PAX GBG)integrated with Promoting Alternative Thinking Strategies (i.e., PATHS to PAX) and PAX GBG only compared to a control. This study used ratings by 318K-5 teachers of 1526 at-risk children who, at baseline, were rated as displaying the top 33rd percentile of aggressive-disruptive behavior. Leveraging a prior study on these data (Berg et al. in Admin Policy Ment Health Ment Health Serv Res 44:558-571, https://doi.org/10.1007/s10488-016-0738-1 , 2017), CACE was defined as the effect of intervention assignment for compliers, using two compliance cut points (50th and 75th percentile), on posttest ratings of student academic engagement, social competence, peer relations, emotion regulation, hyperactivity, and aggressive-disruptive behavior. The ITT analyses indicated improvements for students in the integrated condition on ratings of social competence compared to the control condition. The CACE analyses also indicated significant effects of the integrated intervention on social competence, as well as academic engagement and emotion regulation for students in high compliance classrooms. 
These findings illustrate the importance of considering variation in implementation within the context of RCTs.","hji,kes",0,0,0,2,0,NA,NA +32298473,Clinical Best Practice Advice for Hepatology and Liver Transplant Providers During the COVID-19 Pandemic: AASLD Expert Panel Consensus Statement.,"

Background and aims

Coronavirus disease 2019 (COVID-19), the illness caused by the SARS-CoV-2 virus, is rapidly spreading throughout the world. Hospitals and healthcare providers are preparing for the anticipated surge in critically ill patients, but few are wholly equipped to manage this new disease. The goals of this document are to provide data on what is currently known about COVID-19, and how it may impact hepatologists and liver transplant providers and their patients. Our aim is to provide a template for the development of clinical recommendations and policies to mitigate the impact of the COVID-19 pandemic on liver patients and healthcare providers.

Approach and results

This article discusses what is known about COVID-19 with a focus on its impact on hepatologists, liver transplant providers, patients with liver disease, and liver transplant recipients. We provide clinicians with guidance for how to minimize the impact of the COVID-19 pandemic on their patients' care.

Conclusions

The situation is evolving rapidly, and these recommendations will need to evolve as well. As we learn more about how the COVID-19 pandemic impacts the care of patients with liver disease, we will update the online document available at https://www.aasld.org/about-aasld/covid-19-and-liver.","hji,kes",0,0,0,2,0,NA,NA +32343490,QIIME 2 Enables Comprehensive End-to-End Analysis of Diverse Microbiome Data and Comparative Studies with Publicly Available Data.,"QIIME 2 is a completely re-engineered microbiome bioinformatics platform based on the popular QIIME platform, which it has replaced. QIIME 2 facilitates comprehensive and fully reproducible microbiome data science, improving accessibility to diverse users by adding multiple user interfaces. QIIME 2 can be combined with Qiita, an open-source web-based platform, to re-use available data for meta-analysis. The following basic protocol describes how to install QIIME 2 on a single computer and analyze microbiome sequence data, from processing of raw DNA sequence reads through generating publishable interactive figures. These interactive figures allow readers of a study to interact with data with the same ease as its authors, advancing microbiome science transparency and reproducibility. We also show how plug-ins developed by the community to add analysis capabilities can be installed and used with QIIME 2, enhancing various aspects of microbiome analyses-e.g., improving taxonomic classification accuracy. Finally, we illustrate how users can perform meta-analyses combining different datasets using readily available public data through Qiita. In this tutorial, we analyze a subset of the Early Childhood Antibiotics and the Microbiome (ECAM) study, which tracked the microbiome composition and development of 43 infants in the United States from birth to 2 years of age, identifying microbiome associations with antibiotic exposure, delivery mode, and diet. For more information about QIIME 2, see https://qiime2.org. 
To troubleshoot or ask questions about QIIME 2 and microbiome analysis, join the active community at https://forum.qiime2.org. 2020 The Authors. Basic Protocol: Using QIIME 2 with microbiome data Support Protocol: Further microbiome analyses.","hji,kes",0,0,0,2,0,NA,workbench +32368601,Liquid based-cytology Pap smear dataset for automated multi-class diagnosis of pre-cancerous and cervical cancer lesions.,"While a publicly available benchmark dataset provides a base for the development of new algorithms and comparison of results, hospital-based data collected from the real-world clinical setup is also very important in AI-based medical research for automated disease diagnosis, prediction or classifications as per standard protocol. Primary data must be constantly updated so that the developed algorithms achieve as much accuracy as possible in the regional context. This dataset would support research work related to image segmentation and final classification for a complete decision support system (https://doi.org/10.1016/j.tice.2020.101347) [1]. Liquid-based cytology (LBC) is one of the cervical screening tests. The repository consists of a total of 963 LBC images sub-divided into four sets representing the four classes: NILM, LSIL, HSIL, and SCC. It comprises pre-cancerous and cancerous lesions related to cervical cancer as per standards under The Bethesda System (TBS). The images were captured in 40x magnification using Leica ICC50 HD microscope collected with due consent from 460 patients visiting the O&G department of the public hospital with various gynaecological problems. 
The images were then viewed and categorized by experts of the pathology department.","hji,kes",0,0,0,2,0,NA,"clinical, URL not to data" +32382359,Multiple Relapses of Visceral Leishmaniasis in HIV Co-Infected Patients: A Case Series from Ethiopia.,"Background:Human visceral leishmaniasis (VL) is a life-threatening protozoan disease caused by parasites belonging to the Leishmania donovani complex. Ethiopia has the highest VL-HIV co-infection rate in the world, with several of these patients presenting with repeated episodes of VL disease (ie, relapse). However, we lack data on how HIV patients with multiple VL relapse present clinically, and whether they continue to respond to currently available medicines. Methods:The medical records of VL-HIV co-infected patients with multiple VL relapses at the Leishmaniasis Treatment and Research Center in Gondar, Ethiopia, between June 2012 and June 2016 were retrieved. Variables on their clinical and laboratory profiles were collected. Descriptive analysis was done to show the characteristics of the VL episodes. Result:A total of 48 VL episodes in 12 patients were identified, the median number of episodes per patient was 5 (interquartile range, 4-8 episodes). The median time to relapse was 5 months (interquartile range, 3-5.5 months). Splenomegaly was present in 47 of the episodes (98%), fever or other accompanying symptoms were present in only 66% (32 out of 48). The median tissue parasite grade at VL diagnosis was 6+ (interquartile range, 5+- 6+). All patients were on antiretroviral therapy. The median duration of treatment per episode was 2 months (interquartile range, 2-2 months). All patients achieved parasitological cure at discharge at each episode. Conclusions:Multiple recurrences of VL diseases were observed in HIV co-infected patients. With recurrent episodes, splenomegaly was found to be the main manifestation, whereas fever was less common. 
These patients came with recurrence of diseases in <6 months and required prolonged treatment to achieve cure.Further research on prediction, prevention, and better management options for recurrent VL is needed. ORCID ID: https://orcid.org/0000-0002-1410-0454. (Curr Ther Res Clin Exp. 2020; 81:XXX-XXX).","hji,kes",0,0,0,2,0,NA,NA +32391909,Global ocean resistome revealed: Exploring antibiotic resistance gene abundance and distribution in TARA Oceans samples.,"

Background

The rise of antibiotic resistance (AR) in clinical settings is of great concern. Therefore, the understanding of AR mechanisms, evolution, and global distribution is a priority for patient survival. Despite all efforts in the elucidation of AR mechanisms in clinical strains, little is known about its prevalence and evolution in environmental microorganisms. We used 293 metagenomic samples from the TARA Oceans project to detect and quantify environmental antibiotic resistance genes (ARGs) using machine learning tools.

Results

After manual curation of ARGs, their abundance and distribution in the global ocean are presented. Additionally, the potential of horizontal ARG transfer by plasmids and their correlation with environmental and geographical parameters is shown. A total of 99,205 environmental open reading frames (ORFs) were classified as 1 of 560 different ARGs conferring resistance to 26 antibiotic classes. We found 24,567 ORFs in putative plasmid sequences, suggesting the importance of mobile genetic elements in the dynamics of environmental ARG transmission. Moreover, 4,804 contigs with >=2 putative ARGs were found, including 2 plasmid-like contigs with 5 different ARGs, highlighting the potential presence of multi-resistant microorganisms in the natural ocean environment. Finally, we identified ARGs conferring resistance to some of the most relevant clinical antibiotics, revealing the presence of 15 ARGs similar to mobilized colistin resistance genes (mcr) with high abundance on polar biomes. Of these, 5 are assigned to Psychrobacter, a genus including opportunistic human pathogens.

Conclusions

This study uncovers the diversity and abundance of ARGs in the global ocean metagenome. Our results are available on Zenodo in MySQL database dump format, and all the code used for the analyses, including a Jupyter notebook js avaliable on Github. We also developed a dashboard web application (http://www.resistomedb.com) for data visualization.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +32413019,"Evaluation of the National Sexually Transmitted Disease Curriculum: Reach, Utilization, and Engagement.","

Background

With increasing rates of sexually transmitted infections in the United States, there is a critical need to educate health professionals on the prevention, diagnosis, and treatment of sexually transmitted infections. The National Sexually Transmitted Disease Curriculum (NSTDC, https://www.std.uw.edu) is a free, online curriculum, funded by the Centers for Disease Control and Prevention. The purpose of this article is to evaluate the reach, utilization, and engagement of users with the curriculum.

Methods

Data on NSTDC utilization was collected for 24 months after the February 1, 2017 launch. For all users, Google Analytics was used to determine total number of users, geographic location, age and sex, and average session duration. For registered users, additional data analysis included work-role, demographics, and completion of self-study modules, check-on-learning questions, and question banks. User satisfaction was measured on a 5-point Likert scale.

Results

During the evaluation period, 136,270 individual users accessed the NSTDC, including 24,652 registered users. Among all registered users, 10,660 (43.2%) were registered nurses, 2810 (11.4%) physicians, 4942 (20.1%) Advanced Practice Nurses and Physician Assistants, and 6213 (25.2%) nonclinicians. Among registered users, 18,533 (75.2%) completed at least 1 module, 7898 (32.0%) completed all 7 modules, and 19,804 (80.4%) answered optional check-on-learning questions. Median satisfaction with the content was (5) very satisfied (interquartile range, 4-5).

Conclusions

The NSTDC is a free, guideline-based, online curriculum with novel dual functionality that has achieved extensive reach with a broad array of health professionals who engage deeply with the material. The wide usage of NSTDC demonstrates the need for high-quality, unbiased, free content in user-focused formats.","hji,kes",0,0,0,2,0,NA,NA +32419835,Integrated Analysis of the Mechanisms of Da-Chai-Hu Decoction in Type 2 Diabetes Mellitus by a Network Pharmacology Approach.,"Background:The incidence of type 2 diabetes mellitus (T2DM) has increased year by year, which not only seriously affects people's quality of life, but also imposes a heavy economic burden on the family, society, and country. Currently, the pathogenesis, diagnosis, and treatment of T2DM are still unclear. Therefore, exploration of a precise multitarget treatment strategy is urgent. Here, we attempt to screen out the active components, effective targets, and functional pathways of therapeutic drugs through network pharmacology with taking advantages of traditional Chinese medicine (TCM) formulas for multitarget holistic treatment of diseases to clarify the potential therapeutic mechanism of TCM formulas and provide a systematic and clear thought for T2DM treatment. Methods:First, we screened the active components of Da-Chai-Hu Decoction (DCHD) by absorption, distribution, metabolism, excretion, and toxicity (ADME/T) calculation. Second, we predicted and screened the active components of DCHD and its therapeutic targets for T2DM relying on the Traditional Chinese Medicine Systems Pharmacology Analysis Platform (TCMSP database) and Text Mining Tool (GoPubMed database), while using the Database for Annotation, Visualization, and Integrated Discovery (DAVID) to obtain T2DM targets. 
Third, we constructed a network of the active component-target, target-pathway of DCHD using Cytoscape software (http://cytoscape.org/,ver.3.5.1) and then analyzed gene function, related biological processes, and signal pathways through the DAVID database. Results:We screened 77 active components from 1278 DCHD components and 116 effective targets from 253 ones. After matching the targets of T2DM, we obtained 38 important targets and 7 core targets were selected through further analysis. Through enrichment analysis, we found that these important targets were mainly involved in many biological processes such as oxidative stress, inflammatory reaction, and apoptosis. After analyzing the relevant pathways, the synthetic pathway for the treatment of T2DM was obtained, which provided a diagnosis-treatment idea for DCHD in the treatment of T2DM. Conclusions:This article reveals the mechanism of DCHD in the treatment of T2DM related to inflammatory response and apoptosis through network pharmacology, which lays a foundation for further elucidation of drugs effective targets.","hji,kes",0,0,0,2,0,NA,NA +32438827,Long-Term Exposure to Air Pollution and Incidence of Myocardial Infarction: A Danish Nurse Cohort Study.,"

Background

Air pollution exposure has been linked to coronary heart disease, although evidence on PM2.5 and myocardial infarction (MI) incidence is mixed.

Objectives

This prospective cohort study aimed to investigate associations between long-term exposure to air pollution and MI incidence, adjusting for road traffic noise.

Methods

We used data from the nationwide Danish Nurse Cohort on 22,882 female nurses (>44 years of age) who, at recruitment in 1993 or 1999, reported information on cardiovascular disease risk factors. Data on MI incidence was collected from the Danish National Patient Register until the end of 2014. Annual mean concentrations of particulate matter (PM) with a diameter <2.5 g/m3 (PM2.5), PM10, nitrogen dioxide (NO2), and nitrogen oxides (NOx) at the nurses' residences since 1990 (PM10 and PM2.5) or 1970 (NO2 and NOx) were estimated using the Danish Eulerian Hemispheric Model/Urban Background Model/AirGIS (DEHM/UBM/AirGIS) dispersion model. We used time-varying Cox regression models to examine the association between 1- and 3-y running means of these pollutants, as well as 23-y running means of NO2 and NOx, with both overall and fatal incident MI. Associations were explored in three progressively adjusted models: Model 1, adjusted for age and baseline year; Model 2, with further adjustment for potential confounding by lifestyle and cardiovascular disease risk factors; and Model 3, with further adjustment for road traffic noise, modeled as the annual mean of a weighted 24-h average (Lden).

Results

Of the 22,882 women, 641 developed MI during a mean follow-up of 18.6 y, 121 (18.9%) of which were fatal. Reported hazard ratios (HRs) were based on interquartile range increases of 5.3, 5.5, 8.1, and 11.5 g/m3 for PM2.5, PM10, NO2, and NOx, respectively. In Model 1, we observed a positive association between a 3-y running mean of PM2.5 and an overall incident MI with an HR= 1.20 (95% CI: 1.07, 1.35), which attenuated to HR= 1.06 (95% CI: 0.92, 1.23) in Model 2. In Model 1 for incident fatal MI, we observed a strong association with a 3-y running mean of PM2.5, with an HR= 1.69 (95% CI: 1.33, 2.13), which attenuated to HR= 1.35 (95% CI: 1.01, 1.81) in Model 2. Similar associations were seen for PM10, with 3-y, Model 2 estimates for overall and fatal incident MI of HR= 1.06 (95% CI: 0.91, 1.23) and HR= 1.35 (95% CI: 1.01, 1.81), respectively. No evidence of an association was observed for NO2 or NOx. For all pollutants, associations in Model 2 were robust to further adjustment for road traffic noise in Model 3 and were similar for a 1-y running mean exposure.

Conclusions

We found no association between long-term exposure to PM2.5, PM10, NO2, or NOx and overall MI incidence, but we observed positive associations for PM2.5 and PM10 with fatal MI. We present novel findings that the association between PM and MI incidence is robust to adjustment for road traffic noise. https://doi.org/10.1289/EHP5818.","hji,kes",0,0,0,2,0,NA,NA +32440109,Proteome Profiling of Lung Tissues in Chronic Obstructive Pulmonary Disease (COPD): Platelet and Macrophage Dysfunction Contribute to the Pathogenesis of COPD.,"

Purpose

Chronic obstructive pulmonary disease (COPD) is a worldwide public health challenge due to its high prevalence and related disability and mortality; however, the pathogenesis of COPD remains unclear. In this study, we aimed to identify key proteins involved in the pathogenesis of COPD.

Patients and methods

We collected lung tissue from three patients with COPD who required thoracic surgery for lung transplantation in the China-Japan Friendship Hospital. Lung tissue from three donors who had no history of lung disease was collected as healthy controls through a whole-body donation program of Peking Union Medical College (China). We conducted a proteomic analysis of the protein expression profiles in the two groups using a combination of high-resolution liquid chromatography coupled with tandem mass spectrometry (LC-MS/MS) and quantitative 6-plex tandem mass tag-labeling; these data were validated by Western blot analysis.

Results

A total of 4976 proteins were identied and analyzed, of which 173 were significantly changed (118 downregulated and 55 upregulated). Gene ontology analysis and protein-protein interaction networks demonstrated that the significantly changed proteins, especially downregulated proteins, were involved in platelet and macrophage activation. The mass spectrometry proteomics data have been deposited to the ProteomeXchange Consortium (http://proteomecentral.proteomexchange.org) via the iProX partner repository with the dataset identifier PXD017158.

Conclusion

In our study, GP6, PF4, and THBS1, which are associated with platelet activation and wound healing, were significantly downregulated in COPD patients. These results indicate that patients with COPD are more likely to develop hemostasis disorders, which could impede the repair process of the lung tissues. Moreover, downregulation of CD163, MARCO and VSIG4, which are involved in dysfunction of alveolar macrophages in efferocytosis, may inhibit the resolution of inflammation and contribute to the pathogenesis of COPD.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +32470119,Coronavirus3D: 3D structural visualization of COVID-19 genomic divergence.,"

Motivation

As the COVID-19 pandemic is spreading around the world, the SARS-CoV-2 virus is evolving with mutations that potentially change and fine-tune functions of the proteins coded in its genome.

Results

Coronavirus3D website integrates data on the SARS-CoV-2 virus mutations with information about 3D structures of its proteins, allowing users to visually analyze the mutations in their 3D context.

Availability and implementation

Coronavirus3D server is freely available at https://coronavirus3d.org.","hji,kes",0,0,0,2,0,NA,data available elsewhere +32519765,A novel nonsense mutation of ZEB2 gene in a Chinese patient with Mowat-Wilson syndrome.,"

Background

Mowat-Wilson syndrome (MWS) is a rare genetic disorder characterized by intellectual disability, distinctive facial features, and multiple anomalies caused by haploinsufficiency ofthe ZEB2gene. We investigated the genetic causes of MWS in a 14-year-old girl who had characteristic features of MWS.

Methods

Clinical data and peripheral blood DNA samples were collected from the proband. Following extraction of genomic DNA, whole-exome sequencing was conducted to detect genetic variants. Bioinformaticsanalysis wascarriedoutto predictthefunction of the mutant gene.

Results

Mutation analysis of the proband identified a novel nonsense mutation (c.250G>T, p.E84*) within exon 3 of theZEB2gene. This novel alteration resulted in a termination codon at amino acid position 84, which was predicted to encode a truncated protein. This variant was not present in unrelated healthy control samples that were obtained from the exome sequence databases ExAc browser (http://exac.broadinstitute.org/) and gnomAD browser (http://gnomad.broadinstitute.org/). It is a novel variant that was determined to be a deleterious mutation according to the variant interpretation guidelines of the ACMG. The results of our study suggest that the p.E84* mutation in theZEB2gene was probably the pathogenic mutation that caused MWS in the proband.

Conclusions

This study reports the novel mutation in the proband will provide a basic foundation for further investigations to elucidate the ZEB2-related mechanisms of MWS.","hji,kes",0,0,0,2,0,NA,NA +32526479,RawVegetable - A data assessment tool for proteomics and cross-linking mass spectrometry experiments.,"We present RawVegetable, a software for mass spectrometry data assessment and quality control tailored toward shotgun proteomics and cross-linking experiments. RawVegetable provides four main modules with distinct features: (A) The charge state chromatogram that independently displays the ion current for each charge state; useful for optimizing the chromatography for highly charged ions and with lower XIC values such as those typically found in cross-linking experiments. (B) The XL-Artefact determination, which flags possible noncovalently associated peptides. (C) The TopN density estimation, for detecting retention time intervals of under or over-sampling, and (D) The chromatography reproducibility module, which provides pairwise comparisons between multiple experiments. RawVegetable, a tutorial, and the example data are freely available for academic use at: http://patternlabforproteomics.org/rawvegetable. SIGNIFICANCE: Chromatography optimization is a critical step for any shotgun proteomic or cross-linking mass spectrometry experiment. Here, we present a nifty solution with several key features, such as displaying individual charge state chromatograms, highlighting chromatographic regions of under- or over-sampling and checking for reproducibility.","hji,kes",0,0,0,2,0,NA,iffy - example data available +32541556,Interactive Web Application for Plotting Personalized Prognosis Prediction Curves in Allogeneic Hematopoietic Cell Transplantation Using Machine Learning.,"

Background

Allogeneic hematopoietic cell transplantation (allo-HCT) is a curative treatment option for malignant hematological disorders. Transplant clinicians estimate patient-specific prognosis empirically in clinical practice based on previous studies on similar patients. However, this approach does not provide objective data. The present study primarily aimed to develop a tool capable of providing accurate personalized prognosis prediction after allo-HCT in an objective manner.

Methods

We developed an interactive web application tool with a graphical user interface capable of plotting the personalized survival and cumulative incidence prediction curves after allo-HCT adjusted by 8 patient-specific factors, which are known as prognostic predictors, and assessed their predictive performances. A random survival forest model using the data of patients who underwent allo-HCT at our institution was applied to develop this application.

Results

We succeeded in showing the personalized prognosis prediction curves of 1-year overall survival, progression-free survival, relapse/progression, and nonrelapse mortality (NRM) interactively using our web application (https://predicted-os-after-transplantation.shinyapps.io/RSF_model/). To assess its predictive performance, the entire cohort (363 cases) was split into a training cohort (70%) and a test cohort (30%) time-sequentially based on the patients' transplant dates. The areas under the receiver-operating characteristic curves for 1-year overall survival, progression-free survival, relapse/progression, and nonrelapse mortality in test cohort were 0.70, 0.72, 0.73, and 0.77, respectively.

Conclusions

The new web application could allow transplant clinicians to inform a new allo-HCT candidate of the objective personalized prognosis prediction and facilitate decision-making.","hji,kes",0,0,0,2,0,NA,NA +32546899,Hypersensitivity reactions to biologics (part I): allergy as an important differential diagnosis in complex immune-derived adverse events.,"Purpose: Biotechnological substances (BSs) are strongly relied upon to prevent rejection of transplanted organs, and to treat oncological, allergological, and other inflammatory diseases. Allergic reactions to partly foreign biologics can occur due to their potential immunogenicity. The severity of an immune response to a biological drug may range from no clinical significance to a severe, life-threatening anaphylactic reaction.Methods: Detailed searches were performed on Pubmed, Web of Science, and Google Scholar to include all available publications. In addition, the Food and Drug Administration, the European Medicines Agency, and British Columbia Cancer Agency Drug Manual databases were screened for hypersensitivity reaction (HSR), infusion reaction, injection site reaction, urticaria, and anaphylaxis for individual BSs.Results: Treatment with BSs can cause various types of HSR. These are mentioned in the literature with definitions such as allergic reactions, anaphylactoid reactions, anaphylaxis, HSR, infusion reactions, injection site reactions, cytokine release syndrome, and urticaria. Due to the overlap in signs and symptoms in the reported descriptions, it is not always possible to differentiate these reactions properly according to their pathomechanism. Similarly, many data reported as anaphylaxis actually describe severe anaphylactic reactions (grades III or IV).Conclusion: There is an urgent need for a simpler symptom- or system-based classification and scoring system to create an awareness for HSRs to BSs. 
A better understanding of the pathophysiology of HSRs and increased clinical experience in the treatment of side effects will provide timely control of unexpected reactions. As a result, immunotherapy with BSs will become safer in the future.Cite this as Glsen A, Wedi B, Jappe U. Hypersensitivity reactions to biologics (part I): allergy as an important differential diagnosis in complex immune-derived adverse events. Allergo J Int 2020; 29:97-125https://doi.org/10.1007/s40629-020-00126-6.","hji,kes",0,0,0,2,0,NA,NA +32551881,"Fine Particulate Matter and Poor Cognitive Function among Chinese Older Adults: Evidence from a Community-Based, 12-Year Prospective Cohort Study.","

Background

Research on the relationship between long-term exposure to particulate matter with aerodynamic diameter =2.5m (PM2.5) and poor cognitive function is lacking in developing countries, especially in highly polluted areas.

Objectives

We evaluated associations of long-term exposure to PM2.5 with poor cognitive function in a diverse, national sample of older adults in China.

Methods

This analysis included data on 13,324 older adults (5,879 who were 65-79 years of age, 3,052 who were 80-89 years of age, 2,634 who were 90-99 years of age, and 1,759 who were =100 years of age) with normal cognitive function at baseline from March 2002 to September 2014, with 64,648 person-years of follow-up. We used a geographic information system analysis to estimate the annual average satellite-derived PM2.5 concentration for the geocoded location of the participants' baseline residences. Poor cognitive function was defined as a score of less than 18 on the Chinese version of the Mini-Mental State Examination (MMSE). Competing risk models were performed to explore the association of PM2.5 with poor cognitive function.

Results

Each 10-g/m3 increase in PM2.5 was associated with a 5.1% increased risk of poor cognitive function [adjusted hazard ratio (HR): 1.051; 95% confidence interval (CI): 1.023, 1.079]. Compared to the lowest quartile of PM2.5 (<41.4 g/m3), adjusted HR values were 1.20 (95% CI: 1.09, 1.33), 1.27 (95% CI: 1.15, 1.41), and 1.21 (95% CI: 1.09, 1.34) for the second (=41.4-50.3 ug/m3), third (=50.3-60.7g/m3), and fourth (=60.7 g/m3) quartiles of PM2.5, respectively (p for trend <0.001). Subgroup analyses suggested stronger associations between PM2.5 and poor cognitive impairment in men than women. The association was positive in the 65- to 79- and =100-y age group but not significant and positive in the other two age groups with similar results.

Conclusion

PM2.5 was identified as a risk factor for poor cognitive function in Chinese older adults. Improving air quality may reduce the future population burden of poor cognitive function, especially in areas with high air pollution. https://doi.org/10.1289/EHP5304.","hji,kes",0,0,0,2,0,NA,NA +32611389,Proteus: An algorithm for proposing stabilizing mutation pairs based on interactions observed in known protein 3D structures.,"BACKGROUND:Protein engineering has many applications for industry, such as the development of new drugs, vaccines, treatment therapies, food, and biofuel production. A common way to engineer a protein is to perform mutations in functionally essential residues to optimize their function. However, the discovery of beneficial mutations for proteins is a complex task, with a time-consuming and high cost for experimental validation. Hence, computational approaches have been used to propose new insights for experiments narrowing the search space and reducing the costs. RESULTS:In this study, we developed Proteus (an acronym for Protein Engineering Supporter), a new algorithm for proposing mutation pairs in a target 3D structure. These suggestions are based on contacts observed in other known structures from Protein Data Bank (PDB). Proteus' basic assumption is that if a non-interacting pair of amino acid residues in the target structure is exchanged to an interacting pair, this could enhance protein stability. This trade is only allowed if the main-chain conformation of the residues involved in the contact is conserved. Furthermore, no steric impediment is expected between the proposed mutations and the surrounding protein atoms. To evaluate Proteus, we performed two case studies with proteins of industrial interests. In the first case study, we evaluated if the mutations suggested by Proteus for four protein structures enhance the number of inter-residue contacts. 
Our results suggest that most mutations proposed by Proteus increase the number of interactions into the protein. In the second case study, we used Proteus to suggest mutations for a lysozyme protein. Then, we compared Proteus' outcomes to mutations with available experimental evidence reported in the ProTherm database. Four mutations, in which our results agree with the experimental data, were found. This could be initial evidence that changes in the side-chain of some residues do not cause disturbances that harm protein structure stability. CONCLUSION:We believe that Proteus could be used combined with other methods to give new insights into the rational development of engineered proteins. Proteus user-friendly web-based tool is available at < http://proteus.dcc.ufmg.br >.","hji,kes",0,0,0,2,0,NA,NA +32626466,Application of data science in risk assessment and early warning.,"The food supply chain has been recognised by the EU as a critical infrastructure, and its complexity is the main cause of vulnerability. Depending on the food matrix, natural and/or deliberate contamination, food-borne diseases or even food fraud incidents may occur worldwide. Consequently, robust predictive models and/or software tools are needed to support decision-making and mitigating risks in an efficient and timely manner. In this frame, the fellow participated in data collection and analysis tasks, so as to provide additional predictive models. The working programme, covered a wide range of aspects related to risk assessment including identification of emerging risks (quantitative), microbiological risk assessment, authenticity assessment, spatio-temporal epidemiological modelling and database formation for hosting predictive microbial models. The training and close integration, in the open-source, in-house (German Federal Institute for Risk Assessment (BfR)) developed software tools under the framework of FoodRisk-Labs (https://foodrisklabs.bfr.bund.de.) 
for data analysis, predictive microbiology, quantitative microbiological risk assessment and automatic data retrieval purposes allowed for the independent use. Moreover, the fellow actively contributed to the update of the upcoming Yersinia enterocolitica risk assessment, and also in authenticity assessment of edible oils. Over the course of the year, the fellow was closely involved in international and national research projects with experts in the above-mentioned disciplines. Lastly, he consolidated his acquired knowledge by presenting his scientific work to conferences, and BfR-internal meetings.","hji,kes",0,0,0,2,0,NA,NA +32645039,"UFO: A tool for unifying biomedical ontology-based semantic similarity calculation, enrichment analysis and visualization.","

Background

Biomedical ontologies have been growing quickly and proven to be useful in many biomedical applications. Important applications of those data include estimating the functional similarity between ontology terms and between annotated biomedical entities, analyzing enrichment for a set of biomedical entities. Many semantic similarity calculation and enrichment analysis methods have been proposed for such applications. Also, a number of tools implementing the methods have been developed on different platforms. However, these tools have implemented a small number of the semantic similarity calculation and enrichment analysis methods for a certain type of biomedical ontology. Note that the methods can be applied to all types of biomedical ontologies. More importantly, each method can be dominant in different applications; thus, users have more choice with more number of methods implemented in tools. Also, more functions would facilitate their task with ontology.

Results

In this study, we developed a Cytoscape app, named UFO, which unifies most of the semantic similarity measures for between-term and between-entity similarity calculation for all types of biomedical ontologies in OBO format. Based on the similarity calculation, UFO can calculate the similarity between two sets of entities and weigh imported entity networks as well as generate functional similarity networks. Besides, it can perform enrichment analysis of a set of entities by different methods. Moreover, UFO can visualize structural relationships between ontology terms, annotating relationships between entities and terms, and functional similarity between entities. Finally, we demonstrated the ability of UFO through some case studies on finding the best semantic similarity measures for assessing the similarity between human disease phenotypes, constructing biomedical entity functional similarity networks for predicting disease-associated biomarkers, and performing enrichment analysis on a set of similar phenotypes.

Conclusions

Taken together, UFO is expected to be a tool where biomedical ontologies can be exploited for various biomedical applications.

Availability

UFO is distributed as a Cytoscape app, and can be downloaded freely at Cytoscape App (http://apps.cytoscape.org/apps/ufo) for non-commercial use.","hji,kes",0,0,0,2,0,NA,NA +32647037,"An Analysis of Variability in """"CatWalk"""" Locomotor Measurements to Aid Experimental Design and Interpretation.","Preclinical studies in models of neurologic injury and disease rely on behavioral outcomes to measure intervention efficacy. For spinal cord injury, the CatWalk system provides unbiased quantitative assessment of subtle aspects of locomotor function in rodents and so can powerfully detect significant differences between experimental and control groups. Although clearly of key importance, summary group-level data can obscure the variability within and between individual subjects and therefore make it difficult to understand the magnitude of effect in individual animals and the proportion of a group that may show benefit. Here, we calculate reference change intervals (RCIs) that define boundaries of normal variability for measures of rat locomotion on the CatWalk. Our results indicate that many commonly-used outcome measures are highly variable, such that differences of up to 70% from baseline value must be considered normal variation. Many CatWalk outcome variables are also highly correlated and dependent on run speed. Application of calculated RCIs to open access data (https://scicrunch.org/odc-sci) on hindlimb stride length in spinal cord-injured rats illustrates the complementarity between group-level (16 mm change; p = 0.0009) and individual-level (5/32 animals show change outside RCI boundaries) analysis between week 3 and week 6 after injury. We also conclude that interdependence among CatWalk variables implies that test """"batteries"""" require careful composition to ensure that different aspects of defective gait are analyzed. 
Calculation of RCIs aids in experimental design by quantifying variability and enriches overall data analysis by providing details of change at an individual level that complement group-level analysis.","hji,kes",0,0,0,2,0,NA,NA +32685640,Lipid profile dataset of optogenetics induced optic nerve regeneration.,"The optic nerve transfers visual information from the retina to the brain through the axons of retinal ganglion cells (RGCs). In adult mammals, optic nerve injuries and progressive degenerative diseases lead to the irreversible loss of RGCs, resulting in vision loss and blindness. Optogenetic models have proved useful in manipulating the growth of RGCs through expression and stimulation of channelrhodopsins (Chr2) in RGCs using the RGC-specific thy-1 promoter. Using transgenic Chr2 mouse (Thy1-ChR2-EYFP) as a model of regeneration, we profile the lipid changes which occur after traumatic optic nerve crush, light stimulation and forced RGC axonal growth. Thy1-ChR2-EYFP and control (C57BL/6) mice were divided in four groups each - 1) no crush and no stimulation, 2) no crush with stimulation, 3) crush and without stimulation, and 4) crush with stimulation. After euthanasia, the optic nerves were collected for lipidomic analysis. The Bligh and Dyer method was used for lipid extraction, followed by mass spectrometry lipid profiling with a Q-Exactive Orbitrap Liquid Chromatography-Mass Spectrometer (LC MS-MS). The raw scans were analysed with LipidSearch 4.1.3 and the statistical analysis was conducted through Metaboanalyst 4.0. This data is available at Metabolomics Workbench, study ID ST001381: [https://www.metabolomicsworkbench.org/data/DRCCMetadata.php?Mode=Study&StudyID=ST001381&StudyType=MS&ResultType=5].","hji,kes",0,0,0,2,0,NA,data deposited as referenced +32700975,Cytotoxicity Burst? Differentiating Specific from Nonspecific Effects in Tox21 in Vitro Reporter Gene Assays.,"

Background

High-throughput screening of chemicals with in vitro reporter gene assays in Tox21 has produced a large database on cytotoxicity and specific modes of action. However, the validity of some of the reported activities is questionable due to the """"cytotoxicity burst,"""" which refers to the supposition that many stress responses are activated in a nonspecific way at concentrations close to cell death.

Objectives

We propose a pragmatic method to identify whether reporter gene activation is specific or cytotoxicity-triggered by comparing the measured effects with baseline toxicity.

Methods

Baseline toxicity, also termed narcosis, is the minimal toxicity any chemical causes. Quantitative structure-activity relationships (QSARs) developed for baseline toxicity in mammalian reporter gene cell lines served as anchors to define the chemical-specific threshold for the cytotoxicity burst and to evaluate the degree of specificity of the reporter gene activation. Measured 10% effect concentrations were related to measured or QSAR-predicted 10% cytotoxicity concentrations yielding specificity ratios (SR). We applied this approach to our own experimental data and to ~8,000 chemicals that were tested in six of the high-throughput Tox21 reporter gene assays.

Results

Confirmed baseline toxicants activated reporter gene activity around cytotoxic concentrations triggered by the cytotoxicity burst. In six Tox21 assays, 37%-87% of the active hits were presumably caused by the cytotoxicity burst (SR<1) and only 2%-14% were specific with SR=10 against experimental cytotoxicity but 75%-97% were specific against baseline toxicity. This difference was caused by a large fraction of chemicals showing excess cytotoxicity.

Conclusions

The specificity analysis for measured in vitro effects identified whether a cytotoxicity burst had likely occurred. The SR-analysis not only prevented false positives, but it may also serve as measure for relative effect potency and can be used for quantitative in vitro-invivo extrapolation and risk assessment of chemicals. https://doi.org/10.1289/EHP6664.","hji,kes",0,0,0,2,0,NA,NA +32706054,LncRNA SNHG16 promotes migration and invasion through suppression of CDKN1A in clear cell renal cell carcinoma.,"Since this article has been suspected of research misconduct and the corresponding authors did not respond to our request to prove originality of data and figures, """"LncRNA SNHG16 promotes migration and invasion through suppression of CDKN1A in clear cell renal cell carcinoma, by S.-B. Liu, H.-F. Wang, Q.-P. Xie, G. Li, L.-B. Zhou, B. Hu, published in Eur Rev Med Pharmacol Sci 2020; 24 (7): 3572-3578-DOI: 10.26355/eurrev_202004_20818-PMID: 32329831"""" has been withdrawn. The Publisher apologizes for any inconvenience this may cause. https://www.europeanreview.org/article/20818.","hji,kes",0,0,0,2,0,NA,NA +32713858,PRIGSA2: Improved version of protein repeat identification by graph spectral analysis.,"Tandemly repeated structural motifs in proteins form highly stable structural folds and provide multiple binding sites associated with diverse functional roles. The tertiary structure and function of these proteins are determined by the type and copy number of the repeating units. Each repeat type exhibits a unique pattern of intra- and inter-repeat unit interactions that is well-captured by the topological features in the network representation of protein structures. Here we present an improved version of our graph based algorithm, PRIGSA, with structure-based validation and filtering steps incorporated for accurate detection of tandem structural repeats. 
The algorithm integrates available knowledge on repeat families with de novo prediction to detect repeats in single monomer chains as well as in multimeric protein complexes. Three levels of performance evaluation are presented: comparison with state-of-the-art algorithms on benchmark dataset of repeat and nonrepeat proteins, accuracy in the detection of members of 13 known repeat families reported in UniProt and execution on the complete Protein Data Bank to show its ability to identify previously uncharacterized proteins. A ~3-fold increase in the coverage of the members of 13 known families and 3408 novel uncharacterized structural repeat proteins are identified on executing it on PDB. PRIGSA2 is available at http:// bioinf.iiit.ac.in/PRIGSA2/.","hji,kes",0,0,0,2,0,NA,NA +32744660,Long noncoding RNA UCA1 promotes proliferation and metastasis of thyroid cancer cells by sponging miR-497-3p.,"Since this article has been suspected of research misconduct and the corresponding authors did not respond to our request to prove originality of data and figures, """"Long noncoding RNA UCA1 promotes proliferation and metastasis of thyroid cancer cells by sponging miR-497-3p, by H. Gao, J.-Y. Yang, L.-X. Tong, H. Jin, C.-Z. Liu, published in Eur Rev Med Pharmacol Sci 2020; 24 (2): 728-734-DOI: 10.26355/eurrev_202001_20052-PMID: 32016975"""" has been withdrawn. The Publisher apologizes for any inconvenience this may cause. https://www.europeanreview.org/article/20052.","hji,kes",0,0,0,2,0,NA,NA +32750793,Index Networks.,"We show that existing upsampling operators can be unified using the notion of the index function. This notion is inspired by an observation in the decoding process of deep image matting where indices-guided unpooling can often recover boundary details considerably better than other upsampling operators such as bilinear interpolation. 
By viewing the indices as a function of the feature map, we introduce the concept of 'learning to index', and present a novel index-guided encoder-decoder framework where indices are learned adaptively from data and are used to guide downsampling and upsampling stages, without extra training supervision. At the core of this framework is a new learnable module, termed Index Network (IndexNet), which dynamically generates indices conditioned on the feature map. IndexNet can be used as a plug-in applicable to almost all convolutional networks that have coupled downsampling and upsampling stages, enabling the networks to dynamically capture variations of local patterns. In particular, we instantiate, investigate five families of IndexNet, highlight their superiority in delivering spatial information over other upsampling operators with experiments on synthetic data, and demonstrate their effectiveness on four dense prediction tasks, including image matting, image denoising, semantic segmentation, and monocular depth estimation. Code and is available at: https://git.io/IndexNet.","hji,kes",0,0,0,2,0,NA,NA +32767305,Targeted regulation of miR-195 on MAP2K1 for suppressing ADM drug resistance in prostate cancer cells.,"Since this article has been suspected of research misconduct and the corresponding authors did not respond to our request to prove originality of data and figures, """"Targeted regulation of miR-195 on MAP2K1 for suppressing ADM drug resistance in prostate cancer cells, by J.-Y. Zhang, Y.-N. Li, X. Mu, Z.-L. Pan, W.-B. Liu, published in Eur Rev Med Pharmacol Sci 2018; 22 (24): 8599-8608-DOI: 10.26355/eurrev_201812_16623-PMID: 30575899"""" has been withdrawn. The Publisher apologizes for any inconvenience this may cause. 
https://www.europeanreview.org/article/16623.","hji,kes",0,0,0,2,0,NA,NA +32804088,Increasing Awareness and Use of Mobile Health Technology Among Individuals With Hypertension in a Rural Community of Bangladesh: Protocol for a Randomized Controlled Trial.,"BACKGROUND:Hypertension remains one of the foremost noncommunicable diseases that most often lead to cardiovascular diseases and its different complications. The prevalence of hypertension in Bangladesh has been increasing. However, there are very limited studies that have evaluated the impact of health education and awareness development in mitigating the burden of hypertension and its complications in Bangladesh. OBJECTIVE:This study aims to increase awareness, enhance knowledge, and change lifestyle behaviors through health education and the use of mobile health (mHealth) technology among individuals with hypertension living in a rural community of Bangladesh. METHODS:A randomized controlled trial is underway in a Mirzapur subdistrict of Bangladesh. This trial compares two groups of individuals with hypertension: The comparison arm receives health education and the intervention arm receives health education and a periodic mobile phone-based text message intervention. The trial duration is 5 months. The primary end point is participants' actual behavior changes brought about by increased awareness and knowledge. RESULTS:Enrollment of participants started in August 2018, and collection of follow-up data was completed at the end of July 2019. A total of 420 participants volunteered to participate, and among them, 209 and 211 were randomly allocated to the intervention group and the control group, respectively. Among them, the ratio of males/females was 12.0/88.0 in the intervention group and 16.1/83.9 in the control group. Data cleaning and analyses have been completed and the results have been submitted for publication. 
CONCLUSIONS:Periodic short education using mHealth technology in addition to face-to-face health education may be an effective method for increasing awareness and knowledge about behavioral changes and maintaining healthy lifestyle behaviors. TRIAL REGISTRATION:Bangladesh Medical Research Council (BMRC) 06025072017; ClinicalTrials.gov NCT03614104, https://clinicaltrials.gov/ct2/show/NCT03614104; University hospital Medical Information Network (UMIN) R000033736, https://upload.umin.ac.jp/cgi-open-bin/ctr_e/ctr_his_list.cgi?recptno=R000033736. INTERNATIONAL REGISTERED REPORT IDENTIFIER (IRRID):DERR1-10.2196/15523.","hji,kes",0,0,0,2,0,NA,NA +32811511,Advancing brain barriers RNA sequencing: guidelines from experimental design to publication.,"

Background

RNA sequencing (RNA-Seq) in its varied forms has become an indispensable tool for analyzing differential gene expression and thus characterization of specific tissues. Aiming to understand the brain barriers genetic signature, RNA seq has also been introduced in brain barriers research. This has led to availability of both, bulk and single-cell RNA-Seq datasets over the last few years. If appropriately performed, the RNA-Seq studies provide powerful datasets that allow for significant deepening of knowledge on the molecular mechanisms that establish the brain barriers. However, RNA-Seq studies comprise complex workflows that require to consider many options and variables before, during and after the proper sequencing process.

Main body

In the current manuscript, we build on the interdisciplinary experience of the European PhD Training Network BtRAIN ( https://www.btrain-2020.eu/ ) where bioinformaticians and brain barriers researchers collaborated to analyze and establish RNA-Seq datasets on vertebrate brain barriers. The obstacles BtRAIN has identified in this process have been integrated into the present manuscript. It provides guidelines along the entire workflow of brain barriers RNA-Seq studies starting from the overall experimental design to interpretation of results. Focusing on the vertebrate endothelial blood-brain barrier (BBB) and epithelial blood-cerebrospinal-fluid barrier (BCSFB) of the choroid plexus, we provide a step-by-step description of the workflow, highlighting the decisions to be made at each step of the workflow and explaining the strengths and weaknesses of individual choices made. Finally, we propose recommendations for accurate data interpretation and on the information to be included into a publication to ensure appropriate accessibility of the data and reproducibility of the observations by the scientific community.

Conclusion

Next generation transcriptomic profiling of the brain barriers provides a novel resource for understanding the development, function and pathology of these barrier cells, which is essential for understanding CNS homeostasis and disease. Continuous advancement and sophistication of RNA-Seq will require interdisciplinary approaches between brain barrier researchers and bioinformaticians as successfully performed in BtRAIN. The present guidelines are built on the BtRAIN interdisciplinary experience and aim to facilitate collaboration of brain barriers researchers with bioinformaticians to advance RNA-Seq study design in the brain barriers community.","hji,kes",0,0,0,2,0,NA,NA +32838167,"COVID-19, Brachytherapy, and Gynecologic Cancers: a Moroccan Experience.","The treatment of gynecological cancers is the main activity of brachytherapy units. However, during COVID-19 pandemic, precautions should be done in order to reduce the spread of the virus while maintaining all chances to recovery for all patients (Radiother Oncol 148, 227-228, 2020). Despite the extent of the pandemic in our country, limited data are available to establish recommendations with a sufficient level of evidence (Radiother Oncol 148, 227-228, 2020). More recently, the American Brachytherapy Society published some clarifications in this regard and international expert consensus recommendations of radiation therapy for gynecologic malignancies during the COVID-19 pandemic were published (https://www.americanbrachytherapy.org/about-abs/abs-news/abs-statement-on-coronavirus/, Gynecol Oncol 15, 2020). 
In this commentary, we sought to share the procedures adopted for the management of gynecological cancer patients during COVID-19 pandemic in our brachytherapy unit.","hji,kes",0,0,0,2,0,NA,NA +32881514,Robust Accurate Identification and Biomass Estimates of Microorganisms via Tandem Mass Spectrometry.,"Rapid and accurate identification of microorganisms and estimation of their biomasses are of extreme importance to public health. Mass spectrometry has become an important technique for these purposes. Previously we published a workflow named Microorganism Classification and Identification (MiCId v.12.26.2017) that was shown to perform no worse than other workflows. This manuscript presents MiCId v.12.13.2018 that, in comparison with the earlier version v.12.26.2017, allows for biomass estimates, provides more accurate microorganism identifications (better controls the number of false positives), and is robust against database size increase. This significant advance is made possible by several new ingredients introduced: first, we apply a modified expectation-maximization method to compute for each taxon considered a prior probability, which can be used for biomass estimate; second, we introduce a new concept called ownership, through which the participation ratio is computed and use it as the number of taxa to be kept within a cluster of closely related taxa; third, based on confidently identified peptides, we calculate for each taxon its degree of independence from the rest of taxa considered to determine whether or not to split this taxon off the cluster. Using 270 data files, each containing a large number of MS/MS spectra, we show that, in comparison with v.12.26.2017, version v.12.13.2018 yields superior retrieval results. We also show that MiCId v.12.13.2018 can estimate species biomass reasonably well. 
The new MiCId v.12.13.2018, designed to run in Linux environment, is freely available for download at https://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads.html.","hji,kes",0,0,0,2,0,NA,NA +32881579,Correction to Geschwind et al. (2020).,"Reports an error in """"Positivity pays off: Clients' perspectives on positive compared with traditional cognitive behavioral therapy for depression"""" by Nicole Geschwind, Emke Bosgraaf, Fredrike Bannink and Frenk Peeters (Psychotherapy, Advanced Online Publication, Feb 20, 2020, np). In the article http://dx.doi.org/10.1037/pst0000288), the second to last sentence does not appear correctly and should appear instead as follows: The conclusion emerging from this study is that exploring better moments and building positivity efficiently counters depressive symptoms and builds well-being. (The following abstract of the original article appeared in record 2020-12346-001.) In this qualitative study, we explored the experiences of clients receiving cognitive behavioral therapy (CBT) for major depressive disorder. All participants received 8 sessions of traditional CBT (based on Beck, Rush, Shaw, & Emergy, 1979) and 8 sessions of positive CBT (order counterbalanced). The aim of the study was to examine clients' experience of positive CBT and to contrast this with their experience of traditional CBT. Positive CBT structurally and selectively focuses on better moments (exceptions to the problem as opposed to the problem), strengths, and positive emotions and integrates traditional CBT with solution-focused brief therapy and positive psychology. In addition to conducting interviews with 12 individuals, the second author attended all therapy sessions of 4 clients and observed biweekly supervision sessions as further methods of data collection. 
Qualitative analysis showed that, despite initial skepticism, clients preferred positive CBT and indicated experiencing a steeper learning curve during positive, compared with traditional, CBT for depression. The popularity of positive CBT was attributable to 4 influences: feeling good and empowered, benefitting from upward spiral effects of positive emotions, learning to appreciate baby steps, and (re)discovering optimism as a personal strength. Qualitative analysis showed that, despite better moments and building positivity efficiently counters depressive symptoms and builds well-being. Clients perceived positive CBT's upbeat tone as stimulating and as motivating for change. (PsycInfo Database Record (c) 2020 APA, all rights reserved).","hji,kes",0,0,0,2,0,NA,NA +32912508,"Rationale and Design of a Randomized, Double-Blind Trial Evaluating the Efficacy of Tranexamic Acid on Hematoma Expansion and Peri-hematomal Edema in Patients with Spontaneous Intracerebral Hemorrhage within 4.5 h after Symptom Onset: The THE-ICH Trial Protocol.","

Background

Hematoma expansion (HE) and peri-hematomal edema (PHE) are associated with adverse outcomes of patients with acute spontaneous intracerebral hemorrhage (sICH). Due to a lack of proven treatments, it is critical to explore novel treatments for HE and PHE to improve functional recovery after sICH.

Methods

This is a prospective, multicenter, placebo-controlled, double-blind, and randomized clinical study of approximately 2400 patients with sICH. Patients within 4.5h of sICH onset that fulfilling the clinical criteria for diagnosis (e.g. age more than 18 years old, the Glasgow Coma Scal>7, and no planned surgery) will randomly receive either intravenous tranexamic acid (TXA) 1 g 10-min bolus followed by 1g eight-hour infusion or placebo (sodium chloride 0.9%). Clinical data including the ICH score and the Glasgow Coma Scale score will be collected on admission. After assessment of HE and PHE expansion, follow-up will be conducted with enrolled patients for 90 days.

Results

Primary outcome metrics are HE (defined as either >33% or >6ml increase from baseline) and PHE expansion rate at 24 3h and 72 3h post-sICH. Secondary outcome metrics include mortality and the modified Rankin Scale on day 90 after sICH. Appropriate statistic methods will be used to evaluate the efficacy of TXA on patients with sICH within 4.5h of symptom onset.

Conclusions

HE usually occurs within the first few hours after onset of symptoms. It is essential to evaluate the efficacy of TXA on HE within a narrow window of time. This will be the first trial to evaluate the efficacy of TXA on HE and PHE expansion in sICH patients within 4.5h after symptom onset. This trial is registered as ChiCTR1900027065 at http://www.chictr.org.cn.","hji,kes",0,0,0,2,0,NA,NA +32964354,A novel online calculator predicting short-term postoperative outcomes in patients with metastatic brain tumors.,"

Purpose

Establishing predictors of hospital length of stay (LOS), discharge deposition, and total hospital charges is essential to providing high-quality, value-based care. Though previous research has investigated these outcomes for patients with metastatic brain tumors, there are currently no tools that synthesize such research findings and allow for prediction of these outcomes on a patient-by-patient basis. The present study sought to develop a prediction calculator that uses patient demographic and clinical information to predict extended hospital length of stay, non-routine discharge disposition, and high total hospital charges for patients with metastatic brain tumors.

Methods

Patients undergoing surgery for metastatic brain tumors at a single academic institution were analyzed (2017-2019). Multivariate logistic regression was used to identify independent predictors of extended LOS (> 7days), non-routine discharge, and high total hospital charges (> $46,082.63). p < 0.05 was considered statistically significant. C-statistics and the Hosmer-Lemeshow test were used to assess model discrimination and calibration, respectively.

Results

A total of 235 patients were included in our analysis, with a mean age of 62.74years. The majority of patients were female (52.3%) and Caucasian (76.6%). Our models predicting extended LOS, non-routine discharge, and high hospital charges had optimism-corrected c-statistics > 0.7, and all three models demonstrated adequate calibration (p > 0.05). The final models are available as an online calculator ( https://neurooncsurgery.shinyapps.io/brain_mets_calculator/ ).

Conclusions

Our models predicting postoperative outcomes allow for individualized risk-estimation for patients following surgery for metastatic brain tumors. Our results may be useful in helping clinicians to provide resource-conscious, high-value care.","hji,kes",0,0,0,2,0,NA,NA +32978618,Deep learning based prediction of reversible HAT/HDAC-specific lysine acetylation.,"Protein lysine acetylation regulation is an important molecular mechanism for regulating cellular processes and plays critical physiological and pathological roles in cancers and diseases. Although massive acetylation sites have been identified through experimental identification and high-throughput proteomics techniques, their enzyme-specific regulation remains largely unknown. Here, we developed the deep learning-based protein lysine acetylation modification prediction (Deep-PLA) software for histone acetyltransferase (HAT)/histone deacetylase (HDAC)-specific acetylation prediction based on deep learning. Experimentally identified substrates and sites of several HATs and HDACs were curated from the literature to generate enzyme-specific data sets. We integrated various protein sequence features with deep neural network and optimized the hyperparameters with particle swarm optimization, which achieved satisfactory performance. Through comparisons based on cross-validations and testing data sets, the model outperformed previous studies. Meanwhile, we found that protein-protein interactions could enrich enzyme-specific acetylation regulatory relations and visualized this information in the Deep-PLA web server. Furthermore, a cross-cancer analysis of acetylation-associated mutations revealed that acetylation regulation was intensively disrupted by mutations in cancers and heavily implicated in the regulation of cancer signaling. 
These prediction and analysis results might provide helpful information to reveal the regulatory mechanism of protein acetylation in various biological processes to promote the research on prognosis and treatment of cancers. Therefore, the Deep-PLA predictor and protein acetylation interaction networks could provide helpful information for studying the regulation of protein acetylation. The web server of Deep-PLA could be accessed at http://deeppla.cancerbio.info.","hji,kes",0,0,0,2,0,NA,NA +32986461,Correction to Bagby et al. (2020).,"Reports an error in """"Examining the """"traditional background hypothesis"""" for the MMPI-2-RF L-r scores in a Muslim faith-based sample"""" by R. Michael Bagby, Karin A. Onno, Ardeshir Mortezaei and Martin Sellbom (Psychological Assessment, Advanced Online Publication, Jul 27, 2020, np). In the article """"Examining the 'Traditional Background Hypothesis' for the MMPI-2-RF L-r Scores in a Muslim Faith-Based Sample,"""" by R. Michael Bagby, Karin A. Onno, Ardeshir Mortezaei, and Martin Sellbom (Psychological Assessment, 2020, Vol. 32, No. 10, pp. 991-995, http://dx.doi.org/ 10.1037/pas0000941), the word """"not"""" was missing in the abstract from the text """"(b) direct assessments of strength of faith or positive impression management were included or measured independently."""" The correct sentence should have read as follows: """"(b) direct assessments of strength of faith or positive impression management were not included or measured independently."""" All versions of this article have been corrected. (The following abstract of the original article appeared in record 2020-54974-001.) The traditional background hypothesis (TBH) is a long-standing belief associated with the Minnesota Multiphasic Personality Inventory (MMPI) L scale; a validity scale, which appears on every version of the family of MMPI instruments including the soon-to-be released MMPI-3. 
The L scale was originally designed to assess whether test respondents presented themselves in an unrealistically favorable light. Both researchers and clinicians noted, however, that those from traditional Christian faith-based groups produced elevated L-scale scores. A recent meta-analysis supported this observation, reporting an average L-scale elevation 0.50 SD greater than the MMPI-2 normative sample compared to samples of those with presumptively strong Christian-Judeo faith. Some limitations of this meta-analysis are that (a) the samples used in it included those undergoing an evaluative assessment, which could elevate L-scale scores independent of strength of faith belief, and (b) direct assessments of strength of faith or positive impression management were included or measured independently. Our primary goal in this study was to examine the TBH addressing these limitations with a sample of those who self-identified as believers in the Muslim faith (N = 267), the examination of which expands the scope beyond those of the Christian-Judeo faith. Consistent with previous results, the mean L-r (MMPI/MMPI-2 L scale counterpart on the MMPI-2-Restructured Form) was 56.41 T. Higher L-r scale scores were associated with increasing strength in the Muslim faith, and although increasing L-r scores were primarily associated with impression management, increasing Muslim-based faith values had a nontrivial influence on L-r scores and especially in the moderate score range of this scale. (PsycInfo Database Record (c) 2020 APA, all rights reserved).","hji,kes",0,0,0,2,0,NA,NA +32997632,Interhemispheric Functional Reorganization and its Structural Base After BCI-Guided Upper-Limb Training in Chronic Stroke.,"Brain-computer interface (BCI)-guided robot-assisted upper-limb training has been increasingly applied to stroke rehabilitation. However, the induced long-term neuroplasticity modulation still needs to be further characterized. 
This study investigated the functional reorganization and its structural base after BCI-guided robot-assisted training using resting-state fMRI, task-based fMRI, and diffusion tensor imaging (DTI) data. The clinical improvement and the neurological changes before, immediately after, and six months after 20-session BCI-guided robot hand training were explored in 14 chronic stroke subjects. The structural base of the induced functional reorganization and motor improvement were also investigated using DTI. Repeated measure ANOVA indicated long-term motor improvement was found (F[2, 26] = 6.367, p = 0.006). Significantly modulated functional connectivity (FC) was observed between ipsilesional motor regions (M1 and SMA) and some contralesional areas (SMA, PMd, SPL) in the seed-based analysis. Modulated FC with ipsilesional M1 was significantly correlated with motor function improvement (r = 0.6455, p = 0.0276). Besides, increased interhemispheric FC among the sensorimotor area from resting-state data and increased laterality index from task-based data together indicated the re-balance of the two hemispheres during the recovery. Multiple linear regression models suggested that both motor function improvement and the functional change between ipsilesional M1 and contralesional premotor area were significantly associated with the ipsilesional corticospinal tract integrity. The results in the current study provided solid support for stroke recovery mechanism in terms of interhemispheric interaction and its structural substrates, which could further enhance the understanding of BCI training in stroke rehabilitation. This study was registered at https://clinicaltrials.gov (NCT02323061).","hji,kes",0,0,0,2,0,NA,NA +33042605,LibMI: An Open Source Library for Efficient Histopathological Image Processing.,"

Background

Whole-slide images (WSIs) as a kind of image data are rapidly growing in the digital pathology domain. With unusual high resolution, these images make them hard to be supported by conventional tools or file formats. Thus, it obstructs data sharing and automated analysis. Here, we propose a library, LibMI, along with its open and standardized image file format. They can be used together to efficiently read, write, modify, and annotate large images.

Materials and methods

LibMI utilizes the concept of pyramid image structure and lazy propagation from a segment tree algorithm to support reading and modifying and to guarantee that both operations have linear time complexity. Further, a cache mechanism was introduced to speed up the program.

Results

LibMI is an open and efficient library for histopathological image processing. To demonstrate its functions, we applied it to several tasks including image thresholding, microscopic color correction, and storing pixel-wise information on WSIs. The result shows that libMI is particularly suitable for modifying large images. Furthermore, compared with congeneric libraries and file formats, libMI and modifiable multiscale image (MMSI) run 18.237 times faster on read-only tasks.

Conclusions

The combination of libMI library and MMSI file format enables developers to efficiently read and modify WSIs, thus can assist in pixel-wise image processing on extremely large images to promote building image processing pipeline. The library together with the data schema is freely available on GitLab: https://gitlab.com/BioAI/libMI.","hji,kes",0,0,0,2,0,NA,NA +33048108,A comprehensive comparison of residue-level methylation levels with the regression-based gene-level methylation estimations by ReGear.,"

Motivation

DNA methylation is a biological process impacting the gene functions without changing the underlying DNA sequence. The DNA methylation machinery usually attaches methyl groups to some specific cytosine residues, which modify the chromatin architectures. Such modifications in the promoter regions will inactivate some tumor-suppressor genes. DNA methylation within the coding region may significantly reduce the transcription elongation efficiency. The gene function may be tuned through some cytosines are methylated.

Methods

This study hypothesizes that the overall methylation level across a gene may have a better association with the sample labels like diseases than the methylations of individual cytosines. The gene methylation level is formulated as a regression model using the methylation levels of all the cytosines within this gene. A comprehensive evaluation of various feature selection algorithms and classification algorithms is carried out between the gene-level and residue-level methylation levels.

Results

A comprehensive evaluation was conducted to compare the gene and cytosine methylation levels for their associations with the sample labels and classification performances. The unsupervised clustering was also improved using the gene methylation levels. Some genes demonstrated statistically significant associations with the class label, even when no residue-level methylation features have statistically significant associations with the class label. So in summary, the trained gene methylation levels improved various methylome-based machine learning models. Both methodology development of regression algorithms and experimental validation of the gene-level methylation biomarkers are worth of further investigations in the future studies. The source code, example data files and manual are available at http://www.healthinformaticslab.org/supp/.","hji,kes",0,0,0,2,0,NA,NA +33058887,How the CORAL software can be used to select compounds for efficient treatment of neurodegenerative diseases?,"Recommendations on the efficient application of CORAL software (http://www.insilico.eu/coral) to establish quantitative structure-property/activity relationships (QSPRs/QSARs) are provided. The predictive potential of the approach has been demonstrated for QSAR models developed for inhibitor concentrations (negative decimal logarithm of IC50) of derivatives of N-methyl-d-aspartate (NMDA) receptor, leucine-rich repeat kinase 2 (LRRK2), and tropomyosin receptor kinase A (TrkA). The above three protein targets are related to various neurodegenerative diseases such as Alzheimer's and Parkinson's. Each model was checked using several splits of the data for the training and the validation sets. The index of ideality of correlation (IIC) represents a tool to improve the predictive potential for an arbitrary model. 
However, the use of the IIC should be carried out according to rules, described in this work.","hji,kes",0,0,0,2,0,NA,NA +33064603,Exploring Sentence Diversity at the Boundary of Typical and Impaired Language Abilities.,"Purpose This review article summarizes programmatic research on sentence diversity in toddlers developing language typically and explores developmental patterns of sentence diversity in toddlers at risk for specific language impairment. Method The first half of this review article presents a sentence-focused approach to language assessment and intervention and reviews findings from empirical studies of sentence diversity. In the second half, subject and verb diversity in three simple sentence types are explored in an archival database of toddlers with varying levels of grammatical outcomes at 36 months of age: low average, mild/moderate delay, and severe delay. Results Descriptive findings from the archival database replicated previous developmental patterns. All toddlers with low-average language abilities produced diverse simple sentences by 30 months of age and exhibited greater sentence diversity with first-person I-subjects before third-person subjects. Third-person subject diversity emerged in a developmental sequence, increasing in one-argument copula contexts and one-argument subject-verb sentences before two-argument subject-verb-object sentences. This developmental pattern held across all three outcome groups. Third-person subjects were least diverse for children with severe grammatical delays and were absent in all sentence contexts for two children with severe delays at 36 months. Conclusions Sentence diversity increases gradually and expands in predictable patterns. Understanding these developmental patterns may help identify and treat children who display unexpected difficulty combining different subjects and verbs in flexible ways. 
Supplemental Material and Presentation Video https://doi.org/10.23641/asha.12915320.","hji,kes",0,0,0,2,0,NA,NA +33130899,Functional analysis of low-grade glioma genetic variants predicts key target genes and transcription factors.,"

Background

Large-scale genome-wide association studies (GWAS) have implicated thousands of germline genetic variants in modulating individuals' risk to various diseases, including cancer. At least 25 risk loci have been identified for low-grade gliomas (LGGs), but their molecular functions remain largely unknown.

Methods

We hypothesized that GWAS loci contain causal single nucleotide polymorphisms (SNPs) that reside in accessible open chromatin regions and modulate the expression of target genes by perturbing the binding affinity of transcription factors (TFs). We performed an integrative analysis of genomic and epigenomic data from The Cancer Genome Atlas and other public repositories to identify candidate causal SNPs within linkage disequilibrium blocks of LGG GWAS loci. We assessed their potential regulatory role via in silico TF binding sequence perturbations, convolutional neural network trained on TF binding data, and simulated annealing-based interpretation methods.

Results

We built an interactive website (http://education.knoweng.org/alg3/) summarizing the functional footprinting of 280 variants in 25 LGG GWAS regions, providing rich information for further computational and experimental scrutiny. We identified as case studies PHLDB1 and SLC25A26 as candidate target genes of rs12803321 and rs11706832, respectively, and predicted the GWAS variant rs648044 to be the causal SNP modulating ZBTB16, a known tumor suppressor in multiple cancers. We showed that rs648044 likely perturbed the binding affinity of the TF MAFF, as supported by RNA interference and in vitro MAFF binding experiments.

Conclusions

The identified candidate (causal SNP, target gene, TF) triplets and the accompanying resource will help accelerate our understanding of the molecular mechanisms underlying genetic risk factors for gliomas.","hji,kes",0,0,0,2,0,NA,NA +33140980,Determinants and impact of physical impairment in patient-reported outcomes among older patients with type 2 diabetes mellitus in Japan.,"

Objective

To investigate the predictive factors associated with physical impairment among older patients with type 2 diabetes mellitus (T2DM) in Japan and to examine the potential impact of physical impairment on patient-reported health outcomes in this population.

Methods

A cross-sectional analysis was conducted using patient-reported data from the 2012-2014 Japan National Health and Wellness Survey. Physical impairment was measured using the Physical Component Summary (PCS) score of the Short-Form 36-Item Health Survey (SF-36) three-component model (using Japanese norms). Older T2DM patients (=65 years old; n = 1511) were dichotomized into physically impaired (PCS = 25th percentile; n = 378) and non-physically impaired (PCS > 25th percentile; n = 1133). Work productivity (absenteeism, presenteeism and overall work impairment), activity impairment and healthcare resource utilization were compared between these groups.

Results

Age, female sex, low and high body mass index (BMI), diabetes-related complications, cardiovascular events, unawareness of having hypoglycemic events in the past 3 months, and lack of regular exercise were significant factors associated with physical impairment in multivariable analysis. The physically impaired group reported significantly more regular outpatient visits (13.48 vs. 10.16, respectively, p < .001), 1% or greater absenteeism (16.7% vs. 4.1%, p = .005), greater presenteeism (27.8% vs. 12.2%, p = .001), overall work impairment (30.0% vs. 13.0%, p = .001) and overall activity impairment (39.5% vs. 17.2%, p < .001) than the non-physically-impaired group after adjusting for covariates.

Conclusions

This study identified age, BMI, diabetes-related comorbidities, history of cardiovascular events and lack of exercise as key predictors associated with physical impairment in older patients with T2DM in Japan, which predicted low work productivity as well as activity impairment. This study provides support that physical impairment in patients with T2DM may lead to low work productivity and activity impairment.Supplemental data for this article is available online at https://doi.org/10.1080/03007995.2020.1846170.","hji,kes",0,0,0,2,0,NA,NA +33186370,Individual and community level factors associated with anemia among children 6-59 months of age in Ethiopia: A further analysis of 2016 Ethiopia demographic and health survey.,"

Background

Anemia is a global public health problem; but its burden is disproportionately borne among children in the African Regions. The 2016 Ethiopia Demographic and Health Survey report showed that the prevalence of anemia among children 6-59 months of age was 57%; far exceeding the national target of 25% set for 2015. Although studies have been conducted in Ethiopia, multilevel analysis has rarely been used to identify factors associated with anemia among children. Therefore, this study aimed to identify individual and community-level factors associated with anemia among children 6-59 months of age by fitting a multilevel logistic regression model.

Methods

The data was obtained from the 2016 Ethiopia Demographic and Health Survey, conducted from January to June 2016, and downloaded from the website http://www.DHSprogram.com. The sample was taken using two-stage stratified sampling. In stage one, 645 Enumeration Areas and in stage two 28 households per Enumeration Area were selected. A sample of 7790 children 6-59 months of age was included. Data were analyzed using STATA version 14. A multilevel logistic regression model was fitted and an adjusted odds ratio with a 95% confidence interval was obtained.

Result

From the individual-level factors, anemia was associated most strongly with child age, wealth index, maternal anemia and child stunting followed by child underweight, child fever and birth order whereas from the community-level, the strongest odds of anemia occurred among children from Somali, Harari, Dire Dawa and Afar region followed by Oromia and Addis Ababa. Low community-poverty is a protective factor for anemia. The odds of anemia were 0.81 (95% CI: 0.66, 0.99) times lower for children who were living in communities of lower poverty status than children who were living in communities of higher poverty status. Children from Somali and Dire Dawa had 3.38 (95% CI: 3.25, 5.07) and 2.22 (95% CI: 1.42, 3.48) times higher odds of anemia, respectively than children from the Tigray region.

Conclusions

This study shows that anemia among children 6-59 months of age is affected both by the individual and community level factors. It is better to strengthen the strategies of early detection and management of stunted and underweight children. At the same time, interventions should be strengthened to address maternal anemia, child fever and poverty, specifically targeting regions identified to have a high risk of anemia.","hji,kes",0,0,0,2,0,NA,NA +33190499,LigMate: A Multifeature Integration Algorithm for Ligand-Similarity-Based Virtual Screening.,"Ligand-similarity-based virtual screening is one of the most applicable computer-aided drug design techniques. The current methodology relies heavily on several descriptors of molecular features, including atoms (zero-dimensional, 0D), the presence or absence of structural features (one-dimensional, 1D), topological descriptors (two-dimensional, 2D), geometry and volume (three-dimensional, 3D), or stereoelectronic and stereodynamic properties (four-dimensional, 4D). These descriptors have been frequently used in virtual screening; however, they are usually used independently without integration, which may hinder effective and precise virtual screening. In this study, we developed a multifeature integration algorithm named LigMate, which employs a Hungarian algorithm-based matching and a machine learning-based nonlinear combination of various descriptors, including the new relevant descriptors focusing on the maximum common substructures (maximum common substructure score, MCSS), the relative distance of atoms from the ligand mass center (intraligand distance score, ILDS), as well as the ring differences (ring score, RS). 
In the benchmark tests, LigMate achieved an overall enrichment factor of the first percent (EF1) of 36.14 and an area under the curve (AUC) value of 0.81 on the DUD-E data set, as well as an EF1 of 15.44 and an AUC of 0.69 on the maximum unbiased validation (MUV) data set, outperforming the control methods that are based on single descriptors. Thus, our study provides a new framework for multiple feature integration, which can benefit ligand-similarity-based virtual screening. LigMate is freely available for noncommercial users at http://cao.labshare.cn/ligmate/.","hji,kes",0,0,0,2,0,NA,NA +33201237,AlgPred 2.0: an improved method for predicting allergenic proteins and mapping of IgE epitopes.,"AlgPred 2.0 is a web server developed for predicting allergenic proteins and allergenic regions in a protein. It is an updated version of AlgPred developed in 2006. The dataset used for training, testing and validation consists of 10075 allergens and 10075 non-allergens. In addition, 10451 experimentally validated immunoglobulin E (IgE) epitopes were used to identify antigenic regions in a protein. All models were trained on 80% of data called training dataset, and the performance of models was evaluated using 5-fold cross-validation technique. The performance of the final model trained on the training dataset was evaluated on 20% of data called validation dataset; no two proteins in any two sets have more than 40% similarity. First, a Basic Local Alignment Search Tool (BLAST) search has been performed against the dataset, and allergens were predicted based on the level of similarity with known allergens. Second, IgE epitopes obtained from the IEDB database were searched in the dataset to predict allergens based on their presence in a protein. Third, motif-based approaches like multiple EM for motif elicitation/motif alignment and search tool have been used to predict allergens. 
Fourth, allergen prediction models have been developed using a wide range of machine learning techniques. Finally, the ensemble approach has been used for predicting allergenic protein by combining prediction scores of different approaches. Our best model achieved maximum performance in terms of area under receiver operating characteristic curve 0.98 with Matthew's correlation coefficient 0.85 on the validation dataset. A web server AlgPred 2.0 has been developed that allows the prediction of allergens, mapping of IgE epitope, motif search and BLAST search (https://webs.iiitd.edu.in/raghava/algpred2/).","hji,kes",0,0,0,2,0,NA,"tool only , I think" +33244798,Comparing the motivational value of rewards and losses in an EEG-pupillometry study.,"We found earlier that performance-contingent rewards lead to faster performance than equivalent losses [Carsten, Hoofs, Boehler, & Krebs, 2019. Motivation Science, 5(3). http://dx.doi.org/10.1037/mot0000117]. Here, we further tested the hypothesis that motivation to gain rewards is higher than to avoid losses, even when incentive values are matched. As implicit markers of motivation, we assessed electroencephalography (EEG) focusing on the P3 after target and feedback onset, and the Feedback-Related Negativity (FRN), as well as simultaneously recorded pupil size. Comparing only reward and loss prospect trials in Experiment 1, we found no consistent differences in behavior and electrophysiological markers of motivation, although pupil data suggested higher arousal after feedback in potential-loss trials. Including additional no-incentive trials in Experiment 2, we found consistent evidence that motivation to gain rewards was higher than to avoid losses: In line with behavior, the target-P3 was most pronounced for reward-related stimuli, followed by loss and no-incentive ones. This same ranking was found in the P3 and the FRN after positive outcomes (i.e., reward, avoided loss, and correct feedback in no-incentive trials). 
Negative outcomes featured a different pattern in line with the pupil response, which suggests that losses are emotionally salient events, without invigorating behavior proportionally. In sum, these findings suggest that the motivation to gain rewards is more pronounced than motivation to avoid equivalent losses, at least in tasks promoting transient increases in attention triggered by incentive prospect. These motivational differences may arise as avoided losses are not profitable in the long term, in contrast to gained rewards.","hji,kes",0,0,0,2,0,NA,NA +33245691,NonClasGP-Pred: robust and efficient prediction of non-classically secreted proteins by integrating subset-specific optimal models of imbalanced data.,"Non-classically secreted proteins (NCSPs) are proteins that are located in the extracellular environment, although there is a lack of known signal peptides or secretion motifs. They usually perform different biological functions in intracellular and extracellular environments, and several of their biological functions are linked to bacterial virulence and cell defence. Accurate protein localization is essential for all living organisms, however, the performance of existing methods developed for NCSP identification has been unsatisfactory and in particular suffer from data deficiency and possible overfitting problems. Further improvement is desirable, especially to address the lack of informative features and mining subset-specific features in imbalanced datasets. In the present study, a new computational predictor was developed for NCSP prediction of gram-positive bacteria. First, to address the possible prediction bias caused by the data imbalance problem, ten balanced subdatasets were generated for ensemble model construction. Then, the F-score algorithm combined with sequential forward search was used to strengthen the feature representation ability for each of the training subdatasets. 
Third, the subset-specific optimal feature combination process was adopted to characterize the original data from different aspects, and all subdataset-based models were integrated into a unified model, NonClasGP-Pred, which achieved an excellent performance with an accuracy of 93.23 %, a sensitivity of 100 %, a specificity of 89.01 %, a Matthew's correlation coefficient of 87.68 % and an area under the curve value of 0.9975 for ten-fold cross-validation. Based on assessment on the independent test dataset, the proposed model outperformed state-of-the-art available toolkits. For availability and implementation, see: http://lab.malab.cn/~wangchao/softwares/NonClasGP/.","hji,kes",0,0,0,2,0,NA,NA +33258916,MOSGA: Modular Open-Source Genome Annotator.,"

Motivation

The generation of high-quality assemblies, even for large eukaryotic genomes, has become a routine task for many biologists thanks to recent advances in sequencing technologies. However, the annotation of these assemblies-a crucial step toward unlocking the biology of the organism of interest-has remained a complex challenge that often requires advanced bioinformatics expertise.

Results

Here, we present MOSGA (Modular Open-Source Genome Annotator), a genome annotation framework for eukaryotic genomes with a user-friendly web-interface that generates and integrates annotations from various tools. The aggregated results can be analyzed with a fully integrated genome browser and are provided in a format ready for submission to NCBI. MOSGA is built on a portable, customizable and easily extendible Snakemake backend, and thus, can be tailored to a wide range of users and projects.

Availability and implementation

We provide MOSGA as a web service at https://mosga.mathematik.uni-marburg.de and as a docker container at registry.gitlab.com/mosga/mosga:latest. Source code can be found at https://gitlab.com/mosga/mosga.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +33295759,Skin Doctor CP: Conformal Prediction of the Skin Sensitization Potential of Small Organic Molecules.,"Skin sensitization potential or potency is an important end point in the safety assessment of new chemicals and new chemical mixtures. Formerly, animal experiments such as the local lymph node assay (LLNA) were the main form of assessment. Today, however, the focus lies on the development of nonanimal testing approaches (i.e., in vitro and in chemico assays) and computational models. In this work, we investigate, based on publicly available LLNA data, the ability of aggregated, Mondrian conformal prediction classifiers to differentiate between non- sensitizing and sensitizing compounds as well as between two levels of skin sensitization potential (weak to moderate sensitizers, and strong to extreme sensitizers). The advantage of the conformal prediction framework over other modeling approaches is that it assigns compounds to activity classes only if a defined minimum level of confidence is reached for the individual predictions. This eliminates the need for applicability domain criteria that often are arbitrary in their nature and less flexible. Our new binary classifier, named Skin Doctor CP, differentiates nonsensitizers from sensitizers with a higher reliability-to-efficiency ratio than the corresponding nonconformal prediction workflow that we presented earlier. When tested on a set of 257 compounds at the significance levels of 0.10 and 0.30, the model reached an efficiency of 0.49 and 0.92, and an accuracy of 0.83 and 0.75, respectively. In addition, we developed a ternary classification workflow to differentiate nonsensitizers, weak to moderate sensitizers, and strong to extreme sensitizers. 
Although this model achieved satisfactory overall performance (accuracies of 0.90 and 0.73, and efficiencies of 0.42 and 0.90, at significance levels 0.10 and 0.30, respectively), it did not obtain satisfying class-wise results (at a significance level of 0.30, the validities obtained for nonsensitizers, weak to moderate sensitizers, and strong to extreme sensitizers were 0.70, 0.58, and 0.63, respectively). We argue that the model is, in consequence, unable to reliably identify strong to extreme sensitizers and suggest that other ternary models derived from the currently accessible LLNA data might suffer from the same problem. Skin Doctor CP is available via a public web service at https://nerdd.zbh.uni-hamburg.de/skinDoctorII/.","hji,kes",0,0,0,2,0,NA,NA +33295914,iDHS-DASTS: identifying DNase I hypersensitive sites based on LASSO and stacking learning.,"The DNase I hypersensitivity site is an important marker of the DNA regulatory region, and its identification in the DNA sequence is of great significance for biomedical research. However, traditional identification methods are extremely time-consuming and can not obtain an accurate result. In this paper, we proposed a predictor called iDHS-DASTS to identify the DHS based on benchmark datasets. First, we adopt a feature extraction method called PseDNC which can incorporate the original DNA properties and spatial information of the DNA sequence. Then we use a method called LASSO to reduce the dimensions of the original data. Finally, we utilize stacking learning as a classifier, which includes Adaboost, random forest, gradient boosting, extra trees and SVM. Before we train the classifier, we use SMOTE-Tomek to overcome the imbalance of the datasets. In the experiment, our iDHS-DASTS achieves remarkable performance on three benchmark datasets. We achieve state-of-the-art results with over 92.06%, 91.06% and 90.72% accuracy for datasets [Doublestruck S]1, [Doublestruck S]2 and [Doublestruck S]3, respectively. 
To verify the validation and transferability of our model, we establish another independent dataset [Doublestruck S]4, for which the accuracy can reach 90.31%. Furthermore, we used the proposed model to construct a user friendly web server called iDHS-DASTS, which is available at http://www.xdu-duan.cn/.","hji,kes",0,0,0,2,0,NA,NA +33355345,Congress of Neurological Surgeons Systematic Review and Evidence-Based Guideline on Neuroablative Procedures for Patients With Cancer Pain.,"

Background

Managing cancer pain once it is refractory to conventional treatment continues to challenge caregivers committed to serving those who are suffering from a malignancy. Although neuromodulation has a role in the treatment of cancer pain for some patients, these therapies may not be suitable for all patients. Therefore, neuroablative procedures, which were once a mainstay in treating intractable cancer pain, are again on the rise. This guideline serves as a systematic review of the literature of the outcomes following neuroablative procedures.

Objective

To establish clinical practice guidelines for the use of neuroablative procedures to treat patients with cancer pain.

Methods

A systematic review of neuroablative procedures used to treat patients with cancer pain from 1980 to April 2019 was performed using the United States National Library of Medicine PubMed database, EMBASE, and Cochrane CENTRAL. After inclusion criteria were established, full text articles that met the inclusion criteria were reviewed by 2 members of the task force and the quality of the evidence was graded.

Results

In total, 14 646 relevant abstracts were identified by the literature search, from which 189 met initial screening criteria. After full text review, 58 of the 189 articles were included and subdivided into 4 different clinical scenarios. These include unilateral somatic nociceptive/neuropathic body cancer pain, craniofacial cancer pain, midline subdiaphragmatic visceral cancer pain, and disseminated cancer pain. Class II and III evidence was available for these 4 clinical scenarios. Level III recommendations were developed for the use of neuroablative procedures to treat patients with cancer pain.

Conclusion

Neuroablative procedures may be an option for treating patients with refractory cancer pain. Serious adverse events were reported in some studies, but were relatively uncommon. Improved imaging, refinements in technique and the availability of new lesioning modalities may minimize the risks of neuroablation even further. The full guidelines can be accessed at https://www.cns.org/guidelines/browse-guidelines-detail/guidelines-on-neuroablative-procedures-patients-wi.","hji,kes",0,0,0,2,0,NA,NA +33367514,BiCoN: Network-constrained biclustering of patients and omics data.,"

Motivation

Unsupervised learning approaches are frequently employed to stratify patients into clinically relevant subgroups and to identify biomarkers such as disease-associated genes. However, clustering and biclustering techniques are oblivious to the functional relationship of genes and are thus not ideally suited to pinpoint molecular mechanisms along with patient subgroups.

Results

We developed the network-constrained biclustering approach BiCoN (Biclustering Constrained by Networks) which (i) restricts biclusters to functionally related genes connected in molecular interaction networks and (ii) maximizes the difference in gene expression between two subgroups of patients. This allows BiCoN to simultaneously pinpoint molecular mechanisms responsible for the patient grouping. Network-constrained clustering of genes makes BiCoN more robust to noise and batch effects than typical clustering and biclustering methods. BiCoN can faithfully reproduce known disease subtypes as well as novel, clinically relevant patient subgroups, as we could demonstrate using breast and lung cancer datasets. In summary, BiCoN is a novel systems medicine tool that combines several heuristic optimization strategies for robust disease mechanism extraction. BiCoN is well-documented and freely available as a python package or a web interface.

Availability and implementation

PyPI package: https://pypi.org/project/bicon.

Web interface

https://exbio.wzw.tum.de/bicon.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +33381851,Dementia key gene identification with multi-layered SNP-gene-disease network.,"

Motivation

Recently, various approaches for diagnosing and treating dementia have received significant attention, especially in identifying key genes that are crucial for dementia. If the mutations of such key genes could be tracked, it would be possible to predict the time of onset of dementia and significantly aid in developing drugs to treat dementia. However, gene finding involves tremendous cost, time and effort. To alleviate these problems, research on utilizing computational biology to decrease the search space of candidate genes is actively conducted. In this study, we propose a framework in which diseases, genes and single-nucleotide polymorphisms are represented by a layered network, and key genes are predicted by a machine learning algorithm. The algorithm utilizes a network-based semi-supervised learning model that can be applied to layered data structures.

Results

The proposed method was applied to a dataset extracted from public databases related to diseases and genes with data collected from 186 patients. A portion of key genes obtained using the proposed method was verified in silico through PubMed literature, and the remaining genes were left as possible candidate genes.

Availability and implementation

The code for the framework will be available at http://www.alphaminers.net/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +33402389,Hypoxia-Induced Suppression of Alternative Splicing of MBD2 Promotes Breast Cancer Metastasis via Activation of FZD1.,"Metastasis is responsible for the majority of breast cancer-related deaths, however, the mechanisms underlying metastasis in this disease remain largely elusive. Here we report that under hypoxic conditions, alternative splicing of MBD2 is suppressed, favoring the production of MBD2a, which facilitates breast cancer metastasis. Specifically, MBD2a promoted, whereas its lesser known short form MBD2c suppressed metastasis. Activation of HIF1 under hypoxia facilitated MBD2a production via repression of SRSF2-mediated alternative splicing. As a result, elevated MBD2a outcompeted MBD2c for binding to promoter CpG islands to activate expression of FZD1, thereby promoting epithelial-to-mesenchymal transition and metastasis. Strikingly, clinical data reveal significantly correlated expression of MBD2a and MBD2c with the invasiveness of malignancy, indicating opposing roles for MBD2 splicing variants in regulating human breast cancer metastasis. Collectively, our findings establish a novel link between MBD2 switching and tumor metastasis and provide a promising therapeutic strategy and predictive biomarkers for hypoxia-driven breast cancer metastasis. SIGNIFICANCE: This study defines the opposing roles and clinical relevance of MBD2a and MBD2c, two MBD2 alternative splicing products, in hypoxia-driven breast cancer metastasis. 
GRAPHICAL ABSTRACT: http://cancerres.aacrjournals.org/content/canres/81/5/1265/F1.large.jpg.","hji,kes",0,0,0,2,0,NA,NA +33457124,GAPDH and PUM1: Optimal Housekeeping Genes for Quantitative Polymerase Chain Reaction-Based Analysis of Cancer Stem Cells and Epithelial-Mesenchymal Transition Gene Expression in Rectal Tumors.,"Background The overwhelming majority of published articles have taken colon and rectal cancer as a single group, i.e., colorectal cancer, when normalizing gene expression data with housekeeping genes (HKG) in quantitative polymerase chain reaction (qPCR) experiments though there are published reports that suggest the differential expression pattern of genes between the colon and rectal cancer groups and hence the current experiment was attempted to find out the optimal set of housekeeping genes from the list of common HKG for rectal tumor gene expression analysis. Methods The expression of five potential housekeeping genes GAPDH, RPN1, PUM1, B2M, and PMM1 was analyzed through qPCR and BestKeeper software (http://www.wzw.tum.de/gene-quantification/bestkeeper.html) in 20 stage II-IV rectal cancer samples to check for uniformity in their expression pattern. Cancer stem cell (CSC) marker ALDH1 and epithelial-mesenchymal transition marker (EMT) markers E cadherin, vimentin, Twist, and SNAI2 expression were evaluated in conjunction with the two optimal reference genes in 10 rectal cancers as part of validation. Results The standard deviation of the cycle threshold value of GAPDH was found the lowest at 0.65 followed by RPN1 at 0.88, PUM1 at 0.94, PMM1 at 0.94, and B2M at 1.21 when analyzed with BestKeeper software. Using GAPDH and PUM1 as the reference gene for the validation phase, rectal cancer patients with stage III/IV showed a 4.79-fold change (P=0.006) in ALDH1 expression, and an 11.76-fold change in Twist expression (P=0.003) with respect to stage II rectal tumor when normalized with GAPDH and PUM1. 
Conclusion GAPDH and PUM1 can be used as an optimal set of housekeeping genes for gene expression-related experiments in rectal tumors. ALDH1 and Twist were found significantly overexpressed in stage III/IV rectal tumors in comparison to stage II rectal cancer. Genes associated with cancer stem cells and EMT markers could be optimally analyzed by normalizing them with GAPDH and PUM1 as housekeeping genes.","hji,kes",0,0,0,2,0,NA,NA +33483306,Locus of Heat Resistance (LHR) in Meat-Borne Escherichia coli: Screening and Genetic Characterization.,"Microbial resistance to processing treatments poses a food safety concern, as treatment tolerant pathogens can emerge. Occasional foodborne outbreaks caused by pathogenic Escherichia coli have led to human and economic losses. Therefore, this study screened for the extreme heat resistance (XHR) phenotype as well as one known genetic marker, the locus of heat resistance (LHR), in 4,123 E. coli isolates from diverse meat animals at different processing stages. The prevalences of XHR and LHR among the meat-borne E. coli were found to be 10.3% and 11.4%, respectively, with 19% agreement between the two. Finished meat products showed the highest LHR prevalence (24.3%) compared to other processing stages (0 to 0.6%). None of the LHR+ E. coli in this study would be considered pathogens based on screening for virulence genes. Four high-quality genomes were generated by whole-genome sequencing of representative LHR+ isolates. Nine horizontally acquired LHRs were identified and characterized, four plasmid-borne and five chromosomal. Nine newly identified LHRs belong to ClpK1 LHR or ClpK2 LHR variants sharing 61 to 68% nucleotide sequence identity, while one LHR appears to be a hybrid. Our observations suggest positive correlation between the number of LHR regions present in isolates and the extent of heat resistance. 
The isolate exhibiting the highest degree of heat resistance possessed four LHRs belonging to three different variant groups. Maintenance of as many as four LHRs in a single genome emphasizes the benefits of the LHR in bacterial physiology and stress response.IMPORTANCE Currently, a """"multiple-hurdle"""" approach based on a combination of different antimicrobial interventions, including heat, is being utilized during meat processing to control the burden of spoilage and pathogenic bacteria. Our recent study (M. Guragain, G. E. Smith, D. A. King, and J. M. Bosilevac, J Food Prot 83:1438-1443, 2020, https://doi.org/10.4315/JFP-20-103) suggests that U.S. beef cattle harbor Escherichia coli that possess the locus of heat resistance (LHR). LHR seemingly contributes to the global stress tolerance in bacteria and hence poses a food safety concern. Therefore, it is important to understand the distribution of the LHRs among meat-borne bacteria identified at different stages of different meat processing systems. Complete genome sequencing and comparative analysis of selected heat-resistant bacteria provide a clearer understanding of stress and heat resistance mechanisms. Further, sequencing data may offer a platform to gain further insights into the genetic background that provides optimal bacterial tolerance against heat and other processing treatments.","hji,kes",0,0,0,2,0,NA,NA +33523611,Developing an evidence-based online method of linking behaviour change techniques and theoretical mechanisms of action: a multiple methods study,"

Background

Many global health challenges may be targeted by changing people's behaviour. Behaviours including cigarette smoking, physical inactivity and alcohol misuse, as well as certain dietary behaviours, contribute to deaths and disability by increasing the risk of cancers, cardiovascular diseases and diabetes. Interventions have been designed to change these health behaviours with a view to reducing these health risks. However, the effectiveness of these interventions has been quite variable and further information is needed to enhance their success. More information is needed about the specific processes that underlie the effectiveness of intervention strategies.

Aim

Researchers have developed a taxonomy of 93 behaviour change techniques (i.e. the active components of an intervention that bring about behavioural change), but little is known regarding their potential mechanisms of action (i.e. the processes through which a behaviour change technique affects behaviour). We therefore aimed to examine links between behaviour change techniques and mechanisms of action.

Method

First, we conducted a literature synthesis study of 277 behaviour change intervention studies, from which we extracted information on links, described by authors, between behaviour change techniques and mechanisms of action, and identified an average of 10 links per intervention report. Second, behaviour change experts (n = 105) were engaged in a three-round consensus study in which they discussed and rated their confidence in the presence/absence of links and non-links between commonly used behaviour change techniques (n = 61) and a set of mechanisms of action (n = 26). Ninety links and 460 non-links reached the pre-set threshold of 80% agreement. To enhance the validity of these results, a third study was conducted that triangulated the findings of the first two studies. Discrepancies and uncertainties between the studies were included in a reconciliation consensus study with a new group of experts (n = 25). The final results identified 92 definite behaviour change technique–mechanism of action links and 465 definite non-links. In a fourth study, we examined whether or not groups of behaviour change techniques used together frequently across interventions revealed shared theoretical underpinnings. We found that experts agreed on the underlying theory for three groups of behaviour change techniques.

Results

Our results are potentially useful to policy-makers and practitioners in selecting behaviour change techniques to include in behaviour change interventions. However, our data do not demonstrate that the behaviour change techniques are effective in targeting the mechanism of action; rather, the links identified may be the best bets for interventions that are effective in changing mechanisms of action, and the non-links are unlikely to be effective. Researchers examining effectiveness of interventions in either primary studies or evidence syntheses may consider these links for further investigation.

Conclusion

To make our results usable by researchers, practitioners and policy-makers, they are available in an online interactive tool, which enables discussion and collaboration (https://theoryandtechniquetool.humanbehaviourchange.org/); accessed 1 March 2020. This work, building on previous work to develop the behaviour change technique taxonomy, is part of an ongoing programme of work: the Human Behaviour Change Project (www.humanbehaviourchange.org/; accessed 1 March 2020).

Funding

This project was funded by the Medical Research Council via its Methodology Panel: Developing methodology for designing and evaluating theory-based complex interventions: an ontology for linking behaviour change techniques to theory (reference MR/L011115/1).","hji,kes",0,0,0,2,0,NA,iffy +33533177,Neuroblastoma: The basis for cure in limited-resource settings.,"

Background

Neuroblastoma (NB) contributes the most to the mortality of childhood malignancies worldwide. The disease spectrum is heterogenous and the management complex and costly, especially in advanced disease or disease with adverse biology. In low- and middle-income countries (LMICs) the majority of NB presents in advanced stages. Therefore, with limited resources and poor prognosis the treatment of NB is often not a priority. The aim of the study was to evaluate the research activities and perceptions of the management of NB that determine the research and treatment approaches in LMICs.

Methods

Data were sourced from https://www.clinicaltrials.gov/ identifying NB trials open to LMIC. Abstracts on NB research presented at the International Society for Paediatric Oncology (SIOP) Congresses between 2014 and 2020 were evaluated according to income status. An online survey evaluating medical views on NB in LMICs and the effect on the management was conducted. Descriptive analysis was done. Where appropriate categorical association between covariates was assessed using the Pearson chi-square (χ²) test or Fisher's exact test.

Results

There were 15/562 (2.7%) trials open to LMIC. Only six of 138 (4.3%) LMIC participated in NB trials. Of the 688 abstracts presented between 2014 and 2020 at the SIOP International Congress on NB as primary subject, 297 (42.7%) were from LMICs. Only two were from low-income countries (LICs). Sixty-one countries responded to the NB survey. Positive views towards NB management were present when treatment was based on a national protocol, the availability of trimodal or advanced treatment options were present, and when a balance of metastatic or local disease were treated.

Conclusion

Management of NB in LMICs should include increased advocacy and research as well as implementation of national management strategies.","hji,kes",0,0,0,2,0,NA,NA +33544274,The Characterization of Sex Differences in Hypoglycemia-Induced Activation of HPA Axis on the Transcriptomic Level.,"Activation of the hypothalamic-pituitary-adrenal (HPA) axis using an insulin tolerance test (ITT) is a medical diagnostic procedure that is frequently used in humans to assess the HPA and growth-hormone (GH) axes. Whether sex differences exist in the response to ITT stress is unknown. Thus, investigations into the analysis of transcripts during activation of the HPA axis in response to hypoglycemia have revealed the underlying influences of sex in signaling pathways that stimulate the HPA axis. We assessed four time points of ITT application in Balb/c mice. After insulin injection, expression levels of 192 microRNAs and 41 mRNAs associated with the HPA, GHand hypothalamic-pituitary-gonadal (HPG) axes were determined by real-time RT-PCR in the hypothalamus, pituitary and adrenal tissues, as well as blood samples (Raw data accession: https://drive.google.com/drive/folders/10qI00NAtjxOepcNKxSJnQbJeBFa6zgHK?usp=sharing ). Although the ITT is commonly used as a gold standard for evaluating the HPA axis, we found completely different responses between males and females with respect to activation of the HPA axis. While activation of several transcripts in the hypothalamus and pituitary was observed after performing the ITT in males within 10min, females responded via the pituitary and adrenal immediately and durably over 40min. Additionally, we found that microRNA alterations precede mRNA responses in the HPA axis. Furthermore, robust changes in the levels of several transcripts including Avpr1b and Avpr2 observed at all time points strongly suggest that transcriptional control of these genes occurs mostly via differential signaling in pituitary and blood between males and females. 
Male and female HPA axis responses to ITT involve a number of sophisticated regulatory signaling pathways of miRNAs and mRNAs. Our results highlight the first robust markers in several layers of HPA, HPG and GH axis involved in ITT/hypoglycemia stress-induced dynamics.","hji,kes",0,0,0,2,0,NA,NA +33577004,IGHG1 functions as an oncogene in tongue squamous cell carcinoma via JAK1/STAT5 signaling.,"The article """"IGHG1 functions as an oncogene in tongue squamous cell carcinoma via JAK1/STAT5 signaling, by Y.-L. Zheng, Y.-Y. Li, J.-F. Xie, H.-Q. Ma, published in Eur Rev Med Pharmacol Sci 2020; 24 (12): 6716-6725-DOI: 10.26355/eurrev_202006_21659-PMID: 32633362"""" has been withdrawn from the authors stating that """"the experimental data in the article are wrong"""". The Publisher apologizes for any inconvenience this may cause. https://www.europeanreview.org/article/21659.","hji,kes",0,0,0,2,0,NA,NA +33587706,StructADMM: Achieving Ultrahigh Efficiency in Structured Pruning for DNNs.,"Weight pruning methods of deep neural networks (DNNs) have been demonstrated to achieve a good model pruning rate without loss of accuracy, thereby alleviating the significant computation/storage requirements of large-scale DNNs. Structured weight pruning methods have been proposed to overcome the limitation of irregular network structure and demonstrated actual GPU acceleration. However, in prior work, the pruning rate (degree of sparsity) and GPU acceleration are limited (to less than 50%) when accuracy needs to be maintained. In this work, we overcome these limitations by proposing a unified, systematic framework of structured weight pruning for DNNs. It is a framework that can be used to induce different types of structured sparsity, such as filterwise, channelwise, and shapewise sparsity, as well as nonstructured sparsity. 
The proposed framework incorporates stochastic gradient descent (SGD; or ADAM) with alternating direction method of multipliers (ADMM) and can be understood as a dynamic regularization method in which the regularization target is analytically updated in each iteration. Leveraging special characteristics of ADMM, we further propose a progressive, multistep weight pruning framework and a network purification and unused path removal procedure, in order to achieve higher pruning rate without accuracy loss. Without loss of accuracy on the AlexNet model, we achieve 2.58x and 3.65x average measured speedup on two GPUs, clearly outperforming the prior work. The average speedups reach 3.15x and 8.52x when allowing a moderate accuracy loss of 2%. In this case, the model compression for convolutional layers is 15.0x, corresponding to 11.93x measured CPU speedup. As another example, for the ResNet-18 model on the CIFAR-10 data set, we achieve an unprecedented 54.2x structured pruning rate on CONV layers. This is 32x higher pruning rate compared with recent work and can further translate into 7.6x inference time speedup on the Adreno 640 mobile GPU compared with the original, unpruned DNN model. We share our codes and models at the link http://bit.ly/2M0V7DO.","hji,kes",0,0,0,2,0,NA,NA +33621129,Associations between Blood Lead Levels and Coronary Artery Stenosis Measured Using Coronary Computed Tomography Angiography.,"

Background

Lead exposure is a risk factor for increased blood pressure and cardiovascular disease, even when blood lead levels (BLLs) are within the normal range.

Objective

This study aimed to investigate the association between BLL and coronary artery stenosis (CAS) in asymptomatic adults using 128-slice dual-source coronary computed tomography (CT) angiography.

Methods

We analyzed medical records data from 2,193 adults (1,461 men and 732 women) who elected to complete a screening health examination, coronary CT angiography, and BLL measurement during 2011-2018 and had no history of CAS symptoms, cardiovascular disease, or occupational exposure to lead. Logistic regression models were used to estimate associations between moderate-to-severe CAS (≥25% stenosis) and a 1-μg/dL increase in blood lead, with and without adjustment for age, sex, hypertension, diabetes mellitus, dyslipidemia, body mass index, regular exercise, smoking status, and alcohol drinking.

Results

BLLs ranged from 0.12 to 10.14 μg/dL, with an arithmetic mean of 2.71 ± 1.26 μg/dL. The arithmetic mean was higher for men than for women (2.98 ± 1.26 μg/dL vs. 2.18 ± 1.08 μg/dL, p<0.001) and higher in the moderate-to-severe CAS group than in the no-CAS or <25% stenosis group (3.02 ± 1.44 μg/dL vs. 2.67 ± 1.23 μg/dL, p<0.001). Moderate-to-severe CAS was significantly associated with BLL before and after adjustment, with an adjusted odds ratio for a 1-μg/dL increase in BLL of 1.14 (95% CI: 1.02, 1.26), p=0.017.

Conclusions

BLL was positively associated with the prevalence of moderate-to-severe CAS in Korean adults who completed an elective screening examination for early cardiovascular disease, 94% of whom had a BLL of <5 μg/dL. More efforts and a strict health policy are needed to further reduce BLLs in the general population. https://doi.org/10.1289/EHP7351.","hji,kes",0,0,0,2,0,NA,NA +33660829,LncRNA ASB16-AS1 promotes proliferation and inhibits apoptosis of non small cell lung cancer cells by activating the Wnt/β catenin signaling pathway.,"The article """"LncRNA ASB16-AS1 promotes proliferation and inhibits apoptosis of non small cell lung cancer cells by activating the Wnt/β catenin signaling pathway, by L.-J. Tan, J.-T. Liu, M. Yang, T. Ju, Y.-S. Zhang, published in Eur Rev Med Pharmacol Sci 2020; 24 (4): 1870-1876-DOI: 10.26355/eurrev_202002_20365-PMID: 32141556"""" has been withdrawn from the authors due to some inaccuracies (some data cannot be repeated by our further research). The Publisher apologizes for any inconvenience this may cause. https://www.europeanreview.org/article/20365.","hji,kes",0,0,0,2,0,NA,NA +33675341,Maternal BMI is positively associated with human milk fat: a systematic review and meta-regression analysis.,"

Background

Lack of robust estimates of human-milk nutrient composition and influential maternal factors, such as body composition, are barriers to informing nutrition policies and programs.

Objective

The objective was to understand the relation between maternal BMI and human-milk energy, fat, and/or total protein.

Methods

Four electronic databases (MEDLINE, Embase, CINAHL, and Web of Science) were searched. Outcomes assessed were human-milk energy (kcal/L), fat (g/L), and total protein (g/L) from mothers 1 to 6 mo postpartum. Studies with data on maternal BMI or weight and height that quantified human-milk energy, fat, or protein between 1 and 6 mo postpartum were eligible. Random-effects meta-regression weighted by the inverse of the study-level SE was completed for each of the 3 outcomes. The certainty of evidence for each outcome was assessed using the GRADE (Grading of Recommendations Assessment, Development, and Evaluation) approach.

Results

A total of 11,373 titles and abstracts were identified, and after full-text screening, 69 articles of 66 studies were included. Meta-regression results showed a positive association between maternal BMI and human-milk fat (β: 0.56 g/L; 95% CI: 0.034, 1.1; P = 0.04; I2 = 93.7%, n = 63 datapoints). There was no significant association between maternal BMI and human-milk energy (β: 3.9 kcal/L; 95% CI: -1.6, 9.5; P = 0.16, I2 = 93.3%, n = 40 datapoints) or total protein (β: 0.13 g/L; 95% CI: -0.16, 0.41; P = 0.37, I2 = 99.1%, n = 40 datapoints). The certainty of evidence for human-milk energy was low and the certainty of evidence for fat and total protein was very low.

Conclusions

Meta-regression analysis of available literature suggested an association between maternal BMI and human-milk fat between 1 and 6 mo postpartum. Future studies are needed to confirm the relation between maternal BMI; variation in human-milk energy, fat, and protein content; and the implications for child growth and development. This review is registered with International Prospective Register of Systematic Reviews (PROSPERO 2018 CRD42018098808) at https://www.crd.york.ac.uk/prospero/.","hji,kes",0,0,0,2,0,NA,NA +33677518,DeepS: A web server for image optical sectioning and super resolution microscopy based on a deep learning framework.,"

Motivation

Microscopy technology plays important roles in many biological research fields. Solvent-cleared brain high-resolution (HR) 3D image reconstruction is an important microscopy application. However, 3D microscopy image generation is time-consuming and expensive. Therefore, we have developed a deep learning framework (DeepS) for both image optical sectioning and super resolution microscopy.

Results

Using DeepS to perform super resolution solvent-cleared mouse brain microscopy 3D image yields improved performance in comparison with the standard image processing workflow. We have also developed a web server to allow online usage of DeepS. Users can train their own models with only one pair of training images using the transfer learning function of the web server.

Availability

http://deeps.cibr.ac.cn.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +33693506,COVID-19: disease pathways and gene expression changes predict methylprednisolone can improve outcome in severe cases.,"

Motivation

COVID-19 has several distinct clinical phases: a viral replication phase, an inflammatory phase, and in some patients, a hyper-inflammatory phase. High mortality is associated with patients developing cytokine storm syndrome. Treatment of hyper-inflammation in these patients using existing, approved therapies with proven safety profiles could address the immediate need to reduce mortality.

Results

We analyzed the changes in the gene expression, pathways and putative mechanisms induced by SARS-CoV2 in NHBE, and A549 cells, as well as COVID-19 lung vs. their respective controls. We used these changes to identify FDA approved drugs that could be repurposed to help COVID-19 patients with severe symptoms related to hyper-inflammation. We identified methylprednisolone (MP) as a potential leading therapy. The results were then confirmed in five independent validation data sets including Vero E6 cells, lung and intestinal organoids, as well as additional patient lung sample vs. their respective controls. Finally, the efficacy of MP was validated in an independent clinical study. Thirty-day all-cause mortality occurred at a significantly lower rate in the MP-treated group compared to control group (29.6% vs. 16.6%, p = 0.027). Clinical results confirmed the in silico prediction that MP could improve outcomes in severe cases of COVID-19. A low number needed to treat (NNT = 5) suggests MP may be more efficacious than dexamethasone or hydrocortisone.

Availability

iPathwayGuide is available at https://ipathwayguide.advaitabio.com/.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +33720349,"Robust, flexible, and scalable tests for Hardy-Weinberg equilibrium across diverse ancestries.","Traditional Hardy-Weinberg equilibrium (HWE) tests (the 2 test and the exact test) have long been used as a metric for evaluating genotype quality, as technical artifacts leading to incorrect genotype calls often can be identified as deviations from HWE. However, in data sets composed of individuals from diverse ancestries, HWE can be violated even without genotyping error, complicating the use of HWE testing to assess genotype data quality. In this manuscript, we present the Robust Unified Test for HWE (RUTH) to test for HWE while accounting for population structure and genotype uncertainty, and to evaluate the impact of population heterogeneity and genotype uncertainty on the standard HWE tests and alternative methods using simulated and real sequence data sets. Our results demonstrate that ignoring population structure or genotype uncertainty in HWE tests can inflate false-positive rates by many orders of magnitude. Our evaluations demonstrate different tradeoffs between false positives and statistical power across the methods, with RUTH consistently among the best across all evaluations. RUTH is implemented as a practical and scalable software tool to rapidly perform HWE tests across millions of markers and hundreds of thousands of individuals while supporting standard VCF/BCF formats. RUTH is publicly available at https://www.github.com/statgen/ruth.","hji,kes",0,0,0,2,0,NA,NA +33725111,Whole genome analysis of more than 10 000 SARS-CoV-2 virus unveils global genetic diversity and target region of NSP6.,"Whole genome analysis of SARS-CoV-2 is important to identify its genetic diversity. Moreover, accurate detection of SARS-CoV-2 is required for its correct diagnosis. 
To address these, first we have analysed publicly available 10 664 complete or near-complete SARS-CoV-2 genomes of 73 countries globally to find mutation points in the coding regions as substitution, deletion, insertion and single nucleotide polymorphism (SNP) globally and country wise. In this regard, multiple sequence alignment is performed in the presence of reference sequence from NCBI. Once the alignment is done, a consensus sequence is build to analyse each genomic sequence to identify the unique mutation points as substitutions, deletions, insertions and SNPs globally, thereby resulting in 7209, 11700, 119 and 53 such mutation points respectively. Second, in such categories, unique mutations for individual countries are determined with respect to other 72 countries. In case of India, unique 385, 867, 1 and 11 substitutions, deletions, insertions and SNPs are present in 566 SARS-CoV-2 genomes while 458, 1343, 8 and 52 mutation points in such categories are common with other countries. In majority (above 10%) of virus population, the most frequent and common mutation points between global excluding India and India are L37F, P323L, F506L, S507G, D614G and Q57H in NSP6, RdRp, Exon, Spike and ORF3a respectively. While for India, the other most frequent mutation points are T1198K, A97V, T315N and P13L in NSP3, RdRp, Spike and ORF8 respectively. These mutations are further visualised in protein structures and phylogenetic analysis has been done to show the diversity in virus genomes. Third, a web application is provided for searching mutation points globally and country wise. Finally, we have identified the potential conserved region as target that belongs to the coding region of ORF1ab, specifically to the NSP6 gene. Subsequently, we have provided the primers and probes using that conserved region so that it can be used for detecting SARS-CoV-2. 
Contact:indrajit@nitttrkol.ac.inSupplementary information: Supplementary data are available at http://www.nitttrkol.ac.in/indrajit/projects/COVID-Mutation-10K.","hji,kes",0,0,0,2,0,NA,NA +33751898,Rate of Communicative Gestures and Developmental Outcomes in Toddlers With and Without Autism Spectrum Disorder During a Home Observation.,"Purpose Most toddlers with autism spectrum disorder and other developmental delays receive early intervention at home and may not participate in a clinic-based communication evaluation. However, there is limited research that has prospectively examined communication in very young children with and without autism in a home-based setting. This study used granular observational coding to document the communicative acts performed by toddlers with autism, developmental delay, and typical development in the home environment. Method Children were selected from the archival database of the FIRST WORDS Project (N = 211). At approximately 20 months of age, each child participated in everyday activities with a caregiver during an hour-long, video-recorded, naturalistic home observation. Inventories of unique gestures, rates per minute, and proportions of types of communicative acts and communicative functions were coded and compared using a one-way analysis of variance. Concurrent and prospective relationships between rate of communication and measures of social communication, language development, and autism symptoms were examined. Results A total of 40,738 communicative acts were coded. Children with autism, developmental delay, and typical development used eight, nine, and 12 unique gestures on average, respectively. Children with autism used deictic gestures, vocalizations, and communicative acts for behavior regulation at significantly lower rates than the other groups. Statistically significant correlations were observed between rate of communication and several outcome measures. 
Conclusion Observation of social communication in the natural environment may improve early identification of children with autism and communication delays, complement clinic-based assessments, and provide useful information about a child's social communication profile and the family's preferred activities and intervention priorities. Supplemental Material https://doi.org/10.23641/asha.14204522.","hji,kes",0,0,0,2,0,NA,NA +33761699,Increased bleeding events with the addition of apixaban to the dual anti-platelet regimen for the treatment of patients with acute coronary syndrome: A meta-analysis.,"

Background

Dual anti-platelet therapy (DAPT) with aspirin and clopidogrel has been the mainstay of treatment for patients with acute coronary syndrome (ACS). However, the recurrence of thrombotic events, potential aspirin and clopidogrel hypo-responsiveness, and other limitations of DAPT have led to the development of newer oral anti-thrombotic drugs. Apixaban, a new non-vitamin K antagonist, has been approved for use. In this meta-analysis, we aimed to compare the bleeding outcomes observed with the addition of apixaban to DAPT for the treatment of patients with ACS.

Methods

Online databases including EMBASE, Cochrane Central, http://www.ClinicalTrials.gov, MEDLINE and Web of Science were searched for English based publications comparing the use of apixaban added to DAPT for the treatment of patients with ACS. Different categories of bleeding events and cardiovascular outcomes were assessed. The analysis was carried out by the RevMan software version 5.4. Odds ratios (OR) with 95% confidence intervals (CI) were used to represent the data following analysis.

Results

This research analysis consisted of 4 trials with a total number of 9010 participants. Thrombolysis in myocardial infarction (TIMI) defined major bleeding (OR: 2.45, 95% CI: 1.45-4.12; P = .0008), TIMI defined minor bleeding (OR: 3.12, 95% CI: 1.71-5.70; P = .0002), International society of thrombosis and hemostasis (ISTH) major bleeding (OR: 2.49, 95% CI: 1.80-3.45; P = .00001) and Global Use of Strategies to Open Occluded Arteries (GUSTO) defined severe bleeding (OR: 3.00, 95% CI: 1.56-5.78; P = .01) were significantly increased with the addition of apixaban to DAPT versus DAPT alone in these patients with ACS. However fatal bleeding (OR: 10.96, 95% CI: 0.61-198.3; P = .11) was not significantly different.

Conclusions

Addition of the novel oral anticoagulant apixaban to the DAPT regimen significantly increased bleeding and therefore did not show any beneficial effect in these patients with ACS. However, due to the extremely limited data, we apparently have to rely on future larger studies to confirm this hypothesis.","hji,kes",0,0,0,2,0,NA,NA +33765181,The hepatokine fetuin-A disrupts functional maturation of pancreatic beta cells.,"

Aims/hypothesis

Neonatal beta cells carry out a programme of postnatal functional maturation to achieve full glucose responsiveness. A partial loss of the mature phenotype of adult beta cells may contribute to a reduction of functional beta cell mass and accelerate the onset of type 2 diabetes. We previously found that fetuin-A, a hepatokine increasingly secreted by the fatty liver and a determinant of type 2 diabetes, inhibits glucose-stimulated insulin secretion (GSIS) of human islets. Since fetuin-A is a ubiquitous fetal glycoprotein that declines peripartum, we examined here whether fetuin-A interferes with the functional maturity of beta cells.

Methods

The effects of fetuin-A were assessed during in vitro maturation of porcine neonatal islet cell clusters (NICCs) and in adult human islets. Expression alterations were examined via microarray, RNA sequencing and reverse transcription quantitative real-time PCR (qRT-PCR), proteins were analysed by western blotting and immunostaining, and insulin secretion was quantified in static incubations.

Results

NICC maturation was accompanied by the gain of glucose-responsive insulin secretion (twofold stimulation), backed up by mRNA upregulation of genes governing beta cell identity and function, such as NEUROD1, UCN3, ABCC8 and CASR (Log2 fold change [Log2FC] > 1.6). An active TGFβ receptor (TGFBR)-SMAD2/3 pathway facilitates NICC maturation, since the TGFBR inhibitor SB431542 counteracted the upregulation of aforementioned genes and de-repressed ALDOB, a gene disallowed in mature beta cells. In fetuin-A-treated NICCs, upregulation of beta cell markers and the onset of glucose responsiveness were suppressed. Concomitantly, SMAD2/3 phosphorylation was inhibited. Transcriptome analysis confirmed inhibitory effects of fetuin-A and SB431542 on TGFβ-1- and SMAD2/3-regulated transcription. However, contrary to SB431542 and regardless of cMYC upregulation, fetuin-A inhibited beta cell proliferation (0.27 ± 0.08% vs 1.0 ± 0.1% Ki67-positive cells in control NICCs). This effect was sustained by reduced expression (Log2FC = -2.4) of FOXM1, CENPA, CDK1 or TOP2A. In agreement, the number of insulin-positive cells was lower in fetuin-A-treated NICCs than in control NICCs (14.4 ± 1.2% and 22.3 ± 1.1%, respectively). In adult human islets fetuin-A abolished glucose responsiveness, i.e. 1.7- and 1.1-fold change over 2.8 mmol/l glucose in control- and fetuin-A-cultured islets, respectively. In addition, fetuin-A reduced SMAD2/3 phosphorylation and suppressed expression of proliferative genes. Of note, in non-diabetic humans, plasma fetuin-A was negatively correlated (p = 0.013) with islet beta cell area.

Conclusions/interpretation

Our results suggest that the perinatal decline of fetuin-A relieves TGFBR signalling in islets, a process that facilitates functional maturation of neonatal beta cells. Functional maturity remains revocable in later life, and the occurrence of a metabolically unhealthy milieu, such as liver steatosis and elevated plasma fetuin-A, can impair both function and adaptive proliferation of beta cells.

Data availability

The RNAseq datasets and computer code produced in this study are available in the Gene Expression Omnibus (GEO): GSE144950; https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE144950.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +33768769,Prognostic factors in postoperative radiotherapy for prostate cancer - tertiary center experience.,"

Background

The aim of the study was to analyse the prognostic factors in postoperative prostate cancer irradiation and develop a nomogram for disease-free survival (DFS).

Patients and methods

This retrospective study included 236 consecutive prostate cancer patients who had radical prostatectomy followed by radiotherapy (RT) at a single tertiary institution between 2009 and 2014. The main outcome was DFS analysed through uni- and multivariable analysis, Kaplan-Meier curves, log-rank testing, recursive partitioning analysis, and nomogram development.

Results

The median follow up was 62.3 (interquartile range [IQR] 38.1-79) months. The independent clinical factors associated with increased risk of recurrence or progression in the multivariate analysis (MVA) were prostate-specific antigen (PSA) level before RT, pT3 characteristic, and local failure as salvage indication. The value of PSA nadir had a significant impact on the risk of biochemical failure. Biochemical control and DFS were significantly different depending on treatment indication (p < 0.0001). The recursive partitioning analysis highlighted the importance of the PSA level before RT, Gleason Grade Group, PSA nadir, and local failure as a treatment indication. Finally, the nomogram for DFS was developed and is available online at https://apps.konsta.com.pl/app/prostate-salvage-dfs/.

Conclusions

The Pre-RT PSA level, pT3 characteristic and local failure as salvage indication are pivotal prognostic factors associated with increased risk of recurrence or progression. The Gleason grade group of 4-5 and PSA nadir value allow for further risk stratification. The treatment outcomes in postoperative prostate cancer irradiation are significantly different depending on treatment indication. An online nomogram comprising of both pre-treatment and current data was developed allowing for visualization of changes in prognosis depending on clinical data.","hji,kes",0,0,0,2,0,NA,NA +33810805,Fast lightweight accurate xenograft sorting.,"

Motivation

With an increasing number of patient-derived xenograft (PDX) models being created and subsequently sequenced to study tumor heterogeneity and to guide therapy decisions, there is a similarly increasing need for methods to separate reads originating from the graft (human) tumor and reads originating from the host species' (mouse) surrounding tissue. Two kinds of methods are in use: On the one hand, alignment-based tools require that reads are mapped and aligned (by an external mapper/aligner) to the host and graft genomes separately first; the tool itself then processes the resulting alignments and quality metrics (typically BAM files) to assign each read or read pair. On the other hand, alignment-free tools work directly on the raw read data (typically FASTQ files). Recent studies compare different approaches and tools, with varying results.

Results

We show that alignment-free methods for xenograft sorting are superior concerning CPU time usage and equivalent in accuracy. We improve upon the state of the art sorting by presenting a fast lightweight approach based on three-way bucketed quotiented Cuckoo hashing. Our hash table requires memory comparable to an FM index typically used for read alignment and less than other alignment-free approaches. It allows extremely fast lookups and uses less CPU time than other alignment-free methods and alignment-based methods at similar accuracy. Several engineering steps (e.g., shortcuts for unsuccessful lookups, software prefetching) improve the performance even further.

Availability

Our software xengsort is available under the MIT license at http://gitlab.com/genomeinformatics/xengsort . It is written in numba-compiled Python and comes with sample Snakemake workflows for hash table construction and dataset processing.","hji,kes",0,0,0,2,0,NA,NA +33816867,"Citation.js: a format-independent, modular bibliography tool for the browser and command line.","

Background

Given the vast number of standards and formats for bibliographical data, any program working with bibliographies and citations has to be able to interpret such data. This paper describes the development of Citation.js (https://citation.js.org/), a tool to parse and format according to those standards. The program follows modern guidelines for software in general and JavaScript in specific, such as version control, source code analysis, integration testing and semantic versioning.

Results

The result is an extensible tool that has already seen adaption in a variety of sources and use cases: as part of a server-side page generator of a publishing platform, as part of a local extensible document generator, and as part of an in-browser converter of extracted references. Use cases range from transforming a list of DOIs or Wikidata identifiers into a BibTeX file on the command line, to displaying RIS references on a webpage with added Altmetric badges to generating """"How to cite this"""" sections on a blog. The accuracy of conversions is currently 27% for properties and 60% for types on average and a typical initialization takes 120 ms in browsers and 1 s with Node.js on the command line.

Conclusions

Citation.js is a library supporting various formats of bibliographic information in a broad selection of use cases and environments. Given the support for plugins, more formats can be added with relative ease.","hji,kes",0,0,0,2,0,NA,NA +33832071,Genomic analysis of pancreatic cancer reveals 3 molecular subtypes with different clinical outcomes.,"

Abstract

Pancreatic cancer has a very high mortality with a 5-year survival of <5%. The purpose of this study was to classify specific molecular subtypes associated with prognosis of pancreatic cancer using The Cancer Genome Atlas (TCGA) multiplatform genomic data. Multiplatform genomic data (N = 178), including gene expression, copy number alteration, and somatic mutation data, were obtained from cancer browser (https://genome-cancer.ucsc.edu, cohort: TCGA Pancreatic Cancer). Clinical data including survival results were analyzed. We also used validation cohort (GSE50827) to confirm the robustness of these molecular subtypes in pancreatic cancer. When we performed unsupervised clustering using TCGA gene expression data, we found three distinct molecular subtypes associated with different survival results. Copy number alteration and somatic mutation data showed different genomic patterns for these three subtypes. Ingenuity pathway analysis revealed that each subtype showed differentially altered pathways. Using each subtype-specific genes (200 were selected), we could predict molecular subtype in another cohort, confirming the robustness of these molecular subtypes of pancreatic cancer. Cox regression analysis revealed that molecular subtype is the only significant prognostic factor for pancreatic cancer (P = .042, 95% confidence interval 0.523-0.98). Genomic analysis of pancreatic cancer revealed 3 distinct molecular subtypes associated with different survival results. Using these subtype-specific genes and prediction model, we could predict molecular subtype associated with prognosis of pancreatic cancer.","hji,kes",0,0,0,2,0,NA,NA +33851388,Acquired FXIII Deficiency is Associated with High Morbidity.,"

Background

A factor XIII (FXIII) level >30% is considered necessary to prevent spontaneous bleeding. Bleeding is also a risk in patients with acquired FXIII deficiency, but the hemostatic level of FXIII in this context remains to be determined.

Methods

We retrospectively analyzed all patients diagnosed with acquired FXIII deficiency at a large hospital over 3 years (study ID NCT04416594, http://www.clinicaltrials.gov) and assessed clinical data to identify the best cut-off point for FXIII activity to distinguish between low and high risk of major bleeding in a mixed medical and surgical population.

Results

Of the 97 patients who experienced bleeding despite a normal coagulation test, 43.2% had FXIII activity <70%. FXIII activity was significantly lower in surgical patients and patients admitted to the intensive care unit (ICU). Low FXIII activity was significantly associated with long ICU stays and a high incidence of major bleeding.

Conclusion

Acquired FXIII deficiency is associated with high morbidity. The hemostatic level of FXIII in the setting of acquired FXIII deficiency might be above 30%.","hji,kes",0,0,0,2,0,NA,NA +33870774,The association between relationship strain and emotional well-being among older adult couples: the moderating role of social connectedness.,"

Objectives

The present study examines the moderating role of social connectedness (i.e. closeness, talk frequency, social network size, and neighborhood social ties) in the association between one's own and spouse's relationship strain and emotional well-being (i.e. depressive symptoms, happiness, and loneliness).

Method

Married couples (N = 865) were drawn from the second wave of the National Social, Health, and Aging Project. One Actor Partner Interdependence Model (APIM) and one Actor Partner Interdependence Model with Moderation (APIMoM) were conducted.

Results

In terms of actor effects, relationship strain was associated with all emotional well-being outcomes. Wives' and husbands' greater relationship strain was associated with spouses' loneliness. Only wives' greater relationship strain was associated with her husbands' higher level of depressive symptoms and no partner effects were found for happiness. In six instances, social connectedness factors helped to ameliorate the association between self/spouse relationship strain, depressive symptoms, and happiness. However, wives' greater neighborhood social ties amplified the association between wives greater relationship strain and husbands' greater depressive symptoms. We did not find that social connectedness factors moderated the associations between self/spouse relationship strain and loneliness.

Conclusion

Even in late life marriages, marital strain is associated with less happiness and greater depressive symptoms and loneliness. Practitioners addressing emotional well-being may need to pay attention to spousal perceptions of relationship strain and social relationships external to the marital relationship when working with heterosexual couples. Efforts throughout the life course should be made to ensure connections with diverse types of social networks.Supplemental data for this article is available online at https://doi.org/10.1080/13607863.2021.1910786.","hji,kes",0,0,0,2,0,NA,NA +33900211,"Effect of Diameter and Number of Hepatocellular Carcinomas on Survival After Resection, Transarterial Chemoembolization, and Ablation.","

Introduction

Most studies predicting survival after resection, transarterial chemoembolization (TACE), and ablation analyzed diameter and number of hepatocellular carcinomas (HCCs) as dichotomous variables, resulting in an underestimation of risk variation. We aimed to develop and validate a new prognostic model for patients with HCC using largest diameter and number of HCCs as continuous variables.

Methods

The prognostic model was developed using data from patients undergoing resection, TACE, and ablation in 645 Japanese institutions. The model results were shown after balanced using the inverse probability of treatment-weighted analysis and were externally validated in an international multi-institution cohort.

Results

Of 77,268 patients, 43,904 patients, including 15,313 (34.9%) undergoing liver resection, 13,375 (30.5%) undergoing TACE, and 15,216 (34.7%) undergoing ablation, met the inclusion criteria. Our model (http://www.u-tokyo-hbp-transplant-surgery.jp/about/calculation.html) showed that the 5-year overall survival (OS) in patients with HCC undergoing these procedures decreased with progressive incremental increases in diameter and number of HCCs. For patients undergoing resection, the inverse probability of treatment-weighted-adjusted 5-year OS probabilities were 10%-20% higher compared with patients undergoing TACE for 1-6 HCC lesions <10 cm and were also 10%-20% higher compared with patients undergoing ablation when the HCC diameter was 2-3 cm. For patients undergoing resection and TACE, the model performed well in the external cohort.

Discussion

Our novel prognostic model performed well in predicting OS after resection and TACE for HCC and demonstrated that resection may have a survival benefit over TACE and ablation based on the diameter and number of HCCs.","hji,kes",0,0,0,2,0,NA,NA +33902704,The impact of sequencing depth on the inferred taxonomic composition and AMR gene content of metagenomic samples.,"

Background

Shotgun metagenomics is increasingly used to characterise microbial communities, particularly for the investigation of antimicrobial resistance (AMR) in different animal and environmental contexts. There are many different approaches for inferring the taxonomic composition and AMR gene content of complex community samples from shotgun metagenomic data, but there has been little work establishing the optimum sequencing depth, data processing and analysis methods for these samples. In this study we used shotgun metagenomics and sequencing of cultured isolates from the same samples to address these issues. We sampled three potential environmental AMR gene reservoirs (pig caeca, river sediment, effluent) and sequenced samples with shotgun metagenomics at high depth (~ 200 million reads per sample). Alongside this, we cultured single-colony isolates of Enterobacteriaceae from the same samples and used hybrid sequencing (short- and long-reads) to create high-quality assemblies for comparison to the metagenomic data. To automate data processing, we developed an open-source software pipeline, 'ResPipe'.

Results

Taxonomic profiling was much more stable to sequencing depth than AMR gene content. 1 million reads per sample was sufficient to achieve < 1% dissimilarity to the full taxonomic composition. However, at least 80 million reads per sample were required to recover the full richness of different AMR gene families present in the sample, and additional allelic diversity of AMR genes was still being discovered in effluent at 200 million reads per sample. Normalising the number of reads mapping to AMR genes using gene length and an exogenous spike of Thermus thermophilus DNA substantially changed the estimated gene abundance distributions. While the majority of genomic content from cultured isolates from effluent was recoverable using shotgun metagenomics, this was not the case for pig caeca or river sediment.

Conclusions

Sequencing depth and profiling method can critically affect the profiling of polymicrobial animal and environmental samples with shotgun metagenomics. Both sequencing of cultured isolates and shotgun metagenomics can recover substantial diversity that is not identified using the other methods. Particular consideration is required when inferring AMR gene content or presence by mapping metagenomic reads to a database. ResPipe, the open-source software pipeline we have developed, is freely available ( https://gitlab.com/hsgweon/ResPipe ).","hji,kes",0,0,0,2,0,NA,NA +33951459,Hotspot identifies informative gene modules across modalities of single-cell genomics.,"Two fundamental aims that emerge when analyzing single-cell RNA-seq data are identifying which genes vary in an informative manner and determining how these genes organize into modules. Here, we propose a general approach to these problems, called """"Hotspot,"""" that operates directly on a given metric of cell-cell similarity, allowing for its integration with any method (linear or non-linear) for identifying the primary axes of transcriptional variation between cells. In addition, we show that when using multimodal data, Hotspot can be used to identify genes whose expression reflects alternative notions of similarity between cells, such as physical proximity in a tissue or clonal relatedness in a cell lineage tree. In this manner, we demonstrate that while Hotspot is capable of identifying genes that reflect nuanced transcriptional variability between T helper cells, it can also identify spatially dependent patterns of gene expression in the cerebellum as well as developmentally heritable expression programs during embryogenesis. Hotspot is implemented as an open-source Python package and is available for use at http://www.github.com/yoseflab/hotspot. 
A record of this paper's transparent peer review process is included in the supplemental information.","hji,kes",0,0,0,2,0,NA,NA +33968128,The Molecular Functions of MeCP2 in Rett Syndrome Pathology.,"MeCP2 protein, encoded by the MECP2 gene, binds to DNA and affects transcription. Outside of this activity the true range of MeCP2 function is still not entirely clear. As MECP2 gene mutations cause the neurodevelopmental disorder Rett syndrome in 1 in 10,000 female births, much of what is known about the biologic function of MeCP2 comes from studying human cell culture models and rodent models with Mecp2 gene mutations. In this review, the full scope of MeCP2 research available in the NIH Pubmed (https://pubmed.ncbi.nlm.nih.gov/) data base to date is considered. While not all original research can be mentioned due to space limitations, the main aspects of MeCP2 and Rett syndrome research are discussed while highlighting the work of individual researchers and research groups. First, the primary functions of MeCP2 relevant to Rett syndrome are summarized and explored. Second, the conflicting evidence and controversies surrounding emerging aspects of MeCP2 biology are examined. Next, the most obvious gaps in MeCP2 research studies are noted. Finally, the most recent discoveries in MeCP2 and Rett syndrome research are explored with a focus on the potential and pitfalls of novel treatments and therapies.","hji,kes",0,0,0,2,0,NA,NA +33976872,Analysis of tRNA Cys processing under salt stress in Bacillus subtilis spore outgrowth using RNA sequencing data.,"Background: In spore-forming bacteria, the molecular mechanisms of accumulation of transfer RNA (tRNA) during sporulation must be a priority as tRNAs play an essential role in protein synthesis during spore germination and outgrowth. However, tRNA processing has not been extensively studied in these conditions, and knowledge of these mechanisms is important to understand long-term stress survival. 
Methods: To gain further insight into tRNA processing during spore germination and outgrowth, the expression of the single copy tRNA Cys gene was analyzed in the presence and absence of 1.2 M NaCl in Bacillus subtilis using RNA-Seq data obtained from the Gene Expression Omnibus (GEO) database. The CLC Genomics work bench 12.0.2 (CLC Bio, Aarhus, Denmark, https://www.qiagenbioinformatics.com/) was used to analyze reads from the tRNA Cys gene. Results: The results show that spores store different populations of tRNA Cys-related molecules. One such population, representing 60% of total tRNA Cys, was composed of tRNA Cys fragments. Half of these fragments (3'-tRF) possessed CC, CCA or incorrect additions at the 3' end. tRNA Cys with correct CCA addition at the 3' end represented 23% of total tRNA Cys, while with CC addition represented 9% of the total and with incorrect addition represented 7%. While an accumulation of tRNA Cys precursors was induced by upregulation of the rrnD operon under the control of sigma A-dependent promoters under both conditions investigated, salt stress produced only a modest effect on tRNA Cys expression and the accumulation of tRNA Cys related species. Conclusions: The results demonstrate that tRNA Cys molecules resident in spores undergo dynamic processing to produce functional molecules that may play an essential role during protein synthesis.","hji,kes",0,0,0,2,0,NA,references other data resource +33983910,"Surveillance of Vaccination Coverage Among Adult Populations -United States, 2018.","

Problem/condition

Adults are at risk for illness, hospitalization, disability and, in some cases, death from vaccine-preventable diseases, particularly influenza and pneumococcal disease. CDC recommends vaccinations for adults on the basis of age, health conditions, prior vaccinations, and other considerations. Updated vaccination recommendations from CDC are published annually in the U.S. Adult Immunization Schedule. Despite longstanding recommendations for use of many vaccines, vaccination coverage among U.S. adults remains low.

Reporting period

August 2017-June 2018 (for influenza vaccination) and January-December 2018 (for pneumococcal, herpes zoster, tetanus and diphtheria [Td]/tetanus toxoid, reduced diphtheria toxoid, and acellular pertussis [Tdap], hepatitis A, hepatitis B, and human papillomavirus [HPV] vaccination).

Description of system

The National Health Interview Survey (NHIS) is a continuous, cross-sectional national household survey of the noninstitutionalized U.S. civilian population. In-person interviews are conducted throughout the year in a probability sample of households, and NHIS data are compiled and released annually. NHIS's objective is to monitor the health of the U.S. population and provide estimates of health indicators, health care use and access, and health-related behaviors. Adult receipt of influenza, pneumococcal, herpes zoster, Td/Tdap, hepatitis A, hepatitis B, and at least 1 dose of HPV vaccines was assessed. Estimates were derived for a new composite adult vaccination quality measure and by selected demographic and access-to-care characteristics (e.g., age, race/ethnicity, indication for vaccination, travel history [travel to countries where hepatitis infections are endemic], health insurance status, contacts with physicians, nativity, and citizenship). Trends in adult vaccination were assessed during 2010-2018.

Results

Coverage for the adult age-appropriate composite measure was low in all age groups. Racial and ethnic differences in coverage persisted for all vaccinations, with lower coverage for most vaccinations among non-White compared with non-Hispanic White adults. Linear trend tests indicated coverage increased from 2010 to 2018 for most vaccines in this report. Few adults aged >=19 years had received all age-appropriate vaccines, including influenza vaccination, regardless of whether inclusion of Tdap (13.5%) or inclusion of any tetanus toxoid-containing vaccine (20.2%) receipt was measured. Coverage among adults for influenza vaccination during the 2017-18 season (46.1%) was similar to the estimate for the 2016-17 season (45.4%), and coverage for pneumococcal (adults aged >=65 years [69.0%]), herpes zoster (adults aged >=50 years and aged >=60 years [24.1% and 34.5%, respectively]), tetanus (adults aged >=19 years [62.9%]), Tdap (adults aged >=19 years [31.2%]), hepatitis A (adults aged >=19 years [11.9%]), and HPV (females aged 19-26 years [52.8%]) vaccination in 2018 were similar to the estimates for 2017. Hepatitis B vaccination coverage among adults aged >=19 years and health care personnel (HCP) aged >=19 years increased 4.2 and 6.7 percentage points to 30.0% and 67.2%, respectively, from 2017. HPV vaccination coverage among males aged 19-26 years increased 5.2 percentage points to 26.3% from the 2017 estimate. Overall, HPV vaccination coverage among females aged 19-26 years did not increase, but coverage among Hispanic females aged 19-26 years increased 10.8 percentage points to 49.6% from the 2017 estimate. 
Coverage for the following vaccines was lower among adults without health insurance compared with those with health insurance: influenza vaccine (among adults aged >=19 years, 19-49 years, and 50-64 years), pneumococcal vaccine (among adults aged 19-64 years at increased risk), Td vaccine (among all age groups), Tdap vaccine (among adults aged >=19 years and 19-64 years), hepatitis A vaccine (among adults aged >=19 years overall and among travelers aged >=19 years), hepatitis B vaccine (among adults aged >=19 years and 19-49 years and among travelers aged >=19 years), herpes zoster vaccine (among adults aged >=60 years), and HPV vaccine (among males and females aged 19-26 years). Adults who reported having a usual place for health care generally reported receipt of recommended vaccinations more often than those who did not have such a place, regardless of whether they had health insurance. Vaccination coverage was higher among adults reporting >=1 physician contact during the preceding year compared with those who had not visited a physician during the preceding year, regardless of whether they had health insurance. Even among adults who had health insurance and >=10 physician contacts during the preceding year, depending on the vaccine, 20.1%-87.5% reported not having received vaccinations that were recommended either for all persons or for those with specific indications. Overall, vaccination coverage among U.S.-born adults was significantly higher than that of foreign-born adults, including influenza vaccination (aged >=19 years), pneumococcal vaccination (all ages), tetanus vaccination (all ages), Tdap vaccination (all ages), hepatitis B vaccination (aged >=19 years and 19-49 years and travelers aged >=19 years), herpes zoster vaccination (all ages), and HPV vaccination among females aged 19-26 years. Vaccination coverage also varied by citizenship status and years living in the United States.

Interpretation

NHIS data indicate that many adults remain unprotected against vaccine-preventable diseases. Coverage for the adult age-appropriate composite measures was low in all age groups. Individual adult vaccination coverage remained low as well, but modest gains occurred in vaccination coverage for hepatitis B (among adults aged >=19 years and HCP aged >=19 years), and HPV (among males aged 19-26 years and Hispanic females aged 19-26 years). Coverage for other vaccines and groups with Advisory Committee on Immunization Practices vaccination indications did not improve from 2017. Although HPV vaccination coverage among males aged 19-26 years and Hispanic females aged 19-26 years increased, approximately 50% of females aged 19-26 years and 70% of males aged 19-26 years remained unvaccinated. Racial/ethnic vaccination differences persisted for routinely recommended adult vaccines. Having health insurance coverage, having a usual place for health care, and having >=1 physician contacts during the preceding 12 months were associated with higher vaccination coverage; however, these factors alone were not associated with optimal adult vaccination coverage, and findings indicate missed opportunities to vaccinate remained.

Public health actions

Substantial improvement in adult vaccination uptake is needed to reduce the burden of vaccine-preventable diseases. Following the Standards for Adult Immunization Practice (https://www.cdc.gov/vaccines/hcp/adults/for-practice/standards/index.html), all providers should routinely assess adults' vaccination status at every clinical encounter, strongly recommend appropriate vaccines, either offer needed vaccines or refer their patients to another provider who can administer the needed vaccines, and document vaccinations received by their patients in an immunization information system.","hji,kes",0,0,0,2,0,NA,NA +34003431,The C-BIG Repository: an Institution-Level Open Science Platform.,"In January 2016, the Montreal Neurological Institute-Hospital (The Neuro) declared itself an Open Science organization. This vision extends beyond efforts by individual scientists seeking to release individual datasets, software tools, or building platforms that provide for the free dissemination of such information. It involves multiple stakeholders and an infrastructure that considers governance, ethics, computational resourcing, physical design, workflows, training, education, and intra-institutional reporting structures. The C-BIG repository was built in response as The Neuro's institutional biospecimen and clinical data repository, and collects biospecimens as well as clinical, imaging, and genetic data from patients with neurological disease and healthy controls. It is aimed at helping scientific investigators, in both academia and industry, advance our understanding of neurological diseases and accelerate the development of treatments. As many neurological diseases are quite rare, they present several challenges to researchers due to their small patient populations. Overcoming these challenges required the aggregation of datasets from various projects and locations. 
The C-BIG repository achieves this goal and stands as a scalable working model for institutions to collect, track, curate, archive, and disseminate multimodal data from patients. In November 2020, a Registered Access layer was made available to the wider research community at https://cbigr-open.loris.ca , and in May 2021 fully open data will be released to complement the Registered Access data. This article outlines many of the aspects of The Neuro's transition to Open Science by describing the data to be released, C-BIG's full capabilities, and the design aspects that were implemented for effective data sharing.","hji,kes",0,0,0,2,0,NA,scope +34007002,Severity modeling of propionic acidemia using clinical and laboratory biomarkers.,"

Purpose

To conduct a proof-of-principle study to identify subtypes of propionic acidemia (PA) and associated biomarkers.

Methods

Data from a clinically diverse PA patient population ( https://clinicaltrials.gov/ct2/show/NCT02890342 ) were used to train and test machine learning models, identify PA-relevant biomarkers, and perform validation analysis using data from liver-transplanted participants. k-Means clustering was used to test for the existence of PA subtypes. Expert knowledge was used to define PA subtypes (mild and severe). Given expert classification, supervised machine learning (support vector machine with a polynomial kernel, svmPoly) performed dimensional reduction to define relevant features of each PA subtype.

Results

Forty participants enrolled in the study; five underwent liver transplant. Analysis with k-means clustering indicated that several PA subtypes may exist on the biochemical continuum. The conventional PA biomarkers, plasma total 2-methylcitrate and propionylcarnitine, were not statistically significantly different between nontransplanted and transplanted participants motivating us to search for other biomarkers. Unbiased dimensional reduction using svmPoly revealed that plasma transthyretin, alanine:serine ratio, GDF15, FGF21, and in vivo 1-13C-propionate oxidation, play roles in defining PA subtypes.

Conclusion

Support vector machine prioritized biomarkers that helped classify propionic acidemia patients according to severity subtypes, with important ramifications for future clinical trials and management of PA.","hji,kes",0,0,0,2,0,NA,NA +34013078,Host pharmacogenetic factors that may affect liver neoplasm incidence upon using direct-acting antivirals for treating hepatitis C infection.,"

Introduction

Direct-acting antivirals (DAAs) represent a breakthrough in hepatitis C virus (HCV) treatment as they directly inhibit HCV nonstructural (NS) proteins (NS3/4A, NS5A, and NS5B). However, ongoing debates exist regarding their relationship with hepatocellular carcinoma (HCC) whose incidence is widely debated among investigators. This study was conducted to identify host pharmacogenetic factors that may influence HCC incidence upon using HCV DAAs.

Materials and methods

Details regarding 16 HCV DAAs were collected from literature and DrugBank database. Digital structures of these drugs were fed into the pharmacogenomics/pharmacovigilance in - silico pipeline (PHARMIP) to predict the genetic factors that may underpin HCC development.

Results

We identified 184 unique genes and 40 unique variants that may have key answers for the DAA/HCC paradox. These findings could be used in different methods to aid in the precise application of HCV DAAs and minimize the proposed risk for HCC. All results could be accessed at: https://doi.org/10.17632/8ws8258hn3.2.

Discussion

All the identified factors are evidence related to HCC and significantly predicted by PHARMIP as DAA targets. We discuss some examples of the methods of using these results to address the DAA/HCC controversy based on the following three primary levels: 1 - individual DAA drug, 2 - DAA subclass, and 3 - the entire DAA class. Further wet laboratory investigation is required to evaluate these results.","hji,kes",0,0,0,2,0,NA,Mendeley +34019771,Practice Mediates Bidirectional Dual-Task Interference When Performing a Novel Sequential Nonword Repetition Task.,"Introduction The current study examined the extent to which practice amount mediates dual-task interference patterns associated with concurrent performance of a novel speech task and attention-demanding visuomotor task. Method A Sequential Nonword Repetition Task was used to examine the effect of practice on interference associated with concurrent performance of a Visuomotor Pursuit Task. Twenty-five young adult participants were assigned to either an Extended Practice Group or a Limited Practice Group and performed a novel Sequential Nonword Repetition Task in isolation and while performing a concurrent visuomotor pursuit rotor task. Results Participants in the Limited Practice Group who were afforded a limited amount of practice exhibited dual-task interference (i.e., dual-task performance reductions) for both the speech and visuomotor tasks (i.e., bidirectional dual-task interference). Conversely, participants in the Extended Practice Group who were afforded extended practice exhibited little-to-no observable dual-task interference on the nonword repetition task. Conclusion Data from the current investigation suggest that the amount of initial practice mediates the degree of dual-task interference observed when a novel speech production task is performed with an attention-demanding Visuomotor Pursuit Task. 
Supplemental Material https://doi.org/10.23641/asha.14608071.","hji,kes",0,0,0,2,0,NA,NA +34019885,Interventions to decrease complications after shoulder dystocia: a systematic review and Bayesian meta-analysis.,"

Objective

This study aimed to evaluate the outcomes associated with the implementation of simulation exercises to reduce the sequela of shoulder dystocia.

Data sources

Electronic databases (Ovid MEDLINE, Embase, the Cumulative Index to Nursing and Allied Health Literature database, and Scopus) were initially queried in June 2020 and updated in November 2020. The following 3 concepts were introduced and refined using the controlled vocabulary of the database: vaginal birth, shoulder dystocia, and simulation training. There were no limitations to the year of publication as part of the search strategy.

Study eligibility criteria

We included all studies that reported on the frequency of shoulder dystocia and the associated complications before and after the implementation of interventional exercises to improve outcomes.

Methods

Two authors independently assessed the abstracts and full-text articles of all studies for eligibility and evaluated the quality of the included studies using the Newcastle-Ottawa Scale. Any inconsistencies related to study evaluation or data extraction were resolved by a third author. The coprimary outcomes of this systematic review and meta-analysis were neonatal brachial plexus palsy diagnosed following deliveries complicated by shoulder dystocia and persistence of brachial palsy at 12 months or later. The secondary outcomes were the frequency of shoulder dystocia and cesarean delivery. Study effects were combined using a Bayesian meta-analysis and were reported as risk ratios and 95% credible intervals (Crs).

Results

Of the 372 articles reviewed, 16 publications, which included 428,552 deliveries with 217,713 (50.8%) deliveries during the preintervention and 210,839 (49.2%) deliveries during the postinterventional period, were included in the meta-analysis. The incidence of neonatal brachial plexus palsy after shoulder dystocia decreased from 12.1% to 5.7% (risk ratio, 0.37; 95% Cr, 0.26-0.57; probability of reduction 100%). The overall proportion of neonatal brachial plexus palsy decreased, but with less precision, from 0.3% to 0.1% (risk ratio, 0.53; 95% Cr, 0.21-1.26; probability of reduction 94%). Two studies followed newborns with brachial plexus palsy for at least 12 months. One study that reported on persistent neonatal brachial plexus palsy at 12 months among 1148 shoulder dystocia cases noted a reduction in persistent neonatal brachial plexus palsy from 1.9% to 0.2% of shoulder dystocia cases (risk ratio, 0.13; 95% confidence interval, 0.04-0.49). In contrast, the study that reported on persistent neonatal brachial plexus palsy at 12 months for all deliveries noted that it did not change significantly, namely from 0.3 to 0.2 per 1000 births (risk ratio, 0.77; 95% confidence interval, 0.31-1.90). Following the implementation of shoulder dystocia interventional exercises, the diagnosis of shoulder dystocia increased significantly from 1.2% to 1.7% of vaginal deliveries (risk ratio, 1.39; 95% Cr, 1.19-1.65; probability of increase 100%). Compared with the preimplementation period, the cesarean delivery rate increased postimplementation from 21.2% to 25.9% (risk ratio, 1.22; 95% Cr, 0.93-1.59; probability of increase 93%). We created an online tool (https://ccrebm-bell.shinyapps.io/sdmeta/) that permits calculation of the absolute risk reduction and absolute risk increase attributable to the intervention vis-a-vis the incidence of shoulder dystocia, neonatal brachial plexus palsy, and cesarean deliveries.

Conclusion

Introduction of shoulder dystocia interventional exercises decreased the rate of neonatal brachial plexus palsy per shoulder dystocia case; the data on persistence of neonatal brachial plexus palsy beyond 12 months is limited and contradictory. Implementation of the interventions was associated with an increase in the diagnosis of shoulder dystocia and rate of cesarean deliveries.","hji,kes",0,0,0,2,0,NA,calculation tool +34030448,Genomic Approaches to Plant-Pathogen Epidemiology and Diagnostics.,"Diseases have a significant cost to agriculture. Findings from analyses of whole-genome sequences show great promise for informing strategies to mitigate risks from diseases caused by phytopathogens. Genomic approaches can be used to dramatically shorten response times to outbreaks and inform disease management in novel ways. However, the use of these approaches requires expertise in working with big, complex data sets and an understanding of their pitfalls and limitations to infer well-supported conclusions. We suggest using an evolutionary framework to guide the use of genomic approaches in epidemiology and diagnostics of plant pathogens. We also describe steps that are necessary for realizing these as standard approaches in disease surveillance.","hji,kes",0,0,0,2,0,NA,how did this get in?? +34036443,"BDD Knowledge, Attitude and Practice Among Aesthetic Plastic Surgeons Worldwide.","

Background

Body dysmorphic disorder (BDD) is a controversial topic in the field of plastic surgery.

Objective

Our aim was to determine whether BDD knowledge, attitude and practice (KAP) are affected by the experience of the surgeon in the field, sex of the surgeon, country of practice, and the number of patients the surgeon sees annually. We were particularly interested in uncovering any significant relations in KAP of BDD between plastic surgeons practicing in developed versus developing countries.

Methods

We created a two-page survey of 24 questions about the KAP of BDD. The survey was sent to aesthetic plastic surgeons worldwide via ISAPS global email list. The data were collected over a period of 20 days at the end of 2020.

Results

A total of 464 plastic surgeons completed the survey. The only factor that determines the awareness of BDD is the experience of the surgeon. The more experienced the surgeon is, the more likely he/she is to be familiar with the clinical picture of BDD. Although aware, the more experienced surgeons tend to dismiss the importance of referring BDD patients to psychiatrists/psychologists. Male surgeons tend to diagnose more patients with BDD than female surgeons. Surgeons who estimated the correct prevalence of BDD among patients seeking surgery acquired knowledge of BDD from scientific journals. The KAP is relatively similar between surgeons practicing in developed and developing countries, and the main statistically significant difference was in the questions used during the course of the interviews to diagnose BDD.

Conclusion

We can deduce from the results that most aesthetic surgeons worldwide have got knowledge of the presentation of BDD and are keen to diagnose the disorder in their practice. It is worth noting that surgeons usually have their unique approach in the management of BDD. Our study highlights the importance of not only raising awareness of the best management of BDD, but also of establishing a consensus that BDD is a contraindication to aesthetic treatment. The best methods to raise awareness are through journals and plastic surgery residency.

Level of evidence v

This journal requires that authors assign a level of evidence to each article. For a full description of these Evidence-Based Medicine ratings, please refer to the Table of Contents or the online Instructions to Authors https://www.springer.com/00266 .","hji,kes",0,0,0,2,0,NA,clinical +34043002,Identification of evolutionarily stable functional and immunogenic sites across the SARS-CoV-2 proteome and greater coronavirus family.,"

Motivation

Since the first recognized case of COVID-19, more than 100 million people have been infected worldwide. Global efforts in drug and vaccine development to fight the disease have yielded vaccines and drug candidates to cure COVID-19. However, the spread of SARS-CoV-2 variants threatens the continued efficacy of these treatments. In order to address this, we interrogate the evolutionary history of the entire SARS-CoV-2 proteome to identify evolutionarily conserved functional sites that can inform the search for treatments with broader coverage across the coronavirus family.

Results

Combining coronavirus family sequence information with the mutations observed in the current COVID-19 outbreak, we systematically and comprehensively define evolutionarily stable sites that may provide useful drug and vaccine targets and which are less likely to be compromised by the emergence of new virus strains. Several experimentally-validated effective drugs interact with these proposed target sites. In addition, the same evolutionary information can prioritize cross reactive antigens that are useful in directing multi-epitope vaccine strategies to illicit broadly neutralizing immune responses to the betacoronavirus family. Although the results are focused on SARS-CoV-2, these approaches stem from evolutionary principles that are agnostic to the organism or infective agent.

Availability

The results of this work are made interactively available at http://cov.lichtargelab.org.

Supplementary information

Supplementary data are available at Bioinformatics online.","hji,kes",0,0,0,2,0,NA,NA +34100240,Providing a Second Opinion to Dr. Google with the WWW Framework.,"While clinicians are often aware that their patients seek second opinions, they are rarely taught specific skills for how to effectively communicate with patients when they are the ones providing that second opinion. The nuances of these skills are amplified when the second opinion being provided is to the ubiquitous (and often anonymous) Dr. Google. In this perspective, the authors share an approach for discussing a patient's pre-visit health-related internet findings. After emphasizing the importance of setting the stage, they describe the WWW Framework which proposes """"waiting"""" before responding with data, getting to the """"what"""" of the patient's search, and """"working together"""" to negotiate a plan. This stepwise approach is designed to provide psychological safety, build a therapeutic alliance, and empower collaborative treatment planning.","hji,kes",0,0,0,2,0,NA,NA +34123499,ML-SIM: universal reconstruction of structured illumination microscopy images using transfer learning.,"Structured illumination microscopy (SIM) has become an important technique for optical super-resolution imaging because it allows a doubling of image resolution at speeds compatible with live-cell imaging. However, the reconstruction of SIM images is often slow, prone to artefacts, and requires multiple parameter adjustments to reflect different hardware or experimental conditions. Here, we introduce a versatile reconstruction method, ML-SIM, which makes use of transfer learning to obtain a parameter-free model that generalises beyond the task of reconstructing data recorded by a specific imaging system for a specific sample type. 
We demonstrate the generality of the model and the high quality of the obtained reconstructions by application of ML-SIM on raw data obtained for multiple sample types acquired on distinct SIM microscopes. ML-SIM is an end-to-end deep residual neural network that is trained on an auxiliary domain consisting of simulated images, but is transferable to the target task of reconstructing experimental SIM images. By generating the training data to reflect challenging imaging conditions encountered in real systems, ML-SIM becomes robust to noise and irregularities in the illumination patterns of the raw SIM input frames. Since ML-SIM does not require the acquisition of experimental training data, the method can be efficiently adapted to any specific experimental SIM implementation. We compare the reconstruction quality enabled by ML-SIM with current state-of-the-art SIM reconstruction methods and demonstrate advantages in terms of generality and robustness to noise for both simulated and experimental inputs, thus making ML-SIM a useful alternative to traditional methods for challenging imaging conditions. Additionally, reconstruction of a SIM stack is accomplished in less than 200 ms on a modern graphics processing unit, enabling future applications for real-time imaging. Source code and ready-to-use software for the method are available at http://ML-SIM.github.io.","hji,kes",0,0,0,2,0,NA,NA +34135895,Transcriptomic Signature Differences Between SARS-CoV-2 and Influenza Virus Infected Patients.,"The reason why most individuals with COVID-19 have relatively limited symptoms while other develop respiratory distress with life-threatening complications remains unknown. Increasing evidence suggests that COVID-19 associated adverse outcomes mainly rely on dysregulated immunity.Here, we compared transcriptomic profiles of blood cells from 103 patients with different severity levels of COVID-19 with that of 27 healthy and 22 influenza-infected individuals. 
Data provided a complete overview of SARS-CoV-2-induced immune signature, including a dramatic defect in IFN responses, a reduction of toxicity-related molecules in NK cells, an increased degranulation of neutrophils, a dysregulation of T cells, a dramatic increase in B cell function and immunoglobulin production, as well as an important over-expression of genes involved in metabolism and cell cycle in patients infected with SARS-CoV-2 compared to those infected with influenza viruses. These features also differed according to COVID-19 severity. Overall and specific gene expression patterns across groups can be visualized on an interactive website (https://bix.unil.ch/covid/). Collectively, these transcriptomic host responses to SARS-CoV-2 infection are discussed in the context of current studies, thereby improving our understanding of COVID-19 pathogenesis and shaping the severity level of COVID-19.","hji,kes",0,0,0,2,0,NA,iffy - clinical but gene; seems to be viz only +34141842,"Data on the present and future distribution of suitable niches of the black vanilla orchid (Nigritella nigra s.l., Orchidaceae) and its pollinators.","The black vanilla orchid (Nigritella nigra s.l.) is a perennial plant found in the main European mountain ranges. It occurs in large numbers in the Alps, but it has become a rare and endangered species in Scandinavia due to the loss of suitable habitats. Here we present occurrence data on the occurrence of N. nigra s.l. and pollinators of this species which were used to evaluate the impact of climate change on the future distribution of the black vanilla orchid and its pollen vectors. Moreover, the values of bioclimatic variables for each locality are provided. The binary distribution models of both, orchids and insects, created using ecological niche modeling (ENM) technique are presented together with the information about changes in the coverage of suitable niches of studied organisms. 
Our data were used to evaluate the impact of climate change on orchid and its pollinator (https://doi.org/10.1016/j.gecco.2021.e01560) and datasets can be reused in other research on past and future distribution of suitable niches of the black vanilla orchid and its pollinators as well as in other biogeographical studies. Moreover, presented outcomes of research can be useful in establishing conservation plans for montane orchids and their pollinators.","hji,kes",0,0,0,2,0,NA,paper is about data but link is to a paper… +34186186,"In silico modelling of acute toxicity of 1, 2, 4-triazole antifungal agents towards zebrafish (Danio rerio) embryos: Application of the Small Dataset Modeller tool.","Nowadays, there is a widespread use of triazole antifungal agents to kill broad classes of fungi in farming lands and to protect herbs, fruits and grains. These agents further deposit into the aquatic systems causing toxicity to the living aquatic creatures, which can then affect human beings. Considering this issue, risk assessment of these toxic chemicals is a very essential task. Due to the inadequate experimental data on acute toxicity of antifungal agents containing the 1, 2, 4-triazole ring, higher testing costs along with the regulatory restrictions and the international regulations to lessen animal testing emphasize on in silico techniques such as quantitative structure-activity relationship (QSAR) studies. The application of QSAR modelling has created an easier avenue to predict activity/property/toxicity of newly synthesized compounds. In the present study, we have used 23 antifungal agents containing the 1, 2, 4-triazole ring to develop 2D-QSAR models and explored their structural attributes crucial for acute toxicity towards embryonic phase of zebrafish (Danio rerio). Here, we have employed simple 2D descriptors to develop the QSAR models. 
The models were evolved by executing the Small Dataset Modeller tool (https://dtclab.webs.com/software-tools), and the validation of the models was achieved by employing different precise validation principles. The statistical validation metrics confirm that built models are robust, useful and well predictive to forecast the acute toxicity of new compounds.","hji,kes",0,0,0,2,0,NA,NA +34193950,Identification of subtypes of anticancer peptides based on sequential features and physicochemical properties.,"Anticancer peptides (ACPs) are a kind of bioactive peptides which could be used as a novel type of anticancer drug that has several advantages over chemistry-based drug, including high specificity, strong tumor penetration capacity, and low toxicity to normal cells. As the number of experimentally verified bioactive peptides has increased significantly, various of in silico approaches are imperative for investigating the characteristics of ACPs. However, the lack of methods for investigating the differences in physicochemical properties of ACPs. In this study, we compared the N- and C-terminal amino acid composition for each peptide, there are three major subtypes of ACPs that are defined based on the distribution of positively charged residues. For the first time, we were motivated to develop a two-step machine learning model for identification of the subtypes of ACPs, which classify the input data into the corresponding group before applying the classifier. Further, to improve the predictive power, the hybrid feature sets were considered for prediction. Evaluation by five-fold cross-validation showed that the two-step model trained with sequence-based features and physicochemical properties was most effective in discriminating between ACPs and non-ACPs. The two-step model trained with the hybrid features performed well, with a sensitivity of 86.75%, a specificity of 85.75%, an accuracy of 86.08%, and a Matthews Correlation Coefficient value of 0.703. 
Furthermore, the model also consistently provides the effective performance in independent testing set, with sensitivity of 77.6%, specificity of 94.74%, accuracy of 88.99% and the MCC value reached 0.75. Finally, the two-step model has been implemented as a web-based tool, namely iDACP, which is now freely available at http://mer.hc.mmh.org.tw/iDACP/ .","hji,kes",0,0,0,2,0,NA,NA +34210950,Exploration of the Important Role of Microfibril-Associated Protein 4 Gene in Oral Squamous Cell Carcinoma.,"BACKGROUND Oral squamous cell carcinoma (OSCC) is a common tumor of the head and neck. Its treatment usually requires multiple modalities. Currently, there are no molecular biomarkers to guide these treatment strategies. Studies have shown that microfibril-associated protein 4 (MFAP4) is potentially useful for non-invasive assessment of various diseases; however, its biological function in tumors is still unknown. In this study, we propose that MFAP4 is a new prognostic target for OSCC. MATERIAL AND METHODS First, we collected OSCC data (GSE25099 and GSE30784 datasets) from the Gene Expression Omnibus (GEO) database and compared the differential expression of MFAP4 gene between the patients (tumor) and normal (control) groups. The comparison was done with University of California Santa Cruz Xena (https://xenabrowser.net/Datapages/), and we calculated the difference in MFAP4 gene expression between normal and tumor tissues in a pan-cancer analysis. Then, we compared the 2 groups with high and low expression of MFAP4 gene in terms of tumor mutation burden (TMB), miRNA regulation, and immune cell infiltration. RESULTS We found that the expression of MFAP4 gene was significantly decreased in tumors. Our research also showed that high expression of MFAP4 was related to better prognosis of patients and may be related to tumor gene mutation, miRNA regulation, and infiltration of different immune cells. 
CONCLUSIONS Our work provides evidence that expression of MFAP4 can be used as a prognostic biomarker for risk stratification of OSCC patients and elaborates on its relation with the regulation of TMB, miRNAs, and immune cell infiltration.","hji,kes",0,0,0,2,0,NA,references other data resource +34263466,Prediction of overall survival in patients with Stage I esophageal cancer: A novel web-based calculator.,"

Background and aims

In this study, we aimed to develop a convenient web-based calculator to predict the overall survival (OS) of patients with Stage I esophageal cancer (EC).

Methods

Data of 1664 patients, between 2004 and 2015, were extracted from the Surveillance, Epidemiology, and End Results database. Least absolute shrinkage and selection operator regression was employed to sift variables; subsequently, Cox proportional hazards regression model was built. We applied the enhanced bootstrap validation to appraise the discrimination and calibration of the model. Clinical benefit was measured using decision curve analysis (DCA). Thereafter, a web-based calculator based on the model, which could be used to predict the 1-, 3-, and 5-year OS rates, was developed.

Results

Race, age, histologic type, grade, N stage, and therapeutic methods were selected. C-indices of the prediction model in the training and validation groups were 0.726 (95% confidence interval [CI], 0.679-0.773) and 0.724 (95% CI, 0.679-0.769), respectively. Calibration curves showed good agreement between the groups. The DCA demonstrated that the prediction model is clinically useful.

Conclusions

The prediction model we developed showed a good performance in calculating the OS rates in patients with Stage I EC. The web-based calculator is available at https://championship.shinyapps.io/dynnomapp/.","hji,kes",0,0,0,2,0,NA,NA +34266288,CGRP measurements in human plasma - a methodological study.,"

Background

Calcitonin gene-related peptide plasma levels have frequently been determined as a biomarker for primary headaches. However, published data is often inconsistent resulting from different methods that are not precisely described in most studies.

Methods

We applied a well-proven enzyme-linked immunosorbent assay to measure calcitonin gene-related peptide concentrations in human blood plasma, we modified parameters of plasma preparation and protein purification and used calcitonin gene-related peptide-free plasma for standard solutions, which are described in detail.

Results

Calcitonin gene-related peptide levels are stable in plasma with peptidase inhibitors and after deep-freezing. Calcitonin gene-related peptide standard solutions based on synthetic intercellular fluid or pooled plasma with pre-absorbed calcitonin gene-related peptide influenced the measurements but yielded both comprehensible results. In a sample of 56 healthy subjects the calcitonin gene-related peptide plasma levels varied considerably from low (<50 pg/mL) to very high (>500 pg/mL) values. After a 12-hour exposure of these subjects to normobaric hypoxia the individual calcitonin gene-related peptide levels remained stable.

Conclusion

Buffering with peptidase inhibitors and immediate freezing or processing of plasma samples is essential to achieve reliable measurements. Individuals show considerable differences and partly high calcitonin gene-related peptide plasma levels without detectable pathological reason. Thus plasma measurements are suited particularly to follow calcitonin gene-related peptide levels in longitudinal studies. The use of data for this study was approved by the Ethics Committee of the Medical University of Innsbruck (https://www.i-med.ac.at/ethikkommission/; EK Nr: 1242/2017).","hji,kes",0,0,0,2,0,NA,NA +34282687,Glucosinolate-Enriched Fractions from Maca (Lepidium meyenii) Exert Myrosinase-Dependent Cytotoxic Effects against HepG2/C3A and HT29 Tumor Cell Lines.,"The consumption of glucosinolate (GL)-rich foods, including Brassica vegetables, such as mustard, broccoli, and maca, is associated with decreased risk of developing cancer. The GL content in maca, which is recognized as a """"superfood"""", is approximately 100-times higher than that in other brassicas. Although maca is a potential dietary source of GLs, limited studies have examined the bioactivity of maca GLs using the combination of chemical characterization and bioassays. In this study, the fractions (Lm-II and Lm-III) rich in intact GLs (glucotropaeolin and glucolimnanthin) were isolated and characterized from maca ethanolic extracts using chromatography and mass spectrometry. Additionally, the growth-inhibitory effects of Lm-II and Lm-II fractions against hepatocellular carcinoma (HepG2/C3A) and colon adenocarcinoma (HT29) cell lines were examined in the absence or presence of myrosinase (MYR). Fractions lacking low molecular weight sugars dose-dependently exerted cytotoxic effects in the presence of MYR. The half-maximal inhibitory concentration values of Lm-II and Lm-III against HepG2/C3A were 118.8 and 69.9 µg/mL, respectively, while those against HT29 were 102.6 and 71.5 µg/mL, respectively. 
These results suggest that the anticancer properties of maca can be attributed to GLs and corroborate the categorization of maca as a """"superfood.""""Supplemental data for this article is available online at https://doi.org/10.1080/01635581.2021.1952444.","hji,kes",0,0,0,2,0,NA,NA +34296017,Individual and community-level determinants of Iron-Folic Acid Intake for the recommended period among pregnant women in Ethiopia: A multilevel analysis.,"

Background

Iron-folic acid (IFA) intake for the recommended period during pregnancy reduces the risk of anemia and congenital anomalies. However, IFA intake for the recommended period is still very low in low-income countries including Ethiopia. Thus, the aim of this study was to assess both individual-and community-level determinants of IFA intake for the recommended period among pregnant women in Ethiopia.

Methods

Data were retrieved from the Demographic and Health Survey program's official database website (http://dhsprogram.com). A two-stage stratified cluster sampling technique was employed to conduct the 2016 Ethiopian Demographic and Health Survey. A sample of 3088 pregnant women who had received at least one dose of IFA in Ethiopia were included in this study. A multivariable multilevel logistic regression analysis model was fitted to identify the determinants of IFA intake below the recommended period [< 90 days] during pregnancy. Akaike's Information Criterion (AIC) was used during the model selection procedure.

Results

This study revealed that 87.6% [95% CI; 86.3%, 88.6%] of the women took IFA below the recommended period during the index pregnancy. After adjusting for the covariates: living in rural areas [AOR = 1.74: 95% CI 1.37, 2.50], and women's illiterate proportion [AOR = 1.43: 95% CI 1.06, 1.70] were community level factors. Whereas, primary education level [AOR = 0.63: 95% CI 0.40, 0.78], poorer wealth index [AOR = 1.53: 95% CI 1.08, 3.09], 4 + antenatal care visits [AOR = 0.43: 95% CI 0.31, 0.69], and receive nutritional counseling during pregnancy [AOR = 0.63: 95% CI 0.37, 0.84] were the individual-level factors of IFA intake below the recommended period during pregnancy.

Conclusions

In this study, nearly nine out of ten pregnant women did not take IFA for the recommended period. Thus, promoting recommended ANC visits, enhancing the quality of nutritional counseling, strengthening the expansion of media, and educate rural women towards the importance of optimal intake of IFA during pregnancy. Besides, the policymakers should design essential strategies based on identified barriers to improve the IFA intake for the recommended period.","hji,kes",0,0,0,2,0,NA,NA +34330087,ARTS: A novel In-vivo classifier of arteriolosclerosis for the older adult brain.,"Brain arteriolosclerosis, one of the main pathologies of cerebral small vessel disease, is common in older adults and has been linked to lower cognitive and motor function and higher odds of dementia. In spite of its frequency and associated morbidity, arteriolosclerosis can only be diagnosed at autopsy. Therefore, the purpose of this work was to develop an in-vivo classifier of arteriolosclerosis based on brain MRI. First, an ex-vivo classifier of arteriolosclerosis was developed based on features related to white matter hyperintensities, diffusion anisotropy and demographics by applying machine learning to ex-vivo MRI and pathology data from 119 participants of the Rush Memory and Aging Project (MAP) and Religious Orders Study (ROS), two longitudinal cohort studies of aging that recruit non-demented older adults. The ex-vivo classifier showed good performance in predicting the presence of arteriolosclerosis, with an average area under the receiver operating characteristic curve AUC=0.78. The ex-vivo classifier was then translated to in-vivo based on available in-vivo and ex-vivo MRI data on the same participants. The in-vivo classifier was named ARTS (short for ARTerioloSclerosis), is fully automated, and provides a score linked to the likelihood a person suffers from arteriolosclerosis. 
The performance of ARTS in predicting the presence of arteriolosclerosis in-vivo was tested in a separate, 91% dementia-free group of 79 MAP/ROS participants and exhibited an AUC=0.79 in persons with antemortem intervals shorter than 2.4years. This level of performance in mostly non-demented older adults is notable considering that arteriolosclerosis can only be diagnosed at autopsy. The scan-rescan reproducibility of the ARTS score was excellent, with an intraclass correlation of 0.99, suggesting that application of ARTS in longitudinal studies may show high sensitivity in detecting small changes. Finally, higher ARTS scores in non-demented older adults were associated with greater decline in cognition two years after baseline MRI, especially in perceptual speed which has been linked to arteriolosclerosis and small vessel disease. This finding was shown in a separate group of 369 non-demented MAP/ROS participants and was validated in 72 non-demented Black participants of the Minority Aging Research Study (MARS) and also in 244 non-demented participants of the Alzheimer's Disease Neuroimaging Initiative 2 and 3. The results of this work suggest that ARTS may have broad implications in the advancement of diagnosis, prevention and treatment of arteriolosclerosis. ARTS is publicly available at https://www.nitrc.org/projects/arts/.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +34337710,MiR-1269a acts as an onco-miRNA in non-small cell lung cancer via down-regulating SOX6.,"The article """"MiR-1269a acts as an onco-miRNA in non-small cell lung cancer via down-regulating SOX6, by R.-H. Jin, D.-J. Yu, M. Zhong, published in Eur Rev Med Pharmacol Sci 2018; 22 (15): 4888-4897- DOI: 10.26355/eurrev_201808_15625-PMID: 30070324"""" has been withdrawn from the authors to some technical reasons (there are some errors and incorrect data). The Publisher apologizes for any inconvenience this may cause. 
https://www.europeanreview.org/article/15625.","hji,kes",0,0,0,2,0,NA,NA +IND44720920,"8th International Food Data Conference: Quality food composition data, key for health and trade","The 8th International Food Data Conference, with the main theme of Quality food composition data: key for health and trade, was organised by the Institute of Nutrition, Mahidol University, Thailand, from 1 to 3 October 2009, under the auspices of the International Network of Food Data System (INFOODS) and the International Union of Nutritional Sciences (IUNS) Task Force. Over 140 delegates from 43 countries attended the conference, which included 2 keynote addresses, 8 special lectures, 32 oral and 80 poster presentations. The conference programme, abstracts of oral and poster presentations, power point slide shows of oral presentations and the Bangkok Declaration are all available on the conference website: http://www.inmu.mahidol.ac.th/8ifdc/. The conference allowed participants to disseminate up-to-date knowledge and the latest information pertaining to food composition databases, to exchange knowledge and experience and to discuss issues of mutual interest.","hji,kes",0,0,0,2,0,NA,NA +IND500739467,"Revision of the Cales noacki species complex (Hymenoptera, Chalcidoidea, Aphelinidae)","The genus Cales (Hymenoptera: Aphelinidae) includes 13 species worldwide, of which 10 form a highly morphologically uniform species complex with a native range in the Neotropical region. We recognize ten species previously attributed to a single Neotropical species, Cales noacki Howard, which in the strict sense is a species broadly disseminated to control woolly whitefly. A neotype is designated for C. noacki, and it is redescribed based on specimens molecularly determined to be conspecific with the neotype. Newly described species include: C. bicolor Mottern, n.sp., C. breviclava Mottern, n.sp., C. brevisensillum Mottern n.sp., C. curvigladius Mottern, n.sp., C. longiseta Mottern, n.sp., C. 
multisensillum Mottern n.sp., C. noyesi Mottern, n.sp., C. parvigladius Mottern, n.sp. and C. rosei Mottern, n.sp. Species are delimited based on a combination of morphological and molecular data (28S-D2 rDNA and COI). Additional specimens are included in the phylogenetic analyses and although these likely represent several new species, we lack sufficient specimen sampling to describe them at this time. Cales are highly morphologically conserved and character-poor, resulting in several cryptic species. A molecular phylogeny of the known Neotropical species based on 28S-D25 rDNA and a 390-bp segment of COI is included, and identification keys to males and females are provided. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:7FEB0479-9B2E-48E8-8603-4B7C2759D4EC.","hji,kes",0,0,0,2,0,NA,in ZooBank +IND604793137,"Re‐evolution of a morphological precursor of crypsis investment in the newly revised horned praying mantises (Insecta, Mantodea, Vatinae)","The Neotropical praying mantis tribe Vatini Stål is revised using total evidence phylogenetic analysis based on molecular and coded morphological data. The subfamily Vatinae is redefined to only include Neotropical taxa with the removal of distantly related African and Asian lineages. A new tribe is erected under Vatinae (Heterovatini trib.n.) for two unique genera with historically unstable taxonomic placement (Heterovates Saussure and Chopardiella Giglio-Tos). Phylogenetic results and morphology support the synonymy of three genera (Lobovates Deeleman-Reinhold, Phyllovates Kirby, and Hagiotata Saussure & Zehntner) and the validity of Chopardiella Giglio-Tos, Heterovates Saussure, Callivates Roy, Pseudovates Saussure, Vates Burmeister, and Zoolea Audinet Serville. A new genus (Alangularis gen.n.) is created for a former species of Vates with unique morphology and separate phylogenetic placement. 
All genera are redescribed based on external morphology and the male genital complex. A key to genera for Vatinae is provided with dorsal habitus images of representatives for each genus. A distinct pattern of correlated evolution of morphological characters linked to crypsis was uncovered. Cuticular leg lobes within single leg segments are evolving as sets, and serially homologous lobes appear simultaneously or in close succession. The posteroventral lobes in the apical position on thoracic femora appear to be the precursors to multiple positive rate shifts in the evolutionary accumulation of cryptic features. One shift occurred early in the evolution of Vatinae while the second occurred much later, after the loss and re-evolution of the posteroventral lobes in the apical position on thoracic femora, a violation of Dollo's law. This published work has been registered in ZooBank, http://zoobank.org/urn:lsid:zoobank.org:pub:724C16AF-069A-46A1-B66C-007D8DE18C68.","hji,kes",0,0,0,2,0,NA,data deposited as referenced +IND605427586,"Protein, fat, moisture and cooking yields from a U.S. study of retail beef cuts","Nutrient data from the U.S. Department of Agriculture (USDA) are an important resource for U.S. and international databases. To ensure that data for retail beef cuts in USDA's National Nutrient Database for Standard Reference (SR) are current, a comprehensive, nationwide, multi-phase study was conducted. Samples were collected and analyzed in three phases based on primal category. Using a statistically based sampling plan, 72 beef carcasses per phase were obtained with nationally representative quality and yield grades, genders and genetic types. Retail cuts were fabricated, cooked and dissected to obtain component weights. Nutrient values were determined by validated laboratories using quality assurance procedures. Full nutrient profiles were made available in SR (http://www.ars.usda.gov/nutrientdata). 
Results for 16 beef retail cuts were compared for cooking yield and protein, fat and moisture concentrations. For example, cooked fat levels differed among three roasted cuts and among three grilled cuts from chuck, rib and loin (p<0.01). Cooking yield for roasted ribeye (76%) was lower (p<0.001) than for grilled ribeye (83%) or for chuck eye grilled (80%) or roasted (84%). This study demonstrates the importance of maintaining data for a variety of retail beef cuts due to their unique properties and different cooking methods.","hji,kes",0,0,0,2,0,NA,references other data resource +IND605724905,Omics analysis of acetic acid tolerance in Saccharomyces cerevisiae,"Acetic acid is an inhibitor in industrial processes such as wine making and bioethanol production from cellulosic hydrolysate. It causes energy depletion, inhibition of metabolic enzyme activity, growth arrest and ethanol productivity losses in Saccharomyces cerevisiae. Therefore, understanding the mechanisms of the yeast responses to acetic acid stress is essential for improving acetic acid tolerance and ethanol production. Although 329 genes associated with acetic acid tolerance have been identified in the Saccharomyces genome and included in the database (http://www.yeastgenome.org/observable/resistance_to_acetic_acid/overview), the cellular mechanistic responses to acetic acid remain unclear in this organism. Post-genomic approaches such as transcriptomics, proteomics, metabolomics and chemogenomics are being applied to yeast and are providing insight into the mechanisms and interactions of genes, proteins and other components that together determine complex quantitative phenotypic traits such as acetic acid tolerance. This review focuses on these omics approaches in the response to acetic acid in S. cerevisiae. 
Additionally, several novel strains with improved acetic acid tolerance have been engineered by modifying key genes, and the application of these strains and recently acquired knowledge to industrial processes is also discussed.","hji,kes",0,0,0,2,0,NA,NA +IND605743169,Estimating Thermodynamic Properties of Pure Triglyceride Systems Using the Triglyceride Property Calculator,"To date, the most comprehensive model for predicting thermodynamic properties of pure triglycerides was presented by Wesdorp in Liquid-multiple solid phase equilibria in fats: theory and experiments (1990). In this paper, we present (1) corrections to the published model, as well as (2) a software implementation of the model for numerical assessment. The software tool, Triglyceride Property Calculator (TPC), uses a semi-empirical model to estimate the enthalpy of fusion and melting temperature for a given triglyceride based on its molecular composition and polymorphic form. These estimates are compared to experimentally collected data when available. The web application is available at http://www.crcfoodandhealth.com (under research tools) and through the AOCS Lipid Library. The quality of estimates is characterized according to defined counting metrics and presented for TAG subcategories. Additionally, the extrapolative value of the TPC is assessed by checking for consistency with underlying thermodynamic constraints. The current TPC implementation is effective in describing experimentally collected melting point data, with greater than 91% of the fitted values falling within 10% of the actual data. The TPC is also very good at describing collected enthalpy data. The underlying semi-empirical model and parameter set perform well in ensuring enthalpy predictions are thermodynamically consistent, however, extrapolated melting temperatures appear unreliable. 
Developing models and parameter sets that ensure thermodynamic consistency is a priority with future TPC iterations.","hji,kes",0,0,0,2,0,NA,NA +IND605918011,Genetic diversity of endangered orchid Phaius australis across a fragmented Australian landscape,"Historical events such as colonisation, spatial distribution across different habitats, and contemporary processes, such as human-mediated habitat fragmentation can leave lasting imprints on the population genetics of a species. Orchids currently comprise 17% of threatened flora species in Australia (Environment Protection and Biodiversity Conservation Act 1999) due to the combination of fragmentation and illegal harvesting (Benwell in Recovery plan, swamp orchids Phaius australis, Phaius tancarvilliae, NSW National Parks and Wildlife Service, Sydney, 1994; Jones in A complete guide to native orchids of Australia including the island territories, 2ndedn, Reed Natural History, Sydney, 2006; DE in Phaius australis in species profile and threats database, Department of the Environment. http://www.environment.gov.au/sprat , 2015). The federally endangered Swamp Orchid Phaius australis has a disjunct distribution across an almost 2000km latitudinal range along Australias east coast but it was estimated that 95% of the populations have been lost since European settlement (Benwell 1994). Phaius australis is endangered due to illegal collection and habitat loss that has resulted in limited connectivity between populations, in ecosystems that are vulnerable to climate change. Thus the genetic impacts of its history combined with more recent fragmentation may have impacts on its future viability especially in light of changing environmental conditions. Thirty-four populations were sampled from tropical north Queensland to the southern edge of the subtropics in New South Wales. Population genetics analysis was conducted using 13 polymorphic microsatellite markers developed for the species using NextGen sequencing. 
Spatial genetic patterns indicate post-colonisation divergence from the tropics southwards to its current climate niche limits. Genetic diversity is low across all populations (A = 1.5, H =0.171), and there is little evidence of genetic differentiation between regions. Consistent with population genetic theory, the historic loss of populations has resulted in significantly lower genetic diversity in small populations compared to large (P, A, He; p < 0.05). The viability and persistence of P. australis populations now and in a changing climate are discussed in the context of conservation priorities.","hji,kes",0,0,0,2,0,NA,NA +IND606427025,First Report of Sugarcane Yellow Leaf Disease in Mexico and Detection of ‘Candidatus Phytoplasma asteris’-Related Strains in Affected Plants,"Sugarcane is a common name for any of several tall perennial grass species of the genus Saccharum. As a major source for sugar production and an efficient feedstock for biofuel generation, sugarcane is widely cultivated in tropical and subtropical regions. Mexico alone produces over six million metric tons of cane sugar annually valued at 1.3 billion U.S. dollars. Although economically important, sugarcane is susceptible to devastating diseases caused by phloem-colonizing, cell wall-less bacteria known as phytoplasmas. Different sugarcane phytoplasmal diseases around the world have been attributed to diverse phytoplasmas belonging to six mutually distinct Candidatus Phytoplasma species (Marcone 2002). During the 2015 to 2016 growing season, sugarcane plants exhibiting leaf discolorations (white and yellow streaks) indicative of sugarcane yellow leaf (ScYL) disease were observed in a sugarcane field in Cosamaloapan, Veracruz, Mexico, with less than 1% of the plants in the field being affected. Leaf samples were collected from three symptomatic and three asymptomatic plants in the same field. 
Total DNA was extracted from leaf midribs using a modified cetyltrimethylammonium bromide method (Perez-Lopez et al. 2016). A preliminary diagnostic assay was carried out using direct polymerase chain reactions (PCRs) with phytoplasma-specific primer pair R16F2n/R16R2. All PCR assays with DNA templates from symptomatic plants produced a phytoplasma-characteristic amplicon of 1.25 kb. No amplicon was detected from samples of asymptomatic plants. The DNA samples from symptomatic plants were subjected to further PCR analysis with primer pair P1A/16S-SR as previously described (Wei et al. 2011). All three samples were PCR positive, and each yielded a 1,539-bp amplicon. The amplicons were cloned and sequenced with at least 6 coverage per base position. DNA sequence analysis confirmed that the amplicon represented a near-full-length 16S rRNA gene and a partial 16S-23S RNA gene intergenic spacer. The obtained DNA sequences were deposited into GenBank (accession nos. MH891144 through MH891146). Analysis of the sequences through the iPhyClassifier (Zhao et al. 2009, https://plantpathology.ba.ars.usda.gov/cgi-bin/resource/iphyclassifier.cgi) revealed that the phytoplasmas detected in the ScYL-diseased plants were closely related to the reference strain of Candidatus Phytoplasma asteris (aster yellows phytoplasma, >99.4% sequence similarity). Although the three 16S rRNA gene sequences differed from each other by two to five bases, all possessed the entire set of signature sequences (G196GGAGGA202, C444TGACGGTACC454, and C976ACAGTGGA GGTTATCAGTTG996) of Ca. P. asteris (Lee et al. 2004), indicating the three ScYL phytoplasma strains are sequevars (Davis et al. 2015) affiliated with a single phytoplasma species. To our knowledge, this is the first report of phytoplasmal ScYL disease in Mexico. Associations of aster yellows phytoplasma with sugarcane diseases were previously reported in Cuba and Brazil (Arocha et al. 1999; Silva et al. 2009). 
Recently, infection of aster yellows phytoplasma in weedy grass growing near sugarcane fields in Mexico was noted (Perez-Lopez and Dumonceaux 2017). Findings of aster yellows phytoplasma infecting sugarcane in the countries of Brazil, Cuba, and now Mexico (this report) underscore the need for disease surveillance of sugarcane in neighboring countries, because insect vectors capable of spreading aster yellows phytoplasma strains are known to be present over wide areas, including the Caribbean countries and the United States.","hji,kes",0,0,0,2,0,NA,NA +IND606755271,Visual features based automated identification of fish species using deep convolutional neural networks,"Morphological based fish species identification is an erroneous and time-consuming process. There are numerous fish species and due to their close resemblance with each other, it is difficult to classify them by external characters. Recently, computer vision and deep learning-based identification of different animal species is being widely used by the researchers. Convolutional Neural Network (CNN) is one of the most analytically powerful tools in deep learning architecture for the image classification based on visual features. This work aims to propose a deep learning framework based on the CNN method for fish species identification. The proposed CNN architecture contains 32 deep layers that are considerably deep to derive valuable and discriminating features from the image. The deep supervision is inflicted on the VGGNet architecture to increase the classification performance by instantly adding four convolutional layers to the training of each level in the network. To test the performance of proposed 32-Layer CNN architecture, we developed a dataset termed as Fish-Pak and is publicly available at Mendeley data (Fish-Pak: https://doi.org/10.17632/n3ydw29sbz.3#folder-6b024354-bae3-460a-a758-352685ba0e38). 
Fish-Pak contains 915 images with six distinct classes; Ctenopharyngodon idella (Grass carp), Cyprinus carpio (Common carp), Cirrhinus mrigala (Mori), Labeo rohita (Rohu), Hypophthalmichthys molitrix (Silver carp), and Catla catla (Thala) and three different image views (head region, body shape, and scale). To ensure the superior performance of proposed CNN architecture, we have carried out the experimental comparison with other deep learning frameworks involving VGG-16 for transfer learning, one block VGG, two block VGG, three block VGG, LeNet-5, AlexNet, GoogleNet, and ResNet-50 on the Fish-Pak data set. Comprehensive empirical analyses reveal that the proposed method achieves state of the art performance and outperforms existing methods.","hji,kes",0,0,0,2,0,NA,mendley data +IND606827856,Occurrence of the Stunt Nematode Neodolichorhynchus sulcatus as Pathogen of Pepper (Capsicum annuum) in Israel,"Stunt nematodes, family Dolichodoridae, are migratory ectoparasites of roots. They feed on epidermal cells by inserting only the stylet tip into the cells surface tissue. As one of the less common stunt nematodes, little is known about Neodolichorhynchus sulcatus (de Guiran, 1967) Jairajpuri & Hunt, 1984. To date, it has been reported from Morocco (de Guiran 1967), Spain (Tobar-Jimenez 1970), Cameroon (Sakwe and Geraert 1991), India (Sultan et al. 1995), and Iran (Pourjam et al. 2011). This is the first detection of N. sulcatus infecting pepper (variety 1204 Alef Beit Zeraim) root from a farm in Ein Yahav, Arava Rift, Israel (3039'54.3''N, 3515'02.9''E). In March 2019, we observed decline in development of pepper plants. We recovered nematodes from the pepper root zone at high population density. Nematodes were extracted from the soil using the Baermann tray technique. Identification was based on a combination of molecular and morphological methods. 
Genomic DNA was extracted from a single fresh nematode; amplicons from 18S rRNA, 28S D2 to D3 rRNA, and the intergenic spacer (ITS) region were generated following the procedure detailed in Qing et al. (2019); and sequencing was performed. The sequences were submitted to GenBank with accession numbers MK96525 to MK965256 (28S), MK965252 (18S), and MK965249 and MK965250 (ITS), providing the first rRNA data for N. sulcatus. Phylogenetic analysis placed this species as sister to Bitylenchus iphilus in 18S (98.15% similar in BLAST), sister to Paratrophurus bhutanensis in 28S (91.68% similar in BLAST), or sister to a well-supported clade containing B. iphilus (91.35% similar in BLAST, highest match), B. maximus, B. hispaniensis, P. bhutanensis, and P. bursifer in ITS (figures for phylogeny trees available at https://photos.app.goo.gl/2Y7fToEftBopbnB67). Although our species were not clustered with other Neodolichorhynchus species, morphology and morphometry confirmed its identity as N. sulcatus, including the cephalic region offset, basal bulb pyriform, a cuticle with 16 longitudinal ridges including the lateral fields, irregularly areolated lateral fields with three ridges, bursa not notched at tail tip, gubernaculum with smooth proximal end, and vulva lacking lateral flaps, tail cylindroid-conical with rounded and smooth terminus. 
Measurements (mean standard error, range, in m) for female (n = 10): body length 801 49.4 (731 to 901), anterior end to vulva 435 34.8 (410 to 520), V value 54.2 1.69 (52.4 to 57.7), lip height 4.53 0.31 (4.39 to 5.41), stylet length 21.4 0.37 (20.6 to 21.8), cone of stylet 11.4 0.63 (10.7 to 12.8), anterior end to center of median bulb 86.0 3.53 (78.7 to 91.3), pharynx length 146 4.05 (139 to 153), maximum body width 25.6 1.00 (24 to 27), anus/cloacal width 15.7 1.39 (14.3 to 17.9), tail length 47.2 3.85 (43.4 to 56.5); for male (n = 7): body length 786 64.0 (704 to 874), lip height 4.34 0.55 (3.67 to 5.24), stylet length 20.4 0.73 (19.7 to 21.7), cone of stylet 10.9 0.82 (10.3 to 12.7), anterior end to median bulb 81.2 3.54 (75.2 to 87), pharynx length 138 6.66 (132 to 148), maximum body width 22.6 2.15 (20.4 to 25.8), anus/cloacal width 17.7 1.50 (15.2 to 19.9), tail length 54.7 5.67 (46.6 to 62.6), spicule length 26.3 1.21 (24.2 to 27.6). To confirm pathogenicity, we performed inoculation assays in greenhouse conditions. Individual nematodes were manually picked, and 300 juveniles were inoculated onto healthy pepper (variety Maccabi) roots growing in sterile soil in a greenhouse. Three pots with four pepper plants per pot were inoculated, and three noninoculated pots served as a control. Nematodes were harvested from the root and soil 40 days after inoculation, yielding an average of 483 75 nematodes per pot. We observed a reduction in plant growth and necrotic spots on the roots similar to those infected pepper in Ein Yahav. These results confirmed the nematodes pathogenicity to pepper. This is the first report of N. sulcatus infecting pepper plants in Israel.","hji,kes",0,0,0,2,0,NA,NA +IND607322953,First Report of Grapevine Red Globe Virus in Grapevines in Washington State,"Grapevine red globe virus (GRGV; genus Maculavirus, family Tymoviridae) has been reported in grapevines (Vitis spp.) 
from Italy, Greece, France, China, Spain, and Germany and in California, U.S.A. (Cretazzo et al. 2017; Fan et al. 2016; Ruiz-Garcia et al. 2018; Sabanadzovic et al. 2000). During surveys of grapevine nurseries, a total of 241 composite samples, each consisting of four petioles from mature leaves/vine from five asymptomatic grapevines, from 33 grapevine (Vitis vinifera) cultivars were collected. Total RNA isolated from these samples using a Spectrum Total RNA isolation kit (Sigma-Aldrich, St. Louis, MO) was subjected to high-throughput sequencing (HTS) on an Illumina HiSeq2500 or NovaSeq 6000 platform in paired-end mode (Genomics Core Facility, Huntsman Cancer Institute, Utah University, Salt Lake City, UT). After trimming raw reads based on quality and ambiguity, the paired-end quality reads of approximately 120 (HiSeq) or 145 (NovaSeq) base pair (bp) length were assembled de novo into a pool of contigs (CLC Genomics Workbench 12). These contigs were subjected to BLASTn analysis against the nonredundant virus database from GenBank (https://www.ncbi.nlm.nih.gov/blast). A total of 49 contig sequences, ranging from 200 to 1,645 bp in length with an average coverage ranging up to 418.7, aligning with GRGV genome, were detected in cultivars Aglianico, Cabernet franc, Pinot gris, and Riesling. BLASTn analysis of contigs greater than 500 bp in length showed sequence identity between 88.5 and 95% with corresponding GRGV sequences reported from other countries. These results indicated the presence of genetically distinct isolates of GRGV. HTS data also revealed coinfection of GRGV in all samples with one or more of the following viruses and/or viroids: grapevine rupestris stem pitting associated virus, grapevine rupestris vein feathering virus, hop stunt viroid, or grapevine yellow speckle viroid-1. 
To further confirm infection by GRGV, total RNA was extracted from two asymptomatic Pinot gris vines that previously tested positive in HTS using the Spectrum Total RNA isolation kit and subjected to reverse transcription PCR using primers specific to the replicase polyprotein gene of the virus (RG4847F, 5'-TGGTCTGTTGTTCGCATCTT-3'; RG6076R, 5' CGGAAGGGGAAGCATTGATCT-3' Cretazzo et al. 2017). Sequence analysis of the approximately 1,250-bp amplicons (accession no. MT749359) showed 91.2% nt sequence identity with a corresponding sequence of GRGV isolate from Brazil (KX828704.1). To our knowledge, this is the first report of GRGV in Washington State. Together with the report of the occurrence of GRGV in California (Sabanadzovic et al. 2000), these results indicate wide geographical distribution of the virus. Although GRGV can cause asymptomatic infections in grapevines (Martelli et al. 2002), the economic importance of GRGV as single or coinfections with other viruses needs to be examined to assess the potential significance of the virus to grape production and grapevine certification programs.","hji,kes",0,0,0,2,0,NA,NA +IND607338495,The Ocean barcode atlas: A web service to explore the biodiversity and biogeography of marine organisms,"The Ocean Barcode Atlas (OBA) is a user friendly web service designed for biologists who wish to explore the biodiversity and biogeography of marine organisms locked in otherwise difficult to mine planetary scale DNA metabarcode data sets. Using just a web browser, a comprehensive picture of the diversity of a taxon or a barcode sequence is visualized graphically on world maps and interactive charts. Interactive results panels allow dynamic threshold adjustments and the display of diversity results in their environmental context measured at the time of sampling (temperature, oxygen, latitude, etc). Ecological analyses such as alpha and beta-diversity plots are produced via publication quality vector graphics representations. 
Currently, the Ocean Barcode Altas is deployed online with the (i) Tara Oceans eukaryotic 18S-V9 rDNA metabarcodes; (ii) Tara Oceans 16S/18S rRNA Tags; and (iii) 16S-V4V5 metabarcodes collected during the Malaspina-2010 expedition. Additional prokaryotic or eukaryotic plankton barcode data sets will be added upon availability, given they provide the required complement of barcodes (including raw reads to compute barcode abundance) associated with their contextual environmental variables. Ocean Barcode Atlas is a freely-available web service at: http://oba.mio.osupytheas.fr/ocean-atlas/.","hji,kes",0,0,0,2,0,NA,visualization +PMC3498971,"BioNetwork Bench: Database and Software for Storage, Query, and Analysis of Gene and Protein Networks","Gene and protein networks offer a powerful approach for integration of the disparate yet complimentary types of data that result from high-throughput analyses. Although many tools and databases are currently available for accessing such data, they are left unutilized by bench scientists as they generally lack features for effective analysis and integration of both public and private datasets and do not offer an intuitive interface for use by scientists with limited computational expertise. We describe BioNetwork Bench, an open source, user-friendly suite of database and software tools for constructing, querying, and analyzing gene and protein network models. It enables biologists to analyze public as well as private gene expression; interactively query gene expression datasets; integrate data from multiple networks; store and selectively share the data and results. Finally, we describe an application of BioNetwork Bench to the assembly and iterative expansion of a gene network that controls the differentiation of retinal progenitor cells into rod photoreceptors. 
The tool is available from http://bionetworkbench.sourceforge.net/ Background The emergence of high-throughput technologies has allowed many biological investigators to collect a great deal of information about the behavior of genes and gene products over time or during a particular disease state. Gene and protein networks offer a powerful approach for integration of the disparate yet complimentary types of data that result from such high-throughput analyses. There are a growing number of public databases, as well as tools for visualization and analysis of networks. However, such databases and tools have yet to be widely utilized by bench scientists, as they generally lack features for effective analysis and integration of both public and private datasets and do not offer an intuitive interface for use by biological scientists with limited computational expertise. Results We describe BioNetwork Bench, an open source, user-friendly suite of database and software tools for constructing, querying, and analyzing gene and protein network models. BioNetwork Bench currently supports a broad class of gene and protein network models (eg, weighted and un-weighted, undirected graphs, multi-graphs). It enables biologists to analyze public as well as private gene expression, macromolecular interaction and annotation data; interactively query gene expression datasets; integrate data from multiple networks; query multiple networks for interactions of interest; store and selectively share the data as well as results of analyses. BioNetwork Bench is implemented as a plug-in for, and hence is fully interoperable with, Cytoscape, a popular open-source software suite for visualizing macromolecular interaction networks. Finally, we describe an application of BioNetwork Bench to the problem of assembly and iterative expansion of a gene network that controls the differentiation of retinal progenitor cells into rod photoreceptors. 
Conclusions BioNetwork Bench provides a suite of open source software for construction, querying, and selective sharing of gene and protein networks. Although initially aimed at a community of biologists interested in retinal development, the tool can be adapted easily to work with other biological systems simply by populating the associated database with the relevant datasets.","hji,kes",0,0,0,2,0,NA,no new data or value add? +PMC3788421,PS3-52: Heart Health in Your Pocket: Lessons Learned from the Development of a Smartphone App,"

Background/Aims

As part of the Million Hearts national initiative to prevent cardiovascular disease (CVD), there are increasing calls to leverage health information technology. The Marshfield Clinic developed a Heart Health Mobile application (app) that is designed to improve awareness of CVD risk and promote risk factor control among regional smartphone users. It deploys an engaging user interface that provides a brief CVD risk assessment that takes into account self-reported behavioral, familial, and biometric risk factors, including blood pressure and lipids. Users are then directed to nearby community pharmacies, clinics, and other locations where more advanced CVD risk factor screenings can be obtained. Along with social media connections and measurement prompts, basic education materials are provided on key CVD prevention topics such as hypertension, dyslipidemia, weight management, and tobacco cessation.

Methods

A multidisciplinary team of 24 members was created to develop the app over a 30-day timeframe. This team included a broad cross-section of clinical professionals from medicine, epidemiology, health IT, usability and graphic designers, business analytics, and marketing. An Agile programming method was used to promote adaptive planning and evolutionary development in self-organizing, cross-functional teams.

Results

The iOS app was successfully developed, tested, and launched within the 30-day timeframe. It was submitted competitively as part of the Million Hearts Risk Check Challenge, a CVD prevention app contest sponsored by Office of the National Coordinator for Health Information Technology. The final product is shown in detail at http://www.youtube.com/watch?v=qfESTQipjtw. The app was developed in six different languages, and epidemiologic data on downloads, unique users, geo-segmentation, risk factor profile, and customer loyalty, among other data points, are actively collected.

Conclusions

Health-related consumer smartphone apps can be developed rapidly and brought to scale as part of healthcare delivery systems business and clinical strategies. They provide users with important information, education, and directions on CVD prevention and have wide-ranging potential across numerous health conditions. From an HMORN perspective, such apps also provide real-time data collection methods that can be used to identify health trends at a lower cost (and comparable quality) relative to traditional population research methods.","hji,kes",0,0,0,2,0,NA,NA +PMC5847379,Best Paper Selection,"Arnold CW, Wallace WD, Chen S, Oh A, Abtin F, Genshaft S, Binder S, Aberle D, Enzmann D. RadPath: A web-based system for integrating and correlating radiology and pathology findings during cancer diagnosis. Acad Radiol 2016 Jan;23(1):90-100","hji,kes",0,0,0,2,0,NA,NA +PMC5888665,T253. THE CORRELATION ANALYSIS BETWEEN RENAMING SCHIZOPHRENIA AND VISITING FREQUENCY OF MENTAL HEALTH SERVICES BY BIG DATA ANALYSIS (INTERNET SEARCHES AND NEWSPAPER ARTICLES) IN SOUTH KOREA,"Abstract

Background

Korean Neuropsychiatric Association changed the Korean term for schizophrenia from split-mind disorder to attunement disorder in 2012, to dispel the stigma associated with name, and to promote early detection and treatment. Information on the internet affects the public awareness and attitude toward schizophrenia. The main purpose of this study was to investigate the correlation between renaming schizophrenia and the pattern of mental health services utilization by big data analysis of internet (newspaper articles and internet searches) in Korea.

Methods

From January 2016 to September 2017, newspaper articles on attunement disorder and split-mind disorder available on the internet were classified as related with negative images like crime and helpful or positive in dispelling the stigma. The relationship between the number of anti-stigma newspaper articles and newspaper articles of schizophrenia containing both positive and negative images was examined. In addition, using Naver, a major internet search engine in Korea, we investigated the total number of internet searches of both old and new name of schizophrenia by gender differences. Finally, the frequency of the visits of mental health services of patients with schizophrenia was measured using the Korean Healthcare Bigdata Hub (http://opendata.hira.or.kr/home.do#none) for 14 months and the correlation between the frequency of the visits and the above big data was examined. The data were analyzed using the SPSS/WIN 24.0. Pearson correlation coefficients were used to analyze correlations.

Results

The amounts of newspaper articles containing anti-stigma of schizophrenia were correlated with the amounts of newspaper articles containing negative images like crime of the new name (attunement disorder) of schizophrenia (r=0.528, p<0.01), which was greater than the amounts of newspaper articles containing the old name (split-mind disorder) of schizophrenia (r=0.300, p<0.01). We also found that a strong positive correlation between the number of articles about attunement disorder and search frequency about the term on the internet. In addition, the search frequency was more highly related to the number of articles containing negative images of the illness (e.g., related crimes, r = 0.910, p<0.01) than that of articles providing positive aspects of the illness (e.g., dispelling stigma, r = 0.423, p<0.01). There was no significant correlation between the number of schizophrenia-related newspaper articles in previous month and the visits of mental health services of patients with schizophrenia in next month. There were no gender differences in internet searches. The correlation between the internet search frequency for attunement disorder in the previous month and the visits of the mental health services of patients with schizophrenia (r = 0.185, p>0.05) in next month was larger than the correlation of split-mind disorder searches with mental health services utilization (r = 0.082, p>0.05).

Discussion

Attunement disorder rather than split-mind disorder was appeared more frequently in newspaper articles of the anti-stigma characteristics. Attunement disorder seems to be more useful for anti-stigma campaign. Renaming schizophrenia didnt seem to affect the visiting frequency of mental health services. There was statistical limitation which was originated from the lack of numbers of patients information. It was because Korean Bigdata Hub provided patients information just for 14 months as monthly data. Also, it should be considered that the time period, the kinds of mental disorders and the search engine we investigated were limited. Future research needs to overcome these limitations.","hji,kes",0,0,0,2,0,NA,NA +PMC6012140,"TBIO-17. IMPLEMENTATION OF METHYLATION PROFILING FOR CNS TUMOR DIAGNOSIS IN THE PRINCESS MÁXIMA CENTER FOR PEDIATRIC ONCOLOGY, THE NETHERLANDS","Abstract

BACKGROUND

Since the fall of 2016, in the Netherlands diagnosis and treatment of children with CNS tumors is increasingly concentrated in the Princess Mxima Center (PMC), i.e. the newly established, national center for pediatric oncology. Because of its potential as a support tool for CNS tumor diagnosis, Infinium MethylationEPIC BeadChip analysis was readily implemented and routinely performed on pediatric CNS tumor samples in the PMC. AIM: To identify the challenges and opportunities of methylation profiling as a support tool for the diagnosis of pediatric CNS tumors. PATIENTS AND

METHODS

We have now analyzed >150 pediatric (formalin-fixed/paraffin-embedded) CNS tumor samples using methylation profiling and matched the profiles with the Heidelberg database (https://www.molecularneuropathology.org/mnp).

RESULTS

In > 90% of analyzed cases, the suggested methylation class corresponded very well with the pathological (histological +/- molecular) diagnosis. In some other cases, the suggested methylation class was not very helpful because the reliability score of the test was (too) low, or because the suggested class not adequately reflected the actual presence of a tumor (e.g. reactive/inflammatory changes) or was not (yet) as precise as desired (e.g. diffuse glioma/glioblastoma, IDH-wildtype, midline type (but not H3-mutant)). However, in many other cases the tool was very helpful for reaching a more precise diagnosis (esp. so for medulloblastomas, ependymomas) or directed towards the right diagnosis.

CONCLUSIONS

Methylation profiling is a very powerful tool to confirm, fine-tune and/or direct pediatric CNS tumor diagnostics. However, it is crucial to interpret this new level of information in the clinical, radiological and pathological context.","hji,kes",0,0,0,2,0,NA,not about the resource +PMC6666521,PSIII-22 Performance of DNA 600 Duroc sired pigs when split sex fed with commercial diets with or without a blend of phytonutrients (Lean Fuel),"Abstract The objective was to evaluate the effect of a blend of phytonutrients (Lean Fuel, LF) on performance of pigs in late finishing in a commercial research barn. A total of 590 DNA 600 Duroc finishing pigs (BW=75.7 + 1.0 kg) were blocked by weight and sex and allocated across two dietary treatments with 6 replications per treatment and 21 to 26 pigs per pen. Dietary treatments were: barrow (B) diets with and without LF and gilt (G) diets with and without LF. Diets were formulated to split-sex requirements (CON) for each period and LF diets were control diets + 0.125% LF. The experiment was 44-d where d 0 was 98-d post-wean. All data were analyzed using the MIXED procedure of SAS as a randomized complete block design as a 2 x 2 (diet x sex) factorial arrangement. Pen served as the experimental unit. Overall (d 0-44), the B gained more weight (P = 0.0282) and consumed more feed (P > 0.10) in ADFI. There was no diet x sex interaction (P > 0.10) for ADG. There was a tendency for an interaction (P = 0.0545) for ADFI, where LF B consumed more feed compared to CON B and CON G consumed more feed compared to LF G. There was an interaction for G:F (P = 0.0028) where G on LF had higher G:F compared to G on CON whereas G:F for B was not different to B on LF. In conclusion, LF improved ADG and G:F, but did so differently for G and B. 
http://www.conferenceharvester.com/","hji,kes",0,0,0,2,0,NA,NA +PMC6666854,"PSIV-13 Basal endogenous loss, standardized total tract digestibility, and retention of Ca in sows change throughout gestation, but microbial phytase reduces basal endogenous loss of Ca by gestating sows","Abstract The objective was to test the hypothesis that standardized total tract digestibility (STTD) of Ca and Ca and P retention and the response to microbial phytase in diets fed to sows are constant throughout gestation. Thirty-six sows (parity = 3.3) were allotted to 4 diets on d 7 post-breeding. Two corn-based diets in which calcium carbonate was the sole source of Ca and 2 Ca-free diets were formulated without or with phytase (500 units per kg). Sows were housed individually in metabolism crates during early-gestation, mid-gestation, and late-gestation, and feces and urine were quantitatively collected. Data were analyzed by repeated measures using a model that included phytase, period of gestation, and the interaction between phytase and period as fixed effects, and block and replicate as random effects. Interactions between period and phytase were not observed. The basal endogenous loss (BEL) of Ca was greater (P < 0.05) in early-gestation than in mid- and late-gestation, but phytase reduced (P = 0.002) BEL of Ca and tended (P = 0.099) to increase apparent total tract digestibility (ATTD) of P in the Ca-free diet. Phytase did not affect ATTD of DM, STTD of Ca, ATTD of P, or Ca and P retention in sows fed the diet containing calcium carbonate (Table 1). The ATTD of DM was not affected by period, but Ca retention and ATTD of Ca and P were least (P < 0.05) in mid-gestation, followed by early- and late-gestation, and the STTD of Ca in mid-gestation was also reduced (P < 0.05) compared with early- or late-gestation. Phosphorus retention was greater (P < 0.05) in late-gestation than in the earlier periods. 
In conclusion, BEL of Ca, STTD of Ca, ATTD of P, and Ca and P retention in sows change throughout gestation regardless of use of phytase. http://www.conferenceharvester.com/","hji,kes",0,0,0,2,0,NA,NA +PMC7238667,Cover Image: Metronomic chemotherapy of cyclophosphamide plus methotrexate for advanced breast cancer: Real‐world data analyses and experience of one center,"The cover image is based on the Original Article Metronomic chemotherapy of cyclophosphamide plus methotrexate in advanced breast cancer: Real-world data and experience of one center (CHJC-D-19-00380) by Shusen Wang et al., https://doi.org/10.1002/cac2.12029.","hji,kes",0,0,0,2,0,NA,NA +PMC7776433,475. Describing the impact of the COVID-19 pandemic on HIV care in Latin America,"Abstract

Background

The effects of the COVID-19 pandemic on people living with HIV (PWH) are unknown. Beyond SARS-CoV-2 co-infection, the pandemic may have devastating consequences for HIV care delivery. Understanding these is crucial as reduced antiretroviral therapy (ART) availability alone could lead to =500,000 AIDS-related deaths in 20202021. With Latin America now a focal point in the pandemic, we sought to describe the impact of COVID-19 on HIV care at Latin American clinical sites.

Methods

Caribbean, Central and South America network for HIV epidemiology (CCASAnet) and additional Brazilian HIV care sites in Argentina, Brazil, Chile, Haiti, Honduras, Mexico, and Peru were included. An electronic survey of COVID-19 effects on HIV clinic operations was administered in Spanish or English via phone and email, April 28-June 2, 2020. We also compared national COVID-19 case, mortality, and policy data from public sources.

Results

Brazils and Mexicos epidemics appear most pronounced, with >10,000 confirmed COVID-19-related deaths (Figure 1); countries implemented social distancing policies at different times after initial cases, with Haiti earliest and Mexico latest (Figure 2). Nearly all 13 sites reported decreased hours and providers for HIV care. Twelve of 13 reported increased use of telehealth, suspension/postponements of routine HIV appointments, and/or suspension of HIV research. Eleven of 13 reported initiation of new COVID-19 research but suspension of community HIV testing, and nearly half provided additional ART supplies. Nearly 70% reported impacts on HIV viral load testing and nearly 40% reported personal protective equipment stock-outs (Table). All 13 sites experienced changes in resources/services in tandem with national policies; there was wide variation, however, in the number of economic and health supports implemented thus far (e.g., quarantines, tax deferrals, interest rate reductions, etc.), from 172 COVID-19-related policies in Brazil to only 30 in Mexico. Table Site Assessment of Impacts of the COVID-19 Pandemic on HIV services in Latin America at CCASAnet and Coorte Sites, N=13 Figure 1. Cumulative mortality due to COVID-19 in countries within which CCASAnet and Coorte sites are located Figure 1 footnote: Source for mortality counts: the WHO COVID-19 Dashboard, available at: https://covid19.who.int/ All data were up-to-date as of, and were accessed on, June 17th, 2020 Figure 2. 
Cumulative cases of COVID-19 in countries within which CCASAnet and Coorte sites are located and dates (relative to the day on which the first positive case of COVID-19 was detected) of general social distancing, public health emergency, or mass quarantine policy introduction (vertical dashed lines), 2020 Figure 2 footnote: Source for case counts: the WHO COVID-19 Dashboard, available at: https://covid19.who.int/ Source for health policy implementation: the United Nations Economic Council for Latin America & the Caribbean, available at: https://cepalstat-prod.cepal.org/forms/covid-countrysheet/index.html All data were up-to-date as of, and were accessed on, June 17th, 2020

Conclusion

The COVID-19 pandemic has already had a substantial effect on daily operations of HIV clinics in Latin America. The downstream effects of these impacts on HIV outcomes in Latin America will need to be further studied.

Disclosures

All Authors: No reported disclosures","hji,kes",0,0,0,2,0,NA,clinical +PMC7989604,"112 Outcomes and Clinical Characteristics of COVID-19 Disease in the Frail, Elderly Population of Tayside","Abstract

Introduction

With advancing age, frailty, multi-morbidity and need for care, elderly patients are some of the most vulnerable to Covid-19 disease. In NHS Tayside, a dedicated Covid-19 Medicine for the Elderly (MFE) Team was formed to care for patients identified as frail and likely to benefit from comprehensive geriatric assessment.

Methods

All Covid-19 patients meeting frailty criteria1, cared for by the Covid-19 MFE Team were identified. Data on outcomes and clinical characteristics for all (140) patients admitted during the first pandemic wave (MarchJuly 2020) was collected using electronic patient records and analysed.

Results

Patients were predominantly male (58.6%). Ages ranged from 6599years, with 43.6% aged =85years. 82.1% had one or more of cough, fever and anosmia on admission fitting Covid-19 case definition 2. Lymphopenia was present in 92.1%. Of note, 26.5% of patients had a normal or unchanged chest x-ray report, with only 10.2% showing bilateral peripheral infiltrates. 28-day mortality was 37.1% with Covid-19 Disease listed as primary cause of death in 90.4%.

Conclusion(s)

Entering further waves of infection, it is vital that we understand the clinical presentation and course of Covid-19 disease in elderly patients. Our data highlights that any Covid-19 symptom, even in isolation, should raise suspicion of disease. Chest x-rays should not be used alone as a diagnostic tool. The presence of lymphopenia should raise suspicion of Covid-19 infection. In developing an understanding of how elderly patients with Covid-19 present, we can ensure early identification and initiation of appropriate infection control measures.

References

1. Healthcare Improvement Scotland. Think Frailty. 2014. http://www.healthcareimprovementscotland.org/his/idoc.ashx?docid=8abd8530-48f3-4152-bbfb-d0918b870ec9&version=-1 2. Scottish Government. Update to Coronavirus Symptoms 2020. https://www.gov.scot/news/update-to-coronavirus-symptoms","hji,kes",0,0,0,2,0,NA,NA +PMC8054692,"A Spatial Web Application to Explore the Interactions between Human Mobility, Government Policies, and COVID-19 Cases","Reports of coronavirus disease 2019 (COVID-19) cases began in December 2019. Soon after, the virus had spread around the world and became a pandemic. Social restrictions, quarantines, and other governmental policies in response to the pandemic altered normal operations across the world. One area significantly affected is human mobility. Typical movement patterns have been hindered by the pandemic. But inversely, mobility patterns can influence patterns of the virus. With this in mind, we created an interactive web application to visualize in near-real time the relationship between the COVID-19 pandemic and human mobility, as well as the impact of governmental policies at different spatial scales. The web application allows users to select a country at the global scale or a state or county for the USA and then displays a corresponding plot that compares human mobility to COVID-19 cases across time for the location, as well as to policy data. The application is useful for quickly revealing insightful patterns. First, the initial impact of the COVID-19 pandemic was a rather sudden decrease in mobility. Second, a relationship exists between mobility and COVID-19 offset by a lag, but that lag is not consistent over space or time. Third, spatial autocorrelation of relationship is apparent, meaning locations near each other share similar patterns. Overall, the application is a useful data visualization tool that helps uncover patterns that might otherwise go unnoticed. 
The application is available at this link: https://chrischapin7.shinyapps.io/covid19_vs_humanmobility/","hji,kes",0,0,0,2,0,NA,NA +PMC8090191,Study of Patients’ Characteristics and Mutual Impact Between Covid-19 and Hyperglycemia at a Community Hospital in Central Brooklyn,"Abstract Background: Studies have shown that poorly-controlled hyperglycemia worsens the outcomes in patients with COVID-19 (C-19) and C-19 may damage pancreatic islets via ACE2 receptors causing acute hyperglycemia. The major population we serve at Kingsbrook Jewish Medical Center (KJMC) are underprivileged with many of them having multiple comorbidities. Methods: This is a retrospective study wherein patients, admitted from February 2020 to April 2020 with hyperglycemia, were selected and divided into 2 groups based on presence or absence of C-19. Data include demographics, comorbidities, blood glucose level, serum osmolality, serum bicarbonate, anion gap, acute kidney injury (AKI), serum creatinine, ICU admission, length of stay (LOS) and mortality. Data were analyzed using descriptive study and T-test. Results: 100 patients were included in the C-19 group (CG) and 88 patients were included in the Non C-19 group (NCG). Major comorbidities were similar in both groups including HTN, DM, CKD followed by ESRD. Mean age of patients (years) was 65.68 in CG and 61.17 in NCG. 61% were male in CG and 53.41% were male in NCG. 16% and 9% developed DKA and HHS in CG, and 13.64% and 6.82% developed DKA and HHS in NCG respectively. 15% in CG had combined DKA & HHS and 3.41% had same in NCG. Mean blood glucose level (mg/dl) was 541.6 in CG and 460.0 in NCG (p=0.03). Mean serum osmolality (mOsm/kg) was 335.7 (SD41.01) in CG and 317.1 (SD30.54) in NCG (p=0.01). Mean serum bicarbonate (mEq/L) was 17.73 (SD6.31) in CG and 21.46 (SD5.94) in NCG (p<0.0001). Mean anion gap was 17.93 (SD7.6) in CG and 13.10 (SD7.2) in NCG (p<0.0001). 56% in CG and 37% in NCG developed AKI respectively (p=0.01). 
Mean serum creatinine (mg/dl) was 4.22 in CG and 1.65 in NCG (p=0.004). 55% of CG were admitted to ICU and 34% of NCG were admitted to ICU (p=0.003). Median LOS (days) in discharged patients was 8 in CG and 5 in NCG (p=0.02). Mortality was 40% in CG and 3.41% in NCG (p<0.0001). 12 patients in CG and 2 patients in NCG developed new-onset diabetes. In the subset of DKA, interestingly, mean age (years) was 61.63 (SD17.73) in CG and 39.67 in NCG (SD13.39) (p=0.001). Conclusion: In our study, patients in the CG carry worse laboratory parameters, unfavorable clinical outcomes and strikingly higher mortality. We discovered increased incidence of new-onset diabetes and elderly DKA in CG. In an inner city population like ours, the burden of DM with significant social and health care disparities is quite severe. Diabetic patients with concurrent C-19 infection can have particularly negative outcomes and C-19 possibly damages the pancreatic islets resulting in acute hyperglycemic crisis. Further research on larger population is required. References: (1)https://dx.doi.org/10.1016%2Fj.diabres.2020.108142(2) https://doi.org/10.2337/dc20-0723(3)https://www.nejm.org/doi/full/10.1056/NEJMc2018688","hji,kes",0,0,0,2,0,NA,NA diff --git a/data/manual_ner_extraction.csv b/data/manual_ner_extraction.csv new file mode 100644 index 0000000..85e6c71 --- /dev/null +++ b/data/manual_ner_extraction.csv @@ -0,0 +1,554 @@ +id,title,abstract,full_name,common_name,url,short_description +28791657,MEGALEX: A megastudy of visual and auditory word recognition.,"Using the megastudy approach, we report a new database (MEGALEX) of visual and auditory lexical decision times and accuracy rates for tens of thousands of words. We collected visual lexical decision data for 28,466 French words and the same number of pseudowords, and auditory lexical decision data for 17,876 French words and the same number of pseudowords (synthesized tokens were used for the auditory modality). 
This constitutes the first large-scale database for auditory lexical decision, and the first database to enable a direct comparison of word recognition in different modalities. Different regression analyses were conducted to illustrate potential ways to exploit this megastudy database. First, we compared the proportions of variance accounted for by five word frequency measures. Second, we conducted item-level regression analyses to examine the relative importance of the lexical variables influencing performance in the different modalities (visual and auditory). Finally, we compared the similarities and differences between the two modalities. All data are freely available on our website ( https://sedufau.shinyapps.io/megalex/ ) and are searchable at www.lexique.org , inside the Open Lexique search engine.",MEGALEX,MEGALEX,https://sedufau.shinyapps.io/megalex/,"the first large-scale database for auditory lexical decision, and the first database to enable a direct comparison of word recognition in different modalities" +29718389,An update on PUG-REST: RESTful interface for programmatic access to PubChem.,"PubChem (https://pubchem.ncbi.nlm.nih.gov) is one of the largest open chemical information resources available. It currently receives millions of unique users per month on average, serving as a key resource for many research fields such as cheminformatics, chemical biology, medicinal chemistry, and drug discovery. PubChem provides multiple programmatic access routes to its data and services. One of them is PUG-REST, a Representational State Transfer (REST)-like web service interface to PubChem. On average, PUG-REST receives more than a million requests per day from tens of thousands of unique users. The present paper provides an update on PUG-REST since our previous paper published in 2015. This includes access to new kinds of data (e.g. 
concise bioactivity data, table of contents headings, etc.), full implementation of synchronous fast structure search, support for assay data retrieval using accession identifiers in response to the deprecation of NCBI's GI numbers, data exchange between PUG-REST and NCBI's E-Utilities through the List Gateway, implementation of dynamic traffic control through throttling, and enhanced usage policies. In addition, example Perl scripts are provided, which the user can easily modify, run, or translate into another scripting language.",PUG-REST,PUG-REST,https://pubchem.ncbi.nlm.nih.gov,RESTful interface for programmatic access to PubChem +30445657,"The European Bioinformatics Institute in 2018: tools, infrastructure and training.","The European Bioinformatics Institute (https://www.ebi.ac.uk/) archives, curates and analyses life sciences data produced by researchers throughout the world, and makes these data available for re-use globally (https://www.ebi.ac.uk/). Data volumes continue to grow exponentially: total raw storage capacity now exceeds 160 petabytes, and we manage these increasing data flows while maintaining the quality of our services. This year we have improved the efficiency of our computational infrastructure and doubled the bandwidth of our connection to the worldwide web. We report two new data resources, the Single Cell Expression Atlas (https://www.ebi.ac.uk/gxa/sc/), which is a component of the Expression Atlas; and the PDBe-Knowledgebase (https://www.ebi.ac.uk/pdbe/pdbe-kb), which collates functional annotations and predictions for structure data in the Protein Data Bank. Additionally, Europe PMC (http://europepmc.org/) has added preprint abstracts to its search results, supplementing results from peer-reviewed publications. EMBL-EBI maintains over 150 analytical bioinformatics tools that complement our data resources. 
We make these tools available for users through a web interface as well as programmatically using application programming interfaces, whilst ensuring the latest versions are available for our users. Our training team, with support from all of our staff, continued to provide on-site, off-site and web-based training opportunities for thousands of researchers worldwide this year.",Single Cell Expression Atlas,Single Cell Expression Atlas,https://www.ebi.ac.uk/gxa/sc/, +30445657,"The European Bioinformatics Institute in 2018: tools, infrastructure and training.","The European Bioinformatics Institute (https://www.ebi.ac.uk/) archives, curates and analyses life sciences data produced by researchers throughout the world, and makes these data available for re-use globally (https://www.ebi.ac.uk/). Data volumes continue to grow exponentially: total raw storage capacity now exceeds 160 petabytes, and we manage these increasing data flows while maintaining the quality of our services. This year we have improved the efficiency of our computational infrastructure and doubled the bandwidth of our connection to the worldwide web. We report two new data resources, the Single Cell Expression Atlas (https://www.ebi.ac.uk/gxa/sc/), which is a component of the Expression Atlas; and the PDBe-Knowledgebase (https://www.ebi.ac.uk/pdbe/pdbe-kb), which collates functional annotations and predictions for structure data in the Protein Data Bank. Additionally, Europe PMC (http://europepmc.org/) has added preprint abstracts to its search results, supplementing results from peer-reviewed publications. EMBL-EBI maintains over 150 analytical bioinformatics tools that complement our data resources. We make these tools available for users through a web interface as well as programmatically using application programming interfaces, whilst ensuring the latest versions are available for our users. 
Our training team, with support from all of our staff, continued to provide on-site, off-site and web-based training opportunities for thousands of researchers worldwide this year.",PDBe-Knowledgebase,PDBe-Knowledgebase,https://www.ebi.ac.uk/pdbe/pdbe-kb, +31501868,Diurnal.plant.tools: Comparative Transcriptomic and Co-expression Analyses of Diurnal Gene Expression of the Archaeplastida Kingdom.,"Almost all organisms coordinate some aspects of their biology through the diurnal cycle. Photosynthetic organisms, and plants especially, have established complex programs that coordinate physiological, metabolic and developmental processes with the changing light. The diurnal regulation of the underlying transcriptional processes is observed when groups of functionally related genes (gene modules) are expressed at a specific time of the day. However, studying the diurnal regulation of these gene modules in the plant kingdom was hampered by the large amount of data required for the analyses. To meet this need, we used gene expression data from 17 diurnal studies spanning the whole Archaeplastida kingdom (Plantae kingdom in the broad sense) to make an online diurnal database. We have equipped the database with tools that allow user-friendly cross-species comparisons of gene expression profiles, entire co-expression networks, co-expressed clusters (involved in specific biological processes), time-specific gene expression and others. We exemplify how these tools can be used by studying three important biological questions: (i) the evolution of cell division, (ii) the diurnal control of gene modules in algae and (iii) the conservation of diurnally controlled modules across species. 
The database is freely available at http://diurnal.plant.tools.",Diurnal.plant.tools,Diurnal.plant.tools,http://diurnal.plant.tools,Comparative Transcriptomic and Co-expression Analyses of Diurnal Gene Expression of the Archaeplastida Kingdom +32291734,"The Auditory English Lexicon Project: A multi-talker, multi-region psycholinguistic database of 10,170 spoken words and nonwords.","The Auditory English Lexicon Project (AELP) is a multi-talker, multi-region psycholinguistic database of 10,170 spoken words and 10,170 spoken nonwords. Six tokens of each stimulus were recorded as 44.1-kHz, 16-bit, mono WAV files by native speakers of American, British, and Singapore English, with one from each gender. Intelligibility norms, as determined by average identification scores and confidence ratings from between 15 and 20 responses per token, were obtained from 561 participants. Auditory lexical decision accuracies and latencies, with between 25 and 36 responses per token, were obtained from 438 participants. The database also includes a variety of lexico-semantic variables and structural indices for the words and nonwords, as well as participants' individual difference measures such as age, gender, language background, and proficiency. Taken together, there are a total of 122,040 sound files and over 4 million behavioral data points in the AELP. We describe some of the characteristics of this database. 
This resource is freely available from a website ( https://inetapps.nus.edu.sg/aelp/ ) hosted by the Department of Psychology at the National University of Singapore.", Auditory English Lexicon Project,AELP,https://inetapps.nus.edu.sg/aelp/,"a multi-talker, multi-region psycholinguistic database of 10,170 spoken words and 10,170 spoken nonwords" +32291734,"The Auditory English Lexicon Project: A multi-talker, multi-region psycholinguistic database of 10,170 spoken words and nonwords.","The Auditory English Lexicon Project (AELP) is a multi-talker, multi-region psycholinguistic database of 10,170 spoken words and 10,170 spoken nonwords. Six tokens of each stimulus were recorded as 44.1-kHz, 16-bit, mono WAV files by native speakers of American, British, and Singapore English, with one from each gender. Intelligibility norms, as determined by average identification scores and confidence ratings from between 15 and 20 responses per token, were obtained from 561 participants. Auditory lexical decision accuracies and latencies, with between 25 and 36 responses per token, were obtained from 438 participants. The database also includes a variety of lexico-semantic variables and structural indices for the words and nonwords, as well as participants' individual difference measures such as age, gender, language background, and proficiency. Taken together, there are a total of 122,040 sound files and over 4 million behavioral data points in the AELP. We describe some of the characteristics of this database. 
This resource is freely available from a website ( https://inetapps.nus.edu.sg/aelp/ ) hosted by the Department of Psychology at the National University of Singapore.", Auditory English Lexicon Project,AELP,https://inetapps.nus.edu.sg/aelp/,"A multi-talker, multi-region psycholinguistic database of 10,170 spoken words and nonwords" +32548865,The UK Veterinary Immunological Toolbox Website: promoting vaccine research by facilitating communication and removing reagent barriers.,"Using the best animal models to study immune responses against specific pathogens or vaccines can dramatically accelerate our understanding. Veterinary species are well studied, particularly livestock, to reduce their disease burden. They have also proven to be powerful models, especially for zoonotic pathogens and novel vaccination strategies. A prerequisite for any model selection is having the right quality and range of species-specific immunological reagents. To help promote the widest possible use of veterinary species, an open access website (https://www.immunologicaltoolbox.co.uk) has been created as a central community annotated hub for veterinary immunological reagents. The website is also the portal into services offered by the UK Immunological Toolbox project that includes antibody generation, sequencing and recombinant expression. The funding for this effort is linked into sustainable sources, but ultimate success relies on community engagement to continually increase the quality and quantity of information. 
It is hoped that as more users and reagent owners engage, it will become an essential resource for researchers, veterinarians and clinicians alike by removing barriers that prevent the use of the most informative animal models.",UK Immunological Toolbox,UK Immunological Toolbox,https://www.immunologicaltoolbox.co.uk,a central community annotated hub for veterinary immunological reagents +33455583,Establishment and application of information resource of mutant mice in RIKEN BioResource Research Center.,"Online databases are crucial infrastructures to facilitate the wide effective and efficient use of mouse mutant resources in life sciences. The number and types of mouse resources have been rapidly growing due to the development of genetic modification technology with associated information of genomic sequence and phenotypes. Therefore, data integration technologies to improve the findability, accessibility, interoperability, and reusability of mouse strain data becomes essential for mouse strain repositories. In 2020, the RIKEN BioResource Research Center released an integrated database of bioresources including, experimental mouse strains, Arabidopsis thaliana as a laboratory plant, cell lines, microorganisms, and genetic materials using Resource Description Framework-related technologies. The integrated database shows multiple advanced features for the dissemination of bioresource information. The current version of our online catalog of mouse strains which functions as a part of the integrated database of bioresources is available from search bars on the page of the Center ( https://brc.riken.jp ) and the Experimental Animal Division ( https://mus.brc.riken.jp/ ) websites. 
The BioResource Research Center also released a genomic variation database of mouse strains established in Japan and Western Europe, MoG+ ( https://molossinus.brc.riken.jp/mogplus/ ), and a database for phenotype-phenotype associations across the mouse phenome using data from the International Mouse Phenotyping Platform. In this review, we describe features of current version of databases related to mouse strain resources in RIKEN BioResource Research Center and discuss future views.",MoG+,MoG+,https://molossinus.brc.riken.jp/mogplus/ ,a genomic variation database of mouse strains established in Japan and Western Europe +34224351,Deep Learning for Ultrasound Image Formation: CUBDL Evaluation Framework & Open Datasets.,"Deep learning for ultrasound image formation is rapidly garnering research support and attention, quickly rising as the latest frontier in ultrasound image formation, with much promise to balance both image quality and display speed. Despite this promise, one challenge with identifying optimal solutions is the absence of unified evaluation methods and datasets that are not specific to a single research group. This paper introduces the largest known international database of ultrasound channel data and describes associated evaluation methods that were initially developed for the Challenge on Ultrasound Beamforming with Deep Learning (CUBDL), which was offered as a component of the 2020 IEEE International Ultrasonics Symposium. We summarize the challenge results and present qualitative and quantitative assessments using both the initially closed CUBDL evaluation test dataset (which was crowd-sourced from multiple groups around the world) and additional in vivo breast ultrasound data contributed after the challenge was completed. 
As an example quantitative assessment, single plane wave images from the CUBDL Task 1 dataset produced a mean generalized contrast-to-noise ratio (gCNR) of 0.67 and a mean lateral resolution of 0.42 mm when formed with delay-and-sum beamforming, compared to a mean gCNR as high as 0.81 and a mean lateral resolution as low as 0.32 mm when formed with networks submitted by the challenge winners. We also describe contributed CUBDL data that may be used for training of future networks. The compiled database includes a total of 576 image acquisition sequences. We additionally introduce a neural network-based global sound speed estimator implementation that was necessary to fairly evaluate results obtained with this international database. The integration of CUBDL evaluation methods, evaluation code, network weights from the challenge winners, and all datasets described herein are publicly available (visit https://cubdl.jhu.edu for details).",,,https://cubdl.jhu.edu,a total of 576 image acquisition sequences +29899596,Cyanobacterial diversity held in microbial biological resource centers as a biotechnological asset: the case study of the newly established LEGE culture collection.,"Cyanobacteria are a well-known source of bioproducts which renders culturable strains a valuable resource for biotechnology purposes. We describe here the establishment of a cyanobacterial culture collection (CC) and present the first version of the strain catalog and its online database (http://lege.ciimar.up.pt/). The LEGE CC holds 386 strains, mainly collected in coastal (48%), estuarine (11%), and fresh (34%) water bodies, for the most part from Portugal (84%). By following the most recent taxonomic classification, LEGE CC strains were classified into at least 46 genera from six orders (41% belong to the Synechococcales), several of them are unique among the phylogenetic diversity of the cyanobacteria. 
For all strains, primary data were obtained and secondary data were surveyed and reviewed, which can be reached through the strain sheets either in the catalog or in the online database. An overview on the notable biodiversity of LEGE CC strains is showcased, including a searchable phylogenetic tree and images for all strains. With this work, 80% of the LEGE CC strains have now their 16S rRNA gene sequences deposited in GenBank. Also, based in primary data, it is demonstrated that several LEGE CC strains are a promising source of extracellular polymeric substances (EPS). Through a review of previously published data, it is exposed that LEGE CC strains have the potential or actual capacity to produce a variety of biotechnologically interesting compounds, including common cyanotoxins or unprecedented bioactive molecules. Phylogenetic diversity of LEGE CC strains does not entirely reflect chemodiversity. Further bioprospecting should, therefore, account for strain specificity of the valuable cyanobacterial holdings of LEGE CC.",LEGE CC,LEGE CC,http://lege.ciimar.up.pt/, +29997612,CDG: An Online Server for Detecting Biologically Closest Disease-Causing Genes and its Application to Primary Immunodeficiency.,"High-throughput genomic technologies yield about 20,000 variants in the protein-coding exome of each individual. A commonly used approach to select candidate disease-causing variants is to test whether the associated gene has been previously reported to be disease-causing. In the absence of known disease-causing genes, it can be challenging to associate candidate genes with specific genetic diseases. To facilitate the discovery of novel gene-disease associations, we determined the putative biologically closest known genes and their associated diseases for 13,005 human genes not currently reported to be disease-associated. 
We used these data to construct the closest disease-causing genes (CDG) server, which can be used to infer the closest genes with an associated disease for a user-defined list of genes or diseases. We demonstrate the utility of the CDG server in five immunodeficiency patient exomes across different diseases and modes of inheritance, where CDG dramatically reduced the number of candidate genes to be evaluated. This resource will be a considerable asset for ascertaining the potential relevance of genetic variants found in patient exomes to specific diseases of interest. The CDG database and online server are freely available to non-commercial users at: http://lab.rockefeller.edu/casanova/CDG.",closest disease-causing genes server,CDG,http://lab.rockefeller.edu/casanova/CDG,An Online Server for Detecting Biologically Closest Disease-Causing Genes and its Application to Primary Immunodeficiency +30967549,iFISH is a publically available resource enabling versatile DNA FISH to study genome architecture.,"DNA fluorescence in situ hybridization (DNA FISH) is a powerful method to study chromosomal organization in single cells. At present, there is a lack of free resources of DNA FISH probes and probe design tools which can be readily applied. Here, we describe iFISH, an open-source repository currently comprising 380 DNA FISH probes targeting multiple loci on the human autosomes and chromosome X, as well as a genome-wide database of optimally designed oligonucleotides and a freely accessible web interface ( http://ifish4u.org ) that can be used to design DNA FISH probes. We individually validate 153 probes and take advantage of our probe repository to quantify the extent of intermingling between multiple heterologous chromosome pairs, showing a much higher extent of intermingling in human embryonic stem cells compared to fibroblasts. 
In conclusion, iFISH is a versatile and expandable resource, which can greatly facilitate the use of DNA FISH in research and diagnostics.",iFISH,iFISH,http://ifish4u.org,a publically available resource enabling versatile DNA FISH to study genome architecture +32382747,RSVdb: a comprehensive database of transcriptome RNA structure.,"RNA fulfills a crucial regulatory role in cells by folding into a complex RNA structure. To date, a chemical compound, dimethyl sulfate (DMS), has been developed to probe the RNA structure at the transcriptome level effectively. We proposed a database, RSVdb (https://taolab.nwafu.edu.cn/rsvdb/), for the browsing and visualization of transcriptome RNA structures. RSVdb, including 626 225 RNAs with validated DMS reactivity from 178 samples in eight species, supports four main functions: information retrieval, research overview, structure prediction and resource download. Users can search for species, studies, transcripts and genes of interest; browse the quality control of sequencing data and statistical charts of RNA structure information; preview and perform online prediction of RNA structures in silico and under DMS restraint of different experimental treatments and download RNA structure data for species and studies. Together, RSVdb provides a reference for RNA structure and will support future research on the function of RNA structure at the transcriptome level.",RSVdb,RSVdb,https://taolab.nwafu.edu.cn/rsvdb/,a comprehensive database of transcriptome RNA structure +33021634,The Nucleome Data Bank: web-based resources to simulate and analyze the three-dimensional genome.,"We introduce the Nucleome Data Bank (NDB), a web-based platform to simulate and analyze the three-dimensional (3D) organization of genomes. The NDB enables physics-based simulation of chromosomal structural dynamics through the MEGABASE + MiChroM computational pipeline. 
The input of the pipeline consists of epigenetic information sourced from the Encode database; the output consists of the trajectories of chromosomal motions that accurately predict Hi-C and fluorescence insitu hybridization data, as well as multiple observations of chromosomal dynamics in vivo. As an intermediate step, users can also generate chromosomal sub-compartment annotations directly from the same epigenetic input, without the use of any DNA-DNA proximity ligation data. Additionally, the NDB freely hosts both experimental and computational structural genomics data. Besides being able to perform their own genome simulations and download the hosted data, users can also analyze and visualize the same data through custom-designed web-based tools. In particular, the one-dimensional genetic and epigenetic data can be overlaid onto accurate 3D structures of chromosomes, to study the spatial distribution of genetic and epigenetic features. The NDB aims to be a shared resource to biologists, biophysicists and all genome scientists. The NDB is available at https://ndb.rice.edu.",Nucleome Data Bank,NDB,https://ndb.rice.edu,web-based resources to simulate and analyze the three-dimensional genome +33021634,The Nucleome Data Bank: web-based resources to simulate and analyze the three-dimensional genome.,"We introduce the Nucleome Data Bank (NDB), a web-based platform to simulate and analyze the three-dimensional (3D) organization of genomes. The NDB enables physics-based simulation of chromosomal structural dynamics through the MEGABASE + MiChroM computational pipeline. The input of the pipeline consists of epigenetic information sourced from the Encode database; the output consists of the trajectories of chromosomal motions that accurately predict Hi-C and fluorescence insitu hybridization data, as well as multiple observations of chromosomal dynamics in vivo. 
As an intermediate step, users can also generate chromosomal sub-compartment annotations directly from the same epigenetic input, without the use of any DNA-DNA proximity ligation data. Additionally, the NDB freely hosts both experimental and computational structural genomics data. Besides being able to perform their own genome simulations and download the hosted data, users can also analyze and visualize the same data through custom-designed web-based tools. In particular, the one-dimensional genetic and epigenetic data can be overlaid onto accurate 3D structures of chromosomes, to study the spatial distribution of genetic and epigenetic features. The NDB aims to be a shared resource to biologists, biophysicists and all genome scientists. The NDB is available at https://ndb.rice.edu.",Nucleome Data Bank,NDB,https://ndb.rice.edu,a web-based platform to simulate and analyze the three-dimensional (3D) organization of genomes +33068420,Chewie Nomenclature Server (chewie-NS): a deployable nomenclature server for easy sharing of core and whole genome MLST schemas.,"Chewie Nomenclature Server (chewie-NS, https://chewbbaca.online/) allows users to share genome-based gene-by-gene typing schemas and to maintain a common nomenclature, simplifying the comparison of results. The combination between local analyses and a public repository of allelic data strikes a balance between potential confidentiality issues and the need to compare results. The possibility of deploying private instances of chewie-NS facilitates the creation of nomenclature servers with a restricted user base to allow compliance with the strictest data policies. Chewie-NS allows users to easily share their own schemas and to explore publicly available schemas, including informative statistics on schemas and loci presented in interactive charts and tables. Users can retrieve all the information necessary to run a schema locally or all the alleles identified at a particular locus. 
The integration with the chewBBACA suite enables users to directly upload new schemas to chewie-NS, download existing schemas and synchronize local and remote schemas from chewBBACA command line version, allowing an easier integration into high-throughput analysis pipelines. The same REST API linking chewie-NS and the chewBBACA suite supports the interaction of other interfaces or pipelines with the databases available at chewie-NS, facilitating the reusability of the stored data.",Chewie Nomenclature Server,chewie-NS,https://chewbbaca.online/, a deployable nomenclature server for easy sharing of core and whole genome MLST schemas +33279968,"Gene Circuit Explorer (GeneEx): an interactive web-app for visualizing, simulating and analyzing gene regulatory circuits.","

Summary

GeneEx is an interactive web-app that uses an ODE-based mathematical modeling approach to simulate, visualize and analyze gene regulatory circuits (GRCs) for an explicit kinetic parameter set or for a large ensemble of random parameter sets. GeneEx offers users the freedom to modify many aspects of the simulation such as the parameter ranges, the levels of gene expression noise and the GRC network topology itself. This degree of flexibility allows users to explore a variety of hypotheses by providing insight into the number and stability of attractors for a given GRC. Moreover, users have the option to upload, and subsequently compare, experimental gene expression data to simulated data generated from the analysis of a built or uploaded custom circuit. Finally, GeneEx offers a curated database that contains circuit motifs and known biological GRCs to facilitate further inquiry into these. Overall, GeneEx enables users to investigate the effects of parameter variation, stochasticity and/or topological changes on gene expression for GRCs using a systems-biology approach.

Availability and implementation

GeneEx is available at https://geneex.jax.org. This web-app is released under the MIT license and is free and open to all users and there is no mandatory login requirement.

Supplementary information

Supplementary data are available at Bioinformatics online.",Gene Circuit Explorer,GeneEx,https://geneex.jax.org,"an interactive web-app for visualizing, simulating and analyzing gene regulatory circuits" +33655207,TMSNP: a web server to predict pathogenesis of missense mutations in the transmembrane region of membrane proteins.,"The massive amount of data generated from genome sequencing brings tons of newly identified mutations, whose pathogenic/non-pathogenic effects need to be evaluated. This has given rise to several mutation predictor tools that, in general, do not consider the specificities of the various protein groups. We aimed to develop a predictor tool dedicated to membrane proteins, under the premise that their specific structural features and environment would give different responses to mutations compared to globular proteins. For this purpose, we created TMSNP, a database that currently contains information from 2624 pathogenic and 196 705 non-pathogenic reported mutations located in the transmembrane region of membrane proteins. By computing various conservation parameters on these mutations in combination with annotations, we trained a machine-learning model able to classify mutations as pathogenic or not. TMSNP (freely available at http://lmc.uab.es/tmsnp/) improves considerably the prediction power of commonly used mutation predictors trained with globular proteins.",,TMSNP,http://lmc.uab.es/tmsnp/,a web server to predict pathogenesis of missense mutations in the transmembrane region of membrane proteins +33655207,TMSNP: a web server to predict pathogenesis of missense mutations in the transmembrane region of membrane proteins.,"The massive amount of data generated from genome sequencing brings tons of newly identified mutations, whose pathogenic/non-pathogenic effects need to be evaluated. This has given rise to several mutation predictor tools that, in general, do not consider the specificities of the various protein groups. 
We aimed to develop a predictor tool dedicated to membrane proteins, under the premise that their specific structural features and environment would give different responses to mutations compared to globular proteins. For this purpose, we created TMSNP, a database that currently contains information from 2624 pathogenic and 196 705 non-pathogenic reported mutations located in the transmembrane region of membrane proteins. By computing various conservation parameters on these mutations in combination with annotations, we trained a machine-learning model able to classify mutations as pathogenic or not. TMSNP (freely available at http://lmc.uab.es/tmsnp/) improves considerably the prediction power of commonly used mutation predictors trained with globular proteins.",,TMSNP,http://lmc.uab.es/tmsnp/,a database that currently contains information from 2624 pathogenic and 196 705 non-pathogenic reported mutations located in the transmembrane region of membrane proteins +21249531,The DIADEM data sets: representative light microscopy images of neuronal morphology to advance automation of digital reconstructions.,"The comprehensive characterization of neuronal morphology requires tracing extensive axonal and dendritic arbors imaged with light microscopy into digital reconstructions. Considerable effort is ongoing to automate this greatly labor-intensive and currently rate-determining process. Experimental data in the form of manually traced digital reconstructions and corresponding image stacks play a vital role in developing increasingly more powerful reconstruction algorithms. The DIADEM challenge (short for DIgital reconstruction of Axonal and DEndritic Morphology) successfully stimulated progress in this area by utilizing six data set collections from different animal species, brain regions, neuron types, and visualization methods. The original research projects that provided these data are representative of the diverse scientific questions addressed in this field. 
At the same time, these data provide a benchmark for the types of demands automated software must meet to achieve the quality of manual reconstructions while minimizing human involvement. The DIADEM data underwent extensive curation, including quality control, metadata annotation, and format standardization, to focus the challenge on the most substantial technical obstacles. This data set package is now freely released ( http://diademchallenge.org ) to train, test, and aid development of automated reconstruction algorithms.",DIgital reconstruction of Axonal and DEndritic Morphology,DIADEM,http://diademchallenge.org,representative light microscopy images of neuronal morphology to advance automation of digital reconstructions +21249531,The DIADEM data sets: representative light microscopy images of neuronal morphology to advance automation of digital reconstructions.,"The comprehensive characterization of neuronal morphology requires tracing extensive axonal and dendritic arbors imaged with light microscopy into digital reconstructions. Considerable effort is ongoing to automate this greatly labor-intensive and currently rate-determining process. Experimental data in the form of manually traced digital reconstructions and corresponding image stacks play a vital role in developing increasingly more powerful reconstruction algorithms. The DIADEM challenge (short for DIgital reconstruction of Axonal and DEndritic Morphology) successfully stimulated progress in this area by utilizing six data set collections from different animal species, brain regions, neuron types, and visualization methods. The original research projects that provided these data are representative of the diverse scientific questions addressed in this field. At the same time, these data provide a benchmark for the types of demands automated software must meet to achieve the quality of manual reconstructions while minimizing human involvement. 
The DIADEM data underwent extensive curation, including quality control, metadata annotation, and format standardization, to focus the challenge on the most substantial technical obstacles. This data set package is now freely released ( http://diademchallenge.org ) to train, test, and aid development of automated reconstruction algorithms.",DIADEM data sets,DIADEM,http://diademchallenge.org,representative light microscopy images of neuronal morphology to advance automation of digital reconstructions +21643562,Toxicity tests aiming to protect Brazilian aquatic systems: current status and implications for management.,"The current status of toxicological tests performed with Brazilian native species was evaluated through a survey of the scientific data available in the literature. The information gathered was processed and an electronic toxicology database (http://www.inct-ta.furg.br/bd_toxicologico.php) was generated. This database provides valuable information for researchers to select sensitive and tolerant aquatic species to a large variety of aquatic pollutants. Furthermore, the toxicology database allows researchers to select species representative of an ecosystem of interest. Analysis of the toxicology database showed that ecotoxicological assays have significantly improved in Brazil over the last decade, in spite of the still relatively low number of tests performed and the restricted number of native species tested. This is because most of the research is developed in a few laboratories concentrated in certain regions of Brazil, especially in Southern and Southeast regions. Considering the extremely rich biodiversity and the large variety of aquatic ecosystems in Brazil, this finding points to the urgent need for the development of ecotoxicological studies with other groups of aquatic animals, such as insects, foraminifera, cnidarians, worms, amphibians, among others. 
This would help to derive more realistic water quality criteria (WQC) values, which would better protect the different aquatic ecosystems in Brazil. Finally, the toxicology database generated presents solid and science based information, which can encourage and drive the Environmental Regulatory Agencies in Brazil to derive WQC based on native species. In this context, the present paper discusses the historical evolution of ecotoxicological studies in Brazil, and how they have contributed to the improvement of the Brazilian Federal and Regional regulations for environment.",,,http://www.inct-ta.furg.br/bd_toxicologico.php,an electronic toxicology database +21786137,Prediction of protein-protein interactions between Ralstonia solanacearum and Arabidopsis thaliana.,"Ralstonia solanacearum is a devastating bacterial pathogen that has an unusually wide host range. R. solanacearum, together with Arabidopsis thaliana, has become a model system for studying the molecular basis of plant-pathogen interactions. Protein-protein interactions (PPIs) play a critical role in the infection process, and some PPIs can initiate a plant defense response. However, experimental investigations have rarely addressed such PPIs. Using two computational methods, the interolog and the domain-based methods, we predicted 3,074 potential PPIs between 119 R. solanacearum and 1,442 A. thaliana proteins. Interestingly, we found that the potential pathogen-targeted proteins are more important in the A. thaliana PPI network. To facilitate further studies, all predicted PPI data were compiled into a database server called PPIRA (http://protein.cau.edu.cn/ppira/). We hope that our work will provide new insights for future research addressing the pathogenesis of R. solanacearum.",PPIRA,PPIRA,http://protein.cau.edu.cn/ppira/,Prediction of protein-protein interactions between Ralstonia solanacearum and Arabidopsis thaliana +21929785,The representation of protein complexes in the Protein Ontology (PRO).,"

Background

Representing species-specific proteins and protein complexes in ontologies that are both human- and machine-readable facilitates the retrieval, analysis, and interpretation of genome-scale data sets. Although existing protin-centric informatics resources provide the biomedical research community with well-curated compendia of protein sequence and structure, these resources lack formal ontological representations of the relationships among the proteins themselves. The Protein Ontology (PRO) Consortium is filling this informatics resource gap by developing ontological representations and relationships among proteins and their variants and modified forms. Because proteins are often functional only as members of stable protein complexes, the PRO Consortium, in collaboration with existing protein and pathway databases, has launched a new initiative to implement logical and consistent representation of protein complexes.

Description

We describe here how the PRO Consortium is meeting the challenge of representing species-specific protein complexes, how protein complex representation in PRO supports annotation of protein complexes and comparative biology, and how PRO is being integrated into existing community bioinformatics resources. The PRO resource is accessible at http://pir.georgetown.edu/pro/.

Conclusion

PRO is a unique database resource for species-specific protein complexes. PRO facilitates robust annotation of variations in composition and function contexts for protein complexes within and between species.",Protein Ontology Consortium,PRO,http://pir.georgetown.edu/pro/,a unique database resource for species-specific protein complexes +22440904,Quantitative proteomics identifies vasopressin-responsive nuclear proteins in collecting duct cells.,"Vasopressin controls transport in the renal collecting duct, in part, by regulating transcription. This complex process, which can involve translocation and/or modification of transcriptional regulators, is not completely understood. Here, we applied a method for large-scale profiling of nuclear proteins to quantify vasopressin-induced changes in the nuclear proteome of cortical collecting duct (mpkCCD) cells. Using stable isotope labeling and tandem mass spectrometry, we quantified 3987 nuclear proteins and identified significant changes in the abundance of 65, including previously established targets of vasopressin signaling in the collecting duct. Vasopressin-induced changes in the abundance of the transcription factors JunB, Elf3, Gatad2b, and Hmbox1; transcriptional co-regulators Ctnnb1 (-catenin) and Crebbp; subunits of the Mediator complex; E3 ubiquitin ligase Nedd4; nuclear transport regulator RanGap1; and several proteins associated with tight junctions and adherens junctions. Bioinformatic analysis showed that many of the quantified transcription factors have putative binding sites in the 5'-flanking regions of genes coding for the channel proteins Aqp2, Aqp3, Scnn1b (ENaC), and Scnn1g (ENaC), which are known targets of vasopressin. Immunoblotting demonstrated that the increase in -catenin in nuclear fractions was accompanied by an even larger increase in its phosphorylated form (pSer552). 
The findings provide a new online database resource for nuclear proteomics (http://helixweb.nih.gov/ESBL/Database/mNPD/) and generate new hypotheses regarding vasopressin-mediated transcriptional regulation in the collecting duct.",,,http://helixweb.nih.gov/ESBL/Database/mNPD/,a new online database resource for nuclear proteomics +22759420,Comparative evaluation of set-level techniques in predictive classification of gene expression samples.,"

Background

Analysis of gene expression data in terms of a priori-defined gene sets has recently received significant attention as this approach typically yields more compact and interpretable results than those produced by traditional methods that rely on individual genes. The set-level strategy can also be adopted with similar benefits in predictive classification tasks accomplished with machine learning algorithms. Initial studies into the predictive performance of set-level classifiers have yielded rather controversial results. The goal of this study is to provide a more conclusive evaluation by testing various components of the set-level framework within a large collection of machine learning experiments.

Results

Genuine curated gene sets constitute better features for classification than sets assembled without biological relevance. For identifying the best gene sets for classification, the Global test outperforms the gene-set methods GSEA and SAM-GS as well as two generic feature selection methods. To aggregate expressions of genes into a feature value, the singular value decomposition (SVD) method as well as the SetSig technique improve on simple arithmetic averaging. Set-level classifiers learned with 10 features constituted by the Global test slightly outperform baseline gene-level classifiers learned with all original data features although they are slightly less accurate than gene-level classifiers learned with a prior feature-selection step.

Conclusion

Set-level classifiers do not boost predictive accuracy, however, they do achieve competitive accuracy if learned with the right combination of ingredients.

Availability

Open-source, publicly available software was used for classifier learning and testing. The gene expression datasets and the gene set database used are also publicly available. The full tabulation of experimental results is available at http://ida.felk.cvut.cz/CESLT.",,,http://ida.felk.cvut.cz/CESLT, +22800569,Modeling of folds and folding pathways for some protein families of (α + β)- and (α/β)-classes.,"In this paper, updated structural trees for a/-proteins containing five- and seven-segment (a/)-motifs are represented. Novel structural motifs occurring in some families of (a + )- and (a/)-proteins are also characterized. Databases of these proteins have been compiled from the Protein Data Bank (PDB) and Structural Classification of Proteins (SCOP) and the corresponding structural trees have been constructed. The classification of these proteins has been developed and organized as an extension of the PCBOST database, which is available at http://strees.protres.ru . In total, the updated Protein Classification Based on Structural Trees database contains 11 structural trees, 106 levels, 635 folds, 4911 proteins and domains, and 14,202 PDB entries.",PCBOST database,PCBOST,http://strees.protres.ru, +22804825,HuPho: the human phosphatase portal.,"Phosphatases and kinases contribute to the regulation of protein phosphorylation homeostasis in the cell. Phosphorylation is a key post-translational modification underlying the regulation of many cellular processes. Thus, a comprehensive picture of phosphatase function and the identification of their target substrates would aid a systematic approach to a mechanistic description of cell signalling. Here we present a website designed to facilitate the retrieval of information about human protein phosphatases. To this end we developed a search engine to recover and integrate information annotated in several publicly available web resources. 
In addition we present a text-mining-assisted annotation effort aimed at extracting phosphatase related data reported in the scientific literature. The HuPho (human phosphatases) website can be accessed at http://hupho.uniroma2.it.",human phosphatase portal,HuPho,http://hupho.uniroma2.it,a text-mining-assisted annotation effort aimed at extracting phosphatase related data reported in the scientific literature +22804825,HuPho: the human phosphatase portal.,"Phosphatases and kinases contribute to the regulation of protein phosphorylation homeostasis in the cell. Phosphorylation is a key post-translational modification underlying the regulation of many cellular processes. Thus, a comprehensive picture of phosphatase function and the identification of their target substrates would aid a systematic approach to a mechanistic description of cell signalling. Here we present a website designed to facilitate the retrieval of information about human protein phosphatases. To this end we developed a search engine to recover and integrate information annotated in several publicly available web resources. In addition we present a text-mining-assisted annotation effort aimed at extracting phosphatase related data reported in the scientific literature. The HuPho (human phosphatases) website can be accessed at http://hupho.uniroma2.it.",human phosphatases,HuPho,http://hupho.uniroma2.it,a text-mining-assisted annotation effort aimed at extracting phosphatase related data reported in the scientific literature +22961451,Identifying cancer highly-expressed membrane receptors for targeted drug delivery.,"Currently, the accompanying side effects of anti-cancer drugs owing to incorrect delivery to normal tissues should be reduced. We present a database (MRTDD) with identified cancer highly-expressed membrane receptors (CHMRs) which can be used in targeted drug delivery. 
To evaluate the probability of occurrence of incorrect delivery, we calculate tissue index for each CHMR and expect to identify good candidates. The information provided includes: (1) genomic annotations; (2) gene expression profiles of membrane receptors in cancer tissue vs. corresponding normal tissue, normal tissues of body and cancer cell-lines; (3) available antibody services of manufacturers. MRTDD is available at http://mrtdd.mbc.nctu.edu.tw/.",MRTDD,MRTDD,http://mrtdd.mbc.nctu.edu.tw/,identified cancer highly-expressed membrane receptors (CHMRs) which can be used in targeted drug delivery +23095498,"The human """"magnesome"""": detecting magnesium binding sites on human proteins.","

Background

Magnesium research is increasing in molecular medicine due to the relevance of this ion in several important biological processes and associated molecular pathogeneses. It is still difficult to predict from the protein covalent structure whether a human chain is or not involved in magnesium binding. This is mainly due to little information on the structural characteristics of magnesium binding sites in proteins and protein complexes. Magnesium binding features, differently from those of other divalent cations such as calcium and zinc, are elusive. Here we address a question that is relevant in protein annotation: how many human proteins can bind Mg2+? Our analysis is performed taking advantage of the recently implemented Bologna Annotation Resource (BAR-PLUS), a non hierarchical clustering method that relies on the pair wise sequence comparison of about 14 millions proteins from over 300.000 species and their grouping into clusters where annotation can safely be inherited after statistical validation.

Results

After cluster assignment of the latest version of the human proteome, the total number of human proteins for which we can assign putative Mg binding sites is 3,751. Among these proteins, 2,688 inherit annotation directly from human templates and 1,063 inherit annotation from templates of other organisms. Protein structures are highly conserved inside a given cluster. Transfer of structural properties is possible after alignment of a given sequence with the protein structures that characterise a given cluster as obtained with a Hidden Markov Model (HMM) based procedure. Interestingly a set of 370 human sequences inherit Mg2+ binding sites from templates sharing less than 30% sequence identity with the template.

Conclusion

We describe and deliver the """"human magnesome"""", a set of proteins of the human proteome that inherit putative binding of magnesium ions. With our BAR-hMG, 251 clusters including 1,341 magnesium binding protein structures corresponding to 387 sequences are sufficient to annotate some 13,689 residues in 3,751 human sequences as """"magnesium binding"""". Protein structures act therefore as three dimensional seeds for structural and functional annotation of human sequences. The data base collects specifically all the human proteins that can be annotated according to our procedure as """"magnesium binding"""", the corresponding structures and BAR+ clusters from where they derive the annotation (http://bar.biocomp.unibo.it/mg).",human magnesome,,http://bar.biocomp.unibo.it/mg,a set of proteins of the human proteome that inherit putative binding of magnesium ions +23433959,Identification of candidate transcription factor binding sites in the cattle genome.,"A resource that provides candidate transcription factor binding sites (TFBSs) does not currently exist for cattle. Such data is necessary, as predicted sites may serve as excellent starting locations for future omics studies to develop transcriptional regulation hypotheses. In order to generate this resource, we employed a phylogenetic footprinting approach-using sequence conservation across cattle, human and dog-and position-specific scoring matrices to identify 379,333 putative TFBSs upstream of nearly 8000 Mammalian Gene Collection (MGC) annotated genes within the cattle genome. Comparisons of our predictions to known binding site loci within the PCK1, ACTA1 and G6PC promoter regions revealed 75% sensitivity for our method of discovery. Additionally, we intersected our predictions with known cattle SNP variants in dbSNP and on the Illumina BovineHD 770k and Bos 1 SNP chips, finding 7534, 444 and 346 overlaps, respectively. 
Due to our stringent filtering criteria, these results represent high quality predictions of putative TFBSs within the cattle genome. All binding site predictions are freely available at http://bfgl.anri.barc.usda.gov/BovineTFBS/ or http://199.133.54.77/BovineTFBS.",,,http://bfgl.anri.barc.usda.gov/BovineTFBS/,high quality predictions of putative TFBSs within the cattle genome +23433959,Identification of candidate transcription factor binding sites in the cattle genome.,"A resource that provides candidate transcription factor binding sites (TFBSs) does not currently exist for cattle. Such data is necessary, as predicted sites may serve as excellent starting locations for future omics studies to develop transcriptional regulation hypotheses. In order to generate this resource, we employed a phylogenetic footprinting approach-using sequence conservation across cattle, human and dog-and position-specific scoring matrices to identify 379,333 putative TFBSs upstream of nearly 8000 Mammalian Gene Collection (MGC) annotated genes within the cattle genome. Comparisons of our predictions to known binding site loci within the PCK1, ACTA1 and G6PC promoter regions revealed 75% sensitivity for our method of discovery. Additionally, we intersected our predictions with known cattle SNP variants in dbSNP and on the Illumina BovineHD 770k and Bos 1 SNP chips, finding 7534, 444 and 346 overlaps, respectively. Due to our stringent filtering criteria, these results represent high quality predictions of putative TFBSs within the cattle genome. 
All binding site predictions are freely available at http://bfgl.anri.barc.usda.gov/BovineTFBS/ or http://199.133.54.77/BovineTFBS.",,,http://199.133.54.77/BovineTFBS,high quality predictions of putative TFBSs within the cattle genome +23704925,Comprehensive genomic characterization of cutaneous malignant melanoma cell lines derived from metastatic lesions by whole-exome sequencing and SNP array profiling.,"Cutaneous malignant melanoma is the most fatal skin cancer and although improved comprehension of its pathogenic pathways allowed to realize some effective molecular targeted therapies, novel targets and drugs are still needed. Aiming to add genetic information potentially useful for novel targets discovery, we performed an extensive genomic characterization by whole-exome sequencing and SNP array profiling of six cutaneous melanoma cell lines derived from metastatic patients. We obtained a total of 3,325 novel coding single nucleotide variants, including 2,172 non-synonymous variants. We catalogued the coding mutations according to Sanger COSMIC database and to a manually curated list including genes involved in melanoma pathways identified by mining recent literature. Besides confirming the presence of known melanoma driver mutations (BRAF(V600E), NRAS(Q61R) ), we identified novel mutated genes involved in signalling pathways crucial for melanoma pathogenesis and already addressed by current targeted therapies (such as MAPK and glutamate pathways). We also identified mutations in four genes (MUC19, PAICS, RBMXL1, KIF23) never reported in melanoma, which might deserve further investigations. All data are available to the entire research community in our Melanoma Exome Database (at https://155.253.6.64/MExDB/). 
In summary, these cell lines are valuable biological tools to improve the genetic comprehension of this complex cancer disease and to study functional relevance of individual mutational events, and these findings could provide insights potentially useful for identification of novel therapeutic targets for cutaneous malignant melanoma.",Melanoma Exome Database,,https://155.253.6.64/MExDB/,Comprehensive genomic characterization of cutaneous malignant melanoma cell lines derived from metastatic lesions by whole-exome sequencing and SNP array profiling +23729657,"The non-obese diabetic mouse sequence, annotation and variation resource: an aid for investigating type 1 diabetes.","Model organisms are becoming increasingly important for the study of complex diseases such as type 1 diabetes (T1D). The non-obese diabetic (NOD) mouse is an experimental model for T1D having been bred to develop the disease spontaneously in a process that is similar to humans. Genetic analysis of the NOD mouse has identified around 50 disease loci, which have the nomenclature Idd for insulin-dependent diabetes, distributed across at least 11 different chromosomes. In total, 21 Idd regions across 6 chromosomes, that are major contributors to T1D susceptibility or resistance, were selected for finished sequencing and annotation at the Wellcome Trust Sanger Institute. Here we describe the generation of 40.4 mega base-pairs of finished sequence from 289 bacterial artificial chromosomes for the NOD mouse. Manual annotation has identified 738 genes in the diabetes sensitive NOD mouse and 765 genes in homologous regions of the diabetes resistant C57BL/6J reference mouse across 19 candidate Idd regions. This has allowed us to call variation consequences between homologous exonic sequences for all annotated regions in the two mouse strains. 
We demonstrate the importance of this resource further by illustrating the technical difficulties that regions of inter-strain structural variation between the NOD mouse and the C57BL/6J reference mouse can cause for current next generation sequencing and assembly techniques. Furthermore, we have established that the variation rate in the Idd regions is 2.3 times higher than the mean found for the whole genome assembly for the NOD/ShiLtJ genome, which we suggest reflects the fact that positive selection for functional variation in immune genes is beneficial in regard to host defence. In summary, we provide an important resource, which aids the analysis of potential causative genes involved in T1D susceptibility. Database URLs: http://www.sanger.ac.uk/resources/mouse/nod/; http://vega-previous.sanger.ac.uk/info/data/mouse_regions.html#Idd","The non-obese diabetic mouse sequence, annotation and variation resource",,http://www.sanger.ac.uk/resources/mouse/nod/, +23729657,"The non-obese diabetic mouse sequence, annotation and variation resource: an aid for investigating type 1 diabetes.","Model organisms are becoming increasingly important for the study of complex diseases such as type 1 diabetes (T1D). The non-obese diabetic (NOD) mouse is an experimental model for T1D having been bred to develop the disease spontaneously in a process that is similar to humans. Genetic analysis of the NOD mouse has identified around 50 disease loci, which have the nomenclature Idd for insulin-dependent diabetes, distributed across at least 11 different chromosomes. In total, 21 Idd regions across 6 chromosomes, that are major contributors to T1D susceptibility or resistance, were selected for finished sequencing and annotation at the Wellcome Trust Sanger Institute. Here we describe the generation of 40.4 mega base-pairs of finished sequence from 289 bacterial artificial chromosomes for the NOD mouse. 
Manual annotation has identified 738 genes in the diabetes sensitive NOD mouse and 765 genes in homologous regions of the diabetes resistant C57BL/6J reference mouse across 19 candidate Idd regions. This has allowed us to call variation consequences between homologous exonic sequences for all annotated regions in the two mouse strains. We demonstrate the importance of this resource further by illustrating the technical difficulties that regions of inter-strain structural variation between the NOD mouse and the C57BL/6J reference mouse can cause for current next generation sequencing and assembly techniques. Furthermore, we have established that the variation rate in the Idd regions is 2.3 times higher than the mean found for the whole genome assembly for the NOD/ShiLtJ genome, which we suggest reflects the fact that positive selection for functional variation in immune genes is beneficial in regard to host defence. In summary, we provide an important resource, which aids the analysis of potential causative genes involved in T1D susceptibility. Database URLs: http://www.sanger.ac.uk/resources/mouse/nod/; http://vega-previous.sanger.ac.uk/info/data/mouse_regions.html#Idd","The non-obese diabetic mouse sequence, annotation and variation resource",,http://vega-previous.sanger.ac.uk/info/data/mouse_regions.html#Idd, +23730305,The systems genetics resource: a web application to mine global data for complex disease traits.,"The Systems Genetics Resource (SGR) (http://systems.genetics.ucla.edu) is a new open-access web application and database that contains genotypes and clinical and intermediate phenotypes from both human and mouse studies. The mouse data include studies using crosses between specific inbred strains and studies using the Hybrid Mouse Diversity Panel. SGR is designed to assist researchers studying genes and pathways contributing to complex disease traits, including obesity, diabetes, atherosclerosis, heart failure, osteoporosis, and lipoprotein metabolism. 
Over the next few years, we hope to add data relevant to deafness, addiction, hepatic steatosis, toxin responses, and vascular injury. The intermediate phenotypes include expression array data for a variety of tissues and cultured cells, metabolite levels, and protein levels. Pre-computed tables of genetic loci controlling intermediate and clinical phenotypes, as well as phenotype correlations, are accessed via a user-friendly web interface. The web site includes detailed protocols for all of the studies. Data from published studies are freely available; unpublished studies have restricted access during their embargo period.",Systems Genetics Resource,SGR,http://systems.genetics.ucla.edu,a new open-access web application and database that contains genotypes and clinical and intermediate phenotypes from both human and mouse studies +23730305,The systems genetics resource: a web application to mine global data for complex disease traits.,"The Systems Genetics Resource (SGR) (http://systems.genetics.ucla.edu) is a new open-access web application and database that contains genotypes and clinical and intermediate phenotypes from both human and mouse studies. The mouse data include studies using crosses between specific inbred strains and studies using the Hybrid Mouse Diversity Panel. SGR is designed to assist researchers studying genes and pathways contributing to complex disease traits, including obesity, diabetes, atherosclerosis, heart failure, osteoporosis, and lipoprotein metabolism. Over the next few years, we hope to add data relevant to deafness, addiction, hepatic steatosis, toxin responses, and vascular injury. The intermediate phenotypes include expression array data for a variety of tissues and cultured cells, metabolite levels, and protein levels. Pre-computed tables of genetic loci controlling intermediate and clinical phenotypes, as well as phenotype correlations, are accessed via a user-friendly web interface. 
The web site includes detailed protocols for all of the studies. Data from published studies are freely available; unpublished studies have restricted access during their embargo period.",systems genetics resource,SGR,http://systems.genetics.ucla.edu,a web application to mine global data for complex disease traits +23734622,Biotea: RDFizing PubMed Central in support for the paper as an interface to the Web of Data.,"

Background

The World Wide Web has become a dissemination platform for scientific and non-scientific publications. However, most of the information remains locked up in discrete documents that are not always interconnected or machine-readable. The connectivity tissue provided by RDF technology has not yet been widely used to support the generation of self-describing, machine-readable documents.

Results

In this paper, we present our approach to the generation of self-describing machine-readable scholarly documents. We understand the scientific document as an entry point and interface to the Web of Data. We have semantically processed the full-text, open-access subset of PubMed Central. Our RDF model and resulting dataset make extensive use of existing ontologies and semantic enrichment services. We expose our model, services, prototype, and datasets at http://biotea.idiginfo.org/

Conclusions

The semantic processing of biomedical literature presented in this paper embeds documents within the Web of Data and facilitates the execution of concept-based queries against the entire digital library. Our approach delivers a flexible and adaptable set of tools for metadata enrichment and semantic processing of biomedical documents. Our model delivers a semantically rich and highly interconnected dataset with self-describing content so that software can make effective use of it.",Biotea,Biotea,http://biotea.idiginfo.org/,"self-describing, machine-readable documents" +23828786,Towards building a disease-phenotype knowledge base: extracting disease-manifestation relationship from literature.,"

Motivation

Systems approaches to studying phenotypic relationships among diseases are emerging as an active area of research for both novel disease gene discovery and drug repurposing. Currently, systematic study of disease phenotypic relationships on a phenome-wide scale is limited because large-scale machine-understandable disease-phenotype relationship knowledge bases are often unavailable. Here, we present an automatic approach to extract disease-manifestation (D-M) pairs (one specific type of disease-phenotype relationship) from the wide body of published biomedical literature.

Data and methods

Our method leverages external knowledge and limits the amount of human effort required. For the text corpus, we used 119 085 682 MEDLINE sentences (21 354 075 citations). First, we used D-M pairs from existing biomedical ontologies as prior knowledge to automatically discover D-M-specific syntactic patterns. We then extracted additional pairs from MEDLINE using the learned patterns. Finally, we analysed correlations between disease manifestations and disease-associated genes and drugs to demonstrate the potential of this newly created knowledge base in disease gene discovery and drug repurposing.

Results

In total, we extracted 121 359 unique D-M pairs with a high precision of 0.924. Among the extracted pairs, 120 419 (99.2%) have not been captured in existing structured knowledge sources. We have shown that disease manifestations correlate positively with both disease-associated genes and drug treatments.

Conclusions

The main contribution of our study is the creation of a large-scale and accurate D-M phenotype relationship knowledge base. This unique knowledge base, when combined with existing phenotypic, genetic and proteomic datasets, can have profound implications in our deeper understanding of disease etiology and in rapid drug repurposing.

Availability

http://nlp.case.edu/public/data/DMPatternUMLS/",,,http://nlp.case.edu/public/data/DMPatternUMLS/,a large-scale and accurate D-M phenotype relationship knowledge base +23868073,Large-scale gene function analysis with the PANTHER classification system.,"The PANTHER (protein annotation through evolutionary relationship) classification system (http://www.pantherdb.org/) is a comprehensive system that combines gene function, ontology, pathways and statistical analysis tools that enable biologists to analyze large-scale, genome-wide data from sequencing, proteomics or gene expression experiments. The system is built with 82 complete genomes organized into gene families and subfamilies, and their evolutionary relationships are captured in phylogenetic trees, multiple sequence alignments and statistical models (hidden Markov models or HMMs). Genes are classified according to their function in several different ways: families and subfamilies are annotated with ontology terms (Gene Ontology (GO) and PANTHER protein class), and sequences are assigned to PANTHER pathways. The PANTHER website includes a suite of tools that enable users to browse and query gene functions, and to analyze large-scale experimental data with a number of statistical tests. It is widely used by bench scientists, bioinformaticians, computer scientists and systems biologists. In the 2013 release of PANTHER (v.8.0), in addition to an update of the data content, we redesigned the website interface to improve both user experience and the system's analytical capability. This protocol provides a detailed description of how to analyze genome-wide experimental data with the PANTHER classification system.",protein annotation through evolutionary relationship,PANTHER,http://www.pantherdb.org/, +24060102,Comparative genomics of metabolic capacities of regulons controlled by cis-regulatory RNA motifs in bacteria.,"

Background

In silico comparative genomics approaches have been efficiently used for functional prediction and reconstruction of metabolic and regulatory networks. Riboswitches are metabolite-sensing structures often found in bacterial mRNA leaders controlling gene expression on transcriptional or translational levels.An increasing number of riboswitches and other cis-regulatory RNAs have been recently classified into numerous RNA families in the Rfam database. High conservation of these RNA motifs provides a unique advantage for their genomic identification and comparative analysis.

Results

A comparative genomics approach implemented in the RegPredict tool was used for reconstruction and functional annotation of regulons controlled by RNAs from 43 Rfam families in diverse taxonomic groups of Bacteria. The inferred regulons include ~5200 cis-regulatory RNAs and more than 12000 target genes in 255 microbial genomes. All predicted RNA-regulated genes were classified into specific and overall functional categories. Analysis of taxonomic distribution of these categories allowed us to establish major functional preferences for each analyzed cis-regulatory RNA motif family. Overall, most RNA motif regulons showed predictable functional content in accordance with their experimentally established effector ligands. Our results suggest that some RNA motifs (including thiamin pyrophosphate and cobalamin riboswitches that control the cofactor metabolism) are widespread and likely originated from the last common ancestor of all bacteria. However, many more analyzed RNA motifs are restricted to a narrow taxonomic group of bacteria and likely represent more recent evolutionary innovations.

Conclusions

The reconstructed regulatory networks for major known RNA motifs substantially expand the existing knowledge of transcriptional regulation in bacteria. The inferred regulons can be used for genetic experiments, functional annotations of genes, metabolic reconstruction and evolutionary analysis. The obtained genome-wide collection of reference RNA motif regulons is available in the RegPrecise database (http://regprecise.lbl.gov/).",RegPrecise database,RegPrecise,http://regprecise.lbl.gov/,genome-wide collection of reference RNA motif regulons +24165883,IMG 4 version of the integrated microbial genomes comparative analysis system.,"The Integrated Microbial Genomes (IMG) data warehouse integrates genomes from all three domains of life, as well as plasmids, viruses and genome fragments. IMG provides tools for analyzing and reviewing the structural and functional annotations of genomes in a comparative context. IMG's data content and analytical capabilities have increased continuously since its first version released in 2005. Since the last report published in the 2012 NAR Database Issue, IMG's annotation and data integration pipelines have evolved while new tools have been added for recording and analyzing single cell genomes, RNA Seq and biosynthetic cluster data. Different IMG datamarts provide support for the analysis of publicly available genomes (IMG/W: http://img.jgi.doe.gov/w), expert review of genome annotations (IMG/ER: http://img.jgi.doe.gov/er) and teaching and training in the area of microbial genome analysis (IMG/EDU: http://img.jgi.doe.gov/edu).",Integrated Microbial Genomes,IMG,IMG/W: http://img.jgi.doe.gov/w,"genomes from all three domains of life, as well as plasmids, viruses and genome fragments" +24165883,IMG 4 version of the integrated microbial genomes comparative analysis system.,"The Integrated Microbial Genomes (IMG) data warehouse integrates genomes from all three domains of life, as well as plasmids, viruses and genome fragments. 
IMG provides tools for analyzing and reviewing the structural and functional annotations of genomes in a comparative context. IMG's data content and analytical capabilities have increased continuously since its first version released in 2005. Since the last report published in the 2012 NAR Database Issue, IMG's annotation and data integration pipelines have evolved while new tools have been added for recording and analyzing single cell genomes, RNA Seq and biosynthetic cluster data. Different IMG datamarts provide support for the analysis of publicly available genomes (IMG/W: http://img.jgi.doe.gov/w), expert review of genome annotations (IMG/ER: http://img.jgi.doe.gov/er) and teaching and training in the area of microbial genome analysis (IMG/EDU: http://img.jgi.doe.gov/edu).",Integrated Microbial Genomes,IMG,IMG/EDU: http://img.jgi.doe.gov/edu,"genomes from all three domains of life, as well as plasmids, viruses and genome fragments" +24165883,IMG 4 version of the integrated microbial genomes comparative analysis system.,"The Integrated Microbial Genomes (IMG) data warehouse integrates genomes from all three domains of life, as well as plasmids, viruses and genome fragments. IMG provides tools for analyzing and reviewing the structural and functional annotations of genomes in a comparative context. IMG's data content and analytical capabilities have increased continuously since its first version released in 2005. Since the last report published in the 2012 NAR Database Issue, IMG's annotation and data integration pipelines have evolved while new tools have been added for recording and analyzing single cell genomes, RNA Seq and biosynthetic cluster data. 
Different IMG datamarts provide support for the analysis of publicly available genomes (IMG/W: http://img.jgi.doe.gov/w), expert review of genome annotations (IMG/ER: http://img.jgi.doe.gov/er) and teaching and training in the area of microbial genome analysis (IMG/EDU: http://img.jgi.doe.gov/edu).",Integrated Microbial Genomes,IMG,IMG/ER: http://img.jgi.doe.gov/er,"genomes from all three domains of life, as well as plasmids, viruses and genome fragments" +24178034,IDEAL in 2014 illustrates interaction networks composed of intrinsically disordered proteins and their binding partners.,"IDEAL (Intrinsically Disordered proteins with Extensive Annotations and Literature, http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL/) is a collection of intrinsically disordered proteins (IDPs) that cannot adopt stable globular structures under physiological conditions. Since its previous publication in 2012, the number of entries in IDEAL has almost tripled (120 to 340). In addition to the increase in quantity, the quality of IDEAL has been significantly improved. The new IDEAL incorporates the interactions of IDPs and their binding partners more explicitly, and illustrates the protein-protein interaction (PPI) networks and the structures of protein complexes. Redundant experimental data are arranged based on the clustering of Protein Data Bank entries, and similar sequences with the same binding mode are grouped. As a result, the new IDEAL presents more concise and informative experimental data. Nuclear magnetic resonance (NMR) disorder is annotated in a systematic manner, by identifying the regions with large deviations among the NMR models. The ordered/disordered and new domain predictions by DICHOT are available, as well as the domain assignments by HMMER. Some examples of the PPI networks and the highly deviated regions derived from NMR models will be described, together with other advances. 
These enhancements will facilitate deeper understanding of IDPs, in terms of their flexibility, plasticity and promiscuity.",Intrinsically Disordered proteins with Extensive Annotations and Literature,IDEAL,http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL/, a collection of intrinsically disordered proteins (IDPs) that cannot adopt stable globular structures under physiological conditions +24270792,Gene3D: Multi-domain annotations for protein sequence and comparative genome analysis.,"Gene3D (http://gene3d.biochem.ucl.ac.uk) is a database of protein domain structure annotations for protein sequences. Domains are predicted using a library of profile HMMs from 2738 CATH superfamilies. Gene3D assigns domain annotations to Ensembl and UniProt sequence sets including >6000 cellular genomes and >20 million unique protein sequences. This represents an increase of 45% in the number of protein sequences since our last publication. Thanks to improvements in the underlying data and pipeline, we see large increases in the domain coverage of sequences. We have expanded this coverage by integrating Pfam and SUPERFAMILY domain annotations, and we now resolve domain overlaps to provide highly comprehensive composite multi-domain architectures. To make these data more accessible for comparative genome analyses, we have developed novel search algorithms for searching genomes to identify related multi-domain architectures. In addition to providing domain family annotations, we have now developed a pipeline for 3D homology modelling of domains in Gene3D. 
This has been applied to the human genome and will be rolled out to other major organisms over the next year.",Gene3D,Gene3D,http://gene3d.biochem.ucl.ac.uk,a database of protein domain structure annotations for protein sequences +24319143,MMDB and VAST+: tracking structural similarities between macromolecular complexes.,"The computational detection of similarities between protein 3D structures has become an indispensable tool for the detection of homologous relationships, the classification of protein families and functional inference. Consequently, numerous algorithms have been developed that facilitate structure comparison, including rapid searches against a steadily growing collection of protein structures. To this end, NCBI's Molecular Modeling Database (MMDB), which is based on the Protein Data Bank (PDB), maintains a comprehensive and up-to-date archive of protein structure similarities computed with the Vector Alignment Search Tool (VAST). These similarities have been recorded on the level of single proteins and protein domains, comprising in excess of 1.5 billion pairwise alignments. Here we present VAST+, an extension to the existing VAST service, which summarizes and presents structural similarity on the level of biological assemblies or macromolecular complexes. VAST+ simplifies structure neighboring results and shows, for macromolecular complexes tracked in MMDB, lists of similar complexes ranked by the extent of similarity. VAST+ replaces the previous VAST service as the default presentation of structure neighboring data in NCBI's Entrez query and retrieval system. 
MMDB and VAST+ can be accessed via http://www.ncbi.nlm.nih.gov/Structure.",Molecular Modeling Database,MMDB,http://www.ncbi.nlm.nih.gov/Structure,a comprehensive and up-to-date archive of protein structure similarities computed with the Vector Alignment Search Tool (VAST) +24580755,"CDSbank: taxonomy-aware extraction, selection, renaming and formatting of protein-coding DNA or amino acid sequences.","

Background

Protein-coding DNA sequences and their corresponding amino acid sequences are routinely used to study relationships between sequence, structure, function, and evolution. The rapidly growing size of sequence databases increases the power of such comparative analyses but it makes it more challenging to prepare high quality sequence data sets with control over redundancy, quality, completeness, formatting, and labeling. Software tools for some individual steps in this process exist but manual intervention remains a common and time consuming necessity.

Description

CDSbank is a database that stores both the protein-coding DNA sequence (CDS) and amino acid sequence for each protein annotated in Genbank. CDSbank also stores Genbank feature annotation, a flag to indicate incomplete 5' and 3' ends, full taxonomic data, and a heuristic to rank the scientific interest of each species. This rich information allows fully automated data set preparation with a level of sophistication that aims to meet or exceed manual processing. Defaults ensure ease of use for typical scenarios while allowing great flexibility when needed. Access is via a free web server at http://hazeslab.med.ualberta.ca/CDSbank/.

Conclusions

CDSbank presents a user-friendly web server to download, filter, format, and name large sequence data sets. Common usage scenarios can be accessed via pre-programmed default choices, while optional sections give full control over the processing pipeline. Particular strengths are: extract protein-coding DNA sequences just as easily as amino acid sequences, full access to taxonomy for labeling and filtering, awareness of incomplete sequences, and the ability to take one protein sequence and extract all synonymous CDS or identical protein sequences in other species. Finally, CDSbank can also create labeled property files to, for instance, annotate or re-label phylogenetic trees.",CDSbank,CDSbank,http://hazeslab.med.ualberta.ca/CDSbank/,a database that stores both the protein-coding DNA sequence (CDS) and amino acid sequence for each protein annotated in Genbank +24655548,Analysis of growth factor signaling in genetically diverse breast cancer lines.,"

Background

Soluble growth factors present in the microenvironment play a major role in tumor development, invasion, metastasis, and responsiveness to targeted therapies. While the biochemistry of growth factor-dependent signal transduction has been studied extensively in individual cell types, relatively little systematic data are available across genetically diverse cell lines.

Results

We describe a quantitative and comparative dataset focused on immediate-early signaling that regulates the AKT (AKT1/2/3) and ERK (MAPK1/3) pathways in a canonical panel of well-characterized breast cancer lines. We also provide interactive web-based tools to facilitate follow-on analysis of the data. Our findings show that breast cancers are diverse with respect to ligand sensitivity and signaling biochemistry. Surprisingly, triple negative breast cancers (TNBCs; which express low levels of ErbB2, progesterone and estrogen receptors) are the most broadly responsive to growth factors and HER2amp cancers (which overexpress ErbB2) the least. The ratio of ERK to AKT activation varies with ligand and subtype, with a systematic bias in favor of ERK in hormone receptor positive (HR+) cells. The factors that correlate with growth factor responsiveness depend on whether fold-change or absolute activity is considered the key biological variable, and they differ between ERK and AKT pathways.

Conclusions

Responses to growth factors are highly diverse across breast cancer cell lines, even within the same subtype. A simple four-part heuristic suggests that diversity arises from variation in receptor abundance, an ERK/AKT bias that depends on ligand identity, a set of factors common to all receptors that varies in abundance or activity with cell line, and an """"indirect negative regulation"""" by ErbB2. This analysis sets the stage for the development of a mechanistic and predictive model of growth factor signaling in diverse cancer lines. Interactive tools for looking up these results and downloading raw data are available at http://lincs.hms.harvard.edu/niepel-bmcbiol-2014/.",,,http://lincs.hms.harvard.edu/niepel-bmcbiol-2014/,a quantitative and comparative dataset focused on immediate-early signaling that regulates the AKT (AKT1/2/3) and ERK (MAPK1/3) pathways in a canonical panel of well-characterized breast cancer lines +24678734,Research resource: EPSLiM: ensemble predictor for short linear motifs in nuclear hormone receptors.,"Nuclear receptors (NRs) are a superfamily of transcription factors central to regulating many biological processes, including cell growth, death, metabolism, and immune responses. NR-mediated gene expression can be modulated by coactivators and corepressors through direct physical interaction or protein complexes with functional domains in NRs. One class of these domains includes short linear motifs (SLiMs), which facilitate protein-protein interactions, phosphorylation, and ligand binding primarily in the intrinsically disordered regions (IDRs) of proteins. Across all proteins, the number of known SLiMs is limited due to the difficulty in studying IDRs experimentally. Computational tools provide a systematic and data-driven approach for predicting functional motifs that can be used to prioritize experimental efforts. 
Accordingly, several tools have been developed based on sequence conservation or biophysical features; however, discrepancies in predictions make it difficult to determine the true candidate SLiMs. In this work, we present the ensemble predictor for short linear motifs (EPSLiM), a novel strategy to prioritize the residues that are most likely to be SLiMs in IDRs. EPSLiM applies a generalized linear model to integrate predictions from individual methodologies. We show that EPSLiM outperforms individual predictors, and we apply our method to NRs. The androgen receptor is an example with an N-terminal domain of 559 disordered amino acids that contains several validated SLiMs important for transcriptional activation. We use the androgen receptor to illustrate the predictive performance of EPSLiM and make the results of all human and mouse NRs publically available through the web service http://epslim.bwh.harvard.edu.",ensemble predictor for short linear motifs,EPSLiM,http://epslim.bwh.harvard.edu, +24727366,"In silico expression analysis', a novel PathoPlant web tool to identify abiotic and biotic stress conditions associated with specific cis-regulatory sequences.","Using bioinformatics, putative cis-regulatory sequences can be easily identified using pattern recognition programs on promoters of specific gene sets. The abundance of predicted cis-sequences is a major challenge to associate these sequences with a possible function in gene expression regulation. To identify a possible function of the predicted cis-sequences, a novel web tool designated 'in silico expression analysis' was developed that correlates submitted cis-sequences with gene expression data from Arabidopsis thaliana. The web tool identifies the A. thaliana genes harbouring the sequence in a defined promoter region and compares the expression of these genes with microarray data. The result is a hierarchy of abiotic and biotic stress conditions to which these genes are most likely responsive. 
When testing the performance of the web tool, known cis-regulatory sequences were submitted to the 'in silico expression analysis' resulting in the correct identification of the associated stress conditions. When using a recently identified novel elicitor-responsive sequence, a WT-box (CGACTTTT), the 'in silico expression analysis' predicts that genes harbouring this sequence in their promoter are most likely Botrytis cinerea induced. Consistent with this prediction, the strongest induction of a reporter gene harbouring this sequence in the promoter is observed with B. cinerea in transgenic A. thaliana. DATABASE URL: http://www.pathoplant.de/expression_analysis.php.",PathoPlant,PathoPlant,http://www.pathoplant.de/expression_analysis.php, +25119676,Improvements to pairwise sequence comparison (PASC): a genome-based web tool for virus classification.,"The number of viral genome sequences in the public databases is increasing dramatically, and these sequences are playing an important role in virus classification. Pairwise sequence comparison is a sequence-based virus classification method. A program using this method calculates the pairwise identities of virus sequences within a virus family and displays their distribution, and visual analysis helps to determine demarcations at different taxonomic levels such as strain, species, genus and subfamily. Subsequent comparison of new sequences against existing ones allows viruses from which the new sequences were derived to be classified. Although this method cannot be used as the only criterion for virus classification in some cases, it is a quantitative method and has many advantages over conventional virus classification methods. It has been applied to several virus families, and there is an increasing interest in using this method for other virus families/groups. The Pairwise Sequence Comparison (PASC) classification tool was created at the National Center for Biotechnology Information. 
The tool's database stores pairwise identities for complete genomes/segments of 56 virus families/groups. Data in the system are updated every day to reflect changes in virus taxonomy and additions of new virus sequences to the public database. The web interface of the tool ( http://www.ncbi.nlm.nih.gov/sutils/pasc/ ) makes it easy to navigate and perform analyses. Multiple new viral genome sequences can be tested simultaneously with this system to suggest the taxonomic position of virus isolates in a specific family. PASC eliminates potential discrepancies in the results caused by different algorithms and/or different data used by researchers.",Pairwise Sequence Comparison,PASC,http://www.ncbi.nlm.nih.gov/sutils/pasc/, +25404128,"Beyond protein expression, MOPED goes multi-omics.","MOPED (Multi-Omics Profiling Expression Database; http://moped.proteinspire.org) has transitioned from solely a protein expression database to a multi-omics resource for human and model organisms. Through a web-based interface, MOPED presents consistently processed data for gene, protein and pathway expression. To improve data quality, consistency and use, MOPED includes metadata detailing experimental design and analysis methods. The multi-omics data are integrated through direct links between genes and proteins and further connected to pathways and experiments. MOPED now contains over 5 million records, information for approximately 75,000 genes and 50,000 proteins from four organisms (human, mouse, worm, yeast). These records correspond to 670 unique combinations of experiment, condition, localization and tissue. MOPED includes the following new features: pathway expression, Pathway Details pages, experimental metadata checklists, experiment summary statistics and more advanced searching tools. Advanced searching enables querying for genes, proteins, experiments, pathways and keywords of interest. The system is enhanced with visualizations for comparing across different data types. 
In the future MOPED will expand the number of organisms, increase integration with pathways and provide connections to disease.",Multi-Omics Profiling Expression Database,MOPED,http://moped.proteinspire.org,a multi-omics resource for human and model organisms +25428349,"OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders.","Online Mendelian Inheritance in Man, OMIM(), is a comprehensive, authoritative and timely research resource of curated descriptions of human genes and phenotypes and the relationships between them. The new official website for OMIM, OMIM.org (http://omim.org), was launched in January 2011. OMIM is based on the published peer-reviewed biomedical literature and is used by overlapping and diverse communities of clinicians, molecular biologists and genome scientists, as well as by students and teachers of these disciplines. Genes and phenotypes are described in separate entries and are given unique, stable six-digit identifiers (MIM numbers). OMIM entries have a structured free-text format that provides the flexibility necessary to describe the complex and nuanced relationships between genes and genetic phenotypes in an efficient manner. OMIM also has a derivative table of genes and genetic phenotypes, the Morbid Map. OMIM.org has enhanced search capabilities such as genome coordinate searching and thesaurus-enhanced search term options. Phenotypic series have been created to facilitate viewing genetic heterogeneity of phenotypes. Clinical synopsis features are enhanced with UMLS, Human Phenotype Ontology and Elements of Morphology terms and image links. All OMIM data are available for FTP download and through an API. 
MIMmatch is a novel outreach feature to disseminate updates and encourage collaboration.",Online Mendelian Inheritance in Man,OMIM,http://omim.org,an online catalog of human genes and genetic disorders +25428349,"OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders.","Online Mendelian Inheritance in Man, OMIM(), is a comprehensive, authoritative and timely research resource of curated descriptions of human genes and phenotypes and the relationships between them. The new official website for OMIM, OMIM.org (http://omim.org), was launched in January 2011. OMIM is based on the published peer-reviewed biomedical literature and is used by overlapping and diverse communities of clinicians, molecular biologists and genome scientists, as well as by students and teachers of these disciplines. Genes and phenotypes are described in separate entries and are given unique, stable six-digit identifiers (MIM numbers). OMIM entries have a structured free-text format that provides the flexibility necessary to describe the complex and nuanced relationships between genes and genetic phenotypes in an efficient manner. OMIM also has a derivative table of genes and genetic phenotypes, the Morbid Map. OMIM.org has enhanced search capabilities such as genome coordinate searching and thesaurus-enhanced search term options. Phenotypic series have been created to facilitate viewing genetic heterogeneity of phenotypes. Clinical synopsis features are enhanced with UMLS, Human Phenotype Ontology and Elements of Morphology terms and image links. All OMIM data are available for FTP download and through an API. 
MIMmatch is a novel outreach feature to disseminate updates and encourage collaboration.",Online Mendelian Inheritance in Man,OMIM,http://omim.org,"a comprehensive, authoritative and timely research resource of curated descriptions of human genes and phenotypes and the relationships between them" +25629077,Genetic variability of microRNA regulome in human.,"MicroRNAs are currently being extensively studied due to their important role as post-transcriptional regulators. During miRNA biogenesis, precursors undergo two cleavage steps performed by Drosha-DGCR8 (Microprocessor) cleaving of pri-miRNA to produce pre-miRNA and Dicer-mediated cleaving to create mature miRNA. Genetic variants within human miRNA regulome have been shown to influence miRNA expression, target interaction and to affect the phenotype. In this study, we reviewed the literature, existing bioinformatics tools and catalogs associated with polymorphic miRNA regulome, and organized them into four categories: (1) polymorphisms located within miRNA genes (miR-SNPs), (2) transcription factor-binding sites/miRNA regulatory regions (miR-rSNPs), (3) miRNA target sites (miR-TS-SNPs), and 4. miRNA silencing machinery (miR-SM-SNPs). Since the miR-SM-SNPs have not been systematically studied yet, we have collected polymorphisms associated with miRNA silencing machinery. We have developed two catalogs containing genetic variability within: (1) genes encoding three main catalytic components of the silencing machinery, DROSHA, DGCR8, and DICER1; (2) miRNA genes itself, overlapping Drosha and Dicer cleavage sites. 
The developed resource of polymorphisms is available online (http://www.integratomics-time.com/miRNA-regulome) and will be useful for further functional studies and development of biomarkers associated with diseases and phenotypic traits.",,,http://www.integratomics-time.com/miRNA-regulome, +26043787,Detection and analysis of disease-associated single nucleotide polymorphism influencing post-translational modification.,"Post-translational modification (PTM) plays a crucial role in biological functions and corresponding disease developments. Discovering disease-associated non-synonymous SNPs (nsSNPs) altering PTM sites can help to estimate the various PTM candidates involved in diseases, therefore, an integrated analysis between SNPs, PTMs and diseases is necessary. However, only a few types of PTMs affected by nsSNPs have been studied without considering disease-association until now. In this study, we developed a new database called PTM-SNP which contains a comprehensive collection of human nsSNPs that affect PTM sites, together with disease information. Total 179,325 PTM-SNPs were collected by aligning missense SNPs and stop-gain SNPs on PTM sites (position 0) or their flanking region (position -7 to 7). Disease-associated SNPs from GWAS catalogs were also matched with detected PTM-SNP to find disease associated PTM-SNPs. Our result shows PTM-SNPs are highly associated with diseases, compared with other nsSNP sites and functional classes including near gene, intron and so on. PTM-SNP can provide an insight about discovering important PTMs involved in the diseases easily through the web site. 
PTM-SNP is freely available at http://gcode.kaist.ac.kr/ptmsnp.",PTM-SNP,PTM-SNP,http://gcode.kaist.ac.kr/ptmsnp,"a comprehensive collection of human nsSNPs that affect PTM sites, together with disease information" +26109357,Gene Model Annotations for Drosophila melanogaster: Impact of High-Throughput Data.,"We report the current status of the FlyBase annotated gene set for Drosophila melanogaster and highlight improvements based on high-throughput data. The FlyBase annotated gene set consists entirely of manually annotated gene models, with the exception of some classes of small non-coding RNAs. All gene models have been reviewed using evidence from high-throughput datasets, primarily from the modENCODE project. These datasets include RNA-Seq coverage data, RNA-Seq junction data, transcription start site profiles, and translation stop-codon read-through predictions. New annotation guidelines were developed to take into account the use of the high-throughput data. We describe how this flood of new data was incorporated into thousands of new and revised annotations. FlyBase has adopted a philosophy of excluding low-confidence and low-frequency data from gene model annotations; we also do not attempt to represent all possible permutations for complex and modularly organized genes. This has allowed us to produce a high-confidence, manageable gene annotation dataset that is available at FlyBase (http://flybase.org). Interesting aspects of new annotations include new genes (coding, non-coding, and antisense), many genes with alternative transcripts with very long 3' UTRs (up to 15-18 kb), and a stunning mismatch in the number of male-specific genes (approximately 13% of all annotated gene models) vs. female-specific genes (less than 1%). The number of identified pseudogenes and mutations in the sequenced strain also increased significantly. 
We discuss remaining challenges, for instance, identification of functional small polypeptides and detection of alternative translation starts.",FlyBase,FlyBase,http://flybase.org,"a high-confidence, manageable gene annotation dataset" +26227548,Simulated unbound structures for benchmarking of protein docking in the DOCKGROUND resource.,"

Background

Proteins play an important role in biological processes in living organisms. Many protein functions are based on interaction with other proteins. The structural information is important for adequate description of these interactions. Sets of protein structures determined in both bound and unbound states are essential for benchmarking of the docking procedures. However, the number of such proteins in PDB is relatively small. A radical expansion of such sets is possible if the unbound structures are computationally simulated.

Results

The DOCKGROUND public resource provides data to improve our understanding of protein-protein interactions and to assist in the development of better tools for structural modeling of protein complexes, such as docking algorithms and scoring functions. A large set of simulated unbound protein structures was generated from the bound structures. The modeling protocol was based on 1 ns Langevin dynamics simulation. The simulated structures were validated on the ensemble of experimentally determined unbound and bound structures. The set is intended for large scale benchmarking of docking algorithms and scoring functions.

Conclusions

A radical expansion of the unbound protein docking benchmark set was achieved by simulating the unbound structures. The simulated unbound structures were selected according to criteria from systematic comparison of experimentally determined bound and unbound structures. The set is publicly available at http://dockground.compbio.ku.edu.",DOCKGROUND,DOCKGROUND,http://dockground.compbio.ku.edu,"data to improve our understanding of protein-protein interactions and to assist in the development of better tools for structural modeling of protein complexes, such as docking algorithms and scoring functions" +26852673,Multi-tissue transcriptomics for construction of a comprehensive gene resource for the terrestrial snail Theba pisana.,"The land snail Theba pisana is native to the Mediterranean region but has become one of the most abundant invasive species worldwide. Here, we present three transcriptomes of this agriculture pest derived from three tissues: the central nervous system, hepatopancreas (digestive gland), and foot muscle. Sequencing of the three tissues produced 339,479,092 high quality reads and a global de novo assembly generated a total of 250,848 unique transcripts (unigenes). BLAST analysis mapped 52,590 unigenes to NCBI non-redundant protein databases and further functional analysis annotated 21,849 unigenes with gene ontology. We report that T. pisana transcripts have representatives in all functional classes and a comparison of differentially expressed transcripts amongst all three tissues demonstrates enormous differences in their potential metabolic activities. The genes differentially expressed include those with sequence similarity to those genes associated with multiple bacterial diseases and neurological diseases. To provide a valuable resource that will assist functional genomics study, we have implemented a user-friendly web interface, ThebaDB (http://thebadb.bioinfo-minzhao.org/). 
This online database allows for complex text queries, sequence searches, and data browsing by enriched functional terms and KEGG mapping.",ThebaDB,ThebaDB,http://thebadb.bioinfo-minzhao.org/,"online database allows for complex text queries, sequence searches, and data browsing by enriched functional terms and KEGG mapping" +27102089,Gene-set activity toolbox (GAT): A platform for microarray-based cancer diagnosis using an integrative gene-set analysis approach.,"Cancer is a complex disease that cannot be diagnosed reliably using only single gene expression analysis. Using gene-set analysis on high throughput gene expression profiling controlled by various environmental factors is a commonly adopted technique used by the cancer research community. This work develops a comprehensive gene expression analysis tool (gene-set activity toolbox: (GAT)) that is implemented with data retriever, traditional data pre-processing, several gene-set analysis methods, network visualization and data mining tools. The gene-set analysis methods are used to identify subsets of phenotype-relevant genes that will be used to build a classification model. To evaluate GAT performance, we performed a cross-dataset validation study on three common cancers namely colorectal, breast and lung cancers. The results show that GAT can be used to build a reasonable disease diagnostic model and the predicted markers have biological relevance. GAT can be accessed from http://gat.sit.kmutt.ac.th where GAT's java library for gene-set analysis, simple classification and a database with three cancer benchmark datasets can be downloaded.",gene-set activity toolbox,GAT,http://gat.sit.kmutt.ac.th,a database with three cancer benchmark datasets can be downloaded +27450113,The archiving and dissemination of biological structure data.,"The global Protein Data Bank (PDB) was the first open-access digital archive in biology. 
The history and evolution of the PDB are described, together with the ways in which molecular structural biology data and information are collected, curated, validated, archived, and disseminated by the members of the Worldwide Protein Data Bank organization (wwPDB; http://wwpdb.org). Particular emphasis is placed on the role of community in establishing the standards and policies by which the PDB archive is managed day-to-day.",Worldwide Protein Data Bank organization,wwPDB,http://wwpdb.org, +27509041,MMpI: A WideRange of Available Compounds of Matrix Metalloproteinase Inhibitors.,"Matrix metalloproteinases (MMPs) are a family of zinc-dependent proteinases involved in the regulation of the extracellular signaling and structural matrix environment of cells and tissues. MMPs are considered as promising targets for the treatment of many diseases. Therefore, creation of database on the inhibitors of MMP would definitely accelerate the research activities in this area due to its implication in above-mentioned diseases and associated limitations in the first and second generation inhibitors. In this communication, we report the development of a new MMpI database which provides resourceful information for all researchers working in this field. It is a web-accessible, unique resource that contains detailed information on the inhibitors of MMP including small molecules, peptides and MMP Drug Leads. The database contains entries of ~3000 inhibitors including ~72 MMP Drug Leads and ~73 peptide based inhibitors. This database provides the detailed molecular and structural details which are necessary for the drug discovery and development. The MMpI database contains physical properties, 2D and 3D structures (mol2 and pdb format files) of inhibitors of MMP. Other data fields are hyperlinked to PubChem, ChEMBL, BindingDB, DrugBank, PDB, MEROPS and PubMed. 
The database has extensive searching facility with MMpI ID, IUPAC name, chemical structure and with the title of research article. The MMP inhibitors provided in MMpI database are optimized using Python-based Hierarchical Environment for Integrated Xtallography (Phenix) software. MMpI Database is unique and it is the only public database that contains and provides the complete information on the inhibitors of MMP. Database URL: http://clri.res.in/subramanian/databases/mmpi/index.php.",MMpI,MMpI,http://clri.res.in/subramanian/databases/mmpi/index.php,A WideRange of Available Compounds of Matrix Metalloproteinase Inhibitors +27551106,PPI4DOCK: large scale assessment of the use of homology models in free docking over more than 1000 realistic targets.,"

Motivation

Protein-protein docking methods are of great importance for understanding interactomes at the structural level. It has become increasingly appealing to use not only experimental structures but also homology models of unbound subunits as input for docking simulations. So far we are missing a large scale assessment of the success of rigid-body free docking methods on homology models.

Results

We explored how we could benefit from comparative modelling of unbound subunits to expand docking benchmark datasets. Starting from a collection of 3157 non-redundant, high X-ray resolution heterodimers, we developed the PPI4DOCK benchmark containing 1417 docking targets based on unbound homology models. Rigid-body docking by Zdock showed that for 1208 cases (85.2%), at least one correct decoy was generated, emphasizing the efficiency of rigid-body docking in generating correct assemblies. Overall, the PPI4DOCK benchmark contains a large set of realistic cases and provides new ground for assessing docking and scoring methodologies.

Availability and implementation

Benchmark sets can be downloaded from http://biodev.cea.fr/interevol/ppi4dock/ CONTACT: guerois@cea.frSupplementary information: Supplementary data are available at Bioinformatics online.",PPI4DOCK,PPI4DOCK,http://biodev.cea.fr/interevol/ppi4dock/,a large set of realistic cases and provides new ground for assessing docking and scoring methodologies +27779621,"A studyforrest extension, simultaneous fMRI and eye gaze recordings during prolonged natural stimulation.","Here we present an update of the studyforrest (http://studyforrest.org) dataset that complements the previously released functional magnetic resonance imaging (fMRI) data for natural language processing with a new two-hour 3 Tesla fMRI acquisition while 15 of the original participants were shown an audio-visual version of the stimulus motion picture. We demonstrate with two validation analyses that these new data support modeling specific properties of the complex natural stimulus, as well as a substantial within-subject BOLD response congruency in brain areas related to the processing of auditory inputs, speech, and narrative when compared to the existing fMRI data for audio-only stimulation. In addition, we provide participants' eye gaze location as recorded simultaneously with fMRI, and an additional sample of 15 control participants whose eye gaze trajectories for the entire movie were recorded in a lab setting-to enable studies on attentional processes and comparative investigations on the potential impact of the stimulation setting on these processes.",studyforrest,studyforrest,http://studyforrest.org,functional magnetic resonance imaging (fMRI) data for natural language processing +27899584,CATH: an expanded resource to predict protein function through structure and sequence.,"The latest version of the CATH-Gene3D protein structure classification database has recently been released (version 4.1, http://www.cathdb.info). 
The resource comprises over 300 000 domain structures and over 53 million protein domains classified into 2737 homologous superfamilies, doubling the number of predicted protein domains in the previous version. The daily-updated CATH-B, which contains our very latest domain assignment data, provides putative classifications for over 100 000 additional protein domains. This article describes developments to the CATH-Gene3D resource over the last two years since the publication in 2015, including: significant increases to our structural and sequence coverage; expansion of the functional families in CATH; building a support vector machine (SVM) to automatically assign domains to superfamilies; improved search facilities to return alignments of query sequences against multiple sequence alignments; the redesign of the web pages and download site.",CATH-Gene3D protein structure classification database,CATH-Gene3D,http://www.cathdb.info,an expanded resource to predict protein function through structure and sequence +28053162,POSTAR: a platform for exploring post-transcriptional regulation coordinated by RNA-binding proteins.,"We present POSTAR (http://POSTAR.ncrnalab.org), a resource of POST-trAnscriptional Regulation coordinated by RNA-binding proteins (RBPs). Precise characterization of post-transcriptional regulatory maps has accelerated dramatically in the past few years. Based on new studies and resources, POSTAR supplies the largest collection of experimentally probed (~23 million) and computationally predicted (approximately 117 million) RBP binding sites in the human and mouse transcriptomes. POSTAR annotates every transcript and its RBP binding sites using extensive information regarding various molecular regulatory events (e.g., splicing, editing, and modification), RNA secondary structures, disease-associated variants, and gene expression and function. 
Moreover, POSTAR provides a friendly, multi-mode, integrated search interface, which helps users to connect multiple RBP binding sites with post-transcriptional regulatory events, phenotypes, and diseases. Based on our platform, we were able to obtain novel insights into post-transcriptional regulation, such as the putative association between CPSF6 binding, RNA structural domains, and Li-Fraumeni syndrome SNPs. In summary, POSTAR represents an early effort to systematically annotate post-transcriptional regulatory maps and explore the putative roles of RBPs in human diseases.", POSTAR, POSTAR,http://POSTAR.ncrnalab.org,a resource of POST-trAnscriptional Regulation coordinated by RNA-binding proteins (RBPs) +28053162,POSTAR: a platform for exploring post-transcriptional regulation coordinated by RNA-binding proteins.,"We present POSTAR (http://POSTAR.ncrnalab.org), a resource of POST-trAnscriptional Regulation coordinated by RNA-binding proteins (RBPs). Precise characterization of post-transcriptional regulatory maps has accelerated dramatically in the past few years. Based on new studies and resources, POSTAR supplies the largest collection of experimentally probed (~23 million) and computationally predicted (approximately 117 million) RBP binding sites in the human and mouse transcriptomes. POSTAR annotates every transcript and its RBP binding sites using extensive information regarding various molecular regulatory events (e.g., splicing, editing, and modification), RNA secondary structures, disease-associated variants, and gene expression and function. Moreover, POSTAR provides a friendly, multi-mode, integrated search interface, which helps users to connect multiple RBP binding sites with post-transcriptional regulatory events, phenotypes, and diseases. Based on our platform, we were able to obtain novel insights into post-transcriptional regulation, such as the putative association between CPSF6 binding, RNA structural domains, and Li-Fraumeni syndrome SNPs. 
In summary, POSTAR represents an early effort to systematically annotate post-transcriptional regulatory maps and explore the putative roles of RBPs in human diseases.", POSTAR, POSTAR,http://POSTAR.ncrnalab.org,a platform for exploring post-transcriptional regulation coordinated by RNA-binding proteins +28633399,The interfacial character of antibody paratopes: analysis of antibody-antigen structures.,"

Summary

In this study, computational methods are applied to investigate the general properties of antigen engaging residues of a paratope from a non-redundant dataset of 403 antibody-antigen complexes to dissect the contribution of hydrogen bonds, hydrophobic, van der Waals contacts and ionic interactions, as well as role of water molecules in the antigen-antibody interface. Consistent with previous reports using smaller datasets, we found that Tyr, Trp, Ser, Asn, Asp, Thr, Arg, Gly, His contribute substantially to the interactions between antibody and antigen. Furthermore, antibody-antigen interactions can be mediated by interfacial waters. However, there is no reported comprehensive analysis for a large number of structured waters that engage in higher ordered structures at the antibody-antigen interface. From our dataset, we have found the presence of interfacial waters in 242 complexes. We present evidence that suggests a compelling role of these interfacial waters in interactions of antibodies with a range of antigens differing in shape complementarity. Finally, we carry out 296 835 pairwise 3D structure comparisons of 771 structures of contact residues of antibodies with their interfacial water molecules from our dataset using CLICK method. A heuristic clustering algorithm is used to obtain unique structural similarities, and found to separate into 368 different clusters. These clusters are used to identify structural motifs of contact residues of antibodies for epitope binding.

Availability and implementation

This clustering database of contact residues is freely accessible at http://mspc.bii.a-star.edu.sg/minhn/pclick.html.

Contact

minhn@bii.a-star.edu.sg, chandra@bii.a-star.edu.sg or zhong_pingyu@immunol.a-star.edu.sg.

Supplementary information

Supplementary data are available at Bioinformatics online.",,,http://mspc.bii.a-star.edu.sg/minhn/pclick.html,clustering database of contact residue +28827280,Systematic and Quantitative Assessment of Hydrogen Peroxide Reactivity With Cysteines Across Human Proteomes.,"Protein cysteinyl residues are the mediators of hydrogen peroxide (H2O2)-dependent redox signaling. However, site-specific mapping of the selectivity and dynamics of these redox reactions in cells poses a major analytical challenge. Here we describe a chemoproteomic platform to systematically and quantitatively analyze the reactivity of thousands of cysteines toward H2O2 in human cells. We identified >900 H2O2-sensitive cysteines, which are defined as the H2O2-dependent redoxome. Although redox sites associated with antioxidative and metabolic functions are consistent, most of the H2O2-dependent redoxome varies dramatically between different cells. Structural analyses reveal that H2O2-sensitive cysteines are less conserved than their redox-insensitive counterparts and display distinct sequence motifs, structural features, and potential for crosstalk with lysine modifications. Notably, our chemoproteomic platform also provides an opportunity to predict oxidation-triggered protein conformational changes. The data are freely accessible as a resource at http://redox.ncpsb.org/OXID/.",,,http://redox.ncpsb.org/OXID/, +29039006,Information Resources for Functional Genomics Studies in Brachypodium distachyon.,"Online tools and databases play an essential role in the promotion of functional genomics studies. Several resources for information regarding Brachypodium distachyon (Brachypodium) are available on the Web. In this chapter, we focus on recently published resources for Brachypodium research. 
The Brachypodium.org website ( http://www.brachypodium.org /) is an information portal that provides links to various genomic resources regarding Brachypodium, including genome annotation and re-sequencing datasets of accessions. RIKEN Full-length cDNA Database (RBFLDB, http://brachy.bmep.riken.jp/ver.1/index.pl ) is a web-accessible database that provides information of Brachypodium full-length cDNAs (FLcDNAs) collected in RIKEN and updated gene structures of Brachypodium based on the FLcDNA sequences as well as results of comparative analyses with available sequence resources for Triticeae crops, wheat, and barley. We introduce the functionalities and availability of these important information resources. Furthermore, we also present brief descriptions of useful online tools that facilitate Brachypodium functional genomics studies.",Brachypodium.org,Brachypodium.org,http://www.brachypodium.org /,"an information portal that provides links to various genomic resources regarding Brachypodium, including genome annotation and re-sequencing datasets of accessions" +29039006,Information Resources for Functional Genomics Studies in Brachypodium distachyon.,"Online tools and databases play an essential role in the promotion of functional genomics studies. Several resources for information regarding Brachypodium distachyon (Brachypodium) are available on the Web. In this chapter, we focus on recently published resources for Brachypodium research. The Brachypodium.org website ( http://www.brachypodium.org /) is an information portal that provides links to various genomic resources regarding Brachypodium, including genome annotation and re-sequencing datasets of accessions. 
RIKEN Full-length cDNA Database (RBFLDB, http://brachy.bmep.riken.jp/ver.1/index.pl ) is a web-accessible database that provides information of Brachypodium full-length cDNAs (FLcDNAs) collected in RIKEN and updated gene structures of Brachypodium based on the FLcDNA sequences as well as results of comparative analyses with available sequence resources for Triticeae crops, wheat, and barley. We introduce the functionalities and availability of these important information resources. Furthermore, we also present brief descriptions of useful online tools that facilitate Brachypodium functional genomics studies.",RIKEN Full-length cDNA Database,RBFLDB,http://brachy.bmep.riken.jp/ver.1/index.pl,"a web-accessible database that provides information of Brachypodium full-length cDNAs (FLcDNAs) collected in RIKEN and updated gene structures of Brachypodium based on the FLcDNA sequences as well as results of comparative analyses with available sequence resources for Triticeae crops, wheat, and barley" +29351546,Accessing an Expanded Exposure Science Module at the Comparative Toxicogenomics Database.,"SUMMARY:The Comparative Toxicogenomics Database (CTD; http://ctdbase.org) is a free resource that provides manually curated information on chemical, gene, phenotype, and disease relationships to advance understanding of the effect of environmental exposures on human health. Four core content areas are independently curated: chemical-gene interactions, chemical-disease and gene-disease associations, chemical-phenotype interactions, and environmental exposure data (e.g., effects of chemical stressors on humans). Since releasing exposure data in 2015, we have vastly increased our coverage of chemicals and disease/phenotype outcomes; greatly expanded access to exposure content; added search capability by stressors, cohorts, population demographics, and measured outcomes; and created user-specified displays of content. 
These enhancements aim to facilitate human studies by allowing comparisons among experimental parameters and across studies involving specified chemicals, populations, or outcomes. Integration of data among CTD's four content areas and external data sets, such as Gene Ontology annotations and pathway information, links exposure data with over 1.8 million chemical-gene, chemical-disease and gene-disease interactions. Our analysis tools reveal direct and inferred relationships among the data and provide opportunities to generate predictive connections between environmental exposures and population-level health outcomes. https://doi.org/10.1289/EHP2873.",Comparative Toxicogenomics Database,CTD,http://ctdbase.org,"a free resource that provides manually curated information on chemical, gene, phenotype, and disease relationships to advance understanding of the effect of environmental exposures on human health" +29855811,Procura-PALavras (P-PAL): A Web-based interface for a new European Portuguese lexical database.,"In this article, we present Procura-PALavras (P-PAL), a Web-based interface for a new European Portuguese (EP) lexical database. 
Based on a contemporary printed corpus of over 227 million words, P-PAL provides a broad range of word attributes and statistics, including several measures of word frequency (e.g., raw counts, per-million word frequency, logarithmic Zipf scale), morpho-syntactic information (e.g., parts of speech [PoSs], grammatical gender and number, dominant PoS, and frequency and relative frequency of the dominant PoS), as well as several lexical and sublexical orthographic (e.g., number of letters; consonant-vowel orthographic structure; density and frequency of orthographic neighbors; orthographic Levenshtein distance; orthographic uniqueness point; orthographic syllabification; and trigram, bigram, and letter type and token frequencies), and phonological measures (e.g., pronunciation, number of phonemes, stress, density and frequency of phonological neighbors, transposed and phonographic neighbors, syllabification, and biphone and phone type and token frequencies) for ~53,000 lemmatized and ~208,000 nonlemmatized EP word forms. To obtain these metrics, researchers can choose between two word queries in the application: (i) analyze words previously selected for specific attributes and/or lexical and sublexical characteristics, or (ii) generate word lists that meet word requirements defined by the user in the menu of analyses. For the measures it provides and the flexibility it allows, P-PAL will be a key resource to support research in all cognitive areas that use EP verbal stimuli. P-PAL is freely available at http://p-pal.di.uminho.pt/tools .",Procura-PALavras,P-PAL,http://p-pal.di.uminho.pt/tools,a Web-based interface for a new European Portuguese (EP) lexical database +29990104,A Robust 3D-2D Interactive Tool for Scene Segmentation and Annotation.,"Recent advances of 3D acquisition devices have enabled large-scale acquisition of 3D scene data. 
Such data, if completely and well annotated, can serve as useful ingredients for a wide spectrum of computer vision and graphics works such as data-driven modeling and scene understanding, object detection and recognition. However, annotating a vast amount of 3D scene data remains challenging due to the lack of an effective tool and/or the complexity of 3D scenes (e.g. clutter, varying illumination conditions). This paper aims to build a robust annotation tool that effectively and conveniently enables the segmentation and annotation of massive 3D data. Our tool works by coupling 2D and 3D information via an interactive framework, through which users can provide high-level semantic annotation for objects. We have experimented our tool and found that a typical indoor scene could be well segmented and annotated in less than 30 minutes by using the tool, as opposed to a few hours if done manually. Along with the tool, we created a dataset of over a hundred 3D scenes associated with complete annotations using our tool. Both the tool and dataset will be available at http://scenenn.net.",,,http://scenenn.net,a dataset of over a hundred 3D scenes associated with complete annotations using our tool +29990255,Meta-Path Methods for Prioritizing Candidate Disease miRNAs.,"MicroRNAs (miRNAs) play critical roles in regulating gene expression at post-transcriptional levels. Numerous experimental studies indicate that alterations and dysregulations in miRNAs are associated with important complex diseases, especially cancers. Predicting potential miRNA-disease association is beneficial not only to explore the pathogenesis of diseases, but also to understand biological processes. In this work, we propose two methods that can effectively predict potential miRNA-disease associations using our reconstructed miRNA and disease similarity networks, which are based on the latest experimental data. 
We reconstruct a miRNA functional similarity network using the following biological information: the miRNA family information, miRNA cluster information, experimentally valid miRNA-target association and disease-miRNA information. We also reconstruct a disease similarity network using disease functional information and disease semantic information. We present Katz with specific weights and Katz with machine learning, on the comprehensive heterogeneous network. These methods, which achieve corresponding AUC values of 0.897 and 0.919, exhibit performance superior to the existing methods. Comprehensive data networks and reasonable considerations guarantee the high performance of our methods. Contrary to several methods, which cannot work in such situations, the proposed methods also predict associations for diseases without any known related miRNAs. A web service for the download and prediction of relationships between diseases and miRNAs is available at http://lab.malab.cn/soft/MDPredict/.",,,http://lab.malab.cn/soft/MDPredict/,A web service for the download and prediction of relationships between diseases and miRNAs +30102334,"Cell membrane proteins with high N-glycosylation, high expression and multiple interaction partners are preferred by mammalian viruses as receptors.","

Motivation

Receptor mediated entry is the first step for viral infection. However, the question of how viruses select receptors remains unanswered.

Results

Here, by manually curating a high-quality database of 268 pairs of mammalian virus-host receptor interaction, which included 128 unique viral species or sub-species and 119 virus receptors, we found the viral receptors are structurally and functionally diverse, yet they had several common features when compared to other cell membrane proteins: more protein domains, higher level of N-glycosylation, higher ratio of self-interaction and more interaction partners, and higher expression in most tissues of the host. This study could deepen our understanding of virus-receptor interaction.

Availability and implementation

The database of mammalian virus-host receptor interaction is available at http://www.computationalbiology.cn: 5000/viralReceptor.

Supplementary information

Supplementary data are available at Bioinformatics online.",,,http://www.computationalbiology.cn: 5000/viralReceptor,database of mammalian virus-host receptor interaction +30137226,ImaGEO: integrative gene expression meta-analysis from GEO database.,"SUMMARY:The Gene Expression Omnibus (GEO) database provides an invaluable resource of publicly available gene expression data that can be integrated and analyzed to derive new hypothesis and knowledge. In this context, gene expression meta-analysis (geMAs) is increasingly used in several fields to improve study reproducibility and discovering robust biomarkers. Nevertheless, integrating data is not straightforward without bioinformatics expertise. Here, we present ImaGEO, a web tool for geMAs that implements a complete and comprehensive meta-analysis workflow starting from GEO dataset identifiers. The application integrates GEO datasets, applies different meta-analysis techniques and provides functional analysis results in an easy-to-use environment. ImaGEO is a powerful and useful resource that allows researchers to integrate and perform meta-analysis of GEO datasets to lead robust findings for biomarker discovery studies. AVAILABILITY AND IMPLEMENTATION:ImaGEO is accessible at http://bioinfo.genyo.es/imageo/. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",ImaGEO,ImaGEO,http://bioinfo.genyo.es/imageo/,a web tool for geMAs that implements a complete and comprehensive meta-analysis workflow starting from GEO dataset identifiers +30407550,"Human Disease Ontology 2018 update: classification, content and workflow expansion.","The Human Disease Ontology (DO) (http://www.disease-ontology.org), database has undergone significant expansion in the past three years. 
The DO disease classification includes specific formal semantic rules to express meaningful disease models and has expanded from a single asserted classification to include multiple-inferred mechanistic disease classifications, thus providing novel perspectives on related diseases. Expansion of disease terms, alternative anatomy, cell type and genetic disease classifications and workflow automation highlight the updates for the DO since 2015. The enhanced breadth and depth of the DO's knowledgebase has expanded the DO's utility for exploring the multi-etiology of human disease, thus improving the capture and communication of health-related data across biomedical databases, bioinformatics tools, genomic and cancer resources and demonstrated by a 6.6 growth in DO's user community since 2015. The DO's continual integration of human disease knowledge, evidenced by the more than 200 SVN/GitHub releases/revisions, since previously reported in our DO 2015 NAR paper, includes the addition of 2650 new disease terms, a 30% increase of textual definitions, and an expanding suite of disease classification hierarchies constructed through defined logical axioms.",Human Disease Ontology,DO,http://www.disease-ontology.org, +30963486,Sfold Tools for MicroRNA Target Prediction.,"Computational prediction of miRNA binding sites on target mRNAs facilitates experimental investigation of miRNA functions. In this chapter, we describe STarMir and STarMirDB, two application modules of the Sfold RNA package. STarMir is a Web server for performing miRNA binding site predictions for mRNA and target sequences submitted by users. STarMirDB is a database of precomputed transcriptome-scale predictions. Both STarMir and STarMirDB provide comprehensive sequence, thermodynamic, and target structure features, a logistic probability as a measure of confidence for each predicted site, and a publication-quality diagram of the predicted miRNA-target hybrid. 
In addition, STarMir now offers a new quantitative score to address combined regulatory effects of multiple seed and seedless sites. This score provides a quantitative measure of the overall regulatory effects of both seed and seedless sites on the target. STarMir and STarMirDB are freely available to all through the Sfold Web application server at http://sfold.wadsworth.org .",STarMirDB,STarMirDB,http://sfold.wadsworth.org,a database of precomputed transcriptome-scale predictions +31142855,Multi-omics of the gut microbial ecosystem in inflammatory bowel diseases.,"Inflammatory bowel diseases, which include Crohn's disease and ulcerative colitis, affect several million individuals worldwide. Crohn's disease and ulcerative colitis are complex diseases that are heterogeneous at the clinical, immunological, molecular, genetic, and microbial levels. Individual contributing factors have been the focus of extensive research. As part of the Integrative Human Microbiome Project (HMP2 or iHMP), we followed 132 subjects for one year each to generate integrated longitudinal molecular profiles of host and microbial activity during disease (up to 24 time points each; in total 2,965 stool, biopsy, and blood specimens). Here we present the results, which provide a comprehensive view of functional dysbiosis in the gut microbiome during inflammatory bowel disease activity. We demonstrate a characteristic increase in facultative anaerobes at the expense of obligate anaerobes, as well as molecular disruptions in microbial transcription (for example, among clostridia), metabolite pools (acylcarnitines, bile acids, and short-chain fatty acids), and levels of antibodies in host serum. Periods of disease activity were also marked by increases in temporal variability, with characteristic taxonomic, functional, and biochemical shifts. Finally, integrative analysis identified microbial, biochemical, and host factors central to this dysregulation. 
The study's infrastructure resources, results, and data, which are available through the Inflammatory Bowel Disease Multi'omics Database ( http://ibdmdb.org ), provide the most comprehensive description to date of host and microbial activities in inflammatory bowel diseases.",Inflammatory Bowel Disease Multi'omics Database,,http://ibdmdb.org,the most comprehensive description to date of host and microbial activities in inflammatory bowel diseases +31201317,Multi omics analysis of fibrotic kidneys in two mouse models.,"Kidney fibrosis represents an urgent unmet clinical need due to the lack of effective therapies and an inadequate understanding of the molecular pathogenesis. We have generated a comprehensive and combined multi-omics dataset (proteomics, mRNA and small RNA transcriptomics) of fibrotic kidneys that is searchable through a user-friendly web application: http://hbcreports.med.harvard.edu/fmm/ . Two commonly used mouse models were utilized: a reversible chemical-induced injury model (folic acid (FA) induced nephropathy) and an irreversible surgically-induced fibrosis model (unilateral ureteral obstruction (UUO)). mRNA and small RNA sequencing, as well as 10-plex tandem mass tag (TMT) proteomics were performed with kidney samples from different time points over the course of fibrosis development. The bioinformatics workflow used to process, technically validate, and combine the single omics data will be described. 
In summary, we present temporal multi-omics data from fibrotic mouse kidneys that are accessible through an interrogation tool (Mouse Kidney Fibromics browser) to provide a searchable transcriptome and proteome for kidney fibrosis researchers.",Mouse Kidney Fibromics browser,,http://hbcreports.med.harvard.edu/fmm/,"a comprehensive and combined multi-omics dataset (proteomics, mRNA and small RNA transcriptomics) of fibrotic kidneys" +31220804,Investigation and development of maize fused network analysis with multi-omics.,"Maize is a critically important staple crop in the whole world, which has contributed to both economic security and food in planting areas. The main target for researchers and breeding is the improvement of maize quality and yield. The use of computational biology methods combined with multi-omics for selecting biomolecules of interest for maize breeding has been receiving more attention. Moreover, the rapid growth of high-throughput sequencing data provides the opportunity to explore biomolecules of interest at the molecular level in maize. Furthermore, we constructed weighted networks for each of the omics and then integrated them into a final fused weighted network based on a nonlinear combination method. We also analyzed the final fused network and mined the orphan nodes, some of which were shown to be transcription factors that played a key role in maize development. This study could help to improve maize production via insights at the multi-omics level and provide a new perspective for maize researchers. All related data have been released at http://lab.malab.cn/~jj/maize.htm.",,,http://lab.malab.cn/~jj/maize.htm, +31432427,Essential Features and Use Cases of the Cerebrospinal Fluid Proteome Resource (CSF-PR).,"Every year, a large number of published studies present biomarkers for various neurological disorders. 
Many of these studies are based on mass spectrometry proteomics data and describe comparison of the abundance of proteins in cerebrospinal fluid between two or more disease groups. As the number of such studies is growing, it is no longer straightforward to obtain an overview of which specific proteins are increased or decreased between the numerous relevant diseases and their many subcategories, or to see the larger picture or trends between related diseases. To alleviate this situation, we therefore mined the literature for mass spectrometry-based proteomics studies including quantitative protein data from cerebrospinal fluid of patients with multiple sclerosis, Alzheimer's disease, and Parkinson's disease and organized the extracted data in the Cerebrospinal Fluid Proteome Resource (CSF-PR). CSF-PR is freely available online at http://probe.uib.no/csf-pr , is highly interactive, and allows for easy navigation, visualization, and export of the published scientific data. This chapter will guide the user through some of the most important features of the tool and show examples of the suggested use cases.",Cerebrospinal Fluid Proteome Resource,CSF-PR,http://probe.uib.no/csf-pr, +31640730,NARD: whole-genome reference panel of 1779 Northeast Asians improves imputation accuracy of rare and low-frequency variants.,"Here, we present the Northeast Asian Reference Database (NARD), including whole-genome sequencing data of 1779 individuals from Korea, Mongolia, Japan, China, and Hong Kong. NARD provides the genetic diversity of Korean (n= 850) and Mongolian (n= 384) ancestries that were not present in the 1000 Genomes Project Phase 3 (1KGP3). We combined and re-phased the genotypes from NARD and 1KGP3 to construct a union set of haplotypes. This approach established a robust imputation reference panel for Northeast Asians, which yields the greatest imputation accuracy of rare and low-frequency variants compared with the existing panels. 
NARD imputation panel is available at https://nard.macrogen.com/ .",Northeast Asian Reference Database,NARD,https://nard.macrogen.com/,"whole-genome sequencing data of 1779 individuals from Korea, Mongolia, Japan, China, and Hong Kong" +31706268,Genome-wide prediction and prioritization of human aging genes by data fusion: a machine learning approach.,"BACKGROUND:Machine learning can effectively nominate novel genes for various research purposes in the laboratory. On a genome-wide scale, we implemented multiple databases and algorithms to predict and prioritize the human aging genes (PPHAGE). RESULTS:We fused data from 11 databases, and used Nave Bayes classifier and positive unlabeled learning (PUL) methods, NB, Spy, and Rocchio-SVM, to rank human genes in respect with their implication in aging. The PUL methods enabled us to identify a list of negative (non-aging) genes to use alongside the seed (known age-related) genes in the ranking process. Comparison of the PUL algorithms revealed that none of the methods for identifying a negative sample were advantageous over other methods, and their simultaneous use in a form of fusion was critical for obtaining optimal results (PPHAGE is publicly available at https://cbb.ut.ac.ir/pphage). CONCLUSION:We predict and prioritize over 3,000 candidate age-related genes in human, based on significant ranking scores. The identified candidate genes are associated with pathways, ontologies, and diseases that are linked to aging, such as cancer and diabetes. Our data offer a platform for future experimental research on the genetic and biological aspects of aging. 
Additionally, we demonstrate that fusion of PUL methods and data sources can be successfully used for aging and disease candidate gene prioritization.",PPHAGE,PPHAGE,https://cbb.ut.ac.ir/pphage,"over 3,000 candidate age-related genes in human, based on significant ranking scores" +32081774,Discovery and development of safe-in-man broad-spectrum antiviral agents.,"Viral diseases are one of the leading causes of morbidity and mortality in the world. Virus-specific vaccines and antiviral drugs are the most powerful tools to combat viral diseases. However, broad-spectrum antiviral agents (BSAAs, i.e. compounds targeting viruses belonging to two or more viral families) could provide additional protection of the general population from emerging and re-emerging viral diseases, reinforcing the arsenal of available antiviral options. Here, we review discovery and development of BSAAs and summarize the information on 120 safe-in-man agents in a freely accessible database (https://drugvirus.info/). Future and ongoing pre-clinical and clinical studies will increase the number of BSAAs, expand the spectrum of their indications, and identify drug combinations for treatment of emerging and re-emerging viral infections as well as co-infections.",,,https://drugvirus.info/)., +32392296,CoCoCoNet: conserved and comparative co-expression across a diverse set of species.,"Co-expression analysis has provided insight into gene function in organisms from Arabidopsis to zebrafish. Comparison across species has the potential to enrich these results, for example by prioritizing among candidate human disease genes based on their network properties or by finding alternative model systems where their co-expression is conserved. Here, we present CoCoCoNet as a tool for identifying conserved gene modules and comparing co-expression networks. 
CoCoCoNet is a resource for both data and methods, providing gold standard networks and sophisticated tools for on-the-fly comparative analyses across 14 species. We show how CoCoCoNet can be used in two use cases. In the first, we demonstrate deep conservation of a nucleolus gene module across very divergent organisms, and in the second, we show how the heterogeneity of autism mechanisms in humans can be broken down by functional groups and translated to model organisms. CoCoCoNet is free to use and available to all at https://milton.cshl.edu/CoCoCoNet, with data and R scripts available at ftp://milton.cshl.edu/data.",CoCoCoNet,CoCoCoNet,ftp://milton.cshl.edu/data,"a resource for both data and methods, providing gold standard networks and sophisticated tools for on-the-fly comparative analyses across 14 species" +32765587,ABC-GWAS: Functional Annotation of Estrogen Receptor-Positive Breast Cancer Genetic Variants.,"Over the past decade, hundreds of genome-wide association studies (GWAS) have implicated genetic variants in various diseases, including cancer. However, only a few of these variants have been functionally characterized to date, mainly because the majority of the variants reside in non-coding regions of the human genome with unknown function. A comprehensive functional annotation of the candidate variants is thus necessary to fill the gap between the correlative findings of GWAS and the development of therapeutic strategies. By integrating large-scale multi-omics datasets such as the Cancer Genome Atlas (TCGA) and the Encyclopedia of DNA Elements (ENCODE), we performed multivariate linear regression analysis of expression quantitative trait loci, sequence permutation test of transcription factor binding perturbation, and modeling of three-dimensional chromatin interactions to analyze the potential molecular functions of 2,813 single nucleotide variants in 93 genomic loci associated with estrogen receptor-positive breast cancer. 
To facilitate rapid progress in functional genomics of breast cancer, we have created """"Analysis of Breast Cancer GWAS"""" (ABC-GWAS), an interactive database of functional annotation of estrogen receptor-positive breast cancer GWAS variants. Our resource includes expression quantitative trait loci, long-range chromatin interaction predictions, and transcription factor binding motif analyses to prioritize putative target genes, causal variants, and transcription factors. An embedded genome browser also facilitates convenient visualization of the GWAS loci in genomic and epigenomic context. ABC-GWAS provides an interactive visual summary of comprehensive functional characterization of estrogen receptor-positive breast cancer variants. The web resource will be useful to both computational and experimental biologists who wish to generate and test their hypotheses regarding the genetic susceptibility, etiology, and carcinogenesis of breast cancer. ABC-GWAS can also be used as a user-friendly educational resource for teaching functional genomics. ABC-GWAS is available at http://education.knoweng.org/abc-gwas/.",Analysis of Breast Cancer GWAS,ABC-GWAS,http://education.knoweng.org/abc-gwas/,Functional Annotation of Estrogen Receptor-Positive Breast Cancer Genetic Variants +33177514,"SAVI, in silico generation of billions of easily synthesizable compounds through expert-system type rules.","We have made available a database of over 1 billion compounds predicted to be easily synthesizable, called Synthetically Accessible Virtual Inventory (SAVI). They have been created by a set of transforms based on an adaptation and extension of the CHMTRN/PATRAN programming languages describing chemical synthesis expert knowledge, which originally stem from the LHASA project. The chemoinformatics toolkit CACTVS was used to apply a total of 53 transforms to about 150,000 readily available building blocks (enamine.net). 
Only single-step, two-reactant syntheses were calculated for this database even though the technology can execute multi-step reactions. The possibility to incorporate scoring systems in CHMTRN allowed us to subdivide the database of 1.75 billion compounds in sets according to their predicted synthesizability, with the most-synthesizable class comprising 1.09 billion synthetic products. Properties calculated for all SAVI products show that the database should be well-suited for drug discovery. It is being made publicly available for free download from https://doi.org/10.35115/37n9-5738.",Synthetically Accessible Virtual Inventory,SAVI,https://doi.org/10.35115/37n9-5738,a database of over 1 billion compounds predicted to be easily synthesizable +33181824,CircR2Cancer: a manually curated database of associations between circRNAs and cancers.,"Accumulating evidences have shown that the deregulation of circRNA has close association with many human cancers. However, these experimental verified circRNA-cancer associations are not collected in any database. Here, we develop a manually curated database (circR2Cancer) that provides experimentally supported associations between circRNAs and cancers. The current version of the circR2Cancer contains 1439 associations between 1135 circRNAs and 82 cancers by extracting data from existing literatures and databases. In addition, circR2Cancer contains the information of cancer exacted from Disease Ontology and basic biological information of circRNAs from circBase. At the same time, circR2Cancer provides a simple and friendly interface for users to conveniently browse, search and download the data. It will be a useful and valuable resource for researchers to understanding the regulation mechanism of circRNA in cancers.

Database url

http://www.biobdlab.cn:8000.",circR2Cancer,circR2Cancer,http://www.biobdlab.cn:8000,a manually curated database of associations between circRNAs and cancers +33181824,CircR2Cancer: a manually curated database of associations between circRNAs and cancers.,"Accumulating evidences have shown that the deregulation of circRNA has close association with many human cancers. However, these experimental verified circRNA-cancer associations are not collected in any database. Here, we develop a manually curated database (circR2Cancer) that provides experimentally supported associations between circRNAs and cancers. The current version of the circR2Cancer contains 1439 associations between 1135 circRNAs and 82 cancers by extracting data from existing literatures and databases. In addition, circR2Cancer contains the information of cancer exacted from Disease Ontology and basic biological information of circRNAs from circBase. At the same time, circR2Cancer provides a simple and friendly interface for users to conveniently browse, search and download the data. It will be a useful and valuable resource for researchers to understanding the regulation mechanism of circRNA in cancers.

Database url

http://www.biobdlab.cn:8000.",CircR2Cancer,CircR2Cancer,http://www.biobdlab.cn:8000,a manually curated database of associations between circRNAs and cancers +33391232,Creation of an Online Platform for Identification of Microorganisms: Peak Picking or Full-Spectrum Analysis.,"Identification of microorganisms by MALDI-TOF mass spectrometry is a very efficient method with high throughput, speed, and accuracy. However, it is significantly limited by the absence of a universal database of reference mass spectra. This problem can be solved by creating an Internet platform for open databases of protein spectra of microorganisms. Choosing the optimal mathematical apparatus is the pivotal issue for this task. In our previous study we proposed the geometric approach for processing mass spectrometry data, which represented a mass spectrum as a vector in a multidimensional Euclidean space. This algorithm was implemented in a Jacob4 stand-alone package. We demonstrated its efficiency in delimiting two closely related species of the Bacillus pumilus group. In this study, the geometric approach was realized as R scripts which allowed us to design a Web-based application. We also studied the possibility of using full spectra analysis (FSA) without calculating mass peaks (PPA), which is the logical development of the method. We used 74 microbial strains from the collections of ICiG SB RAS, UNIQEM, IEGM, KMM, and VGM as the models. We demonstrated that the algorithms based on peak-picking and analysis of complete data have accuracy no less than that of Biotyper 3.1 software. We proposed a method for calculating cut-off thresholds based on averaged intraspecific distances. 
The resulting database, raw data, and the set of R scripts are available online at https://icg-test.mydisk.nsc.ru/s/qj6cfZg57g6qwzN.",,,https://icg-test.mydisk.nsc.ru/s/qj6cfZg57g6qwzN, +IND607223097,Characterization of transcriptomes from sexual and asexual lineages of a New Zealand snail (Potamopyrgus antipodarum),"Understanding the evolution and maintenance of sexual reproduction is one of the central challenges of evolutionary biology, yet we know very little about how sex influences molecular evolution. The New Zealand freshwater snail Potamopyrgus antipodarum is ideally suited to address this knowledge gap because obligately sexual individuals often coexist with multiple independently derived obligately asexual lineages. This unusual situation allows direct comparisons both between sexual and asexual P. antipodarum and across populations that differ in the relative frequency of sexual individuals. As such, P. antipodarum has received a great deal of attention as a model system for the maintenance of sex in nature and is also used as a model for environmental toxicology and biological invasions. Molecular genetic resources for P. antipodarum will thus be useful to investigators in a variety of biological fields. We used 454 sequencing of cDNA libraries to generate transcriptomes from two sexual and two asexual P. antipodarum lineages. A de novo assembly of 116.7Mb of sequence reads produced 41396 contigs, and sequence similarity-based Gene Ontology annotations were obtained for 3740 contigs. We detected 408315 SNP loci and 7315 microsatellite loci, which together represent the first genome-scale resource available for P. antipodarum. 
Raw 454 read sequences, contig sequences, annotation data and polymorphism data are publicly available in a searchable online database and for download at http://www.biology.uiowa.edu/neiman/transcriptome.php.",,,http://www.biology.uiowa.edu/neiman/transcriptome.php,Characterization of transcriptomes from sexual and asexual lineages of a New Zealand snail (Potamopyrgus antipodarum) +22075991,The Gene Wiki in 2011: community intelligence applied to human gene annotation.,"The Gene Wiki is an open-access and openly editable collection of Wikipedia articles about human genes. Initiated in 2008, it has grown to include articles about more than 10,000 genes that, collectively, contain more than 1.4 million words of gene-centric text with extensive citations back to the primary scientific literature. This growing body of useful, gene-centric content is the result of the work of thousands of individuals throughout the scientific community. Here, we describe recent improvements to the automated system that keeps the structured data presented on Gene Wiki articles in sync with the data from trusted primary databases. We also describe the expanding contents, editors and users of the Gene Wiki. Finally, we introduce a new automated system, called WikiTrust, which can effectively compute the quality of Wikipedia articles, including Gene Wiki articles, at the word level. 
All articles in the Gene Wiki can be freely accessed and edited at Wikipedia, and additional links and information can be found at the project's Wikipedia portal page: http://en.wikipedia.org/wiki/Portal:Gene_Wiki.",Gene Wiki,,http://en.wikipedia.org/wiki/Portal:Gene_Wiki,an open-access and openly editable collection of Wikipedia articles about human genes +30715167,The global dissemination of bacterial infections necessitates the study of reverse genomic epidemiology.,"Whole genome sequencing (WGS) has revolutionized the genotyping of bacterial pathogens and is expected to become the new gold standard for tracing the transmissions of bacterial infectious diseases for public health purposes. Traditional genomic epidemiology often uses WGS as a verification tool, namely, when a common source or epidemiological link is suspected, the collected isolates are sequenced for the determination of clonal relationships. However, increasingly frequent international travel and food transportation, and the associated potential for the cross-border transmission of bacterial pathogens, often lead to an absence of information on bacterial transmission routes. Here we introduce the concept of 'reverse genomic epidemiology', i.e. when isolates are inspected by genome comparisons to be sufficiently similar to one another, they are assumed to be a consequence of infection from a common source. Through BacWGSTdb (http://bacdb.org/BacWGSTdb/), a database we have developed for bacterial genome typing and source tracking, we have found that almost the entire analyzed 20 bacterial species exhibit the phenomenon of cross-border clonal dissemination. Five networks were further identified in which isolates sharing nearly identical genomes were collected from at least five different countries. Three of these have been documented as real infectious disease outbreaks, therefore demonstrating the feasibility and authority of reverse genomic epidemiology. 
Our survey and proposed strategy would be of potential value in establishing a global surveillance system for tracing bacterial transmissions and outbreaks; the related database and techniques require urgent standardization.",BacWGSTdb,BacWGSTdb,http://bacdb.org/BacWGSTdb/,a database for bacterial genome typing and source tracking +30874795,mirtronDB: a mirtron knowledge base.,"

Motivation

Mirtrons arise from short introns with atypical cleavage by using the splicing mechanism. In the current literature, there is no repository centralizing and organizing the data available to the public. To fill this gap, we developed mirtronDB, the first knowledge database dedicated to mirtron, and it is available at http://mirtrondb.cp.utfpr.edu.br/. MirtronDB currently contains a total of 1407 mirtron precursors and 2426 mirtron mature sequences in 18 species.

Results

Through a user-friendly interface, users can now browse and search mirtrons by organism, organism group, type and name. MirtronDB is a specialized resource that provides free and user-friendly access to knowledge on mirtron data.

Availability and implementation

MirtronDB is available at http://mirtrondb.cp.utfpr.edu.br/.

Supplementary information

Supplementary data are available at Bioinformatics online.",mirtronDB,mirtronDB,http://mirtrondb.cp.utfpr.edu.br/,a mirtron knowledge base +31197322,MetOSite: an integrated resource for the study of methionine residues sulfoxidation.,"

Motivation

The oxidation of protein-bound methionine to form methionine sulfoxide has traditionally been regarded as an oxidative damage. However, growing evidences support the view of this reversible reaction also as a regulatory post-translational modification. Thus, the oxidation of methionine residues has been reported to have multiple and varied implications for protein function. However, despite the importance of this modification and the abundance of reports, all these data are scattered in the literature. No database/resource on methionine sulfoxidation exists currently. Since this information is useful to gain further insights into the redox regulation of cellular proteins, we have created a primary database of experimentally confirmed sulfoxidation sites.

Results

MetOSite currently contains 7242 methionine sulfoxide sites found in 3562 different proteins from 23 species, with Homo sapiens, Arabidopsis thaliana and Bacillus cereus as the main contributors. Each collected site has been classified according to the effect of its sulfoxidation on the biological properties of the modified protein. Thus, MetOSite documents cases where the sulfoxidation of methionine leads to (i) gain of activity, (ii) loss of activity, (iii) increased protein-protein interaction susceptibility, (iv) decreased protein-protein interaction susceptibility, (v) changes in protein stability and (vi) changes in subcellular location.

Availability and implementation

MetOSite is available at https://metosite.uma.es.",MetOSite,MetOSite,https://metosite.uma.es,an integrated resource for the study of methionine residues sulfoxidation +31307376,PhenPath: a tool for characterizing biological functions underlying different phenotypes.,"

Background

Many diseases are associated with complex patterns of symptoms and phenotypic manifestations. Parsimonious explanations aim at reconciling the multiplicity of phenotypic traits with the perturbation of one or few biological functions. For this, it is necessary to characterize human phenotypes at the molecular and functional levels, by exploiting gene annotations and known relations among genes, diseases and phenotypes. This characterization makes it possible to implement tools for retrieving functions shared among phenotypes, co-occurring in the same patient and facilitating the formulation of hypotheses about the molecular causes of the disease.

Results

We introduce PhenPath, a new resource consisting of two parts: PhenPathDB and PhenPathTOOL. The former is a database collecting the human genes associated with the phenotypes described in Human Phenotype Ontology (HPO) and OMIM Clinical Synopses. Phenotypes are then associated with biological functions and pathways by means of NET-GE, a network-based method for functional enrichment of sets of genes. The present version considers only phenotypes related to diseases. PhenPathDB collects information for 18 OMIM Clinical synopses and 7137 HPO phenotypes, related to 4292 diseases and 3446 genes. Enrichment of Gene Ontology annotations endows some 87.7, 86.9 and 73.6% of HPO phenotypes with Biological Process, Molecular Function and Cellular Component terms, respectively. Furthermore, 58.8 and 77.8% of HPO phenotypes are also enriched for KEGG and Reactome pathways, respectively. Based on PhenPathDB, PhenPathTOOL analyzes user-defined sets of phenotypes retrieving diseases, genes and functional terms which they share. This information can provide clues for interpreting the co-occurrence of phenotypes in a patient.

Conclusions

The resource allows finding molecular features useful to investigate diseases characterized by multiple phenotypes, and by this, it can help researchers and physicians in identifying molecular mechanisms and biological functions underlying the concomitant manifestation of phenotypes. The resource is freely available at http://phenpath.biocomp.unibo.it .",PhenPathDB,PhenPathDB,http://phenpath.biocomp.unibo.it,a database collecting the human genes associated with the phenotypes described in Human Phenotype Ontology (HPO) and OMIM Clinical Synopses +31307376,PhenPath: a tool for characterizing biological functions underlying different phenotypes.,"

Background

Many diseases are associated with complex patterns of symptoms and phenotypic manifestations. Parsimonious explanations aim at reconciling the multiplicity of phenotypic traits with the perturbation of one or few biological functions. For this, it is necessary to characterize human phenotypes at the molecular and functional levels, by exploiting gene annotations and known relations among genes, diseases and phenotypes. This characterization makes it possible to implement tools for retrieving functions shared among phenotypes, co-occurring in the same patient and facilitating the formulation of hypotheses about the molecular causes of the disease.

Results

We introduce PhenPath, a new resource consisting of two parts: PhenPathDB and PhenPathTOOL. The former is a database collecting the human genes associated with the phenotypes described in Human Phenotype Ontology (HPO) and OMIM Clinical Synopses. Phenotypes are then associated with biological functions and pathways by means of NET-GE, a network-based method for functional enrichment of sets of genes. The present version considers only phenotypes related to diseases. PhenPathDB collects information for 18 OMIM Clinical synopses and 7137 HPO phenotypes, related to 4292 diseases and 3446 genes. Enrichment of Gene Ontology annotations endows some 87.7, 86.9 and 73.6% of HPO phenotypes with Biological Process, Molecular Function and Cellular Component terms, respectively. Furthermore, 58.8 and 77.8% of HPO phenotypes are also enriched for KEGG and Reactome pathways, respectively. Based on PhenPathDB, PhenPathTOOL analyzes user-defined sets of phenotypes retrieving diseases, genes and functional terms which they share. This information can provide clues for interpreting the co-occurrence of phenotypes in a patient.

Conclusions

The resource allows finding molecular features useful to investigate diseases characterized by multiple phenotypes, and by this, it can help researchers and physicians in identifying molecular mechanisms and biological functions underlying the concomitant manifestation of phenotypes. The resource is freely available at http://phenpath.biocomp.unibo.it .",PhenPath,PhenPath,http://phenpath.biocomp.unibo.it,a new resource consisting of two parts: PhenPathDB and PhenPathTOOL +31429284,One Thousand and One Software for Proteomics: Tales of the Toolmakers of Science.,"Proteomics is a highly dynamic field driven by frequent introduction of new technological approaches, leading to high demand for new software tools and the concurrent development of many methods for data analysis, processing, and storage. The rapidly changing landscape of proteomics software makes finding a tool fit for a particular purpose a significant challenge. The comparison of software and the selection of tools capable to perform a certain operation on a given type of data rely on their detailed annotation using well-defined descriptors. However, finding accurate information including tool input/output capabilities can be challenging and often heavily depends on manual curation efforts. This is further hampered by a rather low half-life of most of the tools, thus demanding the maintenance of a resource with updated information about the tools. We present here our approach to curate a collection of 189 software tools with detailed information about their functional capabilities. We furthermore describe our efforts to reach out to the proteomics community for their engagement, which further increased the catalog to >750 tools being about 70% of the estimated number of 1097 tools existing for proteomics data analysis. 
Descriptions of all annotated tools are available at https://proteomics.bio.tools.",,,https://proteomics.bio.tools,a collection of 189 software tools with detailed information about their functional capabilities +31602484,VDJbase: an adaptive immune receptor genotype and haplotype database.,"VDJbase is a publicly available database that offers easy searching of data describing the complete sets of gene sequences (genotypes and haplotypes) inferred from adaptive immune receptor repertoire sequencing datasets. VDJbase is designed to act as a resource that will allow the scientific community to explore the genetic variability of the immunoglobulin (Ig) and T cell receptor (TR) gene loci. It can also assist in the investigation of Ig- and TR-related genetic predispositions to diseases. Our database includes web-based query and online tools to assist in visualization and analysis of the genotype and haplotype data. It enables users to detect those alleles and genes that are significantly over-represented in a particular population, in terms of genotype, haplotype and gene expression. The database website can be freely accessed at https://www.vdjbase.org/, and no login is required. The data and code use creative common licenses and are freely downloadable from https://bitbucket.org/account/user/yaarilab/projects/GPHP.",VDJbase,VDJbase,https://www.vdjbase.org/,an adaptive immune receptor genotype and haplotype database +31641782,The IPD Project: a centralised resource for the study of polymorphism in genes of the immune system.,"The Immuno Polymorphism Database (IPD), https://www.ebi.ac.uk/ipd/, is a set of specialist databases that enable the study of polymorphic genes which function as part of the vertebrate immune system. The major focus is on the hyperpolymorphic major histocompatibility complex (MHC) genes and the killer-cell immunoglobulin-like receptor (KIR) genes, by providing the official repository and primary source of sequence data. 
Databases are centred around humans as well as animals important for food security, for companionship and as disease models. The IPD project works with specialist groups or nomenclature committees who provide and manually curate individual sections before they are submitted for online publication. To reflect the recent advance of allele sequencing technologies and the increasing demands of novel tools for the analysis of genomic variation, the IPD project is undergoing a progressive redesign and reorganisation. In this review, recent updates and future developments are discussed, with a focus on the core concepts to better future-proof the project.",Immuno Polymorphism Database,IPD,https://www.ebi.ac.uk/ipd/,a centralised resource for the study of polymorphism in genes of the immune system +31642470,WormBase: a modern Model Organism Information Resource.,"WormBase (https://wormbase.org/) is a mature Model Organism Information Resource supporting researchers using the nematode Caenorhabditis elegans as a model system for studies across a broad range of basic biological processes. Toward this mission, WormBase efforts are arranged in three primary facets: curation, user interface and architecture. In this update, we describe progress in each of these three areas. In particular, we discuss the status of literature curation and recently added data, detail new features of the web interface and options for users wishing to conduct data mining workflows, and discuss our efforts to build a robust and scalable architecture by leveraging commercial cloud offerings. 
We conclude with a description of WormBase's role as a founding member of the nascent Alliance of Genome Resources.",WormBase,WormBase,https://wormbase.org/,a modern Model Organism Information Resource +31648087,AdditiveChem: A comprehensive bioinformatics knowledge-base for food additive chemicals.,"Food additives are considered to be the catalysts and headstones of the modern food industry, affecting every step of food production, processing, and storage. The urgent need for a comprehensive curation of food additives, including their molecular structures, biological activities, and precise toxicological evaluations, prompted the creation of the AdditiveChem database (http://www.rxnfinder.org/additivechem/). This database has curated >9064 types of food additives, along with their molecular structure, chemical and physical properties, absorption, distribution, metabolism, excretion and toxicity properties, biosynthesis and biodegradation methods, usage specifications, toxicological and risk assessment data, and targets in the human body from 16 databases to construct an efficient search platform for in silico preliminary evaluations. AdditiveChem database will enable an exploration of the relationship between the structure and function of food additives.",AdditiveChem,AdditiveChem,http://www.rxnfinder.org/additivechem/,A comprehensive bioinformatics knowledge-base for food additive chemicals +31665416,IMG-ABC v.5.0: an update to the IMG/Atlas of Biosynthetic Gene Clusters Knowledgebase.,"Microbial secondary metabolism is a reservoir of bioactive compounds of immense biotechnological and biomedical potential. The biosynthetic machinery responsible for the production of these secondary metabolites (SMs) (also called natural products) is often encoded by collocated groups of genes called biosynthetic gene clusters (BGCs). 
High-throughput genome sequencing of both isolates and metagenomic samples combined with the development of specialized computational workflows is enabling systematic identification of BGCs and the discovery of novel SMs. In order to advance exploration of microbial secondary metabolism and its diversity, we developed the largest publicly available database of predicted BGCs combined with experimentally verified BGCs, the Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters (IMG-ABC) (https://img.jgi.doe.gov/abc-public). Here we describe the first major content update of the IMG-ABC knowledgebase, since its initial release in 2015, refreshing the BGC prediction pipeline with the latest version of antiSMASH (v5) as well as presenting the data in the context of underlying environmental metadata sourced from GOLD (https://gold.jgi.doe.gov/). This update has greatly improved the quality and expanded the types of predicted BGCs compared to the previous version.",Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters,IMG-ABC,https://img.jgi.doe.gov/abc-public,an update to the IMG/Atlas of Biosynthetic Gene Clusters Knowledgebase +31665503,BBCancer: an expression atlas of blood-based biomarkers in the early diagnosis of cancers.,"The early detection of cancer holds the key to combat and control the increasing global burden of cancer morbidity and mortality. Blood-based screenings using circulating DNAs (ctDNAs), circulating RNA (ctRNAs), circulating tumor cells (CTCs) and extracellular vesicles (EVs) have shown promising prospects in the early detection of cancer. Recent high-throughput gene expression profiling of blood samples from cancer patients has provided a valuable resource for developing new biomarkers for the early detection of cancer. However, a well-organized online repository for these blood-based high-throughput gene expression data is still not available. 
Here, we present BBCancer (http://bbcancer.renlab.org/), a web-accessible and comprehensive open resource for providing the expression landscape of six types of RNAs, including messenger RNAs (mRNAs), long noncoding RNAs (lncRNAs), microRNAs (miRNAs), circular RNAs (circRNAs), tRNA-derived fragments (tRFRNAs) and Piwi-interacting RNAs (piRNAs) in blood samples, including plasma, CTCs and EVs, from cancer patients with various cancer types. Currently, BBCancer contains expression data of the six RNA types from 5040 normal and tumor blood samples across 15 cancer types. We believe this database will serve as a powerful platform for developing blood biomarkers.",BBCancer,BBCancer,http://bbcancer.renlab.org/,an expression atlas of blood-based biomarkers in the early diagnosis of cancers +31831730,"The odonate phenotypic database, a new open data resource for comparative studies of an old insect order.","We present The Odonate Phenotypic Database (OPD): an online data resource of dragonfly and damselfly phenotypes (Insecta: Odonata). Odonata is a relatively small insect order that currently consists of about 6400 species belonging to 32 families. The database consists of multiple morphological, life-history and behavioral traits, and biogeographical information collected from literature sources. We see taxon-specific phenotypic databases from Odonata and other organismal groups as becoming an increasing valuable resource in comparative studies. Our database has phenotypic records for 1011 of all 6400 known odonate species. The database is accessible at http://www.odonatephenotypicdatabase.org/, and a static version with an information file about the variables in the database is archived at Dryad.",Odonate Phenotypic Database,OPD,http://www.odonatephenotypicdatabase.org/,a new open data resource for comparative studies of an old insect order +31874631,"JCDB: a comprehensive knowledge base for Jatropha curcas, an emerging model for woody energy plants.","

Background

Jatropha curcas is an oil-bearing plant, and has seeds with high oil content (~ 40%). Several advantages, such as easy genetic transformation and short generation duration, have led to the emergence of J. curcas as a model for woody energy plants. With the development of high-throughput sequencing, the genome of Jatropha curcas has been sequenced by different groups and a mass of transcriptome data was released. How to integrate and analyze these omics data is crucial for functional genomics research on J. curcas.

Results

By establishing pipelines for processing novel gene identification, gene function annotation, and gene network construction, we systematically integrated and analyzed a series of J. curcas transcriptome data. Based on these data, we constructed a J. curcas database (JCDB), which not only includes general gene information, gene functional annotation, gene interaction networks, and gene expression matrices but also provides tools for browsing, searching, and downloading data, as well as online BLAST, the JBrowse genome browser, ID conversion, heatmaps, and gene network analysis tools.

Conclusions

JCDB is the most comprehensive and well annotated knowledge base for J. curcas. We believe it will make a valuable contribution to the functional genomics study of J. curcas. The database is accessible at http://jcdb.xtbg.ac.cn.",J. curcas database,JCDB,http://jcdb.xtbg.ac.cn,"a comprehensive knowledge base for Jatropha curcas, an emerging model for woody energy plants" +31942979,Phenotype-genotype network construction and characterization: a case study of cardiovascular diseases and associated non-coding RNAs.,"The phenotype-genotype relationship is a key for personalized and precision medicine for complex diseases. To unravel the complexity of the clinical phenotype-genotype network, we used cardiovascular diseases (CVDs) and associated non-coding RNAs (ncRNAs) (i.e. miRNAs, long ncRNAs, etc.) as the case for the study of CVDs at a systems or network level. We first integrated a database of CVDs and ncRNAs (CVDncR, http://sysbio.org.cn/cvdncr/) to construct CVD-ncRNA networks and annotate their clinical associations. To characterize the networks, we then separated the miRNAs into two groups, i.e. universal miRNAs associated with at least two types of CVDs and specific miRNAs related only to one type of CVD. Our analyses indicated two interesting patterns in these CVD-ncRNA networks. First, scale-free features were present within both CVD-miRNA and CVD-lncRNA networks; second, universal miRNAs were more likely to be CVDs biomarkers. These results were confirmed by computational functional analyses. The findings offer theoretical guidance for decoding CVD-ncRNA associations and will facilitate the screening of CVD ncRNA biomarkers. Database URL: http://sysbio.org.cn/cvdncr/.",CVDncR,CVDncR,http://sysbio.org.cn/cvdncr/, +31982380,TissueCoCoPUTs: Novel Human Tissue-Specific Codon and Codon-Pair Usage Tables Based on Differential Tissue Gene Expression.,"Protein expression in multicellular organisms varies widely across tissues. 
Codon usage in the transcriptome of each tissue is derived from genomic codon usage and the relative expression level of each gene. We created a comprehensive computational resource that houses tissue-specific codon, codon-pair, and dinucleotide usage data for 51 Homo sapiens tissues (TissueCoCoPUTs: https://hive.biochemistry.gwu.edu/review/tissue_codon), using transcriptome data from the Broad Institute Genotype-Tissue Expression (GTEx) portal. Distances between tissue-specific codon and codon-pair frequencies were used to generate a dendrogram based on the unique patterns of codon and codon-pair usage in each tissue that are clearly distinct from the genomic distribution. This novel resource may be useful in unraveling the relationship between codon usage and tRNA abundance, which could be critical in determining translation kinetics and efficiency across tissues. Areas of investigation such as biotherapeutic development, tissue-specific genetic engineering, and genetic disease prediction will greatly benefit from this resource.",TissueCoCoPUTs,TissueCoCoPUTs,https://hive.biochemistry.gwu.edu/review/tissue_codon,Novel Human Tissue-Specific Codon and Codon-Pair Usage Tables Based on Differential Tissue Gene Expression +32009518,RATEmiRs: the rat atlas of tissue-specific and enriched miRNAs for discerning baseline expression exclusivity of candidate biomarkers.,"MicroRNAs (miRNAs) are small RNAs that regulate mRNA expression and have been targeted as biomarkers of organ damage and disease. To explore the utility of miRNAs to assess injury to specific tissues, a tissue atlas of miRNA abundance was constructed. The Rat Atlas of Tissue-specific and Enriched miRNAs (RATEmiRs) catalogues miRNA sequencing data from 21 and 23 tissues in male and female Sprague-Dawley rats, respectively. RATEmiRs identifies tissue-enriched (TE), tissue-specific (TS), or organ-specific (OS) miRNAs via comparisons of one or more tissue or organ vs others. 
We provide a brief overview of RATEmiRs and present how to use it to detect miRNA expression abundance of candidate biomarkers as well as to compare the expression of miRNAs between rat and human. The database is available at https://www.niehs.nih.gov/ratemirs/.",Rat Atlas of Tissue-specific and Enriched miRNAs,RATEmiRs,https://www.niehs.nih.gov/ratemirs/,the rat atlas of tissue-specific and enriched miRNAs for discerning baseline expression exclusivity of candidate biomarkers +32090261,NipahVR: a resource of multi-targeted putative therapeutics and epitopes for the Nipah virus.,"Nipah virus (NiV) is an emerging and priority pathogen from the Paramyxoviridae family with a high fatality rate. It causes various diseases such as respiratory ailments and encephalitis and poses a great threat to humans and livestock. Despite various efforts, there is no approved antiviral treatment available. Therefore, to expedite and assist the research, we have developed an integrative resource NipahVR (http://bioinfo.imtech.res.in/manojk/nipahvr/) for the multi-targeted putative therapeutics and epitopes for NiV. It is structured into different sections, i.e. genomes, codon usage, phylogenomics, molecular diagnostic primers, therapeutics (siRNAs, sgRNAs, miRNAs) and vaccine epitopes (B-cell, CTL, MHC-I and -II binders). Most decisively, potentially efficient therapeutic regimens targeting different NiV proteins and genes were anticipated and projected. We hope this computational resource would be helpful in developing combating strategies against this deadly pathogen. 
Database URL: http://bioinfo.imtech.res.in/manojk/nipahvr/.",NipahVR,NipahVR,http://bioinfo.imtech.res.in/manojk/nipahvr/,a resource of multi-targeted putative therapeutics and epitopes for the Nipah virus +32103267,Predicted Drosophila Interactome Resource and web tool for functional interpretation of differentially expressed genes.,"Drosophila melanogaster is a well-established model organism that is widely used in genetic studies. This species enjoys the availability of a wide range of research tools, well-annotated reference databases and highly similar gene circuitry to other insects. To facilitate molecular mechanism studies in Drosophila, we present the Predicted Drosophila Interactome Resource (PDIR), a database of high-quality predicted functional gene interactions. These interactions were inferred from evidence in 10 public databases providing information for functional gene interactions from diverse perspectives. The current version of PDIR includes 102 835 putative functional associations with balanced sensitivity and specificity, which are expected to cover 22.56% of all Drosophila protein interactions. This set of functional interactions is a good reference for hypothesis formulation in molecular mechanism studies. At the same time, these interactions also serve as a high-quality reference interactome for gene set linkage analysis (GSLA), which is a web tool for the interpretation of the potential functional impacts of a set of changed genes observed in transcriptomics analyses. In a case study, we show that the PDIR/GSLA system was able to produce a more comprehensive and concise interpretation of the collective functional impact of multiple simultaneously changed genes compared with the widely used gene set annotation tools, including PANTHER and David. 
PDIR and its associated GSLA service can be accessed at http://drosophila.biomedtzc.cn.",Predicted Drosophila Interactome Resource,PDIR,http://drosophila.biomedtzc.cn,a database of high-quality predicted functional gene interactions +32117995,Nc2Eye: A Curated ncRNAomics Knowledgebase for Bridging Basic and Clinical Research in Eye Diseases.,"Eye diseases (EDs) represent a group of disorders affecting the visual system, most of which can lead to visual impairment and blindness. Accumulating evidence reveals that non-coding RNAs (ncRNAs) are closely associated with a wide variety of EDs. However, abundant associations between ncRNAs and EDs are scattered across the published literature, obstructing a global view of ncRNA-ED associations. A public resource of high-quality manually curated ncRNAomics knowledge associated with EDs remains unavailable. To address this gap, we thus developed Nc2Eye (http://nc2eye.bio-data.cn/), which is the first knowledgebase dedicated to providing a comprehensive ncRNAomics resource for bridging basic and clinical research in EDs. Through a comprehensive review of more than 2400 published papers, Nc2Eye catalogs 7088 manually curated ncRNA-ED associations involving 4363 ncRNAs across eight species. We also provide detailed descriptions and annotation information for each ncRNA-disease association such as ncRNA categories, experimental methods, expression pattern and related clinical drugs. To further expand the pathogenic ncRNAs, we also collected more than 90 high-throughput EDs-related transcriptome datasets. Furthermore, a user-friendly interface was constructed for convenient and flexible data browsing, querying, and retrieving. 
We believe that Nc2Eye is a timely and valuable knowledgebase for significantly improving and useful for discovery of new diagnostic and therapeutic biomarkers.",Nc2Eye,Nc2Eye,http://nc2eye.bio-data.cn/,A Curated ncRNAomics Knowledgebase for Bridging Basic and Clinical Research in Eye Diseases +32315389,HotSpot3D web server: an integrated resource for mutation analysis in protein 3D structures.,"

Motivation

HotSpot3D is a widely used software for identifying mutation hotspots on the 3D structures of proteins. To further assist users, we developed a new HotSpot3D web server to make this software more versatile, convenient and interactive.

Results

The HotSpot3D web server performs data pre-processing, clustering, visualization and log-viewing on one stop. Users can interactively explore each cluster and easily re-visualize the mutational clusters within browsers. We also provide a database that allows users to search and visualize proximal mutations from 33 cancers in the Cancer Genome Atlas.

Availability and implementation

http://niulab.scgrid.cn/HotSpot3D/.

Supplementary information

Supplementary data are available at Bioinformatics online.",HotSpot3D,HotSpot3D,http://niulab.scgrid.cn/HotSpot3D/,an integrated resource for mutation analysis in protein 3D structures +32315389,HotSpot3D web server: an integrated resource for mutation analysis in protein 3D structures.,"

Motivation

HotSpot3D is a widely used software for identifying mutation hotspots on the 3D structures of proteins. To further assist users, we developed a new HotSpot3D web server to make this software more versatile, convenient and interactive.

Results

The HotSpot3D web server performs data pre-processing, clustering, visualization and log-viewing on one stop. Users can interactively explore each cluster and easily re-visualize the mutational clusters within browsers. We also provide a database that allows users to search and visualize proximal mutations from 33 cancers in the Cancer Genome Atlas.

Availability and implementation

http://niulab.scgrid.cn/HotSpot3D/.

Supplementary information

Supplementary data are available at Bioinformatics online.",HotSpot3D,HotSpot3D,http://niulab.scgrid.cn/HotSpot3D/,a database that allows users to search and visualize proximal mutations from 33 cancers in the Cancer Genome Atlas +32337573,MPTherm: database for membrane protein thermodynamics for understanding folding and stability.,"The functions of membrane proteins (MPs) are attributed to their structure and stability. Factors influencing the stability of MPs differ from globular proteins due to the presence of membrane spanning regions. Thermodynamic data of MPs aid to understand the relationship among their structure, stability and function. Although a wealth of experimental data on thermodynamics of MPs are reported in the literature, there is no database available explicitly for MPs. In this work, we have developed a database for MP thermodynamics, MPTherm, which contains more than 7000 thermodynamic data from about 320 MPs. Each entry contains protein sequence and structural information, membrane topology, experimental conditions, thermodynamic parameters such as melting temperature, free energy, enthalpy etc. and literature information. MPTherm assists users to retrieve the data by using different search and display options. We have also provided the sequence and structure visualization as well as cross-links to UniProt and PDB databases. MPTherm database is freely available at http://www.iitm.ac.in/bioinfo/mptherm/. It is implemented in HTML, PHP, MySQL and JavaScript, and supports the latest versions of major browsers, such as Firefox, Chrome and Opera. 
MPTherm would serve as an effective resource for understanding the stability of MPs, development of prediction tools and identifying drug targets for diseases associated with MPs.",MPTherm,MPTherm,http://www.iitm.ac.in/bioinfo/mptherm/,database for membrane protein thermodynamics for understanding folding and stability +32358997,ProNetView-ccRCC: A Web-Based Portal to Interactively Explore Clear Cell Renal Cell Carcinoma Proteogenomics Networks.,"To better understand the molecular basis of cancer, the NCI's Clinical Proteomics Tumor Analysis Consortium (CPTAC) has been performing comprehensive large-scale proteogenomic characterizations of multiple cancer types. Gene and protein regulatory networks are subsequently being derived based on these proteogenomic profiles, which serve as tools to gain systems-level understanding of the molecular regulatory factories underlying these diseases. On the other hand, it remains a challenge to effectively visualize and navigate the resulting network models, which capture higher order structures in the proteogenomic profiles. There is a pressing need to have a new open community resource tool for intuitive visual exploration, interpretation, and communication of these gene/protein regulatory networks by the cancer research community. In this work, ProNetView-ccRCC (http://ccrcc.cptac-network-view.org/), an interactive web-based network exploration portal for investigating phosphopeptide co-expression network inferred based on the CPTAC clear cell renal cell carcinoma (ccRCC) phosphoproteomics data is introduced. ProNetView-ccRCC enables quick, user-intuitive visual interactions with the ccRCC tumor phosphoprotein co-expression network comprised of 3614 genes, as well as 30 functional pathway-enriched network modules. 
Users can interact with the network portal and can conveniently query for association between abundance of each phosphopeptide in the network and clinical variables such as tumor grade.",ProNetView-ccRCC,ProNetView-ccRCC,http://ccrcc.cptac-network-view.org/,A Web-Based Portal to Interactively Explore Clear Cell Renal Cell Carcinoma Proteogenomics Networks +32392583,"M6A2Target: a comprehensive database for targets of m6A writers, erasers and readers.","N6-methyladenosine (m6A) is the most abundant posttranscriptional modification in mammalian mRNA molecules and has a crucial function in the regulation of many fundamental biological processes. The m6A modification is a dynamic and reversible process regulated by a series of writers, erasers and readers (WERs). Different WERs might have different functions, and even the same WER might function differently in different conditions, which are mostly due to different downstream genes being targeted by the WERs. Therefore, identification of the targets of WERs is particularly important for elucidating this dynamic modification. However, there is still no public repository to host the known targets of WERs. Therefore, we developed the m6A WER target gene database (m6A2Target) to provide a comprehensive resource of the targets of m6A WERs. M6A2Target provides a user-friendly interface to present WER targets in two different modules: 'Validated Targets', referred to as WER targets identified from low-throughput studies, and 'Potential Targets', including WER targets analyzed from high-throughput studies. Compared to other existing m6A-associated databases, m6A2Target is the first specific resource for m6A WER target genes. 
M6A2Target is freely accessible at http://m6a2target.canceromics.org.",m6A WER target gene database,m6A2Target,http://m6a2target.canceromics.org,"a comprehensive database for targets of m6A writers, erasers and readers" +32415965,Palantir: a springboard for the analysis of secondary metabolite gene clusters in large-scale genome mining projects.,"

Summary

To support small and large-scale genome mining projects, we present Post-processing Analysis tooLbox for ANTIsmash Reports (Palantir), a dedicated software suite for handling and refining secondary metabolite biosynthetic gene cluster (BGC) data annotated with the popular antiSMASH pipeline. Palantir provides new functionalities building on NRPS/PKS predictions from antiSMASH, such as improved BGC annotation, module delineation and easy access to sub-sequences at different levels (cluster, gene, module and domain). Moreover, it can parse user-provided antiSMASH reports and reformat them for direct use or storage in a relational database.

Availability and implementation

Palantir is released both as a Perl API available on CPAN (https://metacpan.org/release/Bio-Palantir) and as a web application (http://palantir.uliege.be). As a practical use case, the web interface also features a database built from the mining of 1616 cyanobacterial genomes, of which 1488 were predicted to encode at least one BGC.

Supplementary information

Supplementary data are available at Bioinformatics online.",Post-processing Analysis tooLbox for ANTIsmash Reports,Palantir,https://metacpan.org/release/Bio-Palantir,"a database built from the mining of 1616 cyanobacterial genomes, of which 1488 were predicted to encode at least one BGC" +32415965,Palantir: a springboard for the analysis of secondary metabolite gene clusters in large-scale genome mining projects.,"

Summary

To support small and large-scale genome mining projects, we present Post-processing Analysis tooLbox for ANTIsmash Reports (Palantir), a dedicated software suite for handling and refining secondary metabolite biosynthetic gene cluster (BGC) data annotated with the popular antiSMASH pipeline. Palantir provides new functionalities building on NRPS/PKS predictions from antiSMASH, such as improved BGC annotation, module delineation and easy access to sub-sequences at different levels (cluster, gene, module and domain). Moreover, it can parse user-provided antiSMASH reports and reformat them for direct use or storage in a relational database.

Availability and implementation

Palantir is released both as a Perl API available on CPAN (https://metacpan.org/release/Bio-Palantir) and as a web application (http://palantir.uliege.be). As a practical use case, the web interface also features a database built from the mining of 1616 cyanobacterial genomes, of which 1488 were predicted to encode at least one BGC.

Supplementary information

Supplementary data are available at Bioinformatics online.",Post-processing Analysis tooLbox for ANTIsmash Reports,Palantir,https://metacpan.org/release/Bio-Palantir,a springboard for the analysis of secondary metabolite gene clusters in large-scale genome mining projects +32433469,Construction of a web-based nanomaterial database by big data curation and modeling friendly nanostructure annotations.,"Modern nanotechnology research has generated numerous experimental data for various nanomaterials. However, the few nanomaterial databases available are not suitable for modeling studies due to the way they are curated. Here, we report the construction of a large nanomaterial database containing annotated nanostructures suited for modeling research. The database, which is publicly available through http://www.pubvinas.com/, contains 705 unique nanomaterials covering 11 material types. Each nanomaterial has up to six physicochemical properties and/or bioactivities, resulting in more than ten endpoints in the database. All the nanostructures are annotated and transformed into protein data bank files, which are downloadable by researchers worldwide. Furthermore, the nanostructure annotation procedure generates 2142 nanodescriptors for all nanomaterials for machine learning purposes, which are also available through the portal. This database provides a public resource for data-driven nanoinformatics modeling research aimed at rational nanomaterial design and other areas of modern computational nanotechnology.",,,http://www.pubvinas.com/,a large nanomaterial database containing annotated nanostructures suited for modeling research +32436932,miRactDB characterizes miRNA-gene relation switch between normal and cancer tissues across pan-cancer.,"It has been increasingly accepted that microRNA (miRNA) can both activate and suppress gene expression, directly or indirectly, under particular circumstances. 
Yet, a systematic study on the switch in their interaction pattern between activation and suppression and between normal and cancer conditions based on multi-omics evidences is not available. We built miRactDB, a database for miRNA-gene interaction, at https://ccsm.uth.edu/miRactDB, to provide a versatile resource and platform for annotation and interpretation of miRNA-gene relations. We conducted a comprehensive investigation on miRNA-gene interactions and their biological implications across tissue types in both tumour and normal conditions, based on TCGA, CCLE and GTEx databases. We particularly explored the genetic and epigenetic mechanisms potentially contributing to the positive correlation, including identification of miRNA binding sites in the gene coding sequence (CDS) and promoter regions of partner genes. Integrative analysis based on this resource revealed that top-ranked genes derived from TCGA tumour and adjacent normal samples share an overwhelming part of biological processes, which are quite different than those from CCLE and GTEx. The most active miRNAs predicted to target CDS and promoter regions are largely overlapped. These findings corroborate that adjacent normal tissues might have undergone significant molecular transformations towards oncogenesis before phenotypic and histological change; and there probably exists a small yet critical set of miRNAs that profoundly influence various cancer hallmark processes. miRactDB provides a unique resource for the cancer and genomics communities to screen, prioritize and rationalize their candidates of miRNA-gene interactions, in both normal and cancer scenarios.",miRactDB,miRactDB,https://ccsm.uth.edu/miRactDB,a database for miRNA-gene interaction +32493955,WilsonGen a comprehensive clinically annotated genomic variant resource for Wilson's Disease.,"Wilson disease (WD) is one of the most prevalent genetic diseases with an estimated global carrier frequency of 1 in 90 and a prevalence of 1 in 30,000. 
The disease owes its genesis to Kinnier Wilson who described the disease, and is caused by accumulation of Copper (Cu) in various organs including the liver, central nervous system, cornea, kidney, joints and cardiac muscle which contribute to the characteristic clinical features of WD. A number of studies have reported genetic variants in the ATP7B gene from diverse ethnic and geographical origins. The recent advent of next-generation sequencing approaches has also enabled the discovery of a large number of novel variants in the gene associated with the disease. Previous attempts have been made to compile the knowledgebase and spectrum of genetic variants from across the multitude of publications, but have been limited by the utility due to the significant differences in approaches used to qualify pathogenicity of variants in each of the publications. The recent formulation of guidelines and algorithms for assessment of the pathogenicity of variants jointly put forward by the American College of Medical Genetics and the Association of Molecular Pathologists (ACMG &) has provided a framework for evidence based and systematic assessment of pathogenicity of variants. In this paper, we describe a comprehensive resource of genetic variants in ATP7B gene manually curated from literature and data resources and systematically annotated using the ACMG & AMP guidelines for assessing pathogenicity. The resource therefore serves as a central point for clinicians and geneticists working on WD and to the best of our knowledge is the most comprehensive and only clinically annotated resource for WD. The resource is available at URL http://clingen.igib.res.in/WilsonGen/. We compiled a total of 3662 genetic variants from publications and databases associated with WD. Of these variants compiled, a total of 1458 were found to be unique entries. This is the largest WD database comprising 656 pathogenic/likely pathogenic variants reported classified according to ACMG & AMP guidelines. 
We also mapped all the pathogenic variants corresponding to ATP7B protein from literature and other databases. In addition, geographical origin and distribution of ATP7B pathogenic variants reported are also mapped in the database.",WilsonGen,WilsonGen,http://clingen.igib.res.in/WilsonGen/,a comprehensive clinically annotated genomic variant resource for Wilson's Disease +32496513,gutMEGA: a database of the human gut MEtaGenome Atlas.,"The gut microbiota plays important roles in human health through regulating both physiological homeostasis and disease emergence. The accumulation of metagenomic sequencing studies enables us to better understand the temporal and spatial variations of the gut microbiota under different physiological and pathological conditions. However, it is inconvenient for scientists to query and retrieve published data; thus, a comprehensive resource for the quantitative gut metagenome is urgently needed. In this study, we developed gut MEtaGenome Atlas (gutMEGA), a well-annotated comprehensive database, to curate and host published quantitative gut microbiota datasets from Homo sapiens. By carefully curating the gut microbiota composition, phenotypes and experimental information, gutMEGA finally integrated 59 132 quantification events for 6457 taxa at seven different levels (kingdom, phylum, class, order, family, genus and species) under 776 conditions. Moreover, with various browsing and search functions, gutMEGA provides a fast and simple way for users to obtain the relative abundances of intestinal microbes among phenotypes. 
Overall, gutMEGA is a convenient and comprehensive resource for gut metagenome research, which can be freely accessed at http://gutmega.omicsbio.info.",gut MEtaGenome Atlas,gutMEGA,http://gutmega.omicsbio.info,a database of the human gut MEtaGenome Atlas +32507889,"The articles.ELM resource: simplifying access to protein linear motif literature by annotation, text-mining and classification.","Modern biology produces data at a staggering rate. Yet, much of these biological data is still isolated in the text, figures, tables and supplementary materials of articles. As a result, biological information created at great expense is significantly underutilised. The protein motif biology field does not have sufficient resources to curate the corpus of motif-related literature and, to date, only a fraction of the available articles have been curated. In this study, we develop a set of tools and a web resource, 'articles.ELM', to rapidly identify the motif literature articles pertinent to a researcher's interest. At the core of the resource is a manually curated set of about 8000 motif-related articles. These articles are automatically annotated with a range of relevant biological data allowing in-depth search functionality. Machine-learning article classification is used to group articles based on their similarity to manually curated motif classes in the Eukaryotic Linear Motif resource. Articles can also be manually classified within the resource. The 'articles.ELM' resource permits the rapid and accurate discovery of relevant motif articles thereby improving the visibility of motif literature and simplifying the recovery of valuable biological insights sequestered within scientific articles. Consequently, this web resource removes a critical bottleneck in scientific productivity for the motif biology field. 
Database URL: http://slim.icr.ac.uk/articles/.",articles.ELM,articles.ELM,http://slim.icr.ac.uk/articles/,a manually curated set of about 8000 motif-related articles +32510549,DenvInD: dengue virus inhibitors database for clinical and molecular research.,"Dengue virus (DENV) researchers often face challenges with the highly time-consuming process of collecting and curating information on known inhibitors during the standard drug discovery process. To this end, however, required collective information is not yet available on a single platform. Hence, we have developed the DenvInD database for experimentally validated DENV inhibitors against its known targets presently hosted at https://webs.iiitd.edu.in/raghava/denvind/. This database provides comprehensive information, i.e. PubChem IDs, SMILES, IC50, EC50, CC50, and wherever available Ki values of the 484 compounds in vitro validated as inhibitors against respective drug targets of DENV. Also, the DenvInD database has been linked to the user-friendly web-based interface and accessibility features, such as simple search, advanced search and data browsing. All the required data curation was conducted manually from the reported scientific literature and PubChem. The collected information was then organized into the DenvInD database using sequence query language under user interface by hypertext markup language. DenvInD is the first useful repository of its kind which would augment the DENV drug discovery research by providing essential information on known DENV inhibitors for molecular docking, computational screening, pharmacophore modeling and quantitative structure-activity relationship modeling.",DenvInD,DenvInD,https://webs.iiitd.edu.in/raghava/denvind/,dengue virus inhibitors database for clinical and molecular research +32539086,ForageGrassBase: molecular resource for the forage grass meadow fescue (Festuca pratensis Huds.).,"Meadow fescue (Festuca pratensis Huds.) 
is one of the most important forage grasses in temperate regions. It is a diploid (2n = 14) outbreeding species that belongs to the genus Festuca. Together with Lolium perenne, they are the most important genera of forage grasses. Meadow fescue has very high quality of yield with good winter survival and persistency. However, extensive genomic resources for meadow fescue have not become available so far. To address this lack of comprehensive publicly available datasets, we have developed functionally annotated draft genome sequences of two meadow fescue genotypes, 'HF7/2' and 'B14/16', and constructed the platform ForageGrassBase, available at http://foragegrass.org/, for data visualization, download and querying. This is the first open-access platform that provides extensive genomic resources related to this forage grass species. The current database provides the most up-to-date draft genome sequence along with structural and functional annotations for genes that can be accessed using Genome Browser (GBrowse), along with comparative genomic alignments to Arabidopsis, L. perenne, barley, rice, Brachypodium and maize genomes. We have integrated homologous search tool BLAST also for the users to analyze their data. Combined, GBrowse, BLAST and downloadable data gives a user-friendly access to meadow fescue genomic resources. To our knowledge, ForageGrassBase is the first genome database dedicated to forage grasses. The current forage grass database provides valuable resources for a range of research fields related to meadow fescue and other forage crop species, as well as for plant research communities in general. The genome database can be accessed at http://foragegrass.org.",ForageGrassBase,ForageGrassBase,http://foragegrass.org/,molecular resource for the forage grass meadow fescue (Festuca pratensis Huds.) 
+32591816,ExoBCD: a comprehensive database for exosomal biomarker discovery in breast cancer.,"Effective and safe implementation of precision oncology for breast cancer is a vital strategy to improve patient outcomes, which relies on the application of reliable biomarkers. As 'liquid biopsy' and novel resource for biomarkers, exosomes provide a promising avenue for the diagnosis and treatment of breast cancer. Although several exosome-related databases have been developed, there is still lacking of an integrated database for exosome-based biomarker discovery. To this end, a comprehensive database ExoBCD (https://exobcd.liumwei.org) was constructed with the combination of robust analysis of four high-throughput datasets, transcriptome validation of 1191 TCGA cases and manual mining of 950 studies. In ExoBCD, approximately 20 900 annotation entries were integrated from 25 external sources and 306 exosomal molecules (49 potential biomarkers and 257 biologically interesting molecules). The latter could be divided into 3 molecule types, including 121 mRNAs, 172 miRNAs and 13 lncRNAs. Thus, the well-linked information about molecular characters, experimental biology, gene expression patterns, overall survival, functional evidence, tumour stage and clinical use were fully integrated. As a data-driven and literature-based paradigm proposed of biomarker discovery, this study also demonstrated the corroborative analysis and identified 36 promising molecules, as well as the most promising prognostic biomarkers, IGF1R and FRS2. Taken together, ExoBCD is the first well-corroborated knowledge base for exosomal studies of breast cancer. 
It not only lays a foundation for subsequent studies but also strengthens the studies of probing molecular mechanisms, discovering biomarkers and developing meaningful clinical use.",ExoBCD,ExoBCD,https://exobcd.liumwei.org,a comprehensive database for exosomal biomarker discovery in breast cancer +32597467,A comprehensive integrated drug similarity resource for in-silico drug repositioning and beyond.,"Drug similarity studies are driven by the hypothesis that similar drugs should display similar therapeutic actions and thus can potentially treat a similar constellation of diseases. Drug-drug similarity has been derived by variety of direct and indirect sources of evidence and frequently shown high predictive power in discovering validated repositioning candidates as well as other in-silico drug development applications. Yet, existing resources either have limited coverage or rely on an individual source of evidence, overlooking the wealth and diversity of drug-related data sources. Hence, there has been an unmet need for a comprehensive resource integrating diverse drug-related information to derive multi-evidenced drug-drug similarities. We addressed this resource gap by compiling heterogenous information for an exhaustive set of small-molecule drugs (total of 10 367 in the current version) and systematically integrated multiple sources of evidence to derive a multi-modal drug-drug similarity network. The resulting database, 'DrugSimDB' currently includes 238 635 drug pairs with significant aggregated similarity, complemented with an interactive user-friendly web interface (http://vafaeelab.com/drugSimDB.html), which not only enables database ease of access, search, filtration and export, but also provides a variety of complementary information on queried drugs and interactions. The integration approach can flexibly incorporate further drug information into the similarity network, providing an easily extendable platform. 
The database compilation and construction source-code has been well-documented and semi-automated for any-time upgrade to account for new drugs and up-to-date drug information.",DrugSimDB,DrugSimDB,http://vafaeelab.com/drugSimDB.html,A comprehensive integrated drug similarity resource for in-silico drug repositioning and beyond +32608479,CHDGKB: a knowledgebase for systematic understanding of genetic variations associated with non-syndromic congenital heart disease.,"Congenital heart disease (CHD) is one of the most common birth defects, with complex genetic and environmental etiologies. The reports of genetic variation associated with CHD have increased dramatically in recent years due to the revolutionary development of molecular technology. However, CHD is a heterogeneous disease, and its genetic origins remain inconclusive in most patients. Here we present a database of genetic variations for non-syndromic CHD (NS-CHD). By manually literature extraction and analyses, 5345 NS-CHD-associated genetic variations were collected, curated and stored in the public online database. The objective of our database is to provide the most comprehensive updates on NS-CHD genetic research and to aid systematic analyses of pathogenesis of NS-CHD in molecular level and the correlation between NS-CHD genotypes and phenotypes. Database URL: http://www.sysbio.org.cn/CHDGKB/.",CHDGKB,CHDGKB,http://www.sysbio.org.cn/CHDGKB/,a knowledgebase for systematic understanding of genetic variations associated with non-syndromic congenital heart disease +32621232,Dockground Tool for Development and Benchmarking of Protein Docking Procedures.,"Databases of protein-protein complexes are essential for the development of protein modeling/docking techniques. Such databases provide a knowledge base for docking algorithms, intermolecular potentials, search procedures, scoring functions, and refinement protocols. 
Development of docking techniques requires systematic validation of the modeling protocols on carefully curated benchmark sets of complexes. We present a description and a guide to the DOCKGROUND resource ( http://dockground.compbio.ku.edu ) for structural modeling of protein interactions. The resource integrates various datasets of protein complexes and other data for the development and testing of protein docking techniques. The sets include bound complexes, experimentally determined unbound, simulated unbound, model-model complexes, and docking decoys. The datasets are available to the user community through a Web interface.",DOCKGROUND,DOCKGROUND,http://dockground.compbio.ku.edu,Tool for Development and Benchmarking of Protein Docking Procedures +32632099,Database of literature derived cellular measurements from the murine basal ganglia.,"Quantitative measurements and descriptive statistics of different cellular elements in the brain are typically published in journal articles as text, tables, and example figures, and represent an important basis for the creation of biologically constrained computational models, design of intervention studies, and comparison of subject groups. Such data can be challenging to extract from publications and difficult to normalise and compare across studies, and few studies have so far attempted to integrate quantitative information available in journal articles. We here present a database of quantitative information about cellular parameters in the frequently studied murine basal ganglia. The database holds a curated and normalised selection of currently available data collected from the literature and public repositories, providing the most comprehensive collection of quantitative neuroanatomical data from the basal ganglia to date. 
The database is shared as a downloadable resource from the EBRAINS Knowledge Graph (https://kg.ebrains.eu), together with a workflow that allows interested researchers to update and expand the database with data from future reports.",,,https://kg.ebrains.eu,Database of literature derived cellular measurements from the murine basal ganglia +32639365,A transcriptional toolbox for exploring peripheral neuroimmune interactions.,"

Abstract

Correct communication between immune cells and peripheral neurons is crucial for the protection of our bodies. Its breakdown is observed in many common, often painful conditions, including arthritis, neuropathies, and inflammatory bowel or bladder disease. Here, we have characterised the immune response in a mouse model of neuropathic pain using flow cytometry and cell-type-specific RNA sequencing (RNA-seq). We found few striking sex differences, but a very persistent inflammatory response, with increased numbers of monocytes and macrophages up to 3 1/2 months after the initial injury. This raises the question of whether the commonly used categorisation of pain into ""inflammatory"" and ""neuropathic"" is one that is mechanistically appropriate. Finally, we collated our data with other published RNA-seq data sets on neurons, macrophages, and Schwann cells in naive and nerve injury states. The result is a practical web-based tool for the transcriptional data mining of peripheral neuroimmune interactions. http://rna-seq-browser.herokuapp.com/.",,,http://rna-seq-browser.herokuapp.com/,A transcriptional toolbox for exploring peripheral neuroimmune interactions +32702093,"AciDB 1.0: a database of acidophilic organisms, their genomic information and associated metadata.","

Motivation

There are about 600 available genome sequences of acidophilic organisms (grow at a pH‚Äâ<‚Äâ5) from the three domains of the Tree of Life. Information about acidophiles is scattered over many heterogeneous sites making it extraordinarily difficult to link physiological traits with genomic data. We were motivated to generate a curated, searchable database to address this problem.

Results

AciDB 1.0 is a curated database of sequenced acidophiles that enables researchers to execute complex queries linking genomic features to growth data, environmental descriptions and taxonomic information.

Availability and implementation

AciDB 1.0 is freely available online at: http://AciDB.cl. The source code is released under an MIT license at: https://gitlab.com/Hawkline451/acidb/.",AciDB 1.0,AciDB,http://AciDB.cl,"a database of acidophilic organisms, their genomic information and associated metadata" +32728249,Expanded encyclopaedias of DNA elements in the human and mouse genomes.,"The human and mouse genomes contain instructions that specify RNAs and proteins and govern the timing, magnitude, and cellular context of their production. To better delineate these elements, phase III of the Encyclopedia of DNA Elements (ENCODE) Project has expanded analysis of the cell and tissue repertoires of RNA transcription, chromatin structure and modification, DNA methylation, chromatin looping, and occupancy by transcription factors and RNA-binding proteins. Here we summarize these efforts, which have produced 5,992 new experimental datasets, including systematic determinations across mouse fetal development. All data are available through the ENCODE data portal (https://www.encodeproject.org), including phase II ENCODE1 and Roadmap Epigenomics2 data. We have developed a registry of 926,535 human and 339,815 mouse candidate cis-regulatory elements, covering 7.9 and 3.4% of their respective genomes, by integrating selected datatypes associated with gene regulation, and constructed a web-based server (SCREEN; http://screen.encodeproject.org) to provide flexible, user-defined access to this resource. 
Collectively, the ENCODE data and registry provide an expansive resource for the scientific community to build a better understanding of the organization and function of the human and mouse genomes.",Encyclopedia of DNA Elements,ENCODE,https://www.encodeproject.org,encyclopaedias of DNA elements in the human and mouse genomes +32738156,Predicted yeast interactome and network-based interpretation of transcriptionally changed genes.,"Saccharomyces cerevisiae, budding yeast, is a widely used model organism and research tool in genetics studies. Many efforts have been directed at constructing a high-quality comprehensive molecular interaction network to elucidate the design logic of the gene circuitries in this classic model organism. In this work, we present the yeast interactome resource (YIR), which includes 22,238 putative functional gene interactions inferred from functional gene association data integrated from 10 databases focusing on diverse functional perspectives. These putative functional gene interactions are expected to cover 18.84% of yeast protein interactions, and 38.49% may represent protein interactions. Based on the YIR, a gene set linkage analysis (GSLA) web tool was developed to annotate the potential functional impacts of a set of transcriptionally changed genes. In a case study, we show that the YIR/GSLA system produced more extensive and concise annotations compared with widely used gene set annotation tools, including PANTHER and DAVID. 
Both YIR and GSLA are accessible through the website http://yeast.biomedtzc.cn.",,,http://yeast.biomedtzc.cn,Predicted yeast interactome and network-based interpretation of transcriptionally changed genes +32761142,"NCBI Taxonomy: a comprehensive update on curation, resources and tools.","The National Center for Biotechnology Information (NCBI) Taxonomy includes organism names and classifications for every sequence in the nucleotide and protein sequence databases of the International Nucleotide Sequence Database Collaboration. Since the last review of this resource in 2012, it has undergone several improvements. Most notable is the shift from a single SQL database to a series of linked databases tied to a framework of data called NameBank. This means that relations among data elements can be adjusted in more detail, resulting in expanded annotation of synonyms, the ability to flag names with specific nomenclatural properties, enhanced tracking of publications tied to names and improved annotation of scientific authorities and types. Additionally, practices utilized by NCBI Taxonomy curators specific to major taxonomic groups are described, terms peculiar to NCBI Taxonomy are explained, external resources are acknowledged and updates to tools and other resources are documented. Database URL: https://www.ncbi.nlm.nih.gov/taxonomy.",National Center for Biotechnology Information Taxonomy,NCBI Taxonomy,https://www.ncbi.nlm.nih.gov/taxonomy,organism names and classifications for every sequence in the nucleotide and protein sequence databases of the International Nucleotide Sequence Database Collaboration +32766766,LncR2metasta: a manually curated database for experimentally supported lncRNAs during various cancer metastatic events.,"Mounting evidence has shown the involvement of long non-coding RNAs (lncRNAs) during various cancer metastatic events (abbreviated as CMEs, e.g. cancer cell invasion, intravasation, extravasation, proliferation, etc.) 
that may cooperatively facilitate malignant tumor spread and cause massive patient deaths. The study of lncRNA-CME associations might help understand lncRNA functions in metastasis and present reliable biomarkers for early dissemination detection and optimized treatment. Therefore, we developed a database named 'lncR2metasta' by manually compiling experimentally supported lncRNAs during various CMEs from existing studies. LncR2metasta documents 1238 associations between 304 lncRNAs and 39 CMEs across 54 human cancer subtypes. Each entry of lncR2metasta contains detailed information on a lncRNA-CME association, including lncRNA symbol, a specific CME, brief description of the association, lncRNA category, lncRNA Entrez or Ensembl ID, lncRNA genomic location and strand, lncRNA experiment, lncRNA expression pattern, detection method, target gene (or pathway) of lncRNA, lncRNA regulatory role on a CME, cancer name and the literature reference. An easy-to-use web interface was deployed in lncR2metasta for its users to easily browse, search and download as well as to submit novel lncRNA-CME associations. LncR2metasta will be a useful resource in cancer research community. It is freely available at http://lncR2metasta.wchoda.com.",lncR2metasta,lncR2metasta,http://lncR2metasta.wchoda.com,a manually curated database for experimentally supported lncRNAs during various cancer metastatic events +32792559,"ACDC, a global database of amphibian cytochrome-b sequences using reproducible curation for GenBank records.","Genetic data are a crucial and exponentially growing resource across all biological sciences, yet curated databases are scarce. The widespread occurrence of sequence and (meta)data errors in public repositories calls for comprehensive improvements of curation protocols leading to robust research and downstream analyses. We collated and curated all available GenBank cytochrome-b sequences for amphibians, a benchmark marker in this globally declining vertebrate clade. 
The Amphibia's Curated Database of Cytochrome-b (ACDC) consists of 36,514 sequences representing 2,309 species from 398 genera (median‚Äâ=‚Äâ2 with 50% interquartile ranges of 1-7 species/genus). We updated the taxonomic identity of >4,800 sequences (ca. 13%) and found 2,359 (6%) conflicting sequences with 84% of the errors originating from taxonomic misidentifications. The database (accessible at https://doi.org/10.6084/m9.figshare.9944759 ) also includes an R script to replicate our study for other loci and taxonomic groups. We provide recommendations to improve genetic-data quality in public repositories and flag species for which there is a need for taxonomic refinement in the face of increased rate of amphibian extinctions in the Anthropocene.",Amphibia's Curated Database of Cytochrome-b,ACDC,https://doi.org/10.6084/m9.figshare.994475,a global database of amphibian cytochrome-b sequences using reproducible curation for GenBank records +32829394,Reanalysis of genome sequences of tomato accessions and its wild relatives: development of Tomato Genomic Variation (TGV) database integrating SNPs and INDELs polymorphisms.,"

Motivation

Facilitated by technological advances and expeditious decrease in the sequencing costs, whole-genome sequencing is increasingly implemented to uncover variations in cultivars/accessions of many crop plants. In tomato (Solanum lycopersicum), the availability of the genome sequence, followed by the resequencing of tomato cultivars and its wild relatives, has provided a prodigious resource for the improvement of traits. A high-quality genome resequencing of 84 tomato accessions and wild relatives generated a dataset that can be used as a resource to identify agronomically important alleles across the genome. Converting this dataset into a searchable database, including information about the influence of single-nucleotide polymorphisms (SNPs) on protein function, provides valuable information about the genetic variations. The database will assist in searching for functional variants of a gene for introgression into tomato cultivars.

Results

A recent release of better-quality tomato genome reference assembly SL3.0, and new annotation ITAG3.2 of SL3.0, dropped 3857 genes, added 4900 novel genes and updated 20 766 genes. Using the above version, we remapped the data from the tomato lines resequenced under the '100 tomato genome resequencing project' on new tomato genome assembly SL3.0 and made an online searchable Tomato Genomic Variations (TGVs) database. The TGV contains information about SNPs and insertion/deletion events and expands it by functional annotation of variants with new ITAG3.2 using SIFT4G software. This database with search function assists in inferring the influence of SNPs on the function of a target gene. This database can be used for selecting SNPs, which can be potentially deployed for improving tomato traits.

Availability and implementation

TGV is freely available at http://psd.uohyd.ac.in/tgv.",Tomato Genomic Variations database,TGV database,http://psd.uohyd.ac.in/tgv, +32890396,CoV3D: a database of high resolution coronavirus protein structures.,"SARS-CoV-2, the etiologic agent of COVID-19, exemplifies the general threat to global health posed by coronaviruses. The urgent need for effective vaccines and therapies is leading to a rapid rise in the number of high resolution structures of SARS-CoV-2 proteins that collectively reveal a map of virus vulnerabilities. To assist structure-based design of vaccines and therapeutics against SARS-CoV-2 and other coronaviruses, we have developed CoV3D, a database and resource for coronavirus protein structures, which is updated on a weekly basis. CoV3D provides users with comprehensive sets of structures of coronavirus proteins and their complexes with antibodies, receptors, and small molecules. Integrated molecular viewers allow users to visualize structures of the spike glycoprotein, which is the major target of neutralizing antibodies and vaccine design efforts, as well as sets of spike-antibody complexes, spike sequence variability, and known polymorphisms. In order to aid structure-based design and analysis of the spike glycoprotein, CoV3D permits visualization and download of spike structures with modeled N-glycosylation at known glycan sites, and contains structure-based classification of spike conformations, generated by unsupervised clustering. 
CoV3D can serve the research community as a centralized reference and resource for spike and other coronavirus protein structures, and is available at: https://cov3d.ibbr.umd.edu.",CoV3D,CoV3D,https://cov3d.ibbr.umd.edu,a database of high resolution coronavirus protein structures +32897080,METATRYP v 2.0: Metaproteomic Least Common Ancestor Analysis for Taxonomic Inference Using Specialized Sequence Assemblies-Standalone Software and Web Servers for Marine Microorganisms and Coronaviruses.,"We present METATRYP version 2 software that identifies shared peptides across the predicted proteomes of organisms within environmental metaproteomics studies to enable accurate taxonomic attribution of peptides during protein inference. Improvements include ingestion of complex sequence assembly data categories (metagenomic and metatranscriptomic assemblies, single cell amplified genomes, and metagenome assembled genomes), prediction of the least common ancestor (LCA) for a peptide shared across multiple organisms, increased performance through updates to the backend architecture, and development of a web portal (https://metatryp.whoi.edu). Major expansion of the marine METATRYP database with predicted proteomes from environmental sequencing confirms a low occurrence of shared tryptic peptides among disparate marine microorganisms, implying tractability for targeted metaproteomics. METATRYP was designed to facilitate ocean metaproteomics and has been integrated into the Ocean Protein Portal (https://oceanproteinportal.org); however, it can be readily applied to other domains. We describe the rapid deployment of a coronavirus-specific web portal (https://metatryp-coronavirus.whoi.edu/) to aid in use of proteomics on coronavirus research during the ongoing pandemic. 
A coronavirus-focused METATRYP database identified potential SARS-CoV-2 peptide biomarkers and indicated very few shared tryptic peptides between SARS-CoV-2 and other disparate taxa analyzed, sharing <1% peptides with taxa outside of the betacoronavirus group, establishing that taxonomic specificity is achievable using tryptic peptide-based proteomic diagnostic approaches.",METATRYP v 2.0,METATRYP,https://metatryp.whoi.edu, +32911083,MosaicBase: A Knowledgebase of Postzygotic Mosaic Variants in Noncancer Disease-related and Healthy Human Individuals.,"Mosaic variants resulting from postzygotic mutations are prevalent in the human genome and play important roles in human diseases. However, except for cancer-related variants, there is no collection of postzygotic mosaic variants in noncancer disease-related and healthy individuals. Here, we present MosaicBase, a comprehensive database that includes 6698 mosaic variants related to 266 noncancer diseases and 27,991 mosaic variants identified in 422 healthy individuals. Genomic and phenotypic information of each variant was manually extracted and curated from 383 publications. MosaicBase supports the query of variants with Online Mendelian Inheritance in Man (OMIM) entries, genomic coordinates, gene symbols, or Entrez IDs. We also provide an integrated genome browser for users to easily access mosaic variants and their related annotations for any genomic region. By analyzing the variants collected in MosaicBase, we find that mosaic variants that directly contribute to disease phenotype show features distinct from those of variants in individuals with mild or no phenotypes, in terms of their genomic distribution, mutation signatures, and fraction of mutant cells. MosaicBase will not only assist clinicians in genetic counseling and diagnosis but also provide a useful resource to understand the genomic baseline of postzygotic mutations in the general human population. 
MosaicBase is publicly available at http://mosaicbase.com/ or http://49.4.21.8:8000.",MosaicBase,MosaicBase,http://mosaicbase.com,A Knowledgebase of Postzygotic Mosaic Variants in Noncancer Disease-related and Healthy Human Individuals +32934277,StoneMod: a database for kidney stone modulatory proteins with experimental evidence.,"Better understanding of molecular mechanisms for kidney stone formation is required to improve management of kidney stone disease with better therapeutic outcome. Recent kidney stone research has indicated critical roles of a group of proteins, namely 'stone modulators', in promotion or inhibition of the stone formation. Nevertheless, such information is currently dispersed and difficult to obtain. Herein, we present the kidney stone modulator database (StoneMod), which is a curated resource by obtaining necessary information of such stone modulatory proteins, which can act as stone promoters or inhibitors, with experimental evidence from previously published studies. Currently, the StoneMod database contains 10, 16, 13, 8 modulatory proteins that affect calcium oxalate crystallization, crystal growth, crystal aggregation, and crystal adhesion on renal tubular cells, respectively. Informative details of each modulatory protein and PubMed links to the published articles are provided. Additionally, hyperlinks to other protein/gene databases (e.g., UniProtKB, Swiss-Prot, Human Protein Atlas, PeptideAtlas, and Ensembl) are made available for the users to obtain additional in-depth information of each protein. Moreover, this database provides a user-friendly web interface, in which the users can freely access to the information and/or submit their data to deposit or update. 
Database URL: https://www.stonemod.org .",kidney stone modulator database,StoneMod,https://www.stonemod.org,a database for kidney stone modulatory proteins with experimental evidence +32941628,"IDDB: a comprehensive resource featuring genes, variants and characteristics associated with infertility.","Infertility is a complex multifactorial disease that affects up to 10% of couples across the world. However, many mechanisms of infertility remain unclear due to the lack of studies based on systematic knowledge, leading to ineffective treatment and/or transmission of genetic defects to offspring. Here, we developed an infertility disease database to provide a comprehensive resource featuring various factors involved in infertility. Features in the current IDDB version were manually curated as follows: (i) a total of 307 infertility-associated genes in human and 1348 genes associated with reproductive disorder in 9 model organisms; (ii) a total of 202 chromosomal abnormalities leading to human infertility, including aneuploidies and structural variants; and (iii) a total of 2078 pathogenic variants from infertility patients' samples across 60 different diseases causing infertility. Additionally, the characteristics of clinically diagnosed infertility patients (i.e. causative variants, laboratory indexes and clinical manifestations) were collected. To the best of our knowledge, the IDDB is the first infertility database serving as a systematic resource for biologists to decipher infertility mechanisms and for clinicians to achieve better diagnosis/treatment of patients from disease phenotype to genetic factors. 
The IDDB is freely available at http://mdl.shsmu.edu.cn/IDDB/.",infertility disease database,IDDB,http://mdl.shsmu.edu.cn/IDDB/,"a comprehensive resource featuring genes, variants and characteristics associated with infertility" +32943659,The landscape of microsatellites in the enset (Ensete ventricosum) genome and web-based marker resource development.,"Ensete ventricosum (Musaceae, enset) is an Ethiopian food security crop. To realize the potential of enset for rural livelihoods, further knowledge of enset diversity, genetics and genomics is required to support breeding programs and conservation. This study was conducted to explore the enset genome to develop molecular markers, genomics resources, and characterize enset landraces while giving insight into the organization of the genome. We identified 233 microsatellites (simple sequence repeats, SSRs) per Mbp in the enset genome, representing 0.28% of the genome. Mono- and di-nucleotide repeats motifs were found in a higher proportion than other classes of SSR-motifs. In total, 154,586 non-redundant enset microsatellite markers (EMM) were identified and 40 selected for primer development. Marker validation by PCR and low-cost agarose gel electrophoresis revealed that 92.5% were polymorphic, showing a high PIC (Polymorphism Information Content; 0.87) and expected heterozygosity (He‚Äâ=‚Äâ0.79-0.82). In silico analysis of genomes of closely related species showed 46.86% of the markers were transferable among enset species and 1.90% were transferable to Musa. The SSRs are robust (with basic PCR methods and agarose gel electrophoresis), informative, and applicable in measuring enset diversity, genotyping, selection and potentially breeding. 
Enset SSRs are available in a web-based database at https://enset-project.org/EnMom@base.html (or https://enset.aau.edu.et/index.html , downloadable from Figshare).",,,https://enset-project.org/EnMom@base.html, +32943659,The landscape of microsatellites in the enset (Ensete ventricosum) genome and web-based marker resource development.,"Ensete ventricosum (Musaceae, enset) is an Ethiopian food security crop. To realize the potential of enset for rural livelihoods, further knowledge of enset diversity, genetics and genomics is required to support breeding programs and conservation. This study was conducted to explore the enset genome to develop molecular markers, genomics resources, and characterize enset landraces while giving insight into the organization of the genome. We identified 233 microsatellites (simple sequence repeats, SSRs) per Mbp in the enset genome, representing 0.28% of the genome. Mono- and di-nucleotide repeats motifs were found in a higher proportion than other classes of SSR-motifs. In total, 154,586 non-redundant enset microsatellite markers (EMM) were identified and 40 selected for primer development. Marker validation by PCR and low-cost agarose gel electrophoresis revealed that 92.5% were polymorphic, showing a high PIC (Polymorphism Information Content; 0.87) and expected heterozygosity (He‚Äâ=‚Äâ0.79-0.82). In silico analysis of genomes of closely related species showed 46.86% of the markers were transferable among enset species and 1.90% were transferable to Musa. The SSRs are robust (with basic PCR methods and agarose gel electrophoresis), informative, and applicable in measuring enset diversity, genotyping, selection and potentially breeding. 
Enset SSRs are available in a web-based database at https://enset-project.org/EnMom@base.html (or https://enset.aau.edu.et/index.html , downloadable from Figshare).",,,https://enset.aau.edu.et/index.html, +32976589,DIGGER: exploring the functional role of alternative splicing in protein interactions.,"Alternative splicing plays a major role in regulating the functional repertoire of the proteome. However, isoform-specific effects to protein-protein interactions (PPIs) are usually overlooked, making it impossible to judge the functional role of individual exons on a systems biology level. We overcome this barrier by integrating protein-protein interactions, domain-domain interactions and residue-level interactions information to lift exon expression analysis to a network level. Our user-friendly database DIGGER is available at https://exbio.wzw.tum.de/digger and allows users to seamlessly switch between isoform and exon-centric views of the interactome and to extract sub-networks of relevant isoforms, making it an essential resource for studying mechanistic consequences of alternative splicing.",DIGGER,DIGGER,https://exbio.wzw.tum.de/digger, +32986834,"The ModelSEED Biochemistry Database for the integration of metabolic annotations and the reconstruction, comparison and analysis of metabolic models for plants, fungi and microbes.","For over 10 years, ModelSEED has been a primary resource for the construction of draft genome-scale metabolic models based on annotated microbial or plant genomes. Now being released, the biochemistry database serves as the foundation of biochemical data underlying ModelSEED and KBase. 
The biochemistry database embodies several properties that, taken together, distinguish it from other published biochemistry resources by: (i) including compartmentalization, transport reactions, charged molecules and proton balancing on reactions; (ii) being extensible by the user community, with all data stored in GitHub; and (iii) design as a biochemical 'Rosetta Stone' to facilitate comparison and integration of annotations from many different tools and databases. The database was constructed by combining chemical data from many resources, applying standard transformations, identifying redundancies and computing thermodynamic properties. The ModelSEED biochemistry is continually tested using flux balance analysis to ensure the biochemical network is modeling-ready and capable of simulating diverse phenotypes. Ontologies can be designed to aid in comparing and reconciling metabolic reconstructions that differ in how they represent various metabolic pathways. ModelSEED now includes 33,978 compounds and 36,645 reactions, available as a set of extensible files on GitHub, and available to search at https://modelseed.org/biochem and KBase.",ModelSEED Biochemistry Database,,https://modelseed.org/biochem,"Biochemistry Database for the integration of metabolic annotations and the reconstruction, comparison and analysis of metabolic models for plants, fungi and microbes" +32990749,TCRdb: a comprehensive database for T-cell receptor sequences with powerful search function.,"T cells and the T-cell receptor (TCR) repertoire play pivotal roles in immune response and immunotherapy. TCR sequencing (TCR-Seq) technology has enabled accurate profiling TCR repertoire and currently a large number of TCR-Seq data are available in public. Based on the urgent need to effectively re-use these data, we developed TCRdb, a comprehensive human TCR sequences database, by a uniform pipeline to characterize TCR sequences on TCR-Seq data. 
TCRdb contains more than 277 million highly reliable TCR sequences from over 8265 TCR-Seq samples across hundreds of tissues/clinical conditions/cell types. The unique features of TCRdb include: (i) comprehensive and reliable sequences for TCR repertoire in different samples generated by a strict and uniform pipeline of TCRdb; (ii) powerful search function, allowing users to identify their interested TCR sequences in different conditions; (iii) categorized sample metadata, enabling comparison of TCRs in different sample types; (iv) interactive data visualization charts, describing the TCR repertoire in TCR diversity, length distribution and V-J gene utilization. The TCRdb database is freely available at http://bioinfo.life.hust.edu.cn/TCRdb/ and will be a useful resource in the research and application community of T cell immunology.",TCRdb,TCRdb,http://bioinfo.life.hust.edu.cn/TCRdb,a comprehensive database for T-cell receptor sequences with powerful search function +33010163,cncRNAdb: a manually curated resource of experimentally supported RNAs with both protein-coding and noncoding function.,"RNA endowed with both protein-coding and noncoding functions is referred to as 'dual-function RNA', 'binary functional RNA (bifunctional RNA)' or 'cncRNA (coding and noncoding RNA)'. Recently, an increasing number of cncRNAs have been identified, including both translated ncRNAs (ncRNAs with coding functions) and untranslated mRNAs (mRNAs with noncoding functions). However, an appropriate database for storing and organizing cncRNAs is still lacking. Here, we developed cncRNAdb, a manually curated database of experimentally supported cncRNAs, which aims to provide a resource for efficient manipulation, browsing and analysis of cncRNAs. 
The current version of cncRNAdb documents about 2600 manually curated entries of cncRNA functions with experimental evidence, involving more than 2,000 RNAs (including over 1300 translated ncRNAs and over 600 untranslated mRNAs) across over 20 species. In summary, we believe that cncRNAdb will help elucidate the functions and mechanisms of cncRNAs and develop new prediction methods. The database is available at http://www.rna-society.org/cncrnadb/.",cncRNAdb,cncRNAdb,http://www.rna-society.org/cncrnadb/,a manually curated resource of experimentally supported RNAs with both protein-coding and noncoding function +33010176,CancerImmunityQTL: a database to systematically evaluate the impact of genetic variants on immune infiltration in human cancer.,"Tumor-infiltrating immune cells as integral component of the tumor microenvironment are associated with tumor progress, prognosis and responses to immunotherapy. Genetic variants have been demonstrated to impact tumor-infiltrating, underscoring the heritable character of immune landscape. Therefore, identification of immunity quantitative trait loci (immunQTLs), which evaluate the effect of genetic variants on immune cells infiltration, might present a critical step toward fully understanding the contribution of genetic variants in tumor development. Although emerging studies have demonstrated the determinants of germline variants on immune infiltration, no database has yet been developed to systematically analyze immunQTLs across multiple cancer types. Using genotype data from TCGA database and immune cell fractions estimated by CIBERSORT, we developed a computational pipeline to identify immunQTLs in 33 cancer types. A total of 913 immunQTLs across different cancer types were identified. Among them, 5 immunQTLs are associated with patient overall survival. Furthermore, by integrating immunQTLs with GWAS data, we identified 527 immunQTLs overlapping with known GWAS linkage disequilibrium regions. 
Finally, we constructed a user-friendly database, CancerImmunityQTL (http://www.cancerimmunityqtl-hust.com/) for users to browse, search and download data of interest. This database provides an informative resource to understand the germline determinants of immune infiltration in human cancer and benefit from personalized cancer immunotherapy.",CancerImmunityQTL,CancerImmunityQTL,http://www.cancerimmunityqtl-hust.com/,a database to systematically evaluate the impact of genetic variants on immune infiltration in human cancer +33020484,"lncRNAKB, a knowledgebase of tissue-specific functional annotation and trait association of long noncoding RNA.","Long non-coding RNA Knowledgebase (lncRNAKB) is an integrated resource for exploring lncRNA biology in the context of tissue-specificity and disease association. A systematic integration of annotations from six independent databases resulted in 77,199 human lncRNA (224,286 transcripts). The user-friendly knowledgebase covers a comprehensive breadth and depth of lncRNA annotation. lncRNAKB is a compendium of expression patterns, derived from analysis of RNA-seq data in thousands of samples across 31 solid human normal tissues (GTEx). Thousands of co-expression modules identified via network analysis and pathway enrichment to delineate lncRNA function are also accessible. Millions of expression quantitative trait loci (cis-eQTL) computed using whole genome sequence genotype data (GTEx) can be downloaded at lncRNAKB that also includes tissue-specificity, phylogenetic conservation and coding potential scores. Tissue-specific lncRNA-trait associations encompassing 323 GWAS (UK Biobank) are also provided. 
LncRNAKB is accessible at http://www.lncrnakb.org/ , and the data are freely available through Open Science Framework ( https://doi.org/10.17605/OSF.IO/RU4D2 ).",Long non-coding RNA Knowledgebase,lncRNAKB,http://www.lncrnakb.org,a knowledgebase of tissue-specific functional annotation and trait association of long noncoding RNA +33045721,ViruSurf: an integrated database to investigate viral sequences.,"ViruSurf, available at http://gmql.eu/virusurf/, is a large public database of viral sequences and integrated and curated metadata from heterogeneous sources (RefSeq, GenBank, COG-UK and NMDC); it also exposes computed nucleotide and amino acid variants, called from original sequences. A GISAID-specific ViruSurf database, available at http://gmql.eu/virusurf_gisaid/, offers a subset of these functionalities. Given the current pandemic outbreak, SARS-CoV-2 data are collected from the four sources; but ViruSurf contains other virus species harmful to humans, including SARS-CoV, MERS-CoV, Ebola and Dengue. The database is centered on sequences, described from their biological, technological and organizational dimensions. In addition, the analytical dimension characterizes the sequence in terms of its annotations and variants. The web interface enables expressing complex search queries in a simple way; arbitrary search queries can freely combine conditions on attributes from the four dimensions, extracting the resulting sequences. Several example queries on the database confirm and possibly improve results from recent research papers; results can be recomputed over time and upon selected populations. 
Effective search over large and curated sequence data may enable faster responses to future threats that could arise from new viruses.",ViruSurf,ViruSurf,http://gmql.eu/virusurf/,an integrated database to investigate viral sequences +33045721,ViruSurf: an integrated database to investigate viral sequences.,"ViruSurf, available at http://gmql.eu/virusurf/, is a large public database of viral sequences and integrated and curated metadata from heterogeneous sources (RefSeq, GenBank, COG-UK and NMDC); it also exposes computed nucleotide and amino acid variants, called from original sequences. A GISAID-specific ViruSurf database, available at http://gmql.eu/virusurf_gisaid/, offers a subset of these functionalities. Given the current pandemic outbreak, SARS-CoV-2 data are collected from the four sources; but ViruSurf contains other virus species harmful to humans, including SARS-CoV, MERS-CoV, Ebola and Dengue. The database is centered on sequences, described from their biological, technological and organizational dimensions. In addition, the analytical dimension characterizes the sequence in terms of its annotations and variants. The web interface enables expressing complex search queries in a simple way; arbitrary search queries can freely combine conditions on attributes from the four dimensions, extracting the resulting sequences. Several example queries on the database confirm and possibly improve results from recent research papers; results can be recomputed over time and upon selected populations. 
Effective search over large and curated sequence data may enable faster responses to future threats that could arise from new viruses.",ViruSurf,ViruSurf,http://gmql.eu/virusurf/,a large public database of viral sequences and integrated and curated metadata from heterogeneous sources +33045737,INTEDE: interactome of drug-metabolizing enzymes.,"Drug-metabolizing enzymes (DMEs) are critical determinant of drug safety and efficacy, and the interactome of DMEs has attracted extensive attention. There are 3 major interaction types in an interactome: microbiome-DME interaction (MICBIO), xenobiotics-DME interaction (XEOTIC) and host protein-DME interaction (HOSPPI). The interaction data of each type are essential for drug metabolism, and the collective consideration of multiple types has implication for the future practice of precision medicine. However, no database was designed to systematically provide the data of all types of DME interactions. Here, a database of the Interactome of Drug-Metabolizing Enzymes (INTEDE) was therefore constructed to offer these interaction data. First, 1047 unique DMEs (448 host and 599 microbial) were confirmed, for the first time, using their metabolizing drugs. Second, for these newly confirmed DMEs, all types of their interactions (3359 MICBIOs between 225 microbial species and 185 DMEs; 47 778 XEOTICs between 4150 xenobiotics and 501 DMEs; 7849 HOSPPIs between 565 human proteins and 566 DMEs) were comprehensively collected and then provided, which enabled the crosstalk analysis among multiple types. Because of the huge amount of accumulated data, the INTEDE made it possible to generalize key features for revealing disease etiology and optimizing clinical treatment. 
INTEDE is freely accessible at: https://idrblab.org/intede/.",Interactome of Drug-Metabolizing Enzymes,INTEDE,https://idrblab.org/intede/,interactome of drug-metabolizing enzymes +33046717,Protein ontology on the semantic web for knowledge discovery.,"The Protein Ontology (PRO) provides an ontological representation of protein-related entities, ranging from protein families to proteoforms to complexes. Protein Ontology Linked Open Data (LOD) exposes, shares, and connects knowledge about protein-related entities on the Semantic Web using Resource Description Framework (RDF), thus enabling integration with other Linked Open Data for biological knowledge discovery. For example, proteins (or variants thereof) can be retrieved on the basis of specific disease associations. As a community resource, we strive to follow the Findability, Accessibility, Interoperability, and Reusability (FAIR) principles, disseminate regular updates of our data, support multiple methods for accessing, querying and downloading data in various formats, and provide documentation both for scientists and programmers. PRO Linked Open Data can be browsed via faceted browser interface and queried using SPARQL via YASGUI. RDF data dumps are also available for download. Additionally, we developed RESTful APIs to support programmatic data access. We also provide W3C HCLS specification compliant metadata description for our data. The PRO Linked Open Data is available at https://lod.proconsortium.org/ .",PRO Linked Open Data,,https://lod.proconsortium.org, +33051688,dbGuide: a database of functionally validated guide RNAs for genome editing in human and mouse cells.,"With the technology's accessibility and ease of use, CRISPR has been employed widely in many different organisms and experimental settings. As a result, thousands of publications have used CRISPR to make specific genetic perturbations, establishing in itself a resource of validated guide RNA sequences. 
While numerous computational tools to assist in the design and identification of candidate guide RNAs exist, these are still just at best predictions and generally, researchers inevitably will test multiple sequences for functional activity. Here, we present dbGuide (https://sgrnascorer.cancer.gov/dbguide), a database of functionally validated guide RNA sequences for CRISPR/Cas9-based knockout in human and mouse. Our database not only contains computationally determined candidate guide RNA sequences, but of even greater value, over 4000 sequences which have been functionally validated either through direct amplicon sequencing or manual curation of literature from over 1000 publications. Finally, our established framework will allow for continual addition of newly published and experimentally validated guide RNA sequences for CRISPR/Cas9-based knockout as well as incorporation of sequences from different gene editing systems, additional species and other types of site-specific functionalities such as base editing, gene activation, repression and epigenetic modification.",dbGuide,dbGuide,https://sgrnascorer.cancer.gov/dbguide,a database of functionally validated guide RNAs for genome editing in human and mouse cells +33053178,"DNAmoreDB, a database of DNAzymes.","Deoxyribozymes, DNA enzymes or simply DNAzymes are single-stranded oligo-deoxyribonucleotide molecules that, like proteins and ribozymes, possess the ability to perform catalysis. Although DNAzymes have not yet been found in living organisms, they have been isolated in the laboratory through in vitro selection. The selected DNAzyme sequences have the ability to catalyze a broad range of chemical reactions, utilizing DNA, RNA, peptides or small organic compounds as substrates. 
DNAmoreDB is a comprehensive database resource for DNAzymes that collects and organizes the following types of information: sequences, conditions of the selection procedure, catalyzed reactions, kinetic parameters, substrates, cofactors, structural information whenever available, and literature references. Currently, DNAmoreDB contains information about DNAzymes that catalyze 20 different reactions. We included a submission form for new data, a REST-based API system that allows users to retrieve the database contents in a machine-readable format, and keyword and BLASTN search features. The database is publicly available at https://www.genesilico.pl/DNAmoreDB/.",DNAmoreDB,DNAmoreDB,https://www.genesilico.pl/DNAmoreDB/,a database of DNAzymes +33068428,Comparative Toxicogenomics Database (CTD): update 2021.,"The public Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is an innovative digital ecosystem that relates toxicological information for chemicals, genes, phenotypes, diseases, and exposures to advance understanding about human health. Literature-based, manually curated interactions are integrated to create a knowledgebase that harmonizes cross-species heterogeneous data for chemical exposures and their biological repercussions. In this biennial update, we report a 20% increase in CTD curated content and now provide 45 million toxicogenomic relationships for over 16 300 chemicals, 51 300 genes, 5500 phenotypes, 7200 diseases and 163 000 exposure events, from 600 comparative species. Furthermore, we increase the functionality of chemical-phenotype content with new data-tabs on CTD Disease pages (to help fill in knowledge gaps for environmental health) and new phenotype search parameters (for Batch Query and Venn analysis tools). As well, we introduce new CTD Anatomy pages that allow users to uniquely explore and analyze chemical-phenotype interactions from an anatomical perspective. 
Finally, we have enhanced CTD Chemical pages with new literature-based chemical synonyms (to improve querying) and added 1600 amino acid-based compounds (to increase chemical landscape). Together, these updates continue to augment CTD as a powerful resource for generating testable hypotheses about the etiologies and molecular mechanisms underlying environmentally influenced diseases.",Comparative Toxicogenomics Database,CTD,http://ctdbase.org/,"an innovative digital ecosystem that relates toxicological information for chemicals, genes, phenotypes, diseases, and exposures to advance understanding about human health" +33068433,CovalentInDB: a comprehensive database facilitating the discovery of covalent inhibitors.,"Inhibitors that form covalent bonds with their targets have traditionally been considered highly adventurous due to their potential off-target effects and toxicity concerns. However, with the clinical validation and approval of many covalent inhibitors during the past decade, design and discovery of novel covalent inhibitors have attracted increasing attention. A large amount of scattered experimental data for covalent inhibitors have been reported, but a resource by integrating the experimental information for covalent inhibitor discovery is still lacking. In this study, we presented Covalent Inhibitor Database (CovalentInDB), the largest online database that provides the structural information and experimental data for covalent inhibitors. CovalentInDB contains 4511 covalent inhibitors (including 68 approved drugs) with 57 different reactive warheads for 280 protein targets. The crystal structures of some of the proteins bound with a covalent inhibitor are provided to visualize the protein-ligand interactions around the binding site. Each covalent inhibitor is annotated with the structure, warhead, experimental bioactivity, physicochemical properties, etc. 
Moreover, CovalentInDB provides the covalent reaction mechanism and the corresponding experimental verification methods for each inhibitor towards its target. High-quality datasets are downloadable for users to evaluate and develop computational methods for covalent drug design. CovalentInDB is freely accessible at http://cadd.zju.edu.cn/cidb/.",Covalent Inhibitor Database,CovalentInDB,http://cadd.zju.edu.cn/cidb/,a comprehensive database facilitating the discovery of covalent inhibitors +33074314,TransCirc: an interactive database for translatable circular RNAs based on multi-omics evidence.,"TransCirc (https://www.biosino.org/transcirc/) is a specialized database that provide comprehensive evidences supporting the translation potential of circular RNAs (circRNAs). This database was generated by integrating various direct and indirect evidences to predict coding potential of each human circRNA and the putative translation products. Seven types of evidences for circRNA translation were included: (i) ribosome/polysome binding evidences supporting the occupancy of ribosomes onto circRNAs; (ii) experimentally mapped translation initiation sites on circRNAs; (iii) internal ribosome entry site on circRNAs; (iv) published N-6-methyladenosine modification data in circRNA that promote translation initiation; (v) lengths of the circRNA specific open reading frames; (vi) sequence composition scores from a machine learning prediction of all potential open reading frames; (vii) mass spectrometry data that directly support the circRNA encoded peptides across back-splice junctions. TransCirc provides a user-friendly searching/browsing interface and independent lines of evidences to predicte how likely a circRNA can be translated. In addition, several flexible tools have been developed to aid retrieval and analysis of the data. 
TransCirc can serve as an important resource for investigating the translation capacity of circRNAs and the potential circRNA-encoded peptides, and can be expanded to include new evidences or additional species in the future.",TransCirc,TransCirc,https://www.biosino.org/transcirc/,an interactive database for translatable circular RNAs based on multi-omics evidence +33074547,"Usage of the Sea Urchin Hemicentrotus pulcherrimus Database, HpBase.","HpBase ( http://cell-innovation.nig.ac.jp/Hpul/ ) is a database that provides genome and transcriptome resources of the sea urchin Hemicentrotus pulcherrimus. In addition to downloading the bulk data, several analysis tools for resource use are available: gene search, homology search, and genome browsing. HpBase also discloses the protocols for biological experiments using H. pulcherrimus that have been accumulated so far. Therefore, HpBase can assist efficient use of genome resources for researchers from various fields-evolutionary, developmental, and cell biology. In this chapter we present an overview and usage of tools in HpBase.",HpBase,HpBase,http://cell-innovation.nig.ac.jp/Hpul,a database that provides genome and transcriptome resources of the sea urchin Hemicentrotus pulcherrimus +33076954,Predicted functional interactome of Caenorhabditis elegans and a web tool for the functional interpretation of differentially expressed genes.,"

Background

The nematode worm, Caenorhabditis elegans, is a saprophytic species that has been emerging as a standard model organism since the early 1960s. This species is useful in numerous fields, including developmental biology, neurobiology, and ageing. A high-quality comprehensive molecular interaction network is needed to facilitate molecular mechanism studies in C. elegans.

Results

We present the predicted functional interactome of Caenorhabditis elegans (FIC), which integrates functional association data from 10 public databases to infer functional gene interactions on diverse functional perspectives. In this work, FIC includes 108,550 putative functional associations with balanced sensitivity and specificity, which are expected to cover 21.42% of all C. elegans protein interactions, and 29.25% of these associations may represent protein interactions. Based on FIC, we developed a gene set linkage analysis (GSLA) web tool to interpret potential functional impacts from a set of differentially expressed genes observed in transcriptome analyses.

Conclusion

We present the predicted C. elegans interactome database FIC, which is a high-quality database of predicted functional interactions among genes. The functional interactions in FIC serve as a good reference interactome for GSLA to annotate differentially expressed genes for their potential functional impacts. In a case study, the FIC/GSLA system shows more comprehensive and concise annotations compared to other widely used gene set annotation tools, including PANTHER and DAVID. FIC and its associated GSLA are available at the website http://worm.biomedtzc.cn .",functional interactome of Caenorhabditis elegans,FIC,http://worm.biomedtzc.cn, +33079988,The Dark Kinase Knowledgebase: an online compendium of knowledge and experimental results of understudied kinases.,"Kinases form the backbone of numerous cell signaling pathways, with their dysfunction similarly implicated in multiple pathologies. Further facilitated by their druggability, kinases are a major focus of therapeutic development efforts in diseases such as cancer, infectious disease and autoimmune disorders. While their importance is clear, the role or biological function of nearly one-third of kinases is largely unknown. Here, we describe a data resource, the Dark Kinase Knowledgebase (DKK; https://darkkinome.org), that is specifically focused on providing data and reagents for these understudied kinases to the broader research community. Supported through NIH's Illuminating the Druggable Genome (IDG) Program, the DKK is focused on data and knowledge generation for 162 poorly studied or 'dark' kinases. Types of data provided through the DKK include parallel reaction monitoring (PRM) peptides for quantitative proteomics, protein interactions, NanoBRET reagents, and kinase-specific compounds. Higher-level data is similarly being generated and consolidated such as tissue gene expression profiles and, longer-term, functional relationships derived through perturbation studies. 
Associated web tools that help investigators interrogate both internal and external data are also provided through the site. As an evolving resource, the DKK seeks to continually support and enhance knowledge on these potentially high-impact druggable targets.",Dark Kinase Knowledgebase,DKK,https://darkkinome.org,an online compendium of knowledge and experimental results of understudied kinases +33079992,PLncDB V2.0: a comprehensive encyclopedia of plant long noncoding RNAs.,"Long noncoding RNAs (lncRNAs) are transcripts longer than 200 nucleotides with little or no protein coding potential. The expanding list of lncRNAs and accumulating evidence of their functions in plants have necessitated the creation of a comprehensive database for lncRNA research. However, currently available plant lncRNA databases have some deficiencies, including the lack of lncRNA data from some model plants, uneven annotation standards, a lack of visualization for expression patterns, and the absence of epigenetic information. To overcome these problems, we upgraded our Plant Long noncoding RNA Database (PLncDB, http://plncdb.tobaccodb.org/), which was based on a uniform annotation pipeline. PLncDB V2.0 currently contains 1 246 372 lncRNAs for 80 plant species based on 13 834 RNA-Seq datasets, integrating lncRNA information from four other resources including EVLncRNAs, RNAcentral and etc. Expression patterns and epigenetic signals can be visualized using multiple tools (JBrowse, eFP Browser and EPexplorer). Targets and regulatory networks for lncRNAs are also provided for function exploration. In addition, PLncDB V2.0 is hierarchical and user-friendly and has five built-in search engines. 
We believe PLncDB V2.0 is useful for the plant lncRNA community and data mining studies and provides a comprehensive resource for data-driven lncRNA research in plants.",Plant Long noncoding RNA Database,PLncDB,http://plncdb.tobaccodb.org/,a comprehensive encyclopedia of plant long noncoding RNAs +33080028,Peryton: a manual collection of experimentally supported microbe-disease associations.,"We present Peryton (https://dianalab.e-ce.uth.gr/peryton/), a database of experimentally supported microbe-disease associations. Its first version constitutes a novel resource hosting more than 7900 entries linking 43 diseases with 1396 microorganisms. Peryton's content is exclusively sustained by manual curation of biomedical articles. Diseases and microorganisms are provided in a systematic, standardized manner using reference resources to create database dictionaries. Information about the experimental design, study cohorts and the applied high- or low-throughput techniques is meticulously annotated and catered to users. Several functionalities are provided to enhance user experience and enable ingenious use of Peryton. One or more microorganisms and/or diseases can be queried at the same time. Advanced filtering options and direct text-based filtering of results enable refinement of returned information and the conducting of tailored queries suitable to different research questions. Peryton also provides interactive visualizations to effectively capture different aspects of its content and results can be directly downloaded for local storage and downstream analyses. 
Peryton will serve as a valuable source, enabling scientists of microbe-related disease fields to form novel hypotheses but, equally importantly, to assist in cross-validation of findings.",Peryton,Peryton,https://dianalab.e-ce.uth.gr/peryton/,a manual collection of experimentally supported microbe-disease associations +33084889,KLIFS: an overhaul after the first 5 years of supporting kinase research.,"Kinases are a prime target of drug development efforts with >60 drug approvals in the past two decades. Due to the research into this protein family, a wealth of data has been accumulated that keeps on growing. KLIFS-Kinase-Ligand Interaction Fingerprints and Structures-is a structural database focusing on how kinase inhibitors interact with their targets. The aim of KLIFS is to support (structure-based) kinase research through the systematic collection, annotation, and processing of kinase structures. Now, 5 years after releasing the initial KLIFS website, the database has undergone a complete overhaul with a new website, new logo, and new functionalities. In this article, we start by looking back at how KLIFS has been used by the research community, followed by a description of the renewed KLIFS, and conclude with showcasing the functionalities of KLIFS. Major changes include the integration of approved drugs and inhibitors in clinical trials, extension of the coverage to atypical kinases, and a RESTful API for programmatic access. KLIFS is available at the new domain https://klifs.net.",Kinase-Ligand Interaction Fingerprints and Structures,KLIFS,https://klifs.net,a structural database focusing on how kinase inhibitors interact with their targets +33084904,DualSeqDB: the host-pathogen dual RNA sequencing database for infection processes.,"Despite antibiotic resistance being a matter of growing concern worldwide, the bacterial mechanisms of pathogenesis remain underexplored, restraining our ability to develop new antimicrobials. 
The rise of high-throughput sequencing technology has made available a massive amount of transcriptomic data that could help elucidate the mechanisms underlying bacterial infection. Here, we introduce the DualSeqDB database, a resource that helps the identification of gene transcriptional changes in both pathogenic bacteria and their natural hosts upon infection. DualSeqDB comprises nearly 300 000 entries from eight different studies, with information on bacterial and host differential gene expression under in vivo and in vitro conditions. Expression data values were calculated entirely from raw data and analyzed through a standardized pipeline to ensure consistency between different studies. It includes information on seven different strains of pathogenic bacteria and a variety of cell types and tissues in Homo sapiens, Mus musculus and Macaca fascicularis at different time points. We envisage that DualSeqDB can help the research community in the systematic characterization of genes involved in host infection and help the development and tailoring of new molecules against infectious diseases. DualSeqDB is freely available at http://www.tartaglialab.com/dualseq.",DualSeqDB,DualSeqDB,http://www.tartaglialab.com/dualseq,the host-pathogen dual RNA sequencing database for infection processes +33084905,MeDAS: a Metazoan Developmental Alternative Splicing database.,"Alternative splicing is widespread throughout eukaryotic genomes and greatly increases transcriptomic diversity. Many alternative isoforms have functional roles in developmental processes and are precisely temporally regulated. To facilitate the study of alternative splicing in a developmental context, we created MeDAS, a Metazoan Developmental Alternative Splicing database. MeDAS is an added-value resource that re-analyses publicly archived RNA-seq libraries to provide quantitative data on alternative splicing events as they vary across the time course of development. 
It has broad temporal and taxonomic scope and is intended to assist the user in identifying trends in alternative splicing throughout development. To create MeDAS, we re-analysed a curated set of 2232 Illumina polyA+ RNA-seq libraries that chart detailed time courses of embryonic and post-natal development across 18 species with a taxonomic range spanning the major metazoan lineages from Caenorhabditis elegans to human. MeDAS is freely available at https://das.chenlulab.com both as raw data tables and as an interactive browser allowing searches by species, tissue, or genomic feature (gene, transcript or exon ID and sequence). Results will provide details on alternative splicing events identified for the queried feature and can be visualised at the gene-, transcript- and exon-level as time courses of expression and inclusion levels, respectively.",Metazoan Developmental Alternative Splicing database,MeDAS,https://das.chenlulab.com,a Metazoan Developmental Alternative Splicing database +33086069,SCLC-CellMiner: A Resource for Small Cell Lung Cancer Cell Line Genomics and Pharmacology Based on Genomic Signatures.,"CellMiner-SCLC (https://discover.nci.nih.gov/SclcCellMinerCDB/) integrates drug sensitivity and genomic data, including high-resolution methylome and transcriptome from 118 patient-derived small cell lung cancer (SCLC) cell lines, providing a resource for research into this ""recalcitrant cancer."" We demonstrate the reproducibility and stability of data from multiple sources and validate the SCLC consensus nomenclature on the basis of expression of master transcription factors NEUROD1, ASCL1, POU2F3, and YAP1. Our analyses reveal transcription networks linking SCLC subtypes with MYC and its paralogs and the NOTCH and HIPPO pathways. SCLC subsets express specific surface markers, providing potential opportunities for antibody-based targeted therapies. 
YAP1-driven SCLCs are notable for differential expression of the NOTCH pathway, epithelial-mesenchymal transition (EMT), and antigen-presenting machinery (APM) genes and sensitivity to mTOR and AKT inhibitors. These analyses provide insights into SCLC biology and a framework for future investigations into subtype-specific SCLC vulnerabilities.",SCLC-CellMiner,SCLC-CellMiner,https://discover.nci.nih.gov/SclcCellMinerCDB/,A Resource for Small Cell Lung Cancer Cell Line Genomics and Pharmacology Based on Genomic Signatures +33086069,SCLC-CellMiner: A Resource for Small Cell Lung Cancer Cell Line Genomics and Pharmacology Based on Genomic Signatures.,"CellMiner-SCLC (https://discover.nci.nih.gov/SclcCellMinerCDB/) integrates drug sensitivity and genomic data, including high-resolution methylome and transcriptome from 118 patient-derived small cell lung cancer (SCLC) cell lines, providing a resource for research into this ""recalcitrant cancer."" We demonstrate the reproducibility and stability of data from multiple sources and validate the SCLC consensus nomenclature on the basis of expression of master transcription factors NEUROD1, ASCL1, POU2F3, and YAP1. Our analyses reveal transcription networks linking SCLC subtypes with MYC and its paralogs and the NOTCH and HIPPO pathways. SCLC subsets express specific surface markers, providing potential opportunities for antibody-based targeted therapies. YAP1-driven SCLCs are notable for differential expression of the NOTCH pathway, epithelial-mesenchymal transition (EMT), and antigen-presenting machinery (APM) genes and sensitivity to mTOR and AKT inhibitors. 
These analyses provide insights into SCLC biology and a framework for future investigations into subtype-specific SCLC vulnerabilities.",CellMiner-SCLC,CellMiner-SCLC,https://discover.nci.nih.gov/SclcCellMinerCDB/,A Resource for Small Cell Lung Cancer Cell Line Genomics and Pharmacology Based on Genomic Signatures +33095860,CNCDatabase: a database of non-coding cancer drivers.,"Most mutations in cancer genomes occur in the non-coding regions with unknown impact on tumor development. Although the increase in the number of cancer whole-genome sequences has revealed numerous putative non-coding cancer drivers, their information is dispersed across multiple studies making it difficult to understand their roles in tumorigenesis of different cancer types. We have developed CNCDatabase, Cornell Non-coding Cancer driver Database (https://cncdatabase.med.cornell.edu/) that contains detailed information about predicted non-coding drivers at gene promoters, 5' and 3' UTRs (untranslated regions), enhancers, CTCF insulators and non-coding RNAs. CNCDatabase documents 1111 protein-coding genes and 90 non-coding RNAs with reported drivers in their non-coding regions from 32 cancer types by computational predictions of positive selection using whole-genome sequences; differential gene expression in samples with and without mutations; or another set of experimental validations including luciferase reporter assays and genome editing. The database can be easily modified and scaled as lists of non-coding drivers are revised in the community with larger whole-genome sequencing studies, CRISPR screens and further experimental validations. 
Overall, CNCDatabase provides a helpful resource for researchers to explore the pathological role of non-coding alterations in human cancers.",Cornell Non-coding Cancer driver Database,CNCDatabase,https://cncdatabase.med.cornell.edu/,a database of non-coding cancer drivers +33095866,VARAdb: a comprehensive variation annotation database for human.,"With the study of human diseases and biological processes increasing, a large number of non-coding variants have been identified and facilitated. The rapid accumulation of genetic and epigenomic information has resulted in an urgent need to collect and process data to explore the regulation of non-coding variants. Here, we developed a comprehensive variation annotation database for human (VARAdb, http://www.licpathway.net/VARAdb/), which specifically considers non-coding variants. VARAdb provides annotation information for 577,283,813 variations and novel variants, prioritizes variations based on scores using nine annotation categories, and supports pathway downstream analysis. Importantly, VARAdb integrates a large amount of genetic and epigenomic data into five annotation sections, which include 'Variation information', 'Regulatory information', 'Related genes', 'Chromatin accessibility' and 'Chromatin interaction'. The detailed annotation information consists of motif changes, risk SNPs, LD SNPs, eQTLs, clinical variant-drug-gene pairs, sequence conservation, somatic mutations, enhancers, super enhancers, promoters, transcription factors, chromatin states, histone modifications, chromatin accessibility regions and chromatin interactions. This database is a user-friendly interface to query, browse and visualize variations and related annotation information. 
VARAdb is a useful resource for selecting potential functional variations and interpreting their effects on human diseases and biological processes.",variation annotation database for human,VARAdb,http://www.licpathway.net/VARAdb/,a comprehensive variation annotation database for human +33095885,IndiGenomes: a comprehensive resource of genetic variants from over 1000 Indian genomes.,"With the advent of next-generation sequencing, large-scale initiatives for mining whole genomes and exomes have been employed to better understand global or population-level genetic architecture. India encompasses more than 17% of the world population with extensive genetic diversity, but is under-represented in the global sequencing datasets. This gave us the impetus to perform and analyze the whole genome sequencing of 1029 healthy Indian individuals under the pilot phase of the 'IndiGen' program. We generated a compendium of 55,898,122 single allelic genetic variants from geographically distinct Indian genomes and calculated the allele frequency, allele count, allele number, along with the number of heterozygous or homozygous individuals. In the present study, these variants were systematically annotated using publicly available population databases and can be accessed through a browsable online database named as 'IndiGenomes' http://clingen.igib.res.in/indigen/. The IndiGenomes database will help clinicians and researchers in exploring the genetic component underlying medical conditions. Till date, this is the most comprehensive genetic variant resource for the Indian population and is made freely available for academic utility. 
The resource has also been accessed extensively by the worldwide community since it's launch.",IndiGenomes,IndiGenomes,http://clingen.igib.res.in/indigen/,a comprehensive resource of genetic variants from over 1000 Indian genomes +33104772,The mouse Gene Expression Database (GXD): 2021 update.,"The Gene Expression Database (GXD; www.informatics.jax.org/expression.shtml) is an extensive and well-curated community resource of mouse developmental gene expression information. For many years, GXD has collected and integrated data from RNA in situ hybridization, immunohistochemistry, RT-PCR, northern blot, and western blot experiments through curation of the scientific literature and by collaborations with large-scale expression projects. Since our last report in 2019, we have continued to acquire these classical types of expression data; developed a searchable index of RNA-Seq and microarray experiments that allows users to quickly and reliably find specific mouse expression studies in ArrayExpress (https://www.ebi.ac.uk/arrayexpress/) and GEO (https://www.ncbi.nlm.nih.gov/geo/); and expanded GXD to include RNA-Seq data. Uniformly processed RNA-Seq data are imported from the EBI Expression Atlas and then integrated with the other types of expression data in GXD, and with the genetic, functional, phenotypic and disease-related information in Mouse Genome Informatics (MGI). This integration has made the RNA-Seq data accessible via GXD's enhanced searching and filtering capabilities. 
Further, we have embedded the Morpheus heat map utility into the GXD user interface to provide additional tools for display and analysis of RNA-Seq data, including heat map visualization, sorting, filtering, hierarchical clustering, nearest neighbors analysis and visual enrichment.",mouse Gene Expression Database,GXD,https://www.ebi.ac.uk/arrayexpress/,an extensive and well-curated community resource of mouse developmental gene expression information +33106848,"RNAcentral 2021: secondary structure integration, improved sequence search and new member databases.","RNAcentral is a comprehensive database of non-coding RNA (ncRNA) sequences that provides a single access point to 44 RNA resources and >18 million ncRNA sequences from a wide range of organisms and RNA types. RNAcentral now also includes secondary (2D) structure information for >13 million sequences, making RNAcentral the world's largest RNA 2D structure database. The 2D diagrams are displayed using R2DT, a new 2D structure visualization method that uses consistent, reproducible and recognizable layouts for related RNAs. The sequence similarity search has been updated with a faster interface featuring facets for filtering search results by RNA type, organism, source database or any keyword. This sequence search tool is available as a reusable web component, and has been integrated into several RNAcentral member databases, including Rfam, miRBase and snoDB. To allow for a more fine-grained assignment of RNA types and subtypes, all RNAcentral sequences have been annotated with Sequence Ontology terms. The RNAcentral database continues to grow and provide a central data resource for the RNA community. 
RNAcentral is freely available at https://rnacentral.org.",RNAcentral,RNAcentral,https://rnacentral.org,"secondary structure integration, improved sequence search and new member databases" +33112702,LncRBase V.2: an updated resource for multispecies lncRNAs and ClinicLSNP hosting genetic variants in lncRNAs for cancer patients.,"The recent discovery of long non-coding RNA as a regulatory molecule in the cellular system has altered the concept of the functional aptitude of the genome. Since our publication of the first version of LncRBase in 2014, there has been an enormous increase in the number of annotated lncRNAs of multiple species other than Human and Mouse. LncRBase V.2 hosts information of 549,648 lncRNAs corresponding to six additional species besides Human and Mouse, viz. Rat, Fruitfly, Zebrafish, Chicken, Cow and C.elegans. It provides additional distinct features such as (i) Transcription Factor Binding Site (TFBS) in the lncRNA promoter region, (ii) sub-cellular localization pattern of lncRNAs (iii) lnc-pri-miRNAs (iv) Possible small open reading frames (sORFs) within lncRNA. (v) Manually curated information of interacting target molecules and disease association of lncRNA genes (vi) Distribution of lncRNAs across multiple tissues of all species. Moreover, we have hosted ClinicLSNP within LncRBase V.2. ClinicLSNP has a comprehensive catalogue of lncRNA variants present within breast, ovarian, and cervical cancer inferred from 561 RNA-Seq data corresponding to these cancers. Further, we have checked whether these lncRNA variants overlap with (i)Repeat elements,(ii)CGI, (iii)TFBS within lncRNA loci (iv)SNP localization in trait-associated Linkage Disequilibrium(LD) region, (v)predicted the potentially pathogenic variants and (vi)effect of SNP on lncRNA secondary structure. Overall, LncRBaseV.2 is a user-friendly database to survey, search and retrieve information about multi-species lncRNAs. 
Further, ClinicLSNP will serve as a useful resource for cancer specific lncRNA variants and their related information. The database is freely accessible and available at http://dibresources.jcbose.ac.in/zhumur/lncrbase2/.",LncRBase V.2,LncRBase,http://dibresources.jcbose.ac.in/zhumur/lncrbase2/,an updated resource for multispecies lncRNAs and ClinicLSNP hosting genetic variants in lncRNAs for cancer patients +33119751,The MemMoRF database for recognizing disordered protein regions interacting with cellular membranes.,"Protein and lipid membrane interactions play fundamental roles in a large number of cellular processes (e.g. signalling, vesicle trafficking, or viral invasion). A growing number of examples indicate that such interactions can also rely on intrinsically disordered protein regions (IDRs), which can form specific reversible interactions not only with proteins but also with lipids. We named IDRs involved in such membrane lipid-induced disorder-to-order transition as MemMoRFs, in an analogy to IDRs exhibiting disorder-to-order transition upon interaction with protein partners termed Molecular Recognition Features (MoRFs). Currently, both the experimental detection and computational characterization of MemMoRFs are challenging, and information about these regions are scattered in the literature. To facilitate the related investigations we generated a comprehensive database of experimentally validated MemMoRFs based on manual curation of literature and structural data. To characterize the dynamics of MemMoRFs, secondary structure propensity and flexibility calculated from nuclear magnetic resonance chemical shifts were incorporated into the database. These data were supplemented by inclusion of sentences from papers, functional data and disease-related information. 
The MemMoRF database can be accessed via a user-friendly interface at https://memmorf.hegelab.org, potentially providing a central resource for the characterization of disordered regions in transmembrane and membrane-associated proteins.",MemMoRFs,MemMoRFs,https://memmorf.hegelab.org,database for recognizing disordered protein regions interacting with cellular membranes +33125078,Pfam: The protein families database in 2021.,"The Pfam database is a widely used resource for classifying protein sequences into families and domains. Since Pfam was last described in this journal, over 350 new families have been added in Pfam 33.1 and numerous improvements have been made to existing entries. To facilitate research on COVID-19, we have revised the Pfam entries that cover the SARS-CoV-2 proteome, and built new entries for regions that were not covered by Pfam. We have reintroduced Pfam-B which provides an automatically generated supplement to Pfam and contains 136 730 novel clusters of sequences that are not yet matched by a Pfam family. The new Pfam-B is based on a clustering by the MMseqs2 software. We have compared all of the regions in the RepeatsDB to those in Pfam and have started to use the results to build and refine Pfam repeat families. Pfam is freely available for browsing and download at http://pfam.xfam.org/.",Pfam 33.1,Pfam,http://pfam.xfam.org/,The protein families database in 2021 +33125081,KEGG: integrating viruses and cellular organisms.,"KEGG (https://www.kegg.jp/) is a manually curated resource integrating eighteen databases categorized into systems, genomic, chemical and health information. It also provides KEGG mapping tools, which enable understanding of cellular and organism-level functions from genome sequences and other molecular datasets. KEGG mapping is a predictive method of reconstructing molecular network systems from molecular building blocks based on the concept of functional orthologs. 
Since the introduction of the KEGG NETWORK database, various diseases have been associated with network variants, which are perturbed molecular networks caused by human gene variants, viruses, other pathogens and environmental factors. The network variation maps are created as aligned sets of related networks showing, for example, how different viruses inhibit or activate specific cellular signaling pathways. The KEGG pathway maps are now integrated with network variation maps in the NETWORK database, as well as with conserved functional units of KEGG modules and reaction modules in the MODULE database. The KO database for functional orthologs continues to be improved and virus KOs are being expanded for better understanding of virus-cell interactions and for enabling prediction of viral perturbations.",KEGG,KEGG,https://www.kegg.jp/,"a manually curated resource integrating eighteen databases categorized into systems, genomic, chemical and health information" +33137173,Global Substance Registration System: consistent scientific descriptions for substances related to health.,"The US Food and Drug Administration (FDA) and the National Center for Advancing Translational Sciences (NCATS) have collaborated to publish rigorous scientific descriptions of substances relevant to regulated products. The FDA has adopted the global ISO 11238 data standard for the identification of substances in medicinal products and has populated a database to organize the agency's regulatory submissions and marketed products data. NCATS has worked with FDA to develop the Global Substance Registration System (GSRS) and produce a non-proprietary version of the database for public benefit. In 2019, more than half of all new drugs in clinical development were proteins, nucleic acid therapeutics, polymer products, structurally diverse natural products or cellular therapies. 
While multiple databases of small molecule chemical structures are available, this resource is unique in its application of regulatory standards for the identification of medicinal substances and its robust support for other substances in addition to small molecules. This public, manually curated dataset provides unique ingredient identifiers (UNIIs) and detailed descriptions for over 100 000 substances that are particularly relevant to medicine and translational research. The dataset can be accessed and queried at https://gsrs.ncats.nih.gov/app/substances.",Global Substance Registration System,GSRS,https://gsrs.ncats.nih.gov/app/substances,consistent scientific descriptions for substances related to health +33137183,IMG/VR v3: an integrated ecological and evolutionary framework for interrogating genomes of uncultivated viruses.,"Viruses are integral components of all ecosystems and microbiomes on Earth. Through pervasive infections of their cellular hosts, viruses can reshape microbial community structure and drive global nutrient cycling. Over the past decade, viral sequences identified from genomes and metagenomes have provided an unprecedented view of viral genome diversity in nature. Since 2016, the IMG/VR database has provided access to the largest collection of viral sequences obtained from (meta)genomes. Here, we present the third version of IMG/VR, composed of 18 373 cultivated and 2 314 329 uncultivated viral genomes (UViGs), nearly tripling the total number of sequences compared to the previous version. These clustered into 935 362 viral Operational Taxonomic Units (vOTUs), including 188 930 with two or more members. UViGs in IMG/VR are now reported as single viral contigs, integrated proviruses or genome bins, and are annotated with a new standardized pipeline including genome quality estimation using CheckV, taxonomic classification reflecting the latest ICTV update, and expanded host taxonomy prediction. 
The new IMG/VR interface enables users to efficiently browse, search, and select UViGs based on genome features and/or sequence similarity. IMG/VR v3 is available at https://img.jgi.doe.gov/vr, and the underlying data are available to download at https://genome.jgi.doe.gov/portal/IMG_VR.",IMG/VR database,IMG/VR,https://img.jgi.doe.gov/vr,an integrated ecological and evolutionary framework for interrogating genomes of uncultivated viruses +33137190,Ensembl 2021.,"The Ensembl project (https://www.ensembl.org) annotates genomes and disseminates genomic data for vertebrate species. We create detailed and comprehensive annotation of gene structures, regulatory elements and variants, and enable comparative genomics by inferring the evolutionary history of genes and genomes. Our integrated genomic data are made available in a variety of ways, including genome browsers, search interfaces, specialist tools such as the Ensembl Variant Effect Predictor, download files and programmatic interfaces. Here, we present recent Ensembl developments including two new website portals. Ensembl Rapid Release (http://rapid.ensembl.org) is designed to provide core tools and services for genomes as soon as possible and has been deployed to support large biodiversity sequencing projects. Our SARS-CoV-2 genome browser (https://covid-19.ensembl.org) integrates our own annotation with publicly available genomic data from numerous sources to facilitate the use of genomics in the international scientific response to the COVID-19 pandemic. We also report on other updates to our annotation resources, tools and services. All Ensembl data and software are freely available without restriction.",SARS-CoV-2 genome browser,,https://covid-19.ensembl.org, +33137190,Ensembl 2021.,"The Ensembl project (https://www.ensembl.org) annotates genomes and disseminates genomic data for vertebrate species. 
We create detailed and comprehensive annotation of gene structures, regulatory elements and variants, and enable comparative genomics by inferring the evolutionary history of genes and genomes. Our integrated genomic data are made available in a variety of ways, including genome browsers, search interfaces, specialist tools such as the Ensembl Variant Effect Predictor, download files and programmatic interfaces. Here, we present recent Ensembl developments including two new website portals. Ensembl Rapid Release (http://rapid.ensembl.org) is designed to provide core tools and services for genomes as soon as possible and has been deployed to support large biodiversity sequencing projects. Our SARS-CoV-2 genome browser (https://covid-19.ensembl.org) integrates our own annotation with publicly available genomic data from numerous sources to facilitate the use of genomics in the international scientific response to the COVID-19 pandemic. We also report on other updates to our annotation resources, tools and services. All Ensembl data and software are freely available without restriction.",Ensembl project,Ensembl,https://www.ensembl.org, +33137192,Plant-ImputeDB: an integrated multiple plant reference panel database for genotype imputation.,"Genotype imputation is a process that estimates missing genotypes in terms of the haplotypes and genotypes in a reference panel. It can effectively increase the density of single nucleotide polymorphisms (SNPs), boost the power to identify genetic association and promote the combination of genetic studies. However, there has been a lack of high-quality reference panels for most plants, which greatly hinders the application of genotype imputation. Here, we developed Plant-ImputeDB (http://gong_lab.hzau.edu.cn/Plant_imputeDB/), a comprehensive database with reference panels of 12 plant species for online genotype imputation, SNP and block search and free download. 
By integrating genotype data and whole-genome resequencing data of plants from various studies and databases, the current Plant-ImputeDB provides high-quality reference panels of 12 plant species, including ∼69.9 million SNPs from 34 244 samples. It also provides an easy-to-use online tool with the option of two popular tools specifically designed for genotype imputation. In addition, Plant-ImputeDB accepts submissions of different types of genomic variations, and provides free and open access to all publicly available data in support of related research worldwide. In general, Plant-ImputeDB may serve as an important resource for plant genotype imputation and greatly facilitate the research on plant genetic research.",Plant-ImputeDB,Plant-ImputeDB,http://gong_lab.hzau.edu.cn/Plant_imputeDB/,an integrated multiple plant reference panel database for genotype imputation +33147626,CellTalkDB: a manually curated database of ligand-receptor interactions in humans and mice.,"Cell-cell communications in multicellular organisms generally involve secreted ligand-receptor (LR) interactions, which is vital for various biological phenomena. Recent advancements in single-cell RNA sequencing (scRNA-seq) have effectively resolved cellular phenotypic heterogeneity and the cell-type composition of complex tissues, facilitating the systematic investigation of cell-cell communications at single-cell resolution. However, assessment of chemical-signal-dependent cell-cell communication through scRNA-seq relies heavily on prior knowledge of LR interaction pairs. We constructed CellTalkDB (http://tcm.zju.edu.cn/celltalkdb), a manually curated comprehensive database of LR interaction pairs in humans and mice comprising 3398 human LR pairs and 2033 mouse LR pairs, through text mining and manual verification of known protein-protein interactions using the STRING database, with literature-supported evidence for each pair. 
Compared with SingleCellSignalR, the largest LR-pair resource, CellTalkDB includes not only 2033 mouse LR pairs but also 377 additional human LR pairs. In conclusion, the data on human and mouse LR pairs contained in CellTalkDB could help to further the inference and understanding of the LR-interaction-based cell-cell communications, which might provide new insights into the mechanism underlying biological processes.",CellTalkDB,CellTalkDB,http://tcm.zju.edu.cn/celltalkdb,a manually curated database of ligand-receptor interactions in humans and mice +33151287,DrugCentral 2021 supports drug discovery and repositioning.,"DrugCentral is a public resource (http://drugcentral.org) that serves the scientific community by providing up-to-date drug information, as described in previous papers. The current release includes 109 newly approved (October 2018 through March 2020) active pharmaceutical ingredients in the US, Europe, Japan and other countries; and two molecular entities (e.g. mefuparib) of interest for COVID19. New additions include a set of pharmacokinetic properties for ∼1000 drugs, and a sex-based separation of side effects, processed from FAERS (FDA Adverse Event Reporting System); as well as a drug repositioning prioritization scheme based on the market availability and intellectual property rights forFDA approved drugs. In the context of the COVID19 pandemic, we also incorporated REDIAL-2020, a machine learning platform that estimates anti-SARS-CoV-2 activities, as well as the 'drugs in news' feature offers a brief enumeration of the most interesting drugs at the present moment. 
The full database dump and data files are available for download from the DrugCentral web portal.",DrugCentral,DrugCentral,http://drugcentral.org, +33151298,GRNdb: decoding the gene regulatory networks in diverse human and mouse conditions.,"Gene regulatory networks (GRNs) formed by transcription factors (TFs) and their downstream target genes play essential roles in gene expression regulation. Moreover, GRNs can be dynamic changing across different conditions, which are crucial for understanding the underlying mechanisms of disease pathogenesis. However, no existing database provides comprehensive GRN information for various human and mouse normal tissues and diseases at the single-cell level. Based on the known TF-target relationships and the large-scale single-cell RNA-seq data collected from public databases as well as the bulk data of The Cancer Genome Atlas and the Genotype-Tissue Expression project, we systematically predicted the GRNs of 184 different physiological and pathological conditions of human and mouse involving >633 000 cells and >27 700 bulk samples. We further developed GRNdb, a freely accessible and user-friendly database (http://www.grndb.com/) for searching, comparing, browsing, visualizing, and downloading the predicted information of 77 746 GRNs, 19 687 841 TF-target pairs, and related binding motifs at single-cell/bulk resolution. GRNdb also allows users to explore the gene expression profile, correlations, and the associations between expression levels and the patient survival of diverse cancers. Overall, GRNdb provides a valuable and timely resource to the scientific community to elucidate the functions and mechanisms of gene expression regulation in various conditions.",GRNdb,GRNdb,http://www.grndb.com/, +33152070,Genenames.org: the HGNC and VGNC resources in 2021.,"The HUGO Gene Nomenclature Committee (HGNC) based at EMBL's European Bioinformatics Institute (EMBL-EBI) assigns unique symbols and names to human genes. 
There are over 42,000 approved gene symbols in our current database of which over 19 000 are for protein-coding genes. While we still update placeholder and problematic symbols, we are working towards stabilizing symbols where possible; over 2000 symbols for disease associated genes are now marked as stable in our symbol reports. All of our data is available at the HGNC website https://www.genenames.org. The Vertebrate Gene Nomenclature Committee (VGNC) was established to assign standardized nomenclature in line with human for vertebrate species lacking their own nomenclature committee. In addition to the previous VGNC core species of chimpanzee, cow, horse and dog, we now name genes in cat, macaque and pig. Gene groups have been added to VGNC and currently include two complex families: olfactory receptors (ORs) and cytochrome P450s (CYPs). In collaboration with specialists we have also named CYPs in species beyond our core set. All VGNC data is available at https://vertebrate.genenames.org/. This article provides an overview of our online data and resources, focusing on updates over the last two years.",Genenames.org,,https://www.genenames.org, +33156326,MetaNetX/MNXref: unified namespace for metabolites and biochemical reactions in the context of metabolic models.,"MetaNetX/MNXref is a reconciliation of metabolites and biochemical reactions providing cross-links between major public biochemistry and Genome-Scale Metabolic Network (GSMN) databases. The new release brings several improvements with respect to the quality of the reconciliation, with particular attention dedicated to preserving the intrinsic properties of GSMN models. The MetaNetX website (https://www.metanetx.org/) provides access to the full database and online services. A major improvement is for mapping of user-provided GSMNs to MXNref, which now provides diagnostic messages about model content. 
In addition to the website and flat files, the resource can now be accessed through a SPARQL endpoint (https://rdf.metanetx.org).",MetaNetX/MNXref,MetaNetX,https://www.metanetx.org/,unified namespace for metabolites and biochemical reactions in the context of metabolic models. +33156327,TCRD and Pharos 2021: mining the human proteome for disease biology.,"In 2014, the National Institutes of Health (NIH) initiated the Illuminating the Druggable Genome (IDG) program to identify and improve our understanding of poorly characterized proteins that can potentially be modulated using small molecules or biologics. Two resources produced from these efforts are: The Target Central Resource Database (TCRD) (http://juniper.health.unm.edu/tcrd/) and Pharos (https://pharos.nih.gov/), a web interface to browse the TCRD. The ultimate goal of these resources is to highlight and facilitate research into currently understudied proteins, by aggregating a multitude of data sources, and ranking targets based on the amount of data available, and presenting data in machine learning ready format. Since the 2017 release, both TCRD and Pharos have produced two major releases, which have incorporated or expanded an additional 25 data sources. Recently incorporated data types include human and viral-human protein-protein interactions, protein-disease and protein-phenotype associations, and drug-induced gene signatures, among others. These aggregated data have enabled us to generate new visualizations and content sections in Pharos, in order to empower users to find new areas of study in the druggable genome.",Pharos,Pharos,https://pharos.nih.gov/, +33156327,TCRD and Pharos 2021: mining the human proteome for disease biology.,"In 2014, the National Institutes of Health (NIH) initiated the Illuminating the Druggable Genome (IDG) program to identify and improve our understanding of poorly characterized proteins that can potentially be modulated using small molecules or biologics. 
Two resources produced from these efforts are: The Target Central Resource Database (TCRD) (http://juniper.health.unm.edu/tcrd/) and Pharos (https://pharos.nih.gov/), a web interface to browse the TCRD. The ultimate goal of these resources is to highlight and facilitate research into currently understudied proteins, by aggregating a multitude of data sources, and ranking targets based on the amount of data available, and presenting data in machine learning ready format. Since the 2017 release, both TCRD and Pharos have produced two major releases, which have incorporated or expanded an additional 25 data sources. Recently incorporated data types include human and viral-human protein-protein interactions, protein-disease and protein-phenotype associations, and drug-induced gene signatures, among others. These aggregated data have enabled us to generate new visualizations and content sections in Pharos, in order to empower users to find new areas of study in the druggable genome.",The Target Central Resource Database,TCRD,http://juniper.health.unm.edu/tcrd/, +33156332,DDBJ update: streamlining submission and access of human data.,"The Bioinformation and DDBJ Center (DDBJ Center, https://www.ddbj.nig.ac.jp) provides databases that capture, preserve and disseminate diverse biological data to support research in the life sciences. This center collects nucleotide sequences with annotations, raw sequencing data, and alignment information from high-throughput sequencing platforms, and study and sample information, in collaboration with the National Center for Biotechnology Information (NCBI) and the European Bioinformatics Institute (EBI). This collaborative framework is known as the International Nucleotide Sequence Database Collaboration (INSDC). 
In collaboration with the National Bioscience Database Center (NBDC), the DDBJ Center also provides a controlled-access database, the Japanese Genotype-phenotype Archive (JGA), which archives and distributes human genotype and phenotype data, requiring authorized access. The NBDC formulates guidelines and policies for sharing human data and reviews data submission and use applications. To streamline all of the processes at NBDC and JGA, we have integrated the two systems by introducing a unified login platform with a group structure in September 2020. In addition to the public databases, the DDBJ Center provides a computer resource, the NIG supercomputer, for domestic researchers to analyze large-scale genomic data. This report describes updates to the services of the DDBJ Center, focusing on the NBDC and JGA system enhancements.",DDBJ Center,DDBJ,https://www.ddbj.nig.ac.jp, +33156333,The InterPro protein families and domains database: 20 years on.,"The InterPro database (https://www.ebi.ac.uk/interpro/) provides an integrative classification of protein sequences into families, and identifies functionally important domains and conserved sites. InterProScan is the underlying software that allows protein and nucleic acid sequences to be searched against InterPro's signatures. Signatures are predictive models which describe protein families, domains or sites, and are provided by multiple databases. InterPro combines signatures representing equivalent families, domains or sites, and provides additional information such as descriptions, literature references and Gene Ontology (GO) terms, to produce a comprehensive resource for protein classification. Founded in 1999, InterPro has become one of the most widely used resources for protein family annotation. 
Here, we report the status of InterPro (version 81.0) in its 20th year of operation, and its associated software, including updates to database content, the release of a new website and REST API, and performance improvements in InterProScan.",InterPro database,InterPro,https://www.ebi.ac.uk/interpro/, +33166388,LegumeIP V3: from models to crops-an integrative gene discovery platform for translational genomics in legumes.,"Legumes have contributed to human health, sustainable food and feed production worldwide for centuries. The study of model legumes has played vital roles in deciphering key genes, pathways, and networks regulating biological mechanisms and agronomic traits. Along with emerging breeding technology such as genome editing, translation of the knowledge gained from model plants to crops is in high demand. The updated database (V3) was redesigned for translational genomics targeting the discovery of novel key genes in less-studied non-model legume crops by referring to the knowledge gained in model legumes. The database contains genomic data for all 22 included species, and transcriptomic data covering thousands of RNA-seq samples mostly from model species. The rich biological data and analytic tools for gene expression and pathway analyses can be used to decipher critical genes, pathways, and networks in model legumes. The integrated comparative genomic functions further facilitate the translation of this knowledge to legume crops. Therefore, the database will be a valuable resource to identify important genes regulating specific biological mechanisms or agronomic traits in the non-model yet economically significant legume crops. LegumeIP V3 is available free to the public at https://plantgrn.noble.org/LegumeIP. Access to the database does not require login, registration, or password.",LegumeIP V3,LegumeIP,https://plantgrn.noble.org/LegumeIP,an integrative gene discovery platform for translational genomics in legumes. 
+33170210,The Zebrafish Information Network: major gene page and home page updates.,"The Zebrafish Information Network (ZFIN) (https://zfin.org/) is the database for the model organism, zebrafish (Danio rerio). ZFIN expertly curates, organizes, and provides a wide array of zebrafish genetic and genomic data, including genes, alleles, transgenic lines, gene expression, gene function, mutant phenotypes, orthology, human disease models, gene and mutant nomenclature, and reagents. New features at ZFIN include major updates to the home page and the gene page, the two most used pages at ZFIN. Data including disease models, phenotypes, expression, mutants and gene function continue to be contributed to The Alliance of Genome Resources for integration with similar data from other model organisms.",Zebrafish Information Network,ZFIN,https://zfin.org/,"the database for the model organism, zebrafish (Danio rerio)" +33174598,"LectomeXplore, an update of UniLectin for the discovery of carbohydrate-binding proteins based on a new lectin classification.","Lectins are non-covalent glycan-binding proteins mediating cellular interactions but their annotation in newly sequenced organisms is lacking. The limited size of functional domains and the low level of sequence similarity challenge usual bioinformatics tools. The identification of lectin domains in proteomes requires the manual curation of sequence alignments based on structural folds. A new lectin classification is proposed. It is built on three levels: (i) 35 lectin domain folds, (ii) 109 classes of lectins sharing at least 20% sequence similarity and (iii) 350 families of lectins sharing at least 70% sequence similarity. This information is compiled in the UniLectin platform that includes the previously described UniLectin3D database of curated lectin 3D structures. Since its first release, UniLectin3D has been updated with 485 additional 3D structures. 
The database is now complemented by two additional modules: PropLec containing predicted β-propeller lectins and LectomeXplore including predicted lectins from sequences of the NBCI-nr and UniProt for every curated lectin class. UniLectin is accessible at https://www.unilectin.eu/.",UniLectin,UniLectin,https://www.unilectin.eu/, +33174605,"OMA orthology in 2021: website overhaul, conserved isoforms, ancestral gene order and more.","OMA is an established resource to elucidate evolutionary relationships among genes from currently 2326 genomes covering all domains of life. OMA provides pairwise and groupwise orthologs, functional annotations, local and global gene order conservation (synteny) information, among many other functions. This update paper describes the reorganisation of the database into gene-, group- and genome-centric pages. Other new and improved features are detailed, such as reporting of the evolutionarily best conserved isoforms of alternatively spliced genes, the inferred local order of ancestral genes, phylogenetic profiling, better cross-references, fast genome mapping, semantic data sharing via RDF, as well as a special coronavirus OMA with 119 viruses from the Nidovirales order, including SARS-CoV-2, the agent of the COVID-19 pandemic. We conclude with improvements to the documentation of the resource through primers, tutorials and short videos. OMA is accessible at https://omabrowser.org.",OMA,OMA,https://omabrowser.org,an established resource to elucidate evolutionary relationships among genes from currently 2326 genomes covering all domains of life +33175872,High density genotype storage for plant breeding in the Chado schema of Breedbase.,"Modern breeding programs routinely use genome-wide information for selecting individuals to advance. The large volumes of genotypic information required present a challenge for data storage and query efficiency. Major use cases require genotyping data to be linked with trait phenotyping data. 
In contrast to phenotyping data that are often stored in relational database schemas, next-generation genotyping data are traditionally stored in non-relational storage systems due to their extremely large scope. This study presents a novel data model implemented in Breedbase (https://breedbase.org/) for uniting relational phenotyping data and non-relational genotyping data within the open-source PostgreSQL database engine. Breedbase is an open-source, web-database designed to manage all of a breeder's informatics needs: management of field experiments, phenotypic and genotypic data collection and storage, and statistical analyses. The genotyping data is stored in a PostgreSQL data-type known as binary JavaScript Object Notation (JSONb), where the JSON structures closely follow the Variant Call Format (VCF) data model. The Breedbase genotyping data model can handle different ploidy levels, structural variants, and any genotype encoded in VCF. JSONb is both compressed and indexed, resulting in a space and time efficient system. Furthermore, file caching maximizes data retrieval performance. Integration of all breeding data within the Chado database schema retains referential integrity that may be lost when genotyping and phenotyping data are stored in separate systems. Benchmarking demonstrates that the system is fast enough for computation of a genomic relationship matrix (GRM) and genome wide association study (GWAS) for datasets involving 1,325 diploid Zea mays, 314 triploid Musa acuminata, and 924 diploid Manihot esculenta samples genotyped with 955,690, 142,119, and 287,952 genotype-by-sequencing (GBS) markers, respectively.",Breedbase,Breedbase,https://breedbase.org/, +33179747,jMorp updates in 2020: large enhancement of multi-omics data resources on the general Japanese population.,"In the Tohoku Medical Megabank project, genome and omics analyses of participants in two cohort studies were performed. 
A part of the data is available at the Japanese Multi Omics Reference Panel (jMorp; https://jmorp.megabank.tohoku.ac.jp) as a web-based database, as reported in our previous manuscript published in Nucleic Acid Research in 2018. At that time, jMorp mainly consisted of metabolome data; however, now genome, methylome, and transcriptome data have been integrated in addition to the enhancement of the number of samples for the metabolome data. For genomic data, jMorp provides a Japanese reference sequence obtained using de novo assembly of sequences from three Japanese individuals and allele frequencies obtained using whole-genome sequencing of 8,380 Japanese individuals. In addition, the omics data include methylome and transcriptome data from ∼300 samples and distribution of concentrations of more than 755 metabolites obtained using high-throughput nuclear magnetic resonance and high-sensitivity mass spectrometry. In summary, jMorp now provides four different kinds of omics data (genome, methylome, transcriptome, and metabolome), with a user-friendly web interface. This will be a useful scientific data resource on the general population for the discovery of disease biomarkers and personalized disease prevention and early diagnosis.",Japanese Multi Omics Reference Panel,jMorp,https://jmorp.megabank.tohoku.ac.jp, +33186585,PolarProtDb: A Database of Transmembrane and Secreted Proteins showing Apical-Basal Polarity.,"Most cells in multicellular organisms are somehow asymmetric, polarized: maintaining separate membrane domains. Typical examples are the epithelial cells (apical-basal polarization), neurons (dendritic-axonal domains), or migratory cells (with a leading and a trailing edge). Here we present the most comprehensive database containing experimentally verified mammalian proteins that display polarized sorting or secretion, focusing on epithelial polarity. 
In addition to the source cells or tissues, homology-based inferences and transmembrane topology (if applicable) are all provided. PolarProtDb also offers a detailed interface displaying all information that may be relevant for trafficking: including post-translational modifications (glycosylations and phosphorylations), known or predicted short linear motifs conserved across orthologs, as well as potential interaction partners. Data on polarized sorting has so far been scattered across myriads of publications, hence difficult to access. This information can help researchers in several areas, such as scanning for potential entry points of viral agents like COVID-19. PolarProtDb shall be a useful resource to design future experiments as well as for comparative analyses. The database is available at http://polarprotdb.enzim.hu.",PolarProtDb,PolarProtDb,http://polarprotdb.enzim.hu,A Database of Transmembrane and Secreted Proteins showing Apical-Basal Polarity +33196830,GenBank.,"GenBank® (https://www.ncbi.nlm.nih.gov/genbank/) is a comprehensive, public database that contains 9.9 trillion base pairs from over 2.1 billion nucleotide sequences for 478 000 formally described species. Daily data exchange with the European Nucleotide Archive and the DNA Data Bank of Japan ensures worldwide coverage. 
Recent updates include new resources for data from the SARS-CoV-2 virus, updates to the NCBI Submission Portal and associated submission wizards for dengue and SARS-CoV-2 viruses, new taxonomy queries for viruses and prokaryotes, and simplified submission processes for EST and GSS sequences.",GenBank,GenBank,https://www.ncbi.nlm.nih.gov/genbank/,"a comprehensive, public database that contains 9.9 trillion base pairs from over 2.1 billion nucleotide sequences for 478 000 formally described species" +33211864,FANTOM enters 20th year: expansion of transcriptomic atlases and functional annotation of non-coding RNAs.,"The Functional ANnoTation Of the Mammalian genome (FANTOM) Consortium has continued to provide extensive resources in the pursuit of understanding the transcriptome, and transcriptional regulation, of mammalian genomes for the last 20 years. To share these resources with the research community, the FANTOM web-interfaces and databases are being regularly updated, enhanced and expanded with new data types. In recent years, the FANTOM Consortium's efforts have been mainly focused on creating new non-coding RNA datasets and resources. The existing FANTOM5 human and mouse miRNA atlas was supplemented with rat, dog, and chicken datasets. The sixth (latest) edition of the FANTOM project was launched to assess the function of human long non-coding RNAs (lncRNAs). From its creation until 2020, FANTOM6 has contributed to the research community a large dataset generated from the knock-down of 285 lncRNAs in human dermal fibroblasts; this is followed with extensive expression profiling and cellular phenotyping. Other updates to the FANTOM resource includes the reprocessing of the miRNA and promoter atlases of human, mouse and chicken with the latest reference genome assemblies. To facilitate the use and accessibility of all above resources we further enhanced FANTOM data viewers and web interfaces. 
The updated FANTOM web resource is publicly available at https://fantom.gsc.riken.jp/.",Functional ANnoTation Of the Mammalian genome,FANTOM,https://fantom.gsc.riken.jp/, +33211869,"Rfam 14: expanded coverage of metagenomic, viral and microRNA families.","Rfam is a database of RNA families where each of the 3444 families is represented by a multiple sequence alignment of known RNA sequences and a covariance model that can be used to search for additional members of the family. Recent developments have involved expert collaborations to improve the quality and coverage of Rfam data, focusing on microRNAs, viral and bacterial RNAs. We have completed the first phase of synchronising microRNA families in Rfam and miRBase, creating 356 new Rfam families and updating 40. We established a procedure for comprehensive annotation of viral RNA families starting with Flavivirus and Coronaviridae RNAs. We have also increased the coverage of bacterial and metagenome-based RNA families from the ZWD database. These developments have enabled a significant growth of the database, with the addition of 759 new families in Rfam 14. To facilitate further community contribution to Rfam, expert users are now able to build and submit new families using the newly developed Rfam Cloud family curation system. New Rfam website features include a new sequence similarity search powered by RNAcentral, as well as search and visualisation of families with pseudoknots. 
Rfam is freely available at https://rfam.org.",Rfam 14,Rfam,https://rfam.org,a database of RNA families where each of the 3444 families is represented by a multiple sequence alignment of known RNA sequences and a covariance model that can be used to search for additional members of the family +33211879,From ArrayExpress to BioStudies.,"ArrayExpress (https://www.ebi.ac.uk/arrayexpress) is an archive of functional genomics data at EMBL-EBI, established in 2002, initially as an archive for publication-related microarray data and was later extended to accept sequencing-based data. Over the last decade an increasing share of biological experiments involve multiple technologies assaying different biological modalities, such as epigenetics, and RNA and protein expression, and thus the BioStudies database (https://www.ebi.ac.uk/biostudies) was established to deal with such multimodal data. Its central concept is a study, which typically is associated with a publication. BioStudies stores metadata describing the study, provides links to the relevant databases, such as European Nucleotide Archive (ENA), as well as hosts the types of data for which specialized databases do not exist. With BioStudies now fully functional, we are able to further harmonize the archival data infrastructure at EMBL-EBI, and ArrayExpress is being migrated to BioStudies. In future, all functional genomics data will be archived at BioStudies. The process will be seamless for the users, who will continue to submit data using the online tool Annotare and will be able to query and download data largely in the same manner as before. Nevertheless, some technical aspects, particularly programmatic access, will change. 
This update guides the users through these changes.",ArrayExpress,ArrayExpress,https://www.ebi.ac.uk/arrayexpress,"an archive of functional genomics data at EMBL-EBI, established in 2002, initially as an archive for publication-related microarray data and was later extended to accept sequencing-based data" +33211880,"BRENDA, the ELIXIR core data resource in 2021: new developments and updates.","The BRENDA enzyme database (https://www.brenda-enzymes.org), established in 1987, has evolved into the main collection of functional enzyme and metabolism data. In 2018, BRENDA was selected as an ELIXIR Core Data Resource. BRENDA provides reliable data, continuous curation and updates of classified enzymes, and the integration of newly discovered enzymes. The main part contains >5 million data for ∼90 000 enzymes from ∼13 000 organisms, manually extracted from ∼157 000 primary literature references, combined with information of text and data mining, data integration, and prediction algorithms. Supplements comprise disease-related data, protein sequences, 3D structures, genome annotations, ligand information, taxonomic, bibliographic, and kinetic data. BRENDA offers an easy access to enzyme information from quick to advanced searches, text- and structured-based queries for enzyme-ligand interactions, word maps, and visualization of enzyme data. The BRENDA Pathway Maps are completely revised and updated for an enhanced interactive and intuitive usability. The new design of the Enzyme Summary Page provides an improved access to each individual enzyme. A new protein structure 3D viewer was integrated. The prediction of the intracellular localization of eukaryotic enzymes has been implemented. 
The new EnzymeDetector combines BRENDA enzyme annotations with protein and genome databases for the detection of eukaryotic and prokaryotic enzymes.",BRENDA enzyme database,BRENDA,https://www.brenda-enzymes.org, +33216893,"DPL: a comprehensive database on sequences, structures, sources and functions of peptide ligands.","DPL (http://www.peptide-ligand.cn/) is a comprehensive database of peptide ligand (DPL). DPL1.0 holds 1044 peptide ligand entries and provides references for the study of the polypeptide platform. The data were collected from PubMed-NCBI, PDB, APD3, CAMPR3, etc. The lengths of the base sequences are varied from 3 to78. DPL database has 923 linear peptides and 88 cyclic peptides. The functions of peptides collected by DPL are very wide. It includes 540 entries of antiviral peptides (including SARS-CoV-2), 55 entries of signal peptides, 48 entries of protease inhibitors, 45 entries of anti-hypertension, 37 entries of anticancer peptides, etc. There are 270 different kinds of peptide targets. All peptides in DPL have clear binding targets. Most of the peptides and receptors have 3D structures experimentally verified or predicted by CYCLOPS, I-TASSER and SWISS-MODEL. With the rapid development of the COVID-2019 epidemic, this database also collects the research progress of peptides against coronavirus. In conclusion, DPL is a unique resource, which allows users easily to explore the targets, different structures as well as properties of peptides.",database of peptide ligand,DPL,http://www.peptide-ligand.cn/,"a comprehensive database on sequences, structures, sources and functions of peptide ligands" +33216899,WCSdb: a database of wild Coffea species.,"Coffee is a beverage enjoyed by millions of people worldwide and an important commodity for millions of people. 
Beside the two cultivated species (Coffea arabica and Coffea canephora), the 139 wild coffee species/taxa belonging to the Coffea genus are largely unknown to coffee scientists and breeders although these species may be crucial for future coffee crop development to face climate changes. Here we present the Wild Coffee Species database (WCSdb) hosted by Pl@ntNet platform (http://publish.plantnet-project.org/project/wildcofdb_en), providing information for 141 coffee species/taxa, for which 84 contain a photo gallery and 82 contain sequencing data (genotyping-by-sequencing, chloroplast or whole genome sequences). The objective of this database is to better understand and characterize the species (identification, morphology, biochemical compounds, genetic diversity and sequence data) in order to better protect and promote them.

Database url

http://publish.plantnet-project.org/project/wildcofdb_en.",Wild Coffee Species database,WCSdb,http://publish.plantnet-project.org/project/wildcofdb_en,a database of wild Coffea species +33219674,canSAR: update to the cancer translational research and drug discovery knowledgebase.,"canSAR (http://cansar.icr.ac.uk) is the largest, public, freely available, integrative translational research and drug discovery knowledgebase for oncology. canSAR integrates vast multidisciplinary data from across genomic, protein, pharmacological, drug and chemical data with structural biology, protein networks and more. It also provides unique data, curation and annotation and crucially, AI-informed target assessment for drug discovery. canSAR is widely used internationally by academia and industry. Here we describe significant developments and enhancements to the data, web interface and infrastructure of canSAR in the form of the new implementation of the system: canSARblack. We demonstrate new functionality in aiding translation hypothesis generation and experimental design, and show how canSAR can be adapted and utilised outside oncology.",canSAR,canSAR,http://cansar.icr.ac.uk,the cancer translational research and drug discovery knowledgebase +33219686,LnCeCell: a comprehensive database of predicted lncRNA-associated ceRNA networks at single-cell resolution.,"Within the tumour microenvironment, cells exhibit different behaviours driven by fine-tuning of gene regulation. Identification of cellular-specific gene regulatory networks will deepen the understanding of disease pathology at single-cell resolution and contribute to the development of precision medicine. 
Here, we describe a database, LnCeCell (http://www.bio-bigdata.net/LnCeCell/ or http://bio-bigdata.hrbmu.edu.cn/LnCeCell/), which aims to document cellular-specific long non-coding RNA (lncRNA)-associated competing endogenous RNA (ceRNA) networks for personalised characterisation of diseases based on the 'One Cell, One World' theory. LnCeCell is curated with cellular-specific ceRNA regulations from >94 000 cells across 25 types of cancers and provides >9000 experimentally supported lncRNA biomarkers, associated with tumour metastasis, recurrence, prognosis, circulation, drug resistance, etc. For each cell, LnCeCell illustrates a global map of ceRNA sub-cellular locations, which have been manually curated from the literature and related data sources, and portrays a functional state atlas for a single cancer cell. LnCeCell also provides several flexible tools to infer ceRNA functions based on a specific cellular background. LnCeCell serves as an important resource for investigating the gene regulatory networks within a single cell and can help researchers understand the regulatory mechanisms underlying complex microbial ecosystems and individual phenotypes.",LnCeCell,LnCeCell,http://bio-bigdata.hrbmu.edu.cn/LnCeCell/,a comprehensive database of predicted lncRNA-associated ceRNA networks at single-cell resolution +33219686,LnCeCell: a comprehensive database of predicted lncRNA-associated ceRNA networks at single-cell resolution.,"Within the tumour microenvironment, cells exhibit different behaviours driven by fine-tuning of gene regulation. Identification of cellular-specific gene regulatory networks will deepen the understanding of disease pathology at single-cell resolution and contribute to the development of precision medicine. 
Here, we describe a database, LnCeCell (http://www.bio-bigdata.net/LnCeCell/ or http://bio-bigdata.hrbmu.edu.cn/LnCeCell/), which aims to document cellular-specific long non-coding RNA (lncRNA)-associated competing endogenous RNA (ceRNA) networks for personalised characterisation of diseases based on the 'One Cell, One World' theory. LnCeCell is curated with cellular-specific ceRNA regulations from >94 000 cells across 25 types of cancers and provides >9000 experimentally supported lncRNA biomarkers, associated with tumour metastasis, recurrence, prognosis, circulation, drug resistance, etc. For each cell, LnCeCell illustrates a global map of ceRNA sub-cellular locations, which have been manually curated from the literature and related data sources, and portrays a functional state atlas for a single cancer cell. LnCeCell also provides several flexible tools to infer ceRNA functions based on a specific cellular background. LnCeCell serves as an important resource for investigating the gene regulatory networks within a single cell and can help researchers understand the regulatory mechanisms underlying complex microbial ecosystems and individual phenotypes.",LnCeCell,LnCeCell,http://www.bio-bigdata.net/LnCeCell,a comprehensive database of predicted lncRNA-associated ceRNA networks at single-cell resolution +33231677,GTRD: an integrated view of transcription regulation.,"The Gene Transcription Regulation Database (GTRD; http://gtrd.biouml.org/) contains uniformly annotated and processed NGS data related to gene transcription regulation: ChIP-seq, ChIP-exo, DNase-seq, MNase-seq, ATAC-seq and RNA-seq. With the latest release, the database has reached a new level of data integration. 
All cell types (cell lines and tissues) presented in the GTRD were arranged into a dictionary and linked with different ontologies (BRENDA, Cell Ontology, Uberon, Cellosaurus and Experimental Factor Ontology) and with related experiments in specialized databases on transcription regulation (FANTOM5, ENCODE and GTEx). The updated version of the GTRD provides an integrated view of transcription regulation through a dedicated web interface with advanced browsing and search capabilities, an integrated genome browser, and table reports by cell types, transcription factors, and genes of interest.",Gene Transcription Regulation Database,GTRD,http://gtrd.biouml.org/, +33237286,UniProt: the universal protein knowledgebase in 2021.,"The aim of the UniProt Knowledgebase is to provide users with a comprehensive, high-quality and freely accessible set of protein sequences annotated with functional information. In this article, we describe significant updates that we have made over the last two years to the resource. The number of sequences in UniProtKB has risen to approximately 190 million, despite continued work to reduce sequence redundancy at the proteome level. We have adopted new methods of assessing proteome completeness and quality. We continue to extract detailed annotations from the literature to add to reviewed entries and supplement these in unreviewed entries with annotations provided by automated systems such as the newly implemented Association-Rule-Based Annotator (ARBA). We have developed a credit-based publication submission interface to allow the community to contribute publications and annotations to UniProt entries. We describe how UniProtKB responded to the COVID-19 pandemic through expert curation of relevant entries that were rapidly made available to the research community through a dedicated portal. 
UniProt resources are available under a CC-BY (4.0) license via the web at https://www.uniprot.org/.",UniProt Knowledgebase,UniProt,https://www.uniprot.org/,the universal protein knowledgebase in 2021 +33237286,UniProt: the universal protein knowledgebase in 2021.,"The aim of the UniProt Knowledgebase is to provide users with a comprehensive, high-quality and freely accessible set of protein sequences annotated with functional information. In this article, we describe significant updates that we have made over the last two years to the resource. The number of sequences in UniProtKB has risen to approximately 190 million, despite continued work to reduce sequence redundancy at the proteome level. We have adopted new methods of assessing proteome completeness and quality. We continue to extract detailed annotations from the literature to add to reviewed entries and supplement these in unreviewed entries with annotations provided by automated systems such as the newly implemented Association-Rule-Based Annotator (ARBA). We have developed a credit-based publication submission interface to allow the community to contribute publications and annotations to UniProt entries. We describe how UniProtKB responded to the COVID-19 pandemic through expert curation of relevant entries that were rapidly made available to the research community through a dedicated portal. UniProt resources are available under a CC-BY (4.0) license via the web at https://www.uniprot.org/.",UniProt Knowledgebase,UniProtKB,https://www.uniprot.org/,the universal protein knowledgebase in 2021 +33237311,"The STRING database in 2021: customizable protein-protein networks, and functional characterization of user-uploaded gene/measurement sets.","Cellular life depends on a complex web of functional associations between biomolecules. Among these associations, protein-protein interactions are particularly important due to their versatility, specificity and adaptability. 
The STRING database aims to integrate all known and predicted associations between proteins, including both physical interactions as well as functional associations. To achieve this, STRING collects and scores evidence from a number of sources: (i) automated text mining of the scientific literature, (ii) databases of interaction experiments and annotated complexes/pathways, (iii) computational interaction predictions from co-expression and from conserved genomic context and (iv) systematic transfers of interaction evidence from one organism to another. STRING aims for wide coverage; the upcoming version 11.5 of the resource will contain more than 14 000 organisms. In this update paper, we describe changes to the text-mining system, a new scoring-mode for physical interactions, as well as extensive user interface features for customizing, extending and sharing protein networks. In addition, we describe how to query STRING with genome-wide, experimental data, including the automated detection of enriched functionalities and potential biases in the user's query data. The STRING resource is available online, at https://string-db.org/.",STRING database,STRING,https://string-db.org/,"customizable protein-protein networks, and functional characterization of user-uploaded gene/measurement sets" +33237313,RepeatsDB in 2021: improved data and extended classification for protein tandem repeat structures.,"The RepeatsDB database (URL: https://repeatsdb.org/) provides annotations and classification for protein tandem repeat structures from the Protein Data Bank (PDB). Protein tandem repeats are ubiquitous in all branches of the tree of life. The accumulation of solved repeat structures provides new possibilities for classification and detection, but also increasing the need for annotation. Here we present RepeatsDB 3.0, which addresses these challenges and presents an extended classification scheme. 
The major conceptual change compared to the previous version is the hierarchical classification combining top levels based solely on structural similarity (Class > Topology > Fold) with two new levels (Clan > Family) requiring sequence similarity and describing repeat motifs in collaboration with Pfam. Data growth has been addressed with improved mechanisms for browsing the classification hierarchy. A new UniProt-centric view unifies the increasingly frequent annotation of structures from identical or similar sequences. This update of RepeatsDB aligns with our commitment to develop a resource that extracts, organizes and distributes specialized information on tandem repeat protein structures.",RepeatsDB 3.0,RepeatsDB,https://repeatsdb.org/,annotations and classification for protein tandem repeat structures from the Protein Data Bank (PDB) +33245777,3DIV update for 2021: a comprehensive resource of 3D genome and 3D cancer genome.,"Three-dimensional (3D) genome organization is tightly coupled with gene regulation in various biological processes and diseases. In cancer, various types of large-scale genomic rearrangements can disrupt the 3D genome, leading to oncogenic gene expression. However, unraveling the pathogenicity of the 3D cancer genome remains a challenge since closer examinations have been greatly limited due to the lack of appropriate tools specialized for disorganized higher-order chromatin structure. Here, we updated a 3D-genome Interaction Viewer and database named 3DIV by uniformly processing ∼230 billion raw Hi-C reads to expand our contents to the 3D cancer genome. 
The updates of 3DIV are listed as follows: (i) the collection of 401 samples including 220 cancer cell line/tumor Hi-C data, 153 normal cell line/tissue Hi-C data, and 28 promoter capture Hi-C data, (ii) the live interactive manipulation of the 3D cancer genome to simulate the impact of structural variations and (iii) the reconstruction of Hi-C contact maps by user-defined chromosome order to investigate the 3D genome of the complex genomic rearrangement. In summary, the updated 3DIV will be the most comprehensive resource to explore the gene regulatory effects of both the normal and cancer 3D genome. '3DIV' is freely available at http://3div.kr.",3D-genome Interaction Viewer and database,3DIV,http://3div.kr,a comprehensive resource of 3D genome and 3D cancer genome +33247931,"KiMoSys 2.0: an upgraded database for submitting, storing and accessing experimental data for kinetic modeling.","The KiMoSys (https://kimosys.org), launched in 2014, is a public repository of published experimental data, which contains concentration data of metabolites, protein abundances and flux data. It offers a web-based interface and upload facility to share data, making it accessible in structured formats, while also integrating associated kinetic models related to the data. In addition, it also supplies tools to simplify the construction process of ODE (Ordinary Differential Equations)-based models of metabolic networks. In this release, we present an update of KiMoSys with new data and several new features, including (i) an improved web interface, (ii) a new multi-filter mechanism, (iii) introduction of data visualization tools, (iv) the addition of downloadable data in machine-readable formats, (v) an improved data submission tool, (vi) the integration of a kinetic model simulation environment and (vii) the introduction of a unique persistent identifier system. We believe that this new version will improve its role as a valuable resource for the systems biology community. 
Database URL: www.kimosys.org.",KiMoSys 2.0,KiMoSys,https://kimosys.org,"an upgraded database for submitting, storing and accessing experimental data for kinetic modeling" +33252190,GRIN database: A unified and manually curated repertoire of GRIN variants.,"Glutamatergic neurotransmission is crucial for brain development, wiring neuronal function, and synaptic plasticity mechanisms. Recent genetic studies showed the existence of autosomal dominant de novo GRIN gene variants associated with GRIN-related disorders (GRDs), a rare pediatric neurological disorder caused by N-methyl- d-aspartate receptor (NMDAR) dysfunction. Notwithstanding, GRIN variants identification is exponentially growing and their clinical, genetic, and functional annotations remain highly fragmented, representing a bottleneck in GRD patient's stratification. To shorten the gap between GRIN variant identification and patient stratification, we present the GRIN database (GRINdb), a publicly available, nonredundant, updated, and curated database gathering all available genetic, functional, and clinical data from more than 4000 GRIN variants. The manually curated GRINdb outputs on a web server, allowing query and retrieval of reported GRIN variants, and thus representing a fast and reliable bioinformatics resource for molecular clinical advice. Furthermore, the comprehensive mapping of GRIN variants' genetic and clinical information along NMDAR structure revealed important differences in GRIN variants' pathogenicity and clinical phenotypes, shedding light on GRIN-specific fingerprints. Overall, the GRINdb and web server is a resource for molecular stratification of GRIN variants, delivering clinical and investigational insights into GRDs. 
GRINdb is accessible at http://lmc.uab.es/grindb.",GRIN database,GRINdb,http://lmc.uab.es/grindb,A unified and manually curated repertoire of GRIN variants +33262341,"HuskinDB, a database for skin permeation of xenobiotics.","Skin permeation is an essential biological property of small organic compounds our body is exposed to, such as drugs in topic formulations, cosmetics, and environmental toxins. Despite the limited availability of experimental data, there is a lack of systematic analysis and structure. We present a novel resource on skin permeation data that collects all measurements available in the literature and systematically structures experimental conditions. Besides the skin permeation value kp, it includes experimental protocols such as skin source site, skin layer used, preparation technique, storage conditions, as well as test conditions such as temperature, pH as well as the type of donor and acceptor solution. It is important to include these parameters in the assessment of the skin permeation data. In addition, we provide an analysis of physicochemical properties and chemical space coverage, laying the basis for applicability domain determination of insights drawn from the collected data points. The database is freely accessible under https://huskindb.drug-design.de or https://doi.org/10.7303/syn21998881 .",HuskinDB,HuskinDB,https://huskindb.drug-design.de,a database for skin permeation of xenobiotics +33270111,GENCODE 2021.,"The GENCODE project annotates human and mouse genes and transcripts supported by experimental data with high accuracy, providing a foundational resource that supports genome biology and clinical genomics. GENCODE annotation processes make use of primary data and bioinformatic tools and analysis generated both within the consortium and externally to support the creation of transcript structures and the determination of their function. 
Here, we present improvements to our annotation infrastructure, bioinformatics tools, and analysis, and the advances they support in the annotation of the human and mouse genomes including: the completion of first pass manual annotation for the mouse reference genome; targeted improvements to the annotation of genes associated with SARS-CoV-2 infection; collaborative projects to achieve convergence across reference annotation databases for the annotation of human and mouse protein-coding genes; and the first GENCODE manually supervised automated annotation of lncRNAs. Our annotation is accessible via Ensembl, the UCSC Genome Browser and https://www.gencodegenes.org.",GENCODE,GENCODE,https://www.gencodegenes.org, +33275967,ncRNAVar: A Manually Curated Database for Identification of Noncoding RNA Variants Associated with Human Diseases.,"While variants of noncoding RNAs (ncRNAs) have been experimentally validated as a new class of biomarkers and drug targets, the discovery and interpretation of relationships between ncRNA variants and human diseases become important and challenging. Here we present ncRNAVar (http://www.liwzlab.cn/ncrnavar/), the first database that provides association data between validated ncRNA variants and human diseases through manual curation on 2650 publications and computational annotation. ncRNAVar contains 4565 associations between 711 human disease phenotypes and 3112 variants from 2597 ncRNAs. Each association was reviewed by professional curators, incorporated with valuable annotation and cross references, and designated with an association score by our refined score model. ncRNAVar offers web applications including association prioritization, network visualization, and relationship mapping. 
ncRNAVar, presenting a landscape of ncRNA variants in human diseases and a useful resource for subsequent software development, will improve our insight of relationships between ncRNA variants and human health.",ncRNAVar,ncRNAVar,http://www.liwzlab.cn/ncrnavar/,A Manually Curated Database for Identification of Noncoding RNA Variants Associated with Human Diseases +33290552,The Gene Ontology resource: enriching a GOld mine.,"The Gene Ontology Consortium (GOC) provides the most comprehensive resource currently available for computable knowledge regarding the functions of genes and gene products. Here, we report the advances of the consortium over the past two years. The new GO-CAM annotation framework was notably improved, and we formalized the model with a computational schema to check and validate the rapidly increasing repository of 2838 GO-CAMs. In addition, we describe the impacts of several collaborations to refine GO and report a 10% increase in the number of GO annotations, a 25% increase in annotated gene products, and over 9,400 new scientific articles annotated. As the project matures, we continue our efforts to review older annotations in light of newer findings, and, to maintain consistency with other ontologies. As a result, 20 000 annotations derived from experimental data were reviewed, corresponding to 2.5% of experimental GO annotations. The website (http://geneontology.org) was redesigned for quick access to documentation, downloads and tools. 
To maintain an accurate resource and support traceability and reproducibility, we have made available a historical archive covering the past 15 years of GO data with a consistent format and file structure for both the ontology and annotations.",The Gene Ontology resource,,http://geneontology.org, +33290554,"PANTHER version 16: a revised family classification, tree-based classification tool, enhancer regions and extensive API.","PANTHER (Protein Analysis Through Evolutionary Relationships, http://www.pantherdb.org) is a resource for the evolutionary and functional classification of protein-coding genes from all domains of life. The evolutionary classification is based on a library of over 15,000 phylogenetic trees, and the functional classifications include Gene Ontology terms and pathways. Here, we analyze the current coverage of genes from genomes in different taxonomic groups, so that users can better understand what to expect when analyzing a gene list using PANTHER tools. We also describe extensive improvements to PANTHER made in the past two years. The PANTHER Protein Class ontology has been completely refactored, and 6101 PANTHER families have been manually assigned to a Protein Class, providing a high level classification of protein families and their genes. Users can access the TreeGrafter tool to add their own protein sequences to the reference phylogenetic trees in PANTHER, to infer evolutionary context as well as fine-grained annotations. We have added human enhancer-gene links that associate non-coding regions with the annotated human genes in PANTHER. We have also expanded the available services for programmatic access to PANTHER tools and data via application programming interfaces (APIs). 
Other improvements include additional plant genomes and an updated PANTHER GO-slim.",Protein Analysis Through Evolutionary Relationships,PANTHER,http://www.pantherdb.org,a resource for the evolutionary and functional classification of protein-coding genes from all domains of life +33294866,Pancreatlas: Applying an Adaptable Framework to Map the Human Pancreas in Health and Disease.,"Human tissue phenotyping generates complex spatial information from numerous imaging modalities, yet images typically become static figures for publication, and original data and metadata are rarely available. While comprehensive image maps exist for some organs, most resources have limited support for multiplexed imaging or have non-intuitive user interfaces. Therefore, we built a Pancreatlas resource that integrates several technologies into a unique interface, allowing users to access richly annotated web pages, drill down to individual images, and deeply explore data online. The current version of Pancreatlas contains over 800 unique images acquired by whole-slide scanning, confocal microscopy, and imaging mass cytometry, and is available at https://www.pancreatlas.org. 
To create this human pancreas-specific biological imaging resource, we developed a React-based web application and Python-based application programming interface, collectively called Flexible Framework for Integrating and Navigating Data (FFIND), which can be adapted beyond Pancreatlas to meet countless imaging or other structured data-management needs.",Pancreatlas,Pancreatlas,https://www.pancreatlas.org,"a unique interface, allowing users to access richly annotated web pages, drill down to individual images, and deeply explore data online" +33306802,NPBS database: a chemical data resource with relational data between natural products and biological sources.,"NPBS (Natural Products & Biological Sources) database is a chemical data resource with relational data between natural products and biological sources, manually curated from literatures of natural product researches. The relational data link a specific species and all the natural products derived from it and contrarily link a specific natural product and all the biological sources. The biological sources cover diverse species of plant, bacterial, fungal and marine organisms; the natural molecules have proper chemical structure data and computable molecular properties and all the relational data have corresponding references. NPBS database provides a wider choice of biological sources and can be used for dereplication to prevent re-isolation and re-characterization of already known natural products. Database URL: http://www.organchem.csdb.cn/scdb/NPBS.",Natural Products & Biological Sources database,NPBS,http://www.organchem.csdb.cn/scdb/NPBS,a chemical data resource with relational data between natural products and biological sources +33330918,Systematic evaluation of the effects of genetic variants on PIWI-interacting RNA expression across 33 cancer types.,"PIWI-interacting RNAs (piRNAs) are an emerging class of non-coding RNAs involved in tumorigenesis. 
Expression quantitative trait locus (eQTL) analysis has been demonstrated to help reveal the genetic mechanism of single nucleotide polymorphisms (SNPs) in cancer etiology. However, there are no databases that have been constructed to provide an eQTL analysis between SNPs and piRNA expression. In this study, we collected genotyping and piRNA expression data for 10 997 samples across 33 cancer types from The Cancer Genome Atlas (TCGA). Using linear regression cis-eQTL analysis with adjustment of appropriate covariates, we identified millions of SNP-piRNA pairs in tumor (76 924 831) and normal (24 431 061) tissues. Further, we performed differential expression and survival analyses, and linked the eQTLs to genome-wide association study (GWAS) data to comprehensively decipher the functional roles of identified cis-piRNA eQTLs. Finally, we developed a user-friendly database, piRNA-eQTL (http://njmu-edu.cn:3838/piRNA-eQTL/), to help users query, browse and download corresponding eQTL results. In summary, piRNA-eQTL could serve as an important resource to assist the research community in understanding the roles of genetic variants and piRNAs in the development of cancers.",piRNA-eQTL,piRNA-eQTL,http://njmu-edu.cn:3838/piRNA-eQTL/, +33367605,Virxicon: A Lexicon Of Viral Sequences.,"

Motivation

Viruses are the most abundant biological entities and constitute a large reservoir of genetic diversity. In recent years, knowledge about them has increased significantly as a result of dynamic development in life sciences and rapid technological progress. This knowledge is scattered across various data repositories, making a comprehensive analysis of viral data difficult.

Results

In response to the need for gathering a comprehensive knowledge of viruses and viral sequences, we developed Virxicon, a lexicon of all experimentally-acquired sequences for RNA and DNA viruses. The ability to quickly obtain data for entire viral groups, searching sequences by levels of taxonomic hierarchy-according to the Baltimore classification and ICTV taxonomy-and tracking the distribution of viral data and its growth over time are unique features of our database compared to the other tools.

Availability

Virxicon is a publicly available resource, updated weekly. It has an intuitive web interface and can be freely accessed at http://virxicon.cs.put.poznan.pl/.

Supplementary information

Supplementary data are available at Bioinformatics online.",Virxicon,Virxicon,http://virxicon.cs.put.poznan.pl/,A Lexicon Of Viral Sequences +33367605,Virxicon: A Lexicon Of Viral Sequences.,"

Motivation

Viruses are the most abundant biological entities and constitute a large reservoir of genetic diversity. In recent years, knowledge about them has increased significantly as a result of dynamic development in life sciences and rapid technological progress. This knowledge is scattered across various data repositories, making a comprehensive analysis of viral data difficult.

Results

In response to the need for gathering a comprehensive knowledge of viruses and viral sequences, we developed Virxicon, a lexicon of all experimentally-acquired sequences for RNA and DNA viruses. The ability to quickly obtain data for entire viral groups, searching sequences by levels of taxonomic hierarchy-according to the Baltimore classification and ICTV taxonomy-and tracking the distribution of viral data and its growth over time are unique features of our database compared to the other tools.

Availability

Virxicon is a publicly available resource, updated weekly. It has an intuitive web interface and can be freely accessed at http://virxicon.cs.put.poznan.pl/.

Supplementary information

Supplementary data are available at Bioinformatics online.",Virxicon,Virxicon,http://virxicon.cs.put.poznan.pl/,a lexicon of all experimentally-acquired sequences for RNA and DNA viruses +33382886,Creating a Metabolic Syndrome Research Resource using the National Health and Nutrition Examination Survey.,"Metabolic syndrome (MetS) is multifaceted. Risk factors include visceral adiposity, dyslipidemia, hyperglycemia, hypertension and environmental stimuli. MetS leads to an increased risk of cardiovascular disease, type 2 diabetes and stroke. Comparative studies, however, have identified heterogeneity in the pathology of MetS across groups though the etiology of these differences has yet to be elucidated. The Metabolic Syndrome Research Resource (MetSRR) described in this report is a curated database that provides access to MetS-associated biological and ancillary data and pools current and potential biomarkers of MetS extracted from relevant National Health and Nutrition Examination Survey (NHANES) data from 1999-2016. Each potential biomarker was selected following the review of over 100 peer-reviewed articles. MetSRR includes 28 demographics, survey and known MetS-related variables, including 9 curated categorical variables and 42 potentially novel biomarkers. All measures are captured from over 90 000 individuals. This biocuration effort provides increased access to curated MetS-related data and will serve as a hypothesis-generating tool to aid in novel biomarker discovery. In addition, MetSRR provides the ability to generate and export ethnic group-/race-, sex- and age-specific curated datasets, thus broadening participation in research efforts to identify clinically evaluative MetS biomarkers for disparate populations. 
Although there are other databases, such as BioM2MetDisease, designed to explore metabolic diseases through analysis of miRNAs and disease phenotypes, MetSRR is the only MetS-specific database designed to explore etiology of MetS across groups, through the biocuration of demographic, biological samples and biometric data. Database URL: http://www.healthdisparityinformatics.com/MetSRR.",Metabolic Syndrome Research Resource,MetSRR,http://www.healthdisparityinformatics.com/MetSRR,a curated database that provides access to MetS-associated biological and ancillary data and pools current and potential biomarkers of MetS extracted from relevant National Health and Nutrition Examination Survey (NHANES) data from 1999-2016 +33391542,Network- and systems-based re-engineering of dendritic cells with non-coding RNAs for cancer immunotherapy.,"Dendritic cells (DCs) are professional antigen-presenting cells that induce and regulate adaptive immunity by presenting antigens to T cells. Due to their coordinative role in adaptive immune responses, DCs have been used as cell-based therapeutic vaccination against cancer. The capacity of DCs to induce a therapeutic immune response can be enhanced by re-wiring of cellular signalling pathways with microRNAs (miRNAs). Methods: Since the activation and maturation of DCs is controlled by an interconnected signalling network, we deploy an approach that combines RNA sequencing data and systems biology methods to delineate miRNA-based strategies that enhance DC-elicited immune responses. Results: Through RNA sequencing of IKKβ-matured DCs that are currently being tested in a clinical trial on therapeutic anti-cancer vaccination, we identified 44 differentially expressed miRNAs. According to a network analysis, most of these miRNAs regulate targets that are linked to immune pathways, such as cytokine and interleukin signalling. 
We employed a network topology-oriented scoring model to rank the miRNAs, analysed their impact on immunogenic potency of DCs, and identified dozens of promising miRNA candidates, with miR-15a and miR-16 as the top ones. The results of our analysis are presented in a database that constitutes a tool to identify DC-relevant miRNA-gene interactions with therapeutic potential (https://www.synmirapy.net/dc-optimization). Conclusions: Our approach enables the systematic analysis and identification of functional miRNA-gene interactions that can be experimentally tested for improving DC immunogenic potency.",,,https://www.synmirapy.net/dc-optimization,a tool to identify DC-relevant miRNA-gene interactions with therapeutic potential +33399824,BnaGVD: A genomic variation database of rapeseed (Brassica napus).,"Rapeseed (Brassica napus L.) is a typical polyploid crop and one of the most important oilseed crops worldwide. With the rapid progress on high-throughput sequencing technologies and the reduction of sequencing cost, large-scale genomic data of a specific crop have become available. However, raw sequence data are mostly deposited in the sequence read archive of the National Center of Biotechnology Information (NCBI) and the European Nucleotide Archive (ENA), which is freely accessible to all researchers. Extensive tools for practical purposes should be developed to efficiently utilize these large raw data. Here, we report a web-based rapeseed genomic variation database (BnaGVD, http://rapeseed.biocloud.net/home) from which genomic variations, such as single nucleotide polymorphisms (SNPs) and insertions/deletions (InDels) across a world-wide collection of rapeseed accessions, can be referred. The current release of the BnaGVD contains 34,591,899 high-quality SNPs and 12,281,923 high-quality InDels and provides search tools to retrieve genomic variations and gene annotations across 1,007 accessions of worldwide rapeseed germplasm. 
We implement a variety of built-in tools (e.g., BnaGWAS, BnaPCA, and BnaStructure) to help users perform in-depth analyses. We recommend this web resource for accelerating studies on the functional genomics and screening of molecular markers for rapeseed breeding.",rapeseed genomic variation database,BnaGVD,http://rapeseed.biocloud.net/home,A genomic variation database of rapeseed (Brassica napus) +33401309,ADeditome provides the genomic landscape of A-to-I RNA editing in Alzheimer's disease.,"A-to-I RNA editing, contributing to nearly 90% of all editing events in human, has been reported to involve in the pathogenesis of Alzheimer's disease (AD) due to its roles in brain development and immune regulation, such as the deficient editing of GluA2 Q/R related to cell death and memory loss. Currently, there are urgent needs for the systematic annotations of A-to-I RNA editing events in AD. Here, we built ADeditome, the annotation database of A-to-I RNA editing in AD available at https://ccsm.uth.edu/ADeditome, aiming to provide a resource and reference for functional annotation of A-to-I RNA editing in AD to identify therapeutically targetable genes in an individual. We detected 1676 363 editing sites in 1524 samples across nine brain regions from ROSMAP, MayoRNAseq and MSBB. For these editing events, we performed multiple functional annotations including identification of specific and disease stage associated editing events and the influence of editing events on gene expression, protein recoding, alternative splicing and miRNA regulation for all the genes, especially for AD-related genes in order to explore the pathology of AD. Combing all the analysis results, we found 108 010 and 26 168 editing events which may promote or inhibit AD progression, respectively. We also found 5582 brain region-specific editing events with potentially dual roles in AD across different brain regions. 
ADeditome will be a unique resource for AD and drug research communities to identify therapeutically targetable editing events. Significance: ADeditome is the first comprehensive resource of the functional genomics of individual A-to-I RNA editing events in AD, which will be useful for many researchers in the fields of AD pathology, precision medicine, and therapeutic researches.",ADeditome,ADeditome,https://ccsm.uth.edu/ADeditome,the annotation database of A-to-I RNA editing in AD +33416858,The iPPI-DB initiative: A Community-centered database of Protein-Protein Interaction modulators.,"

Motivation

One avenue to address the paucity of clinically testable targets is to reinvestigate the druggable genome by tackling complicated types of targets such as Protein-Protein Interactions (PPIs). Given the challenge to target those interfaces with small chemical compounds, it has become clear that learning from successful examples of PPI modulation is a powerful strategy. Freely-accessible databases of PPI modulators that provide the community with tractable chemical and pharmacological data, as well as powerful tools to query them, are therefore essential to stimulate new drug discovery projects on PPI targets.

Results

Here, we present the new version iPPI-DB, our manually curated database of PPI modulators. In this completely redesigned version of the database, we introduce a new web interface relying on crowdsourcing for the maintenance of the database. This interface was created to enable community contributions, whereby external experts can suggest new database entries. Moreover, the data model, the graphical interface, and the tools to query the database have been completely modernized and improved. We added new PPI modulators, new PPI targets, and extended our focus to stabilizers of PPIs as well.

Availability and implementation

The iPPI-DB server is available at https://ippidb.pasteur.fr The source code for this server is available at https://gitlab.pasteur.fr/ippidb/ippidb-web/ and is distributed under GPL licence (http://www.gnu.org/licences/gpl). Queries can be shared through persistent links according to the FAIR data standards. Data can be downloaded from the website as csv files.

Supplementary information

Supplementary data are available at Bioinformatics online.",iPPI-DB,iPPI-DB,https://ippidb.pasteur.fr,A Community-centered database of Protein-Protein Interaction modulators +33436076,"The Dfam community resource of transposable element families, sequence models, and genome annotations.","Dfam is an open access database of repetitive DNA families, sequence models, and genome annotations. The 3.0-3.3 releases of Dfam ( https://dfam.org ) represent an evolution from a proof-of-principle collection of transposable element families in model organisms into a community resource for a broad range of species, and for both curated and uncurated datasets. In addition, releases since Dfam 3.0 provide auxiliary consensus sequence models, transposable element protein alignments, and a formalized classification system to support the growing diversity of organisms represented in the resource. The latest release includes 266,740 new de novo generated transposable element families from 336 species contributed by the EBI. This expansion demonstrates the utility of many of Dfam's new features and provides insight into the long term challenges ahead for improving de novo generated transposable element datasets.",Dfam,Dfam,https://dfam.org,"an open access database of repetitive DNA families, sequence models, and genome annotations" +33459764,SWITCHES: Searchable web interface for topologies of CHEmical switches.,"

‚ÄÇ

Bistable biochemical switches are key motifs in cellular state decisions and long-term storage of cellular 'memory'. There are a few known biological switches that have been well characterized, however these examples are insufficient for systematic surveys of properties of these important systems. Here we present a resource of all possible bistable biochemical reaction networks with up to 6 reactions between 3 molecules, and 3 reactions between 4 molecules. Over 35,000 reaction topologies were constructed by identifying unique combinations of reactions between a fixed number of molecules. Then, these topologies were populated with rates within a biologically realistic range. The Searchable Web Interface for Topologies of CHEmical Switches (SWITCHES, https://switches.ncbs.res.in) provides a bistability and parameter analysis of over 7 million models from this systematic survey of chemical reaction space. This database will be useful for theoreticians interested in analyzing stability in chemical systems and also experimentalists for creating robust synthetic biological switches.

Availability and implementation

Freely available on the web at https://switches.ncbs.res.in. Website implemented in PHP, MariaDB, Graphviz, and Apache, with all major browsers supported.",Searchable web interface for topologies of CHEmical switches,SWITCHES,https://switches.ncbs.res.in,Searchable web interface for topologies of CHEmical switches +33461215,Ligand-based approach for predicting drug targets and for virtual screening against COVID-19.,"Discovering efficient drugs and identifying target proteins are still an unmet but urgent need for curing coronavirus disease 2019 (COVID-19). Protein structure-based docking is a widely applied approach for discovering active compounds against drug targets and for predicting potential targets of active compounds. However, this approach has its inherent deficiency caused by e.g. various different conformations with largely varied binding pockets adopted by proteins, or the lack of true target proteins in the database. This deficiency may result in false negative results. As a complementary approach to the protein structure-based platform for COVID-19, termed as D3Docking in our previous work, we developed in this study a ligand-based method, named D3Similarity, which is based on the molecular similarity evaluation between the submitted molecule(s) and those in an active compound database. The database is constituted by all the reported bioactive molecules against the coronaviruses, viz., severe acute respiratory syndrome coronavirus (SARS), Middle East respiratory syndrome coronavirus (MERS), severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2), human betacoronavirus 2c EMC/2012 (HCoV-EMC), human CoV 229E (HCoV-229E) and feline infectious peritonitis virus (FIPV), some of which have target or mechanism information but some do not. Based on the two-dimensional (2D) and three-dimensional (3D) similarity evaluation of molecular structures, virtual screening and target prediction could be performed according to similarity ranking results. 
With two examples, we demonstrated the reliability and efficiency of D3Similarity by using 2D × 3D value as score for drug discovery and target prediction against COVID-19. The database, which will be updated regularly, is available free of charge at https://www.d3pharma.com/D3Targets-2019-nCoV/D3Similarity/index.php.",D3Similarity,D3Similarity,https://www.d3pharma.com/D3Targets-2019-nCoV/D3Similarity/index.php, +33480398,DrugRepV: a compendium of repurposed drugs and chemicals targeting epidemic and pandemic viruses.,"Viruses are responsible for causing various epidemics and pandemics with a high mortality rate e.g. ongoing SARS-CoronaVirus-2 crisis. The discovery of novel antivirals remains a challenge but drug repurposing is emerging as a potential solution to develop antivirals in a cost-effective manner. In this regard, we collated the information of repurposed drugs tested for antiviral activity from literature and presented it in the form of a user-friendly web server named 'DrugRepV'. The database contains 8485 entries (3448 unique) with biological, chemical, clinical and structural information of 23 viruses responsible to cause epidemics/pandemics. The database harbors browse and search options to explore the repurposed drug entries. The data can be explored by some important fields like drugs, viruses, drug targets, clinical trials, assays, etc. For summarizing the data, we provide overall statistics of the repurposed candidates. To make the database more informative, it is hyperlinked to various external repositories like DrugBank, PubChem, NCBI-Taxonomy, Clinicaltrials.gov, World Health Organization and many more. 
'DrugRepV' database (https://bioinfo.imtech.res.in/manojk/drugrepv/) would be highly useful to the research community working to develop antivirals.",DrugRepV,DrugRepV,https://bioinfo.imtech.res.in/manojk/drugrepv/,a compendium of repurposed drugs and chemicals targeting epidemic and pandemic viruses +33485793,cgMLST@Taiwan: A web service platform for Vibrio cholerae cgMLST profiling and global strain tracking.,"

Background

Cholera, a rapidly dehydrating diarrheal disease caused by toxigenic Vibrio cholerae, is a leading cause of morbidity and mortality in some regions of the world. Core genome multilocus sequence typing (cgMLST) is a promising approach in generating genetic fingerprints from whole-genome sequencing (WGS) data for strain comparison among laboratories.

Methods

We constructed a V. cholerae core gene allele database using an in-house developed computational pipeline, a database with cgMLST profiles converted from genomic sequences from the National Center for Biotechnology Information, and built a REST-based web accessible via the Internet.

Results

We built a web service platform-cgMLST@Taiwan and installed a V. cholerae allele database, a cgMLST profile database, and computational tools for generating V. cholerae cgMLST profiles (based on 3,017 core genes), performing rapid global strain tracking, and clustering analysis of cgMLST profiles. This web-based platform provides services to researchers, public health microbiologists, and physicians who use WGS data for the investigation of cholera outbreaks and tracking of V. cholerae strain transmission across countries and geographic regions. The cgMLST@Taiwan is accessible at http://rdvd.cdc.gov.tw/cgMLST.",cgMLST@Taiwan,cgMLST@Taiwan,http://rdvd.cdc.gov.tw/cgMLST,A web service platform for Vibrio cholerae cgMLST profiling and global strain tracking +33502607,Missense3D-DB web catalogue: an atom-based analysis and repository of 4M human protein-coding genetic variants.,"The interpretation of human genetic variation is one of the greatest challenges of modern genetics. New approaches are urgently needed to prioritize variants, especially those that are rare or lack a definitive clinical interpretation. We examined 10,136,597 human missense genetic variants from GnomAD, ClinVar and UniProt. We were able to perform large-scale atom-based mapping and phenotype interpretation of 3,960,015 of these variants onto 18,874 experimental and 84,818 in house predicted three-dimensional coordinates of the human proteome. We demonstrate that 14% of amino acid substitutions from the GnomAD database that could be structurally analysed are predicted to affect protein structure (n = 568,548, of which 566,439 rare or extremely rare) and may, therefore, have a yet unknown disease-causing effect. The same is true for 19.0% (n = 6266) of variants of unknown clinical significance or conflicting interpretation reported in the ClinVar database. The results of the structural analysis are available in the dedicated web catalogue Missense3D-DB ( http://missense3d.bc.ic.ac.uk/ ). 
For each of the 4 M variants, the results of the structural analysis are presented in a friendly concise format that can be included in clinical genetic reports. A detailed report of the structural analysis is also available for the non-experts in structural biology. Population frequency and predictions from SIFT and PolyPhen are included for a more comprehensive variant interpretation. This is the first large-scale atom-based structural interpretation of human genetic variation and offers geneticists and the biomedical community a new approach to genetic variant interpretation.",Missense3D-DB,Missense3D-DB,http://missense3d.bc.ic.ac.uk,an atom-based analysis and repository of 4M human protein-coding genetic variants +33507270,InSexBase: an annotated genomic resource of sex chromosomes and sex-biased genes in insects.,"Sex determination and the regulation of sexual dimorphism are among the most fascinating topics in modern biology. As the most species-rich group of sexually reproducing organisms on Earth, insects have multiple sex determination systems. Though sex chromosomes and sex-biased genes are well-studied in dozens of insects, their gene sequences are scattered in various databases. Moreover, a shortage of annotation hinders the deep mining of these data. Here, we collected the chromosome-level sex chromosome data of 49 insect species, including 34 X chromosomes, 15 Z chromosomes, 5 W chromosomes and 2 Y chromosomes. We also obtained Y-linked contigs of four insects species-Anopheles gambiae, Drosophila innubila, Drosophila yakuba and Tribolium castaneum. The unannotated chromosome-level sex chromosomes were annotated using a standard pipeline, yielding a total of 123 030 protein-coding genes, 2 159 427 repeat sequences, 894 miRNAs, 1574 rRNAs, 5105 tRNAs, 395 snoRNAs (small nucleolar RNA), 54 snRNAs (small nuclear RNA) and 5959 other ncRNAs (non-coding RNA). 
In addition, 36 781 sex-biased genes were identified by analyzing 62 RNA-seq (RNA sequencing) datasets. Together with 5707 sex-biased genes from the Drosophila genus collected from the Sex-Associated Gene Database, we obtained a total of 42 488 sex-biased genes from 13 insect species. All these data were deposited into InSexBase, a new user-friendly database of insect sex chromosomes and sex-biased genes. Database URL: http://www.insect-genome.com/Sexdb/.",InSexBase,InSexBase,http://www.insect-genome.com/Sexdb/,an annotated genomic resource of sex chromosomes and sex-biased genes in insects +33511767,FAWMine: An integrated database and analysis platform for fall armyworm genomics.,"Fall armyworm (Spodoptera frugiperda), a native insect species in the Americas, is rapidly becoming a major agricultural pest worldwide and is causing great damage to corn, rice, soybeans, and other crops. To control this pest, scientists have accumulated a great deal of high-throughput data of fall armyworm, and nine versions of its genomes and transcriptomes have been published. However, easily accessing and performing integrated analysis of these omics data sets is challenging. Here, we developed the Fall Armyworm Genome Database (FAWMine, http://159.226.67.243:8080/fawmine/) to maintain genome sequences, structural and functional annotations, transcriptomes, co-expression, protein interactions, homologs, pathways, and single-nucleotide variations. FAWMine provides a powerful framework that helps users to perform flexible and customized searching, present integrated data sets using diverse visualization methods, output results tables in a range of file formats, analyze candidate gene lists using multiple widgets, and query data available in other InterMine systems. Additionally, stand-alone JBrowse and BLAST services are also established, allowing the users to visualize RNA-Seq data and search genome and annotated gene sequences. 
Altogether, FAWMine is a useful tool for querying, visualizing, and analyzing compiled data sets rapidly and efficiently. FAWMine will be continually updated to function as a community resource for fall armyworm genomics and pest control research.",Fall Armyworm Genome Database,FAWMine,http://159.226.67.243:8080/fawmine/,An integrated database and analysis platform for fall armyworm genomics +33514395,My personal mutanome: a computational genomic medicine platform for searching network perturbing alleles linking genotype to phenotype.,"Massive genome sequencing data have inspired new challenges in personalized treatments and facilitated oncological drug discovery. We present a comprehensive database, My Personal Mutanome (MPM), for accelerating the development of precision cancer medicine protocols. MPM contains 490,245 mutations from over 10,800 tumor exomes across 33 cancer types in The Cancer Genome Atlas mapped to 94,563 structure-resolved/predicted protein-protein interaction interfaces (""edgetic"") and 311,022 functional sites (""nodetic""), including ligand-protein binding sites and 8 types of protein posttranslational modifications. In total, 8884 survival results and 1,271,132 drug responses are obtained for these mapped interactions. 
MPM is available at https://mutanome.lerner.ccf.org .",My Personal Mutanome,MPM,https://mutanome.lerner.ccf.org,a computational genomic medicine platform for searching network perturbing alleles linking genotype to phenotype +33515030,HVIDB: a comprehensive database for human-virus protein-protein interactions.,"While leading to millions of people's deaths every year the treatment of viral infectious diseases remains a huge public health challenge.Therefore, an in-depth understanding of human-virus protein-protein interactions (PPIs) as the molecular interface between a virus and its host cell is of paramount importance to obtain new insights into the pathogenesis of viral infections and development of antiviral therapeutic treatments. However, current human-virus PPI database resources are incomplete, lack annotation and usually do not provide the opportunity to computationally predict human-virus PPIs. Here, we present the Human-Virus Interaction DataBase (HVIDB, http://zzdlab.com/hvidb/) that provides comprehensively annotated human-virus PPI data as well as seamlessly integrates online PPI prediction tools. Currently, HVIDB highlights 48 643 experimentally verified human-virus PPIs covering 35 virus families, 6633 virally targeted host complexes, 3572 host dependency/restriction factors as well as 911 experimentally verified/predicted 3D complex structures of human-virus PPIs. Furthermore, our database resource provides tissue-specific expression profiles of 6790 human genes that are targeted by viruses and 129 Gene Expression Omnibus series of differentially expressed genes post-viral infections. Based on these multifaceted and annotated data, our database allows the users to easily obtain reliable information about PPIs of various human viruses and conduct an in-depth analysis of their inherent biological significance. 
In particular, HVIDB also integrates well-performing machine learning models to predict interactions between the human host and viral proteins that are based on (i) sequence embedding techniques, (ii) interolog mapping and (iii) domain-domain interaction inference. We anticipate that HVIDB will serve as a one-stop knowledge base to further guide hypothesis-driven experimental efforts to investigate human-virus relationships.",Human-Virus Interaction DataBase,HVIDB,http://zzdlab.com/hvidb/,a comprehensive database for human-virus protein-protein interactions +33539887,ADDRESS: A Database of Disease-associated Human Variants Incorporating Protein Structure and Folding Stabilities.,"Numerous human diseases are caused by mutations in genomic sequences. Since amino acid changes affect protein function through mechanisms often predictable from protein structure, the integration of structural and sequence data enables us to estimate with greater accuracy whether and how a given mutation will lead to disease. Publicly available annotated databases enable hypothesis assessment and benchmarking of prediction tools. However, the results are often presented as summary statistics or black box predictors, without providing full descriptive information. We developed a new semi-manually curated human variant database presenting information on the protein contact-map, sequence-to-structure mapping, amino acid identity change, and stability prediction for the popular UniProt database. We found that the profiles of pathogenic and benign missense polymorphisms can be effectively deduced using decision trees and comparative analyses based on the presented dataset. 
The database is made publicly available through https://zhanglab.ccmb.med.umich.edu/ADDRESS.",ADDRESS,ADDRESS,https://zhanglab.ccmb.med.umich.edu/ADDRESS,A Database of Disease-associated Human Variants Incorporating Protein Structure and Folding Stabilities +33546584,Plant Co-expression Annotation Resource: a web server for identifying targets for genetically modified crop breeding pipelines.,"The development of genetically modified crops (GM) includes the discovery of candidate genes through bioinformatics analysis using genomics data, gene expression, and others. Proteins of unknown function (PUFs) are interesting targets for GM crops breeding pipelines for the novelty associated with such targets and also to avoid copyright protection. One method of inferring the putative function of PUFs is by relating them to factors of interest such as abiotic stresses using orthology and co-expression networks, in a guilt-by-association manner. In this regard, we have downloaded, analyzed, and processed genomics data of 53 angiosperms, totaling 1,862,010 genes and 2,332,974 RNA. Diamond and InterproScan were used to discover 72,266 PUFs for all organisms. RNA-seq datasets related to abiotic stresses were downloaded from NCBI/GEO. The RNA-seq data was used as input to the LSTrAP software to construct co-expression networks. LSTrAP also created clusters of transcripts with correlated expression, whose members are more probably related to the molecular mechanisms associated with abiotic stresses in the plants. Orthologous groups were created (OrhtoMCL) using all 2,332,974 proteins in order to associate PUFs to abiotic stress-related clusters of co-expression and therefore infer their function in a guilt-by-association manner. A freely available web resource named ""Plant Co-expression Annotation Resource"" ( https://www.machado.cnptia.embrapa.br/plantannot ), Plantannot, was created to provide indexed queries to search for PUF putatively associated with abiotic stresses. 
The web interface also allows browsing, querying, and retrieving of public genomics data from 53 plants. We hope Plantannot to be useful for researchers trying to obtain novel GM crops resistant to climate change hazards.",Plant Co-expression Annotation Resource,Plantannot,https://www.machado.cnptia.embrapa.br/plantannot,a web server for identifying targets for genetically modified crop breeding pipelines +33547946,Web resource on available DNA variant tests for hereditary diseases and genetic predispositions in dogs and cats: An Update.,"Vast progress has been made in the clinical diagnosis and molecular basis of hereditary diseases and genetic predisposition in companion animals. The purpose of this report is to provide an update on the availability of DNA testing for hereditary diseases and genetic predispositions in dogs and cats utilizing the WSAVA-PennGen DNA Testing Database web resource (URL: http://research.vet.upenn.edu/WSAVA-LabSearch ). Information on hereditary diseases, DNA tests, genetic testing laboratories and afflicted breeds added to the web-based WSAVA-PennGen DNA Testing Database was gathered. Following verification through original research and clinical studies, searching various databases on hereditary diseases in dogs and cats, and contacting laboratories offering DNA tests, the data were compared to the resource reported on in 2013. The number of molecularly defined Mendelian inherited diseases and variants in companion animals listed in the WSAVA-PennGen DNA Testing Database in 2020 drastically increased by 112% and 141%, respectively. The number of DNA variant tests offered by each laboratory has also doubled for dogs and cats. While the overall number of laboratories has only slightly increased from 43 to 47, the number of larger corporate laboratories increased, while academic laboratories have declined. 
In addition, there are now several laboratories that are offering breed-specific or all-breed panel tests rather than single-DNA tests for dogs and cats. This unique regularly updated searchable web-based database allows veterinary clinicians, breeders and pet owners to readily find available DNA tests, laboratories performing these DNA tests worldwide, and canine and feline breeds afflicted and also serves as a valuable resource for comparative geneticists.",WSAVA-PennGen DNA Testing Database,,http://research.vet.upenn.edu/WSAVA-LabSearch,Web resource on available DNA variant tests for hereditary diseases and genetic predispositions in dogs and cats +33553941,SARSCOVIDB-A New Platform for the Analysis of the Molecular Impact of SARS-CoV-2 Viral Infection.,"The COVID-19 pandemic caused by the new coronavirus (SARS-CoV-2) has become a global emergency issue for public health. This threat has led to an acceleration in related research and, consequently, an unprecedented volume of clinical and experimental data that include changes in gene expression resulting from infection. The SARS-CoV-2 infection database (SARSCOVIDB: https://sarscovidb.org/) was created to mitigate the difficulties related to this scenario. The SARSCOVIDB is an online platform that aims to integrate all differential gene expression data, at messenger RNA and protein levels, helping to speed up analysis and research on the molecular impact of COVID-19. The database can be searched from different experimental perspectives and presents all related information from published data, such as viral strains, hosts, methodological approaches (proteomics or transcriptomics), genes/proteins, and samples (clinical or experimental). All information was taken from 24 articles related to analyses of differential gene expression out of 5,554 COVID-19/SARS-CoV-2-related articles published so far. The database features 12,535 genes whose expression has been identified as altered due to SARS-CoV-2 infection. 
Thus, the SARSCOVIDB is a new resource to support the health workers and the scientific community in understanding the pathogenesis and molecular impact caused by SARS-CoV-2.",SARS-CoV-2 infection database,SARSCOVIDB,https://sarscovidb.org/,A New Platform for the Analysis of the Molecular Impact of SARS-CoV-2 Viral Infection +33581334,OGP: A Repository of Experimentally Characterized O-Glycoproteins to Facilitate Studies on O-Glycosylation.,"Numerous studies on cancer, biopharmaceuticals, and clinical trials have necessitated comprehensive and precise analysis of protein O-glycosylation. However, the lack of updated and convenient databases deters the storage of and reference to emerging O-glycoprotein data. To resolve this issue, an O-glycoprotein repository named OGP was established in this work. It was constructed with a collection of O-glycoprotein data from different sources. OGP contains 9354 O-glycosylation sites and 11,633 site-specific O-glycans mapping to 2133 O-glycoproteins, and it is the largest O-glycoprotein repository thus far. Based on the recorded O-glycosylation sites, an O-glycosylation site prediction tool was developed. Moreover, an OGP-based website is already available (http://www.oglyp.org/). The website comprises four specially designed and user-friendly modules: statistical analysis, database search, site prediction, and data submission. The first version of OGP repository and the website allow users to obtain various O-glycoprotein-related information, such as protein accession numbers, O-glycosylation sites, glycopeptide sequences, site-specific glycan structures, experimental methods, and potential O-glycosylation sites. O-glycosylation data mining can be performed efficiently on this website, which will greatly facilitate related studies. 
In addition, the database is accessible from OGP website (http://www.oglyp.org/download.php).",OGP,OGP,http://www.oglyp.org/,A Repository of Experimentally Characterized O-Glycoproteins to Facilitate Studies on O-Glycosylation +33594411,"Viral Host Range database, an online tool for recording, analyzing and disseminating virus-host interactions.","

Motivation

Viruses are ubiquitous in the living world, and their ability to infect more than one host defines their host range. However, information about which virus infects which host, and about which host is infected by which virus, is not readily available.

Results

We developed a web-based tool called the Viral Host Range database to record, analyze and disseminate experimental host range data for viruses infecting archaea, bacteria and eukaryotes.

Availability

The ViralHostRangeDB application is available from https://viralhostrangedb.pasteur.cloud. Its source code is freely available from the Gitlab hub of Institut Pasteur (https://gitlab.pasteur.fr/hub/viralhostrangedb).",Viral Host Range database,ViralHostRangeDB,https://viralhostrangedb.pasteur.cloud,"an online tool for recording, analyzing and disseminating virus-host interactions" +33599248,bc-GenExMiner 4.5: new mining module computes breast cancer differential gene expression analyses.,"'Breast cancer gene-expression miner' (bc-GenExMiner) is a breast cancer-associated web portal (http://bcgenex.ico.unicancer.fr). Here, we describe the development of a new statistical mining module, which permits several differential gene expression analyses, i.e. 'Expression' module. Sixty-two breast cancer cohorts and one healthy breast cohort with their corresponding clinicopathological information are included in bc-GenExMiner v4.5 version. Analyses are based on microarray or RNAseq transcriptomic data. Thirty-nine differential gene expression analyses, grouped into 13 categories, according to clinicopathological and molecular characteristics ('Targeted' and 'Exhaustive') and gene expression ('Customized'), have been developed. Output results are visualized in four forms of plots. This new statistical mining module offers, among other things, the possibility to compare gene expression in healthy (cancer-free), tumour-adjacent and tumour tissues at once and in three triple-negative breast cancer subtypes (i.e. C1: molecular apocrine tumours; C2: basal-like tumours infiltrated by immune suppressive cells and C3: basal-like tumours triggering an ineffective immune response). Several validation tests showed that bioinformatics process did not alter the pathobiological information contained in the source data. In this work, we developed and demonstrated that bc-GenExMiner 'Expression' module can be used for exploratory and validation purposes. 
Database URL: http://bcgenex.ico.unicancer.fr.",Breast cancer gene-expression miner,bc-GenExMiner,http://bcgenex.ico.unicancer.fr, +33600011,MutSpliceDB: A database of splice sites variants with RNA-seq based evidence on effects on splicing.,"Splice site variants may lead to transcript alterations, causing exons inclusion, exclusion, truncation, or intron retention. Interpreting the consequences of a specific splice site variant is not straightforward, especially if the variant is located outside of the canonical splice sites. We developed MutSpliceDB: https://brb.nci.nih.gov/splicing, a public resource to facilitate the interpretation of splice sites variants effects on splicing based on manually reviewed RNA-seq BAM files from samples with splice site variants.",MutSpliceDB,MutSpliceDB,https://brb.nci.nih.gov/splicing,A database of splice sites variants with RNA-seq based evidence on effects on splicing +33643383,Abiotic Stress-Responsive miRNA and Transcription Factor-Mediated Gene Regulatory Network in Oryza sativa: Construction and Structural Measure Study.,"Climate changes and environmental stresses have a consequential association with crop plant growth and yield, meaning it is necessary to cultivate crops that have tolerance toward the changing climate and environmental disturbances such as water stress, temperature fluctuation, and salt toxicity. Recent studies have shown that trans-acting regulatory elements, including microRNAs (miRNAs) and transcription factors (TFs), are emerging as promising tools for engineering naive improved crop varieties with tolerance for multiple environmental stresses and enhanced quality as well as yield. However, the interwoven complex regulatory function of TFs and miRNAs at transcriptional and post-transcriptional levels is unexplored in Oryza sativa. To this end, we have constructed a multiple abiotic stress responsive TF-miRNA-gene regulatory network for O. 
sativa using a transcriptome and degradome sequencing data meta-analysis approach. The theoretical network approach has shown the networks to be dense, scale-free, and small-world, which makes the network stable. They are also invariant to scale change where an efficient, quick transmission of biological signals occurs within the network on extrinsic hindrance. The analysis also deciphered the existence of communities (cluster of TF, miRNA, and genes) working together to help plants in acclimatizing to multiple stresses. It highlighted that genes, TFs, and miRNAs shared by multiple stress conditions that work as hubs or bottlenecks for signal propagation, for example, during the interaction between stress-responsive genes (TFs/miRNAs/other genes) and genes involved in floral development pathways under multiple environmental stresses. This study further highlights how the fine-tuning feedback mechanism works for balancing stress tolerance and how timely flowering enable crops to survive in adverse conditions. This study developed the abiotic stress-responsive regulatory network, APRegNet database (http://lms.snu.edu.in/APRegNet), which may help researchers studying the roles of miRNAs and TFs. Furthermore, it advances current understanding of multiple abiotic stress tolerance mechanisms.",APRegNet database,APRegNet,http://lms.snu.edu.in/APRegNet, +33683565,"Health and longevity studies in C. elegans: the ""healthy worm database"" reveals strengths, weaknesses and gaps of test compound-based studies.","Several biogerontology databases exist that focus on genetic or gene expression data linked to health as well as survival, subsequent to compound treatments or genetic manipulations in animal models. However, none of these has yet collected experimental results of compound-related health changes. Since quality of life is often regarded as more valuable than length of life, we aim to fill this gap with the ""Healthy Worm Database"" ( http://healthy-worm-database.eu ). 
Literature describing health-related compound studies in the aging model Caenorhabditis elegans was screened, and data for 440 compounds collected. The database considers 189 publications describing 89 different phenotypes measured in 2995 different conditions. Besides enabling a targeted search for promising compounds for further investigations, this database also offers insights into the research field of studies on healthy aging based on a frequently used model organism. Some weaknesses of C. elegans-based aging studies, like underrepresented phenotypes, especially concerning cognitive functions, as well as the convenience-based use of young worms as the starting point for compound treatment or phenotype measurement are discussed. In conclusion, the database provides an anchor for the search for compounds affecting health, with a link to public databases, and it further highlights some potential shortcomings in current aging research.",Healthy Worm Database,,http://healthy-worm-database.eu, +33685383,lncRNADetector: a bioinformatics pipeline for long non-coding RNA identification and MAPslnc: a repository of medicinal and aromatic plant lncRNAs.,"Long non-coding RNAs (lncRNAs) are an emerging class of non-coding RNAs and potent regulatory elements in the living cells. High throughput RNA sequencing analyses have generated a tremendous amount of transcript sequence data. A large proportion of these transcript sequences does not code for proteins and are known as non-coding RNAs. Among them, lncRNAs are a unique class of transcripts longer than 200 nucleotides with diverse biological functions and regulatory mechanisms. Recent emerging studies and next-generation sequencing technologies show a substantial amount of lncRNAs within the plant genome, which are yet to be identified. The computational identification of lncRNAs from these transcripts is a challenging task due to the involvement of a series of filtering steps. 
We have developed lncRNADetector, a bioinformatics pipeline for the identification of novel lncRNAs, especially from medicinal and aromatic plant (MAP) species. The lncRNADetector has been utilized to analyse and identify more than 88,459 lncRNAs from 21 species of MAPs. To provide a knowledge resource for the plant research community towards elucidating the diversity of biological roles of lncRNAs, the information generated about MAP lncRNAs (post-filtering steps) through lncRNADetector has been stored and organized in MAPslnc database (MAPslnc, https://lncrnapipe.cimap.res.in). The lncRNADetector web server and MAPslnc database have been developed in order to facilitate researchers for accurate identification of lncRNAs from the next-generation sequencing data of different organisms for downstream studies. To the best of our knowledge no such MAPslnc database is available till date.",MAPslnc,MAPslnc,https://lncrnapipe.cimap.res.in,a repository of medicinal and aromatic plant lncRNAs +33685493,riboCIRC: a comprehensive database of translatable circRNAs.,"riboCIRC is a translatome data-oriented circRNA database specifically designed for hosting, exploring, analyzing, and visualizing translatable circRNAs from multi-species. The database provides a comprehensive repository of computationally predicted ribosome-associated circRNAs; a manually curated collection of experimentally verified translated circRNAs; an evaluation of cross-species conservation of translatable circRNAs; a systematic de novo annotation of putative circRNA-encoded peptides, including sequence, structure, and function; and a genome browser to visualize the context-specific occupant footprints of circRNAs. 
It represents a valuable resource for the circRNA research community and is publicly available at http://www.ribocirc.com .",riboCIRC,riboCIRC,http://www.ribocirc.com,a comprehensive database of translatable circRNAs +33705530,Development of a biomarker database toward performing disease classification and finding disease interrelations.,"A biomarker is a measurable indicator of a disease or abnormal state of a body that plays an important role in disease diagnosis, prognosis and treatment. The biomarker has become a significant topic due to its versatile usage in the medical field and in rapid detection of the presence or severity of some diseases. The volume of biomarker data is rapidly increasing and the identified data are scattered. To provide comprehensive information, the explosively growing data need to be recorded in a single platform. There is no open-source freely available comprehensive online biomarker database. To fulfill this purpose, we have developed a human biomarker database as part of the KNApSAcK family databases which contain a vast quantity of information on the relationships between biomarkers and diseases. We have classified the diseases into 18 disease classes, mostly according to the National Center for Biotechnology Information definitions. Apart from this database development, we also have performed disease classification by separately using protein and metabolite biomarkers based on the network clustering algorithm DPClusO and hierarchical clustering. Finally, we reached a conclusion about the relationships among the disease classes. The human biomarker database can be accessed online and the inter-disease relationships may be helpful in understanding the molecular mechanisms of diseases. To our knowledge, this is one of the first approaches to classify diseases based on biomarkers. 
Database URL: http://www.knapsackfamily.com/Biomarker/top.php.",human biomarker database,,http://www.knapsackfamily.com/Biomarker/top.php, +33735949,OverCOVID: an integrative web portal for SARS-CoV-2 bioinformatics resources.,"Outbreaks of COVID-19 caused by the novel coronavirus SARS-CoV-2 is still a threat to global human health. In order to understand the biology of SARS-CoV-2 and developing drug against COVID-19, a vast amount of genomic, proteomic, interatomic, and clinical data is being generated, and the bioinformatics researchers produced databases, webservers and tools to gather those publicly available data and provide an opportunity of analyzing such data. However, these bioinformatics resources are scattered and researchers need to find them from different resources discretely. To facilitate researchers in finding the resources in one frame, we have developed an integrated web portal called OverCOVID (http://bis.zju.edu.cn/overcovid/). The publicly available webservers, databases and tools associated with SARS-CoV-2 have been incorporated in the resource page. In addition, a network view of the resources is provided to display the scope of the research. Other information like SARS-CoV-2 strains is visualized and various layers of interaction resources is listed in distinct pages of the web portal. As an integrative web portal, the OverCOVID will help the scientist to search the resources and accelerate the clinical research of SARS-CoV-2.",OverCOVID,OverCOVID,http://bis.zju.edu.cn/overcovid/,an integrative web portal for SARS-CoV-2 bioinformatics resources +33749993,Integrated intra- and intercellular signaling knowledge for multicellular omics analysis.,"Molecular knowledge of biological processes is a cornerstone in omics data analysis. Applied to single-cell data, such analyses provide mechanistic insights into individual cells and their interactions. 
However, knowledge of intercellular communication is scarce, scattered across resources, and not linked to intracellular processes. To address this gap, we combined over 100 resources covering interactions and roles of proteins in inter- and intracellular signaling, as well as transcriptional and post-transcriptional regulation. We added protein complex information and annotations on function, localization, and role in diseases for each protein. The resource is available for human, and via homology translation for mouse and rat. The data are accessible via OmniPath's web service (https://omnipathdb.org/), a Cytoscape plug-in, and packages in R/Bioconductor and Python, providing access options for computational and experimental scientists. We created workflows with tutorials to facilitate the analysis of cell-cell interactions and affected downstream intracellular signaling processes. OmniPath provides a single access point to knowledge spanning intra- and intercellular processes for data analysis, as we demonstrate in applications studying SARS-CoV-2 infection and ulcerative colitis.",OmniPath,OmniPath,https://omnipathdb.org/,a single access point to knowledge spanning intra- and intercellular processes for data analysis +33757430,ATAV: a comprehensive platform for population-scale genomic analyses.,"

Background

A common approach for sequencing studies is to do joint-calling and store variants of all samples in a single file. If new samples are continually added or controls are re-used for several studies, the cost and time required to perform joint-calling for each analysis can become prohibitive.

Results

We present ATAV, an analysis platform for large-scale whole-exome and whole-genome sequencing projects. ATAV stores variant and per site coverage data for all samples in a centralized database, which is efficiently queried by ATAV to support diagnostic analyses for trios and singletons, as well as rare-variant collapsing analyses for finding disease associations in complex diseases. Runtime logs ensure full reproducibility and the modularized ATAV framework makes it extensible to continuous development. Besides helping with the identification of disease-causing variants for a range of diseases, ATAV has also enabled the discovery of disease-genes by rare-variant collapsing on datasets containing more than 20,000 samples. Analyses to date have been performed on data of more than 110,000 individuals demonstrating the scalability of the framework. To allow users to easily access variant-level data directly from the database, we provide a web-based interface, the ATAV data browser ( http://atavdb.org/ ). Through this browser, summary-level data for more than 40,000 samples can be queried by the general public representing a mix of cases and controls of diverse ancestries. Users have access to phenotype categories of variant carriers, as well as predicted ancestry, gender, and quality metrics. In contrast to many other platforms, the data browser is able to show data of newly-added samples in real-time and therefore evolves rapidly as more and more samples are sequenced.

Conclusions

Through ATAV, users have public access to one of the largest variant databases for patients sequenced at a tertiary care center and can look up any genes or variants of interest. Additionally, since the entire code is freely available on GitHub, ATAV can easily be deployed by other groups that wish to build their own platform, database, and user interface.",ATAV,ATAV,http://atavdb.org,a comprehensive platform for population-scale genomic analyses +33769951,A Comprehensive Map of mRNAs and Their Isoforms across All 14 Renal Tubule Segments of Mouse.,"

Background

The repertoire of protein expression along the renal tubule depends both on regulation of transcription and regulation of alternative splicing that can generate multiple proteins from a single gene.

Methods

A full-length, small-sample RNA-seq protocol profiled transcriptomes for all 14 renal tubule segments microdissected from mouse kidneys.

Results

This study identified >34,000 transcripts, including 3709 that were expressed in a segment-specific manner. All data are provided as an online resource (https://esbl.nhlbi.nih.gov/MRECA/Nephron/). Many of the genes expressed in unique patterns along the renal tubule were solute carriers, transcription factors, or G protein-coupled receptors that account for segment-specific function. Mapping the distribution of transcripts associated with Wnk-SPAK-PKA signaling, renin-angiotensin-aldosterone signaling, and cystic diseases of the kidney illustrated the applications of the online resource. The method allowed full-length mapping of RNA-seq reads, which facilitated comprehensive, unbiased characterization of alternative exon usage along the renal tubule, including known isoforms of Cldn10, Kcnj1 (ROMK), Slc12a1 (NKCC2), Wnk1, Stk39 (SPAK), and Slc14a2 (UT-A urea transporter). It also identified many novel isoforms with segment-specific distribution. These included variants associated with altered protein structure (Slc9a8, Khk, Tsc22d1, and Scoc), and variants that may affect untranslated, regulatory regions of transcripts (Pth1r, Pkar1a, and Dab2).

Conclusions

Full-length, unbiased sequencing of transcripts identified gene-expression patterns along the mouse renal tubule. The data, provided as an online resource, include both quantitative and qualitative differences in transcripts. Identification of alternative splicing along the renal tubule may prove critical to understanding renal physiology and pathophysiology.",,,https://esbl.nhlbi.nih.gov/MRECA/Nephron/,A Comprehensive Map of mRNAs and Their Isoforms across All 14 Renal Tubule Segments of Mouse +33772585,An immunologically friendly classification of non-peptidic ligands.,"The Immune Epitope Database (IEDB) freely provides experimental data regarding immune epitopes to the scientific public. The main users of the IEDB are immunologists who can easily use our web interface to search for peptidic epitopes via their simple single-letter codes. For example, 'A' stands for 'alanine'. Similarly, users can easily navigate the IEDB's simplified NCBI taxonomy hierarchy to locate proteins from specific organisms. However, some epitopes are non-peptidic, such as carbohydrates, lipids, chemicals and drugs, and it is more challenging to consistently name them and search upon, making access to their data more problematic for immunologists. Therefore, we set out to improve access to non-peptidic epitope data in the IEDB through the simplification of the non-peptidic hierarchy used in our search interfaces. Here, we present these efforts and their outcomes. Database URL: http://www.iedb.org/.",Immune Epitope Database,IEDB,http://www.iedb.org/, +33776770,An Open Access Database of Licensed Cancer Drugs.,"A global, comprehensive and open access listing of approved anticancer drugs does not currently exist. Partial information is available from multiple sources, including regulatory authorities, national formularies and scientific agencies. Many such data sources include drugs used in oncology for supportive care, diagnostic or other non-antineoplastic uses. 
We describe a methodology to combine and cleanse relevant data from multiple sources to produce an open access database of drugs licensed specifically for therapeutic antineoplastic purposes. The resulting list is provided as an open access database, (http://www.redo-project.org/cancer-drugs-db/), so that it may be used by researchers as input for further research projects, for example literature-based text mining for drug repurposing.",,,http://www.redo-project.org/cancer-drugs-db/,An Open Access Database of Licensed Cancer Drugs +33780471,MCPdb: The bacterial microcompartment database.,"Bacterial microcompartments are organelle-like structures composed entirely of proteins. They have evolved to carry out several distinct and specialized metabolic functions in a wide variety of bacteria. Their outer shell is constructed from thousands of tessellating protein subunits, encapsulating enzymes that carry out the internal metabolic reactions. The shell proteins are varied, with single, tandem and permuted versions of the PF00936 protein family domain comprising the primary structural component of their polyhedral architecture, which is reminiscent of a viral capsid. While considerable amounts of structural and biophysical data have been generated in the last 15 years, the existing functionalities of current resources have limited our ability to rapidly understand the functional and structural properties of microcompartments (MCPs) and their diversity. In order to make the remarkable structural features of bacterial microcompartments accessible to a broad community of scientists and non-specialists, we developed MCPdb: The Bacterial Microcompartment Database (https://mcpdb.mbi.ucla.edu/). MCPdb is a comprehensive resource that categorizes and organizes known microcompartment protein structures and their larger assemblies. 
To emphasize the critical roles symmetric assembly and architecture play in microcompartment function, each structure in the MCPdb is validated and annotated with respect to: (1) its predicted natural assembly state (2) tertiary structure and topology and (3) the metabolic compartment type from which it derives. The current database includes 163 structures and is available to the public with the anticipation that it will serve as a growing resource for scientists interested in understanding protein-based metabolic organelles in bacteria.",bacterial microcompartment database,MCPdb,https://mcpdb.mbi.ucla.edu/,a comprehensive resource that categorizes and organizes known microcompartment protein structures and their larger assemblies +33784373,Bioinformatics tools developed to support BioCompute Objects.,"Developments in high-throughput sequencing (HTS) result in an exponential increase in the amount of data generated by sequencing experiments, an increase in the complexity of bioinformatics analysis reporting and an increase in the types of data generated. These increases in volume, diversity and complexity of the data generated and their analysis expose the necessity of a structured and standardized reporting template. BioCompute Objects (BCOs) provide the requisite support for communication of HTS data analysis that includes support for workflow, as well as data, curation, accessibility and reproducibility of communication. BCOs standardize how researchers report provenance and the established verification and validation protocols used in workflows while also being robust enough to convey content integration or curation in knowledge bases. BCOs that encapsulate tools, platforms, datasets and workflows are FAIR (findable, accessible, interoperable and reusable) compliant. 
Providing operational workflow and data information facilitates interoperability between platforms and incorporation of future dataset within an HTS analysis for use within industrial, academic and regulatory settings. Cloud-based platforms, including High-performance Integrated Virtual Environment (HIVE), Cancer Genomics Cloud (CGC) and Galaxy, support BCO generation for users. Given the 100K+ userbase between these platforms, BioCompute can be leveraged for workflow documentation. In this paper, we report the availability of platform-dependent and platform-independent BCO tools: HIVE BCO App, CGC BCO App, Galaxy BCO API Extension and BCO Portal. Community engagement was utilized to evaluate tool efficacy. We demonstrate that these tools further advance BCO creation from text editing approaches used in earlier releases of the standard. Moreover, we demonstrate that integrating BCO generation within existing analysis platforms greatly streamlines BCO creation while capturing granular workflow details. We also demonstrate that the BCO tools described in the paper provide an approach to solve the long-standing challenge of standardizing workflow descriptions that are both human and machine readable while accommodating manual and automated curation with evidence tagging. Database URL: https://www.biocomputeobject.org/resources.",,,https://www.biocomputeobject.org/resources, +33813885,MolluscDB: a genome and transcriptome database for molluscs.,"As sequencing becomes more accessible and affordable, the analysis of genomic and transcriptomic data has become a cornerstone of many research initiatives. Communities with a focus on particular taxa or ecosystems need solutions capable of aggregating genomic resources and serving them in a standardized and analysis-friendly manner. Taxon-focussed resources can be more flexible in addressing the needs of a research community than can universal or general databases. 
Here, we present MolluscDB, a genome and transcriptome database for molluscs. MolluscDB offers a rich ecosystem of tools, including an Ensembl browser, a BLAST server for homology searches and an HTTP server from which any dataset present in the database can be downloaded. To demonstrate the utility of the database and verify the quality of its data, we imported data from assembled genomes and transcriptomes of 22 species, estimated the phylogeny of Mollusca using single-copy orthologues, explored patterns of gene family size change and interrogated the data for biomineralization-associated enzymes and shell matrix proteins. MolluscDB provides an easy-to-use and openly accessible data resource for the research community. This article is part of the Theo Murphy meeting issue 'Molluscan genomics: broad insights and future directions for a neglected phylum'.",MolluscDB,MolluscDB,,a genome and transcriptome database for molluscs +33843105,Sequence and evolutionary analysis of bacterial ribosomal S1 proteins.,"The multi-domain bacterial S1 protein is the largest and most functionally important ribosomal protein of the 30S subunit, which interacts with both mRNA and proteins. The family of ribosomal S1 proteins differs in the classical sense from a protein with tandem repeats and has a ""bead-on-string"" organization, where each repeat is folded into a globular domain. Based on our recent data, the study of evolutionary relationships for the bacterial phyla will provide evidence for one of the proposed theories of the evolutionary development of proteins with structural repeats: from multiple repeats of assembles to single repeats, or vice versa. In this comparative analysis of 1333 S1 sequences that were identified in 24 different phyla, we demonstrate how such phyla can form independently/dependently during evolution. To the best of our knowledge, this work is the first study of the evolutionary history of bacterial ribosomal S1 proteins. 
The collected and structured data can be useful to computer biologists as a resource for determining percent identity, amino acid composition and logo motifs, as well as dN/dS ratio in bacterial S1 protein. The obtained research data indicate that the evolutionary development of bacterial ribosomal S1 proteins evolved from multiple assemblies to single repeat. The presented data are integrated into the server, which can be accessed at http://oka.protres.ru:4200.",,,http://oka.protres.ru:4200, +33849075,iNetModels 2.0: an interactive visualization and database of multi-omics data.,"It is essential to reveal the associations between various omics data for a comprehensive understanding of the altered biological process in human wellness and disease. To date, very few studies have focused on collecting and exhibiting multi-omics associations in a single database. Here, we present iNetModels, an interactive database and visualization platform of Multi-Omics Biological Networks (MOBNs). This platform describes the associations between the clinical chemistry, anthropometric parameters, plasma proteomics, plasma metabolomics, as well as metagenomics for oral and gut microbiome obtained from the same individuals. Moreover, iNetModels includes tissue- and cancer-specific Gene Co-expression Networks (GCNs) for exploring the connections between the specific genes. This platform allows the user to interactively explore a single feature's association with other omics data and customize its particular context (e.g. male/female specific). The users can also register their data for sharing and visualization of the MOBNs and GCNs. Moreover, iNetModels allows users who do not have a bioinformatics background to facilitate human wellness and disease research. 
iNetModels can be accessed freely at https://inetmodels.com without any limitation.",iNetModels 2.0,iNetModels,https://inetmodels.com,an interactive visualization and database of multi-omics data +33849445,TANTIGEN 2.0: a knowledge base of tumor T cell antigens and epitopes.,"We previously developed TANTIGEN, a comprehensive online database cataloging more than 1000 T cell epitopes and HLA ligands from 292 tumor antigens. In TANTIGEN 2.0, we significantly expanded coverage in both immune response targets (T cell epitopes and HLA ligands) and tumor antigens. It catalogs 4,296 antigen variants from 403 unique tumor antigens and more than 1500 T cell epitopes and HLA ligands. We also included neoantigens, a class of tumor antigens generated through mutations resulting in new amino acid sequences in tumor antigens. TANTIGEN 2.0 contains validated TCR sequences specific for cognate T cell epitopes and tumor antigen gene/mRNA/protein expression information in major human cancers extracted by Human Pathology Atlas. TANTIGEN 2.0 is a rich data resource for tumor antigens and their associated epitopes and neoepitopes. It hosts a set of tailored data analytics tools tightly integrated with the data to form meaningful analysis workflows. It is freely available at http://projects.met-hilab.org/tadb .",TANTIGEN 2.0,TANTIGEN,http://projects.met-hilab.org/tadb,a knowledge base of tumor T cell antigens and epitopes +33858332,Predicting tumor response to drugs based on gene-expression biomarkers of sensitivity learned from cancer cell lines.,"

Background

Human cancer cell line profiling and drug sensitivity studies provide valuable information about the therapeutic potential of drugs and their possible mechanisms of action. The goal of those studies is to translate the findings from in vitro studies of cancer cell lines into in vivo therapeutic relevance and, eventually, patients' care. Tremendous progress has been made.

Results

In this work, we built predictive models for 453 drugs using data on gene expression and drug sensitivity (IC50) from cancer cell lines. We identified many known drug-gene interactions and uncovered several potentially novel drug-gene associations. Importantly, we further applied these predictive models to ~17,000 bulk RNA-seq samples from The Cancer Genome Atlas (TCGA) and the Genotype-Tissue Expression (GTEx) database to predict drug sensitivity for both normal and tumor tissues. We created a web site for users to visualize and download our predicted data ( https://manticore.niehs.nih.gov/cancerRxTissue ). Using trametinib as an example, we showed that our approach can faithfully recapitulate the known tumor specificity of the drug.

Conclusions

We demonstrated that our approach can predict drugs that 1) are tumor-type specific; 2) elicit higher sensitivity from tumor compared to corresponding normal tissue; 3) elicit differential sensitivity across breast cancer subtypes. If validated, our prediction could have relevance for preclinical drug testing and in phase I clinical design.",,,https://manticore.niehs.nih.gov/cancerRxTissue, +33858848,HLA Ligand Atlas: a benign reference of HLA-presented peptides to improve T-cell-based cancer immunotherapy.,"

Background

The human leucocyte antigen (HLA) complex controls adaptive immunity by presenting defined fractions of the intracellular and extracellular protein content to immune cells. Understanding the benign HLA ligand repertoire is a prerequisite to define safe T-cell-based immunotherapies against cancer. Due to the poor availability of benign tissues, if available, normal tissue adjacent to the tumor has been used as a benign surrogate when defining tumor-associated antigens. However, this comparison has proven to be insufficient and even resulted in lethal outcomes. In order to match the tumor immunopeptidome with an equivalent counterpart, we created the HLA Ligand Atlas, the first extensive collection of paired HLA-I and HLA-II immunopeptidomes from 227 benign human tissue samples. This dataset facilitates a balanced comparison between tumor and benign tissues on HLA ligand level.

Methods

Human tissue samples were obtained from 16 subjects at autopsy, five thymus samples and two ovary samples originating from living donors. HLA ligands were isolated via immunoaffinity purification and analyzed in over 1200 liquid chromatography mass spectrometry runs. Experimentally and computationally reproducible protocols were employed for data acquisition and processing.

Results

The initial release covers 51 HLA-I and 86 HLA-II allotypes presenting 90,428 HLA-I- and 142,625 HLA-II ligands. The HLA allotypes are representative for the world population. We observe that immunopeptidomes differ considerably between tissues and individuals on source protein and HLA-ligand level. Moreover, we discover 1407 HLA-I ligands from non-canonical genomic regions. Such peptides were previously described in tumors, peripheral blood mononuclear cells (PBMCs), healthy lung tissues and cell lines. In a case study in glioblastoma, we show that potential on-target off-tumor adverse events in immunotherapy can be avoided by comparing tumor immunopeptidomes to the provided multi-tissue reference.

Conclusion

Given that T-cell-based immunotherapies, such as CAR-T cells, affinity-enhanced T cell transfer, cancer vaccines and immune checkpoint inhibition, have significant side effects, the HLA Ligand Atlas is the first step toward defining tumor-associated targets with an improved safety profile. The resource provides insights into basic and applied immune-associated questions in the context of cancer immunotherapy, infection, transplantation, allergy and autoimmunity. It is publicly available and can be browsed in an easy-to-use web interface at https://hla-ligand-atlas.org .",HLA Ligand Atlas,HLA Ligand Atlas,https://hla-ligand-atlas.org,a benign reference of HLA-presented peptides to improve T-cell-based cancer immunotherapy +33876217,CanDriS: posterior profiling of cancer-driving sites based on two-component evolutionary model.,"Current cancer genomics databases have accumulated millions of somatic mutations that remain to be further explored. Due to the over-excess mutations unrelated to cancer, the great challenge is to identify somatic mutations that are cancer-driven. Under the notion that carcinogenesis is a form of somatic-cell evolution, we developed a two-component mixture model: while the ground component corresponds to passenger mutations, the rapidly evolving component corresponds to driver mutations. Then, we implemented an empirical Bayesian procedure to calculate the posterior probability of a site being cancer-driven. Based on these, we developed a software CanDriS (Cancer Driver Sites) to profile the potential cancer-driving sites for thousands of tumor samples from the Cancer Genome Atlas and International Cancer Genome Consortium across tumor types and pan-cancer level. As a result, we identified that approximately 1% of the sites have posterior probabilities larger than 0.90 and listed potential cancer-wide and cancer-specific driver mutations. 
By comprehensively profiling all potential cancer-driving sites, CanDriS greatly enhances our ability to refine our knowledge of the genetic basis of cancer and might guide clinical medication in the upcoming era of precision medicine. The results were displayed in a database CandrisDB (http://biopharm.zju.edu.cn/candrisdb/).",CandrisDB,CandrisDB,http://biopharm.zju.edu.cn/candrisdb/,posterior profiling of cancer-driving sites based on two-component evolutionary model +33882119,BC-TFdb: a database of transcription factor drivers in breast cancer.,"Transcription factors (TFs) are DNA-binding proteins, which regulate many essential biological functions. In several cancer types, TF function is altered by various direct mechanisms, including gene amplification or deletion, point mutations, chromosomal translocations, expression alterations, as well as indirectly by non-coding DNA mutations influencing the binding of the TF. TFs are also actively involved in breast cancer (BC) initiation and progression. Herein, we have developed an open-access database, BC-TFdb (Breast Cancer Transcription Factors database), of curated, non-redundant TF involved in BC. The database provides BC driver TFs related information including genomic sequences, proteomic sequences, structural data, pathway information, mutations information, DNA binding residues, survival and therapeutic resources. The database will be a useful platform for researchers to obtain BC-related TF-specific information. High-quality datasets are downloadable for users to evaluate and develop computational methods for drug designing against BC. 
Database URL: https://www.dqweilab-sjtu.com/index.php.",Breast Cancer Transcription Factors database,BC-TFdb,https://www.dqweilab-sjtu.com/index.php,a database of transcription factor drivers in breast cancer +33898816,FermFooDb: A database of bioactive peptides derived from fermented foods.,"Globally fermented foods are in demands due to their functional and nutritional benefits. These foods are sources of probiotic organisms and bioactive peptides, various amino acids, enzymes etc. that provides numerous health benefits. FermFooDb (https://webs.iiitd.edu.in/raghava/fermfoodb/) is a manually curated database of bioactive peptides derived from wide range of foods that maintain comprehensive information about peptides and process of fermentation. This database comprises of 2205 entries with following major fields, peptide sequence, Mass and IC50, food source, functional activity, fermentation conditions, starter culture, testing conditions of sequences in vitro or in vivo, type of model and method of analysis. The bioactive peptides in our database have wide range of therapeutic potentials that includes antihypertensive, ACE-inhibitory, antioxidant, antimicrobial, immunomodulatory and cholesterol lowering peptides. These bioactive peptides were derived from different types of fermented foods that include milk, cheese, yogurt, wheat and rice. Numerous, web-based tools have been integrated to retrieve data, peptide mapping of proteins, similarity search and multiple-sequence alignment. 
This database will be useful for the food industry and researchers to explore full therapeutic potential of fermented foods from specific cultures.",FermFooDb,FermFooDb,https://webs.iiitd.edu.in/raghava/fermfoodb/,A database of bioactive peptides derived from fermented foods +33905618,The PhenX Toolkit: Establishing Standard Measures for COVID-19 Research.,"The PhenX (consensus measures for Phenotypes and eXposures) Toolkit (https://www.phenxtoolkit.org/) is a publicly available, web-based catalog of recommended, well-established measurement protocols of phenotypes and exposures. The goal of PhenX is to facilitate the use of standard measures, enhance data interoperability, and promote collaborative and translational research. PhenX is driven by the scientific community and historically has depended on working groups of experts to recommend measures for release in the PhenX Toolkit. The urgent need for recommended, standard measures for COVID-19 research triggered the development of a ""rapid release"" process for releasing new content in the PhenX Toolkit. Initially, PhenX collaborated with the National Institutes of Health (NIH) Office of Behavioral and Social Sciences Research, the National Human Genome Research Institute, and the NIH Disaster Research Response (DR2) program to create a library of COVID-19 measurement protocols. With additional support from NIH, PhenX adapted crowdsourcing techniques to accelerate prioritization and recommendation of protocols for release in the PhenX Toolkit. Prioritized COVID-19-specific protocols were used to anchor and define specialty collections of protocols that were subject to review and approval by the PhenX Steering Committee. In addition to the COVID-19-specific protocols, the specialty collections include existing, well-established PhenX protocols, use of which will further enhance data interoperability and cross-study analysis. 
The COVID-19 specialty collections are Behaviors and Risks; Ethnicity, Race and Demographics; History, Treatment and Outcomes; Information Resources; Psychosocial and Mental Health; and Socioeconomic. The development and usage of PhenX COVID-19 specialty collections are described in this article. © 2021 The Authors. Basic Protocol: Selecting COVID-19 protocols.",PhenX Toolkit,,https://www.phenxtoolkit.org/,"a publicly available, web-based catalog of recommended, well-established measurement protocols of phenotypes and exposures" +33906563,M6ADD: a comprehensive database of m6A modifications in diseases.,"N6-methyladenosine (m6A) modification is an important regulatory factor affecting diseases, including multiple cancers and it is a developing direction for targeted disease therapy. Here, we present the M6ADD (m6A-diseases database) database, a public data resource containing manually curated data on potential m6A-disease associations for which some experimental evidence is available; the related high-throughput sequencing data are also provided and analysed by using different computational methods. To give researchers a tool to query the m6A modification data, the M6ADD was designed as a web-based comprehensive resource focusing on the collection, storage and online analysis of m6A modifications, aimed at exploring the associations between m6A modification and gene disorders and diseases. The M6ADD includes 222 experimentally confirmed m6A-disease associations, involving 59 diseases from a review of more than 2000 published papers. The M6ADD also includes 409,229 m6A-disease associations obtained by computational and statistical methods from 30 high-throughput sequencing datasets. In addition, we provide data on 5239 potential m6A regulatory proteins related to 24 cancers based on network analysis prediction methods. In addition, we have developed a tool to explore the function of m6A-modified genes through the protein-protein interaction networks. 
The M6ADD can be accessed at http://m6add.edbc.org/.",m6A-diseases database,M6ADD,http://m6add.edbc.org/,a comprehensive database of m6A modifications in diseases +33929905,Risk-Based Chemical Ranking and Generating a Prioritized Human Exposome Database.,"

Background

Due to the ubiquitous use of chemicals in modern society, humans are increasingly exposed to thousands of chemicals that contribute to a major portion of the human exposome. Should a comprehensive and risk-based human exposome database be created, it would be conducive to the rapid progress of human exposomics research. In addition, once a xenobiotic is biotransformed with distinct half-lives upon exposure, monitoring the parent compounds alone may not reflect the actual human exposure. To address these questions, a comprehensive and risk-prioritized human exposome database is needed.

Objectives

Our objective was to set up a comprehensive risk-prioritized human exposome database including physicochemical properties as well as risk prediction and develop a graphical user interface (GUI) that has the ability to conduct searches for content associated with chemicals in our database.

Methods

We built a comprehensive risk-prioritized human exposome database by text mining and database fusion. Subsequently, chemicals were prioritized by integrating exposure level obtained from the Systematic Empirical Evaluation of Models with toxicity data predicted by the Toxicity Estimation Software Tool and the Toxicological Priority Index calculated from the ToxCast database. The biotransformation half-lives (HLBs) of all the chemicals were assessed using the Iterative Fragment Selection approach and biotransformation products were predicted using the previously developed BioTransformer machine-learning method.

Results

We compiled a human exposome database of >20,000 chemicals, prioritized 13,441 chemicals based on probabilistic hazard quotient and 7,770 chemicals based on risk index, and provided a predicted biotransformation metabolite database of >95,000 metabolites. In addition, a user-interactive Java software (Oracle)-based search GUI was generated to enable open access to this new resource.

Discussion

Our database can be used to guide chemical management and enhance scientific understanding to rapidly and effectively prioritize chemicals for comprehensive biomonitoring in epidemiological investigations. https://doi.org/10.1289/EHP7722.",,,https://doi.org/10.1289/EHP7722,a comprehensive risk-prioritized human exposome database +33942874,Human IRES Atlas: an integrative platform for studying IRES-driven translational regulation in humans.,"It is now known that cap-independent translation initiation facilitated by internal ribosome entry sites (IRESs) is vital in selective cellular protein synthesis under stress and different physiological conditions. However, three problems make it hard to understand transcriptome-wide cellular IRES-mediated translation initiation mechanisms: (i) complex interplay between IRESs and other translation initiation-related information, (ii) reliability issue of in silico cellular IRES investigation and (iii) labor-intensive in vivo IRES identification. In this research, we constructed the Human IRES Atlas database for a comprehensive understanding of cellular IRESs in humans. First, currently available and suitable IRES prediction tools (IRESfinder, PatSearch and IRESpy) were used to obtain transcriptome-wide human IRESs. Then, we collected eight genres of translation initiation-related features to help study the potential molecular mechanisms of each of the putative IRESs. Three functional tests (conservation, structural RNA-protein scores and conditional translation efficiency) were devised to evaluate the functionality of the identified putative IRESs. Moreover, an easy-to-use interface and an IRES-translation initiation interaction map for each gene transcript were implemented to help understand the interactions between IRESs and translation initiation-related features. 
Researchers can easily search/browse an IRES of interest using the web interface and deduce testable mechanism hypotheses of human IRES-driven translation initiation based on the integrated results. In summary, Human IRES Atlas integrates putative IRES elements and translation initiation-related experiments for better usage of these data and deduction of mechanism hypotheses. Database URL: http://cobishss0.im.nuk.edu.tw/Human_IRES_Atlas/.",Human IRES Atlas,,http://cobishss0.im.nuk.edu.tw/Human_IRES_Atlas/,an integrative platform for studying IRES-driven translational regulation in humans +33950201,Trips-Viz: an environment for the analysis of public and user-generated ribosome profiling data.,"Trips-Viz (https://trips.ucc.ie/) is an interactive platform for the analysis and visualization of ribosome profiling (Ribo-Seq) and shotgun RNA sequencing (RNA-seq) data. This includes publicly available and user generated data, hence Trips-Viz can be classified as a database and as a server. As a database it provides access to many processed Ribo-Seq and RNA-seq data aligned to reference transcriptomes which has been expanded considerably since its inception. Here, we focus on the server functionality of Trips-viz which also has been greatly improved. Trips-viz now enables visualisation of proteomics data from a large number of processed mass spectrometry datasets. It can be used to support translation inferred from Ribo-Seq data. Users are now able to upload a custom reference transcriptome as well as data types other than Ribo-Seq/RNA-Seq. Incorporating custom data has been streamlined with RiboGalaxy (https://ribogalaxy.ucc.ie/) integration. The other new functionality is the rapid detection of translated open reading frames (ORFs) through a simple easy to use interface. 
The analysis of differential expression has been also improved via integration of DESeq2 and Anota2seq in addition to a number of other improvements of existing Trips-viz features.",Trips-Viz,Trips-Viz,https://trips.ucc.ie/,an environment for the analysis of public and user-generated ribosome profiling data +33952332,SANCDB: an update on South African natural compounds and their readily available analogs.,"

Background

South African Natural Compounds Database (SANCDB; https://sancdb.rubi.ru.ac.za/ ) is the sole and a fully referenced database of natural chemical compounds of South African biodiversity. It is freely available, and since its inception in 2015, the database has become an important resource to several studies. Its content has been: used as training data for machine learning models; incorporated to larger databases; and utilized in drug discovery studies for hit identifications.

Description

Here, we report the updated version of SANCDB. The new version includes 412 additional compounds that have been reported since 2015, giving a total of 1012 compounds in the database. Further, although natural products (NPs) are an important source of unique scaffolds, they have a major drawback due to their complex structure resulting in low synthetic feasibility in the laboratory. With this in mind, SANCDB is, now, updated to provide direct links to commercially available analogs from two major chemical databases namely Mcule and MolPort. To our knowledge, this feature is not available in other NP databases. Additionally, for easier access to information by users, the database and website interface were updated. The compounds are now downloadable in many different chemical formats.

Conclusions

The drug discovery process relies heavily on NPs due to their unique chemical organization. This has inspired the establishment of numerous NP chemical databases. With the emergence of newer chemoinformatic technologies, existing chemical databases require constant updates to facilitate information accessibility and integration by users. Besides increasing the NPs compound content, the updated SANCDB allows users to access the individual compounds (if available) or their analogs from commercial databases seamlessly.",South African Natural Compounds Database,SANCDB,https://sancdb.rubi.ru.ac.za,a fully referenced database of natural chemical compounds of South African biodiversity +33970229,ChemHub: a knowledgebase of functional chemicals for synthetic biology studies.,"

Summary

The field of synthetic biology lacks a comprehensive knowledgebase for selecting synthetic target molecules according to their functions, economic applications, and known biosynthetic pathways. We implemented ChemHub, a knowledgebase containing >90,000 chemicals and their functions, along with related biosynthesis information for these chemicals that was manually extracted from >600,000 published studies by more than 100 people over the past 10 years.

Availability and implementation

Multiple algorithms were implemented to enable biosynthetic pathway design and precursor discovery, which can support investigation of the biosynthetic potential of these functional chemicals. ChemHub is freely available at: http://www.rxnfinder.org/chemhub/.

Supplementary information

Supplementary data are available at Bioinformatics online.",ChemHub,ChemHub,http://www.rxnfinder.org/chemhub/,a knowledgebase of functional chemicals for synthetic biology studies +33973408,"hu.MAP 2.0: integration of over 15,000 proteomic experiments builds a global compendium of human multiprotein assemblies.","A general principle of biology is the self-assembly of proteins into functional complexes. Characterizing their composition is, therefore, required for our understanding of cellular functions. Unfortunately, we lack knowledge of the comprehensive set of identities of protein complexes in human cells. To address this gap, we developed a machine learning framework to identify protein complexes in over 15,000 mass spectrometry experiments which resulted in the identification of nearly 7,000 physical assemblies. We show our resource, hu.MAP 2.0, is more accurate and comprehensive than previous state of the art high-throughput protein complex resources and gives rise to many new hypotheses, including for 274 completely uncharacterized proteins. Further, we identify 253 promiscuous proteins that participate in multiple complexes pointing to possible moonlighting roles. We have made hu.MAP 2.0 easily searchable in a web interface (http://humap2.proteincomplexes.org/), which will be a valuable resource for researchers across a broad range of interests including systems biology, structural biology, and molecular explanations of disease.",hu.MAP 2.0,hu.MAP 2.0,http://humap2.proteincomplexes.org/, +33984507,HisPhosSite: A comprehensive database of histidine phosphorylated proteins and sites.,"Histidine phosphorylation is critically important in a variety of cellular processes including signal transduction, cell cycle, proliferation, differentiation, and apoptosis. It is estimated to account for 6% of all phosphorylated amino acids. However, due to the acid lability of the PN bond, the study of pHis lags far behind that of pSer, pThr, and pTyr. 
Recently, the development and use of pHis-specific antibodies and methodologies have led to a resurgence in the study of histidine phosphorylation. Although a considerable number of pHis proteins and sites have been discovered, most of them have not been manually curated and integrated to any databases. There is a lack of a data repository for pHis, and such work is expected to help further systemic studies of pHis. Thus, we present a comprehensive resource database of histidine phosphorylation (HisPhosSite) by curating experimentally validated pHis proteins and sites and compiling putative pHis sites with ortholog search. HisPhosSite contains 776 verified pHis sites and 2702 verified pHis proteins in 38 eukaryotic and prokaryotic species and 15,378 putative pHis sites and 10,816 putative pHis proteins in 1366 species. HisPhosSite provides rich annotations of pHis sites and proteins and multiple search engines (including motif search and BLAST search) for users to locate pHis sites of interest. HisPhosSite is available at http://reprod.njmu.edu.cn/hisphossite. SIGNIFICANCE: Histidine phosphorylation is involved in a variety of cellular processes as well as cancers, and it has been proved to be more common than previously thought. The HisPhosSite database was developed to collect pHis data from published literatures with experimental evidences. Unification of the identified pHis proteins and sites will give researchers an informative resource for histidine phosphorylation. HisPhosSite has a user-friendly interface with multiple search engines for users to locate pHis sites of interest. In addition, the database provides rich structural and functional annotations. 
HisPhosSite will help future studies and elucidation of the functions of histidine phosphorylation.",HisPhosSite,HisPhosSite,http://reprod.njmu.edu.cn/hisphossite,A comprehensive database of histidine phosphorylated proteins and sites +33985427,TarDB: an online database for plant miRNA targets and miRNA-triggered phased siRNAs.,"

Background

In plants, microRNAs (miRNAs) are pivotal regulators of plant development and stress responses. Different computational tools and web servers have been developed for plant miRNA target prediction; however, in silico prediction normally contains false positive results. In addition, many plant miRNA target prediction servers lack information for miRNA-triggered phased small interfering RNAs (phasiRNAs). Creating a comprehensive and relatively high-confidence plant miRNA target database is much needed.

Results

Here, we report TarDB, an online database that collects three categories of relatively high-confidence plant miRNA targets: (i) cross-species conserved miRNA targets; (ii) degradome/PARE (Parallel Analysis of RNA Ends) sequencing supported miRNA targets; (iii) miRNA-triggered phasiRNA loci. TarDB provides a user-friendly interface that enables users to easily search, browse and retrieve miRNA targets and miRNA initiated phasiRNAs in a broad variety of plants. TarDB has a comprehensive collection of reliable plant miRNA targets containing previously unreported miRNA targets and miRNA-triggered phasiRNAs even in the well-studied model species. Most of these novel miRNA targets are relevant to lineage-specific or species-specific miRNAs. TarDB data is freely available at http://www.biosequencing.cn/TarDB .

Conclusions

In summary, TarDB serves as a useful web resource for exploring relatively high-confidence miRNA targets and miRNA-triggered phasiRNAs in plants.",TarDB,TarDB,http://www.biosequencing.cn/TarDB,online database for plant miRNA targets and miRNA-triggered phased siRNAs +33985559,TGIF-DB: terse genomics interface for developing botany.,"

Objectives

Pearl millet (Pennisetum glaucum) is a staple cereal crop for semi-arid regions. Its whole genome sequence and deduced putative gene sequences are available. However, the functions of many pearl millet genes are unknown. Situations are similar for other crop species such as garden asparagus (Asparagus officinalis), chickpea (Cicer arietinum) and Tartary buckwheat (Fagopyrum tataricum). The objective of the data presented here was to improve functional annotations of genes of pearl millet, garden asparagus, chickpea and Tartary buckwheat with gene annotations of model plants, to systematically provide such annotations as well as their sequences on a website, and thereby to promote genomics for those crops.

Data description

Sequences of genomes and transcripts of pearl millet, garden asparagus, chickpea and Tartary buckwheat were downloaded from a public database. These transcripts were associated with functional annotations of their Arabidopsis thaliana and rice (Oryza sativa) counterparts identified by BLASTX. Conserved domains in protein sequences of those species were identified by the HMMER scan with the Pfam database. The resulting data was deposited in the figshare repository and can be browsed on the Terse Genomics Interface for Developing Botany (TGIF-DB) website ( http://webpark2116.sakura.ne.jp/rlgpr/ ).",Terse Genomics Interface for Developing Botany,TGIF-DB,http://webpark2116.sakura.ne.jp/rlgpr,terse genomics interface for developing botany +33993461,TUPDB: Target-Unrelated Peptide Data Bank.,"The isolation of target-unrelated peptides (TUPs) through biopanning remains as a major problem of phage display selection experiments. These TUPs do not have any actual affinity toward targets of interest, which tend to be mistakenly identified as target-binding peptides. Therefore, an information portal for storing TUP data is urgently needed. Here, we present a TUP data bank (TUPDB), which is a comprehensive, manually curated database of approximately 73 experimentally verified TUPs and 1963 potential TUPs collected from TUPScan, the BDB database, and public research articles. The TUPScan tool has been integrated in TUPDB to facilitate TUP analysis. We believe that TUPDB can help identify and remove TUPs in future reports in the biopanning community. The database is of great importance to improving the quality of phage display-based epitope mapping and promoting the development of vaccines, diagnostics, and therapeutics. 
The TUPDB database is available at http://i.uestc.edu.cn/tupdb .",TUP data bank,TUPDB,http://i.uestc.edu.cn/tupdb,"a comprehensive, manually curated database of approximately 73 experimentally verified TUPs and 1963 potential TUPs collected from TUPScan, the BDB database, and public research articles" +33994075,A Review of the Literature Organized Into a New Database: RHeference.,"Hundreds of articles containing heterogeneous data describe D variants or add to the knowledge of known alleles. Data can be difficult to find despite existing online blood group resources and genetic and literature databases. We have developed a modern, elaborate database for D variants, thanks to an extensive literature search with meticulous curation of 387 peer-reviewed articles and 80 abstracts from major conferences and other sources. RHeference contains entries for 710 RHD alleles, 11 RHCE alleles, 30 phenotype descriptions (preventing data loss from historical sources), 35 partly characterized alleles, 3 haplotypes, and 16 miscellaneous entries. The entries include molecular, phenotypic, serological, alloimmunization, haplotype, geographical, and other data, detailed for each source. The main characteristics are summarized for each entry. The sources for all information are included and easily accessible through doi and PMID links. Overall, the database contains more than 10,000 individual pieces of data. We have set up the database architecture based on our previous expertise on database setup and biocuration for other topics, using modern technologies such as the Django framework, BioPython, Bootstrap, and Jquery. This architecture allows an easy access to data and enables simple and complex queries: combining multiple mutations, keywords, or any of the characteristics included in the database. RHeference provides a complement to existing resources and will continue to grow as our knowledge expands and new articles are published. 
The database url is http://www.rheference.org/.",RHeference,RHeference,http://www.rheference.org/,"a modern, elaborate database for D variants" +33995920,ExVe: The knowledge base of orthologous proteins identified in fungal extracellular vesicles.,"Extracellular vesicles (EVs) are double-membrane particles associated with intercellular communication. Since the discovery of EV production in the fungus Cryptococcus neoformans, the importance of EV release in its physiology and pathogenicity has been investigated. To date, few studies have investigated the proteomic content of EVs from multiple fungal species. Our main objective was to use an orthology approach to compare proteins identified by EV shotgun proteomics in 8 pathogenic and 1 nonpathogenic species. Using protein information from the UniProt and FungiDB databases, we integrated data for 11,433 hits in fungal EVs with an orthology perspective, resulting in 3,834 different orthologous groups. OG6_100083 (Hsp70 Pfam domain) was the unique orthologous group that was identified for all fungal species. Proteins with this protein domain are associated with the stress response, survival and morphological changes in different fungal species. Although no pathogenic orthologous group was found, we identified 5 orthologous groups exclusive to S. cerevisiae. Using the criteria of at least 7 pathogenic fungi to define a cluster, we detected the 4 unique pathogenic orthologous groups. Taken together, our data suggest that Hsp70-related proteins might play a key role in fungal EVs, regardless of the pathogenic status. Using an orthology approach, we identified at least 4 protein domains that could be novel therapeutic targets against pathogenic fungi. 
Our results were compiled in the herein described ExVe database, which is publicly available at http://exve.icc.fiocruz.br.",ExVe,ExVe,http://exve.icc.fiocruz.br,The knowledge base of orthologous proteins identified in fungal extracellular vesicles +34010390,Integration of 1:1 orthology maps and updated datasets into Echinobase.,"Echinobase (https://echinobase.org) is a central online platform that generates, manages and hosts genomic data relevant to echinoderm research. While the resource primarily serves the echinoderm research community, the recent release of an excellent quality genome for the frequently studied purple sea urchin (Strongylocentrotus purpuratus genome, v5.0) has provided an opportunity to adapt to the needs of a broader research community across other model systems. To this end, establishing pipelines to identify orthologous genes between echinoderms and other species has become a priority in many contexts including nomenclature, linking to data in other model organisms, and in internal functionality where data gathered in one hosted species can be associated with genes in other hosted echinoderms. This paper describes the orthology pipelines currently employed by Echinobase and how orthology data are processed to yield 1:1 ortholog mappings between a variety of echinoderms and other model taxa. We also describe functions of interest that have recently been included on the resource, including an updated developmental time course for S.purpuratus, and additional tracks for genome browsing. These data enhancements will increase the accessibility of the resource to non-echinoderm researchers and simultaneously expand the data quality and quantity available to core Echinobase users. 
Database URL: https://echinobase.org.",Echinobase,Echinobase,https://echinobase.org,"a central online platform that generates, manages and hosts genomic data relevant to echinoderm research" +34012763,Benchmarking mass spectrometry based proteomics algorithms using a simulated database.,"Protein sequencing algorithms process data from a variety of instruments that has been generated under diverse experimental conditions. Currently there is no way to predict the accuracy of an algorithm for a given data set. Most of the published algorithms and associated software has been evaluated on limited number of experimental data sets. However, these performance evaluations do not cover the complete search space the algorithmand the software might encounter in real-world. To this end, we present a database of simulated spectra that can be used to benchmark any spectra to peptide search engine. We demonstrate the usability of this database by bench marking two popular peptide sequencing engines. We show wide variation in the accuracy of peptide deductions and a complete quality profile of a given algorithm can be useful for practitioners and algorithm developers. All benchmarking data is available at https://users.cs.fiu.edu/~fsaeed/Benchmark.html.",,,https://users.cs.fiu.edu,a database of simulated spectra that can be used to benchmark any spectra to peptide search engine +34016708,Analyzing the vast coronavirus literature with CoronaCentral.,"The SARS-CoV-2 pandemic has caused a surge in research exploring all aspects of the virus and its effects on human health. The overwhelming publication rate means that researchers are unable to keep abreast of the literature. To ameliorate this, we present the CoronaCentral resource that uses machine learning to process the research literature on SARS-CoV-2 together with SARS-CoV and MERS-CoV. 
We categorize the literature into useful topics and article types and enable analysis of the contents, pace, and emphasis of research during the crisis with integration of Altmetric data. These topics include therapeutics, disease forecasting, as well as growing areas such as ""long COVID"" and studies of inequality. This resource, available at https://coronacentral.ai, is updated daily.",CoronaCentral,CoronaCentral,https://coronacentral.ai, +34017945,Identity and compatibility of reference genome resources.,"Genome analysis relies on reference data like sequences, feature annotations, and aligner indexes. These data can be found in many versions from many sources, making it challenging to identify and assess compatibility among them. For example, how can you determine which indexes are derived from identical raw sequence files, or which annotations share a compatible coordinate system? Here, we describe a novel approach to establish identity and compatibility of reference genome resources. We approach this with three advances: first, we derive unique identifiers for each resource; second, we record parent-child relationships among resources; and third, we describe recursive identifiers that determine identity as well as compatibility of coordinate systems and sequence names. These advances facilitate portability, reproducibility, and re-use of genome reference data. Available athttps://refgenie.databio.org.",,,https://refgenie.databio.org, +34020544,Ori-Finder 3: a web server for genome-wide prediction of replication origins in Saccharomyces cerevisiae.,"DNA replication is a fundamental process in all organisms; this event initiates at sites termed origins of replication. The characteristics of eukaryotic replication origins are best understood in Saccharomyces cerevisiae. For this species, origin prediction algorithms or web servers have been developed based on the sequence features of autonomously replicating sequences (ARSs). 
However, their performances are far from satisfactory. By utilizing the Z-curve methodology, we present a novel pipeline, Ori-Finder 3, for the computational prediction of replication origins in S. cerevisiae at the genome-wide level based solely on DNA sequences. The ARS exhibiting both an AT-rich stretch and ARS consensus sequence element can be predicted at the single-nucleotide level. For the identified ARSs in the S. cerevisiae reference genome, 83 and 60% of the top 100 and top 300 predictions matched the known ARS records, respectively. Based on Ori-Finder 3, we subsequently built a database of the predicted ARSs identified in more than a hundred S. cerevisiae genomes. Consequently, we developed a user-friendly web server including the ARS prediction pipeline and the predicted ARSs database, which can be freely accessed at http://tubic.tju.edu.cn/Ori-Finder3.",,,http://tubic.tju.edu.cn/Ori-Finder3,a database of the predicted ARSs identified in more than a hundred S. cerevisiae genomes +34022814,The Rhododendron Plant Genome Database (RPGD): a comprehensive online omics database for Rhododendron.,"

Background

The genus Rhododendron L. has been widely cultivated for hundreds of years around the world. Members of this genus are known for great ornamental and medicinal value. Owing to advances in sequencing technology, genomes and transcriptomes of members of the Rhododendron genus have been sequenced and published by various laboratories. With increasing amounts of omics data available, a centralized platform is necessary for effective storage, analysis, and integration of these large-scale datasets to ensure consistency, independence, and maintainability.

Results

Here, we report our development of the Rhododendron Plant Genome Database (RPGD; http://bioinfor.kib.ac.cn/RPGD/ ), which represents the first comprehensive database of Rhododendron genomics information. It includes large amounts of omics data, including genome sequence assemblies for R. delavayi, R. williamsianum, and R. simsii, gene expression profiles derived from public RNA-Seq data, functional annotations, gene families, transcription factor identification, gene homology, simple sequence repeats, and chloroplast genome. Additionally, many useful tools, including BLAST, JBrowse, Orthologous Groups, Genome Synteny Browser, Flanking Sequence Finder, Expression Heatmap, and Batch Download were integrated into the platform.

Conclusions

RPGD is designed to be a comprehensive and helpful platform for all Rhododendron researchers. Believe that RPGD will be an indispensable hub for Rhododendron studies.",Rhododendron Plant Genome Database,RPGD,http://bioinfor.kib.ac.cn/RPGD,a comprehensive online omics database for Rhododendron +34023905,"AnnotSV and knotAnnotSV: a web server for human structural variations annotations, ranking and analysis.","With the dramatic increase of pangenomic analysis, Human geneticists have generated large amount of genomic data including millions of small variants (SNV/indel) but also thousands of structural variations (SV) mainly from next-generation sequencing and array-based techniques. While the identification of the complete SV repertoire of a patient is getting possible, the interpretation of each SV remains challenging. To help identifying human pathogenic SV, we have developed a web server dedicated to their annotation and ranking (AnnotSV) as well as their visualization and interpretation (knotAnnotSV) freely available at the following address: https://www.lbgi.fr/AnnotSV/. A large amount of annotations from >20 sources is integrated in our web server including among others genes, haploinsufficiency, triplosensitivity, regulatory elements, known pathogenic or benign genomic regions, phenotypic data. An ACMG/ClinGen compliant prioritization module allows the scoring and the ranking of SV into 5 SV classes from pathogenic to benign. Finally, the visualization interface displays the annotated SV in an interactive way including popups, search fields, filtering options, advanced colouring to highlight pathogenic SV and hyperlinks to the UCSC genome browser or other public databases. 
This web server is designed for diagnostic and research analysis by providing important resources to the user.",AnnotSV,AnnotSV,https://www.lbgi.fr/AnnotSV/,"a web server for human structural variations annotations, ranking and analysis" +34023905,"AnnotSV and knotAnnotSV: a web server for human structural variations annotations, ranking and analysis.","With the dramatic increase of pangenomic analysis, Human geneticists have generated large amount of genomic data including millions of small variants (SNV/indel) but also thousands of structural variations (SV) mainly from next-generation sequencing and array-based techniques. While the identification of the complete SV repertoire of a patient is getting possible, the interpretation of each SV remains challenging. To help identifying human pathogenic SV, we have developed a web server dedicated to their annotation and ranking (AnnotSV) as well as their visualization and interpretation (knotAnnotSV) freely available at the following address: https://www.lbgi.fr/AnnotSV/. A large amount of annotations from >20 sources is integrated in our web server including among others genes, haploinsufficiency, triplosensitivity, regulatory elements, known pathogenic or benign genomic regions, phenotypic data. An ACMG/ClinGen compliant prioritization module allows the scoring and the ranking of SV into 5 SV classes from pathogenic to benign. Finally, the visualization interface displays the annotated SV in an interactive way including popups, search fields, filtering options, advanced colouring to highlight pathogenic SV and hyperlinks to the UCSC genome browser or other public databases. 
This web server is designed for diagnostic and research analysis by providing important resources to the user.",knotAnnotSV,knotAnnotSV,https://www.lbgi.fr/AnnotSV/,"a web server for human structural variations annotations, ranking and analysis" +34025933,AddictGene: An integrated knowledge base for differentially expressed genes associated with addictive substance.,"Addiction, a disorder of maladaptive brain plasticity, is associated with changes in numerous gene expressions. Nowadays, high-throughput sequencing data on addictive substance-induced gene expression have become widely available. A resource for comprehensive annotation of genes that show differential expression in response to commonly abused substances is necessary. So, we developed AddictGene by integrating gene expression, gene-gene interaction, gene-drug interaction and epigenetic regulatory annotation for over 70,156 items of differentially expressed genes associated with 7 commonly abused substances, including alcohol, nicotine, cocaine, morphine, heroin, methamphetamine, and amphetamine, across three species (human, mouse, rat). We also collected 1,141 addiction-related experimentally validated genes by techniques such as RT-PCR, northern blot and in situ hybridization. 
The easy-to-use web interface of AddictGene (http://159.226.67.237/sun/addictgedb/) allows users to search and browse multidimensional data on DEGs of their interest: 1) detailed gene-specific information extracted from the original studies; 2) basic information about the specific gene extracted from NCBI; 3) SNP associated with substance dependence and other psychiatry disorders; 4) expression alteration of specific gene in other psychiatric disorders; 5) expression patterns of interested gene across 31 primary and 54 secondary human tissues; 6) functional annotation of interested gene; 7) epigenetic regulators involved in the alteration of specific genes, including histone modifications and DNA methylation; 8) protein-protein interaction for functional linkage with interested gene; 9) drug-gene interaction for potential druggability. AddictGene offers a valuable repository for researchers to study the molecular mechanisms underlying addiction, and might provide valuable insights into potential therapies for drug abuse and relapse.",AddictGene,AddictGene,http://159.226.67.237/sun/addictgedb/,An integrated knowledge base for differentially expressed genes associated with addictive substance +34025934,Computational modeling and bioinformatic analyses of functional mutations in drug target genes in Mycobacterium tuberculosis.,"Tuberculosis (TB) continues to be the leading cause of deaths due to its persistent drug resistance and the consequent ineffectiveness of anti-TB treatment. Recent years witnessed huge amount of sequencing data, revealing mutations responsible for drug resistance. However, the lack of an up-to-date repository remains a barrier towards utilization of these data and identifying major mutations-associated with resistance. Amongst all mutations, non-synonymous mutations alter the amino acid sequence of a protein and have a much greater effect on pathogenicity. Hence, this type of gene mutation is of prime interest of the present study. 
The purpose of this study is to develop an updated database comprising almost all reported substitutions within the Mycobacterium tuberculosis (M.tb) drug target genes rpoB, inhA, katG, pncA, gyrA and gyrB. Various bioinformatics prediction tools were used to assess the structural and biophysical impacts of the resistance causing non-synonymous single nucleotide polymorphisms (nsSNPs) at the molecular level. This was followed by evaluating the impact of these mutations on binding affinity of the drugs to target proteins. We have developed a comprehensive online resource named MycoTRAP-DB (Mycobacterium tuberculosis Resistance Associated Polymorphisms Database) that connects mutations in genes with their structural, functional and pathogenic implications on protein. This database is accessible at http://139.59.12.92. This integrated platform would enable comprehensive analysis and prioritization of SNPs for the development of improved diagnostics and antimycobacterial medications. Moreover, our study puts forward secondary mutations that can be important for prognostic assessments of drug-resistance mechanism and actionable anti-TB drugs.",Mycobacterium tuberculosis Resistance Associated Polymorphisms Database,MycoTRAP-DB,http://139.59.12.92, +34029142,Landscape of GPCR expression along the mouse nephron.,"Kidney transport and other renal functions are regulated by multiple G protein-coupled receptors (GPCRs) expressed along the renal tubule. The rapid, recent appearance of comprehensive unbiased gene expression data in the various renal tubule segments, chiefly RNA sequencing and protein mass spectrometry data, has provided a means of identifying patterns of GPCR expression along the renal tubule. To allow for comprehensive mapping, we first curated a comprehensive list of GPCRs in the genomes of mice, rats, and humans (https://hpcwebapps.cit.nih.gov/ESBL/Database/GPCRs/) using multiple online data sources. 
We used this list to mine segment-specific and cell type-specific expression data from RNA-sequencing studies in microdissected mouse tubule segments to identify GPCRs that are selectively expressed in discrete tubule segments. Comparisons of these mapped mouse GPCRs with other omics datasets as well as functional data from isolated perfused tubule and micropuncture studies confirmed patterns of expression for well-known receptors and identified poorly studied GPCRs that are likely to play roles in the regulation of renal tubule function. Thus, we provide data resources for GPCR expression across the renal tubule, highlighting both well-known GPCRs and understudied receptors to provide guidance for future studies.",,,https://hpcwebapps.cit.nih.gov/ESBL/Database/GPCRs/,"a comprehensive list of GPCRs in the genomes of mice, rats, and humans" +34032471,The Human Salivary Proteome Wiki: A Community-Driven Research Platform.,"Saliva has become an attractive body fluid for on-site, remote, and real-time monitoring of oral and systemic health. At the same time, the scientific community needs a saliva-centered information platform that keeps pace with the rapid accumulation of new data and knowledge by annotating, refining, and updating the salivary proteome catalog. We developed the Human Salivary Proteome (HSP) Wiki as a public data platform for researching and retrieving custom-curated data and knowledge on the saliva proteome. The HSP Wiki is dynamically compiled and updated based on published saliva proteome studies and up-to-date protein reference records. It integrates a wide range of available information by funneling in data from established external protein, genome, transcriptome, and glycome databases. In addition, the HSP Wiki incorporates data from human disease-related studies. 
Users can explore the proteome of saliva simply by browsing the database, querying the available data, performing comparisons of data sets, and annotating existing protein entries using a simple, intuitive interface. The annotation process includes both user feedback and curator committee review to ensure the quality and validity of each entry. Here, we present the first overview of features and functions the HSP Wiki offers. As a saliva proteome-centric, publicly accessible database, the HSP Wiki will advance the knowledge of saliva composition and function in health and disease for users across a wide range of disciplines. As a community-based data- and knowledgebase, the HSP Wiki will serve as a worldwide platform to exchange salivary proteome information, inspire novel research ideas, and foster cross-discipline collaborations. The HSP Wiki will pave the way for harnessing the full potential of the salivary proteome for diagnosis, risk prediction, therapy of oral and systemic diseases, and preparedness for emerging infectious diseases.Database URL: https://salivaryproteome.nidcr.nih.gov/.",Human Salivary Proteome Wiki,HSP Wiki,https://salivaryproteome.nidcr.nih.gov/,a public data platform for researching and retrieving custom-curated data and knowledge on the saliva proteome +34034817,PhenCards: a data resource linking human phenotype information to biomedical knowledge.,"We present PhenCards ( https://phencards.org ), a database and web server intended as a one-stop shop for previously disconnected biomedical knowledge related to human clinical phenotypes. Users can query human phenotype terms or clinical notes. PhenCards obtains relevant disease/phenotype prevalence and co-occurrence, drug, procedural, pathway, literature, grant, and collaborator data. PhenCards recommends the most probable genetic diseases and candidate genes based on phenotype terms from clinical notes. 
PhenCards facilitates exploration of phenotype, e.g., which drugs cause or are prescribed for patient symptoms, which genes likely cause specific symptoms, and which comorbidities co-occur with phenotypes.",PhenCards,PhenCards,https://phencards.org,a data resource linking human phenotype information to biomedical knowledge +34042771,Introducing a Platform for Integrating and Sharing Stem Cell Research Data.,"Advancements in regenerative medicine have highlighted the need for increased standardization and sharing of stem cell products to help drive these innovative interventions toward public availability and to increase collaboration in the scientific community. Although numerous attempts and numerous databases have been made to store this data, there is still a lack of a platform that incorporates heterogeneous stem cell information into a harmonized project-based framework. The aim of the platform described in this study, ReMeDy, is to provide an intelligent informatics solution which integrates diverse stem cell product characteristics with study subject and omics information. In the resulting platform, heterogeneous data is validated using predefined ontologies and stored in a relational database. In this initial feasibility study, testing of the ReMeDy functionality was performed using published, publically-available induced pluripotent stem cell projects conducted in in vitro, preclinical and intervention evaluations. It demonstrated the robustness of ReMeDy for storing diverse iPSC data, by seamlessly harmonizing diverse common data elements, and the potential utility of this platform for driving knowledge generation from the aggregation of this shared data. Next steps include increasing the number of curated projects by developing a crowdsourcing framework for data upload and an automated pipeline for metadata abstraction. 
The database is publically accessible at https://remedy.mssm.edu/.",ReMeDy,ReMeDy,https://remedy.mssm.edu/,a Platform for Integrating and Sharing Stem Cell Research Data +34046592,A map of the SARS-CoV-2 RNA structurome.,"SARS-CoV-2 has exploded throughout the human population. To facilitate efforts to gain insights into SARS-CoV-2 biology and to target the virus therapeutically, it is essential to have a roadmap of likely functional regions embedded in its RNA genome. In this report, we used a bioinformatics approach, ScanFold, to deduce the local RNA structural landscape of the SARS-CoV-2 genome with the highest likelihood of being functional. We recapitulate previously-known elements of RNA structure and provide a model for the folding of an essential frameshift signal. Our results find that SARS-CoV-2 is greatly enriched in unusually stable and likely evolutionarily ordered RNA structure, which provides a large reservoir of potential drug targets for RNA-binding small molecules. Results are enhanced via the re-analyses of publicly-available genome-wide biochemical structure probing datasets that are broadly in agreement with our models. Additionally, ScanFold was updated to incorporate experimental data as constraints in the analysis to facilitate comparisons between ScanFold and other RNA modelling approaches. Ultimately, ScanFold was able to identify eight highly structured/conserved motifs in SARS-CoV-2 that agree with experimental data, without explicitly using these data. 
All results are made available via a public database (the RNAStructuromeDB: https://structurome.bb.iastate.edu/sars-cov-2) and model comparisons are readily viewable at https://structurome.bb.iastate.edu/sars-cov-2-global-model-comparisons.",RNAStructuromeDB,RNAStructuromeDB,https://structurome.bb.iastate.edu/sars-cov-2, +34048545,"dbGENVOC: database of GENomic Variants of Oral Cancer, with special reference to India.","Oral cancer is highly prevalent in India and is the most frequent cancer type among Indian males. It is also very common in southeast Asia. India has participated in the International Cancer Genome Consortium (ICGC) and some national initiatives to generate large-scale genomic data on oral cancer patients and analyze to identify associations and systematically catalog the associated variants. We have now created an open, web-accessible database of these variants found significantly associated with Indian oral cancer patients, with a user-friendly interface to enable easy mining. We have value added to this database by including relevant data collated from various sources on other global populations, thereby providing opportunities of comparative geographical and/or ethnic analyses. Currently, no other database of similar nature is available on oral cancer. We have developed Database of GENomic Variants of Oral Cancer, a browsable online database framework for storage, retrieval and analysis of large-scale data on genomic variants and make it freely accessible to the scientific community. Presently, the web-accessible database allows potential users to mine data on ∼24 million clinically relevant somatic and germline variants derived from exomes (n = 100) and whole genomes (n = 5) of Indian oral cancer patients; all generated by us. Variant data from The Cancer Genome Atlas and data manually curated from peer-reviewed publications were also incorporated into the database for comparative analyses. 
It allows users to query the database by a single gene, multiple genes, multiple variant sites, genomic region, patient ID and pathway identities. Database URL: http://research.nibmg.ac.in/dbcares/dbgenvoc/.",Database of GENomic Variants of Oral Cancer,dbGENVOC,http://research.nibmg.ac.in/dbcares/dbgenvoc/,"a browsable online database framework for storage, retrieval and analysis of large-scale data on genomic variants and make it freely accessible to the scientific community" +34048547,emiRIT: a text-mining-based resource for microRNA information.,"microRNAs (miRNAs) are essential gene regulators, and their dysregulation often leads to diseases. Easy access to miRNA information is crucial for interpreting generated experimental data, connecting facts across publications and developing new hypotheses built on previous knowledge. Here, we present extracting miRNA Information from Text (emiRIT), a text-miningbased resource, which presents miRNA information mined from the literature through a user-friendly interface. We collected 149 ,233 miRNA -PubMed ID pairs from Medline between January 1997 and May 2020. emiRIT currently contains 'miRNA -gene regulation' (69 ,152 relations), 'miRNA disease (cancer)' (12 ,300 relations), 'miRNA -biological process and pathways' (23, 390 relations) and circulatory 'miRNAs in extracellular locations' (3782 relations). Biological entities and their relation to miRNAs were extracted from Medline abstracts using publicly available and in-house developed text-mining tools, and the entities were normalized to facilitate querying and integration. We built a database and an interface to store and access the integrated data, respectively. We provide an up-to-date and user-friendly resource to facilitate access to comprehensive miRNA information from the literature on a large scale, enabling users to navigate through different roles of miRNA and examine them in a context specific to their information needs. 
To assess our resource's information coverage, we have conducted two case studies focusing on the target and differential expression information of miRNAs in the context of cancer and a third case study to assess the usage of emiRIT in the curation of miRNA information. Database URL: https://research.bioinformatics.udel.edu/emirit/.",extracting miRNA Information from Text,emiRIT,https://research.bioinformatics.udel.edu/emirit/,a text-mining-based resource for microRNA information +34048576,The COVID-19 Data Portal: accelerating SARS-CoV-2 and COVID-19 research through rapid open access data sharing.,"The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) pandemic will be remembered as one of the defining events of the 21st century. The rapid global outbreak has had significant impacts on human society and is already responsible for millions of deaths. Understanding and tackling the impact of the virus has required a worldwide mobilisation and coordination of scientific research. The COVID-19 Data Portal (https://www.covid19dataportal.org/) was first released as part of the European COVID-19 Data Platform, on April 20th 2020 to facilitate rapid and open data sharing and analysis, to accelerate global SARS-CoV-2 and COVID-19 research. The COVID-19 Data Portal has fortnightly feature releases to continue to add new data types, search options, visualisations and improvements based on user feedback and research. 
The open datasets and intuitive suite of search, identification and download services, represent a truly FAIR (Findable, Accessible, Interoperable and Reusable) resource that enables researchers to easily identify and quickly obtain the key datasets needed for their COVID-19 research.",COVID-19 Data Portal,,https://www.covid19dataportal.org/, +34052284,miREV: An Online Database and Tool to Uncover Potential Reference RNAs and Biomarkers in Small-RNA Sequencing Data Sets from Extracellular Vesicles Enriched Samples.,"Extracellular vesicles (EVs) are nano-sized, membrane-enclosed vesicles released by cells for intercellular communication. EVs are involved in pathological processes and miRNAs in EVs have gained interest as easily accessible biomolecules in liquid biopsies for diagnostic purposes. To validate potential miRNA biomarker, transcriptome analyses must be carried out to detect suitable reference miRNAs. miREV is a database with over 400 miRNA sequencing data sets and helps the researcher to find suitable reference miRNAs for their individual experimental setup. The researcher can put together a specific sample set in miREV, which is similar to his own experimental concept in order to find the most suitable references. This allows to run validation experiments without having to carry out a complex and costly transcriptome analysis priorly. Additional read count tables of each generated sample set are downloadable for further analysis. 
miREV is freely available at https://www.physio.wzw.tum.de/mirev/.",miREV,miREV,https://www.physio.wzw.tum.de/mirev/,An Online Database and Tool to Uncover Potential Reference RNAs and Biomarkers in Small-RNA Sequencing Data Sets from Extracellular Vesicles Enriched Samples +34080131,"Preclinical Western Blot in the Era of Digital Transformation and Reproducible Research, an Eastern Perspective.","The current research is an interdisciplinary endeavor to develop a necessary tool in preclinical protein studies of diseases or disorders through western blotting. In the era of digital transformation and open access principles, an interactive cloud-based database called East-West Blot ( https://rancs-lab.shinyapps.io/WesternBlots ) is designed and developed. The online interactive subject-specific database built on the R shiny platform facilitates a systematic literature search on the specific subject matter, here set to western blot studies of protein regulation in the preclinical model of TBI. The tool summarizes the existing publicly available knowledge through a data visualization technique and easy access to the critical data elements and links to the study itself. The application compiled a relational database of PubMed-indexed western blot studies labeled under HHS public access, reporting downstream protein regulations presented by fluid percussion injury model of traumatic brain injury. 
The promises of the developed tool include progressing toward implementing the principles of 3Rs (replacement, reduction, and refinement) for humane experiments, cultivating the prerequisites of reproducible research in terms of reporting characteristics, paving the ways for a more collaborative experimental design in basic science, and rendering an up-to-date and summarized perspective of current publicly available knowledge.",East-West Blot,,https://rancs-lab.shinyapps.io/WesternBlots, +34081565,Biomedical Entity Explorer: A Web Server for Biomedical Entity Exploration.,"Biomedical Entity Explorer (BEE) is a web server that can search for biomedical entities from a database of six biomedical entity types (gene, miRNA, drug, disease, single nucleotide polymorphism [SNP], pathway) and their gene associations. The search results can be explored using intersections, unions, and negations. BEE has integrated biomedical entities from 16 databases (Ensemble, PharmGKB, Genetic Home Reference, Tarbase, Mirbase, NCI Thesaurus, DisGeNET, Linked life data, UMLS, GSEA MsigDB, Reactome, KEGG, Gene Ontology, HGVD, SNPedia, and dbSNP) based on their gene associations and built a database with their synonyms, descriptions, and links containing individual details. Users can enter the keyword of one or more entities and select the type of entity for which they want to know the relationship for and by using set operations such as union, negation, and intersection, they can navigate the search results more clearly. We believe that BEE will not only be useful for biologists querying for complex associations between entities, but can also be a good starting point for general users searching for biomedical entities. 
BEE is accessible at (http://bike-bee.snu.ac.kr).",Biomedical Entity Explorer,BEE,http://bike-bee.snu.ac.kr,A Web Server for Biomedical Entity Exploration +34085038,"EyeDiseases: an integrated resource for dedicating to genetic variants, gene expression and epigenetic factors of human eye diseases.","Eye diseases are remarkably common and encompass a large and diverse range of morbidities that affect different components of the visual system and visual function. With advances in omics technology of eye disorders, genome-scale datasets have been rapidly accumulated in genetics and epigenetics field. However, the efficient collection and comprehensive analysis of different kinds of omics data are lacking. Herein, we developed EyeDiseases (https://eyediseases.bio-data.cn/), the first database for multi-omics data integration and interpretation of human eyes diseases. It contains 1344 disease-associated genes with genetic variation, 1774 transcription files of bulk cell expression and single-cell RNA-seq, 105 epigenomics data across 185 kinds of human eye diseases. Using EyeDiseases, we investigated SARS-CoV-2 potential tropism in eye infection and found that the SARS-CoV-2 entry factors, ACE2 and TMPRSS2 are highly correlated with cornea and keratoconus, suggest that ocular surface cells are susceptible to infection by SARS-CoV-2. Additionally, integrating analysis of Age-related macular degeneration (AMD) GWAS loci and co-expression data revealed 9 associated genes involved in HIF-1 signaling pathway and voltage-gate potassium channel complex. 
The EyeDiseases provides a valuable resource for accelerating the discovery and validation of candidate loci and genes contributed to the molecular diagnosis and therapeutic vulnerabilities with various eyes diseases.",EyeDiseases,EyeDiseases,https://eyediseases.bio-data.cn/,"an integrated resource for dedicating to genetic variants, gene expression and epigenetic factors of human eye diseases" +34104972,eSkip-Finder: a machine learning-based web application and database to identify the optimal sequences of antisense oligonucleotides for exon skipping.,"Exon skipping using antisense oligonucleotides (ASOs) has recently proven to be a powerful tool for mRNA splicing modulation. Several exon-skipping ASOs have been approved to treat genetic diseases worldwide. However, a significant challenge is the difficulty in selecting an optimal sequence for exon skipping. The efficacy of ASOs is often unpredictable, because of the numerous factors involved in exon skipping. To address this gap, we have developed a computational method using machine-learning algorithms that factors in many parameters as well as experimental data to design highly effective ASOs for exon skipping. eSkip-Finder (https://eskip-finder.org) is the first web-based resource for helping researchers identify effective exon skipping ASOs. eSkip-Finder features two sections: (i) a predictor of the exon skipping efficacy of novel ASOs and (ii) a database of exon skipping ASOs. The predictor facilitates rapid analysis of a given set of exon/intron sequences and ASO lengths to identify effective ASOs for exon skipping based on a machine learning model trained by experimental data. We confirmed that predictions correlated well with in vitro skipping efficacy of sequences that were not included in the training data. 
The database enables users to search for ASOs using queries such as gene name, species, and exon number.",eSkip-Finder,eSkip-Finder,https://eskip-finder.org,a machine learning-based web application and database to identify the optimal sequences of antisense oligonucleotides for exon skipping +34107869,PINIR: a comprehensive information resource for Pin-II type protease inhibitors.,"

Background

Serine protease inhibitors belonging to the Potato type-II Inhibitor family Protease Inhibitors (Pin-II type PIs) are essential plant defense molecules. They are characterized by multiple inhibitory repeat domains, conserved disulfide bond pattern, and a tripeptide reactive center loop. These features of Pin-II type PIs make them potential molecules for protein engineering and designing inhibitors for agricultural and therapeutic applications. However, the diversity in these PIs remains unexplored due to the lack of annotated protein sequences and their functional attributes in the available databases.

Results

We have developed a database, PINIR (Pin-II type PIs Information Resource), by systematic collection and manual annotation of 415 Pin-II type PI protein sequences. For each PI, the number and position for signature sequences are specified: 695 domains, 75 linkers, 63 reactive center loops, and 10 disulfide bond patterns are identified and mapped. Database analysis revealed novel subcategories of PIs, species-correlated occurrence of inhibitory domains, reactive center loops, and disulfide bond patterns. By analyzing linker regions, we predict that alternative processing at linker regions could generate PI variants in the Solanaceae family.

Conclusion

PINIR ( https://pinir.ncl.res.in ) provides a web interface for browsing and analyzing the protein sequences of Pin-II type PIs. Information about signature sequences, spatio-temporal expression, biochemical properties, gene sequences, and literature references are provided. Analysis of PINIR depicts conserved species-specific features of Pin-II type PI protein sequences. Diversity in the sequence of inhibitory domains and reactive loops directs potential applications to engineer Pin-II type PIs. The PINIR database will serve as a comprehensive information resource for further research into Pin-II type PIs.",Pin-II type PIs Information Resource,PINIR,https://pinir.ncl.res.in,a comprehensive information resource for Pin-II type protease inhibitors +34113986,Tfcancer: a manually curated database of transcription factors associated with human cancers.,"

Summary

Transcription factors (TFs) are critical regulation elements and its dysregulation can lead to a variety of cancers. However, currently, there are no such online resources for large-scale collection, storage and analysis of TF-cancer associations in those cancers. To fill this gap, we present a database called TFcancer (http://lcbb.swjtu.edu.cn/tfcancer/), which contains 3136 experimentally supported associations between 364 TFs and 33 TCGA cancers by manually curating more than 1800 literature. TFcancer mainly concentrates on four aspects: TF expression, molecular alteration, regulatory relationships between TFs and target genes, and biological processes and signaling pathways of TFs in cancers. TFcancer not only provides a user-friendly interface for browsing and searching but also allows flexible data downloading and user data submitting. It is believed that TFcancer is a helpful and valuable resource for researchers who seek to understand the functions and molecular mechanisms of TFs involved in human cancers.

Availability and implementation

The TFcancer are freely available at http://lcbb.swjtu.edu.cn/tfcancer/.

Supplementary information

Supplementary data are available at Bioinformatics online.",TFcancer,TFcancer,http://lcbb.swjtu.edu.cn/tfcancer/,a manually curated database of transcription factors associated with human cancers +34120586,NUCOME: A comprehensive database of nucleosome organization referenced landscapes in mammalian genomes.,"

Background

Nucleosome organization is involved in many regulatory activities in various organisms. However, studies integrating nucleosome organization in mammalian genomes are very limited mainly due to the lack of comprehensive data quality control (QC) assessment and uneven data quality of public data sets.

Results

The NUCOME is a database focused on filtering qualified nucleosome organization referenced landscapes covering various cell types in human and mouse based on QC metrics. The filtering strategy guarantees the quality of nucleosome organization referenced landscapes and exempts users from redundant data set selection and processing. The NUCOME database provides standardized, qualified data source and informative nucleosome organization features at a whole-genome scale and on the level of individual loci.

Conclusions

The NUCOME provides valuable data resources for integrative analyses focus on nucleosome organization. The NUCOME is freely available at http://compbio-zhanglab.org/NUCOME .",NUCOME,NUCOME,http://compbio-zhanglab.org/NUCOME,A comprehensive database of nucleosome organization referenced landscapes in mammalian genomes +34127402,Immu-Mela: An open resource for exploring immunotherapy-related multidimensional genomic profiles in melanoma.,"There are increasing studies aimed to reveal genomic hallmarks predictive of immune checkpoint blockade (ICB) treatment response, which generated a large number of data and provided an unprecedented opportunity to identify response-related features and evaluate their robustness across cohorts. However, those valuable data sets are not easily accessible to the research community. To take full advantage of existing large-scale immuno-genomic profiles, we developed Immu-Mela (http://bioinfo.vanderbilt.edu/database/Immu-Mela/), a multidimensional immuno-genomic portal that provides interactive exploration of associations between ICB responsiveness and multi-omics features in melanoma, including genetic, transcriptomics, immune cells, and single-cell populations. Immu-Mela also enables integrative analysis of any two genomic features. We demonstrated the value of Immu-Mela by identifying known and novel genomic features associated with ICB response. In addition, Immu-Mela allows users to upload their data sets (unrestricted to any cancer types) and co-analyze with existing data to identify and validate signatures of interest. 
Immu-Mela reduces barriers between researchers and complex genomic data, facilitating discoveries in cancer immunotherapy.",Immu-Mela,Immu-Mela,http://bioinfo.vanderbilt.edu/database/Immu-Mela/,An open resource for exploring immunotherapy-related multidimensional genomic profiles in melanoma +34147352,MPSBase: Comprehensive repository of differentially expressed genes for mucopolysaccharidoses.,"Mucopolysaccharidoses (MPS) are lysosomal storage diseases (LSDs) caused by the deficiency of enzymes essential for the metabolism of extracellular matrix components called glycosaminoglycans (GAGs). To understand the physiopathology and alterations due to the lysosomal accumulation resulting from enzymatic deficiencies and their secondary outcomes can improve the diagnosis and treatment of rare genetic diseases. This work presents a database for differentially expressed genes from different public MPS data. We developed our database, including 13 studies previously deposited in the GEO (https://www.ncbi.nlm.nih.gov/geo/). The website is hosted in the UFRGS data processing center (CPD) and is available at . The site was constructed in PHP, and the analyses were performed in R. The organisms represented by the datasets are Canis lupus familiaris, Homo sapiens, Mus musculus, and Rattus norvegicus. The user can search for the differentially expressed genes and ontologies by species, MPS type, or tissue type. For each comparison, a heatmap with the 50 top differentially expressed genes is available as well as dot plots for the 30 top ontologies divided by biological process, cellular component, KEGG pathways, and molecular function. This data is also fully available in tables. There are 54 possible comparisons involving about 5000 to 10,000 genes each. This website is the only specific database for MPS with filtering and presenting their results in a one-click approach to the best of our knowledge. 
The development of such analytical and automated strategies accessible to health professionals is essential for fostering MPS research. The MPSBase is a web user-friendly, comprehensive repository of differentially expressed genes and ontologies regarding the MPS data.",MPSBase,MPSBase,https://www.ncbi.nlm.nih.gov/geo/,Comprehensive repository of differentially expressed genes for mucopolysaccharidoses +34156446,MetamORF: a repository of unique short open reading frames identified by both experimental and computational approaches for gene and metagene analyses.,"The development of high-throughput technologies revealed the existence of non-canonical short open reading frames (sORFs) on most eukaryotic ribonucleic acids. They are ubiquitous genetic elements conserved across species and suspected to be involved in numerous cellular processes. MetamORF (https://metamorf.hb.univ-amu.fr/) aims to provide a repository of unique sORFs identified in the human and mouse genomes with both experimental and computational approaches. By gathering publicly available sORF data, normalizing them and summarizing redundant information, we were able to identify a total of 1‚Äâ162‚Äâ675 unique sORFs. Despite the usual characterization of ORFs as short, upstream or downstream, there is currently no clear consensus regarding the definition of these categories. Thus, the data have been reprocessed using a normalized nomenclature. MetamORF enables new analyses at locus, gene, transcript and ORF levels, which should offer the possibility to address new questions regarding sORF functions in the future. The repository is available through an user-friendly web interface, allowing easy browsing, visualization, filtering over multiple criteria and export possibilities. sORFs can be searched starting from a gene, a transcript and an ORF ID, looking in a genome area or browsing the whole repository for a species. 
The database content has also been made available through track hubs at UCSC Genome Browser. Finally, we demonstrated an enrichment of genes harboring upstream ORFs among genes expressed in response to reticular stress. Database URL https://metamorf.hb.univ-amu.fr/.",MetamORF,MetamORF,https://metamorf.hb.univ-amu.fr/,a repository of unique short open reading frames identified by both experimental and computational approaches for gene and metagene analyses +34164644,HFBD: a biomarker knowledge database for heart failure heterogeneity and personalized applications.,"

Motivation

Heart failure (HF) is a cardiovascular disease with a high incidence around the world. Accumulating studies have focused on the identification of biomarkers for HF precision medicine. To understand the HF heterogeneity and provide biomarker information for the personalized diagnosis and treatment of HF, a knowledge database collecting the distributed and multiple-level biomarker information is necessary.

Results

In this study, the HF biomarker knowledge database (HFBD) was established by manually collecting the data and knowledge from literature in PubMed. HFBD contains 2618 records and 868 HF biomarkers (731 single and 137 combined) extracted from 1237 original articles. The biomarkers were classified into proteins, RNAs, DNAs, and the others at molecular, image, cellular and physiological levels. The biomarkers were annotated with biological, clinical and article information as well as the experimental methods used for the biomarker discovery. With its user-friendly interface, this knowledge database provides a unique resource for the systematic understanding of HF heterogeneity and personalized diagnosis and treatment of HF in the era of precision medicine.

Availability

The platform is openly available at http://sysbio.org.cn/HFBD/.",HF biomarker knowledge database,HFBD,http://sysbio.org.cn/HFBD/,a biomarker knowledge database for heart failure heterogeneity and personalized applications +34167460,A comprehensive database for integrated analysis of omics data in autoimmune diseases.,"

Background

Autoimmune diseases are heterogeneous pathologies with difficult diagnosis and few therapeutic options. In the last decade, several omics studies have provided significant insights into the molecular mechanisms of these diseases. Nevertheless, data from different cohorts and pathologies are stored independently in public repositories and a unified resource is imperative to assist researchers in this field.

Results

Here, we present Autoimmune Diseases Explorer ( https://adex.genyo.es ), a database that integrates 82 curated transcriptomics and methylation studies covering 5609 samples for some of the most common autoimmune diseases. The database provides, in an easy-to-use environment, advanced data analysis and statistical methods for exploring omics datasets, including meta-analysis, differential expression or pathway analysis.

Conclusions

This is the first omics database focused on autoimmune diseases. This resource incorporates homogeneously processed data to facilitate integrative analyses among studies.",Autoimmune Diseases Explorer,,https://adex.genyo.es,A comprehensive database for integrated analysis of omics data in autoimmune diseases +34169314,KMDATA: a curated database of reconstructed individual patient-level data from 153 oncology clinical trials.,"We created a database of reconstructed patient-level data from published clinical trials that includes multiple time-to-event outcomes such as overall survival and progression-free survival. Outcomes were extracted from Kaplan-Meier (KM) curves reported in 153 oncology Phase III clinical trial publications identified through a PubMed search of clinical trials in breast, lung, prostate and colorectal cancer, published between 2014 and 2016. For each trial that met our search criteria, we curated study-level information and digitized all reported KM curves with the software Digitizelt. We then used the digitized KM survival curves to estimate (possibly censored) patient-level time-to-event outcomes. Collections of time-to-event datasets from completed trials can be used to support the choice of appropriate trial designs for future clinical studies. Patient-level data allow investigators to tailor clinical trial designs to diseases and classes of treatments. Patient-level data also allow investigators to estimate the operating characteristics (e.g. power and type I error rate) of candidate statistical designs and methods. 
Database URL: https://10.6084/m9.figshare.14642247.v1.",KMDATA,KMDATA,https://10.6084/m9.figshare.14642247.v1,a curated database of reconstructed individual patient-level data from 153 oncology clinical trials +34174131,CanVaS: Documenting the genetic variation spectrum of Greek cancer patients.,"National genetic variation registries vastly increase the level of detail for the relevant population, while directly affecting patient management. Herein, we report CanVaS, a Cancer Variation reSource aiming to document the genetic variation of cancer patients in Greece. CanVaS comprises germline genetic data from 7,363 Greek individuals with a personal and/or family history of malignancy. The data set incorporates approximately 24,000 functionally annotated rare variants in 97 established or suspected cancer susceptibility genes. For each variant, allele frequency for the Greek population, interpretation for clinical significance, anonymized family and segregation information, as well as phenotypic traits of the carriers, are included. Moreover, information on the geographic distribution of the variants across the country is provided, enabling the study of Greek population isolates. Direct comparisons between Greek (sub)populations with relevant genetic resources are supported, allowing fine-grain localized adjustment of guidelines and clinical decision-making. Most importantly, anonymized data are available for download, while the Leiden Open Variation Database schema is adopted, enabling integration/interconnection with central resources. CanVaS could become a stepping-stone for a countrywide effort to characterize the cancer genetic variation landscape, concurrently supporting national and international cancer research. 
The database can be accessed at: http://ithaka.rrp.demokritos.gr/CanVaS.",Cancer Variation reSource,CanVaS,http://ithaka.rrp.demokritos.gr/CanVaS,Documenting the genetic variation spectrum of Greek cancer patients +34174819,UniBind: maps of high-confidence direct TF-DNA interactions across nine species.,"

Background

Transcription factors (TFs) bind specifically to TF binding sites (TFBSs) at cis-regulatory regions to control transcription. It is critical to locate these TF-DNA interactions to understand transcriptional regulation. Efforts to predict bona fide TFBSs benefit from the availability of experimental data mapping DNA binding regions of TFs (chromatin immunoprecipitation followed by sequencing - ChIP-seq).

Results

In this study, we processed ~‚Äâ10,000 public ChIP-seq datasets from nine species to provide high-quality TFBS predictions. After quality control, it culminated with the prediction of ~‚Äâ56 million TFBSs with experimental and computational support for direct TF-DNA interactions for 644 TFs in >‚Äâ1000 cell lines and tissues. These TFBSs were used to predict >‚Äâ197,000 cis-regulatory modules representing clusters of binding events in the corresponding genomes. The high-quality of the TFBSs was reinforced by their evolutionary conservation, enrichment at active cis-regulatory regions, and capacity to predict combinatorial binding of TFs. Further, we confirmed that the cell type and tissue specificity of enhancer activity was correlated with the number of TFs with binding sites predicted in these regions. All the data is provided to the community through the UniBind database that can be accessed through its web-interface ( https://unibind.uio.no/ ), a dedicated RESTful API, and as genomic tracks. Finally, we provide an enrichment tool, available as a web-service and an R package, for users to find TFs with enriched TFBSs in a set of provided genomic regions.

Conclusions

UniBind is the first resource of its kind, providing the largest collection of high-confidence direct TF-DNA interactions in nine species.",UniBind,UniBind,https://unibind.uio.no,maps of high-confidence direct TF-DNA interactions across nine species +34175476,Genome Warehouse: A Public Repository Housing Genome-scale Data.,"The Genome Warehouse (GWH) is a public repository housing genome assembly data for a wide range of species and delivering a series of web services for genome data submission, storage, release, and sharing. As one of the core resources in the National Genomics Data Center (NGDC), part of the China National Center for Bioinformation (CNCB, https://ngdc.cncb.ac.cn), GWH accepts both full genome and partial genome (chloroplast, mitochondrion, and plasmid) sequences with different assembly levels, as well as an update of existing genome assemblies. For each assembly, GWH collects detailed genome-related metadata of biological project, biological sample, and genome assembly, in addition to genome sequence and annotation. To archive high-quality genome sequences and annotations, GWH is equipped with a uniform and standardized procedure for quality control. Besides basic browse and search functionalities, all released genome sequences and annotations can be visualized with JBrowse. By May 21, 2021, GWH has received 19,124 direct submissions covering a diversity of 1108 species and has released 8772 of them. Collectively, GWH serves as an important resource for genome-scale data management and provides free and publicly accessible data to support research activities throughout the world. 
GWH is publicly accessible at https://ngdc.cncb.ac.cn/gwh.",Genome Warehouse,GWH,https://ngdc.cncb.ac.cn,A Public Repository Housing Genome-scale Data +34178036,RHIVDB: A Freely Accessible Database of HIV Amino Acid Sequences and Clinical Data of Infected Patients.,"Human immunodeficiency virus (HIV) infection remains one of the most severe problems for humanity, particularly due to the development of HIV resistance. To evaluate an association between viral sequence data and drug combinations and to estimate an effect of a particular drug combination on the treatment results, collection of the most representative drug combinations used to cure HIV and the biological data on amino acid sequences of HIV proteins is essential. We have created a new, freely available web database containing 1,651 amino acid sequences of HIV structural proteins [reverse transcriptase (RT), protease (PR), integrase (IN), and envelope protein (ENV)], treatment history information, and CD4+ cell count and viral load data available by the user's query. Additionally, the biological data on new HIV sequences and treatment data can be stored in the database by any user followed by an expert's verification. The database is available on the web at http://www.way2drug.com/rhivdb.",RHIVDB,RHIVDB,http://www.way2drug.com/rhivdb,A Freely Accessible Database of HIV Amino Acid Sequences and Clinical Data of Infected Patients +34214659,An inferred functional impact map of genetic variants in rice.,"Interpreting the functional impacts of genetic variants (GVs) is an important challenge for functional genomic studies in crops and next-generation breeding. Currently, studies in rice (Oryza sativa) have mainly focused on the identification of GVs, while the functional annotation of GVs has not yet been systematically carried out. Here we present a functional impact map of GVs in rice. We curated haplotype information of 17,397,026 GVs from sequencing data of 4,726 rice accessions. 
We quantitatively evaluated the effects of missense mutations in coding regions in each haplotype based on the conservation of amino acid residues and obtained the effects of 918,848 non-redundant missense GVs. We also generated high-quality chromatin accessibility (CA) data from six representative rice tissues and used these data to train deep convolutional neural network models to predict the impacts of 5,067,405 GVs for CA in regulatory regions. We characterized the functional properties and tissue specificity of the effects of GVs and found that large-effect GVs in coding and regulatory regions might be subject to selection in different directions. We finally demonstrated how the functional impact map could be used to prioritize the causal variants in mapping populations. This impact map will be a useful resource for accelerating gene cloning and functional studies in rice and can be freely queried in RiceVarMap V2.0 (http://ricevarmap.ncpgr.cn).",RiceVarMap V2.0,,http://ricevarmap.ncpgr.cn,An inferred functional impact map of genetic variants in rice +34220930,"The FAANG Data Portal: Global, Open-Access, ""FAIR"", and Richly Validated Genotype to Phenotype Data for High-Quality Functional Annotation of Animal Genomes.","The Functional Annotation of ANimal Genomes (FAANG) project is a worldwide coordinated action creating high-quality functional annotation of farmed and companion animal genomes. The generation of a rich genome-to-phenome resource and supporting informatic infrastructure advances the scope of comparative genomics and furthers the understanding of functional elements. The project also provides terrestrial and aquatic animal agriculture community powerful resources for supporting improvements to farmed animal production, disease resistance, and genetic diversity. 
The FAANG Data Portal (https://data.faang.org) ensures Findable, Accessible, Interoperable and Reusable (FAIR) open access to the wealth of sample, sequencing, and analysis data produced by an ever-growing number of FAANG consortia. It is developed and maintained by the FAANG Data Coordination Centre (DCC) at the European Molecular Biology Laboratory's European Bioinformatics Institute (EMBL-EBI). FAANG projects produce a standardised set of multi-omic assays with resulting data placed into a range of specialised open data archives. To ensure this data is easily findable and accessible by the community, the portal automatically identifies and collates all submitted FAANG data into a single easily searchable resource. The Data Portal supports direct download from the multiple underlying archives to enable seamless access to all FAANG data from within the portal itself. The portal provides a range of predefined filters, powerful predictive search, and a catalogue of sampling and analysis protocols and automatically identifies publications associated with any dataset. To ensure all FAANG data submissions are high-quality, the portal includes powerful contextual metadata validation and data submissions brokering to the underlying EMBL-EBI archives. The portal will incorporate extensive new technical infrastructure to effectively deliver and standardise FAANG's shift to single-cellomics, cell atlases, pangenomes, and novel phenotypic prediction models. The Data Portal plays a key role for FAANG by supporting high-quality functional annotation of animal genomes, through open FAIR sharing of data, complete with standardised rich metadata. 
Future Data Portal features developed by the DCC will support new technological developments for continued improvement for FAANG projects.",FAANG Data Portal,FAANG,https://data.faang.org,"Global, Open-Access, ""FAIR"", and Richly Validated Genotype to Phenotype Data for High-Quality Functional Annotation of Animal Genomes" +34225788,A global overview of genetically interpretable multimorbidities among common diseases in the UK Biobank.,"

Background

Multimorbidities greatly increase the global health burdens, but the landscapes of their genetic risks have not been systematically investigated.

Methods

We used the hospital inpatient data of 385,335 patients in the UK Biobank to investigate the multimorbid relations among 439 common diseases. Post-GWAS analyses were performed to identify multimorbidity shared genetic risks at the genomic loci, network, as well as overall genetic architecture levels. We conducted network decomposition for the networks of genetically interpretable multimorbidities to detect the hub diseases and the involved molecules and functions in each module.

Results

In total, 11,285 multimorbidities among 439 common diseases were identified, and 46% of them were genetically interpretable at the loci, network, or overall genetic architecture levels. Multimorbidities affecting the same and different physiological systems displayed different patterns of the shared genetic components, with the former more likely to share loci-level genetic components while the latter more likely to share network-level genetic components. Moreover, both the loci- and network-level genetic components shared by multimorbidities converged on cell immunity, protein metabolism, and gene silencing. Furthermore, we found that the genetically interpretable multimorbidities tend to form network modules, mediated by hub diseases and featuring physiological categories. Finally, we showcased how hub diseases mediating the multimorbidity modules could help provide useful insights for the genetic contributors of multimorbidities.

Conclusions

Our results provide a systematic resource for understanding the genetic predispositions of multimorbidities and indicate that hub diseases and converged molecules and functions may be the key for treating multimorbidities. We have created an online database that facilitates researchers and physicians to browse, search, or download these multimorbidities ( https://multimorbidity.comp-sysbio.org ).",,,https://multimorbidity.comp-sysbio.org, +26483767,Clinical utilization of genomics data produced by the international Pseudomonas aeruginosa consortium.,"The International Pseudomonas aeruginosa Consortium is sequencing over 1000 genomes and building an analysis pipeline for the study of Pseudomonas genome evolution, antibiotic resistance and virulence genes. Metadata, including genomic and phenotypic data for each isolate of the collection, are available through the International Pseudomonas Consortium Database (http://ipcd.ibis.ulaval.ca/). Here, we present our strategy and the results that emerged from the analysis of the first 389 genomes. With as yet unmatched resolution, our results confirm that P. aeruginosa strains can be divided into three major groups that are further divided into subgroups, some not previously reported in the literature. We also provide the first snapshot of P. aeruginosa strain diversity with respect to antibiotic resistance. Our approach will allow us to draw potential links between environmental strains and those implicated in human and animal infections, understand how patients become infected and how the infection evolves over time as well as identify prognostic markers for better evidence-based decisions on patient care.",International Pseudomonas Consortium Database,,http://ipcd.ibis.ulaval.ca/, +26995712,Rare disease relations through common genes and protein interactions.,"ODCs (Orphan Disease Connections), available at http://csbg.cnb.csic.es/odcs, is a novel resource to explore potential molecular relations between rare diseases. 
These molecular relations have been established through the integration of disease susceptibility genes and human protein-protein interactions. The database currently contains 54,941 relations between 3032 diseases.",Orphan Disease Connections,ODCs,http://csbg.cnb.csic.es/odcs,a novel resource to explore potential molecular relations between rare diseases +21255607,"Aromatic-Aromatic Interactions Database, A(2)ID: an analysis of aromatic Ï€-networks in proteins.","The geometrical arrangement of the aromatic rings of phenylalanine, tyrosine, tryptophan and histidine has been analyzed at a database level using the X-ray crystal structure of proteins from PDB in order to find out the aromatic-aromatic (p-p) networks in proteins and to understand how these aromatic rings are connected with each-other in a specific p-p network. A stringent examination of the 7848 proteins indicates that close to 89% of the proteins have occurrence of at least a network of 2p or a higher p-p network. The occurrence of p-p networks in various protein superfamilies based on SCOP, CATH and EC classifiers has also been probed in the present work. In general, we find that multidomain and membrane proteins as well as lyases show a more number of these networks. Analysis of the distribution of angle between planes of two proximal aromatic rings () distribution indicates that at a larger cutoff distance (between centroid of two aromatic rings), above 5Å, C-Hp interactions (T-shaped orientation) are more prevalent, while p-p interactions (stacked orientation) are more prevalent at a smaller cutoff distance. The connectivity patterns of p-p networks propose strong propensity of finding arrangement of aromatic residues as clusters rather than linear arrangement. We have also made a public domain database """"Aromatic-Aromatic Interactions Database"""" (A(2)ID) comprising of all types of p-p networks and their connectivity pattern present in proteins. 
It can be accessed by url http://203.199.182.73/gnsmmg/databases/aidb/aidb.html.",Aromatic-Aromatic Interactions Database,A(2)ID,http://203.199.182.73/gnsmmg/databases/aidb/aidb.html,an analysis of aromatic Ï€-networks in proteins +21398668,sc-PDB: a database for identifying variations and multiplicity of 'druggable' binding sites in proteins.,"

Background

The sc-PDB database is an annotated archive of druggable binding sites extracted from the Protein Data Bank. It contains all-atoms coordinates for 8166 protein-ligand complexes, chosen for their geometrical and physico-chemical properties. The sc-PDB provides a functional annotation for proteins, a chemical description for ligands and the detailed intermolecular interactions for complexes. The sc-PDB now includes a hierarchical classification of all the binding sites within a functional class.

Method

The sc-PDB entries were first clustered according to the protein name indifferent of the species. For each cluster, we identified dissimilar sites (e.g. catalytic and allosteric sites of an enzyme). SCOPE AND APPLICATIONS: The classification of sc-PDB targets by binding site diversity was intended to facilitate chemogenomics approaches to drug design. In ligand-based approaches, it avoids comparing ligands that do not share the same binding site. In structure-based approaches, it permits to quantitatively evaluate the diversity of the binding site definition (variations in size, sequence and/or structure).

Availability

The sc-PDB database is freely available at: http://bioinfo-pharma.u-strasbg.fr/scPDB.",sc-PDB,sc-PDB,http://bioinfo-pharma.u-strasbg.fr/scPDB,an annotated archive of druggable binding sites extracted from the Protein Data Bank +21398668,sc-PDB: a database for identifying variations and multiplicity of 'druggable' binding sites in proteins.,"

Background

The sc-PDB database is an annotated archive of druggable binding sites extracted from the Protein Data Bank. It contains all-atoms coordinates for 8166 protein-ligand complexes, chosen for their geometrical and physico-chemical properties. The sc-PDB provides a functional annotation for proteins, a chemical description for ligands and the detailed intermolecular interactions for complexes. The sc-PDB now includes a hierarchical classification of all the binding sites within a functional class.

Method

The sc-PDB entries were first clustered according to the protein name indifferent of the species. For each cluster, we identified dissimilar sites (e.g. catalytic and allosteric sites of an enzyme). SCOPE AND APPLICATIONS: The classification of sc-PDB targets by binding site diversity was intended to facilitate chemogenomics approaches to drug design. In ligand-based approaches, it avoids comparing ligands that do not share the same binding site. In structure-based approaches, it permits to quantitatively evaluate the diversity of the binding site definition (variations in size, sequence and/or structure).

Availability

The sc-PDB database is freely available at: http://bioinfo-pharma.u-strasbg.fr/scPDB.",sc-PDB,sc-PDB,http://bioinfo-pharma.u-strasbg.fr/scPDB,a database for identifying variations and multiplicity of 'druggable' binding sites in proteins +21624162,Developing a kidney and urinary pathway knowledge base.,"

Background

Chronic renal disease is a global health problem. The identification of suitable biomarkers could facilitate early detection and diagnosis and allow better understanding of the underlying pathology. One of the challenges in meeting this goal is the necessary integration of experimental results from multiple biological levels for further analysis by data mining. Data integration in the life science is still a struggle, and many groups are looking to the benefits promised by the Semantic Web for data integration.

Results

We present a Semantic Web approach to developing a knowledge base that integrates data from high-throughput experiments on kidney and urine. A specialised KUP ontology is used to tie the various layers together, whilst background knowledge from external databases is incorporated by conversion into RDF. Using SPARQL as a query mechanism, we are able to query for proteins expressed in urine and place these back into the context of genes expressed in regions of the kidney.

Conclusions

The KUPKB gives KUP biologists the means to ask queries across many resources in order to aggregate knowledge that is necessary for answering biological questions. The Semantic Web technologies we use, together with the background knowledge from the domain's ontologies, allows both rapid conversion and integration of this knowledge base. The KUPKB is still relatively small, but questions remain about scalability, maintenance and availability of the knowledge itself.

Availability

The KUPKB may be accessed via http://www.e-lico.eu/kupkb.",kidney and urinary pathway knowledge base,KUPKB,http://www.e-lico.eu/kupkb,a knowledge base that integrates data from high-throughput experiments on kidney and urine +21656910,A database of reaction monitoring mass spectrometry assays for elucidating therapeutic response in cancer.,"

Purpose

The Quantitative Assay Database (QuAD), http://proteome.moffitt.org/QUAD/, facilitates widespread implementation of quantitative mass spectrometry in cancer biology and clinical research through sharing of methods and reagents for monitoring protein expression and modification.

Experimental design

Liquid chromatography coupled to multiple reaction monitoring (LC-MRM) mass spectrometry assays are developed using SDS-PAGE fractionated lysates from cancer cell lines. Pathway maps created using GeneGO Metacore provide the biological relationships between proteins and illustrate concepts for multiplexed analysis; each protein can be selected to examine assay development at the protein and peptide levels.

Results

The coupling of SDS-PAGE and multiple reaction monitoring mass spectrometry screening has been used to detect 876 peptides from 218 cancer-related proteins in model systems including colon, lung, melanoma, leukemias, and myeloma, which has led to the development of 95 quantitative assays including stable-isotope-labeled peptide standards. Methods are published online and peptide standards are made available to the research community. Protein expression measurements for heat shock proteins, including a comparison with ELISA and monitoring response to the HSP90 inhibitor, 17-(dimethylaminoethylamino)-17-demethoxygeldanamycin (17-DMAG), are used to illustrate the components of the QuAD and its potential utility.

Conclusions and clinical relevance

This resource enables quantitative assessment of protein components of signaling pathways and biological processes and holds promise for systematic investigation of treatment responses in cancer.",The Quantitative Assay Database,QuAD,http://proteome.moffitt.org/QUAD/,A database of reaction monitoring mass spectrometry assays for elucidating therapeutic response in cancer +21769196,VPDB: Viral Protein Structural Database.,"

Unlabelled

Viral Protein Database is an interactive database for three dimensional viral proteins. Our aim is to provide a comprehensive resource to the community of structural virology, with an emphasis on the description of derived data from structural biology. Currently, VPDB includes ˜1,670 viral protein structures from >277 viruses with more than 465 virus strains. The whole database can be easily accessed through the user convenience text search. Interactivity has been enhanced by using Jmol, WebMol and Strap to visualize the viral protein molecular structure.

Availability

The database is available for free at http://www.vpdb.bicpu.edu.in.",Viral Protein Structural Database,VPDB,http://www.vpdb.bicpu.edu.in,an interactive database for three dimensional viral proteins +21821666,NeuroPedia: neuropeptide database and spectral library.,"

Summary

Neuropeptides are essential for cell-cell communication in neurological and endocrine physiological processes in health and disease. While many neuropeptides have been identified in previous studies, the resulting data has not been structured to facilitate further analysis by tandem mass spectrometry (MS/MS), the main technology for high-throughput neuropeptide identification. Many neuropeptides are difficult to identify when searching MS/MS spectra against large protein databases because of their atypical lengths (e.g. shorter/longer than common tryptic peptides) and lack of tryptic residues to facilitate peptide ionization/fragmentation. NeuroPedia is a neuropeptide encyclopedia of peptide sequences (including genomic and taxonomic information) and spectral libraries of identified MS/MS spectra of homolog neuropeptides from multiple species. Searching neuropeptide MS/MS data against known NeuroPedia sequences will improve the sensitivity of database search tools. Moreover, the availability of neuropeptide spectral libraries will also enable the utilization of spectral library search tools, which are known to further improve the sensitivity of peptide identification. These will also reinforce the confidence in peptide identifications by enabling visual comparisons between new and previously identified neuropeptide MS/MS spectra.

Availability

http://proteomics.ucsd.edu/Software/NeuroPedia.html

Contact

bandeira@ucsd.edu

Supplementary information

Supplementary materials are available at Bioinformatics online.",NeuroPedia,NeuroPedia,http://proteomics.ucsd.edu/Software/NeuroPedia.html,neuropeptide database and spectral library +22058129,DistiLD Database: diseases and traits in linkage disequilibrium blocks.,"Genome-wide association studies (GWAS) have identified thousands of single nucleotide polymorphisms (SNPs) associated with the risk of hundreds of diseases. However, there is currently no database that enables non-specialists to answer the following simple questions: which SNPs associated with diseases are in linkage disequilibrium (LD) with a gene of interest? Which chromosomal regions have been associated with a given disease, and which are the potentially causal genes in each region? To answer these questions, we use data from the HapMap Project to partition each chromosome into so-called LD blocks, so that SNPs in LD with each other are preferentially in the same block, whereas SNPs not in LD are in different blocks. By projecting SNPs and genes onto LD blocks, the DistiLD database aims to increase usage of existing GWAS results by making it easy to query and visualize disease-associated SNPs and genes in their chromosomal context. The database is available at http://distild.jensenlab.org/.",DistiLD Database,,http://distild.jensenlab.org/,diseases and traits in linkage disequilibrium blocks +22064864,Gene Expression Atlas update--a value-added database of microarray and sequencing-based functional genomics experiments.,"Gene Expression Atlas (http://www.ebi.ac.uk/gxa) is an added-value database providing information about gene expression in different cell types, organism parts, developmental stages, disease states, sample treatments and other biological/experimental conditions. The content of this database derives from curation, re-annotation and statistical analysis of selected data from the ArrayExpress Archive and the European Nucleotide Archive. 
A simple interface allows the user to query for differential gene expression either by gene names or attributes or by biological conditions, e.g. diseases, organism parts or cell types. Since our previous report we made 20 monthly releases and, as of Release 11.08 (August 2011), the database supports 19 species, which contains expression data measured for 19,014 biological conditions in 136,551 assays from 5598 independent studies.",Gene Expression Atlas,,http://www.ebi.ac.uk/gxa,"an added-value database providing information about gene expression in different cell types, organism parts, developmental stages, disease states, sample treatments and other biological/experimental conditions" +22064864,Gene Expression Atlas update--a value-added database of microarray and sequencing-based functional genomics experiments.,"Gene Expression Atlas (http://www.ebi.ac.uk/gxa) is an added-value database providing information about gene expression in different cell types, organism parts, developmental stages, disease states, sample treatments and other biological/experimental conditions. The content of this database derives from curation, re-annotation and statistical analysis of selected data from the ArrayExpress Archive and the European Nucleotide Archive. A simple interface allows the user to query for differential gene expression either by gene names or attributes or by biological conditions, e.g. diseases, organism parts or cell types. 
Since our previous report we made 20 monthly releases and, as of Release 11.08 (August 2011), the database supports 19 species, which contains expression data measured for 19,014 biological conditions in 136,551 assays from 5598 independent studies.",Gene Expression Atlas,,http://www.ebi.ac.uk/gxa,a value-added database of microarray and sequencing-based functional genomics experiments +22067443,PINA v2.0: mining interactome modules.,"The Protein Interaction Network Analysis (PINA) platform is a comprehensive web resource, which includes a database of unified protein-protein interaction data integrated from six manually curated public databases, and a set of built-in tools for network construction, filtering, analysis and visualization. The second version of PINA enhances its utility for studies of protein interactions at a network level, by including multiple collections of interaction modules identified by different clustering approaches from the whole network of protein interactions ('interactome') for six model organisms. All identified modules are fully annotated by enriched Gene Ontology terms, KEGG pathways, Pfam domains and the chemical and genetic perturbations collection from MSigDB. Moreover, a new tool is provided for module enrichment analysis in addition to simple query function. The interactome data are also available on the web site for further bioinformatics analysis. 
PINA is freely accessible at http://cbg.garvan.unsw.edu.au/pina/.",Protein Interaction Network Analysis,PINA,http://cbg.garvan.unsw.edu.au/pina/,"a comprehensive web resource, which includes a database of unified protein-protein interaction data integrated from six manually curated public databases, and a set of built-in tools for network construction, filtering, analysis and visualization" +22067445,"SCRIPDB: a portal for easy access to syntheses, chemicals and reactions in patents.","The patent literature is a rich catalog of biologically relevant chemicals; many public and commercial molecular databases contain the structures disclosed in patent claims. However, patents are an equally rich source of metadata about bioactive molecules, including mechanism of action, disease class, homologous experimental series, structural alternatives, or the synthetic pathways used to produce molecules of interest. Unfortunately, this metadata is discarded when chemical structures are deposited separately in databases. SCRIPDB is a chemical structure database designed to make this metadata accessible. SCRIPDB provides the full original patent text, reactions and relationships described within any individual patent, in addition to the molecular files common to structural databases. We discuss how such information is valuable in medical text mining, chemical image analysis, reaction extraction and in silico pharmaceutical lead optimization. SCRIPDB may be searched by exact chemical structure, substructure or molecular similarity and the results may be restricted to patents describing synthetic routes. 
SCRIPDB is available at http://dcv.uhnres.utoronto.ca/SCRIPDB.",SCRIPDB,SCRIPDB,http://dcv.uhnres.utoronto.ca/SCRIPDB,"a portal for easy access to syntheses, chemicals and reactions in patents" +22067451,IDEAL: Intrinsically Disordered proteins with Extensive Annotations and Literature.,"IDEAL, Intrinsically Disordered proteins with Extensive Annotations and Literature (http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL/), is a collection of knowledge on experimentally verified intrinsically disordered proteins. IDEAL contains manual annotations by curators on intrinsically disordered regions, interaction regions to other molecules, post-translational modification sites, references and structural domain assignments. In particular, IDEAL explicitly describes protean segments that can be transformed from a disordered state to an ordered state. Since in most cases they can act as molecular recognition elements upon binding of partner proteins, IDEAL provides a data resource for functional regions of intrinsically disordered proteins. The information in IDEAL is provided on a user-friendly graphical view and in a computer-friendly XML format.",Intrinsically Disordered proteins with Extensive Annotations and Literature,IDEAL,http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL/,a collection of knowledge on experimentally verified intrinsically disordered proteins +22080559,"The Aspergillus Genome Database (AspGD): recent developments in comprehensive multispecies curation, comparative genomics and community resources.","The Aspergillus Genome Database (AspGD; http://www.aspgd.org) is a freely available, web-based resource for researchers studying fungi of the genus Aspergillus, which includes organisms of clinical, agricultural and industrial importance. 
AspGD curators have now completed comprehensive review of the entire published literature about Aspergillus nidulans and Aspergillus fumigatus, and this annotation is provided with streamlined, ortholog-based navigation of the multispecies information. AspGD facilitates comparative genomics by providing a full-featured genomics viewer, as well as matched and standardized sets of genomic information for the sequenced aspergilli. AspGD also provides resources to foster interaction and dissemination of community information and resources. We welcome and encourage feedback at aspergillus-curator@lists.stanford.edu.",Aspergillus Genome Database,AspGD,http://www.aspgd.org,"a freely available, web-based resource for researchers studying fungi of the genus Aspergillus, which includes organisms of clinical, agricultural and industrial importance" +22086951,The UCSC Genome Browser database: extensions and updates 2011.,"The University of California Santa Cruz Genome Browser (http://genome.ucsc.edu) offers online public access to a growing database of genomic sequence and annotations for a wide variety of organisms. The Browser is an integrated tool set for visualizing, comparing, analyzing and sharing both publicly available and user-generated genomic data sets. In the past year, the local database has been updated with four new species assemblies, and we anticipate another four will be released by the end of 2011. Further, a large number of annotation tracks have been either added, updated by contributors, or remapped to the latest human reference genome. Among these are new phenotype and disease annotations, UCSC genes, and a major dbSNP update, which required new visualization methods. Growing beyond the local database, this year we have introduced 'track data hubs', which allow the Genome Browser to provide access to remotely located sets of annotations. 
This feature is designed to significantly extend the number and variety of annotation tracks that are publicly available for visualization and analysis from within our site. We have also introduced several usability features including track search and a context-sensitive menu of options available with a right-click anywhere on the Browser's image.",University of California Santa Cruz Genome Browser,UCSC Genome Browser database,http://genome.ucsc.edu,a growing database of genomic sequence and annotations for a wide variety of organisms +22086963,Ensembl 2012.,"The Ensembl project (http://www.ensembl.org) provides genome resources for chordate genomes with a particular focus on human genome data as well as data for key model organisms such as mouse, rat and zebrafish. Five additional species were added in the last year including gibbon (Nomascus leucogenys) and Tasmanian devil (Sarcophilus harrisii) bringing the total number of supported species to 61 as of Ensembl release 64 (September 2011). Of these, 55 species appear on the main Ensembl website and six species are provided on the Ensembl preview site (Pre!Ensembl; http://pre.ensembl.org) with preliminary support. The past year has also seen improvements across the project.",Ensembl,Ensembl,http://www.ensembl.org, +22102583,Mouse Phenome Database (MPD).,"The Mouse Phenome Project was launched a decade ago to complement mouse genome sequencing efforts by promoting new phenotyping initiatives under standardized conditions and collecting the data in a central public database, the Mouse Phenome Database (MPD; http://phenome.jax.org). MPD houses a wealth of strain characteristics data to facilitate the use of the laboratory mouse in translational research for human health and disease, helping alleviate problems involving experimentation in humans that cannot be done practically or ethically. 
Data sets are voluntarily contributed by researchers from a variety of institutions and settings, or in some cases, retrieved by MPD staff from public sources. MPD maintains a growing collection of standardized reference data that assists investigators in selecting mouse strains for research applications; houses treatment/control data for drug studies and other interventions; offers a standardized platform for discovering genotype-phenotype relationships; and provides tools for hypothesis testing. MPD improvements and updates since our last NAR report are presented, including the addition of new tools and features to facilitate navigation and data mining as well as the acquisition of new data (phenotypic, genotypic and gene expression).",Mouse Phenome Database,MPD,http://phenome.jax.org, +22102771,EuDBase: An online resource for automated EST analysis pipeline (ESTFrontier) and database for red seaweed Eucheuma denticulatum.,"Functional genomics has proven to be an efficient tool in identifying genes involved in various biological functions. However the availability of commercially important seaweed Eucheuma denticulatum functional resources is still limited. EuDBase is the first seaweed online repository that provides integrated access to ESTs of Eucheuma denticulatum generated from samples collected from Kudat and Semporna in Sabah, Malaysia. The database stored 10,031 ESTs that are clustered and assembled into 2,275 unique transcripts (UT) and 955 singletons. Raw data were automatically processed using ESTFrontier, an in-house automated EST analysis pipeline. Data was collected in MySQL database. Web interface is implemented using PHP and it allows browsing and querying EuDBase through search engine. Data is searchable via BLAST hit, domain search, Gene Ontology or KEGG Pathway. A user-friendly interface allows the identification of sequences either using a simple text query or similarity search. 
The development of EuDBase is initiated to store, manage and analyze the E. denticulatum ESTs and to provide accumulative digital resources for the use of global scientific community. EuDBase is freely available from http://www.inbiosis.ukm.my/eudbase/.",EuDBase,EuDBase,http://www.inbiosis.ukm.my/eudbase/,"the first seaweed online repository that provides integrated access to ESTs of Eucheuma denticulatum generated from samples collected from Kudat and Semporna in Sabah, Malaysia" +22102771,EuDBase: An online resource for automated EST analysis pipeline (ESTFrontier) and database for red seaweed Eucheuma denticulatum.,"Functional genomics has proven to be an efficient tool in identifying genes involved in various biological functions. However the availability of commercially important seaweed Eucheuma denticulatum functional resources is still limited. EuDBase is the first seaweed online repository that provides integrated access to ESTs of Eucheuma denticulatum generated from samples collected from Kudat and Semporna in Sabah, Malaysia. The database stored 10,031 ESTs that are clustered and assembled into 2,275 unique transcripts (UT) and 955 singletons. Raw data were automatically processed using ESTFrontier, an in-house automated EST analysis pipeline. Data was collected in MySQL database. Web interface is implemented using PHP and it allows browsing and querying EuDBase through search engine. Data is searchable via BLAST hit, domain search, Gene Ontology or KEGG Pathway. A user-friendly interface allows the identification of sequences either using a simple text query or similarity search. The development of EuDBase is initiated to store, manage and analyze the E. denticulatum ESTs and to provide accumulative digital resources for the use of global scientific community. 
EuDBase is freely available from http://www.inbiosis.ukm.my/eudbase/.",EuDBase,EuDBase,http://www.inbiosis.ukm.my/eudbase/,An online resource for automated EST analysis pipeline (ESTFrontier) and database for red seaweed Eucheuma denticulatum +22121220,The IntAct molecular interaction database in 2012.,"IntAct is an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions. Two levels of curation are now available within the database, with both IMEx-level annotation and less detailed MIMIx-compatible entries currently supported. As from September 2011, IntAct contains approximately 275,000 curated binary interaction evidences from over 5000 publications. The IntAct website has been improved to enhance the search process and in particular the graphical display of the results. New data download formats are also available, which will facilitate the inclusion of IntAct's data in the Semantic Web. IntAct is an active contributor to the IMEx consortium (http://www.imexconsortium.org). IntAct source code and data are freely available at http://www.ebi.ac.uk/intact.",IntAct,IntAct,http://www.ebi.ac.uk/intact,"an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions" +22135298,PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse.,"PhosphoSitePlus (http://www.phosphosite.org) is an open, comprehensive, manually curated and interactive resource for studying experimentally observed post-translational modifications, primarily of human and mouse proteins. It encompasses 1,30,000 non-redundant modification sites, primarily phosphorylation, ubiquitinylation and acetylation. The interface is designed for clarity and ease of navigation. 
From the home page, users can launch simple or complex searches and browse high-throughput data sets by disease, tissue or cell line. Searches can be restricted by specific treatments, protein types, domains, cellular components, disease, cell types, cell lines, tissue and sequences or motifs. A few clicks of the mouse will take users to substrate pages or protein pages with sites, sequences, domain diagrams and molecular visualization of side-chains known to be modified; to site pages with information about how the modified site relates to the functions of specific proteins and cellular processes and to curated information pages summarizing the details from one record. PyMOL and Chimera scripts that colorize reactive groups on residues that are modified can be downloaded. Features designed to facilitate proteomic analyses include downloads of modification sites, kinase-substrate data sets, sequence logo generators, a Cytoscape plugin and BioPAX download to enable pathway visualization of the kinase-substrate interactions in PhosphoSitePlus®.",PhosphoSitePlus,PhosphoSitePlus,http://www.phosphosite.org,"an open, comprehensive, manually curated and interactive resource for studying experimentally observed post-translational modifications, primarily of human and mouse proteins" +22135298,PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse.,"PhosphoSitePlus (http://www.phosphosite.org) is an open, comprehensive, manually curated and interactive resource for studying experimentally observed post-translational modifications, primarily of human and mouse proteins. It encompasses 1,30,000 non-redundant modification sites, primarily phosphorylation, ubiquitinylation and acetylation. The interface is designed for clarity and ease of navigation. 
From the home page, users can launch simple or complex searches and browse high-throughput data sets by disease, tissue or cell line. Searches can be restricted by specific treatments, protein types, domains, cellular components, disease, cell types, cell lines, tissue and sequences or motifs. A few clicks of the mouse will take users to substrate pages or protein pages with sites, sequences, domain diagrams and molecular visualization of side-chains known to be modified; to site pages with information about how the modified site relates to the functions of specific proteins and cellular processes and to curated information pages summarizing the details from one record. PyMOL and Chimera scripts that colorize reactive groups on residues that are modified can be downloaded. Features designed to facilitate proteomic analyses include downloads of modification sites, kinase-substrate data sets, sequence logo generators, a Cytoscape plugin and BioPAX download to enable pathway visualization of the kinase-substrate interactions in PhosphoSitePlus®.",PhosphoSitePlus,PhosphoSitePlus,http://www.phosphosite.org,a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse +22139934,Cube-DB: detection of functional divergence in human protein families.,"Cube-DB is a database of pre-evaluated results for detection of functional divergence in human/vertebrate protein families. The analysis is organized around the nomenclature associated with the human proteins, but based on all currently available vertebrate genomes. Using full genomes enables us, through a mutual-best-hit strategy, to construct comparable taxonomical samples for all paralogues under consideration. Functional specialization is scored on the residue level according to two models of behavior after divergence: heterotachy and homotachy. 
In the first case, the positions on the protein sequence are scored highly if they are conserved in the reference group of orthologs, and overlap poorly with the residue type choice in the paralogs groups (such positions will also be termed functional determinants). The second model additionally requires conservation within each group of paralogs (functional discriminants). The scoring functions are phylogeny independent, but sensitive to the residue type similarity. The results are presented as a table of per-residue scores, and mapped onto related structure (when available) via browser-embedded visualization tool. They can also be downloaded as a spreadsheet table, and sessions for two additional molecular visualization tools. The database interface is available at http://epsf.bmad.bii.a-star.edu.sg/cube/db/html/home.html.",Cube-DB,Cube-DB, http://epsf.bmad.bii.a-star.edu.sg/cube/db/html/home.html,a database of pre-evaluated results for detection of functional divergence in human/vertebrate protein families +22230935,Database for crude drugs and Kampo medicine.,"A wiki-based repository for crude drugs and Kampo medicine is introduced. It provides taxonomic and chemical information for 158 crude drugs and 348 prescriptions of the traditional Kampo medicine in Japan, which is a variation of ancient Chinese medicine. The system is built on MediaWiki with extensions for inline page search and for sending user-input elements to the server. These functions together realize implementation of word checks and data integration at the user-level. In this scheme, any user can participate in creating an integrated database with controlled vocabularies on the wiki system. Our implementation and data are accessible at http://metabolomics.jp/wiki/.",,,http://metabolomics.jp/wiki/,Database for crude drugs and Kampo medicine +22276777,miRdSNP: a database of disease-associated SNPs and microRNA target sites on 3'UTRs of human genes.,"

Background

Single nucleotide polymorphisms (SNPs) can lead to the susceptibility and onset of diseases through their effects on gene expression at the posttranscriptional level. Recent findings indicate that SNPs could create, destroy, or modify the efficiency of miRNA binding to the 3'UTR of a gene, resulting in gene dysregulation. With the rapidly growing number of published disease-associated SNPs (dSNPs), there is a strong need for resources specifically recording dSNPs on the 3'UTRs and their nucleotide distance from miRNA target sites. We present here miRdSNP, a database incorporating three important areas of dSNPs, miRNA target sites, and diseases.

Description

miRdSNP provides a unique database of dSNPs on the 3'UTRs of human genes manually curated from PubMed. The current release includes 786 dSNP-disease associations for 630 unique dSNPs and 204 disease types. miRdSNP annotates genes with experimentally confirmed targeting by miRNAs and indexes miRNA target sites predicted by TargetScan and PicTar as well as potential miRNA target sites newly generated by dSNPs. A robust web interface and search tools are provided for studying the proximity of miRNA binding sites to dSNPs in relation to human diseases. Searches can be dynamically filtered by gene name, miRBase ID, target prediction algorithm, disease, and any nucleotide distance between dSNPs and miRNA target sites. Results can be viewed at the sequence level showing the annotated locations for miRNA target sites and dSNPs on the entire 3'UTR sequences. The integration of dSNPs with the UCSC Genome browser is also supported.

Conclusion

miRdSNP provides a comprehensive data source of dSNPs and robust tools for exploring their distance from miRNA target sites on the 3'UTRs of human genes. miRdSNP enables researchers to further explore the molecular mechanism of gene dysregulation for dSNPs at posttranscriptional level. miRdSNP is freely available on the web at http://mirdsnp.ccr.buffalo.edu.",miRdSNP,miRdSNP,http://mirdsnp.ccr.buffalo.edu,a comprehensive data source of dSNPs and robust tools for exploring their distance from miRNA target sites on the 3'UTRs of human genes +22276777,miRdSNP: a database of disease-associated SNPs and microRNA target sites on 3'UTRs of human genes.,"

Background

Single nucleotide polymorphisms (SNPs) can lead to the susceptibility and onset of diseases through their effects on gene expression at the posttranscriptional level. Recent findings indicate that SNPs could create, destroy, or modify the efficiency of miRNA binding to the 3'UTR of a gene, resulting in gene dysregulation. With the rapidly growing number of published disease-associated SNPs (dSNPs), there is a strong need for resources specifically recording dSNPs on the 3'UTRs and their nucleotide distance from miRNA target sites. We present here miRdSNP, a database incorporating three important areas of dSNPs, miRNA target sites, and diseases.

Description

miRdSNP provides a unique database of dSNPs on the 3'UTRs of human genes manually curated from PubMed. The current release includes 786 dSNP-disease associations for 630 unique dSNPs and 204 disease types. miRdSNP annotates genes with experimentally confirmed targeting by miRNAs and indexes miRNA target sites predicted by TargetScan and PicTar as well as potential miRNA target sites newly generated by dSNPs. A robust web interface and search tools are provided for studying the proximity of miRNA binding sites to dSNPs in relation to human diseases. Searches can be dynamically filtered by gene name, miRBase ID, target prediction algorithm, disease, and any nucleotide distance between dSNPs and miRNA target sites. Results can be viewed at the sequence level showing the annotated locations for miRNA target sites and dSNPs on the entire 3'UTR sequences. The integration of dSNPs with the UCSC Genome browser is also supported.

Conclusion

miRdSNP provides a comprehensive data source of dSNPs and robust tools for exploring their distance from miRNA target sites on the 3'UTRs of human genes. miRdSNP enables researchers to further explore the molecular mechanism of gene dysregulation for dSNPs at posttranscriptional level. miRdSNP is freely available on the web at http://mirdsnp.ccr.buffalo.edu.",miRdSNP,miRdSNP,http://mirdsnp.ccr.buffalo.edu,a database of disease-associated SNPs and microRNA target sites on 3'UTRs of human genes +22334387,The androgen receptor gene mutations database: 2012 update.,"The current version of the androgen receptor gene (AR) mutations database is described. A major change to the database is that the nomenclature and numbering scheme now conforms to all Human Genome Variation Society norms. The total number of reported mutations has risen from 605 to 1,029 since 2004. The database now contains a number of mutations that are associated with prostate cancer (CaP) treatment regimens, while the number of AR mutations found in CaP tissues has more than doubled from 76 to 159. In addition, in a number of androgen insensitivity syndrome (AIS) and CaP cases, multiple mutations have been found within the same tissue samples. For the first time, we report on a disconnect within the AIS phenotype-genotype relationship among our own patient database, in that over 40% of our patients with a classic complete AIS or partial AIS phenotypes did not appear to have a mutation in their AR gene. The implications of this phenomenon on future locus-specific mutation database (LSDB) development are discussed, together with the concept that mutations can be associated with both loss- and gain-of-function, and the effect of multiple AR mutations within individuals. 
The database is available on the internet (http://androgendb.mcgill.ca), and a web-based LSDB with the variants using the Leiden Open Variation Database platform is available at http://www.lovd.nl/AR.",androgen receptor gene mutations database,,http://androgendb.mcgill.ca, +22345505,ANAP: an integrated knowledge base for Arabidopsis protein interaction network analysis.,"Protein interactions are fundamental to the molecular processes occurring within an organism and can be utilized in network biology to help organize, simplify, and understand biological complexity. Currently, there are more than 10 publicly available Arabidopsis (Arabidopsis thaliana) protein interaction databases. However, there are limitations with these databases, including different types of interaction evidence, a lack of defined standards for protein identifiers, differing levels of information, and, critically, a lack of integration between them. In this paper, we present an interactive bioinformatics Web tool, ANAP (Arabidopsis Network Analysis Pipeline), which serves to effectively integrate the different data sets and maximize access to available data. ANAP has been developed for Arabidopsis protein interaction integration and network-based study to facilitate functional protein network analysis. ANAP integrates 11 Arabidopsis protein interaction databases, comprising 201,699 unique protein interaction pairs, 15,208 identifiers (including 11,931 The Arabidopsis Information Resource Arabidopsis Genome Initiative codes), 89 interaction detection methods, 73 species that interact with Arabidopsis, and 6,161 references. ANAP can be used as a knowledge base for constructing protein interaction networks based on user input and supports both direct and indirect interaction analysis. It has an intuitive graphical interface allowing easy network visualization and provides extensive detailed evidence for each interaction. 
In addition, ANAP displays the gene and protein annotation in the generated interactive network with links to The Arabidopsis Information Resource, the AtGenExpress Visualization Tool, the Arabidopsis 1,001 Genomes GBrowse, the Protein Knowledgebase, the Kyoto Encyclopedia of Genes and Genomes, and the Ensembl Genome Browser to significantly aid functional network analysis. The tool is available open access at http://gmdd.shgmo.org/Computational-Biology/ANAP.",Arabidopsis Network Analysis Pipeline,ANAP,http://gmdd.shgmo.org/Computational-Biology/ANAP,an integrated knowledge base for Arabidopsis protein interaction network analysis +22369658,DetoxiProt: an integrated database for detoxification proteins.,"

Background

Detoxification proteins are a class of proteins for degradation and/or elimination of endogenous and exogenous toxins or medicines, as well as reactive oxygen species (ROS) produced by these materials. Most of these proteins are generated as a response to the stimulation of toxins or medicines. They are essential for the clearance of harmful substances and for maintenance of physiological balance in organisms. Thus, it is important to collect and integrate information on detoxification proteins.

Results

To store, retrieve and analyze the information related to their features and functions, we developed the DetoxiProt, a comprehensive database for annotation of these proteins. This database provides detailed introductions about different classes of the detoxification proteins. Extensive annotations of these proteins, including sequences, structures, features, inducers, inhibitors, substrates, chromosomal location, functional domains as well as physiological-biochemical properties were generated. Furthermore, pre-computed BLAST results, multiple sequence alignments and evolutionary trees for detoxification proteins are also provided for evolutionary study of conserved function and pathways. The current version of DetoxiProt contains 5956 protein entries distributed in 628 organisms. An easy to use web interface was designed, so that annotations about each detoxification protein can be retrieved by browsing with a specific method or by searching with different criteria.

Conclusions

DetoxiProt provides an effective and efficient way of accessing the detoxification protein sequences and other high-quality information. This database would be a valuable source for toxicologists, pharmacologists and medicinal chemists. DetoxiProt database is freely available at http://lifecenter.sgst.cn/detoxiprot/.",DetoxiProt,DetoxiProt,http://lifecenter.sgst.cn/detoxiprot/,an integrated database for detoxification proteins +22784567,SigCS base: an integrated genetic information resource for human cerebral stroke.,"

Background

To understand how stroke risk factors mechanistically contribute to stroke, the genetic components regulating each risk factor need to be integrated and evaluated with respect to biological function and through pathway-based algorithms. This resource will provide information to researchers studying the molecular and genetic causes of stroke in terms of genomic variants, genes, and pathways.

Methods

Reported genetic variants, gene structure, phenotypes, and literature information regarding stroke were collected and extracted from publicly available databases describing variants, genome, proteome, functional annotation, and disease subtypes. Stroke related candidate pathways and etiologic genes that participate significantly in risk were analyzed in terms of canonical pathways in public biological pathway databases. These efforts resulted in a relational database of genetic signals of cerebral stroke, SigCS base, which implements an effective web retrieval system.

Results

The current version of SigCS base documents 1943 non-redundant genes with 11472 genetic variants and 165 non-redundant pathways. The web retrieval system of SigCS base consists of two principal search flows, including: 1) a gene-based variant search using gene table browsing or a keyword search, and, 2) a pathway-based variant search using pathway table browsing. SigCS base is freely accessible at http://sysbio.kribb.re.kr/sigcs.

Conclusions

SigCS base is an effective tool that can assist researchers in the identification of the genetic factors associated with stroke by utilizing existing literature information, selecting candidate genes and variants for experimental studies, and examining the pathways that contribute to the pathophysiological mechanisms of stroke.",SigCS base,,http://sysbio.kribb.re.kr/sigcs,a relational database of genetic signals of cerebral stroke +22784567,SigCS base: an integrated genetic information resource for human cerebral stroke.,"

Background

To understand how stroke risk factors mechanistically contribute to stroke, the genetic components regulating each risk factor need to be integrated and evaluated with respect to biological function and through pathway-based algorithms. This resource will provide information to researchers studying the molecular and genetic causes of stroke in terms of genomic variants, genes, and pathways.

Methods

Reported genetic variants, gene structure, phenotypes, and literature information regarding stroke were collected and extracted from publicly available databases describing variants, genome, proteome, functional annotation, and disease subtypes. Stroke related candidate pathways and etiologic genes that participate significantly in risk were analyzed in terms of canonical pathways in public biological pathway databases. These efforts resulted in a relational database of genetic signals of cerebral stroke, SigCS base, which implements an effective web retrieval system.

Results

The current version of SigCS base documents 1943 non-redundant genes with 11472 genetic variants and 165 non-redundant pathways. The web retrieval system of SigCS base consists of two principal search flows, including: 1) a gene-based variant search using gene table browsing or a keyword search, and, 2) a pathway-based variant search using pathway table browsing. SigCS base is freely accessible at http://sysbio.kribb.re.kr/sigcs.

Conclusions

SigCS base is an effective tool that can assist researchers in the identification of the genetic factors associated with stroke by utilizing existing literature information, selecting candidate genes and variants for experimental studies, and examining the pathways that contribute to the pathophysiological mechanisms of stroke.",SigCS base,,http://sysbio.kribb.re.kr/sigcs,an integrated genetic information resource for human cerebral stroke +22836712,A comparative cellular and molecular biology of longevity database.,"Discovering key cellular and molecular traits that promote longevity is a major goal of aging and longevity research. One experimental strategy is to determine which traits have been selected during the evolution of longevity in naturally long-lived animal species. This comparative approach has been applied to lifespan research for nearly four decades, yielding hundreds of datasets describing aspects of cell and molecular biology hypothesized to relate to animal longevity. Here, we introduce a Comparative Cellular and Molecular Biology of Longevity Database, available at ( http://genomics.brocku.ca/ccmbl/ ), as a compendium of comparative cell and molecular data presented in the context of longevity. This open access database will facilitate the meta-analysis of amalgamated datasets using standardized maximum lifespan (MLSP) data (from AnAge). The first edition contains over 800 data records describing experimental measurements of cellular stress resistance, reactive oxygen species metabolism, membrane composition, protein homeostasis, and genome homeostasis as they relate to vertebrate species MLSP. 
The purpose of this review is to introduce the database and briefly demonstrate its use in the meta-analysis of combined datasets.",,,http://genomics.brocku.ca/ccmbl/,A comparative cellular and molecular biology of longevity database +22912585,Dissecting the gene network of dietary restriction to identify evolutionarily conserved pathways and new functional genes.,"Dietary restriction (DR), limiting nutrient intake from diet without causing malnutrition, delays the aging process and extends lifespan in multiple organisms. The conserved life-extending effect of DR suggests the involvement of fundamental mechanisms, although these remain a subject of debate. To help decipher the life-extending mechanisms of DR, we first compiled a list of genes that if genetically altered disrupt or prevent the life-extending effects of DR. We called these DR-essential genes and identified more than 100 in model organisms such as yeast, worms, flies, and mice. In order for other researchers to benefit from this first curated list of genes essential for DR, we established an online database called GenDR (http://genomics.senescence.info/diet/). To dissect the interactions of DR-essential genes and discover the underlying lifespan-extending mechanisms, we then used a variety of network and systems biology approaches to analyze the gene network of DR. We show that DR-essential genes are more conserved at the molecular level and have more molecular interactions than expected by chance. Furthermore, we employed a guilt-by-association method to predict novel DR-essential genes. In budding yeast, we predicted nine genes related to vacuolar functions; we show experimentally that mutations deleting eight of those genes prevent the life-extending effects of DR. Three of these mutants (OPT2, FRE6, and RCR2) had extended lifespan under ad libitum, indicating that the lack of further longevity under DR is not caused by a general compromise of fitness. 
These results demonstrate how network analyses of DR using GenDR can be used to make phenotypically relevant predictions. Moreover, gene-regulatory circuits reveal that the DR-induced transcriptional signature in yeast involves nutrient-sensing, stress responses and meiotic transcription factors. Finally, comparing the influence of gene expression changes during DR on the interactomes of multiple organisms led us to suggest that DR commonly suppresses translation, while stimulating an ancient reproduction-related process.",GenDR,GenDR,http://genomics.senescence.info/diet/,first curated list of genes essential for DR +23084601,PESNPdb: a comprehensive database of SNPs studied in association with pre-eclampsia.,"Pre-eclampsia is a pregnancy specific disorder that can be life threatening for mother and child. Multiple studies have been carried out in an attempt to identify SNPs that contribute to the genetic susceptibility of the disease. Here we describe PESNPdb (http://bejerano.stanford.edu/pesnpdb), a database aimed at centralizing SNP and study details investigated in association with pre-eclampsia. We also describe a Placenta Disorders ontology that utilizes information from PESNPdb. The main focus of PESNPdb is to help researchers study the genetic complexity of pre-eclampsia through a user-friendly interface that encourages community participation.",PESNPdb,PESNPdb,http://bejerano.stanford.edu/pesnpdb,a database aimed at centralizing SNP and study details investigated in association with pre-eclampsia +23084601,PESNPdb: a comprehensive database of SNPs studied in association with pre-eclampsia.,"Pre-eclampsia is a pregnancy specific disorder that can be life threatening for mother and child. Multiple studies have been carried out in an attempt to identify SNPs that contribute to the genetic susceptibility of the disease. 
Here we describe PESNPdb (http://bejerano.stanford.edu/pesnpdb), a database aimed at centralizing SNP and study details investigated in association with pre-eclampsia. We also describe a Placenta Disorders ontology that utilizes information from PESNPdb. The main focus of PESNPdb is to help researchers study the genetic complexity of pre-eclampsia through a user-friendly interface that encourages community participation.",PESNPdb,PESNPdb,http://bejerano.stanford.edu/pesnpdb,a comprehensive database of SNPs studied in association with pre-eclampsia +23093601,DoriC 5.0: an updated database of oriC regions in both bacterial and archaeal genomes.,"Replication of chromosomes is one of the central events in the cell cycle. Chromosome replication begins at specific sites, called origins of replication (oriCs), for all three domains of life. However, the origins of replication still remain unknown in a considerably large number of bacterial and archaeal genomes completely sequenced so far. The availability of increasing complete bacterial and archaeal genomes has created challenges and opportunities for identification of their oriCs in silico, as well as in vivo. Based on the Z-curve theory, we have developed a web-based system Ori-Finder to predict oriCs in bacterial genomes with high accuracy and reliability by taking advantage of comparative genomics, and the predicted oriC regions have been organized into an online database DoriC, which is publicly available at http://tubic.tju.edu.cn/doric/ since 2007. Five years after we constructed DoriC, the database has significant advances over the number of bacterial genomes, increasing about 4-fold. Additionally, oriC regions in archaeal genomes identified by in vivo experiments, as well as in silico analyses, have also been added to the database. 
Consequently, the latest release of DoriC contains oriCs for >1500 bacterial genomes and 81 archaeal genomes, respectively.",DoriC 5.0,DoriC,http://tubic.tju.edu.cn/doric/,an updated database of oriC regions in both bacterial and archaeal genomes +23118484,MODOMICS: a database of RNA modification pathways--2013 update.,"MODOMICS is a database of RNA modifications that provides comprehensive information concerning the chemical structures of modified ribonucleosides, their biosynthetic pathways, RNA-modifying enzymes and location of modified residues in RNA sequences. In the current database version, accessible at http://modomics.genesilico.pl, we included new features: a census of human and yeast snoRNAs involved in RNA-guided RNA modification, a new section covering the 5'-end capping process, and a catalogue of 'building blocks' for chemical synthesis of a large variety of modified nucleosides. The MODOMICS collections of RNA modifications, RNA-modifying enzymes and modified RNAs have been also updated. A number of newly identified modified ribonucleosides and more than one hundred functionally and structurally characterized proteins from various organisms have been added. In the RNA sequences section, snRNAs and snoRNAs with experimentally mapped modified nucleosides have been added and the current collection of rRNA and tRNA sequences has been substantially enlarged. To facilitate literature searches, each record in MODOMICS has been cross-referenced to other databases and to selected key publications. 
New options for database searching and querying have been implemented, including a BLAST search of protein sequences and a PARALIGN search of the collected nucleic acid sequences.",MODOMICS,MODOMICS,http://modomics.genesilico.pl,"a database of RNA modifications that provides comprehensive information concerning the chemical structures of modified ribonucleosides, their biosynthetic pathways, RNA-modifying enzymes and location of modified residues in RNA sequences" +23118484,MODOMICS: a database of RNA modification pathways--2013 update.,"MODOMICS is a database of RNA modifications that provides comprehensive information concerning the chemical structures of modified ribonucleosides, their biosynthetic pathways, RNA-modifying enzymes and location of modified residues in RNA sequences. In the current database version, accessible at http://modomics.genesilico.pl, we included new features: a census of human and yeast snoRNAs involved in RNA-guided RNA modification, a new section covering the 5'-end capping process, and a catalogue of 'building blocks' for chemical synthesis of a large variety of modified nucleosides. The MODOMICS collections of RNA modifications, RNA-modifying enzymes and modified RNAs have been also updated. A number of newly identified modified ribonucleosides and more than one hundred functionally and structurally characterized proteins from various organisms have been added. In the RNA sequences section, snRNAs and snoRNAs with experimentally mapped modified nucleosides have been added and the current collection of rRNA and tRNA sequences has been substantially enlarged. To facilitate literature searches, each record in MODOMICS has been cross-referenced to other databases and to selected key publications. 
New options for database searching and querying have been implemented, including a BLAST search of protein sequences and a PARALIGN search of the collected nucleic acid sequences.",MODOMICS,MODOMICS,http://modomics.genesilico.pl,a database of RNA modification pathways +23143106,EcoCyc: fusing model organism databases with systems biology.,"EcoCyc (http://EcoCyc.org) is a model organism database built on the genome sequence of Escherichia coli K-12 MG1655. Expert manual curation of the functions of individual E. coli gene products in EcoCyc has been based on information found in the experimental literature for E. coli K-12-derived strains. Updates to EcoCyc content continue to improve the comprehensive picture of E. coli biology. The utility of EcoCyc is enhanced by new tools available on the EcoCyc web site, and the development of EcoCyc as a teaching tool is increasing the impact of the knowledge collected in EcoCyc.",EcoCyc,EcoCyc,http://EcoCyc.org,a model organism database built on the genome sequence of Escherichia coli K-12 MG1655 +23151233,PolySac3DB: an annotated data base of 3 dimensional structures of polysaccharides.,"

Background

Polysaccharides are ubiquitously present in the living world. Their structural versatility makes them important and interesting components in numerous biological and technological processes ranging from structural stabilization to a variety of immunologically important molecular recognition events. The knowledge of polysaccharide three-dimensional (3D) structure is important in studying carbohydrate-mediated host-pathogen interactions, interactions with other bio-macromolecules, drug design and vaccine development as well as material science applications or production of bio-ethanol.

Description

PolySac3DB is an annotated database that contains the 3D structural information of 157 polysaccharide entries that have been collected from an extensive screening of scientific literature. They have been systematically organized using standard names in the field of carbohydrate research into 18 categories representing polysaccharide families. Structure-related information includes the saccharides making up the repeat unit(s) and their glycosidic linkages, the expanded 3D representation of the repeat unit, unit cell dimensions and space group, helix type, diffraction diagram(s) (when applicable), experimental and/or simulation methods used for structure description, link to the abstract of the publication, reference and the atomic coordinate files for visualization and download. The database is accompanied by a user-friendly graphical user interface (GUI). It features interactive displays of polysaccharide structures and customized search options for beginners and experts, respectively. The site also serves as an information portal for polysaccharide structure determination techniques. The web-interface also references external links where other carbohydrate-related resources are available.

Conclusion

PolySac3DB is established to maintain information on the detailed 3D structures of polysaccharides. All the data and features are available via the web-interface utilizing the search engine and can be accessed at http://polysac3db.cermav.cnrs.fr.",PolySac3DB,PolySac3DB,http://polysac3db.cermav.cnrs.fr,an annotated data base of 3 dimensional structures of polysaccharides +23161672,APPRIS: annotation of principal and alternative splice isoforms.,"Here, we present APPRIS (http://appris.bioinfo.cnio.es), a database that houses annotations of human splice isoforms. APPRIS has been designed to provide value to manual annotations of the human genome by adding reliable protein structural and functional data and information from cross-species conservation. The visual representation of the annotations provided by APPRIS for each gene allows annotators and researchers alike to easily identify functional changes brought about by splicing events. In addition to collecting, integrating and analyzing reliable predictions of the effect of splicing events, APPRIS also selects a single reference sequence for each gene, here termed the principal isoform, based on the annotations of structure, function and conservation for each transcript. APPRIS identifies a principal isoform for 85% of the protein-coding genes in the GENCODE 7 release for ENSEMBL. Analysis of the APPRIS data shows that at least 70% of the alternative (non-principal) variants would lose important functional or structural information relative to the principal isoform.",annotation of principal and alternative splice isoforms,APPRIS,http://appris.bioinfo.cnio.es,a database that houses annotations of human splice isoforms +23178820,MicrobPad MD: microbial pathogen diagnostic methods database.,"Medical pathogens induce infections, illnesses and sometimes serious medical conditions in the infected hosts. Diagnosis of these pathogens is important for proper treatment and investigation of pathogenesis processes. 
Molecular techniques have been developed for facilitating accurate, sensitive and low-cost diagnosis of these pathogens. Based on these techniques, diagnostic devices have been developed for a number of pathogens. More devices are needed for comprehensive coverage of medical pathogens. To facilitate the development of these devices, a database with integrated information about diagnostic methods, targets, and primers/probes for the known bacterial, fungal and viral pathogens is needed. We developed the microbial pathogen diagnostic methods database MicrobPad MD (http://bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp or http://pha-bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp) to provide comprehensive information about the molecular diagnostic techniques, targets, primers/probes, detection procedures and conditions, and tested diagnostic accuracies and limit of diagnosis for 314 bacterial, fungal and viral species from 61 genera. While available, additional information such as pathogen strains and hosts, tissue distribution or habitats, cultivation methods, biochemical characteristics, virulence factors, morphology, diseases, symptoms, treatment and prevention methods are provided. Our Database covers 242 gene targets, 700 primers/probes, 340 virulence factors, and 261 diseases. Cross-links to the NCBI genome and SwissProt/UniProt databases are provided.",microbial pathogen diagnostic methods database,MicrobPad MD,http://pha-bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp,"a database with integrated information about diagnostic methods, targets, and primers/probes for the known bacterial, fungal and viral pathogens" +23178820,MicrobPad MD: microbial pathogen diagnostic methods database.,"Medical pathogens induce infections, illnesses and sometimes serious medical conditions in the infected hosts. Diagnosis of these pathogens is important for proper treatment and investigation of pathogenesis processes. 
Molecular techniques have been developed for facilitating accurate, sensitive and low-cost diagnosis of these pathogens. Based on these techniques, diagnostic devices have been developed for a number of pathogens. More devices are needed for comprehensive coverage of medical pathogens. To facilitate the development of these devices, a database with integrated information about diagnostic methods, targets, and primers/probes for the known bacterial, fungal and viral pathogens is needed. We developed the microbial pathogen diagnostic methods database MicrobPad MD (http://bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp or http://pha-bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp) to provide comprehensive information about the molecular diagnostic techniques, targets, primers/probes, detection procedures and conditions, and tested diagnostic accuracies and limit of diagnosis for 314 bacterial, fungal and viral species from 61 genera. While available, additional information such as pathogen strains and hosts, tissue distribution or habitats, cultivation methods, biochemical characteristics, virulence factors, morphology, diseases, symptoms, treatment and prevention methods are provided. Our Database covers 242 gene targets, 700 primers/probes, 340 virulence factors, and 261 diseases. Cross-links to the NCBI genome and SwissProt/UniProt databases are provided.",microbial pathogen diagnostic methods database,MicrobPad MD,http://bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp,"a database with integrated information about diagnostic methods, targets, and primers/probes for the known bacterial, fungal and viral pathogens" +23180799,PGDD: a database of gene and genome duplication in plants.,"Genome duplication (GD) has permanently shaped the architecture and function of many higher eukaryotic genomes. 
The angiosperms (flowering plants) are outstanding models in which to elucidate consequences of GD for higher eukaryotes, owing to their propensity for chromosomal duplication or even triplication in a few cases. Duplicated genome structures often require both intra- and inter-genome alignments to unravel their evolutionary history, also providing the means to deduce both obvious and otherwise-cryptic orthology, paralogy and other relationships among genes. The burgeoning sets of angiosperm genome sequences provide the foundation for a host of investigations into the functional and evolutionary consequences of gene and GD. To provide genome alignments from a single resource based on uniform standards that have been validated by empirical studies, we built the Plant Genome Duplication Database (PGDD; freely available at http://chibba.agtec.uga.edu/duplication/), a web service providing synteny information in terms of colinearity between chromosomes. At present, PGDD contains data for 26 plants including bryophytes and chlorophyta, as well as angiosperms with draft genome sequences. In addition to the inclusion of new genomes as they become available, we are preparing new functions to enhance PGDD.",Plant Genome Duplication Database,PGDD,http://chibba.agtec.uga.edu/duplication,a database of gene and genome duplication in plants +23209799,The duplicated genes database: identification and functional annotation of co-localised duplicated genes across genomes.,"

Background

There has been a surge in studies linking genome structure and gene expression, with special focus on duplicated genes. Although initially duplicated from the same sequence, duplicated genes can diverge strongly over evolution and take on different functions or regulated expression. However, information on the function and expression of duplicated genes remains sparse. Identifying groups of duplicated genes in different genomes and characterizing their expression and function would therefore be of great interest to the research community. The 'Duplicated Genes Database' (DGD) was developed for this purpose.

Methodology

Nine species were included in the DGD. For each species, BLAST analyses were conducted on peptide sequences corresponding to the genes mapped on a same chromosome. Groups of duplicated genes were defined based on these pairwise BLAST comparisons and the genomic location of the genes. For each group, Pearson correlations between gene expression data and semantic similarities between functional GO annotations were also computed when the relevant information was available.

Conclusions

The Duplicated Gene Database provides a list of co-localised and duplicated genes for several species with the available gene co-expression level and semantic similarity value of functional annotation. Adding these data to the groups of duplicated genes provides biological information that can prove useful to gene expression analyses. The Duplicated Gene Database can be freely accessed through the DGD website at http://dgd.genouest.org.",Duplicated Genes Database,DGD,http://dgd.genouest.org,identification and functional annotation of co-localised duplicated genes across genomes +23406793,"T-HOD: a literature-based candidate gene database for hypertension, obesity and diabetes.","Researchers are finding it more and more difficult to follow the changing status of disease candidate genes due to the exponential increase in gene mapping studies. The Text-mined Hypertension, Obesity and Diabetes candidate gene database (T-HOD) is developed to help trace existing research on three kinds of cardiovascular diseases: hypertension, obesity and diabetes, with the last disease categorized into Type 1 and Type 2, by regularly and semiautomatically extracting HOD-related genes from newly published literature. Currently, there are 837, 835 and 821 candidate genes recorded in T-HOD for hypertension, obesity and diabetes, respectively. T-HOD employed the state-of-art text-mining technologies, including a gene/disease identification system and a disease-gene relation extraction system, which can be used to affirm the association of genes with three diseases and provide more evidence for further studies. The primary inputs of T-HOD are the three kinds of diseases, and the output is a list of disease-related genes that can be ranked based on their number of appearance, protein-protein interactions and single-nucleotide polymorphisms. Unlike manually constructed disease gene databases, the content of T-HOD is regularly updated by our text-mining system and verified by domain experts. 
The interface of T-HOD facilitates easy browsing for users and allows T-HOD curators to verify data efficiently. We believe that T-HOD can help life scientists in search for more disease candidate genes in a less time- and effort-consuming manner. Database URL: http://bws.iis.sinica.edu.tw/THOD.","Text-mined Hypertension, Obesity and Diabetes candidate gene database",T-HOD,http://bws.iis.sinica.edu.tw/THOD,"a literature-based candidate gene database for hypertension, obesity and diabetes" +23411718,The Eimeria transcript DB: an integrated resource for annotated transcripts of protozoan parasites of the genus Eimeria.,"Parasites of the genus Eimeria infect a wide range of vertebrate hosts, including chickens. We have recently reported a comparative analysis of the transcriptomes of Eimeria acervulina, Eimeria maxima and Eimeria tenella, integrating ORESTES data produced by our group and publicly available Expressed Sequence Tags (ESTs). All cDNA reads have been assembled, and the reconstructed transcripts have been submitted to a comprehensive functional annotation pipeline. Additional studies included orthology assignment across apicomplexan parasites and clustering analyses of gene expression profiles among different developmental stages of the parasites. To make all this body of information publicly available, we constructed the Eimeria Transcript Database (EimeriaTDB), a web repository that provides access to sequence data, annotation and comparative analyses. Here, we describe the web interface, available sequence data sets and query tools implemented on the site. The main goal of this work is to offer a public repository of sequence and functional annotation data of reconstructed transcripts of parasites of the genus Eimeria. We believe that EimeriaTDB will represent a valuable and complementary resource for the Eimeria scientific community and for those researchers interested in comparative genomics of apicomplexan parasites. 
Database URL: http://www.coccidia.icb.usp.br/eimeriatdb/",Eimeria transcript DB,EimeriaTDB,http://www.coccidia.icb.usp.br/eimeriatdb/,"a web repository that provides access to sequence data, annotation and comparative analyses" +23411718,The Eimeria transcript DB: an integrated resource for annotated transcripts of protozoan parasites of the genus Eimeria.,"Parasites of the genus Eimeria infect a wide range of vertebrate hosts, including chickens. We have recently reported a comparative analysis of the transcriptomes of Eimeria acervulina, Eimeria maxima and Eimeria tenella, integrating ORESTES data produced by our group and publicly available Expressed Sequence Tags (ESTs). All cDNA reads have been assembled, and the reconstructed transcripts have been submitted to a comprehensive functional annotation pipeline. Additional studies included orthology assignment across apicomplexan parasites and clustering analyses of gene expression profiles among different developmental stages of the parasites. To make all this body of information publicly available, we constructed the Eimeria Transcript Database (EimeriaTDB), a web repository that provides access to sequence data, annotation and comparative analyses. Here, we describe the web interface, available sequence data sets and query tools implemented on the site. The main goal of this work is to offer a public repository of sequence and functional annotation data of reconstructed transcripts of parasites of the genus Eimeria. We believe that EimeriaTDB will represent a valuable and complementary resource for the Eimeria scientific community and for those researchers interested in comparative genomics of apicomplexan parasites. 
Database URL: http://www.coccidia.icb.usp.br/eimeriatdb/",Eimeria transcript DB,EimeriaTDB,http://www.coccidia.icb.usp.br/eimeriatdb/,an integrated resource for annotated transcripts of protozoan parasites of the genus Eimeria +23599502,INstruct: a database of high-quality 3D structurally resolved protein interactome networks.,"

Unlabelled

INstruct is a database of high-quality, 3D, structurally resolved protein interactome networks in human and six model organisms. INstruct combines the scale of available high-quality binary protein interaction data with the specificity of atomic-resolution structural information derived from co-crystal evidence using a tested interaction interface inference method. Its web interface is designed to allow for flexible search based on standard and organism-specific protein and gene-naming conventions, visualization of protein architecture highlighting interaction interfaces and viewing and downloading custom 3D structurally resolved interactome datasets.

Availability

INstruct is freely available on the web at http://instruct.yulab.org with all major browsers supported.",INstruct,INstruct,,a database of high-quality 3D structurally resolved protein interactome networks +23599502,INstruct: a database of high-quality 3D structurally resolved protein interactome networks.,"

Unlabelled

INstruct is a database of high-quality, 3D, structurally resolved protein interactome networks in human and six model organisms. INstruct combines the scale of available high-quality binary protein interaction data with the specificity of atomic-resolution structural information derived from co-crystal evidence using a tested interaction interface inference method. Its web interface is designed to allow for flexible search based on standard and organism-specific protein and gene-naming conventions, visualization of protein architecture highlighting interaction interfaces and viewing and downloading custom 3D structurally resolved interactome datasets.

Availability

INstruct is freely available on the web at http://instruct.yulab.org with all major browsers supported.",INstruct,INstruct,http://instruct.yulab.org,"a database of high-quality, 3D, structurally resolved protein interactome networks in human and six model organisms" +23633602,Integrated database of information from structural genomics experiments.,"Information from structural genomics experiments at the RIKEN SPring-8 Center, Japan has been compiled and published as an integrated database. The contents of the database are (i) experimental data from nine species of bacteria that cover a large variety of protein molecules in terms of both evolution and properties (http://database.riken.jp/db/bacpedia), (ii) experimental data from mutant proteins that were designed systematically to study the influence of mutations on the diffraction quality of protein crystals (http://database.riken.jp/db/bacpedia) and (iii) experimental data from heavy-atom-labelled proteins from the heavy-atom database HATODAS (http://database.riken.jp/db/hatodas). The database integration adopts the semantic web, which is suitable for data reuse and automatic processing, thereby allowing batch downloads of full data and data reconstruction to produce new databases. In addition, to enhance the use of data (i) and (ii) by general researchers in biosciences, a comprehensible user interface, Bacpedia (http://bacpedia.harima.riken.jp), has been developed.",Bacpedia,Bacpedia,http://database.riken.jp/db/bacpedia,Integrated database of information from structural genomics experiments +23633602,Integrated database of information from structural genomics experiments.,"Information from structural genomics experiments at the RIKEN SPring-8 Center, Japan has been compiled and published as an integrated database. 
The contents of the database are (i) experimental data from nine species of bacteria that cover a large variety of protein molecules in terms of both evolution and properties (http://database.riken.jp/db/bacpedia), (ii) experimental data from mutant proteins that were designed systematically to study the influence of mutations on the diffraction quality of protein crystals (http://database.riken.jp/db/bacpedia) and (iii) experimental data from heavy-atom-labelled proteins from the heavy-atom database HATODAS (http://database.riken.jp/db/hatodas). The database integration adopts the semantic web, which is suitable for data reuse and automatic processing, thereby allowing batch downloads of full data and data reconstruction to produce new databases. In addition, to enhance the use of data (i) and (ii) by general researchers in biosciences, a comprehensible user interface, Bacpedia (http://bacpedia.harima.riken.jp), has been developed.",Bacpedia,Bacpedia,http://database.riken.jp/db/hatodas,Integrated database of information from structural genomics experiments +23633602,Integrated database of information from structural genomics experiments.,"Information from structural genomics experiments at the RIKEN SPring-8 Center, Japan has been compiled and published as an integrated database. The contents of the database are (i) experimental data from nine species of bacteria that cover a large variety of protein molecules in terms of both evolution and properties (http://database.riken.jp/db/bacpedia), (ii) experimental data from mutant proteins that were designed systematically to study the influence of mutations on the diffraction quality of protein crystals (http://database.riken.jp/db/bacpedia) and (iii) experimental data from heavy-atom-labelled proteins from the heavy-atom database HATODAS (http://database.riken.jp/db/hatodas). 
The database integration adopts the semantic web, which is suitable for data reuse and automatic processing, thereby allowing batch downloads of full data and data reconstruction to produce new databases. In addition, to enhance the use of data (i) and (ii) by general researchers in biosciences, a comprehensible user interface, Bacpedia (http://bacpedia.harima.riken.jp), has been developed.",Bacpedia,Bacpedia,http://bacpedia.harima.riken.jp,Integrated database of information from structural genomics experiments +23721660,HIM-herbal ingredients in-vivo metabolism database.,"

Background

Herbal medicine has long been viewed as a valuable asset for potential new drug discovery and herbal ingredients' metabolites, especially the in vivo metabolites were often found to gain better pharmacological, pharmacokinetic and even better safety profiles compared to their parent compounds. However, these herbal metabolite information is still scattered and waiting to be collected.

Description

HIM database manually collected so far the most comprehensive available in-vivo metabolism information for herbal active ingredients, as well as their corresponding bioactivity, organs and/or tissues distribution, toxicity, ADME and the clinical research profile. Currently HIM contains 361 ingredients and 1104 corresponding in-vivo metabolites from 673 reputable herbs. Tools of structural similarity, substructure search and Lipinski's Rule of Five are also provided. Various links were made to PubChem, PubMed, TCM-ID (Traditional Chinese Medicine Information database) and HIT (Herbal ingredients' targets databases).

Conclusions

A curated database HIM is set up for the in vivo metabolites information of the active ingredients for Chinese herbs, together with their corresponding bioactivity, toxicity and ADME profile. HIM is freely accessible to academic researchers at http://www.bioinformatics.org.cn/.",herbal ingredients in-vivo metabolism database,HIM database,http://www.bioinformatics.org.cn/,"the most comprehensive available in-vivo metabolism information for herbal active ingredients, as well as their corresponding bioactivity, organs and/or tissues distribution, toxicity, ADME and the clinical research profile" +23936191,HSC-explorer: a curated database for hematopoietic stem cells.,"HSC-Explorer (http://mips.helmholtz-muenchen.de/HSC/) is a publicly available, integrative database containing detailed information about the early steps of hematopoiesis. The resource aims at providing fast and easy access to relevant information, in particular to the complex network of interacting cell types and molecules, from the wealth of publications in the field through visualization interfaces. It provides structured information on more than 7000 experimentally validated interactions between molecules, bioprocesses and environmental factors. Information is manually derived by critical reading of the scientific literature from expert annotators. Hematopoiesis-relevant interactions are accompanied with context information such as model organisms and experimental methods for enabling assessment of reliability and relevance of experimental results. Usage of established vocabularies facilitates downstream bioinformatics applications and to convert the results into complex networks. Several predefined datasets (Selected topics) offer insights into stem cell behavior, the stem cell niche and signaling processes supporting hematopoietic stem cell maintenance. 
HSC-Explorer provides a versatile web-based resource for scientists entering the field of hematopoiesis enabling users to inspect the associated biological processes through interactive graphical presentation.",HSC-Explorer,HSC-Explorer,http://mips.helmholtz-muenchen.de/HSC/,"a publicly available, integrative database containing detailed information about the early steps of hematopoiesis" +23936191,HSC-explorer: a curated database for hematopoietic stem cells.,"HSC-Explorer (http://mips.helmholtz-muenchen.de/HSC/) is a publicly available, integrative database containing detailed information about the early steps of hematopoiesis. The resource aims at providing fast and easy access to relevant information, in particular to the complex network of interacting cell types and molecules, from the wealth of publications in the field through visualization interfaces. It provides structured information on more than 7000 experimentally validated interactions between molecules, bioprocesses and environmental factors. Information is manually derived by critical reading of the scientific literature from expert annotators. Hematopoiesis-relevant interactions are accompanied with context information such as model organisms and experimental methods for enabling assessment of reliability and relevance of experimental results. Usage of established vocabularies facilitates downstream bioinformatics applications and to convert the results into complex networks. Several predefined datasets (Selected topics) offer insights into stem cell behavior, the stem cell niche and signaling processes supporting hematopoietic stem cell maintenance. 
HSC-Explorer provides a versatile web-based resource for scientists entering the field of hematopoiesis enabling users to inspect the associated biological processes through interactive graphical presentation.",HSC-explorer,HSC-explorer,http://mips.helmholtz-muenchen.de/HSC/,a curated database for hematopoietic stem cells +24009897,EVpedia: an integrated database of high-throughput data for systemic analyses of extracellular vesicles.,"Secretion of extracellular vesicles is a general cellular activity that spans the range from simple unicellular organisms (e.g. archaea; Gram-positive and Gram-negative bacteria) to complex multicellular ones, suggesting that this extracellular vesicle-mediated communication is evolutionarily conserved. Extracellular vesicles are spherical bilayered proteolipids with a mean diameter of 20-1,000 nm, which are known to contain various bioactive molecules including proteins, lipids, and nucleic acids. Here, we present EVpedia, which is an integrated database of high-throughput datasets from prokaryotic and eukaryotic extracellular vesicles. EVpedia provides high-throughput datasets of vesicular components (proteins, mRNAs, miRNAs, and lipids) present on prokaryotic, non-mammalian eukaryotic, and mammalian extracellular vesicles. In addition, EVpedia also provides an array of tools, such as the search and browse of vesicular components, Gene Ontology enrichment analysis, network analysis of vesicular proteins and mRNAs, and a comparison of vesicular datasets by ortholog identification. Moreover, publications on extracellular vesicle studies are listed in the database. 
This free web-based database of EVpedia (http://evpedia.info) might serve as a fundamental repository to stimulate the advancement of extracellular vesicle studies and to elucidate the novel functions of these complex extracellular organelles.",EVpedia,EVpedia,http://evpedia.info,an integrated database of high-throughput datasets from prokaryotic and eukaryotic extracellular vesicles +24009897,EVpedia: an integrated database of high-throughput data for systemic analyses of extracellular vesicles.,"Secretion of extracellular vesicles is a general cellular activity that spans the range from simple unicellular organisms (e.g. archaea; Gram-positive and Gram-negative bacteria) to complex multicellular ones, suggesting that this extracellular vesicle-mediated communication is evolutionarily conserved. Extracellular vesicles are spherical bilayered proteolipids with a mean diameter of 20-1,000 nm, which are known to contain various bioactive molecules including proteins, lipids, and nucleic acids. Here, we present EVpedia, which is an integrated database of high-throughput datasets from prokaryotic and eukaryotic extracellular vesicles. EVpedia provides high-throughput datasets of vesicular components (proteins, mRNAs, miRNAs, and lipids) present on prokaryotic, non-mammalian eukaryotic, and mammalian extracellular vesicles. In addition, EVpedia also provides an array of tools, such as the search and browse of vesicular components, Gene Ontology enrichment analysis, network analysis of vesicular proteins and mRNAs, and a comparison of vesicular datasets by ortholog identification. Moreover, publications on extracellular vesicle studies are listed in the database. 
This free web-based database of EVpedia (http://evpedia.info) might serve as a fundamental repository to stimulate the advancement of extracellular vesicle studies and to elucidate the novel functions of these complex extracellular organelles.",EVpedia,EVpedia,http://evpedia.info,an integrated database of high-throughput data for systemic analyses of extracellular vesicles +24163255,The pancreatic expression database: recent extensions and updates.,"The Pancreatic Expression Database (PED, http://www.pancreasexpression.org) is the only device currently available for mining of pancreatic cancer literature data. It brings together the largest collection of multidimensional pancreatic data from the literature including genomic, proteomic, microRNA, methylomic and transcriptomic profiles. PED allows the user to ask specific questions on the observed levels of deregulation among a broad range of specimen/experimental types including healthy/patient tissue and body fluid specimens, cell lines and murine models as well as related treatments/drugs data. Here we provide an update to PED, which has been previously featured in the Database issue of this journal. Briefly, PED data content has been substantially increased and expanded to cover methylomics studies. We introduced an extensive controlled vocabulary that records specific details on the samples and added data from large-scale meta-analysis studies. The web interface has been improved/redesigned with a quick search option to rapidly extract information about a gene/protein of interest and an upload option allowing users to add their own data to PED. We added a user guide and implemented integrated graphical tools to overlay and visualize retrieved information. 
Interoperability with biomart-compatible data sets was significantly improved to allow integrative queries with pancreatic cancer data.",pancreatic expression database,PED,http://www.pancreasexpression.org, +24163255,The pancreatic expression database: recent extensions and updates.,"The Pancreatic Expression Database (PED, http://www.pancreasexpression.org) is the only device currently available for mining of pancreatic cancer literature data. It brings together the largest collection of multidimensional pancreatic data from the literature including genomic, proteomic, microRNA, methylomic and transcriptomic profiles. PED allows the user to ask specific questions on the observed levels of deregulation among a broad range of specimen/experimental types including healthy/patient tissue and body fluid specimens, cell lines and murine models as well as related treatments/drugs data. Here we provide an update to PED, which has been previously featured in the Database issue of this journal. Briefly, PED data content has been substantially increased and expanded to cover methylomics studies. We introduced an extensive controlled vocabulary that records specific details on the samples and added data from large-scale meta-analysis studies. The web interface has been improved/redesigned with a quick search option to rapidly extract information about a gene/protein of interest and an upload option allowing users to add their own data to PED. We added a user guide and implemented integrated graphical tools to overlay and visualize retrieved information. Interoperability with biomart-compatible data sets was significantly improved to allow integrative queries with pancreatic cancer data.",Pancreatic Expression Database,PED,http://www.pancreasexpression.org, +24185698,LSD 2.0: an update of the leaf senescence database.,"This manuscript describes an update of the leaf senescence database (LSD) previously featured in the 2011 NAR Database Issue. 
LSD provides comprehensive information concerning senescence-associated genes (SAGs) and their corresponding mutants. We have made extensive annotations for these SAGs through both manual and computational approaches. Recently, we updated LSD to a new version LSD 2.0 (http://www.eplantsenescence.org/), which contains 5356 genes and 322 mutants from 44 species, an extension from the previous version containing 1145 genes and 154 mutants from 21 species. In the current version, we also included several new features: (i) Primer sequences retrieved based on experimental evidence or designed for high-throughput analysis were added; (ii) More than 100 images of Arabidopsis SAG mutants were added; (iii) Arabidopsis seed information obtained from The Arabidopsis Information Resource (TAIR) was integrated; (iv) Subcellular localization information of SAGs in Arabidopsis mined from literature or generated from the SUBA3 program was presented; (v) Quantitative Trait Loci information was added with links to the original database and (vi) New options such as primer and miRNA search for database query were implemented. The updated database will be a valuable and informative resource for basic research of leaf senescence and for the manipulation of traits of agronomically important plants.",leaf senescence database,LSD,http://www.eplantsenescence.org/, +24203703,"CottonGen: a genomics, genetics and breeding database for cotton research.","CottonGen (http://www.cottongen.org) is a curated and integrated web-based relational database providing access to publicly available genomic, genetic and breeding data for cotton. CottonGen supercedes CottonDB and the Cotton Marker Database, with enhanced tools for easier data sharing, mining, visualization and data retrieval of cotton research data. 
CottonGen contains annotated whole genome sequences, unigenes from expressed sequence tags (ESTs), markers, trait loci, genetic maps, genes, taxonomy, germplasm, publications and communication resources for the cotton community. Annotated whole genome sequences of Gossypium raimondii are available with aligned genetic markers and transcripts. These whole genome data can be accessed through genome pages, search tools and GBrowse, a popular genome browser. Most of the published cotton genetic maps can be viewed and compared using CMap, a comparative map viewer, and are searchable via map search tools. Search tools also exist for markers, quantitative trait loci (QTLs), germplasm, publications and trait evaluation data. CottonGen also provides online analysis tools such as NCBI BLAST and Batch BLAST.",CottonGen,CottonGen,http://www.cottongen.org,"a curated and integrated web-based relational database providing access to publicly available genomic, genetic and breeding data for cotton" +24203703,"CottonGen: a genomics, genetics and breeding database for cotton research.","CottonGen (http://www.cottongen.org) is a curated and integrated web-based relational database providing access to publicly available genomic, genetic and breeding data for cotton. CottonGen supercedes CottonDB and the Cotton Marker Database, with enhanced tools for easier data sharing, mining, visualization and data retrieval of cotton research data. CottonGen contains annotated whole genome sequences, unigenes from expressed sequence tags (ESTs), markers, trait loci, genetic maps, genes, taxonomy, germplasm, publications and communication resources for the cotton community. Annotated whole genome sequences of Gossypium raimondii are available with aligned genetic markers and transcripts. These whole genome data can be accessed through genome pages, search tools and GBrowse, a popular genome browser. 
Most of the published cotton genetic maps can be viewed and compared using CMap, a comparative map viewer, and are searchable via map search tools. Search tools also exist for markers, quantitative trait loci (QTLs), germplasm, publications and trait evaluation data. CottonGen also provides online analysis tools such as NCBI BLAST and Batch BLAST.",CottonGen,CottonGen,http://www.cottongen.org,"a genomics, genetics and breeding database for cotton research" +24234451,The MIntAct project--IntAct as a common curation platform for 11 molecular interaction databases.,"IntAct (freely available at http://www.ebi.ac.uk/intact) is an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions. IntAct has developed a sophisticated web-based curation tool, capable of supporting both IMEx- and MIMIx-level curation. This tool is now utilized by multiple additional curation teams, all of whom annotate data directly into the IntAct database. Members of the IntAct team supply appropriate levels of training, perform quality control on entries and take responsibility for long-term data maintenance. Recently, the MINT and IntAct databases decided to merge their separate efforts to make optimal use of limited developer resources and maximize the curation output. All data manually curated by the MINT curators have been moved into the IntAct database at EMBL-EBI and are merged with the existing IntAct dataset. 
Both IntAct and MINT are active contributors to the IMEx consortium (http://www.imexconsortium.org).",IntAct,IntAct,http://www.ebi.ac.uk/intact,a common curation platform for 11 molecular interaction databases +24234451,The MIntAct project--IntAct as a common curation platform for 11 molecular interaction databases.,"IntAct (freely available at http://www.ebi.ac.uk/intact) is an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions. IntAct has developed a sophisticated web-based curation tool, capable of supporting both IMEx- and MIMIx-level curation. This tool is now utilized by multiple additional curation teams, all of whom annotate data directly into the IntAct database. Members of the IntAct team supply appropriate levels of training, perform quality control on entries and take responsibility for long-term data maintenance. Recently, the MINT and IntAct databases decided to merge their separate efforts to make optimal use of limited developer resources and maximize the curation output. All data manually curated by the MINT curators have been moved into the IntAct database at EMBL-EBI and are merged with the existing IntAct dataset. Both IntAct and MINT are active contributors to the IMEx consortium (http://www.imexconsortium.org).",IntAct,IntAct,http://www.ebi.ac.uk/intact,"an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions" +24243842,LPSN--list of prokaryotic names with standing in nomenclature.,"The List of Prokaryotic Names with Standing in Nomenclature (LPSN; http://www.bacterio.net) is a database that lists the names of prokaryotes (Bacteria and Archaea) that have been validly published in the International Journal of Systematic and Evolutionary Microbiology directly or by inclusion in a Validation List, under the Rules of International Code of Nomenclature of Bacteria. 
Currently there are 15 974 taxa listed. In addition, LPSN has an up-to-date classification of prokaryotes and information on prokaryotic nomenclature and culture collections.",list of prokaryotic names with standing in nomenclature,LPSN,http://www.bacterio.net,"a database that lists the names of prokaryotes (Bacteria and Archaea) that have been validly published in the International Journal of Systematic and Evolutionary Microbiology directly or by inclusion in a Validation List, under the Rules of International Code of Nomenclature of Bacteria" +24243842,LPSN--list of prokaryotic names with standing in nomenclature.,"The List of Prokaryotic Names with Standing in Nomenclature (LPSN; http://www.bacterio.net) is a database that lists the names of prokaryotes (Bacteria and Archaea) that have been validly published in the International Journal of Systematic and Evolutionary Microbiology directly or by inclusion in a Validation List, under the Rules of International Code of Nomenclature of Bacteria. Currently there are 15 974 taxa listed. In addition, LPSN has an up-to-date classification of prokaryotes and information on prokaryotic nomenclature and culture collections.",List of Prokaryotic Names with Standing in Nomenclature,LPSN,http://www.bacterio.net,"a database that lists the names of prokaryotes (Bacteria and Archaea) that have been validly published in the International Journal of Systematic and Evolutionary Microbiology directly or by inclusion in a Validation List, under the Rules of International Code of Nomenclature of Bacteria" +24270047,ProfileDB: a resource for proteomics and cross-omics biomarker discovery.,"The increasing size and complexity of high-throughput datasets pose a growing challenge for researchers. Often very different (cross-omics) techniques with individual data analysis pipelines are employed making a unified biomarker discovery strategy and a direct comparison of different experiments difficult and time consuming. 
Here we present the comprehensive web-based application ProfileDB. The application is designed to integrate data from different high-throughput 'omics' data types (Transcriptomics, Proteomics, Metabolomics) with clinical parameters and prior knowledge on pathways and ontologies. Beyond data storage, ProfileDB provides a set of dedicated tools for study inspection and data visualization. The user can gain insights into a complex experiment with just a few mouse clicks. We will demonstrate the application by presenting typical use cases for the identification of proteomics biomarkers. All presented analyses can be reproduced using the public ProfileDB web server. The ProfileDB application is available by standard browser (Firefox 18+, Internet Explorer Version 9+) technology via http://profileDB.-microdiscovery.de/ (login and pass-word: profileDB). The installation contains several public datasets including different cross-'omics' experiments. This article is part of a Special Issue entitled: Biomarkers: A Proteomic Challenge.",ProfileDB,ProfileDB,http://profileDB.-microdiscovery.de/,a resource for proteomics and cross-omics biomarker discovery +24273012,SpliceProt: a protein sequence repository of predicted human splice variants.,"The mechanism of alternative splicing in the transcriptome may increase the proteome diversity in eukaryotes. In proteomics, several studies aim to use protein sequence repositories to annotate MS experiments or to detect differentially expressed proteins. However, the available protein sequence repositories are not designed to fully detect protein isoforms derived from mRNA splice variants. To foster knowledge for the field, here we introduce SpliceProt, a new protein sequence repository of transcriptome experimental data used to investigate for putative splice variants in human proteomes. Current version of SpliceProt contains 159 719 non-redundant putative polypeptide sequences. 
The assessment of the potential of SpliceProt in detecting new protein isoforms resulting from alternative splicing was performed by using publicly available proteomics data. We detected 173 peptides hypothetically derived from splice variants, which 54 of them are not present in UniprotKB/TrEMBL sequence repository. In comparison to other protein sequence repositories, SpliceProt contains a greater number of unique peptides and is able to detect more splice variants. Therefore, SpliceProt provides a solution for the annotation of proteomics experiments regarding splice isofoms. The repository files containing the translated sequences of the predicted splice variants and a visualization tool are freely available at http://lbbc.inca.gov.br/spliceprot.",SpliceProt,SpliceProt,http://lbbc.inca.gov.br/spliceprot,a protein sequence repository of predicted human splice variants +24304892,miRTarBase update 2014: an information resource for experimentally validated miRNA-target interactions.,"MicroRNAs (miRNAs) are small non-coding RNA molecules capable of negatively regulating gene expression to control many cellular mechanisms. The miRTarBase database (http://mirtarbase.mbc.nctu.edu.tw/) provides the most current and comprehensive information of experimentally validated miRNA-target interactions. The database was launched in 2010 with data sources for >100 published studies in the identification of miRNA targets, molecular networks of miRNA targets and systems biology, and the current release (2013, version 4) includes significant expansions and enhancements over the initial release (2010, version 1). 
This article reports the current status of and recent improvements to the database, including (i) a 14-fold increase to miRNA-target interaction entries, (ii) a miRNA-target network, (iii) expression profile of miRNA and its target gene, (iv) miRNA target-associated diseases and (v) additional utilities including an upgrade reminder and an error reporting/user feedback system.",miRTarBase,miRTarBase,http://mirtarbase.mbc.nctu.edu.tw/,an information resource for experimentally validated miRNA-target interactions +24334957,The Transformer database: biotransformation of xenobiotics.,"As the number of prescribed drugs is constantly rising, drug-drug interactions are an important issue. The simultaneous administration of several drugs can cause severe adverse effects based on interactions with the same metabolizing enzyme(s). The Transformer database (http://bioinformatics.charite.de/transformer) contains integrated information on the three phases of biotransformation (modification, conjugation and excretion) of 3000 drugs and >350 relevant food ingredients (e.g. grapefruit juice) and herbs, which are catalyzed by 400 proteins. A total of 100,000 interactions were found through text mining and manual validation. The 3D structures of 200 relevant proteins are included. The database enables users to search for drugs with a visual display of known interactions with phase I (Cytochrome P450) and phase II enzymes, transporters, food and herbs. For each interaction, PubMed references are given. To detect mutual impairments of drugs, the drug-cocktail tool displays interactions between selected drugs. By choosing the indication for a drug, the tool offers suggestions for alternative medications to avoid metabolic conflicts. Drug interactions can also be visualized in an interactive network view. 
Additionally, prodrugs, including their mechanisms of activation, and further information on enzymes of biotransformation, including 3D models, can be viewed.",Transformer database,,http://bioinformatics.charite.de/transformer,biotransformation of xenobiotics +24428888,OncomiRdbB: a comprehensive database of microRNAs and their targets in breast cancer.,"

Background

Given the estimate that 30% of our genes are controlled by microRNAs, it is essential that we understand the precise relationship between microRNAs and their targets. OncomiRs are microRNAs (miRNAs) that have been frequently shown to be deregulated in cancer. However, although several oncomiRs have been identified and characterized, there is as yet no comprehensive compilation of this data which has rendered it underutilized by cancer biologists. There is therefore an unmet need in generating bioinformatic platforms to speed the identification of novel therapeutic targets.

Description

We describe here OncomiRdbB, a comprehensive database of oncomiRs mined from different existing databases for mouse and humans along with novel oncomiRs that we have validated in human breast cancer samples. The database also lists their respective predicted targets, identified using miRanda, along with their IDs, sequences, chromosome location and detailed description. This database facilitates querying by search strings including microRNA name, sequence, accession number, target genes and organisms. The microRNA networks and their hubs with respective targets at 3'UTR, 5'UTR and exons of different pathway genes were also deciphered using the 'R' algorithm.

Conclusion

OncomiRdbB is a comprehensive and integrated database of oncomiRs and their targets in breast cancer with multiple query options which will help enhance both understanding of the biology of breast cancer and the development of new and innovative microRNA based diagnostic tools and targets of therapeutic significance. OncomiRdbB is freely available for download through the URL link http://tdb.ccmb.res.in/OncomiRdbB/index.htm.",OncomiRdbB,OncomiRdbB,http://tdb.ccmb.res.in/OncomiRdbB/index.htm,a comprehensive database of oncomiRs mined from different existing databases for mouse and humans along with novel oncomiRs that we have validated in human breast cancer samples +24428888,OncomiRdbB: a comprehensive database of microRNAs and their targets in breast cancer.,"

Background

Given the estimate that 30% of our genes are controlled by microRNAs, it is essential that we understand the precise relationship between microRNAs and their targets. OncomiRs are microRNAs (miRNAs) that have been frequently shown to be deregulated in cancer. However, although several oncomiRs have been identified and characterized, there is as yet no comprehensive compilation of this data which has rendered it underutilized by cancer biologists. There is therefore an unmet need in generating bioinformatic platforms to speed the identification of novel therapeutic targets.

Description

We describe here OncomiRdbB, a comprehensive database of oncomiRs mined from different existing databases for mouse and humans along with novel oncomiRs that we have validated in human breast cancer samples. The database also lists their respective predicted targets, identified using miRanda, along with their IDs, sequences, chromosome location and detailed description. This database facilitates querying by search strings including microRNA name, sequence, accession number, target genes and organisms. The microRNA networks and their hubs with respective targets at 3'UTR, 5'UTR and exons of different pathway genes were also deciphered using the 'R' algorithm.

Conclusion

OncomiRdbB is a comprehensive and integrated database of oncomiRs and their targets in breast cancer with multiple query options which will help enhance both understanding of the biology of breast cancer and the development of new and innovative microRNA based diagnostic tools and targets of therapeutic significance. OncomiRdbB is freely available for download through the URL link http://tdb.ccmb.res.in/OncomiRdbB/index.htm.",OncomiRdbB,OncomiRdbB,http://tdb.ccmb.res.in/OncomiRdbB/index.htm,a comprehensive database of microRNAs and their targets in breast cancer +24466021,CoryneBase: Corynebacterium genomic resources and analysis tools at your fingertips.,"Corynebacteria are used for a wide variety of industrial purposes but some species are associated with human diseases. With increasing number of corynebacterial genomes having been sequenced, comparative analysis of these strains may provide better understanding of their biology, phylogeny, virulence and taxonomy that may lead to the discoveries of beneficial industrial strains or contribute to better management of diseases. To facilitate the ongoing research of corynebacteria, a specialized central repository and analysis platform for the corynebacterial research community is needed to host the fast-growing amount of genomic data and facilitate the analysis of these data. 
Here we present CoryneBase, a genomic database for Corynebacterium with diverse functionality for the analysis of genomes aimed to provide: (1) annotated genome sequences of Corynebacterium where 165,918 coding sequences and 4,180 RNAs can be found in 27 species; (2) access to comprehensive Corynebacterium data through the use of advanced web technologies for interactive web interfaces; and (3) advanced bioinformatic analysis tools consisting of standard BLAST for homology search, VFDB BLAST for sequence homology search against the Virulence Factor Database (VFDB), Pairwise Genome Comparison (PGC) tool for comparative genomic analysis, and a newly designed Pathogenomics Profiling Tool (PathoProT) for comparative pathogenomic analysis. CoryneBase offers the access of a range of Corynebacterium genomic resources as well as analysis tools for comparative genomics and pathogenomics. It is publicly available at http://corynebacterium.um.edu.my/.",CoryneBase,CoryneBase,http://corynebacterium.um.edu.my/,Corynebacterium genomic resources and analysis tools at your fingertips +24558441,Human transporter database: comprehensive knowledge and discovery tools in the human transporter genes.,"Transporters are essential in homeostatic exchange of endogenous and exogenous substances at the systematic, organic, cellular, and subcellular levels. Gene mutations of transporters are often related to pharmacogenetics traits. Recent developments in high throughput technologies on genomics, transcriptomics and proteomics allow in depth studies of transporter genes in normal cellular processes and diverse disease conditions. The flood of high throughput data have resulted in urgent need for an updated knowledgebase with curated, organized, and annotated human transporters in an easily accessible way. 
Using a pipeline with the combination of automated keywords query, sequence similarity search and manual curation on transporters, we collected 1,555 human non-redundant transporter genes to develop the Human Transporter Database (HTD) (http://htd.cbi.pku.edu.cn). Based on the extensive annotations, global properties of the transporter genes were illustrated, such as expression patterns and polymorphisms in relationships with their ligands. We noted that the human transporters were enriched in many fundamental biological processes such as oxidative phosphorylation and cardiac muscle contraction, and significantly associated with Mendelian and complex diseases such as epilepsy and sudden infant death syndrome. Overall, HTD provides a well-organized interface to facilitate research communities to search detailed molecular and genetic information of transporters for development of personalized medicine.",Human Transporter Database,HTD,http://htd.cbi.pku.edu.cn,comprehensive knowledge and discovery tools in the human transporter genes +24558441,Human transporter database: comprehensive knowledge and discovery tools in the human transporter genes.,"Transporters are essential in homeostatic exchange of endogenous and exogenous substances at the systematic, organic, cellular, and subcellular levels. Gene mutations of transporters are often related to pharmacogenetics traits. Recent developments in high throughput technologies on genomics, transcriptomics and proteomics allow in depth studies of transporter genes in normal cellular processes and diverse disease conditions. The flood of high throughput data have resulted in urgent need for an updated knowledgebase with curated, organized, and annotated human transporters in an easily accessible way. 
Using a pipeline with the combination of automated keywords query, sequence similarity search and manual curation on transporters, we collected 1,555 human non-redundant transporter genes to develop the Human Transporter Database (HTD) (http://htd.cbi.pku.edu.cn). Based on the extensive annotations, global properties of the transporter genes were illustrated, such as expression patterns and polymorphisms in relationships with their ligands. We noted that the human transporters were enriched in many fundamental biological processes such as oxidative phosphorylation and cardiac muscle contraction, and significantly associated with Mendelian and complex diseases such as epilepsy and sudden infant death syndrome. Overall, HTD provides a well-organized interface to facilitate research communities to search detailed molecular and genetic information of transporters for development of personalized medicine.",Human transporter database,HTD,http://htd.cbi.pku.edu.cn,comprehensive knowledge and discovery tools in the human transporter genes +24651967,OncomiRDB: a database for the experimentally verified oncogenic and tumor-suppressive microRNAs.,"

Summary

MicroRNAs (miRNAs), a class of small regulatory RNAs, play important roles in cancer initiation, progression and therapy. MiRNAs are found to regulate diverse cancer-related processes by targeting a large set of oncogenic and tumor-suppressive genes. To establish a high-confidence reference resource for studying the miRNA-regulated target genes and cellular processes in cancer, we manually curated 2259 entries of cancer-related miRNA regulations with direct experimental evidence from ~9000 abstracts, covering more than 300 miRNAs and 829 target genes across 25 cancer tissues. A web-based portal named oncomiRDB, which provides both graphical and text-based interfaces, was developed for easily browsing and searching all the annotations. It should be a useful resource for both the computational analysis and experimental study on miRNA regulatory networks and functions in cancer.

Availability and implementation

http://bioinfo.au.tsinghua.edu.cn/oncomirdb/

Contact

jgu@tsinghua.edu.cn

Supplementary information

Supplementary data are available at Bioinformatics online.",OncomiRDB,OncomiRDB,http://bioinfo.au.tsinghua.edu.cn/oncomirdb/,a database for the experimentally verified oncogenic and tumor-suppressive microRNAs +24712981,SFGD: a comprehensive platform for mining functional information from soybean transcriptome data and its use in identifying acyl-lipid metabolism pathways.,"

Background

Soybean (Glycine max L.) is one of the world's most important leguminous crops producing high-quality protein and oil. Increasing the relative oil concentration in soybean seeds is many researchers' goal, but a complete analysis platform of functional annotation for the genes involved in the soybean acyl-lipid pathway is still lacking. Following the success of soybean whole-genome sequencing, functional annotation has become a major challenge for the scientific community. Whole-genome transcriptome analysis is a powerful way to predict genes with biological functions. It is essential to build a comprehensive analysis platform for integrating soybean whole-genome sequencing data, the available transcriptome data and protein information. This platform could also be used to identify acyl-lipid metabolism pathways.

Description

In this study, we describe our construction of the Soybean Functional Genomics Database (SFGD) using Generic Genome Browser (Gbrowse) as the core platform. We integrated microarray expression profiling with 255 samples from 14 groups' experiments and mRNA-seq data with 30 samples from four groups' experiments, including spatial and temporal transcriptome data for different soybean development stages and environmental stresses. The SFGD includes a gene co-expression regulatory network containing 23,267 genes and 1873 miRNA-target pairs, and a group of acyl-lipid pathways containing 221 enzymes and more than 1550 genes. The SFGD also provides some key analysis tools, i.e. BLAST search, expression pattern search and cis-element significance analysis, as well as gene ontology information search and single nucleotide polymorphism display.

Conclusion

The SFGD is a comprehensive database integrating genome and transcriptome data, and also for soybean acyl-lipid metabolism pathways. It provides useful toolboxes for biologists to improve the accuracy and robustness of soybean functional genomics analysis, further improving understanding of gene regulatory networks for effective crop improvement. The SFGD is publically accessible at http://bioinformatics.cau.edu.cn/SFGD/, with all data available for downloading.",Soybean Functional Genomics Database,SFGD,http://bioinformatics.cau.edu.cn/SFGD/,a comprehensive platform for mining functional information from soybean transcriptome data and its use in identifying acyl-lipid metabolism pathways +24855436,Polytraits: A database on biological traits of marine polychaetes.,"The study of ecosystem functioning - the role which organisms play in an ecosystem - is becoming increasingly important in marine ecological research. The functional structure of a community can be represented by a set of functional traits assigned to behavioural, reproductive and morphological characteristics. The collection of these traits from the literature is however a laborious and time-consuming process, and gaps of knowledge and restricted availability of literature are a common problem. Trait data are not yet readily being shared by research communities, and even if they are, a lack of trait data repositories and standards for data formats leads to the publication of trait information in forms which cannot be processed by computers. This paper describes Polytraits (http://polytraits.lifewatchgreece.eu), a database on biological traits of marine polychaetes (bristle worms, Polychaeta: Annelida). At present, the database contains almost 20,000 records on morphological, behavioural and reproductive characteristics of more than 1,000 marine polychaete species, all referenced by literature sources. 
All data can be freely accessed through the project website in different ways and formats, both human-readable and machine-readable, and have been submitted to the Encyclopedia of Life for archival and integration with trait information from other sources.",Polytraits,Polytraits,http://polytraits.lifewatchgreece.eu,"a database on biological traits of marine polychaetes (bristle worms, Polychaeta: Annelida)" +24855436,Polytraits: A database on biological traits of marine polychaetes.,"The study of ecosystem functioning - the role which organisms play in an ecosystem - is becoming increasingly important in marine ecological research. The functional structure of a community can be represented by a set of functional traits assigned to behavioural, reproductive and morphological characteristics. The collection of these traits from the literature is however a laborious and time-consuming process, and gaps of knowledge and restricted availability of literature are a common problem. Trait data are not yet readily being shared by research communities, and even if they are, a lack of trait data repositories and standards for data formats leads to the publication of trait information in forms which cannot be processed by computers. This paper describes Polytraits (http://polytraits.lifewatchgreece.eu), a database on biological traits of marine polychaetes (bristle worms, Polychaeta: Annelida). At present, the database contains almost 20,000 records on morphological, behavioural and reproductive characteristics of more than 1,000 marine polychaete species, all referenced by literature sources. 
All data can be freely accessed through the project website in different ways and formats, both human-readable and machine-readable, and have been submitted to the Encyclopedia of Life for archival and integration with trait information from other sources.",Polytraits,Polytraits,http://polytraits.lifewatchgreece.eu,A database on biological traits of marine polychaetes +24870500,Phytoseiidae database: a website for taxonomic and distributional information on phytoseiid mites (Acari).,"This paper announces a database on the taxonomy and distribution of mites of the family Phytoseiidae Berlese, which is available online at http://www.lea.esalq.usp.br/phytoseiidae/. Synthesis of species diversity per genus, subfamily and country are given. Information about use of the database is provided.",Phytoseiidae database,,http://www.lea.esalq.usp.br/phytoseiidae/,a database on the taxonomy and distribution of mites of the family Phytoseiidae Berlese +24870500,Phytoseiidae database: a website for taxonomic and distributional information on phytoseiid mites (Acari).,"This paper announces a database on the taxonomy and distribution of mites of the family Phytoseiidae Berlese, which is available online at http://www.lea.esalq.usp.br/phytoseiidae/. Synthesis of species diversity per genus, subfamily and country are given. Information about use of the database is provided.",Phytoseiidae database,,http://www.lea.esalq.usp.br/phytoseiidae/,a website for taxonomic and distributional information on phytoseiid mites (Acari) +24907201,SeaBase: a multispecies transcriptomic resource and platform for gene network inference.,"Marine and aquatic animals are extraordinarily useful as models for identifying mechanisms of development and evolution, regeneration, resistance to cancer, longevity and symbiosis, among many other areas of research. This is due to the great diversity of these organisms and their wide-ranging capabilities. 
Genomics tools are essential for taking advantage of these """"free lessons"""" of nature. However, genomics and transcriptomics are challenging in emerging model systems. Here, we present SeaBase, a tool for helping to meet these needs. Specifically, SeaBase provides a platform for sharing and searching transcriptome data. More importantly, SeaBase will support a growing number of tools for inferring gene network mechanisms. The first dataset available on SeaBase is a developmental transcriptomic profile of the sea anemone Nematostella vectensis (Anthozoa, Cnidaria). Additional datasets are currently being prepared and we are aiming to expand SeaBase to include user-supplied data for any number of marine and aquatic organisms, thereby supporting many potentially new models for gene network studies. SeaBase can be accessed online at: http://seabase.core.cli.mbl.edu.",SeaBase,SeaBase,http://seabase.core.cli.mbl.edu,a multispecies transcriptomic resource and platform for gene network inference +25098325,MediaDB: a database of microbial growth conditions in defined media.,"Isolating pure microbial cultures and cultivating them in the laboratory on defined media is used to more fully characterize the metabolism and physiology of organisms. However, identifying an appropriate growth medium for a novel isolate remains a challenging task. Even organisms with sequenced and annotated genomes can be difficult to grow, despite our ability to build genome-scale metabolic networks that connect genomic data with metabolic function. The scientific literature is scattered with information about defined growth media used successfully for cultivating a wide variety of organisms, but to date there exists no centralized repository to inform efforts to cultivate less characterized organisms by bridging the gap between genomic data and compound composition for growth media. 
Here we present MediaDB, a manually curated database of defined media that have been used for cultivating organisms with sequenced genomes, with an emphasis on organisms with metabolic network models. The database is accessible online, can be queried by keyword searches or downloaded in its entirety, and can generate exportable individual media formulation files. The data assembled in MediaDB facilitate comparative studies of organism growth media, serve as a starting point for formulating novel growth media, and contribute to formulating media for in silico investigation of metabolic networks. MediaDB is freely available for public use at https://mediadb.systemsbiology.net.",MediaDB,MediaDB,https://mediadb.systemsbiology.net,"a manually curated database of defined media that have been used for cultivating organisms with sequenced genomes, with an emphasis on organisms with metabolic network models" +25098325,MediaDB: a database of microbial growth conditions in defined media.,"Isolating pure microbial cultures and cultivating them in the laboratory on defined media is used to more fully characterize the metabolism and physiology of organisms. However, identifying an appropriate growth medium for a novel isolate remains a challenging task. Even organisms with sequenced and annotated genomes can be difficult to grow, despite our ability to build genome-scale metabolic networks that connect genomic data with metabolic function. The scientific literature is scattered with information about defined growth media used successfully for cultivating a wide variety of organisms, but to date there exists no centralized repository to inform efforts to cultivate less characterized organisms by bridging the gap between genomic data and compound composition for growth media. Here we present MediaDB, a manually curated database of defined media that have been used for cultivating organisms with sequenced genomes, with an emphasis on organisms with metabolic network models. 
The database is accessible online, can be queried by keyword searches or downloaded in its entirety, and can generate exportable individual media formulation files. The data assembled in MediaDB facilitate comparative studies of organism growth media, serve as a starting point for formulating novel growth media, and contribute to formulating media for in silico investigation of metabolic networks. MediaDB is freely available for public use at https://mediadb.systemsbiology.net.",MediaDB,MediaDB,https://mediadb.systemsbiology.net,a database of microbial growth conditions in defined media +25178289,Native Pig and Chicken Breed Database: NPCDB.,"Indigenous (native) breeds of livestock have higher disease resistance and adaptation to the environment due to high genetic diversity. Even though their extinction rate is accelerated due to the increase of commercial breeds, natural disaster, and civil war, there is a lack of well-established databases for the native breeds. Thus, we constructed the native pig and chicken breed database (NPCDB) which integrates available information on the breeds from around the world. It is a nonprofit public database aimed to provide information on the genetic resources of indigenous pig and chicken breeds for their conservation. The NPCDB (http://npcdb.snu.ac.kr/) provides the phenotypic information and population size of each breed as well as its specific habitat. In addition, it provides information on the distribution of genetic resources across the country. The database will contribute to understanding of the breed's characteristics such as disease resistance and adaptation to environmental changes as well as the conservation of indigenous genetic resources.",native pig and chicken breed database,NPCDB,http://npcdb.snu.ac.kr/, +25178289,Native Pig and Chicken Breed Database: NPCDB.,"Indigenous (native) breeds of livestock have higher disease resistance and adaptation to the environment due to high genetic diversity. 
Even though their extinction rate is accelerated due to the increase of commercial breeds, natural disaster, and civil war, there is a lack of well-established databases for the native breeds. Thus, we constructed the native pig and chicken breed database (NPCDB) which integrates available information on the breeds from around the world. It is a nonprofit public database aimed to provide information on the genetic resources of indigenous pig and chicken breeds for their conservation. The NPCDB (http://npcdb.snu.ac.kr/) provides the phenotypic information and population size of each breed as well as its specific habitat. In addition, it provides information on the distribution of genetic resources across the country. The database will contribute to understanding of the breed's characteristics such as disease resistance and adaptation to environmental changes as well as the conservation of indigenous genetic resources.",Native Pig and Chicken Breed Database,NPCDB,http://npcdb.snu.ac.kr/, +25234927,circBase: a database for circular RNAs.,"Recently, several laboratories have reported thousands of circular RNAs (circRNAs) in animals. Numerous circRNAs are highly stable and have specific spatiotemporal expression patterns. Even though a function for circRNAs is unknown, these features make circRNAs an interesting class of RNAs as possible biomarkers and for further research. We developed a database and website, """"circBase,"""" where merged and unified data sets of circRNAs and the evidence supporting their expression can be accessed, downloaded, and browsed within the genomic context. circBase also provides scripts to identify known and novel circRNAs in sequencing data. The database is freely accessible through the web server at http://www.circbase.org/.",circBase,circBase,http://www.circbase.org/,a database for circular RNAs +25267795,CarrotDB: a genomic and transcriptomic database for carrot.,"Carrot (Daucus carota L.) 
is an economically important vegetable worldwide and is the largest source of carotenoids and provitamin A in the human diet. Given the importance of this vegetable to humans, research and breeding communities on carrot should obtain useful genomic and transcriptomic information. The first whole-genome sequences of 'DC-27' carrot were de novo assembled and analyzed. Transcriptomic sequences of 14 carrot genotypes were downloaded from the Sequence Read Archive (SRA) database of National Center for Biotechnology Information (NCBI) and mapped to the whole-genome sequence before assembly. Based on these data sets, the first Web-based genomic and transcriptomic database for D. carota (CarrotDB) was developed (database homepage: http://apiaceae.njau.edu.cn/car rotdb). CarrotDB offers the tools of Genome Map and Basic Local Alignment Search Tool. Using these tools, users can search certain target genes and simple sequence repeats along with designed primers of 'DC-27'. Assembled transcriptomic sequences along with fragments per kilobase of transcript sequence per millions base pairs sequenced information (FPKM) information of 14 carrot genotypes are also provided. Users can download de novo assembled whole-genome sequences, putative gene sequences and putative protein sequences of 'DC-27'. Users can also download transcriptome sequence assemblies of 14 carrot genotypes along with their FPKM information. A total of 2826 transcription factor (TF) genes classified into 57 families were identified in the entire genome sequences. These TF genes were embedded in CarrotDB as an interface. The 'GERMPLASM' part of CarrotDB also offers taproot photos of 45 carrot genotypes and a table containing accession numbers, names, countries of origin and colors of cortex, phloem and xylem parts of taproots corresponding to each carrot genotype. CarrotDB will be continuously updated with new information. 
Database URL: http://apiaceae.njau.edu.cn/carrotdb/",CarrotDB,CarrotDB,http://apiaceae.njau.edu.cn/car rotdb,a genomic and transcriptomic database for carrot +25392413,The coffee genome hub: a resource for coffee genomes.,"The whole genome sequence of Coffea canephora, the perennial diploid species known as Robusta, has been recently released. In the context of the C. canephora genome sequencing project and to support post-genomics efforts, we developed the Coffee Genome Hub (http://coffee-genome.org/), an integrative genome information system that allows centralized access to genomics and genetics data and analysis tools to facilitate translational and applied research in coffee. We provide the complete genome sequence of C. canephora along with gene structure, gene product information, metabolism, gene families, transcriptomics, syntenic blocks, genetic markers and genetic maps. The hub relies on generic software (e.g. GMOD tools) for easy querying, visualizing and downloading research data. It includes a Genome Browser enhanced by a Community Annotation System, enabling the improvement of automatic gene annotation through an annotation editor. In addition, the hub aims at developing interoperability among other existing South Green tools managing coffee data (phylogenomics resources, SNPs) and/or supporting data analyses with the Galaxy workflow manager.",Coffee Genome Hub,,http://coffee-genome.org/,an integrative genome information system that allows centralized access to genomics and genetics data and analysis tools to facilitate translational and applied research in coffee +25392413,The coffee genome hub: a resource for coffee genomes.,"The whole genome sequence of Coffea canephora, the perennial diploid species known as Robusta, has been recently released. In the context of the C. 
canephora genome sequencing project and to support post-genomics efforts, we developed the Coffee Genome Hub (http://coffee-genome.org/), an integrative genome information system that allows centralized access to genomics and genetics data and analysis tools to facilitate translational and applied research in coffee. We provide the complete genome sequence of C. canephora along with gene structure, gene product information, metabolism, gene families, transcriptomics, syntenic blocks, genetic markers and genetic maps. The hub relies on generic software (e.g. GMOD tools) for easy querying, visualizing and downloading research data. It includes a Genome Browser enhanced by a Community Annotation System, enabling the improvement of automatic gene annotation through an annotation editor. In addition, the hub aims at developing interoperability among other existing South Green tools managing coffee data (phylogenomics resources, SNPs) and/or supporting data analyses with the Galaxy workflow manager.",coffee genome hub,,http://coffee-genome.org/,a resource for coffee genomes +25404137,SuperFly: a comparative database for quantified spatio-temporal gene expression patterns in early dipteran embryos.,"We present SuperFly (http://superfly.crg.eu), a relational database for quantified spatio-temporal expression data of segmentation genes during early development in different species of dipteran insects (flies, midges and mosquitoes). SuperFly has a special focus on emerging non-drosophilid model systems. The database currently includes data of high spatio-temporal resolution for three species: the vinegar fly Drosophila melanogaster, the scuttle fly Megaselia abdita and the moth midge Clogmia albipunctata. At this point, SuperFly covers up to 9 genes and 16 time points per species, with a total of 1823 individual embryos. 
It provides an intuitive web interface, enabling the user to query and access original embryo images, quantified expression profiles, extracted positions of expression boundaries and integrated datasets, plus metadata and intermediate processing steps. SuperFly is a valuable new resource for the quantitative comparative study of gene expression patterns across dipteran species. Moreover, it provides an interesting test set for systems biologists interested in fitting mathematical gene network models to data. Both of these aspects are essential ingredients for progress toward a more quantitative and mechanistic understanding of developmental evolution.",SuperFly,SuperFly,http://superfly.crg.eu,"a relational database for quantified spatio-temporal expression data of segmentation genes during early development in different species of dipteran insects (flies, midges and mosquitoes)" +25404137,SuperFly: a comparative database for quantified spatio-temporal gene expression patterns in early dipteran embryos.,"We present SuperFly (http://superfly.crg.eu), a relational database for quantified spatio-temporal expression data of segmentation genes during early development in different species of dipteran insects (flies, midges and mosquitoes). SuperFly has a special focus on emerging non-drosophilid model systems. The database currently includes data of high spatio-temporal resolution for three species: the vinegar fly Drosophila melanogaster, the scuttle fly Megaselia abdita and the moth midge Clogmia albipunctata. At this point, SuperFly covers up to 9 genes and 16 time points per species, with a total of 1823 individual embryos. It provides an intuitive web interface, enabling the user to query and access original embryo images, quantified expression profiles, extracted positions of expression boundaries and integrated datasets, plus metadata and intermediate processing steps. 
SuperFly is a valuable new resource for the quantitative comparative study of gene expression patterns across dipteran species. Moreover, it provides an interesting test set for systems biologists interested in fitting mathematical gene network models to data. Both of these aspects are essential ingredients for progress toward a more quantitative and mechanistic understanding of developmental evolution.",SuperFly,SuperFly,http://superfly.crg.eu,a comparative database for quantified spatio-temporal gene expression patterns in early dipteran embryos +25551368,PD_NGSAtlas: a reference database combining next-generation sequencing epigenomic and transcriptomic data for psychiatric disorders.,"

Background

Psychiatric disorders such as schizophrenia (SZ) and bipolar disorder (BP) are projected to lead the global disease burden within the next decade. Several lines of evidence suggest that epigenetic- or genetic-mediated dysfunction is frequently present in these disorders. To date, the inheritance patterns have been complicated by the problem of integrating epigenomic and transcriptomic factors that have yet to be elucidated. Therefore, there is a need to build a comprehensive database for storing epigenomic and transcriptomic data relating to psychiatric disorders.

Description

We have developed the PD_NGSAtlas, which focuses on the efficient storage of epigenomic and transcriptomic data based on next-generation sequencing and on the quantitative analyses of epigenetic and transcriptional alterations involved in psychiatric disorders. The current release of the PD_NGSAtlas contains 43 DNA methylation profiles and 37 transcription profiles detected by MeDIP-Seq and RNA-Seq, respectively, in two distinct brain regions and peripheral blood of SZ, BP and non-psychiatric controls. In addition to these data that were generated in-house, we have included, and will continue to include, published DNA methylation and gene expression data from other research groups, with a focus on psychiatric disorders. A flexible query engine has been developed for the acquisition of methylation profiles and transcription profiles for special genes or genomic regions of interest of the selected samples. Furthermore, the PD_NGSAtlas offers online tools for identifying aberrantly methylated and expressed events involved in psychiatric disorders. A genome browser has been developed to provide integrative and detailed views of multidimensional data in a given genomic context, which can help researchers understand molecular mechanisms from epigenetic and transcriptional perspectives. Moreover, users can download the methylation and transcription data for further analyses.

Conclusions

The PD_NGSAtlas aims to provide storage of epigenomic and transcriptomic data as well as quantitative analyses of epigenetic and transcriptional alterations involved in psychiatric disorders. The PD_NGSAtlas will be a valuable data resource and will enable researchers to investigate the pathophysiology and aetiology of disease in detail. The database is available at http://bioinfo.hrbmu.edu.cn/pd_ngsatlas/.",PD_NGSAtlas,PD_NGSAtlas,http://bioinfo.hrbmu.edu.cn/pd_ngsatlas/,a reference database combining next-generation sequencing epigenomic and transcriptomic data for psychiatric disorders +25632258,Araneae Sloveniae: a national spider species checklist.,"The research of the spider fauna of Slovenia dates back to the very beginning of binomial nomenclature, and has gone through more and less prolific phases with authors concentrating on taxonomy, faunistics, ecology and zoogeographic reviews. Although the body of published works is remarkable for a small nation, the faunistic data has remained too scattered for a thorough understanding of regional biotic diversity, for comparative and ecological research, and for informed conservation purposes. A national checklist is long overdue. Here, a critical review of all published records in any language is provided. The species list currently comprises 738 species, is published online at http://www.bioportal.si/katalog/araneae.php under the title Araneae Sloveniae, and will be updated in due course. This tool will fill the void in cataloguing regional spider faunas and will facilitate further araneological research in central and southern Europe.",Araneae Sloveniae,,http://www.bioportal.si/katalog/araneae.php,a national spider species checklist +25922515,The Fossil Calibration Database-A New Resource for Divergence Dating.,"Fossils provide the principal basis for temporal calibrations, which are critical to the accuracy of divergence dating analyses. 
Translating fossil data into minimum and maximum bounds for calibrations is the most important-often least appreciated-step of divergence dating. Properly justified calibrations require the synthesis of phylogenetic, paleontological, and geological evidence and can be difficult for nonspecialists to formulate. The dynamic nature of the fossil record (e.g., new discoveries, taxonomic revisions, updates of global or local stratigraphy) requires that calibration data be updated continually lest they become obsolete. Here, we announce the Fossil Calibration Database (http://fossilcalibrations.org), a new open-access resource providing vetted fossil calibrations to the scientific community. Calibrations accessioned into this database are based on individual fossil specimens and follow best practices for phylogenetic justification and geochronological constraint. The associated Fossil Calibration Series, a calibration-themed publication series at Palaeontologia Electronica, will serve as a key pipeline for peer-reviewed calibrations to enter the database.",The Fossil Calibration Database,,http://fossilcalibrations.org,a new open-access resource providing vetted fossil calibrations to the scientific community +25922515,The Fossil Calibration Database-A New Resource for Divergence Dating.,"Fossils provide the principal basis for temporal calibrations, which are critical to the accuracy of divergence dating analyses. Translating fossil data into minimum and maximum bounds for calibrations is the most important-often least appreciated-step of divergence dating. Properly justified calibrations require the synthesis of phylogenetic, paleontological, and geological evidence and can be difficult for nonspecialists to formulate. The dynamic nature of the fossil record (e.g., new discoveries, taxonomic revisions, updates of global or local stratigraphy) requires that calibration data be updated continually lest they become obsolete. 
Here, we announce the Fossil Calibration Database (http://fossilcalibrations.org), a new open-access resource providing vetted fossil calibrations to the scientific community. Calibrations accessioned into this database are based on individual fossil specimens and follow best practices for phylogenetic justification and geochronological constraint. The associated Fossil Calibration Series, a calibration-themed publication series at Palaeontologia Electronica, will serve as a key pipeline for peer-reviewed calibrations to enter the database.",The Fossil Calibration Database,,http://fossilcalibrations.org,A New Resource for Divergence Dating +26061870,Human Chromosome Y and Haplogroups; introducing YDHS Database.,"

Background

As the high throughput sequencing efforts generate more biological information, scientists from different disciplines are interpreting the polymorphisms that make us unique. In addition, there is an increasing trend in general public to research their own genealogy, find distant relatives and to know more about their biological background. Commercial vendors are providing analyses of mitochondrial and Y-chromosomal markers for such purposes. Clearly, an easy-to-use free interface to the existing data on the identified variants would be in the interest of general public and professionals less familiar with the field. Here we introduce a novel metadatabase YDHS that aims to provide such an interface for Y-chromosomal DNA (Y-DNA) haplogroups and sequence variants.

Methods

The database uses ISOGG Y-DNA tree as the source of mutations and haplogroups and by using genomic positions of the mutations the database links them to genes and other biological entities. YDHS contains analysis tools for deeper Y-SNP analysis.

Results

YDHS addresses the shortage of Y-DNA related databases. We have tested our database using a set of different cases from literature ranging from infertility to autism. The database is at http://www.semanticgen.net/ydhs

Conclusions

Y-chromosomal DNA (Y-DNA) haplogroups and sequence variants have not been in the scientific limelight, excluding certain specialized fields like forensics, mainly because there is not much freely available information or it is scattered in different sources. However, as we have demonstrated Y-SNPs do play a role in various cases on the haplogroup level and it is possible to create a free Y-DNA dedicated bioinformatics resource.",YDHS Database,YDHS,http://www.semanticgen.net/ydhs, +26138588,SmedGD 2.0: The Schmidtea mediterranea genome database.,"Planarians have emerged as excellent models for the study of key biological processes such as stem cell function and regulation, axial polarity specification, regeneration, and tissue homeostasis among others. The most widely used organism for these studies is the free-living flatworm Schmidtea mediterranea. In 2007, the Schmidtea mediterranea Genome Database (SmedGD) was first released to provide a much needed resource for the small, but growing planarian community. SmedGD 1.0 has been a depository for genome sequence, a draft assembly, and related experimental data (e.g., RNAi phenotypes, in situ hybridization images, and differential gene expression results). We report here a comprehensive update to SmedGD (SmedGD 2.0) that aims to expand its role as an interactive community resource. The new database includes more recent, and up-to-date transcription data, provides tools that enhance interconnectivity between different genome assemblies and transcriptomes, including next-generation assemblies for both the sexual and asexual biotypes of S. mediterranea. SmedGD 2.0 (http://smedgd.stowers.org) not only provides significantly improved gene annotations, but also tools for data sharing, attributes that will help both the planarian and biomedical communities to more efficiently mine the genomics and transcriptomics of S. 
mediterranea.",Schmidtea mediterranea genome database,SmedGD,http://smedgd.stowers.org, +26243198,miRegulome: a knowledge-base of miRNA regulomics and analysis.,"

Unlabelled

miRNAs regulate post transcriptional gene expression by targeting multiple mRNAs and hence can modulate multiple signalling pathways, biological processes, and patho-physiologies. Therefore, understanding of miRNA regulatory networks is essential in order to modulate the functions of a miRNA. The focus of several existing databases is to provide information on specific aspects of miRNA regulation. However, an integrated resource on the miRNA regulome is currently not available to facilitate the exploration and understanding of miRNA regulomics. miRegulome attempts to bridge this gap. The current version of miRegulome v1.0 provides details on the entire regulatory modules of miRNAs altered in response to chemical treatments and transcription factors, based on validated data manually curated from published literature. Modules of miRegulome (upstream regulators, downstream targets, miRNA regulated pathways, functions, diseases, etc) are hyperlinked to an appropriate external resource and are displayed visually to provide a comprehensive understanding. Four analysis tools are incorporated to identify relationships among different modules based on user specified datasets. miRegulome and its tools are helpful in understanding the biology of miRNAs and will also facilitate the discovery of biomarkers and therapeutics. With added features in upcoming releases, miRegulome will be an essential resource to the scientific community.

Availability

http://bnet.egr.vcu.edu/miRegulome.",miRegulome,miRegulome,http://bnet.egr.vcu.edu/miRegulome,a knowledge-base of miRNA regulomics and analysis +26321999,Speech error and tip of the tongue diary for mobile devices.,"Collections of various types of speech errors have increased our understanding of the acquisition, production, and perception of language. Although such collections of naturally occurring language errors are invaluable for a number of reasons, the process of collecting various types of speech errors presents many challenges to the researcher interested in building such a collection, among them a significant investment of time and effort to obtain a sufficient number of examples to enable statistical analysis. Here we describe a freely accessible website http://spedi.ku.edu that helps users document slips of the tongue, slips of the ear, and tip of the tongue states that they experience firsthand or observe in others. The documented errors are amassed, and made available for other users to analyze, thereby distributing the time and effort involved in collecting errors across a large number of individuals instead of saddling the lone researcher, and facilitating distribution of the collection to other researchers. This approach also addresses some issues related to data curation that hampered previous error collections, and enables the collection to continue to grow over a longer period of time than previous collections. 
Finally, this web-based tool creates an opportunity for language scientists to engage in outreach efforts to increase the understanding of language disorders and research in the general public.",,,http://spedi.ku.edu,"a freely accessible website that helps users document slips of the tongue, slips of the ear, and tip of the tongue states that they experience firsthand or observe in others" +26322998,CTDB: An Integrated Chickpea Transcriptome Database for Functional and Applied Genomics.,"Chickpea is an important grain legume used as a rich source of protein in human diet. The narrow genetic diversity and limited availability of genomic resources are the major constraints in implementing breeding strategies and biotechnological interventions for genetic enhancement of chickpea. We developed an integrated Chickpea Transcriptome Database (CTDB), which provides the comprehensive web interface for visualization and easy retrieval of transcriptome data in chickpea. The database features many tools for similarity search, functional annotation (putative function, PFAM domain and gene ontology) search and comparative gene expression analysis. The current release of CTDB (v2.0) hosts transcriptome datasets with high quality functional annotation from cultivated (desi and kabuli types) and wild chickpea. A catalog of transcription factor families and their expression profiles in chickpea are available in the database. The gene expression data have been integrated to study the expression profiles of chickpea transcripts in major tissues/organs and various stages of flower development. The utilities, such as similarity search, ortholog identification and comparative gene expression have also been implemented in the database to facilitate comparative genomic studies among different legumes and Arabidopsis. 
Furthermore, the CTDB represents a resource for the discovery of functional molecular markers (microsatellites and single nucleotide polymorphisms) between different chickpea types. We anticipate that integrated information content of this database will accelerate the functional and applied genomic research for improvement of chickpea. The CTDB web service is freely available at http://nipgr.res.in/ctdb.html.",Chickpea Transcriptome Database,CTDB,http://nipgr.res.in/ctdb.html,An Integrated Chickpea Transcriptome Database for Functional and Applied Genomics +26387108,The Protein Ensemble Database.,"The scientific community's major conceptual notion of structural biology has recently shifted in emphasis from the classical structure-function paradigm due to the emergence of intrinsically disordered proteins (IDPs). As opposed to their folded cousins, these proteins are defined by the lack of a stable 3D fold and a high degree of inherent structural heterogeneity that is closely tied to their function. Due to their flexible nature, solution techniques such as small-angle X-ray scattering (SAXS), nuclear magnetic resonance (NMR) spectroscopy and fluorescence resonance energy transfer (FRET) are particularly well-suited for characterizing their biophysical properties. Computationally derived structural ensembles based on such experimental measurements provide models of the conformational sampling displayed by these proteins, and they may offer valuable insights into the functional consequences of inherent flexibility. The Protein Ensemble Database (http://pedb.vib.be) is the first openly accessible, manually curated online resource storing the ensemble models, protocols used during the calculation procedure, and underlying primary experimental data derived from SAXS and/or NMR measurements. 
By making this previously inaccessible data freely available to researchers, this novel resource is expected to promote the development of more advanced modelling methodologies, facilitate the design of standardized calculation protocols, and consequently lead to a better understanding of how function arises from the disordered state.",Protein Ensemble Database,,http://pedb.vib.be,"the first openly accessible, manually curated online resource storing the ensemble models, protocols used during the calculation procedure, and underlying primary experimental data derived from SAXS and/or NMR measurements" +26432833,iPPI-DB: an online database of modulators of protein-protein interactions.,"In order to boost the identification of low-molecular-weight drugs on protein-protein interactions (PPI), it is essential to properly collect and annotate experimental data about successful examples. This provides the scientific community with the necessary information to derive trends about privileged physicochemical properties and chemotypes that maximize the likelihood of promoting a given chemical probe to the most advanced stages of development. To this end we have developed iPPI-DB (freely accessible at http://www.ippidb.cdithem.fr), a database that contains the structure, some physicochemical characteristics, the pharmacological data and the profile of the PPI targets of several hundreds modulators of protein-protein interactions. iPPI-DB is accessible through a web application and can be queried according to two general approaches: using physicochemical/pharmacological criteria; or by chemical similarity to a user-defined structure input. In both cases the results are displayed as a sortable and exportable datasheet with links to external databases such as Uniprot, PubMed. Furthermore each compound in the table has a link to an individual ID card that contains its physicochemical and pharmacological profile derived from iPPI-DB data. 
This includes information about its binding data, ligand and lipophilic efficiencies, location in the PPI chemical space, and importantly similarity with known drugs, and links to external databases like PubChem, and ChEMBL.",iPPI-DB,iPPI-DB,http://www.ippidb.cdithem.fr,"a database that contains the structure, some physicochemical characteristics, the pharmacological data and the profile of the PPI targets of several hundreds modulators of protein-protein interactions" +26432833,iPPI-DB: an online database of modulators of protein-protein interactions.,"In order to boost the identification of low-molecular-weight drugs on protein-protein interactions (PPI), it is essential to properly collect and annotate experimental data about successful examples. This provides the scientific community with the necessary information to derive trends about privileged physicochemical properties and chemotypes that maximize the likelihood of promoting a given chemical probe to the most advanced stages of development. To this end we have developed iPPI-DB (freely accessible at http://www.ippidb.cdithem.fr), a database that contains the structure, some physicochemical characteristics, the pharmacological data and the profile of the PPI targets of several hundreds modulators of protein-protein interactions. iPPI-DB is accessible through a web application and can be queried according to two general approaches: using physicochemical/pharmacological criteria; or by chemical similarity to a user-defined structure input. In both cases the results are displayed as a sortable and exportable datasheet with links to external databases such as Uniprot, PubMed. Furthermore each compound in the table has a link to an individual ID card that contains its physicochemical and pharmacological profile derived from iPPI-DB data. 
This includes information about its binding data, ligand and lipophilic efficiencies, location in the PPI chemical space, and importantly similarity with known drugs, and links to external databases like PubChem, and ChEMBL.",iPPI-DB,iPPI-DB,http://www.ippidb.cdithem.fr,an online database of modulators of protein-protein interactions +26515641,Mouse polyQ database: a new online resource for research using mouse models of neurodegenerative diseases.,"

Background

The polyglutamine (polyQ) family of disorders comprises 9 genetic diseases, including several types of ataxia and Huntington disease. Approximately two decades of investigation and the creation of more than 130 mouse models of polyQ disorders have revealed many similarities between these diseases. The disorders share common mutation types, neurological characteristics and certain aspects of pathogenesis, including morphological and physiological neuronal alterations. All of the diseases still remain incurable.

Description

The large volume of information collected as a result of the investigation of polyQ models currently represents a great potential for searching, comparing and translating pathogenesis and therapeutic information between diseases. Therefore, we generated a public database comprising the polyQ mouse models, phenotypes and therapeutic interventions tested in vivo. The database is available at http://conyza.man.poznan.pl/ .

Conclusion

The use of the database in the field of polyQ diseases may accelerate research on these and other neurodegenerative diseases and provide new perspectives for future investigation.",Mouse polyQ database,,http://conyza.man.poznan.pl/,a new online resource for research using mouse models of neurodegenerative diseases +26553798,InterRNA: a database of base interactions in RNA structures.,"A major component of RNA structure stabilization are the hydrogen bonded interactions between the base residues. The importance and biological relevance for large clusters of base interactions can be much more easily investigated when their occurrences have been systematically detected, catalogued and compared. In this paper, we describe the database InterRNA (INTERactions in RNA structures database-http://mfrlab.org/interrna/) that contains records of known RNA 3D motifs as well as records for clusters of bases that are interconnected by hydrogen bonds. The contents of the database were compiled from RNA structural annotations carried out by the NASSAM (http://mfrlab.org/grafss/nassam) and COGNAC (http://mfrlab.org/grafss/cognac) computer programs. An analysis of the database content and comparisons with the existing corpus of knowledge regarding RNA 3D motifs clearly show that InterRNA is able to provide an extension of the annotations for known motifs as well as able to provide novel interactions for further investigations.",INTERactions in RNA structures database,InterRNA,http://mfrlab.org/interrna/,a database of base interactions in RNA structures +26582915,piRNA cluster database: a web resource for piRNA producing loci.,"Piwi proteins and their guiding small RNAs, termed Piwi-interacting (pi-) RNAs, are essential for silencing of transposons in the germline of animals. A substantial fraction of piRNAs originates from genomic loci termed piRNA clusters and sequences encoded in these piRNA clusters determine putative targets for the Piwi/piRNA system. 
In the past decade, studies of piRNA transcriptomes in different species revealed additional roles for piRNAs beyond transposon silencing, reflecting the astonishing plasticity of the Piwi/piRNA system along different phylogenetic branches. Moreover, piRNA transcriptomes can change drastically during development and vary across different tissues.Since piRNA clusters crucially shape piRNA profiles, analysis of these loci is imperative for a thorough understanding of functional and evolutionary aspects of the piRNA pathway. But despite the ever-growing amount of available piRNA sequence data, we know little about the factors that determine differential regulation of piRNA clusters, nor the evolutionary events that cause their gain or loss.In order to facilitate addressing these subjects, we established a user-friendly piRNA cluster database (http://www.smallrnagroup-mainz.de/piRNAclusterDB.html) that provides comprehensive data on piRNA clusters in multiple species, tissues and developmental stages based on small RNA sequence data deposited at NCBI's Sequence Read Archive (SRA).",piRNA cluster database,,http://www.smallrnagroup-mainz.de/piRNAclusterDB.html,a web resource for piRNA producing loci +26582919,EBI metagenomics in 2016--an expanding and evolving resource for the analysis and archiving of metagenomic data.,"EBI metagenomics (https://www.ebi.ac.uk/metagenomics/) is a freely available hub for the analysis and archiving of metagenomic and metatranscriptomic data. Over the last 2 years, the resource has undergone rapid growth, with an increase of over five-fold in the number of processed samples and consequently represents one of the largest resources of analysed shotgun metagenomes. Here, we report the status of the resource in 2016 and give an overview of new developments. 
In particular, we describe updates to data content, a complete overhaul of the analysis pipeline, streamlining of data presentation via the website and the development of a new web based tool to compare functional analyses of sequence runs within a study. We also highlight two of the higher profile projects that have been analysed using the resource in the last year: the oceanographic projects Ocean Sampling Day and Tara Oceans.",EBI metagenomics,,https://www.ebi.ac.uk/metagenomics/,a freely available hub for the analysis and archiving of metagenomic and metatranscriptomic data +26582919,EBI metagenomics in 2016--an expanding and evolving resource for the analysis and archiving of metagenomic data.,"EBI metagenomics (https://www.ebi.ac.uk/metagenomics/) is a freely available hub for the analysis and archiving of metagenomic and metatranscriptomic data. Over the last 2 years, the resource has undergone rapid growth, with an increase of over five-fold in the number of processed samples and consequently represents one of the largest resources of analysed shotgun metagenomes. Here, we report the status of the resource in 2016 and give an overview of new developments. In particular, we describe updates to data content, a complete overhaul of the analysis pipeline, streamlining of data presentation via the website and the development of a new web based tool to compare functional analyses of sequence runs within a study. We also highlight two of the higher profile projects that have been analysed using the resource in the last year: the oceanographic projects Ocean Sampling Day and Tara Oceans.",EBI metagenomics,,https://www.ebi.ac.uk/metagenomics/,an expanding and evolving resource for the analysis and archiving of metagenomic data +26590405,TSGene 2.0: an updated literature-based knowledgebase for tumor suppressor genes.,"Tumor suppressor genes (TSGs) are a major type of gatekeeper genes in the cell growth. 
A knowledgebase with the systematic collection and curation of TSGs in multiple cancer types is critically important for further studying their biological functions as well as for developing therapeutic strategies. Since its development in 2012, the Tumor Suppressor Gene database (TSGene), has become a popular resource in the cancer research community. Here, we reported the TSGene version 2.0, which has substantial updates of contents (e.g. up-to-date literature and pan-cancer genomic data collection and curation), data types (noncoding RNAs and protein-coding genes) and content accessibility. Specifically, the current TSGene 2.0 contains 1217 human TSGs (1018 protein-coding and 199 non-coding genes) curated from over 9000 articles. Additionally, TSGene 2.0 provides thousands of expression and mutation patterns derived from pan-cancer data of The Cancer Genome Atlas. A new web interface is available at http://bioinfo.mc.vanderbilt.edu/TSGene/. Systematic analyses of 199 non-coding TSGs provide numerous cancer-specific non-coding mutational events for further screening and clinical use. Intriguingly, we identified 49 protein-coding TSGs that were consistently down-regulated in 11 cancer types. In summary, TSGene 2.0, which is the only available database for TSGs, provides the most updated TSGs and their features in pan-cancer.",Tumor Suppressor Gene database,TSGene,http://bioinfo.mc.vanderbilt.edu/TSGene/,an updated literature-based knowledgebase for tumor suppressor genes +26612867,The Dfam database of repetitive DNA families.,"Repetitive DNA, especially that due to transposable elements (TEs), makes up a large fraction of many genomes. Dfam is an open access database of families of repetitive DNA elements, in which each family is represented by a multiple sequence alignment and a profile hidden Markov model (HMM). 
The initial release of Dfam, featured in the 2013 NAR Database Issue, contained 1143 families of repetitive elements found in humans, and was used to produce more than 100 Mb of additional annotation of TE-derived regions in the human genome, with improved speed. Here, we describe recent advances, most notably expansion to 4150 total families including a comprehensive set of known repeat families from four new organisms (mouse, zebrafish, fly and nematode). We describe improvements to coverage, and to our methods for identifying and reducing false annotation. We also describe updates to the website interface. The Dfam website has moved to http://dfam.org. Seed alignments, profile HMMs, hit lists and other underlying data are available for download.",Dfam,Dfam,http://dfam.org,database of repetitive DNA families +26644461,ALCOdb: Gene Coexpression Database for Microalgae.,"In the era of energy and food shortage, microalgae have gained much attention as promising sources of biofuels and food ingredients. However, only a small fraction of microalgal genes have been functionally characterized. Here, we have developed the Algae Gene Coexpression database (ALCOdb; http://alcodb.jp), which provides gene coexpression information to survey gene modules for a function of interest. ALCOdb currently supports two model algae: the green alga Chlamydomonas reinhardtii and the red alga Cyanidioschyzon merolae. Users can retrieve coexpression information for genes of interest through three unique data pages: (i) Coexpressed Gene List; (ii) Gene Information; and (iii) Coexpressed Gene Network. In addition to the basal coexpression information, ALCOdb also provides several advanced functionalities such as an expression profile viewer and a differentially expressed gene search tool. 
Using these user interfaces, we demonstrated that our gene coexpression data have the potential to detect functionally related genes and are useful in extrapolating the biological roles of uncharacterized genes. ALCOdb will facilitate molecular and biochemical studies of microalgal biological phenomena, such as lipid metabolism and organelle development, and promote the evolutionary understanding of plant cellular systems.",Algae Gene Coexpression database,ALCOdb,http://alcodb.jp,Gene Coexpression Database for Microalgae +26657893,"HRGRN: A Graph Search-Empowered Integrative Database of Arabidopsis Signaling Transduction, Metabolism and Gene Regulation Networks.","The biological networks controlling plant signal transduction, metabolism and gene regulation are composed of not only tens of thousands of genes, compounds, proteins and RNAs but also the complicated interactions and co-ordination among them. These networks play critical roles in many fundamental mechanisms, such as plant growth, development and environmental response. Although much is known about these complex interactions, the knowledge and data are currently scattered throughout the published literature, publicly available high-throughput data sets and third-party databases. Many 'unknown' yet important interactions among genes need to be mined and established through extensive computational analysis. However, exploring these complex biological interactions at the network level from existing heterogeneous resources remains challenging and time-consuming for biologists. Here, we introduce HRGRN, a graph search-empowered integrative database of Arabidopsis signal transduction, metabolism and gene regulatory networks. HRGRN utilizes Neo4j, which is a highly scalable graph database management system, to host large-scale biological interactions among genes, proteins, compounds and small RNAs that were either validated experimentally or predicted computationally. 
The associated biological pathway information was also specially marked for the interactions that are involved in the pathway to facilitate the investigation of cross-talk between pathways. Furthermore, HRGRN integrates a series of graph path search algorithms to discover novel relationships among genes, compounds, RNAs and even pathways from heterogeneous biological interaction data that could be missed by traditional SQL database search methods. Users can also build subnetworks based on known interactions. The outcomes are visualized with rich text, figures and interactive network graphs on web pages. The HRGRN database is freely available at http://plantgrn.noble.org/hrgrn/.",HRGRN,HRGRN,http://plantgrn.noble.org/hrgrn/,"A Graph Search-Empowered Integrative Database of Arabidopsis Signaling Transduction, Metabolism and Gene Regulation Networks" +26777304,dbEM: A database of epigenetic modifiers curated from cancerous and normal genomes.,"We have developed a database called dbEM (database of Epigenetic Modifiers) to maintain the genomic information of about 167 epigenetic modifiers/proteins, which are considered as potential cancer targets. In dbEM, modifiers are classified on functional basis and comprise of 48 histone methyl transferases, 33 chromatin remodelers and 31 histone demethylases. dbEM maintains the genomic information like mutations, copy number variation and gene expression in thousands of tumor samples, cancer cell lines and healthy samples. This information is obtained from public resources viz. COSMIC, CCLE and 1000-genome project. Gene essentiality data retrieved from COLT database further highlights the importance of various epigenetic proteins for cancer survival. We have also reported the sequence profiles, tertiary structures and post-translational modifications of these epigenetic proteins in cancer. It also contains information of 54 drug molecules against different epigenetic proteins. 
A wide range of tools have been integrated in dbEM e.g. Search, BLAST, Alignment and Profile based prediction. In our analysis, we found that epigenetic proteins DNMT3A, HDAC2, KDM6A, and TET2 are highly mutated in variety of cancers. We are confident that dbEM will be very useful in cancer research particularly in the field of epigenetic proteins based cancer therapeutics. This database is available for public at URL: http://crdd.osdd.net/raghava/dbem.",database of Epigenetic Modifiers,dbEM,http://crdd.osdd.net/raghava/dbem,A database of epigenetic modifiers curated from cancerous and normal genomes +27037912,GAMDB: a web resource to connect microRNAs with autophagy in gerontology.,"

Objectives

MicroRNAs (miRNAs) are endogenous ~23 nucleotides (nt) RNAs, regulating gene expression by pairing to the mRNAs of protein-coding genes to direct their post-transcriptional repression. Both in normal and aberrant activities, miRNAs contribute to a recurring paradigm of cellular behaviors in pathological settings, especially in gerontology. Autophagy, a multi-step lysosomal degradation process with function to degrade long-lived proteins and damaged organelles, has significant impact on gerontology. Thus, elucidating how miRNAs participate in autophagy may enlarge the scope of miRNA in autophagy and facilitate researches in gerontology.

Materials and methods

Herein, based upon the published studies, predicted targets and gerontology-related diseases, we constructed a web resource named Gerontology-Autophagic-MicroRNA Database (GAMDB) (http://gamdb.liu-lab.com/index.php), which contained 836 autophagy-related miRNAs, 197 targeted genes/proteins and 56 aging-related diseases such as Parkinson' disease, Alzheimer's disease and Huntington's disease.

Results and conclusion

We made use of large amounts of data to elucidate the intricate relationships between microRNA-regulated autophagic mechanisms and gerontology. This database will facilitate better understanding of autophagy regulation network in gerontology and thus promoting gerontology-related therapy in the future.",Gerontology-Autophagic-MicroRNA Database,GAMDB,http://gamdb.liu-lab.com/index.php,a web resource to connect microRNAs with autophagy in gerontology +27188311,A comprehensive database of high-throughput sequencing-based RNA secondary structure probing data (Structure Surfer).,"

Background

RNA molecules fold into complex three-dimensional shapes, guided by the pattern of hydrogen bonding between nucleotides. This pattern of base pairing, known as RNA secondary structure, is critical to their cellular function. Recently several diverse methods have been developed to assay RNA secondary structure on a transcriptome-wide scale using high-throughput sequencing. Each approach has its own strengths and caveats, however there is no widely available tool for visualizing and comparing the results from these varied methods.

Methods

To address this, we have developed Structure Surfer, a database and visualization tool for inspecting RNA secondary structure in six transcriptome-wide data sets from human and mouse ( http://tesla.pcbi.upenn.edu/strucuturesurfer/ ). The data sets were generated using four different high-throughput sequencing based methods. Each one was analyzed with a scoring pipeline specific to its experimental design. Users of Structure Surfer have the ability to query individual loci as well as detect trends across multiple sites.

Results

Here, we describe the included data sets and their differences. We illustrate the database's function by examining known structural elements and we explore example use cases in which combined data is used to detect structural trends.

Conclusions

In total, Structure Surfer provides an easy-to-use database and visualization interface for allowing users to interrogate the currently available transcriptome-wide RNA secondary structure information for mammals.",Structure Surfer,,http://tesla.pcbi.upenn.edu/strucuturesurfer,A comprehensive database of high-throughput sequencing-based RNA secondary structure probing data +27199454,Comprehensive database of human E3 ubiquitin ligases: application to aquaporin-2 regulation.,"Aquaporin-2 (AQP2) is regulated in part via vasopressin-mediated changes in protein half-life that are in turn dependent on AQP2 ubiquitination. Here we addressed the question, """"What E3 ubiquitin ligase is most likely to be responsible for AQP2 ubiquitination?"""" using large-scale data integration based on Bayes' rule. The first step was to bioinformatically identify all E3 ligase genes coded by the human genome. The 377 E3 ubiquitin ligases identified in the human genome, consisting predominant of HECT, RING, and U-box proteins, have been used to create a publically accessible and downloadable online database (https://hpcwebapps.cit.nih.gov/ESBL/Database/E3-ligases/). We also curated a second database of E3 ligase accessory proteins that included BTB domain proteins, cullins, SOCS-box proteins, and F-box proteins. Using Bayes' theorem to integrate information from multiple large-scale proteomic and transcriptomic datasets, we ranked these 377 E3 ligases with respect to their probability of interaction with AQP2. Application of Bayes' rule identified the E3 ligases most likely to interact with AQP2 as (in order of probability): NEDD4 and NEDD4L (tied for first), AMFR, STUB1, ITCH, ZFPL1. Significantly, the two E3 ligases tied for top rank have also been studied extensively in the reductionist literature as regulatory proteins in renal tubule epithelia. 
The concordance of conclusions from reductionist and systems-level data provides strong motivation for further studies of the roles of NEDD4 and NEDD4L in the regulation of AQP2 protein turnover.",,,https://hpcwebapps.cit.nih.gov/ESBL/Database/E3-ligases/,Comprehensive database of human E3 ubiquitin ligases +27242836,VESPUCCI: Exploring Patterns of Gene Expression in Grapevine.,"Large-scale transcriptional studies aim to decipher the dynamic cellular responses to a stimulus, like different environmental conditions. In the era of high-throughput omics biology, the most used technologies for these purposes are microarray and RNA-Seq, whose data are usually required to be deposited in public repositories upon publication. Such repositories have the enormous potential to provide a comprehensive view of how different experimental conditions lead to expression changes, by comparing gene expression across all possible measured conditions. Unfortunately, this task is greatly impaired by differences among experimental platforms that make direct comparisons difficult. In this paper, we present the Vitis Expression Studies Platform Using COLOMBOS Compendia Instances (VESPUCCI), a gene expression compendium for grapevine which was built by adapting an approach originally developed for bacteria, and show how it can be used to investigate complex gene expression patterns. We integrated nearly all publicly available microarray and RNA-Seq expression data: 1608 gene expression samples from 10 different technological platforms. Each sample has been manually annotated using a controlled vocabulary developed ad hoc to ensure both human readability and computational tractability. Expression data in the compendium can be visually explored using several tools provided by the web interface or can be programmatically accessed using the REST interface. 
VESPUCCI is freely accessible at http://vespucci.colombos.fmach.it.",Vitis Expression Studies Platform Using COLOMBOS Compendia Instances,VESPUCCI,http://vespucci.colombos.fmach.it,"a gene expression compendium for grapevine which was built by adapting an approach originally developed for bacteria, and show how it can be used to investigate complex gene expression patterns" +27451428,SZDB: A Database for Schizophrenia Genetic Research.,"Schizophrenia (SZ) is a debilitating brain disorder with a complex genetic architecture. Genetic studies, especially recent genome-wide association studies (GWAS), have identified multiple variants (loci) conferring risk to SZ. However, how to efficiently extract meaningful biological information from bulk genetic findings of SZ remains a major challenge. There is a pressing need to integrate multiple layers of data from various sources, eg, genetic findings from GWAS, copy number variations (CNVs), association and linkage studies, gene expression, protein-protein interaction (PPI), co-expression, expression quantitative trait loci (eQTL), and Encyclopedia of DNA Elements (ENCODE) data, to provide a comprehensive resource to facilitate the translation of genetic findings into SZ molecular diagnosis and mechanism study. Here we developed the SZDB database (http://www.szdb.org/), a comprehensive resource for SZ research. SZ genetic data, gene expression data, network-based data, brain eQTL data, and SNP function annotation information were systematically extracted, curated and deposited in SZDB. In-depth analyses and systematic integration were performed to identify top prioritized SZ genes and enriched pathways. Multiple types of data from various layers of SZ research were systematically integrated and deposited in SZDB. In-depth data analyses and integration identified top prioritized SZ genes and enriched pathways. 
We further showed that genes implicated in SZ are highly co-expressed in human brain and proteins encoded by the prioritized SZ risk genes are significantly interacted. The user-friendly SZDB provides high-confidence candidate variants and genes for further functional characterization. More important, SZDB provides convenient online tools for data search and browse, data integration, and customized data analyses.",SZDB,SZDB,http://www.szdb.org/,A Database for Schizophrenia Genetic Research +27451428,SZDB: A Database for Schizophrenia Genetic Research.,"Schizophrenia (SZ) is a debilitating brain disorder with a complex genetic architecture. Genetic studies, especially recent genome-wide association studies (GWAS), have identified multiple variants (loci) conferring risk to SZ. However, how to efficiently extract meaningful biological information from bulk genetic findings of SZ remains a major challenge. There is a pressing need to integrate multiple layers of data from various sources, eg, genetic findings from GWAS, copy number variations (CNVs), association and linkage studies, gene expression, protein-protein interaction (PPI), co-expression, expression quantitative trait loci (eQTL), and Encyclopedia of DNA Elements (ENCODE) data, to provide a comprehensive resource to facilitate the translation of genetic findings into SZ molecular diagnosis and mechanism study. Here we developed the SZDB database (http://www.szdb.org/), a comprehensive resource for SZ research. SZ genetic data, gene expression data, network-based data, brain eQTL data, and SNP function annotation information were systematically extracted, curated and deposited in SZDB. In-depth analyses and systematic integration were performed to identify top prioritized SZ genes and enriched pathways. Multiple types of data from various layers of SZ research were systematically integrated and deposited in SZDB. In-depth data analyses and integration identified top prioritized SZ genes and enriched pathways. 
We further showed that genes implicated in SZ are highly co-expressed in human brain and proteins encoded by the prioritized SZ risk genes are significantly interacted. The user-friendly SZDB provides high-confidence candidate variants and genes for further functional characterization. More important, SZDB provides convenient online tools for data search and browse, data integration, and customized data analyses.",SZDB,SZDB,http://www.szdb.org/,a comprehensive resource for SZ research +27484196,iLIR database: A web resource for LIR motif-containing proteins in eukaryotes.,"Atg8-family proteins are the best-studied proteins of the core autophagic machinery. They are essential for the elongation and closure of the phagophore into a proper autophagosome. Moreover, Atg8-family proteins are associated with the phagophore from the initiation of the autophagic process to, or just prior to, the fusion between autophagosomes with lysosomes. In addition to their implication in autophagosome biogenesis, they are crucial for selective autophagy through their ability to interact with selective autophagy receptor proteins necessary for the specific targeting of substrates for autophagic degradation. In the past few years it has been revealed that Atg8-interacting proteins include not only receptors but also components of the core autophagic machinery, proteins associated with vesicles and their transport, and specific proteins that are selectively degraded by autophagy. Atg8-interacting proteins contain a short linear LC3-interacting region/LC3 recognition sequence/Atg8-interacting motif (LIR/LRS/AIM) motif which is responsible for their interaction with Atg8-family proteins. These proteins are referred to as LIR-containing proteins (LIRCPs). So far, many experimental efforts have been carried out to identify new LIRCPs, leading to the characterization of some of them in the past 10 years. 
Given the need for the identification of LIRCPs in various organisms, we developed the iLIR database ( https://ilir.warwick.ac.uk ) as a freely available web resource, listing all the putative canonical LIRCPs identified in silico in the proteomes of 8 model organisms using the iLIR server, combined with a Gene Ontology (GO) term analysis. Additionally, a curated text-mining analysis of the literature permitted us to identify novel putative LICRPs in mammals that have not previously been associated with autophagy.",iLIR database,iLIR,https://ilir.warwick.ac.uk,A web resource for LIR motif-containing proteins in eukaryotes +27794041,NGSmethDB 2017: enhanced methylomes and differential methylation.,"The 2017 update of NGSmethDB stores whole genome methylomes generated from short-read data sets obtained by bisulfite sequencing (WGBS) technology. To generate high-quality methylomes, stringent quality controls were integrated with third-part software, adding also a two-step mapping process to exploit the advantages of the new genome assembly models. The samples were all profiled under constant parameter settings, thus enabling comparative downstream analyses. Besides a significant increase in the number of samples, NGSmethDB now includes two additional data-types, which are a valuable resource for the discovery of methylation epigenetic biomarkers: (i) differentially methylated single-cytosines; and (ii) methylation segments (i.e. genome regions of homogeneous methylation). The NGSmethDB back-end is now based on MongoDB, a NoSQL hierarchical database using JSON-formatted documents and dynamic schemas, thus accelerating sample comparative analyses. Besides conventional database dumps, track hubs were implemented, which improved database access, visualization in genome browsers and comparative analyses to third-part annotations. In addition, the database can be also accessed through a RESTful API. 
Lastly, a Python client and a multiplatform virtual machine allow for program-driven access from user desktop. This way, private methylation data can be compared to NGSmethDB without the need to upload them to public servers. Database website: http://bioinfo2.ugr.es/NGSmethDB.",NGSmethDB,NGSmethDB,http://bioinfo2.ugr.es/NGSmethD, +27841751,A public database of macromolecular diffraction experiments.,"The low reproducibility of published experimental results in many scientific disciplines has recently garnered negative attention in scientific journals and the general media. Public transparency, including the availability of `raw' experimental data, will help to address growing concerns regarding scientific integrity. Macromolecular X-ray crystallography has led the way in requiring the public dissemination of atomic coordinates and a wealth of experimental data, making the field one of the most reproducible in the biological sciences. However, there remains no mandate for public disclosure of the original diffraction data. The Integrated Resource for Reproducibility in Macromolecular Crystallography (IRRMC) has been developed to archive raw data from diffraction experiments and, equally importantly, to provide related metadata. Currently, the database of our resource contains data from 2920 macromolecular diffraction experiments (5767 data sets), accounting for around 3% of all depositions in the Protein Data Bank (PDB), with their corresponding partially curated metadata. IRRMC utilizes distributed storage implemented using a federated architecture of many independent storage servers, which provides both scalability and sustainability. The resource, which is accessible via the web portal at http://www.proteindiffraction.org, can be searched using various criteria. All data are available for unrestricted access and download. 
The resource serves as a proof of concept and demonstrates the feasibility of archiving raw diffraction data and associated metadata from X-ray crystallographic studies of biological macromolecules. The goal is to expand this resource and include data sets that failed to yield X-ray structures in order to facilitate collaborative efforts that will improve protein structure-determination methods and to ensure the availability of `orphan' data left behind for various reasons by individual investigators and/or extinct structural genomics projects.",Integrated Resource for Reproducibility in Macromolecular Crystallography,IRRMC,http://www.proteindiffraction.org,A public database of macromolecular diffraction experiments +27899625,YM500v3: a database for small RNA sequencing in human cancer research.,"We previously presented the YM500 database, which contains >8000 small RNA sequencing (smRNA-seq) data sets and integrated analysis results for various cancer miRNome studies. In the updated YM500v3 database (http://ngs.ym.edu.tw/ym500/) presented herein, we not only focus on miRNAs but also on other functional small non-coding RNAs (sncRNAs), such as PIWI-interacting RNAs (piRNAs), tRNA-derived fragments (tRFs), small nuclear RNAs (snRNAs) and small nucleolar RNAs (snoRNAs). There is growing knowledge of the role of sncRNAs in gene regulation and tumorigenesis. We have also incorporated >10 000 cancer-related RNA-seq and >3000 more smRNA-seq data sets into the YM500v3 database. Furthermore, there are two main new sections, 'Survival' and 'Cancer', in this updated version. The 'Survival' section provides the survival analysis results in all cancer types or in a user-defined group of samples for a specific sncRNA. The 'Cancer' section provides the results of differential expression analyses, miRNA-gene interactions and cancer miRNA-related pathways. In the 'Expression' section, sncRNA expression profiles across cancer and sample types are newly provided. 
Cancer-related sncRNAs hold potential for both biotech applications and basic research.",YM500v3,YM500v3,http://ngs.ym.edu.tw/ym500/,a database for small RNA sequencing in human cancer research +27980519,ContaMiner and ContaBase: a webserver and database for early identification of unwantedly crystallized protein contaminants.,"Solving the phase problem in protein X-ray crystallography relies heavily on the identity of the crystallized protein, especially when molecular replacement (MR) methods are used. Yet, it is not uncommon that a contaminant crystallizes instead of the protein of interest. Such contaminants may be proteins from the expression host organism, protein fusion tags or proteins added during the purification steps. Many contaminants co-purify easily, crystallize and give good diffraction data. Identification of contaminant crystals may take time, since the presence of the contaminant is unexpected and its identity unknown. A webserver (ContaMiner) and a contaminant database (ContaBase) have been established, to allow fast MR-based screening of crystallographic data against currently 62 known contaminants. The web-based ContaMiner (available at http://strube.cbrc.kaust.edu.sa/contaminer/) currently produces results in 5 min to 4 h. The program is also available in a github repository and can be installed locally. ContaMiner enables screening of novel crystals at synchrotron beamlines, and it would be valuable as a routine safety check for 'crystallization and preliminary X-ray analysis' publications. 
Thus, in addition to potentially saving X-ray crystallographers much time and effort, ContaMiner might considerably lower the risk of publishing erroneous data.",ContaBase,ContaBase,http://strube.cbrc.kaust.edu.sa/contaminer/,database for early identification of unwantedly crystallized protein contaminants +28090394,GExplore 1.4: An expanded web interface for queries on Caenorhabditis elegans protein and gene function.,"Genetic high-throughput experiments often result in hundreds or thousands of genes satisfying certain experimental conditions. Grouping and prioritizing a large number of genes for further analysis can be a time-consuming challenge. In 2009 we developed a web-based user interface, GExplore, to assist with large-scale data-mining related to gene function in Caenorhabditis elegans. The underlying database contained information about Caenorhabditis elegans genes and proteins including domain organization of the proteins, phenotypic descriptions, expression data and Gene Ontology Consortium annotations. These data enable users to quickly obtain an overview of biological and biochemical functions of a large number of genes at once. Since its inception the underlying database has been updated and expanded significantly. Here we describe the current version of GExplore 1.4, documenting the changes since the original release. GExplore 1.4 now contains information about the domain organization of the proteomes of 9 nematode species, can display the location of Caenorhabditis elegans mutations with respect to the domain organization of the proteins, and includes stage-specific RNAseq gene expression data generated by the modENCODE project. The underlying database has been reorganized to facilitate independent updates of the different parts of the database and to allow the addition of novel data sets in the future. 
The web interface is available under http://genome.sfu.ca/gexplore.",Gexplore 1.4,GExplore,http://genome.sfu.ca/gexplore,An expanded web interface for queries on Caenorhabditis elegans protein and gene function +28149703,Expanding our understanding of the trade in marine aquarium animals.,"The trade of live marine animals for home and public aquaria has grown into a major global industry. Millions of marine fishes and invertebrates are removed from coral reefs and associated habitats each year. The majority are imported into the United States, with the remainder sent to Europe, Japan, and a handful of other countries. Despite the recent growth and diversification of the aquarium trade, to date, data collection is not mandatory, and hence comprehensive information on species volume and diversity is lacking. This lack of information makes it impossible to study trade pathways. Without species-specific volume and diversity data, it is unclear how importing and exporting governments can oversee this industry effectively or how sustainability should be encouraged. To expand our knowledge and understanding of the trade, and to effectively communicate this new understanding, we introduce the publically-available Marine Aquarium Biodiversity and Trade Flow online database (https://www.aquariumtradedata.org/). This tool was created to communicate the volume and diversity of marine fishes and/or invertebrates imported into the US over three complete years (2008, 2009, and 2011) and three partial years (2000, 2004, 2005). To create this tool, invoices pertaining to shipments of live marine fishes and invertebrates were scanned and analyzed for species name, species quantities, country of origin, port of entry, and city of import destination. Here we focus on the analysis of the later three years of data and also produce an estimate for the entirety of 2000, 2004, and 2005. 
The three-year aggregate totals (2008, 2009, 2011) indicate that just under 2,300 fish and 725 invertebrate species were imported into the US cumulatively, although just under 1,800 fish and 550 invertebrate species were traded annually. Overall, the total number of live marine animals decreased between 2008 and 2011. In 2008, 2009, and 2011, the total number of individual fish (8.2, 7.3, and 6.9 million individuals) and invertebrates (4.2, 3.7, and 3.6 million individuals) assessed by analyzing the invoice data are roughly 60% of the total volumes recorded through the Law Enforcement Management Information System (LEMIS) dataset. Using these complete years, we back-calculated the number of individuals of both fishes and invertebrates imported in 2000, 2004, and 2005. These estimates (9.3, 10.8, and 11.2 million individual fish per year) were consistent with the three years of complete data. We also use these data to understand the global trade in two species (Banggai cardinalfish, Pterapogon kauderni, and orange clownfish, Amphiprion ocellaris / percula) recently considered for Endangered Species Act listing. Aquariumtradedata.org can help create more effective management plans for the traded species, and ideally could be implemented at key trade ports to better assess the global trade of aquatic wildlife.",Marine Aquarium Biodiversity and Trade Flow online database,,https://www.aquariumtradedata.org/, +28203233,Exo-miRExplorer: A Comprehensive Resource for Exploring and Comparatively Analyzing Exogenous MicroRNAs.,"MicroRNAs (miRNAs) are small regulatory RNAs that play important roles in animals, plants, and viruses. Deep-sequencing technology has been widely adopted in miRNA investigations. However, it is still a big mysterious why nearly all sequencing data contain miRNA sequences from exogenous species, called exo-miRNAs. 
In this study, we developed a novel platform, exo-miRExplorer, for mining and identifying exo-miRNAs from high-throughput small RNA sequencing experiments which originated from tissues and cell lines of multiple organisms. Thousands of exo-miRNAs are characterized with their expression abundance, the RNA families, original organisms and the sequencing platforms presented in exo-miRExplorer. Subsequently, we used exo-miRExplorer to perform further analysis. Comparative analysis of the exo-miRNAs between different sequencing datasets revealed significant correlation of exo-miRNAs between experiments in the same study. The plant-derived exo-miRNAs analysis provided robust evidence for non-diet source of exo-miRNAs. Virus-derived exo-miRNA analysis showed that pathogen RNAs could transfer to host cells and exist in deep-sequencing result at abundance level. In conclusion, exo-miRExplorer provides users with an integrative resource to facilitate detection and analysis of exo-miRNAs. exo-miRExplorer is available at the following URL: http://rna.sysu.edu.cn/exomiRDB/.",Exo-miRExplorer,Exo-miRExplorer,http://rna.sysu.edu.cn/exomiRDB/,A Comprehensive Resource for Exploring and Comparatively Analyzing Exogenous MicroRNAs +28387199,GSA: Genome Sequence Archive.,"With the rapid development of sequencing technologies towards higher throughput and lower cost, sequence data are generated at an unprecedentedly explosive rate. To provide an efficient and easy-to-use platform for managing huge sequence data, here we present Genome Sequence Archive (GSA; http://bigd.big.ac.cn/gsa or http://gsa.big.ac.cn), a data repository for archiving raw sequence data. 
In compliance with data standards and structures of the International Nucleotide Sequence Database Collaboration (INSDC), GSA adopts four data objects (BioProject, BioSample, Experiment, and Run) for data organization, accepts raw sequence reads produced by a variety of sequencing platforms, stores both sequence reads and metadata submitted from all over the world, and makes all these data publicly available to worldwide scientific communities. In the era of big data, GSA is not only an important complement to existing INSDC members by alleviating the increasing burdens of handling sequence data deluge, but also takes the significant responsibility for global big data archive and provides free unrestricted access to all publicly available data in support of research activities throughout the world.",Genome Sequence Archive,GSA,http://bigd.big.ac.cn/gsa or http://gsa.big.ac.cn,a data repository for archiving raw sequence data. In compliance with data standards and structures of the International Nucleotide Sequence Database Collaboration (INSDC) +28413782,APMicroDB: A microsatellite database of Acyrthosiphon pisum.,"Pea aphids represent a complex genetic system that could be used for QTL analysis, genetic diversity and population genetics studies. Here, we described the development of first microsatellite repeat database of the pea aphid (APMicroDB), accessible at """"http://deepaklab.com/aphidmicrodb"""". We identified 3,40,233 SSRs using MIcroSAtellite (MISA) tool that was distributed in 14,067 (out of 23,924) scaffold of the pea aphid. We observed 89.53% simple repeats of which 73.41% were mono-nucleotide, followed by di-nucleotide repeats. This database stored information about the repeats kind, GC content, motif type (mono - hexa), genomic location etc. We have also incorporated the primer information derived from Primer3 software of the 2504 bp flanking region of the identified marker. 
Blast tool is also provided for searching the user query sequence for identified marker and their primers. This work has an immense use for scientific community working in the field of agricultural pest management, QTL mapping, and host-pathogen interaction analysis.",microsatellite repeat database of the pea aphid,APMicroDB,http://deepaklab.com/aphidmicrodb,A microsatellite database of Acyrthosiphon pisum +28539606,SesameFG: an integrated database for the functional genomics of sesame.,"Sesame (Sesamum indicum L.) has high oil content, a small diploid genome and a short growth period, making it an attractive species for genetic studies on oilseed crops. With the advancement of next-generation sequencing technology, genomics and functional genomics research of sesame has developed quickly in the last few years, and large amounts of data have been generated. However, these results are distributed in many different publications, and there is a lack of integration. To promote functional genomics research of sesame, we collected genetic information combined with comprehensive phenotypic information and integrated them in the web-based database named SesameFG. The current version of SesameFG contains phenotypic information on agronomic traits of 705 sesame accessions, de novo assembled genomes of three sesame varieties, massive numbers of identified SNPs, gene expression profiles of five tissues, gene families, candidate genes for the important agronomic traits and genomic-SSR markers. All phenotypic and genotypic information in SesameFG is available for online queries and can be downloaded freely. SesameFG provides useful search functions and data mining tools, including Genome Browser and local BLAST services. SesameFG is freely accessible at http://ncgr.ac.cn/SesameFG/. 
SesameFG provides valuable resources and tools for functional genomics research and the molecular breeding of sesame.",SesameFG,SesameFG,http://ncgr.ac.cn/SesameFG/,an integrated database for the functional genomics of sesame +28748223,PhenoPlasm: a database of disruption phenotypes for malaria parasite genes.,"Two decades after the first Plasmodium transfection, attempts have been made to disrupt more than 3,151 genes in malaria parasites, across five Plasmodium species. While results from rodent malaria transfections have been curated and systematised, empowering large-scale analysis, phenotypic data from human malaria parasite transfections currently exists as individual reports scattered across a the literature. To facilitate systematic analysis of published experimental genetic data across Plasmodium species, we have built PhenoPlasm ( http://www.phenoplasm.org), a database of phenotypes generated by transfection experiments in all Plasmodium parasites. The site provides a simple interface linking citation-backed Plasmodium reverse-genetic phenotypes to gene IDs. The database has been populated with phenotypic data on 367 P. falciparum genes, curated from 176 individual publications, as well as existing data on rodent Plasmodium species from RMgmDB and PlasmoGEM. This is the first time that all available data on P. falciparum transfection experiments has been brought together in a single place. These data are presented using ortholog mapping to allow a researcher interested in a gene in one species to see results across other Plasmodium species. The collaborative nature of the database enables any researcher to add new phenotypes as they are discovered. 
As an example of database utility, we use the currently available datasets to identify RAP (RNA-binding domain abundant in Apicomplexa)-domain containing proteins as crucial to parasite survival.",PhenoPlasm,PhenoPlasm,http://www.phenoplasm.org,a database of phenotypes generated by transfection experiments in all Plasmodium parasites +28748223,PhenoPlasm: a database of disruption phenotypes for malaria parasite genes.,"Two decades after the first Plasmodium transfection, attempts have been made to disrupt more than 3,151 genes in malaria parasites, across five Plasmodium species. While results from rodent malaria transfections have been curated and systematised, empowering large-scale analysis, phenotypic data from human malaria parasite transfections currently exists as individual reports scattered across a the literature. To facilitate systematic analysis of published experimental genetic data across Plasmodium species, we have built PhenoPlasm ( http://www.phenoplasm.org), a database of phenotypes generated by transfection experiments in all Plasmodium parasites. The site provides a simple interface linking citation-backed Plasmodium reverse-genetic phenotypes to gene IDs. The database has been populated with phenotypic data on 367 P. falciparum genes, curated from 176 individual publications, as well as existing data on rodent Plasmodium species from RMgmDB and PlasmoGEM. This is the first time that all available data on P. falciparum transfection experiments has been brought together in a single place. These data are presented using ortholog mapping to allow a researcher interested in a gene in one species to see results across other Plasmodium species. The collaborative nature of the database enables any researcher to add new phenotypes as they are discovered. 
As an example of database utility, we use the currently available datasets to identify RAP (RNA-binding domain abundant in Apicomplexa)-domain containing proteins as crucial to parasite survival.",PhenoPlasm,PhenoPlasm,http://www.phenoplasm.org,a database of disruption phenotypes for malaria parasite genes +28850115,"RefEx, a reference gene expression dataset as a web tool for the functional analysis of genes.","Gene expression data are exponentially accumulating; thus, the functional annotation of such sequence data from metadata is urgently required. However, life scientists have difficulty utilizing the available data due to its sheer magnitude and complicated access. We have developed a web tool for browsing reference gene expression pattern of mammalian tissues and cell lines measured using different methods, which should facilitate the reuse of the precious data archived in several public databases. The web tool is called Reference Expression dataset (RefEx), and RefEx allows users to search by the gene name, various types of IDs, chromosomal regions in genetic maps, gene family based on InterPro, gene expression patterns, or biological categories based on Gene Ontology. RefEx also provides information about genes with tissue-specific expression, and the relative gene expression values are shown as choropleth maps on 3D human body images from BodyParts3D. Combined with the newly incorporated Functional Annotation of Mammals (FANTOM) dataset, RefEx provides insight regarding the functional interpretation of unfamiliar genes. 
RefEx is publicly available at http://refex.dbcls.jp/.",Reference Expression dataset,RefEx,http://refex.dbcls.jp/,a reference gene expression dataset as a web tool for the functional analysis of genes +28904183,"The TB Portals: an Open-Access, Web-Based Platform for Global Drug-Resistant-Tuberculosis Data Sharing and Analysis.","The TB Portals program is an international consortium of physicians, radiologists, and microbiologists from countries with a heavy burden of drug-resistant tuberculosis working with data scientists and information technology professionals. Together, we have built the TB Portals, a repository of socioeconomic/geographic, clinical, laboratory, radiological, and genomic data from patient cases of drug-resistant tuberculosis backed by shareable, physical samples. Currently, there are 1,299 total cases from five country sites (Azerbaijan, Belarus, Moldova, Georgia, and Romania), 976 (75.1%) of which are multidrug or extensively drug resistant and 38.2%, 51.9%, and 36.3% of which contain X-ray, computed tomography (CT) scan, and genomic data, respectively. The top Mycobacterium tuberculosis lineages represented among collected samples are Beijing, T1, and H3, and single nucleotide polymorphisms (SNPs) that confer resistance to isoniazid, rifampin, ofloxacin, and moxifloxacin occur the most frequently. These data and samples have promoted drug discovery efforts and research into genomics and quantitative image analysis to improve diagnostics while also serving as a valuable resource for researchers and clinical providers. The TB Portals database and associated projects are continually growing, and we invite new partners and collaborations to our initiative. 
The TB Portals data and their associated analytical and statistical tools are freely available at https://tbportals.niaid.nih.gov/.",TB Portals,,https://tbportals.niaid.nih.gov/,"a repository of socioeconomic/geographic, clinical, laboratory, radiological, and genomic data from patient cases of drug-resistant tuberculosis backed by shareable, physical samples" +28904183,"The TB Portals: an Open-Access, Web-Based Platform for Global Drug-Resistant-Tuberculosis Data Sharing and Analysis.","The TB Portals program is an international consortium of physicians, radiologists, and microbiologists from countries with a heavy burden of drug-resistant tuberculosis working with data scientists and information technology professionals. Together, we have built the TB Portals, a repository of socioeconomic/geographic, clinical, laboratory, radiological, and genomic data from patient cases of drug-resistant tuberculosis backed by shareable, physical samples. Currently, there are 1,299 total cases from five country sites (Azerbaijan, Belarus, Moldova, Georgia, and Romania), 976 (75.1%) of which are multidrug or extensively drug resistant and 38.2%, 51.9%, and 36.3% of which contain X-ray, computed tomography (CT) scan, and genomic data, respectively. The top Mycobacterium tuberculosis lineages represented among collected samples are Beijing, T1, and H3, and single nucleotide polymorphisms (SNPs) that confer resistance to isoniazid, rifampin, ofloxacin, and moxifloxacin occur the most frequently. These data and samples have promoted drug discovery efforts and research into genomics and quantitative image analysis to improve diagnostics while also serving as a valuable resource for researchers and clinical providers. The TB Portals database and associated projects are continually growing, and we invite new partners and collaborations to our initiative. 
The TB Portals data and their associated analytical and statistical tools are freely available at https://tbportals.niaid.nih.gov/.",TB Portals,,https://tbportals.niaid.nih.gov/,"an Open-Access, Web-Based Platform for Global Drug-Resistant-Tuberculosis Data Sharing and Analysis" +28985416,EVLncRNAs: a manually curated database for long non-coding RNAs validated by low-throughput experiments.,"Long non-coding RNAs (lncRNAs) play important functional roles in various biological processes. Early databases were utilized to deposit all lncRNA candidates produced by high-throughput experimental and/or computational techniques to facilitate classification, assessment and validation. As more lncRNAs are validated by low-throughput experiments, several databases were established for experimentally validated lncRNAs. However, these databases are small in scale (with a few hundreds of lncRNAs only) and specific in their focuses (plants, diseases or interactions). Thus, it is highly desirable to have a comprehensive dataset for experimentally validated lncRNAs as a central repository for all of their structures, functions and phenotypes. Here, we established EVLncRNAs by curating lncRNAs validated by low-throughput experiments (up to 1 May 2016) and integrating specific databases (lncRNAdb, LncRANDisease, Lnc2Cancer and PLNIncRBase) with additional functional and disease-specific information not covered previously. The current version of EVLncRNAs contains 1543 lncRNAs from 77 species that is 2.9 times larger than the current largest database for experimentally validated lncRNAs. Seventy-four percent lncRNA entries are partially or completely new, comparing to all existing experimentally validated databases. The established database allows users to browse, search and download as well as to submit experimentally validated lncRNAs. 
The database is available at http://biophy.dzu.edu.cn/EVLncRNAs.",EVLncRNAs,EVLncRNAs,http://biophy.dzu.edu.cn/EVLncRNAs,a manually curated database for long non-coding RNAs validated by low-throughput experiments +29036719,ChannelsDB: database of biomacromolecular tunnels and pores.,"ChannelsDB (http://ncbr.muni.cz/ChannelsDB) is a database providing information about the positions, geometry and physicochemical properties of channels (pores and tunnels) found within biomacromolecular structures deposited in the Protein Data Bank. Channels were deposited from two sources; from literature using manual deposition and from a software tool automatically detecting tunnels leading to the enzymatic active sites and selected cofactors, and transmembrane pores. The database stores information about geometrical features (e.g. length and radius profile along a channel) and physicochemical properties involving polarity, hydrophobicity, hydropathy, charge and mutability. The stored data are interlinked with available UniProt annotation data mapping known mutation effects to channel-lining residues. All structures with channels are displayed in a clear interactive manner, further facilitating data manipulation and interpretation. As such, ChannelsDB provides an invaluable resource for research related to deciphering the biological function of biomacromolecular channels.",ChannelsDB,ChannelsDB,http://ncbr.muni.cz/ChannelsDB,"a database providing information about the positions, geometry and physicochemical properties of channels (pores and tunnels) found within biomacromolecular structures deposited in the Protein Data Bank" +29036719,ChannelsDB: database of biomacromolecular tunnels and pores.,"ChannelsDB (http://ncbr.muni.cz/ChannelsDB) is a database providing information about the positions, geometry and physicochemical properties of channels (pores and tunnels) found within biomacromolecular structures deposited in the Protein Data Bank. 
Channels were deposited from two sources; from literature using manual deposition and from a software tool automatically detecting tunnels leading to the enzymatic active sites and selected cofactors, and transmembrane pores. The database stores information about geometrical features (e.g. length and radius profile along a channel) and physicochemical properties involving polarity, hydrophobicity, hydropathy, charge and mutability. The stored data are interlinked with available UniProt annotation data mapping known mutation effects to channel-lining residues. All structures with channels are displayed in a clear interactive manner, further facilitating data manipulation and interpretation. As such, ChannelsDB provides an invaluable resource for research related to deciphering the biological function of biomacromolecular channels.",ChannelsDB,ChannelsDB,http://ncbr.muni.cz/ChannelsDB,database of biomacromolecular tunnels and pores +29059366,SBCDDB: Sleeping Beauty Cancer Driver Database for gene discovery in mouse models of human cancers.,"Large-scale oncogenomic studies have identified few frequently mutated cancer drivers and hundreds of infrequently mutated drivers. Defining the biological context for rare driving events is fundamentally important to increasing our understanding of the druggable pathways in cancer. Sleeping Beauty (SB) insertional mutagenesis is a powerful gene discovery tool used to model human cancers in mice. Our lab and others have published a number of studies that identify cancer drivers from these models using various statistical and computational approaches. Here, we have integrated SB data from primary tumor models into an analysis and reporting framework, the Sleeping Beauty Cancer Driver DataBase (SBCDDB, http://sbcddb.moffitt.org), which identifies drivers in individual tumors or tumor populations. 
Unique to this effort, the SBCDDB utilizes a single, scalable, statistical analysis method that enables data to be grouped by different biological properties. This allows for SB drivers to be evaluated (and re-evaluated) under different contexts. The SBCDDB provides visual representations highlighting the spatial attributes of transposon mutagenesis and couples this functionality with analysis of gene sets, enabling users to interrogate relationships between drivers. The SBCDDB is a powerful resource for comparative oncogenomic analyses with human cancer genomics datasets for driver prioritization.",Sleeping Beauty Cancer Driver Database,SBCDDB,http://sbcddb.moffitt.org,Sleeping Beauty Cancer Driver Database for gene discovery in mouse models of human cancers +29077896,mirTrans: a resource of transcriptional regulation on microRNAs for human cell lines.,"The cell-specific information of transcriptional regulation on microRNAs (miRNAs) is crucial to the precise understanding of gene regulations in various physiological and pathological processes existed in different tissues and cell types. The database, mirTrans, provides comprehensive information about cell-specific transcription of miRNAs including the transcriptional start sites (TSSs) of miRNAs, transcription factor (TF) to miRNA regulations and miRNA promoter sequences. mirTrans also maps the experimental H3K4me3 and DHS (DNase-I hypersensitive site) marks within miRNA promoters and expressed sequence tags (ESTs) within transcribed regions. The current version of database covers 35 259 TSSs and over 2.3 million TF-miRNA regulations for 1513 miRNAs in a total of 54 human cell lines. These cell lines span most of the biological systems, including circulatory system, digestive system and nervous system. Information for both the intragenic miRNAs and intergenic miRNAs is offered. Particularly, the quality of miRNA TSSs and TF-miRNA regulations is evaluated by literature curation. 
23 447 TSS records and 2148 TF-miRNA regulations are supported by special experiments as a result of literature curation. EST coverage is also used to evaluate the accuracy of miRNA TSSs. Interface of mirTrans is friendly designed and convenient to make downloads (http://mcube.nju.edu.cn/jwang/lab/soft/mirtrans/ or http://120.27.239.192/mirtrans/).",mirTrans,mirTrans,http://120.27.239.192/mirtrans/,a resource of transcriptional regulation on microRNAs for human cell lines +29077896,mirTrans: a resource of transcriptional regulation on microRNAs for human cell lines.,"The cell-specific information of transcriptional regulation on microRNAs (miRNAs) is crucial to the precise understanding of gene regulations in various physiological and pathological processes existed in different tissues and cell types. The database, mirTrans, provides comprehensive information about cell-specific transcription of miRNAs including the transcriptional start sites (TSSs) of miRNAs, transcription factor (TF) to miRNA regulations and miRNA promoter sequences. mirTrans also maps the experimental H3K4me3 and DHS (DNase-I hypersensitive site) marks within miRNA promoters and expressed sequence tags (ESTs) within transcribed regions. The current version of database covers 35 259 TSSs and over 2.3 million TF-miRNA regulations for 1513 miRNAs in a total of 54 human cell lines. These cell lines span most of the biological systems, including circulatory system, digestive system and nervous system. Information for both the intragenic miRNAs and intergenic miRNAs is offered. Particularly, the quality of miRNA TSSs and TF-miRNA regulations is evaluated by literature curation. 23 447 TSS records and 2148 TF-miRNA regulations are supported by special experiments as a result of literature curation. EST coverage is also used to evaluate the accuracy of miRNA TSSs. 
Interface of mirTrans is friendly designed and convenient to make downloads (http://mcube.nju.edu.cn/jwang/lab/soft/mirtrans/ or http://120.27.239.192/mirtrans/).",mirTrans,mirTrans,http://mcube.nju.edu.cn/jwang/lab/soft/mirtrans/,a resource of transcriptional regulation on microRNAs for human cell lines +29092050,Ensembl Genomes 2018: an integrated omics infrastructure for non-vertebrate species.,"Ensembl Genomes (http://www.ensemblgenomes.org) is an integrating resource for genome-scale data from non-vertebrate species, complementing the resources for vertebrate genomics developed in the Ensembl project (http://www.ensembl.org). Together, the two resources provide a consistent set of programmatic and interactive interfaces to a rich range of data including genome sequence, gene models, transcript sequence, genetic variation, and comparative analysis. This paper provides an update to the previous publications about the resource, with a focus on recent developments and expansions. These include the incorporation of almost 20 000 additional genome sequences and over 35 000 tracks of RNA-Seq data, which have been aligned to genomic sequence and made available for visualization. Other advances since 2015 include the release of the database in Resource Description Framework (RDF) format, a large increase in community-derived curation, a new high-performance protein sequence search, additional cross-references, improved annotation of non-protein-coding genes, and the launch of pre-release and archival sites. 
Collectively, these changes are part of a continuing response to the increasing quantity of publicly-available genome-scale data, and the consequent need to archive, integrate, annotate and disseminate these using automated, scalable methods.",Ensembl Genomes,,http://www.ensemblgenomes.org,"an integrating resource for genome-scale data from non-vertebrate species, complementing the resources for vertebrate genomics developed in the Ensembl project (http://www.ensembl.org)" +29092050,Ensembl Genomes 2018: an integrated omics infrastructure for non-vertebrate species.,"Ensembl Genomes (http://www.ensemblgenomes.org) is an integrating resource for genome-scale data from non-vertebrate species, complementing the resources for vertebrate genomics developed in the Ensembl project (http://www.ensembl.org). Together, the two resources provide a consistent set of programmatic and interactive interfaces to a rich range of data including genome sequence, gene models, transcript sequence, genetic variation, and comparative analysis. This paper provides an update to the previous publications about the resource, with a focus on recent developments and expansions. These include the incorporation of almost 20 000 additional genome sequences and over 35 000 tracks of RNA-Seq data, which have been aligned to genomic sequence and made available for visualization. Other advances since 2015 include the release of the database in Resource Description Framework (RDF) format, a large increase in community-derived curation, a new high-performance protein sequence search, additional cross-references, improved annotation of non-protein-coding genes, and the launch of pre-release and archival sites. 
Collectively, these changes are part of a continuing response to the increasing quantity of publicly-available genome-scale data, and the consequent need to archive, integrate, annotate and disseminate these using automated, scalable methods.",Ensembl Genomes,,http://www.ensemblgenomes.org,an integrated omics infrastructure for non-vertebrate species +29092072,Mouse Genome Database (MGD)-2018: knowledgebase for the laboratory mouse.,"The Mouse Genome Database (MGD; http://www.informatics.jax.org) is the key community mouse database which supports basic, translational and computational research by providing integrated data on the genetics, genomics, and biology of the laboratory mouse. MGD serves as the source for biological reference data sets related to mouse genes, gene functions, phenotypes and disease models with an increasing emphasis on the association of these data to human biology and disease. We report here on recent enhancements to this resource, including improved access to mouse disease model and human phenotype data and enhanced relationships of mouse models to human disease.",Mouse Genome Database,MGD,http://www.informatics.jax.org,"the key community mouse database which supports basic, translational and computational research by providing integrated data on the genetics, genomics, and biology of the laboratory mouse" +29092072,Mouse Genome Database (MGD)-2018: knowledgebase for the laboratory mouse.,"The Mouse Genome Database (MGD; http://www.informatics.jax.org) is the key community mouse database which supports basic, translational and computational research by providing integrated data on the genetics, genomics, and biology of the laboratory mouse. MGD serves as the source for biological reference data sets related to mouse genes, gene functions, phenotypes and disease models with an increasing emphasis on the association of these data to human biology and disease. 
We report here on recent enhancements to this resource, including improved access to mouse disease model and human phenotype data and enhanced relationships of mouse models to human disease.",Mouse Genome Database,MGD,http://www.informatics.jax.org,knowledgebase for the laboratory mouse +29121237,Human Ageing Genomic Resources: new and updated databases.,"In spite of a growing body of research and data, human ageing remains a poorly understood process. Over 10 years ago we developed the Human Ageing Genomic Resources (HAGR), a collection of databases and tools for studying the biology and genetics of ageing. Here, we present HAGR's main functionalities, highlighting new additions and improvements. HAGR consists of six core databases: (i) the GenAge database of ageing-related genes, in turn composed of a dataset of >300 human ageing-related genes and a dataset with >2000 genes associated with ageing or longevity in model organisms; (ii) the AnAge database of animal ageing and longevity, featuring >4000 species; (iii) the GenDR database with >200 genes associated with the life-extending effects of dietary restriction; (iv) the LongevityMap database of human genetic association studies of longevity with >500 entries; (v) the DrugAge database with >400 ageing or longevity-associated drugs or compounds; (vi) the CellAge database with >200 genes associated with cell senescence. All our databases are manually curated by experts and regularly updated to ensure a high quality data. Cross-links across our databases and to external resources help researchers locate and integrate relevant information. 
HAGR is freely available online (http://genomics.senescence.info/).",Human Ageing Genomic Resources,HAGR,http://genomics.senescence.info/,a collection of databases and tools for studying the biology and genetics of ageing +29126285,ReMap 2018: an updated atlas of regulatory regions from an integrative analysis of DNA-binding ChIP-seq experiments.,"With this latest release of ReMap (http://remap.cisreg.eu), we present a unique collection of regulatory regions in human, as a result of a large-scale integrative analysis of ChIP-seq experiments for hundreds of transcriptional regulators (TRs) such as transcription factors, transcriptional co-activators and chromatin regulators. In 2015, we introduced the ReMap database to capture the genome regulatory space by integrating public ChIP-seq datasets, covering 237 TRs across 13 million (M) peaks. In this release, we have extended this catalog to constitute a unique collection of regulatory regions. Specifically, we have collected, analyzed and retained after quality control a total of 2829 ChIP-seq datasets available from public sources, covering a total of 485 TRs with a catalog of 80M peaks. Additionally, the updated database includes new search features for TR names as well as aliases, including cell line names and the ability to navigate the data directly within genome browsers via public track hubs. Finally, full access to this catalog is available online together with a TR binding enrichment analysis tool. ReMap 2018 provides a significant update of the ReMap database, providing an in depth view of the complexity of the regulatory landscape in human.",ReMap,ReMap,http://remap.cisreg.eu,an updated atlas of regulatory regions from an integrative analysis of DNA-binding ChIP-seq experiments +29126312,MeT-DB V2.0: elucidating context-specific functions of N6-methyl-adenosine methyltranscriptome.,"Methyltranscriptome is an exciting new area that studies the mechanisms and functions of methylation in transcripts. 
A knowledge base with the systematic collection and curation of context specific transcriptome-wide methylations is critical for elucidating their biological functions as well as for developing bioinformatics tools. Since its inception in 2014, the Met-DB (Liu, H., Flores, M.A., Meng, J., Zhang, L., Zhao, X., Rao, M.K., Chen, Y. and Huang, Y. (2015) MeT-DB: a database of transcriptome methylation in mammalian cells. Nucleic Acids Res., 43, D197-D203), has become an important resource for methyltranscriptome, especially in the N6-methyl-adenosine (m6A) research community. Here, we report Met-DB v2.0, the significantly improved second version of Met-DB, which is entirely redesigned to focus more on elucidating context-specific m6A functions. Met-DB v2.0 has a major increase in context-specific m6A peaks and single-base sites predicted from 185 samples for 7 species from 26 independent studies. Moreover, it is also integrated with a new database for targets of m6A readers, erasers and writers and expanded with more collections of functional data. The redesigned Met-DB v2.0 web interface and genome browser provide more friendly, powerful, and informative ways to query and visualize the data. More importantly, MeT-DB v2.0 offers for the first time a series of tools specifically designed for understanding m6A functions. Met-DB V2.0 will be a valuable resource for m6A methyltranscriptome research. The Met-DB V2.0 database is available at http://compgenomics.utsa.edu/MeTDB/ and http://www.xjtlu.edu.cn/metdb2.",MeT-DB V2.0,MeT-DB,http://compgenomics.utsa.edu/MeTDB/,a database of transcriptome methylation in mammalian cells +29126312,MeT-DB V2.0: elucidating context-specific functions of N6-methyl-adenosine methyltranscriptome.,"Methyltranscriptome is an exciting new area that studies the mechanisms and functions of methylation in transcripts. 
A knowledge base with the systematic collection and curation of context specific transcriptome-wide methylations is critical for elucidating their biological functions as well as for developing bioinformatics tools. Since its inception in 2014, the Met-DB (Liu, H., Flores, M.A., Meng, J., Zhang, L., Zhao, X., Rao, M.K., Chen, Y. and Huang, Y. (2015) MeT-DB: a database of transcriptome methylation in mammalian cells. Nucleic Acids Res., 43, D197-D203), has become an important resource for methyltranscriptome, especially in the N6-methyl-adenosine (m6A) research community. Here, we report Met-DB v2.0, the significantly improved second version of Met-DB, which is entirely redesigned to focus more on elucidating context-specific m6A functions. Met-DB v2.0 has a major increase in context-specific m6A peaks and single-base sites predicted from 185 samples for 7 species from 26 independent studies. Moreover, it is also integrated with a new database for targets of m6A readers, erasers and writers and expanded with more collections of functional data. The redesigned Met-DB v2.0 web interface and genome browser provide more friendly, powerful, and informative ways to query and visualize the data. More importantly, MeT-DB v2.0 offers for the first time a series of tools specifically designed for understanding m6A functions. Met-DB V2.0 will be a valuable resource for m6A methyltranscriptome research. The Met-DB V2.0 database is available at http://compgenomics.utsa.edu/MeTDB/ and http://www.xjtlu.edu.cn/metdb2.",MeT-DB V2.0,MeT-DB,http://www.xjtlu.edu.cn/metdb2,a database of transcriptome methylation in mammalian cells +29145629,The Reactome Pathway Knowledgebase.,"The Reactome Knowledgebase (https://reactome.org) provides molecular details of signal transduction, transport, DNA replication, metabolism, and other cellular processes as an ordered network of molecular transformations-an extended version of a classic metabolic map, in a single consistent data model. 
Reactome functions both as an archive of biological processes and as a tool for discovering unexpected functional relationships in data such as gene expression profiles or somatic mutation catalogues from tumor cells. To support the continued brisk growth in the size and complexity of Reactome, we have implemented a graph database, improved performance of data analysis tools, and designed new data structures and strategies to boost diagram viewer performance. To make our website more accessible to human users, we have improved pathway display and navigation by implementing interactive Enhanced High Level Diagrams (EHLDs) with an associated icon library, and subpathway highlighting and zooming, in a simplified and reorganized web site with adaptive design. To encourage re-use of our content, we have enabled export of pathway diagrams as 'PowerPoint' files.",Reactome Knowledgebase,,https://reactome.org, +29145629,The Reactome Pathway Knowledgebase.,"The Reactome Knowledgebase (https://reactome.org) provides molecular details of signal transduction, transport, DNA replication, metabolism, and other cellular processes as an ordered network of molecular transformations-an extended version of a classic metabolic map, in a single consistent data model. Reactome functions both as an archive of biological processes and as a tool for discovering unexpected functional relationships in data such as gene expression profiles or somatic mutation catalogues from tumor cells. To support the continued brisk growth in the size and complexity of Reactome, we have implemented a graph database, improved performance of data analysis tools, and designed new data structures and strategies to boost diagram viewer performance. 
To make our website more accessible to human users, we have improved pathway display and navigation by implementing interactive Enhanced High Level Diagrams (EHLDs) with an associated icon library, and subpathway highlighting and zooming, in a simplified and reorganized web site with adaptive design. To encourage re-use of our content, we have enabled export of pathway diagrams as 'PowerPoint' files.",Reactome Pathway Knowledgebase,,https://reactome.org, +29155231,RTFAdb: A database of computationally predicted associations between retrotransposons and transcription factors in the human and mouse genomes.,"In recent years, retrotransposons have gained increasing attention as a source of binding motifs for transcription factors (TFs). Despite the substantial roles of these mobile genetic elements in the regulation of gene expression, a comprehensive resource enabling the investigation of retrotransposon species that are bound by TFs is still lacking. Herein, I introduce for the first time a novel database called RTFAdb, which allows exploring computationally predicted associations between retrotransposons and TFs in diverse cell lines and tissues of human and mouse. My database, using over 3.000 TF ChIP-seq binding profiles collected from human and mouse samples, makes possible searching more than 1.500 retrotransposon species in the binding sites of a total of 596 TFs. RTFAdb is freely available at http://tools.ibg.deu.edu.tr/rtfa/ and has the potential to offer novel insights into mammalian transcriptional networks by providing an additional layer of information regarding the regulatory roles of retrotransposons.",RTFAdb,RTFAdb,http://tools.ibg.deu.edu.tr/rtfa/,A database of computationally predicted associations between retrotransposons and transcription factors in the human and mouse genomes +29209336,DRDB: An Online Date Palm Genomic Resource Database.,"Background: Date palm (Phoenix dactylifera L.) 
is a cultivated woody plant with agricultural and economic importance in many countries around the world. With the advantages of next generation sequencing technologies, genome sequences for many date palm cultivars have been released recently. Short sequence repeat (SSR) and single nucleotide polymorphism (SNP) can be identified from these genomic data, and have been proven to be very useful biomarkers in plant genome analysis and breeding. Results: Here, we first improved the date palm genome assembly using 130X of HiSeq data generated in our lab. Then 246,445 SSRs (214,901 SSRs and 31,544 compound SSRs) were annotated in this genome assembly; among the SSRs, mononucleotide SSRs (58.92%) were the most abundant, followed by di- (29.92%), tri- (8.14%), tetra- (2.47%), penta- (0.36%), and hexa-nucleotide SSRs (0.19%). The high-quality PCR primer pairs were designed for most (174,497; 70.81% out of total) SSRs. We also annotated 6,375,806 SNPs with raw read depth=3 in 90% cultivars. To further reduce false positive SNPs, we only kept 5,572,650 (87.40% out of total) SNPs with at least 20% cultivars support for downstream analyses. The high-quality PCR primer pairs were also obtained for 4,177,778 (65.53%) SNPs. We reconstructed the phylogenetic relationships among the 62 cultivars using these variants and found that they can be divided into three clusters, namely North Africa, Egypt - Sudan, and Middle East - South Asian, with Egypt - Sudan being the admixture of North Africa and Middle East - South Asian cultivars; we further confirmed these clusters using principal component analysis. Moreover, 34,346 SSRs and 4,177,778 SNPs with PCR primers were assigned to shared cultivars for cultivar classification and diversity analysis. All these SSRs, SNPs and their classification are available in our database, and can be used for cultivar identification, comparison, and molecular breeding. Conclusion:DRDB is a comprehensive genomic resource database of date palm. 
It can serve as a bioinformatics platform for date palm genomics, genetics, and molecular breeding. DRDB is freely available at http://drdb.big.ac.cn/home.",Date Palm Genomic Resource Database,DRDB,http://drdb.big.ac.cn/home,a comprehensive genomic resource database of date palm +29315358,HTT-DB: new features and updates.,"Horizontal Transfer (HT) of genetic material between species is a common phenomenon among Bacteria and Archaea species and several databases are available for information retrieval and data mining. However, little attention has been given to this phenomenon among eukaryotic species mainly due to the lower proportion of these events. In the last years, a vertiginous amount of new HT events involving eukaryotic species was reported in the literature, highlighting the need of a common repository to keep the scientific community up to date and describe overall trends. Recently, we published the first HT database focused on HT of transposable elements among eukaryotes: the Horizontal Transposon Transfer DataBase (http://lpa.saogabriel.unipampa.edu.br: 8080/httdatabase/). Here, we present new features and updates of this unique database: (i) its expansion to include virus-host exchange of genetic material, which we called Horizontal Virus Transfer (HVT) and (ii) the availability of a web server for HT detection, where we implemented the online version of vertical and horizontal inheritance consistence analysis (VHICA), an R package developed for HT detection. These improvements will help researchers to navigate through known HVT cases, take data-informed decision and export figures based on keywords searches. Moreover, the availability of the VHICA as an online tool will make this software easily reachable even for researchers with no or little computation knowledge as well as foster our capability to detect new HT events in a wide variety of taxa. 
(Database URL: http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase/).",Horizontal Transposon Transfer DataBase,HTT-DB,http://lpa.saogabriel.unipampa.edu.br: 8080/httdatabase/,the first HT database focused on HT of transposable elements among eukaryotes +29401218,MutHTP: mutations in human transmembrane proteins.,"Motivation:Existing sources of experimental mutation data do not consider the structural environment of amino acid substitutions and distinguish between soluble and membrane proteins. They also suffer from a number of further limitations, including data redundancy, lack of disease classification, incompatible information content, and ambiguous annotations (e.g. the same mutation being annotated as disease and benign). Results:We have developed a novel database, MutHTP, which contains information on 183 395 disease-associated and 17 827 neutral mutations in human transmembrane proteins. For each mutation site MutHTP provides a description of its location with respect to the membrane protein topology, structural environment (if available) and functional features. Comprehensive visualization, search, display and download options are available. Availability and implementation:The database is publicly available at http://www.iitm.ac.in/bioinfo/MutHTP/. The website is implemented using HTML, PHP and javascript and supports recent versions of all major browsers, such as Firefox, Chrome and Opera. Supplementary information:Supplementary data are available at Bioinformatics online.",mutations in human transmembrane proteins,MutHTP,http://www.iitm.ac.in/bioinfo/MutHTP/, +29455297,Pan European Phenological database (PEP725): a single point of access for European data.,"The Pan European Phenology (PEP) project is a European infrastructure to promote and facilitate phenological research, education, and environmental monitoring. 
The main objective is to maintain and develop a Pan European Phenological database (PEP725) with an open, unrestricted data access for science and education. PEP725 is the successor of the database developed through the COST action 725 """"Establishing a European phenological data platform for climatological applications"""" working as a single access point for European-wide plant phenological data. So far, 32 European meteorological services and project partners from across Europe have joined and supplied data collected by volunteers from 1868 to the present for the PEP725 database. Most of the partners actively provide data on a regular basis. The database presently holds almost 12 million records, about 46 growing stages and 265 plant species (including cultivars), and can be accessed via http://www.pep725.eu/ . Users of the PEP725 database have studied a diversity of topics ranging from climate change impact, plant physiological question, phenological modeling, and remote sensing of vegetation to ecosystem productivity.",Pan European Phenological database,PEP725,http://www.pep725.eu/, +29617941,A reference peptide database for proteome quantification based on experimental mass spectrum response curves.,"

Motivation

Mass spectrometry (MS) based quantification of proteins/peptides has become a powerful tool in biological research with high sensitivity and throughput. The accuracy of quantification, however, has been problematic as not all peptides are suitable for quantification. Several methods and tools have been developed to identify peptides that response well in mass spectrometry and they are mainly based on predictive models, and rarely consider the linearity of the response curve, limiting the accuracy and applicability of the methods. An alternative solution is to select empirically superior peptides that offer satisfactory MS response intensity and linearity in a wide dynamic range of peptide concentration.

Results

We constructed a reference database for proteome quantification based on experimental mass spectrum response curves. The intensity and dynamic range of over 2 647 773 transitions from 121 318 peptides were obtained from a set of dilution experiments, covering 11 040 gene products. These transitions and peptides were evaluated and presented in a database named SCRIPT-MAP. We showed that the best-responder (BR) peptide approach for quantification based on SCRIPT-MAP database is robust, repeatable and accurate in proteome-scale protein quantification. This study provides a reference database as well as a peptides/transitions selection method for quantitative proteomics.

Availability and implementation

SCRIPT-MAP database is available at http://www.firmiana.org/responders/.

Supplementary information

Supplementary data are available at Bioinformatics online.",SCRIPT-MAP,SCRIPT-MAP,http://www.firmiana.org/responders/,A reference peptide database for proteome quantification based on experimental mass spectrum response curves +29662024,"PKIDB: A Curated, Annotated and Updated Database of Protein Kinase Inhibitors in Clinical Trials.","The number of protein kinase inhibitors (PKIs) approved worldwide continues to grow steadily, with 39 drugs approved in the period between 2001 and January 2018. PKIs on the market have been the subject of many reviews, and structure-property relationships specific to this class of drugs have been inferred. However, the large number of PKIs under development is often overlooked. In this paper, we present PKIDB (Protein Kinase Inhibitor Database), a monthly-updated database gathering approved PKIs as well as PKIs currently in clinical trials. The database compiles currently 180 inhibitors ranging from phase 0 to 4 clinical trials along with annotations extracted from seven public resources. The distribution and property ranges of standard physicochemical properties are presented. They can be used as filters to better prioritize compound selection for future screening campaigns. Interestingly, more than one-third of the kinase inhibitors violate at least one Lipinski's rule. A Principal Component Analysis (PCA) reveals that Type-II inhibitors are mapped to a distinct chemical space as compared to orally administrated drugs as well as to other types of kinase inhibitors. Using a Principal Moment of Inertia (PMI) analysis, we show that PKIs under development tend to explore new shape territories as compared to approved PKIs. In order to facilitate the analysis of the protein space, the kinome tree has been annotated with all protein kinases being targeted by PKIs. Finally, we analyzed the pipeline of the pharmaceutical companies having PKIs on the market or still under development. 
We hope that this work will assist researchers in the kinase field in identifying and designing the next generation of kinase inhibitors for still untargeted kinases. The PKIDB database is freely accessible from a website at http://www.icoa.fr/pkidb and can be easily browsed through a user-friendly spreadsheet-like interface.",Protein Kinase Inhibitor Database,PKIDB,http://www.icoa.fr/pkidb,a monthly-updated database gathering approved PKIs as well as PKIs currently in clinical trials +29662024,"PKIDB: A Curated, Annotated and Updated Database of Protein Kinase Inhibitors in Clinical Trials.","The number of protein kinase inhibitors (PKIs) approved worldwide continues to grow steadily, with 39 drugs approved in the period between 2001 and January 2018. PKIs on the market have been the subject of many reviews, and structure-property relationships specific to this class of drugs have been inferred. However, the large number of PKIs under development is often overlooked. In this paper, we present PKIDB (Protein Kinase Inhibitor Database), a monthly-updated database gathering approved PKIs as well as PKIs currently in clinical trials. The database compiles currently 180 inhibitors ranging from phase 0 to 4 clinical trials along with annotations extracted from seven public resources. The distribution and property ranges of standard physicochemical properties are presented. They can be used as filters to better prioritize compound selection for future screening campaigns. Interestingly, more than one-third of the kinase inhibitors violate at least one Lipinski's rule. A Principal Component Analysis (PCA) reveals that Type-II inhibitors are mapped to a distinct chemical space as compared to orally administrated drugs as well as to other types of kinase inhibitors. Using a Principal Moment of Inertia (PMI) analysis, we show that PKIs under development tend to explore new shape territories as compared to approved PKIs. 
In order to facilitate the analysis of the protein space, the kinome tree has been annotated with all protein kinases being targeted by PKIs. Finally, we analyzed the pipeline of the pharmaceutical companies having PKIs on the market or still under development. We hope that this work will assist researchers in the kinase field in identifying and designing the next generation of kinase inhibitors for still untargeted kinases. The PKIDB database is freely accessible from a website at http://www.icoa.fr/pkidb and can be easily browsed through a user-friendly spreadsheet-like interface.",Protein Kinase Inhibitor Database,PKIDB,http://www.icoa.fr/pkidb,"A Curated, Annotated and Updated Database of Protein Kinase Inhibitors in Clinical Trials" +29743053,PDXliver: a database of liver cancer patient derived xenograft mouse models.,"

Background

Liver cancer is the second leading cause of cancer-related deaths and characterized by heterogeneity and drug resistance. Patient-derived xenograft (PDX) models have been widely used in cancer research because they reproduce the characteristics of original tumors. However, the current studies of liver cancer PDX mice are scattered and the number of available PDX models are too small to represent the heterogeneity of liver cancer patients. To improve this situation and to complement available PDX models related resources, here we constructed a comprehensive database, PDXliver, to integrate and analyze liver cancer PDX models.

Description

Currently, PDXliver contains 116 PDX models from Chinese liver cancer patients, 51 of them were established by the in-house PDX platform and others were curated from the public literatures. These models are annotated with complete information, including clinical characteristics of patients, genome-wide expression profiles, germline variations, somatic mutations and copy number alterations. Analysis of expression subtypes and mutated genes show that PDXliver represents the diversity of human patients. Another feature of PDXliver is storing drug response data of PDX mice, which makes it possible to explore the association between molecular profiles and drug sensitivity. All data can be accessed via the Browse and Search pages. Additionally, two tools are provided to interactively visualize the omics data of selected PDXs or to compare two groups of PDXs.

Conclusion

As far as we known, PDXliver is the first public database of liver cancer PDX models. We hope that this comprehensive resource will accelerate the utility of PDX models and facilitate liver cancer research. The PDXliver database is freely available online at: http://www.picb.ac.cn/PDXliver/.",PDXliver,PDXliver,http://www.picb.ac.cn/PDXliver/,a database of liver cancer patient derived xenograft mouse models +29761459,Mouse Genome Informatics (MGI) Is the International Resource for Information on the Laboratory Mouse.,"Mouse Genome Informatics (MGI, http://www.informatics.jax.org/ ) web resources provide free access to meticulously curated information about the laboratory mouse. MGI's primary goal is to help researchers investigate the genetic foundations of human diseases by translating information from mouse phenotypes and disease models studies to human systems. MGI provides comprehensive phenotypes for over 50,000 mutant alleles in mice and provides experimental model descriptions for over 1500 human diseases. Curated data from scientific publications are integrated with those from high-throughput phenotyping and gene expression centers. Data are standardized using defined, hierarchical vocabularies such as the Mammalian Phenotype (MP) Ontology, Mouse Developmental Anatomy and the Gene Ontologies (GO). 
This chapter introduces you to Gene and Allele Detail pages and provides step-by-step instructions for simple searches and those that take advantage of the breadth of MGI data integration.",Mouse Genome Informatics,MGI,http://www.informatics.jax.org/,the International Resource for Information on the Laboratory Mouse +29890119,FlyXCDB-A Resource for Drosophila Cell Surface and Secreted Proteins and Their Extracellular Domains.,"Genomes of metazoan organisms possess a large number of genes encoding cell surface and secreted (CSS) proteins that carry out crucial functions in cell adhesion and communication, signal transduction, extracellular matrix establishment, nutrient digestion and uptake, immunity, and developmental processes. We developed the FlyXCDB database (http://prodata.swmed.edu/FlyXCDB) that provides a comprehensive resource to investigate extracellular (XC) domains in CSS proteins of Drosophila melanogaster, the most studied insect model organism in various aspects of animal biology. More than 300 Drosophila XC domains were discovered in Drosophila CSS proteins encoded by over 2500 genes through analyses of computational predictions of signal peptide, transmembrane (TM) segment, and GPI-anchor signal sequence, profile-based sequence similarity searches, gene ontology, and literature. These domains were classified into six classes mainly based on their molecular functions, including protein-protein interactions (class P), signaling molecules (class S), binding of non-protein molecules or groups (class B), enzyme homologs (class E), enzyme regulation and inhibition (class R), and unknown molecular function (class U). Main cellular functions such as cell adhesion, cell signaling, and extracellular matrix composition were described for the most abundant domains in each functional class. 
We assigned cell membrane topology categories (E, secreted; S, type I/III single-pass TM; T, type II single-pass TM; M, multi-pass TM; and G, GPI-anchored) to the products of genes with XC domains and investigated their regulation by mechanisms such as alternative splicing and stop codon readthrough.",FlyXCDB database,FlyXCDB,http://prodata.swmed.edu/FlyXCDB,A Resource for Drosophila Cell Surface and Secreted Proteins and Their Extracellular Domains +29897484,ILDgenDB: integrated genetic knowledge resource for interstitial lung diseases (ILDs).,"Interstitial lung diseases (ILDs) are a diverse group of ~200 acute and chronic pulmonary disorders that are characterized by variable amounts of inflammation, fibrosis and architectural distortion with substantial morbidity and mortality. Inaccurate and delayed diagnoses increase the risk, especially in developing countries. Studies have indicated the significant roles of genetic elements in ILDs pathogenesis. Therefore, the first genetic knowledge resource, ILDgenDB, has been developed with an objective to provide ILDs genetic data and their integrated analyses for the better understanding of disease pathogenesis and identification of diagnostics-based biomarkers. This resource contains literature-curated disease candidate genes (DCGs) enriched with various regulatory elements that have been generated using an integrated bioinformatics workflow of databases searches, literature-mining and DCGs-microRNA (miRNAs)-single nucleotide polymorphisms (SNPs) association analyses. To provide statistical significance to disease-gene association, ILD-specificity index and hypergeomatric test scores were also incorporated. Association analyses of miRNAs, SNPs and pathways responsible for the pathogenesis of different sub-classes of ILDs were also incorporated. Manually verified 299 DCGs and their significant associations with 1932 SNPs, 2966 miRNAs and 9170 miR-polymorphisms were also provided. 
Furthermore, 216 literature-mined and proposed biomarkers were identified. The ILDgenDB resource provides user-friendly browsing and extensive query-based information retrieval systems. Additionally, this resource also facilitates graphical view of predicted DCGs-SNPs/miRNAs and literature associated DCGs-ILDs interactions for each ILD to facilitate efficient data interpretation. Outcomes of analyses suggested the significant involvement of immune system and defense mechanisms in ILDs pathogenesis. This resource may potentially facilitate genetic-based disease monitoring and diagnosis.Database URL: http://14.139.240.55/ildgendb/index.php.",ILDgenDB,ILDgenDB,http://14.139.240.55/ildgendb/index.php,integrated genetic knowledge resource for interstitial lung diseases (ILDs) +29961819,dbLGL: an online leukemia gene and literature database for the retrospective comparison of adult and childhood leukemia genetics with literature evidence.,"Leukemia is a group of cancers with increased numbers of immature or abnormal leucocytes that originated in the bone marrow and other blood-forming organs. The development of differentially diagnostic biomarkers for different subtypes largely depends on understanding the biological pathways and regulatory mechanisms associated with leukemia-implicated genes. Unfortunately, the leukemia-implicated genes that have been identified thus far are scattered among thousands of published studies, and no systematic summary of the differences between adult and childhood leukemia exists with regard to the causative genetic mutations and genetic mechanisms of the various subtypes. In this study, we performed a systematic literature review of those susceptibility genes reported in small-scale experiments and built an online gene database containing a total of 1805 leukemia-associated genes, available at http://soft.bioinfo-minzhao.org/lgl/. 
Our comparison of genes from the four primary subtypes and between adult and childhood cases identified a number of potential genes related to patient survival. These curated genes can satisfy a growing demand for further integrating genomics screening for leukemia-associated low-frequency mutated genes.Database URL: http://soft.bioinfo-minzhao.org/lgl/.",dbLGL,dbLGL,http://soft.bioinfo-minzhao.org/lgl/,an online leukemia gene and literature database for the retrospective comparison of adult and childhood leukemia genetics with literature evidence +30150996,AromaDb: A Database of Medicinal and Aromatic Plant's Aroma Molecules With Phytochemistry and Therapeutic Potentials.,"In traditional, herbal medicine, and aromatherapy, use of essential oils and their aroma compounds have been known since long, for the management of various human diseases. The essential oil is a mixture of highly complex, naturally occurring volatile aroma compounds synthesized by medicinal and aromatic plants as secondary metabolites. Essential oils widely used in pharmaceutical, cosmetic, sanitary, food industry and agriculture for their antibacterial, antiviral, antifungal, antiparasitic, insecticidal, anticancer, neuroprotective, psychophysiological, and anti-aging activities. Moreover, volatile aroma compounds comprise a chemically diverse class of low molecular weight organic compounds with significant vapor pressure. However, aroma compounds produced by plants, mainly attract pollinators, seed dispersers and provide defense against pests or pathogens. However, in humans, about 300 active olfactory receptor genes are involved to detect thousands of different aroma compounds and modulates expression of different metabolic genes regulating human psychophysiological activity, brain function, pharmacological signaling, and therapeutic potential. 
Keeping in mind this importance, present database, namely, AromaDb (http://bioinfo.cimap.res.in/aromadb/) covers information of plant varieties/chemotypes, essential oils, chemical constituents, GC-MS profile, yield variations due to agro-morphological parameters, trade data, aroma compounds, fragrance type, and bioactivity details. The database includes 1,321 aroma chemical structures, bioactivities of essential oil/aroma compounds, 357 fragrance type, 166 commercially used plants, and their high yielding 148 varieties/chemotypes. Also includes calculated cheminformatics properties related to identification, physico-chemical properties, pharmacokinetics, toxicological, and ecological information. Also comprises interacted human genes affecting various diseases related cell signaling pathways correlating the use of aromatherapy. This database could be a useful resource to the plant's growers/producers, an aroma/fragrance industrialist, health professionals, and researchers exploring the potential of essential oils and aroma compounds in the development of novel formulations against human diseases.",AromaDb,AromaDb,http://bioinfo.cimap.res.in/aromadb/,A Database of Medicinal and Aromatic Plant's Aroma Molecules With Phytochemistry and Therapeutic Potentials +30152276,HYPO: A Database of Human Hypothetical Proteins.,"

Background

There are genes whose function remains obscure as they may not have similarities to known regions in the genome. Such known 'unknown' genes constituting the Open Reading Frames (ORF) that remain in the epigenome are termed as orphan genes and the proteins encoded by them but having no experimental evidence of translation are termed as 'Hypothetical Proteins' (HPs).

Objectives

We have enhanced our former database of Hypothetical Proteins (HP) in human (HypoDB) with added annotation, application programming interfaces and descriptive features. The database hosts 1000+ manually curated records of the known 'unknown' regions in the human genome. The new updated version of HypoDB with functionalities (Blast, Match) is freely accessible at http://www.bioclues.org/hypo2.

Methods

The total collection of HPs were checked using experimentally validated sets (from Swiss-Prot) or non-experimentally validated set (TrEMBL) or the complete set (UniProtKB). The database was designed with java at the core backend, integrated with databases, viz. EMBL, PIR, HPRD and those including descriptors for structural databases, interaction and association databases.

Results

The HypoDB constituted Application Programming Interfaces (API) for implicitly searching resources linking them to other databases like NCBI Link-out in addition to multiple search capabilities along with advanced searches using integrated bio-tools, viz. Match and BLAST were incorporated.

Conclusion

The HypoDB is perhaps the only open-source HP database with a range of tools for common bioinformatics retrievals and serves as a standby reference to researchers who are interested in finding candidate sequences for their potential experimental work.",HypoDB,HypoDB,http://www.bioclues.org/hypo2,A Database of Human Hypothetical Proteins +30152276,HYPO: A Database of Human Hypothetical Proteins.,"

Background

There are genes whose function remains obscure as they may not have similarities to known regions in the genome. Such known 'unknown' genes constituting the Open Reading Frames (ORF) that remain in the epigenome are termed as orphan genes and the proteins encoded by them but having no experimental evidence of translation are termed as 'Hypothetical Proteins' (HPs).

Objectives

We have enhanced our former database of Hypothetical Proteins (HP) in human (HypoDB) with added annotation, application programming interfaces and descriptive features. The database hosts 1000+ manually curated records of the known 'unknown' regions in the human genome. The new updated version of HypoDB with functionalities (Blast, Match) is freely accessible at http://www.bioclues.org/hypo2.

Methods

The total collection of HPs were checked using experimentally validated sets (from Swiss-Prot) or non-experimentally validated set (TrEMBL) or the complete set (UniProtKB). The database was designed with java at the core backend, integrated with databases, viz. EMBL, PIR, HPRD and those including descriptors for structural databases, interaction and association databases.

Results

The HypoDB constituted Application Programming Interfaces (API) for implicitly searching resources linking them to other databases like NCBI Link-out in addition to multiple search capabilities along with advanced searches using integrated bio-tools, viz. Match and BLAST were incorporated.

Conclusion

The HypoDB is perhaps the only open-source HP database with a range of tools for common bioinformatics retrievals and serves as a standby reference to researchers who are interested in finding candidate sequences for their potential experimental work.",HYPO,HYPO,http://www.bioclues.org/hypo2,A Database of Human Hypothetical Proteins +30223042,TSNAdb: A Database for Tumor-specific Neoantigens from Immunogenomics Data Analysis.,"Tumor-specific neoantigens have attracted much attention since they can be used as biomarkers to predict therapeutic effects of immune checkpoint blockade therapy and as potential targets for cancer immunotherapy. In this study, we developed a comprehensive tumor-specific neoantigen database (TSNAdb v1.0), based on pan-cancer immunogenomic analyses of somatic mutation data and human leukocyte antigen (HLA) allele information for 16 tumor types with 7748 tumor samples from The Cancer Genome Atlas (TCGA) and The Cancer Immunome Atlas (TCIA). We predicted binding affinities between mutant/wild-type peptides and HLA class I molecules by NetMHCpan v2.8/v4.0, and presented detailed information of 3,707,562/1,146,961 potential neoantigens generated by somatic mutations of all tumor samples. Moreover, we employed recurrent mutations in combination with highly frequent HLA alleles to predict potential shared neoantigens across tumor patients, which would facilitate the discovery of putative targets for neoantigen-based cancer immunotherapy. TSNAdb is freely available at http://biopharm.zju.edu.cn/tsnadb.",tumor-specific neoantigen database,TSNAdb,http://biopharm.zju.edu.cn/tsnadb,A Database for Tumor-specific Neoantigens from Immunogenomics Data Analysis +30244175,PTMD: A Database of Human Disease-associated Post-translational Modifications.,"Various posttranslational modifications (PTMs) participate in nearly all aspects of biological processes by regulating protein functions, and aberrant states of PTMs are frequently implicated in human diseases. 
Therefore, an integral resource of PTM-disease associations (PDAs) would be a great help for both academic research and clinical use. In this work, we reported PTMD, a well-curated database containing PTMs that are associated with human diseases. We manually collected 1950 known PDAs in 749 proteins for 23 types of PTMs and 275 types of diseases from the literature. Database analyses show that phosphorylation has the largest number of disease associations, whereas neurologic diseases have the largest number of PTM associations. We classified all known PDAs into six classes according to the PTM status in diseases and demonstrated that the upregulation and presence of PTM events account for a predominant proportion of disease-associated PTM events. By reconstructing a disease-gene network, we observed that breast cancers have the largest number of associated PTMs and AKT1 has the largest number of PTMs connected to diseases. Finally, the PTMD database was developed with detailed annotations and can be a useful resource for further analyzing the relations between PTMs and human diseases. PTMD is freely accessible at http://ptmd.biocuckoo.org.",PTMD,PTMD,http://ptmd.biocuckoo.org,a well-curated database containing PTMs that are associated with human diseases +30244175,PTMD: A Database of Human Disease-associated Post-translational Modifications.,"Various posttranslational modifications (PTMs) participate in nearly all aspects of biological processes by regulating protein functions, and aberrant states of PTMs are frequently implicated in human diseases. Therefore, an integral resource of PTM-disease associations (PDAs) would be a great help for both academic research and clinical use. In this work, we reported PTMD, a well-curated database containing PTMs that are associated with human diseases. We manually collected 1950 known PDAs in 749 proteins for 23 types of PTMs and 275 types of diseases from the literature. 
Database analyses show that phosphorylation has the largest number of disease associations, whereas neurologic diseases have the largest number of PTM associations. We classified all known PDAs into six classes according to the PTM status in diseases and demonstrated that the upregulation and presence of PTM events account for a predominant proportion of disease-associated PTM events. By reconstructing a disease-gene network, we observed that breast cancers have the largest number of associated PTMs and AKT1 has the largest number of PTMs connected to diseases. Finally, the PTMD database was developed with detailed annotations and can be a useful resource for further analyzing the relations between PTMs and human diseases. PTMD is freely accessible at http://ptmd.biocuckoo.org.",PTMD,PTMD,http://ptmd.biocuckoo.org,A Database of Human Disease-associated Post-translational Modifications +30268934,GAAD: A Gene and Autoimmiune Disease Association Database.,"Autoimmune diseases (ADs) arise from an abnormal immune response of the body against substances and tissues normally present in the body. More than a hundred of ADs have been described in the literature so far. Although their etiology remains largely unclear, various types of ADs tend to share more associated genes with other types of ADs than with non-AD types. Here we present GAAD, a gene and AD association database. In GAAD, we collected 44,762 associations between 49 ADs and 4249 genes from public databases and MEDLINE documents. We manually verified the associations to ensure the quality and credibility. We reconstructed and recapitulated the relationships among ADs using their shared genes, which further validated the quality of our data. We also provided a list of significantly co-occurring gene pairs among ADs; with embedded tools, users can query gene co-occurrences and construct customized co-occurrence network with genes of interest. 
To make GAAD more straightforward to experimental biologists and medical scientists, we extracted additional information describing the associations through text mining, including the putative diagnostic value of the associations, type and position of gene polymorphisms, expression changes of implicated genes, as well as the phenotypical consequences, and grouped the associations accordingly. GAAD is freely available at http://gaad.medgenius.info.",Gene and Autoimmiune Disease Association Database,GAAD,http://gaad.medgenius.info,a gene and AD association database +30268934,GAAD: A Gene and Autoimmiune Disease Association Database.,"Autoimmune diseases (ADs) arise from an abnormal immune response of the body against substances and tissues normally present in the body. More than a hundred of ADs have been described in the literature so far. Although their etiology remains largely unclear, various types of ADs tend to share more associated genes with other types of ADs than with non-AD types. Here we present GAAD, a gene and AD association database. In GAAD, we collected 44,762 associations between 49 ADs and 4249 genes from public databases and MEDLINE documents. We manually verified the associations to ensure the quality and credibility. We reconstructed and recapitulated the relationships among ADs using their shared genes, which further validated the quality of our data. We also provided a list of significantly co-occurring gene pairs among ADs; with embedded tools, users can query gene co-occurrences and construct customized co-occurrence network with genes of interest. To make GAAD more straightforward to experimental biologists and medical scientists, we extracted additional information describing the associations through text mining, including the putative diagnostic value of the associations, type and position of gene polymorphisms, expression changes of implicated genes, as well as the phenotypical consequences, and grouped the associations accordingly. 
GAAD is freely available at http://gaad.medgenius.info.",Gene and Autoimmiune Disease Association Database,GAAD,http://gaad.medgenius.info,A Gene and Autoimmiune Disease Association Database +30371822,MatrixDB: integration of new data with a focus on glycosaminoglycan interactions.,"MatrixDB (http://matrixdb.univ-lyon1.fr/) is an interaction database focused on biomolecular interactions established by extracellular matrix (ECM) proteins and glycosaminoglycans (GAGs). It is an active member of the International Molecular Exchange (IMEx) consortium (https://www.imexconsortium.org/). It has adopted the HUPO Proteomics Standards Initiative standards for annotating and exchanging interaction data, either at the MIMIx (The Minimum Information about a Molecular Interaction eXperiment) or IMEx level. The following items related to GAGs have been added in the updated version of MatrixDB: (i) cross-references of GAG sequences to the GlyTouCan database, (ii) representation of GAG sequences in different formats (IUPAC and GlycoCT) and as SNFG (Symbol Nomenclature For Glycans) images and (iii) the GAG Builder online tool to build 3D models of GAG sequences from GlycoCT codes. The database schema has been improved to represent n-ary experiments. Gene expression data, imported from Expression Atlas (https://www.ebi.ac.uk/gxa/home), quantitative ECM proteomic datasets (http://matrisomeproject.mit.edu/ecm-atlas), and a new visualization tool of the 3D structures of biomolecules, based on the PDB Component Library and LiteMol, have also been added. 
A new advanced query interface now allows users to mine MatrixDB data using combinations of criteria, in order to build specific interaction networks related to diseases, biological processes, molecular functions or publications.",MatrixDB,MatrixDB,http://matrixdb.univ-lyon1.fr/,an interaction database focused on biomolecular interactions established by extracellular matrix (ECM) proteins and glycosaminoglycans (GAGs) +30380112,Translocatome: a novel resource for the analysis of protein translocation between cellular organelles.,"Here we present Translocatome, the first dedicated database of human translocating proteins (URL: http://translocatome.linkgroup.hu). The core of the Translocatome database is the manually curated data set of 213 human translocating proteins listing the source of their experimental validation, several details of their translocation mechanism, their local compartmentalized interactome, as well as their involvement in signalling pathways and disease development. In addition, using the well-established and widely used gradient boosting machine learning tool, XGBoost, Translocatome provides translocation probability values for 13 066 human proteins identifying 1133 and 3268 high- and low-confidence translocating proteins, respectively. The database has user-friendly search options with a UniProt autocomplete quick search and advanced search for proteins filtered by their localization, UniProt identifiers, translocation likelihood or data complexity. Download options of search results, manually curated and predicted translocating protein sets are available on its website. The update of the database is helped by its manual curation framework and connection to the previously published ComPPI compartmentalized protein-protein interaction database (http://comppi.linkgroup.hu). 
As shown by the application examples of merlin (NF2) and tumor protein 63 (TP63) Translocatome allows a better comprehension of protein translocation as a systems biology phenomenon and can be used as a discovery-tool in the protein translocation field.",Translocatome,Translocatome,http://translocatome.linkgroup.hu,the first dedicated database of human translocating proteins +30380112,Translocatome: a novel resource for the analysis of protein translocation between cellular organelles.,"Here we present Translocatome, the first dedicated database of human translocating proteins (URL: http://translocatome.linkgroup.hu). The core of the Translocatome database is the manually curated data set of 213 human translocating proteins listing the source of their experimental validation, several details of their translocation mechanism, their local compartmentalized interactome, as well as their involvement in signalling pathways and disease development. In addition, using the well-established and widely used gradient boosting machine learning tool, XGBoost, Translocatome provides translocation probability values for 13 066 human proteins identifying 1133 and 3268 high- and low-confidence translocating proteins, respectively. The database has user-friendly search options with a UniProt autocomplete quick search and advanced search for proteins filtered by their localization, UniProt identifiers, translocation likelihood or data complexity. Download options of search results, manually curated and predicted translocating protein sets are available on its website. The update of the database is helped by its manual curation framework and connection to the previously published ComPPI compartmentalized protein-protein interaction database (http://comppi.linkgroup.hu). 
As shown by the application examples of merlin (NF2) and tumor protein 63 (TP63) Translocatome allows a better comprehension of protein translocation as a systems biology phenomenon and can be used as a discovery-tool in the protein translocation field.",Translocatome,Translocatome,http://translocatome.linkgroup.hu,a novel resource for the analysis of protein translocation between cellular organelles +30652085,PDB_Amyloid: an extended live amyloid structure list from the PDB.,"The Protein Data Bank (PDB) contains more than 135 000 entries at present. From these, relatively few amyloid structures can be identified, since amyloids are insoluble in water. Therefore, most amyloid structures deposited in the PDB are in the form of solid state NMR data. Based on the geometric analysis of these deposited structures, we have prepared an automatically updated web server, which generates a list of the deposited amyloid structures, and also entries of globular proteins that have amyloid-like substructures of given size and characteristics. We have found that by applying only appropriately selected geometric conditions, it is possible to identify deposited amyloid structures and a number of globular proteins with amyloid-like substructures. We have analyzed these globular proteins and have found proof in the literature that many of them form amyloids more easily than many other globular proteins. Our results relate to the method of Stankovic et al. [Stankovic I et al. (2017) IPSI BgD Tran Int Res 13, 47-51], who applied a hybrid textual-search and geometric approach for finding amyloids in the PDB. If one intends to identify a subset of the PDB for certain applications, the identification algorithm needs to be re-run periodically, since in 2017 on average 30 new entries per day were deposited in the data bank. 
Our web server is updated regularly and automatically, and the identified amyloid and partial amyloid structures can be viewed or their list can be downloaded from the following website https://pitgroup.org/amyloid.",PDB_Amyloid,PDB_Amyloid,https://pitgroup.org/amyloid,an extended live amyloid structure list from the PDB +30674925,"Smooth Muscle Transcriptome Browser: offering genome-wide references and expression profiles of transcripts expressed in intestinal SMC, ICC, and PDGFRα+ cells.","Transcriptome data on the quantitative numbers of transcriptional variants expressed in primary cells offer essential clues into specific cellular functions and biological processes. We have previously collected transcriptomes from primary smooth muscle cells (SMC), interstitial cells of Cajal (ICC), and PDGFRa+ cells (fibroblast-like cells) isolated from murine jejunal and colonic smooth muscle and/or mucosal tissues as well as transcriptomes from the associated tissues (jejunal smooth muscle, colonic smooth muscle, and colonic mucosa). In this study, we have built the Smooth Muscle Transcriptome Browser (SMTB), https://med.unr.edu/physio/transcriptome , a web-based, graphical user interface that offers genetic references and expression profiles of all transcripts expressed at both the cellular (SMC, ICC, and PDGFRa+ cells) and tissue level (smooth muscle and mucosal tissue). 
This browser brings new insights into the cellular and biological functions of the cell types in gastrointestinal smooth muscle biology.",Smooth Muscle Transcriptome Browser,SMTB,https://med.unr.edu/physio/transcriptome,"a web-based, graphical user interface that offers genetic references and expression profiles of all transcripts expressed at both the cellular (SMC, ICC, and PDGFRa+ cells) and tissue level (smooth muscle and mucosal tissue)" +30715274,APID database: redefining protein-protein interaction experimental evidences and binary interactomes.,"The collection and integration of all the known protein-protein physical interactions within a proteome framework are critical to allow proper exploration of the protein interaction networks that drive biological processes in cells at molecular level. APID Interactomes is a public resource of biological data (http://apid.dep.usal.es) that provides a comprehensive and curated collection of `protein interactomes' for more than 1100 organisms, including 30 species with more than 500 interactions, derived from the integration of experimentally detected protein-to-protein physical interactions (PPIs). We have performed an update of APID database including a redefinition of several key properties of the PPIs to provide a more precise data integration and to avoid false duplicated records. This includes the unification of all the PPIs from five primary databases of molecular interactions (BioGRID, DIP, HPRD, IntAct and MINT), plus the information from two original systematic sources of human data and from experimentally resolved 3D structures (i.e. PDBs, Protein Data Bank files, where more than two distinct proteins have been identified). 
Thus, APID provides PPIs reported in published research articles (with traceable PMIDs) and detected by valid experimental interaction methods that give evidences about such protein interactions (following the `ontology and controlled vocabulary': www.ebi.ac.uk/ols/ontologies/mi; developed by `HUPO PSI-MI'). Within this data mining framework, all interaction detection methods have been grouped into two main types: (i) `binary' physical direct detection methods and (ii) `indirect' methods. As a result of these redefinitions, APID provides unified protein interactomes including the specific `experimental evidences' that support each PPI, indicating whether the interactions can be considered `binary' (i.e. supported by at least one binary detection method) or not.",APID Interactomes,,http://apid.dep.usal.es,"a public resource of biological data that provides a comprehensive and curated collection of `protein interactomes' for more than 1100 organisms, including 30 species with more than 500 interactions, derived from the integration of experimentally detected protein-to-protein physical interactions (PPIs)" +30715274,APID database: redefining protein-protein interaction experimental evidences and binary interactomes.,"The collection and integration of all the known protein-protein physical interactions within a proteome framework are critical to allow proper exploration of the protein interaction networks that drive biological processes in cells at molecular level. APID Interactomes is a public resource of biological data (http://apid.dep.usal.es) that provides a comprehensive and curated collection of `protein interactomes' for more than 1100 organisms, including 30 species with more than 500 interactions, derived from the integration of experimentally detected protein-to-protein physical interactions (PPIs). 
We have performed an update of APID database including a redefinition of several key properties of the PPIs to provide a more precise data integration and to avoid false duplicated records. This includes the unification of all the PPIs from five primary databases of molecular interactions (BioGRID, DIP, HPRD, IntAct and MINT), plus the information from two original systematic sources of human data and from experimentally resolved 3D structures (i.e. PDBs, Protein Data Bank files, where more than two distinct proteins have been identified). Thus, APID provides PPIs reported in published research articles (with traceable PMIDs) and detected by valid experimental interaction methods that give evidences about such protein interactions (following the `ontology and controlled vocabulary': www.ebi.ac.uk/ols/ontologies/mi; developed by `HUPO PSI-MI'). Within this data mining framework, all interaction detection methods have been grouped into two main types: (i) `binary' physical direct detection methods and (ii) `indirect' methods. As a result of these redefinitions, APID provides unified protein interactomes including the specific `experimental evidences' that support each PPI, indicating whether the interactions can be considered `binary' (i.e. supported by at least one binary detection method) or not.",APID database,,http://apid.dep.usal.es,"a public resource of biological data that provides a comprehensive and curated collection of `protein interactomes' for more than 1100 organisms, including 30 species with more than 500 interactions, derived from the integration of experimentally detected protein-to-protein physical interactions (PPIs)" +30760842,Development and validation of whole genome-wide and genic microsatellite markers in oil palm (Elaeis guineensis Jacq.): First microsatellite database (OpSatdb).,"The availability of large expressed sequence tag (EST) and whole genome databases of oil palm enabled the development of a data base of microsatellite markers. 
For this purpose, an EST database consisting of 40,979 EST sequences spanning 27 Mb and a chromosome-wise whole genome databases were downloaded. A total of 3,950 primer pairs were identified and developed from EST sequences. The tri and tetra nucleotide repeat motifs were most prevalent (each 24.75%) followed by di-nucleotide repeat motifs. Whole genome-wide analysis found a total of 245,654 SSR repeats across the 16 chromosomes of oil palm, of which 38,717 were compound microsatellite repeats. A web application, OpSatdb, the first microsatellite database of oil palm, was developed using the PHP and MySQL database ( https://ssr.icar.gov.in/index.php ). It is a simple and systematic web-based search engine for searching SSRs based on repeat motif type, repeat type, and primer details. High synteny was observed between oil palm and rice genomes. The mapping of ESTs having SSRs by Blast2GO resulted in the identification of 19.2% sequences with gene ontology (GO) annotations. Randomly, a set of ten genic SSRs and five genomic SSRs were used for validation and genetic diversity on 100 genotypes belonging to the world oil palm genetic resources. The grouping pattern was observed to be broadly in accordance with the geographical origin of the genotypes. The identified genic and genome-wide SSRs can be effectively useful for various genomic applications of oil palm, such as genetic diversity, linkage map construction, mapping of QTLs, marker-assisted selection, and comparative population studies.",OpSatdb,,https://ssr.icar.gov.in/index.php,the first microsatellite database of oil palm +30994884,Graph-based data integration from bioactive peptide databases of pharmaceutical interest: toward an organized collection enabling visual network analysis.,"

Motivation

Bioactive peptides have gained great attention in the academy and pharmaceutical industry since they play an important role in human health. However, the increasing number of bioactive peptide databases is causing the problem of data redundancy and duplicated efforts. Even worse is the fact that the available data is non-standardized and often dirty with data entry errors. Therefore, there is a need for a unified view that enables a more comprehensive analysis of the information on this topic residing at different sites.

Results

After collecting web pages from a large variety of bioactive peptide databases, we organized the web content into an integrated graph database (starPepDB) that holds a total of 71 310 nodes and 348 505 relationships. In this graph structure, there are 45 120 nodes representing peptides, and the rest of the nodes are connected to peptides for describing metadata. Additionally, to facilitate a better understanding of the integrated data, a software tool (starPep toolbox) has been developed for supporting visual network analysis in a user-friendly way; providing several functionalities such as peptide retrieval and filtering, network construction and visualization, interactive exploration and exporting data options.

Availability and implementation

Both starPepDB and starPep toolbox are freely available at http://mobiosd-hub.com/starpep/.

Supplementary information

Supplementary data are available at Bioinformatics online.",starPepDB,,http://mobiosd-hub.com/starpep/, +31016417,DNAmod: the DNA modification database.,"Covalent DNA modifications, such as 5-methylcytosine (5mC), are increasingly the focus of numerous research programs. In eukaryotes, both 5mC and 5-hydroxymethylcytosine (5hmC) are now recognized as stable epigenetic marks, with diverse functions. Bacteria, archaea, and viruses contain various other modified DNA nucleobases. Numerous databases describe RNA and histone modifications, but no database specifically catalogues DNA modifications, despite their broad importance in epigenetic regulation. To address this need, we have developed DNAmod: the DNA modification database. DNAmod is an open-source database ( https://dnamod.hoffmanlab.org ) that catalogues DNA modifications and provides a single source to learn about their properties. DNAmod provides a web interface to easily browse and search through these modifications. The database annotates the chemical properties and structures of all curated modified DNA bases, and a much larger list of candidate chemical entities. DNAmod includes manual annotations of available sequencing methods, descriptions of their occurrence in nature, and provides existing and suggested nomenclature. DNAmod enables researchers to rapidly review previous work, select mapping techniques, and track recent developments concerning modified bases of interest.",DNA modification database,DNAmod,https://dnamod.hoffmanlab.org,an open-source database that catalogues DNA modifications and provides a single source to learn about their properties +31034103,Functional analysis tools for post-translational modification: a post-translational modification database for analysis of proteins and metabolic pathways.,"Post-translational modifications (PTMs) are critical regulators of protein function, and nearly 200 different types of PTM have been identified. 
Advances in high-resolution mass spectrometry have led to the identification of an unprecedented number of PTM sites in numerous organisms, potentially facilitating a more complete understanding of how PTMs regulate cellular behavior. While databases have been created to house the resulting data, most of these resources focus on individual types of PTM, do not consider quantitative PTM analyses or do not provide tools for the visualization and analysis of PTM data. Here, we describe the Functional Analysis Tools for Post-Translational Modifications (FAT-PTM) database (https://bioinformatics.cse.unr.edu/fat-ptm/), which currently supports eight different types of PTM and over 49 000 PTM sites identified in large-scale proteomic surveys of the model organism Arabidopsis thaliana. The FAT-PTM database currently supports tools to visualize protein-centric PTM networks, quantitative phosphorylation site data from over 10 different quantitative phosphoproteomic studies, PTM information displayed in protein-centric metabolic pathways and groups of proteins that are co-modified by multiple PTMs. Overall, the FAT-PTM database provides users with a robust platform to share and visualize experimentally supported PTM data, develop hypotheses related to target proteins or identify emergent patterns in PTM data for signaling and metabolic pathways.",Functional Analysis Tools for Post-Translational Modifications,FAT-PTM,https://bioinformatics.cse.unr.edu/fat-ptm/,a post-translational modification database for analysis of proteins and metabolic pathways +31096089,"DrugR+: A comprehensive relational database for drug repurposing, combination therapy, and replacement therapy.","Drug repurposing or repositioning, which introduces new applications of the existing drugs, is an emerging field in drug discovery scope. 
To enhance the success rate of the research and development (R&D) process in a cost- and time-effective manner, a number of pharmaceutical companies worldwide have made tremendous investments. Besides, many researchers have proposed various methods and databases for the repurposing of various drugs. However, there is not a proper and well-organized database available. To this end, for the first time, we developed a new database based on DrugBank and KEGG data, which is named """"DrugR+"""". Our developed database provides some advantages relative to the DrugBank, and its interface supplies new capabilities for both single and synthetic repositioning of drugs. Moreover, it includes four new datasets which can be used for predicting drug-target interactions using supervised machine learning methods. As a case study, we introduced novel applications of some drugs and discussed the obtained results. A comparison of several machine learning methods on the generated datasets has also been reported in the Supplementary File. Having included several normalized tables, DrugR + has been organized to provide key information on data structures for the repurposing and combining applications of drugs. It provides the SQL query capability for professional users and an appropriate method with different options for unprofessional users. Additionally, DrugR + consists of repurposing service that accepts a drug and proposes a list of potential drugs for some usages. 
Taken all, DrugR+ is a free web-based database and accessible using (http://www.drugr.ir), which can be updated through a map-reduce parallel processing method to provide the most relevant information.",DrugR+,DrugR+,http://www.drugr.ir,"A comprehensive relational database for drug repurposing, combination therapy, and replacement therapy" +31110280,"CancerMine: a literature-mined resource for drivers, oncogenes and tumor suppressors in cancer.","Tumors from individuals with cancer are frequently genetically profiled for information about the driving forces behind the disease. We present the CancerMine resource, a text-mined and routinely updated database of drivers, oncogenes and tumor suppressors in different types of cancer. All data are available online ( http://bionlp.bcgsc.ca/cancermine ) and downloadable under a Creative Commons Zero license for ease of use.",CancerMine,CancerMine,http://bionlp.bcgsc.ca/cancermine,"a literature-mined resource for drivers, oncogenes and tumor suppressors in cancer" +31160594,"PathoPhenoDB, linking human pathogens to their phenotypes in support of infectious disease research.","Understanding the relationship between the pathophysiology of infectious disease, the biology of the causative agent and the development of therapeutic and diagnostic approaches is dependent on the synthesis of a wide range of types of information. Provision of a comprehensive and integrated disease phenotype knowledgebase has the potential to provide novel and orthogonal sources of information for the understanding of infectious agent pathogenesis, and support for research on disease mechanisms. We have developed PathoPhenoDB, a database containing pathogen-to-phenotype associations. PathoPhenoDB relies on manual curation of pathogen-disease relations, on ontology-based text mining as well as manual curation to associate host disease phenotypes with infectious agents. 
Using Semantic Web technologies, PathoPhenoDB also links to knowledge about drug resistance mechanisms and drugs used in the treatment of infectious diseases. PathoPhenoDB is accessible at http://patho.phenomebrowser.net/ , and the data are freely available through a public SPARQL endpoint.",PathoPhenoDB,PathoPhenoDB,http://patho.phenomebrowser.net/,a database containing pathogen-to-phenotype associations +31284879,ImtRDB: a database and software for mitochondrial imperfect interspersed repeats annotation.,"

Background

Mitochondria is a powerhouse of all eukaryotic cells that have its own circular DNA (mtDNA) encoding various RNAs and proteins. Somatic perturbations of mtDNA are accumulating with age thus it is of great importance to uncover the main sources of mtDNA instability. Recent analyses demonstrated that somatic mtDNA deletions depend on imperfect repeats of various nature between distant mtDNA segments. However, till now there are no comprehensive databases annotating all types of imperfect repeats in numerous species with sequenced complete mitochondrial genome as well as there are no algorithms capable to call all types of imperfect repeats in circular mtDNA.

Results

We implemented naïve algorithm of pattern recognition by analogy to standard dot-plot construction procedures allowing us to find both perfect and imperfect repeats of four main types: direct, inverted, mirror and complementary. Our algorithm is adapted to specific characteristics of mtDNA such as circularity and an excess of short repeats - it calls imperfect repeats starting from the length of 10 b.p. We constructed interactive web available database ImtRDB depositing perfect and imperfect repeats positions in mtDNAs of more than 3500 Vertebrate species. Additional tools, such as visualization of repeats within a genome, comparison of repeat densities among different genomes and a possibility to download all results make this database useful for many biologists. Our first analyses of the database demonstrated that mtDNA imperfect repeats (i) are usually short; (ii) associated with unfolded DNA structures; (iii) four types of repeats positively correlate with each other forming two equivalent pairs: direct and mirror versus inverted and complementary, with identical nucleotide content and similar distribution between species; (iv) abundance of repeats is negatively associated with GC content; (v) dinucleotides GC versus CG are overrepresented on light chain of mtDNA covered by repeats.

Conclusions

ImtRDB is available at http://bioinfodbs.kantiana.ru/ImtRDB/ . It is accompanied by the software calling all types of interspersed repeats with different level of degeneracy in circular DNA. This database and software can become a very useful tool in various areas of mitochondrial and chloroplast DNA research.",ImtRDB,ImtRDB,http://bioinfodbs.kantiana.ru/ImtRDB/,a database and software for mitochondrial imperfect interspersed repeats annotation +31416842,Curatopes Melanoma: A Database of Predicted T-cell Epitopes from Overly Expressed Proteins in Metastatic Cutaneous Melanoma.,"Therapeutic anticancer vaccination has been adapted as an immunotherapy in several solid tumors. However, the selection of promising candidates from the total quantity of possible epitopes poses a challenge to clinicians and bioinformaticians alike, and very few epitopes have been tested in experimental or clinical settings to validate their efficacy. Here, we present a comprehensive database of predicted nonmutated peptide epitopes derived from genes that are overly expressed in a group of 32 melanoma biopsies compared with healthy tissues and that were filtered against expression in a curated list of survival-critical tissues. We hypothesize that these """"self-tolerant"""" epitopes have two desirable properties: they do not depend on mutations, being immediately applicable to a large patient collective, and they potentially cause fewer autoimmune reactions. To support epitope selection, we provide an aggregated score of expected therapeutic efficiency as a shortlist mechanism. The database has applications in facilitating epitope selection and trial design and is freely accessible at https://www.curatopes.com. 
SIGNIFICANCE: A database is presented that predicts and scores antitumor T-cell epitopes, with a focus on tolerability and avoidance of severe autoimmunity, offering a supplementary epitope set for further investigation in immunotherapy.",Curatopes Melanoma,,https://www.curatopes.com,A Database of Predicted T-cell Epitopes from Overly Expressed Proteins in Metastatic Cutaneous Melanoma +31490686,TMB Library of Nucleosome Simulations.,"Nucleosomes are the fundamental building blocks of chromatin, the biomaterial that houses the genome in all higher organisms. A nucleosome consists of 145-147 base pairs of DNA wrapped 1.7 times around eight histones. Given a four-letter code (A, C, G, T), there are approximately 4147 or 1088 oligonucleotides that can form a nucleosome. Comparative, rather than comprehensive, studies are required. Here we introduce the TMB Library of nucleosome simulations and present a meta-analysis of over 20 µs of all atom molecular dynamics simulations representing 518 different realizations of the nucleosome. The TMB Library serves as a reference for future comparative, on-demand simulations of nucleosomes and a demonstration of iBIOMES Lite as a tool for managing a laboratory's simulation library. For every simulation, dewatered trajectories, RMSD, and DNA helical parameter data are provided through iBIOMES Lite in a Web browser and a file browser format. A novel view of nucleosomal DNA emerges from our meta-analysis of the TMB Library. DNA conformation is restricted to a specific left-handed superhelix, but the range of conformations observed for individual bases and base pairs is not more restricted nor more highly deformed than DNA free in solution. With the exception of Roll, mean DNA helical parameter values obtained from simulations of nucleosomes are largely within the range of thermal motion of DNA free in solution. 
The library provides evidence of DNA kinking in the nucleosome and clearly demonstrates the effects of DNA sequence on the gross structure and dynamics of nucleosomes. These effects and mispositioning of the 601 super strong nucleosome positioning sequence can be detected in short simulations (10 ns). Collectively, the results provide a basis for comparative simulation studies of nucleosomes and extend our understanding of the binding of proteins and drugs to nucleosomal DNA. The TMB Library can be found at http://dna.engr.latech.edu/~tmbshare/ .",TMB Library,TMB Library,http://dna.engr.latech.edu/~tmbshare/, +31642469,PhenoModifier: a genetic modifier database for elucidating the genetic basis of human phenotypic variation.,"From clinical observations to large-scale sequencing studies, the phenotypic impact of genetic modifiers is evident. To better understand the full spectrum of the genetic contribution to human disease, concerted efforts are needed to construct a useful modifier resource for interpreting the information from sequencing data. Here, we present the PhenoModifier (https://www.biosino.org/PhenoModifier), a manually curated database that provides a comprehensive overview of human genetic modifiers. By manually curating over ten thousand published articles, 3078 records of modifier information were entered into the current version of PhenoModifier, related to 288 different disorders, 2126 genetic modifier variants and 843 distinct modifier genes. To help users probe further into the mechanism of their interested modifier genes, we extended the yeast genetic interaction data and yeast quantitative trait loci to the human and we also integrated GWAS data into the PhenoModifier to assist users in evaluating all possible phenotypes associated with a modifier allele. As the first comprehensive resource of human genetic modifiers, PhenoModifier provides a more complete spectrum of genetic factors contributing to human phenotypic variation. 
The portal has a broad scientific and clinical scope, spanning activities relevant to variant interpretation for research purposes as well as clinical decision making.",PhenoModifier,PhenoModifier,https://www.biosino.org/PhenoModifier,a genetic modifier database for elucidating the genetic basis of human phenotypic variation +31647100,Bovine Genome Database: new annotation tools for a new reference genome.,"The Bovine Genome Database (BGD) (http://bovinegenome.org) has been the key community bovine genomics database for more than a decade. To accommodate the increasing amount and complexity of bovine genomics data, BGD continues to advance its practices in data acquisition, curation, integration and efficient data retrieval. BGD provides tools for genome browsing (JBrowse), genome annotation (Apollo), data mining (BovineMine) and sequence database searching (BLAST). To augment the BGD genome annotation capabilities, we have developed a new Apollo plug-in, called the Locus-Specific Alternate Assembly (LSAA) tool, which enables users to identify and report potential genome assembly errors and structural variants. BGD now hosts both the newest bovine reference genome assembly, ARS-UCD1.2, as well as the previous reference genome, UMD3.1.1, with cross-genome navigation and queries supported in JBrowse and BovineMine, respectively. Other notable enhancements to BovineMine include the incorporation of genomes and gene annotation datasets for non-bovine ruminant species (goat and sheep), support for multiple assemblies per organism in the Regions Search tool, integration of additional ontologies and development of many new template queries. 
To better serve the research community, we continue to focus on improving existing tools, developing new tools, adding new datasets and encouraging researchers to use these resources.",Bovine Genome Database,BGD,http://bovinegenome.org, +31664080,dendPoint: a web resource for dendrimer pharmacokinetics investigation and prediction.,"Nanomedicine development currently suffers from a lack of efficient tools to predict pharmacokinetic behavior without relying upon testing in large numbers of animals, impacting success rates and development costs. This work presents dendPoint, the first in silico model to predict the intravenous pharmacokinetics of dendrimers, a commonly explored drug vector, based on physicochemical properties. We have manually curated the largest relational database of dendrimer pharmacokinetic parameters and their structural/physicochemical properties. This was used to develop a machine learning-based model capable of accurately predicting pharmacokinetic parameters, including half-life, clearance, volume of distribution and dose recovered in the liver and urine. dendPoint successfully predicts dendrimer pharmacokinetic properties, achieving correlations of up to r = 0.83 and Q2 up to 0.68. dendPoint is freely available as a user-friendly web-service and database at http://biosig.unimelb.edu.au/dendpoint . This platform is ultimately expected to be used to guide dendrimer construct design and refinement prior to embarking on more time consuming and expensive in vivo testing.",dendPoint,dendPoint,http://biosig.unimelb.edu.au/dendpoint,a web resource for dendrimer pharmacokinetics investigation and prediction +31780665,A database of high-resolution MS/MS spectra for lichen metabolites.,"While analytical techniques in natural products research massively shifted to liquid chromatography-mass spectrometry, lichen chemistry remains reliant on limited analytical methods, Thin Layer Chromatography being the gold standard. 
To meet the modern standards of metabolomics within lichenochemistry, we announce the publication of an open access MS/MS library with 250 metabolites, coined LDB for Lichen DataBase, providing a comprehensive coverage of lichen chemodiversity. These were donated by the Berlin Garden and Botanical Museum from the collection of Siegfried Huneck to be analyzed by LC-MS/MS. Spectra at individual collision energies were submitted to MetaboLights (https://www.ebi.ac.uk/metabolights/MTBLS999) while merged spectra were uploaded to the GNPS platform (CCMSLIB00004751209 to CCMSLIB00004751517). Technical validation was achieved by dereplicating three lichen extracts using a Molecular Networking approach, revealing the detection of eleven unique molecules that would have been missed without LDB implementation to the GNPS. From a chemist's viewpoint, this database should help streamlining the isolation of formerly unreported metabolites. From a taxonomist perspective, the LDB offers a versatile tool for the chemical profiling of newly reported species.",Lichen DataBase,LDB,https://www.ebi.ac.uk/metabolights/MTBLS999,A database of high-resolution MS/MS spectra for lichen metabolites +32119071,ProCaff: protein-carbohydrate complex binding affinity database.,"MOTIVATION:Protein-carbohydrate interactions perform several cellular and biological functions and their structure and function are mainly dictated by their binding affinity. Although plenty of experimental data on binding affinity are available, there is no reliable and comprehensive database in the literature. RESULTS:We have developed a database on binding affinity of protein-carbohydrate complexes, ProCaff, which contains 3122 entries on dissociation constant (Kd), Gibbs free energy change (G), experimental conditions, sequence, structure and literature information. Additional features include the options to search, display, visualization, download and upload the data. 
AVAILABILITY AND IMPLEMENTATION:The database is freely available at http://web.iitm.ac.in/bioinfo2/procaff/. The website is implemented using HTML and PHP and supports recent versions of major browsers such as Chrome, Firefox, IE10 and Opera. CONTACT:gromiha@iitm.ac.in. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",protein-carbohydrate complex binding affinity database,ProCaff,http://web.iitm.ac.in/bioinfo2/procaff/,a database on binding affinity of protein-carbohydrate complexes +32159215,TeaMiD: a comprehensive database of simple sequence repeat markers of tea.,"Tea is a highly cross-pollinated, woody, perennial tree. High heterozygosity combined with a long gestational period makes conventional breeding a cumbersome process. Therefore, marker-assisted breeding is a better alternative approach when compared with conventional breeding. Considering the large genome size of tea (~3 Gb), information about simple sequence repeat (SSR) is scanty. Thus, we have taken advantage of the recently published tea genomes to identify large numbers of SSR markers in the tea. Besides the genomic sequences, we identified SSRs from the other publicly available sequences such as RNA-seq, GSS, ESTs and organelle genomes (chloroplasts and mitochondrial) and also searched published literature to catalog validated set of tea SSR markers. The complete exercise yielded a total of 935 547 SSRs. Out of the total, 82 SSRs were selected for validation among a diverse set of tea genotypes. Six primers (each with four to six alleles, an average of five alleles per locus) out of the total 27 polymorphic primers were used for a diversity analysis in 36 tea genotypes with mean polymorphic information content of 0.61-0.76. 
Finally, using all the information generated in this study, we have developed a user-friendly database (TeaMiD; http://indianteagenome.in:8080/teamid/) that hosts SSR from all the six resources including three nuclear genomes of tea and transcriptome sequences of 17 Camellia wild species. Database URL: http://indianteagenome.in:8080/teamid/.",TeaMiD,TeaMiD,http://indianteagenome.in:8080/teamid/,a comprehensive database of simple sequence repeat markers of tea +32219412,Circad: a comprehensive manually curated resource of circular RNA associated with diseases.,"Circular RNAs (circRNAs) are unique transcript isoforms characterized by back splicing of exon ends to form a covalently closed loop or circular conformation. These transcript isoforms are now known to be expressed in a variety of organisms across the kingdoms of life. Recent studies have shown the role of circRNAs in a number of diseases and increasing evidence points to their potential application as biomarkers in these diseases. We have created a comprehensive manually curated database of circular RNAs associated with diseases. This database is available at URL http://clingen.igib.res.in/circad/. The Database lists more than 1300 circRNAs associated with 150 diseases and mapping to 113 International Statistical Classification of Diseases (ICD) codes with evidence of association linked to published literature. The database is unique in many ways. Firstly, it provides ready-to-use primers to work with, in order to use circRNAs as biomarkers or to perform functional studies. It additionally lists the assay and PCR primer details including experimentally validated ones as a ready reference to researchers along with fold change and statistical significance. It also provides standard disease nomenclature as per the ICD codes. 
To the best of our knowledge, circad is the most comprehensive and updated database of disease associated circular RNAs.Availability: http://clingen.igib.res.in/circad/.",Circad,Circad,http://clingen.igib.res.in/circad,a comprehensive manually curated resource of circular RNA associated with diseases +32277449,Choice of the Promoter for Tissue and Developmental Stage-Specific Gene Expression.,"Transgenic technologies belong to important tools of reverse genetics and biotechnology in plants. Targeted genetic modifications can reveal functions of genes of interest, change metabolic and regulatory pathways, or result in accumulation of valuable proteins or metabolites. However, to be efficient in targeted genetic modification, the chimeric gene construct should be designed properly. In particular, the promoters used to control transgene expression need to be carefully chosen. Most promoters in widely used vectors belong to strong and constitutively expressed variants. However, in many cases transgene expression has to be restricted to certain tissue, stage of development, or response to some internal or external stimuli. In turn, a large variety of tissue-specific promoters have been studied and information on their characteristics may be recovered from the literature. An appropriate promoter may be selected and used in genetic construct to optimize the transgene transcription pattern. We have previously designed the TGP database (TransGene Promoters, http://wwwmgs.bionet.nsc.ru/mgs/dbases/tgp/home.html ) collecting information from the publications in this field. 
Here we review the wide range of noncanonical tissue-specific and developmentally regulated promoters that might be used for transgene expression control.",TransGene Promoters,TGP database,http://wwwmgs.bionet.nsc.ru/mgs/dbases/tgp/home.html, +32404014,PDB-2-PBv3.0: An updated protein block database.,"Our protein block (PB) sequence database PDB-2-PBv1.0 provides PB sequences and dihedral angles for 74,297 protein structures comprising of 103,252 protein chains of Protein Data Bank (PDB) as on 2011. Since there are a lot of practical applications of PB and also as the size of PDB database increases, it becomes necessary to provide the PB sequences for all PDB protein structures. The current updated PDB-2-PBv3.0 contains PB sequences for 147,602 PDB structures comprising of 400,355 protein chains as on October 2019. When compared to our previous version PDB-2-PBv1.0, the current PDB-2-PBv3.0 contains 2- and 4-fold increase in the number of protein structures and chains, respectively. Notably, it provides PB information for any protein chain, regardless of the missing atom records of protein structure data in PDB. It includes protein interaction information with DNA and RNA along with their corresponding functional classes from Nucleic Acid Database (NDB) and PDB. Now, the updated version allows the user to download multiple PB records by parameter search and/or by a given list. This database is freely accessible at http://bioinfo.bdu.ac.in/pb3.",PDB-2-PBv3.0,PDB-2-PBv3.0,http://bioinfo.bdu.ac.in/pb3,An updated protein block database. +32422927,HDVdb: A Comprehensive Hepatitis D Virus Database.,"Hepatitis D virus (HDV) causes the most severe form of viral hepatitis, which may rapidly progress to liver cirrhosis and hepatocellular carcinoma (HCC). It has been estimated that 15-20 million people worldwide are suffering from the chronic HDV infection. Currently, no effective therapies are available to treat acute or chronic HDV infection. 
The remarkable sequence variability of the HDV genome, particularly within the hypervariable region has resulted in the provisional classification of eight major genotypes and various subtypes. We have developed a specialized database, HDVdb (http://hdvdb.bio.wzw.tum.de/), which contains a collection of partial and complete HDV genomic sequences obtained from the GenBank and from our own patient cohort. HDVdb enables the researchers to investigate the genetic variability of all available HDV sequences, correlation of genotypes to epidemiology and pathogenesis. Additionally, it will contribute in understanding the drug resistant mutations and develop effective vaccines against HDV infection. The database can be accessed through a web interface that allows for static and dynamic queries and offers integrated generic and specialized sequence analysis tools, such as annotation, genotyping, primer prediction, and phylogenetic analyses.",HDVdb,HDVdb,http://hdvdb.bio.wzw.tum.de/,A Comprehensive Hepatitis D Virus Database +32647128,"IDEAL, the Infectious Diseases of East African Livestock project open access database and biobank.","The Infectious Diseases of East African Livestock (IDEAL) project was a longitudinal cohort study of calf health which was conducted in Western Kenya between 2007-2010. A total of 548 East African shorthorn zebu calves were recruited at birth and followed at least every 5 weeks during the first year of life. Comprehensive clinical and epidemiological data, blood and tissue samples were collected at every visit. These samples were screened for over 100 different pathogens or infectious exposures, using a range of diagnostic methods. This manuscript describes this comprehensive dataset and bio-repository, and how to access it through a single online site ( http://data.ctlgh.org/ideal/ ). This provides extensive filtering and searching capabilities. 
These data are useful to illustrate outcomes of multiple infections on health, investigate patterns of morbidity and mortality due to parasite infections, and to study genotypic determinants of immunity and disease.",Infectious Diseases of East African Livestock,IDEAL,http://data.ctlgh.org/ideal/,the Infectious Diseases of East African Livestock project open access database and biobank +32858223,hTFtarget: A Comprehensive Database for Regulations of Human Transcription Factors and Their Targets.,"Transcription factors (TFs) as key regulators play crucial roles in biological processes. The identification of TF-target regulatory relationships is a key step for revealing functions of TFs and their regulations on gene expression. The accumulated data of chromatin immunoprecipitation sequencing (ChIP-seq) provide great opportunities to discover the TF-target regulations across different conditions. In this study, we constructed a database named hTFtarget, which integrated huge human TF target resources (7190 ChIP-seq samples of 659 TFs and high-confidence binding sites of 699 TFs) and epigenetic modification information to predict accurate TF-target regulations. hTFtarget offers the following functions for users to explore TF-target regulations: (1) browse or search general targets of a query TF across datasets; (2) browse TF-target regulations for a query TF in a specific dataset or tissue; (3) search potential TFs for a given target gene or non-coding RNA; (4) investigate co-association between TFs in cell lines; (5) explore potential co-regulations for given target genes or TFs; (6) predict candidate TF binding sites on given DNA sequences; (7) visualize ChIP-seq peaks for different TFs and conditions in a genome browser. hTFtarget provides a comprehensive, reliable and user-friendly resource for exploring human TF-target regulations, which will be very useful for a wide range of users in the TF and gene expression regulation community. 
hTFtarget is available at http://bioinfo.life.hust.edu.cn/hTFtarget.",hTFtarget,hTFtarget,http://bioinfo.life.hust.edu.cn/hTFtarget,"a comprehensive, reliable and user-friendly resource for exploring human TF-target regulations" +32858223,hTFtarget: A Comprehensive Database for Regulations of Human Transcription Factors and Their Targets.,"Transcription factors (TFs) as key regulators play crucial roles in biological processes. The identification of TF-target regulatory relationships is a key step for revealing functions of TFs and their regulations on gene expression. The accumulated data of chromatin immunoprecipitation sequencing (ChIP-seq) provide great opportunities to discover the TF-target regulations across different conditions. In this study, we constructed a database named hTFtarget, which integrated huge human TF target resources (7190 ChIP-seq samples of 659 TFs and high-confidence binding sites of 699 TFs) and epigenetic modification information to predict accurate TF-target regulations. hTFtarget offers the following functions for users to explore TF-target regulations: (1) browse or search general targets of a query TF across datasets; (2) browse TF-target regulations for a query TF in a specific dataset or tissue; (3) search potential TFs for a given target gene or non-coding RNA; (4) investigate co-association between TFs in cell lines; (5) explore potential co-regulations for given target genes or TFs; (6) predict candidate TF binding sites on given DNA sequences; (7) visualize ChIP-seq peaks for different TFs and conditions in a genome browser. hTFtarget provides a comprehensive, reliable and user-friendly resource for exploring human TF-target regulations, which will be very useful for a wide range of users in the TF and gene expression regulation community. 
hTFtarget is available at http://bioinfo.life.hust.edu.cn/hTFtarget.",hTFtarget,hTFtarget,http://bioinfo.life.hust.edu.cn/hTFtarget,A Comprehensive Database for Regulations of Human Transcription Factors and Their Targets +32931381,ZenoFishDb v1.1: A Database for Xenotransplantation Studies in Zebrafish.,"Rapidly accumulating literature has proven feasibility of the zebrafish xenograft models in cancer research. Nevertheless, online databases for searching the current zebrafish xenograft literature are in great demand. Herein, we have developed a manually curated database, called ZenoFishDb v1.1 (https://konulab.shinyapps.io/zenofishdb), based on R Shiny platform aiming to provide searchable information on ever increasing collection of zebrafish studies for cancer cell line transplantation and patient-derived xenografts (PDXs). ZenoFishDb v1.1 user interface contains four modules: DataTable, Visualization, PDX Details, and PDX Charts. The DataTable and Visualization pages represent xenograft study details, including injected cell lines, PDX injections, molecular modifications of cell lines, zebrafish strains, as well as technical aspects of the xenotransplantation procedures in table, bar, and/or pie chart formats. The PDX Details module provides comprehensive information on the patient details in table format and can be searched and visualized. 
Overall, ZenoFishDb v1.1 enables researchers to effectively search, list, and visualize different technical and biological attributes of zebrafish xenotransplantation studies particularly focusing on the new trends that make use of reporters, RNA interference, overexpression, or mutant gene constructs of transplanted cancer cells, stem cells, and PDXs, as well as distinguished host modifications.",ZenoFishDb v1.1,ZenoFishDb,https://konulab.shinyapps.io/zenofishdb,A Database for Xenotransplantation Studies in Zebrafish +33008298,Database: web application for visualization of the cumulated RNAseq data against the salicylic acid (SA) and methyl jasmonate (MeJA) treatment of Arabidopsis thaliana.,"

Background

Plants have adapted to survive under adverse conditions or exploit favorable conditions in response to their environment as sessile creatures. In a way of plant adaptation, plant hormones have been evolved to efficiently use limited resources. Plant hormones including auxin, jasmonic acid, salicylic acid, and ethylene have been studied to reveal their role in plant adaptation against their environment by phenotypic observation with experimental design such as mutation on hormone receptors and treatment / non-treatment of plant hormones along with other environmental conditions. With the development of Next Generation Sequencing (NGS) technology, it became possible to score the total gene expression of the sampled plants and estimate the degree of effect of plant hormones in gene expression. This allowed us to infer the signaling pathway through plant hormones, which greatly stimulated the study of functional genomics using mutants. Due to the continued development of NGS technology and analytical techniques, many plant hormone-related studies have produced and accumulated NGS-based data, especially RNAseq data have been stored in the sequence read archive represented by NCBI, EBI, and DDBJ.

Description

Here, hormone treatment RNAseq data of Arabidopsis (Col0), wild-type genotype, were collected with mock, SA, and MeJA treatments. The genes affected by hormones were identified through a machine learning approach. The degree of expression of the affected gene was quantified, visualized in boxplot using d3 (data-driven-document), and the database was built by Django.

Conclusion

Using this database, we created a web application ( http://pgl.gnu.ac.kr/hormoneDB/ ) that lists hormone-related or hormone-affected genes and visualizes the boxplot of the gene expression of selected genes. This web application eventually aids the functional genomics researchers who want to gather the cases of the gene responses by the hormones.",,,http://pgl.gnu.ac.kr/hormoneDB/,web application for visualization of the cumulated RNAseq data against the salicylic acid (SA) and methyl jasmonate (MeJA) treatment of Arabidopsis thaliana +33103271,The NanDeSyn database for Nannochloropsis systems and synthetic biology.,"Nannochloropsis species, unicellular industrial oleaginous microalgae, are model organisms for microalgal systems and synthetic biology. To facilitate community-based annotation and mining of the rapidly accumulating functional genomics resources, we have initiated an international consortium and present a comprehensive multi-omics resource database named Nannochloropsis Design and Synthesis (NanDeSyn; http://nandesyn.single-cell.cn). Via the Tripal toolkit, it features user-friendly interfaces hosting genomic resources with gene annotations and transcriptomic and proteomic data for six Nannochloropsis species, including two updated genomes of Nannochloropsis oceanica IMET1 and Nannochloropsis salina CCMP1776. Toolboxes for search, Blast, synteny view, enrichment analysis, metabolic pathway analysis, a genome browser, etc. are also included. In addition, functional validation of genes is indicated based on phenotypes of mutants and relevant bibliography. Furthermore, epigenomic resources are also incorporated, especially for sequencing of small RNAs including microRNAs and circular RNAs. 
Such comprehensive and integrated landscapes of Nannochloropsis genomics and epigenomics will promote and accelerate community efforts in systems and synthetic biology of these industrially important microalgae.",Nannochloropsis Design and Synthesis,NanDeSyn,http://nandesyn.single-cell.cn,database for Nannochloropsis systems and synthetic biology +33137185,iCSDB: an integrated database of CRISPR screens.,"High-throughput screening based on CRISPR-Cas9 libraries has become an attractive and powerful technique to identify target genes for functional studies. However, accessibility of public data is limited due to the lack of user-friendly utilities and up-to-date resources covering experiments from third parties. Here, we describe iCSDB, an integrated database of CRISPR screening experiments using human cell lines. We compiled two major sources of CRISPR-Cas9 screening: the DepMap portal and BioGRID ORCS. DepMap portal itself is an integrated database that includes three large-scale projects of CRISPR screening. We additionally aggregated CRISPR screens from BioGRID ORCS that is a collection of screening results from PubMed articles. Currently, iCSDB contains 1375 genome-wide screens across 976 human cell lines, covering 28 tissues and 70 cancer types. Importantly, the batch effects from different CRISPR libraries were removed and the screening scores were converted into a single metric to estimate the knockout efficiency. Clinical and molecular information were also integrated to help users to select cell lines of interest readily. Furthermore, we have implemented various interactive tools and viewers to facilitate users to choose, examine and compare the screen results both at the gene and guide RNA levels. 
iCSDB is available at https://www.kobic.re.kr/icsdb/.",iCSDB,iCSDB,https://www.kobic.re.kr/icsdb/,an integrated database of CRISPR screening experiments using human cell lines +33137185,iCSDB: an integrated database of CRISPR screens.,"High-throughput screening based on CRISPR-Cas9 libraries has become an attractive and powerful technique to identify target genes for functional studies. However, accessibility of public data is limited due to the lack of user-friendly utilities and up-to-date resources covering experiments from third parties. Here, we describe iCSDB, an integrated database of CRISPR screening experiments using human cell lines. We compiled two major sources of CRISPR-Cas9 screening: the DepMap portal and BioGRID ORCS. DepMap portal itself is an integrated database that includes three large-scale projects of CRISPR screening. We additionally aggregated CRISPR screens from BioGRID ORCS that is a collection of screening results from PubMed articles. Currently, iCSDB contains 1375 genome-wide screens across 976 human cell lines, covering 28 tissues and 70 cancer types. Importantly, the batch effects from different CRISPR libraries were removed and the screening scores were converted into a single metric to estimate the knockout efficiency. Clinical and molecular information were also integrated to help users to select cell lines of interest readily. Furthermore, we have implemented various interactive tools and viewers to facilitate users to choose, examine and compare the screen results both at the gene and guide RNA levels. iCSDB is available at https://www.kobic.re.kr/icsdb/.",iCSDB,iCSDB,https://www.kobic.re.kr/icsdb/,an integrated database of CRISPR screens +33166383,FireProtDB: database of manually curated protein stability data.,"The majority of naturally occurring proteins have evolved to function under mild conditions inside the living organisms. 
One of the critical obstacles for the use of proteins in biotechnological applications is their insufficient stability at elevated temperatures or in the presence of salts. Since experimental screening for stabilizing mutations is typically laborious and expensive, in silico predictors are often used for narrowing down the mutational landscape. The recent advances in machine learning and artificial intelligence further facilitate the development of such computational tools. However, the accuracy of these predictors strongly depends on the quality and amount of data used for training and testing, which have often been reported as the current bottleneck of the approach. To address this problem, we present a novel database of experimental thermostability data for single-point mutants FireProtDB. The database combines the published datasets, data extracted manually from the recent literature, and the data collected in our laboratory. Its user interface is designed to facilitate both types of the expected use: (i) the interactive explorations of individual entries on the level of a protein or mutation and (ii) the construction of highly customized and machine learning-friendly datasets using advanced searching and filtering. The database is freely available at https://loschmidt.chemi.muni.cz/fireprotdb.",FireProtDB,FireProtDB,https://loschmidt.chemi.muni.cz/fireprotdb,a novel database of experimental thermostability data for single-point mutants +33166383,FireProtDB: database of manually curated protein stability data.,"The majority of naturally occurring proteins have evolved to function under mild conditions inside the living organisms. One of the critical obstacles for the use of proteins in biotechnological applications is their insufficient stability at elevated temperatures or in the presence of salts. 
Since experimental screening for stabilizing mutations is typically laborious and expensive, in silico predictors are often used for narrowing down the mutational landscape. The recent advances in machine learning and artificial intelligence further facilitate the development of such computational tools. However, the accuracy of these predictors strongly depends on the quality and amount of data used for training and testing, which have often been reported as the current bottleneck of the approach. To address this problem, we present a novel database of experimental thermostability data for single-point mutants FireProtDB. The database combines the published datasets, data extracted manually from the recent literature, and the data collected in our laboratory. Its user interface is designed to facilitate both types of the expected use: (i) the interactive explorations of individual entries on the level of a protein or mutation and (ii) the construction of highly customized and machine learning-friendly datasets using advanced searching and filtering. The database is freely available at https://loschmidt.chemi.muni.cz/fireprotdb.",FireProtDB,FireProtDB,https://loschmidt.chemi.muni.cz/fireprotdb,database of manually curated protein stability data +33313828,PSORTdb 4.0: expanded and redesigned bacterial and archaeal protein subcellular localization database incorporating new secondary localizations.,"Protein subcellular localization (SCL) is important for understanding protein function, genome annotation, and aids identification of potential cell surface diagnostic markers, drug targets, or vaccine components. PSORTdb comprises ePSORTdb, a manually curated database of experimentally verified protein SCLs, and cPSORTdb, a pre-computed database of PSORTb-predicted SCLs for NCBI's RefSeq deduced bacterial and archaeal proteomes. We now report PSORTdb 4.0 (http://db.psort.org/). It features a website refresh, in particular a more user-friendly database search. 
It also addresses the need to uniquely identify proteins from NCBI genomes now that GI numbers have been retired. It further expands both ePSORTdb and cPSORTdb, including additional data about novel secondary localizations, such as proteins found in bacterial outer membrane vesicles. Protein predictions in cPSORTdb have increased along with the number of available microbial genomes, from approximately 13 million when PSORTdb 3.0 was released, to over 66 million currently. Now, analyses of both complete and draft genomes are included. This expanded database will be of wide use to researchers developing SCL predictors or studying diverse microbes, including medically, agriculturally and industrially important species that have both classic or atypical cell envelope structures or vesicles.",PSORTdb 4.0,PSORTdb,http://db.psort.org/,expanded and redesigned bacterial and archaeal protein subcellular localization database incorporating new secondary localizations +33511845,FMODB: The World's First Database of Quantum Mechanical Calculations for Biomacromolecules Based on the Fragment Molecular Orbital Method.,"We developed the world's first web-based public database for the storage, management, and sharing of fragment molecular orbital (FMO) calculation data sets describing the complex interactions between biomacromolecules, named FMO Database (https://drugdesign.riken.jp/FMODB/). Each entry in the database contains relevant background information on how the data was compiled as well as the total energy of each molecular system and interfragment interaction energy (IFIE) and pair interaction energy decomposition analysis (PIEDA) values. Currently, the database contains more than 13 600 FMO calculation data sets, and a comprehensive search function implemented at the front-end. The procedure for selecting target proteins, preprocessing the experimental structures, construction of the database, and details of the database front-end were described. 
Then, we demonstrated a use of the FMODB by comparing IFIE value distributions of hydrogen bond, ion-pair, and XH/p interactions obtained by FMO method to those by molecular mechanics approach. From the comparison, the statistical analysis of the data provided standard reference values for the three types of interactions that will be useful for determining whether each interaction in a given system is relatively strong or weak compared to the interactions contained within the data in the FMODB. In the final part, we demonstrate the use of the database to examine the contribution of halogen atoms to the binding affinity between human cathepsin L and its inhibitors. We found that the electrostatic term derived by PIEDA greatly correlated with the binding affinities of the halogen containing cathepsin L inhibitors, indicating the importance of QM calculation for quantitative analysis of halogen interactions. Thus, the FMO calculation data in FMODB will be useful for conducting statistical analyses to drug discovery, for conducting molecular recognition studies in structural biology, and for other studies involving quantum mechanics-based interactions.",FMO Database,FMODB,https://drugdesign.riken.jp/FMODB/,The World's First Database of Quantum Mechanical Calculations for Biomacromolecules Based on the Fragment Molecular Orbital Method +33704069,"The Global Landscape of SARS-CoV-2 Genomes, Variants, and Haplotypes in 2019nCoVR.","On January 22, 2020, China National Center for Bioinformation (CNCB) released the 2019 Novel Coronavirus Resource (2019nCoVR), an open-access information resource for the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). 2019nCoVR features a comprehensive integration of sequence and clinical information for all publicly available SARS-CoV-2 isolates, which are manually curated with value-added annotations and quality evaluated by an automated in-house pipeline. 
Of particular note, 2019nCoVR offers systematic analyses to generate a dynamic landscape of SARS-CoV-2 genomic variations at a global scale. It provides all identified variants and their detailed statistics for each virus isolate, and congregates the quality score, functional annotation, and population frequency for each variant. Spatiotemporal change for each variant can be visualized and historical viral haplotype network maps for the course of the outbreak are also generated based on all complete and high-quality genomes available. Moreover, 2019nCoVR provides a full collection of SARS-CoV-2 relevant literature on the coronavirus disease 2019 (COVID-19), including published papers from PubMed as well as preprints from services such as bioRxiv and medRxiv through Europe PMC. Furthermore, by linking with relevant databases in CNCB, 2019nCoVR offers data submission services for raw sequence reads and assembled genomes, and data sharing with NCBI. Collectively, SARS-CoV-2 is updated daily to collect the latest information on genome sequences, variants, haplotypes, and literature for a timely reflection, making 2019nCoVR a valuable resource for the global research community. 2019nCoVR is accessible at https://bigd.big.ac.cn/ncov/.",2019 Novel Coronavirus Resource,2019nCoVR,https://bigd.big.ac.cn/ncov/,an open-access information resource for the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) +33787872,Drugmonizome and Drugmonizome-ML: integration and abstraction of small molecule attributes for drug enrichment analysis and machine learning.,"Understanding the underlying molecular and structural similarities between seemingly heterogeneous sets of drugs can aid in identifying drug repurposing opportunities and assist in the discovery of novel properties of preclinical small molecules. 
A wealth of information about drug and small molecule structure, targets, indications and side effects; induced gene expression signatures; and other attributes are publicly available through web-based tools, databases and repositories. By processing, abstracting and aggregating information from these resources into drug set libraries, knowledge about novel properties of drugs and small molecules can be systematically imputed with machine learning. In addition, drug set libraries can be used as the underlying database for drug set enrichment analysis. Here, we present Drugmonizome, a database with a search engine for querying annotated sets of drugs and small molecules for performing drug set enrichment analysis. Utilizing the data within Drugmonizome, we also developed Drugmonizome-ML. Drugmonizome-ML enables users to construct customized machine learning pipelines using the drug set libraries from Drugmonizome. To demonstrate the utility of Drugmonizome, drug sets from 12 independent SARS-CoV-2 in vitro screens were subjected to consensus enrichment analysis. Despite the low overlap among these 12 independent in vitro screens, we identified common biological processes critical for blocking viral replication. To demonstrate Drugmonizome-ML, we constructed a machine learning pipeline to predict whether approved and preclinical drugs may induce peripheral neuropathy as a potential side effect. Overall, the Drugmonizome and Drugmonizome-ML resources provide rich and diverse knowledge about drugs and small molecules for direct systems pharmacology applications. 
Database URL: https://maayanlab.cloud/drugmonizome/.",Drugmonizome,Drugmonizome,https://maayanlab.cloud/drugmonizome/,a database with a search engine for querying annotated sets of drugs and small molecules for performing drug set enrichment analysis +33798715,COnVIDa: COVID-19 multidisciplinary data collection and dashboard.,"Since the first reported case in Wuhan in late 2019, COVID-19 has rapidly spread worldwide, dramatically impacting the lives of millions of citizens. To deal with the severe crisis resulting from the pandemic, worldwide institutions have been forced to make decisions that profoundly affect the socio-economic realm. In this sense, researchers from diverse knowledge areas are investigating the behavior of the disease in a rush against time. In both cases, the lack of reliable data has been an obstacle to carry out such tasks with accuracy. To tackle this challenge, COnVIDa (https://convida.inf.um.es) has been designed and developed as a user-friendly tool that easily gathers rigorous multidisciplinary data related to the COVID-19 pandemic from different data sources. In particular, the pandemic expansion is analyzed with variables of health nature, but also social ones, mobility, etc. Besides, COnVIDa permits to smoothly join such data, compare and download them for further analysis. Due to the open-science nature of the project, COnVIDa is easily extensible to any other region of the planet. In this way, COnVIDa becomes a data facilitator for decision-making processes, as well as a catalyst for new scientific researches related to this pandemic.",COnVIDa,COnVIDa,https://convida.inf.um.es,COVID-19 multidisciplinary data collection and dashboard +34177338,MassBase: A large-scaled depository of mass spectrometry datasets for metabolome analysis.,"Depository of low-molecular-weight compounds or metabolites detected in various organisms in a non-targeted manner is indispensable for metabolomics research. 
Due to the diverse chemical compounds, various mass spectrometry (MS) setups with state-of-the-art technologies have been used. Over the past two decades, we have analyzed various biological samples by using gas chromatography-mass spectrometry, liquid chromatography-mass spectrometry, or capillary electrophoresis-mass spectrometry, and archived the datasets in the depository MassBase (http://webs2.kazusa.or.jp/massbase/). As the format of MS datasets depends on the MS setup used, we converted each raw binary dataset of the mass chromatogram to text file format, and thereafter, information of the chromatograph peak was extracted in the text file from the converted file. In total, the depository comprises 46,493 datasets, of which 38,750 belong to the plant species and 7,743 are authentic or mixed chemicals as well as other sources (microorganisms, animals, and foods), as on August 1, 2020. All files in the depository can be downloaded in bulk from the website. Mass chromatograms of 90 plant species obtained by LC-Fourier transform ion cyclotron resonance MS or Orbitrap MS, which detect the ionized molecules with high accuracy allowing speculation of chemical compositions, were converted to text files by the software PowerGet, and the chemical annotation of each peak was added. The processed datasets were deposited in the annotation database KomicMarket2 (http://webs2.kazusa.or.jp/km2/). The archives provide fundamental resources for comparative metabolomics and functional genomics, which may result in deeper understanding of living organisms.",MassBase,MassBase,http://webs2.kazusa.or.jp/massbase/,A large-scaled depository of mass spectrometry datasets for metabolome analysis +IND601142821,MulSatDB: a first online database for mulberry microsatellites,"KEY MESSAGE : Simple sequence repeat motifs were mined from the genome and EST sequences of Morus notabilis and archived in MulSatDB. 
Bioinformatics tools were integrated with the database for the analysis of genomic datasets. Mulberry is a crop of economic importance in sericulture, which shapes the lives of millions of rural people among different Eurasian and Latin American countries. Limited availability of genomic resources has constrained the molecular breeding efforts in mulberry, a poorly studied crop. Microsatellite or simple sequence repeat (SSR) has revolutionized the plant breeding and is used in linkage mapping, association studies, diversity, and parentage analysis, etc. Recent availability of mulberry whole genome assembly provided an opportunity for the development of mulberry-specific DNA markers. In this study, we mined a total of 217,312 microsatellites from whole genome and 961 microsatellites from EST sequences of Morus notabilis. Mono-repeats were predominant among both whole genome and EST sequences. The SSR containing EST sequences were functionally annotated, and SSRs mined from whole genome were mapped on chromosomes of the phylogenetically related genus—Fragaria vesca, to aid the selection of markers based on the function and location. All the mined markers were archived in the mulberry microsatellite database (MulSatDB), and the markers can be retrieved based on different criteria like marker location, repeat kind, motif type and size. Primer3plus and CMap tools are integrated with the database to design primers for PCR amplification and to visualize markers on F. vesca chromosomes, respectively. A blast tool is also integrated to collate new markers with the database. MulSatDB is the first and complete destination for mulberry researchers to browse SSR markers, design primers, and locate markers on strawberry chromosomes. 
MulSatDB is freely accessible at http://btismysore.in/mulsatdb .",MulSatDB,MulSatDB,http://btismysore.in/mulsatdb,a first online database for mulberry microsatellites +IND606040020,Global database of plants with root‐symbiotic nitrogen fixation: NodDB,"Plants associated with symbiotic N-fixing bacteria play important roles in early successional, riparian and semi-dry ecosystems. These so-called N-fixing plants are widely used for reclamation of disturbed vegetation and improvement of soil fertility in agroforestry. Yet, available information about plants that are capable of establishing nodulation is fragmented and somewhat outdated. This article introduces the NodDB database of N-fixing plants based on morphological and phylogenetic evidence (available at https://doi.org/10.15156/bio/587469) and discusses plant groups with conflicting reports and interpretation, such as certain legume clades and the Zygophyllaceae family. During angiosperm evolution, N-fixing plants became common in the fabid rather than in the ‘nitrogen-fixing’ clade. The global GBIF plant species distribution data indicated that N-fixing plants tend to be relatively more diverse in savanna and semi-desert biomes. 
The compiled and re-interpreted information about N-fixing plants enables accurate analyses of biogeography and community ecology of biological N fixation.",NodDB,NodDB,https://doi.org/10.15156/bio/587469,Global database of plants with root‐symbiotic nitrogen fixation diff --git a/data/manually_reviewed_inventory.csv b/data/manually_reviewed_inventory.csv new file mode 100644 index 0000000..fc611db --- /dev/null +++ b/data/manually_reviewed_inventory.csv @@ -0,0 +1,3566 @@ +ID,extracted_url,text,best_common,best_common_prob,best_full,best_full_prob,best_name,best_name_prob,article_count,duplicate_urls,duplicate_names,low_prob,review_low_prob,review_dup_urls,review_dup_names,review_notes_low_prob,review_notes_dup_urls,review_notes_dup_names,publication_date +22451271,http://1001proteomes.masc-proteomics.org,"1001 Proteomes: a functional proteomics portal for the analysis of Arabidopsis thaliana accessions. Motivation The sequencing of over a thousand natural strains of the model plant Arabidopsis thaliana is producing unparalleled information at the genetic level for plant researchers. To enable the rapid exploitation of these data for functional proteomics studies, we have created a resource for the visualization of protein information and proteomic datasets for sequenced natural strains of A. thaliana. Results The 1001 Proteomes portal can be used to visualize amino acid substitutions or non-synonymous single-nucleotide polymorphisms in individual proteins of A. thaliana based on the reference genome Col-0. We have used the available processed sequence information to analyze the conservation of known residues subject to protein phosphorylation among these natural strains. The substitution of amino acids in A. thaliana natural strains is heavily constrained and is likely a result of the conservation of functional attributes within proteins. 
At a practical level, we demonstrate that this information can be used to clarify ambiguously defined phosphorylation sites from phosphoproteomic studies. Protein sets of available natural variants are available for download to enable proteomic studies on these accessions. Together this information can be used to uncover the possible roles of specific amino acids in determining the structure and function of proteins in the model plant A. thaliana. An online portal to enable the community to exploit these data can be accessed at http://1001proteomes.masc-proteomics.org/",1001,0.714360118,NA,0,1001,0.714360118,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/25/2012 +32246067,http://microgreen-23sdatabase.ea.inra.fr,"√ɬÉ√ǬÇ√ɬÇ√Ǭµgreen-db: a reference database for the 23S rRNA gene of eukaryotic plastids and cyanobacteria. Studying the ecology of photosynthetic microeukaryotes and prokaryotic cyanobacterial communities requires molecular tools to complement morphological observations. These tools rely on specific genetic markers and require the development of specialised databases to achieve taxonomic assignment. We set up a reference database, called √ɬÉ√ǬÇ√ɬÇ√Ǭµgreen-db, for the 23S rRNA gene. The sequences were retrieved from generalist (NCBI, SILVA) or Comparative RNA Web (CRW) databases, in addition to a more original approach involving recursive BLAST searches to obtain the best possible sequence recovery. At present, √ɬÉ√ǬÇ√ɬÇ√Ǭµgreen-db includes 2,326 23S rRNA sequences belonging to both eukaryotes and prokaryotes encompassing 442 unique genera and 736 species of photosynthetic microeukaryotes, cyanobacteria and non-vascular land plants based on the NCBI and AlgaeBase taxonomy. When PR2/SILVA taxonomy is used instead, √ɬÉ√ǬÇ√ɬÇ√Ǭµgreen-db contains 2,217 sequences (399 unique genera and 696 unique species). 
Using √ɬÉ√ǬÇ√ɬÇ√Ǭµgreen-db, we were able to assign 96% of the sequences of the V domain of the 23S rRNA gene obtained by metabarcoding after amplification from soil DNA at the genus level, highlighting good coverage of the database. √ɬÉ√ǬÇ√ɬÇ√Ǭµgreen-db is accessible at http://microgreen-23sdatabase.ea.inra.fr.",√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭµgreen-db,0.816602846,NA,0,√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√Ǭµgreen-db,0.816602846,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,4/3/2020 +28511181,http://slsdb.manipal.edu/ocm,"1-CMDb: A Curated Database of Genomic Variations of the One-Carbon Metabolism Pathway. Background The one-carbon metabolism pathway is vital in maintaining tissue homeostasis by driving the critical reactions of folate and methionine cycles. A myriad of genetic and epigenetic events mark the rate of reactions in a tissue-specific manner. Integration of these to predict and provide personalized health management requires robust computational tools that can process multiomics data. The DNA sequences that may determine the chain of biological events and the endpoint reactions within one-carbon metabolism genes remain to be comprehensively recorded. Hence, we designed the one-carbon metabolism database (1-CMDb) as a platform to interrogate its association with a host of human disorders. Methods DNA sequence and network information of a total of 48 genes were extracted from a literature survey and KEGG pathway that are involved in the one-carbon folate-mediated pathway. The information generated, collected, and compiled for all these genes from the UCSC genome browser included the single nucleotide polymorphisms (SNPs), CpGs, copy number variations (CNVs), and miRNAs, and a comprehensive database was created. Furthermore, a significant correlation analysis was performed for SNPs in the pathway genes. Results Detailed data of SNPs, CNVs, CpG islands, and miRNAs for 48 folate pathway genes were compiled. 
The SNPs in CNVs (9670), CpGs (984), and miRNAs (14) were also compiled for all pathway genes. The SIFT score, the prediction and PolyPhen score, as well as the prediction for each of the SNPs were tabulated and represented for folate pathway genes. Also included in the database for folate pathway genes were the links to 124 various phenotypes and disease associations as reported in the literature and from publicly available information. Conclusion A comprehensive database was generated consisting of genomic elements within and among SNPs, CNVs, CpGs, and miRNAs of one-carbon metabolism pathways to facilitate (a) single source of information and (b) integration into large-genome scale network analysis to be developed in the future by the scientific community. The database can be accessed at http://slsdb.manipal.edu/ocm/.",1-CMDb,0.992982775,one-carbon metabolism database,0.925644517,1-CMDb,0.992982775,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/17/2017 +24275494,http://hsb.upf.edu,"1000 Genomes Selection Browser 1.0: a genome browser dedicated to signatures of natural selection in modern humans. Searching for Darwinian selection in natural populations has been the focus of a multitude of studies over the last decades. Here we present the 1000 Genomes Selection Browser 1.0 (http://hsb.upf.edu) as a resource for signatures of recent natural selection in modern humans. We have implemented and applied a large number of neutrality tests as well as summary statistics informative for the action of selection such as Tajima's D, CLR, Fay and Wu's H, Fu and Li's F* and D*, XPEHH, √ɬÉ√Ǭé√ɬÇ√ǬîiHH, iHS, F(ST), √ɬÉ√Ǭé√ɬÇ√ǬîDAF and XPCLR among others to low coverage sequencing data from the 1000 genomes project (Phase 1; release April 2012). We have implemented a publicly available genome-wide browser to communicate the results from three different populations of West African, Northern European and East Asian ancestry (YRI, CEU, CHB). 
Information is provided in UCSC-style format to facilitate the integration with the rich UCSC browser tracks and an access page is provided with instructions and for convenient visualization. We believe that this expandable resource will facilitate the interpretation of signals of selection on different temporal, geographical and genomic scales.",NA,0,1000 Genomes Selection Browser,0.770321417,1000 Genomes Selection Browser,0.770321417,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/25/2013 +30304689,http://10kimmunomes.org,"The 10,000 Immunomes Project: Building a Resource for Human Immunology. There is increasing appreciation that the immune system plays critical roles not only in the traditional domains of infection and inflammation but also in many areas of biology, including tumorigenesis, metabolism, and even neurobiology. However, one of the major barriers for understanding human immunological mechanisms is that immune assays have not been reproducibly characterized for a sufficiently large and diverse healthy human cohort. Here, we present the 10,000 Immunomes Project (10KIP), a√ɬÉ√ǬÇ√ɬÇ√Ǭ†framework for growing a diverse human immunology reference, from ImmPort, a publicly available resource of subject-level immunology data. Although some measurement types are sparse in the presently deposited ImmPort database, the extant data allow for a diversity of robust comparisons. Using 10KIP, we describe variations in serum cytokines and leukocytes by age, race, and sex; define a baseline cell-cytokine network; and describe immunologic changes in pregnancy. 
All data in the resource are available for visualization and download at http://10kimmunomes.org/.",10KIP,0.928437392,"10,000 Immunomes Project",0.844234129,10KIP,0.928437392,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/1/2018 +21389154,http://147.8.74.24/16SpathDB,"Automated identification of medically important bacteria by 16S rRNA gene sequencing using a novel comprehensive database, 16SpathDB. Despite the increasing use of 16S rRNA gene sequencing, interpretation of 16S rRNA gene sequence results is one of the most difficult problems faced by clinical microbiologists and technicians. To overcome the problems we encountered in the existing databases during 16S rRNA gene sequence interpretation, we built a comprehensive database, 16SpathDB (http://147.8.74.24/16SpathDB) based on the 16S rRNA gene sequences of all medically important bacteria listed in the Manual of Clinical Microbiology and evaluated its use for automated identification of these bacteria. Among 91 nonduplicated bacterial isolates collected in our clinical microbiology laboratory, 71 (78%) were reported by 16SpathDB as a single bacterial species having >98.0% nucleotide identity with the query sequence, 19 (20.9%) were reported as more than one bacterial species having >98.0% nucleotide identity with the query sequence, and 1 (1.1%) was reported as no match. For the 71 bacterial isolates reported as a single bacterial species, all results were identical to their true identities as determined by a polyphasic approach. For the 19 bacterial isolates reported as more than one bacterial species, all results contained their true identities as determined by a polyphasic approach and all of them had their true identities as the ""best match in 16SpathDB."" For the isolate (Gordonibacter pamelaeae) reported as no match, the bacterium has never been reported to be associated with human disease and was not included in the Manual of Clinical Microbiology. 
16SpathDB is an automated, user-friendly, efficient, accurate, and regularly updated database for 16S rRNA gene sequence interpretation in clinical microbiology laboratories.",16SpathDB,0.996311396,NA,0,16SpathDB,0.996311396,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/9/2011 +"32102777, 33704069",http://bigd.big.ac.cn/ncov,"The 2019 novel coronavirus resource. An ongoing outbreak of a novel coronavirus infection in Wuhan, China since December 2019 has led to 31,516 infected persons and 638 deaths across 25 countries (till 16:00 on February 7, 2020). The virus causing this pneumonia was then named as the 2019 novel coronavirus (2019-nCoV) by the World Health Organization. To promote the data sharing and make all relevant information of 2019-nCoV publicly available, we construct the 2019 Novel Coronavirus Resource (2019nCoVR, https://bigd.big.ac.cn/ncov). 2019nCoVR features comprehensive integration of genomic and proteomic sequences as well as their metadata information from the Global Initiative on Sharing All Influenza Data, National Center for Biotechnology Information, China National GeneBank, National Microbiology Data Center and China National Center for Bioinformation (CNCB)/National Genomics Data Center (NGDC). It also incorporates a wide range of relevant information including scientific literatures, news, and popular articles for science dissemination, and provides visualization functionalities for genome variation analysis results based on all collected 2019-nCoV strains. Moreover, by linking seamlessly with related databases in CNCB/NGDC, 2019nCoVR offers virus data submission and sharing services for raw sequence reads and assembled sequences. 
In this report, we provide comprehensive descriptions on data deposition, management, release and utility in 2019nCoVR, laying important foundations in aid of studies on virus classification and origin, genome variation and evolution, fast detection, drug development and pneumonia precision prevention and therapy.",2019nCoVR,0.997189482,2019 Novel Coronavirus Resource,0.953625798,2019nCoVR,0.997189482,2,NA,33175170,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,12/28/2020 +33175170,http://bigd.big.ac.cn,"Database√ɬÉ√ǬÇ√ɬÇ√Ǭ†Resources of the National Genomics Data Center, China National Center for Bioinformation in 2021. The National Genomics Data Center (NGDC), part of the China National Center for Bioinformation (CNCB), provides a suite of database resources to support worldwide research activities in both academia and industry. With the explosive growth of multi-omics data, CNCB-NGDC is continually expanding, updating and enriching its core database resources through big data deposition, integration and translation. In the past year, considerable efforts have been devoted to 2019nCoVR, a newly established resource providing a global landscape of SARS-CoV-2 genomic sequences, variants, and haplotypes, as well as Aging Atlas, BrainBase, GTDB (Glycosyltransferases Database), LncExpDB, and TransCirc (Translation potential for circular RNAs). Meanwhile, a series of resources have been updated and improved, including BioProject, BioSample, GWH (Genome Warehouse), GVM (Genome Variation Map), GEN (Gene Expression Nebulas) as well as several biodiversity and plant resources. Particularly, BIG Search, a scalable, one-stop, cross-database search engine, has been significantly updated by providing easy access to a large number of internal and external biological resources from CNCB-NGDC, our partners, EBI and NCBI. 
All of these resources along with their services are publicly accessible at https://bigd.big.ac.cn.",2019nCoVR,0.986051157,NA,0,2019nCoVR,0.986051157,1,"29036542.0, 30365034.0","32102777.0, 33704069.0",NA,NA,conflicting record(s) to be removed,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +34244700,http://2dprots.ncbr.muni.cz,"2DProts: Database of Family-Wide Protein Secondary Structure Diagrams. . Secondary structures provide a deep insight into the protein architecture. They can serve for comparison between individual protein family members. The most straightforward way how to deal with protein secondary structure is its visualization using 2√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬâD diagrams. Several software tools for the generation of 2√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬâD diagrams were developed. Unfortunately, they create 2√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬâD diagrams based on only a single protein. Therefore, 2√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬâD diagrams of two proteins from one family markedly differ. For this reason, we developed the 2DProts database, which contains secondary structure 2√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬâD diagrams for all domains from the CATH and all proteins from PDB databases. These 2√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬâD diagrams are generated based on a whole protein family, and they also consider information about the 3√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬâD arrangement of secondary structure elements. Moreover, 2DProts database contains multiple 2√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬâD diagrams, which provide an overview of a whole protein family's secondary structures. 2DProts is updated weekly and is integrated into CATH. Freely accessible at https://2dprots.ncbr.muni.cz. The web interface was implemented in JavaScript. The database was implemented in Python. 
Supplementary data are available at Bioinformatics online.",2DProts,0.990597233,NA,0,2DProts,0.990597233,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/9/2021 +23203891,http://2p2idb.cnrs-mrs.fr,"2P2Idb: a structural database dedicated to orthosteric modulation of protein-protein interactions. Protein-protein interactions are considered as one of the next generation of therapeutic targets. Specific tools thus need to be developed to tackle this challenging chemical space. In an effort to derive some common principles from recent successes, we have built 2P2Idb (freely accessible at http://2p2idb.cnrs-mrs.fr), a hand-curated structural database dedicated to protein-protein interactions with known orthosteric modulators. It includes all interactions for which both the protein-protein and protein-ligand complexes have been structurally characterized. A web server provides links to related sites of interest, binding affinity data, pre-calculated structural information about protein-protein interfaces and 3D interactive views through java applets. Comparison of interfaces in 2P2Idb to those of representative datasets of heterodimeric complexes has led to the identification of geometrical parameters and residue properties to assess the druggability of protein-protein complexes. A tool is proposed to calculate a series of biophysical and geometrical parameters that characterize protein-protein interfaces. A large range of descriptors are computed including, buried accessible surface area, gap volume, non-bonded contacts, hydrogen-bonds, atom and residue composition, number of segments and secondary structure contribution. 
All together the 2P2I database represents a structural source of information for scientists from academic institutions or pharmaceutical industries.",2P2I,0.99723657,NA,0,2P2I,0.99723657,1,26980515,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/30/2012 +26980515,http://2p2idb.cnrs-mrs.fr,"2P2Idb v2: update of a structural database dedicated to orthosteric modulation of protein-protein interactions. . 2P2Idb is a hand-curated structural database dedicated to protein-protein interactions with known small molecule orthosteric modulators. It compiles the structural information related to orthosteric inhibitors and their target [i.e. related 3D structures available in the RCSB Protein Data Bank (PDB)] and provides links to other useful databases. 2P2Idb includes all interactions for which both the protein-protein and protein-inhibitor complexes have been structurally characterized. Since its first release in 2010, the database has grown constantly and the current version contains 27 protein-protein complexes and 274 protein-inhibitor complexes corresponding to 242 unique small molecule inhibitors which represent almost a 5-fold increase compared to the previous version. A number of new data have been added, including new protein-protein complexes, binding affinities, molecular descriptors, precalculated interface parameters and links to other webservers. A new query tool has been implemented to search for inhibitors within the database using standard molecular descriptors. A novel version of the 2P2I-inspector tool has been implemented to calculate a series of physical and chemical parameters of the protein interfaces. Several geometrical parameters including planarity, eccentricity and circularity have been added as well as customizable distance cutoffs. This tool has also been extended to protein-ligand interfaces. 
The 2P2I database thus represents a wealth of structural source of information for scientists interested in the properties of protein-protein interactions and the design of protein-protein interaction modulators. Database URL: http://2p2idb.cnrs-mrs.fr.",2P2Idb,0.997046232,NA,0,2P2Idb,0.997046232,1,23203891,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,3/15/2016 +27081154,http://3cdb.big.ac.cn,"3CDB: a manually curated database of chromosome conformation capture data. . Chromosome conformation capture (3C) is a biochemical technology to analyse contact frequencies between selected genomic sites in a cell population. Its recent genomic variants, e.g. Hi-C/ chromatin interaction analysis by paired-end tag (ChIA-PET), have enabled the study of nuclear organization at an unprecedented level. However, due to the inherent low resolution and ultrahigh cost of Hi-C/ChIA-PET, 3C is still the gold standard for determining interactions between given regulatory DNA elements, such as enhancers and promoters. Therefore, we developed a database of 3C determined functional chromatin interactions (3CDB;http://3cdb.big.ac.cn). To construct 3CDB, we searched PubMed and Google Scholar with carefully designed keyword combinations and retrieved more than 5000 articles from which we manually extracted 3319 interactions in 17 species. Moreover, we proposed a systematic evaluation scheme for data reliability and classified the interactions into four categories. Contact frequencies are not directly comparable as a result of various modified 3C protocols employed among laboratories. Our evaluation scheme provides a plausible solution to this long-standing problem in the field. A user-friendly web interface was designed to assist quick searches in 3CDB. 
We believe that 3CDB will provide fundamental information for experimental design and phylogenetic analysis, as well as bridge the gap between molecular and systems biologists who must now contend with noisy high-throughput data.Database URL:http://3cdb.big.ac.cn.",3CDB,0.995992641,NA,0,3CDB,0.995992641,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/14/2016 +27694207,http://nucleus3d.cent.uw.edu.pl/influenza,"3DFlu: database of sequence and structural variability of the influenza hemagglutinin at population scale. . The influenza virus type A (IVA) is an important pathogen which is able to cause annual epidemics and even pandemics. This fact is the consequence of the antigenic shifts and drifts capabilities of IVA, caused by the high mutation rate and the reassortment capabilities of the virus. The hemagglutinin (HA) protein constitutes the main IVA antigen and has a crucial role in the infection mechanism, being responsible for the recognition of host-specific sialic acid derivatives. Despite the relative abundance of HA sequence and serological studies, comparative structure-based analysis of HA are less investigated. The 3DFlu database contains well annotated HA representatives: 1192 models and 263 crystallographic structures. The relations between these proteins are defined using different metrics and are visualized as a network in the provided web interface. Moreover structural and sequence comparison of the proteins can be explored. Metadata information (e.g. protein identifier, IVA strain, year and location of infection) can enhance the exploration of the presented data. With our database researchers gain a useful tool for the exploration of high quality HA models, viewing and comparing changes in the HA viral subtypes at several information levels (sequence, structure, ESP). 
The complete and integrated view of those relations might be useful to determine the efficiency of transmission, pathogenicity and for the investigation of evolutionary tendencies of the influenza virus.Database URL: http://nucleus3d.cent.uw.edu.pl/influenza.",3DFlu,0.99639225,NA,0,3DFlu,0.99639225,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/2/2016 +24526713,http://3dgd.biosino.org,"The 3DGD: a database of genome 3D structure. Unlabelled The studies of chromatin 3D structure help us to understand its formation and function. Techniques combining chromosome conformation capture and next generation sequencing can capture chromatin structure information and has been applied to several different species and cell lines. We built 3DGD (3D Genome Database), a database that currently collected Hi-C data on four species, for easy accessing and visualization of chromatin 3D structure data. With the integration of other omics data such as genome-wide protein-DNA-binding data, this data source would be useful for researchers interested in chromatin structure and its biological functions. Availability and implementation The 3DGD v1.1, data browser, downloadable files and documentation are available at: http://3dgd.biosino.org/.",3DGD,0.996886671,Genome Database,0.968708754,3DGD,0.996886671,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/12/2014 +24081580,http://3did.irbbarcelona.org,"3did: a catalog of domain-based interactions of known three-dimensional structure. The database of 3D interacting domains (3did, available online for browsing and bulk download at http://3did.irbbarcelona.org) is a catalog of protein-protein interactions for which a high-resolution 3D structure is known. 3did collects and classifies all structural templates of domain-domain interactions in the Protein Data Bank, providing molecular details for such interactions. The current version also includes a pipeline for the discovery and annotation of novel domain-motif interactions. 
For every interaction, 3did identifies and groups different binding modes by clustering similar interfaces into 'interaction topologies'. By maintaining a constantly updated collection of domain-based structural interaction templates, 3did is a reference source of information for the structural characterization of protein interaction networks. 3did is updated every 6 months.",3did,0.997478525,interacting domains,0.710986495,3did,0.997478525,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/29/2013 +29106613,http://kobic.kr/3div,"3DIV: A 3D-genome Interaction Viewer and database. Three-dimensional (3D) chromatin structure is an emerging paradigm for understanding gene regulation mechanisms. Hi-C (high-throughput chromatin conformation capture), a method to detect long-range chromatin interactions, allows extensive genome-wide investigation of 3D chromatin structure. However, broad application of Hi-C data have been hindered by the level of complexity in processing Hi-C data and the large size of raw sequencing data. In order to overcome these limitations, we constructed a database named 3DIV (a 3D-genome Interaction Viewer and database) that provides a list of long-range chromatin interaction partners for the queried locus with genomic and epigenomic annotations. 3DIV is the first of its kind to collect all publicly available human Hi-C data to provide 66 billion uniformly processed raw Hi-C read pairs obtained from 80 different human cell/tissue types. In contrast to other databases, 3DIV uniquely provides normalized chromatin interaction frequencies against genomic distance dependent background signals and a dynamic browsing visualization tool for the listed interactions, which could greatly advance the interpretation of chromatin interactions. 
'3DIV' is available at http://kobic.kr/3div.",3DIV,0.995887399,3D-genome Interaction Viewer and database,0.817636555,3DIV,0.995887399,1,NA,33245777,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +33245777,http://3div.kr,"3DIV update for 2021: a comprehensive resource of 3D genome and 3D cancer genome. Three-dimensional (3D) genome organization is tightly coupled with gene regulation in various biological processes and diseases. In cancer, various types of large-scale genomic rearrangements can disrupt the 3D genome, leading to oncogenic gene expression. However, unraveling the pathogenicity of the 3D cancer genome remains a challenge since closer examinations have been greatly limited due to the lack of appropriate tools specialized for disorganized higher-order chromatin structure. Here, we updated a 3D-genome Interaction Viewer and database named 3DIV by uniformly processing √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº230 billion raw Hi-C reads to expand our contents to the 3D cancer genome. The updates of 3DIV are listed as follows:√ɬÉ√ǬÇ√ɬÇ√Ǭ†(i) the collection of 401 samples including 220 cancer cell line/tumor Hi-C data, 153 normal cell line/tissue Hi-C data, and 28 promoter capture Hi-C data, (ii) the live interactive manipulation of the 3D cancer genome to simulate the impact of structural variations and (iii) the reconstruction of Hi-C contact maps by user-defined chromosome order to investigate the 3D genome of the complex genomic rearrangement. In summary, the updated 3DIV will be the most comprehensive resource to explore the gene regulatory effects of both the normal and cancer 3D genome. 
'3DIV' is freely available at http://3div.kr.",3DIV,0.989706784,3D-genome Interaction Viewer and database,0.976927248,3DIV,0.989706784,1,NA,29106613,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +23293959,"http://www.genome.jp/kegg/compound/, http://www.3dmet.dna.affrc.go.jp","Three-dimensional structure database of natural metabolites (3DMET): a novel database of curated 3D structures. A database of 3D structures of natural metabolites has been developed called 3DMET. During the process of structure conversion from 2D to 3D, we found many structures were misconverted at chiral atoms and bonds. Several popular converters were tested in regard to their conversion accuracy. For verification, three canonical strings were also tested. No procedure could satisfactorily cover all the structures of the natural products. The misconverted structures had to be corrected manually. However, a nonnegligible number of mistakes were also observed even after manual curation, so a self-checking system was developed and introduced to our work flow. Thus, the 3D structures in our 3DMET database were evaluated in two steps: automatically and manually. The current version includes most of the natural products of the KEGG COMPOUND collection [ http://www.genome.jp/kegg/compound/ ] and is searchable by string, value range, and substructure. 3DMET can be accessed via http://www.3dmet.dna.affrc.go.jp/ , which also has detailed manuals.",3DMET,0.974991322,Three-dimensional structure database of natural metabolites,0.861961424,3DMET,0.974991322,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/7/2013 +27789693,http://biotech.bmi.ac.cn/3dsnp,"3DSNP: a database for linking human noncoding SNPs to their three-dimensional interacting genes. The vast noncoding portion of the human genome harbors a rich array of functional elements and disease-causing regulatory variants. 
Recent high-throughput chromosome conformation capture studies have outlined the principles of these elements interacting and regulating the expression of distal target genes through three-dimensional (3D) chromatin looping. Here we present 3DSNP, an integrated database for annotating human noncoding variants by exploring their roles in the distal interactions between genes and regulatory elements. 3DSNP integrates 3D chromatin interactions, local chromatin signatures in different cell types and linkage disequilibrium (LD) information from the 1000 Genomes Project. 3DSNP provides informative visualization tools to display the integrated local and 3D chromatin signatures and the genetic associations among variants. Data from different functional categories are integrated in a scoring system that quantitatively measures the functionality of SNPs to help select important variants from a large pool. 3DSNP is a valuable resource for the annotation of human noncoding genome sequence and investigating the impact of noncoding variants on clinical phenotypes. The 3DSNP database is available at http://biotech.bmi.ac.cn/3dsnp/.",3DSNP,0.99662596,NA,0,3DSNP,0.99662596,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/26/2016 +24700812,http://caps.ncbs.res.in/3pfdbplus,"3PFDB+: improved search protocol and update for the identification of representatives of protein sequence domain families. Protein domain families are usually classified on the basis of similarity of amino acid sequences. Selection of a single representative sequence for each family provides targets for structure determination or modeling and also enables fast sequence searches to associate new members to a family. Such a selection could be challenging since some of these domain families exhibit huge variation depending on the number of members in the family, the average family sequence length or the extent of sequence divergence within a family. 
We had earlier created 3PFDB database as a repository of best representative sequences, selected from each PFAM domain family on the basis of high coverage. In this study, we have improved the database using more efficient strategies for the initial generation of sequence profiles and implement two independent methods, FASSM and HMMER, for identifying family members. HMMER employs a global sequence similarity search, while FASSM relies on motif identification and matching. This improved and updated database, 3PFDB+ generated in this study, provides representative sequences and profiles for PFAM families, with 13 519 family representatives having more than 90% family coverage. The representative sequence is also highlighted in a two-dimensional plot, which reflects the relative divergence between family members. Representatives belonging to small families with short sequences are mainly associated with low coverage. The set of sequences not recognized by the family representative profiles, highlight several potential false or weak family associations in PFAM. Partial domains and fragments dominate such cases, along with sequences that are highly diverged or different from other family members. Some of these outliers were also predicted to have different secondary structure contents, which reflect different putative structure or functional roles for these domain sequences. Database URL: http://caps.ncbs.res.in/3pfdbplus/.",3PFDB,0.971633414,NA,0,3PFDB,0.971633414,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,4/3/2014 +31240104,http://jmorp.megabank.tohoku.ac.jp,"3.5KJPNv2: an allele frequency panel of 3552 Japanese individuals including the X chromosome. The first step towards realizing personalized healthcare is to catalog the genetic variations in a population. 
Since the dissemination of individual-level genomic information is strictly controlled, it will be useful to construct population-level allele frequency panels with easy-to-use interfaces. In the Tohoku Medical Megabank Project, we sequenced nearly 4000 individuals from a Japanese population and constructed an allele frequency panel of 3552 individuals after removing related samples. The panel is called the 3.5KJPNv2. It was constructed by using a standard pipeline including the 1KGP and gnomAD algorithms to reduce technical biases and to allow comparisons to other populations. Our database is the first large-scale panel providing the frequencies of variants present on the X chromosome and on the mitochondria in the Japanese population. All the data are available on our original database at https://jmorp.megabank.tohoku.ac.jp.",5KJPNv2,0.771728354,NA,0,5KJPNv2,0.771728354,1,"29069501.0, 33179747.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,6/18/2019 +26490961,http://combio.pl/5srnadb,"5SRNAdb: an information resource for 5S ribosomal RNAs. Ribosomal 5S RNA (5S rRNA) is the ubiquitous RNA component found in the large subunit of ribosomes in all known organisms. Due to its small size, abundance and evolutionary conservation 5S rRNA for many years now is used as a model molecule in studies on RNA structure, RNA-protein interactions and molecular phylogeny. 5SRNAdb (http://combio.pl/5srnadb/) is the first database that provides a high quality reference set of ribosomal 5S RNAs (5S rRNA) across three domains of life. 
Here, we give an overview of new developments in the database and associated web tools since 2002, including updates to database content, curation processes and user web interfaces.",5SRNAdb,0.94896158,NA,0,5SRNAdb,0.94896158,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/20/2015 +25465051,http://bioinf.mind.meiji.ac.jp/a-wings,"A-WINGS: an integrated genome database for Pleurocybella porrigens (Angel's wing oyster mushroom, Sugihiratake). Background The angel's wing oyster mushroom (Pleurocybella porrigens, Sugihiratake) is a well-known delicacy. However, its potential risk in acute encephalopathy was recently revealed by a food poisoning incident. To disclose the genes underlying the accident and provide mechanistic insight, we seek to develop an information infrastructure containing omics data. In our previous work, we sequenced the genome and transcriptome using next-generation sequencing techniques. The next step in achieving our goal is to develop a web database to facilitate the efficient mining of large-scale omics data and identification of genes specifically expressed in the mushroom. Findings This paper introduces a web database A-WINGS (http://bioinf.mind.meiji.ac.jp/a-wings/) that provides integrated genomic and transcriptomic information for the angel's wing oyster mushroom. The database contains structure and functional annotations of transcripts and gene expressions. Functional annotations contain information on homologous sequences from NCBI nr and UniProt, Gene Ontology, and KEGG Orthology. Digital gene expression profiles were derived from RNA sequencing (RNA-seq) analysis in the fruiting bodies and mycelia. The omics information stored in the database is freely accessible through interactive and graphical interfaces by search functions that include 'GO TREE VIEW' browsing, keyword searches, and BLAST searches. 
Conclusions The A-WINGS database will accelerate omics studies on specific aspects of the angel's wing oyster mushroom and the family Tricholomataceae.",A-WINGS,0.987336159,NA,0,A-WINGS,0.987336159,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/3/2014 +33169878,http://apeswiki.eva.mpg.de,"Open-access platform to synthesize knowledge of ape conservation across sites. Despite the large body of literature on ape conservation, much of the data needed for evidence-based conservation decision-making is still not readily accessible and standardized, rendering cross-site comparison difficult. To support knowledge synthesis and to complement the IUCN SSC Ape Populations, Environments and Surveys database, we created the A.P.E.S. Wiki (https://apeswiki.eva.mpg.de), an open-access platform providing site-level information on ape conservation status and context. The aim of this Wiki is to provide information and data about geographical ape locations, to curate information on individuals and organizations active in ape research and conservation, and to act as a tool to support collaboration between conservation practitioners, scientists, and other stakeholders. To illustrate the process and benefits of knowledge synthesis, we used the momentum of the update of the conservation action plan for western chimpanzees (Pan troglodytes verus) and began with this critically endangered taxon. First, we gathered information on 59 sites in West Africa from scientific publications, reports, and online sources. Information was compiled in a standardized format and can thus be summarized using a web scraping approach. We then asked experts working at those sites to review and complement the information (20 sites have been reviewed to date). We demonstrate the utility of the information available through the Wiki, for example, for studying species distribution. Importantly, as an open-access platform and based on the well-known wiki layout, the A.P.E.S. 
Wiki can contribute to direct and interactive information sharing and promote the efforts invested by the ape research and conservation community. The Section on Great Apes and the Section on Small Apes of the IUCN SSC Primate Specialist Group will guide and support the expansion of the platform to all small and great ape taxa. Similar collaborative efforts can contribute to extending knowledge synthesis to all nonhuman primate species.",A.P.E.S,0.811360155,NA,0,A.P.E.S,0.811360155,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/10/2020 +28779078,http://www.iictindia.org/A2MDB,"Aspergillus Secondary Metabolite Database, a resource to understand the Secondary metabolome of Aspergillus genus. Aspergillus is a genus of ubiquitous fungi that are pathologically & therapeutically important. Aspergillus Secondary Metabolites Database (A2MDB) is a curated compendium of information on Aspergillus & its secondary metabolome. A2MDB catalogs 807 unique non-redundantsecondary metabolites√ɬÉ√ǬÇ√ɬÇ√Ǭ†derived from 675 Aspergillus species. A2MDB has a compilation of 100 cellular targets of secondary metabolites, 44 secondary metabolic pathways, 150 electron and light microscopy images of various Aspergillus species. A phylogenetic representation of over 2500 strains has been provided. A2MDB presents a detailed chemical information of secondary metabolites and their mycotoxins. Molecular docking models of metabolite-target protein interactions have been put together. A2MDB also has epidemiological data representing Aspergillosis and global occurrence of Aspergillus species. Furthermore a novel classification of Aspergillosis along with 370 case reports with images, were made available. For each metabolite catalogued, external links to related databases have been provided. All this data is available on A2MDB, launched through Indian Institute of Chemical Technology, Hyderabad, India, as an open resource http://www.iictindia.org/A2MDB . 
We believe A2MDB is of practical relevance to the scientific community that is in pursuit of novel therapeutics.",A2MDB,0.993841752,Aspergillus Secondary Metabolites Database,0.987300144,A2MDB,0.993841752,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/4/2017 +"27924021, 32162267",http://biokb.ncpsb.org/aagatlas,"AAgAtlas 1.0: a human autoantigen database. Autoantibodies refer to antibodies that target self-antigens, which can play pivotal roles in maintaining homeostasis, distinguishing normal from tumor tissue and trigger autoimmune diseases. In the last three decades, tremendous efforts have been devoted to elucidate the generation, evolution and functions of autoantibodies, as well as their target autoantigens. However, reports of these countless previously identified autoantigens are randomly dispersed in the literature. Here, we constructed an AAgAtlas database 1.0 using text-mining and manual curation. We extracted 45 830 autoantigen-related abstracts and 94 313 sentences from PubMed using the keywords of either 'autoantigen' or 'autoantibody' or their lexical variants, which were further refined to 25 520 abstracts, 43 253 sentences and 3984 candidates by our bio-entity recognizer based on the Protein Ontology. Finally, we identified 1126 genes as human autoantigens and 1071 related human diseases, with which we constructed a human autoantigen database (AAgAtlas database 1.0). The database provides a user-friendly interface to conveniently browse, retrieve and download human autoantigens as well as their associated diseases. 
The database is freely accessible at http://biokb.ncpsb.org/aagatlas/ We believe this database will be a valuable resource to track and understand human autoantigens as well as to investigate their functions in basic and translational research.",AAgAtlas,0.991741061,NA,0,AAgAtlas,0.991741061,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +28977551,http://bioinfo.wilmer.jhu.edu/AAgMarker,"AAgMarker 1.0: a resource of serological autoantigen biomarkers for clinical diagnosis and prognosis of various human diseases. Autoantibodies are produced to target an individual's own antigens (e.g. proteins). They can trigger autoimmune responses and inflammation, and thus, cause many types of diseases. Many high-throughput autoantibody profiling projects have been reported for unbiased identification of serological autoantigen-based biomarkers. However, a lack of centralized data portal for these published assays has been a major obstacle to further data mining and cross-evaluate the quality of these datasets generated from different diseases. Here, we introduce a user-friendly database, AAgMarker 1.0, which collects many published raw datasets obtained from serum profiling assays on the proteome microarrays, and provides a toolbox for mining these data. The current version of AAgMarker 1.0 contains 854 serum samples, involving 136 092 proteins. A total of 7803 (4470 non-redundant) candidate autoantigen biomarkers were identified and collected for 12 diseases, such as Alzheimer's disease, Bechet's disease and Parkinson's disease. Seven statistical parameters are introduced to quantitatively assess these biomarkers. Users can retrieve, analyse and compare the datasets through basic search, advanced search and browse. These biomarkers are also downloadable by disease terms. The AAgMarker 1.0 is now freely accessible at http://bioinfo.wilmer.jhu.edu/AAgMarker/. 
We believe this database will be a valuable resource for the community of both biomedical and clinical research.",AAgMarker,0.988109291,NA,0,AAgMarker,0.988109291,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24888382,http://aair.cimed.ike.liu.se,"The Allergic Airway Inflammation Repository--a user-friendly, curated resource of mRNA expression levels in studies of allergic airways. Public microarray databases allow analysis of expression levels of candidate genes in different contexts. However, finding relevant microarray data is complicated by the large number of available studies. We have compiled a user-friendly, open-access database of mRNA microarray experiments relevant to allergic airway inflammation, the Allergic Airway Inflammation Repository (AAIR, http://aair.cimed.ike.liu.se/). The aim is to allow allergy researchers to determine the expression profile of their genes of interest in multiple clinical data sets and several experimental systems quickly and intuitively. AAIR also provides quick links to other relevant information such as experimental protocols, related literature and raw data files.",AAIR,0.995133579,Allergic Airway Inflammation Repository,0.976998336,AAIR,0.995133579,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/3/2014 +27242034,http://abasy.ccg.unam.mx,"Abasy Atlas: a comprehensive inventory of systems, global network properties and systems-level elements across bacteria. . The availability of databases electronically encoding curated regulatory networks and of high-throughput technologies and methods to discover regulatory interactions provides an invaluable source of data to understand the principles underpinning the organization and evolution of these networks responsible for cellular regulation. Nevertheless, data on these sources never goes beyond the regulon level despite the fact that regulatory networks are complex hierarchical-modular structures still challenging our understanding. 
This brings the necessity for an inventory of systems across a large range of organisms, a key step to rendering feasible comparative systems biology approaches. In this work, we take the first step towards a global understanding of the regulatory networks organization by making a cartography of the functional architectures of diverse bacteria. Abasy ( A: cross- BA: cteria SY: stems) Atlas provides a comprehensive inventory of annotated functional systems, global network properties and systems-level elements (global regulators, modular genes shaping functional systems, basal machinery genes and intermodular genes) predicted by the natural decomposition approach for reconstructed and meta-curated regulatory networks across a large range of bacteria, including pathogenically and biotechnologically relevant organisms. The meta-curation of regulatory datasets provides the most complete and reliable set of regulatory interactions currently available, which can even be projected into subsets by considering the force or weight of evidence supporting them or the systems that they belong to. Besides, Abasy Atlas provides data enabling large-scale comparative systems biology studies aimed at understanding the common principles and particular lifestyle adaptions of systems across bacteria. Abasy Atlas contains systems and system-level elements for 50 regulatory networks comprising 78 649 regulatory interactions covering 42 bacteria in nine taxa, containing 3708 regulons and 1776 systems. 
All this brings together a large corpus of data that will surely inspire studies to generate hypothesis regarding the principles governing the evolution and organization of systems and the functional architectures controlling them.Database URL: http://abasy.ccg.unam.mx.",Abasy,0.987492204,NA,0,Abasy,0.987492204,1,32542109,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,5/30/2016 +32542109,http://abasy.ccg.unam.mx,"Abasy Atlas v2.2: The most comprehensive and up-to-date inventory of meta-curated, historical, bacterial regulatory networks, their completeness and system-level characterization. Some organism-specific databases about regulation in bacteria have become larger, accelerated by high-throughput methodologies, while others are no longer updated or accessible. Each database homogenize its datasets, giving rise to heterogeneity across databases. Such heterogeneity mainly encompasses different names for a gene and different network representations, generating duplicated interactions that could bias network analyses. Abasy (Across-bacteria systems) Atlas consolidates information from different sources into meta-curated regulatory networks in bacteria. The high-quality networks in Abasy Atlas enable cross-organisms analyses, such as benchmarking studies where gold standards are required. Nevertheless, network incompleteness still casts doubts on the conclusions of network analyses, and available sampling methods cannot reflect the curation process. To tackle this problem, the updated version of Abasy Atlas presented in this work provides historical snapshots of regulatory networks. Thus, network analyses can be performed at different completeness levels, making possible to identify potential bias and to predict future results. We leverage the recently found constraint in the complexity of regulatory networks to develop a novel model to quantify the total number of regulatory interactions as a function of the genome size. 
This completeness estimation is a valuable insight that may aid in the daunting task of network curation, prediction, and validation. The new version of Abasy Atlas provides 76 networks (204,282 regulatory interactions) covering 42 bacteria (64% Gram-positive and 36% Gram-negative) distributed in 9 species (Mycobacterium tuberculosis, Bacillus subtilis, Escherichia coli, Corynebacterium glutamicum, Staphylococcus aureus, Pseudomonas aeruginosa, Streptococcus pyogenes, Streptococcus pneumoniae, and Streptomyces coelicolor), containing 8459 regulons and 4335 modules. Database URL: https://abasy.ccg.unam.mx/.",Abasy Atlas,0.890438616,acteria systems,0.613150299,Abasy Atlas,0.890438616,1,27242034,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,5/16/2020 +31501752,http://www.bioinfoindia.org/abcd,"ABCD: Alzheimer's disease Biomarkers Comprehensive Database. Alzheimer's disease (AD) is an age-related, non-reversible, and progressive brain disorder. Memory loss, confusion, and personality changes are major symptoms noticed. AD ultimately leads to a severe loss of mental function. Due to lack of effective biomarkers, no effective medication was available for the complete treatment of AD. There is a need to provide all AD-related essential information to the scientific community. Our resource Alzheimer's disease Biomarkers Comprehensive Database (ABCD) is being planned to accomplish this objective. ABCD is a huge collection of AD-related data of molecular markers. The web interface contains information concerning the proteins, genes, transcription factors, SNPs, miRNAs, mitochondrial genes, and expressed genes implicated in AD pathogenesis. In addition to the molecular-level data, the database has information for animal models, medicinal candidates and pathways involved in the AD and some image data for AD patients. 
ABCD is coupled with some major external resources where the user can retrieve additional general information about the disease. The database was designed in such a manner that user can extract meaningful information about gene, protein, pathway, and regulatory elements based search options. This database is unique in the sense that it is completely dedicated to specific neurological disorder i.e. AD. Further advance options like AD-affected brain image data of patients and structural compound level information add values to our database. Features of this database enable users to extract, analyze and display information related to a disease in many different ways. The database is available for academic purpose and accessible at http://www.bioinfoindia.org/abcd.",ABCD,0.989302754,Alzheimer's disease Biomarkers Comprehensive Database,0.986439314,ABCD,0.989302754,1,NA,31410491,NA,NA,NA,do not merge,NA,NA,NA,9/3/2019 +31410491,http://web.expasy.org/abcd,"The ABCD database: a repository for chemically defined antibodies. The ABCD (for AntiBodies Chemically Defined) database is a repository of sequenced antibodies, integrating curated information about the antibody and its antigen with cross-links to standardized databases of chemical and protein entities. It is freely available to the academic community, accessible through the ExPASy server (https://web.expasy.org/abcd/). The ABCD database aims at helping to improve reproducibility in academic research by providing a unique, unambiguous identifier associated to each antibody sequence. It also allows to determine rapidly if a sequenced antibody is available for a given antigen.",ABCD,0.981157601,for AntiBodies Chemically Defined,0.948119296,ABCD,0.981157601,1,NA,31501752,NA,NA,NA,do not merge,NA,NA,NA,1/1/2020 +28365738,http://abcm2.hegelab.org,"ABCMdb reloaded: updates on mutations in ATP binding cassette proteins. . ABC (ATP-Binding Cassette) proteins with altered function are responsible for numerous human diseases. 
To aid the selection of positions and amino acids for ABC structure/function studies we have generated a database, ABCMdb (Gyimesi et al. , ABCMdb: a database for the comparative analysis of protein mutations in ABC transporters, and a potential framework for a general application. Hum Mutat 2012; 33:1547-1556.), with interactive tools. The database has been populated with mentions of mutations extracted from full text papers, alignments and structural models. In the new version of the database we aimed to collect the effect of mutations from databases including ClinVar. Because of the low number of available data, even in the case of the widely studied disease-causing ABC proteins, we also included the possible effects of mutations based on SNAP2 and PROVEAN predictions. To aid the interpretation of variations in non-coding regions, the database was supplemented with related DNA level information. Our results emphasize the importance of in silico predictions because of the sparse information available on variants and suggest that mutations at analogous positions in homologous ABC proteins have a strong predictive power for the effects of mutations. Our improved ABCMdb advances the design of both experimental studies and meta-analyses in order to understand drug interactions of ABC proteins and the effects of mutations on functional expression. http://abcm2.hegelab.org.",ABCMdb,0.99738276,NA,0,ABCMdb,0.99738276,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +26339155,http://nabic.rda.go.kr/SNP,"NABIC SNP: an integrated database for SNP markers. UNLABELLED:The National Agricultural Biotechnology Information Center (NABIC) constructed a web-based database to provide information about 54,310 single nucleotide polymorphisms (SNPs) identified in the seven species in a high-throughput manner. The database consists of three major functional categories: SNP marker search, detailed information viewer and download of SNP sequence. 
The SNP annotation table provides detailed information such as ownership information, basic information, bio-entry information, reference, comments, features, and sequence data. AVAILABILITY:The database is available online for free at http://nabic.rda.go.kr/SNP.",ABIC,0.657440186,NA,0,ABIC,0.657440186,1,NA,24250118,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,7/31/2015 +24250118,http://nabic.rda.go.kr/gere/rice/molecularMarkers,"NABIC marker database: A molecular markers information network of agricultural crops. Unlabelled In 2013, National Agricultural Biotechnology Information Center (NABIC) reconstructs a molecular marker database for useful genetic resources. The web-based marker database consists of three major functional categories: map viewer, RSN marker and gene annotation. It provides 7250 marker locations, 3301 RSN marker property, 3280 molecular marker annotation information in agricultural plants. The individual molecular marker provides information such as marker name, expressed sequence tag number, gene definition and general marker information. This updated marker-based database provides useful information through a user-friendly web interface that assisted in tracing any new structures of the chromosomes and gene positional functions using specific molecular markers. Availability The database is available for free at http://nabic.rda.go.kr/gere/rice/molecularMarkers/",ABIC,0.63711524,NA,0,ABIC,0.63711524,1,NA,26339155,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,10/16/2013 +29156005,http://bioinfo.imtech.res.in/manojk/abiofilm,"aBiofilm: a resource of anti-biofilm agents and their potential implications in targeting antibiotic drug resistance. Biofilms play an important role in the antibiotic drug resistance, which is threatening public health globally. 
Almost, all microbes mimic multicellular lifestyle to form biofilm by undergoing phenotypic changes to adapt adverse environmental conditions. Many anti-biofilm agents have been experimentally validated to disrupt the biofilms during last three decades. To organize this data, we developed the 'aBiofilm' resource (http://bioinfo.imtech.res.in/manojk/abiofilm/) that harbors a database, a predictor, and the data visualization modules. The database contains biological, chemical, and structural details of 5027 anti-biofilm agents (1720 unique) reported from 1988-2017. These agents target over 140 organisms including Gram-negative, Gram-positive bacteria, and fungus. They are mainly chemicals, peptides, phages, secondary metabolites, antibodies, nanoparticles and extracts. They show the diverse mode of actions by attacking mainly signaling molecules, biofilm matrix, genes, extracellular polymeric substances, and many more. The QSAR based predictor identifies the anti-biofilm potential of an unknown chemical with an accuracy of √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº80.00%. The data visualization section summarized the biofilm stages targeted (Circos plot); interaction maps (Cytoscape) and chemicals diversification (CheS-Mapper) of the agents. This comprehensive platform would help the researchers to understand the multilevel communication in the microbial consortium. It may aid in developing anti-biofilm therapeutics to deal with antibiotic drug resistance menace.",aBiofilm,0.9043421,NA,0,aBiofilm,0.9043421,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +31832668,http://acetobase.molbio.slu.se,"AcetoBase: a functional gene repository and database for formyltetrahydrofolate synthetase sequences. . Acetogenic bacteria are imperative to environmental carbon cycling and diverse biotechnological applications, but their extensive physiological and taxonomical diversity is an impediment to systematic taxonomic studies. 
Acetogens are chemolithoautotrophic bacteria that perform reductive carbon fixation under anaerobic conditions through the Wood-Ljungdahl pathway (WLP)/acetyl-coenzyme A pathway. The gene-encoding formyltetrahydrofolate synthetase (FTHFS), a key enzyme of this pathway, is highly conserved and can be used as a molecular marker to probe acetogenic communities. However, there is a lack of systematic collection of FTHFS sequence data at nucleotide and protein levels. In an attempt to streamline investigations on acetogens, we developed AcetoBase - a repository and database for systematically collecting and organizing information related to FTHFS sequences. AcetoBase also provides an opportunity to submit data and obtain accession numbers, perform homology searches for sequence identification and access a customized blast database of submitted sequences. AcetoBase provides the prospect to identify potential acetogenic bacteria, based on metadata information related to genome content and the WLP, supplemented with FTHFS sequence accessions, and can be an important tool in the study of acetogenic communities. AcetoBase can be publicly accessed at https://acetobase.molbio.slu.se.",AcetoBase,0.99637115,NA,0,AcetoBase,0.99637115,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +32702093,"http://AciDB.cl, http://gitlab.com/Hawkline451/acidb","AciDB 1.0: a database of acidophilic organisms, their genomic information and associated metadata. Motivation There are about 600 available genome sequences of acidophilic organisms (grow at a pH√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ<√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ5) from the three domains of the Tree of Life. Information about acidophiles is scattered over many heterogeneous sites making it extraordinarily difficult to link physiological traits with genomic data. We were motivated to generate a curated, searchable database to address this problem. 
Results AciDB 1.0 is a curated database of sequenced acidophiles that enables researchers to execute complex queries linking genomic features to growth data, environmental descriptions and taxonomic information. Availability and implementation AciDB 1.0 is freely available online at: http://AciDB.cl. The source code is released under an MIT license at: https://gitlab.com/Hawkline451/acidb/.",AciDB,0.99098736,NA,0,AciDB,0.99098736,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +34497528,http://www.acnpd-fu.com,"ACNPD: The Database for Elucidating the Relationships Between Natural Products, Compounds, Molecular Mechanisms, and Cancer Types. Objectives: Cancer is well-known as a collection of diseases of uncontrolled proliferation of cells caused by mutated genes which are generated by external or internal factors. As the mechanisms of cancer have been constantly revealed, including cell cycle, proliferation, apoptosis and so on, a series of new emerging anti-cancer drugs acting on each stage have also been developed. It is worth noting that natural products are one of the important sources for the development of anti-cancer drugs. To the best of our knowledge, there is not any database summarizing the relationships between natural products, compounds, molecular mechanisms, and cancer types. Materials and methods: Based upon published literatures and other sources, we have constructed an anti-cancer natural product database (ACNPD) (http://www.acnpd-fu.com/). The database currently contains 521 compounds, which specifically refer to natural compounds derived from traditional Chinese medicine plants (derivatives are not considered herein). And, it includes 1,593 molecular mechanisms/signaling pathways, covering 10 common cancer types, such as breast cancer, lung cancer and cervical cancer. 
Results: Integrating existing data sources, we have obtained a large amount of information on natural anti-cancer products, including herbal sources, regulatory targets and signaling pathways. ACNPD is a valuable online resource that illustrates the complex pharmacological relationship between natural products and human cancers. Conclusion: In summary, ACNPD is crucial for better understanding of the relationships between traditional Chinese medicine (TCM) and cancer, which is not only conducive to expand the influence of TCM, but help to find more new anti-cancer drugs in the future.",ACNPD,0.9954561,anti-cancer natural product database,0.887865017,ACNPD,0.9954561,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/23/2021 +25229122,http://www.ats.amherst.edu/protein,"A comprehensive database of verified experimental data on protein folding kinetics. Insights into protein folding rely increasingly on the synergy between experimental and theoretical approaches. Developing successful computational models requires access to experimental data of sufficient quantity and high quality. We compiled folding rate constants for what initially appeared to be 184 proteins from 15 published collections/web databases. To generate the highest confidence in the dataset, we verified the reported lnkf value and exact experimental construct and conditions from the original experimental report(s). The resulting comprehensive database of 126 verified entries, ACPro, will serve as a freely accessible resource (https://www.ats.amherst.edu/protein/) for the protein folding community to enable confident testing of predictive models. In addition, we provide a streamlined submission form for researchers to add new folding kinetics results, requiring specification of all the relevant experimental information according to the standards proposed in 2005 by the protein folding consortium organized by Plaxco. 
As the number and diversity of proteins whose folding kinetics are studied expands, our curated database will enable efficient and confident incorporation of new experimental results into a standardized collection. This database will support a more robust symbiosis between experiment and theory, leading ultimately to more rapid and accurate insights into protein folding, stability, and dynamics.",ACPro,0.962495983,NA,0,ACPro,0.962495983,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/14/2014 +33068435,http://bcb.unl.edu/AcrDB,"AcrDB: a database of anti-CRISPR operons in prokaryotes and viruses. CRISPR-Cas is an anti-viral mechanism of prokaryotes that has been widely adopted for genome editing. To make CRISPR-Cas genome editing more controllable and safer to use, anti-CRISPR proteins have been recently exploited to prevent excessive/prolonged Cas nuclease cleavage. Anti-CRISPR (Acr) proteins are encoded by (pro)phages/(pro)viruses, and have the ability to inhibit their host's CRISPR-Cas systems. We have built an online database AcrDB (http://bcb.unl.edu/AcrDB) by scanning √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº19 000 genomes of prokaryotes and viruses with AcrFinder, a recently developed Acr-Aca (Acr-associated regulator) operon prediction program. Proteins in Acr-Aca operons were further processed by two machine learning-based programs (AcRanker and PaCRISPR) to obtain numerical scores/ranks. Compared to other anti-CRISPR databases, AcrDB has the following unique features: (i) It is a genome-scale database with the largest collection of data (39 799 Acr-Aca operons containing Aca or Acr homologs); (ii) It offers a user-friendly web interface with various functions for browsing, graphically viewing, searching, and batch downloading Acr-Aca operons; (iii) It focuses on the genomic context of Acr and Aca candidates instead of individual Acr protein family and (iv) It collects data with three independent programs each having a unique data mining algorithm for cross validation. 
AcrDB will be a valuable resource to the anti-CRISPR research community.",AcrDB,0.997832954,NA,0,AcrDB,0.997832954,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +33137193,http://pacrispr.erc.monash.edu/AcrHub,"AcrHub: an integrative hub for investigating, predicting and mapping anti-CRISPR proteins. Anti-CRISPR (Acr) proteins naturally inhibit CRISPR-Cas adaptive immune systems across bacterial and archaeal domains of life. This emerging field has caused a paradigm shift in the way we think about the CRISPR-Cas system, and promises a number of useful applications from gene editing to phage therapy. As the number of verified and predicted Acrs rapidly expands, few online resources have been developed to deal with this wealth of information. To overcome this shortcoming, we developed AcrHub, an integrative database to provide an all-in-one solution for investigating, predicting and mapping Acr proteins. AcrHub catalogs 339 non-redundant experimentally validated Acrs and over 70 000 predicted Acrs extracted from genome sequence data from a diverse range of prokaryotic organisms and their viruses. It integrates state-of-the-art predictors to predict potential Acrs, and incorporates three analytical modules: similarity analysis, phylogenetic analysis and homology network analysis, to analyze their relationships with known Acrs. By interconnecting all modules as a platform, AcrHub presents enriched and in-depth analysis of known and potential Acrs and therefore provides new and exciting insights into the future of Acr discovery and validation. AcrHub is freely available at http://pacrispr.erc.monash.edu/AcrHub/.",AcrHub,0.99744314,NA,0,AcrHub,0.99744314,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +20949389,http://acsr.ucsf.edu,"The AIDS and Cancer Specimen Resource. 
The AIDS and Cancer Specimen Resource (ACSR) is a cooperative agreement among the United States National Cancer Institute (NCI) (Office of the Director, Office of HIV and AIDS Malignancy (OHAM)) and regional US consortia, University of California, San Francisco (West Coast), George Washington University (East Coast), and The Ohio State University (Mid-Region). The ACSR's main objective is to collect, preserve, and disperse HIV-related tissues and biologic fluids along with clinical data to qualified investigators with a focus on HIV/AIDS-related malignancies. The ACSR biorepository has more than 265,000 human HIV-positive and control samples available from 39 processing types, 16 specimen types, and 52 anatomical site types. These HIV-infected biological fluids and tissues are made available to funded approved investigators at no fee. Technical support such as HIV DNA identification in tissues and tissue microarray (TMA) blocks are available to assist approved investigators. Research needs may be filled through ACSR cooperative arrangements when not met by currently banked material. Those participating with the ACSR are expected to share their research findings with the scientific community. Some 117 abstract/poster and podium reports at national and international scientific meetings and 94 publications have been contributed to the scientific literature (as of 2010). Investigators can browse the ACSR Internet site at http://acsr.ucsf.edu for biospecimens to support their scientific initiatives, including basic, translational, biomarker discovery, and molecular epidemiology studies.",ACSR,0.963491698,AIDS and Cancer Specimen Resource,0.931551437,ACSR,0.963491698,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2011 +22829726,http://www.actinobase.in,"Actinobase: Database on molecular diversity, phylogeny and biocatalytic potential of salt tolerant alkaliphilic actinomycetes. 
Unlabelled Actinobase is a relational database of molecular diversity, phylogeny and biocatalytic potential of haloalkaliphilic actinomycetes. The main objective of this data base is to provide easy access to range of information, data storage, comparison and analysis apart from reduced data redundancy, data entry, storage, retrieval costs and improve data security. Information related to habitat, cell morphology, Gram reaction, biochemical characterization and molecular features would allow researchers in understanding identification and stress adaptation of the existing and new candidates belonging to salt tolerant alkaliphilic actinomycetes. The PHP front end helps to add nucleotides and protein sequence of reported entries which directly help researchers to obtain the required details. Analysis of the genus wise status of the salt tolerant alkaliphilic actinomycetes indicated 6 different genera among the 40 classified entries of the salt tolerant alkaliphilic actinomycetes. The results represented wide spread occurrence of salt tolerant alkaliphilic actinomycetes belonging to diverse taxonomic positions. Entries and information related to actinomycetes in the database are publicly accessible at http://www.actinobase.in. On clustalW/X multiple sequence alignment of the alkaline protease gene sequences, different clusters emerged among the groups. The narrow search and limit options of the constructed database provided comparable information. The user friendly access to PHP front end facilitates would facilitate addition of sequences of reported entries. Availability The database is available for free at http://www.actinobase.in.",Actinobase,0.993042409,NA,0,Actinobase,0.993042409,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/16/2012 +29126202,http://www.ActiveDriverDB.org,"ActiveDriverDB: human disease mutations and genome variation in post-translational modification sites of proteins. 
Interpretation of genetic variation is needed for deciphering genotype-phenotype associations, mechanisms of inherited disease, and cancer driver mutations. Millions of single nucleotide variants (SNVs) in human genomes are known and thousands are associated with disease. An estimated 21% of disease-associated amino acid substitutions corresponding to missense SNVs are located in protein sites of post-translational modifications (PTMs), chemical modifications of amino acids that extend protein function. ActiveDriverDB is a comprehensive human proteo-genomics database that annotates disease mutations and population variants through the lens of PTMs. We integrated >385,000 published PTM sites with √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº3.6 million substitutions from The Cancer Genome Atlas (TCGA), the ClinVar database of disease genes, and human genome sequencing projects. The database includes site-specific interaction networks of proteins, upstream enzymes such as kinases, and drugs targeting these enzymes. We also predicted network-rewiring impact of mutations by analyzing gains and losses of kinase-bound sequence motifs. ActiveDriverDB provides detailed visualization, filtering, browsing and searching options for studying PTM-associated mutations. Users can upload mutation datasets interactively and use our application programming interface in pipelines. Integrative analysis of mutations and PTMs may help decipher molecular mechanisms of phenotypes and disease, as exemplified by case studies of TP53, BRCA2 and VHL. The open-source database is available at https://www.ActiveDriverDB.org.",ActiveDriverDB,0.994272113,NA,0,ActiveDriverDB,0.994272113,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +29956270,http://www.molgen.vib-ua.be/FTDmutations,"Data Mining: Applying the AD&FTD Mutation Database to Progranulin. 
The online AD&FTD Mutation Database ( http://www.molgen.vib-ua.be/FTDmutations ) was conceived to meet the needs of a comprehensive knowledge base of genetic variations in genes associated with monogenic forms of Alzheimer's disease (AD) and frontotemporal dementia (FTD). Today, the AD&FTD Mutation Database provides curated, referenced information of 764 genetic variants in APP, PSEN1, and PSEN2 associated with AD and GRN, C9orf72, TBK1, MAPT, VCP, CHMP2B, TARDBP, and FUS associated with FTD and related diseases. In addition, the database stores demographic and clinicogenetic data of 1646 dementia families associated with these mutations. In FTD, the granulin (GRN) gene has the highest number of different mutations (79/231√ɬÉ√ǬÇ√ɬÇ√Ǭ†=√ɬÉ√ǬÇ√ɬÇ√Ǭ†34%) and the second highest number of associated FTD families after C9orf72. In addition to the detailed mutation and patient information, summary reports in tabular and graphical formats can be consulted. Further, all variants can be uploaded to the human genome browser for custom-designed analyses.",AD&FTD,0.950616956,NA,0,AD&FTD,0.950616956,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +25392412,http://www.addgene.org,"The Addgene repository: an international nonprofit plasmid and data resource. The Addgene Repository (http://www.addgene.org) was founded to accelerate research and discovery by improving access to useful, high-quality research materials and information. The repository archives plasmids generated by scientists, conducts quality control, annotates the associated data and makes the plasmids and their data available to the scientific community. Plasmid associated data undergoes ongoing curation by members of the scientific community and by Addgene scientists. The growing database contains information on >31,000 unique plasmids spanning most experimental biological systems and organisms. 
The library includes a large number of plasmid tools for use in a wide variety of research areas, such as empty backbones, lentiviral resources, fluorescent protein vectors and genome engineering tools. The Addgene Repository database is always evolving with new plasmid deposits so it contains currently pertinent resources while ensuring the information on earlier deposits is still available. Custom search and browse features are available to access information on the diverse collection. Extensive educational materials and information are provided by the database curators to support the scientists that are accessing the repository's materials and data.",Addgene Repository,0.828912163,NA,0,Addgene Repository,0.828912163,1,24905498,NA,low_prob_best_name,do not remove,conflicting record(s) to be removed,NA,NA,NA,NA,11/11/2014 +34025933,http://159.226.67.237/sun/addictgedb,"AddictGene: An integrated knowledge base for differentially expressed genes associated with addictive substance. Addiction, a disorder of maladaptive brain plasticity, is associated with changes in numerous gene expressions. Nowadays, high-throughput sequencing data on addictive substance-induced gene expression have become widely available. A resource for comprehensive annotation of genes that show differential expression in response to commonly abused substances is necessary. So, we developed AddictGene by integrating gene expression, gene-gene interaction, gene-drug interaction and epigenetic regulatory annotation for over 70,156 items of differentially expressed genes associated with 7 commonly abused substances, including alcohol, nicotine, cocaine, morphine, heroin, methamphetamine, and amphetamine, across three species (human, mouse, rat). We also collected 1,141 addiction-related experimentally validated genes by techniques such as RT-PCR, northern blot and in situ hybridization. 
The easy-to-use web interface of AddictGene (http://159.226.67.237/sun/addictgedb/) allows users to search and browse multidimensional data on DEGs of their interest: 1) detailed gene-specific information extracted from the original studies; 2) basic information about the specific gene extracted from NCBI; 3) SNP associated with substance dependence and other psychiatry disorders; 4) expression alteration of specific gene in other psychiatric disorders; 5) expression patterns of interested gene across 31 primary and 54 secondary human tissues; 6) functional annotation of interested gene; 7) epigenetic regulators involved in the alteration of specific genes, including histone modifications and DNA methylation; 8) protein-protein interaction for functional linkage with interested gene; 9) drug-gene interaction for potential druggability. AddictGene offers a valuable repository for researchers to study the molecular mechanisms underlying addiction, and might provide valuable insights into potential therapies for drug abuse and relapse.",AddictGene,0.997377753,NA,0,AddictGene,0.997377753,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/19/2021 +31648087,http://www.rxnfinder.org/additivechem,"AdditiveChem: A comprehensive bioinformatics knowledge-base for food additive chemicals. Food additives are considered to be the catalysts and headstones of the modern food industry, affecting every step of food production, processing, and storage. The urgent need for a comprehensive curation of food additives, including their molecular structures, biological activities, and precise toxicological evaluations, prompted the creation of the AdditiveChem database (http://www.rxnfinder.org/additivechem/). 
This database has curated >9064 types of food additives, along with their molecular structure, chemical and physical properties, absorption, distribution, metabolism, excretion and toxicity properties, biosynthesis and biodegradation methods, usage specifications, toxicological and risk assessment data, and targets in the human body from 16 databases to construct an efficient search platform for in silico preliminary evaluations. AdditiveChem database will enable an exploration of the relationship between the structure and function of food additives.",AdditiveChem,0.995760739,NA,0,AdditiveChem,0.995760739,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/11/2019 +33401309,http://ccsm.uth.edu/ADeditome,"ADeditome provides the genomic landscape of A-to-I RNA editing in Alzheimer's disease. . A-to-I RNA editing, contributing to nearly 90% of all editing events in human, has been reported to involve in the pathogenesis of Alzheimer's disease (AD) due to its roles in brain development and immune regulation, such as the deficient editing of GluA2 Q/R related to cell death and memory loss. Currently, there are urgent needs for the systematic annotations of A-to-I RNA editing events in AD. Here, we built ADeditome, the annotation database of A-to-I RNA editing in AD available at https://ccsm.uth.edu/ADeditome, aiming to provide a resource and reference for functional annotation of A-to-I RNA editing in AD to identify therapeutically targetable genes in an individual. We detected 1676 363 editing sites in 1524 samples across nine brain regions from ROSMAP, MayoRNAseq and MSBB. For these editing events, we performed multiple functional annotations including identification of specific and disease stage associated editing events and the influence of editing events on gene expression, protein recoding, alternative splicing and miRNA regulation for all the genes, especially for AD-related genes in order to explore the pathology of AD. 
Combing all the analysis results, we found 108 010 and 26 168 editing events which may promote or inhibit AD progression, respectively. We also found 5582 brain region-specific editing events with potentially dual roles in AD across different brain regions. ADeditome will be a unique resource for AD and drug research communities to identify therapeutically targetable editing events. Significance: ADeditome is the first comprehensive resource of the functional genomics of individual A-to-I RNA editing events in AD, which will be useful for many researchers in the fields of AD pathology, precision medicine, and therapeutic researches.",ADeditome,0.997124791,NA,0,ADeditome,0.997124791,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2021 +22080511,http://adhd.psych.ac.cn,"ADHDgene: a genetic database for attention deficit hyperactivity disorder. With a worldwide prevalence of ~5%, attention deficit hyperactivity disorder (ADHD) has become one of the most common psychiatric disorders. The polygenetic nature of ADHD indicates that multiple genes jointly contribute to the development of this complex disease. Studies aiming to explore genetic susceptibility of ADHD have been increasing in recent years. There is a growing need to integrate the genetic data from various genetic studies to provide a comprehensive data set and uniform access for convenience of in-depth data mining. So far, there has been no such effort for ADHD. To address the genetic complexity of ADHD, we developed the ADHDgene database by integrating ADHD-related genetic factors by profound literature reading. Based on the data from the literature, extended functional analysis, including linkage disequilibrium analysis, pathway-based analysis and gene mapping were performed to provide new insights into genetic causes of ADHD. Moreover, powerful search tools and a graphical browser were developed to facilitate the navigation of the data and data connections. 
As the first genetic database for ADHD, ADHDgene aims to provide researchers with a central genetic resource and analysis platform for ADHD and is freely available at http://adhd.psych.ac.cn/.",ADHDgene,0.933751583,NA,0,ADHDgene,0.933751583,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/10/2011 +23092397,http://www.admetexp.org,"admetSAR: a comprehensive source and free tool for assessment of chemical ADMET properties. Absorption, distribution, metabolism, excretion, and toxicity (ADMET) properties play key roles in the discovery/development of drugs, pesticides, food additives, consumer products, and industrial chemicals. This information is especially useful when to conduct environmental and human hazard assessment. The most critical rate limiting step in the chemical safety assessment workflow is the availability of high quality data. This paper describes an ADMET structure-activity relationship database, abbreviated as admetSAR. It is an open source, text and structure searchable, and continually updated database that collects, curates, and manages available ADMET-associated properties data from the published literature. In admetSAR, over 210,000 ADMET annotated data points for more than 96,000 unique compounds with 45 kinds of ADMET-associated properties, proteins, species, or organisms have been carefully curated from a large number of diverse literatures. The database provides a user-friendly interface to query a specific chemical profile, using either CAS registry number, common name, or structure similarity. In addition, the database includes 22 qualitative classification and 5 quantitative regression models with highly predictive accuracy, allowing to estimate ecological/mammalian ADMET properties for novel chemicals. 
AdmetSAR is accessible free of charge at http://www.admetexp.org.",admetSAR,0.990115881,NA,0,admetSAR,0.990115881,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2012 +27507885,http://ADPriboDB.leunglab.org,"ADPriboDB: The database of ADP-ribosylated proteins. ADP-ribosylation refers to the addition of one or more ADP-ribose units onto proteins post-translationally. This protein modification is often added by ADP-ribosyltransferases, commonly known as PARPs, but it can also be added by other enzymes, including sirtuins or bacterial toxins. While past literature has utilized a variety of methods to identify ADP-ribosylated proteins, recent proteomics studies bring the power of mass spectrometry to determine sites of the modification. To appreciate the diverse roles of ADP-ribosylation across the proteome, we have created ADPriboDB - a database of ADP-ribosylated proteins (http://ADPriboDB.leunglab.org). Each entry of ADPriboDB is annotated manually by at least two independent curators from the literature between January 1975 and July 2015. The current database includes over 12 400 protein entries from 459 publications, identifying 2389 unique proteins. Here, we describe the structure and the current state of ADPriboDB as well as the criteria for entry inclusion. Using this aggregate data, we identified a statistically significant enrichment of ADP-ribosylated proteins in non-membranous RNA granules. To our knowledge, ADPriboDB is the first publicly available database encapsulating ADP-ribosylated proteins identified from the past 40 years, with a hope to facilitate the research of both basic scientists and clinicians to better understand ADP-ribosylation at the molecular level.",ADPriboDB,0.997497678,NA,0,ADPriboDB,0.997497678,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/9/2016 +25361966,http://bioinf.xmu.edu.cn/ADReCS,"ADReCS: an ontology database for aiding standardization and hierarchical classification of adverse drug reaction terms. 
Adverse drug reactions (ADRs) are noxious and unexpected effects during normal drug therapy. They have caused significant clinical burden and been responsible for a large portion of new drug development failure. Molecular understanding and in silico evaluation of drug (or candidate) safety in laboratory is thus so desired, and unfortunately has been largely hindered by misuse of ADR terms. The growing impact of bioinformatics and systems biology in toxicological research also requires a specialized ADR term system that works beyond a simple glossary. Adverse Drug Reaction Classification System (ADReCS; http://bioinf.xmu.edu.cn/ADReCS) is a comprehensive ADR ontology database that provides not only ADR standardization but also hierarchical classification of ADR terms. The ADR terms were pre-assigned with unique digital IDs and at the same time were well organized into a four-level ADR hierarchy tree for building an ADR-ADR relation. Currently, the database covers 6544 standard ADR terms and 34,796 synonyms. It also incorporates information of 1355 single active ingredient drugs and 134,022 drug-ADR pairs. In summary, ADReCS offers an opportunity for direct computation on ADR terms and also provides clues to mining common features underlying ADRs.",ADReCS,0.995718002,Adverse Drug Reaction Classification System,0.979371885,ADReCS,0.995718002,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/31/2014 +32291734,http://inetapps.nus.edu.sg/aelp,"The Auditory English Lexicon Project: A multi-talker, multi-region psycholinguistic database of 10,170 spoken words and nonwords. The Auditory English Lexicon Project (AELP) is a multi-talker, multi-region psycholinguistic database of 10,170 spoken words and 10,170 spoken nonwords. Six tokens of each stimulus were recorded as 44.1-kHz, 16-bit, mono WAV files by native speakers of American, British, and Singapore English, with one from each gender. 
Intelligibility norms, as determined by average identification scores and confidence ratings from between 15 and 20 responses per token, were obtained from 561 participants. Auditory lexical decision accuracies and latencies, with between 25 and 36 responses per token, were obtained from 438 participants. The database also includes a variety of lexico-semantic variables and structural indices for the words and nonwords, as well as participants' individual difference measures such as age, gender, language background, and proficiency. Taken together, there are a total of 122,040 sound files and over 4 million behavioral data points in the AELP. We describe some of the characteristics of this database. This resource is freely available from a website ( https://inetapps.nus.edu.sg/aelp/ ) hosted by the Department of Psychology at the National University of Singapore.",AELP,0.983020544,Auditory English Lexicon Project,0.609619483,AELP,0.983020544,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2020 +22682155,http://www.affinomics.org,"European and international collaboration in affinity proteomics. In affinity proteomics, specific protein-binding molecules (a.k.a. binders), principally antibodies, are applied as reagents in proteome analysis. In recent years, advances in binder technologies have created the potential for an unprecedented view on protein expression and distribution patterns in plasma, cells and tissues and increasingly on protein function. Particular strengths of affinity proteomics methods include detecting proteins in their natural environments of cell or tissue, high sensitivity and selectivity for detection of low abundance proteins and exploiting binding actions such as functional interference in living cells. To maximise the use and impact of affinity reagents, it will be essential to create comprehensive, standardised binder collections. 
With this in mind, the EU FP7 programme AFFINOMICS (http://www.affinomics.org), together with the preceding EU programmes ProteomeBinders and AffinityProteome, aims to extend affinity proteomics research by generating a large-scale resource of validated protein-binding molecules for characterisation of the human proteome. Activity is directed at producing binders to about 1000 protein targets, primarily in signal transduction and cancer, by establishing a high throughput, coordinated production pipeline. An important aspect of AFFINOMICS is the development of highly efficient recombinant selection methods, based on phage, cell and ribosome display, capable of producing high quality binders at greater throughput and lower cost than hitherto. The programme also involves development of innovative and sensitive technologies for specific detection of target proteins and their interactions, and deployment of binders in proteomics studies of clinical relevance. The need for such binder generation programmes is now recognised internationally, with parallel initiatives in the USA for cancer (NCI) and transcription factors (NIH) and within the Human Proteome Organisation (HUPO). The papers in this volume of New Biotechnology are all contributed by participants at the 5th ESF Workshop on Affinity Proteomics organised by the AFFINOMICS consortium and held in Alpbach, Austria, in March 2011.",AFFINOMICS,0.97896564,NA,0,AFFINOMICS,0.97896564,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2012 +23993619,http://www.fao.org/infoods/biodiversity/index_en.stm,"Review of availability of food composition data for fish and shellfish. The FAO/INFOODS database on fish and shellfish (aFiSh) is a collection of analytical data from primary sources and holds values for 2,277 entries on raw and processed food with sufficient quality. 
Most data were entered on fatty acids (60%), followed by macronutrients and their fractions (16%), minerals (10%), amino acids (7%), (pro)vitamins (2%), heavy metals (2%) and other components (3%). Information on several factors that contribute to the variation of compositional data (e.g., biodiversity, catch season, habitat, size and part of fish/shellfish analysed) as well as the bibliographic references are presented alongside with each food entry. The data were published in the FAO/INFOODS Food Composition Database for Biodiversity (BioFoodComp2.0) and in the FAO/INFOODS Analytical Food Composition Database (AnFooD1.0), freely available at the INFOODS webpage http://www.fao.org/infoods/biodiversity/index_en.stm. The provision of easy accessible, analytical compositional data should be seen as stimulation for researchers and compilers to incorporate more analytical and detailed data of fish and shellfish into future food composition tables and databases and to improve dietary assessment tools.",aFiSh,0.991861537,NFOODS database on fish and shellfish,0.929391642,aFiSh,0.991861537,1,23601383,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,NA,7/11/2013 +"25414323, 29858801, 33755549",http://www.allelefrequencies.net,"Allele frequency net 2015 update: new features for HLA epitopes, KIR and disease and HLA adverse drug reaction associations. It has been 12 years since the Allele Frequency Net Database (AFND; http://www.allelefrequencies.net) was first launched, providing the scientific community with an online repository for the storage of immune gene frequencies in different populations across the world. There have been a significant number of improvements from the first version, making AFND a primary resource for many clinical and scientific areas including histocompatibility, immunogenetics, pharmacogenetics and anthropology studies, among many others. 
The most widely used part of AFND stores population frequency data (alleles, genes or haplotypes) related to human leukocyte antigens (HLA), killer-cell immunoglobulin-like receptors (KIR), major histocompatibility complex class I chain-related genes (MIC) and a number of cytokine gene polymorphisms. AFND now contains >1400 populations from more than 10 million healthy individuals. Here, we report how the main features of AFND have been updated to include a new section on 'HLA epitope' frequencies in populations, a new section capturing the results of studies identifying HLA associations with adverse drug reactions (ADRs) and one for the examination of infectious and autoimmune diseases associated with KIR polymorphisms-thus extending AFND to serve a new user base in these growing areas of research. New criteria on data quality have also been included.",AFND,0.989334464,Allele Frequency Net Database,0.951628447,AFND,0.989334464,3,23584834,NA,NA,NA,do not merge,NA,NA,NA,NA,10/21/2020 +25048123,http://bioinfo.net.in/AgAbDb.htm,"Antigen-Antibody Interaction Database (AgAbDb): a compendium of antigen-antibody interactions. Antigen-Antibody Interaction Database (AgAbDb) is an immunoinformatics resource developed at the Bioinformatics Centre, University of Pune, and is available online at http://bioinfo.net.in/AgAbDb.htm. Antigen-antibody interactions are a special class of protein-protein interactions that are characterized by high affinity and strict specificity of antibodies towards their antigens. Several co-crystal structures of antigen-antibody complexes have been solved and are available in the Protein Data Bank (PDB). AgAbDb is a derived knowledgebase developed with an objective to compile, curate, and analyze determinants of interactions between the respective antigen-antibody molecules. AgAbDb lists not only the residues of binding sites of antigens and antibodies, but also interacting residue pairs. 
It also helps in the identification of interacting residues and buried residues that constitute antibody-binding sites of protein and peptide antigens. The Antigen-Antibody Interaction Finder (AAIF), a program developed in-house, is used to compile the molecular interactions, viz. van der Waals interactions, salt bridges, and hydrogen bonds. A module for curating water-mediated interactions has also been developed. In addition, various residue-level features, viz. accessible surface area, data on epitope segment, and secondary structural state of binding site residues, are also compiled. Apart from the PDB numbering, Wu-Kabat numbering and explicit definitions of complementarity-determining regions are provided for residues of antibodies. The molecular interactions can be visualized using the program Jmol. AgAbDb can be used as a benchmark dataset to validate algorithms for prediction of B-cell epitopes. It can as well be used to improve accuracy of existing algorithms and to design new algorithms. AgAbDb can also be used to design mimotopes representing antigens as well as aid in designing processes leading to humanization of antibodies.",AgAbDb,0.995833278,Antigen-Antibody Interaction Database,0.982613113,AgAbDb,0.995833278,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2014 +30239679,http://www.agbiodata.org,"AgBioData consortium recommendations for sustainable genomics and genetics databases for agriculture. . The future of agricultural research depends on data. The sheer volume of agricultural biological data being produced today makes excellent data management essential. Governmental agencies, publishers and science funders require data management plans for publicly funded research. Furthermore, the value of data increases exponentially when they are properly stored, described, integrated and shared, so that they can be easily utilized in future analyses. 
AgBioData (https://www.agbiodata.org) is a consortium of people working at agricultural biological databases, data archives and knowledgbases who strive to identify common issues in database development, curation and management, with the goal of creating database products that are more Findable, Accessible, Interoperable and Reusable. We strive to promote authentic, detailed, accurate and explicit communication between all parties involved in scientific data. As a step toward this goal, we present the current state of biocuration, ontologies, metadata and persistence, database platforms, programmatic (machine) access to data, communication and sustainability with regard to data curation. Each section describes challenges and opportunities for these topics, along with recommendations and best practices.",AgBioData,0.996884644,NA,0,AgBioData,0.996884644,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24217911,http://agefactdb.jenage.de,"AgeFactDB--the JenAge Ageing Factor Database--towards data integration in ageing research. AgeFactDB (http://agefactdb.jenage.de) is a database aimed at the collection and integration of ageing phenotype data including lifespan information. Ageing factors are considered to be genes, chemical compounds or other factors such as dietary restriction, whose action results in a changed lifespan or another ageing phenotype. Any information related to the effects of ageing factors is called an observation and is presented on observation pages. To provide concise access to the complete information for a particular ageing factor, corresponding observations are also summarized on ageing factor pages. In a first step, ageing-related data were primarily taken from existing databases such as the Ageing Gene Database--GenAge, the Lifespan Observations Database and the Dietary Restriction Gene Database--GenDR. In addition, we have started to include new ageing-related information. 
Based on homology data taken from the HomoloGene Database, AgeFactDB also provides observation and ageing factor pages of genes that are homologous to known ageing-related genes. These homologues are considered as candidate or putative ageing-related genes. AgeFactDB offers a variety of search and browse options, and also allows the download of ageing factor or observation lists in TSV, CSV and XML formats.",AgeFactDB,0.997452736,JenAge Ageing Factor Database,0.809117585,AgeFactDB,0.997452736,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2013 +26602690,http://agingchart.org,"Aging Chart: a community resource for rapid exploratory pathway analysis of age-related processes. Aging research is a multi-disciplinary field encompassing knowledge from many areas of basic, applied and clinical research. Age-related processes occur on molecular, cellular, tissue, organ, system, organismal and even psychological levels, trigger the onset of multiple debilitating diseases and lead to a loss of function, and there is a need for a unified knowledge repository designed to track, analyze and visualize the cause and effect relationships and interactions between the many elements and processes on all levels. Aging Chart (http://agingchart.org/) is a new, community-curated collection of aging pathways and knowledge that provides a platform for rapid exploratory analysis. Building on an initial content base constructed by a team of experts from peer-reviewed literature, users can integrate new data into biological pathway diagrams for a visible, intuitive, top-down framework of aging processes that fosters knowledge-building and collaboration. 
As the body of knowledge in aging research is rapidly increasing, an open visual encyclopedia of aging processes will be useful to both the new entrants and experts in the field.",Aging Chart,0.833349913,NA,0,Aging Chart,0.833349913,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/23/2015 +30231853,http://expression.latrobe.edu.au/agriseqdb,"AgriSeqDB: an online RNA-Seq database for functional studies of agriculturally relevant plant species. Background The genome-wide expression profile of genes in different tissues/cell types and developmental stages is a vital component of many functional genomic studies. Transcriptome data obtained by RNA-sequencing (RNA-Seq) is often deposited in public databases that are made available via data portals. Data visualization is one of the first steps in assessment and hypothesis generation. However, these databases do not typically include visualization tools and establishing one is not trivial for users who are not computational experts. This, as well as the various formats in which data is commonly deposited, makes the processes of data access, sharing and utility more difficult. Our goal was to provide a simple and user-friendly repository that meets these needs for data-sets from major agricultural crops. Description AgriSeqDB ( https://expression.latrobe.edu.au/agriseqdb ) is a database for viewing, analysing and interpreting developmental and tissue/cell-specific transcriptome data from several species, including major agricultural crops such as wheat, rice, maize, barley and tomato. The disparate manner in which public transcriptome data is often warehoused and the challenge of visualizing raw data are both major hurdles to data reuse. The popular eFP browser does an excellent job of presenting transcriptome data in an easily interpretable view, but previous implementation has been mostly on a case-by-case basis. 
Here we present an integrated visualisation database of transcriptome data-sets from six species that did not previously have public-facing visualisations. We combine the eFP browser, for gene-by-gene investigation, with the Degust browser, which enables visualisation of all transcripts across multiple samples. The two visualisation interfaces launch from the same point, enabling users to easily switch between analysis modes. The tools allow users, even those without bioinformatics expertise, to mine into data-sets and understand the behaviour of transcripts of interest across samples and time. We have also incorporated an additional graphic download option to simplify incorporation into presentations or publications. Conclusion Powered by eFP and Degust browsers, AgriSeqDB is a quick and easy-to-use platform for data analysis and visualization in five crops and Arabidopsis. Furthermore, it provides a tool that makes it easy for researchers to share their data-sets, promoting research collaborations and data-set reuse.",AgriSeqDB,0.997874022,NA,0,AgriSeqDB,0.997874022,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/19/2018 +22084200,"http://ahdb.ee.ncku.edu.tw/, http://ahdb.csbb.ntu.edu.tw","AH-DB: collecting protein structure pairs before and after binding. This work presents the Apo-Holo DataBase (AH-DB, http://ahdb.ee.ncku.edu.tw/ and http://ahdb.csbb.ntu.edu.tw/), which provides corresponding pairs of protein structures before and after binding. Conformational transitions are commonly observed in various protein interactions that are involved in important biological functions. For example, copper-zinc superoxide dismutase (SOD1), which destroys free superoxide radicals in the body, undergoes a large conformational transition from an 'open' state (apo structure) to a 'closed' state (holo structure). Many studies have utilized collections of apo-holo structure pairs to investigate the conformational transitions and critical residues. 
However, the collection process is usually complicated, varies from study to study and produces a small-scale data set. AH-DB is designed to provide an easy and unified way to prepare such data, which is generated by identifying/mapping molecules in different Protein Data Bank (PDB) entries. Conformational transitions are identified based on a refined alignment scheme to overcome the challenge that many structures in the PDB database are only protein fragments and not complete proteins. There are 746,314 apo-holo pairs in AH-DB, which is about 30 times those in the second largest collection of similar data. AH-DB provides sophisticated interfaces for searching apo-holo structure pairs and exploring conformational transitions from apo structures to the corresponding holo structures.",AH-DB,0.99563925,Apo-Holo DataBase,0.77852336,AH-DB,0.99563925,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/13/2011 +28376796,http://public.sylics.com,"AHCODA-DB: a data repository with web-based mining tools for the analysis of automated high-content mouse phenomics data. Background Systematic, standardized and in-depth phenotyping and data analyses of rodent behaviour empowers gene-function studies, drug testing and therapy design. However, no data repositories are currently available for standardized quality control, data analysis and mining at the resolution of individual mice. Description Here, we present AHCODA-DB, a public data repository with standardized quality control and exclusion criteria aimed to enhance robustness of data, enabled with web-based mining tools for the analysis of individually and group-wise collected mouse phenotypic data. AHCODA-DB allows monitoring in vivo effects of compounds collected from conventional behavioural tests and from automated home-cage experiments assessing spontaneous behaviour, anxiety and cognition without human interference. 
AHCODA-DB includes such data from mutant mice (transgenics, knock-out, knock-in), (recombinant) inbred strains, and compound effects in wildtype mice and disease models. AHCODA-DB provides real time statistical analyses with single mouse resolution and versatile suite of data presentation tools. On March 9th, 2017 AHCODA-DB contained 650 k data points on 2419 parameters from 1563 mice. Conclusion AHCODA-DB provides users with tools to systematically explore mouse behavioural data, both with positive and negative outcome, published and unpublished, across time and experiments with single mouse resolution. The standardized (automated) experimental settings and the large current dataset (1563 mice) in AHCODA-DB provide a unique framework for the interpretation of behavioural data and drug effects. The use of common ontologies allows data export to other databases such as the Mouse Phenome Database. Unbiased presentation of positive and negative data obtained under the highly standardized screening conditions increase cost efficiency of publicly funded mouse screening projects and help to reach consensus conclusions on drug responses and mouse behavioural phenotypes. The website is publicly accessible through https://public.sylics.com and can be viewed in every recent version of all commonly used browsers.",AHCODA-DB,0.993691454,NA,0,AHCODA-DB,0.993691454,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/4/2017 +25392419,http://crdd.osdd.net/raghava/ahtpdb,"AHTPDB: a comprehensive platform for analysis and presentation of antihypertensive peptides. AHTPDB (http://crdd.osdd.net/raghava/ahtpdb/) is a manually curated database of experimentally validated antihypertensive peptides. Information pertaining to peptides with antihypertensive activity was collected from research articles and from various peptide repositories. These peptides were derived from 35 major sources that include milk, egg, fish, pork, chicken, soybean, etc. 
In AHTPDB, most of the peptides belong to a family of angiotensin-I converting enzyme inhibiting peptides. The current release of AHTPDB contains 5978 peptide entries among which 1694 are unique peptides. Each entry provides detailed information about a peptide like sequence, inhibitory concentration (IC50), toxicity/bitterness value, source, length, molecular mass and information related to purification of peptides. In addition, the database provides structural information of these peptides that includes predicted tertiary and secondary structures. A user-friendly web interface with various tools has been developed to retrieve and analyse the data. It is anticipated that AHTPDB will be a useful and unique resource for the researchers working in the field of antihypertensive peptides.",AHTPDB,0.997885823,NA,0,AHTPDB,0.997885823,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +31123286,http://956023.ichengyun.net/AICD/index.php,"AICD: an integrated anti-inflammatory compounds database for drug discovery. Systemic or local inflammation drives the pathogenesis of various human diseases. Small compounds with anti-inflammatory properties hold great potential for clinical translation. Over recent decades, many compounds have been screened for their action against inflammation-related targets. Databases that integrate the physicochemical properties and bioassay results of these compounds are lacking. We created an ""Anti-Inflammatory Compounds Database"" (AICD) to deposit compounds with potential anti-inflammation activities. A total of 232 inflammation-related targets were recruited by the AICD. Gene set enrichment analysis showed that these targets were involved in various human diseases. Bioassays of these targets were collected from open-access databases and adopted to extract 79,781 small molecules with information on chemical properties, candidate targets, bioassay models and bioassay results. 
Principal component analysis demonstrated that these deposited compounds were closely related to US Food and Drug Administration-approved drugs with respect to chemical space and chemical properties. Finally, pathway-based screening for drug combination/multi-target drugs provided a case study for drug discovery using the AICD. The AICD focuses on inflammation-related drug targets and contains substantial candidate compounds with high chemical diversity and good drug-like properties. It could be serviced for the discovery of anti-inflammatory medicines and can be accessed freely at http://956023.ichengyun.net/AICD/index.php .",AICD,0.981977388,nti-Inflammatory Compounds Database,0.904850148,AICD,0.981977388,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/23/2019 +21148158,http://www.aid-register.uk-essen.de,"Translational research network and patient registry for auto-inflammatory diseases. Objective Auto-inflammatory diseases (AIDs) are characterized by recurrent self-limiting systemic inflammation. In a multicentre effort, we set out to register genetic, epidemiological and clinical features as well as prognostic factors of these diseases by prospective longitudinal and long-term documentation, in order to define novel AIDs and to better understand treatment responses and outcome. Methods In 2009, a federally funded clinical and research consortium (AID-Net) was established, including an online registry for AIDs (http://www.aid-register.uk-essen.de). Inclusion criteria are disease-associated mutations for hereditary periodic fever syndromes [FMF, hyperimmunoglobulinaemia D and periodic fever syndrome (HIDS), TNF receptor 1-associated periodic syndrome (TRAPS) and cryopyrin-associated periodic syndrome (CAPS)], or, alternatively, clinically confirmed AID, systemic-onset JIA (SoJIA) and periodic fever, aphthous stomatitis, pharyngitis and adenopathy (PFAPA) syndrome with unknown genetic background. 
Patients were recruited to the registry and patient material was deposited in biomaterial banks (DNA/serum). In addition, basic research projects were initiated that focus on molecular mechanisms of AID. Results During the first 9 months, 117 patients (65 males, 52 females; age 1-21 years) have been recorded and classified as FMF (n=84), HIDS (n=1), TRAPS (n=3) and CAPS (n=1); clinically confirmed AID (n=5); SoJIA (n=22); and PFAPA (n=1). One hundred and fifty blood samples of 18 patients were included in biomaterial banks. Conclusion Recruitment and follow-up of patients with AID will enable us to comprehensively address the correlation between clinical and epidemiological data, genetics and biomarkers. The translational approach may help to identify genetic or inflammatory markers relevant for the course and outcome of diseases.",AID-Net,0.68992514,NA,0,AID-Net,0.68992514,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2011 +33485793,http://rdvd.cdc.gov.tw/cgMLST,"cgMLST@Taiwan: A web service platform for Vibrio cholerae cgMLST profiling and global strain tracking. Background Cholera, a rapidly dehydrating diarrheal disease caused by toxigenic Vibrio cholerae, is a leading cause of morbidity and mortality in some regions of the world. Core genome multilocus sequence typing (cgMLST) is a promising approach in generating genetic fingerprints from whole-genome sequencing (WGS) data for strain comparison among laboratories. Methods We constructed a V. cholerae core gene allele database using an in-house developed computational pipeline, a database with cgMLST profiles converted from genomic sequences from the National Center for Biotechnology Information, and built a REST-based web accessible via the Internet. Results We built a web service platform-cgMLST@Taiwan and installed a V. cholerae allele database, a cgMLST profile database, and computational tools for generating V. 
cholerae cgMLST profiles (based on 3,017 core genes), performing rapid global strain tracking, and clustering analysis of cgMLST profiles. This web-based platform provides services to researchers, public health microbiologists, and physicians who use WGS data for the investigation of cholera outbreaks and tracking of V. cholerae strain transmission across countries and geographic regions. The cgMLST@Taiwan is accessible at http://rdvd.cdc.gov.tw/cgMLST.",aiwan,0.590910196,NA,0,aiwan,0.590910196,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/15/2021 +26644461,http://alcodb.jp,"ALCOdb: Gene Coexpression Database for Microalgae. In the era of energy and food shortage, microalgae have gained much attention as promising sources of biofuels and food ingredients. However, only a small fraction of microalgal genes have been functionally characterized. Here, we have developed the Algae Gene Coexpression database (ALCOdb; http://alcodb.jp), which provides gene coexpression information to survey gene modules for a function of interest. ALCOdb currently supports two model algae: the green alga Chlamydomonas reinhardtii and the red alga Cyanidioschyzon merolae. Users can retrieve coexpression information for genes of interest through three unique data pages: (i) Coexpressed Gene List; (ii) Gene Information; and (iii) Coexpressed Gene Network. In addition to the basal coexpression information, ALCOdb also provides several advanced functionalities such as an expression profile viewer and a differentially expressed gene search tool. Using these user interfaces, we demonstrated that our gene coexpression data have the potential to detect functionally related genes and are useful in extrapolating the biological roles of uncharacterized genes. 
ALCOdb will facilitate molecular and biochemical studies of microalgal biological phenomena, such as lipid metabolism and organelle development, and promote the evolutionary understanding of plant cellular systems.",ALCOdb,0.997920737,Algae Gene Coexpression database,0.973787616,ALCOdb,0.997920737,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/7/2015 +25853886,http://res.xaut.edu.cn/aldb/index.jsp,"ALDB: a domestic-animal long noncoding RNA database. Background Long noncoding RNAs (lncRNAs) have attracted significant attention in recent years due to their important roles in many biological processes. Domestic animals constitute a unique resource for understanding the genetic basis of phenotypic variation and are ideal models relevant to diverse areas of biomedical research. With improving sequencing technologies, numerous domestic-animal lncRNAs are now available. Thus, there is an immediate need for a database resource that can assist researchers to store, organize, analyze and visualize domestic-animal lncRNAs. Results The domestic-animal lncRNA database, named ALDB, is the first comprehensive database with a focus on the domestic-animal lncRNAs. It currently archives 12,103 pig intergenic lncRNAs (lincRNAs), 8,923 chicken lincRNAs and 8,250 cow lincRNAs. In addition to the annotations of lincRNAs, it offers related data that is not available yet in existing lncRNA databases (lncRNAdb and NONCODE), such as genome-wide expression profiles and animal quantitative trait loci (QTLs) of domestic animals. Moreover, a collection of interfaces and applications, such as the Basic Local Alignment Search Tool (BLAST), the Generic Genome Browser (GBrowse) and flexible search functionalities, are available to help users effectively explore, analyze and download data related to domestic-animal lncRNAs. Conclusions ALDB enables the exploration and comparative analysis of lncRNAs in domestic animals. 
A user-friendly web interface, integrated information and tools make it valuable to researchers in their studies. ALDB is freely available from http://res.xaut.edu.cn/aldb/index.jsp.",ALDB,0.989124298,nimal,0.511159897,ALDB,0.989124298,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/8/2015 +30357390,http://aledb.org,"ALEdb 1.0: a database of mutations from adaptive laboratory evolution experimentation. Adaptive Laboratory Evolution (ALE) has emerged as an experimental approach to discover causal mutations that confer desired phenotypic functions. ALE not only represents a controllable experimental approach to systematically discover genotype-phenotype relationships, but also allows for the revelation of the series of genetic alterations required to acquire the new phenotype. Numerous ALE studies have been published, providing a strong impetus for developing databases to warehouse experimental evolution information and make it retrievable for large-scale analysis. Here, the first step towards establishing this resource is presented: ALEdb (http://aledb.org). This initial release contains over 11√ɬÉ√ǬÇ√ɬÇ√Ǭ†000 mutations that have been discovered from eleven ALE publications. ALEdb (i) is a web-based platform that comprehensively reports on ALE acquired mutations and their conditions, (ii) reports key mutations using previously established trends, (iii) enables a search-driven workflow to enhance user mutation functional analysis through mutation cross-reference, (iv) allows exporting of mutation query results for custom analysis, (v) includes a bibliome describing the databased experiment publications and (vi) contains experimental evolution mutations from multiple model organisms. 
Thus, ALEdb is an informative platform which will become increasingly revealing as the number of reported ALE experiments and identified mutations continue to expand.",ALEdb,0.993111849,NA,0,ALEdb,0.993111849,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +24556904,http://plantpathology.ba.ars.usda.gov/alfalfatfdb.html,"In silico identification of transcription factors in Medicago sativa using available transcriptomic resources. Transcription factors (TFs) are proteins that govern organismal development and response to the environment by regulating gene expression. Information on the amount and diversity of TFs within individual plant species is critical for understanding of their biological roles and evolutionary history across the plant kingdom. Currently, only scattered information on separate TFs is available for alfalfa, the most extensively cultivated forage legume in the world. In the meantime, several large transcriptomic resources that can be used to identify and characterize alfalfa TF genes are freely accessible online. In this study, we have performed an in silico analysis of transcriptome data generated in our laboratory and publicly acquirable from other sources to reveal and systematize alfalfa transcription factors. Transcriptome-wide mining enabled prediction of 983 TFs along with their sequence features and putative phylogenies of the largest families. All data were assembled into a simple open-access database named AlfalfaTFDB ( http://plantpathology.ba.ars.usda.gov/alfalfatfdb.html ). Transcriptomic analysis used in this work represents an effective approach for the identification of TF genes in plants with incomplete genomes, such as alfalfa. 
Integrated TF repertoires of Medicago sativa will provide an important tool for studying regulation of gene expression in other complex non-model species of agricultural significance.",AlfalfaTFDB,0.975518227,NA,0,AlfalfaTFDB,0.975518227,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/21/2014 +34361108,http://bioinfo.usu.edu/alfanet,"alfaNET: A Database of Alfalfa-Bacterial Stem Blight Protein-Protein Interactions Revealing the Molecular Features of the Disease-causing Bacteria. . Alfalfa has emerged as one of the most important forage crops, owing to its wide adaptation and high biomass production worldwide. In the last decade, the emergence of bacterial stem blight (caused by Pseudomonas syringae pv. syringae ALF3) in alfalfa has caused around 50% yield losses in the United States. Studies are being conducted to decipher the roles of the key genes and pathways regulating the disease, but due to the sparse knowledge about the infection mechanisms of Pseudomonas, the development of resistant cultivars is hampered. The database alfaNET is an attempt to assist researchers by providing comprehensive Pseudomonas proteome annotations, as well as a host-pathogen interactome tool, which predicts the interactions between host and pathogen based on orthology. alfaNET is a user-friendly and efficient tool and includes other features such as subcellular localization annotations of pathogen proteins, gene ontology (GO) annotations, network visualization, and effector protein prediction. Users can also browse and search the database using particular keywords or proteins with a specific length. Additionally, the BLAST search tool enables the user to perform a homology sequence search against the alfalfa and Pseudomonas proteomes. With the successful implementation of these attributes, alfaNET will be a beneficial resource to the research community engaged in implementing molecular strategies to mitigate the disease. 
alfaNET is freely available for public use at http://bioinfo.usu.edu/alfanet/.",alfaNET,0.997048736,NA,0,alfaNET,0.997048736,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/3/2021 +22039151,http://alfred.med.yale.edu,"ALFRED: an allele frequency resource for research and teaching. ALFRED (http://alfred.med.yale.edu) is a free, web accessible, curated compilation of allele frequency data on DNA sequence polymorphisms in anthropologically defined human populations. Currently, ALFRED has allele frequency tables on over 663,400 polymorphic sites; 170 of them have frequency tables for more than 100 different population samples. In ALFRED, a population may have multiple samples with each 'sample' consisting of many individuals on which an allele frequency is based. There are 3566 population samples from 710 different populations with allele frequency tables on at least one polymorphism. Fifty of those population samples have allele frequency data for over 650,000 polymorphisms. Records also have active links to relevant resources (dbSNP, PharmGKB, OMIM, Ethnologue, etc.). The flexible search options and data display and download capabilities available through the web interface allow easy access to the large quantity of high-quality data in ALFRED.",ALFRED,0.992198706,NA,0,ALFRED,0.992198706,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2011 +28069893,http://alga-pras.riken.jp,"Alga-PrAS (Algal Protein Annotation Suite): A Database of Comprehensive Annotation in Algal Proteomes. Algae are smaller organisms than land plants and offer clear advantages in research over terrestrial species in terms of rapid production, short generation time and varied commercial applications. Thus, studies investigating the practical development of effective algal production are important and will improve our understanding of both aquatic and terrestrial plants. 
In this study we estimated multiple physicochemical and secondary structural properties of protein sequences, the predicted presence of post-translational modification (PTM) sites, and subcellular localization using a total of 510,123 protein sequences from the proteomes of 31 algal and three plant species. Algal species were broadly selected from green and red algae, glaucophytes, oomycetes, diatoms and other microalgal groups. The results were deposited in the Algal Protein Annotation Suite database (Alga-PrAS; http://alga-pras.riken.jp/), which can be freely accessed online.",Alga-PrAS,0.994979988,Algal Protein Annotation Suite database,0.78106205,Alga-PrAS,0.994979988,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +24628857,http://algaepath.itps.ncku.edu.tw,"AlgaePath: comprehensive analysis of metabolic pathways using transcript abundance data from next-generation sequencing in green algae. Background Algae are important non-vascular plants that have many research applications, including high species diversity, biofuel sources, and adsorption of heavy metals and, following processing, are used as ingredients in health supplements. The increasing availability of next-generation sequencing (NGS) data for algae genomes and transcriptomes has made the development of an integrated resource for retrieving gene expression data and metabolic pathway essential for functional analysis and systems biology. In a currently available resource, gene expression profiles and biological pathways are displayed separately, making it impossible to easily search current databases to identify the cellular response mechanisms. Therefore, in this work the novel AlgaePath database was developed to retrieve transcript abundance profiles efficiently under various conditions in numerous metabolic pathways. Description AlgaePath is a web-based database that integrates gene information, biological pathways, and NGS datasets for the green algae Chlamydomonas reinhardtii and Neodesmus sp. UTEX 2219-4. 
Users can search this database to identify transcript abundance profiles and pathway information using five query pages (Gene Search, Pathway Search, Differentially Expressed Genes (DEGs) Search, Gene Group Analysis, and Co-expression Analysis). The transcript abundance data of 45 and four samples from C. reinhardtii and Neodesmus sp. UTEX 2219-4, respectively, can be obtained directly on pathway maps. Genes that are differentially expressed between two conditions can be identified using Folds Search. The Gene Group Analysis page includes a pathway enrichment analysis, and can be used to easily compare the transcript abundance profiles of functionally related genes on a map. Finally, the Co-expression Analysis page can be used to search for co-expressed transcripts of a target gene. The results of the searches will provide a valuable reference for designing further experiments and for elucidating critical mechanisms from high-throughput data. Conclusions AlgaePath is an effective interface that can be used to clarify the transcript response mechanisms in different metabolic pathways under various conditions. Importantly, AlgaePath can be mined to identify critical mechanisms based on high-throughput sequencing. To our knowledge, AlgaePath is the most comprehensive resource for integrating numerous databases and analysis tools in algae. The system can be accessed freely online at http://algaepath.itps.ncku.edu.tw.",AlgaePath,0.996096075,NA,0,AlgaePath,0.996096075,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/14/2014 +22659196,http://alkamid.ugent.be,"Alkamid database: Chemistry, occurrence and functionality of plant N-alkylamides. Ethnopharmacological relevance N-Alkylamides (NAAs) are a promising group of bioactive compounds, which are anticipated to act as important lead compounds for plant protection and biocidal products, functional food, cosmeceuticals and drugs in the next decennia. 
These molecules, currently found in more than 25 plant families and with a wide structural diversity, exert a variety of biological-pharmacological effects and are of high ethnopharmacological importance. However, information is scattered in literature, with different, often unstandardized, pharmacological methodologies being used. Therefore, a comprehensive NAA database (acronym: Alkamid) was constructed to collect the available structural and functional NAA data, linked to their occurrence in plants (family, tribe, species, genus). Materials and methods For loading information in the database, literature data was gathered over the period 1950-2010, by using several search engines. In order to represent the collected information about NAAs, the plants in which they occur and the functionalities for which they have been examined, a relational database is constructed and implemented on a MySQL back-end. Results The database is supported by describing the NAA plant-, functional- and chemical-space. The chemical space includes a NAA classification, according to their fatty acid and amine structures. Conclusions The Alkamid database (publicly available on the website http://alkamid.ugent.be/) is not only a central information point, but can also function as a useful tool to prioritize the NAA choice in the evaluation of their functionality, to perform data mining leading to quantitative structure-property relationships (QSPRs), functionality comparisons, clustering, plant biochemistry and taxonomic evaluations.",Alkamid,0.985504031,NA,0,Alkamid,0.985504031,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/30/2012 +23193282,http://www.brain-map.org,"Allen Brain Atlas: an integrated spatio-temporal portal for exploring the central nervous system. 
The Allen Brain Atlas (http://www.brain-map.org) provides a unique online public resource integrating extensive gene expression data, connectivity data and neuroanatomical information with powerful search and viewing tools for the adult and developing brain in mouse, human and non-human primate. Here, we review the resources available at the Allen Brain Atlas, describing each product and data type [such as in situ hybridization (ISH) and supporting histology, microarray, RNA sequencing, reference atlases, projection mapping and magnetic resonance imaging]. In addition, standardized and unique features in the web applications are described that enable users to search and mine the various data sets. Features include both simple and sophisticated methods for gene searches, colorimetric and fluorescent ISH image viewers, graphical displays of ISH, microarray and RNA sequencing data, Brain Explorer software for 3D navigation of anatomy and gene expression, and an interactive reference atlas viewer. In addition, cross data set searches enable users to query multiple Allen Brain Atlas data sets simultaneously. All of the Allen Brain Atlas resources can be accessed through the Allen Brain Atlas data portal.",Allen Brain Atlas,0.758832355,NA,0,Allen Brain Atlas,0.758832355,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/28/2012 +25097382,http://nabic.rda.go.kr/allergen,"AllergenPro: an integrated database for allergenicity analysis and prediction. Unlabelled The National Agricultural Biotechnology Information Center (NABIC) reconstructed an AllergenPro database for allergenic proteins analysis and allergenicity prediction. The AllergenPro is an integrated web-based system providing information about allergen in foods, microorganisms, animals and plants. The allergen database has the three main features namely, (1) allergen list with epitopes, (2) searching of allergen using keyword, and (3) methods for allergenicity prediction. 
This updated AllergenPro outputs the search based allergen information through a user-friendly web interface, and users can run tools for allergenicity prediction using three different methods namely, (1) FAO/WHO, (2) motif-based and (3) epitope-based methods. Availability The database is available for free at http://nabic.rda.go.kr/allergen/",AllergenPro,0.997101068,NA,0,AllergenPro,0.997101068,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/30/2014 +33494129,http://nhiss.nhis.or.kr,"Data resource profile: the allergic disease database of the Korean National Health Insurance Service. Researchers have been interested in probing how the environmental factors associated with allergic diseases affect the use of medical services. Considering this demand, we have constructed a database, named the Allergic Disease Database, based on the National Health Insurance Database (NHID). The NHID contains information on demographic and medical service utilization for approximately 99% of the Korean population. This study targeted 3 major allergic diseases, including allergic rhinitis, atopic dermatitis, and asthma. For the target diseases, our database provides daily medical service information, including the number of daily visits from 2013 and 2017, categorized by patients' characteristics such as address, sex, age, and duration of residence. We provide additional information, including yearly population, a number of patients, and averaged geocoding coordinates by eup, myeon, and dong district code (the smallest-scale administrative units in Korea). This information enables researchers to analyze how daily changes in the environmental factors of allergic diseases (e.g., particulate matter, sulfur dioxide, and ozone) in certain regions would influence patients' behavioral patterns of medical service utilization. Moreover, researchers can analyze long-term trends in allergic diseases and the health effects caused by environmental factors such as daily climate and pollution data. 
The advantages of this database are easy access to data, additional levels of geographic detail, time-efficient data-refining and processing, and a de-identification process that minimizes the exposure of identifiable personal information. All datasets included in the Allergic Disease Database can be downloaded by accessing the National Health Insurance Service data sharing webpage (https://nhiss.nhis.or.kr).",Allergic,0.594847083,NA,0,Allergic,0.594847083,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/21/2021 +21498548,http://allie.dbcls.jp,"Allie: a database and a search service of abbreviations and long forms. Many abbreviations are used in the literature especially in the life sciences, and polysemous abbreviations appear frequently, making it difficult to read and understand scientific papers that are outside of a reader's expertise. Thus, we have developed Allie, a database and a search service of abbreviations and their long forms (a.k.a. full forms or definitions). Allie searches for abbreviations and their corresponding long forms in a database that we have generated based on all titles and abstracts in MEDLINE. When a user query matches an abbreviation, Allie returns all potential long forms of the query along with their bibliographic data (i.e. title and publication year). In addition, for each candidate, co-occurring abbreviations and a research field in which it frequently appears in the MEDLINE data are displayed. This function helps users learn about the context in which an abbreviation appears. To deal with synonymous long forms, we use a dictionary called GENA that contains domain-specific terms such as gene, protein or disease names along with their synonymic information. Conceptually identical domain-specific terms are regarded as one term, and then conceptually identical abbreviation-long form pairs are grouped taking into account their appearance in MEDLINE. 
To keep up with new abbreviations that are continuously introduced, Allie has an automatic update system. In addition, the database of abbreviations and their long forms with their corresponding PubMed IDs is constructed and updated weekly. Database URL: The Allie service is available at http://allie.dbcls.jp/.",Allie,0.968772352,NA,0,Allie,0.968772352,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/15/2011 +22559261,http://alliumgenetics.org,"AlliumMap-A comparative genomics resource for cultivated Allium vegetables. Background Vegetables of the genus Allium are widely consumed but remain poorly understood genetically. Genetic mapping has been conducted in intraspecific crosses of onion (Allium cepa L.), A. fistulosum and interspecific crosses between A. roylei and these two species, but it has not been possible to access genetic maps and underlying data from these studies easily. Description An online comparative genomics database, AlliumMap, has been developed based on the GMOD CMap tool at http://alliumgenetics.org. It has been populated with curated data linking genetic maps with underlying markers and sequence data from multiple studies. It includes data from multiple onion mapping populations as well as the most closely related species A. roylei and A. fistulosum. Further onion EST-derived markers were evaluated in the A. cepa x A. roylei interspecific population, enabling merging of the AFLP-based maps. In addition, data concerning markers assigned in multiple studies to the Allium physical map using A. cepa-A. fistulosum alien monosomic addition lines have been compiled. The compiled data reveal extensive synteny between onion and A. fistulosum. Conclusions The database provides the first online resource providing genetic map and marker data from multiple Allium species and populations. The additional markers placed on the interspecific Allium map confirm the value of A. roylei as a valuable bridge between the genetics of onion and A. 
fistulosum and as a means to conduct efficient mapping of expressed sequence markers in Allium. The data presented suggest that comparative approaches will be valuable for genetic and genomic studies of onion and A. fistulosum. This online resource will provide a valuable means to integrate genetic and sequence-based explorations of Allium genomes.",AlliumMap,0.986065328,NA,0,AlliumMap,0.986065328,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/4/2012 +30365033,http://allomaps.bii.a-star.edu.sg,"AlloMAPS: allosteric mutation analysis and polymorphism of signaling database. AlloMAPS database provides data on the causality and energetics of allosteric communication obtained with the structure-based statistical mechanical model of allostery√ɬÉ√ǬÇ√ɬÇ√Ǭ†(SBSMMA). The database contains data on allosteric signaling in three sets of proteins and protein chains: (i) 46 proteins with comprehensively annotated functional and allosteric sites; (ii) 1908 protein chains from PDBselect set of chains with low (<25%) sequence identity; (iii) 33 proteins with more than 50 known pathological SNPs in each molecule. In addition to energetics of allosteric signaling between known functional and regulatory sites, allosteric modulation caused by the binding to these sites, by SNPs, and by mutations designated by the user can be explored. Allosteric Signaling Maps (ASMs), which are produced via the exhaustive computational scanning for stabilizing and destabilizing mutations and for the modulation range caused by the sequence position are available for each protein/protein chain in the database. We propose to use this database for evaluating the effects of allosteric signaling in the search for latent regulatory sites and in the design of allosteric sites and effectors. 
The database is freely available at: http://allomaps.bii.a-star.edu.sg.",AlloMAPS,0.996572554,NA,0,AlloMAPS,0.996572554,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +33852582,http://www.nipgr.ac.in/AlnC,"AlnC: An extensive database of long non-coding RNAs in angiosperms. Long non-coding RNAs (lncRNAs) are defined as transcripts of greater than 200 nucleotides that play a crucial role in various cellular processes such as the development, differentiation and gene regulation across all eukaryotes, including plant cells. Since the last decade, there has been a significant rise in our understanding of lncRNA molecular functions in plants, resulting in an exponential increase in lncRNA transcripts, while these went unannounced from the major Angiosperm plant species despite the availability of large-scale high throughput sequencing data in public repositories. We, therefore, developed a user-friendly, open-access web interface, AlnC (Angiosperm lncRNA Catalogue) for the exploration of lncRNAs in diverse Angiosperm plant species using recent 1000 plant (1KP) trancriptomes data. The current version of AlnC offers 10,855,598 annotated lncRNA transcripts across 682 Angiosperm plant species encompassing 809 tissues. To improve the user interface, we added features for browsing, searching, and downloading lncRNA data, interactive graphs, and an online BLAST service. Additionally, each lncRNA record is annotated with possible small open reading frames (sORFs) to facilitate the study of peptides encoded within lncRNAs. With this user-friendly interface, we anticipate that AlnC will provide a rich source of lncRNAs for small-and large-scale studies in a variety of flowering plants, as well as aid in the improvement of key characteristics in relevance to their economic importance. 
Database URL: http://www.nipgr.ac.in/AlnC.",AlnC,0.996692717,Catalogue,0.617510498,AlnC,0.996692717,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/14/2021 +22753137,http://alsod.iop.kcl.ac.uk,"ALSoD: A user-friendly online bioinformatics tool for amyotrophic lateral sclerosis genetics. Amyotrophic lateral sclerosis (ALS) is the commonest adult onset motor neuron disease, with a peak age of onset in the seventh decade. With advances in genetic technology, there is an enormous increase in the volume of genetic data produced, and a corresponding need for storage, analysis, and interpretation, particularly as our understanding of the relationships between genotype and phenotype mature. Here, we present a system to enable this in the form of the ALS Online Database (ALSoD at http://alsod.iop.kcl.ac.uk), a freely available database that has been transformed from a single gene storage facility recording mutations in the SOD1 gene to a multigene ALS bioinformatics repository and analytical instrument combining genotype, phenotype, and geographical information with associated analysis tools. These include a comparison tool to evaluate genes side by side or jointly with user configurable features, a pathogenicity prediction tool using a combination of computational approaches to distinguish variants with nonfunctional characteristics from disease-associated mutations with more dangerous consequences, and a credibility tool to enable ALS researchers to objectively assess the evidence for gene causation in ALS. 
Furthermore, integration of external tools, systems for feedback, annotation by users, and two-way links to collaborators hosting complementary databases further enhance the functionality of ALSoD.",ALSoD,0.992415547,Online Database,0.633407295,ALSoD,0.992415547,1,NA,21702733,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,7/16/2012 +21702733,"http://alsod.iop.kcl.ac.uk/, http://www.alsgene.org","Keeping up with genetic discoveries in amyotrophic lateral sclerosis: the ALSoD and ALSGene databases. Amyotrophic lateral sclerosis (ALS) is a genetically heterogeneous disorder that shows a characteristic dichotomy of familial forms typically displaying Mendelian inheritance patterns, and sporadic ALS showing no or less obvious familial aggregation. While the former is caused by rare, highly penetrant, and pathogenic mutations, risk for sporadic ALS is probably the result of the combined effects of common polymorphisms with minor to moderate effect sizes. Owing to recent advances in high-throughput genotyping and sequencing technologies, genetic research in both fields is evolving at a rapidly increasing pace making it more and more difficult to follow and evaluate the most significant progress in the field. To alleviate this problem, our groups have created dedicated and freely available online databases, ALSoD ( http://alsod.iop.kcl.ac.uk/ ) and ALSGene ( http://www.alsgene.org ), which provide systematic and in-depth qualitative and quantitative overviews of genetic research in both familial and sporadic ALS. 
This review briefly introduces the background and main features of both databases and provides an overview of the currently most compelling genetic findings in ALS derived from analyses using these resources.",ALSoD,0.907861531,NA,0,ALSoD,0.907861531,1,NA,22753137,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",TRUE POS: two resources; name and URL of first will be correct; second is lost,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,7/1/2011 +34316271,"http://altaiflora.asu.ru/en/, http://altb.asu.ru","Virtual Herbarium ALTB: collection of vascular plants of the Altai Mountain Country. Background The herbarium of the South-Siberian Botanical Garden of Altai State University (ALTB) houses the largest collection of plants from the Altai Mountain Country (AMC), an area that extends across Russia, Kazakhstan, Mongolia and China. The collection of ALTB includes more than 450,00 specimens, making it the seventh largest in Russia and the fourth largest amongst Russian university herbaria. Altai State University (ASU), the home of ALTB, is one of the most important centres of academic education and research in Siberia and the Russian Far East. It is a sociocultural centre that provides a distinguished learning environment for undergraduate and graduate students in many scholarly and professional fields, meeting the needs of today's knowledge-based post-industrial society and contributing to regional development. It actively promotes international cooperation and strategic collaboration amongst countries of the AMC in the fields of science, education and culture. In particular, the activities of the South-Siberian Botanical Garden include: development of measures to protect rare and endangered plant species, research on the flora and vegetation of the AMC, preparation and publication of a multi-volume work ""Flora Altaica"", monographic study of individual plant groups, conducting laboratory classes, summer practicals and special courses. 
The main purpose of this article is to attract the attention of the scientific community to the botanical research of transboundary territory of the Altai Mountain Country (Russia, Kazakhstan, China and Mongolia) and to the future development of digital plant collections in partnership with Global Biodiversity Information Facility (GBIF). New information The Virtual Herbarium ALTB (Russian interface - altb.asu.ru) is the largest digital collection of plants from the transboundary territory of the Altai Mountain Country and the main source of primary material for the ""Flora Altaica"" project (http://altaiflora.asu.ru/en/). Since 2017, when Altai State University became a GBIF data publisher, data from the Virtual Herbarium ALTB has been exported to the dataset ""Virtual Herbarium ALTB (South-Siberian Botanical Garden)"" in GBIF. Currently, it includes images and data from 22,466 vascular plants, of which 67% have geographic coordinates (accessed on 30.03.2021). Most of the specimens have been collected since 1977, with the most intensive collecting years being 1995-2008. In 2019, the label-data table of the Virtual Herbarium ALTB was modified to bring it into conformity with the Darwin Core specification (http://altb.asu.ru/). This effectively solved the major impediment to sharing plant diversity data from the AMC and adjacent regions in a multilingual environment.",ALTB,0.855050087,NA,0,ALTB,0.855050087,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/13/2021 +21880703,http://www.aluhunter.com,AluHunter: a database of potentially polymorphic Alu insertions for use in primate phylogeny and population genetics. Summary AluHunter is a database of taxon-specific primate Alu elements for use in phylogeny and population genetics. The software automatically isolates potentially polymorphic Alu insertions in sequences submitted to GenBank by screening the elements against reference genomes. 
The resultant database of variable markers is a valuable resource for researchers interested in characterizing Alu elements in their primate taxon of interest. Availability and implementation The AluHunter database can be accessed at http://www.aluhunter.com. Contact cmb433@nyu.edu.,AluHunter,0.993216336,NA,0,AluHunter,0.993216336,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/31/2011 +25432889,http://alz.big.ac.cn/alzBase,"AlzBase: an Integrative Database for Gene Dysregulation in Alzheimer's Disease. Alzheimer's disease (AD) affects a significant portion of elderly people worldwide. Although the amyloid-√ɬÉ√Ǭé√ɬÇ√Ǭ≤ (A√ɬÉ√Ǭé√ɬÇ√Ǭ≤) cascade hypothesis has been the prevailing theory for the molecular mechanism of AD in the past few decades, treatment strategies targeting the A√ɬÉ√Ǭé√ɬÇ√Ǭ≤ cascade have not demonstrated effectiveness as yet. Thus, elucidating the spatial and temporal evolution of the molecular pathways in AD remains to be a daunting task. To facilitate novel discoveries in this filed, here, we have integrated information from multiple sources for the better understanding of gene functions in AD pathogenesis. Several categories of information have been collected, including (1) gene dysregulation in AD and closely related processes/diseases such as aging and neurological disorders, (2) correlation of gene dysregulation with AD severity, (3) a wealth of annotations on the functional and regulatory information, and (4) network connections for gene-gene relationship. In addition, we have also provided a comprehensive summary for the top ranked genes in AlzBase. By evaluating the information curated in AlzBase, researchers can prioritize genes from their own research and generate novel hypothesis regarding the molecular mechanism of AD. To demonstrate the utility of AlzBase, we examined the genes from the genetic studies of AD. It revealed links between the upstream genetic variations and downstream endo-phenotype and suggested several genes with higher priority. 
This integrative database is freely available on the web at http://alz.big.ac.cn/alzBase .",AlzBase,0.974508405,NA,0,AlzBase,0.974508405,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/29/2014 +22647208,http://alzpathway.org,"AlzPathway: a comprehensive map of signaling pathways of Alzheimer's disease. Background Alzheimer's disease (AD) is the most common cause of dementia among the elderly. To clarify pathogenesis of AD, thousands of reports have been accumulating. However, knowledge of signaling pathways in the field of AD has not been compiled as a database before. Description Here, we have constructed a publicly available pathway map called ""AlzPathway"" that comprehensively catalogs signaling pathways in the field of AD. We have collected and manually curated over 100 review articles related to AD, and have built an AD pathway map using CellDesigner. AlzPathway is currently composed of 1347 molecules and 1070 reactions in neuron, brain blood barrier, presynaptic, postsynaptic, astrocyte, and microglial cells and their cellular localizations. AlzPathway is available as both the SBML (Systems Biology Markup Language) map for CellDesigner and the high resolution image map. AlzPathway is also available as a web service (online map) based on Payao system, a community-based, collaborative web service platform for pathway model curation, enabling continuous updates by AD researchers. Conclusions AlzPathway is the first comprehensive map of intra, inter and extra cellular AD signaling pathways which can enable mechanistic deciphering of AD pathogenesis. The AlzPathway map is accessible at http://alzpathway.org/.",AlzPathway,0.996937859,NA,0,AlzPathway,0.996937859,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/30/2012 +34335304,http://gift2disease.net/GIFTED,"Amadis: A Comprehensive Database for Association Between Microbiota and Disease. The human gastrointestinal tract represents a symbiotic bioreactor that can mediate the interaction of the human host. 
The deployment and integration of multi-omics technologies have depicted a more complete image of the functions performed by microbial organisms. In addition, a large amount of data has been generated in a short time. However, researchers struggling to keep track of these mountains of information need a way to conveniently gain a comprehensive understanding of the relationship between microbiota and human diseases. To tackle this issue, we developed Amadis (http://gift2disease.net/GIFTED), a manually curated database that provides experimentally supported microbiota-disease associations and a dynamic network construction method. The current version of the Amadis database documents 20167 associations between 221 human diseases and 774 gut microbes across 17 species, curated from more than 1000 articles. By using the curated data, users can freely select and combine modules to obtain a specific microbe-based human disease network. Additionally, Amadis provides a user-friendly interface for browsing, searching and downloading. We hope it can serve as a useful and valuable resource for researchers exploring the associations between gastrointestinal microbiota and human diseases.",Amadis,0.995023131,NA,0,Amadis,0.995023131,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/14/2021 +25762455,http://amamutdb.no,"amamutdb.no: A relational database for MAN2B1 allelic variants that compiles genotypes, clinical phenotypes, and biochemical and structural data of mutant MAN2B1 in √ɬÉ√Ǭé√ɬÇ√Ǭ±-mannosidosis. √ɬÉ√Ǭé√ɬÇ√Ǭ±-Mannosidosis is an autosomal recessive lysosomal storage disorder caused by mutations in the MAN2B1 gene, encoding lysosomal √ɬÉ√Ǭé√ɬÇ√Ǭ±-mannosidase. The disorder is characterized by a range of clinical phenotypes of which the major manifestations are mental impairment, hearing impairment, skeletal changes, and immunodeficiency. 
Here, we report an √ɬÉ√Ǭé√ɬÇ√Ǭ±-mannosidosis mutation database, amamutdb.no, which has been constructed as a publicly accessible online resource for recording and analyzing MAN2B1 variants (http://amamutdb.no). Our aim has been to offer structured and relational information on MAN2B1 mutations and genotypes along with associated clinical phenotypes. Classifying missense mutations, as pathogenic or benign, is a challenge. Therefore, they have been given special attention as we have compiled all available data that relate to their biochemical, functional, and structural properties. The √ɬÉ√Ǭé√ɬÇ√Ǭ±-mannosidosis mutation database is comprehensive and relational in the sense that information can be retrieved and compiled across datasets; hence, it will facilitate diagnostics and increase our understanding of the clinical and molecular aspects of √ɬÉ√Ǭé√ɬÇ√Ǭ±-mannosidosis. We believe that the amamutdb.no structure and architecture will be applicable for the development of databases for any monogenic disorder.",amamutdb.no,0.977049748,NA,0,amamutdb.no,0.977049748,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/9/2015 +32193422,http://www.amazon-fish.com,"A database of freshwater fish species of the Amazon Basin. The Amazon Basin is an unquestionable biodiversity hotspot, containing the highest freshwater biodiversity on earth and facing off a recent increase in anthropogenic threats. The current knowledge on the spatial distribution of the freshwater fish species is greatly deficient in this basin, preventing a comprehensive understanding of this hyper-diverse ecosystem as a whole. Filling this gap was the priority of a transnational collaborative project, i.e. the AmazonFish project - https://www.amazon-fish.com/. Relying on the outputs of this project, we provide the most complete fish species distribution records covering the whole Amazon drainage. 
The database, including 2,406 validated freshwater native fish species, 232,936 georeferenced records, results from an extensive survey of species distribution including 590 different sources (e.g. published articles, grey literature, online biodiversity databases and scientific collections from museums and universities worldwide) and field expeditions conducted during the project. This database, delivered at both georeferenced localities (21,500 localities) and sub-drainages grains (144 units), represents a highly valuable source of information for further studies on freshwater fish biodiversity, biogeography and conservation.",AmazonFish,0.907141924,NA,0,AmazonFish,0.907141924,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/19/2020 +23317704,http://www.amddatabase.info,"AMDD: antimicrobial drug database. Drug resistance is one of the major concerns for antimicrobial chemotherapy against any particular target. Knowledge of the primary structure of antimicrobial agents and their activities is essential for rational drug design. Thus, we developed a comprehensive database, anti microbial drug database (AMDD), of known synthetic antibacterial and antifungal compounds that were extracted from the available literature and other chemical databases, e.g., PubChem, PubChem BioAssay and ZINC, etc. The current version of AMDD contains ~2900 antibacterial and ~1200 antifungal compounds. The molecules are annotated with properties such as description, target, format, bioassay, molecular weight, hydrogen bond donor, hydrogen bond acceptor and rotatable bond. The availability of these antimicrobial agents on common platform not only provides useful information but also facilitate the virtual screening process, thus saving time and overcoming difficulties in selecting specific type of inhibitors for the specific targets. 
AMDD may provide a more effective and efficient way of accessing antimicrobial compounds based on their properties along with the links to their structure and bioassay. All the compounds are freely available at the advanced web-based search interface http://www.amddatabase.info.",AMDD,0.986542583,anti microbial drug database,0.958445907,AMDD,0.986542583,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/7/2012 +26828034,http://www.amphibiachina.org,"AmphibiaChina: an online database of Chinese Amphibians. AmphibiaChina, an open-access, web-based database, is designed to provide comprehensive and up-to-date information on Chinese amphibians. It offers an integrated module with six major sections. Compared to other known databases including AmphibiaWeb and Amphibian Species of the World, AmphibiaChina has the following new functions: (1) online species identification based on DNA barcode sequences; (2) comparisons and discussions of different major taxonomic systems; and (3) phylogenetic progress on Chinese amphibians. This database offers a window for the world to access available information of Chinese amphibians. AmphibiaChina with its Chinese version can be accessed at http://www.amphibiachina.org.",AmphibiaChina,0.995878279,NA,0,AmphibiaChina,0.995878279,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2016 +30247677,http://amtdb.org,"AmtDB: a database of ancient human mitochondrial genomes. Ancient mitochondrial DNA is used for tracing human past demographic events due to its population-level variability. The number of published ancient mitochondrial genomes has increased in recent years, alongside with the development of high-throughput sequencing and capture enrichment methods. Here, we present AmtDB, the first database of ancient human mitochondrial genomes. Release version contains 1107 hand-curated ancient samples, freely accessible for download, together with the individual descriptors, including geographic location, radiocarbon dating, and archaeological culture affiliation. 
The database also features an interactive map for sample location visualization. AmtDB is a key platform for ancient population genetic studies and is available at https://amtdb.org.",AmtDB,0.998366475,NA,0,AmtDB,0.998366475,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +31094220,http://bioinformatics.biol.uoa.gr/amyco,"AmyCo: the amyloidoses collection. Amyloid fibrils are formed when soluble proteins misfold into highly ordered insoluble fibrillar aggregates and affect various organs and tissues. The deposition of amyloid fibrils is the main hallmark of a group of disorders, called amyloidoses. Curiously, fibril deposition has been also recorded as a complication in a number of other pathological conditions, including well-known neurodegenerative or endocrine diseases. To date, amyloidoses are roughly classified, owing to their tremendous heterogeneity. In this work, we introduce AmyCo, a freely available collection of amyloidoses and clinical disorders related to amyloid deposition. AmyCo classifies 75 diseases associated with amyloid deposition into two distinct categories, namely 1) amyloidosis and 2) clinical conditions associated with amyloidosis. Each database entry is annotated with the major protein component (causative protein), other components of amyloid deposits and affected tissues or organs. Database entries are also supplemented with appropriate detailed annotation and are referenced to ICD-10, MeSH, OMIM, PubMed, AmyPro and UniProtKB databases. To our knowledge, AmyCo is the first attempt towards the creation of a complete and an up-to-date repository, containing information about amyloidoses and diseases related to amyloid deposition. The AmyCo web interface is available at http://bioinformatics.biol.uoa.gr/amyco .",AmyCo,0.988337398,amyloidoses collection,0.742020524,AmyCo,0.988337398,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/16/2019 +26088800,http://comprec-lin.iiar.pwr.edu.pl/amyload,"AmyLoad: website dedicated to amyloidogenic protein fragments. 
Unlabelled Analyses of amyloidogenic sequence fragments are essential in studies of neurodegenerative diseases. However, there is no one internet dataset that collects all the sequences that have been investigated for their amyloidogenicity. Therefore, we have created the AmyLoad website which collects the amyloidogenic sequences from all major sources. The website allows for filtration of the fragments and provides detailed information about each of them. Registered users can both personalize their work with the website and submit their own sequences into the database. To maintain database reliability, submitted sequences are reviewed before making them available to the public. Finally, we re-implemented several amyloidogenic sequence predictors, thus the AmyLoad website can be used as a sequence analysis tool. We encourage researchers working on amyloid proteins to contribute to our service. Availability and implementation The AmyLoad website is freely available at http://comprec-lin.iiar.pwr.edu.pl/amyload/. Contact malgorzata.kotulska@pwr.edu.pl.",AmyLoad,0.924672663,NA,0,AmyLoad,0.924672663,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/17/2015 +29040693,http://amypro.net,"AmyPro: a database of proteins with validated amyloidogenic regions. Soluble functional proteins may transform into insoluble amyloid fibrils that deposit in a variety of tissues. Amyloid formation is a hallmark of age-related degenerative disorders. Perhaps surprisingly, amyloid fibrils can also be beneficial and are frequently exploited for diverse functional roles in organisms. Here we introduce AmyPro, an open-access database providing a comprehensive, carefully curated collection of validated amyloid fibril-forming proteins from all kingdoms of life classified into broad functional categories (http://amypro.net). 
In particular, AmyPro provides the boundaries of experimentally validated amyloidogenic sequence regions, short descriptions of the functional relevance of the proteins and their amyloid state, a list of the experimental techniques applied to study the amyloid state, important structural/functional/variation/mutation data transferred from UniProt, a list of relevant PDB structures categorized according to protein states, database cross-references and literature references. AmyPro greatly improves on similar currently available resources by incorporating both prions and functional amyloids in addition to pathogenic amyloids, and allows users to screen their sequences against the entire collection of validated amyloidogenic sequence fragments. By enabling further elucidation of the sequential determinants of amyloid fibril formation, we hope AmyPro will enhance the development of new methods for the precise prediction of amyloidogenic regions within proteins.",AmyPro,0.99600482,NA,0,AmyPro,0.99600482,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +32765587,http://education.knoweng.org/abc-gwas,"ABC-GWAS: Functional Annotation of Estrogen Receptor-Positive Breast Cancer Genetic Variants. Over the past decade, hundreds of genome-wide association studies (GWAS) have implicated genetic variants in various diseases, including cancer. However, only a few of these variants have been functionally characterized to date, mainly because the majority of the variants reside in non-coding regions of the human genome with unknown function. A comprehensive functional annotation of the candidate variants is thus necessary to fill the gap between the correlative findings of GWAS and the development of therapeutic strategies. 
By integrating large-scale multi-omics datasets such as the Cancer Genome Atlas (TCGA) and the Encyclopedia of DNA Elements (ENCODE), we performed multivariate linear regression analysis of expression quantitative trait loci, sequence permutation test of transcription factor binding perturbation, and modeling of three-dimensional chromatin interactions to analyze the potential molecular functions of 2,813 single nucleotide variants in 93 genomic loci associated with estrogen receptor-positive breast cancer. To facilitate rapid progress in functional genomics of breast cancer, we have created ""Analysis of Breast Cancer GWAS"" (ABC-GWAS), an interactive database of functional annotation of estrogen receptor-positive breast cancer GWAS variants. Our resource includes expression quantitative trait loci, long-range chromatin interaction predictions, and transcription factor binding motif analyses to prioritize putative target genes, causal variants, and transcription factors. An embedded genome browser also facilitates convenient visualization of the GWAS loci in genomic and epigenomic context. ABC-GWAS provides an interactive visual summary of comprehensive functional characterization of estrogen receptor-positive breast cancer variants. The web resource will be useful to both computational and experimental biologists who wish to generate and test their hypotheses regarding the genetic susceptibility, etiology, and carcinogenesis of breast cancer. ABC-GWAS can also be used as a user-friendly educational resource for teaching functional genomics. ABC-GWAS is available at http://education.knoweng.org/abc-gwas/.",ABC-GWAS,0.923012972,Analysis of Breast Cancer GWAS,0.94699221,Analysis of Breast Cancer GWAS,0.94699221,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/20/2020 +22102578,http://colt.ccbr.utoronto.ca/cancer,"COLT-Cancer: functional genetic screening resource for essential genes in human cancer cell lines. 
Genome-wide pooled shRNA screens enable global identification of the genes essential for cancer cell survival and proliferation and provide a 'functional genetic' map of human cancer to complement genomic studies. Using a lentiviral shRNA library targeting approximately 16,000 human genes and a newly developed scoring approach, we identified essential gene profiles in more than 70 breast, pancreatic and ovarian cancer cell lines. We developed a web-accessible database system for capturing information from each step in our standardized screening pipeline and a gene-centric search tool for exploring shRNA activities within a given cell line or across multiple cell lines. The database consists of a laboratory information and management system for tracking each step of a pooled shRNA screen as well as a web interface for querying and visualization of shRNA and gene-level performance across multiple cancer cell lines. COLT-Cancer Version 1.0 is currently accessible at http://colt.ccbr.utoronto.ca/cancer.",ancer,0.55935967,NA,0,ancer,0.55935967,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/18/2011 +30371900,http://ancestralgenomes.org,"Ancestral Genomes: a resource for reconstructed ancestral genes and genomes across the tree of life. A growing number of whole genome sequencing projects, in combination with development of phylogenetic methods for reconstructing gene evolution, have provided us with a window into genomes that existed millions, and even billions, of years ago. Ancestral Genomes (http://ancestralgenomes.org) is a resource for comprehensive reconstructions of these 'fossil genomes'. Comprehensive sets of protein-coding genes have been reconstructed for 78 genomes of now-extinct species that were the common ancestors of extant species from across the tree of life. The reconstructed genes are based on the extensive library of over 15 000 gene family trees from the PANTHER database, and are updated on a yearly basis. 
For each ancestral gene, we assign a stable identifier, and provide additional information designed to facilitate analysis: an inferred name, a reconstructed protein sequence, a set of inferred Gene Ontology (GO) annotations, and a 'proxy gene' for each ancestral gene, defined as the least-diverged descendant of the ancestral gene in a given extant genome. On the Ancestral Genomes website, users can browse the Ancestral Genomes by selecting nodes in a species tree, and can compare an extant genome with any of its reconstructed ancestors to understand how the genome evolved.",Ancestral Genomes,0.586193487,NA,0,Ancestral Genomes,0.586193487,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +31347432,http://arecanut.icmr.org.in,"ANDB: Development of a Database Based on a Global Survey of Literature on Areca Nut and Associated Health Effects. Areca nut (AN), commonly known as ""Supari"" in India is an addictive substance and widely consumed with or without tobacco as a part of customs in many South East Asian countries. Owing to the adverse health effects of AN, public awareness and stringent government policies to prohibit AN production and regulation of products containing AN should be addressed without further delay. Lack of a research database, motivated us to develop a comprehensive online portal on global survey of published articles with reference to AN. The Areca nut database (ANDB) is a manually curated database which provides the information on global literature according to the publication year, author, population, harmful effects, and associated disease. The present study is an attempt to deliver the relevant information which would be helpful to researchers in prioritizing the research areas with respect to AN and associated health effects. The portal has been developed in MySQL and the interface has been designed using core PhP and CSS, HTML. ANDB is an online resource available to provide global literature of AN in a user-friendly manner. 
It can be accessed freely on http://arecanut.icmr.org.in/. To the best of our knowledge, ANDB is the first portal delivering inclusive scientific literature related to AN and its health effects. This evidence-based scientific information would be useful for policy makers to make guidelines for increasing awareness and implementing the laws for regulated use of this potentially carcinogenic substance, thereby controlling the burden of many dreaded diseases primarily oral submucous fibrosis, cardiovascular disease, and cancers.",ANDB,0.951109529,Areca nut database,0.914340153,ANDB,0.951109529,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/26/2019 +27582018,http://angiogenes.uni-frankfurt.de,"ANGIOGENES: knowledge database for protein-coding and noncoding RNA genes in endothelial cells. Increasing evidence indicates the presence of long noncoding RNAs (lncRNAs) is specific to various cell types. Although lncRNAs are speculated to be more numerous than protein-coding genes, the annotations of lncRNAs remain primitive due to the lack of well-structured schemes for their identification and description. Here, we introduce a new knowledge database ""ANGIOGENES"" (http://angiogenes.uni-frankfurt.de) to allow for in silico screening of protein-coding genes and lncRNAs expressed in various types of endothelial cells, which are present in all tissues. Using the latest annotations of protein-coding genes and lncRNAs, publicly-available RNA-seq data was analyzed to identify transcripts that are expressed in endothelial cells of human, mouse and zebrafish. The analyzed data were incorporated into ANGIOGENES to provide a one-stop-shop for transcriptomics data to facilitate further biological validation. ANGIOGENES is an intuitive and easy-to-use database to allow in silico screening of expressed, enriched and/or specific endothelial transcripts under various conditions. 
We anticipate that ANGIOGENES serves as a starting point for functional studies to elucidate the roles of protein-coding genes and lncRNAs in angiogenesis.",ANGIOGENES,0.997215807,NA,0,ANGIOGENES,0.997215807,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2016 +30407520,http://www.animalgenome.org/QTLdb,"Building a livestock genetic and genomic information knowledgebase through integrative developments of Animal QTLdb and CorrDB. Successful development of biological databases requires accommodation of the burgeoning amounts of data from high-throughput genomics pipelines. As the volume of curated data in Animal QTLdb (https://www.animalgenome.org/QTLdb) increases exponentially, the resulting challenges must be met with rapid infrastructure development to effectively accommodate abundant data curation and make metadata analysis more powerful. The development of Animal QTLdb and CorrDB for the past 15 years has provided valuable tools for researchers to utilize a wealth of phenotype/genotype data to study the genetic architecture of livestock traits. We have focused our efforts on data curation, improved data quality maintenance, new tool developments, and database co-developments, in order to provide convenient platforms for users to query and analyze data. The database currently has 158 499 QTL/associations, 10 482 correlations and 1977 heritability data as a result of an average 32% data increase per year. In addition, we have made >14 functional improvements or new tool implementations since our last report. 
Our ultimate goals of database development are to provide infrastructure for data collection, curation, and annotation, and more importantly, to support innovated data structure for new types of data mining, data reanalysis, and networked genetic analysis that lead to the generation of new knowledge.",Animal QTLdb,0.974247932,NA,0,Animal QTLdb,0.974247932,1,"23180796.0, 26602686.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,1/1/2019 +30937442,http://www.ccb.uni-saarland.de/asra,"The sncRNA Zoo: a repository for circulating small noncoding RNAs in animals. The repertoire of small noncoding RNAs (sncRNAs), particularly miRNAs, in animals is considered to be evolutionarily conserved. Studies on sncRNAs are often largely based on homology-based information, relying on genomic sequence similarity and excluding actual expression data. To obtain information on sncRNA expression (including miRNAs, snoRNAs, YRNAs and tRNAs), we performed low-input-volume next-generation sequencing of 500√ɬÉ√ǬÇ√ɬÇ√Ǭ†pg of RNA from 21 animals at two German zoological gardens. Notably, none of the species under investigation were previously annotated in any miRNA reference database. Sequencing was performed on blood cells as they are amongst the most accessible, stable and abundant sources of the different sncRNA classes. We evaluated and compared the composition and nature of sncRNAs across the different species by computational approaches. While the distribution of sncRNAs in the different RNA classes varied significantly, general evolutionary patterns were maintained. In particular, miRNA sequences and expression were found to be even more conserved than previously assumed. 
To make the results available for other researchers, all data, including expression profiles at the species and family levels, and different tools for viewing, filtering and searching the data are freely available in the online resource ASRA (Animal sncRNA Atlas) at https://www.ccb.uni-saarland.de/asra/.",ASRA,0.944391489,Animal sncRNA Atlas,0.959473997,Animal sncRNA Atlas,0.959473997,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/1/2019 +32986825,http://gong_lab.hzau.edu.cn/Animal-APAdb,"Animal-APAdb: a comprehensive animal alternative polyadenylation database. Alternative polyadenylation (APA) is an important post-transcriptional regulatory mechanism that recognizes different polyadenylation signals on transcripts, resulting in transcripts with different lengths of 3' untranslated regions and thereby influencing a series of biological processes. Recent studies have highlighted the important roles of APA in human. However, APA profiles in other animals have not been fully recognized, and there is no database that provides comprehensive APA information for other animals except human. Here, by using the RNA sequencing data collected from public databases, we systematically characterized the APA profiles in 9244 samples of 18 species. In total, we identified 342 952 APA events with a median of 17 020 per species using the DaPars2 algorithm, and 315 691 APA events with a median of 17 953 per species using the QAPA algorithm in these 18 species, respectively. In addition, we predicted the polyadenylation sites (PAS) and motifs near PAS of these species. We further developed Animal-APAdb, a user-friendly database (http://gong_lab.hzau.edu.cn/Animal-APAdb/) for data searching, browsing and downloading. 
With comprehensive information of APA events in different tissues of different species, Animal-APAdb may greatly facilitate the exploration of animal APA patterns and novel mechanisms, gene expression regulation and APA evolution across tissues and species.",Animal-APAdb,0.993082929,NA,0,Animal-APAdb,0.993082929,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +31584087,http://gong_lab.hzau.edu.cn/Animal_ImputeDB,"Animal-ImputeDB: a comprehensive database with multiple animal reference panels for genotype imputation. Animal-ImputeDB (http://gong_lab.hzau.edu.cn/Animal_ImputeDB/) is a public database with genomic reference panels of 13 animal species for online genotype imputation, genetic variant search, and free download. Genotype imputation is a process of estimating missing genotypes in terms of the haplotypes and genotypes in a reference panel. It can effectively increase the density of single nucleotide polymorphisms (SNPs) and thus can be widely used in large-scale genome-wide association studies (GWASs) using relatively inexpensive and low-density SNP arrays. However, most animals except humans lack high-quality reference panels, which greatly limits the application of genotype imputation in animals. To overcome this limitation, we developed Animal-ImputeDB, which is dedicated to collecting genotype data and whole-genome resequencing data of nonhuman animals from various studies and databases. A computational pipeline was developed to process different types of raw data to construct reference panels. Finally, 13 high-quality reference panels including ~400 million SNPs from 2265 samples were constructed. In Animal-ImputeDB, an easy-to-use online tool consisting of two popular imputation tools was designed for the purpose of genotype imputation. 
Collectively, Animal-ImputeDB serves as an important resource for animal genotype imputation and will greatly facilitate research on animal genomic selection and genetic improvement.",Animal-ImputeDB,0.997726774,NA,0,Animal-ImputeDB,0.997726774,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +21887013,http://www.research-bioinformatics.in,"AnimalLectinDb: An integrated animal lectin database. Unlabelled Lectins, a class of carbohydrate-binding proteins and widely recognized to play a range of crucial roles in many cell-cell recognition events triggering several important cellular processes encompass different members that are diverse in their protein structures, carbohydrate affinities and specificities, their larger biological roles and potential applications. To attain an effective use of all the diverse data initially an animal lectin database 'AnimalLectinDb' with information pertaining to taxonomic, structural, domain architecture, molecular sequence, carbohydrate structure and blood group specificity has been developed. It is expected to be of high value not only for basic study in lectin biology but also for advanced research in pursuing several applications in biotechnology, immunology, and clinical practice. Availability The database is available for free at http://www.research-bioinformatics.in.",AnimalLectinDb,0.99539414,NA,0,AnimalLectinDb,0.99539414,1,22493537,NA,NA,NA,do not merge,NA,NA,NA,NA,4/22/2011 +"25262351, 30204897",http://bioinfo.life.hust.edu.cn/AnimalTFDB,"AnimalTFDB 2.0: a resource for expression, prediction and functional study of animal transcription factors. Transcription factors (TFs) are key regulators for gene expression. Here we updated the animal TF database AnimalTFDB to version 2.0 (http://bioinfo.life.hust.edu.cn/AnimalTFDB/). Using the improved prediction pipeline, we identified 72 336 TF genes, 21 053 transcription co-factor genes and 6502 chromatin remodeling factor genes from 65 species covering main animal lineages. 
Besides the abundant annotations (basic information, gene model, protein functional domain, gene ontology, pathway, protein interaction, ortholog and paralog, etc.) in the previous version, we made several new features and functions in the updated version. These new features are: (i) gene expression from RNA-Seq for nine model species, (ii) gene phenotype information, (iii) multiple sequence alignment of TF DNA-binding domains, and the weblogo and phylogenetic tree based on the alignment, (iv) a TF prediction server to identify new TFs from input sequences and (v) a BLAST server to search against TFs in AnimalTFDB. A new nice web interface was designed for AnimalTFDB 2.0 allowing users to browse and search all data in the database. We aim to maintain the AnimalTFDB as a solid resource for TF identification and studies of transcription regulation and comparative genomics.",AnimalTFDB,0.997303069,Animal Transcription Factor DataBase,0.923694839,AnimalTFDB,0.997303069,2,NA,22080564,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2019 +22080564,http://www.bioguo.org/AnimalTFDB,"AnimalTFDB: a comprehensive animal transcription factor database. Transcription factors (TFs) are proteins that bind to specific DNA sequences, thereby playing crucial roles in gene-expression regulation through controlling the transcription of genetic information from DNA to RNA. Transcription cofactors and chromatin remodeling factors are also essential in the gene transcriptional regulation. Identifying and annotating all the TFs are primary and crucial steps for illustrating their functions and understanding the transcriptional regulation. In this study, based on manual literature reviews, we collected and curated 72 TF families for animals, which is currently the most complete list of TF families in animals. 
Then, we systematically characterized all the TFs in 50 animal species and constructed a comprehensive animal TF database, AnimalTFDB. To better serve the community, we provided detailed annotations for each TF, including basic information, gene structure, functional domain, 3D structure hit, Gene Ontology, pathway, protein-protein interaction, paralogs, orthologs, potential TF-binding sites and targets. In addition, we collected and annotated transcription cofactors and chromatin remodeling factors. AnimalTFDB has a user-friendly web interface with multiple browse and search functions, as well as data downloading. It is freely available at http://www.bioguo.org/AnimalTFDB/.",AnimalTFDB,0.989919841,NA,0,AnimalTFDB,0.989919841,1,NA,"25262351.0, 30204897.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/12/2011 +31680137,http://www.aniseed.cnrs.fr,"ANISEED 2019: 4D exploration of genetic data for an extended range of tunicates. ANISEED (https://www.aniseed.cnrs.fr) is the main model organism database for the worldwide community of scientists working on tunicates, the vertebrate sister-group. Information provided for each species includes functionally-annotated gene and transcript models with orthology relationships within tunicates, and with echinoderms, cephalochordates and vertebrates. Beyond genes the system describes other genetic elements, including repeated elements and cis-regulatory modules. Gene expression profiles for several thousand genes are formalized in both wild-type and experimentally-manipulated conditions, using formal anatomical ontologies. These data can be explored through three complementary types of browsers, each offering a different view-point. A developmental browser summarizes the information in a gene- or territory-centric manner. Advanced genomic browsers integrate the genetic features surrounding genes or gene sets within a species. 
A Genomicus synteny browser explores the conservation of local gene order across deuterostome. This new release covers an extended taxonomic range of 14 species, including for the first time a non-ascidian species, the appendicularian Oikopleura dioica. Functional annotations, provided for each species, were enhanced through a combination of manual curation of gene models and the development of an improved orthology detection pipeline. Finally, gene expression profiles and anatomical territories can be explored in 4D online through the newly developed Morphonet morphogenetic browser.",ANISEED,0.995582819,NA,0,ANISEED,0.995582819,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +32406920,http://annolnc.gao-lab.org,"AnnoLnc2: the one-stop portal to systematically annotate novel lncRNAs for human and mouse. With the abundant mammalian lncRNAs identified recently, a comprehensive annotation resource for these novel lncRNAs is an urgent need. Since its first release in November 2016, AnnoLnc has been the only online server for comprehensively annotating novel human lncRNAs on-the-fly. Here, with significant updates to multiple annotation modules, backend datasets and the code base, AnnoLnc2 continues the effort to provide the scientific community with a one-stop online portal for systematically annotating novel human and mouse lncRNAs with a comprehensive functional spectrum covering sequences, structure, expression, regulation, genetic association and evolution. In response to numerous requests from multiple users, a standalone package is also provided for large-scale offline analysis. 
We believe that updated AnnoLnc2 (http://annolnc.gao-lab.org/) will help both computational and bench biologists identify lncRNA functions and investigate underlying mechanisms.",AnnoLnc,0.998637438,NA,0,AnnoLnc,0.998637438,1,NA,33326073,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,7/1/2020 +33326073,http://annolnc1.gao-lab.org,"AnnoLnc: A One-Stop Portal to Systematically Annotate Novel Human Long Noncoding RNAs. While more than a hundred thousand long noncoding RNAs (lncRNAs) have been identified in human genome, their biological functions and regulation are largely elusive. Here we present AnnoLnc, a one-stop online annotation portal for human lncRNAs ( http://annolnc1.gao-lab.org/ ). As the first (and the most comprehensive) Web server to provide on-the-fly annotation for novel human lncRNAs, AnnoLnc exploits more than 700 data sources to annotate inputted lncRNA systematically, spanning genomic location, secondary structure, expression patterns, coexpression-based functional annotation, transcriptional regulation, miRNA interaction, protein interaction, genetic association, and evolution. Moreover, in addition to a user-friendly Web interface, AnnoLnc can also be integrated into existing pipelines by either a set of JSON-based web service APIs or a stand-alone version for Linux server.",AnnoLnc,0.998452187,NA,0,AnnoLnc,0.998452187,1,NA,32406920,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +21904438,http://bioinfopresidencycollegekolkata.edu.in/antagomirs.html,"Antagomirbase- a putative antagomir database. Unlabelled The accurate prediction of a comprehensive set of messenger putative antagomirs against microRNAs (miRNAs) remains an open problem. In particular, a set of putative antagomirs against human miRNA is predicted in this current version of database. We have developed Antagomir database, based on putative antagomirs-miRNA heterodimers. 
In this work, the human miRNA dataset was used as template to design putative antagomirs, using GC content and secondary structures as parameters. The algorithm used predicted the free energy of unbound antagomirs. Although in its infancy the development of antagomirs, that can target cell specific genes or families of genes, may pave the way forward for the generation of a new class of therapeutics, to treat complex inflammatory diseases. Future versions need to incorporate further sequences from other mammalian homologues for designing of antagomirs for aid in research. Availability The database is available for free at http://bioinfopresidencycollegekolkata.edu.in/antagomirs.html.",Antagomir,0.96470964,NA,0,Antagomir,0.96470964,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/20/2011 +33996073,http://antifungalinteractions.org/was,"Drug-drug interaction database for safe prescribing of systemic antifungal agents. Introduction A drug-drug interaction (DDI) describes the influence of one drug upon another or the change in a drug's effect on the body when the drug is taken together with a second drug. A DDI can delay, decrease or enhance absorption or metabolism of either drug. Several antifungal agents have a large number of potentially deleterious DDIs. Methods The antifungal drug interactions database https://antifungalinteractions.org/was first launched in 2012 and is updated regularly. It is available as web and app versions to allow information on potential drug interactions with antifungals with a version for patients and another for health professionals. A new and updated database and interface with apps was created in 2019. This allows clinicians and patients to rapidly check for DDIs. The database is fully referenced to allow the user to access further information if needed. 
Currently DDIs for fluconazole, itraconazole, voriconazole, posaconazole, isavuconazole, terbinafine, amphotericin B, caspofungin, micafungin and anidulafungin are cross-referenced against 2398 other licensed drugs, a total of nearly 17,000 potential DDIs. Results The database records 541 potentially severe DDIs, 1129 moderate and 1015 mild DDIs, a total of 2685 (15.9%). Conclusion As the online database and apps are free to use, we hope that widespread acceptance and usage will reduce medical misadventure and iatrogenic harm from unconsidered DDIs.",NA,0,antifungal drug interactions database,0.586464763,antifungal drug interactions database,0.586464763,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +32584882,http://amdr.amu.ac.in/acd,"ACD: Antimicrobial chemotherapeutics database. Antimicrobial resistance is becoming a growing health problem, which has become a challenge for the physicians to control infection and also an economic burden on the healthcare. This increase in resistance to the present antimicrobial agents led the researchers to find some alternative and more efficient drugs which can fight with the resistant microorganisms more effectively. Hence, in silico approach is used to design some novel drugs against various targets of microorganisms. For effective virtual screening of the drugs, there is a need to know about the chemical structure and properties of the antimicrobial agents. Therefore, we have prepared a comprehensive database as a platform for the researcher to search for possible lead molecules. Antimicrobial chemotherapeutics database (ACD) is comprised of ~4100 synthetic antimicrobial compounds as well as ~1030 active antimicrobial peptides. The Antimicrobial peptides are mainly from biological sources but some of them are synthetic in nature. 
Only those compounds, which are found to be active against either bacteria (both Gram-positive and negative) or fungus, are selected for this database.The ACD database is freely available at URL: http://amdr.amu.ac.in/acd, and it is compatible with desktops, smartphones, and tablets.",ACD,0.987468759,Antimicrobial chemotherapeutics database,0.987474948,Antimicrobial chemotherapeutics database,0.987474948,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/25/2020 +30639529,http://www.ceb.uminho.pt/aecd,"Catalysing the way towards antimicrobial effectiveness: A systematic analysis and a new online resource for antimicrobial-enzyme combinations against Pseudomonas aeruginosa and Staphylococcus aureus. Growing antimicrobial resistance and the resilience of biofilm infections have led researchers to study the potential of antimicrobial combinations, including those incorporating enzymes with biofilm-disrupting abilities. This work aimed to evaluate the journey of antimicrobial-enzyme combination research and to gain insights into its current status and most promising leads. Expert curators annotated and analysed all published experimental data on enzyme-containing combinations for two major biofilm-forming pathogens, namely Pseudomonas aeruginosa and Staphylococcus aureus. This entailed the construction of the first publicly accessible online database on antimicrobial-enzyme combinations, the Antimicrobial Enzyme Combinations Database (https://www.ceb.uminho.pt/aecd). Gathered data were also reconstructed as knowledge networks to help analyse and visualise annotated entities (e.g. enzymes, methods, strains, combination outputs). The database currently holds 122 and 206 annotated combinations for P. aeruginosa and S. aureus, respectively, and their analysis allowed a systematic review of the available evidence on enzyme combinations, reliably illustrating the studies being performed. The most tested enzymes (e.g. 
lysozyme, DNase, lysostaphin) were scrutinised and the rationale behind each combination was explained. This research area is still growing although current research gaps/opportunities were identified, such as lack of biofilm testing and studies on polymicrobial scenarios. Hopefully, this work will shed light on the synergistic potential of enzyme combinations and alleviate some of the time- and resource-consuming tasks related to enzyme combination research by helping the selection and design of new enzyme-related therapeutic options for P. aeruginosa and S. aureus infections.",NA,0,Antimicrobial Enzyme Combinations Database,0.657052189,Antimicrobial Enzyme Combinations Database,0.657052189,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/9/2019 +34529321,http://aps.unmc.edu,"The evolution of the antimicrobial peptide database over 18 years: Milestones and new features. The antimicrobial peptide database (APD) has served the antimicrobial peptide field for 18 years. Because it is widely used in research and education, this article documents database milestones and key events that have transformed it into the current form. A comparison is made for the APD peptide statistics between 2010 and 2020, validating the major database findings to date. We also describe new additions ranging from peptide entries to search functions. Of note, the APD also contains antimicrobial peptides from host microbiota, which are important in shaping immune systems and could be linked to a variety of human diseases. Finally, the database has been re-programmed to the web branding and latest security compliance of the University of Nebraska Medical Center. 
The reprogrammed APD can be accessed at https://aps.unmc.edu.",APD,0.899997145,antimicrobial peptide database,0.933922029,antimicrobial peptide database,0.933922029,1,NA,25555720,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,9/24/2021 +25555720,http://aps.unmc.edu/AP,"Improved methods for classification, prediction, and design of antimicrobial peptides. Peptides with diverse amino acid sequences, structures, and functions are essential players in biological systems. The construction of well-annotated databases not only facilitates effective information management, search, and mining but also lays the foundation for developing and testing new peptide algorithms and machines. The antimicrobial peptide database (APD) is an original construction in terms of both database design and peptide entries. The host defense antimicrobial peptides (AMPs) registered in the APD cover the five kingdoms (bacteria, protists, fungi, plants, and animals) or three domains of life (bacteria, archaea, and eukaryota). This comprehensive database ( http://aps.unmc.edu/AP ) provides useful information on peptide discovery timeline, nomenclature, classification, glossary, calculation tools, and statistics. The APD enables effective search, prediction, and design of peptides with antibacterial, antiviral, antifungal, antiparasitic, insecticidal, spermicidal, anticancer activities, chemotactic, immune modulation, or antioxidative properties. A universal classification scheme is proposed herein to unify innate immunity peptides from a variety of biological sources. As an improvement, the upgraded APD makes predictions based on the database-defined parameter space and provides a list of the sequences most similar to natural AMPs. In addition, the powerful pipeline design of the database search engine laid a solid basis for designing novel antimicrobials to combat resistant superbugs, viruses, fungi, or parasites. 
This comprehensive AMP database is a useful tool for both research and education.",APD,0.571198583,antimicrobial peptide database,0.857568729,antimicrobial peptide database,0.857568729,1,26602694,34529321,low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,1/1/2015 +"27924032, 30395294, 33152079",http://antismash-db.secondarymetabolites.org,"The antiSMASH database, a comprehensive database of microbial secondary metabolite biosynthetic gene clusters. Secondary metabolites produced by microorganisms are the main source of bioactive compounds that are in use as antimicrobial and anticancer drugs, fungicides, herbicides and pesticides. In the last decade, the increasing availability of microbial genomes has established genome mining as a very important method for the identification of their biosynthetic gene clusters (BGCs). One of the most popular tools for this task is antiSMASH. However, so far, antiSMASH is limited to de novo computing results for user-submitted genomes and only partially connects these with BGCs from other organisms. Therefore, we developed the antiSMASH database, a simple but highly useful new resource to browse antiSMASH-annotated BGCs in the currently 3907 bacterial genomes in the database and perform advanced search queries combining multiple search criteria. antiSMASH-DB is available at http://antismash-db.secondarymetabolites.org/.",antiSMASH,0.958897054,NA,0,antiSMASH,0.958897054,3,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +27671474,http://www.antistaphybase.com,"ANTISTAPHYBASE: database of antimicrobial peptides (AMPs) and essential oils (EOs) against methicillin-resistant Staphylococcus aureus (MRSA) and Staphylococcus aureus. Staphylococcus aureus and methicillin-resistant S. aureus are major pathogens. The antimicrobial peptides and essential oils (EOs) display narrow- or broad-spectrum activity against bacteria including these strains. 
A centralized resource, such as a database, designed specifically for anti-S. aureus/anti-methicillin-resistant S. aureus antimicrobial peptides and EOs is therefore needed to facilitate the comprehensive investigation of their structure/activity associations and combinations. The database ANTISTAPHYBASE is created to facilitate access to important information on antimicrobial peptides and essential peptides against methicillin-resistant S. aureus and S. aureus. At the moment, the database contains 596 sequences of antimicrobial peptides produced by diverse organisms and 287 essential oil records. It permits a quick and easy search of peptides based on their activity as well as their general, physicochemical properties and literature data. These data are very useful to perform further bioinformatic or chemometric analysis and would certainly be useful for the development of new drugs for medical use. The ANTISTAPHYBASE database is freely available at: https://www.antistaphybase.com/ .",ANTISTAPHYBASE,0.932832658,NA,0,ANTISTAPHYBASE,0.932832658,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/26/2016 +28784999,http://lin.uestc.edu.cn/AODdatabase/index.aspx,"AOD: the antioxidant protein database. An antioxidant is a molecule that can prevent free radicals from causing damages in organisms. The increasing studies on antioxidants calls for a specialized database that is not readily available yet. To this end, in the present study, the Antioxidant Database (AOD) was developed to help researchers understand and reveal the biological functions of antioxidant proteins. AOD is freely available at http://lin.uestc.edu.cn/AODdatabase/index.aspx . The current release of AOD consists of 710 antioxidant proteins. Information including taxonomy, source organism, subcellular location, gene ontology, catalytic activity and function of antioxidant proteins are all extracted from UniProtKB/Swiss-Prot and captured in AOD. 
In addition, two web-based tools for performing sequence similarity search and computationally identification of antioxidants were also integrated in AOD. We believe that AOD will greatly facilitate the researches on antioxidants.",AOD,0.994940579,Antioxidant Database,0.930749983,AOD,0.994940579,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/7/2017 +31978081,http://aoe.dbcls.jp,"All of gene expression (AOE): An integrated index for public gene expression databases. Gene expression data have been archived as microarray and RNA-seq datasets in two public databases, Gene Expression Omnibus (GEO) and ArrayExpress (AE). In 2018, the DNA DataBank of Japan started a similar repository called the Genomic Expression Archive (GEA). These databases are useful resources for the functional interpretation of genes, but have been separately maintained and may lack RNA-seq data, while the original sequence data are available in the Sequence Read Archive (SRA). We constructed an index for those gene expression data repositories, called All Of gene Expression (AOE), to integrate publicly available gene expression data. The web interface of AOE can graphically query data in addition to the application programming interface. By collecting gene expression data from RNA-seq in the SRA, AOE also includes data not included in GEO and AE. AOE is accessible as a search tool from the GEA website and is freely available at https://aoe.dbcls.jp/.",AOE,0.969555974,NA,0,AOE,0.969555974,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/24/2020 +31586392,http://hanlab.uth.edu/apa,"APAatlas: decoding alternative polyadenylation across human tissues. Alternative polyadenylation (APA) is an RNA-processing mechanism on the 3' terminus that generates distinct isoforms of mRNAs and/or other RNA polymerase II transcripts with different 3'UTR lengths. Widespread APA affects post-transcriptional gene regulation in mRNA translation, stability, and localization, and exhibits strong tissue specificity. 
However, no existing database provides comprehensive information about APA events in a large number of human normal tissues. Using the RNA-seq data from the Genotype-Tissue Expression project, we systematically identified APA events from 9475 samples across 53 human tissues and examined their associations with multiple traits and gene expression across tissues. We further developed APAatlas, a user-friendly database (https://hanlab.uth.edu/apa/) for searching, browsing and downloading related information. APAatlas will help the biomedical research community elucidate the functions and mechanisms of APA events in human tissues.",APAatlas,0.997015476,NA,0,APAatlas,0.997015476,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +25052703,http://tools.genxpro.net/apadb,"APADB: a database for alternative polyadenylation and microRNA regulation events. . Alternative polyadenylation (APA) is a widespread mechanism that contributes to the sophisticated dynamics of gene regulation. Approximately 50% of all protein-coding human genes harbor multiple polyadenylation (PA) sites; their selective and combinatorial use gives rise to transcript variants with differing length of their 3' untranslated region (3'UTR). Shortened variants escape UTR-mediated regulation by microRNAs (miRNAs), especially in cancer, where global 3'UTR shortening accelerates disease progression, dedifferentiation and proliferation. Here we present APADB, a database of vertebrate PA sites determined by 3' end sequencing, using massive analysis of complementary DNA ends. APADB provides (A)PA sites for coding and non-coding transcripts of human, mouse and chicken genes. For human and mouse, several tissue types, including different cancer specimens, are available. APADB records the loss of predicted miRNA binding sites and visualizes next-generation sequencing reads that support each PA site in a genome browser. 
The database tables can either be browsed according to organism and tissue or alternatively searched for a gene of interest. APADB is the largest database of APA in human, chicken and mouse. The stored information provides experimental evidence for thousands of PA sites and APA events. APADB combines 3' end sequencing data with prediction algorithms of miRNA binding sites, allowing to further improve prediction algorithms. Current databases lack correct information about 3'UTR lengths, especially for chicken, and APADB provides necessary information to close this gap. Database URL: http://tools.genxpro.net/apadb/.",APADB,0.981904626,NA,0,APADB,0.981904626,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/22/2014 +25378337,http://mosas.sysu.edu.cn/utr,"APASdb: a database describing alternative poly(A) sites and selection of heterogeneous cleavage sites downstream of poly(A) signals. Increasing amounts of genes have been shown to utilize alternative polyadenylation (APA) 3'-processing sites depending on the cell and tissue type and/or physiological and pathological conditions at the time of processing, and the construction of genome-wide database regarding APA is urgently needed for better understanding poly(A) site selection and APA-directed gene expression regulation for a given biology. Here we present a web-accessible database, named APASdb (http://mosas.sysu.edu.cn/utr), which can visualize the precise map and usage quantification of different APA isoforms for all genes. The datasets are deeply profiled by the sequencing alternative polyadenylation sites (SAPAS) method capable of high-throughput sequencing 3'-ends of polyadenylated transcripts. Thus, APASdb details all the heterogeneous cleavage sites downstream of poly(A) signals, and maintains near complete coverage for APA sites, much better than the previous databases using conventional methods. 
Furthermore, APASdb provides the quantification of a given APA variant among transcripts with different APA sites by computing their corresponding normalized-reads, making our database more useful. In addition, APASdb supports URL-based retrieval, browsing and display of exon-intron structure, poly(A) signals, poly(A) sites location and usage reads, and 3'-untranslated regions (3'-UTRs). Currently, APASdb involves APA in various biological processes and diseases in human, mouse and zebrafish.",APASdb,0.995926738,NA,0,APASdb,0.995926738,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2014 +26602694,http://aps.unmc.edu/AP,"APD3: the antimicrobial peptide database as a tool for research and education. The antimicrobial peptide database (APD, http://aps.unmc.edu/AP/) is an original database initially online in 2003. The APD2 (2009 version) has been regularly updated and further expanded into the APD3. This database currently focuses on natural antimicrobial peptides (AMPs) with defined sequence and activity. It includes a total of 2619 AMPs with 261 bacteriocins from bacteria, 4 AMPs from archaea, 7 from protists, 13 from fungi, 321 from plants and 1972 animal host defense peptides. The APD3 contains 2169 antibacterial, 172 antiviral, 105 anti-HIV, 959 antifungal, 80 antiparasitic and 185 anticancer peptides. Newly annotated are AMPs with antibiofilm, antimalarial, anti-protist, insecticidal, spermicidal, chemotactic, wound healing, antioxidant and protease inhibiting properties. We also describe other searchable annotations, including target pathogens, molecule-binding partners, post-translational modifications and animal models. Amino acid profiles or signatures of natural AMPs are important for peptide classification, prediction and design. 
Finally, we summarize various database applications in research and education.",APD3,0.988607168,The antimicrobial peptide database,0.915424158,APD3,0.988607168,1,25555720,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/23/2015 +30715274,http://apid.dep.usal.es,"APID database: redefining protein-protein interaction experimental evidences and binary interactomes. . The collection and integration of all the known protein-protein physical interactions within a proteome framework are critical to allow proper exploration of the protein interaction networks that drive biological processes in cells at molecular level. APID Interactomes is a public resource of biological data (http://apid.dep.usal.es) that provides a comprehensive and curated collection of `protein interactomes' for more than 1100 organisms, including 30 species with more than 500 interactions, derived from the integration of experimentally detected protein-to-protein physical interactions (PPIs). We have performed an update of APID database including a redefinition of several key properties of the PPIs to provide a more precise data integration and to avoid false duplicated records. This includes the unification of all the PPIs from five primary databases of molecular interactions (BioGRID, DIP, HPRD, IntAct and MINT), plus the information from two original systematic sources of human data and from experimentally resolved 3D structures (i.e. PDBs, Protein Data Bank files, where more than two distinct proteins have been identified). Thus, APID provides PPIs reported in published research articles (with traceable PMIDs) and detected by valid experimental interaction methods that give evidences about such protein interactions (following the `ontology and controlled vocabulary': www.ebi.ac.uk/ols/ontologies/mi; developed by `HUPO PSI-MI'). 
Within this data mining framework, all interaction detection methods have been grouped into two main types: (i) `binary' physical direct detection methods and (ii) `indirect' methods. As a result of these redefinitions, APID provides unified protein interactomes including the specific `experimental evidences' that support each PPI, indicating whether the interactions can be considered `binary' (i.e. supported by at least one binary detection method) or not.",APID,0.780257463,NA,0,APID,0.780257463,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +28413782,http://deepaklab.com/aphidmicrodb,"APMicroDB: A microsatellite database of Acyrthosiphon pisum. Pea aphids represent a complex genetic system that could be used for QTL analysis, genetic diversity and population genetics studies. Here, we described the development of first microsatellite repeat database of the pea aphid (APMicroDB), accessible at ""http://deepaklab.com/aphidmicrodb"". We identified 3,40,233 SSRs using MIcroSAtellite (MISA) tool that was distributed in 14,067 (out of 23,924) scaffold of the pea aphid. We observed 89.53% simple repeats of which 73.41% were mono-nucleotide, followed by di-nucleotide repeats. This database stored information about the repeats kind, GC content, motif type (mono - hexa), genomic location etc. We have also incorporated the primer information derived from Primer3 software of the 2504 bp flanking region of the identified marker. Blast tool is also provided for searching the user query sequence for identified marker and their primers.
This work has an immense use for scientific community working in the field of agricultural pest management, QTL mapping, and host-pathogen interaction analysis.",APMicroDB,0.996813858,microsatellite repeat database of the pea aphid,0.969031361,APMicroDB,0.996813858,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/30/2017 +26861916,http://crdd.osdd.net/raghava/apocand,"ApoCanD: Database of human apoptotic proteins in the context of cancer. In the past decade, apoptosis pathway has gained a serious consideration being a critical cellular process in determining the cancer progression. Inverse relationship between cancer progression and apoptosis rate has been well established in the literature. It causes apoptosis proteins under the investigative scanner for developing anticancer therapies, which certainly got a success in the case of few apoptosis proteins as drug targets. In the present study, we have developed a dedicated database of 82 apoptosis proteins called ApoCanD. This database comprises of crucial information of apoptosis proteins in the context of cancer. Genomic status of proteins in the form of mutation, copy number variation and expression in thousands of tumour samples and cancer cell lines are the major bricks of this database. In analysis, we have found that TP53 and MYD88 are the two most frequently mutated proteins in cancer. Availability of other information e.g. gene essentiality data, tertiary structure, sequence alignments, sequences profiles, post-translational modifications makes it even more useful for the researchers. A user-friendly web interface is provided to ameliorate the use of ApoCanD. We anticipate that, this database will facilitate the research community working in the field of apoptosis and cancer. 
The database can be accessed at: http://crdd.osdd.net/raghava/apocand.",ApoCanD,0.977928281,NA,0,ApoCanD,0.977928281,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/10/2016 +22067098,http://apoptoproteomics.uio.no,"ApoptoProteomics, an integrated database for analysis of proteomics data obtained from apoptotic cells. Apoptosis is the most commonly described form of programmed cell death, and dysfunction is implicated in a large number of human diseases. Many quantitative proteome analyses of apoptosis have been performed to gain insight in proteins involved in the process. This resulted in large and complex data sets that are difficult to evaluate. Therefore, we developed the ApoptoProteomics database for storage, browsing, and analysis of the outcome of large scale proteome analyses of apoptosis derived from human, mouse, and rat. The proteomics data of 52 publications were integrated and unified with protein annotations from UniProt-KB, the caspase substrate database homepage (CASBAH), and gene ontology. Currently, more than 2300 records of more than 1500 unique proteins were included, covering a large proportion of the core signaling pathways of apoptosis. Analysis of the data set revealed a high level of agreement between the reported changes in directionality reported in proteomics studies and expected apoptosis-related function and may disclose proteins without a current recognized involvement in apoptosis based on gene ontology. Comparison between induction of apoptosis by the intrinsic and the extrinsic apoptotic signaling pathway revealed slight differences. Furthermore, proteomics has significantly contributed to the field of apoptosis in identifying hundreds of caspase substrates. 
The database is available at http://apoptoproteomics.uio.no.",ApoptoProteomics,0.990143538,NA,0,ApoptoProteomics,0.990143538,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2011 +31695717,http://bioinformatics.cau.edu.cn/AppleMDO,"AppleMDO: A Multi-Dimensional Omics Database for Apple Co-Expression Networks and Chromatin States. As an economically important crop, apple is one of the most cultivated fruit trees in temperate regions worldwide. Recently, a large number of high-quality transcriptomic and epigenomic datasets for apple were made available to the public, which could be helpful in inferring gene regulatory relationships and thus predicting gene function at the genome level. Through integration of the available apple genomic, transcriptomic, and epigenomic datasets, we constructed co-expression networks, identified functional modules, and predicted chromatin states. A total of 112 RNA-seq datasets were integrated to construct a global network and a conditional network (tissue-preferential network). Furthermore, a total of 1,076 functional modules with closely related gene sets were identified to assess the modularity of biological networks and further subjected to functional enrichment analysis. The results showed that the function of many modules was related to development, secondary metabolism, hormone response, and transcriptional regulation. Transcriptional regulation is closely related to epigenetic marks on chromatin. A total of 20 epigenomic datasets, which included ChIP-seq, DNase-seq, and DNA methylation analysis datasets, were integrated and used to classify chromatin states. Based on the ChromHMM algorithm, the genome was divided into 620,122 fragments, which were classified into 24 states according to the combination of epigenetic marks and enriched-feature regions. 
Finally, through the collaborative analysis of different omics datasets, the online database AppleMDO (http://bioinformatics.cau.edu.cn/AppleMDO/) was established for cross-referencing and the exploration of possible novel functions of apple genes. In addition, gene annotation information and functional support toolkits were also provided. Our database might be convenient for researchers to develop insights into the function of genes related to important agronomic traits and might serve as a reference for other fruit trees.",AppleMDO,0.992001891,NA,0,AppleMDO,0.992001891,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2019 +23161672,http://appris.bioinfo.cnio.es,"APPRIS: annotation of principal and alternative splice isoforms. Here, we present APPRIS (http://appris.bioinfo.cnio.es), a database that houses annotations of human splice isoforms. APPRIS has been designed to provide value to manual annotations of the human genome by adding reliable protein structural and functional data and information from cross-species conservation. The visual representation of the annotations provided by APPRIS for each gene allows annotators and researchers alike to easily identify functional changes brought about by splicing events. In addition to collecting, integrating and analyzing reliable predictions of the effect of splicing events, APPRIS also selects a single reference sequence for each gene, here termed the principal isoform, based on the annotations of structure, function and conservation for each transcript. APPRIS identifies a principal isoform for 85% of the protein-coding genes in the GENCODE 7 release for ENSEMBL. 
Analysis of the APPRIS data shows that at least 70% of the alternative (non-principal) variants would lose important functional or structural information relative to the principal isoform.",APPRIS,0.998272896,of,0.716509938,APPRIS,0.998272896,1,NA,29069475,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/17/2012 +29069475,http://appris-tools.org,"APPRIS 2017: principal isoforms for multiple gene sets. The APPRIS database (http://appris-tools.org) uses protein structural and functional features and information from cross-species conservation to annotate splice isoforms in protein-coding genes. APPRIS selects a single protein isoform, the 'principal' isoform, as the reference for each gene based on these annotations. A single main splice isoform reflects the biological reality for most protein coding genes and APPRIS principal isoforms are the best predictors of these main proteins isoforms. Here, we present the updates to the database, new developments that include the addition of three new species (chimpanzee, Drosophila melangaster and Caenorhabditis elegans), the expansion of APPRIS to cover the RefSeq gene set and the UniProtKB proteome for six species and refinements in the core methods that make up the annotation pipeline. In addition APPRIS now provides a measure of reliability for individual principal isoforms and updates with each release of the GENCODE/Ensembl and RefSeq reference sets. The individual GENCODE/Ensembl, RefSeq and UniProtKB reference gene sets for six organisms have been merged to produce common sets of splice variants.",APPRIS,0.995996177,NA,0,APPRIS,0.995996177,1,NA,23161672,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +33643383,http://lms.snu.edu.in/APRegNet,"Abiotic Stress-Responsive miRNA and Transcription Factor-Mediated Gene Regulatory Network in Oryza sativa: Construction and Structural Measure Study. 
Climate changes and environmental stresses have a consequential association with crop plant growth and yield, meaning it is necessary to cultivate crops that have tolerance toward the changing climate and environmental disturbances such as water stress, temperature fluctuation, and salt toxicity. Recent studies have shown that trans-acting regulatory elements, including microRNAs (miRNAs) and transcription factors (TFs), are emerging as promising tools for engineering naive improved crop varieties with tolerance for multiple environmental stresses and enhanced quality as well as yield. However, the interwoven complex regulatory function of TFs and miRNAs at transcriptional and post-transcriptional levels is unexplored in Oryza sativa. To this end, we have constructed a multiple abiotic stress responsive TF-miRNA-gene regulatory network for O. sativa using a transcriptome and degradome sequencing data meta-analysis approach. The theoretical network approach has shown the networks to be dense, scale-free, and small-world, which makes the network stable. They are also invariant to scale change where an efficient, quick transmission of biological signals occurs within the network on extrinsic hindrance. The analysis also deciphered the existence of communities (cluster of TF, miRNA, and genes) working together to help plants in acclimatizing to multiple stresses. It highlighted that genes, TFs, and miRNAs shared by multiple stress conditions that work as hubs or bottlenecks for signal propagation, for example, during the interaction between stress-responsive genes (TFs/miRNAs/other genes) and genes involved in floral development pathways under multiple environmental stresses. This study further highlights how the fine-tuning feedback mechanism works for balancing stress tolerance and how timely flowering enable crops to survive in adverse conditions. 
This study developed the abiotic stress-responsive regulatory network, APRegNet database (http://lms.snu.edu.in/APRegNet), which may help researchers studying the roles of miRNAs and TFs. Furthermore, it advances current understanding of multiple abiotic stress tolerance mechanisms.",APRegNet,0.974425733,NA,0,APRegNet,0.974425733,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/12/2021 +34556150,http://apricotgpd.com,"AprGPD: the apricot genomic and phenotypic database. Background Apricot is cultivated worldwide because of its high nutritive content and strong adaptability. Its flesh is delicious and has a unique and pleasant aroma. Apricot kernel is also consumed as nuts. The genome of apricot has been sequenced, and the transcriptome, resequencing, and phenotype data have been increasely generated. However, with the emergence of new information, the data are expected to integrate, and disseminate. Results To better manage the continuous addition of new data and increase convenience, we constructed the apricot genomic and phenotypic database (AprGPD, http://apricotgpd.com ). At present, AprGPD contains three reference genomes, 1692 germplasms, 306 genome resequencing data, 90 RNA sequencing data. A set of user-friendly query, analysis, and visualization tools have been implemented in AprGPD. We have also performed a detailed analysis of 59 transcription factor families for the three genomes of apricot. Conclusion Six modules are displayed in AprGPD, including species, germplasm, genome, variation, product, tools. The data integrated by AprGPD will be helpful for the molecular breeding of apricot.",AprGPD,0.988473594,apricot,0.7814821,AprGPD,0.988473594,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/23/2021 +22434840,http://aptamer.freebase.com,"Aptamer Base: a collaborative knowledge base to describe aptamers and SELEX experiments.
Over the past several decades, rapid developments in both molecular and information technology have collectively increased our ability to understand molecular recognition. One emerging area of interest in molecular recognition research includes the isolation of aptamers. Aptamers are single-stranded nucleic acid or amino acid polymers that recognize and bind to targets with high affinity and selectivity. While research has focused on collecting aptamers and their interactions, most of the information regarding experimental methods remains in the unstructured and textual format of peer reviewed publications. To address this, we present the Aptamer Base, a database that provides detailed, structured information about the experimental conditions under which aptamers were selected and their binding affinity quantified. The open collaborative nature of the Aptamer Base provides the community with a unique resource that can be updated and curated in a decentralized manner, thereby accommodating the ever evolving field of aptamer research. DATABASE URL: http://aptamer.freebase.com.",Aptamer Base,0.971290752,NA,0,Aptamer Base,0.971290752,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/20/2012 +28095775,http://www.biw.kuleuven.be/CSB/ARA-PEPs,"ARA-PEPs: a repository of putative sORF-encoded peptides in Arabidopsis thaliana. Background Many eukaryotic RNAs have been considered non-coding as they only contain short open reading frames (sORFs). However, there is increasing evidence for the translation of these sORFs into bioactive peptides with potent signaling, antimicrobial, developmental, antioxidant roles etc. Yet only a few peptides encoded by sORFs are annotated in the model organism Arabidopsis thaliana. Results To aid the functional annotation of these peptides, we have developed ARA-PEPs (available at http://www.biw.kuleuven.be/CSB/ARA-PEPs ), a repository of putative peptides encoded by sORFs in the A. 
thaliana genome starting from in-house Tiling arrays, RNA-seq data and other publicly available datasets. ARA-PEPs currently lists 13,748 sORF-encoded peptides with transcriptional evidence. In addition to existing data, we have identified 100 novel transcriptionally active regions (TARs) that might encode 341 novel stress-induced peptides (SIPs). To aid in identification of bioactivity, we add functional annotation and sequence conservation to predicted peptides. Conclusion To our knowledge, this is the largest repository of plant peptides encoded by sORFs with transcript evidence, publicly available and this resource will help scientists to effortlessly navigate the list of experimentally studied peptides, the experimental and computational evidence supporting the activity of these peptides and gain new perspectives for peptide discovery.",ARA-PEPs,0.987573981,NA,0,ARA-PEPs,0.987573981,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/17/2017 +24272250,http://rarge-v2.psc.riken.jp,"RARGE II: an integrated phenotype database of Arabidopsis mutant traits using a controlled vocabulary. Arabidopsis thaliana is one of the most popular experimental plants. However, only 40% of its genes have at least one experimental Gene Ontology (GO) annotation assigned. Systematic observation of mutant phenotypes is an important technique for elucidating gene functions. Indeed, several large-scale phenotypic analyses have been performed and have generated phenotypic data sets from many Arabidopsis mutant lines and overexpressing lines, which are freely available online. Since each Arabidopsis mutant line database uses individual phenotype expression, the differences in the structured term sets used by each database make it difficult to compare data sets and make it impossible to search across databases. 
Therefore, we obtained publicly available information for a total of 66,209 Arabidopsis mutant lines, including loss-of-function (RATM and TARAPPER) and gain-of-function (AtFOX and OsFOX) lines, and integrated the phenotype data by mapping the descriptions onto Plant Ontology (PO) and Phenotypic Quality Ontology (PATO) terms. This approach made it possible to manage the four different phenotype databases as one large data set. Here, we report a publicly accessible web-based database, the RIKEN Arabidopsis Genome Encyclopedia II (RARGE II; http://rarge-v2.psc.riken.jp/), in which all of the data described in this study are included. Using the database, we demonstrated consistency (in terms of protein function) with a previous study and identified the presumed function of an unknown gene. We provide examples of AT1G21600, which is a subunit in the plastid-encoded RNA polymerase complex, and AT5G56980, which is related to the jasmonic acid signaling pathway.",RARGE II,0.89071492,Arabidopsis Genome Encyclopedia II,0.959254963,Arabidopsis Genome Encyclopedia II,0.959254963,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/21/2013 +22345505,http://gmdd.shgmo.org/Computational-Biology/ANAP,"ANAP: an integrated knowledge base for Arabidopsis protein interaction network analysis. Protein interactions are fundamental to the molecular processes occurring within an organism and can be utilized in network biology to help organize, simplify, and understand biological complexity. Currently, there are more than 10 publicly available Arabidopsis (Arabidopsis thaliana) protein interaction databases. However, there are limitations with these databases, including different types of interaction evidence, a lack of defined standards for protein identifiers, differing levels of information, and, critically, a lack of integration between them. 
In this paper, we present an interactive bioinformatics Web tool, ANAP (Arabidopsis Network Analysis Pipeline), which serves to effectively integrate the different data sets and maximize access to available data. ANAP has been developed for Arabidopsis protein interaction integration and network-based study to facilitate functional protein network analysis. ANAP integrates 11 Arabidopsis protein interaction databases, comprising 201,699 unique protein interaction pairs, 15,208 identifiers (including 11,931 The Arabidopsis Information Resource Arabidopsis Genome Initiative codes), 89 interaction detection methods, 73 species that interact with Arabidopsis, and 6,161 references. ANAP can be used as a knowledge base for constructing protein interaction networks based on user input and supports both direct and indirect interaction analysis. It has an intuitive graphical interface allowing easy network visualization and provides extensive detailed evidence for each interaction. In addition, ANAP displays the gene and protein annotation in the generated interactive network with links to The Arabidopsis Information Resource, the AtGenExpress Visualization Tool, the Arabidopsis 1,001 Genomes GBrowse, the Protein Knowledgebase, the Kyoto Encyclopedia of Genes and Genomes, and the Ensembl Genome Browser to significantly aid functional network analysis. The tool is available open access at http://gmdd.shgmo.org/Computational-Biology/ANAP.",ANAP,0.893375516,Arabidopsis Network Analysis Pipeline,0.910027817,Arabidopsis Network Analysis Pipeline,0.910027817,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/16/2012 +29069336,http://arachnoserver.org,"ArachnoServer 3.0: an online resource for automated discovery, analysis and annotation of spider toxins. Summary ArachnoServer is a manually curated database that consolidates information on the sequence, structure, function and pharmacology of spider-venom toxins. 
Although spider venoms are complex chemical arsenals, the primary constituents are small disulfide-bridged peptides that target neuronal ion channels and receptors. Due to their high potency and selectivity, these peptides have been developed as pharmacological tools, bioinsecticides and drug leads. A new version of ArachnoServer (v3.0) has been developed that includes a bioinformatics pipeline for automated detection and analysis of peptide toxin transcripts in assembled venom-gland transcriptomes. ArachnoServer v3.0 was updated with the latest sequence, structure and functional data, the search-by-mass feature has been enhanced, and toxin cards provide additional information about each mature toxin. Availability and implementation http://arachnoserver.org. Contact support@arachnoserver.org. Supplementary information Supplementary data are available at Bioinformatics online.",ArachnoServer,0.996285319,NA,0,ArachnoServer,0.996285319,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2018 +29059333,http://aragwas.1001genomes.org,"The AraGWAS Catalog: a curated and standardized Arabidopsis thaliana GWAS catalog. The abundance of high-quality genotype and phenotype data for the model organism Arabidopsis thaliana enables scientists to study the genetic architecture of many complex traits at an unprecedented level of detail using genome-wide association studies (GWAS). GWAS have been a great success in A. thaliana and many SNP-trait associations have been published. With the AraGWAS Catalog (https://aragwas.1001genomes.org) we provide a publicly available, manually curated and standardized GWAS catalog for all publicly available phenotypes from the central A. thaliana phenotype repository, AraPheno. All GWAS have been recomputed on the latest imputed genotype release of the 1001 Genomes Consortium using a standardized GWAS pipeline to ensure comparability between results. 
The catalog includes currently 167 phenotypes and more than 222 000 SNP-trait associations with P < 10-4, of which 3887 are significantly associated using permutation-based thresholds. The AraGWAS Catalog can be accessed via a modern web-interface and provides various features to easily access, download and visualize the results and summary statistics across GWAS.",AraGWAS,0.865443408,NA,0,AraGWAS,0.865443408,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +25487439,http://aralip.plantbiology.msu.edu,"An annotated database of Arabidopsis mutants of acyl lipid metabolism. Key message We have constructed and annotated a web-based database of over 280 Arabidopsis genes that have characterized mutants associated with Arabidopsis acyl lipid metabolism. Mutants have played a fundamental role in gene discovery and in understanding the function of genes involved in plant acyl lipid metabolism. The first mutant in Arabidopsis lipid metabolism (fad4) was described in 1985. Since that time, characterization of mutants in more than 280 genes associated with acyl lipid metabolism has been reported. This review provides a brief background and history on identification of mutants in acyl lipid metabolism, an analysis of the distribution of mutants in different areas of acyl lipid metabolism and presents an annotated database (ARALIPmutantDB) of these mutants. The database provides information on the phenotypes of mutants, pathways and enzymes/proteins associated with the mutants, and allows rapid access via hyperlinks to summaries of information about each mutant and to literature that provides information on the lipid composition of the mutants. In addition, the database of mutants is integrated within the ARALIP plant acyl lipid metabolism website ( http://aralip.plantbiology.msu.edu ) so that information on mutants is displayed on and can be accessed from metabolic pathway maps. 
Mutants for at least 30% of the genes in the database have multiple names, which have been compiled here to reduce ambiguities in searches for information. The database should also provide a tool for exploring the relationships between mutants in acyl lipid-related genes and their lipid phenotypes and point to opportunities for further research.",ARALIP,0.893481195,NA,0,ARALIP,0.893481195,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/10/2014 +25355510,http://www.inetbio.org/aranet,"AraNet v2: an improved database of co-functional gene networks for the study of Arabidopsis thaliana and 27 other nonmodel plant species. Arabidopsis thaliana is a reference plant that has been studied intensively for several decades. Recent advances in high-throughput experimental technology have enabled the generation of an unprecedented amount of data from A. thaliana, which has facilitated data-driven approaches to unravel the genetic organization of plant phenotypes. We previously published a description of a genome-scale functional gene network for A. thaliana, AraNet, which was constructed by integrating multiple co-functional gene networks inferred from diverse data types, and we demonstrated the predictive power of this network for complex phenotypes. More recently, we have observed significant growth in the availability of omics data for A. thaliana as well as improvements in data analysis methods that we anticipate will further enhance the integrated database of co-functional networks. Here, we present an updated co-functional gene network for A. thaliana, AraNet v2 (available at http://www.inetbio.org/aranet), which covers approximately 84% of the coding genome. We demonstrate significant improvements in both genome coverage and accuracy. To enhance the usability of the network, we implemented an AraNet v2 web server, which generates functional predictions for A. 
thaliana and 27 nonmodel plant species using an orthology-based projection of nonmodel plant genes on the A. thaliana gene network.",AraNet,0.987630188,NA,0,AraNet,0.987630188,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/29/2014 +22760305,http://bioinformatics.sdstate.edu/arapath,"AraPath: a knowledgebase for pathway analysis in Arabidopsis. Unlabelled Studying plants using high-throughput genomics technologies is becoming routine, but interpretation of genome-wide expression data in terms of biological pathways remains a challenge, partly due to the lack of pathway databases. To create a knowledgebase for plant pathway analysis, we collected 1683 lists of differentially expressed genes from 397 gene-expression studies, which constitute a molecular signature database of various genetic and environmental perturbations of Arabidopsis. In addition, we extracted 1909 gene sets from various sources such as Gene Ontology, KEGG, AraCyc, Plant Ontology, predicted target genes of microRNAs and transcription factors, and computational gene clusters defined by meta-analysis. With this knowledgebase, we applied Gene Set Enrichment Analysis to an expression profile of cold acclimation and identified expected functional categories and pathways. Our results suggest that the AraPath database can be used to generate specific, testable hypotheses regarding plant molecular pathways from gene expression data. Availability http://bioinformatics.sdstate.edu/arapath/.",AraPath,0.983345628,NA,0,AraPath,0.983345628,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/3/2012 +31642487,"http://arapheno.1001genomes.org, http://aragwas.1001genomes.org","AraPheno and the AraGWAS Catalog 2020: a major database update including RNA-Seq and knockout mutation data for Arabidopsis thaliana. Genome-wide association studies (GWAS) are integral for studying genotype-phenotype relationships and gaining a deeper understanding of the genetic architecture underlying trait variation. 
A plethora of genetic associations between distinct loci and various traits have been successfully discovered and published for the model plant Arabidopsis thaliana. This success and the free availability of full genomes and phenotypic data for more than 1,000 different natural inbred lines led to the development of several data repositories. AraPheno (https://arapheno.1001genomes.org) serves as a central repository of population-scale phenotypes in A. thaliana, while the AraGWAS Catalog (https://aragwas.1001genomes.org) provides a publicly available, manually curated and standardized collection of marker-trait associations for all available phenotypes from AraPheno. In this major update, we introduce the next generation of both platforms, including new data, features and tools. We included novel results on associations between knockout-mutations and all AraPheno traits. Furthermore, AraPheno has been extended to display RNA-Seq data for hundreds of accessions, providing expression information for over 28 000 genes for these accessions. All data, including the imputed genotype matrix used for GWAS, are easily downloadable via the respective databases.",AraPheno,0.995172918,NA,0,AraPheno,0.995172918,1,27924043,27924043,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2020 +27924043,http://arapheno.1001genomes.org,"AraPheno: a public database for Arabidopsis thaliana phenotypes. Natural genetic variation makes it possible to discover evolutionary changes that have been maintained in a population because they are advantageous. To understand genotype-phenotype relationships and to investigate trait architecture, the existence of both high-resolution genotypic and phenotypic data is necessary. Arabidopsis thaliana is a prime model for these purposes. This herb naturally occurs across much of the Eurasian continent and North America. 
Thus, it is exposed to a wide range of environmental factors and has been subject to natural selection under distinct conditions. Full genome sequencing data for more than 1000 different natural inbred lines are available, and this has encouraged the distributed generation of many types of phenotypic data. To leverage these data for meta analyses, AraPheno (https://arapheno.1001genomes.org) provide a central repository of population-scale phenotypes for A. thaliana inbred lines. AraPheno includes various features to easily access, download and visualize the phenotypic data. This will facilitate a comparative analysis of the many different types of phenotypic data, which is the base to further enhance our understanding of the genotype-phenotype map.",AraPheno,0.992314994,NA,0,AraPheno,0.992314994,1,31642487,31642487,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/24/2016 +25414324,http://www.araport.org,"Araport: the Arabidopsis information portal. The Arabidopsis Information Portal (https://www.araport.org) is a new online resource for plant biology research. It houses the Arabidopsis thaliana genome sequence and associated annotation. It was conceived as a framework that allows the research community to develop and release 'modules' that integrate, analyze and visualize Arabidopsis data that may reside at remote sites. The current implementation provides an indexed database of core genomic information. These data are made available through feature-rich web applications that provide search, data mining, and genome browser functionality, and also by bulk download and web services. Araport uses software from the InterMine and JBrowse projects to expose curated data from TAIR, GO, BAR, EBI, UniProt, PubMed and EPIC CoGe. 
The site also hosts 'science apps,' developed as prototypes for community modules that use dynamic web pages to present data obtained on-demand from third-party servers via RESTful web services. Designed for sustainability, the Arabidopsis Information Portal strategy exploits existing scientific computing infrastructure, adopts a practical mixture of data integration technologies and encourages collaborative enhancement of the resource by its user community.",Araport,0.875419378,NA,0,Araport,0.875419378,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/20/2014 +27995664,http://www.bioinformatics.nl/Ara,"AraQTL - workbench and archive for systems genetics in Arabidopsis thaliana. Genetical genomics studies uncover genome-wide genetic interactions between genes and their transcriptional regulators. High-throughput measurement of gene expression in recombinant inbred line populations has enabled investigation of the genetic architecture of variation in gene expression. This has the potential to enrich our understanding of the molecular mechanisms affected by and underlying natural variation. Moreover, it contributes to the systems biology of natural variation, as a substantial number of experiments have resulted in a valuable amount of interconnectable phenotypic, molecular and genotypic data. A number of genetical genomics studies have been published for Arabidopsis thaliana, uncovering many expression quantitative trait loci (eQTLs). However, these complex data are not easily accessible to the plant research community, leaving most of the valuable genetic interactions unexplored as cross-analysis of these studies is a major effort. We address this problem with AraQTL (http://www.bioinformatics.nl/Ara QTL/), an easily accessible workbench and database for comparative analysis and meta-analysis of all published Arabidopsis eQTL datasets. AraQTL provides a workbench for comparing, re-using and extending upon the results of these experiments. 
For example, one can easily screen a physical region for specific local eQTLs that could harbour candidate genes for phenotypic QTLs, or detect gene-by-environment interactions by comparing eQTLs under different conditions.",AraQTL,0.994779587,NA,0,AraQTL,0.994779587,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/13/2017 +24265221,http://sbi.imim.es/archdb,"ArchDB 2014: structural classification of loops in proteins. The function of a protein is determined by its three-dimensional structure, which is formed by regular (i.e. β-strands and α-helices) and non-periodic structural units such as loops. Compared to regular structural elements, non-periodic, non-repetitive conformational units enclose a much higher degree of variability--raising difficulties in the identification of regularities, and yet represent an important part of the structure of a protein. Indeed, loops often play a pivotal role in the function of a protein and different aspects of protein folding and dynamics. Therefore, the structural classification of protein loops is an important subject with clear applications in homology modelling, protein structure prediction, protein design (e.g. enzyme design and catalytic loops) and function prediction. ArchDB, the database presented here (freely available at http://sbi.imim.es/archdb), represents such a resource and has been an important asset for the scientific community throughout the years. In this article, we present a completely reworked and updated version of ArchDB. The new version of ArchDB features a novel, fast and user-friendly web-based interface, and a novel graph-based, computationally efficient, clustering algorithm. The current version of ArchDB classifies 149,134 loops in 5739 classes and 9608 subclasses.",ArchDB,0.988814056,NA,0,ArchDB,0.988814056,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2013 +29077946,http://brp.kfshrc.edu.sa/ared,"ARED-Plus: an updated and expanded database of AU-rich element-containing mRNAs and pre-mRNAs. 
Here we present an updated version of the AU-Rich Element Database (ARED-Plus) that is freely available at http://brp.kfshrc.edu.sa/ared. AREs are conserved sequence elements that were first discovered in the 3'UTR of mammalian transcripts. Over the past years, we compiled a series of ARE databases that revealed the extent and wide distribution of ARE-containing genes. For this update, we adopted an optimized search algorithm with improved specificity and sensitivity in ARE selection. The designation of the different ARE clusters was simplified by directly correlating the number of the ARE cluster to the number of overlapping AUUUA pentamers. Additionally, the new database was expanded to include genes with intronic AREs (pre-mRNAs) and their characteristics since recent observations reported their abundance and biological significance. Several enhancements were incorporated such as customized column view, additional search options and live search functionalities. The new version includes links to AREsite and AREScore, two related ARE assessment algorithms for further evaluation of the ARE characteristics. ARED-Plus now contains an updated repertoire of AREs in the human transcriptome that may be useful in several research fields.",ARED-Plus,0.983083916,AU-Rich Element Database,0.900331807,ARED-Plus,0.983083916,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +26602692,http://rna.tbi.univie.ac.at/AREsite,"AREsite2: an enhanced database for the comprehensive investigation of AU/GU/U-rich elements. AREsite2 represents an update for AREsite, an on-line resource for the investigation of AU-rich elements (ARE) in human and mouse mRNA 3'UTR sequences. The new updated and enhanced version allows detailed investigation of AU, GU and U-rich elements (ARE, GRE, URE) in the transcriptome of Homo sapiens, Mus musculus, Danio rerio, Caenorhabditis elegans and Drosophila melanogaster. 
It contains information on genomic location, genic context, RNA secondary structure context and conservation of annotated motifs. Improvements include annotation of motifs not only in 3'UTRs but in the whole gene body including introns, additional genomes, and locally stable secondary structures from genome wide scans. Furthermore, we include data from CLIP-Seq experiments in order to highlight motifs with validated protein interaction. Additionally, we provide a REST interface for experienced users to interact with the database in a semi-automated manner. The database is publicly available at: http://rna.tbi.univie.ac.at/AREsite.",AREsite2,0.991525769,NA,0,AREsite2,0.991525769,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/23/2015 +25635527,http://autophagy-regulation.org,"Autophagy Regulatory Network - a systems-level bioinformatics resource for studying the mechanism and regulation of autophagy. Autophagy is a complex cellular process having multiple roles, depending on tissue, physiological, or pathological conditions. Major post-translational regulators of autophagy are well known, however, they have not yet been collected comprehensively. The precise and context-dependent regulation of autophagy necessitates additional regulators, including transcriptional and post-transcriptional components that are listed in various datasets. Prompted by the lack of systems-level autophagy-related information, we manually collected the literature and integrated external resources to gain a high coverage autophagy database. We developed an online resource, Autophagy Regulatory Network (ARN; http://autophagy-regulation.org), to provide an integrated and systems-level database for autophagy research. ARN contains manually curated, imported, and predicted interactions of autophagy components (1,485 proteins with 4,013 interactions) in humans. We listed 413 transcription factors and 386 miRNAs that could regulate autophagy components or their protein regulators. 
We also connected the above-mentioned autophagy components and regulators with signaling pathways from the SignaLink 2 resource. The user-friendly website of ARN allows researchers without computational background to search, browse, and download the database. The database can be downloaded in SQL, CSV, BioPAX, SBML, PSI-MI, and in a Cytoscape CYS file formats. ARN has the potential to facilitate the experimental validation of novel autophagy components and regulators. In addition, ARN helps the investigation of transcription factors, miRNAs and signaling pathways implicated in the control of the autophagic pathway. The list of such known and predicted regulators could be important in pharmacological attempts against cancer and neurodegenerative diseases.",ARN,0.983717382,Autophagy Regulatory Network,0.98153131,ARN,0.983717382,1,NA,27982098,NA,NA,NA,do not merge,NA,NA,NA,1/1/2015 +27982098,http://210.27.80.93/arn,"ARN: Analysis and Visualization System for Adipogenic Regulation Network Information. Adipogenesis is the process of cell differentiation through which preadipocytes become adipocytes. Lots of research is currently ongoing to identify genes, including their gene products and microRNAs, that correlate with fat cell development. However, information fragmentation hampers the identification of key regulatory genes and pathways. Here, we present a database of literature-curated adipogenesis-related regulatory interactions, designated the Adipogenesis Regulation Network (ARN, http://210.27.80.93/arn/), which currently contains 3101 nodes (genes and microRNAs), 1863 regulatory interactions, and 33,969 expression records associated with adipogenesis, based on 1619 papers. A sentence-based text-mining approach was employed for efficient manual curation of regulatory interactions from approximately 37,000 PubMed abstracts. Additionally, we further determined 13,103 possible node relationships by searching miRGate, BioGRID, PAZAR and TRRUST. 
ARN also has several useful features: i) regulatory map information; ii) tests to examine the impact of a query node on adipogenesis; iii) tests for the interactions and modes of a query node; iv) prediction of interactions of a query node; and v) analysis of experimental data or the construction of hypotheses related to adipogenesis. In summary, ARN can store, retrieve and analyze adipogenesis-related information as well as support ongoing adipogenesis research and contribute to the discovery of key regulatory genes and pathways.",ARN,0.983553469,Adipogenesis Regulation Network,0.879501736,ARN,0.983553469,1,NA,25635527,NA,NA,NA,do not merge,NA,NA,NA,12/16/2016 +34738791,http://www.pharmbioinf.uni-freiburg.de/arocagedb,"AroCageDB: A Web-Based Resource for Aromatic Cage Binding Sites and Their Intrinsic Ligands. While aromatic cages have extensively been investigated in the context of structural biology, molecular recognition, and drug discovery, there exist to date no comprehensive resource for proteins sharing this conserved structural motif. To this end, we parsed the Protein Data Bank and thus constructed the Aromatic Cage Database (AroCageDB), a database for investigating the binding pocket descriptors and ligand binding space of aromatic-cage-containing proteins (ACCPs). AroCageDB contains 487 unique ACCPs bound to 890 unique ligands, for a total of 1636 complexes. This web-accessible database provides a user-friendly interface for the interactive visualization of ligand-bound ACCP structures, with a variety of search options that will open up opportunities for structural analyses and drug discovery campaigns. 
AroCageDB is freely available at http://www.pharmbioinf.uni-freiburg.de/arocagedb/.",AroCageDB,0.994719088,Aromatic Cage Database,0.872022057,AroCageDB,0.994719088,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/5/2021 +30150996,http://bioinfo.cimap.res.in/aromadb,"AromaDb: A Database of Medicinal and Aromatic Plant's Aroma Molecules With Phytochemistry and Therapeutic Potentials. In traditional, herbal medicine, and aromatherapy, use of essential oils and their aroma compounds have been known since long, for the management of various human diseases. The essential oil is a mixture of highly complex, naturally occurring volatile aroma compounds synthesized by medicinal and aromatic plants as secondary metabolites. Essential oils widely used in pharmaceutical, cosmetic, sanitary, food industry and agriculture for their antibacterial, antiviral, antifungal, antiparasitic, insecticidal, anticancer, neuroprotective, psychophysiological, and anti-aging activities. Moreover, volatile aroma compounds comprise a chemically diverse class of low molecular weight organic compounds with significant vapor pressure. However, aroma compounds produced by plants, mainly attract pollinators, seed dispersers and provide defense against pests or pathogens. However, in humans, about 300 active olfactory receptor genes are involved to detect thousands of different aroma compounds and modulates expression of different metabolic genes regulating human psychophysiological activity, brain function, pharmacological signaling, and therapeutic potential. Keeping in mind this importance, present database, namely, AromaDb (http://bioinfo.cimap.res.in/aromadb/) covers information of plant varieties/chemotypes, essential oils, chemical constituents, GC-MS profile, yield variations due to agro-morphological parameters, trade data, aroma compounds, fragrance type, and bioactivity details. 
The database includes 1,321 aroma chemical structures, bioactivities of essential oil/aroma compounds, 357 fragrance type, 166 commercially used plants, and their high yielding 148 varieties/chemotypes. Also includes calculated cheminformatics properties related to identification, physico-chemical properties, pharmacokinetics, toxicological, and ecological information. Also comprises interacted human genes affecting various diseases related cell signaling pathways correlating the use of aromatherapy. This database could be a useful resource to the plant's growers/producers, an aroma/fragrance industrialist, health professionals, and researchers exploring the potential of essential oils and aroma compounds in the development of novel formulations against human diseases.",AromaDb,0.996747673,NA,0,AromaDb,0.996747673,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/13/2018 +25468931,http://aromadeg.siona.helmholtz-hzi.de,"AromaDeg, a novel database for phylogenomics of aerobic bacterial degradation of aromatics. Understanding prokaryotic transformation of recalcitrant pollutants and the in-situ metabolic nets require the integration of massive amounts of biological data. Decades of biochemical studies together with novel next-generation sequencing data have exponentially increased information on aerobic aromatic degradation pathways. However, the majority of protein sequences in public databases have not been experimentally characterized and homology-based methods are still the most routinely used approach to assign protein function, allowing the propagation of misannotations. AromaDeg is a web-based resource targeting aerobic degradation of aromatics that comprises recently updated (September 2013) and manually curated databases constructed based on a phylogenomic approach. 
Grounded in phylogenetic analyses of protein sequences of key catabolic protein families and of proteins of documented function, AromaDeg allows query and data mining of novel genomic, metagenomic or metatranscriptomic data sets. Essentially, each query sequence that match a given protein family of AromaDeg is associated to a specific cluster of a given phylogenetic tree and further function annotation and/or substrate specificity may be inferred from the neighboring cluster members with experimentally validated function. This allows a detailed characterization of individual protein superfamilies as well as high-throughput functional classifications. Thus, AromaDeg addresses the deficiencies of homology-based protein function prediction, combining phylogenetic tree construction and integration of experimental data to obtain more accurate annotations of new biological data related to aerobic aromatic biodegradation pathways. We pursue in future the expansion of AromaDeg to other enzyme families involved in aromatic degradation and its regular update. Database URL: http://aromadeg.siona.helmholtz-hzi.de",AromaDeg,0.997393668,NA,0,AromaDeg,0.997393668,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2014 +"23193272, 25361974, 30357387",http://www.ebi.ac.uk/arrayexpress,"ArrayExpress update--trends in database growth and links to data analysis tools. The ArrayExpress Archive of Functional Genomics Data (http://www.ebi.ac.uk/arrayexpress) is one of three international functional genomics public data repositories, alongside the Gene Expression Omnibus at NCBI and the DDBJ Omics Archive, supporting peer-reviewed publications. It accepts data generated by sequencing or array-based technologies and currently contains data from almost a million assays, from over 30 000 experiments. The proportion of sequencing-based submissions has grown significantly over the last 2 years and has reached, in 2012, 15% of all new data. 
All data are available from ArrayExpress in MAGE-TAB format, which allows robust linking to data analysis and visualization tools, including Bioconductor and GenomeSpace. Additionally, R objects, for microarray data, and binary alignment format files, for sequencing data, have been generated for a significant proportion of ArrayExpress data.",ArrayExpress,0.994881332,NA,0,ArrayExpress,0.994881332,3,33211879,33211879,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +33211879,"http://www.ebi.ac.uk/arrayexpress, http://www.ebi.ac.uk/biostudies","From ArrayExpress to BioStudies. ArrayExpress (https://www.ebi.ac.uk/arrayexpress) is an archive of functional genomics data at EMBL-EBI, established in 2002, initially as an archive for publication-related microarray data and was later extended to accept sequencing-based data. Over the last decade an increasing share of biological experiments involve multiple technologies assaying different biological modalities, such as epigenetics, and RNA and protein expression, and thus the BioStudies database (https://www.ebi.ac.uk/biostudies) was established to deal with such multimodal data. Its central concept is a study, which typically is associated with a publication. BioStudies stores metadata describing the study, provides links to the relevant databases, such as European Nucleotide Archive (ENA), as well as hosts the types of data for which specialized databases do not exist. With BioStudies now fully functional, we are able to further harmonize the archival data infrastructure at EMBL-EBI, and ArrayExpress is being migrated to BioStudies. In future, all functional genomics data will be archived at BioStudies. The process will be seamless for the users, who will continue to submit data using the online tool Annotare and will be able to query and download data largely in the same manner as before. Nevertheless, some technical aspects, particularly programmatic access, will change. 
This update guides the users through these changes.",ArrayExpress,0.992819309,NA,0,ArrayExpress,0.992819309,1,"23193272.0, 25361974.0, 30357387.0","23193272.0, 25361974.0, 30357387.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +25428357,http://www.arraymap.org,"arrayMap 2014: an updated cancer genome resource. Somatic copy number aberrations (CNA) represent a mutation type encountered in the majority of cancer genomes. Here, we present the 2014 edition of arrayMap (http://www.arraymap.org), a publicly accessible collection of pre-processed oncogenomic array data sets and CNA profiles, representing a vast range of human malignancies. Since the initial release, we have enhanced this resource both in content and especially with regard to data mining support. The 2014 release of arrayMap contains more than 64,000 genomic array data sets, representing about 250 tumor diagnoses. Data sets included in arrayMap have been assembled from public repositories as well as additional resources, and integrated by applying custom processing pipelines. Online tools have been upgraded for a more flexible array data visualization, including options for processing user provided, non-public data sets. Data integration has been improved by mapping to multiple editions of the human reference genome, with the majority of the data now being available for the UCSC hg18 as well as GRCh37 versions. The large amount of tumor CNA data in arrayMap can be freely downloaded by users to promote data mining projects, and to explore special events such as chromothripsis-like genome patterns.",arrayMap,0.994105458,NA,0,arrayMap,0.994105458,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/26/2014 +27242037,http://arthropodacyc.cycadsys.org,"ArthropodaCyc: a CycADS powered collection of BioCyc databases to analyse and compare metabolism of arthropods. . Arthropods interact with humans at different levels with highly beneficial roles (e.g. 
as pollinators), as well as with a negative impact for example as vectors of human or animal diseases, or as agricultural pests. Several arthropod genomes are available at present and many others will be sequenced in the near future in the context of the i5K initiative, offering opportunities for reconstructing, modelling and comparing their metabolic networks. In-depth analysis of these genomic data through metabolism reconstruction is expected to contribute to a better understanding of the biology of arthropods, thereby allowing the development of new strategies to control harmful species. In this context, we present here ArthropodaCyc, a dedicated BioCyc collection of databases using the Cyc annotation database system (CycADS), allowing researchers to perform reliable metabolism comparisons of fully sequenced arthropods genomes. Since the annotation quality is a key factor when performing such global genome comparisons, all proteins from the genomes included in the ArthropodaCyc database were re-annotated using several annotation tools and orthology information. All functional/domain annotation results and their sources were integrated in the databases for user access. Currently, ArthropodaCyc offers a centralized repository of metabolic pathways, protein sequence domains, Gene Ontology annotations as well as evolutionary information for 28 arthropod species. Such database collection allows metabolism analysis both with integrated tools and through extraction of data in formats suitable for systems biology studies.Database URL: http://arthropodacyc.cycadsys.org/.",ArthropodaCyc,0.994480968,NA,0,ArthropodaCyc,0.994480968,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/30/2016 +32507889,http://slim.icr.ac.uk/articles,"The articles.ELM resource: simplifying access to protein linear motif literature by annotation, text-mining and classification. . Modern biology produces data at a staggering rate. 
Yet, much of these biological data is still isolated in the text, figures, tables and supplementary materials of articles. As a result, biological information created at great expense is significantly underutilised. The protein motif biology field does not have sufficient resources to curate the corpus of motif-related literature and, to date, only a fraction of the available articles have been curated. In this study, we develop a set of tools and a web resource, 'articles.ELM', to rapidly identify the motif literature articles pertinent to a researcher's interest. At the core of the resource is a manually curated set of about 8000 motif-related articles. These articles are automatically annotated with a range of relevant biological data allowing in-depth search functionality. Machine-learning article classification is used to group articles based on their similarity to manually curated motif classes in the Eukaryotic Linear Motif resource. Articles can also be manually classified within the resource. The 'articles.ELM' resource permits the rapid and accurate discovery of relevant motif articles thereby improving the visibility of motif literature and simplifying the recovery of valuable biological insights sequestered within scientific articles. Consequently, this web resource removes a critical bottleneck in scientific productivity for the motif biology field. Database URL: http://slim.icr.ac.uk/articles/.",articles.ELM,0.941570143,NA,0,articles.ELM,0.941570143,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +32449934,http://asap.epfl.ch,"ASAP 2020 update: an open, scalable and interactive web-based portal for (single-cell) omics analyses. Single-cell omics enables researchers to dissect biological systems at a resolution that was unthinkable just 10 years ago. 
However, this analytical revolution also triggered new demands in 'big data' management, forcing researchers to stay up to speed with increasingly complex analytical processes and rapidly evolving methods. To render these processes and approaches more accessible, we developed the web-based, collaborative portal ASAP (Automated Single-cell Analysis Portal). Our primary goal is thereby to democratize single-cell omics data analyses (scRNA-seq and more recently scATAC-seq). By taking advantage of a Docker system to enhance reproducibility, and novel bioinformatics approaches that were recently developed for improving scalability, ASAP meets challenging requirements set by recent cell atlasing efforts such as the Human (HCA) and Fly (FCA) Cell Atlas Projects. Specifically, ASAP can now handle datasets containing millions of cells, integrating intuitive tools that allow researchers to collaborate on the same project synchronously. ASAP tools are versioned, and researchers can create unique access IDs for storing complete analyses that can be reproduced or completed by others. Finally, ASAP does not require any installation and provides a full and modular single-cell RNA-seq analysis pipeline. ASAP is freely available at https://asap.epfl.ch.",ASAP,0.72770232,Automated Single-cell Analysis Portal,0.707243107,ASAP,0.72770232,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2020 +"31665428, 31707700",http://mdl.shsmu.edu.cn/ASD,"Unraveling allosteric landscapes of allosterome with ASD. Allosteric regulation is one of the most direct and efficient ways to fine-tune protein function; it is induced by the binding of a ligand at an allosteric site that is topographically distinct from an orthosteric site. The Allosteric Database (ASD, available online at http://mdl.shsmu.edu.cn/ASD) was developed ten years ago to provide comprehensive information related to allosteric regulation. 
In recent years, allosteric regulation has received great attention in biological research, bioengineering, and drug discovery, leading to the emergence of entire allosteric landscapes as allosteromes. To facilitate research from the perspective of the allosterome, in ASD 2019, novel features were curated as follows: (i) >10 000 potential allosteric sites of human proteins were deposited for allosteric drug discovery; (ii) 7 human allosterome maps, including protease and ion channel maps, were built to reveal allosteric evolution within families; (iii) 1312 somatic missense mutations at allosteric sites were collected from patient samples from 33 cancer types and (iv) 1493 pharmacophores extracted from allosteric sites were provided for modulator screening. Over the past ten years, the ASD has become a central resource for studying allosteric regulation and will play more important roles in both target identification and allosteric drug discovery in the future.",ASD,0.992668907,The Allosteric Database,0.877874815,ASD,0.992668907,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +26822210,http://www.rcdd.org.cn/asdb/with,"ASDB: a resource for probing protein functions with small molecules. Unlabelled: Identifying chemical probes or seeking scaffolds for a specific biological target is important for protein function studies. Therefore, we create the Annotated Scaffold Database (ASDB), a computer-readable and systematic target-annotated scaffold database, to serve such needs. The scaffolds in ASDB were derived from public databases including ChEMBL, DrugBank and TCMSP, with a scaffold-based classification approach. Each scaffold was assigned with an InChIKey as its unique identifier, energy-minimized 3D conformations, and other calculated properties. A scaffold is also associated with drugs, natural products, drug targets and medical indications. The database can be retrieved through text or structure query tools. 
ASDB collects 333 601 scaffolds, which are associated with 4368 targets. The scaffolds consist of 3032 scaffolds derived from drugs and 5163 scaffolds derived from natural products. For given scaffolds, scaffold-target networks can be generated from the database to demonstrate the relations of scaffolds and targets. Availability and implementation ASDB is freely available at http://www.rcdd.org.cn/asdb/with the major web browsers. Contact junxu@biochemomes.com or xujun9@mail.sysu.edu.cn Supplementary information Supplementary data are available at Bioinformatics online.",ASDB,0.991850942,Annotated Scaffold Database,0.81746671,ASDB,0.991850942,1,NA,33588073,NA,NA,NA,do not merge,NA,NA,NA,1/28/2016 +33588073,http://asdb.jungleran.com,"ASDB: A comprehensive omics database for Anopheles sinensis. Anopheles sinensis is a key disease vector for human malaria and parasitic diseases such as malayan filariasis, and it is considered to be one of the most important malaria vectors in China and Southeast Asia. As high-throughput sequencing and assembly technology are widely used in An. sinensis, a lot of omics data have been generated, and abundant genome, mRNA transcriptome, miRNA transcriptome and resequencing results have been accumulated. In addition, lots of valuable morphological images and publications have been produced with the in-depth studies on An. sinensis. However, the increased quantity, variety, and structure complexity of the omics data create inconveniences for researchers to use and manage this information. We have built an An. sinensis omics database (ASDB, http://asdb.jungleran.com/) - a comprehensive and integrated database to promote scientific research on An. sinensis. Docker was used to deploy a development environment and Drupal to build ASDB. ASDB provides a Blast tool to do sequence alignment of genome sequence, gene sequence and protein sequence of An. sinensis. 
It also offers JBrowse (a next-generation genome visualization and analysis web platform) to facilitate researchers visualize the gene structure, non-coding RNA (include miRNA, snRNA, tRNA and so on) structure and genomic variation sites as desired. ASDB has integrated various latest omics data of An. Sinensis, including de novo genome and its annotation data, genome variation data (such as SNP and InDel), transcriptome and its expression value, miRNA expression value and miRNA-mRNA interaction, metagenomes. The database has also included the morphological images of different developmental stages and tissues, and important literatures associated with An. sinensis. ASDB provides a user-friendly search and displays pages. The integration of these resources will contribute to the study of basic biology and functional genome of An. sinensis.",ASDB,0.989679317,sinensis omics database,0.906412411,ASDB,0.989679317,1,NA,26822210,NA,NA,NA,do not merge,NA,NA,NA,2/12/2021 +24475134,http://ASDCD.amss.ac.cn,"ASDCD: antifungal synergistic drug combination database. Finding effective drugs to treat fungal infections has important clinical significance based on high mortality rates, especially in an immunodeficient population. Traditional antifungal drugs with single targets have been reported to cause serious side effects and drug resistance. Nowadays, however, drug combinations, particularly with respect to synergistic interaction, have attracted the attention of researchers. In fact, synergistic drug combinations could simultaneously affect multiple subpopulations, targets, and diseases. Therefore, a strategy that employs synergistic antifungal drug combinations could eliminate the limitations noted above and offer the opportunity to explore this emerging bioactive chemical space. However, it is first necessary to build a powerful database in order to facilitate the analysis of drug combinations. 
To address this gap in our knowledge, we have built the first Antifungal Synergistic Drug Combination Database (ASDCD), including previously published synergistic antifungal drug combinations, chemical structures, targets, target-related signaling pathways, indications, and other pertinent data. Its current version includes 210 antifungal synergistic drug combinations and 1225 drug-target interactions, involving 105 individual drugs from more than 12,000 references. ASDCD is freely available at http://ASDCD.amss.ac.cn.",ASDCD,0.993572259,Antifungal Synergistic Drug Combination Database,0.987889366,ASDCD,0.993572259,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/24/2014 +34839012,http://aser.ihb.ac.cn,"ASER: Animal Sex Reversal Database. Sex reversal, representing extraordinary sexual plasticity during the life cycle, not only triggers reproduction in animals but also affects reproductive and endocrine system-related diseases and cancers in humans. Sex reversal has been broadly reported in animals; however, an integrated resource hub of sex reversal information is still lacking. Here, we constructed a comprehensive database named ASER (Animal Sex Reversal) by integrating sex reversal-related data of 18 species from teleostei to mammalia. We systematically collected 40,018 published papers and mined the sex reversal-associated genes (SRGs), including their regulatory networks, from 1611 core papers. We annotated homologous genes and computed conservation scores for whole genomes across the 18 species. Furthermore, we collected available RNA-seq datasets and investigated the expression dynamics of SRGs during sex reversal or sex determination processes. In addition, we manually annotated 550 in situ hybridization (ISH), fluorescence in situ hybridization (FISH), and immunohistochemistry (IHC) images of SRGs from the literature and described their spatial expression in the gonads. 
Collectively, ASER provides a unique and integrated resource for researchers to query and reuse organized data to explore the mechanisms and applications of SRGs in animal breeding and human health. The ASER database is publicly available at http://aser.ihb.ac.cn/.",ASER,0.987263083,Animal Sex Reversal Database,0.97150902,ASER,0.987263083,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/25/2021 +32294195,http://asfvdb.popgenetics.net,"ASFVdb: an integrative resource for genomic and proteomic analyses of African swine fever virus. . The recent outbreaks of African swine fever (ASF) in China and Europe have threatened the swine industry globally. To control the transmission of ASF virus (ASFV), we developed the African swine fever virus database (ASFVdb), an online data visualization and analysis platform for comparative genomics and proteomics. On the basis of known ASFV genes, ASFVdb reannotates the genomes of every strain and newly annotates 5352 possible open reading frames (ORFs) of 45 strains. Moreover, ASFVdb performs a thorough analysis of the population genetics of all the published genomes of ASFV strains and performs functional and structural predictions for all genes. Users can obtain not only basic information for each gene but also its distribution in strains and conserved or high mutation regions, possible subcellular location and topology. In the genome browser, ASFVdb provides a sliding window for results of population genetic analysis, which facilitates genetic and evolutionary analyses at the genomic level. The web interface was constructed based on SWAV 1.0. ASFVdb is freely accessible at http://asfvdb.popgenetics.net.",ASFVdb,0.996319771,African swine fever virus database,0.897414913,ASFVdb,0.996319771,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +29321052,http://www.asgdb.org,"ASGDB: a specialised genomic resource for interpreting Anopheles sinensis insecticide√ɬÉ√ǬÇ√ɬÇ√Ǭ†resistance. BACKGROUND:Anopheles sinensis is an important malaria vector in Southeast Asia. 
The widespread emergence of insecticide resistance in this mosquito species poses a serious threat to the efficacy of malaria control measures, particularly in China. Recently, the whole-genome sequencing and de novo assembly of An. sinensis (China strain) has been finished. A series of insecticide-resistant studies in An. sinensis have also been reported. There is a growing need to integrate these valuable data to provide a comprehensive database for further studies on insecticide-resistant management of An. sinensis. RESULTS:A bioinformatics database named An. sinensis genome database (ASGDB) was built. In addition to being a searchable database of published An. sinensis genome sequences and annotation, ASGDB provides in-depth analytical platforms for further understanding of the genomic and genetic data, including visualization of genomic data, orthologous relationship analysis, GO analysis, pathway analysis, expression analysis and resistance-related gene analysis. Moreover, ASGDB provides a panoramic view of insecticide resistance studies in An. sinensis in China. In total, 551 insecticide-resistant phenotypic and genotypic reports on An. sinensis distributed in Chinese malaria-endemic areas since the mid-1980s have been collected, manually edited in the same format and integrated into OpenLayers map-based interface, which allows the international community to assess and exploit the high volume of scattered data much easier. The database has been given the URL: http://www.asgdb.org /. CONCLUSIONS:ASGDB was built to help users mine data from the genome sequence of An. sinensis easily and effectively, especially with its advantages in insecticide resistance surveillance and control.",ASGDB,0.969647527,sinensis genome database,0.934053496,ASGDB,0.969647527,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/10/2018 +27193158,http://asl-lex.org,"ASL-LEX: A lexical database of American Sign Language. 
ASL-LEX is a lexical database that catalogues information about nearly 1,000 signs in American Sign Language (ASL). It includes the following information: subjective frequency ratings from 25-31 deaf signers, iconicity ratings from 21-37 hearing non-signers, videoclip duration, sign length (onset and offset), grammatical class, and whether the sign is initialized, a fingerspelled loan sign, or a compound. Information about English translations is available for a subset of signs (e.g., alternate translations, translation consistency). In addition, phonological properties (sign type, selected fingers, flexion, major and minor location, and movement) were coded and used to generate sub-lexical frequency and neighborhood density estimates. ASL-LEX is intended for use by researchers, educators, and students who are interested in the properties of the ASL lexicon. An interactive website where the database can be browsed and downloaded is available at http://asl-lex.org .",ASL-LEX,0.995315278,NA,0,ASL-LEX,0.995315278,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2017 +31036810,http://bansallab.github.io/asnr,"A multi-species repository of social networks. Social network analysis is an invaluable tool to understand the patterns, evolution, and consequences of sociality. Comparative studies over a range of social systems across multiple taxonomic groups are particularly valuable. Such studies however require quantitative social association or interaction data across multiple species which is not easily available. We introduce the Animal Social Network Repository (ASNR) as the first multi-taxonomic repository that collates 790 social networks from more than 45 species, including those of mammals, reptiles, fish, birds, and insects. The repository was created by consolidating social network datasets from the literature on wild and captive animals into a consistent and easy-to-use network data format. The repository is archived at https://bansallab.github.io/asnr/ . 
ASNR has tremendous research potential, including testing hypotheses in the fields of animal ecology, social behavior, epidemiology and evolutionary biology.",ASNR,0.994962871,Animal Social Network Repository,0.941487324,ASNR,0.994962871,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/29/2019 +29106599,http://combio.snu.ac.kr/aspedia,"ASpedia: a comprehensive encyclopedia of human alternative splicing. Alternative splicing confers the human genome complexity by increasing the diversity of expressed mRNAs. Hundreds or thousands of splicing regions have been identified through differential alternative splicing analysis of high-throughput datasets. However, it is hard to explain the functional impact of each splicing event. Protein domain formation and nonsense-mediated decay are considered the main functional features of splicing. However, other functional features such as miRNA target sites, phosphorylation sites and single-nucleotide variations are directly affected by alternative splicing and affect downstream function. Hence, we established ASpedia: a comprehensive database for human alternative splicing annotation, which encompasses a range of functions, from genomic annotation to isoform-specific function (ASpedia, http://combio.snu.ac.kr/aspedia). The database provides three features: (i) genomic annotation extracted from DNA, RNA and proteins; (ii) transcription and regulation elements analyzed from next-generation sequencing datasets; and (iii) isoform-specific functions collected from known and published datasets. The ASpedia web application includes three components: an annotation database, a retrieval system and a browser specialized in the identification of human alternative splicing events. The retrieval system supports multiple AS event searches resulting from high-throughput analysis and the AS browser comprises genome tracks. 
Thus, ASpedia facilitates the systemic annotation of the functional impacts of multiple AS events.",ASpedia,0.996890545,NA,0,ASpedia,0.996890545,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +"22080559, 24194595",http://www.aspgd.org,"The Aspergillus Genome Database (AspGD): recent developments in comprehensive multispecies curation, comparative genomics and community resources. The Aspergillus Genome Database (AspGD; http://www.aspgd.org) is a freely available, web-based resource for researchers studying fungi of the genus Aspergillus, which includes organisms of clinical, agricultural and industrial importance. AspGD curators have now completed comprehensive review of the entire published literature about Aspergillus nidulans and Aspergillus fumigatus, and this annotation is provided with streamlined, ortholog-based navigation of the multispecies information. AspGD facilitates comparative genomics by providing a full-featured genomics viewer, as well as matched and standardized sets of genomic information for the sequenced aspergilli. AspGD also provides resources to foster interaction and dissemination of community information and resources. We welcome and encourage feedback at aspergillus-curator@lists.stanford.edu.",AspGD,0.997273564,Aspergillus Genome Database,0.978452827,AspGD,0.997273564,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/4/2013 +31843802,http://ipf.sustech.edu.cn/pub/asrd,"An Online Database for Exploring Over 2,000 Arabidopsis Small RNA Libraries. Small RNAs (sRNAs) play a wide range of important roles in plants, from maintaining genome stability and enhancing disease resistance to regulating developmental processes. Over the past decade, next-generation sequencing technologies have allowed us to explore the sRNA populations with unprecedented depth and accuracy. The community has accumulated a tremendous amount of sRNA sequencing (sRNA-seq) data from various genotypes, tissues, and treatments. 
However, it has become increasingly challenging to access these ""big data"" and extract useful information, particularly for researchers lacking sophisticated bioinformatics tools and expensive computational resources. Here, we constructed an online website, Arabidopsis Small RNA Database (ASRD, http://ipf.sustech.edu.cn/pub/asrd), that allows users to easily explore the information from publicly available Arabidopsis (Arabidopsis thaliana) sRNA libraries. Our database contains √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº2.3 billion sRNA reads, representing √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº250 million unique sequences from 2,024 sRNA-seq libraries. We downloaded the raw data for all libraries and reprocessed them with a unified pipeline so that the normalized abundance of any particular sRNA or the sum of abundances of sRNAs from a genic or transposable element region can be compared across all libraries. We also integrated an online Integrative Genomics Viewer browser into our Web site for convenient visualization. ASRD is a free, web-accessible, and user-friendly database that supports the direct query of over 2,000 Arabidopsis sRNA-seq libraries. We believe this resource will help plant researchers take advantage of the vast next-generation sequencing datasets available in the public domain.",ASRD,0.982676625,Arabidopsis Small RNA Database,0.980507145,ASRD,0.982676625,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/16/2019 +23904744,http://121.241.218.70/ASRDb,"ASRDb: A comprehensive resource for archaeal stress response genes. An organism's survival strategy under the constantly changing environment depends on its ability to sense and respond to changes in its environment. Archaea, being capable to grow under various extreme environmental conditions, provide valuable model for exploring how single-celled organisms respond to environmental stresses. However, no such approach has ever been made to make an integrated classification of various archaeal stress responses. 
Archaeal Stress Response Database (ASRDb) is a web accessible (http://121.241.218.70/ASRDb) database that represents the first online available resource providing a comprehensive overview of stress response genes of 66 archaeal genomes. This database currently contains almost 6000 stress specific genes of 66 archaeal genomes. All the stress specific genes are grouped into 17 different stress categories. A user-friendly interface has been designed to examine data using query tools. This database provides an efficient search engine for random and advanced database search operations. We have incorporated BLAST search options to the resulting sequences retrieved from database search operations. A site map page representing the schematic diagram will enable user to understand the logic behind the construction of the database. We have also provided a very rich and informative help page to make user familiar with the database. We sincerely believe that ASRDb will be of particular interest to the life science community and facilitates the biologists to unravel the role of stress specific genes in the adaptation of microorganisms under various extreme environmental conditions.",ASRDb,0.997044563,Archaeal Stress Response Database,0.98826167,ASRDb,0.997044563,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/12/2013 +25179222,http://cb.iri.univ-lille1.fr/Users/lensink/Score_set,"Score_set: a CAPRI benchmark for scoring protein complexes. Critical Assessment of PRedicted Interactions (CAPRI) has proven to be a catalyst for the development of docking algorithms. An essential step in docking is the scoring of predicted binding modes in order to identify stable complexes. In 2005, CAPRI introduced the scoring experiment, where upon completion of a prediction round, a larger set of models predicted by different groups and comprising both correct and incorrect binding modes, is made available to all participants for testing new scoring functions independently from docking calculations. 
Here we present an expanded benchmark data set for testing scoring functions, which comprises the consolidated ensemble of predicted complexes made available in the CAPRI scoring experiment since its inception. This consolidated scoring benchmark contains predicted complexes for 15 published CAPRI targets. These targets were subjected to 23 CAPRI assessments, due to existence of multiple binding modes for some targets. The benchmark contains more than 19,000 protein complexes. About 10% of the complexes represent docking predictions of acceptable quality or better, the remainder represent incorrect solutions (decoys). The benchmark set contains models predicted by 47 different predictor groups including web servers, which use different docking and scoring procedures, and is arguably as diverse as one may expect, representing the state of the art in protein docking. The data set is publicly available at the following URL: http://cb.iri.univ-lille1.fr/Users/lensink/Score_set.",CAPRI,0.576791465,Assessment of,0.607478499,Assessment of,0.607478499,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,9/11/2014 +29987736,http://at-chloro.prabi.fr/at_chloro,"AT_CHLORO: The First Step When Looking for Information About Subplastidial Localization of Proteins. Plastids contain several key subcompartments. The two limiting envelope membranes (inner and outer membrane of the plastid envelope with an intermembrane space between), an aqueous phase (stroma), and an internal membrane system terms (thylakoids) formed of flat compressed vesicles (grana) and more light structures (lamellae). The thylakoid vesicles delimit another discrete soluble compartment, the thylakoid lumen. AT_CHLORO ( http://at-chloro.prabi.fr/at_chloro/ ) is a unique database supplying information about the subplastidial localization of proteins. 
It was created from simultaneous proteomic analyses targeted to the main subcompartments of the chloroplast from Arabidopsis thaliana (i.e., envelope, stroma, thylakoid) and to the two subdomains of thylakoid membranes (i.e., grana and stroma lamellae). AT_CHLORO assembles several complementary information (MS-based experimental data, curated functional annotations and subplastidial localization, links to other public databases and references) which give a comprehensive overview of the current knowledge about the subplastidial localization and the function of chloroplast proteins, with a specific attention given to chloroplast envelope proteins.",AT_CHLORO,0.995981574,NA,0,AT_CHLORO,0.995981574,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +31535335,http://www.atacamadb.cl,"Atacama Database: a platform of the microbiome of the Atacama Desert. The Atacama Desert is one of the oldest and driest places on Earth. In the last decade, microbial richness and diversity has been acknowledged as an important biological resource of this region. Owing to the value of the microbial diversity apparent in potential biotechnology applications and conservation purposes, it is necessary to catalogue these microbial communities to promote research activities and help to preserve the wide range of ecological niches of the Atacama region. A prototype Atacama Database has been designed and it provides a description of the rich microbial diversity of the Atacama Desert, and helps to visualise available literature resources. Data has been collected, curated, and organised into several categories to generate a single record for each organism in the database that covers classification, isolation metadata, morphology, physiology, genome and metabolism information. The current version of Atacama Database contains 2302 microorganisms and includes cultured and uncultured organisms retrieved from different environments within the desert between 1984 and 2016. 
These organisms are distributed in bacterial, archaeal or eukaryotic domains, along with those that are unclassified taxonomically. The initial prototype of the Atacama Database includes a basic search and taxonomic and advanced search tools to allow identification and comparison of microbial populations, and space distribution within this biome. A geolocation search was implemented to visualise the microbial diversity of the ecological niches defined by sectors and extract general information of the sampling sites. This effort will aid understanding of the microbial ecology of the desert, microbial population dynamics, seasonal behaviour, impact of climate change over time, and reveal further biotechnological applications of these microorganisms. The Atacama Database is freely available at: https://www.atacamadb.cl.",Atacama,0.921638429,NA,0,Atacama,0.921638429,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/18/2019 +33125076,http://www.licpathway.net/ATACdb,"ATACdb: a comprehensive human chromatin accessibility database. Accessible chromatin is a highly informative structural feature for identifying regulatory elements, which provides a large amount of information about transcriptional activity and gene regulatory mechanisms. Human ATAC-seq datasets are accumulating rapidly, prompting an urgent need to comprehensively collect and effectively process these data. We developed a comprehensive human chromatin accessibility database (ATACdb, http://www.licpathway.net/ATACdb), with the aim of providing a large amount of publicly available resources on human chromatin accessibility data, and to annotate and illustrate potential roles in a tissue/cell type-specific manner. The current version of ATACdb documented a total of 52√ɬÉ√ǬÇ√ɬÇ√Ǭ†078√ɬÉ√ǬÇ√ɬÇ√Ǭ†883 regions from over 1400 ATAC-seq samples. These samples have been manually curated from over 2200 chromatin accessibility samples from NCBI GEO/SRA. 
To make these datasets more accessible to the research community, ATACdb provides a quality assurance process including four quality control (QC) metrics. ATACdb provides detailed (epi)genetic annotations in chromatin accessibility regions, including super-enhancers, typical enhancers, transcription factors (TFs), common single-nucleotide polymorphisms (SNPs), risk SNPs, eQTLs, LD SNPs, methylations, chromatin interactions and TADs. Especially, ATACdb provides accurate inference of TF footprints within chromatin accessibility regions. ATACdb is a powerful platform that provides the most comprehensive accessible chromatin data, QC, TF footprint and various other annotations.",ATACdb,0.997429222,chromatin accessibility database,0.808434457,ATACdb,0.997429222,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +34817215,http://genomes.atcc.org,"The ATCC Genome Portal: Microbial Genome Reference Standards with Data Provenance. Lack of data provenance negatively impacts scientific reproducibility and the reliability of genomic data. The ATCC Genome Portal (https://genomes.atcc.org) addresses this by providing data provenance information for microbial whole-genome assemblies originating from authenticated biological materials. To date, we have sequenced 1,579 complete genomes, including 466 type strains and 1,156 novel genomes.",ATCC,0.587389708,NA,0,ATCC,0.587389708,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/24/2021 +28968841,http://genome.sdau.edu.cn/circRNA,"AtCircDB: a tissue-specific database for Arabidopsis circular RNAs. Circular RNAs are widely existing in eukaryotes. However, there is as yet no tissue-specific Arabidopsis circular RNA database, which hinders the study of circular RNA in plants. Here, we used 622 Arabidopsis RNA sequencing data sets from 87 independent studies hosted at NCBI SRA and developed AtCircDB to systematically identify, store and retrieve circular RNAs. 
By analyzing back-splicing sites, we characterized 84√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ685 circular RNAs, 30√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ648 tissue-specific circular RNAs and 3486 microRNA-circular RNA interactions. In addition, we used a metric (detection score) to measure the detection ability of the circular RNAs using a big-data approach. By experimental validation, we demonstrate that this metric improves the accuracy of the detection algorithm. We also defined the regions hosting enriched circular RNAs as super circular RNA regions. The results suggest that these regions are highly related to alternative splicing and chloroplast. Finally, we developed a comprehensive tissue-specific database (AtCircDB) to help the community store, retrieve, visualize and download Arabidopsis circular RNAs. This database will greatly expand our understanding of circular RNAs and their related regulatory networks. AtCircDB is freely available at http://genome.sdau.edu.cn/circRNA.",AtCircDB,0.993186295,NA,0,AtCircDB,0.993186295,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30239683,http://auto2disease.nwsuaflmz.com,"ATD: a comprehensive bioinformatics resource for deciphering the association of autophagy and diseases. . Autophagy is the natural, regulated, destructive mechanism of the eukaryotes cell that disassembles unnecessary or dysfunctional components. In recent years, the association between autophagy and diseases has attracted more and more attention, but our understanding of the molecular mechanism about the association in the system perspective is limited and ambiguous. Hence, we developed the comprehensive bioinformatics resource Autophagy To Disease (ATD, http://auto2disease.nwsuaflmz.com) to archive autophagy-associated diseases. This resource provides bioinformatics annotation system about genes and chemicals about autophagy and human diseases by extracting results from previous studies with text mining technology. 
Based on the big data from ATD, we found that some classes of disease tend to be related with autophagy, including respiratory disease, cancer, urogenital disease and digestive system disease. We also found that some classes of autophagy-related diseases have a strong association among each other and constitute modules. Furthermore, we extracted the autophagy-disease-related genes (ADGs) from ATD and provided a novel algorithm Optimized Random Forest with Label model to predict potential ADGs. This bioinformatics annotation system about autophagy and human diseases may provide a basic resource for the further detection of the molecular mechanisms of autophagy pathway to disease.",ATD,0.981130342,Autophagy To Disease,0.913275957,ATD,0.981130342,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +32681639,http://www.bigzju.com/ATdb,"Autophagy and Tumor Database: ATdb, a novel database connecting autophagy and tumor. . Autophagy is an essential cellular process that is closely implicated in diverse pathophysiological processes and a variety of human diseases, especially tumors. Autophagy is regarded as not only an anti-cancer process in tumorigenesis but also a pro-tumor process in progression and metastasis according to current research. It means the role of autophagy in tumor is considered to be complex, controversial and context dependent. Hence, a comprehensive database is of great significance to obtain an in-depth understanding of such complex correlations between autophagy and tumor. To achieve this objective, here we developed the Autophagy and Tumor Database (named as ATdb, http://www.bigzju.com/ATdb/#/) to compile the published information concerning autophagy and tumor research. 
ATdb connected 25 types of tumors with 137 genes required for autophagy-related pathways, containing 219 population filters, 2650 hazard ratio trend plots, 658 interacting microRNAs, 266 interacting long non-coding RNAs, 155 post-translational modifications, 298 DNA methylation records, 331 animal models and 70 clinical trials. ATdb could enable users to search, browse, download and carry out efficient online analysis. For instance, users can make prediction of autophagy gene regulators in a context-dependent manner and in a precise subpopulation and tumor subtypes. Also, it is feasible in ATdb to cluster tumors into distinguished groups based on the gene-related long non-coding RNAs to gain novel insights into their potential functional implications. Thus, ATdb offers a powerful online database for the autophagy community to explore the complex world of autophagy and tumor. Database URL: http://www.bigzju.com/ATdb/#/.",ATdb,0.992148757,Autophagy and Tumor Database,0.977843061,ATdb,0.992148757,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +30624648,http://www.nipgr.res.in/AtFusionDB,"AtFusionDB: a database of fusion transcripts in Arabidopsis thaliana. . Fusion transcripts are chimeric RNAs generated as a result of fusion either at DNA or RNA level. These novel transcripts have been extensively studied in the case of human cancers but still remain underexamined in plants. In this study, we introduce the first plant-specific database of fusion transcripts named AtFusionDB (http://www.nipgr.res.in/AtFusionDB). This is a comprehensive database that contains the detailed information about fusion transcripts identified in model plant Arabidopsis thaliana. A total of 82√ɬÉ√ǬÇ√ɬÇ√Ǭ†969 fusion transcript entries generated from 17√ɬÉ√ǬÇ√ɬÇ√Ǭ†181 different genes of A. thaliana are available in this database. Apart from the basic information consisting of the Ensembl gene names, official gene name, tissue type, EricScore, fusion type, AtFusionDB ID and sample ID (e.g. 
Sequence Read Archive ID), additional information like UniProt, gene coordinates (together with the function of parental genes), junction sequence, expression level of both parent genes and fusion transcript may be of high utility to the user. Two different types of search modules viz. 'Simple Search' and 'Advanced Search' in addition to the 'Browse' option with data download facility are provided in this database. Three different modules for mapping and alignment of the query sequences viz. BLASTN, SW Align and Mapping are incorporated in AtFusionDB. This database is a head start for exploring the complex and unexplored domain of gene/transcript fusion in plants.",AtFusionDB,0.996401966,NA,0,AtFusionDB,0.996401966,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +22800758,http://www.athamap.de,"'MicroRNA Targets', a new AthaMap web-tool for genome-wide identification of miRNA targets in Arabidopsis thaliana. Background The AthaMap database generates a genome-wide map for putative transcription factor binding sites for A. thaliana. When analyzing transcriptional regulation using AthaMap it may be important to learn which genes are also post-transcriptionally regulated by inhibitory RNAs. Therefore, a unified database for transcriptional and post-transcriptional regulation will be highly useful for the analysis of gene expression regulation. Methods To identify putative microRNA target sites in the genome of A. thaliana, processed mature miRNAs from 243 annotated miRNA genes were used for screening with the psRNATarget web server. Positional information, target genes and the psRNATarget score for each target site were annotated to the AthaMap database. Furthermore, putative target sites for small RNAs from seven small RNA transcriptome datasets were used to determine small RNA target sites within the A. thaliana genome. Results Putative 41,965 genome wide miRNA target sites and 10,442 miRNA target genes were identified in the A. thaliana genome. 
Taken together with genes targeted by small RNAs from small RNA transcriptome datasets, a total of 16,600 A. thaliana genes are putatively regulated by inhibitory RNAs. A novel web-tool, 'MicroRNA Targets', was integrated into AthaMap which permits the identification of genes predicted to be regulated by selected miRNAs. The predicted target genes are displayed with positional information and the psRNATarget score of the target site. Furthermore, putative target sites of small RNAs from selected tissue datasets can be identified with the new 'Small RNA Targets' web-tool. Conclusions The integration of predicted miRNA and small RNA target sites with transcription factor binding sites will be useful for AthaMap-assisted gene expression analysis. URL: http://www.athamap.de/",AthaMap,0.989823103,NA,0,AthaMap,0.989823103,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/16/2012 +28160322,http://zlab.umassmed.edu/atlas/web,"ATLAS: A database linking binding affinities with structures for wild-type and mutant TCR-pMHC complexes. The ATLAS (Altered TCR Ligand Affinities and Structures) database (https://zlab.umassmed.edu/atlas/web/) is a manually curated repository containing the binding affinities for wild-type and mutant T cell receptors (TCRs) and their antigens, peptides presented by the major histocompatibility complex (pMHC). The database links experimentally measured binding affinities with the corresponding three dimensional (3D) structures for TCR-pMHC complexes. The user can browse and search affinities, structures, and experimental details for TCRs, peptides, and MHCs of interest. We expect this database to facilitate the development of next-generation protein design algorithms targeting TCR-pMHC interactions. ATLAS can be easily parsed using modeling software that builds protein structures for training and testing. As an example, we provide structural models for all mutant TCRs in ATLAS, built using the Rosetta program. 
Utilizing these structures, we report a correlation of 0.63 between experimentally measured changes in binding energies and our predicted changes. Proteins 2017; 85:908-916. © 2016 Wiley Periodicals, Inc.",ATLAS,0.976716459,Altered TCR Ligand Affinities and Structures,0.972670598,ATLAS,0.976716459,1,NA,"27404214.0, 32421310.0",low_prob_best_name,do not remove,NA,conflicting record(s) to be removed,NA,NA,NA,2/16/2017 +"27404214, 32421310",http://lcsb-databases.epfl.ch/atlas,"ATLAS of Biochemistry: A Repository of All Possible Biochemical Reactions for Synthetic Biology and Metabolic Engineering Studies. Because the complexity of metabolism cannot be intuitively understood or analyzed, computational methods are indispensable for studying biochemistry and deepening our understanding of cellular metabolism to promote new discoveries. We used the computational framework BNICE.ch along with cheminformatic tools to assemble the whole theoretical reactome from the known metabolome through expansion of the known biochemistry presented in the Kyoto Encyclopedia of Genes and Genomes (KEGG) database. We constructed the ATLAS of Biochemistry, a database of all theoretical biochemical reactions based on known biochemical principles and compounds. ATLAS includes more than 130 000 hypothetical enzymatic reactions that connect two or more KEGG metabolites through novel enzymatic reactions that have never been reported to occur in living organisms. Moreover, ATLAS reactions integrate 42% of KEGG metabolites that are not currently present in any KEGG reaction into one or more novel enzymatic reactions. The generated repository of information is organized in a Web-based database ( http://lcsb-databases.epfl.ch/atlas/ ) that allows the user to search for all possible routes from any substrate compound to any product.
The resulting pathways involve known and novel enzymatic steps that may indicate unidentified enzymatic activities and provide potential targets for protein engineering. Our approach of introducing novel biochemistry into pathway design and associated databases will be important for synthetic biology and metabolic engineering.",ATLAS,0.686975062,ATLAS of Biochemistry,0.570031409,ATLAS,0.686975062,2,NA,28160322,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,6/2/2020 +22876890,http://www.t4ss.lncc.br,"AtlasT4SS: a curated database for type IV secretion systems. Background The type IV secretion system (T4SS) can be classified as a large family of macromolecule transporter systems, divided into three recognized sub-families, according to the well-known functions. The major sub-family is the conjugation system, which allows transfer of genetic material, such as a nucleoprotein, via cell contact among bacteria. Also, the conjugation system can transfer genetic material from bacteria to eukaryotic cells; such is the case with the T-DNA transfer of Agrobacterium tumefaciens to host plant cells. The system of effector protein transport constitutes the second sub-family, and the third one corresponds to the DNA uptake/release system. Genome analyses have revealed numerous T4SS in Bacteria and Archaea. The purpose of this work was to organize, classify, and integrate the T4SS data into a single database, called AtlasT4SS - the first public database devoted exclusively to this prokaryotic secretion system. Description The AtlasT4SS is a manual curated database that describes a large number of proteins related to the type IV secretion system reported so far in Gram-negative and Gram-positive bacteria, as well as in Archaea. The database was created using the RDBMS MySQL and the Catalyst Framework based in the Perl programming language and using the Model-View-Controller (MVC) design pattern for Web. 
The current version holds a comprehensive collection of 1,617 T4SS proteins from 58 Bacteria (49 Gram-negative and 9 Gram-Positive), one Archaea and 11 plasmids. By applying the bi-directional best hit (BBH) relationship in pairwise genome comparison, it was possible to obtain a core set of 134 clusters of orthologous genes encoding T4SS proteins. Conclusions In our database we present one way of classifying orthologous groups of T4SSs in a hierarchical classification scheme with three levels. The first level comprises four classes that are based on the organization of genetic determinants, shared homologies, and evolutionary relationships: (i) F-T4SS, (ii) P-T4SS, (iii) I-T4SS, and (iv) GI-T4SS. The second level designates a specific well-known protein families otherwise an uncharacterized protein family. Finally, in the third level, each protein of an ortholog cluster is classified according to its involvement in a specific cellular process. AtlasT4SS database is open access and is available at http://www.t4ss.lncc.br.",AtlasT4SS,0.984139264,NA,0,AtlasT4SS,0.984139264,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/9/2012 +33219693,http://www.megabionet.org/atmad,"AtMAD: Arabidopsis thaliana multi-omics association database. Integration analysis of multi-omics data provides a comprehensive landscape for understanding biological systems and mechanisms. The abundance of high-quality multi-omics data (genomics, transcriptomics, methylomics and phenomics) for the model organism Arabidopsis thaliana enables scientists to study the genetic mechanism of many biological processes. However, no resource is available to provide comprehensive and systematic multi-omics associations for Arabidopsis. 
Here, we developed an Arabidopsis thaliana Multi-omics Association Database (AtMAD, http://www.megabionet.org/atmad), a public repository for large-scale measurements of associations between genome, transcriptome, methylome, pathway and phenotype in Arabidopsis, designed for facilitating identification of eQTL, emQTL, Pathway-mQTL, Phenotype-pathway, GWAS, TWAS and EWAS. Candidate variants/methylations/genes were identified in AtMAD for specific phenotypes or biological processes, many of them are supported by experimental evidence. Based on the multi-omics association strategy, we have identified 11 796 cis-eQTLs and 10 119 trans-eQTLs. Among them, 68 837 environment-eQTL associations and 149 622 GWAS-eQTL associations were identified and stored in AtMAD. For expression-methylation quantitative trait loci (emQTL), we identified 265 776 emQTLs and 122 344 pathway-mQTLs. For TWAS and EWAS, we obtained 62 754 significant phenotype-gene associations and 3 993 379 significant phenotype-methylation associations, respectively. Overall, the multi-omics associated network in AtMAD will provide new insights into exploring biological mechanisms of plants at multi-omics levels.",AtMAD,0.989043355,Arabidopsis thaliana Multi-omics Association Database,0.988037554,AtMAD,0.989043355,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +25972521,http://AtmiRNET.itps.ncku.edu.tw,"AtmiRNET: a web-based resource for reconstructing regulatory networks of Arabidopsis microRNAs. Compared with animal microRNAs (miRNAs), our limited knowledge of how miRNAs involve in significant biological processes in plants is still unclear. AtmiRNET is a novel resource geared toward plant scientists for reconstructing regulatory networks of Arabidopsis miRNAs. 
By means of highlighted miRNA studies in target recognition, functional enrichment of target genes, promoter identification and detection of cis- and trans-elements, AtmiRNET allows users to explore mechanisms of transcriptional regulation and miRNA functions in Arabidopsis thaliana, which are rarely investigated so far. High-throughput next-generation sequencing datasets from transcriptional start sites (TSSs)-relevant experiments as well as five core promoter elements were collected to establish the support vector machine-based prediction model for Arabidopsis miRNA TSSs. Then, high-confidence transcription factors participate in transcriptional regulation of Arabidopsis miRNAs are provided based on statistical approach. Furthermore, both experimentally verified and putative miRNA-target interactions, whose validity was supported by the correlations between the expression levels of miRNAs and their targets, are elucidated for functional enrichment analysis. The inferred regulatory networks give users an intuitive insight into the pivotal roles of Arabidopsis miRNAs through the crosstalk between miRNA transcriptional regulation (upstream) and miRNA-mediate (downstream) gene circuits. The valuable information that is visually oriented in AtmiRNET recruits the scant understanding of plant miRNAs and will be useful (e.g. ABA-miR167c-auxin signaling pathway) for further research. Database URL: http://AtmiRNET.itps.ncku.edu.tw/",AtmiRNET,0.997909009,NA,0,AtmiRNET,0.997909009,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/13/2015 +22397531,http://AtPAN.itps.ncku.edu.tw,"AtPAN: an integrated system for reconstructing transcriptional regulatory networks in Arabidopsis thaliana. Background Construction of transcriptional regulatory networks (TRNs) is of priority concern in systems biology. 
Numerous high-throughput approaches, including microarray and next-generation sequencing, are extensively adopted to examine transcriptional expression patterns on the whole-genome scale; those data are helpful in reconstructing TRNs. Identifying transcription factor binding sites (TFBSs) in a gene promoter is the initial step in elucidating the transcriptional regulation mechanism. Since transcription factors usually co-regulate a common group of genes by forming regulatory modules with similar TFBSs. Therefore, the combinatorial interactions of transcription factors must be modeled to reconstruct the gene regulatory networks. Description For systems biology applications, this work develops a novel database called Arabidopsis thaliana Promoter Analysis Net (AtPAN), capable of detecting TFBSs and their corresponding transcription factors (TFs) in a promoter or a set of promoters in Arabidopsis. For further analysis, according to the microarray expression data and literature, the co-expressed TFs and their target genes can be retrieved from AtPAN. Additionally, proteins interacting with the co-expressed TFs are also incorporated to reconstruct co-expressed TRNs. Moreover, combinatorial TFs can be detected by the frequency of TFBSs co-occurrence in a group of gene promoters. In addition, TFBSs in the conserved regions between the two input sequences or homologous genes in Arabidopsis and rice are also provided in AtPAN. The output results also suggest conducting wet experiments in the future. Conclusions The AtPAN, which has a user-friendly input/output interface and provide graphical view of the TRNs. This novel and creative resource is freely available online at http://AtPAN.itps.ncku.edu.tw/.",AtPAN,0.98359412,Arabidopsis thaliana Promoter Analysis Net,0.875704557,AtPAN,0.98359412,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/8/2012 +27899679,http://www.megabionet.org/atpid,"AtPID: a genome-scale resource for genotype-phenotype associations in Arabidopsis. 
AtPID (Arabidopsis thaliana Protein Interactome Database, available at http://www.megabionet.org/atpid) is an integrated database resource for protein interaction network and functional annotation. In the past few years, we collected 5564 mutants with significant morphological alterations and manually curated them to 167 plant ontology (PO) morphology categories. These single/multiple-gene mutants were indexed and linked to 3919 genes. After integrated these genotype-phenotype associations with the comprehensive protein interaction network in AtPID, we developed a Naïve Bayes method and predicted 4457 novel high confidence gene-PO pairs with 1369 genes as the complement. Along with the accumulated novel data for protein interaction and functional annotation, and the updated visualization toolkits, we present a genome-scale resource for genotype-phenotype associations for Arabidopsis in AtPID 5.0. In our updated website, all the new genotype-phenotype associations from mutants, protein network, and the protein annotation information can be vividly displayed in a comprehensive network view, which will greatly enhance plant protein function and genotype-phenotype association studies in a systematical way.",AtPID,0.997839749,Arabidopsis thaliana Protein Interactome Database,0.974847411,AtPID,0.997839749,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +30534948,http://atsnp.biostat.wisc.edu,"atSNP Search: a web resource for statistically evaluating influence of human genetic variation on transcription factor binding. Summary Understanding the regulatory roles of non-coding genetic variants has become a central goal for interpreting results of genome-wide association studies. The regulatory significance of the variants may be interrogated by assessing their influence on transcription factor binding.
We have developed atSNP Search, a comprehensive web database for evaluating motif matches to the human genome with both reference and variant alleles and assessing the overall significance of the variant alterations on the motif matches. Convenient search features, comprehensive search outputs and a useful help menu are key components of atSNP Search. atSNP Search enables convenient interpretation of regulatory variants by statistical significance testing and composite logo plots, which are graphical representations of motif matches with the reference and variant alleles. Existing motif-based regulatory variant discovery tools only consider a limited pool of variants due to storage or other limitations. In contrast, atSNP Search users can test more than 37 billion variant-motif pairs with marginal significance in motif matches or match alteration. Computational evidence from atSNP Search, when combined with experimental validation, may help with the discovery of underlying disease mechanisms. Availability and implementation atSNP Search is freely available at http://atsnp.biostat.wisc.edu. Supplementary information Supplementary data are available at Bioinformatics online.",atSNP,0.982023239,NA,0,atSNP,0.982023239,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2019 +"21217125, 24334350, 26546318, 29216398",http://atted.jp,"ATTED-II updates: condition-specific gene coexpression to extend coexpression analyses and applications to a broad range of flowering plants. ATTED-II (http://atted.jp) is a gene coexpression database for a wide variety of experimental designs, such as prioritizations of genes for functional identification and analyses of the regulatory relationships among genes. Here, we report updates of ATTED-II focusing on two new features: condition-specific coexpression and homologous coexpression with rice. 
To analyze a broad range of biological phenomena, it is important to collect data under many diverse experimental conditions, but the meaning of coexpression can become ambiguous under these conditions. One approach to overcome this difficulty is to calculate the coexpression for each set of conditions with a clear biological meaning. With this viewpoint, we prepared five sets of experimental conditions (tissue, abiotic stress, biotic stress, hormones and light conditions), and users can evaluate the coexpression by employing comparative gene lists and switchable gene networks. We also developed an interactive visualization system, using the Cytoscape web system, to improve the network representation. As the second update, rice coexpression is now available. The previous version of ATTED-II was specifically developed for Arabidopsis, and thus coexpression analyses for other useful plants have been difficult. To solve this problem, we extended ATTED-II by including comparison tables between Arabidopsis and rice. This representation will make it possible to analyze the conservation of coexpression among flowering plants. With the ability to investigate condition-specific coexpression and species conservation, ATTED-II can help researchers to clarify the functional and regulatory networks of genes in a broad array of plant species.",ATTED-II,0.954756707,NA,0,ATTED-II,0.954756707,4,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +27161830,http://14.139.59.221/MauPIR,"Global De Novo Protein-Protein Interactome Elucidates Interactions of Drought-Responsive Proteins in Horse Gram (Macrotyloma uniflorum). Inspired by the availability of de novo transcriptome of horse gram (Macrotyloma uniflorum) and recent developments in systems biology studies, the first ever global protein-protein interactome (PPI) map was constructed for this highly drought-tolerant legume. 
Large-scale studies of PPIs and the constructed database would provide rationale behind the interplay at cascading translational levels for drought stress-adaptive mechanisms in horse gram. Using a bidirectional approach (interolog and domain-based), a high-confidence interactome map and database for horse gram was constructed. Available transcriptomic information for shoot and root tissues of a sensitive (M-191; genotype 1) and a drought-tolerant (M-249; genotype 2) genotype of horse gram was utilized to draw comparative PPI subnetworks under drought stress. High-confidence 6804 interactions were predicted among 1812 proteins covering about one-fourth of the horse gram proteome. The highest number of interactions (33.86%) in horse gram interactome matched with Arabidopsis PPI data. The top five hub nodes mostly included ubiquitin and heat-shock-related proteins. Higher numbers of PPIs were found to be responsive in shoot tissue (416) and root tissue (2228) of genotype 2 compared with shoot tissue (136) and root tissue (579) of genotype 1. Characterization of PPIs using gene ontology analysis revealed that kinase and transferase activities involved in signal transduction, cellular processes, nucleocytoplasmic transport, protein ubiquitination, and localization of molecules were most responsive to drought stress. Hence, these could be framed in stress adaptive mechanisms of horse gram. Being the first legume global PPI map, it would provide new insights into gene and protein regulatory networks for drought stress tolerance mechanisms in horse gram. Information compiled in the form of database (MauPIR) will provide the much needed high-confidence systems biology information for horse gram genes, proteins, and involved processes. This information would ease the effort and increase the efficacy for similar studies on other legumes. 
Public access is available at http://14.139.59.221/MauPIR/ .",auPIR,0.777803779,NA,0,auPIR,0.777803779,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,5/17/2016 +"22057158, 26779400",http://aura.science.unitn.it,"AURA: Atlas of UTR Regulatory Activity. Summary The Atlas of UTR Regulatory Activity (AURA) is a manually curated and comprehensive catalog of human mRNA untranslated regions (UTRs) and UTR regulatory annotations. Through its intuitive web interface, it provides full access to a wealth of information on UTRs that integrates phylogenetic conservation, RNA sequence and structure data, single nucleotide variation, gene expression and gene functional descriptions from literature and specialized databases. Availability http://aura.science.unitn.it Contact aura@science.unitn.it; dassi@science.unitn Supplementary information Supplementary data are available at Bioinformatics online.",AURA,0.97320962,Atlas of UTR Regulatory Activity,0.889621913,AURA,0.97320962,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/29/2014 +29198880,http://aureowiki.med.uni-greifswald.de,"AureoWiki - The repository of the Staphylococcus aureus research and annotation community. In light of continuously accumulating data and knowledge on major human pathogens, comprehensive and up-to-date sources of easily accessible information are urgently required. The AureoWiki database (http://aureowiki.med.uni-greifswald.de) provides detailed information on the genes and proteins of clinically and experimentally relevant S. aureus strains, currently covering NCTC 8325, COL, Newman, USA300_FPR3757, and N315. By implementing a pan-genome approach, AureoWiki facilitates the transfer of knowledge gained in studies with different S. aureus strains, thus supporting functional annotation and better understanding of this organism. All data related to a given gene or gene product is compiled on a strain-specific gene page.
The gene pages contain sequence-based information complemented by data on, for example, protein function and localization, transcriptional regulation, and gene expression. The information provided is connected via links to other databases and published literature. Importantly, orthologous genes of the individual strains, which are linked by a pan-genome gene identifier and a unified gene name, are presented side by side using strain-specific tabs. The respective pan-genome gene page contains an orthologue table for 32 S. aureus strains, a multiple-strain genome viewer, a protein sequence alignment as well as other comparative information. The data collected in AureoWiki is also accessible through various download options in order to support bioinformatics applications. In addition, based on two large-scale gene expression data sets, AureoWiki provides graphical representations of condition-dependent mRNA levels and protein profiles under various laboratory and infection-related conditions.",AureoWiki,0.996498525,NA,0,AureoWiki,0.996498525,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/24/2017 +29186576,http://autism.mindspec.org/autdb/Welcome.do,"AutDB: a platform to decode the genetic architecture of autism. AutDB is a deeply annotated resource for exploring the impact of genetic variations associated with autism spectrum disorders (ASD). First released in 2007, AutDB has evolved into a multi-modular resource of diverse types of genetic and functional evidence related to ASD. Current modules include: Human Gene, which annotates all ASD-linked genes and their variants; Animal Model, which catalogs behavioral, anatomical and physiological data from rodent models of ASD; Protein Interaction (PIN), which builds interactomes from direct relationships of protein products of ASD genes; and Copy Number Variant (CNV), which catalogs deletions and duplications of chromosomal loci identified in ASD. 
A multilevel data-integration strategy is utilized to connect the ASD genes to the components of the other modules. All information in this resource is manually curated by expert scientists from primary scientific publications and is referenced to source articles. AutDB is actively maintained with a rigorous quarterly data release schedule. As of June 2017, AutDB contains detailed annotations for 910 genes, 2197 CNV loci, 1060 rodent models and 38 296 PINs. With its widespread use by the research community, AutDB serves as a reference resource for analysis of large datasets, accelerating ASD research and potentially leading to targeted drug treatments. AutDB is available at http://autism.mindspec.org/autdb/Welcome.do.",AutDB,0.997044206,NA,0,AutDB,0.997044206,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +23774715,http://fcon_1000.projects.nitrc.org/indi/abide,"The autism brain imaging data exchange: towards a large-scale evaluation of the intrinsic brain architecture in autism. Autism spectrum disorders (ASDs) represent a formidable challenge for psychiatry and neuroscience because of their high prevalence, lifelong nature, complexity and substantial heterogeneity. Facing these obstacles requires large-scale multidisciplinary efforts. Although the field of genetics has pioneered data sharing for these reasons, neuroimaging had not kept pace. In response, we introduce the Autism Brain Imaging Data Exchange (ABIDE)-a grassroots consortium aggregating and openly sharing 1112 existing resting-state functional magnetic resonance imaging (R-fMRI) data sets with corresponding structural MRI and phenotypic information from 539 individuals with ASDs and 573 age-matched typical controls (TCs; 7-64 years) (http://fcon_1000.projects.nitrc.org/indi/abide/). Here, we present this resource and demonstrate its suitability for advancing knowledge of ASD neurobiology based on analyses of 360 male subjects with ASDs and 403 male age-matched TCs. 
We focused on whole-brain intrinsic functional connectivity and also survey a range of voxel-wise measures of intrinsic functional brain architecture. Whole-brain analyses reconciled seemingly disparate themes of both hypo- and hyperconnectivity in the ASD literature; both were detected, although hypoconnectivity dominated, particularly for corticocortical and interhemispheric functional connectivity. Exploratory analyses using an array of regional metrics of intrinsic brain function converged on common loci of dysfunction in ASDs (mid- and posterior insula and posterior cingulate cortex), and highlighted less commonly explored regions such as the thalamus. The survey of the ABIDE R-fMRI data sets provides unprecedented demonstrations of both replication and novel discovery. By pooling multiple international data sets, ABIDE is expected to accelerate the pace of discovery setting the stage for the next generation of ASD studies.",ABIDE,0.968805671,Autism Brain Imaging Data Exchange,0.98496213,Autism Brain Imaging Data Exchange,0.98496213,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/18/2013 +22139918,http://autismkb.cbi.pku.edu.cn,"AutismKB: an evidence-based knowledgebase of autism genetics. Autism spectrum disorder (ASD) is a heterogeneous neurodevelopmental disorder with a prevalence of 0.9-2.6%. Twin studies showed a heritability of 38-90%, indicating strong genetic contributions. Yet it is unclear how many genes have been associated with ASD and how strong the evidence is. A comprehensive review and analysis of literature and data may bring a clearer big picture of autism genetics. We show that as many as 2193 genes, 2806 SNPs/VNTRs, 4544 copy number variations (CNVs) and 158 linkage regions have been associated with ASD by GWAS, genome-wide CNV studies, linkage analyses, low-scale genetic association studies, expression profiling and other low-scale experimental studies. 
To evaluate the evidence, we collected metadata about each study including clinical and demographic features, experimental design and statistical significance, and used a scoring and ranking approach to select a core data set of 434 high-confidence genes. The genes mapped to pathways including neuroactive ligand-receptor interaction, synapse transmission and axon guidance. To better understand the genes we parsed over 30 databases to retrieve extensive data about expression patterns, protein interactions, animal models and pharmacogenetics. We constructed a MySQL-based online database and share it with the broader autism research community at http://autismkb.cbi.pku.edu.cn, supporting sophisticated browsing and searching functionalities.",AutismKB,0.762071371,NA,0,AutismKB,0.762071371,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2011 +22753780,"http://autobind.csie.ncku.edu.tw/, http://autobind.mc.ntu.edu.tw","AutoBind: automatic extraction of protein-ligand-binding affinity data from biological literature. Motivation Determination of the binding affinity of a protein-ligand complex is important to quantitatively specify whether a particular small molecule will bind to the target protein. Besides, collection of comprehensive datasets for protein-ligand complexes and their corresponding binding affinities is crucial in developing accurate scoring functions for the prediction of the binding affinities of previously unknown protein-ligand complexes. In the past decades, several databases of protein-ligand-binding affinities have been created via visual extraction from literature. However, such approaches are time-consuming and most of these databases are updated only a few times per year. Hence, there is an immediate demand for an automatic extraction method with high precision for binding affinity collection. Result We have created a new database of protein-ligand-binding affinity data, AutoBind, based on automatic information retrieval. 
We first compiled a collection of 1586 articles where the binding affinities have been marked manually. Based on this annotated collection, we designed four sentence patterns that are used to scan full-text articles as well as a scoring function to rank the sentences that match our patterns. The proposed sentence patterns can effectively identify the binding affinities in full-text articles. Our assessment shows that AutoBind achieved 84.22% precision and 79.07% recall on the testing corpus. Currently, 13 616 protein-ligand complexes and the corresponding binding affinities have been deposited in AutoBind from 17 221 articles. Availability AutoBind is automatically updated on a monthly basis, and it is freely available at http://autobind.csie.ncku.edu.tw/ and http://autobind.mc.ntu.edu.tw/. All of the deposited binding affinities have been refined and approved manually before being released.",AutoBind,0.98091805,NA,0,AutoBind,0.98091805,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/2/2012 +30669929,http://www.autophagysmdb.org,"AutophagySMDB: a curated database of small molecules that modulate protein targets regulating autophagy. Macroautophagy/autophagy is a complex self-degradative mechanism responsible for clearance of non functional organelles and proteins. A range of factors influences the autophagic process, and disruptions in autophagy-related mechanisms lead to disease states, and further exacerbation of disease. Despite in-depth research into autophagy and its role in pathophysiological processes, the resources available to use it for therapeutic purposes are currently lacking. Herein we report the Autophagy Small Molecule Database (AutophagySMDB; http://www.autophagysmdb.org/ ) of small molecules and their cognate protein targets that modulate autophagy. Presently, AutophagySMDB enlists ~10,000 small molecules which regulate 71 target proteins. 
All entries are comprised of information such as EC50 (half maximal effective concentration), IC50 (half maximal inhibitory concentration), Kd (dissociation constant) and Ki (inhibition constant), IUPAC name, canonical SMILE, structure, molecular weight, QSAR (quantitative structure activity relationship) properties such as hydrogen donor and acceptor count, aromatic rings and XlogP. AutophagySMDB is an exhaustive, cross-platform, manually curated database, where either the cognate targets for small molecule or small molecules for a target can be searched. This database is provided with different search options including text search, advanced search and structure search. Various computational tools such as tree tool, cataloging tools, and clustering tools have also been implemented for advanced analysis. Data and the tools provided in this database helps to identify common or unique scaffolds for designing novel drugs or to improve the existing ones for autophagy small molecule therapeutics. The approach to multitarget drug discovery by identifying common scaffolds has been illustrated with experimental validation. Abbreviations: AMPK: AMP-activated protein kinase; ATG: autophagy related; AutophagySMDB: autophagy small molecule database; BCL2: BCL2, apoptosis regulator; BECN1: beclin 1; CAPN: calpain; MTOR: mechanistic target of rapamycin kinase; PPARG: peroxisome proliferator activated receptor gamma; SMILES: simplified molecular input line entry system; SQSTM1: sequestosome 1; STAT3: signal transducer and activator of transcription.",AutophagySMDB,0.995587846,Autophagy Small Molecule Database,0.9487999,AutophagySMDB,0.995587846,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/3/2019 +33176685,http://avimm.ab.mpg.de,"Avian Immunome DB: an example of a user-friendly interface for extracting genetic information. Background Genomic and genetic studies often require a target list of genes before conducting any hypothesis testing or experimental verification. 
With the ever-growing number of sequenced genomes and a variety of different annotation strategies, comes the potential for ambiguous gene symbols, making it cumbersome to capture the ""correct"" set of genes. In this article, we present and describe the Avian Immunome DB (AVIMM) for easy gene property extraction as exemplified by avian immune genes. The avian immune system is characterised by a cascade of complex biological processes underlaid by more than 1000 different genes. It is a vital trait to study particularly in birds considering that they are a significant driver in spreading zoonotic diseases. With the completion of phase II of the B10K (""Bird 10,000 Genomes"") consortium's whole-genome sequencing effort, we have included 363 annotated bird genomes in addition to other publicly available bird genome data which serve as a valuable foundation for AVIMM. Construction and content A relational database with avian immune gene evidence from Gene Ontology, Ensembl, UniProt and the B10K consortium has been designed and set up. The foundation stone or the ""seed"" for the initial set of avian immune genes is based on the well-studied model organism chicken (Gallus gallus). Gene annotations, different transcript isoforms, nucleotide sequences and protein information, including amino acid sequences, are included. Ambiguous gene names (symbols) are resolved within the database and linked to their canonical gene symbol. AVIMM is supplemented by a command-line interface and a web front-end to query the database. Utility and discussion The internal mapping of unique gene symbol identifiers to canonical gene symbols allows for an ambiguous gene property search. The database is organised within core and feature tables, which makes it straightforward to extend for future purposes. The database design is ready to be applied to other taxa or biological processes. 
Currently, the database contains 1170 distinct avian immune genes with canonical gene symbols and 612 synonyms across 363 bird species. While the command-line interface readily integrates into bioinformatics pipelines, the intuitive web front-end with download functionality offers sophisticated search functionalities and tracks the origin for each record. AVIMM is publicly accessible at https://avimm.ab.mpg.de .",AVIMM,0.990362942,Avian Immunome DB,0.966759622,AVIMM,0.990362942,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/12/2020 +24285301,http://crdd.osdd.net/servers/avpdb,"AVPdb: a database of experimentally validated antiviral peptides targeting medically important viruses. Antiviral peptides (AVPs) have exhibited huge potential in inhibiting viruses by targeting various stages of their life cycle. Therefore, we have developed AVPdb, available online at http://crdd.osdd.net/servers/avpdb, to provide a dedicated resource of experimentally verified AVPs targeting over 60 medically important viruses including Influenza, HCV, HSV, RSV, HBV, DENV, SARS, etc. However, we have separately provided HIV inhibiting peptides in 'HIPdb'. AVPdb contains detailed information of 2683 peptides, including 624 modified peptides experimentally tested for antiviral activity. In modified peptides a chemical moiety is attached for increasing their efficacy and stability. Detailed information include: peptide sequence, length, source, virus targeted, virus family, cell line used, efficacy (qualitative/quantitative), target step/protein, assay used in determining the efficacy and PubMed reference. The database also furnishes physicochemical properties and predicted structure for each peptide. We have provided user-friendly browsing and search facility along with other analysis tools to help the users. Entering of many synthetic peptide-based drugs in various stages of clinical trials reiterate the importance for the AVP resources. 
AVPdb is anticipated to cater to the needs of scientific community working for the development of antiviral therapeutics.",AVPdb,0.996542871,NA,0,AVPdb,0.996542871,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/26/2013 +30215764,http://www.awesome-hust.com,"AWESOME: a database of SNPs that affect protein post-translational modifications. Protein post-translational modifications (PTMs), including phosphorylation, ubiquitination, methylation, acetylation, glycosylation et al, are very important biological processes. PTM changes in some critical genes, which may be induced by base-pair substitution, are shown to affect the risk of diseases. Recently, large-scale exome-wide association studies found that missense single nucleotide polymorphisms (SNPs) play an important role in the susceptibility for complex diseases or traits. One of the functional mechanisms of missense SNPs is that they may affect PTMs and leads to a protein dysfunction and its downstream signaling pathway disorder. Here, we constructed a database named AWESOME (A Website Exhibits SNP On Modification Event, http://www.awesome-hust.com), which is an interactive web-based analysis tool that systematically evaluates the role of SNPs on nearly all kinds of PTMs based on 20 available tools. We also provided a well-designed scoring system to compare the performance of different PTM prediction tools and help users to get a better interpretation of results. Users can search SNPs, genes or position of interest, filter with specific modifications or prediction methods, to get a comprehensive PTM change induced by SNPs. In summary, our database provides a convenient way to detect PTM-related SNPs, which may potentially be pathogenic factors or therapeutic targets.",AWESOME,0.993797898,Exhibits SNP,0.626161829,AWESOME,0.993797898,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30893420,http://lmse.github.io/aybrah,"AYbRAH: a curated ortholog database for yeasts and fungi spanning 600 million years of evolution. . 
Budding yeasts inhabit a range of environments by exploiting various metabolic traits. The genetic bases for these traits are mostly unknown, preventing their addition or removal in a chassis organism for metabolic engineering. Insight into the evolution of orthologs, paralogs and xenologs in the yeast pan-genome can help bridge these genotypes; however, existing phylogenomic databases do not span diverse yeasts, and sometimes cannot distinguish between these homologs. To help understand the molecular evolution of these traits in yeasts, we created Analyzing Yeasts by Reconstructing Ancestry of Homologs (AYbRAH), an open-source database of predicted and manually curated ortholog groups for 33 diverse fungi and yeasts in Dikarya, spanning 600 million years of evolution. OrthoMCL and OrthoDB were used to cluster protein sequence into ortholog and homolog groups, respectively; MAFFT and PhyML reconstructed the phylogeny of all homolog groups. Ortholog assignments for enzymes and small metabolite transporters were compared to their phylogenetic reconstruction, and curated to resolve any discrepancies. Information on homolog and ortholog groups can be viewed in the AYbRAH web portal (https://lmse.github.io/aybrah/), including functional annotations, predictions for mitochondrial localization and transmembrane domains, literature references and phylogenetic reconstructions. Ortholog assignments in AYbRAH were compared to HOGENOM, KEGG Orthology, OMA, eggNOG and PANTHER. PANTHER and OMA had the most congruent ortholog groups with AYbRAH, while the other phylogenomic databases had greater amounts of under-clustering, over-clustering or no ortholog annotations for proteins. 
Future plans are discussed for AYbRAH, and recommendations are made for other research communities seeking to create curated ortholog databases.",AYbRAH,0.996119797,Analyzing Yeasts by Reconstructing Ancestry of Homologs,0.874202971,AYbRAH,0.996119797,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +34976872,http://b-amp.karishmakaushiklab.com,"AMPing Up the Search: A Structural and Functional Repository of Antimicrobial Peptides for Biofilm Studies, and a Case Study of Its Application to Corynebacterium striatum, an Emerging Pathogen. Antimicrobial peptides (AMPs) have been recognized for their ability to target processes important for biofilm formation. Given the vast array of AMPs, identifying potential anti-biofilm candidates remains a significant challenge, and prompts the need for preliminary in silico investigations prior to extensive in vitro and in vivo studies. We have developed Biofilm-AMP (B-AMP), a curated 3D structural and functional repository of AMPs relevant to biofilm studies. In its current version, B-AMP contains predicted 3D structural models of 5544 AMPs (from the DRAMP database) developed using a suite of molecular modeling tools. The repository supports a user-friendly search, using source, name, DRAMP ID, and PepID (unique to B-AMP). Further, AMPs are annotated to existing biofilm literature, consisting of a vast library of over 10,000 articles, enhancing the functional capabilities of B-AMP. To provide an example of the usability of B-AMP, we use the sortase C biofilm target of the emerging pathogen Corynebacterium striatum as a case study. For this, 100 structural AMP models from B-AMP were subject to in silico protein-peptide molecular docking against the catalytic site residues of the C. striatum sortase C protein. Based on docking scores and interacting residues, we suggest a preference scale using which candidate AMPs could be taken up for further in silico, in vitro and in vivo testing. 
The 3D protein-peptide interaction models and preference scale are available in B-AMP. B-AMP is a comprehensive structural and functional repository of AMPs, and will serve as a starting point for future studies exploring AMPs for biofilm studies. B-AMP is freely available to the community at https://b-amp.karishmakaushiklab.com and will be regularly updated with AMP structures, interaction models with potential biofilm targets, and annotations to biofilm literature.",B-AMP,0.991384625,NA,0,B-AMP,0.991384625,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/16/2021 +21335611,http://www.b2gfar.org,"B2G-FAR, a species-centered GO annotation repository. Motivation Functional genomics research has expanded enormously in the last decade thanks to the cost reduction in high-throughput technologies and the development of computational tools that generate, standardize and share information on gene and protein function such as the Gene Ontology (GO). Nevertheless, many biologists, especially working with non-model organisms, still suffer from non-existing or low-coverage functional annotation, or simply struggle retrieving, summarizing and querying these data. Results The Blast2GO Functional Annotation Repository (B2G-FAR) is a bioinformatics resource envisaged to provide functional information for otherwise uncharacterized sequence data and offers data mining tools to analyze a larger repertoire of species than currently available. This new annotation resource has been created by applying the Blast2GO functional annotation engine in a strongly high-throughput manner to the entire space of public available sequences. The resulting repository contains GO term predictions for over 13.2 million non-redundant protein sequences based on BLAST search alignments from the SIMAP database. We generated GO annotation for approximately 150 000 different taxa making available 2000 species with the highest coverage through B2G-FAR. 
A second section within B2G-FAR holds functional annotations for 17 non-model organism Affymetrix GeneChips. Conclusions B2G-FAR provides easy access to exhaustive functional annotation for 2000 species offering a good balance between quality and quantity, thereby supporting functional genomics research especially in the case of non-model organisms. Availability The annotation resource is available at http://www.b2gfar.org.",B2G-FAR,0.91893776,Annotation Repository,0.673282564,B2G-FAR,0.91893776,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/18/2011 +34269889,http://webs.iiitd.edu.in/raghava/b3pdb,"B3Pdb: an archive of blood-brain barrier-penetrating peptides. The blood-brain barrier poses major hurdles in the treatment of brain-related ailments. Over the past decade, interest in peptides-based therapeutics has thrived a lot because of their higher benefit to risk ratio. However, a complete knowledgebase providing a well-annotated picture of the peptide as a therapeutic molecule to cure brain-related ailments is lacking. We have built up a knowledgebase B3Pdb on blood-brain barrier (BBB)-penetrating peptides in the present study. The B3Pdb holds clinically relevant experimental information on 1225 BBB-penetrating peptides, including mode of delivery, animal model, in vitro/in vivo experiments, chemical modifications, length. Hoping that drug delivery systems can improve central nervous system disorder-related therapeutics. In this regard, B3Pdb is an important resource to support the rational design of therapeutics peptides for CNS-related disorders. The complete ready-to-use and updated database with a user-friendly web interface is available to the scientific community at https://webs.iiitd.edu.in/raghava/b3pdb/ .",B3Pdb,0.996475801,NA,0,B3Pdb,0.996475801,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/16/2021 +"24214959, 26424852, 30256983",http://bacdive.dsmz.de,"BacDive--the Bacterial Diversity Metadatabase. 
BacDive-the Bacterial Diversity Metadatabase (http://bacdive.dsmz.de) merges detailed strain-linked information on the different aspects of bacterial and archaeal biodiversity. Currently (release 9/2013), BacDive contains entries for 23 458 strains and provides information on their taxonomy, morphology, physiology, sampling and concomitant environmental conditions as well as molecular biology. Where available, links to access the respective biological resources are given. The majority of the BacDive data is manually annotated and curated. The BacDive portal offers an easy-to-use simple search and in addition powerful advanced search functionalities allowing to combine more than 30 search fields for text and numerical data. The user can compile individual sets of strains to a download selection that can easily be imported into nearly all spreadsheet applications.",BacDive,0.997053862,Bacterial Diversity Metadatabase,0.693979033,BacDive,0.997053862,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +22135301,http://bacmap.wishartlab.com,"BacMap: an up-to-date electronic atlas of annotated bacterial genomes. Originally released in 2005, BacMap is an electronic, interactive atlas of fully sequenced bacterial genomes. It contains fully labeled, zoomable and searchable chromosome maps for essentially all sequenced prokaryotic (archaebacterial and eubacterial) species. Each map can be zoomed to the level of individual genes and each gene is hyperlinked to a richly annotated gene card. The latest release of BacMap (http://bacmap.wishartlab.com/) now contains data for more than 1700 bacterial species (~10× more than the 2005 release), corresponding to more than 2800 chromosome and plasmid maps. All bacterial genome maps are now supplemented with separate prophage genome maps as well as separate tRNA and rRNA maps. Each bacterial chromosome entry in BacMap also contains graphs and tables on a variety of gene and protein statistics.
Likewise, every bacterial species entry contains a bacterial 'biography' card, with taxonomic details, phenotypic details, textual descriptions and images (when available). Improved data browsing and searching tools have also been added to allow more facile filtering, sorting and display of the chromosome maps and their contents.",BacMap,0.990752041,NA,0,BacMap,0.990752041,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2011 +24304895,http://bacmet.biomedicine.gu.se)--a,"BacMet: antibacterial biocide and metal resistance genes database. Antibiotic resistance has become a major human health concern due to widespread use, misuse and overuse of antibiotics. In addition to antibiotics, antibacterial biocides and metals can contribute to the development and maintenance of antibiotic resistance in bacterial communities through co-selection. Information on metal and biocide resistance genes, including their sequences and molecular functions, is, however, scattered. Here, we introduce BacMet (http://bacmet.biomedicine.gu.se)--a manually curated database of antibacterial biocide- and metal-resistance genes based on an in-depth review of the scientific literature. The BacMet database contains 470 experimentally verified resistance genes. In addition, the database also contains 25 477 potential resistance genes collected from public sequence repositories. All resistance genes in the BacMet database have been organized according to their molecular function and induced resistance phenotype.",BacMet,0.994012475,NA,0,BacMet,0.994012475,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/3/2013 +34838806,http://bacteria.guru,"Bacteria.guru: Comparative Transcriptomics and Co-Expression Database for Bacterial Pathogens. While bacteria can be beneficial to our health, their deadly pathogenic potential has been an ever-present concern exacerbated by the emergence of drug-resistant strains. 
As such, there is a pressing urgency for an enhanced understanding of their gene function and regulation, which could mediate the development of novel antimicrobials. Transcriptomic analyses have been established as insightful and indispensable to the functional characterization of genes and identification of new biological pathways, but in the context of bacterial studies, they remain limited to species-specific datasets. To address this, we integrated the genomic and transcriptomic data of the 17 most notorious and researched bacterial pathogens, creating bacteria.guru, an interactive database that can identify, visualize, and compare gene expression profiles, coexpression networks, functionally enriched clusters, and gene families across species. Through illustrating antibiotic resistance mechanisms in P. aeruginosa, we demonstrate that bacteria.guru could potentially aid in discovering multi-faceted antibiotic targets and, overall, facilitate future bacterial research. AVAILABILITY: The database and coexpression networks are freely available from https://bacteria.guru/. Sample annotations can be found in the supplemental data.",bacteria.guru,0.995123416,NA,0,bacteria.guru,0.995123416,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/25/2021 +24680503,http://csdb.glycoscience.ru,"Expansion of coverage of Carbohydrate Structure Database (CSDB). The Bacterial Carbohydrate Structure Database (BCSDB), which has been maintained since 2005, was expanded to cover glycans from plants and fungi. The current coverage on plant and fungal glycans includes several thousands of the CarbBank records, as well as data published before 1996 but not deposited in CarbBank. Prior to deposition, the data were verified against the original publications and supplemented with additional information, such as NMR spectra. 
Both the Bacterial and Plant and Fungal Carbohydrate Structure Databases are freely available at http://csdb.glycoscience.ru.",BCSDB,0.763852611,Bacterial Carbohydrate Structure Database,0.883428037,Bacterial Carbohydrate Structure Database,0.883428037,1,"25753703.0, 26286194.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,10/23/2013 +22493537,http://www.research-bioinformatics.in,"BacterialLectinDb: An integrated bacterial lectin database. Unlabelled Studies of various diversified bacterial lectins/ lectin data may serve as a tool with enormous promise to help biotechnologists/ geneticists in their innovative technology to explore a deeper understanding in proteomics/ genomics research for finding the molecular basis of infectious diseases and also to new approaches for their prevention and in development of new bacterial vaccines. Hence we developed a bacterial lectin database named 'BacterialLectinDb'. An organized database schema for BacterialLectinDb was designed to collate all the available information about all bacterial lectins as a central repository. The database was designed using HTML, XML. Availability The database is available for free at http://www.research-bioinformatics.in.",BacterialLectinDb,0.95345632,NA,0,BacterialLectinDb,0.95345632,1,21887013,NA,low_prob_best_name,do not remove,do not merge,NA,NA,NA,NA,3/31/2012 +30272193,http://bactome.helmholtz-hzi.de,"BACTOME-a reference database to explore the sequence- and gene expression-variation landscape of Pseudomonas aeruginosa clinical isolates. Extensive use of next-generation sequencing (NGS) for pathogen profiling has the potential to transform our understanding of how genomic plasticity contributes to phenotypic versatility. However, the storage of large amounts of NGS data and visualization tools need to evolve to offer the scientific community fast and convenient access to these data. 
We introduce BACTOME as a database system that links aligned DNA- and RNA-sequencing reads of clinical Pseudomonas aeruginosa isolates with clinically relevant pathogen phenotypes. The database allows data extraction for any single isolate, gene or phenotype as well as data filtering and phenotypic grouping for specific research questions. With the integration of statistical tools we illustrate the usefulness of a relational database structure for the identification of phenotype-genotype correlations as an essential part of the discovery pipeline in genomic research. Furthermore, the database provides a compilation of DNA sequences and gene expression values of a plethora of clinical isolates to give a consensus DNA sequence and consensus gene expression signature. Deviations from the consensus thereby describe the genomic landscape and the transcriptional plasticity of the species P. aeruginosa. The database is available at https://bactome.helmholtz-hzi.de.",BACTOME,0.997656286,NA,0,BACTOME,0.997656286,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +25377257,"http://bactpepdb.rpbs.univ-paris-diderot.fr, http://www.yeastgenome.org","BactPepDB: a database of predicted peptides from a exhaustive survey of complete prokaryote genomes. . With the recent progress in complete genome sequencing, mining the increasing amount of genomic information available should in theory provide the means to discover new classes of peptides. However, annotation pipelines often do not consider small reading frames likely to be expressed. BactPepDB, available online at http://bactpepdb.rpbs.univ-paris-diderot.fr, is a database that aims at providing an exhaustive re-annotation of all complete prokaryotic genomes-chromosomal and plasmid DNA-available in RefSeq for coding sequences ranging between 10 and 80 amino acids. 
The identified peptides are classified as (i) previously identified in RefSeq, (ii) entity-overlapping (intragenic) or intergenic, and (iii) potential pseudogenes-intergenic sequences corresponding to a portion of a previously annotated larger gene. Additional information is related to homologs within order, predicted signal sequence, transmembrane segments, disulfide bonds, secondary structure, and the existence of a related 3D structure in the Protein Databank. As a result, BactPepDB provides insights about candidate peptides, and provides information about their conservation, together with some of their expected biological/structural features. The BactPepDB interface allows to search for candidate peptides in the database, or to search for peptides similar to a query, according to the multiple properties predicted or related to genomic localization. Database URL: http://www.yeastgenome.org/",BactPepDB,0.998118401,NA,0,BactPepDB,0.998118401,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2014 +"26433226, 30715167, 33010178",http://bacdb.org/BacWGSTdb,"BacWGSTdb, a database for genotyping and source tracking bacterial pathogens. Whole genome sequencing has become one of the routine methods in molecular epidemiological practice. In this study, we present BacWGSTdb (http://bacdb.org/BacWGSTdb), a bacterial whole genome sequence typing database which is designed for clinicians, clinical microbiologists and hospital epidemiologists. This database borrows the population structure from the current multi-locus sequence typing (MLST) scheme and adopts a hierarchical data structure: species, clonal complex and isolates. When users upload the pre-assembled genome sequences to BacWGSTdb, it offers the functionality of bacterial genotyping at both traditional MLST and whole-genome levels. 
More importantly, users are told which isolates in the public database are phylogenetically close to the query isolate, along with their clinical information such as host, isolation source, disease, collection time and geographical location. In this way, BacWGSTdb offers a rapid and convenient platform for worldwide users to address a variety of clinical microbiological issues such as source tracking bacterial pathogens.",BacWGSTdb,0.994153142,NA,0,BacWGSTdb,0.994153142,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +24602877,"http://www.bamboogdb.org/, http://www.bamboogdb.org","BambooGDB: a bamboo genome database with functional annotation and an analysis platform. Bamboo, as one of the most important non-timber forest products and fastest-growing plants in the world, represents the only major lineage of grasses that is native to forests. Recent success on the first high-quality draft genome sequence of moso bamboo (Phyllostachys edulis) provides new insights on bamboo genetics and evolution. To further extend our understanding on bamboo genome and facilitate future studies on the basis of previous achievements, here we have developed BambooGDB, a bamboo genome database with functional annotation and analysis platform. The de novo sequencing data, together with the full-length complementary DNA and RNA-seq data of moso bamboo composed the main contents of this database. Based on these sequence data, a comprehensively functional annotation for bamboo genome was made. Besides, an analytical platform composed of comparative genomic analysis, protein-protein interactions network, pathway analysis and visualization of genomic data was also constructed. As discovery tools to understand and identify biological mechanisms of bamboo, the platform can be used as a systematic framework for helping and designing experiments for further validation. Moreover, diverse and powerful search tools and a convenient browser were incorporated to facilitate the navigation of these data. 
As far as we know, this is the first genome database for bamboo. Through integrating high-throughput sequencing data, a full functional annotation and several analysis modules, BambooGDB aims to provide worldwide researchers with a central genomic resource and an extensible analysis platform for bamboo genome. BambooGDB is freely available at http://www.bamboogdb.org/. Database URL: http://www.bamboogdb.org.",BambooGDB,0.996197879,NA,0,BambooGDB,0.996197879,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/5/2014 +25336620,http://www.barcdb.org,"The Biobanking Analysis Resource Catalogue (BARCdb): a new research tool for the analysis of biobank samples. We report the development of a new database of technology services and products for analysis of biobank samples in biomedical research. BARCdb, the Biobanking Analysis Resource Catalogue (http://www.barcdb.org), is a freely available web resource, listing expertise and molecular resource capabilities of research centres and biotechnology companies. The database is designed for researchers who require information on how to make best use of valuable biospecimens from biobanks and other sample collections, focusing on the choice of analytical techniques and the demands they make on the type of samples, pre-analytical sample preparation and amounts needed. BARCdb has been developed as part of the Swedish biobanking infrastructure (BBMRI.se), but now welcomes submissions from service providers throughout Europe. BARCdb can help match resource providers with potential users, stimulating transnational collaborations and ensuring compatibility of results from different labs. It can promote a more optimal use of European resources in general, both with respect to standard and more experimental technologies, as well as for valuable biobank samples. 
This article describes how information on service and reagent providers of relevant technologies is made available on BARCdb, and how this resource may contribute to strengthening biomedical research in academia and in the biotechnology and pharmaceutical industries.",BARCdb,0.997212112,The Biobanking Analysis Resource Catalogue,0.877752744,BARCdb,0.997212112,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +25477388,http://bard.nih.gov,"BioAssay Research Database (BARD): chemical biology and probe-development enabled by structured metadata and result types. BARD, the BioAssay Research Database (https://bard.nih.gov/) is a public database and suite of tools developed to provide access to bioassay data produced by the NIH Molecular Libraries Program (MLP). Data from 631 MLP projects were migrated to a new structured vocabulary designed to capture bioassay data in a formalized manner, with particular emphasis placed on the description of assay protocols. New data can be submitted to BARD with a user-friendly set of tools that assist in the creation of appropriately formatted datasets and assay definitions. Data published through the BARD application program interface (API) can be accessed by researchers using web-based query tools or a desktop client. Third-party developers wishing to create new tools can use the API to produce stand-alone tools or new plug-ins that can be integrated into BARD. The entire BARD suite of tools therefore supports three classes of researcher: those who wish to publish data, those who wish to mine data for testable hypotheses, and those in the developer community who wish to build tools that leverage this carefully curated chemical biology resource.",BARD,0.990859985,BioAssay Research Database,0.854191172,BARD,0.990859985,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/4/2014 +33247932,http://146.118.64.11/BarleyVar,"BarleyVarDB: a database of barley genomic variation. . Barley (Hordeum vulgare L.) 
is one of the first domesticated grain crops and represents the fourth most important cereal source for human and animal consumption. BarleyVarDB is a database of barley genomic variation. It can be publicly accessible through the website at http://146.118.64.11/BarleyVar. This database mainly provides three sets of information. First, there are 57 754 224 single nuclear polymorphisms (SNPs) and 3 600 663 insertions or deletions (InDels) included in BarleyVarDB, which were identified from high-coverage whole genome sequencing of 21 barley germplasm, including 8 wild barley accessions from 3 barley evolutionary original centers and 13 barley landraces from different continents. Second, it uses the latest barley genome reference and its annotation information publicly accessible, which has been achieved by the International Barley Genome Sequencing Consortium (IBSC). Third, 522 212 whole genome-wide microsatellites/simple sequence repeats (SSRs) were also included in this database, which were identified in the reference barley pseudo-molecular genome sequence. Additionally, several useful web-based applications are provided including JBrowse, BLAST and Primer3. Users can design PCR primers to asses polymorphic variants deposited in this database and use a user-friendly interface for accessing the barley reference genome. We envisage that the BarleyVarDB will benefit the barley genetic research community by providing access to all publicly available barley genomic variation information and barley reference genome as well as providing them with an ultra-high density of SNP and InDel markers for molecular breeding and identification of functional genes with important agronomic traits in barley.
Database URL: http://146.118.64.11/BarleyVar.",BarleyVarDB,0.997781932,NA,0,BarleyVarDB,0.997781932,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2020 +34264745,http://sbcb.inf.ufrgs.br/barracurda,"Benchmarking and Testing Machine Learning Approaches with BARRA:CuRDa, a Curated RNA-Seq Database for Cancer Research. RNA-seq is gradually becoming the dominating technique employed to access the global gene expression in biological samples, allowing more flexible protocols and robust analysis. However, the nature of RNA-seq results imposes new data-handling challenges when it comes to computational analysis. With the increasing employment of machine learning (ML) techniques in biomedical sciences, databases that could provide curated data sets treated with state-of-the-art approaches already adapted to ML protocols, become essential for testing new algorithms. In this study, we present the Benchmarking of ARtificial intelligence Research: Curated RNA-seq Database (BARRA:CuRDa). BARRA:CuRDa was built exclusively for cancer research and is composed of 17 handpicked RNA-seq data sets for Homo sapiens that were gathered from the Gene Expression Omnibus, using rigorous filtering criteria. All data sets were individually submitted to sample quality analysis, removal of low-quality bases and artifacts from the experimental process, removal of ribosomal RNA, and estimation of transcript-level abundance. Moreover, all data sets were tested using standard approaches in the field, which allows them to be used as benchmark to new ML approaches. A feature selection analysis was also performed on each data set to investigate the biological accuracy of basic techniques. Results include genes already related to their specific tumoral tissue a large amount of long noncoding RNA and pseudogenes. 
BARRA:CuRDa is available at http://sbcb.inf.ufrgs.br/barracurda.",BARRA:CuRDa,0.988691002,Benchmarking of ARtificial intelligence Research,0.805160913,BARRA:CuRDa,0.988691002,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/14/2021 +34615485,http://www.bayberrybase.cn,"The bayberry database: a multiomic database for Myrica rubra, an important fruit tree with medicinal value. Background Chinese bayberry (Myrica rubra Sieb. & Zucc.) is an important fruit tree in China, and has high medicinal value. At present, the genome, transcriptome and germplasm resources of bayberry have been reported. In order to make more convenient use of these data, the Bayberry Database was established. Results The Bayberry Database is a comprehensive and intuitive data platform for examining the diverse annotated genome and germplasm resources of this species. This database contains nine central functional domains to interact with multiomic data: home, genome, germplasm, markers, tools, map, expression, reference, and contact. All domains provide pathways to a variety of data types composed of a reference genome sequence, transcriptomic data, gene patterns, phenotypic data, fruit images of Myrica rubra varieties, gSSR data, gene maps with annotation and evolutionary analyses. The tools module includes BLAST search, keyword search, sequence fetch and enrichment analysis functions. Conclusions The web address of the database is as follows http://www.bayberrybase.cn/ . The Myrica rubra database is an intelligent, interactive, and user-friendly system that enables researchers, breeders and horticultural personnel to browse, search and retrieve relevant and useful information and thus facilitate genomic research and breeding efforts concerning Myrica rubra. 
This database will be of great help to bayberry research and breeding in the future.",Bayberry,0.680092216,NA,0,Bayberry,0.680092216,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/6/2021 +31665503,http://bbcancer.renlab.org,"BBCancer: an expression atlas of blood-based biomarkers in the early diagnosis of cancers. The early detection of cancer holds the key to combat and control the increasing global burden of cancer morbidity and mortality. Blood-based screenings using circulating DNAs (ctDNAs), circulating RNA (ctRNAs), circulating tumor cells (CTCs) and extracellular vesicles (EVs) have shown promising prospects in the early detection of cancer. Recent high-throughput gene expression profiling of blood samples from cancer patients has provided a valuable resource for developing new biomarkers for the early detection of cancer. However, a well-organized online repository for these blood-based high-throughput gene expression data is still not available. Here, we present BBCancer (http://bbcancer.renlab.org/), a web-accessible and comprehensive open resource for providing the expression landscape of six types of RNAs, including messenger RNAs (mRNAs), long noncoding RNAs (lncRNAs), microRNAs (miRNAs), circular RNAs (circRNAs), tRNA-derived fragments (tRFRNAs) and Piwi-interacting RNAs (piRNAs) in blood samples, including plasma, CTCs and EVs, from cancer patients with various cancer types. Currently, BBCancer contains expression data of the six RNA types from 5040 normal and tumor blood samples across 15 cancer types. We believe this database will serve as a powerful platform for developing blood biomarkers.",BBCancer,0.997065306,NA,0,BBCancer,0.997065306,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24250117,http://bioinformatics.towson.edu/BBGD454,"BBGD454: A database for transcriptome analysis of blueberry using 454 sequences. Unlabelled Blueberry is an economically and nutritionally important small fruit crop, native to North America. 
As with many crops, extreme low temperature can affect blueberry crop yield negatively and cause major losses to growers. For this reason, blueberry breeding programs have focused on developing improved cultivars with broader climatic adaptation. To help achieve this goal, the blueberry genomic database (BBGD454) was developed to provide the research community with valuable resources to identify genes that play an important role in flower bud and fruit development, cold acclimation and chilling accumulation in blueberry. The database was developed using SQLServer2008 to house 454 transcript sequences, annotations and gene expression profiles of blueberry genes. BBGD454 can be accessed publically from a web-based interface; this website provides search and browse functionalities to allow scientists to access and search the data in order to correlate gene expression with gene function in different stages of blueberry fruit ripening, at different stages of cold acclimation of flower buds, and in leaves. Availability It can be accessed from http://bioinformatics.towson.edu/BBGD454/",BBGD454,0.995709896,blueberry genomic database,0.90938741,BBGD454,0.995709896,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/16/2013 +24077841,http://bbgre.org,"BBGRE: brain and body genetic resource exchange. Studies of copy number variation (genomic imbalance) are providing insight into both complex and Mendelian genetic disorders. Array comparative genomic hybridization (array CGH), a tool for detecting copy number variants at a resolution previously unattainable in clinical diagnostics, is increasingly used as a first-line test at clinical genetics laboratories. Many copy number variants are of unknown significance; correlation and comparison with other patients will therefore be essential for interpretation. 
We present a resource for clinicians and researchers to identify specific copy number variants and associated phenotypes in patients from a single catchment area, tested using array CGH at the SE Thames Regional Genetics Centre, London. User-friendly searching is available, with links to external resources, providing a powerful tool for the elucidation of gene function. We hope to promote research by facilitating interactions between researchers and patients. The BBGRE (Brain and Body Genetic Resource Exchange) resource can be accessed at the following website: http://bbgre.org DATABASE URL: http://bbgre.org.",BBGRE,0.97111398,Brain and Body Genetic Resource Exchange,0.924002247,BBGRE,0.97111398,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/27/2013 +29297289,http://arc-gem.uams.edu/dbbqs,"dBBQs: dataBase of Bacterial Quality scores. BACKGROUND:It is well-known that genome sequencing technologies are becoming significantly cheaper and faster. As a result of this, the exponential growth in sequencing data in public databases allows us to explore ever growing large collections of genome sequences. However, it is less known that the majority of available sequenced genome sequences in public databases are not complete, drafts of varying qualities. We have calculated quality scores for around 100,000 bacterial genomes from all major genome repositories and put them in a fast and easy-to-use database. RESULTS:Prokaryotic genomic data from all sources were collected and combined to make a non-redundant set of bacterial genomes. The genome quality score for each was calculated by four different measurements: assembly quality, number of rRNA and tRNA genes, and the occurrence of conserved functional domains. The dataBase of Bacterial Quality scores (dBBQs) was designed to store and retrieve quality scores. It offers fast searching and download features which the result can be used for further analysis. 
In addition, the search results are shown in interactive JavaScript chart framework using DC.js. The analysis of quality scores across major public genome databases find that around 68% of the genomes are of acceptable quality for many uses. CONCLUSIONS:dBBQs (available at http://arc-gem.uams.edu/dbbqs ) provides genome quality scores for all available prokaryotic genome sequences with a user-friendly Web-interface. These scores can be used as cut-offs to get a high-quality set of genomes for testing bioinformatics tools or improving the analysis. Moreover, all data of the four measurements that were combined to make the quality score for each genome, which can potentially be used for further analysis. dBBQs will be updated regularly and is freely use for non-commercial purpose.",BBQs,0.971838474,scores,0.604294538,BBQs,0.971838474,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,12/28/2017 +33599248,http://bcgenex.ico.unicancer.fr,"bc-GenExMiner 4.5: new mining module computes breast cancer differential gene expression analyses. . 'Breast cancer gene-expression miner' (bc-GenExMiner) is a breast cancer-associated web portal (http://bcgenex.ico.unicancer.fr). Here, we describe the development of a new statistical mining module, which permits several differential gene expression analyses, i.e. 'Expression' module. Sixty-two breast cancer cohorts and one healthy breast cohort with their corresponding clinicopathological information are included in bc-GenExMiner v4.5 version. Analyses are based on microarray or RNAseq transcriptomic data. Thirty-nine differential gene expression analyses, grouped into 13 categories, according to clinicopathological and molecular characteristics ('Targeted' and 'Exhaustive') and gene expression ('Customized'), have been developed. Output results are visualized in four forms of plots. 
This new statistical mining module offers, among other things, the possibility to compare gene expression in healthy (cancer-free), tumour-adjacent and tumour tissues at once and in three triple-negative breast cancer subtypes (i.e. C1: molecular apocrine tumours; C2: basal-like tumours infiltrated by immune suppressive cells and C3: basal-like tumours triggering an ineffective immune response). Several validation tests showed that bioinformatics process did not alter the pathobiological information contained in the source data. In this work, we developed and demonstrated that bc-GenExMiner 'Expression' module can be used for exploratory and validation purposes. Database URL: http://bcgenex.ico.unicancer.fr.",bc-GenExMiner,0.983812017,Breast cancer gene-expression miner,0.969961941,bc-GenExMiner,0.983812017,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2021 +33882119,http://www.dqweilab-sjtu.com/index.php,"BC-TFdb: a database of transcription factor drivers in breast cancer. . Transcription factors (TFs) are DNA-binding proteins, which regulate many essential biological functions. In several cancer types, TF function is altered by various direct mechanisms, including gene amplification or deletion, point mutations, chromosomal translocations, expression alterations, as well as indirectly by non-coding DNA mutations influencing the binding of the TF. TFs are also actively involved in breast cancer (BC) initiation and progression. Herein, we have developed an open-access database, BC-TFdb (Breast Cancer Transcription Factors database), of curated, non-redundant TF involved in BC. The database provides BC driver TFs related information including genomic sequences, proteomic sequences, structural data, pathway information, mutations information, DNA binding residues, survival and therapeutic resources. The database will be a useful platform for researchers to obtain BC-related TF-specific information. 
High-quality datasets are downloadable for users to evaluate and develop computational methods for drug designing against BC. Database URL: https://www.dqweilab-sjtu.com/index.php.",BC-TFdb,0.990113586,Breast Cancer Transcription Factors database,0.960293174,BC-TFdb,0.990113586,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2021 +27161011,http://www.biocreative.org/tasks/biocreative-v/track-3-cdr,"BioCreative V CDR task corpus: a resource for chemical disease relation extraction. . Community-run, formal evaluations and manually annotated text corpora are critically important for advancing biomedical text-mining research. Recently in BioCreative V, a new challenge was organized for the tasks of disease named entity recognition (DNER) and chemical-induced disease (CID) relation extraction. Given the nature of both tasks, a test collection is required to contain both disease/chemical annotations and relation annotations in the same set of articles. Despite previous efforts in biomedical corpus construction, none was found to be sufficient for the task. Thus, we developed our own corpus called BC5CDR during the challenge by inviting a team of Medical Subject Headings (MeSH) indexers for disease/chemical entity annotation and Comparative Toxicogenomics Database (CTD) curators for CID relation annotation. To ensure high annotation quality and productivity, detailed annotation guidelines and automatic annotation tools were provided. The resulting BC5CDR corpus consists of 1500 PubMed articles with 4409 annotated chemicals, 5818 diseases and 3116 chemical-disease interactions. Each entity annotation includes both the mention text spans and normalized concept identifiers, using MeSH as the controlled vocabulary. 
To ensure accuracy, the entities were first captured independently by two annotators followed by a consensus annotation: The average inter-annotator agreement (IAA) scores were 87.49% and 96.05% for the disease and chemicals, respectively, in the test set according to the Jaccard similarity coefficient. Our corpus was successfully used for the BioCreative V challenge tasks and should serve as a valuable resource for the text-mining research community.Database URL: http://www.biocreative.org/tasks/biocreative-v/track-3-cdr/.",BC5CDR,0.981737942,NA,0,BC5CDR,0.981737942,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/9/2016 +27376128,"http://www.bccluster.org/, http://rubyonrails.org","BcCluster: A Bladder Cancer Database at the Molecular Level. Background Bladder Cancer (BC) has two clearly distinct phenotypes. Non-muscle invasive BC has good prognosis and is treated with tumor resection and intravesical therapy whereas muscle invasive BC has poor prognosis and requires usually systemic cisplatin based chemotherapy either prior to or after radical cystectomy. Neoadjuvant chemotherapy is not often used for patients undergoing cystectomy. High-throughput analytical omics techniques are now available that allow the identification of individual molecular signatures to characterize the invasive phenotype. However, a large amount of data produced by omics experiments is not easily accessible since it is often scattered over many publications or stored in supplementary files. Objective To develop a novel open-source database, BcCluster (http://www.bccluster.org/), dedicated to the comprehensive molecular characterization of muscle invasive bladder carcinoma. Materials A database was created containing all reported molecular features significant in invasive BC. The query interface was developed in Ruby programming language (version 1.9.3) using the web-framework Rails (version 4.1.5) (http://rubyonrails.org/). 
Results BcCluster contains the data from 112 published references, providing 1,559 statistically significant features relative to BC invasion. The database also holds 435 protein-protein interaction data and 92 molecular pathways significant in BC invasion. The database can be used to retrieve binding partners and pathways for any protein of interest. We illustrate this possibility using survivin, a known BC biomarker. Conclusions BcCluster is an online database for retrieving molecular signatures relative to BC invasion. This application offers a comprehensive view of BC invasiveness at the molecular level and allows formulation of research hypotheses relevant to this phenotype.",BcCluster,0.995449245,NA,0,BcCluster,0.995449245,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/7/2016 +25332396,http://bioinformatics.breastcancertissue,"BCCTBbp: the Breast Cancer Campaign Tissue Bank bioinformatics portal. BCCTBbp (http://bioinformatics.breastcancertissue bank.org) was initially developed as the data-mining portal of the Breast Cancer Campaign Tissue Bank (BCCTB), a vital resource of breast cancer tissue for researchers to support and promote cutting-edge research. BCCTBbp is dedicated to maximising research on patient tissues by initially storing genomics, methylomics, transcriptomics, proteomics and microRNA data that has been mined from the literature and linking to pathways and mechanisms involved in breast cancer. Currently, the portal holds 146 datasets comprising over 227,795 expression/genomic measurements from various breast tissues (e.g. normal, malignant or benign lesions), cell lines and body fluids. BCCTBbp can be used to build on breast cancer knowledge and maximise the value of existing research. By recording a large number of annotations on samples and studies, and linking to other databases, such as NCBI, Ensembl and Reactome, a wide variety of different investigations can be carried out. 
Additionally, BCCTBbp has a dedicated analytical layer allowing researchers to further analyse stored datasets. A future important role for BCCTBbp is to make available all data generated on BCCTB tissues thus building a valuable resource of information on the tissues in BCCTB that will save repetition of experiments and expand scientific knowledge.",BCCTBbp,0.998296082,Breast Cancer Campaign Tissue Bank,0.761125972,BCCTBbp,0.998296082,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/20/2014 +32786900,http://mmb.irbbarcelona.org/BCE,"Bioactive Conformational Ensemble Server and Database. A Public Framework to Speed Up In Silico Drug Discovery. Modern high-throughput structure-based drug discovery algorithms consider ligand flexibility, but typically with low accuracy, which results in a loss of performance in the derived models. Here we present the bioactive conformational ensemble (BCE) server and its associated database. The server creates conformational ensembles of drug-like ligands and stores them in the BCE database, where a variety of analyses are offered to the user. The workflow implemented in the BCE server combines enhanced sampling molecular dynamics with self-consistent reaction field quantum mechanics (SCRF/QM) calculations. The server automatizes all of the steps to transform one-dimensional (1D) or 2D representation of drugs into 3D molecules, which are then titrated, parametrized, hydrated, and optimized before being subjected to Hamiltonian replica-exchange (HREX) molecular dynamics simulations. Ensembles are collected and subjected to a clustering procedure to derive representative conformers, which are then analyzed at the SCRF/QM level of theory. All structural data are organized in a noSQL database accessible through a graphical interface and in a programmatic manner through a REST API. 
The server allows the user to define a private workspace and offers a deposition protocol as well as input files for ""in house"" calculations in those cases where confidentiality is a must. The database and the associated server are available at https://mmb.irbbarcelona.org/BCE.",BCE,0.897705595,bioactive conformational,0.562582284,BCE,0.897705595,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/1/2020 +34144671,http://soft.bioinfo-minzhao.org/bcgene,"Online database for brain cancer-implicated genes: exploring the subtype-specific mechanisms of brain cancer. Background Brain cancer is one of the eight most common cancers occurring in people aged 40+ and is the fifth-leading cause of cancer-related deaths for males aged 40-59. Accurate subtype identification is crucial for precise therapeutic treatment, which largely depends on understanding the biological pathways and regulatory mechanisms associated with different brain cancer subtypes. Unfortunately, the subtype-implicated genes that have been identified are scattered in thousands of published studies. So, systematic literature curation and cross-validation could provide a solid base for comparative genetic studies about major subtypes. Results Here, we constructed a literature-based brain cancer gene database (BCGene). In the current release, we have a collection of 1421 unique human genes gathered through an extensive manual examination of over 6000 PubMed abstracts. We comprehensively annotated those curated genes to facilitate biological pathway identification, cancer genomic comparison, and differential expression analysis in various anatomical brain regions. By curating cancer subtypes from the literature, our database provides a basis for exploring the common and unique genetic mechanisms among 40 brain cancer subtypes. 
By further prioritizing the relative importance of those curated genes in the development of brain cancer, we identified 33 top-ranked genes with evidence mentioned only once in the literature, which were significantly associated with survival rates in a combined dataset of 2997 brain cancer cases. Conclusion BCGene provides a useful tool for exploring the genetic mechanisms of and gene priorities in brain cancer. BCGene is freely available to academic users at http://soft.bioinfo-minzhao.org/bcgene/ .",BCGene,0.99187088,brain cancer gene database,0.777056694,BCGene,0.99187088,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/18/2021 +28327601,http://omics.bmi.ac.cn/bcancer,"BCIP: a gene-centered platform for identifying potential regulatory genes in breast cancer. Breast cancer is a disease with high heterogeneity. Many issues on tumorigenesis and progression are still elusive. It is critical to identify genes that play important roles in the progression of tumors, especially for tumors with poor prognosis such as basal-like breast cancer and tumors in very young women. To facilitate the identification of potential regulatory or driver genes, we present the Breast Cancer Integrative Platform (BCIP, http://omics.bmi.ac.cn/bcancer/). BCIP maintains multi-omics data selected with strict quality control and processed with uniform normalization methods, including gene expression profiles from 9,005 tumor and 376 normal tissue samples, copy number variation information from 3,035 tumor samples, microRNA-target interactions, co-expressed genes, KEGG pathways, and mammary tissue-specific gene functional networks. This platform provides a user-friendly interface integrating comprehensive and flexible analysis tools on differential gene expression, copy number variation, and survival analysis. 
The prominent characteristic of BCIP is that users can perform analysis by customizing subgroups with single or combined clinical features, including subtypes, histological grades, pathologic stages, metastasis status, lymph node status, ER/PR/HER2 status, TP53 mutation status, menopause status, age, tumor size, therapy responses, and prognosis. BCIP will help to identify regulatory or driver genes and candidate biomarkers for further research in breast cancer.",BCIP,0.996045272,Breast Cancer Integrative Platform,0.955718553,BCIP,0.996045272,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/22/2017 +24608034,http://bcl2db.ibcp.fr,"BCL2DB: database of BCL-2 family members and BH3-only proteins. BCL2DB (http://bcl2db.ibcp.fr) is a database designed to integrate data on BCL-2 family members and BH3-only proteins. These proteins control the mitochondrial apoptotic pathway and probably many other cellular processes as well. This large protein group is formed by a family of pro-apoptotic and anti-apoptotic homologs that have phylogenetic relationships with BCL-2, and by a collection of evolutionarily and structurally unrelated proteins characterized by the presence of a region of local sequence similarity with BCL-2, termed the BH3 motif. BCL2DB is monthly built, thanks to an automated procedure relying on a set of homemade profile HMMs computed from seed reference sequences representative of the various BCL-2 homologs and BH3-only proteins. The BCL2DB entries integrate data from the Ensembl, Ensembl Genomes, European Nucleotide Archive and Protein Data Bank databases and are enriched with specific information like protein classification into orthology groups and distribution of BH motifs along the sequences. The Web interface allows for easy browsing of the site and fast access to data, as well as sequence analysis with generic and specific tools. BCL2DB provides a helpful and powerful tool to both 'BCL-2-ologists' and researchers working in the various fields of physiopathology. 
Database URL: http://bcl2db.ibcp.fr.",BCL2DB,0.997701004,NA,0,BCL2DB,0.997701004,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/6/2014 +26503249,http://immunet.cn/bdb,"BDB: biopanning data bank. The BDB database (http://immunet.cn/bdb) is an update of the MimoDB database, which was previously described in the 2012 Nucleic Acids Research Database issue. The rebranded name BDB is short for Biopanning Data Bank, which aims to be a portal for biopanning results of the combinatorial peptide library. Last updated in July 2015, BDB contains 2904 sets of biopanning data collected from 1322 peer-reviewed papers. It contains 25,786 peptide sequences, 1704 targets, 492 known templates, 447 peptide libraries and 310 crystal structures of target-template or target-peptide complexes. All data stored in BDB were revisited, and information on peptide affinity, measurement method and procedures was added for 2298 peptides from 411 sets of biopanning data from 246 published papers. In addition, a more professional and user-friendly web interface was implemented, a more detailed help system was designed, and a new on-the-fly data visualization tool and a series of tools for data analysis were integrated. With these new data and tools made available, we expect that the BDB database would become a major resource for scholars using phage display, with improved utility for biopanning and related scientific communities.",BDB,0.991443157,Biopanning Data Bank,0.795208553,BDB,0.991443157,1,NA,25352545,NA,NA,NA,do not merge,NA,NA,NA,10/25/2015 +25352545,http://swift.cmbi.ru.nl/gv/facilities,"A series of PDB-related databanks for everyday needs. We present a series of databanks (http://swift.cmbi.ru.nl/gv/facilities/) that hold information that is computationally derived from Protein Data Bank (PDB) entries and that might augment macromolecular structure studies. These derived databanks run parallel to the PDB, i.e. they have one entry per PDB entry. 
Several of the well-established databanks such as HSSP, PDBREPORT and PDB_REDO have been updated and/or improved. The software that creates the DSSP databank, for example, has been rewritten to better cope with π-helices. A large number of databanks have been added to aid computational structural biology; some examples are lists of residues that make crystal contacts, lists of contacting residues using a series of contact definitions or lists of residue accessibilities. PDB files are not the optimal presentation of the underlying data for many studies. We therefore made a series of databanks that hold PDB files in an easier to use or more consistent representation. The BDB databank holds X-ray PDB files with consistently represented B-factors. We also added several visualization tools to aid the users of our databanks.",BDB,0.792936504,NA,0,BDB,0.792936504,1,NA,26503249,low_prob_best_name,do not remove,NA,do not merge,NA,NA,NA,10/28/2014 +34736471,http://t21omics.cngb.org,"BDdb: a comprehensive platform for exploration and utilization of birth defect multi-omics data. Background Birth defects pose a major challenge to infant health. Thus far, however, the causes of most birth defects remain cryptic. Over the past few decades, considerable effort has been expended on disclosing the underlying mechanisms related to birth defects, yielding myriad treatises and data. To meet the increasing requirements for data resources, we developed a freely accessible birth defect multi-omics database (BDdb, http://t21omics.cngb.org ) consisting of multi-omics data and potential disease biomarkers. Results In total, omics datasets from 136 Gene Expression Omnibus (GEO) Series records, including 5245 samples, as well as 869 biomarkers of 22 birth defects in six different species, were integrated into the BDdb. The database provides a user-friendly interface for searching, browsing, and downloading data of interest. 
The BDdb also enables users to explore the correlations among different sequencing methods, such as chromatin immunoprecipitation sequencing (ChIP-Seq) and RNA sequencing (RNA-Seq) from different studies, to obtain the information on gene expression patterns from diverse aspects. Conclusion To the best of our knowledge, the BDdb is the first comprehensive database associated with birth defects, which should benefit the diagnosis and prevention of birth defects.",BDdb,0.995132804,birth defect multi-omics database,0.690108945,BDdb,0.995132804,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/4/2021 +23764453,http://bdgene.psych.ac.cn,"BDgene: a genetic database for bipolar disorder and its overlap with schizophrenia and major depressive disorder. Background Bipolar disorder (BD) is a common psychiatric disorder with complex genetic architecture. It shares overlapping genetic influences with schizophrenia (SZ) and major depressive disorder (MDD). Large numbers of genetic studies of BD and cross-disorder studies between BD and SZ/MDD have accumulated numerous genetic data. There is a growing need to integrate the data to provide a comprehensive data set to facilitate the genetic study of BD and its highly relevant diseases. Methods BDgene database was developed to integrate BD-related genetic factors and shared ones with SZ/MDD from profound literature reading. On the basis of data from the literature, in-depth analyses were performed for further understanding of the data, including gene prioritization, pathway-based analysis, intersection analysis of multidisease candidate genes, and pathway enrichment analysis. Results BDgene includes multiple types of literature-reported genetic factors of BD with both positive and negative results, including 797 genes, 3119 single nucleotide polymorphisms, and 789 regions. Shared genetic factors such as single nucleotide polymorphisms, genes, and regions from published cross-disorder studies among BD and SZ/MDD were also presented. 
In-depth data analyses identified 43 BD core genes; 70 BD candidate pathways; and 127, 79, and 107 new potential cross-disorder genes for BD-SZ, BD-MDD, and BD-SZ-MDD, respectively. Conclusions As a central genetic database for BD and the first cross-disorder database for BD and SZ/MDD, BDgene provides not only a comprehensive review of current genetic research but also high-confidence candidate genes and pathways for understanding of BD mechanism and shared etiology among its relevant diseases. BDgene is freely available at http://bdgene.psych.ac.cn.",BDgene,0.972416461,NA,0,BDgene,0.972416461,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/10/2013 +34081565,http://bike-bee.snu.ac.kr,"Biomedical Entity Explorer: A Web Server for Biomedical Entity Exploration. Biomedical Entity Explorer (BEE) is a web server that can search for biomedical entities from a database of six biomedical entity types (gene, miRNA, drug, disease, single nucleotide polymorphism [SNP], pathway) and their gene associations. The search results can be explored using intersections, unions, and negations. BEE has integrated biomedical entities from 16 databases (Ensemble, PharmGKB, Genetic Home Reference, Tarbase, Mirbase, NCI Thesaurus, DisGeNET, Linked life data, UMLS, GSEA MsigDB, Reactome, KEGG, Gene Ontology, HGVD, SNPedia, and dbSNP) based on their gene associations and built a database with their synonyms, descriptions, and links containing individual details. Users can enter the keyword of one or more entities and select the type of entity for which they want to know the relationship for and by using set operations such as union, negation, and intersection, they can navigate the search results more clearly. We believe that BEE will not only be useful for biologists querying for complex associations between entities, but can also be a good starting point for general users searching for biomedical entities. 
BEE is accessible at (http://bike-bee.snu.ac.kr).",BEE,0.989852111,Biomedical Entity Explorer,0.810965747,BEE,0.989852111,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/2/2021 +22238270,http://besckb.ornl.gov,"BESC knowledgebase public portal. Unlabelled The BioEnergy Science Center (BESC) is undertaking large experimental campaigns to understand the biosynthesis and biodegradation of biomass and to develop biofuel solutions. BESC is generating large volumes of diverse data, including genome sequences, omics data and assay results. The purpose of the BESC Knowledgebase is to serve as a centralized repository for experimentally generated data and to provide an integrated, interactive and user-friendly analysis framework. The Portal makes available tools for visualization, integration and analysis of data either produced by BESC or obtained from external resources. Availability http://besckb.ornl.gov.",BESC,0.902424991,NA,0,BESC,0.902424991,1,NA,22465851,low_prob_best_name,do not remove,NA,conflicting record(s) to be removed,NA,NA,NA,1/11/2012 +22465851,"http://cricket.ornl.gov/cgi-bin/beocyc_home.cgi, http://cricket.ornl.gov:1555/PTR/new-image?object=SUGAR-NUCLEOTIDES","Enhancing a Pathway-Genome Database (PGDB) to capture subcellular localization of metabolites and enzymes: the nucleotide-sugar biosynthetic pathways of Populus trichocarpa. Understanding how cellular metabolism works and is regulated requires that the underlying biochemical pathways be adequately represented and integrated with large metabolomic data sets to establish a robust network model. Genetically engineering energy crops to be less recalcitrant to saccharification requires detailed knowledge of plant polysaccharide structures and a thorough understanding of the metabolic pathways involved in forming and regulating cell-wall synthesis. Nucleotide-sugars are building blocks for synthesis of cell wall polysaccharides. 
The biosynthesis of nucleotide-sugars is catalyzed by a multitude of enzymes that reside in different subcellular organelles, and precise representation of these pathways requires accurate capture of this biological compartmentalization. The lack of simple localization cues in genomic sequence data and annotations however leads to missing compartmentalization information for eukaryotes in automatically generated databases, such as the Pathway-Genome Databases (PGDBs) of the SRI Pathway Tools software that drives much biochemical knowledge representation on the internet. In this report, we provide an informal mechanism using the existing Pathway Tools framework to integrate protein and metabolite sub-cellular localization data with the existing representation of the nucleotide-sugar metabolic pathways in a prototype PGDB for Populus trichocarpa. The enhanced pathway representations have been successfully used to map SNP abundance data to individual nucleotide-sugar biosynthetic genes in the PGDB. The manually curated pathway representations are more conducive to the construction of a computational platform that will allow the simulation of natural and engineered nucleotide-sugar precursor fluxes into specific recalcitrant polysaccharide(s). Database URL: The curated Populus PGDB is available in the BESC public portal at http://cricket.ornl.gov/cgi-bin/beocyc_home.cgi and the nucleotide-sugar biosynthetic pathways can be directly accessed at http://cricket.ornl.gov:1555/PTR/new-image?object=SUGAR-NUCLEOTIDES.",BESC,0.838247061,Pathway-Genome Database,0.765071064,BESC,0.838247061,1,NA,22238270,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,3/31/2012 +32655358,http://bci.med.tsinghua.edu.cn/download.html,"BETA: A Large Benchmark Database Toward SSVEP-BCI Application. The brain-computer interface (BCI) provides an alternative means to communicate and it has sparked growing interest in the past two decades. 
Specifically, for Steady-State Visual Evoked Potential (SSVEP) based BCI, marked improvement has been made in the frequency recognition method and data sharing. However, the number of pubic databases is still limited in this field. Therefore, we present a BEnchmark database Towards BCI Application (BETA) in the study. The BETA database is composed of 64-channel Electroencephalogram (EEG) data of 70 subjects performing a 40-target cued-spelling task. The design and the acquisition of the BETA are in pursuit of meeting the demand from real-world applications and it can be used as a test-bed for these scenarios. We validate the database by a series of analyses and conduct the classification analysis of eleven frequency recognition methods on BETA. We recommend using the metric of wide-band signal-to-noise ratio (SNR) and BCI quotient to characterize the SSVEP at the single-trial and population levels, respectively. The BETA database can be downloaded from the following link http://bci.med.tsinghua.edu.cn/download.html.",BETA,0.910390854,BCI,0.519562721,BETA,0.910390854,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/23/2020 +24399916,http://barleyflc.dna.affrc.go.jp/bexdb/index.html,"bex-db: Bioinformatics workbench for comprehensive analysis of barley-expressed genes. Barley (Hordeum vulgare) is one of the world's most important cereal crops. Although its large and complex genome has held back barley genomics for quite a while, the whole genome sequence was released in 2012 by the International Barley Genome Sequencing Consortium (IBSC). Moreover, more than 30,000 barley full-length cDNAs (FLcDNAs) are now available in the public domain. Here we present the Barley Gene Expression Database (bex-db: http://barleyflc.dna.affrc.go.jp/bexdb/index.html) as a repository of transcriptome data including the sequences and the expression profiles of barley genes resulting from microarray analysis. 
In addition to FLcDNA sequences, bex-db also contains partial sequences of more than 309,000 novel expressed sequence tags (ESTs). Users can browse the data via keyword, sequence homology and expression profile search options. A genome browser was also developed to display the chromosomal locations of barley FLcDNAs and wheat (Triticum aestivum) transcripts as well as Aegilops tauschii gene models on the IBSC genome sequence for future comparative analysis of orthologs among Triticeae species. The bex-db should provide a useful resource for further genomics studies and development of genome-based tools to enhance the progress of the genetic improvement of cereal crops.",bex-db,0.995140064,Barley Gene Expression Database,0.958310401,bex-db,0.995140064,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2013 +22250003,http://bfgr.plantbiology.msu.edu,"The Biofuel Feedstock Genomics Resource: a web-based portal and database to enable functional genomics of plant biofuel feedstock species. Major feedstock sources for future biofuel production are likely to be high biomass producing plant species such as poplar, pine, switchgrass, sorghum and maize. One active area of research in these species is genome-enabled improvement of lignocellulosic biofuel feedstock quality and yield. To facilitate genomic-based investigations in these species, we developed the Biofuel Feedstock Genomic Resource (BFGR), a database and web-portal that provides high-quality, uniform and integrated functional annotation of gene and transcript assembly sequences from species of interest to lignocellulosic biofuel feedstock researchers. The BFGR includes sequence data from 54 species and permits researchers to view, analyze and obtain annotation at the gene, transcript, protein and genome level. Annotation of biochemical pathways permits the identification of key genes and transcripts central to the improvement of lignocellulosic properties in these species. 
The integrated nature of the BFGR in terms of annotation methods, orthologous/paralogous relationships and linkage to seven species with complete genome sequences allows comparative analyses for biofuel feedstock species with limited sequence resources. Database URL: http://bfgr.plantbiology.msu.edu.",BFGR,0.983292729,Biofuel Feedstock Genomics Resource,0.972530476,BFGR,0.983292729,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/15/2012 +22125386,http://www.bfluenza.info,"BFluenza: A Proteomic Database on Bird Flu. Unlabelled Influenza A virus subtype H5N1, also known as ""bird flu"" has been documented to cause an outbreak of respiratory diseases in humans. The unprecedented spread of highly pathogenic avian influenza type A is a threat to veterinary and human health. The BFluenza is a relational database which is solely devoted to proteomic information of H5N1 subtype. Bfluenza has novel features including computed physico-chemical properties data of H5N1 viral proteins, modeled structures of viral proteins, data of protein coordinates, experimental details, molecular description and bibliographic reference. The database also contains nucleotide and their decoded protein sequences data. The database can be searched in various modes by setting search options. The structure of viral protein could be visualized by JMol viewer or by Discovery Studio. Availability The database is available for free at http://www.bfluenza.info.",BFluenza,0.856544018,NA,0,BFluenza,0.856544018,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/28/2011 +24570022,http://www.bgbx.com.br,"Brazilian genetic database of chromosome X. The X chromosome is a singular source of information in population genetics, anthropological research and in forensic cases. Thus, many researchers have been interested in characterizing X chromosome markers in different populations. 
The Brazilian Genetic Database of Chromosome X (BGBX--Banco Gen√ɬÉ√ǬÉ√ɬÇ√Ǭ©tico Brasileiro do Cromossomo X) website is freely available in Portuguese and English versions and was developed with the main purpose of compiling all Brazilian population genetic data for X chromosome short tandem repeats (X-STRs) markers published in scientific journals searchable via PubMed. Furthermore, this database presents other relevant information concerning X-STRs, such as genetic and physical locations, allele structure, nomenclature, mutation rates, primers described in the literature and likelihood ratio calculation. The entire scientific community is now encouraged to submit their X-STR population genetic data to this website, available at http://www.bgbx.com.br. Regarding future prospects of BGBX, the authors intend to expand the website with data and information of X-linked insertion-deletion polymorphisms.",BGBX,0.957761586,Brazilian Genetic Database of Chromosome X,0.940487146,BGBX,0.957761586,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/26/2014 +26110276,http://donglab.ecnu.edu.cn/databases/BatGenome,"BGD: a database of bat genomes. Bats account for ~20% of mammalian species, and are the only mammals with true powered flight. For the sake of their specialized phenotypic traits, many researches have been devoted to examine the evolution of bats. Until now, some whole genome sequences of bats have been assembled and annotated, however, a uniform resource for the annotated bat genomes is still unavailable. To make the extensive data associated with the bat genomes accessible to the general biological communities, we established a Bat Genome Database (BGD). BGD is an open-access, web-available portal that integrates available data of bat genomes and genes. It hosts data from six bat species, including two megabats and four microbats. Users can query the gene annotations using efficient searching engine, and it offers browsable tracks of bat genomes. 
Furthermore, an easy-to-use phylogenetic analysis tool was also provided to facilitate online phylogeny study of genes. To the best of our knowledge, BGD is the first database of bat genomes. It will extend our understanding of the bat evolution and be advantageous to the bat sequences analysis. BGD is freely available at: http://donglab.ecnu.edu.cn/databases/BatGenome/.",BGD,0.959574401,Genome Database,0.728102009,BGD,0.959574401,1,NA,"26481361.0, 31647100.0",low_prob_best_name,do not remove,NA,do not merge,NA,NA,NA,6/25/2015 +26481361,http://BovineGenome.org,"Bovine Genome Database: new tools for gleaning function from the Bos taurus genome. We report an update of the Bovine Genome Database (BGD) (http://BovineGenome.org). The goal of BGD is to support bovine genomics research by providing genome annotation and data mining tools. We have developed new genome and annotation browsers using JBrowse and WebApollo for two Bos taurus genome assemblies, the reference genome assembly (UMD3.1.1) and the alternate genome assembly (Btau_4.6.1). Annotation tools have been customized to highlight priority genes for annotation, and to aid annotators in selecting gene evidence tracks from 91 tissue specific RNAseq datasets. We have also developed BovineMine, based on the InterMine data warehousing system, to integrate the bovine genome, annotation, QTL, SNP and expression data with external sources of orthology, gene ontology, gene interaction and pathway information. BovineMine provides powerful query building tools, as well as customized query templates, and allows users to analyze and download genome-wide datasets. With BovineMine, bovine researchers can use orthology to leverage the curated gene pathways of model organisms, such as human, mouse and rat. 
BovineMine will be especially useful for gene ontology and pathway analyses in conjunction with GWAS and QTL studies.",BGD,0.996976018,Bovine Genome Database,0.979248871,BGD,0.996976018,1,NA,"26110276.0, 31647100.0",NA,NA,NA,merge only:,NA,NA,"26481361.0, 31647100.0",10/19/2015 +31647100,http://bovinegenome.org,"Bovine Genome Database: new annotation tools for a new reference genome. The Bovine Genome Database (BGD) (http://bovinegenome.org) has been the key community bovine genomics database for more than a decade. To accommodate the increasing amount and complexity of bovine genomics data, BGD continues to advance its practices in data acquisition, curation, integration and efficient data retrieval. BGD provides tools for genome browsing (JBrowse), genome annotation (Apollo), data mining (BovineMine) and sequence database searching (BLAST). To augment the BGD genome annotation capabilities, we have developed a new Apollo plug-in, called the Locus-Specific Alternate Assembly (LSAA) tool, which enables users to identify and report potential genome assembly errors and structural variants. BGD now hosts both the newest bovine reference genome assembly, ARS-UCD1.2, as well as the previous reference genome, UMD3.1.1, with cross-genome navigation and queries supported in JBrowse and BovineMine, respectively. Other notable enhancements to BovineMine include the incorporation of genomes and gene annotation datasets for non-bovine ruminant species (goat and sheep), support for multiple assemblies per organism in the Regions Search tool, integration of additional ontologies and development of many new template queries. 
To better serve the research community, we continue to focus on improving existing tools, developing new tools, adding new datasets and encouraging researchers to use these resources.",BGD,0.989425619,Bovine Genome Database,0.980239809,BGD,0.989425619,1,NA,"26110276.0, 26481361.0",NA,NA,NA,merge only:,NA,NA,"26481361.0, 31647100.0",1/1/2020 +23894186,http://dailab.sysu.edu.cn/bgdb,"BGDB: a database of bivalent genes. Bivalent gene is a gene marked with both H3K4me3 and H3K27me3 epigenetic modification in the same area, and is proposed to play a pivotal role related to pluripotency in embryonic stem (ES) cells. Identification of these bivalent genes and understanding their functions are important for further research of lineage specification and embryo development. So far, lots of genome-wide histone modification data were generated in mouse and human ES cells. These valuable data make it possible to identify bivalent genes, but no comprehensive data repositories or analysis tools are available for bivalent genes currently. In this work, we develop BGDB, the database of bivalent genes. The database contains 6897 bivalent genes in human and mouse ES cells, which are manually collected from scientific literature. Each entry contains curated information, including genomic context, sequences, gene ontology and other relevant information. The web services of BGDB database were implemented with PHP + MySQL + JavaScript, and provide diverse query functions. Database URL: http://dailab.sysu.edu.cn/bgdb/",BGDB,0.981280982,NA,0,BGDB,0.981280982,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/26/2013 +33037820,http://bgee.org,"The Bgee suite: integrated curated expression atlas and comparative transcriptomics in animals. Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced by integrating multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data). 
It is based exclusively on curated healthy wild-type expression data (e.g., no gene knock-out, no treatment, no disease), to provide a comparable reference of normal gene expression. Curation includes very large datasets such as GTEx (re-annotation of samples as 'healthy' or not) as well as many small ones. Data are integrated and made comparable between species thanks to consistent data annotation and processing, and to calls of presence/absence of expression, along with expression scores. As a result, Bgee is capable of detecting the conditions of expression of any single gene, accommodating any data type and species. Bgee provides several tools for analyses, allowing, e.g., automated comparisons of gene expression patterns within and between species, retrieval of the prefered conditions of expression of any gene, or enrichment analyses of conditions with expression of sets of genes. Bgee release 14.1 includes 29 animal species, and is available at https://bgee.org/ and through its Bioconductor R package BgeeDB.",Bgee,0.995789826,NA,0,Bgee,0.995789826,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +31807141,http://seqBEACON.genomics.cn:443/home.html,"SEQdata-BEACON: a comprehensive database of sequencing performance and statistical tools for performance evaluation and yield simulation in BGISEQ-500. Background The sequencing platform BGISEQ-500 is based on DNBSEQ technology and provides high throughput with low costs. This sequencer has been widely used in various areas of scientific and clinical research. A better understanding of the sequencing process and performance of this system is essential for stabilizing the sequencing process, accurately interpreting sequencing results and efficiently solving sequencing problems. To address these concerns, a comprehensive database, SEQdata-BEACON, was constructed to accumulate the run performance data in BGISEQ-500. 
Results A total of 60 BGISEQ-500 instruments in the BGI-Wuhan lab were used to collect sequencing performance data. Lanes in paired-end 100 (PE100) sequencing using 10√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâbp barcode were chosen, and each lane was assigned a unique entry number as its identification number (ID). From November 2018 to April 2019, 2236 entries were recorded in the database containing 65 metrics about sample, yield, quality, machine state and supplies information. Using a correlation matrix, 52 numerical metrics were clustered into three groups signifying yield-quality, machine state and sequencing calibration. The distributions of the metrics also delivered information about patterns and rendered clues for further explanation or analysis of the sequencing process. Using the data of a total of 200√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâcycles, a linear regression model well simulated the final outputs. Moreover, the predicted final yield could be provided in the 15th cycle of the early stage of sequencing, and the corresponding R2 of the 200th and 15th cycle models were 0.97 and 0.81, respectively. The model was run with the test sets obtained from May 2019 to predict the yield, which resulted in an R2 of 0.96. These results indicate that our simulation model was reliable and effective. Conclusions Data sources, statistical findings and application tools provide a constantly updated reference for BGISEQ-500 users to comprehensively understand DNBSEQ technology, solve sequencing problems and optimize run performance. These resources are available on our website http://seqBEACON.genomics.cn:443/home.html.",BGISEQ-500,0.996839881,NA,0,BGISEQ-500,0.996839881,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2019 +22084196,http://www.ncbi.nlm.nih.gov/projects/gv/rbc/xslcgi.fcgi?cmd=bgmut,"BGMUT: NCBI dbRBC database of allelic variations of genes encoding antigens of blood group systems. 
Analogous to human leukocyte antigens, blood group antigens are surface markers on the erythrocyte cell membrane whose structures differ among individuals and which can be serologically identified. The Blood Group Antigen Gene Mutation Database (BGMUT) is an online repository of allelic variations in genes that determine the antigens of various human blood group systems. The database is manually curated with allelic information collated from scientific literature and from direct submissions from research laboratories. Currently, the database documents sequence variations of a total of 1251 alleles of all 40 gene loci that together are known to affect antigens of 30 human blood group systems. When available, information on the geographic or ethnic prevalence of an allele is also provided. The BGMUT website also has general information on the human blood group systems and the genes responsible for them. BGMUT is a part of the dbRBC resource of the National Center for Biotechnology Information, USA, and is available online at http://www.ncbi.nlm.nih.gov/projects/gv/rbc/xslcgi.fcgi?cmd=bgmut. The database should be of use to members of the transfusion medicine community, those interested in studies of genetic variation and related topics such as human migrations, and students as well as members of the general public.",BGMUT,0.994283319,Blood Group Antigen Gene Mutation Database,0.958958909,BGMUT,0.994283319,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/13/2011 +34897852,http://clingen.igib.res.in/bgvar,"BGvar: A comprehensive resource for blood group immunogenetics. Background Blood groups form the basis of effective and safe blood transfusion. There are about 43 well-recognised human blood group systems presently known. Blood groups are molecularly determined by the presence of specific antigens on the red blood cells and are genetically determined and inherited following Mendelian principles. 
The lack of a comprehensive, relevant, manually compiled and genome-ready dataset of red cell antigens limited the widespread application of genomic technologies to characterise and interpret the blood group complement of an individual from genomic datasets. Materials and methods A range of public datasets was used to systematically annotate the variation compendium for its functionality and allele frequencies across global populations. Details on phenotype or relevant clinical importance were collated from reported literature evidence. Results We have compiled the Blood Group Associated Genomic Variant Resource (BGvar), a manually curated online resource comprising all known human blood group related allelic variants including a total of 1700 International Society of Blood Transfusion approved alleles and 1706 alleles predicted and curated from literature reports. This repository includes 1682 single nucleotide variations (SNVs), 310 Insertions, Deletions (InDels) and Duplications (Copy Number Variations) and about 1360 combination mutations corresponding to 43 human blood group systems and 2 transcription factors. This compendium also encompasses gene fusion and rearrangement events occurring in human blood group genes. Conclusion To the best of our knowledge, BGvar is a comprehensive and a user-friendly resource with most relevant collation of blood group alleles in humans. BGvar is accessible online at URL: http://clingen.igib.res.in/bgvar/.",BGvar,0.97764498,Blood Group Associated Genomic Variant Resource,0.957016902,BGvar,0.97764498,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/13/2021 +32540200,http://animal.nwsuaf.edu.cn/BosVar,"BGVD: An Integrated Database for Bovine Sequencing Variations and Selective Signatures. Next-generation sequencing has yielded a vast amount of cattle genomic data for global characterization of population genetic diversity and identification of genomic regions under natural and artificial selection. 
However, efficient storage, querying, and visualization of such large datasets remain challenging. Here, we developed a comprehensive database, the Bovine Genome Variation Database (BGVD). It provides six main functionalities: gene search, variation search, genomic signature search, Genome Browser, alignment search tools, and the genome coordinate conversion tool. BGVD contains information on genomic variations comprising ~60.44√ɬÉ√ǬÇ√ɬÇ√Ǭ†M SNPs, ~6.86√ɬÉ√ǬÇ√ɬÇ√Ǭ†M indels, 76,634 CNV regions, and signatures of selective sweeps in 432 samples from modern cattle worldwide. Users can quickly retrieve distribution patterns of these variations for 54 cattle breeds through an interactive source of breed origin map, using a given gene symbol or genomic region for any of the three versions of the bovine reference genomes (ARS-UCD1.2, UMD3.1.1, and Btau 5.0.1). Signals of selection sweep are displayed as Manhattan plots and Genome Browser tracks. To further investigate and visualize the relationships between variants and signatures of selection, the Genome Browser integrates all variations, selection data, and resources, from NCBI, the UCSC Genome Browser, and Animal QTLdb. Collectively, all these features make the BGVD a useful archive for in-depth data mining and analyses of cattle biology and cattle breeding on a global scale. BGVD is publicly available at http://animal.nwsuaf.edu.cn/BosVar.",BGVD,0.991665125,Bovine Genome Variation Database,0.976398796,BGVD,0.991665125,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2020 +33010170,http://bigfam.bioinformatics.nl,"BiG-FAM: the biosynthetic gene cluster families database. Computational analysis of biosynthetic gene clusters (BGCs) has revolutionized natural product discovery by enabling the rapid investigation of secondary metabolic potential within microbial genome sequences. 
Grouping homologous BGCs into Gene Cluster Families (GCFs) facilitates mapping their architectural and taxonomic diversity and provides insights into the novelty of putative BGCs, through dereplication with BGCs of known function. While multiple databases exist for exploring BGCs from publicly available data, no public resources exist that focus on GCF relationships. Here, we present BiG-FAM, a database of 29,955 GCFs capturing the global diversity of 1,225,071 BGCs predicted from 209,206 publicly available microbial genomes and metagenome-assembled genomes (MAGs). The database offers rich functionalities, such as multi-criterion GCF searches, direct links to BGC databases such as antiSMASH-DB, and rapid GCF annotation of user-supplied BGCs from antiSMASH results. BiG-FAM can be accessed online at https://bigfam.bioinformatics.nl.",BiG-FAM,0.988548267,biosynthetic gene cluster families,0.828322877,BiG-FAM,0.988548267,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +26476456,http://bigg.ucsd.edu,"BiGG Models: A platform for integrating, standardizing and sharing genome-scale models. Genome-scale metabolic models are mathematically-structured knowledge bases that can be used to predict metabolic pathway usage and growth phenotypes. Furthermore, they can generate and test hypotheses when integrated with experimental data. To maximize the value of these models, centralized repositories of high-quality models must be established, models must adhere to established standards and model components must be linked to relevant databases. Tools for model visualization further enhance their utility. To meet these needs, we present BiGG Models (http://bigg.ucsd.edu), a completely redesigned Biochemical, Genetic and Genomic knowledge base. BiGG Models contains more than 75 high-quality, manually-curated genome-scale metabolic models. On the website, users can browse, search and visualize models. BiGG Models connects genome-scale models to genome annotations and external databases. 
Reaction and metabolite identifiers have been standardized across models to conform to community standards and enable rapid comparison across models. Furthermore, BiGG Models provides a comprehensive application programming interface for accessing BiGG Models with modeling and analysis tools. As a resource for highly curated, standardized and accessible models of metabolism, BiGG Models will facilitate diverse systems biology studies and support knowledge-based analysis of diverse experimental data.",BiGG,0.950149775,NA,0,BiGG,0.950149775,1,31696234,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,10/17/2015 +31696234,http://bigg.ucsd.edu,"BiGG Models 2020: multi-strain genome-scale models and expansion across the phylogenetic tree. The BiGG Models knowledge base (http://bigg.ucsd.edu) is a centralized repository for high-quality genome-scale metabolic models. For the past 12 years, the website has allowed users to browse and search metabolic models. Within this update, we detail new content and features in the repository, continuing the original effort to connect each model to genome annotations and external databases as well as standardization of reactions and metabolites. We describe the addition of 31 new models that expand the portion of the phylogenetic tree covered by BiGG Models. We also describe new functionality for hosting multi-strain models, which have proven to be insightful in a variety of studies centered on comparisons of related strains. 
Finally, the models in the knowledge base have been benchmarked using Memote, a new community-developed validator for genome-scale models to demonstrate the improving quality and transparency of model content in BiGG Models.",BiGG Models,0.889322599,NA,0,BiGG Models,0.889322599,1,26476456,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,1/1/2020 +21233089,http://download.baderlab.org/BINDTranslation,"The Biomolecular Interaction Network Database in PSI-MI 2.5. The Biomolecular Interaction Network Database (BIND) is a major source of curated biomolecular interactions, which has been unmaintained for the last few years, a trend which will eventually result in the loss of a significant amount of unique biomolecular interaction information, mostly as database identifiers become out of date. To help reverse this trend, we converted BIND to a standard format, Proteomics Standard Initiative-Molecular Interaction 2.5, starting from the last curated data release (from 2005) available in a custom XML format and made the core components (interactions and complexes) plus additional valuable curated information available for download (http://download.baderlab.org/BINDTranslation/). Major work during the conversion process was required to update out of date molecule identifiers resulting in a more comprehensive conversion of BIND, by measures including number of species and interactor types covered, than what is currently accessible elsewhere. This work also highlights issues of data modeling, controlled vocabulary adoption and data cleaning that can serve as a general case study on the future compatibility of interaction databases. 
Database URL: http://download.baderlab.org/BINDTranslation/",BIND,0.947274288,Biomolecular Interaction Network Database,0.91527611,BIND,0.947274288,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/12/2011 +25378330,http://www.BindingMOAD.org,"Recent improvements to Binding MOAD: a resource for protein-ligand binding affinities and structures. For over 10 years, Binding MOAD (Mother of All Databases; http://www.BindingMOAD.org) has been one of the largest resources for high-quality protein-ligand complexes and associated binding affinity data. Binding MOAD has grown at the rate of 1994 complexes per year, on average. Currently, it contains 23,269 complexes and 8156 binding affinities. Our annual updates curate the data using a semi-automated literature search of the references cited within the PDB file, and we have recently upgraded our website and added new features and functionalities to better serve Binding MOAD users. In order to eliminate the legacy application server of the old platform and to accommodate new changes, the website has been completely rewritten in the LAMP (Linux, Apache, MySQL and PHP) environment. The improved user interface incorporates current third-party plugins for better visualization of protein and ligand molecules, and it provides features like sorting, filtering and filtered downloads. In addition to the field-based searching, Binding MOAD now can be searched by structural queries based on the ligand. In order to remove redundancy, Binding MOAD records are clustered in different families based on 90% sequence identity. The new Binding MOAD, with the upgraded platform, features and functionalities, is now equipped to better serve its users.",Binding MOAD,0.90315028,NA,0,Binding MOAD,0.90315028,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/6/2014 +31405382,http://bio.tools,"The bio.tools registry of software tools and data resources for the life sciences. 
Bioinformaticians and biologists rely increasingly upon workflows for the flexible utilization of the many life science tools that are needed to optimally convert data into knowledge. We outline a pan-European enterprise to provide a catalogue ( https://bio.tools ) of tools and databases that can be used in these workflows. bio.tools not only lists where to find resources, but also provides a wide variety of practical information.",bio.tools,0.987343351,NA,0,bio.tools,0.987343351,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/12/2019 +28708831,http://biochem4j.org,"biochem4j: Integrated and extensible biochemical knowledge through graph databases. Biologists and biochemists have at their disposal a number of excellent, publicly available data resources such as UniProt, KEGG, and NCBI Taxonomy, which catalogue biological entities. Despite the usefulness of these resources, they remain fundamentally unconnected. While links may appear between entries across these databases, users are typically only able to follow such links by manual browsing or through specialised workflows. Although many of the resources provide web-service interfaces for computational access, performing federated queries across databases remains a non-trivial but essential activity in interdisciplinary systems and synthetic biology programmes. What is needed are integrated repositories to catalogue both biological entities and-crucially-the relationships between them. Such a resource should be extensible, such that newly discovered relationships-for example, those between novel, synthetic enzymes and non-natural products-can be added over time. With the introduction of graph databases, the barrier to the rapid generation, extension and querying of such a resource has been lowered considerably. 
With a particular focus on metabolic engineering as an illustrative application domain, biochem4j, freely available at http://biochem4j.org, is introduced to provide an integrated, queryable database that warehouses chemical, reaction, enzyme and taxonomic data from a range of reliable resources. The biochem4j framework establishes a starting point for the flexible integration and exploitation of an ever-wider range of biological data sources, from public databases to laboratory-specific experimental datasets, for the benefit of systems biologists, biosystems engineers and the wider community of molecular biologists and biological chemists.",biochem4j,0.994090736,NA,0,biochem4j,0.994090736,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/14/2017 +25516260,http://www.nd.edu,"A database of circadian and diel rhythmic gene expression in the yellow fever mosquito Aedes aegypti. Background The mosquito species Aedes aegypti is the primary vector of many arboviral diseases, including dengue and yellow fevers, that are responsible for a large worldwide health burden. The biological rhythms of mosquitoes regulate many of the physiological processes and behaviors that influence the transmission of these diseases. For insight into the molecular basis of biological rhythms, diel and circadian gene expression profiling has been carried out for many species. To bring these resources to Aedes aegypti researchers, we used microarray technology to carry out a genome wide assessment of gene expression during the 24√ɬÉ√ǬÇ√ɬÇ√Ǭ†hour light/dark (LD) cycle and during constant darkness (DD). The purpose of this report is to describe the methods, the validation of the results, and the organization of this database resource. Description The Aedes aegypti Circadian Database is a publicly accessible database that can be searched via a text-based query to visualize 44√ɬÉ√ǬÇ√ɬÇ√Ǭ†hour temporal expression patterns of a given gene in Ae. 
aegypti heads under diel (observed under a 12√ɬÉ√ǬÇ√ɬÇ√Ǭ†hour/12√ɬÉ√ǬÇ√ɬÇ√Ǭ†hour LD cycle) and circadian (observed under DD) conditions. Profiles of gene expression under these conditions were assayed by Nimblegen 12-plex microarrays and rhythmicity was objectively assessed by the JTK_CYCLE algorithm. The output of the search is a graphical representation of the expression data along with computed period length, the time-of-day of gene expression peaks, and statistical determination for rhythmicity. Conclusion Our results show that at least 7.9% of the gene set present in the Aedes aegypti head are rhythmic under LD conditions and 6.7% can be considered circadian, oscillating under constant dark conditions. We present these results in the Aedes aegypti Circadian Database through Bioclock, a public website hosted by the University of Notre Dame at http://www.nd.edu/~bioclock/. This website allows searchable browsing of this quantitative gene expression information. The visualization allows for gene-by-gene comparison of transcript expression under both diel and circadian conditions, and the results are presented graphically in a plot profile of gene expression. The Ae. aegypti Circadian Database provides a community resource for observing diel and circadian fluctuations in gene expression across the Ae. aegypti genome.",Bioclock,0.913504779,Aedes aegypti Circadian Database,0.621617784,Bioclock,0.913504779,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/17/2014 +22359433,http://www.biodenzdatabase.in,"BiodEnz:A database of biodegrading enzymes. Unlabelled Azo dyes, which re characterized by azo bonds, are a predominant class of colorants used in tattooing, cosmetics, foods, textile and consumer products. Laccases (EC 1.10.3.2), lignin peroxidases (EC 1.11.1.14) , Azo reductases (EC 1.7.1.6) of different micro organisms are mainly useful for the development of biodegradation systems as they catalyse reductive cleavage of azo groups (-N=N-) . 
Laccases have very broad substrate specificity with respect to the electron donor and is capable of oxidizing phenols and aromatic amines. Azoreductase belongs to the family of oxidoreductases, acting on other nitrogenous compounds as donors with NAD+ or NADP+ as acceptor. Lignin peroxidase enzymes are highly non-specific and are well reported to decolourize various dyes We have developed BiodEnz database by collecting information like strains that produce particular enzymes, azo dyes that are degraded , substrate specificity, molecular weight, the optimum temperature and pH, sequence data of the above enzymes ,as the most effective inoculants used for bioremediation are able to degrade dyes over a broad concentration range, tolerate a range of environmental conditions of temperature, pH, and activity of the enzymes. The database can be searched by using a user friendly web interface. Availability The database is available for free at http://www.biodenzdatabase.in.",BiodEnz,0.967340887,NA,0,BiodEnz,0.967340887,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/6/2012 +28875065,"http://metabiosys.iiserb.ac.in/biofueldb, http://metagenomics.iiserb.ac.in/biofueldb","BioFuelDB: a database and prediction server of enzymes involved in biofuels production. Background In light of the rapid decrease in fossils fuel reserves and an increasing demand for energy, novel methods are required to explore alternative biofuel production processes to alleviate these pressures. A wide variety of molecules which can either be used as biofuels or as biofuel precursors are produced using microbial enzymes. However, the common challenges in the industrial implementation of enzyme catalysis for biofuel production are the unavailability of a comprehensive biofuel enzyme resource, low efficiency of known enzymes, and limited availability of enzymes which can function under extreme conditions in the industrial processes. 
Methods We have developed a comprehensive database of known enzymes with proven or potential applications in biofuel production through text mining of PubMed abstracts and other publicly available information. A total of 131 enzymes with a role in biofuel production were identified and classified into six enzyme classes and four broad application categories namely 'Alcohol production', 'Biodiesel production', 'Fuel Cell' and 'Alternate biofuels'. A prediction tool 'Benz' was developed to identify and classify novel homologues of the known biofuel enzyme sequences from sequenced genomes and metagenomes. 'Benz' employs a hybrid approach incorporating HMMER 3.0 and RAPSearch2 programs to provide high accuracy and high speed for prediction. Results Using the Benz tool, 153,754 novel homologues of biofuel enzymes were identified from 23 diverse metagenomic sources. The comprehensive data of curated biofuel enzymes, their novel homologs identified from diverse metagenomes, and the hybrid prediction tool Benz are presented as a web server which can be used for the prediction of biofuel enzymes from genomic and metagenomic datasets. The database and the Benz tool is publicly available at http://metabiosys.iiserb.ac.in/biofueldb& http://metagenomics.iiserb.ac.in/biofueldb.",BioFuelDB,0.865778863,NA,0,BioFuelDB,0.865778863,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/28/2017 +21904428,http://www.tnaugenomics.com/biogenbase/casava.php,"A web accessible resource for investigating cassava phenomics and genomics information: BIOGEN BASE. Unlabelled The goal of our research is to establish a unique portal to bring out the potential outcome of the research in the Casssava crop. The Biogen base for cassava clearly brings out the variations of different traits of the germplasms, maintained at the Tapioca and Castor Research Station, Tamil Nadu Agricultural University. 
Phenotypic and genotypic variations of the accessions are clearly depicted, for the users to browse and interpret the variations using the microsatellite markers. Database (BIOGEN BASE - CASSAVA) is designed using PHP and MySQL and is equipped with extensive search options. It is more user-friendly and made publicly available, to improve the research and development of cassava by making a wealth of genetics and genomics data available through open, common, and worldwide forum for all individuals interested in the field. Availability The database is available for free at http://www.tnaugenomics.com/biogenbase/casava.php.",Biogen base,0.733742376,IOGEN BASE,0.716412246,Biogen base,0.733742376,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/2/2011 +26578587,http://biogps.org,"BioGPS: building your own mash-up of gene annotations and expression profiles. BioGPS (http://biogps.org) is a centralized gene-annotation portal that enables researchers to access distributed gene annotation resources. This article focuses on the updates to BioGPS since our last paper (2013 database issue). The unique features of BioGPS, compared to those of other gene portals, are its community extensibility and user customizability. Users contribute the gene-specific resources accessible from BioGPS ('plugins'), which helps ensure that the resource collection is always up-to-date and that it will continue expanding over time (since the 2013 paper, 162 resources have been added, for a 34% increase in the number of resources available). BioGPS users can create their own collections of relevant plugins and save them as customized gene-report pages or 'layouts' (since the 2013 paper, 488 user-created layouts have been added, for a 22% increase in the number of layouts). In addition, we recently updated the most popular plugin, the 'Gene expression/activity chart', to include √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº 6000 datasets (from √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº 2000 datasets) and we enhanced user interactivity. 
We also added a new 'gene list' feature that allows users to save query results for future reference.",BioGPS,0.984374166,NA,0,BioGPS,0.984374166,1,23175613,23175613,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/17/2015 +23175613,"http://biogps.org, http://mygene.info","BioGPS and MyGene.info: organizing online, gene-centric information. Fast-evolving technologies have enabled researchers to easily generate data at genome scale, and using these technologies to compare biological states typically results in a list of candidate genes. Researchers are then faced with the daunting task of prioritizing these candidate genes for follow-up studies. There are hundreds, possibly even thousands, of web-based gene annotation resources available, but it quickly becomes impractical to manually access and review all of these sites for each gene in a candidate gene list. BioGPS (http://biogps.org) was created as a centralized gene portal for aggregating distributed gene annotation resources, emphasizing community extensibility and user customizability. BioGPS serves as a convenient tool for users to access known gene-centric resources, as well as a mechanism to discover new resources that were previously unknown to the user. This article describes updates to BioGPS made after its initial release in 2008. We summarize recent additions of features and data, as well as the robust user activity that underlies this community intelligence application. Finally, we describe MyGene.info (http://mygene.info) and related web services that provide programmatic access to BioGPS.",BioGPS,0.973907888,NA,0,BioGPS,0.973907888,1,26578587,26578587,low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",TRUE POS: two resources; name and URL of first will be correct; second is lost,NA,NA,11/21/2012 +"25428363, 27980099",http://thebiogrid.org,"The BioGRID interaction database: 2015 update. 
The Biological General Repository for Interaction Datasets (BioGRID: http://thebiogrid.org) is an open access database that houses genetic and protein interactions curated from the primary biomedical literature for all major model organism species and humans. As of September 2014, the BioGRID contains 749,912 interactions as drawn from 43,149 publications that represent 30 model organisms. This interaction count represents a 50% increase compared to our previous 2013 BioGRID update. BioGRID data are freely distributed through partner model organism databases and meta-databases and are directly downloadable in a variety of formats. In addition to general curation of the published literature for the major model species, BioGRID undertakes themed curation projects in areas of particular relevance for biomedical sciences, such as the ubiquitin-proteasome system and various human disease-associated interaction networks. BioGRID curation is coordinated through an Interaction Management System (IMS) that facilitates the compilation interaction records through structured evidence codes, phenotype ontologies, and gene annotation. The BioGRID architecture has been improved in order to support a broader range of interaction and post-translational modification types, to allow the representation of more complex multi-gene/protein interactions, to account for cellular phenotypes through structured ontologies, to expedite curation through semi-automated text-mining approaches, and to enhance curation quality control.",BioGRID,0.997427076,Biological General Repository for Interaction Datasets,0.975577229,BioGRID,0.997427076,2,30476227,"28077563.0, 30476227.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,12/14/2016 +30476227,"http://thebiogrid.org, http://orcs.thebiogrid.org","The BioGRID interaction database: 2019 update. 
The Biological General Repository for Interaction Datasets (BioGRID: https://thebiogrid.org) is an open access database dedicated to the curation and archival storage of protein, genetic and chemical interactions for all major model organism species and humans. As of September 2018 (build 3.4.164), BioGRID contains records for 1 598 688 biological interactions manually annotated from 55 809 publications for 71 species, as classified by an updated set of controlled vocabularies for experimental detection methods. BioGRID also houses records for >700 000 post-translational modification sites. BioGRID now captures chemical interaction data, including chemical-protein interactions for human drug targets drawn from the DrugBank database and manually curated bioactive compounds reported in the literature. A new dedicated aspect of BioGRID annotates genome-wide CRISPR/Cas9-based screens that report gene-phenotype and gene-gene relationships. An extension of the BioGRID resource called the Open Repository for CRISPR Screens (ORCS) database (https://orcs.thebiogrid.org) currently contains over 500 genome-wide screens carried out in human or mouse cell lines. All data in BioGRID is made freely available without restriction, is directly downloadable in standard formats and can be readily incorporated into existing applications via our web service platforms. BioGRID data are also freely distributed through partner model organism databases and meta-databases.",BioGRID,0.99651432,Biological General Repository for Interaction Datasets,0.967993659,BioGRID,0.99651432,1,"25428363.0, 27980099.0","28077563.0, 25428363.0, 27980099.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +28077563,http://bioc.sourceforge.net/BioC-BioGRID.html,"The BioC-BioGRID corpus: full text articles annotated for curation of protein-protein and genetic interactions. . 
A great deal of information on the molecular genetics and biochemistry of model organisms has been reported in the scientific literature. However, this data is typically described in free text form and is not readily amenable to computational analyses. To this end, the BioGRID database systematically curates the biomedical literature for genetic and protein interaction data. This data is provided in a standardized computationally tractable format and includes structured annotation of experimental evidence. BioGRID curation necessarily involves substantial human effort by expert curators who must read each publication to extract the relevant information. Computational text-mining methods offer the potential to augment and accelerate manual curation. To facilitate the development of practical text-mining strategies, a new challenge was organized in BioCreative V for the BioC task, the collaborative Biocurator Assistant Task. This was a non-competitive, cooperative task in which the participants worked together to build BioC-compatible modules into an integrated pipeline to assist BioGRID curators. As an integral part of this task, a test collection of full text articles was developed that contained both biological entity annotations (gene/protein and organism/species) and molecular interaction annotations (protein-protein and genetic interactions (PPIs and GIs)). This collection, which we call the BioC-BioGRID corpus, was annotated by four BioGRID curators over three rounds of annotation and contains 120 full text articles curated in a dataset representing two major model organisms, namely budding yeast and human. The BioC-BioGRID corpus contains annotations for 6409 mentions of genes and their Entrez Gene IDs, 186 mentions of organism names and their NCBI Taxonomy IDs, 1867 mentions of PPIs and 701 annotations of PPI experimental evidence statements, 856 mentions of GIs and 399 annotations of GI evidence statements. 
The purpose, characteristics and possible future uses of the BioC-BioGRID corpus are detailed in this report.Database URL: http://bioc.sourceforge.net/BioC-BioGRID.html.",BioGRID,0.991511285,NA,0,BioGRID,0.991511285,1,NA,"30476227.0, 25428363.0, 27980099.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/10/2017 +27246819,http://biohub.cs.manchester.ac.uk/ontology/biohub-kb.owl,"The BioHub Knowledge Base: Ontology and Repository for Sustainable Biosourcing. Background The motivation for the BioHub project is to create an Integrated Knowledge Management System (IKMS) that will enable chemists to source ingredients from bio-renewables, rather than from non-sustainable sources such as fossil oil and its derivatives. Method The BioHubKB is the data repository of the IKMS; it employs Semantic Web technologies, especially OWL, to host data about chemical transformations, bio-renewable feedstocks, co-product streams and their chemical components. Access to this knowledge base is provided to other modules within the IKMS through a set of RESTful web services, driven by SPARQL queries to a Sesame back-end. The BioHubKB re-uses several bio-ontologies and bespoke extensions, primarily for chemical feedstocks and products, to form its knowledge organisation schema. Results Parts of plants form feedstocks, while various processes generate co-product streams that contain certain chemicals. Both chemicals and transformations are associated with certain qualities, which the BioHubKB also attempts to capture. Of immediate commercial and industrial importance is to estimate the cost of particular sets of chemical transformations (leading to candidate surfactants) performed in sequence, and these costs too are captured. Data are sourced from companies' internal knowledge and document stores, and from the publicly available literature. Both text analytics and manual curation play their part in populating the ontology. 
We describe the prototype IKMS, the BioHubKB and the services that it supports for the IKMS. Availability The BioHubKB can be found via http://biohub.cs.manchester.ac.uk/ontology/biohub-kb.owl .",BioHub,0.989485323,NA,0,BioHub,0.989485323,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2016 +26401099,http://bioimg.org,"BioImg.org: A Catalog of Virtual Machine Images for the Life Sciences. Virtualization is becoming increasingly important in bioscience, enabling assembly and provisioning of complete computer setups, including operating system, data, software, and services packaged as virtual machine images (VMIs). We present an open catalog of VMIs for the life sciences, where scientists can share information about images and optionally upload them to a server equipped with a large file system and fast Internet connection. Other scientists can then search for and download images that can be run on the local computer or in a cloud computing environment, providing easy access to bioinformatics environments. We also describe applications where VMIs aid life science research, including distributing tools and data, supporting reproducible analysis, and facilitating education. BioImg.org is freely available at: https://bioimg.org.",BioImg,0.641896486,NA,0,BioImg,0.641896486,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/10/2015 +22700703,http://bioinformatics.ca/links_directory,"A decade of Web Server updates at the Bioinformatics Links Directory: 2003-2012. The 2012 Bioinformatics Links Directory update marks the 10th special Web Server issue from Nucleic Acids Research. Beginning with content from their 2003 publication, the Bioinformatics Links Directory in collaboration with Nucleic Acids Research has compiled and published a comprehensive list of freely accessible, online tools, databases and resource materials for the bioinformatics and life science research communities. 
The past decade has exhibited significant growth and change in the types of tools, databases and resources being put forth, reflecting both technology changes and the nature of research over that time. With the addition of 90 web server tools and 12 updates from the July 2012 Web Server issue of Nucleic Acids Research, the Bioinformatics Links Directory at http://bioinformatics.ca/links_directory/ now contains an impressive 134 resources, 455 databases and 1205 web server tools, mirroring the continued activity and efforts of our field.",Bioinformatics,0.748195767,NA,0,Bioinformatics,0.748195767,1,21715385,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,6/14/2012 +21715385,http://bioinformatics.ca/links_directory,"The 2011 Bioinformatics Links Directory update: more resources, tools and databases and features to empower the bioinformatics community. The Bioinformatics Links Directory continues its collaboration with Nucleic Acids Research to collaboratively publish and compile a freely accessible, online collection of tools, databases and resource materials for bioinformatics and molecular biology research. The July 2011 Web Server issue of Nucleic Acids Research adds an additional 78 web server tools and 14 updates to the directory at http://bioinformatics.ca/links_directory/.",Bioinformatics Links,0.632332242,NA,0,Bioinformatics Links,0.632332242,1,22700703,NA,low_prob_best_name,do not remove,conflicting record(s) to be removed,NA,NA,NA,NA,7/1/2011 +23087378,http://zhanglab.ccmb.med.umich.edu/BioLiP,"BioLiP: a semi-manually curated database for biologically relevant ligand-protein interactions. BioLiP (http://zhanglab.ccmb.med.umich.edu/BioLiP/) is a semi-manually curated database for biologically relevant ligand-protein interactions. Establishing interactions between protein and biologically relevant ligands is an important step toward understanding the protein functions. 
Most ligand-binding sites prediction methods use the protein structures from the Protein Data Bank (PDB) as templates. However, not all ligands present in the PDB are biologically relevant, as small molecules are often used as additives for solving the protein structures. To facilitate template-based ligand-protein docking, virtual ligand screening and protein function annotations, we develop a hierarchical procedure for assessing the biological relevance of ligands present in the PDB structures, which involves a four-step biological feature filtering followed by careful manual verifications. This procedure is used for BioLiP construction. Each entry in BioLiP contains annotations on: ligand-binding residues, ligand-binding affinity, catalytic sites, Enzyme Commission numbers, Gene Ontology terms and cross-links to the other databases. In addition, to facilitate the use of BioLiP for function annotation of uncharacterized proteins, a new consensus-based algorithm COACH is developed to predict ligand-binding sites from protein sequence or using 3D structure. The BioLiP database is updated weekly and the current release contains 204 223 entries.",BioLiP,0.991326213,NA,0,BioLiP,0.991326213,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/18/2012 +28605773,http://www.bio-bigdata.com/BioM2MetDisease,"BioM2MetDisease: a manually curated database for associations between microRNAs, metabolites, small molecules and metabolic diseases. . BioM2MetDisease is a manually curated database that aims to provide a comprehensive and experimentally supported resource of associations between metabolic diseases and various biomolecules. Recently, metabolic diseases such as diabetes have become one of the leading threats to people√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭôs health. Metabolic disease associated with alterations of multiple types of biomolecules such as miRNAs and metabolites. 
An integrated and high-quality data source that collection of metabolic disease associated biomolecules is essential for exploring the underlying molecular mechanisms and discovering novel therapeutics. Here, we developed the BioM2MetDisease database, which currently documents 2681 entries of relationships between 1147 biomolecules (miRNAs, metabolites and small molecules/drugs) and 78 metabolic diseases across 14 species. Each entry includes biomolecule category, species, biomolecule name, disease name, dysregulation pattern, experimental technique, a brief description of metabolic disease-biomolecule relationships, the reference, additional annotation information etc. BioM2MetDisease provides a user-friendly interface to explore and retrieve all data conveniently. A submission page was also offered for researchers to submit new associations between biomolecules and metabolic diseases. BioM2MetDisease provides a comprehensive resource for studying biology molecules act in metabolic diseases, and it is helpful for understanding the molecular mechanisms and developing novel therapeutics for metabolic diseases. http://www.bio-bigdata.com/BioM2MetDisease/.",BioM2MetDisease,0.99300829,NA,0,BioM2MetDisease,0.99300829,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +29846545,http://sysbio.suda.edu.cn/CBD,"CBD: a biomarker database for colorectal cancer. . Colorectal cancer (CRC) biomarker database (CBD) was established based on 870 identified CRC biomarkers and their relevant information from 1115 original articles in PubMed published from 1986 to 2017. In this version of the CBD, CRC biomarker data were collected, sorted, displayed and analysed. The CBD with the credible contents as a powerful and time-saving tool provide more comprehensive and accurate information for further CRC biomarker research. The CBD was constructed under MySQL server. HTML, PHP and JavaScript languages have been used to implement the web interface. The Apache was selected as HTTP server. 
All of these web operations were implemented under the Windows system. The CBD could provide to users the multiple individual biomarker information and categorized into the biological category, source and application of biomarkers; the experiment methods, results, authors and publication resources; the research region, the average age of cohort, gender, race, the number of tumours, tumour location and stage. We only collect data from the articles with clear and credible results to prove the biomarkers are useful in the diagnosis, treatment or prognosis of CRC. The CBD can also provide a professional platform to researchers who are interested in CRC research to communicate, exchange their research ideas and further design high-quality research in CRC. They can submit their new findings to our database via the submission page and communicate with us in the CBD.Database URL: http://sysbio.suda.edu.cn/CBD/.",CBD,0.900338312,biomarker database,0.936357975,biomarker database,0.936357975,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2018 +33552037,http://www.biomaster-uestc.cn,"BioMaster: An Integrated Database and Analytic Platform to Provide Comprehensive Information About BioBrick Parts. Synthetic biology seeks to create new biological parts, devices, and systems, and to reconfigure existing natural biological systems for custom-designed purposes. The standardized BioBrick parts are the foundation of synthetic biology. The incomplete and flawed metadata of BioBrick parts, however, are a major obstacle for designing genetic circuit easily, quickly, and accurately. 
Here, a database termed BioMaster http://www.biomaster-uestc.cn was developed to extensively complement information about BioBrick parts, which includes 47,934 items of BioBrick parts from the international Genetically Engineered Machine (iGEM) Registry with more comprehensive information integrated from 10 databases, providing corresponding information about functions, activities, interactions, and related literature. Moreover, BioMaster is also a user-friendly platform for retrieval and analyses of relevant information on BioBrick parts.",BioMaster,0.990046203,NA,0,BioMaster,0.990046203,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/21/2021 +31599923,http://kobic.re.kr/biomenet,"BiomeNet: a database for construction and analysis of functional interaction networks for any species with a sequenced genome. Motivation Owing to advanced DNA sequencing and genome assembly technology, the number of species with sequenced genomes is rapidly increasing. The aim of the recently launched Earth BioGenome Project is to sequence genomes of all eukaryotic species on Earth over the next 10√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâyears, making it feasible to obtain genomic blueprints of the majority of animal and plant species by this time. Genetic models of the sequenced species will later be subject to functional annotation, and a comprehensive molecular network should facilitate functional analysis of individual genes and pathways. However, network databases are lagging behind genome sequencing projects as even the largest network database provides gene networks for less than 10% of sequenced eukaryotic genomes, and the knowledge gap between genomes and interactomes continues to widen. Results We present BiomeNet, a database of 95 scored networks comprising over 8 million co-functional links, which can build and analyze gene networks for any species with the sequenced genome. 
BiomeNet transfers functional interactions between orthologous proteins from source networks to the target species within minutes and automatically constructs gene networks with the quality comparable to that of existing networks. BiomeNet enables assembly of the first-in-species gene networks not available through other databases, which are highly predictive of diverse biological processes and can also provide network analysis by extracting subnetworks for individual biological processes and network-based gene prioritizations. These data indicate that BiomeNet could enhance the benefits of decoding the genomes of various species, thus improving our understanding of the Earth' biodiversity. Availability and implementation The BiomeNet is freely available at http://kobic.re.kr/biomenet/. Supplementary information Supplementary data are available at Bioinformatics online.",BiomeNet,0.979145229,NA,0,BiomeNet,0.979145229,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2020 +"25414348, 31701150",http://www.ebi.ac.uk/biomodels,"BioModels: ten-year anniversary. BioModels (http://www.ebi.ac.uk/biomodels/) is a repository of mathematical models of biological processes. A large set of models is curated to verify both correspondence to the biological process that the model seeks to represent, and reproducibility of the simulation results as described in the corresponding peer-reviewed publication. Many models submitted to the database are annotated, cross-referencing its components to external resources such as database records, and terms from controlled vocabularies and ontologies. BioModels comprises two main branches: one is composed of models derived from literature, while the second is generated through automated processes. BioModels currently hosts over 1200 models derived directly from the literature, as well as in excess of 140,000 models automatically generated from pathway resources. 
This represents an approximate 60-fold growth for literature-based model numbers alone, since BioModels' first release a decade ago. This article describes updates to the resource over this period, which include changes to the user interface, the annotation profiles of models in the curation pipeline, major infrastructure changes, ability to perform online simulations and the availability of model content in Linked Data form. We also outline planned improvements to cope with a diverse array of new challenges.",BioModels,0.990961909,NA,0,BioModels,0.990961909,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +30053270,"http://hive.biochemistry.gwu.edu/biomuta, http://hive.biochemistry.gwu.edu/bioxpress","BioMuta and BioXpress: mutation and expression knowledgebases for cancer biomarker discovery. Single-nucleotide variation and gene expression of disease samples represent important resources for biomarker discovery. Many databases have been built to host and make available such data to the community, but these databases are frequently limited in scope and/or content. BioMuta, a database of cancer-associated single-nucleotide variations, and BioXpress, a database of cancer-associated differentially expressed genes and microRNAs, differ from other disease-associated variation and expression databases primarily through the aggregation of data across many studies into a single source with a unified representation and annotation of functional attributes. Early versions of these resources were initiated by pilot funding for specific research applications, but newly awarded funds have enabled hardening of these databases to production-level quality and will allow for sustained development of these resources for the next few years. Because both resources were developed using a similar methodology of integration, curation, unification, and annotation, we present BioMuta and BioXpress as allied databases that will facilitate a more comprehensive view of gene associations in cancer. 
BioMuta and BioXpress are hosted on the High-performance Integrated Virtual Environment (HIVE) server at the George Washington University at https://hive.biochemistry.gwu.edu/biomuta and https://hive.biochemistry.gwu.edu/bioxpress, respectively.",BioMuta,0.790885687,NA,0,BioMuta,0.790885687,1,NA,NA,low_prob_best_name,do not remove,NA,NA,TRUE POS: two resources; name and URL of first will be correct; second is lost,NA,NA,1/1/2018 +22032181,http://bionot.askhermes.org,"BioN√ɬÉ√ǬÉ√ɬÇ√ǬòT: a searchable database of biomedical negated sentences. Background Negated biomedical events are often ignored by text-mining applications; however, such events carry scientific significance. We report on the development of BioN√ɬÉ√ǬÉ√ɬÇ√ǬòT, a database of negated sentences that can be used to extract such negated events. Description Currently BioN√ɬÉ√ǬÉ√ɬÇ√ǬòT incorporates √ɬÉ√Ǭ¢√ɬÇ√Ǭâ√ɬÇ√Ǭà32 million negated sentences, extracted from over 336 million biomedical sentences from three resources: √ɬÉ√Ǭ¢√ɬÇ√Ǭâ√ɬÇ√Ǭà2 million full-text biomedical articles in Elsevier and the PubMed Central, as well as √ɬÉ√Ǭ¢√ɬÇ√Ǭâ√ɬÇ√Ǭà20 million abstracts in PubMed. We evaluated BioN√ɬÉ√ǬÉ√ɬÇ√ǬòT on three important genetic disorders: autism, Alzheimer's disease and Parkinson's disease, and found that BioN√ɬÉ√ǬÉ√ɬÇ√ǬòT is able to capture negated events that may be ignored by experts. Conclusions The BioN√ɬÉ√ǬÉ√ɬÇ√ǬòT database can be a useful resource for biomedical researchers. BioN√ɬÉ√ǬÉ√ɬÇ√ǬòT is freely available at http://bionot.askhermes.org/. In future work, we will develop semantic web related technologies to enrich BioN√ɬÉ√ǬÉ√ɬÇ√ǬòT.",BioN√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√ǬÉ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√ǬÉ,0.798340797,NA,0,BioN√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√ǬÉ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√ǬÉ,0.798340797,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/27/2011 +24244913,http://bionames.org,"BioNames: linking taxonomy, texts, and trees. 
BioNames is a web database of taxonomic names for animals, linked to the primary literature and, wherever possible, to phylogenetic trees. It aims to provide a taxonomic ""dashboard"" where at a glance we can see a summary of the taxonomic and phylogenetic information we have for a given taxon and hence provide a quick answer to the basic question ""what is this taxon?"" BioNames combines classifications from the Global Biodiversity Information Facility (GBIF) and GenBank, images from the Encyclopedia of Life (EOL), animal names from the Index of Organism Names (ION), and bibliographic data from multiple sources including the Biodiversity Heritage Library (BHL) and CrossRef. The user interface includes display of full text articles, interactive timelines of taxonomic publications, and zoomable phylogenies. It is available at http://bionames.org.",BioNames,0.997415423,NA,0,BioNames,0.997415423,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/29/2013 +29637199,http://onto-apps.stanford.edu/bionic,"BiOnIC: A Catalog of User Interactions with Biomedical Ontologies. BiOnIC is a catalog of aggregated statistics of user clicks, queries, and reuse counts for access to over 200 biomedical ontologies. BiOnIC also provides anonymized sequences of classes accessed by users over a period of four years. To generate the statistics, we processed the access logs of BioPortal, a large open biomedical ontology repository. We publish the BiOnIC data using DCAT and SKOS metadata standards. The BiOnIC catalog has a wide range of applicability, which we demonstrate through its use in three different types of applications. To our knowledge, this type of interaction data stemming from a real-world, large-scale application has not been published before. We expect that the catalog will become an important resource for researchers and developers in the Semantic Web community by providing novel insights into how ontologies are explored, queried and reused. 
The BiOnIC catalog may ultimately assist in the more informed development of intelligent user interfaces for semantic resources through interface customization, prediction of user browsing and querying behavior, and ontology summarization. The BiOnIC catalog is available at: http://onto-apps.stanford.edu/bionic.",BiOnIC,0.997383654,NA,0,BiOnIC,0.997383654,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/4/2017 +29529902,http://bis.zju.edu.cn/biopepdbr,"BioPepDB: an integrated data platform for food-derived bioactive peptides. Food-derived bioactive peptides play critical roles in regulating most biological processes and have considerable biological, medical and industrial importance. However, a large number of active peptides data, including sequence, function, source, commercial product information, references and other information are poorly integrated. BioPepDB is a searchable database of food-derived bioactive peptides and their related articles, including more than four thousand bioactive peptide entries. Moreover, BioPepDB provides modules of prediction and hydrolysis-simulation for discovering novel peptides. It can serve as a reference database to investigate the function of different bioactive peptides. BioPepDB is available at http://bis.zju.edu.cn/biopepdbr/ . The web page utilises Apache, PHP5 and MySQL to provide the user interface for accessing the database and predict novel peptides. The database itself is operated on a specialised server.",BioPepDB,0.996458948,NA,0,BioPepDB,0.996458948,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/12/2018 +25360160,http://ab-openlab.csir.res.in/biophytmol,"BioPhytMol: a drug discovery community resource on anti-mycobacterial phytomolecules and plant extracts. Background Tuberculosis (TB) is the second leading cause of death from a single infectious organism, demanding attention towards discovery of novel anti-tubercular compounds. 
Natural products or their derivatives have provided more than 50% of all existing drugs, offering a chemically diverse space for discovery of novel drugs. Description BioPhytMol has been designed to systematically curate and analyze the anti-mycobacterial natural product chemical space. BioPhytMol is developed as a drug-discovery community resource with anti-mycobacterial phytomolecules and plant extracts. Currently, it holds 2582 entries including 188 plant families (692 genera and 808 species) from global flora, manually curated from literature. In total, there are 633 phytomolecules (with structures) curated against 25 target mycobacteria. Multiple analysis approaches have been used to prioritize the library for drug-like compounds, for both whole cell screening and target-based approaches. In order to represent the multidimensional data on chemical diversity, physiochemical properties and biological activity data of the compound library, novel approaches such as the use of circular graphs have been employed. Conclusion BioPhytMol has been designed to systematically represent and search for anti-mycobacterial phytochemical information. Extensive compound analyses can also be performed through web-application for prioritizing drug-like compounds. The resource is freely available online at http://ab-openlab.csir.res.in/biophytmol/. Graphical AbstractBioPhytMol: a drug discovery community resource on anti-mycobacterial phytomolecules and plant extracts generated using Crowdsourcing. The platform comprises of manually curated data on antimycobacterial natural products along with tools to perform structure similarity and visualization. 
The platform allows for prioritization of drug like natural products for antimycobacterial drug discovery.",BioPhytMol,0.970525742,NA,0,BioPhytMol,0.970525742,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/11/2014 +31133849,http://tripod.nih.gov/bioplanet,"The NCATS BioPlanet - An Integrated Platform for Exploring the Universe of Cellular Signaling Pathways for Toxicology, Systems Biology, and Chemical Genomics. Chemical genomics aims to comprehensively define, and ultimately predict, the effects of small molecule compounds on biological systems. Chemical activity profiling approaches must consider chemical effects on all pathways operative in mammalian cells. To enable a strategic and maximally efficient chemical profiling of pathway space, we have created the NCATS BioPlanet, a comprehensive integrated pathway resource that incorporates the universe of 1,658 human pathways sourced from publicly available, manually curated sources, which have been subjected to thorough redundancy and consistency cross-evaluation. BioPlanet supports interactive browsing, retrieval, and analysis of pathways, exploration of pathway connections, and pathway search by gene targets, category, and availability of corresponding bioactivity assay, as well as visualization of pathways on a 3-dimensional globe, in which the distance between any two pathways is proportional to their degree of gene component overlap. Using this resource, we propose a strategy to identify a minimal set of 362 biological assays that can interrogate the universe of human pathways. The NCATS BioPlanet is a public resource, which will be continually expanded and updated, for systems biology, toxicology, and chemical genomics, available at http://tripod.nih.gov/bioplanet/.",BioPlanet,0.989541769,NA,0,BioPlanet,0.989541769,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/26/2019 +25214827,http://sparql.bioontology.org,"BioPortal as a Dataset of Linked Biomedical Ontologies and Terminologies in RDF. 
BioPortal is a repository of biomedical ontologies-the largest such repository, with more than 300 ontologies to date. This set includes ontologies that were developed in OWL, OBO and other formats, as well as a large number of medical terminologies that the US National Library of Medicine distributes in its own proprietary format. We have published the RDF version of all these ontologies at http://sparql.bioontology.org. This dataset contains 190M triples, representing both metadata and content for the 300 ontologies. We use the metadata that the ontology authors provide and simple RDFS reasoning in order to provide dataset users with uniform access to key properties of the ontologies, such as lexical properties for the class names and provenance data. The dataset also contains 9.8M cross-ontology mappings of different types, generated both manually and automatically, which come with their own metadata.",BioPortal,0.99581337,NA,0,BioPortal,0.99581337,1,NA,21672956,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2013 +21672956,"http://bioportal.bioontology.org, http://www.bioontology.org/wiki/index.php/NCBO_REST_services","BioPortal: enhanced functionality via new Web services from the National Center for Biomedical Ontology to access and use ontologies in software applications. The National Center for Biomedical Ontology (NCBO) is one of the National Centers for Biomedical Computing funded under the NIH Roadmap Initiative. Contributing to the national computing infrastructure, NCBO has developed BioPortal, a web portal that provides access to a library of biomedical ontologies and terminologies (http://bioportal.bioontology.org) via the NCBO Web services. BioPortal enables community participation in the evaluation and evolution of ontology content by providing features to add mappings between terms, to add comments linked to specific ontology terms and to provide ontology reviews. 
The NCBO Web services (http://www.bioontology.org/wiki/index.php/NCBO_REST_services) enable this functionality and provide a uniform mechanism to access ontologies from a variety of knowledge representation formats, such as Web Ontology Language (OWL) and Open Biological and Biomedical Ontologies (OBO) format. The Web services provide multi-layered access to the ontology content, from getting all terms in an ontology to retrieving metadata about a term. Users can easily incorporate the NCBO Web services into software applications to generate semantically aware applications and to facilitate structured data collection.",BioPortal,0.977196991,NA,0,BioPortal,0.977196991,1,NA,25214827,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,6/14/2011 +22139929,"http://www.ncbi.nlm.nih.gov/bioproject, http://www.ncbi.nlm.nih.gov/biosample","BioProject and BioSample databases at NCBI: facilitating capture and organization of metadata. As the volume and complexity of data sets archived at NCBI grow rapidly, so does the need to gather and organize the associated metadata. Although metadata has been collected for some archival databases, previously, there was no centralized approach at NCBI for collecting this information and using it across databases. The BioProject database was recently established to facilitate organization and classification of project data submitted to NCBI, EBI and DDBJ databases. It captures descriptive information about research projects that result in high volume submissions to archival databases, ties together related data across multiple archives and serves as a central portal by which to inform users of data availability. Concomitantly, the BioSample database is being developed to capture descriptive information about the biological samples investigated in projects. BioProject and BioSample records link to corresponding data stored in archival repositories. 
Submissions are supported by a web-based Submission Portal that guides users through a series of forms for input of rich metadata describing their projects and samples. Together, these databases offer improved ways for users to query, locate, integrate and interpret the masses of data held in NCBI's archival repositories. The BioProject and BioSample databases are available at http://www.ncbi.nlm.nih.gov/bioproject and http://www.ncbi.nlm.nih.gov/biosample, respectively.",BioProject,0.682163715,NA,0,BioProject,0.682163715,1,NA,NA,low_prob_best_name,do not remove,NA,NA,TRUE POS: two resources; name and URL of first will be correct; second is lost,NA,NA,12/1/2011 +30407529,"http://www.ebi.ac.uk/biosamples, http://www.ebi.ac.uk/about/terms-of-use","BioSamples database: an updated sample metadata hub. The BioSamples database at EMBL-EBI provides a central hub for sample metadata storage and linkage to other EMBL-EBI resources. BioSamples has recently undergone major changes, both in terms of data content and supporting infrastructure. The data content has more than doubled from around 2 million samples in 2014 to just over 5 million samples in 2018. Fast, reciprocal data exchange was fully established between sister Biosample databases and other INSDC partners, enabling a worldwide common representation and centralization of sample metadata. The BioSamples platform has been upgraded to accommodate anticipated increases in the number of submissions via GA4GH driver projects such as the Human Cell Atlas and the EGA, as well as from mirroring of NCBI dbGaP data. The BioSamples database is now the authoritative repository for all INSDC sample metadata, an ELIXIR Deposition Database for Biomolecular Data and the EMBL-EBI sample metadata hub. To support faster turnaround for sample submission, and to increase scalability and resilience, we have upgraded the BioSamples database backend storage, APIs and user interface. 
Finally, the website has been redesigned to allow search and retrieval of records based on specific filters, such as 'disease' or 'organism'. These changes are targeted at answering current use cases as well as providing functionalities for future emerging and anticipated developments. Availability: The BioSamples database is freely available at http://www.ebi.ac.uk/biosamples. Content is distributed under the EMBL-EBI Terms of Use available at https://www.ebi.ac.uk/about/terms-of-use.",BioSamples,0.992277086,NA,0,BioSamples,0.992277086,1,"22096232.0, 24265224.0",24265224,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +24265224,http://www.ebi.ac.uk/biosamples,"Updates to BioSamples database at European Bioinformatics Institute. The BioSamples database at the EBI (http://www.ebi.ac.uk/biosamples) provides an integration point for BioSamples information between technology specific databases at the EBI, projects such as ENCODE and reference collections such as cell lines. The database delivers a unified query interface and API to query sample information across EBI's databases and provides links back to assay databases. Sample groups are used to manage related samples, e.g. those from an experimental submission, or a single reference collection. Infrastructural improvements include a new user interface with ontological and key word queries, a new query API, a new data submission API, complete RDF data download and a supporting SPARQL endpoint, accessioning at the point of submission to the European Nucleotide Archive and European Genotype Phenotype Archives and improved query response times.",BioSamples,0.893052816,NA,0,BioSamples,0.893052816,1,"22096232.0, 30407529.0",30407529,low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/21/2013 +22096232,http://www.ebi.ac.uk/biosamples,"The BioSample Database (BioSD) at the European Bioinformatics Institute. 
The BioSample Database (http://www.ebi.ac.uk/biosamples) is a new database at EBI that stores information about biological samples used in molecular experiments, such as sequencing, gene expression or proteomics. The goals of the BioSample Database include: (i) recording and linking of sample information consistently within EBI databases such as ENA, ArrayExpress and PRIDE; (ii) minimizing data entry efforts for EBI database submitters by enabling submitting sample descriptions once and referencing them later in data submissions to assay databases and (iii) supporting cross database queries by sample characteristics. Each sample in the database is assigned an accession number. The database includes a growing set of reference samples, such as cell lines, which are repeatedly used in experiments and can be easily referenced from any database by their accession numbers. Accession numbers for the reference samples will be exchanged with a similar database at NCBI. The samples in the database can be queried by their attributes, such as sample types, disease names or sample providers. A simple tab-delimited format facilitates submissions of sample information to the database, initially via email to biosamples@ebi.ac.uk.",BioSD,0.614407798,BioSample Database,0.469798426,BioSD,0.614407798,1,"24265224.0, 30407529.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,11/16/2011 +27189610,"http://www.biosharing.org, http://www.biosharing.org","BioSharing: curated and crowd-sourced metadata standards, databases and data policies in the life sciences. . BioSharing (http://www.biosharing.org) is a manually curated, searchable portal of three linked registries. These resources cover standards (terminologies, formats and models, and reporting guidelines), databases, and data policies in the life sciences, broadly encompassing the biological, environmental and biomedical sciences. 
Launched in 2011 and built by the same core team as the successful MIBBI portal, BioSharing harnesses community curation to collate and cross-reference resources across the life sciences from around the world. BioSharing makes these resources findable and accessible (the core of the FAIR principle). Every record is designed to be interlinked, providing a detailed description not only on the resource itself, but also on its relations with other life science infrastructures. Serving a variety of stakeholders, BioSharing cultivates a growing community, to which it offers diverse benefits. It is a resource for funding bodies and journal publishers to navigate the metadata landscape of the biological sciences; an educational resource for librarians and information advisors; a publicising platform for standard and database developers/curators; and a research tool for bench and computer scientists to plan their work. BioSharing is working with an increasing number of journals and other registries, for example linking standards and databases to training material and tools. Driven by an international Advisory Board, the BioSharing user-base has grown by over 40% (by unique IP address), in the last year thanks to successful engagement with researchers, publishers, librarians, developers and other stakeholders via several routes, including a joint RDA/Force11 working group and a collaboration with the International Society for Biocuration. In this article, we describe BioSharing, with a particular focus on community-led curation.Database URL: https://www.biosharing.org.",BioSharing,0.98941499,NA,0,BioSharing,0.98941499,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/17/2016 +26820405,http://brd.bsvgateway.org/brd,"The Biosurveillance Analytics Resource Directory (BARD): Facilitating the Use of Epidemiological Models for Infectious Disease Surveillance. 
Epidemiological modeling for infectious disease is important for disease management and its routine implementation needs to be facilitated through better description of models in an operational context. A standardized model characterization process that allows selection or making manual comparisons of available models and their results is currently lacking. A key need is a universal framework to facilitate model description and understanding of its features. Los Alamos National Laboratory (LANL) has developed a comprehensive framework that can be used to characterize an infectious disease model in an operational context. The framework was developed through a consensus among a panel of subject matter experts. In this paper, we describe the framework, its application to model characterization, and the development of the Biosurveillance Analytics Resource Directory (BARD; http://brd.bsvgateway.org/brd/), to facilitate the rapid selection of operational models for specific infectious/communicable diseases. We offer this framework and associated database to stakeholders of the infectious disease modeling field as a tool for standardizing model description and facilitating the use of epidemiological models.",ARD,0.646821141,Biosurveillance Analytics Resource Directory,0.863397956,Biosurveillance Analytics Resource Directory,0.863397956,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/28/2016 +23550138,http://arup.utah.edu/database/BTD/BTD_welcome.php,"The Biotinidase Gene Variants Registry: A Paradigm Public Database. The BTD gene codes for production of biotinidase, the enzyme responsible for helping the body reuse and recycle the biotin found in foods. Biotinidase deficiency is an autosomal recessively inherited disorder resulting in the inability to recycle the vitamin biotin and affects approximately 1 in 60,000 newborns. 
If untreated, the depletion of intracellular biotin leads to impaired activities of the biotin-dependent carboxylases and can result in cutaneous and neurological abnormalities in individuals with the disorder. Mutations in the biotinidase gene (BTD) alter enzymatic function. To date, more than 165 mutations in BTD have been reported. Our group has developed a database that characterizes the known mutations and sequence variants in BTD (http://arup.utah.edu/database/BTD/BTD_welcome.php). All sequence variants have been verified for their positions within the BTD gene and designated according to standard nomenclature suggested by Human Genome Variation Society (HGVS). In addition, we describe the change in the protein, indicate whether the variant is a known or likely mutation vs. a benign polymorphism, and include the reference that first described the alteration. We also indicate whether the alteration is known to be clinically pathological based on an observation of a known symptomatic individual or predicted to be pathological based on enzymatic activity or putative disruption of the protein structure. We incorporated the published phenotype to help establish genotype-phenotype correlations and facilitate this process for those performing mutation analysis and/or interpreting results. Other features of this database include disease information, relevant links about biotinidase deficiency, reference sequences, ability to query by various criteria, and the process for submitting novel variations. This database is free to the public and will be updated quarterly. This database is a paradigm for formulating databases for other inherited metabolic disorders.",NA,0,Biotinidase Gene Variants Registry,0.961240504,Biotinidase Gene Variants Registry,0.961240504,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/9/2013 +28423831,http://purl.org/biotop,"The BioTop Family of Upper Level Ontological Resources for Biomedicine. 
BioTop is a domain upper level ontology for the life sciences, based on OWL DL, introduced ten years ago. This paper provides an update of the current state of this resource, with a special focus on BioTop's top level, BioTopLite, which currently contains 55 classes, 37 object properties and 247 description logics axioms. A bridging file allows harmonising BioTopLite with the classes of Basic Formal Ontology BFO2. The updated OWL resources are available at http://purl.org/biotop. They build the core of several upper level ontological artefacts including bridging ontologies to other upper level resources.",BioTop,0.992148519,NA,0,BioTop,0.992148519,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +32369809,http://sites.unipampa.edu.br/birdchromosomedatabase,"Introducing the Bird Chromosome Database: An Overview of Cytogenetic Studies in Birds. Bird chromosomes, which have been investigated scientifically for more than a century, present a number of unique features. In general, bird karyotypes have a high diploid number (2n) of typically around 80 chromosomes that are divided into macro- and microchromosomes. In recent decades, FISH studies using whole chromosome painting probes have shown that the macrochromosomes evolved through both inter- and intrachromosomal rearrangements. However, chromosome painting data are available for only a few bird species, which hinders a more systematic approach to the understanding of the evolutionary history of the enigmatic bird karyotype. Thus, we decided to create an innovative database through compilation of the cytogenetic data available for birds, including chromosome numbers and the results of chromosome painting with chicken (Gallus gallus) probes. The data were obtained through an extensive literature review, which focused on cytogenetic studies published up to 2019. 
In the first version of the ""Bird Chromosome Database (BCD)"" (https://sites.unipampa.edu.br/birdchromosomedatabase) we have compiled data on the chromosome numbers of 1,067 bird species and chromosome painting data on 96 species. We found considerable variation in the diploid numbers, which ranged from 40 to 142, although most (around 50%) of the species studied up to now have between 78 and 82 chromosomes. Despite its importance for cytogenetic research, chromosome painting has been applied to less than 1% of all bird species. The BCD will enable researchers to identify the main knowledge gaps in bird cytogenetics, including the most under-sampled groups, and make inferences on chromosomal homologies in phylogenetic studies.",BCD,0.883974791,Bird Chromosome Database,0.89551114,Bird Chromosome Database,0.89551114,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/6/2020 +23390356,http://www.bioterrorism.biowaves.org,"BIRS - Bioterrorism Information Retrieval System. Unlabelled Bioterrorism is the intended use of pathogenic strains of microbes to widen terror in a population. There is a definite need to promote research for development of vaccines, therapeutics and diagnostic methods as a part of preparedness to any bioterror attack in the future. BIRS is an open-access database of collective information on the organisms related to bioterrorism. The architecture of database utilizes the current open-source technology viz PHP ver 5.3.19, MySQL and IIS server under windows platform for database designing. Database stores information on literature, generic- information and unique pathways of about 10 microorganisms involved in bioterrorism. This may serve as a collective repository to accelerate the drug discovery and vaccines designing process against such bioterrorist agents (microbes). The available data has been validated from various online resources and literature mining in order to provide the user with a comprehensive information system. 
Availability The database is freely available at http://www.bioterrorism.biowaves.org.",BIRS,0.993844032,BIRS - Bioterrorism Information Retrieval System,0.7868629,BIRS,0.993844032,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/18/2013 +30357384,http://bitterdb.agri.huji.ac.il,"BitterDB: taste ligands and receptors database in 2019. BitterDB (http://bitterdb.agri.huji.ac.il) was introduced in 2012 as a central resource for information on bitter-tasting molecules and their receptors. The information in BitterDB is frequently used for choosing suitable ligands for experimental studies, for developing bitterness predictors, for analysis of receptors promiscuity and more. Here, we describe a major upgrade of the database, including significant increase in content as well as new features. BitterDB now holds over 1000 bitter molecules, up from the initial 550. When available, quantitative sensory data on bitterness intensity as well as toxicity information were added. For 270 molecules, at least one associated bitter taste receptor (T2R) is reported. The overall number of ligand-T2R associations is now close to 800. BitterDB was extended to several species: in addition to human, it now holds information on mouse, cat and chicken T2Rs, and the compounds that activate them. BitterDB now provides a unique platform for structure-based studies with high-quality homology models, known ligands, and for the human receptors also data from mutagenesis experiments, information on frequently occurring single nucleotide polymorphisms and links to expression levels in different tissues.",BitterDB,0.998442054,NA,0,BitterDB,0.998442054,1,NA,21940398,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2019 +21940398,http://bitterdb.agri.huji.ac.il/bitterdb,"BitterDB: a database of bitter compounds. 
Basic taste qualities like sour, salty, sweet, bitter and umami serve specific functions in identifying food components found in the diet of humans and animals, and are recognized by proteins in the oral cavity. Recognition of bitter taste and aversion to it are thought to protect the organism against the ingestion of poisonous food compounds, which are often bitter. Interestingly, bitter taste receptors are expressed not only in the mouth but also in extraoral tissues, such as the gastrointestinal tract, indicating that they may play a role in digestive and metabolic processes. BitterDB database, available at http://bitterdb.agri.huji.ac.il/bitterdb/, includes over 550 compounds that were reported to taste bitter to humans. The compounds can be searched by name, chemical structure, similarity to other bitter compounds, association with a particular human bitter taste receptor, and so on. The database also contains information on mutations in bitter taste receptors that were shown to influence receptor activation by bitter compounds. The aim of BitterDB is to facilitate studying the chemical features associated with bitterness. These studies may contribute to predicting bitterness of unknown compounds, predicting ligands for bitter receptors from different species and rational design of bitterness modulators.",BitterDB,0.998117387,NA,0,BitterDB,0.998117387,1,NA,30357384,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,9/22/2011 +24264865,http://agknapp.chemie.fu-berlin.de/bivalbind,"Database of protein complexes with multivalent binding ability: Bival-Bind. Phenomena of multivalent binding of ligands with receptors are ubiquitous in biology and of growing interest in material sciences. Multivalency can enhance binding affinity dramatically. To understand the mechanism of multivalent binding in more detail model systems of bi- and multivalent receptors are needed, but are difficult to find. 
Furthermore it is useful to know about multivalent receptors, which can serve as targets to design multivalent drugs. The present contribution tries to close this gap. The Bival-Bind database (http://agknapp.chemie.fu-berlin.de/bivalbind) provides a relatively complete list - 2073 protein complexes with less than 90% sequence identity - out of the protein database, which can serve as bi- or multivalent receptors. Steric clashes of molecular spacers - necessary to connect the monomeric ligand units - with the receptor surface can diminish binding affinity dramatically and, thus, abolish the expected enhancement of binding affinity due to the multivalency. The potential multivalent receptors in the Bival-Bind database are characterized with respect to the receptor surface topography. A height profile between the receptor binding pockets is provided, which is an important information to estimate the influence of unfavorable spacer receptor interaction.",Bival-Bind,0.987922519,NA,0,Bival-Bind,0.987922519,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/22/2013 +24185696,http://www.med.unsw.edu.au/CRCWeb.nsf/page/BloodChIP,"BloodChIP: a database of comparative genome-wide transcription factor binding profiles in human blood cells. The BloodChIP database (http://www.med.unsw.edu.au/CRCWeb.nsf/page/BloodChIP) supports exploration and visualization of combinatorial transcription factor (TF) binding at a particular locus in human CD34-positive and other normal and leukaemic cells or retrieval of target gene sets for user-defined combinations of TFs across one or more cell types. Increasing numbers of genome-wide TF binding profiles are being added to public repositories, and this trend is likely to continue. For the power of these data sets to be fully harnessed by experimental scientists, there is a need for these data to be placed in context and easily accessible for downstream applications. 
To this end, we have built a user-friendly database that has at its core the genome-wide binding profiles of seven key haematopoietic TFs in human stem/progenitor cells. These binding profiles are compared with binding profiles in normal differentiated and leukaemic cells. We have integrated these TF binding profiles with chromatin marks and expression data in normal and leukaemic cell fractions. All queries can be exported into external sites to construct TF-gene and protein-protein networks and to evaluate the association of genes with cellular processes and tissue expression.",BloodChIP,0.998485148,NA,0,BloodChIP,0.998485148,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/31/2013 +27623959,http://gene.cqu.edu.cn/BmncRNAdb/index.php,"BmncRNAdb: a comprehensive database of non-coding RNAs in the silkworm, Bombyx mori. Background Long non-coding RNAs (lncRNAs) may play critical roles in a wide range of developmental processes of higher organisms. Recently, lncRNAs have been widely identified across eukaryotes and many databases of lncRNAs have been developed for human, mouse, fruit fly, etc. However, there is rare information about them in the only completely domesticated insect, silkworm (Bombyx mori). Description In this study, we systematically scanned lncRNAs using the available silkworm RNA-seq data and public unigenes. Finally, we identified and collected 6281 lncRNAs in the silkworm. Besides, we also collected 1986 microRNAs (miRNAs) from previous studies. Then, we organized them into a comprehensive and web-based database, BmncRNAdb. This database offers a user-friendly interface for data browse and online analysis as well as the three online tools for users to predict the target genes of lncRNA or miRNA. Conclusions We have systematically identified and collected the silkworm lncRNAs and constructed a comprehensive database of the silkworm lncRNAs and miRNAs. 
This work gives a glimpse into lncRNAs of the silkworm and lays foundations for the ncRNAs study of the silkworm and other insects in the future. The BmncRNAdb is freely available at http://gene.cqu.edu.cn/BmncRNAdb/index.php .",BmncRNAdb,0.86746788,NA,0,BmncRNAdb,0.86746788,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/13/2016 +23886610,http://gene.cqu.edu.cn/BmTEdb,"BmTEdb: a collective database of transposable elements in the silkworm genome. The silkworm, Bombyx mori, is one of the major insect model organisms, and its draft and fine genome sequences became available in 2004 and 2008, respectively. Transposable elements (TEs) constitute ~40% of the silkworm genome. To better understand the roles of TEs in organization, structure and evolution of the silkworm genome, we used a combination of de novo, structure-based and homology-based approaches for identification of the silkworm TEs and identified 1308 silkworm TE families. These TE families and their classification information were organized into a comprehensive and easy-to-use web-based database, BmTEdb. Users are entitled to browse, search and download the sequences in the database. Sequence analyses such as BLAST, HMMER and EMBOSS GetORF were also provided in BmTEdb. This database will facilitate studies for the silkworm genomics, the TE functions in the silkworm and the comparative analysis of the insect TEs. Database URL: http://gene.cqu.edu.cn/BmTEdb/.",BmTEdb,0.981323123,NA,0,BmTEdb,0.981323123,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/25/2013 +28365726,http://sites.biology.duke.edu/windhamlab,"Boechera microsatellite website: an online portal for species identification and determination of hybrid parentage. . Boechera (Brassicaceae) has many features to recommend it as a model genus for ecological and evolutionary research, including species richness, ecological diversity, experimental tractability and close phylogenetic proximity to Arabidopsis . 
However, efforts to realize the full potential of this model system have been thwarted by the frequent inability of researchers to identify their samples and place them in a broader evolutionary context. Here we present the Boechera Microsatellite Website (BMW), a portal that archives over 55 000 microsatellite allele calls from 4471 specimens (including 133 nomenclatural types). The portal includes analytical tools that utilize data from 15 microsatellite loci as a highly effective DNA barcoding system. The BMW facilitates the accurate identification of Boechera samples and the investigation of reticulate evolution among the ±83 sexual diploid taxa in the genus, thereby greatly enhancing Boechera 's potential as a model system. http://sites.biology.duke.edu/windhamlab/.",BMW,0.984817743,Boechera Microsatellite Website,0.962700583,BMW,0.984817743,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +24214957,http://bNAber.org,"bNAber: database of broadly neutralizing HIV antibodies. The discovery of broadly neutralizing antibodies (bNAbs) has provided an enormous impetus to the HIV vaccine research and to entire immunology. The bNAber database at http://bNAber.org provides open, user-friendly access to detailed data on the rapidly growing list of HIV bNAbs, including neutralization profiles, sequences and three-dimensional structures (when available). It also provides an extensive list of visualization and analysis tools, such as heatmaps to analyse neutralization data as well as structure and sequence viewers to correlate bNAbs properties with structural and sequence features of individual antibodies. The goal of the bNAber database is to enable researchers in this field to easily compare and analyse available information on bNAbs thereby supporting efforts to design an effective vaccine for HIV/AIDS. 
The bNAber database not only provides easy access to data that currently is scattered in the Supplementary Materials sections of individual papers, but also contributes to the development of general standards of data that have to be presented with the discovery of new bNAbs and a universal mechanism of how such data can be shared.",bNAber,0.994556367,NA,0,bNAber,0.994556367,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/7/2013 +33399824,http://rapeseed.biocloud.net/home,"BnaGVD: A genomic variation database of rapeseed (Brassica napus). . Rapeseed (Brassica napus L.) is a typical polyploid crop and one of the most important oilseed crops worldwide. With the rapid progress on high-throughput sequencing technologies and the reduction of sequencing cost, large-scale genomic data of a specific crop have become available. However, raw sequence data are mostly deposited in the sequence read archive of the National Center of Biotechnology Information (NCBI) and the European Nucleotide Archive (ENA), which is freely accessible to all researchers. Extensive tools for practical purposes should be developed to efficiently utilize these large raw data. Here, we report a web-based rapeseed genomic variation database (BnaGVD, http://rapeseed.biocloud.net/home) from which genomic variations, such as single nucleotide polymorphisms (SNPs) and insertions/deletions (InDels) across a world-wide collection of rapeseed accessions, can be referred. The current release of the BnaGVD contains 34,591,899 high-quality SNPs and 12,281,923 high-quality InDels and provides search tools to retrieve genomic variations and gene annotations across 1,007 accessions of worldwide rapeseed germplasm. We implement a variety of built-in tools (e.g., BnaGWAS, BnaPCA, and BnaStructure) to help users perform in-depth analyses. 
We recommend this web resource for accelerating studies on the functional genomics and screening of molecular markers for rapeseed breeding.",BnaGVD,0.99503864,rapeseed genomic variation database,0.979584813,BnaGVD,0.99503864,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/5/2021 +24079801,http://ocri-genomics.org/bolbase,"Bolbase: a comprehensive genomics database for Brassica oleracea. Background Brassica oleracea is a morphologically diverse species in the family Brassicaceae and contains a group of nutrition-rich vegetable crops, including common heading cabbage, cauliflower, broccoli, kohlrabi, kale, Brussels sprouts. This diversity along with its phylogenetic membership in a group of three diploid and three tetraploid species, and the recent availability of genome sequences within Brassica provide an unprecedented opportunity to study intra- and inter-species divergence and evolution in this species and its close relatives. Description We have developed a comprehensive database, Bolbase, which provides access to the B. oleracea genome data and comparative genomics information. The whole genome of B. oleracea is available, including nine fully assembled chromosomes and 1,848 scaffolds, with 45,758 predicted genes, 13,382 transposable elements, and 3,581 non-coding RNAs. Comparative genomics information is available, including syntenic regions among B. oleracea, Brassica rapa and Arabidopsis thaliana, synonymous (Ks) and non-synonymous (Ka) substitution rates between orthologous gene pairs, gene families or clusters, and differences in quantity, category, and distribution of transposable elements on chromosomes. Bolbase provides useful search and data mining tools, including a keyword search, a local BLAST server, and a customized GBrowse tool, which can be used to extract annotations of genome components, identify similar sequences and visualize syntenic regions among species. Users can download all genomic data and explore comparative genomics in a highly visual setting. 
Conclusions Bolbase is the first resource platform for the B. oleracea genome and for genomic comparisons with its relatives, and thus it will help the research community to better study the function and evolution of Brassica genomes as well as enhance molecular breeding research. This database will be updated regularly with new features, improvements to genome annotation, and new genomic sequences as they become available. Bolbase is freely available at http://ocri-genomics.org/bolbase.",Bolbase,0.990760982,NA,0,Bolbase,0.990760982,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/30/2013 +28453653,http://bar.biocomp.unibo.it/bar3,"The Bologna Annotation Resource (BAR 3.0): improving protein functional annotation. BAR 3.0 updates our server BAR (Bologna Annotation Resource) for predicting protein structural and functional features from sequence. We increase data volume, query capabilities and information conveyed to the user. The core of BAR 3.0 is a graph-based clustering procedure of UniProtKB sequences, following strict pairwise similarity criteria (sequence identity ≥40% with alignment coverage ≥90%). Each cluster contains the available annotation downloaded from UniProtKB, GO, PFAM and PDB. After statistical validation, GO terms and PFAM domains are cluster-specific and annotate new sequences entering the cluster after satisfying similarity constraints. BAR 3.0 includes 28 869 663 sequences in 1 361 773 clusters, of which 22.2% (22 241 661 sequences) and 47.4% (24 555 055 sequences) have at least one validated GO term and one PFAM domain, respectively. 1.4% of the clusters (36% of all sequences) include PDB structures and the cluster is associated to a hidden Markov model that allows building template-target alignment suitable for structural modeling. Some other 3 399 026 sequences are singletons. 
BAR 3.0 offers an improved search interface, allowing queries by UniProtKB-accession, Fasta sequence, GO-term, PFAM-domain, organism, PDB and ligand/s. When evaluated on the CAFA2 targets, BAR 3.0 largely outperforms our previous version and scores among state-of-the-art methods. BAR 3.0 is publicly available and accessible at http://bar.biocomp.unibo.it/bar3.",NA,0,Bologna Annotation Resource,0.698403805,Bologna Annotation Resource,0.698403805,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2017 +31958638,http://bomiprot.org,"BoMiProt: A database of bovine milk proteins. Bovine milk has become an important biological fluid for proteomic research due to its nutritional and immunological benefits. To date, over 300 publications have reported changes in bovine milk protein composition based on seasons, lactation stages, breeds, health status and milk fractions while there are no reports on consolidation or overlap of data between studies. Thus, we have developed a literature-based, manually curated open online database of bovine milk proteome, BoMiProt (http://bomiprot.org), with over 3100 proteins from whey, fat globule membranes and exosomes. Each entry in the database is thoroughly cross-referenced including 397 proteins with well-defined information on protein function, biochemical properties, post-translational modifications and significance in milk from different publications. Of 397 proteins, over 199 have been reported with a structural gallery of homology models and crystal structures in the database. The proteome data can be retrieved using several search parameters such as protein name, accession IDs, FASTA sequence. Furthermore, the proteome data can be filtered based on milk fractions, post-translational modifications and/or structures. 
Taken together, BoMiProt represents an extensive compilation of bovine milk proteins from literature, providing a foundation for future studies to identify specific milk proteins which may be linked to mammary gland pathophysiology. BIOLOGICAL SIGNIFICANCE: Protein data identified from different previously published proteomic studies on bovine milk samples (21 publications) were gathered in the BoMiProt database. Unification of the identified proteins will give researchers an initial reference database on bovine milk proteome to understand the complexities of milk as a biological fluid. BoMiProt has a user-friendly interface with several useful features, including different search criteria for primary and secondary information of proteins along with cross-references to external databases. The database will provide insights into the existing literature and possible future directions to investigate further and improve the beneficial effects of bovine milk components and dairy products on human health.",BoMiProt,0.996749759,NA,0,BoMiProt,0.996749759,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/17/2020 +23203889,http://www.ncbi.nlm.nih.gov/books,"NCBI Bookshelf: books and documents in life sciences and health care. Bookshelf (http://www.ncbi.nlm.nih.gov/books/) is a full-text electronic literature resource of books and documents in life sciences and health care at the National Center for Biotechnology Information (NCBI). Created in 1999 with a single book as an encyclopedic reference for resources such as PubMed and GenBank, it has grown to its current size of >1300 titles. Unlike other NCBI databases, such as GenBank and Gene, which have a strict data structure, books come in all forms; they are diverse in publication types, formats, sizes and authoring models. The Bookshelf data format is XML tagged in the NCBI Book DTD (Document Type Definition), modeled after the National Library of Medicine journal article DTDs. 
The book DTD has been used for systematically tagging the diverse data formats of books, a move that has set the foundation for the growth of this resource. Books at NCBI followed the route of journal articles in the PubMed Central project, using the PubMed Central architectural framework, workflows and processes. Through integration with other NCBI molecular databases, books at NCBI can be used to provide reference information for biological data and facilitate its discovery. This article describes Bookshelf at NCBI: its growth, data handling and retrieval and integration with molecular databases.",Bookshelf,0.957379818,NA,0,Bookshelf,0.957379818,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/29/2012 +24994456,http://borreliabase.org,"BorreliaBase: a phylogeny-centered browser of Borrelia genomes. Background The bacterial genus Borrelia (phylum Spirochaetes) consists of two groups of pathogens represented respectively by B. burgdorferi, the agent of Lyme borreliosis, and B. hermsii, the agent of tick-borne relapsing fever. The number of publicly available Borrelia genomic sequences is growing rapidly with the discovery and sequencing of Borrelia strains worldwide. There is however a lack of dedicated online databases to facilitate comparative analyses of Borrelia genomes. Description We have developed BorreliaBase, an online database for comparative browsing of Borrelia genomes. The database is currently populated with sequences from 35 genomes of eight Lyme-borreliosis (LB) group Borrelia species and 7 Relapsing-fever (RF) group Borrelia species. Distinct from genome repositories and aggregator databases, BorreliaBase serves manually curated comparative-genomic data including genome-based phylogeny, genome synteny, and sequence alignments of orthologous genes and intergenic spacers. 
Conclusions With a genome phylogeny at its center, BorreliaBase allows online identification of hypervariable lipoprotein genes, potential regulatory elements, and recombination footprints by providing evolution-based expectations of sequence variability at each genomic locus. The phylo-centric design of BorreliaBase (http://borreliabase.org) is a novel model for interactive browsing and comparative analysis of bacterial genomes online.",BorreliaBase,0.995369911,NA,0,BorreliaBase,0.995369911,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/3/2014 +21995777,http://brassicadb.org,"BRAD, the genetics and genomics database for Brassica plants. Background Brassica species include both vegetable and oilseed crops, which are very important to the daily life of common human beings. Meanwhile, the Brassica species represent an excellent system for studying numerous aspects of plant biology, specifically for the analysis of genome evolution following polyploidy, so it is also very important for scientific research. Now, the genome of Brassica rapa has already been assembled, it is the time to do deep mining of the genome data. Description BRAD, the Brassica database, is a web-based resource focusing on genome scale genetic and genomic data for important Brassica crops. BRAD was built based on the first whole genome sequence and on further data analysis of the Brassica A genome species, Brassica rapa (Chiifu-401-42). It provides datasets, such as the complete genome sequence of B. rapa, which was de novo assembled from Illumina GA II short reads and from BAC clone sequences, predicted genes and associated annotations, non coding RNAs, transposable elements (TE), B. rapa genes' orthologous to those in A. thaliana, as well as genetic markers and linkage maps. 
BRAD offers useful searching and data mining tools, including search across annotation datasets, search for syntenic or non-syntenic orthologs, and to search the flanking regions of a certain target, as well as the tools of BLAST and Gbrowse. BRAD allows users to enter almost any kind of information, such as a B. rapa or A. thaliana gene ID, physical position or genetic marker. Conclusion BRAD, a new database which focuses on the genetics and genomics of the Brassica plants has been developed, it aims at helping scientists and breeders to fully and efficiently use the information of genome data of Brassica plants. BRAD will be continuously updated and can be accessed through http://brassicadb.org.",BRAD,0.977056324,Brassica database,0.716363907,BRAD,0.977056324,1,NA,26589635,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/13/2011 +26589635,http://brassicadb.org/brad,"Brassica database (BRAD) version 2.0: integrating and mining Brassicaceae species genomic resources. . The Brassica database (BRAD) was built initially to assist users apply Brassica rapa and Arabidopsis thaliana genomic data efficiently to their research. However, many Brassicaceae genomes have been sequenced and released after its construction. These genomes are rich resources for comparative genomics, gene annotation and functional evolutionary studies of Brassica crops. Therefore, we have updated BRAD to version 2.0 (V2.0). In BRAD V2.0, 11 more Brassicaceae genomes have been integrated into the database, namely those of Arabidopsis lyrata, Aethionema arabicum, Brassica oleracea, Brassica napus, Camelina sativa, Capsella rubella, Leavenworthia alabamica, Sisymbrium irio and three extremophiles Schrenkiella parvula, Thellungiella halophila and Thellungiella salsuginea. BRAD V2.0 provides plots of syntenic genomic fragments between pairs of Brassicaceae species, from the level of chromosomes to genomic blocks. 
The Generic Synteny Browser (GBrowse_syn), a module of the Genome Browser (GBrowse), is used to show syntenic relationships between multiple genomes. Search functions for retrieving syntenic and non-syntenic orthologs, as well as their annotation and sequences are also provided. Furthermore, genome and annotation information have been imported into GBrowse so that all functional elements can be visualized in one frame. We plan to continually update BRAD by integrating more Brassicaceae genomes into the database. Database URL: http://brassicadb.org/brad/.",BRAD,0.961567163,Brassica database,0.860853076,BRAD,0.961567163,1,NA,21995777,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/20/2015 +25784642,http://tumorsdatabase.altervista.org,"Brain Tumor Database, a free relational database for collection and analysis of brain tumor patient information. In this study, we describe the development and utilization of a relational database designed to manage the clinical and radiological data of patients with brain tumors. The Brain Tumor Database was implemented using MySQL v.5.0, while the graphical user interface was created using PHP and HTML, thus making it easily accessible through a web browser. This web-based approach allows for multiple institutions to potentially access the database. The BT Database can record brain tumor patient information (e.g. clinical features, anatomical attributes, and radiological characteristics) and be used for clinical and research purposes. Analytic tools to automatically generate statistics and different plots are provided. The BT Database is a free and powerful user-friendly tool with a wide range of possible clinical and research applications in neurology and neurosurgery. 
The BT Database graphical user interface source code and manual are freely available at http://tumorsdatabase.altervista.org.",Brain Tumor,0.489937514,Brain Tumor Database,0.788560754,Brain Tumor Database,0.788560754,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/1/2015 +34528715,http://www.brain-uk.org,"BRAIN UK: Accessing NHS tissue archives for neuroscience research. The purpose of BRAIN UK (the UK BRain Archive Information Network) is to make the very extensive and comprehensive National Health Service (NHS) Neuropathology archives available to the national and international neuroscience research community. The archives comprise samples of tumours and a wide range of other neurological disorders, not only from the brain but also spinal cord, peripheral nerve, muscle, eye and other organs when relevant. BRAIN UK was founded after the recognition of the importance of this large tissue resource, which was not previously readily accessible for research use. BRAIN UK has successfully engaged the majority of the regional clinical neuroscience centres in the United Kingdom to produce a centralised database of the extensive autopsy and biopsy archive. Together with a simple application process and its broad ethical approval, BRAIN UK offers researchers easy access to most of the national archives of neurological tissues and tumours (http://www.brain-uk.org). The range of tissues available reflects the spectrum of disease in society, including many conditions not covered by disease-specific brain banks, and also allows relatively large numbers of cases of uncommon conditions to be studied. BRAIN UK has supported 141 studies (2010-2020) that have generated 70 publications employing methodology as diverse as morphometrics, genetics, proteomics and methylomics. Tissue samples that would otherwise have been unused have supported valuable neuroscience research. 
The importance of this unique resource will only increase as molecular techniques applicable to human tissues continue to develop and technical advances permit large-scale high-throughput studies.",BRAIN UK,0.898930291,NA,0,BRAIN UK,0.898930291,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/28/2021 +29985970,http://www.brainexp.org,"BrainEXP: a database featuring with spatiotemporal expression variations and co-expression organizations in human brains. Summary Gene expression changes over the lifespan and varies among different tissues or cell types. Gene co-expression also changes by sex, age, different tissues or cell types. However, gene expression under the normal state and gene co-expression in the human brain has not been fully defined and quantified. Here we present a database named Brain EXPression Database (BrainEXP) which provides spatiotemporal expression of individual genes and co-expression in normal human brains. BrainEXP consists of 4567 samples from 2863 healthy individuals gathered from existing public databases and our own data, in either microarray or RNA-Seq library types. We mainly provide two analysis results based on the large dataset: (i) basic gene expression across specific brain regions, age ranges and sexes; (ii) co-expression analysis from different platforms. Availability and implementation http://www.brainexp.org/. Supplementary information Supplementary data are available at Bioinformatics online.",BrainEXP,0.997224808,Brain EXPression Database,0.938273266,BrainEXP,0.997224808,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +26794641,"http://www.brainsimagebank.ac.uk, http://dicom.nema.org","A brain imaging repository of normal structural MRI across the life course: Brain Images of Normal Subjects (BRAINS). 
The Brain Images of Normal Subjects (BRAINS) Imagebank (http://www.brainsimagebank.ac.uk) is an integrated repository project hosted by the University of Edinburgh and sponsored by the Scottish Imaging Network: A Platform for Scientific Excellence (SINAPSE) collaborators. BRAINS provide sharing and archiving of detailed normal human brain imaging and relevant phenotypic data already collected in studies of healthy volunteers across the life-course. It particularly focusses on the extremes of age (currently older age, and in future perinatal) where variability is largest, and which are under-represented in existing databanks. BRAINS is a living imagebank where new data will be added when available. Currently BRAINS contains data from 808 healthy volunteers, from 15 to 81years of age, from 7 projects in 3 centres. Additional completed and ongoing studies of normal individuals from 1st to 10th decades are in preparation and will be included as they become available. BRAINS holds several MRI structural sequences, including T1, T2, T2* and fluid attenuated inversion recovery (FLAIR), available in DICOM (http://dicom.nema.org/); in future Diffusion Tensor Imaging (DTI) will be added where available. Images are linked to a wide range of 'textual data', such as age, medical history, physiological measures (e.g. blood pressure), medication use, cognitive ability, and perinatal information for pre/post-natal subjects. The imagebank can be searched to include or exclude ranges of these variables to create better estimates of 'what is normal' at different ages.",BRAINS,0.985277057,Brain Images of Normal Subjects,0.966778862,BRAINS,0.985277057,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/18/2016 +24259684,http://brassibase.cos.uni-heidelberg.de,"BrassiBase: introduction to a novel knowledge database on Brassicaceae evolution. 
The Brassicaceae family (mustards or crucifers) includes Arabidopsis thaliana as one of the most important model species in plant biology and a number of important crop plants such as the various Brassica species (e.g. cabbage, canola and mustard). Moreover, the family comprises an increasing number of species that serve as study systems in many fields of plant science and evolutionary research. However, the systematics and taxonomy of the family are very complex and access to scientifically valuable and reliable information linked to species and genus names and its interpretation are often difficult. BrassiBase is a continuously developing and growing knowledge database (http://brassibase.cos.uni-heidelberg.de) that aims at providing direct access to many different types of information ranging from taxonomy and systematics to phylo- and cytogenetics. Providing critically revised key information, the database intends to optimize comparative evolutionary research in this family and supports the introduction of the Brassicaceae as the model family for evolutionary biology and plant sciences. Some features that should help to accomplish these goals within a comprehensive taxonomic framework have now been implemented in the new version 1.1.9. A 'Phylogenetic Placement Tool' should help to identify critical accessions and germplasm and provide a first visualization of phylogenetic relationships. The 'Cytogenetics Tool' provides in-depth information on genome sizes, chromosome numbers and polyploidy, and sets this information into a Brassicaceae-wide context.",BrassiBase,0.991759181,NA,0,BrassiBase,0.991759181,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/19/2013 +24948109,http://im-crop.snu.ac.kr/BrassicaTED/index.php,"BrassicaTED - a public database for utilization of miniature transposable elements in Brassica species. Background MITE, TRIM and SINEs are miniature form transposable elements (mTEs) that are ubiquitous and dispersed throughout entire plant genomes. 
Tens of thousands of members cause insertion polymorphism at both the inter- and intra- species level. Therefore, mTEs are valuable targets and resources for development of markers that can be utilized for breeding, genetic diversity and genome evolution studies. Taking advantage of the completely sequenced genomes of Brassica rapa and B. oleracea, characterization of mTEs and building a curated database are prerequisite to extending their utilization for genomics and applied fields in Brassica crops. Findings We have developed BrassicaTED as a unique web portal containing detailed characterization information for mTEs of Brassica species. At present, BrassicaTED has datasets for 41 mTE families, including 5894 and 6026 members from 20 MITE families, 1393 and 1639 members from 5 TRIM families, 1270 and 2364 members from 16 SINE families in B. rapa and B. oleracea, respectively. BrassicaTED offers different sections to browse structural and positional characteristics for every mTE family. In addition, we have added data on 289 MITE insertion polymorphisms from a survey of seven Brassica relatives. Genes with internal mTE insertions are shown with detailed gene annotation and microarray-based comparative gene expression data in comparison with their paralogs in the triplicated B. rapa genome. This database also includes a novel tool, K BLAST (Karyotype BLAST), for clear visualization of the locations for each member in the B. rapa and B. oleracea pseudo-genome sequences. Conclusions BrassicaTED is a newly developed database of information regarding the characteristics and potential utility of mTEs including MITE, TRIM and SINEs in B. rapa and B. oleracea. The database will promote the development of desirable mTE-based markers, which can be utilized for genomics and breeding in Brassica species. BrassicaTED will be a valuable repository for scientists and breeders, promoting efficient research on Brassica species. 
BrassicaTED can be accessed at http://im-crop.snu.ac.kr/BrassicaTED/index.php.",BrassicaTED,0.992530644,NA,0,BrassicaTED,0.992530644,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/20/2014 +29136180,http://bioinformatics.breastcancertissuebank.org,"BCNTB bioinformatics: the next evolutionary step in the bioinformatics of breast cancer tissue banking. Here, we present an update of Breast Cancer Now Tissue Bank bioinformatics, a rich platform for the sharing, mining, integration and analysis of breast cancer data. Its modalities provide researchers with access to a centralised information gateway from which they can access a network of bioinformatic resources to query findings from publicly available, in-house and experimental data generated using samples supplied from the Breast Cancer Now Tissue Bank. This in silico environment aims to help researchers use breast cancer data to their full potential, irrespective of any bioinformatics barriers. For this new release, a complete overhaul of the IT and bioinformatic infrastructure underlying the portal has been conducted and a host of novel analytical modules established. We developed and adopted an automated data selection and prioritisation system, expanded the data content and included tissue and cell line data generated from The Cancer Genome Atlas and the Cancer Cell Line Encyclopedia, designed a host of novel analytical modalities and enhanced the query building process. Furthermore, the results are presented in an interactive format, providing researchers with greater control over the information on which they want to focus. 
Breast Cancer Now Tissue Bank bioinformatics can be accessed at http://bioinformatics.breastcancertissuebank.org/.",BCNTB,0.615942299,Breast Cancer Now Tissue Bank bioinformatics,0.752365947,Breast Cancer Now Tissue Bank bioinformatics,0.752365947,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +28821760,http://bioinformatics.cimap.res.in/sharma/boss,"A database of breast oncogenic specific siRNAs. Breast cancer is a serious problem causing the death of women across the world. At present, one of the major challenges is to design drugs to target breast cancer specific gene(s). RNA interference (RNAi) is an important technique for targeted gene silencing that may lead to promising novel therapeutic strategies for breast cancer. Therefore, identification of such molecules having high oncogene specificity is the need of the hour. Here, we have developed a database named as Breast Oncogenic Specific siRNAs (BOSS, http://bioinformatics.cimap.res.in/sharma/boss/ ) on the basis of the current research status on siRNA-mediated repression of oncogenes in different breast cancer cell lines. BOSS is a resource of experimentally validated breast oncogenic siRNAs, collected from research articles and patents published yet. The present database contains information on 865 breast oncogenic siRNA entries. Each entry provides comprehensive information of an siRNA that includes its name, sequence, target gene, type of cells, and inhibition value, etc. Additionally, some useful tools like siRNAMAP and BOSS BLAST were also developed and linked with the database. 
siRNAMAP can be used for the selection of best siRNA against a target gene while BOSS BLAST tool helps to locate the siRNA sequences in deferent oncogenes.",BOSS,0.972771386,Breast Oncogenic Specific siRNAs,0.987298205,Breast Oncogenic Specific siRNAs,0.987298205,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/18/2017 +26586806,http://brecandb.igib.res.in,"BreCAN-DB: a repository cum browser of personalized DNA breakpoint profiles of cancer genomes. BreCAN-DB (http://brecandb.igib.res.in) is a repository cum browser of whole genome somatic DNA breakpoint profiles of cancer genomes, mapped at single nucleotide resolution using deep sequencing data. These breakpoints are associated with deletions, insertions, inversions, tandem duplications, translocations and a combination of these structural genomic alterations. The current release of BreCAN-DB features breakpoint profiles from 99 cancer-normal pairs, comprising five cancer types. We identified DNA breakpoints across genomes using high-coverage next-generation sequencing data obtained from TCGA and dbGaP. Further, in these cancer genomes, we methodically identified breakpoint hotspots which were significantly enriched with somatic structural alterations. To visualize the breakpoint profiles, a next-generation genome browser was integrated with BreCAN-DB. Moreover, we also included previously reported breakpoint profiles from 138 cancer-normal pairs, spanning 10 cancer types into the browser. Additionally, BreCAN-DB allows one to identify breakpoint hotspots in user uploaded data set. We have also included a functionality to query overlap of any breakpoint profile with regions of user's interest. Users can download breakpoint profiles from the database or may submit their data to be integrated in BreCAN-DB. 
We believe that BreCAN-DB will be useful resource for genomics scientific community and is a step towards personalized cancer genomics.",BreCAN-DB,0.997648492,NA,0,BreCAN-DB,0.997648492,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/19/2015 +"23203881, 25378310, 33211880",http://www.brenda-enzymes.org,"BRENDA in 2013: integrated reactions, kinetic data, enzyme function data, improved disease classification: new options and contents in BRENDA. The BRENDA (BRaunschweig ENzyme DAtabase) enzyme portal (http://www.brenda-enzymes.org) is the main information system of functional biochemical and molecular enzyme data and provides access to seven interconnected databases. BRENDA contains 2.7 million manually annotated data on enzyme occurrence, function, kinetics and molecular properties. Each entry is connected to a reference and the source organism. Enzyme ligands are stored with their structures and can be accessed via their names, synonyms or via a structure search. FRENDA (Full Reference ENzyme DAta) and AMENDA (Automatic Mining of ENzyme DAta) are based on text mining methods and represent a complete survey of PubMed abstracts with information on enzymes in different organisms, tissues or organelles. The supplemental database DRENDA provides more than 910 000 new EC number-disease relations in more than 510 000 references from automatic search and a classification of enzyme-disease-related information. KENDA (Kinetic ENzyme DAta), a new amendment extracts and displays kinetic values from PubMed abstracts. The integration of the EnzymeDetector offers an automatic comparison, evaluation and prediction of enzyme function annotations for prokaryotic genomes. 
The biochemical reaction database BKM-react contains non-redundant enzyme-catalysed and spontaneous reactions and was developed to facilitate and accelerate the construction of biochemical models.",BRENDA,0.997791767,BRaunschweig ENzyme DAtabase,0.770335999,BRENDA,0.997791767,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +27164438,"http://www.dbtbrucellosis.in/brucellabase.html, http://59.99.226.203/brucellabase/homepage.html","BrucellaBase: Genome information resource. Brucella sp. causes a major zoonotic disease, brucellosis. Brucella belongs to the family Brucellaceae under the order Rhizobiales of Alphaproteobacteria. We present BrucellaBase, a web-based platform, providing features of a genome database together with unique analysis tools. We have developed a web version of the multilocus sequence typing (MLST) (Whatmore et al., 2007) and phylogenetic analysis of Brucella spp. BrucellaBase currently contains genome data of 510 Brucella strains along with the user interfaces for BLAST, VFDB, CARD, pairwise genome alignment and MLST typing. Availability of these tools will enable the researchers interested in Brucella to get meaningful information from Brucella genome sequences. BrucellaBase will regularly be updated with new genome sequences, new features along with improvements in genome annotations. BrucellaBase is available online at http://www.dbtbrucellosis.in/brucellabase.html or http://59.99.226.203/brucellabase/homepage.html.",BrucellaBase,0.996022701,NA,0,BrucellaBase,0.996022701,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/7/2016 +33539279,http://rth.dk/resources/bsgatlas,"BSGatlas: a unified Bacillus subtilis genome and transcriptome annotation atlas with enhanced information access. . A large part of our current understanding of gene regulation in Gram-positive bacteria is based on Bacillus subtilis, as it is one of the most well studied bacterial model systems. 
The rapid growth in data concerning its molecular and genomic biology is distributed across multiple annotation resources. Consequently, the interpretation of data from further B. subtilis experiments becomes increasingly challenging in both low- and large-scale analyses. Additionally, B. subtilis annotation of structured RNA and non-coding RNA (ncRNA), as well as the operon structure, is still lagging behind the annotation of the coding sequences. To address these challenges, we created the B. subtilis genome atlas, BSGatlas, which integrates and unifies multiple existing annotation resources. Compared to any of the individual resources, the BSGatlas contains twice as many ncRNAs, while improving the positional annotation for 70√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭä% of the ncRNAs. Furthermore, we combined known transcription start and termination sites with lists of known co-transcribed gene sets to create a comprehensive transcript map. The combination with transcription start/termination site annotations resulted in 717 new sets of co-transcribed genes and 5335 untranslated regions (UTRs). In comparison to existing resources, the number of 5' and 3' UTRs increased nearly fivefold, and the number of internal UTRs doubled. The transcript map is organized in 2266 operons, which provides transcriptional annotation for 92√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭä% of all genes in the genome compared to the at most 82√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭä% by previous resources. We predicted an off-target-aware genome-wide library of CRISPR-Cas9 guide RNAs, which we also linked to polycistronic operons. We provide the BSGatlas in multiple forms: as a website (https://rth.dk/resources/bsgatlas/), an annotation hub for display in the UCSC genome browser, supplementary tables and standardized GFF3 format, which can be used in large scale -omics studies. By complementing existing resources, the BSGatlas supports analyses of the B. 
subtilis genome and its molecular biology with respect to not only non-coding genes but also genome-wide transcriptional relationships of all genes.",BSGatlas,0.995428026,NA,0,BSGatlas,0.995428026,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2021 +32026396,http://bsma.pdbj.org,"The Biological Structure Model Archive (BSM-Arc): an archive for in silico models and simulations. We present the Biological Structure Model Archive (BSM-Arc, https://bsma.pdbj.org), which aims to collect raw data obtained via in silico methods related to structural biology, such as computationally modeled 3D structures and molecular dynamics trajectories. Since BSM-Arc does not enforce a specific data format for the raw data, depositors are free to upload their data without any prior conversion. Besides uploading raw data, BSM-Arc enables depositors to annotate their data with additional explanations and figures. Furthermore, via our WebGL-based molecular viewer Molmil, it is possible to recreate 3D scenes as shown in the corresponding scientific article in an interactive manner. To submit a new entry, depositors require an ORCID ID to login, and to finally publish the data, an accompanying peer-reviewed paper describing the work must be associated with the entry. Submitting their data enables researchers to not only have an external backup but also provide an opportunity to promote their work via an interactive platform and to provide third-party researchers access to their raw data.",BSM-Arc,0.996515274,Biological Structure Model Archive,0.965628356,BSM-Arc,0.996515274,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/5/2020 +23203879,http://kwanlab.bio.cuhk.edu.hk/BSRD,"BSRD: a repository for bacterial small regulatory RNA. In bacteria, small regulatory non-coding RNAs (sRNAs) are the most abundant class of post-transcriptional regulators. They are involved in diverse processes including quorum sensing, stress response, virulence and carbon metabolism. 
Recent developments in high-throughput techniques, such as genomic tiling arrays and RNA-Seq, have allowed efficient detection and characterization of bacterial sRNAs. However, a comprehensive repository to host sRNAs and their annotations is not available. Existing databases suffer from a limited number of bacterial species or sRNAs included. In addition, these databases do not have tools to integrate or analyse high-throughput sequencing data. Here, we have developed BSRD (http://kwanlab.bio.cuhk.edu.hk/BSRD), a comprehensive bacterial sRNAs database, as a repository for published bacterial sRNA sequences with annotations and expression profiles. BSRD contains over nine times more experimentally validated sRNAs than any other available databases. BSRD also provides combinatorial regulatory networks of transcription factors and sRNAs with their common targets. We have built and implemented in BSRD a novel RNA-Seq analysis platform, sRNADeep, to characterize sRNAs in large-scale transcriptome sequencing projects. We will update BSRD regularly.",BSRD,0.995059371,NA,0,BSRD,0.995059371,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +21210251,http://cmbteg.childrensmemorial.org,"BTECH: a platform to integrate genomic, transcriptomic and epigenomic alterations in brain tumors. The identification of molecular signatures predictive of clinical behavior and outcome in brain tumors has been the focus of many studies in the recent years. Despite the wealth of data that are available in the public domain on alterations in the genome, epigenome and transcriptome of brain tumors, the underlying molecular mechanisms leading to tumor initiation and progression remain largely unknown. Unfortunately, most of these data are scattered in multiple databases and supplementary materials of publications, thus making their retrieval, evaluation, comparison and visualization a rather arduous task. 
Here we report the development and implementation of an open access database (BTECH), a community resource for the deposition of a wide range of molecular data derived from brain tumor studies. This comprehensive database integrates multiple datasets, including transcript profiles, epigenomic CpG methylation data, DNA copy number alterations and structural chromosomal rearrangements, tumor-associated gene lists, SNPs, genomic features concerning Alu repeats and general genomic annotations. A genome browser has also been developed that allows for the simultaneous visualization of the different datasets and the various annotated features. Besides enabling an integrative view of diverse datasets through the genome browser, we also provide links to the original references for users to have a more accurate understanding of each specific dataset. This integrated platform will facilitate uncovering interactions among genetic and epigenetic factors associated with brain tumor development. BTECH is freely available at http://cmbteg.childrensmemorial.org/.",BTECH,0.996129572,NA,0,BTECH,0.996129572,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2011 +25656309,http://www.gurupi.uft.edu.br/btoxdb,"BtoxDB: a comprehensive database of protein structural data on toxin-antitoxin systems. Purpose Toxin-antitoxin (TA) systems are diverse and abundant genetic modules in prokaryotic cells that are typically formed by two genes encoding a stable toxin and a labile antitoxin. Because TA systems are able to repress growth or kill cells and are considered to be important actors in cell persistence (multidrug resistance without genetic change), these modules are considered potential targets for alternative drug design. In this scenario, structural information for the proteins in these systems is highly valuable. In this report, we describe the development of a web-based system, named BtoxDB, that stores all protein structural data on TA systems. 
Methods The BtoxDB database was implemented as a MySQL relational database using PHP scripting language. Web interfaces were developed using HTML, CSS and JavaScript. The data were collected from the PDB, UniProt and Entrez databases. These data were appropriately filtered using specialized literature and our previous knowledge about toxin-antitoxin systems. Results The database provides three modules (""Search"", ""Browse"" and ""Statistics"") that enable searches, acquisition of contents and access to statistical data. Direct links to matching external databases are also available. Conclusions The compilation of all protein structural data on TA systems in one platform is highly useful for researchers interested in this content. BtoxDB is publicly available at http://www.gurupi.uft.edu.br/btoxdb.",BtoxDB,0.996941984,NA,0,BtoxDB,0.996941984,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/17/2015 +23336431,http://cabindb.iasri.res.in/buffsatdb,"In silico mining of putative microsatellite markers from whole genome sequence of water buffalo (Bubalus bubalis) and development of first BuffSatDB. Background Though India has sequenced water buffalo genome but its draft assembly is based on cattle genome BTau 4.0, thus de novo chromosome wise assembly is a major pending issue for global community. The existing radiation hybrid of buffalo and these reported STR can be used further in final gap plugging and ""finishing"" expected in de novo genome assembly. QTL and gene mapping needs mining of putative STR from buffalo genome at equal interval on each and every chromosome. Such markers have potential role in improvement of desirable characteristics, such as high milk yields, resistance to diseases, high growth rate. The STR mining from whole genome and development of user friendly database is yet to be done to reap the benefit of whole genome sequence. 
Description By in silico microsatellite mining of whole genome, we have developed first STR database of water buffalo, BuffSatDb (Buffalo MicroSatellite Database (http://cabindb.iasri.res.in/buffsatdb/) which is a web based relational database of 910529 microsatellite markers, developed using PHP and MySQL database. Microsatellite markers have been generated using MIcroSAtellite tool. It is simple and systematic web based search for customised retrieval of chromosome wise and genome-wide microsatellites. Search has been enabled based on chromosomes, motif type (mono-hexa), repeat motif and repeat kind (simple and composite). The search may be customised by limiting location of STR on chromosome as well as number of markers in that range. This is a novel approach and not been implemented in any of the existing marker database. This database has been further appended with Primer3 for primer designing of the selected markers enabling researcher to select markers of choice at desired interval over the chromosome. The unique add-on of degenerate bases further helps in resolving presence of degenerate bases in current buffalo assembly. Conclusion Being first buffalo STR database in the world , this would not only pave the way in resolving current assembly problem but shall be of immense use for global community in QTL/gene mapping critically required to increase knowledge in the endeavour to increase buffalo productivity, especially for third world country where rural economy is significantly dependent on buffalo productivity.",BuffSatDB,0.98745203,Buffalo MicroSatellite Database,0.644897648,BuffSatDB,0.98745203,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/19/2013 +22080550,http://bykdb.ibcp.fr,"BYKdb: the Bacterial protein tYrosine Kinase database. Bacterial tyrosine-kinases share no resemblance with their eukaryotic counterparts and they have been unified in a new protein family named BY-kinases. 
These enzymes have been shown to control several biological functions in the bacterial cells. In recent years biochemical studies, sequence analyses and structure resolutions allowed the deciphering of a common signature. However, BY-kinase sequence annotations in primary databases remain incomplete. This prompted us to develop a specialized database of computer-annotated BY-kinase sequences: the Bacterial protein tyrosine-kinase database (BYKdb). BY-kinase sequences are first identified, thanks to a workflow developed in a previous work. A second workflow annotates the UniProtKB entries in order to provide the BYKdb entries. The database can be accessed through a web interface that allows static and dynamic queries and offers integrated sequence analysis tools. BYKdb can be found at http://bykdb.ibcp.fr.",BYKdb,0.956137955,Bacterial protein tYrosine Kinase database,0.903621137,BYKdb,0.956137955,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/12/2011 +22621612,http://sites.google.com/site/tecatalog,"C-GATE - catalogue of genes affected by transposable elements. Background Functional regulatory sequences are present in many transposable element (TE) copies, resulting in TEs being frequently exapted by host genes. Today, many examples of TEs impacting host gene expression can be found in the literature and we believe a new catalogue of such exaptations would be useful for the field. Findings We have established the catalogue of genes affected by transposable elements (C-GATE), which can be found at https://sites.google.com/site/tecatalog/. To date, it holds 221 cases of biologically verified TE exaptations and more than 10,000 in silico TE-gene partnerships. C-GATE is interactive and allows users to include missed or new TE exaptation data. C-GATE provides a graphic representation of the entire library, which may be used for future statistical analysis of TE impact on host gene expression. 
Conclusions We hope C-GATE will be valuable for the TE community but also for others who have realized the role that TEs may have in their research.",C-GATE,0.988704075,affected by transposable,0.630219376,C-GATE,0.988704075,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/23/2012 +27050421,http://cterminome.bio-toolkit.com,"The Functional Human C-Terminome. All translated proteins end with a carboxylic acid commonly called the C-terminus. Many short functional sequences (minimotifs) are located on or immediately proximal to the C-terminus. However, information about the function of protein C-termini has not been consolidated into a single source. Here, we built a new ""C-terminome"" database and web system focused on human proteins. Approximately 3,600 C-termini in the human proteome have a minimotif with an established molecular function. To help evaluate the function of the remaining C-termini in the human proteome, we inferred minimotifs identified by experimentation in rodent cells, predicted minimotifs based upon consensus sequence matches, and predicted novel highly repetitive sequences in C-termini. Predictions can be ranked by enrichment scores or Gene Evolutionary Rate Profiling (GERP) scores, a measurement of evolutionary constraint. By searching for new anchored sequences on the last 10 amino acids of proteins in the human proteome with lengths between 3-10 residues and up to 5 degenerate positions in the consensus sequences, we have identified new consensus sequences that predict instances in the majority of human genes. All of this information is consolidated into a database that can be accessed through a C-terminome web system with search and browse functions for minimotifs and human proteins. A known consensus sequence-based predicted function is assigned to nearly half the proteins in the human proteome. 
Weblink: http://cterminome.bio-toolkit.com.",C-terminome,0.84958427,NA,0,C-terminome,0.84958427,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/6/2016 +31649674,http://cab-rep.c2b2.columbia.edu,"cAb-Rep: A Database of Curated Antibody Repertoires for Exploring Antibody Diversity and Predicting Antibody Prevalence. The diversity of B cell receptors provides a basis for recognizing numerous pathogens. Antibody repertoire sequencing has revealed relationships between B cell receptor sequences, their diversity, and their function in infection, vaccination, and disease. However, many repertoire datasets have been deposited without annotation or quality control, limiting their utility. To accelerate investigations of B cell immunoglobulin sequence repertoires and to facilitate development of algorithms for their analysis, we constructed a comprehensive public database of curated human B cell immunoglobulin sequence repertoires, cAb-Rep (https://cab-rep.c2b2.columbia.edu), which currently includes 306 immunoglobulin repertoires from 121 human donors, who were healthy, vaccinated, or had autoimmune disease. The database contains a total of 267.9 million V(D)J heavy chain and 72.9 million VJ light chain transcripts. These transcripts are full-length or near full-length, have been annotated with gene origin, antibody isotype, somatic hypermutations, and other biological characteristics, and are stored in FASTA format to facilitate their direct use by most current repertoire-analysis programs. We describe a website to search cAb-Rep for similar antibodies along with methods for analysis of the prevalence of antibodies with specific genetic signatures, for estimation of reproducibility of somatic hypermutation patterns of interest, and for delineating frequencies of somatically introduced N-glycosylation. 
cAb-Rep should be useful for investigating attributes of B cell sequence repertoires, for understanding characteristics of affinity maturation, and for identifying potential barriers to the elicitation of effective neutralizing antibodies in infection or by vaccination.",cAb-Rep,0.99714224,NA,0,cAb-Rep,0.99714224,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/9/2019 +22584068,http://cgc.kribb.re.kr/map,"CACG: a database for comparative analysis of conjoined genes. A conjoined gene is defined as one formed at the time of transcription by combining at least part of one exon from each of two or more distinct genes that lie on the same chromosome, in the same or opposite orientation, which translate independently into different proteins. We comparatively studied the extent of conjoined genes in thirteen genomes by analyzing the public databases of expressed sequence tags and mRNA sequences using a set of computational tools designed to identify conjoined genes on the same DNA strand or opposite DNA strands of the same genomic locus. The CACG database, available at http://cgc.kribb.re.kr/map/, includes a number of conjoined genes (7131-human, 2-chimpanzee, 5-orangutan, 57-chicken, 4-rhesus monkey, 651-cow, 27-dog, 2512-mouse, 263-rat, 1482-zebrafish, 5-horse, 29-sheep, and 8-medaka) and is very effective and easy to use to analyze the evolutionary process of conjoined genes when comparing different species.",CACG,0.998239875,NA,0,CACG,0.998239875,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/11/2012 +22080563,http://www.cadre-genomes.org.uk,"CADRE: the Central Aspergillus Data REpository 2012. The Central Aspergillus Data REpository (CADRE; http://www.cadre-genomes.org.uk) is a public resource for genomic data extracted from species of Aspergillus. It provides an array of online tools for searching and visualising features of this significant fungal genus. CADRE arose from a need within the medical community to understand the human pathogen Aspergillus fumigatus. 
Due to the paucity of Aspergillus genomic resources 10 years ago, the long-term goal of this project was to collate and maintain Aspergillus genomes as they became available. Since our first release in 2004, the resource has expanded to encompass annotated sequence for eight other Aspergilli and provides much needed support to the international Aspergillus research community. Recent developments, however, in sequencing technology are creating a vast amount of genomic data and, as a result, we shortly expect a tidal wave of Aspergillus data. In preparation for this, we have upgraded the database and software suite. This not only enables better management of more complex data sets, but also improves annotation by providing access to genome comparison data and the integration of high-throughput data.",CADRE,0.994582891,Central Aspergillus Data REpository,0.973802202,CADRE,0.994582891,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/12/2011 +30329086,http://www.cagmdb.org,"CAGm: a repository of germline microsatellite variations in the 1000 genomes project. The human genome harbors an abundance of repetitive DNA; however, its function continues to be debated. Microsatellites-a class of short tandem repeat-are established as an important source of genetic variation. Array length variants are common among microsatellites and affect gene expression; but, efforts to understand the role and diversity of microsatellite variation has been hampered by several challenges. Without adequate depth, both long-read and short-read sequencing may not detect the variants present in a sample; additionally, large sample sizes are needed to reveal the degree of population-level polymorphism. To address these challenges we present the Comparative Analysis of Germline Microsatellites (CAGm): a database of germline microsatellites from 2529 individuals in the 1000 genomes project. 
A key novelty of CAGm is the ability to aggregate microsatellite variation by population, ethnicity (super population) and gender. The database provides advanced searching for microsatellites embedded in genes and functional elements. All data can be downloaded as Microsoft Excel spreadsheets. Two use-case scenarios are presented to demonstrate its utility: a mononucleotide (A) microsatellite at the BAT-26 locus and a dinucleotide (CA) microsatellite in the coding region of FGFRL1. CAGm is freely available at http://www.cagmdb.org/.",CAGm,0.991939008,Comparative Analysis of Germline Microsatellites,0.97115584,CAGm,0.991939008,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +33444113,http://www.dsimb.inserm.fr/CALR-ET,"CALR-ETdb, the database of calreticulin variants diversity in essential thrombocythemia. Essential thrombocythemia (ET) is a blood cancer defined by a strong increase of platelet numbers. A quarter of patients suffering from ET show mutations in the last exon of calreticulin (CALR) gene. Two variants named type 1 and type 2 represent 85% of these patients. However, a large number of other variants have been determined. In this study, we have compiled variants taken from COSMIC database and literature leading to 155 different variants. This large number of variants allowed redefining 5 new classes extending the classification of type 1-like and type 2-like to a finer description. These analyses showed that last class, named E, corresponding to more than 10% of CALR variants seemed not attached to ET. Structural properties analyzed showed that CALR variants associated to ET have common features. All the compiled and refined information had been included into a freely dedicated database CALR-ETdb (https://www.dsimb.inserm.fr/CALR-ET).",CALR-ETdb,0.93914603,NA,0,CALR-ETdb,0.93914603,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/14/2021 +24265220,http://www.camp.bicnirrh.res.in,"CAMP: Collection of sequences and structures of antimicrobial peptides. 
Antimicrobial peptides (AMPs) are gaining importance as anti-infective agents. Here we describe the updated Collection of Antimicrobial Peptide (CAMP) database, available online at http://www.camp.bicnirrh.res.in/. The 3D structures of peptides are known to influence antimicrobial activity. Although there exists databases of AMPs, information on structures of AMPs is limited in these databases. CAMP is manually curated and currently holds 6756 sequences and 682 3D structures of AMPs. Sequence and structure analysis tools have been incorporated to enhance the usefulness of the database.",CAMP,0.997035444,Collection of Antimicrobial Peptide,0.979017951,CAMP,0.997035444,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2013 +22213543,http://webclu.bio.wzw.tum.de/CAMPS2.0,"Camps 2.0: exploring the sequence and structure space of prokaryotic, eukaryotic, and viral membrane proteins. Structural bioinformatics of membrane proteins is still in its infancy, and the picture of their fold space is only beginning to emerge. Because only a handful of three-dimensional structures are available, sequence comparison and structure prediction remain the main tools for investigating sequence-structure relationships in membrane protein families. Here we present a comprehensive analysis of the structural families corresponding to √ɬÉ√Ǭé√ɬÇ√Ǭ±-helical membrane proteins with at least three transmembrane helices. The new version of our CAMPS database (CAMPS 2.0) covers nearly 1300 eukaryotic, prokaryotic, and viral genomes. Using an advanced classification procedure, which is based on high-order hidden Markov models and considers both sequence similarity as well as the number of transmembrane helices and loop lengths, we identified 1353 structurally homogeneous clusters roughly corresponding to membrane protein folds. 
Only 53 clusters are associated with experimentally determined three-dimensional structures, and for these clusters CAMPS is in reasonable agreement with structure-based classification approaches such as SCOP and CATH. We therefore estimate that √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº1300 structures would need to be determined to provide a sufficient structural coverage of polytopic membrane proteins. CAMPS 2.0 is available at http://webclu.bio.wzw.tum.de/CAMPS2.0/.",CAMPS,0.880289257,NA,0,CAMPS,0.880289257,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/28/2011 +33306801,http://camregbase.org,"CamRegBase: a gene regulation database for the biofuel crop, Camelina sativa. . Camelina is an annual oilseed plant from the Brassicaceae family that is gaining momentum as a biofuel winter cover crop. However, a significant limitation in further enhancing its utility as a producer of oils that can be used as biofuels, jet fuels or bio-based products is the absence of a repository for all the gene expression and regulatory information that is being rapidly generated by the community. Here, we provide CamRegBase (https://camregbase.org/) as a one-stop resource to access Camelina information on gene expression and co-expression, transcription factors, lipid associated genes and genome-wide orthologs in the close-relative reference plant Arabidopsis. We envision this as a resource of curated information for users, as well as a repository of new gene regulation information.",CamRegBase,0.996998787,NA,0,CamRegBase,0.996998787,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +30367574,http://bioinformatics.iasi.cnr.it/camurweb,"CamurWeb: a classification software and a large knowledge base for gene expression data of cancer. Background The high growth of Next Generation Sequencing data currently demands new knowledge extraction methods. 
In particular, the RNA sequencing gene expression experimental technique stands out for case-control studies on cancer, which can be addressed with supervised machine learning techniques able to extract human interpretable models composed of genes, and their relation to the investigated disease. State of the art rule-based classifiers are designed to extract a single classification model, possibly composed of few relevant genes. Conversely, we aim to create a large knowledge base composed of many rule-based models, and thus determine which genes could be potentially involved in the analyzed tumor. This comprehensive and open access knowledge base is required to disseminate novel insights about cancer. Results We propose CamurWeb, a new method and web-based software that is able to extract multiple and equivalent classification models in form of logic formulas (""if then"" rules) and to create a knowledge base of these rules that can be queried and analyzed. The method is based on an iterative classification procedure and an adaptive feature elimination technique that enables the computation of many rule-based models related to the cancer under study. Additionally, CamurWeb includes a user friendly interface for running the software, querying the results, and managing the performed experiments. The user can create her profile, upload her gene expression data, run the classification analyses, and interpret the results with predefined queries. In order to validate the software we apply it to all public available RNA sequencing datasets from The Cancer Genome Atlas database obtaining a large open access knowledge base about cancer. CamurWeb is available at http://bioinformatics.iasi.cnr.it/camurweb . Conclusions The experiments prove the validity of CamurWeb, obtaining many classification models and thus several genes that are associated to 21 different cancer types. 
Finally, the comprehensive knowledge base about cancer and the software tool are released online; interested researchers have free access to them for further studies and to design biological experiments in cancer research.",CamurWeb,0.970218897,NA,0,CamurWeb,0.970218897,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/15/2018 +34712617,http://bio.liclab.net/Cancer_crc/index.html,"Cancer CRC: A Comprehensive Cancer Core Transcriptional Regulatory Circuit Resource and Analysis Platform. A core transcriptional regulatory circuit (CRC) is a group of interconnected auto-regulating transcription factors (TFs) that form loops and can be identified by super-enhancers (SEs). Studies have indicated that CRCs play an important role in defining cellular identity and determining cellular fate. Additionally, core TFs in CRCs are regulators of cell-type-specific transcriptional regulation. However, a global view of CRC properties across various cancer types has not been generated. Thus, we integrated paired cancer ATAC-seq and H3K27ac ChIP-seq data for specific cell lines to develop the Cancer CRC (http://bio.liclab.net/Cancer_crc/index.html). This platform documented 94,108 cancer CRCs, including 325 core TFs. The cancer CRC also provided the ""SE active core TFs analysis"" and ""TF enrichment analysis"" tools to identify potentially key TFs in cancer. In addition, we performed a comprehensive analysis of core TFs in various cancer types to reveal conserved and cancer-specific TFs.",Cancer,0.699524522,NA,0,Cancer,0.699524522,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/12/2021 +28453651,http://panoromics.irbbarcelona.org,"A PanorOmic view of personal cancer genomes. The massive molecular profiling of thousands of cancer patients has led to the identification of many tumor type specific driver genes. 
However, only a few (or none) of them are present in each individual tumor and, to enable precision oncology, we need to interpret the alterations found in a single patient. Cancer PanorOmics (http://panoromics.irbbarcelona.org) is a web-based resource to contextualize genomic variations detected in a personal cancer genome within the body of clinical and scientific evidence available for 26 tumor types, offering complementary cohort- and patient-centric views. Additionally, it explores the cellular environment of mutations by mapping them on the human interactome and providing quasi-atomic structural details, whenever available. This 'PanorOmic' molecular view of individual tumors, together with the appropriate genetic counselling and medical advice, should contribute to the identification of actionable alterations ultimately guiding the clinical decision-making process.",Cancer PanorOmics,0.830598618,NA,0,Cancer PanorOmics,0.830598618,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2017 +34903605,http://bioinfo.vanderbilt.edu/database/Cancer-Immu,"A pan-cancer immunogenomic atlas for immune checkpoint blockade immunotherapy. The ability to identify robust genomic signatures that predict response to immune checkpoint blockade is restricted by limited sample sizes and ungeneralizable performance across cohorts. To address these challenges, we established Cancer-Immu (http://bioinfo.vanderbilt.edu/database/Cancer-Immu/) a comprehensive platform that integrates large-scale multidimensional omics data, including genetic, bulk, and single-cell transcriptomic, proteomic, and dynamic genomic profiles, with clinical phenotypes to explore consistent and rare immunogenomic connections. Currently Cancer-Immu has incorporated data for 3,652 samples for 16 cancer types. 
It provides easy access to immunogenomic data and empowers researchers to translate omics datasets into biological insights and clinical applications.",Cancer-Immu,0.998258367,NA,0,Cancer-Immu,0.998258367,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/13/2021 +"25392415, 30407596",http://www.cancer3d.org,"Cancer3D: understanding cancer mutations through protein structures. The new era of cancer genomics is providing us with extensive knowledge of mutations and other alterations in cancer. The Cancer3D database at http://www.cancer3d.org gives an open and user-friendly way to analyze cancer missense mutations in the context of structures of proteins in which they are found. The database also helps users analyze the distribution patterns of the mutations as well as their relationship to changes in drug activity through two algorithms: e-Driver and e-Drug. These algorithms use knowledge of modular structure of genes and proteins to separately study each region. This approach allows users to find novel candidate driver regions or drug biomarkers that cannot be found when similar analyses are done on the whole-gene level. The Cancer3D database provides access to the results of such analyses based on data from The Cancer Genome Atlas (TCGA) and the Cancer Cell Line Encyclopedia (CCLE). In addition, it displays mutations from over 14,700 proteins mapped to more than 24,300 structures from PDB. This helps users visualize the distribution of mutations and identify novel three-dimensional patterns in their distribution.",Cancer3D,0.993081719,NA,0,Cancer3D,0.993081719,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +23486013,http://crdd.osdd.net/raghava/cancerdr,"CancerDR: cancer drug resistance database. Cancer therapies are limited by the development of drug resistance, and mutations in drug targets is one of the main reasons for developing acquired resistance. The adequate knowledge of these mutations in drug targets would help to design effective personalized therapies. 
Keeping this in mind, we have developed a database ""CancerDR"", which provides information of 148 anti-cancer drugs, and their pharmacological profiling across 952 cancer cell lines. CancerDR provides comprehensive information about each drug target that includes; (i) sequence of natural variants, (ii) mutations, (iii) tertiary structure, and (iv) alignment profile of mutants/variants. A number of web-based tools have been integrated in CancerDR. This database will be very useful for identification of genetic alterations in genes encoding drug targets, and in turn the residues responsible for drug resistance. CancerDR allows user to identify promiscuous drug molecules that can kill wide range of cancer cells. CancerDR is freely accessible at http://crdd.osdd.net/raghava/cancerdr/",CancerDR,0.992159104,cancer drug resistance database,0.807872832,CancerDR,0.992159104,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2013 +32360910,http://webs.iiitd.edu.in/raghava/cancerend,"CancerEnD: A database of cancer associated enhancers. CancerEnD is an integrated resource developed for annotating 8524 unique expressed enhancers, associated genes, somatic mutations and copy number variations of 8063 cancer samples from 18 cancer types of TCGA. Somatic mutation data was taken from the COSMIC repository. To delineate the relationship of change in copy number of enhancer elements with the prognosis of cancer patients, survival analysis was done using the survival package in R. We identified 1762 overall survival associated enhancers, which can be used for prognostic purposes of cancer patients in a tissue-specific manner. CancerEnD (https://webs.iiitd.edu.in/raghava/cancerend/) is developed on a user-friendly responsive template, that enables searching, browsing and downloading of the annotated enhancer elements in terms of gene expression, copy number variation and survival association. 
We hope it provides a promising avenue for researchers to facilitate the understanding of enhancer deregulation in tumorigenesis, and to identify new biomarkers for therapy and disease-diagnosis.",CancerEnD,0.998435497,NA,0,CancerEnD,0.998435497,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2020 +31598703,http://signor.uniroma2.it/CancerGeneNet,"CancerGeneNet: linking driver genes to cancer hallmarks. CancerGeneNet (https://signor.uniroma2.it/CancerGeneNet/) is a resource that links genes that are frequently mutated in cancers to cancer phenotypes. The resource takes advantage of a curation effort aimed at embedding a large fraction of the gene products that are found altered in cancer cells into a network of causal protein relationships. Graph algorithms, in turn, allow to infer likely paths of causal interactions linking cancer associated genes to cancer phenotypes thus offering a rational framework for the design of strategies to revert disease phenotypes. CancerGeneNet bridges two interaction layers by connecting proteins whose activities are affected by cancer drivers to proteins that impact on the 'hallmarks of cancer'. In addition, CancerGeneNet annotates curated pathways that are relevant to rationalize the pathological consequences of cancer driver mutations in selected common cancers and 'MiniPathways' illustrating regulatory circuits that are frequently altered in different cancers.",CancerGeneNet,0.994320154,NA,0,CancerGeneNet,0.994320154,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +26074488,http://lsp.nwsuaf.edu.cn/CancerHSP.php,"CancerHSP: anticancer herbs database of systems pharmacology. The numerous natural products and their bioactivity potentially afford an extraordinary resource for new drug discovery and have been employed in cancer treatment. However, the underlying pharmacological mechanisms of most natural anticancer compounds remain elusive, which has become one of the major obstacles in developing novel effective anticancer agents. 
Here, to address these unmet needs, we developed an anticancer herbs database of systems pharmacology (CancerHSP), which records anticancer herbs related information through manual curation. Currently, CancerHSP contains 2439 anticancer herbal medicines with 3575 anticancer ingredients. For each ingredient, the molecular structure and nine key ADME parameters are provided. Moreover, we also provide the anticancer activities of these compounds based on 492 different cancer cell lines. Further, the protein targets of the compounds are predicted by state-of-art methods or collected from literatures. CancerHSP will help reveal the molecular mechanisms of natural anticancer products and accelerate anticancer drug development, especially facilitate future investigations on drug repositioning and drug discovery. CancerHSP is freely available on the web at http://lsp.nwsuaf.edu.cn/CancerHSP.php.",CancerHSP,0.992445946,anticancer herbs database of,0.878181517,CancerHSP,0.992445946,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/15/2015 +33010176,http://www.cancerimmunityqtl-hust.com,"CancerImmunityQTL: a database to systematically evaluate the impact of genetic variants on immune infiltration in human cancer. Tumor-infiltrating immune cells as integral component of the tumor microenvironment are associated with tumor progress, prognosis and responses to immunotherapy. Genetic variants have been demonstrated to impact tumor-infiltrating, underscoring the heritable character of immune landscape. Therefore, identification of immunity quantitative trait loci (immunQTLs), which evaluate the effect of genetic variants on immune cells infiltration, might present a critical step toward fully understanding the contribution of genetic variants in tumor development. Although emerging studies have demonstrated the determinants of germline variants on immune infiltration, no database has yet been developed to systematically analyze immunQTLs across multiple cancer types. 
Using genotype data from TCGA database and immune cell fractions estimated by CIBERSORT, we developed a computational pipeline to identify immunQTLs in 33 cancer types. A total of 913 immunQTLs across different cancer types were identified. Among them, 5 immunQTLs are associated with patient overall survival. Furthermore, by integrating immunQTLs with GWAS data, we identified 527 immunQTLs overlapping with known GWAS linkage disequilibrium regions. Finally, we constructed a user-friendly database, CancerImmunityQTL (http://www.cancerimmunityqtl-hust.com/) for users to browse, search and download data of interest. This database provides an informative resource to understand the germline determinants of immune infiltration in human cancer and benefit from personalized cancer immunotherapy.",CancerImmunityQTL,0.99584347,NA,0,CancerImmunityQTL,0.99584347,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +31110280,http://bionlp.bcgsc.ca/cancermine,"CancerMine: a literature-mined resource for drivers, oncogenes and tumor suppressors in cancer. Tumors from individuals with cancer are frequently genetically profiled for information about the driving forces behind the disease. We present the CancerMine resource, a text-mined and routinely updated database of drivers, oncogenes and tumor suppressors in different types of cancer. All data are available online ( http://bionlp.bcgsc.ca/cancermine ) and downloadable under a Creative Commons Zero license for ease of use.",CancerMine,0.981553555,NA,0,CancerMine,0.981553555,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/20/2019 +26690544,http://bis.zju.edu.cn/CancerNet,"CancerNet: a database for decoding multilevel molecular interactions across diverse cancer types. Protein-protein interactions (PPIs) and microRNA (miRNA)-target interactions are important for deciphering the mechanisms of tumorigenesis. However, current PPI databases do not support cancer-specific analysis. 
Also, no available databases can be used to retrieve cancer-associated miRNA-target interactions. As the pathogenesis of human cancers is affected by several miRNAs rather than a single miRNA, it is needed to uncover miRNA synergism in a systems level. Here for each cancer type, we constructed a miRNA-miRNA functionally synergistic network based on the functions of miRNA targets and their topological features in that cancer PPI network. And for the first time, we report the cancer-specific database CancerNet (http://bis.zju.edu.cn/CancerNet), which contains information about PPIs, miRNA-target interactions and functionally synergistic miRNA-miRNA pairs across 33 human cancer types. In addition, PPI information across 33 main normal tissues and cell types are included. Flexible query methods are allowed to retrieve cancer molecular interactions. Network viewer can be used to visualize interactions that users are interested in. Enrichment analysis tool was designed to detect significantly overrepresented Gene Ontology categories of miRNA targets. Thus, CancerNet serves as a comprehensive platform for assessing the roles of proteins and miRNAs, as well as their interactions across human cancers.",CancerNet,0.995466173,NA,0,CancerNet,0.995466173,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/21/2015 +28473704,http://crdd.osdd.net/raghava/cancerpdf,"CancerPDF: A repository of cancer-associated peptidome found in human biofluids. CancerPDF (Cancer Peptidome Database of bioFluids) is a comprehensive database of endogenous peptides detected in the human biofluids. The peptidome patterns reflect the synthesis, processing and degradation of proteins in the tissue environment and therefore can act as a gold mine to probe the peptide-based cancer biomarkers. Although an extensive data on cancer peptidome has been generated in the recent years, lack of a comprehensive resource restrains the facility to query the growing community knowledge. 
We have developed the cancer peptidome resource named CancerPDF, to collect and compile all the endogenous peptides isolated from human biofluids in various cancer profiling studies. CancerPDF has 14,367 entries with 9,692 unique peptide sequences corresponding to 2,230 unique precursor proteins from 56 high-throughput studies for ~27 cancer conditions. We have provided an interactive interface to query the endogenous peptides along with the primary information such as m/z, precursor protein, the type of cancer and its regulation status in cancer. To add-on, many web-based tools have been incorporated, which comprise of search, browse and similarity identification modules. We consider that the CancerPDF will be an invaluable resource to unwind the potential of peptidome-based cancer biomarkers. The CancerPDF is available at the web address http://crdd.osdd.net/raghava/cancerpdf/ .",CancerPDF,0.993715048,Cancer Peptidome Database of bioFluids,0.919792932,CancerPDF,0.993715048,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/4/2017 +25270878,http://crdd.osdd.net/raghava/cancerppd,"CancerPPD: a database of anticancer peptides and proteins. CancerPPD (http://crdd.osdd.net/raghava/cancerppd/) is a repository of experimentally verified anticancer peptides (ACPs) and anticancer proteins. Data were manually collected from published research articles, patents and from other databases. The current release of CancerPPD consists of 3491 ACP and 121 anticancer protein entries. Each entry provides comprehensive information related to a peptide like its source of origin, nature of the peptide, anticancer activity, N- and C-terminal modifications, conformation, etc. Additionally, CancerPPD provides the information of around 249 types of cancer cell lines and 16 different assays used for testing the ACPs. In addition to natural peptides, CancerPPD contains peptides having non-natural, chemically modified residues and D-amino acids. 
Besides this primary information, CancerPPD stores predicted tertiary structures as well as peptide sequences in SMILES format. Tertiary structures of peptides were predicted using the state-of-art method, PEPstr and secondary structural states were assigned using DSSP. In order to assist users, a number of web-based tools have been integrated, these include keyword search, data browsing, sequence and structural similarity search. We believe that CancerPPD will be very useful in designing peptide-based anticancer therapeutics.",CancerPPD,0.996373594,NA,0,CancerPPD,0.996373594,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/30/2014 +22659240,http://cancerproview.dmb.med.keio.ac.jp/php/cpv.html,"CancerProView: a graphical image database of cancer-related genes and proteins. We have developed a graphical image database CancerProView (URL: http://cancerproview.dmb.med.keio.ac.jp/php/cpv.html) to assist the search for alterations of the motifs/domains in the cancer-related proteins that are caused by mutations in the corresponding genes. For the CancerProView, we have collected various kinds of data on 180 cancer-related proteins in terms of the motifs/domains, genomic structures of corresponding genes, and 109 charts of the protein interaction pathways. Moreover, we have collected the relevant data on 1041 reference genes including 197 non-cancer disease-associated genes, and the nucleotide sequences for 2011 full-length cDNA's and the alternatively spliced transcript variants. Thus, the CancerProView database system would provide valuable information to facilitate basic cancer research as well as for designing new molecular diagnosis and drug discovery for cancers. 
The CancerProView database can be operated via Internet with any Web browser, and the system is freely available to interested users without ID and password.",CancerProView,0.997434795,NA,0,CancerProView,0.997434795,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/31/2012 +30329142,"http://biocc.hrbmu.edu.cn/CancerSEA/, http://202.97.205.69/CancerSEA","CancerSEA: a cancer single-cell state atlas. High functional heterogeneity of cancer cells poses a major challenge for cancer research. Single-cell sequencing technology provides an unprecedented opportunity to decipher diverse functional states of cancer cells at single-cell resolution, and cancer scRNA-seq datasets have been largely accumulated. This emphasizes the urgent need to build a dedicated resource to decode the functional states of cancer single cells. Here, we developed CancerSEA (http://biocc.hrbmu.edu.cn/CancerSEA/ or http://202.97.205.69/CancerSEA/), the first dedicated database that aims to comprehensively explore distinct functional states of cancer cells at the single-cell level. CancerSEA portrays a cancer single-cell functional state atlas, involving 14 functional states (including stemness, invasion, metastasis, proliferation, EMT, angiogenesis, apoptosis, cell cycle, differentiation, DNA damage, DNA repair, hypoxia, inflammation and quiescence) of 41 900 cancer single cells from 25 cancer types. It allows querying which functional states are associated with the gene (or gene list) of interest in different cancers. CancerSEA also provides functional state-associated PCG/lncRNA repertoires across all cancers, in specific cancers, and in individual cancer single-cell datasets. 
In summary, CancerSEA provides a user-friendly interface for comprehensively searching, browsing, visualizing and downloading functional state activity profiles of tens of thousands of cancer single cells and the corresponding PCGs/lncRNAs expression profiles.",CancerSEA,0.997147083,NA,0,CancerSEA,0.997147083,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30329095,http://www.cancersplicingqtl-hust.com,"CancerSplicingQTL: a database for genome-wide identification of splicing QTLs in human cancer. Alternative splicing (AS) is a widespread process that increases structural transcript variation and proteome diversity. Aberrant splicing patterns are frequently observed in cancer initiation, progress, prognosis and therapy. Increasing evidence has demonstrated that AS events could undergo modulation by genetic variants. The identification of splicing quantitative trait loci (sQTLs), genetic variants that affect AS events, might represent an important step toward fully understanding the contribution of genetic variants in disease development. However, no database has yet been developed to systematically analyze sQTLs across multiple cancer types. Using genotype data from The Cancer Genome Atlas and corresponding AS values calculated by TCGASpliceSeq, we developed a computational pipeline to identify sQTLs from 9 026 tumor samples in 33 cancer types. We totally identified 4 599 598 sQTLs across all cancer types. We further performed survival analyses and identified 17 072 sQTLs associated with patient overall survival times. Furthermore, using genome-wide association study (GWAS) catalog data, we identified 1 180 132 sQTLs overlapping with known GWAS linkage disequilibrium regions. Finally, we constructed a user-friendly database, CancerSplicingQTL (http://www.cancersplicingqtl-hust.com/) for users to conveniently browse, search and download data of interest. 
This database provides an informative sQTL resource for further characterizing the potential functional roles of SNPs that control transcript isoforms in human cancer.",CancerSplicingQTL,0.997213602,NA,0,CancerSplicingQTL,0.997213602,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +27832200,http://www.imtech.res.in/raghava/cancertope,"A Platform for Designing Genome-Based Personalized Immunotherapy or Vaccine against Cancer. Due to advancement in sequencing technology, genomes of thousands of cancer tissues or cell-lines have been sequenced. Identification of cancer-specific epitopes or neoepitopes from cancer genomes is one of the major challenges in the field of immunotherapy or vaccine development. This paper describes a platform Cancertope, developed for designing genome-based immunotherapy or vaccine against a cancer cell. Broadly, the integrated resources on this platform are apportioned into three precise sections. First section explains a cancer-specific database of neoepitopes generated from genome of 905 cancer cell lines. This database harbors wide range of epitopes (e.g., B-cell, CD8+ T-cell, HLA class I, HLA class II) against 60 cancer-specific vaccine antigens. Second section describes a partially personalized module developed for predicting potential neoepitopes against a user-specific cancer genome. Finally, we describe a fully personalized module developed for identification of neoepitopes from genomes of cancerous and healthy cells of a cancer-patient. In order to assist the scientific community, wide range of tools are incorporated in this platform that includes screening of epitopes against human reference proteome (http://www.imtech.res.in/raghava/cancertope/).",Cancertope,0.991436124,NA,0,Cancertope,0.991436124,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/10/2016 +31701131,http://cailab.labshare.cn/cancertracer,"CancerTracer: a curated database for intrapatient tumor heterogeneity. 
Comprehensive genomic analyses of cancers have revealed substantial intrapatient molecular heterogeneities that may explain some instances of drug resistance and treatment failures. Examination of the clonal composition of an individual tumor and its evolution through disease progression and treatment may enable identification of precise therapeutic targets for drug design. Multi-region and single-cell sequencing are powerful tools that can be used to capture intratumor heterogeneity. Here, we present a database we've named CancerTracer (http://cailab.labshare.cn/cancertracer): a manually curated database designed to track and characterize the evolutionary trajectories of tumor growth in individual patients. We collected over 6000 tumor samples from 1548 patients corresponding to 45 different types of cancer. Patient-specific tumor phylogenetic trees were constructed based on somatic mutations or copy number alterations identified in multiple biopsies. Using the structured heterogeneity data, researchers can identify common driver events shared by all tumor regions, and the heterogeneous somatic events present in different regions of a tumor of interest. The database can also be used to investigate the phylogenetic relationships between primary and metastatic tumors. It is our hope that CancerTracer will significantly improve our understanding of the evolutionary histories of tumors, and may facilitate the identification of predictive biomarkers for personalized cancer therapies.",CancerTracer,0.997136056,NA,0,CancerTracer,0.997136056,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +21718534,http://code.google.com/p/cangsdb,"CANGS DB: a stand-alone web-based database tool for processing, managing and analyzing 454 data in biodiversity studies. Background Next generation sequencing (NGS) is widely used in metagenomic and transcriptomic analyses in biodiversity. 
The ease of data generation provided by NGS platforms has allowed researchers to perform these analyses on their particular study systems. In particular the 454 platform has become the preferred choice for PCR amplicon based biodiversity surveys because it generates the longest sequence reads. Nevertheless, the handling and organization of massive amounts of sequencing data poses a major problem for the research community, particularly when multiple researchers are involved in data acquisition and analysis. An integrated and user-friendly tool, which performs quality control, read trimming, PCR primer removal, and data organization is desperately needed, therefore, to make data interpretation fast and manageable. Findings We developed CANGS DB (Cleaning and Analyzing Next Generation Sequences DataBase) a flexible, stand alone and user-friendly integrated database tool. CANGS DB is specifically designed to organize and manage the massive amount of sequencing data arising from various NGS projects. CANGS DB also provides an intuitive user interface for sequence trimming and quality control, taxonomy analysis and rarefaction analysis. Our database tool can be easily adapted to handle multiple sequencing projects in parallel with different sample information, amplicon sizes, primer sequences, and quality thresholds, which makes this software especially useful for non-bioinformaticians. Furthermore, CANGS DB is especially suited for projects where multiple users need to access the data. CANGS DB is available at http://code.google.com/p/cangsdb/. Conclusion CANGS DB provides a simple and user-friendly solution to process, store and analyze 454 sequencing data. 
Being a local database that is accessible through a user-friendly interface, CANGS DB provides the perfect tool for collaborative amplicon based biodiversity surveys without requiring prior bioinformatics skills.",CANGS,0.988113701,Analyzing Next Generation Sequences DataBase,0.793947458,CANGS,0.988113701,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/30/2011 +34345532,http://www.biomedical-web.com/cancerit,"CanImmunother: a manually curated database for identification of cancer immunotherapies associating with biomarkers, targets, and clinical effects. As immunotherapy is evolving into an essential armamentarium against cancers, numerous translational studies associated with relevant biomarkers, targets, and clinical effects have been reported in recent years. However, a large amount of associated experimental data remains unexplored due to the difficulty in accessibility and utilization. Here, we established a comprehensive high-quality database for cancer immunotherapy called CanImmunother (http://www.biomedical-web.com/cancerit/) through manual curation on 4515 publications. CanImmunother contains 3267 experimentally validated associations between 218 cancer sub-types across 34 body parts and 484 immunotherapies with 642 biomarkers, 108 targets, and 121 control therapies. Each association was manually curated by professional curators, incorporated with valuable annotation and cross references, and assigned with an association score for prioritization. To help clinicians and researchers in identifying and discovering better cancer immunotherapy and their respective biomarkers and targets, CanImmunother offers user-friendly web applications including search, browse, excel table, association prioritization, and network visualization. 
CanImmunother presents a landscape of experimental cancer immunotherapy association data, serving as a useful resource to improve our insight and to facilitate further discovery of advanced immunotherapy options for cancer patients.",CanImmunother,0.998033106,NA,0,CanImmunother,0.998033106,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/16/2021 +33942873,http://cannusedb.csic.es,"CANNUSE, a database of traditional Cannabis uses-an opportunity for new research. . Cannabis is one of the most versatile genera in terms of plant uses and has been exploited by humans for millennia due to its medicinal properties, strong fibres, nutritious seeds and psychoactive resin. Nowadays, Cannabis is the centre of many scientific studies, which mainly focus on its chemical composition and medicinal properties. Unfortunately, while new applications of this plant are continuously being developed, some of its traditional uses are becoming rare and even disappearing altogether. Information on traditional uses of Cannabis is vast, but it is scattered across many publication sources in different formats, so synthesis and standardization of these data are increasingly important. The CANNUSE database provides an organized information source for scientists and general public interested in different aspects of Cannabis use. It contains over 2300 entries from 649 publications related to medicinal, alimentary, fibre and other uses from different geographical areas and cultures around the world. We believe this database will serve as a starting point for new research and development strategies based on the traditional knowledge. Database URL: http://cannusedb.csic.es.",CANNUSE,0.991508305,NA,0,CANNUSE,0.991508305,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +"26657895, 30945201","http://cantata.amu.edu.pl, http://yeti.amu.edu.pl/CANTATA","CANTATAdb: A Collection of Plant Long Non-Coding RNAs. 
Long non-coding RNAs (lncRNAs) represent a class of potent regulators of gene expression that are found in a wide array of eukaryotes; however, our knowledge about these molecules in plants is still very limited. In particular, a number of model plant species still lack comprehensive data sets of lncRNAs and their annotations, and very little is known about their biological roles. To meet these shortcomings, we created an online database of lncRNAs in 10 model plant species. The lncRNAs were identified computationally using dozens of publicly available RNA sequencing (RNA-Seq) libraries. Expression values, coding potential, sequence alignments as well as other types of data provide annotation for the identified lncRNAs. In order to better characterize them, we investigated their potential roles in splicing modulation and deregulation of microRNA functions. The data are freely available for searching, browsing and downloading from an online database called CANTATAdb (http://cantata.amu.edu.pl, http://yeti.amu.edu.pl/CANTATA/).",CANTATAdb,0.954158604,NA,0,CANTATAdb,0.954158604,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +34174131,http://ithaka.rrp.demokritos.gr/CanVaS,"CanVaS: Documenting the genetic variation spectrum of Greek cancer patients. National genetic variation registries vastly increase the level of detail for the relevant population, while directly affecting patient management. Herein, we report CanVaS, a Cancer Variation reSource aiming to document the genetic variation of cancer patients in Greece. CanVaS comprises germline genetic data from 7,363 Greek individuals with a personal and/or family history of malignancy. The data set incorporates approximately 24,000 functionally annotated rare variants in 97 established or suspected cancer susceptibility genes. 
For each variant, allele frequency for the Greek population, interpretation for clinical significance, anonymized family and segregation information, as well as phenotypic traits of the carriers, are included. Moreover, information on the geographic distribution of the variants across the country is provided, enabling the study of Greek population isolates. Direct comparisons between Greek (sub)populations with relevant genetic resources are supported, allowing fine-grain localized adjustment of guidelines and clinical decision-making. Most importantly, anonymized data are available for download, while the Leiden Open Variation Database schema is adopted, enabling integration/interconnection with central resources. CanVaS could become a stepping-stone for a countrywide effort to characterize the cancer genetic variation landscape, concurrently supporting national and international cancer research. The database can be accessed at: http://ithaka.rrp.demokritos.gr/CanVaS.",CanVaS,0.996876478,Cancer Variation,0.886106948,CanVaS,0.996876478,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/6/2021 +22021380,http://www.bioinsilico.org/CAPSDB,"CAPS-DB: a structural classification of helix-capping motifs. The regions of the polypeptide chain immediately preceding or following an √ɬÉ√Ǭé√ɬÇ√Ǭ±-helix are known as Nt- and Ct cappings, respectively. Cappings play a central role stabilizing √ɬÉ√Ǭé√ɬÇ√Ǭ±-helices due to lack of intrahelical hydrogen bonds in the first and last turn. Sequence patterns of amino acid type preferences have been derived for cappings but the structural motifs associated to them are still unclassified. CAPS-DB is a database of clusters of structural patterns of different capping types. The clustering algorithm is based in the geometry and the (√ɬÉ√Ǭé√ɬÇ√Ǭ¶-√ɬÉ√Ǭè√ɬÇ√Ǭà)-space conformation of these regions. CAPS-DB is a relational database that allows the user to search, browse, inspect and retrieve structural data associated to cappings. 
The contents of CAPS-DB might be of interest to a wide range of scientist covering different areas such as protein design and engineering, structural biology and bioinformatics. The database is accessible at: http://www.bioinsilico.org/CAPSDB.",CAPS-DB,0.997070983,NA,0,CAPS-DB,0.997070983,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2011 +29509874,http://digbio.missouri.edu/CarbonylDB,"CarbonylDB: a curated data-resource of protein carbonylation sites. Motivation:Oxidative stress and protein damage have been associated with over 200 human ailments including cancer, stroke, neuro-degenerative diseases and aging. Protein carbonylation, a chemically diverse oxidative post-translational modification, is widely considered as the biomarker for oxidative stress and protein damage. Despite their importance and extensive studies, no database/resource on carbonylated proteins/sites exists. As such information is very useful to research in biology/medicine, we have manually curated a data-resource (CarbonylDB) of experimentally-confirmed carbonylated proteins/sites. Results:The CarbonylDB currently contains 1495 carbonylated proteins and 3781 sites from 21 species, with human, rat and yeast as the top three species. We have made further analyses of these carbonylated proteins/sites and presented their occurrence and occupancy patterns. Carbonylation site data on serum albumin, in particular, provides a fine model system to understand the dynamics of oxidative protein modifications/damage. Availability and implementation:The CarbonylDB is available as a web-resource and for download at http://digbio.missouri.edu/CarbonylDB/. Supplementary information:Supplementary data are available at Bioinformatics online.",CarbonylDB,0.994347632,NA,0,CarbonylDB,0.994347632,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2018 +"23650175, 27789705",http://arpcard.mcmaster.ca,"The comprehensive antibiotic resistance database. 
The field of antibiotic drug discovery and the monitoring of new antibiotic resistance elements have yet to fully exploit the power of the genome revolution. Despite the fact that the first genomes sequenced of free living organisms were those of bacteria, there have been few specialized bioinformatic tools developed to mine the growing amount of genomic data associated with pathogens. In particular, there are few tools to study the genetics and genomics of antibiotic resistance and how it impacts bacterial populations, ecology, and the clinic. We have initiated development of such tools in the form of the Comprehensive Antibiotic Research Database (CARD; http://arpcard.mcmaster.ca). The CARD integrates disparate molecular and sequence data, provides a unique organizing principle in the form of the Antibiotic Resistance Ontology (ARO), and can quickly identify putative antibiotic resistance genes in new unannotated genome sequences. This unique platform provides an informatic tool that bridges antibiotic resistance concerns in health care, agriculture, and the environment.",CARD,0.996455212,Comprehensive Antibiotic Resistance Database,0.990965346,CARD,0.996455212,2,NA,31665441,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/26/2016 +31665441,http://card.mcmaster.ca,"CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database. The Comprehensive Antibiotic Resistance Database (CARD; https://card.mcmaster.ca) is a curated resource providing reference DNA and protein sequences, detection models and bioinformatics tools on the molecular basis of bacterial antimicrobial resistance (AMR). 
CARD focuses on providing high-quality reference data and molecular sequences within a controlled vocabulary, the Antibiotic Resistance Ontology (ARO), designed by the CARD biocuration team to integrate with software development efforts for resistome analysis and prediction, such as CARD's Resistance Gene Identifier (RGI) software. Since 2017, CARD has expanded through extensive curation of reference sequences, revision of the ontological structure, curation of over 500 new AMR detection models, development of a new classification paradigm and expansion of analytical tools. Most notably, a new Resistomes & Variants module provides analysis and statistical summary of in silico predicted resistance variants from 82 pathogens and over 100 000 genomes. By adding these resistance variants to CARD, we are able to summarize predicted resistance using the information included in CARD, identify trends in AMR mobility and determine previously undescribed and novel resistance variants. Here, we describe updates and recent expansions to CARD and its biocuration process, including new resources for community biocuration of AMR molecular reference data.",CARD,0.994743566,Comprehensive Antibiotic Resistance Database,0.971033146,CARD,0.994743566,1,NA,"23650175.0, 27789705.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2020 +29939204,"http://bio-bigdata.hrbmu.edu.cn/CARDIO-LNCRNAS/, http://www.bio-bigdata.net/CARDIO-LNCRNAS","Landscape of the long non-coding RNA transcriptome in human heart. Long non-coding RNAs (lncRNAs) have been revealed to play essential roles in the human cardiovascular system. However, information about their mechanisms is limited, and a comprehensive view of cardiac lncRNAs is lacking from a multiple tissues perspective to date. Here, the landscape of the lncRNA transcriptome in human heart was summarized. 
We summarized all lncRNA transcripts from publicly available human transcriptome resources (156 heart samples and 210 samples from 29 other tissues) and systematically analysed all annotated and novel lncRNAs expressed in heart. A total of 7485 lncRNAs whose expression was elevated in heart (HE lncRNAs) and 453 lncRNAs expressed in all 30 analysed tissues (EIA lncRNAs) were extracted. Using various bioinformatics resources, methods and tools, the features of these lncRNAs were discussed from various perspectives, including genomic structure, conservation, dynamic variation during heart development, cis-regulation, differential expression in cardiovascular diseases and cancers as well as regulation at transcriptional and post-transcriptional levels. Afterwards, all the features discussed above were integrated into a user-friendly resource named CARDIO-LNCRNAS (http://bio-bigdata.hrbmu.edu.cn/CARDIO-LNCRNAS/ or http://www.bio-bigdata.net/CARDIO-LNCRNAS/). This study represents the first global view of lncRNAs in the human cardiovascular system based on multiple tissues and sheds light on the role of lncRNAs in developments and heart disorders.",CARDIO-LNCRNAS,0.93584047,NA,0,CARDIO-LNCRNAS,0.93584047,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/1/2019 +27635320,http://www.cardiosignal.org/database/cardiotf.html,"CardioTF, a database of deconstructing transcriptional circuits in the heart system. Background Information on cardiovascular gene transcription is fragmented and far behind the present requirements of the systems biology field. To create a comprehensive source of data for cardiovascular gene regulation and to facilitate a deeper understanding of genomic data, the CardioTF database was constructed. The purpose of this database is to collate information on cardiovascular transcription factors (TFs), position weight matrices (PWMs), and enhancer sequences discovered using the ChIP-seq method. 
Methods The Na√ɬÉ√ǬÉ√ɬÇ√ǬØve-Bayes algorithm was used to classify literature and identify all PubMed abstracts on cardiovascular development. The natural language learning tool GNAT was then used to identify corresponding gene names embedded within these abstracts. Local Perl scripts were used to integrate and dump data from public databases into the MariaDB management system (MySQL). In-house R scripts were written to analyze and visualize the results. Results Known cardiovascular TFs from humans and human homologs from fly, Ciona, zebrafish, frog, chicken, and mouse were identified and deposited in the database. PWMs from Jaspar, hPDI, and UniPROBE databases were deposited in the database and can be retrieved using their corresponding TF names. Gene enhancer regions from various sources of ChIP-seq data were deposited into the database and were able to be visualized by graphical output. Besides biocuration, mouse homologs of the 81 core cardiac TFs were selected using a Na√ɬÉ√ǬÉ√ɬÇ√ǬØve-Bayes approach and then by intersecting four independent data sources: RNA profiling, expert annotation, PubMed abstracts and phenotype. Discussion The CardioTF database can be used as a portal to construct transcriptional network of cardiac development. Availability and implementation Database URL: http://www.cardiosignal.org/database/cardiotf.html.",CardioTF,0.989912987,NA,0,CardioTF,0.989912987,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/23/2016 +23794735,http://carlsbad.health.unm.edu/carlsbad,"The CARLSBAD database: a confederated database of chemical bioactivities. Many bioactivity databases offer information regarding the biological activity of small molecules on protein targets. Information in these databases is often hard to resolve with certainty because of subsetting different data in a variety of formats; use of different bioactivity metrics; use of different identifiers for chemicals and proteins; and having to access different query interfaces, respectively. 
Given the multitude of data sources, interfaces and standards, it is challenging to gather relevant facts and make appropriate connections and decisions regarding chemical-protein associations. The CARLSBAD database has been developed as an integrated resource, focused on high-quality subsets from several bioactivity databases, which are aggregated and presented in a uniform manner, suitable for the study of the relationships between small molecules and targets. In contrast to data collection resources, CARLSBAD provides a single normalized activity value of a given type for each unique chemical-protein target pair. Two types of scaffold perception methods have been implemented and are available for datamining: HierS (hierarchical scaffolds) and MCES (maximum common edge subgraph). The 2012 release of CARLSBAD contains 439 985 unique chemical structures, mapped onto 1,420 889 unique bioactivities, and annotated with 277 140 HierS scaffolds and 54 135 MCES chemical patterns, respectively. Of the 890 323 unique structure-target pairs curated in CARLSBAD, 13.95% are aggregated from multiple structure-target values: 94 975 are aggregated from two bioactivities, 14 544 from three, 7 930 from four and 2214 have five bioactivities, respectively. CARLSBAD captures bioactivities and tags for 1435 unique chemical structures of active pharmaceutical ingredients (i.e. 'drugs'). CARLSBAD processing resulted in a net 17.3% data reduction for chemicals, 34.3% reduction for bioactivities, 23% reduction for HierS and 25% reduction for MCES, respectively. The CARLSBAD database supports a knowledge mining system that provides non-specialists with novel integrative ways of exploring chemical biology space to facilitate knowledge mining in drug discovery and repurposing. 
Database URL: http://carlsbad.health.unm.edu/carlsbad/.",CARLSBAD,0.99360311,NA,0,CARLSBAD,0.99360311,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/21/2013 +26040787,http://bioinfo.sibs.ac.cn/carmo,"CARMO: a comprehensive annotation platform for functional exploration of rice multi-omics data. High-throughput technology is gradually becoming a powerful tool for routine research in rice. Interpretation of biological significance from the huge amount of data is a critical but non-trivial task, especially for rice, for which gene annotations rely heavily on sequence similarity rather than direct experimental evidence. Here we describe the annotation platform for comprehensive annotation of rice multi-omics data (CARMO), which provides multiple web-based analysis tools for in-depth data mining and visualization. The central idea involves systematic integration of 1819 samples from omics studies and diverse sources of functional evidence (15√ɬÉ√ǬÇ√ɬÇ√Ǭ†401 terms), which are further organized into gene sets and higher-level gene modules. In this way, the high-throughput data may easily be compared across studies and platforms, and integration of multiple types of evidence allows biological interpretation from the level of gene functional modules with high confidence. In addition, the functions and pathways for thousands of genes lacking description or validation may be deduced based on concerted expression of genes within the constructed co-expression networks or gene modules. Overall, CARMO provides comprehensive annotations for transcriptomic datasets, epi-genomic modification sites, single nucleotide polymorphisms identified from genome re-sequencing, and the large gene lists derived from these omics studies. Well-organized results, as well as multiple tools for interactive visualization, are available through a user-friendly web interface. 
Finally, we illustrate how CARMO enables biological insights using four examples, demonstrating that CARMO is a highly useful resource for intensive data mining and hypothesis generation based on rice multi-omics data. CARMO is freely available online (http://bioinfo.sibs.ac.cn/carmo).",CARMO,0.95898664,comprehensive annotation of rice multi-omics data,0.74433168,CARMO,0.95898664,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2015 +28365725,http://carotenoiddb.jp,"Carotenoids Database: structures, chemical fingerprints and distribution among organisms. . To promote understanding of how organisms are related via carotenoids, either evolutionarily or symbiotically, or in food chains through natural histories, we built the Carotenoids Database. This provides chemical information on 1117 natural carotenoids with 683 source organisms. For extracting organisms closely related through the biosynthesis of carotenoids, we offer a new similarity search system 'Search similar carotenoids' using our original chemical fingerprint 'Carotenoid DB Chemical Fingerprints'. These Carotenoid DB Chemical Fingerprints describe the chemical substructure and the modification details based upon International Union of Pure and Applied Chemistry (IUPAC) semi-systematic names of the carotenoids. The fingerprints also allow (i) easier prediction of six biological functions of carotenoids: provitamin A, membrane stabilizers, odorous substances, allelochemicals, antiproliferative activity and reverse MDR activity against cancer cells, (ii) easier classification of carotenoid structures, (iii) partial and exact structure searching and (iv) easier extraction of structural isomers and stereoisomers. We believe this to be the first attempt to establish fingerprints using the IUPAC semi-systematic names. For extracting close profiled organisms, we provide a new tool 'Search similar profiled organisms'. 
Our current statistics show some insights into natural history: carotenoids seem to have been spread largely by bacteria, as they produce C30, C40, C45 and C50 carotenoids, with the widest range of end groups, and they share a small portion of C40 carotenoids with eukaryotes. Archaea share an even smaller portion with eukaryotes. Eukaryotes then have evolved a considerable variety of C40 carotenoids. Considering carotenoids, eukaryotes seem more closely related to bacteria than to archaea aside from 16S rRNA lineage analysis. : http://carotenoiddb.jp.",NA,0,Carotenoid,0.51871109,Carotenoid,0.51871109,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2017 +25267795,"http://apiaceae.njau.edu.cn/car√ɬÉ√ǬÉ√ɬÇ√ǬÉ, http://apiaceae.njau.edu.cn/carrotdb","CarrotDB: a genomic and transcriptomic database for carrot. . Carrot (Daucus carota L.) is an economically important vegetable worldwide and is the largest source of carotenoids and provitamin A in the human diet. Given the importance of this vegetable to humans, research and breeding communities on carrot should obtain useful genomic and transcriptomic information. The first whole-genome sequences of 'DC-27' carrot were de novo assembled and analyzed. Transcriptomic sequences of 14 carrot genotypes were downloaded from the Sequence Read Archive (SRA) database of National Center for Biotechnology Information (NCBI) and mapped to the whole-genome sequence before assembly. Based on these data sets, the first Web-based genomic and transcriptomic database for D. carota (CarrotDB) was developed (database homepage: http://apiaceae.njau.edu.cn/car√ɬÉ√ǬÇ√ɬÇ√Ǭ†rotdb). CarrotDB offers the tools of Genome Map and Basic Local Alignment Search Tool. Using these tools, users can search certain target genes and simple sequence repeats along with designed primers of 'DC-27'. 
Assembled transcriptomic sequences along with fragments per kilobase of transcript sequence per millions base pairs sequenced information (FPKM) information of 14 carrot genotypes are also provided. Users can download de novo assembled whole-genome sequences, putative gene sequences and putative protein sequences of 'DC-27'. Users can also download transcriptome sequence assemblies of 14 carrot genotypes along with their FPKM information. A total of 2826 transcription factor (TF) genes classified into 57 families were identified in the entire genome sequences. These TF genes were embedded in CarrotDB as an interface. The 'GERMPLASM' part of CarrotDB also offers taproot photos of 45 carrot genotypes and a table containing accession numbers, names, countries of origin and colors of cortex, phloem and xylem parts of taproots corresponding to each carrot genotype. CarrotDB will be continuously updated with new information. Database URL: http://apiaceae.njau.edu.cn/carrotdb/",CarrotDB,0.99532944,NA,0,CarrotDB,0.99532944,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/29/2014 +31024751,http://biokinet.belozersky.msu.ru/casbench,"CASBench: A Benchmarking Set of Proteins with Annotated Catalytic and Allosteric Sites in Their Structures. In recent years, the phenomenon of allostery has witnessed growing attention driven by a fundamental interest in new ways to regulate the functional properties of proteins, as well as the prospects of using allosteric sites as targets to design novel drugs with lower toxicity due to a higher selectivity of binding and specificity of the mechanism of action. The currently available bioinformatic methods can sometimes correctly detect previously unknown ligand binding sites in protein structures. 
However, the development of universal and more efficient approaches requires a deeper understanding of the common and distinctive features of the structural organization of both functional (catalytic) and allosteric sites, the evolution of their amino acid sequences in respective protein families, and allosteric communication pathways. The CASBench benchmark set contains 91 entries related to enzymes with both catalytic and allosteric sites within their structures annotated based on the experimental information from the Allosteric Database, Catalytic Site Atlas, and Protein Data Bank. The obtained dataset can be used to benchmark the performance of existing computational approaches and develop/train perspective algorithms to search for new catalytic and regulatory sites, as well as to study the mechanisms of protein regulation on a large collection of allosteric enzymes. Establishing a relationship between the structure, function, and regulation is expected to improve our understanding of the mechanisms of action of enzymes and open up new prospects for discovering new drugs and designing more efficient biocatalysts. The CASBench can be operated offline on a local computer or online using built-in interactive tools at https://biokinet.belozersky.msu.ru/casbench.",CASBench,0.995392978,NA,0,CASBench,0.995392978,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +35134148,http://www.xiejjlab.bio/cata,"CATA: a comprehensive chromatin accessibility database for cancer. Accessible chromatin refers to the active regions of a chromosome that are bound by many transcription factors (TFs). Changes in chromatin accessibility play a critical role in tumorigenesis. With the emergence of novel methods like Assay for Transposase-accessible Chromatin Sequencing, a sequencing method that maps chromatin-accessible regions (CARs) and enables the computational analysis of TF binding at chromatin-accessible sites, the regulatory landscape in cancer can be dissected. 
Herein, we developed a comprehensive cancer chromatin accessibility database named CATA, which aims to provide available resources of cancer CARs and to annotate their potential roles in the regulation of genes in a cancer type-specific manner. In this version, CATA stores 2√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ991√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ163 CARs from 23 cancer types, binding information of 1398 TFs within the CARs, and provides multiple annotations about these regions, including common single nucleotide polymorphisms (SNPs), risk SNPs, copy number variation, somatic mutations, motif changes, expression quantitative trait loci, methylation and CRISPR/Cas9 target loci. Moreover, CATA supports cancer survival analysis of the CAR-associated genes and provides detailed clinical information of the tumor samples. Database URL: CATA is available at http://www.xiejjlab.bio/cata/.",CATA,0.994815171,chromatin accessibility database,0.624158442,CATA,0.994815171,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +25392409,http://urgv.evry.inra.fr/CATdb,"GEM2Net: from gene expression modeling to -omics networks, a new CATdb module to investigate Arabidopsis thaliana genes involved in stress response. CATdb (http://urgv.evry.inra.fr/CATdb) is a database providing a public access to a large collection of transcriptomic data, mainly for Arabidopsis but also for other plants. This resource has the rare advantage to contain several thousands of microarray experiments obtained with the same technical protocol and analyzed by the same statistical pipelines. In this paper, we present GEM2Net, a new module of CATdb that takes advantage of this homogeneous dataset to mine co-expression units and decipher Arabidopsis gene functions. GEM2Net explores 387 stress conditions organized into 18 biotic and abiotic stress categories. For each one, a model-based clustering is applied on expression differences to identify clusters of co-expressed genes. 
To characterize functions associated with these clusters, various resources are analyzed and integrated: Gene Ontology, subcellular localization of proteins, Hormone Families, Transcription Factor Families and a refined stress-related gene list associated to publications. Exploiting protein-protein interactions and transcription factors-targets interactions enables to display gene networks. GEM2Net presents the analysis of the 18 stress categories, in which 17,264 genes are involved and organized within 681 co-expression clusters. The meta-data analyses were stored and organized to compose a dynamic Web resource.",CATdb,0.994086862,NA,0,CATdb,0.994086862,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +30398663,http://www.cathdb.info,"CATH: expanding the horizons of structure-based functional annotations for genome sequences. This article provides an update of the latest data and developments within the CATH protein structure classification database (http://www.cathdb.info). The resource provides two levels of release: CATH-B, a daily snapshot of the latest structural domain boundaries and superfamily assignments, and CATH+, which adds layers of derived data, such as predicted sequence domains, functional annotations and functional clustering (known as Functional Families or FunFams). The most recent CATH+ release (version 4.2) provides a huge update in the coverage of structural data. This release increases the number of fully- classified domains by over 40% (from 308 999 to 434 857 structural domains), corresponding to an almost two- fold increase in sequence data (from 53 million to over 95 million predicted domains) organised into 6119 superfamilies. The coverage of high-resolution, protein PDB chains that contain at least one assigned CATH domain is now 90.2% (increased from 82.3% in the previous release). 
A number of highly requested features have also been implemented in our web pages: allowing the user to view an alignment between their query sequence and a representative FunFam structure and providing tools that make it easier to view the full structural context (multi-domain architecture) of domains and chains.",CATH,0.967545748,NA,0,CATH,0.967545748,1,"25348408.0, 27899584.0",NA,low_prob_best_name,do not remove,conflicting record(s) to be removed,NA,NA,NA,NA,1/1/2019 +"25348408, 27899584",http://www.cathdb.info,"CATH: comprehensive structural and functional annotations for genome sequences. The latest version of the CATH-Gene3D protein structure classification database (4.0, http://www.cathdb.info) provides annotations for over 235,000 protein domain structures and includes 25 million domain predictions. This article provides an update on the major developments in the 2 years since the last publication in this journal including: significant improvements to the predictive power of our functional families (FunFams); the release of our 'current' putative domain assignments (CATH-B); a new, strictly non-redundant data set of CATH domains suitable for homology benchmarking experiments (CATH-40) and a number of improvements to the web pages.",CATH-Gene3D,0.957338452,NA,0,CATH-Gene3D,0.957338452,2,30398663,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,11/28/2016 +23493402,http://www.cathacyc.org,"CathaCyc, a metabolic pathway database built from Catharanthus roseus RNA-Seq data. The medicinal plant Madagascar periwinkle (Catharanthus roseus) synthesizes numerous terpenoid indole alkaloids (TIAs), such as the anticancer drugs vinblastine and vincristine. The TIA pathway operates in a complex metabolic network that steers plant growth and survival. 
Pathway databases and metabolic networks reconstructed from 'omics' sequence data can help to discover missing enzymes, study metabolic pathway evolution and, ultimately, engineer metabolic pathways. To date, such databases have mainly been built for model plant species with sequenced genomes. Although genome sequence data are not available for most medicinal plant species, next-generation sequencing is now extensively employed to create comprehensive medicinal plant transcriptome sequence resources. Here we report on the construction of CathaCyc, a detailed metabolic pathway database, from C. roseus RNA-Seq data sets. CathaCyc (version 1.0) contains 390 pathways with 1,347 assigned enzymes and spans primary and secondary metabolism. Curation of the pathways linked with the synthesis of TIAs and triterpenoids, their primary metabolic precursors, and their elicitors, the jasmonate hormones, demonstrated that RNA-Seq resources are suitable for the construction of pathway databases. CathaCyc is accessible online (http://www.cathacyc.org) and offers a range of tools for the visualization and analysis of metabolic networks and 'omics' data. Overlay with expression data from publicly available RNA-Seq resources demonstrated that two well-characterized C. roseus terpenoid pathways, those of TIAs and triterpenoids, are subject to distinct regulation by both developmental and environmental cues. We anticipate that databases such as CathaCyc will become key to the study and exploitation of the metabolism of medicinal plants.",CathaCyc,0.995314121,NA,0,CathaCyc,0.995314121,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/14/2013 +25887162,http://causalbionet.com,"Causal biological network database: a comprehensive platform of causal biological network models focused on the pulmonary and vascular systems. 
With the wealth of publications and data available, powerful and transparent computational approaches are required to represent measured data and scientific knowledge in a computable and searchable format. We developed a set of biological network models, scripted in the Biological Expression Language, that reflect causal signaling pathways across a wide range of biological processes, including cell fate, cell stress, cell proliferation, inflammation, tissue repair and angiogenesis in the pulmonary and cardiovascular context. This comprehensive collection of networks is now freely available to the scientific community in a centralized web-based repository, the Causal Biological Network database, which is composed of over 120 manually curated and well annotated biological network models and can be accessed at http://causalbionet.com. The website accesses a MongoDB, which stores all versions of the networks as JSON objects and allows users to search for genes, proteins, biological processes, small molecules and keywords in the network descriptions to retrieve biological networks of interest. The content of the networks can be visualized and browsed. Nodes and edges can be filtered and all supporting evidence for the edges can be browsed and is linked to the original articles in PubMed. Moreover, networks may be downloaded for further visualization and evaluation. Database URL: http://causalbionet.com",Causal,0.838220835,Causal biological network database,0.702677703,Causal,0.838220835,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,4/17/2015 +31691819,http://mulinlab.org/causaldb,"CAUSALdb: a database for disease/trait causal variants identified using summary statistics of genome-wide association studies. Genome-wide association studies (GWASs) have revolutionized the field of complex trait genetics over the past decade, yet for most of the significant genotype-phenotype associations the true causal variants remain unknown. 
Identifying and interpreting how causal genetic variants confer disease susceptibility is still a big challenge. Herein we introduce a new database, CAUSALdb, to integrate the most comprehensive GWAS summary statistics to date and identify credible sets of potential causal variants using uniformly processed fine-mapping. The database has six major features: it (i) curates 3052 high-quality, fine-mappable GWAS summary statistics across five human super-populations and 2629 unique traits; (ii) estimates causal probabilities of all genetic variants in GWAS significant loci using three state-of-the-art fine-mapping tools; (iii) maps the reported traits to a powerful ontology MeSH, making it simple for users to browse studies on the trait tree; (iv) incorporates highly interactive Manhattan and LocusZoom-like plots to allow visualization of credible sets in a single web page more efficiently; (v) enables online comparison of causal relations on variant-, gene- and trait-levels among studies with different sample sizes or populations and (vi) offers comprehensive variant annotations by integrating massive base-wise and allele-specific functional annotations. CAUSALdb is freely available at http://mulinlab.org/causaldb.",CAUSALdb,0.994969904,NA,0,CAUSALdb,0.994969904,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +"23514094, 24270786",http://www.cazy.org,"Expansion of the enzymatic repertoire of the CAZy database to integrate auxiliary redox enzymes. Background Since its inception, the carbohydrate-active enzymes database (CAZy; http://www.cazy.org) has described the families of enzymes that cleave or build complex carbohydrates, namely the glycoside hydrolases (GH), the polysaccharide lyases (PL), the carbohydrate esterases (CE), the glycosyltransferases (GT) and their appended non-catalytic carbohydrate-binding modules (CBM). 
The recent discovery that members of families CBM33 and family GH61 are in fact lytic polysaccharide monooxygenases (LPMO), demands a reclassification of these families into a suitable category. Results Because lignin is invariably found together with polysaccharides in the plant cell wall and because lignin fragments are likely to act in concert with (LPMO), we have decided to join the families of lignin degradation enzymes to the LPMO families and launch a new CAZy class that we name ""Auxiliary Activities"" in order to accommodate a range of enzyme mechanisms and substrates related to lignocellulose conversion. Comparative analyses of these auxiliary activities in 41 fungal genomes reveal a pertinent division of several fungal groups and subgroups combining their phylogenetic origin and their nutritional mode (white vs. brown rot). Conclusions The new class introduced in the CAZy database extends the traditional CAZy families, and provides a better coverage of the full extent of the lignocellulose breakdown machinery.",CAZy,0.99791199,Carbohydrate-Active Enzymes database,0.980871044,CAZy,0.99791199,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2013 +29040563,http://www.cazypedia.org,"Ten years of CAZypedia: a living encyclopedia of carbohydrate-active enzymes. CAZypedia was initiated in 2007 to create a comprehensive, living encyclopedia of the carbohydrate-active enzymes (CAZymes) and associated carbohydrate-binding modules involved in the synthesis, modification and degradation of complex carbohydrates. CAZypedia is closely connected with the actively curated CAZy database, which provides a sequence-based foundation for the biochemical, mechanistic and structural characterization of these diverse proteins. Now celebrating its 10th anniversary online, CAZypedia is a successful example of dynamic, community-driven and expert-based biocuration. 
CAZypedia is an open-access resource available at URL http://www.cazypedia.org.",CAZypedia,0.9983778,NA,0,CAZypedia,0.9983778,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2018 +26660198,http://search.bwh.harvard.edu/new/CBDatabase.html,"CB Database: A change blindness database for objects in natural indoor scenes. Change blindness has been a topic of interest in cognitive sciences for decades. Change detection experiments are frequently used for studying various research topics such as attention and perception. However, creating change detection stimuli is tedious and there is no open repository of such stimuli using natural scenes. We introduce the Change Blindness (CB) Database with object changes in 130 colored images of natural indoor scenes. The size and eccentricity are provided for all the changes as well as reaction time data from a baseline experiment. In addition, we have two specialized satellite databases that are subsets of the 130 images. In one set, changes are seen in rooms or in mirrors in those rooms (Mirror Change Database). In the other, changes occur in a room or out a window (Window Change Database). Both the sets have controlled background, change size, and eccentricity. The CB Database is intended to provide researchers with a stimulus set of natural scenes with defined stimulus parameters that can be used for a wide range of experiments. The CB Database can be found at http://search.bwh.harvard.edu/new/CBDatabase.html .",CB,0.666790307,Blindness,0.491330385,CB,0.666790307,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2016 +30717315,http://sysbio.suda.edu.cn/CBD/index.html,"Potential Applications of DNA, RNA and Protein Biomarkers in Diagnosis, Therapy and Prognosis for Colorectal Cancer: A Study from Databases to AI-Assisted Verification. . 
In order to find out the most valuable biomarkers and pathways for diagnosis, therapy and prognosis in colorectal cancer (CRC) we have collected the published CRC biomarkers and established a CRC biomarker database (CBD: http://sysbio.suda.edu.cn/CBD/index.html). In this study, we analysed the single and multiple DNA, RNA and protein biomarkers as well as their positions in cancer related pathways and protein-protein interaction (PPI) networks to describe their potential applications in diagnosis, therapy and prognosis. CRC biomarkers were collected from the CBD. The RNA and protein biomarkers were matched to their corresponding DNAs by the miRDB database and the PubMed Gene database, respectively. The PPI networks were used to investigate the relationships between protein biomarkers and further detect the multiple biomarkers. The Kyoto Encyclopaedia of Genes and Genomes (KEGG) pathway enrichment analysis and Gene Ontology (GO) annotation were used to analyse biological functions of the biomarkers. AI classification techniques were utilized to further verify the significances of the multiple biomarkers in diagnosis and prognosis for CRC. We showed that a large number of the DNA, RNA and protein biomarkers were associated with the diagnosis, therapy and prognosis in various degrees in the CRC biomarker networks. The CRC biomarkers were closely related to the CRC initiation and progression. Moreover, the biomarkers played critical roles in cellular proliferation, apoptosis and angiogenesis and they were involved in Ras, p53 and PI3K pathways. There were overlaps among the DNA, RNA and protein biomarkers. AI classification verifications showed that the combined multiple protein biomarkers played important roles to accurate early diagnosis and predict outcome for CRC. There were several single and multiple CRC protein biomarkers which were associated with diagnosis, therapy and prognosis in CRC. 
Further, AI-assisted analysis revealed that multiple biomarkers had potential applications for diagnosis and prognosis in CRC.",CBD,0.993405302,CRC biomarker database,0.776337951,CBD,0.993405302,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2019 +22588877,http://cbioportal.org,"The cBio cancer genomics portal: an open platform for exploring multidimensional cancer genomics data. The cBio Cancer Genomics Portal (http://cbioportal.org) is an open-access resource for interactive exploration of multidimensional cancer genomics data sets, currently providing access to data from more than 5,000 tumor samples from 20 cancer studies. The cBio Cancer Genomics Portal significantly lowers the barriers between complex genomic data and cancer researchers who want rapid, intuitive, and high-quality access to molecular profiles and clinical attributes from large-scale cancer genomics projects and empowers researchers to translate these rich data sets into biologic insights and clinical applications.",cBio,0.957699895,NA,0,cBio,0.957699895,1,"23550210.0, 31308250.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,5/1/2012 +"23550210, 31308250",http://cbioportal.org,"Integrative analysis of complex cancer genomics and clinical profiles using the cBioPortal. The cBioPortal for Cancer Genomics (http://cbioportal.org) provides a Web resource for exploring, visualizing, and analyzing multidimensional cancer genomics data. The portal reduces molecular profiling data from cancer tissues and cell lines into readily understandable genetic, epigenetic, gene expression, and proteomic events. The query interface combined with customized data storage enables researchers to interactively explore genetic alterations across samples, genes, and pathways and, when available in the underlying data, to link these to clinical outcomes. 
The portal provides graphical summaries of gene-level data from multiple platforms, network visualization and analysis, survival analysis, patient-centric queries, and software programmatic access. The intuitive Web interface of the portal makes complex cancer genomics profiles accessible to researchers and clinicians without requiring bioinformatics expertise, thus facilitating biological discoveries. Here, we provide a practical guide to the analysis and visualization features of the cBioPortal for Cancer Genomics.",cBioPortal,0.992826402,NA,0,cBioPortal,0.992826402,2,22588877,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,NA,7/15/2019 +29020642,http://cbit.maastrichtuniversity.nl,"cBiT: A transcriptomics database for innovative biomaterial engineering. Creating biomaterials that are suited for clinical application is still hampered by a lack of understanding of the interaction between a cell and the biomaterial surface it grows on. This surface communication can strongly impact cellular behavior, which in turn affects the chances of a successful interaction between a material and the host tissue. Transcriptomics data have previously been linked to measurements of biomaterial properties in order to explain the biological mechanisms underlying these cell-biomaterial interactions. However, such multi-assay data are highly complex and therefore require careful and unambiguous characterization and storage. Failure to do so may result in loss of valuable data or erroneous data analysis. In order to start a new initiative that tackles these issues and offers a platform for innovative biomaterial development, we have created a publically accessible repository called The Compendium for Biomaterial Transcriptomics (cBiT, https://cbit.maastrichtuniversity.nl). cBiT is a data warehouse that gives users the opportunity to search through biomaterial-based transcriptomics data sets using a web interface. 
Data of interest can be selected and downloaded, together with associated measurements of material properties. Researchers are also invited to add their data to cBiT in order to further enhance its scientific value. We aim to make cBiT the hub for biomaterial-associated data, thereby enabling major contributions to a more efficient development of new materials with improved body integration. Here, we describe the structure of cBiT and provide a use case with clinically applied materials to demonstrate how cBiT can be used to correlate data across transcriptomics studies.",cBiT,0.986032426,Compendium for Biomaterial Transcriptomics,0.794626407,cBiT,0.986032426,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/3/2017 +29753807,http://cabgrid.res.in/cblncrnadb,"Genome-wide identification and characterization of lncRNAs and miRNAs in cluster bean (Cyamopsis tetragonoloba). Long non coding RNAs (lncRNAs) are a class of non-protein coding RNAs that play a crucial role in most of the biological activities like nodule metabolism, flowering time and male sterility. Quite often, the function of lncRNAs is species-specific in nature. Thus an attempt has been made in cluster bean (Cyamopsis tetragonoloba) for the first time to computationally identify lncRNAs based on a proposed index and study their targeted genes. Further, these targeted genes of lncRNAs were identified and characterized for their role in various biological processes like stress mechanisms, DNA damage repair, cell wall synthesis. Besides, lncRNAs and miRNAs bearing Simple Sequence Repeats (SSRs) were identified that contribute towards biogenesis of small non-coding RNAs. Moreover, five novel endogenous Target Mimic lncRNAs (eTMs) were identified that may disrupt the miRNA-mRNA regulations. 
For easy understanding and usability, a database CbLncRNAdb has been developed and made available at http://cabgrid.res.in/cblncrnadb.",CbLncRNAdb,0.888481498,NA,0,CbLncRNAdb,0.888481498,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/26/2018 +25475113,http://14.139.227.92/mkumar/lactamasedb,"CBMAR: a comprehensive β-lactamase molecular annotation resource. β-Lactam antibiotics are among the most widely used antibiotics against microbial pathogens. However, enzymatic hydrolysis of these antibiotics by bacterial β-lactamases is increasingly compromising their efficiency. Although new generation β-lactam antibiotics have been developed to combat antibiotic resistance, β-lactamases have also evolved along with the new variants of the substrate. A strong selection pressure from the newer generation of β-lactam antibiotics has resulted in evolution of different families within each class of β-lactamase. To facilitate detailed characterization of different families of β-lactamases, we have created a database, CBMAR, which facilitates comprehensive molecular annotation and discovery of novel β-lactamases. As against the limited scope of other existing similar databases, CBMAR provides information useful for molecular and biochemical characterization of each family of β-lactamase. The basic architecture of CBMAR is based on Ambler classification, which divides β-lactamases as serine (Classes A, C and D) and metallo-β-lactamases (Class B). Each class is further divided into several families on the basis of their hydrolytic character. In CBMAR, each family is annotated with (i) sequence variability, (ii) antibiotic resistance profile, (iii) inhibitor susceptibility, (iv) active site, (v) family fingerprints, (vi) mutational profile, (vii) variants, (viii) gene location, (ix) phylogenetic tree and several other features. 
Each entry also has external links to the relevant protein/nucleotide sequence and structure databases. The database also supports sequence similarity searches using BLAST and assigns a new β-lactamase protein to its respective family on the basis of family-specific fingerprint. Database URL: http://14.139.227.92/mkumar/lactamasedb",CBMAR,0.971514285,NA,0,CBMAR,0.971514285,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/3/2014 +23228284,http://compfly.bio.ub.es/CBS,"CBS: an open platform that integrates predictive methods and epigenetics information to characterize conserved regulatory features in multiple Drosophila genomes. Background Information about the composition of regulatory regions is of great value for designing experiments to functionally characterize gene expression. The multiplicity of available applications to predict transcription factor binding sites in a particular locus contrasts with the substantial computational expertise that is demanded to manipulate them, which may constitute a potential barrier for the experimental community. Results CBS (Conserved regulatory Binding Sites, http://compfly.bio.ub.es/CBS) is a public platform of evolutionarily conserved binding sites and enhancers predicted in multiple Drosophila genomes that is furnished with published chromatin signatures associated to transcriptionally active regions and other experimental sources of information. The rapid access to this novel body of knowledge through a user-friendly web interface enables non-expert users to identify the binding sequences available for any particular gene, transcription factor, or genome region. 
Conclusions The CBS platform is a powerful resource that provides tools for data mining individual sequences and groups of co-expressed genes with epigenomics information to conduct regulatory screenings in Drosophila.",CBS,0.998403251,Conserved regulatory Binding Sites,0.973923177,CBS,0.998403251,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/10/2012 +25540181,http://wwpdb.org,"The chemical component dictionary: complete descriptions of constituent molecules in experimentally determined 3D macromolecules in the Protein Data Bank. Unlabelled The Chemical Component Dictionary (CCD) is a chemical reference data resource that describes all residue and small molecule components found in Protein Data Bank (PDB) entries. The CCD contains detailed chemical descriptions for standard and modified amino acids/nucleotides, small molecule ligands and solvent molecules. Each chemical definition includes descriptions of chemical properties such as stereochemical assignments, chemical descriptors, systematic chemical names and idealized coordinates. The content, preparation, validation and distribution of this CCD chemical reference dataset are described. Availability and implementation The CCD is updated regularly in conjunction with the scheduled weekly release of new PDB structure data. The CCD and amino acid variant reference datasets are hosted in the public PDB ftp repository at ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz, ftp://ftp.wwpdb.org/pub/pdb/data/monomers/aa-variants-v1.cif.gz, and its mirror sites, and can be accessed from http://wwpdb.org. Contact jwest@rcsb.rutgers.edu. 
Supplementary information Supplementary data are available at Bioinformatics online.",CCD,0.83908997,The Chemical Component Dictionary,0.668254346,CCD,0.83908997,1,"27450113.0, 28296894.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: URL scramble,NA,NA,12/2/2014 +29126148,http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi,"Consensus coding sequence (CCDS) database: a standardized set of human and mouse protein-coding regions supported by expert curation. The Consensus Coding Sequence (CCDS) project provides a dataset of protein-coding regions that are identically annotated on the human and mouse reference genome assembly in genome annotations produced independently by NCBI and the Ensembl group at EMBL-EBI. This dataset is the product of an international collaboration that includes NCBI, Ensembl, HUGO Gene Nomenclature Committee, Mouse Genome Informatics and University of California, Santa Cruz. Identically annotated coding regions, which are generated using an automated pipeline and pass multiple quality assurance checks, are assigned a stable and tracked identifier (CCDS ID). Additionally, coordinated manual review by expert curators from the CCDS collaboration helps in maintaining the integrity and high quality of the dataset. The CCDS data are available through an interactive web page (https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi) and an FTP site (ftp://ftp.ncbi.nlm.nih.gov/pub/CCDS/). In this paper, we outline the ongoing work, growth and stability of the CCDS dataset and provide updates on new collaboration members and new features added to the CCDS user interface. 
We also present expert curation scenarios, with specific examples highlighting the importance of an accurate reference genome assembly and the crucial role played by input from the research community.",CCDS,0.956499179,Consensus Coding Sequence,0.615940392,CCDS,0.956499179,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +32386544,http://atlas.brain-map.org,"The Allen Mouse Brain Common Coordinate Framework: A 3D Reference Atlas. Recent large-scale collaborations are generating major surveys of cell types and connections in the mouse brain, collecting large amounts of data across modalities, spatial scales, and brain areas. Successful integration of these data requires a standard 3D reference atlas. Here, we present the Allen Mouse Brain Common Coordinate Framework (CCFv3) as such a resource. We constructed an average template brain at 10 μm voxel resolution by interpolating high resolution in-plane serial two-photon tomography images with 100 μm z-sampling from 1,675 young adult C57BL/6J mice. Then, using multimodal reference data, we parcellated the entire brain directly in 3D, labeling every voxel with a brain structure spanning 43 isocortical areas and their layers, 329 subcortical gray matter structures, 81 fiber tracts, and 8 ventricular structures. CCFv3 can be used to analyze, visualize, and integrate multimodal and multiscale datasets in 3D and is openly accessible (https://atlas.brain-map.org/).",CCFv3,0.989022076,Mouse Brain Common Coordinate Framework,0.890492062,CCFv3,0.989022076,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/7/2020 +28147217,http://ccg.xingene.net,"CCG: an integrative resource of cancer protein-coding genes and long noncoding RNAs. The identification of cancer genes remains a main aim of cancer research. 
With the advances of high-throughput sequencing technologies, thousands of novel cancer genes were identified through recurrent mutation analyses and differential expression analyses between normal tissues and tumors in large populations. Many databases were developed to document the cancer genes. However, no public database providing both cancer protein-coding genes and cancer lncRNAs is available presently. Here, we present the Catalogue of Cancer Genes (CCG) database (http://ccg.xingene.net), a catalogue of cancer genes. It includes both well-supported and candidate cancer protein-coding genes and cancer lncRNAs collected from literature search and public databases. In addition, uniform genomic aberration information (such as somatic mutation and copy number variation) and drug-gene interactions were assigned to cancer genes in the database. CCG represents an effort on integrative assembly of well-supported and candidate cancer protein-coding and long noncoding RNA genes and takes advantages of high-throughput sequencing results on large populations. With the help of CCG, users can easily access a comprehensive list of cancer genes as well as genomic aberration related with these genes. The availability of integrative information will facilitate the understanding of cancer mechanisms. In addition, drug-gene information in CCG provides a useful guide to the development of new anti-cancer drugs and selection of rational combination therapies.",CCG,0.97024858,Catalogue of Cancer Genes,0.896562586,CCG,0.97024858,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2016 +25190456,http://ccgd-starrlab.oit.umn.edu,"The Candidate Cancer Gene Database: a database of cancer driver genes from forward genetic screens in mice. Identification of cancer driver gene mutations is crucial for advancing cancer therapeutics. Due to the overwhelming number of passenger mutations in the human tumor genome, it is difficult to pinpoint causative driver genes. 
Using transposon mutagenesis in mice many laboratories have conducted forward genetic screens and identified thousands of candidate driver genes that are highly relevant to human cancer. Unfortunately, this information is difficult to access and utilize because it is scattered across multiple publications using different mouse genome builds and strength metrics. To improve access to these findings and facilitate meta-analyses, we developed the Candidate Cancer Gene Database (CCGD, http://ccgd-starrlab.oit.umn.edu/). The CCGD is a manually curated database containing a unified description of all identified candidate driver genes and the genomic location of transposon common insertion sites (CISs) from all currently published transposon-based screens. To demonstrate relevance to human cancer, we performed a modified gene set enrichment analysis using KEGG pathways and show that human cancer pathways are highly enriched in the database. We also used hierarchical clustering to identify pathways enriched in blood cancers compared to solid cancers. The CCGD is a novel resource available to scientists interested in the identification of genetic drivers of cancer.",CCGD,0.979682426,Candidate Cancer Gene Database,0.891004175,CCGD,0.979682426,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/4/2014 +30208340,http://db.cbi.pku.edu.cn/ccgd/ESCCdb,"CCGD-ESCC: A Comprehensive Database for Genetic Variants Associated with Esophageal Squamous Cell Carcinoma in Chinese Population. Esophageal squamous-cell carcinoma (ESCC) is one of the most lethal malignancies in the world and occurs at particularly higher frequency in China. While several genome-wide association studies (GWAS) of germline variants and whole-genome or whole-exome sequencing studies of somatic mutations in ESCC have been published, there is no comprehensive database publically available for this cancer. 
Here, we developed the Chinese Cancer Genomic Database-Esophageal Squamous Cell Carcinoma (CCGD-ESCC) database, which contains the associations of 69,593 single nucleotide polymorphisms (SNPs) with ESCC risk in 2022 cases and 2039 controls, survival time of 1006 ESCC patients (survival GWAS) and gene expression (expression quantitative trait loci, eQTL) in 94 ESCC patients. Moreover, this database also provides the associations between 8833 somatic mutations and survival time in 675 ESCC patients. Our user-friendly database is a resource useful for biologists and oncologists not only in identifying the associations of genetic variants or somatic mutations with the development and progression of ESCC but also in studying the underlying mechanisms for tumorigenesis of the cancer. CCGD-ESCC is freely accessible at http://db.cbi.pku.edu.cn/ccgd/ESCCdb.",CCGD-ESCC,0.976696948,Chinese Cancer Genomic Database-Esophageal Squamous Cell Carcinoma,0.954146482,CCGD-ESCC,0.976696948,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/1/2018 +26205660,http://sblab.celldesigner.org:18080/Payao11/bin,"The gastrin and cholecystokinin receptors mediated signaling network: a scaffold for data analysis and new hypotheses on regulatory mechanisms. Background The gastrointestinal peptide hormones cholecystokinin and gastrin exert their biological functions via cholecystokinin receptors CCK1R and CCK2R respectively. Gastrin, a central regulator of gastric acid secretion, is involved in growth and differentiation of gastric and colonic mucosa, and there is evidence that it is pro-carcinogenic. Cholecystokinin is implicated in digestion, appetite control and body weight regulation, and may play a role in several digestive disorders. Results We performed a detailed analysis of the literature reporting experimental evidence on signaling pathways triggered by CCK1R and CCK2R, in order to create a comprehensive map of gastrin and cholecystokinin-mediated intracellular signaling cascades. 
The resulting signaling map captures 413 reactions involving 530 molecular species, and incorporates the currently available knowledge into one integrated signaling network. The decomposition of the signaling map into sub-networks revealed 18 modules that represent higher-level structures of the signaling map. These modules allow a more compact mapping of intracellular signaling reactions to known cell behavioral outcomes such as proliferation, migration and apoptosis. The integration of large-scale protein-protein interaction data to this literature-based signaling map in combination with topological analyses allowed us to identify 70 proteins able to increase the compactness of the map. These proteins represent experimentally testable hypotheses for gaining new knowledge on gastrin- and cholecystokinin receptor signaling. The CCKR map is freely available both in a downloadable, machine-readable SBML-compatible format and as a web resource through PAYAO ( http://sblab.celldesigner.org:18080/Payao11/bin/). Conclusion We have demonstrated how a literature-based CCKR signaling map together with its protein interaction extensions can be analyzed to generate new hypotheses on molecular mechanisms involved in gastrin- and cholecystokinin-mediated regulation of cellular processes.",CCKR,0.967607975,NA,0,CCKR,0.967607975,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,7/24/2015 +26519468,http://bioinfo.mc.vanderbilt.edu/ccmGDB,"ccmGDB: a database for cancer cell metabolism genes. Accumulating evidence has demonstrated that rewiring of metabolism in cells is an important hallmark of cancer. The percentage of patients killed by metabolic disorder has been estimated to be 30% of the advanced-stage cancer patients. Thus, a systematic annotation of cancer cell metabolism genes is imperative. 
Here, we present ccmGDB (Cancer Cell Metabolism Gene DataBase), a comprehensive annotation database for cell metabolism genes in cancer, available at http://bioinfo.mc.vanderbilt.edu/ccmGDB. We assembled, curated, and integrated genetic, genomic, transcriptomic, proteomic, biological network and functional information for over 2000 cell metabolism genes in more than 30 cancer types. In total, we integrated over 260 000 somatic alterations including non-synonymous mutations, copy number variants and structural variants. We also integrated RNA-Seq data in various primary tumors, gene expression microarray data in over 1000 cancer cell lines and protein expression data. Furthermore, we constructed cancer or tissue type-specific, gene co-expression based protein interaction networks and drug-target interaction networks. Using these systematic annotations, the ccmGDB portal site provides 6 categories: gene summary, phenotypic information, somatic mutations, gene and protein expression, gene co-expression network and drug pharmacological information with a user-friendly interface for browsing and searching. ccmGDB is developed and maintained as a useful resource for the cancer research community.",ccmGDB,0.998047173,Cancer Cell Metabolism Gene DataBase,0.980098925,ccmGDB,0.998047173,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/30/2015 +28053168,http://structuralbiology.cau.edu.cn/gossypium,"ccNET: Database of co-expression networks with functional modules for diploid and polyploid Gossypium. Plant genera with both diploid and polyploid species are a common evolutionary occurrence. Polyploids, especially allopolyploids such as cotton and wheat, are a great model system for heterosis research. Here, we have integrated genome sequences and transcriptome data of Gossypium species to construct co-expression networks and identified functional modules from different cotton species, including 1155 and 1884 modules in G. arboreum and G. hirsutum, respectively. 
We overlayed the gene expression results onto the co-expression network. We further provided network comparison analysis for orthologous genes across the diploid and allotetraploid Gossypium We also constructed miRNA-target networks and predicted PPI networks for both cotton species. Furthermore, we integrated in-house ChIP-seq data of histone modification (H3K4me3) together with cis-element analysis and gene sets enrichment analysis tools for studying possible gene regulatory mechanism in Gossypium species. Finally, we have constructed an online ccNET database (http://structuralbiology.cau.edu.cn/gossypium) for comparative gene functional analyses at a multi-dimensional network and epigenomic level across diploid and polyploid Gossypium species. The ccNET database will be beneficial for community to yield novel insights into gene/module functions during cotton development and stress response, and might be useful for studying conservation and diversity in other polyploid plants, such as T. aestivum and Brassica napus.",ccNET,0.994896412,NA,0,ccNET,0.994896412,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/7/2016 +22139939,http://crdd.osdd.net/raghava/ccpdb,"ccPDB: compilation and creation of data sets from Protein Data Bank. ccPDB (http://crdd.osdd.net/raghava/ccpdb/) is a database of data sets compiled from the literature and Protein Data Bank (PDB). First, we collected and compiled data sets from the literature used for developing bioinformatics methods to annotate the structure and function of proteins. Second, data sets were derived from the latest release of PDB using standard protocols. Third, we developed a powerful module for creating a wide range of customized data sets from the current release of PDB. This is a flexible module that allows users to create data sets using a simple six step procedure. 
In addition, a number of web services have been integrated in ccPDB, which include submission of jobs on PDB-based servers, annotation of protein structures and generation of patterns. This database maintains >30 types of data sets such as secondary structure, tight-turns, nucleotide interacting residues, metals interacting residues, DNA/RNA binding residues and so on.",ccPDB,0.997259557,NA,0,ccPDB,0.997259557,1,NA,30689843,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,12/1/2011 +30689843,http://webs.iiitd.edu.in/raghava/ccpdb,"ccPDB 2.0: an updated version of datasets created and compiled from Protein Data Bank. . ccPDB 2.0 (http://webs.iiitd.edu.in/raghava/ccpdb) is an updated version of the manually curated database ccPDB that maintains datasets required for developing methods to predict the structure and function of proteins. The number of datasets compiled from literature increased from 45 to 141 in ccPDB 2.0. Similarly, the number of protein structures used for creating datasets also increased from ~74√ɬÉ√ǬÇ√ɬÇ√Ǭ†000 to ~137√ɬÉ√ǬÇ√ɬÇ√Ǭ†000 (PDB March 2018 release). ccPDB 2.0 provides the same web services and flexible tools which were present in the previous version of the database. In the updated version, links of the number of methods developed in the past few years have also been incorporated. This updated resource is built on responsive templates which is compatible with smartphones (mobile, iPhone, iPad, tablets etc.) and large screen gadgets. In summary, ccPDB 2.0 is a user-friendly web-based platform that provides comprehensive as well as updated information about datasets.",ccPDB,0.990919232,NA,0,ccPDB,0.990919232,1,NA,22139939,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2019 +26868054,http://songyanglab.sysu.edu.cn/ccsi,"CCSI: a database providing chromatin-chromatin spatial interaction information. . 
Distal regulatory elements have been shown to regulate gene transcription through spatial interactions, and single nucleotide polymorphisms (SNPs) are linked with distal gene expression by spatial proximity, which helps to explain the causal role of disease-associated SNPs in non-coding region. Therefore, studies on spatial interactions between chromatin have created a new avenue for elucidating the mechanism of transcriptional regulation in disease pathogenesis. Recently, a growing number of chromatin interactions have been revealed by means of 3C, 4C, 5C, ChIA-PET and Hi-C technologies. To interpret and utilize these interactions, we constructed chromatin-chromatin spatial interaction (CCSI) database by integrating and annotating 91 sets of chromatin interaction data derived from published literature, UCSC database and NCBI GEO database, resulting in a total of 3,017,962 pairwise interactions (false discovery rate√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ<√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ0.05), covering human, mouse and yeast. A web interface has been designed to provide access to the chromatin interactions. The main features of CCSI are (i) showing chromatin interactions and corresponding genes, enhancers and SNPs within the regions in the search page; (ii) offering complete interaction datasets, enhancer and SNP information in the download page; and (iii) providing analysis pipeline for the annotation of interaction data. In conclusion, CCSI will facilitate exploring transcriptional regulatory mechanism in disease pathogenesis associated with spatial interactions among genes, regulatory regions and SNPs. Database URL: http://songyanglab.sysu.edu.cn/ccsi.",CCSI,0.997085452,chromatin-chromatin spatial interaction,0.910043742,CCSI,0.997085452,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/11/2016 +34958914,http://minedatabase.mcs.anl.gov/cdmine,"Chemical-damage MINE: A database of curated and predicted spontaneous metabolic reactions. 
Spontaneous reactions between metabolites are often neglected in favor of emphasizing enzyme-catalyzed chemistry because spontaneous reaction rates are assumed to be insignificant under physiological conditions. However, synthetic biology and engineering efforts can raise natural metabolites' levels or introduce unnatural ones, so that previously innocuous or nonexistent spontaneous reactions become an issue. Problems arise when spontaneous reaction rates exceed the capacity of a platform organism to dispose of toxic or chemically active reaction products. While various reliable sources list competing or toxic enzymatic pathways' side-reactions, no corresponding compilation of spontaneous side-reactions exists, nor is it possible to predict their occurrence. We addressed this deficiency by creating the Chemical Damage (CD)-MINE resource. First, we used literature data to construct a comprehensive database of metabolite reactions that occur spontaneously in physiological conditions. We then leveraged this data to construct 148 reaction rules describing the known spontaneous chemistry in a substrate-generic way. We applied these rules to all compounds in the ModelSEED database, predicting 180,891 spontaneous reactions. The resulting (CD)-MINE is available at https://minedatabase.mcs.anl.gov/cdmine/#/home and through developer tools. We also demonstrate how damage-prone intermediates and end products are widely distributed among metabolic pathways, and how predicting spontaneous chemical damage helps rationalize toxicity and carbon loss using examples from published pathways to commercial products. We explain how analyzing damage-prone areas in metabolism helps design effective engineering strategies. 
Finally, we use the CD-MINE toolset to predict the formation of the novel damage product N-carbamoyl proline, and present mass spectrometric evidence for its presence in Escherichia coli.",CD)-MINE,0.910743642,Chemical Damage,0.618579507,CD)-MINE,0.910743642,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,12/25/2021 +"23197659, 25414356, 27899674, 31777944, 31851420",http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml,"CDD: conserved domains and protein three-dimensional structure. CDD, the Conserved Domain Database, is part of NCBI's Entrez query and retrieval system and is also accessible via http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml. CDD provides annotation of protein sequences with the location of conserved domain footprints and functional sites inferred from these footprints. Pre-computed annotation is available via Entrez, and interactive search services accept single protein or nucleotide queries, as well as batch submissions of protein query sequences, utilizing RPS-BLAST to rapidly identify putative matches. CDD incorporates several protein domain and full-length protein model collections, and maintains an active curation effort that aims at providing fine grained classifications for major and well-characterized protein domain families, as supported by available protein three-dimensional (3D) structure and the published literature. To this date, the majority of protein 3D structures are represented by models tracked by CDD, and CDD curators are characterizing novel families that emerge from protein structure determination efforts.",CDD,0.996487617,Conserved Domain Database,0.861336619,CDD,0.996487617,5,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2020 +29997612,http://lab.rockefeller.edu/casanova/CDG,"CDG: An Online Server for Detecting Biologically Closest Disease-Causing Genes and its Application to Primary Immunodeficiency. High-throughput genomic technologies yield about 20,000 variants in the protein-coding exome of each individual. 
A commonly used approach to select candidate disease-causing variants is to test whether the associated gene has been previously reported to be disease-causing. In the absence of known disease-causing genes, it can be challenging to associate candidate genes with specific genetic diseases. To facilitate the discovery of novel gene-disease associations, we determined the putative biologically closest known genes and their associated diseases for 13,005 human genes not currently reported to be disease-associated. We used these data to construct the closest disease-causing genes (CDG) server, which can be used to infer the closest genes with an associated disease for a user-defined list of genes or diseases. We demonstrate the utility of the CDG server in five immunodeficiency patient exomes across different diseases and modes of inheritance, where CDG dramatically reduced the number of candidate genes to be evaluated. This resource will be a considerable asset for ascertaining the potential relevance of genetic variants found in patient exomes to specific diseases of interest. The CDG database and online server are freely available to non-commercial users at: http://lab.rockefeller.edu/casanova/CDG.",CDG,0.988353372,NA,0,CDG,0.988353372,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/27/2018 +23537399,http://celldeathproteomics.uio.no,"Cell death proteomics database: consolidating proteomics data on cell death. Programmed cell death is a ubiquitous process of utmost importance for the development and maintenance of multicellular organisms. More than 10 different types of programmed cell death forms have been discovered. Several proteomics analyses have been performed to gain insight in proteins involved in the different forms of programmed cell death. 
To consolidate these studies, we have developed the cell death proteomics (CDP) database, which comprehends data from apoptosis, autophagy, cytotoxic granule-mediated cell death, excitotoxicity, mitotic catastrophe, paraptosis, pyroptosis, and Wallerian degeneration. The CDP database is available as a web-based database to compare protein identifications and quantitative information across different experimental setups. The proteomics data of 73 publications were integrated and unified with protein annotations from UniProt-KB and gene ontology (GO). Currently, more than 6,500 records of more than 3,700 proteins are included in the CDP. Comparing apoptosis and autophagy using overrepresentation analysis of GO terms, the majority of enriched processes were found in both, but also some clear differences were perceived. Furthermore, the analysis revealed differences and similarities of the proteome between autophagosomal and overall autophagy. The CDP database represents a useful tool to consolidate data from proteome analyses of programmed cell death and is available at http://celldeathproteomics.uio.no.",CDP,0.828693906,death proteomics,0.697402914,CDP,0.828693906,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/10/2013 +30759968,http://cdrgator.ewha.ac.kr,"CDRgator: An Integrative Navigator of Cancer Drug Resistance Gene Signatures. Understanding the mechanisms of cancer drug resistance is a critical challenge in cancer therapy. For many cancer drugs, various resistance mechanisms have been identified such as target alteration, alternative signaling pathways, epithelial-mesenchymal transition, and epigenetic modulation. Resistance may arise via multiple mechanisms even for a single drug, making it necessary to investigate multiple independent models for comprehensive understanding and therapeutic application. In particular, we hypothesize that different resistance processes result in distinct gene expression changes. 
Here, we present a web-based database, CDRgator (Cancer Drug Resistance navigator) for comparative analysis of gene expression signatures of cancer drug resistance. Resistance signatures were extracted from two different types of datasets. First, resistance signatures were extracted from transcriptomic profiles of cancer cells or patient samples and their resistance-induced counterparts for >30 cancer drugs. Second, drug resistance group signatures were also extracted from two large-scale drug sensitivity datasets representing ~1,000 cancer cell lines. All the datasets are available for download, and are conveniently accessible based on drug class and cancer type, along with analytic features such as clustering analysis, multidimensional scaling, and pathway analysis. CDRgator allows meta-analysis of independent resistance models for more comprehensive understanding of drug-resistance mechanisms that is difficult to accomplish with individual datasets alone (database URL: http://cdrgator.ewha.ac.kr).",CDRgator,0.994814634,Cancer Drug Resistance navigator,0.785553472,CDRgator,0.994814634,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/12/2019 +23893318,http://cancer.digitalslidearchive.net,"Cancer Digital Slide Archive: an informatics resource to support integrated in silico analysis of TCGA pathology data. Background The integration and visualization of multimodal datasets is a common challenge in biomedical informatics. Several recent studies of The Cancer Genome Atlas (TCGA) data have illustrated important relationships between morphology observed in whole-slide images, outcome, and genetic events. The pairing of genomics and rich clinical descriptions with whole-slide imaging provided by TCGA presents a unique opportunity to perform these correlative studies. However, better tools are needed to integrate the vast and disparate data types. Objective To build an integrated web-based platform supporting whole-slide pathology image visualization and data integration. 
Materials and methods All images and genomic data were directly obtained from the TCGA and National Cancer Institute (NCI) websites. Results The Cancer Digital Slide Archive (CDSA) produced is accessible to the public (http://cancer.digitalslidearchive.net) and currently hosts more than 20,000 whole-slide images from 22 cancer types. Discussion The capabilities of CDSA are demonstrated using TCGA datasets to integrate pathology imaging with associated clinical, genomic and MRI measurements in glioblastomas and can be extended to other tumor types. CDSA also allows URL-based sharing of whole-slide images, and has preliminary support for directly sharing regions of interest and other annotations. Images can also be selected on the basis of other metadata, such as mutational profile, patient age, and other relevant characteristics. Conclusions With the increasing availability of whole-slide scanners, analysis of digitized pathology images will become increasingly important in linking morphologic observations with genomic and clinical endpoints.",CDSA,0.964991828,Cancer Digital Slide Archive,0.8173123,CDSA,0.964991828,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/25/2013 +24580755,http://hazeslab.med.ualberta.ca/CDSbank,"CDSbank: taxonomy-aware extraction, selection, renaming and formatting of protein-coding DNA or amino acid sequences. Background Protein-coding DNA sequences and their corresponding amino acid sequences are routinely used to study relationships between sequence, structure, function, and evolution. The rapidly growing size of sequence databases increases the power of such comparative analyses but it makes it more challenging to prepare high quality sequence data sets with control over redundancy, quality, completeness, formatting, and labeling. Software tools for some individual steps in this process exist but manual intervention remains a common and time consuming necessity. 
Description CDSbank is a database that stores both the protein-coding DNA sequence (CDS) and amino acid sequence for each protein annotated in Genbank. CDSbank also stores Genbank feature annotation, a flag to indicate incomplete 5' and 3' ends, full taxonomic data, and a heuristic to rank the scientific interest of each species. This rich information allows fully automated data set preparation with a level of sophistication that aims to meet or exceed manual processing. Defaults ensure ease of use for typical scenarios while allowing great flexibility when needed. Access is via a free web server at http://hazeslab.med.ualberta.ca/CDSbank/. Conclusions CDSbank presents a user-friendly web server to download, filter, format, and name large sequence data sets. Common usage scenarios can be accessed via pre-programmed default choices, while optional sections give full control over the processing pipeline. Particular strengths are: extract protein-coding DNA sequences just as easily as amino acid sequences, full access to taxonomy for labeling and filtering, awareness of incomplete sequences, and the ability to take one protein sequence and extract all synonymous CDS or identical protein sequences in other species. Finally, CDSbank can also create labeled property files to, for instance, annotate or re-label phylogenetic trees.",CDSbank,0.99717778,NA,0,CDSbank,0.99717778,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/28/2014 +27899660,http://www.niehs.nih.gov/research/resources/databases/cebs,"CEBS: a comprehensive annotated database of toxicological data. The Chemical Effects in Biological Systems database (CEBS) is a comprehensive and unique toxicology resource that compiles individual and summary animal data from the National Toxicology Program (NTP) testing program and other depositors into a single electronic repository. 
CEBS has undergone significant updates in recent years and currently contains over 11 000 test articles (exposure agents) and over 8000 studies including all available NTP carcinogenicity, short-term toxicity and genetic toxicity studies. Study data provided to CEBS are manually curated, accessioned and subject to quality assurance review prior to release to ensure high quality. The CEBS database has two main components: data collection and data delivery. To accommodate the breadth of data produced by NTP, the CEBS data collection component is an integrated relational design that allows the flexibility to capture any type of electronic data (to date). The data delivery component of the database comprises a series of dedicated user interface tables containing pre-processed data that support each component of the user interface. The user interface has been updated to include a series of nine Guided Search tools that allow access to NTP summary and conclusion data and larger non-NTP datasets. The CEBS database can be accessed online at http://www.niehs.nih.gov/research/resources/databases/cebs/.",CEBS,0.996447543,Chemical Effects in Biological Systems database,0.990120093,CEBS,0.996447543,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +25392417,http://www.cecafdb.org,"CeCaFDB: a curated database for the documentation, visualization and comparative analysis of central carbon metabolic flux distributions explored by 13C-fluxomics. The Central Carbon Metabolic Flux Database (CeCaFDB, available at http://www.cecafdb.org) is a manually curated, multipurpose and open-access database for the documentation, visualization and comparative analysis of the quantitative flux results of central carbon metabolism among microbes and animal cells. It encompasses records for more than 500 flux distributions among 36 organisms and includes information regarding the genotype, culture medium, growth conditions and other specific information gathered from hundreds of journal articles. 
In addition to its comprehensive literature-derived data, the CeCaFDB supports a common text search function among the data and interactive visualization of the curated flux distributions with compartmentation information based on the Cytoscape Web API, which facilitates data interpretation. The CeCaFDB offers four modules to calculate a similarity score or to perform an alignment between the flux distributions. One of the modules was built using an inter programming algorithm for flux distribution alignment that was specifically designed for this study. Based on these modules, the CeCaFDB also supports an extensive flux distribution comparison function among the curated data. The CeCaFDB is strenuously designed to address the broad demands of biochemists, metabolic engineers, systems biologists and members of the -omics community.",CeCaFDB,0.99858432,Central Carbon Metabolic Flux Database,0.983667357,CeCaFDB,0.99858432,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +33306800,http://cefg.uestc.cn/ceg,"CEG 2.0: an updated database of clusters of essential genes including eukaryotic organisms. . Essential genes are key elements for organisms to maintain their living. Building databases that store essential genes in the form of homologous clusters, rather than storing them as a singleton, can provide more enlightening information such as the general essentiality of homologous genes in multiple organisms. In 2013, the first database to store prokaryotic essential genes in clusters, CEG (Clusters of Essential Genes), was constructed. Afterward, the amount of available data for essential genes increased by a factor >3 since the last revision. Herein, we updated CEG to version 2, including more prokaryotic essential genes (from 16 gene datasets to 29 gene datasets) and newly added eukaryotic essential genes (nine species), specifically the human essential genes of 12 cancer cell lines. 
For prokaryotes, information associated with drug targets, such as protein structure, ligand-protein interaction, virulence factor and matched drugs, is also provided. Finally, we provided the service of essential gene prediction for both prokaryotes and eukaryotes. We hope our updated database will benefit more researchers in drug targets and evolutionary genomics. Database URL: http://cefg.uestc.cn/ceg.",CEG,0.990911424,Clusters of Essential Genes,0.888208412,CEG,0.990911424,1,NA,24209780,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,12/1/2020 +24209780,http://cefg.uestc.edu.cn/ceg,"CEG: a database of essential gene clusters. Background Essential genes are indispensable for the survival of living entities. They are the cornerstones of synthetic biology, and are potential candidate targets for antimicrobial and vaccine design. Description Here we describe the Cluster of Essential Genes (CEG) database, which contains clusters of orthologous essential genes. Based on the size of a cluster, users can easily decide whether an essential gene is conserved in multiple bacterial species or is species-specific. It contains the similarity value of every essential gene cluster against human proteins or genes. The CEG_Match tool is based on the CEG database, and was developed for prediction of essential genes according to function. The database is available at http://cefg.uestc.edu.cn/ceg. Conclusions Properties contained in the CEG database, such as cluster size, and the similarity of essential gene clusters against human proteins or genes, are very important for evolutionary research and drug design. 
An advantage of CEG is that it clusters essential genes based on function, and therefore decreases false positive results when predicting essential genes in comparison with using the similarity alignment method.",CEG,0.956548989,Cluster of Essential Genes,0.826709604,CEG,0.956548989,1,NA,33306800,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,11/9/2013 +26527719,http://cega.ezlab.org,"CEGA--a catalog of conserved elements from genomic alignments. By identifying genomic sequence regions conserved among several species, comparative genomics offers opportunities to discover putatively functional elements without any prior knowledge of what these functions might be. Comparative analyses across mammals estimated 4-5% of the human genome to be functionally constrained, a much larger fraction than the 1-2% occupied by annotated protein-coding or RNA genes. Such functionally constrained yet unannotated regions have been referred to as conserved non-coding sequences (CNCs) or ultra-conserved elements (UCEs), which remain largely uncharacterized but probably form a highly heterogeneous group of elements including enhancers, promoters, motifs, and others. To facilitate the study of such CNCs/UCEs, we present our resource of Conserved Elements from Genomic Alignments (CEGA), accessible from http://cega.ezlab.org. Harnessing the power of multiple species comparisons to detect genomic elements under purifying selection, CEGA provides a comprehensive set of CNCs identified at different radiations along the vertebrate lineage. Evolutionary constraint is identified using threshold-free phylogenetic modeling of unbiased and sensitive global alignments of genomic synteny blocks identified using protein orthology. 
We identified CNCs independently for five vertebrate clades, each referring to a different last common ancestor and therefore to an overlapping but varying set of CNCs with 24 488 in vertebrates, 241 575 in amniotes, 709 743 in Eutheria, 642 701 in Boreoeutheria and 612 364 in Euarchontoglires, spanning from 6 Mbp in vertebrates to 119 Mbp in Euarchontoglires. The dynamic CEGA web interface displays alignments, genomic locations, as well as biologically relevant data to help prioritize and select CNCs of interest for further functional investigations.",CEGA,0.962110519,Conserved Elements from Genomic Alignments,0.852080087,CEGA,0.962110519,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/2/2015 +29992323,http://apiaceae.njau.edu.cn/celerydb,"CeleryDB: a genomic database for celery. . Celery (Apium graveolens L.) is a plant belonging to the Apiaceae family, and a popular vegetable worldwide because of its abundant nutrients and various medical functions. Although extensive genetic and molecular biological studies have been conducted on celery, its genomic data remain unclear. Given the significance of celery and the growing demand for its genomic data, the whole genome of 'Q2-JN11' celery (a highly inbred line obtained by artificial selfing of 'Jinnan Shiqin') was sequenced using HiSeq 2000 sequencing technology. For the convenience of researchers to study celery, an online database of the whole-genome sequences of celery, CeleryDB, was constructed. The sequences of the whole genome, nucleotide sequences of the predicted genes and amino acid sequences of the predicted proteins are available online on CeleryDB. Home, BLAST, Genome Browser, Transcription Factor and Download interfaces composed of the organizational structure of CeleryDB. Users can search the celery genomic data by using two user-friendly query tools: basic local alignment search tool and Genome Browser. 
In the future, CeleryDB will be constantly updated to satisfy the needs of celery researchers worldwide.Database URL: http://apiaceae.njau.edu.cn/celerydb.",CeleryDB,0.992402375,NA,0,CeleryDB,0.992402375,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +34971674,http://celldepot.bxgenomics.com,"CellDepot: A Unified Repository for scRNA-seq Data and Visual Exploration. CellDepot containing over 270 datasets from 8 species and many tissues serves as an integrated web application to empower scientists in exploring single-cell RNA-seq (scRNA-seq) datasets and comparing the datasets among various studies through a user-friendly interface with advanced visualization and analytical capabilities. To begin with, it provides an efficient data management system that users can upload single cell datasets and query the database by multiple attributes such as species and cell types. In addition, the graphical multi-logic, multi-condition query builder and convenient filtering tool backed by MySQL database system, allows users to quickly find the datasets of interest and compare the expression of gene(s) across these. Moreover, by embedding the cellxgene VIP tool, CellDepot enables fast exploration of individual dataset in the manner of interactivity and scalability to gain more refined insights such as cell composition, gene expression profiles, and differentially expressed genes among cell types by leveraging more than 20 frequently applied plotting functions and high-level analysis methods in single cell research. In summary, the web portal available at http://celldepot.bxgenomics.com, prompts large scale single cell data sharing, facilitates meta-analysis and visualization, and encourages scientists to contribute to the single-cell community in a tractable and collaborative way. 
Finally, CellDepot is released as open-source software under MIT license to motivate crowd contribution, broad adoption, and local deployment for private datasets.",CellDepot,0.996150434,NA,0,CellDepot,0.996150434,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/28/2021 +24304896,http://www.cellfinder.org,"CellFinder: a cell data repository. CellFinder (http://www.cellfinder.org) is a comprehensive one-stop resource for molecular data characterizing mammalian cells in different tissues and in different development stages. It is built from carefully selected data sets stemming from other curated databases and the biomedical literature. To date, CellFinder describes 3394 cell types and 50 951 cell lines. The database currently contains 3055 microscopic and anatomical images, 205 whole-genome expression profiles of 194 cell/tissue types from RNA-seq and microarrays and 553 905 protein expressions for 535 cells/tissues. Text mining of a corpus of >2000 publications followed by manual curation confirmed expression information on ~900 proteins and genes. CellFinder's data model is capable to seamlessly represent entities from single cells to the organ level, to incorporate mappings between homologous entities in different species and to describe processes of cell development and differentiation. Its ontological backbone currently consists of 204 741 ontology terms incorporated from 10 different ontologies unified under the novel CELDA ontology. CellFinder's web portal allows searching, browsing and comparing the stored data, interactive construction of developmental trees and navigating the partonomic hierarchy of cells and tissues through a unique body browser designed for life scientists and clinicians.",CellFinder,0.99684298,NA,0,CellFinder,0.99684298,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/3/2013 +33471060,http://www.rna-society.org/cellinker,"Cellinker: a platform of ligand-receptor interactions for intercellular communication analysis. . 
Ligand-receptor (L-R) interactions mediate cell adhesion, recognition and communication and play essential roles in physiological and pathological signaling. With the rapid development of single-cell RNA sequencing (scRNA-seq) technologies, systematically decoding the intercellular communication network involving L-R interactions has become a focus of research. Therefore, construction of a comprehensive, high-confidence and well-organized resource to retrieve L-R interactions in order to study the functional effects of cell-cell communications would be of great value. In this study, we developed Cellinker, a manually curated resource of literature-supported L-R interactions that play roles in cell-cell communication. We aimed to provide a useful platform for studies on cell-cell communication mediated by L-R interactions. The current version of Cellinker documents over 3,700 human and 3,200 mouse L-R protein-protein interactions (PPIs) and embeds a practical and convenient webserver with which researchers can decode intercellular communications based on scRNA-seq data. And over 400 endogenous small molecule (sMOL) related L-R interactions were collected as well. Moreover, to help with research on coronavirus (CoV) infection, Cellinker collects information on 16 L-R PPIs involved in CoV-human interactions (including 12 L-R PPIs involved in SARS-CoV-2 infection). In summary, Cellinker provides a user-friendly interface for querying, browsing and visualizing L-R interactions as well as a practical and convenient web tool for inferring intercellular communications based on scRNA-seq data. We believe this platform could promote intercellular communication research and accelerate the development of related algorithms for scRNA-seq studies. Cellinker is available at http://www.rna-society.org/cellinker/. 
Supplementary data are available at Bioinformatics online.",Cellinker,0.993874848,NA,0,Cellinker,0.993874848,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/20/2021 +23251048,http://dev.pubgene.com/cellmine,"CellLineMiner: a knowledge portal for human cell lines. Unlabelled Experimental models of human tissues and disease phenotypes frequently rely upon immortalized cell lines, which are easily accessible and simple to use due to their infinite capability of cell division. For decades, cell lines have been used to investigate cellular mechanisms of disease and the efficacy of drugs, most prominently for human cancers. However, the large body of knowledge with respect to human cell lines exists primarily in an unstructured fashion, that is, as free text in the scientific literature. Here we present CellLineMiner, a novel text mining-based web database that provides a comprehensive view of human cell line knowledge. The application offers a simple search in all indexed cell lines, accompanied by a rapid display of all identified literature associations. The CellLineMiner is intended to serve as a knowledge resource companion to the cellular model systems used in biomedical research. Availability CellLineMiner is accessible at http://dev.pubgene.com/cellmine.",CellLineMiner,0.992360115,NA,0,CellLineMiner,0.992360115,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/13/2012 +30289549,"http://biocc.hrbmu.edu.cn/CellMarker/, http://bio-bigdata.hrbmu.edu.cn/CellMarker","CellMarker: a manually curated resource of cell markers in human and mouse. One of the most fundamental questions in biology is what types of cells form different tissues and organs in a functionally coordinated fashion. Larger-scale single-cell sequencing and biology experiment studies are now rapidly opening up new ways to track this question by revealing substantial cell markers for distinguishing different cell types in tissues. 
Here, we developed the CellMarker database (http://biocc.hrbmu.edu.cn/CellMarker/ or http://bio-bigdata.hrbmu.edu.cn/CellMarker/), aiming to provide a comprehensive and accurate resource of cell markers for various cell types in tissues of human and mouse. By manually curating over 100 000 published papers, 4124 entries including the cell marker information, tissue type, cell type, cancer information and source, were recorded. At last, 13 605 cell markers of 467 cell types in 158 human tissues/sub-tissues and 9148 cell makers of 389 cell types in 81 mouse tissues/sub-tissues were collected and deposited in CellMarker. CellMarker provides a user-friendly interface for browsing, searching and downloading markers of diverse cell types of different tissues. Furthermore, a summarized marker prevalence in each cell type is graphically and intuitively presented through a vivid statistical graph. We believe that CellMarker is a comprehensive and valuable resource for cell researches in precisely identifying and characterizing cells, especially at the single-cell level.",CellMarker,0.992818952,NA,0,CellMarker,0.992818952,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +33086069,http://discover.nci.nih.gov/SclcCellMinerCDB,"SCLC-CellMiner: A Resource for Small Cell Lung Cancer Cell Line Genomics and Pharmacology Based on Genomic Signatures. CellMiner-SCLC (https://discover.nci.nih.gov/SclcCellMinerCDB/) integrates drug sensitivity and genomic data, including high-resolution methylome and transcriptome from 118 patient-derived small cell lung cancer (SCLC) cell lines, providing a resource for research into this ""recalcitrant cancer."" We demonstrate the reproducibility and stability of data from multiple sources and validate the SCLC consensus nomenclature on the basis of expression of master transcription factors NEUROD1, ASCL1, POU2F3, and YAP1. Our analyses reveal transcription networks linking SCLC subtypes with MYC and its paralogs and the NOTCH and HIPPO pathways. 
SCLC subsets express specific surface markers, providing potential opportunities for antibody-based targeted therapies. YAP1-driven SCLCs are notable for differential expression of the NOTCH pathway, epithelial-mesenchymal transition (EMT), and antigen-presenting machinery (APM) genes and sensitivity to mTOR and AKT inhibitors. These analyses provide insights into SCLC biology and a framework for future investigations into subtype-specific SCLC vulnerabilities.",CellMiner-SCLC,0.997686309,NA,0,CellMiner-SCLC,0.997686309,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2020 +24016071,http://www.medicalgenomics.org/cellminerhcc,"CellMinerHCC: a microarray-based expression database for hepatocellular carcinoma cell lines. Background & aims Therapeutic options for hepatocellular carcinoma (HCC) still remain limited. Development of gene targeted therapies is a promising option. A better understanding of the underlying molecular biology is gained in in vitro experiments. However, even with targeted manipulation of gene expression varying treatment responses were observed in diverse HCC cell lines. Therefore, information on gene expression profiles of various HCC cell lines may be crucial to experimental designs. To generate a publicly available database containing microarray expression profiles of diverse HCC cell lines. Methods Microarray data were analyzed using an individually scripted R program package. Data were stored in a PostgreSQL database with a PHP written web interface. Evaluation and comparison of individual cell line expression profiles are supported via public web interface. Results This database allows evaluation of gene expression profiles of 18 HCC cell lines and comparison of differential gene expression between multiple cell lines. Analysis of commonly regulated genes for signaling pathway enrichment and interactions demonstrates a liver tumor phenotype with enrichment of major cancer related KEGG signatures like 'cancer' and 'inflammatory response'. 
Further molecular associations of strong scientific interest, e.g. 'lipid metabolism', were also identified. Conclusions We have generated CellMinerHCC (http://www.medicalgenomics.org/cellminerhcc), a publicly available database containing gene expression data of 18 HCC cell lines. This database will aid in the design of in vitro experiments in HCC research, because the genetic specificities of various HCC cell lines will be considered.",CellMinerHCC,0.941295445,NA,0,CellMinerHCC,0.941295445,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/9/2013 +22039163,http://cellpedia.cbrc.jp,"CELLPEDIA: a repository for human cell information for cell studies and differentiation analyses. CELLPEDIA is a repository database for current knowledge about human cells. It contains various types of information, such as cell morphologies, gene expression and literature references. The major role of CELLPEDIA is to provide a digital dictionary of human cells for the biomedical field, including support for the characterization of artificially generated cells in regenerative medicine. CELLPEDIA features (i) its own cell classification scheme, in which whole human cells are classified by their physical locations in addition to conventional taxonomy; and (ii) cell differentiation pathways compiled from biomedical textbooks and journal papers. Currently, human differentiated cells and stem cells are classified into 2260 and 66 cell taxonomy keys, respectively, from which 934 parent-child relationships reported in cell differentiation or transdifferentiation pathways are retrievable. As far as we know, this is the first attempt to develop a digital cell bank to function as a public resource for the accumulation of current knowledge about human cells. The CELLPEDIA homepage is freely accessible except for the data submission pages that require authentication (please send a password request to cell-info@cbrc.jp). 
Database URL: http://cellpedia.cbrc.jp/",CELLPEDIA,0.998041213,NA,0,CELLPEDIA,0.998041213,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/29/2011 +33147626,http://tcm.zju.edu.cn/celltalkdb,"CellTalkDB: a manually curated database of ligand-receptor interactions in humans and mice. . Cell-cell communications in multicellular organisms generally involve secreted ligand-receptor (LR) interactions, which is vital for various biological phenomena. Recent advancements in single-cell RNA sequencing (scRNA-seq) have effectively resolved cellular phenotypic heterogeneity and the cell-type composition of complex tissues, facilitating the systematic investigation of cell-cell communications at single-cell resolution. However, assessment of chemical-signal-dependent cell-cell communication through scRNA-seq relies heavily on prior knowledge of LR interaction pairs. We constructed CellTalkDB (http://tcm.zju.edu.cn/celltalkdb), a manually curated comprehensive database of LR interaction pairs in humans and mice comprising 3398 human LR pairs and 2033 mouse LR pairs, through text mining and manual verification of known protein-protein interactions using the STRING database, with literature-supported evidence for each pair. Compared with SingleCellSignalR, the largest LR-pair resource, CellTalkDB includes not only 2033 mouse LR pairs but also 377 additional human LR pairs. In conclusion, the data on human and mouse LR pairs contained in CellTalkDB could help to further the inference and understanding of the LR-interaction-based cell-cell communications, which might provide new insights into the mechanism underlying biological processes.",CellTalkDB,0.99723649,NA,0,CellTalkDB,0.99723649,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +25592564,http://cellx.sourceforge.net,"Cell Index Database (CELLX): a web tool for cancer precision medicine. 
The Cell Index Database, (CELLX) (http://cellx.sourceforge.net) provides a computational framework for integrating expression, copy number variation, mutation, compound activity, and meta data from cancer cells. CELLX provides the computational biologist a quick way to perform routine analyses as well as the means to rapidly integrate data for offline analysis. Data is accessible through a web interface which utilizes R to generate plots and perform clustering, correlations, and statistical tests for associations within and between data types for ~20,000 samples from TCGA, CCLE, Sanger, GSK, GEO, GTEx, and other public sources. We show how CELLX supports precision oncology through indications discovery, biomarker evaluation, and cell line screening analysis.",CELLX,0.997325838,Cell Index Database,0.766975661,CELLX,0.997325838,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2015 +25970778,http://www.cemtdd.com/index.html,"CEMTDD: The database for elucidating the relationships among herbs, compounds, targets and related diseases for Chinese ethnic minority traditional drugs. China has different ethnic minorities that establish their own medical systems and practice experience for thousand years, thereafter named Chinese Ethnic Minority Traditional Drugs (CEMTDs) (http://www.cemtdd.com/index.html). Since many compounds from CEMTDs have been reported to perturb human's dysfunction network and restore human normal physiological conditions, the relationships amongst a series of compounds from specific herbs, their targets and relevant diseases have become our main focus in CEMTD modernization. Herein, we have constructed the first Chinese Ethnic Minority Traditional Drug Database (CEMTDD) mainly from Xinjiang Uygur Autonomous Region (XUAR), retrieving CEMTD-related information from different resources. 
CEMTDD contains about 621 herbs, 4, 060 compounds, 2, 163 targets and 210 diseases, among which most of herbs can be applied into gerontology therapy including inflammation, cardiovascular disease and neurodegenerative disease. Gerontology is highly occurred in XUAR, and has abundant experience in treating such diseases, which may benefit for developing a new gerontology therapeutic strategy. CEMTDD displays networks for intricate relationships between CEMTDs and treated diseases, as well as the interrelations between active compounds and action targets, which may shed new light on the combination therapy of CEMTDs and further understanding of their herb molecular mechanisms for better modernized utilizations of CEMTDs, especially in gerontology.",CEMTDD,0.996073186,Chinese Ethnic Minority Traditional Drug Database,0.91564708,CEMTDD,0.996073186,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2015 +27701074,http://www.elegansvariation.org,"CeNDR, the Caenorhabditis elegans natural diversity resource. Studies in model organisms have yielded considerable insights into the etiology of disease and our understanding of evolutionary processes. Caenorhabditis elegans is among the most powerful model organisms used to understand biology. However, C. elegans is not used as extensively as other model organisms to investigate how natural variation shapes traits, especially through the use of genome-wide association (GWA) analyses. Here, we introduce a new platform, the C. elegans Natural Diversity Resource (CeNDR) to enable statistical genetics and genomics studies of C. elegans and to connect the results to human disease. CeNDR provides the research community with wild strains, genome-wide sequence and variant data for every strain, and a GWA mapping portal for studying natural variation in C. elegans Additionally, researchers outside of the C. elegans community can benefit from public mappings and integrated tools for comparative analyses. 
CeNDR uses several databases that are continually updated through the addition of new strains, sequencing data, and association mapping results. The CeNDR data are accessible through a freely available web portal located at http://www.elegansvariation.org or through an application programming interface.",CeNDR,0.993216455,elegans Natural Diversity Resource,0.823794746,CeNDR,0.993216455,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/3/2016 +24270791,http://centrosome.cnb.csic.es,"CentrosomeDB: a new generation of the centrosomal proteins database for Human and Drosophila melanogaster. We present the second generation of centrosomeDB, available online at http://centrosome.cnb.csic.es, with a significant expansion of 1357 human and drosophila centrosomal genes and their corresponding information. The centrosome of animal cells takes part in important biological processes such as the organization of the interphase microtubule cytoskeleton and the assembly of the mitotic spindle. The active research done during the past decades has produced lots of data related to centrosomal proteins. Unfortunately, the accumulated data are dispersed among diverse and heterogeneous sources of information. We believe that the availability of a repository collecting curated evidences of centrosomal proteins would constitute a key resource for the scientific community. This was our first motivation to introduce CentrosomeDB in NAR database issue in 2009, collecting a set of human centrosomal proteins that were reported in the literature and other sources. The intensive use of this resource during these years has encouraged us to present this new expanded version. Using our database, the researcher is offered the possibility to study the evolution, function and structure of the centrosome. We have compiled information from many sources, including Gene Ontology, disease-association, single nucleotide polymorphisms and associated gene expression experiments. 
Special interest has been paid to protein-protein interaction.",CentrosomeDB,0.996333599,NA,0,CentrosomeDB,0.996333599,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2013 +21247929,http://www.cerealab.unimore.it/jws/cerealab.jnlp,"A genotypic and phenotypic information source for marker-assisted selection of cereals: the CEREALAB database. The CEREALAB database aims to store genotypic and phenotypic data obtained by the CEREALAB project and to integrate them with already existing data sources in order to create a tool for plant breeders and geneticists. The database can help them in unravelling the genetics of economically important phenotypic traits; in identifying and choosing molecular markers associated to key traits; and in choosing the desired parentals for breeding programs. The database is divided into three sub-schemas corresponding to the species of interest: wheat, barley and rice; each sub-schema is then divided into two sub-ontologies, regarding genotypic and phenotypic data, respectively. Database URL: http://www.cerealab.unimore.it/jws/cerealab.jnlp.",CEREALAB,0.995646238,NA,0,CEREALAB,0.995646238,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/18/2011 +32754757,http://www.cerealsdb.uk.net/cerealgenomics/CerealsDB/indexNEW.php,"CerealsDB-new tools for the analysis of the wheat genome: update 2020. . CerealsDB (www.cerealsdb.uk.net) is an online repository of mainly hexaploid wheat (Triticum aestivum) single nucleotide polymorphisms (SNPs) and genotyping data. The CerealsDB website has been designed to enable wheat breeders and scientists to select the appropriate markers for research breeding tasks, such as marker-assisted selection. We report a large update of genotyping information for over 6000 wheat accessions and describe new webtools for exploring and visualizing the data. We also describe a new database of quantitative trait loci that links phenotypic traits to CerealsDB SNP markers and allelic scores for each of those markers. 
CerealsDB is an open-access website that hosts information on wheat SNPs considered useful for both plant breeders and research scientists. The latest CerealsDB database is available at https://www.cerealsdb.uk.net/cerealgenomics/CerealsDB/indexNEW.php.",CerealsDB,0.996999264,NA,0,CerealsDB,0.996999264,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +31428785,http://www.bio-data.cn/CFEA,"CFEA: a cell-free epigenome atlas in human diseases. Epigenetic alterations, including 5-methylcytosine (5mC), 5-hydroxymethylcytosine (5hmC) and nucleosome positioning (NP), in cell-free DNA (cfDNA) have been widely observed in human diseases, and many available cfDNA-based epigenome-wide profiles exhibit high sensitivity and specificity in disease detection and classification. However, due to the lack of efficient collection, standardized quality control, and analysis procedures, efficiently integrating and reusing these data remain considerable challenges. Here, we introduce CFEA (http://www.bio-data.cn/CFEA), a cell-free epigenome database dedicated to three types of widely adopted epigenetic modifications (5mC, 5hmC and NP) involved in 27 human diseases. We developed bioinformatic pipelines for quality control and standard data processing and an easy-to-use web interface to facilitate the query, visualization and download of these cell-free epigenome data. We also manually curated related biological and clinical information for each profile, allowing users to better browse and compare cfDNA epigenomes at a specific stage (such as early- or metastasis-stage) of cancer development. CFEA provides a comprehensive and timely resource to the scientific community and supports the development of liquid biopsy-based biomarkers for various human diseases.",CFEA,0.997379303,NA,0,CFEA,0.997379303,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +23193288,http://cfgp.snu.ac.kr,"CFGP 2.0: a versatile web-based platform for supporting comparative and evolutionary genomics of fungi and Oomycetes. 
In 2007, Comparative Fungal Genomics Platform (CFGP; http://cfgp.snu.ac.kr/) was publicly open with 65 genomes corresponding to 58 fungal and Oomycete species. The CFGP provided six bioinformatics tools, including a novel tool entitled BLASTMatrix that enables search homologous genes to queries in multiple species simultaneously. CFGP also introduced Favorite, a personalized virtual space for data storage and analysis with these six tools. Since 2007, CFGP has grown to archive 283 genomes corresponding to 152 fungal and Oomycete species as well as 201 genomes that correspond to seven bacteria, 39 plants and 105 animals. In addition, the number of tools in Favorite increased to 27. The Taxonomy Browser of CFGP 2.0 allows users to interactively navigate through a large number of genomes according to their taxonomic positions. The user interface of BLASTMatrix was also improved to facilitate subsequent analyses of retrieved data. A newly developed genome browser, Seoul National University Genome Browser (SNUGB), was integrated into CFGP 2.0 to support graphical presentation of diverse genomic contexts. Based on the standardized genome warehouse of CFGP 2.0, several systematic platforms designed to support studies on selected gene families have been developed. Most of them are connected through Favorite to allow of sharing data across the platforms.",CFGP,0.979907155,Comparative Fungal Genomics Platform,0.961724751,CFGP,0.979907155,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2012 +28603918,http://cftr.iurc.montp.inserm.fr/cftr,"CFTR-France, a national relational patient database for sharing genetic and phenotypic data associated with rare CFTR variants. Most of the 2,000 variants identified in the CFTR (cystic fibrosis transmembrane regulator) gene are rare or private. Their interpretation is hampered by the lack of available data and resources, making patient care and genetic counseling challenging. 
We developed a patient-based database dedicated to the annotations of rare CFTR variants in the context of their cis- and trans-allelic combinations. Based on almost 30 years of experience of CFTR testing, CFTR-France (https://cftr.iurc.montp.inserm.fr/cftr) currently compiles 16,819 variant records from 4,615 individuals with cystic fibrosis (CF) or CFTR-RD (related disorders), fetuses with ultrasound bowel anomalies, newborns awaiting clinical diagnosis, and asymptomatic compound heterozygotes. For each of the 736 different variants reported in the database, patient characteristics and genetic information (other variations in cis or in trans) have been thoroughly checked by a dedicated curator. Combining updated clinical, epidemiological, in silico, or in vitro functional data helps to the interpretation of unclassified and the reassessment of misclassified variants. This comprehensive CFTR database is now an invaluable tool for diagnostic laboratories gathering information on rare variants, especially in the context of genetic counseling, prenatal and preimplantation genetic diagnosis. CFTR-France is thus highly complementary to the international database CFTR2 focused so far on the most common CF-causing alleles.",CFTR-France,0.977899387,ystic fibrosis transmembrane,0.618076883,CFTR-France,0.977899387,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/28/2017 +23696674,http://research.nhgri.nih.gov/CGD,"Clinical genomic database. Technological advances have greatly increased the availability of human genomic sequencing. However, the capacity to analyze genomic data in a clinically meaningful way lags behind the ability to generate such data. 
To help address this obstacle, we reviewed all conditions with genetic causes and constructed the Clinical Genomic Database (CGD) (http://research.nhgri.nih.gov/CGD/), a searchable, freely Web-accessible database of conditions based on the clinical utility of genetic diagnosis and the availability of specific medical interventions. The CGD currently includes a total of 2,616 genes organized clinically by affected organ systems and interventions (including preventive measures, disease surveillance, and medical or surgical interventions) that could be reasonably warranted by the identification of pathogenic mutations. To aid independent analysis and optimize new data incorporation, the CGD also includes all genetic conditions for which genetic knowledge may affect the selection of supportive care, informed medical decision-making, prognostic considerations, reproductive decisions, and allow avoidance of unnecessary testing, but for which specific interventions are not otherwise currently available. For each entry, the CGD includes the gene symbol, conditions, allelic conditions, clinical categorization (for both manifestations and interventions), mode of inheritance, affected age group, description of interventions/rationale, links to other complementary databases, including databases of variants and presumed pathogenic mutations, and links to PubMed references (>20,000). The CGD will be regularly maintained and updated to keep pace with scientific discovery. Further content-based expert opinions are actively solicited. 
Eventually, the CGD may assist the rapid curation of individual genomes as part of active medical care.",CGD,0.991376301,Clinical Genomic Database,0.881686285,CGD,0.991376301,1,NA,"22064862.0, 24185697.0, 27738138.0",NA,NA,NA,do not merge,NA,NA,NA,5/21/2013 +"22064862, 24185697, 27738138",http://www.candidagenome.org,"The Candida genome database incorporates multiple Candida species: multispecies search and analysis tools with curated gene and protein information for Candida albicans and Candida glabrata. The Candida Genome Database (CGD, http://www.candidagenome.org/) is an internet-based resource that provides centralized access to genomic sequence data and manually curated functional information about genes and proteins of the fungal pathogen Candida albicans and other Candida species. As the scope of Candida research, and the number of sequenced strains and related species, has grown in recent years, the need for expanded genomic resources has also grown. To answer this need, CGD has expanded beyond storing data solely for C. albicans, now integrating data from multiple species. Herein we describe the incorporation of this multispecies information, which includes curated gene information and the reference sequence for C. glabrata, as well as orthology relationships that interconnect Locus Summary pages, allowing easy navigation between genes of C. albicans and C. glabrata. These orthology relationships are also used to predict GO annotations of their products. We have also added protein information pages that display domains, structural information and physicochemical properties; bibliographic pages highlighting important topic areas in Candida biology; and a laboratory strain lineage page that describes the lineage of commonly used laboratory strains. All of these data are freely available at http://www.candidagenome.org/. 
We welcome feedback from the research community at candida-curator@lists.stanford.edu.",CGD,0.991008719,Candida Genome Database,0.961281765,CGD,0.991008719,3,NA,23696674,NA,NA,NA,do not merge,NA,NA,NA,10/13/2016 +27789706,http://cgdb.biocuckoo.org,"CGDB: a database of circadian genes in eukaryotes. We report a database of circadian genes in eukaryotes (CGDB, http://cgdb.biocuckoo.org), containing ~73 000 circadian-related genes in 68 animals, 39 plants and 41 fungi. Circadian rhythm is ~24 h rhythm in behavioral and physiological processes that exists in almost all organisms on the earth. Defects in the circadian system are highly associated with a number of diseases such as cancers. Although several databases have been established for rhythmically expressed genes, a comprehensive database of cycling genes across phyla is still lacking. From the literature, we collected 1382 genes of which transcript level oscillations were validated using methods such as RT-PCR, northern blot and in situ hybridization. Given that many genes exhibit different oscillatory patterns in different tissues/cells within an organism, we have included information regarding the phase and amplitude of the oscillation, as well as the tissue/cells in which the oscillation was identified. Using these well characterized cycling genes, we have then conducted an orthologous search and identified ~45 000 potential cycling genes from 148 eukaryotes. Given that significant effort has been devoted to identifying cycling genes by transcriptome profiling, we have also incorporated these results, a total of over 26 000 genes, into our database.",CGDB,0.998253147,of circadian genes in eukaryotes,0.924593337,CGDB,0.998253147,1,NA,32257241,NA,NA,NA,do not merge,NA,NA,NA,10/26/2016 +32257241,http://cgdb.bio2db.com,"Coriander Genomics Database: a genomic, transcriptomic, and metabolic database for coriander. 
Coriander (Coriandrum sativum L.), also known as cilantro, is a globally important vegetable and spice crop. Its genome and that of carrot are models for studying the evolution of the Apiaceae family. Here, we developed the Coriander Genomics Database (CGDB, http://cgdb.bio2db.com/) to collect, store, and integrate the genomic, transcriptomic, metabolic, functional annotation, and repeat sequence data of coriander and carrot to serve as a central online platform for Apiaceae and other related plants. Using these data sets in the CGDB, we intriguingly found that seven transcription factor (TF) families showed significantly greater numbers of members in the coriander genome than in the carrot genome. The highest ratio of the numbers of MADS TFs between coriander and carrot reached 3.15, followed by those for tubby protein (TUB) and heat shock factors. As a demonstration of CGDB applications, we identified 17 TUB family genes and conducted systematic comparative and evolutionary analyses. RNA-seq data deposited in the CGDB also suggest dose compensation effects of gene expression in coriander. CGDB allows bulk downloading, significance searches, genome browser analyses, and BLAST searches for comparisons between coriander and other plants regarding genomics, gene families, gene collinearity, gene expression, and the metabolome. A detailed user manual and contact information are also available to provide support to the scientific research community and address scientific questions. CGDB will be continuously updated, and new data will be integrated for comparative and functional genomic analysis in Apiaceae and other related plants.",CGDB,0.994118055,Coriander Genomics Database,0.98100695,CGDB,0.994118055,1,NA,27789706,NA,NA,NA,do not merge,NA,NA,NA,4/1/2020 +26160459,http://cgmd.in,"CGMD: An integrated database of cancer genes and markers. 
Integrating cancer genes and markers with experimental evidence might provide valuable information for the further investigation of crosstalk between tumor genes and markers in cancer biology. To achieve this objective, we developed a database known as the Cancer Gene Marker Database (CGMD), which integrates data on tumor genes and markers based on experimental evidence. The major goal of CGMD is to provide the following: 1) current systematic treatment approaches and recent advances in different cancer treatments; 2) the aggregation of different genes and markers by their molecular characteristics and pathway associations; and 3) free access to the data compiled by CGMD at http://cgmd.in/. The database consists of 309 genes and 206 markers, as well as a list of 40 different human cancers, with detailed descriptions of all characterized markers. CGMD provides complete cancer annotations and molecular descriptions of cancer genes and markers such as CpG islands, promoters, exons, PDB structures, active sites and domains.",CGMD,0.992433429,Cancer Gene Marker Database,0.956503713,CGMD,0.992433429,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/10/2015 +23486613,http://cgob.ucd.ie,"Comparative genome analysis and gene finding in Candida species using CGOB. The Candida Gene Order Browser (CGOB) was developed as a tool to visualize and analyze synteny relationships in multiple Candida species, and to provide an accurate, manually curated set of orthologous Candida genes for evolutionary analyses. Here, we describe major improvements to CGOB. The underlying structure of the database has been changed significantly. Genomic features are now based directly on genome annotations rather than on protein sequences, which allows non-protein features such as centromere locations in Candida albicans and tRNA genes in all species to be included. The data set has been expanded to 13 species, including genomes of pathogens (C. albicans, C. parapsilosis, C. tropicalis, and C. 
orthopsilosis), and those of xylose-degrading species with important biotechnological applications (C. tenuis, Scheffersomyces stipitis, and Spathaspora passalidarum). Updated annotations of C. parapsilosis, C. dubliniensis, and Debaryomyces hansenii have been incorporated. We discovered more than 1,500 previously unannotated genes among the 13 genomes, ranging in size from 29 to 3,850 amino acids. Poorly conserved and rapidly evolving genes were also identified. Re-analysis of the mating type loci of the xylose degraders suggests that C. tenuis is heterothallic, whereas both Spa. passalidarum and S. stipitis are homothallic. As well as hosting the browser, the CGOB website (http://cgob.ucd.ie) gives direct access to all the underlying genome annotations, sequences, and curated orthology data.",CGOB,0.997058729,Candida Gene Order Browser,0.948766589,CGOB,0.997058729,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/13/2013 +25518738,http://www.nipgr.res.in/CGWR/home.php,"The chickpea genomic web resource: visualization and analysis of the desi-type Cicer arietinum nuclear genome for comparative exploration of legumes. Background Availability of the draft nuclear genome sequences of small-seeded desi-type legume crop Cicer arietinum has provided an opportunity for investigating unique chickpea genomic features and evaluation of their biological significance. The increasing number of legume genome sequences also presents a challenge for developing reliable and information-driven bioinformatics applications suitable for comparative exploration of this important class of crop plants. Results The Chickpea Genomic Web Resource (CGWR) is an implementation of a suite of web-based applications dedicated to chickpea genome visualization and comparative analysis, based on next generation sequencing and assembly of Cicer arietinum desi-type genotype ICC4958. 
CGWR has been designed and configured for mapping, scanning and browsing the significant chickpea genomic features in view of the important existing and potential roles played by the various legume genome projects in mutant mapping and cloning. It also enables comparative informatics of ICC4958 DNA sequence analysis with other wild and cultivated genotypes of chickpea, various other leguminous species as well as several non-leguminous model plants, to enable investigations into evolutionary processes that shape legume genomes. Conclusions CGWR is an online database offering a comprehensive visual and functional genomic analysis of the chickpea genome, along with customized maps and gene-clustering options. It is also the only plant based web resource supporting display and analysis of nucleosome positioning patterns in the genome. The usefulness of CGWR has been demonstrated with discoveries of biological significance made using this server. The CGWR is compatible with all available operating systems and browsers, and is available freely under the open source license at http://www.nipgr.res.in/CGWR/home.php.",CGWR,0.983098507,Chickpea Genomic Web Resource,0.932702328,CGWR,0.983098507,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/18/2014 +22232598,http://channelpedia.net,"Channelpedia: an integrative and interactive database for ion channels. Ion channels are membrane proteins that selectively conduct ions across the cell membrane. The flux of ions through ion channels drives electrical and biochemical processes in cells and plays a critical role in shaping the electrical properties of neurons. During the past three decades, extensive research has been carried out to characterize the molecular, structural, and biophysical properties of ion channels. This research has begun to elucidate the role of ion channels in neuronal function and has subsequently led to the development of computational models of ion channel function. 
Although there have been substantial efforts to consolidate these findings into easily accessible and coherent online resources, a single comprehensive resource is still lacking. The success of these initiatives has been hindered by the sheer diversity of approaches and the variety in data formats. Here, we present ""Channelpedia"" (http://channelpedia.net), which is designed to store information related to ion channels and models and is characterized by an efficient information management framework. Composed of a combination of a database and a wiki-like discussion platform Channelpedia allows researchers to collaborate and synthesize ion channel information from literature. Equipped to automatically update references, Channelpedia integrates and highlights recent publications with relevant information in the database. It is web based, freely accessible and currently contains 187 annotated ion channels with 45 Hodgkin-Huxley models.",Channelpedia,0.995265245,NA,0,Channelpedia,0.995265245,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/30/2011 +28168870,http://www.nextprot.org/portals/navmut,"Annotation of functional impact of voltage-gated sodium channel mutations. Voltage-gated sodium channels are pore-forming transmembrane proteins that selectively allow sodium ions to flow across the plasma membrane according to the electro-chemical gradient thus mediating the rising phase of action potentials in excitable cells and playing key roles in physiological processes such as neurotransmission, skeletal muscle contraction, heart rhythm, and pain sensation. Genetic variations in the nine human genes encoding these channels are known to cause a large range of diseases affecting the nervous and cardiac systems. Understanding the molecular effect of genetic variations is critical for elucidating the pathologic mechanisms of known variations and in predicting the effect of newly discovered ones. 
To this end, we have created a Web-based tool, the Ion Channels Variants Portal, which compiles all variants characterized functionally in the human sodium channel genes. This portal describes 672 variants each associated with at least one molecular or clinical phenotypic impact, for a total of 4,658 observations extracted from 264 different research articles. These data were captured as structured annotations using standardized vocabularies and ontologies, such as the Gene Ontology and the Ion Channel ElectroPhysiology Ontology. All these data are available to the scientific community via neXtProt at https://www.nextprot.org/portals/navmut.",neXtProt,0.639292121,Channels,0.651000381,Channels,0.651000381,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,2/28/2017 +29036719,http://ncbr.muni.cz/ChannelsDB,"ChannelsDB: database of biomacromolecular tunnels and pores. ChannelsDB (http://ncbr.muni.cz/ChannelsDB) is a database providing information about the positions, geometry and physicochemical properties of channels (pores and tunnels) found within biomacromolecular structures deposited in the Protein Data Bank. Channels were deposited from two sources; from literature using manual deposition and from a software tool automatically detecting tunnels leading to the enzymatic active sites and selected cofactors, and transmembrane pores. The database stores information about geometrical features (e.g. length and radius profile along a channel) and physicochemical properties involving polarity, hydrophobicity, hydropathy, charge and mutability. The stored data are interlinked with available UniProt annotation data mapping known mutation effects to channel-lining residues. All structures with channels are displayed in a clear interactive manner, further facilitating data manipulation and interpretation. 
As such, ChannelsDB provides an invaluable resource for research related to deciphering the biological function of biomacromolecular channels.",ChannelsDB,0.998202503,NA,0,ChannelsDB,0.998202503,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +22140108,http://www.jcvi.org/charprotdb,"CharProtDB: a database of experimentally characterized protein annotations. CharProtDB (http://www.jcvi.org/charprotdb/) is a curated database of biochemically characterized proteins. It provides a source of direct rather than transitive assignments of function, designed to support automated annotation pipelines. The initial data set in CharProtDB was collected through manual literature curation over the years by analysts at the J. Craig Venter Institute (JCVI) [formerly The Institute of Genomic Research (TIGR)] as part of their prokaryotic genome sequencing projects. The CharProtDB has been expanded by import of selected records from publicly available protein collections whose biocuration indicated direct rather than homology-based assignment of function. Annotations in CharProtDB include gene name, symbol and various controlled vocabulary terms, including Gene Ontology terms, Enzyme Commission number and TransportDB accession. Each annotation is referenced with the source; ideally a journal reference, or, if imported and lacking one, the original database source.",CharProtDB,0.996790111,NA,0,CharProtDB,0.996790111,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/2/2011 +24498619,http://www.cdc.gov/hemophiliamutations,"The CDC Hemophilia B mutation project mutation list: a new online resource. Hemophilia B (HB) is caused by mutations in the human gene F9. The mutation type plays a pivotal role in genetic counseling and prediction of inhibitor development. To help the HB community understand the molecular etiology of HB, we have developed a listing of all F9 mutations that are reported to cause HB based on the literature and existing databases. 
The Centers for Disease Control and Prevention (CDC) Hemophilia B Mutation Project (CHBMP) mutation list is compiled in an easily accessible format of Microsoft Excel and contains 1083 unique mutations that are reported to cause HB. Each mutation is identified using Human Genome Variation Society (HGVS) nomenclature standards. The mutation types and the predicted changes in amino acids, if applicable, are also provided. Related information including the location of mutation, severity of HB, the presence of inhibitor, and original publication reference are listed as well. Therefore, our mutation list provides an easily accessible resource for genetic counselors and HB researchers to predict inhibitors. The CHBMP mutation list is freely accessible at http://www.cdc.gov/hemophiliamutations.",CHBMP,0.899794623,Hemophilia B mutation project,0.751574079,CHBMP,0.899794623,1,23280990,NA,low_prob_best_name,do not remove,conflicting record(s) to be removed,NA,NA,NA,NA,8/19/2013 +28651548,http://chd.vnbiology.com,"Canis mtDNA HV1 database: a web-based tool for collecting and surveying Canis mtDNA HV1 haplotype in public database. Background Canine and wolf mitochondrial DNA haplotypes, which can be used for forensic or phylogenetic analyses, have been defined in various schemes depending on the region analyzed. In recent studies, the 582 bp fragment of the HV1 region is most commonly used. 317 different canine HV1 haplotypes have been reported in the rapidly growing public database GenBank. These reported haplotypes contain several inconsistencies in their haplotype information. To overcome this issue, we have developed a Canis mtDNA HV1 database. This database collects data on the HV1 582 bp region in dog mitochondrial DNA from the GenBank to screen and correct the inconsistencies. It also supports users in detection of new novel mutation profiles and assignment of new haplotypes. 
Description The Canis mtDNA HV1 database (CHD) contains 5567 nucleotide entries originating from 15 subspecies in the species Canis lupus. Of these entries, 3646 were haplotypes and grouped into 804 distinct sequences. 319 sequences were recognized as previously assigned haplotypes, while the remaining 485 sequences had new mutation profiles and were marked as new haplotype candidates awaiting further analysis for haplotype assignment. Of the 3646 nucleotide entries, only 414 were annotated with correct haplotype information, while 3232 had insufficient or lacked haplotype information and were corrected or modified before storing in the CHD. The CHD can be accessed at http://chd.vnbiology.com . It provides sequences, haplotype information, and a web-based tool for mtDNA HV1 haplotyping. The CHD is updated monthly and supplies all data for download. Conclusions The Canis mtDNA HV1 database contains information about canine mitochondrial DNA HV1 sequences with reconciled annotation. It serves as a tool for detection of inconsistencies in GenBank and helps identifying new HV1 haplotypes. Thus, it supports the scientific community in naming new HV1 haplotypes and to reconcile existing annotation of HV1 582 bp sequences.",CHD,0.82080698,Canis mtDNA HV1,0.562380518,CHD,0.82080698,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/26/2017 +23818526,http://tcm.zju.edu.cn/chd,"CHD@ZJU: a knowledgebase providing network-based research platform on coronary heart disease. Coronary heart disease (CHD), the leading cause of global morbidity and mortality in adults, has been reported to be associated with hundreds of genes. A comprehensive understanding of the CHD-related genes and their corresponding interactions is essential to advance the translational research on CHD. 
Accordingly, we construct this knowledgebase, CHD@ZJU, which records CHD-related information (genes, pathways, drugs and references) collected from different resources and through text-mining method followed by manual confirmation. In current release, CHD@ZJU contains 660 CHD-related genes, 45 common pathways and 1405 drugs accompanied with >8000 supporting references. Almost half of the genes collected in CHD@ZJU were novel to other publicly available CHD databases. Additionally, CHD@ZJU incorporated the protein-protein interactions to investigate the cross-talk within the pathways from a multi-layer network view. These functions offered by CHD@ZJU would allow researchers to dissect the molecular mechanism of CHD in a systematic manner and therefore facilitate the research on CHD-related multi-target therapeutic discovery. Database URL: http://tcm.zju.edu.cn/chd/",CHD@ZJU,0.971079409,NA,0,CHD@ZJU,0.971079409,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2013 +32608479,http://www.sysbio.org.cn/CHDGKB,"CHDGKB: a knowledgebase for systematic understanding of genetic variations associated with non-syndromic congenital heart disease. . Congenital heart disease (CHD) is one of the most common birth defects, with complex genetic and environmental etiologies. The reports of genetic variation associated with CHD have increased dramatically in recent years due to the revolutionary development of molecular technology. However, CHD is a heterogeneous disease, and its genetic origins remain inconclusive in most patients. Here we present a database of genetic variations for non-syndromic CHD (NS-CHD). By manually literature extraction and analyses, 5345 NS-CHD-associated genetic variations were collected, curated and stored in the public online database. 
The objective of our database is to provide the most comprehensive updates on NS-CHD genetic research and to aid systematic analyses of pathogenesis of NS-CHD in molecular level and the correlation between NS-CHD genotypes and phenotypes. Database URL: http://www.sysbio.org.cn/CHDGKB/.",CHDGKB,0.63245517,NA,0,CHDGKB,0.63245517,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +28383342,http://chearprogram.org,"The Children's Health Exposure Analysis Resource: enabling research into the environmental influences on children's health outcomes. Purpose of review The Children's Health Exposure Analysis Resource (CHEAR) is a new infrastructure supported by the National Institute of Environmental Health Sciences to expand the ability of children's health researchers to include analysis of environmental exposures in their research and to incorporate the emerging concept of the exposome. Recent findings There is extensive discussion of the potential of the exposome to advance understanding of the totality of environmental influences on human health. Children's health is a logical choice to demonstrate the exposome concept due to the extensive existing knowledge of individual environmental exposures affecting normal health and development and the short latency between exposures and observable phenotypes. Achieving this demonstration will require access to extensive analytical capabilities to measure a suite of exposures through traditional biomonitoring approaches and to cross-validate these with emerging exposomic approaches. Summary CHEAR is a full-service exposure assessment resource, linking up-front consultation with both laboratory and data analysis. Analyses of biological samples are intended to enhance studies by including targeted analysis of specific exposures and untargeted analysis of small molecules associated with phenotypic endpoints. 
Services provided by CHEAR are made available without cost but require a brief application and adherence to policies detailed on the CHEAR web page at https://chearprogram.org/.",CHEAR,0.995191038,Children's Health Exposure Analysis Resource,0.965028177,CHEAR,0.995191038,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2017 +"23180789, 26467479",http://www.ebi.ac.uk/chebi,"The ChEBI reference database and ontology for biologically relevant chemistry: enhancements for 2013. ChEBI (http://www.ebi.ac.uk/chebi) is a database and ontology of chemical entities of biological interest. Over the past few years, ChEBI has continued to grow steadily in content, and has added several new features. In addition to incorporating all user-requested compounds, our annotation efforts have emphasized immunology, natural products and metabolites in many species. All database entries are now 'is_a' classified within the ontology, meaning that all of the chemicals are available to semantic reasoning tools that harness the classification hierarchy. We have completely aligned the ontology with the Open Biomedical Ontologies (OBO) Foundry-recommended upper level Basic Formal Ontology. Furthermore, we have aligned our chemical classification with the classification of chemical-involving processes in the Gene Ontology (GO), and as a result of this effort, the majority of chemical-involving processes in GO are now defined in terms of the ChEBI entities that participate in them. This effort necessitated incorporating many additional biologically relevant compounds. We have incorporated additional data types including reference citations, and the species and component for metabolites. 
Finally, our website and web services have had several enhancements, most notably the provision of a dynamic new interactive graph-based ontology visualization.",ChEBI,0.998489141,NA,0,ChEBI,0.998489141,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/13/2015 +25883136,"http://www.ebi.ac.uk/chembl/api/data/docs, http://www.ebi.ac.uk/chembl/api/utils/docs","ChEMBL web services: streamlining access to drug discovery data and utilities. ChEMBL is now a well-established resource in the fields of drug discovery and medicinal chemistry research. The ChEMBL database curates and stores standardized bioactivity, molecule, target and drug data extracted from multiple sources, including the primary medicinal chemistry literature. Programmatic access to ChEMBL data has been improved by a recent update to the ChEMBL web services (version 2.0.x, https://www.ebi.ac.uk/chembl/api/data/docs), which exposes significantly more data from the underlying database and introduces new functionality. To complement the data-focused services, a utility service (version 1.0.x, https://www.ebi.ac.uk/chembl/api/utils/docs), which provides RESTful access to commonly used cheminformatics methods, has also been concurrently developed. The ChEMBL web services can be used together or independently to build applications and data processing workflows relevant to drug discovery and chemical biology.",ChEMBL,0.99859941,NA,0,ChEMBL,0.99859941,1,NA,"21936816.0, 24214965.0, 27899562.0, 30398643.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,4/16/2015 +"24214965, 27899562, 30398643",http://www.ebi.ac.uk/chembl,"The ChEMBL bioactivity database: an update. ChEMBL is an open large-scale bioactivity database (https://www.ebi.ac.uk/chembl), previously described in the 2012 Nucleic Acids Research Database Issue. Since then, a variety of new data sources and improvements in functionality have contributed to the growth and utility of the resource. 
In particular, more comprehensive tracking of compounds from research stages through clinical development to market is provided through the inclusion of data from United States Adopted Name applications; a new richer data model for representing drug targets has been developed; and a number of methods have been put in place to allow users to more easily identify reliable data. Finally, access to ChEMBL is now available via a new Resource Description Framework format, in addition to the web-based interface, data downloads and web services.",ChEMBL,0.998430729,NA,0,ChEMBL,0.998430729,3,NA,"21936816.0, 25883136.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +21936816,http://www.ebi.ac.uk/chembldb,"Collation and data-mining of literature bioactivity data for drug discovery. The challenge of translating the huge amount of genomic and biochemical data into new drugs is a costly and challenging task. Historically, there has been comparatively little focus on linking the biochemical and chemical worlds. To address this need, we have developed ChEMBL, an online resource of small-molecule SAR (structure-activity relationship) data, which can be used to support chemical biology, lead discovery and target selection in drug discovery. The database contains the abstracted structures, properties and biological activities for over 700000 distinct compounds and in excess of more than 3 million bioactivity records abstracted from over 40000 publications. Additional public domain resources can be readily integrated into the same data model (e.g. PubChem BioAssay data). The compounds in ChEMBL are largely extracted from the primary medicinal chemistry literature, and are therefore usually 'drug-like' or 'lead-like' small molecules with full experimental context. The data cover a significant fraction of the discovery of modern drugs, and are useful in a wide range of drug design and discovery tasks. 
In addition to the compound data, ChEMBL also contains information for over 8000 protein, cell line and whole-organism 'targets', with over 4000 of those being proteins linked to their underlying genes. The database is searchable both chemically, using an interactive compound sketch tool, protein sequences, family hierarchies, SMILES strings, compound research codes and key words, and biologically, using a variety of gene identifiers, protein sequence similarity and protein families. The information retrieved can then be readily filtered and downloaded into various formats. ChEMBL can be accessed online at https://www.ebi.ac.uk/chembldb.",ChEMBL,0.997582018,NA,0,ChEMBL,0.997582018,1,NA,"25883136.0, 24214965.0, 27899562.0, 30398643.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/1/2011 +33970229,http://www.rxnfinder.org/chemhub,"ChemHub: a knowledgebase of functional chemicals for synthetic biology studies. . The field of synthetic biology lacks a comprehensive knowledgebase for selecting synthetic target molecules according to their functions, economic applications, and known biosynthetic pathways. We implemented ChemHub, a knowledgebase containing >90,000 chemicals and their functions, along with related biosynthesis information for these chemicals that was manually extracted from >600,000 published studies by more than 100 people over the past 10 years. Multiple algorithms were implemented to enable biosynthetic pathway design and precursor discovery, which can support investigation of the biosynthetic potential of these functional chemicals. ChemHub is freely available at: http://www.rxnfinder.org/chemhub/. Supplementary data are available at Bioinformatics online.",ChemHub,0.996807337,NA,0,ChemHub,0.996807337,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/10/2021 +28578993,http://pcs-db.fr,"A statistical view of protein chemical synthesis using NCL and extended methodologies. 
Native chemical ligation and extended methodologies are the most popular chemoselective reactions for protein chemical synthesis. Their combination with desulfurization techniques can give access to small or challenging proteins that are exploited in a large variety of research areas. In this report, we have conducted a statistical review of their use for protein chemical synthesis in order to provide a flavor of the recent trends and identify the most popular chemical tools used by protein chemists. To this end, a protein chemical synthesis (PCS) database (http://pcs-db.fr) was created by collecting a set of relevant data from more than 450 publications covering the period 1994-2017. A preliminary account of what this database tells us is presented in this report.",NA,0,chemical synthesis,0.52219218,chemical synthesis,0.52219218,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,5/24/2017 +26876982,http://potentia.cbs.dtu.dk/ChemProt,"ChemProt-3.0: a global chemical biology diseases mapping. . ChemProt is a publicly available compilation of chemical-protein-disease annotation resources that enables the study of systems pharmacology for a small molecule across multiple layers of complexity from molecular to clinical levels. In this third version, ChemProt has been updated to more than 1.7 million compounds with 7.8 million bioactivity measurements for 19,504 proteins. Here, we report the implementation of global pharmacological heatmap, supporting a user-friendly navigation of chemogenomics space. This facilitates the visualization and selection of chemicals that share similar structural properties. In addition, the user has the possibility to search by compound, target, pathway, disease and clinical effect. Genetic variations associated to target proteins were integrated, making it possible to plan pharmacogenetic studies and to suggest human response variability to drug. 
Finally, Quantitative Structure-Activity Relationship models for 850 proteins having sufficient data were implemented, enabling secondary pharmacological profiling predictions from molecular structure. Database URL: http://potentia.cbs.dtu.dk/ChemProt/.",ChemProt,0.998003364,NA,0,ChemProt,0.998003364,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/13/2016 +23185041,http://www.cbs.dtu.dk/services/ChemProt-2.0,"ChemProt-2.0: visual navigation in a disease chemical biology database. ChemProt-2.0 (http://www.cbs.dtu.dk/services/ChemProt-2.0) is a public available compilation of multiple chemical-protein annotation resources integrated with diseases and clinical outcomes information. The database has been updated to >1.15 million compounds with 5.32 millions bioactivity measurements for 15 290 proteins. Each protein is linked to quality-scored human protein-protein interactions data based on more than half a million interactions, for studying diseases and biological outcomes (diseases, pathways and GO terms) through protein complexes. In ChemProt-2.0, therapeutic effects as well as adverse drug reactions have been integrated allowing for suggesting proteins associated to clinical outcomes. New chemical structure fingerprints were computed based on the similarity ensemble approach. Protein sequence similarity search was also integrated to evaluate the promiscuity of proteins, which can help in the prediction of off-target effects. Finally, the database was integrated into a visual interface that enables navigation of the pharmacological space for small molecules. Filtering options were included in order to facilitate and to guide dynamic search of specific queries.",ChemProt-2.0,0.885878646,NA,0,ChemProt-2.0,0.885878646,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/26/2012 +24470572,http://chepimod.org,"ChEpiMod: a knowledgebase for chemical modulators of epigenome reader domains. 
Context Epigenome reader domains are rapidly emerging as a new class of drug targets for a wide array of human diseases. To facilitate study of structure-activity relationship and small-molecule ligand design for these domains, we have created ChEpiMod. ChEpiMod is a free knowledgebase of chemical modulators with documented modulatory activity for epigenome reader domains. Methods ChEpiMod organizes information about chemical modulators and their associated binding-affinity data, as well as available structures of epigenome readers from the Protein Data Bank. The data are gathered from the literature and patents. Entries are supplemented by annotation. The current version of ChEpiMod covers six epigenome reader domain families (Bromodomain, PHD finger, Chromodomain, MBT, PWWP and Tudor). The database can be used to browse existing chemical modulators and bioactivity data, as well as, all available structures of readers and their molecular interactions. The database is updated weekly. Availability ChEpiMod is freely available at http://chepimod.org Contact ming-ming.zhou@mssm.edu Supplementary information Supplementary data is available at Bioinformatics online.",ChEpiMod,0.997203112,NA,0,ChEpiMod,0.997203112,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/27/2014 +30486838,http://ccb.jhu.edu/chess,"CHESS: a new human gene catalog curated from thousands of large-scale RNA sequencing experiments reveals extensive transcriptional noise. We assembled the sequences from deep RNA sequencing experiments by the Genotype-Tissue Expression (GTEx) project, to create a new catalog of human genes and transcripts, called CHESS. The new database contains 42,611 genes, of which 20,352 are potentially protein-coding and 22,259 are noncoding, and a total of 323,258 transcripts. These include 224 novel protein-coding genes and 116,156 novel transcripts. 
We detected over 30 million additional transcripts at more than 650,000 genomic loci, nearly all of which are likely nonfunctional, revealing a heretofore unappreciated amount of transcriptional noise in human cells. The CHESS database is available at http://ccb.jhu.edu/chess .",CHESS,0.993582785,NA,0,CHESS,0.993582785,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2018 +33068420,http://chewbbaca.online,"Chewie Nomenclature Server (chewie-NS): a deployable nomenclature server for easy sharing of core and whole genome MLST schemas. Chewie Nomenclature Server (chewie-NS, https://chewbbaca.online/) allows users to share genome-based gene-by-gene typing schemas and to maintain a common nomenclature, simplifying the comparison of results. The combination between local analyses and a public repository of allelic data strikes a balance between potential confidentiality issues and the need to compare results. The possibility of deploying private instances of chewie-NS facilitates the creation of nomenclature servers with a restricted user base to allow compliance with the strictest data policies. Chewie-NS allows users to easily share their own schemas and to explore publicly available schemas, including informative statistics on schemas and loci presented in interactive charts and tables. Users can retrieve all the information necessary to run a schema locally or all the alleles identified at a particular locus. The integration with the chewBBACA suite enables users to directly upload new schemas to chewie-NS, download existing schemas and synchronize local and remote schemas from chewBBACA command line version, allowing an easier integration into high-throughput analysis pipelines. 
The same REST API linking chewie-NS and the chewBBACA suite supports the interaction of other interfaces or pipelines with the databases available at chewie-NS, facilitating the reusability of the stored data.",chewie-NS,0.992993156,Chewie Nomenclature Server,0.940701187,chewie-NS,0.992993156,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +31210271,http://geneatlas.arl.arizona.edu,"Chickspress: a resource for chicken gene expression. . High-throughput sequencing and proteomics technologies are markedly increasing the amount of RNA and peptide data that are available to researchers, which are typically made publicly available via data repositories such as the NCBI Sequence Read Archive and proteome archives, respectively. These data sets contain valuable information about when and where gene products are expressed, but this information is not readily obtainable from archived data sets. Here we report Chickspress (http://geneatlas.arl.arizona.edu), the first publicly available gene expression resource for chicken tissues. Since there is no single source of chicken gene models, Chickspress incorporates both NCBI and Ensembl gene models and links these gene sets with experimental gene expression data and QTL information. By linking gene models from both NCBI and Ensembl gene prediction pipelines, researchers can, for the first time, easily compare gene models from each of these prediction workflows to available experimental data for these products. We use Chickspress data to show the differences between these gene annotation pipelines. Chickspress also provides rapid search, visualization and download capacity for chicken gene sets based upon tissue type, developmental stage and experiment type. This first Chickspress release contains 161 gene expression data sets, including expression of mRNAs, miRNAs, proteins and peptides. 
We provide several examples demonstrating how researchers may use this resource.",Chickspress,0.997966588,NA,0,Chickspress,0.997966588,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +24997141,http://ento.njau.edu.cn/ChiloDB,"ChiloDB: a genomic and transcriptome database for an important rice insect pest Chilo suppressalis. . ChiloDB is an integrated resource that will be of use to the rice stem borer research community. The rice striped stem borer (SSB), Chilo suppressalis Walker, is a major rice pest that causes severe yield losses in most rice-producing countries. A draft genome of this insect is available. The aims of ChiloDB are (i) to store recently acquired genomic sequence and transcriptome data and integrate them with protein-coding genes, microRNAs, piwi-interacting RNAs (piRNAs) and RNA sequencing (RNA-Seq) data and (ii) to provide comprehensive search tools and downloadable data sets for comparative genomics and gene annotation of this important rice pest. ChiloDB contains the first version of the official SSB gene set, comprising 80,479 scaffolds and 10 221 annotated protein-coding genes. Additionally, 262 SSB microRNA genes predicted from a small RNA library, 82 639 piRNAs identified using the piRNApredictor software, 37,040 transcripts from a midgut transcriptome and 69 977 transcripts from a mixed sample have all been integrated into ChiloDB. ChiloDB was constructed using a data structure that is compatible with data resources, which will be incorporated into the database in the future. This resource will serve as a long-term and open-access database for research on the biology, evolution and pest control of SSB. To the best of our knowledge, ChiloDB is one of the first genomic and transcriptome database for rice insect pests. 
Database URL: http://ento.njau.edu.cn/ChiloDB.",ChiloDB,0.997956932,NA,0,ChiloDB,0.997956932,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/4/2014 +31680157,http://www.kobic.re.kr/chimerdb,"ChimerDB 4.0: an updated and expanded database of fusion genes. Fusion genes represent an important class of biomarkers and therapeutic targets in cancer. ChimerDB is a comprehensive database of fusion genes encompassing analysis of deep sequencing data (ChimerSeq) and text mining of publications (ChimerPub) with extensive manual annotations (ChimerKB). In this update, we present all three modules substantially enhanced by incorporating the recent flood of deep sequencing data and related publications. ChimerSeq now covers all 10 565 patients in the TCGA project, with compilation of computational results from two reliable programs of STAR-Fusion and FusionScan with several public resources. In sum, ChimerSeq includes 65 945 fusion candidates, 21 106 of which were predicted by multiple programs (ChimerSeq-Plus). ChimerPub has been upgraded by applying a deep learning method for text mining followed by extensive manual curation, which yielded 1257 fusion genes including 777 cases with experimental supports (ChimerPub-Plus). ChimerKB includes 1597 fusion genes with publication support, experimental evidences and breakpoint information. Importantly, we implemented several new features to aid estimation of functional significance, including the fusion structure viewer with domain information, gene expression plot of fusion positive versus negative patients and a STRING network viewer. The user interface also was greatly enhanced by applying responsive web design. 
ChimerDB 4.0 is available at http://www.kobic.re.kr/chimerdb/.",ChimerDB,0.997194171,NA,0,ChimerDB,0.997194171,1,NA,27899563,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2020 +27899563,http://ercsb.ewha.ac.kr/fusiongene,"ChimerDB 3.0: an enhanced database for fusion genes from cancer transcriptome and literature data mining. Fusion gene is an important class of therapeutic targets and prognostic markers in cancer. ChimerDB is a comprehensive database of fusion genes encompassing analysis of deep sequencing data and manual curations. In this update, the database coverage was enhanced considerably by adding two new modules of The Cancer Genome Atlas (TCGA) RNA-Seq analysis and PubMed abstract mining. ChimerDB 3.0 is composed of three modules of ChimerKB, ChimerPub and ChimerSeq. ChimerKB represents a knowledgebase including 1066 fusion genes with manual curation that were compiled from public resources of fusion genes with experimental evidences. ChimerPub includes 2767 fusion genes obtained from text mining of PubMed abstracts. ChimerSeq module is designed to archive the fusion candidates from deep sequencing data. Importantly, we have analyzed RNA-Seq data of the TCGA project covering 4569 patients in 23 cancer types using two reliable programs of FusionScan and TopHat-Fusion. The new user interface supports diverse search options and graphic representation of fusion gene structure. ChimerDB 3.0 is available at http://ercsb.ewha.ac.kr/fusiongene/.",ChimerDB,0.995721757,NA,0,ChimerDB,0.995721757,1,NA,31680157,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/28/2016 +33662628,http://www.cgga.org.cn,"Chinese Glioma Genome Atlas (CGGA): A Comprehensive Resource with Functional Genomic Data from Chinese Glioma Patients. Gliomas are the most common and malignant intracranial tumors in adults. 
Recent studies have revealed the significance of functional genomics for glioma pathophysiological studies and treatments. However, access to comprehensive genomic data and analytical platforms is often limited. Here, we developed the Chinese Glioma Genome Atlas (CGGA), a user-friendly data portal for the storage and interactive exploration of cross-omics data, including nearly 2000 primary and recurrent glioma samples from Chinese cohort. Currently, open access is provided to whole-exome sequencing data (286 samples), mRNA sequencing (1018 samples) and microarray data (301 samples), DNA methylation microarray data (159 samples), and microRNA microarray data (198 samples), and to detailed clinical information (age, gender, chemoradiotherapy status, WHO grade, histological type, critical molecular pathological information, and survival data). In addition, we have developed several tools for users to analyze the mutation profiles, mRNA/microRNA expression, and DNA methylation profiles, and to perform survival and gene correlation analyses of specific glioma subtypes. This database removes the barriers for researchers, providing rapid and convenient access to high-quality functional genomic data resources for biological studies and clinical applications. CGGA is available at http://www.cgga.org.cn.",CGGA,0.972158313,Chinese Glioma Genome Atlas,0.975209602,Chinese Glioma Genome Atlas,0.975209602,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/1/2021 +23161675,http://deepbase.sysu.edu.cn/chipbase,"ChIPBase: a database for decoding the transcriptional regulation of long non-coding RNA and microRNA genes from ChIP-Seq data. Long non-coding RNAs (lncRNAs) and microRNAs (miRNAs) represent two classes of important non-coding RNAs in eukaryotes. Although these non-coding RNAs have been implicated in organismal development and in various human diseases, surprisingly little is known about their transcriptional regulation. 
Recent advances in chromatin immunoprecipitation with next-generation DNA sequencing (ChIP-Seq) have provided methods of detecting transcription factor binding sites (TFBSs) with unprecedented sensitivity. In this study, we describe ChIPBase (http://deepbase.sysu.edu.cn/chipbase/), a novel database that we have developed to facilitate the comprehensive annotation and discovery of transcription factor binding maps and transcriptional regulatory relationships of lncRNAs and miRNAs from ChIP-Seq data. The current release of ChIPBase includes high-throughput sequencing data that were generated by 543 ChIP-Seq experiments in diverse tissues and cell lines from six organisms. By analysing millions of TFBSs, we identified tens of thousands of TF-lncRNA and TF-miRNA regulatory relationships. Furthermore, two web-based servers were developed to annotate and discover transcriptional regulatory relationships of lncRNAs and miRNAs from ChIP-Seq data. In addition, we developed two genome browsers, deepView and genomeView, to provide integrated views of multidimensional data. Moreover, our web implementation supports diverse query types and the exploration of TFs, lncRNAs, miRNAs, gene ontologies and pathways.",ChIPBase,0.99755013,NA,0,ChIPBase,0.99755013,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2012 +30202990,"http://umiamihealth.org/bascom-palmer-eye-institute/research/clinical-and-laboratory-research/ocular-oncology-laboratory/chip-primers, http://www.chipprimers.com","ChIPprimersDB: a public repository of verified qPCR primers for chromatin immunoprecipitation (ChIP). Chromatin immunoprecipitation (ChIP) has ushered in a new era of scientific discovery by allowing new insights into DNA-protein interactions. ChIP is used to quantify enriched genomic regions using qPCR, and more recently is combined with next generation sequencing (ChIP-seq) to obtain a genome wide profile of protein binding sites. 
Nevertheless, ChIP-qPCR remains an integral component of this technology for quality control purposes, before the library preparation and sequencing steps. In addition, ChIP-qPCR remains more time- and cost-effective for many focused projects in which the DNA regions of interest are already known. However, the DNA oligonucleotide primers needed for ChIP-qPCR are more challenging to design than for other qPCR projects. Here, we present the first public repository for ChIP oligonucleotides that have been verified to perform well in ChIP-qPCR experiments. ChIPprimersDB was developed by manual screening of publications to ensure primer quality and provide additional specific information on the ChIP experiments where the primers have been used. In addition to the primer sequences, the database includes information about the antibody, cells and tissues used in the experiment, information on the experimental design, and a direct link to the original publication. The database is linked at https://umiamihealth.org/bascom-palmer-eye-institute/research/clinical-and-laboratory-research/ocular-oncology-laboratory/chip-primers and hosted at https://www.chipprimers.com/.",ChIPprimersDB,0.948491991,NA,0,ChIPprimersDB,0.948491991,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +31942977,http://summit.med.unideb.hu/summitdb,"ChIPSummitDB: a ChIP-seq-based database of human transcription factor binding sites and the topological arrangements of the proteins bound to them. . ChIP-seq reveals genomic regions where proteins, e.g. transcription factors (TFs) interact with DNA. A substantial fraction of these regions, however, do not contain the cognate binding site for the TF of interest. This phenomenon might be explained by protein-protein interactions and co-precipitation of interacting gene regulatory elements. 
We uniformly processed 3727 human ChIP-seq data sets and determined the cistrome of 292 TFs, as well as the distances between the TF binding motif centers and the ChIP-seq peak summits. ChIPSummitDB enables the analysis of ChIP-seq data using multiple approaches. The 292 cistromes and corresponding ChIP-seq peak sets can be browsed in GenomeView. Overlapping SNPs can be inspected in dbSNPView. Most importantly, the MotifView and PairShiftView pages show the average distance between motif centers and overlapping ChIP-seq peak summits and distance distributions thereof, respectively. In addition to providing a comprehensive human TF binding site collection, the ChIPSummitDB database and web interface allows for the examination of the topological arrangement of TF complexes genome-wide. ChIPSummitDB is freely accessible at http://summit.med.unideb.hu/summitdb/. The database will be regularly updated and extended with the newly available human and mouse ChIP-seq data sets.",ChIPSummitDB,0.995177627,NA,0,ChIPSummitDB,0.995177627,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +"27899596, 31747015",http://chitars.md.biu.ac.il,"ChiTaRS-3.1-the enhanced chimeric transcripts and RNA-seq database matched with protein-protein interactions. Discovery of chimeric RNAs, which are produced by chromosomal translocations as well as the joining of exons from different genes by trans-splicing, has added a new level of complexity to our study and understanding of the transcriptome. The enhanced ChiTaRS-3.1 database (http://chitars.md.biu.ac.il) is designed to make widely accessible a wealth of mined data on chimeric RNAs, with easy-to-use analytical tools built-in. The database comprises 34 922: chimeric transcripts along with 11 714: cancer breakpoints. In this latest version, we have included multiple cross-references to GeneCards, iHop, PubMed, NCBI, Ensembl, OMIM, RefSeq and the Mitelman collection for every entry in the 'Full Collection'. 
In addition, for every chimera, we have added a predicted Chimeric Protein-Protein Interaction (ChiPPI) network, which allows for easy visualization of protein partners of both parental and fusion proteins for all human chimeras. The database contains a comprehensive annotation for 34 922: chimeric transcripts from eight organisms, and includes the manual annotation of 200 sense-antiSense (SaS) chimeras. The current improvements in the content and functionality to the ChiTaRS database make it a central resource for the study of chimeric transcripts and fusion proteins.",ChiTaRS,0.996551275,NA,0,ChiTaRS,0.996551275,2,NA,"23143107.0, 25414346.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2020 +"23143107, 25414346",http://chitars.bioinfo.cnio.es,"ChiTaRS: a database of human, mouse and fruit fly chimeric transcripts and RNA-sequencing data. Chimeric RNAs that comprise two or more different transcripts have been identified in many cancers and among the Expressed Sequence Tags (ESTs) isolated from different organisms; they might represent functional proteins and produce different disease phenotypes. The ChiTaRS database of Chimeric Transcripts and RNA-Sequencing data (http://chitars.bioinfo.cnio.es/) collects more than 16 000 chimeric RNAs from humans, mice and fruit flies, 233 chimeras confirmed by RNA-seq reads and ~2000 cancer breakpoints. The database indicates the expression and tissue specificity of these chimeras, as confirmed by RNA-seq data, and it includes mass spectrometry results for some human entries at their junctions. Moreover, the database has advanced features to analyze junction consistency and to rank chimeras based on the evidence of repeated junction sites. Finally, 'Junction Search' screens through the RNA-seq reads found at the chimeras' junction sites to identify putative junctions in novel sequences entered by users. 
Thus, ChiTaRS is an extensive catalog of human, mouse and fruit fly chimeras that will extend our understanding of the evolution of chimeric transcripts in eukaryotes and can be advantageous in the analysis of human cancer breakpoints.",ChiTaRS,0.9914096,NA,0,ChiTaRS,0.9914096,2,NA,"27899596.0, 31747015.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/20/2014 +31665454,http://chlamdb.ch,"ChlamDB: a comparative genomics database of the phylum Chlamydiae and other members of the Planctomycetes-Verrucomicrobiae-Chlamydiae superphylum. ChlamDB is a comparative genomics database containing 277 genomes covering the entire Chlamydiae phylum as well as their closest relatives belonging to the Planctomycetes-Verrucomicrobiae-Chlamydiae (PVC) superphylum. Genomes can be compared, analyzed and retrieved using accessions numbers of the most widely used databases including COG, KEGG ortholog, KEGG pathway, KEGG module, Pfam and InterPro. Gene annotations from multiple databases including UniProt (curated and automated protein annotations), KEGG (annotation of pathways), COG (orthology), TCDB (transporters), STRING (protein-protein interactions) and InterPro (domains and signatures) can be accessed in a comprehensive overview page. Candidate effectors of the Type III secretion system (T3SS) were identified using four in silico methods. The identification of orthologs among all PVC genomes allows users to perform large-scale comparative analyses and to identify orthologs of any protein in all genomes integrated in the database. Phylogenetic relationships of PVC proteins and their closest homologs in RefSeq, comparison of transmembrane domains and Pfam domains, conservation of gene neighborhood and taxonomic profiles can be visualized using dynamically generated graphs, available for download. 
As a central resource for researchers working on chlamydia, chlamydia-related bacteria, verrucomicrobia and planctomyces, ChlamDB facilitates the access to comprehensive annotations, integrates multiple tools for comparative genomic analyses and is freely available at https://chlamdb.ch/. Database URL: https://chlamdb.ch/.",ChlamDB,0.998569787,NA,0,ChlamDB,0.998569787,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +29461516,http://portal.aodn.org.au,"A database of chlorophyll a in Australian waters. Chlorophyll a is the most commonly used indicator of phytoplankton biomass in the marine environment. It is relatively simple and cost effective to measure when compared to phytoplankton abundance and is thus routinely included in many surveys. Here we collate 173, 333 records of chlorophyll a collected since 1965 from Australian waters gathered from researchers on regular coastal monitoring surveys and ocean voyages into a single repository. This dataset includes the chlorophyll a values as measured from samples analysed using spectrophotometry, fluorometry and high performance liquid chromatography (HPLC). The Australian Chlorophyll a database is freely available through the Australian Ocean Data Network portal (https://portal.aodn.org.au/). These data can be used in isolation as an index of phytoplankton biomass or in combination with other data to provide insight into water quality, ecosystem state, and relationships with other trophic levels such as zooplankton or fish.",NA,0,Chlorophyll,0.505291402,Chlorophyll,0.505291402,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,2/20/2018 +23230006,http://rarge.psc.riken.jp/chloroplast,"The Chloroplast Function Database II: a comprehensive collection of homozygous mutants and their phenotypic/genotypic traits for nuclear-encoded chloroplast proteins. 
The Chloroplast Function Database has so far offered phenotype information on mutants of the nuclear-encoded chloroplast proteins in Arabidopsis that pertains to >200 phenotypic data sets that were obtained from 1,722 transposon- or T-DNA-tagged lines. Here, we present the development of the second version of the database, which is named the Chloroplast Function Database II and was redesigned to increase the number of mutant characters and new user-friendly tools for data mining and integration. The upgraded database offers information on genome-wide mutant screens for any visible phenotype against 2,495 tagged lines to create a comprehensive homozygous mutant collection. The collection consists of 147 lines with seedling phenotypes and 185 lines for which we could not obtain homozygotes, as well as 1,740 homozygotes with wild-type phenotypes. Besides providing basic information about primer lists that were used for the PCR genotyping of T-DNA-tagged lines and explanations about the preparation of homozygous mutants and phenotype screening, the database includes access to a link between the gene locus and existing publicly available databases. This gives users access to a combined pool of data, enabling them to gain valuable insights into biological processes. In addition, high-resolution images of plastid morphologies of mutants with seedling-specific chloroplast defects as observed with transmission electron microscopy (TEM) are available in the current database. This database is used to compare the phenotypes of visually identifiable mutants with their plastid ultrastructures and to evaluate their potential significance from characteristic patterns of plastid morphology in vivo. Thus, the Chloroplast Function Database II is a useful and comprehensive information resource that can help researchers to connect individual Arabidopsis genes to plastid functions on the basis of phenotype analysis of our tagged mutant collection. 
It can be freely accessed at http://rarge.psc.riken.jp/chloroplast/.",Chloroplast,0.589059353,NA,0,Chloroplast,0.589059353,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,12/10/2012 +28605771,http://www.chogenome.org,"CHOmine: an integrated data warehouse for CHO systems biology and modeling. . The last decade has seen a surge in published genome-scale information for Chinese hamster ovary (CHO) cells, which are the main production vehicles for therapeutic proteins. While a single access point is available at www.CHOgenome.org, the primary data is distributed over several databases at different institutions. Currently research is frequently hampered by a plethora of gene names and IDs that vary between published draft genomes and databases making systems biology analyses cumbersome and elaborate. Here we present CHOmine, an integrative data warehouse connecting data from various databases and links to other ones. Furthermore, we introduce CHOmodel, a web based resource that provides access to recently published CHO cell line specific metabolic reconstructions. Both resources allow to query CHO relevant data, find interconnections between different types of data and thus provides a simple, standardized entry point to the world of CHO systems biology. http://www.chogenome.org.",CHOmodel,0.946449459,NA,0,CHOmodel,0.946449459,1,22105744,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2017 +22904610,http://ioda.univ-provence.fr,"The chordate proteome history database. The chordate proteome history database (http://ioda.univ-provence.fr) comprises some 20,000 evolutionary analyses of proteins from chordate species. Our main objective was to characterize and study the evolutionary histories of the chordate proteome, and in particular to detect genomic events and automatic functional searches. 
Firstly, phylogenetic analyses based on high quality multiple sequence alignments and a robust phylogenetic pipeline were performed for the whole protein and for each individual domain. Novel approaches were developed to identify orthologs/paralogs, and predict gene duplication/gain/loss events and the occurrence of new protein architectures (domain gains, losses and shuffling). These important genetic events were localized on the phylogenetic trees and on the genomic sequence. Secondly, the phylogenetic trees were enhanced by the creation of phylogroups, whereby groups of orthologous sequences created using OrthoMCL were corrected based on the phylogenetic trees; gene family size and gene gain/loss in a given lineage could be deduced from the phylogroups. For each ortholog group obtained from the phylogenetic or the phylogroup analysis, functional information and expression data can be retrieved. Database searches can be performed easily using biological objects: protein identifier, keyword or domain, but can also be based on events, eg, domain exchange events can be retrieved. To our knowledge, this is the first database that links group clustering, phylogeny and automatic functional searches along with the detection of important events occurring during genome evolution, such as the appearance of a new domain architecture.",chordate proteome history,0.743933582,NA,0,chordate proteome history,0.743933582,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/1/2012 +23405067,http://www1.i2r.a-star.edu.sg/xlli/CHPC2012/CHPC2012.htm,"Benchmarking human protein complexes to investigate drug-related systems and evaluate predicted protein complexes. Protein complexes are key entities to perform cellular functions. Human diseases are also revealed to associate with some specific human protein complexes. In fact, human protein complexes are widely used for protein function annotation, inference of human protein interactome, disease gene prediction, and so on. 
Therefore, it is highly desired to build an up-to-date catalogue of human complexes to support the research in these applications. Protein complexes from different databases are as expected to be highly redundant. In this paper, we designed a set of concise operations to compile these redundant human complexes and built a comprehensive catalogue called CHPC2012 (Catalogue of Human Protein Complexes). CHPC2012 achieves a higher coverage for proteins and protein complexes than those individual databases. It is also verified to be a set of complexes with high quality as its co-complex protein associations have a high overlap with protein-protein interactions (PPI) in various existing PPI databases. We demonstrated two distinct applications of CHPC2012, that is, investigating the relationship between protein complexes and drug-related systems and evaluating the quality of predicted protein complexes. In particular, CHPC2012 provides more insights into drug development. For instance, proteins involved in multiple complexes (the overlapping proteins) are potential drug targets; the drug-complex network is utilized to investigate multi-target drugs and drug-drug interactions; and the disease-specific complex-drug networks will provide new clues for drug repositioning. With this up-to-date reference set of human protein complexes, we believe that the CHPC2012 catalogue is able to enhance the studies for protein interactions, protein functions, human diseases, drugs, and related fields of research. CHPC2012 complexes can be downloaded from http://www1.i2r.a-star.edu.sg/xlli/CHPC2012/CHPC2012.htm.",CHPC2012,0.927504778,Catalogue of Human Protein Complexes,0.88755808,CHPC2012,0.927504778,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/6/2013 +22718786,http://www.thesgc.org/chromohub,"ChromoHub: a data hub for navigators of chromatin-mediated signalling. 
Unlabelled The rapidly increasing research activity focused on chromatin-mediated regulation of epigenetic mechanisms is generating waves of data on writers, readers and erasers of the histone code, such as protein methyltransferases, bromodomains or histone deacetylases. To make these data easily accessible to communities of research scientists coming from diverse horizons, we have created ChromoHub, an online resource where users can map on phylogenetic trees disease associations, protein structures, chemical inhibitors, histone substrates, chromosomal aberrations and other types of data extracted from public repositories and the published literature. The interface can be used to define the structural or chemical coverage of a protein family, highlight domain architectures, interrogate disease relevance or zoom in on specific genes for more detailed information. This open-access resource should serve as a hub for cell biologists, medicinal chemists, structural biologists and other navigators that explore the biology of chromatin signalling. Availability http://www.thesgc.org/chromohub/.",ChromoHub,0.996145904,NA,0,ChromoHub,0.996145904,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/19/2012 +"26722116, 29564831",http://cgma.scu.edu.cn/ChromothripsisDB,"ChromothripsisDB: a curated database of chromothripsis. Unlabelled Chromothripsis is a single catastrophic event that can lead to massive genomic rearrangements confined to one or a few chromosomes. It provides an alternative paradigm in cancer development and changes the conventional view that cancer develops in a stepwise progression. The mechanisms underlying chromothripsis and their specific impact on tumorigenesis are still poorly understood, and further examination of a large number of identified chromothripsis samples is needed. Unfortunately, this data are difficult to access, as they are scattered across multiple publications, come in different formats and descriptions, or are hidden in figures and supplementary materials. 
To improve access to this data and promote meta-analysis, we developed ChromothripsisDB, a manually curated database containing a unified description of all published chromothripsis cases and relevant genomic aberrations. Currently, 423 chromothripsis samples representing 107 research articles are included in our database. ChromothripsisDB represents an extraordinary resource for mining the existing knowledge of chromothripsis, and will facilitate the identification of mechanisms involved in this phenomenon. Availability and implementation ChromothripsisDB is freely available at http://cgma.scu.edu.cn/ChromothripsisDB CONTACT: haoyang.cai@scu.edu.cn Supplementary information Supplementary data are available at Bioinformatics online.",ChromothripsisDB,0.99702704,NA,0,ChromothripsisDB,0.99702704,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +23139595,http://ibsd.gov.in/cibman,"CIBMAN: Database exploring Citrus biodiversity of Manipur. Unlabelled The rich wealth of Citrus genetic resources makes India to enjoy a remarkable position in the ""Citrus belt of the world"". We have developed CIBMAN, a unique database on Citrus biodiversity of Manipur which comprises 33 accessions collected through extensive survey for more than three years. CIBMAN provides integrated access to Citrus species through sophisticated web interface which has following capabilities a) morphological details, b) socio-economic details, c) taxonomic details and d) geographical distribution. Morphological variability among Citrus accessions is due to variance in their genome which contributes to diverse agronomical traits and diverse bioactive compounds of high value. This diverse gene pool can be potential source for genetic improvement of existing cultivars and rootstocks. Systematic collection, characterization and conservation of the underutilized or lesser exploited varieties is required for incorporating in breeding program and conserve the germplasm from ever going on genetic erosion. 
This database will be useful for scientific validations and updating of traditional wisdom in bioprospecting aspects especially industrialization of Citrus found in the state. Further, the features will be suited for detailed investigation on potential medicinal and edible Citrus that make CIBMAN a powerful tool for sustainable management. Availability http://ibsd.gov.in/cibman.",CIBMAN,0.99584347,NA,0,CIBMAN,0.99584347,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/11/2012 +24952649,http://cicarmisatdb.icrisat.org,"CicArMiSatDB: the chickpea microsatellite database. Background Chickpea (Cicer arietinum) is a widely grown legume crop in tropical, sub-tropical and temperate regions. Molecular breeding approaches seem to be essential for enhancing crop productivity in chickpea. Until recently, limited numbers of molecular markers were available in the case of chickpea for use in molecular breeding. However, the recent advances in genomics facilitated the development of large scale markers especially SSRs (simple sequence repeats), the markers of choice in any breeding program. Availability of genome sequence very recently opens new avenues for accelerating molecular breeding approaches for chickpea improvement. Description In order to assist genetic studies and breeding applications, we have developed a user friendly relational database named the Chickpea Microsatellite Database (CicArMiSatDB http://cicarmisatdb.icrisat.org). This database provides detailed information on SSRs along with their features in the genome. SSRs have been classified and made accessible through an easy-to-use web interface. 
Conclusions This database is expected to help chickpea community in particular and legume community in general, to select SSRs of particular type or from a specific region in the genome to advance both basic genomics research as well as applied aspects of crop improvement.",CicArMiSatDB,0.988039034,Chickpea Microsatellite Database,0.596938559,CicArMiSatDB,0.988039034,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/21/2014 +26289427,"http://cicarvardb.icrisat.org/, http://cicarvardb.icrisat.org","CicArVarDB: SNP and InDel database for advancing genetics research and breeding applications in chickpea. . Molecular markers are valuable tools for breeders to help accelerate crop improvement. High throughput sequencing technologies facilitate the discovery of large-scale variations such as single nucleotide polymorphisms (SNPs) and simple sequence repeats (SSRs). Sequencing of chickpea genome along with re-sequencing of several chickpea lines has enabled the discovery of 4.4 million variations including SNPs and InDels. Here we report a repository of 1.9 million variations (SNPs and InDels) anchored on eight pseudomolecules in a custom database, referred as CicArVarDB that can be accessed at http://cicarvardb.icrisat.org/. It includes an easy interface for users to select variations around specific regions associated with quantitative trait loci, with embedded webBLAST search and JBrowse visualisation. We hope that this database will be immensely useful for the chickpea research community for both advancing genetics research as well as breeding applications for crop improvement. Database URL: http://cicarvardb.icrisat.org.",CicArVarDB,0.995038033,NA,0,CicArVarDB,0.995038033,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/19/2015 +34762703,http://cicersptedb.easyomics.org/index.php,"CicerSpTEdb: A web-based database for high-resolution genome-wide identification of transposable elements in Cicer species. 
Recently, Cicer species have experienced increased research interest due to their economic importance, especially in genetics, genomics, and crop improvement. The Cicer arietinum, Cicer reticulatum, and Cicer echinospermum genomes have been sequenced and provide valuable resources for trait improvement. Since the publication of the chickpea draft genome, progress has been made in genome assembly, functional annotation, and identification of polymorphic markers. However, work is still needed to identify transposable elements (TEs) and make them available for researchers. In this paper, we present CicerSpTEdb, a comprehensive TE database for Cicer species that aims to improve our understanding of the organization and structural variations of the chickpea genome. Using structure and homology-based methods, 3942 C. echinospermum, 3579 C. reticulatum, and 2240 C. arietinum TEs were identified. Comparisons between Cicer species indicate that C. echinospermum has the highest number of LTR-RT and hAT TEs. C. reticulatum has more Mutator, PIF Harbinger, Tc1 Mariner, and CACTA TEs, while C. arietinum has the highest number of Helitron. CicerSpTEdb enables users to search and visualize TEs by location and download their results. The database will provide a powerful resource that can assist in developing TE target markers for molecular breeding and answer related biological questions. Database URL: http://cicersptedb.easyomics.org/index.php.",CicerSpTEdb,0.994646966,NA,0,CicerSpTEdb,0.994646966,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2021 +27472917,http://www.cicertransdb.esy.es,"CicerTransDB 1.0: a resource for expression and functional study of chickpea transcription factors. Background Transcription factor (TF) databases are major resource for systematic studies of TFs in specific species as well as related family members. 
Even though there are several publicly available multi-species databases, the information on the amount and diversity of TFs within individual species is fragmented, especially for newly sequenced genomes of non-model species of agricultural significance. Description We constructed CicerTransDB (Cicer Transcription Factor Database), the first database of its kind, which would provide a centralized putatively complete list of TFs in a food legume, chickpea. CicerTransDB, available at www.cicertransdb.esy.es , is based on chickpea (Cicer arietinum L.) annotation v 1.0. The database is an outcome of genome-wide domain study and manual classification of TF families. This database not only provides information of the gene, but also gene ontology, domain and motif architecture. Conclusion CicerTransDB v 1.0 comprises information of 1124 genes of chickpea and enables the user to not only search, browse and download sequences but also retrieve sequence features. CicerTransDB also provides several single click interfaces, transconnecting to various other databases to ease further analysis. Several webAPI(s) integrated in the database allow end-users direct access of data. A critical comparison of CicerTransDB with PlantTFDB (Plant Transcription Factor Database) revealed 68 novel TFs in the chickpea genome, hitherto unexplored. Database URL: http://www.cicertransdb.esy.es.",CicerTransDB,0.995746732,Cicer Transcription Factor Database,0.750756256,CicerTransDB,0.995746732,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/29/2016 +22809392,http://mips.helmholtz-muenchen.de/cider,"CIDeR: multifactorial interaction networks in human diseases. The pathobiology of common diseases is influenced by heterogeneous factors interacting in complex networks. CIDeR http://mips.helmholtz-muenchen.de/cider/ is a publicly available, manually curated, integrative database of metabolic and neurological disorders. 
The resource provides structured information on 18,813 experimentally validated interactions between molecules, bioprocesses and environmental factors extracted from the scientific literature. Systematic annotation and interactive graphical representation of disease networks make CIDeR a versatile knowledge base for biologists, analysis of large-scale data and systems biology approaches.",CIDeR,0.997793913,NA,0,CIDeR,0.997793913,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/18/2012 +30045691,http://soft.bioinfo-minzhao.org/cigene,"CIGene: a literature-based online resource for cancer initiation genes. Background Cancer initiation genes (CIGs) are genes that can directly promote cell proliferation or induce cancer. There are thousands of published studies identifying various CIGs; however, no systematic collection or description is available. Results To construct a CIG reference for genetic screening, we have collected 177 human genes curated from 1507 PubMed abstracts. To facilitate data queries and browsing, the identified CIGs along with extensive bioinformatic annotations were stored in an online database called CIGene. Initial functional analysis revealed an overlooked role for cell motility in cancer initiation. Subsequent cross-referencing of known tumor suppressor genes and oncogenes against the 177 CIGs identified 96 and 81 CIGs with and without known oncogenic roles, respectively. Successive network analyses of all 177 CIGs determined that the two groups of genes were more likely to link within their group. The distinct molecular functions for these groups were also confirmed with functional studies. While the 96 known oncogenic genes had fundamental roles in gene regulation and signaling, the remaining 81 genes possessed more ancillary functions, such enhancer binding. Further network and mutational analysis of the 96 known oncogenic genes revealed that mutations in these genes were highly prevalent in multiple cancers. 
By focusing on breast cancer, we found that 32 of the 96 genes with mutations in breast cancers were significantly associated with patient survival. Conclusions As the first literature-based online resource for CIGs, CIGene will serve as a useful gateway for the systematic analysis of cancer initiation. CIGene is freely available to all academic users at http://soft.bioinfo-minzhao.org/cigene/ .",CIGene,0.99484241,NA,0,CIGene,0.99484241,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/25/2018 +23203874,http://www.cellimagelibrary.org,"The cell: an image library-CCDB: a curated repository of microscopy data. The cell: an image library-CCDB (CIL-CCDB) (http://www.cellimagelibrary.org) is a searchable database and archive of cellular images. As a repository for microscopy data, it accepts all forms of cell imaging from light and electron microscopy, including multi-dimensional images, Z- and time stacks in a broad variety of raw-data formats, as well as movies and animations. The software design of CIL-CCDB was intentionally designed to allow easy incorporation of new technologies and image formats as they are developed. Currently, CIL-CCDB contains over 9250 images from 358 different species. Images are evaluated for quality and annotated with terms from 14 different ontologies in 16 different fields as well as a basic description and technical details. Since its public launch on 9 August 2010, it has been designed to serve as not only an archive but also an active site for researchers and educators.",CIL-CCDB,0.996889138,an image library-CCDB,0.696443155,CIL-CCDB,0.996889138,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +31095607,http://bioinformatics.bio.uu.nl/john/syscilia/ciliacarta,"CiliaCarta: An integrated and validated compendium of ciliary genes. The cilium is an essential organelle at the surface of mammalian cells whose dysfunction causes a wide range of genetic diseases collectively called ciliopathies. 
The current rate at which new ciliopathy genes are identified suggests that many ciliary components remain undiscovered. We generated and rigorously analyzed genomic, proteomic, transcriptomic and evolutionary data and systematically integrated these using Bayesian statistics into a predictive score for ciliary function. This resulted in 285 candidate ciliary genes. We generated independent experimental evidence of ciliary associations for 24 out of 36 analyzed candidate proteins using multiple cell and animal model systems (mouse, zebrafish and nematode) and techniques. For example, we show that OSCP1, which has previously been implicated in two distinct non-ciliary processes, causes ciliogenic and ciliopathy-associated tissue phenotypes when depleted in zebrafish. The candidate list forms the basis of CiliaCarta, a comprehensive ciliary compendium covering 956 genes. The resource can be used to objectively prioritize candidate genes in whole exome or genome sequencing of ciliopathy patients and can be accessed at http://bioinformatics.bio.uu.nl/john/syscilia/ciliacarta/.",CiliaCarta,0.935106993,NA,0,CiliaCarta,0.935106993,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/16/2019 +30004104,http://www.citogenetica.ufes.br,"Karyotypes of Brazilian non-volant small mammals (Didelphidae and Rodentia): An online tool for accessing the chromosomal diversity. We have created a database system named CIPEMAB (CItogenética dos PEquenos MAmíferos Brasileiros) to assemble images of the chromosomes of Brazilian small mammals (Rodents and Marsupials). It includes karyotype information, such as diploid number, karyotype features, idiograms, and sexual chromosomes characteristics. CIPEMAB facilitates quick sharing of information on chromosome research among cytogeneticists as well as researchers in other fields.
The database contains more than 300 microscopic images, including karyotypic images obtained from 182 species of small mammals from the literature. Researchers can browse the contents of the database online (http://www.citogenetica.ufes.br). The system enables users to locate images of interest by taxa, and to display the document with detailed information on species names, authors, year of the species publication, and karyotypes pictures in different colorations. CIPEMAB has a wide range of applications, such as comparing various karyotypes of Brazilian species and identifying manuscripts of interest.",CIPEMAB,0.996844947,NA,0,CIPEMAB,0.996844947,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/28/2018 +24339831,http://gyanxet-beta.com/circdb,"Circ2Traits: a comprehensive database for circular RNA potentially associated with disease and traits. Circular RNAs are new players in regulation of post transcriptional gene expression. Animal genomes express many circular RNAs from diverse genomic locations. A recent study has validated a fairly large number of circular RNAs in human, mouse, and nematode. Circular RNAs play a crucial role in fine tuning the level of miRNA mediated regulation of gene expression by sequestering the miRNAs. Their interaction with disease associated miRNAs indicates that circular RNAs are important for disease regulation. In this paper we studied the potential association of circular RNAs (circRNA) with human diseases in two different ways. Firstly, the interactions of circRNAs with disease associated miRNAs were identified, following which the likelihood of a circRNA being associated with a disease was calculated. For the miRNAs associated with individual diseases, we constructed a network of predicted interactions between the miRNAs and protein coding, long non-coding and circular RNA genes. 
We carried out gene ontology (GO) enrichment analysis on the set of protein coding genes in the miRNA- circRNA interactome of individual diseases to check the enrichment of genes associated with particular biological processes. Secondly, disease associated SNPs were mapped on circRNA loci, and Argonaute (Ago) interaction sites on circular RNAs were identified. We compiled a database of disease-circRNA association in Circ2Traits (http://gyanxet-beta.com/circdb/), the first comprehensive knowledgebase of potential association of circular RNAs with diseases in human.",Circ2Traits,0.996000826,NA,0,Circ2Traits,0.996000826,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/10/2013 +32219412,http://clingen.igib.res.in/circad,"Circad: a comprehensive manually curated resource of circular RNA associated with diseases. . Circular RNAs (circRNAs) are unique transcript isoforms characterized by back splicing of exon ends to form a covalently closed loop or circular conformation. These transcript isoforms are now known to be expressed in a variety of organisms across the kingdoms of life. Recent studies have shown the role of circRNAs in a number of diseases and increasing evidence points to their potential application as biomarkers in these diseases. We have created a comprehensive manually curated database of circular RNAs associated with diseases. This database is available at URL http://clingen.igib.res.in/circad/. The Database lists more than 1300 circRNAs associated with 150 diseases and mapping to 113 International Statistical Classification of Diseases (ICD) codes with evidence of association linked to published literature. The database is unique in many ways. Firstly, it provides ready-to-use primers to work with, in order to use circRNAs as biomarkers or to perform functional studies. It additionally lists the assay and PCR primer details including experimentally validated ones as a ready reference to researchers along with fold change and statistical significance. 
It also provides standard disease nomenclature as per the ICD codes. To the best of our knowledge, circad is the most comprehensive and updated database of disease associated circular RNAs. Availability: http://clingen.igib.res.in/circad/.",circad,0.970793426,NA,0,circad,0.970793426,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +32345360,http://circatlas.biols.ac.cn,"CircAtlas: an integrated resource of one million highly accurate circular RNAs from 1070 vertebrate transcriptomes. Existing circular RNA (circRNA) databases have become essential for transcriptomics. However, most are unsuitable for mining in-depth information for candidate circRNA prioritization. To address this, we integrate circular transcript collections to develop the circAtlas database based on 1070 RNA-seq samples collected from 19 normal tissues across six vertebrate species. This database contains 1,007,087 highly reliable circRNAs, of which over 81.3% have been assembled into full-length sequences. We profile their expression pattern, conservation, and functional annotation. We describe a novel multiple conservation score, co-expression, and regulatory networks for circRNA annotation and prioritization. CircAtlas can be accessed at http://circatlas.biols.ac.cn/.",CircAtlas,0.997903705,NA,0,CircAtlas,0.997903705,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/28/2020 +25234927,http://www.circbase.org,"circBase: a database for circular RNAs. Recently, several laboratories have reported thousands of circular RNAs (circRNAs) in animals. Numerous circRNAs are highly stable and have specific spatiotemporal expression patterns. Even though a function for circRNAs is unknown, these features make circRNAs an interesting class of RNAs as possible biomarkers and for further research. We developed a database and website, ""circBase,"" where merged and unified data sets of circRNAs and the evidence supporting their expression can be accessed, downloaded, and browsed within the genomic context. 
circBase also provides scripts to identify known and novel circRNAs in sequencing data. The database is freely accessible through the web server at http://www.circbase.org/.",circBase,0.970117331,NA,0,circBase,0.970117331,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/18/2014 +34296749,"http://soft.bioinfo-minzhao.org/circexp, http://soft.bioinfominzhao.org/circexp","circExp database: an online transcriptome platform for human circRNA expressions in cancers. . Circular RNA (circRNA) is a highly stable, single-stranded, closed-loop RNA that works as RNA or as a protein decoy to regulate gene expression. In humans, thousands of circRNA transcriptional products precisely express in specific developmental stages, tissues and cell types. Due to their stability and specificity, circRNAs are ideal biomarkers for cancer diagnosis and prognosis. To provide an integrated and standardized circRNA expression profile for human cancers, we performed extensive data curation across 11 technical platforms, collecting 48 expression profile data sets for 18 cancer types and amassing 860 751 expression records. We also identified 189 193 differential expression signatures that are significantly different between normal and cancer samples. All the pre-calculated expression analysis results are organized into 132 plain text files for bulk download. Our online interface, circExp, provides data browsing and search functions. For each data set, a dynamic expression heatmap provides a profile overview. Based on the processed data, we found that 52 circRNAs were consistently and differentially expressed in 20 or more processed analyses. By mapping those circRNAs to their parent protein-coding genes, we found that they may have profoundly affected the survival of 10 797 patients in the The Cancer Genome Atlas pan-cancer data set.
In sum, we developed circExp and demonstrated that it is useful to identify circRNAs that have potential diagnostic and prognostic significance for a variety of cancer types. In this online and reusable database, found at http://soft.bioinfo-minzhao.org/circexp, we have provided pre-calculated expression data about circRNAs and their parental genes, as well as data browsing and searching functions. Database URL: http://soft.bioinfominzhao.org/circexp/.",circExp,0.985390902,NA,0,circExp,0.985390902,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +29194536,http://app.cgu.edu.tw/circlnc,"circlncRNAnet: an integrated web-based resource for mapping functional networks of long or circular forms of noncoding RNAs. Background Despite their lack of protein-coding potential, long noncoding RNAs (lncRNAs) and circular RNAs (circRNAs) have emerged as key determinants in gene regulation, acting to fine-tune transcriptional and signaling output. These noncoding RNA transcripts are known to affect expression of messenger RNAs (mRNAs) via epigenetic and post-transcriptional regulation. Given their widespread target spectrum, as well as extensive modes of action, a complete understanding of their biological relevance will depend on integrative analyses of systems data at various levels. Findings While a handful of publicly available databases have been reported, existing tools do not fully capture, from a network perspective, the functional implications of lncRNAs or circRNAs of interest. Through an integrated and streamlined design, circlncRNAnet aims to broaden the understanding of ncRNA candidates by testing in silico several hypotheses of ncRNA-based functions, on the basis of large-scale RNA-seq data. 
This web server is implemented with several features that represent advances in the bioinformatics of ncRNAs: (1) a flexible framework that accepts and processes user-defined next-generation sequencing-based expression data; (2) multiple analytic modules that assign and productively assess the regulatory networks of user-selected ncRNAs by cross-referencing extensively curated databases; (3) an all-purpose, information-rich workflow design that is tailored to all types of ncRNAs. Outputs on expression profiles, co-expression networks and pathways, and molecular interactomes, are dynamically and interactively displayed according to user-defined criteria. Conclusions In short, users may apply circlncRNAnet to obtain, in real time, multiple lines of functionally relevant information on circRNAs/lncRNAs of their interest. In summary, circlncRNAnet provides a ""one-stop"" resource for in-depth analyses of ncRNA biology. circlncRNAnet is freely available at http://app.cgu.edu.tw/circlnc/.",circlncRNAnet,0.996753991,NA,0,circlncRNAnet,0.996753991,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +26450965,http://circnet.mbc.nctu.edu.tw,"CircNet: a database of circular RNAs derived from transcriptome sequencing data. Circular RNAs (circRNAs) represent a new type of regulatory noncoding RNA that only recently has been identified and cataloged. Emerging evidence indicates that circRNAs exert a new layer of post-transcriptional regulation of gene expression. In this study, we utilized transcriptome sequencing datasets to systematically identify the expression of circRNAs (including known and newly identified ones by our pipeline) in 464 RNA-seq samples, and then constructed the CircNet database (http://circnet.mbc.nctu.edu.tw/) that provides the following resources: (i) novel circRNAs, (ii) integrated miRNA-target networks, (iii) expression profiles of circRNA isoforms, (iv) genomic annotations of circRNA isoforms (e.g. 282 948 exon positions), and (v) sequences of circRNA isoforms. 
The CircNet database is to our knowledge the first public database that provides tissue-specific circRNA expression profiles and circRNA-miRNA-gene regulatory networks. It not only extends the most up to date catalog of circRNAs but also provides a thorough expression analysis of both previously reported and novel circRNAs. Furthermore, it generates an integrated regulatory network that illustrates the regulation between circRNAs, miRNAs and genes.",CircNet,0.982483149,NA,0,CircNet,0.982483149,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/7/2015 +"27365365, 30172046",http://www.picb.ac.cn/rnomics/circpedia,"Diverse alternative back-splicing and alternative splicing landscape of circular RNAs. Circular RNAs (circRNAs) derived from back-spliced exons have been widely identified as being co-expressed with their linear counterparts. A single gene locus can produce multiple circRNAs through alternative back-splice site selection and/or alternative splice site selection; however, a detailed map of alternative back-splicing/splicing in circRNAs is lacking. Here, with the upgraded CIRCexplorer2 pipeline, we systematically annotated different types of alternative back-splicing and alternative splicing events in circRNAs from various cell lines. Compared with their linear cognate RNAs, circRNAs exhibited distinct patterns of alternative back-splicing and alternative splicing. Alternative back-splice site selection was correlated with the competition of putative RNA pairs across introns that bracket alternative back-splice sites. In addition, all four basic types of alternative splicing that have been identified in the (linear) mRNA process were found within circRNAs, and many exons were predominantly spliced in circRNAs. Unexpectedly, thousands of previously unannotated exons were detected in circRNAs from the examined cell lines. Although these novel exons had similar splice site strength, they were much less conserved than known exons in sequences. 
Finally, both alternative back-splicing and circRNA-predominant alternative splicing were highly diverse among the examined cell lines. All of the identified alternative back-splicing and alternative splicing in circRNAs are available in the CIRCpedia database (http://www.picb.ac.cn/rnomics/circpedia). Collectively, the annotation of alternative back-splicing and alternative splicing in circRNAs provides a valuable resource for depicting the complexity of circRNA biogenesis and for studying the potential functions of circRNAs in different cells.",CIRCpedia,0.990549028,NA,0,CIRCpedia,0.990549028,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/29/2018 +33181824,http://www.biobdlab.cn:8000,"CircR2Cancer: a manually curated database of associations between circRNAs and cancers. . Accumulating evidences have shown that the deregulation of circRNA has close association with many human cancers. However, these experimental verified circRNA-cancer associations are not collected in any database. Here, we develop a manually curated database (circR2Cancer) that provides experimentally supported associations between circRNAs and cancers. The current version of the circR2Cancer contains 1439 associations between 1135 circRNAs and 82 cancers by extracting data from existing literatures and databases. In addition, circR2Cancer contains the information of cancer exacted from Disease Ontology and basic biological information of circRNAs from circBase. At the same time, circR2Cancer provides a simple and friendly interface for users to conveniently browse, search and download the data. It will be a useful and valuable resource for researchers to understanding the regulation mechanism of circRNA in cancers. 
http://www.biobdlab.cn:8000.",circR2Cancer,0.993368697,NA,0,circR2Cancer,0.993368697,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +34856391,http://bioinfo.snnu.edu.cn/CircR2Disease_v2.0,"CircR2Disease v2.0: An Updated Web Server for Experimentally Validated circRNA-disease Associations and Its Application. . With accumulating dysregulated circular RNAs (circRNAs) in pathological processes, the regulatory functions of circRNAs, especially circRNAs as microRNA (miRNA) sponges and their interaction with RNA binding proteins (RBPs), have been widely validated. However, the collected information on experimentally validated circRNA-disease associations is only preliminary. Therefore, an updated CircR2Disease database providing a comprehensive resource and web tool to clarify the relationships between circRNAs and diseases in diverse species is necessary. Here, we present an updated CircR2Disease v2.0 with the increased number of circRNA-disease associations and novel characteristics. CircR2Disease v2.0 provides more than 5-fold experimentally validated circRNA-disease associations compared to its previous version. This version includes 4201 entries between 3077 circRNAs and 312 disease subtypes. Secondly, the information of circRNA-miRNA, circRNA-miRNA-target, and circRNA-protein has been manually collected for various diseases. Thirdly, the gene symbols of circRNAs and disease name IDs can be linked with various nomenclature databases. Detailed descriptions such as samples and journals have also been integrated into the updated version. Thus, CircR2Disease v2.0 can serve as a platform for users to systematically investigate the roles of dysregulated circRNAs in various diseases and further explore the posttranscriptional regulatory function in diseases. Finally, we propose a computational method named circDis based on the graph convolutional network (GCN) and gradient boosting decision tree (GBDT) to illustrate the applications of the CircR2Disease v2.0 database. 
CircR2Disease v2.0 is available at http://bioinfo.snnu.edu.cn/CircR2Disease_v2.0.",CircR2Disease,0.969459782,NA,0,CircR2Disease,0.969459782,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/29/2021 +27725737,http://reprod.njmu.edu.cn/circrnadb,"circRNADb: A comprehensive database for human circular RNAs with protein-coding annotations. It has been known that circular RNAs are widely expressed in human tissues and cells, and play important regulatory roles in physiological or pathological processes. However, there is lack of comprehensively annotated human circular RNAs database. In this study we established a circRNA database, named as circRNADb, containing 32,914 human exonic circRNAs carefully selected from diversified sources. The detailed information of the circRNA, including genomic information, exon splicing, genome sequence, internal ribosome entry site (IRES), open reading frame (ORF) and references were provided in circRNADb. In addition, circRNAs were found to be able to encode proteins, which have not been reported in any species. 16328 circRNAs were annotated to have ORF longer than 100 amino acids, of which 7170 have IRES elements. 46 circRNAs from 37 genes were found to have their corresponding proteins expressed according mass spectrometry. The database provides the function of data search, browse, download, submit and feedback for the user to study particular circular RNA of interest and update the database continually. circRNADb will be built to be a biological information platform for circRNA molecules and related biological functions in the future. The database can be freely available through the web server at http://reprod.njmu.edu.cn/circrnadb.",circRNADb,0.975911617,NA,0,circRNADb,0.975911617,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/11/2016 +33121433,http://soft.bioinfo-minzhao.org/circvar,"circVAR database: genome-wide archive of genetic variants for human circular RNAs. 
Background Circular RNAs (circRNAs) play important roles in regulating gene expression through binding miRNAs and RNA binding proteins. Genetic variation of circRNAs may affect complex traits/diseases by changing their binding efficiency to target miRNAs and proteins. There is a growing demand for investigations of the functions of genetic changes using large-scale experimental evidence. However, there is no online genetic resource for circRNA genes. Results We performed extensive genetic annotation of 295,526 circRNAs integrated from circBase, circNet and circRNAdb. All pre-computed genetic variants were presented at our online resource, circVAR, with data browsing and search functionality. We explored the chromosome-based distribution of circRNAs and their associated variants. We found that, based on mapping to the 1000 Genomes and ClinVAR databases, chromosome 17 has a relatively large number of circRNAs and associated common and health-related genetic variants. Following the annotation of genome wide association studies (GWAS)-based circRNA variants, we found many non-coding variants within circRNAs, suggesting novel mechanisms for common diseases reported from GWAS studies. For cancer-based somatic variants, we found that chromosome 7 has many highly complex mutations that have been overlooked in previous research. Conclusion We used the circVAR database to collect SNPs and small insertions and deletions (INDELs) in putative circRNA regions and to identify their potential phenotypic information. 
To provide a reusable resource for the circRNA research community, we have published all the pre-computed genetic data concerning circRNAs and associated genes together with data query and browsing functions at http://soft.bioinfo-minzhao.org/circvar .",circVAR,0.893169641,NA,0,circVAR,0.893169641,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/29/2020 +29059379,http://cirgrdb.biols.ac.cn,"CirGRDB: a database for the genome-wide deciphering circadian genes and regulators. Circadian rhythms govern various kinds of physiological and behavioral functions of the living organisms, and disruptions of the rhythms are highly detrimental to health. Although several databases have been built for circadian genes, a resource for comprehensive post-transcriptional regulatory information of circadian RNAs and expression patterns of disease-related circadian RNAs is still lacking. Here, we developed CirGRDB (http://cirgrdb.biols.ac.cn) by integrating more than 4936 genome-wide assays, with the aim of fulfilling the growing need to understand the rhythms of life. CirGRDB presents a friendly web interface that allows users to search and browse temporal expression patterns of interested genes in 37 human/mouse tissues or cell lines, and three clinical disorders including sleep disorder, aging and tumor. More importantly, eight kinds of potential transcriptional and post-transcriptional regulators involved in the rhythmic expression of the specific genes, including transcription factors, histone modifications, chromatin accessibility, enhancer RNAs, miRNAs, RNA-binding proteins, RNA editing and RNA methylation, can also be retrieved. Furthermore, a regulatory network could be generated based on the regulatory information. 
In summary, CirGRDB offers a useful repository for exploring disease-related circadian RNAs, and deciphering the transcriptional and post-transcriptional regulation of circadian rhythms.",CirGRDB,0.997688591,NA,0,CirGRDB,0.997688591,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +27789702,http://cistrome.org/db,"Cistrome Data Browser: a data portal for ChIP-Seq and chromatin accessibility data in human and mouse. Chromatin immunoprecipitation, DNase I hypersensitivity and transposase-accessibility assays combined with high-throughput sequencing enable the genome-wide study of chromatin dynamics, transcription factor binding and gene regulation. Although rapidly accumulating publicly available ChIP-seq, DNase-seq and ATAC-seq data are a valuable resource for the systematic investigation of gene regulation processes, a lack of standardized curation, quality control and analysis procedures have hindered extensive reuse of these data. To overcome this challenge, we built the Cistrome database, a collection of ChIP-seq and chromatin accessibility data (DNase-seq and ATAC-seq) published before January 1, 2016, including 13 366 human and 9953 mouse samples. All the data have been carefully curated and processed with a streamlined analysis pipeline and evaluated with comprehensive quality control metrics. We have also created a user-friendly web server for data query, exploration and visualization. The resulting Cistrome DB (Cistrome Data Browser), available online at http://cistrome.org/db, is expected to become a valuable resource for transcriptional and epigenetic regulation studies.",Cistrome,0.987228572,Browser,0.58698076,Cistrome,0.987228572,1,30462313,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,10/26/2016 +29092931,http://cistrome.org/CistromeCancer,"Cistrome Cancer: A Web Resource for Integrative Gene Regulation Modeling in Cancer. Cancer results from a breakdown of normal gene expression control, so the study of gene regulation is critical to cancer research. 
To gain insight into the transcriptional and epigenetic factors regulating abnormal gene expression patterns in cancers, we developed the Cistrome Cancer web resource (http://cistrome.org/CistromeCancer/). We conducted the systematic integration and modeling of over 10,000 tumor molecular profiles from The Cancer Genome Atlas (TCGA) with over 23,000 ChIP-seq and chromatin accessibility profiles from our Cistrome collection. The results include reconstruction of functional enhancer profiles, ""super-enhancer"" target genes, as well as predictions of active transcription factors and their target genes for each TCGA cancer type. Cistrome Cancer reveals novel insights from integrative analyses combining chromatin profiles with tumor molecular profiles and will be a useful resource to the cancer gene regulation community. Cancer Res; 77(21); e19-22. √ɬÉ√ǬÇ√ɬÇ√Ǭ©2017 AACR.",Cistrome Cancer,0.990812868,NA,0,Cistrome Cancer,0.990812868,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2017 +30462313,"http://cistrome.org/db, http://dbtoolkit.cistrome.org","Cistrome Data Browser: expanded datasets and new tools for gene regulatory analysis. The Cistrome Data Browser (DB) is a resource of human and mouse cis-regulatory information derived from ChIP-seq, DNase-seq and ATAC-seq chromatin profiling assays, which map the genome-wide locations of transcription factor binding sites, histone post-translational modifications and regions of chromatin accessible to endonuclease activity. Currently, the Cistrome DB contains approximately 47,000 human and mouse samples with about 24,000 newly collected datasets compared to the previous release two years ago. Furthermore, the Cistrome DB has a new Toolkit module with several features that allow users to better utilize the large-scale ChIP-seq, DNase-seq, and ATAC-seq data. First, users can query the factors which are likely to regulate a specific gene of interest. 
Second, the Cistrome DB Toolkit facilitates searches for factor binding, histone modifications, and chromatin accessibility in any given genomic interval shorter than 2Mb. Third, the Toolkit can determine the most similar ChIP-seq, DNase-seq, and ATAC-seq samples in terms of genomic interval overlaps with user-provided genomic interval sets. The Cistrome DB is a user-friendly, up-to-date, and well maintained resource, and the new tools will greatly benefit the biomedical research community. The database is freely available at http://cistrome.org/db, and the Toolkit is at http://dbtoolkit.cistrome.org.",Cistrome DB,0.963168994,Cistrome Data Browser,0.840564919,Cistrome DB,0.963168994,1,27789702,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,1/1/2019 +23508969,http://cistrome.org/finder,"CistromeFinder for ChIP-seq and DNase-seq data reuse. Summary Chromatin immunoprecipitation and DNase I hypersensitivity assays with high-throughput sequencing have greatly accelerated the understanding of transcriptional and epigenetic regulation, although data reuse for the community of experimental biologists has been challenging. We created a data portal CistromeFinder that can help query, evaluate and visualize publicly available Chromatin immunoprecipitation and DNase I hypersensitivity assays with high-throughput sequencing data in human and mouse. The database currently contains 6378 samples over 4391 datasets, 313 factors and 102 cell lines or cell populations. Each dataset has gone through a consistent analysis and quality control pipeline; therefore, users could evaluate the overall quality of each dataset before examining binding sites near their genes of interest. CistromeFinder is integrated with UCSC genome browser for visualization, Primer3Plus for ChIP-qPCR primer design and CistromeMap for submitting newly available datasets. It also allows users to leave comments to facilitate data evaluation and update. 
Availability http://cistrome.org/finder. Contact xsliu@jimmy.harvard.edu or henry_long@dfci.harvard.edu.",CistromeFinder,0.990140975,NA,0,CistromeFinder,0.990140975,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/18/2013 +29688375,http://citgenedb.yubiolab.org,"CITGeneDB: a comprehensive database of human and mouse genes enhancing or suppressing cold-induced thermogenesis validated by perturbation experiments in mice. . Cold-induced thermogenesis increases energy expenditure and can reduce body weight in mammals, so the genes involved in it are thought to be potential therapeutic targets for treating obesity and diabetes. In the quest for more effective therapies, a great deal of research has been conducted to elucidate the regulatory mechanism of cold-induced thermogenesis. Over the last decade, a large number of genes that can enhance or suppress cold-induced thermogenesis have been discovered, but a comprehensive list of these genes is lacking. To fill this gap, we examined all of the annotated human and mouse genes and curated those demonstrated to enhance or suppress cold-induced thermogenesis by in vivo or ex vivo experiments in mice. The results of this highly accurate and comprehensive annotation are hosted on a database called CITGeneDB, which includes a searchable web interface to facilitate broad public use. The database will be updated as new genes are found to enhance or suppress cold-induced thermogenesis. It is expected that CITGeneDB will be a valuable resource in future explorations of the molecular mechanism of cold-induced thermogenesis, helping pave the way for new obesity and diabetes treatments.Database URL: http://citgenedb.yubiolab.org.",CITGeneDB,0.997810602,NA,0,CITGeneDB,0.997810602,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +32025315,http://citgvd.cric.cn/home,"CitGVD: a comprehensive database of citrus genomic variations. Citrus is one of the most important commercial fruit crops worldwide. 
With the vast genomic data currently available for citrus fruit, genetic relationships, and molecular markers can be assessed for the development of molecular breeding and genomic selection strategies. In this study, to permit the ease of access to these data, a web-based database, the citrus genomic variation database (CitGVD, http://citgvd.cric.cn/home) was developed as the first citrus-specific comprehensive database dedicated to genome-wide variations including single nucleotide polymorphisms (SNPs) and insertions/deletions (INDELs). The current version (V1.0.0) of CitGVD is an open-access resource centered on 1,493,258,964 high-quality genomic variations and 84 phenotypes of 346 organisms curated from in-house projects and public resources. CitGVD integrates closely related information on genomic variation annotations, related gene annotations, and details regarding the organisms, incorporating a variety of built-in tools for data accession and analysis. As an example, CitGWAS can be used for genome-wide association studies (GWASs) with SNPs and phenotypic data, while CitEVOL can be used for genetic structure analysis. These features make CitGVD a comprehensive web portal and bioinformatics platform for citrus-related studies. It also provides a model for analyzing genome-wide variations for a wide range of crop varieties.",CitGVD,0.992236495,citrus genomic variation database,0.845783427,CitGVD,0.992236495,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2020 +24489955,http://citrus.hzau.edu.cn,"Citrus sinensis annotation project (CAP): a comprehensive database for sweet orange genome. Citrus is one of the most important and widely grown fruit crop with global production ranking firstly among all the fruit crops in the world. Sweet orange accounts for more than half of the Citrus production both in fresh fruit and processed juice. We have sequenced the draft genome of a double-haploid sweet orange (C. sinensis cv. 
Valencia), and constructed the Citrus sinensis annotation project (CAP) to store and visualize the sequenced genomic and transcriptome data. CAP provides GBrowse-based organization of sweet orange genomic data, which integrates ab initio gene prediction, EST, RNA-seq and RNA-paired end tag (RNA-PET) evidence-based gene annotation. Furthermore, we provide a user-friendly web interface to show the predicted protein-protein interactions (PPIs) and metabolic pathways in sweet orange. CAP provides comprehensive information beneficial to the researchers of sweet orange and other woody plants, which is freely available at http://citrus.hzau.edu.cn/.",NA,0,Citrus sinensis annotation project,0.768495091,Citrus sinensis annotation project,0.768495091,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/28/2014 +33181825,http://bioinfo.deinfo.uepg.br/citrus,"CitrusKB: a comprehensive knowledge base for transcriptome and interactome of Citrus spp. infected by Xanthomonas citri subsp. citri at different infection stages. . Citrus canker type A is a serious disease caused by Xanthomonas citri subsp. citri (X. citri), which is responsible for severe losses to growers and to the citrus industry worldwide. To date, no canker-resistant citrus genotypes are available, and there is limited information regarding the molecular and genetic mechanisms involved in the early stages of the citrus canker development. Here, we present the CitrusKB knowledge base. This is the first in vivo interactome database for different citrus cultivars, and it was produced to provide a valuable resource of information on citrus and their interaction with the citrus canker bacterium X. citri. CitrusKB provides tools for a user-friendly web interface to let users search and analyse a large amount of information regarding eight citrus cultivars with distinct levels of susceptibility to the disease, with controls and infected plants at different stages of infection by the citrus canker bacterium X. citri. 
Currently, CitrusKB comprises a reference citrus genome and its transcriptome, expressed transcripts, pseudogenes and predicted genomic variations (SNPs and SSRs). The updating process will continue over time by the incorporation of novel annotations and analysis tools. We expect that CitrusKB may substantially contribute to the field of citrus genomics. CitrusKB is accessible at http://bioinfo.deinfo.uepg.br/citrus. Users can download all the generated raw sequences and generated datasets by this study from the CitrusKB website.",CitrusKB,0.99783051,NA,0,CitrusKB,0.99783051,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +33109630,http://www.ckttdb.org,"Checkpoint therapeutic target database (CKTTD): the first comprehensive database for checkpoint targets and their modulators in cancer immunotherapy. . Checkpoint targets play a key role in tumor-mediated immune escape and therefore are critical for cancer immunotherapy. Unfortunately, there is a lack of bioinformatics resource that compile all the checkpoint targets for translational research and drug discovery in immuno-oncology. To this end, we developed checkpoint therapeutic target database (CKTTD), the first comprehensive database for immune checkpoint targets (proteins, miRNAs and LncRNAs) and their modulators. A scoring system was adopted to filter more relevant targets with high confidence. In addition, a few biological databases such as Oncomine, Drugbank, miRBase and Lnc2Cancer database were integrated into CKTTD to provide an in-depth information. Moreover, we computed and provided ligand-binding site information for all the targets which may support bench scientists for drug discovery efforts. In total, CKTTD compiles 105 checkpoint protein targets, 53 modulators (small-molecules and antibody), 30 miRNAs and 18 LncRNAs in cancer immunotherapy with validated experimental evidences curated from 10√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ649 literatures via an enhanced text-mining system. 
In conclusion, the CKTTD may serve as a useful platform for the research of cancer immunotherapy and drug discovery. The CKTTD database is freely available to public at http://www.ckttdb.org/.",CKTTD,0.997114658,Checkpoint therapeutic target database,0.942019236,CKTTD,0.997114658,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2020 +29934697,http://www.chineselexicaldatabase.com,"Chinese lexical database (CLD) : A large-scale lexical database for simplified Mandarin Chinese. We present the Chinese Lexical Database (CLD): a large-scale lexical database for simplified Chinese. The CLD provides a wealth of lexical information for 3913 one-character words, 34,233 two-character words, 7143 three-character words, and 3355 four-character words, and is publicly available through http://www.chineselexicaldatabase.com . For each of the 48,644 words in the CLD, we provide a wide range of categorical predictors, as well as an extensive set of frequency measures, complexity measures, neighborhood density measures, orthography-phonology consistency measures, and information-theoretic measures. We evaluate the explanatory power of the lexical variables in the CLD in the context of experimental data through analyses of lexical decision latencies for one-character, two-character, three-character and four-character words, as well as word naming latencies for one-character and two-character words. The results of these analyses are discussed.",CLD,0.992355183,Chinese Lexical Database,0.949212909,CLD,0.992355183,1,NA,26868053,NA,NA,NA,do not merge,NA,NA,NA,12/1/2018 +26868053,http://www.corvids.de/cld,"The Corvids Literature Database--500 years of ornithological research from a crow's perspective. . Corvids (Corvidae) play a major role in ornithological research. Because of their worldwide distribution, diversity and adaptiveness, they have been studied extensively. 
The aim of the Corvids Literature Database (CLD, http://www.corvids.de/cld) is to record all publications (citation format) on all extant and extinct Crows, Ravens, Jays and Magpies worldwide and tag them with specific keywords making them available for researchers worldwide. The self-maintained project started in 2006 and today comprises 8000 articles, spanning almost 500 years. The CLD covers publications from 164 countries, written in 36 languages and published by 8026 authors in 1503 journals (plus books, theses and other publications). Forty-nine percent of all records are available online as full-text documents or deposited in the physical CLD archive. The CLD contains 442 original corvid descriptions. Here, we present a metadata assessment of articles recorded in the CLD including a gap analysis and prospects for future research. Database URL: http://www.corvids.de/cld.",CLD,0.986547867,Corvids Literature Database,0.931742728,CLD,0.986547867,1,NA,29934697,NA,NA,NA,do not merge,NA,NA,NA,2/11/2016 +24678985,http://clearedleavesdb.org,"ClearedLeavesDB: an online database of cleared plant leaf images. Background Leaf vein networks are critical to both the structure and function of leaves. A growing body of recent work has linked leaf vein network structure to the physiology, ecology and evolution of land plants. In the process, multiple institutions and individual researchers have assembled collections of cleared leaf specimens in which vascular bundles (veins) are rendered visible. In an effort to facilitate analysis and digitally preserve these specimens, high-resolution images are usually created, either of entire leaves or of magnified leaf subsections. In a few cases, collections of digital images of cleared leaves are available for use online. However, these collections do not share a common platform nor is there a means to digitally archive cleared leaf images held by individual researchers (in addition to those held by institutions). 
Hence, there is a growing need for a digital archive that enables online viewing, sharing and disseminating of cleared leaf image collections held by both institutions and individual researchers. Description The Cleared Leaf Image Database (ClearedLeavesDB), is an online web-based resource for a community of researchers to contribute, access and share cleared leaf images. ClearedLeavesDB leverages resources of large-scale, curated collections while enabling the aggregation of small-scale collections within the same online platform. ClearedLeavesDB is built on Drupal, an open source content management platform. It allows plant biologists to store leaf images online with corresponding meta-data, share image collections with a user community and discuss images and collections via a common forum. We provide tools to upload processed images and results to the database via a web services client application that can be downloaded from the database. Conclusions We developed ClearedLeavesDB, a database focusing on cleared leaf images that combines interactions between users and data via an intuitive web interface. The web interface allows storage of large collections and integrates with leaf image analysis applications via an open application programming interface (API). The open API allows uploading of processed images and other trait data to the database, further enabling distribution and documentation of analyzed data within the community. The initial database is seeded with nearly 19,000 cleared leaf images representing over 40√ɬÉ√ǬÇ√ɬÇ√Ǭ†GB of image data. Extensible storage and growth of the database is ensured by using the data storage resources of the iPlant Discovery Environment. 
ClearedLeavesDB can be accessed at http://clearedleavesdb.org.",ClearedLeavesDB,0.982267678,Cleared Leaf Image Database,0.655172122,ClearedLeavesDB,0.982267678,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/28/2014 +22916227,http://clearpond.northwestern.edu,"CLEARPOND: cross-linguistic easy-access resource for phonological and orthographic neighborhood densities. Past research has demonstrated cross-linguistic, cross-modal, and task-dependent differences in neighborhood density effects, indicating a need to control for neighborhood variables when developing and interpreting research on language processing. The goals of the present paper are two-fold: (1) to introduce CLEARPOND (Cross-Linguistic Easy-Access Resource for Phonological and Orthographic Neighborhood Densities), a centralized database of phonological and orthographic neighborhood information, both within and between languages, for five commonly-studied languages: Dutch, English, French, German, and Spanish; and (2) to show how CLEARPOND can be used to compare general properties of phonological and orthographic neighborhoods across languages. CLEARPOND allows researchers to input a word or list of words and obtain phonological and orthographic neighbors, neighborhood densities, mean neighborhood frequencies, word lengths by number of phonemes and graphemes, and spoken-word frequencies. Neighbors can be defined by substitution, deletion, and/or addition, and the database can be queried separately along each metric or summed across all three. Neighborhood values can be obtained both within and across languages, and outputs can optionally be restricted to neighbors of higher frequency. To enable researchers to more quickly and easily develop stimuli, CLEARPOND can also be searched by features, generating lists of words that meet precise criteria, such as a specific range of neighborhood sizes, lexical frequencies, and/or word lengths. 
CLEARPOND is freely-available to researchers and the public as a searchable, online database and for download at http://clearpond.northwestern.edu.",CLEARPOND,0.993278623,Resource for,0.694889188,CLEARPOND,0.993278623,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/20/2012 +"26582918, 29165669, 31777943",http://www.ncbi.nlm.nih.gov/clinvar,"ClinVar: public archive of interpretations of clinically relevant variants. ClinVar (https://www.ncbi.nlm.nih.gov/clinvar/) at the National Center for Biotechnology Information (NCBI) is a freely available archive for interpretations of clinical significance of variants for reported conditions. The database includes germline and somatic variants of any size, type or genomic location. Interpretations are submitted by clinical testing laboratories, research laboratories, locus-specific databases, OMIM√ɬÉ√ǬÇ√ɬÇ√ǬÆ, GeneReviews√ɬÉ√Ǭ¢√ɬÇ√ǬÑ√ɬÇ√Ǭ¢, UniProt, expert panels and practice guidelines. In NCBI's Variation submission portal, submitters upload batch submissions or use the Submission Wizard for single submissions. Each submitted interpretation is assigned an accession number prefixed with SCV. ClinVar staff review validation reports with data types such as HGVS (Human Genome Variation Society) expressions; however, clinical significance is reported directly from submitters. Interpretations are aggregated by variant-condition combination and assigned an accession number prefixed with RCV. Clinical significance is calculated for the aggregate record, indicating consensus or conflict in the submitted interpretations. ClinVar uses data standards, such as HGVS nomenclature for variants and MedGen identifiers for conditions. The data are available on the web as variant-specific views; the entire data set can be downloaded via ftp. Programmatic access for ClinVar records is available through NCBI's E-utilities. 
Future development includes providing a variant-centric XML archive and a web page for details of SCV submissions.",ClinVar,0.996684611,NA,0,ClinVar,0.996684611,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +25652745,http://clipdb.ncrnalab.org,"CLIPdb: a CLIP-seq database for protein-RNA interactions. Background RNA-binding proteins (RBPs) play essential roles in gene expression regulation through their interactions with RNA transcripts, including coding, canonical non-coding and long non-coding RNAs. Large amounts of crosslinking immunoprecipitation (CLIP)-seq data (including HITS-CLIP, PAR-CLIP, and iCLIP) have been recently produced to reveal transcriptome-wide binding sites of RBPs at the single-nucleotide level. Description Here, we constructed a database, CLIPdb, to describe RBP-RNA interactions based on 395 publicly available CLIP-seq data sets for 111 RBPs from four organisms: human, mouse, worm and yeast. We consistently annotated the CLIP-seq data sets and RBPs, and developed a user-friendly interface for rapid navigation of the CLIP-seq data. We applied a unified computational method to identify transcriptome-wide binding sites, making the binding sites directly comparable and the data available for integration across different CLIP-seq studies. The high-resolution binding sites of the RBPs can be visualized on the whole-genome scale using a browser. In addition, users can browse and download the identified binding sites of all profiled RBPs by querying genes of interest, including both protein coding genes and non-coding RNAs. Conclusion Manually curated metadata and uniformly identified binding sites of publicly available CLIP-seq data sets will be a foundation for further integrative and comparative analyses. 
With maintained up-to-date data sets and improved functionality, CLIPdb ( http://clipdb.ncrnalab.org ) will be a valuable resource for improving the understanding of post-transcriptional regulatory networks.",CLIPdb,0.996144474,NA,0,CLIPdb,0.996144474,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/5/2015 +23193260,http://www.ncbi.nlm.nih.gov/clone,"Clone DB: an integrated NCBI resource for clone-associated data. The National Center for Biotechnology Information (NCBI) Clone DB (http://www.ncbi.nlm.nih.gov/clone/) is an integrated resource providing information about and facilitating access to clones, which serve as valuable research reagents in many fields, including genome sequencing and variation analysis. Clone DB represents an expansion and replacement of the former NCBI Clone Registry and has records for genomic and cell-based libraries and clones representing more than 100 different eukaryotic taxa. Records provide details of library construction, associated sequences, map positions and information about resource distribution. Clone DB is indexed in the NCBI Entrez system and can be queried by fields that include organism, clone name, gene name and sequence identifier. Whenever possible, genomic clones are mapped to reference assemblies and their map positions provided in clone records. Clones mapping to specific genomic regions can also be searched for using the NCBI Clone Finder tool, which accepts queries based on sequence coordinates or features such as gene or transcript names. Clone DB makes reports of library, clone and placement data on its FTP site available for download. 
With Clone DB, users now have available to them a centralized resource that provides them with the tools they will need to make use of these important research reagents.",Clone,0.87266922,NA,0,Clone,0.87266922,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/27/2012 +25913159,http://bif.uohyd.ac.in/closindb,"ClosIndb: A resource for computationally derived information from clostridial genomes. Over the past few years, several clostridial genomes have been sequenced, and since then new sequencing projects are also under way. Clostridia is one of the most sequenced genera, and presently, complete genome sequences of 49 clostridial species are available in public archives. Unraveling this wealth of genomic information opens up potential avenues in clostridial research. In the present study, we have carried out in silico analysis to decipher the genomic data. Subsequently, a web resource, ClosIndb, has been developed which collates the computationally derived information associated with all clostridial genes. It features various aspects of coding regions as well as non-coding regions, such as putative orthologs, proteins physicochemical properties, operons and cis-regulatory elements. It provides users with comparative details of all clostridial proteins across the firmicutes. ClosIndb is a comprehensive resource for all completely sequenced clostridial genomes and is under constant development. ClosIndb is freely accessible at http://bif.uohyd.ac.in/closindb/.",ClosIndb,0.993508637,NA,0,ClosIndb,0.993508637,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/23/2015 +33735471,"http://physics.carleton.ca/clrp/eye_plaque_v2, http://doi.org/10.22215/clrp/EPv2","Update of the CLRP eye plaque brachytherapy database for photon-emitting sources. Purpose To update and extend the Carleton Laboratory for Radiotherapy Physics (CLRP) Eye Plaque (EP) dosimetry database for low-energy photon-emitting brachytherapy sources using egs_brachy, an open-source EGSnrc application. 
The previous database, CLRP_EPv1, contained datasets for the Collaborative Ocular Melanoma Study (COMS) plaques (10-22 mm diameter) with 103 Pd or 125 I seeds (BrachyDose-computed, 2008). The new database, CLRP_EPv2, consists of newly calculated three-dimensional (3D) dose distributions for 17 plaques [eight COMS, five Eckert & Ziegler BEBIG, and four others representative of models used worldwide] for 103 Pd, 125 I, and 131 Cs seeds. Acquisition and validation methods Plaque models are developed with egs_brachy, based on published/manufacturer dimensions and material data. The BEBIG plaques (modeled for the first time) are identical in dimensions to COMS plaques but differ in elemental composition and/or density. Previously benchmarked seed models are used. Eye plaques and seeds are simulated at the center of full-scatter water phantoms, scoring in (0.05 cm)3 voxels spanning the eye for scenarios: (a) ""HOMO"": simulated TG43 conditions; (b) ""HETERO"": eye plaques and seeds fully modeled; (c) ""HETsi"" (BEBIG only): one seed is active at a time with other seed geometries present but not emitting photons (inactive); summation over all i seeds in a plaque then yields ""HETsum"" (includes interseed effects). For validation, doses are compared to those from CLRP_EPv1 and published data. Data format and access Data are available at https://physics.carleton.ca/clrp/eye_plaque_v2, http://doi.org/10.22215/clrp/EPv2. The data consist of 3D dose distributions (text-based EGSnrc ""3ddose"" file format) and graphical presentations of the comparisons to previously published data. Potential applications The CLRP_EPv2 database provides accurate reference 3D dose distributions to advance ocular brachytherapy dose evaluations. 
The fully-benchmarked eye plaque models will be freely distributed with egs_brachy, supporting adoption of model-based dose evaluations as recommended by TG-129, TG-186, and TG-221.",CLRP,0.958588719,NA,0,CLRP,0.958588719,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/17/2021 +23104377,http://www.clustermine360.ca,"ClusterMine360: a database of microbial PKS/NRPS biosynthesis. ClusterMine360 (http://www.clustermine360.ca/) is a database of microbial polyketide and non-ribosomal peptide gene clusters. It takes advantage of crowd-sourcing by allowing members of the community to make contributions while automation is used to help achieve high data consistency and quality. The database currently has >200 gene clusters from >185 compound families. It also features a unique sequence repository containing >10 000 polyketide synthase/non-ribosomal peptide synthetase domains. The sequences are filterable and downloadable as individual or multiple sequence FASTA files. We are confident that this database will be a useful resource for members of the polyketide synthases/non-ribosomal peptide synthetases research community, enabling them to keep up with the growing number of sequenced gene clusters and rapidly mine these clusters for functional information.",ClusterMine360,0.942080637,NA,0,ClusterMine360,0.942080637,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/26/2012 +23661693,http://www.complement.us/cmap,"CMAP: Complement Map Database. Summary The human complement system is increasingly perceived as an intricate protein network of effectors, inhibitors and regulators that drives critical processes in health and disease and extensively communicates with associated physiological pathways ranging from immunity and inflammation to homeostasis and development. 
A steady stream of experimental data reveals new fascinating connections at a rapid pace; although opening unique opportunities for research discoveries, the comprehensiveness and large diversity of experimental methods, nomenclatures and publication sources renders it highly challenging to keep up with the essential findings. With the Complement Map Database (CMAP), we have created a novel and easily accessible research tool to assist the complement community and scientists from related disciplines in exploring the complement network and discovering new connections. Availability http://www.complement.us/cmap. Contact lambris@upenn.edu Supplementary information Supplementary data are available at Bioinformatics online.",CMAP,0.981967479,Complement Map Database,0.876230553,CMAP,0.981967479,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/9/2013 +30357356,http://bidd2.nus.edu.sg/CMAUP,"CMAUP: a database of collective molecular activities of useful plants. The beneficial effects of functionally useful plants (e.g. medicinal and food plants) arise from the multi-target activities of multiple ingredients of these plants. The knowledge of the collective molecular activities of these plants facilitates mechanistic studies and expanded applications. A number of databases provide information about the effects and targets of various plants and ingredients. More comprehensive information is needed for broader classes of plants and for the landscapes of individual plant's multiple targets, collective activities and regulated biological pathways, processes and diseases. 
We therefore developed a new database, Collective Molecular Activities of Useful Plants (CMAUP), to provide the collective landscapes of multiple targets (ChEMBL target classes) and activity levels (in 2D target-ingredient heatmap), and regulated gene ontologies (GO categories), biological pathways (KEGG categories) and diseases (ICD blocks) for 5645 plants (2567 medicinal, 170 food, 1567 edible, 3 agricultural and 119 garden plants) collected from or traditionally used in 153 countries and regions. These landscapes were derived from 47 645 plant ingredients active against 646 targets in 234 KEGG pathways associated with 2473 gene ontologies and 656 diseases. CMAUP (http://bidd2.nus.edu.sg/CMAUP/) is freely accessible and searchable by keywords, plant usage classes, species families, targets, KEGG pathways, gene ontologies, diseases (ICD code) and geographical locations.",CMAUP,0.998420835,Collective Molecular Activities of Useful Plants,0.974408348,CMAUP,0.998420835,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +33693668,http://www.sysbio.org.cn/CMBD,"CMBD: a manually curated cancer metabolic biomarker knowledge database. . The pathogenesis of cancer is influenced by interactions among genes, proteins, metabolites and other small molecules. Understanding cancer progression at the metabolic level is propitious to the visual decoding of changes in living organisms. To date, a large number of metabolic biomarkers in cancer have been measured and reported, which provide an alternative method for cancer precision diagnosis, treatment and prognosis. To systematically understand the heterogeneity of cancers, we developed the database CMBD to integrate the cancer metabolic biomarkers scattered over literatures in PubMed. At present, CMBD contains 438 manually curated relationships between 282 biomarkers and 76 cancer subtypes of 18 tissues reported in 248 literatures. 
Users can access the comprehensive metabolic biomarker information about cancers, references, clinical samples and their relationships from our online database. As case studies, pathway analysis was performed on the metabolic biomarkers of breast and prostate cancers, respectively. 'Phenylalanine, tyrosine and tryptophan biosynthesis', 'phenylalanine metabolism' and 'primary bile acid biosynthesis' were identified as playing key roles in breast cancer. 'Glyoxylate and dicarboxylate metabolism', 'citrate cycle (TCA cycle)', and 'alanine, aspartate and glutamate metabolism' have important functions in prostate cancer. These findings provide us with an understanding of the metabolic pathway of cancer initiation and progression. Database URL: http://www.sysbio.org.cn/CMBD/.",CMBD,0.997389793,NA,0,CMBD,0.997389793,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2021 +30668638,http://syslab5.nchu.edu.tw/CMEP,"CMEP: a database for circulating microRNA expression profiling. Motivation In recent years, several experimental studies have revealed that the microRNAs (miRNAs) in serum, plasma, exosome and whole blood are dysregulated in various types of diseases, indicating that the circulating miRNAs may serve as potential noninvasive biomarkers for disease diagnosis and prognosis. However, no database has been constructed to integrate the large-scale circulating miRNA profiles, explore the functional pathways involved and predict the potential biomarkers using feature selection between the disease conditions. Although there have been several studies attempting to generate a circulating miRNA database, they have not yet integrated the large-scale circulating miRNA profiles or provided the biomarker-selection function using machine learning methods. Results To fill this gap, we constructed the Circulating MicroRNA Expression Profiling (CMEP) database for integrating, analyzing and visualizing the large-scale expression profiles of phenotype-specific circulating miRNAs. 
The CMEP database contains massive datasets that were manually curated from NCBI GEO and the exRNA Atlas, including 66 datasets, 228 subsets and 10 419 samples. The CMEP provides the differential expression circulating miRNAs analysis and the KEGG functional pathway enrichment analysis. Furthermore, to provide the function of noninvasive biomarker discovery, we implemented several feature-selection methods, including ridge regression, lasso regression, support vector machine and random forests. Finally, we implemented a user-friendly web interface to improve the user experience and to visualize the data and results of CMEP. Availability and implementation CMEP is accessible at http://syslab5.nchu.edu.tw/CMEP.",CMEP,0.926891863,Circulating MicroRNA Expression Profiling,0.608082005,CMEP,0.926891863,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/1/2019 +25885062,http://65.181.125.102/cmmdb2/index.html,"CmMDb: a versatile database for Cucumis melo microsatellite markers and other horticulture crop research. Cucumis melo L. that belongs to Cucurbitaceae family ranks among one of the highest valued horticulture crops being cultivated across the globe. Besides its economical and medicinal importance, Cucumis melo L. is a valuable resource and model system for the evolutionary studies of cucurbit family. However, very limited numbers of molecular markers were reported for Cucumis melo L. so far that limits the pace of functional genomic research in melon and other similar horticulture crops. We developed the first whole genome based microsatellite DNA marker database of Cucumis melo L. and comprehensive web resource that aids in variety identification and physical mapping of Cucurbitaceae family. The Cucumis melo L. microsatellite database (CmMDb: http://65.181.125.102/cmmdb2/index.html) encompasses 39,072 SSR markers along with its motif repeat, motif length, motif sequence, marker ID, motif type and chromosomal locations. 
The database is featured with novel automated primer designing facility to meet the needs of wet lab researchers. CmMDb is a freely available web resource that facilitates the researchers to select the most appropriate markers for marker-assisted selection in melons and to improve breeding strategies.",CmMDb,0.993868947,Cucumis melo L,0.824225145,CmMDb,0.993868947,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/17/2015 +32986829,http://www.cmnpd.org,"CMNPD: a comprehensive marine natural products database towards facilitating drug discovery from the ocean. Marine organisms are expected to be an important source of inspiration for drug discovery after terrestrial plants and microorganisms. Despite the remarkable progress in the field of marine natural products (MNPs) chemistry, there are only a few open access databases dedicated to MNPs research. To meet the growing demand for mining and sharing for MNPs-related data resources, we developed CMNPD, a comprehensive marine natural products database based on manually curated data. CMNPD currently contains more than 31 000 chemical entities with various physicochemical and pharmacokinetic properties, standardized biological activity data, systematic taxonomy and geographical distribution of source organisms, and detailed literature citations. It is an integrated platform for structure dereplication (assessment of novelty) of (marine) natural products, discovery of lead compounds, data mining of structure-activity relationships and investigation of chemical ecology. Access is available through a user-friendly web interface at https://www.cmnpd.org. We are committed to providing a free data sharing platform for not only professional MNPs researchers but also the broader scientific community to facilitate drug discovery from the ocean.",CMNPD,0.998440802,NA,0,CMNPD,0.998440802,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +25398898,http://cgbc.cgu.edu.tw/cmpd,"CMPD: cancer mutant proteome database. 
Whole-exome sequencing, which centres on the protein coding regions of disease/cancer associated genes, represents the most cost-effective method to-date for deciphering the association between genetic alterations and diseases. Large-scale whole exome/genome sequencing projects have been launched by various institutions, such as NCI, Broad Institute and TCGA, to provide a comprehensive catalogue of coding variants in diverse tissue samples and cell lines. Further functional and clinical interrogation of these sequence variations must rely on extensive cross-platforms integration of sequencing information and a proteome database that explicitly and comprehensively archives the corresponding mutated peptide sequences. While such data resource is a critical for the mass spectrometry-based proteomic analysis of exomic variants, no database is currently available for the collection of mutant protein sequences that correspond to recent large-scale genomic data. To address this issue and serve as bridge to integrate genomic and proteomics datasets, CMPD (http://cgbc.cgu.edu.tw/cmpd) collected over 2 millions genetic alterations, which not only facilitates the confirmation and examination of potential cancer biomarkers but also provides an invaluable resource for translational medicine research and opportunities to identify mutated proteins encoded by mutated genes.",CMPD,0.993044734,cancer mutant proteome database,0.959299552,CMPD,0.993044734,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/14/2014 +26062809,http://lgcm.icb.ufmg.br/cmregnet,"CMRegNet-An interspecies reference database for corynebacterial and mycobacterial regulatory networks. Background Organisms utilize a multitude of mechanisms for responding to changing environmental conditions, maintaining their functional homeostasis and to overcome stress situations. One of the most important mechanisms is transcriptional gene regulation. 
In-depth study of the transcriptional gene regulatory network can lead to various practical applications, creating a greater understanding of how organisms control their cellular behavior. Description In this work, we present a new database, CMRegNet for the gene regulatory networks of Corynebacterium glutamicum ATCC 13032 and Mycobacterium tuberculosis H37Rv. We furthermore transferred the known networks of these model organisms to 18 other non-model but phylogenetically close species (target organisms) of the CMNR group. In comparison to other network transfers, for the first time we utilized two model organisms resulting into a more diverse and complete network of the target organisms. Conclusion CMRegNet provides easy access to a total of 3,103 known regulations in C. glutamicum ATCC 13032 and M. tuberculosis H37Rv and to 38,940 evolutionary conserved interactions for 18 non-model species of the CMNR group. This makes CMRegNet to date the most comprehensive database of regulatory interactions of CMNR bacteria. The content of CMRegNet is publicly available online via a web interface found at http://lgcm.icb.ufmg.br/cmregnet .",CMRegNet,0.995652676,NA,0,CMRegNet,0.995652676,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/11/2015 +23630576,http://cbbiweb.uthscsa.edu/KMethylomes,"CMS: a web-based system for visualization and analysis of genome-wide methylation data of human cancers. Background DNA methylation of promoter CpG islands is associated with gene suppression, and its unique genome-wide profiles have been linked to tumor progression. Coupled with high-throughput sequencing technologies, it can now efficiently determine genome-wide methylation profiles in cancer cells. Also, experimental and computational technologies make it possible to find the functional relationship between cancer-specific methylation patterns and their clinicopathological parameters. 
Methodology/principal findings Cancer methylome system (CMS) is a web-based database application designed for the visualization, comparison and statistical analysis of human cancer-specific DNA methylation. Methylation intensities were obtained from MBDCap-sequencing, pre-processed and stored in the database. 191 patient samples (169 tumor and 22 normal specimen) and 41 breast cancer cell-lines are deposited in the database, comprising about 6.6 billion uniquely mapped sequence reads. This provides comprehensive and genome-wide epigenetic portraits of human breast cancer and endometrial cancer to date. Two views are proposed for users to better understand methylation structure at the genomic level or systemic methylation alteration at the gene level. In addition, a variety of annotation tracks are provided to cover genomic information. CMS includes important analytic functions for interpretation of methylation data, such as the detection of differentially methylated regions, statistical calculation of global methylation intensities, multiple gene sets of biologically significant categories, interactivity with UCSC via custom-track data. We also present examples of discoveries utilizing the framework. Conclusions/significance CMS provides visualization and analytic functions for cancer methylome datasets. A comprehensive collection of datasets, a variety of embedded analytic functions and extensive applications with biological and translational significance make this system powerful and unique in cancer methylation research. CMS is freely accessible at: http://cbbiweb.uthscsa.edu/KMethylomes/.",CMS,0.961046875,Cancer methylome system,0.863095567,CMS,0.961046875,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/22/2013 +31813095,"http://shaktisahislab.com/include/CMV/, http://weislab.com/WeiDOCK/include/content/CMV","CytoMegaloVirus Infection Database: A Public Omics Database for Systematic and Comparable Information of CMV. 
CytoMegaloVirus (CMV) is known to cause infection in humans and may remain dormant throughout the life span of an individual. CMV infection has been reported to be fatal in patients with weak immunity. It is transmitted through blood, saliva, urine, semen and breast milk. Although medications are available to treat the infected patients, there is no cure for CMV. This concern prompted us to construct a comprehensive database having exhaustive information regarding CMV, its infections and therapies to be available on a single platform. Thus, we propose a newly designed database that includes all the information from various public resources such as biological databases, virus taxonomy databanks, viral databases, and drug bank, integrated into this database, named as cytomegalovirus database (CMVdb). It features all the relevant data regarding the strains of CMV, genes, expressed proteins, the genomic sequence of CMV and drugs used in the treatment of cytomegalovirus infection. CMVdb has a unique feature of in-house data analysis, so all the data obtained from various resources are processed within the system. The user interface is more responsive because of the integrated platform that will highly facilitate the researchers. Based on CMVdb functionality and quality of the data, it will accelerate the research and development in the field of infectious diseases and immunology with a special focus on CMV. The obtained data would be useful in designing better therapeutic strategies and agents for the treatment of CMV infections. The proposed database (CMVdb) is freely accessible at http://shaktisahislab.com/include/CMV/ or http://weislab.com/WeiDOCK/include/content/CMV/.",CMVdb,0.988853097,CytoMegaloVirus Infection Database,0.726735294,CMVdb,0.988853097,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/7/2019 +31901979,http://cailab.labshare.cn/CNAdbCC,"Genome-wide somatic copy number alteration analysis and database construction for cervical cancer. 
Cervical cancer is a common gynecological malignancy with high incidence and mortality. Somatic copy number alterations (CNAs) play an important role in identifying tumor suppressor genes and oncogenes and are a useful diagnostic indicator for many cancer types. However, the genomic landscape of CNAs in cervical cancer has not yet been comprehensively characterized. In the present study, we collected 974 cervical cancer samples from different data sources. All samples were analyzed by genomic arrays to obtain high-resolution CNAs. Focal genomic regions with CNA events and potential cancer driver genes were identified by GISTIC2.0. Meanwhile, we constructed a comprehensive cervical cancer database by PHP and self-written Perl and R scripts. In total, 54 recurrent regions of amplification and deletion were detected. Frequently altered tumor suppressor genes were found in these regions, including PIK3CA, ERBB2, EP300 and FBXW7. CNA hotspots and related enriched functional categories were also identified. The incidence of chromothripsis in cervical cancer was estimated to be 6.06%, and the chromosome pulverization hotspot regions were detected. Based on the curated data, we developed CNAdbCC (http://cailab.labshare.cn/CNAdbCC/), a comprehensive database for copy number alterations in cervical cancer. We provide a user-friendly Web interface for data mining and visualization. It is the most comprehensive public database devoted exclusively to genomic alterations in cervical cancer. These results extend our molecular understanding of cervical cancer. The database will enable researchers to explore specific CNA patterns in this lethal cancer and facilitate the discovery of therapeutic candidates.",CNAdbCC,0.992988825,NA,0,CNAdbCC,0.992988825,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/4/2020 +33095860,http://cncdatabase.med.cornell.edu,"CNCDatabase: a database of non-coding cancer drivers. 
Most mutations in cancer genomes occur in the non-coding regions with unknown impact on tumor development. Although the increase in the number of cancer whole-genome sequences has revealed numerous putative non-coding cancer drivers, their information is dispersed across multiple studies making it difficult to understand their roles in tumorigenesis of different cancer types. We have developed CNCDatabase, Cornell Non-coding Cancer driver Database (https://cncdatabase.med.cornell.edu/) that contains detailed information about predicted non-coding drivers at gene promoters, 5' and 3' UTRs (untranslated regions), enhancers, CTCF insulators and non-coding RNAs. CNCDatabase documents 1111 protein-coding genes and 90 non-coding RNAs with reported drivers in their non-coding regions from 32 cancer types by computational predictions of positive selection using whole-genome sequences; differential gene expression in samples with and without mutations; or another set of experimental validations including luciferase reporter assays and genome editing. The database can be easily modified and scaled as lists of non-coding drivers are revised in the community with larger whole-genome sequencing studies, CRISPR screens and further experimental validations. Overall, CNCDatabase provides a helpful resource for researchers to explore the pathological role of non-coding alterations in human cancers.",CNCDatabase,0.997600734,Cornell Non-coding Cancer driver Database,0.958675064,CNCDatabase,0.997600734,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +33010163,http://www.rna-society.org/cncrnadb,"cncRNAdb: a manually curated resource of experimentally supported RNAs with both protein-coding and noncoding function. RNA endowed with both protein-coding and noncoding functions is referred to as 'dual-function RNA', 'binary functional RNA (bifunctional RNA)' or 'cncRNA (coding and noncoding RNA)'. 
Recently, an increasing number of cncRNAs have been identified, including both translated ncRNAs (ncRNAs with coding functions) and untranslated mRNAs (mRNAs with noncoding functions). However, an appropriate database for storing and organizing cncRNAs is still lacking. Here, we developed cncRNAdb, a manually curated database of experimentally supported cncRNAs, which aims to provide a resource for efficient manipulation, browsing and analysis of cncRNAs. The current version of cncRNAdb documents about 2600 manually curated entries of cncRNA functions with experimental evidence, involving more than 2,000 RNAs (including over 1300 translated ncRNAs and over 600 untranslated mRNAs) across over 20 species. In summary, we believe that cncRNAdb will help elucidate the functions and mechanisms of cncRNAs and develop new prediction methods. The database is available at http://www.rna-society.org/cncrnadb/.",cncRNAdb,0.991504371,NA,0,cncRNAdb,0.991504371,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +32952115,http://db.cngb.org,"CNGBdb: China National GeneBank DataBase. China National GeneBank DataBase (CNGBdb) is a data platform aiming to systematically archiving and sharing of multi-omics data in life science. As the service portal of Bio-informatics Data Center of the core structure, namely, ""Three Banks and Two Platforms"" of China National GeneBank (CNGB), CNGBdb has the advantages of rich sample resources, data resources, cooperation projects, powerful data computation and analysis capabilities. With the advent of high throughput sequencing technologies, research in life science has entered the big data era, which is in the need of closer international cooperation and data sharing. With the development of China's economy and the increase of investment in life science research, we need to establish a national public platform for data archiving and sharing in life science to promote the systematic management, application and industrial utilization. 
Currently, CNGBdb can provide genomic data archiving, information search engines, data management and data analysis services. The data schema of CNGBdb has covered projects, samples, experiments, runs, assemblies, variations and sequences. Until May 22, 2020, CNGBdb has archived 2176 research projects and more than 2221 TB sequencing data submitted by researchers globally. In the future, CNGBdb will continue to be dedicated to promoting data sharing in life science research and improving the service capability. CNGBdb website is: https://db.cngb.org/.",CNGBdb,0.99559629,China National GeneBank DataBase,0.846177836,CNGBdb,0.99559629,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2020 +32705130,http://db.cngb.org/cnsa,"CNSA: a data repository for archiving omics data. . With the application and development of high-throughput sequencing technology in life and health sciences, massive multi-omics data brings the problem of efficient management and utilization. Database development and biocuration are the prerequisites for the reuse of these big data. Here, relying on China National GeneBank (CNGB), we present CNGB Sequence Archive (CNSA) for archiving omics data, including raw sequencing data and its further analyzed results which are organized into six objects, namely Project, Sample, Experiment, Run, Assembly and Variation at present. Moreover, CNSA has created a correlation model of living samples, sample information and analytical data on some projects. Both living samples and analytical data are directly correlated with the sample information. From either one, information or data of the other two can be obtained, so that all data can be traced throughout the life cycle from the living sample to the sample information to the analytical data. Complying with the data standards commonly used in the life sciences, CNSA is committed to building a comprehensive and curated data repository for storing, managing and sharing of omics data. 
We will continue to improve the data standards and provide free access to open-data resources for worldwide scientific communities to support academic research and the bio-industry. Database URL: https://db.cngb.org/cnsa/.",CNSA,0.969382127,CNGB,0.596538961,CNSA,0.969382127,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +22826268,http://bioinfo.hrbmu.edu.cn/CNVD,"CNVD: text mining-based copy number variation in disease database. Copy number variation (CNV) is a kind of chromosomal structural reorganization that has been detected, in this decade, mainly by high-throughput biological technology. Researchers have found that CNVs are ubiquitous in many species and accumulating evidence indicates that CNVs are closely related with complex diseases. The investigation of chromosomal structural alterations has begun to reveal some important clues to the pathologic causes of diseases and to the disease process. However, many of the published studies have focused on a single disease and, so far, the experimental results have not been systematically collected or organized. Manual text mining from 6301 published papers was used to build the Copy Number Variation in Disease database (CNVD). CNVD contains CNV information for 792 diseases in 22 species from diverse types of experiments, thus, ensuring high confidence and comprehensive representation of the relationship between the CNVs and the diseases. In addition, multiple query modes and visualized results are provided in the CNVD database. With its user-friendly interface and the integrated CNV information for different diseases, CNVD will offer a truly comprehensive platform for disease research based on chromosomal structural variations. 
The CNVD interface is accessible at http://bioinfo.hrbmu.edu.cn/CNVD.",CNVD,0.973126009,Copy Number Variation in Disease database,0.926422502,CNVD,0.973126009,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/23/2012 +30598077,http://cnv.gtxlab.com,"Constructing a database for the relations between CNV and human genetic diseases via systematic text mining. Background The detection and interpretation of CNVs are of clinical importance in genetic testing. Several databases and web services are already being used by clinical geneticists to interpret the medical relevance of identified CNVs in patients. However, geneticists or physicians would like to obtain the original literature context for more detailed information, especially for rare CNVs that were not included in databases. Results The resulting CNVdigest database includes 440,485 sentences for CNV-disease relationship. A total number of 1582 CNVs and 2425 diseases are involved. Sentences describing CNV-disease correlations are indexed in CNVdigest, with CNV mentions and disease mentions annotated. Conclusions In this paper, we use a systematic text mining method to construct a database for the relationship between CNVs and diseases. Based on that, we also developed a concise front-end to facilitate the analysis of CNV/disease association, providing a user-friendly web interface for convenient queries. The resulting system is publically available at http://cnv.gtxlab.com /.",CNVdigest,0.993451834,NA,0,CNVdigest,0.993451834,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/31/2018 +34259866,http://cnvintegrate.cgm.ntu.edu.tw,"CNVIntegrate: the first multi-ethnic database for identifying copy number variations associated with cancer. . Human copy number variations (CNVs) and copy number alterations (CNAs) are DNA segments (>1000 base pairs) of duplications or deletions with respect to the reference genome, potentially causing genomic imbalance leading to diseases such as cancer. 
CNVs further cause genetic diversity in healthy populations and are predominant drivers of gene/genome evolution. Initiatives have been taken by the research community to establish large-scale databases to comprehensively characterize CNVs in humans. Exome Aggregation Consortium (ExAC) is one such endeavor that catalogs CNVs, of nearly 60 000 healthy individuals across five demographic clusters. Furthermore, large projects such as the Catalogue of Somatic Mutations in Cancer (COSMIC) and the Cancer Cell Line Encyclopedia (CCLE) combine CNA data from cancer-affected individuals and large panels of human cancer cell lines, respectively. However, we lack a structured and comprehensive CNV/CNA resource including both healthy individuals and cancer patients across large populations. CNVIntegrate is the first web-based system that hosts CNV and CNA data from both healthy populations and cancer patients, respectively, and concomitantly provides statistical comparisons between copy number frequencies of multiple ethnic populations. It further includes, for the first time, well-cataloged CNV and CNA data from Taiwanese healthy individuals and Taiwan Breast Cancer data, respectively, along with imported resources from ExAC, COSMIC and CCLE. CNVIntegrate offers a CNV/CNA-data hub for structured information retrieval for clinicians and scientists towards important drug discoveries and precision treatments. Database URL: http://cnvintegrate.cgm.ntu.edu.tw/.",CNVIntegrate,0.995991111,NA,0,CNVIntegrate,0.995991111,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +28583129,http://cox-path-db.kazusa.or.jp/tomato,"Co-expressed Pathways DataBase for Tomato: a database to predict pathways relevant to a query gene. Background Gene co-expression, the similarity of gene expression profiles under various experimental conditions, has been used as an indicator of functional relationships between genes, and many co-expression databases have been developed for predicting gene functions.
These databases usually provide users with a co-expression network and a list of strongly co-expressed genes for a query gene. Several of these databases also provide functional information on a set of strongly co-expressed genes (i.e., provide biological processes and pathways that are enriched in these strongly co-expressed genes), which is generally analyzed via over-representation analysis (ORA). A limitation of this approach may be that users can predict gene functions only based on the strongly co-expressed genes. Results In this study, we developed a new co-expression database that enables users to predict the function of tomato genes from the results of functional enrichment analyses of co-expressed genes while considering the genes that are not strongly co-expressed. To achieve this, we used the ORA approach with several thresholds to select co-expressed genes, and performed gene set enrichment analysis (GSEA) applied to a ranked list of genes ordered by the co-expression degree. We found that internal correlation in pathways affected the significance levels of the enrichment analyses. Therefore, we introduced a new measure for evaluating the relationship between the gene and pathway, termed the percentile (p)-score, which enables users to predict functionally relevant pathways without being affected by the internal correlation in pathways. In addition, we evaluated our approaches using receiver operating characteristic curves, which concluded that the p-score could improve the performance of the ORA. Conclusions We developed a new database, named Co-expressed Pathways DataBase for Tomato, which is available at http://cox-path-db.kazusa.or.jp/tomato . 
The database allows users to predict pathways that are relevant to a query gene, which would help to infer gene functions.",NA,0,Co-expressed Pathways DataBase for,0.784190648,Co-expressed Pathways DataBase for,0.784190648,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,6/5/2017 +32392296,http://milton.cshl.edu/CoCoCoNet,"CoCoCoNet: conserved and comparative co-expression across a diverse set of species. Co-expression analysis has provided insight into gene function in organisms from Arabidopsis to zebrafish. Comparison across species has the potential to enrich these results, for example by prioritizing among candidate human disease genes based on their network properties or by finding alternative model systems where their co-expression is conserved. Here, we present CoCoCoNet as a tool for identifying conserved gene modules and comparing co-expression networks. CoCoCoNet is a resource for both data and methods, providing gold standard networks and sophisticated tools for on-the-fly comparative analyses across 14 species. We show how CoCoCoNet can be used in two use cases. In the first, we demonstrate deep conservation of a nucleolus gene module across very divergent organisms, and in the second, we show how the heterogeneity of autism mechanisms in humans can be broken down by functional groups and translated to model organisms. CoCoCoNet is free to use and available to all at https://milton.cshl.edu/CoCoCoNet, with data and R scripts available at ftp://milton.cshl.edu/data.",CoCoCoNet,0.998220623,NA,0,CoCoCoNet,0.998220623,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2020 +33423696,http://coconut.naturalproducts.net,"COCONUT online: Collection of Open Natural Products database. Natural products (NPs) are small molecules produced by living organisms with potential applications in pharmacology and other industries as many of them are bioactive. 
This potential raised great interest in NP research around the world and in different application fields, therefore, over the years a multiplication of generalistic and thematic NP databases has been observed. However, there is, at this moment, no online resource regrouping all known NPs in just one place, which would greatly simplify NPs research and allow computational screening and other in silico applications. In this manuscript we present the online version of the COlleCtion of Open Natural prodUcTs (COCONUT): an aggregated dataset of elucidated and predicted NPs collected from open sources and a web interface to browse, search and easily and quickly download NPs. COCONUT web is freely available at https://coconut.naturalproducts.net .",COCONUT,0.993007143,COlleCtion of Open Natural prodUcTs,0.784143726,COCONUT,0.993007143,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/10/2021 +22070882,http://www.crystallography.net,"Crystallography Open Database (COD): an open-access collection of crystal structures and platform for world-wide collaboration. Using an open-access distribution model, the Crystallography Open Database (COD, http://www.crystallography.net) collects all known 'small molecule / small to medium sized unit cell' crystal structures and makes them available freely on the Internet. As of today, the COD has aggregated ~150,000 structures, offering basic search capabilities and the possibility to download the whole database, or parts thereof using a variety of standard open communication protocols. A newly developed website provides capabilities for all registered users to deposit published and so far unpublished structures as personal communications or pre-publication depositions. Such a setup enables extension of the COD database by many users simultaneously. 
This increases the possibilities for growth of the COD database, and is the first step towards establishing a world wide Internet-based collaborative platform dedicated to the collection and curation of structural knowledge.",COD,0.984296083,Crystallography Open Database,0.856681943,COD,0.984296083,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2011 +25270877,http://codex.stemcells.cam.ac.uk,"CODEX: a next-generation sequencing experiment database for the haematopoietic and embryonic stem cell communities. CODEX (http://codex.stemcells.cam.ac.uk/) is a user-friendly database for the direct access and interrogation of publicly available next-generation sequencing (NGS) data, specifically aimed at experimental biologists. In an era of multi-centre genomic dataset generation, CODEX provides a single database where these samples are collected, uniformly processed and vetted. The main drive of CODEX is to provide the wider scientific community with instant access to high-quality NGS data, which, irrespective of the publishing laboratory, is directly comparable. CODEX allows users to immediately visualize or download processed datasets, or compare user-generated data against the database's cumulative knowledge-base. CODEX contains four types of NGS experiments: transcription factor chromatin immunoprecipitation coupled to high-throughput sequencing (ChIP-Seq), histone modification ChIP-Seq, DNase-Seq and RNA-Seq. These are largely encompassed within two specialized repositories, HAEMCODE and ESCODE, which are focused on haematopoiesis and embryonic stem cell samples, respectively. To date, CODEX contains over 1000 samples, including 221 unique TFs and 93 unique cell types. 
CODEX therefore provides one of the most complete resources of publicly available NGS data for the direct interrogation of transcriptional programmes that regulate cellular identity and fate in the context of mammalian development, homeostasis and disease.",CODEX,0.997284114,NA,0,CODEX,0.997284114,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/30/2014 +23846747,http://www.codnas.com.ar,"CoDNaS: a database of conformational diversity in the native state of proteins. Motivation Conformational diversity is a key concept in the understanding of different issues related with protein function such as the study of catalytic processes in enzymes, protein-protein recognition, protein evolution and the origins of new biological functions. Here, we present a database of proteins with different degrees of conformational diversity. Conformational Diversity of Native State (CoDNaS) is a redundant collection of three-dimensional structures for the same protein derived from protein data bank. Structures for the same protein obtained under different crystallographic conditions have been associated with snapshots of protein dynamism and consequently could characterize protein conformers. CoDNaS allows the user to explore global and local structural differences among conformers as a function of different parameters such as presence of ligand, post-translational modifications, changes in oligomeric states and differences in pH and temperature. Additionally, CoDNaS contains information about protein taxonomy and function, disorder level and structural classification offering useful information to explore the underlying mechanism of conformational diversity and its close relationship with protein function. Currently, CoDNaS has 122 122 structures integrating 12 684 entries, with an average of 9.63 conformers per protein. 
Availability The database is freely available at http://www.codnas.com.ar/.",CoDNaS,0.99604851,Conformational Diversity of Native State,0.846067939,CoDNaS,0.99604851,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/11/2013 +34954795,"http://ufq.unq.edu.ar/codnasrna, http://codnas-rna.bioinformatica.org","CoDNaS-RNA: a database of Conformational Diversity in the Native State of RNA. . Conformational changes in RNA native ensembles are central to fulfill many of their biological roles. Systematic knowledge of the extent and possible modulators of this conformational diversity is desirable to better understand the relationship between RNA dynamics and function. We have developed CoDNaS-RNA as the first database of conformational diversity in RNA molecules. Known RNA structures are retrieved and clustered to identify alternative conformers of each molecule. Pairwise structural comparisons between all conformers within each cluster allows to measure the variability of the molecule. Additional annotations about structural features, molecular interactions and biological function are provided. All data in CoDNaS-RNA is free to download and available as a public website that can be of interest for researchers in computational biology and other life science disciplines. CoDNaS-RNA and the latest version of its data are available at http://ufq.unq.edu.ar/codnasrna or https://codnas-rna.bioinformatica.org/. Supplementary data are available at Bioinformatics online.",CoDNaS-RNA,0.989147276,NA,0,CoDNaS-RNA,0.989147276,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/25/2021 +31029701,http://hive.biochemistry.gwu.edu/review/codon2,"Codon and Codon-Pair Usage Tables (CoCoPUTs): Facilitating Genetic Variation Analyses and Recombinant Gene Design. Usage of sequential codon-pairs is non-random and unique to each species. Codon-pair bias is related to but clearly distinct from individual codon usage bias. 
Codon-pair bias is thought to affect translational fidelity and efficiency and is presumed to be under the selective pressure. It was suggested that changes in codon-pair utilization may affect human disease more significantly than changes in single codons. Although recombinant gene technologies often take codon-pair usage bias into account, codon-pair usage data/tables are not readily available, thus potentially impeding research efforts. The present computational resource (https://hive.biochemistry.gwu.edu/review/codon2) systematically addresses this issue. Building on our recent HIVE-Codon Usage Tables, we constructed a new database to include genomic codon-pair and dinucleotide statistics of all organisms with sequenced genome, available in the GenBank. We believe that the growing understanding of the importance of codon-pair usage will make this resource an invaluable tool to many researchers in academia and pharmaceutical industry.",CoCoPUTs,0.973919183,Codon and Codon-Pair Usage Tables,0.987904727,Codon and Codon-Pair Usage Tables,0.987904727,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/26/2019 +30357342,http://phylodb.unil.ch/CoevDB,"CoevDB: a database of intramolecular coevolution among protein-coding genes of the bony vertebrates. The study of molecular coevolution, due to its potential to identify gene regions under functional or structural constraints, has recently been subject to numerous scientific inquiries. Particular efforts have been conducted to develop methods predicting the presence of coevolution in molecular sequences. Among these methods, a few aim to model the underlying evolutionary process of coevolution, which enable to differentiate the shared history of genes to coevolution and thus improve their accuracy. However, the usage of such methods remains sparse due to their expensive computational cost and the lack of resources alleviating this issue. 
Here we present CoevDB (http://phylodb.unil.ch/CoevDB), a database containing the result of a large-scale analysis of intramolecular coevolution of 8201 protein-coding genes of bony vertebrates. The web interface of CoevDB gives access to the results to 800 millions of statistical tests corresponding to all the pairs of sites analyzed. Several type of queries enable users to explore the database by either targeting specific genes or by discovering genes having promising estimations of coevolution.",CoevDB,0.997794569,NA,0,CoevDB,0.997794569,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +28334239,http://www.bioapp.org/coexpressMAP,"Comparison of the general co-expression landscapes between human and mouse. The murine model serves as an important experimental system in biomedical science because of its high degree of similarities at the sequence level with human. Recent studies have compared the transcriptional landscapes between human and mouse, but the general co-expression landscapes have not been characterized. Here, we calculated the general co-expression coefficients and constructed the general co-expression maps for human and mouse. The differences and similarities of the general co-expression maps between the two species were compared in detail. The results showed low similarities in the human and mouse, with only about 36.54% of the co-expression relationships conserved between the two species. These results indicate that researchers should pay attention to these differences when performing research using the expression data of human and mouse. To facilitate use of this information, we also developed the human-mouse general co-expression difference database (coexpressMAP) to search differences in co-expression between human and mouse. 
This database is freely available at http://www.bioapp.org/coexpressMAP.",coexpressMAP,0.980166078,human-mouse general co-expression difference database,0.907979217,coexpressMAP,0.980166078,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2018 +32436316,http://bioinformatics.fafu.edu.cn/fly,"CoFly: A gene coexpression database for the fruit fly Drosophila melanogaster. The fruit fly Drosophila melanogaster can be used as a model organism for studying various problems in biomedicine and pest management. A large number of fruit fly transcriptomes have been profiled in various cell types, tissues, development stages, toxicological exposures, and other conditions by microarray. Until now, there are still no database developed for exploring those precious data. Microarray data for 4,367 samples from National Center for Biotechnology Information Gene Expression Omnibus was collected, and analyzed by weighted gene coexpression network analysis algorithm. Fifty one gene coexpression modules that are related to cell types, tissues, development stages, and other experimental conditions were identified. The high dimensional gene expression was reduced to tens of modules that were associated with experiments/traits, representing signatures for phenotypes. Six modules were enriched with genomic regions of clustered genes. Hub genes could also be screened by intramodule connectivity. By analyzing higher order module networks, we found that cell signaling modules are more connected than other modules. Module-based gene function identification may help to discover novel gene function. An easy-to-use database was developed, which provides a new source for gene function study in the fruit fly (http://bioinformatics.fafu.edu.cn/fly/).",CoFly,0.555579185,NA,0,CoFly,0.555579185,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/20/2020 +33167031,http://www.ncbi.nlm.nih.gov/research/COG,"COG database update: focus on microbial diversity, model organisms, and widespread pathogens. 
The Clusters of Orthologous Genes (COG) database, also referred to as the Clusters of Orthologous Groups of proteins, was created in 1997 and went through several rounds of updates, most recently, in 2014. The current update, available at https://www.ncbi.nlm.nih.gov/research/COG, substantially expands the scope of the database to include complete genomes of 1187 bacteria and 122 archaea, typically, with a single genome per genus. In addition, the current version of the COGs includes the following new features: (i) the recently deprecated NCBI's gene index (gi) numbers for the encoded proteins are replaced with stable RefSeq or GenBank\ENA\DDBJ coding sequence (CDS) accession numbers; (ii) COG annotations are updated for >200 newly characterized protein families with corresponding references and PDB links, where available; (iii) lists of COGs grouped by pathways and functional systems are added; (iv) 266 new COGs for proteins involved in CRISPR-Cas immunity, sporulation in Firmicutes and photosynthesis in cyanobacteria are included; and (v) the database is made available as a web page, in addition to FTP. The current release includes 4877 COGs. Future plans include further expansion of the COG collection by adding archaeal COGs (arCOGs), splitting the COGs containing multiple paralogs, and continued refinement of COG annotations.",COG,0.928446889,Clusters of Orthologous Genes,0.892548233,COG,0.928446889,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +25428365,http://www.ncbi.nlm.nih.gov/COG,"Expanded microbial genome coverage and improved protein family annotation in the COG database. Microbial genome sequencing projects produce numerous sequences of deduced proteins, only a small fraction of which have been or will ever be studied experimentally. This leaves sequence analysis as the only feasible way to annotate these proteins and assign to them tentative functions. 
The Clusters of Orthologous Groups of proteins (COGs) database (http://www.ncbi.nlm.nih.gov/COG/), first created in 1997, has been a popular tool for functional annotation. Its success was largely based on (i) its reliance on complete microbial genomes, which allowed reliable assignment of orthologs and paralogs for most genes; (ii) orthology-based approach, which used the function(s) of the characterized member(s) of the protein family (COG) to assign function(s) to the entire set of carefully identified orthologs and describe the range of potential functions when there were more than one; and (iii) careful manual curation of the annotation of the COGs, aimed at detailed prediction of the biological function(s) for each COG while avoiding annotation errors and overprediction. Here we present an update of the COGs, the first since 2003, and a comprehensive revision of the COG annotations and expansion of the genome coverage to include representative complete genomes from all bacterial and archaeal lineages down to the genus level. This re-analysis of the COGs shows that the original COG assignments had an error rate below 0.5% and allows an assessment of the progress in functional genomics in the past 12 years. During this time, functions of many previously uncharacterized COGs have been elucidated and tentative functional assignments of many COGs have been validated, either by targeted experiments or through the use of high-throughput methods. A particularly important development is the assignment of functions to several widespread, conserved proteins many of which turned out to participate in translation, in particular rRNA maturation and tRNA modification. 
The new version of the COGs is expected to become an important tool for microbial genomics.",COGs,0.89940232,Clusters of Orthologous Groups of proteins,0.764110201,COGs,0.89940232,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/26/2014 +34964846,http://www.cogvic.vip,"The first comprehensive database of germline pathogenic variants in East Asian cancer patients. . Pathogenic germline variants in cancer-associated genes are risk factors for cancer predisposition. However, systematic mining and summarizing of cancer pathogenic or likely pathogenic variants has not been performed for people of East Asian descent. This study aimed to investigate publicly available data to identify germline variants in East Asian cancer cohorts and compare them to variants in Caucasian cancer cohorts. Based on the data we retrieved, we built a comprehensive database, named COGVIC (Catalog of Germline Variants in Cancer). A total of 233 variants in the East Asian population were identified. The majority (87%) of genes with cancer-associated variants were not shared between the East Asian and Caucasian cohorts. This included pathogenic variants in BRCA2. Our study summarized the prevalence of germline variants in East Asian cancer cohorts and provides an easy-to-use online tool to explore germline mutations related to cancer susceptibility. http://www.cogvic.vip/.",COGVIC,0.994361281,Catalog of Germline Variants in,0.96474456,COGVIC,0.994361281,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2021 +22275896,http://coins.mrn.org,"COINS: An Innovative Informatics and Neuroimaging Tool Suite Built for Large Heterogeneous Datasets. The availability of well-characterized neuroimaging data with large numbers of subjects, especially for clinical populations, is critical to advancing our understanding of the healthy and diseased brain. 
Such data enables questions to be answered in a much more generalizable manner and also has the potential to yield solutions derived from novel methods that were conceived after the original studies' implementation. Though there is currently growing interest in data sharing, the neuroimaging community has been struggling for years with how to best encourage sharing data across brain imaging studies. With the advent of studies that are much more consistent across sites (e.g., resting functional magnetic resonance imaging, diffusion tensor imaging, and structural imaging) the potential of pooling data across studies continues to gain momentum. At the mind research network, we have developed the collaborative informatics and neuroimaging suite (COINS; http://coins.mrn.org) to provide researchers with an information system based on an open-source model that includes web-based tools to manage studies, subjects, imaging, clinical data, and other assessments. The system currently hosts data from nine institutions, over 300 studies, over 14,000 subjects, and over 19,000 MRI, MEG, and EEG scan sessions in addition to more than 180,000 clinical assessments. In this paper we provide a description of COINS with comparison to a valuable and popular system known as XNAT. Although there are many similarities between COINS and other electronic data management systems, the differences that may concern researchers in the context of multi-site, multi-organizational data sharing environments with intuitive ease of use and PHI security are emphasized as important attributes.",COINS,0.97806251,and,0.496829301,COINS,0.97806251,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/23/2011 +24234444,http://collectf.umbc.edu,"CollecTF: a database of experimentally validated transcription factor-binding sites in Bacteria. 
The influx of high-throughput data and the need for complex models to describe the interaction of prokaryotic transcription factors (TF) with their target sites pose new challenges for TF-binding site databases. CollecTF (http://collectf.umbc.edu) compiles data on experimentally validated, naturally occurring TF-binding sites across the Bacteria domain, placing a strong emphasis on the transparency of the curation process, the quality and availability of the stored data and fully customizable access to its records. CollecTF integrates multiple sources of data automatically and openly, allowing users to dynamically redefine binding motifs and their experimental support base. Data quality and currency are fostered in CollecTF by adopting a sustainable model that encourages direct author submissions in combination with in-house validation and curation of published literature. CollecTF entries are periodically submitted to NCBI for integration into RefSeq complete genome records as link-out features, maximizing the visibility of the data and enriching the annotation of RefSeq files with regulatory information. Seeking to facilitate comparative genomics and machine-learning analyses of regulatory interactions, in its initial release CollecTF provides domain-wide coverage of two TF families (LexA and Fur), as well as extensive representation for a clinically important bacterial family, the Vibrionaceae.",CollecTF,0.998359561,NA,0,CollecTF,0.998359561,1,NA,27114493,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/14/2013 +27114493,http://www.collectf.org,"From data repositories to submission portals: rethinking the role of domain-specific databases in CollecTF. . Domain-specific databases are essential resources for the biomedical community, leveraging expert knowledge to curate published literature and provide access to referenced data and knowledge. 
The limited scope of these databases, however, poses important challenges on their infrastructure, visibility, funding and usefulness to the broader scientific community. CollecTF is a community-oriented database documenting experimentally validated transcription factor (TF)-binding sites in the Bacteria domain. In its quest to become a community resource for the annotation of transcriptional regulatory elements in bacterial genomes, CollecTF aims to move away from the conventional data-repository paradigm of domain-specific databases. Through the adoption of well-established ontologies, identifiers and collaborations, CollecTF has progressively become also a portal for the annotation and submission of information on transcriptional regulatory elements to major biological sequence resources (RefSeq, UniProtKB and the Gene Ontology Consortium). This fundamental change in database conception capitalizes on the domain-specific knowledge of contributing communities to provide high-quality annotations, while leveraging the availability of stable information hubs to promote long-term access and provide high-visibility to the data. As a submission portal, CollecTF generates TF-binding site information through direct annotation of RefSeq genome records, definition of TF-based regulatory networks in UniProtKB entries and submission of functional annotations to the Gene Ontology. As a database, CollecTF provides enhanced search and browsing, targeted data exports, binding motif analysis tools and integration with motif discovery and search platforms. 
This innovative approach will allow CollecTF to focus its limited resources on the generation of high-quality information and the provision of specialized access to the data.Database URL: http://www.collectf.org/.",CollecTF,0.996962905,NA,0,CollecTF,0.996962905,1,NA,24234444,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,4/25/2016 +32073269,http://spin.ccic.osu.edu/index.php/colmarm/index2,"COLMAR Lipids Web Server and Ultrahigh-Resolution Methods for Two-Dimensional Nuclear Magnetic Resonance- and Mass Spectrometry-Based Lipidomics. Accurate identification of lipids in biological samples is a key step in lipidomics studies. Multidimensional nuclear magnetic resonance (NMR) spectroscopy is a powerful analytical tool for this purpose as it provides comprehensive structural information on lipid composition at atomic resolution. However, the interpretation of NMR spectra of complex lipid mixtures is currently hampered by limited spectral resolution and the absence of a customized lipid NMR database along with user-friendly spectral analysis tools. We introduce a new two-dimensional (2D) NMR metabolite database ""COLMAR Lipids"" that was specifically curated for hydrophobic metabolites presently containing 501 compounds with accurate experimental 2D 13C-1H heteronuclear single quantum coherence (HSQC) chemical shift data measured in CDCl3. A new module in the public COLMAR suite of NMR web servers was developed for the (semi)automated analysis of complex lipidomics mixtures (http://spin.ccic.osu.edu/index.php/colmarm/index2). To obtain 2D HSQC spectra with the necessary high spectral resolution along both 13C and 1H dimensions, nonuniform sampling in combination with pure shift spectroscopy was applied allowing the extraction of an abundance of unique cross-peaks belonging to hydrophobic compounds in complex lipidomics mixtures. 
As shown here, this information is critical for the unambiguous identification of underlying lipid molecules by means of the new COLMAR Lipids web server, also in combination with mass spectrometry, as is demonstrated for Caco-2 cell and lung tissue cell extracts.",COLMAR Lipids,0.91474843,NA,0,COLMAR Lipids,0.91474843,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/4/2020 +26586805,http://colombos.net,"COLOMBOS v3.0: leveraging gene expression compendia for cross-species analyses. COLOMBOS is a database that integrates publicly available transcriptomics data for several prokaryotic model organisms. Compared to the previous version it has more than doubled in size, both in terms of species and data available. The manually curated condition annotation has been overhauled as well, giving more complete information about samples' experimental conditions and their differences. Functionality-wise cross-species analyses now enable users to analyse expression data for all species simultaneously, and identify candidate genes with evolutionary conserved expression behaviour. All the expression-based query tools have undergone a substantial improvement, overcoming the limit of enforced co-expression data retrieval and instead enabling the return of more complex patterns of expression behaviour. COLOMBOS is freely available through a web application at http://colombos.net/. The complete database is also accessible via REST API or downloadable as tab-delimited text files.",COLOMBOS,0.998501897,NA,0,COLOMBOS,0.998501897,1,NA,24214998,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,11/19/2015 +24214998,http://www.colombos.net,"COLOMBOS v2.0: an ever expanding collection of bacterial expression compendia. The COLOMBOS database (http://www.colombos.net) features comprehensive organism-specific cross-platform gene expression compendia of several bacterial model organisms and is supported by a fully interactive web portal and an extensive web API. 
COLOMBOS was originally published in PLoS One, and COLOMBOS v2.0 includes both an update of the expression data, by expanding the previously available compendia and by adding compendia for several new species, and an update of the surrounding functionality, with improved search and visualization options and novel tools for programmatic access to the database. The scope of the database has also been extended to incorporate RNA-seq data in our compendia by a dedicated analysis pipeline. We demonstrate the validity and robustness of this approach by comparing the same RNA samples measured in parallel using both microarrays and RNA-seq. As far as we know, COLOMBOS currently hosts the largest homogenized gene expression compendia available for seven bacterial model organisms.",COLOMBOS,0.998174131,NA,0,COLOMBOS,0.998174131,1,NA,26586805,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,11/8/2013 +33181822,http://data.color.com,"Color Data v2: a user-friendly, open-access database with hereditary cancer and hereditary cardiovascular conditions datasets. . Publicly available genetic databases promote data sharing and fuel scientific discoveries for the prevention, treatment and management of disease. In 2018, we built Color Data, a user-friendly, open access database containing genotypic and self-reported phenotypic information from 50 000 individuals who were sequenced for 30 genes associated with hereditary cancer. In a continued effort to promote access to these types of data, we launched Color Data v2, an updated version of the Color Data database. This new release includes additional clinical genetic testing results from more than 18 000 individuals who were sequenced for 30 genes associated with hereditary cardiovascular conditions as well as polygenic risk scores for breast cancer, coronary artery disease and atrial fibrillation. 
In addition, we used self-reported phenotypic information to implement the following four clinical risk models: Gail Model for 5-year risk of breast cancer, Claus Model for lifetime risk of breast cancer, simple office-based Framingham Coronary Heart Disease Risk Score for 10-year risk of coronary heart disease and CHARGE-AF simple score for 5-year risk of atrial fibrillation. These new features and capabilities are highlighted through two sample queries in the database. We hope that the broad dissemination of these data will help researchers continue to explore genotype-phenotype correlations and identify novel variants for functional analysis, enabling scientific discoveries in the field of population genomics. Database URL: https://data.color.com/.",Color Data,0.852575928,NA,0,Color Data,0.852575928,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +33313674,http://rna.sysu.edu.cn/colorcells,"ColorCells: a database of expression, classification and functions of lncRNAs in single cells. . Although long noncoding RNAs (lncRNAs) have significant tissue specificity, their expression and variability in single cells remain unclear. Here, we developed ColorCells (http://rna.sysu.edu.cn/colorcells/), a resource for comparative analysis of lncRNAs expression, classification and functions in single-cell RNA-Seq data. ColorCells was applied to 167 913 publicly available scRNA-Seq datasets from six species, and identified a batch of cell-specific lncRNAs. These lncRNAs show surprising levels of expression variability between different cell clusters, and has the comparable cell classification ability as known marker genes. Cell-specific lncRNAs have been identified and further validated by in vitro experiments. We found that lncRNAs are typically co-expressed with the mRNAs in the same cell cluster, which can be used to uncover lncRNAs' functions. 
Our study emphasizes the need to uncover lncRNAs in all cell types and shows the power of lncRNAs as novel marker genes at single cell resolution.",ColorCells,0.997294188,NA,0,ColorCells,0.997294188,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +26496946,http://www.colonatlas.org,"Colorectal cancer atlas: An integrative resource for genomic and proteomic annotations from colorectal cancer cell lines and tissues. In order to advance our understanding of colorectal cancer (CRC) development and progression, biomedical researchers have generated large amounts of OMICS data from CRC patient samples and representative cell lines. However, these data are deposited in various repositories or in supplementary tables. A database which integrates data from heterogeneous resources and enables analysis of the multidimensional data sets, specifically pertaining to CRC is currently lacking. Here, we have developed Colorectal Cancer Atlas (http://www.colonatlas.org), an integrated web-based resource that catalogues the genomic and proteomic annotations identified in CRC tissues and cell lines. The data catalogued to-date include sequence variations as well as quantitative and non-quantitative protein expression data. The database enables the analysis of these data in the context of signaling pathways, protein-protein interactions, Gene Ontology terms, protein domains and post-translational modifications. Currently, Colorectal Cancer Atlas contains data for >13 711 CRC tissues, >165 CRC cell lines, 62 251 protein identifications, >8.3 million MS/MS spectra, >18 410 genes with sequence variations (404 278 entries) and 351 pathways with sequence variants. 
Overall, Colorectal Cancer Atlas has been designed to serve as a central resource to facilitate research in CRC.",Colorectal Cancer Atlas,0.970538229,NA,0,Colorectal Cancer Atlas,0.970538229,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/22/2015 +21177655,"http://www.oxfordjournals.org/nar/database/a/, http://nar.oxfordjournals.org","The 2011 Nucleic Acids Research Database Issue and the online Molecular Biology Database Collection. The current 18th Database Issue of Nucleic Acids Research features descriptions of 96 new and 83 updated online databases covering various areas of molecular biology. It includes two editorials, one that discusses COMBREX, a new exciting project aimed at figuring out the functions of the 'conserved hypothetical' proteins, and one concerning BioDBcore, a proposed description of the 'minimal information about a biological database'. Papers from the members of the International Nucleotide Sequence Database collaboration (INSDC) describe each of the participating databases, DDBJ, ENA and GenBank, principles of data exchange within the collaboration, and the recently established Sequence Read Archive. A testament to the longevity of databases, this issue includes updates on the RNA modification database, Definition of Secondary Structure of Proteins (DSSP) and Homology-derived Secondary Structure of Proteins (HSSP) databases, which have not been featured here in >12 years. There is also a block of papers describing recent progress in protein structure databases, such as Protein DataBank (PDB), PDB in Europe (PDBe), CATH, SUPERFAMILY and others, as well as databases on protein structure modeling, protein-protein interactions and the organization of inter-protein contact sites. Other highlights include updates of the popular gene expression databases, GEO and ArrayExpress, several cancer gene databases and a detailed description of the UK PubMed Central project. 
The Nucleic Acids Research online Database Collection, available at: http://www.oxfordjournals.org/nar/database/a/, now lists 1330 carefully selected molecular biology databases. The full content of the Database Issue is freely available online at the Nucleic Acids Research web site (http://nar.oxfordjournals.org/).",COMBREX,0.983597279,Acids,0.534308016,COMBREX,0.983597279,1,"25593347.0, 24316579.0, 25593347.0",NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,NA,1/1/2011 +26051695,http://comirnet.di.uniba.it,"ComiRNet: a web-based system for the analysis of miRNA-gene regulatory networks. Background The understanding of mechanisms and functions of microRNAs (miRNAs) is fundamental for the study of many biological processes and for the elucidation of the pathogenesis of many human diseases. Technological advances represented by high-throughput technologies, such as microarray and next-generation sequencing, have significantly aided miRNA research in the last decade. Nevertheless, the identification of true miRNA targets and the complete elucidation of the rules governing their functional targeting remain nebulous. Computational tools have been proven to be fundamental for guiding experimental validations for the discovery of new miRNAs, for the identification of their targets and for the elucidation of their regulatory mechanisms. Description ComiRNet (Co-clustered miRNA Regulatory Networks) is a web-based database specifically designed to provide biologists and clinicians with user-friendly and effective tools for the study of miRNA-gene target interaction data and for the discovery of miRNA functions and mechanisms. 
Data in ComiRNet are produced by a combined computational approach based on: 1) a semi-supervised ensemble-based classifier, which learns to combine miRNA-gene target interactions (MTIs) from several prediction algorithms, and 2) the biclustering algorithm HOCCLUS2, which exploits the large set of produced predictions, with the associated probabilities, to identify overlapping and hierarchically organized biclusters that represent miRNA-gene regulatory networks (MGRNs). Conclusions ComiRNet represents a valuable resource for elucidating the miRNAs' role in complex biological processes by exploiting data on their putative function in the context of MGRNs. ComiRnet currently stores about 5 million predicted MTIs between 934 human miRNAs and 30,875 mRNAs, as well as 15 bicluster hierarchies, each of which represents MGRNs at different levels of granularity. The database can be freely accessed at: http://comirnet.di.uniba.it.",ComiRNet,0.991619289,Co-clustered miRNA Regulatory Networks,0.947112972,ComiRNet,0.991619289,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2015 +24225386,http://commode.i-med.ac.at,"[COMMODE] a large-scale database of molecular descriptors using compounds from PubChem. Background Molecular descriptors have been extensively used in the field of structure-oriented drug design and structural chemistry. They have been applied in QSPR and QSAR models to predict ADME-Tox properties, which specify essential features for drugs. Molecular descriptors capture chemical and structural information, but investigating their interpretation and meaning remains very challenging. Results This paper introduces a large-scale database of molecular descriptors called COMMODE containing more than 25 million compounds originated from PubChem. 
About 2500 DRAGON-descriptors have been calculated for all compounds and integrated into this database, which is accessible through a web interface at http://commode.i-med.ac.at.",COMMODE,0.942243874,NA,0,COMMODE,0.942243874,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/13/2013 +22836712,http://genomics.brocku.ca/ccmbl,"A comparative cellular and molecular biology of longevity database. Discovering key cellular and molecular traits that promote longevity is a major goal of aging and longevity research. One experimental strategy is to determine which traits have been selected during the evolution of longevity in naturally long-lived animal species. This comparative approach has been applied to lifespan research for nearly four decades, yielding hundreds of datasets describing aspects of cell and molecular biology hypothesized to relate to animal longevity. Here, we introduce a Comparative Cellular and Molecular Biology of Longevity Database, available at ( http://genomics.brocku.ca/ccmbl/ ), as a compendium of comparative cell and molecular data presented in the context of longevity. This open access database will facilitate the meta-analysis of amalgamated datasets using standardized maximum lifespan (MLSP) data (from AnAge). The first edition contains over 800 data records describing experimental measurements of cellular stress resistance, reactive oxygen species metabolism, membrane composition, protein homeostasis, and genome homeostasis as they relate to vertebrate species MLSP. 
The purpose of this review is to introduce the database and briefly demonstrate its use in the meta-analysis of combined datasets.",NA,0,Comparative Cellular and Molecular Biology of Longevity Database,0.74333477,Comparative Cellular and Molecular Biology of Longevity Database,0.74333477,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/27/2012 +25333826,http://spin.ccic.ohio-state.edu/index.php/hsqc/index,"Unified and isomer-specific NMR metabolomics database for the accurate analysis of (13)C-(1)H HSQC spectra. A new metabolomics database and query algorithm for the analysis of (13)C-(1)H HSQC spectra is introduced, which unifies NMR spectroscopic information on 555 metabolites from both the Biological Magnetic Resonance Data Bank (BMRB) and Human Metabolome Database (HMDB). The new database, termed Complex Mixture Analysis by NMR (COLMAR) (13)C-(1)H HSQC database, can be queried via an interactive, easy to use web interface at http://spin.ccic.ohio-state.edu/index.php/hsqc/index . Our new HSQC database separately treats slowly exchanging isomers that belong to the same metabolite, which permits improved query in cases where lowly populated isomers are below the HSQC detection limit. The performance of our new database and query web server compares favorably with the one of existing web servers, especially for spectra of samples of high complexity, including metabolite mixtures from the model organisms Drosophila melanogaster and Escherichia coli. For such samples, our web server has on average a 37% higher accuracy (true positive rate) and a 82% lower false positive rate, which makes it a useful tool for the rapid and accurate identification of metabolites from (13)C-(1)H HSQC spectra at natural abundance. 
This information can be combined and validated with NMR data from 2D TOCSY-type spectra that provide connectivity information not present in HSQC spectra.",COLMAR,0.561694831,Complex Mixture Analysis by NMR,0.879976043,Complex Mixture Analysis by NMR,0.879976043,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/5/2014 +25348397,http://ComPPI.LinkGroup.hu,"ComPPI: a cellular compartment-specific database for protein-protein interaction network analysis. Here we present ComPPI, a cellular compartment-specific database of proteins and their interactions enabling an extensive, compartmentalized protein-protein interaction network analysis (URL: http://ComPPI.LinkGroup.hu). ComPPI enables the user to filter biologically unlikely interactions, where the two interacting proteins have no common subcellular localizations and to predict novel properties, such as compartment-specific biological functions. ComPPI is an integrated database covering four species (S. cerevisiae, C. elegans, D. melanogaster and H. sapiens). The compilation of nine protein-protein interaction and eight subcellular localization data sets had four curation steps including a manually built, comprehensive hierarchical structure of >1600 subcellular localizations. ComPPI provides confidence scores for protein subcellular localizations and protein-protein interactions. ComPPI has user-friendly search options for individual proteins giving their subcellular localization, their interactions and the likelihood of their interactions considering the subcellular localization of their interacting partners. Download options of search results, whole-proteomes, organelle-specific interactomes and subcellular localization data are available on its website. 
Due to its novel features, ComPPI is useful for the analysis of experimental results in biochemistry and molecular biology, as well as for proteome-wide studies in bioinformatics and network science helping cellular biology, medicine and drug design.",ComPPI,0.99763155,NA,0,ComPPI,0.99763155,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/27/2014 +32754758,http://conomode.qnlm.ac/conomode/conomode/index,"ConoMode, a database for conopeptide binding modes. . ConoMode is a database for complex three-dimensional (3D) structures of conopeptides binding with their target proteins. Conopeptides, a large family of peptides from the venom of marine snails of the Conus genus, have exceptionally diverse sequences, and their high specificity to block ion channels makes them crucial as drug leads and tools for physiological studies. ConoMode is a specialized archive for the collection of 3D coordinate data for the conopeptides and their binding target proteins from published literature and the Protein Data Bank. These 3D structures can be determined using experimental methods such as X-ray crystallography and electron microscopy and computational methods including docking, homology modeling and molecular dynamics simulations. The binding modes for the conopeptides determined using computational modeling must be validated based on experimental data. The 3D coordinate data from ConoMode can be searched, visualized, downloaded and uploaded. Currently, ConoMode manages 19 conopeptide sequences (from 10 Conus species), 15 protein sequences and 37 3D structures. ConoMode utilizes a modern technical framework to provide a good user experience on mobile devices with touch interaction features. Furthermore, the database is fully optimized for unstructured data and flexible data models. 
Database URL: http://conomode.qnlm.ac/conomode/conomode/index.",ConoMode,0.997492313,NA,0,ConoMode,0.997492313,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +22058133,http://www.conoserver.org,"ConoServer: updated content, knowledge, and discovery tools in the conopeptide database. ConoServer (http://www.conoserver.org) is a database specializing in the sequences and structures of conopeptides, which are toxins expressed by marine cone snails. Cone snails are carnivorous gastropods, which hunt their prey using a cocktail of toxins that potently subvert nervous system function. The ability of these toxins to specifically target receptors, channels and transporters of the nervous system has attracted considerable interest for their use in physiological research and as drug leads. Since the founding publication on ConoServer in 2008, the number of entries in the database has nearly doubled, the interface has been redesigned and new annotations have been added, including a more detailed description of cone snail species, biological activity measurements and information regarding the identification of each sequence. Automatically updated statistics on classification schemes, three-dimensional structures, conopeptide-bearing species and endoplasmic reticulum signal sequence conservation trends, provide a convenient overview of current knowledge on conopeptides. Transcriptomics and proteomics have began generating massive numbers of new conopeptide sequences, and two dedicated tools have been recently implemented in ConoServer to standardize the analysis of conopeptide precursor sequences and to help in the identification by mass spectrometry of toxins whose sequences were predicted at the nucleic acid level.",ConoServer,0.996880889,NA,0,ConoServer,0.996880889,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/3/2011 +27606777,http://consensuspathdb.org,"Analyzing and interpreting genome data at the network level with ConsensusPathDB. 
ConsensusPathDB consists of a comprehensive collection of human (as well as mouse and yeast) molecular interaction data integrated from 32 different public repositories and a web interface featuring a set of computational methods and visualization tools to explore these data. This protocol describes the use of ConsensusPathDB (http://consensuspathdb.org) with respect to the functional and network-based characterization of biomolecules (genes, proteins and metabolites) that are submitted to the system either as a priority list or together with associated experimental data such as RNA-seq. The tool reports interaction network modules, biochemical pathways and functional information that are significantly enriched by the user's input, applying computational methods for statistical over-representation, enrichment and graph analysis. The results of this protocol can be observed within a few minutes, even with genome-wide data. The resulting network associations can be used to interpret high-throughput data mechanistically, to characterize and prioritize biomarkers, to integrate different omics levels, to design follow-up functional assay experiments and to generate topology for kinetic models at different scales.",ConsensusPathDB,0.998762488,NA,0,ConsensusPathDB,0.998762488,1,NA,23143270,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,9/8/2016 +23143270,http://ConsensusPathDB.org,"The ConsensusPathDB interaction database: 2013 update. Knowledge of the various interactions between molecules in the cell is crucial for understanding cellular processes in health and disease. Currently available interaction databases, being largely complementary to each other, must be integrated to obtain a comprehensive global map of the different types of interactions. We have previously reported the development of an integrative interaction database called ConsensusPathDB (http://ConsensusPathDB.org) that aims to fulfill this task. 
In this update article, we report its significant progress in terms of interaction content and web interface tools. ConsensusPathDB has grown mainly due to the integration of 12 further databases; it now contains 215 541 unique interactions and 4601 pathways from overall 30 databases. Binary protein interactions are scored with our confidence assessment tool, IntScore. The ConsensusPathDB web interface allows users to take advantage of these integrated interaction and pathway data in different contexts. Recent developments include pathway analysis of metabolite lists, visualization of functional gene/metabolite sets as overlap graphs, gene set analysis based on protein complexes and induced network modules analysis that connects a list of genes through various interaction types. To facilitate the interactive, visual interpretation of interaction and pathway data, we have re-implemented the graph visualization feature of ConsensusPathDB using the Cytoscape.js library.",ConsensusPathDB,0.997402191,NA,0,ConsensusPathDB,0.997402191,1,NA,27606777,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/11/2012 +31702846,http://consurfdb.tau.ac.il,"ConSurf-DB: An accessible repository for the evolutionary conservation patterns of the majority of PDB proteins. Patterns observed by examining the evolutionary relationships among proteins of common origin can reveal the structural and functional importance of specific residue positions. In particular, amino acids that are highly conserved (i.e., their positions evolve at a slower rate than other positions) are particularly likely to be of biological importance, for example, for ligand binding. ConSurf is a bioinformatics tool for accurately estimating the evolutionary rate of each position in a protein family. Here we introduce a new release of ConSurf-DB, a database of precalculated ConSurf evolutionary conservation profiles for proteins of known structure. 
ConSurf-DB provides high-accuracy estimates of the evolutionary rates of the amino acids in each protein. A reliable estimate of a query protein's evolutionary rates depends on having a sufficiently large number of effective homologues (i.e., nonredundant yet sufficiently similar). With current sequence data, ConSurf-DB covers 82% of the PDB proteins. It will be updated on a regular basis to ensure that coverage remains high-and that it might even increase. Much effort was dedicated to improving the user experience. The repository is available at https://consurfdb.tau.ac.il/. BROADER AUDIENCE: By comparing a protein to other proteins of similar origin, it is possible to determine the extent to which each amino acid position in the protein evolved slowly or rapidly. A protein's evolutionary profile can provide valuable insights: For example, amino acid positions that are highly conserved (i.e., evolved slowly) are particularly likely to be of structural and/or functional importance, for example, for ligand binding and catalysis. We introduce here a new and improved version of ConSurf-DB, a continually updated database that provides precalculated evolutionary profiles of proteins with known structure.",ConSurf,0.988563061,NA,0,ConSurf,0.988563061,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/22/2019 +29335563,http://www.mrc-lmb.cam.ac.uk/pca,"Visualization and analysis of non-covalent contacts using the Protein Contacts Atlas. Visualizations of biomolecular structures empower us to gain insights into biological functions, generate testable hypotheses, and communicate biological concepts. Typical visualizations (such as ball and stick) primarily depict covalent bonds. In contrast, non-covalent contacts between atoms, which govern normal physiology, pathogenesis, and drug action, are seldom visualized. We present the Protein Contacts Atlas, an interactive resource of non-covalent contacts from over 100,000 PDB crystal structures. 
We developed multiple representations for visualization and analysis of non-covalent contacts at different scales of organization: atoms, residues, secondary structure, subunits, and entire complexes. The Protein Contacts Atlas enables researchers from different disciplines to investigate diverse questions in the framework of non-covalent contacts, including the interpretation of allostery, disease mutations and polymorphisms, by exploring individual subunits, interfaces, and protein-ligand contacts and by mapping external information. The Protein Contacts Atlas is available at http://www.mrc-lmb.cam.ac.uk/pca/ and also through PDBe.",Contacts,0.618500888,NA,0,Contacts,0.618500888,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/15/2018 +27980519,http://strube.cbrc.kaust.edu.sa/contaminer,"ContaMiner and ContaBase: a webserver and database for early identification of unwantedly crystallized protein contaminants. Solving the phase problem in protein X-ray crystallography relies heavily on the identity of the crystallized protein, especially when molecular replacement (MR) methods are used. Yet, it is not uncommon that a contaminant crystallizes instead of the protein of interest. Such contaminants may be proteins from the expression host organism, protein fusion tags or proteins added during the purification steps. Many contaminants co-purify easily, crystallize and give good diffraction data. Identification of contaminant crystals may take time, since the presence of the contaminant is unexpected and its identity unknown. A webserver (ContaMiner) and a contaminant database (ContaBase) have been established, to allow fast MR-based screening of crystallographic data against currently 62 known contaminants. The web-based ContaMiner (available at http://strube.cbrc.kaust.edu.sa/contaminer/) currently produces results in 5 min to 4 h. 
The program is also available in a github repository and can be installed locally. ContaMiner enables screening of novel crystals at synchrotron beamlines, and it would be valuable as a routine safety check for 'crystallization and preliminary X-ray analysis' publications. Thus, in addition to potentially saving X-ray crystallographers much time and effort, ContaMiner might considerably lower the risk of publishing erroneous data.",ContaMiner,0.980479717,NA,0,ContaMiner,0.980479717,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/2/2016 +33798715,http://convida.inf.um.es,"COnVIDa: COVID-19 multidisciplinary data collection and dashboard. Since the first reported case in Wuhan in late 2019, COVID-19 has rapidly spread worldwide, dramatically impacting the lives of millions of citizens. To deal with the severe crisis resulting from the pandemic, worldwide institutions have been forced to make decisions that profoundly affect the socio-economic realm. In this sense, researchers from diverse knowledge areas are investigating the behavior of the disease in a rush against time. In both cases, the lack of reliable data has been an obstacle to carry out such tasks with accuracy. To tackle this challenge, COnVIDa (https://convida.inf.um.es) has been designed and developed as a user-friendly tool that easily gathers rigorous multidisciplinary data related to the COVID-19 pandemic from different data sources. In particular, the pandemic expansion is analyzed with variables of health nature, but also social ones, mobility, etc. Besides, COnVIDa permits to smoothly join such data, compare and download them for further analysis. Due to the open-science nature of the project, COnVIDa is easily extensible to any other region of the planet. 
In this way, COnVIDa becomes a data facilitator for decision-making processes, as well as a catalyst for new scientific researches related to this pandemic.",COnVIDa,0.995118141,NA,0,COnVIDa,0.995118141,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/30/2021 +31269035,http://landslides.nasa.gov,"Using citizen science to expand the global map of landslides: Introducing the Cooperative Open Online Landslide Repository (COOLR). Robust inventories are vital for improving assessment of and response to deadly and costly landslide hazards. However, collecting landslide events in inventories is difficult at the global scale due to inconsistencies in or the absence of landslide reporting. Citizen science is a valuable opportunity for addressing some of these challenges. The new Cooperative Open Online Landslide Repository (COOLR) supplements data in a NASA-developed Global Landslide Catalog (GLC) with citizen science reports to build a more robust, publicly available global inventory. This manuscript introduces the COOLR project and its methods, evaluates the initial citizen science results from the first 13 months, and discusses future improvements to increase the global engagement with the project. The COOLR project (https://landslides.nasa.gov) contains Landslide Reporter, the first global citizen science project for landslides, and Landslide Viewer, a portal to visualize data from COOLR and other satellite and model products. From March 2018 to April 2019, 49 citizen scientists contributed 162 new landslide events to COOLR. These events spanned 37 countries in five continents. The initial results demonstrated that both expert and novice participants are contributing via Landslide Reporter. Citizen scientists are filling in data gaps through news sources in 11 different languages, in-person observations, and new landslide events occurring hundreds and thousands of kilometers away from any existing GLC data. 
The data is of sufficient accuracy to use in NASA susceptibility and hazard models. COOLR continues to expand as an open platform of landslide inventories with new data from citizen scientists, NASA scientists, and other landslide groups. Future work on the COOLR project will seek to increase participation and functionality of the platform as well as move towards collective post-disaster mapping.",COOLR,0.98030597,Cooperative Open Online Landslide Repository,0.584065162,COOLR,0.98030597,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/3/2019 +27242036,"http://cosbi.ee.ncku.edu.tw/CoopTFD/, http://cosbi2.ee.ncku.edu.tw/CoopTFD","CoopTFD: a repository for predicted yeast cooperative transcription factor pairs. . In eukaryotic cells, transcriptional regulation of gene expression is usually accomplished by cooperative Transcription Factors (TFs). Therefore, knowing cooperative TFs is helpful for uncovering the mechanisms of transcriptional regulation. In yeast, many cooperative TF pairs have been predicted by various algorithms in the literature. However, until now, there is still no database which collects the predicted yeast cooperative TFs from existing algorithms. This prompts us to construct Cooperative Transcription Factors Database (CoopTFD), which has a comprehensive collection of 2622 predicted cooperative TF pairs (PCTFPs) in yeast from 17 existing algorithms. For each PCTFP, our database also provides five types of validation information: (i) the algorithms which predict this PCTFP, (ii) the publications which experimentally show that this PCTFP has physical or genetic interactions, (iii) the publications which experimentally study the biological roles of both TFs of this PCTFP, (iv) the common Gene Ontology (GO) terms of this PCTFP and (v) the common target genes of this PCTFP. Based on the provided validation information, users can judge the biological plausibility of a PCTFP of interest. 
We believe that CoopTFD will be a valuable resource for yeast biologists to study the combinatorial regulation of gene expression controlled by cooperative TFs.Database URL: http://cosbi.ee.ncku.edu.tw/CoopTFD/ or http://cosbi2.ee.ncku.edu.tw/CoopTFD/.",CoopTFD,0.997820497,Cooperative Transcription Factors Database,0.965329289,CoopTFD,0.997820497,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/30/2016 +21544197,http://microbiome.osu.edu,"CORE: a phylogenetically-curated 16S rDNA database of the core oral microbiome. Comparing bacterial 16S rDNA sequences to GenBank and other large public databases via BLAST often provides results of little use for identification and taxonomic assignment of the organisms of interest. The human microbiome, and in particular the oral microbiome, includes many taxa, and accurate identification of sequence data is essential for studies of these communities. For this purpose, a phylogenetically curated 16S rDNA database of the core oral microbiome, CORE, was developed. The goal was to include a comprehensive and minimally redundant representation of the bacteria that regularly reside in the human oral cavity with computationally robust classification at the level of species and genus. Clades of cultivated and uncultivated taxa were formed based on sequence analyses using multiple criteria, including maximum-likelihood-based topology and bootstrap support, genetic distance, and previous naming. A number of classification inconsistencies for previously named species, especially at the level of genus, were resolved. The performance of the CORE database for identifying clinical sequences was compared to that of three publicly available databases, GenBank nr/nt, RDP and HOMD, using a set of sequencing reads that had not been used in creation of the database. CORE offered improved performance compared to other public databases for identification of human oral bacterial 16S sequences by a number of criteria. 
In addition, the CORE database and phylogenetic tree provide a framework for measures of community divergence, and the focused size of the database offers advantages of efficiency for BLAST searching of large datasets. The CORE database is available as a searchable interface and for download at http://microbiome.osu.edu.",CORE,0.780686021,NA,0,CORE,0.780686021,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/22/2011 +33382885,http://corkoakdb.org,"CorkOakDB-The Cork Oak Genome Database Portal. . Quercus suber (cork oak) is an evergreen tree native to the Mediterranean basin, which plays a key role in the ecology and economy of this area. Over the last decades, this species has gone through an observable decline, mostly due to environmental factors. Deciphering the mechanisms of cork oak's response to the environment and getting a deep insight into its biology are crucial to counteract biotic and abiotic stresses compromising the stability of a unique ecosystem. In the light of these setbacks, the publication of the genome in 2018 was a major step towards understanding the genetic make-up of this species. In an effort to integrate this information in a comprehensive, accessible and intuitive format, we have developed The Cork Oak Genome Database Portal (CorkOakDB). The CorkOakDB is supported by the BioData.pt e-infrastructure, the Portuguese ELIXIR node for biological data. The portal gives public access to search and explore the curated genomic and transcriptomic data on this species. Moreover, CorkOakDB provides a user-friendly interface and functional tools to help the research community take advantage of the increased accessibility to genomic information. A study case is provided to highlight the functionalities of the portal. CorkOakDB guarantees the update, curation and data collection, aiming to collect data besides the genetic/genomic information, in order to become the main repository in cork oak research. 
Database URL: http://corkoakdb.org/.",CorkOakDB,0.994003475,Genome,0.661612511,CorkOakDB,0.994003475,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +34016708,http://coronacentral.ai,"Analyzing the vast coronavirus literature with CoronaCentral. . The SARS-CoV-2 pandemic has caused a surge in research exploring all aspects of the virus and its effects on human health. The overwhelming publication rate means that researchers are unable to keep abreast of the literature. To ameliorate this, we present the CoronaCentral resource that uses machine learning to process the research literature on SARS-CoV-2 together with SARS-CoV and MERS-CoV. We categorize the literature into useful topics and article types and enable analysis of the contents, pace, and emphasis of research during the crisis with integration of Altmetric data. These topics include therapeutics, disease forecasting, as well as growing areas such as ""long COVID"" and studies of inequality. This resource, available at https://coronacentral.ai, is updated daily.",CoronaCentral,0.994874954,NA,0,CoronaCentral,0.994874954,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2021 +24991954,http://cortecon.neuralsci.org,"CORTECON: a temporal transcriptome analysis of in vitro human cerebral cortex development from human embryonic stem cells. Many neurological and psychiatric disorders affect the cerebral cortex, and a clearer understanding of the molecular processes underlying human corticogenesis will provide greater insight into such pathologies. To date, knowledge of gene expression changes accompanying corticogenesis is largely based on murine data. Here we present a searchable, comprehensive, temporal gene expression data set encompassing cerebral cortical development from human embryonic stem cells (hESCs). 
Using a modified differentiation protocol that yields neurons suggestive of prefrontal cortex, we identified sets of genes and long noncoding RNAs that significantly change during corticogenesis and those enriched for disease-associations. Numerous alternatively spliced genes with varying temporal patterns of expression are revealed, including TGIF1, involved in holoprosencephaly, and MARK1, involved in autism. We have created a database (http://cortecon.neuralsci.org/) that provides online, query-based access to changes in RNA expression and alternatively spliced transcripts during human cortical development.",CORTECON,0.730165362,NA,0,CORTECON,0.730165362,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2014 +30357367,http://mips.helmholtz-muenchen.de/corum,"CORUM: the comprehensive resource of mammalian protein complexes-2019. CORUM is a database that provides a manually curated repository of experimentally characterized protein complexes from mammalian organisms, mainly human (67%), mouse (15%) and rat (10%). Given the vital functions of these macromolecular machines, their identification and functional characterization is foundational to our understanding of normal and disease biology. The new CORUM 3.0 release encompasses 4274 protein complexes offering the largest and most comprehensive publicly available dataset of mammalian protein complexes. The CORUM dataset is built from 4473 different genes, representing 22% of the protein coding genes in humans. Protein complexes are described by a protein complex name, subunit composition, cellular functions as well as the literature references. Information about stoichiometry of subunits depends on availability of experimental data. Recent developments include a graphical tool displaying known interactions between subunits. This allows the prediction of structural interconnections within protein complexes of unknown structure. 
In addition, we present a set of 58 protein complexes with alternatively spliced subunits. Those were found to affect cellular functions such as regulation of apoptotic activity, protein complex assembly or define cellular localization. CORUM is freely accessible at http://mips.helmholtz-muenchen.de/corum/.",CORUM,0.997224689,of mammalian protein complexes,0.598213032,CORUM,0.997224689,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +24466021,http://corynebacterium.um.edu.my,"CoryneBase: Corynebacterium genomic resources and analysis tools at your fingertips. Corynebacteria are used for a wide variety of industrial purposes but some species are associated with human diseases. With increasing number of corynebacterial genomes having been sequenced, comparative analysis of these strains may provide better understanding of their biology, phylogeny, virulence and taxonomy that may lead to the discoveries of beneficial industrial strains or contribute to better management of diseases. To facilitate the ongoing research of corynebacteria, a specialized central repository and analysis platform for the corynebacterial research community is needed to host the fast-growing amount of genomic data and facilitate the analysis of these data. Here we present CoryneBase, a genomic database for Corynebacterium with diverse functionality for the analysis of genomes aimed to provide: (1) annotated genome sequences of Corynebacterium where 165,918 coding sequences and 4,180 RNAs can be found in 27 species; (2) access to comprehensive Corynebacterium data through the use of advanced web technologies for interactive web interfaces; and (3) advanced bioinformatic analysis tools consisting of standard BLAST for homology search, VFDB BLAST for sequence homology search against the Virulence Factor Database (VFDB), Pairwise Genome Comparison (PGC) tool for comparative genomic analysis, and a newly designed Pathogenomics Profiling Tool (PathoProT) for comparative pathogenomic analysis. 
CoryneBase offers the access of a range of Corynebacterium genomic resources as well as analysis tools for comparative genomics and pathogenomics. It is publicly available at http://corynebacterium.um.edu.my/.",CoryneBase,0.996892035,NA,0,CoryneBase,0.996892035,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/17/2014 +22080556,http://www.coryneregnet.de,"CoryneRegNet 6.0--Updated database content, new analysis methods and novel features focusing on community demands. Post-genomic analysis techniques such as next-generation sequencing have produced vast amounts of data about micro organisms including genetic sequences, their functional annotations and gene regulatory interactions. The latter are genetic mechanisms that control a cell's characteristics, for instance, pathogenicity as well as survival and reproduction strategies. CoryneRegNet is the reference database and analysis platform for corynebacterial gene regulatory networks. In this article we introduce the updated version 6.0 of CoryneRegNet and describe the updated database content which includes, 6352 corynebacterial regulatory interactions compared with 4928 interactions in release 5.0 and 3235 regulations in release 4.0, respectively. We also demonstrate how we support the community by integrating analysis and visualization features for transiently imported custom data, such as gene regulatory interactions. Furthermore, with release 6.0, we provide easy-to-use functions that allow the user to submit data for persistent storage with the CoryneRegNet database. Thus, it offers important options to its users in terms of community demands. CoryneRegNet is publicly available at http://www.coryneregnet.de.",CoryneRegNet,0.996991575,NA,0,CoryneRegNet,0.996991575,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/12/2011 +"25355519, 27727438, 27899578, 30371878",http://cancer.sanger.ac.uk,"COSMIC: exploring the world's knowledge of somatic mutations in human cancer. 
COSMIC, the Catalogue Of Somatic Mutations In Cancer (http://cancer.sanger.ac.uk) is the world's largest and most comprehensive resource for exploring the impact of somatic mutations in human cancer. Our latest release (v70; Aug 2014) describes 2 002 811 coding point mutations in over one million tumor samples and across most human genes. To emphasize depth of knowledge on known cancer genes, mutation information is curated manually from the scientific literature, allowing very precise definitions of disease types and patient details. Combination of almost 20,000 published studies gives substantial resolution of how mutations and phenotypes relate in human cancer, providing insights into the stratification of mutations and biomarkers across cancer patient populations. Conversely, our curation of cancer genomes (over 12,000) emphasizes knowledge breadth, driving discovery of unrecognized cancer-driving hotspots and molecular targets. Our high-resolution curation approach is globally unique, giving substantial insight into molecular biomarkers in human oncology. In addition, COSMIC also details more than six million noncoding mutations, 10,534 gene fusions, 61,299 genome rearrangements, 695,504 abnormal copy number segments and 60,119,787 abnormal expression variants. All these types of somatic mutation are annotated to both the human genome and each affected coding gene, then correlated across disease and mutation types.",COSMIC,0.99756813,the Catalogue Of Somatic Mutations In Cancer,0.939183259,COSMIC,0.99756813,4,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +28595571,http://cottonfgd.org,"CottonFGD: an integrated functional genomics database for cotton. Background Cotton (Gossypium spp.) is the most important fiber and oil crop in the world. With the emergence of huge -omics data sets, it is essential to have an integrated functional genomics database that allows worldwide users to quickly and easily fetch and visualize genomic information. 
Currently available cotton-related databases have some weakness in integrating multiple kinds of -omics data from multiple Gossypium species. Therefore, it is necessary to establish an integrated functional genomics database for cotton. Description We developed CottonFGD (Cotton Functional Genomic Database, https://cottonfgd.org ), an integrated database that includes genomic sequences, gene structural and functional annotations, genetic marker data, transcriptome data, and population genome resequencing data for all four of the sequenced Gossypium species. It consists of three interconnected modules: search, profile, and analysis. These modules make CottonFGD enable both single gene review and batch analysis with multiple kinds of -omics data and multiple species. CottonFGD also includes additional pages for data statistics, bulk data download, and a detailed user manual. Conclusion Equipped with specialized functional modules and modernized visualization tools, and populated with multiple kinds of -omics data, CottonFGD provides a quick and easy-to-use data analysis platform for cotton researchers worldwide.",CottonFGD,0.992367327,Cotton Functional Genomic Database,0.977265196,CottonFGD,0.992367327,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/8/2017 +24203703,http://www.cottongen.org,"CottonGen: a genomics, genetics and breeding database for cotton research. CottonGen (http://www.cottongen.org) is a curated and integrated web-based relational database providing access to publicly available genomic, genetic and breeding data for cotton. CottonGen supercedes CottonDB and the Cotton Marker Database, with enhanced tools for easier data sharing, mining, visualization and data retrieval of cotton research data. CottonGen contains annotated whole genome sequences, unigenes from expressed sequence tags (ESTs), markers, trait loci, genetic maps, genes, taxonomy, germplasm, publications and communication resources for the cotton community. 
Annotated whole genome sequences of Gossypium raimondii are available with aligned genetic markers and transcripts. These whole genome data can be accessed through genome pages, search tools and GBrowse, a popular genome browser. Most of the published cotton genetic maps can be viewed and compared using CMap, a comparative map viewer, and are searchable via map search tools. Search tools also exist for markers, quantitative trait loci (QTLs), germplasm, publications and trait evaluation data. CottonGen also provides online analysis tools such as NCBI BLAST and Batch BLAST.",CottonGen,0.994491816,NA,0,CottonGen,0.994491816,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2013 +34992626,"http://120.78.174.209/, http://db.cngb.org/cottonGVD","CottonGVD: A Comprehensive Genomic Variation Database for Cultivated Cottons. Cultivated cottons are the most important economic crop, which produce natural fiber for the textile industry. In recent years, the genetic basis of several essential traits for cultivated cottons has been gradually elucidated by decoding their genomic variations. Although an abundance of resequencing data is available in public, there is still a lack of a comprehensive tool to exhibit the results of genomic variations and genome-wide association study (GWAS). To assist cotton researchers in utilizing these data efficiently and conveniently, we constructed the cotton genomic variation database (CottonGVD; http://120.78.174.209/ or http://db.cngb.org/cottonGVD). This database contains the published genomic information of three cultivated cotton species, the corresponding population variations (SNP and InDel markers), and the visualized results of GWAS for major traits. Various built-in genomic tools help users retrieve, browse, and query the variations conveniently. The database also provides interactive maps (e.g., Manhattan map, scatter plot, heatmap, and linkage disequilibrium block) to exhibit GWAS and expression GWAS results. 
Cotton researchers could easily focus on phenotype-associated loci visualization, and they are interested in and screen for candidate genes. Moreover, CottonGVD will continue to update by adding more data and functions.",CottonGVD,0.994645226,cotton genomic variation database,0.905625567,CottonGVD,0.994645226,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/21/2021 +25758743,http://www.cottonqtldb.org,"Cotton QTLdb: a cotton QTL database for QTL analysis, visualization, and comparison between Gossypium hirsutum and G. hirsutum√ɬÉ√ǬÇ√ɬÇ√Ǭ†√ɬÉ√ǬÉ√ɬÇ√Ǭó√ɬÉ√ǬÇ√ɬÇ√Ǭ†G. barbadense populations. Key message A specialized database currently containing more than 2200 QTL is established, which allows graphic presentation, visualization and submission of QTL. In cotton quantitative trait loci (QTL), studies are focused on intraspecific Gossypium hirsutum and interspecific G. hirsutum √ɬÉ√ǬÉ√ɬÇ√Ǭó G. barbadense populations. These two populations are commercially important for the textile industry and are evaluated for fiber quality, yield, seed quality, resistance, physiological, and morphological trait QTL. With meta-analysis data based on the vast amount of QTL studies in cotton it will be beneficial to organize the data into a functional database for the cotton community. Here we provide a tool for cotton researchers to visualize previously identified QTL and submit their own QTL to the Cotton QTLdb database. The database provides the user with the option of selecting various QTL trait types from either the G. hirsutum or G. hirsutum √ɬÉ√ǬÉ√ɬÇ√Ǭó G. barbadense populations. Based on the user's QTL trait selection, graphical representations of chromosomes of the population selected are displayed in publication ready images. The database also provides users with trait information on QTL, LOD scores, and explained phenotypic variances for all QTL selected. 
The CottonQTLdb database provides cotton geneticist and breeders with statistical data on cotton QTL previously identified and provides a visualization tool to view QTL positions on chromosomes. Currently the database (Release 1) contains 2274 QTLs, and succeeding QTL studies will be updated regularly by the curators and members of the cotton community that contribute their data to keep the database current. The database is accessible from http://www.cottonqtldb.org.",CottonQTLdb,0.928469539,NA,0,CottonQTLdb,0.928469539,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/11/2015 +32890396,http://cov3d.ibbr.umd.edu,"CoV3D: a database of high resolution coronavirus protein structures. SARS-CoV-2, the etiologic agent of COVID-19, exemplifies the general threat to global health posed by coronaviruses. The urgent need for effective vaccines and therapies is leading to a rapid rise in the number of high resolution structures of SARS-CoV-2 proteins that collectively reveal a map of virus vulnerabilities. To assist structure-based design of vaccines and therapeutics against SARS-CoV-2 and other coronaviruses, we have developed CoV3D, a database and resource for coronavirus protein structures, which is updated on a weekly basis. CoV3D provides users with comprehensive sets of structures of coronavirus proteins and their complexes with antibodies, receptors, and small molecules. Integrated molecular viewers allow users to visualize structures of the spike glycoprotein, which is the major target of neutralizing antibodies and vaccine design efforts, as well as sets of spike-antibody complexes, spike sequence variability, and known polymorphisms. In order to aid structure-based design and analysis of the spike glycoprotein, CoV3D permits visualization and download of spike structures with modeled N-glycosylation at known glycan sites, and contains structure-based classification of spike conformations, generated by unsupervised clustering. 
CoV3D can serve the research community as a centralized reference and resource for spike and other coronavirus protein structures, and is available at: https://cov3d.ibbr.umd.edu.",CoV3D,0.995971898,NA,0,CoV3D,0.995971898,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +33068433,http://cadd.zju.edu.cn/cidb,"CovalentInDB: a comprehensive database facilitating the discovery of covalent inhibitors. Inhibitors that form covalent bonds with their targets have traditionally been considered highly adventurous due to their potential off-target effects and toxicity concerns. However, with the clinical validation and approval of many covalent inhibitors during the past decade, design and discovery of novel covalent inhibitors have attracted increasing attention. A large amount of scattered experimental data for covalent inhibitors have been reported, but a resource by integrating the experimental information for covalent inhibitor discovery is still lacking. In this study, we presented Covalent Inhibitor Database (CovalentInDB), the largest online database that provides the structural information and experimental data for covalent inhibitors. CovalentInDB contains 4511 covalent inhibitors (including 68 approved drugs) with 57 different reactive warheads for 280 protein targets. The crystal structures of some of the proteins bound with a covalent inhibitor are provided to visualize the protein-ligand interactions around the binding site. Each covalent inhibitor is annotated with the structure, warhead, experimental bioactivity, physicochemical properties, etc. Moreover, CovalentInDB provides the covalent reaction mechanism and the corresponding experimental verification methods for each inhibitor towards its target. High-quality datasets are downloadable for users to evaluate and develop computational methods for covalent drug design. 
CovalentInDB is freely accessible at http://cadd.zju.edu.cn/cidb/.",CovalentInDB,0.996971965,Covalent Inhibitor Database,0.987251094,CovalentInDB,0.996971965,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +33009914,http://covdb.popgenetics.net,"A database resource and online analysis tools for coronaviruses on a historical and global scale. . The recent outbreak of COVID-19 caused by a new zoonotic origin coronavirus (SARS-CoV-2 or 2019-nCoV) has sound the alarm for the potential spread of epidemic coronavirus crossing species. With the urgent needs to assist disease control and to provide invaluable scientific information, we developed the coronavirus database (CoVdb), an online genomic, proteomic and evolutionary analysis platform. CoVdb has brought together genomes of more than 5000 coronavirus strains, which were collected from 1941 to 2020, in more than 60 countries and in hosts belonging to more than 30 species, ranging from fish to human. CoVdb presents comprehensive genomic information, such as gene function, subcellular localization, topology and protein structure. To facilitate coronavirus research, CoVdb also provides flexible search approaches and online tools to view and analyze protein structure, to perform multiple alignments, to automatically build phylogenetic trees and to carry on evolutionary analyses. CoVdb can be accessed freely at http://covdb.popgenetics.net. Hopefully, it will accelerate the progress to develop medicines or vaccines to control the pandemic of COVID-19.",CoVdb,0.996979177,coronavirus database,0.904240698,CoVdb,0.996979177,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2021 +32665542,http://exbio.wzw.tum.de/covex,"Exploring the SARS-CoV-2 virus-host-drug interactome for drug repurposing. Coronavirus Disease-2019 (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Various studies exist about the molecular mechanisms of viral infection. 
However, such information is spread across many publications and it is very time-consuming to integrate, and exploit. We develop CoVex, an interactive online platform for SARS-CoV-2 host interactome exploration and drug (target) identification. CoVex integrates virus-human protein interactions, human protein-protein interactions, and drug-target interactions. It allows visual exploration of the virus-host interactome and implements systems medicine algorithms for network-based prediction of drug candidates. Thus, CoVex is a resource to understand molecular mechanisms of pathogenicity and to prioritize candidate therapeutics. We investigate recent hypotheses on a systems biology level to explore mechanistic virus life cycle drivers, and to extract drug repurposing candidates. CoVex renders COVID-19 drug research systems-medicine-ready by giving the scientific community direct access to network medicine algorithms. It is available at https://exbio.wzw.tum.de/covex/.",CoVex,0.995960653,NA,0,CoVex,0.995960653,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/14/2020 +34048576,http://www.covid19dataportal.org,"The COVID-19 Data Portal: accelerating SARS-CoV-2 and COVID-19 research through rapid open access data sharing. The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) pandemic will be remembered as one of the defining events of the 21st century. The rapid global outbreak has had significant impacts on human society and is already responsible for millions of deaths. Understanding and tackling the impact of the virus has required a worldwide mobilisation and coordination of scientific research. The COVID-19 Data Portal (https://www.covid19dataportal.org/) was first released as part of the European COVID-19 Data Platform, on April 20th 2020 to facilitate rapid and open data sharing and analysis, to accelerate global SARS-CoV-2 and COVID-19 research. 
The COVID-19 Data Portal has fortnightly feature releases to continue to add new data types, search options, visualisations and improvements based on user feedback and research. The open datasets and intuitive suite of search, identification and download services, represent a truly FAIR (Findable, Accessible, Interoperable and Reusable) resource that enables researchers to easily identify and quickly obtain the key datasets needed for their COVID-19 research.",COVID-1,0.593866607,NA,0,COVID-1,0.593866607,1,NA,33564397,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,7/1/2021 +33564397,http://covid-19-diagnostics.jrc.ec.europa.eu,"The EU one-stop-shop collection of publicly available information on COVID-19 in vitro diagnostic medical devices. The JRC COVID-19 In Vitro Diagnostic Devices and Test Methods Database, aimed to collect in a single place all publicly available information on performance of CE-marked in vitro diagnostic medical devices (IVDs) as well as in house laboratory-developed devices and related test methods for COVID-19, is here presented. The database, manually curated and regularly updated, has been developed as a follow-up to the Communication from the European Commission ""Guidelines on in vitro diagnostic tests and their performance"" of 15 April 2020 and is freely accessible at https://covid-19-diagnostics.jrc.ec.europa.eu/.",COVID-1,0.42113771,NA,0,COVID-1,0.42113771,1,NA,34048576,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,11/3/2020 +34585731,http://kraza.in/covidium,"COVIDium: a COVID-19 resource compendium. . The severe acute respiratory syndrome coronavirus 2 that causes coronavirus disease 2019 (COVID-19) disrupted the normal functioning throughout the world since early 2020 and it continues to do so. 
Nonetheless, the global pandemic was taken up as a challenge by researchers across the globe to discover an effective cure, either in the form of a drug or vaccine. This resulted in an unprecedented surge of experimental and computational data and publications, which often translated their findings in the form of databases (DBs) and tools. Over 160 such DBs and more than 80 software tools were developed, which are uncharacterized, unannotated, deployed at different universal resource locators and are challenging to reach out through a normal web search. Besides, most of the DBs/tools are present on preprints and are either underutilized or unrecognized because of their inability to make it to top Google search hits. Henceforth, there was a need to crawl and characterize these DBs and create a compendium for easy referencing. The current article is one such concerted effort in this direction to create a COVID-19 resource compendium (COVIDium) that would facilitate the researchers to find suitable DBs and tools for their research studies. COVIDium tries to classify the DBs and tools into 11 broad categories for quick navigation. It also provides end-users some generic hit terms to filter the DB entries for quick access to the resources. Additionally, the DB provides Tracker Dashboard, Neuro Resources, references to COVID-19 datasets and protein-protein interactions. This compendium will be periodically updated to accommodate new resources. Database URL: The COVIDium is accessible through http://kraza.in/covidium/.",COVIDium,0.977083564,NA,0,COVIDium,0.977083564,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/29/2021 +34931882,http://coxbase.q-gaps.de,"CoxBase: an Online Platform for Epidemiological Surveillance, Visualization, Analysis, and Typing of Coxiella burnetii Genomic Sequences. Q (query) fever is an infectious zoonotic disease caused by the Gram-negative bacterium Coxiella burnetii. 
Although the disease has been studied for decades, it still represents a threat due to sporadic outbreaks across farms in Europe. The absence of a central platform for Coxiella typing data management is an important epidemiological gap that is relevant in the case of an outbreak. To fill this gap, we have designed and implemented an online, open-source, web-based platform called CoxBase (https://coxbase.q-gaps.de). This platform includes a database that holds genotyping information on more than 400 Coxiella isolates alongside metadata that annotate them. We have also implemented features for in silico genotyping of completely or minimally assembled Coxiella sequences using five different typing methods, querying of existing isolates, visualization of isolate geodata via aggregation on a world map, and submission of new isolates. We tested our in silico typing method on 50 Coxiella genomes downloaded from the RefSeq database, and we successfully genotyped all genomes except for cases where the sequence quality was poor. We identified new spacer sequences using our implementation of the multispacer sequence typing (MST) in silico typing method and established adaA gene phenotypes for all 50 genomes as well as their plasmid types. IMPORTANCE Q fever is a zoonotic disease that is a source of active epidemiological concern due to its persistent threat to public health. In this project, we have identified areas in the field of Coxiella research, especially regarding public health and genomic analysis, where there is an inadequacy of resources to monitor, organize, and analyze genomic data from C. burnetii. Subsequently, we have created an open, web-based platform that contains epidemiological information, genome typing functions comprising all the available Coxiella typing methods, and tools for isolate data discovery and visualization that could help address the above-mentioned challenges. 
This is the first platform to combine all disparate genotyping systems for Coxiella burnetii as well as metadata assets with tools for genomic comparison and analyses. This platform is a valuable resource for laboratory researchers as well as research epidemiologists interested in investigating the relatedness or dissimilarity among C. burnetii strains.",CoxBase,0.994834006,NA,0,CoxBase,0.994834006,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/21/2021 +"23203868, 25392420, 30462320",http://coxpresdb.jp,"COXPRESdb: a database of comparative gene coexpression networks of eleven species for mammals. Coexpressed gene databases are valuable resources for identifying new gene functions or functional modules in metabolic pathways and signaling pathways. Although coexpressed gene databases are a fundamental platform in the field of plant biology, their use in animal studies is relatively limited. The COXPRESdb (http://coxpresdb.jp) provides coexpression relationships for multiple animal species, as comparisons of coexpressed gene lists can enhance the reliability of gene coexpression determinations. Here, we report the updates of the database, mainly focusing on the following two points. First, we updated our coexpression data by including recent microarray data for the previous seven species (human, mouse, rat, chicken, fly, zebrafish and nematode) and adding four new species (monkey, dog, budding yeast and fission yeast), along with a new human microarray platform. A reliability scoring function was also implemented, based on coexpression conservation to filter out coexpression with low reliability. Second, the network drawing function was updated, to implement automatic cluster analyses with enrichment analyses in Gene Ontology and in cis elements, along with interactive network analyses with Cytoscape Web. 
With these updates, COXPRESdb will become a more powerful tool for analyses of functional and regulatory networks of genes in a variety of animal species.",COXPRESdb,0.98220855,NA,0,COXPRESdb,0.98220855,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +31979981,http://web.iitm.ac.in/bioinfo2/cpad2/index.html,"CPAD 2.0: a repository of curated experimental data on aggregating proteins and peptides. The Curated Protein Aggregation Database (CPAD) is a manually curated and open-access database dedicated to providing comprehensive information related to mechanistic, kinetic and structural aspects of protein and peptide aggregation. The database has been updated to CPAD 2.0 by significantly expanding datasets and improving the user-interface. Key features of CPAD 2.0 are (i) 83,098 data points on aggregation kinetics experiments, (ii) 565 structures related to aggregation, which are classified into proteins, fibrils, and protein-ligand complexes, (iii) 2031 aggregating/non-aggregating peptides with pre-calculated aggregation properties, and (iv) 912 aggregation-prone regions in amyloidogenic proteins. This database will help the scientific community (a) by facilitating research leading to improved understanding of protein aggregation, (b) by helping develop, validate and benchmark mechanistic and kinetic models of protein aggregation, and (c) by assisting experimentalists with design of their investigations and dissemination of data generated by their studies. CPAD 2.0 can be accessed at https://web.iitm.ac.in/bioinfo2/cpad2/index.html.",CPAD,0.985166868,Curated Protein Aggregation Database,0.962279081,CPAD,0.985166868,1,NA,27043825,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/24/2020 +27043825,http://www.iitm.ac.in/bioinfo/CPAD,"CPAD, Curated Protein Aggregation Database: A Repository of Manually Curated Experimental Data on Protein and Peptide Aggregation. 
Accurate distinction between peptide sequences that can form amyloid-fibrils or amorphous β-aggregates, identification of potential aggregation prone regions in proteins, and prediction of change in aggregation rate of a protein upon mutation(s) are critical to research on protein misfolding diseases, such as Alzheimer's and Parkinson's, as well as biotechnological production of protein based therapeutics. We have developed a Curated Protein Aggregation Database (CPAD), which has collected results from experimental studies performed by scientific community aimed at understanding protein/peptide aggregation. CPAD contains more than 2300 experimentally observed aggregation rates upon mutations in known amyloidogenic proteins. Each entry includes numerical values for the following parameters: change in rate of aggregation as measured by fluorescence intensity or turbidity, name and source of the protein, Uniprot and Protein Data Bank codes, single point as well as multiple mutations, and literature citation. The data in CPAD has been supplemented with five different types of additional information: (i) Amyloid fibril forming hexa-peptides, (ii) Amorphous β-aggregating hexa-peptides, (iii) Amyloid fibril forming peptides of different lengths, (iv) Amyloid fibril forming hexa-peptides whose crystal structures are available in the Protein Data Bank (PDB) and (v) Experimentally validated aggregation prone regions found in amyloidogenic proteins. Furthermore, CPAD is linked to other related databases and resources, such as Uniprot, Protein Data Bank, PUBMED, GAP, TANGO, WALTZ etc. We have set up a web interface with different search and display options so that users have the ability to get the data in multiple ways. CPAD is freely available at http://www.iitm.ac.in/bioinfo/CPAD/. 
The potential applications of CPAD have also been discussed.",CPAD,0.964463413,Curated Protein Aggregation Database,0.924321791,CPAD,0.964463413,1,NA,31979981,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,4/4/2016 +21269480,http://cpass.unl.edu,"Searching the protein structure database for ligand-binding site similarities using CPASS v.2. Background A recent analysis of protein sequences deposited in the NCBI RefSeq database indicates that ~8.5 million protein sequences are encoded in prokaryotic and eukaryotic genomes, where ~30% are explicitly annotated as ""hypothetical"" or ""uncharacterized"" protein. Our Comparison of Protein Active-Site Structures (CPASS v.2) database and software compares the sequence and structural characteristics of experimentally determined ligand binding sites to infer a functional relationship in the absence of global sequence or structure similarity. CPASS is an important component of our Functional Annotation Screening Technology by NMR (FAST-NMR) protocol and has been successfully applied to aid the annotation of a number of proteins of unknown function. Findings We report a major upgrade to our CPASS software and database that significantly improves its broad utility. CPASS v.2 is designed with a layered architecture to increase flexibility and portability that also enables job distribution over the Open Science Grid (OSG) to increase speed. Similarly, the CPASS interface was enhanced to provide more user flexibility in submitting a CPASS query. CPASS v.2 now allows for both automatic and manual definition of ligand-binding sites and permits pair-wise, one versus all, one versus list, or list versus list comparisons. Solvent accessible surface area, ligand root-mean square difference, and Cβ distances have been incorporated into the CPASS similarity function to improve the quality of the results. The CPASS database has also been updated. 
Conclusions CPASS v.2 is more than an order of magnitude faster than the original implementation, and allows for multiple simultaneous job submissions. Similarly, the CPASS database of ligand-defined binding sites has increased in size by ~ 38%, dramatically increasing the likelihood of a positive search result. The modification to the CPASS similarity function is effective in reducing CPASS similarity scores for false positives by ~30%, while leaving true positives unaffected. Importantly, receiver operating characteristics (ROC) curves demonstrate the high correlation between CPASS similarity scores and an accurate functional assignment. As indicated by distribution curves, scores ≥ 30% infer a functional similarity. Software URL: http://cpass.unl.edu.",CPASS,0.901464581,ite Structures,0.53467082,CPASS,0.901464581,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/26/2011 +28962356,http://actor.epa.gov/cpcat,"Exploring consumer exposure pathways and patterns of use for chemicals in the environment. Humans are exposed to thousands of chemicals in the workplace, home, and via air, water, food, and soil. A major challenge in estimating chemical exposures is to understand which chemicals are present in these media and microenvironments. Here we describe the Chemical/Product Categories Database (CPCat), a new, publically available (http://actor.epa.gov/cpcat) database of information on chemicals mapped to ""use categories"" describing the usage or function of the chemical. CPCat was created by combining multiple and diverse sources of data on consumer- and industrial-process based chemical uses from regulatory agencies, manufacturers, and retailers in various countries. The database uses a controlled vocabulary of 833 terms and a novel nomenclature to capture and streamline descriptors of chemical use for 43,596 chemicals from the various sources. 
Examples of potential applications of CPCat are provided, including identifying chemicals to which children may be exposed and to support prioritization of chemicals for toxicity screening. CPCat is expected to be a valuable resource for regulators, risk assessors, and exposure scientists to identify potential sources of human exposures and exposure pathways, particularly for use in high-throughput chemical exposure assessment.",CPCat,0.98875711,Chemical/Product Categories Database,0.958640075,CPCat,0.98875711,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/2/2015 +25861964,http://www.ebi.ac.uk/fg/sym,"Cellular phenotype database: a repository for systems microscopy data. Motivation The Cellular Phenotype Database (CPD) is a repository for data derived from high-throughput systems microscopy studies. The aims of this resource are: (i) to provide easy access to cellular phenotype and molecular localization data for the broader research community; (ii) to facilitate integration of independent phenotypic studies by means of data aggregation techniques, including use of an ontology and (iii) to facilitate development of analytical methods in this field. Results In this article we present CPD, its data structure and user interface, propose a minimal set of information describing RNA interference experiments, and suggest a generic schema for management and aggregation of outputs from phenotypic or molecular localization experiments. The database has a flexible structure for management of data from heterogeneous sources of systems microscopy experimental outputs generated by a variety of protocols and technologies and can be queried by gene, reagent, gene attribute, study keywords, phenotype or ontology terms. Availability and implementation CPD is developed as part of the Systems Microscopy Network of Excellence and is accessible at http://www.ebi.ac.uk/fg/sym. 
Contact jes@ebi.ac.uk or ugis@ebi.ac.uk Supplementary information Supplementary data are available at Bioinformatics online.",CPD,0.994626105,Cellular Phenotype Database,0.898467913,CPD,0.994626105,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/9/2015 +22120664,http://cpgr.plantbiology.msu.edu,"The Comprehensive Phytopathogen Genomics Resource: a web-based resource for data-mining plant pathogen genomes. The Comprehensive Phytopathogen Genomics Resource (CPGR) provides a web-based portal for plant pathologists and diagnosticians to view the genome and trancriptome sequence status of 806 bacterial, fungal, oomycete, nematode, viral and viroid plant pathogens. Tools are available to search and analyze annotated genome sequences of 74 bacterial, fungal and oomycete pathogens. Oomycete and fungal genomes are obtained directly from GenBank, whereas bacterial genome sequences are downloaded from the A Systematic Annotation Package (ASAP) database that provides curation of genomes using comparative approaches. Curated lists of bacterial genes relevant to pathogenicity and avirulence are also provided. The Plant Pathogen Transcript Assemblies Database provides annotated assemblies of the transcribed regions of 82 eukaryotic genomes from publicly available single pass Expressed Sequence Tags. Data-mining tools are provided along with tools to create candidate diagnostic markers, an emerging use for genomic sequence data in plant pathology. The Plant Pathogen Ribosomal DNA (rDNA) database is a resource for pathogens that lack genome or transcriptome data sets and contains 131 755 rDNA sequences from GenBank for 17 613 species identified as plant pathogens and related genera. Database URL: http://cpgr.plantbiology.msu.edu.",CPGR,0.99133648,Comprehensive Phytopathogen Genomics Resource,0.985549808,CPGR,0.99133648,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/26/2011 +24214993,http://cplm.biocuckoo.org,"CPLM: a database of protein lysine modifications. 
We reported an integrated database of Compendium of Protein Lysine Modifications (CPLM; http://cplm.biocuckoo.org) for protein lysine modifications (PLMs), which occur at active ε-amino groups of specific lysine residues in proteins and are critical for orchestrating various biological processes. The CPLM database was updated from our previously developed database of Compendium of Protein Lysine Acetylation (CPLA), which contained 7151 lysine acetylation sites in 3311 proteins. Here, we manually collected experimentally identified substrates and sites for 12 types of PLMs, including acetylation, ubiquitination, sumoylation, methylation, butyrylation, crotonylation, glycation, malonylation, phosphoglycerylation, propionylation, succinylation and pupylation. In total, the CPLM database contained 203,972 modification events on 189,919 modified lysines in 45,748 proteins for 122 species. With the dataset, we totally identified 76 types of co-occurrences of various PLMs on the same lysine residues, and the most abundant PLM crosstalk is between acetylation and ubiquitination. Up to 53.5% of acetylation and 33.1% of ubiquitination events co-occur at 10 746 lysine sites. Thus, the various PLM crosstalks suggested that a considerable proportion of lysines were competitively and dynamically regulated in a complicated manner. Taken together, the CPLM database can serve as a useful resource for further research of PLMs.",CPLM,0.995593056,Compendium of Protein Lysine Modifications,0.978943653,CPLM,0.995593056,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2013 +26586798,http://crdd.osdd.net/raghava/cppsite,"CPPsite 2.0: a repository of experimentally validated cell-penetrating peptides. CPPsite 2.0 (http://crdd.osdd.net/raghava/cppsite/) is an updated version of manually curated database (CPPsite) of cell-penetrating peptides (CPPs). The current version holds around 1850 peptide entries, which is nearly two times than the entries in the previous version. 
The updated data were curated from research papers and patents published in last three years. It was observed that most of the CPPs discovered/ tested, in last three years, have diverse chemical modifications (e.g. non-natural residues, linkers, lipid moieties, etc.). We have compiled this information on chemical modifications systematically in the updated version of the database. In order to understand the structure-function relationship of these peptides, we predicted tertiary structure of CPPs, possessing both modified and natural residues, using state-of-the-art techniques. CPPsite 2.0 also maintains information about model systems (in vitro/in vivo) used for CPP evaluation and different type of cargoes (e.g. nucleic acid, protein, nanoparticles, etc.) delivered by these peptides. In order to assist a wide range of users, we developed a user-friendly responsive website, with various tools, suitable for smartphone, tablet and desktop users. In conclusion, CPPsite 2.0 provides significant improvements over the previous version in terms of data content.",CPPsite,0.992566049,NA,0,CPPsite,0.992566049,1,NA,33186582,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/19/2015 +33186582,http://webs.iiitd.edu.in/raghava/cppsite,"Cppsite 2.0: An Available Database of Experimentally Validated Cell-Penetrating Peptides Predicting their Secondary and Tertiary Structures. One of the biggest barriers in drug and vaccine development is to find an effective delivery system. Cell-penetrating peptides (CPPs) play a crucial role for delivery of biological cargoes and pass them through the membranes. Several databases have been developed for therapeutic peptides as potential drug candidates and delivery vehicles. A rapid growth has occurred in many patents and research articles on CPPs as therapeutic peptides. 
To save time and cost in laboratories, prediction and design of CPPs before in vitro/in vivo experiments using computational methods and online web servers are rational. Various online web servers which provide prediction of CPPs including CellPPD, CPPpred, CPPred-RF and MLCPP, and also different curated databases that present validated information of CPPs such as CPPsite 2.0 have been developed up to now. Two methods including CellPPD and CPPpred were applied to predict and design potent CPPs. CPPsite 2.0 is a user-friendly updated database that provides various information about CPPs and contains 1855 entries. This database provides comprehensive information on experimentally tested CPPs and prediction of their secondary and tertiary structures to realize their structure-function relationship. Furthermore, each entry presents information of a CPP including chirality, origin, nature of peptide, sub-cellular localization, uptake mechanism and efficiency, amino acid composition, hydrophobicity, and physicochemical properties. One of main goals of CPPsite 2.0 database is to provide the latest datasets of CPPs for analysis and development of CPP prediction methods. CPPsite 2.0 is freely available at https://webs.iiitd.edu.in/raghava/cppsite.",CPPsite,0.910450459,NA,0,CPPsite,0.910450459,1,NA,26586798,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/10/2020 +26867747,http://assays.cancer.gov,"Using the CPTAC Assay Portal to Identify and Implement Highly Characterized Targeted Proteomics Assays. The Clinical Proteomic Tumor Analysis Consortium (CPTAC) of the National Cancer Institute (NCI) has launched an Assay Portal (http://assays.cancer.gov) to serve as an open-source repository of well-characterized targeted proteomic assays. 
The portal is designed to curate and disseminate highly characterized, targeted mass spectrometry (MS)-based assays by providing detailed assay performance characterization data, standard operating procedures, and access to reagents. Assay content is accessed via the portal through queries to find assays targeting proteins associated with specific cellular pathways, protein complexes, or specific chromosomal regions. The position of the peptide analytes for which there are available assays are mapped relative to other features of interest in the protein, such as sequence domains, isoforms, single nucleotide polymorphisms, and posttranslational modifications. The overarching goals are to enable robust quantification of all human proteins and to standardize the quantification of targeted MS-based assays to ultimately enable harmonization of results over time and across laboratories.",CPTAC,0.764862835,Clinical Proteomic Tumor,0.639635273,CPTAC,0.764862835,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2016 +24253304,"http://compbio.tongji.edu.cn/cr, http://cistrome.org/cr","CR Cistrome: a ChIP-Seq database for chromatin regulators and histone modification linkages in human and mouse. Diversified histone modifications (HMs) are essential epigenetic features. They play important roles in fundamental biological processes including transcription, DNA repair and DNA replication. Chromatin regulators (CRs), which are indispensable in epigenetics, can mediate HMs to adjust chromatin structures and functions. With the development of ChIP-Seq technology, there is an opportunity to study CR and HM profiles at the whole-genome scale. However, no specific resource for the integration of CR ChIP-Seq data or CR-HM ChIP-Seq linkage pairs is currently available. Therefore, we constructed the CR Cistrome database, available online at http://compbio.tongji.edu.cn/cr and http://cistrome.org/cr/, to further elucidate CR functions and CR-HM linkages. 
Within this database, we collected all publicly available ChIP-Seq data on CRs in human and mouse and categorized the data into four cohorts: the reader, writer, eraser and remodeler cohorts, together with curated introductions and ChIP-Seq data analysis results. For the HM readers, writers and erasers, we provided further ChIP-Seq analysis data for the targeted HMs and schematized the relationships between them. We believe CR Cistrome is a valuable resource for the epigenetics community.",CR Cistrome,0.879878566,NA,0,CR Cistrome,0.879878566,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/18/2013 +29036683,http://cis.hku.hk/CR2Cancer,"CR2Cancer: a database for chromatin regulators in human cancer. Chromatin regulators (CRs) can dynamically modulate chromatin architecture to epigenetically regulate gene expression in response to intrinsic and extrinsic signalling cues. Somatic alterations or misexpression of CRs might reprogram the epigenomic landscape of chromatin, which in turn lead to a wide range of common diseases, notably cancer. Here, we present CR2Cancer, a comprehensive annotation and visualization database for CRs in human cancer constructed by high throughput data analysis and literature mining. We collected and integrated genomic, transcriptomic, proteomic, clinical and functional information for over 400 CRs across multiple cancer types. We also built diverse types of CR-associated relations, including cancer type dependent (CR-target and miRNA-CR) and independent (protein-protein interaction and drug-target) ones. Furthermore, we manually curated around 6000 items of aberrant molecular alterations and interactions of CRs in cancer development from 5007 publications. CR2Cancer provides a user-friendly web interface to conveniently browse, search and download data of interest. We believe that this database would become a valuable resource for cancer epigenetics investigation and potential clinical application. 
CR2Cancer is freely available at http://cis.hku.hk/CR2Cancer.",CR2Cancer,0.981912553,NA,0,CR2Cancer,0.981912553,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +31725864,http://bionlp-corpora.sourceforge.net/CRAFT/index.shtml,"Gold-standard ontology-based anatomical annotation in the CRAFT Corpus. . Gold-standard annotated corpora have become important resources for the training and testing of natural-language-processing (NLP) systems designed to support biocuration efforts, and ontologies are increasingly used to facilitate curational consistency and semantic integration across disparate resources. Bringing together the respective power of these, the Colorado Richly Annotated Full-Text (CRAFT) Corpus, a collection of full-length, open-access biomedical journal articles with extensive manually created syntactic, formatting and semantic markup, was previously created and released. This initial public release has already been used in multiple projects to drive development of systems focused on a variety of biocuration, search, visualization, and semantic and syntactic NLP tasks. Building on its demonstrated utility, we have expanded the CRAFT Corpus with a large set of manually created semantic annotations relying on Uberon, an ontology representing anatomical entities and life-cycle stages of multicellular organisms across species as well as types of multicellular organisms defined in terms of life-cycle stage and sexual characteristics. This newly created set of annotations, which has been added for v2.1 of the corpus, is by far the largest publicly available collection of gold-standard anatomical markup and is the first large-scale effort at manual markup of biomedical text relying on the entirety of an anatomical terminology, as opposed to annotation with a small number of high-level anatomical categories, as performed in previous corpora. 
In addition to presenting and discussing this newly available resource, we apply it to provide a performance baseline for the automatic annotation of anatomical concepts in biomedical text using a prominent concept recognition system. The full corpus, released with a CC BY 3.0 license, may be downloaded from http://bionlp-corpora.sourceforge.net/CRAFT/index.shtml. Database URL: http://bionlp-corpora.sourceforge.net/CRAFT/index.shtml.",CRAFT,0.874941985,Annotated,0.625489712,CRAFT,0.874941985,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2017 +26862144,http://14.139.227.92/mkumar/cragdb,"CrAgDb--a database of annotated chaperone repertoire in archaeal genomes. . Chaperones are a diverse class of ubiquitous proteins that assist other cellular proteins in folding correctly and maintaining their native structure. Many different chaperones cooperate to constitute the 'proteostasis' machinery in the cells. It has been proposed earlier that archaeal organisms could be ideal model systems for deciphering the basic functioning of the 'protein folding machinery' in higher eukaryotes. Several chaperone families have been characterized in archaea over the years but mostly one protein at a time, making it difficult to decipher the composition and mechanistics of the protein folding system as a whole. In order to deal with these lacunae, we have developed a database of all archaeal chaperone proteins, CrAgDb (Chaperone repertoire in Archaeal genomes). The data have been presented in a systematic way with intuitive browse and search facilities for easy retrieval of information. Access to these curated datasets should expedite large-scale analysis of archaeal chaperone networks and significantly advance our understanding of operation and regulation of the protein folding machinery in archaea. Researchers could then translate this knowledge to comprehend the more complex protein folding pathways in eukaryotic systems. 
The database is freely available at http://14.139.227.92/mkumar/cragdb/.",CrAgDb,0.977069199,haperone repertoire in Archaeal,0.644523211,CrAgDb,0.977069199,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/8/2016 +26450948,http://bioinfo.au-kbc.org.in/ngs/ngshome.html,"CRCDA--Comprehensive resources for cancer NGS data analysis. . Next generation sequencing (NGS) innovations put a compelling landmark in life science and changed the direction of research in clinical oncology with its productivity to diagnose and treat cancer. The aim of our portal comprehensive resources for cancer NGS data analysis (CRCDA) is to provide a collection of different NGS tools and pipelines under diverse classes with cancer pathways and databases and furthermore, literature information from PubMed. The literature data was constrained to 18 most common cancer types such as breast cancer, colon cancer and other cancers that exhibit in worldwide population. NGS-cancer tools for the convenience have been categorized into cancer genomics, cancer transcriptomics, cancer epigenomics, quality control and visualization. Pipelines for variant detection, quality control and data analysis were listed to provide out-of-the box solution for NGS data analysis, which may help researchers to overcome challenges in selecting and configuring individual tools for analysing exome, whole genome and transcriptome data. An extensive search page was developed that can be queried by using (i) type of data [literature, gene data and sequence read archive (SRA) data] and (ii) type of cancer (selected based on global incidence and accessibility of data). For each category of analysis, variety of tools are available and the biggest challenge is in searching and using the right tool for the right application. 
The objective of the work is collecting tools in each category available at various places and arranging the tools and other data in a simple and user-friendly manner for biologists and oncologists to find information easier. To the best of our knowledge, we have collected and presented a comprehensive package of most of the resources available in cancer for NGS data analysis. Given these factors, we believe that this website will be an useful resource to the NGS research community working on cancer. Database URL: http://bioinfo.au-kbc.org.in/ngs/ngshome.html.",CRCDA,0.995928064,resources for cancer NGS data analysis,0.909173157,CRCDA,0.995928064,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/8/2015 +23019048,http://www.chs.med.ed.ac.uk/CRCgene,"Systematic meta-analyses and field synopsis of genetic association studies in colorectal cancer. Background Colorectal cancer is a major global public health problem, with approximately 950,000 patients newly diagnosed each year. We report the first comprehensive field synopsis and creation of a parallel publicly available and regularly updated database (CRCgene) that catalogs all genetic association studies on colorectal cancer (http://www.chs.med.ed.ac.uk/CRCgene/). Methods We performed two independent systematic reviews, reviewing 10 145 titles, then collated and extracted data from 635 publications reporting on 445 polymorphisms in 110 different genes. We carried out meta-analyses to derive summary effect estimates for 92 polymorphisms in 64 different genes. For assessing the credibility of associations, we applied the Venice criteria and the Bayesian False Discovery Probability (BFDP) test. 
Results We consider 16 independent variants at 13 loci (MUTYH, MTHFR, SMAD7, and common variants tagging the loci 8q24, 8q23.3, 11q23.1, 14q22.2, 1q41, 20p12.3, 20q13.33, 3q26.2, 16q22.1, and 19q13.1) to have the most highly credible associations with colorectal cancer, with all variants except those in MUTYH and 19q13.1 reaching genome-wide statistical significance in at least one meta-analysis model. We identified less-credible (higher heterogeneity, lower statistical power, BFDP >0.2) associations with 23 more variants at 22 loci. The meta-analyses of a further 20 variants for which associations have previously been reported found no evidence to support these as true associations. Conclusion The CRCgene database provides the context for genetic association data to be interpreted appropriately and helps inform future research direction.",CRCgene,0.993980265,NA,0,CRCgene,0.993980265,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/26/2012 +23868908,http://www-cryst.bioc.cam.ac.uk/credo,"CREDO: a structural interactomics database for drug discovery. CREDO is a unique relational database storing all pairwise atomic interactions of inter- as well as intra-molecular contacts between small molecules and macromolecules found in experimentally determined structures from the Protein Data Bank. These interactions are integrated with further chemical and biological data. The database implements useful data structures and algorithms such as cheminformatics routines to create a comprehensive analysis platform for drug discovery. The database can be accessed through a web-based interface, downloads of data sets and web services at http://www-cryst.bioc.cam.ac.uk/credo. Database URL: http://www-cryst.bioc.cam.ac.uk/credo.",CREDO,0.997011185,NA,0,CREDO,0.997011185,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/18/2013 +34482425,http://www.informatics.jax.org/home/recombinase,"Annotated expression and activity data for murine recombinase alleles and transgenes: the CrePortal resource. 
Recombinase alleles and transgenes can be used to facilitate spatio-temporal specificity of gene disruption or transgene expression. However, the versatility of this in vivo recombination system relies on having detailed and accurate characterization of recombinase expression and activity to enable selection of the appropriate allele or transgene. The CrePortal ( http://www.informatics.jax.org/home/recombinase ) leverages the informatics infrastructure of Mouse Genome Informatics to integrate data from the scientific literature, direct data submissions from the scientific community at-large, and from major projects developing new recombinase lines and characterizing recombinase expression and specificity patterns. Searching the CrePortal by recombinase activity or specific recombinase gene driver provides users with a recombinase alleles and transgenes activity tissue summary and matrix comparison of gene expression and recombinase activity with links to generation details, a recombinase activity grid, and associated phenotype annotations. Future improvements will add cell type-based activity annotations. The CrePortal provides a comprehensive presentation of recombinase allele and transgene data to assist researchers in selection of the recombinase allele or transgene based on where and when recombination is desired.",CrePortal,0.977086961,NA,0,CrePortal,0.977086961,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/4/2021 +26855883,http://cressint.cchmc.org,"CressInt: a user-friendly web resource for genome-scale exploration of gene regulation in Arabidopsis thaliana. The thale cress Arabidopsis thaliana is a powerful model organism for studying a wide variety of biological processes. Recent advances in sequencing technology have resulted in a wealth of information describing numerous aspects of A. thaliana genome function. 
However, there is a relative paucity of computational systems for efficiently and effectively using these data to create testable hypotheses. We present CressInt, a user-friendly web resource for exploring gene regulatory mechanisms in A. thaliana on a genomic scale. The CressInt system incorporates a variety of genome-wide data types relevant to gene regulation, including transcription factor (TF) binding site models, ChIP-seq, DNase-seq, eQTLs, and GWAS. We demonstrate the utility of CressInt by showing how the system can be used to (1) Identify TFs binding to the promoter of a gene of interest; (2) identify genetic variants that are likely to impact TF binding based on a ChIP-seq dataset; and (3) identify specific TFs whose binding might be impacted by phenotype-associated variants. CressInt is freely available at http://cressint.cchmc.org.",CressInt,0.996792436,NA,0,CressInt,0.996792436,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2015 +23668932,http://crezoo.crt-dresden.de,"The zebrafish CreZoo: an easy-to-handle database for novel CreER(T2)-driver lines. We report a new open access database, the zebrafish CreZoo ( http://crezoo.crt-dresden.de ), which contains novel CreER(T2)-driver lines that express Cre fused to the mutated human ligand-binding domain of the estrogen receptor (CreER(T2)) in several tissues. Recently, the conditional Cre/loxP technology has been added to the toolbox for the precise manipulation of the zebrafish genome, but currently the number of CreER(T2)-driver lines is limited. To enlarge the pool of existing CreER(T2)-driver lines, we conducted a genome-wide screen using a gene trap cassette comprising a splice acceptor and an mCherry-tagged variant of CreER(T2). All molecular and expression data obtained in this screen are summarized in the CreZoo database, which currently comprises an inventory of about 47 Cre-driver lines expressing CreER(T2) in a cell- and tissue-specific manner during development and adulthood. 
Combined with other Cre-dependent effector lines, the CreZoo will be a great tool to manipulate the zebrafish genome.",CreZoo,0.978289962,NA,0,CreZoo,0.978289962,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/13/2013 +33010154,http://crispview.weililab.org,"CRISP-view: a database of functional genetic screens spanning multiple phenotypes. High-throughput genetic screening based on CRISPR/Cas9 or RNA-interference (RNAi) enables the exploration of genes associated with the phenotype of interest on a large scale. The rapid accumulation of public available genetic screening data provides a wealth of knowledge about genotype-to-phenotype relationships and a valuable resource for the systematic analysis of gene functions. Here we present CRISP-view, a comprehensive database of CRISPR/Cas9 and RNAi screening datasets that span multiple phenotypes, including in vitro and in vivo cell proliferation and viability, response to cancer immunotherapy, virus response, protein expression, etc. By 22 September 2020, CRISP-view has collected 10 321 human samples and 825 mouse samples from 167 papers. All the datasets have been curated, annotated, and processed by a standard MAGeCK-VISPR analysis pipeline with quality control (QC) metrics. We also developed a user-friendly webserver to visualize, explore, and search these datasets. The webserver is freely available at http://crispview.weililab.org.",CRISP-view,0.996309042,NA,0,CRISP-view,0.996309042,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +31624845,http://crisprcas.i2bc.paris-saclay.fr,"CRISPRCasdb a successor of CRISPRdb containing CRISPR arrays and cas genes from complete genome sequences, and tools to download and query lists of repeats and spacers. In Archaea and Bacteria, the arrays called CRISPRs for 'clustered regularly interspaced short palindromic repeats' and the CRISPR associated genes or cas provide adaptive immunity against viruses, plasmids and transposable elements. 
Short sequences called spacers, corresponding to fragments of invading DNA, are stored in-between repeated sequences. The CRISPR-Cas systems target sequences homologous to spacers leading to their degradation. To facilitate investigations of CRISPRs, we developed 12 years ago a website holding the CRISPRdb. We now propose CRISPRCasdb, a completely new version giving access to both CRISPRs and cas genes. We used CRISPRCasFinder, a program that identifies CRISPR arrays and cas genes and determine the system's type and subtype, to process public whole genome assemblies. Strains are displayed either in an alphabetic list or in taxonomic order. The database is part of the CRISPR-Cas++ website which also offers the possibility to analyse submitted sequences and to download programs. A BLAST search against lists of repeats and spacers extracted from the database is proposed. To date, 16 990 complete prokaryote genomes (16 650 bacteria from 2973 species and 340 archaea from 300 species) are included. CRISPR-Cas systems were found in 36% of Bacteria and 75% of Archaea strains. CRISPRCasdb is freely accessible at https://crisprcas.i2bc.paris-saclay.fr/.",CRISPRCasdb,0.988769293,NA,0,CRISPRCasdb,0.988769293,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +30285246,"http://www.crisprlnc.org, http://crisprlnc.xtbg.ac.cn","CRISPRlnc: a manually curated database of validated sgRNAs for lncRNAs. The CRISPR/Cas9 system, as a revolutionary genome editing tool for all areas of molecular biology, provides new opportunities for research on lncRNA's function. However, designing a CRISPR/Cas9 single guide RNA (sgRNA) for lncRNA is not easy with an unwarrantable effectiveness. Thus, it is worthy of collecting validated sgRNAs, to assist in efficiently choosing sgRNA with an expected activity. CRISPRlnc (http://www.crisprlnc.org or http://crisprlnc.xtbg.ac.cn) is a manually curated database of validated CRISPR/Cas9 sgRNAs for lncRNAs from all species. 
After manually reviewing more than 200 published literature, the current version of CRISPRlnc contains 305 lncRNAs and 2102 validated sgRNAs across eight species, including mammalian, insect and plant. We handled the ID, position in the genome, sequence and functional description of these lncRNAs, as well as the sequence, protoacceptor-motif (PAM), CRISPR type and validity of their paired sgRNAs. In CRISPRlnc, we provided the tools for browsing, searching and downloading data, as well as online BLAST service and genome browse server. As the first database against the validated sgRNAs of lncRNAs, CRISPRlnc will provide a new and powerful platform to promote CRISPR/Cas9 applications for future functional studies of lncRNAs.",CRISPRlnc,0.995125651,NA,0,CRISPRlnc,0.995125651,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +33084893,http://www.crisprsql.com,"crisprSQL: a novel database platform for CRISPR/Cas off-target cleavage assays. With ongoing development of the CRISPR/Cas programmable nuclease system, applications in the area of in vivo therapeutic gene editing are increasingly within reach. However, non-negligible off-target effects remain a major concern for clinical applications. Even though a multitude of off-target cleavage datasets have been published, a comprehensive, transparent overview tool has not yet been established. Here, we present crisprSQL (http://www.crisprsql.com), an interactive and bioinformatically enhanced collection of CRISPR/Cas9 off-target cleavage studies aimed at enriching the fields of cleavage profiling, gene editing safety analysis and transcriptomics. The current version of crisprSQL contains cleavage data from 144 guide RNAs on 25,632 guide-target pairs from human and rodent cell lines, with interaction-specific references to epigenetic markers and gene names. 
The first curated database of this standard, it promises to enhance safety quantification research, inform experiment design and fuel development of computational off-target prediction algorithms.",crisprSQL,0.997429252,NA,0,crisprSQL,0.997429252,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +26438539,http://research.nhgri.nih.gov/CRISPRz,"CRISPRz: a database of zebrafish validated sgRNAs. CRISPRz (http://research.nhgri.nih.gov/CRISPRz/) is a database of CRISPR/Cas9 target sequences that have been experimentally validated in zebrafish. Programmable RNA-guided CRISPR/Cas9 has recently emerged as a simple and efficient genome editing method in various cell types and organisms, including zebrafish. Because the technique is so easy and efficient in zebrafish, the most valuable asset is no longer a mutated fish (which has distribution challenges), but rather a CRISPR/Cas9 target sequence to the gene confirmed to have high mutagenic efficiency. With a highly active CRISPR target, a mutant fish can be quickly replicated in any genetic background anywhere in the world. However, sgRNA's vary widely in their activity and models for predicting target activity are imperfect. Thus, it is very useful to collect in one place validated CRISPR target sequences with their relative mutagenic activities. A researcher could then select a target of interest in the database with an expected activity. Here, we report the development of CRISPRz, a database of validated zebrafish CRISPR target sites collected from published sources, as well as from our own in-house large-scale mutagenesis project. 
CRISPRz can be searched using multiple inputs such as ZFIN IDs, accession number, UniGene ID, or gene symbols from zebrafish, human and mouse.",CRISPRz,0.997380316,NA,0,CRISPRz,0.997380316,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/4/2015 +30598113,http://crlnc.xtbg.ac.cn,"CRlncRNA: a manually curated database of cancer-related long non-coding RNAs with experimental proof of functions on clinicopathological and molecular features. Background Recent studies demonstrated that long non-coding RNAs (lncRNAs) could be intricately implicated in cancer-related molecular networks, and related to cancer occurrence, development and prognosis. However, clinicopathological and molecular features for these cancer-related lncRNAs, which are very important in bridging lncRNA basic research with clinical research, fail to well settle to integration. Results After manually reviewing more than 2500 published literature, we collected the cancer-related lncRNAs with the experimental proof of functions. By integrating from literature and public databases, we constructed CRlncRNA, a database of cancer-related lncRNAs. The current version of CRlncRNA embodied 355 entries of cancer-related lncRNAs, covering 1072 cancer-lncRNA associations regarding to 76 types of cancer, and 1238 interactions with different RNAs and proteins. We further annotated clinicopathological features of these lncRNAs, such as the clinical stages and the cancer hallmarks. We also provided tools for data browsing, searching and download, as well as online BLAST, genome browser and gene network visualization service. Conclusions CRlncRNA is a manually curated database for retrieving clinicopathological and molecular features of cancer-related lncRNAs supported by highly reliable evidences. CRlncRNA aims to provide a bridge from lncRNA basic research to clinical research. 
The lncRNA dataset collected by CRlncRNA can be used as a golden standard dataset for the prospective experimental and in-silico studies of cancer-related lncRNAs. CRlncRNA is freely available for all users at http://crlnc.xtbg.ac.cn .",CRlncRNA,0.995219409,NA,0,CRlncRNA,0.995219409,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/31/2018 +33529633,http://crmarker.hnnu.edu.cn,"CRMarker: A manually curated comprehensive resource of cancer RNA markers. Biomolecular markers have extremely important value for cancer research and treatment. However, as far as we know, there are still no searchable and predictable resources focusing on multiple classes of RNA molecular markers in cancers. Herein, we developed CRMarker, a manually curated comprehensive repository of cancer RNA markers. In the current release, CRMarker v1.1 consists of 5489 ""known"" cancer RNA markers based on 8756 valid publications in PubMed, including 2878 mRNAs (genes), 1314 miRNAs, 1097 lncRNAs and 200 circRNAs, and involving two functional molecules (diagnosis and prognosis), 21 organisms and 154 cancers. The search results provided by the database are comprehensive, including 11 items such as RNA molecule expression and risk level, type of tissue or sample, cancer subtype, reference type, etc. Moreover, CRMarker also provides more than 18,000 potential cancer RNA markers, which are predicted based on ""guilt-by-association"" analysis of the above-mentioned ""known"" RNA markers and three molecular interaction networks, and survival analysis of 18 gene expression data sets with survival data. CRMarker v1.1 has a friendly interface and is freely available online at http://crmarker.hnnu.edu.cn/. 
We aim to build a comprehensive platform that is convenient for cancer researchers and clinicians to inquire and retrieve.",CRMarker,0.994199812,NA,0,CRMarker,0.994199812,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/30/2021 +26602695,http://syslab4.nchu.edu.tw/CRN,"Cancer RNA-Seq Nexus: a database of phenotype-specific transcriptome profiling in cancer cells. The genome-wide transcriptome profiling of cancerous and normal tissue samples can provide insights into the molecular mechanisms of cancer initiation and progression. RNA Sequencing (RNA-Seq) is a revolutionary tool that has been used extensively in cancer research. However, no existing RNA-Seq database provides all of the following features: (i) large-scale and comprehensive data archives and analyses, including coding-transcript profiling, long non-coding RNA (lncRNA) profiling and coexpression networks; (ii) phenotype-oriented data organization and searching and (iii) the visualization of expression profiles, differential expression and regulatory networks. We have constructed the first public database that meets these criteria, the Cancer RNA-Seq Nexus (CRN, http://syslab4.nchu.edu.tw/CRN). CRN has a user-friendly web interface designed to facilitate cancer research and personalized medicine. It is an open resource for intuitive data exploration, providing coding-transcript/lncRNA expression profiles to support researchers generating new hypotheses in cancer research and personalized medicine.",CRN,0.987320423,Cancer RNA-Seq Nexus,0.908770829,CRN,0.987320423,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/23/2015 +30967897,http://bioinformatics.cau.edu.cn/croFGD,"croFGD: Catharanthus roseus Functional Genomics Database. Catharanthus roseus is a medicinal plant, which can produce monoterpene indole alkaloid (MIA) metabolites with biological activity and is rich in vinblastine and vincristine. With release of the scaffolded genome sequence of C. roseus, it is necessary to annotate gene functions on the whole-genome level. 
Recently, 53 RNA-seq datasets are available in public with different tissues (flower, root, leaf, seedling, and shoot) and different treatments (MeJA, PnWB infection and yeast elicitor). We used in-house data process pipeline with the combination of PCC and MR algorithms to construct a co-expression network exploring multi-dimensional gene expression (global, tissue preferential, and treat response) through multi-layered approaches. In the meanwhile, we added miRNA-target pairs, predicted PPI pairs into the network and provided several tools such as gene set enrichment analysis, functional module enrichment analysis, and motif analysis for functional prediction of the co-expression genes. Finally, we have constructed an online croFGD database (http://bioinformatics.cau.edu.cn/croFGD/). We hope croFGD can help the communities to study the C. roseus functional genomics and make novel discoveries about key genes involved in some important biological processes.",croFGD,0.99771744,Catharanthus roseus Functional Genomics Database,0.82261859,croFGD,0.99771744,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/22/2019 +26556651,http://crop-PAL.org,"Finding the Subcellular Location of Barley, Wheat, Rice and Maize Proteins: The Compendium of Crop Proteins with Annotated Locations (cropPAL). Barley, wheat, rice and maize provide the bulk of human nutrition and have extensive industrial use as agricultural products. The genomes of these crops each contains >40,000 genes encoding proteins; however, the major genome databases for these species lack annotation information of protein subcellular location for >80% of these gene products. We address this gap, by constructing the compendium of crop protein subcellular locations called crop Proteins with Annotated Locations (cropPAL). 
Subcellular location is most commonly determined by fluorescent protein tagging of live cells or mass spectrometry detection in subcellular purifications, but can also be predicted from amino acid sequence or protein expression patterns. The cropPAL database collates 556 published studies, from >300 research institutes in >30 countries that have been previously published, as well as compiling eight pre-computed subcellular predictions for all Hordeum vulgare, Triticum aestivum, Oryza sativa and Zea mays protein sequences. The data collection including metadata for proteins and published studies can be accessed through a search portal http://crop-PAL.org. The subcellular localization information housed in cropPAL helps to depict plant cells as compartmentalized protein networks that can be investigated for improving crop yield and quality, and developing new biotechnological solutions to agricultural challenges.",cropPAL,0.991583467,crop Proteins with Annotated Locations,0.932682865,cropPAL,0.991583467,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/9/2015 +30548723,http://snpdb.appliedbioinformatics.com.au,"CropSNPdb: a database of SNP array data for Brassica crops and hexaploid bread wheat. Advances in sequencing technology have led to a rapid rise in the genomic data available for plants, driving new insights into the evolution, domestication and improvement of crops. Single nucleotide polymorphisms (SNPs) are a major component of crop genomic diversity, and are invaluable as genetic markers in research and breeding programs. High-throughput SNP arrays, or 'SNP chips', can generate reproducible sets of informative SNP markers and have been broadly adopted. Although there are many public repositories for sequencing data, which are routinely uploaded, there are no formal repositories for crop SNP array data. 
To make SNP array data more easily accessible, we have developed CropSNPdb (http://snpdb.appliedbioinformatics.com.au), a database for SNP array data produced by the Illumina Infinium™ hexaploid bread wheat (Triticum aestivum) 90K and Brassica 60K arrays. We currently host SNPs from datasets covering 526 Brassica lines and 309 bread wheat lines, and provide search, download and upload utilities for users. CropSNPdb provides a useful repository for these data, which can be applied for a range of genomics and molecular crop-breeding activities.",CropSNPdb,0.997403085,NA,0,CropSNPdb,0.997403085,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/28/2019 +28724888,http://proteinguru.com/crosscheck,"CrossCheck: an open-source web tool for high-throughput screen data analysis. Modern high-throughput screening methods allow researchers to generate large datasets that potentially contain important biological information. However, oftentimes, picking relevant hits from such screens and generating testable hypotheses requires training in bioinformatics and the skills to efficiently perform database mining. There are currently no tools available to general public that allow users to cross-reference their screen datasets with published screen datasets. To this end, we developed CrossCheck, an online platform for high-throughput screen data analysis. CrossCheck is a centralized database that allows effortless comparison of the user-entered list of gene symbols with 16,231 published datasets. These datasets include published data from genome-wide RNAi and CRISPR screens, interactome proteomics and phosphoproteomics screens, cancer mutation databases, low-throughput studies of major cell signaling mediators, such as kinases, E3 ubiquitin ligases and phosphatases, and gene ontological information. Moreover, CrossCheck includes a novel database of predicted protein kinase substrates, which was developed using proteome-wide consensus motif searches. 
CrossCheck dramatically simplifies high-throughput screen data analysis and enables researchers to dig deep into the published literature and streamline data-driven hypothesis generation. CrossCheck is freely accessible as a web-based application at http://proteinguru.com/crosscheck.",CrossCheck,0.990276933,NA,0,CrossCheck,0.990276933,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/19/2017 +23396301,http://www.crosstope.com.br,"CrossTope: a curate repository of 3D structures of immunogenic peptide: MHC complexes. The CrossTope is a highly curate repository of three-dimensional structures of peptide:major histocompatibility complex (MHC) class I complexes (pMHC-I). The complexes hosted by this databank were obtained in protein databases and by large-scale in silico construction of pMHC-I structures, using a new approach developed by our group. At this moment, the database contains 182 'non-redundant' pMHC-I complexes from two human and two murine alleles. A web server provides interface for database query. The user can download (i) structure coordinate files and (ii) topological and charges distribution maps images from the T-cell receptor-interacting surface of pMHC-I complexes. The retrieved structures and maps can be used to cluster similar epitopes in cross-reactivity approaches, to analyse viral escape mutations in a structural level or even to improve the immunogenicity of tumour antigens. Database URL: http://www.crosstope.com.br.",CrossTope,0.991143823,NA,0,CrossTope,0.991143823,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/8/2013 +34927675,http://www.sysbio.org.cn/CRPMKB,"CRPMKB: a knowledge base of cancer risk prediction models for systematic comparison and personalized applications. . In the era of big data and precision medicine, accurate risk assessment is a prerequisite for the implementation of risk screening and preventive treatment. 
A large number of studies have focused on the risk of cancer, and related risk prediction models have been constructed, but there is a lack of effective resource integration for systematic comparison and personalized applications. Therefore, the establishment and analysis of the cancer risk prediction model knowledge base (CRPMKB) is of great significance. The current knowledge base contains 802 model data. The model comparison indicates that the accuracy of cancer risk prediction was greatly affected by regional differences, cancer types and model types. We divided the model variables into four categories: environment, behavioral lifestyle, biological genetics and clinical examination, and found that there are differences in the distribution of various variables among different cancer types. Taking 50 genes involved in the lung cancer risk prediction models as an example to perform pathway enrichment analyses and the results showed that these genes were significantly enriched in p53 Signaling and Aryl Hydrocarbon Receptor Signaling pathways which are associated with cancer and specific diseases. In addition, we verified the biological significance of overlapping lung cancer genes via STRING database. CRPMKB was established to provide researchers an online tool for the future personalized model application and developing. This study of CRPMKB suggests that developing more targeted models based on specific demographic characteristics and cancer types will further improve the accuracy of cancer risk model predictions. http://www.sysbio.org.cn/CRPMKB/. Supplementary data are available at Bioinformatics online.",CRPMKB,0.995024562,cancer risk prediction model,0.801454693,CRPMKB,0.995024562,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/20/2021 +31918660,http://cat.sls.cuhk.edu.hk,"A crustacean annotated transcriptome (CAT) database. BACKGROUND:Decapods are an order of crustaceans which includes shrimps, crabs, lobsters and crayfish. 
They occur worldwide and are of great scientific interest as well as being of ecological and economic importance in fisheries and aquaculture. However, our knowledge of their biology mainly comes from the group which is most closely related to crustaceans - insects. Here we produce a de novo transcriptome database, crustacean annotated transcriptome (CAT) database, spanning multiple tissues and the life stages of seven crustaceans. DESCRIPTION:A total of 71 transcriptome assemblies from six decapod species and a stomatopod species, including the coral shrimp Stenopus hispidus, the cherry shrimp Neocaridina davidi, the redclaw crayfish Cherax quadricarinatus, the spiny lobster Panulirus ornatus, the red king crab Paralithodes camtschaticus, the coconut crab Birgus latro, and the zebra mantis shrimp Lysiosquillina maculata, were generated. Differential gene expression analyses within species were generated as a reference and included in a graphical user interface database at http://cat.sls.cuhk.edu.hk/. Users can carry out gene name searches and also access gene sequences based on a sequence query using the BLAST search function. CONCLUSIONS:The data generated and deposited in this database offers a valuable resource for the further study of these crustaceans, as well as being of use in aquaculture development.",NA,0,crustacean,0.614458144,crustacean,0.614458144,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/9/2020 +29178828,http://qinlab.sls.cuhk.edu.hk/CrusTF,"CrusTF: a comprehensive resource of transcriptomes for evolutionary and functional studies of crustacean transcription factors. Background Crustacea, the second largest subphylum of Arthropoda, includes species of major ecological and economic importance, such as crabs, lobsters, crayfishes, shrimps, and barnacles. 
With the rapid development of crustacean aquaculture and biodiversity loss, understanding the gene regulatory mechanisms of growth, reproduction, and development of crustaceans is crucial to both aquaculture development and biodiversity conservation of this group of organisms. In these biological processes, transcription factors (TFs) play a vital role in regulating gene expression. However, crustacean transcription factors are still largely unknown, because the lack of complete genome sequences of most crustacean species hampers the studies on their transcriptional regulation on a system-wide scale. Thus, the current TF databases derived from genome sequences contain TF information for only a few crustacean species and are insufficient to elucidate the transcriptional diversity of such a large animal group. Results Our database CrusTF ( http://qinlab.sls.cuhk.edu.hk/CrusTF ) provides comprehensive information for evolutionary and functional studies on the crustacean transcriptional regulatory system. CrusTF fills the knowledge gap of transcriptional regulation in crustaceans by exploring publicly available and newly sequenced transcriptomes of 170 crustacean species and identifying 131,941 TFs within 63 TF families. CrusTF features three categories of information: sequence, function, and evolution of crustacean TFs. The database enables searching, browsing and downloading of crustacean TF sequences. CrusTF infers DNA binding motifs of crustacean TFs, thus facilitating the users to predict potential downstream TF targets. The database also presents evolutionary analyses of crustacean TFs, which improve our understanding of the evolution of transcriptional regulatory systems in crustaceans. Conclusions Given the importance of TF information in evolutionary and functional studies on transcriptional regulatory systems of crustaceans, this database will constitute a key resource for the research community of crustacean biology and evolutionary biology. 
Moreover, CrusTF serves as a model for the construction of TF database derived from transcriptome data. A similar approach could be applied to other groups of organisms, for which transcriptomes are more readily available than genomes.",CrusTF,0.992500782,NA,0,CrusTF,0.992500782,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/25/2017 +32928113,http://crustybase.org,"CrustyBase: an interactive online database for crustacean transcriptomes. Transcriptome sequencing has opened the field of genomics to a wide variety of researchers, owing to its efficiency, applicability across species and ability to quantify gene expression. The resulting datasets are a rich source of information that can be mined for many years into the future, with each dataset providing a unique angle on a specific context in biology. Maintaining accessibility to this accumulation of data presents quite a challenge for researchers.The primary focus of conventional genomics databases is the storage, navigation and interpretation of sequence data, which is typically classified down to the level of a species or individual. The addition of expression data adds a new dimension to this paradigm - the sampling context. Does gene expression describe different tissues, a temporal distribution or an experimental treatment? These data not only describe an individual, but the biological context surrounding that individual. The structure and utility of a transcriptome database must therefore reflect these attributes. We present an online database which has been designed to maximise the accessibility of crustacean transcriptome data by providing intuitive navigation within and between datasets and instant visualization of gene expression and protein structure.The site is accessible at https://crustybase.org and currently holds 10 datasets from a range of crustacean species. 
It also allows for upload of novel transcriptome datasets through a simple web interface, allowing the research community to contribute their own data to a pool of shared knowledge.",CrustyBase,0.661278129,NA,0,CrustyBase,0.661278129,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/14/2020 +31452162,"http://cryptodb.org/, http://eupathdb.org","Accessing Cryptosporidium Omic and Isolate Data via CryptoDB.org. Cryptosporidium has historically been a difficult organism to work with, and molecular genomic data for this important pathogen have typically lagged behind other prominent protist pathogens. CryptoDB ( http://cryptodb.org/ ) was launched in 2004 following the appearance of draft genome sequences for both C. parvum and C. hominis. CryptoDB merged with the EuPathDB Bioinformatics Resource Center family of databases ( https://eupathdb.org ) and has been maintained and updated regularly since its establishment. These resources are freely available, are web-based, and permit users to analyze their own sequence data in the context of reference genome sequences in our user workspaces. Advances in technology have greatly facilitated Cryptosporidium research in the last several years greatly enhancing and extending the data and types of data available for this genus. Currently, 13 genome sequences are available for 9 species of Cryptosporidium as well as the distantly related Gregarina niphandrodes and two free-living alveolate outgroups of the Apicomplexa, Chromera velia and Vitrella brassicaformis. Recent years have seen several new genome sequences for both existing and new Cryptosporidium species as well as transcriptomics, proteomics, SNP, and isolate population surveys. This chapter introduces the extensive data mining and visualization capabilities of the EuPathDB software platform and introduces the data types and tools that are currently available for Cryptosporidium. 
Key features are demonstrated with Cryptosporidium-relevant examples and explanations.",CryptoDB,0.997344255,NA,0,CryptoDB,0.997344255,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +28095366,http://cryptogc.igs.umaryland.edu,"Cryptosporidium hominis gene catalog: a resource for the selection of novel Cryptosporidium vaccine candidates. . Human cryptosporidiosis, caused primarily by Cryptosporidium hominis and a subset of Cryptosporidium parvum, is a major cause of moderate-to-severe diarrhea in children under 5 years of age in developing countries and can lead to nutritional stunting and death. Cryptosporidiosis is particularly severe and potentially lethal in immunocompromised hosts. Biological and technical challenges have impeded traditional vaccinology approaches to identify novel targets for the development of vaccines against C. hominis, the predominant species associated with human disease. We deemed that the existence of genomic resources for multiple species in the genus, including a much-improved genome assembly and annotation for C. hominis, makes a reverse vaccinology approach feasible. To this end, we sought to generate a searchable online resource, termed C. hominis gene catalog, which registers all C. hominis genes and their properties relevant for the identification and prioritization of candidate vaccine antigens, including physical attributes, properties related to antigenic potential and expression data. Using bioinformatic approaches, we identified ~400 C. hominis genes containing properties typical of surface-exposed antigens, such as predicted glycosylphosphatidylinositol (GPI)-anchor motifs, multiple transmembrane motifs and/or signal peptides targeting the encoded protein to the secretory pathway. This set can be narrowed further, e.g. 
by focusing on potential GPI-anchored proteins lacking homologs in the human genome, but with homologs in the other Cryptosporidium species for which genomic data are available, and with low amino acid polymorphism. Additional selection criteria related to recombinant expression and purification include minimizing predicted post-translation modifications and potential disulfide bonds. Forty proteins satisfying these criteria were selected from 3745 proteins in the updated C. hominis annotation. The immunogenic potential of a few of these is currently being tested.Database URL: http://cryptogc.igs.umaryland.edu.",hominis,0.382772744,Cryptosporidium hominis,0.555859486,Cryptosporidium hominis,0.555859486,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/19/2016 +25264971,http://cs.psych.ac.cn,"Applying differentially expressed genes from rodent models of chronic stress to research of stress-related disease: an online database. Objective To systematically collect differentially expressed genes (DEGs) from rodent models of chronic stress (CS) and apply them to research of stress-related disease. CS is an important environmental factor that may affect numerous complex diseases. Its relevant DEGs identified from rodent models provide valuable information for understanding the mechanisms underlying stress-related diseases. Currently, no suitable data tool have been developed to use such data. Methods We systematically searched and reviewed publications in PubMed. CS-DEGs were collected from original studies that reported gene expression statuses in rodent models of CS. CS disease overlapping genes, CS pathways and CS pathway clusters, and CS regulatory elements were analyzed on the basis of CS-DEGs. An online database was developed to store and manage curated CS-DEGs and analyzed data. Results A total of 2956 CS-DEGs were collected from 195 articles, among which 815 genes are shared among CS and seven stress-related diseases. 
Nine hundred twenty-seven CS pathway clusters were identified. Three types of CS regulatory elements are predicted for all CS genes. An online database (CS-DEGs), freely available at http://cs.psych.ac.cn, includes and presents CS-DEGs and all analyzed data. Conclusions CS-DEGs is the first gene database on CS research. It enables researchers to apply rodent expression data in candidate gene and pathway identification for stress-related disease study.",CS-DEGs,0.965403174,NA,0,CS-DEGs,0.965403174,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/1/2014 +24319146,http://www.ebi.ac.uk/thornton-srv/databases/CSA,"The Catalytic Site Atlas 2.0: cataloging catalytic sites and residues identified in enzymes. Understanding which are the catalytic residues in an enzyme and what function they perform is crucial to many biology studies, particularly those leading to new therapeutics and enzyme design. The original version of the Catalytic Site Atlas (CSA) (http://www.ebi.ac.uk/thornton-srv/databases/CSA) published in 2004, which catalogs the residues involved in enzyme catalysis in experimentally determined protein structures, had only 177 curated entries and employed a simplistic approach to expanding these annotations to homologous enzyme structures. Here we present a new version of the CSA (CSA 2.0), which greatly expands the number of both curated (968) and automatically annotated catalytic sites in enzyme structures, utilizing a new method for annotation transfer. The curated entries are used, along with the variation in residue type from the sequence comparison, to generate 3D templates of the catalytic sites, which in turn can be used to find catalytic sites in new structures. To ease the transfer of CSA annotations to other resources a new ontology has been developed: the Enzyme Mechanism Ontology, which has permitted the transfer of annotations to Mechanism, Annotation and Classification in Enzymes (MACiE) and UniProt Knowledge Base (UniProtKB) resources. 
The CSA database schema has been re-designed and both the CSA data and search capabilities are presented in a new modern web interface.",CSA,0.969528695,Catalytic Site Atlas,0.905872226,CSA,0.969528695,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/6/2013 +23155063,http://genome.ucsc.edu,"The UCSC Genome Browser database: extensions and updates 2013. The University of California Santa Cruz (UCSC) Genome Browser (http://genome.ucsc.edu) offers online public access to a growing database of genomic sequence and annotations for a wide variety of organisms. The Browser is an integrated tool set for visualizing, comparing, analysing and sharing both publicly available and user-generated genomic datasets. As of September 2012, genomic sequence and a basic set of annotation 'tracks' are provided for 63 organisms, including 26 mammals, 13 non-mammal vertebrates, 3 invertebrate deuterostomes, 13 insects, 6 worms, yeast and sea hare. In the past year 19 new genome assemblies have been added, and we anticipate releasing another 28 in early 2013. Further, a large number of annotation tracks have been either added, updated by contributors or remapped to the latest human reference genome. Among these are an updated UCSC Genes track for human and mouse assemblies. We have also introduced several features to improve usability, including new navigation menus. This article provides an update to the UCSC Genome Browser database, which has been previously featured in the Database issue of this journal.",CSC,0.82776475,Genome,0.612467766,CSC,0.82776475,1,"22086951.0, 24270787.0, 25428374.0, 27899642.0, 30407534.0, 26590259.0, 33221922.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,11/15/2012 +29036403,http://gb.whu.edu.cn/CSCD,"CSCD: a database for cancer-specific circular RNAs. Circular RNA (circRNA) is a large group of RNA family extensively existed in cells and tissues. 
High-throughput sequencing provides a way to view circRNAs across different samples, especially in various diseases. However, there is still no comprehensive database for exploring the cancer-specific circRNAs. We collected 228 total RNA or polyA(-) RNA-seq samples from both cancer and normal cell lines, and identified 272 152 cancer-specific circRNAs. A total of 950 962 circRNAs were identified in normal samples only, and 170 909 circRNAs were identified in both tumor and normal samples, which could be further used as non-tumor background. We constructed a cancer-specific circRNA database (CSCD, http://gb.whu.edu.cn/CSCD). To understand the functional effects of circRNAs, we predicted the microRNA response element sites and RNA binding protein sites for each circRNA. We further predicted potential open reading frames to highlight translatable circRNAs. To understand the association between the linear splicing and the back-splicing, we also predicted the splicing events in linear transcripts of each circRNA. As the first comprehensive cancer-specific circRNA database, we believe CSCD could significantly contribute to the research for the function and regulation of cancer-associated circRNAs.",CSCD,0.997561216,pecific circRNA database,0.575147778,CSCD,0.997561216,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +26989154,http://bioinformatics.ustc.edu.cn/cscdb,"CSCdb: a cancer stem cells portal for markers, related genes and functional information. . Cancer stem cells (CSCs), which have the ability to self-renew and differentiate into various tumor cell types, are a special class of tumor cells. Characterizing the genes involved in CSCs regulation is fundamental to understand the mechanisms underlying the biological process and develop treatment methods for tumor therapy. Recently, much effort has been expended in the study of CSCs and a large amount of data has been generated. However, to the best of our knowledge, database dedicated to CSCs is not available until now. 
We have thus developed a CSCs database (CSCdb), which includes marker genes, CSCs-related genes/microRNAs and functional annotations. The information in the CSCdb was manual collected from about 13 000 articles. The CSCdb provides detailed information of 1769 genes that have been reported to participate in the functional regulation of CSCs and 74 marker genes that can be used for identification or isolation of CSCs. The CSCdb also provides 9475 annotations about 13 CSCs-related functions, such as oncogenesis, radio resistance, tumorigenesis, differentiation, etc. Annotations of the identified genes, which include protein function description, post-transcription modification information, related literature, Gene Ontology (GO), protein-protein interaction (PPI) information and regulatory relationships, are integrated into the CSCdb to help users get information more easily. CSCdb provides a comprehensive resource for CSCs research work, which would assist in finding new CSCs-related genes and would be a useful tool for biologists. Database URL: http://bioinformatics.ustc.edu.cn/cscdb.",CSCdb,0.995143592,CSCs database,0.68326959,CSCdb,0.995143592,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/17/2016 +28191780,http://www.csctt.org,"Cancer Stem Cells Therapeutic Target Database: The First Comprehensive Database for Therapeutic Targets of Cancer Stem Cells. Cancer stem cells (CSCs) are a subpopulation of tumor cells that have strong self-renewal capabilities and may contribute to the failure of conventional cancer therapies. Hence, therapeutics homing in on CSCs represent a novel and promising approach that may eradicate malignant tumors. However, the lack of information on validated targets of CSCs has greatly hindered the development of CSC-directed therapeutics. 
Herein, we describe the Cancer Stem Cells Therapeutic Target Database (CSCTT), the first online database to provide a rich bioinformatics resource for the display, search, and analysis of structure, function, and related annotation for therapeutic targets of cancer stem cells. CSCTT contains 135 proteins that are potential targets of CSCs, with validated experimental evidence manually curated from existing literatures. Proteins are carefully annotated with a detailed description of protein families, biological process, related diseases, and experimental evidences. In addition, CSCTT has compiled 213 documented therapeutic methods for cancer stem cells, including 118 small molecules and 20 biotherapy methods. The CSCTT may serve as a useful platform for the development of CSC-directed therapeutics against various malignant tumors. The CSCTT database is freely available to the public at http://www.csctt.org/. Stem Cells Translational Medicine 2017;6:331-334.",CSCTT,0.99582231,Cancer Stem Cells Therapeutic Target Database,0.993961447,CSCTT,0.99582231,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/11/2016 +25753703,http://csdb.glycoscience.ru,"Bacterial, plant, and fungal carbohydrate structure databases: daily usage. Natural carbohydrates play important roles in living systems and therefore are used as diagnostic and therapeutic targets. The main goal of glycomics is systematization of carbohydrates and elucidation of their role in human health and disease. The amount of information on natural carbohydrates accumulates rapidly, but scientists still lack databases and computer-assisted tools needed for orientation in the glycomic information space. Therefore, freely available, regularly updated, and cross-linked databases are demanded. Bacterial Carbohydrate Structure Database (Bacterial CSDB) was developed for provision of structural, bibliographic, taxonomic, NMR spectroscopic, and other related information on bacterial and archaeal carbohydrate structures. 
Its main features are (1) coverage above 90%, (2) high data consistence (above 90% of error-free records), and (3) presence of manually verified bibliographic, NMR spectroscopic, and taxonomic annotations. Recently, CSDB has been expanded to cover carbohydrates of plant and fungal origin. The achievement of full coverage in the plant and fungal domains is expected in the future. CSDB is freely available on the Internet as a web service at http://csdb.glycoscience.ru. This chapter aims at showing how to use CSDB in your daily scientific practice.",CSDB,0.980909228,Bacterial Carbohydrate Structure Database,0.947883934,CSDB,0.980909228,1,"24680503.0, 26286194.0",NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2015 +30759212,"http://csdb.glycoscience.ru/gt.html, http://csdb.glycoscience.ru/database/index.html","Expanding CSDB_GT glycosyltransferase database with Escherichia coli. In 2017, we reported a new database on glycosyltransferase (GT) activities, CSDB_GT (http://csdb.glycoscience.ru/gt.html), which was built at the platform of the Carbohydrate Structure Database (CSDB, http://csdb.glycoscience.ru/database/index.html) and contained data on experimentally confirmed GT activities from Arabidopsis thaliana. All entries in CSDB_GT are curated manually upon the analysis of scientific publications, and the key features of the database are accurate structural, genetic, protein and bibliographic references and close-to-complete coverage on experimentally proven GT activities in selected species. In 2018, CSDB_GT was supplemented with data on Escherichia coli GT activities. Now it contains ca. 800 entries on E. coli GTs, including ca. 550 entries with functions predicted in silico. This information was extracted from research papers published up to the year 2018 or was obtained by the authors' efforts on GT annotation. 
Thus, CSDB_GT was extended to provide not only experimentally confirmed GT activities, but also those predicted on the basis of gene or protein sequence homology that could carry valuable information. Accordingly, a new confirmation status-predicted in silico-was introduced. In addition, the coverage on A. thaliana was extended up to ca. 900 entries, all of which had experimental confirmation. Currently, CSDB_GT provides close-to-complete coverage on experimentally confirmed GT activities from A. thaliana and E. coli presented up to the year 2018.",CSDB_GT,0.99439846,NA,0,CSDB_GT,0.99439846,1,"28011601.0, 33242091.0","28011601.0, 33242091.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,4/1/2019 +"28011601, 33242091",http://csdb.glycoscience.ru/gt.html,"CSDB_GT: a new curated database on glycosyltransferases. Glycosyltransferases (GTs) are carbohydrate-active enzymes (CAZy) involved in the synthesis of natural glycan structures. The application of CAZy is highly demanded in biotechnology and pharmaceutics. However, it is being hindered by the lack of high-quality and comprehensive repositories of the research data accumulated so far. In this paper, we describe a new curated Carbohydrate Structure Glycosyltransferase Database (CSDB_GT). Currently, CSDB_GT provides ca. 780 activities exhibited by GTs, as well as several other CAZy, found in Arabidopsis thaliana and described in ca. 180 publications. It covers most published data on A. thaliana GTs with evidenced functions. CSDB_GT is linked to the Carbohydrate Structure Database (CSDB), which stores data on archaeal, bacterial, fungal and plant glycans. The CSDB_GT data are supported by experimental evidences and can be traced to original publications. 
CSDB_GT is freely available at http://csdb.glycoscience.ru/gt.html.",CSDB_GT,0.988374993,Carbohydrate Structure Glycosyltransferase Database,0.911011142,CSDB_GT,0.988374993,2,30759212,30759212,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,6/1/2021 +26286194,http://csdb.glycoscience.ru,"Carbohydrate structure database merged from bacterial, archaeal, plant and fungal parts. The Carbohydrate Structure Databases (CSDBs, http://csdb.glycoscience.ru) store structural, bibliographic, taxonomic, NMR spectroscopic, and other data on natural carbohydrates and their derivatives published in the scientific literature. The CSDB project was launched in 2005 for bacterial saccharides (as BCSDB). Currently, it includes two parts, the Bacterial CSDB and the Plant&Fungal CSDB. In March 2015, these databases were merged to the single CSDB. The combined CSDB includes information on bacterial and archaeal glycans and derivatives (the coverage is close to complete), as well as on plant and fungal glycans and glycoconjugates (almost all structures published up to 1998). CSDB is regularly updated via manual expert annotation of original publications. Both newly annotated data and data imported from other databases are manually curated. The CSDB data are exportable in a number of modern formats, such as GlycoRDF. CSDB provides additional services for simulation of (1)H, (13)C and 2D NMR spectra of saccharides, NMR-based structure prediction, glycan-based taxon clustering and other.",CSDBs,0.974537402,Carbohydrate Structure Databases,0.910097814,CSDBs,0.974537402,1,"24680503.0, 25753703.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,8/18/2015 +33211888,http://bioinfo.uth.edu/CSEADB,"CSEA-DB: an omnibus for human complex trait and cell type associations. 
During the past decade, genome-wide association studies (GWAS) have identified many genetic variants with susceptibility to several thousands of complex diseases or traits. The genetic regulation of gene expression is highly tissue-specific and cell type-specific. Recently, single-cell technology has paved the way to dissect cellular heterogeneity in human tissues. Here, we present a reference database for GWAS trait-associated cell type-specificity, named Cell type-Specific Enrichment Analysis DataBase (CSEA-DB, available at https://bioinfo.uth.edu/CSEADB/). Specifically, we curated total of 5120 GWAS summary statistics data for a wide range of human traits and diseases followed by rigorous quality control. We further collected >900 000 cells from the leading consortia such as Human Cell Landscape, Human Cell Atlas, and extensive literature mining, including 752 tissue cell types from 71 adult and fetal tissues across 11 human organ systems. The tissues and cell types were annotated with Uberon and Cell Ontology. By applying our deTS algorithm, we conducted 10 250 480 times of trait-cell type associations, reporting a total of 598 (11.68%) GWAS traits with at least one significantly associated cell type. In summary, CSEA-DB could serve as a repository of association map for human complex traits and their underlying cell types, manually curated GWAS, and single-cell transcriptome resources.",CSEA-DB,0.997746289,Cell type-Specific Enrichment Analysis DataBase,0.910774702,CSEA-DB,0.997746289,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +"25038066, 31432427",http://probe.uib.no/csf-pr,"In-depth characterization of the cerebrospinal fluid (CSF) proteome displayed through the CSF proteome resource (CSF-PR). 
In this study, the human cerebrospinal fluid (CSF) proteome was mapped using three different strategies prior to Orbitrap LC-MS/MS analysis: SDS-PAGE and mixed mode reversed phase-anion exchange for mapping the global CSF proteome, and hydrazide-based glycopeptide capture for mapping glycopeptides. A maximal protein set of 3081 proteins (28,811 peptide sequences) was identified, of which 520 were identified as glycoproteins from the glycopeptide enrichment strategy, including 1121 glycopeptides and their glycosylation sites. To our knowledge, this is the largest number of identified proteins and glycopeptides reported for CSF, including 417 glycosylation sites not previously reported. From parallel plasma samples, we identified 1050 proteins (9739 peptide sequences). An overlap of 877 proteins was found between the two body fluids, whereas 2204 proteins were identified only in CSF and 173 only in plasma. All mapping results are freely available via the new CSF Proteome Resource (http://probe.uib.no/csf-pr), which can be used to navigate the CSF proteome and help guide the selection of signature peptides in targeted quantitative proteomics.",CSF-PR,0.993137106,Cerebrospinal Fluid Proteome Resource,0.987415892,CSF-PR,0.993137106,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +28704505,"http://cosbi.ee.ncku.edu.tw/CSmiRTar/, http://cosbi4.ee.ncku.edu.tw/CSmiRTar","CSmiRTar: Condition-Specific microRNA targets database. MicroRNAs (miRNAs) are functional RNA molecules which play important roles in the post-transcriptional regulation. miRNAs regulate their target genes by repressing translation or inducing degradation of the target genes' mRNAs. Many databases have been constructed to provide computationally predicted miRNA targets. However, they cannot provide the miRNA targets expressed in a specific tissue and related to a specific disease at the same time. 
Moreover, they cannot provide the common targets of multiple miRNAs and the common miRNAs of multiple genes at the same time. To solve these two problems, we construct a database called CSmiRTar (Condition-Specific miRNA Targets). CSmiRTar collects computationally predicted targets of 2588 human miRNAs and 1945 mouse miRNAs from four most widely used miRNA target prediction databases (miRDB, TargetScan, microRNA.org and DIANA-microT) and implements functional filters which allows users to search (i) a miRNA's targets expressed in a specific tissue or/and related to a specific disease, (ii) multiple miRNAs' common targets expressed in a specific tissue or/and related to a specific disease, (iii) a gene's miRNAs related to a specific disease, and (iv) multiple genes' common miRNAs related to a specific disease. We believe that CSmiRTar will be a useful database for biologists to study the molecular mechanisms of post-transcriptional regulation in human or mouse. CSmiRTar is available at http://cosbi.ee.ncku.edu.tw/CSmiRTar/ or http://cosbi4.ee.ncku.edu.tw/CSmiRTar/.",CSmiRTar,0.990449607,Condition-Specific miRNA Targets,0.896995284,CSmiRTar,0.990449607,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/13/2017 +32990755,http://csvs.babelomics.org,"CSVS, a crowdsourcing database of the Spanish population genetic variability. The knowledge of the genetic variability of the local population is of utmost importance in personalized medicine and has been revealed as a critical factor for the discovery of new disease variants. Here, we present the Collaborative Spanish Variability Server (CSVS), which currently contains more than 2000 genomes and exomes of unrelated Spanish individuals. This database has been generated in a collaborative crowdsourcing effort collecting sequencing data produced by local genomic projects and for other purposes. Sequences have been grouped by ICD10 upper categories. A web interface allows querying the database removing one or more ICD10 categories. 
In this way, aggregated counts of allele frequencies of the pseudo-control Spanish population can be obtained for diseases belonging to the category removed. Interestingly, in addition to pseudo-control studies, some population studies can be made, as, for example, prevalence of pharmacogenomic variants, etc. In addition, this genomic data has been used to define the first Spanish Genome Reference Panel (SGRP1.0) for imputation. This is the first local repository of variability entirely produced by a crowdsourcing effort and constitutes an example for future initiatives to characterize local variability worldwide. CSVS is also part of the GA4GH Beacon network. CSVS can be accessed at: http://csvs.babelomics.org/.",CSVS,0.992008924,Collaborative Spanish Variability Server,0.858899653,CSVS,0.992008924,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +23193294,http://insulatordb.uthsc.edu,"CTCFBSDB 2.0: a database for CTCF-binding sites and genome organization. CTCF is a highly conserved transcriptional regulator protein that performs diverse functions such as regulating gene expression and organizing the 3D structure of the genome. Here, we describe recent updates to a database of CTCF-binding sites, CTCFBSDB (http://insulatordb.uthsc.edu/), which now contains almost 15 million CTCF-binding sequences in 10 species. Since the original publication of the database, studies of the 3D structure of the genome, such as those provided by Hi-C experiments, have suggested that CTCF plays an important role in mediating intra- and inter-chromosomal interactions. To reflect this important progress, we have integrated CTCF-binding sites with genomic topological domains defined using Hi-C data. 
Additionally, the updated database includes new features enabled by new CTCF-binding site data, including binding site occupancy and the ability to visualize overlapping CTCF-binding sites determined in separate experiments.",CTCFBSDB,0.996337473,NA,0,CTCFBSDB,0.996337473,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2012 +32294193,http://www.origin-gene.cn/database/ctcRbase,"ctcRbase: the gene expression database of circulating tumor cells and microemboli. . Circulating tumor cells/microemboli (CTCs/CTMs) are malignant cells that depart from cancerous lesions and shed into the bloodstream. Analysis of CTCs can allow the investigation of tumor cell biomarker expression from a non-invasive liquid biopsy. To date, high-throughput technologies have become a powerful tool to provide a genome-wide view of transcriptomic changes associated with CTCs/CTMs. These data provided us much information to understand the tumor heterogeneity, and the underlying molecular mechanism of tumor metastases. Unfortunately, these data have been deposited into various repositories, and a uniform resource for the cancer metastasis is still unavailable. To this end, we integrated previously published transcriptome datasets of CTCs/CTMs and constructed a web-accessible database. The first release of ctcRbase contains 526 CTCs/CTM samples across seven cancer types. The expression of 14 631 mRNAs and 3642 long non-coding RNAs of CTCs/CTMs were included. Experimental validations from the published literature are also included. Since CTCs/CTMs are considered to be precursors of metastases, ctcRbase also collected the expression data of primary tumors and metastases, which allows user to discover a unique 'circulating tumor cell gene signature' that is distinct from primary tumor and metastases. An easy-to-use database was constructed to query and browse CTCs/CTMs genes. 
ctcRbase can be freely accessible at http://www.origin-gene.cn/database/ctcRbase/.",ctcRbase,0.997189045,NA,0,ctcRbase,0.997189045,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +29351546,"http://ctdbase.org, http://doi.org/10.1289/EHP2873","Accessing an Expanded Exposure Science Module at the Comparative Toxicogenomics Database. SUMMARY:The Comparative Toxicogenomics Database (CTD; http://ctdbase.org) is a free resource that provides manually curated information on chemical, gene, phenotype, and disease relationships to advance understanding of the effect of environmental exposures on human health. Four core content areas are independently curated: chemical-gene interactions, chemical-disease and gene-disease associations, chemical-phenotype interactions, and environmental exposure data (e.g., effects of chemical stressors on humans). Since releasing exposure data in 2015, we have vastly increased our coverage of chemicals and disease/phenotype outcomes; greatly expanded access to exposure content; added search capability by stressors, cohorts, population demographics, and measured outcomes; and created user-specified displays of content. These enhancements aim to facilitate human studies by allowing comparisons among experimental parameters and across studies involving specified chemicals, populations, or outcomes. Integration of data among CTD's four content areas and external data sets, such as Gene Ontology annotations and pathway information, links exposure data with over 1.8 million chemical-gene, chemical-disease and gene-disease interactions. Our analysis tools reveal direct and inferred relationships among the data and provide opportunities to generate predictive connections between environmental exposures and population-level health outcomes. 
https://doi.org/10.1289/EHP2873.",CTD,0.997465014,Comparative Toxicogenomics Database,0.988830974,CTD,0.997465014,1,"23093600.0, 25326323.0, 27651457.0, 29846728.0, 30247620.0, 33068428.0","27170236.0, 23093600.0, 25326323.0, 27651457.0, 29846728.0, 30247620.0, 33068428.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/18/2018 +"23093600, 25326323, 27651457, 29846728, 30247620, 33068428",http://ctdbase.org,"The Comparative Toxicogenomics Database: update 2013. The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) provides information about interactions between environmental chemicals and gene products and their relationships to diseases. Chemical-gene, chemical-disease and gene-disease interactions manually curated from the literature are integrated to generate expanded networks and predict many novel associations between different data types. CTD now contains over 15 million toxicogenomic relationships. To navigate this sea of data, we added several new features, including DiseaseComps (which finds comparable diseases that share toxicogenomic profiles), statistical scoring for inferred gene-disease and pathway-chemical relationships, filtering options for several tools to refine user analysis and our new Gene Set Enricher (which provides biological annotations that are enriched for gene sets). To improve data visualization, we added a Cytoscape Web view to our ChemComps feature, included color-coded interactions and created a 'slim list' for our MEDIC disease vocabulary (allowing diseases to be grouped for meta-analysis, visualization and better data management). CTD continues to promote interoperability with external databases by providing content and cross-links to their sites. 
Together, this wealth of expanded chemical-gene-disease data, combined with novel ways to analyze and view content, continues to help users generate testable hypotheses about the molecular mechanisms of environmental diseases.",CTD,0.996501962,Comparative Toxicogenomics Database,0.980877916,CTD,0.996501962,6,29351546,"27170236.0, 29351546.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +27170236,http://dx.doi.org/10.1289/EHP174,"Advancing Exposure Science through Chemical Data Curation and Integration in the Comparative Toxicogenomics Database. Background Exposure science studies the interactions and outcomes between environmental stressors and human or ecological receptors. To augment its role in understanding human health and the exposome, we aimed to centralize and integrate exposure science data into the broader biological framework of the Comparative Toxicogenomics Database (CTD), a public resource that promotes understanding of environmental chemicals and their effects on human health. Objectives We integrated exposure data within the CTD to provide a centralized, freely available resource that facilitates identification of connections between real-world exposures, chemicals, genes/proteins, diseases, biological processes, and molecular pathways. Methods We developed a manual curation paradigm that captures exposure data from the scientific literature using controlled vocabularies and free text within the context of four primary exposure concepts: stressor, receptor, exposure event, and exposure outcome. Using data from the Agricultural Health Study, we have illustrated the benefits of both centralization and integration of exposure information with CTD core data. 
Results We have described our curation process, demonstrated how exposure data can be accessed and analyzed in the CTD, and shown how this integration provides a broad biological context for exposure data to promote mechanistic understanding of environmental influences on human health. Conclusions Curation and integration of exposure data within the CTD provides researchers with new opportunities to correlate exposures with human health outcomes, to identify underlying potential molecular mechanisms, and to improve understanding about the exposome. Citation Grondin CJ, Davis AP, Wiegers TC, King BL, Wiegers JA, Reif DM, Hoppin JA, Mattingly CJ. 2016. Advancing exposure science through chemical data curation and integration in the Comparative Toxicogenomics Database. Environ Health Perspect 124:1592-1599; http://dx.doi.org/10.1289/EHP174.",CTD,0.888477489,Comparative Toxicogenomics Database,0.731422563,CTD,0.888477489,1,NA,"29351546.0, 23093600.0, 25326323.0, 27651457.0, 29846728.0, 30247620.0, 33068428.0",low_prob_best_name,remove,NA,"merge all ""dup name"" IDs",FALSE POS: CLASS,NA,NA,5/12/2016 +26322998,http://nipgr.res.in/ctdb.html,"CTDB: An Integrated Chickpea Transcriptome Database for Functional and Applied Genomics. Chickpea is an important grain legume used as a rich source of protein in human diet. The narrow genetic diversity and limited availability of genomic resources are the major constraints in implementing breeding strategies and biotechnological interventions for genetic enhancement of chickpea. We developed an integrated Chickpea Transcriptome Database (CTDB), which provides the comprehensive web interface for visualization and easy retrieval of transcriptome data in chickpea. The database features many tools for similarity search, functional annotation (putative function, PFAM domain and gene ontology) search and comparative gene expression analysis. 
The current release of CTDB (v2.0) hosts transcriptome datasets with high quality functional annotation from cultivated (desi and kabuli types) and wild chickpea. A catalog of transcription factor families and their expression profiles in chickpea are available in the database. The gene expression data have been integrated to study the expression profiles of chickpea transcripts in major tissues/organs and various stages of flower development. The utilities, such as similarity search, ortholog identification and comparative gene expression have also been implemented in the database to facilitate comparative genomic studies among different legumes and Arabidopsis. Furthermore, the CTDB represents a resource for the discovery of functional molecular markers (microsatellites and single nucleotide polymorphisms) between different chickpea types. We anticipate that integrated information content of this database will accelerate the functional and applied genomic research for improvement of chickpea. The CTDB web service is freely available at http://nipgr.res.in/ctdb.html.",CTDB,0.979035616,Chickpea Transcriptome Database,0.944384923,CTDB,0.979035616,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/31/2015 +32490069,http://physionet.org/content/ctu-uhb-ctgdb/1.0.0,"Annotation dataset of the cardiotocographic recordings constituting the ""CTU-CHB intra-partum CTG database"". The proposed dataset provides annotations for the 552 cardiotocographic (CTG) recordings included in the publicly available ""CTU-CHB intra-partum CTG database"" from Physionet (https://physionet.org/content/ctu-uhb-ctgdb/1.0.0/). Each CTG recording is composed by two simultaneously acquired signals: i) the fetal heart rate (FHR) and ii) the maternal tocogram (representing uterine activity). Annotations consist in the detection of starting and ending points of specific CTG events on both FHR signal and maternal tocogram. 
Annotated events for the FHR signal are the bradycardia, tachycardia, acceleration and deceleration episodes. Annotated events for the maternal tocogram are the uterine contractions. The dataset also reports classification of each deceleration as early, late, variable or prolonged, in relation to the presence of a uterine contraction. Annotations were obtained by an expert gynecologist with the support of CTG Analyzer, a dedicated software application for automatic analysis of digital CTG recordings. These annotations can be useful in the development, testing and comparison of algorithms for the automatic analysis of digital CTG recordings, which can make CTG interpretation more objective and independent from clinician's experience.",CTU-CHB,0.761122217,NA,0,CTU-CHB,0.761122217,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,5/19/2020 +32183712,http://cmb.bnu.edu.cn/alt_iso/index.php,"CuAS: a database of annotated transcripts generated by alternative splicing in cucumbers. BACKGROUND:Alternative splicing (AS) plays a critical regulatory role in modulating transcriptome and proteome diversity. In particular, it increases the functional diversity of proteins. Recent genome-wide analysis of AS using RNA-Seq has revealed that AS is highly pervasive in plants. Furthermore, it has been suggested that most AS events are subject to tissue-specific regulation. DESCRIPTION:To reveal the functional characteristics induced by AS and tissue-specific splicing events, a database for exploring these characteristics is needed, especially in plants. To address these goals, we constructed a database of annotated transcripts generated by alternative splicing in cucumbers (CuAS: http://cmb.bnu.edu.cn/alt_iso/index.php) that integrates genomic annotations, isoform-level functions, isoform-level features, and tissue-specific AS events among multiple tissues. 
CuAS supports a retrieval system that identifies unique IDs (gene ID, isoform ID, UniProt ID, and gene name), chromosomal positions, and gene families, and a browser for visualization of each gene. CONCLUSION:We believe that CuAS could be helpful for revealing the novel functional characteristics induced by AS and tissue-specific AS events in cucumbers. CuAS is freely available at http://cmb.bnu.edu.cn/alt_iso/index.php.",CuAS,0.986856222,NA,0,CuAS,0.986856222,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/18/2020 +22139934,http://epsf.bmad.bii.a-star.edu.sg/cube/db/html/home.html,"Cube-DB: detection of functional divergence in human protein families. Cube-DB is a database of pre-evaluated results for detection of functional divergence in human/vertebrate protein families. The analysis is organized around the nomenclature associated with the human proteins, but based on all currently available vertebrate genomes. Using full genomes enables us, through a mutual-best-hit strategy, to construct comparable taxonomical samples for all paralogues under consideration. Functional specialization is scored on the residue level according to two models of behavior after divergence: heterotachy and homotachy. In the first case, the positions on the protein sequence are scored highly if they are conserved in the reference group of orthologs, and overlap poorly with the residue type choice in the paralogs groups (such positions will also be termed functional determinants). The second model additionally requires conservation within each group of paralogs (functional discriminants). The scoring functions are phylogeny independent, but sensitive to the residue type similarity. The results are presented as a table of per-residue scores, and mapped onto related structure (when available) via browser-embedded visualization tool. They can also be downloaded as a spreadsheet table, and sessions for two additional molecular visualization tools. 
The database interface is available at http://epsf.bmad.bii.a-star.edu.sg/cube/db/html/home.html.",Cube-DB,0.995983998,NA,0,Cube-DB,0.995983998,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2011 +30321383,http://cucurbitgenomics.org,"Cucurbit Genomics Database (CuGenDB): a central portal for comparative and functional genomics of cucurbit crops. The Cucurbitaceae family (cucurbit) includes several economically important crops, such as melon, cucumber, watermelon, pumpkin, squash and gourds. During the past several years, genomic and genetic data have been rapidly accumulated for cucurbits. To store, mine, analyze, integrate and disseminate these large-scale datasets and to provide a central portal for the cucurbit research and breeding community, we have developed the Cucurbit Genomics Database (CuGenDB; http://cucurbitgenomics.org) using the Tripal toolkit. The database currently contains all available genome and expressed sequence tag (EST) sequences, genetic maps, and transcriptome profiles for cucurbit species, as well as sequence annotations, biochemical pathways and comparative genomic analysis results such as synteny blocks and homologous gene pairs between different cucurbit species. A set of analysis and visualization tools and user-friendly query interfaces have been implemented in the database to facilitate the usage of these large-scale data by the community. In particular, two new tools have been developed in the database, a 'SyntenyViewer' to view genome synteny between different cucurbit species and an 'RNA-Seq' module to analyze and visualize gene expression profiles. 
Both tools have been packed as Tripal extension modules that can be adopted in other genomics databases developed using the Tripal system.",CuGenDB,0.997316897,Cucurbit Genomics Database,0.988422981,CuGenDB,0.997316897,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +22493527,http://cell-lines.toku-e.com,"Cell-culture Database: Literature-based reference tool for human and mammalian experimentallybased cell culture applications. Unlabelled Cultivation of primary cells is essential for biotechnological research and viral vaccine production. Significant advances in cell and tissue culture, more specifically, advances in the transfection and transduction of human and mammalian cells, has directly led to giant leaps forward in fields such as cancer research, genetics, and public health. At the same time, a corresponding increase has been seen in available cell culture related literature. Often times, due to the sheer number and degree of variability of available literature, it is a challenge to find specific, yet practical cell culture related information.To respond to this rising tide of information, a practical, user-friendly database containing cell-lines, plasmids, vectors, selection agents, concentrations and media was created. The database currently consists of over 3,900 cell lines (Human and Mammalian) and 1,900 plasmids/vectors collected from 2,700 pieces of published literature. The database is continually being expanded and it is hoped that through the continual addition of unique data, the database can further serve and enrich the work of cell and molecular biologists, life-science professionals, and the worldwide scientific community at large. 
Availability The database is available for free at http://cell-lines.toku-e.com/",NA,0,culture Database,0.51308019,culture Database,0.51308019,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/17/2012 +23550061,http://bcb.dfci.harvard.edu/ovariancancer,"curatedOvarianData: clinically annotated data for the ovarian cancer transcriptome. This article introduces a manually curated data collection for gene expression meta-analysis of patients with ovarian cancer and software for reproducible preparation of similar databases. This resource provides uniformly prepared microarray data for 2970 patients from 23 studies with curated and documented clinical metadata. It allows users to efficiently identify studies and patient subgroups of interest for analysis and to perform meta-analysis immediately without the challenges posed by harmonizing heterogeneous microarray technologies, study designs, expression data processing methods and clinical data formats. We confirm that the recently proposed biomarker CXCL12 is associated with patient survival, independently of stage and optimal surgical debulking, which was possible only through meta-analysis owing to insufficient sample sizes of the individual studies. The database is implemented as the curatedOvarianData Bioconductor package for the R statistical computing language, providing a comprehensive and flexible resource for clinically oriented investigation of the ovarian cancer transcriptome. The package and pipeline for producing it are available from http://bcb.dfci.harvard.edu/ovariancancer.",curatedOvarianData,0.78992039,NA,0,curatedOvarianData,0.78992039,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/2/2013 +27164589,http://www.helsinki.fi/psychology/groups/visualcognition,"CVD2014-A Database for Evaluating No-Reference Video Quality Assessment Algorithms. In this paper, we present a new video database: CVD2014-Camera Video Database. 
In contrast to previous video databases, this database uses real cameras rather than introducing distortions via post-processing, which results in a complex distortion space in regard to the video acquisition process. CVD2014 contains a total of 234 videos that are recorded using 78 different cameras. Moreover, this database contains the observer-specific quality evaluation scores rather than only providing mean opinion scores. We have also collected open-ended quality descriptions that are provided by the observers. These descriptions were used to define the quality dimensions for the videos in CVD2014. The dimensions included sharpness, graininess, color balance, darkness, and jerkiness. At the end of this paper, a performance study of image and video quality algorithms for predicting the subjective video quality is reported. For this performance study, we proposed a new performance measure that accounts for observer variance. The performance study revealed that there is room for improvement regarding the video quality assessment algorithms. The CVD2014 video database has been made publicly available for the research community. All video sequences and corresponding subjective ratings can be obtained from the CVD2014 project page (http://www.helsinki.fi/psychology/groups/visualcognition/).",CVD2014,0.991893431,NA,0,CVD2014,0.991893431,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/3/2016 +24344970,http://pkuxxj.pku.edu.cn/CVDHD,"CVDHD: a cardiovascular disease herbal database for drug discovery and network pharmacology. Background Cardiovascular disease (CVD) is the leading cause of death and associates with multiple risk factors. Herb medicines have been used to treat CVD long ago in china and several natural products or derivatives (e.g., aspirin and reserpine) are most common drugs all over the world. 
The objective of this work was to construct a systematic database for drug discovery based on natural products separated from CVD-related medicinal herbs and to research on action mechanism of herb medicines. Description The cardiovascular disease herbal database (CVDHD) was designed to be a comprehensive resource for virtual screening and drug discovery from natural products isolated from medicinal herbs for cardiovascular-related diseases. CVDHD comprises 35230 distinct molecules and their identification information (chemical name, CAS registry number, molecular formula, molecular weight, international chemical identifier (InChI) and SMILES), calculated molecular properties (AlogP, number of hydrogen bond acceptor and donors, etc.), docking results between all molecules and 2395 target proteins, cardiovascular-related diseases, pathways and clinical biomarkers. All 3D structures were optimized in the MMFF94 force field and can be freely accessed. Conclusions CVDHD integrated medicinal herbs, natural products, CVD-related target proteins, docking results, diseases and clinical biomarkers. By using the methods of virtual screening and network pharmacology, CVDHD will provide a platform to streamline drug/lead discovery from natural products and explore the action mechanism of medicinal herbs. CVDHD is freely available at http://pkuxxj.pku.edu.cn/CVDHD.",CVDHD,0.995064616,cardiovascular disease herbal database,0.954253152,CVDHD,0.995064616,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/18/2013 +27226753,http://cyanobacteria.myspecies.info,"Capturing biodiversity: linking a cyanobacteria culture collection to the ""scratchpads"" virtual research environment enhances biodiversity knowledge. Background Currently, cyanobacterial diversity is examined using a polyphasic approach by assessing morphological and molecular data (Kom√ɬÉ√ǬÉ√ɬÇ√Ǭ°rek 2015). 
However, the comparison of morphological and genetic data is sometimes hindered by the lack of cultures of several cyanobacterial morphospecies and inadequate morphological data of sequenced strains (Rajaniemi et al. 2005). Furthermore, in order to evaluate the phenotypic plasticity within defined taxa, the variability observed in cultures has to be compared to the range in natural variation (Kom√ɬÉ√ǬÉ√ɬÇ√Ǭ°rek and Mare√ɬÉ√ǬÖ√ɬÇ√Ǭ° 2012). Thus, new tools are needed to aggregate, link and process data in a meaningful way, in order to properly study and understand cyanodiversity. New information An online database on cyanobacteria has been created, namely the Cyanobacteria culture collection (CCC) (http://cyanobacteria.myspecies.info/) using as case studies cyanobacterial strains isolated from lakes of Greece, which are part of the AUTH culture collection (School of Biology, Aristotle University of Thessaloniki). The database hosts, for the first time, information and data such as morphology/morphometry, biogeography, phylogeny, microphotographs, distribution maps, toxicology and biochemical traits of the strains. All this data are structured managed, and presented online and are publicly accessible with a recently developed tool, namely ""Scratchpads"", a taxon-centric virtual research environment allowing browsing the taxonomic classification and retrieving various kinds of relevant information for each taxon.",Scratchpads,0.894429704,Cyanobacteria culture collection,0.949196661,Cyanobacteria culture collection,0.949196661,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/6/2016 +26305368,http://nfmc.res.in/ckb/index.html,"Cyanobacterial KnowledgeBase (CKB), a Compendium of Cyanobacterial Genomes and Proteomes. Cyanobacterial KnowledgeBase (CKB) is a free access database that contains the genomic and proteomic information of 74 fully sequenced cyanobacterial genomes belonging to seven orders. The database also contains tools for sequence analysis. 
The Species report and the gene report provide details about each species and gene (including sequence features and gene ontology annotations) respectively. The database also includes cyanoBLAST, an advanced tool that facilitates comparative analysis, among cyanobacterial genomes and genomes of E. coli (prokaryote) and Arabidopsis (eukaryote). The database is developed and maintained by the Sub-Distributed Informatics Centre (sponsored by the Department of Biotechnology, Govt. of India) of the National Facility for Marine Cyanobacteria, a facility dedicated to marine cyanobacterial research. CKB is freely available at http://nfmc.res.in/ckb/index.html.",CKB,0.926491141,Cyanobacterial KnowledgeBase,0.959425698,Cyanobacterial KnowledgeBase,0.959425698,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/25/2015 +27899668,http://genome.microbedb.jp/cyanobase,"CyanoBase:√ɬÉ√ǬÇ√ɬÇ√Ǭ†a large-scale update on its 20th anniversary. The first ever cyanobacterial genome sequence was determined two decades ago and CyanoBase (http://genome.microbedb.jp/cyanobase), the first database for cyanobacteria was simultaneously developed to allow this genomic information to be used more efficiently. Since then, CyanoBase has constantly been extended and has received several updates. Here, we describe a new large-scale update of the database, which coincides with its 20th anniversary. We have expanded the number of cyanobacterial genomic sequences from 39 to 376 species, which consists of 86 complete and 290 draft genomes. We have also optimized the user interface for large genomic data to include the use of semantic web technologies and JBrowse and have extended community-based reannotation resources through the re-annotation of Synechocystis sp. PCC 6803 by the cyanobacterial research community. 
These updates have markedly improved CyanoBase, providing cyanobacterial genome annotations as references for cyanobacterial research.",CyanoBase,0.994817376,NA,0,CyanoBase,0.994817376,1,24275496,24275496,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/29/2016 +24275496,"http://genome.microbedb.jp/cyanobase, http://genome.microbedb.jp/rhizobase","CyanoBase and RhizoBase: databases of manually curated annotations for cyanobacterial and rhizobial genomes. To understand newly sequenced genomes of closely related species, comprehensively curated reference genome databases are becoming increasingly important. We have extended CyanoBase (http://genome.microbedb.jp/cyanobase), a genome database for cyanobacteria, and newly developed RhizoBase (http://genome.microbedb.jp/rhizobase), a genome database for rhizobia, nitrogen-fixing bacteria associated with leguminous plants. Both databases focus on the representation and reusability of reference genome annotations, which are continuously updated by manual curation. Domain experts have extracted names, products and functions of each gene reported in the literature. To ensure effectiveness of this procedure, we developed the TogoAnnotation system offering a web-based user interface and a uniform storage of annotations for the curators of the CyanoBase and RhizoBase databases. The number of references investigated for CyanoBase increased from 2260 in our previous report to 5285, and for RhizoBase, we perused 1216 references. The results of these intensive annotations are displayed on the GeneView pages of each database. 
Advanced users can also retrieve this information through the representational state transfer-based web application programming interface in an automated manner.",CyanoBase,0.946007907,NA,0,CyanoBase,0.946007907,1,27899668,27899668,low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",TRUE POS: two resources; name and URL of first will be correct; second is lost,NA,NA,11/25/2013 +22829745,http://cyanoexpress.sysbiolab.eu,"CyanoEXpress: A web database for exploration and visualisation of the integrated transcriptome of cyanobacterium Synechocystis sp. PCC6803. Unlabelled Synechocystis sp. PCC6803 is one of the best studied cyanobacteria and an important model organism for our understanding of photosynthesis. The early availability of its complete genome sequence initiated numerous transcriptome studies, which have generated a wealth of expression data. Analysis of the accumulated data can be a powerful tool to study transcription in a comprehensive manner and to reveal underlying regulatory mechanisms, as well as to annotate genes whose functions are yet unknown. However, use of divergent microarray platforms, as well as distributed data storage make meta-analyses of Synechocystis expression data highly challenging, especially for researchers with limited bioinformatic expertise and resources. To facilitate utilisation of the accumulated expression data for a wider research community, we have developed CyanoEXpress, a web database for interactive exploration and visualisation of transcriptional response patterns in Synechocystis. CyanoEXpress currently comprises expression data for 3073 genes and 178 environmental and genetic perturbations obtained in 31 independent studies. At present, CyanoEXpress constitutes the most comprehensive collection of expression data available for Synechocystis and can be freely accessed. 
Availability The database is available for free at http://cyanoexpress.sysbiolab.eu.",CyanoEXpress,0.994646311,NA,0,CyanoEXpress,0.994646311,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/6/2012 +23175607,http://cyanolyase.genouest.org,"CyanoLyase: a database of phycobilin lyase sequences, motifs and functions. CyanoLyase (http://cyanolyase.genouest.org/) is a manually curated sequence and motif database of phycobilin lyases and related proteins. These enzymes catalyze the covalent ligation of chromophores (phycobilins) to specific binding sites of phycobiliproteins (PBPs). The latter constitute the building bricks of phycobilisomes, the major light-harvesting systems of cyanobacteria and red algae. Phycobilin lyases sequences are poorly annotated in public databases. Sequences included in CyanoLyase were retrieved from all available genomes of these organisms and a few others by similarity searches using biochemically characterized enzyme sequences and then classified into 3 clans and 32 families. Amino acid motifs were computed for each family using Protomata learner. CyanoLyase also includes BLAST and a novel pattern matching tool (Protomatch) that allow users to rapidly retrieve and annotate lyases from any new genome. In addition, it provides phylogenetic analyses of all phycobilin lyases families, describes their function, their presence/absence in all genomes of the database (phyletic profiles) and predicts the chromophorylation of PBPs in each strain. The site also includes a thorough bibliography about phycobilin lyases and genomes included in the database. This resource should be useful to scientists and companies interested in natural or artificial PBPs, which have a number of biotechnological applications, notably as fluorescent markers.",CyanoLyase,0.991719246,NA,0,CyanoLyase,0.991719246,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2012 +25632108,http://lag.ihb.ac.cn/cyanomics,"CyanOmics: an integrated database of omics for the model cyanobacterium Synechococcus sp. PCC 7002. . 
Cyanobacteria are an important group of organisms that carry out oxygenic photosynthesis and play vital roles in both the carbon and nitrogen cycles of the Earth. The annotated genome of Synechococcus sp. PCC 7002, as an ideal model cyanobacterium, is available. A series of transcriptomic and proteomic studies of Synechococcus sp. PCC 7002 cells grown under different conditions have been reported. However, no database of such integrated omics studies has been constructed. Here we present CyanOmics, a database based on the results of Synechococcus sp. PCC 7002 omics studies. CyanOmics comprises one genomic dataset, 29 transcriptomic datasets and one proteomic dataset and should prove useful for systematic and comprehensive analysis of all those data. Powerful browsing and searching tools are integrated to help users directly access information of interest with enhanced visualization of the analytical results. Furthermore, Blast is included for sequence-based similarity searching and Cluster 3.0, as well as the R hclust function is provided for cluster analyses, to increase CyanOmics's usefulness. To the best of our knowledge, it is the first integrated omics analysis database for cyanobacteria. This database should further understanding of the transcriptional patterns, and proteomic profiling of Synechococcus sp. PCC 7002 and other cyanobacteria. Additionally, the entire database framework is applicable to any sequenced prokaryotic genome and could be applied to other integrated omics analysis projects. Database URL: http://lag.ihb.ac.cn/cyanomics.",CyanOmics,0.996836007,NA,0,CyanOmics,0.996836007,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/28/2015 +33320930,http://www.csbg-jlu.info/CyanoPATH,"CyanoPATH: a knowledgebase of genome-scale functional repertoire for toxic cyanobacterial blooms. . CyanoPATH is a database that curates and analyzes the common genomic functional repertoire for cyanobacteria harmful algal blooms (CyanoHABs) in eutrophic waters. 
Based on the literature of empirical studies and genome/protein databases, it summarizes four types of information: common biological functions (pathways) driving CyanoHABs, customized pathway maps, classification of blooming type based on databases and the genomes of cyanobacteria. A total of 19 pathways are reconstructed, which are involved in the utilization of macronutrients (e.g. carbon, nitrogen, phosphorus and sulfur), micronutrients (e.g. zinc, magnesium, iron, etc.) and other resources (e.g. light and vitamins) and in stress resistance (e.g. lead and copper). These pathways, comprised of both transport and biochemical reactions, are reconstructed with proteins from NCBI and reactions from KEGG and visualized with self-created transport/reaction maps. The pathways are hierarchical and consist of subpathways, protein/enzyme complexes and constituent proteins. New cyanobacterial genomes can be annotated and visualized for these pathways and compared with existing species. This set of genomic functional repertoire is useful in analyzing aquatic metagenomes and metatranscriptomes in CyanoHAB research. Most importantly, it establishes a link between genome and ecology. All these reference proteins, pathways and maps and genomes are free to download at http://www.csbg-jlu.info/CyanoPATH.",CyanoPATH,0.997585058,NA,0,CyanoPATH,0.997585058,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +23185330,http://bif.uohyd.ac.in/cpc,"CyanoPhyChe: a database for physico-chemical properties, structure and biochemical pathway information of cyanobacterial proteins. CyanoPhyChe is a user friendly database that one can browse through for physico-chemical properties, structure and biochemical pathway information of cyanobacterial proteins. 
We downloaded all the protein sequences from the cyanobacterial genome database for calculating the physico-chemical properties, such as molecular weight, net charge of protein, isoelectric point, molar extinction coefficient, canonical variable for solubility, grand average hydropathy, aliphatic index, and number of charged residues. Based on the physico-chemical properties, we provide the polarity, structural stability and probability of a protein entering in to an inclusion body (PEPIB). We used the data generated on physico-chemical properties, structure and biochemical pathway information of all cyanobacterial proteins to construct CyanoPhyChe. The data can be used for optimizing methods of expression and characterization of cyanobacterial proteins. Moreover, the 'Search' and data export options provided will be useful for proteome analysis. Secondary structure was predicted for all the cyanobacterial proteins using PSIPRED tool and the data generated is made accessible to researchers working on cyanobacteria. In addition, external links are provided to biological databases such as PDB and KEGG for molecular structure and biochemical pathway information, respectively. External links are also provided to different cyanobacterial databases. CyanoPhyChe can be accessed from the following URL: http://bif.uohyd.ac.in/cpc.",CyanoPhyChe,0.996099055,NA,0,CyanoPhyChe,0.996099055,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2012 +28440791,http://lege.ciimar.up.pt/cyanotype,"A curated database of cyanobacterial strains relevant for modern taxonomy and phylogenetic studies. The dataset herein described lays the groundwork for an online database of relevant cyanobacterial strains, named CyanoType (http://lege.ciimar.up.pt/cyanotype). It is a database that includes categorized cyanobacterial strains useful for taxonomic, phylogenetic or genomic purposes, with associated information obtained by means of a literature-based curation. 
The dataset lists 371 strains and represents the first version of the database (CyanoType v.1). Information for each strain includes strain synonymy and/or co-identity, strain categorization, habitat, accession numbers for molecular data, taxonomy and nomenclature notes according to three different classification schemes, hierarchical automatic classification, phylogenetic placement according to a selection of relevant studies (including this), and important bibliographic references. The database will be updated periodically, namely by adding new strains meeting the criteria for inclusion and by revising and adding up-to-date metadata for strains already listed. A global 16S rDNA-based phylogeny is provided in order to assist users when choosing the appropriate strains for their studies.",CyanoType,0.9971928,NA,0,CyanoType,0.9971928,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/25/2017 +33170358,http://www.drks.de,"The rationale and development of a CyberKnife√ɬÉ√ǬÇ√ɬÇ√Ǭ© registry for pediatric patients with CNS lesions. Background CyberKnife√ɬÉ√ǬÇ√ɬÇ√Ǭ© Radiosurgery (CKRS) is a recognized treatment concept for CNS lesions in adults due to its high precision and efficacy beside a high patient comfort. However, scientific evidence for this treatment modality in pediatric patients is scarce. A dedicated registry was designed to document CyberKnife√ɬÉ√ǬÇ√ɬÇ√Ǭ© procedures in children, aiming to test the hypothesis that it is safe and efficient for the treatment of CNS lesions. Methods The CyberKnife√ɬÉ√ǬÇ√ɬÇ√Ǭ© registry is designed as a retrospective and prospective multicenter observational study (German Clinical Trials Register ( https://www.drks.de ), DRKS-ID 00016973). Patient recruitment will be ongoing throughout a 5-year period and includes collection of demographic, treatment, clinical, and imaging data. Follow-up results will be monitored for 10 years. All data will be registered in a centralized electronic database at the Charit√ɬÉ√ǬÉ√ɬÇ√Ǭ©-Universit√ɬÉ√ǬÉ√ɬÇ√Ǭ§tsmedizin. 
The primary endpoint is stable disease for benign and vascular lesions at 5 years of follow-up and local tumor control for malign lesions at 1- and 2-year follow-up. Secondary endpoints are radiation toxicity, side effects, and neurocognitive development. Conclusion The CyberKnife√ɬÉ√ǬÇ√ɬÇ√Ǭ© registry intends to generate scientific evidence for all treatment- and outcome-related aspects in pediatric patients with treated CNS lesions. The registry may define safety and efficacy of CKRS in children and serve as a basis for future clinical trials, inter-methodological comparisons and changes of treatment algorithms.",CyberKnife√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√ǬÉ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√ǬÇ,0.852204263,NA,0,CyberKnife√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√ǬÉ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√ǬÇ,0.852204263,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/10/2020 +25378319,http://www.cyclebase.org--an,"Cyclebase 3.0: a multi-organism database on cell-cycle regulation and phenotypes. The eukaryotic cell division cycle is a highly regulated process that consists of a complex series of events and involves thousands of proteins. Researchers have studied the regulation of the cell cycle in several organisms, employing a wide range of high-throughput technologies, such as microarray-based mRNA expression profiling and quantitative proteomics. Due to its complexity, the cell cycle can also fail or otherwise change in many different ways if important genes are knocked out, which has been studied in several microscopy-based knockdown screens. The data from these many large-scale efforts are not easily accessed, analyzed and combined due to their inherent heterogeneity. To address this, we have created Cyclebase--available at http://www.cyclebase.org--an online database that allows users to easily visualize and download results from genome-wide cell-cycle-related experiments. 
In Cyclebase version 3.0, we have updated the content of the database to reflect changes to genome annotation, added new mRNA and protein expression data, and integrated cell-cycle phenotype information from high-content screens and model-organism databases. The new version of Cyclebase also features a new web interface, designed around an overview figure that summarizes all the cell-cycle-related data for a gene.",Cyclebase--a,0.934412281,NA,0,Cyclebase--a,0.934412281,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/5/2014 +27354697,http://cyclo-lib.mduse.com,"Cyclo-lib: a database of computational molecular dynamics simulations of cyclodextrins. Motivation Cyclodextrins (CDs) are amongst the most versatile/multi-functional molecules used in molecular research and chemical applications. They are natural cyclic oligosaccharides typically employed to encapsulate hydrophobic groups in their central cavity. This allows solubilizing, protecting or reducing the toxicity of a large variety of different molecules including drugs, dyes and surfactant agents. In spite of their great potential, atomic level information of these molecules, which is key for their function, is really scarce. Computational Molecular Dynamics (MD) simulations have the potential to efficiently fill this gap, providing structural-dynamic information at atomic level in time scales ranging from ps to √ɬÉ√Ǭé√ɬÇ√Ǭºs. Results Cyclo-lib is a database with a publicly accessible web-interface containing structural and dynamic analysis obtained from computational MD simulation trajectories (250√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬÇns long) of native and modified CDs in explicit water molecules. Cyclo-lib currently includes 70 CDs typically employed for fundamental and industrial research. Tools for comparative analysis between different CDs, as well as to restrict the analysis to specific time-segments within the trajectories are also available. 
Cyclo-lib provides atomic resolution information aimed to complement experimental results performed with the same molecules. Availability and implementation The database is freely available under http://cyclo-lib.mduse.com/ CONTACT: Angel.Pineiro@usc.es.",Cyclo-lib,0.97895883,NA,0,Cyclo-lib,0.97895883,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/27/2016 +26048563,http://cyclops.ccbr.utoronto.ca,"CYCLoPs: A Comprehensive Database Constructed from Automated Analysis of Protein Abundance and Subcellular Localization Patterns in Saccharomyces cerevisiae. Changes in protein subcellular localization and abundance are central to biological regulation in eukaryotic cells. Quantitative measures of protein dynamics in vivo are therefore highly useful for elucidating specific regulatory pathways. Using a combinatorial approach of yeast synthetic genetic array technology, high-content screening, and machine learning classifiers, we developed an automated platform to characterize protein localization and abundance patterns from images of log phase cells from the open-reading frame-green fluorescent protein collection in the budding yeast, Saccharomyces cerevisiae. For each protein, we produced quantitative profiles of localization scores for 16 subcellular compartments at single-cell resolution to trace proteome-wide relocalization in conditions over time. We generated a collection of √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº300,000 micrographs, comprising more than 20 million cells and √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº9 billion quantitative measurements. The images depict the localization and abundance dynamics of more than 4000 proteins under two chemical treatments and in a selected mutant background. Here, we describe CYCLoPs (Collection of Yeast Cells Localization Patterns), a web database resource that provides a central platform for housing and analyzing our yeast proteome dynamics datasets at the single cell level. CYCLoPs version 1.0 is available at http://cyclops.ccbr.utoronto.ca. 
CYCLoPs will provide a valuable resource for the yeast and eukaryotic cell biology communities and will be updated as new experiments become available.",CYCLoPs,0.996802568,Collection of Yeast Cells Localization Patterns,0.984272313,CYCLoPs,0.996802568,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/15/2015 +34782688,http://cysticfibrosismap.github.io,"CyFi-MAP: an interactive pathway-based resource for cystic fibrosis. Cystic fibrosis (CF) is a life-threatening autosomal recessive disease caused by more than 2100 mutations in the CF transmembrane conductance regulator (CFTR) gene, generating variability in disease severity among individuals with CF sharing the same CFTR genotype. Systems biology can assist in the collection and visualization of CF data to extract additional biological significance and find novel therapeutic targets. Here, we present the CyFi-MAP-a disease map repository of CFTR molecular mechanisms and pathways involved in CF. Specifically, we represented the wild-type (wt-CFTR) and the F508del associated processes (F508del-CFTR) in separate submaps, with pathways related to protein biosynthesis, endoplasmic reticulum retention, export, activation/inactivation of channel function, and recycling/degradation after endocytosis. CyFi-MAP is an open-access resource with specific, curated and continuously updated information on CFTR-related pathways available online at https://cysticfibrosismap.github.io/ . This tool was developed as a reference CF pathway data repository to be continuously updated and used worldwide in CF research.",CyFi-MAP,0.988857135,NA,0,CyFi-MAP,0.988857135,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2021 +23475683,http://www.cypalleles.ki.se,"Update on allele nomenclature for human cytochromes P450 and the Human Cytochrome P450 Allele (CYP-allele) Nomenclature Database. Interindividual variability in xenobiotic metabolism and drug response is extensive and genetic factors play an important role in this variation. 
A majority of clinically used drugs are substrates for the cytochrome P450 (CYP) enzyme system and interindividual variability in expression and function of these enzymes is a major factor for explaining individual susceptibility for adverse drug reactions and drug response. Because of the existence of many polymorphic CYP genes, for many of which the number of allelic variants is continually increasing, a universal and official nomenclature system is important. Since 1999, all functionally relevant polymorphic CYP alleles are named and published on the Human Cytochrome P450 Allele (CYP-allele) Nomenclature Web site (http://www.cypalleles.ki.se). Currently, the database covers nomenclature of more than 660 alleles in a total of 30 genes that includes 29 CYPs as well as the cytochrome P450 oxidoreductase (POR) gene. On the CYP-allele Web site, each gene has its own Webpage, which lists the alleles with their nucleotide changes, their functional consequences, and links to publications identifying or characterizing the alleles. CYP2D6, CYP2C9, CYP2C19, and CYP3A4 are the most important CYPs in terms of drug metabolism, which is also reflected in their corresponding highest number of Webpage hits at the CYP-allele Web site.The main advantage of the CYP-allele database is that it offers a rapid online publication of CYP-alleles and their effects and provides an overview of peer-reviewed data to the scientific community. Here, we provide an update of the CYP-allele database and the associated nomenclature.",CYP-allele,0.778351414,50,0.744195342,CYP-allele,0.778351414,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2013 +22843230,http://cypdatabase.sjtu.edu.cn,"CYP-nsSNP: a specialized database focused on effect of non-synonymous SNPs on function of CYPs. The cytochrome P450 (CYP) enzymes play the central role in synthesis of endogenous substances and metabolism of xenobiotics. 
The substitution of single amino acid caused by non-synonymous single nucleotide polymorphism (nsSNP) will lead to the change in enzymatic activity of CYP isozymes, especially the drugmetabolizing ability. CYP-nsSNP is a specialized database focused on the effect of nsSNPs on enzymatic activity of CYPs. Its unique feature lies in providing the qualitative and quantitative description of the CYP variants in terms of enzymatic activity. In addition, the database also offers the general information about nsSNP and compounds that are involved in corresponding enzymatic reaction. The current CYP-nsSNP can be accessible at http://cypdatabase.sjtu.edu.cn/ and includes more than 300 genetic variants of 12 CYP isozymes together with about 100 compounds. In order to keep the accuracy of information within database, all experimental data were collected from the scientific literatures, and the users who conducted research to identify the novel CYP variants are encouraged to contribute their data. Therefore, CYP-nsSNP can be considered as a valuable source for experimental and computational studies of impact of genetic polymorphism on the function of CYPs.",CYP-nsSNP,0.985920737,NA,0,CYP-nsSNP,0.985920737,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2012 +27553277,http://www.cysteinome.org,"Cysteinome: The first comprehensive database for proteins with targetable cysteine and their covalent inhibitors. The covalent modification of intrinsically nucleophilic cysteine in proteins is crucial for diverse biochemical events. Bioinformatics approaches may prove useful in the design and discovery of covalent molecules targeting the cysteine in proteins to tune their functions and activities. Herein, we describe the Cysteinome, the first online database that provides a rich resource for the display, search and analysis of structure, function and related annotation for proteins with targetable cysteine as well as their covalent modulators. 
To this end, Cysteinome compiles 462 proteins with targetable cysteine from 122 different species along with 1217 covalent modulators curated from existing literatures. Proteins are annotated with a detailed description of protein families, biological process and related diseases. In addition, covalent modulators are carefully annotated with chemical name, chemical structure, binding affinity, physicochemical properties, molecule type and related diseases etc. The Cysteinome database may serve as a useful platform for the identification of crucial proteins with targetable cysteine in certain cellular context. Furthermore, it may help biologists and chemists for the design and discovery of covalent chemical probes or inhibitors homing at functional cysteine of critical protein targets implicated in various physiological or disease process. The Cysteinome database is freely available to public at http://www.cysteinome.org/.",Cysteinome,0.994982839,NA,0,Cysteinome,0.994982839,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/20/2016 +26895996,http://servoy.infocomsa.com/cfc_database,"[Cystic Fibrosis Cloud database: An information system for storage and management of clinical and microbiological data of cystic fibrosis patients]. The epidemiological and clinical management of cystic fibrosis (CF) patients suffering from acute pulmonary exacerbations or chronic lung infections demands continuous updating of medical and microbiological processes associated with the constant evolution of pathogens during host colonization. In order to monitor the dynamics of these processes, it is essential to have expert systems capable of storing and subsequently extracting the information generated from different studies of the patients and microorganisms isolated from them. 
In this work we have designed and developed an on-line database based on an information system that allows to store, manage and visualize data from clinical studies and microbiological analysis of bacteria obtained from the respiratory tract of patients suffering from cystic fibrosis. The information system, named Cystic Fibrosis Cloud database is available on the http://servoy.infocomsa.com/cfc_database site and is composed of a main database and a web-based interface, which uses Servoy's product architecture based on Java technology. Although the CFC database system can be implemented as a local program for private use in CF centers, it can also be used, updated and shared by different users who can access the stored information in a systematic, practical and safe manner. The implementation of the CFC database could have a significant impact on the monitoring of respiratory infections, the prevention of exacerbations, the detection of emerging organisms, and the adequacy of control strategies for lung infections in CF patients.",CFC,0.6286695,Cystic Fibrosis Cloud database,0.86593811,Cystic Fibrosis Cloud database,0.86593811,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2016 +23161685,http://AtlasGeneticsOncology.org,"Atlas of genetics and cytogenetics in oncology and haematology in 2013. The Atlas of Genetics and Cytogenetics in Oncology and Haematology (http://AtlasGeneticsOncology.org) is a peer-reviewed internet journal/encyclopaedia/database focused on genes implicated in cancer, cytogenetics and clinical entities in cancer and cancer-prone hereditary diseases. The main goal of the Atlas is to provide review articles that describe complementary topics, namely, genes, genetic abnormalities, histopathology, clinical diagnoses and a large iconography. 
This description, which was historically based on karyotypic abnormalities and in situ hybridization (fluorescence in situ hybridization) techniques, now benefits from comparative genomic hybridization and massive sequencing, uncovering a tremendous amount of genetic rearrangements. As the Atlas combines different types of information (genes, genetic abnormalities, histopathology, clinical diagnoses and external links), its content is currently unique. The Atlas is a cognitive tool for fundamental and clinical research and has developed into an encyclopaedic work. In clinical practice, it contributes to the cytogenetic diagnosis and may guide treatment decision making, particularly regarding rare diseases (because they are numerous and are frequently encountered). Readers as well as the authors of the Atlas are researchers and/or clinicians.",NA,0,Cytogenetics in,0.680524933,Cytogenetics in,0.680524933,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/17/2012 +34256256,http://www.cmvdb.dqweilab-sjtu.com/index.php,"CytomegaloVirusDb: Multi-omics knowledge database for cytomegaloviruses. Cytomegalovirus infection is a significant health concern and need further exploration in immunologic response mechanisms during primary and reactivated CMV infection. In this work, we evaluated the whole genomes and proteomes of different CMV species and developed an integrated open-access platform, CytomegaloVirusDb, a multi-Omics knowledge database for Cytomegaloviruses. The resource is categorized into the main sections ""Genomics,"" ""Proteomics,"" ""Immune response,"" and ""Therapeutics,"". The database is annotated with the list of all CMV species included in the study, and available information is freely accessible at http://www.cmvdb.dqweilab-sjtu.com/index.php. Various parameters used in the analysis for each section were primarily based on the whole genome or proteome of each specie. 
The platform provided datasets are open to access for researchers to obtain CMV species-specific information. This will help further to explore the dynamics of CMV-specific immune response and therapeutics. This platform is a useful resource to aid in advancing research against Cytomegaloviruses.",CytomegaloVirusDb,0.857445955,NA,0,CytomegaloVirusDb,0.857445955,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/9/2021 +21738316,http://www.cro-m.eu/CytReD,"CytReD: A database collecting human cytokinome information. Unlabelled The cytokines/related receptors system represents a complex regulatory network that is involved in those chronic inflammatory processes which lead to many diseases as cancers. We developed a Cytokine Receptor Database (CytReD) to collect information on cytokine receptors related to their biological activity, gene data, protein structures and diseases in which these and their ligands are implicated. This large set of information may be used by researchers as well as by physicians or clinicians to identify which cytokines, reported in the literature, are important in a given disease and, therefore, useful for purposes of diagnosis or prognostic. Availability The database is available for free at http://www.cro-m.eu/CytReD/",CytReD,0.998036659,Cytokine Receptor Database,0.98959893,CytReD,0.998036659,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/26/2011 +30117424,http://zfish.cn,"[Development of resources, technologies and services at the China Zebrafish Resource Center]. With the rapid growth of the Chinese zebrafish community, there is an increasing demand for various types of zebrafish-related resources and technologies. The China Zebrafish Resource Center (CZRC, web: http://zfish.cn) was established at the Institute of Hydrobiology (IHB), Chinese Academy of Sciences (CAS) in 2012. 
Till now, CZRC has built the largest zebrafish aquaculture unit in China, organized a resource bank containing more than 1200 zebrafish lines and more than 10 000 frozen sperm samples, among which over 200 mutant and transgenic lines were generated by CZRC. CZRC has established several technical supporting platforms, such as the zebrafish husbandry and health control program of international standard, a high-efficient gene manipulation technology platform, and a stable and efficient sperm cryopreservation technology platform. The main task of CZRC is to provide different types of services to zebrafish investigators in China and worldwide, such as resource services (e.g. zebrafish lines), technical services (e.g. gene knockout) and transgenic services, consultancy services (e.g. zebrafish husbandry and health consultation), and conference services [e.g. holding regular technical training courses and biennale Chinese Zebrafish Principal Investigator Meeting (CZPM)]. After five years' development, CZRC is now recognized as one of the three major resource centers in the global zebrafish community.",CZRC,0.992718458,China Zebrafish Resource Center,0.937906504,CZRC,0.992718458,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2018 +31390943,http://www.jianglab.cn/D-lnc,"D-lnc: a comprehensive database and analytical platform to dissect the modification of drugs on lncRNA expression. Long non-coding RNAs (lncRNAs) have been proven to be implicated in the pathogenesis of various diseases. Multiple studies have demonstrated that small molecule drugs can modify lncRNA expression, which suggests a promising therapy for human diseases. Here, we constructed a comprehensive query and analytical platform D-lnc to dissect the influence of drugs on lncRNA expression. Firstly, we manually curated the experimentally validated regulations of drugs on lncRNA expression and recorded 7,825 entries between 59 drugs and 7,538 lncRNAs across five species from nearly 1,000 published papers. 
Secondly, we comprehensively screened the Connectivity Map (cMap) and the Gene Expression Omnibus (GEO) databases to obtain the drug-perturbed gene expression profiles. Through probe re-annotation of microarray data, we identified 19,946 putative associations between 1,279 drugs and 129 lncRNAs in cMap and 36,210 entries between 115 drugs and 2,360 lncRNAs in GEO. Finally, we developed an online analytical platform to predict the potential acting drugs or modified lncRNAs based on user input lncRNA sequence or drug structure through computing the similarities of lncRNA sequences or drug structures. In a word, D-lnc provides a comprehensive platform to detect the modification of drugs on lncRNA expression, which would facilitate the development of lncRNA-targeted therapeutics. D-lnc is freely available at http://www.jianglab.cn/D-lnc/ .",D-lnc,0.984383583,NA,0,D-lnc,0.984383583,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/7/2019 +27391016,http://d-place.org,"D-PLACE: A Global Database of Cultural, Linguistic and Environmental Diversity. From the foods we eat and the houses we construct, to our religious practices and political organization, to who we can marry and the types of games we teach our children, the diversity of cultural practices in the world is astounding. Yet, our ability to visualize and understand this diversity is limited by the ways it has been documented and shared: on a culture-by-culture basis, in locally-told stories or difficult-to-access repositories. In this paper we introduce D-PLACE, the Database of Places, Language, Culture, and Environment. This expandable and open-access database (accessible at https://d-place.org) brings together a dispersed corpus of information on the geography, language, culture, and environment of over 1400 human societies. 
We aim to enable researchers to investigate the extent to which patterns in cultural diversity are shaped by different forces, including shared history, demographics, migration/diffusion, cultural innovations, and environmental and ecological conditions. We detail how D-PLACE helps to overcome four common barriers to understanding these forces: i) location of relevant cultural data, (ii) linking data from distinct sources using diverse ethnonyms, (iii) variable time and place foci for data, and (iv) spatial and historical dependencies among cultural groups that present challenges for analysis. D-PLACE facilitates the visualisation of relationships among cultural groups and between people and their environments, with results downloadable as tables, on a map, or on a linguistic tree. We also describe how D-PLACE can be used for exploratory, predictive, and evolutionary analyses of cultural diversity by a range of users, from members of the worldwide public interested in contrasting their own cultural practices with those of other societies, to researchers using large-scale computational phylogenetic analyses to study cultural evolution. In summary, we hope that D-PLACE will enable new lines of investigation into the major drivers of cultural change and global patterns of cultural diversity.",D-PLACE,0.995508909,NA,0,D-PLACE,0.995508909,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/8/2016 +23203878,http://d2p2.pro,"D√ɬÉ√ǬÇ√ɬÇ√Ǭ≤P√ɬÉ√ǬÇ√ɬÇ√Ǭ≤: database of disordered protein predictions. We present the Database of Disordered Protein Prediction (D(2)P(2)), available at http://d2p2.pro (including website source code). A battery of disorder predictors and their variants, VL-XT, VSL2b, PrDOS, PV2, Espritz and IUPred, were run on all protein sequences from 1765 complete proteomes (to be updated as more genomes are completed). Integrated with these results are all of the predicted (mostly structured) SCOP domains using the SUPERFAMILY predictor. 
These disorder/structure annotations together enable comparison of the disorder predictors with each other and examination of the overlap between disordered predictions and SCOP domains on a large scale. D(2)P(2) will increase our understanding of the interplay between disorder and structure, the genomic distribution of disorder, and its evolutionary history. The parsed data are made available in a unified format for download as flat files or SQL tables either by genome, by predictor, or for the complete set. An interactive website provides a graphical view of each protein annotated with the SCOP domains and disordered regions from all predictors overlaid (or shown as a consensus). There are statistics and tools for browsing and comparing genomes and their disorder within the context of their position on the tree of life.",D(2)P(2,0.978753158,Database of Disordered Protein Prediction,0.95127369,D(2)P(2,0.978753158,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +33938221,http://www.d3pharma.com/D3DistalMutation/index.php,"D3DistalMutation: a Database to Explore the Effect of Distal Mutations on Enzyme Activity. Enzyme activity is affected by amino acid mutations, particularly mutations near the active site. Increasing evidence has shown that distal mutations more than 10 √ɬÉ√ǬÉ√ɬÇ√Ç¬Ö away from the active site may significantly affect enzyme activity. However, it is difficult to study the enzyme regulation mechanism of distal mutations due to the lack of a systematic collection of three-dimensional (3D) structures, highlighting distal mutation site and the corresponding enzyme activity change. Therefore, we constructed a distal mutation database, namely, D3DistalMutation, which relates the distal mutation to enzyme activity. As a result, we observed that approximately 80% of distal mutations could affect enzyme activity and 72.7% of distal mutations would decrease or abolish enzyme activity in D3DistalMutation. 
Only 6.6% of distal mutations in D3DistalMutation could increase enzyme activity, which have great potential to the industrial field. Among these mutations, the Y to F, S to D, and T to D mutations are most likely to increase enzyme activity, which sheds some light on industrial catalysis. Distal mutations decreasing enzyme activity in the allosteric pocket play an indispensable role in allosteric drug design. In addition, the pockets in the enzyme structures are provided to explore the enzyme regulation mechanism of distal mutations. D3DistalMutation is accessible free of charge at https://www.d3pharma.com/D3DistalMutation/index.php.",D3DistalMutation,0.934222619,NA,0,D3DistalMutation,0.934222619,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/2/2021 +25232097,http://ageing-map.org,"The Digital Ageing Atlas: integrating the diversity of age-related changes into a unified resource. Multiple studies characterizing the human ageing phenotype have been conducted for decades. However, there is no centralized resource in which data on multiple age-related changes are collated. Currently, researchers must consult several sources, including primary publications, in order to obtain age-related data at various levels. To address this and facilitate integrative, system-level studies of ageing we developed the Digital Ageing Atlas (DAA). The DAA is a one-stop collection of human age-related data covering different biological levels (molecular, cellular, physiological, psychological and pathological) that is freely available online (http://ageing-map.org/). Each of the >3000 age-related changes is associated with a specific tissue and has its own page displaying a variety of information, including at least one reference. Age-related changes can also be linked to each other in hierarchical trees to represent different types of relationships. 
In addition, we developed an intuitive and user-friendly interface that allows searching, browsing and retrieving information in an integrated and interactive fashion. Overall, the DAA offers a new approach to systemizing ageing resources, providing a manually-curated and readily accessible source of age-related changes.",DAA,0.846934438,Ageing Atlas,0.621505678,DAA,0.846934438,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/17/2014 +22079417,http://www.iupui.edu,"Disease associated cytokine SNPs database: an annotation and dissemination model. Cytokines mediate crucial functions in innate and adaptive immunity. They play valuable roles in immune cell growth and lineage specification, and are associated with various disease pathologies. A large number of low, medium and high throughput studies have implicated association of single nucleotide polymorphisms (SNPs) in cytokine genes with diseases. A preponderance of such experiments has not shown any causality of an identified SNP to the associated disease. Instead, they have identified statistically significant SNP-disease associations; it is likely that some of these cytokine gene variants may directly or indirectly cause the disease phenotype(s). To fill this knowledge gap and derive study parameters for cytokine SNP-disease causality relationships, we have designed and developed the disease associated cytokine SNP database (DACS-DB). DACS-DB has data on 456 cytokine genes, approximately 63,000 SNPs, and 853 SNP-associated diseases. In DACS-DB, among other attributes, we present functional annotation, and heterozygosity allele frequency for the SNPs, and literature-validated SNP association for diseases. Users of the DB can run queries such as the ones to find disease-associated SNPs in a cytokine gene, and all the SNPs involved in a disease. 
We have developed a web front end (available at http://www.iupui.edu/~cytosnp) to disseminate this information for immunologists, biomedical researchers, and other interested biological researchers. Since there is no such comprehensive collection of disease associated cytokine SNPs, this DB will be vital to understand the role of cytokine SNPs as markers in disease, and more importantly, in causality to disease thus helping to identify drug targets for common inflammatory diseases.",DACS-DB,0.99001509,disease associated cytokine SNP database,0.952232748,DACS-DB,0.99001509,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/10/2011 +34803258,http://www.hccbif.org/usersearch.php,"DaiCee: A database for anti-cancer compounds with targets and side effect profiles. Identification of the toxicity of compounds is more crucial before entering clinical trials. Awareness of physiochemical properties, possible targets and side effects has become a major public health issue to reduce risks. Experimental determination of analyzing the physiochemical properties of a drug, their interaction with specific receptors and identifying their side-effects remain challenging is time consuming and costly. We describe a manually compiled database named DaiCee database, which contains 2100 anticancer drugs with information on their physiochemical properties, targets of action and side effects. It includes both synthetic and herbal anti-cancer compounds. It allows the search for SMILES notation, Lipinski's and ADME/T properties, targets and side effect profiles of the drugs. This helps to identify drugs with effective anticancer properties, their toxic nature, drug-likeness for in-vitro and in-vivo experiments. It also used for comparative analysis and screening of effective anticancer drugs using available data for compounds in the database. The database will be updated regularly to provide the users with latest information. 
The database is available at the URL http://www.hccbif.org/usersearch.php.",DaiCee,0.974195957,NA,0,DaiCee,0.974195957,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/30/2020 +22110032,http://apps.sanbi.ac.za/dampd,"DAMPD: a manually curated antimicrobial peptide database. The demand for antimicrobial peptides (AMPs) is rising because of the increased occurrence of pathogens that are tolerant or resistant to conventional antibiotics. Since naturally occurring AMPs could serve as templates for the development of new anti-infectious agents to which pathogens are not resistant, a resource that contains relevant information on AMP is of great interest. To that extent, we developed the Dragon Antimicrobial Peptide Database (DAMPD, http://apps.sanbi.ac.za/dampd) that contains 1232 manually curated AMPs. DAMPD is an update and a replacement of the ANTIMIC database. In DAMPD an integrated interface allows in a simple fashion querying based on taxonomy, species, AMP family, citation, keywords and a combination of search terms and fields (Advanced Search). A number of tools such as Blast, ClustalW, HMMER, Hydrocalculator, SignalP, AMP predictor, as well as a number of other resources that provide additional information about the results are also provided and integrated into DAMPD to augment biological analysis of AMPs.",DAMPD,0.989307284,Dragon Antimicrobial Peptide Database,0.977057718,DAMPD,0.989307284,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2011 +31366898,http://www.daphnia-stressordb.uni-hamburg.de/dsdbstart.php,"Daphnia stressor database: Taking advantage of a decade of Daphnia '-omics' data for gene annotation. Gene expression patterns help to measure and characterize the effect of environmental perturbations at the cellular and organism-level. Complicating interpretation is the presence of uncharacterized or ""hypothetical"" gene functions for a large percentage of genomes. 
This is particularly evident in Daphnia genomes, which contains many regions coding for ""hypothetical proteins"" and are significantly divergent from many of the available arthropod model species, but might be ecologically important. In the present study, we developed a gene expression database, the Daphnia stressor database (http://www.daphnia-stressordb.uni-hamburg.de/dsdbstart.php), built from 90 published studies on Daphnia gene expression. Using a comparative genomics approach, we used the database to annotate D. galeata transcripts. The extensive body of literature available for Daphnia species allowed to associate stressors with gene expression patterns. We believe that our stressor based annotation strategy allows for better understanding and interpretation of the functional role of the understudied hypothetical or uncharacterized Daphnia genes, thereby increasing our understanding of Daphnia's genetic and phenotypic variability.",Daphnia stressor database,0.718929927,NA,0,Daphnia stressor database,0.718929927,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/31/2019 +23074185,http://darned.ucc.ie,"Darned in 2013: inclusion of model organisms and linking with Wikipedia. DARNED (DAtabase of RNa EDiting, available at http://darned.ucc.ie) is a centralized repository of reference genome coordinates corresponding to RNA nucleotides having altered templated identities in the process of RNA editing. The data in DARNED are derived from published datasets of RNA editing events. RNA editing instances have been identified with various methods, such as bioinformatics screenings, deep sequencing and/or biochemical techniques. Here we report our current progress in the development and expansion of the DARNED. 
In addition to novel database features the DARNED update describes inclusion of Drosophila melanogaster and Mus musculus RNA editing events and the launch of a community-based annotation in the RNA WikiProject.",DARNED,0.997423947,DAtabase of RNa,0.902838162,DARNED,0.997423947,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/15/2012 +26144527,http://sphppdashboard.cnb.csic.es,"Proteogenomics Dashboard for the Human Proteome Project. dasHPPboard is a novel proteomics-based dashboard that collects and reports the experiments produced by the Spanish Human Proteome Project consortium (SpHPP) and aims to help HPP to map the entire human proteome. We have followed the strategy of analog genomics projects like the Encyclopedia of DNA Elements (ENCODE), which provides a vast amount of data on human cell lines experiments. The dashboard includes results of shotgun and selected reaction monitoring proteomics experiments, post-translational modifications information, as well as proteogenomics studies. We have also processed the transcriptomics data from the ENCODE and Human Body Map (HBM) projects for the identification of specific gene expression patterns in different cell lines and tissues, taking special interest in those genes having little proteomic evidence available (missing proteins). Peptide databases have been built using single nucleotide variants and novel junctions derived from RNA-Seq data that can be used in search engines for sample-specific protein identifications on the same cell lines or tissues. The dasHPPboard has been designed as a tool that can be used to share and visualize a combination of proteomic and transcriptomic data, providing at the same time easy access to resources for proteogenomics analyses. The dasHPPboard can be freely accessed at: http://sphppdashboard.cnb.csic.es.",dasHPPboard,0.991571605,NA,0,dasHPPboard,0.991571605,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/16/2015 +26553799,http://lisanwanglab.org/DASHR,"DASHR: database of small human noncoding RNAs. 
Small non-coding RNAs (sncRNAs) are highly abundant RNAs, typically <100 nucleotides long, that act as key regulators of diverse cellular processes. Although thousands of sncRNA genes are known to exist in the human genome, no single database provides searchable, unified annotation, and expression information for full sncRNA transcripts and mature RNA products derived from these larger RNAs. Here, we present the Database of small human noncoding RNAs (DASHR). DASHR contains the most comprehensive information to date on human sncRNA genes and mature sncRNA products. DASHR provides a simple user interface for researchers to view sequence and secondary structure, compare expression levels, and evidence of specific processing across all sncRNA genes and mature sncRNA products in various human tissues. DASHR annotation and expression data covers all major classes of sncRNAs including microRNAs (miRNAs), Piwi-interacting (piRNAs), small nuclear, nucleolar, cytoplasmic (sn-, sno-, scRNAs, respectively), transfer (tRNAs), and ribosomal RNAs (rRNAs). Currently, DASHR (v1.0) integrates 187 smRNA high-throughput sequencing (smRNA-seq) datasets with over 2.5 billion reads and annotation data from multiple public sources. DASHR contains annotations for ~48,000 human sncRNA genes and mature sncRNA products, 82% of which are expressed in one or more of the curated tissues. DASHR is available at http://lisanwanglab.org/DASHR.",DASHR,0.997241557,Database of small human noncoding RNAs,0.921087686,DASHR,0.997241557,1,NA,30668832,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/8/2015 +30668832,http://lisanwanglab.org/DASHRv2,"DASHR 2.0: integrated database of human small non-coding RNA genes and mature products. 
Motivation Small non-coding RNAs (sncRNAs, <100 nts) are highly abundant RNAs that regulate diverse and often tissue-specific cellular processes by associating with transcription factor complexes or binding to mRNAs. While thousands of sncRNA genes exist in the human genome, no single resource provides searchable, unified annotation, expression and processing information for full sncRNA transcripts and mature RNA products derived from these larger RNAs. Results Our goal is to establish a complete catalog of annotation, expression, processing, conservation, tissue-specificity and other biological features for all human sncRNA genes and mature products derived from all major RNA classes. DASHR (Database of small human non-coding RNAs) v2.0 database is the first that integrates human sncRNA gene and mature products profiles obtained from multiple RNA-seq protocols. Altogether, 185 tissues/cell types and sncRNA annotations and >800 curated experiments from ENCODE and GEO/SRA across multiple RNA-seq protocols for both GRCh38/hg38 and GRCh37/hg19 assemblies are integrated in DASHR. Moreover, DASHR is the first to contain both known and novel, previously un-annotated sncRNA loci identified by unsupervised segmentation (13 times more loci with 1 678 800 total). Additionally, DASHR v2.0 adds >3 200 000 annotations for non-small RNA genes and other genomic features (long-noncoding RNAs, mRNAs, promoters, repeats). Furthermore, DASHR v2.0 introduces an enhanced user interface, interactive experiment-by-locus table view, sncRNA locus sorting and filtering by biological features. All annotation and expression information directly downloadable and accessible as UCSC genome browser tracks. Availability and implementation DASHR v2.0 is freely available at https://lisanwanglab.org/DASHRv2. 
Supplementary information Supplementary data are available at Bioinformatics online.",DASHR,0.994447291,Database of small human non-coding RNAs,0.823471373,DASHR,0.994447291,1,NA,26553799,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,3/1/2019 +25435547,http://www.plantenergy.uwa.edu.au/applications/mpic,"MPIC: a mitochondrial protein import components database for plant and non-plant species. In the 2 billion years since the endosymbiotic event that gave rise to mitochondria, variations in mitochondrial protein import have evolved across different species. With the genomes of an increasing number of plant species sequenced, it is possible to gain novel insights into mitochondrial protein import pathways. We have generated the Mitochondrial Protein Import Components (MPIC) Database (DB; http://www.plantenergy.uwa.edu.au/applications/mpic) providing searchable information on the protein import apparatus of plant and non-plant mitochondria. An in silico analysis was carried out, comparing the mitochondrial protein import apparatus from 24 species representing various lineages from Saccharomyces cerevisiae (yeast) and algae to Homo sapiens (human) and higher plants, including Arabidopsis thaliana (Arabidopsis), Oryza sativa (rice) and other more recently sequenced plant species. Each of these species was extensively searched and manually assembled for analysis in the MPIC DB. The database presents an interactive diagram in a user-friendly manner, allowing users to select their import component of interest. The MPIC DB presents an extensive resource facilitating detailed investigation of the mitochondrial protein import machinery and allowing patterns of conservation and divergence to be recognized that would otherwise have been missed. 
To demonstrate the usefulness of the MPIC DB, we present a comparative analysis of the mitochondrial protein import machinery in plants and non-plant species, revealing plant-specific features that have evolved.",DB,0.85708645,Database,0.979652524,Database,0.979652524,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2014 +34791106,http://dcmp.vit.ac.in,"DCMP: database of cancer mutant protein domains. . Protein domains are functional and structural units of proteins. They are responsible for a particular function that contributes to protein's overall role. Because of this essential role, the majority of the genetic variants occur in the domains. In this study, the somatic mutations across 21 cancer types were mapped to the individual protein domains. To map the mutations to the domains, we employed the whole human proteome to predict the domains in each protein sequence and recognized about 149 668 domains. A novel Perl-API program was developed to convert the protein domain positions into genomic positions, and users can freely access them through GitHub. We determined the distribution of protein domains across 23 chromosomes with the help of these genomic positions. Interestingly, chromosome 19 has more number of protein domains in comparison with other chromosomes. Then, we mapped the cancer mutations to all the protein domains. Around 46-65% of mutations were mapped to their corresponding protein domains, and significantly mutated domains for all the cancer types were determined using the local false discovery ratio (locfdr). The chromosome positions for all the protein domains can be verified using the cross-reference ensemble database. 
Database URL: https://dcmp.vit.ac.in/.",NA,0,database of cancer mutant protein domains,0.897191525,database of cancer mutant protein domains,0.897191525,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/1/2021 +28294141,http://gear.comp-sysbio.org,"GEAR: A database of Genomic Elements Associated with drug Resistance. Drug resistance is becoming a serious problem that leads to the failure of standard treatments, which is generally developed because of genetic mutations of certain molecules. Here, we present GEAR (A database of Genomic Elements Associated with drug Resistance) that aims to provide comprehensive information about genomic elements (including genes, single-nucleotide polymorphisms and microRNAs) that are responsible for drug resistance. Right now, GEAR contains 1631 associations between 201 human drugs and 758 genes, 106 associations between 29 human drugs and 66 miRNAs, and 44 associations between 17 human drugs and 22 SNPs. These relationships are firstly extracted from primary literature with text mining and then manually curated. The drug resistome deposited in GEAR provides insights into the genetic factors underlying drug resistance. In addition, new indications and potential drug combinations can be identified based on the resistome. The GEAR database can be freely accessed through http://gear.comp-sysbio.org.",GEAR,0.687603295,database of Genomic Elements Associated with drug Resistance,0.862406161,database of Genomic Elements Associated with drug Resistance,0.862406161,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/15/2017 +23529715,http://www.dirum.org,"Resource-use measurement based on patient recall: issues and challenges for economic evaluation. Accurate resource-use measurement is challenging within an economic evaluation, but is a fundamental requirement for estimating efficiency. 
Considerable research effort has been concentrated on the appropriate measurement of outcomes and the policy implications of economic evaluation, while methods for resource-use measurement have been relatively neglected. Recently, the Database of Instruments for Resource Use Measurement (DIRUM) was set up at http://www.dirum.org to provide a repository where researchers can share resource-use measures and methods. A workshop to discuss the issues was held at the University of Birmingham in October 2011. Based on material presented at the workshop, this article highlights the state of the art of UK instruments for resource-use data collection based on patient recall. We consider methodological issues in the design and analysis of resource-use instruments, and the challenges associated with designing new questionnaires. We suggest a method of developing a good practice guideline, and identify some areas for future research. Consensus amongst health economists has yet to be reached on many aspects of resource-use measurement. We argue that researchers should now afford costing methodologies the same attention as outcome measurement, and we hope that this Current Opinion article will stimulate a debate on methods of resource-use data collection and establish a research agenda to improve the precision and accuracy of resource-use estimates.",DIRUM,0.710099598,Database of Instruments for Resource Use Measurement,0.910336412,Database of Instruments for Resource Use Measurement,0.910336412,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/1/2013 +33331653,http://www.dataman.co.nz,"DATAMAN: A global database of nitrous oxide and ammonia emission factors for excreta deposited by livestock and land-applied manure. Nitrous oxide (N2 O), ammonia (NH3 ), and methane (CH4 ) emissions from the manure management chain of livestock production systems are important contributors to greenhouse gases (GHGs) and NH3 emitted by human activities. 
Several studies have evaluated manure-related emissions and associated key variables at regional, national, or continental scales. However, there have been few studies focusing on the drivers of these emissions using a global dataset. An international project was created (DATAMAN) to develop a global database on GHG and NH3 emissions from the manure management chain (housing, storage, and field) to identify key variables influencing emissions and ultimately to refine emission factors (EFs) for future national GHG inventories and NH3 emission reporting. This paper describes the ""field"" database that focuses on N2 O and NH3 EFs from land-applied manure and excreta deposited by grazing livestock. We collated relevant information (EFs, manure characteristics, soil properties, and climatic conditions) from published peer-reviewed research, conference papers, and existing databases. The database, containing 5,632 observations compiled from 184 studies, was relatively evenly split between N2 O and NH3 (56 and 44% of the EF values, respectively). The N2 O data were derived from studies conducted in 21 countries on five continents, with New Zealand, the United Kingdom, Kenya, and Brazil representing 86% of the data. The NH3 data originated from studies conducted in 17 countries on four continents, with the United Kingdom, Denmark, Canada, and The Netherlands representing 79% of the data. Wet temperate climates represented 90% of the total database. The DATAMAN field database is available at http://www.dataman.co.nz.",DATAMAN,0.970632815,NA,0,DATAMAN,0.970632815,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/22/2021 +33174603,http://datanator.info,"Datanator: an integrated database of molecular data for quantitatively modeling cellular behavior. Integrative research about multiple biochemical subsystems has significant potential to help advance biology, bioengineering and medicine. 
However, it is difficult to obtain the diverse data needed for integrative research. To facilitate biochemical research, we developed Datanator (https://datanator.info), an integrated database and set of tools for finding clouds of multiple types of molecular data about specific molecules and reactions in specific organisms and environments, as well as data about chemically-similar molecules and reactions in phylogenetically-similar organisms in similar environments. Currently, Datanator includes metabolite concentrations, RNA modifications and half-lives, protein abundances and modifications, and reaction rate constants about a broad range of organisms. Going forward, we aim to launch a community initiative to curate additional data. Datanator also provides tools for filtering, visualizing and exporting these data clouds. We believe that Datanator can facilitate a wide range of research from integrative mechanistic models, such as whole-cell models, to comparative data-driven analyses of multiple organisms.",Datanator,0.994000673,NA,0,Datanator,0.994000673,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +29485625,http://amp.pharm.mssm.edu/datasets2tools,"Datasets2Tools, repository and search engine for bioinformatics datasets, tools and canned analyses. Biomedical data repositories such as the Gene Expression Omnibus (GEO) enable the search and discovery of relevant biomedical digital data objects. Similarly, resources such as OMICtools, index bioinformatics tools that can extract knowledge from these digital data objects. However, systematic access to pre-generated 'canned' analyses applied by bioinformatics tools to biomedical digital data objects is currently not available. Datasets2Tools is a repository indexing 31,473 canned bioinformatics analyses applied to 6,431 datasets. The Datasets2Tools repository also contains the indexing of 4,901 published bioinformatics software tools, and all the analyzed datasets. 
Datasets2Tools enables users to rapidly find datasets, tools, and canned analyses through an intuitive web interface, a Google Chrome extension, and an API. Furthermore, Datasets2Tools provides a platform for contributing canned analyses, datasets, and tools, as well as evaluating these digital objects according to their compliance with the findable, accessible, interoperable, and reusable (FAIR) principles. By incorporating community engagement, Datasets2Tools promotes sharing of digital resources to stimulate the extraction of knowledge from biomedical research data. Datasets2Tools is freely available from: http://amp.pharm.mssm.edu/datasets2tools.",Datasets2Tools,0.979977262,NA,0,Datasets2Tools,0.979977262,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/27/2018 +28187413,http://bis.zju.edu.cn/DaTo,"DaTo: an atlas of biological databases and tools. This work presents DaTo, a semi-automatically generated world atlas of biological databases and tools. It extracts raw information from all PubMed articles which contain exact URLs in their abstract section, followed by a manual curation of the abstract and the URL accessibility. DaTo features a user-friendly query interface, providing extensible URL-related annotations, such as the status, the location and the country of the URL. A graphical interaction network browser has also been integrated into the DaTo web interface to facilitate exploration of the relationship between different tools and databases with respect to their ontology-based semantic similarity. Using DaTo, the geographical locations, the health statuses, as well as the journal associations were evaluated with respect to the historical development of bioinformatics tools and databases over the last 20 years. We hope it will inspire the biological community to gain a systematic insight into bioinformatics resources. 
DaTo is accessible via http://bis.zju.edu.cn/DaTo/.",DaTo,0.992877007,NA,0,DaTo,0.992877007,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/18/2016 +25278960,http://echelon.cmmt.ubc.ca/dbaccess,"DaVIE: Database for the Visualization and Integration of Epigenetic data. One of the challenges in the analysis of large data sets, particularly in a population-based setting, is the ability to perform comparisons across projects. This has to be done in such a way that the integrity of each individual project is maintained, while ensuring that the data are comparable across projects. These issues are beginning to be observed in human DNA methylation studies, as the Illumina 450k platform and next generation sequencing-based assays grow in popularity and decrease in price. This increase in productivity is enabling new insights into epigenetics, but also requires the development of pipelines and software capable of handling the large volumes of data. The specific problems inherent in creating a platform for the storage, comparison, integration, and visualization of DNA methylation data include data storage, algorithm efficiency and ability to interpret the results to derive biological meaning from them. Databases provide a ready-made solution to these issues, but as yet no tools exist that that leverage these advantages while providing an intuitive user interface for interpreting results in a genomic context. We have addressed this void by integrating a database to store DNA methylation data with a web interface to query and visualize the database and a set of libraries for more complex analysis. The resulting platform is called DaVIE: Database for the Visualization and Integration of Epigenetics data. DaVIE can use data culled from a variety of sources, and the web interface includes the ability to group samples by sub-type, compare multiple projects and visualize genomic features in relation to sites of interest. 
We have used DaVIE to identify patterns of DNA methylation in specific projects and across different projects, identify outlier samples, and cross-check differentially methylated CpG sites identified in specific projects across large numbers of samples. A demonstration server has been setup using GEO data at http://echelon.cmmt.ubc.ca/dbaccess/, with login ""guest"" and password ""guest."" Groups may download and install their own version of the server following the instructions on the project's wiki.",DaVIE,0.972131968,NA,0,DaVIE,0.972131968,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/18/2014 +34679165,http://www.mmvdb.dqweilab-sjtu.com/index.php,"MMV-db: vaccinomics and RNA-based therapeutics database for infectious hemorrhagic fever-causing mammarenaviruses. The recent viral outbreaks and the current pandemic situation urges us to timely address any emerging viral infections by designing therapeutic strategies. Multi-omics and therapeutic data are of great interest to develop early remedial interventions. This work provides a therapeutic data platform (Mammarenavirus (MMV)-db) for pathogenic mammarenaviruses with potential catastrophic effects on human health around the world. The database integrates vaccinomics and RNA-based therapeutics data for seven human pathogenic MMVs associated with severe viral hemorrhagic fever and lethality in humans. Protein-specific cytotoxic T lymphocytes, B lymphocytes, helper T-cell and interferon-inducing epitopes were mapped using a cluster of immune-omics-based algorithms and tools for the seven human pathogenic viral species. Furthermore, the physiochemical and antigenic properties were also explored to guide protein-specific multi-epitope subunit vaccine for each species. Moreover, highly efficacious RNAs (small Interfering RNA (siRNA), microRNA and single guide RNA (sgRNA)) after extensive genome-based analysis with therapeutic relevance were explored. 
All the therapeutic RNAs were further classified and listed on the basis of predicted higher efficacy. The online platform (http://www.mmvdb.dqweilab-sjtu.com/index.php) contains easily accessible data sets and vaccine designs with potential utility in further computational and experimental work. Conclusively, the current study provides a baseline data platform to secure better future therapeutic interventions against the hemorrhagic fever causing mammarenaviruses. Database URL: http://www.mmvdb.dqweilab-sjtu.com/index.php.",db,0.675035894,NA,0,db,0.675035894,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/1/2021 +23275696,http://www.bifku.in/DBD,"DB Dehydrogenase: an online integrated structural database on enzyme dehydrogenase. Unlabelled Dehydrogenase enzymes are almost inevitable for metabolic processes. Shortage or malfunctioning of dehydrogenases often leads to several acute diseases like cancers, retinal diseases, diabetes mellitus, Alzheimer, hepatitis B & C etc. With advancement in modern-day research, huge amount of sequential, structural and functional data are generated everyday and widens the gap between structural attributes and its functional understanding. DB Dehydrogenase is an effort to relate the functionalities of dehydrogenase with its structures. It is a completely web-based structural database, covering almost all dehydrogenases [~150 enzyme classes, ~1200 entries from ~160 organisms] whose structures are known. It is created by extracting and integrating various online resources to provide the true and reliable data and implemented by MySQL relational database through user friendly web interfaces using CGI Perl. Flexible search options are there for data extraction and exploration. To summarize, sequence, structure, function of all dehydrogenases in one place along with the necessary option of cross-referencing; this database will be utile for researchers to carry out further work in this field. 
Availability The database is available for free at http://www.bifku.in/DBD/",DB Dehydrogenase,0.939671206,NA,0,DB Dehydrogenase,0.939671206,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/13/2012 +25414358,http://fullmal.hgc.jp,"DB-AT: a 2015 update to the Full-parasites database brings a multitude of new transcriptomic data for apicomplexan parasites. The previous release of our Full-parasites database (http://fullmal.hgc.jp/) brought enhanced functionality, an expanded full-length cDNA content, and new RNA-Seq datasets from several important apicomplexan parasites. The 2015 update witnesses the major shift in the databases content with focus on diverse transcriptomes of the apicomplexan parasites. The content of the database was substantially enriched with transcriptome information for new apicomplexan parasites. The latest version covers a total of 17 species, with addition of our newly generated RNA-Seq data of a total of 909,150,388 tags. Moreover, we have generated and included two novel and unique datasets, which represent diverse nature of transcriptomes in individual parasites in vivo and in vitro. One is the data collected from 116 Indonesian patients infected with Plasmodium falciparum. The other is a series of transcriptome data collected from a total of 38 single cells of P. falciparum cultured in vitro. We believe that with the recent advances our database becomes an even better resource and a unique platform in the analysis of apicomplexan parasites and their interaction with their hosts. 
To adequately reflect the recent modifications and the current content we have changed the database name to DB-AT--DataBase of Apicomplexa Transcriptomes.",DB-AT,0.903057555,of Apicomplexa,0.682551831,DB-AT,0.903057555,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/20/2014 +26836976,"http://genomeinformatics.dce.edu/dbAARD/, http://genomeinformatics.dce.edu/AGP","dbAARD & AGP: A computational pipeline for the prediction of genes associated with age related disorders. The atrocious behavioral and physiological shift with aging accelerate occurrence of deleterious disorders. Contemporary research is focused at uncovering the role of genetic associations in age-related disorders (ARDs). While the completion of the Human Genome Project and the HapMap project has generated huge amount of data on genetic variations; Genome-Wide Association Studies (GWAS) have identified genetic variations, essentially SNPs associated with several disorders including ARDs. However, a repository that houses all such ARD associations is lacking. The present work is aimed at filling this void. A database, dbAARD (database of Aging and Age Related Disorders) has been developed which hosts information on more than 3000 genetic variations significantly (p-value <0.05) associated with 51 ARDs. Furthermore, a machine learning based gene prediction tool AGP (Age Related Disorders Gene Prediction) has been constructed by employing rotation forest algorithm, to prioritize genes associated with ARDs. The tool achieved an overall accuracy in terms of precision 75%, recall 76%, F-measure 76% and AUC 0.85. Both the web resources have been made available online at http://genomeinformatics.dce.edu/dbAARD/ and http://genomeinformatics.dce.edu/AGP/ respectively for easy retrieval and usage by the scientific community. 
We believe that this work may facilitate the analysis of plethora of variants associated with ARDs and provide cues for deciphering the biology of aging.",dbAARD,0.983507156,database of Aging and Age Related Disorders,0.922870524,dbAARD,0.983507156,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/2/2016 +"26578581, 33151284",http://dbaasp.org,"DBAASP v.2: an enhanced database of structure and antimicrobial/cytotoxic activity of natural and synthetic peptides. Antimicrobial peptides (AMPs) are anti-infectives that may represent a novel and untapped class of biotherapeutics. Increasing interest in AMPs means that new peptides (natural and synthetic) are discovered faster than ever before. We describe herein a new version of the Database of Antimicrobial Activity and Structure of Peptides (DBAASPv.2, which is freely accessible at http://dbaasp.org). This iteration of the database reports chemical structures and empirically-determined activities (MICs, IC50, etc.) against more than 4200 specific target microbes for more than 2000 ribosomal, 80 non-ribosomal and 5700 synthetic peptides. Of these, the vast majority are monomeric, but nearly 200 of these peptides are found as homo- or heterodimers. More than 6100 of the peptides are linear, but about 515 are cyclic and more than 1300 have other intra-chain covalent bonds. More than half of the entries in the database were added after the resource was initially described, which reflects the recent sharp uptick of interest in AMPs. New features of DBAASPv.2 include: (i) user-friendly utilities and reporting functions, (ii) a 'Ranking Search' function to query the database by target species and return a ranked list of peptides with activity against that target and (iii) structural descriptions of the peptides derived from empirical data or calculated by molecular dynamics (MD) simulations. The three-dimensional structural data are critical components for understanding structure-activity relationships and for design of new antimicrobial drugs. 
We created more than 300 high-throughput MD simulations specifically for inclusion in DBAASP. The resulting structures are described in the database by novel trajectory analysis plots and movies. Another 200+ DBAASP entries have links to the Protein DataBank. All of the structures are easily visualized directly in the web browser.",DBAASP,0.996657729,Database of Antimicrobial Activity and Structure of Peptides,0.952155938,DBAASP,0.996657729,2,NA,24888447,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +24888447,http://www.biomedicine.org.ge/dbaasp,"DBAASP: database of antimicrobial activity and structure of peptides. The Database of Antimicrobial Activity and Structure of Peptides (DBAASP) is a manually curated database for those peptides for which antimicrobial activity against particular targets has been evaluated experimentally. The database is a depository of complete information on: the chemical structure of peptides; target species; target object of cell; peptide antimicrobial/haemolytic/cytotoxic activities; and experimental conditions at which activities were estimated. The DBAASP search page allows the user to search peptides according to their structural characteristics, complexity type (monomer, dimer and two-peptide), source, synthesis type (ribosomal, nonribosomal and synthetic) and target species. The database prediction algorithm provides a tool for rational design of new antimicrobial peptides. 
DBAASP is accessible at http://www.biomedicine.org.ge/dbaasp/.",DBAASP,0.996422017,Database of Antimicrobial Activity and Structure of Peptides,0.945471898,DBAASP,0.996422017,1,NA,"26578581.0, 33151284.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,7/10/2014 +30380085,http://csb.cse.yzu.edu.tw/dbAMP,"dbAMP: an integrated resource for exploring antimicrobial peptides with functional activities and physicochemical properties on transcriptome and proteome data. Antimicrobial peptides (AMPs), naturally encoded from genes and generally contained 10-100 amino acids, are crucial components of the innate immune system and can protect the host from various pathogenic bacteria, as well as viruses. In recent years, the widespread use of antibiotics has inspired the rapid growth of antibiotic-resistant microorganisms that usually induce critical infection and pathogenesis. An increasing interest therefore was motivated to explore natural AMPs that enable the development of new antibiotics. With the potential of AMPs being as new drugs for multidrug-resistant pathogens, we were thus motivated to develop a database (dbAMP, http://csb.cse.yzu.edu.tw/dbAMP/) by accumulating comprehensive AMPs from public domain and manually curating literature. Currently in dbAMP there are 12 389 unique entries, including 4271 experimentally verified AMPs and 8118 putative AMPs along with their functional activities, supported by 1924 research articles. The advent of high-throughput biotechnologies, such as mass spectrometry and next-generation sequencing, has led us to further expand dbAMP as a database-assisted platform for providing comprehensively functional and physicochemical analyses for AMPs based on the large-scale transcriptome and proteome data. 
Significant improvements available in dbAMP include the information of AMP-protein interactions, antimicrobial potency analysis for 'cryptic' region detection, annotations of AMP target species, as well as AMP detection on transcriptome and proteome datasets. Additionally, a Docker container has been developed as a downloadable package for discovering known and novel AMPs on high-throughput omics data. The user-friendly visualization interfaces have been created to facilitate peptide searching, browsing, and sequence alignment against dbAMP entries. All the facilities integrated into dbAMP can promote the functional analyses of AMPs and the discovery of new antimicrobial drugs.",dbAMP,0.994630337,NA,0,dbAMP,0.994630337,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +22150118,http://www.med.mun.ca/angio,"A curated database of genetic markers from the angiogenesis/VEGF pathway and their relation to clinical outcome in human cancers. Introduction Angiogenesis causes local growth, aggressiveness and metastasis in solid tumors, and thus, is almost always associated with poor prognosis and survival in cancer patients. Because of this clinical importance, several chemotherapeutic agents targeting angiogenesis have also been developed. Genes and genetic variations in angiogenesis/VEGF pathway thus may be correlated with clinical outcome in cancer patients. Material and methods Here, we describe a manually curated public database, dbANGIO, which posts the results of studies testing the possible correlation of genetic variations (polymorphisms and mutations) from the angiogenesis/VEGF pathway with demographic features, clinicopathological features, treatment response and toxicity, and prognosis and survival-related endpoints in human cancers. The scientific findings are retrieved from PUBMED and posted in the dbANGIO website in a summarized form. 
Results and conclusion As of September 2011, dbANGIO includes 362 entries from 83 research articles encompassing 154 unique genetic variations from 39 genes investigated in several solid and hematological cancers. By curating the literature findings and making them freely available to researchers, dbANGIO will expedite the research on genetic factors from the angiogenesis pathway and will assist in their utility in clinical management of cancer patients. dbANGIO is freely available for non-profit institutions at http://www.med.mun.ca/angio.",dbANGIO,0.997428656,NA,0,dbANGIO,0.997428656,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/12/2011 +23842462,http://bioinformatica.uniroma2.it/DBATE,"DBATE: database of alternative transcripts expression. The use of high-throughput RNA sequencing technology (RNA-seq) allows whole transcriptome analysis, providing an unbiased and unabridged view of alternative transcript expression. Coupling splicing variant-specific expression with its functional inference is still an open and difficult issue for which we created the DataBase of Alternative Transcripts Expression (DBATE), a web-based repository storing expression values and functional annotation of alternative splicing variants. We processed 13 large RNA-seq panels from human healthy tissues and in disease conditions, reporting expression levels and functional annotations gathered and integrated from different sources for each splicing variant, using a variant-specific annotation transfer pipeline. The possibility to perform complex queries by cross-referencing different functional annotations permits the retrieval of desired subsets of splicing variant expression values that can be visualized in several ways, from simple to more informative. DBATE is intended as a novel tool to help appreciate how, and possibly why, the transcriptome expression is shaped. 
DATABASE URL: http://bioinformatica.uniroma2.it/DBATE/.",DBATE,0.993781567,DataBase of Alternative Transcripts Expression,0.966058254,DBATE,0.993781567,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/9/2013 +29764375,http://dbATM.mbc.nctu.edu.tw,"The aquatic animals' transcriptome resource for comparative functional analysis. BACKGROUND:Aquatic animals have great economic and ecological importance. Among them, non-model organisms have been studied regarding eco-toxicity, stress biology, and environmental adaptation. Due to recent advances in next-generation sequencing techniques, large amounts of RNA-seq data for aquatic animals are publicly available. However, currently there is no comprehensive resource exist for the analysis, unification, and integration of these datasets. This study utilizes computational approaches to build a new resource of transcriptomic maps for aquatic animals. This aquatic animal transcriptome map database dbATM provides de novo assembly of transcriptome, gene annotation and comparative analysis of more than twenty aquatic organisms without draft genome. RESULTS:To improve the assembly quality, three computational tools (Trinity, Oases and SOAPdenovo-Trans) were employed to enhance individual transcriptome assembly, and CAP3 and CD-HIT-EST software were then used to merge these three assembled transcriptomes. In addition, functional annotation analysis provides valuable clues to gene characteristics, including full-length transcript coding regions, conserved domains, gene ontology and KEGG pathways. Furthermore, all aquatic animal genes are essential for comparative genomics tasks such as constructing homologous gene groups and blast databases and phylogenetic analysis. CONCLUSION:In conclusion, we establish a resource for non model organism aquatic animals, which is great economic and ecological importance and provide transcriptomic information including functional annotation and comparative transcriptome analysis. 
The database is now publically accessible through the URL http://dbATM.mbc.nctu.edu.tw/ .",dbATM,0.992344558,animal transcriptome map,0.634973332,dbATM,0.992344558,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/9/2018 +24647629,http://www.mgc.ac.cn/DBatVir,"DBatVir: the database of bat-associated viruses. Emerging infectious diseases remain a significant threat to public health. Most emerging infectious disease agents in humans are of zoonotic origin. Bats are important reservoir hosts of many highly lethal zoonotic viruses and have been implicated in numerous emerging infectious disease events in recent years. It is essential to enhance our knowledge and understanding of the genetic diversity of the bat-associated viruses to prevent future outbreaks. To facilitate further research, we constructed the database of bat-associated viruses (DBatVir). Known viral sequences detected in bat samples were manually collected and curated, along with the related metadata, such as the sampling time, location, bat species and specimen type. Additional information concerning the bats, including common names, diet type, geographic distribution and phylogeny were integrated into the database to bridge the gap between virologists and zoologists. The database currently covers >4100 bat-associated animal viruses of 23 viral families detected from 196 bat species in 69 countries worldwide. It provides an overview and snapshot of the current research regarding bat-associated viruses, which is essential now that the field is rapidly expanding. With a user-friendly interface and integrated online bioinformatics tools, DBatVir provides a convenient and powerful platform for virologists and zoologists to analyze the virome diversity of bats, as well as for epidemiologists and public health researchers to monitor and track current and future bat-related infectious diseases. 
Database URL: http://www.mgc.ac.cn/DBatVir/.",DBatVir,0.997140765,database of bat-associated viruses,0.885839264,DBatVir,0.997140765,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/18/2014 +25474259,http://bclab.inha.ac.kr/dbbp,"DBBP: database of binding pairs in protein-nucleic acid interactions. Background Interaction of proteins with other molecules plays an important role in many biological activities. As many structures of protein-DNA complexes and protein-RNA complexes have been determined in the past years, several databases have been constructed to provide structure data of the complexes. However, the information on the binding sites between proteins and nucleic acids is not readily available from the structure data since the data consists mostly of the three-dimensional coordinates of the atoms in the complexes. Results We analyzed the huge amount of structure data for the hydrogen bonding interactions between proteins and nucleic acids and developed a database called DBBP (DataBase of Binding Pairs in protein-nucleic acid interactions, http://bclab.inha.ac.kr/dbbp). DBBP contains 44,955 hydrogen bonds (H-bonds) of protein-DNA interactions and 77,947 H-bonds of protein-RNA interactions. Conclusions Analysis of the huge amount of structure data of protein-nucleic acid complexes is labor-intensive, yet provides useful information for studying protein-nucleic acid interactions. DBBP provides the detailed information of hydrogen-bonding interactions between proteins and nucleic acids at various levels from the atomic level to the residue level. 
The binding information can be used as a valuable resource for developing a computational method aiming at predicting new binding sites in proteins or nucleic acids.",DBBP,0.99723047,DataBase of Binding Pairs in protein-nucleic acid interactions,0.836951999,DBBP,0.99723047,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/3/2014 +32941621,http://bcb.unl.edu/dbCAN_PUL,"dbCAN-PUL: a database of experimentally characterized CAZyme gene clusters and their substrates. PULs (polysaccharide utilization loci) are discrete gene clusters of CAZymes (Carbohydrate Active EnZymes) and other genes that work together to digest and utilize carbohydrate substrates. While PULs have been extensively characterized in Bacteroidetes, there exist PULs from other bacterial phyla, as well as archaea and metagenomes, that remain to be catalogued in a database for efficient retrieval. We have developed an online database dbCAN-PUL (http://bcb.unl.edu/dbCAN_PUL/) to display experimentally verified CAZyme-containing PULs from literature with pertinent metadata, sequences, and annotation. Compared to other online CAZyme and PUL resources, dbCAN-PUL has the following new features: (i) Batch download of PUL data by target substrate, species/genome, genus, or experimental characterization method; (ii) Annotation for each PUL that displays associated metadata such as substrate(s), experimental characterization method(s) and protein sequence information, (iii) Links to external annotation pages for CAZymes (CAZy), transporters (UniProt) and other genes, (iv) Display of homologous gene clusters in GenBank sequences via integrated MultiGeneBlast tool and (v) An integrated BLASTX service available for users to query their sequences against PUL proteins in dbCAN-PUL. 
With these features, dbCAN-PUL will be an important repository for CAZyme and PUL research, complementing our other web servers and databases (dbCAN2, dbCAN-seq).",dbCAN-PUL,0.993980992,NA,0,dbCAN-PUL,0.993980992,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +30053267,http://cys.bios.niu.edu/dbCAN_seq,"dbCAN-seq: a database of carbohydrate-active enzyme (CAZyme) sequence and annotation. Carbohydrate-active enzyme (CAZymes) are not only the most important enzymes for bioenergy and agricultural industries, but also very important for human health, in that human gut microbiota encode hundreds of CAZyme genes in their genomes for degrading various dietary and host carbohydrates. We have built an online database dbCAN-seq (http://cys.bios.niu.edu/dbCAN_seq) to provide pre-computed CAZyme sequence and annotation data for 5,349 bacterial genomes. Compared to the other CAZyme resources, dbCAN-seq has the following new features: (i) a convenient download page to allow batch download of all the sequence and annotation data; (ii) an annotation page for every CAZyme to provide the most comprehensive annotation data; (iii) a metadata page to organize the bacterial genomes according to species metadata such as disease, habitat, oxygen requirement, temperature, metabolism; (iv) a very fast tool to identify physically linked CAZyme gene clusters (CGCs) and (v) a powerful search function to allow fast and efficient data query. With these unique utilities, dbCAN-seq will become a valuable web resource for CAZyme research, with a focus complementary to dbCAN (automated CAZyme annotation server) and CAZy (CAZyme family classification and reference database).",dbCAN-seq,0.997396141,NA,0,dbCAN-seq,0.997396141,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +21214365,http://dbcat.cgm.ntu.edu.tw,"DBCAT: database of CpG islands and analytical tools for identifying comprehensive methylation profiles in cancer cells. 
DBCAT (database of CpG islands and analytical tools, http://dbcat.cgm.ntu.edu.tw/ ), developed to characterize comprehensive DNA methylation profiles in human cancers, is a web-based application and methylation database containing several convenient tools for investigating epigenetic regulation in human diseases. To our knowledge, DBCAT is the first online methylation analytical tool, and is composed of three parts: a CpG island finder, a genome query browser, and a tool for analyzing methylation microarray data. The analytical tools can quickly identify genes with methylated regions from microarray data, compare the methylation status changes between different arrays, and provide functional analysis in addition to colocalizing transcription factor binding sites.",DBCAT,0.998600185,database of CpG,0.804415733,DBCAT,0.998600185,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/8/2011 +24918550,http://128.135.207.10/dbCerEx,"dbCerEx: a web-based database for the analysis of cervical cancer transcriptomes. Background Cervical cancers are ranked the second-most hazardous ailments among women worldwide. In the past two decades, microarray technologies have been applied to study genes involved in malignancy progress. However, in most of the published microarray studies, only a few genes were reported leaving rather a large amount of data unused. Also, RNA-Seq data has become more standard for transcriptome analysis and is widely applied in cancer studies. There is a growing demand for a tool to help the experimental researchers who are keen to explore cervical cancer gene therapy, but lack computer expertise to access and analyze the high throughput gene expression data. Description The dbCerEx database is designed to retrieve and process gene expression data from cervical cancer samples. It includes the genome wide expression profiles of cervical cancer samples, as well as a web utility to cluster genes with similar expression patterns. 
This feature will help researchers conduct further research to uncover novel gene functions. Conclusion The dbCerEx database is freely available for non-commercial use at http://128.135.207.10/dbCerEx/, and will be updated and integrated with more features as needed.",dbCerEx,0.99592483,NA,0,dbCerEx,0.99592483,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/11/2014 +30016397,http://bioinfo.ahu.edu.cn:8080/dbCID,"dbCID: a manually curated resource for exploring the driver indels in human cancer. While recent advances in next-generation sequencing technologies have enabled the creation of a multitude of databases in cancer genomic research, there is no comprehensive database focusing on the annotation of driver indels (insertions and deletions) yet. Therefore, we have developed the database of Cancer driver InDels (dbCID), which is a collection of known coding indels that likely to be engaged in cancer development, progression or therapy. dbCID contains experimentally supported and putative driver indels derived from manual curation of literature and is freely available online at http://bioinfo.ahu.edu.cn:8080/dbCID. Using the data deposited in dbCID, we summarized features of driver indels in four levels (gene, DNA, transcript and protein) through comparing with putative neutral indels. We found that most of the genes containing driver indels in dbCID are known cancer genes playing a role in tumorigenesis. Contrary to the expectation, the sequences affected by driver frameshift indels are not larger than those by neutral ones. In addition, the frameshift and inframe driver indels prefer to disrupt high-conservative regions both in DNA sequences and protein domains. Finally, we developed a computational method for discriminating cancer driver from neutral frameshift indels based on the deposited data in dbCID. The proposed method outperformed other widely used non-cancer-specific predictors on an external test set, which demonstrated the usefulness of the data deposited in dbCID. 
We hope dbCID will be a benchmark for improving and evaluating prediction algorithms, and the characteristics summarized here may assist with investigating the mechanism of indel-cancer association.",dbCID,0.99611038,database of Cancer driver InDels,0.814343606,dbCID,0.99611038,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2019 +33196844,http://yamasati.nig.ac.jp/dbcns,"dbCNS: A New Database for Conserved Noncoding Sequences. We developed dbCNS (http://yamasati.nig.ac.jp/dbcns), a new database for conserved noncoding sequences (CNSs). CNSs exist in many eukaryotes and are assumed to be involved in protein expression control. Version 1 of dbCNS, introduced here, includes a powerful and precise CNS identification pipeline for multiple vertebrate genomes. Mutations in CNSs may induce morphological changes and cause genetic diseases. For this reason, many vertebrate CNSs have been identified, with special reference to primate genomes. We integrated ~6.9 million CNSs from many vertebrate genomes into dbCNS, which allows users to extract CNSs near genes of interest using keyword searches. In addition to CNSs, dbCNS contains published genome sequences of 161 species. With purposeful taxonomic sampling of genomes, users can employ CNSs as queries to reconstruct CNS alignments and phylogenetic trees, to evaluate CNS modifications, acquisitions, and losses, and to roughly identify species with CNSs having accelerated substitution rates. dbCNS also produces links to dbSNP for searching pathogenic single-nucleotide polymorphisms in human CNSs. Thus, dbCNS connects morphological changes with genetic diseases. A test analysis using 38 gnathostome genomes was accomplished within 30 s. 
dbCNS results can evaluate CNSs identified by other stand-alone programs using genome-scale data.",dbCNS,0.997333527,NA,0,dbCNS,0.997333527,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2021 +28977473,http://dbcorc.cam-su.org,"dbCoRC: a database of core transcriptional regulatory circuitries modeled by H3K27ac ChIP-seq signals. Core transcription regulatory circuitry (CRC) is comprised of a small group of self-regulated transcription factors (TFs) and their interconnected regulatory loops. Studies from embryonic stem cells and other cellular models have revealed the elementary roles of CRCs in transcriptional control of cell identity and cellular fate. Systematic identification and subsequent archiving of CRCs across diverse cell types and tissues are needed to explore both cell/tissue type-specific and disease-associated transcriptional networks. Here, we present a comprehensive and interactive database (dbCoRC, http://dbcorc.cam-su.org) of CRC models which are computationally inferred from mapping of super-enhancer and prediction of TF binding sites. The current version of dbCoRC contains CRC models for 188 human and 50 murine cell lines/tissue samples. In companion with CRC models, this database also provides: (i) super enhancer, typical enhancer, and H3K27ac landscape for individual samples, (ii) putative binding sites of each core TF across the super-enhancer regions within CRC and (iii) expression of each core TF in normal or cancer cells/tissues. The dbCoRC will serve as a valuable resource for the scientific community to explore transcriptional control and regulatory circuitries in biological processes related to, but not limited to lineage specification, tissue homeostasis and tumorigenesis.",dbCoRC,0.997102678,NA,0,dbCoRC,0.997102678,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +33276297,http://covp.immt.res.in,"DBCOVP: A database of coronavirus virulent glycoproteins. 
Since the emergence of SARS-CoV-1 (2002), novel coronaviruses have emerged periodically like the MERS- CoV (2012) and now, the SARS-CoV-2 outbreak which has posed a global threat to public health. Although, this is the third zoonotic coronavirus breakout within the last two decades, there are only a few platforms that provide information about coronavirus genomes. None of them is specific for the virulence glycoproteins and complete sequence-structural features of these virulence factors across the betacoronavirus family including SARS-CoV-2 strains are lacking. Against this backdrop, we present DBCOVP (http://covp.immt.res.in/), the first manually-curated, web-based resource to provide extensive information on the complete repertoire of structural virulent glycoproteins from coronavirus genomes belonging to betacoronavirus genera. The database provides various sequence-structural properties in which users can browse and analyze information in different ways. Furthermore, many conserved T-cell and B-cell epitopes predicted for each protein are present that may perform a significant role in eliciting the humoral and cellular immune response. The tertiary structure of the epitopes together with the docked epitope-HLA binding-complex is made available to facilitate further analysis. DBCOVP presents an easy-to-use interface with in-built tools for similarity search, cross-genome comparison, phylogenetic, and multiple sequence alignment. DBCOVP will certainly be an important resource for experimental biologists engaged in coronavirus research studies and will aid in vaccine development.",DBCOVP,0.997579992,NA,0,DBCOVP,0.997579992,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2020 +27192119,http://bioinfo.ahu.edu.cn:8080/dbCPG/index.jsp,"dbCPG: A web resource for cancer predisposition genes. Cancer predisposition genes (CPGs) are genes in which inherited mutations confer highly or moderately increased risks of developing cancer. 
Identification of these genes and understanding the biological mechanisms that underlie them is crucial for the prevention, early diagnosis, and optimized management of cancer. Over the past decades, great efforts have been made to identify CPGs through multiple strategies. However, information on these CPGs and their molecular functions is scattered. To address this issue and provide a comprehensive resource for researchers, we developed the Cancer Predisposition Gene Database (dbCPG, Database URL: http://bioinfo.ahu.edu.cn:8080/dbCPG/index.jsp), the first literature-based gene resource for exploring human CPGs. It contains 827 human (724 protein-coding, 23 non-coding, and 80 unknown type genes), 637 rats, and 658 mouse CPGs. Furthermore, data mining was performed to gain insights into the understanding of the CPGs data, including functional annotation, gene prioritization, network analysis of prioritized genes and overlap analysis across multiple cancer types. A user-friendly web interface with multiple browse, search, and upload functions was also developed to facilitate access to the latest information on CPGs. Taken together, the dbCPG database provides a comprehensive data resource for further studies of cancer predisposition genes.",dbCPG,0.991826773,Cancer Predisposition Gene Database,0.988404105,dbCPG,0.991826773,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2016 +30379998,http://bioinfo.ahu.edu.cn:8080/dbCPM,"dbCPM: a manually curated database for exploring the cancer passenger mutations. . While recently emergent driver mutation data sets are available for developing computational methods to predict cancer mutation effects, benchmark sets focusing on passenger mutations are largely missing. Here, we developed a comprehensive literature-based database of Cancer Passenger Mutations (dbCPM), which contains 941 experimentally supported and 978 putative passenger mutations derived from a manual curation of the literature. 
Using the missense mutation data, the largest group in the dbCPM, we explored patterns of missense passenger mutations by comparing them with the missense driver mutations and assessed the performance of four cancer-focused mutation effect predictors. We found that the missense passenger mutations showed significant differences with drivers at multiple levels, and several appeared in both the passenger and driver categories, showing pleiotropic functions depending on the tumor context. Although all the predictors displayed good true positive rates, their true negative rates were relatively low due to the lack of negative training samples with experimental evidence, which suggests that a suitable negative data set for developing a more robust methodology is needed. We hope that the dbCPM will be a benchmark data set for improving and evaluating prediction algorithms and serve as a valuable resource for the cancer research community. dbCPM is freely available online at http://bioinfo.ahu.edu.cn:8080/dbCPM.",dbCPM,0.986568928,of Cancer Passenger Mutations,0.904574347,dbCPM,0.986568928,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/30/2018 +29860480,http://bioinfo.ahu.edu.cn,"dbCRSR: a manually curated database for regulation of cancer radiosensitivity. . Radiotherapy is used to treat approximately 50% of all cancer patients, with varying prognoses. Intrinsic radiosensitivity is an important factor underlying the radiotherapeutic efficacy of this precise treatment. During the past decades, great efforts have been made to improve radiotherapy treatment through multiple strategies. However, invaluable data remains buried in the extensive radiotherapy literature, making it difficult to obtain an overall view of the detailed mechanisms leading to radiosensitivity, thus limiting advances in radiotherapy. 
To address this issue, we collected data from the relevant literature contained in the PubMed database and developed a literature-based database that we term the cancer radiosensitivity regulation factors database (dbCRSR). dbCRSR is a manually curated catalogue of radiosensitivity, containing multiple radiosensitivity regulation factors (395 coding genes, 119 non-coding RNAs and 306 chemical compounds) with appropriate annotation. To illustrate the value of the data we collected, data mining was performed including functional annotation and network analysis. In summary, dbCRSR is the first literature-based database to focus on radiosensitivity and provides a resource to better understand the detailed mechanisms of radiosensitivity. We anticipate dbCRSR will be a useful resource to enrich our knowledge and to promote further study of radiosensitivity.Database URL: http://bioinfo.ahu.edu.cn: 8080/dbCRSR/.",dbCRSR,0.995114565,cancer radiosensitivity regulation factors database,0.697191248,dbCRSR,0.995114565,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24700709,http://www.dbdb.urmc.rochester.edu/home,"The Developmental Brain Disorders Database (DBDB): a curated neurogenetics knowledge base with clinical and research applications. The number of single genes associated with neurodevelopmental disorders has increased dramatically over the past decade. The identification of causative genes for these disorders is important to clinical outcome as it allows for accurate assessment of prognosis, genetic counseling, delineation of natural history, inclusion in clinical trials, and in some cases determines therapy. Clinicians face the challenge of correctly identifying neurodevelopmental phenotypes, recognizing syndromes, and prioritizing the best candidate genes for testing. However, there is no central repository of definitions for many phenotypes, leading to errors of diagnosis. 
Additionally, there is no system of levels of evidence linking genes to phenotypes, making it difficult for clinicians to know which genes are most strongly associated with a given condition. We have developed the Developmental Brain Disorders Database (DBDB: https://www.dbdb.urmc.rochester.edu/home), a publicly available, online-curated repository of genes, phenotypes, and syndromes associated with neurodevelopmental disorders. DBDB contains the first referenced ontology of developmental brain phenotypes, and uses a novel system of levels of evidence for gene-phenotype associations. It is intended to assist clinicians in arriving at the correct diagnosis, select the most appropriate genetic test for that phenotype, and improve the care of patients with developmental brain disorders. For researchers interested in the discovery of novel genes for developmental brain disorders, DBDB provides a well-curated source of important genes against which research sequencing results can be compared. Finally, DBDB allows novel observations about the landscape of the neurogenetics knowledge base.",DBDB,0.992844542,Developmental Brain Disorders Database,0.949120533,DBDB,0.992844542,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/3/2014 +27899556,http://www.picb.ac.cn/dbDEMC,"dbDEMC 2.0: updated database of differentially expressed miRNAs in human cancers. MicroRNAs (miRNAs) are often deregulated in cancer and are thought to play an important role in cancer development. Large amount of differentially expressed miRNAs have been identified in various cancers by using high-throughput methods. It is therefore quite important to make a comprehensive collection of these miRNAs and to decipher their roles in oncogenesis and tumor progression. In 2010, we presented the first release of dbDEMC, representing a database for collection of differentially expressed miRNAs in human cancers obtained from microarray data. Here we describe an update of the database. 
dbDEMC 2.0 documents 209 expression profiling data sets across 36 cancer types and 73 subtypes, and a total of 2224 differentially expressed miRNAs were identified. An easy-to-use web interface was constructed that allows users to make a quick search of the differentially expressed miRNAs in certain cancer types. In addition, a new function of 'meta-profiling' was added to view differential expression events according to user-defined miRNAs and cancer types. We expect this database to continue to serve as a valuable source for cancer investigation and potential clinical application related to miRNAs. dbDEMC 2.0 is freely available at http://www.picb.ac.cn/dbDEMC.",dbDEMC,0.986323833,NA,0,dbDEMC,0.986323833,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +22096234,http://lifecenter.sgst.cn/dbdepc/index.do,"dbDEPC 2.0: updated database of differentially expressed proteins in human cancers. A large amount of differentially expressed proteins (DEPs) have been identified in various cancer proteomics experiments, curation and annotation of these proteins are important in deciphering their roles in oncogenesis and tumor progression, and may further help to discover potential protein biomarkers for clinical applications. In 2009, we published the first database of DEPs in human cancers (dbDEPCs). In this updated version of 2011, dbDEPC 2.0 has more than doubly expanded to over 4000 protein entries, curated from 331 experiments across 20 types of human cancers. This resource allows researchers to search whether their interested proteins have been reported changing in certain cancers, to compare their own proteomic discovery with previous studies, to picture selected protein expression heatmap across multiple cancers and to relate protein expression changes with aberrance in other genetic level. New important developments include addition of experiment design information, advanced filter tools for customer-specified analysis and a network analysis tool. 
We expect dbDEPC 2.0 to be a much more powerful tool than it was in its first release and can serve as reference to both proteomics and cancer researchers. dbDEPC 2.0 is available at http://lifecenter.sgst.cn/dbdepc/index.do.",dbDEPC,0.980938792,NA,0,dbDEPC,0.980938792,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/16/2011 +22917656,http://www.juit.ac.in/attachments/dbdiarrhea/diarrhea_home.html,"dbDiarrhea: the database of pathogen proteins and vaccine antigens from diarrheal pathogens. Diarrhea occurs world-wide and is most commonly caused by gastrointestinal infections which kill around 2.2 million people globally each year, mostly children in developing countries. We describe here dbDiarrhea, which is currently the most comprehensive catalog of proteins implicated in the pathogenesis of diarrhea caused by major bacterial, viral and parasitic species. The current release of the database houses 820 proteins gleaned through an extensive and critical survey of research articles from PubMed. The major contributors to this compendium of proteins are Escherichia coli and Salmonella enterica. These proteins are classified into different categories such as Type III secretion system effectors, Type III secretion system components, and Pathogen proteins. There is another complementary module called 'Host proteins'. dbDiarrhea also serves as a repository of the research articles describing (1) trials of subunit and whole organism vaccines (2) high-throughput screening of Type III secretion system inhibitors and (3) diagnostic assays, for various diarrheal pathogens. The database is web accessible through an intuitive user interface that allows querying proteins and research articles for different organism, keywords and accession number. Besides providing the search facility through browsing, the database supports sequence similarity search with the BLAST tool. 
With the rapidly burgeoning global burden of the diarrhea, we anticipate that this database would serve as a source of useful information for furthering research on diarrhea. The database can be freely accessed at http://www.juit.ac.in/attachments/dbdiarrhea/diarrhea_home.html.",dbDiarrhea,0.997005939,NA,0,dbDiarrhea,0.997005939,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/13/2012 +25978092,http://www.juit.ac.in/attachments/dbdiasnp,"DBDiaSNP: An Open-Source Knowledgebase of Genetic Polymorphisms and Resistance Genes Related to Diarrheal Pathogens. Diarrhea is a highly common infection among children, responsible for significant morbidity and mortality rate worldwide. After pneumonia, diarrhea remains the second leading cause of neonatal deaths. Numerous viral, bacterial, and parasitic enteric pathogens are associated with diarrhea. With increasing antibiotic resistance among enteric pathogens, there is an urgent need for global surveillance of the mutations and resistance genes primarily responsible for resistance to antibiotic treatment. Single Nucleotide Polymorphisms are important in this regard as they have a vast potential to be utilized as molecular diagnostics for gene-disease or pharmacogenomics association studies linking genotype to phenotype. DBDiaSNP is a comprehensive repository of mutations and resistance genes among various diarrheal pathogens and hosts to advance breakthroughs that will find applications from development of sequence-based diagnostic tools to drug discovery. It contains information about 946 mutations and 326 resistance genes compiled from literature and various web resources. As of March 2015, it houses various pathogen genes and the mutations responsible for antibiotic resistance. The pathogens include, for example, DEC (Diarrheagenic E.coli), Salmonella spp., Campylobacter spp., Shigella spp., Clostridium difficile, Aeromonas spp., Helicobacter pylori, Entamoeba histolytica, Vibrio cholera, and viruses. 
It also includes mutations from hosts (e.g., humans, pigs, others) that render them either susceptible or resistant to a certain type of diarrhea. DBDiaSNP is therefore intended as an integrated open access database for researchers and clinicians working on diarrheal diseases. Additionally, we note that the DBDiaSNP is one of the first antibiotic resistance databases for the diarrheal pathogens covering mutations and resistance genes that have clinical relevance from a broad range of pathogens and hosts. For future translational research involving integrative biology and global health, the database offers veritable potentials, particularly for developing countries and worldwide monitoring and personalized effective treatment of pathogens associated with diarrhea. The database is accessible on the public domain at http://www.juit.ac.in/attachments/dbdiasnp/ .",DBDiaSNP,0.997262299,NA,0,DBDiaSNP,0.997262299,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/15/2015 +27153700,http://bioinfo.ahu.edu.cn:8080/dbDSM/index.jsp,"dbDSM: a manually curated database for deleterious synonymous mutations. Motivation Synonymous mutations (SMs), which changed the sequence of a gene without directly altering the amino acid sequence of the encoded protein, were thought to have no functional consequences for a long time. They are often assumed to be neutral in models of mutation and selection and were completely ignored in many studies. However, accumulating experimental evidence has demonstrated that these mutations exert their impact on gene functions via splicing accuracy, mRNA stability, translation fidelity, protein folding and expression, and some of these mutations are implicated in human diseases. To the best of our knowledge, there is still no database specially focusing on disease-related SMs. 
Results We have developed a new database called dbDSM (database of Deleterious Synonymous Mutation), a continually updated database that collects, curates and manages available human disease-related SM data obtained from published literature. In the current release, dbDSM collects 1936 SM-disease association entries, including 1289 SMs and 443 human diseases from ClinVar, GRASP, GWAS Catalog, GWASdb, PolymiRTS database, PubMed database and Web of Knowledge. Additionally, we provided users a link to download all the data in the dbDSM and a link to submit novel data into the database. We hope dbDSM will be a useful resource for investigating the roles of SMs in human disease. Availability and implementation dbDSM is freely available online at http://bioinfo.ahu.edu.cn:8080/dbDSM/index.jsp with all major browser supported. Contact jfxia@ahu.edu.cn Supplementary information Supplementary data are available at Bioinformatics online.",dbDSM,0.997294545,database of Deleterious Synonymous Mutation,0.933270373,dbDSM,0.997294545,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/15/2016 +26099468,http://dbemt.bioinfo-minzhao.org,"dbEMT: an epithelial-mesenchymal transition associated gene resource. As a cellular process that changes epithelial cells to mesenchymal cells, Epithelial-mesenchymal transition (EMT) plays important roles in development and cancer metastasis. Recent studies on cancer metastasis have identified many new susceptibility genes that control this transition. However, there is no comprehensive resource for EMT by integrating various genetic studies and the relationship between EMT and the risk of complex diseases such as cancer are still unclear. To investigate the cellular complexity of EMT, we have constructed dbEMT (http://dbemt.bioinfo-minzhao.org/), the first literature-based gene resource for exploring EMT-related human genes. We manually curated 377 experimentally verified genes from literature. 
Functional analyses highlighted the prominent role of proteoglycans in tumor metastatic cascades. In addition, the disease enrichment analysis provides a clue for the potential transformation in affected tissues or cells in Alzheimer's disease and Type 2 Diabetes. Moreover, the global mutation pattern of EMT-related genes across multiple cancers may reveal common cancer metastasis mechanisms. Our further reconstruction of the EMT-related protein-protein interaction network uncovered a highly modular structure. These results illustrate the importance of dbEMT to our understanding of cell development and cancer metastasis, and also highlight the utility of dbEMT for elucidating the functions of EMT-related genes.",dbEMT,0.992519438,NA,0,dbEMT,0.992519438,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/23/2015 +26577058,http://dbendo.charite.de,"DBEndo: a web-based endodontic case management tool. Background The success of endodontic treatment depends-among many other factors-on good documentation. Paper-based records are often difficult to read or incomplete and commercially available tools focus on billing. An electronic record captures the state of treatment at all times. Databases are a common tool in everyday life. Results Here, we present a database created for the Charit√ɬÉ√ǬÉ√ɬÇ√Ǭ©-Universit√ɬÉ√ǬÉ√ɬÇ√Ǭ§tsmedizin Berlin, Germany. Through consistent digital documentation, data analytics of patients, root canal anatomies, instrumentation techniques, efficacy of chemical disinfection, root filling techniques, and corresponding recall success rates, which needed extensive research before, are now easy to perform. Tables and even graphics and data analystics are only one click away and can be exported to other programs. Conclusions DBEndo is a database to store and visualise internally, as well as to share endodontic cases online. For academic use we provide the database including all forms and some anonymous data for free at: http://dbendo.charite.de . 
Through easy import and export of the data, the system is open and flexible.",DBEndo,0.913484514,NA,0,DBEndo,0.913484514,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/17/2015 +22102573,http://www.hpppi.iicb.res.in/btox,"DBETH: a Database of Bacterial Exotoxins for Human. Pathogenic bacteria produce protein toxins to survive in the hostile environments defined by the host's defense systems and immune response. Recent progresses in high-throughput genome sequencing and structure determination techniques have contributed to a better understanding of mechanisms of action of the bacterial toxins at the cellular and molecular levels leading to pathogenicity. It is fair to assume that with time more and more unknown toxins will emerge not only by the discovery of newer species but also due to the genetic rearrangement of existing bacterial genomes. Hence, it is crucial to organize a systematic compilation and subsequent analyses of the inherent features of known bacterial toxins. We developed a Database for Bacterial ExoToxins (DBETH, http://www.hpppi.iicb.res.in/btox/), which contains sequence, structure, interaction network and analytical results for 229 toxins categorized within 24 mechanistic and activity types from 26 bacterial genuses. The main objective of this database is to provide a comprehensive knowledgebase for human pathogenic bacterial toxins where various important sequence, structure and physico-chemical property based analyses are provided. 
Further, we have developed a prediction server attached to this database which aims to identify bacterial toxin like sequences either by establishing homology with known toxin sequences/domains or by classifying bacterial toxin specific features using a support vector based machine learning techniques.",DBETH,0.990900735,Database for Bacterial ExoToxins,0.92325345,DBETH,0.990900735,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/18/2011 +24297256,http://www.ncbi.nlm.nih.gov/gap,"NCBI's Database of Genotypes and Phenotypes: dbGaP. The Database of Genotypes and Phenotypes (dbGap, http://www.ncbi.nlm.nih.gov/gap) is a National Institutes of Health-sponsored repository charged to archive, curate and distribute information produced by studies investigating the interaction of genotype and phenotype. Information in dbGaP is organized as a hierarchical structure and includes the accessioned objects, phenotypes (as variables and datasets), various molecular assay data (SNP and Expression Array data, Sequence and Epigenomic marks), analyses and documents. Publicly accessible metadata about submitted studies, summary level data, and documents related to studies can be accessed freely on the dbGaP website. Individual-level data are accessible via Controlled Access application to scientists across the globe.",dbGaP,0.997724652,Database of Genotypes and Phenotypes,0.917498708,dbGaP,0.997724652,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2013 +29031638,http://www.bmicnip.in/dbgaps,"dbGAPs: A comprehensive database of genes and genetic markers associated with psoriasis and its subtypes. . Psoriasis is a systemic hyperproliferative inflammatory skin disorder, although rarely fatal but significantly reduces quality of life. Understanding the full genetic component of the disease association may provide insight into biological pathways as well as targets and biomarkers for diagnosis, prognosis and therapy. 
Studies related to psoriasis associated genes and genetic markers are scattered and not easily amendable to data-mining. To alleviate difficulties, we have developed dbGAPs an integrated knowledgebase representing a gateway to psoriasis associated genomic data. The database contains annotation for 202 manually curated genes associated with psoriasis and its subtypes with cross-references. Functional enrichment of these genes, in context of Gene Ontology and pathways, provide insight into their important role in psoriasis etiology and pathogenesis. The dbGAPs interface is enriched with an interactive search engine for data retrieval along with unique customized tools for Single Nucleotide Polymorphism (SNP)/indel detection and SNP/indel annotations. dbGAPs is accessible at http://www.bmicnip.in/dbgaps/.",dbGAPs,0.978835642,NA,0,dbGAPs,0.978835642,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/12/2017 +26566288,http://bminfor.tongji.edu.cn/dbgc/index.do,"DBGC: A Database of Human Gastric Cancer. The Database of Human Gastric Cancer (DBGC) is a comprehensive database that integrates various human gastric cancer-related data resources. Human gastric cancer-related transcriptomics projects, proteomics projects, mutations, biomarkers and drug-sensitive genes from different sources were collected and unified in this database. Moreover, epidemiological statistics of gastric cancer patients in China and clinicopathological information annotated with gastric cancer cases were also integrated into the DBGC. We believe that this database will greatly facilitate research regarding human gastric cancer in many fields. DBGC is freely available at http://bminfor.tongji.edu.cn/dbgc/index.do.",DBGC,0.987804174,Database of Human Gastric Cancer,0.921517382,DBGC,0.987804174,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/13/2015 +24790154,http://csb.cse.yzu.edu.tw/dbGSH,"dbGSH: a database of S-glutathionylation. 
Unlabelled S-glutathionylation, the reversible protein posttranslational modification (PTM) that generates a mixed disulfide bond between glutathione and cysteine residue, critically regulates protein activity, stability and redox regulation. Due to its importance in regulating oxidative/nitrosative stress and balance in cellular response, a number of methods have been rapidly developed to study S-glutathionylation, thus expanding the dataset of experimentally determined glutathionylation sites. However, there is currently no database dedicated to the integration of all experimentally verified S-glutathionylation sites along with their characteristics or structural or functional information. Thus, the dbGSH database has been created to integrate all available datasets and to provide the relevant structural analysis. As of January 31, 2014, dbGSH has manually collected >2200 experimentally verified S-glutathionylated peptides from 169 research articles using a text-mining approach. To solve the problem of heterogeneity of the data collected from different sources, the sequence identity of the reported S-glutathionylated peptides is mapped to UniProtKB protein entries. To delineate the structural correlations and consensus motifs of these S-glutathionylation sites, the dbGSH database also provides structural and functional analyses, including the motifs of substrate sites, solvent accessibility, protein secondary and tertiary structures, protein domains and gene ontology. Availability and implementation dbGSH is now freely accessible at http://csb.cse.yzu.edu.tw/dbGSH/. The database content is regularly updated with new data collected by the continuous survey of research articles.",dbGSH,0.994318962,NA,0,dbGSH,0.994318962,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/29/2014 +33051688,http://sgrnascorer.cancer.gov/dbguide,"dbGuide: a database of functionally validated guide RNAs for genome editing in human and mouse cells. 
With the technology's accessibility and ease of use, CRISPR has been employed widely in many different organisms and experimental settings. As a result, thousands of publications have used CRISPR to make specific genetic perturbations, establishing in itself a resource of validated guide RNA sequences. While numerous computational tools to assist in the design and identification of candidate guide RNAs exist, these are still just at best predictions and generally, researchers inevitably will test multiple sequences for functional activity. Here, we present dbGuide (https://sgrnascorer.cancer.gov/dbguide), a database of functionally validated guide RNA sequences for CRISPR/Cas9-based knockout in human and mouse. Our database not only contains computationally determined candidate guide RNA sequences, but of even greater value, over 4000 sequences which have been functionally validated either through direct amplicon sequencing or manual curation of literature from over 1000 publications. Finally, our established framework will allow for continual addition of newly published and experimentally validated guide RNA sequences for CRISPR/Cas9-based knockout as well as incorporation of sequences from different gene editing systems, additional species and other types of site-specific functionalities such as base editing, gene activation, repression and epigenetic modification.",dbGuide,0.996610284,NA,0,dbGuide,0.996610284,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +21936021,http://GenetMed.fudan.edu.cn/dbHCCvar,"dbHCCvar: a comprehensive database of human genetic variations in hepatocellular carcinoma. Hepatocellular carcinoma (HCC) is a common cancer with a high mortality rate. The complete pathogenesis of HCC is not completely understood, and highly efficient therapy is still unavailable. In the past several decades, various genetic variations such as mutations and polymorphisms have been reported to be associated with HCC risk, progression, survival, and recurrence. 
However, to our knowledge, these genetic variations have not been comprehensively and systematically compiled. In this study we constructed dbHCCvar, a free online database of human genetic variations in HCC. Eligible publications were collected from PubMed, and detailed information and major research data from each eligible study were then extracted and recorded in our database. As a result, dbHCCvar contains almost all human genetic variations reported to be associated or not associated with HCC risk, clinical pathology, drug reaction, survival, or recurrence to date. It is expected that dbHCCvar will function as a useful tool for researchers to facilitate the search and identification of new genetic markers for HCC. dbHCCvar is free for all visitors at http://GenetMed.fudan.edu.cn/dbHCCvar.",dbHCCvar,0.986464977,NA,0,dbHCCvar,0.986464977,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/20/2011 +30665056,http://DeepLearner.ahu.edu.cn/web/dbDPLS,"dbHDPLS: A database of human disease-related protein-ligand structures. Protein-ligand complexes perform specific functions, most of which are related to human diseases. The database, called as human disease-related protein-ligand structures (dbHDPLS), collected 8833 structures which were extracted from protein data bank (PDB) and other related databases. The database is annotated with comprehensive information involving ligands and drugs, related human diseases and protein-ligand interaction information, with the information of protein structures. The database may be a reliable resource for structure-based drug target discoveries and druggability predictions of protein-ligand binding sites, drug-disease relationships based on protein-ligand complex structures. 
It can be publicly accessed at the website: http://DeepLearner.ahu.edu.cn/web/dbDPLS/.",dbHDPLS,0.9490378,human disease-related protein-ligand structures,0.909200006,dbHDPLS,0.9490378,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/11/2019 +26055100,http://hme.riceblast.snu.ac.kr,"dbHiMo: a web-based epigenomics platform for histone-modifying enzymes. Over the past two decades, epigenetics has evolved into a key concept for understanding regulation of gene expression. Among many epigenetic mechanisms, covalent modifications such as acetylation and methylation of lysine residues on core histones emerged as a major mechanism in epigenetic regulation. Here, we present the database for histone-modifying enzymes (dbHiMo; http://hme.riceblast.snu.ac.kr/) aimed at facilitating functional and comparative analysis of histone-modifying enzymes (HMEs). HMEs were identified by applying a search pipeline built upon profile hidden Markov model (HMM) to proteomes. The database incorporates 11,576 HMEs identified from 603 proteomes including 483 fungal, 32 plants and 51 metazoan species. The dbHiMo provides users with web-based personalized data browsing and analysis tools, supporting comparative and evolutionary genomics. With comprehensive data entries and associated web-based tools, our database will be a valuable resource for future epigenetics/epigenomics studies.",dbHiMo,0.996748298,database for histone-modifying enzymes,0.902459852,dbHiMo,0.996748298,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/8/2015 +31603498,http://enhancer-indel.cam-su.org,"dbInDel: a database of enhancer-associated insertion and deletion variants by analysis of H3K27ac ChIP-Seq. Summary Cancer hallmarks rely on its specific transcriptional programs, which are dysregulated by multiple mechanisms, including genomic aberrations in the DNA regulatory regions. Genome-wide association studies have shown many variants are found within putative enhancer elements. 
To provide insights into the regulatory role of enhancer-associated non-coding variants in cancer epigenome, and to facilitate the identification of functional non-coding mutations, we present dbInDel, a database where we have comprehensively analyzed enhancer-associated insertion and deletion variants for both human and murine samples using ChIP-Seq data. Moreover, we provide the identification and visualization of upstream TF binding motifs in InDel-containing enhancers. Downstream target genes are also predicted and analyzed in the context of cancer biology. The dbInDel database promotes the investigation of functional contributions of non-coding variants in cancer epigenome. Availability and implementation The database, dbInDel, can be accessed from http://enhancer-indel.cam-su.org/. Supplementary information Supplementary data are available at Bioinformatics online.",dbInDel,0.994747579,NA,0,dbInDel,0.994747579,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2020 +29961819,http://soft.bioinfo-minzhao.org/lgl,"dbLGL: an online leukemia gene and literature database for the retrospective comparison of adult and childhood leukemia genetics with literature evidence. . Leukemia is a group of cancers with increased numbers of immature or abnormal leucocytes that originated in the bone marrow and other blood-forming organs. The development of differentially diagnostic biomarkers for different subtypes largely depends on understanding the biological pathways and regulatory mechanisms associated with leukemia-implicated genes. Unfortunately, the leukemia-implicated genes that have been identified thus far are scattered among thousands of published studies, and no systematic summary of the differences between adult and childhood leukemia exists with regard to the causative genetic mutations and genetic mechanisms of the various subtypes. 
In this study, we performed a systematic literature review of those susceptibility genes reported in small-scale experiments and built an online gene database containing a total of 1805 leukemia-associated genes, available at http://soft.bioinfo-minzhao.org/lgl/. Our comparison of genes from the four primary subtypes and between adult and childhood cases identified a number of potential genes related to patient survival. These curated genes can satisfy a growing demand for further integrating genomics screening for leukemia-associated low-frequency mutated genes.Database URL: http://soft.bioinfo-minzhao.org/lgl/.",dbLGL,0.99372381,NA,0,dbLGL,0.99372381,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24434032,http://iae.fafu.edu.cn/DBM,"DBM-DB: the diamondback moth genome database. The diamondback moth Genome Database (DBM-DB) is a central online repository for storing and integrating genomic data of diamondback moth (DBM), Plutella xylostella (L.). It provides comprehensive search tools and downloadable datasets for scientists to study comparative genomics, biological interpretation and gene annotation of this insect pest. DBM-DB contains assembled transcriptome datasets from multiple DBM strains and developmental stages, and the annotated genome of P. xylostella (version 2). We have also integrated publically available ESTs from NCBI and a putative gene set from a second DBM genome (KONAGbase) to enable users to compare different gene models. DBM-DB was developed with the capacity to incorporate future data resources, and will serve as a long-term and open-access database that can be conveniently used for research on the biology, distribution and evolution of DBM. This resource aims to help reduce the impact DBM has on agriculture using genomic and molecular tools. 
Database URL: http://iae.fafu.edu.cn/DBM/",DBM-DB,0.997007683,diamondback moth Genome Database,0.987979064,DBM-DB,0.997007683,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/16/2014 +26503248,http://mae.hms.harvard.edu,"dbMAE: the database of autosomal monoallelic expression. Recently, data on 'random' autosomal monoallelic expression has become available for the entire genome in multiple human and mouse tissues and cell types, creating a need for better access and dissemination. The database of autosomal monoallelic expression (dbMAE; https://mae.hms.harvard.edu) incorporates data from multiple recent reports of genome-wide analyses. These include transcriptome-wide analyses of allelic imbalance in clonal cell populations based on sequence polymorphisms, as well as indirect identification, based on a specific chromatin signature present in MAE gene bodies. Currently, dbMAE contains transcriptome-wide chromatin identification calls for 8 human and 21 mouse tissues, and describes over 16 000 murine and √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº 700 human cases of directly measured biased expression, compiled from allele-specific RNA-seq and genotyping array data. All data are manually curated. To ensure cross-publication uniformity, we performed re-analysis of transcriptome-wide RNA-seq data using the same pipeline. Data are accessed through an interface that allows for basic and advanced searches; all source references, including raw data, are clearly described and hyperlinked. This ensures the utility of the resource as an initial screening tool for those interested in investigating the role of monoallelic expression in their specific genes and tissues of interest.",dbMAE,0.997952342,database of autosomal monoallelic expression,0.85295561,dbMAE,0.997952342,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/25/2015 +34314366,http://bioinfo.aielab.cc/dbMCS,"dbMCS: A Database for Exploring the Mutation Markers of Anti-Cancer Drug Sensitivity. 
The identification of mutation markers and the selection of appropriate treatment for patients with specific genome mutations are important steps in the development of targeted therapies and the realization of precision medicine for human cancers. To investigate the baseline characteristics of drug sensitivity markers and develop computational methods of mutation effect prediction, we presented a manually curated online-based database of mutation Markers for anti-Cancer drug Sensitivity (dbMCS). Currently, dbMCS contains 1271 mutations and 4427 mutation-disease-drug associations (3151 and 1276 for sensitivity and resistance, respectively) with their PubMed indexed articles. By comparing the mutations in dbMCS with the putative neutral polymorphisms, we investigated the characteristics of drug sensitivity markers. We found that the mutation markers tend to significantly impact on high-conservative regions both in DNA sequences and protein domains. And some of them presented pleiotropic effects depending on the tumor context, appearing concurrently in the sensitivity and resistance categories. In addition, we preliminarily explored the machine learning-based methods for identifying mutation markers of anti-cancer drug sensitivity and produced optimistic results, which suggests that a reliable dataset may provide new insights and essential clues for future cancer pharmacogenomics studies. dbMCS is available at http://bioinfo.aielab.cc/dbMCS/.",dbMCS,0.993598044,for anti-Cancer drug Sensitivity,0.694444135,dbMCS,0.993598044,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/5/2021 +29145823,http://dbmdega.shinyapps.io/dbMDEGA,"dbMDEGA: a database for meta-analysis of differentially expressed genes in autism spectrum disorder. Background Autism spectrum disorders (ASD) are hereditary, heterogeneous and biologically complex neurodevelopmental disorders. Individual studies on gene expression in ASD cannot provide clear consensus conclusions. 
Therefore, a systematic review to synthesize the current findings from brain tissues and a search tool to share the meta-analysis results are urgently needed. Methods Here, we conducted a meta-analysis of brain gene expression profiles in the current reported human ASD expression datasets (with 84 frozen male cortex samples, 17 female cortex samples, 32 cerebellum samples and 4 formalin fixed samples) and knock-out mouse ASD model expression datasets (with 80 collective brain samples). Then, we applied R language software and developed an interactive shared and updated database (dbMDEGA) displaying the results of meta-analysis of data from ASD studies regarding differentially expressed genes (DEGs) in the brain. Results This database, dbMDEGA ( https://dbmdega.shinyapps.io/dbMDEGA/ ), is a publicly available web-portal for manual annotation and visualization of DEGs in the brain from data from ASD studies. This database uniquely presents meta-analysis values and homologous forest plots of DEGs in brain tissues. Gene entries are annotated with meta-values, statistical values and forest plots of DEGs in brain samples. This database aims to provide searchable meta-analysis results based on the current reported brain gene expression datasets of ASD to help detect candidate genes underlying this disorder. Conclusion This new analytical tool may provide valuable assistance in the discovery of DEGs and the elucidation of the molecular pathogenicity of ASD. This database model may be replicated to study other disorders.",dbMDEGA,0.986826241,NA,0,dbMDEGA,0.986826241,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/16/2017 +21781283,http://203.190.147.116/dbmdr,"DbMDR: a relational database for multidrug resistance genes as potential drug targets. DbMDR is non-redundant reference database of multidrug resistance (MDR) genes and their orthologs acting as potential drug targets. 
Drug resistance is a common phenomenon of pathogens, creating a serious problem of inactivation of drugs and antibiotics resulting in occurrence of diseases. Apart from other factors, the MDR genes present in pathogens are shown to be responsible for multidrug resistance. Much of the unorganized information on MDR genes is scattered across the literature and other web resources. Thus, consolidation of such knowledge about MDR genes into one database will make the drug discovery research more efficient. Mining of text for MDR genes has resulted into a large number of publications but in scattered and unorganized form. This information was compiled into a database, which enables a user not only to look at a particular MDR gene but also to find out putative homologs based on sequence similarity, conserved domains, and motifs in proteins encoded by MDR genes more efficiently. At present, DbMDR database contains 2843 MDR genes characterized experimentally as well as functionally annotated with cross-referencing search support. The DbMDR database (http://203.190.147.116/dbmdr/) is a comprehensive resource for comparative study focused on MDR genes and metabolic pathway efflux pumps and intended to provide a platform for researchers for further research in drug resistance.",DbMDR,0.997138441,NA,0,DbMDR,0.997138441,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/6/2011 +30482172,http://DeepLearner.ahu.edu.cn/web/dbMPIKT,"dbMPIKT: a database of kinetic and thermodynamic mutant protein interactions. Background Protein-protein interactions (PPIs) play important roles in biological functions. Studies of the effects of mutants on protein interactions can provide further understanding of PPIs. Currently, many databases collect experimental mutants to assess protein interactions, but most of these databases are old and have not been updated for several years. 
Results To address this issue, we manually curated a kinetic and thermodynamic database of mutant protein interactions (dbMPIKT) that is freely accessible at our website. This database contains 5291 mutants in protein interactions collected from previous databases and the literature published within the last three years. Furthermore, some data analysis, such as mutation number, mutation type, protein pair source and network map construction, can be performed online. Conclusion Our work can promote the study on PPIs, and novel information can be mined from the new database. Our database is available in http://DeepLearner.ahu.edu.cn/web/dbMPIKT/ for use by all, including both academics and non-academics.",dbMPIKT,0.991190982,NA,0,dbMPIKT,0.991190982,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2018 +32227657,http://database.liulab.science/dbMTS,"dbMTS: A comprehensive database of putative human microRNA target site SNVs and their functional predictions. MicroRNAs (miRNA) are short noncoding RNAs that can repress the expression of protein-coding messenger RNAs (mRNAs) by binding to the 3'-untranslated region (UTR) of the target. Genetic mutations such as single nucleotide variants (SNVs) in the 3'-UTR of the mRNAs can disrupt miRNA regulation. In this study, we presented dbMTS, a database for miRNA target site (MTS) SNVs and their functional annotations. This database can help studies easily identify putative SNVs that affect miRNA targeting and facilitate the prioritization of their functional importance. dbMTS is freely available for academic use at http://database.liulab.science/dbMTS as a web service or a downloadable attached database of dbNSFP.",dbMTS,0.996018052,NA,0,dbMTS,0.996018052,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/6/2020 +"21520341, 23843252, 26555599",http://sites.google.com/site/jpopgen/dbNSFP,"dbNSFP: a lightweight database of human nonsynonymous SNPs and their functional predictions. 
With the advance of sequencing technologies, whole exome sequencing has increasingly been used to identify mutations that cause human diseases, especially rare Mendelian diseases. Among the analysis steps, functional prediction (of being deleterious) plays an important role in filtering or prioritizing nonsynonymous SNP (NS) for further analysis. Unfortunately, different prediction algorithms use different information and each has its own strength and weakness. It has been suggested that investigators should use predictions from multiple algorithms instead of relying on a single one. However, querying predictions from different databases/Web-servers for different algorithms is both tedious and time consuming, especially when dealing with a huge number of NSs identified by exome sequencing. To facilitate the process, we developed dbNSFP (database for nonsynonymous SNPs' functional predictions). It compiles prediction scores from four new and popular algorithms (SIFT, Polyphen2, LRT, and MutationTaster), along with a conservation score (PhyloP) and other related information, for every potential NS in the human genome (a total of 75,931,005). It is the first integrated database of functional predictions from multiple algorithms for the comprehensive collection of human NSs. dbNSFP is freely available for download at http://sites.google.com/site/jpopgen/dbNSFP.",dbNSFP,0.998201787,NA,0,dbNSFP,0.998201787,3,NA,33261662,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/5/2016 +33261662,http://database.liulab.science/dbNSFP,"dbNSFP v4: a comprehensive database of transcript-specific functional predictions and annotations for human nonsynonymous and splice-site SNVs. Whole exome sequencing has been increasingly used in human disease studies. Prioritization based on appropriate functional annotations has been used as an indispensable step to select candidate variants. 
Here we present the latest updates to dbNSFP (version 4.1), a database designed to facilitate this step by providing deleteriousness prediction and functional annotation for all potential nonsynonymous and splice-site SNVs (a total of 84,013,093) in the human genome. The current version compiled 36 deleteriousness prediction scores, including 12 transcript-specific scores, and other variant and gene-level functional annotations. The database is available at http://database.liulab.science/dbNSFP with a downloadable version and a web-service.",dbNSFP,0.997456431,NA,0,dbNSFP,0.997456431,1,NA,"21520341.0, 23843252.0, 26555599.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,12/2/2020 +27010073,http://dbpaf.biocuckoo.org,"dbPAF: an integrative database of protein phosphorylation in animals and fungi. Protein phosphorylation is one of the most important post-translational modifications (PTMs) and regulates a broad spectrum of biological processes. Recent progresses in phosphoproteomic identifications have generated a flood of phosphorylation sites, while the integration of these sites is an urgent need. In this work, we developed a curated database of dbPAF, containing known phosphorylation sites in H. sapiens, M. musculus, R. norvegicus, D. melanogaster, C. elegans, S. pombe and S. cerevisiae. From the scientific literature and public databases, we totally collected and integrated 54,148 phosphoproteins with 483,001 phosphorylation sites. Multiple options were provided for accessing the data, while original references and other annotations were also present for each phosphoprotein. Based on the new data set, we computationally detected significantly over-represented sequence motifs around phosphorylation sites, predicted potential kinases that are responsible for the modification of collected phospho-sites, and evolutionarily analyzed phosphorylation conservation states across different species. 
Besides to be largely consistent with previous reports, our results also proposed new features of phospho-regulation. Taken together, our database can be useful for further analyses of protein phosphorylation in human and other model organisms. The dbPAF database was implemented in PHP + MySQL and freely available at http://dbpaf.biocuckoo.org.",dbPAF,0.997537851,NA,0,dbPAF,0.997537851,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/24/2016 +26946289,http://ptbdb.cs.brown.edu/dbpec,"dbPEC: a comprehensive literature-based database for preeclampsia related genes and phenotypes. . Preeclampsia is one of the most common causes of fetal and maternal morbidity and mortality in the world. We built a Database for Preeclampsia (dbPEC) consisting of the clinical features, concurrent conditions, published literature and genes associated with Preeclampsia. We included gene sets associated with severity, concurrent conditions, tissue sources and networks. The published scientific literature is the primary repository for all information documenting human disease. We used semantic data mining to retrieve and extract the articles pertaining to preeclampsia-associated genes and performed manual curation. We deposited the articles, genes, preeclampsia phenotypes and other supporting information into the dbPEC. It is publicly available and freely accessible. Previously, we developed a database for preterm birth (dbPTB) using a similar approach. Using the gene sets in dbPTB, we were able to successfully analyze a genome-wide study of preterm birth including 4000 women and children. We identified important genes and pathways associated with preterm birth that were not otherwise demonstrable using genome-wide approaches. dbPEC serves not only as a resources for genes and articles associated with preeclampsia, it is a robust source of gene sets to analyze a wide range of high-throughput data for gene set enrichment analysis. 
Database URL: http://ptbdb.cs.brown.edu/dbpec/.",dbPEC,0.996957302,Database for Preeclampsia,0.837573124,dbPEC,0.996957302,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/5/2016 +26940364,http://lifecenter.sgst.cn/dbphcc/Conclusions,"dbPHCC: a database of prognostic biomarkers for hepatocellular carcinoma that provides online prognostic modeling. Background Hepatocellular carcinoma (HCC) is one of the most common malignant cancers with a poor prognosis. For decades, more and more biomarkers were found to effect on HCC prognosis, but these studies were scattered and there were no unified identifiers. Therefore, we built the database of prognostic biomarkers and models for hepatocellular carcinoma (dbPHCC). Methods dbPHCC focuses on biomarkers which were related to HCC prognosis by traditional experiments rather than high-throughput technology. All of the prognostic biomarkers came from literatures issued during 2002 to 2014 in PubMed and were manually selected. dbPHCC collects comprehensive information of candidate biomarkers and HCC prognosis. Results dbPHCC mainly contains 567 biomarkers: 323 proteins, 154 genes, and 90 microRNAs. For each biomarker, the reference information, experimental conditions, and prognostic information are shown. Based on two available patient cohort data sets, an exemplified prognostic model was constructed using 15 phosphotransferases in dbPHCC. The web interface does not only provide a full range of browsing and searching, but also provides online analysis tools. dbPHCC is available at http://lifecenter.sgst.cn/dbphcc/Conclusions dbPHCC provides a comprehensive and convenient search and analysis platform for HCC prognosis research. General significance dbPHCC is the first database to focus on experimentally verified individual biomarkers, which are related to HCC prognosis. Prognostic markers in dbPHCC have the potential to be therapeutic drug targets and may help in designing new treatments to improve survival of HCC patients. 
This article is part of a Special Issue entitled ""System Genetics"" Guest Editor: Dr. Yudong Cai and Dr. Tao Huang.",dbPHCC,0.919840753,NA,0,dbPHCC,0.919840753,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/2/2016 +25534750,"http://dbppt.biocuckoo.org, http://dbppt.biocuckoo.or","dbPPT: a comprehensive database of protein phosphorylation in plants. As one of the most important protein post-translational modifications, the reversible phosphorylation is critical for plants in regulating a variety of biological processes such as cellular metabolism, signal transduction and responses to environmental stress. Numerous efforts especially large-scale phosphoproteome profiling studies have been contributed to dissect the phosphorylation signaling in various plants, while a large number of phosphorylation events were identified. To provide an integrated data resource for further investigations, here we present a comprehensive database of dbPPT (database of Phosphorylation site in PlanTs, at http://dbppt.biocuckoo.org), which contains experimentally identified phosphorylation sites in proteins from plants. The phosphorylation sites in dbPPT were manually curated from the literatures, whereas datasets in other public databases were also integrated. In total, there were 82,175 phosphorylation sites in 31,012 proteins from 20 plant organisms in dbPPT, presenting a larger quantity of phosphorylation sites and a higher coverage of plant species in comparison with other databases. The proportions of residue types including serine, threonine and tyrosine were 77.99, 17.81 and 4.20%, respectively. All the phosphoproteins and phosphorylation sites in the database were critically annotated. Since the phosphorylation signaling in plants attracted great attention recently, such a comprehensive resource of plant protein phosphorylation can be useful for the research community. 
Database URL: http://dbppt.biocuckoo.or",dbPPT,0.996150672,database of Phosphorylation site in,0.916097972,dbPPT,0.996150672,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/22/2014 +24194603,http://jjwanglab.org/dbpshp,"dbPSHP: a database of recent positive selection across human populations. The dbPSHP database (http://jjwanglab.org/dbpshp) aims to help researchers to efficiently identify, validate and visualize putative positively selected loci in human evolution and further discover the mechanism governing these natural selections. Recent evolution of human populations at the genomic level reflects the adaptations to the living environments, including climate change and availability and stability of nutrients. Many genetic regions under positive selection have been identified, which assist us to understand how natural selection has shaped population differences. Here, we manually collect recent positive selections in different human populations, consisting of 15,472 loci from 132 publications. We further compiled a database that used 15 statistical terms of different evolutionary attributes for single nucleotide variant sites from the HapMap 3 and 1000 Genomes Project to identify putative regions under positive selection. These attributes include variant allele/genotype properties, variant heterozygosity, within population diversity, long-range haplotypes, pairwise population differentiation and evolutionary conservation. We also provide interactive pages for visualization and annotation of different selective signals. The database is freely available to the public and will be frequently updated.",dbPSHP,0.997607529,NA,0,dbPSHP,0.997607529,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/4/2013 +25841437,http://dbpsp.biocuckoo.org,"dbPSP: a curated database for protein phosphorylation sites in prokaryotes. 
As one of the most important post-translational modifications, phosphorylation is highly involved in almost all of biological processes through temporally and spatially modifying substrate proteins. Recently, phosphorylation in prokaryotes attracted much attention for its critical roles in various cellular processes such as signal transduction. Thus, an integrative data resource of the prokaryotic phosphorylation will be useful for further analysis. In this study, we presented a curated database of phosphorylation sites in prokaryotes (dbPSP, Database URL: http://dbpsp.biocuckoo.org) for 96 prokaryotic organisms, which belong to 11 phyla in two domains including bacteria and archaea. From the scientific literature, we manually collected experimentally identified phosphorylation sites on seven types of residues, including serine, threonine, tyrosine, aspartic acid, histidine, cysteine and arginine. In total, the dbPSP database contains 7391 phosphorylation sites in 3750 prokaryotic proteins. With the dataset, the sequence preferences of the phosphorylation sites and functional annotations of the phosphoproteins were analyzed, while the results shows that there were obvious differences among the phosphorylation in bacteria, archaea and eukaryotes. All the phosphorylation sites were annotated with original references and other descriptions in the database, which could be easily accessed through user-friendly website interface including various search and browse options. Taken together, the dbPSP database provides a comprehensive data resource for further studies of protein phosphorylation in prokaryotes. Database URL: http://dbpsp.biocuckoo.org",dbPSP,0.99289383,prokaryotes,0.651427448,dbPSP,0.99289383,1,NA,32472030,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,4/4/2015 +32472030,http://dbpsp.biocuckoo.cn,"dbPSP 2.0, an updated database of protein phosphorylation sites in prokaryotes. 
In prokaryotes, protein phosphorylation plays a critical role in regulating a broad spectrum of biological processes and occurs mainly on various amino acids, including serine (S), threonine (T), tyrosine (Y), arginine (R), aspartic acid (D), histidine (H) and cysteine (C) residues of protein substrates. Through literature curation and public database integration, here we reported an updated database of phosphorylation sites (p-sites) in prokaryotes (dbPSP 2.0) that contains 19,296 experimentally identified p-sites in 8,586 proteins from 200 prokaryotic organisms, which belong to 12 phyla of two kingdoms, bacteria and archaea. To carefully annotate these phosphoproteins and p-sites, we integrated the knowledge from 88 publicly available resources that covers 9 aspects, namely, taxonomy annotation, genome annotation, function annotation, transcriptional regulation, sequence and structure information, family and domain annotation, interaction, orthologous information and biological pathway. In contrast to version 1.0 (~30 MB), dbPSP 2.0 contains ~9 GB of data, with a 300-fold increased volume. We anticipate that dbPSP 2.0 can serve as a useful data resource for further investigating phosphorylation events in prokaryotes. dbPSP 2.0 is free for all users to access at: http://dbpsp.biocuckoo.cn.",dbPSP,0.99149704,NA,0,dbPSP,0.99149704,1,NA,25841437,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,5/29/2020 +"23193290, 26578568, 30418626",http://dbPTM.mbc.nctu.edu.tw,"DbPTM 3.0: an informative resource for investigating substrate site specificity and functional association of protein post-translational modifications. Protein modification is an extremely important post-translational regulation that adjusts the physical and chemical properties, conformation, stability and activity of a protein; thus altering protein function. 
Due to the high throughput of mass spectrometry (MS)-based methods in identifying site-specific post-translational modifications (PTMs), dbPTM (http://dbPTM.mbc.nctu.edu.tw/) is updated to integrate experimental PTMs obtained from public resources as well as manually curated MS/MS peptides associated with PTMs from research articles. Version 3.0 of dbPTM aims to be an informative resource for investigating the substrate specificity of PTM sites and functional association of PTMs between substrates and their interacting proteins. In order to investigate the substrate specificity for modification sites, a newly developed statistical method has been applied to identify the significant substrate motifs for each type of PTMs containing sufficient experimental data. According to the data statistics in dbPTM, >60% of PTM sites are located in the functional domains of proteins. It is known that most PTMs can create binding sites for specific protein-interaction domains that work together for cellular function. Thus, this update integrates protein-protein interaction and domain-domain interaction to determine the functional association of PTM sites located in protein-interacting domains. Additionally, the information of structural topologies on transmembrane (TM) proteins is integrated in dbPTM in order to delineate the structural correlation between the reported PTM sites and TM topologies. To facilitate the investigation of PTMs on TM proteins, the PTM substrate sites and the structural topology are graphically represented. Also, literature information related to PTMs, orthologous conservations and substrate motifs of PTMs are also provided in the resource. Finally, this version features an improved web interface to facilitate convenient access to the resource.",dbPTM,0.998039007,NA,0,dbPTM,0.998039007,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +27903894,http://www.megabionet.org/dbSAP/index.html,"dbSAP: single amino-acid polymorphism database for protein variation detection. 
Millions of human single nucleotide polymorphisms (SNPs) or mutations have been identified so far, and these variants could be strongly correlated with phenotypic variations of traits/diseases. Among these variants, non-synonymous ones can result in amino-acid changes that are called single amino-acid polymorphisms (SAPs). Although some studies have tried to investigate the SAPs, only a small fraction of SAPs have been identified due to inadequately inferred protein variation database and the low coverage of mass spectrometry (MS) experiments. Here, we present the dbSAP database for conveniently accessing the comprehensive information and relationships of spectra, peptides and proteins of SAPs, as well as related genes, pathways, diseases and drug targets. In order to fully explore human SAPs, we built a customized protein database that contained comprehensive variant proteins by integrating and annotating the human SNPs and mutations from eight distinct databases (UniProt, Protein Mutation Database, HPMD, MSIPI, MS-CanProVar, dbSNP, Ensembl and COSMIC). After a series of quality controls, a total of 16 854 SAP peptides involving in 439 537 spectra were identified with large scale MS datasets from various human tissues and cell lines. dbSAP is freely available at http://www.megabionet.org/dbSAP/index.html.",dbSAP,0.994454622,NA,0,dbSAP,0.994454622,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2016 +25030112,http://applications.bhsai.org/dbsecsys,"DBSecSys: a database of Burkholderia mallei secretion systems. Background Bacterial pathogenicity represents a major public health concern worldwide. Secretion systems are a key component of bacterial pathogenicity, as they provide the means for bacterial proteins to penetrate host-cell membranes and insert themselves directly into the host cells' cytosol. Burkholderia mallei is a Gram-negative bacterium that uses multiple secretion systems during its host infection life cycle. 
To date, the identities of secretion system proteins for B. mallei are not well known, and their pathogenic mechanisms of action and host factors are largely uncharacterized. Description We present the Database of Burkholderia malleiSecretion Systems (DBSecSys), a compilation of manually curated and computationally predicted bacterial secretion system proteins and their host factors. Currently, DBSecSys contains comprehensive experimentally and computationally derived information about B. mallei strain ATCC 23344. The database includes 143 B. mallei proteins associated with five secretion systems, their 1,635 human and murine interacting targets, and the corresponding 2,400 host-B. mallei interactions. The database also includes information about 10 pathogenic mechanisms of action for B. mallei secretion system proteins inferred from the available literature. Additionally, DBSecSys provides details about 42 virulence attenuation experiments for 27 B. mallei secretion system proteins. Users interact with DBSecSys through a Web interface that allows for data browsing, querying, visualizing, and downloading. Conclusions DBSecSys provides a comprehensive, systematically organized resource of experimental and computational data associated with B. mallei secretion systems. It provides the unique ability to study secretion systems not only through characterization of their corresponding pathogen proteins, but also through characterization of their host-interacting partners.The database is available at https://applications.bhsai.org/dbsecsys.",DBSecSys,0.992282927,of Burkholderia malleiSecretion Systems,0.793931067,DBSecSys,0.992282927,1,NA,27650316,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,7/16/2014 +27650316,http://dbsecsys.bhsai.org,"DBSecSys 2.0: a database of Burkholderia mallei and Burkholderia pseudomallei secretion systems. Background Burkholderia mallei and B. 
pseudomallei are the causative agents of glanders and melioidosis, respectively, diseases with high morbidity and mortality rates. B. mallei and B. pseudomallei are closely related genetically; B. mallei evolved from an ancestral strain of B. pseudomallei by genome reduction and adaptation to an obligate intracellular lifestyle. Although these two bacteria cause different diseases, they share multiple virulence factors, including bacterial secretion systems, which represent key components of bacterial pathogenicity. Despite recent progress, the secretion system proteins for B. mallei and B. pseudomallei, their pathogenic mechanisms of action, and host factors are not well characterized. Results We previously developed a manually curated database, DBSecSys, of bacterial secretion system proteins for B. mallei. Here, we report an expansion of the database with corresponding information about B. pseudomallei. DBSecSys 2.0 contains comprehensive literature-based and computationally derived information about B. mallei ATCC 23344 and literature-based and computationally derived information about B. pseudomallei K96243. The database contains updated information for 163 B. mallei proteins from the previous database and 61 additional B. mallei proteins, and new information for 281 B. pseudomallei proteins associated with 5 secretion systems, their 1,633 human- and murine-interacting targets, and 2,400 host-B. mallei interactions and 2,286 host-B. pseudomallei interactions. The database also includes information about 13 pathogenic mechanisms of action for B. mallei and B. pseudomallei secretion system proteins inferred from the available literature or computationally. Additionally, DBSecSys 2.0 provides details about 82 virulence attenuation experiments for 52 B. mallei secretion system proteins and 98 virulence attenuation experiments for 61 B. pseudomallei secretion system proteins. 
We updated the Web interface and data access layer to speed-up users' search of detailed information for orthologous proteins related to secretion systems of the two pathogens. Conclusions The updates of DBSecSys 2.0 provide unique capabilities to access comprehensive information about secretion systems of B. mallei and B. pseudomallei. They enable studies and comparisons of corresponding proteins of these two closely related pathogens and their host-interacting partners. The database is available at http://dbsecsys.bhsai.org .",DBSecSys,0.988291562,NA,0,DBSecSys,0.988291562,1,NA,25030112,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,9/20/2016 +"22782549, 25399423",http://dbSNO.mbc.nctu.edu.tw,"dbSNO: a database of cysteine S-nitrosylation. Unlabelled S-nitrosylation (SNO), a selective and reversible protein post-translational modification that involves the covalent attachment of nitric oxide (NO) to the sulfur atom of cysteine, critically regulates protein activity, localization and stability. Due to its importance in regulating protein functions and cell signaling, a mass spectrometry-based proteomics method rapidly evolved to increase the dataset of experimentally determined SNO sites. However, there is currently no database dedicated to the integration of all experimentally verified S-nitrosylation sites with their structural or functional information. Thus, the dbSNO database is created to integrate all available datasets and to provide their structural analysis. Up to April 15, 2012, the dbSNO has manually accumulated >3000 experimentally verified S-nitrosylated peptides from 219 research articles using a text mining approach. To solve the heterogeneity among the data collected from different sources, the sequence identity of these reported S-nitrosylated peptides are mapped to the UniProtKB protein entries. 
To delineate the structural correlation and consensus motif of these SNO sites, the dbSNO database also provides structural and functional analyses, including the motifs of substrate sites, solvent accessibility, protein secondary and tertiary structures, protein domains and gene ontology. Availability The dbSNO is now freely accessible via http://dbSNO.mbc.nctu.edu.tw. The database content is regularly updated upon collecting new data obtained from continuously surveying research articles.",dbSNO,0.99643296,NA,0,dbSNO,0.99643296,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2014 +25385275,http://structure.bmc.lu.se/VariSNP,"VariSNP, a benchmark database for variations from dbSNP. For development and evaluation of methods for predicting the effects of variations, benchmark datasets are needed. Some previously developed datasets are available for this purpose, but newer and larger benchmark sets for benign variants have largely been missing. VariSNP datasets are selected from dbSNP. These subsets were filtered against disease-related variants in the ClinVar, UniProtKB/Swiss-Prot, and PhenCode databases, to identify neutral or nonpathogenic cases. All variant descriptions include mapping to reference sequences on chromosomal, genomic, coding DNA, and protein levels. The datasets will be updated with automated scripts on a regular basis and are freely available at http://structure.bmc.lu.se/VariSNP.",dbSNP,0.677056015,NA,0,dbSNP,0.677056015,1,NA,"24356117.0, 27402678.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,1/8/2015 +24356117,http://www.ncbi.nlm.nih.gov/projects/SNP,"Caveat emptor: single nucleotide polymorphism reporting in pharmacogenomics. While it is arguably the most comprehensive source of genetic information, the NCBI's dbSNP database (National Center for Biotechnology Information database of single nucleotide polymorphisms; http://www.ncbi.nlm.nih.gov/projects/SNP/) is imperfect. 
In this commentary, we highlight the issues surrounding this database, while considering the great importance and utility of this resource for those in the pharmacology and pharmacogenomics communities. We describe our experience with the information in this database as a cautionary tale for those who will utilize such information in the future. We also discuss several measures that could render it more reliable.",dbSNP,0.992551982,NA,0,dbSNP,0.992551982,1,NA,"25385275.0, 27402678.0",NA,NA,NA,do not merge,NA,NA,NA,12/12/2013 +27402678,http://www.actrec.gov.in/pi-webpages/AmitDutt/TMCSNP/TMCSNPdp.html,"TMC-SNPdb: an Indian germline variant database derived from whole exome sequences. . Cancer is predominantly a somatic disease. A mutant allele present in a cancer cell genome is considered somatic when it's absent in the paired normal genome along with public SNP databases. The current build of dbSNP, the most comprehensive public SNP database, however inadequately represents several non-European Caucasian populations, posing a limitation in cancer genomic analyses of data from these populations. We present the T: ata M: emorial C: entre-SNP D: ata B: ase (TMC-SNPdb), as the first open source, flexible, upgradable, and freely available SNP database (accessible through dbSNP build 149 and ANNOVAR)-representing 114 309 unique germline variants-generated from whole exome data of 62 normal samples derived from cancer patients of Indian origin. The TMC-SNPdb is presented with a companion subtraction tool that can be executed with command line option or using an easy-to-use graphical user interface with the ability to deplete additional Indian population specific SNPs over and above dbSNP and 1000 Genomes databases. 
Using an institutional generated whole exome data set of 132 samples of Indian origin, we demonstrate that TMC-SNPdb could deplete 42, 33 and 28% false positive somatic events post dbSNP depletion in Indian origin tongue, gallbladder, and cervical cancer samples, respectively. Beyond cancer somatic analyses, we anticipate utility of the TMC-SNPdb in several Mendelian germline diseases. In addition to dbSNP build 149 and ANNOVAR, the TMC-SNPdb along with the subtraction tool is available for download in the public domain at the following:Database URL: http://www.actrec.gov.in/pi-webpages/AmitDutt/TMCSNP/TMCSNPdp.html.",dbSNP,0.988259852,NA,0,dbSNP,0.988259852,1,NA,"24356117.0, 25385275.0",NA,NA,NA,do not merge,NA,NA,NA,7/9/2016 +33641184,http://bioinf.iiit.ac.in/dbstrips,"DbStRiPs: Database of structural repeats in proteins. Recent interest in repeat proteins has arisen due to stable structural folds, high evolutionary conservation and repertoire of functions provided by these proteins. However, repeat proteins are poorly characterized because of high sequence variation between repeating units and structure-based identification and classification of repeats is desirable. Using a robust network-based pipeline, manual curation and Kajava's structure-based classification schema, we have developed a database of tandem structural repeats, Database of Structural Repeats in Proteins (DbStRiPs). A unique feature of this database is that available knowledge on sequence repeat families is incorporated by mapping Pfam classification scheme onto structural classification. Integration of sequence and structure-based classifications help in identifying different functional groups within the same structural subclass, leading to refinement in the annotation of repeat proteins. 
Analysis of complete Protein Data Bank revealed 16,472 repeat annotations in 15,141 protein chains, one previously uncharacterized novel protein repeat family (PRF), named left-handed beta helix, and 33 protein repeat clusters (PRCs). Based on their unique structural motif, ~79% of these repeat proteins are classified in one of the 14 PRFs or 33 PRCs, and the remaining are grouped as unclassified repeat proteins. Each repeat protein is provided with a detailed annotation in DbStRiPs that includes start and end boundaries of repeating units, copy number, secondary and tertiary structure view, repeat class/subclass, disease association, MSA of repeating units and cross-references to various protein pattern databases, human protein atlas and interaction resources. DbStRiPs provides easy search and download options to high-quality annotations of structural repeat proteins (URL: http://bioinf.iiit.ac.in/dbstrips/).",DbStRiPs,0.990017354,Database of Structural Repeats,0.719607194,DbStRiPs,0.990017354,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/6/2021 +26438538,http://bioinfo.au.tsinghua.edu.cn/dbsuper,"dbSUPER: a database of super-enhancers in mouse and human genome. Super-enhancers are clusters of transcriptional enhancers that drive cell-type-specific gene expression and are crucial to cell identity. Many disease-associated sequence variations are enriched in super-enhancer regions of disease-relevant cell types. Thus, super-enhancers can be used as potential biomarkers for disease diagnosis and therapeutics. Current studies have identified super-enhancers in more than 100 cell types and demonstrated their functional importance. However, a centralized resource to integrate all these findings is not currently available. 
We developed dbSUPER (http://bioinfo.au.tsinghua.edu.cn/dbsuper/), the first integrated and interactive database of super-enhancers, with the primary goal of providing a resource for assistance in further studies related to transcriptional control of cell identity and disease. dbSUPER provides a responsive and user-friendly web interface to facilitate efficient and comprehensive search and browsing. The data can be easily sent to Galaxy instances, GREAT and Cistrome web-servers for downstream analysis, and can also be visualized in the UCSC genome browser where custom tracks can be added automatically. The data can be downloaded and exported in variety of formats. Furthermore, dbSUPER lists genes associated with super-enhancers and also links to external databases such as GeneCards, UniProt and Entrez. dbSUPER also provides an overlap analysis tool to annotate user-defined regions. We believe dbSUPER is a valuable resource for the biology and genetic research communities.",dbSUPER,0.994813621,NA,0,dbSUPER,0.994813621,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/4/2015 +29665371,http://bioinfo.iitk.ac.in/bioinfo/dbSWEET/Home,"dbSWEET: An Integrated Resource for SWEET Superfamily to Understand, Analyze and Predict the Function of Sugar Transporters in Prokaryotes and Eukaryotes. SWEET (Sweet Will Eventually be Exported Transporter) proteins have been recently discovered and form one of the three major families of sugar transporters. Homologs of SWEET are found in both prokaryotes and eukaryotes. Bacterial SWEET homologs have three transmembrane segments forming a triple-helical bundle and the functional form is dimers. Eukaryotic SWEETs have seven transmembrane helical segments forming two triple-helical bundles with a linker helix. Members of SWEET homologs have been shown to be involved in several important physiological processes in plants. However, not much is known regarding the biological significance of SWEET homologs in prokaryotes and in mammals. 
We have collected more than 2000 SWEET homologs from both prokaryotes and eukaryotes. For each homolog, we have modeled three different conformational states representing outward open, inward open and occluded states. We have provided details regarding substrate-interacting residues and residues forming the selectivity filter for each SWEET homolog. Several search and analysis options are available. The users can generate a phylogenetic tree and structure-based sequence alignment for selected set of sequences. With no metazoan SWEETs functionally characterized, the features observed in the selectivity filter residues can be used to predict the potential substrates that are likely to be transported across the metazoan SWEETs. We believe that this database will help the researchers to design mutational experiments and simulation studies that will aid to advance our understanding of the physiological role of SWEET homologs. This database is freely available to the scientific community at http://bioinfo.iitk.ac.in/bioinfo/dbSWEET/Home.",dbSWEET,0.741674781,NA,0,dbSWEET,0.741674781,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/14/2018 +29236308,http://www.vit.ac.in/files/database/Home.php,"Database of transcription factors in lung cancer (DBTFLC): A novel resource for exploring transcription factors associated with lung cancer. Lung cancer is considered as the most prevalent form of cancer and it is found to be frequent cause of cancer related death. Even though, approved molecular targeted therapies other than chemotherapy are currently unavailable, the mechanism of pathogenesis in lung cancer remains still unclear. Transcription factors (TFs) play a critical role in cancer cell processes, such as cell proliferation, apoptosis, migration, and regulate gene expression. 
Thus, the identification and characterization of transcription factors involved in lung cancer would provide valuable information for further elucidation of the mechanism(s) underlying pathogenesis and the identification of potential therapeutic target types, which are critical for the development of therapeutic strategies. Through an extensive literature survey, we have identified 349 transcription factors noted for their strong involvement in lung cancer. Database of Transcription Factors in Lung Cancer (DBTFLC) was constructed as a data repository and analytical platform for systematic collection, curation of TFs and their interacting partners. The database includes all pertinent information such as lung cancer related TFs, chromosomal location, family, lung cancer type, references, TF-TF interaction(s), and TF-target gene interaction(s); thus, it could serve as a valuable resource for therapeutic studies in lung cancer. The database is freely available at http://www.vit.ac.in/files/database/Home.php.",DBTFLC,0.955499542,Database of Transcription Factors in Lung Cancer,0.953766271,DBTFLC,0.955499542,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/17/2018 +25336621,http://dbtmee.hgc.jp,"DBTMEE: a database of transcriptome in mouse early embryos. DBTMEE (http://dbtmee.hgc.jp/) is a searchable and browsable database designed to manipulate gene expression information from our ultralarge-scale whole-transcriptome analysis of mouse early embryos. Since integrative approaches with multiple public analytical data have become indispensable for studying embryogenesis due to technical challenges such as biological sample collection, we intend DBTMEE to be an integrated gateway for the research community. To do so, we combined the gene expression profile with various public resources. 
Thereby, users can extensively investigate molecular characteristics among totipotent, pluripotent and differentiated cells while taking genetic and epigenetic characteristics into consideration. We have also designed user friendly web interfaces that enable users to access the data quickly and easily. DBTMEE will help to promote our understanding of the enigmatic fertilization dynamics.",DBTMEE,0.997862458,NA,0,DBTMEE,0.997862458,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/21/2014 +22086958,http://dbtss.hgc.jp,"DBTSS: DataBase of Transcriptional Start Sites progress report in 2012. To support transcriptional regulation studies, we have constructed DBTSS (DataBase of Transcriptional Start Sites), which contains exact positions of transcriptional start sites (TSSs), determined with our own technique named TSS-seq, in the genomes of various species. In its latest version, DBTSS covers the data of the majority of human adult and embryonic tissues: it now contains 418 million TSS tag sequences from 28 tissues/cell cultures. Moreover, we integrated a series of our own transcriptomic data, such as the RNA-seq data of subcellular-fractionated RNAs as well as the ChIP-seq data of histone modifications and the binding of RNA polymerase II/several transcription factors in cultured cell lines into our original TSS information. We also included several external epigenomic data, such as the chromatin map of the ENCODE project. We further associated our TSS information with public or original single-nucleotide variation (SNV) data, in order to identify SNVs in the regulatory regions. These data can be browsed in our new viewer, which supports versatile search conditions of users. We believe that our new DBTSS will be an invaluable resource for interpreting the differential uses of TSSs and for identifying human genetic variations that are associated with disordered transcriptional regulation. 
DBTSS can be accessed at http://dbtss.hgc.jp.",DBTSS,0.997758806,DataBase of Transcriptional Start Sites,0.971812087,DBTSS,0.997758806,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2011 +25887129,"http://watson.hgen.pitt.edu/register, http://watson.hgen.pitt.edu/register/docs/dbvor.html","dbVOR: a database system for importing pedigree, phenotype and genotype data and exporting selected subsets. Background When studying the genetics of a human trait, we typically have to manage both genome-wide and targeted genotype data. There can be overlap of both people and markers from different genotyping experiments; the overlap can introduce several kinds of problems. Most times the overlapping genotypes are the same, but sometimes they are different. Occasionally, the lab will return genotypes using a different allele labeling scheme (for example 1/2 vs A/C). Sometimes, the genotype for a person/marker index is unreliable or missing. Further, over time some markers are merged and bad samples are re-run under a different sample name. We need a consistent picture of the subset of data we have chosen to work with even though there might possibly be conflicting measurements from multiple data sources. Results We have developed the dbVOR database, which is designed to hold data efficiently for both genome-wide and targeted experiments. The data are indexed for fast retrieval by person and marker. In addition, we store pedigree and phenotype data for our subjects. The dbVOR database allows us to select subsets of the data by several different criteria and to merge their results into a coherent and consistent whole. Data may be filtered by: family, person, trait value, markers, chromosomes, and chromosome ranges. The results can be presented in columnar, Mega2, or PLINK format. Conclusions dbVOR serves our needs well. It is freely available from https://watson.hgen.pitt.edu/register . 
Documentation for dbVOR can be found at https://watson.hgen.pitt.edu/register/docs/dbvor.html .",dbVOR,0.990226269,NA,0,dbVOR,0.990226269,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/18/2015 +26989155,http://bioinfo.au.tsinghua.edu.cn/dbwgfp,"dbWGFP: a database and web server of human whole-genome single nucleotide variants and their functional predictions. . The recent advancement of the next generation sequencing technology has enabled the fast and low-cost detection of all genetic variants spreading across the entire human genome, making the application of whole-genome sequencing a tendency in the study of disease-causing genetic variants. Nevertheless, there still lacks a repository that collects predictions of functionally damaging effects of human genetic variants, though it has been well recognized that such predictions play a central role in the analysis of whole-genome sequencing data. To fill this gap, we developed a database named dbWGFP (a database and web server of human whole-genome single nucleotide variants and their functional predictions) that contains functional predictions and annotations of nearly 8.58 billion possible human whole-genome single nucleotide variants. Specifically, this database integrates 48 functional predictions calculated by 17 popular computational methods and 44 valuable annotations obtained from various data sources. Standalone software, user-friendly query services and free downloads of this database are available at http://bioinfo.au.tsinghua.edu.cn/dbwgfp. dbWGFP provides a valuable resource for the analysis of whole-genome sequencing, exome sequencing and SNP array data, thereby complementing existing data sources and computational resources in deciphering genetic bases of human inherited diseases.",dbWGFP,0.997119367,NA,0,dbWGFP,0.997119367,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/17/2016 +33619257,http://gdc.cancer.gov,"Uniform genomic data analysis in the NCI Genomic Data Commons. 
The goal of the National Cancer Institute's (NCI's) Genomic Data Commons (GDC) is to provide the cancer research community with a data repository of uniformly processed genomic and associated clinical data that enables data sharing and collaborative analysis in the support of precision medicine. The initial GDC dataset include genomic, epigenomic, proteomic, clinical and other data from the NCI TCGA and TARGET programs. Data production for the GDC started in June, 2015 using an OpenStack-based private cloud. By June of 2016, the GDC had analyzed more than 50,000 raw sequencing data inputs, as well as multiple other data types. Using the latest human genome reference build GRCh38, the GDC generated a variety of data types from aligned reads to somatic mutations, gene expression, miRNA expression, DNA methylation status, and copy number variation. In this paper, we describe the pipelines and workflows used to process and harmonize the data in the GDC. The generated data, as well as the original input files from TCGA and TARGET, are available for download and exploratory analysis at the GDC Data Portal and Legacy Archive ( https://gdc.cancer.gov/ ).",DC,0.631403685,NA,0,DC,0.631403685,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,2/22/2021 +23161684,http://supfam.org/SUPERFAMILY/dcGO,"DcGO: database of domain-centric ontologies on functions, phenotypes, diseases and more. We present 'dcGO' (http://supfam.org/SUPERFAMILY/dcGO), a comprehensive ontology database for protein domains. Domains are often the functional units of proteins, thus instead of associating ontological terms only with full-length proteins, it sometimes makes more sense to associate terms with individual domains. Domain-centric GO, 'dcGO', provides associations between ontological terms and protein domains at the superfamily and family levels. 
Some functional units consist of more than one domain acting together or acting at an interface between domains; therefore, ontological terms associated with pairs of domains, triplets and longer supra-domains are also provided. At the time of writing the ontologies in dcGO include the Gene Ontology (GO); Enzyme Commission (EC) numbers; pathways from UniPathway; human phenotype ontology and phenotype ontologies from five model organisms, including plants; anatomy ontologies from three organisms; human disease ontology and drugs from DrugBank. All ontological terms have probabilistic scores for their associations. In addition to associations to domains and supra-domains, the ontological terms have been transferred to proteins, through homology, providing annotations of >80 million sequences covering 2414 complete genomes, hundreds of meta-genomes, thousands of viruses and so forth. The dcGO database is updated fortnightly, and its website provides downloads, search, browse, phylogenetic context and other data-mining facilities.",dcGO,0.997635027,NA,0,dcGO,0.997635027,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2012 +27924010,"http://www.ddbj.nig.ac.jp, http://www.insdc.org","DNA Data Bank of Japan. The DNA Data Bank of Japan (DDBJ) (http://www.ddbj.nig.ac.jp) has been providing public data services for thirty years (since 1987). We are collecting nucleotide sequence data from researchers as a member of the International Nucleotide Sequence Database Collaboration (INSDC, http://www.insdc.org), in collaboration with the US National Center for Biotechnology Information (NCBI) and European Bioinformatics Institute (EBI). The DDBJ Center also services Japanese Genotype-phenotype Archive (JGA), with the National Bioscience Database Center to collect human-subjected data from Japanese researchers. 
Here, we report our database activities for INSDC and JGA over the past year, and introduce retrieval and analytical services running on our supercomputer system and their recent modifications. Furthermore, with the Database Center for Life Science, the DDBJ Center improves semantic web technologies to integrate and to share biological data, for providing the RDF version of the sequence data.",DDBJ,0.986539379,DNA Data Bank of Japan,0.896418548,DDBJ,0.986539379,1,"24194602.0, 25477381.0, 26578571.0, 29040613.0, 30357349.0, 33156332.0","24194602.0, 25477381.0, 26578571.0, 29040613.0, 30357349.0, 33156332.0",NA,NA,conflicting record(s) to be removed,"merge all ""dup name"" IDs",NA,NA,NA,10/24/2016 +"24194602, 25477381, 26578571, 29040613, 30357349, 33156332",http://www.ddbj.nig.ac.jp,"DDBJ progress report: a new submission system for leading to a correct annotation. The DNA Data Bank of Japan (DDBJ; http://www.ddbj.nig.ac.jp) maintains and provides archival, retrieval and analytical resources for biological information. This database content is shared with the US National Center for Biotechnology Information (NCBI) and the European Bioinformatics Institute (EBI) within the framework of the International Nucleotide Sequence Database Collaboration (INSDC). DDBJ launched a new nucleotide sequence submission system for receiving traditional nucleotide sequence. We expect that the new submission system will be useful for many submitters to input accurate annotation and reduce the time needed for data input. In addition, DDBJ has started a new service, the Japanese Genotype-phenotype Archive (JGA), with our partner institute, the National Bioscience Database Center (NBDC). JGA permanently archives and shares all types of individual human genetic and phenotypic data. 
We also introduce improvements in the DDBJ services and databases made during the past year.",DDBJ,0.967361391,DNA Data Bank of Japan,0.954660726,DDBJ,0.967361391,6,27924010,27924010,low_prob_best_name,do not remove,conflicting record(s) to be removed,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +32527280,http://ddiem.phenomebrowser.net,"DDIEM: drug database for inborn errors of metabolism. Background Inborn errors of metabolism (IEM) represent a subclass of rare inherited diseases caused by a wide range of defects in metabolic enzymes or their regulation. Of over a thousand characterized IEMs, only about half are understood at the molecular level, and overall the development of treatment and management strategies has proved challenging. An overview of the changing landscape of therapeutic approaches is helpful in assessing strategic patterns in the approach to therapy, but the information is scattered throughout the literature and public data resources. Results We gathered data on therapeutic strategies for 300 diseases into the Drug Database for Inborn Errors of Metabolism (DDIEM). Therapeutic approaches, including both successful and ineffective treatments, were manually classified by their mechanisms of action using a new ontology. Conclusions We present a manually curated, ontologically formalized knowledgebase of drugs, therapeutic procedures, and mitigated phenotypes. DDIEM is freely available through a web interface and for download at http://ddiem.phenomebrowser.net.",DDIEM,0.995850682,Drug Database for Inborn Errors of Metabolism,0.948216963,DDIEM,0.995850682,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/11/2020 +23906817,http://labda.inf.uc3m.es/ddicorpus,"The DDI corpus: an annotated corpus with pharmacological substances and drug-drug interactions. The management of drug-drug interactions (DDIs) is a critical issue resulting from the overwhelming amount of information available on them. 
Natural Language Processing (NLP) techniques can provide an interesting way to reduce the time spent by healthcare professionals on reviewing biomedical literature. However, NLP techniques rely mostly on the availability of the annotated corpora. While there are several annotated corpora with biological entities and their relationships, there is a lack of corpora annotated with pharmacological substances and DDIs. Moreover, other works in this field have focused in pharmacokinetic (PK) DDIs only, but not in pharmacodynamic (PD) DDIs. To address this problem, we have created a manually annotated corpus consisting of 792 texts selected from the DrugBank database and other 233 Medline abstracts. This fined-grained corpus has been annotated with a total of 18,502 pharmacological substances and 5028 DDIs, including both PK as well as PD interactions. The quality and consistency of the annotation process has been ensured through the creation of annotation guidelines and has been evaluated by the measurement of the inter-annotator agreement between two annotators. The agreement was almost perfect (Kappa up to 0.96 and generally over 0.80), except for the DDIs in the MedLine database (0.55-0.72). The DDI corpus has been used in the SemEval 2013 DDIExtraction challenge as a gold standard for the evaluation of information extraction techniques applied to the recognition of pharmacological substances and the detection of DDIs from biomedical texts. DDIExtraction 2013 has attracted wide attention with a total of 14 teams from 7 different countries. For the task of recognition and classification of pharmacological names, the best system achieved an F1 of 71.5%, while, for the detection and classification of DDIs, the best result was F1 of 65.1%. These results show that the corpus has enough quality to be used for training and testing NLP techniques applied to the field of Pharmacovigilance. 
The DDI corpus and the annotation guidelines are free for use for academic research and are available at http://labda.inf.uc3m.es/ddicorpus.",DDIExtraction,0.722436488,NA,0,DDIExtraction,0.722436488,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,7/29/2013 +25398897,http://www.cbrc.kaust.edu.sa/ddmgd,"DDMGD: the database of text-mined associations between genes methylated in diseases from different species. Gathering information about associations between methylated genes and diseases is important for diseases diagnosis and treatment decisions. Recent advancements in epigenetics research allow for large-scale discoveries of associations of genes methylated in diseases in different species. Searching manually for such information is not easy, as it is scattered across a large number of electronic publications and repositories. Therefore, we developed DDMGD database (http://www.cbrc.kaust.edu.sa/ddmgd/) to provide a comprehensive repository of information related to genes methylated in diseases that can be found through text mining. DDMGD's scope is not limited to a particular group of genes, diseases or species. Using the text mining system DEMGD we developed earlier and additional post-processing, we extracted associations of genes methylated in different diseases from PubMed Central articles and PubMed abstracts. The accuracy of extracted associations is 82% as estimated on 2500 hand-curated entries. DDMGD provides a user-friendly interface facilitating retrieval of these associations ranked according to confidence scores. Submission of new associations to DDMGD is provided. 
A comparison analysis of DDMGD with several other databases focused on genes methylated in diseases shows that DDMGD is comprehensive and includes most of the recent information on genes methylated in diseases.",DDMGD,0.992112517,NA,0,DDMGD,0.992112517,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/14/2014 +27577567,http://ddr.cbbio.es,"DDRprot: a database of DNA damage response-related proteins. . The DNA Damage Response (DDR) signalling network is an essential system that protects the genome's integrity. The DDRprot database presented here is a resource that integrates manually curated information on the human DDR network and its sub-pathways. For each particular DDR protein, we present detailed information about its function. If involved in post-translational modifications (PTMs) with each other, we depict the position of the modified residue/s in the three-dimensional structures, when resolved structures are available for the proteins. All this information is linked to the original publication from where it was obtained. Phylogenetic information is also shown, including time of emergence and conservation across 47 selected species, family trees and sequence alignments of homologues. The DDRprot database can be queried by different criteria: pathways, species, evolutionary age or involvement in (PTM). Sequence searches using hidden Markov models can be also used.Database URL: http://ddr.cbbio.es.",DDRprot,0.997383237,NA,0,DDRprot,0.997383237,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/29/2016 +21938213,http://bmi.icmr.org.in/DDTRP,"DDTRP: Database of Drug Targets for Resistant Pathogens. Emergence of drug resistance is a major threat to public health. Many pathogens have developed resistance to most of the existing antibiotics, and multidrug-resistant and extensively drug resistant strains are extremely difficult to treat. This has resulted in an urgent need for novel drugs. We describe a database called 'Database of Drug Targets for Resistant Pathogens' (DDTRP). 
The database contains information on drugs with reported resistance, their respective targets, metabolic pathways involving these targets, and a list of potential alternate targets for seven pathogens. The database can be accessed freely at http://bmi.icmr.org.in/DDTRP.",DDTRP,0.977842283,Database of Drug Targets for Resistant Pathogens,0.933010811,DDTRP,0.977842283,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/6/2011 +32307725,http://2de-pattern.pnpi.nrcki.ru,"A database for inventory of proteoform profiles: ""2DE-pattern"". The human proteome is composed of a diverse and heterogeneous range of gene products/proteoforms/protein species. Because of the growing amount of information about proteoforms generated by different methods, we need a convenient approach to make an inventory of the data. Here, we present a database of proteoforms that is based on information obtained by separation of proteoforms using 2DE followed by shotgun ESI-LC-MS/MS. The database's principles and structure are described. The database is called ""2DE-pattern"" as it contains multiple isoform-centric patterns of proteoforms separated according to 2DE principles. The database can be freely used at http://2de-pattern.pnpi.nrcki.ru.",DE-pattern,0.593041122,NA,0,DE-pattern,0.593041122,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/27/2020 +23516335,http://ibsd.gov.in/debdom,"DEBDOM: Database Exploring Banana Diversity of Manipur. Unlabelled: Being poor man's apple, banana has a wide popularity worldwide. It's one of the important horticultural crops used irrespective of rich and poor alike. Manipur along with the other states of Northeast India harboured with plenty of wild and cultivated species of banana that are not fully explored. A data base named DEBDOM has been developed here describing the diversity of banana resources of Manipur and it comprises twenty eight genotypes of Musaceae. 
The database DEBDOM provides a sophisticated web base access to the details of the taxonomy, morphological characteristics, utility as well as sites of collection of Musa genotypes, and it would have contribute as a potential gene pool sources for the conservation, sustainability as well as for crop improvement in the future breeding programmes. Availability http://ibsd.gov.in/debdom/",DEBDOM,0.996568859,Database Exploring Banana Diversity of,0.850851814,DEBDOM,0.996568859,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/2/2013 +"22962312, 24150940",http://decipher.sanger.ac.uk,"DECIPHER: web-based, community resource for clinical interpretation of rare variants in developmental disorders. Patients with developmental disorders often harbour sub-microscopic deletions or duplications that lead to a disruption of normal gene expression or perturbation in the copy number of dosage-sensitive genes. Clinical interpretation for such patients in isolation is hindered by the rarity and novelty of such disorders. The DECIPHER project (https://decipher.sanger.ac.uk) was established in 2004 as an accessible online repository of genomic and associated phenotypic data with the primary goal of aiding the clinical interpretation of rare copy-number variants (CNVs). DECIPHER integrates information from a variety of bioinformatics resources and uses visualization tools to identify potential disease genes within a CNV. A two-tier access system permits clinicians and clinical scientists to maintain confidential linked anonymous records of phenotypes and CNVs for their patients that, with informed consent, can subsequently be shared with the wider clinical genetics and research communities. Advances in next-generation sequencing technologies are making it practical and affordable to sequence the whole exome/genome of patients who display features suggestive of a genetic disorder. 
This approach enables the identification of smaller intragenic mutations including single-nucleotide variants that are not accessible even with high-resolution genomic array analysis. This article briefly summarizes the current status and achievements of the DECIPHER project and looks ahead to the opportunities and challenges of jointly analysing structural and sequence variation in the human genome.",DECIPHER,0.995207131,NA,0,DECIPHER,0.995207131,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2013 +31349169,http://cb.imsc.res.in/deduct,"A curated knowledgebase on endocrine disrupting chemicals and their biological systems-level perturbations. Human well-being can be affected by exposure to several chemicals in the environment. One such group is endocrine disrupting chemicals (EDCs) that can perturb the hormonal homeostasis leading to adverse health effects. In this work, we have developed a detailed workflow to identify EDCs with supporting evidence of endocrine disruption in published experiments in humans or rodents. Thereafter, this workflow was used to manually evaluate more than 16,000 published research articles and identify 686 potential EDCs with published evidence in humans or rodents. Importantly, we have compiled the observed adverse effects or endocrine-specific perturbations along with the dosage information for the potential EDCs from their supporting published experiments. Subsequently, the potential EDCs were classified based on the type of supporting evidence, their environmental source and their chemical properties. Additional compiled information for potential EDCs include their chemical structure, physicochemical properties, predicted ADMET properties and target genes. In order to enable future research based on this compiled information on potential EDCs, we have built an online knowledgebase, Database of Endocrine Disrupting Chemicals and their Toxicity profiles (DEDuCT), accessible at: https://cb.imsc.res.in/deduct/. 
After building this comprehensive resource, we have performed a network-centric analysis of the chemical space and the associated biological space of target genes of EDCs. Specifically, we have constructed two networks of EDCs using our resource based on similarity of chemical structures or target genes. Ensuing analysis revealed a lack of correlation between chemical structure and target genes of EDCs. Though our detailed results highlight potential challenges in developing predictive models for EDCs, the compiled information in our resource will undoubtedly enable future research in the field, especially, those focussed towards mechanistic understanding of the systems-level perturbations caused by EDCs.",DEDuCT,0.989132156,Database of Endocrine Disrupting Chemicals and their Toxicity profiles,0.964690409,DEDuCT,0.989132156,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/16/2019 +30942868,http://dee2.io,"Digital expression explorer 2: a repository of uniformly processed RNA sequencing data. . RNA sequencing (RNA-seq) is an indispensable tool in the study of gene regulation. While the technology has brought with it better transcript coverage and quantification, there remain considerable barriers to entry for the computational biologist to analyse large data sets. There is a real need for a repository of uniformly processed RNA-seq data that is easy to use. To address these obstacles, we developed Digital Expression Explorer 2 (DEE2), a web-based repository of RNA-seq data in the form of gene-level and transcript-level expression counts. DEE2 contains >5.3 trillion assigned reads from 580,000 RNA-seq data sets including species Escherichia coli, yeast, Arabidopsis, worm, fruit fly, zebrafish, rat, mouse, and human. Base-space sequence data downloaded from the National Center for Biotechnology Information Sequence Read Archive underwent quality control prior to transcriptome and genome mapping using open-source tools. 
Uniform data processing methods ensure consistency across experiments, facilitating fast and reproducible meta-analyses. The web interface allows users to quickly identify data sets of interest using accession number and keyword searches. The data can also be accessed programmatically using a specifically designed R package. We demonstrate that DEE2 data are compatible with statistical packages such as edgeR or DESeq. Bulk data are also available for download. DEE2 can be found at http://dee2.io.",DEE2,0.989227772,Expression Explorer,0.759982735,DEE2,0.989227772,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2019 +22144203,http://deepbase.sysu.edu.cn,"DeepBase: annotation and discovery of microRNAs and other noncoding RNAs from deep-sequencing data. Recent advances in high-throughput deep-sequencing technology have produced large numbers of short and long RNA sequences and enabled the detection and profiling of known and novel microRNAs (miRNAs) and other noncoding RNAs (ncRNAs) at unprecedented sensitivity and depth. In this chapter, we describe the use of deepBase, a database that we have developed to integrate all public deep-sequencing data and to facilitate the comprehensive annotation and discovery of miRNAs and other ncRNAs from these data. deepBase provides an integrative, interactive, and versatile web graphical interface to evaluate miRBase-annotated miRNA genes and other known ncRNAs, explores the expression patterns of miRNAs and other ncRNAs, and discovers novel miRNAs and other ncRNAs from deep-sequencing data. deepBase also provides a deepView genome browser to comparatively analyze these data at multiple levels. deepBase is available at http://deepbase.sysu.edu.cn/.",deepBase,0.993842721,NA,0,deepBase,0.993842721,1,NA,33175131,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,1/1/2012 +33175131,http://rna.sysu.edu.cn/deepbase3/index.html,"deepBase v3.0: expression atlas and interactive analysis of ncRNAs from thousands of deep-sequencing data. 
Eukaryotic genomes encode thousands of small and large non-coding RNAs (ncRNAs). However, the expression, functions and evolution of these ncRNAs are still largely unknown. In this study, we have updated deepBase to version 3.0 (deepBase v3.0, http://rna.sysu.edu.cn/deepbase3/index.html), an increasingly popular and openly licensed resource that facilitates integrative and interactive display and analysis of the expression, evolution, and functions of various ncRNAs by deeply mining thousands of high-throughput sequencing data from tissue, tumor and exosome samples. We updated deepBase v3.0 to provide the most comprehensive expression atlas of small RNAs and lncRNAs by integrating ∼67 620 data from 80 normal tissues and ∼50 cancer tissues. The extracellular patterns of various ncRNAs were profiled to explore their applications for discovery of noninvasive biomarkers. Moreover, we constructed survival maps of tRNA-derived RNA Fragments (tRFs), miRNAs, snoRNAs and lncRNAs by analyzing >45 000 cancer sample data and corresponding clinical information. We also developed interactive webs to analyze the differential expression and biological functions of various ncRNAs in ∼50 types of cancers. This update is expected to provide a variety of new modules and graphic visualizations to facilitate analyses and explorations of the functions and mechanisms of various types of ncRNAs.",deepBase,0.935966055,NA,0,deepBase,0.935966055,1,NA,22144203,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,1/1/2021 +27084938,http://deepblue.mpi-inf.mpg.de,"DeepBlue epigenomic data server: programmatic data retrieval and analysis of epigenome region sets. Large amounts of epigenomic data are generated under the umbrella of the International Human Epigenome Consortium, which aims to establish 1000 reference epigenomes within the next few years. 
These data have the potential to unravel the complexity of epigenomic regulation. However, their effective use is hindered by the lack of flexible and easy-to-use methods for data retrieval. Extracting region sets of interest is a cumbersome task that involves several manual steps: identifying the relevant experiments, downloading the corresponding data files and filtering the region sets of interest. Here we present the DeepBlue Epigenomic Data Server, which streamlines epigenomic data analysis as well as software development. DeepBlue provides a comprehensive programmatic interface for finding, selecting, filtering, summarizing and downloading region sets. It contains data from four major epigenome projects, namely ENCODE, ROADMAP, BLUEPRINT and DEEP. DeepBlue comes with a user manual, examples and a well-documented application programming interface (API). The latter is accessed via the XML-RPC protocol supported by many programming languages. To demonstrate usage of the API and to enable convenient data retrieval for non-programmers, we offer an optional web interface. DeepBlue can be openly accessed at http://deepblue.mpi-inf.mpg.de.",DeepBlue,0.988800287,NA,0,DeepBlue,0.988800287,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/15/2016 +23500449,http://bsb.kiz.ac.cn:90/DEER,"The DEER database: a bridge connecting drugs, environmental effects, and regulations. Variability in patient drug responses is observed with increasing frequency, necessitating the establishment of causal associations between factors and drug response phenotypes. This individual variability can be caused by genetic factors and environmental factors (ENFs). Although pharmacogenetics has been instrumental in describing genetic variations, frameworks for understanding the association between ENFs (particularly chemical ENFs) and drug responses are lacking. In this study, we constructed a novel database, DEER, for interpretations of chemical ENF effects on drug responses. 
DEER includes computational predictions of the associations between chemical ENFs and drug responses. Putative regulatory intermediates such as transcription factors, cytochrome P450s (CYPs), drug targets, and transporters as well as chemical similarities are provided to support our predictions. DEER currently encompasses 579 drugs, 401 chemical ENFs, and 9247 predicted drug-ENF associations. The entire dataset can be easily queried through a search page. The results can be downloaded, and each drug-ENF association with intermediary factors can be displayed via a graphical viewer. DEER is available at http://bsb.kiz.ac.cn:90/DEER/. We expect this approach and resource to be valuable for personalized medicine and drug development.",DEER,0.993173659,NA,0,DEER,0.993173659,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/13/2013 +23264352,http://wellslab.ucsf.edu/degrabase,"The DegraBase: a database of proteolysis in healthy and apoptotic human cells. Proteolysis is a critical post-translational modification for regulation of cellular processes. Our lab has previously developed a technique for specifically labeling unmodified protein N termini, the α-aminome, using the engineered enzyme, subtiligase. Here we present a database, called the DegraBase (http://wellslab.ucsf.edu/degrabase/), which compiles 8090 unique N termini from 3206 proteins directly identified in subtiligase-based positive enrichment mass spectrometry experiments in healthy and apoptotic human cell lines. We include both previously published and unpublished data in our analysis, resulting in a total of 2144 unique α-amines identified in healthy cells, and 6990 in cells undergoing apoptosis. 
The N termini derive from three general categories of proteolysis with respect to cleavage location and functional role: translational N-terminal methionine processing (∼10% of total proteolysis), sites close to the translational N terminus that likely represent removal of transit or signal peptides (∼25% of total), and finally, other endoproteolytic cuts (∼65% of total). Induction of apoptosis causes relatively little change in the first two proteolytic categories, but dramatic changes are seen in endoproteolysis. For example, we observed 1706 putative apoptotic caspase cuts, more than double the total annotated sites in the CASBAH and MEROPS databases. In the endoproteolysis category, there are a total of nearly 3000 noncaspase nontryptic cleavages that are not currently reported in the MEROPS database. These studies significantly increase the annotation for all categories of proteolysis in human cells and allow public access for investigators to explore interesting proteolytic events in healthy and apoptotic human cells.",DegraBase,0.968228161,NA,0,DegraBase,0.968228161,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/20/2012 +26553809,http://degradome.uniovi.es,"The Degradome database: expanding roles of mammalian proteases in life and disease. Since the definition of the degradome as the complete repertoire of proteases in a given organism, the combined effort of numerous laboratories has greatly expanded our knowledge of its roles in biology and pathology. Once the genomic sequences of several important model organisms were made available, we presented the Degradome database containing the curated sets of known protease genes in human, chimpanzee, mouse and rat. Here, we describe the updated Degradome database, featuring 81 new protease genes and 7 new protease families. 
Notably, in this short time span, the number of known hereditary diseases caused by mutations in protease genes has increased from 77 to 119. This increase reflects the growing interest on the roles of the degradome in multiple diseases, including cancer and ageing. Finally, we have leveraged the widespread adoption of new webtools to provide interactive graphic views that show information about proteases in the global context of the degradome. The Degradome database can be accessed through its web interface at http://degradome.uniovi.es.",Degradome,0.932848215,NA,0,Degradome,0.932848215,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/8/2015 +26980516,http://fungaldb.um.edu.my,"DemaDb: an integrated dematiaceous fungal genomes database. . Many species of dematiaceous fungi are associated with allergic reactions and potentially fatal diseases in human, especially in tropical climates. Over the past 10 years, we have isolated more than 400 dematiaceous fungi from various clinical samples. In this study, DemaDb, an integrated database was designed to support the integration and analysis of dematiaceous fungal genomes. A total of 92 072 putative genes and 6527 pathways that identified in eight dematiaceous fungi (Bipolaris papendorfii UM 226, Daldinia eschscholtzii UM 1400, D. eschscholtzii UM 1020, Pyrenochaeta unguis-hominis UM 256, Ochroconis mirabilis UM 578, Cladosporium sphaerospermum UM 843, Herpotrichiellaceae sp. UM 238 and Pleosporales sp. UM 1110) were deposited in DemaDb. DemaDb includes functional annotations for all predicted gene models in all genomes, such as Gene Ontology, EuKaryotic Orthologous Groups, Kyoto Encyclopedia of Genes and Genomes (KEGG), Pfam and InterProScan. All predicted protein models were further functionally annotated to Carbohydrate-Active enzymes, peptidases, secondary metabolites and virulence factors. 
DemaDb Genome Browser enables users to browse and visualize entire genomes with annotation data including gene prediction, structure, orientation and custom feature tracks. The Pathway Browser based on the KEGG pathway database allows users to look into molecular interaction and reaction networks for all KEGG annotated genes. The availability of downloadable files containing assembly, nucleic acid, as well as protein data allows the direct retrieval for further downstream works. DemaDb is a useful resource for fungal research community especially those involved in genome-scale analysis, functional genomics, genetics and disease studies of dematiaceous fungi. Database URL: http://fungaldb.um.edu.my.",DemaDb,0.9967255,NA,0,DemaDb,0.9967255,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/15/2016 +26727469,http://bbprof.immt.res.in/embf,"dEMBF: A Comprehensive Database of Enzymes of Microalgal Biofuel Feedstock. Microalgae have attracted wide attention as one of the most versatile renewable feedstocks for production of biofuel. To develop genetically engineered high lipid yielding algal strains, a thorough understanding of the lipid biosynthetic pathway and the underpinning enzymes is essential. In this work, we have systematically mined the genomes of fifteen diverse algal species belonging to Chlorophyta, Heterokontophyta, Rhodophyta, and Haptophyta, to identify and annotate the putative enzymes of lipid metabolic pathway. Consequently, we have also developed a database, dEMBF (Database of Enzymes of Microalgal Biofuel Feedstock), which catalogues the complete list of identified enzymes along with their computed annotation details including length, hydrophobicity, amino acid composition, subcellular location, gene ontology, KEGG pathway, orthologous group, Pfam domain, intron-exon organization, transmembrane topology, and secondary/tertiary structural data. 
Furthermore, to facilitate functional and evolutionary study of these enzymes, a collection of built-in applications for BLAST search, motif identification, sequence and phylogenetic analysis have been seamlessly integrated into the database. dEMBF is the first database that brings together all enzymes responsible for lipid synthesis from available algal genomes, and provides an integrative platform for enzyme inquiry and analysis. This database will be extremely useful for algal biofuel research. It can be accessed at http://bbprof.immt.res.in/embf.",dEMBF,0.996164143,Database of Enzymes of Microalgal Biofuel Feedstock,0.963428038,dEMBF,0.996164143,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/4/2016 +26342387,http://www.cbrc.kaust.edu.sa/dendb,"DENdb: database of integrated human enhancers. . Enhancers are cis-acting DNA regulatory regions that play a key role in distal control of transcriptional activities. Identification of enhancers, coupled with a comprehensive functional analysis of their properties, could improve our understanding of complex gene transcription mechanisms and gene regulation processes in general. We developed DENdb, a centralized on-line repository of predicted enhancers derived from multiple human cell-lines. DENdb integrates enhancers predicted by five different methods generating an enriched catalogue of putative enhancers for each of the analysed cell-lines. DENdb provides information about the overlap of enhancers with DNase I hypersensitive regions, ChIP-seq regions of a number of transcription factors and transcription factor binding motifs, means to explore enhancer interactions with DNA using several chromatin interaction assays and enhancer neighbouring genes. DENdb is designed as a relational database that facilitates fast and efficient searching, browsing and visualization of information. 
Database URL: http://www.cbrc.kaust.edu.sa/dendb/.",DENdb,0.997271538,NA,0,DENdb,0.997271538,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/5/2015 +31664080,http://biosig.unimelb.edu.au/dendpoint,"dendPoint: a web resource for dendrimer pharmacokinetics investigation and prediction. Nanomedicine development currently suffers from a lack of efficient tools to predict pharmacokinetic behavior without relying upon testing in large numbers of animals, impacting success rates and development costs. This work presents dendPoint, the first in silico model to predict the intravenous pharmacokinetics of dendrimers, a commonly explored drug vector, based on physicochemical properties. We have manually curated the largest relational database of dendrimer pharmacokinetic parameters and their structural/physicochemical properties. This was used to develop a machine learning-based model capable of accurately predicting pharmacokinetic parameters, including half-life, clearance, volume of distribution and dose recovered in the liver and urine. dendPoint successfully predicts dendrimer pharmacokinetic properties, achieving correlations of up to r = 0.83 and Q2 up to 0.68. dendPoint is freely available as a user-friendly web-service and database at http://biosig.unimelb.edu.au/dendpoint . This platform is ultimately expected to be used to guide dendrimer construct design and refinement prior to embarking on more time consuming and expensive in vivo testing.",dendPoint,0.994515121,NA,0,dendPoint,0.994515121,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/29/2019 +27618709,http://proline.biochem.iisc.ernet.in/DenHunt,"DenHunt - A Comprehensive Database of the Intricate Network of Dengue-Human Interactions. Dengue virus (DENV) is a human pathogen and its etiology has been widely established. There are many interactions between DENV and human proteins that have been reported in literature. 
However, no publicly accessible resource for efficiently retrieving the information is yet available. In this study, we mined all publicly available dengue-human interactions that have been reported in the literature into a database called DenHunt. We retrieved 682 direct interactions of human proteins with dengue viral components, 382 indirect interactions and 4120 differentially expressed human genes in dengue infected cell lines and patients. We have illustrated the importance of DenHunt by mapping the dengue-human interactions on to the host interactome and observed that the virus targets multiple host functional complexes of important cellular processes such as metabolism, immune system and signaling pathways suggesting a potential role of these interactions in viral pathogenesis. We also observed that 7 percent of the dengue virus interacting human proteins are also associated with other infectious and non-infectious diseases. Finally, the understanding that comes from such analyses could be used to design better strategies to counteract the diseases caused by dengue virus. The whole dataset has been catalogued in a searchable database, called DenHunt (http://proline.biochem.iisc.ernet.in/DenHunt/).",DenHunt,0.991763353,NA,0,DenHunt,0.991763353,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/12/2016 +27907889,http://denovo-db.gs.washington.edu,"denovo-db: a compendium of human de novo variants. Whole-exome and whole-genome sequencing have facilitated the large-scale discovery of de novo variants in human disease. To date, most de novo discovery through next-generation sequencing focused on congenital heart disease and neurodevelopmental disorders (NDDs). Currently, de novo variants are one of the most significant risk factors for NDDs with a substantial overlap of genes involved in more than one NDD. 
To facilitate better usage of published data, provide standardization of annotation, and improve accessibility, we created denovo-db (http://denovo-db.gs.washington.edu), a database for human de novo variants. As of July 2016, denovo-db contained 40 different studies and 32,991 de novo variants from 23,098 trios. Database features include basic variant information (chromosome location, change, type); detailed annotation at the transcript and protein levels; severity scores; frequency; validation status; and, most importantly, the phenotype of the individual with the variant. We included a feature on our browsable website to download any query result, including a downloadable file of the full database with additional variant details. denovo-db provides necessary information for researchers to compare their data to other individuals with the same phenotype and also to controls allowing for a better understanding of the biology of de novo variants and their contribution to disease.",denovo-db,0.997356877,NA,0,denovo-db,0.997356877,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/5/2016 +32510549,http://webs.iiitd.edu.in/raghava/denvind,"DenvInD: dengue virus inhibitors database for clinical and molecular research. . Dengue virus (DENV) researchers often face challenges with the highly time-consuming process of collecting and curating information on known inhibitors during the standard drug discovery process. To this end, however, required collective information is not yet available on a single platform. Hence, we have developed the DenvInD database for experimentally validated DENV inhibitors against its known targets presently hosted at https://webs.iiitd.edu.in/raghava/denvind/. This database provides comprehensive information, i.e. PubChem IDs, SMILES, IC50, EC50, CC50, and wherever available Ki values of the 484 compounds in vitro validated as inhibitors against respective drug targets of DENV. 
Also, the DenvInD database has been linked to the user-friendly web-based interface and accessibility features, such as simple search, advanced search and data browsing. All the required data curation was conducted manually from the reported scientific literature and PubChem. The collected information was then organized into the DenvInD database using sequence query language under user interface by hypertext markup language. DenvInD is the first useful repository of its kind which would augment the DENV drug discovery research by providing essential information on known DENV inhibitors for molecular docking, computational screening, pharmacophore modeling and quantitative structure-activity relationship modeling.",DenvInD,0.996846437,NA,0,DenvInD,0.996846437,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +25326239,http://www.cbrc.kaust.edu.sa/deop,"DEOP: a database on osmoprotectants and associated pathways. . Microorganisms are known to counteract salt stress through salt influx or by the accumulation of osmoprotectants (also called compatible solutes). Understanding the pathways that synthesize and/or breakdown these osmoprotectants is of interest to studies of crops halotolerance and to biotechnology applications that use microbes as cell factories for production of biomass or commercial chemicals. To facilitate the exploration of osmoprotectants, we have developed the first online resource, 'Dragon Explorer of Osmoprotection associated Pathways' (DEOP) that gathers and presents curated information about osmoprotectants, complemented by information about reactions and pathways that use or affect them. A combined total of 141 compounds were confirmed osmoprotectants, which were matched to 1883 reactions and 834 pathways. DEOP can also be used to map genes or microbial genomes to potential osmoprotection-associated pathways, and thus link genes and genomes to other associated osmoprotection information. 
Moreover, DEOP provides a text-mining utility to search deeper into the scientific literature for supporting evidence or for new associations of osmoprotectants to pathways, reactions, enzymes, genes or organisms. Two case studies are provided to demonstrate the usefulness of DEOP. The system can be accessed at. Database URL: http://www.cbrc.kaust.edu.sa/deop/",DEOP,0.958839059,ragon Explorer of Osmoprotection associated Pathways,0.914404387,DEOP,0.958839059,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/17/2014 +22467915,http://tubic.tju.edu.cn/deori,"DeOri: a database of eukaryotic DNA replication origins. Summary DNA replication, a central event for cell proliferation, is the basis of biological inheritance. The identification of replication origins helps to reveal the mechanism of the regulation of DNA replication. However, only few eukaryotic replication origins were characterized not long ago; nevertheless, recent genome-wide approaches have boosted the number of mapped replication origins. To gain a comprehensive understanding of the nature of eukaryotic replication origins, we have constructed a Database of Eukaryotic ORIs (DeOri), which contains all the eukaryotic ones identified by genome-wide analyses currently available. A total of 16 145 eukaryotic replication origins have been collected from 6 eukaryotic organisms in which genome-wide studies have been performed, the replication-origin numbers being 433, 7489, 1543, 148, 348 and 6184 for humans, mice, Arabidopsis thaliana, Kluyveromyces lactis, Schizosaccharomyces pombe and Drosophila melanogaster, respectively. Availability Database of Eukaryotic ORIs (DeOri) can be accessed from http://tubic.tju.edu.cn/deori/",DeOri,0.925356305,Database of Eukaryotic ORIs,0.895951286,DeOri,0.925356305,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/30/2012 +25332398,"http://www.depod.org, http://www.koehn.embl.de/depod","The human DEPhOsphorylation database DEPOD: a 2015 update. 
Phosphatases are crucial enzymes in health and disease, but the knowledge of their biological roles is still limited. Identifying substrates continues to be a great challenge. To support the research on phosphatase-kinase-substrate networks we present here an update on the human DEPhOsphorylation Database: DEPOD (http://www.depod.org or http://www.koehn.embl.de/depod). DEPOD is a manually curated open access database providing human phosphatases, their protein and non-protein substrates, dephosphorylation sites, pathway involvements and external links to kinases and small molecule modulators. All internal data are fully searchable including a BLAST application. Since the first release, more human phosphatases and substrates, their associated signaling pathways (also from new sources), and interacting proteins for all phosphatases and protein substrates have been added into DEPOD. The user interface has been further optimized; for example, the interactive human phosphatase-substrate network contains now a 'highlight node' function for phosphatases, which includes the visualization of neighbors in the network.",DEPOD,0.997329473,human DEPhOsphorylation Database,0.98109927,DEPOD,0.997329473,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/20/2014 +33119734,http://biomine.cs.vcu.edu/servers/DESCRIBEPROT,"DescribePROT: database of amino acid-level protein structure and function predictions. We present DescribePROT, the database of predicted amino acid-level descriptors of structure and function of proteins. DescribePROT delivers a comprehensive collection of 13 complementary descriptors predicted using 10 popular and accurate algorithms for 83 complete proteomes that cover key model organisms. The current version includes 7.8 billion predictions for close to 600 million amino acids in 1.4 million proteins. 
The descriptors encompass sequence conservation, position specific scoring matrix, secondary structure, solvent accessibility, intrinsic disorder, disordered linkers, signal peptides, MoRFs and interactions with proteins, DNA and RNAs. Users can search DescribePROT by the amino acid sequence and the UniProt accession number and entry name. The pre-computed results are made available instantaneously. The predictions can be accesses via an interactive graphical interface that allows simultaneous analysis of multiple descriptors and can be also downloaded in structured formats at the protein, proteome and whole database scale. The putative annotations included by DescriPROT are useful for a broad range of studies, including: investigations of protein function, applied projects focusing on therapeutics and diseases, and in the development of predictors for other protein sequence descriptors. Future releases will expand the coverage of DescribePROT. DescribePROT can be accessed at http://biomine.cs.vcu.edu/servers/DESCRIBEPROT/.",DescribePROT,0.997794986,NA,0,DescribePROT,0.997794986,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +23415072,http://www.cbrc.kaust.edu.sa/desmsci,"Dragon exploration system on marine sponge compounds interactions. Background Natural products are considered a rich source of new chemical structures that may lead to the therapeutic agents in all major disease areas. About 50% of the drugs introduced in the market in the last 20 years were natural products/derivatives or natural products mimics, which clearly shows the influence of natural products in drug discovery. Results In an effort to further support the research in this field, we have developed an integrative knowledge base on Marine Sponge Compounds Interactions (Dragon Exploration System on Marine Sponge Compounds Interactions - DESMSCI) as a web resource. 
This knowledge base provides information about the associations of the sponge compounds with different biological concepts such as human genes or proteins, diseases, as well as pathways, based on the literature information available in PubMed and information deposited in several other databases. As such, DESMSCI is aimed as a research support resource for problems on the utilization of marine sponge compounds. DESMSCI allows visualization of relationships between different chemical compounds and biological concepts through textual and tabular views, graphs and relational networks. In addition, DESMSCI has built in hypotheses discovery module that generates potentially new/interesting associations among different biomedical concepts. We also present a case study derived from the hypotheses generated by DESMSCI which provides a possible novel mode of action for variolins in Alzheimer's disease. Conclusion DESMSCI is the first publicly available (http://www.cbrc.kaust.edu.sa/desmsci) comprehensive resource where users can explore information, compiled by text- and data-mining approaches, on biological and chemical data related to sponge compounds.",DESMSCI,0.997198522,Dragon Exploration System on Marine Sponge Compounds Interactions,0.903089372,DESMSCI,0.997198522,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/16/2013 +25951377,http://prir.ustb.edu.cn/DeTEXT,"DeTEXT: A Database for Evaluating Text Extraction from Biomedical Literature Figures. Hundreds of millions of figures are available in biomedical literature, representing important biomedical experimental evidence. Since text is a rich source of information in figures, automatically extracting such text may assist in the task of mining figure information. A high-quality ground truth standard can greatly facilitate the development of an automated system. This article describes DeTEXT: A database for evaluating text extraction from biomedical literature figures. 
It is the first publicly available, human-annotated, high quality, and large-scale figure-text dataset with 288 full-text articles, 500 biomedical figures, and 9308 text regions. This article describes how figures were selected from open-access full-text biomedical articles and how annotation guidelines and annotation tools were developed. We also discuss the inter-annotator agreement and the reliability of the annotations. We summarize the statistics of the DeTEXT data and make available evaluation protocols for DeTEXT. Finally we lay out challenges we observed in the automated detection and recognition of figure text and discuss research directions in this area. DeTEXT is publicly available for downloading at http://prir.ustb.edu.cn/DeTEXT/.",DeTEXT,0.969740272,NA,0,DeTEXT,0.969740272,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/7/2015 +22369658,http://lifecenter.sgst.cn/detoxiprot,"DetoxiProt: an integrated database for detoxification proteins. Background Detoxification proteins are a class of proteins for degradation and/or elimination of endogenous and exogenous toxins or medicines, as well as reactive oxygen species (ROS) produced by these materials. Most of these proteins are generated as a response to the stimulation of toxins or medicines. They are essential for the clearance of harmful substances and for maintenance of physiological balance in organisms. Thus, it is important to collect and integrate information on detoxification proteins. Results To store, retrieve and analyze the information related to their features and functions, we developed the DetoxiProt, a comprehensive database for annotation of these proteins. This database provides detailed introductions about different classes of the detoxification proteins. Extensive annotations of these proteins, including sequences, structures, features, inducers, inhibitors, substrates, chromosomal location, functional domains as well as physiological-biochemical properties were generated. 
Furthermore, pre-computed BLAST results, multiple sequence alignments and evolutionary trees for detoxification proteins are also provided for evolutionary study of conserved function and pathways. The current version of DetoxiProt contains 5956 protein entries distributed in 628 organisms. An easy to use web interface was designed, so that annotations about each detoxification protein can be retrieved by browsing with a specific method or by searching with different criteria. Conclusions DetoxiProt provides an effective and efficient way of accessing the detoxification protein sequences and other high-quality information. This database would be a valuable source for toxicologists, pharmacologists and medicinal chemists. DetoxiProt database is freely available at http://lifecenter.sgst.cn/detoxiprot/.",DetoxiProt,0.993119836,NA,0,DetoxiProt,0.993119836,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/30/2011 +34097004,http://devomics.cn,"DevOmics: an integrated multi-omics database of human and mouse early embryo. . Transcriptomic and epigenetic alterations during early embryo development have been proven to play essential roles in regulating the cell fate. Nowadays, advances in single-cell transcriptomics and epigenomics profiling techniques provide large volumes of data for understanding the molecular regulatory mechanisms in early embryos and facilitate the investigation of assisted reproductive technology as well as preimplantation genetic testing. However, the lack of integrated data collection and unified analytic procedures greatly limits their usage in scientific research and clinical application. Hence, it is necessary to establish a database integrating the regulatory information of human and mouse early embryos with unified analytic procedures. 
Here, we introduce DevOmics (http://devomics.cn/), which contains normalized gene expression, DNA methylation, histone modifications (H3K4me3, H3K9me3, H3K27me3, H3K27ac), chromatin accessibility and 3D chromatin architecture profiles of human and mouse early embryos spanning six developmental stages (zygote, 2cell, 4cell, 8cell, morula and blastocyst (ICM, TE)). The current version of DevOmics provides Search and Advanced Search for retrieving genes a researcher is interested in, Analysis Tools including the differentially expressed genes (DEGs) analysis for acquiring DEGs between different types of samples, allelic explorer for displaying allele-specific gene expression as well as epigenetic modifications and correlation analysis for showing the dynamic changes in different layers of data across developmental stages, as well as Genome Browser and Ortholog for visualization. DevOmics offers a user-friendly website for biologists and clinicians to decipher molecular regulatory mechanisms of human and mouse early embryos.",DevOmics,0.997623146,NA,0,DevOmics,0.997623146,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2021 +"26612867, 33436076",http://dfam.org,"The Dfam database of repetitive DNA families. Repetitive DNA, especially that due to transposable elements (TEs), makes up a large fraction of many genomes. Dfam is an open access database of families of repetitive DNA elements, in which each family is represented by a multiple sequence alignment and a profile hidden Markov model (HMM). The initial release of Dfam, featured in the 2013 NAR Database Issue, contained 1143 families of repetitive elements found in humans, and was used to produce more than 100 Mb of additional annotation of TE-derived regions in the human genome, with improved speed. Here, we describe recent advances, most notably expansion to 4150 total families including a comprehensive set of known repeat families from four new organisms (mouse, zebrafish, fly and nematode). 
We describe improvements to coverage, and to our methods for identifying and reducing false annotation. We also describe updates to the website interface. The Dfam website has moved to http://dfam.org. Seed alignments, profile HMMs, hit lists and other underlying data are available for download.",Dfam,0.997758031,NA,0,Dfam,0.997758031,2,NA,23203985,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/12/2021 +23203985,http://dfam.janelia.org,"Dfam: a database of repetitive DNA based on profile hidden Markov models. We present a database of repetitive DNA elements, called Dfam (http://dfam.janelia.org). Many genomes contain a large fraction of repetitive DNA, much of which is made up of remnants of transposable elements (TEs). Accurate annotation of TEs enables research into their biology and can shed light on the evolutionary processes that shape genomes. Identification and masking of TEs can also greatly simplify many downstream genome annotation and sequence analysis tasks. The commonly used TE annotation tools RepeatMasker and Censor depend on sequence homology search tools such as cross_match and BLAST variants, as well as Repbase, a collection of known TE families each represented by a single consensus sequence. Dfam contains entries corresponding to all Repbase TE entries for which instances have been found in the human genome. Each Dfam entry is represented by a profile hidden Markov model, built from alignments generated using RepeatMasker and Repbase. When used in conjunction with the hidden Markov model search tool nhmmer, Dfam produces a 2.9% increase in coverage over consensus sequence search methods on a large human benchmark, while maintaining low false discovery rates, and coverage of the full human genome is 54.5%. The website provides a collection of tools and data views to support improved TE curation and annotation efforts. 
Dfam is also available for download in flat file format or in the form of MySQL table dumps.",Dfam,0.994080424,NA,0,Dfam,0.994080424,1,NA,"26612867.0, 33436076.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/30/2012 +22493695,http://www.digitalfishlibrary.org,"The Digital Fish Library: using MRI to digitize, database, and document the morphological diversity of fish. Museum fish collections possess a wealth of anatomical and morphological data that are essential for documenting and understanding biodiversity. Obtaining access to specimens for research, however, is not always practical and frequently conflicts with the need to maintain the physical integrity of specimens and the collection as a whole. Non-invasive three-dimensional (3D) digital imaging therefore serves a critical role in facilitating the digitization of these specimens for anatomical and morphological analysis as well as facilitating an efficient method for online storage and sharing of this imaging data. Here we describe the development of the Digital Fish Library (DFL, http://www.digitalfishlibrary.org), an online digital archive of high-resolution, high-contrast, magnetic resonance imaging (MRI) scans of the soft tissue anatomy of an array of fishes preserved in the Marine Vertebrate Collection of Scripps Institution of Oceanography. We have imaged and uploaded MRI data for over 300 marine and freshwater species, developed a data archival and retrieval system with a web-based image analysis and visualization tool, and integrated these into the public DFL website to disseminate data and associated metadata freely over the web. We show that MRI is a rapid and powerful method for accurately depicting the in-situ soft-tissue anatomy of preserved fishes in sufficient detail for large-scale comparative digital morphology. 
However these 3D volumetric data require a sophisticated computational and archival infrastructure in order to be broadly accessible to researchers and educators.",DFL,0.954812447,Fish Library,0.858391941,DFL,0.954812447,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/6/2012 +21782820,http://bio.dfci.harvard.edu/DFRMLI,"Dana-Farber repository for machine learning in immunology. The immune system is characterized by high combinatorial complexity that necessitates the use of specialized computational tools for analysis of immunological data. Machine learning (ML) algorithms are used in combination with classical experimentation for the selection of vaccine targets and in computational simulations that reduce the number of necessary experiments. The development of ML algorithms requires standardized data sets, consistent measurement methods, and uniform scales. To bridge the gap between the immunology community and the ML community, we designed a repository for machine learning in immunology named Dana-Farber Repository for Machine Learning in Immunology (DFRMLI). This repository provides standardized data sets of HLA-binding peptides with all binding affinities mapped onto a common scale. It also provides a list of experimentally validated naturally processed T cell epitopes derived from tumor or virus antigens. The DFRMLI data were preprocessed and ensure consistency, comparability, detailed descriptions, and statistically meaningful sample sizes for peptides that bind to various HLA molecules. The repository is accessible at http://bio.dfci.harvard.edu/DFRMLI/.",DFRMLI,0.90959398,Dana-Farber Repository for,0.82518174,DFRMLI,0.90959398,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/18/2011 +23197658,http://dga.nubic.northwestern.edu,"The Disease and Gene Annotations (DGA): an annotation resource for human disease. 
Disease and Gene Annotations database (DGA, http://dga.nubic.northwestern.edu) is a collaborative effort aiming to provide a comprehensive and integrative annotation of the human genes in disease network context by integrating computable controlled vocabulary of the Disease Ontology (DO version 3 revision 2510, which has 8043 inherited, developmental and acquired human diseases), NCBI Gene Reference Into Function (GeneRIF) and molecular interaction network (MIN). DGA integrates these resources together using semantic mappings to build an integrative set of disease-to-gene and gene-to-gene relationships with excellent coverage based on current knowledge. DGA is kept current by periodically reparsing DO, GeneRIF, and MINs. DGA provides a user-friendly and interactive web interface system enabling users to efficiently query, download and visualize the DO tree structure and annotations as a tree, a network graph or a tabular list. To facilitate integrative analysis, DGA provides a web service Application Programming Interface for integration with external analytic tools.",DGA,0.992823601,Disease and Gene Annotations database,0.900466466,DGA,0.992823601,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2012 +24122041,http://dgidb.org,DGIdb: mining the druggable genome. The Drug-Gene Interaction database (DGIdb) mines existing resources that generate hypotheses about how mutated genes might be targeted therapeutically or prioritized for drug development. It provides an interface for searching lists of genes against a compendium of drug-gene interactions and potentially 'druggable' genes. DGIdb can be accessed at http://dgidb.org/.,DGIdb,0.997908175,Drug-Gene Interaction database,0.983872821,DGIdb,0.997908175,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/13/2013 +24174537,http://dgv.tcag.ca,"The Database of Genomic Variants: a curated collection of structural variation in the human genome. 
Over the past decade, the Database of Genomic Variants (DGV; http://dgv.tcag.ca/) has provided a publicly accessible, comprehensive curated catalogue of structural variation (SV) found in the genomes of control individuals from worldwide populations. Here, we describe updates and new features, which have expanded the utility of DGV for both the basic research and clinical diagnostic communities. The current version of DGV consists of 55 published studies, comprising >2.5 million entries identified in >22,300 genomes. Studies included in DGV are selected from the accessioned data sets in the archival SV databases dbVar (NCBI) and DGVa (EBI), and then further curated for accuracy and validity. The core visualization tool (gbrowse) has been upgraded with additional functions to facilitate data analysis and comparison, and a new query tool has been developed to provide flexible and interactive access to the data. The content from DGV is regularly incorporated into other large-scale genome reference databases and represents a standard data resource for new product and database development, in particular for copy number variation testing in clinical labs. The accurate cataloguing of variants in DGV will continue to enable medical genetics and genome sequencing research.",DGV,0.99448659,Database of Genomic Variants,0.923672388,DGV,0.99448659,1,NA,27375595,NA,NA,NA,do not merge,NA,NA,NA,10/29/2013 +27375595,http://gph.niid.go.jp/geograph/dengue/content/genomemap,"DGV: Dengue Genographic Viewer. Dengue viruses (DENVs) and their vectors are widely distributed throughout the tropical and subtropical regions of the world. An autochthonous case of DENV was reported in Tokyo, Japan, in 2014, for the first time in 70 years. A comprehensive database of DENV sequences containing both serotype and genotype data and epidemiological data is crucial to trace DENV outbreak isolates and promptly respond to outbreaks. 
We constructed a DENV database containing the serotype, genotype, year and country/region of collection by collecting all publically available DENV sequence information from the National Center for Biotechnology Information (NCBI) and assigning genotype information. We also implemented the web service Dengue Genographic Viewer (DGV), which shows the geographical distribution of each DENV genotype in a user-specified time span. DGV also assigns the serotype and genotype to a user-specified sequence by performing a homology search against the curated DENV database, and shows its homologous sequences with the geographical position and year of collection. DGV also shows the distribution of DENV-infected entrants to Japan by plotting epidemiological data from the Infectious Agents Surveillance Report (IASR), Japan. This overview of the DENV genotype distribution may aid in planning for the control of DENV infections. DGV is freely available online at: (https://gph.niid.go.jp/geograph/dengue/content/genomemap).",DGV,0.96955502,Dengue Genographic Viewer,0.616353422,DGV,0.96955502,1,NA,24174537,low_prob_best_name,do not remove,NA,do not merge,NA,NA,NA,6/7/2016 +23193291,"http://www.ebi.ac.uk/dgva, http://www.ncbi.nlm.nih.gov/dbvar","DbVar and DGVa: public archives for genomic structural variation. Much has changed in the last two years at DGVa (http://www.ebi.ac.uk/dgva) and dbVar (http://www.ncbi.nlm.nih.gov/dbvar). We are now processing direct submissions rather than only curating data from the literature and our joint study catalog includes data from over 100 studies in 11 organisms. Studies from human dominate with data from control and case populations, tumor samples as well as three large curated studies derived from multiple sources. During the processing of these data, we have made improvements to our data model, submission process and data representation. 
Additionally, we have made significant improvements in providing access to these data via web and FTP interfaces.",DGVa,0.929166198,NA,0,DGVa,0.929166198,1,NA,NA,low_prob_best_name,do not remove,NA,NA,TRUE POS: two resources; name and URL of first will be correct; second is lost,NA,NA,11/27/2012 +24307774,http://www.kaubic.in/diacan,DIACAN: Integrated Database for Antidiabetic and Anticancer Medicinal Plants. Unlabelled Medicinal plants and plant derived molecules are widely used in traditional cultures all over the world and they are becoming large popular among biomedical researchers and pharmaceutical companies as a natural alternative to synthetic medicine. Information related to medicinal plants and herbal drugs accumulated over the ages are scattered and unstructured which make it prudent to develop a curated database for medicinal plants. The Antidiabetic and Anticancer Medicinal Plants Database (DIACAN) aims to collect and provide an integrated platform for plants and phytochemiclas having antidiabetic or anticancer activity. Availability http://www.kaubic.in/diacan.,DIACAN,0.994966656,Antidiabetic and Anticancer Medicinal Plants Database,0.987637103,DIACAN,0.994966656,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2013 +21249531,http://diademchallenge.org,"The DIADEM data sets: representative light microscopy images of neuronal morphology to advance automation of digital reconstructions. The comprehensive characterization of neuronal morphology requires tracing extensive axonal and dendritic arbors imaged with light microscopy into digital reconstructions. Considerable effort is ongoing to automate this greatly labor-intensive and currently rate-determining process. Experimental data in the form of manually traced digital reconstructions and corresponding image stacks play a vital role in developing increasingly more powerful reconstruction algorithms. 
The DIADEM challenge (short for DIgital reconstruction of Axonal and DEndritic Morphology) successfully stimulated progress in this area by utilizing six data set collections from different animal species, brain regions, neuron types, and visualization methods. The original research projects that provided these data are representative of the diverse scientific questions addressed in this field. At the same time, these data provide a benchmark for the types of demands automated software must meet to achieve the quality of manual reconstructions while minimizing human involvement. The DIADEM data underwent extensive curation, including quality control, metadata annotation, and format standardization, to focus the challenge on the most substantial technical obstacles. This data set package is now freely released ( http://diademchallenge.org ) to train, test, and aid development of automated reconstruction algorithms.",DIADEM,0.989351451,DIgital reconstruction of Axonal and DEndritic Morphology,0.851490708,DIADEM,0.989351451,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2011 +26586797,http://www.microrna.gr/mirgen,"DIANA-miRGen v3.0: accurate characterization of microRNA promoters and their regulators. microRNAs (miRNAs) are small non-coding RNAs that actively fine-tune gene expression. The accurate characterization of the mechanisms underlying miRNA transcription regulation will further expand our knowledge regarding their implication in homeostatic and pathobiological networks. Aim of DIANA-miRGen v3.0 (http://www.microrna.gr/mirgen) is to provide for the first time accurate cell-line-specific miRNA gene transcription start sites (TSSs), coupled with genome-wide maps of transcription factor (TF) binding sites in order to unveil the mechanisms of miRNA transcription regulation. 
To this end, more than 7.3 billion RNA-, ChIP- and DNase-Seq next generation sequencing reads were analyzed/assembled and combined with state-of-the-art miRNA TSS prediction and TF binding site identification algorithms. The new database schema and web interface facilitates user interaction, provides advanced queries and innate connection with other DIANA resources for miRNA target identification and pathway analysis. The database currently supports 276 miRNA TSSs that correspond to 428 precursors and >19M binding sites of 202 TFs on a genome-wide scale in nine cell-lines and six tissues of Homo sapiens and Mus musculus.",DIANA-miRGen,0.908604195,NA,0,DIANA-miRGen,0.908604195,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/19/2015 +25416803,http://www.microrna.gr/tarbase,"DIANA-TarBase v7.0: indexing more than half a million experimentally supported miRNA:mRNA interactions. microRNAs (miRNAs) are short non-coding RNA species, which act as potent gene expression regulators. Accurate identification of miRNA targets is crucial to understanding their function. Currently, hundreds of thousands of miRNA:gene interactions have been experimentally identified. However, this wealth of information is fragmented and hidden in thousands of manuscripts and raw next-generation sequencing data sets. DIANA-TarBase was initially released in 2006 and it was the first database aiming to catalog published experimentally validated miRNA:gene interactions. DIANA-TarBase v7.0 (http://www.microrna.gr/tarbase) aims to provide for the first time hundreds of thousands of high-quality manually curated experimentally validated miRNA:gene interactions, enhanced with detailed meta-data. DIANA-TarBase v7.0 enables users to easily identify positive or negative experimental results, the utilized experimental methodology, experimental conditions including cell/tissue type and treatment. 
The new interface provides also advanced information ranging from the binding site location, as identified experimentally as well as in silico, to the primer sequences used for cloning experiments. More than half a million miRNA:gene interactions have been curated from published experiments on 356 different cell types from 24 species, corresponding to 9- to 250-fold more entries than any other relevant database. DIANA-TarBase v7.0 is freely available.",DIANA-TarBase,0.989856553,NA,0,DIANA-TarBase,0.989856553,1,"27603020.0, 22135297.0, 29156006.0",27603020,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/21/2014 +27603020,"http://www.microrna.gr/tarbase, http://www.microrna.gr","DIANA-TarBase and DIANA Suite Tools: Studying Experimentally Supported microRNA Targets. microRNAs (miRNAs) are short non-coding RNAs (~22 nts) present in animals, plants, and viruses. They are considered central post-transcriptional regulators of gene expression and are key components in a great number of physiological and pathological conditions. The accurate characterization of their targets is considered essential to a series of applications and basic or applied research settings. DIANA-TarBase (http://www.microrna.gr/tarbase) was initially launched in 2006. It is a reference repository indexing experimentally derived miRNA-gene interactions in different cell types, tissues, and conditions across numerous species. This unit focuses on the study of experimentally supported miRNA-gene interactions, as well as their functional interpretation through the use of available tools in the DIANA suite (http://www.microrna.gr). The proposed use-case scenarios are presented in protocols, describing how to utilize the DIANA-TarBase database and DIANA-microT-CDS server and perform miRNA-targeted pathway analysis with DIANA-miRPath-v3. All analyses are directly invoked or initiated from DIANA-TarBase. 
© 2016 by John Wiley & Sons, Inc.",DIANA-TarBase,0.983838618,NA,0,DIANA-TarBase,0.983838618,1,"25416803.0, 22135297.0, 29156006.0",25416803,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,9/7/2016 +35424427,http://rdu.iquimica.unam.mx/handle/20.500.12214/1186,"DiaNat-DB: a molecular database of antidiabetic compounds from medicinal plants. Natural products are an invaluable source of molecules with a large variety of biological activities. Interest in natural products in drug discovery is documented in an increasing number of publications of bioactive secondary metabolites. Among those, medicinal plants are one of the most studied for this endeavor. An ever thriving area of opportunity within the field concerns the discovery of antidiabetic natural products. As a result, a vast amount of secondary metabolites are isolated from medicinal plants used against diabetes mellitus but whose information has not been organized systematically yet. Several research articles enumerate antidiabetic compounds, but the lack of a chemical database for antidiabetic metabolites limits their application in drug development. In this work, we present DiaNat-DB, a comprehensive collection of 336 molecules from medicinal plants reported to have in vitro or in vivo antidiabetic activity. We also discuss a chemoinformatic analysis of DiaNat-DB to compare antidiabetic drugs and natural product databases. To further explore the antidiabetic chemical space based on DiaNat compounds, we searched for analogs in ZINC15, an extensive database listing commercially available compounds. This work will help future analyses, design, and development of new antidiabetic drugs. 
DiaNat-DB and its ZINC15 analogs are freely available at http://rdu.iquimica.unam.mx/handle/20.500.12214/1186.",DiaNat-DB,0.974225625,NA,0,DiaNat-DB,0.974225625,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/28/2021 +25378341,http://www.diark.org,"diArk--the database for eukaryotic genome and transcriptome assemblies in 2014. Eukaryotic genomes are the basis for understanding the complexity of life from populations to the molecular level. Recent technological innovations have revolutionized the speed of data generation enabling the sequencing of eukaryotic genomes and transcriptomes within days. The database diArk (http://www.diark.org) has been developed with the aim to provide access to all available assembled genomes and transcriptomes. In September 2014, diArk contains about 2600 eukaryotes with 6000 genome and transcriptome assemblies, of which 22% are not available via NCBI/ENA/DDBJ. Several indicators for the quality of the assemblies are provided to facilitate their comparison for selecting the most appropriate dataset for further studies. diArk has a user-friendly web interface with extensive options for filtering and browsing the sequenced eukaryotes. In this new version of the database we have also integrated species, for which transcriptome assemblies are available, and we provide more analyses of assemblies.",diArk,0.992710948,NA,0,diArk,0.992710948,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2014 +31641158,http://www6.inra.fr/carrtel-collection_eng/Barcoding-database,"Diat.barcode, an open-access curated barcode library for diatoms. Diatoms (Bacillariophyta) are ubiquitous microalgae which produce a siliceous exoskeleton and which make a major contribution to the productivity of oceans and freshwaters. They display a huge diversity, which makes them excellent ecological indicators of aquatic ecosystems. Usually, diatoms are identified using characteristics of their exoskeleton morphology. 
DNA-barcoding is an alternative to this and the use of High-Throughput-Sequencing enables the rapid analysis of many environmental samples at a lower cost than analyses under microscope. However, to identify environmental sequences correctly, an expertly curated reference library is needed. Several curated libraries for protists exists; none, however are dedicated to diatoms. Diat.barcode is an open-access library dedicated to diatoms which has been maintained since 2012. Data come from two sources (1) the NCBI nucleotide database and (2) unpublished sequencing data of culture collections. Since 2017, several experts have collaborated to curate this library for rbcL, a chloroplast marker suitable for species-level identification of diatoms. For the latest version of the database (version 7), 605 of the 3482 taxonomical names originally assigned by the authors of the rbcL sequences were modified after curation. The database is accessible at https://www6.inra.fr/carrtel-collection_eng/Barcoding-database .",Diat.barcode,0.889230361,NA,0,Diat.barcode,0.889230361,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/22/2019 +22332784,http://www.diatomcyc.org,"The metabolic blueprint of Phaeodactylum tricornutum reveals a eukaryotic Entner-Doudoroff glycolytic pathway. Diatoms are one of the most successful groups of unicellular eukaryotic algae. Successive endosymbiotic events contributed to their flexible metabolism, making them competitive in variable aquatic habitats. Although the recently sequenced genomes of the model diatoms Phaeodactylum tricornutum and Thalassiosira pseudonana have provided the first insights into their metabolic organization, the current knowledge on diatom biochemistry remains fragmentary. By means of a genome-wide approach, we developed DiatomCyc, a detailed pathway/genome database of P. tricornutum. 
DiatomCyc contains 286 pathways with 1719 metabolic reactions and 1613 assigned enzymes, spanning both the central and parts of the secondary metabolism of P. tricornutum. Central metabolic pathways, such as those of carbohydrates, amino acids and fatty acids, were covered. Furthermore, our understanding of the carbohydrate model in P. tricornutum was extended. In particular we highlight the discovery of a functional Entner-Doudoroff pathway, an ancient alternative for the glycolytic Embden-Meyerhof-Parnas pathway, and a putative phosphoketolase pathway, both uncommon in eukaryotes. DiatomCyc is accessible online (http://www.diatomcyc.org), and offers a range of software tools for the visualization and analysis of metabolic networks and 'omics' data. We anticipate that DiatomCyc will be key to gaining further understanding of diatom metabolism and, ultimately, will feed metabolic engineering strategies for the industrial valorization of diatoms.",DiatomCyc,0.997156441,NA,0,DiatomCyc,0.997156441,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/31/2012 +29385418,http://dibs.enzim.ttk.mta.hu,"DIBS: a repository of disordered binding sites mediating interactions with ordered proteins. Motivation Intrinsically Disordered Proteins (IDPs) mediate crucial protein-protein interactions, most notably in signaling and regulation. As their importance is increasingly recognized, the detailed analyses of specific IDP interactions opened up new opportunities for therapeutic targeting. Yet, large scale information about IDP-mediated interactions in structural and functional details are lacking, hindering the understanding of the mechanisms underlying this distinct binding mode. Results Here, we present DIBS, the first comprehensive, curated collection of complexes between IDPs and ordered proteins. 
DIBS not only describes by far the highest number of cases, it also provides the dissociation constants of their interactions, as well as the description of potential post-translational modifications modulating the binding strength and linear motifs involved in the binding. Together with the wide range of structural and functional annotations, DIBS will provide the cornerstone for structural and functional studies of IDP complexes. Availability and implementation DIBS is freely accessible at http://dibs.enzim.ttk.mta.hu/. The DIBS application is hosted by Apache web server and was implemented in PHP. To enrich querying features and to enhance backend performance a MySQL database was also created. Contact dosztanyi@caesar.elte.hu or bmeszaros@caesar.elte.hu. Supplementary information Supplementary data are available at Bioinformatics online.",DIBS,0.994214892,NA,0,DIBS,0.994214892,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2018 +"23172289, 23494302",http://dictybase.org,"DictyBase 2013: integrating multiple Dictyostelid species. dictyBase (http://dictybase.org) is the model organism database for the social amoeba Dictyostelium discoideum. This contribution provides an update on dictyBase that has been previously presented. During the past 3 years, dictyBase has taken significant strides toward becoming a genome portal for the whole Amoebozoa clade. In its latest release, dictyBase has scaled up to host multiple Dictyostelids, including Dictyostelium purpureum [Sucgang, Kuo, Tian, Salerno, Parikh, Feasley, Dalin, Tu, Huang, Barry et al.(2011) (Comparative genomics of the social amoebae Dictyostelium discoideum and Dictyostelium purpureum. Genome Biol., 12, R20)], Dictyostelium fasciculatum and Polysphondylium pallidum [Heidel, Lawal, Felder, Schilde, Helps, Tunggal, Rivero, John, Schleicher, Eichinger et al. (2011) (Phylogeny-wide analysis of social amoeba genomes highlights ancient origins for complex intercellular communication. Genome Res., 21, 1882-1891)]. 
The new release includes a new Genome Browser with RNAseq expression, interspecies Basic Local Alignment Search Tool alignments and a unified Basic Local Alignment Search Tool search for cross-species comparisons.",dictyBase,0.994000256,NA,0,dictyBase,0.994000256,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2013 +26481352,http://dida.ibsquare.be,"DIDA: A curated and annotated digenic diseases database. DIDA (DIgenic diseases DAtabase) is a novel database that provides for the first time detailed information on genes and associated genetic variants involved in digenic diseases, the simplest form of oligogenic inheritance. The database is accessible via http://dida.ibsquare.be and currently includes 213 digenic combinations involved in 44 different digenic diseases. These combinations are composed of 364 distinct variants, which are distributed over 136 distinct genes. The web interface provides browsing and search functionalities, as well as documentation and help pages, general database statistics and references to the original publications from which the data have been collected. The possibility to submit novel digenic data to DIDA is also provided. Creating this new repository was essential as current databases do not allow one to retrieve detailed records regarding digenic combinations. Genes, variants, diseases and digenic combinations in DIDA are annotated with manually curated information and information mined from other online resources. Next to providing a unique resource for the development of new analysis methods, DIDA gives clinical and molecular geneticists a tool to find the most comprehensive information on the digenic nature of their diseases of interest.",DIDA,0.982567787,DIgenic diseases DAtabase,0.88692459,DIDA,0.982567787,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/19/2015 +29069447,http://netbio.bgu.ac.il/diffnet,"The DifferentialNet database of differential protein-protein interactions in human tissues. 
DifferentialNet is a novel database that provides users with differential interactome analysis of human tissues (http://netbio.bgu.ac.il/diffnet/). Users query DifferentialNet by protein, and retrieve its differential protein-protein interactions (PPIs) per tissue via an interactive graphical interface. To compute differential PPIs, we integrated available data of experimentally detected PPIs with RNA-sequencing profiles of tens of human tissues gathered by the Genotype-Tissue Expression consortium (GTEx) and by the Human Protein Atlas (HPA). We associated each PPI with a score that reflects whether its corresponding genes were expressed similarly across tissues, or were up- or down-regulated in the selected tissue. By this, users can identify tissue-specific interactions, filter out PPIs that are relatively stable across tissues, and highlight PPIs that show relative changes across tissues. The differential PPIs can be used to identify tissue-specific processes and to decipher tissue-specific phenotypes. Moreover, they unravel processes that are tissue-wide yet tailored to the specific demands of each tissue.",DifferentialNet,0.995048225,NA,0,DifferentialNet,0.995048225,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +32976589,http://exbio.wzw.tum.de/digger,"DIGGER: exploring the functional role of alternative splicing in protein interactions. Alternative splicing plays a major role in regulating the functional repertoire of the proteome. However, isoform-specific effects to protein-protein interactions (PPIs) are usually overlooked, making it impossible to judge the functional role of individual exons on a systems biology level. We overcome this barrier by integrating protein-protein interactions, domain-domain interactions and residue-level interactions information to lift exon expression analysis to a network level. 
Our user-friendly database DIGGER is available at https://exbio.wzw.tum.de/digger and allows users to seamlessly switch between isoform and exon-centric views of the interactome and to extract sub-networks of relevant isoforms, making it an essential resource for studying mechanistic consequences of alternative splicing.",DIGGER,0.998090148,NA,0,DIGGER,0.998090148,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +22080506,http://biocomputing.it/digit,"A database of immunoglobulins with integrated tools: DIGIT. The DIGIT (Database of ImmunoGlobulins with Integrated Tools) database (http://biocomputing.it/digit) is an integrated resource storing sequences of annotated immunoglobulin variable domains and enriched with tools for searching and analyzing them. The annotations in the database include information on the type of antigen, the respective germline sequences and on pairing information between light and heavy chains. Other annotations, such as the identification of the complementarity determining regions, assignment of their structural class and identification of mutations with respect to the germline, are computed on the fly and can also be obtained for user-submitted sequences. The system allows customized BLAST searches and automatic building of 3D models of the domains to be performed.",DIGIT,0.994396985,Database of ImmunoGlobulins with Integrated Tools,0.9503698,DIGIT,0.994396985,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/10/2011 +26503254,http://www.digital-development.org,"Digital development: a database of cell lineage differentiation in C. elegans with lineage phenotypes, cell-specific gene functions and a multiscale model. Developmental systems biology is poised to exploit large-scale data from two approaches: genomics and live imaging. The combination of the two offers the opportunity to map gene functions and gene networks in vivo at single-cell resolution using cell tracking and quantification of cellular phenotypes. 
Here we present Digital Development (http://www.digital-development.org), a database of cell lineage differentiation with curated phenotypes, cell-specific gene functions and a multiscale model. The database stores data from recent systematic studies of cell lineage differentiation in the C. elegans embryo containing ∼ 200 conserved genes, 1400 perturbed cell lineages and 600,000 digitized single cells. Users can conveniently browse, search and download four categories of phenotypic and functional information from an intuitive web interface. This information includes lineage differentiation phenotypes, cell-specific gene functions, differentiation landscapes and fate choices, and a multiscale model of lineage differentiation. Digital Development provides a comprehensive, curated, multidimensional database for developmental biology. The scale, resolution and richness of biological information presented here facilitate exploration of gene-specific and systems-level mechanisms of lineage differentiation in Metazoans.",Digital Development,0.921984583,NA,0,Digital Development,0.921984583,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/25/2015 +33007622,http://slsdb.manipal.edu/dinax,"DINAX- a comprehensive database of inherited ataxias. Background Neurodegenerative disorders such as hereditary ataxia often manifest overlapping symptoms and are likely to be misdiagnosed based on clinical phenotypes. To identify the genes associated with such disorders for diagnostic purposes, geneticists often use high throughput technologies which generate an enormous amount of data on variants whose relevance can be unclear. Besides, analysis and interpretation of high throughput data require gleaning of several web-based resources which can be laborious and time-consuming. To overcome these, we have created a Database for Inherited Ataxia (DINAX), a repository of gene variants from publicly available information. 
Methods DINAX is implemented as a MySQL relational database using the PHP scripting language. Web interfaces were developed using HTML, CSS, and JavaScript. Variant and phenotype information was collected and manually curated from published literature and primary databases such as OMIM and ClinVar. These were further analyzed to decipher expression and pathway analysis. Results DINAX is an inventory of 7166 genomic variants (single nucleotide polymorphisms, deletions, insertions, and translocations) reported till date among the 185 genes associated with different subtypes of inherited ataxia. DINAX implements a dual search methodology for genes and phenotypes linking to ataxia associated genes, variants, and their source. Pathway analysis confirmed their association with ataxia. Conclusion The database is created to provide a single web source for obtaining information about ataxia related genes. Besides, the database facilitates easy identification of known and reported variants as well as the novel or unreported variants. DINAX is freely available at http://slsdb.manipal.edu/dinax.",DINAX,0.989951134,Database for Inherited Ataxia,0.747629498,DINAX,0.989951134,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/17/2020 +28502574,http://www.neurostresspep.eu/diner,"DINeR: Database for Insect Neuropeptide Research. Neuropeptides are responsible for regulating a variety of functions, including development, metabolism, water and ion homeostasis, and as neuromodulators in circuits of the central nervous system. Numerous neuropeptides have been identified and characterized. However, both discovery and functional characterization of neuropeptides across the massive Class Insecta has been sporadic. To leverage advances in post-genomic technologies for this rapidly growing field, insect neuroendocrinology requires a consolidated, comprehensive and standardised resource for managing neuropeptide information. 
The Database for Insect Neuropeptide Research (DINeR) is a web-based database-application used for search and retrieval of neuropeptide information of various insect species detailing their isoform sequences, physiological functionality and images of their receptor-binding sites, in an intuitive, accessible and user-friendly format. The curated data includes representatives of 50 well described neuropeptide families from over 400 different insect species. Approximately 4700 FASTA formatted, neuropeptide isoform amino acid sequences and over 200 records of physiological functionality have been recorded based on published literature. Also available are images of neuropeptide receptor locations. In addition, the data include comprehensive summaries for each neuropeptide family, including their function, location, known functionality, as well as cladograms, sequence alignments and logos covering most insect orders. Moreover, we have adopted a standardised nomenclature to address inconsistent classification of neuropeptides. As part of the H2020 nEUROSTRESSPEP project, the data will be actively maintained and curated, ensuring a comprehensive and standardised resource for the scientific community. DINeR is publicly available at the project website: http://www.neurostresspep.eu/diner/.",DINeR,0.992539942,Database for Insect Neuropeptide Research,0.922759838,DINeR,0.992539942,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/11/2017 +33382035,http://www.sbms.hku.hk/dclab/DIPPER,"DIPPER, a spatiotemporal proteomics atlas of human intervertebral discs for exploring ageing and degeneration dynamics. . The spatiotemporal proteome of the intervertebral disc (IVD) underpins its integrity and function. We present DIPPER, a deep and comprehensive IVD proteomic resource comprising 94 genome-wide profiles from 17 individuals. 
To begin with, protein modules defining key directional trends spanning the lateral and anteroposterior axes were derived from high-resolution spatial proteomes of intact young cadaveric lumbar IVDs. They revealed novel region-specific profiles of regulatory activities and displayed potential paths of deconstruction in the level- and location-matched aged cadaveric discs. Machine learning methods predicted a 'hydration matrisome' that connects extracellular matrix with MRI intensity. Importantly, the static proteome used as point-references can be integrated with dynamic proteome (SILAC/degradome) and transcriptome data from multiple clinical samples, enhancing robustness and clinical relevance. The data, findings, and methodology, available on a web interface (http://www.sbms.hku.hk/dclab/DIPPER/), will be valuable references in the field of IVD biology and proteomic analytics.",DIPPER,0.9955585,NA,0,DIPPER,0.9955585,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/31/2020 +28381244,http://biophy.dzu.edu.cn/DisBind,"DisBind: A database of classified functional binding sites in disordered and structured regions of intrinsically disordered proteins. Background Intrinsically unstructured or disordered proteins function via interacting with other molecules. Annotation of these binding sites is the first step for mapping functional impact of genetic variants in coding regions of human and other genomes, considering that a significant portion of eukaryotic genomes code for intrinsically disordered regions in proteins. Results DisBind (available at http://biophy.dzu.edu.cn/DisBind ) is a collection of experimentally supported binding sites in intrinsically disordered proteins and proteins with both structured and disordered regions. There are a total of 226 IDPs with functional site annotations. These IDPs contain 465 structured regions (ORs) and 428 IDRs according to annotation by DisProt. 
The database contains a total of 4232 binding residues (from UniProt and PDB structures) in which 2836 residues are in ORs and 1396 in IDRs. These binding sites are classified according to their interacting partners including proteins, RNA, DNA, metal ions and others with 2984, 258, 383, 350, and 262 annotated binding sites, respectively. Each entry contains site-specific annotations (structured regions, intrinsically disordered regions, and functional binding regions) that are experimentally supported according to PDB structures or annotations from UniProt. Conclusion The searchable DisBind provides a reliable data resource for functional classification of intrinsically disordered proteins at the residue level.",DisBind,0.997564256,NA,0,DisBind,0.997564256,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/5/2017 +30407550,http://www.disease-ontology.org,"Human Disease Ontology 2018 update: classification, content and workflow expansion. The Human Disease Ontology (DO) (http://www.disease-ontology.org), database has undergone significant expansion in the past three years. The DO disease classification includes specific formal semantic rules to express meaningful disease models and has expanded from a single asserted classification to include multiple-inferred mechanistic disease classifications, thus providing novel perspectives on related diseases. Expansion of disease terms, alternative anatomy, cell type and genetic disease classifications and workflow automation highlight the updates for the DO since 2015. The enhanced breadth and depth of the DO's knowledgebase has expanded the DO's utility for exploring the multi-etiology of human disease, thus improving the capture and communication of health-related data across biomedical databases, bioinformatics tools, genomic and cancer resources and demonstrated by a 6.6× growth in DO's user community since 2015. 
The DO's continual integration of human disease knowledge, evidenced by the more than 200 SVN/GitHub releases/revisions, since previously reported in our DO 2015 NAR paper, includes the addition of 2650 new disease terms, a 30% increase of textual definitions, and an expanding suite of disease classification hierarchies constructed through defined logical axioms.",NA,0,Disease Ontology,0.572252492,Disease Ontology,0.572252492,1,25348409,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2019 +21712250,http://www.patricbrc.org,"Integration and visualization of host-pathogen data related to infectious diseases. Motivation Infectious disease research is generating an increasing amount of disparate data on pathogenic systems. There is a growing need for resources that effectively integrate, analyze, deliver and visualize these data, both to improve our understanding of infectious diseases and to facilitate the development of strategies for disease control and prevention. Results We have developed Disease View, an online host-pathogen resource that enables infectious disease-centric access, analysis and visualization of host-pathogen interactions. In this resource, we associate infectious diseases with corresponding pathogens, provide information on pathogens, pathogen virulence genes and the genetic and chemical evidences for the human genes that are associated with the diseases. We also deliver the relationships between pathogens, genes and diseases in an interactive graph and provide the geolocation reports of associated diseases around the globe in real time. Unlike many other resources, we have applied an iterative, user-centered design process to the entire resource development, including data acquisition, analysis and visualization. Availability and implementation Freely available at http://www.patricbrc.org; all major web browsers supported. 
Contact cmao@vbi.vt.edu Supplementary information Supplementary data are available at Bioinformatics online.",Disease View,0.744547874,NA,0,Disease View,0.744547874,1,"24225323.0, 31667520.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: CLASS,NA,NA,6/27/2011 +23973272,http://bioinformatics.ua.pt/diseasecard,"An innovative portal for rare genetic diseases research: the semantic Diseasecard. Advances in ""omics"" hardware and software technologies are bringing rare diseases research back from the sidelines. Whereas in the past these disorders were seldom considered relevant, in the era of whole genome sequencing the direct connections between rare phenotypes and a reduced set of genes are of vital relevance. This increased interest in rare genetic diseases research is pushing forward investment and effort towards the creation of software in the field, and leveraging the wealth of available life sciences data. Alas, most of these tools target one or more rare diseases, are focused solely on a single type of user, or are limited to the most relevant scientific breakthroughs for a specific niche. Furthermore, despite some high quality efforts, the ever-growing number of resources, databases, services and applications is still a burden to this area. Hence, there is a clear interest in new strategies to deliver a holistic perspective over the entire rare genetic diseases research domain. This is Diseasecard's reasoning, to build a true lightweight knowledge base covering rare genetic diseases. Developed with the latest semantic web technologies, this portal delivers unified access to a comprehensive network for researchers, clinicians, patients and bioinformatics developers. 
With in-context access covering over 20 distinct heterogeneous resources, Diseasecard's workspace provides access to the most relevant scientific knowledge regarding a given disorder, whether through direct common identifiers or through full-text search over all connected resources. In addition to its user-oriented features, Diseasecard's semantic knowledge base is also available for direct querying, enabling everyone to include rare genetic diseases knowledge in new or existing information systems. Diseasecard is publicly available at http://bioinformatics.ua.pt/diseasecard/.",Diseasecard,0.896775365,NA,0,Diseasecard,0.896775365,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/21/2013 +29059320,http://biocc.hrbmu.edu.cn/DiseaseEnhancer,"DiseaseEnhancer: a resource of human disease-associated enhancer catalog. Large-scale sequencing studies discovered substantial genetic variants occurring in enhancers which regulate genes via long range chromatin interactions. Importantly, such variants could affect enhancer regulation by changing transcription factor bindings or enhancer hijacking, and in turn, make an essential contribution to disease progression. To facilitate better usage of published data and exploring enhancer deregulation in various human diseases, we created DiseaseEnhancer (http://biocc.hrbmu.edu.cn/DiseaseEnhancer/), a manually curated database for disease-associated enhancers. As of July 2017, DiseaseEnhancer includes 847 disease-associated enhancers in 143 human diseases. Database features include basic enhancer information (i.e. genomic location and target genes); disease types; associated variants on the enhancer and their mediated phenotypes (i.e. gain/loss of enhancer and the alterations of transcription factor bindings). We also include a feature on our website to export any query results into a file and download the full database. 
DiseaseEnhancer provides a promising avenue for researchers to facilitate the understanding of enhancer deregulation in disease pathogenesis, and identify new biomarkers for disease diagnosis and therapy.",DiseaseEnhancer,0.996761441,NA,0,DiseaseEnhancer,0.996761441,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +"22135302, 27899673",http://bioinfo.hrbmu.edu.cn/diseasemeth,"DiseaseMeth: a human disease methylation database. DNA methylation is an important epigenetic modification for genomic regulation in higher organisms that plays a crucial role in the initiation and progression of diseases. The integration and mining of DNA methylation data by methylation-specific PCR and genome-wide profiling technology could greatly assist the discovery of novel candidate disease biomarkers. However, this is difficult without a comprehensive DNA methylation repository of human diseases. Therefore, we have developed DiseaseMeth, a human disease methylation database (http://bioinfo.hrbmu.edu.cn/diseasemeth). Its focus is the efficient storage and statistical analysis of DNA methylation data sets from various diseases. Experimental information from over 14,000 entries and 175 high-throughput data sets from a wide number of sources have been collected and incorporated into DiseaseMeth. The latest release incorporates the gene-centric methylation data of 72 human diseases from a variety of technologies and platforms. To facilitate data extraction, DiseaseMeth supports multiple search options such as gene ID and disease name. DiseaseMeth provides integrated gene methylation data based on cross-data set analysis for disease and normal samples. 
These can be used for in-depth identification of differentially methylated genes and the investigation of gene-disease relationship.",DiseaseMeth,0.993306458,human disease methylation database,0.805280375,DiseaseMeth,0.993306458,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2016 +25484339,http://diseases.jensenlab.org,"DISEASES: text mining and data integration of disease-gene associations. Text mining is a flexible technology that can be applied to numerous different tasks in biology and medicine. We present a system for extracting disease-gene associations from biomedical abstracts. The system consists of a highly efficient dictionary-based tagger for named entity recognition of human genes and diseases, which we combine with a scoring scheme that takes into account co-occurrences both within and between sentences. We show that this approach is able to extract half of all manually curated associations with a false positive rate of only 0.16%. Nonetheless, text mining should not stand alone, but be combined with other types of evidence. For this reason, we have developed the DISEASES resource, which integrates the results from text mining with manually curated disease-gene associations, cancer mutation data, and genome-wide association studies from existing databases. The DISEASES resource is accessible through a web interface at http://diseases.jensenlab.org/, where the text-mining software and all associations are also freely available for download.",DISEASES,0.994236827,NA,0,DISEASES,0.994236827,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/5/2014 +"25877637, 27924018, 31680165",http://www.disgenet.org,"DisGeNET: a discovery platform for the dynamical exploration of human diseases and their genes. DisGeNET is a comprehensive discovery platform designed to address a variety of questions concerning the genetic underpinning of human diseases. 
DisGeNET contains over 380,000 associations between >16,000 genes and 13,000 diseases, which makes it one of the largest repositories currently available of its kind. DisGeNET integrates expert-curated databases with text-mined data, covers information on Mendelian and complex diseases, and includes data from animal disease models. It features a score based on the supporting evidence to prioritize gene-disease associations. It is an open access resource available through a web interface, a Cytoscape plugin and as a Semantic Web resource. The web interface supports user-friendly data exploration and navigation. DisGeNET data can also be analysed via the DisGeNET Cytoscape plugin, and enriched with the annotations of other plugins of this popular network analysis software suite. Finally, the information contained in DisGeNET can be expanded and complemented using Semantic Web technologies and linked to a variety of resources already present in the Linked Data cloud. Hence, DisGeNET offers one of the most comprehensive collections of human gene-disease associations and a valuable set of tools for investigating the molecular mechanisms underlying diseases of genetic origin, designed to fulfill the needs of different user profiles, including bioinformaticians, biologists and health-care practitioners. Database URL: http://www.disgenet.org/",DisGeNET,0.997833073,NA,0,DisGeNET,0.997833073,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +29036667,http://DISNOR.uniroma2.it,"DISNOR: a disease network open resource. DISNOR is a new resource that aims at exploiting the explosion of data on the identification of disease-associated genes to assemble inferred disease pathways. This may help dissecting the signaling events whose disruption causes the pathological phenotypes and may contribute to build a platform for precision medicine. 
To this end we combine the gene-disease association (GDA) data annotated in the DisGeNET resource with a new curation effort aimed at populating the SIGNOR database with causal interactions related to disease genes with the highest possible coverage. DISNOR can be freely accessed at http://DISNOR.uniroma2.it/ where >3700 disease-networks, linking ∼2600 disease genes, can be explored. For each disease curated in DisGeNET, DISNOR links disease genes by manually annotated causal relationships and offers an intuitive visualization of the inferred 'patho-pathways' at different complexity levels. User-defined gene lists are also accepted in the query pipeline. In addition, for each list of query genes-either annotated in DisGeNET or user-defined-DISNOR performs a gene set enrichment analysis on KEGG-defined pathways or on the lists of proteins associated with the inferred disease pathways. This function offers additional information on disease-associated cellular pathways and disease similarity.",DISNOR,0.998069823,NA,0,DISNOR,0.998069823,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +31713636,http://disprot.org,"DisProt: intrinsic protein disorder annotation in 2020. The Database of Protein Disorder (DisProt, URL: https://disprot.org) provides manually curated annotations of intrinsically disordered proteins from the literature. Here we report recent developments with DisProt (version 8), including the doubling of protein entries, a new disorder ontology, improvements of the annotation format and a completely new website. The website includes a redesigned graphical interface, a better search engine, a clearer API for programmatic access and a new annotation interface that integrates text mining technologies. The new entry format provides a greater flexibility, simplifies maintenance and allows the capture of more information from the literature. 
The new disorder ontology has been formalized and made interoperable by adopting the OWL format, as well as its structure and term definitions have been improved. The new annotation interface has made the curation process faster and more effective. We recently showed that new DisProt annotations can be effectively used to train and validate disorder predictors. We believe the growth of DisProt will accelerate, contributing to the improvement of function and disorder predictors and therefore to illuminate the 'dark' proteome.",DisProt,0.995845914,Database of Protein Disorder,0.837283194,DisProt,0.995845914,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +31593887,http://andromeda.matf.bg.ac.rs/aadis_dynamic,"DistAA: Database of amino acid distances in proteins and web application for statistical review of distances. Three-dimensional structure of a protein chain is determined by its amino acid interactions. One approach to the analysis of amino acid interactions refers to geometric distances of amino acid pairs in polypeptide chains. For a detailed analysis of the amino acid distances, the database with three types of amino acid distances in a set of chains was created. Web application Distances of Amino Acids has also been developed to enable scientists to explore interactions of amino acids with different properties based on distances stored in the database. Web application calculates and displays descriptive statistics and graphs of amino acid pair distances with selected properties, such as geometric distance threshold, corresponding SCOP class of proteins and secondary structure types. In addition to the analysis of pre-calculated distances stored in the database, the amino acid distances of a single protein with the specified PDB identifier can also be analyzed. 
The web application is available at http://andromeda.matf.bg.ac.rs/aadis_dynamic/.",NA,0,Distances of Amino Acids,0.922101881,Distances of Amino Acids,0.922101881,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/28/2019 +22058129,http://distild.jensenlab.org,"DistiLD Database: diseases and traits in linkage disequilibrium blocks. Genome-wide association studies (GWAS) have identified thousands of single nucleotide polymorphisms (SNPs) associated with the risk of hundreds of diseases. However, there is currently no database that enables non-specialists to answer the following simple questions: which SNPs associated with diseases are in linkage disequilibrium (LD) with a gene of interest? Which chromosomal regions have been associated with a given disease, and which are the potentially causal genes in each region? To answer these questions, we use data from the HapMap Project to partition each chromosome into so-called LD blocks, so that SNPs in LD with each other are preferentially in the same block, whereas SNPs not in LD are in different blocks. By projecting SNPs and genes onto LD blocks, the DistiLD database aims to increase usage of existing GWAS results by making it easy to query and visualize disease-associated SNPs and genes in their chromosomal context. The database is available at http://distild.jensenlab.org/.",DistiLD,0.8471573,NA,0,DistiLD,0.8471573,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/3/2011 +26363178,http://rvs.u.hpc.mssm.edu/divas,"DIVAS: a centralized genetic variant repository representing 150,000 individuals from multiple disease cohorts. Motivation A plethora of sequenced and genotyped disease cohorts is available to the biomedical research community, spread across many portals and represented in various formats. Results We have gathered several large studies, including GERA and GRU, and computed population- and disease-specific genetic variant frequencies. 
In total, our portal provides fast access to genetic variants observed in 84,928 individuals from 39 disease populations. We also include 66,335 controls, such as the 1000 Genomes and Scripps Wellderly. Conclusion Combining multiple studies helps validate disease-associated variants in each underlying data set, detect potential false positives using frequencies of control populations, and identify novel candidate disease-causing alterations in known or suspected genes. Availability and implementation https://rvs.u.hpc.mssm.edu/divas Contact rong.chen@mssm.edu Supplementary information Supplementary data are available at Bioinformatics online.",DIVAS,0.973447561,NA,0,DIVAS,0.973447561,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/12/2015 +31788512,http://divercropblog.wordpress.com,"An open dataset about georeferenced harmonized national agricultural censuses and surveys of seven mediterranean countries. The dataset presented in this paper is based on data gathered from several countries within the West Mediterranean area at the highest detailed scale regarding official statistics, with the aim of investigating land and food systems dynamics in the Mediterranean. Characterizing land and food systems dynamics is critical to reveal insights regarding interactions between current dynamics of agricultural practices, species diversity and local food systems. These interactions were analyzed, at multiple spatial scales, on a large part of the Mediterranean basin within the DIVERCROP Project (https://divercropblog.wordpress.com/). 
An harmonized dataset with the desired characteristics was not readily available from official sources and, therefore, it was necessary to build an ad hoc database that could: (1) cover the Mediterranean areas of seven countries, namely Algeria (DZ), France (FR), Italy (IT), Malta (MT), Portugal (PT), Spain (ES) and Tunisia (TN); (2) contain data referred to the most disaggregated level of administrative units for which data is available in each country; (3) contain data referred to at least two time points, including the latest available data, in each country; (4) contain data on number of farm holdings, on the physical areas covered by the main annual and permanent crops and on livestock (number of heads); (5) contain a primary key that allows joining the census and surveys database to a geographical dataset of administrative units covering the entire area; (6) have an associated complete geographical dataset of administrative units, to allow spatial data analyses.",DIVERCROP,0.765197396,NA,0,DIVERCROP,0.765197396,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,11/8/2019 +25505093,http://www.dixa-fp7.eu,"diXa: a data infrastructure for chemical safety assessment. Motivation The field of toxicogenomics (the application of '-omics' technologies to risk assessment of compound toxicities) has expanded in the last decade, partly driven by new legislation, aimed at reducing animal testing in chemical risk assessment but mainly as a result of a paradigm change in toxicology towards the use and integration of genome wide data. Many research groups worldwide have generated large amounts of such toxicogenomics data. However, there is no centralized repository for archiving and making these data and associated tools for their analysis easily available. Results The Data Infrastructure for Chemical Safety Assessment (diXa) is a robust and sustainable infrastructure storing toxicogenomics data. 
A central data warehouse is connected to a portal with links to chemical information and molecular and phenotype data. diXa is publicly available through a user-friendly web interface. New data can be readily deposited into diXa using guidelines and templates available online. Analysis descriptions and tools for interrogating the data are available via the diXa portal. Availability and implementation http://www.dixa-fp7.eu Contact d.hendrickx@maastrichtuniversity.nl; info@dixa-fp7.eu Supplementary information Supplementary data are available at Bioinformatics online.",diXa,0.938735694,Infrastructure for Chemical Safety Assessment,0.744968057,diXa,0.938735694,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/12/2014 +33079988,http://darkkinome.org,"The Dark Kinase Knowledgebase: an online compendium of knowledge and experimental results of understudied kinases. Kinases form the backbone of numerous cell signaling pathways, with their dysfunction similarly implicated in multiple pathologies. Further facilitated by their druggability, kinases are a major focus of therapeutic development efforts in diseases such as cancer, infectious disease and autoimmune disorders. While their importance is clear, the role or biological function of nearly one-third of kinases is largely unknown. Here, we describe a data resource, the Dark Kinase Knowledgebase (DKK; https://darkkinome.org), that is specifically focused on providing data and reagents for these understudied kinases to the broader research community. Supported through NIH's Illuminating the Druggable Genome (IDG) Program, the DKK is focused on data and knowledge generation for 162 poorly studied or 'dark' kinases. Types of data provided through the DKK include parallel reaction monitoring (PRM) peptides for quantitative proteomics, protein interactions, NanoBRET reagents, and kinase-specific compounds. 
Higher-level data is similarly being generated and consolidated such as tissue gene expression profiles and, longer-term, functional relationships derived through perturbation studies. Associated web tools that help investigators interrogate both internal and external data are also provided through the site. As an evolving resource, the DKK seeks to continually support and enhance knowledge on these potentially high-impact druggable targets.",DKK,0.987769703,Dark Kinase Knowledgebase,0.946007538,DKK,0.987769703,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +26393351,http://dknet.org,"The NIDDK Information Network: A Community Portal for Finding Data, Materials, and Tools for Researchers Studying Diabetes, Digestive, and Kidney Diseases. The NIDDK Information Network (dkNET; http://dknet.org) was launched to serve the needs of basic and clinical investigators in metabolic, digestive and kidney disease by facilitating access to research resources that advance the mission of the National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK). By research resources, we mean the multitude of data, software tools, materials, services, projects and organizations available to researchers in the public domain. Most of these are accessed via web-accessible databases or web portals, each developed, designed and maintained by numerous different projects, organizations and individuals. While many of the large government funded databases, maintained by agencies such as European Bioinformatics Institute and the National Center for Biotechnology Information, are well known to researchers, many more that have been developed by and for the biomedical research community are unknown or underutilized. At least part of the problem is the nature of dynamic databases, which are considered part of the ""hidden"" web, that is, content that is not easily accessed by search engines. 
dkNET was created specifically to address the challenge of connecting researchers to research resources via these types of community databases and web portals. dkNET functions as a ""search engine for data"", searching across millions of database records contained in hundreds of biomedical databases developed and maintained by independent projects around the world. A primary focus of dkNET are centers and projects specifically created to provide high quality data and resources to NIDDK researchers. Through the novel data ingest process used in dkNET, additional data sources can easily be incorporated, allowing it to scale with the growth of digital data and the needs of the dkNET community. Here, we provide an overview of the dkNET portal and its functions. We show how dkNET can be used to address a variety of use cases that involve searching for research resources.",dkNET,0.995002985,NIDDK Information Network,0.846032488,dkNET,0.995002985,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/22/2015 +26697753,http://lcgbase.big.ac.cn/DLGP,"DLGP: A database for lineage-conserved and lineage-specific gene pairs in animal and plant genomes. The conservation of gene organization in the genome with lineage-specificity is an invaluable resource to decipher their potential functionality with diverse selective constraints, especially in higher animals and plants. Gene pairs appear to be the minimal structure for such kind of gene clusters that tend to reside in their preferred locations, representing the distinctive genomic characteristics in single species or a given lineage. Despite gene families having been investigated in a widespread manner, the definition of gene pair families in various taxa still lacks adequate attention. 
To address this issue, we report DLGP (http://lcgbase.big.ac.cn/DLGP/) that stores the pre-calculated lineage-based gene pairs in currently available 134 animal and plant genomes and inspect them under the same analytical framework, bringing out a set of innovational features. First, the taxonomy or lineage has been classified into four levels such as Kingdom, Phylum, Class and Order. It adopts all-to-all comparison strategy to identify the possible conserved gene pairs in all species for each gene pair in certain species and reckon those that are conserved in over a significant proportion of species in a given lineage (e.g. Primates, Diptera or Poales) as the lineage-conserved gene pairs. Furthermore, it predicts the lineage-specific gene pairs by retaining the above-mentioned lineage-conserved gene pairs that are not conserved in any other lineages. Second, it carries out pairwise comparison for the gene pairs between two compared species and creates the table including all the conserved gene pairs and the image elucidating the conservation degree of gene pairs in chromosomal level. Third, it supplies gene order browser to extend gene pairs to gene clusters, allowing users to view the evolution dynamics in the gene context in an intuitive manner. This database will be able to facilitate the particular comparison between animals and plants, between vertebrates and arthropods, and between monocots and eudicots, accounting for the significant contribution of gene pairs to speciation and diversification in specific lineages.",DLGP,0.996312201,NA,0,DLGP,0.996312201,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/15/2015 +26030752,http://sbbi.unl.edu/dmd,"Dietary MicroRNA Database (DMD): An Archive Database and Analytic Tool for Food-Borne microRNAs. With the advent of high throughput technology, a huge amount of microRNA information has been added to the growing body of knowledge for non-coding RNAs. 
Here we present the Dietary MicroRNA Databases (DMD), the first repository for archiving and analyzing the published and novel microRNAs discovered in dietary resources. Currently there are fifteen types of dietary species, such as apple, grape, cow milk, and cow fat, included in the database originating from 9 plant and 5 animal species. Annotation for each entry, a mature microRNA indexed as DM0000*, covers information of the mature sequences, genome locations, hairpin structures of parental pre-microRNAs, cross-species sequence comparison, disease relevance, and the experimentally validated gene targets. Furthermore, a few functional analyses including target prediction, pathway enrichment and gene network construction have been integrated into the system, which enable users to generate functional insights through viewing the functional pathways and building protein-protein interaction networks associated with each microRNA. Another unique feature of DMD is that it provides a feature generator where a total of 411 descriptive attributes can be calculated for any given microRNAs based on their sequences and structures. DMD would be particularly useful for research groups studying microRNA regulation from a nutrition point of view. The database can be accessed at http://sbbi.unl.edu/dmd/.",DMD,0.993849114,Dietary MicroRNA Databases,0.963514006,DMD,0.993849114,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2015 +22486148,http://medirectory.org,"Design and implementation of a web directory for medical education (WDME): a tool to facilitate research in medical education. Background Access to the medical resources on the web is one of current challenges for researchers and medical science educators. The purpose of current project was to design and implement a comprehensive and specific subject/web directory of medical education. 
Methods First, the categories to be incorporated in the directory were defined through reviewing related directories and obtaining medical education experts' opinions in a focus group. Then, number of sources such as (Meta) search engines, subject directories, databases and library catalogs searched/browsed for selecting and collecting high quality resources. Finally, the website was designed and the resources were entered into the directory. Results The main categories incorporating WDME resources are: Journals, Organizations, Best Evidence in Medical Education, and Textbooks. Each category is divided into sub-categories and related resources of each category are described shortly within it. The resources in this directory could be accessed both by browsing and keyword searching. WDME is accessible on http://medirectory.org. Conclusions The innovative Web Directory for Medical Education (WDME) presented in this paper, is more comprehensive than other existing directories, and expandable through user suggestions. It may help medical educators to find their desirable resources more quickly and easily; hence have more informed decisions in education.",DME,0.820596695,Directory for Medical Education,0.725969657,DME,0.820596695,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2012 +31016417,http://dnamod.hoffmanlab.org,"DNAmod: the DNA modification database. Covalent DNA modifications, such as 5-methylcytosine (5mC), are increasingly the focus of numerous research programs. In eukaryotes, both 5mC and 5-hydroxymethylcytosine (5hmC) are now recognized as stable epigenetic marks, with diverse functions. Bacteria, archaea, and viruses contain various other modified DNA nucleobases. Numerous databases describe RNA and histone modifications, but no database specifically catalogues DNA modifications, despite their broad importance in epigenetic regulation. To address this need, we have developed DNAmod: the DNA modification database. 
DNAmod is an open-source database ( https://dnamod.hoffmanlab.org ) that catalogues DNA modifications and provides a single source to learn about their properties. DNAmod provides a web interface to easily browse and search through these modifications. The database annotates the chemical properties and structures of all curated modified DNA bases, and a much larger list of candidate chemical entities. DNAmod includes manual annotations of available sequencing methods, descriptions of their occurrence in nature, and provides existing and suggested nomenclature. DNAmod enables researchers to rapidly review previous work, select mapping techniques, and track recent developments concerning modified bases of interest.",DNAmod,0.983683288,NA,0,DNAmod,0.983683288,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/23/2019 +33053178,http://www.genesilico.pl/DNAmoreDB,"DNAmoreDB, a database of DNAzymes. Deoxyribozymes, DNA enzymes or simply DNAzymes are single-stranded oligo-deoxyribonucleotide molecules that, like proteins and ribozymes, possess the ability to perform catalysis. Although DNAzymes have not yet been found in living organisms, they have been isolated in the laboratory through in vitro selection. The selected DNAzyme sequences have the ability to catalyze a broad range of chemical reactions, utilizing DNA, RNA, peptides or small organic compounds as substrates. DNAmoreDB is a comprehensive database resource for DNAzymes that collects and organizes the following types of information: sequences, conditions of the selection procedure, catalyzed reactions, kinetic parameters, substrates, cofactors, structural information whenever available, and literature references. Currently, DNAmoreDB contains information about DNAzymes that catalyze 20 different reactions. We included a submission form for new data, a REST-based API system that allows users to retrieve the database contents in a machine-readable format, and keyword and BLASTN search features. 
The database is publicly available at https://www.genesilico.pl/DNAmoreDB/.",DNAmoreDB,0.992567003,NA,0,DNAmoreDB,0.992567003,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +28234924,http://tga.nig.ac.jp/dnapod,"DNApod: DNA polymorphism annotation database from next-generation sequence read archives. With the rapid advances in next-generation sequencing (NGS), datasets for DNA polymorphisms among various species and strains have been produced, stored, and distributed. However, reliability varies among these datasets because the experimental and analytical conditions used differ among assays. Furthermore, such datasets have been frequently distributed from the websites of individual sequencing projects. It is desirable to integrate DNA polymorphism data into one database featuring uniform quality control that is distributed from a single platform at a single place. DNA polymorphism annotation database (DNApod; http://tga.nig.ac.jp/dnapod/) is an integrated database that stores genome-wide DNA polymorphism datasets acquired under uniform analytical conditions, and this includes uniformity in the quality of the raw data, the reference genome version, and evaluation algorithms. DNApod genotypic data are re-analyzed whole-genome shotgun datasets extracted from sequence read archives, and DNApod distributes genome-wide DNA polymorphism datasets and known-gene annotations for each DNA polymorphism. This new database was developed for storing genome-wide DNA polymorphism datasets of plants, with crops being the first priority. Here, we describe our analyzed data for 679, 404, and 66 strains of rice, maize, and sorghum, respectively. The analytical methods are available as a DNApod workflow in an NGS annotation system of the DNA Data Bank of Japan and a virtual machine image. Furthermore, DNApod provides tables of links of identifiers between DNApod genotypic data and public phenotypic data. 
To advance the sharing of organism knowledge, DNApod offers basic and ubiquitous functions for multiple alignment and phylogenetic tree construction by using orthologous gene information.",DNApod,0.990965605,DNA polymorphism annotation database,0.922978652,DNApod,0.990965605,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/24/2017 +31612957,http://dnaprodb.usc.edu,"DNAproDB: an expanded database and web-based tool for structural analysis of DNA-protein complexes. DNAproDB (https://dnaprodb.usc.edu) is a web-based database and structural analysis tool that offers a combination of data visualization, data processing and search functionality that improves the speed and ease with which researchers can analyze, access and visualize structural data of DNA-protein complexes. In this paper, we report significant improvements made to DNAproDB since its initial release. DNAproDB now supports any DNA secondary structure from typical B-form DNA to single-stranded DNA to G-quadruplexes. We have updated the structure of our data files to support complex DNA conformations, multiple DNA-protein complexes within a DNAproDB entry and model indexing for analysis of ensemble data. Support for chemically modified residues and nucleotides has been significantly improved along with the addition of new structural features, improved structural moiety assignment and use of more sequence-based annotations. We have redesigned our report pages and search forms to support these enhancements, and the DNAproDB website has been improved to be more responsive and user-friendly. DNAproDB is now integrated with the Nucleic Acid Database, and we have increased our coverage of available Protein Data Bank entries. 
Our database now contains 95% of all available DNA-protein complexes, making our tools for analysis of these structures accessible to a broad community.",DNAproDB,0.995879531,NA,0,DNAproDB,0.995879531,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24225319,"http://dnasu.asu.edu, http://dnasu.org","DNASU plasmid and PSI:Biology-Materials repositories: resources to accelerate biological research. The mission of the DNASU Plasmid Repository is to accelerate research by providing high-quality, annotated plasmid samples and online plasmid resources to the research community through the curated DNASU database, website and repository (http://dnasu.asu.edu or http://dnasu.org). The collection includes plasmids from grant-funded, high-throughput cloning projects performed in our laboratory, plasmids from external researchers, and large collections from consortia such as the ORFeome Collaboration and the NIGMS-funded Protein Structure Initiative: Biology (PSI:Biology). Through DNASU, researchers can search for and access detailed information about each plasmid such as the full length gene insert sequence, vector information, associated publications, and links to external resources that provide additional protein annotations and experimental protocols. Plasmids can be requested directly through the DNASU website. DNASU and the PSI:Biology-Materials Repositories were previously described in the 2010 NAR Database Issue (Cormier, C.Y., Mohr, S.E., Zuo, D., Hu, Y., Rolfs, A., Kramer, J., Taycher, E., Kelley, F., Fiacco, M., Turnbull, G. et al. (2010) Protein Structure Initiative Material Repository: an open shared public resource of structural genomics plasmids for the biological community. Nucleic Acids Res., 38, D743-D749.). In this update we will describe the plasmid collection and highlight the new features in the website redesign, including new browse/search options, plasmid annotations and a dynamic vector mapping feature that was developed in collaboration with LabGenius. 
Overall, these plasmid resources continue to enable research with the goal of elucidating the role of proteins in both normal biological processes and disease.",DNASU,0.963655591,NA,0,DNASU,0.963655591,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/12/2013 +22110027,http://dnatraffic.ibb.waw.pl,"DNAtraffic--a new database for systems biology of DNA dynamics during the cell life. DNAtraffic (http://dnatraffic.ibb.waw.pl/) is dedicated to be a unique comprehensive and richly annotated database of genome dynamics during the cell life. It contains extensive data on the nomenclature, ontology, structure and function of proteins related to the DNA integrity mechanisms such as chromatin remodeling, histone modifications, DNA repair and damage response from eight organisms: Homo sapiens, Mus musculus, Drosophila melanogaster, Caenorhabditis elegans, Saccharomyces cerevisiae, Schizosaccharomyces pombe, Escherichia coli and Arabidopsis thaliana. DNAtraffic contains comprehensive information on the diseases related to the assembled human proteins. DNAtraffic is richly annotated in the systemic information on the nomenclature, chemistry and structure of DNA damage and their sources, including environmental agents or commonly used drugs targeting nucleic acids and/or proteins involved in the maintenance of genome stability. One of the DNAtraffic database aim is to create the first platform of the combinatorial complexity of DNA network analysis. Database includes illustrations of pathways, damage, proteins and drugs. Since DNAtraffic is designed to cover a broad spectrum of scientific disciplines, it has to be extensively linked to numerous external data sources. 
Our database represents the result of the manual annotation work aimed at making the DNAtraffic much more useful for a wide range of systems biology applications.",DNAtraffic,0.997660697,NA,0,DNAtraffic,0.997660697,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/22/2011 +27076334,http://www.violinet.org/dnavaxdb,"The Web-Based DNA Vaccine Database DNAVaxDB and Its Usage for Rational DNA Vaccine Design. A DNA vaccine is a vaccine that uses a mammalian expression vector to express one or more protein antigens and is administered in vivo to induce an adaptive immune response. Since the 1990s, a significant amount of research has been performed on DNA vaccines and the mechanisms behind them. To meet the needs of the DNA vaccine research community, we created DNAVaxDB ( http://www.violinet.org/dnavaxdb ), the first Web-based database and analysis resource of experimentally verified DNA vaccines. All the data in DNAVaxDB, which includes plasmids, antigens, vaccines, and sources, is manually curated and experimentally verified. This chapter goes over the detail of DNAVaxDB system and shows how the DNA vaccine database, combined with the Vaxign vaccine design tool, can be used for rational design of a DNA vaccine against a pathogen, such as Mycobacterium bovis.",DNAVaxDB,0.992402136,NA,0,DNAVaxDB,0.992402136,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2016 +27209279,http://app.scbit.org/DNetDB,"DNetDB: The human disease network database based on dysfunctional regulation mechanism. Disease similarity study provides new insights into disease taxonomy, pathogenesis, which plays a guiding role in diagnosis and treatment. The early studies were limited to estimate disease similarities based on clinical manifestations, disease-related genes, medical vocabulary concepts or registry data, which were inevitably biased to well-studied diseases and offered small chance of discovering novel findings in disease relationships. 
In other words, genome-scale expression data give us another angle to address this problem since simultaneous measurement of the expression of thousands of genes allows for the exploration of gene transcriptional regulation, which is believed to be crucial to biological functions. Although differential expression analysis based methods have the potential to explore new disease relationships, it is difficult to unravel the upstream dysregulation mechanisms of diseases. We therefore estimated disease similarities based on gene expression data by using differential coexpression analysis, a recently emerging method, which has been proved to be more potential to capture dysfunctional regulation mechanisms than differential expression analysis. A total of 1,326 disease relationships among 108 diseases were identified, and the relevant information constituted the human disease network database (DNetDB). Benefiting from the use of differential coexpression analysis, the potential common dysfunctional regulation mechanisms shared by disease pairs (i.e. disease relationships) were extracted and presented. Statistical indicators, common disease-related genes and drugs shared by disease pairs were also included in DNetDB. In total, 1,326 disease relationships among 108 diseases, 5,598 pathways, 7,357 disease-related genes and 342 disease drugs are recorded in DNetDB, among which 3,762 genes and 148 drugs are shared by at least two diseases. DNetDB is the first database focusing on disease similarity from the viewpoint of gene regulation mechanism. 
It provides an easy-to-use web interface to search and browse the disease relationships and thus helps to systematically investigate etiology and pathogenesis, perform drug repositioning, and design novel therapeutic interventions.Database URL: http://app.scbit.org/DNetDB/ #.",DNetDB,0.996409118,NA,0,DNetDB,0.996409118,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/21/2016 +31598709,http://www.unimd.org/dnmivd,"DNMIVD: DNA methylation interactive visualization database. Aberrant DNA methylation plays an important role in cancer progression. However, no resource has been available that comprehensively provides DNA methylation-based diagnostic and prognostic models, expression-methylation quantitative trait loci (emQTL), pathway activity-methylation quantitative trait loci (pathway-meQTL), differentially variable and differentially methylated CpGs, and survival analysis, as well as functional epigenetic modules for different cancers. These provide valuable information for researchers to explore DNA methylation profiles from different aspects in cancer. To this end, we constructed a user-friendly database named DNA Methylation Interactive Visualization Database (DNMIVD), which comprehensively provides the following important resources: (i) diagnostic and prognostic models based on DNA methylation for multiple cancer types of The Cancer Genome Atlas (TCGA); (ii) meQTL, emQTL and pathway-meQTL for diverse cancers; (iii) Functional Epigenetic Modules (FEM) constructed from Protein-Protein Interactions (PPI) and Co-Occurrence and Mutual Exclusive (COME) network by integrating DNA methylation and gene expression data of TCGA cancers; (iv) differentially variable and differentially methylated CpGs and differentially methylated genes as well as related enhancer information; (v) correlations between methylation of gene promoter and corresponding gene expression and (vi) patient survival-associated CpGs and genes with different endpoints. 
DNMIVD is freely available at http://www.unimd.org/dnmivd/. We believe that DNMIVD can facilitate research of diverse cancers.",DNMIVD,0.995371461,DNA Methylation Interactive Visualization Database,0.965341255,DNMIVD,0.995371461,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +23185043,http://www.bio.nite.go.jp/pks,"DoBISCUIT: a database of secondary metabolite biosynthetic gene clusters. This article introduces DoBISCUIT (Database of BIoSynthesis clusters CUrated and InTegrated, http://www.bio.nite.go.jp/pks/), a literature-based, manually curated database of gene clusters for secondary metabolite biosynthesis. Bacterial secondary metabolites often show pharmacologically important activities and can serve as lead compounds and/or candidates for drug development. Biosynthesis of each secondary metabolite is catalyzed by a number of enzymes, usually encoded by a gene cluster. Although many scientific papers describe such gene clusters, the gene information is not always described in a comprehensive manner and the related information is rarely integrated. DoBISCUIT integrates the latest literature information and provides standardized gene/module/domain descriptions related to the gene clusters.",DoBISCUIT,0.998316765,Database of BIoSynthesis clusters,0.979332394,DoBISCUIT,0.998316765,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/26/2012 +33035337,http://covirus.cc/drugs,"DockCoV2: a drug database against SARS-CoV-2. The current state of the COVID-19 pandemic is a global health crisis. To fight the novel coronavirus, one of the best-known ways is to block enzymes essential for virus replication. Currently, we know that the SARS-CoV-2 virus encodes about 29 proteins such as spike protein, 3C-like protease (3CLpro), RNA-dependent RNA polymerase (RdRp), Papain-like protease (PLpro), and nucleocapsid (N) protein. SARS-CoV-2 uses human angiotensin-converting enzyme 2 (ACE2) for viral entry and transmembrane serine protease family member II (TMPRSS2) for spike protein priming. 
Thus in order to speed up the discovery of potential drugs, we develop DockCoV2, a drug database for SARS-CoV-2. DockCoV2 focuses on predicting the binding affinity of FDA-approved and Taiwan National Health Insurance (NHI) drugs with the seven proteins mentioned above. This database contains a total of 3,109 drugs. DockCoV2 is easy to use and search against, is well cross-linked to external databases, and provides the state-of-the-art prediction results in one site. Users can download their drug-protein docking data of interest and examine additional drug-related information on DockCoV2. Furthermore, DockCoV2 provides experimental information to help users understand which drugs have already been reported to be effective against MERS or SARS-CoV. DockCoV2 is available at https://covirus.cc/drugs/.",DockCoV2,0.975444973,NA,0,DockCoV2,0.975444973,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +"26227548, 32621232",http://dockground.compbio.ku.edu,"Simulated unbound structures for benchmarking of protein docking in the DOCKGROUND resource. Background Proteins play an important role in biological processes in living organisms. Many protein functions are based on interaction with other proteins. The structural information is important for adequate description of these interactions. Sets of protein structures determined in both bound and unbound states are essential for benchmarking of the docking procedures. However, the number of such proteins in PDB is relatively small. A radical expansion of such sets is possible if the unbound structures are computationally simulated. Results The DOCKGROUND public resource provides data to improve our understanding of protein-protein interactions and to assist in the development of better tools for structural modeling of protein complexes, such as docking algorithms and scoring functions. A large set of simulated unbound protein structures was generated from the bound structures. 
The modeling protocol was based on 1 ns Langevin dynamics simulation. The simulated structures were validated on the ensemble of experimentally determined unbound and bound structures. The set is intended for large scale benchmarking of docking algorithms and scoring functions. Conclusions A radical expansion of the unbound protein docking benchmark set was achieved by simulating the unbound structures. The simulated unbound structures were selected according to criteria from systematic comparison of experimentally determined bound and unbound structures. The set is publicly available at http://dockground.compbio.ku.edu.",DOCKGROUND,0.997901142,NA,0,DOCKGROUND,0.997901142,2,28891124,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2020 +28891124,http://dockground.compbio.ku.edu,"Dockground: A comprehensive data resource for modeling of protein complexes. Characterization of life processes at the molecular level requires structural details of protein interactions. The number of experimentally determined structures of protein-protein complexes accounts only for a fraction of known protein interactions. This gap in structural description of the interactome has to be bridged by modeling. An essential part of the development of structural modeling/docking techniques for protein interactions is databases of protein-protein complexes. They are necessary for studying protein interfaces, providing a knowledge base for docking algorithms, and developing intermolecular potentials, search procedures, and scoring functions. Development of protein-protein docking techniques requires thorough benchmarking of different parts of the docking protocols on carefully curated sets of protein-protein complexes. We present a comprehensive description of the Dockground resource (http://dockground.compbio.ku.edu) for structural modeling of protein interactions, including previously unpublished unbound docking benchmark set 4, and the X-ray docking decoy set 2. 
The resource offers a variety of interconnected datasets of protein-protein complexes and other data for the development and testing of different aspects of protein docking methodologies. Based on protein-protein complexes extracted from the PDB biounit files, Dockground offers sets of X-ray unbound, simulated unbound, model, and docking decoy structures. All datasets are freely available for download, as a whole or selecting specific structures, through a user-friendly interface on one integrated website.",Dockground,0.995778322,NA,0,Dockground,0.995778322,1,"26227548.0, 32621232.0",NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,10/10/2017 +25404132,http://dogsd.big.ac.cn,"DoGSD: the dog and wolf genome SNP database. The rapid advancement of next-generation sequencing technology has generated a deluge of genomic data from domesticated dogs and their wild ancestor, grey wolves, which have simultaneously broadened our understanding of domestication and diseases that are shared by humans and dogs. To address the scarcity of single nucleotide polymorphism (SNP) data provided by authorized databases and to make SNP data more easily/friendly usable and available, we propose DoGSD (http://dogsd.big.ac.cn), the first canidae-specific database which focuses on whole genome SNP data from domesticated dogs and grey wolves. The DoGSD is a web-based, open-access resource comprising √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº 19 million high-quality whole-genome SNPs. In addition to the dbSNP data set (build 139), DoGSD incorporates a comprehensive collection of SNPs from two newly sequenced samples (1 wolf and 1 dog) and collected SNPs from three latest dog/wolf genetic studies (7 wolves and 68 dogs), which were taken together for analysis with the population genetic statistics, Fst. In addition, DoGSD integrates some closely related information including SNP annotation, summary lists of SNPs located in genes, synonymous and non-synonymous SNPs, sampling location and breed information. 
All these features make DoGSD a useful resource for in-depth analysis in dog-/wolf-related studies.",DoGSD,0.996389866,NA,0,DoGSD,0.996389866,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2014 +"22135305, 26827237",http://dommino.org,"DOMMINO: a database of macromolecular interactions. With the growing number of experimentally resolved structures of macromolecular complexes, it becomes clear that the interactions that involve protein structures are mediated not only by the protein domains, but also by various non-structured regions, such as interdomain linkers, or terminal sequences. Here, we present DOMMINO (http://dommino.org), a comprehensive database of macromolecular interactions that includes the interactions between protein domains, interdomain linkers, N- and C-terminal regions and protein peptides. The database complements SCOP domain annotations with domain predictions by SUPERFAMILY and is automatically updated every week. The database interface is designed to provide the user with a three-stage pipeline to study macromolecular interactions: (i) a flexible search that can include a PDB ID, type of interaction, SCOP family of interacting proteins, organism name, interaction keyword and a minimal threshold on the number of contact pairs; (ii) visualization of subunit interaction network, where the user can investigate the types of interactions within a macromolecular assembly; and (iii) visualization of an interface structure between any pair of the interacting subunits, where the user can highlight several different types of residues within the interfaces as well as study the structure of the corresponding binary complex of subunits.",DOMMINO,0.927519023,NA,0,DOMMINO,0.927519023,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/30/2016 +24214966,http://csbl.bmb.uga.edu/DOOR,"DOOR 2.0: presenting operons and their functions through dynamic and integrated views. 
We have recently developed a new version of the DOOR operon database, DOOR 2.0, which is available online at http://csbl.bmb.uga.edu/DOOR/ and will be updated on a regular basis. DOOR 2.0 contains genome-scale operons for 2072 prokaryotes with complete genomes, three times the number of genomes covered in the previous version published in 2009. DOOR 2.0 has a number of new features, compared with its previous version, including (i) more than 250,000 transcription units, experimentally validated or computationally predicted based on RNA-seq data, providing a dynamic functional view of the underlying operons; (ii) an integrated operon-centric data resource that provides not only operons for each covered genome but also their functional and regulatory information such as their cis-regulatory binding sites for transcription initiation and termination, gene expression levels estimated based on RNA-seq data and conservation information across multiple genomes; (iii) a high-performance web service for online operon prediction on user-provided genomic sequences; (iv) an intuitive genome browser to support visualization of user-selected data; and (v) a keyword-based Google-like search engine for finding the needed information intuitively and rapidly in this database.",DOOR,0.986369848,NA,0,DOOR,0.986369848,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/7/2013 +25002814,http://caps.ncbs.res.in/DOR,"DOR - a Database of Olfactory Receptors - Integrated Repository for Sequence and Secondary Structural Information of Olfactory Receptors in Selected Eukaryotic Genomes. Olfaction is the response to odors and is mediated by a class of membrane-bound proteins called olfactory receptors (ORs). An understanding of these receptors serves as a good model for basic signal transduction mechanisms and also provides important clues for the strategies adopted by organisms for their ultimate survival using chemosensory perception in search of food or defense against predators. 
Prior research on cross-genome phylogenetic analyses from our group motivated the addressal of conserved evolutionary trends, clustering, and ortholog prediction of ORs. The database of olfactory receptors (DOR) is a repository that provides sequence and structural information on ORs of selected organisms (such as Saccharomyces cerevisiae, Drosophila melanogaster, Caenorhabditis elegans, Mus musculus, and Homo sapiens). Users can download OR sequences, study predicted membrane topology, and obtain cross-genome sequence alignments and phylogeny, including three-dimensional (3D) structural models of 100 selected ORs and their predicted dimer interfaces. The database can be accessed from http://caps.ncbs.res.in/DOR. Such a database should be helpful in designing experiments on point mutations to probe into the possible dimerization modes of ORs and to even understand the evolutionary changes between different receptors.",DOR,0.974434396,Database of Olfactory Receptors,0.88843143,DOR,0.974434396,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/12/2014 +30364951,"http://tubic.org/doric/, http://tubic.tju.edu.cn/doric","DoriC 10.0: an updated database of replication origins in prokaryotic genomes including chromosomes and plasmids. DoriC, a database of replication origins, was initially created to present the bacterial oriCs predicted by Ori-Finder or determined by experiments in 2007.√ɬÉ√ǬÇ√ɬÇ√Ǭ†DoriC 5.0, an updated database of oriC regions in both bacterial and archaeal genomes, was published in the 2013 Nucleic Acids Research database issue. Now, the latest release DoriC 10, a large-scale update of replication origins in prokaryotic genomes including chromosomes and plasmids, has been presented with a completely redesigned user interface, which is freely available at http://tubic.org/doric/ and http://tubic.tju.edu.cn/doric/. 
In the current release, the database of DoriC has made significant improvements compared with version 5.0 as follows: (i) inclusion of oriCs on more bacterial chromosomes increased from 1633 to 7580; (ii) inclusion of oriCs on more archaeal chromosomes increased from 86 to 226; (iii) inclusion of 1209 plasmid replication origins retrieved from NCBI annotations or predicted by in silico analysis; (iv) inclusion of more replication origin elements on bacterial chromosomes including DnaA-trio motifs. Now, DoriC becomes the most complete and scalable database of replication origins in prokaryotic genomes, and facilitates the studies in large-scale oriC data mining, strand-biased analyses and replication origin predictions.",DoriC,0.994725108,NA,0,DoriC,0.994725108,1,NA,23093601,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +23093601,http://tubic.tju.edu.cn/doric,"DoriC 5.0: an updated database of oriC regions in both bacterial and archaeal genomes. Replication of chromosomes is one of the central events in the cell cycle. Chromosome replication begins at specific sites, called origins of replication (oriCs), for all three domains of life. However, the origins of replication still remain unknown in a considerably large number of bacterial and archaeal genomes completely sequenced so far. The availability of increasing complete bacterial and archaeal genomes has created challenges and opportunities for identification of their oriCs in silico, as well as in vivo. Based on the Z-curve theory, we have developed a web-based system Ori-Finder to predict oriCs in bacterial genomes with high accuracy and reliability by taking advantage of comparative genomics, and the predicted oriC regions have been organized into an online database DoriC, which is publicly available at http://tubic.tju.edu.cn/doric/ since 2007. Five years after we constructed DoriC, the database has significant advances over the number of bacterial genomes, increasing about 4-fold. 
Additionally, oriC regions in archaeal genomes identified by in vivo experiments, as well as in silico analyses, have also been added to the database. Consequently, the latest release of DoriC contains oriCs for >1500 bacterial genomes and 81 archaeal genomes, respectively.",DoriC,0.991857886,NA,0,DoriC,0.991857886,1,NA,30364951,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/23/2012 +25416797,http://dorina.mdc-berlin.de,"DoRiNA 2.0--upgrading the doRiNA database of RNA interactions in post-transcriptional regulation. The expression of almost all genes in animals is subject to post-transcriptional regulation by RNA binding proteins (RBPs) and microRNAs (miRNAs). The interactions between both RBPs and miRNAs with mRNA can be mapped on a whole-transcriptome level using experimental and computational techniques established in the past years. The combined action of RBPs and miRNAs is thought to form a post-transcriptional regulatory code. Here we present doRiNA 2.0, available at http://dorina.mdc-berlin.de. In this highly improved new version, we have completely reworked the user interface and expanded the database to improve the usability of the website. Taking into account user feedback over the past years, the input forms for both the simple and the combinatorial search function have been streamlined and combined into a single web page that will also display the search results. Especially, custom uploads is one of the key new features in doRiNA 2.0. To enable the inclusion of doRiNA into third-party analysis pipelines, all operations are accessible via a REST API. Alternatively, local installations can be queried using a Python API. 
Both the web application and the APIs are available under an OSI-approved Open Source license that allows research and commercial access and re-use.",doRiNA,0.983651519,NA,0,doRiNA,0.983651519,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2014 +31581093,http://ciceklab.cs.bilkent.edu.tr/dorman,"DORMAN: Database of Reconstructed MetAbolic Networks. Genome-scale reconstructed metabolic networks have provided an organism specific understanding of cellular processes and their relations to phenotype. As they are deemed essential to study metabolism, the number of organisms with reconstructed metabolic networks continues to increase. This everlasting research interest lead to the development of online systems/repositories that store existing reconstructions and enable new model generation, integration, and constraint-based analyses. While features that support model reconstruction are widely available, current systems lack the means to help users who are interested in analyzing the topology of the reconstructed networks. Here, we present the Database of Reconstructed Metabolic Networks - DORMAN. DORMAN is a centralized online database that stores SBML-based reconstructed metabolic networks published in the literature, and provides web-based computational tools for visualizing and analyzing the model topology. Novel features of DORMAN are (i) interactive visualization interface that allows rendering of the complete network as well as editing and exporting the model, (ii) hierarchical navigation that provides efficient access to connected entities in the model, (iii) built-in query interface that allow posing topological queries, and finally, and (iv) model comparison tool that enables comparing models with different nomenclatures, using approximate string matching. 
DORMAN is online and freely accessible at http://ciceklab.cs.bilkent.edu.tr/dorman.",DORMAN,0.996239364,Database of Reconstructed Metabolic Networks,0.906664733,DORMAN,0.996239364,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +23846594,http://bo-protscience.fr/dosa,"DoSA: Database of Structural Alignments. Protein structure alignment is a crucial step in protein structure-function analysis. Despite the advances in protein structure alignment algorithms, some of the local conformationally similar regions are mislabeled as structurally variable regions (SVRs). These regions are not well superimposed because of differences in their spatial orientations. The Database of Structural Alignments (DoSA) addresses this gap in identification of local structural similarities obscured in global protein structural alignments by realigning SVRs using an algorithm based on protein blocks. A set of protein blocks is a structural alphabet that abstracts protein structures into 16 unique local structural motifs. DoSA provides unique information about 159,780 conformationally similar and 56,140 conformationally dissimilar SVRs in 74 705 pairwise structural alignments of homologous proteins. The information provided on conformationally similar and dissimilar SVRs can be helpful to model loop regions. It is also conceivable that conformationally similar SVRs with conserved residues could potentially contribute toward functional integrity of homologues, and hence identifying such SVRs could be helpful in understanding the structural basis of protein function. Database URL: http://bo-protscience.fr/dosa/",DoSA,0.991789222,Database of Structural Alignments,0.871132361,DoSA,0.991789222,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/11/2013 +34405389,http://dowls.site,"Database of word-level statistics for Mandarin Chinese (DoWLS-MAN). In this article we present the Database of Word-Level Statistics for Mandarin Chinese (DoWLS-MAN). 
The database addresses the lack of agreement in phonological syllable segmentation specific to Mandarin by offering phonological features for each lexical item according to 16 schematic representations of the syllable (8 with tone and 8 without tone). Those lexical statistics that differ per phonological word and nonword due to changes in syllable segmentation are of the variant category and include subtitle lexical frequency, phonological neighborhood density measures, homophone density, and network science measures. The invariant characteristics consist of each items' lexical tone, phonological transcription, and syllable structure among others. The goal of DoWLS-MAN is to provide researchers both the ability to choose stimuli that are derived from a segmentation schema that supports an existing model of Mandarin speech processing, and the ability to choose stimuli that allow for the testing of hypotheses on phonological segmentation according to multiple schemas. In an exploratory analysis we illustrate how multiple schematic representations of the phonological mental lexicon can aid in hypothesis generation, specifically in terms of phonological processing when reading Chinese orthography. Users of the database can search among over 92,000 words, over 1600 out-of-vocabulary Chinese characters, and 4300 phonological nonwords according to either Chinese orthography, pinyin, or ASCII phonetic script. Users can also generate a list of phonological words and nonwords according to user-defined ranges and categories of lexical characteristics. DoWLS-MAN is available to the public for search or download at https://dowls.site .",DoWLS-MAN,0.988136002,Database of Word-Level Statistics for Mandarin Chinese,0.950584922,DoWLS-MAN,0.988136002,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/17/2021 +33216893,http://www.peptide-ligand.cn,"DPL: a comprehensive database on sequences, structures, sources and functions of peptide ligands. . 
DPL (http://www.peptide-ligand.cn/) is a comprehensive database of peptide ligand (DPL). DPL1.0 holds 1044 peptide ligand entries and provides references for the study of the polypeptide platform. The data were collected from PubMed-NCBI, PDB, APD3, CAMPR3, etc. The lengths of the base sequences are varied from 3 to78. DPL database has 923 linear peptides and 88 cyclic peptides. The functions of peptides collected by DPL are very wide. It includes 540 entries of antiviral peptides (including SARS-CoV-2), 55 entries of signal peptides, 48 entries of protease inhibitors, 45 entries of anti-hypertension, 37 entries of anticancer peptides, etc. There are 270 different kinds of peptide targets. All peptides in DPL have clear binding targets. Most of the peptides and receptors have 3D structures experimentally verified or predicted by CYCLOPS, I-TASSER and SWISS-MODEL. With the rapid development of the COVID-2019 epidemic, this database also collects the research progress of peptides against coronavirus. In conclusion, DPL is a unique resource, which allows users easily to explore the targets, different structures as well as properties of peptides.",DPL,0.99479425,database of,0.562851697,DPL,0.99479425,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2020 +24302579,http://syslab.nchu.edu.tw/DPRP,"DPRP: a database of phenotype-specific regulatory programs derived from transcription factor binding data. Gene expression profiling has been extensively used in the past decades, resulting in an enormous amount of expression data available in public databases. These data sets are informative in elucidating transcriptional regulation of genes underlying various biological and clinical conditions. However, it is usually difficult to identify transcription factors (TFs) responsible for gene expression changes directly from their own expression, as TF activity is often regulated at the posttranscriptional level. 
In recent years, technical advances have made it possible to systematically determine the target genes of TFs by ChIP-seq experiments. To identify the regulatory programs underlying gene expression profiles, we constructed a database of phenotype-specific regulatory programs (DPRP, http://syslab.nchu.edu.tw/DPRP/) derived from the integrative analysis of TF binding data and gene expression data. DPRP provides three methods: the Fisher's Exact Test, the Kolmogorov-Smirnov test and the BASE algorithm to facilitate the application of gene expression data for generating new hypotheses on transcriptional regulatory programs in biological and clinical studies.",DPRP,0.99437356,NA,0,DPRP,0.99437356,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/2/2013 +27173524,http://genedenovoweb.ticp.net:81/DPTEdb/index.php,"DPTEdb, an integrative database of transposable elements in dioecious plants. . Dioecious plants usually harbor 'young' sex chromosomes, providing an opportunity to study the early stages of sex chromosome evolution. Transposable elements (TEs) are mobile DNA elements frequently found in plants and are suggested to play important roles in plant sex chromosome evolution. The genomes of several dioecious plants have been sequenced, offering an opportunity to annotate and mine the TE data. However, comprehensive and unified annotation of TEs in these dioecious plants is still lacking. In this study, we constructed a dioecious plant transposable element database (DPTEdb). DPTEdb is a specific, comprehensive and unified relational database and web interface. We used a combination of de novo, structure-based and homology-based approaches to identify TEs from the genome assemblies of previously published data, as well as our own. The database currently integrates eight dioecious plant species and a total of 31 340 TEs along with classification information. DPTEdb provides user-friendly web interfaces to browse, search and download the TE sequences in the database. 
Users can also use tools, including BLAST, GetORF, HMMER, Cut sequence and JBrowse, to analyze TE data. Given the role of TEs in plant sex chromosome evolution, the database will contribute to the investigation of TEs in structural, functional and evolutionary dynamics of the genome of dioecious plants. In addition, the database will supplement the research of sex diversification and sex chromosome evolution of dioecious plants.Database URL: http://genedenovoweb.ticp.net:81/DPTEdb/index.php.",DPTEdb,0.989185214,dioecious plant transposable element database,0.863010341,DPTEdb,0.989185214,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/12/2016 +24548788,http://www.bioinfoindia.org/drgas,"DR-GAS: a database of functional genetic variants and their phosphorylation states in human DNA repair systems. We present DR-GAS(1), a unique, consolidated and comprehensive DNA repair genetic association studies database of human DNA repair system. It presents information on repair genes, assorted mechanisms of DNA repair, linkage disequilibrium, haplotype blocks, nsSNPs, phosphorylation sites, associated diseases, and pathways involved in repair systems. DNA repair is an intricate process which plays an essential role in maintaining the integrity of the genome by eradicating the damaging effect of internal and external changes in the genome. Hence, it is crucial to extensively understand the intact process of DNA repair, genes involved, non-synonymous SNPs which perhaps affect the function, phosphorylated residues and other related genetic parameters. All the corresponding entries for DNA repair genes, such as proteins, OMIM IDs, literature references and pathways are cross-referenced to their respective primary databases. DNA repair genes and their associated parameters are either represented in tabular or in graphical form through images elucidated by computational and statistical analyses. 
It is believed that the database will assist molecular biologists, biotechnologists, therapeutic developers and other scientific community to encounter biologically meaningful information, and meticulous contribution of genetic level information towards treacherous diseases in human DNA repair systems. DR-GAS is freely available for academic and research purposes at: http://www.bioinfoindia.org/drgas.",DR-GAS,0.991618946,NA,0,DR-GAS,0.991618946,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/16/2014 +22135288,http://www.scbit.org/dbmi/drvis,"Dr.VIS: a database of human disease-related viral integration sites. Viral integration plays an important role in the development of malignant diseases. Viruses differ in preferred integration site and flanking sequence. Viral integration sites (VIS) have been found next to oncogenes and common fragile sites. Understanding the typical DNA features near VIS is useful for the identification of potential oncogenes, prediction of malignant disease development and assessing the probability of malignant transformation in gene therapy. Therefore, we have built a database of human disease-related VIS (Dr.VIS, http://www.scbit.org/dbmi/drvis) to collect and maintain human disease-related VIS data, including characteristics of the malignant disease, chromosome region, genomic position and viral-host junction sequence. The current build of Dr.VIS covers about 600 natural VIS of 5 oncogenic viruses representing 11 diseases. Among them, about 200 VIS have viral-host junction sequence.",Dr.VIS,0.991928021,NA,0,Dr.VIS,0.991928021,1,NA,25355513,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,12/1/2011 +25355513,http://www.bioinfo.org/drvis,"Dr.VIS v2.0: an updated database of human disease-related viral integration sites in the era of high-throughput deep sequencing. Dr.VIS is a database of human disease-related viral integration sites (VIS). 
The number of VIS has grown rapidly since Dr.VIS was first released in 2011, and there is growing recognition of the important role that viral integration plays in the development of malignancies. The updated database version, Dr.VIS v2.0 (http://www.bioinfo.org/drvis or bminfor.tongji.edu.cn/drvis_v2), represents 25 diseases, covers 3340 integration sites of eight oncogenic viruses in human chromosomes and provides more accurate information about VIS from high-throughput deep sequencing results obtained mainly after 2012. Data of VISes for three newly identified oncogenic viruses for 14 related diseases have been added to this 2015 update, which has a 5-fold increase of VISes compared to Dr.VIS v1.0. Dr.VIS v2.0 has 2244 precise integration sites, 867 integration regions and 551 junction sequences. A total of 2295 integration sites are located near 1730 involved genes. Of the VISes, 1153 are detected in the exons or introns of genes, with 294 located up to 5 kb and a further 112 located up to 10 kb away. As viral integration may alter chromosome stability and gene expression levels, characterizing VISes will contribute toward the discovery of novel oncogenes, tumor suppressor genes and tumor-associated pathways.",Dr.VIS,0.978358825,NA,0,Dr.VIS,0.978358825,1,NA,22135288,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,10/29/2014 +31409791,http://dramp.cpu-bioinfor.org,"DRAMP 2.0, an updated data repository of antimicrobial peptides. Data Repository of Antimicrobial Peptides (DRAMP, http://dramp.cpu-bioinfor.org/ ) is an open-access comprehensive database containing general, patent and clinical antimicrobial peptides (AMPs). Currently DRAMP has been updated to version 2.0, it contains a total of 19,899 entries (newly added 2,550 entries), including 5,084 general entries, 14,739 patent entries, and 76 clinical entries. The update covers new entries, structures, annotations, classifications and downloads. 
Compared with APD and CAMP, DRAMP contains 14,040 (70.56% in DRAMP) non-overlapping sequences. In order to facilitate users to trace original references, PubMed_ID of references have been contained in activity information. The data of DRAMP can be downloaded by dataset and activity, and the website source code is also available on dedicatedly designed download webpage. Although thousands of AMPs have been reported, only a few parts have entered clinical stage. In the paper, we described several AMPs in clinical trials, including their properties, indications and clinicaltrials.gov identifiers. Finally, we provide the applications of DRAMP in the development of AMPs.",DRAMP,0.984241545,Data Repository of Antimicrobial Peptides,0.965962529,DRAMP,0.984241545,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/13/2019 +29209336,http://drdb.big.ac.cn/home,"DRDB: An Online Date Palm Genomic Resource Database. Background: Date palm (Phoenix dactylifera L.) is a cultivated woody plant with agricultural and economic importance in many countries around the world. With the advantages of next generation sequencing technologies, genome sequences for many date palm cultivars have been released recently. Short sequence repeat (SSR) and single nucleotide polymorphism (SNP) can be identified from these genomic data, and have been proven to be very useful biomarkers in plant genome analysis and breeding. Results: Here, we first improved the date palm genome assembly using 130X of HiSeq data generated in our lab. Then 246,445 SSRs (214,901 SSRs and 31,544 compound SSRs) were annotated in this genome assembly; among the SSRs, mononucleotide SSRs (58.92%) were the most abundant, followed by di- (29.92%), tri- (8.14%), tetra- (2.47%), penta- (0.36%), and hexa-nucleotide SSRs (0.19%). The high-quality PCR primer pairs were designed for most (174,497; 70.81% out of total) SSRs. We also annotated 6,375,806 SNPs with raw read depth√ɬÉ√Ǭ¢√ɬÇ√Ǭâ√ɬÇ√Ǭ•3 in 90% cultivars. 
To further reduce false positive SNPs, we only kept 5,572,650 (87.40% out of total) SNPs with at least 20% cultivars support for downstream analyses. The high-quality PCR primer pairs were also obtained for 4,177,778 (65.53%) SNPs. We reconstructed the phylogenetic relationships among the 62 cultivars using these variants and found that they can be divided into three clusters, namely North Africa, Egypt - Sudan, and Middle East - South Asian, with Egypt - Sudan being the admixture of North Africa and Middle East - South Asian cultivars; we further confirmed these clusters using principal component analysis. Moreover, 34,346 SSRs and 4,177,778 SNPs with PCR primers were assigned to shared cultivars for cultivar classification and diversity analysis. All these SSRs, SNPs and their classification are available in our database, and can be used for cultivar identification, comparison, and molecular breeding. Conclusion:DRDB is a comprehensive genomic resource database of date palm. It can serve as a bioinformatics platform for date palm genomics, genetics, and molecular breeding. DRDB is freely available at http://drdb.big.ac.cn/home.",DRDB,0.974232197,Date Palm Genomic Resource Database,0.711140464,DRDB,0.974232197,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/2/2017 +34774049,http://bio-big-data.cn:8080/DREAM,"DREAM: a database of experimentally supported protein-coding RNAs and drug associations in human cancer. The Drug Response Gene Expression Associated Map, also referred as ""DREAM"" ( http://bio-big-data.cn:8080/DREAM ), is a manually curated database of experimentally supported protein-coding RNAs and drugs associations in human cancers. 
The current version of the DREAM documents 3048 entries about scientific literatures supported drug sensitivity or drug intervention related protein-coding RNAs from PubMed database and 195 high-throughput microarray data about drug sensitivity or drug intervention related protein-coding RNAs data from GEO database. Each entry in DREAM database contains detailed information on protein-coding RNA, drug, cancer, and other information including title, PubMed ID, journal, publish time. The DREAM database also provides some data visualization and online analysis services such as volcano plot, GO/KEGG enrichment function analysis, and novel drug discovery analysis. We hope the DREAM database should serve as a valuable resource for clinical practice and basic research, which could help researchers better understand the effects of protein-coding RNAs on drug response in human cancers.",DREAM,0.993100206,Drug Response Gene Expression Associated Map,0.914302438,DREAM,0.993100206,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/13/2021 +27276067,http://www.bioinfoindia.org/dremecels,"DREMECELS: A Curated Database for Base Excision and Mismatch Repair Mechanisms Associated Human Malignancies. DNA repair mechanisms act as a warrior combating various damaging processes that ensue critical malignancies. DREMECELS was designed considering the malignancies with frequent alterations in DNA repair pathways, that is, colorectal and endometrial cancers, associated with Lynch syndrome (also known as HNPCC). Since lynch syndrome carries high risk (~40-60%) for both cancers, therefore we decided to cover all three diseases in this portal. Although a large population is presently affected by these malignancies, many resources are available for various cancer types but no database archives information on the genes specifically for only these cancers and disorders. The database contains 156 genes and two repair mechanisms, base excision repair (BER) and mismatch repair (MMR). 
Other parameters include some of the regulatory processes that have roles in these disease progressions due to incompetent repair mechanisms, specifically BER and MMR. However, our unique database mainly provides qualitative and quantitative information on these cancer types along with methylation, drug sensitivity, miRNAs, copy number variation (CNV) and somatic mutations data. This database would serve the scientific community by providing integrated information on these disease types, thus sustaining diagnostic and therapeutic processes. This repository would serve as an excellent accompaniment for researchers and biomedical professionals and facilitate in understanding such critical diseases. DREMECELS is publicly available at http://www.bioinfoindia.org/dremecels.",DREMECELS,0.997764349,NA,0,DREMECELS,0.997764349,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/8/2016 +"24214964, 26635391, 30542988, 31701128",http://ngs.ym.edu.tw/driverdb,"DriverDB: an exome sequencing database for cancer driver gene identification. Exome sequencing (exome-seq) has aided in the discovery of a huge amount of mutations in cancers, yet challenges remain in converting oncogenomics data into information that is interpretable and accessible for clinical care. We constructed DriverDB (http://ngs.ym.edu.tw/driverdb/), a database which incorporates 6079 cases of exome-seq data, annotation databases (such as dbSNP, 1000 Genome and Cosmic) and published bioinformatics algorithms dedicated to driver gene/mutation identification. We provide two points of view, 'Cancer' and 'Gene', to help researchers to visualize the relationships between cancers and driver genes/mutations. The 'Cancer' section summarizes the calculated results of driver genes by eight computational methods for a specific cancer type/dataset and provides three levels of biological interpretation for realization of the relationships between driver genes. 
The 'Gene' section is designed to visualize the mutation information of a driver gene in five different aspects. Moreover, a 'Meta-Analysis' function is provided so researchers may identify driver genes in customer-defined samples. The novel driver genes/mutations identified hold potential for both basic research and biotech applications.",DriverDB,0.995986342,NA,0,DriverDB,0.995986342,4,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +31691822,http://llps.biocuckoo.cn,"DrLLPS: a data resource of liquid-liquid phase separation in eukaryotes. Here, we presented an integrative database named DrLLPS (http://llps.biocuckoo.cn/) for proteins involved in liquid-liquid phase separation (LLPS), which is a ubiquitous and crucial mechanism for spatiotemporal organization of various biochemical reactions, by creating membraneless organelles (MLOs) in eukaryotic cells. From the literature, we manually collected 150 scaffold proteins that are drivers of LLPS, 987 regulators that contribute in modulating LLPS, and 8148 potential client proteins that might be dispensable for the formation of MLOs, which were then categorized into 40 biomolecular condensates. We searched potential orthologs of these known proteins, and in total DrLLPS contained 437 887 known and potential LLPS-associated proteins in 164 eukaryotes. Furthermore, we carefully annotated LLPS-associated proteins in eight model organisms, by using the knowledge integrated from 110 widely used resources that covered 16 aspects, including protein disordered regions, domain annotations, post-translational modifications (PTMs), genetic variations, cancer mutations, molecular interactions, disease-associated information, drug-target relations, physicochemical property, protein functional annotations, protein expressions/proteomics, protein 3D structures, subcellular localizations, mRNA expressions, DNA & RNA elements, and DNA methylations. 
We anticipate DrLLPS can serve as a helpful resource for further analysis of LLPS.",DrLLPS,0.997929573,NA,0,DrLLPS,0.997929573,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +28533016,http://www.mgc.ac.cn/DRodVir,"DRodVir: A resource for exploring the virome diversity in rodents. Emerging zoonotic diseases have received tremendous interests in recent years, as they pose a significant threat to human health, animal welfare, and economic stability. A high proportion of zoonoses originate from wildlife reservoirs. Rodents are the most numerous, widespread, and diverse group of mammals on the earth and are reservoirs for many zoonotic viruses responsible for significant morbidity and mortality. A better understanding of virome diversity in rodents would be of importance for researchers and professionals in the field. Therefore, we developed the DRodVir database (http://www.mgc.ac.cn/DRodVir/), a comprehensive, up-to-date, and well-curated repository of rodent-associated animal viruses. The database currently covers 7690 sequences from 5491 rodent-associated mammal viruses of 26 viral families detected from 194 rodent species in 93 countries worldwide. In addition to virus sequences, the database provides detailed information on related samples and host rodents, as well as a set of online analytical tools for text query, BLAST search and phylogenetic reconstruction. The DRodVir database will help virologists better understand the virome diversity of rodents. Moreover, it will be a valuable tool for epidemiologists and zoologists for easy monitoring and tracking of the current and future zoonotic diseases. 
As a data application example, we further compared the current status of rodent-associated viruses with bat-associated viruses to highlight the necessity for including additional host species and geographic regions in future investigations, which will help us achieve a better understanding of the virome diversities in the two major reservoirs of emerging zoonotic infectious diseases.",DRodVir,0.988821983,NA,0,DRodVir,0.988821983,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/3/2017 +25979979,http://pgsb.helmholtz-muenchen.de/droughtdb,"DroughtDB: an expert-curated compilation of plant drought stress genes and their homologs in nine species. Plants are sessile and therefore exposed to a number of biotic and abiotic stresses. Drought is the major abiotic stress restricting plant growth worldwide. A number of genes involved in drought stress response have already been characterized, mainly in the model species Arabidopsis thaliana and Oryza sativa. However, with the aim to produce drought tolerant crop varieties, it is of importance to identify the respective orthologs for each species. We have developed DroughtDB, a manually curated compilation of molecularly characterized genes that are involved in drought stress response. DroughtDB includes information about the originally identified gene, its physiological and/or molecular function and mutant phenotypes and provides detailed information about computed orthologous genes in nine model and crop plant species including maize and barley. All identified orthologs are interlinked with the respective reference entry in MIPS/PGSB PlantsDB, which allows retrieval of additional information like genome context and sequence information. Thus, DroughtDB is a valuable resource and information tool for researchers working on drought stress and will facilitate the identification, analysis and characterization of genes involved in drought stress tolerance in agriculturally important crop plants. 
Database URL: http://pgsb.helmholtz-muenchen.de/droughtdb/",DroughtDB,0.997056782,NA,0,DroughtDB,0.997056782,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/15/2015 +33995899,http://www.flyrnai.org/tools/single_cell/web,"DRscDB: A single-cell RNA-seq resource for data mining and data comparison across species. With the advent of single-cell RNA sequencing (scRNA-seq) technologies, there has been a spike in studies involving scRNA-seq of several tissues across diverse species including Drosophila. Although a few databases exist for users to query genes of interest within the scRNA-seq studies, search tools that enable users to find orthologous genes and their cell type-specific expression patterns across species are limited. Here, we built a new search database, DRscDB (https://www.flyrnai.org/tools/single_cell/web/), to address this need. DRscDB serves as a comprehensive repository for published scRNA-seq datasets for Drosophila and relevant datasets from human and other model organisms. DRscDB is based on manual curation of Drosophila scRNA-seq studies of various tissue types and their corresponding analogous tissues in vertebrates including zebrafish, mouse, and human. Of note, our search database provides most of the literature-derived marker genes, thus preserving the original analysis of the published scRNA-seq datasets. Finally, DRscDB serves as a web-based user interface that allows users to mine gene expression data from scRNA-seq studies and perform cell cluster enrichment analyses pertaining to various scRNA-seq studies, both within and across species.",DRscDB,0.997646034,NA,0,DRscDB,0.997646034,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/11/2021 +24618344,http://www.drug2gene.com,"Drug2Gene: an exhaustive resource to explore effectively the drug-target relation network. Background Information about drug-target relations is at the heart of drug discovery. There are now dozens of databases providing drug-target interaction data with varying scope, and focus. 
Therefore, and due to the large chemical space, the overlap of the different data sets is surprisingly small. As searching through these sources manually is cumbersome, time-consuming and error-prone, integrating all the data is highly desirable. Despite a few attempts, integration has been hampered by the diversity of descriptions of compounds, and by the fact that the reported activity values, coming from different data sets, are not always directly comparable due to usage of different metrics or data formats. Description We have built Drug2Gene, a knowledge base, which combines the compound/drug-gene/protein information from 19 publicly available databases. A key feature is our rigorous unification and standardization process which makes the data truly comparable on a large scale, allowing for the first time effective data mining in such a large knowledge corpus. As of version 3.2, Drug2Gene contains 4,372,290 unified relations between compounds and their targets most of which include reported bioactivity data. We extend this set with putative (i.e. homology-inferred) relations where sufficient sequence homology between proteins suggests they may bind to similar compounds. Drug2Gene provides powerful search functionalities, very flexible export procedures, and a user-friendly web interface. Conclusions Drug2Gene v3.2 has become a mature and comprehensive knowledge base providing unified, standardized drug-target related information gathered from publicly available data sources. It can be used to integrate proprietary data sets with publicly available data sets. Its main goal is to be a 'one-stop shop' to identify tool compounds targeting a given gene product or for finding all known targets of a drug. 
Drug2Gene with its integrated data set of public compound-target relations is freely accessible without restrictions at http://www.drug2gene.com.",Drug2Gene,0.990610043,NA,0,Drug2Gene,0.990610043,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/11/2014 +28299908,http://genomics.senescence.info/drugs,"The DrugAge database of aging-related drugs. Aging is a major worldwide medical challenge. Not surprisingly, identifying drugs and compounds that extend lifespan in model organisms is a growing research area. Here, we present DrugAge (http://genomics.senescence.info/drugs/), a curated database of lifespan-extending drugs and compounds. At the time of writing, DrugAge contains 1316 entries featuring 418 different compounds from studies across 27 model organisms, including worms, flies, yeast and mice. Data were manually curated from 324 publications. Using drug-gene interaction data, we also performed a functional enrichment analysis of targets of lifespan-extending drugs. Enriched terms include various functional categories related to glutathione and antioxidant activity, ion transport and metabolic processes. In addition, we found a modest but significant overlap between targets of lifespan-extending drugs and known aging-related genes, suggesting that some but not most aging-related pathways have been targeted pharmacologically in longevity studies. DrugAge is freely available online for the scientific community and will be an important resource for biogerontologists.",DrugAge,0.997624516,NA,0,DrugAge,0.997624516,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/16/2017 +24203711,http://www.drugbank.ca,"DrugBank 4.0: shedding new light on drug metabolism. DrugBank (http://www.drugbank.ca) is a comprehensive online database containing extensive biochemical and pharmacological information about drugs, their mechanisms and their targets. Since it was first described in 2006, DrugBank has rapidly evolved, both in response to user requests and in response to changing trends in drug research and development. 
Previous versions of DrugBank have been widely used to facilitate drug and in silico drug target discovery. The latest update, DrugBank 4.0, has been further expanded to contain data on drug metabolism, absorption, distribution, metabolism, excretion and toxicity (ADMET) and other kinds of quantitative structure activity relationships (QSAR) information. These enhancements are intended to facilitate research in xenobiotic metabolism (both prediction and characterization), pharmacokinetics, pharmacodynamics and drug design/discovery. For this release, >1200 drug metabolites (including their structures, names, activity, abundance and other detailed data) have been added along with >1300 drug metabolism reactions (including metabolizing enzymes and reaction types) and dozens of drug metabolism pathways. Another 30 predicted or measured ADMET parameters have been added to each DrugCard, bringing the average number of quantitative ADMET values for Food and Drug Administration-approved drugs close to 40. Referential nuclear magnetic resonance and MS spectra have been added for almost 400 drugs as well as spectral and mass matching tools to facilitate compound identification. This expanded collection of drug information is complemented by a number of new or improved search tools, including one that provides a simple analyses of drug-target, -enzyme and -transporter associations to provide insight on drug-drug interactions.",DrugBank,0.995137691,NA,0,DrugBank,0.995137691,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2013 +"30371892, 33151287",http://drugcentral.org,"DrugCentral 2018: an update. DrugCentral is a drug information resource (http://drugcentral.org) open to the public since 2016 and previously described in the 2017 Nucleic Acids Research Database issue. Since the 2016 release, 103 new approved drugs were updated. 
The following new data sources have been included: Food and Drug Administration (FDA) Adverse Event Reporting System (FAERS), FDA Orange Book information, L1000 gene perturbation profile distance/similarity matrices and estimated protonation constants. New and existing entries have been updated with the latest information from scientific literature, drug labels and external databases. The web interface has been updated to display and query new data. The full database dump and data files are available for download from the DrugCentral website.",DrugCentral,0.996682286,NA,0,DrugCentral,0.996682286,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +31066443,http://drugcomb.fimm.fi,"DrugComb: an integrative cancer drug combination data portal. Drug combination therapy has the potential to enhance efficacy, reduce dose-dependent toxicity and prevent the emergence of drug resistance. However, discovery of synergistic and effective drug combinations has been a laborious and often serendipitous process. In recent years, identification of combination therapies has been accelerated due to the advances in high-throughput drug screening, but informatics approaches for systems-level data management and analysis are needed. To contribute toward this goal, we created an open-access data portal called DrugComb (https://drugcomb.fimm.fi) where the results of drug combination screening studies are accumulated, standardized and harmonized. Through the data portal, we provided a web server to analyze and visualize users' own drug combination screening data. The users can also effectively participate a crowdsourcing data curation effect by depositing their data at DrugComb. To initiate the data repository, we collected 437 932 drug combinations tested on a variety of cancer cell lines. We showed that linear regression approaches, when considering chemical fingerprints as predictors, have the potential to achieve high accuracy of predicting the sensitivity of drug combinations. 
All the data and informatics tools are freely available in DrugComb to enable a more efficient utilization of data resources for future drug combination discovery.",DrugComb,0.991504312,NA,0,DrugComb,0.991504312,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2019 +31665429,http://drugcombdb.denglab.org,"DrugCombDB: a comprehensive database of drug combinations toward the discovery of combinatorial therapy. Drug combinations have demonstrated high efficacy and low adverse side effects compared to single drug administration in cancer therapies and thus have drawn intensive attention from researchers and pharmaceutical enterprises. Due to the rapid development of high-throughput screening (HTS), the number of drug combination datasets available has increased tremendously in recent years. Therefore, there is an urgent need for a comprehensive database that is crucial to both experimental and computational screening of synergistic drug combinations. In this paper, we present DrugCombDB, a comprehensive database devoted to the curation of drug combinations from various data sources: (i) HTS assays of drug combinations; (ii) manual curations from the literature; and (iii) FDA Orange Book and external databases. Specifically, DrugCombDB includes 448 555 drug combinations derived from HTS assays, covering 2887 unique drugs and 124 human cancer cell lines. In particular, DrugCombDB has more than 6000 000 quantitative dose responses from which we computed multiple synergy scores to determine the overall synergistic or antagonistic effects of drug combinations. In addition to the combinations extracted from existing databases, we manually curated 457 drug combinations from thousands of PubMed publications. To benefit the further experimental validation and development of computational models, multiple datasets that are ready to train prediction models for classification and regression analysis were constructed and other significant related data were gathered. 
A website with a user-friendly graphical visualization has been developed for users to access the wealth of data and download prebuilt datasets. Our database is available at http://drugcombdb.denglab.org/.",DrugCombDB,0.996946216,NA,0,DrugCombDB,0.996946216,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +25228099,http://drugevar.genomicmedicinealliance.org,"DruGeVar: an online resource triangulating drugs with genes and genomic biomarkers for clinical pharmacogenomics. Background/aims Pharmacogenomics aims to rationalize drug use by minimizing drug toxicity and/or by increasing drug efficacy. A large number of genomic markers have been correlated with variable drug responses and severity of adverse drug reactions. Although a number of these drugs bear pharmacogenomic information in their labels--approved by regulatory agencies--and comprehensive drug/gene lists exist online, information related to the respective pharmacogenomic biomarkers is currently missing from such lists. Methods We extracted information from the published literature and online resources and developed DruGeVar (http://drugevar.genomicmedicinealliance.org), an online resource triangulating drugs with genes and pharmacogenomic biomarkers in an effort to build a comprehensive database that could serve clinical pharmacogenomics. Results and conclusions A user-friendly data querying and visualization interface allows users to formulate simple and complex queries. Such a database would be readily applicable as a stand-alone resource or a plug-in module for other databases.",DruGeVar,0.996403813,NA,0,DruGeVar,0.996403813,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/9/2014 +33787872,http://maayanlab.cloud/drugmonizome,"Drugmonizome and Drugmonizome-ML: integration and abstraction of small molecule attributes for drug enrichment analysis and machine learning. . 
Understanding the underlying molecular and structural similarities between seemingly heterogeneous sets of drugs can aid in identifying drug repurposing opportunities and assist in the discovery of novel properties of preclinical small molecules. A wealth of information about drug and small molecule structure, targets, indications and side effects; induced gene expression signatures; and other attributes are publicly available through web-based tools, databases and repositories. By processing, abstracting and aggregating information from these resources into drug set libraries, knowledge about novel properties of drugs and small molecules can be systematically imputed with machine learning. In addition, drug set libraries can be used as the underlying database for drug set enrichment analysis. Here, we present Drugmonizome, a database with a search engine for querying annotated sets of drugs and small molecules for performing drug set enrichment analysis. Utilizing the data within Drugmonizome, we also developed Drugmonizome-ML. Drugmonizome-ML enables users to construct customized machine learning pipelines using the drug set libraries from Drugmonizome. To demonstrate the utility of Drugmonizome, drug sets from 12 independent SARS-CoV-2 in vitro screens were subjected to consensus enrichment analysis. Despite the low overlap among these 12 independent in vitro screens, we identified common biological processes critical for blocking viral replication. To demonstrate Drugmonizome-ML, we constructed a machine learning pipeline to predict whether approved and preclinical drugs may induce peripheral neuropathy as a potential side effect. Overall, the Drugmonizome and Drugmonizome-ML resources provide rich and diverse knowledge about drugs and small molecules for direct systems pharmacology applications. 
Database URL: https://maayanlab.cloud/drugmonizome/.",Drugmonizome,0.990401685,NA,0,Drugmonizome,0.990401685,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2021 +24663501,"http://www.drugpath.org, http://mimi.ncibi.org","DrugPath: a database for academic investigators to match oncology molecular targets with drugs in development. Purpose Academic laboratories are developing increasingly large amounts of data that describe the genomic landscape and gene expression patterns of various types of cancers. Such data can potentially identify novel oncology molecular targets in cancer types that may not be the primary focus of a drug sponsor's initial research for an investigational new drug. Obtaining preclinical data that point toward the potential for a given molecularly targeted agent, or a novel combination of agents requires knowledge of drugs currently in development in both the academic and commercial sectors. Methods We have developed the DrugPath database ( http://www.drugpath.org ) as a comprehensive, free-of-charge resource for academic investigators to identify agents being developed in academics or industry that may act against molecular targets of interest. DrugPath data on molecular targets overlay the Michigan Molecular Interactions ( http://mimi.ncibi.org ) gene-gene interaction map to facilitate identification of related agents in the same pathway. Results The database catalogs 2,081 drug development programs representing 751 drug sponsors and 722 molecular and genetic targets. Conclusions DrugPath should assist investigators in identifying and obtaining drugs acting on specific molecular targets for biological and preclinical therapeutic studies.",DrugPath,0.983626783,NA,0,DrugPath,0.983626783,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/25/2014 +31096089,http://www.drugr.ir,"DrugR+: A comprehensive relational database for drug repurposing, combination therapy, and replacement therapy. 
Drug repurposing or repositioning, which introduces new applications of the existing drugs, is an emerging field in drug discovery scope. To enhance the success rate of the research and development (R&D) process in a cost- and time-effective manner, a number of pharmaceutical companies worldwide have made tremendous investments. Besides, many researchers have proposed various methods and databases for the repurposing of various drugs. However, there is not a proper and well-organized database available. To this end, for the first time, we developed a new database based on DrugBank and KEGG data, which is named ""DrugR+"". Our developed database provides some advantages relative to the DrugBank, and its interface supplies new capabilities for both single and synthetic repositioning of drugs. Moreover, it includes four new datasets which can be used for predicting drug-target interactions using supervised machine learning methods. As a case study, we introduced novel applications of some drugs and discussed the obtained results. A comparison of several machine learning methods on the generated datasets has also been reported in the Supplementary File. Having included several normalized tables, DrugR+ has been organized to provide key information on data structures for the repurposing and combining applications of drugs. It provides the SQL query capability for professional users and an appropriate method with different options for unprofessional users. Additionally, DrugR+ consists of repurposing service that accepts a drug and proposes a list of potential drugs for some usages. 
Taken all, DrugR+ is a free web-based database and accessible using (http://www.drugr.ir), which can be updated through a map-reduce parallel processing method to provide the most relevant information.",DrugR,0.989119887,NA,0,DrugR,0.989119887,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/8/2019 +28562632,http://biotechlab.fudan.edu.cn/database/drugsig,"DrugSig: A resource for computational drug repositioning utilizing gene expression signatures. Computational drug repositioning has been proved as an effective approach to develop new drug uses. However, currently existing strategies strongly rely on drug response gene signatures which scattered in separated or individual experimental data, and resulted in low efficient outputs. So, a fully drug response gene signatures database will be very helpful to these methods. We collected drug response microarray data and annotated related drug and targets information from public databases and scientific literature. By selecting top 500 up-regulated and down-regulated genes as drug signatures, we manually established the DrugSig database. Currently DrugSig contains more than 1300 drugs, 7000 microarray and 800 targets. Moreover, we developed the signature based and target based functions to aid drug repositioning. The constructed database can serve as a resource to quicken computational drug repositioning. Database URL: http://biotechlab.fudan.edu.cn/database/drugsig/.",DrugSig,0.996384323,NA,0,DrugSig,0.996384323,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/31/2017 +32597467,http://vafaeelab.com/drugSimDB.html,"A comprehensive integrated drug similarity resource for in-silico drug repositioning and beyond. . Drug similarity studies are driven by the hypothesis that similar drugs should display similar therapeutic actions and thus can potentially treat a similar constellation of diseases. 
Drug-drug similarity has been derived by variety of direct and indirect sources of evidence and frequently shown high predictive power in discovering validated repositioning candidates as well as other in-silico drug development applications. Yet, existing resources either have limited coverage or rely on an individual source of evidence, overlooking the wealth and diversity of drug-related data sources. Hence, there has been an unmet need for a comprehensive resource integrating diverse drug-related information to derive multi-evidenced drug-drug similarities. We addressed this resource gap by compiling heterogenous information for an exhaustive set of small-molecule drugs (total of 10 367 in the current version) and systematically integrated multiple sources of evidence to derive a multi-modal drug-drug similarity network. The resulting database, 'DrugSimDB' currently includes 238 635 drug pairs with significant aggregated similarity, complemented with an interactive user-friendly web interface (http://vafaeelab.com/drugSimDB.html), which not only enables database ease of access, search, filtration and export, but also provides a variety of complementary information on queried drugs and interactions. The integration approach can flexibly incorporate further drug information into the similarity network, providing an easily extendable platform. The database compilation and construction source-code has been well-documented and semi-automated for any-time upgrade to account for new drugs and up-to-date drug information.",DrugSimDB,0.995589495,NA,0,DrugSimDB,0.995589495,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +33104791,http://drugspacex.simm.ac.cn,"DrugSpaceX: a large screenable and synthetically tractable database extending drug space. One of the most prominent topics in drug discovery is efficient exploration of the vast drug-like chemical space to find synthesizable and novel chemical structures with desired biological properties. 
To address this challenge, we created the DrugSpaceX (https://drugspacex.simm.ac.cn/) database based on expert-defined transformations of approved drug molecules. The current version of DrugSpaceX contains >100 million transformed chemical products for virtual screening, with outstanding characteristics in terms of structural novelty, diversity and large three-dimensional chemical space coverage. To illustrate its practical application in drug discovery, we used a case study of discoidin domain receptor 1 (DDR1), a kinase target implicated in fibrosis and other diseases, to show DrugSpaceX performing a quick search of initial hit compounds. Additionally, for ligand identification and optimization purposes, DrugSpaceX also provides several subsets for download, including a 10% diversity subset, an extended drug-like subset, a drug-like subset, a lead-like subset, and a fragment-like subset. In addition to chemical properties and transformation instructions, DrugSpaceX can locate the position of transformation, which will enable medicinal chemists to easily integrate strategy planning and protection design.",DrugSpaceX,0.995280385,NA,0,DrugSpaceX,0.995280385,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +25990557,http://tanlab.ucdenver.edu/DSigDB,"DSigDB: drug signatures database for gene set analysis. Unlabelled We report the creation of Drug Signatures Database (DSigDB), a new gene set resource that relates drugs/compounds and their target genes, for gene set enrichment analysis (GSEA). DSigDB currently holds 22 527 gene sets, consists of 17 389 unique compounds covering 19 531 genes. We also developed an online DSigDB resource that allows users to search, view and download drugs/compounds and gene sets. DSigDB gene sets provide seamless integration to GSEA software for linking gene expressions with drugs/compounds for drug repurposing and translational research. 
Availability and implementation DSigDB is freely available for non-commercial use at http://tanlab.ucdenver.edu/DSigDB. Supplementary information Supplementary data are available at Bioinformatics online. Contact aikchoon.tan@ucdenver.edu.",DSigDB,0.996987581,Drug Signatures Database,0.949252993,DSigDB,0.996987581,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/19/2015 +34366563,http://dsld.nlm.nih.gov/dsld,"Modernization of the National Institutes of Health Dietary Supplement Label Database. Launched in 2008, NIH's DSLD (https://dsld.nlm.nih.gov/dsld/) currently catalogs information printed on over 125,000 (historical and current) labels of dietary supplement products sold in the U.S.. The database is maintained and updated continuously, and new versions deployed regularly. The new home page includes a prominent search bar and counter that displays the number of searchable labels in the database. The redesigned website yields near-instantaneous label retrieval, a more attractive layout of information, tailored search filters and download options, and the ability to view data in pictorial formats resulting in a much-improved user experience. The modernization of the DSLD ensures that this NIH resource has new forms of data delivery to meet the needs of App developers and data scientists, and improved performance for users. The DSLD is updated frequently to reflect the products sold in the rapidly evolving U.S. dietary supplement market.",DSLD,0.982116222,Supplement Label Database,0.595839878,DSLD,0.982116222,1,NA,30774152,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,6/25/2021 +30774152,http://dsld.nlm.nih.gov,"Using the Dietary Supplement Label Database to Identify Potentially Harmful Dietary Supplement Ingredients. Over half of young adults, athletes, and Military Service Members self-report using at least one dietary supplement (DS) 1 or more times per week. 
DS may be consumed to improve health, provide more energy, increase muscle strength, and/or enhance performance. The United States Food and Drug Administration (FDA) has raised concerns regarding adulteration, safety, and adverse events associated with DS marketed for brain health and bodybuilding. Some DS products may compromise health as well as lead to a serious adverse event. The National Institutes of Health (NIH) Dietary Supplement Label Database (DSLD), available at https://dsld.nlm.nih.gov/, can be freely accessed and used by researchers, providers, and consumers alike to screen for potentially harmful DS. It was developed to serve the research community and as a resource for health care providers and the public. Herein we provide two examples of how the database can be used to identify DS ingredients of concern in products marketed for brain health and bodybuilding. The search for DS marketed for brain health returned 49 unique DS, and the search on DS marketed for bodybuilding returned 18 unique DS. Search results were cross-referenced with the Operation Supplement Safety High-Risk Supplement List, the FDA Tainted Products Marketed as Dietary Supplements list, the Natural Medicines database, and NIH Office of Dietary Supplements Fact Sheets. Three ingredients found in DS marketed for brain health and two ingredients in DS marketed for bodybuilding were identified as ""of concern"". Educational tools, including the DSLD, can help consumers and providers make informed decisions regarding DS.",DSLD,0.953341067,Dietary Supplement Label Database,0.795074418,DSLD,0.953341067,1,NA,34366563,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,9/1/2018 +30380071,http://dsmnc.big.ac.cn,"DSMNC: a database of somatic mutations in normal cells. Numerous non-inherited somatic mutations, distinct from those of germ-line origin, occur in somatic cells during DNA replication per cell-division. 
The somatic mutations, recording the unique genetic cell-lineage 'history' of each proliferating normal cell, are important but remain to be investigated because of their ultra-low frequency hidden in the genetic background of heterogeneous cells. Luckily, the recent development of single-cell genomics biotechnologies enables the screening and collection of the somatic mutations, especial single nucleotide variations (SNVs), occurring in normal cells. Here, we established DSMNC: a database of somatic mutations in normal cells (http://dsmnc.big.ac.cn/), which provides most comprehensive catalogue of somatic SNVs in single cells from various normal tissues. In the current version, the database collected √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº0.8 million SNVs accumulated in √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº600 single normal cells (579 human cells and 39 mouse cells). The database interface supports the user-friendly capability of browsing and searching the SNVs and their annotation information. DSMNC, which serves as a timely and valuable collection of somatic mutations in individual normal cells, has made it possible to analyze the burdens and signatures of somatic mutations in various types of heterogeneous normal cells. Therefore, DSMNC will significantly improve our understanding of the characteristics of somatic mutations in normal cells.",DSMNC,0.989300251,NA,0,DSMNC,0.989300251,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +33426407,http://comptox.epa.gov/dashboard,"EPA's DSSTox database: History of development of a curated chemistry resource supporting computational toxicology research. . The US Environmental Protection Agency's (EPA) Distributed Structure-Searchable Toxicity (DSSTox) database, launched publicly in 2004, currently exceeds 875 K substances spanning hundreds of lists of interest to EPA and environmental researchers. 
From its inception, DSSTox has focused curation efforts on resolving chemical identifier errors and conflicts in the public domain towards the goal of assigning accurate chemical structures to data and lists of importance to the environmental research and regulatory community. Accurate structure-data associations, in turn, are necessary inputs to structure-based predictive models supporting hazard and risk assessments. In 2014, the legacy, manually curated DSSTox_V1 content was migrated to a MySQL data model, with modern cheminformatics tools supporting both manual and automated curation processes to increase efficiencies. This was followed by sequential auto-loads of filtered portions of three public datasets: EPA's Substance Registry Services (SRS), the National Library of Medicine's ChemID, and PubChem. This process was constrained by a key requirement of uniquely mapped identifiers (i.e., CAS RN, name and structure) for each substance, rejecting content where any two identifiers were conflicted either within or across datasets. This rejected content highlighted the degree of conflicting, inaccurate substance-structure ID mappings in the public domain, ranging from 12% (within EPA SRS) to 49% (across ChemID and PubChem). Substances successfully added to DSSTox from each auto-load were assigned to one of five qc_levels, conveying curator confidence in each dataset. This process enabled a significant expansion of DSSTox content to provide better coverage of the chemical landscape of interest to environmental scientists, while retaining focus on the accuracy of substance-structure-data associations. 
Currently, DSSTox serves as the core foundation of EPA's CompTox Chemicals Dashboard [https://comptox.epa.gov/dashboard], which provides public access to DSSTox content in support of a broad range of modeling and research activities within EPA and, increasingly, across the field of computational toxicology.",DSSTox,0.9958359,Toxicity,0.549814343,DSSTox,0.9958359,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2019 +26909679,http://bi.sky.zstu.edu.cn/DsTRD/home.php,"DsTRD: Danshen Transcriptional Resource Database. Salvia miltiorrhiza has been comprehensively studied as a medicinal model plant. However, research progress on this species is significantly hindered by its unavailable genome sequences and limited number of expressed sequence tags in the National Center for Biotechnology Information database. Thus, a transcript database must be developed to assist researchers to browse, search, and align sequences for gene cloning and functional analysis in S. miltiorrhiza. In this study, the Danshen Transcriptional Resource Database (DsTRD) was built using 76,531 transcribed sequences assembled from 12 RNA-Seq transcriptomes. Among these 12 RNA-seq data, ten were downloaded from NCBI database. The remaining two were enced on the Hiseq2000 platform using the stem and hairy-root of S. miltiorrhiza. The transcripts were annotated as protein-coding RNAs, long non-coding RNAs, microRNA precursors, and phased secondary small-interfering RNA genes through several bioinformatics methods. The tissue expression levels for each transcript were also calculated and presented in terms of RNA-Seq data. Overall, DsTRD facilitates browsing and searching for sequences and functional annotations of S. miltiorrhiza. 
DsTRD is freely available at http://bi.sky.zstu.edu.cn/DsTRD/home.php.",DsTRD,0.992711484,Danshen Transcriptional Resource Database,0.94311095,DsTRD,0.992711484,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/24/2016 +31560645,http://www.cdc.gov/nccdphp/dnpao/data-trends-maps/index.html,"Data for Decision-Making: Exploring the Division of Nutrition, Physical Activity, and Obesity's Data, Trends, and Maps. Public health practitioners need quick and easy access to reliable surveillance data to monitor states' progress over time, compare benchmarks nationally or among states, and make strategic decisions about priorities and resources. Data, Trends, and Maps (DTM) at https://www.cdc.gov/nccdphp/dnpao/data-trends-maps/index.html is a free, online interactive database that houses and displays data on nutrition, physical activity, breastfeeding, and obesity that practitioners can use for public health action. Created in 2015 by the Centers for Disease Control and Prevention's (CDC) Division of Nutrition, Physical Activity, and Obesity, DTM was updated and relaunched in April 2017 with the capability to customize and download data sets directly; DTM also has other user-friendly features, such as visualization options. Since its relaunch, DTM has received more than 386,000 page views from approximately 110,000 unique visitors. However, the potential exists for more widespread use of DTM if more public health practitioners understood what the site offered and how others have used it in the field. Here, we explain how public health practitioners can explore the most recent state-level data on nutrition, physical activity, breastfeeding, and obesity and use this data to inform programmatic and policy efforts to prevent and control chronic diseases. 
We demonstrate 3 different ways practitioners can visualize data (ie, Explore by Location, Explore by Topic, and the Open Data Portal) and present 3 real-world examples to highlight DTM's utility as a public health tool.",DTM,0.971463203,Trends,0.880578995,DTM,0.971463203,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/26/2019 +33084904,http://www.tartaglialab.com/dualseq,"DualSeqDB: the host-pathogen dual RNA sequencing database for infection processes. Despite antibiotic resistance being a matter of growing concern worldwide, the bacterial mechanisms of pathogenesis remain underexplored, restraining our ability to develop new antimicrobials. The rise of high-throughput sequencing technology has made available a massive amount of transcriptomic data that could help elucidate the mechanisms underlying bacterial infection. Here, we introduce the DualSeqDB database, a resource that helps the identification of gene transcriptional changes in both pathogenic bacteria and their natural hosts upon infection. DualSeqDB comprises nearly 300√ɬÉ√ǬÇ√ɬÇ√Ǭ†000 entries from eight different studies, with information on bacterial and host differential gene expression under in vivo and in vitro conditions. Expression data values were calculated entirely from raw data and analyzed through a standardized pipeline to ensure consistency between different studies. It includes information on seven different strains of pathogenic bacteria and a variety of cell types and tissues in Homo sapiens, Mus musculus and Macaca fascicularis at different time points. We envisage that DualSeqDB can help the research community in the systematic characterization of genes involved in host infection and help the development and tailoring of new molecules against infectious diseases. 
DualSeqDB is freely available at http://www.tartaglialab.com/dualseq.",DualSeqDB,0.997340143,NA,0,DualSeqDB,0.997340143,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +26019122,http://coins.mrn.org/dx,"COINS Data Exchange: An open platform for compiling, curating, and disseminating neuroimaging data. Neuroimaging data collection is inherently expensive. Maximizing the return on investment in neuroimaging studies requires that neuroimaging data be re-used whenever possible. In an effort to further scientific knowledge, the COINS Data Exchange (DX) (http://coins.mrn.org/dx) aims to make data sharing seamless and commonplace. DX takes a three-pronged approach towards improving the overall state of data sharing within the neuroscience community. The first prong is compiling data into one location that has been collected from all over the world in many different formats. The second prong is curating the data so that it can be stored in one consistent format and so that data QA/QC measures can be assured. The third prong is disseminating the data so that it is easy to consume and straightforward to interpret. This paper explains the concepts behind each prong and describes some challenges and successes that the Data Exchange has experienced.",DX,0.759305418,COINS Data Exchange,0.739541188,DX,0.759305418,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/24/2015 +28575155,http://bioinfo.ibp.ac.cn/Dynamic-BM,"Dynamic-BM: multispecies Dynamic BodyMap database from temporal RNA-seq data. Biological processes, especially developmental processes, are often dynamic. Previous BodyMap projects for human and mouse have provided researchers with portals to tissue-specific gene expression, but these efforts have not included dynamic gene expression patterns. 
Over the past few years, substantial progress in our understanding of the molecular mechanisms of protein-coding and long noncoding RNA (lncRNA) genes in development processes has been achieved through numerous time series RNA sequencing (RNA-seq) studies. However, none of the existing databases focuses on these time series data, thus rendering the exploration of dynamic gene expression patterns inconvenient. Here, we present Dynamic BodyMap (Dynamic-BM), a database for temporal gene expression profiles, obtained from 2203 time series of RNA-seq samples, covering >25 tissues from five species. Dynamic-BM has a user-friendly Web interface designed for browsing and searching the dynamic expression pattern of genes from different sources. It is an open resource for efficient data exploration, providing dynamic expression profiles of both protein-coding genes and lncRNAs to facilitate the generation of new hypotheses in developmental biology research. Additionally, Dynamic-BM includes a literature-based knowledgebase for lncRNAs associated with tissue development and a list of manually selected lncRNA candidates that may be involved in tissue development. Dynamic-BM is available at http://bioinfo.ibp.ac.cn/Dynamic-BM.",Dynamic-BM,0.985531533,NA,0,Dynamic-BM,0.985531533,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2018 +22539672,http://chemoinfo.ipmc.cnrs.fr/e-drug3d.html,"e-Drug3D: 3D structure collections dedicated to drug repurposing and fragment-based drug design. Motivation In the drug discovery field, new uses for old drugs, selective optimization of side activities and fragment-based drug design (FBDD) have proved to be successful alternatives to high-throughput screening. e-Drug3D is a database of 3D chemical structures of drugs that provides several collections of ready-to-screen SD files of drugs and commercial drug fragments. They are natural inputs in studies dedicated to drug repurposing and FBDD. 
Availability e-Drug3D collections are freely available at http://chemoinfo.ipmc.cnrs.fr/e-drug3d.html either for download or for direct in silico web-based screenings.",e-Drug3D,0.993923992,NA,0,e-Drug3D,0.993923992,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/26/2012 +27766955,http://www.mypeg.info/egrasp,"e-GRASP: an integrated evolutionary and GRASP resource for exploring disease associations. Background Genome-wide association studies (GWAS) have become a mainstay of biological research concerned with discovering genetic variation linked to phenotypic traits and diseases. Both discrete and continuous traits can be analyzed in GWAS to discover associations between single nucleotide polymorphisms (SNPs) and traits of interest. Associations are typically determined by estimating the significance of the statistical relationship between genetic loci and the given trait. However, the prioritization of bona fide, reproducible genetic associations from GWAS results remains a central challenge in identifying genomic loci underlying common complex diseases. Evolutionary-aware meta-analysis of the growing GWAS literature is one way to address this challenge and to advance from association to causation in the discovery of genotype-phenotype relationships. Description We have created an evolutionary GWAS resource to enable in-depth query and exploration of published GWAS results. This resource uses the publically available GWAS results annotated in the GRASP2 database. The GRASP2 database includes results from 2082 studies, 177 broad phenotype categories, and ~8.87 million SNP-phenotype associations. For each SNP in e-GRASP, we present information from the GRASP2 database for convenience as well as evolutionary information (e.g., rate and timespan). Users can, therefore, identify not only SNPs with highly significant phenotype-association P-values, but also SNPs that are highly replicated and/or occur at evolutionarily conserved sites that are likely to be functionally important. 
Additionally, we provide an evolutionary-adjusted SNP association ranking (E-rank) that uses cross-species evolutionary conservation scores and population allele frequencies to transform P-values in an effort to enhance the discovery of SNPs with a greater probability of biologically meaningful disease associations. Conclusion By adding an evolutionary dimension to the GWAS results available in the GRASP2 database, our e-GRASP resource will enable a more effective exploration of SNPs not only by the statistical significance of trait associations, but also by the number of studies in which associations have been replicated, and the evolutionary context of the associated mutations. Therefore, e-GRASP will be a valuable resource for aiding researchers in the identification of bona fide, reproducible genetic associations from GWAS results. This resource is freely available at http://www.mypeg.info/egrasp .",e-GRASP,0.995206547,NA,0,e-GRASP,0.995206547,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/17/2016 +27153640,http://www.sheba-cancer.org.il/e23D,"e23D: database and visualization of A-to-I RNA editing sites mapped to 3D protein structures. Unlabelled e23D, a database of A-to-I RNA editing sites from human, mouse and fly mapped to evolutionary related protein 3D structures, is presented. Genomic coordinates of A-to-I RNA editing sites are converted to protein coordinates and mapped onto 3D structures from PDB or theoretical models from ModBase. e23D allows visualization of the protein structure, modeling of recoding events and orientation of the editing with respect to nearby genomic functional sites from databases of disease causing mutations and genomic polymorphism. 
Availability and implementation http://www.sheba-cancer.org.il/e23D CONTACT: oz.solomon@live.biu.ac.il or Eran.Eyal@sheba.health.gov.il.",e23D,0.991435468,NA,0,e23D,0.991435468,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/19/2016 +23897986,http://www.fda.gov/ScienceResearch/BioinformaticsTools/EstrogenicActivityDatabaseEADB/default.htm,"EADB: an estrogenic activity database for assessing potential endocrine activity. Endocrine-active chemicals can potentially have adverse effects on both humans and wildlife. They can interfere with the body's endocrine system through direct or indirect interactions with many protein targets. Estrogen receptors (ERs) are one of the major targets, and many endocrine disruptors are estrogenic and affect the normal estrogen signaling pathways. However, ERs can also serve as therapeutic targets for various medical conditions, such as menopausal symptoms, osteoporosis, and ER-positive breast cancer. Because of the decades-long interest in the safety and therapeutic utility of estrogenic chemicals, a large number of chemicals have been assayed for estrogenic activity, but these data exist in various sources and different formats that restrict the ability of regulatory and industry scientists to utilize them fully for assessing risk-benefit. To address this issue, we have developed an Estrogenic Activity Database (EADB; http://www.fda.gov/ScienceResearch/BioinformaticsTools/EstrogenicActivityDatabaseEADB/default.htm) and made it freely available to the public. EADB contains 18,114 estrogenic activity data points collected for 8212 chemicals tested in 1284 binding, reporter gene, cell proliferation, and in vivo assays in 11 different species. The chemicals cover a broad chemical structure space and the data span a wide range of activities. A set of tools allow users to access EADB and evaluate potential endocrine activity of chemicals. 
As a case study, a classification model was developed using EADB for predicting ER binding of chemicals.",EADB,0.976858467,Estrogenic Activity Database,0.930413914,EADB,0.976858467,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/28/2013 +33683565,http://healthy-worm-database.eu,"Health and longevity studies in C. elegans: the ""healthy worm database"" reveals strengths, weaknesses and gaps of test compound-based studies. Several biogerontology databases exist that focus on genetic or gene expression data linked to health as well as survival, subsequent to compound treatments or genetic manipulations in animal models. However, none of these has yet collected experimental results of compound-related health changes. Since quality of life is often regarded as more valuable than length of life, we aim to fill this gap with the ""Healthy Worm Database"" ( http://healthy-worm-database.eu ). Literature describing health-related compound studies in the aging model Caenorhabditis elegans was screened, and data for 440 compounds collected. The database considers 189 publications describing 89 different phenotypes measured in 2995 different conditions. Besides enabling a targeted search for promising compounds for further investigations, this database also offers insights into the research field of studies on healthy aging based on a frequently used model organism. Some weaknesses of C. elegans-based aging studies, like underrepresented phenotypes, especially concerning cognitive functions, as well as the convenience-based use of young worms as the starting point for compound treatment or phenotype measurement are discussed. 
In conclusion, the database provides an anchor for the search for compounds affecting health, with a link to public databases, and it further highlights some potential shortcomings in current aging research.",ealthy,0.504844904,NA,0,ealthy,0.504844904,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/8/2021 +32964659,http://african-compounds.org,"Pharmacoinformatic Investigation of Medicinal Plants from East Africa. Medicinal plants have widely been used in the traditional treatment of ailments and have been proven effective. Their contribution still holds an important place in modern drug discovery due to their chemical, and biological diversities. However, the poor documentation of traditional medicine, in developing African countries for instance, can lead to the loss of knowledge related to such practices. In this study, we present the Eastern Africa Natural Products Database (EANPDB) containing the structural and bioactivity information of 1870 unique molecules isolated from about 300 source species from the Eastern African region. This represents the largest collection of natural products (NPs) from this geographical region, covering literature data of the period from 1962 to 2019. The computed physicochemical properties and toxicity profiles of each compound have been included. A comparative analysis of some physico-chemical properties like molecular weight, H-bond donor/acceptor, logPo/w , etc. as well scaffold diversity analysis has been carried out with other published NP databases. EANPDB was combined with the previously published Northern African Natural Products Database (NANPDB), to form a merger African Natural Products Database (ANPDB), containing √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº6500 unique molecules isolated from about 1000 source species (freely available at http://african-compounds.org). 
As a case study, latrunculins A and B isolated from the sponge Negombata magnifica (Podospongiidae) with previously reported antitumour activities, were identified via substructure searching as molecules to be explored as putative binders of histone deacetylases (HDACs).",EANPDB,0.993993628,Eastern Africa Natural Products Database,0.975782382,EANPDB,0.993993628,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/8/2020 +26582919,http://www.ebi.ac.uk/metagenomics,"EBI metagenomics in 2016--an expanding and evolving resource for the analysis and archiving of metagenomic data. EBI metagenomics (https://www.ebi.ac.uk/metagenomics/) is a freely available hub for the analysis and archiving of metagenomic and metatranscriptomic data. Over the last 2 years, the resource has undergone rapid growth, with an increase of over five-fold in the number of processed samples and consequently represents one of the largest resources of analysed shotgun metagenomes. Here, we report the status of the resource in 2016 and give an overview of new developments. In particular, we describe updates to data content, a complete overhaul of the analysis pipeline, streamlining of data presentation via the website and the development of a new web based tool to compare functional analyses of sequence runs within a study. We also highlight two of the higher profile projects that have been analysed using the resource in the last year: the oceanographic projects Ocean Sampling Day and Tara Oceans.",EBI metagenomics,0.739527357,NA,0,EBI metagenomics,0.739527357,1,31696235,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,11/17/2015 +34015403,http://ebola.bicpu.edu.in,"Ebolabase: Zaire ebolavirus-human protein interaction database for drug-repurposing. Ebola Virus (EBOV) is one of the deadliest pathogenic virus which causes hemorrhagic fever. 
Though many Ebola-human interaction studies and databases are already reported, the unavailability of an adequate model and lack of publically accessible resources requires a comprehensive study to curate the Ebola-Human-Drug interactions. In total, 270 human proteins interacted with EBOV are collected from published experimental evidence. Then the protein-protein interaction networks are generated as EBOV-human and EBOV-Human-Drugs interaction. These results can help the researcher to find the effective repurposed drug for EBOV treatment. Further, the illustration of gene enrichment and pathway analysis would provide knowledge and insight of EBOV-human interaction describes the importance of the study. Investigating the networks may help to identify a suitable human-based drug target for ebola research community. The inclusion of an emerging concept, a human-based drug targeted therapy plays a very significant role in drug repurposing which reduces the time and effort is the highlight of the current research. An integrated database namely, Ebolabase has been developed and linked with other repositories such as Epitopes, Structures, Literature, Genomics and Proteomics. All generated networks are made to be viewed in a customized manner and the required data can be downloaded freely. The Ebolabase is available at http://ebola.bicpu.edu.in.",Ebolabase,0.985048413,NA,0,Ebolabase,0.985048413,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/17/2021 +32632099,http://kg.ebrains.eu,"Database of literature derived cellular measurements from the murine basal ganglia. Quantitative measurements and descriptive statistics of different cellular elements in the brain are typically published in journal articles as text, tables, and example figures, and represent an important basis for the creation of biologically constrained computational models, design of intervention studies, and comparison of subject groups. 
Such data can be challenging to extract from publications and difficult to normalise and compare across studies, and few studies have so far attempted to integrate quantitative information available in journal articles. We here present a database of quantitative information about cellular parameters in the frequently studied murine basal ganglia. The database holds a curated and normalised selection of currently available data collected from the literature and public repositories, providing the most comprehensive collection of quantitative neuroanatomical data from the basal ganglia to date. The database is shared as a downloadable resource from the EBRAINS Knowledge Graph (https://kg.ebrains.eu), together with a workflow that allows interested researchers to update and expand the database with data from future reports.",EBRAINS,0.80999589,NA,0,EBRAINS,0.80999589,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/6/2020 +32646415,http://www.phoc.org.cn/ECCParaCorp,"ECCParaCorp: a cross-lingual parallel corpus towards cancer education, dissemination and application. Background The increasing global cancer incidence corresponds to serious health impact in countries worldwide. Knowledge-powered health system in different languages would enhance clinicians' healthcare practice, patients' health management and public health literacy. High-quality corpus containing cancer information is the necessary foundation of cancer education. Massive non-structural information resources exist in clinical narratives, electronic health records (EHR) etc. They can only be used for training AI models after being transformed into structured corpus. However, the scarcity of multilingual cancer corpus limits the intelligent processing, such as machine translation in medical scenarios. Thus, we created the cancer specific cross-lingual corpus and open it to the public for academic use. 
Methods Aiming to build an English-Chinese cancer parallel corpus, we developed a workflow of seven steps including data retrieval, data parsing, data processing, corpus implementation, assessment verification, corpus release, and application. We applied the workflow to a cross-lingual, comprehensive and authoritative cancer information resource, PDQ (Physician Data Query). We constructed, validated and released the parallel corpus named as ECCParaCorp, made it openly accessible online. Results The proposed English-Chinese Cancer Parallel Corpus (ECCParaCorp) consists of 6685 aligned text pairs in Xml, Excel, Csv format, containing 5190 sentence pairs, 1083 phrase pairs and 412 word pairs, which involved information of 6 cancers including breast cancer, liver cancer, lung cancer, esophageal cancer, colorectal cancer, and stomach cancer, and 3 cancer themes containing cancer prevention, screening, and treatment. All data in the parallel corpus are online, available for users to browse and download ( http://www.phoc.org.cn/ECCParaCorp/ ). Conclusions ECCParaCorp is a parallel corpus focused on cancer in a cross-lingual form, which is openly accessible. It would make up the imbalance of scarce multilingual corpus resources, bridge the gap between human readable information and machine understanding data resources, and would contribute to intelligent technology application as a preparatory data foundation e.g. cancer-related machine translation, cancer system development towards medical education, and disease-oriented knowledge extraction.",ECCParaCorp,0.992599487,Physician Data Query,0.659761125,ECCParaCorp,0.992599487,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/9/2020 +22828716,http://www.ecgview.org,"Construction of an open-access QT database for detecting the proarrhythmia potential of marketed drugs: ECG-ViEW. Information about the QT interval from surface electrocardiograms (ECGs) is essential for surveillance of the proarrhythmia potential of marketed drugs. 
However, ECG records obtained in daily practice cannot be easily used for this purpose without labor-intensive manual effort. This study was aimed at constructing an open-access QT database, the Electrocardiogram Vigilance with Electronic Data Warehouse (ECG-ViEW). This longitudinal observational database contains 710,369 measurements of QT and associated clinical data from 371,401 patients. The de-identified database is freely available at http://www.ecgview.org.",ECG-ViEW,0.982872033,Electrocardiogram Vigilance with,0.866497644,ECG-ViEW,0.982872033,1,28437484,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,7/25/2012 +28437484,http://www.ecgview.org,"ECG-ViEW II, a freely accessible electrocardiogram database. The Electrocardiogram Vigilance with Electronic data Warehouse II (ECG-ViEW II) is a large, single-center database comprising numeric parameter data of the surface electrocardiograms of all patients who underwent testing from 1 June 1994 to 31 July 2013. The electrocardiographic data include the test date, clinical department, RR interval, PR interval, QRS duration, QT interval, QTc interval, P axis, QRS axis, and T axis. These data are connected with patient age, sex, ethnicity, comorbidities, age-adjusted Charlson comorbidity index, prescribed drugs, and electrolyte levels. This longitudinal observational database contains 979,273 electrocardiograms from 461,178 patients over a 19-year study period. This database can provide an opportunity to study electrocardiographic changes caused by medications, disease, or other demographic variables. ECG-ViEW II is freely available at http://www.ecgview.org.",ECG-ViEW II,0.971217104,Electrocardiogram Vigilance with Electronic data Warehouse II,0.880449582,ECG-ViEW II,0.971217104,1,22828716,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,4/24/2017 +26699919,http://ecgene.bioinfo-minzhao.org,"ECGene: A Literature-Based Knowledgebase of Endometrial Cancer Genes. 
Endometrial cancer (EC) ranks as the sixth common cancer for women worldwide. To better distinguish cancer subtypes and identify effective early diagnostic biomarkers, we need improved understanding of the biological mechanisms associated with EC dysregulated genes. Although there is a wealth of clinical and molecular information relevant to EC in the literature, there has been no systematic summary of EC-implicated genes. In this study, we developed a literature-based database ECGene (Endometrial Cancer Gene database) with comprehensive annotations. ECGene features manual curation of 414 genes from thousands of publications, results from eight EC gene expression datasets, precomputation of coexpressed long noncoding RNAs, and an EC-implicated gene interactome. In the current release, we generated and comprehensively annotated a list of 458 EC-implicated genes. We found the top-ranked EC-implicated genes are frequently mutated in The Cancer Genome Atlas (TCGA) tumor samples. Furthermore, systematic analysis of coexpressed lncRNAs provided insight into the important roles of lncRNA in EC development. ECGene has a user-friendly Web interface and is freely available at http://ecgene.bioinfo-minzhao.org/. As the first literature-based online resource for EC, ECGene serves as a useful gateway for researchers to explore EC genetics.",ECGene,0.994564354,Endometrial Cancer Gene database,0.8197405,ECGene,0.994564354,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/13/2016 +34010390,http://echinobase.org,"Integration of 1:1 orthology maps and updated datasets into Echinobase. . Echinobase (https://echinobase.org) is a central online platform that generates, manages and hosts genomic data relevant to echinoderm research. 
While the resource primarily serves the echinoderm research community, the recent release of an excellent quality genome for the frequently studied purple sea urchin (Strongylocentrotus purpuratus genome, v5.0) has provided an opportunity to adapt to the needs of a broader research community across other model systems. To this end, establishing pipelines to identify orthologous genes between echinoderms and other species has become a priority in many contexts including nomenclature, linking to data in other model organisms, and in internal functionality where data gathered in one hosted species can be associated with genes in other hosted echinoderms. This paper describes the orthology pipelines currently employed by Echinobase and how orthology data are processed to yield 1:1 ortholog mappings between a variety of echinoderms and other model taxa. We also describe functions of interest that have recently been included on the resource, including an updated developmental time course for S.purpuratus, and additional tracks for genome browsing. These data enhancements will increase the accessibility of the resource to non-echinoderm researchers and simultaneously expand the data quality and quantity available to core Echinobase users. Database URL: https://echinobase.org.",Echinobase,0.997653604,NA,0,Echinobase,0.997653604,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +26800861,"http://echinotol.org, http://echinodb.uncc.edu","EchinoDB, an application for comparative transcriptomics of deeply-sampled clades of echinoderms. Background One of our goals for the echinoderm tree of life project (http://echinotol.org) is to identify orthologs suitable for phylogenetic analysis from next-generation transcriptome data. The current dataset is the largest assembled for echinoderm phylogeny and transcriptomics. We used RNA-Seq to profile adult tissues from 42 echinoderm specimens from 24 orders and 37 families. 
In order to achieve sampling members of clades that span key evolutionary divergence, many of our exemplars were collected from deep and polar seas. Description A small fraction of the transcriptome data we produced is being used for phylogenetic reconstruction. Thus to make a larger dataset available to researchers with a wide variety of interests, we made a web-based application, EchinoDB (http://echinodb.uncc.edu). EchinoDB is a repository of orthologous transcripts from echinoderms that is searchable via keywords and sequence similarity. Conclusions From transcripts we identified 749,397 clusters of orthologous loci. We have developed the information technology to manage and search the loci their annotations with respect to the Sea Urchin (Strongylocentrotus purpuratus) genome. Several users have already taken advantage of these data for spin-off projects in developmental biology, gene family studies, and neuroscience. We hope others will search EchinoDB to discover datasets relevant to a variety of additional questions in comparative biology.",EchinoDB,0.992635667,NA,0,EchinoDB,0.992635667,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/22/2016 +"23109553, 26481353",http://www.ecmdb.ca,"ECMDB: the E. coli Metabolome Database. The Escherichia coli Metabolome Database (ECMDB, http://www.ecmdb.ca) is a comprehensively annotated metabolomic database containing detailed information about the metabolome of E. coli (K-12). Modelled closely on the Human and Yeast Metabolome Databases, the ECMDB contains >2600 metabolites with links to √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº1500 different genes and proteins, including enzymes and transporters. The information in the ECMDB has been collected from dozens of textbooks, journal articles and electronic databases. 
Each metabolite entry in the ECMDB contains an average of 75 separate data fields, including comprehensive compound descriptions, names and synonyms, chemical taxonomy, compound structural and physicochemical data, bacterial growth conditions and substrates, reactions, pathway information, enzyme data, gene/protein sequence data and numerous hyperlinks to images, references and other public databases. The ECMDB also includes an extensive collection of intracellular metabolite concentration data compiled from our own work as well as other published metabolomic studies. This information is further supplemented with thousands of fully assigned reference nuclear magnetic resonance and mass spectrometry spectra obtained from pure E. coli metabolites that we (and others) have collected. Extensive searching, relational querying and data browsing tools are also provided that support text, chemical structure, spectral, molecular weight and gene/protein sequence queries. Because of E. coli's importance as a model organism for biologists and as a biofactory for industry, we believe this kind of database could have considerable appeal not only to metabolomics researchers but also to molecular biologists, systems biologists and individuals in the biotechnology industry.",ECMDB,0.995682538,coli Metabolome Database,0.95562607,ECMDB,0.995682538,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/19/2015 +24569102,http://eco.iarc.fr,"The European Cancer Observatory: A new data resource. Population-based cancer registries provide indispensable information on cancer incidence and survival, which cannot be obtained by any other means. It is clear that complete and effective use of these data is essential for cancer control, but sharing this information in a uniform, timely and user-friendly manner has been somewhat limited up to now. 
The European Cancer Observatory (ECO, http://eco.iarc.fr) has been developed in the framework of the EUROCOURSE project (EUROpe against Cancer: Optimisation of Use of Registries for Scientific Excellence in Research) as a comprehensive resource combining all the information currently available in Europe on cancer incidence, mortality, survival and prevalence. The website provides analytical and presentation tools to examine national estimates for 2012 in 40 European countries (EUCAN), data for 130 national or sub-national areas covered by cancer registries for up to 60 years, until 2011 (EUREG) and a planned mechanism for data download (European Cancer Incidence and Mortality (EUROCIM)). The generated statistics outline the considerable variability across Europe in the rates of all major cancer types and help identify key concerns that need to be addressed by public health policies e.g. the unprecedented rise of lung cancer incidence in women with its full impact expected within a decade or so. The support, maintenance and further development of the ECO website should be a high priority for European cancer policymakers, to continue providing this unique information to health professionals, researchers and the general public in Europe and beyond.",ECO,0.859230498,European Cancer Observatory,0.629971464,ECO,0.859230498,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/22/2014 +23143106,http://EcoCyc.org,"EcoCyc: fusing model organism databases with systems biology. EcoCyc (http://EcoCyc.org) is a model organism database built on the genome sequence of Escherichia coli K-12 MG1655. Expert manual curation of the functions of individual E. coli gene products in EcoCyc has been based on information found in the experimental literature for E. coli K-12-derived strains. Updates to EcoCyc content continue to improve the comprehensive picture of E. coli biology. 
The utility of EcoCyc is enhanced by new tools available on the EcoCyc web site, and the development of EcoCyc as a teaching tool is increasing the impact of the knowledge collected in EcoCyc.",EcoCyc,0.992975175,NA,0,EcoCyc,0.992975175,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/9/2012 +25352573,http://www.casper.organ.su.se/ECODAB,"Development of the ECODAB into a relational database for Escherichia coli O-antigens and other bacterial polysaccharides. Escherichia coli O-antigen database (ECODAB) is a web-based application to support the collection of E. coli O-antigen structures, polymerase and flippase amino acid sequences, NMR chemical shift data of O-antigens as well as information on glycosyltransferases (GTs) involved in the assembly of O-antigen polysaccharides. The database content has been compiled from scientific literature. Furthermore, the system has evolved from being a repository to one that can be used for generating novel data on its own. GT specificity is suggested through sequence comparison with GTs whose function is known. The migration of ECODAB to a relational database has allowed the automation of all processes to update, retrieve and present information, thereby, endowing the system with greater flexibility and improved overall performance. ECODAB is freely available at http://www.casper.organ.su.se/ECODAB/. Currently, data on 169 E. coli unique O-antigen entries and 338 GTs is covered. Moreover, the scope of the database has been extended so that polysaccharide structure and related information from other bacteria subsequently can be added, for example, from Streptococcus pneumoniae.",ECODAB,0.991039038,Escherichia coli O-antigen database,0.911410725,ECODAB,0.991039038,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2014 +23197660,http://ecogene.org,"EcoGene 3.0. 
EcoGene (http://ecogene.org) is a database and website devoted to continuously improving the structural and functional annotation of Escherichia coli K-12, one of the most well understood model organisms, represented by the MG1655(Seq) genome sequence and annotations. Major improvements to EcoGene in the past decade include (i) graphic presentations of genome map features; (ii) ability to design Boolean queries and Venn diagrams from EcoArray, EcoTopics or user-provided GeneSets; (iii) the genome-wide clone and deletion primer design tool, PrimerPairs; (iv) sequence searches using a customized EcoBLAST; (v) a Cross Reference table of synonymous gene and protein identifiers; (vi) proteome-wide indexing with GO terms; (vii) EcoTools access to >2000 complete bacterial genomes in EcoGene-RefSeq; (viii) establishment of a MySql relational database; and (ix) use of web content management systems. The biomedical literature is surveyed daily to provide citation and gene function updates. As of September 2012, the review of 37 397 abstracts and articles led to creation of 98 425 PubMed-Gene links and 5415 PubMed-Topic links. Annotation updates to Genbank U00096 are transmitted from EcoGene to NCBI. Experimental verifications include confirmation of a CTG start codon, pseudogene restoration and quality assurance of the Keio strain collection.",EcoGene,0.984577298,NA,0,EcoGene,0.984577298,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2012 +25650278,http://www.inetbio.org/ecolinet,"EcoliNet: a database of cofunctional gene network for Escherichia coli. . During the past several decades, Escherichia coli has been a treasure chest for molecular biology. The molecular mechanisms of many fundamental cellular processes have been discovered through research on this bacterium. Although much basic research now focuses on more complex model organisms, E. coli still remains important in metabolic engineering and synthetic biology. 
Despite its long history as a subject of molecular investigation, more than one-third of the E. coli genome has no pathway annotation supported by either experimental evidence or manual curation. Recently, a network-assisted genetics approach to the efficient identification of novel gene functions has increased in popularity. To accelerate the speed of pathway annotation for the remaining uncharacterized part of the E. coli genome, we have constructed a database of cofunctional gene network with near-complete genome coverage of the organism, dubbed EcoliNet. We find that EcoliNet is highly predictive for diverse bacterial phenotypes, including antibiotic response, indicating that it will be useful in prioritizing novel candidate genes for a wide spectrum of bacterial phenotypes. We have implemented a web server where biologists can easily run network algorithms over EcoliNet to predict novel genes involved in a pathway or novel functions for a gene. All integrated cofunctional associations can be downloaded, enabling orthology-based reconstruction of gene networks for other bacterial species as well. Database URL: http://www.inetbio.org/ecolinet.",EcoliNet,0.970784962,NA,0,EcoliNet,0.970784962,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/2/2015 +24333540,http://birg4.fbb.utm.my:8080/EcoliOverExpressionDB,"EcoliOverExpressionDB: a database of recombinant protein overexpression in E. coli. Unlabelled Recombinant protein production is a significant biotechnological process as it allows researchers to produce a specific protein in desired quantities. Escherichia coli (E. coli) is the most popular heterologous expression host for the production of recombinant proteins due to its advantages such as low cost, high-productivity, well-characterized genetics, simple growth requirements and rapid growth. There are a number of factors that influence the expression level of a recombinant protein in E. 
coli which are the gene to be expressed, the expression vector, the expression host, and the culture condition. The major motivation to develop our database, EcoliOverExpressionDB, is to provide a means for researchers to quickly locate key factors in the overexpression of certain proteins. Such information would be a useful guide for the overexpression of similar proteins in E. coli. To the best of the present researchers' knowledge, in general and specifically in E. coli, EcoliOverExpressionDB is the first database of recombinant protein expression experiments which gathers the influential parameters on protein overexpression and the results in one place. Availability EcoliOverExpressionDB is freely available and accessible using all major browsers at http://birg4.fbb.utm.my:8080/EcoliOverExpressionDB/.",EcoliOverExpressionDB,0.917664945,NA,0,EcoliOverExpressionDB,0.917664945,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/11/2013 +22064863,"http://porteco.org, http://ecoliwiki.net","EcoliWiki: a wiki-based community resource for Escherichia coli. EcoliWiki is the community annotation component of the PortEco (http://porteco.org; formerly EcoliHub) project, an online data resource that integrates information on laboratory strains of Escherichia coli, its phages, plasmids and mobile genetic elements. As one of the early adopters of the wiki approach to model organism databases, EcoliWiki was designed to not only facilitate community-driven sharing of biological knowledge about E. coli as a model organism, but also to be interoperable with other data resources. EcoliWiki content currently covers genes from five laboratory E. coli strains, 21 bacteriophage genomes, F plasmid and eight transposons. EcoliWiki integrates the Mediawiki wiki platform with other open-source software tools and in-house software development to extend how wikis can be used for model organism databases. 
EcoliWiki can be accessed online at http://ecoliwiki.net.",EcoliWiki,0.997703433,NA,0,EcoliWiki,0.997703433,1,24285306,NA,NA,NA,do not merge,NA,NA,NA,NA,11/7/2011 +25065645,http://ectogem.irisa.fr,"The genome-scale metabolic network of Ectocarpus siliculosus (EctoGEM): a resource to study brown algal physiology and beyond. Brown algae (stramenopiles) are key players in intertidal ecosystems, and represent a source of biomass with several industrial applications. Ectocarpus siliculosus is a model to study the biology of these organisms. Its genome has been sequenced and a number of post-genomic tools have been implemented. Based on this knowledge, we report the reconstruction and analysis of a genome-scale metabolic network for E.√ɬÉ√ǬÇ√ɬÇ√Ǭ†siliculosus, EctoGEM (http://ectogem.irisa.fr). This atlas of metabolic pathways consists of 1866 reactions and 2020 metabolites, and its construction was performed by means of an integrative computational approach for identifying metabolic pathways, gap filling and manual refinement. The capability of the network to produce biomass was validated by flux balance analysis. EctoGEM enabled the reannotation of 56 genes within the E.√ɬÉ√ǬÇ√ɬÇ√Ǭ†siliculosus genome, and shed light on the evolution of metabolic processes. For example, E.√ɬÉ√ǬÇ√ɬÇ√Ǭ†siliculosus has the potential to produce phenylalanine and tyrosine from prephenate and arogenate, but does not possess a phenylalanine hydroxylase, as is found in other stramenopiles. It also possesses the complete eukaryote molybdenum co-factor biosynthesis pathway, as well as a second molybdopterin synthase that was most likely acquired via horizontal gene transfer from cyanobacteria by a common ancestor of stramenopiles. EctoGEM represents an evolving community resource to gain deeper understanding of the biology of brown algae and the diversification of physiological processes. 
The integrative computational method applied for its reconstruction will be valuable to set up similar approaches for other organisms distant from biological benchmark models.",EctoGEM,0.991552711,scale metabolic network of Ectocarpus,0.682853514,EctoGEM,0.991552711,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/27/2014 +22359444,http://www.kubic.nic.in/ectomychorhiza,"Ectomychorrizal DB: a symbiotic association database. Unlabelled Ectomycorrhizal (ECM) fungal species, a ""Symbiotic"" relationship between tress and fungi in forest has a great ecological and economic importance. Here is an attempt to describe database named ""EctomycorrhizalDB"", addressing ECM diversity of Central Himalaya (Kumaun region), with special emphasis on their characterization, physical properties and morphological features along with specifications. This database would help the scientific community to draw a better understanding of the environmental factors that affects species diversity. Availability The database is available for free at http://www.kubic.nic.in/ectomychorhiza.",EctomycorrhizalDB,0.745949835,NA,0,EctomycorrhizalDB,0.745949835,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/20/2012 +25451822,http://edcs.unicartagena.edu.co,"EDCs DataBank: 3D-Structure database of endocrine disrupting chemicals. Endocrine disrupting chemicals (EDCs) are a group of compounds that affect the endocrine system, frequently found in everyday products and epidemiologically associated with several diseases. The purpose of this work was to develop EDCs DataBank, the only database of EDCs with three-dimensional structures. This database was built on MySQL using the EU list of potential endocrine disruptors and TEDX list. It contains the three-dimensional structures available on PubChem, as well as a wide variety of information from different databases and text mining tools, useful for almost any kind of research regarding EDCs. 
The web platform was developed employing HTML, CSS and PHP languages, with dynamic contents in a graphic environment, facilitating information analysis. Currently EDCs DataBank has 615 molecules, including pesticides, natural and industrial products, cosmetics, drugs and food additives, among other low molecular weight xenobiotics. Therefore, this database can be used to study the toxicological effects of these molecules, or to develop pharmaceuticals targeting hormone receptors, through docking studies, high-throughput virtual screening and ligand-protein interaction analysis. EDCs DataBank is totally user-friendly and the 3D-structures of the molecules can be downloaded in several formats. This database is freely available at http://edcs.unicartagena.edu.co.",EDCs DataBank,0.928272024,NA,0,EDCs DataBank,0.928272024,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/25/2014 +24302289,http://eddb.cbi.pku.edu.cn,"EDdb: a web resource for eating disorder and its application to identify an extended adipocytokine signaling pathway related to eating disorder. Eating disorder is a group of physiological and psychological disorders affecting approximately 1% of the female population worldwide. Although the genetic epidemiology of eating disorder is becoming increasingly clear with accumulated studies, the underlying molecular mechanisms are still unclear. Recently, integration of various high-throughput data expanded the range of candidate genes and started to generate hypotheses for understanding potential pathogenesis in complex diseases. This article presents EDdb (Eating Disorder database), the first evidence-based gene resource for eating disorder. Fifty-nine experimentally validated genes from the literature in relation to eating disorder were collected as the core dataset. 
Another four datasets with 2824 candidate genes across 601 genome regions were expanded based on the core dataset using different criteria (e.g., protein-protein interactions, shared cytobands, and related complex diseases). Based on human protein-protein interaction data, we reconstructed a potential molecular sub-network related to eating disorder. Furthermore, with an integrative pathway enrichment analysis of genes in EDdb, we identified an extended adipocytokine signaling pathway in eating disorder. Three genes in EDdb (ADIPO (adiponectin), TNF (tumor necrosis factor) and NR3C1 (nuclear receptor subfamily 3, group C, member 1)) link the KEGG (Kyoto Encyclopedia of Genes and Genomes) ""adipocytokine signaling pathway"" with the BioCarta ""visceral fat deposits and the metabolic syndrome"" pathway to form a joint pathway. In total, the joint pathway contains 43 genes, among which 39 genes are related to eating disorder. As the first comprehensive gene resource for eating disorder, EDdb ( http://eddb.cbi.pku.edu.cn ) enables the exploration of gene-disease relationships and cross-talk mechanisms between related disorders. Through pathway statistical studies, we revealed that abnormal body weight caused by eating disorder and obesity may both be related to dysregulation of the novel joint pathway of adipocytokine signaling. In addition, this joint pathway may be the common pathway for body weight regulation in complex human diseases related to unhealthy lifestyle.",EDdb,0.994813681,Eating Disorder database,0.941057016,EDdb,0.994813681,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/5/2013 +30357418,http://bigd.big.ac.cn/edk,"Editome Disease Knowledgebase (EDK): a curated knowledgebase of editome-disease associations in human. RNA editing, as an essential co-/post-transcriptional RNA modification type, plays critical roles in many biological processes and involves with a variety of human diseases. 
Although several databases have been developed to collect RNA editing data in both model and non-model animals, there still lacks a resource integrating associations between editome and human disease. In this study, we present Editome-Disease Knowledgebase (EDK; http://bigd.big.ac.cn/edk), an integrated knowledgebase of RNA editome-disease associations manually curated from published literatures. In the current version, EDK incorporates 61 diseases associated with 248 experimentally validated abnormal editing events located in 32 mRNAs, 16 miRNAs, 1 lncRNA and 11 viruses, and 44 aberrant activities involved with 6 editing enzymes, which together are curated from more than 200 publications. In addition, to facilitate standardization of editome-disease knowledge integration, we propose a data curation model in EDK, factoring an abundance of relevant information to fully capture the context of editome-disease associations. Taken together, EDK is a comprehensive collection of editome-disease associations and bears the great utility in aid of better understanding the RNA editing machinery and complex molecular mechanisms associated with human diseases.",EDK,0.992027998,Editome-Disease Knowledgebase,0.989611737,EDK,0.992027998,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +23340253,http://edr.research.bcm.edu,"Research resource: the Endometrium Database Resource (EDR). In order to understand the biology of the endometrium and potentially develop new diagnostic tools and treatments for endometrial diseases, the highly orchestrated gene expression/regulation that occurs within the uterus must first be understood. Even though a wealth of information on endometrial gene expression/regulation is available, this information is scattered across several different resources in formats that can be difficult for the average bench scientist to query, integrate, and utilize. 
The Endometrium Database Resource (EDR) was created as a single evolving resource for protein- and micro-RNA-encoding genes that have been shown by gene expression microarray, Northern blot, or other experiments in the literature to have their expression regulated in the uterus of humans, mice, rats, cows, domestic pigs, guinea pigs, and sheep. Genes are annotated in EDR with basic gene information (eg, gene symbol and chromosome), gene orthologs, and gene ontologies. Links are also provided to external resources for publication/s, nucleic and amino acid sequence, gene product function, and Gene Expression Omnibus (GEO) phase expression graph information. The resource also allows for direct comparison of relative gene expression in different microarray experiments for genes shown in the literature to be differentially expressed in the uterus. It is available via a user-friendly, web-based interface and is available without charge or restriction to the entire scientific community. The EDR can be accessed at http://edr.research.bcm.edu.",EDR,0.954789698,Endometrium Database Resource,0.906237185,EDR,0.954789698,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/22/2013 +27899667,http://snp-seek.irri.org,"Rice SNP-seek database update: new SNPs, indels, and queries. We describe updates to the Rice SNP-Seek Database since its first release. We ran a new SNP-calling pipeline followed by filtering that resulted in complete, base, filtered and core SNP datasets. Besides the Nipponbare reference genome, the pipeline was run on genome assemblies of IR 64, 93-11, DJ 123 and Kasalath. New genotype query and display features are added for reference assemblies, SNP datasets and indels. JBrowse now displays BAM, VCF and other annotation tracks, the additional genome assemblies and an embedded VISTA genome comparison viewer. Middleware is redesigned for improved performance by using a hybrid of HDF5 and RDMS for genotype storage. 
Query modules for genotypes, varieties and genes are improved to handle various constraints. An integrated list manager allows the user to pass query parameters for further analysis. The SNP Annotator adds traits, ontology terms, effects and interactions to markers in a list. Web-service calls were implemented to access most data. These features enable seamless querying of SNP-Seek across various biological entities, a step toward semi-automated gene-trait association discovery. URL: http://snp-seek.irri.org.",eek,0.851565361,NA,0,eek,0.851565361,1,NA,25429973,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,11/29/2016 +25429973,http://www.oryzasnp.org/iric-portal,"SNP-Seek database of SNPs derived from 3000 rice genomes. We have identified about 20 million rice SNPs by aligning reads from the 3000 rice genomes project with the Nipponbare genome. The SNPs and allele information are organized into a SNP-Seek system (http://www.oryzasnp.org/iric-portal/), which consists of Oracle database having a total number of rows with SNP genotypes close to 60 billion (20 M SNPs √ɬÉ√ǬÉ√ɬÇ√Ǭó 3 K rice lines) and web interface for convenient querying. The database allows quick retrieving of SNP alleles for all varieties in a given genome region, finding different alleles from predefined varieties and querying basic passport and morphological phenotypic information about sequenced rice lines. SNPs can be visualized together with the gene structures in JBrowse genome browser. Evolutionary relationships between rice varieties can be explored using phylogenetic trees or multidimensional scaling plots.",eek,0.552493989,NA,0,eek,0.552493989,1,NA,27899667,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,11/27/2014 +23203870,http://eendb.zfgenetics.org,"EENdb: a database and knowledge base of ZFNs and TALENs for endonuclease engineering. 
We report here the construction of engineered endonuclease database (EENdb) (http://eendb.zfgenetics.org/), a searchable database and knowledge base for customizable engineered endonucleases (EENs), including zinc finger nucleases (ZFNs) and transcription activator-like effector nucleases (TALENs). EENs are artificial nucleases designed to target and cleave specific DNA sequences. EENs have been shown to be a very useful genetic tool for targeted genome modification and have shown great potentials in the applications in basic research, clinical therapies and agricultural utilities, and they are specifically essential for reverse genetics research in species where no other gene targeting techniques are available. EENdb contains over 700 records of all the reported ZFNs and TALENs and related information, such as their target sequences, the peptide components [zinc finger protein-/transcription activator-like effector (TALE)-binding domains, FokI variants and linker peptide/framework], the efficiency and specificity of their activities. The database also lists EEN engineering tools and resources as well as information about forms and types of EENs, EEN screening and construction methods, detection methods for targeting efficiency and many other utilities. The aim of EENdb is to represent a central hub for EEN information and an integrated solution for EEN engineering. These studies may help to extract in-depth properties and common rules regarding ZFN or TALEN efficiency through comparison of the known ZFNs or TALENs.",EENdb,0.995524585,engineered endonuclease database,0.9866168,EENdb,0.995524585,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +26590402,http://effectivedb.org,"EffectiveDB--updates and novel features for a better annotation of bacterial secreted proteins and Type III, IV, VI secretion systems. Protein secretion systems play a key role in the interaction of bacteria and hosts. 
EffectiveDB (http://effectivedb.org) contains pre-calculated predictions of bacterial secreted proteins and of intact secretion systems. Here we describe a major update of the database, which was previously featured in the NAR Database Issue. EffectiveDB bundles various tools to recognize Type III secretion signals, conserved binding sites of Type III chaperones, Type IV secretion peptides, eukaryotic-like domains and subcellular targeting signals in the host. Beyond the analysis of arbitrary protein sequence collections, the new release of EffectiveDB also provides a 'genome-mode', in which protein sequences from nearly complete genomes or metagenomic bins can be screened for the presence of three important secretion systems (Type III, IV, VI). EffectiveDB contains pre-calculated predictions for currently 1677 bacterial genomes from the EggNOG 4.0 database and for additional bacterial genomes from NCBI RefSeq. The new, user-friendly and informative web portal offers a submission tool for running the EffectiveDB prediction tools on user-provided data.",EffectiveDB,0.997481704,NA,0,EffectiveDB,0.997481704,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2015 +23798489,http://csb.shu.edu.cn/efg,"eFG: an electronic resource for Fusarium graminearum. Fusarium graminearum is a plant pathogen, which causes crop diseases and further leads to huge economic damage worldwide in past decades. Recently, the accumulation of different types of molecular data provides insights into the pathogenic mechanism of F. graminearum, and might help develop efficient strategies to combat this destructive fungus. Unfortunately, most available molecular data related to F. graminearum are distributed in various media, where each single source only provides limited information on the complex biological systems of the fungus. In this work, we present a comprehensive database, namely eFG (Electronic resource for Fusarium graminearum), to the community for further understanding this destructive pathogen. 
In particular, a large amount of functional genomics data generated by our group is deposited in eFG, including protein subcellular localizations, protein-protein interactions and orthologous genes in other model organisms. This valuable knowledge can not only help to disclose the molecular underpinnings of pathogenesis of the destructive fungus F. graminearum but also help the community to develop efficient strategies to combat this pathogen. To our best knowledge, eFG is the most comprehensive functional genomics database for F. graminearum until now. The eFG database is freely accessible at http://csb.shu.edu.cn/efg/ with a user-friendly and interactive interface, and all data can be downloaded freely. DATABASE URL: http://csb.shu.edu.cn/efg/",eFG,0.989360213,Electronic resource for Fusarium graminearum,0.876718317,eFG,0.989360213,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/22/2013 +"22096231, 24297252, 26582926, 30418610",http://eggnog.embl.de,"eggNOG v3.0: orthologous groups covering 1133 organisms at 41 different taxonomic ranges. Orthologous relationships form the basis of most comparative genomic and metagenomic studies and are essential for proper phylogenetic and functional analyses. The third version of the eggNOG database (http://eggnog.embl.de) contains non-supervised orthologous groups constructed from 1133 organisms, doubling the number of genes with orthology assignment compared to eggNOG v2. The new release is the result of a number of improvements and expansions: (i) the underlying homology searches are now based on the SIMAP database; (ii) the orthologous groups have been extended to 41 levels of selected taxonomic ranges enabling much more fine-grained orthology assignments; and (iii) the newly designed web page is considerably faster with more functionality. In total, eggNOG v3 contains 721,801 orthologous groups, encompassing a total of 4,396,591 genes. 
Additionally, we updated 4873 and 4850 original COGs and KOGs, respectively, to include all 1133 organisms. At the universal level, covering all three domains of life, 101,208 orthologous groups are available, while the others are applicable at 40 more limited taxonomic ranges. Each group is amended by multiple sequence alignments and maximum-likelihood trees and broad functional descriptions are provided for 450,904 orthologous groups (62.5%).",eggNOG,0.992302001,NA,0,eggNOG,0.992302001,4,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +21317139,http://egob.biomedicine.gu.se,"eGOB: eukaryotic Gene Order Browser. Unlabelled A large number of genomes have been sequenced, allowing a range of comparative studies. Here, we present the eukaryotic Gene Order Browser with information on the order of protein and non-coding RNA (ncRNA) genes of 74 different eukaryotic species. The browser is able to display a gene of interest together with its genomic context in all species where that gene is present. Thereby, questions related to the evolution of gene organization and non-random gene order may be examined. The browser also provides access to data collected on pairs of adjacent genes that are evolutionarily conserved. Availability eGOB as well as underlying data are freely available at http://egob.biomedicine.gu.se Supplementary information Supplementary data are available at Bioinformatics online. Contact tore.samuelsson@medkem.gu.se.",eGOB,0.991934419,eukaryotic Gene Order Browser,0.86592653,eGOB,0.991934419,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/10/2011 +26519912,http://www.sussex.ac.uk/affiliates/halophytes,"eHALOPH a Database of Salt-Tolerant Plants: Helping put Halophytes to Work. eHALOPH (http://www.sussex.ac.uk/affiliates/halophytes/) is a database of salt-tolerant plants-halophytes. 
Records of plant species tolerant of salt concentrations of around 80 mM sodium chloride or more have been collected, along with data on plant type, life form, ecotypes, maximum salinity tolerated, the presence or absence of salt glands, photosynthetic pathway, antioxidants, secondary metabolites, compatible solutes, habitat, economic use and whether there are publications on germination, microbial interactions and mycorrhizal status, bioremediation and of molecular data. The database eHALOPH can be used in the analysis of traits associated with tolerance and for informing choice of species that might be used for saline agriculture, bioremediation or ecological restoration and rehabilitation of degraded wetlands or other areas.",eHALOPH,0.997677743,NA,0,eHALOPH,0.997677743,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/31/2015 +25414353,http://biotech.bmi.ac.cn/ehfpi,"EHFPI: a database and analysis resource of essential host factors for pathogenic infection. High-throughput screening and computational technology has greatly changed the face of microbiology in better understanding pathogen-host interactions. Genome-wide RNA interference (RNAi) screens have given rise to a new class of host genes designated as Essential Host Factors (EHFs), whose knockdown effects significantly influence pathogenic infections. Therefore, we present the first release of a manually-curated bioinformatics database and analysis resource EHFPI (Essential Host Factors for Pathogenic Infection, http://biotech.bmi.ac.cn/ehfpi). EHFPI captures detailed article, screen, pathogen and phenotype annotation information for a total of 4634 EHF genes of 25 clinically important pathogenic species. Notably, EHFPI also provides six powerful and data-integrative analysis tools, i.e. 
EHF Overlap Analysis, EHF-pathogen Network Analysis, Gene Enrichment Analysis, Pathogen Interacting Proteins (PIPs) Analysis, Drug Target Analysis and GWAS Candidate Gene Analysis, which advance the comprehensive understanding of the biological roles of EHF genes, as in diverse perspectives of protein-protein interaction network, drug targets and diseases/traits. The EHFPI web interface provides appropriate tools that allow efficient query of EHF data and visualization of custom-made analysis results. EHFPI data and tools shall keep available without charge and serve the microbiology, biomedicine and pharmaceutics research communities, to finally facilitate the development of diagnostics, prophylactics and therapeutics for human pathogens.",EHFPI,0.998098135,Essential Host Factors for Pathogenic Infection,0.983319062,EHFPI,0.998098135,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2014 +23411718,http://www.coccidia.icb.usp.br/eimeriatdb,"The Eimeria transcript DB: an integrated resource for annotated transcripts of protozoan parasites of the genus Eimeria. Parasites of the genus Eimeria infect a wide range of vertebrate hosts, including chickens. We have recently reported a comparative analysis of the transcriptomes of Eimeria acervulina, Eimeria maxima and Eimeria tenella, integrating ORESTES data produced by our group and publicly available Expressed Sequence Tags (ESTs). All cDNA reads have been assembled, and the reconstructed transcripts have been submitted to a comprehensive functional annotation pipeline. Additional studies included orthology assignment across apicomplexan parasites and clustering analyses of gene expression profiles among different developmental stages of the parasites. To make all this body of information publicly available, we constructed the Eimeria Transcript Database (EimeriaTDB), a web repository that provides access to sequence data, annotation and comparative analyses. 
Here, we describe the web interface, available sequence data sets and query tools implemented on the site. The main goal of this work is to offer a public repository of sequence and functional annotation data of reconstructed transcripts of parasites of the genus Eimeria. We believe that EimeriaTDB will represent a valuable and complementary resource for the Eimeria scientific community and for those researchers interested in comparative genomics of apicomplexan parasites. Database URL: http://www.coccidia.icb.usp.br/eimeriatdb/",EimeriaTDB,0.991310894,Eimeria transcript DB,0.578976917,EimeriaTDB,0.991310894,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/14/2013 +24214991,http://ekpd.biocuckoo.org,"EKPD: a hierarchical database of eukaryotic protein kinases and protein phosphatases. We present here EKPD (http://ekpd.biocuckoo.org), a hierarchical database of eukaryotic protein kinases (PKs) and protein phosphatases (PPs), the key molecules responsible for the reversible phosphorylation of proteins that are involved in almost all aspects of biological processes. As extensive experimental and computational efforts have been carried out to identify PKs and PPs, an integrative resource with detailed classification and annotation information would be of great value for both experimentalists and computational biologists. In this work, we first collected 1855 PKs and 347 PPs from the scientific literature and various public databases. Based on previously established rationales, we classified all of the known PKs and PPs into a hierarchical structure with three levels, i.e. group, family and individual PK/PP. There are 10 groups with 149 families for the PKs and 10 groups with 33 families for the PPs. We constructed 139 and 27 Hidden Markov Model profiles for PK and PP families, respectively. Then we systematically characterized √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº50,000 PKs and >10,000 PPs in eukaryotes. In addition, >500 PKs and >400 PPs were computationally identified by ortholog search. 
Finally, the online service of the EKPD database was implemented in PHP + MySQL + JavaScript.",EKPD,0.997297764,NA,0,EKPD,0.997297764,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2013 +24318814,http://www.emouseatlas.org/emage,"EMAGE: Electronic Mouse Atlas of Gene Expression. The EMAGE (Electronic Mouse Atlas of Gene Expression) database (http://www.emouseatlas.org/emage) allows users to perform on-line queries of mouse developmental gene expression. EMAGE data are represented spatially using a framework of 3D mouse embryo models, thus allowing uniquely spatial queries to be carried out alongside more traditional text-based queries. This spatial representation of the data also allows a comparison of spatial similarity between the expression patterns. The data are mapped to the models by a team of curators using bespoke mapping software, and the associated meta-data are curated for accuracy and completeness. The data contained in EMAGE are gathered from three main sources: from the published literature, through large-scale screens and collaborations, and via direct submissions from researchers. There are a variety of ways to query the EMAGE database via the on-line search interfaces, as well as via direct computational script-based queries. EMAGE is a free, on-line, community resource funded by the Medical Research Council, UK.",EMAGE,0.976893961,Electronic Mouse Atlas of Gene Expression,0.988375112,Electronic Mouse Atlas of Gene Expression,0.988375112,1,24265223,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2014 +"22110040, 24214962, 26615199, 31680160",http://elm.eu.org,"ELM--the database of eukaryotic linear motifs. Linear motifs are short, evolutionarily plastic components of regulatory proteins and provide low-affinity interaction interfaces. These compact modules play central roles in mediating every aspect of the regulatory functionality of the cell. 
They are particularly prominent in mediating cell signaling, controlling protein turnover and directing protein localization. Given their importance, our understanding of motifs is surprisingly limited, largely as a result of the difficulty of discovery, both experimentally and computationally. The Eukaryotic Linear Motif (ELM) resource at http://elm.eu.org provides the biological community with a comprehensive database of known experimentally validated motifs, and an exploratory tool to discover putative linear motifs in user-submitted protein sequences. The current update of the ELM database comprises 1800 annotated motif instances representing 170 distinct functional classes, including approximately 500 novel instances and 24 novel classes. Several older motif class entries have been also revisited, improving annotation and adding novel instances. Furthermore, addition of full-text search capabilities, an enhanced interface and simplified batch download has improved the overall accessibility of the ELM data. The motif discovery portion of the ELM resource has added conservation, and structural attributes have been incorporated to aid users to discriminate biologically relevant motifs from stochastically occurring non-functional instances.",ELM,0.994939625,eukaryotic linear motif,0.945976029,ELM,0.994939625,4,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24265223,http://www.emouseatlas.org/emage,"EMAGE mouse embryo spatial gene expression database: 2014 update. EMAGE (http://www.emouseatlas.org/emage/) is a freely available database of in situ gene expression patterns that allows users to perform online queries of mouse developmental gene expression. EMAGE is unique in providing both text-based descriptions of gene expression plus spatial maps of gene expression patterns. This mapping allows spatial queries to be accomplished alongside more traditional text-based queries. Here, we describe our recent progress in spatial mapping and data integration. 
EMAGE has developed a method of spatially mapping 3D embryo images captured using optical projection tomography, and through the use of an IIP3D viewer allows users to view arbitrary sections of raw and mapped 3D image data in the context of a web browser. EMAGE now includes enhancer data, and we have spatially mapped images from a comprehensive screen of transgenic reporter mice that detail the expression of mouse non-coding genomic DNA fragments with enhancer activity. We have integrated the eMouseAtlas anatomical atlas and the EMAGE database so that a user of the atlas can query the EMAGE database easily. In addition, we have extended the atlas framework to enable EMAGE to spatially cross-index EMBRYS whole mount in situ hybridization data. We additionally report on recent developments to the EMAGE web interface, including new query and analysis capabilities.",EMAGE,0.985727191,EMAGE mouse embryo spatial gene expression database,0.830008045,EMAGE,0.985727191,1,24318814,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/21/2013 +"24271396, 31701143",http://www.ebi.ac.uk,"The European Bioinformatics Institute's data resources 2014. Molecular Biology has been at the heart of the 'big data' revolution from its very beginning, and the need for access to biological data is a common thread running from the 1965 publication of Dayhoff's 'Atlas of Protein Sequence and Structure' through the Human Genome Project in the late 1990s and early 2000s to today's population-scale sequencing initiatives. The European Bioinformatics Institute (EMBL-EBI; http://www.ebi.ac.uk) is one of three organizations worldwide that provides free access to comprehensive, integrated molecular data sets. 
Here, we summarize the principles underpinning the development of these public resources and provide an overview of EMBL-EBI's database collection to complement the reviews of individual databases provided elsewhere in this issue.",EMBL-EBI,0.858718193,NA,0,EMBL-EBI,0.858718193,2,29186510,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: CLASS,NA,NA,1/1/2020 +24146773,http://embrys.jp/embrys/html/MainMenu.html,"The AERO system: a 3D-like approach for recording gene expression patterns in the whole mouse embryo. We have recently constructed a web-based database of gene expression in the mouse whole embryo, EMBRYS (http://embrys.jp/embrys/html/MainMenu.html). To allow examination of gene expression patterns to the fullest extent possible, this database provides both photo images and annotation data. However, since embryos develop via an intricate process of morphogenesis, it would be of great value to track embryonic gene expression from a three dimensional perspective. In fact, several methods have been developed to achieve this goal, but highly laborious procedures and specific operational skills are generally required. We utilized a novel microscopic technique that enables the easy capture of rotational, 3D-like images of the whole embryo. In this method, a rotary head equipped with two mirrors that are designed to obtain an image tilted at 45 degrees to the microscope stage captures serial images at 2-degree intervals. By a simple operation, 180 images are automatically collected. These 2D images obtained at multiple angles are then used to reconstruct 3D-like images, termed AERO images. By means of this system, over 800 AERO images of 191 gene expression patterns were captured. These images can be easily rotated on the computer screen using the EMBRYS database so that researchers can view an entire embryo by a virtual viewing on a computer screen in an unbiased or non-predetermined manner. 
The advantages afforded by this approach make it especially useful for generating data viewed in public databases.",EMBRYS,0.995997906,NA,0,EMBRYS,0.995997906,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/16/2013 +30008982,"http://emdb-empiar.org, http://emdb-empiar.org/emstats","EMDB Web Resources. The Electron Microscopy Data Bank (EMDB; http://emdb-empiar.org) is a global openly-accessible archive of biomolecular and cellular 3D reconstructions derived from electron microscopy (EM) data. EMBL-EBI develops web-based resources to facilitate the reuse of EMDB data. Here we provide protocols for how these resources can be used for searching EMDB, visualising EMDB structures, statistically analysing EMDB content and checking the validity of EMDB structures. Protocols for searching include quick link categories from the main page, links to latest entries released during the weekly cycle, filtered browsing of the entire archive and a form-based search. For visualisation, the 'Volume Slicer' enables slices of EMDB entries to be visualised interactively and in three orthogonal directions. The EMstats web service (https://emdb-empiar.org/emstats) provides up-to-date interactive statistical charts analysing EMDB. All EMDB entries have 'visual analysis' pages that provide basic validation information for the entry.",EMDB,0.995346447,Electron Microscopy Data Bank,0.956331355,EMDB,0.995346447,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2018 +31164042,http://bioinfor.imu.edu.cn/emexplorer,"EmExplorer: a database for exploring time activation of gene expression in mammalian embryos. Understanding early development offers a striking opportunity to investigate genetic disease, stem cell and assisted reproductive technology. Recent advances in high-throughput sequencing technology have led to the rising influx of omics data, which have rapidly boosted our understanding of mammalian developmental mechanisms. 
Here, we review the database EmExplorer (a database for exploring time activation of gene expression in mammalian embryos), which systematically organizes the genes from development-related pathways, and which we have already established and continue to update it. The current version of EmExplorer incorporates over 26 000 genes obtained from 306 functional pathways in five species. The function annotations of development-related genes were also integrated into EmExplorer. To facilitate data extraction, the database also contains the following information. (i) The dynamic expression values for each development stage are matched to the corresponding genes. (ii) A two-layer search tool which supports multi-option searching, such as by official symbol, pathway name and function annotation. The returned entries can directly link to the analysis results for the corresponding gene or pathway in the analysis module. (iii) The analysis module provides different gene comparisons at the multi-species level and functional pathway level, which shows the species specificity and stage specificity at the gene or pathway level. (iv) The analysis based on the hypergeometric distribution test reveals the enrichment of gene functions at a particular stage of one organism's pathway. (v) The browser is designed for users with ambiguous searching goals and greatly helps new users to get a general idea of the contents of the database. (vi) The experimentally validated pathways are manually curated and shown on the home page. EmExplorer will be helpful for elucidating early developmental mechanisms and exploring time activation genes. EmExplorer is freely available at http://bioinfor.imu.edu.cn/emexplorer .",EmExplorer,0.986415982,NA,0,EmExplorer,0.986415982,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/5/2019 +34048547,http://research.bioinformatics.udel.edu/emirit,"emiRIT: a text-mining-based resource for microRNA information. . 
microRNAs (miRNAs) are essential gene regulators, and their dysregulation often leads to diseases. Easy access to miRNA information is crucial for interpreting generated experimental data, connecting facts across publications and developing new hypotheses built on previous knowledge. Here, we present extracting miRNA Information from Text (emiRIT), a text-miningbased resource, which presents miRNA information mined from the literature through a user-friendly interface. We collected 149√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ,233 miRNA -PubMed ID pairs from Medline between January 1997 and May 2020. emiRIT currently contains 'miRNA -gene regulation' (69√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ,152 relations), 'miRNA disease (cancer)' (12√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ,300 relations), 'miRNA -biological process and pathways' (23, 390 relations) and circulatory 'miRNAs in extracellular locations' (3782 relations). Biological entities and their relation to miRNAs were extracted from Medline abstracts using publicly available and in-house developed text-mining tools, and the entities were normalized to facilitate querying and integration. We built a database and an interface to store and access the integrated data, respectively. We provide an up-to-date and user-friendly resource to facilitate access to comprehensive miRNA information from the literature on a large scale, enabling users to navigate through different roles of miRNA and examine them in a context specific to their information needs. To assess our resource's information coverage, we have conducted two case studies focusing on the target and differential expression information of miRNAs in the context of cancer and a third case study to assess the usage of emiRIT in the curation of miRNA information. 
Database URL: https://research.bioinformatics.udel.edu/emirit/.",emiRIT,0.994740963,extracting miRNA Information from Text,0.961960316,emiRIT,0.994740963,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +25414328,http://www.infrafrontier.eu,"INFRAFRONTIER--providing mutant mouse resources as research tools for the international scientific community. The laboratory mouse is a key model organism to investigate mechanism and therapeutics of human disease. The number of targeted genetic mouse models of disease is growing rapidly due to high-throughput production strategies employed by the International Mouse Phenotyping Consortium (IMPC) and the development of new, more efficient genome engineering techniques such as CRISPR based systems. We have previously described the European Mouse Mutant Archive (EMMA) resource and how this international infrastructure provides archiving and distribution worldwide for mutant mouse strains. EMMA has since evolved into INFRAFRONTIER (http://www.infrafrontier.eu), the pan-European research infrastructure for the systemic phenotyping, archiving and distribution of mouse disease models. Here we describe new features including improved search for mouse strains, support for new embryonic stem cell resources, access to training materials via a comprehensive knowledgebase and the promotion of innovative analytical and diagnostic techniques.",EMMA,0.971358657,European Mouse Mutant Archive,0.605616391,EMMA,0.971358657,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,11/20/2014 +"22080548, 23203883, 24214989, 26615190, 27899630, 29140475, 30395270, 31722421, 33175160",http://www.ebi.ac.uk/ena,"Major submissions tool developments at the European Nucleotide Archive. The European Nucleotide Archive (ENA; http://www.ebi.ac.uk/ena), Europe's primary nucleotide sequence resource, captures and presents globally comprehensive nucleic acid sequence and associated information. 
Covering the spectrum from raw data to assembled and functionally annotated genomes, the ENA has witnessed a dramatic growth resulting from advances in sequencing technology and ever broadening application of the methodology. During 2011, we have continued to operate and extend the broad range of ENA services. In particular, we have released major new functionality in our interactive web submission system, Webin, through developments in template-based submissions for annotated sequences and support for raw next-generation sequence read submissions.",ENA,0.991740763,European Nucleotide Archive,0.941507971,ENA,0.991740763,9,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +32728249,"http://www.encodeproject.org, http://screen.encodeproject.org","Expanded encyclopaedias of DNA elements in the human and mouse genomes. The human and mouse genomes contain instructions that specify RNAs and proteins and govern the timing, magnitude, and cellular context of their production. To better delineate these elements, phase III of the Encyclopedia of DNA Elements (ENCODE) Project has expanded analysis of the cell and tissue repertoires of RNA transcription, chromatin structure and modification, DNA methylation, chromatin looping, and occupancy by transcription factors and RNA-binding proteins. Here we summarize these efforts, which have produced 5,992 new experimental datasets, including systematic determinations across mouse fetal development. All data are available through the ENCODE data portal (https://www.encodeproject.org), including phase II ENCODE1 and Roadmap Epigenomics2 data. We have developed a registry of 926,535 human and 339,815 mouse candidate cis-regulatory elements, covering 7.9 and 3.4% of their respective genomes, by integrating selected datatypes associated with gene regulation, and constructed a web-based server (SCREEN; http://screen.encodeproject.org) to provide flexible, user-defined access to this resource. 
Collectively, the ENCODE data and registry provide an expansive resource for the scientific community to build a better understanding of the organization and function of the human and mouse genomes.",ENCODE,0.997707129,NA,0,ENCODE,0.997707129,1,29126249,"23193274.0, 29126249.0, 31713622.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,7/29/2020 +31713622,"http://www.encodeproject.org/, http://registry.opendata.aws/encode-project","New developments on the Encyclopedia of DNA Elements (ENCODE) data portal. The Encyclopedia of DNA Elements (ENCODE) is an ongoing collaborative research project aimed at identifying all the functional elements in the human and mouse genomes. Data generated by the ENCODE consortium are freely accessible at the ENCODE portal (https://www.encodeproject.org/), which is developed and maintained by the ENCODE Data Coordinating Center (DCC). Since the initial portal release in 2013, the ENCODE DCC has updated the portal to make ENCODE data more findable, accessible, interoperable and reusable. Here, we report on recent updates, including new ENCODE data and assays, ENCODE uniform data processing pipelines, new visualization tools, a dataset cart feature, unrestricted public access to ENCODE data on the cloud (Amazon Web Services open data registry, https://registry.opendata.aws/encode-project/) and more comprehensive tutorials and documentation.",ENCODE,0.994007528,The Encyclopedia of DNA Elements,0.795711929,ENCODE,0.994007528,1,NA,"23193274.0, 29126249.0, 32728249.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2020 +23193274,"http://encodeproject.org, http://genome.ucsc.edu","ENCODE data in the UCSC Genome Browser: year 5 update. The Encyclopedia of DNA Elements (ENCODE), http://encodeproject.org, has completed its fifth year of scientific collaboration to create a comprehensive catalog of functional elements in the human genome, and its third year of investigations in the mouse genome. 
Since the last report in this journal, the ENCODE human data repertoire has grown by 898 new experiments (totaling 2886), accompanied by a major integrative analysis. In the mouse genome, results from 404 new experiments became available this year, increasing the total to 583, collected during the course of the project. The University of California, Santa Cruz, makes this data available on the public Genome Browser http://genome.ucsc.edu for visual browsing and data mining. Download of raw and processed data files are all supported. The ENCODE portal provides specialized tools and information about the ENCODE data sets.",ENCODE,0.986879021,The Encyclopedia of DNA Elements,0.854699457,ENCODE,0.986879021,1,NA,"29126249.0, 31713622.0, 32728249.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/27/2012 +29126249,http://www.encodeproject.org,"The Encyclopedia of DNA elements (ENCODE): data portal update. The Encyclopedia of DNA Elements (ENCODE) Data Coordinating Center has developed the ENCODE Portal database and website as the source for the data and metadata generated by the ENCODE Consortium. Two principles have motivated the design. First, experimental protocols, analytical procedures and the data themselves should be made publicly accessible through a coherent, web-based search and download interface. Second, the same interface should serve carefully curated metadata that record the provenance of the data and justify its interpretation in biological terms. Since its initial release in 2013 and in response to recommendations from consortium members and the wider community of scientists who use the Portal to access ENCODE data, the Portal has been regularly updated to better reflect these design principles. Here we report on these updates, including results from new experiments, uniformly-processed data from other projects, new visualization tools and more comprehensive metadata to describe experiments and analyses. 
Additionally, the Portal is now home to meta(data) from related projects including Genomics of Gene Regulation, Roadmap Epigenome Project, Model organism ENCODE (modENCODE) and modERN. The Portal now makes available over 13000 datasets and their accompanying metadata and can be accessed at: https://www.encodeproject.org/.",ENCODE,0.92023621,of,0.702912629,ENCODE,0.92023621,1,32728249,"23193274.0, 31713622.0, 32728249.0",low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2018 +30357403,http://encompass.ninds.nih.gov,"EncoMPASS: an online database for analyzing structure and symmetry in membrane proteins. The EncoMPASS online database (http://encompass.ninds.nih.gov) collects, organizes, and presents information about membrane proteins of known structure, emphasizing their structural similarities as well as their quaternary and internal symmetries. Unlike, e.g.√ɬÉ√ǬÇ√ɬÇ√Ǭ†SCOP, the EncoMPASS database does not aim for a strict classification of membrane proteins, but instead is organized as a protein chain-centric network of sequence and structural homologues. The online server for the EncoMPASS database provides tools for comparing the structural features of its entries, making it a useful resource for homology modeling and active site identification studies. The database can also be used for inferring functionality, which for membrane proteins often involves symmetry-related mechanisms. To this end, the online database also provides a comprehensive description of both the quaternary and internal symmetries in known membrane protein structures, with a particular focus on their orientation relative to the membrane.",EncoMPASS,0.996818304,NA,0,EncoMPASS,0.996818304,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +31665430,http://www.licpathway.net/ENdb,"ENdb: a manually curated database of experimentally supported enhancers for human and mouse. 
Enhancers are a class of cis-regulatory elements that can increase gene transcription by forming loops in intergenic regions, introns and exons. Enhancers, as well as their associated target genes, and transcription factors (TFs) that bind to them, are highly associated with human disease and biological processes. Although some enhancer databases have been published, most only focus on enhancers identified by high-throughput experimental techniques. Therefore, it is highly desirable to construct a comprehensive resource of manually curated enhancers and their related information based on low-throughput experimental evidences. Here, we established a comprehensive manually-curated enhancer database for human and mouse, which provides a resource for experimentally supported enhancers, and to annotate the detailed information of enhancers. The current release of ENdb documents 737 experimentally validated enhancers and their related information, including 384 target genes, 263 TFs, 110 diseases and 153 functions in human and mouse. Moreover, the enhancer-related information was supported by experimental evidences, such as RNAi, in vitro knockdown, western blotting, qRT-PCR, luciferase reporter assay, chromatin conformation capture (3C) and chromosome conformation capture-on-chip (4C) assays. ENdb provides a user-friendly interface to query, browse and visualize the detailed information of enhancers. The database is available at http://www.licpathway.net/ENdb.",ENdb,0.997351289,NA,0,ENdb,0.997351289,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +30788500,http://bioinfo.au.tsinghua.edu.cn/endisease,"EnDisease: a manually curated database for enhancer-disease associations. . Genome-wide association studies have successfully identified thousands of genomic loci potentially associated with hundreds of complex traits in the past decade. 
Nevertheless, the fact that more than 90% of such disease-associated variants lie in non-coding DNA with unknown functional implications has been appealing for advanced analysis of plenty of genetic variants. Toward this goal, recent studies focusing on individual non-coding variants have revealed that complex diseases are often the consequences of erroneous interactions between enhancers and their target genes. However, such enhancer-disease associations are dispersed in a variety of independent studies, and thus far it is still difficult to carry out comprehensive downstream analysis with these experimentally supported enhancer-disease associations. To fill in this gap, we collected experimentally supported associations between complex diseases and enhancers and then developed a manually curated database called EnDisease (http://bioinfo.au.tsinghua.edu.cn/endisease/). Concretely, EnDisease documents 535 associations between 133 diseases and 454 enhancers, extracted from 199 articles. Moreover, after annotating these enhancers using 649 human and 115 mouse DNase-seq experiments, we find that cancer-related enhancers tend to be open across a large number of cell types. This database provides a user-friendly interface for browsing and searching, and it also allows users to download data freely. EnDisease has the potential to become a helpful and important resource for researchers who aim to understand the molecular mechanisms of enhancers involved in complex diseases.",EnDisease,0.997681141,NA,0,EnDisease,0.997681141,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30357379,http://vibcancer.be/software-tools/endodb,"EndoDB: a database of endothelial cell transcriptomics data. Endothelial cells (ECs) line blood vessels, regulate homeostatic processes (blood flow, immune cell trafficking), but are also involved in many prevalent diseases. 
The increasing use of high-throughput technologies such as gene expression microarrays and (single cell) RNA sequencing generated a wealth of data on the molecular basis of EC (dys-)function. Extracting biological insight from these datasets is challenging for scientists who are not proficient in bioinformatics. To facilitate the re-use of publicly available EC transcriptomics data, we developed the endothelial database EndoDB, a web-accessible collection of expert curated, quality assured and pre-analyzed data collected from 360 datasets comprising a total of 4741 bulk and 5847 single cell endothelial transcriptomes from six different organisms. Unlike other added-value databases, EndoDB allows to easily retrieve and explore data of specific studies, determine under which conditions genes and pathways of interest are deregulated and assess reprogramming of metabolism via principal component analysis, differential gene expression analysis, gene set enrichment analysis, heatmaps and metabolic and transcription factor analysis, while single cell data are visualized as gene expression color-coded t-SNE plots. Plots and tables in EndoDB are customizable, downloadable and interactive. EndoDB is freely available at https://vibcancer.be/software-tools/endodb, and will be updated to include new studies.",EndoDB,0.996743798,NA,0,EndoDB,0.996743798,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +27515742,http://www.enhanceratlas.org,"EnhancerAtlas: a resource for enhancer annotation and analysis in 105 human cell/tissue types. Motivation Multiple high-throughput approaches have recently been developed and allowed the discovery of enhancers on a genome scale in a single experiment. However, the datasets generated from these approaches are not fully utilized by the research community due to technical challenges such as lack of consensus enhancer annotation and integrative analytic tools. 
Results We developed an interactive database, EnhancerAtlas, which contains an atlas of 2,534,123 enhancers for 105 cell/tissue types. A consensus enhancer annotation was obtained for each cell by summation of independent experimental datasets with the relative weights derived from a cross-validation approach. Moreover, EnhancerAtlas provides a set of useful analytic tools that allow users to query and compare enhancers in a particular genomic region or associated with a gene of interest, and assign enhancers and their target genes from a custom dataset. Availability and implementation The database with analytic tools is available at http://www.enhanceratlas.org/ CONTACT: jiang.qian@jhmi.edu or tank1@email.chop.eduSupplementary information: Supplementary data are available at Bioinformatics online.",EnhancerAtlas,0.995407641,NA,0,EnhancerAtlas,0.995407641,1,NA,31740966,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,8/10/2016 +31740966,http://www.enhanceratlas.org/indexv2.php,"EnhancerAtlas 2.0: an updated resource with enhancer annotation in 586 tissue/cell types across nine species. Enhancers are distal cis-regulatory elements that activate the transcription of their target genes. They regulate a wide range of important biological functions and processes, including embryogenesis, development, and homeostasis. As more and more large-scale technologies were developed for enhancer identification, a comprehensive database is highly desirable for enhancer annotation based on various genome-wide profiling datasets across different species. Here, we present an updated database EnhancerAtlas 2.0 (http://www.enhanceratlas.org/indexv2.php), covering 586 tissue/cell types that include a large number of normal tissues, cancer cell lines, and cells at different development stages across nine species. Overall, the database contains 13 494 603 enhancers, which were obtained from 16 055 datasets using 12 high-throughput experiment methods (e.g. 
H3K4me1/H3K27ac, DNase-seq/ATAC-seq, P300, POLR2A, CAGE, ChIA-PET, GRO-seq, STARR-seq and MPRA). The updated version is a huge expansion of the first version, which only contains the enhancers in human cells. In addition, we predicted enhancer-target gene relationships in human, mouse and fly. Finally, the users can search enhancers and enhancer-target gene relationships through five user-friendly, interactive modules. We believe the new annotation of enhancers in EnhancerAtlas 2.0 will facilitate users to perform useful functional analysis of enhancers in various genomes.",EnhancerAtlas,0.991393745,NA,0,EnhancerAtlas,0.991393745,1,NA,27515742,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2020 +35694152,http://lcbb.swjtu.edu.cn/EnhFFL,"EnhFFL: A database of enhancer mediated feed-forward loops for human and mouse. Feed-forward loops (FFLs) are thought to be one of the most common and important classes of transcriptional network motifs involved in various diseases. Enhancers are cis-regulatory elements that positively regulate protein-coding genes or microRNAs (miRNAs) by recruiting DNA-binding transcription factors (TFs). However, a comprehensive resource to identify, store, and analyze the FFLs of typical enhancer and super-enhancer FFLs is not currently available. Here, we present EnhFFL, an online database to provide a data resource for users to browse and search typical enhancer and super-enhancer FFLs. The current database covers 46√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ280/7000 TF-enhancer-miRNA FFLs, 9997/236 enhancer-miRNA-gene FFLs, 3√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ561√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ164/3√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ193√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ182 TF-enhancer-gene FFLs, and 1259/235 TF-enhancer feed-back loops (FBLs) across 91 tissues/cell lines of human and mouse, respectively. Users can browse loops by selecting species, types of tissue/cell line, and types of FFLs. EnhFFL supports searching elements including name/ID, genomic location, and the conservation of miRNA target genes. 
We also developed tools for users to screen customized FFLs using the threshold of q value as well as the confidence score of miRNA target genes. Disease and functional enrichment analysis showed that master miRNAs that are widely engaged in FFLs including TF-enhancer-miRNAs and enhancer-miRNA-genes are significantly involved in tumorigenesis. Database URL:http://lcbb.swjtu.edu.cn/EnhFFL/.",EnhFFL,0.994996548,NA,0,EnhFFL,0.994996548,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/14/2021 +30476229,http://qinlab.sls.cuhk.edu.hk/ENPD,"ENPD - A Database of Eukaryotic Nucleic Acid Binding Proteins: Linking Gene Regulations to Proteins. Eukaryotic nucleic acid binding protein database (ENPD, http://qinlab.sls.cuhk.edu.hk/ENPD/) is a library of nucleic acid binding proteins (NBPs) and their functional information. NBPs such as DNA binding proteins (DBPs), RNA binding proteins (RBPs), and DNA and RNA binding proteins (DRBPs) are involved in every stage of gene regulation through their interactions with DNA and RNA. Due to the importance of NBPs, the database was constructed based on manual curation and a newly developed pipeline utilizing both sequenced transcriptomes and genomes. In total the database has recorded 2.8 million of NBPs and their binding motifs from 662 NBP families and 2423 species, constituting the largest NBP database. ENPD covers evolutionarily important lineages which have never been included in the previous NBP databases, while lineage-specific NBP family expansions were also found. ENPD also focuses on the involvements of DBPs, RBPs and DRBPs in non-coding RNA (ncRNA) mediated gene regulation. The predicted and experimentally validated targets of NBPs have both been recorded and manually curated in ENPD, linking the interactions between ncRNAs, DNA regulatory elements and NBPs in gene regulation. 
This database provides key resources for the scientific community, laying a solid foundation for future gene regulatory studies from both functional and evolutionary perspectives.",ENPD,0.971551538,Eukaryotic nucleic acid binding protein database,0.936309212,ENPD,0.971551538,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +27141961,http://amp.pharm.mssm.edu/Enrichr,"Enrichr: a comprehensive gene set enrichment analysis web server 2016 update. Enrichment analysis is a popular method for analyzing gene sets generated by genome-wide experiments. Here we present a significant update to one of the tools in this domain called Enrichr. Enrichr currently contains a large collection of diverse gene set libraries available for analysis and download. In total, Enrichr currently contains 180 184 annotated gene sets from 102 gene set libraries. New features have been added to Enrichr including the ability to submit fuzzy sets, upload BED files, improved application programming interface and visualization of the results as clustergrams. Overall, Enrichr is a comprehensive resource for curated gene sets and a search engine that accumulates biological knowledge for further biological discoveries. Enrichr is freely available at: http://amp.pharm.mssm.edu/Enrichr.",Enrichr,0.996474743,NA,0,Enrichr,0.996474743,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/3/2016 +24717071,http://ww.iprox.org,"Discovery of novel genes and gene isoforms by integrating transcriptomic and proteomic profiling from mouse liver. Comprehensively identifying gene expression in both transcriptomic and proteomic levels of one tissue is a prerequisite for a deeper understanding of its biological functions. 
Alternative splicing and RNA editing, two main forms of transcriptional processing, play important roles in transcriptome and proteome diversity and result in multiple isoforms for one gene, which are hard to identify by mass spectrometry (MS)-based proteomics approach due to the relative lack of isoform information in standard protein databases. In our study, we employed MS and RNA-Seq in parallel into mouse liver tissue and captured a considerable catalogue of both transcripts and proteins that, respectively, covered 60 and 34% of protein-coding genes in Ensembl. We then developed a bioinformatics workflow for building a customized protein database that for the first time included new splicing-derived peptides and RNA-editing-caused peptide variants, allowing us to more completely identify protein isoforms. Using this experimentally determined database, we totally identified 150 peptides not present in standard biological databases at false discovery rate of <1%, corresponding to 72 novel splicing isoforms, 43 new genetic regions, and 15 RNA-editing sites. Of these, 11 randomly selected novel events passed experimental verification by PCR and Sanger sequencing. New discoveries of gene products with high confidence in two omics levels demonstrated the robustness and effectiveness of our approach and its potential application into improve genome annotation. All the MS data have been deposited to the iProx ( http://ww.iprox.org ) with the identifier IPX00003601.",Ensembl,0.902588725,NA,0,Ensembl,0.902588725,1,NA,"22086963.0, 26578574.0, 33235280.0, 23203987.0, 24316576.0, 26888907.0, 26896847.0, 29155950.0, 30407521.0, 31691826.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: CLASS,NA,NA,4/18/2014 +33235280,http://www.fragencode.org,"An integrative atlas of chicken long non-coding genes and their annotations across 25 tissues. Long non-coding RNAs (LNC) regulate numerous biological processes. 
In contrast to human, the identification of LNC in farm species, like chicken, is still lacunar. We propose a catalogue of 52,075 chicken genes enriched in LNC ( http://www.fragencode.org/ ), built from the Ensembl reference extended using novel LNC modelled here from 364 RNA-seq and LNC from four public databases. The Ensembl reference grew from 4,643 to 30,084 LNC, of which 59% and 41% with expression√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ√ɬÉ√Ǭ¢√ɬÇ√Ǭâ√ɬÇ√Ǭ•√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ0.5 and√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ√ɬÉ√Ǭ¢√ɬÇ√Ǭâ√ɬÇ√Ǭ•√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ1 TPM respectively. Characterization of these LNC relatively to the closest protein coding genes (PCG) revealed that 79% of LNC are in intergenic regions, as in other species. Expression analysis across 25 tissues revealed an enrichment of co-expressed LNC:PCG pairs, suggesting co-regulation and/or co-function. As expected LNC were more tissue-specific than PCG (25% vs. 10%). Similarly to human, 16% of chicken LNC hosted one or more miRNA. We highlighted a new chicken LNC, hosting miR155, conserved in human, highly expressed in immune tissues like miR155, and correlated with immunity-related PCG in both species. Among LNC:PCG pairs tissue-specific in the same tissue, we revealed an enrichment of divergent pairs with the PCG coding transcription factors, as for example LHX5, HXD3 and TBX4, in both human and chicken.",Ensembl,0.697516799,NA,0,Ensembl,0.697516799,1,NA,"22086963.0, 24717071.0, 26578574.0, 23203987.0, 24316576.0, 26888907.0, 26896847.0, 29155950.0, 30407521.0, 31691826.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,11/24/2020 +"23203987, 24316576, 26888907, 26896847, 29155950, 30407521, 31691826",http://www.ensembl.org,"Ensembl 2013. The Ensembl project (http://www.ensembl.org) provides genome information for sequenced chordate genomes with a particular focus on human, mouse, zebrafish and rat. 
Our resources include evidenced-based gene sets for all supported species; large-scale whole genome multiple species alignments across vertebrates and clade-specific alignments for eutherian mammals, primates, birds and fish; variation data resources for 17 species and regulation annotations based on ENCODE and other data sets. Ensembl data are accessible through the genome browser at http://www.ensembl.org and through other tools and programmatic interfaces.",Ensembl,0.996469855,NA,0,Ensembl,0.996469855,7,22086963,"22086963.0, 24717071.0, 26578574.0, 33235280.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2020 +22086963,"http://www.ensembl.org, http://pre.ensembl.org","Ensembl 2012. The Ensembl project (http://www.ensembl.org) provides genome resources for chordate genomes with a particular focus on human genome data as well as data for key model organisms such as mouse, rat and zebrafish. Five additional species were added in the last year including gibbon (Nomascus leucogenys) and Tasmanian devil (Sarcophilus harrisii) bringing the total number of supported species to 61 as of Ensembl release 64 (September 2011). Of these, 55 species appear on the main Ensembl website and six species are provided on the Ensembl preview site (Pre!Ensembl; http://pre.ensembl.org) with preliminary support. The past year has also seen improvements across the project.",Ensembl,0.994191885,NA,0,Ensembl,0.994191885,1,"23203987.0, 24316576.0, 26888907.0, 26896847.0, 29155950.0, 30407521.0, 31691826.0","24717071.0, 26578574.0, 33235280.0, 23203987.0, 24316576.0, 26888907.0, 26896847.0, 29155950.0, 30407521.0, 31691826.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/15/2011 +26578574,"http://www.ensemblgenomes.org, http://www.ensembl.org","Ensembl Genomes 2016: more genomes, more complexity. 
Ensembl Genomes (http://www.ensemblgenomes.org) is an integrating resource for genome-scale data from non-vertebrate species, complementing the resources for vertebrate genomics developed in the context of the Ensembl project (http://www.ensembl.org). Together, the two resources provide a consistent set of programmatic and interactive interfaces to a rich range of data including reference sequence, gene models, transcriptional data, genetic variation and comparative analysis. This paper provides an update to the previous publications about the resource, with a focus on recent developments. These include the development of new analyses and views to represent polyploid genomes (of which bread wheat is the primary exemplar); and the continued up-scaling of the resource, which now includes over 23 000 bacterial genomes, 400 fungal genomes and 100 protist genomes, in addition to 55 genomes from invertebrate metazoa and 39 genomes from plants. This dramatic increase in the number of included genomes is one part of a broader effort to automate the integration of archival data (genome sequence, but also associated RNA sequence data and variant calls) within the context of reference genomes and make it available through the Ensembl user interfaces.",Ensembl,0.952908397,NA,0,Ensembl,0.952908397,1,"22067447.0, 24163254.0, 29092050.0, 31598706.0, 29092050.0, 31598706.0","22086963.0, 24717071.0, 33235280.0, 23203987.0, 24316576.0, 26888907.0, 26896847.0, 29155950.0, 30407521.0, 31691826.0",low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/17/2015 +"29092050, 31598706","http://www.ensemblgenomes.org, http://www.ensembl.org","Ensembl Genomes 2018: an integrated omics infrastructure for non-vertebrate species. 
Ensembl Genomes (http://www.ensemblgenomes.org) is an integrating resource for genome-scale data from non-vertebrate species, complementing the resources for vertebrate genomics developed in the Ensembl project (http://www.ensembl.org). Together, the two resources provide a consistent set of programmatic and interactive interfaces to a rich range of data including genome sequence, gene models, transcript sequence, genetic variation, and comparative analysis. This paper provides an update to the previous publications about the resource, with a focus on recent developments and expansions. These include the incorporation of almost 20 000 additional genome sequences and over 35 000 tracks of RNA-Seq data, which have been aligned to genomic sequence and made available for visualization. Other advances since 2015 include the release of the database in Resource Description Framework (RDF) format, a large increase in community-derived curation, a new high-performance protein sequence search, additional cross-references, improved annotation of non-protein-coding genes, and the launch of pre-release and archival sites. Collectively, these changes are part of a continuing response to the increasing quantity of publicly-available genome-scale data, and the consequent need to archive, integrate, annotate and disseminate these using automated, scalable methods.",Ensembl Genomes,0.899982035,NA,0,Ensembl Genomes,0.899982035,2,"26578574.0, 22067447.0, 24163254.0, 26578574.0","22067447.0, 24163254.0",low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2020 +"22067447, 24163254",http://www.ensemblgenomes.org,"Ensembl Genomes: an integrative resource for genome-scale data from non-vertebrate species. Ensembl Genomes (http://www.ensemblgenomes.org) is an integrative resource for genome-scale data from non-vertebrate species. 
The project exploits and extends technology (for genome annotation, analysis and dissemination) developed in the context of the (vertebrate-focused) Ensembl project and provides a complementary set of resources for non-vertebrate species through a consistent set of programmatic and interactive interfaces. These provide access to data including reference sequence, gene models, transcriptional data, polymorphisms and comparative analysis. Since its launch in 2009, Ensembl Genomes has undergone rapid expansion, with the goal of providing coverage of all major experimental organisms, and additionally including taxonomic reference points to provide the evolutionary context in which genes can be understood. Against the backdrop of a continuing increase in genome sequencing activities in all parts of the tree of life, we seek to work, wherever possible, with the communities actively generating and using data, and are participants in a growing range of collaborations involved in the annotation and analysis of genomes.",Ensembl Genomes,0.818894243,NA,0,Ensembl Genomes,0.818894243,2,"26578574.0, 29092050.0, 31598706.0","29092050.0, 31598706.0",low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,10/25/2013 +25432969,http://plants.ensembl.org,"Triticeae resources in Ensembl Plants. Recent developments in DNA sequencing have enabled the large and complex genomes of many crop species to be determined for the first time, even those previously intractable due to their polyploid nature. Indeed, over the course of the last 2 years, the genome sequences of several commercially important cereals, notably barley and bread wheat, have become available, as well as those of related wild species. While still incomplete, comparison with other, more completely assembled species suggests that coverage of genic regions is likely to be high. 
Ensembl Plants (http://plants.ensembl.org) is an integrative resource organizing, analyzing and visualizing genome-scale information for important crop and model plants. Available data include reference genome sequence, variant loci, gene models and functional annotation. For variant loci, individual and population genotypes, linkage information and, where available, phenotypic information are shown. Comparative analyses are performed on DNA and protein sequence alignments. The resulting genome alignments and gene trees, representing the implied evolutionary history of the gene family, are made available for visualization and analysis. Driven by the case of bread wheat, specific extensions to the analysis pipelines and web interface have recently been developed to support polyploid genomes. Data in Ensembl Plants is accessible through a genome browser incorporating various specialist interfaces for different data types, and through a variety of additional methods for programmatic access and data mining. These interfaces are consistent with those offered through the Ensembl interface for the genomes of non-plant species, including those of plant pathogens, pests and pollinators, facilitating the study of the plant in its environment.",Ensembl Plants,0.933526009,NA,0,Ensembl Plants,0.933526009,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/27/2014 +32726198,http://enterobase.warwick.ac.uk,"A publicly accessible database for Clostridioides difficile genome sequences supports tracing of transmission chains and epidemics. . Clostridioides difficile is the primary infectious cause of antibiotic-associated diarrhea. Local transmissions and international outbreaks of this pathogen have been previously elucidated by bacterial whole-genome sequencing, but comparative genomic analyses at the global scale were hampered by the lack of specific bioinformatic tools. 
Here we introduce a publicly accessible database within EnteroBase (http://enterobase.warwick.ac.uk) that automatically retrieves and assembles C. difficile short-reads from the public domain, and calls alleles for core-genome multilocus sequence typing (cgMLST). We demonstrate that comparable levels of resolution and precision are attained by EnteroBase cgMLST and single-nucleotide polymorphism analysis. EnteroBase currently contains 18√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭä254 quality-controlled C. difficile genomes, which have been assigned to hierarchical sets of single-linkage clusters by cgMLST distances. This hierarchical clustering is used to identify and name populations of C. difficile at all epidemiological levels, from recent transmission chains through to epidemic and endemic strains. Moreover, it puts newly collected isolates into phylogenetic and epidemiological context by identifying related strains among all previously published genome data. For example, HC2 clusters (i.e. chains of genomes with pairwise distances of up to two cgMLST alleles) were statistically associated with specific hospitals (P<10-4) or single wards (P=0.01) within hospitals, indicating they represented local transmission clusters. We also detected several HC2 clusters spanning more than one hospital that by retrospective epidemiological analysis were confirmed to be associated with inter-hospital patient transfers. In contrast, clustering at level HC150 correlated with k-mer-based classification and was largely compatible with PCR ribotyping, thus enabling comparisons to earlier surveillance data. EnteroBase enables contextual interpretation of a growing collection of assembled, quality-controlled C. difficile genome sequences and their associated metadata. Hierarchical clustering rapidly identifies database entries that are related at multiple levels of genetic distance, facilitating communication among researchers, clinicians and public-health officials who are combatting disease caused by C. 
difficile.",EnteroBase,0.997007787,NA,0,EnteroBase,0.997007787,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/29/2020 +26582924,http://envipath.org,"enviPath--The environmental contaminant biotransformation pathway resource. The University of Minnesota Biocatalysis/Biodegradation Database and Pathway Prediction System (UM-BBD/PPS) has been a unique resource covering microbial biotransformation pathways of primarily xenobiotic chemicals for over 15 years. This paper introduces the successor system, enviPath (The Environmental Contaminant Biotransformation Pathway Resource), which is a complete redesign and reimplementation of UM-BBD/PPS. enviPath uses the database from the UM-BBD/PPS as a basis, extends the use of this database, and allows users to include their own data to support multiple use cases. Relative reasoning is supported for the refinement of predictions and to allow its extensions in terms of previously published, but not implemented machine learning models. User access is simplified by providing a REST API that simplifies the inclusion of enviPath into existing workflows. An RDF database is used to enable simple integration with other databases. enviPath is publicly available at https://envipath.org with free and open access to its core data.",enviPath,0.992086053,The Environmental Contaminant Biotransformation Pathway Resource,0.90123873,enviPath,0.992086053,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +29683130,http://enviroatlas.epa.gov,"An inventory of continental U.S. terrestrial candidate ecological restoration areas based on landscape context. Landscape context is an important factor in restoration ecology, but the use of landscape context for site prioritization has not been as fully developed. We used morphological image processing to identify candidate ecological restoration areas based on their proximity to existing natural vegetation. We identified 1,102,720 candidate ecological restoration areas across the continental United States. 
Candidate ecological restoration areas were concentrated in the Great Plains and eastern United States. We populated the database of candidate ecological restoration areas with 17 attributes related to site content and context, including factors such as soil fertility and roads (site content), and number and area of potentially conjoined vegetated regions (site context) to facilitate its use for site prioritization. We demonstrate the utility of the database in the state of North Carolina, U.S.A. for a restoration objective related to restoration of water quality (mandated by the U.S. Clean Water Act), wetlands, and forest. The database will be made publicly available on the U.S. Environmental Protection Agency's EnviroAtlas website (http://enviroatlas.epa.gov) for stakeholders interested in ecological restoration.",EnviroAtlas,0.60561341,NA,0,EnviroAtlas,0.60561341,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/1/2017 +27664130,"http://www.environmentontology.org/, http://purl.obolibrary.org/obo/envo.owl","The environment ontology in 2016: bridging domains with increased scope, semantic density, and interoperation. Background The Environment Ontology (ENVO; http://www.environmentontology.org/ ), first described in 2013, is a resource and research target for the semantically controlled description of environmental entities. The ontology's initial aim was the representation of the biomes, environmental features, and environmental materials pertinent to genomic and microbiome-related investigations. However, the need for environmental semantics is common to a multitude of fields, and ENVO's use has steadily grown since its initial description. We have thus expanded, enhanced, and generalised the ontology to support its increasingly diverse applications. 
Methods We have updated our development suite to promote expressivity, consistency, and speed: we now develop ENVO in the Web Ontology Language (OWL) and employ templating methods to accelerate class creation. We have also taken steps to better align ENVO with the Open Biological and Biomedical Ontologies (OBO) Foundry principles and interoperate with existing OBO ontologies. Further, we applied text-mining approaches to extract habitat information from the Encyclopedia of Life and automatically create experimental habitat classes within ENVO. Results Relative to its state in 2013, ENVO's content, scope, and implementation have been enhanced and much of its existing content revised for improved semantic representation. ENVO now offers representations of habitats, environmental processes, anthropogenic environments, and entities relevant to environmental health initiatives and the global Sustainable Development Agenda for 2030. Several branches of ENVO have been used to incubate and seed new ontologies in previously unrepresented domains such as food and agronomy. The current release version of the ontology, in OWL format, is available at http://purl.obolibrary.org/obo/envo.owl . Conclusions ENVO has been shaped into an ontology which bridges multiple domains including biomedicine, natural and anthropogenic ecology, 'omics, and socioeconomic development. Through continued interactions with our users and partners, particularly those performing data archiving and sythesis, we anticipate that ENVO's growth will accelerate in 2017. 
As always, we invite further contributions and collaboration to advance the semantic representation of the environment, ranging from geographic features and environmental materials, across habitats and ecosystems, to everyday objects in household settings.",ENVO,0.97583425,Ontology,0.52125144,ENVO,0.97583425,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/23/2016 +22489867,http://biotechlab.fudan.edu.cn/database/EnzyBase/home.php,"EnzyBase: a novel database for enzybiotic studies. Background Enzybiotics are becoming increasingly recognized as potential alternative therapies for drug-resistant bacteria. Although only a few enzybiotics are currently well characterized, much information is still missing or is unavailable for researchers. The construction of an enzybiotics database would therefore increase efficiency and convenience in investigating these bioactive proteins and thus help reduce or delay the recent increase in antibiotic resistance. Description In the present manuscript, we describe the development of a novel and original database called EnzyBase, which contains 1144 enzybiotics from 216 natural sources. To ensure data quality, we limited the source of information to authoritative public databases and published scientific literature. The interface of EnzyBase is easy to use and allows users to rapidly retrieve data according to their desired search criteria and blast the database for homologous sequences. We also describe examples of database-aided enzybiotics discovery and design. Conclusion EnzyBase serves as a unique tool for enzybiotic studies. It has several potential applications, e.g. in silico enzybiotic combination as cocktails, and novel enzybiotic design, in response to continuously emerging drug-resistant pathogens. This database is a valuable platform for researchers who are interested in enzybiotic studies. 
EnzyBase is available online at http://biotechlab.fudan.edu.cn/database/EnzyBase/home.php.",EnzyBase,0.992943108,NA,0,EnzyBase,0.992943108,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/11/2012 +33002112,http://www.rxnfinder.org/enzymine,"EnzyMine: a comprehensive database for enzyme function annotation with enzymatic reaction chemical feature. . Addition of chemical structural information in enzymatic reactions has proven to be significant for accurate enzyme function prediction. However, such chemical data lack systematic feature mining and hardly exist in enzyme-related databases. Therefore, global mining of enzymatic reactions will offer a unique landscape for researchers to understand the basic functional mechanisms of natural bioprocesses and facilitate enzyme function annotation. Here, we established a new knowledge base called EnzyMine, through which we propose to elucidate enzymatic reaction features and then link them with sequence and structural annotations. EnzyMine represents an advanced database that extends enzyme knowledge by incorporating reaction chemical feature strategies, strengthening the connectivity between enzyme and metabolic reactions. Therefore, it has the potential to reveal many new metabolic pathways involved with given enzymes, as well as expand enzyme function annotation. Database URL: http://www.rxnfinder.org/enzymine/.",EnzyMine,0.996929705,NA,0,EnzyMine,0.996929705,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2020 +29351734,http://ciliates.ihb.ac.cn/database/home,"EOGD: the Euplotes octocarinatus genome database. Background Euplotes, a ciliated protozoan, is a useful unicellular model organism. Studies on Euplotes have provided excellent insights into various basic biological principles. We have recently sequenced the macronuclear genome of the common freshwater species Euplotes octocarinatus to provide novel insights into Euplotes genetics and molecular biology. Results In this study, we present the E. 
octocarinatus Genome Database (EOGD), a functional annotation and analysis platform for the global study of the Euplotes genome. EOGD includes macronuclear genomic and transcriptomic data, predicted gene models, coding sequences, protein sequences, and functional annotations. The GBrowser and BLAST tools are embedded in EOGD to enable the search, visualization and analysis of E. octocarinatus genomic and transcriptomic data. Conclusions EOGD is a useful resource for the research community, particularly for researchers who conduct genome-scale analysis and molecular biology studies of Euplotes or other ciliates. EOGD will be continuously updated to integrate more datasets and analytical tools. EOGD is freely available at http://ciliates.ihb.ac.cn/database/home/#eo .",EOGD,0.995608449,octocarinatus Genome Database,0.981783918,EOGD,0.995608449,1,30217145,NA,NA,NA,do not merge,NA,NA,NA,NA,1/19/2018 +28981707,http://peptracker.com/epd,"The Encyclopedia of Proteome Dynamics: a big data ecosystem for (prote)omics. Driven by improvements in speed and resolution of mass spectrometers (MS), the field of proteomics, which involves the large-scale detection and analysis of proteins in cells, tissues and organisms, continues to expand in scale and complexity. There is a resulting growth in datasets of both raw MS files and processed peptide and protein identifications. MS-based proteomics technology is also used increasingly to measure additional protein properties affecting cellular function and disease mechanisms, including post-translational modifications, protein-protein interactions, subcellular and tissue distributions. Consequently, biologists and clinicians need innovative tools to conveniently analyse, visualize and explore such large, complex proteomics data and to integrate it with genomics and other related large-scale datasets. We have created the Encyclopedia of Proteome Dynamics (EPD) to meet this need (https://peptracker.com/epd/). 
The EPD combines a polyglot persistent database and web-application that provides open access to integrated proteomics data for >30 000 proteins from published studies on human cells and model organisms. It is designed to provide a user-friendly interface, featuring graphical navigation with interactive visualizations that facilitate powerful data exploration in an intuitive manner. The EPD offers a flexible and scalable ecosystem to integrate proteomics data with genomics information, RNA expression and other related, large-scale datasets.",EPD,0.979538023,Encyclopedia of Proteome Dynamics,0.85972634,EPD,0.979538023,1,NA,"23193273.0, 31680159.0",NA,NA,NA,do not merge,NA,NA,NA,1/1/2018 +23193273,http://epd.vital-it.ch,"EPD and EPDnew, high-quality promoter resources in the next-generation sequencing era. The Eukaryotic Promoter Database (EPD), available online at http://epd.vital-it.ch, is a collection of experimentally defined eukaryotic POL II promoters which has been maintained for more than 25 years. A promoter is represented by a single position in the genome, typically the major transcription start site (TSS). EPD primarily serves biologists interested in analysing the motif content, chromatin structure or DNA methylation status of co-regulated promoter subsets. Initially, promoter evidence came from TSS mapping experiments targeted at single genes and published in journal articles. Today, the TSS positions provided by EPD are inferred from next-generation sequencing data distributed in electronic form. Traditionally, EPD has been a high-quality database with low coverage. The focus of recent efforts has been to reach complete gene coverage for important model organisms. To this end, we introduced a new section called EPDnew, which is automatically assembled from multiple, carefully selected input datasets. As another novelty, we started to use chromatin signatures in addition to mRNA 5'tags to locate promoters of weekly expressed genes. 
Regarding user interfaces, we introduced a new promoter viewer which enables users to explore promoter-defining experimental evidence in a UCSC genome browser window.",EPD,0.994761258,Eukaryotic Promoter Database,0.976350265,EPD,0.994761258,1,"25378343.0, 27899657.0","28981707.0, 31680159.0",NA,NA,merge on record with best name prob,merge only:,NA,NA,"23193273.0, 31680159.0; URL assoc with best name prob RESOLVES SATISFACTORILY",11/27/2012 +31680159,http://epd.epfl.ch,"EPD in 2020: enhanced data visualization and extension to ncRNA promoters. The Eukaryotic Promoter Database (EPD), available online at https://epd.epfl.ch, provides accurate transcription start site (TSS) information for promoters of 15 model organisms plus corresponding functional genomics data that can be viewed in a genome browser, queried or analyzed via web interfaces, or exported in standard formats (FASTA, BED, CSV) for subsequent analysis with other tools. Recent work has focused on the improvement of the EPD promoter viewers, which use the UCSC Genome Browser as visualization platform. Thousands of high-resolution tracks for CAGE, ChIP-seq and similar data have been generated and organized into public track hubs. Customized, reproducible promoter views, combining EPD-supplied tracks with native UCSC Genome Browser tracks, can be accessed from the organism summary pages or from individual promoter entries. Moreover, thanks to recent improvements and stabilization of ncRNA gene catalogs, we were able to release promoter collections for certain classes of ncRNAs from human and mouse. Furthermore, we developed automatic computational protocols to assign orphan TSS peaks to downstream genes based on paired-end (RAMPAGE) TSS mapping data, which enabled us to add nearly 9000 new entries to the human promoter collection. 
Since our last article in this journal, EPD was extended to five more model organisms: rhesus monkey, rat, dog, chicken and Plasmodium falciparum.",EPD,0.994192779,Eukaryotic Promoter Database,0.955298398,EPD,0.994192779,1,NA,"23193273.0, 28981707.0",NA,NA,NA,merge only:,NA,NA,"23193273.0, 31680159.0; URL assoc with best name prob RESOLVES SATISFACTORILY",1/1/2020 +25378343,http://epd.vital-it.ch,"The Eukaryotic Promoter Database: expansion of EPDnew and new promoter analysis tools. We present an update of EPDNew (http://epd.vital-it.ch), a recently introduced new part of the Eukaryotic Promoter Database (EPD) which has been described in more detail in a previous NAR Database Issue. EPD is an old database of experimentally characterized eukaryotic POL II promoters, which are conceptually defined as transcription initiation sites or regions. EPDnew is a collection of automatically compiled, organism-specific promoter lists complementing the old corpus of manually compiled promoter entries of EPD. This new part is exclusively derived from next generation sequencing data from high-throughput promoter mapping experiments. We report on the recent growth of EPDnew, its extension to additional model organisms and its improved integration with other bioinformatics resources developed by our group, in particular the Signal Search Analysis and ChIP-Seq web servers.",EPDnew,0.981706798,The Eukaryotic Promoter Database,0.753703289,EPDnew,0.981706798,1,"23193273.0, 27899657.0",NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/6/2014 +29040751,http://www.epidenovo.biols.ac.cn,"EpiDenovo: a platform for linking regulatory de novo mutations to developmental epigenetics and diseases. De novo mutations (DNMs) have been shown to be a major cause of severe early-onset genetic disorders such as autism spectrum disorder and intellectual disability. 
Over one million DNMs have been identified in developmental disorders by next generation sequencing, but linking these DNMs to the genes that they impact remains a challenge, as the majority of them are embedded in non-coding regions. As most developmental diseases occur in the early stages of development or during childhood, it is crucial to clarify the details of epigenetic regulation in early development in order to interpret the mechanisms underlying developmental disorders. Here, we develop EpiDenovo, a database that is freely available at http://www.epidenovo.biols.ac.cn/, and which provides the associations between embryonic epigenomes and DNMs in developmental disorders, including several neuropsychiatric disorders and congenital heart disease. EpiDenovo provides an easy-to-use web interface allowing users rapidly to find the epigenetic signatures of DNMs and the expression patterns of the genes that they regulate during embryonic development. In summary, EpiDenovo is a useful resource for selecting candidate genes for further functional studies in embryonic development, and for investigating regulatory DNMs as well as other genetic variants causing or underlying developmental disorders.",EpiDenovo,0.997566879,NA,0,EpiDenovo,0.997566879,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +35308974,http://bmhinformatics.case.edu/Epilepsyconnect/login,"Epilepsy-Connect: An Integrated Knowledgebase for Characterizing Alterations in Consciousness State of Pharmacoresistant Epilepsy Patients. Alterations in consciousness state are a defining characteristic of focal epileptic seizures. Consequently, understanding the complex changes in neurocognitive networks which underpin seizure-induced alterations in consciousness state is important for advancement in seizure classification. Comprehension of these changes are complicated by a lack of data standardization; however, the use of a common terminological system or ontology in a patient registry minimizes this issue. 
In this paper, we introduce an integrated knowledgebase called Epilepsy-Connect to improve the understanding of changes in consciousness states during focal seizures of pharmacoresistant epilepsy patients. This registry catalogues over 809 seizures from 70 patients at University Hospital's Epilepsy Center who were undergoing stereotactic electroencephalography (SEEG) monitoring as part of an evaluation for surgical intervention. Although Epilepsy-Connect focuses on consciousness states, it aims to enable users to leverage data from an informatics platform to analyze epilepsy data in a streamlined manner. Epilepsy-Connect is available at https://bmhinformatics.case.edu/Epilepsyconnect/login/.",Epilepsy-Connect,0.927172029,NA,0,Epilepsy-Connect,0.927172029,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +25324312,http://61.152.91.49/EpilepsyGene,"EpilepsyGene: a genetic resource for genes and mutations related to epilepsy. Epilepsy is one of the most prevalent chronic neurological disorders, afflicting about 3.5-6.5 per 1000 children and 10.8 per 1000 elderly people. With intensive effort made during the last two decades, numerous genes and mutations have been published to be associated with the disease. An organized resource integrating and annotating the ever-increasing genetic data will be imperative to acquire a global view of the cutting-edge in epilepsy research. Herein, we developed EpilepsyGene (http://61.152.91.49/EpilepsyGene). It contains cumulative to date 499 genes and 3931 variants associated with 331 clinical phenotypes collected from 818 publications. Furthermore, in-depth data mining was performed to gain insights into the understanding of the data, including functional annotation, gene prioritization, functional analysis of prioritized genes and overlap analysis focusing on the comorbidity. An intuitive web interface to search and browse the diversified genetic data was also developed to facilitate access to the data of interest. 
In general, EpilepsyGene is designed to be a central genetic database to provide the research community substantial convenience to uncover the genetic basis of epilepsy.",EpilepsyGene,0.996541262,NA,0,EpilepsyGene,0.996541262,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/16/2014 +24682734,http://bioinfo.hrbmu.edu.cn/EpimiR,"EpimiR: a database of curated mutual regulation between miRNAs and epigenetic modifications. As two kinds of important gene expression regulators, both epigenetic modification and microRNA (miRNA) can play significant roles in a wide range of human diseases. Recently, many studies have demonstrated that epigenetics and miRNA can affect each other in various ways. In this study, we established the EpimiR database, which collects 1974 regulations between 19 kinds of epigenetic modifications (such as DNA methylation, histone acetylation, H3K4me3, H3S10p) and 617 miRNAs across seven species (including Homo sapiens, Mus musculus, Rattus norvegicus, Gallus gallus, Epstein-Barr virus, Canis familiaris and Arabidopsis thaliana) from >300 references in the literature. These regulations can be divided into two parts: miR2Epi (103 entries describing how miRNA regulates epigenetic modification) and Epi2miR (1871 entries describing how epigenetic modification affects miRNA). Each entry of EpimiR not only contains basic descriptions of the validated experiment (method, species, reference and so on) but also clearly illuminates the regulatory pathway between epigenetics and miRNA. As a supplement to the curated information, the EpimiR extends to gather predicted epigenetic features (such as predicted transcription start site, upstream CpG island) associated with miRNA for users to guide their future biological experiments. Finally, EpimiR offers download and submission pages. 
Thus, EpimiR provides a fairly comprehensive repository about the mutual regulation between epigenetic modifications and miRNAs, which will promote the research on the regulatory mechanism of epigenetics and miRNA. Database URL: http://bioinfo.hrbmu.edu.cn/EpimiR/.",EpimiR,0.989621162,NA,0,EpimiR,0.989621162,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/28/2014 +26748106,http://www.epimirbase.eu,"EpimiRBase: a comprehensive database of microRNA-epilepsy associations. Unlabelled MicroRNAs are short non-coding RNA which function to fine-tune protein levels in all cells. This is achieved mainly by sequence-specific binding to 3' untranslated regions of target mRNA. The result is post-transcriptional interference in gene expression which reduces protein levels either by promoting destabilisation of mRNA or translational repression. Research published since 2010 shows that microRNAs are important regulators of gene expression in epilepsy. A series of microRNA profiling studies in rodent and human tissue has revealed that epilepsy is associated with wide ranging changes to microRNA levels in the brain. These are thought to influence processes including cell death, inflammation and re-wiring of neuronal networks. MicroRNAs have also been identified in the blood after injury to the brain and therefore may serve as biomarkers of epilepsy. EpimiRBase is a manually curated database for researchers interested in the role of microRNAs in epilepsy. The fully searchable database includes information on up- and down-regulated microRNAs in the brain and blood, as well as functional studies, and covers both rodent models and human epilepsy. Availability and implementation EpimiRBase is available at http://www.epimirbase.eu Contact catherinemooney@rcsi.ie.",EpimiRBase,0.9903965,NA,0,EpimiRBase,0.9903965,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/8/2016 +32459338,http://epiregio.de,"EpiRegio: analysis and retrieval of regulatory elements linked to genes. 
A current challenge in genomics is to interpret non-coding regions and their role in transcriptional regulation of possibly distant target genes. Genome-wide association studies show that a large part of genomic variants are found in those non-coding regions, but their mechanisms of gene regulation are often unknown. An additional challenge is to reliably identify the target genes of the regulatory regions, which is an essential step in understanding their impact on gene expression. Here we present the EpiRegio web server, a resource of regulatory elements (REMs). REMs are genomic regions that exhibit variations in their chromatin accessibility profile associated with changes in expression of their target genes. EpiRegio incorporates both epigenomic and gene expression data for various human primary cell types and tissues, providing an integrated view of REMs in the genome. Our web server allows the analysis of genes and their associated REMs, including the REM's activity and its estimated cell type-specific contribution to its target gene's expression. Further, it is possible to explore genomic regions for their regulatory potential, investigate overlapping REMs and by that the dissection of regions of large epigenomic complexity. EpiRegio allows programmatic access through a REST API and is freely available at https://epiregio.de/.",EpiRegio,0.996769607,NA,0,EpiRegio,0.996769607,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2020 +24213601,http://epitrans.org,"EPITRANS: a database that integrates epigenome and transcriptome data. Epigenetic modifications affect gene expression and thereby govern a wide range of biological processes such as differentiation, development and tumorigenesis. Recent initiatives to define genome-wide DNA methylation and histone modification profiles by microarray and sequencing methods have led to the construction of databases. 
These databases are repositories for international epigenetic consortiums or provide mining results from PubMed, but do not integrate the epigenetic information with gene expression changes. In order to overcome this limitation, we constructed EPITRANS, a novel database that visualizes the relationships between gene expression and epigenetic modifications. EPITRANS uses combined analysis of epigenetic modification and gene expression to search for cell function-related epigenetic and transcriptomic alterations (Freely available on the web at http://epitrans.org ).",EPITRANS,0.996830702,NA,0,EPITRANS,0.996830702,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2013 +30458204,http://www.epregistry.com.br,"Second update of the International Registry of HLA Epitopes. I. The HLA-ABC Epitope Database. The International Registry of HLA Epitopes (http://www.epregistry.com.br) is a website-based resource for HLA epitopes important in transplant rejection and platelet transfusion refractoriness. Its primary goal is to document epitopes that are verified experimentally with specific antibodies. Such epitopes can be defined by single eplets and by eplets paired with certain polymorphic residues within a 15-√ɬÉ√ǬÉ√ɬÇ√Ç¬Ö radius, the dimension of the corresponding structural epitope. This report is an update of the HLA-ABC repertoire including descriptions of 72 antibody-verifications of epitopes defined by eplets and/or eplet pairs. The newly updated version 2.0 EpRegistry shows also the polymorphic residue compositions of structural epitopes corresponding to eplets shared between groups of alleles. At present, 151 eplets have not been antibody-verified, and we ranked them with a so-called ElliPro score as a potential predictor of immunogenicity. 
Sixty eplets with low ElliPro scores might be considered non-epitopes incapable of inducing specific antibodies.",EpRegistry,0.841949165,of,0.78137511,EpRegistry,0.841949165,1,25305456,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,11/17/2018 +23161695,http://bioservices.hs-mittweida.de/Epros,"eProS--a database and toolbox for investigating protein sequence-structure-function relationships through energy profiles. Gaining information about structural and functional features of newly identified proteins is often a difficult task. This information is crucial for understanding sequence-structure-function relationships of target proteins and, thus, essential in comprehending the mechanisms and dynamics of the molecular systems of interest. Using protein energy profiles is a novel approach that can contribute in addressing such problems. An energy profile corresponds to the sequence of energy values that are derived from a coarse-grained energy model. Energy profiles can be computed from protein structures or predicted from sequences. As shown, correspondences and dissimilarities in energy profiles can be applied for investigations of protein mechanics and dynamics. We developed eProS (energy profile suite, freely available at http://bioservices.hs-mittweida.de/Epros/), a database that provides √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº76√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ000 pre-calculated energy profiles as well as a toolbox for addressing numerous problems of structure biology. Energy profiles can be browsed, visualized, calculated from an uploaded structure or predicted from sequence. Furthermore, it is possible to align energy profiles of interest or compare them with all entries in the eProS database to identify significantly similar energy profiles and, thus, possibly relevant structural and functional relationships. 
Additionally, annotations and cross-links from numerous sources provide a broad view of potential biological correspondences.",eProS,0.996275544,profile suite,0.617196172,eProS,0.996275544,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2012 +30446142,http://www.epsdatabase.com,"The exopolysaccharide properties and structures database: EPS-DB. Application to bacterial exopolysaccharides. The EPS Database (EPS-DB) is a web-based, platform-independent database of bacterial exopolysaccharides (EPSs) providing access to detailed structural, taxonomic, growth conditions, functional properties, genetic, and bibliographic information for EPSs. It is freely available on the Internet as a website at http://www.epsdatabase.com. Several structural data representation schemes are used following the most commonly accepted formats. This guarantees full interoperability with other structural, experimental, and functional databases in the area of glycoscience. The scientific usage of EPS-DB throughout a user-friendly interface is presented with a subsection of the database exemplified by EPSs from lactic acid bacteria.",EPS-DB,0.998128307,Database,0.788730741,EPS-DB,0.998128307,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2018 +32008039,http://epsd.biocuckoo.cn,"EPSD: a well-annotated data resource of protein phosphorylation sites in eukaryotes. As an important post-translational modification (PTM), protein phosphorylation is involved in the regulation of almost all of biological processes in eukaryotes. Due to the rapid progress in mass spectrometry-based phosphoproteomics, a large number of phosphorylation sites (p-sites) have been characterized but remain to be curated. Here, we briefly summarized the current progresses in the development of data resources for the collection, curation, integration and annotation of p-sites in eukaryotic proteins. 
Also, we designed the eukaryotic phosphorylation site database (EPSD), which contained 1√ɬÉ√ǬÇ√ɬÇ√Ǭ†616√ɬÉ√ǬÇ√ɬÇ√Ǭ†804 experimentally identified p-sites in 209√ɬÉ√ǬÇ√ɬÇ√Ǭ†326 phosphoproteins from 68 eukaryotic species. In EPSD, we not only collected 1√ɬÉ√ǬÇ√ɬÇ√Ǭ†451√ɬÉ√ǬÇ√ɬÇ√Ǭ†629 newly identified p-sites from high-throughput (HTP) phosphoproteomic studies, but also integrated known p-sites from 13 additional databases. Moreover, we carefully annotated the phosphoproteins and p-sites of eight model organisms by integrating the knowledge from 100 additional resources that covered 15 aspects, including phosphorylation regulator, genetic variation and mutation, functional annotation, structural annotation, physicochemical property, functional domain, disease-associated information, protein-protein interaction, drug-target relation, orthologous information, biological pathway, transcriptional regulator, mRNA expression, protein expression/proteomics and subcellular localization. We anticipate that the EPSD can serve as a useful resource for further analysis of eukaryotic phosphorylation. With a data volume of 14.1√ɬÉ√ǬÇ√ɬÇ√Ǭ†GB, EPSD is free for all users at http://epsd.biocuckoo.cn/.",EPSD,0.98701781,eukaryotic phosphorylation site database,0.948447161,EPSD,0.98701781,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +24678734,http://epslim.bwh.harvard.edu,"Research resource: EPSLiM: ensemble predictor for short linear motifs in nuclear hormone receptors. Nuclear receptors (NRs) are a superfamily of transcription factors central to regulating many biological processes, including cell growth, death, metabolism, and immune responses. NR-mediated gene expression can be modulated by coactivators and corepressors through direct physical interaction or protein complexes with functional domains in NRs. 
One class of these domains includes short linear motifs (SLiMs), which facilitate protein-protein interactions, phosphorylation, and ligand binding primarily in the intrinsically disordered regions (IDRs) of proteins. Across all proteins, the number of known SLiMs is limited due to the difficulty in studying IDRs experimentally. Computational tools provide a systematic and data-driven approach for predicting functional motifs that can be used to prioritize experimental efforts. Accordingly, several tools have been developed based on sequence conservation or biophysical features; however, discrepancies in predictions make it difficult to determine the true candidate SLiMs. In this work, we present the ensemble predictor for short linear motifs (EPSLiM), a novel strategy to prioritize the residues that are most likely to be SLiMs in IDRs. EPSLiM applies a generalized linear model to integrate predictions from individual methodologies. We show that EPSLiM outperforms individual predictors, and we apply our method to NRs. The androgen receptor is an example with an N-terminal domain of 559 disordered amino acids that contains several validated SLiMs important for transcriptional activation. We use the androgen receptor to illustrate the predictive performance of EPSLiM and make the results of all human and mouse NRs publically available through the web service http://epslim.bwh.harvard.edu.",EPSLiM,0.966204286,predictor for,0.749277949,EPSLiM,0.966204286,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/28/2014 +34493866,http://www.ebi.ac.uk/eqtl,"A compendium of uniformly processed human gene expression and splicing quantitative trait loci. Many gene expression quantitative trait locus (eQTL) studies have published their summary statistics, which can be used to gain insight into complex human traits by downstream analyses, such as fine mapping and co-localization. However, technical differences between these datasets are a barrier to their widespread use. 
Consequently, target genes for most genome-wide association study (GWAS) signals have still not been identified. In the present study, we present the eQTL Catalogue ( https://www.ebi.ac.uk/eqtl ), a resource of quality-controlled, uniformly re-computed gene expression and splicing QTLs from 21 studies. We find that, for matching cell types and tissues, the eQTL effect sizes are highly reproducible between studies. Although most QTLs were shared between most bulk tissues, we identified a greater diversity of cell-type-specific QTLs from purified cell types, a subset of which also manifested as new disease co-localizations. Our summary statistics are freely available to enable the systematic interpretation of human GWAS associations across many cell types and tissues.",eQTL,0.880491197,NA,0,eQTL,0.880491197,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/6/2021 +29106618,http://www.unimd.org/eram,"eRAM: encyclopedia of rare disease annotations for precision medicine. Rare diseases affect over a hundred million people worldwide, most of these patients are not accurately diagnosed and effectively treated. The limited knowledge of rare diseases forms the biggest obstacle for improving their treatment. Detailed clinical phenotyping is considered as a keystone of deciphering genes and realizing the precision medicine for rare diseases. Here, we preset a standardized system for various types of rare diseases, called encyclopedia of Rare disease Annotations for Precision Medicine (eRAM). eRAM was built by text-mining nearly 10 million scientific publications and electronic medical records, and integrating various data in existing recognized databases (such as Unified Medical Language System (UMLS), Human Phenotype Ontology, Orphanet, OMIM, GWAS). eRAM systematically incorporates currently available data on clinical manifestations and molecular mechanisms of rare diseases and uncovers many novel associations among diseases. 
eRAM provides enriched annotations for 15 942 rare diseases, yielding 6147 human disease related phenotype terms, 31 661 mammalians phenotype terms, 10,202 symptoms from UMLS, 18 815 genes and 92 580 genotypes. eRAM can not only provide information about rare disease mechanism but also facilitate clinicians to make accurate diagnostic and therapeutic decisions towards rare diseases. eRAM can be freely accessed at http://www.unimd.org/eram/.",eRAM,0.992225349,encyclopedia of Rare disease Annotations for,0.910854608,eRAM,0.992225349,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +23299413,http://lemur.amu.edu.pl/share/ERISdb,"ERISdb: a database of plant splice sites and splicing signals. Splicing is one of the major contributors to observed spatiotemporal diversification of transcripts and proteins in metazoans. There are numerous factors that affect the process, but splice sites themselves along with the adjacent splicing signals are critical here. Unfortunately, there is still little known about splicing in plants and, consequently, further research in some fields of plant molecular biology will encounter difficulties. Keeping this in mind, we performed a large-scale analysis of splice sites in eight plant species, using novel algorithms and tools developed by us. The analyses included identification of orthologous splice sites, polypyrimidine tracts and branch sites. Additionally we identified putative intronic and exonic cis-regulatory motifs, U12 introns as well as splice sites in 45 microRNA genes in five plant species. We also provide experimental evidence for plant splice sites in the form of expressed sequence tag and RNA-Seq data. 
All the data are stored in a novel database called ERISdb and are freely available at http://lemur.amu.edu.pl/share/ERISdb/.",ERISdb,0.825776637,NA,0,ERISdb,0.825776637,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/7/2013 +23794736,http://www.maayanlab.net/ESCAPE,"ESCAPE: database for integrating high-content published data collected from human and mouse embryonic stem cells. High content studies that profile mouse and human embryonic stem cells (m/hESCs) using various genome-wide technologies such as transcriptomics and proteomics are constantly being published. However, efforts to integrate such data to obtain a global view of the molecular circuitry in m/hESCs are lagging behind. Here, we present an m/hESC-centered database called Embryonic Stem Cell Atlas from Pluripotency Evidence integrating data from many recent diverse high-throughput studies including chromatin immunoprecipitation followed by deep sequencing, genome-wide inhibitory RNA screens, gene expression microarrays or RNA-seq after knockdown (KD) or overexpression of critical factors, immunoprecipitation followed by mass spectrometry proteomics and phosphoproteomics. The database provides web-based interactive search and visualization tools that can be used to build subnetworks and to identify known and novel regulatory interactions across various regulatory layers. The web-interface also includes tools to predict the effects of combinatorial KDs by additive effects controlled by sliders, or through simulation software implemented in MATLAB. Overall, the Embryonic Stem Cell Atlas from Pluripotency Evidence database is a comprehensive resource for the stem cell systems biology community. 
Database URL: http://www.maayanlab.net/ESCAPE",ESCAPE,0.888467371,Embryonic Stem Cell Atlas from,0.817273289,ESCAPE,0.888467371,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/21/2013 +30143675,http://www.esccatlas.org,"ESCC ATLAS: A population wide compendium of biomarkers for Esophageal Squamous Cell Carcinoma. Esophageal cancer (EC) is the eighth most aggressive malignancy and its treatment remains a challenge due to the lack of biomarkers that can facilitate early detection. EC is identified in two major histological forms namely - Adenocarcinoma (EAC) and Squamous cell carcinoma (ESCC), each showing differences in the incidence among populations that are geographically separated. Hence the detection of potential drug target and biomarkers demands a population-centric understanding of the molecular and cellular mechanisms of EC. To provide an adequate impetus to the biomarker discovery for ESCC, which is the most prevalent esophageal cancer worldwide, here we have developed ESCC ATLAS, a manually curated database that integrates genetic, epigenetic, transcriptomic, and proteomic ESCC-related genes from the published literature. It consists of 3475 genes associated to molecular signatures such as, altered transcription (2600), altered translation (560), contain copy number variation/structural variations (233), SNPs (102), altered DNA methylation (82), Histone modifications (16) and miRNA based regulation (261). We provide a user-friendly web interface ( http://www.esccatlas.org , freely accessible for academic, non-profit users) that facilitates the exploration and the analysis of genes among different populations. 
We anticipate it to be a valuable resource for the population specific investigation and biomarker discovery for ESCC.",ESCC ATLAS,0.917660445,NA,0,ESCC ATLAS,0.917660445,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/24/2018 +23709164,http://p-pal.di.uminho.pt/about/databases,"ESCOLEX: a grade-level lexical database from European Portuguese elementary to middle school textbooks. In this article, we introduce ESCOLEX, the first European Portuguese children's lexical database with grade-level-adjusted word frequency statistics. Computed from a 3.2-million-word corpus, ESCOLEX provides 48,381 word forms extracted from 171 elementary and middle school textbooks for 6- to 11-year-old children attending the first six grades in the Portuguese educational system. Like other children's grade-level databases (e.g., Carroll, Davies, & Richman, 1971; Corral, Ferrero, & Goikoetxea, Behavior Research Methods, 41, 1009-1017, 2009; L√ɬÉ√ǬÉ√ɬÇ√Ǭ©t√ɬÉ√ǬÉ√ɬÇ√Ǭ©, Sprenger-Charolles, & Col√ɬÉ√ǬÉ√ɬÇ√Ǭ©, Behavior Research Methods, Instruments, & Computers, 36, 156-166, 2004; Zeno, Ivens, Millard, Duvvuri, 1995), ESCOLEX provides four frequency indices for each grade: overall word frequency (F), index of dispersion across the selected textbooks (D), estimated frequency per million words (U), and standard frequency index (SFI). It also provides a new measure, contextual diversity (CD). In addition, the number of letters in the word and its part(s) of speech, number of syllables, syllable structure, and adult frequencies taken from P-PAL (a European Portuguese corpus-based lexical database; Soares, Comesa√ɬÉ√ǬÉ√ɬÇ√Ǭ±a, Iriarte, Almeida, Sim√ɬÉ√ǬÉ√ɬÇ√Ǭµes, Costa, √ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭ¶, Machado, 2010; Soares, Iriarte, Almeida, Sim√ɬÉ√ǬÉ√ɬÇ√Ǭµes, Costa, Fran√ɬÉ√ǬÉ√ɬÇ√Ǭßa, √ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭ¶, Comesa√ɬÉ√ǬÉ√ɬÇ√Ǭ±a, in press) are provided. 
ESCOLEX will be a useful tool both for researchers interested in language processing and development and for professionals in need of verbal materials adjusted to children's developmental stages. ESCOLEX can be downloaded along with this article or from http://p-pal.di.uminho.pt/about/databases .",ESCOLEX,0.997294307,NA,0,ESCOLEX,0.997294307,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2014 +31263866,"http://esid.org/Working-Parties/Registry-Working-Party/ESID-Registry, http://cci-esid-reg-demo-app.uniklinik-freiburg.de/EERS","The NEW ESID online database network. SUMMARY:Primary Immunodeficiencies (PIDs) belong to the group of rare diseases. The European Society for Immunodeficiencies (ESID) operates an international research database application for continuous long-term documentation of patient data. The system is a web application which runs in a standard browser. Therefore, the system is easy to access from any location. Technically, the system is based on Gails backed by MariaDB with high standard security features to comply with the demands of a modern research platform. AVAILABILITY AND IMPLEMENTATION:The ESID Online Database is accessible via the official website: https://esid.org/Working-Parties/Registry-Working-Party/ESID-Registry. A demo system is available via: https://cci-esid-reg-demo-app.uniklinik-freiburg.de/EERS with user demouser and password Demo-2019.",ESID,0.5849545,NA,0,ESID,0.5849545,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2019 +28960889,http://soft.bioinfo-minzhao.org/esnail,"eSnail: A transcriptome-based molecular resource of the central nervous system for terrestrial gastropods. To expand on emerging terrestrial gastropod molecular resources, we have undertaken transcriptome-based sequencing of the central nervous system (CNS) from six ecologically invasive terrestrial gastropods. 
Focusing on snail species Cochlicella acuta and Helix aspersa and reticulated slugs Deroceras invadens, Deroceras reticulatum, Lehmannia nyctelia and Milax gagates, we obtained a total of 367,869,636 high-quality reads and compared them with existing CNS transcript resources for the invasive Mediterranean snail, Theba pisana. In total, we obtained 419,289 unique transcripts (unigenes) from 1,410,569 assembled contigs, with blast search analysis of multiple protein databases leading to the annotation of 124,268 unigenes, of which 92,544 mapped to ncbi nonredundant protein databases. We found that these transcriptomes have representatives in most biological functions, based on comparison of gene ontology, kegg pathway and protein family contents, demonstrating a high range of transcripts responsible for regulating metabolic activities and molecular functions occurring within the CNS. To provide an accessible genetic resource, we also demonstrate the presence of 66,687 microsatellites and 304,693 single-nucleotide variants, which can be used for the design of potentially thousands of unique primers for functional screening. An online ""eSnail"" database with a user-friendly web interface was implemented to query all the information obtained herein (http://soft.bioinfo-minzhao.org/esnail). We demonstrate the usefulness of the database through the mining of molluscan neuropeptides. As the most comprehensive CNS transcriptome resource for terrestrial gastropods, eSnail may serve as a useful gateway for researchers to explore gastropod CNS function for multiple purposes, including for the development of biocontrol approaches.",eSnail,0.99808991,NA,0,eSnail,0.99808991,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/12/2017 +23468181,http://www.bcbl.eu/databases/espal,"EsPal: one-stop shopping for Spanish word properties. This article introduces EsPal: a Web-accessible repository containing a comprehensive set of properties of Spanish words. 
EsPal is based on an extensible set of data sources, beginning with a 300 million token written database and a 460 million token subtitle database. Properties available include word frequency, orthographic structure and neighborhoods, phonological structure and neighborhoods, and subjective ratings such as imageability. Subword structure properties are also available in terms of bigrams and trigrams, biphones, and bisyllables. Lemma and part-of-speech information and their corresponding frequencies are also indexed. The website enables users either to upload a set of words to receive their properties or to receive a set of words matching constraints on the properties. The properties themselves are easily extensible and will be added over time as they become available. It is freely available from the following website: http://www.bcbl.eu/databases/espal/ .",EsPal,0.998021126,NA,0,EsPal,0.998021126,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2013 +25534749,http://nipgr.res.in/Essoildb,"EssOilDB: a database of essential oils reflecting terpene composition and variability in the plant kingdom. Plant essential oils are complex mixtures of volatile organic compounds, which play indispensable roles in the environment, for the plant itself, as well as for humans. The potential biological information stored in essential oil composition data can provide an insight into the silent language of plants, and the roles of these chemical emissions in defense, communication and pollinator attraction. In order to decipher volatile profile patterns from a global perspective, we have developed the ESSential OIL DataBase (EssOilDB), a continually updated, freely available electronic database designed to provide knowledge resource for plant essential oils, that enables one to address a multitude of queries on volatile profiles of native, invasive, normal or stressed plants, across taxonomic clades, geographical locations and several other biotic and abiotic influences. 
To our knowledge, EssOilDB is the only database in the public domain providing an opportunity for context based scientific research on volatile patterns in plants. EssOilDB presently contains 123√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ041 essential oil records spanning a century of published reports on volatile profiles, with data from 92 plant taxonomic families, spread across diverse geographical locations all over the globe. We hope that this huge repository of VOCs will facilitate unraveling of the true significance of volatiles in plants, along with creating potential avenues for industrial applications of essential oils. We also illustrate the use of this database in terpene biology and show how EssOilDB can be used to complement data from computational genomics to gain insights into the diversity and variability of terpenoids in the plant kingdom. EssOilDB would serve as a valuable information resource, for students and researchers in plant biology, in the design and discovery of new odor profiles, as well as for entrepreneurs--the potential for generating consumer specific scents being one of the most attractive and interesting topics in the cosmetic industry. Database URL: http://nipgr.res.in/Essoildb/",EssOilDB,0.99728632,NA,0,EssOilDB,0.99728632,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/22/2014 +23193256,http://bioweb.ensam.inra.fr/esther,"ESTHER, the database of the √ɬÉ√Ǭé√ɬÇ√Ǭ±/√ɬÉ√Ǭé√ɬÇ√Ǭ≤-hydrolase fold superfamily of proteins: tools to explore diversity of functions. The ESTHER database, which is freely available via a web server (http://bioweb.ensam.inra.fr/esther) and is widely used, is dedicated to proteins with an √ɬÉ√Ǭé√ɬÇ√Ǭ±/√ɬÉ√Ǭé√ɬÇ√Ǭ≤-hydrolase fold, and it currently contains >30 000 manually curated proteins. Herein, we report those substantial changes towards improvement that we have made to improve ESTHER during the past 8 years since our 2004 update. In particular, we generated 87 new families and increased the coverage of the UniProt Knowledgebase (UniProtKB). 
We also renewed the ESTHER website and added new visualization tools, such as the Overall Table and the Family Tree. We also address two topics of particular interest to the ESTHER users. First, we explain how the different enzyme classifications (bacterial lipases, peptidases, carboxylesterases) used by different communities of users are combined in ESTHER. Second, we discuss how variations of core architecture or in predicted active site residues result in a more precise clustering of families, and whether this strategy provides trustable hints to identify enzyme-like proteins with no catalytic activity.",ESTHER,0.996905684,NA,0,ESTHER,0.996905684,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2012 +30365030,http://www.nrc.ac.cn:9090/ETCM,"ETCM: an encyclopaedia of traditional Chinese medicine. Traditional Chinese medicine (TCM) is not only an effective solution for primary health care, but also a great resource for drug innovation and discovery. To meet the increasing needs for TCM-related data resources, we developed ETCM, an Encyclopedia of Traditional Chinese Medicine. ETCM includes comprehensive and standardized information for the commonly used herbs and formulas of TCM, as well as their ingredients. The herb basic property and quality control standard, formula composition, ingredient drug-likeness, as well as many other information provided by ETCM can serve as a convenient resource for users to obtain thorough information about a herb or a formula. To facilitate functional and mechanistic studies of TCM, ETCM provides predicted target genes of TCM ingredients, herbs, and formulas, according to the chemical fingerprint similarity between TCM ingredients and known drugs. A systematic analysis function is also developed in ETCM, which allows users to explore the relationships or build networks among TCM herbs, formulas,ingredients, gene targets, and related pathways or diseases. ETCM is freely accessible at http://www.nrc.ac.cn:9090/ETCM/. 
We expect ETCM to develop into a major data warehouse for TCM and to promote TCM related researches and drug development in the future.",ETCM,0.994092822,NA,0,ETCM,0.994092822,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +31412866,http://biosoft.kaist.ac.kr/etm,"ETM-DB: integrated Ethiopian traditional herbal medicine and phytochemicals database. Background Recently, there has been an increasing tendency to go back to nature in search of new medicines. To facilitate this, a great deal of effort has been made to compile information on natural products worldwide, and as a result, many ethnic-based traditional medicine databases have been developed. In Ethiopia, there are more than 80 ethnic groups, each having their indigenous knowledge on the use of traditional medicine. About 80% of the population uses traditional medicine for primary health care. Despite this, there is no structured online database for Ethiopian traditional medicine, which limits natural products based drug discovery researches using natural products from this country. Description To develop ETM-DB, online research articles, theses, books, and public databases containing Ethiopian herbal medicine and phytochemicals information were searched. These resources were thoroughly inspected and the necessary data were extracted. Then, we developed a comprehensive online relational database which contains information on 1054 Ethiopian medicinal herbs with 1465 traditional therapeutic uses, 573 multi-herb prescriptions, 4285 compounds, 11,621 human target gene/proteins, covering 5779 herb-phenotype, 1879 prescription-herb, 16,426 herb-compound, 105,202 compound-phenotype, 162,632 compound-gene/protein, and 16,584 phenotype-gene/protein relationships. Using various cheminformatics tools, we obtained predicted physicochemical and absorption, distribution, metabolism, excretion, and toxicity (ADMET) properties of ETM-DB compounds. 
We also evaluated drug-likeness properties of these compounds using FAF-Drugs4 webserver. From the 4285 compounds, 4080 of them passed the FAF-Drugs4 input data curation stage, of which 876 were found to have acceptable drug-likeness properties. Conclusion ETM-DB is the largest, freely accessible, web-based integrated resource on Ethiopian traditional medicine. It provides traditional herbal medicine entities and their relationships in well-structured forms including reference to the sources. The ETM-DB website interface allows users to search the entities using various options provided by the search menu. We hope that our database will expedite drug discovery and development researches from Ethiopian natural products as it contains information on the chemical composition and related human target gene/proteins. The current version of ETM-DB is openly accessible at http://biosoft.kaist.ac.kr/etm .",ETM-DB,0.991336733,NA,0,ETM-DB,0.991336733,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/14/2019 +27481021,http://www.etoxproject.eu,"The eTOX Library of Public Resources for in Silico Toxicity Prediction. (1000-1500 characters) In spite of the increasing amount of public access resources that offer original data related to drug toxicology, the successful exploitation of such data for the development of in silico predictive models is still limited by the quality of the data available, its integrability and its coverage for each toxicity endpoint. This work describes the strategy developed by the IMI eTOX consortium for identifying and compiling data and other related resources from the biomedical literature and a wide spectrum of public on-line sources. The main result of this effort is a large web-based structured library containing links to articles of toxicological relevance (data that can be used for modeling purposes, computational models, and toxicity mechanisms), public databases, standardized vocabularies and modeling tools. 
All this material has been manually reviewed, systematically evaluated and grouped into different categories. The library has been made public at the eTOX website (http://www.etoxproject.eu/), where it is updated on a monthly basis, constituting a useful resource for affording the in silico toxicity prediction of novel drug candidates.",eTOX,0.934810936,NA,0,eTOX,0.934810936,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/11/2013 +31887789,http://klab.sjtu.edu.cn/enhancer,"ETph: enhancers and their targets in pig and human database. Enhancers, as the genomic non-coding sequences, play a key role in the activation of gene expression. They have been widely identified in the human genome. Pig is an important biomedical model for human health. Few studies have been performed to explore the enhancers in the pig genome. The human enhancer information may be useful to identify enhancers in the pig genome. In addition, the genetic background of pig traits could be useful to annotate human enhancers and diseases. Thus, in order to further study enhancers and their potential roles in human and pig, we developed a public database, ETph (Enhancers and their Targets in pig and human). ETph integrates the information on human enhancers, pig putative enhancers, target genes, pig QTL terms, human diseases, GO terms and the KEGG pathway. A total of 25√ɬÉ√ǬÇ√ɬÇ√Ǭ†182 enhancers were identified in the pig genome using the human homology sequence information. Among them, 6232 high-confidence enhancers were used to build the ETph. ETph provides a convenient platform to search, browse and download data. Moreover, a web-based analytical tool was designed to visualize networks and topology graphs among pig putative enhancers, target genes, pig QTL traits and human diseases. ETph might provide a useful tool for researchers to investigate the genetic background of pig traits and human diseases. 
ETph is freely accessible at http://klab.sjtu.edu.cn/enhancer/.",ETph,0.99349612,NA,0,ETph,0.99349612,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/30/2019 +22102771,http://www.inbiosis.ukm.my/eudbase,"EuDBase: An online resource for automated EST analysis pipeline (ESTFrontier) and database for red seaweed Eucheuma denticulatum. Functional genomics has proven to be an efficient tool in identifying genes involved in various biological functions. However the availability of commercially important seaweed Eucheuma denticulatum functional resources is still limited. EuDBase is the first seaweed online repository that provides integrated access to ESTs of Eucheuma denticulatum generated from samples collected from Kudat and Semporna in Sabah, Malaysia. The database stored 10,031 ESTs that are clustered and assembled into 2,275 unique transcripts (UT) and 955 singletons. Raw data were automatically processed using ESTFrontier, an in-house automated EST analysis pipeline. Data was collected in MySQL database. Web interface is implemented using PHP and it allows browsing and querying EuDBase through search engine. Data is searchable via BLAST hit, domain search, Gene Ontology or KEGG Pathway. A user-friendly interface allows the identification of sequences either using a simple text query or similarity search. The development of EuDBase is initiated to store, manage and analyze the E. denticulatum ESTs and to provide accumulative digital resources for the use of global scientific community. EuDBase is freely available from http://www.inbiosis.ukm.my/eudbase/.",EuDBase,0.996863604,NA,0,EuDBase,0.996863604,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/14/2011 +27899657,http://epd.vital-it.ch,"The eukaryotic promoter database in its 30th year: focus on non-vertebrate organisms. 
We present an update of the Eukaryotic Promoter Database EPD (http://epd.vital-it.ch), more specifically on the EPDnew division, which contains comprehensive organisms-specific transcription start site (TSS) collections automatically derived from next generation sequencing (NGS) data. Thanks to the abundant release of new high-throughput transcript mapping data (CAGE, TSS-seq, GRO-cap) the database could be extended to plant and fungal species. We further report on the expansion of the mass genome annotation (MGA) repository containing promoter-relevant chromatin profiling data and on improvements for the EPD entry viewers. Finally, we present a new data access tool, ChIP-Extract, which enables computational biologists to extract diverse types of promoter-associated data in numerical table formats that are readily imported into statistical analysis platforms such as R.",EPDnew,0.970172226,Eukaryotic Promoter Database,0.983197996,Eukaryotic Promoter Database,0.983197996,1,"23193273.0, 25378343.0",NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/28/2016 +25352549,http://euL1db.unice.fr,"euL1db: the European database of L1HS retrotransposon insertions in humans. Retrotransposons account for almost half of our genome. They are mobile genetics elements-also known as jumping genes--but only the L1HS subfamily of Long Interspersed Nuclear Elements (LINEs) has retained the ability to jump autonomously in modern humans. Their mobilization in germline--but also some somatic tissues--contributes to human genetic diversity and to diseases, such as cancer. Here, we present euL1db, the European database of L1HS retrotransposon insertions in humans (available at http://euL1db.unice.fr). euL1db provides a curated and comprehensive summary of L1HS insertion polymorphisms identified in healthy or pathological human samples and published in peer-reviewed journals. A key feature of euL1db is its sample--wise organization. 
Hence L1HS insertion polymorphisms are connected to samples, individuals, families and clinical conditions. The current version of euL1db centralizes results obtained in 32 studies. It contains >900 samples, >140,000 sample-wise insertions and almost 9000 distinct merged insertions. euL1db will help understanding the link between L1 retrotransposon insertion polymorphisms and phenotype or disease.",euL1db,0.994982398,NA,0,euL1db,0.994982398,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2014 +"23175615, 25388105, 27903906, 29761457",http://eupathdb.org,"EuPathDB: the eukaryotic pathogen database. EuPathDB (http://eupathdb.org) resources include 11 databases supporting eukaryotic pathogen genomic and functional genomic data, isolate data and phylogenomics. EuPathDB resources are built using the same infrastructure and provide a sophisticated search strategy system enabling complex interrogations of underlying data. Recent advances in EuPathDB resources include the design and implementation of a new data loading workflow, a new database supporting Piroplasmida (i.e. Babesia and Theileria), the addition of large amounts of new data and data types and the incorporation of new analysis tools. New data include genome sequences and annotation, strand-specific RNA-seq data, splice junction predictions (based on RNA-seq), phosphoproteomic data, high-throughput phenotyping data, single nucleotide polymorphism data based on high-throughput sequencing (HTS) and expression quantitative trait loci data. New analysis tools enable users to search for DNA motifs and define genes based on their genomic colocation, view results from searches graphically (i.e. genes mapped to chromosomes or isolates displayed on a map) and analyze data from columns in result tables (word cloud and histogram summaries of column content). 
The manuscript herein describes updates to EuPathDB since the previous report published in NAR in 2010.",EuPathDB,0.998307586,Eukaryotic Pathogen Genomics Database Resource,0.985402346,EuPathDB,0.998307586,4,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +31598693,http://EuRBPDB.syshospital.org,"EuRBPDB: a comprehensive resource for annotation, functional and oncological investigation of eukaryotic RNA binding proteins (RBPs). RNA binding proteins (RBPs) are a large protein family that plays important roles at almost all levels of gene regulation through interacting with RNAs, and contributes to numerous biological processes. However, the complete list of eukaryotic RBPs including human is still unavailable. Here, we systematically identified RBPs in 162 eukaryotic species based on both computational analysis of RNA binding domains (RBDs) and large-scale RNA binding proteomic data, and established a comprehensive eukaryotic RBP database, EuRBPDB (http://EuRBPDB.syshospital.org). We identified a total of 311 571 RBPs with RBDs (corresponding to 6368 ortholog groups) and 3,651 non-canonical RBPs without known RBDs. EuRBPDB provides detailed annotations for each RBP, including basic information and functional annotation. Moreover, we systematically investigated RBPs in the context of cancer biology based on published literatures, PPI-network and large-scale omics data. To facilitate the exploration of the clinical relevance of RBPs, we additionally designed a cancer web interface to systematically and interactively display the biological features of RBPs in various types of cancers. EuRBPDB has a user-friendly web interface with browse and search functions, as well as data downloading function. 
We expect that EuRBPDB will be a widely-used resource and platform for both the communities of RNA biology and cancer biology.",EuRBPDB,0.998039126,NA,0,EuRBPDB,0.998039126,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +25941089,http://jerlab.psych.sc.edu/NeurodevelopmentalMRIDatabase,"A database of age-appropriate average MRI templates. This article summarizes a life-span neurodevelopmental MRI database. The study of neurostructural development or neurofunctional development has been hampered by the lack of age-appropriate MRI reference volumes. This causes misspecification of segmented data, irregular registrations, and the absence of appropriate stereotaxic volumes. We have created the ""Neurodevelopmental MRI Database"" that provides age-specific reference data from 2 weeks through 89 years of age. The data are presented in fine-grained ages (e.g., 3 months intervals through 1 year; 6 months intervals through 19.5 years; 5 year intervals from 20 through 89 years). The base component of the database at each age is an age-specific average MRI template. The average MRI templates are accompanied by segmented partial volume estimates for segmenting priors, and a common stereotaxic atlas for infant, pediatric, and adult participants. The database is available online (http://jerlab.psych.sc.edu/NeurodevelopmentalMRIDatabase/).",eurodevelopmental,0.478637457,NA,0,eurodevelopmental,0.478637457,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,5/2/2015 +"25378340, 29161421, 33180112",http://europepmc.org,"Europe PMC: a full-text literature database for the life sciences and platform for innovation. This article describes recent developments of Europe PMC (http://europepmc.org), the leading database for life science literature. Formerly known as UKPMC, the service was rebranded in November 2012 as Europe PMC to reflect the scope of the funding agencies that support it. Several new developments have enriched Europe PMC considerably since then. 
Europe PMC now offers RESTful web services to access both articles and grants, powerful search tools such as citation-count sort order and data citation features, a service to add publications to your ORCID, a variety of export formats, and an External Links service that enables any related resource to be linked from Europe PMC content.",Europe PMC,0.938993772,NA,0,Europe PMC,0.938993772,3,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +21762488,http://www.scbi.uma.es/pindb,"EuroPineDB: a high-coverage web database for maritime pine transcriptome. Background Pinus pinaster is an economically and ecologically important species that is becoming a woody gymnosperm model. Its enormous genome size makes whole-genome sequencing approaches are hard to apply. Therefore, the expressed portion of the genome has to be characterised and the results and annotations have to be stored in dedicated databases. Description EuroPineDB is the largest sequence collection available for a single pine species, Pinus pinaster (maritime pine), since it comprises 951 641 raw sequence reads obtained from non-normalised cDNA libraries and high-throughput sequencing from adult (xylem, phloem, roots, stem, needles, cones, strobili) and embryonic (germinated embryos, buds, callus) maritime pine tissues. Using open-source tools, sequences were optimally pre-processed, assembled, and extensively annotated (GO, EC and KEGG terms, descriptions, SNPs, SSRs, ORFs and InterPro codes). As a result, a 10.5√ɬÉ√ǬÉ√ɬÇ√Ǭó P. pinaster genome was covered and assembled in 55 322 UniGenes. A total of 32 919 (59.5%) of P. pinaster UniGenes were annotated with at least one description, revealing at least 18 466 different genes. The complete database, which is designed to be scalable, maintainable, and expandable, is freely available at: http://www.scbi.uma.es/pindb/. 
It can be retrieved by gene libraries, pine species, annotations, UniGenes and microarrays (i.e., the sequences are distributed in two-colour microarrays; this is the only conifer database that provides this information) and will be periodically updated. Small assemblies can be viewed using a dedicated visualisation tool that connects them with SNPs. Any sequence or annotation set shown on-screen can be downloaded. Retrieval mechanisms for sequences and gene annotations are provided. Conclusions The EuroPineDB with its integrated information can be used to reveal new knowledge, offers an easy-to-use collection of information to directly support experimental work (including microarray hybridisation), and provides deeper knowledge on the maritime pine transcriptome.",EuroPineDB,0.993739247,NA,0,EuroPineDB,0.993739247,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/15/2011 +26179317,http://ssl2.isped.u-bordeaux2.fr/eva_003,"EV@LUTIL: An open access database on occupational exposures to asbestos and man-made mineral fibres. Objectives The aim of Evalutil is to document occupational exposure to asbestos and man-made mineral fibers. Methods These databases provide grouped descriptive and metrological data from observed situations of occupational exposure, collected through the analysis of scientific articles and technical reports by industrial hygienists. Results Over 5,000 measurements were collected. We describe the occupations, economic activities, fiber-containing products, and operations on them that have been documented most often. Graphical measurement syntheses of these data show that the situations presented for asbestos and RCF, except mineral wools, report fiber concentrations mainly above historical occupational exposure limits. Conclusion Free access to these data in French and in English on the Internet (https://ssl2.isped.u-bordeaux2.fr/eva_003/) helps public health and prevention professionals to identify and characterize occupational exposures to fibers. 
Extended recently to nanoscale particles, Evalutil continues to contribute to the improvement of knowledge about exposure to inhaled particles and the health risks associated with them.",Evalutil,0.994965792,NA,0,Evalutil,0.994965792,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/14/2015 +"28985416, 30945202",http://biophy.dzu.edu.cn/EVLncRNAs,"EVLncRNAs: a manually curated database for long non-coding RNAs validated by low-throughput experiments. Long non-coding RNAs (lncRNAs) play important functional roles in various biological processes. Early databases were utilized to deposit all lncRNA candidates produced by high-throughput experimental and/or computational techniques to facilitate classification, assessment and validation. As more lncRNAs are validated by low-throughput experiments, several databases were established for experimentally validated lncRNAs. However, these databases are small in scale (with a few hundreds of lncRNAs only) and specific in their focuses (plants, diseases or interactions). Thus, it is highly desirable to have a comprehensive dataset for experimentally validated lncRNAs as a central repository for all of their structures, functions and phenotypes. Here, we established EVLncRNAs by curating lncRNAs validated by low-throughput experiments (up to 1 May 2016) and integrating specific databases (lncRNAdb, LncRANDisease, Lnc2Cancer and PLNIncRBase) with additional functional and disease-specific information not covered previously. The current version of EVLncRNAs contains 1543 lncRNAs from 77 species that is 2.9 times larger than the current largest database for experimentally validated lncRNAs. Seventy-four percent lncRNA entries are partially or completely new, comparing to all existing experimentally validated databases. The established database allows users to browse, search and download as well as to submit experimentally validated lncRNAs. 
The database is available at http://biophy.dzu.edu.cn/EVLncRNAs.",EVLncRNAs,0.995015562,NA,0,EVLncRNAs,0.995015562,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30335161,http://bioinfo.life.hust.edu.cn/EVmiRNA,"EVmiRNA: a database of miRNA profiling in extracellular vesicles. Extracellular vesicles (EVs), such as exosomes and microvesicles, acted as cell-to-cell communication vectors and potential biomarkers for diseases. microRNAs (miRNAs) are the most well studied molecules in EVs, thus a comprehensive investigation of miRNA expression profiles in EVs will be helpful to explore their functions and biomarkers. We curated 462 small RNA sequencing samples of EVs from 17 sources/diseases and constructed the EVmiRNA database (http://bioinfo.life.hust.edu.cn/EVmiRNA) to show the miRNA expression profiles. We found >1000 miRNAs expressed in these EVs and detected specific miRNAs for EVs of each source/disease. EVmiRNA provides three functional modules: (i) the miRNA expression profiles and the sample information of EVs from different sources (such as blood, breast milk etc.); (ii) the specifically expressed miRNAs in different EVs that would be helpful for biomarker identification; (iii) the miRNA annotations including the miRNA expression in EVs and TCGA cancer types, miRNA pathway regulations as well as miRNA function and publications. EVmiRNA has a user-friendly web interface with powerful browse and search functions, as well as data downloading. It is the first database focusing on miRNA expression profiles in EVs and will be useful for the research and application community of EV biomarker, miRNA function and liquid biopsy.",EVmiRNA,0.996470809,NA,0,EVmiRNA,0.996470809,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +23977990,http://biomi.cdc.go.kr/EvoSNP,"EvoSNP-DB: A database of genetic diversity in East Asian populations. Genome-wide association studies (GWAS) have become popular as an approach for the identification of large numbers of phenotype-associated variants. 
However, differences in genetic architecture and environmental factors mean that the effect of variants can vary across populations. Understanding population genetic diversity is valuable for the investigation of possible population specific and independent effects of variants. EvoSNP-DB aims to provide information regarding genetic diversity among East Asian populations, including Chinese, Japanese, and Korean. Non-redundant SNPs (1.6 million) were genotyped in 54 Korean trios (162 samples) and were compared with 4 million SNPs from HapMap phase II populations. EvoSNP-DB provides two user interfaces for data query and visualization, and integrates scores of genetic diversity (Fst and VarLD) at the level of SNPs, genes, and chromosome regions. EvoSNP-DB is a web-based application that allows users to navigate and visualize measurements of population genetic differences in an interactive manner, and is available online at [http://biomi.cdc.go.kr/EvoSNP/].",EvoSNP-DB,0.993930091,NA,0,EvoSNP-DB,0.993930091,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2013 +"24009897, 25388151",http://evpedia.info,"EVpedia: an integrated database of high-throughput data for systemic analyses of extracellular vesicles. . Secretion of extracellular vesicles is a general cellular activity that spans the range from simple unicellular organisms (e.g. archaea; Gram-positive and Gram-negative bacteria) to complex multicellular ones, suggesting that this extracellular vesicle-mediated communication is evolutionarily conserved. Extracellular vesicles are spherical bilayered proteolipids with a mean diameter of 20-1,000 nm, which are known to contain various bioactive molecules including proteins, lipids, and nucleic acids. Here, we present EVpedia, which is an integrated database of high-throughput datasets from prokaryotic and eukaryotic extracellular vesicles. 
EVpedia provides high-throughput datasets of vesicular components (proteins, mRNAs, miRNAs, and lipids) present on prokaryotic, non-mammalian eukaryotic, and mammalian extracellular vesicles. In addition, EVpedia also provides an array of tools, such as the search and browse of vesicular components, Gene Ontology enrichment analysis, network analysis of vesicular proteins and mRNAs, and a comparison of vesicular datasets by ortholog identification. Moreover, publications on extracellular vesicle studies are listed in the database. This free web-based database of EVpedia (http://evpedia.info) might serve as a fundamental repository to stimulate the advancement of extracellular vesicle studies and to elucidate the novel functions of these complex extracellular organelles.",EVpedia,0.995006621,NA,0,EVpedia,0.995006621,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/10/2014 +31584095,http://bigd.big.ac.cn/ewas/datahub,"EWAS Data Hub: a resource of DNA methylation array data and metadata. Epigenome-Wide Association Study (EWAS) has become an effective strategy to explore epigenetic basis of complex traits. Over the past decade, a large amount of epigenetic data, especially those sourced from DNA methylation array, has been accumulated as the result of numerous EWAS projects. We present EWAS Data Hub (https://bigd.big.ac.cn/ewas/datahub), a resource for collecting and normalizing DNA methylation array data as well as archiving associated metadata. The current release of EWAS Data Hub integrates a comprehensive collection of DNA methylation array data from 75 344 samples and employs an effective normalization method to remove batch effects among different datasets. 
Accordingly, taking advantages of both massive high-quality DNA methylation data and standardized metadata, EWAS Data Hub provides reference DNA methylation profiles under different contexts, involving 81 tissues/cell types (that contain 25 brain parts and 25 blood cell types), six ancestry categories, and 67 diseases (including 39 cancers). In summary, EWAS Data Hub bears great promise to aid the retrieval and discovery of methylation-based biomarkers for phenotype characterization, clinical treatment and health care.",EWAS,0.916500092,NA,0,EWAS,0.916500092,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +30364969,http://bigd.big.ac.cn/ewas,"EWAS Atlas: a curated knowledgebase of epigenome-wide association studies. Epigenome-Wide Association Study (EWAS) has become increasingly significant in identifying the associations between epigenetic variations and different biological traits. In this study, we develop EWAS Atlas (http://bigd.big.ac.cn/ewas), a curated knowledgebase of EWAS that provides a comprehensive collection of EWAS knowledge. Unlike extant data-oriented epigenetic resources, EWAS Atlas features manual curation of EWAS knowledge from extensive publications. In the current implementation, EWAS Atlas focuses on DNA methylation-one of the key epigenetic marks; it integrates a large number of 329 172 high-quality EWAS associations, involving 112 tissues/cell lines and covering 305 traits, 1830 cohorts and 390 ontology entities, which are completely based on manual curation from 649 studies reported in 401 publications. In addition, it is equipped with a powerful trait enrichment analysis tool, which is capable of profiling trait-trait and trait-epigenome relationships. Future developments include regular curation of recent EWAS publications, incorporation of more epigenetic marks and possible integration of EWAS with GWAS. 
Collectively, EWAS Atlas is dedicated to the curation, integration and standardization of EWAS knowledge and has the great potential to help researchers dissect molecular mechanisms of epigenetic modifications associated with biological traits.",EWAS Atlas,0.979906976,NA,0,EWAS Atlas,0.979906976,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30321400,"http://www.ewas.org.cn/ewasdb, http://www.bioapp.org/ewasdb","EWASdb: epigenome-wide association study database. DNA methylation, the most intensively studied epigenetic modification, plays an important role in understanding the molecular basis of diseases. Furthermore, epigenome-wide association study (EWAS) provides a systematic approach to identify epigenetic variants underlying common diseases/phenotypes. However, there is no comprehensive database to archive the results of EWASs. To fill this gap, we developed the EWASdb, which is a part of 'The EWAS Project', to store the epigenetic association results of DNA methylation from EWASs. In its current version (v 1.0, up to July 2018), the EWASdb has curated 1319 EWASs associated with 302 diseases/phenotypes. There are three types of EWAS results curated in this database: (i) EWAS for single marker; (ii) EWAS for KEGG pathway and (iii) EWAS for GO (Gene Ontology) category. As the first comprehensive EWAS database, EWASdb has been searched or downloaded by researchers from 43 countries to date. We believe that EWASdb will become a valuable resource and significantly contribute to the epigenetic research of diseases/phenotypes and have potential clinical applications. EWASdb is freely available at http://www.ewas.org.cn/ewasdb or http://www.bioapp.org/ewasdb.",EWASdb,0.995485365,NA,0,EWASdb,0.995485365,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +24444128,http://egtc.jp,"Database for exchangeable gene trap clones: pathway and gene ontology analysis of exchangeable gene trap clone mouse lines. 
Gene trapping in embryonic stem (ES) cells is a proven method for large-scale random insertional mutagenesis in the mouse genome. We have established an exchangeable gene trap system, in which a reporter gene can be exchanged for any other DNA of interest through Cre/mutant lox-mediated recombination. We isolated trap clones, analyzed trapped genes, and constructed the database for Exchangeable Gene Trap Clones (EGTC) [http://egtc.jp]. The number of registered ES cell lines was 1162 on 31 August 2013. We also established 454 mouse lines from trap ES clones and deposited them in the mouse embryo bank at the Center for Animal Resources and Development, Kumamoto University, Japan. The EGTC database is the most extensive academic resource for gene-trap mouse lines. Because we used a promoter-trap strategy, all trapped genes were expressed in ES cells. To understand the general characteristics of the trapped genes in the EGTC library, we used Kyoto Encyclopedia of Genes and Genomes (KEGG) for pathway analysis and found that the EGTC ES clones covered a broad range of pathways. We also used Gene Ontology (GO) classification data provided by Mouse Genome Informatics (MGI) to compare the functional distribution of genes in each GO term between trapped genes in the EGTC mouse lines and total genes annotated in MGI. We found the functional distributions for the trapped genes in the EGTC mouse lines and for the RefSeq genes for the whole mouse genome were similar, indicating that the EGTC mouse lines had trapped a wide range of mouse genes.",EGTC,0.735953112,Exchangeable Gene Trap Clones,0.933022529,Exchangeable Gene Trap Clones,0.933022529,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/20/2014 +32862462,http://exed.biocatnet.de,"Expansin Engineering Database: A navigation and classification tool for expansins and homologues. 
Expansins have the remarkable ability to loosen plant cell walls and cellulose material without showing catalytic activity and therefore have potential applications in biomass degradation. To support the study of sequence-structure-function relationships and the search for novel expansins, the Expansin Engineering Database (ExED, https://exed.biocatnet.de) collected sequence and structure data on expansins from Bacteria, Fungi, and Viridiplantae, and expansin-like homologues such as carbohydrate binding modules, glycoside hydrolases, loosenins, swollenins, cerato-platanins, and EXPNs. Based on global sequence alignment and protein sequence network analysis, the sequences are highly diverse. However, many similarities were found between the expansin domains. Newly created profile hidden Markov models of the two expansin domains enable standard numbering schemes, comprehensive conservation analyses, and genome annotation. Conserved key amino acids in the expansin domains were identified, a refined classification of expansins and carbohydrate binding modules was proposed, and new sequence motifs facilitate the search of novel candidate genes and the engineering of expansins.",ExED,0.984499017,Expansin Engineering Database,0.970954204,ExED,0.984499017,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/9/2020 +32591816,http://exobcd.liumwei.org,"ExoBCD: a comprehensive database for exosomal biomarker discovery in breast cancer. . Effective and safe implementation of precision oncology for breast cancer is a vital strategy to improve patient outcomes, which relies on the application of reliable biomarkers. As 'liquid biopsy' and novel resource for biomarkers, exosomes provide a promising avenue for the diagnosis and treatment of breast cancer. Although several exosome-related databases have been developed, there is still lacking of an integrated database for exosome-based biomarker discovery. 
To this end, a comprehensive database ExoBCD (https://exobcd.liumwei.org) was constructed with the combination of robust analysis of four high-throughput datasets, transcriptome validation of 1191 TCGA cases and manual mining of 950 studies. In ExoBCD, approximately 20√ɬÉ√ǬÇ√ɬÇ√Ǭ†900 annotation entries were integrated from 25 external sources and 306 exosomal molecules (49 potential biomarkers and 257 biologically interesting molecules). The latter could be divided into 3 molecule types, including 121 mRNAs, 172 miRNAs and 13 lncRNAs. Thus, the well-linked information about molecular characters, experimental biology, gene expression patterns, overall survival, functional evidence, tumour stage and clinical use were fully integrated. As a data-driven and literature-based paradigm proposed of biomarker discovery, this study also demonstrated the corroborative analysis and identified 36 promising molecules, as well as the most promising prognostic biomarkers, IGF1R and FRS2. Taken together, ExoBCD is the first well-corroborated knowledge base for exosomal studies of breast cancer. It not only lays a foundation for subsequent studies but also strengthens the studies of probing molecular mechanisms, discovering biomarkers and developing meaningful clinical use.",ExoBCD,0.998073876,NA,0,ExoBCD,0.998073876,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +26434508,"http://www.exocarta.org, http://www.funrich.org","ExoCarta: A Web-Based Compendium of Exosomal Cargo. Exosomes are membranous vesicles that are released by a variety of cells into the extracellular microenvironment and are implicated in intercellular communication. As exosomes contain RNA, proteins and lipids, there is a significant interest in characterizing the molecular cargo of exosomes. Here, we describe ExoCarta (http://www.exocarta.org), a manually curated Web-based compendium of exosomal proteins, RNAs and lipids. Since its inception, the database has been highly accessed (>54,000 visitors from 135 countries). 
The current version of ExoCarta hosts 41,860 proteins, >7540 RNA and 1116 lipid molecules from more than 286 exosomal studies annotated with International Society for Extracellular Vesicles minimal experimental requirements for definition of extracellular vesicles. Besides, ExoCarta features dynamic protein-protein interaction networks and biological pathways of exosomal proteins. Users can download most often identified exosomal proteins based on the number of studies. The downloaded files can further be imported directly into FunRich (http://www.funrich.org) tool for additional functional enrichment and interaction network analysis.",ExoCarta,0.996231556,NA,0,ExoCarta,0.996231556,1,"21989406.0, 24009883.0","21989406.0, 24009883.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,10/3/2015 +"21989406, 24009883",http://www.exocarta.org,"ExoCarta 2012: database of exosomal proteins, RNA and lipids. Exosomes are membraneous nanovesicles of endocytic origin released by most cell types from diverse organisms; they play a critical role in cell-cell communication. ExoCarta (http://www.exocarta.org) is a manually curated database of exosomal proteins, RNA and lipids. The database catalogs information from both published and unpublished exosomal studies. The mode of exosomal purification and characterization, the biophysical and molecular properties are listed in the database aiding biomedical scientists in assessing the quality of the exosomal preparation and the corresponding data obtained. Currently, ExoCarta (Version 3.1) contains information on 11,261 protein entries, 2375 mRNA entries and 764 miRNA entries that were obtained from 134 exosomal studies. In addition to the data update, as a new feature, lipids identified in exosomes are added to ExoCarta. 
We believe that this free web-based community resource will aid researchers in identifying molecular signatures (proteins/RNA/lipids) that are specific to certain tissue/cell type derived exosomes and trigger new exosomal studies.",ExoCarta,0.995963633,NA,0,ExoCarta,0.995963633,2,26434508,26434508,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,4/16/2012 +32681912,http://www.exocerna-atlas.com/exoceRNA,"ExoceRNA atlas: A database of cancer ceRNAs in human blood exosomes. Aims Competing endogenous RNAs (ceRNAs) play essential roles in cancer pathogenesis and those in exosomes have been the promising biomarkers for cancer diagnose and therapy. We aim to identify potential active ceRNA pairs in cancer blood exosomes by combining TCGA and exoRBase. Main methods Two strict screening criteria were implemented, including hypergeometric test on the targets predicted by RNA22 for differential miRNAs and Pearson test on the candidate mRNAs and lncRNAs for each cancer. Then2638292, 4925485 and 70669 ceRNAs in blood exosomes are available for colorectal cancer (CRC), hepatocellular carcinoma (HCC) and pancreatic adenocarcinoma (PAAD), respectively. Key findings A comprehensive functional analysis on differential miRNAs in cancer blood exosomes indicates that they play important roles in development of cancer by degrading or inhibiting the post-transcription translation level of mRNA or by acting as mediators to regulate the expression of mRNA. Topological and biological functional analysis of ceRNA networks demonstrate that hub ceRNAs involve in cancer-related biological pathways and processes, so as to influence the occurrence and development of cancer and would be the potential biomarkers for three cancers. Finally, we designed a web-accessible database, ExoceRNA Atlas (https://www.exocerna-atlas.com/exoceRNA#/) as a repository of ceRNAs in blood exosomes. 
It can friendly search, browse and visualize ceRNA networks of the query genes along with giving the detailed functional analysis results. The entire ceRNA data can also be freely downloaded. Significance ExoceRNA Atlas will serve as a powerful public resource for identifying ceRNAs and greatly deepen our understanding their functions in cancer exosomes.",ExoceRNA Atlas,0.949007857,NA,0,ExoceRNA Atlas,0.949007857,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/15/2020 +31642488,http://ccsm.uth.edu/ExonSkipDB,"ExonSkipDB: functional annotation of exon skipping event in human. Exon skipping (ES) is reported to be the most common alternative splicing event due to loss of functional domains/sites or shifting of the open reading frame (ORF), leading to a variety of human diseases and considered therapeutic targets.√ɬÉ√ǬÇ√ɬÇ√Ǭ†To date, systematic and intensive annotations of ES events based on the skipped exon units in cancer and normal tissues are not available. Here, we built ExonSkipDB, the ES annotation database available at https://ccsm.uth.edu/ExonSkipDB/, aiming to provide a resource and reference for functional annotation of ES events in multiple cancer and tissues to identify therapeutically targetable genes in individual exon units. We collected 14 272 genes that have 90 616 and 89 845 ES events across 33 cancer types and 31 normal tissues from The Cancer Genome Atlas (TCGA) and Genotype-Tissue Expression (GTEx). For the ES events, we performed multiple functional annotations. These include ORF assignment of exon skipped transcript, studies of lost protein functional features due to ES events, and studies of exon skipping events associated with mutations and methylations based on multi-omics evidence. 
ExonSkipDB will be a unique resource for cancer and drug research communities to identify therapeutically targetable exon skipping events.",ExonSkipDB,0.997633815,NA,0,ExonSkipDB,0.997633815,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +30053265,http://www.exoRBase.org,"exoRBase: a database of circRNA, lncRNA and mRNA in human blood exosomes. Exosomes, which are nanosized endocytic vesicles that are secreted by most cells, contain an abundant cargo of different RNA species that can modulate the behavior of recipient cells and may be used as circulating biomarkers for diseases. Here, we develop a web-accessible database (http://www.exoRBase.org), exoRBase, which is a repository of circular RNA (circRNA), long non-coding RNA (lncRNA) and messenger RNA (mRNA) derived from RNA-seq data analyses of human blood exosomes. Experimental validations from the published literature are also included. exoRBase features the integration and visualization of RNA expression profiles based on normalized RNA-seq data spanning both normal individuals and patients with different diseases. exoRBase aims to collect and characterize all long RNA species in human blood exosomes. The first release of exoRBase contains 58 330 circRNAs, 15 501 lncRNAs and 18 333 mRNAs. The annotation, expression level and possible original tissues are provided. exoRBase will aid researchers in identifying molecular signatures in blood exosomes and will trigger new exosomal biomarker discovery and functional implication for human diseases.",exoRBase,0.996153355,NA,0,exoRBase,0.996153355,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +22661580,http://www.expasy.org,"ExPASy: SIB bioinformatics resource portal. ExPASy (http://www.expasy.org) has worldwide reputation as one of the main bioinformatics resources for proteomics. It has now evolved, becoming an extensible and integrative portal accessing many scientific resources, databases and software tools in different areas of life sciences. 
Scientists can henceforth access seamlessly a wide range of resources in many different domains, such as proteomics, genomics, phylogeny/evolution, systems biology, population genetics, transcriptomics, etc. The individual resources (databases, web-based and downloadable software tools) are hosted in a 'decentralized' way by different groups of the SIB Swiss Institute of Bioinformatics and partner institutions. Specifically, a single web portal provides a common entry point to a wide range of resources developed and operated by different SIB groups and external institutions. The portal features a search function across 'selected' resources. Additionally, the availability and usage of resources are monitored. The portal is aimed for both expert users and people who are not familiar with a specific domain in life sciences. The new web interface provides, in particular, visual guidance for newcomers to ExPASy.",ExPASy,0.99731946,NA,0,ExPASy,0.99731946,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/31/2012 +25708775,http://expath.itps.ncku.edu.tw,"EXPath: a database of comparative expression analysis inferring metabolic pathways for plants. Background In general, the expression of gene alters conditionally to catalyze a specific metabolic pathway. Microarray-based datasets have been massively produced to monitor gene expression levels in parallel with numerous experimental treatments. Although several studies facilitated the linkage of gene expression data and metabolic pathways, none of them are amassed for plants. Moreover, advanced analysis such as pathways enrichment or how genes express under different conditions is not rendered. 
Description Therefore, EXPath was developed to not only comprehensively congregate the public microarray expression data from over 1000 samples in biotic stress, abiotic stress, and hormone secretion but also allow the usage of this abundant resource for coexpression analysis and differentially expression genes (DEGs) identification, finally inferring the enriched KEGG pathways and gene ontology (GO) terms of three model plants: Arabidopsis thaliana, Oryza sativa, and Zea mays. Users can access the gene expression patterns of interest under various conditions via five main functions (Gene Search, Pathway Search, DEGs Search, Pathways/GO Enrichment, and Coexpression analysis) in EXPath, which are presented by a user-friendly interface and valuable for further research. Conclusions In conclusion, EXPath, freely available at http://expath.itps.ncku.edu.tw, is a database resource that collects and utilizes gene expression profiles derived from microarray platforms under various conditions to infer metabolic pathways for plants.",EXPath,0.997518301,NA,0,EXPath,0.997518301,1,NA,32898258,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/21/2015 +32898258,http://EXPath.itps.ncku.edu.tw,"EXPath 2.0: An Updated Database for Integrating High-Throughput Gene Expression Data with Biological Pathways. Co-expressed genes tend to have regulatory relationships and participate in similar biological processes. Construction of gene correlation networks from microarray or RNA-seq expression data has been widely applied to study transcriptional regulatory mechanisms and metabolic pathways under specific conditions. Furthermore, since transcription factors (TFs) are critical regulators of gene expression, it is worth investigating TFs on the promoters of co-expressed genes. 
Although co-expressed genes and their related metabolic pathways can be easily identified from previous resources, such as EXPath and EXPath Tool, this information is not simultaneously available to identify their regulatory TFs. EXPath 2.0 is an updated database for the investigation of regulatory mechanisms in various plant metabolic pathways with 1,881 microarray and 978 RNA-seq samples. There are six significant improvements in EXPath 2.0: (i) the number of species has been extended from three to six to include Arabidopsis, rice, maize, Medicago, soybean and tomato; (ii) gene expression at various developmental stages have been added; (iii) construction of correlation networks according to a group of genes is available; (iv) hierarchical figures of the enriched Gene Ontology (GO) terms are accessible; (v) promoter analysis of genes in a metabolic pathway or correlation network is provided; and (vi) user's gene expression data can be uploaded and analyzed. Thus, EXPath 2.0 is an updated platform for investigating gene expression profiles and metabolic pathways under specific conditions. It facilitates users to access the regulatory mechanisms of plant biological processes. The new version is available at http://EXPath.itps.ncku.edu.tw.",EXPath,0.960307598,NA,0,EXPath,0.960307598,1,NA,25708775,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/1/2020 +"27924041, 31724701",http://exposome-explorer.iarc.fr,"Exposome-Explorer: a manually-curated database on biomarkers of exposure to dietary and environmental factors. Exposome-Explorer (http://exposome-explorer.iarc.fr) is the first database dedicated to biomarkers of exposure to environmental risk factors. It contains detailed information on the nature of biomarkers, their concentrations in various human biospecimens, the study population where measured and the analytical techniques used for measurement. 
It also contains correlations with external exposure measurements and data on biological reproducibility over time. The data in Exposome-Explorer was manually collected from peer-reviewed publications and organized to make it easily accessible through a web interface for in-depth analyses. The database and the web interface were developed using the Ruby on Rails framework. A total of 480 publications were analyzed and 10 510 concentration values in blood, urine and other biospecimens for 692 dietary and pollutant biomarkers were collected. Over 8000 correlation values between dietary biomarker levels and food intake as well as 536 values of biological reproducibility over time were also compiled. Exposome-Explorer makes it easy to compare the performance between biomarkers and their fields of application. It should be particularly useful for epidemiologists and clinicians wishing to select panels of biomarkers that can be used in biomonitoring studies or in exposome-wide association studies, thereby allowing them to better understand the etiology of chronic diseases.",Exposome-Explorer,0.96887961,NA,0,Exposome-Explorer,0.96887961,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +29337142,http://www.iupui.edu/√ɬÉ√ǬÉ√ɬÇ√ǬÉ,"Express: A database of transcriptome profiles encompassing known and novel transcripts across multiple development stages in eye tissues. Advances in sequencing have facilitated nucleotide-resolution genome-wide transcriptomic profiles across multiple mouse eye tissues. However, these RNA sequencing (RNA-seq) based eye developmental transcriptomes are not organized for easy public access, making any further analysis challenging. Here, we present a new database ""Express"" (http://www.iupui.edu/√ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭºsysbio/express/) that unifies various mouse lens and retina RNA-seq data and provides user-friendly visualization of the transcriptome to facilitate gene discovery in the eye. 
We obtained RNA-seq data encompassing 7 developmental stages of lens in addition to that on isolated lens epithelial and fibers, as well as on 11 developmental stages of retina/isolated retinal rod photoreceptor cells from publicly available wild-type mouse datasets. These datasets were pre-processed, aligned, quantified and normalized for expression levels of known and novel transcripts using a unified expression quantification framework. Express provides heatmap and browser view allowing easy navigation of the genomic organization of transcripts or gene loci. Further, it allows users to search candidate genes and export both the visualizations and the embedded data to facilitate downstream analysis. We identified total of >81,000 transcripts in the lens and >178,000 transcripts in the retina across all the included developmental stages. This analysis revealed that a significant number of the retina-expressed transcripts are novel. Expression of several transcripts in the lens and retina across multiple developmental stages was independently validated by RT-qPCR for established genes such as Pax6 and Lhx2 as well as for new candidates such as Elavl4, Rbm5, Pabpc1, Tia1 and Tubb2b. Thus, Express serves as an effective portal for analyzing pruned RNA-seq expression datasets presently collected for the lens and retina. It will allow a wild-type context for the detailed analysis of targeted gene-knockout mouse ocular defect models and facilitate the prioritization of candidate genes from Exome-seq data of eye disease patients.",Express,0.922874272,NA,0,Express,0.922874272,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/11/2018 +"24304889, 26481351, 29165655",http://www.ebi.ac.uk/gxa,"Expression Atlas update--a database of gene and transcript expression from microarray- and sequencing-based functional genomics experiments. 
Expression Atlas (http://www.ebi.ac.uk/gxa) is a value-added database providing information about gene, protein and splice variant expression in different cell types, organism parts, developmental stages, diseases and other biological and experimental conditions. The database consists of selected high-quality microarray and RNA-sequencing experiments from ArrayExpress that have been manually curated, annotated with Experimental Factor Ontology terms and processed using standardized microarray and RNA-sequencing analysis methods. The new version of Expression Atlas introduces the concept of 'baseline' expression, i.e. gene and splice variant abundance levels in healthy or untreated conditions, such as tissues or cell types. Differential gene expression data benefit from an in-depth curation of experimental intent, resulting in biologically meaningful 'contrasts', i.e. instances of differential pairwise comparisons between two sets of biological replicates. Other novel aspects of Expression Atlas are its strict quality control of raw experimental data, up-to-date RNA-sequencing analysis methods, expression data at the level of gene sets, as well as genes and a more powerful search interface designed to maximize the biological value provided to the user.",Expression Atlas,0.897050291,NA,0,Expression Atlas,0.897050291,3,22064864,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,1/1/2018 +28529706,http://bioinformatics.cs.vt.edu/expresso,"Expresso: A database and web server for exploring the interaction of transcription factors and their target genes in Arabidopsis thaliana using ChIP-Seq peak data. Motivation: The increasing availability of chromatin immunoprecipitation sequencing (ChIP-Seq) data enables us to learn more about the action of transcription factors in the regulation of gene expression. 
Even though in vivo transcriptional regulation often involves the concerted action of more than one transcription factor, the format of each individual ChIP-Seq dataset usually represents the action of a single transcription factor. Therefore, a relational database in which available ChIP-Seq datasets are curated is essential. Results: We present Expresso (database and webserver) as a tool for the collection and integration of available Arabidopsis ChIP-Seq peak data, which in turn can be linked to a user's gene expression data. Known target genes of transcription factors were identified by motif analysis of publicly available GEO ChIP-Seq data sets. Expresso currently provides three services: 1) Identification of target genes of a given transcription factor; 2) Identification of transcription factors that regulate a gene of interest; 3) Computation of correlation between the gene expression of transcription factors and their target genes. Availability: Expresso is freely available at http://bioinformatics.cs.vt.edu/expresso/.",Expresso,0.997199416,NA,0,Expresso,0.997199416,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/28/2017 +25152233,http://biotech.bmi.ac.cn/ExpTreeDB,"ExpTreeDB: web-based query and visualization of manually annotated gene expression profiling experiments of human and mouse from GEO. Motivation Numerous public microarray datasets are valuable resources for the scientific communities. Several online tools have made great steps to use these data by querying related datasets with users' own gene signatures or expression profiles. However, dataset annotation and result exhibition still need to be improved. Results ExpTreeDB is a database that allows for queries on human and mouse microarray experiments from Gene Expression Omnibus with gene signatures or profiles. Compared with similar applications, ExpTreeDB pays more attention to dataset annotations and result visualization. 
We introduced a multiple-level annotation system to depict and organize original experiments. For example, a tamoxifen-treated cell line experiment is hierarchically annotated as 'agent→drug→estrogen receptor antagonist→tamoxifen'. Consequently, retrieved results are exhibited by an interactive tree-structured graphics, which provide an overview for related experiments and might enlighten users on key items of interest. Availability and implementation The database is freely available at http://biotech.bmi.ac.cn/ExpTreeDB. Web site is implemented in Perl, PHP, R, MySQL and Apache.",ExpTreeDB,0.995681107,NA,0,ExpTreeDB,0.995681107,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/24/2014 +30951672,http://exrna-atlas.org,"exRNA Atlas Analysis Reveals Distinct Extracellular RNA Cargo Types and Their Carriers Present across Human Biofluids. To develop a map of cell-cell communication mediated by extracellular RNA (exRNA), the NIH Extracellular RNA Communication Consortium created the exRNA Atlas resource (https://exrna-atlas.org). The Atlas version 4P1 hosts 5,309 exRNA-seq and exRNA qPCR profiles from 19 studies and a suite of analysis and visualization tools. To analyze variation between profiles, we apply computational deconvolution. The analysis leads to a model with six exRNA cargo types (CT1, CT2, CT3A, CT3B, CT3C, CT4), each detectable in multiple biofluids (serum, plasma, CSF, saliva, urine). Five of the cargo types associate with known vesicular and non-vesicular (lipoprotein and ribonucleoprotein) exRNA carriers. To validate utility of this model, we re-analyze an exercise response study by deconvolution to identify physiologically relevant response pathways that were not detected previously.
To enable wide application of this model, as part of the exRNA Atlas resource, we provide tools for deconvolution and analysis of user-provided case-control studies.",exRNA Atlas,0.840340992,NA,0,exRNA Atlas,0.840340992,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/1/2019 +23696792,http://extrem.igib.res.in,"ExtremeDB: a unified web repository of extremophilic archaea and bacteria. Extremophiles are the microorganisms which can survive under extreme conditions of temperature, pressure, pH, salinity etc. They have gained much attention for their potential role in biotechnological and industrial applications. The large amount of experimental data in the literature is so diverse, that it becomes difficult and time consuming for the researcher to implement it in various areas of research. Therefore, a systematic arrangement of data and redirection in a similar fashion through web interface can assist researchers in analyzing the data as per their requirement. ExtremeDB is a freely available web based relational database which integrates general characteristics, genome-proteome information, industrial applications and recent scientific investigations of the seven major groups of 865 extremophillic microorganisms. The search options are user friendly and analyses tools such as Compare and Extreme BLAST have been incorporated for comparative analysis of two or more extremophiles and determining the sequence similarity of a given protein/nucleotide in relation to other extremophiles respectively. The effort put forth herein in the form of database, would open up new avenues on the potential utility of extremophiles in applied research. ExtremeDB is freely accessible via http://extrem.igib.res.in.",ExtremeDB,0.997353792,NA,0,ExtremeDB,0.997353792,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/16/2013 +33995920,http://exve.icc.fiocruz.br,"ExVe: The knowledge base of orthologous proteins identified in fungal extracellular vesicles.
Extracellular vesicles (EVs) are double-membrane particles associated with intercellular communication. Since the discovery of EV production in the fungus Cryptococcus neoformans, the importance of EV release in its physiology and pathogenicity has been investigated. To date, few studies have investigated the proteomic content of EVs from multiple fungal species. Our main objective was to use an orthology approach to compare proteins identified by EV shotgun proteomics in 8 pathogenic and 1 nonpathogenic species. Using protein information from the UniProt and FungiDB databases, we integrated data for 11,433 hits in fungal EVs with an orthology perspective, resulting in 3,834 different orthologous groups. OG6_100083 (Hsp70 Pfam domain) was the unique orthologous group that was identified for all fungal species. Proteins with this protein domain are associated with the stress response, survival and morphological changes in different fungal species. Although no pathogenic orthologous group was found, we identified 5 orthologous groups exclusive to S. cerevisiae. Using the criteria of at least 7 pathogenic fungi to define a cluster, we detected the 4 unique pathogenic orthologous groups. Taken together, our data suggest that Hsp70-related proteins might play a key role in fungal EVs, regardless of the pathogenic status. Using an orthology approach, we identified at least 4 protein domains that could be novel therapeutic targets against pathogenic fungi. Our results were compiled in the herein described ExVe database, which is publicly available at http://exve.icc.fiocruz.br.",ExVe,0.89774555,NA,0,ExVe,0.89774555,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/17/2021 +34085038,http://eyediseases.bio-data.cn,"EyeDiseases: an integrated resource for dedicating to genetic variants, gene expression and epigenetic factors of human eye diseases. 
Eye diseases are remarkably common and encompass a large and diverse range of morbidities that affect different components of the visual system and visual function. With advances in omics technology of eye disorders, genome-scale datasets have been rapidly accumulated in genetics and epigenetics field. However, the efficient collection and comprehensive analysis of different kinds of omics data are lacking. Herein, we developed EyeDiseases (https://eyediseases.bio-data.cn/), the first database for multi-omics data integration and interpretation of human eyes diseases. It contains 1344 disease-associated genes with genetic variation, 1774 transcription files of bulk cell expression and single-cell RNA-seq, 105 epigenomics data across 185 kinds of human eye diseases. Using EyeDiseases, we investigated SARS-CoV-2 potential tropism in eye infection and found that the SARS-CoV-2 entry factors, ACE2 and TMPRSS2 are highly correlated with cornea and keratoconus, suggest that ocular surface cells are susceptible to infection by SARS-CoV-2. Additionally, integrating analysis of Age-related macular degeneration (AMD) GWAS loci and co-expression data revealed 9 associated genes involved in HIF-1 signaling pathway and voltage-gate potassium channel complex. The EyeDiseases provides a valuable resource for accelerating the discovery and validation of candidate loci and genes contributed to the molecular diagnosis and therapeutic vulnerabilities with various eyes diseases.",EyeDiseases,0.998263299,NA,0,EyeDiseases,0.998263299,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2021 +31343654,http://eyeIntegration.nei.nih.gov,"Eye in a Disk: eyeIntegration Human Pan-Eye and Body Transcriptome Database Version 1.0. Purpose We develop an accessible and reliable RNA sequencing (RNA-seq) transcriptome database of healthy human eye tissues and a matching reactive web application to query gene expression in eye and body tissues. 
Methods We downloaded the raw sequence data for 1375 RNA-seq samples across 54 tissues in the Genotype-Tissue Expression (GTEx) project as a noneye reference set. We then queried several public repositories to find all healthy, nonperturbed, human eye-related tissue RNA-seq samples. The 916 eye and 1375 GTEx samples were sent into a Snakemake-based reproducible pipeline we wrote to quantify all known transcripts and genes, removes samples with poor sequence quality and mislabels, normalizes expression values across each tissue, perform 882 differential expression tests, calculate GO term enrichment, and output all as a single SQLite database file: the Eye in a Disk (EiaD) dataset. Furthermore, we rewrote the web application eyeIntegration (available in the public domain at https://eyeIntegration.nei.nih.gov) to display EiaD. Results The new eyeIntegration portal provides quick visualization of human eye-related transcriptomes published to date by database version, gene/transcript, 19 eye tissues, and 54 body tissues. As a test of the value of this unified pan-eye dataset, we showed that fetal and organoid retina are highly similar at a pan-transcriptome level, but display distinct differences in certain pathways and gene families, such as protocadherin and HOXB family members. Conclusions The eyeIntegration v1.0 web app serves the pan-human eye and body transcriptome dataset, EiaD. This offers the eye community a powerful and quick means to test hypotheses on human gene and transcript expression across 54 body and 19 eye tissues.",eyeIntegration,0.929627955,NA,0,eyeIntegration,0.929627955,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2019 +25324316,http://ezcatdb.cbrc.jp/EzCatDB,"EzCatDB: the enzyme reaction database, 2015 update. 
The EzCatDB database (http://ezcatdb.cbrc.jp/EzCatDB/) has emphasized manual classification of enzyme reactions from the viewpoints of enzyme active-site structures and their catalytic mechanisms based on literature information, amino acid sequences of enzymes (UniProtKB) and the corresponding tertiary structures from the Protein Data Bank (PDB). Reaction types such as hydrolysis, transfer, addition, elimination, isomerization, hydride transfer and electron transfer have been included in the reaction classification, RLCP. This database includes information related to ligand molecules on the enzyme structures in the PDB data, classified in terms of cofactors, substrates, products and intermediates, which are also necessary to elucidate the catalytic mechanisms. Recently, the database system was updated. The 3D structures of active sites for each PDB entry can be viewed using Jmol or Rasmol software. Moreover, sequence search systems of two types were developed for the EzCatDB database: EzCat-BLAST and EzCat-FORTE. EzCat-BLAST is suitable for quick searches, adopting the BLAST algorithm, whereas EzCat-FORTE is more suitable for detecting remote homologues, adopting the algorithm for FORTE protein structure prediction software. Another system, EzMetAct, is also available to searching for major active-site structures in EzCatDB, for which PDB-formatted queries can be searched.",EzCatDB,0.997173011,the enzyme reaction database,0.613930479,EzCatDB,0.997173011,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/16/2014 +22140171,http://eztaxon-e.ezbiocloud.net,"Introducing EzTaxon-e: a prokaryotic 16S rRNA gene sequence database with phylotypes that represent uncultured species. Despite recent advances in commercially optimized identification systems, bacterial identification remains a challenging task in many routine microbiological laboratories, especially in situations where taxonomically novel isolates are involved. 
The 16S rRNA gene has been used extensively for this task when coupled with a well-curated database, such as EzTaxon, containing sequences of type strains of prokaryotic species with validly published names. Although the EzTaxon database has been widely used for routine identification of prokaryotic isolates, sequences from uncultured prokaryotes have not been considered. Here, the next generation database, named EzTaxon-e, is formally introduced. This new database covers not only species within the formal nomenclatural system but also phylotypes that may represent species in nature. In addition to an identification function based on Basic Local Alignment Search Tool (blast) searches and pairwise global sequence alignments, a new objective method of assessing the degree of completeness in sequencing is proposed. All sequences that are held in the EzTaxon-e database have been subjected to phylogenetic analysis and this has resulted in a complete hierarchical classification system. It is concluded that the EzTaxon-e database provides a useful taxonomic backbone for the identification of cultured and uncultured prokaryotes and offers a valuable means of communication among microbiologists who routinely encounter taxonomically novel isolates. The database and its analytical functions can be found at http://eztaxon-e.ezbiocloud.net/.",EzTaxon-e,0.969534889,NA,0,EzTaxon-e,0.969534889,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/25/2011 +34220930,http://data.faang.org,"The FAANG Data Portal: Global, Open-Access, ""FAIR"", and Richly Validated Genotype to Phenotype Data for High-Quality Functional Annotation of Animal Genomes. The Functional Annotation of ANimal Genomes (FAANG) project is a worldwide coordinated action creating high-quality functional annotation of farmed and companion animal genomes. 
The generation of a rich genome-to-phenome resource and supporting informatic infrastructure advances the scope of comparative genomics and furthers the understanding of functional elements. The project also provides terrestrial and aquatic animal agriculture community powerful resources for supporting improvements to farmed animal production, disease resistance, and genetic diversity. The FAANG Data Portal (https://data.faang.org) ensures Findable, Accessible, Interoperable and Reusable (FAIR) open access to the wealth of sample, sequencing, and analysis data produced by an ever-growing number of FAANG consortia. It is developed and maintained by the FAANG Data Coordination Centre (DCC) at the European Molecular Biology Laboratory's European Bioinformatics Institute (EMBL-EBI). FAANG projects produce a standardised set of multi-omic assays with resulting data placed into a range of specialised open data archives. To ensure this data is easily findable and accessible by the community, the portal automatically identifies and collates all submitted FAANG data into a single easily searchable resource. The Data Portal supports direct download from the multiple underlying archives to enable seamless access to all FAANG data from within the portal itself. The portal provides a range of predefined filters, powerful predictive search, and a catalogue of sampling and analysis protocols and automatically identifies publications associated with any dataset. To ensure all FAANG data submissions are high-quality, the portal includes powerful contextual metadata validation and data submissions brokering to the underlying EMBL-EBI archives. The portal will incorporate extensive new technical infrastructure to effectively deliver and standardise FAANG's shift to single-cellomics, cell atlases, pangenomes, and novel phenotypic prediction models. 
The Data Portal plays a key role for FAANG by supporting high-quality functional annotation of animal genomes, through open FAIR sharing of data, complete with standardised rich metadata. Future Data Portal features developed by the DCC will support new technological developments for continued improvement for FAANG projects.",FAANG,0.989271164,Functional Annotation of,0.782491729,FAANG,0.989271164,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/17/2021 +23203885,http://factorbook.org,"Factorbook.org: a Wiki-based database for transcription factor-binding data generated by the ENCODE consortium. The Encyclopedia of DNA Elements (ENCODE) consortium aims to identify all functional elements in the human genome including transcripts, transcriptional regulatory regions, along with their chromatin states and DNA methylation patterns. The ENCODE project generates data utilizing a variety of techniques that can enrich for regulatory regions, such as chromatin immunoprecipitation (ChIP), micrococcal nuclease (MNase) digestion and DNase I digestion, followed by deeply sequencing the resulting DNA. As part of the ENCODE project, we have developed a Web-accessible repository accessible at http://factorbook.org. In Wiki format, factorbook is a transcription factor (TF)-centric repository of all ENCODE ChIP-seq datasets on TF-binding regions, as well as the rich analysis results of these data. In the first release, factorbook contains 457 ChIP-seq datasets on 119 TFs in a number of human cell lines, the average profiles of histone modifications and nucleosome positioning around the TF-binding regions, sequence motifs enriched in the regions and the distance and orientation preferences between motif sites.",factorbook,0.995431662,of,0.503504157,factorbook,0.995431662,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +27899646,http://fairdomhub.org,"FAIRDOMHub: a repository and collaboration environment for sharing systems biology research. 
The FAIRDOMHub is a repository for publishing FAIR (Findable, Accessible, Interoperable and Reusable) Data, Operating procedures and Models (https://fairdomhub.org/) for the Systems Biology community. It is a web-accessible repository for storing and sharing systems biology research assets. It enables researchers to organize, share and publish data, models and protocols, interlink them in the context of the systems biology investigations that produced them, and to interrogate them via API interfaces. By using the FAIRDOMHub, researchers can achieve more effective exchange with geographically distributed collaborators during projects, ensure results are sustained and preserved and generate reproducible publications that adhere to the FAIR guiding principles of data stewardship.",FAIRDOMHub,0.996229708,NA,0,FAIRDOMHub,0.996229708,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +"30407557, 33211864",http://fantom.gsc.riken.jp,"Update of the FANTOM web resource: expansion to provide additional transcriptome atlases. The FANTOM web resource (http://fantom.gsc.riken.jp/) was developed to provide easy access to the data produced by the FANTOM project. It contains the most complete and comprehensive sets of actively transcribed enhancers and promoters in the human and mouse genomes. We determined the transcription activities of these regulatory elements by CAGE (Cap Analysis of Gene Expression) for both steady and dynamic cellular states in all major and some rare cell types, consecutive stages of differentiation and responses to stimuli. We have expanded the resource by employing different assays, such as RNA-seq, short RNA-seq and a paired-end protocol for CAGE (CAGEscan), to provide new angles to study the transcriptome. That yielded additional atlases of long noncoding RNAs, miRNAs and their promoters. We have also expanded the CAGE analysis to cover rat, dog, chicken, and macaque species for a limited number of cell types. 
The CAGE data obtained from human and mouse were reprocessed to make them available on the latest genome assemblies. Here, we report the recent updates of both data and interfaces in the FANTOM web resource.",FANTOM,0.988861993,Functional ANnoTation Of the Mammalian genome,0.984211731,FANTOM,0.988861993,2,27794045,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2021 +25723102,http://fantom.gsc.riken.jp/5,"Gateways to the FANTOM5 promoter level mammalian expression atlas. The FANTOM5 project investigates transcription initiation activities in more than 1,000 human and mouse primary cells, cell lines and tissues using CAGE. Based on manual curation of sample information and development of an ontology for sample classification, we assemble the resulting data into a centralized data resource (http://fantom.gsc.riken.jp/5/). This resource contains web-based tools and data-access points for the research community to search and extract data related to samples, genes, promoter activities, transcription factors and enhancers across the FANTOM5 atlas.",FANTOM5,0.996881783,NA,0,FANTOM5,0.996881783,1,NA,27794045,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/5/2015 +27794045,http://fantom.gsc.riken.jp,"Update of the FANTOM web resource: high resolution transcriptome of diverse cell types in mammals. Upon the first publication of the fifth iteration of the Functional Annotation of Mammalian Genomes collaborative project, FANTOM5, we gathered a series of primary data and database systems into the FANTOM web resource (http://fantom.gsc.riken.jp) to facilitate researchers to explore transcriptional regulation and cellular states. In the course of the collaboration, primary data and analysis results have been expanded, and functionalities of the database systems enhanced. 
We believe that our data and web systems are invaluable resources, and we think the scientific community will benefit for this recent update to deepen their understanding of mammalian cellular organization. We introduce the contents of FANTOM5 here, report recent updates in the web resource and provide future perspectives.",FANTOM5,0.996190906,Functional Annotation of Mammalian Genomes,0.886339247,FANTOM5,0.996190906,1,"30407557.0, 33211864.0",25723102,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/27/2016 +26384373,http://ppi.bioinfo.asia.edu.tw/FARE-CAFE,"FARE-CAFE: a database of functional and regulatory elements of cancer-associated fusion events. . Chromosomal translocation (CT) is of enormous clinical interest because this disorder is associated with various major solid tumors and leukemia. A tumor-specific fusion gene event may occur when a translocation joins two separate genes. Currently, various CT databases provide information about fusion genes and their genomic elements. However, no database of the roles of fusion genes, in terms of essential functional and regulatory elements in oncogenesis, is available. FARE-CAFE is a unique combination of CTs, fusion proteins, protein domains, domain-domain interactions, protein-protein interactions, transcription factors and microRNAs, with subsequent experimental information, which cannot be found in any other CT database. Genomic DNA information including, for example, manually collected exact locations of the first and second break points, sequences and karyotypes of fusion genes are included. FARE-CAFE will substantially facilitate the cancer biologist's mission of elucidating the pathogenesis of various types of cancer. This database will ultimately help to develop 'novel' therapeutic approaches. 
Database URL: http://ppi.bioinfo.asia.edu.tw/FARE-CAFE.",FARE-CAFE,0.994346166,NA,0,FARE-CAFE,0.994346166,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/16/2015 +27924038,http://cbrc.kaust.edu.sa/farna,"FARNA: knowledgebase of inferred functions of non-coding RNA transcripts. Non-coding RNA (ncRNA) genes play a major role in control of heterogeneous cellular behavior. Yet, their functions are largely uncharacterized. Current available databases lack in-depth information of ncRNA functions across spectrum of various cells/tissues. Here, we present FARNA, a knowledgebase of inferred functions of 10,289 human ncRNA transcripts (2,734 microRNA and 7,555 long ncRNA) in 119 tissues and 177 primary cells of human. Since transcription factors (TFs) and TF co-factors (TcoFs) are crucial components of regulatory machinery for activation of gene transcription, cellular processes and diseases in which TFs and TcoFs are involved suggest functions of the transcripts they regulate. In FARNA, functions of a transcript are inferred from TFs and TcoFs whose genes co-express with the transcript controlled by these TFs and TcoFs in a considered cell/tissue. Transcripts were annotated using statistically enriched GO terms, pathways and diseases across cells/tissues based on guilt-by-association principle. Expression profiles across cells/tissues based on Cap Analysis of Gene Expression (CAGE) are provided. FARNA, having the most comprehensive function annotation of considered ncRNAs across widest spectrum of human cells/tissues, has a potential to greatly contribute to our understanding of ncRNA roles and their regulatory mechanisms in human. FARNA can be accessed at: http://cbrc.kaust.edu.sa/farna.",FARNA,0.99584657,NA,0,FARNA,0.99584657,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2017 +31034103,http://bioinformatics.cse.unr.edu/fat-ptm,"Functional analysis tools for post-translational modification: a post-translational modification database for analysis of proteins and metabolic pathways. 
Post-translational modifications (PTMs) are critical regulators of protein function, and nearly 200 different types of PTM have been identified. Advances in high-resolution mass spectrometry have led to the identification of an unprecedented number of PTM sites in numerous organisms, potentially facilitating a more complete understanding of how PTMs regulate cellular behavior. While databases have been created to house the resulting data, most of these resources focus on individual types of PTM, do not consider quantitative PTM analyses or do not provide tools for the visualization and analysis of PTM data. Here, we describe the Functional Analysis Tools for Post-Translational Modifications (FAT-PTM) database (https://bioinformatics.cse.unr.edu/fat-ptm/), which currently supports eight different types of PTM and over 49 000 PTM sites identified in large-scale proteomic surveys of the model organism Arabidopsis thaliana. The FAT-PTM database currently supports tools to visualize protein-centric PTM networks, quantitative phosphorylation site data from over 10 different quantitative phosphoproteomic studies, PTM information displayed in protein-centric metabolic pathways and groups of proteins that are co-modified by multiple PTMs. Overall, the FAT-PTM database provides users with a robust platform to share and visualize experimentally supported PTM data, develop hypotheses related to target proteins or identify emergent patterns in PTM data for signaling and metabolic pathways.",FAT-PTM,0.994561623,Functional Analysis Tools for Post-Translational Modifications,0.982620938,FAT-PTM,0.994561623,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/31/2019 +33511767,http://159.226.67.243:8080/fawmine,"FAWMine: An integrated database and analysis platform for fall armyworm genomics.
Fall armyworm (Spodoptera frugiperda), a native insect species in the Americas, is rapidly becoming a major agricultural pest worldwide and is causing great damage to corn, rice, soybeans, and other crops. To control this pest, scientists have accumulated a great deal of high-throughput data of fall armyworm, and nine versions of its genomes and transcriptomes have been published. However, easily accessing and performing integrated analysis of these omics data sets is challenging. Here, we developed the Fall Armyworm Genome Database (FAWMine, http://159.226.67.243:8080/fawmine/) to maintain genome sequences, structural and functional annotations, transcriptomes, co-expression, protein interactions, homologs, pathways, and single-nucleotide variations. FAWMine provides a powerful framework that helps users to perform flexible and customized searching, present integrated data sets using diverse visualization methods, output results tables in a range of file formats, analyze candidate gene lists using multiple widgets, and query data available in other InterMine systems. Additionally, stand-alone JBrowse and BLAST services are also established, allowing the users to visualize RNA-Seq data and search genome and annotated gene sequences. Altogether, FAWMine is a useful tool for querying, visualizing, and analyzing compiled data sets rapidly and efficiently. FAWMine will be continually updated to function as a community resource for fall armyworm genomics and pest control research.",FAWMine,0.997773767,Fall Armyworm Genome Database,0.988626023,FAWMine,0.997773767,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/29/2021 +22715304,http://mail.nbfgr.res.in/fbis,"FBIS: A regional DNA barcode archival & analysis system for Indian fishes. Unlabelled DNA barcode is a new tool for taxon recognition and classification of biological organisms based on sequence of a fragment of mitochondrial gene, cytochrome c oxidase I (COI). 
In view of the growing importance of the fish DNA barcoding for species identification, molecular taxonomy and fish diversity conservation, we developed a Fish Barcode Information System (FBIS) for Indian fishes, which will serve as a regional DNA barcode archival and analysis system. The database presently contains 2334 sequence records of COI gene for 472 aquatic species belonging to 39 orders and 136 families, collected from available published data sources. Additionally, it contains information on phenotype, distribution and IUCN Red List status of fishes. The web version of FBIS was designed using MySQL, Perl and PHP under Linux operating platform to (a) store and manage the acquisition (b) analyze and explore DNA barcode records (c) identify species and estimate genetic divergence. FBIS has also been integrated with appropriate tools for retrieving and viewing information about the database statistics and taxonomy. It is expected that FBIS would be useful as a potent information system in fish molecular taxonomy, phylogeny and genomics. Availability The database is available for free at http://mail.nbfgr.res.in/fbis/",FBIS,0.808700919,Barcode,0.65479672,FBIS,0.808700919,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/31/2012 +34793786,http://cb.imsc.res.in/fccp,"An atlas of fragrance chemicals in children's products. Exposure to environmental chemicals during early childhood is a potential health concern. At a tender age, children are exposed to fragrance chemicals used in toys and child care products. Although there are few initiatives in Europe and United States towards monitoring and regulation of fragrance chemicals in children's products, such efforts are still lacking elsewhere. Besides there has been no systematic effort to create a database compiling the surrounding knowledge on fragrance chemicals used in children's products from published literature. 
Here, we built a database of Fragrance Chemicals in Children's Products (FCCP) that compiles information on 153 fragrance chemicals from published literature. The fragrance chemicals in FCCP have been classified based on their chemical structure, children's product source, chemical origin and odor profile. Moreover, we have also compiled the physicochemical properties, predicted Absorption, Distribution, Metabolism, Excretion and Toxicity (ADMET) properties, molecular descriptors and human target genes for the fragrance chemicals in FCCP. After building FCCP, we performed multiple analyses of the associated fragrance chemical space. Firstly, we assessed the regulatory status of the fragrance chemicals in FCCP through a comparative analysis with 21 chemical lists reflecting current guidelines or regulations. We find that several fragrance chemicals in children's products are potential carcinogens, endocrine disruptors, neurotoxicants, phytotoxins and skin sensitizers. Secondly, we performed a similarity network based analysis of the fragrance chemicals in children's products to reveal the high structural diversity of the associated chemical space. Lastly, we identified skin sensitizing fragrance chemicals in children's products using ToxCast assays. In a nutshell, we present a comprehensive resource and detailed analysis of fragrance chemicals in children's products highlighting the need for their better risk assessment and regulation to deliver safer products for children. FCCP is accessible at: https://cb.imsc.res.in/fccp.",FCCP,0.977487087,Fragrance Chemicals in Children's Products,0.952817567,FCCP,0.977487087,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/15/2021 +23601403,http://www.czfcdb.cz,"The new on-line Czech Food Composition Database. The new on-line Czech Food Composition Database (FCDB) was launched on http://www.czfcdb.cz in December 2010 as a main freely available channel for dissemination of Czech food composition data. 
The application is based on a complied FCDB documented according to the EuroFIR standardised procedure for full value documentation and indexing of foods by the LanguaL™ Thesaurus. A content management system was implemented for administration of the website and performing data export (comma-separated values or EuroFIR XML transport package formats) by a compiler. Reference/s are provided for each published value with linking to available freely accessible on-line sources of data (e.g. full texts, EuroFIR Document Repository, on-line national FCDBs). LanguaL™ codes are displayed within each food record as searchable keywords of the database. A photo (or a photo gallery) is used as a visual descriptor of a food item. The application is searchable on foods, components, food groups, alphabet and a multi-field advanced search.",FCDB,0.992240489,Czech Food Composition Database,0.975081468,FCDB,0.992240489,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/11/2013 +25352729,http://www.fruitcropsdd.com,"FCDD: A Database for Fruit Crops Diseases. Unlabelled Fruit Crops Diseases Database (FCDD) requires a number of biotechnology and bioinformatics tools. The FCDD is a unique bioinformatics resource that compiles information about 162 details on fruit crops diseases, diseases type, its causal organism, images, symptoms and their control. The FCDD contains 171 phytochemicals from 25 fruits, their 2D images and their 20 possible sequences. This information has been manually extracted and manually verified from numerous sources, including other electronic databases, textbooks and scientific journals. FCDD is fully searchable and supports extensive text search. The main focus of the FCDD is on providing possible information of fruit crops diseases, which will help in discovery of potential drugs from one of the common bioresource-fruits. The database was developed using MySQL. The database interface is developed in PHP, HTML and JAVA. FCDD is freely available. 
Availability http://www.fruitcropsdd.com/",FCDD,0.995983303,Fruit Crops Diseases Database,0.945625222,FCDD,0.995983303,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/30/2014 +22267904,http://www.bioacademy.gr/bioinformatics/projects/ProteinFusion/index.htm,"SAFE Software and FED Database to Uncover Protein-Protein Interactions using Gene Fusion Analysis. Domain Fusion Analysis takes advantage of the fact that certain proteins in a given proteome A, are found to have statistically significant similarity with two separate proteins in another proteome B. In other words, the result of a fusion event between two separate proteins in proteome B is a specific full-length protein in proteome A. In such a case, it can be safely concluded that the protein pair has a common biological function or even interacts physically. In this paper, we present the Fusion Events Database (FED), a database for the maintenance and retrieval of fusion data both in prokaryotic and eukaryotic organisms and the Software for the Analysis of Fusion Events (SAFE), a computational platform implemented for the automated detection, filtering and visualization of fusion events (both available at: http://www.bioacademy.gr/bioinformatics/projects/ProteinFusion/index.htm). Finally, we analyze the proteomes of three microorganisms using these tools in order to demonstrate their functionality.",FED,0.953724042,Fusion Events Database,0.76366665,FED,0.953724042,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/18/2011 +34626475,http://feda.sciensano.be,"Food Enzyme Database (FEDA): a web application gathering information about food enzyme preparations available on the European market. . Following the European Commission No. 1332/2008 regulation and the consequent necessity of a scientific evaluation of food enzymes (FEs) for their approval for sale on the European Union market, many FE dossiers have been submitted to the European Commission and various documents currently co-exist. 
In order to centralize all relevant information in one structured location that is easily accessible to support enforcement laboratories and the competent authorities, we developed a web application, called Food Enzyme Database (FEDA). FEDA allows searching and collection of information originating from many different sources in one centralized portal. Queries can be performed using key information types, which include information on the producing company, production source (strain type, genetically modified microorganism status), type of enzyme protein and evaluation status with employed evaluation criteria. The database contains all current publicly available information. Centralizing all information coupled with intuitive searching functionality also allows the generation of general statistics regarding the current market situation. FEDA is open access and is freely available at the following location: https://feda.sciensano.be. Database URL : https://feda.sciensano.be.",FEDA,0.989463806,Food Enzyme Database,0.845898256,FEDA,0.989463806,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2021 +33898816,http://webs.iiitd.edu.in/raghava/fermfoodb,"FermFooDb: A database of bioactive peptides derived from fermented foods. Globally fermented foods are in demands due to their functional and nutritional benefits. These foods are sources of probiotic organisms and bioactive peptides, various amino acids, enzymes etc. that provides numerous health benefits. FermFooDb (https://webs.iiitd.edu.in/raghava/fermfoodb/) is a manually curated database of bioactive peptides derived from wide range of foods that maintain comprehensive information about peptides and process of fermentation. This database comprises of 2205 entries with following major fields, peptide sequence, Mass and IC50, food source, functional activity, fermentation conditions, starter culture, testing conditions of sequences in vitro or in vivo, type of model and method of analysis. 
The bioactive peptides in our database have wide range of therapeutic potentials that includes antihypertensive, ACE-inhibitory, antioxidant, antimicrobial, immunomodulatory and cholesterol lowering peptides. These bioactive peptides were derived from different types of fermented foods that include milk, cheese, yogurt, wheat and rice. Numerous, web-based tools have been integrated to retrieve data, peptide mapping of proteins, similarity search and multiple-sequence alignment. This database will be useful for the food industry and researchers to explore full therapeutic potential of fermented foods from specific cultures.",FermFooDb,0.998155117,NA,0,FermFooDb,0.998155117,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/8/2021 +32219413,http://www.zhounan.org/ferrdb,"FerrDb: a manually curated resource for regulators and markers of ferroptosis and ferroptosis-disease associations. . Ferroptosis is a mode of regulated cell death that depends on iron. Cells die from the toxic accumulation of lipid reactive oxygen species. Ferroptosis is tightly linked to a variety of human diseases, such as cancers and degenerative diseases. The ferroptotic process is complicated and consists of a wide range of metabolites and biomolecules. Although great progress has been achieved, the mechanism of ferroptosis remains enigmatic. We have currently entered an era of extensive knowledge advancement, and thus, it is important to find ways to organize and utilize data efficiently. We have observed a high-quality knowledge base of ferroptosis research is lacking. In this study, we downloaded 784 ferroptosis articles from the PubMed database. Ferroptosis regulators and markers and associated diseases were extracted from these articles and annotated. In summary, 253 regulators (including 108 drivers, 69 suppressors, 35 inducers and 41 inhibitors), 111 markers and 95 ferroptosis-disease associations were found. 
We then developed FerrDb, the first manually curated database for regulators and markers of ferroptosis and ferroptosis-disease associations. The database has a user-friendly interface, and it will be updated every 6 months to offer long-term service. FerrDb is expected to help researchers acquire insights into ferroptosis. Database URL: http://www.zhounan.org/ferrdb.",FerrDb,0.984397888,NA,0,FerrDb,0.984397888,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +27242503,http://bams1.org,"Building the Ferretome. Databases of structural connections of the mammalian brain, such as CoCoMac (cocomac.g-node.org) or BAMS (https://bams1.org), are valuable resources for the analysis of brain connectivity and the modeling of brain dynamics in species such as the non-human primate or the rodent, and have also contributed to the computational modeling of the human brain. Another animal model that is widely used in electrophysiological or developmental studies is the ferret; however, no systematic compilation of brain connectivity is currently available for this species. Thus, we have started developing a database of anatomical connections and architectonic features of the ferret brain, the Ferret(connect)ome, www.Ferretome.org. The Ferretome database has adapted essential features of the CoCoMac methodology and legacy, such as the CoCoMac data model. This data model was simplified and extended in order to accommodate new data modalities that were not represented previously, such as the cytoarchitecture of brain areas. The Ferretome uses a semantic parcellation of brain regions as well as a logical brain map transformation algorithm (objective relational transformation, ORT). The ORT algorithm was also adopted for the transformation of architecture data. 
The database is being developed in MySQL and has been populated with literature reports on tract-tracing observations in the ferret brain using a custom-designed web interface that allows efficient and validated simultaneous input and proofreading by multiple curators. The database is equipped with a non-specialist web interface. This interface can be extended to produce connectivity matrices in several formats, including a graphical representation superimposed on established ferret brain maps. An important feature of the Ferretome database is the possibility to trace back entries in connectivity matrices to the original studies archived in the system. Currently, the Ferretome contains 50 reports on connections comprising 20 injection reports with more than 150 labeled source and target areas, the majority reflecting connectivity of subcortical nuclei and 15 descriptions of regional brain architecture. We hope that the Ferretome database will become a useful resource for neuroinformatics and neural modeling, and will support studies of the ferret brain as well as facilitate advances in comparative studies of mesoscopic brain connectivity.",Ferretome,0.98255372,NA,0,Ferretome,0.98255372,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/10/2016 +34954426,http://mcg.ustc.edu.cn/bsc/spermgenes2.0/index.html,"FertilityOnline, A Straight Pipeline for Functional Gene Annotation and Disease Mutation Discovery. . Exploring the genetic basis of human infertility is currently under intensive investigation. However, only a handful of genes have been validated in animal models as disease-causing genes in infertile men. Thus, to better understand the genetic basis of human spermatogenesis and bridge the knowledge gap between humans and other animal species, we constructed the FertilityOnline, a database integrating the literature-curated functional genes during spermatogenesis into an existing spermatogenic database, SpermatogenesisOnline 1.0. 
Additional features, including the functional annotation and genetic variants of human genes, are also incorporated into FertilityOnline. By searching this database, users can browse the functional genes in spermatogenesis and instantly narrow down the number of candidates of genetic mutations underly male infertility in a user-friendly web interface. Clinical application of this database was exampled by the identification of novel causative mutations in synaptonemal complex central element protein 1 (SYCE1) and stromal antigen 3 (STAG3) in azoospermic men. In conclusion, FertilityOnline is not only an integrated resource for spermatogenic genes but also a useful tool facilitating the exploration of the genetic basis of male infertility. FertilityOnline can be freely accessed at http://mcg.ustc.edu.cn/bsc/spermgenes2.0/index.html.",FertilityOnline,0.993602693,NA,0,FertilityOnline,0.993602693,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/23/2021 +33897975,http://fgdb.unmc.edu,"FGDB: Database of follicle stimulating hormone glycans. Glycomics, the study of the entire complement of sugars of an organism has received significant attention in the recent past due to the advances made in high throughput mass spectrometry technologies. These analytical advancements have facilitated the characterization of glycans associated with the follicle-stimulating hormones (FSH), which play a central role in the human reproductive system both in males and females utilizing regulating gonadal (testicular and ovarian) functions. The irregularities in FSH activity are also directly linked with osteoporosis. The glycoanalytical studies have been tremendously helpful in understanding the biological roles of FSH. Subsequently, the increasing number of characterized FSH glycan structures and related glycoform data has thrown a challenge to the glycoinformatics community in terms of data organization, storage and access. 
Also, a user-friendly platform is needed for providing easy access to the database and performing integrated analysis using a high volume of experimental data to accelerate FSH-focused research. FSH Glycans DataBase (FGDB) serves as a comprehensive and unique repository of structures, features, and related information of glycans associated with FSH. Apart from providing multiple search options, the database also facilitates an integrated user-friendly interface to perform the glycan abundance and comparative analyses using experimental data. The automated integrated pipelines present the possible structures of glycans and variants of FSH based on the input data, and allow the user to perform various analyses. The potential application of FGDB will significantly help both glycoinformaticians as well as wet-lab researchers to stimulate the research in this area. FGDB web access: https://fgdb.unmc.edu/.",FGDB,0.979747037,FSH Glycans DataBase,0.933014452,FGDB,0.979747037,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/22/2021 +32076423,http://www.biotoclin.org/FHLdb,"FHLdb: A Comprehensive Database on the Molecular Basis of Familial Hemophagocytic Lymphohistiocytosis. Background: Primary immunodeficiencies (PIDs) are a heterogeneous group of disorders. The lack of comprehensive disease-specific mutation databases may hinder or delay classification of the genetic variants found in samples from these patients. This is especially true for familial hemophagocytic lymphohistiocytosis (FHL), a life-threatening PID classically considered an autosomal recessive condition, but with increasingly demonstrated genetic heterogeneity. Objective: The aim of this study was to build an open-access repository to collect detailed information on the known genetic variants reported in FHL. Methods: We manually reviewed more than 120 articles to identify all reported variants related to FHL. 
We retrieved relevant information about the allelic status, the number of patients with the same variant, and whether functional assays were done. We stored all the data retrieved in a PostgreSQL database and then built a website on top of it, using the Django framework. Results: The database designed (FHLdb) (https://www.biotoclin.org/FHLdb) contains comprehensive information on reported variants in the 4 genes related to FHL (PRF1, UNC13D, STXBP2, STX11). It comprises 240 missense, 69 frameshift, 51 nonsense, 51 splicing, 10 in-frame indel, 7 deep intronic, and 5 large rearrangement variants together with their allelic status, carrier(s) information, and functional evidence. All genetic variants have been classified as pathogenic, likely pathogenic, uncertain significance, likely benign or benign, according to the American College of Medical Genetics guidelines. Additionally, it integrates information from other relevant databases: clinical evidence from ClinVar and UniProt, population allele frequency from ExAC and gnomAD, and pathogenicity predictions from well-recognized tools (e.g., PolyPhen-2, SIFT). Finally, a diagram depicts the location of the variant relative to the gene exon and protein domain structures. Conclusion: FHLdb includes a broad range of data on the reported genetic variants in familial HLH genes. It is a free-access and easy-to-use resource that will facilitate the interpretation of molecular results of FHL patients, and it illustrates the potential value of disease-specific databases for other PIDs.",FHLdb,0.99812746,Familial Hemophagocytic,0.593275462,FHLdb,0.99812746,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/31/2020 +34741074,http://www.fibromine.com/Fibromine,"Fibromine is a multi-omics database and mining tool for target discovery in pulmonary fibrosis. Idiopathic pulmonary fibrosis is a lethal lung fibroproliferative disease with limited therapeutic options. 
Differential expression profiling of affected sites has been instrumental for involved pathogenetic mechanisms dissection and therapeutic targets discovery. However, there have been limited efforts to comparatively analyse/mine the numerous related publicly available datasets, to fully exploit their potential on the validation/creation of novel research hypotheses. In this context and towards that goal, we present Fibromine, an integrated database and exploration environment comprising of consistently re-analysed, manually curated transcriptomic and proteomic pulmonary fibrosis datasets covering a wide range of experimental designs in both patients and animal models. Fibromine can be accessed via an R Shiny application ( http://www.fibromine.com/Fibromine ) which offers dynamic data exploration and real-time integration functionalities. Moreover, we introduce a novel benchmarking system based on transcriptomic datasets underlying characteristics, resulting to dataset accreditation aiming to aid the user on dataset selection. Cell specificity of gene expression can be visualised and/or explored in several scRNA-seq datasets, in an effort to link legacy data with this cutting-edge methodology and paving the way to their integration. Several use case examples are presented, that, importantly, can be reproduced on-the-fly by a non-specialist user, the primary target and potential user of this endeavour.",Fibromine,0.996297419,NA,0,Fibromine,0.996297419,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/5/2021 +33497436,http://www.nwsuaflmz.com/FifBase,"FifBase: a comprehensive fertility-associated indicators factor database for domestic animals. . Fertility refers to the ability of animals to maintain reproductive function and give birth to offspring, which is an important indicator to measure the productivity of animals. Fertility is affected by many factors, among which environmental factors may also play key roles. 
During the past years, substantial research studies have been conducted to detect the factors related to fecundity, including genetic factors and environmental factors. However, the identified genes associated with fertility from countless previous studies are randomly dispersed in the literature, whereas some other novel fertility-related genes are needed to detect from omics-based datasets. Here, we constructed a fertility index factor database FifBase based on manually curated published literature and RNA-Seq datasets. During the construction of the literature group, we obtained 3301 articles related to fecundity for 13 species from PubMed, involving 2823 genes, which are related to 75 fecundity indicators or 47 environmental factors. Eventually, 1558 genes associated with fertility were filtered in 10 species, of which 1088 and 470 were from RNA-Seq datasets and text mining data, respectively, involving 2910 fertility-gene pairs and 58 fertility-environmental factors. All these data were cataloged into FifBase (http://www.nwsuaflmz.com/FifBase/), where the fertility-related factor information, including gene annotation and environmental factors, can be browsed, retrieved and downloaded with the user-friendly interface.",FifBase,0.995629787,NA,0,FifBase,0.995629787,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2021 +26806463,http://filobase.bicpu.edu.in,"Essential proteins and possible therapeutic targets of Wolbachia endosymbiont and development of FiloBase--a comprehensive drug target database for Lymphatic filariasis. Lymphatic filariasis (Lf) is one of the oldest and most debilitating tropical diseases. Millions of people are suffering from this prevalent disease. It is estimated to infect over 120 million people in at least 80 nations of the world through the tropical and subtropical regions. More than one billion people are in danger of getting affected with this life-threatening disease. 
Several studies were suggested its emerging limitations and resistance towards the available drugs and therapeutic targets for Lf. Therefore, better medicine and drug targets are in demand. We took an initiative to identify the essential proteins of Wolbachia endosymbiont of Brugia malayi, which are indispensable for their survival and non-homologous to human host proteins. In this current study, we have used proteome subtractive approach to screen the possible therapeutic targets for wBm. In addition, numerous literatures were mined in the hunt for potential drug targets, drugs, epitopes, crystal structures, and expressed sequence tag (EST) sequences for filarial causing nematodes. Data obtained from our study were presented in a user friendly database named FiloBase. We hope that information stored in this database may be used for further research and drug development process against filariasis. URL: http://filobase.bicpu.edu.in.",FiloBase,0.992046833,NA,0,FiloBase,0.992046833,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/25/2016 +29328995,http://bioinfo.net.in/filterbase,"FilTer BaSe: A web accessible chemical database for small compound libraries. Finding novel chemical agents for targeting disease associated drug targets often requires screening of large number of new chemical libraries. In silico methods are generally implemented at initial stages for virtual screening. Filtering of such compound libraries on physicochemical and substructure ground is done to ensure elimination of compounds with undesired chemical properties. Filtering procedure, is redundant, time consuming and requires efficient bioinformatics/computer manpower along with high end software involving huge capital investment that forms a major obstacle in drug discovery projects in academic setup. We present an open source resource, FilTer BaSe- a chemoinformatics platform (http://bioinfo.net.in/filterbase/) that host fully filtered, ready to use compound libraries with workable size. 
The resource also hosts a database that enables efficient searching the chemical space of around 348,000 compounds on the basis of physicochemical and substructure properties. Ready to use compound libraries and database presented here is expected to aid a helping hand for new drug developers and medicinal chemists.",FilTer BaSe,0.918705657,NA,0,FilTer BaSe,0.918705657,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/6/2018 +34368755,http://cliffordworkman.com/resources,"The Face Image Meta-Database (fIMDb) & ChatLab Facial Anomaly Database (CFAD): Tools for research on face perception and social stigma. . Investigators increasingly need high quality face photographs that they can use in service of their scholarly pursuits-whether serving as experimental stimuli or to benchmark face recognition algorithms. Up to now, an index of known face databases, their features, and how to access them has not been available. This absence has had at least two negative repercussions: First, without alternatives, some researchers may have used face databases that are widely known but not optimal for their research. Second, a reliance on databases comprised only of young white faces will lead to science that isn't representative of all the people whose tax contributions, in many cases, make that research possible. The ""Face Image Meta-Database"" (fIMDb) provides researchers with the tools to find the face images best suited to their research, with filters to locate databases with people of a varied racial and ethnic backgrounds and ages. Problems of representation in face databases are not restricted to race and ethnicity or age - there is a dearth of databases with faces that have visible differences (e.g., scars, port wine stains, and cleft lip and palate). A well-characterized database is needed to support programmatic research into perceivers' attitudes, behaviors, and neural responses to anomalous faces. 
The ""ChatLab Facial Anomaly Database"" (CFAD) was constructed to fill this gap, with photographs of faces with visible differences of various types, etiologies, sizes, locations, and that depict individuals from various ethnic backgrounds and age groups. Both the fIMDb and CFAD are available from: https://cliffordworkman.com/resources/.",fIMDb,0.865089297,ace Image Meta-Database,0.734685099,fIMDb,0.865089297,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/24/2021 +34562055,http://laji.fi/en/theme/protax,"A molecular-based identification resource for the arthropods of Finland. To associate specimens identified by molecular characters to other biological knowledge, we need reference sequences annotated by Linnaean taxonomy. In this study, we (1) report the creation of a comprehensive reference library of DNA barcodes for the arthropods of an entire country (Finland), (2) publish this library, and (3) deliver a new identification tool for insects and spiders, as based on this resource. The reference library contains mtDNA COI barcodes for 11,275 (43%) of 26,437 arthropod species known from Finland, including 10,811 (45%) of 23,956 insect species. To quantify the improvement in identification accuracy enabled by the current reference library, we ran 1000 Finnish insect and spider species through the Barcode of Life Data system (BOLD) identification engine. Of these, 91% were correctly assigned to a unique species when compared to the new reference library alone, 85% were correctly identified when compared to BOLD with the new material included, and 75% with the new material excluded. To capitalize on this resource, we used the new reference material to train a probabilistic taxonomic assignment tool, FinPROTAX, scoring high success. 
For the full-length barcode region, the accuracy of taxonomic assignments at the level of classes, orders, families, subfamilies, tribes, genera, and species reached 99.9%, 99.9%, 99.8%, 99.7%, 99.4%, 96.8%, and 88.5%, respectively. The FinBOL arthropod reference library and FinPROTAX are available through the Finnish Biodiversity Information Facility (www.laji.fi) at https://laji.fi/en/theme/protax. Overall, the FinBOL investment represents a massive capacity-transfer from the taxonomic community of Finland to all sectors of society.",FinBOL,0.991614699,NA,0,FinBOL,0.991614699,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/3/2021 +32248568,http://www.findbase.org,"Documentation of clinically relevant genomic biomarker allele frequencies in the next-generation FINDbase worldwide database. FINDbase (http://www.findbase.org) is a comprehensive data resource recording the prevalence of clinically relevant genomic variants in various populations worldwide, such as pathogenic variants underlying genetic disorders as well as pharmacogenomic biomarkers that can guide drug treatment. Here, we report significant new developments and technological advancements in the database architecture, leading to a completely revamped database structure, querying interface, accompanied with substantial extensions of data content and curation. In particular, the FINDbase upgrade further improves the user experience by introducing responsive features that support a wide variety of mobile and stationary devices, while enhancing computational runtime due to the use of a modern Javascript framework such as ReactJS. Data collection is significantly enriched, with the data records being divided in a Public and Private version, the latter being accessed on the basis of data contribution, according to the microattribution approach, while the front end was redesigned to support the new functionalities and querying tools. 
The abovementioned updates further enhance the impact of FINDbase, improve the overall user experience, facilitate further data sharing by microattribution, and strengthen the role of FINDbase as a key resource for personalized medicine applications and personalized public health.",FINDbase,0.99834466,NA,0,FINDbase,0.99834466,1,27924022,27924022,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,4/14/2020 +27924022,"http://www.findbase.org, http://www.getpivot.com","Expanded national database collection and data coverage in the FINDbase worldwide database for clinically relevant genomic variation allele frequencies. FINDbase (http://www.findbase.org) is a comprehensive data repository that records the prevalence of clinically relevant genomic variants in various populations worldwide, such as pathogenic variants leading mostly to monogenic disorders and pharmacogenomics biomarkers. The database also records the incidence of rare genetic diseases in various populations, all in well-distinct data modules. Here, we report extensive data content updates in all data modules, with direct implications to clinical pharmacogenomics. Also, we report significant new developments in FINDbase, namely (i) the release of a new version of the ETHNOS software that catalyzes development curation of national/ethnic genetic databases, (ii) the migration of all FINDbase data content into 90 distinct national/ethnic mutation databases, all built around Microsoft's PivotViewer (http://www.getpivot.com) software (iii) new data visualization tools and (iv) the interrelation of FINDbase with DruGeVar database with direct implications in clinical pharmacogenomics. 
The abovementioned updates further enhance the impact of FINDbase, as a key resource for Genomic Medicine applications.",FINDbase,0.996567488,NA,0,FINDbase,0.996567488,1,32248568,32248568,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,10/18/2016 +24243844,http://firedb.bioinfo.cnio.es,"FireDB: a compendium of biological and pharmacologically relevant ligands. FireDB (http://firedb.bioinfo.cnio.es) is a curated inventory of catalytic and biologically relevant small ligand-binding residues culled from the protein structures in the Protein Data Bank. Here we present the important new additions since the publication of FireDB in 2007. The database now contains an extensive list of manually curated biologically relevant compounds. Biologically relevant compounds are informative because of their role in protein function, but they are only a small fraction of the entire ligand set. For the remaining ligands, the FireDB provides cross-references to the annotations from publicly available biological, chemical and pharmacological compound databases. FireDB now has external references for 95% of contacting small ligands, making FireDB a more complete database and providing the scientific community with easy access to the pharmacological annotations of PDB ligands. In addition to the manual curation of ligands, FireDB also provides insights into the biological relevance of individual binding sites. Here, biological relevance is calculated from the multiple sequence alignments of related binding sites that are generated from all-against-all comparison of each FireDB binding site. The database can be accessed by RESTful web services and is available for download via MySQL.",FireDB,0.992642879,NA,0,FireDB,0.992642879,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2013 +33166383,http://loschmidt.chemi.muni.cz/fireprotdb,"FireProtDB: database of manually curated protein stability data. 
The majority of naturally occurring proteins have evolved to function under mild conditions inside the living organisms. One of the critical obstacles for the use of proteins in biotechnological applications is their insufficient stability at elevated temperatures or in the presence of salts. Since experimental screening for stabilizing mutations is typically laborious and expensive, in silico predictors are often used for narrowing down the mutational landscape. The recent advances in machine learning and artificial intelligence further facilitate the development of such computational tools. However, the accuracy of these predictors strongly depends on the quality and amount of data used for training and testing, which have often been reported as the current bottleneck of the approach. To address this problem, we present a novel database of experimental thermostability data for single-point mutants FireProtDB. The database combines the published datasets, data extracted manually from the recent literature, and the data collected in our laboratory. Its user interface is designed to facilitate both types of the expected use: (i) the interactive explorations of individual entries on the level of a protein or mutation and (ii) the construction of highly customized and machine learning-friendly datasets using advanced searching and filtering. The database is freely available at https://loschmidt.chemi.muni.cz/fireprotdb.",FireProtDB,0.994380713,NA,0,FireProtDB,0.994380713,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +26980518,http://mail.nbfgr.res.in/Fish_Karyome,"Fish Karyome version 2.1: a chromosome database of fishes and other aquatic organisms. . A voluminous information is available on karyological studies of fishes; however, limited efforts were made for compilation and curation of the available karyological data in a digital form. 
'Fish Karyome' database was the preliminary attempt to compile and digitize the available karyological information on finfishes belonging to the Indian subcontinent. But the database had limitations since it covered data only on Indian finfishes with limited search options. Perceiving the feedbacks from the users and its utility in fish cytogenetic studies, the Fish Karyome database was upgraded by applying Linux, Apache, MySQL and PHP (pre hypertext processor) (LAMP) technologies. In the present version, the scope of the system was increased by compiling and curating the available chromosomal information over the globe on fishes and other aquatic organisms, such as echinoderms, molluscs and arthropods, especially of aquaculture importance. Thus, Fish Karyome version 2.1 presently covers 866 chromosomal records for 726 species supported with 253 published articles and the information is being updated regularly. The database provides information on chromosome number and morphology, sex chromosomes, chromosome banding, molecular cytogenetic markers, etc. supported by fish and karyotype images through interactive tools. It also enables the users to browse and view chromosomal information based on habitat, family, conservation status and chromosome number. The system also displays chromosome number in model organisms, protocol for chromosome preparation and allied techniques and glossary of cytogenetic terms. A data submission facility has also been provided through data submission panel. The database can serve as a unique and useful resource for cytogenetic characterization, sex determination, chromosomal mapping, cytotaxonomy, karyo-evolution and systematics of fishes. Database URL: http://mail.nbfgr.res.in/Fish_Karyome.",Fish Karyome,0.700258017,NA,0,Fish Karyome,0.700258017,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/15/2016 +33203359,http://fishdb.ihb.ac.cn,"FishDB: an integrated functional genomics database for fishes. 
Background Hundreds of genomes and transcriptomes of fish species have been sequenced in recent years. However, fish scholarship currently lacks a comprehensive, integrated, and up-to-date collection of fish genomic data. Results Here we present FishDB, the first database for fish multi-level omics data, available online at http://fishdb.ihb.ac.cn . The database contains 233 fish genomes, 201 fish transcriptomes, 5841 fish mitochondrial genomes, 88 fish gene sets, 16,239 miRNAs of 65 fishes, 1,330,692 piRNAs and 4852 lncRNAs of Danio rerio, 59,040 Mb untranslated regions (UTR) of 230 fishes, and 31,918 Mb coding sequences (CDS) of 230 fishes. Among these, we newly generated a total of 11 fish genomes and 53 fish transcriptomes. Conclusions This release contains over 410,721.67 Mb sequences and provides search functionality, a BLAST server, JBrowse, and PrimerServer modules.",FishDB,0.997222185,NA,0,FishDB,0.997222185,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2020 +30611878,http://mail.nbfgr.res.in/FisOmics,"FisOmics: A portal of fish genomic resources. An online portal, accessible at URL: http://mail.nbfgr.res.in/FisOmics/, was developed that features different genomic databases and tools. The portal, named as FisOmics, acts as a platform for sharing fish genomic sequences and related information in addition to facilitating the access of high-performance computational resources for genome and proteome data analyses. It provides the ability for quarrying, analysing and visualizing genomic sequences and related information. The featured databases in FisOmics are in the World Wide Web domain already. The aim to develop portal was to provide a nodal point to access the featured databases and work conveniently. Presently, FisOmics includes databases on barcode sequences, microsatellite markers, mitogenome sequences, hypoxia-responsive genes and karyology of fishes. 
Besides, it has a link to other molecular resources and reports on the on-going activities and research achievements.",FisOmics,0.993046761,NA,0,FisOmics,0.993046761,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/3/2019 +24705206,http://www.fixpred.com,"FixPred: a resource for correction of erroneous protein sequences. Protein databases are heavily contaminated with erroneous (mispredicted, abnormal and incomplete) sequences and these erroneous data significantly distort the conclusions drawn from genome-scale protein sequence analyses. In our earlier work we described the MisPred resource that serves to identify erroneous sequences; here we present the FixPred computational pipeline that automatically corrects sequences identified by MisPred as erroneous. The current version of the associated FixPred database contains corrected UniProtKB/Swiss-Prot and NCBI/RefSeq sequences from Homo sapiens, Mus musculus, Rattus norvegicus, Monodelphis domestica, Gallus gallus, Xenopus tropicalis, Danio rerio, Fugu rubripes, Ciona intestinalis, Branchostoma floridae, Drosophila melanogaster and Caenorhabditis elegans; future releases of the FixPred database will include corrected sequences of additional Metazoan species. The FixPred computational pipeline and database (http://www.fixpred.com) are easily accessible through a simple web interface coupled to a powerful query engine and a standard web service. The content is completely or partially downloadable in a variety of formats. Database URL: http://www.fixpred.com.",FixPred,0.988829434,NA,0,FixPred,0.988829434,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/4/2014 +26456067,http://forensic.ugent.be/FLAD,"Forensic Loci Allele Database (FLAD): Automatically generated, permanent identifiers for sequenced forensic alleles. It is difficult to predict if and when massively parallel sequencing of forensic STR loci will replace capillary electrophoresis as the new standard technology in forensic genetics. 
The main benefits of sequencing are increased multiplexing scales and SNP detection. There is not yet a consensus on how sequenced profiles should be reported. We present the Forensic Loci Allele Database (FLAD) service, made freely available on http://forensic.ugent.be/FLAD/. It offers permanent identifiers for sequenced forensic alleles (STR or SNP) and their microvariants for use in forensic allele nomenclature. Analogous to Genbank, its aim is to provide permanent identifiers for forensically relevant allele sequences. Researchers that are developing forensic sequencing kits or are performing population studies, can register on http://forensic.ugent.be/FLAD/ and add loci and allele sequences with a short and simple application interface (API).",FLAD,0.979612629,Forensic Loci Allele Database,0.973130558,FLAD,0.979612629,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/21/2015 +31696036,http://bioinfo.net.in/flavodb/home.html,"FlavoDb: a web-based chemical repository of flavonoid compounds. There are many online resources that focus on chemical diversity of natural compounds, but only handful of resources exist that focus solely on flavonoid compounds and integrate structural and functional properties; however, extensive collated flavonoid literature is still unavailable to scientific community. Here we present an open access database 'FlavoDb' that is focused on providing physicochemical properties as well as topological descriptors that can be effectively implemented in deducing large scale quantitative structure property models of flavonoid compounds. In the current version of database, we present data on 1, 19,400 flavonoid compounds, thereby covering most of the known structural space of flavonoid class of compounds. Moreover, effective structure searching tool presented here is expected to provide an interactive and easy-to-use tool for obtaining flavonoid-based literature and allied information. 
Data from FlavoDb can be freely accessed via its intuitive graphical user interface made available at following web address: http://bioinfo.net.in/flavodb/home.html.",FlavoDb,0.994724214,NA,0,FlavoDb,0.994724214,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/31/2019 +29059383,http://cosylab.iiitd.edu.in/flavordb,"FlavorDB: a database of flavor molecules. Flavor is an expression of olfactory and gustatory sensations experienced through a multitude of chemical processes triggered by molecules. Beyond their key role in defining taste and smell, flavor molecules also regulate metabolic processes with consequences to health. Such molecules present in natural sources have been an integral part of human history with limited success in attempts to create synthetic alternatives. Given their utility in various spheres of life such as food and fragrances, it is valuable to have a repository of flavor molecules, their natural sources, physicochemical properties, and sensory responses. FlavorDB (http://cosylab.iiitd.edu.in/flavordb) comprises of 25,595 flavor molecules representing an array of tastes and odors. Among these 2254 molecules are associated with 936 natural ingredients belonging to 34 categories. The dynamic, user-friendly interface of the resource facilitates exploration of flavor molecules for divergent applications: finding molecules matching a desired flavor or structure; exploring molecules of an ingredient; discovering novel food pairings; finding the molecular essence of food ingredients; associating chemical features with a flavor and more. Data-driven studies based on FlavorDB can pave the way for an improved understanding of flavor mechanisms.",FlavorDB,0.997247577,NA,0,FlavorDB,0.997247577,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +26476447,http://www.flor-id.org,"FLOR-ID: an interactive database of flowering-time gene networks in Arabidopsis thaliana. 
Flowering is a hot topic in Plant Biology and important progress has been made in Arabidopsis thaliana toward unraveling the genetic networks involved. The increasing complexity and the explosion of literature however require development of new tools for information management and update. We therefore created an evolutive and interactive database of flowering time genes, named FLOR-ID (Flowering-Interactive Database), which is freely accessible at http://www.flor-id.org. The hand-curated database contains information on 306 genes and links to 1595 publications gathering the work of >4500 authors. Gene/protein functions and interactions within the flowering pathways were inferred from the analysis of related publications, included in the database and translated into interactive manually drawn snapshots.",FLOR-ID,0.993054897,Flowering-Interactive Database,0.88401364,FLOR-ID,0.993054897,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/17/2015 +27698587,http://flora-on.pt,"Flora-On: Occurrence data of the vascular flora of mainland Portugal. The Flora-On dataset currently includes 253,310 occurrence records for the class Embryopsidae (vascular plants), comprising data collated via the platform http://flora-on.pt/ relating to observation records of vascular plants across mainland Portugal. Observations are uploaded directly to the database primarily by experienced botanists and naturalists, typically on a weekly basis, and consist of geo-referenced data points for species (or infraspecific taxa) along with their date of observation and phenological state. The Flora-On project aims to compile and make publicly accessible chorological, ecological, morphological and photographic information for the entire vascular flora of Portugal. The project's website offers powerful query and visualization capabilities, of which we highlight the probabilistic bioclimatic and phenological queries which operate based on the empirical density distributions of species in those variables. 
Flora-On was created and continues to be maintained by volunteers who are Associate members of Sociedade Portuguesa de Botânica (Botanical Society of Portugal). Given its focus on research-grade and current data, the Flora-On project represents a significant contribution to the knowledge of the present distribution and status of the Portuguese flora.",Flora-On,0.985483035,NA,0,Flora-On,0.985483035,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/9/2016 +22649282,"http://data.gbif.org/datasets/resource/10969/, http://projects.biodiversity.be/ifblAll","Florabank1: a grid-based database on vascular plant distribution in the northern part of Belgium (Flanders and the Brussels Capital region). Florabank1 is a database that contains distributional data on the wild flora (indigenous species, archeophytes and naturalised aliens) of Flanders and the Brussels Capital Region. It holds about 3 million records of vascular plants, dating from 1800 till present. Furthermore, it includes ecological data on vascular plant species, redlist category information, Ellenberg values, legal status, global distribution, seed bank etc. The database is an initiative of ""Flo.Wer"" (www.plantenwerkgroep.be), the Research Institute for Nature and Forest (INBO: www.inbo.be) and the National Botanic Garden of Belgium (www.br.fgov.be). Florabank aims at centralizing botanical distribution data gathered by both professional and amateur botanists and to make these data available to the benefit of nature conservation, policy and scientific research.The occurrence data contained in Florabank1 are extracted from checklists, literature and herbarium specimen information. Of survey lists, the locality name (verbatimLocality), species name, observation date and IFBL square code, the grid system used for plant mapping in Belgium (Van Rompaey 1943), is recorded. For records dating from the period 1972-2004 all pertinent botanical journals dealing with Belgian flora were systematically screened. 
Analysis of herbarium specimens in the collection of the National Botanic Garden of Belgium, the University of Ghent and the University of Liège provided interesting distribution knowledge concerning rare species, this information is also included in Florabank1. The data recorded before 1972 is available through the Belgian GBIF node (http://data.gbif.org/datasets/resource/10969/), not through FLORABANK1, to avoid duplication of information. A dedicated portal providing access to all published Belgian IFBL records at this moment is available at: http://projects.biodiversity.be/ifblAll data in Florabank1 is georeferenced. Every record holds the decimal centroid coordinates of the IFBL square containing the observation. The uncertainty radius is the smallest circle possible covering the whole IFBL square, which can measure 1 Km² or 4 Km². Florabank is a work in progress and new occurrences are added as they become available; the dataset will be updated through GBIF on a regularly base.",Florabank1,0.991928935,NA,0,Florabank1,0.991928935,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/16/2012 +31774482,http://www.jianglab.tech/FluReassort,"FluReassort: a database for the study of genomic reassortments among influenza viruses. Genomic reassortment is an important genetic event in the generation of emerging influenza viruses, which can cause numerous serious flu endemics and epidemics within hosts or even across different hosts. However, there is no dedicated and comprehensive repository for reassortment events among influenza viruses. Here, we present FluReassort, a database for understanding the genomic reassortment events in influenza viruses. Through manual curation of thousands of literature references, the database compiles 204 reassortment events among 56 subtypes of influenza A viruses isolated in 37 different countries. 
FluReassort provides an interface for the visualization and evolutionary analysis of reassortment events, allowing users to view the events through the phylogenetic analysis with varying parameters. The reassortment networks in FluReassort graphically summarize the correlation and causality between different subtypes of the influenza virus and facilitate the description and interpretation of the reassortment preference among subtypes. We believe FluReassort is a convenient and powerful platform for understanding the evolution of emerging influenza viruses. FluReassort is freely available at https://www.jianglab.tech/FluReassort.",FluReassort,0.980082214,NA,0,FluReassort,0.980082214,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +23203866,"http://flyatlas.org, http://flyatlas.gla.ac.uk","FlyAtlas: database of gene expression in the tissues of Drosophila melanogaster. The FlyAtlas resource contains data on the expression of the genes of Drosophila melanogaster in different tissues (currently 25-17 adult and 8 larval) obtained by hybridization of messenger RNA to Affymetrix Drosophila Genome 2 microarrays. The microarray probe sets cover 13,250 Drosophila genes, detecting 12,533 in an unambiguous manner. The data underlying the original web application (http://flyatlas.org) have been restructured into a relational database and a Java servlet written to provide a new web interface, FlyAtlas 2 (http://flyatlas.gla.ac.uk/), which allows several additional queries. Users can retrieve data for individual genes or for groups of genes belonging to the same or related ontological categories. Assistance in selecting valid search terms is provided by an Ajax 'autosuggest' facility that polls the database as the user types. Searches can also focus on particular tissues, and data can be retrieved for the most highly expressed genes, for genes of a particular category with above-average expression or for genes with the greatest difference in expression between the larval and adult stages. 
A novel facility allows the database to be queried with a specific gene to find other genes with a similar pattern of expression across the different tissues.",FlyAtlas,0.997430265,NA,0,FlyAtlas,0.997430265,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +22127867,"http://flybase.org, http://www.modencode.org","FlyBase 101--the basics of navigating FlyBase. FlyBase (http://flybase.org) is the leading database and web portal for genetic and genomic information on the fruit fly Drosophila melanogaster and related fly species. Whether you use the fruit fly as an experimental system or want to apply Drosophila biological knowledge to another field of study, FlyBase can help you successfully navigate the wealth of available Drosophila data. Here, we review the FlyBase web site with novice and less-experienced users of FlyBase in mind and point out recent developments stemming from the availability of genome-wide data from the modENCODE project. The first section of this paper explains the organization of the web site and describes the report pages available on FlyBase, focusing on the most popular, the Gene Report. The next section introduces some of the search tools available on FlyBase, in particular, our heavily used and recently redesigned search tool QuickSearch, found on the FlyBase homepage. The final section concerns genomic data, including recent modENCODE (http://www.modencode.org) data, available through our Genome Browser, GBrowse.",FlyBase,0.9871732,NA,0,FlyBase,0.9871732,1,"24234449.0, 26935103.0","24234449.0, 26935103.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/29/2011 +"24234449, 26935103",http://flybase.org,"FlyBase 102--advanced approaches to interrogating FlyBase. FlyBase (http://flybase.org) is the leading website and database of Drosophila genes and genomes. 
Whether you are using the fruit fly Drosophila melanogaster as an experimental system or wish to understand Drosophila biological knowledge in relation to human disease or to other model systems, FlyBase can help you successfully find the information you are looking for. Here, we demonstrate some of our more advanced searching systems and highlight some of our new tools for searching the wealth of data on FlyBase. The first section explores gene function in FlyBase, using our TermLink tool to search with Controlled Vocabulary terms and our new RNA-Seq Search tool to search gene expression. The second section of this article describes a few ways to search genomic data in FlyBase, using our BLAST server and the new implementation of GBrowse 2, as well as our new FeatureMapper tool. Finally, we move on to discuss our most powerful search tool, QueryBuilder, before describing pre-computed cuts of the data and how to query the database programmatically.",FlyBase,0.981176376,NA,0,FlyBase,0.981176376,2,22127867,22127867,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,3/1/2016 +21994220,http://www.flyexpress.net,"FlyExpress: visual mining of spatiotemporal patterns for genes and publications in Drosophila embryogenesis. Summary Images containing spatial expression patterns illuminate the roles of different genes during embryogenesis. In order to generate initial clues to regulatory interactions, biologists frequently need to know the set of genes expressed at the same time at specific locations in a developing embryo, as well as related research publications. However, text-based mining of image annotations and research articles cannot produce all relevant results, because the primary data are images that exist as graphical objects. We have developed a unique knowledge base (FlyExpress) to facilitate visual mining of images from Drosophila melanogaster embryogenesis. 
By clicking on specific locations in pictures of fly embryos from different stages of development and different visual projections, users can produce a list of genes and publications instantly. In FlyExpress, each queryable embryo picture is a heat-map that captures the expression patterns of more than 4500 genes and more than 2600 published articles. In addition, one can view spatial patterns for particular genes over time as well as find other genes with similar expression patterns at a given developmental stage. Therefore, FlyExpress is a unique tool for mining spatiotemporal expression patterns in a format readily accessible to the scientific community. Availability http://www.flyexpress.net Contact s.kumar@asu.edu.",FlyExpress,0.970165312,NA,0,FlyExpress,0.970165312,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/12/2011 +22067456,http://www.flyrnai.org,"FlyRNAi.org--the database of the Drosophila RNAi screening center: 2012 update. FlyRNAi (http://www.flyrnai.org), the database and website of the Drosophila RNAi Screening Center (DRSC) at Harvard Medical School, serves a dual role, tracking both production of reagents for RNA interference (RNAi) screening in Drosophila cells and RNAi screen results. The database and website is used as a platform for community availability of protocols, tools, and other resources useful to researchers planning, conducting, analyzing or interpreting the results of Drosophila RNAi screens. Based on our own experience and user feedback, we have made several changes. Specifically, we have restructured the database to accommodate new types of reagents; added information about new RNAi libraries and other reagents; updated the user interface and website; and added new tools of use to the Drosophila community and others. 
Overall, the result is a more useful, flexible and comprehensive website and database.",FlyRNAi,0.996583045,Drosophila RNAi screening,0.770914784,FlyRNAi,0.996583045,1,NA,27924039,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/8/2011 +27924039,http://fgr.hms.harvard.edu,"FlyRNAi.org-the database of the Drosophila RNAi screening center and transgenic RNAi project: 2017 update. The FlyRNAi database of the Drosophila RNAi Screening Center (DRSC) and Transgenic RNAi Project (TRiP) at Harvard Medical School and associated DRSC/TRiP Functional Genomics Resources website (http://fgr.hms.harvard.edu) serve as a reagent production tracking system, screen data repository, and portal to the community. Through this portal, we make available protocols, online tools, and other resources useful to researchers at all stages of high-throughput functional genomics screening, from assay design and reagent identification to data analysis and interpretation. In this update, we describe recent changes and additions to our website, database and suite of online tools. Recent changes reflect a shift in our focus from a single technology (RNAi) and model species (Drosophila) to the application of additional technologies (e.g. CRISPR) and support of integrated, cross-species approaches to uncovering gene function using functional genomics and other approaches.",FlyRNAi,0.99652952,NA,0,FlyRNAi,0.99652952,1,NA,22067456,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/23/2016 +29890119,http://prodata.swmed.edu/FlyXCDB,"FlyXCDB-A Resource for Drosophila Cell Surface and Secreted Proteins and Their Extracellular Domains. 
Genomes of metazoan organisms possess a large number of genes encoding cell surface and secreted (CSS) proteins that carry out crucial functions in cell adhesion and communication, signal transduction, extracellular matrix establishment, nutrient digestion and uptake, immunity, and developmental processes. We developed the FlyXCDB database (http://prodata.swmed.edu/FlyXCDB) that provides a comprehensive resource to investigate extracellular (XC) domains in CSS proteins of Drosophila melanogaster, the most studied insect model organism in various aspects of animal biology. More than 300 Drosophila XC domains were discovered in Drosophila CSS proteins encoded by over 2500 genes through analyses of computational predictions of signal peptide, transmembrane (TM) segment, and GPI-anchor signal sequence, profile-based sequence similarity searches, gene ontology, and literature. These domains were classified into six classes mainly based on their molecular functions, including protein-protein interactions (class P), signaling molecules (class S), binding of non-protein molecules or groups (class B), enzyme homologs (class E), enzyme regulation and inhibition (class R), and unknown molecular function (class U). Main cellular functions such as cell adhesion, cell signaling, and extracellular matrix composition were described for the most abundant domains in each functional class. We assigned cell membrane topology categories (E, secreted; S, type I/III single-pass TM; T, type II single-pass TM; M, multi-pass TM; and G, GPI-anchored) to the products of genes with XC domains and investigated their regulation by mechanisms such as alternative splicing and stop codon readthrough.",FlyXCDB,0.993109286,NA,0,FlyXCDB,0.993109286,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/8/2018 +26317619,http://mail.nbfgr.res.in/fmir,"FMiR: A Curated Resource of Mitochondrial DNA Information for Fish. Mitochondrial genome sequences have been widely used for evolutionary and phylogenetic studies. 
Among vertebrates, fish are an important, diverse group, and their mitogenome sequences are growing rapidly in public repositories. To facilitate mitochondrial genome analysis and to explore the valuable genetic information, we developed the Fish Mitogenome Resource (FMiR) database to provide a workbench for mitogenome annotation, species identification and microsatellite marker mining. The microsatellites are also known as simple sequence repeats (SSRs) and used as molecular markers in studies on population genetics, gene duplication and marker assisted selection. Here, easy-to-use tools have been implemented for mining SSRs and for designing primers to identify species/habitat specific markers. In addition, FMiR can analyze complete or partial mitochondrial genome sequence to identify species and to deduce relational distances among sequences across species. The database presently contains curated mitochondrial genomes from 1302 fish species belonging to 297 families and 47 orders reported from saltwater and freshwater ecosystems. In addition, the database covers information on fish species such as conservation status, ecosystem, family, distribution and occurrence downloaded from the FishBase and IUCN Red List databases. Those fish information have been used to browse mitogenome information for the species belonging to a particular category. The database is scalable in terms of content and inclusion of other analytical modules. The FMiR is running under Linux operating platform on high performance server accessible at URL http://mail.nbfgr.res.in/fmir.",FMiR,0.989025816,Mitogenome Resource,0.922336429,FMiR,0.989025816,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/28/2015 +23951158,http://www.nipgr.res.in/foxtail.html,"FmMDb: a versatile database of foxtail millet markers for millets and bioenergy grasses research. The prominent attributes of foxtail millet (Setaria italica L.) 
including its small genome size, short life cycle, inbreeding nature, and phylogenetic proximity to various biofuel crops have made this crop an excellent model system to investigate various aspects of architectural, evolutionary and physiological significances in Panicoid bioenergy grasses. After release of its whole genome sequence, large-scale genomic resources in terms of molecular markers were generated for the improvement of both foxtail millet and its related species. Hence it is now essential to congregate, curate and make available these genomic resources for the benefit of researchers and breeders working towards crop improvement. In view of this, we have constructed the Foxtail millet Marker Database (FmMDb; http://www.nipgr.res.in/foxtail.html), a comprehensive online database for information retrieval, visualization and management of large-scale marker datasets with unrestricted public access. FmMDb is the first database which provides complete marker information to the plant science community attempting to produce elite cultivars of millet and bioenergy grass species, thus addressing global food insecurity.",FmMDb,0.998117232,Foxtail millet Marker Database,0.978147492,FmMDb,0.998117232,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/12/2013 +33511845,http://drugdesign.riken.jp/FMODB,"FMODB: The World's First Database of Quantum Mechanical Calculations for Biomacromolecules Based on the Fragment Molecular Orbital Method. We developed the world's first web-based public database for the storage, management, and sharing of fragment molecular orbital (FMO) calculation data sets describing the complex interactions between biomacromolecules, named FMO Database (https://drugdesign.riken.jp/FMODB/). Each entry in the database contains relevant background information on how the data was compiled as well as the total energy of each molecular system and interfragment interaction energy (IFIE) and pair interaction energy decomposition analysis (PIEDA) values. 
Currently, the database contains more than 13√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬØ600 FMO calculation data sets, and a comprehensive search function implemented at the front-end. The procedure for selecting target proteins, preprocessing the experimental structures, construction of the database, and details of the database front-end were described. Then, we demonstrated a use of the FMODB by comparing IFIE value distributions of hydrogen bond, ion-pair, and XH/√ɬÉ√Ǭè√ɬÇ√Ç¬Ä interactions obtained by FMO method to those by molecular mechanics approach. From the comparison, the statistical analysis of the data provided standard reference values for the three types of interactions that will be useful for determining whether each interaction in a given system is relatively strong or weak compared to the interactions contained within the data in the FMODB. In the final part, we demonstrate the use of the database to examine the contribution of halogen atoms to the binding affinity between human cathepsin L and its inhibitors. We found that the electrostatic term derived by PIEDA greatly correlated with the binding affinities of the halogen containing cathepsin L inhibitors, indicating the importance of QM calculation for quantitative analysis of halogen interactions. Thus, the FMO calculation data in FMODB will be useful for conducting statistical analyses to drug discovery, for conducting molecular recognition studies in structural biology, and for other studies involving quantum mechanics-based interactions.",FMODB,0.992171586,FMO Database,0.612398446,FMODB,0.992171586,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/29/2021 +25005261,http://59.163.192.91/FmTFDb/index.html,"FmTFDb: a foxtail millet transcription factors database for expediting functional genomics in millets. Foxtail millet has recently been regarded as a model crop for studying the systems biology of millets and bioenergy grass species. 
For expediting the functional genomic studies in this model crop as well as in the related millets and bioenergy grasses, we have developed a comprehensive transcription factor database. Our foxtail millet transcription factors database (FmTFDb: http://59.163.192.91/FmTFDb/index.html ) encompasses 2,297 putative TFs in 55 families along with its sequence features, chromosomal locations, tissue-specific gene expression data, gene ontology (GO) assignment, and phylogeny. FmTFDb is intended to provide the users an unrestricted public access in retrieving and visualizing the individual members of a TF family through a set of query interfaces and analysis tools, including the BLAST search, annotation query interfaces, and tools to identify enriched GO terms and to visualize physical maps. This FmTFDb will serve as a promising central resource for researchers as well as breeders who are dedicated towards crop improvement of millets and bioenergy grasses.",FmTFDb,0.99798429,foxtail millet transcription factors database,0.956081793,FmTFDb,0.99798429,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/9/2014 +25260589,http://portal.nersc.gov/project/m1317/FOAM,"FOAM (Functional Ontology Assignments for Metagenomes): a Hidden Markov Model (HMM) database with environmental focus. A new functional gene database, FOAM (Functional Ontology Assignments for Metagenomes), was developed to screen environmental metagenomic sequence datasets. FOAM provides a new functional ontology dedicated to classify gene functions relevant to environmental microorganisms based on Hidden Markov Models (HMMs). Sets of aligned protein sequences (i.e. 'profiles') were tailored to a large group of target KEGG Orthologs (KOs) from which HMMs were trained. The alignments were checked and curated to make them specific to the targeted KO. Within this process, sequence profiles were enriched with the most abundant sequences available to maximize the yield of accurate classifier models. 
An associated functional ontology was built to describe the functional groups and hierarchy. FOAM allows the user to select the target search space before HMM-based comparison steps and to easily organize the results into different functional categories and subcategories. FOAM is publicly available at http://portal.nersc.gov/project/m1317/FOAM/.",FOAM,0.983061552,Functional Ontology Assignments for Metagenomes,0.933293728,FOAM,0.983061552,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/26/2014 +31686102,http://foldamerdb.ttk.hu,"FoldamerDB: a database of peptidic foldamers. Foldamers are non-natural oligomers that mimic the structural behaviour of natural peptides, proteins and nucleotides by folding into a well-defined 3D conformation in solution. Since their first description about two decades ago, numerous studies have been undertaken dealing with the design, synthesis, characterization and application of foldamers. They have huge application potential as antimicrobial, anticancer and anti-HIV agents and in materials science. Despite their importance, there is no publicly available web resource providing comprehensive information on these compounds. Here we describe FoldamerDB, an open-source, fully annotated and manually curated database of peptidic foldamers. FoldamerDB holds the information about the sequence, structure and biological activities of the foldamer entries. It contains the information on over 1319 species and 1018 activities, collected from more than 160 research papers. The web-interface is designed to be clutter-free, user-friendly and it is compatible with devices of different screen sizes. The interface allows the user to search the database, browse and filter the foldamers using multiple criteria. It also offers a detailed help page to assist new users. FoldamerDB is hoped to bridge the gap in the freely available web-based resources on foldamers and will be of interest to diverse groups of scientists from chemists to biologists. 
The database can be accessed at http://foldamerdb.ttk.hu/.",FoldamerDB,0.997375488,NA,0,FoldamerDB,0.997375488,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24234003,http://floresta.eead.csic.es/footprintdb,"footprintDB: a database of transcription factors with annotated cis elements and binding interfaces. Motivation Traditional and high-throughput techniques for determining transcription factor (TF) binding specificities are generating large volumes of data of uneven quality, which are scattered across individual databases. Results FootprintDB integrates some of the most comprehensive freely available libraries of curated DNA binding sites and systematically annotates the binding interfaces of the corresponding TFs. The first release contains 2422 unique TF sequences, 10 112 DNA binding sites and 3662 DNA motifs. A survey of the included data sources, organisms and TF families was performed together with proprietary database TRANSFAC, finding that footprintDB has a similar coverage of multicellular organisms, while also containing bacterial regulatory data. A search engine has been designed that drives the prediction of DNA motifs for input TFs, or conversely of TF sequences that might recognize input regulatory sequences, by comparison with database entries. Such predictions can also be extended to a single proteome chosen by the user, and results are ranked in terms of interface similarity. Benchmark experiments with bacterial, plant and human data were performed to measure the predictive power of footprintDB searches, which were able to correctly recover 10, 55 and 90% of the tested sequences, respectively. Correctly predicted TFs had a higher interface similarity than the average, confirming its diagnostic value. Availability and implementation Web site implemented in PHP,Perl, MySQL and Apache. 
Freely available from http://floresta.eead.csic.es/footprintdb.",footprintDB,0.995756149,NA,0,footprintDB,0.995756149,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/14/2013 +22080509,http://webapps2.ucalgary.ca,"Database for bacterial group II introns. The Database for Bacterial Group II Introns (http://webapps2.ucalgary.ca/~groupii/index.html#) provides a catalogue of full-length, non-redundant group II introns present in bacterial DNA sequences in GenBank. The website is divided into three sections. The first section provides general information on group II intron properties, structures and classification. The second and main section lists information for individual introns, including insertion sites, DNA sequences, intron-encoded protein sequences and RNA secondary structure models. The final section provides tools for identification and analysis of intron sequences. These include a step-by-step guide to identify introns in genomic sequences, a local BLAST tool to identify closest intron relatives to a query sequence, and a boundary-finding tool that predicts 5' and 3' intron-exon junctions in an input DNA sequence. Finally, selected intron data can be downloaded in FASTA format. It is hoped that this database will be a useful resource not only to group II intron and RNA researchers, but also to microbiologists who encounter these unexpected introns in genomic sequences.",NA,0,for Bacterial Group II Introns,0.84846305,for Bacterial Group II Introns,0.84846305,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/10/2011 +30398470,http://www.immport.org/resources/mod,"Enabling precision medicine in neonatology, an integrated repository for preterm birth research. Preterm birth, or the delivery of an infant prior to 37 weeks of gestation, is a significant cause of infant morbidity and mortality. 
In the last decade, the advent and continued development of molecular profiling technologies has enabled researchers to generate vast amount of 'omics' data, which together with integrative computational approaches, can help refine the current knowledge about disease mechanisms, diagnostics, and therapeutics. Here we describe the March of Dimes' Database for Preterm Birth Research (http://www.immport.org/resources/mod), a unique resource that contains a variety of 'omics' datasets related to preterm birth. The database is open publicly, and as of January 2018, links 13 molecular studies with data across tens of thousands of patients from 6 measurement modalities. The data in the repository are highly diverse and include genomic, transcriptomic, immunological, and microbiome data. Relevant datasets are augmented with additional molecular characterizations of almost 25,000 biological samples from public databases. We believe our data-sharing efforts will lead to enhanced research collaborations and coordination accelerating the overall pace of discovery in preterm birth research.",NA,0,for Preterm Birth Research,0.829118824,for Preterm Birth Research,0.829118824,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/6/2018 +34583740,http://gitlab.com/vishsoft/fpadmet,"FP-ADMET: a compendium of fingerprint-based ADMET prediction models. Motivation The absorption, distribution, metabolism, excretion, and toxicity (ADMET) of drugs plays a key role in determining which among the potential candidates are to be prioritized. In silico approaches based on machine learning methods are becoming increasing popular, but are nonetheless limited by the availability of data. With a view to making both data and models available to the scientific community, we have developed FPADMET which is a repository of molecular fingerprint-based predictive models for ADMET properties. 
In this article, we have examined the efficacy of fingerprint-based machine learning models for a large number of ADMET-related properties. The predictive ability of a set of 20 different binary fingerprints (based on substructure keys, atom pairs, local path environments, as well as custom fingerprints such as all-shortest paths) for over 50 ADMET and ADMET-related endpoints have been evaluated as part of the study. We find that for a majority of the properties, fingerprint-based random forest models yield comparable or better performance compared with traditional 2D/3D molecular descriptors. Availability The models are made available as part of open access software that can be downloaded from https://gitlab.com/vishsoft/fpadmet .",FPADMET,0.995484829,NA,0,FPADMET,0.995484829,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/28/2021 +24885079,http://peroxidase.riceblast.snu.ac.kr,"fPoxDB: fungal peroxidase database for comparative genomics. Background Peroxidases are a group of oxidoreductases which mediate electron transfer from hydrogen peroxide (H2O2) and organic peroxide to various electron acceptors. They possess a broad spectrum of impact on industry and fungal biology. There are numerous industrial applications using peroxidases, such as to catalyse highly reactive pollutants and to breakdown lignin for recycling of carbon sources. Moreover, genes encoding peroxidases play important roles in fungal pathogenicity in both humans and plants. For better understanding of fungal peroxidases at the genome-level, a novel genomics platform is required. To this end, Fungal Peroxidase Database (fPoxDB; http://peroxidase.riceblast.snu.ac.kr/) has been developed to provide such a genomics platform for this important gene family. Description In order to identify and classify fungal peroxidases, 24 sequence profiles were built and applied on 331 genomes including 216 from fungi and Oomycetes. 
In addition, NoxR, which is known to regulate NADPH oxidases (NoxA and NoxB) in fungi, was also added to the pipeline. Collectively, 6,113 genes were predicted to encode 25 gene families, presenting well-separated distribution along the taxonomy. For instance, the genes encoding lignin peroxidase, manganese peroxidase, and versatile peroxidase were concentrated in the rot-causing basidiomycetes, reflecting their ligninolytic capability. As a genomics platform, fPoxDB provides diverse analysis resources, such as gene family predictions based on fungal sequence profiles, pre-computed results of eight bioinformatics programs, similarity search tools, a multiple sequence alignment tool, domain analysis functions, and taxonomic distribution summary, some of which are not available in the previously developed peroxidase resource. In addition, fPoxDB is interconnected with other family web systems, providing extended analysis opportunities. Conclusions fPoxDB is a fungi-oriented genomics platform for peroxidases. The sequence-based prediction and diverse analysis toolkits with easy-to-follow web interface offer a useful workbench to study comparative and evolutionary genomics of peroxidases in fungi.",fPoxDB,0.997609806,Fungal Peroxidase Database,0.992474094,fPoxDB,0.997609806,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/8/2014 +25725058,http://www.fruitech.org,"FR database 1.0: a resource focused on fruit development and ripening. . Fruits form unique growing period in the life cycle of higher plants. They provide essential nutrients and have beneficial effects on human health. Characterizing the genes involved in fruit development and ripening is fundamental to understanding the biological process and improving horticultural crops. Although, numerous genes that have been characterized are participated in regulating fruit development and ripening at different stages, no dedicated bioinformatic resource for fruit development and ripening is available. 
In this study, we have developed such a database, FR database 1.0, using manual curation from 38√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ423 articles published before 1 April 2014, and integrating protein interactomes and several transcriptome datasets. It provides detailed information for 904 genes derived from 53 organisms reported to participate in fleshy fruit development and ripening. Genes from climacteric and non-climacteric fruits are also annotated, with several interesting Gene Ontology (GO) terms being enriched for these two gene sets and seven ethylene-related GO terms found only in the climacteric fruit group. Furthermore, protein-protein interaction analysis by integrating information from FR database presents the possible function network that affects fleshy fruit size formation. Collectively, FR database will be a valuable platform for comprehensive understanding and future experiments in fruit biology. Database URL: http://www.fruitech.org/",FR,0.466398388,NA,0,FR,0.466398388,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/27/2015 +26973684,http://pathways.cgrb.oregonstate.edu,"FragariaCyc: A Metabolic Pathway Database for Woodland Strawberry Fragaria vesca. FragariaCyc is a strawberry-specific cellular metabolic network based on the annotated genome sequence of Fragaria vesca L. ssp. vesca, accession Hawaii 4. It was built on the Pathway-Tools platform using MetaCyc as the reference. The experimental evidences from published literature were used for supporting/editing existing entities and for the addition of new pathways, enzymes, reactions, compounds, and small molecules in the database. To date, FragariaCyc comprises 66 super-pathways, 488 unique pathways, 2348 metabolic reactions, 3507 enzymes, and 2134 compounds. In addition to searching and browsing FragariaCyc, researchers can compare pathways across various plant metabolic networks and analyze their data using Omics Viewer tool. 
We view FragariaCyc as a resource for the community of researchers working with strawberry and related fruit crops. It can help understanding the regulation of overall metabolism of strawberry plant during development and in response to diseases and abiotic stresses. FragariaCyc is available online at http://pathways.cgrb.oregonstate.edu.",FragariaCyc,0.99653697,NA,0,FragariaCyc,0.99653697,1,25538713,NA,NA,NA,do not merge,NA,NA,NA,NA,3/4/2016 +32120139,http://www.rxnfinder.org/frcd,"FRCD: A comprehensive food risk component database with molecular scaffold, chemical diversity, toxicity, and biodegradability analysis. The presence of natural toxins, pesticide residues, and illegal additives in food products has been associated with a range of potential health hazards. However, no systematic database exists that comprehensively includes and integrates all research information on these compounds, and valuable information remains scattered across numerous databases and extensive literature reports. Thus, using natural language processing technology, we curated 12,018 food risk components from 152,737 literature reports, 12 authoritative databases, and numerous related regulatory documents. Data on molecular structures, physicochemical properties, chemical taxonomy, absorption, distribution, metabolism, excretion, toxicity properties, and physiological targets within the human body were integrated to afford the comprehensive food risk component database (FRCD, http://www.rxnfinder.org/frcd/). We also analyzed the molecular scaffold and chemical diversity, in addition to evaluating the toxicity and biodegradability of the food risk components. The FRCD could be considered a highly promising tool for future food safety studies.",FRCD,0.996773586,NA,0,FRCD,0.996773586,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/24/2020 +28245064,http://roots.ornl.gov,"A global Fine-Root Ecology Database to address below-ground challenges in plant ecology. 
Variation and tradeoffs within and among plant traits are increasingly being harnessed by empiricists and modelers to understand and predict ecosystem processes under changing environmental conditions. While fine roots play an important role in ecosystem functioning, fine-root traits are underrepresented in global trait databases. This has hindered efforts to analyze fine-root trait variation and link it with plant function and environmental conditions at a global scale. This Viewpoint addresses the need for a centralized fine-root trait database, and introduces the Fine-Root Ecology Database (FRED, http://roots.ornl.gov) which so far includes >√ɬÉ√ǬÇ√ɬÇ√Ǭ†70√ɬÉ√ǬÇ√ɬÇ√Ǭ†000 observations encompassing a broad range of root traits and also includes associated environmental data. FRED represents a critical step toward improving our understanding of below-ground plant ecology. For example, FRED facilitates the quantification of variation in fine-root traits across root orders, species, biomes, and environmental gradients while also providing a platform for assessments of covariation among root, leaf, and wood traits, the role of fine roots in ecosystem functioning, and the representation of fine roots in terrestrial biosphere models. Continued input of observations into FRED to fill gaps in trait coverage will improve our understanding of changes in fine-root traits across space and time.",FRED,0.980674982,Fine-Root Ecology Database,0.815621734,FRED,0.980674982,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/28/2017 +24928188,http://www.escholarship.org/uc/item/6sd403pz,"FreeSolv: a database of experimental and calculated hydration free energies, with input files. This work provides a curated database of experimental and calculated hydration free energies for small neutral molecules in water, along with molecular structures, input files, references, and annotations. We call this the Free Solvation Database, or FreeSolv. 
Experimental values were taken from prior literature and will continue to be curated, with updated experimental references and data added as they become available. Calculated values are based on alchemical free energy calculations using molecular dynamics simulations. These used the GAFF small molecule force field in TIP3P water with AM1-BCC charges. Values were calculated with the GROMACS simulation package, with full details given in references cited within the database itself. This database builds in part on a previous, 504-molecule database containing similar information. However, additional curation of both experimental data and calculated values has been done here, and the total number of molecules is now up to 643. Additional information is now included in the database, such as SMILES strings, PubChem compound IDs, accurate reference DOIs, and others. One version of the database is provided in the Supporting Information of this article, but as ongoing updates are envisioned, the database is now versioned and hosted online. In addition to providing the database, this work describes its construction process. The database is available free-of-charge via http://www.escholarship.org/uc/item/6sd403pz .",FreeSolv,0.989120781,NA,0,FreeSolv,0.989120781,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/14/2014 +23459781,http://foodcast.sissa.it/neuroscience,"The FoodCast research image database (FRIDa). In recent years we have witnessed an increasing interest in food processing and eating behaviors. This is probably due to several reasons. The biological relevance of food choices, the complexity of the food-rich environment in which we presently live (making food-intake regulation difficult), and the increasing health care cost due to illness associated with food (food hazards, food contamination, and aberrant food-intake). 
Despite the importance of the issues and the relevance of this research, comprehensive and validated databases of stimuli are rather limited, outdated, or not available for non-commercial purposes to independent researchers who aim at developing their own research program. The FoodCast Research Image Database (FRIDa) we present here includes 877 images belonging to eight different categories: natural-food (e.g., strawberry), transformed-food (e.g., french fries), rotten-food (e.g., moldy banana), natural-non-food items (e.g., pinecone), artificial food-related objects (e.g., teacup), artificial objects (e.g., guitar), animals (e.g., camel), and scenes (e.g., airport). FRIDa has been validated on a sample of healthy participants (N = 73) on standard variables (e.g., valence, familiarity, etc.) as well as on other variables specifically related to food items (e.g., perceived calorie content); it also includes data on the visual features of the stimuli (e.g., brightness, high frequency power, etc.). FRIDa is a well-controlled, flexible, validated, and freely available (http://foodcast.sissa.it/neuroscience/) tool for researchers in a wide range of academic fields and industry.",FRIDa,0.992402077,FoodCast Research Image Database,0.776939595,FRIDa,0.992402077,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2013 +29175726,http://frog.med.yale.edu/FrogKB,"The redesigned Forensic Research/Reference on Genetics-knowledge base, FROG-kb. The Forensic Resource/Reference on Genetics-knowledge base (FROG-kb) web site was introduced in 2011 and in the five years since the previous publication ongoing research into how the database can better serve forensics has resulted in extensive redesign of the database interface and functionality. Originally designed as a prototype to support forensic use of single nucleotide polymorphisms (SNPs), FROG-kb provides a freely accessible web interface that facilitates forensic practice and can be useful for teaching and research. 
Based on knowledge gained through its use, the web interface has been redesigned for easier navigation through the multiple components. The site also has functional enhancements, extensive new documentation, and new reference panels of SNPs with new curated data. FROG-kb focuses on single nucleotide polymorphisms (SNPs) and provides reference population data for several published panels of individual identification SNPs (IISNPs) and several published panels of ancestry inference SNPs (AISNPs). For each of the various marker panels with reference population data, FROG-kb calculates random match probabilities (RMP) and relative likelihoods of ancestry for a user-entered genotype profile (either completely or partially specified). Example genotype profiles are available and the User's Manual presents interpretation guidelines for the calculations. The extensive documentation along with ongoing updates makes FROG-kb a comprehensive tool in facilitating use of SNPs in forensic practice and education. An overview of the new FROG-kb with examples and material explaining the results of its use are presented here.",FROG-kb,0.992679045,Forensic Resource/Reference on Genetics-knowledge base,0.930164516,FROG-kb,0.992679045,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/14/2017 +32123502,http://www.frogid.net.au,"The FrogID dataset: expert-validated occurrence records of Australia's frogs collected by citizen scientists. This dataset represents expert-validated occurrence records of calling frogs across Australia collected via the national citizen science project FrogID (http://www.frogid.net.au). FrogID relies on participants recording calling frogs using smartphone technology, after which point the frogs are identified by expert validators, resulting in a database of georeferenced frog species records. This dataset represents one full year of the project (10 November 2017-9 November 2018), including 54,864 records of 172 species, 71% of the known frog species in Australia. 
This is the first instalment of the dataset, and we anticipate providing updated datasets on an annual basis.",FrogID,0.994272232,NA,0,FrogID,0.994272232,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/17/2020 +29688353,http://www.fung-stress.org,"Fungal Stress Database (FSD)--a repository of fungal stress physiological data. . The construction of the Fungal Stress Database (FSD) was initiated and fueled by two major goals. At first, some outstandingly important groups of filamentous fungi including the aspergilli possess remarkable capabilities to adapt to a wide spectrum of environmental stress conditions but the underlying mechanisms of this stress tolerance have remained yet to be elucidated. Furthermore, the lack of any satisfactory interlaboratory standardization of stress assays, e.g. the widely used stress agar plate experiments, often hinders the direct comparison and discussion of stress physiological data gained for various fungal species by different research groups. In order to overcome these difficulties and to promote multilevel, e.g. combined comparative physiology-based and comparative genomics-based, stress research in filamentous fungi, we constructed FSD, which currently stores 1412 photos taken on Aspergillus colonies grown under precisely defined stress conditions. This study involved altogether 18 Aspergillus strains representing 17 species with two different strains for Aspergillus niger and covered six different stress conditions. Stress treatments were selected considering the frequency of various stress tolerance studies published in the last decade in the aspergilli and included oxidative (H2O2, menadione sodium bisulphite), high-osmolarity (NaCl, sorbitol), cell wall integrity (Congo Red) and heavy metal (CdCl2) stress exposures. 
In the future, we would like to expand this database to accommodate further fungal species and stress treatments.URL: http://www.fung-stress.org/",FSD,0.992719412,Fungal Stress Database,0.971582294,FSD,0.992719412,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +23757396,http://internal.med.unideb.hu/fsrd,"FSRD: fungal stress response database. Adaptation to different types of environmental stress is a common part of life for today's fungi. A deeper understanding of the organization, regulation and evolution of fungal stress response systems may lead to the development of novel antifungal drugs and technologies or the engineering of industrial strains with elevated stress tolerance. Here we present the Fungal Stress Response Database (http://internal.med.unideb.hu/fsrd) aimed to stimulate further research on stress biology of fungi. The database incorporates 1985 fungal stress response proteins with verified physiological function(s) and their orthologs identified and annotated in 28 species including human and plant pathogens, as well as important industrial fungi. The database will be extended continuously to cover other fully sequenced fungal species. Our database, as a starting point for future stress research, facilitates the analysis of literature data on stress and the identification of ortholog groups of stress response proteins in newly sequenced fungal genomes. Database URL: http://internal.med.unideb.hu/fsrd",FSRD,0.955492318,Fungal Stress Response Database,0.942606161,FSRD,0.955492318,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/11/2013 +"29165593, 33539890",http://funcoup.sbc.su.se,"FunCoup 4: new species, data, and visualization. This release of the FunCoup database (http://funcoup.sbc.su.se) is the fourth generation of one of the most comprehensive databases for genome-wide functional association networks. 
These functional associations are inferred via integrating various data types using a naive Bayesian algorithm and orthology based information transfer across different species. This approach provides high coverage of the included genomes as well as high quality of inferred interactions. In this update of FunCoup we introduce four new eukaryotic species: Schizosaccharomyces pombe, Plasmodium falciparum, Bos taurus, Oryza sativa and open the database to the prokaryotic domain by including networks for Escherichia coli and Bacillus subtilis. The latter allows us to also introduce a new class of functional association between genes - co-occurrence in the same operon. We also supplemented the existing classes of functional association: metabolic, signaling, complex and physical protein interaction with up-to-date information. In this release we switched to InParanoid v8 as the source of orthology and base for calculation of phylogenetic profiles. While populating all other evidence types with new data we introduce a new evidence type based on quantitative mass spectrometry data. Finally, the new JavaScript based network viewer provides the user an intuitive and responsive platform to further evaluate the results.",FunCoup,0.996292353,NA,0,FunCoup,0.996292353,2,NA,"22110034.0, 24185702.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,2/2/2021 +"22110034, 24185702",http://FunCoup.sbc.su.se,"Comparative interactomics with Funcoup 2.0. FunCoup (http://FunCoup.sbc.su.se) is a database that maintains and visualizes global gene/protein networks of functional coupling that have been constructed by Bayesian integration of diverse high-throughput data. FunCoup achieves high coverage by orthology-based integration of data sources from different model organisms and from different platforms. We here present release 2.0 in which the data sources have been updated and the methodology has been refined. 
It contains a new data type Genetic Interaction, and three new species: chicken, dog and zebra fish. As FunCoup extensively transfers functional coupling information between species, the new input datasets have considerably improved both coverage and quality of the networks. The number of high-confidence network links has increased dramatically. For instance, the human network has more than eight times as many links above confidence 0.5 as the previous release. FunCoup provides facilities for analysing the conservation of subnetworks in multiple species. We here explain how to do comparative interactomics on the FunCoup website.",FunCoup,0.996186793,NA,0,FunCoup,0.996186793,2,NA,"29165593.0, 33539890.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/31/2013 +28077567,http://staff.washington.edu/jwallace/farme,"FARME DB: a functional antibiotic resistance element database. . Antibiotic resistance (AR) is a major global public health threat but few resources exist that catalog AR genes outside of a clinical context. Current AR sequence databases are assembled almost exclusively from genomic sequences derived from clinical bacterial isolates and thus do not include many microbial sequences derived from environmental samples that confer resistance in functional metagenomic studies. These environmental metagenomic sequences often show little or no similarity to AR sequences from clinical isolates using standard classification criteria. In addition, existing AR databases provide no information about flanking sequences containing regulatory or mobile genetic elements. To help address this issue, we created an annotated database of DNA and protein sequences derived exclusively from environmental metagenomic sequences showing AR in laboratory experiments. 
Our Functional Antibiotic Resistant Metagenomic Element (FARME) database is a compilation of publically available DNA sequences and predicted protein sequences conferring AR as well as regulatory elements, mobile genetic elements and predicted proteins flanking antibiotic resistant genes. FARME is the first database to focus on functional metagenomic AR gene elements and provides a resource to better understand AR in the 99% of bacteria which cannot be cultured and the relationship between environmental AR sequences and antibiotic resistant genes derived from cultured isolates.Database URL: http://staff.washington.edu/jwallace/farme.",FARME,0.964730322,Functional Antibiotic Resistant Metagenomic Element,0.970071673,Functional Antibiotic Resistant Metagenomic Element,0.970071673,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/10/2017 +25786896,http://unite.ut.ee/repository.php,"A Comprehensive, Automatically Updated Fungal ITS Sequence Dataset for Reference-Based Chimera Control in Environmental Sequencing Efforts. The nuclear ribosomal internal transcribed spacer (ITS) region is the most commonly chosen genetic marker for the molecular identification of fungi in environmental sequencing and molecular ecology studies. Several analytical issues complicate such efforts, one of which is the formation of chimeric-artificially joined-DNA sequences during PCR amplification or sequence assembly. Several software tools are currently available for chimera detection, but rely to various degrees on the presence of a chimera-free reference dataset for optimal performance. However, no such dataset is available for use with the fungal ITS region. This study introduces a comprehensive, automatically updated reference dataset for fungal ITS sequences based on the UNITE database for the molecular identification of fungi. This dataset supports chimera detection throughout the fungal kingdom and for full-length ITS sequences as well as partial (ITS1 or ITS2 only) datasets. 
The performance of the dataset on a large set of artificial chimeras was above 99.5%, and we subsequently used the dataset to remove nearly 1,000 compromised fungal ITS sequences from public circulation. The dataset is available at http://unite.ut.ee/repository.php and is subject to web-based third-party curation.",NA,0,Fungal,0.552512705,Fungal,0.552512705,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/19/2015 +24564786,"http://pcwde.riceblast.snu.ac.kr/, http://cfgp.snu.ac.kr","Fungal plant cell wall-degrading enzyme database: a platform for comparative and evolutionary genomics in fungi and Oomycetes. Background Plant cell wall-degrading enzymes (PCWDEs) play significant roles throughout the fungal life including acquisition of nutrients and decomposition of plant cell walls. In addition, many of PCWDEs are also utilized by biofuel and pulp industries. In order to develop a comparative genomics platform focused in fungal PCWDEs and provide a resource for evolutionary studies, Fungal PCWDE Database (FPDB) is constructed (http://pcwde.riceblast.snu.ac.kr/). Results In order to archive fungal PCWDEs, 22 sequence profiles were constructed and searched on 328 genomes of fungi, Oomycetes, plants and animals. A total of 6,682 putative genes encoding PCWDEs were predicted, showing differential distribution by their life styles, host ranges and taxonomy. Genes known to be involved in fungal pathogenicity, including polygalacturonase (PG) and pectin lyase, were enriched in plant pathogens. Furthermore, crop pathogens had more PCWDEs than those of rot fungi, implying that the PCWDEs analysed in this study are more needed for invading plant hosts than wood-decaying processes. Evolutionary analysis of PGs in 34 selected genomes revealed that gene duplication and loss events were mainly driven by taxonomic divergence and partly contributed by those events in species-level, especially in plant pathogens. 
Conclusions The FPDB would provide a fungi-specialized genomics platform, a resource for evolutionary studies of PCWDE gene families and extended analysis option by implementing Favorite, which is a data exchange and analysis hub built in Comparative Fungal Genomics Platform (CFGP 2.0; http://cfgp.snu.ac.kr/).",FPDB,0.979104906,Fungal PCWDE Database,0.985806865,Fungal PCWDE Database,0.985806865,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/16/2013 +24101916,http://fungene.cme.msu.edu,"FunGene: the functional gene pipeline and repository. Ribosomal RNA genes have become the standard molecular markers for microbial community analysis for good reasons, including universal occurrence in cellular organisms, availability of large databases, and ease of rRNA gene region amplification and analysis. As markers, however, rRNA genes have some significant limitations. The rRNA genes are often present in multiple copies, unlike most protein-coding genes. The slow rate of change in rRNA genes means that multiple species sometimes share identical 16S rRNA gene sequences, while many more species share identical sequences in the short 16S rRNA regions commonly analyzed. In addition, the genes involved in many important processes are not distributed in a phylogenetically coherent manner, potentially due to gene loss or horizontal gene transfer. While rRNA genes remain the most commonly used markers, key genes in ecologically important pathways, e.g., those involved in carbon and nitrogen cycling, can provide important insights into community composition and function not obtainable through rRNA analysis. However, working with ecofunctional gene data requires some tools beyond those required for rRNA analysis. 
To address this, our Functional Gene Pipeline and Repository (FunGene; http://fungene.cme.msu.edu/) offers databases of many common ecofunctional genes and proteins, as well as integrated tools that allow researchers to browse these collections and choose subsets for further analysis, build phylogenetic trees, test primers and probes for coverage, and download aligned sequences. Additional FunGene tools are specialized to process coding gene amplicon data. For example, FrameBot produces frameshift-corrected protein and DNA sequences from raw reads while finding the most closely related protein reference sequence. These tools can help provide better insight into microbial communities by directly studying key genes involved in important ecological processes.",FunGene,0.989818652,Gene Pipeline,0.61831975,FunGene,0.989818652,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2013 +22766416,http://www.fungene-db.org,"FunGene-DB: a web-based tool for Polyporales strains authentication. Polyporales are extensively studied wood-decaying fungi with applications in white and green biotechnologies and in medicinal chemistry. We developed an open-access, user-friendly, bioinformatics tool named FunGene-DB (http://www.fungene-db.org). The goal was to facilitate the molecular authentication of Polyporales strains and fruit-bodies, otherwise subjected to morphological studies. This tool includes a curated database that contains ITS1-5.8S-ITS2 rDNA genes screened through a semi-automated pipeline from the International Nucleotide Sequence Database (INSD), and the similarity search BLASTn program. Today, the web-accessible database compiles 2379 accepted sequences, among which 386 were selected as reference sequences (most often fully identified ITS sequences for which a voucher, strain or specimen, has been deposited in a public-access collection). The restriction of the database to one reference sequence per species (or per clade for species complex) allowed most often unequivocal analysis. 
We conclude that FunGene-DB is a promising tool for molecular authentication of Polyporales. It should be especially useful for scientists who are not expert mycologists but who need to check the identity of strains (e.g. for culture collections, for applied microbiology).",FunGene-DB,0.99046123,NA,0,FunGene-DB,0.99046123,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/2/2012 +22064857,"http://FungiDB.org, http://EuPathDB.org","FungiDB: an integrated functional genomics database for fungi. FungiDB (http://FungiDB.org) is a functional genomic resource for pan-fungal genomes that was developed in partnership with the Eukaryotic Pathogen Bioinformatic resource center (http://EuPathDB.org). FungiDB uses the same infrastructure and user interface as EuPathDB, which allows for sophisticated and integrated searches to be performed using an intuitive graphical system. The current release of FungiDB contains genome sequence and annotation from 18 species spanning several fungal classes, including the Ascomycota classes, Eurotiomycetes, Sordariomycetes, Saccharomycetes and the Basidiomycota orders, Pucciniomycetes and Tremellomycetes, and the basal 'Zygomycete' lineage Mucormycotina. Additionally, FungiDB contains cell cycle microarray data, hyphal growth RNA-sequence data and yeast two hybrid interaction data. The underlying genomic sequence and annotation combined with functional data, additional data from the FungiDB standard analysis pipeline and the ability to leverage orthology provides a powerful resource for in silico experimentation.",FungiDB,0.997218907,NA,0,FungiDB,0.997218907,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/7/2011 +34964845,"http://www.fungiofpakistan.com, http://fungiofpakistan.com","https://www.fungiofpakistan.com: a continuously updated online database of fungi in Pakistan. . The website fungiofpakistan.com is a collection of all the available data about macro- as well as micro-fungi collected from Pakistan. 
This website comprises reported fungal species with isolation source or host record, locality and updated classification. The data on this website is based on old literature (library data, personal data of specific authors or books that were not easily accessible to public) and recent publications. This website is an important potential platform for researchers, government officials, industries and other users. Users can provide their inputs related to missing taxa, new genera, the new record and new data. They also have the opportunity to express their opinions on valid names, invalid names and illegitimate names, with notes published in the 'Notes' section of webpage provided following review and editing by curators and fungal taxonomists. This website plays a significant contribution to our knowledge of the rich fungal diversity of Pakistan. However, much more sustained and detailed research is needed to fully evaluate fungal diversity in Pakistan. Undoubtedly, that many more fungi will be discovered and added in the future. https://fungiofpakistan.com/.",fungiofpakistan,0.679958761,NA,0,fungiofpakistan,0.679958761,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2021 +29220485,"http://funricegenes.github.io/, http://funricegenes.ncpgr.cn","funRiceGenes dataset for comprehensive understanding and application of rice functional genes. Background As a main staple food, rice is also a model plant for functional genomic studies of monocots. Decoding of every DNA element of the rice genome is essential for genetic improvement to address increasing food demands. The past 15 years have witnessed extraordinary advances in rice functional genomics. Systematic characterization and proper deposition of every rice gene are vital for both functional studies and crop genetic improvement. 
Findings We built a comprehensive and accurate dataset of ∼2800 functionally characterized rice genes and ∼5000 members of different gene families by integrating data from available databases and reviewing every publication on rice functional genomic studies. The dataset accounts for 19.2% of the 39 045 annotated protein-coding rice genes, which provides the most exhaustive archive for investigating the functions of rice genes. We also constructed 214 gene interaction networks based on 1841 connections between 1310 genes. The largest network with 762 genes indicated that pleiotropic genes linked different biological pathways. Increasing degree of conservation of the flowering pathway was observed among more closely related plants, implying substantial value of rice genes for future dissection of flowering regulation in other crops. All data are deposited in the funRiceGenes database (https://funricegenes.github.io/). Functionality for advanced search and continuous updating of the database are provided by a Shiny application (http://funricegenes.ncpgr.cn/). Conclusions The funRiceGenes dataset would enable further exploring of the crosslink between gene functions and natural variations in rice, which can also facilitate breeding design to improve target agronomic traits of rice.",funRiceGenes,0.978388309,NA,0,funRiceGenes,0.978388309,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +25522231,http://funrna.riceblast.snu.ac.kr,"funRNA: a fungi-centered genomics platform for genes encoding key components of RNAi. Background RNA interference (RNAi) is involved in genome defense as well as diverse cellular, developmental, and physiological processes. Key components of RNAi are Argonaute, Dicer, and RNA-dependent RNA polymerase (RdRP), which have been functionally characterized mainly in model organisms. 
The key components are believed to exist throughout eukaryotes; however, there is no systematic platform for archiving and dissecting these important gene families. In addition, few fungi have been studied to date, limiting our understanding of RNAi in fungi. Here we present funRNA http://funrna.riceblast.snu.ac.kr/, a fungal kingdom-wide comparative genomics platform for putative genes encoding Argonaute, Dicer, and RdRP. Description To identify and archive genes encoding the abovementioned key components, protein domain profiles were determined from reference sequences obtained from UniProtKB/SwissProt. The domain profiles were searched using fungal, metazoan, and plant genomes, as well as bacterial and archaeal genomes. 1,163, 442, and 678 genes encoding Argonaute, Dicer, and RdRP, respectively, were predicted. Based on the identification results, active site variation of Argonaute, diversification of Dicer, and sequence analysis of RdRP were discussed in a fungus-oriented manner. funRNA provides results from diverse bioinformatics programs and job submission forms for BLAST, BLASTMatrix, and ClustalW. Furthermore, sequence collections created in funRNA are synced with several gene family analysis portals and databases, offering further analysis opportunities. Conclusions funRNA provides identification results from a broad taxonomic range and diverse analysis functions, and could be used in diverse comparative and evolutionary studies. It could serve as a versatile genomics workbench for key components of RNAi.",funRNA,0.992636442,NA,0,funRNA,0.992636442,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/8/2014 +21300622,http://proteomics.ysu.edu/secretomes/fungi.php,"FunSecKB: the Fungal Secretome KnowledgeBase. The Fungal Secretome KnowledgeBase (FunSecKB) provides a resource of secreted fungal proteins, i.e. secretomes, identified from all available fungal protein data in the NCBI RefSeq database. 
The secreted proteins were identified using a well evaluated computational protocol which includes SignalP, WolfPsort and Phobius for signal peptide or subcellular location prediction, TMHMM for identifying membrane proteins, and PS-Scan for identifying endoplasmic reticulum (ER) target proteins. The entries were mapped to the UniProt database and any annotations of subcellular locations that were either manually curated or computationally predicted were included in FunSecKB. Using a web-based user interface, the database is searchable, browsable and downloadable by using NCBI's RefSeq accession or gi number, UniProt accession number, keyword or by species. A BLAST utility was integrated to allow users to query the database by sequence similarity. A user submission tool was implemented to support community annotation of subcellular locations of fungal proteins. With the complete fungal data from RefSeq and associated web-based tools, FunSecKB will be a valuable resource for exploring the potential applications of fungal secreted proteins. Database URL: http://proteomics.ysu.edu/secretomes/fungi.php.",FunSecKB,0.997838616,Fungal Secretome KnowledgeBase,0.966365695,FunSecKB,0.997838616,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/6/2011 +"26590404, 30298402",http://www.funtree.info,"FunTree: advances in a resource for exploring and contextualising protein function evolution. FunTree is a resource that brings together protein sequence, structure and functional information, including overall chemical reaction and mechanistic data, for structurally defined domain superfamilies. Developed in tandem with the CATH database, the original FunTree contained just 276 superfamilies focused on enzymes. Here, we present an update of FunTree that has expanded to include 2340 superfamilies including both enzymes and proteins with non-enzymatic functions annotated by Gene Ontology (GO) terms. 
This allows the investigation of how novel functions have evolved within a structurally defined superfamily and provides a means to analyse trends across many superfamilies. This is done not only within the context of a protein's sequence and structure but also the relationships of their functions. New measures of functional similarity have been integrated, including for enzymes comparisons of overall reactions based on overall bond changes, reaction centres (the local environment atoms involved in the reaction) and the sub-structure similarities of the metabolites involved in the reaction and for non-enzymes semantic similarities based on the GO. To identify and highlight changes in function through evolution, ancestral character estimations are made and presented. All this is accessible through a new re-designed web interface that can be found at http://www.funtree.info.",FunTree,0.994794667,NA,0,FunTree,0.994794667,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +21541042,http://www.nuolan.net/substrates.html,"FurinDB: A database of 20-residue furin cleavage site motifs, substrates and their associated drugs. FurinDB (freely available online at http://www.nuolan.net/substrates.html) is a database of furin substrates. This database includes experimentally verified furin cleavage sites, substrates, species, experimental methods, original publications of experiments and associated drugs targeting furin substrates. The current database release contains 126 furin cleavage sites from three species: mammals, bacteria and viruses. A main feature of this database is that all furin cleavage sites are recorded as a 20-residue motif, including one core region (eight amino acids, P6-P2') and two flanking solvent accessible regions (eight amino acids, P7-P14, and four amino acids, P3'-P6'), that represent our current understanding of the molecular biology of furin cleavage. 
This database is important for understanding the molecular evolution and relationships between sequence motifs, 3D structures, cellular functions and physical properties required by furin for cleavage, and for elucidating the molecular mechanisms and the progression of furin cleavage associated human diseases, including pathogenic infections, neurological disorders, tumorigenesis, tumor invasion, angiogenesis, and metastasis. FurinDB database will be a solid addition to the publicly available infrastructure for scientists in the field of molecular biology.",FurinDB,0.997923672,NA,0,FurinDB,0.997923672,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/8/2011 +26215638,http://donglab.ecnu.edu.cn/databases/FusionCancer/Conclusion,"FusionCancer: a database of cancer fusion genes derived from RNA-seq data. Background Fusion genes are chimeric results originated from previous separate genes with aberrant functions. The resulting protein products may lead to abnormal status of expression levels, functions and action sites, which in return may cause the abnormal proliferation of cells and cancer development. Results With the emergence of next-generation sequencing technology, RNA-seq has spurred gene fusion discovery in various cancer types. In this work, we compiled 591 recently published RNA-seq datasets in 15 kinds of human cancer, and the gene fusion events were comprehensively identified. Based on the results, a database was developed for gene fusion in cancers (FusionCancer), with the attempt to provide a user-friendly utility for the cancer research community. A flexible query engine has been developed for the acquisition of annotated information of cancer fusion genes, which would help users to determine the chimera events leading to functional changes. 
FusionCancer can be accessible at the following hyperlink website: http://donglab.ecnu.edu.cn/databases/FusionCancer/Conclusion To the best of our knowledge, FusionCancer is the first comprehensive fusion gene database derived only from cancer RNA-seq data.",FusionCancer,0.993119001,NA,0,FusionCancer,0.993119001,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/28/2015 +30407583,http://ccsm.uth.edu/FusionGDB,"FusionGDB: fusion gene annotation DataBase. Gene fusion is one of the hallmarks of cancer genome via chromosomal rearrangement initiated by DNA double-strand breakage. To date, many fusion genes (FGs) have been established as important biomarkers and therapeutic targets in multiple cancer types. To better understand the function of FGs in cancer types and to promote the discovery of clinically relevant FGs, we built FusionGDB (Fusion Gene annotation DataBase) available at https://ccsm.uth.edu/FusionGDB. We collected 48 117 FGs across pan-cancer from three representative fusion gene resources: the improved database of chimeric transcripts and RNA-seq data (ChiTaRS 3.1), an integrative resource for cancer-associated transcript fusions (TumorFusions), and The Cancer Genome Atlas (TCGA) fusions by Gao et al. For these ∼48K FGs, we performed functional annotations including gene assessment across pan-cancer fusion genes, open reading frame (ORF) assignment, and retention search of 39 protein features based on gene structures of multiple isoforms with different breakpoints. We also provided the fusion transcript and amino acid sequences according to multiple breakpoints and transcript isoforms. Our analyses identified 331, 303 and 667 in-frame FGs with retaining kinase, DNA-binding, and epigenetic factor domains, respectively, as well as 976 FGs lost protein-protein interaction. 
FusionGDB provides six categories of annotations: FusionGeneSummary, FusionProtFeature, FusionGeneSequence, FusionGenePPI, RelatedDrug and RelatedDisease.",FusionGDB,0.992917299,fusion gene annotation DataBase,0.742048061,FusionGDB,0.992917299,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +25149689,http://fusobacterium.um.edu.my,"FusoBase: an online Fusobacterium comparative genomic analysis platform. . Fusobacterium are anaerobic gram-negative bacteria that have been associated with a wide spectrum of human infections and diseases. As the biology of Fusobacterium is still not well understood, comparative genomic analysis on members of this species will provide further insights on their taxonomy, phylogeny, pathogenicity and other information that may contribute to better management of infections and diseases. To facilitate the ongoing genomic research on Fusobacterium, a specialized database with easy-to-use analysis tools is necessary. Here we present FusoBase, an online database providing access to genome-wide annotated sequences of Fusobacterium strains as well as bioinformatics tools, to support the expanding scientific community. Using our custom-developed Pairwise Genome Comparison tool, we demonstrate how differences between two user-defined genomes and how insertion of putative prophages can be identified. In addition, Pathogenomics Profiling Tool is capable of clustering predicted genes across Fusobacterium strains and visualizing the results in the form of a heat map with dendrogram. http://fusobacterium.um.edu.my.",FusoBase,0.997971714,NA,0,FusoBase,0.997971714,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/22/2014 +29976644,http://fusoportal.org,"FusoPortal: an Interactive Repository of Hybrid MinION-Sequenced Fusobacterium Genomes Improves Gene Identification and Characterization. . 
Here we present FusoPortal, an interactive repository of Fusobacterium genomes that were sequenced using a hybrid MinION long-read sequencing pipeline, followed by assembly and annotation using a diverse portfolio of predominantly open-source software. Significant efforts were made to provide genomic and bioinformatic data as downloadable files, including raw sequencing reads, genome maps, gene annotations, protein functional analysis and classifications, and a custom BLAST server for FusoPortal genomes. FusoPortal has been initiated with eight complete genomes, of which seven were previously only drafts that ranged from 24 to 67 contigs. We have showcased that the genomes in FusoPortal provide accurate open reading frame annotations and have corrected a number of large (>3-kb) genes that were previously misannotated due to contig boundaries. In summary, FusoPortal (http://fusoportal.org) is the first database of MinION-sequenced and completely assembled Fusobacterium genomes, and this central Fusobacterium genomic and bioinformatic resource will aid the scientific community in developing a deeper understanding of how this human pathogen contributes to an array of diseases, including periodontitis and colorectal cancer.IMPORTANCE In this report, we describe a hybrid MinION whole-genome sequencing pipeline and the genomic characteristics of the first eight Fusobacterium strains deposited in the FusoPortal database. This collection of highly accurate and complete genomes drastically improves upon previous multicontig assemblies by correcting and newly identifying a significant number of open reading frames. 
We believe that the availability of this resource will result in the discovery of proteins and molecular mechanisms used by an oral pathogen, with the potential to further our understanding of how Fusobacterium nucleatum contributes to a repertoire of diseases, including periodontitis, preterm birth, and colorectal cancer.",FusoPortal,0.994699359,NA,0,FusoPortal,0.994699359,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/5/2018 +27794553,http://protdyn-database.org,"FuzDB: database of fuzzy complexes, a tool to develop stochastic structure-function relationships for protein complexes and higher-order assemblies. FuzDB (http://protdyn-database.org) compiles experimentally observed fuzzy protein complexes, where intrinsic disorder (ID) is maintained upon interacting with a partner (protein, nucleic acid or small molecule) and directly impacts biological function. Entries in the database have both (i) structural evidence demonstrating the structural multiplicity or dynamic disorder of the ID region(s) in the partner bound form of the protein and (ii) in vitro or in vivo biological evidence that indicates the significance of the fuzzy region(s) in the formation, function or regulation of the assembly. Unlike the other intrinsically disordered or unfolded protein databases, FuzDB focuses on ID regions within a biological context, including higher-order assemblies and presents a detailed analysis of the structural and functional data. FuzDB also provides interpretation of experimental results to elucidate the molecular mechanisms by which fuzzy regions-classified on the basis of topology and mechanism-interfere with the structural ensembles and activity of protein assemblies. Regulatory sites generated by alternative splicing (AS) or post-translational modifications (PTMs) are also collected. 
By assembling all this information, FuzDB could be utilized to develop stochastic structure-function relationships for proteins and could contribute to the emergence of a new paradigm.",FuzDB,0.998405516,complexes,0.655352414,FuzDB,0.998405516,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2016 +34485385,http://fuzzle.uni-bayreuth.de/2.0,"Fuzzle 2.0: Ligand Binding in Natural Protein Building Blocks. Modern proteins have been shown to share evolutionary relationships via subdomain-sized fragments. The assembly of such fragments through duplication and recombination events led to the complex structures and functions we observe today. We previously implemented a pipeline that identified more than 1,000 of these fragments that are shared by different protein folds and developed a web interface to analyze and search for them. This resource named Fuzzle helps structural and evolutionary biologists to identify and analyze conserved parts of a protein but it also provides protein engineers with building blocks for example to design proteins by fragment combination. Here, we describe a new version of this web resource that was extended to include ligand information. This addition is a significant asset to the database since now protein fragments that bind specific ligands can be identified and analyzed. Often the mode of ligand binding is conserved in proteins thereby supporting a common evolutionary origin. The same can now be explored for subdomain-sized fragments within this database. This ligand binding information can also be used in protein engineering to graft binding pockets into other protein scaffolds or to transfer functional sites via recombination of a specific fragment. Fuzzle 2.0 is freely available at https://fuzzle.uni-bayreuth.de/2.0.",Fuzzle,0.910124898,NA,0,Fuzzle,0.910124898,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/18/2021 +29126995,http://bioinfo.ihb.ac.cn/fvd,"FVD: The fish-associated virus database. 
With the expanding of marine and freshwater aquaculture, the outbreaks of aquatic animal diseases have increasingly become the major threats to the healthy development of aquaculture industries. Notably, viral infections lead to massive fish deaths and result in great economic loss every year across the world. Hence, it is meaningful to clarify the biodiversity, geographical distribution and host specificity of fish-associated viruses. In this study, viral sequences detected in fish samples were manually collected from public resources, along with the related metadata, such as sampling time, location, specimen type and fish species. Moreover, the information regarding the host fish, including aliases, diet type and geographic distribution were also integrated into a database (FVD). To date, FVD covers the information of 4860 fish-associated viruses belonging to 15 viral families, which were detected from 306 fish species in 57 countries. Meanwhile, sequence alignment, live data statistics and download function are available. Through the user-friendly interface, FVD provides a practical platform that would not only benefit virologists who want to disclose the spread of fish-associated viruses, but also zoologists who focus on the health of domestic and wild animals. Furthermore, it may facilitate the surveillance and prevention of fish viral diseases. Database URL: http://bioinfo.ihb.ac.cn/fvd.",FVD,0.953768969,The fish-associated virus database,0.77134944,FVD,0.953768969,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/8/2017 +30510599,http://dx.doi.org/10.5676/EUM,"The GEWEX Water Vapor Assessment archive of water vapour products from satellite observations and reanalyses. 
The Global Energy and Water cycle Exchanges (GEWEX) Data and Assessments Panel (GDAP) initiated the GEWEX Water Vapor Assessment (G-VAP), which has the main objectives to quantify the current state of art in water vapour products being constructed for climate applications and to support the selection process of suitable water vapour products by GDAP for its production of globally consistent water and energy cycle products. During the construction of the G-VAP data archive, freely available and mature satellite and reanalysis data records with a minimum temporal coverage of 10 years were considered. The archive contains total column water vapour (TCWV) as well as specific humidity and temperature at four pressure levels (1000, 700, 500, 300 hPa) from 22 different data records. All data records were remapped to a regular longitude/latitude grid of 2°x2°. The archive consists of four different folders: 22 TCWV data records covering the period 2003-2008, 11 TCWV data records covering the period 1988-2008, as well as seven specific humidity and seven temperature data records covering the period 1988-2009. The G-VAP data archive is referenced under the following digital object identifier (doi): http://dx.doi.org/10.5676/EUM SAF CM/GVAP/V001. Within G-VAP, the characterisation of water vapour products is, among other ways, achieved through intercomparisons of the considered data records, as a whole and grouped into three classes of predominant retrieval condition: clear-sky, cloudy-sky and all-sky. Associated results are shown using the 22 TCWV data records. The standard deviations among the 22 TCWV data records have been analysed and exhibit distinct maxima over central Africa and the tropical warm pool (in absolute terms) as well as over the poles and mountain regions (in relative terms). 
The variability in TCWV within each class can be large and prohibits conclusions on systematic differences in TCWV between the classes.",G-VAP,0.951357603,cycle Exchanges,0.584009568,G-VAP,0.951357603,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,6/15/2018 +27905517,http://bsbe.iiti.ac.in/bsbe/ipdb/index.php,"G4IPDB: A database for G-quadruplex structure forming nucleic acid interacting proteins. Nucleic acid G-quadruplex structure (G4) Interacting Proteins DataBase (G4IPDB) is an important database that contains detailed information about proteins interacting with nucleic acids that forms G-quadruplex structures. G4IPDB is the first database that provides comprehensive information about this interaction at a single platform. This database contains more than 200 entries with details of interaction such as interacting protein name and their synonyms, their UniProt-ID, source organism, target name and its sequences, ∆Tm, binding/dissociation constants, protein gene name, protein FASTA sequence, interacting residue in protein, related PDB entries, interaction ID, graphical view, PMID, author's name and techniques that were used to detect their interactions. G4IPDB also provides an efficient web-based ""G-quadruplex predictor tool"" that searches putative G-quadruplex forming sequences simultaneously in both sense and anti-sense strands of the query nucleotide sequence and provides the predicted G score. Studying the interaction between proteins and nucleic acids forming G-quadruplex structures could be of therapeutic significance for various diseases including cancer and neurological disease, therefore, having detail information about their interactions on a single platform would be helpful for the discovery and development of novel therapeutics. 
G4IPDB can be routinely updated (twice in year) and freely available on http://bsbe.iiti.ac.in/bsbe/ipdb/index.php.",G4IPDB,0.99755083,Nucleic,0.905615389,G4IPDB,0.99755083,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2016 +23161677,http://www.g4ldb.org,"G4LDB: a database for discovering and studying G-quadruplex ligands. The G-quadruplex ligands database (G4LDB, http://www.g4ldb.org) provides a unique collection of reported G-quadruplex ligands to streamline ligand/drug discovery targeting G-quadruplexes. G-quadruplexes are guanine-rich nucleic acid sequences in human telomeres and gene promoter regions. There is a growing recognition for their profound roles in a wide spectrum of diseases, such as cancer, diabetes and cardiovascular disease. Ligands that affect the structure and activity of G-quadruplexes can shed light on the search for G-quadruplex-targeting drugs. Therefore, we built the G4LDB to (i) compile a data set covering various physical properties and 3D structure of G-quadruplex ligands; (ii) provide Web-based tools for G-quadruplex ligand design; and (iii) to facilitate the discovery of novel therapeutic and diagnostic agents targeting G-quadruplexes. G4LDB currently contains >800 G-quadruplex ligands with ∼4000 activity records, which, to our knowledge, is the most extensive collection of its kind. It offers a user friendly interface that can meet a variety of data inquiries from researchers. For example, ligands can be searched for by name, molecular properties, structures, ligand activities and so on. Building on the reported data, the database also provides an online ligand design module that can predict ligand binding affinity in real time.",G4LDB,0.997929025,G-quadruplex ligands database,0.988358639,G4LDB,0.997929025,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2012 +22293322,http://202.120.189.88/mutdb,"Glucose-6-phosphate dehydrogenase (G6PD) mutations database: review of the ""old"" and update of the new mutations. 
In the present paper we have updated the G6PD mutations database, including all the last discovered G6PD genetic variants. We underline that the last database has been published by Vulliamy et al. [1] who analytically reported 140 G6PD mutations: along with Vulliamy's database, there are two main sites, such as http://202.120.189.88/mutdb/ and www.LOVD.nl/MR, where almost all G6PD mutations can be found. Compared to the previous mutation reports, in our paper we have included for each mutation some additional information, such as: the secondary structure and the enzyme 3D position involving by mutation, the creation or abolition of a restriction site (with the enzyme involved) and the conservation score associated with each amino acid position. The mutations reported in the present tab have been divided according to the gene's region involved (coding and non-coding) and mutations affecting the coding region in: single, multiple (at least with two bases involved) and deletion. We underline that for the listed mutations, reported in italic, literature doesn't provide all the biochemical or bio-molecular information or the research data. Finally, for the ""old"" mutations, we tried to verify features previously reported and, when subsequently modified, we updated the specific information using the latest literature data.",G6PD,0.940137466,NA,0,G6PD,0.940137466,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/30/2012 +30268934,http://gaad.medgenius.info,"GAAD: A Gene and Autoimmiune Disease Association Database. Autoimmune diseases (ADs) arise from an abnormal immune response of the body against substances and tissues normally present in the body. More than a hundred of ADs have been described in the literature so far. Although their etiology remains largely unclear, various types of ADs tend to share more associated genes with other types of ADs than with non-AD types. Here we present GAAD, a gene and AD association database. 
In GAAD, we collected 44,762 associations between 49 ADs and 4249 genes from public databases and MEDLINE documents. We manually verified the associations to ensure the quality and credibility. We reconstructed and recapitulated the relationships among ADs using their shared genes, which further validated the quality of our data. We also provided a list of significantly co-occurring gene pairs among ADs; with embedded tools, users can query gene co-occurrences and construct customized co-occurrence network with genes of interest. To make GAAD more straightforward to experimental biologists and medical scientists, we extracted additional information describing the associations through text mining, including the putative diagnostic value of the associations, type and position of gene polymorphisms, expression changes of implicated genes, as well as the phenotypical consequences, and grouped the associations accordingly. GAAD is freely available at http://gaad.medgenius.info.",GAAD,0.996480703,Gene and Autoimmiune Disease Association Database,0.918741842,GAAD,0.996480703,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2018 +28013277,http://www.gabi-kat.de/db/genehits.php,"Enhancing the GABI-Kat Arabidopsis thaliana T-DNA Insertion Mutant Database by Incorporating Araport11 Annotation. SimpleSearch provides access to a database containing information about T-DNA insertion lines of the GABI-Kat collection of Arabidopsis thaliana mutants. These mutants are an important tool for reverse genetics, and GABI-Kat is the second largest collection of such T-DNA insertion mutants. Insertion sites were deduced from flanking sequence tags (FSTs), and the database contains information about mutant plant lines as well as insertion alleles. Here, we describe improvements within the interface (available at http://www.gabi-kat.de/db/genehits.php) and with regard to the database content that have been realized in the last five years. 
These improvements include the integration of the Araport11 genome sequence annotation data containing the recently updated A. thaliana structural gene descriptions, an updated visualization component that displays groups of insertions with very similar insertion positions, mapped confirmation sequences, and primers. The visualization component provides a quick way to identify insertions of interest, and access to improved data about the exact structure of confirmed insertion alleles. In addition, the database content has been extended by incorporating additional insertion alleles that were detected during the confirmation process, as well as by adding new FSTs that have been produced during continued efforts to complement gaps in FST availability. Finally, the current database content regarding predicted and confirmed insertion alleles as well as primer sequences has been made available as downloadable flat files.",GABI-Kat,0.946155384,NA,0,GABI-Kat,0.946155384,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2017 +23872200,http://gag.genouest.org,"The GAG database: a new resource to gather genomic annotation cross-references. Several institutions provide genomic annotation data, and therefore these data show a significant segmentation and redundancy. Public databases allow access, through their own methods, to genomic and proteomic sequences and related annotation. Although some cross-reference tables are available, they don't cover the complete datasets provided by these databases. The Genomic Annotation Gathering project intends to unify annotation data provided by GenBank and Ensembl. We introduce an intra-species, cross-bank method. Generated results provide an enriched set of cross- references. This method allows for identifying an average of 30% of new cross-references that can be integrated to other utilities dedicated to analyzing related annotation data. 
By using only sequence comparison, we are able to unify two datasets that previously didn't share any stable cross-bank accession method. The whole process is hosted by the GenOuest platform to provide public access to newly generated cross-references and to allow for regular updates (http://gag.genouest.org).",GAG,0.715960622,NA,0,GAG,0.715960622,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/16/2013 +24990533,http://bioinformatica.isa.cnr.it/GALT/GALT2.0,"GALT protein database: querying structural and functional features of GALT enzyme. Knowledge of the impact of variations on protein structure can enhance the comprehension of the mechanisms of genetic diseases related to that protein. Here, we present a new version of GALT Protein Database, a Web-accessible data repository for the storage and interrogation of structural effects of variations of the enzyme galactose-1-phosphate uridylyltransferase (GALT), the impairment of which leads to classic Galactosemia, a rare genetic disease. This new version of this database now contains the models of 201 missense variants of GALT enzyme, including heterozygous variants, and it allows users not only to retrieve information about the missense variations affecting this protein, but also to investigate their impact on substrate binding, intersubunit interactions, stability, and other structural features. In addition, it allows the interactive visualization of the models of variants collected into the database. We have developed additional tools to improve the use of the database by nonspecialized users. 
This Web-accessible database (http://bioinformatica.isa.cnr.it/GALT/GALT2.0) represents a model of tools potentially suitable for application to other proteins that are involved in human pathologies and that are subjected to genetic variations.",GALT Protein Database,0.571744546,NA,0,GALT Protein Database,0.571744546,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/23/2014 +27037912,http://gamdb.liu-lab.com/index.php,"GAMDB: a web resource to connect microRNAs with autophagy in gerontology. Objectives MicroRNAs (miRNAs) are endogenous ~23 nucleotides (nt) RNAs, regulating gene expression by pairing to the mRNAs of protein-coding genes to direct their post-transcriptional repression. Both in normal and aberrant activities, miRNAs contribute to a recurring paradigm of cellular behaviors in pathological settings, especially in gerontology. Autophagy, a multi-step lysosomal degradation process with function to degrade long-lived proteins and damaged organelles, has significant impact on gerontology. Thus, elucidating how miRNAs participate in autophagy may enlarge the scope of miRNA in autophagy and facilitate researches in gerontology. Materials and methods Herein, based upon the published studies, predicted targets and gerontology-related diseases, we constructed a web resource named Gerontology-Autophagic-MicroRNA Database (GAMDB) (http://gamdb.liu-lab.com/index.php), which contained 836 autophagy-related miRNAs, 197 targeted genes/proteins and 56 aging-related diseases such as Parkinson' disease, Alzheimer's disease and Huntington's disease. Results and conclusion We made use of large amounts of data to elucidate the intricate relationships between microRNA-regulated autophagic mechanisms and gerontology. 
This database will facilitate better understanding of autophagy regulation network in gerontology and thus promoting gerontology-related therapy in the future.",GAMDB,0.99449718,Gerontology-Autophagic-MicroRNA Database,0.98334261,GAMDB,0.99449718,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/31/2016 +26851225,http://gametsepi.nwsuaflmz.com,"GED: a manually curated comprehensive resource for epigenetic modification of gametogenesis. Reproductive infertility affects seventh of couples, which is most attributed to the obstacle of gametogenesis. Characterizing the epigenetic modification factors involved in gametogenesis is fundamental to understand the molecular mechanisms and to develop treatments for human infertility. Although the genetic factors have been implicated in gametogenesis, no dedicated bioinformatics resource for gametogenesis is available. To elucidate the relationship of epigenetic modification and mammalian gametogenesis, we developed a new database, gametogenesis epigenetic modification database (GED), a manually curated database, which aims at providing a comprehensive resource of epigenetic modification of gametogenesis. The database integrates three kinds information of epigenetic modifications during gametogenesis (DNA methylation, histone modification and RNA regulation), and the gametogenesis has been detailed as 16 stages in seven mammal species (Homo sapiens, Mus musculus, Rattus norvegicus, Sus scrofa, Bos taurus, Capra hircus and Ovis aries). Besides, we have predicted the linear pathways of epigenetic modification which were composed of 211 genes/proteins and microRNAs that were involved in gametogenesis. GED is a user-friendly Web site, through which users can obtain the comprehensive epigenetic factor information and molecular pathways by visiting our database freely. 
GED is free available at http://gametsepi.nwsuaflmz.com.",GED,0.943311706,gametogenesis epigenetic modification database,0.95555062,gametogenesis epigenetic modification database,0.95555062,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/5/2016 +27102089,http://gat.sit.kmutt.ac.th,"Gene-set activity toolbox (GAT): A platform for microarray-based cancer diagnosis using an integrative gene-set analysis approach. Cancer is a complex disease that cannot be diagnosed reliably using only single gene expression analysis. Using gene-set analysis on high throughput gene expression profiling controlled by various environmental factors is a commonly adopted technique used by the cancer research community. This work develops a comprehensive gene expression analysis tool (gene-set activity toolbox: (GAT)) that is implemented with data retriever, traditional data pre-processing, several gene-set analysis methods, network visualization and data mining tools. The gene-set analysis methods are used to identify subsets of phenotype-relevant genes that will be used to build a classification model. To evaluate GAT performance, we performed a cross-dataset validation study on three common cancers namely colorectal, breast and lung cancers. The results show that GAT can be used to build a reasonable disease diagnostic model and the predicted markers have biological relevance. GAT can be accessed from http://gat.sit.kmutt.ac.th where GAT's java library for gene-set analysis, simple classification and a database with three cancer benchmark datasets can be downloaded.",GAT,0.967916131,gene-set activity toolbox,0.814590596,GAT,0.967916131,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/15/2016 +25313160,http://toxico.nibio.go.jp/english/index.html,"Open TG-GATEs: a large-scale toxicogenomics database. Toxicogenomics focuses on assessing the safety of compounds using gene expression profiles. 
Gene expression signatures from large toxicogenomics databases are expected to perform better than small databases in identifying biomarkers for the prediction and evaluation of drug safety based on a compound's toxicological mechanisms in animal target organs. Over the past 10 years, the Japanese Toxicogenomics Project consortium (TGP) has been developing a large-scale toxicogenomics database consisting of data from 170 compounds (mostly drugs) with the aim of improving and enhancing drug safety assessment. Most of the data generated by the project (e.g. gene expression, pathology, lot number) are freely available to the public via Open TG-GATEs (Toxicogenomics Project-Genomics Assisted Toxicity Evaluation System). Here, we provide a comprehensive overview of the database, including both gene expression data and metadata, with a description of experimental conditions and procedures used to generate the database. Open TG-GATEs is available from http://toxico.nibio.go.jp/english/index.html.",GATEs,0.843082771,Toxicogenomics Project,0.668451498,GATEs,0.843082771,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/13/2014 +25953079,http://gbis.ipk-gatersleben.de,"GBIS: the information system of the German Genebank. The German Federal ex situ Genebank of Agricultural and Horticultural Crop Species is the largest collection of its kind in the countries of the European Union and amongst the 10 largest collections worldwide. Beside its enormous scientific value as a safeguard of plant biodiversity, the plant genetic resources maintained are also of high importance for breeders to provide new impulses. The complex processes of managing such a collection are supported by the Genebank Information System (GBIS). GBIS is an important source of information for researchers and plant breeders, e.g. for identifying appropriate germplasm for breeding purposes. 
In addition, the access to genebank material as a sovereign task is also of high interest to the general public. Moreover, GBIS acts as a data source for global information systems, such as the Global Biodiversity Information Facility (GBIF) or the European Search Catalogue for Plant Genetic Resources (EURISCO). Database URL: http://gbis.ipk-gatersleben.de/",GBIS,0.697626531,NA,0,GBIS,0.697626531,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/7/2015 +25010047,http://gbm-biodp.nci.nih.gov,"Visualizing molecular profiles of glioblastoma with GBM-BioDP. Validation of clinical biomarkers and response to therapy is a challenging topic in cancer research. An important source of information for virtual validation is the datasets generated from multi-center cancer research projects such as The Cancer Genome Atlas project (TCGA). These data enable investigation of genetic and epigenetic changes responsible for cancer onset and progression, response to cancer therapies, and discovery of the molecular profiles of various cancers. However, these analyses often require bulk download of data and substantial bioinformatics expertise, which can be intimidating for investigators. Here, we report on the development of a new resource available to scientists: a data base called Glioblastoma Bio Discovery Portal (GBM-BioDP). GBM-BioDP is a free web-accessible resource that hosts a subset of the glioblastoma TCGA data and enables an intuitive query and interactive display of the resultant data. This resource provides visualization tools for the exploration of gene, miRNA, and protein expression, differential expression within the subtypes of GBM, and potential associations with clinical outcome, which are useful for virtual biological validation. The tool may also enable generation of hypotheses on how therapies impact GBM molecular profiles, which can help in personalization of treatment for optimal outcome. 
The resource can be accessed freely at http://gbm-biodp.nci.nih.gov (a tutorial is included).",GBM-BioDP,0.991873372,Glioblastoma Bio Discovery Portal,0.790780693,GBM-BioDP,0.991873372,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/10/2014 +30235322,http://www.sing-group.org/gc4s,"GC4S: A bioinformatics-oriented Java software library of reusable graphical user interface components. Modern bioinformatics and computational biology are fields of study driven by the availability of effective software required for conducting appropriate research tasks. Apart from providing reliable and fast implementations of different data analysis algorithms, these software applications should also be clear and easy to use through proper user interfaces, providing appropriate data management and visualization capabilities. In this regard, the user experience obtained by interacting with these applications via their Graphical User Interfaces (GUI) is a key factor for their final success and real utility for researchers. Despite the existence of different packages and applications focused on advanced data visualization, there is a lack of specific libraries providing pertinent GUI components able to help scientific bioinformatics software developers. To that end, this paper introduces GC4S, a bioinformatics-oriented collection of high-level, extensible, and reusable Java GUI elements specifically designed to speed up bioinformatics software development. Within GC4S, developers of new applications can focus on the specific GUI requirements of their projects, relying on GC4S for generalities and abstractions. 
GC4S is free software distributed under the terms of GNU Lesser General Public License and both source code and documentation are publicly available at http://www.sing-group.org/gc4s.",GC4S,0.951814741,NA,0,GC4S,0.951814741,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/20/2018 +27127885,http://gcgene.bioinfo-minzhao.org,"GCGene: a gene resource for gastric cancer with literature evidence. Gastric cancer (GC) is the fifth most common cancer and third leading cause of cancer-related deaths worldwide. Its lethality primarily stems from a lack of detection strategies for early stages of GC and a lack of noninvasive detection strategies for advanced stages. The development of early diagnostic biomarkers largely depends on understanding the biological pathways and regulatory mechanisms associated with putative GC genes. Unfortunately, the GC-implicated genes that have been identified thus far are scattered among thousands of published studies, and no systematic summary is available, which hinders the development of a large-scale genetic screen. To provide a publically accessible resource tool to meet this need, we constructed a literature-based database GCGene (Gastric Cancer Gene database) with comprehensive annotations supported by a user-friendly website. In the current release, we have collected 1,815 unique human genes including 1,678 protein-coding and 137 non-coding genes curated from extensive examination of 3,142 PubMed abstracts. The resulting database has a convenient web-based interface to facilitate both textual and sequence-based searches. All curated genes in GCGene are downloadable for advanced bioinformatics data mining. Gene prioritization was performed to rank the relative relevance of these genes in GC development. The 100 top-ranked genes are highly mutated according to the cohort of published studies we reviewed. 
By conducting a network analysis of these top-ranked GC-associated genes in the human interactome, we were able to identify strong links between 8 highly connected genes with low expression and patient survival time. GCGene is freely available to academic users at http://gcgene.bioinfo-minzhao.org/.",GCGene,0.995825529,Gastric Cancer Gene database,0.884341019,GCGene,0.995825529,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2016 +31592084,http://122.112.216.104,"The Grass Carp Genomic Visualization Database (GCGVD): an informational platform for genome biology of grass carp. With the release of the draft genome of the grass carp, researches on the grass carp from the genetic level and the further molecular mechanisms of economically valuable physiological behaviors have gained great attention. In this paper, we integrated a large number of genomic, genetic and some other data resources and established a web-based grass carp genomic visualization database (GCGVD). To view these data more effectively, we visualized grass carp and zebrafish gene collinearity and genetic linkage map using Scalable Vector Graphics (SVG) format in the browser, and genomic annotations by JBrowse. Furthermore, we carried out some preliminary study on a whole-genome alternative splicing (AS)of the grass carp. The RNA-seq reads of 15 samples were aligned to the reference genome of the grass carp by Bowtie2 software. RNA-seq reads of each sample and density map of reads were also exhibited in JBrowse. Additionally, we designed a universal grass carp genome annotation data model to improve the retrieval speed and scalability. Compared with the published database GCGD previously, we newly added the visualization of some more genomic annotations, conserved domain and RNA-seq reads aligned to the reference genome. 
GCGVD can be accessed at http://122.112.216.104.",GCGVD,0.976200461,The Grass Carp Genomic Visualization Database,0.885653893,GCGVD,0.976200461,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/7/2019 +30365027,http://gcmeta.wdcm.org,"gcMeta: a Global Catalogue of Metagenomics platform to support the archiving, standardization and analysis of microbiome data. Meta-omics approaches have been increasingly used to study the structure and function of the microbial communities. A variety of large-scale collaborative projects are being conducted to encompass samples from diverse environments and habitats. This change has resulted in enormous demands for long-term data maintenance and capacity for data analysis. The Global Catalogue of Metagenomics (gcMeta) is a part of the 'Chinese Academy of Sciences Initiative of Microbiome (CAS-CMI)', which focuses on studying the human and environmental microbiome, establishing depositories of samples, strains and data, as well as promoting international collaboration. To accommodate and rationally organize massive datasets derived from several thousands of human and environmental microbiome samples, gcMeta features a database management system for archiving and publishing data in a standardized way. Another main feature is the integration of more than ninety web-based data analysis tools and workflows through a Docker platform which enables data analysis by using various operating systems. This platform has been rapidly expanding, and now hosts data from the CAS-CMI and a number of other ongoing research projects. In conclusion, this platform presents a powerful and user-friendly service to support worldwide collaborative efforts in the field of meta-omics research. 
This platform is freely accessible at https://gcmeta.wdcm.org/.",gcMeta,0.99671793,Global Catalogue of Metagenomics,0.914486587,gcMeta,0.99671793,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +33119759,http://gctype.wdcm.org,"gcType: a high-quality type strain genome database for microbial phylogenetic and functional research. Taxonomic and functional research of microorganisms has increasingly relied upon genome-based data and methods. As the depository of the Global Catalogue of Microorganisms (GCM) 10K prokaryotic type strain sequencing project, Global Catalogue of Type Strain (gcType) has published 1049√ɬÉ√ǬÇ√ɬÇ√Ǭ†type strain genomes sequenced by the GCM 10K project which are preserved in global culture collections with a valid published status. Additionally, the information provided through gcType includes >12 000 publicly available type strain genome sequences from GenBank incorporated using quality control criteria and standard data annotation pipelines to form a high-quality reference database. This database integrates type strain sequences with their phenotypic information to facilitate phenotypic and genotypic analyses. Multiple formats of cross-genome searches and interactive interfaces have allowed extensive exploration of the database's resources. In this study, we describe web-based data analysis pipelines for genomic analyses and genome-based taxonomy, which could serve as a one-stop platform for the identification of prokaryotic species. The number of type strain genomes that are published will continue to increase as the GCM 10K project increases its collaboration with culture collections worldwide. Data of this project is shared with the International Nucleotide Sequence Database Collaboration. 
Access to gcType is free at http://gctype.wdcm.org/.",gcType,0.995411217,Global Catalogue of Type Strain,0.834868125,gcType,0.995411217,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +23209799,http://dgd.genouest.org,"The duplicated genes database: identification and functional annotation of co-localised duplicated genes across genomes. Background There has been a surge in studies linking genome structure and gene expression, with special focus on duplicated genes. Although initially duplicated from the same sequence, duplicated genes can diverge strongly over evolution and take on different functions or regulated expression. However, information on the function and expression of duplicated genes remains sparse. Identifying groups of duplicated genes in different genomes and characterizing their expression and function would therefore be of great interest to the research community. The 'Duplicated Genes Database' (DGD) was developed for this purpose. Methodology Nine species were included in the DGD. For each species, BLAST analyses were conducted on peptide sequences corresponding to the genes mapped on a same chromosome. Groups of duplicated genes were defined based on these pairwise BLAST comparisons and the genomic location of the genes. For each group, Pearson correlations between gene expression data and semantic similarities between functional GO annotations were also computed when the relevant information was available. Conclusions The Duplicated Gene Database provides a list of co-localised and duplicated genes for several species with the available gene co-expression level and semantic similarity value of functional annotation. Adding these data to the groups of duplicated genes provides biological information that can prove useful to gene expression analyses. 
The Duplicated Gene Database can be freely accessed through the DGD website at http://dgd.genouest.org.",GD,0.901952982,Duplicated Genes Database,0.815995583,GD,0.901952982,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/28/2012 +32117874,"http://gdb.unibe.ch, http://faerun.gdb.tools","ChEMBL-Likeness Score and Database GDBChEMBL. The generated database GDB17 enumerates 166.4 billion molecules up to 17 atoms of C, N, O, S and halogens following simple rules of chemical stability and synthetic feasibility. However, most molecules in GDB17 are too complex to be considered for chemical synthesis. To address this limitation, we report GDBChEMBL as a subset of GDB17 featuring 10 million molecules selected according to a ChEMBL-likeness score (CLscore) calculated from the frequency of occurrence of circular substructures in ChEMBL, followed by uniform sampling across molecular size, stereocenters and heteroatoms. Compared to the previously reported subsets FDB17 and GDBMedChem selected from GDB17 by fragment-likeness, respectively, medicinal chemistry criteria, our new subset features molecules with higher synthetic accessibility and possibly bioactivity yet retains a broad and continuous coverage of chemical space typical of the entire GDB17. GDBChEMBL is accessible at http://gdb.unibe.ch for download and for browsing using an interactive chemical space map at http://faerun.gdb.tools.",GDB17,0.992214759,NA,0,GDB17,0.992214759,1,31169974,NA,NA,NA,do not merge,NA,NA,NA,NA,2/4/2020 +31169974,http://gdb.unibe.ch,"Medicinal Chemistry Aware Database GDBMedChem. The generated database GDB17 enumerates 166.4√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬÖbillion possible molecules up to 17√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬÖatoms of C, N, O, S and halogens following simple chemical stability and synthetic feasibility rules, however medicinal chemistry criteria are not taken into account. 
Here we applied rules inspired by medicinal chemistry to exclude problematic functional groups and complex molecules from GDB17, and sampled the resulting subset uniformly across molecular size, stereochemistry and polarity to form GDBMedChem as a compact collection of 10 million small molecules. This collection has reduced complexity and better synthetic accessibility than the entire GDB17 but retains higher sp3 -carbon fraction and natural product likeness scores compared to known drugs. GDBMedChem molecules are more diverse and very different from known molecules in terms of substructures and represent an unprecedented source of diversity for drug design. GDBMedChem is available for 3D-visualization, similarity searching and for download at http://gdb.unibe.ch.",GDBMedChem,0.997045815,Chemistry,0.490931183,GDBMedChem,0.997045815,1,32117874,NA,NA,NA,do not merge,NA,NA,NA,NA,6/6/2019 +28453687,http://gdisc.bme.gatech.edu,"GDISC: a web portal for integrative analysis of gene-drug interaction for survival in cancer. Summary Survival analysis has been applied to The Cancer Genome Atlas (TCGA) data. Although drug exposure records are available in TCGA, existing survival analyses typically did not consider drug exposure, partly due to naming inconsistencies in the data. We have spent extensive effort to standardize the drug exposure data, which enabled us to perform survival analysis on drug-stratified subpopulations of cancer patients. Using this strategy, we integrated gene copy number data, drug exposure data and patient survival data to infer gene-drug interactions that impact survival. The collection of all analyzed gene-drug interactions in 32 cancer types are organized and presented in a searchable web-portal called gene-drug Interaction for survival in cancer (GDISC). 
GDISC allows biologists and clinicians to interactively explore the gene-drug interactions identified in the context of TCGA, and discover interactions associated to their favorite cancer, drug and/or gene of interest. In addition, GDISC provides the standardized drug exposure data, which is a valuable resource for developing new methods for drug-specific analysis. Availability and implementation GDISC is available at https://gdisc.bme.gatech.edu/. Contact peng.qiu@bme.gatech.edu.",GDISC,0.982105613,gene,0.701045394,GDISC,0.982105613,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2017 +30357347,http://www.rosaceae.org,"15 years of GDR: New data and functionality in the Genome Database for Rosaceae. The Genome Database for Rosaceae (GDR, https://www.rosaceae.org) is an integrated web-based community database resource providing access to publicly available genomics, genetics and breeding data and data-mining tools to facilitate basic, translational and applied research in Rosaceae. The volume of data in GDR has increased greatly over the last 5√ɬÉ√ǬÇ√ɬÇ√Ǭ†years. The GDR now houses multiple versions of whole genome assembly and annotation data from 14 species, made available by recent advances in sequencing technology. Annotated and searchable reference transcriptomes, RefTrans, combining peer-reviewed published RNA-Seq as well as EST datasets, are newly available for major crop species. Significantly more quantitative trait loci, genetic maps and markers are available in MapViewer, a new visualization tool that better integrates with other pages in GDR. Pathways can be accessed through the new GDR Cyc Pathways databases, and synteny among the newest genome assemblies from eight species can be viewed through the new synteny browser, SynView. Collated single-nucleotide polymorphism diversity data and phenotypic data from publicly available breeding datasets are integrated with other relevant data. 
Also, the new Breeding Information Management System allows breeders to upload, manage and analyze their private breeding data within the secure GDR server with an option to release data publicly.",GDR,0.997720838,Genome Database for Rosaceae,0.967752824,GDR,0.997720838,1,NA,24247530,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +24247530,http://www.rosaceae.org/breeders_toolbox,"Addition of a breeding database in the Genome Database for Rosaceae. Breeding programs produce large datasets that require efficient management systems to keep track of performance, pedigree, geographical and image-based data. With the development of DNA-based screening technologies, more breeding programs perform genotyping in addition to phenotyping for performance evaluation. The integration of breeding data with other genomic and genetic data is instrumental for the refinement of marker-assisted breeding tools, enhances genetic understanding of important crop traits and maximizes access and utility by crop breeders and allied scientists. Development of new infrastructure in the Genome Database for Rosaceae (GDR) was designed and implemented to enable secure and efficient storage, management and analysis of large datasets from the Washington State University apple breeding program and subsequently expanded to fit datasets from other Rosaceae breeders. The infrastructure was built using the software Chado and Drupal, making use of the Natural Diversity module to accommodate large-scale phenotypic and genotypic data. Breeders can search accessions within the GDR to identify individuals with specific trait combinations. Results from Search by Parentage lists individuals with parents in common and results from Individual Variety pages link to all data available on each chosen individual including pedigree, phenotypic and genotypic information. 
Genotypic data are searchable by markers and alleles; results are linked to other pages in the GDR to enable the user to access tools such as GBrowse and CMap. This breeding database provides users with the opportunity to search datasets in a fully targeted manner and retrieve and compare performance data from multiple selections, years and sites, and to output the data needed for variety release publications and patent applications. The breeding database facilitates efficient program management. Storing publicly available breeding data in a database together with genomic and genetic data will further accelerate the cross-utilization of diverse data types by researchers from various disciplines. Database URL: http://www.rosaceae.org/breeders_toolbox.",GDR,0.979074816,Genome Database for Rosaceae,0.827349126,GDR,0.979074816,1,NA,30357347,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/18/2013 +31277321,http://bioinfo.life.hust.edu.cn/web/GEDS,"GEDS: A Gene Expression Display Server for mRNAs, miRNAs and Proteins. . High-throughput technologies generate a tremendous amount of expression data on mRNA, miRNA and protein levels. Mining and visualizing the large amount of expression data requires sophisticated computational skills. An easy to use and user-friendly web-server for the visualization of gene expression profiles could greatly facilitate data exploration and hypothesis generation for biologists. Here, we curated and normalized the gene expression data on mRNA, miRNA and protein levels in 23315, 9009 and 9244 samples, respectively, from 40 tissues (The Cancer Genome Atlas (TCGA) and Genotype-Tissue Expression (GETx)) and 1594 cell lines (Cancer Cell Line Encyclopedia (CCLE) and MD Anderson Cell Lines Project (MCLP)). Then, we constructed the Gene Expression Display Server (GEDS), a web-based tool for quantification, comparison and visualization of gene expression data. 
GEDS integrates multiscale expression data and provides multiple types of figures and tables to satisfy several kinds of user requirements. The comprehensive expression profiles plotted in the one-stop GEDS platform greatly facilitate experimental biologists utilizing big data for better experimental design and analysis. GEDS is freely available on http://bioinfo.life.hust.edu.cn/web/GEDS/.",GEDS,0.955554724,Expression Display Server,0.659791191,GEDS,0.955554724,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/3/2019 +21803806,http://tinyurl.com/geefu,"Gee Fu: a sequence version and web-services database tool for genomic assembly, genome feature and NGS data. Summary Scientists now use high-throughput sequencing technologies and short-read assembly methods to create draft genome assemblies in just days. Tools and pipelines like the assembler, and the workflow management environments make it easy for a non-specialist to implement complicated pipelines to produce genome assemblies and annotations very quickly. Such accessibility results in a proliferation of assemblies and associated files, often for many organisms. These assemblies get used as a working reference by lots of different workers, from a bioinformatician doing gene prediction or a bench scientist designing primers for PCR. Here we describe Gee Fu, a database tool for genomic assembly and feature data, including next-generation sequence alignments. Gee Fu is an instance of a Ruby-On-Rails web application on a feature database that provides web and console interfaces for input, visualization of feature data via AnnoJ, access to data through a web-service interface, an API for direct data access by Ruby scripts and access to feature data stored in BAM files. Gee Fu provides a platform for storing and sharing different versions of an assembly and associated features that can be accessed and updated by bench biologists and bioinformaticians in ways that are easy and useful for each. 
Availability http://tinyurl.com/geefu Contact dan.maclean@tsl.ac.uk.",Gee Fu,0.952340484,NA,0,Gee Fu,0.952340484,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/29/2011 +24150938,http://geisha.arizona.edu,"GEISHA: an evolving gene expression resource for the chicken embryo. GEISHA (Gallus Expression In Situ Hybridization Analysis; http://geisha.arizona.edu) is an in situ hybridization gene expression and genomic resource for the chicken embryo. This update describes modifications that enhance its utility to users. During the past 5 years, GEISHA has undertaken a significant restructuring to more closely conform to the data organization and formatting of Model Organism Databases in other species. This has involved migrating from an entry-centric format to one that is gene-centered. Database restructuring has enabled the inclusion of data pertaining to chicken genes and proteins and their orthologs in other species. This new information is presented through an updated user interface. In situ hybridization data in mouse, frog, zebrafish and fruitfly are integrated with chicken genomic and expression information. A resource has also been developed that integrates the GEISHA interface information with the Online Mendelian Inheritance in Man human disease gene database. Finally, the Chicken Gene Nomenclature Committee database and the GEISHA database have been integrated so that they draw from the same data resources.",GEISHA,0.996660411,Gallus Expression In Situ Hybridization Analysis,0.966287035,GEISHA,0.996660411,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2013 +34158935,http://dx.deepcarbon.net/11121/6200-6954-6634-8243-CC,"Global earth mineral inventory: A data legacy. Minerals contain important clues to understanding the complex geologic history of Earth and other planetary bodies. Therefore, geologists have been collecting mineral samples and compiling data about these samples for centuries. 
These data have been used to better understand the movement of continental plates, the oxidation of Earth's atmosphere and the water regime of ancient martian landscapes. Datasets found at 'RRUFF.info/Evolution' and 'mindat.org' have documented a wealth of mineral occurrences around the world. One of the main goals in geoinformatics has been to facilitate discovery by creating and merging datasets from various scientific fields and using statistical methods and visualization tools to inspire and test hypotheses applicable to modelling Earth's past environments. To help achieve this goal, we have compiled physical, chemical and geological properties of minerals and linked them to the above-mentioned mineral occurrence datasets. As a part of the Deep Time Data Infrastructure, funded by the W.M. Keck Foundation, with significant support from the Deep Carbon Observatory (DCO) and the A.P. Sloan Foundation, GEMI ('Global Earth Mineral Inventory') was developed from the need of researchers to have all of the required mineral data visible in a single portal, connected by a robust, yet easy to understand schema. Our data legacy integrates these resources into a digestible format for exploration and analysis and has allowed researchers to gain valuable insights from mineralogical data. GEMI can be considered a network, with every node representing some feature of the datasets, for example, a node can represent geological parameters like colour, hardness or lustre. Exploring subnetworks gives the researcher a specific view of the data required for the task at hand. GEMI is accessible through the DCO Data Portal (https://dx.deepcarbon.net/11121/6200-6954-6634-8243-CC). 
We describe our efforts in compiling GEMI, the Data Policies for usage and sharing, and the evaluation metrics for this data legacy.",GEMI,0.990873575,Global Earth Mineral Inventory,0.72698319,GEMI,0.990873575,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2020 +33599246,http://gemma.msl.ubc.ca/home.html,"Curation of over 10 000 transcriptomic studies to enable data reuse. . Vast amounts of transcriptomic data reside in public repositories, but effective reuse remains challenging. Issues include unstructured dataset metadata, inconsistent data processing and quality control, and inconsistent probe-gene mappings across microarray technologies. Thus, extensive curation and data reprocessing are necessary prior to any reuse. The Gemma bioinformatics system was created to help address these issues. Gemma consists of a database of curated transcriptomic datasets, analytical software, a web interface and web services. Here we present an update on Gemma's holdings, data processing and analysis pipelines, our curation guidelines, and software features. As of June 2020, Gemma contains 10 811 manually curated datasets (primarily human, mouse and rat), over 395 000 samples and hundreds of curated transcriptomic platforms (both microarray and RNA sequencing). Dataset topics were represented with 10 215 distinct terms from 12 ontologies, for a total of 54 316 topic annotations (mean topics/dataset = 5.2). While Gemma has broad coverage of conditions and tissues, it captures a large majority of available brain-related datasets, accounting for 34% of its holdings. Users can access the curated data and differential expression analyses through the Gemma website, RESTful service and an R package. Database URL: https://gemma.msl.ubc.ca/home.html.",Gemma,0.981963456,NA,0,Gemma,0.981963456,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2021 +25414350,http://www.ncbi.nlm.nih.gov/genbank,"GenBank. 
GenBank(®) (http://www.ncbi.nlm.nih.gov/genbank/) is a comprehensive database that contains publicly available nucleotide sequences for over 300 000 formally described species. These sequences are obtained primarily through submissions from individual laboratories and batch submissions from large-scale sequencing projects, including whole-genome shotgun and environmental sampling projects. Most submissions are made using the web-based BankIt or standalone Sequin programs, and GenBank staff assign accession numbers upon data receipt. Daily data exchange with the European Nucleotide Archive and the DNA Data Bank of Japan ensures worldwide coverage. GenBank is accessible through the NCBI Entrez retrieval system, which integrates data from the major DNA and protein sequence databases along with taxonomy, genome, mapping, protein structure and domain information, and the biomedical journal literature via PubMed. BLAST provides sequence similarity searches of GenBank and other sequence databases. Complete bimonthly releases and daily updates of the GenBank database are available by FTP.",GenBank,0.990686178,NA,0,GenBank,0.990686178,1,33196830,"23193287.0, 29688360.0",NA,NA,conflicting record(s) to be removed,"merge all ""dup name"" IDs",NA,NA,NA,11/20/2014 +23193287,http://www.ncbi.nlm.nih.gov,"GenBank. GenBank® (http://www.ncbi.nlm.nih.gov) is a comprehensive database that contains publicly available nucleotide sequences for almost 260 000 formally described species. These sequences are obtained primarily through submissions from individual laboratories and batch submissions from large-scale sequencing projects, including whole-genome shotgun (WGS) and environmental sampling projects. Most submissions are made using the web-based BankIt or standalone Sequin programs, and GenBank staff assigns accession numbers upon data receipt. Daily data exchange with the European Nucleotide Archive (ENA) and the DNA Data Bank of Japan (DDBJ) ensures worldwide coverage. 
GenBank is accessible through the NCBI Entrez retrieval system, which integrates data from the major DNA and protein sequence databases along with taxonomy, genome, mapping, protein structure and domain information, and the biomedical journal literature via PubMed. BLAST provides sequence similarity searches of GenBank and other sequence databases. Complete bimonthly releases and daily updates of the GenBank database are available by FTP. To access GenBank and its related retrieval and analysis services, begin at the NCBI home page: www.ncbi.nlm.nih.gov.",GenBank,0.986147463,NA,0,GenBank,0.986147463,1,22737589,"25414350.0, 29688360.0",NA,NA,conflicting record(s) to be removed,"merge all ""dup name"" IDs",NA,NA,NA,11/27/2012 +29688360,http://www.ncbi.nlm.nih.gov/biocollections,"The NCBI BioCollections Database. . The rapidly growing set of GenBank submissions includes sequences that are derived from vouchered specimens. These are associated with culture collections, museums, herbaria and other natural history collections, both living and preserved. Correct identification of the specimens studied, along with a method to associate the sample with its institution, is critical to the outcome of related studies and analyses. The National Center for Biotechnology Information BioCollections Database was established to allow the association of specimen vouchers and related sequence records to their home institutions. This process also allows cross-linking from the home institution for quick identification of all records originating from each collection. Database URL: https://www.ncbi.nlm.nih.gov/biocollections",GenBank,0.944846928,NA,0,GenBank,0.944846928,1,NA,"23193287.0, 25414350.0",low_prob_best_name,remove,NA,"merge all ""dup name"" IDs",FALSE POS: INCORRECT NAME,NA,NA,1/1/2018 +33196830,http://www.ncbi.nlm.nih.gov/genbank,"GenBank. 
GenBank® (https://www.ncbi.nlm.nih.gov/genbank/) is a comprehensive, public database that contains 9.9 trillion base pairs from over 2.1 billion nucleotide sequences for 478 000 formally described species. Daily data exchange with the European Nucleotide Archive and the DNA Data Bank of Japan ensures worldwide coverage. Recent updates include new resources for data from the SARS-CoV-2 virus, updates to the NCBI Submission Portal and associated submission wizards for dengue and SARS-CoV-2 viruses, new taxonomy queries for viruses and prokaryotes, and simplified submission processes for EST and GSS sequences.",GenBankÂ,0.844256401,NA,0,GenBankÂ,0.844256401,1,25414350,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2021 +"30357393, 33270111",http://www.gencodegenes.org,"GENCODE reference annotation for the human and mouse genomes. The accurate identification and description of the genes in the human and mouse genomes is a fundamental requirement for high quality analysis of data informing both genome biology and clinical genomics. Over the last 15 years, the GENCODE consortium has been producing reference quality gene annotations to provide this foundational resource. The GENCODE consortium includes both experimental and computational biology groups who work together to improve and extend the GENCODE gene annotation. Specifically, we generate primary data, create bioinformatics tools and provide analysis to support the work of expert manual gene annotators and automated gene annotation pipelines. In addition, manual and computational annotation workflows use any and all publicly available data and analysis, along with the research literature to identify and characterise gene loci to the highest standard. 
GENCODE gene annotations are accessible via the Ensembl and UCSC Genome Browsers, the Ensembl FTP site, Ensembl Biomart, Ensembl Perl and REST APIs as well as https://www.gencodegenes.org.",GENCODE,0.998485327,NA,0,GENCODE,0.998485327,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +27235414,http://retina.tigem.it,"An atlas of gene expression and gene co-regulation in the human retina. The human retina is a specialized tissue involved in light stimulus transduction. Despite its unique biology, an accurate reference transcriptome is still missing. Here, we performed gene expression analysis (RNA-seq) of 50 retinal samples from non-visually impaired post-mortem donors. We identified novel transcripts with high confidence (Observed Transcriptome (ObsT)) and quantified the expression level of known transcripts (Reference Transcriptome (RefT)). The ObsT included 77 623 transcripts (23 960 genes) covering 137 Mb (35 Mb new transcribed genome). Most of the transcripts (92%) were multi-exonic: 81% with known isoforms, 16% with new isoforms and 3% belonging to new genes. The RefT included 13 792 genes across 94 521 known transcripts. Mitochondrial genes were among the most highly expressed, accounting for about 10% of the reads. Of all the protein-coding genes in Gencode, 65% are expressed in the retina. We exploited inter-individual variability in gene expression to infer a gene co-expression network and to identify genes specifically expressed in photoreceptor cells. We experimentally validated the photoreceptors localization of three genes in human retina that had not been previously reported. RNA-seq data and the gene co-expression network are available online (http://retina.tigem.it).",Gencode,0.638335466,NA,0,Gencode,0.638335466,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME; no name in abstract,NA,NA,5/27/2016 +30864352,http://genconet.kalis-amts.de,"GenCoNet - A Graph Database for the Analysis of Comorbidities by Gene Networks. . 
The prevalence of comorbid diseases poses a major health issue for millions of people worldwide and an enormous socio-economic burden for society. The molecular mechanisms for the development of comorbidities need to be investigated. For this purpose, a workflow system was developed to aggregate data on biomedical entities from heterogeneous data sources. The process of integrating and merging all data sources of the workflow system was implemented as a semi-automatic pipeline that provides the import, fusion, and analysis of the highly connected biomedical data in a Neo4j database GenCoNet. As a starting point, data on the common comorbid diseases essential hypertension and bronchial asthma was integrated. GenCoNet (https://genconet.kalis-amts.de) is a curated database that provides a better understanding of hereditary bases of comorbidities.",GenCoNet,0.993595004,NA,0,GenCoNet,0.993595004,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/25/2018 +24904731,http://gendermeddb.charite.de,"GenderMedDB: an interactive database of sex and gender-specific medical literature. Background Searches for sex and gender-specific publications are complicated by the absence of a specific algorithm within search engines and by the lack of adequate archives to collect the retrieved results. We previously addressed this issue by initiating the first systematic archive of medical literature containing sex and/or gender-specific analyses. This initial collection has now been greatly enlarged and re-organized as a free user-friendly database with multiple functions: GenderMedDB (http://gendermeddb.charite.de). Description GenderMedDB retrieves the included publications from the PubMed database. Manuscripts containing sex and/or gender-specific analysis are continuously screened and the relevant findings organized systematically into disciplines and diseases. Publications are furthermore classified by research type, subject and participant numbers. 
More than 11,000 abstracts are currently included in the database, after screening more than 40,000 publications. The main functions of the database include searches by publication data or content analysis based on pre-defined classifications. In addition, registrants are enabled to upload relevant publications, access descriptive publication statistics and interact in an open user forum. Conclusions Overall, GenderMedDB offers the advantages of a discipline-specific search engine as well as the functions of a participative tool for the gender medicine community.",GenderMedDB,0.995782018,NA,0,GenderMedDB,0.995782018,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/23/2014 +22912585,http://genomics.senescence.info/diet,"Dissecting the gene network of dietary restriction to identify evolutionarily conserved pathways and new functional genes. Dietary restriction (DR), limiting nutrient intake from diet without causing malnutrition, delays the aging process and extends lifespan in multiple organisms. The conserved life-extending effect of DR suggests the involvement of fundamental mechanisms, although these remain a subject of debate. To help decipher the life-extending mechanisms of DR, we first compiled a list of genes that if genetically altered disrupt or prevent the life-extending effects of DR. We called these DR-essential genes and identified more than 100 in model organisms such as yeast, worms, flies, and mice. In order for other researchers to benefit from this first curated list of genes essential for DR, we established an online database called GenDR (http://genomics.senescence.info/diet/). To dissect the interactions of DR-essential genes and discover the underlying lifespan-extending mechanisms, we then used a variety of network and systems biology approaches to analyze the gene network of DR. We show that DR-essential genes are more conserved at the molecular level and have more molecular interactions than expected by chance. 
Furthermore, we employed a guilt-by-association method to predict novel DR-essential genes. In budding yeast, we predicted nine genes related to vacuolar functions; we show experimentally that mutations deleting eight of those genes prevent the life-extending effects of DR. Three of these mutants (OPT2, FRE6, and RCR2) had extended lifespan under ad libitum, indicating that the lack of further longevity under DR is not caused by a general compromise of fitness. These results demonstrate how network analyses of DR using GenDR can be used to make phenotypically relevant predictions. Moreover, gene-regulatory circuits reveal that the DR-induced transcriptional signature in yeast involves nutrient-sensing, stress responses and meiotic transcription factors. Finally, comparing the influence of gene expression changes during DR on the interactomes of multiple organisms led us to suggest that DR commonly suppresses translation, while stimulating an ancient reproduction-related process.",GenDR,0.975972056,NA,0,GenDR,0.975972056,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/9/2012 +22064864,http://www.ebi.ac.uk/gxa,"Gene Expression Atlas update--a value-added database of microarray and sequencing-based functional genomics experiments. Gene Expression Atlas (http://www.ebi.ac.uk/gxa) is an added-value database providing information about gene expression in different cell types, organism parts, developmental stages, disease states, sample treatments and other biological/experimental conditions. The content of this database derives from curation, re-annotation and statistical analysis of selected data from the ArrayExpress Archive and the European Nucleotide Archive. A simple interface allows the user to query for differential gene expression either by gene names or attributes or by biological conditions, e.g. diseases, organism parts or cell types. 
Since our previous report we made 20 monthly releases and, as of Release 11.08 (August 2011), the database supports 19 species, which contains expression data measured for 19,014 biological conditions in 136,551 assays from 5598 independent studies.",Gene Expression Atlas,0.77896291,NA,0,Gene Expression Atlas,0.77896291,1,"24304889.0, 26481351.0, 29165655.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,11/7/2011 +21177656,http://rafalab.jhsph.edu/barcode,"The Gene Expression Barcode: leveraging public data repositories to begin cataloging the human and murine transcriptomes. Various databases have harnessed the wealth of publicly available microarray data to address biological questions ranging from across-tissue differential expression to homologous gene expression. Despite their practical value, these databases rely on relative measures of expression and are unable to address the most fundamental question--which genes are expressed in a given cell type. The Gene Expression Barcode is the first database to provide reliable absolute measures of expression for most annotated genes for 131 human and 89 mouse tissue types, including diseased tissue. This is made possible by a novel algorithm that leverages information from the GEO and ArrayExpress public repositories to build statistical models that permit converting data from a single microarray into expressed/unexpressed calls for each gene. For selected platforms, users may upload data and obtain results in a matter of seconds. The raw data, curated annotation, and code used to create our resource are also available at http://rafalab.jhsph.edu/barcode.",Gene Expression Barcode,0.718592152,NA,0,Gene Expression Barcode,0.718592152,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2011 +"22139938, 24270792, 26578585, 29112716",http://gene3d.biochem.ucl.ac.uk,"Gene3D: a domain-based resource for comparative genomics, functional annotation and protein network analysis. 
Gene3D http://gene3d.biochem.ucl.ac.uk is a comprehensive database of protein domain assignments for sequences from the major sequence databases. Domains are directly mapped from structures in the CATH database or predicted using a library of representative profile HMMs derived from CATH superfamilies. As previously described, Gene3D integrates many other protein family and function databases. These facilitate complex associations of molecular function, structure and evolution. Gene3D now includes a domain functional family (FunFam) level below the homologous superfamily level assignments. Additions have also been made to the interaction data. More significantly, to help with the visualization and interpretation of multi-genome scale data sets, we have developed a new, revamped website. Searching has been simplified with more sophisticated filtering of results, along with new tools based on Cytoscape Web, for visualizing protein-protein interaction networks, differences in domain composition between genomes and the taxonomic distribution of individual superfamilies.",Gene3D,0.990353435,NA,0,Gene3D,0.990353435,4,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +31642496,http://www.genemed.tech/gene4denovo,"Gene4Denovo: an integrated database and analytic platform for de novo mutations in humans. De novo mutations (DNMs) significantly contribute to sporadic diseases, particularly in neuropsychiatric disorders. Whole-exome sequencing (WES) and whole-genome sequencing (WGS) provide effective methods for detecting DNMs and prioritizing candidate genes. However, it remains a challenge for scientists, clinicians, and biologists to conveniently access and analyse data regarding DNMs and candidate genes from scattered publications. 
To fill the unmet need, we integrated 580 799 DNMs, including 30 060 coding DNMs detected by WES/WGS from 23 951 individuals across 24 phenotypes and prioritized a list of candidate genes with different degrees of statistical evidence, including 346 genes with false discovery rates <0.05. We then developed a database called Gene4Denovo (http://www.genemed.tech/gene4denovo/), which allowed these genetic data to be conveniently catalogued, searched, browsed, and analysed. In addition, Gene4Denovo integrated data from >60 genomic sources to provide comprehensive variant-level and gene-level annotation and information regarding the DNMs and candidate genes. Furthermore, Gene4Denovo provides end-users with limited bioinformatics skills to analyse their own genetic data, perform comprehensive annotation, and prioritize candidate genes using custom parameters. In conclusion, Gene4Denovo conveniently allows for the accelerated interpretation of DNM pathogenicity and the clinical implication of DNMs in humans.",Gene4Denovo,0.993604973,NA,0,Gene4Denovo,0.993604973,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +34733322,http://www.genemed.tech/gene4hl,"Gene4HL: An Integrated Genetic Database for Hearing Loss. Hearing loss (HL) is one of the most common disabilities in the world. In industrialized countries, HL occurs in 1-2/1,000 newborns, and approximately 60% of HL is caused by genetic factors. Next generation sequencing (NGS) has been widely used to identify many candidate genes and variants in patients with HL, but the data are scattered in multitudinous studies. It is a challenge for scientists, clinicians, and biologists to easily obtain and analyze HL genes and variant data from these studies. Thus, we developed a one-stop database of HL-related genes and variants, Gene4HL (http://www.genemed.tech/gene4hl/), making it easy to catalog, search, browse and analyze the genetic data. 
Gene4HL integrates the detailed genetic and clinical data of 326 HL-related genes from 1,608 published studies, along with 62 popular genetic data sources to provide comprehensive knowledge of candidate genes and variants associated with HL. Additionally, Gene4HL supports the users to analyze their own genetic engineering network data, performs comprehensive annotation, and prioritizes candidate genes and variations using custom parameters. Thus, Gene4HL can help users explain the function of HL genes and the clinical significance of variants by correlating the genotypes and phenotypes in humans.",Gene4HL,0.989707867,NA,0,Gene4HL,0.989707867,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/18/2021 +33981200,http://genemed.tech/gene4pd,"Gene4PD: A Comprehensive Genetic Database of Parkinson's Disease. Parkinson's disease (PD) is a complex neurodegenerative disorder with a strong genetic component. A growing number of variants and genes have been reported to be associated with PD; however, there is no database that integrate different type of genetic data, and support analyzing of PD-associated genes (PAGs). By systematic review and curation of multiple lines of public studies, we integrate multiple layers of genetic data (rare variants and copy-number variants identified from patients with PD, associated variants identified from genome-wide association studies, differentially expressed genes, and differential DNA methylation genes) and age at onset in PD. We integrated five layers of genetic data (8302 terms) with different levels of evidences from more than 3,000 studies and prioritized 124 PAGs with strong or suggestive evidences. These PAGs were identified to be significantly interacted with each other and formed an interconnected functional network enriched in several functional pathways involved in PD, suggesting these genes may contribute to the pathogenesis of PD. 
Furthermore, we identified 10 genes were associated with a juvenile-onset (age ≤ 30 years), 11 genes were associated with an early-onset (age of 30-50 years), whereas another 10 genes were associated with a late-onset (age > 50 years). Notably, the AAOs of patients with loss of function variants in five genes were significantly lower than that of patients with deleterious missense variants, while patients with VPS13C (P = 0.01) was opposite. Finally, we developed an online database named Gene4PD (http://genemed.tech/gene4pd) which integrated published genetic data in PD, the PAGs, and 63 popular genomic data sources, as well as an online pipeline for prioritize risk variants in PD. In conclusion, Gene4PD provides researchers and clinicians comprehensive genetic knowledge and analytic platform for PD, and would also improve the understanding of pathogenesis in PD.",Gene4PD,0.994049946,NA,0,Gene4PD,0.994049946,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/26/2021 +30349118,http://geneatlas.roslin.ed.ac.uk,"An atlas of genetic associations in UK Biobank. Genome-wide association studies (GWAS) have identified many loci contributing to variation in complex traits, yet the majority of loci that contribute to the heritability of complex traits remain elusive. Large study populations with sufficient statistical power are required to detect the small effect sizes of the yet unidentified genetic variants. However, the analysis of huge cohorts, like UK Biobank, is challenging. Here, we present an atlas of genetic associations for 118 non-binary and 660 binary traits of 452,264 UK Biobank participants of European ancestry. Results are compiled in a publicly accessible database that allows querying genome-wide association results for 9,113,133 genetic variants, as well as downloading GWAS summary statistics for over 30 million imputed genetic variants (>23 billion phenotype-genotype pairs). 
Our atlas of associations (GeneATLAS, http://geneatlas.roslin.ed.ac.uk ) will help researchers to query UK Biobank results in an easy and uniform way without the need to incur high computational costs.",GeneATLAS,0.691261196,NA,0,GeneATLAS,0.691261196,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/22/2018 +"22155609, 27048349",http://www.genecards.org,"In-silico human genomics with GeneCards. Since 1998, the bioinformatics, systems biology, genomics and medical communities have enjoyed a synergistic relationship with the GeneCards database of human genes (http://www.genecards.org). This human gene compendium was created to help to introduce order into the increasing chaos of information flow. As a consequence of viewing details and deep links related to specific genes, users have often requested enhanced capabilities, such that, over time, GeneCards has blossomed into a suite of tools (including GeneDecks, GeneALaCart, GeneLoc, GeneNote and GeneAnnot) for a variety of analyses of both single human genes and sets thereof. In this paper, we focus on inhouse and external research activities which have been enabled, enhanced, complemented and, in some cases, motivated by GeneCards. In turn, such interactions have often inspired and propelled improvements in GeneCards. We describe here the evolution and architecture of this project, including examples of synergistic applications in diverse areas such as synthetic lethality in cancer, the annotation of genetic variations in disease, omics integration in a systems biology approach to kidney disease, and bioinformatics tools.",GeneCards,0.995489895,Human Integrated Protein Expression Database,0.944921303,GeneCards,0.995489895,2,28605766,NA,NA,NA,do not merge,NA,NA,NA,NA,4/5/2016 +22116062,http://www.genedb.org,"GeneDB--an annotation database for pathogens. GeneDB (http://www.genedb.org) is a genome database for prokaryotic and eukaryotic pathogens and closely related organisms. 
The resource provides a portal to genome sequence and annotation data, which is primarily generated by the Pathogen Genomics group at the Wellcome Trust Sanger Institute. It combines data from completed and ongoing genome projects with curated annotation, which is readily accessible from a web based resource. The development of the database in recent years has focused on providing database-driven annotation tools and pipelines, as well as catering for increasingly frequent assembly updates. The website has been significantly redesigned to take advantage of current web technologies, and improve usability. The current release stores 41 data sets, of which 17 are manually curated and maintained by biologists, who review and incorporate data from the scientific literature, as well as other sources. GeneDB is primarily a production and annotation database for the genomes of predominantly pathogenic organisms.",GeneDB,0.993148148,NA,0,GeneDB,0.993148148,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/23/2011 +25361971,http://www.GeneFriends.org,"GeneFriends: a human RNA-seq-based gene and transcript co-expression database. Co-expression networks have proven effective at assigning putative functions to genes based on the functional annotation of their co-expressed partners, in candidate gene prioritization studies and in improving our understanding of regulatory networks. The growing number of genome resequencing efforts and genome-wide association studies often identify loci containing novel genes and there is a need to infer their functions and interaction partners. To facilitate this we have expanded GeneFriends, an online database that allows users to identify co-expressed genes with one or more user-defined genes. This expansion entails an RNA-seq-based co-expression map that includes genes and transcripts that are not present in the microarray-based co-expression maps, including over 10,000 non-coding RNAs. 
The results users obtain from GeneFriends include a co-expression network as well as a summary of the functional enrichment among the co-expressed genes. Novel insights can be gathered from this database for different splice variants and ncRNAs, such as microRNAs and lincRNAs. Furthermore, our updated tool allows candidate transcripts to be linked to diseases and processes using a guilt-by-association approach. GeneFriends is freely available from http://www.GeneFriends.org and can be used to quickly identify and rank candidate targets relevant to the process or disease under study.",GeneFriends,0.990629911,NA,0,GeneFriends,0.990629911,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/31/2014 +28605766,http://www.genecards.org,"GeneHancer: genome-wide integration of enhancers and target genes in GeneCards. . A major challenge in understanding gene regulation is the unequivocal identification of enhancer elements and uncovering their connections to genes. We present GeneHancer, a novel database of human enhancers and their inferred target genes, in the framework of GeneCards. First, we integrated a total of 434 000 reported enhancers from four different genome-wide databases: the Encyclopedia of DNA Elements (ENCODE), the Ensembl regulatory build, the functional annotation of the mammalian genome (FANTOM) project and the VISTA Enhancer Browser. Employing an integration algorithm that aims to remove redundancy, GeneHancer portrays 285 000 integrated candidate enhancers (covering 12.4% of the genome), 94 000 of which are derived from more than one source, and each assigned an annotation-derived confidence score. GeneHancer subsequently links enhancers to genes, using: tissue co-expression correlation between genes and enhancer RNAs, as well as enhancer-targeted transcription factor genes; expression quantitative trait loci for variants within enhancers; and capture Hi-C, a promoter-specific genome conformation assay. 
The individual scores based on each of these four methods, along with gene–enhancer genomic distances, form the basis for GeneHancer’s combinatorial likelihood-based scores for enhancer–gene pairing. Finally, we define ‘elite’ enhancer–gene relations reflecting both a high-likelihood enhancer definition and a strong enhancer–gene association.GeneHancer predictions are fully integrated in the widely used GeneCards Suite, whereby candidate enhancers and their annotations are displayed on every relevant GeneCard. This assists in the mapping of non-coding variants to enhancers, and via the linked genes, forms a basis for variant–phenotype interpretation of whole-genome sequences in health and disease. http://www.genecards.org/.",GeneHancer,0.987699628,NA,0,GeneHancer,0.987699628,1,"22155609.0, 27048349.0",NA,NA,NA,do not merge,NA,NA,NA,NA,1/1/2017 +"29652620, 33080015",http://genelab.nasa.gov,"NASA GeneLab Project: Bridging Space Radiation Omics with Ground Studies. Accurate assessment of risks of long-term space missions is critical for human space exploration. It is essential to have a detailed understanding of the biological effects on humans living and working in deep space. Ionizing radiation from galactic cosmic rays (GCR) is a major health risk factor for astronauts on extended missions outside the protective effects of the Earth's magnetic field. Currently, there are gaps in our knowledge of the health risks associated with chronic low-dose, low-dose-rate ionizing radiation, specifically ions associated with high (H) atomic number (Z) and energy (E). The NASA GeneLab project ( https://genelab.nasa.gov/ ) aims to provide a detailed library of omics datasets associated with biological samples exposed to HZE. The GeneLab Data System (GLDS) includes datasets from both spaceflight and ground-based studies, a majority of which involve exposure to ionizing radiation. 
In addition to detailed information on radiation exposure for ground-based studies, GeneLab is adding detailed, curated dosimetry information for spaceflight experiments. GeneLab is the first comprehensive omics database for space-related research from which an investigator can generate hypotheses to direct future experiments, utilizing both ground and space biological radiation data. The GLDS is continually expanding as omics-related data are generated by the space life sciences community. Here we provide a brief summary of the space radiation-related data available at GeneLab.",GeneLab,0.978558898,NA,0,GeneLab,0.978558898,2,30329036,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2021 +24174536,http://www.geneprof.org,"GeneProf data: a resource of curated, integrated and reusable high-throughput genomics experiments. GeneProf Data (http://www.geneprof.org) is an open web resource for analysed functional genomics experiments. We have built up a large collection of completely processed RNA-seq and ChIP-seq studies by carefully and transparently reanalysing and annotating high-profile public data sets. GeneProf makes these data instantly accessible in an easily interpretable, searchable and reusable manner and thus opens up the path to the advantages and insights gained from genome-scale experiments to a broader scientific audience. Moreover, GeneProf supports programmatic access to these data via web services to further facilitate the reuse of experimental data across tools and laboratories.",GeneProf,0.996735215,NA,0,GeneProf,0.996735215,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/29/2013 +22748121,http://actin.pharm.mssm.edu/genes2FANs,"Genes2FANs: connecting genes through functional association networks. Background Protein-protein, cell signaling, metabolic, and transcriptional interaction networks are useful for identifying connections between lists of experimentally identified genes/proteins. 
However, besides physical or co-expression interactions there are many ways in which pairs of genes, or their protein products, can be associated. By systematically incorporating knowledge on shared properties of genes from diverse sources to build functional association networks (FANs), researchers may be able to identify additional functional interactions between groups of genes that are not readily apparent. Results Genes2FANs is a web based tool and a database that utilizes 14 carefully constructed FANs and a large-scale protein-protein interaction (PPI) network to build subnetworks that connect lists of human and mouse genes. The FANs are created from mammalian gene set libraries where mouse genes are converted to their human orthologs. The tool takes as input a list of human or mouse Entrez gene symbols to produce a subnetwork and a ranked list of intermediate genes that are used to connect the query input list. In addition, users can enter any PubMed search term and then the system automatically converts the returned results to gene lists using GeneRIF. This gene list is then used as input to generate a subnetwork from the user's PubMed query. As a case study, we applied Genes2FANs to connect disease genes from 90 well-studied disorders. We find an inverse correlation between the counts of links connecting disease genes through PPI and links connecting diseases genes through FANs, separating diseases into two categories. Conclusions Genes2FANs is a useful tool for interpreting the relationships between gene/protein lists in the context of their various functions and networks. Combining functional association interactions with physical PPIs can be useful for revealing new biology and help form hypotheses for further experimentation. 
Our finding that disease genes in many cancers are mostly connected through PPIs whereas other complex diseases, such as autism and type-2 diabetes, are mostly connected through FANs without PPIs, can guide better strategies for disease gene discovery. Genes2FANs is available at: http://actin.pharm.mssm.edu/genes2FANs.",Genes2FANs,0.972529342,NA,0,Genes2FANs,0.972529342,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/2/2012 +23650583,http://genesetdb.auckland.ac.nz/haeremai.html,"GeneSetDB: A comprehensive meta-database, statistical and visualisation framework for gene set analysis. Most ""omics"" experiments require comprehensive interpretation of the biological meaning of gene lists. To address this requirement, a number of gene set analysis (GSA) tools have been developed. Although the biological value of GSA is strictly limited by the breadth of the gene sets used, very few methods exist for simultaneously analysing multiple publically available gene set databases. Therefore, we constructed GeneSetDB (http://genesetdb.auckland.ac.nz/haeremai.html), a comprehensive meta-database, which integrates 26 public databases containing diverse biological information with a particular focus on human disease and pharmacology. GeneSetDB enables users to search for gene sets containing a gene identifier or keyword, generate their own gene sets, or statistically test for enrichment of an uploaded gene list across all gene sets, and visualise gene set enrichment and overlap using a clustered heat map.",GeneSetDB,0.996939301,NA,0,GeneSetDB,0.996939301,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/17/2012 +22110038,"http://www.genesigdb.org, http://compbio.dfci.harvard.edu/genesigdb","GeneSigDB: a manually curated database and resource for analysis of gene expression signatures. GeneSigDB (http://www.genesigdb.org or http://compbio.dfci.harvard.edu/genesigdb/) is a database of gene signatures that have been extracted and manually curated from the published literature. 
It provides a standardized resource of published prognostic, diagnostic and other gene signatures of cancer and related disease to the community so they can compare the predictive power of gene signatures or use these in gene set enrichment analysis. Since GeneSigDB release 1.0, we have expanded from 575 to 3515 gene signatures, which were collected and transcribed from 1604 published articles largely focused on gene expression in cancer, stem cells, immune cells, development and lung disease. We have made substantial upgrades to the GeneSigDB website to improve accessibility and usability, including adding a tag cloud browse function, facetted navigation and a 'basket' feature to store genes or gene signatures of interest. Users can analyze GeneSigDB gene signatures, or upload their own gene list, to identify gene signatures with significant gene overlap and results can be viewed on a dynamic editable heatmap that can be downloaded as a publication quality image. All data in GeneSigDB can be downloaded in numerous formats including .gmt file format for gene set enrichment analysis or as a R/Bioconductor data file. GeneSigDB is available from http://www.genesigdb.org.",GeneSigDB,0.998035491,NA,0,GeneSigDB,0.998035491,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2011 +26567549,http://genestation.org,"GEneSTATION 1.0: a synthetic resource of diverse evolutionary and functional genomic data for studying the evolution of pregnancy-associated tissues and phenotypes. Mammalian gestation and pregnancy are fast evolving processes that involve the interaction of the fetal, maternal and paternal genomes. Version 1.0 of the GEneSTATION database (http://genestation.org) integrates diverse types of omics data across mammals to advance understanding of the genetic basis of gestation and pregnancy-associated phenotypes and to accelerate the translation of discoveries from model organisms to humans. 
GEneSTATION is built using tools from the Generic Model Organism Database project, including the biology-aware database CHADO, new tools for rapid data integration, and algorithms that streamline synthesis and user access. GEneSTATION contains curated life history information on pregnancy and reproduction from 23 high-quality mammalian genomes. For every human gene, GEneSTATION contains diverse evolutionary (e.g. gene age, population genetic and molecular evolutionary statistics), organismal (e.g. tissue-specific gene and protein expression, differential gene expression, disease phenotype), and molecular data types (e.g. Gene Ontology Annotation, protein interactions), as well as links to many general (e.g. Entrez, PubMed) and pregnancy disease-specific (e.g. PTBgene, dbPTB) databases. By facilitating the synthesis of diverse functional and evolutionary data in pregnancy-associated tissues and phenotypes and enabling their quick, intuitive, accurate and customized meta-analysis, GEneSTATION provides a novel platform for comprehensive investigation of the function and evolution of mammalian pregnancy.",GEneSTATION,0.995596051,NA,0,GEneSTATION,0.995596051,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/14/2015 +23161689,http://topaz.gatech.edu/GeneTack/db.html,"GeneTack database: genes with frameshifts in prokaryotic genomes and eukaryotic mRNA sequences. Database annotations of prokaryotic genomes and eukaryotic mRNA sequences pay relatively low attention to frame transitions that disrupt protein-coding genes. Frame transitions (frameshifts) could be caused by sequencing errors or indel mutations inside protein-coding regions. Other observed frameshifts are related to recoding events (that evolved to control expression of some genes). Earlier, we have developed an algorithm and software program GeneTack for ab initio frameshift finding in intronless genes. 
Here, we describe a database (freely available at http://topaz.gatech.edu/GeneTack/db.html) containing genes with frameshifts (fs-genes) predicted by GeneTack. The database includes 206 991 fs-genes from 1106 complete prokaryotic genomes and 45 295 frameshifts predicted in mRNA sequences from 100 eukaryotic genomes. The whole set of fs-genes was grouped into clusters based on sequence similarity between fs-proteins (conceptually translated fs-genes), conservation of the frameshift position and frameshift direction (-1, +1). The fs-genes can be retrieved by similarity search to a given query sequence via a web interface, by fs-gene cluster browsing, etc. Clusters of fs-genes are characterized with respect to their likely origin, such as pseudogenization, phase variation, etc. The largest clusters contain fs-genes with programed frameshifts (related to recoding events).",GeneTack,0.659076989,NA,0,GeneTack,0.659076989,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/17/2012 +22080549,http://www.GeneWeaver.org,"GeneWeaver: a web-based system for integrative functional genomics. High-throughput genome technologies have produced a wealth of data on the association of genes and gene products to biological functions. Investigators have discovered value in combining their experimental results with published genome-wide association studies, quantitative trait locus, microarray, RNA-sequencing and mutant phenotyping studies to identify gene-function associations across diverse experiments, species, conditions, behaviors or biological processes. These experimental results are typically derived from disparate data repositories, publication supplements or reconstructions from primary data stores. This leaves bench biologists with the complex and unscalable task of integrating data by identifying and gathering relevant studies, reanalyzing primary data, unifying gene identifiers and applying ad hoc computational analysis to the integrated set. 
The freely available GeneWeaver (http://www.GeneWeaver.org) powered by the Ontological Discovery Environment is a curated repository of genomic experimental results with an accompanying tool set for dynamic integration of these data sets, enabling users to interactively address questions about sets of biological functions and their relations to sets of genes. Thus, large numbers of independently published genomic results can be organized into new conceptual frameworks driven by the underlying, inferred biological relationships rather than a pre-existing semantic framework. An empirical 'ontology' is discovered from the aggregate of experimental knowledge around user-defined areas of biological inquiry.",GeneWeaver,0.980351031,NA,0,GeneWeaver,0.980351031,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/12/2011 +31844835,"http://mandadilab.webfactional.com/home/, http://mandadilab.webfactional.com/home/dload","GenFam: A web application and database for gene family-based classification and functional enrichment analysis. Genome-scale studies using high-throughput sequencing (HTS) technologies generate substantial lists of differentially expressed genes under different experimental conditions. These gene lists need to be further mined to narrow down biologically relevant genes and associated functions in order to guide downstream functional genetic analyses. A popular approach is to determine statistically overrepresented genes in a user-defined list through enrichment analysis tools, which rely on functional annotations of genes based on Gene Ontology (GO) terms. Here, we propose a new computational approach, GenFam, which allows annotation, classification, and enrichment of genes based on their gene family, thus simplifying identification of candidate gene families and associated genes that may be relevant to the query. GenFam and its integrated database comprises of three hundred and eighty-four unique gene families and supports gene family analyses for sixty plant genomes. 
Four comparative case studies with plant species belonging to different clades and families were performed using GenFam which demonstrated its robustness and comprehensiveness over preexisting functional enrichment tools. To make it readily accessible for plant biologists, GenFam is available as a web-based application where users can input gene IDs and export enrichment results in both tabular and graphical formats. Users can also customize analysis parameters by choosing from the various statistical enrichment tests and multiple testing correction methods. Additionally, the web-based application, source code, and database are freely available to use and download. Website: http://mandadilab.webfactional.com/home/. Source code and database: http://mandadilab.webfactional.com/home/dload/.",GenFam,0.990917087,NA,0,GenFam,0.990917087,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/4/2019 +22383735,http://born.nii.ac.jp,"GENI-DB: a database of global events for epidemic intelligence. Unlabelled We present a novel public health database (GENI-DB) in which news events on the topic of over 176 infectious diseases and chemicals affecting human and animal health are compiled from surveillance of the global online news media in 10 languages. News event frequency data were gathered systematically through the BioCaster public health surveillance system from July 2009 to the present and is available to download by the research community for purposes of analyzing trends in the global burden of infectious diseases. Database search can be conducted by year, country, disease and language. Availability The GENI-DB is freely available via a web portal at http://born.nii.ac.jp/.",GENI-DB,0.995142937,NA,0,GENI-DB,0.995142937,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2012 +29512401,http://genipac.cancerresearch.my,"GENIPAC: A Genomic Information Portal for Head and Neck Cancer Cell Systems. 
Head and neck cancer (HNC)-derived cell lines represent fundamental models for studying the biological mechanisms underlying cancer development and precision therapies. However, mining the genomic information of HNC cells from available databases requires knowledge on bioinformatics and computational skill sets. Here, we developed a user-friendly web resource for exploring, visualizing, and analyzing genomics information of commonly used HNC cell lines. We populated the current version of GENIPAC with 44 HNC cell lines from 3 studies: ORL Series, OPC-22, and H Series. Specifically, the mRNA expressions for all the 3 studies were derived with RNA-seq. The copy number alterations analysis of ORL Series was performed on the Genome Wide Human Cytoscan HD array, while copy number alterations for OPC-22 were derived from whole exome sequencing. Mutations from ORL Series and H Series were derived from RNA-seq information, while OPC-22 was based on whole exome sequencing. All genomic information was preprocessed with customized scripts and underwent data validation and correction through data set validator tools provided by cBioPortal. The clinical and genomic information of 44 HNC cell lines are easily assessable in GENIPAC. The functional utility of GENIPAC was demonstrated with some of the genomic alterations that are commonly reported in HNC, such as TP53, EGFR, CCND1, and PIK3CA. We showed that these genomic alterations as reported in The Cancer Genome Atlas database were recapitulated in the HNC cell lines in GENIPAC. Importantly, genomic alterations within pathways could be simultaneously visualized. We developed GENIPAC to create access to genomic information on HNC cell lines. This cancer omics initiative will help the research community to accelerate better understanding of HNC and the development of new precision therapeutic options for HNC treatment. 
GENIPAC is freely available at http://genipac.cancerresearch.my/ .",GENIPAC,0.994062304,NA,0,GENIPAC,0.994062304,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/7/2018 +25399415,http://ecoli.naist.jp/GB,"GenoBase: comprehensive resource database of Escherichia coli K-12. Comprehensive experimental resources, such as ORFeome clone libraries and deletion mutant collections, are fundamental tools for elucidation of gene function. Data sets by omics analysis using these resources provide key information for functional analysis, modeling and simulation both in individual and systematic approaches. With the long-term goal of complete understanding of a cell, we have over the past decade created a variety of clone and mutant sets for functional genomics studies of Escherichia coli K-12. We have made these experimental resources freely available to the academic community worldwide. Accordingly, these resources have now been used in numerous investigations of a multitude of cell processes. Quality control is extremely important for evaluating results generated by these resources. Because the annotation has been changed since 2005, which we originally used for the construction, we have updated these genomic resources accordingly. Here, we describe GenoBase (http://ecoli.naist.jp/GB/), which contains key information about comprehensive experimental resources of E. coli K-12, their quality control and several omics data sets generated using these resources.",GenoBase,0.995594084,NA,0,GenoBase,0.995594084,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2014 +"29036542, 30365034",http://bigd.big.ac.cn,"Database Resources of the BIG Data Center in 2018. The BIG Data Center at Beijing Institute of Genomics (BIG) of the Chinese Academy of Sciences provides freely open access to a suite of database resources in support of worldwide research activities in both academia and industry. 
With the vast amounts of omics data generated at ever-greater scales and rates, the BIG Data Center is continually expanding, updating and enriching its core database resources through big-data integration and value-added curation, including BioCode (a repository archiving bioinformatics tool codes), BioProject (a biological project library), BioSample (a biological sample library), Genome Sequence Archive (GSA, a data repository for archiving raw sequence reads), Genome Warehouse (GWH, a centralized resource housing genome-scale data), Genome Variation Map (GVM, a public repository of genome variations), Gene Expression Nebulas (GEN, a database of gene expression profiles based on RNA-Seq data), Methylation Bank (MethBank, an integrated databank of DNA methylomes), and Science Wikis (a series of biological knowledge wikis for community annotations). In addition, three featured web services are provided, viz., BIG Search (search as a service; a scalable inter-domain text search engine), BIG SSO (single sign-on as a service; a user access control system to gain access to multiple independent systems with a single ID and password) and Gsub (submission as a service; a unified submission service for all relevant resources). All of these resources are publicly accessible through the home page of the BIG Data Center at http://bigd.big.ac.cn.",NA,0,Genome,0.749958634,Genome,0.749958634,2,33175170,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: CLASS,NA,NA,1/1/2019 +22417913,http://esper.lab.nig.ac.jp/genome-composition-database,"A new database (GCD) on genome composition for eukaryote and prokaryote genome sequences and their initial analyses. Eukaryote genomes contain many noncoding regions, and they are quite complex. To understand these complexities, we constructed a database, Genome Composition Database, for the whole genome composition statistics for 101 eukaryote genome data, as well as more than 1,000 prokaryote genomes. 
Frequencies of all possible one to ten oligonucleotides were counted for each genome, and these observed values were compared with expected values computed under observed oligonucleotide frequencies of length 1-4. Deviations from expected values were much larger for eukaryotes than prokaryotes, except for fungal genomes. Mammalian genomes showed the largest deviation among animals. The results of comparison are available online at http://esper.lab.nig.ac.jp/genome-composition-database/.",GCD,0.811255554,Genome Composition Database,0.859661317,Genome Composition Database,0.859661317,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/14/2012 +25392413,http://coffee-genome.org,"The coffee genome hub: a resource for coffee genomes. The whole genome sequence of Coffea canephora, the perennial diploid species known as Robusta, has been recently released. In the context of the C. canephora genome sequencing project and to support post-genomics efforts, we developed the Coffee Genome Hub (http://coffee-genome.org/), an integrative genome information system that allows centralized access to genomics and genetics data and analysis tools to facilitate translational and applied research in coffee. We provide the complete genome sequence of C. canephora along with gene structure, gene product information, metabolism, gene families, transcriptomics, syntenic blocks, genetic markers and genetic maps. The hub relies on generic software (e.g. GMOD tools) for easy querying, visualizing and downloading research data. It includes a Genome Browser enhanced by a Community Annotation System, enabling the improvement of automatic gene annotation through an annotation editor. 
In addition, the hub aims at developing interoperability among other existing South Green tools managing coffee data (phylogenomics resources, SNPs) and/or supporting data analyses with the Galaxy workflow manager.",NA,0,Genome Hub,0.447384566,Genome Hub,0.447384566,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/11/2014 +23245398,http://gwidd.bioinformatics.ku.edu,"GWIDD: a comprehensive resource for genome-wide structural modeling of protein-protein interactions. Protein-protein interactions are a key component of life processes. The knowledge of the three-dimensional structure of these interactions is important for understanding protein function. Genome-Wide Docking Database (http://gwidd.bioinformatics.ku.edu) offers an extensive source of data for structural studies of protein-protein complexes on genome scale. The current release of the database combines the available experimental data on the structure and characteristics of protein interactions with structural modeling of protein complexes for 771 organisms spanned over the entire universe of life from viruses to humans. The interactions are stored in a relational database with user-friendly interface that includes various search options. The search results can be interactively previewed; the structures, downloaded, along with the interaction characteristics.",GWIDD,0.74214983,Genome-Wide Docking Database,0.819814461,Genome-Wide Docking Database,0.819814461,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/11/2012 +"25348407, 31733063",http://www.genome3d.eu,"Genome3D: exploiting structure to help users understand their sequences. Genome3D (http://www.genome3d.eu) is a collaborative resource that provides predicted domain annotations and structural models for key sequences. Since introducing Genome3D in a previous NAR paper, we have substantially extended and improved the resource. 
We have annotated representatives from Pfam families to improve coverage of diverse sequences and added a fast sequence search to the website to allow users to find Genome3D-annotated sequences similar to their own. We have improved and extended the Genome3D data, enlarging the source data set from three model organisms to 10, and adding VIVACE, a resource new to Genome3D. We have analysed and updated Genome3D's SCOP/CATH mapping. Finally, we have improved the superposition tools, which now give users a more powerful interface for investigating similarities and differences between structural models.",Genome3D,0.994092405,NA,0,Genome3D,0.994092405,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +27789686,http://genomecrispr.org,"GenomeCRISPR - a database for high-throughput CRISPR/Cas9 screens. Over the past years, CRISPR/Cas9 mediated genome editing has developed into a powerful tool for modifying genomes in various organisms. In high-throughput screens, CRISPR/Cas9 mediated gene perturbations can be used for the systematic functional analysis of whole genomes. Discoveries from such screens provide a wealth of knowledge about gene to phenotype relationships in various biological model systems. However, a database resource to query results efficiently has been lacking. To this end, we developed GenomeCRISPR (http://genomecrispr.org), a database for genome-scale CRISPR/Cas9 screens. Currently, GenomeCRISPR contains data on more than 550 000 single guide RNAs (sgRNA) derived from 84 different experiments performed in 48 different human cell lines, comprising all screens in human cells using CRISPR/Cas published to date. GenomeCRISPR provides data mining options and tools, such as gene or genomic region search. Phenotypic and genome track views allow users to investigate and compare the results of different screens, or the impact of different sgRNAs on the gene of interest. An Application Programming Interface (API) allows for automated data access and batch download. 
As more screening data will become available, we also aim at extending the database to include functional genomic data from other organisms and enable cross-species comparisons.",GenomeCRISPR,0.993304312,NA,0,GenomeCRISPR,0.993304312,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/26/2016 +23193271,http://www.genomernai.org,"GenomeRNAi: a database for cell-based and in vivo RNAi phenotypes, 2013 update. RNA interference (RNAi) represents a powerful method to systematically study loss-of-function phenotypes on a large scale with a wide variety of biological assays, constituting a rich source for the assignment of gene function. The GenomeRNAi database (http://www.genomernai.org) makes available RNAi phenotype data extracted from the literature for human and Drosophila. It also provides RNAi reagent information, along with an assessment as to their efficiency and specificity. This manuscript describes an update of the database previously featured in the NAR Database Issue. The new version has undergone a complete re-design of the user interface, providing an intuitive, flexible framework for additional functionalities. Screen information and gene-reagent-phenotype associations are now available for download. The integration with other resources has been improved by allowing in-links via GenomeRNAi screen IDs, or external gene or reagent identifiers. A distributed annotation system (DAS) server enables the visualization of the phenotypes and reagents in the context of a genome browser. We have added a page listing 'frequent hitters', i.e. genes that show a phenotype in many screens, which might guide on-going RNAi studies. 
Structured annotation guidelines have been established to facilitate consistent curation, and a submission template for direct submission by data producers is available for download.",GenomeRNAi,0.972192883,NA,0,GenomeRNAi,0.972192883,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/27/2012 +25428358,http://www.ncbi.nlm.nih.gov/genome/viruses,"NCBI viral genomes resource. Recent technological innovations have ignited an explosion in virus genome sequencing that promises to fundamentally alter our understanding of viral biology and profoundly impact public health policy. Yet, any potential benefits from the billowing cloud of next generation sequence data hinge upon well implemented reference resources that facilitate the identification of sequences, aid in the assembly of sequence reads and provide reference annotation sources. The NCBI Viral Genomes Resource is a reference resource designed to bring order to this sequence shockwave and improve usability of viral sequence data. The resource can be accessed at http://www.ncbi.nlm.nih.gov/genome/viruses/ and catalogs all publicly available virus genome sequences and curates reference genome sequences. As the number of genome sequences has grown, so too have the difficulties in annotating and maintaining reference sequences. The rapid expansion of the viral sequence universe has forced a recalibration of the data model to better provide extant sequence representation and enhanced reference sequence products to serve the needs of the various viral communities. This, in turn, has placed increased emphasis on leveraging the knowledge of individual scientific communities to identify important viral sequences and develop well annotated reference virus genome sets.",NA,0,Genomes,0.515958428,Genomes,0.515958428,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/26/2014 +26780094,http://www.genomespace.org,"Integrative genomic analysis by interoperation of bioinformatics tools in GenomeSpace. 
Complex biomedical analyses require the use of multiple software tools in concert and remain challenging for much of the biomedical research community. We introduce GenomeSpace (http://www.genomespace.org), a cloud-based, cooperative community resource that currently supports the streamlined interaction of 20 bioinformatics tools and data resources. To facilitate integrative analysis by non-programmers, it offers a growing set of 'recipes', short workflows to guide investigators through high-utility analysis tasks.",GenomeSpace,0.996023118,NA,0,GenomeSpace,0.996023118,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/18/2016 +28651291,http://www.ncbi.nlm.nih.gov/bioproject/PRJNA325670,"Development of a Reference Standard Library of Chloroplast Genome Sequences, GenomeTrakrCP. Precise, species-level identification of plants in foods and dietary supplements is difficult. While the use of DNA barcoding regions (short regions of DNA with diagnostic utility) has been effective for many inquiries, it is not always a robust approach for closely related species, especially in highly processed products. The use of fully sequenced chloroplast genomes, as an alternative to short diagnostic barcoding regions, has demonstrated utility for closely related species. The U. S. Food and Drug Administration (FDA) has also developed species-specific DNA-based assays targeting plant species of interest by utilizing chloroplast genome sequences. Here, we introduce a repository of complete chloroplast genome sequences called GenomeTrakrCP, which will be publicly available at the National Center for Biotechnology Information (NCBI). Target species for inclusion are plants found in foods and dietary supplements, toxin producers, common contaminants and adulterants, and their close relatives. Publicly available data will include annotated assemblies, raw sequencing data, and voucher information with each NCBI accession associated with an authenticated reference herbarium specimen. 
To date, 40 complete chloroplast genomes have been deposited in GenomeTrakrCP (https://www.ncbi.nlm.nih.gov/bioproject/PRJNA325670/), and this will be expanded in the future.",GenomeTrakrCP,0.995048881,of Chloroplast Genome Sequences,0.815545069,GenomeTrakrCP,0.995048881,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/26/2017 +26272709,http://genomewidepdb.proteomix.org,"GenomewidePDB 2.0: A Newly Upgraded Versatile Proteogenomic Database for the Chromosome-Centric Human Proteome Project. Since the launch of the Chromosome-centric Human Proteome Project (C-HPP) in 2012, the number of ""missing"" proteins has fallen to 2932, down from ∼5932 since the number was first counted in 2011. We compared the characteristics of missing proteins with those of already annotated proteins with respect to transcriptional expression pattern and the time periods in which newly identified proteins were annotated. We learned that missing proteins commonly exhibit lower levels of transcriptional expression and less tissue-specific expression compared with already annotated proteins. This makes it more difficult to identify missing proteins as time goes on. One of the C-HPP goals is to identify alternative spliced product of proteins (ASPs), which are usually difficult to find by shot-gun proteomic methods due to their sequence similarities with the representative proteins. To resolve this problem, it may be necessary to use a targeted proteomics approach (e.g., selected and multiple reaction monitoring [S/MRM] assays) and an innovative bioinformatics platform that enables the selection of target peptides for rarely expressed missing proteins or ASPs. Given that the success of efforts to identify missing proteins may rely on more informative public databases, it was necessary to upgrade the available integrative databases. 
To this end, we attempted to improve the features and utility of GenomewidePDB by integrating transcriptomic information (e.g., alternatively spliced transcripts), annotated peptide information, and an advanced search interface that can find proteins of interest when applying a targeted proteomics strategy. This upgraded version of the database, GenomewidePDB 2.0, may not only expedite identification of the remaining missing proteins but also enhance the exchange of information among the proteome community. GenomewidePDB 2.0 is available publicly at http://genomewidepdb.proteomix.org/.",GenomewidePDB,0.987685204,Chromosome-centric,0.595189404,GenomewidePDB,0.987685204,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/19/2015 +23193262,http://www.dyogen.ens.fr/genomicus,"Genomicus: five genome browsers for comparative genomics in eukaryota. Genomicus (http://www.dyogen.ens.fr/genomicus/) is a database and an online tool that allows easy comparative genomic visualization in >150 eukaryote genomes. It provides a way to explore spatial information related to gene organization within and between genomes and temporal relationships related to gene and genome evolution. For the specific vertebrate phylum, it also provides access to ancestral gene order reconstructions and conserved non-coding elements information. We extended the Genomicus database originally dedicated to vertebrate to four new clades, including plants, non-vertebrate metazoa, protists and fungi. This visualization tool allows evolutionary phylogenomics analysis and exploration. 
Here, we describe the graphical modules of Genomicus and show how it is capable of revealing differential gene loss and gain, segmental or genome duplications and study the evolution of a locus through homology relationships.",Genomicus,0.997919381,NA,0,Genomicus,0.997919381,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2012 +25432975,http://www.genomicus.biologie.ens.fr/genomicus-plants,"GenomicusPlants: a web resource to study genome evolution in flowering plants. Comparative genomics combined with phylogenetic reconstructions are powerful approaches to study the evolution of genes and genomes. However, the current rapid expansion of the volume of genomic information makes it increasingly difficult to interrogate, integrate and synthesize comparative genome data while taking into account the maximum breadth of information available. GenomicusPlants (http://www.genomicus.biologie.ens.fr/genomicus-plants) is an extension of the Genomicus webserver that addresses this issue by allowing users to explore flowering plant genomes in an intuitive way, across the broadest evolutionary scales. Extant genomes of 26 flowering plants can be analyzed, as well as 23 ancestral reconstructed genomes. Ancestral gene order provides a long-term chronological view of gene order evolution, greatly facilitating comparative genomics and evolutionary studies. Four main interfaces ('views') are available where: (i) PhyloView combines phylogenetic trees with comparisons of genomic loci across any number of genomes; (ii) AlignView projects loci of interest against all other genomes to visualize its topological conservation; (iii) MatrixView compares two genomes in a classical dotplot representation; and (iv) Karyoview visualizes chromosome karyotypes 'painted' with colours of another genome of interest. 
All four views are interconnected and benefit from many customizable features.",GenomicusPlants,0.995462537,NA,0,GenomicusPlants,0.995462537,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2014 +34538772,http://genorigin.chenzxlab.cn,"GenOrigin: A comprehensive protein-coding gene origination database on the evolutionary timescale of life. The origination of new genes contributes to the biological diversity of life. New genes may quickly build their network, exert important functions, and generate novel phenotypes. Dating gene age and inferring the origination mechanisms of new genes, like primate-specific genes, is the basis for the functional study of the genes. However, no comprehensive resource of gene age estimates across species is available. Here, we systematically date the age of 9,102,113 protein-coding genes from 565 species in the Ensembl and Ensembl Genomes databases, including 82 bacteria, 57 protists, 134 fungi, 58 plants, 56 metazoa, and 178 vertebrates, using a protein-family-based pipeline with Wagner parsimony algorithm. We also collect gene age estimate data from other studies and uniformly distribute the gene age estimates to time ranges in a million years for comparison across studies. All the data are cataloged into GenOrigin (http://genorigin.chenzxlab.cn/), a user-friendly new database of gene age estimates, where users can browse gene age estimates by species, age, and gene ontology. In GenOrigin, the information such as gene age estimates, annotation, gene ontology, ortholog, and paralog, as well as detailed gene presence/absence views for gene age inference based on the species tree with evolutionary timescale, is provided to researchers for exploring gene functions.",GenOrigin,0.986215472,NA,0,GenOrigin,0.986215472,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/14/2021 +21695066,"http://medicalgenome.kribb.re.kr/GENT/, http://genome.kobic.re.kr/GENT","GENT: gene expression database of normal and tumor tissues. 
Background Some oncogenes such as ERBB2 and EGFR are over-expressed in only a subset of patients. Cancer outlier profile analysis is one of computational approaches to identify outliers in gene expression data. A database with a large sample size would be a great advantage when searching for genes over-expressed in only a subset of patients. Description GENT (Gene Expression database of Normal and Tumor tissues) is a web-accessible database that provides gene expression patterns across diverse human cancer and normal tissues. More than 40000 samples, profiled by Affymetrix U133A or U133plus2 platforms in many different laboratories across the world, were collected from public resources and combined into two large data sets, helping the identification of cancer outliers that are over-expressed in only a subset of patients. Gene expression patterns in nearly 1000 human cancer cell lines are also provided. In each tissue, users can retrieve gene expression patterns classified by more detailed clinical information. Conclusions The large samples size (>24300 for U133plus2 and >16400 for U133A) of GENT provides an advantage in identifying cancer outliers. A cancer cell line gene expression database is useful for target validation by in vitro experiment. We hope GENT will be a useful resource for cancer researchers in many stages from target discovery to target validation. GENT is available at http://medicalgenome.kribb.re.kr/GENT/ or http://genome.kobic.re.kr/GENT/.",GENT,0.995685935,Gene Expression database of Normal and Tumor tissues,0.976307766,GENT,0.995685935,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/9/2011 +31296229,http://gent2.appex.kr,"GENT2: an updated gene expression database for normal and tumor tissues. 
Background Gene Expression database of Normal and Tumor tissues 2 (GENT2) is an updated version of GENT, which has provided a user-friendly search platform for gene expression patterns across different normal and tumor tissues compiled from public gene expression data sets. Results We refactored GENT2 with recent technologies such as Apache Lucene indexing for fast search and Google Web Toolkit (GWT) framework for a user-friendly web interface. Now, GENT2 contains more than 68,000 samples and has several new useful functions. First, GENT2 now provides gene expression across 72 different tissues compared to 57 in GENT. Second, with increasing importance of tumor subtypes, GENT2 provides an option to study the differential expression and its prognostic significance based on tumor subtypes. Third, whenever available, GENT2 provides prognostic information of a gene of interest. Fourth, GENT2 provides a meta-analysis of survival information to provide users more reliable prognostic value of a gene of interest. Conclusions In conclusion, with these significant improvements, GENT2 will continue to be a useful tool to a wide range of researchers. GENT2 is freely available at http://gent2.appex.kr .",GENT2,0.991305053,Gene Expression database of Normal and Tumor tissues 2,0.9513432,GENT2,0.991305053,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/11/2019 +21982653,http://gentac.rti.org,"The National Registry of Genetically Triggered Thoracic Aortic Aneurysms and Cardiovascular Conditions (GenTAC): results from phase I and scientific opportunities in phase II. Background Genetically triggered thoracic aortic conditions (GenTACs) represent an important problem for patients and their families. Accordingly, the National Heart, Lung, and Blood Institute established the first phase of its national GenTAC Registry in 2006. 
Enrollment and diagnoses Between 2007 and 2010, 6 enrolling centers established the GenTAC I Registry consisting of 2,046 patients (Marfan syndrome 576 [28.2%], bicuspid aortic valve disease 504 [24.6%], aneurysm or dissection age <50 years 369 [18%], and others). Biologic samples for DNA analyses (white blood cells or saliva) are available in 97%, and stored plasma is available in 60% of enrollees. Results Initial scientific inquiry using the GenTAC Registry has included validation studies of genetic causes for aortic syndromes, potential usefulness of transforming growth factor beta (TGFB) blood levels in Marfan subjects, and current surgical approaches to ascending aortic conditions. Future opportunity The second phase of GenTAC will allow biannual follow-up of GenTAC I enrollees for up to 9 years, enrollment of an additional 1,500 subjects, further integration of imaging findings with clinical and genetic data through utilization of an imaging core laboratory, important validation of phenotype-genotype correlations through a phenotyping core laboratory, and integration of a scientific advisory committee to help define the full range and depth of the Registry's scientific capabilities. The registry resources are available to the external scientific community through an application process accessible at https://gentac.rti.org.",GenTAC,0.966817975,National Registry of Genetically Triggered Thoracic Aortic Aneurysms and Cardiovascular Conditions,0.941658658,GenTAC,0.966817975,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/1/2011 +31584078,http://genus.fuw.edu.pl,"Genus for biomolecules. The 'Genus for biomolecules' database (http://genus.fuw.edu.pl) collects information about topological structure and complexity of proteins and RNA chains, which is captured by the genus of a given chain and its subchains. For each biomolecule, this information is shown in the form of a genus trace plot, as well as a genus matrix diagram. 
We assemble such information for all and RNA structures deposited in the Protein Data Bank (PDB). This database presents also various statistics and extensive information about the biological function of the analyzed biomolecules. The database is regularly self-updating, once new structures are deposited in the PDB. Moreover, users can analyze their own structures.",Genus,0.941580832,biomolecules,0.538757384,Genus,0.941580832,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2020 +"23193258, 27008011",http://www.ncbi.nlm.nih.gov/geo,"NCBI GEO: archive for functional genomics data sets--update. The Gene Expression Omnibus (GEO, http://www.ncbi.nlm.nih.gov/geo/) is an international public repository for high-throughput microarray and next-generation sequence functional genomic data sets submitted by the research community. The resource supports archiving of raw data, processed data and metadata which are indexed, cross-linked and searchable. All data are freely available for download in a variety of formats. GEO also provides several web-based tools and strategies to assist users to query, analyse and visualize data. This article reports current status and recent database developments, including the release of GEO2R, an R-based web application that helps users analyse GEO data.",GEO,0.986045718,NA,0,GEO,0.986045718,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2016 +32330167,http://www.geoboundaries.org,"geoBoundaries: A global database of political administrative boundaries. We present the geoBoundaries Global Administrative Database (geoBoundaries): an online, open license resource of the geographic boundaries of political administrative divisions (i.e., state, county). Contrasted to other resources geoBoundaries (1) provides detailed information on the legal open license for every boundary in the repository, and (2) focuses on provisioning highly precise boundary data to support accurate, replicable scientific inquiry. 
Further, all data is released in a structured form, allowing for the integration of geoBoundaries with large-scale computational workflows. Our database has records for every country around the world, with up to 5 levels of administrative hierarchy. The database is accessible at http://www.geoboundaries.org, and a static version is archived on the Harvard Dataverse.",geoBoundaries,0.997810702,geoBoundaries Global Administrative Database,0.939703067,geoBoundaries,0.997810702,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/24/2020 +28771471,http://www.geome-db.org,"The Genomic Observatories Metadatabase (GeOMe): A new repository for field and sampling event metadata associated with genetic samples. The Genomic Observatories Metadatabase (GeOMe, http://www.geome-db.org/) is an open access repository for geographic and ecological metadata associated with biosamples and genetic data. Whereas public databases have served as vital repositories for nucleotide sequences, they do not accession all the metadata required for ecological or evolutionary analyses. GeOMe fills this need, providing a user-friendly, web-based interface for both data contributors and data recipients. The interface allows data contributors to create a customized yet standard-compliant spreadsheet that captures the temporal and geospatial context of each biosample. These metadata are then validated and permanently linked to archived genetic data stored in the National Center for Biotechnology Information's (NCBI's) Sequence Read Archive (SRA) via unique persistent identifiers. By linking ecologically and evolutionarily relevant metadata with publicly archived sequence data in a structured manner, GeOMe sets a gold standard for data management in biodiversity science.",GeOMe,0.991666436,Genomic Observatories Metadatabase,0.737673,GeOMe,0.991666436,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/3/2017 +34877793,http://gepsdb.ahau-edu.cn,"GEPSdb: The Gene Expression Database of Poplar under Stress. 
As a model tree species, poplar (Populus L.) has important economic and ecological value. Here, we constructed the GEPSdb (Gene Expression Database of Poplar under Stress; http://gepsdb.ahau-edu.cn/), which is an integrated database of poplar gene expression profiles derived from RNA-seq and microarray library data. This database provides a comprehensive collection of gene expression data from poplar exposed to 14 types of environmental stress from 11 high-quality RNA-seq experiments and 51 microarray libraries. The GEPSdb includes 56 genes from previous literature that have been examined in poplar and functionally verified. By incorporating data from numerous expression analyses, GEPSdb provides a user-friendly web interface for querying, browsing, and visualizing the expression profiles of related genes. Consequently, GEPSdb can be used to link transcription data with phenotypes and can enhance our understanding of important biological processes and mechanisms underlying complex agronomic traits in poplar.",GEPSdb,0.998211384,Gene Expression Database of Poplar under Stress,0.982485765,GEPSdb,0.998211384,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/8/2021 +25982314,http://germlncrna.cbiit.cuhk.edu.hk,"GermlncRNA: a unique catalogue of long non-coding RNAs and associated regulations in male germ cell development. Spermatogenic failure is a major cause of male infertility, which affects millions of couples worldwide. Recent discovery of long non-coding RNAs (lncRNAs) as critical regulators in normal and disease development provides new clues for delineating the molecular regulation in male germ cell development. However, few functional lncRNAs have been characterized to date. A major limitation in studying lncRNA in male germ cell development is the absence of germ cell-specific lncRNA annotation. 
Current lncRNA annotations are assembled by transcriptome data from heterogeneous tissue sources; specific germ cell transcript information of various developmental stages is therefore under-represented, which may lead to biased prediction or fail to identity important germ cell-specific lncRNAs. GermlncRNA provides the first comprehensive web-based and open-access lncRNA catalogue for three key male germ cell stages, including type A spermatogonia, pachytene spermatocytes and round spermatids. This information has been developed by integrating male germ transcriptome resources derived from RNA-Seq, tiling microarray and GermSAGE. Characterizations on lncRNA-associated regulatory features, potential coding gene and microRNA targets are also provided. Search results from GermlncRNA can be exported to Galaxy for downstream analysis or downloaded locally. Taken together, GermlncRNA offers a new avenue to better understand the role of lncRNAs and associated targets during spermatogenesis. Database URL: http://germlncrna.cbiit.cuhk.edu.hk/",GermlncRNA,0.997644544,NA,0,GermlncRNA,0.997644544,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/17/2015 +26342919,http://geroprotectors.org,"Geroprotectors.org: a new, structured and curated database of current therapeutic interventions in aging and age-related disease. As the level of interest in aging research increases, there is a growing number of geroprotectors, or therapeutic interventions that aim to extend the healthy lifespan and repair or reduce aging-related damage in model organisms and, eventually, in humans. There is a clear need for a manually-curated database of geroprotectors to compile and index their effects on aging and age-related diseases and link these effects to relevant studies and multiple biochemical and drug databases. Here, we introduce the first such resource, Geroprotectors (http://geroprotectors.org). 
Geroprotectors is a public, rapidly explorable database that catalogs over 250 experiments involving over 200 known or candidate geroprotectors that extend lifespan in model organisms. Each compound has a comprehensive profile complete with biochemistry, mechanisms, and lifespan effects in various model organisms, along with information ranging from chemical structure, side effects, and toxicity to FDA drug status. These are presented in a visually intuitive, efficient framework fit for casual browsing or in-depth research alike. Data are linked to the source studies or databases, providing quick and convenient access to original data. The Geroprotectors database facilitates cross-study, cross-organism, and cross-discipline analysis and saves countless hours of inefficient literature and web searching. Geroprotectors is a one-stop, knowledge-sharing, time-saving resource for researchers seeking healthy aging solutions.",Geroprotectors,0.934666336,NA,0,Geroprotectors,0.934666336,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/1/2015 +27242038,"http://gesdb.nhri.org.twDatabase, http://gesdb.nhri.org.tw","GESDB: a platform of simulation resources for genetic epidemiology studies. . Computer simulations are routinely conducted to evaluate new statistical methods, to compare the properties among different methods, and to mimic the observed data in genetic epidemiology studies. Conducting simulation studies can become a complicated task as several challenges can occur, such as the selection of an appropriate simulation tool and the specification of parameters in the simulation model. Although abundant simulated data have been generated for human genetic research, currently there is no public database designed specifically as a repository for these simulated data. With the lack of such a database, for similar studies, similar simulations may have been repeated, which resulted in redundant work. 
Thus, we created an online platform, the Genetic Epidemiology Simulation Database (GESDB), for simulation data sharing and discussion of simulation techniques for genetic epidemiology studies. GESDB consists of a database for storing simulation scripts, simulated data and documentation from published articles as well as a discussion forum, which provides a platform for discussion of the simulated data and exchanging simulation ideas. Moreover, summary statistics such as the simulation tools that are most commonly used and datasets that are most frequently downloaded are provided. The statistics will be informative for researchers to choose an appropriate simulation tool or select a common dataset for method comparisons. GESDB can be accessed at http://gesdb.nhri.org.twDatabase URL: http://gesdb.nhri.org.tw.",GESDB,0.99487108,Genetic Epidemiology Simulation Database,0.805485106,GESDB,0.99487108,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/30/2016 +31630971,http://gesur.cancer-pku.cn,"Identification of transcriptional isoforms associated with survival in cancer patient. The Cancer Genome Atlas (TCGA) project produced RNA-Seq data for tens of thousands of cancer and non-cancer samples with clinical survival information, providing an unprecedented opportunity for analyzing prognostic genes and their isoforms. In this study, we performed the first large-scale identification of transcriptional isoforms that are specifically associated with patient prognosis, even without gene-level association. These specific isoforms are defined as Transcripts Associated with Patient Prognosis (TAPPs). Although a group of TAPPs are the principal isoforms of their genes with intact functional protein domains, another group of TAPPs lack important protein domains found in their canonical gene isoforms. This dichotomy in the distribution of protein domains may indicate different patterns of TAPPs association with cancer. 
TAPPs in protein-coding genes, especially those with altered protein domains, are rich in known cancer driver genes. We further identified multiple types of cancer recurrent TAPPs, such as DCAF17-201, providing a new approach for the detection of cancer-associated events. In order to make the wide research community to study prognostic isoforms, we developed a portal named GESUR (http://gesur.cancer-pku.cn/), which illustrates the detailed prognostic characteristics of TAPPs and other isoforms. Overall, our integrated analysis of gene expression and clinical parameters provides a new perspective for understanding the applications of different gene isoforms in tumor progression.",GESUR,0.972582638,NA,0,GESUR,0.972582638,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/25/2019 +28053161,http://bbcftools.epfl.ch/getprime,"GETPrime 2.0: gene- and transcript-specific qPCR primers for 13 species including polymorphisms. GETPrime (http://bbcftools.epfl.ch/getprime) is a database with a web frontend providing gene- and transcript-specific, pre-computed qPCR primer pairs. The primers have been optimized for genome-wide specificity and for allowing the selective amplification of one or several splice variants of most known genes. To ease selection, primers have also been ranked according to defined criteria such as genome-wide specificity (with BLAST), amplicon size, and isoform coverage. Here, we report a major upgrade (2.0) of the database: eight new species (yeast, chicken, macaque, chimpanzee, rat, platypus, pufferfish, and Anolis carolinensis) now complement the five already included in the previous version (human, mouse, zebrafish, fly, and worm). Furthermore, the genomic reference has been updated to Ensembl v81 (while keeping earlier versions for backward compatibility) as a result of re-designing the back-end database and automating the import of relevant sections of the Ensembl database in species-independent fashion. 
This also allowed us to map known polymorphisms to the primers (on average three per primer for human), with the aim of reducing experimental error when targeting specific strains or individuals. Another consequence is that the inclusion of future Ensembl releases and other species has now become a relatively straightforward task.",GETPrime,0.997328401,NA,0,GETPrime,0.997328401,1,NA,21917859,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/7/2016 +21917859,"http://updepla1srv1.epfl.ch/getprime/, http://deplanckelab.epfl.ch","GETPrime: a gene- or transcript-specific primer database for quantitative real-time PCR. The vast majority of genes in humans and other organisms undergo alternative splicing, yet the biological function of splice variants is still very poorly understood in large part because of the lack of simple tools that can map the expression profiles and patterns of these variants with high sensitivity. High-throughput quantitative real-time polymerase chain reaction (qPCR) is an ideal technique to accurately quantify nucleic acid sequences including splice variants. However, currently available primer design programs do not distinguish between splice variants and also differ substantially in overall quality, functionality or throughput mode. Here, we present GETPrime, a primer database supported by a novel platform that uniquely combines and automates several features critical for optimal qPCR primer design. These include the consideration of all gene splice variants to enable either gene-specific (covering the majority of splice variants) or transcript-specific (covering one splice variant) expression profiling, primer specificity validation, automated best primer pair selection according to strict criteria and graphical visualization of the latter primer pairs within their genomic context. 
GETPrime primers have been extensively validated experimentally, demonstrating high transcript specificity in complex samples. Thus, the free-access, user-friendly GETPrime database allows fast primer retrieval and visualization for genes or groups of genes of most common model organisms, and is available at http://updepla1srv1.epfl.ch/getprime/. Database URL: http://deplanckelab.epfl.ch.",GETPrime,0.99608171,NA,0,GETPrime,0.99608171,1,NA,28053161,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,9/14/2011 +27242033,http://geve.med.u-tokai.ac.jp,"gEVE: a genome-based endogenous viral element database provides comprehensive viral protein-coding sequences in mammalian genomes. . In mammals, approximately 10% of genome sequences correspond to endogenous viral elements (EVEs), which are derived from ancient viral infections of germ cells. Although most EVEs have been inactivated, some open reading frames (ORFs) of EVEs obtained functions in the hosts. However, EVE ORFs usually remain unannotated in the genomes, and no databases are available for EVE ORFs. To investigate the function and evolution of EVEs in mammalian genomes, we developed EVE ORF databases for 20 genomes of 19 mammalian species. A total of 736,771 non-overlapping EVE ORFs were identified and archived in a database named gEVE (http://geve.med.u-tokai.ac.jp). The gEVE database provides nucleotide and amino acid sequences, genomic loci and functional annotations of EVE ORFs for all 20 genomes. 
In analyzing RNA-seq data with the gEVE database, we successfully identified the expressed EVE genes, suggesting that the gEVE database facilitates studies of the genomic analyses of various mammalian species.Database URL: http://geve.med.u-tokai.ac.jp.",gEVE,0.957576275,NA,0,gEVE,0.957576275,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/30/2016 +28090394,http://genome.sfu.ca/gexplore,"GExplore 1.4: An expanded web interface for queries on Caenorhabditis elegans protein and gene function. Genetic high-throughput experiments often result in hundreds or thousands of genes satisfying certain experimental conditions. Grouping and prioritizing a large number of genes for further analysis can be a time-consuming challenge. In 2009 we developed a web-based user interface, GExplore, to assist with large-scale data-mining related to gene function in Caenorhabditis elegans. The underlying database contained information about Caenorhabditis elegans genes and proteins including domain organization of the proteins, phenotypic descriptions, expression data and Gene Ontology Consortium annotations. These data enable users to quickly obtain an overview of biological and biochemical functions of a large number of genes at once. Since its inception the underlying database has been updated and expanded significantly. Here we describe the current version of GExplore 1.4, documenting the changes since the original release. GExplore 1.4 now contains information about the domain organization of the proteomes of 9 nematode species, can display the location of Caenorhabditis elegans mutations with respect to the domain organization of the proteins, and includes stage-specific RNAseq gene expression data generated by the modENCODE project. The underlying database has been reorganized to facilitate independent updates of the different parts of the database and to allow the addition of novel data sets in the future. 
The web interface is available under http://genome.sfu.ca/gexplore.",GExplore,0.892829657,NA,0,GExplore,0.892829657,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/19/2016 +23104379,http://www.glycanstructure.org,"Glycan fragment database: a database of PDB-based glycan 3D structures. The glycan fragment database (GFDB), freely available at http://www.glycanstructure.org, is a database of the glycosidic torsion angles derived from the glycan structures in the Protein Data Bank (PDB). Analogous to protein structure, the structure of an oligosaccharide chain in a glycoprotein, referred to as a glycan, can be characterized by the torsion angles of glycosidic linkages between relatively rigid carbohydrate monomeric units. Knowledge of accessible conformations of biologically relevant glycans is essential in understanding their biological roles. The GFDB provides an intuitive glycan sequence search tool that allows the user to search complex glycan structures. After a glycan search is complete, each glycosidic torsion angle distribution is displayed in terms of the exact match and the fragment match. The exact match results are from the PDB entries that contain the glycan sequence identical to the query sequence. The fragment match results are from the entries with the glycan sequence whose substructure (fragment) or entire sequence is matched to the query sequence, such that the fragment results implicitly include the influences from the nearby carbohydrate residues. In addition, clustering analysis based on the torsion angle distribution can be performed to obtain the representative structures among the searched glycan structures.",GFDB,0.991825804,glycan fragment database,0.877565131,GFDB,0.991825804,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/26/2012 +24137012,http://data.ggbn.org,"The Global Genome Biodiversity Network (GGBN) Data Portal. 
The Global Genome Biodiversity Network (GGBN) was formed in 2011 with the principal aim of making high-quality well-documented and vouchered collections that store DNA or tissue samples of biodiversity, discoverable for research through a networked community of biodiversity repositories. This is achieved through the GGBN Data Portal (http://data.ggbn.org), which links globally distributed databases and bridges the gap between biodiversity repositories, sequence databases and research results. Advances in DNA extraction techniques combined with next-generation sequencing technologies provide new tools for genome sequencing. Many ambitious genome sequencing projects with the potential to revolutionize biodiversity research consider access to adequate samples to be a major bottleneck in their workflow. This is linked not only to accelerating biodiversity loss and demands to improve conservation efforts but also to a lack of standardized methods for providing access to genomic samples. Biodiversity biobank-holding institutions urgently need to set a standard of collaboration towards excellence in collections stewardship, information access and sharing and responsible and ethical use of such collections. GGBN meets these needs by enabling and supporting accessibility and the efficient coordinated expansion of biodiversity biobanks worldwide.",GGBN,0.978382245,Global Genome Biodiversity Network,0.861103143,GGBN,0.978382245,1,NA,27694206,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/16/2013 +27694206,http://terms.tdwg.org/wiki/GGBN_Data_Standard,"The Global Genome Biodiversity Network (GGBN) Data Standard specification. . Genomic samples of non-model organisms are becoming increasingly important in a broad range of studies from developmental biology, biodiversity analyses, to conservation. 
Genomic sample definition, description, quality, voucher information and metadata all need to be digitized and disseminated across scientific communities. This information needs to be concise and consistent in today's ever-increasing bioinformatic era, for complementary data aggregators to easily map databases to one another. In order to facilitate exchange of information on genomic samples and their derived data, the Global Genome Biodiversity Network (GGBN) Data Standard is intended to provide a platform based on a documented agreement to promote the efficient sharing and usage of genomic sample material and associated specimen information in a consistent way. The new data standard presented here build upon existing standards commonly used within the community extending them with the capability to exchange data on tissue, environmental and DNA sample as well as sequences. The GGBN Data Standard will reveal and democratize the hidden contents of biodiversity biobanks, for the convenience of everyone in the wider biobanking community. Technical tools exist for data providers to easily map their databases to the standard.Database URL: http://terms.tdwg.org/wiki/GGBN_Data_Standard.",GGBN,0.873388514,Genome,0.565536976,GGBN,0.873388514,1,NA,24137012,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/2/2016 +33965348,http://animal.nwsuaf.edu.cn/GoatVar,"GGVD: A goat genome variation database for tracking the dynamic evolutionary process of selective signatures and ancient introgressions. Understanding the evolutionary history and adaptive process depends on the knowledge that we can acquire from both ancient and modern genomic data. With the availability of a deluge of whole-genome sequencing data from ancient and modern goat samples, a user-friendly database making efficient reuse of these important resources is needed. 
Here, we use the genomes of 208 modern domestic goats, 24 bezoars, 46 wild ibexes, and 82 ancient goats to present a comprehensive goat genome variation database (GGVD). GGVD hosts a total of ∼41.44 million SNPs, ∼5.14 million indels, 6,193 selected loci, and 112 introgression regions. Users can freely visualize the frequency of genomic variations in geographical maps, selective sweeps in interactive tables, Manhattan plots, or line charts, as well as the heatmap patterns of the SNP genotype. Ancient data can be shown in haplotypes to track the state of genetic variants of selection and introgression events in the early, middle, and late stages. For facilitating access to sequence features, the UCSC Genome Browser, BLAT, BLAST, LiftOver, and pcadapt are also integrated into GGVD. GGVD will be a convenient tool for population genetic studies and molecular marker designing in goat breeding programs, and it is publicly available at http://animal.nwsuaf.edu.cn/GoatVar.",GGVD,0.992996335,goat genome variation database,0.746109948,GGVD,0.992996335,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2021 +34699529,http://gh19ed.biocatnet.de,"The GH19 Engineering Database: Sequence diversity, substrate scope, and evolution in glycoside hydrolase family 19. The glycoside hydrolase 19 (GH19) is a bifunctional family of chitinases and endolysins, which have been studied for the control of plant fungal pests, the recycle of chitin biomass, and the treatment of multi-drug resistant bacteria. The GH19 domain-containing sequences (22,461) were divided into a chitinase and an endolysin subfamily by analyzing sequence networks, guided by taxonomy and the substrate specificity of characterized enzymes. The chitinase subfamily was split into seventeen groups, thus extending the previous classification. The endolysin subfamily is more diverse and consists of thirty-four groups. 
Despite their sequence diversity, twenty-six residues are conserved in chitinases and endolysins, which can be distinguished by two specific sequence patterns at six and four positions, respectively. Their location outside the catalytic cleft suggests a possible mechanism for substrate specificity that goes beyond the direct interaction with the substrate. The evolution of the GH19 catalytic domain was investigated by large-scale phylogeny. The inferred evolutionary history and putative horizontal gene transfer events differ from previous works. While no clear patterns were detected in endolysins, chitinases varied in sequence length by up to four loop insertions, causing at least eight distinct presence/absence loop combinations. The annotated GH19 sequences and structures are accessible via the GH19 Engineering Database (GH19ED, https://gh19ed.biocatnet.de). The GH19ED has been developed to support the prediction of substrate specificity and the search for novel GH19 enzymes from neglected taxonomic groups or in regions of the sequence space where few sequences have been described yet.",GH19ED,0.938115016,NA,0,GH19ED,0.938115016,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/26/2021 +24336862,http://giga.nova.edu,"The Global Invertebrate Genomics Alliance (GIGA): developing community resources to study diverse invertebrate genomes. Over 95% of all metazoan (animal) species comprise the ""invertebrates,"" but very few genomes from these organisms have been sequenced. We have, therefore, formed a ""Global Invertebrate Genomics Alliance"" (GIGA). Our intent is to build a collaborative network of diverse scientists to tackle major challenges (e.g., species selection, sample collection and storage, sequence assembly, annotation, analytical tools) associated with genome/transcriptome sequencing across a large taxonomic spectrum. 
We aim to promote standards that will facilitate comparative approaches to invertebrate genomics and collaborations across the international scientific community. Candidate study taxa include species from Porifera, Ctenophora, Cnidaria, Placozoa, Mollusca, Arthropoda, Echinodermata, Annelida, Bryozoa, and Platyhelminthes, among others. GIGA will target 7000 noninsect/nonnematode species, with an emphasis on marine taxa because of the unrivaled phyletic diversity in the oceans. Priorities for selecting invertebrates for sequencing will include, but are not restricted to, their phylogenetic placement; relevance to organismal, ecological, and conservation research; and their importance to fisheries and human health. We highlight benefits of sequencing both whole genomes (DNA) and transcriptomes and also suggest policies for genomic-level data access and sharing based on transparency and inclusiveness. The GIGA Web site (http://giga.nova.edu) has been launched to facilitate this collaborative venture.",GIGA,0.974462986,The Global Invertebrate Genomics Alliance,0.78900679,GIGA,0.974462986,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2014 +24622612,http://www.gigadb.org,"GigaDB: promoting data dissemination and reproducibility. Often papers are published where the underlying data supporting the research are not made available because of the limitations of making such large data sets publicly and permanently accessible. Even if the raw data are deposited in public archives, the essential analysis intermediaries, scripts or software are frequently not made available, meaning the science is not reproducible. The GigaScience journal is attempting to address this issue with the associated data storage and dissemination portal, the GigaScience database (GigaDB). Here we present the current version of GigaDB and reveal plans for the next generation of improvements. 
However, most importantly, we are soliciting responses from you, the users, to ensure that future developments are focused on the data storage and dissemination issues that still need resolving. Database URL: http://www.gigadb.org.",GigaDB,0.98824966,NA,0,GigaDB,0.98824966,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/12/2014 +33045729,http://idrblab.org/gimica,"GIMICA: host genetic and immune factors shaping human microbiota. Besides the environmental factors having tremendous impacts on the composition of microbial community, the host factors have recently gained extensive attentions on their roles in shaping human microbiota. There are two major types of host factors: host genetic factors (HGFs) and host immune factors (HIFs). These factors of each type are essential for defining the chemical and physical landscapes inhabited by microbiota, and the collective consideration of both types have great implication to serve comprehensive health management. However, no database was available to provide the comprehensive factors of both types. Herein, a database entitled 'Host Genetic and Immune Factors Shaping Human Microbiota (GIMICA)' was constructed. Based on the 4257 microbes confirmed to inhabit nine sites of human body, 2851 HGFs (1368 single nucleotide polymorphisms (SNPs), 186 copy number variations (CNVs), and 1297 non-coding ribonucleic acids (RNAs)) modulating the expression of 370 microbes were collected, and 549 HIFs (126 lymphocytes and phagocytes, 387 immune proteins, and 36 immune pathways) regulating the abundance of 455 microbes were also provided. 
All in all, GIMICA enabled the collective consideration not only between different types of host factor but also between the host and environmental ones, which is freely accessible without login requirement at: https://idrblab.org/gimica/.",GIMICA,0.988819897,Host Genetic and Immune Factors Shaping Human Microbiota,0.927597477,GIMICA,0.988819897,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +29649979,http://ginsengdb.snu.ac.kr,"Ginseng Genome Database: an open-access platform for genomics of Panax ginseng. BACKGROUND:The ginseng (Panax ginseng C.A. Meyer) is a perennial herbaceous plant that has been used in traditional oriental medicine for thousands of years. Ginsenosides, which have significant pharmacological effects on human health, are the foremost bioactive constituents in this plant. Having realized the importance of this plant to humans, an integrated omics resource becomes indispensable to facilitate genomic research, molecular breeding and pharmacological study of this herb. DESCRIPTION:The first draft genome sequences of P. ginseng cultivar ""Chunpoong"" were reported recently. Here, using the draft genome, transcriptome, and functional annotation datasets of P. ginseng, we have constructed the Ginseng Genome Database http://ginsengdb.snu.ac.kr /, the first open-access platform to provide comprehensive genomic resources of P. ginseng. The current version of this database provides the most up-to-date draft genome sequence (of approximately 3000 Mbp of scaffold sequences) along with the structural and functional annotations for 59,352 genes and digital expression of genes based on transcriptome data from different tissues, growth stages and treatments. In addition, tools for visualization and the genomic data from various analyses are provided. All data in the database were manually curated and integrated within a user-friendly query page. CONCLUSION:This database provides valuable resources for a range of research fields related to P. 
ginseng and other species belonging to the Apiales order as well as for plant research communities in general. Ginseng genome database can be accessed at http://ginsengdb.snu.ac.kr /.",NA,0,Ginseng Genome Database,0.915763418,Ginseng Genome Database,0.915763418,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/12/2018 +21609420,http://gisao.genome.tugraz.at,"GiSAO.db: a database for ageing research. Background Age-related gene expression patterns of Homo sapiens as well as of model organisms such as Mus musculus, Saccharomyces cerevisiae, Caenorhabditis elegans and Drosophila melanogaster are a basis for understanding the genetic mechanisms of ageing. For an effective analysis and interpretation of expression profiles it is necessary to store and manage huge amounts of data in an organized way, so that these data can be accessed and processed easily. Description GiSAO.db (Genes involved in senescence, apoptosis and oxidative stress database) is a web-based database system for storing and retrieving ageing-related experimental data. Expression data of genes and miRNAs, annotation data like gene identifiers and GO terms, orthologs data and data of follow-up experiments are stored in the database. A user-friendly web application provides access to the stored data. KEGG pathways were incorporated and links to external databases augment the information in GiSAO.db. Search functions facilitate retrieval of data which can also be exported for further processing. Conclusions We have developed a centralized database that is very well suited for the management of data for ageing research. 
The database can be accessed at https://gisao.genome.tugraz.at and all the stored data can be viewed with a guest account.",GiSAO.db,0.948274958,Genes,0.665788551,GiSAO.db,0.948274958,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/24/2011 +25971743,http://zhanglab.ccmb.med.umich.edu/GLASS,"GLASS: a comprehensive database for experimentally validated GPCR-ligand associations. Motivation G protein-coupled receptors (GPCRs) are probably the most attractive drug target membrane proteins, which constitute nearly half of drug targets in the contemporary drug discovery industry. While the majority of drug discovery studies employ existing GPCR and ligand interactions to identify new compounds, there remains a shortage of specific databases with precisely annotated GPCR-ligand associations. Results We have developed a new database, GLASS, which aims to provide a comprehensive, manually curated resource for experimentally validated GPCR-ligand associations. A new text-mining algorithm was proposed to collect GPCR-ligand interactions from the biomedical literature, which is then crosschecked with five primary pharmacological datasets, to enhance the coverage and accuracy of GPCR-ligand association data identifications. A special architecture has been designed to allow users for making homologous ligand search with flexible bioactivity parameters. The current database contains ∼500 000 unique entries, of which the vast majority stems from ligand associations with rhodopsin- and secretin-like receptors. The GLASS database should find its most useful application in various in silico GPCR screening and functional annotation studies. Availability and implementation The website of GLASS database is freely available at http://zhanglab.ccmb.med.umich.edu/GLASS/. 
Contact zhng@umich.edu Supplementary information Supplementary data are available at Bioinformatics online.",GLASS,0.917461634,NA,0,GLASS,0.917461634,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/13/2015 +30329036,http://genelab.nasa.gov,"GeneLab: Omics database for spaceflight experiments. Motivation To curate and organize expensive spaceflight experiments conducted aboard space stations and maximize the scientific return of investment, while democratizing access to vast amounts of spaceflight related omics data generated from several model organisms. Results The GeneLab Data System (GLDS) is an open access database containing fully coordinated and curated 'omics' (genomics, transcriptomics, proteomics, metabolomics) data, detailed metadata and radiation dosimetry for a variety of model organisms. GLDS is supported by an integrated data system allowing federated search across several public bioinformatics repositories. Archived datasets can be queried using full-text search (e.g. keywords, Boolean and wildcards) and results can be sorted in multifactorial manner using assistive filters. GLDS also provides a collaborative platform built on GenomeSpace for sharing files and analyses with collaborators. It currently houses 172 datasets and supports standard guidelines for submission of datasets, MIAME (for microarray), ENCODE Consortium Guidelines (for RNA-seq) and MIAPE Guidelines (for proteomics). Availability and implementation https://genelab.nasa.gov/.",GLDS,0.96340251,NA,0,GLDS,0.96340251,1,"29652620.0, 33080015.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,5/1/2019 +31811943,http://bigd.big.ac.cn/gliomaDB,"GliomaDB: A Web Server for Integrating Glioma Omics Data and Interactive Analysis. Gliomas are one of the most common types of brain cancers. Numerous efforts have been devoted to studying the mechanisms of glioma genesis and identifying biomarkers for diagnosis and treatment. 
To help further investigations, we present a comprehensive database named GliomaDB. GliomaDB includes 21,086 samples from 4303 patients and integrates genomic, transcriptomic, epigenomic, clinical, and gene-drug association data regarding glioblastoma multiforme (GBM) and low-grade glioma (LGG) from The Cancer Genome Atlas (TCGA), Gene Expression Omnibus (GEO), the Chinese Glioma Genome Atlas (CGGA), the Memorial Sloan Kettering Cancer Center Integrated Mutation Profiling of Actionable Cancer Targets (MSK-IMPACT), the US Food and Drug Administration (FDA), and PharmGKB. GliomaDB offers a user-friendly interface for two main types of functionalities. The first comprises queries of (i) somatic mutations, (ii) gene expression, (iii) microRNA (miRNA) expression, and (iv) DNA methylation. In addition, queries can be executed at the gene, region, and base level. Second, GliomaDB allows users to perform survival analysis, coexpression network visualization, multi-omics data visualization, and targeted drug recommendations based on personalized variations. GliomaDB bridges the gap between glioma genomics big data and the delivery of integrated information for end users, thus enabling both researchers and clinicians to effectively use publicly available data and empowering the progression of precision medicine in glioma. GliomaDB is freely accessible at http://bigd.big.ac.cn/gliomaDB.",GliomaDB,0.996975422,NA,0,GliomaDB,0.996975422,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2019 +24377417,http://gcm.wfcc.info,"Global catalogue of microorganisms (gcm): a comprehensive database and information retrieval, analysis, and visualization system for microbial resources. Background Throughout the long history of industrial and academic research, many microbes have been isolated, characterized and preserved (whenever possible) in culture collections. 
With the steady accumulation in observational data of biodiversity as well as microbial sequencing data, bio-resource centers have to function as data and information repositories to serve academia, industry, and regulators on behalf of and for the general public. Hence, the World Data Centre for Microorganisms (WDCM) started to take its responsibility for constructing an effective information environment that would promote and sustain microbial research data activities, and bridge the gaps currently present within and outside the microbiology communities. Description Strain catalogue information was collected from collections by online submission. We developed tools for automatic extraction of strain numbers and species names from various sources, including Genbank, Pubmed, and SwissProt. These new tools connect strain catalogue information with the corresponding nucleotide and protein sequences, as well as to genome sequence and references citing a particular strain. All information has been processed and compiled in order to create a comprehensive database of microbial resources, and was named Global Catalogue of Microorganisms (GCM). The current version of GCM contains information of over 273,933 strains, which includes 43,436 bacterial, fungal and archaea species from 52 collections in 25 countries and regions.A number of online analysis and statistical tools have been integrated, together with advanced search functions, which should greatly facilitate the exploration of the content of GCM. Conclusion A comprehensive dynamic database of microbial resources has been created, which unveils the resources preserved in culture collections especially for those whose informatics infrastructures are still under development, which should foster cumulative research, facilitating the activities of microbiologists world-wide, who work in both public and industrial research centres. 
This database is available from http://gcm.wfcc.info.",GCM,0.755801558,Global catalogue of microorganisms,0.813462162,Global catalogue of microorganisms,0.813462162,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/30/2013 +32661237,http://globalfungi.com,"GlobalFungi, a global database of fungal occurrences from high-throughput-sequencing metabarcoding studies. Fungi are key players in vital ecosystem services, spanning carbon cycling, decomposition, symbiotic associations with cultivated and wild plants and pathogenicity. The high importance of fungi in ecosystem processes contrasts with the incompleteness of our understanding of the patterns of fungal biogeography and the environmental factors that drive those patterns. To reduce this gap of knowledge, we collected and validated data published on the composition of soil fungal communities in terrestrial environments including soil and plant-associated habitats and made them publicly accessible through a user interface at https://globalfungi.com . The GlobalFungi database contains over 600 million observations of fungal sequences across > 17 000 samples with geographical locations and additional metadata contained in 178 original studies with millions of unique nucleotide sequences (sequence variants) of the fungal internal transcribed spacers (ITS) 1 and 2 representing fungal species and genera. The study represents the most comprehensive atlas of global fungal distribution, and it is framed in such a way that third-party data addition is possible.",GlobalFungi,0.989663839,NA,0,GlobalFungi,0.989663839,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/13/2020 +25753716,http://www.glyco3d.cermav.cnrs.fr,"Glyco3D: a portal for structural glycosciences. 
The present work describes, in a detailed way, a family of databases covering the three-dimensional features of monosaccharides, disaccharides, oligosaccharides, polysaccharides, glycosyltransferases, lectins, monoclonal antibodies against carbohydrates, and glycosaminoglycan-binding proteins. These databases have been developed with non-proprietary software, and they are open freely to the scientific community. They are accessible through the common portal called ""Glyco3D"" http://www.glyco3d.cermav.cnrs.fr. The databases are accompanied by a user-friendly graphical user interface (GUI) which offers several search options. All three-dimensional structures are available for visual consultations (with basic measurements possibilities) and can be downloaded in commonly used formats for further uses.",Glyco3D,0.945883989,NA,0,Glyco3D,0.945883989,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2015 +22847935,http://glycosciences.de/glycocd/index.php,GlycoCD: a repository for carbohydrate-related CD antigens. Summary The open access comprehensive GlycoCD database application is for representation and retrieval of carbohydrate-related clusters of differentiation (CDs). The main objective of this database platform is to provide information about interactions of carbohydrate moieties with proteins that are important for identification of specific cell surface molecule with a focus on the integration of data from carbohydrate microarray databases. GlycoCD database comprises two sections: the carbohydrate recognition CD and glycan CD. It allows easy access through a user-friendly web interface to all carbohydrate-defined CDs and those that interact with carbohydrates along with other relevant information. 
Availability The database is freely available at http://glycosciences.de/glycocd/index.php Contact r.s-albiez@dkfz.de.,GlycoCD,0.997937024,NA,0,GlycoCD,0.997937024,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/30/2012 +21591763,http://betenbaugh.jhu.edu/GlycoFish,"GlycoFish: a database of zebrafish N-linked glycoproteins identified using SPEG method coupled with LC/MS. Zebrafish (Danio rerio) is a model organism that is used to study the mechanisms and pathways of human disorders. Many dysfunctions in neurological, development, and neuromuscular systems are due to glycosylation deficiencies, but the glycoproteins involved in zebrafish embryonic development have not been established. In this study, a mass spectrometry-based glycoproteomic characterization of zebrafish embryos was performed to identify the N-linked glycoproteins and N-linked glycosylation sites. To increase the number of glycopeptides, proteins from zebrafish were digested with two different proteases--chymotrypsin and trypsin--into peptides of different length. The N-glycosylated peptides of zebrafish were then captured by the solid-phase extraction of N-linked glycopeptides (SPEG) method and the peptides were identified with an LTQ OrbiTrap Velos mass spectrometer. From 265 unique glycopeptides, including 269 consensus NXT/S glycosites, we identified 169 different N-glycosylated proteins. The identified glycoproteins were highly abundant in proteins belonging to the transporter, cell adhesion, and ion channel/ion binding categories, which are important to embryonic, organ, and central nervous system development. This proteomics data will expand our knowledge about glycoproteins in zebrafish and may be used to elucidate the role that glycosylation plays in cellular processes and disease. 
The glycoprotein data are available through the GlycoFish database (http://betenbaugh.jhu.edu/GlycoFish) introduced in this paper.",GlycoFish,0.797046423,NA,0,GlycoFish,0.797046423,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/8/2011 +27436239,http://apps.connexios.com/glycogait,"GlycoGAIT: A web database to browse glycogenes and lectins under gastric inflammatory diseases. The perplexing nature of dynamic glycosylation modification plays imperative role in determining the regulatory role of key glycoconjugates involved in immune system. Systematic analysis of change in expression pattern of glycogenes and lectins can bring in a comprehensive understanding of genetic basis of the glycobiological changes occurring in pathological condition. Advancement in the field of glycobiology has capacitated the process of linking gene expression changes of glycogenes with its biological function. This instigated us to systematically analyze changes in expression patterns focusing on glycome genomics under diverse gastrointestinal immune dysfunction background. To necessitate this, as a pilot project, we carefully integrated several publically available databases to construct a glycosylation process associated gene set as well as public expression microarray data associated with gastrointestinal infections into an online database called Glycosylation and Gut Associated Immune Tolerance (GlycoGAIT). Currently the database comprises of 548 well characterized genes belonging to glycogenes and lectins along with gene expression data obtained from human biopsy samples under both H. pylori infection and inflammatory bowel disease (IBD) condition. The user-friendly interface enables the users to quickly compare and interpret changes in expression patterns of glycome genomics under different gut associated inflammatory conditions. 
The database is available online at: https://apps.connexios.com/glycogait/.",GlycoGAIT,0.980752434,Glycosylation and Gut Associated Immune,0.728825476,GlycoGAIT,0.980752434,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/18/2016 +26314736,http://www.glycomob.org,"GlycoMob: an ion mobility-mass spectrometry collision cross section database for glycomics. Ion mobility mass spectrometry (IM-MS) is a promising analytical technique for glycomics that separates glycan ions based on their collision cross section (CCS) and provides glycan precursor and fragment masses. It has been shown that isomeric oligosaccharide species can be separated by IM and identified on basis of their CCS and fragmentation. These results indicate that adding CCSs information for glycans and glycan fragments to searchable databases and analysis pipelines will increase identification confidence and accuracy. We have developed a freely accessible database, GlycoMob ( http://www.glycomob.org ), containing over 900 CCSs values of glycans, oligosaccharide standards and their fragments that will be continually updated. We have measured the absolute CCSs of calibration standards, biologically derived and synthetic N-glycans ionized with various adducts in positive and negative mode or as protonated (positive ion) and deprotonated (negative ion) ions.",GlycoMob,0.989852786,NA,0,GlycoMob,0.989852786,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/28/2015 +33174597,http://glycopost.glycosmos.org,"GlycoPOST realizes FAIR principles for glycomics mass spectrometry data. For the reproducibility and sustainability of scientific research, FAIRness (Findable, Accessible, Interoperable and Re-usable), with respect to the release of raw data obtained by researchers, is one of the most important principles underpinning the future of open science. In genomics and transcriptomics, the sharing of raw data from next-generation sequencers is made possible through public repositories. 
In addition, in proteomics, the deposition of raw data from mass spectrometry (MS) experiments into repositories is becoming standardized. However, a standard repository for such MS data had not yet been established in glycomics. With the increasing number of glycomics MS data, therefore, we have developed GlycoPOST (https://glycopost.glycosmos.org/), a repository for raw MS data generated from glycomics experiments. In just the first year since the release of GlycoPOST, 73 projects have already been registered by researchers around the world, and the number of registered projects is continuously growing, making a significant contribution to the future FAIRness of the glycomics field. GlycoPOST is a free resource to the community and accepts (and will continue to accept in the future) raw data regardless of vendor-specific formats.",GlycoPOST,0.993143857,NA,0,GlycoPOST,0.993143857,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +30357361,http://www.glycosciences.de/database,"Glycosciences.DB: an annotated data collection linking glycomics and proteomics data (2018 update). Glycosciences.DB, the glycan structure database of the Glycosciences.de portal, collects various kinds of data on glycan structures, including carbohydrate moieties from worldwide Protein Data Bank (wwPDB) structures. This way it forms a bridge between glycomics and proteomics resources. A major update of this database combines a redesigned web interface with a series of new functions. These include separate entry pages not only for glycan structures but also for literature references and wwPDB entries, improved substructure search options, a newly available keyword search covering all types of entries in one query, and new types of information that is added to glycan structures. These new features are described in detail in this article, and options how users can provide information to the database are discussed as well. 
Glycosciences.DB is available at http://www.glycosciences.de/database/ and can be freely accessed.",Glycosciences.DB,0.974128564,NA,0,Glycosciences.DB,0.974128564,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +31516400,http://nglycositeatlas.biomarkercenter.org,"N-GlycositeAtlas: a database resource for mass spectrometry-based human N-linked glycoprotein and glycosylation site mapping. Background N-linked glycoprotein is a highly interesting class of proteins for clinical and biological research. The large-scale characterization of N-linked glycoproteins accomplished by mass spectrometry-based glycoproteomics has provided valuable insights into the interdependence of glycoprotein structure and protein function. However, these studies focused mainly on the analysis of specific sample type, and lack the integration of glycoproteomic data from different tissues, body fluids or cell types. Methods In this study, we collected the human glycosite-containing peptides identified through their de-glycosylated forms by mass spectrometry from over 100 publications and unpublished datasets generated from our laboratory. A database resource termed N-GlycositeAtlas was created and further used for the distribution analyses of glycoproteins among different human cells, tissues and body fluids. Finally, a web interface of N-GlycositeAtlas was created to maximize the utility and value of the database. Results The N-GlycositeAtlas database contains more than 30,000 glycosite-containing peptides (representing > 14,000 N-glycosylation sites) from more than 7200 N-glycoproteins from different biological sources including human-derived tissues, body fluids and cell lines from over 100 studies. 
Conclusions The entire human N-glycoproteome database as well as 22 sub-databases associated with individual tissues or body fluids can be downloaded from the N-GlycositeAtlas website at http://nglycositeatlas.biomarkercenter.org.",GlycositeAtlas,0.808731914,NA,0,GlycositeAtlas,0.808731914,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,9/7/2019 +31841142,http://www.glycanstructure.org/glymdb,"GlyMDB: Glycan Microarray Database and analysis toolset. Motivation Glycan microarrays are capable of illuminating the interactions of glycan-binding proteins (GBPs) against hundreds of defined glycan structures, and have revolutionized the investigations of protein-carbohydrate interactions underlying numerous critical biological activities. However, it is difficult to interpret microarray data and identify structural determinants promoting glycan binding to glycan-binding proteins due to the ambiguity in microarray fluorescence intensity and complexity in branched glycan structures. To facilitate analysis of glycan microarray data alongside protein structure, we have built the Glycan Microarray Database (GlyMDB), a web-based resource including a searchable database of glycan microarray samples and a toolset for data/structure analysis. Results The current GlyMDB provides data visualization and glycan-binding motif discovery for 5203 glycan microarray samples collected from the Consortium for Functional Glycomics. The unique feature of GlyMDB is to link microarray data to PDB structures. The GlyMDB provides different options for database query, and allows users to upload their microarray data for analysis. After search or upload is complete, users can choose the criterion for binder versus non-binder classification. They can view the signal intensity graph including the binder/non-binder threshold followed by a list of glycan-binding motifs. One can also compare the fluorescence intensity data from two different microarray samples. 
A protein sequence-based search is performed using BLAST to match microarray data with all available PDB structures containing glycans. The glycan ligand information is displayed, and links are provided for structural visualization and redirection to other modules in GlycanStructure.ORG for further investigation of glycan-binding sites and glycan structures. Availability and implementation http://www.glycanstructure.org/glymdb. Supplementary information Supplementary data are available at Bioinformatics online.",GlyMDB,0.990882347,Glycan Microarray Database,0.747555542,GlyMDB,0.990882347,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2020 +34314492,http://data.glygen.org,"Enhancing the interoperability of glycan data flow between ChEBI, PubChem and GlyGen. Glycans play a vital role in health, disease, bioenergy, biomaterials and bio-therapeutics. As a result, there is keen interest to identify and increase glycan data in bioinformatics databases like ChEBI and PubChem, and connecting them to resources at the EMBL-EBI and NCBI to facilitate access to important annotations at a global level. GlyTouCan is a comprehensive archival database that contains glycans obtained primarily through batch upload from glycan repositories, glycoprotein databases and individual laboratories. In many instances, the glycan structures deposited in GlyTouCan may not be fully defined or have supporting experimental evidence and citations. Databases like ChEBI and PubChem were designed to accommodate complete atomistic structures with well-defined chemical linkages. As a result, they cannot easily accommodate the structural ambiguity inherent in glycan databases. Consequently, there is a need to improve the organization of glycan data coherently to enhance connectivity across the major NCBI, EMBL-EBI and glycoscience databases. This paper outlines a workflow developed in collaboration between GlyGen, ChEBI and PubChem to improve the visibility and connectivity of glycan data across these resources. 
GlyGen hosts a subset of glycans (~29,000) from the GlyTouCan database and has submitted valuable glycan annotations to the PubChem database and integrated over 10,500 (including ambiguously defined) glycans into the ChEBI database. The integrated glycans were prioritized based on links to PubChem and connectivity to glycoprotein data. The pipeline provides a blueprint for how glycan data can be harmonized between different resources. The current PubChem, ChEBI and GlyTouCan mappings can be downloaded from GlyGen (https://data.glygen.org).",GlyTouCan,0.997777939,NA,0,GlyTouCan,0.997777939,1,NA,"26476458.0, 33125071.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,12/1/2021 +"26476458, 33125071",http://glytoucan.org,"GlyTouCan 1.0--The international glycan structure repository. Glycans are known as the third major class of biopolymers, next to DNA and proteins. They cover the surfaces of many cells, serving as the 'face' of cells, whereby other biomolecules and viruses interact. The structure of glycans, however, differs greatly from DNA and proteins in that they are branched, as opposed to linear sequences of amino acids or nucleotides. Therefore, the storage of glycan information in databases, let alone their curation, has been a difficult problem. This has caused many duplicated efforts when integration is attempted between different databases, making an international repository for glycan structures, where unique accession numbers are assigned to every identified glycan structure, necessary. As such, an international team of developers and glycobiologists have collaborated to develop this repository, called GlyTouCan and is available at http://glytoucan.org/, to provide a centralized resource for depositing glycan structures, compositions and topologies, and to retrieve accession numbers for each of these registered entries. 
This will thus enable researchers to reference glycan structures simply by accession number, as opposed to by chemical structure, which has been a burden to integrate glycomics databases in the past.",GlyTouCan,0.994329453,NA,0,GlyTouCan,0.994329453,2,NA,34314492,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +25084271,http://biotechlab.fudan.edu.cn/database/gmenzy,"GMEnzy: a genetically modified enzybiotic database. GMEs are genetically modified enzybiotics created through molecular engineering approaches to deal with the increasing problem of antibiotic resistance prevalence. We present a fully manually curated database, GMEnzy, which focuses on GMEs and their design strategies, production and purification methods, and biological activity data. GMEnzy collects and integrates all available GMEs and their related information into one web based database. Currently GMEnzy holds 186 GMEs from published literature. The GMEnzy interface is easy to use, and allows users to rapidly retrieve data according to desired search criteria. GMEnzy's construction will increase the efficiency and convenience of improving these bioactive proteins for specific requirements, and will expand the arsenal available for researches to control drug-resistant pathogens. This database will prove valuable for researchers interested in genetically modified enzybiotics studies. GMEnzy is freely available on the Web at http://biotechlab.fudan.edu.cn/database/gmenzy/.",GMEnzy,0.996969402,NA,0,GMEnzy,0.996969402,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2014 +31504765,http://gmrepo.humangut.info,"GMrepo: a database of curated and consistently annotated human gut metagenomes. GMrepo (data repository for Gut Microbiota) is a database of curated and consistently annotated human gut metagenomes. Its main purpose is to facilitate the reusability and accessibility of the rapidly growing human metagenomic data. 
This is achieved by consistently annotating the microbial contents of collected samples using state-of-art toolsets and by manual curation of the meta-data of the corresponding human hosts. GMrepo organizes the collected samples according to their associated phenotypes and includes all possible related meta-data such as age, sex, country, body-mass-index (BMI) and recent antibiotics usage. To make relevant information easier to access, GMrepo is equipped with a graphical query builder, enabling users to make customized, complex and biologically relevant queries. For example, to find (1) samples from healthy individuals of 18 to 25 years old with BMIs between 18.5 and 24.9, or (2) projects that are related to colorectal neoplasms, with each containing >100 samples and both patients and healthy controls. Precomputed species/genus relative abundances, prevalence within and across phenotypes, and pairwise co-occurrence information are all available at the website and accessible through programmable interfaces. So far, GMrepo contains 58 903 human gut samples/runs (including 17 618 metagenomes and 41 285 amplicons) from 253 projects concerning 92 phenotypes. GMrepo is freely available at: https://gmrepo.humangut.info.",GMrepo,0.998026729,data repository for Gut Microbiota,0.834152177,GMrepo,0.998026729,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24767249,http://mtb.dobzhanskycenter.org,"Genome-wide Mycobacterium tuberculosis variation (GMTV) database: a new tool for integrating sequence variations and epidemiology. Background Tuberculosis (TB) poses a worldwide threat due to advancing multidrug-resistant strains and deadly co-infections with Human immunodeficiency virus. Today large amounts of Mycobacterium tuberculosis whole genome sequencing data are being assessed broadly and yet there exists no comprehensive online resource that connects M. tuberculosis genome variants with geographic origin, with drug resistance or with clinical outcome. 
Description Here we describe a broadly inclusive unifying Genome-wide Mycobacterium tuberculosis Variation (GMTV) database, (http://mtb.dobzhanskycenter.org) that catalogues genome variations of M. tuberculosis strains collected across Russia. GMTV contains a broad spectrum of data derived from different sources and related to M. tuberculosis molecular biology, epidemiology, TB clinical outcome, year and place of isolation, drug resistance profiles and displays the variants across the genome using a dedicated genome browser. GMTV database, which includes 1084 genomes and over 69,000 SNP or Indel variants, can be queried about M. tuberculosis genome variation and putative associations with drug resistance, geographical origin, and clinical stages and outcomes. Conclusions Implementation of GMTV tracks the pattern of changes of M. tuberculosis strains in different geographical areas, facilitates disease gene discoveries associated with drug resistance or different clinical sequelae, and automates comparative genomic analyses among M. tuberculosis strains.",GMTV,0.963946044,Genome-wide Mycobacterium tuberculosis Variation,0.959191367,GMTV,0.963946044,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/25/2014 +34859531,http://gnomad.broadinstitute.org,"Variant interpretation using population databases: Lessons from gnomAD. Reference population databases are an essential tool in variant and gene interpretation. Their use guides the identification of pathogenic variants amidst the sea of benign variation present in every human genome, and supports the discovery of new disease-gene relationships. The Genome Aggregation Database (gnomAD) is currently the largest and most widely used publicly available collection of population variation from harmonized sequencing data. The data is available through the online gnomAD browser (https://gnomad.broadinstitute.org/) that enables rapid and intuitive variant analysis. 
This review provides guidance on the content of the gnomAD browser, and its usage for variant and gene interpretation. We introduce key features including allele frequency, per-base expression levels, constraint scores, and variant co-occurrence, alongside guidance on how to use these in analysis, with a focus on the interpretation of candidate variants and novel genes in rare disease.",gnomAD,0.988560289,Genome Aggregation Database,0.841847384,gnomAD,0.988560289,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/16/2021 +27504778,http://gnps.ucsd.edu,"Sharing and community curation of mass spectrometry data with Global Natural Products Social Molecular Networking. The potential of the diverse chemistries present in natural products (NP) for biotechnology and medicine remains untapped because NP databases are not searchable with raw data and the NP community has no way to share data other than in published papers. Although mass spectrometry (MS) techniques are well-suited to high-throughput characterization of NP, there is a pressing need for an infrastructure to enable sharing and curation of data. We present Global Natural Products Social Molecular Networking (GNPS; http://gnps.ucsd.edu), an open-access knowledge base for community-wide organization and sharing of raw, processed or identified tandem mass (MS/MS) spectrometry data. In GNPS, crowdsourced curation of freely available community-wide reference MS libraries will underpin improved annotations. Data-driven social-networking should facilitate identification of spectra and foster collaborations. We also introduce the concept of 'living data' through continuous reanalysis of deposited data.",GNPS,0.987443785,Global Natural Products Social Molecular Networking,0.953693228,GNPS,0.987443785,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2016 +"27899567, 30395331",http://geneontology.org,"Expansion of the Gene Ontology knowledgebase and resources. 
The Gene Ontology (GO) is a comprehensive resource of computable knowledge regarding the functions of genes and gene products. As such, it is extensively used by the biomedical research community for the analysis of -omics and related data. Our continued focus is on improving the quality and utility of the GO resources, and we welcome and encourage input from researchers in all areas of biology. In this update, we summarize the current contents of the GO knowledgebase, and present several new features and improvements that have been made to the ontology, the annotations and the tools. Among the highlights are 1) developments that facilitate access to, and application of, the GO knowledgebase, and 2) extensions to the resource as well as increasing support for descriptions of causal models of biological systems and network biology. To learn more, visit http://geneontology.org/.",GO,0.940556675,Ontology resource,0.794861913,GO,0.940556675,2,33290552,25428369,low_prob_best_name,do not remove,merge on record with best name prob,conflicting record(s) to be removed,NA,NA,NA,1/1/2019 +25428369,http://www.geneontology.org,"Gene Ontology Consortium: going forward. The Gene Ontology (GO; http://www.geneontology.org) is a community-based bioinformatics resource that supplies information about gene product function using ontologies to represent biological knowledge. Here we describe improvements and expansions to several branches of the ontology, as well as updates that have allowed us to more efficiently disseminate the GO and capture feedback from the research community. The Gene Ontology Consortium (GOC) has expanded areas of the ontology such as cilia-related terms, cell-cycle terms and multicellular organism processes. We have also implemented new tools for generating ontology terms based on a set of logical rules making use of templates, and we have made efforts to increase our use of logical definitions. 
The GOC has a new and improved web site summarizing new developments and documentation, serving as a portal to GO data. Users can perform GO enrichment analysis, and search the GO for terms, annotations to gene products, and associated metadata across multiple species using the all-new AmiGO 2 browser. We encourage and welcome the input of the research community in all biological areas in our continued effort to improve the Gene Ontology.",GO,0.802682489,The,0.355363667,GO,0.802682489,1,"22102568.0, 23161678.0","27899567.0, 30395331.0",low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,11/26/2014 +"22123736, 25378336",http://www.ebi.ac.uk/GOA,"The UniProt-GO Annotation database in 2011. The GO annotation dataset provided by the UniProt Consortium (GOA: http://www.ebi.ac.uk/GOA) is a comprehensive set of evidenced-based associations between terms from the Gene Ontology resource and UniProtKB proteins. Currently supplying over 100 million annotations to 11 million proteins in more than 360,000 taxa, this resource has increased 2-fold over the last 2 years and has benefited from a wealth of checks to improve annotation correctness and consistency as well as now supplying a greater information content enabled by GO Consortium annotation format developments. Detailed, manual GO annotations obtained from the curation of peer-reviewed papers are directly contributed by all UniProt curators and supplemented with manual and electronic annotations from 36 model organism and domain-focused scientific resources. The inclusion of high-quality, automatic annotation predictions ensures the UniProt GO annotation dataset supplies functional information to a wide range of proteins, including those from poorly characterized, non-model organism species. UniProt GO annotations are freely available in a range of formats accessible by both file downloads and web-based views. 
In addition, the introduction of a new, normalized file format in 2010 has made for easier handling of the complete UniProt-GOA data set.",GOA,0.988438288,Gene Ontology Annotation,0.705664259,GOA,0.988438288,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2014 +25189782,http://mygoblet.org/training-portal,"The GOBLET training portal: a global repository of bioinformatics training materials, courses and trainers. Summary Rapid technological advances have led to an explosion of biomedical data in recent years. The pace of change has inspired new collaborative approaches for sharing materials and resources to help train life scientists both in the use of cutting-edge bioinformatics tools and databases and in how to analyse and interpret large datasets. A prototype platform for sharing such training resources was recently created by the Bioinformatics Training Network (BTN). Building on this work, we have created a centralized portal for sharing training materials and courses, including a catalogue of trainers and course organizers, and an announcement service for training events. For course organizers, the portal provides opportunities to promote their training events; for trainers, the portal offers an environment for sharing materials, for gaining visibility for their work and promoting their skills; for trainees, it offers a convenient one-stop shop for finding suitable training resources and identifying relevant training events and activities locally and worldwide. Availability and implementation http://mygoblet.org/training-portal.",GOBLET,0.625621259,NA,0,GOBLET,0.625621259,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/4/2014 +33290552,http://geneontology.org,"The Gene Ontology resource: enriching a GOld mine. The Gene Ontology Consortium (GOC) provides the most comprehensive resource currently available for computable knowledge regarding the functions of genes and gene products. Here, we report the advances of the consortium over the past two years. 
The new GO-CAM annotation framework was notably improved, and we formalized the model with a computational schema to check and validate the rapidly increasing repository of 2838 GO-CAMs. In addition, we describe the impacts of several collaborations to refine GO and report a 10% increase in the number of GO annotations, a 25% increase in annotated gene products, and over 9,400 new scientific articles annotated. As the project matures, we continue our efforts to review older annotations in light of newer findings, and, to maintain consistency with other ontologies. As a result, 20 000 annotations derived from experimental data were reviewed, corresponding to 2.5% of experimental GO annotations. The website (http://geneontology.org) was redesigned for quick access to documentation, downloads and tools. To maintain an accurate resource and support traceability and reproducibility, we have made available a historical archive covering the past 15 years of GO data with a consistent format and file structure for both the ontology and annotations.",GOC,0.980871697,Gene Ontology resource,0.604975864,GOC,0.980871697,1,"27899567.0, 30395331.0",23161678,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +23161678,http://www.geneontology.org,"Gene Ontology annotations and resources. The Gene Ontology (GO) Consortium (GOC, http://www.geneontology.org) is a community-based bioinformatics resource that classifies gene product function through the use of structured, controlled vocabularies. Over the past year, the GOC has implemented several processes to increase the quantity, quality and specificity of GO annotations. First, the number of manual, literature-based annotations has grown at an increasing rate. Second, as a result of a new 'phylogenetic annotation' process, manually reviewed, homology-based annotations are becoming available for a broad range of species. 
Third, the quality of GO annotations has been improved through a streamlined process for, and automated quality checks of, GO annotations deposited by different annotation groups. Fourth, the consistency and correctness of the ontology itself has increased by using automated reasoning tools. Finally, the GO has been expanded not only to cover new areas of biology through focused interaction with experts, but also to capture greater specificity in all areas of the ontology using tools for adding new combinatorial terms. The GOC works closely with other ontology developers to support integrated use of terminologies. The GOC supports its user community through the use of e-mail lists, social media and web-based resources.",GOC,0.951027632,Ontology,0.711801529,GOC,0.951027632,1,"22102568.0, 25428369.0",33290552,low_prob_best_name,do not remove,conflicting record(s) to be removed,"merge all ""dup name"" IDs",NA,NA,NA,11/17/2012 +"27794040, 30357420, 33152092",http://gold.jgi.doe.gov,"Genomes OnLine Database (GOLD) v.6: data updates and feature enhancements. The Genomes Online Database (GOLD) (https://gold.jgi.doe.gov) is a manually curated data management system that catalogs sequencing projects with associated metadata from around the world. In the current version of GOLD (v.6), all projects are organized based on a four level classification system in the form of a Study, Organism (for isolates) or Biosample (for environmental samples), Sequencing Project and Analysis Project. Currently, GOLD provides information for 26 117 Studies, 239 100 Organisms, 15 887 Biosamples, 97 212 Sequencing Projects and 78 579 Analysis Projects. These are integrated with over 312 metadata fields from which 58 are controlled vocabularies with 2067 terms. 
The web interface facilitates submission of a diverse range of Sequencing Projects (such as isolate genome, single-cell genome, metagenome, metatranscriptome) and complex Analysis Projects (such as genome from metagenome, or combined assembly from multiple Sequencing Projects). GOLD provides a seamless interface with the Integrated Microbial Genomes (IMG) system and supports and promotes the Genomic Standards Consortium (GSC) Minimum Information standards. This paper describes the data updates and additional features added during the last two years.",GOLD,0.995036284,Genomes OnLine Database,0.965856183,GOLD,0.995036284,3,NA,"22135293.0, 25348402.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +"22135293, 25348402",http://www.genomesonline.org,"The Genomes OnLine Database (GOLD) v.4: status of genomic and metagenomic projects and their associated metadata. The Genomes OnLine Database (GOLD, http://www.genomesonline.org/) is a comprehensive resource for centralized monitoring of genome and metagenome projects worldwide. Both complete and ongoing projects, along with their associated metadata, can be accessed in GOLD through precomputed tables and a search page. As of September 2011, GOLD, now on version 4.0, contains information for 11,472 sequencing projects, of which 2907 have been completed and their sequence data has been deposited in a public repository. Out of these complete projects, 1918 are finished and 989 are permanent drafts. Moreover, GOLD contains information for 340 metagenome studies associated with 1927 metagenome samples. 
GOLD continues to expand, moving toward the goal of providing the most comprehensive repository of metadata information related to the projects and their organisms/environments in accordance with the Minimum Information about any (x) Sequence specification and beyond.",GOLD,0.951975763,Genomes OnLine Database,0.946679926,GOLD,0.951975763,2,NA,"27794040.0, 30357420.0, 33152092.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/27/2014 +23813641,http://gosyn.bioapps.biozentrum.uni-wuerzburg.de,"GoSynthetic database tool to analyse natural and engineered molecular processes. An essential topic for synthetic biologists is to understand the structure and function of biological processes and involved proteins and plan experiments accordingly. Remarkable progress has been made in recent years towards this goal. However, efforts to collect and present all information on processes and functions are still cumbersome. The database tool GoSynthetic provides a new, simple and fast way to analyse biological processes applying a hierarchical database. Four different search modes are implemented. Furthermore, protein interaction data, cross-links to organism-specific databases (17 organisms including six model organisms and their interactions), COG/KOG, GO and IntAct are warehoused. The built in connection to technical and engineering terms enables a simple switching between biological concepts and concepts from engineering, electronics and synthetic biology. The current version of GoSynthetic covers more than one million processes, proteins, COGs and GOs. It is illustrated by various application examples probing process differences and designing modifications. 
Database URL: http://gosyn.bioapps.biozentrum.uni-wuerzburg.de.",GoSynthetic,0.995571911,NA,0,GoSynthetic,0.995571911,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/27/2013 +29483591,http://www.gourdbase.cn,"GourdBase: a genome-centered multi-omics database for the bottle gourd (Lagenaria siceraria), an economically important cucurbit crop. GourdBase is an integrative data platform for the bottle gourd to examine its multifarious intuitive morphology and annotated genome. GourdBase consists of six main modules that store and interlink multi-omic data: the genome (with transcriptomic data integrated) module, the phenome module, the markers/QTLs module, the maps (genetic, physical and comparative) module, the cultivars module, and the publications module. These modules provide access to various type of data including the annotated reference genome sequence, gene models, transcriptomic data from various tissues, physical and comparative genome maps, molecular markers in different types, phenotypic data for featuring traits including fruit shape and umami taste, and quantitative trait loci (QTLs) that underlie these traits. GourdBase is intuitive, user-friendly and interlinked and is designed to allow researchers, breeders and trained farmers to browse, search and fetch information on interests and assist in genomics-driven studies and breeding. The knowledge base and web interface can be accessed at http://www.gourdbase.cn/ .",GourdBase,0.99763602,NA,0,GourdBase,0.99763602,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/26/2018 +26039571,http://biocc.hrbmu.edu.cn/GPA,"Gene Perturbation Atlas (GPA): a single-gene perturbation repository for characterizing functional mechanisms of coding and non-coding genes. Genome-wide transcriptome profiling after gene perturbation is a powerful means of elucidating gene functional mechanisms in diverse contexts. 
The comprehensive collection and analysis of the resulting transcriptome profiles would help to systematically characterize context-dependent gene functional mechanisms and conduct experiments in biomedical research. To this end, we collected and curated over 3000 transcriptome profiles in human and mouse from diverse gene perturbation experiments, which involved 1585 different perturbed genes (microRNAs, lncRNAs and protein-coding genes) across 1170 different cell lines/tissues. For each profile, we identified differential genes and their associated functions and pathways, constructed perturbation networks, predicted transcription regulation and cancer/drug associations, and assessed cooperative perturbed genes. Based on these transcriptome analyses, the Gene Perturbation Atlas (GPA) can be used to detect (i) novel or cell-specific functions and pathways affected by perturbed genes, (ii) protein interactions and regulatory cascades affected by perturbed genes, and (iii) perturbed gene-mediated cooperative effects. The GPA is a user-friendly database to support the rapid searching and exploration of gene perturbations. Particularly, we visualized functional effects of perturbed genes from multiple perspectives. In summary, the GPA is a valuable resource for characterizing gene functions and regulatory mechanisms after single-gene perturbations. The GPA is freely accessible at http://biocc.hrbmu.edu.cn/GPA/.",GPA,0.900930822,Gene Perturbation Atlas,0.898518195,GPA,0.900930822,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/3/2015 +33868597,http://genemed.tech/gpcards,"GPCards: An integrated database of genotype-phenotype correlations in human genetic diseases. Genotype-phenotype correlations are the basis of precision medicine of human genetic diseases. However, it remains a challenge for clinicians and researchers to conveniently access detailed individual-level clinical phenotypic features of patients with various genetic variants. 
To address this urgent need, we manually searched for genetic studies in PubMed and catalogued 8,309 genetic variants in 1,288 genes from 17,738 patients with detailed clinical phenotypic features from 1,855 publications. Based on genotype-phenotype correlations in this dataset, we developed an user-friendly online database called GPCards (http://genemed.tech/gpcards/), which not only provided the association between genetic diseases and disease genes, but also the prevalence of various clinical phenotypes related to disease genes and the patient-level mapping between these clinical phenotypes and genetic variants. To accelerate the interpretation of genetic variants, we integrated 62 well-known variant-level and gene-level genomic data sources, including functional predictions, allele frequencies in different populations, and disease-related information. Furthermore, GPCards enables automatic analyses of users' own genetic data, comprehensive annotation, prioritization of candidate functional variants, and identification of genotype-phenotype correlations using custom parameters. In conclusion, GPCards is expected to accelerate the interpretation of genotype-phenotype correlations, subtype classification, and candidate gene prioritisation in human genetic diseases.",GPCards,0.997809052,NA,0,GPCards,0.997809052,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/22/2021 +33270898,http://gpcrdb.org,"GPCRdb in 2021: integrating GPCR sequence, structure and function. G protein-coupled receptors (GPCRs) form both the largest family of membrane proteins and drug targets, mediating the action of one-third of medicines. The GPCR database, GPCRdb serves >4 000 researchers every month and offers reference data, analysis of own or literature data, experiment design and dissemination of published datasets. Here, we describe new and updated GPCRdb resources with a particular focus on integration of sequence, structure and function. 
GPCRdb contains all human non-olfactory GPCRs (and >27 000 orthologs), G-proteins and arrestins. It includes over 2 000 drug and in-trial agents and nearly 200 000 ligands with activity and availability data. GPCRdb annotates all published GPCR structures (updated monthly), which are also offered in a refined version (with re-modeled missing/distorted regions and reverted mutations) and provides structure models of all human non-olfactory receptors in inactive, intermediate and active states. Mutagenesis data in the GPCRdb spans natural genetic variants, GPCR-G protein interfaces, ligand sites and thermostabilising mutations. A new sequence signature tool for identification of functional residue determinants has been added and two data driven tools to design ligand site mutations and constructs for structure determination have been updated extending their coverage of receptors and modifications. The GPCRdb is available at https://gpcrdb.org.",GPCRdb,0.998430192,NA,0,GPCRdb,0.998430192,1,NA,"29155946.0, 30664776.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +"29155946, 30664776",http://www.gpcrdb.org,"GPCRdb in 2018: adding GPCR structure models and ligands. G protein-coupled receptors are the most abundant mediators of both human signalling processes and therapeutic effects. Herein, we report GPCRome-wide homology models of unprecedented quality, and roughly 150 000 GPCR ligands with data on biological activities and commercial availability. Based on the strategy of 'Less model - more Xtal', each model exploits both a main template and alternative local templates. This achieved higher similarity to new structures than any of the existing resources, and refined crystal structures with missing or distorted regions. Models are provided for inactive, intermediate and active states-except for classes C and F that so far only have inactive templates. 
The ligand database has separate browsers for: (i) target selection by receptor, family or class, (ii) ligand filtering based on cross-experiment activities (min, max and mean) or chemical properties, (iii) ligand source data and (iv) commercial availability. SMILES structures and activity spreadsheets can be downloaded for further processing. Furthermore, three recent landmark publications on GPCR drugs, G protein selectivity and genetic variants have been accompanied with resources that now let readers view and analyse the findings themselves in GPCRdb. Altogether, this update will enable scientific investigation for the wider GPCR community. GPCRdb is available at http://www.gpcrdb.org.",GPCRdb,0.993975937,NA,0,GPCRdb,0.993975937,2,NA,33270898,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/21/2019 +24304901,http://www.gpcr.org/7tm,"GPCRDB: an information system for G protein-coupled receptors. For the past 20 years, the GPCRDB (G protein-coupled receptors database; http://www.gpcr.org/7tm/) has been a 'one-stop shop' for G protein-coupled receptor (GPCR)-related data. The GPCRDB contains experimental data on sequences, ligand-binding constants, mutations and oligomers, as well as many different types of computationally derived data, such as multiple sequence alignments and homology models. The GPCRDB also provides visualization and analysis tools, plus a number of query systems. In the latest GPCRDB release, all multiple sequence alignments, and >65,000 homology models, have been significantly improved, thanks to a recent flurry of GPCR X-ray structure data. Tools were introduced to browse X-ray structures, compare binding sites, profile similar receptors and generate amino acid conservation statistics. Snake plots and helix box diagrams can now be custom coloured (e.g. by chemical properties or mutation data) and saved as figures. 
A series of sequence alignment visualization tools has been added, and sequence alignments can now be created for subsets of sequences and sequence positions, and alignment statistics can be produced for any of these subsets.",GPCRDB,0.998344004,protein-coupled receptors database,0.987734778,GPCRDB,0.998344004,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/3/2013 +21337704,http://gpde.sourceforge.net,"GPDE: A biological proteomic database for biomarker discovery and evaluation. Clinical proteomics faces extremely complex and variable data. Here, we present an updated version of the Griss Proteomics Database Engine (GPDE): A free biological proteomic database specifically designed for clinical proteomics and biomarker discovery (http://gpde.sourceforge.net). It combines experiments based on investigated cell types thereby supporting customizable biological meta-analyses. Through the new features described here, the GPDE now became a powerful yet easy-to-use tool to support the fast identification and reliable evaluation of biomarker candidates.",GPDE,0.997722995,Griss Proteomics Database Engine,0.943638495,GPDE,0.997722995,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/27/2011 +34378141,http://www.gpgenome.com,"Global Pharmacopoeia Genome Database is an integrated and mineable genomic database for traditional medicines derived from eight international pharmacopoeias. Genomic data have demonstrated considerable traction in accelerating contemporary studies in traditional medicine. However, the lack of a uniform format and dispersed storage limits the full potential of herb genomic data. In this study, we developed a Global Pharmacopoeia Genome Database (GPGD). The database contains 34,346 records for 903 herb species from eight global pharmacopoeias (Brazilian, Egyptian, European, Indian, Japanese, Korean, the Pharmacopoeia of the People's Republic of China, and U.S. Pharmacopoeia's Herbal Medicines Compendium). 
In particular, the GPGD contains 21,872 DNA barcodes from 867 species, 2,203 organelle genomes from 674 species, 55 whole genomes from 49 species, 534 genomic sequencing datasets from 366 species, and 9,682 transcriptome datasets from 350 species. Among the organelle genomes, 534 genomes from 366 species were newly generated in this study. Whole genomes, organelle genomes, genomic fragments, transcriptomes, and DNA barcodes were uniformly formatted and arranged by species. The GPGD is publicly accessible at http://www.gpgenome.com and serves as an essential resource for species identification, decomposition of biosynthetic pathways, and molecular-assisted breeding analysis. Thus, the database is an invaluable resource for future studies on herbal medicine safety, drug discovery, and the protection and rational use of herbal resources.",GPGD,0.995944738,Global Pharmacopoeia Genome Database,0.966468737,GPGD,0.995944738,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/6/2021 +23798574,http://ssfa-gphr.de,"Research resource: novel structural insights bridge gaps in glycoprotein hormone receptor analyses. The first version of a glycoprotein hormone receptor (GPHR) information resource was designed to link functional with structural GPHR information, in order to support sequence-structure-function analysis of the LH, FSH, and TSH receptors (http://ssfa-gphr.de). However, structural information on a binding- and signaling-sensitive extracellular fragment (∼100 residues), the hinge region, had been lacking. A new FSHR crystal structure of the hormone-bound extracellular domain has recently been solved. The structure comprises the leucine-rich repeat domain and most parts of the hinge region. 
We have not only integrated the new FSHR/FSH structure and the derived homology models of TSHR/TSH, LHCGR/CG, and LHCGR/LH into our web-based information resource, but have additionally provided novel tools to analyze the advanced structural features, with the common characteristics and distinctions between GPHRs, in a more precise manner. The hinge region with its second hormone-binding site allows us to assign functional data to the new structural features between hormone and receptor, such as binding details of a sulfated tyrosine (conserved throughout the GPHRs) extending into a pocket of the hormone. We have also implemented a protein interface analysis tool that enables the identification and visualization of extracellular contact points between interaction partners. This provides a starting point for comparing the binding patterns of GPHRs. Together with the mutagenesis data stored in the database, this will help to decipher the essential residues for ligand recognition and the molecular mechanisms of signal transduction, extending from the extracellular hormone-binding site toward the intracellular G protein-binding sites.",GPHR,0.621493533,NA,0,GPHR,0.621493533,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/24/2013 +27045824,http://www.bioinformatics.deib.polimi.it/GPKB,"Integration and Querying of Genomic and Proteomic Semantic Annotations for Biomedical Knowledge Extraction. Understanding complex biological phenomena involves answering complex biomedical questions on multiple biomolecular information simultaneously, which are expressed through multiple genomic and proteomic semantic annotations scattered in many distributed and heterogeneous data sources; such heterogeneity and dispersion hamper the biologists' ability of asking global queries and performing global evaluations. 
To overcome this problem, we developed a software architecture to create and maintain a Genomic and Proteomic Knowledge Base (GPKB), which integrates several of the most relevant sources of such dispersed information (including Entrez Gene, UniProt, IntAct, Expasy Enzyme, GO, GOA, BioCyc, KEGG, Reactome, and OMIM). Our solution is general, as it uses a flexible, modular, and multilevel global data schema based on abstraction and generalization of integrated data features, and a set of automatic procedures for easing data integration and maintenance, also when the integrated data sources evolve in data content, structure, and number. These procedures also assure consistency, quality, and provenance tracking of all integrated data, and perform the semantic closure of the hierarchical relationships of the integrated biomedical ontologies. At http://www.bioinformatics.deib.polimi.it/GPKB/, a Web interface allows graphical easy composition of queries, although complex, on the knowledge base, supporting also semantic query expansion and comprehensive explorative search of the integrated data to better sustain biomedical knowledge extraction.",GPKB,0.936947525,Genomic and Proteomic Knowledge Base,0.911863849,GPKB,0.936947525,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/1/2016 +30364992,http://www.ebi.ac.uk/interpro/genomeproperties,"Genome properties in 2019: a new companion database to InterPro for the inference of complete functional attributes. Automatic annotation of protein function is routinely applied to newly sequenced genomes. While this provides a fine-grained view of an organism's functional protein repertoire, proteins, more commonly function in a coordinated manner, such as in pathways or multimeric complexes. Genome Properties (GPs) define such functional entities as a series of steps, originally described by either TIGRFAMs or Pfam entries. 
To increase the scope of coverage, we have migrated GPs to function as a companion resource utilizing InterPro entries. Having introduced GPs-specific versioned releases, we provide software and data via a GitHub repository, and have developed a new web interface to GPs (available at https://www.ebi.ac.uk/interpro/genomeproperties). In addition to exploring each of the 1286 GPs, the website contains GPs pre-calculated for a representative set of proteomes; these results can be used to profile GPs phylogenetically via an interactive viewer. Users can upload novel data to the viewer for comparison with the pre-calculated results. Over the last year, we have added ∼700 new GPs, increasing the coverage of eukaryotic systems, as well as increasing general coverage through automatic generation of GPs from related resources. All data are freely available via the website and the GitHub repository.",GPs,0.745130181,NA,0,GPs,0.745130181,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +32576192,http://hanlab.uth.edu/GPSno,"The genetic and pharmacogenomic landscape of snoRNAs in human cancer. Emerging evidence has revealed significant roles for small nucleolar RNAs (snoRNAs) in tumorigenesis. However, the genetic and pharmacogenomic landscape of snoRNAs has not been characterized. Using the genotype and snoRNA expression data from The Cancer Genome Atlas, we characterized the effects of genetic variants on snoRNAs across 29 cancer types and further linked related alleles with patient survival as well as genome-wide association study risk loci. Furthermore, we characterized the impact of snoRNA expression on drug response in patients to facilitate the clinical utility of snoRNAs in cancer. We also developed a user-friendly data resource, GPSno (http://hanlab.uth.edu/GPSno), with multiple modules for researchers to visualize, browse, and download multi-dimensional data. 
Our study provides a comprehensive genetic and pharmacogenomic landscape of snoRNAs, which will shed light on future clinical considerations for the development of snoRNA-based targeted therapies.",GPSno,0.970677733,NA,0,GPSno,0.970677733,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/23/2020 +31210272,"http://wheat.pw.usda.gov, http://graingenes.org","GrainGenes: centralized small grain resources and digital platform for geneticists and breeders. . GrainGenes (https://wheat.pw.usda.gov or https://graingenes.org) is an international centralized repository for curated, peer-reviewed datasets useful to researchers working on wheat, barley, rye and oat. GrainGenes manages genomic, genetic, germplasm and phenotypic datasets through a dynamically generated web interface for facilitated data discovery. Since 1992, GrainGenes has served geneticists and breeders in both the public and private sectors on six continents. Recently, several new datasets were curated into the database along with new tools for analysis. The GrainGenes homepage was enhanced by making it more visually intuitive and by adding links to commonly used pages. Several genome assemblies and genomic tracks are displayed through the genome browsers at GrainGenes, including the Triticum aestivum (bread wheat) cv. 'Chinese Spring' IWGSC RefSeq v1.0 genome assembly, the Aegilops tauschii (D genome progenitor) Aet v4.0 genome assembly, the Triticum turgidum ssp. dicoccoides (wild emmer wheat) cv. 'Zavitan' WEWSeq v.1.0 genome assembly, a T. aestivum (bread wheat) pangenome, the Hordeum vulgare (barley) cv. 'Morex' IBSC genome assembly, the Secale cereale (rye) select 'Lo7' assembly, a partial hexaploid Avena sativa (oat) assembly and the Triticum durum cv. 'Svevo' (durum wheat) RefSeq Release 1.0 assembly. New genetic maps and markers were added and can be displayed through CMAP. 
Quantitative trait loci, genetic maps and genes from the Wheat Gene Catalogue are indexed and linked through the Wheat Information System (WheatIS) portal. Training videos were created to help users query and reach the data they need. GSP (Genome Specific Primers) and PIECE2 (Plant Intron Exon Comparison and Evolution) tools were implemented and are available to use. As more small grains reference sequences become available, GrainGenes will play an increasingly vital role in helping researchers improve crops.",GrainGenes,0.994854987,Genome Specific Primers,0.700459212,GrainGenes,0.994854987,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +32163115,http://ifm.chimie.unistra.fr/grall,"The Glycine Receptor Allosteric Ligands Library (GRALL). MOTIVATION:Glycine receptors (GlyRs) mediate fast inhibitory neurotransmission in the brain and have been recognized as key pharmacological targets for pain. A large number of chemically diverse compounds that are able to modulate GlyR function both positively and negatively have been reported, which provides useful information for the development of pharmacological strategies and models for the allosteric modulation of these ion channels. RESULTS:Based on existing literature, we have collected 218 unique chemical entities with documented modulatory activities at homomeric GlyR-α1 and -α3 and built a database named GRALL. This collection includes agonists, antagonists, positive and negative allosteric modulators and a number of experimentally inactive compounds. Most importantly, for a large fraction of them a structural annotation based on their putative binding site on the receptor is provided. This type of annotation, which is currently missing in other drug banks, along with the availability of cooperativity factors from radioligand displacement experiments are expected to improve the predictivity of in silico methodologies for allosteric drug discovery and boost the development of conformation-based pharmacological approaches. 
AVAILABILITY AND IMPLEMENTATION:The GRALL library is distributed as a web-accessible database at the following link: https://ifm.chimie.unistra.fr/grall. For each molecular entry, it provides information on the chemical structure, the ligand-binding site, the direction of modulation, the potency, the 3D molecular structure and quantum-mechanical charges as determined by our in-house pipeline. CONTACT:mcecchini@unistra.fr. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",GRALL,0.987117052,The Glycine Receptor Allosteric Ligands Library,0.934507086,GRALL,0.987117052,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2020 +22374386,http://www.gramene.org/biomart/martview,"GrameneMart: the BioMart data portal for the Gramene project. Gramene is a well-established resource for plant comparative genome analysis. Data are generated through automated and curated analyses and made available through web interfaces such as GrameneMart. The Gramene project was an early adopter of the BioMart software, which remains an integral and well-used component of the Gramene website. BioMart accessible data sets include plant gene annotations, plant variation catalogues, genetic markers, physical mapping entities, public DNA/mRNA sequences of various types and curated quantitative trait loci for various species. DATABASE URL: http://www.gramene.org/biomart/martview.",Gramene,0.975419879,NA,0,Gramene,0.975419879,1,NA,"20931385.0, 24217918.0, 28713666.0, 29165610.0, 33170273.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,2/28/2012 +"20931385, 24217918, 28713666, 29165610, 33170273",http://www.gramene.org,"Gramene database: a hub for comparative plant genomics. 
The rich collection of known genetic information and the recent completion of rice genome sequencing project provided the cereal plant researchers a useful tool to investigate the roles of genes and genomic organization that contribute to numerous agronomic traits. Gramene ( http://www.gramene.org ) is a unique database where users are allowed to query and explore the power of genomic colinearity and comparative genomics for genetic and genomic studies on plant genomes. Gramene presents a wholesome perspective by assimilating data from a broad range of publicly available data sources for cereals like rice, sorghum, maize, wild rice, wheat, oats, barley, and other agronomically important crop plants such as poplar and grape, and the model plant Arabidopsis. As part of the process, it preserves the original data, but also reanalyzes for integration into several knowledge domains of maps, markers, genes, proteins, pathways, phenotypes, including Quantitative Trait Loci (QTL) and genetic diversity/natural variation. This allows researchers to use this information resource to decipher the known and predicted interactions between the components of biological systems, and how these interactions regulate plant development. Using examples from rice, this article describes how the database can be helpful to researchers representing an array of knowledge domains ranging from plant biology, plant breeding, molecular biology, genomics, biochemistry, genetics, bioinformatics, and phylogenomics.",Gramene,0.934200406,Gramene Database,0.524727866,Gramene,0.934200406,5,NA,22374386,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +25982315,http://structuralbiology.cau.edu.cn/GraP,"GraP: platform for functional genomics analysis of Gossypium raimondii. Cotton (Gossypium spp.) is one of the most important natural fiber and oil crops worldwide. 
Improvement of fiber yield and quality under changing environments attract much attention from cotton researchers; however, a functional analysis platform integrating omics data is still missing. The success of cotton genome sequencing and large amount of available transcriptome data allows the opportunity to establish a comprehensive analysis platform for integrating these data and related information. A comprehensive database, Platform of Functional Genomics Analysis in Gossypium raimondii (GraP), was constructed to provide multi-dimensional analysis, integration and visualization tools. GraP includes updated functional annotation, gene family classifications, protein-protein interaction networks, co-expression networks and microRNA-target pairs. Moreover, gene set enrichment analysis and cis-element significance analysis tools are also provided for gene batch analysis of high-throughput data sets. Based on these effective services, GraP may offer further information for subsequent studies of functional genes and in-depth analysis of high-throughput data. GraP is publically accessible at http://structuralbiology.cau.edu.cn/GraP/, with all data available for downloading.",GraP,0.972606599,of,0.77096051,GraP,0.972606599,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/17/2015 +27098585,http://biodb.sdau.edu.cn/gc/index.html,"Identification of genomic sites for CRISPR/Cas9-based genome editing in the Vitis vinifera genome. Background CRISPR/Cas9 has been recently demonstrated as an effective and popular genome editing tool for modifying genomes of humans, animals, microorganisms, and plants. Success of such genome editing is highly dependent on the availability of suitable target sites in the genomes to be edited. Many specific target sites for CRISPR/Cas9 have been computationally identified for several annual model and crop species, but such sites have not been reported for perennial, woody fruit species. 
In this study, we identified and characterized five types of CRISPR/Cas9 target sites in the widely cultivated grape species Vitis vinifera and developed a user-friendly database for editing grape genomes in the future. Results A total of 35,767,960 potential CRISPR/Cas9 target sites were identified from grape genomes in this study. Among them, 22,597,817 target sites were mapped to specific genomic locations and 7,269,788 were found to be highly specific. Protospacers and PAMs were found to distribute uniformly and abundantly in the grape genomes. They were present in all the structural elements of genes with the coding region having the highest abundance. Five PAM types, TGG, AGG, GGG, CGG and NGG, were observed. With the exception of the NGG type, they were abundantly present in the grape genomes. Synteny analysis of similar genes revealed that the synteny of protospacers matched the synteny of homologous genes. A user-friendly database containing protospacers and detailed information of the sites was developed and is available for public use at the Grape-CRISPR website ( http://biodb.sdau.edu.cn/gc/index.html ). Conclusion Grape genomes harbour millions of potential CRISPR/Cas9 target sites. These sites are widely distributed among and within chromosomes with predominant abundance in the coding regions of genes. We developed a publicly-accessible Grape-CRISPR database for facilitating the use of the CRISPR/Cas9 system as a genome editing tool for functional studies and molecular breeding of grapes. Among other functions, the database allows users to identify and select multi-protospacers for editing similar sequences in grape genomes simultaneously.",Grape-CRISPR,0.896898484,NA,0,Grape-CRISPR,0.896898484,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/21/2016 +24007337,http://www.graspdb.net,"GRASP [Genomic Resource Access for Stoichioproteomics]: comparative explorations of the atomic content of 12 Drosophila proteomes. 
Background""Stoichioproteomics"" relates the elemental composition of proteins and proteomes to variation in the physiological and ecological environment. To help harness and explore the wealth of hypotheses made possible under this framework, we introduce GRASP (http://www.graspdb.net), a public bioinformatic knowledgebase containing information on the frequencies of 20 amino acids and atomic composition of their side chains. GRASP integrates comparative protein composition data with annotation data from multiple public databases. Currently, GRASP includes information on proteins of 12 sequenced Drosophila (fruit fly) proteomes, which will be expanded to include increasingly diverse organisms over time. In this paper we illustrate the potential of GRASP for testing stoichioproteomic hypotheses by conducting an exploratory investigation into the composition of 12 Drosophila proteomes, testing the prediction that protein atomic content is associated with species ecology and with protein expression levels. Results Elements varied predictably along multivariate axes. Species were broadly similar, with the D. willistoni proteome a clear outlier. As expected, individual protein atomic content within proteomes was influenced by protein function and amino acid biochemistry. Evolution in elemental composition across the phylogeny followed less predictable patterns, but was associated with broad ecological variation in diet. Using expression data available for D. melanogaster, we found evidence consistent with selection for efficient usage of elements within the proteome: as expected, nitrogen content was reduced in highly expressed proteins in most tissues, most strongly in the gut, where nutrients are assimilated, and least strongly in the germline. 
Conclusions The patterns identified here using GRASP provide a foundation on which to base future research into the evolution of atomic composition in Drosophila and other taxa.",GRASP,0.996128678,Genomic Resource Access for Stoichioproteomics,0.944524201,GRASP,0.996128678,1,NA,"24931982.0, 25428361.0",NA,NA,NA,do not merge,NA,NA,NA,9/4/2013 +24931982,http://apps.nhlbi.nih.gov/grasp,"GRASP: analysis of genotype-phenotype results from 1390 genome-wide association studies and corresponding open access database. Summary We created a deeply extracted and annotated database of genome-wide association studies (GWAS) results. GRASP v1.0 contains >6.2 million SNP-phenotype association from among 1390 GWAS studies. We re-annotated GWAS results with 16 annotation sources including some rarely compared to GWAS results (e.g. RNAediting sites, lincRNAs, PTMs). Motivation To create a high-quality resource to facilitate further use and interpretation of human GWAS results in order to address important scientific questions. Results GWAS have grown exponentially, with increases in sample sizes and markers tested, and continuing bias toward European ancestry samples. GRASP contains >100 000 phenotypes, roughly: eQTLs (71.5%), metabolite QTLs (21.2%), methylation QTLs (4.4%) and diseases, biomarkers and other traits (2.8%). cis-eQTLs, meQTLs, mQTLs and MHC region SNPs are highly enriched among significant results. After removing these categories, GRASP still contains a greater proportion of studies and results than comparable GWAS catalogs. Cardiovascular disease and related risk factors pre-dominate remaining GWAS results, followed by immunological, neurological and cancer traits. Significant results in GWAS display a highly gene-centric tendency. Sex chromosome X (OR = 0.18[0.16-0.20]) and Y (OR = 0.003[0.001-0.01]) genes are depleted for GWAS results. Gene length is correlated with GWAS results at nominal significance (P ≤ 0.05) levels. 
We show this gene-length correlation decays at increasingly more stringent P-value thresholds. Potential pleotropic genes and SNPs enriched for multi-phenotype association in GWAS are identified. However, we note possible population stratification at some of these loci. Finally, via re-annotation we identify compelling functional hypotheses at GWAS loci, in some cases unrealized in studies to date. Conclusion Pooling summary-level GWAS results and re-annotating with bioinformatics predictions and molecular features provides a good platform for new insights. Availability The GRASP database is available at http://apps.nhlbi.nih.gov/grasp.",GRASP,0.99731338,NA,0,GRASP,0.99731338,1,NA,"24007337.0, 25428361.0",NA,NA,NA,merge only:,NA,NA,"24931982.0, 25428361.0; URLs DO NOT RESOLVE",6/1/2014 +25428361,http://apps.nhlbi.nih.gov/Grasp/Overview.aspx,"GRASP v2.0: an update on the Genome-Wide Repository of Associations between SNPs and phenotypes. Here, we present an update on the Genome-Wide Repository of Associations between SNPs and Phenotypes (GRASP) database version 2.0 (http://apps.nhlbi.nih.gov/Grasp/Overview.aspx). GRASP is a centralized repository of publically available genome-wide association study (GWAS) results. GRASP v2.0 contains ∼ 8.87 million SNP associations reported in 2082 studies, an increase of ∼ 2.59 million SNP associations (41.4% increase) and 693 studies (48.9% increase) from our previous version. Our goal in developing and maintaining GRASP is to provide a user-friendly means for diverse sets of researchers to query reported SNP associations (P ≤ 0.05) with human traits, including methylation and expression quantitative trait loci (QTL) studies. Therefore, in addition to making the full database available for download, we developed a user-friendly web interface that allows for direct querying of GRASP. We provide details on the use of this web interface and what information may be gleaned from using this interactive option. 
Additionally, we describe potential uses of GRASP and how the scientific community may benefit from the convenient availability of all SNP association results from GWAS (P ≤ 0.05). We plan to continue updating GRASP with newly published GWAS and increased annotation depth.",GRASP,0.997216702,Genome-Wide Repository of Associations between SNPs and Phenotypes,0.985747047,GRASP,0.997216702,1,NA,"24007337.0, 24931982.0",NA,NA,NA,merge only:,NA,NA,"24931982.0, 25428361.0; URLs DO NOT RESOLVE",11/26/2014 +25797358,http://grdr.ncats.nih.gov/index.php?option=com_content&view=article&id=3&Itemid=5,"NIH/NCATS/GRDR® Common Data Elements: A leading force for standardized data collection. The main goal of the NIH/NCATS GRDR® program is to serve as a central web-based global data repository to integrate de-identified patient clinical data from rare disease registries, and other data sources, in a standardized manner, to be available to researchers for conducting various biomedical studies, including clinical trials and to support analyses within and across diseases. The aim of the program is to advance research for many rare diseases. One of the first tasks toward achieving this goal was the development of a set of Common Data Elements (CDEs), which are controlled terminologies that represent collected data. A list of 75 CDEs was developed by a national committee and was validated and implemented during a period of 2 year proof of concept. Access to GRDR CDEs is freely available at: https://grdr.ncats.nih.gov/index.php?option=com_content&view=article&id=3&Itemid=5. The GRDR CDEs have been the cornerstone of the GRDR repository, as well as of several other national and international patient registries. 
The establishment of the GRDR program has elevated the issue of data standardization and interoperability for rare disease patient registries, to international attention, resulting in a global dialog and significant change in the mindset of registry developers, patient advocacy groups, and other national and international organizations.",GRDRÂ,0.934958398,NA,0,GRDRÂ,0.934958398,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/20/2015 +34673265,http://www.ebi.ac.uk/QuickGO/targetset/dbTF,"A GO catalogue of human DNA-binding transcription factors. To control gene transcription, DNA-binding transcription factors recognise specific sequence motifs in gene regulatory regions. A complete and reliable GO annotation of all DNA-binding transcription factors is key to investigating the delicate balance of gene regulation in response to environmental and developmental stimuli. The need for such information is demonstrated by the many lists of transcription factors that have been produced over the past decade. The COST Action Gene Regulation Ensemble Effort for the Knowledge Commons (GREEKC) Consortium brought together experts in the field of transcription with the aim of providing high quality and interoperable gene regulatory data. The Gene Ontology (GO) Consortium provides strict definitions for gene product function, including factors that regulate transcription. The collaboration between the GREEKC and GO Consortia has enabled the application of those definitions to produce a new curated catalogue of over 1400 human DNA-binding transcription factors, that can be accessed at https://www.ebi.ac.uk/QuickGO/targetset/dbTF. This catalogue has facilitated an improvement in the GO annotation of human DNA-binding transcription factors and led to the GO annotation of almost sixty thousand DNA-binding transcription factors in over a hundred species. 
Thus, this work will aid researchers investigating the regulation of transcription in both biomedical and basic science.",GREEKC,0.699543521,NA,0,GREEKC,0.699543521,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,10/18/2021 +28231303,http://www.psychology.nottingham.ac.uk/greeklex,"GreekLex 2: A comprehensive lexical database with part-of-speech, syllabic, phonological, and stress information. Databases containing lexical properties on any given orthography are crucial for psycholinguistic research. In the last ten years, a number of lexical databases have been developed for Greek. However, these lack important part-of-speech information. Furthermore, the need for alternative procedures for calculating syllabic measurements and stress information, as well as combination of several metrics to investigate linguistic properties of the Greek language are highlighted. To address these issues, we present a new extensive lexical database of Modern Greek (GreekLex 2) with part-of-speech information for each word and accurate syllabification and orthographic information predictive of stress, as well as several measurements of word similarity and phonetic information. The addition of detailed statistical information about Greek part-of-speech, syllabification, and stress neighbourhood allowed novel analyses of stress distribution within different grammatical categories and syllabic lengths to be carried out. Results showed that the statistical preponderance of stress position on the pre-final syllable that is reported for Greek language is dependent upon grammatical category. Additionally, analyses showed that a proportion higher than 90% of the tokens in the database would be stressed correctly solely by relying on stress neighbourhood information. 
The database and the scripts for orthographic and phonological syllabification as well as phonetic transcription are available at http://www.psychology.nottingham.ac.uk/greeklex/.",GreekLex,0.941649795,NA,0,GreekLex,0.941649795,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/23/2017 +"26578586, 30945200",http://greenc.sciencedesigners.com,"GREENC: a Wiki-based database of plant lncRNAs. Long non-coding RNAs (lncRNAs) are functional non-translated molecules greater than 200 nt. Their roles are diverse and they are usually involved in transcriptional regulation. LncRNAs still remain largely uninvestigated in plants with few exceptions. Experimentally validated plant lncRNAs have been shown to regulate important agronomic traits such as phosphate starvation response, flowering time and interaction with symbiotic organisms, making them of great interest in plant biology and in breeding. There is still a lack of lncRNAs in most sequenced plant species, and in those where they have been annotated, different methods have been used, so making the lncRNAs less useful in comparisons within and between species. We developed a pipeline to annotate lncRNAs and applied it to 37 plant species and six algae, resulting in the annotation of more than 120 000 lncRNAs. To facilitate the study of lncRNAs for the plant research community, the information gathered is organised in the Green Non-Coding Database (GreeNC, http://greenc.sciencedesigners.com/).",GreeNC,0.990412146,Non-Coding Database,0.685981143,GreeNC,0.990412146,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +32510565,http://greencirc.cn,"GreenCircRNA: a database for plant circRNAs that act as miRNA decoys. . Circular RNAs (circRNAs) are endogenous non-coding RNAs that form a covalently closed continuous loop, are widely distributed and play important roles in a series of developmental processes. 
In plants, an increasing number of studies have found that circRNAs can regulate plant metabolism and are involved in plant responses to biotic or abiotic stress. Acting as miRNA decoys is a critical way for circRNAs to perform their functions. Therefore, we developed GreenCircRNA-a database for plant circRNAs acting as miRNA decoys that is dedicated to providing a plant-based platform for detailed exploration of plant circRNAs and their potential decoy functions. This database includes over 210 000 circRNAs from 69 species of plants; the main data sources of circRNAs in this database are NCBI, EMBL-EBI and Phytozome. To investigate the function of circRNAs as competitive endogenous RNAs, the possibility of circRNAs from 38 plants to act as miRNA decoys was predicted. Moreover, we provide basic information for the circRNAs in the database, including their locations, host genes and relative expression levels, as well as full-length sequences, host gene GO (Gene Ontology) numbers and circRNA visualization. GreenCircRNA is the first database for the prediction of circRNAs that act as miRNA decoys and contains the largest number of plant species. Database URL: http://greencirc.cn.",GreenCircRNA,0.994266331,NA,0,GreenCircRNA,0.994266331,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +34639237,http://bioinfo.usu.edu/GreeningDB,"GreeningDB: A Database of Host-Pathogen Protein-Protein Interactions and Annotation Features of the Bacteria Causing Huanglongbing HLB Disease. . The Citrus genus comprises some of the most important and commonly cultivated fruit plants. Within the last decade, citrus greening disease (also known as huanglongbing or HLB) has emerged as the biggest threat for the citrus industry. This disease does not have a cure yet and, thus, many efforts have been made to find a solution to this devastating condition. 
There are challenges in the generation of high-yield resistant cultivars, in part due to the limited and sparse knowledge about the mechanisms that are used by the Liberibacter bacteria to proliferate the infection in Citrus plants. Here, we present GreeningDB, a database implemented to provide the annotation of Liberibacter proteomes, as well as the host-pathogen comparactomics tool, a novel platform to compare the predicted interactomes of two HLB host-pathogen systems. GreeningDB is built to deliver a user-friendly interface, including network visualization and links to other resources. We hope that by providing these characteristics, GreeningDB can become a central resource to retrieve HLB-related protein annotations, and thus, aid the community that is pursuing the development of molecular-based strategies to mitigate this disease's impact. The database is freely available at http://bioinfo.usu.edu/GreeningDB/ (accessed on 11 August 2021).",GreeningDB,0.995977521,NA,0,GreeningDB,0.995977521,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/8/2021 +33237299,http://www.greenphyl.org,"GreenPhylDB v5: a comparative pangenomic database for plant genomes. Comparative genomics is the analysis of genomic relationships among different species and serves as a significant base for evolutionary and functional genomic studies. GreenPhylDB (https://www.greenphyl.org) is a database designed to facilitate the exploration of gene families and homologous relationships among plant genomes, including staple crops critically important for global food security. GreenPhylDB is available since 2007, after the release of the Arabidopsis thaliana and Oryza sativa genomes and has undergone multiple releases. With the number of plant genomes currently available, it becomes challenging to select a single reference for comparative genomics studies but there is still a lack of databases taking advantage several genomes by species for orthology detection. 
GreenPhylDBv5 introduces the concept of comparative pangenomics by harnessing multiple genome sequences by species. We created 19 pangenes and processed them with other species still relying on one genome. In total, 46 plant species were considered to build gene families and predict their homologous relationships through phylogenetic-based analyses. In addition, since the previous publication, we rejuvenated the website and included a new set of original tools including protein-domain combination, tree topologies searches and a section for users to store their own results in order to support community curation efforts.",GreenPhylDB,0.997109532,NA,0,GreenPhylDB,0.997109532,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +32055858,http://mora-lab.github.io/projects/greg.html,"GREG-studying transcriptional regulation using integrative graph databases. . A gene regulatory process is the result of the concerted action of transcription factors, co-factors, regulatory non-coding RNAs (ncRNAs) and chromatin interactions. Therefore, the combination of protein-DNA, protein-protein, ncRNA-DNA, ncRNA-protein and DNA-DNA data in a single graph database offers new possibilities regarding generation of biological hypotheses. GREG (The Gene Regulation Graph Database) is an integrative database and web resource that allows the user to visualize and explore the network of all above-mentioned interactions for a query transcription factor, long non-coding RNA, genomic range or DNA annotation, as well as extracting node and interaction information, identifying connected nodes and performing advanced graphical queries directly on the regulatory network, in a simple and efficient way. In this article, we introduce GREG together with some application examples (including exploratory research of Nanog's regulatory landscape and the etiology of chronic obstructive pulmonary disease), which we use as a demonstration of the advantages of using graph databases in biomedical research. 
Database URL: https://mora-lab.github.io/projects/greg.html, www.moralab.science/GREG/.",GREG,0.972979486,The Gene Regulation Graph Database,0.943068614,GREG,0.972979486,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +33252190,http://lmc.uab.es/grindb,"GRIN database: A unified and manually curated repertoire of GRIN variants. Glutamatergic neurotransmission is crucial for brain development, wiring neuronal function, and synaptic plasticity mechanisms. Recent genetic studies showed the existence of autosomal dominant de novo GRIN gene variants associated with GRIN-related disorders (GRDs), a rare pediatric neurological disorder caused by N-methyl- d-aspartate receptor (NMDAR) dysfunction. Notwithstanding, GRIN variants identification is exponentially growing and their clinical, genetic, and functional annotations remain highly fragmented, representing a bottleneck in GRD patient's stratification. To shorten the gap between GRIN variant identification and patient stratification, we present the GRIN database (GRINdb), a publicly available, nonredundant, updated, and curated database gathering all available genetic, functional, and clinical data from more than 4000 GRIN variants. The manually curated GRINdb outputs on a web server, allowing query and retrieval of reported GRIN variants, and thus representing a fast and reliable bioinformatics resource for molecular clinical advice. Furthermore, the comprehensive mapping of GRIN variants' genetic and clinical information along NMDAR structure revealed important differences in GRIN variants' pathogenicity and clinical phenotypes, shedding light on GRIN-specific fingerprints. Overall, the GRINdb and web server is a resource for molecular stratification of GRIN variants, delivering clinical and investigational insights into GRDs. 
GRINdb is accessible at http://lmc.uab.es/grindb.",GRINdb,0.997000337,RIN database,0.806897382,GRINdb,0.997000337,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/30/2020 +21410407,http://grip.cbrc.jp/GDB/index.html,"GRIPDB - G protein coupled Receptor Interaction Partners DataBase. The G protein Coupled Receptor (GPCR) superfamily is one of the most important pharmaceutical targets. Studies of GPCRs have long been performed under the assumption that GPCRs function as monomers. However, recent studies have revealed that many GPCRs function as homo- and/or hetero-dimers or higher-order oligomeric molecular complexes. As a result, information about GPCR oligomerization is rapidly accumulating, although the molecular mechanisms of oligomerization are not fully understood. A comprehensive collection of information about oligomerization would accelerate investigations of the molecular mechanisms of GPCRs' oligomerization and involvement in signaling. Hence, we have developed a database, G protein coupled Receptor Interaction Partners DataBase (GRIPDB), which provides information about GPCR oligomerization. The entries in the database are divided into two sections: (I) Experiment Information section and (II) Prediction Information section. The Experiment Information section contains (I-i) experimentally indentified GPCR oligomers and their annotations, and (I-ii) experimentally suggested interfaces for the oligomerization. Since the number of experimentally suggested interfaces is limited, the entries in the Prediction Information section have been introduced to provide information about the oligomerization interfaces predicted by our computational method. The experimentally suggested or computationally predicted interfaces are displayed by 3D graphics, using GPCRs with available coordinates. The information in the GRIPDB, especially that about the interfaces, is useful to investigate the molecular mechanisms of signal transduction via GPCR oligomerization. 
The GRIPDB is available on the web at the following URL: http://grip.cbrc.jp/GDB/index.html .",GRIPDB,0.932066238,coupled Receptor Interaction Partners DataBase,0.720138676,GRIPDB,0.932066238,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/17/2011 +33151298,http://www.grndb.com,"GRNdb: decoding the gene regulatory networks in diverse human and mouse conditions. Gene regulatory networks (GRNs) formed by transcription factors (TFs) and their downstream target genes play essential roles in gene expression regulation. Moreover, GRNs can be dynamic changing across different conditions, which are crucial for understanding the underlying mechanisms of disease pathogenesis. However, no existing database provides comprehensive GRN information for various human and mouse normal tissues and diseases at the single-cell level. Based on the known TF-target relationships and the large-scale single-cell RNA-seq data collected from public databases as well as the bulk data of The Cancer Genome Atlas and the Genotype-Tissue Expression project, we systematically predicted the GRNs of 184 different physiological and pathological conditions of human and mouse involving >633 000 cells and >27 700 bulk samples. We further developed GRNdb, a freely accessible and user-friendly database (http://www.grndb.com/) for searching, comparing, browsing, visualizing, and downloading the predicted information of 77 746 GRNs, 19 687 841 TF-target pairs, and related binding motifs at single-cell/bulk resolution. GRNdb also allows users to explore the gene expression profile, correlations, and the associations between expression levels and the patient survival of diverse cancers. 
Overall, GRNdb provides a valuable and timely resource to the scientific community to elucidate the functions and mechanisms of gene expression regulation in various conditions.",GRNdb,0.997971952,NA,0,GRNdb,0.997971952,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +22238269,http://virtualchemistry.org,"GROMACS molecule & liquid database. Motivation The molecular dynamics simulation package GROMACS is a widely used tool used in a broad range of different applications within physics, chemistry and biology. It is freely available, user friendly and extremely efficient. The GROMACS software is force field agnostic, and compatible with many molecular dynamics force fields; coarse-grained, unified atom, all atom as well as polarizable models based on the charge on a spring concept. To validate simulations, it is necessary to compare results from the simulations to experimental data. To ease the process of setting up topologies and structures for simulations, as well as providing pre-calculated physical properties along with experimental values for the same we provide a web-based database, containing 145 organic molecules at present. Results Liquid properties of 145 organic molecules have been simulated using two different force fields, OPLS all atom and Generalized Amber Force Field. So far, eight properties have been calculated (the density, enthalpy of vaporization, surface tension, heat capacity at constant volume and pressure, isothermal compressibility, volumetric expansion coefficient and the static dielectric constant). The results, together with experimental values are available through the database, along with liquid structures and topologies for the 145 molecules, in the two force fields. Availability The database is freely available under http://virtualchemistry.org.",GROMACS,0.992195368,NA,0,GROMACS,0.992195368,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/11/2012 +31725863,http://bioinfo.tmu.edu.cn/GRONS,"GRONS: a comprehensive genetic resource of nicotine and smoking. . 
Nicotine, the primary psychoactive component in tobacco, can exert a broad impact on both the central and peripheral nervous systems. During the past years, a tremendous amount of efforts has been put to exploring the molecular mechanisms underlying tobacco smoking related behaviors and diseases, and many susceptibility genes have been identified via various genomic approaches. For many human complex diseases, there is a trend towards collecting and integrating the data from genetic studies and the biological information related to them into a comprehensive resource for further investigation, but we have not found such an effort for nicotine addiction or smoking-related phenotypes yet. To collect, curate, and integrate cross-platform genetic data so as to make them interpretable and easily accessible, we developed Genetic Resources Of Nicotine and Smoking (GRONS), a comprehensive database for genes related to biological response to nicotine exposure, tobacco smoking related behaviors or diseases. GRONS deposits genes from nicotine addiction studies in the following four categories, i.e. association study, genome-wide linkage scan, expression analysis on genes/proteins via high-throughput technologies, as well as single gene/protein-based experimental studies via literature search. Moreover, GRONS not only provides tools for data browse, search and graphical presentation of gene prioritization, but also presents the results from comprehensive bioinformatics analyses for the prioritized genes associated with nicotine addiction. With more and more genetic data and analysis tools integrated, GRONS will become a useful resource for studies focusing on nicotine addiction or tobacco smoking. 
Database URL: http://bioinfo.tmu.edu.cn/GRONS/.",GRONS,0.979251385,Genetic Resources Of Nicotine and Smoking,0.928520481,GRONS,0.979251385,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +28365739,http://www.grtedb.org,"GrTEdb: the first web-based database of transposable elements in cotton (Gossypium raimondii). . Although several diploid and tetroploid Gossypium species genomes have been sequenced, the well annotated web-based transposable elements (TEs) database is lacking. To better understand the roles of TEs in structural, functional and evolutionary dynamics of the cotton genome, a comprehensive, specific, and user-friendly web-based database, Gossypium raimondii transposable elements database (GrTEdb), was constructed. A total of 14 332 TEs were structurally annotated and clearly categorized in G. raimondii genome, and these elements have been classified into seven distinct superfamilies based on the order of protein-coding domains, structures and/or sequence similarity, including 2929 Copia-like elements, 10 368 Gypsy-like elements, 299 L1 , 12 Mutators , 435 PIF-Harbingers , 275 CACTAs and 14 Helitrons . Meanwhile, the web-based sequence browsing, searching, downloading and blast tool were implemented to help users easily and effectively to annotate the TEs or TE fragments in genomic sequences from G. raimondii and other closely related Gossypium species. GrTEdb provides resources and information related with TEs in G. raimondii , and will facilitate gene and genome analyses within or across Gossypium species, evaluating the impact of TEs on their host genomes, and investigating the potential interaction between TEs and protein-coding genes in Gossypium species. http://www.grtedb.org/.",GrTEdb,0.997847676,Gossypium raimondii transposable elements database,0.843352804,GrTEdb,0.997847676,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +28387199,"http://bigd.big.ac.cn/gsa, http://gsa.big.ac.cn","GSA: Genome Sequence Archive. 
With the rapid development of sequencing technologies towards higher throughput and lower cost, sequence data are generated at an unprecedentedly explosive rate. To provide an efficient and easy-to-use platform for managing huge sequence data, here we present Genome Sequence Archive (GSA; http://bigd.big.ac.cn/gsa or http://gsa.big.ac.cn), a data repository for archiving raw sequence data. In compliance with data standards and structures of the International Nucleotide Sequence Database Collaboration (INSDC), GSA adopts four data objects (BioProject, BioSample, Experiment, and Run) for data organization, accepts raw sequence reads produced by a variety of sequencing platforms, stores both sequence reads and metadata submitted from all over the world, and makes all these data publicly available to worldwide scientific communities. In the era of big data, GSA is not only an important complement to existing INSDC members by alleviating the increasing burdens of handling sequence data deluge, but also takes the significant responsibility for global big data archive and provides free unrestricted access to all publicly available data in support of research activities throughout the world.",GSA,0.994967302,Genome Sequence Archive,0.958378598,GSA,0.994967302,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/2/2017 +31608375,http://www.asteraceaegenomesize.com,"Progress in the study of genome size evolution in Asteraceae: analysis of the last update. . The Genome Size in Asteraceae Database (GSAD, http://www.asteraceaegenomesize.com) has been recently updated, with data from papers published or in press until July 2018. This constitutes the third release of GSAD, currently containing 4350 data entries for 1496 species, which represent a growth of 22.52% in the number of species with available genome size data compared with the previous release, and a growth of 57.72% in terms of entries. Approximately 6% of Asteraceae species are covered in terms of known genome sizes. 
The number of source papers included in this release (198) means a 48.87% increase with respect to release 2.0. The significant data increase was exploited to study the genome size evolution in the family from a phylogenetic perspective. Our results suggest that the role of chromosome number in genome size diversity within Asteraceae is basically associated to polyploidy, while dysploidy would only cause minor variation in the DNA amount along the family. Among diploid taxa, we found that the evolution of genome size shows a strong phylogenetic signal. However, this trait does not seem to evolve evenly across the phylogeny, but there could be significant scale and clade-dependent patterns. Our analyses indicate that the phylogenetic signal is stronger at low taxonomic levels, with certain tribes standing out as hotspots of autocorrelation between genome size and phylogeny. Finally, we also observe meaningful associations among nuclear DNA content on Asteraceae species and other phenotypical and ecological traits (i.e. plant habit and invasion ability). Overall, this study emphasizes the need to continue generating and analysing genome size data in order to puzzle out the evolution of this parameter and its many biological correlates.",GSAD,0.995496213,Genome Size in Asteraceae Database,0.987456696,GSAD,0.995496213,1,NA,24288377,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,1/1/2019 +24288377,"http://data.kew.org/cvalues, http://www.asteraceaegenomesize.com","Recent updates and developments to plant genome size databases. Two plant genome size databases have been recently updated and/or extended: the Plant DNA C-values database (http://data.kew.org/cvalues), and GSAD, the Genome Size in Asteraceae database (http://www.asteraceaegenomesize.com). While the first provides information on nuclear DNA contents across land plants and some algal groups, the second is focused on one of the largest and most economically important angiosperm families, Asteraceae. 
Genome size data have numerous applications: they can be used in comparative studies on genome evolution, or as a tool to appraise the cost of whole-genome sequencing programs. The growing interest in genome size and increasing rate of data accumulation has necessitated the continued update of these databases. Currently, the Plant DNA C-values database (Release 6.0, Dec. 2012) contains data for 8510 species, while GSAD has 1219 species (Release 2.0, June 2013), representing increases of 17 and 51%, respectively, in the number of species with genome size data, compared with previous releases. Here we provide overviews of the most recent releases of each database, and outline new features of GSAD. The latter include (i) a tool to visually compare genome size data between species, (ii) the option to export data and (iii) a webpage containing information about flow cytometry protocols.",GSAD,0.932329893,NA,0,GSAD,0.932329893,1,NA,31608375,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: URL scramble,NA,NA,11/27/2013 +32758136,http://sysbio.rnet.missouri.edu/3dgenome/GSDB,"GSDB: a database of 3D chromosome and genome structures reconstructed from Hi-C data. Advances in the study of chromosome conformation capture technologies, such as Hi-C technique - capable of capturing chromosomal interactions in a genome-wide scale - have led to the development of three-dimensional chromosome and genome structure reconstruction methods from Hi-C data. The three dimensional genome structure is important because it plays a role in a variety of important biological activities such as DNA replication, gene regulation, genome interaction, and gene expression. 
In recent years, numerous Hi-C datasets have been generated, and likewise, a number of genome structure construction algorithms have been developed.In this work, we outline the construction of a novel Genome Structure Database (GSDB) to create a comprehensive repository that contains 3D structures for Hi-C datasets constructed by a variety of 3D structure reconstruction tools. The GSDB contains over 50,000 structures from 12 state-of-the-art Hi-C data structure prediction algorithms for 32 Hi-C datasets.GSDB functions as a centralized collection of genome structures which will enable the exploration of the dynamic architectures of chromosomes and genomes for biomedical research. GSDB is accessible at http://sysbio.rnet.missouri.edu/3dgenome/GSDB.",GSDB,0.924460858,Genome Structure Database,0.767952025,GSDB,0.924460858,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/5/2020 +23193275,http://www.ncbi.nlm.nih.gov/gtr,"The NIH genetic testing registry: a new, centralized database of genetic tests to enable access to comprehensive information and improve transparency. The National Institutes of Health Genetic Testing Registry (GTR; available online at http://www.ncbi.nlm.nih.gov/gtr/) maintains comprehensive information about testing offered worldwide for disorders with a genetic basis. Information is voluntarily submitted by test providers. The database provides details of each test (e.g. its purpose, target populations, methods, what it measures, analytical validity, clinical validity, clinical utility, ordering information) and laboratory (e.g. location, contact information, certifications and licenses). Each test is assigned a stable identifier of the format GTR000000000, which is versioned when the submitter updates information. 
Data submitted by test providers are integrated with basic information maintained in National Center for Biotechnology Information's databases and presented on the web and through FTP (ftp.ncbi.nih.gov/pub/GTR/_README.html).",GTR,0.947066327,Testing Registry,0.783265024,GTR,0.947066327,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/27/2012 +"30445619, 33231677",http://gtrd.biouml.org,"GTRD: a database on gene transcription regulation-2019 update. The current version of the Gene Transcription Regulation Database (GTRD; http://gtrd.biouml.org) contains information about: (i) transcription factor binding sites (TFBSs) and transcription coactivators identified by ChIP-seq experiments for Homo sapiens, Mus musculus, Rattus norvegicus, Danio rerio, Caenorhabditis elegans, Drosophila melanogaster, Saccharomyces cerevisiae, Schizosaccharomyces pombe and Arabidopsis thaliana; (ii) regions of open chromatin and TFBSs (DNase footprints) identified by DNase-seq; (iii) unmappable regions where TFBSs cannot be identified due to repeats; (iv) potential TFBSs for both human and mouse using position weight matrices from the HOCOMOCO database. Raw ChIP-seq and DNase-seq data were obtained from ENCODE and SRA, and uniformly processed. ChIP-seq peaks were called using four different methods: MACS, SISSRs, GEM and PICS. Moreover, peaks for the same factor and peak calling method, albeit using different experiment conditions (cell line, treatment, etc.), were merged into clusters. To reduce noise, such clusters for different peak calling methods were merged into meta-clusters; these were considered to be non-redundant TFBS sets. Moreover, extended quality control was applied to all ChIP-seq data. Web interface to access GTRD was developed using the BioUML platform. 
It provides browsing and displaying information, advanced search possibilities and an integrated genome browser.",GTRD,0.998101741,Gene Transcription Regulation Database,0.985109007,GTRD,0.998101741,2,NA,27924024,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +27924024,http://gtrd.biouml.org)-is,"GTRD: a database of transcription factor binding sites identified by ChIP-seq experiments. GTRD-Gene Transcription Regulation Database (http://gtrd.biouml.org)-is a database of transcription factor binding sites (TFBSs) identified by ChIP-seq experiments for human and mouse. Raw ChIP-seq data were obtained from ENCODE and SRA and uniformly processed: (i) reads were aligned using Bowtie2; (ii) ChIP-seq peaks were called using peak callers MACS, SISSRs, GEM and PICS; (iii) peaks for the same factor and peak callers, but different experiment conditions (cell line, treatment, etc.), were merged into clusters; (iv) such clusters for different peak callers were merged into metaclusters that were considered as non-redundant sets of TFBSs. In addition to information on location in genome, the sets contain structured information about cell lines and experimental conditions extracted from descriptions of corresponding ChIP-seq experiments. A web interface to access GTRD was developed using the BioUML platform. It provides: (i) browsing and displaying information; (ii) advanced search possibilities, e.g. 
search of TFBSs near the specified gene or search of all genes potentially regulated by a specified transcription factor; (iii) integrated genome browser that provides visualization of the GTRD data: read alignments, peaks, clusters, metaclusters and information about gene structures from the Ensembl database and binding sites predicted using position weight matrices from the HOCOMOCO database.",GTRD,0.998025298,Transcription Regulation Database,0.78921698,GTRD,0.998025298,1,NA,"30445619.0, 33231677.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/24/2016 +26673694,http://gtrnadb.ucsc.edu,"GtRNAdb 2.0: an expanded database of transfer RNA genes identified in complete and draft genomes. Transfer RNAs represent the largest, most ubiquitous class of non-protein coding RNA genes found in all living organisms. The tRNAscan-SE search tool has become the de facto standard for annotating tRNA genes in genomes, and the Genomic tRNA Database (GtRNAdb) was created as a portal for interactive exploration of these gene predictions. Since its published description in 2009, the GtRNAdb has steadily grown in content, and remains the most commonly cited web-based source of tRNA gene information. In this update, we describe not only a major increase in the number of tRNA predictions (>367000) and genomes analyzed (>4370), but more importantly, the integration of new analytic and functional data to improve the quality and biological context of tRNA gene predictions. New information drawn from other sources includes tRNA modification data, epigenetic data, single nucleotide polymorphisms, gene expression and evolutionary conservation. A richer set of analytic data is also presented, including better tRNA functional prediction, non-canonical features, predicted structural impacts from sequence variants and minimum free energy structural predictions. Views of tRNA genes in genomic context are provided via direct links to the UCSC genome browsers. 
The database can be searched by sequence or gene features, and is available at http://gtrnadb.ucsc.edu/.",GtRNAdb,0.991023004,Genomic,0.679828405,GtRNAdb,0.991023004,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/15/2015 +24428872,http://www.GuavaH.org,"GuavaH: a compendium of host genomic data in HIV biology and disease. Background There is an ever-increasing volume of data on host genes that are modulated during HIV infection, influence disease susceptibility or carry genetic variants that impact HIV infection. We created GuavaH (Genomic Utility for Association and Viral Analyses in HIV, http://www.GuavaH.org), a public resource that supports multipurpose analysis of genome-wide genetic variation and gene expression profile across multiple phenotypes relevant to HIV biology. Findings We included original data from 8 genome and transcriptome studies addressing viral and host responses in and ex vivo. These studies cover phenotypes such as HIV acquisition, plasma viral load, disease progression, viral replication cycle, latency and viral-host genome interaction. This represents genome-wide association data from more than 4,000 individuals, exome sequencing data from 392 individuals, in vivo transcriptome microarray data from 127 patients/conditions, and 60 sets of RNA-seq data. Additionally, GuavaH allows visualization of protein variation in ~8,000 individuals from the general population. The publicly available GuavaH framework supports queries on (i) unique single nucleotide polymorphism across different HIV related phenotypes, (ii) gene structure and variation, (iii) in vivo gene expression in the setting of human infection (CD4+ T cells), and (iv) in vitro gene expression data in models of permissive infection, latency and reactivation. Conclusions The complexity of the analysis of host genetic influences on HIV biology and pathogenesis calls for comprehensive motors of research on curated data. 
The tool developed here allows queries and supports validation of the rapidly growing body of host genomic information pertinent to HIV research.",GuavaH,0.995271742,NA,0,GuavaH,0.995271742,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/15/2014 +31509535,http://hive.biochemistry.gwu.edu/gfkb,"Baseline human gut microbiota profile in healthy people and standard reporting template. A comprehensive knowledge of the types and ratios of microbes that inhabit the healthy human gut is necessary before any kind of pre-clinical or clinical study can be performed that attempts to alter the microbiome to treat a condition or improve therapy outcome. To address this need we present an innovative scalable comprehensive analysis workflow, a healthy human reference microbiome list and abundance profile (GutFeelingKB), and a novel Fecal Biome Population Report (FecalBiome) with clinical applicability. GutFeelingKB provides a list of 157 organisms (8 phyla, 18 classes, 23 orders, 38 families, 59 genera and 109 species) that forms the baseline biome and therefore can be used as healthy controls for studies related to dysbiosis. This list can be expanded to 863 organisms if closely related proteomes are considered. The incorporation of microbiome science into routine clinical practice necessitates a standard report for comparison of an individual's microbiome to the growing knowledgebase of ""normal"" microbiome data. The FecalBiome and the underlying technology of GutFeelingKB address this need. The knowledgebase can be useful to regulatory agencies for the assessment of fecal transplant and other microbiome products, as it contains a list of organisms from healthy individuals. In addition to the list of organisms and their abundances, this study also generated a collection of assembled contiguous sequences (contigs) of metagenomics dark matter. 
In this study, metagenomic dark matter represents sequences that cannot be mapped to any known sequence but can be assembled into contigs of 10,000 nucleotides or higher. These sequences can be used to create primers to study potential novel organisms. All data is freely available from https://hive.biochemistry.gwu.edu/gfkb and NCBI's Short Read Archive.",GutFeelingKB,0.991220474,NA,0,GutFeelingKB,0.991220474,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/11/2019 +31584099,http://bio-annotation.cn/gutMDisorder,"gutMDisorder: a comprehensive database for dysbiosis of the gut microbiota in disorders and interventions. gutMDisorder (http://bio-annotation.cn/gutMDisorder), a manually curated database, aims at providing a comprehensive resource of dysbiosis of the gut microbiota in disorders and interventions. Alterations in the composition of the gut microbial community play crucial roles in the development of chronic disorders. And the beneficial effects of drugs, foods and other intervention measures on disorders could be microbially mediated. The current version of gutMDisorder documents 2263 curated associations between 579 gut microbes and 123 disorders or 77 intervention measures in Human, and 930 curated associations between 273 gut microbes and 33 disorders or 151 intervention measures in Mouse. Each entry in the gutMDisorder contains detailed information on an association, including an intestinal microbe, a disorder name, intervention measures, experimental technology and platform, characteristic of samples, web sites for downloading the sequencing data, a brief description of the association, a literature reference, and so on. gutMDisorder provides a user-friendly interface to browse, retrieve each entry using gut microbes, disorders, and intervention measures. 
It also offers pages for downloading all the entries and submitting new experimentally validated associations.",gutMDisorder,0.997505665,NA,0,gutMDisorder,0.997505665,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +32496513,http://gutmega.omicsbio.info,"gutMEGA: a database of the human gut MEtaGenome Atlas. . The gut microbiota plays important roles in human health through regulating both physiological homeostasis and disease emergence. The accumulation of metagenomic sequencing studies enables us to better understand the temporal and spatial variations of the gut microbiota under different physiological and pathological conditions. However, it is inconvenient for scientists to query and retrieve published data; thus, a comprehensive resource for the quantitative gut metagenome is urgently needed. In this study, we developed gut MEtaGenome Atlas (gutMEGA), a well-annotated comprehensive database, to curate and host published quantitative gut microbiota datasets from Homo sapiens. By carefully curating the gut microbiota composition, phenotypes and experimental information, gutMEGA finally integrated 59 132 quantification events for 6457 taxa at seven different levels (kingdom, phylum, class, order, family, genus and species) under 776 conditions. Moreover, with various browsing and search functions, gutMEGA provides a fast and simple way for users to obtain the relative abundances of intestinal microbes among phenotypes. Overall, gutMEGA is a convenient and comprehensive resource for gut metagenome research, which can be freely accessed at http://gutmega.omicsbio.info.",gutMEGA,0.989511013,gut MEtaGenome Atlas,0.956748178,gutMEGA,0.989511013,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +"29069473, 33170268",http://bigd.big.ac.cn/gvm,"Genome Variation Map: a data repository of genome variations in BIG Data Center. The Genome Variation Map (GVM; http://bigd.big.ac.cn/gvm/) is a public data repository of genome variations. 
As a core resource in the BIG Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences, GVM dedicates to collect, integrate and visualize genome variations for a wide range of species, accepts submissions of different types of genome variations from all over the world and provides free open access to all publicly available data in support of worldwide research activities. Unlike existing related databases, GVM features integration of a large number of genome variations for a broad diversity of species including human, cultivated plants and domesticated animals. Specifically, the current implementation of GVM not only houses a total of ∼4.9 billion variants for 19 species including chicken, dog, goat, human, poplar, rice and tomato, but also incorporates 8669 individual genotypes and 13 262 manually curated high-quality genotype-to-phenotype associations for non-human species. In addition, GVM provides friendly intuitive web interfaces for data submission, browse, search and visualization. Collectively, GVM serves as an important resource for archiving genomic variation data, helpful for better understanding population genetic diversity and deciphering complex mechanisms associated with different phenotypes.",GVM,0.997632504,Genome Variation Map,0.979110241,GVM,0.997632504,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +30445434,http://www.ebi.ac.uk/gwas,"The NHGRI-EBI GWAS Catalog of published genome-wide association studies, targeted arrays and summary statistics 2019. The GWAS Catalog delivers a high-quality curated collection of all published genome-wide association studies enabling investigations to identify causal variants, understand disease mechanisms, and establish targets for novel therapies. The scope of the Catalog has also expanded to targeted and exome arrays with 1000 new associations added for these technologies. As of September 2018, the Catalog contains 5687 GWAS comprising 71673 variant-trait associations from 3567 publications. 
New content includes 284 full P-value summary statistics datasets for genome-wide and new targeted array studies, representing 6 × 109 individual variant-trait statistics. In the last 12 months, the Catalog's user interface was accessed by ∼90000 unique users who viewed >1 million pages. We have improved data access with the release of a new RESTful API to support high-throughput programmatic access, an improved web interface and a new summary statistics database. Summary statistics provision is supported by a new format proposed as a community standard for summary statistics data representation. This format was derived from our experience in standardizing heterogeneous submissions, mapping formats and in harmonizing content. Availability: https://www.ebi.ac.uk/gwas/.",GWAS,0.68312782,NA,0,GWAS,0.68312782,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +31566222,http://bigd.big.ac.cn/gwas,"GWAS Atlas: a curated resource of genome-wide variant-trait associations in plants and animals. GWAS Atlas (https://bigd.big.ac.cn/gwas/) is a manually curated resource of genome-wide variant-trait associations for a wide range of species. Unlike existing related resources, it features comprehensive integration of a high-quality collection of 75 467 variant-trait associations for 614 traits across 7 cultivated plants (cotton, Japanese apricot, maize, rapeseed, rice, sorghum and soybean) and two domesticated animals (goat and pig), which were manually curated from 254 publications. We integrated these associations into GWAS Atlas and presented them in terms of variants, genes, traits, studies and publications. More importantly, all associations and traits were annotated and organized based on a suite of ontologies (Plant Trait Ontology, Animal Trait Ontology for Livestock, etc.). 
Taken together, GWAS Atlas integrates high-quality curated GWAS associations for animals and plants and provides user-friendly web interfaces for data browsing and downloading, accordingly serving as a valuable resource for genetic research of important traits and breeding application.",GWAS Atlas,0.990907868,NA,0,GWAS Atlas,0.990907868,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +"24301061, 31612961",http://www.gwascentral.org,"GWAS Central: a comprehensive resource for the comparison and interrogation of genome-wide association studies. To facilitate broad and convenient integrative visualization of and access to GWAS data, we have created the GWAS Central resource (http://www.gwascentral.org). This database seeks to provide a comprehensive collection of summary-level genetic association data, structured both for maximal utility and for safe open access (i.e., non-directional signals to fully preclude research subject identification). The resource emphasizes on advanced tools that allow comparison and discovery of relevant data sets from the perspective of genes, genome regions, phenotypes or traits. Tested markers and relevant genomic features can be visually interrogated across up to 16 multiple association data sets in a single view, starting at a chromosome-wide view and increasing in resolution down to individual bases. In addition, users can privately upload and view their own data as temporary files. Search and display utility is further enhanced by exploiting phenotype ontology annotations to allow genetic variants associated with phenotypes and traits of interest to be precisely identified, across all studies. Data submissions are accepted from individual researchers, groups and consortia, whereas we also actively gather data sets from various public sources. 
As a result, the resource now provides over 67 million P-values for over 1600 studies, making it the world's largest openly accessible online collection of summary-level GWAS association information.",GWAS Central,0.977628668,NA,0,GWAS Central,0.977628668,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +"22139925, 26615194",http://jjwanglab.org/gwasdb,"GWASdb: a database for human genetic variants identified by genome-wide association studies. Recent advances in genome-wide association studies (GWAS) have enabled us to identify thousands of genetic variants (GVs) that are associated with human diseases. As next-generation sequencing technologies become less expensive, more GVs will be discovered in the near future. Existing databases, such as NHGRI GWAS Catalog, collect GVs with only genome-wide level significance. However, many true disease susceptibility loci have relatively moderate P values and are not included in these databases. We have developed GWASdb that contains 20 times more data than the GWAS Catalog and includes less significant GVs (P < 1.0 × 10(-3)) manually curated from the literature. In addition, GWASdb provides comprehensive functional annotations for each GV, including genomic mapping information, regulatory effects (transcription factor binding sites, microRNA target sites and splicing sites), amino acid substitutions, evolution, gene expression and disease associations. Furthermore, GWASdb classifies these GVs according to diseases using Disease-Ontology Lite and Human Phenotype Ontology. It can conduct pathway enrichment and PPI network association analysis for these diseases. GWASdb provides an intuitive, multifunctional database for biologists and clinicians to explore GVs and their functional inferences. 
It is freely available at http://jjwanglab.org/gwasdb and will be updated frequently.",GWASdb,0.991770446,NA,0,GWASdb,0.991770446,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2015 +34175476,"http://ngdc.cncb.ac.cn, http://ngdc.cncb.ac.cn/gwh","Genome Warehouse: A Public Repository Housing Genome-scale Data. The Genome Warehouse (GWH) is a public repository housing genome assembly data for a wide range of species and delivering a series of web services for genome data submission, storage, release, and sharing. As one of the core resources in the National Genomics Data Center (NGDC), part of the China National Center for Bioinformation (CNCB; https://ngdc.cncb.ac.cn), GWH accepts both full and partial (chloroplast, mitochondrion, and plasmid) genome sequences with different assembly levels, as well as an update of existing genome assemblies. For each assembly, GWH collects detailed genome-related metadata of biological project, biological sample, and genome assembly, in addition to genome sequence and annotation. To archive high-quality genome sequences and annotations, GWH is equipped with a uniform and standardized procedure for quality control. Besides basic browse and search functionalities, all released genome sequences and annotations can be visualized with JBrowse. By May 21, 2021, GWH has received 19,124 direct submissions covering a diversity of 1108 species and has released 8772 of them. Collectively, GWH serves as an important resource for genome-scale data management and provides free and publicly accessible data to support research activities throughout the world. GWH is publicly accessible at https://ngdc.cncb.ac.cn/gwh.",GWH,0.995265027,Genome Warehouse,0.928986808,GWH,0.995265027,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/24/2021 +31231515,http://vri1.gxbsidra.org/dm3/geneBrowser/list,"A curated transcriptome dataset collection to investigate the blood transcriptional response to viral respiratory tract infection and vaccination. 
The human immune defense mechanisms and factors associated with good versus poor health outcomes following viral respiratory tract infections (VRTI), as well as correlates of protection following vaccination against respiratory viruses, remain incompletely understood. To shed further light into these mechanisms, a number of systems-scale studies have been conducted to measure transcriptional changes in blood leukocytes of either naturally or experimentally infected individuals, or in individual's post-vaccination. Here we are making available a public repository, for research investigators for interpretation, a collection of transcriptome datasets obtained from human whole blood and peripheral blood mononuclear cells (PBMC) to investigate the transcriptional responses following viral respiratory tract infection or vaccination against respiratory viruses. In total, Thirty one31 datasets, associated to viral respiratory tract infections and their related vaccination studies, were identified and retrieved from the NCBI Gene Expression Omnibus (GEO) and loaded in a custom web application designed for interactive query and visualization of integrated large-scale data. Quality control checks, using relevant biological markers, were performed. Multiple sample groupings and rank lists were created to facilitate dataset query and interpretation. Via this interface, users can generate web links to customized graphical views, which may be subsequently inserted into manuscripts to report novel findings. The GXB tool enables browsing of a single gene across projects, providing new perspectives on the role of a given molecule across biological systems in the diagnostic and prognostic following VRTI but also in identifying new correlates of protection. 
This dataset collection is available at: http://vri1.gxbsidra.org/dm3/geneBrowser/list.",GXB,0.976282716,NA,0,GXB,0.976282716,1,NA,"29527288.0, 31290545.0, 31559014.0, 27158451.0, 27158452.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,3/13/2019 +29527288,http://breastcancer.gxbsidra.org/dm3/geneBrowser/list,"A collection of annotated and harmonized human breast cancer transcriptome datasets, including immunologic classification. The increased application of high-throughput approaches in translational research has expanded the number of publicly available data repositories. Gathering additional valuable information contained in the datasets represents a crucial opportunity in the biomedical field. To facilitate and stimulate utilization of these datasets, we have recently developed an interactive data browsing and visualization web application, the Gene Expression Browser (GXB). In this note, we describe a curated compendium of 13 public datasets on human breast cancer, representing a total of 2142 transcriptome profiles. We classified the samples according to different immune based classification systems and integrated this information into the datasets. Annotated and harmonized datasets were uploaded to GXB. Study samples were categorized in different groups based on their immunologic tumor response profiles, intrinsic molecular subtypes and multiple clinical parameters. Ranked gene lists were generated based on relevant group comparisons. In this data note, we demonstrate the utility of GXB to evaluate the expression of a gene of interest, find differential gene expression between groups and investigate potential associations between variables with a specific focus on immunologic classification in breast cancer. 
This interactive resource is publicly available online at: http://breastcancer.gxbsidra.org/dm3/geneBrowser/list.",GXB,0.973645926,NA,0,GXB,0.973645926,1,NA,"31231515.0, 31290545.0, 31559014.0, 27158451.0, 27158452.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,3/20/2017 +"27158451, 27158452",http://monocyte.gxbsidra.org/dm3/landing.gsp,"A compendium of monocyte transcriptome datasets to foster biomedical knowledge discovery. Systems-scale profiling approaches have become widely used in translational research settings. The resulting accumulation of large-scale datasets in public repositories represents a critical opportunity to promote insight and foster knowledge discovery. However, resources that can serve as an interface between biomedical researchers and such vast and heterogeneous dataset collections are needed in order to fulfill this potential. Recently, we have developed an interactive data browsing and visualization web application, the Gene Expression Browser (GXB). This tool can be used to overlay deep molecular phenotyping data with rich contextual information about analytes, samples and studies along with ancillary clinical or immunological profiling data. In this note, we describe a curated compendium of 93 public datasets generated in the context of human monocyte immunological studies, representing a total of 4,516 transcriptome profiles. Datasets were uploaded to an instance of GXB along with study description and sample annotations. Study samples were arranged in different groups. Ranked gene lists were generated based on relevant group comparisons. 
This resource is publicly available online at http://monocyte.gxbsidra.org/dm3/landing.gsp.",GXB,0.902687609,NA,0,GXB,0.902687609,2,NA,"29527288.0, 31231515.0, 31290545.0, 31559014.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,4/25/2016 +31559014,http://pid.gxbsidra.org/dm3/geneBrowser/list,"A curated transcriptome dataset collection to investigate inborn errors of immunity. Primary immunodeficiencies (PIDs) are a heterogeneous group of inherited disorders, frequently caused by loss-of-function and less commonly by gain-of-function mutations, which can result in susceptibility to a broad or a very narrow range of infections but also in inflammatory, allergic or malignant diseases. Owing to the wide range in clinical manifestations and variability in penetrance and expressivity, there is an urgent need to better understand the underlying molecular, cellular and immunological phenotypes in PID patients in order to improve clinical diagnosis and management. Here we have compiled a manually curated collection of public transcriptome datasets mainly obtained from human whole blood, peripheral blood mononuclear cells (PBMCs) or fibroblasts of patients with PIDs and of control subjects for subsequent meta-analysis, query and interpretation. A total of eighteen (18) datasets derived from studies of PID patients were identified and retrieved from the NCBI Gene Expression Omnibus (GEO) database and loaded in GXB, a custom web application designed for interactive query and visualization of integrated large-scale data. The dataset collection includes samples from well characterized PID patients that were stimulated ex vivo under a variety of conditions to assess the molecular consequences of the underlying, naturally occurring gene defects on a genome-wide scale. 
Multiple sample groupings and rank lists were generated to facilitate comparisons of the transcriptional responses between different PID patients and control subjects. The GXB tool enables browsing of a single transcript across studies, thereby providing new perspectives on the role of a given molecule across biological systems and PID patients. This dataset collection is available at http://pid.gxbsidra.org/dm3/geneBrowser/list.",GXB,0.899784207,NA,0,GXB,0.899784207,1,NA,"29527288.0, 31231515.0, 31290545.0, 27158451.0, 27158452.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,2/15/2019 +31290545,http://ige.gxbsidra.org/dm3/geneBrowser/list,"A curated collection of transcriptome datasets to investigate the molecular mechanisms of immunoglobulin E-mediated atopic diseases. . Prevalence of allergies has reached ~20% of population in developed countries and sensitization rate to one or more allergens among school age children are approaching 50%. However, the combination of the complexity of atopic allergy susceptibility/development and environmental factors has made identification of gene biomarkers challenging. The amount of publicly accessible transcriptomic data presents an unprecedented opportunity for mechanistic discoveries and validation of complex disease signatures across studies. However, this necessitates structured methodologies and visual tools for the interpretation of results. Here, we present a curated collection of transcriptomic datasets relevant to immunoglobin E-mediated atopic diseases (ranging from allergies to primary immunodeficiencies). Thirty-three datasets from the Gene Expression Omnibus, encompassing 1860 transcriptome profiles, were made available on the Gene Expression Browser (GXB), an online and open-source web application that allows for the query, visualization and annotation of metadata. 
The thematic compositions, disease categories, sample number and platforms of the collection are described. Ranked gene lists and sample grouping are used to facilitate data visualization/interpretation and are available online via GXB (http://ige.gxbsidra.org/dm3/geneBrowser/list). Dataset validation using associated publications showed good concordance in GXB gene expression trend and fold-change.",GXB,0.651388884,NA,0,GXB,0.651388884,1,NA,"29527288.0, 31231515.0, 31559014.0, 27158451.0, 27158452.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2019 +24163257,http://www.informatics.jax.org/expression.shtml,"The mouse Gene Expression Database (GXD): 2014 update. The Gene Expression Database (GXD; http://www.informatics.jax.org/expression.shtml) is an extensive and well-curated community resource of mouse developmental expression information. GXD collects different types of expression data from studies of wild-type and mutant mice, covering all developmental stages and including data from RNA in situ hybridization, immunohistochemistry, RT-PCR, northern blot and western blot experiments. The data are acquired from the scientific literature and from researchers, including groups doing large-scale expression studies. Integration with the other data in Mouse Genome Informatics (MGI) and interconnections with other databases places GXD's gene expression information in the larger biological and biomedical context. Since the last report, the utility of GXD has been greatly enhanced by the addition of new data and by the implementation of more powerful and versatile search and display features. 
Web interface enhancements include the capability to search for expression data for genes associated with specific phenotypes and/or human diseases; new, more interactive data summaries; easy downloading of data; direct searches of expression images via associated metadata; and new displays that combine image data and their associated annotations. At present, GXD includes >1.4 million expression results and 250,000 images that are accessible to our search tools.",GXD,0.998189569,Expression Database,0.871181101,GXD,0.998189569,1,NA,33104772,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/25/2013 +33104772,"http://www.ebi.ac.uk/arrayexpress/, http://www.ncbi.nlm.nih.gov/geo","The mouse Gene Expression Database (GXD): 2021 update. The Gene Expression Database (GXD; www.informatics.jax.org/expression.shtml) is an extensive and well-curated community resource of mouse developmental gene expression information. For many years, GXD has collected and integrated data from RNA in situ hybridization, immunohistochemistry, RT-PCR, northern blot, and western blot experiments through curation of the scientific literature and by collaborations with large-scale expression projects. Since our last report in 2019, we have continued to acquire these classical types of expression data; developed a searchable index of RNA-Seq and microarray experiments that allows users to quickly and reliably find specific mouse expression studies in ArrayExpress (https://www.ebi.ac.uk/arrayexpress/) and GEO (https://www.ncbi.nlm.nih.gov/geo/); and expanded GXD to include RNA-Seq data. Uniformly processed RNA-Seq data are imported from the EBI Expression Atlas and then integrated with the other types of expression data in GXD, and with the genetic, functional, phenotypic and disease-related information in Mouse Genome Informatics (MGI). This integration has made the RNA-Seq data accessible via GXD's enhanced searching and filtering capabilities. 
Further, we have embedded the Morpheus heat map utility into the GXD user interface to provide additional tools for display and analysis of RNA-Seq data, including heat map visualization, sorting, filtering, hierarchical clustering, nearest neighbors analysis and visual enrichment.",GXD,0.997449532,The mouse Gene Expression Database,0.87091248,GXD,0.997449532,1,NA,24163257,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +22016855,http://gydb.org/index.php/Mobilomics,"GyDB mobilomics: LTR retroelements and integrase-related transposons of the pea aphid Acyrthosiphon pisum genome. The Gypsy Database concerning Mobile Genetic Elements (release 2.0) is a wiki-style project devoted to the phylogenetic classification of LTR retroelements and their viral and host gene relatives characterized from distinct organisms. Furthermore, GyDB 2.0 is concerned with studying mobile elements within genomes. Therefore, an in-progress repository was created for databases with annotations of mobile genetic elements from particular genomes. This repository is called Mobilomics and the first uploaded database contains 549 LTR retroelements and related transposases which have been annotated from the genome of the Pea aphid Acyrthosiphon pisum. Mobilomics is accessible from the GyDB 2.0 project using the URL: http://gydb.org/index.php/Mobilomics.",GyDB,0.945335805,Gypsy Database concerning Mobile Genetic,0.892279019,GyDB,0.945335805,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2011 +23197657,http://www.h-invitational.jp,"H-InvDB in 2013: an omics study platform for human functional gene and transcript discovery. H-InvDB (http://www.h-invitational.jp/) is a comprehensive human gene database started in 2004. In the latest version, H-InvDB 8.0, a total of 244 709 human complementary DNA was mapped onto the hg19 reference genome and 43 829 gene loci, including nonprotein-coding ones, were identified. 
Of these loci, 35 631 were identified as potential protein-coding genes, and 22 898 of these were identical to known genes. In our analysis, 19 309 annotated genes were specific to H-InvDB and not found in RefSeq and Ensembl. In fact, 233 genes of the 19 309 turned out to have protein functions in this version of H-InvDB; they were annotated as unknown protein functions in the previous version. Furthermore, 11 genes were identified as known Mendelian disorder genes. It is advantageous that many biologically functional genes are hidden in the H-InvDB unique genes. As large-scale proteomic projects have been conducted to elucidate the functions of all human proteins, we have enhanced the proteomic information with an advanced protein view and new subdatabase of protein complexes (Protein Complex Database with quality index). We propose that H-InvDB is an important resource for finding novel candidate targets for medical care and drug development.",H-InvDB,0.99724789,NA,0,H-InvDB,0.99724789,1,NA,23245335,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/28/2012 +23245335,"http://hinv.jp/, http://hinv.jp/hinv/h-epd","Full-length transcriptome-based H-InvDB throws a new light on chromosome-centric proteomics. H-Invitational Database (H-InvDB; http://hinv.jp/ ) is an integrated database of all human genes and transcripts that started in an international collaborative research project for establishing a functional annotation database of human full-length cDNAs. Because H-InvDB contains an abundance of information for human transcripts, including not only well-characterized protein-coding transcripts but also those without experimental evidence at the protein level, this will be a useful information resource for identifying novel and uncharacterized human proteins (so-called missing proteins). 
By extending predicted protein data in H-InvDB, we developed the H-Inv Extended Protein Database (H-EPD; http://hinv.jp/hinv/h-epd/ ). From now on, we plan to carry out a database-driven proteome research that makes full use of H-EPD to promote discoveries in the current and future C-HPP. Furthermore, we will push forward with the integration of genome, transcriptome, and proteome databases using a unique tool for connecting distributed databases and would like to develop a knowledge discovery system by incorporating data mining tools.",H-InvDB,0.994180217,H-Invitational Database,0.770428264,H-InvDB,0.994180217,1,NA,23197657,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,12/17/2012 +23193255,http://tga.nig.ac.jp/h2db,"H2DB: a heritability database across multiple species by annotating trait-associated genomic loci. H2DB (http://tga.nig.ac.jp/h2db/), an annotation database of genetic heritability estimates for humans and other species, has been developed as a knowledge database to connect trait-associated genomic loci. Heritability estimates have been investigated for individual species, particularly in human twin studies and plant/animal breeding studies. However, there appears to be no comprehensive heritability database for both humans and other species. Here, we introduce an annotation database for genetic heritabilities of various species that was annotated by manually curating online public resources in PUBMED abstracts and journal contents. The proposed heritability database contains attribute information for trait descriptions, experimental conditions, trait-associated genomic loci and broad- and narrow-sense heritability specifications. Annotated trait-associated genomic loci, for which most are single-nucleotide polymorphisms derived from genome-wide association studies, may be valuable resources for experimental scientists. 
In addition, we assigned phenotype ontologies to the annotated traits for the purposes of discussing heritability distributions based on phenotypic classifications.",H2DB,0.996937255,NA,0,H2DB,0.996937255,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2012 +33413085,http://www.zhounan.org/h2v,"H2V: a database of human genes and proteins that respond to SARS-CoV-2, SARS-CoV, and MERS-CoV infection. Background The ongoing global COVID-19 pandemic is caused by SARS-CoV-2, a novel coronavirus first discovered at the end of 2019. It has led to more than 50 million confirmed cases and more than 1 million deaths across 219 countries as of 11 November 2020, according to WHO statistics. SARS-CoV-2, SARS-CoV, and MERS-CoV are similar. They are highly pathogenic and threaten public health, impair the economy, and inflict long-term impacts on society. No drug or vaccine has been approved as a treatment for these viruses. Efforts to develop antiviral measures have been hampered by the insufficient understanding of how the human body responds to viral infections at the cellular and molecular levels. Results In this study, journal articles and transcriptomic and proteomic data surveying coronavirus infections were collected. Response genes and proteins were then identified by differential analyses comparing gene/protein levels between infected and control samples. Finally, the H2V database was created to contain the human genes and proteins that respond to SARS-CoV-2, SARS-CoV, and MERS-CoV infection. Conclusions H2V provides molecular information about the human response to infection. It can be a powerful tool to discover cellular pathways and processes relevant for viral pathogenesis to identify potential drug targets. It is expected to accelerate the process of antiviral agent development and to inform preparations for potential future coronavirus-related emergencies. 
The database is available at: http://www.zhounan.org/h2v .",H2V,0.848025173,NA,0,H2V,0.848025173,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/7/2021 +30247654,http://bioinfo.vanderbilt.edu/AE/HACER,"HACER: an atlas of human active enhancers to interpret regulatory variants. Recent studies have shown that disease-susceptibility variants frequently lie in cell-type-specific enhancer elements. To identify, interpret, and prioritize such risk variants, we must identify the enhancers active in disease-relevant cell types, their upstream transcription factor (TF) binding, and their downstream target genes. To address this need, we built HACER (http://bioinfo.vanderbilt.edu/AE/HACER/), an atlas of Human ACtive Enhancers to interpret Regulatory variants. The HACER atlas catalogues and annotates in-vivo transcribed cell-type-specific enhancers, as well as placing enhancers within transcriptional regulatory networks by integrating ENCODE TF ChIP-Seq and predicted/validated chromatin interaction data. We demonstrate the utility of HACER in (i) offering a mechanistic hypothesis to explain the association of SNP rs614367 with ER-positive breast cancer risk, (ii) exploring tumor-specific enhancers in selective MYC dysregulation and (iii) prioritizing/annotating non-coding regulatory regions targeting CCND1. HACER provides a valuable resource for studies of GWAS, non-coding variants, and enhancer-mediated regulation.",HACER,0.997927487,NA,0,HACER,0.997927487,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30395284,http://www.haemosphere.org,"Haemopedia RNA-seq: a database of gene expression during haematopoiesis in mice and humans. During haematopoiesis, haematopoietic stem cells differentiate into restricted potential progenitors before maturing into the many lineages required for oxygen transport, wound healing and immune response. 
We have updated Haemopedia, a database of gene-expression profiles from a broad spectrum of haematopoietic cells, to include RNA-seq gene-expression data from both mice and humans. The Haemopedia RNA-seq data set covers a wide range of lineages and progenitors, with 57 mouse blood cell types (flow sorted populations from healthy mice) and 12 human blood cell types. This data set has been made accessible for exploration and analysis, to researchers and clinicians with limited bioinformatics experience, on our online portal Haemosphere: https://www.haemosphere.org. Haemosphere also includes nine other publicly available high-quality data sets relevant to haematopoiesis. We have added the ability to compare gene expression across data sets and species by curating data sets with shared lineage designations or to view expression gene vs gene, with all plots available for download by the user.",Haemopedia,0.996026993,NA,0,Haemopedia,0.996026993,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +"23193293, 29121237",http://genomics.senescence.info,"Human Ageing Genomic Resources: integrated databases and tools for the biology and genetics of ageing. The Human Ageing Genomic Resources (HAGR, http://genomics.senescence.info) is a freely available online collection of research databases and tools for the biology and genetics of ageing. HAGR features now several databases with high-quality manually curated data: (i) GenAge, a database of genes associated with ageing in humans and model organisms; (ii) AnAge, an extensive collection of longevity records and complementary traits for >4000 vertebrate species; and (iii) GenDR, a newly incorporated database, containing both gene mutations that interfere with dietary restriction-mediated lifespan extension and consistent gene expression changes induced by dietary restriction. 
Since its creation about 10 years ago, major efforts have been undertaken to maintain the quality of data in HAGR, while further continuing to develop, improve and extend it. This article briefly describes the content of HAGR and details the major updates since its previous publications, in terms of both structure and content. The completely redesigned interface, more intuitive and more integrative of HAGR resources, is also presented. Altogether, we hope that through its improvements, the current version of HAGR will continue to provide users with the most comprehensive and accessible resources available today in the field of biogerontology.",HAGR,0.996995687,Human Ageing Genomic Resources,0.98925361,HAGR,0.996995687,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +33259604,http://www.hahmirdb.in,"HAHmiR.DB: a server platform for high-altitude human miRNA-gene coregulatory networks and associated regulatory circuits. . Around 140 million people live in high-altitude (HA) conditions! and even a larger number visit such places for tourism, adventure-seeking or sports training. Rapid ascent to HA can cause severe damage to the body organs and may lead to many fatal disorders. During induction to HA, human body undergoes various physiological, biochemical, hematological and molecular changes to adapt to the extreme environmental conditions. Several literature references hint that gene-expression-regulation and regulatory molecules like miRNAs and transcription factors (TFs) control adaptive responses during HA stress. These biomolecules are known to interact in a complex combinatorial manner to fine-tune the gene expression and help in controlling the molecular responses during this stress and ultimately help in acclimatization. 
High-Altitude Human miRNA Database (HAHmiR.DB) is a unique, comprehensive and curated collection of miRNAs that have been experimentally validated to be associated with HA stress, their level of expression in different altitudes, fold change, experiment duration, biomarker association, disease and drug association, tissue-specific expression level, Gene Ontology (GO) and Kyoto Encyclopaedia of Gene and Genomes (KEGG) pathway associations. As a server platform, it also uniquely constructs and analyses interactive miRNA-TF-gene coregulatory networks and extracts regulatory circuits/feed-forward loops (FFLs). These regulatory circuits help to offer mechanistic insights into complex regulatory mechanisms during HA stress. The server can also build these regulatory networks between two and more miRNAs of the database and also identify the regulatory circuits from this network. Hence, HAHmiR.DB is the first-of-its-kind database in HA research, which is a reliable platform to explore, compare, analyse and retrieve miRNAs associated with HA stress, their coregulatory networks and FFL regulatory-circuits. HAHmiR.DB is freely accessible at http://www.hahmirdb.in.",HAHmiR.DB,0.977624993,High-Altitude Human miRNA Database,0.911724165,HAHmiR.DB,0.977624993,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2020 +29796383,http://www.halodom.bio.auth.gr,"HaloDom: a new database of halophiles across all life domains. Background Halophilic organisms may thrive in or tolerate high salt concentrations. They have been studied for decades and a considerable number of papers reporting new halophilic species are being published every year. However, an extensive collection of these salt-loving organisms does not exist nowadays. Halophilic life forms have representatives from all three life domains, Archaea, Bacteria and Eukarya. 
The purpose of this study was to search for all documented halophilic species in the scientific literature and accommodate this information in the form of an online database. Results We recorded more than 1000 halophilic species from the scientific literature. From these, 21.9% belong to Archaea, 50.1% to Bacteria and 27.9% to Eukaryotes. Our records contain basic information such as the salinity that a particular organism was found, its taxonomy and genomic information via NCBI and other links. The online database named ""HaloDom"" can be accessed at http://www.halodom.bio.auth.gr. Conclusions Over the last few years, data on halophiles are growing fast. Compared to previous efforts, this new halophiles database expands its coverage to all life domains and offers a valuable reference system for studies in biotechnology, early life evolution and comparative genomics.",HaloDom,0.946077734,NA,0,HaloDom,0.946077734,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/15/2018 +22613085,http://haltorf.roucoulab.com,"HAltORF: a database of predicted out-of-frame alternative open reading frames in human. Human alternative open reading frames (HAltORF) is a publicly available and searchable online database referencing putative products of out-of-frame alternative translation initiation (ATI) in human mRNAs. Out-of-frame ATI is a process by which a single mRNA encodes independent proteins, when distinct initiation codons located in different reading frames are recognized by a ribosome to initiate translation. This mechanism is largely used in viruses to increase the coding potential of small viral genomes. There is increasing evidence that out-of-frame ATI is also used in eukaryotes, including human, and may contribute to the diversity of the human proteome. 
HAltORF is the first web-based searchable database that allows thorough investigation in the human transcriptome of out-of-frame alternative open reading frames with a start codon located in a strong Kozak context, and are thus the more likely to be expressed. It is also the first large scale study on the human transcriptome to successfully predict the expression of out-of-frame ATI protein products that were previously discovered experimentally. HAltORF will be a useful tool for the identification of human genes with multiple coding sequences, and will help to better define and understand the complexity of the human proteome. Database URL: http://haltorf.roucoulab.com/.",HAltORF,0.995966196,Human alternative open reading frames,0.858935988,HAltORF,0.995966196,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/20/2012 +30066211,http://hamdb.scbdd.com,"HAMdb: a database of human autophagy modulators with specific pathway and disease information. Autophagy is an important homeostatic cellular recycling mechanism responsible for degrading unnecessary or dysfunctional cellular organelles and proteins in all living cells. In addition to its vital homeostatic role, this degradation pathway also involves in various human disorders, including metabolic conditions, neurodegenerative diseases, cancers and infectious diseases. Therefore, the comprehensive understanding of autophagy process, autophagy-related modulators and corresponding pathway and disease information will be of great help for identifying the new autophagy modulators, potential drug candidates, new diagnostic and therapeutic targets. In recent years, some autophagy databases providing structural and functional information were developed, but the specific databases covering autophagy modulator (proteins, chemicals and microRNAs)-related target, pathway and disease information do not exist. 
Hence, we developed an online resource, Human Autophagy Modulator Database (HAMdb, http://hamdb.scbdd.com ), to provide researchers related pathway and disease information as many as possible. HAMdb contains 796 proteins, 841 chemicals and 132 microRNAs. Their specific effects on autophagy, physicochemical information, biological information and disease information were manually collected and compiled. Additionally, lots of external links were available for more information covering extensive biomedical knowledge. HAMdb provides a user-friendly interface to query, search, browse autophagy modulators and their comprehensive related information. HAMdb will help researchers understand the whole autophagy process and provide detailed information about related diseases. Furthermore, it can give hints for the identification of new diagnostic and therapeutic targets and the discovery of new autophagy modulators. In a word, we hope that HAMdb has the potential to promote the autophagy research in pharmacological and pathophysiological area.",HAMdb,0.991403401,Human Autophagy Modulator Database,0.978727545,HAMdb,0.991403401,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/31/2018 +22105744,http://www.chogenome.org,Chinese hamster genome database: an online resource for the CHO community at www.CHOgenome.org. The Chinese hamster genome database (http://www.chogenome.org/) is an online resource for the Chinese hamster (Cricetulus griseus) and Chinese hamster ovary (CHO) cell communities. CHO cells are important for biomedical research and are widely used in industry for the production of biopharmaceuticals. 
The genome of the CHO-K1 cell line was recently sequenced and the CHO community has developed an online resource to facilitate accessibility of the genomic data and the development of genomic tools.,NA,0,hamster,0.673994482,hamster,0.673994482,1,28605771,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,11/22/2011 +26510927,http://www.handdatabase.org,"The HAND Database: a gateway to understanding the role of HIV in HIV-associated neurocognitive disorders. Background Despite an augmented research effort and scale-up of highly active antiretroviral therapy, a high prevalence of HIV-1-associated neurocognitive disorders (HAND) persists in the HIV-infected population. Nearly 50 % of all HIV-1-infected individuals suffer from a neurocognitive disorder due to neural and synaptodendritic damage. Challenges in HAND research, including limited availability of brain tissue from HIV patients, variation in HAND study protocols, and virus genotyping inconsistency and errors, however, have resulted in studies with insufficient power to delineate molecular mechanisms underlying HAND pathogenesis. There exists, therefore, a great need for a reliable and centralized resource specific to HAND research, particularly for epidemiological study and surveillance in resource-limited countries where severe forms of HAND persist. Description To address the aforementioned imperative need, here we present the HAND Database, a resource containing well-curated and up-to-date HAND virus information and associated clinical and epidemiological data. This database provides information on 5,783 non-redundant HIV-1 sequences from global HAND research published to date, representing a total of 163 unique individuals that have been assessed for HAND. A user-friendly interface allows for flexible searching, filtering, browsing, and downloading of data. 
The most comprehensive database of its kind, the HAND Database not only bolsters current HAND research by increasing sampling power and reducing study biases caused by protocol variation and genotyping inconsistency, it allows for comparison between HAND studies across different dimensions. Development of the HAND Database has also revealed significant knowledge gaps in HIV-driven neuropathology. These gaps include inadequate sequencing of viral genes beyond env, lack of HAND viral data from HIV epidemiologically important regions including Asian and Sub-Saharan African countries, and biased sampling toward the male gender, all factors that impede efforts toward providing an improved quality of life to HIV-infected individuals, and toward elimination of viruses in the brain. Conclusion Our aim with the HAND database is to provide researchers in both the HIV and neuroscience fields a comprehensive and rigorous data source toward better understanding virus compartmentalization and to help in design of improved strategies against HAND viruses. We also expect this resource, which will be updated on a regular basis, to be useful as a reliable reference for further HAND epidemiology studies. The HAND Database is freely available and accessible online at http://www.handdatabase.org .",HAND,0.570083857,NA,0,HAND,0.570083857,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/28/2015 +22064851,http://compbio.mit.edu/HaploReg,"HaploReg: a resource for exploring chromatin states, conservation, and regulatory motif alterations within sets of genetically linked variants. The resolution of genome-wide association studies (GWAS) is limited by the linkage disequilibrium (LD) structure of the population being studied. Selecting the most likely causal variants within an LD block is relatively straightforward within coding sequence, but is more difficult when all variants are intergenic. 
Predicting functional non-coding sequence has been recently facilitated by the availability of conservation and epigenomic information. We present HaploReg, a tool for exploring annotations of the non-coding genome among the results of published GWAS or novel sets of variants. Using LD information from the 1000 Genomes Project, linked SNPs and small indels can be visualized along with their predicted chromatin state in nine cell types, conservation across mammals and their effect on regulatory motifs. Sets of SNPs, such as those resulting from GWAS, are analyzed for an enrichment of cell type-specific enhancers. HaploReg will be useful to researchers developing mechanistic hypotheses of the impact of non-coding variants on clinical phenotypes and normal variation. The HaploReg database is available at http://compbio.mit.edu/HaploReg.",HaploReg,0.996389806,NA,0,HaploReg,0.996389806,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/7/2011 +28212602,http://discovery.informatics.uab.edu/HAPPI,"HAPPI-2: a Comprehensive and High-quality Map of Human Annotated and Predicted Protein Interactions. Background Human protein-protein interaction (PPI) data is essential to network and systems biology studies. PPI data can help biochemists hypothesize how proteins form complexes by binding to each other, how extracellular signals propagate through post-translational modification of de-activated signaling molecules, and how chemical reactions are coupled by enzymes involved in a complex biological process. Our capability to develop good public database resources for human PPI data has a direct impact on the quality of future research on genome biology and medicine. Results The database of Human Annotated and Predicted Protein Interactions (HAPPI) version 2.0 is a major update to the original HAPPI 1.0 database. It contains 2,922,202 unique protein-protein interactions (PPI) linked by 23,060 human proteins, making it the most comprehensive database covering human PPI data today. 
These PPIs contain both physical/direct interactions and high-quality functional/indirect interactions. Compared with the HAPPI 1.0 database release, HAPPI database version 2.0 (HAPPI-2) represents a 485% of human PPI data coverage increase and a 73% protein coverage increase. The revamped HAPPI web portal provides users with a friendly search, curation, and data retrieval interface, allowing them to retrieve human PPIs and available annotation information on the interaction type, interaction quality, interacting partner drug targeting data, and disease information. The updated HAPPI-2 can be freely accessed by Academic users at http://discovery.informatics.uab.edu/HAPPI . Conclusions While the underlying data for HAPPI-2 are integrated from a diverse data sources, the new HAPPI-2 release represents a good balance between data coverage and data quality of human PPIs, making it ideally suited for network biology.",HAPPI,0.973021567,Human Annotated and Predicted Protein Interactions,0.80146156,HAPPI,0.973021567,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/17/2017 +23095476,http://algae.manoa.hawaii.edu/hfwadb,"The Hawaiian Freshwater Algal Database (HfwADB): a laboratory LIMS and online biodiversity resource. Background Biodiversity databases serve the important role of highlighting species-level diversity from defined geographical regions. Databases that are specially designed to accommodate the types of data gathered during regional surveys are valuable in allowing full data access and display to researchers not directly involved with the project, while serving as a Laboratory Information Management System (LIMS). The Hawaiian Freshwater Algal Database, or HfwADB, was modified from the Hawaiian Algal Database to showcase non-marine algal specimens collected from the Hawaiian Archipelago by accommodating the additional level of organization required for samples including multiple species. 
Description The Hawaiian Freshwater Algal Database is a comprehensive and searchable database containing photographs and micrographs of samples and collection sites, geo-referenced collecting information, taxonomic data and standardized DNA sequence data. All data for individual samples are linked through unique 10-digit accession numbers (""Isolate Accession""), the first five of which correspond to the collection site (""Environmental Accession""). Users can search online for sample information by accession number, various levels of taxonomy, habitat or collection site. HfwADB is hosted at the University of Hawaii, and was made publicly accessible in October 2011. At the present time the database houses data for over 2,825 samples of non-marine algae from 1,786 collection sites from the Hawaiian Archipelago. These samples include cyanobacteria, red and green algae and diatoms, as well as lesser representation from some other algal lineages. Conclusions HfwADB is a digital repository that acts as a Laboratory Information Management System for Hawaiian non-marine algal data. Users can interact with the repository through the web to view relevant habitat data (including geo-referenced collection locations) and download images of collection sites, specimen photographs and micrographs, and DNA sequences. It is publicly available at http://algae.manoa.hawaii.edu/hfwadb/.",HfwADB,0.964678764,Hawaiian Freshwater Algal Database,0.969474773,Hawaiian Freshwater Algal Database,0.969474773,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/25/2012 +31976536,http://hbdb.cmdm.tw,"Human Breathomics Database. . Breathomics is a special branch of metabolomics that quantifies volatile organic compounds (VOCs) from collected exhaled breath samples. 
Understanding how breath molecules are related to diseases, mechanisms and pathways identified from experimental analytical measurements is challenging due to the lack of an organized resource describing breath molecules, related references and biomedical information embedded in the literature. To provide breath VOCs, related references and biomedical information, we aim to organize a database composed of manually curated information and automatically extracted biomedical information. First, VOCs-related disease information was manually organized from 207 literature linked to 99 VOCs and known Medical Subject Headings (MeSH) terms. Then an automated text mining algorithm was used to extract biomedical information from this literature. In the end, the manually curated information and auto-extracted biomedical information was combined to form a breath molecule database-the Human Breathomics Database (HBDB). We first manually curated and organized disease information including MeSH term from 207 literatures associated with 99 VOCs. Then, an automatic pipeline of text mining approach was used to collect 2766 literatures and extract biomedical information from breath researches. We combined curated information with automatically extracted biomedical information to assemble a breath molecule database, the HBDB. The HBDB is a database that includes references, VOCs and diseases associated with human breathomics. Most of these VOCs were detected in human breath samples or exhaled breath condensate samples. So far, the database contains a total of 913 VOCs in relation to human exhaled breath researches reported in 2766 publications. The HBDB is the most comprehensive HBDB of VOCs in human exhaled breath to date. It is a useful and organized resource for researchers and clinicians to identify and further investigate potential biomarkers from the breath of patients. 
Database URL: https://hbdb.cmdm.tw.",HBDB,0.696552753,Human Breathomics Database,0.63233763,HBDB,0.696552753,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +34642750,http://bmbl.bmi.osumc.edu/HBFP,"HBFP: a new repository for human body fluid proteome. . Body fluid proteome has been intensively studied as a primary source for disease biomarker discovery. Using advanced proteomics technologies, early research success has resulted in increasingly accumulated proteins detected in different body fluids, among which many are promising biomarkers. However, despite a handful of small-scale and specific data resources, current research is clearly lacking effort compiling published body fluid proteins into a centralized and sustainable repository that can provide users with systematic analytic tools. In this study, we developed a new database of human body fluid proteome (HBFP) that focuses on experimentally validated proteome in 17 types of human body fluids. The current database archives 11 827 unique proteins reported by 164 scientific publications, with a maximal false discovery rate of 0.01 on both the peptide and protein levels since 2001, and enables users to query, analyze and download protein entries with respect to each body fluid. Three unique features of this new system include the following: (i) the protein annotation page includes detailed abundance information based on relative qualitative measures of peptides reported in the original references, (ii) a new score is calculated on each reported protein to indicate the discovery confidence and (iii) HBFP catalogs 7354 proteins with at least two non-nested uniquely mapping peptides of nine amino acids according to the Human Proteome Project Data Interpretation Guidelines, while the remaining 4473 proteins have more than two unique peptides without given sequence information. 
As an important resource for human protein secretome, we anticipate that this new HBFP database can be a powerful tool that facilitates research in clinical proteomics and biomarker discovery. Database URL: https://bmbl.bmi.osumc.edu/HBFP/.",HBFP,0.931257725,human body fluid proteome,0.65359441,HBFP,0.931257725,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/1/2021 +33125055,http://globin.bx.psu.edu/hbvar,"Clinically relevant updates of the HbVar database of human hemoglobin variants and thalassemia mutations. HbVar (http://globin.bx.psu.edu/hbvar) is a widely-used locus-specific database (LSDB) launched 20 years ago by a multi-center academic effort to provide timely information on the numerous genomic variants leading to hemoglobin variants and all types of thalassemia and hemoglobinopathies. Here, we report several advances for the database. We made clinically relevant updates of HbVar, implemented as additional querying options in the HbVar query page, allowing the user to explore the clinical phenotype of compound heterozygous patients. We also made significant improvements to the HbVar front page, making comparative data querying, analysis and output more user-friendly. We continued to expand and enrich the regular data content, involving 1820 variants, 230 of which are new entries. We also increased the querying potential and expanded the usefulness of HbVar database in the clinical setting. These several additions, expansions and updates should improve the utility of HbVar both for the globin research community and in a clinical setting.",HbVar,0.997882962,NA,0,HbVar,0.997882962,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +23125365,http://hbvdb.ibcp.fr,"HBVdb: a knowledge database for Hepatitis B Virus. We have developed a specialized database, HBVdb (http://hbvdb.ibcp.fr), allowing the researchers to investigate the genetic variability of Hepatitis B Virus (HBV) and viral resistance to treatment. 
HBV is a major health problem worldwide with more than 350 million individuals being chronically infected. HBV is an enveloped DNA virus that replicates by reverse transcription of an RNA intermediate. HBV genome is optimized, being circular and encoding four overlapping reading frames. Indeed, each nucleotide of the genome takes part in the coding of at least one protein. However, HBV shows some genome variability leading to at least eight different genotypes and recombinant forms. The main drugs used to treat infected patients are nucleos(t)ides analogs (reverse transcriptase inhibitors). Unfortunately, HBV mutants resistant to these drugs may be selected and be responsible for treatment failure. HBVdb contains a collection of computer-annotated sequences based on manually annotated reference genomes. The database can be accessed through a web interface that allows static and dynamic queries and offers integrated generic sequence analysis tools and specialized analysis tools (e.g. annotation, genotyping, drug resistance profiling).",HBVdb,0.996498883,NA,0,HBVdb,0.996498883,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/3/2012 +30266410,http://lifeome.net/database/hccdb,"HCCDB: A Database of Hepatocellular Carcinoma Expression Atlas. Hepatocellular carcinoma (HCC) is highly heterogeneous in nature and has been one of the most common cancer types worldwide. To ensure repeatability of identified gene expression patterns and comprehensively annotate the transcriptomes of HCC, we carefully curated 15 public HCC expression datasets that cover around 4000 clinical samples and developed the database HCCDB to serve as a one-stop online resource for exploring HCC gene expression with user-friendly interfaces. The global differential gene expression landscape of HCC was established by analyzing the consistently differentially expressed genes across multiple datasets. 
Moreover, a 4D metric was proposed to fully characterize the expression pattern of each gene by integrating data from The Cancer Genome Atlas (TCGA) and Genotype-Tissue Expression (GTEx). To facilitate a comprehensive understanding of gene expression patterns in HCC, HCCDB also provides links to third-party databases on drug, proteomics, and literatures, and graphically displays the results from computational analyses, including differential expression analysis, tissue-specific and tumor-specific expression analysis, survival analysis, and co-expression analysis. HCCDB is freely accessible at http://lifeome.net/database/hccdb.",HCCDB,0.998168528,Carcinoma,0.526366234,HCCDB,0.998168528,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2018 +29088455,http://hcmdb.i-sanger.com/index,"HCMDB: the human cancer metastasis database. Metastasis is the main event leading to death in cancer patients. Over the past decade, high-throughput technologies have provided genome-wide view of transcriptomic changes associated with cancer metastases. Many microarray and RNA sequencing studies have addressed metastases-related expression patterns in various types of cancer, and the number of relevant works continues to increase rapidly. These works have characterized genes that orchestrate the metastatic phenotype of cancer cells. However, these expression data have been deposited in various repositories, and efficiently analyzing these data is still difficult because of the lack of an integrated data mining platform. To facilitate the in-depth analyses of transcriptome data on metastasis, it is quite important to make a comprehensive integration of these metastases-related expression data. Here, we presented a database, HCMDB (the human cancer metastasis database, http://hcmdb.i-sanger.com/index), which is freely accessible to the research community query cross-platform transcriptome data on metastases. 
HCMDB is developed and maintained as a useful resource for building the systems-biology understanding of metastasis.",HCMDB,0.996219456,human cancer metastasis database,0.887695861,HCMDB,0.996219456,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +28529078,http://bioinfo.au.tsinghua.edu.cn/member/xwwang/HCSGD,"HCSGD: An integrated database of human cellular senescence genes. Cellular senescence is an irreversible cell cycle arrest program in response to various exogenous and endogenous stimuli like telomere dysfunction and DNA damage. It has been widely accepted as an anti-tumor program and is also found closely related to embryo development, tissue repair, organismal aging and age-related degenerative diseases. In the past decades, numerous efforts have been made to uncover the gene regulatory mechanisms of cellular senescence. There is a strong demand to integrate these data from various resources into one open platform. To facilitate researchers on cellular senescence, we have developed Human Cellular Senescence Gene Database (HCSGD) by integrating multiple online published data sources into a comprehensive senescence gene annotation platform (http://bioinfo.au.tsinghua.edu.cn/member/xwwang/HCSGD). Potential Human Cellular Senescence Genes (HCSGS) were collected by combining information from published literatures, gene expression profiling data and Protein-Protein Interaction networks. Additionally, genes are annotated with gene ontology annotation and microRNA/drug/compound target information. HCSGD provides a valuable resource to visualize cellular senescence gene networks, browse annotated functional information, and retrieve senescence-associated genes with a user-friendly web interface.",HCSGD,0.997187793,Human Cellular Senescence Gene Database,0.987302474,HCSGD,0.997187793,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/29/2017 +27527702,http://www.hcvivdb.org,"HCVIVdb: The hepatitis-C IRES variation database. 
Background Sequence variability in the hepatitis C virus (HCV) genome has led to the development and classification of six genotypes and a number of subtypes. The HCV 5' untranslated region mainly comprises an internal ribosomal entry site (IRES) responsible for cap-independent synthesis of the viral polyprotein and is conserved among all HCV genotypes. Description Considering the possible high impact of variations in HCV IRES on viral protein production and thus virus replication, we decided to collect the available data on known nucleotide variants in the HCV IRES and their impact on IRES function in translation initiation. The HCV IRES variation database (HCVIVdb) is a collection of naturally occurring and engineered mutation entries for the HCV IRES. Each entry contains contextual information pertaining to the entry such as the HCV genotypic background and links to the original publication. Where available, quantitative data on the IRES efficiency in translation have been collated along with details on the reporter system used to generate the data. Data are displayed both in a tabular and graphical formats and allow direct comparison of results from different experiments. Together the data provide a central resource for researchers in the IRES and hepatitis C-oriented fields. Conclusion The collation of over 1900 mutations enables systematic analysis of the HCV IRES. The database is mainly dedicated to detailed comparative and functional analysis of all the HCV IRES domains, which can further lead to the development of site-specific drug designs and provide a guide for future experiments. HCVIVdb is available at http://www.hcvivdb.org .",HCVIVdb,0.992048419,HCV IRES variation database,0.664026876,HCVIVdb,0.992048419,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/15/2016 +21930248,"http://apps.sanbi.ac.za/hcvpro/, http://cbrc.kaust.edu.sa/hcvpro","HCVpro: hepatitis C virus protein interaction database. 
It is essential to catalog characterized hepatitis C virus (HCV) protein-protein interaction (PPI) data and the associated plethora of vital functional information to augment the search for therapies, vaccines and diagnostic biomarkers. In furtherance of these goals, we have developed the hepatitis C virus protein interaction database (HCVpro) by integrating manually verified hepatitis C virus-virus and virus-human protein interactions curated from literature and databases. HCVpro is a comprehensive and integrated HCV-specific knowledgebase housing consolidated information on PPIs, functional genomics and molecular data obtained from a variety of virus databases (VirHostNet, VirusMint, HCVdb and euHCVdb), and from BIND and other relevant biology repositories. HCVpro is further populated with information on hepatocellular carcinoma (HCC) related genes that are mapped onto their encoded cellular proteins. Incorporated proteins have been mapped onto Gene Ontologies, canonical pathways, Online Mendelian Inheritance in Man (OMIM) and extensively cross-referenced to other essential annotations. The database is enriched with exhaustive reviews on structure and functions of HCV proteins, current state of drug and vaccine development and links to recommended journal articles. Users can query the database using specific protein identifiers (IDs), chromosomal locations of a gene, interaction detection methods, indexed PubMed sources as well as HCVpro, BIND and VirusMint IDs. The use of HCVpro is free and the resource can be accessed via http://apps.sanbi.ac.za/hcvpro/ or http://cbrc.kaust.edu.sa/hcvpro/.",HCVpro,0.997242883,hepatitis C virus protein interaction database,0.859147181,HCVpro,0.997242883,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/9/2011 +23369322,http://www.megabionet.org/HDAM,"HDAM: a resource of human disease associated mutations from next generation sequencing studies. 
Background Next generation sequencing (NGS) technologies have greatly facilitated the rapid and economical detection of pathogenic mutations in human disorders. However, mutation descriptions are hard to be compared and integrated due to various reference sequences and annotation tools adopted in different articles as well as the nomenclature of diseases/traits. Description The Human Disease Associated Mutation (HDAM) database is dedicated to collect, standardize and re-annotate mutations for human diseases discovered by NGS studies. In the current release, HDAM contains 1,114 mutations, located in 669 genes and associated with 125 human diseases through literature mining. All mutation records have uniform and unequivocal descriptions of sequence changes according to the Human Genome Sequence Variation Society (HGVS) nomenclature recommendations. Each entry displays comprehensive information, including mutation location in genome (hg18/hg19), gene functional annotation, protein domain annotation, susceptible diseases, the first literature report of the mutation and etc. Moreover, new mutation-disease relationships predicted by Bayesian network are also presented under each mutation. Conclusion HDAM contains hundreds rigorously curated human mutations from NGS studies and was created to provide a comprehensive view of these mutations that confer susceptibility to the common disorders. HDAM can be freely accessed at http://www.megabionet.org/HDAM.",HDAM,0.990325689,Human Disease Associated Mutation,0.949979091,HDAM,0.990325689,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/23/2013 +33439542,http://humandiseasegenes.info,"Human disease genes website series: An international, open and dynamic library for up-to-date clinical information. Since the introduction of next-generation sequencing, an increasing number of disorders have been discovered to have genetic etiology. 
To address diverse clinical questions and coordinate research activities that arise with the identification of these rare disorders, we developed the Human Disease Genes website series (HDG website series): an international digital library that records detailed information on the clinical phenotype of novel genetic variants in the human genome (https://humandiseasegenes.info/). Each gene website is moderated by a dedicated team of clinicians and researchers, focused on specific genes, and provides up-to-date-including unpublished-clinical information. The HDG website series is expanding rapidly with 424 genes currently adopted by 325 moderators from across the globe. On average, a gene website has detailed phenotypic information of 14.4 patients. There are multiple examples of added value, one being the ARID1B gene website, which was recently utilized in research to collect clinical information of 81 new patients. Additionally, several gene websites have more data available than currently published in the literature. In conclusion, the HDG website series provides an easily accessible, open and up-to-date clinical data resource for patients with pathogenic variants of individual genes. This is a valuable resource not only for clinicians dealing with rare genetic disorders such as developmental delay and autism, but other professionals working in diagnostics and basic research. Since the HDG website series is a dynamic platform, its data also include the phenotype of yet unpublished patients curated by professionals providing higher quality clinical detail to improve management of these rare disorders.",HDG,0.937619388,Human Disease Genes website series,0.917995095,HDG,0.937619388,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/13/2021 +26631432,http://humandiseaseinsight.com,"Human Disease Insight: An integrated knowledge-based platform for disease-gene-drug information. 
The scope of the Human Disease Insight (HDI) database is not limited to researchers or physicians as it also provides basic information to non-professionals and creates disease awareness, thereby reducing the chances of patient suffering due to ignorance. HDI is a knowledge-based resource providing information on human diseases to both scientists and the general public. Here, our mission is to provide a comprehensive human disease database containing most of the available useful information, with extensive cross-referencing. HDI is a knowledge management system that acts as a central hub to access information about human diseases and associated drugs and genes. In addition, HDI contains well-classified bioinformatics tools with helpful descriptions. These integrated bioinformatics tools enable researchers to annotate disease-specific genes and perform protein analysis, search for biomarkers and identify potential vaccine candidates. Eventually, these tools will facilitate the analysis of disease-associated data. The HDI provides two types of search capabilities and includes provisions for downloading, uploading and searching disease/gene/drug-related information. The logistical design of the HDI allows for regular updating. The database is designed to work best with Mozilla Firefox and Google Chrome and is freely accessible at http://humandiseaseinsight.com.",HDI,0.979960263,Human Disease Insight,0.578439275,HDI,0.979960263,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2015 +30053237,http://hdncrna.cardiacdev.com,"HDncRNA: a comprehensive database of non-coding RNAs associated with heart diseases. Heart diseases (HDs) represent a common group of diseases that involve the heart, a number of which are characterized by high morbidity and lethality. Recently, increasing evidence demonstrates diverse non-coding RNAs (ncRNAs) play critical roles in HDs. However, currently there lacks a systematic investigation of the association between HDs and ncRNAs. 
Here, we developed a Heart Disease-related Non-coding RNAs Database (HDncRNA), to curate the HDs-ncRNA associations from 3 different sources including 1904 published articles, 3 existing databases [the Human microRNA Disease Database (HMDD), miR2disease and lncRNAdisease] and 5 RNA-seq datasets. The HDs-ncRNA associations with experimental validations curated from these articles, HMDD, miR2disease and part of data from lncRNAdisease were 'direct evidence'. Relationships got from high-through data in lncRNAdisease and annotated differential expressed lncRNAs from RNA-seq data were defined as 'high-throughput associations'. Novel lncRNAs identified from RNA-seq data in HDs had least credibility and were defined as 'predicted associations'. Currently, the database contains 2304 HDs-ncRNA associations for 133 HDs in 6 species including human, mouse, rat, pig, calf and dog. The database also has the following features: (i) A user-friendly web interface for browsing and searching the data; (ii) a visualization tool to plot miRNA and lncRNA locations in the human and mouse genomes; (iii) information about neighboring genes of lncRNAs and (iv) links to some mainstream databases including miRbase, Ensemble and Fantom Cat for the annotated lncRNAs and miRNAs. In summary, HDncRNA provides an excellent platform for exploring HDs related ncRNAs.Database URL: http://hdncrna.cardiacdev.com.",HDncRNA,0.996445835,Heart Disease-related Non-coding RNAs Database,0.990663501,HDncRNA,0.996445835,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +28701700,http://hdnetdb.sysbiolab.eu,"HDNetDB: A Molecular Interaction Database for Network-Oriented Investigations into Huntington's Disease. Huntington's disease (HD) is a progressive and fatal neurodegenerative disorder caused by an expanded CAG repeat in the huntingtin gene. Although HD is monogenic, its molecular manifestation appears highly complex and involves multiple cellular processes. 
The recent application of high throughput platforms such as microarrays and mass-spectrometry has indicated multiple pathogenic routes. The massive data generated by these techniques together with the complexity of the pathogenesis, however, pose considerable challenges to researchers. Network-based methods can provide valuable tools to consolidate newly generated data with existing knowledge, and to decipher the interwoven molecular mechanisms underlying HD. To facilitate research on HD in a network-oriented manner, we have developed HDNetDB, a database that integrates molecular interactions with many HD-relevant datasets. It allows users to obtain, visualize and prioritize molecular interaction networks using HD-relevant gene expression, phenotypic and other types of data obtained from human samples or model organisms. We illustrated several HDNetDB functionalities through a case study and identified proteins that constitute potential cross-talk between HD and the unfolded protein response (UPR). HDNetDB is publicly accessible at http://hdnetdb.sysbiolab.eu .",HDNetDB,0.996480525,NA,0,HDNetDB,0.996480525,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/12/2017 +24985530,http://ohdsi.org,"Bridging islands of information to establish an integrated knowledge base of drugs and health outcomes of interest. The entire drug safety enterprise has a need to search, retrieve, evaluate, and synthesize scientific evidence more efficiently. This discovery and synthesis process would be greatly accelerated through access to a common framework that brings all relevant information sources together within a standardized structure. This presents an opportunity to establish an open-source community effort to develop a global knowledge base, one that brings together and standardizes all available information for all drugs and all health outcomes of interest (HOIs) from all electronic sources pertinent to drug safety. 
To make this vision a reality, we have established a workgroup within the Observational Health Data Sciences and Informatics (OHDSI, http://ohdsi.org) collaborative. The workgroup's mission is to develop an open-source standardized knowledge base for the effects of medical products and an efficient procedure for maintaining and expanding it. The knowledge base will make it simpler for practitioners to access, retrieve, and synthesize evidence so that they can reach a rigorous and accurate assessment of causal relationships between a given drug and HOI. Development of the knowledge base will proceed with the measureable goal of supporting an efficient and thorough evidence-based assessment of the effects of 1,000 active ingredients across 100 HOIs. This non-trivial task will result in a high-quality and generally applicable drug safety knowledge base. It will also yield a reference standard of drug-HOI pairs that will enable more advanced methodological research that empirically evaluates the performance of drug safety analysis methods.",HDSI,0.787382841,Observational Health Data Sciences and,0.729369034,HDSI,0.787382841,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,8/1/2014 +32422927,http://hdvdb.bio.wzw.tum.de,"HDVdb: A Comprehensive Hepatitis D Virus Database. . Hepatitis D virus (HDV) causes the most severe form of viral hepatitis, which may rapidly progress to liver cirrhosis and hepatocellular carcinoma (HCC). It has been estimated that 15-20 million people worldwide are suffering from the chronic HDV infection. Currently, no effective therapies are available to treat acute or chronic HDV infection. The remarkable sequence variability of the HDV genome, particularly within the hypervariable region has resulted in the provisional classification of eight major genotypes and various subtypes. 
We have developed a specialized database, HDVdb (http://hdvdb.bio.wzw.tum.de/), which contains a collection of partial and complete HDV genomic sequences obtained from the GenBank and from our own patient cohort. HDVdb enables the researchers to investigate the genetic variability of all available HDV sequences, correlation of genotypes to epidemiology and pathogenesis. Additionally, it will contribute in understanding the drug resistant mutations and develop effective vaccines against HDV infection. The database can be accessed through a web interface that allows for static and dynamic queries and offers integrated generic and specialized sequence analysis tools, such as annotation, genotyping, primer prediction, and phylogenetic analyses.",HDVdb,0.997277975,NA,0,HDVdb,0.997277975,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/14/2020 +23681723,http://peptaibiotics-database.boku.ac.at,"The comprehensive peptaibiotics database. Peptaibiotics are nonribosomally biosynthesized peptides, which - according to definition - contain the marker amino acid α-aminoisobutyric acid (Aib) and possess antibiotic properties. Being known since 1958, a constantly increasing number of peptaibiotics have been described and investigated with a particular emphasis on hypocrealean fungi. Starting from the existing online 'Peptaibol Database', first published in 1997, an exhaustive literature survey of all known peptaibiotics was carried out and resulted in a list of 1043 peptaibiotics. The gathered information was compiled and used to create the new 'The Comprehensive Peptaibiotics Database', which is presented here. The database was devised as a software tool based on Microsoft (MS) Access. It is freely available from the internet at http://peptaibiotics-database.boku.ac.at and can easily be installed and operated on any computer offering a Windows XP/7 environment. 
It provides useful information on characteristic properties of the peptaibiotics included such as peptide category, group name of the microheterogeneous mixture to which the peptide belongs, amino acid sequence, sequence length, producing fungus, peptide subfamily, molecular formula, and monoisotopic mass. All these characteristics can be used and combined for automated search within the database, which makes The Comprehensive Peptaibiotics Database a versatile tool for the retrieval of valuable information about peptaibiotics. Sequence data have been considered as to December 14, 2012.",he,0.515990496,NA,0,he,0.515990496,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,5/1/2013 +34910407,http://hersenonderzoek.nl,"[The Swiss Brain Health Registry : a national infrastructure for Alzheimer's research]. The Memory Centres of several Swiss hospitals have set up a national online registry for Alzheimer's research, called www.BHR-suisse.org. This type of registry already exists in the United States (www.brainhealthregistry.org/) and the Netherlands (https://hersenonderzoek.nl/). It contributes, as do these initiating sites, to the creation of a global database of research partnersb who wish to contribute by participating in studies on neurodegenerative diseases and more particularly on Alzheimer's disease. By registering, they provide a certain amount of information and become potential research partners. Researchers can then select a panel of volunteers according to the selection and exclusion criteria of their studies, contact them and include them in their studies.",NA,0,he Swiss Brain Health Registry,0.814125395,he Swiss Brain Health Registry,0.814125395,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,12/1/2021 +33382884,http://www.heartbioportal.com,"HeartBioPortal2.0: new developments and updates for genetic ancestry and cardiometabolic quantitative traits in diverse human populations. . 
Cardiovascular disease (CVD) is the leading cause of death worldwide for all genders and across most racial and ethnic groups. However, different races and ethnicities exhibit different rates of CVD and its related cardiorenal and metabolic comorbidities, suggesting differences in genetic predisposition and risk of onset, as well as socioeconomic and lifestyle factors (diet, exercise, etc.) that act upon an individual's unique underlying genetic background. Here, we present HeartBioPortal2.0, a major update to HeartBioPortal, the world's largest CVD genetics data precision medicine platform for harmonized CVD-relevant genetic variants, which now enables search and analysis of human genetic information related to heart disease across ethnically diverse populations and cardiovascular/renal/metabolic quantitative traits pertinent to CVD pathophysiology. HeartBioPortal2.0 is structured as a cloud-based computing platform and knowledge portal that consolidates a multitude of CVD-relevant genomic data modalities into a single powerful query and browsing interface between data and user via a user-friendly web application publicly available to the scientific research community. Since its initial release, HeartBioPortal2.0 has added new cardiovascular/renal/metabolic disease-relevant gene expression data as well as genetic association data from numerous large-scale genome-wide association study consortiums such as CARDIoGRAMplusC4D, TOPMed, FinnGen, AFGen, MESA, MEGASTROKE, UK Biobank, CHARGE, Biobank Japan and MyCode, among other studies. In addition, HeartBioPortal2.0 now includes support for quantitative traits and ethnically diverse populations, allowing users to investigate the shared genetic architecture of any gene or its variants across the continuous cardiometabolic spectrum from health (e.g. blood pressure traits) to disease (e.g. hypertension), facilitating the understanding of CVD trait genetics that inform health-to-disease transitions and endophenotypes. 
Custom visualizations in the new and improved user interface, including performance enhancements and new security features such as user authentication, collectively re-imagine HeartBioPortal's user experience and provide a data commons that co-locates data, storage and computing infrastructure in the context of studying the genetic basis behind the leading cause of global mortality. Database URL: https://www.heartbioportal.com/.",HeartBioPortal,0.983459532,NA,0,HeartBioPortal,0.983459532,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +29077884,http://zdzlab.einstein.yu.edu/1/hedd.php,"HEDD: Human Enhancer Disease Database. Enhancers, as specialized genomic cis-regulatory elements, activate transcription of their target genes and play an important role in pathogenesis of many human complex diseases. Despite recent systematic identification of them in the human genome, currently there is an urgent need for comprehensive annotation databases of human enhancers with a focus on their disease connections. In response, we built the Human Enhancer Disease Database (HEDD) to facilitate studies of enhancers and their potential roles in human complex diseases. HEDD currently provides comprehensive genomic information for ∼2.8 million human enhancers identified by ENCODE, FANTOM5 and RoadMap with disease association scores based on enhancer-gene and gene-disease connections. It also provides Web-based analytical tools to visualize enhancer networks and score enhancers given a set of selected genes in a specific gene network. HEDD is freely accessible at http://zdzlab.einstein.yu.edu/1/hedd.php.",HEDD,0.997758329,Human Enhancer Disease Database,0.981465423,HEDD,0.997758329,1,NA,28025347,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,1/1/2018 +28025347,http://hedds.org,"HEDD: the human epigenetic drug database. . 
Epigenetic drugs are chemical compounds that target disordered post-translational modification of histone proteins and DNA through enzymes, and the recognition of these changes by adaptor proteins. Epigenetic drug-related experimental data such as gene expression probed by high-throughput sequencing, co-crystal structure probed by X-RAY diffraction and binding constants probed by bio-assay have become widely available. The mining and integration of multiple kinds of data can be beneficial to drug discovery and drug repurposing. HEMD and other epigenetic databases store comprehensively epigenetic data where users can acquire segmental information of epigenetic drugs. However, some data types such as high-throughput datasets are not provide by these databases and they do not support flexible queries for epigenetic drug-related experimental data. Therefore, in reference to HEMD and other epigenetic databases, we developed a relatively comprehensive database for human epigenetic drugs. The human epigenetic drug database (HEDD) focuses on the storage and integration of epigenetic drug datasets obtained from laboratory experiments and manually curated information. The latest release of HEDD incorporates five kinds of datasets: (i) drug, (ii) target, (iii) disease, (vi) high-throughput and (v) complex. In order to facilitate data extraction, flexible search options were built in HEDD, which allowed an unlimited condition query for specific kinds of datasets using drug names, diseases and experiment types.Database URL: http://hedds.org/.",HEDD,0.985895932,human epigenetic drug database,0.860206032,HEDD,0.985895932,1,NA,29077884,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,12/26/2016 +25030426,http://helicobacter.um.edu.my,"HelicoBase: a Helicobacter genomic resource and analysis platform. Background Helicobacter is a genus of Gram-negative bacteria, possessing a characteristic helical shape that has been associated with a wide spectrum of human diseases. 
Although much research has been done on Helicobacter and many genomes have been sequenced, currently there is no specialized Helicobacter genomic resource and analysis platform to facilitate analysis of these genomes. With the increasing number of Helicobacter genomes being sequenced, comparative genomic analysis on members of this species will provide further insights on their taxonomy, phylogeny, pathogenicity and other information that may contribute to better management of diseases caused by Helicobacter pathogens. Description To facilitate the ongoing research on Helicobacter, a specialized central repository and analysis platform for the Helicobacter research community is needed to host the fast-growing amount of genomic data and facilitate the analysis of these data, particularly comparative analysis. Here we present HelicoBase, a user-friendly Helicobacter resource platform with diverse functionality for the analysis of Helicobacter genomic data for the Helicobacter research communities. HelicoBase hosts a total of 13 species and 166 genome sequences of Helicobacter spp. Genome annotations such as gene/protein sequences, protein function and sub-cellular localisation are also included. Our web implementation supports diverse query types and seamless searching of annotations using an AJAX-based real-time searching system. JBrowse is also incorporated to allow rapid and seamless browsing of Helicobacter genomes and annotations. Advanced bioinformatics analysis tools consisting of standard BLAST for similarity search, VFDB BLAST for sequence similarity search against the Virulence Factor Database (VFDB), Pairwise Genome Comparison (PGC) tool for comparative genomic analysis, and a newly designed Pathogenomics Profiling Tool (PathoProT) for comparative pathogenomic analysis are also included to facilitate the analysis of Helicobacter genomic data. 
Conclusions HelicoBase offers access to a range of genomic resources as well as tools for the analysis of Helicobacter genome data. HelicoBase can be accessed at http://helicobacter.um.edu.my.",HelicoBase,0.996223032,NA,0,HelicoBase,0.996223032,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/16/2014 +21760913,http://www.nematode.net/helmcop.html,"HelmCoP: an online resource for helminth functional genomics and drug and vaccine targets prioritization. A vast majority of the burden from neglected tropical diseases result from helminth infections (nematodes and platyhelminthes). Parasitic helminthes infect over 2 billion, exerting a high collective burden that rivals high-mortality conditions such as AIDS or malaria, and cause devastation to crops and livestock. The challenges to improve control of parasitic helminth infections are multi-fold and no single category of approaches will meet them all. New information such as helminth genomics, functional genomics and proteomics coupled with innovative bioinformatic approaches provide fundamental molecular information about these parasites, accelerating both basic research as well as development of effective diagnostics, vaccines and new drugs. To facilitate such studies we have developed an online resource, HelmCoP (Helminth Control and Prevention), built by integrating functional, structural and comparative genomic data from plant, animal and human helminthes, to enable researchers to develop strategies for drug, vaccine and pesticide prioritization, while also providing a useful comparative genomics platform. HelmCoP encompasses genomic data from several hosts, including model organisms, along with a comprehensive suite of structural and functional annotations, to assist in comparative analyses and to study host-parasite interactions. 
The HelmCoP interface, with a sophisticated query engine as a backbone, allows users to search for multi-factorial combinations of properties and serves readily accessible information that will assist in the identification of various genes of interest. HelmCoP is publicly available at: http://www.nematode.net/helmcop.html.",HelmCoP,0.995496631,Helminth Control,0.556904441,HelmCoP,0.995496631,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/8/2011 +23281827,http://estexplorer.biolinfo.org/hsd,"Helminth secretome database (HSD): a collection of helminth excretory/secretory proteins predicted from expressed sequence tags (ESTs). Background Helminths are important socio-economic organisms, responsible for causing major parasitic infections in humans, other animals and plants. These infections impose a significant public health and economic burden globally. Exceptionally, some helminth organisms like Caenorhabditis elegans are free-living in nature and serve as model organisms for studying parasitic infections. Excretory/secretory proteins play an important role in parasitic helminth infections which make these proteins attractive targets for therapeutic use. In the case of helminths, large volume of expressed sequence tags (ESTs) has been generated to understand parasitism at molecular level and for predicting excretory/secretory proteins for developing novel strategies to tackle parasitic infections. However, mostly predicted ES proteins are not available for further analysis and there is no repository available for such predicted ES proteins. Furthermore, predictions have, in the main, focussed on classical secretory pathways while it is well established that helminth parasites also utilise non-classical secretory pathways. 
Results We developed a free Helminth Secretome Database (HSD), which serves as a repository for ES proteins predicted using classical and non-classical secretory pathways, from EST data for 78 helminth species (64 nematodes, 7 trematodes and 7 cestodes) ranging from parasitic to free-living organisms. Approximately 0.9 million ESTs compiled from the largest EST database, dbEST were cleaned, assembled and analysed by different computational tools in our bioinformatics pipeline and predicted ES proteins were submitted to HSD. Conclusion We report the large-scale prediction and analysis of classically and non-classically secreted ES proteins from diverse helminth organisms. All the Unigenes (contigs and singletons) and excretory/secretory protein datasets generated from this analysis are freely available. A BLAST server is available at http://estexplorer.biolinfo.org/hsd, for checking the sequence similarity of new protein sequences against predicted helminth ES proteins.",HSD,0.909700066,Helminth Secretome Database,0.980656524,Helminth Secretome Database,0.980656524,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/13/2012 +23143109,http://servers.binf.ku.dk/hemaexplorer,"HemaExplorer: a database of mRNA expression profiles in normal and malignant haematopoiesis. The HemaExplorer (http://servers.binf.ku.dk/hemaexplorer) is a curated database of processed mRNA Gene expression profiles (GEPs) that provides an easy display of gene expression in haematopoietic cells. HemaExplorer contains GEPs derived from mouse/human haematopoietic stem and progenitor cells as well as from more differentiated cell types. Moreover, data from distinct subtypes of human acute myeloid leukemia is included in the database allowing researchers to directly compare gene expression of leukemic cells with those of their closest normal counterpart. Normalization and batch correction lead to full integrity of the data in the database. 
The HemaExplorer has comprehensive visualization interface that can make it useful as a daily tool for biologists and cancer researchers to assess the expression patterns of genes encountered in research or literature. HemaExplorer is relevant for all research within the fields of leukemia, immunology, cell differentiation and the biology of the haematopoietic system.",HemaExplorer,0.986351252,NA,0,HemaExplorer,0.986351252,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/9/2012 +22761927,http://mdl.shsmu.edu.cn/HEMD,"HEMD: an integrated tool of human epigenetic enzymes and chemical modulators for therapeutics. Background Epigenetic mechanisms mainly include DNA methylation, post-translational modifications of histones, chromatin remodeling and non-coding RNAs. All of these processes are mediated and controlled by enzymes. Abnormalities of the enzymes are involved in a variety of complex human diseases. Recently, potent natural or synthetic chemicals are utilized to establish the quantitative contributions of epigenetic regulation through the enzymes and provide novel insight for developing new therapeutics. However, the development of more specific and effective epigenetic therapeutics requires a more complete understanding of the chemical epigenomic landscape. Description Here, we present a human epigenetic enzyme and modulator database (HEMD), the database which provides a central resource for the display, search, and analysis of the structure, function, and related annotation for human epigenetic enzymes and chemical modulators focused on epigenetic therapeutics. Currently, HEMD contains 269 epigenetic enzymes and 4377 modulators in three categories (activators, inhibitors, and regulators). Enzymes are annotated with detailed description of epigenetic mechanisms, catalytic processes, and related diseases, and chemical modulators with binding sites, pharmacological effect, and therapeutic uses. 
Integrating the information of epigenetic enzymes in HEMD should allow for the prediction of conserved features for proteins and could potentially classify them as ideal targets for experimental validation. In addition, modulators curated in HEMD can be used to investigate potent epigenetic targets for the query compound and also help chemists to implement structural modifications for the design of novel epigenetic drugs. Conclusions HEMD could be a platform and a starting point for biologists and medicinal chemists for furthering research on epigenetic therapeutics. HEMD is freely available at http://mdl.shsmu.edu.cn/HEMD/.",HEMD,0.996305764,human epigenetic enzyme and modulator database,0.944263808,HEMD,0.996305764,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/25/2012 +28708269,http://www.researchdsf.unict.it/hemeoxdb,"Heme Oxygenase Database (HemeOxDB) and QSAR Analysis of Isoform 1 Inhibitors. Due to increasing interest in the field of heme oxygenases (HOs), we built a ligand database called HemeOxDB that includes the entire set of known HO-1 and HO-2 inhibitors, resulting in more than 400 compounds. The HemeOxDB is available online at http://www.researchdsf.unict.it/hemeoxdb/, and having a robust search engine allows end users to build complex queries, sort tabulated results, and generate color-coded two- and three-dimensional graphs. This database will grow to be a tool for the design of potent and selective HO-1 or HO-2 inhibitors. We were also interested in virtually searching for alternative inhibitors, and, for the first time in the field of HOs, a quantitative structure-activity relationship (QSAR) model was built using half-maximal inhibitory concentration (IC50 ) values of the whole set of known HO-1 inhibitors, taken from the HemeOxDB and employing the Monte Carlo technique. The statistical quality suggested that the model is robust and possesses desirable predictive potential. 
The screening of US Food and Drug Administration (FDA)-approved drugs, external to our dataset, suggested new predicted inhibitors, opening the way for replacing imidazole groups. The HemeOxDB and the QSAR model reported herein may help in prospectively identifying or repurposing new drugs with optimal structural attributes for HO enzyme inhibition.",HemeOxDB,0.994890809,Heme Oxygenase Database,0.953735838,HemeOxDB,0.994890809,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/9/2017 +24174543,http://crdd.osdd.net/raghava/hemolytik,"Hemolytik: a database of experimentally determined hemolytic and non-hemolytic peptides. Hemolytik (http://crdd.osdd.net/raghava/hemolytik/) is a manually curated database of experimentally determined hemolytic and non-hemolytic peptides. Data were compiled from a large number of published research articles and various databases like Antimicrobial Peptide Database, Collection of Anti-microbial Peptides, Dragon Antimicrobial Peptide Database and Swiss-Prot. The current release of Hemolytik database contains ∼3000 entries that include ∼2000 unique peptides whose hemolytic activities were evaluated on erythrocytes isolated from as many as 17 different sources. Each entry in Hemolytik provides comprehensive information about a peptide, like its name, sequence, origin, reported function, property such as chirality, types (linear and cyclic), end modifications as well as details pertaining to its hemolytic activity. In addition, tertiary structure of each peptide has been predicted, and secondary structure states have been assigned. To facilitate the scientific community, a user-friendly interface has been developed with various tools for data searching and analysis. 
We hope, Hemolytik will be useful for researchers working in the field of designing therapeutic peptides.",Hemolytik,0.997379065,NA,0,Hemolytik,0.997379065,1,NA,26953092,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/29/2013 +26953092,http://crdd.osdd.net/raghava/hemopi,"A Web Server and Mobile App for Computing Hemolytic Potency of Peptides. Numerous therapeutic peptides do not enter the clinical trials just because of their high hemolytic activity. Recently, we developed a database, Hemolytik, for maintaining experimentally validated hemolytic and non-hemolytic peptides. The present study describes a web server and mobile app developed for predicting, and screening of peptides having hemolytic potency. Firstly, we generated a dataset HemoPI-1 that contains 552 hemolytic peptides extracted from Hemolytik database and 552 random non-hemolytic peptides (from Swiss-Prot). The sequence analysis of these peptides revealed that certain residues (e.g., L, K, F, W) and motifs (e.g., ""FKK"", ""LKL"", ""KKLL"", ""KWK"", ""VLK"", ""CYCR"", ""CRR"", ""RFC"", ""RRR"", ""LKKL"") are more abundant in hemolytic peptides. Therefore, we developed models for discriminating hemolytic and non-hemolytic peptides using various machine learning techniques and achieved more than 95% accuracy. We also developed models for discriminating peptides having high and low hemolytic potential on different datasets called HemoPI-2 and HemoPI-3. 
In order to serve the scientific community, we developed a web server, mobile app and JAVA-based standalone software (http://crdd.osdd.net/raghava/hemopi/).",Hemolytik,0.971642554,NA,0,Hemolytik,0.971642554,1,NA,24174543,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,3/8/2016 +23913812,"http://hadb.org.uk/, http://www.kcl.ac.uk/ip/petergreen/haemBdatabase.html","The Canadian ""National Program for hemophilia mutation testing"" database: a ten-year review. A reference genotyping laboratory was established in 2000 at Queen's University, Kingston, to provide genetic testing for Hemophilia A (HA) and B (HB) and create a Canadian mutation database. Canadian hemophilia treatment centers and genetics clinics provided DNA and clinical information from November 2000 to March 2011. The factor VIII (F8) gene was analyzed in 1,177 patients (47% of HA population) and 787 female family members and the factor IX (F9) gene in 267 patients (47% of HB population) and 123 female family members, using Southern Blot, PCR, conformation sensitive gel electrophoresis, and/or direct sequencing. The mutation detection rates for HA and HB were 91% and 94%, respectively. 380 different F8 mutations were identified: inversions of intron 22 and intron 1, 229 missense, 45 nonsense, eight deletions, 70 frameshifts, 25 splice site, and one compound mutation with a splice site and intron 1 inversion. Of these mutations, 228 were novel to the Hemophilia A Database (HADB, http://hadb.org.uk/). A total 125 different F9 mutations were identified: 80 missense, 12 frameshift, 12 splice site, nine nonsense and seven promoter mutations, three large deletions, and two compound mutations with both missense and nonsense changes. Of these mutations, 36 were novel to the International Haemophilia B Mutation database (http://www.kcl.ac.uk/ip/petergreen/haemBdatabase.html). 
The Canadian F8 and F9 mutation database reflects the allelic heterogeneity of HA and HB, and is similar to previously described populations. This report represents the largest and longest duration experience of a national hemophilia genotyping program documented, to date.",NA,0,Hemophilia A Database,0.782407534,Hemophilia A Database,0.782407534,1,NA,NA,low_prob_best_name,do not remove,NA,NA,TRUE POS: two resources; name and URL of first will be correct; second is lost,NA,NA,9/9/2013 +23280990,http://www.cdc.gov/hemophiliamutations,"The CDC Hemophilia A Mutation Project (CHAMP) mutation list: a new online resource. Genotyping efforts in hemophilia A (HA) populations in many countries have identified large numbers of unique mutations in the Factor VIII gene (F8). To assist HA researchers conducting genotyping analyses, we have developed a listing of F8 mutations including those listed in existing locus-specific databases as well as those identified in patient populations and reported in the literature. Each mutation was reviewed and uniquely identified using Human Genome Variation Society (HGVS) nomenclature standards for coding DNA and predicted protein changes as well as traditional nomenclature based on the mature, processed protein. Listings also include the associated hemophilia severity classified by International Society of Thrombosis and Haemostasis (ISTH) criteria, associations of the mutations with inhibitors, and reference information. The mutation list currently contains 2,537 unique mutations known to cause HA. HA severity caused by the mutation is available for 2,022 mutations (80%) and information on inhibitors is available for 1,816 mutations (72%). 
The CDC Hemophilia A Mutation Project (CHAMP) Mutation List is available at http://www.cdc.gov/hemophiliamutations for download and search and will be updated quarterly based on periodic literature reviews and submitted reports.",HAMP,0.670017004,Hemophilia A Mutation Project,0.841739901,Hemophilia A Mutation Project,0.841739901,1,24498619,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,12/26/2012 +22064861,http://hfv.lanl.gov,"The LANL hemorrhagic fever virus database, a new platform for analyzing biothreat viruses. Hemorrhagic fever viruses (HFVs) are a diverse set of over 80 viral species, found in 10 different genera comprising five different families: arena-, bunya-, flavi-, filo- and togaviridae. All these viruses are highly variable and evolve rapidly, making them elusive targets for the immune system and for vaccine and drug design. About 55,000 HFV sequences exist in the public domain today. A central website that provides annotated sequences and analysis tools will be helpful to HFV researchers worldwide. The HFV sequence database collects and stores sequence data and provides a user-friendly search interface and a large number of sequence analysis tools, following the model of the highly regarded and widely used Los Alamos HIV database [Kuiken, C., B. Korber, and R.W. Shafer, HIV sequence databases. AIDS Rev, 2003. 5: p. 52-61]. The database uses an algorithm that aligns each sequence to a species-wide reference sequence. The NCBI RefSeq database [Sayers et al. (2011) Database resources of the National Center for Biotechnology Information. Nucleic Acids Res., 39, D38-D51.] is used for this; if a reference sequence is not available, a Blast search finds the best candidate. Using this method, sequences in each genus can be retrieved pre-aligned. 
The HFV website can be accessed via http://hfv.lanl.gov.",HFV,0.767522514,hemorrhagic fever virus database,0.78576076,hemorrhagic fever virus database,0.78576076,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/7/2011 +32179762,http://www.hepamine.de,"Hepamine - A Liver Disease Microarray Database, Visualization Platform and Data-Mining Resource. Numerous gene expression profiling data on liver diseases were generated and stored in public databases. Only few were used for additional analyses by the hepatology research community. This may mostly be due to limited bioinformatics knowledge of most biomedical research personnel. In order to support an easy translation of bioinformatics data into translational hepatology research, we created Hepamine, a liver disease gene expression, visualization platform and data-mining resource. Microarray data were obtained from the NCBI GEO database. Pre-analysis of expression data was performed using R statistical software and the limma microarray analysis package from the Bioconductor repository. We generated Hepamine, a web-based repository of pre-analyzed microarray data for various liver diseases. At its initial release Hepamine contains 13 gene expression datasets, 20 microarray experiments and approximately 400 000 gene expression measurements. A self-explanatory website offers open and easy access to gene expression profiles. Results are furthermore visualized in simple three-color tables indicating differential expression. All data were linked to common functional and genetic databases particularly through the DAVID bioinformatics suite. Hepamine provides comprehensive data and easy access to hepatologic gene expression data even without in depth bioinformatics or microarray profiling experience. 
http://www.hepamine.de.",Hepamine,0.994047046,NA,0,Hepamine,0.994047046,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/16/2020 +27976751,http://www.expmedndm.ox.ac.uk/hepitopes,"Hepitopes: A live interactive database of HLA class I epitopes in hepatitis B virus. Increased clinical and scientific scrutiny is being applied to hepatitis B virus (HBV), with focus on the development of new therapeutic approaches, ultimately aiming for cure. Defining the optimum natural CD8+ T cell immune responses that arise in HBV, mediated by HLA class I epitope presentation, may help to inform novel immunotherapeutic strategies. Therefore, we have set out to develop a comprehensive database of these epitopes in HBV, coined 'Hepitopes'. This undertaking has its foundations in a systematic literature review to identify the sites and sequences of all published class I epitopes in HBV. We also collected information regarding the methods used to define each epitope, and any reported associations between an immune response to this epitope and disease outcome. The results of this search have been collated into a new open-access interactive database that is available at http://www.expmedndm.ox.ac.uk/hepitopes. Over time, we will continue to refine and update this resource, as well as inviting contributions from others in the field to support its development. This unique new database is an important foundation for ongoing investigations into the nature and impact of the CD8+ T cell response to HBV.",Hepitopes,0.971835365,NA,0,Hepitopes,0.971835365,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/15/2016 +33119754,http://hanlab.uth.edu/HeRA,"HeRA: an atlas of enhancer RNAs across human tissues. Enhancer RNA (eRNA) is a type of long non-coding RNA transcribed from DNA enhancer regions. Despite critical roles of eRNA in gene regulation, the expression landscape of eRNAs in normal human tissue remains unexplored. 
Using numerous samples from the Genotype-Tissue Expression project, we characterized 45 411 detectable eRNAs and identified tens of thousands of associations between eRNAs and traits, including gender, race, and age. We constructed a co-expression network to identify millions of putative eRNA regulators and target genes across different tissues. We further constructed a user-friendly data portal, Human enhancer RNA Atlas (HeRA, https://hanlab.uth.edu/HeRA/). In HeRA, users can search, browse, and download the eRNA expression profile, trait-related eRNAs, and eRNA co-expression network by searching the eRNA ID, gene symbol, and genomic region in one or multiple tissues. HeRA is the first data portal to characterize eRNAs from 9577 samples across 54 human tissues and facilitates functional and mechanistic investigations of eRNAs.",HeRA,0.993696928,Human enhancer RNA Atlas,0.875278735,HeRA,0.993696928,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +33264402,http://herb.ac.cn,"HERB: a high-throughput experiment- and reference-guided database of traditional Chinese medicine. Pharmacotranscriptomics has become a powerful approach for evaluating the therapeutic efficacy of drugs and discovering new drug targets. Recently, studies of traditional Chinese medicine (TCM) have increasingly turned to high-throughput transcriptomic screens for molecular effects of herbs/ingredients. And numerous studies have examined gene targets for herbs/ingredients, and link herbs/ingredients to various modern diseases. However, there is currently no systematic database organizing these data for TCM. Therefore, we built HERB, a high-throughput experiment- and reference-guided database of TCM, with its Chinese name as BenCaoZuJian. 
We re-analyzed 6164 gene expression profiles from 1037 high-throughput experiments evaluating TCM herbs/ingredients, and generated connections between TCM herbs/ingredients and 2837 modern drugs by mapping the comprehensive pharmacotranscriptomics dataset in HERB to CMap, the largest such dataset for modern drugs. Moreover, we manually curated 1241 gene targets and 494 modern diseases for 473 herbs/ingredients from 1966 references published recently, and cross-referenced this novel information to databases containing such data for drugs. Together with database mining and statistical inference, we linked 12 933 targets and 28 212 diseases to 7263 herbs and 49 258 ingredients and provided six pairwise relationships among them in HERB. In summary, HERB will intensively support the modernization of TCM and guide rational modern drug discovery efforts. And it is accessible through http://herb.ac.cn/.",HERB,0.972203016,NA,0,HERB,0.972203016,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +24670875,http://crdd.osdd.net/raghava/herceptinr,"Herceptin resistance database for understanding mechanism of resistance in breast cancer patients. Monoclonal antibody Trastuzumab/Herceptin is considered as frontline therapy for Her2-positive breast cancer patients. However, it is not effective against several patients due to acquired or de novo resistance. In last one decade, several assays have been performed to understand the mechanism of Herceptin resistance with/without supplementary drugs. This manuscript describes a database HerceptinR, developed for understanding the mechanism of resistance at genetic level. HerceptinR maintains information about 2500 assays performed against various breast cancer cell lines (BCCs), for improving sensitivity of Herceptin with or without supplementary drugs. 
In order to understand Herceptin resistance at genetic level, we integrated genomic data of BCCs that include expression, mutations and copy number variations in different cell lines. HerceptinR will play a vital role in i) designing biomarkers to identify patients eligible for Herceptin treatment and ii) identification of appropriate supplementary drug for a particular patient. HerceptinR is available at http://crdd.osdd.net/raghava/herceptinr/.",HerceptinR,0.99444294,Herceptin,0.511912823,HerceptinR,0.99444294,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/27/2014 +28549078,http://bidd2.nus.edu.sg/herod/index.php,"HEROD: a human ethnic and regional specific omics database. Motivation Genetic and gene expression variations within and between populations and across geographical regions have substantial effects on the biological phenotypes, diseases, and therapeutic response. The development of precision medicines can be facilitated by the OMICS studies of the patients of specific ethnicity and geographic region. However, there is an inadequate facility for broadly and conveniently accessing the ethnic and regional specific OMICS data. Results Here, we introduced a new free database, HEROD, a human ethnic and regional specific OMICS database. Its first version contains the gene expression data of 53 070 patients of 169 diseases in seven ethnic populations from 193 cities/regions in 49 nations curated from the Gene Expression Omnibus (GEO), the ArrayExpress Archive of Functional Genomics Data (ArrayExpress), the Cancer Genome Atlas (TCGA) and the International Cancer Genome Consortium (ICGC). Geographic region information of curated patients was mainly manually extracted from referenced publications of each original study. 
These data can be accessed and downloaded via keyword search, World map search, and menu-bar search of disease name, the international classification of disease code, geographical region, location of sample collection, ethnic population, gender, age, sample source organ, patient type (patient or healthy), sample type (disease or normal tissue) and assay type on the web interface. Availability and implementation The HEROD database is freely accessible at http://bidd2.nus.edu.sg/herod/index.php. The database and web interface are implemented in MySQL, PHP and HTML with all major browsers supported. Contact phacyz@nus.edu.sg.",HEROD,0.983515739,NA,0,HEROD,0.983515739,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2017 +24857969,http://www.jcbi.ru/lp_baze,"HeteroGenome: database of genome periodicity. . We present the first release of the HeteroGenome database collecting latent periodicity regions in genomes. Tandem repeats and highly divergent tandem repeats along with the regions of a new type of periodicity, known as profile periodicity, have been collected for the genomes of Saccharomyces cerevisiae, Arabidopsis thaliana, Caenorhabditis elegans and Drosophila melanogaster. We obtained data with the aid of a spectral-statistical approach to search for reliable latent periodicity regions (with periods up to 2000 bp) in DNA sequences. The original two-level mode of data presentation (a broad view of the region of latent periodicity and a second level indicating conservative fragments of its structure) was further developed to enable us to obtain the estimate, without redundancy, that latent periodicity regions make up ~10% of the analyzed genomes. Analysis of the quantitative and qualitative content of located periodicity regions on all chromosomes of the analyzed organisms revealed dominant characteristic types of periodicity in the genomes. The pattern of density distribution of latent periodicity regions on chromosome unambiguously characterizes each chromosome in genome. 
Database URL: http://www.jcbi.ru/lp_baze/",HeteroGenome,0.967456996,NA,0,HeteroGenome,0.967456996,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/24/2014 +30196115,http://qianlab.genetics.ac.cn/HeteroMeth,"HeteroMeth: A Database of Cell-to-cell Heterogeneity in DNA Methylation. DNA methylation is an important epigenetic mark that plays a vital role in gene expression and cell differentiation. The average DNA methylation level among a group of cells has been extensively documented. However, the cell-to-cell heterogeneity in DNA methylation, which reflects the differentiation of epigenetic status among cells, remains less investigated. Here we established a gold standard of the cell-to-cell heterogeneity in DNA methylation based on single-cell bisulfite sequencing (BS-seq) data. With that, we optimized a computational pipeline for estimating the heterogeneity in DNA methylation from bulk BS-seq data. We further built HeteroMeth, a database for searching, browsing, visualizing, and downloading the data for heterogeneity in DNA methylation for a total of 141 samples in humans, mice, Arabidopsis, and rice. Three genes are used as examples to illustrate the power of HeteroMeth in the identification of unique features in DNA methylation. The optimization of the computational strategy and the construction of the database in this study complement the recent experimental attempts on single-cell DNA methylomes and will facilitate the understanding of epigenetic mechanisms underlying cell differentiation and embryonic development. HeteroMeth is publicly available at http://qianlab.genetics.ac.cn/HeteroMeth.",HeteroMeth,0.987609446,NA,0,HeteroMeth,0.987609446,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2018 +22874333,http://hetop.eu,"Teaching medicine with a terminology/ontology portal. Unlabelled The Health Terminology/Ontology Portal (HeTOP) was developed to provide easy access to health terminologies and ontologie. 
The repository is not only dedicated to professionals but is also a valuable teaching tool. Currently, it provides access to thirty two health terminologies and ontologies available mainly in French or in English, but also in German, Italian, Chinese, etc. HeTOP can be used by both humans and computers via Web services. To integrate new resources into HeTOP, three steps are necessary: (1) designing a meta-model into which each terminology (or ontology) can be integrated, (2) developing a process to include terminologies into HeTOP, (3) building and integrating existing and new inter & intra-terminology semantic harmonization into HeTOP. Currently, 600 unique machines use the MeSH version of HeTOP every day and restricted terminologies/ontologies are used for teaching purposes in several medical schools in France. The multilingual version of HeTOP is available (URL: http://hetop.eu/) and provides free access to ICD10 and FMA in ten languages. Conclusion HeTOP is a rich tool, useful for a wide range of applications and users, especially in education and resource indexing but also in information retrieval or performing audits in terminology management.",HeTOP,0.993553936,The Health Terminology/Ontology Portal,0.898625046,HeTOP,0.993553936,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2012 +23118488,http://hexevent.mmg.uci.edu,"HEXEvent: a database of Human EXon splicing Events. HEXEvent (http://hexevent.mmg.uci.edu) is a new database that permits the user to compile genome-wide exon data sets of human internal exons showing selected splicing events. User queries can be customized based on the type and the frequency of alternative splicing events. For each splicing version of an exon, an ESTs count is given, specifying the frequency of the event. A user-specific definition of constitutive exons can be entered to designate an exon exclusion level still acceptable for an exon to be considered as constitutive. 
Similarly, the user has the option to define a maximum inclusion level for an exon to be called an alternatively spliced exon. Unlike other existing splicing databases, HEXEvent permits the user to easily extract alternative splicing information for individual, multiple or genome-wide human internal exons. Importantly, the generated data sets are downloadable for further analysis.",HEXEvent,0.995822012,NA,0,HEXEvent,0.995822012,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/31/2012 +34164644,http://sysbio.org.cn/HFBD,"HFBD: a biomarker knowledge database for heart failure heterogeneity and personalized applications. . Heart failure (HF) is a cardiovascular disease with a high incidence around the world. Accumulating studies have focused on the identification of biomarkers for HF precision medicine. To understand the HF heterogeneity and provide biomarker information for the personalized diagnosis and treatment of HF, a knowledge database collecting the distributed and multiple-level biomarker information is necessary. In this study, the HF biomarker knowledge database (HFBD) was established by manually collecting the data and knowledge from literature in PubMed. HFBD contains 2618 records and 868 HF biomarkers (731 single and 137 combined) extracted from 1237 original articles. The biomarkers were classified into proteins, RNAs, DNAs, and the others at molecular, image, cellular and physiological levels. The biomarkers were annotated with biological, clinical and article information as well as the experimental methods used for the biomarker discovery. With its user-friendly interface, this knowledge database provides a unique resource for the systematic understanding of HF heterogeneity and personalized diagnosis and treatment of HF in the era of precision medicine. 
The platform is openly available at http://sysbio.org.cn/HFBD/.",HFBD,0.994234284,HF biomarker knowledge database,0.968227565,HFBD,0.994234284,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/23/2021 +34791105,http://heartfailure.medical-bigdata.com,"HFIP: an integrated multi-omics data and knowledge platform for the precision medicine of heart failure. . As the terminal clinical phenotype of almost all types of cardiovascular diseases, heart failure (HF) is a complex and heterogeneous syndrome leading to considerable morbidity and mortality. Existing HF-related omics studies mainly focus on case/control comparisons, small cohorts of special subtypes, etc., and a large amount of multi-omics data and knowledge have been generated. However, it is difficult for researchers to obtain biological and clinical insights from these scattered data and knowledge. In this paper, we built the Heart Failure Integrated Platform (HFIP) for data exploration, fusion analysis and visualization by collecting and curating existing multi-omics data and knowledge from various public sources and also provided an auto-updating mechanism for future integration. The developed HFIP contained 253 datasets (7842 samples), multiple analysis flow, and 14 independent tools. In addition, based on the integration of existing databases and literature, a knowledge base for HF was constructed with a scoring system for evaluating the relationship between molecular signals and HF. The knowledge base includes 1956 genes and annotation information. The literature mining module was developed to assist the researcher to overview the hotspots and contexts in basic and clinical research. HFIP can be used as a data-driven and knowledge-guided platform for the basic and clinical research of HF. 
Database URL: http://heartfailure.medical-bigdata.com.",HFIP,0.989193519,Heart Failure Integrated Platform,0.772608399,HFIP,0.989193519,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2021 +30032758,http://www.fecalmetabolome.ca,"A review on human fecal metabolomics: Methods, applications and the human fecal metabolome database. Metabolomic analysis of human biospecimens had progressed quickly over the past decade. Technological and methodological advances have led to the comprehensive characterization of human serum, urine, cerebrospinal fluid and saliva metabolomes, and the creation of freely available metabolome reference databases. Unfortunately, the characterization of the human fecal metabolome still lags behind these other metabolomes in terms of the availability of standardized methods and freely available resources. The purpose of this review is to bring the knowledge of the human fecal metabolome, and the methods to characterize it, to the same level as most other human biofluid metabolomes. More specifically, this review is intended to critically assess the field of fecal metabolomics and to provide a comprehensive review of the current state of knowledge with regard to the protocols, technologies and remaining challenges in fecal metabolite analysis. In addition to providing an overview of fecal metabolomics and some consensus recommendations, we also present the human fecal metabolome database (HFMDB - http://www.fecalmetabolome.ca), a freely available, manually curated resource that currently contains over 6000 identified human fecal metabolites. 
Each entry in the HFMDB includes extensive chemical information, metabolite descriptions and reference data in the same format as the Human Metabolome Database (HMDB).",HFMDB,0.992333651,human fecal metabolome database,0.964461486,HFMDB,0.992333651,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/12/2018 +23430897,"http://hgddatabase.cvtisr.sk/, http://www.alkaptonuria.cib.csic.es","Identification of 11 Novel Homogentisate 1,2 Dioxygenase Variants in Alkaptonuria Patients and Establishment of a Novel LOVD-Based HGD Mutation Database. Enzymatic loss in alkaptonuria (AKU), an autosomal recessive disorder, is caused by mutations in the homogentisate 1,2 dioxygenase (HGD) gene, which decrease or completely inactivate the function of the HGD protein to metabolize homogentisic acid (HGA). AKU shows a very low prevalence (1:100,000-250,000) in most ethnic groups, but there are countries with much higher incidence, such as Slovakia and the Dominican Republic. In this work, we report 11 novel HGD mutations identified during analysis of 36 AKU patients and 41 family members from 27 families originating from 9 different countries, mainly from Slovakia and France. In Slovak patients, we identified two additional mutations, thus a total number of HGD mutations identified in this small country is 12. In order to record AKU-causing mutations and variants of the HGD gene, we have created a HGD mutation database that is open for future submissions and is available online ( http://hgddatabase.cvtisr.sk/ ). It is founded on the Leiden Open (source) Variation Database (LOVD) system and includes data from the original AKU database ( http://www.alkaptonuria.cib.csic.es ) and also all so far reported variants and AKU patients. Where available, HGD-haplotypes associated with the mutations are also presented. Currently, this database contains 148 unique variants, of which 115 are reported pathogenic mutations. 
It provides a valuable tool for information exchange in AKU research and care fields and certainly presents a useful data source for genotype-phenotype correlations and also for future clinical trials.",HGD,0.54280597,NA,0,HGD,0.54280597,1,NA,"26578564.0, 29761469.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,10/20/2011 +26578564,http://HymenopteraGenome.org,"Hymenoptera Genome Database: integrating genome annotations in HymenopteraMine. We report an update of the Hymenoptera Genome Database (HGD) (http://HymenopteraGenome.org), a model organism database for insect species of the order Hymenoptera (ants, bees and wasps). HGD maintains genomic data for 9 bee species, 10 ant species and 1 wasp, including the versions of genome and annotation data sets published by the genome sequencing consortiums and those provided by NCBI. A new data-mining warehouse, HymenopteraMine, based on the InterMine data warehousing system, integrates the genome data with data from external sources and facilitates cross-species analyses based on orthology. New genome browsers and annotation tools based on JBrowse/WebApollo provide easy genome navigation, and viewing of high throughput sequence data sets and can be used for collaborative genome annotation. All of the genomes and annotation data sets are combined into a single BLAST server that allows users to select and combine sequence data sets to search.",HGD,0.997601748,Hymenoptera Genome Database,0.993258144,HGD,0.997601748,1,NA,"23430897.0, 29761469.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/17/2015 +29761469,http://hymenopteragenome.org,"Hymenoptera Genome Database: Using HymenopteraMine to Enhance Genomic Studies of Hymenopteran Insects. The Hymenoptera Genome Database (HGD; http://hymenopteragenome.org ) is a genome informatics resource for insects of the order Hymenoptera, which includes bees, ants and wasps. 
HGD provides genome browsers with manual annotation tools (JBrowse/Apollo), BLAST, bulk data download, and a data mining warehouse (HymenopteraMine). This chapter focuses on the use of HymenopteraMine to create annotation data sets that can be exported for use in downstream analyses. HymenopteraMine leverages the InterMine platform to combine genome assemblies and official gene sets with data from OrthoDB, RefSeq, FlyBase, Gene Ontology, UniProt, InterPro, KEGG, Reactome, dbSNP, PubMed, and BioGrid, as well as precomputed gene expression information based on publicly available RNAseq. Built-in template queries provide starting points for data exploration, while the QueryBuilder tool supports construction of complex custom queries. The List Analysis and Genomic Regions search tools execute queries based on uploaded lists of identifiers and genome coordinates, respectively. HymenopteraMine facilitates cross-species data mining based on orthology and supports meta-analyses by tracking identifiers across gene sets and genome assemblies.",HGD,0.993132909,Hymenoptera Genome Database,0.990225715,HGD,0.993132909,1,NA,"23430897.0, 26578564.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2018 +33417691,http://hgfdb.ynau.edu.cn,"HGFDB: a collective database of helmeted guinea fowl genomics. . As a vigorous and hardy and an almost disease-free game bird, the domestic helmeted guinea fowl (Numida meleagris, hereafter HGF) has attracted considerable attention in a large number of genetic study projects. However, none of the current/recent avian databases are related to this agriculturally and commercially important poultry species. To address this data gap, we developed Helmeted Guinea Fowl Database (HGFDB), which manages and shares HGF genomic and genetic data. 
By processing the data of genome assembly, sequencing reads and genetic variations, we organized them into eight modules, which correspond to 'Home', 'Genome', 'Re-sequence', 'Gene', 'Variation', 'Download', 'Tools' and 'Help', HGFDB provides the most comprehensive view of the HGF genome to date and will be relevant for future studies on HGF structural and functional genomics and genetic improvement. Database URL: http://hgfdb.ynau.edu.cn/.",HGFDB,0.996328712,Helmeted Guinea Fowl Database,0.954183638,HGFDB,0.996328712,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +"22948725, 24077912, 28349240",http://www.hgmd.org,"The Human Gene Mutation Database (HGMD) and its exploitation in the fields of personalized genomics and molecular evolution. The Human Gene Mutation Database (HGMD) constitutes a comprehensive core collection of data on germ-line mutations in nuclear genes underlying or associated with human inherited disease (http://www.hgmd.org). Data cataloged include single-base-pair substitutions in coding, regulatory, and splicing-relevant regions, micro-deletions and micro-insertions, indels, and triplet repeat expansions, as well as gross gene deletions, insertions, duplications, and complex rearrangements. Each mutation is entered into HGMD only once, in order to avoid confusion between recurrent and identical-by-descent lesions. By March 2012, the database contained in excess of 123,600 different lesions (HGMD Professional release 2012.1) detected in 4,514 different nuclear genes, with new entries currently accumulating at a rate in excess of 10,000 per annum. ∼6,000 of these entries constitute disease-associated and functional polymorphisms. HGMD also includes cDNA reference sequences for more than 98% of the listed genes.",HGMD,0.993344128,Human Gene Mutation Database,0.954743373,HGMD,0.993344128,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/27/2017 +"23245209, 30304474",http://www.genenames.org,"Gene family matters: expanding the HGNC resource. 
The HUGO Gene Nomenclature Committee (HGNC) assigns approved gene symbols to human loci. There are currently over 33,000 approved gene symbols, the majority of which represent protein-coding genes, but we also name other locus types such as non-coding RNAs, pseudogenes and phenotypic loci. Where relevant, the HGNC organise these genes into gene families and groups. The HGNC website http://www.genenames.org/ is an online repository of HGNC-approved gene nomenclature and associated resources for human genes, and includes links to genomic, proteomic and phenotypic information. In addition to this, we also have dedicated gene family web pages and are currently expanding and generating more of these pages using data curated by the HGNC and from information derived from external resources that focus on particular gene families. Here, we review our current online resources with a particular focus on our gene family data, using it to highlight our new Gene Symbol Report and gene family data downloads.",HGNC,0.797803879,NA,0,HGNC,0.797803879,2,"27799471.0, 23161694.0, 25361968.0",27799471,low_prob_best_name,do not remove,conflicting record(s) to be removed,conflicting record(s) to be removed,NA,NA,NA,1/1/2019 +27799471,"http://www.genenames.org, http://vertebrate.genenames.org","Genenames.org: the HGNC and VGNC resources in 2017. The HUGO Gene Nomenclature Committee (HGNC) based at the European Bioinformatics Institute (EMBL-EBI) assigns unique symbols and names to human genes. Currently the HGNC database contains almost 40 000 approved gene symbols, over 19 000 of which represent protein-coding genes. In addition to naming genomic loci we manually curate genes into family sets based on shared characteristics such as homology, function or phenotype. 
We have recently updated our gene family resources and introduced new improved visualizations which can be seen alongside our gene symbol reports on our primary website http://www.genenames.org In 2016 we expanded our remit and formed the Vertebrate Gene Nomenclature Committee (VGNC) which is responsible for assigning names to vertebrate species lacking a dedicated nomenclature group. Using the chimpanzee genome as a pilot project we have approved symbols and names for over 14 500 protein-coding genes in chimpanzee, and have developed a new website http://vertebrate.genenames.org to distribute these data. Here, we review our online data and resources, focusing particularly on the improvements and new developments made during the last two years.",HGNC,0.600545228,NA,0,HGNC,0.600545228,1,"23245209.0, 30304474.0, 23161694.0, 25361968.0","23245209.0, 30304474.0",low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,10/30/2016 +22140100,http://www.HGPD.jp,"HGPD: Human Gene and Protein Database, 2012 update. The Human Gene and Protein Database (HGPD; http://www.HGPD.jp/) is a unique database that stores information on a set of human Gateway entry clones in addition to protein expression and protein synthesis data. The HGPD was launched in November 2008, and 33,275 human Gateway entry clones have been constructed from the open reading frames (ORFs) of full-length cDNA, thus representing the largest collection in the world. Recently, research objectives have focused on the development of new medicines and the establishment of novel diagnostic methods and medical treatments. And, studies using proteins and protein information, which are closely related to gene function, have been undertaken. For this update, we constructed an additional 9974 human Gateway entry clones, giving a total of 43,249. 
This set of human Gateway entry clones was named the Human Proteome Expression Resource, known as the 'HuPEX'. In addition, we also classified the clones into 10 groups according to protein function. Moreover, in vivo cellular localization data of proteins for 32,651 human Gateway entry clones were included for retrieval from the HGPD. In 'Information Overview', which presents the search results, the ORF region of each cDNA is now displayed allowing the Gateway entry clones to be searched more easily.",HGPD,0.996275783,Human Gene and Protein Database,0.967167735,HGPD,0.996275783,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/2/2011 +23717556,http://www.bioapp.org/hgpgd,"HGPGD: the human gene population genetic difference database. Demographic events such as migration, and evolutionary events like mutation and recombination, have contributed to the genetic variations that are found in the human genome. During the evolution and differentiation of human populations, different functional genes and pathways (a group of genes that act together to perform specific biological tasks) would have displayed different degrees of genetic diversity or evolutionary conservatism. To query the genetic differences of functional genes or pathways in populations, we have developed the human gene population genetic difference (HGPGD) database. Currently, 11 common population genetic features, 18,158 single human genes, 220 KEGG (Kyoto Encyclopedia of Genes and Genomes) human pathways and 4,639 Gene Ontology (GO) categories (3,269 in biological process; 862 in molecular function; and 508 in cellular component) are available in the HGPGD database. The 11 population genetic features are related mainly to three aspects: allele frequency, linkage disequilibrium pattern, and transferability of tagSNPs. By entering a list of Gene IDs, KEGG pathway IDs or GO category IDs and selecting a population genetic feature, users can search the genetic differences between pairwise HapMap populations. 
We hope that, when the researchers carry out gene-based, KEGG pathway-based or GO category-based research, they can take full account of the genetic differences between populations. The HGPGD database (V1.0) is available at http://www.bioapp.org/hgpgd.",HGPGD,0.977447295,human gene population genetic difference database,0.925682147,HGPGD,0.977447295,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/22/2013 +26578597,http://hgtree.snu.ac.kr,"HGTree: database of horizontally transferred genes determined by tree reconciliation. The HGTree database provides putative genome-wide horizontal gene transfer (HGT) information for 2472 completely sequenced prokaryotic genomes. This task is accomplished by reconstructing approximate maximum likelihood phylogenetic trees for each orthologous gene and corresponding 16S rRNA reference species sets and then reconciling the two trees under parsimony framework. The tree reconciliation method is generally considered to be a reliable way to detect HGT events but its practical use has remained limited because the method is computationally intensive and conceptually challenging. In this regard, HGTree (http://hgtree.snu.ac.kr) represents a useful addition to the biological community and enables quick and easy retrieval of information for HGT-acquired genes to better understand microbial taxonomy and evolution. The database is freely available and can be easily scaled and updated to keep pace with the rapid rise in genomic information.",HGTree,0.998615086,NA,0,HGTree,0.998615086,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +25502817,http://genome.igib.res.in/hgvtb/index.html,"HGV&TB: a comprehensive online resource on human genes and genetic variants associated with tuberculosis. Tuberculosis (TB) is an infectious disease caused by fastidious pathogen Mycobacterium tuberculosis. TB has emerged as one of the major causes of mortality in the developing world. 
Role of host genetic factors that modulate disease susceptibility have not been studied widely. Recent studies have reported few genetic loci that provide impetus to this area of research. The availability of tools has enabled genome-wide scans for disease susceptibility loci associated with infectious diseases. Till now, information on human genetic variations and their associated genes that modulate TB susceptibility have not been systematically compiled. In this work, we have created a resource: HGV&TB, which hosts genetic variations reported to be associated with TB susceptibility in humans. It currently houses information on 307 variations in 98 genes. In total, 101 of these variations are exonic, whereas 78 fall in intronic regions. We also analysed the pathogenicity of the genetic variations, their phenotypic consequences and ethnic origin. Using various computational analyses, 30 variations of the 101 exonic variations were predicted to be pathogenic. The resource is freely available at http://genome.igib.res.in/hgvtb/index.html. Using integrative analysis, we have shown that the disease associated variants are selectively enriched in the immune signalling pathways which are crucial in the pathophysiology of TB. Database URL: http://genome.igib.res.in/hgvtb/index.html",HGV&TB,0.993074507,NA,0,HGV&TB,0.993074507,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/13/2014 +28083826,http://medisp.bme.teiath.gr/hicl,"Development of a Reference Image Collection Library for Histopathology Image Processing, Analysis and Decision Support Systems Research. Histopathology image processing, analysis and computer-aided diagnosis have been shown as effective assisting tools towards reliable and intra-/inter-observer invariant decisions in traditional pathology. Especially for cancer patients, decisions need to be as accurate as possible in order to increase the probability of optimal treatment planning. 
In this study, we propose a new image collection library (HICL-Histology Image Collection Library) comprising 3831 histological images of three different diseases, for fostering research in histopathology image processing, analysis and computer-aided diagnosis. Raw data comprised 93, 116 and 55 cases of brain, breast and laryngeal cancer respectively collected from the archives of the University Hospital of Patras, Greece. The 3831 images were generated from the most representative regions of the pathology, specified by an experienced histopathologist. The HICL Image Collection is free for access under an academic license at http://medisp.bme.teiath.gr/hicl/ . Potential exploitations of the proposed library may span over a board spectrum, such as in image processing to improve visualization, in segmentation for nuclei detection, in decision support systems for second opinion consultations, in statistical analysis for investigation of potential correlations between clinical annotations and imaging findings and, generally, in fostering research on histopathology image processing and analysis. To the best of our knowledge, the HICL constitutes the first attempt towards creation of a reference image collection library in the field of traditional histopathology, publicly and freely available to the scientific community.",HICL,0.993846953,NA,0,HICL,0.993846953,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2017 +27742821,http://hieranoiDB.sbc.su.se,"HieranoiDB: a database of orthologs inferred by Hieranoid. HieranoiDB (http://hieranoiDB.sbc.su.se) is a freely available on-line database for hierarchical groups of orthologs inferred by the Hieranoid algorithm. It infers orthologs at each node in a species guide tree with the InParanoid algorithm as it progresses from the leaves to the root. Here we present a database HieranoiDB with a web interface that makes it easy to search and visualize the output of Hieranoid, and to download it in various formats. 
Searching can be performed using protein description, identifier or sequence. In this first version, orthologs are available for the 66 Quest for Orthologs reference proteomes. The ortholog trees are shown graphically and interactively with marked speciation and duplication nodes that show the inferred evolutionary scenario, and allow for correct extraction of predicted orthologs from the Hieranoid trees.",HieranoiDB,0.9942801,NA,0,HieranoiDB,0.9942801,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/13/2016 +25450223,http://bioserver1.physics.iisc.ernet.in/HIGDB,"Haemophilus influenzae Genome Database (HIGDB): a single point web resource for Haemophilus influenzae. Background Haemophilus influenzae (H. Influenzae) is the causative agent of pneumonia, bacteraemia and meningitis. The organism is responsible for large number of deaths in both developed and developing countries. Even-though the first bacterial genome to be sequenced was that of H. Influenzae, there is no exclusive database dedicated for H. Influenzae. This prompted us to develop the Haemophilus influenzae Genome Database (HIGDB). Methods All data of HIGDB are stored and managed in MySQL database. The HIGDB is hosted on Solaris server and developed using PERL modules. Ajax and JavaScript are used for the interface development. Results The HIGDB contains detailed information on 42,741 proteins, 18,077 genes including 10 whole genome sequences and also 284 three dimensional structures of proteins of H. influenzae. In addition, the database provides ""Motif search"" and ""GBrowse"". The HIGDB is freely accessible through the URL: http://bioserver1.physics.iisc.ernet.in/HIGDB/. Discussion The HIGDB will be a single point access for bacteriological, clinical, genomic and proteomic information of H. influenzae. The database can also be used to identify DNA motifs within H. influenzae genomes and to compare gene or protein sequences of a particular strain with other strains of H. 
influenzae.",HIGDB,0.955801189,Haemophilus influenzae Genome Database,0.94999705,HIGDB,0.955801189,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/14/2014 +22846459,http://hint.yulab.org,"HINT: High-quality protein interactomes and their applications in understanding human disease. Background A global map of protein-protein interactions in cellular systems provides key insights into the workings of an organism. A repository of well-validated high-quality protein-protein interactions can be used in both large- and small-scale studies to generate and validate a wide range of functional hypotheses. Results We develop HINT (http://hint.yulab.org) - a database of high-quality protein-protein interactomes for human, Saccharomyces cerevisiae, Schizosaccharomyces pombe, and Oryza sativa. These were collected from several databases and filtered both systematically and manually to remove low-quality/erroneous interactions. The resulting datasets are classified by type (binary physical interactions vs. co-complex associations) and data source (high-throughput systematic setups vs. literature-curated small-scale experiments). We find strong sociological sampling biases in literature-curated datasets of small-scale interactions. An interactome without such sampling biases was used to understand network properties of human disease-genes - hubs are unlikely to cause disease, but if they do, they usually cause multiple disorders. Conclusions HINT is of significant interest to researchers in all fields of biology as it addresses the ubiquitous need of having a repository of high-quality protein-protein interactions. These datasets can be utilized to generate specific hypotheses about specific proteins and/or pathways, as well as analyzing global properties of cellular networks. 
HINT will be regularly updated and all versions will be tracked.",HINT,0.995340347,NA,0,HINT,0.995340347,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/30/2012 +22123737,http://hiPathDB.kobic.re.kr,"hiPathDB: a human-integrated pathway database with facile visualization. One of the biggest challenges in the study of biological regulatory networks is the systematic organization and integration of complex interactions taking place within various biological pathways. Currently, the information of the biological pathways is dispersed in multiple databases in various formats. hiPathDB is an integrated pathway database that combines the curated human pathway data of NCI-Nature PID, Reactome, BioCarta and KEGG. In total, it includes 1661 pathways consisting of 8976 distinct physical entities. hiPathDB provides two different types of integration. The pathway-level integration, conceptually a simple collection of individual pathways, was achieved by devising an elaborate model that takes distinct features of four databases into account and subsequently reformatting all pathways in accordance with our model. The entity-level integration creates a single unified pathway that encompasses all pathways by merging common components. Even though the detailed molecular-level information such as complex formation or post-translational modifications tends to be lost, such integration makes it possible to investigate signaling network over the entire pathways and allows identification of pathway cross-talks. Another strong merit of hiPathDB is the built-in pathway visualization module that supports explorative studies of complex networks in an interactive fashion. The layout algorithm is optimized for virtually automatic visualization of the pathways. 
hiPathDB is available at http://hiPathDB.kobic.re.kr.",hiPathDB,0.997191489,NA,0,hiPathDB,0.997191489,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2011 +23958730,"http://www.nyu.edu/projects/arora/hippdb, http://code.google.com/p/helidb","HippDB: a database of readily targeted helical protein-protein interactions. Summary HippDB catalogs every protein-protein interaction whose structure is available in the Protein Data Bank and which exhibits one or more helices at the interface. The Web site accepts queries on variables such as helix length and sequence, and it provides computational alanine scanning and change in solvent-accessible surface area values for every interfacial residue. HippDB is intended to serve as a starting point for structure-based small molecule and peptidomimetic drug development. Availability and implementation HippDB is freely available on the web at http://www.nyu.edu/projects/arora/hippdb. The Web site is implemented in PHP, MySQL and Apache. Source code freely available for download at http://code.google.com/p/helidb, implemented in Perl and supported on Linux. Contact arora@nyu.edu.",HippDB,0.994108021,NA,0,HippDB,0.994108021,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/19/2013 +27113915,http://hipposeq.janelia.org,"Hipposeq: a comprehensive RNA-seq database of gene expression in hippocampal principal neurons. Clarifying gene expression in narrowly defined neuronal populations can provide insight into cellular identity, computation, and functionality. Here, we used next-generation RNA sequencing (RNA-seq) to produce a quantitative, whole genome characterization of gene expression for the major excitatory neuronal classes of the hippocampus; namely, granule cells and mossy cells of the dentate gyrus, and pyramidal cells of areas CA3, CA2, and CA1. 
Moreover, for the canonical cell classes of the trisynaptic loop, we profiled transcriptomes at both dorsal and ventral poles, producing a cell-class- and region-specific transcriptional description for these populations. This dataset clarifies the transcriptional properties and identities of lesser-known cell classes, and moreover reveals unexpected variation in the trisynaptic loop across the dorsal-ventral axis. We have created a public resource, Hipposeq (http://hipposeq.janelia.org), which provides analysis and visualization of these data and will act as a roadmap relating molecules to cells, circuits, and computation in the hippocampus.",Hipposeq,0.914798081,NA,0,Hipposeq,0.914798081,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/26/2016 +27733501,"http://www.hipsci.org/lines, http://www.hipsci.org/data/trackhubs","The human-induced pluripotent stem cell initiative-data resources for cellular genetics. The Human Induced Pluripotent Stem Cell Initiative (HipSci) isf establishing a large catalogue of human iPSC lines, arguably the most well characterized collection to date. The HipSci portal enables researchers to choose the right cell line for their experiment, and makes HipSci's rich catalogue of assay data easy to discover and reuse. Each cell line has genomic, transcriptomic, proteomic and cellular phenotyping data. Data are deposited in the appropriate EMBL-EBI archives, including the European Nucleotide Archive (ENA), European Genome-phenome Archive (EGA), ArrayExpress and PRoteomics IDEntifications (PRIDE) databases. The project will make 500 cell lines from healthy individuals, and from 150 patients with rare genetic diseases; these will be available through the European Collection of Authenticated Cell Cultures (ECACC). As of August 2016, 238 cell lines are available for purchase. 
Project data is presented through the HipSci data portal (http://www.hipsci.org/lines) and is downloadable from the associated FTP site (ftp://ftp.hipsci.ebi.ac.uk/vol1/ftp). The data portal presents a summary matrix of the HipSci cell lines, showing available data types. Each line has its own page containing descriptive metadata, quality information, and links to archived assay data. Analysis results are also available in a Track Hub, allowing visualization in the context of public genomic annotations (http://www.hipsci.org/data/trackhubs).",HipSci,0.994878662,Human Induced Pluripotent Stem Cell Initiative,0.975116302,HipSci,0.994878662,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/12/2016 +33677507,http://human.biomedtzc.cn,"HIR V2: a human interactome resource for the biological interpretation of differentially expressed genes via gene set linkage analysis. . To facilitate biomedical studies of disease mechanisms, a high-quality interactome that connects functionally related genes is needed to help investigators formulate pathway hypotheses and to interpret the biological logic of a phenotype at the biological process level. Interactions in the updated version of the human interactome resource (HIR V2) were inferred from 36 mathematical characterizations of six types of data that suggest functional associations between genes. This update of the HIR consists of 88 069 pairs of genes (23.2% functional interactions of HIR V2 are in common with the previous version of HIR), representing functional associations that are of strengths similar to those between well-studied protein interactions. Among these functional interactions, 57% may represent protein interactions, which are expected to cover 32% of the true human protein interactome. 
The gene set linkage analysis (GSLA) tool is developed based on the high-quality HIR V2 to identify the potential functional impacts of the observed transcriptomic changes, helping to elucidate their biological significance and complementing the currently widely used enrichment-based gene set interpretation tools. A case study shows that the annotations reported by the HIR V2/GSLA system are more comprehensive and concise compared to those obtained by the widely used gene set annotation tools such as PANTHER and DAVID. The HIR V2 and GSLA are available at http://human.biomedtzc.cn.",HIR,0.787795722,NA,0,HIR,0.787795722,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/1/2021 +31725860,http://biokb.ncpsb.org/HisgAtlas,"HisgAtlas 1.0: a human immunosuppression gene database. . Immunosuppression is body's state in which the activation or efficacy of immune system is weakened. It is associated with a wide spectrum of human diseases. In the last two decades, tremendous efforts have been made to elucidate the mechanism of hundreds of immunosuppression genes. Immunosuppression genes could be valuable drug targets or biomarkers for the immunotherapeutic treatment of different diseases. However, the information of all previously identified immunosuppression genes is dispersed in thousands of publications. Here, we provide the HisgAtlas database that collects 995 previously identified human immunosuppression genes using text mining and manual curation. We believe HisgAtlas will be a valuable resource to search human immunosuppression genes as well as to investigate their functions in further research. Database URL: http://biokb.ncpsb.org/HisgAtlas/.",HisgAtlas,0.997663379,NA,0,HisgAtlas,0.997663379,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +33984507,http://reprod.njmu.edu.cn/hisphossite,"HisPhosSite: A comprehensive database of histidine phosphorylated proteins and sites. 
Histidine phosphorylation is critically important in a variety of cellular processes including signal transduction, cell cycle, proliferation, differentiation, and apoptosis. It is estimated to account for 6% of all phosphorylated amino acids. However, due to the acid lability of the PN bond, the study of pHis lags far behind that of pSer, pThr, and pTyr. Recently, the development and use of pHis-specific antibodies and methodologies have led to a resurgence in the study of histidine phosphorylation. Although a considerable number of pHis proteins and sites have been discovered, most of them have not been manually curated and integrated to any databases. There is a lack of a data repository for pHis, and such work is expected to help further systemic studies of pHis. Thus, we present a comprehensive resource database of histidine phosphorylation (HisPhosSite) by curating experimentally validated pHis proteins and sites and compiling putative pHis sites with ortholog search. HisPhosSite contains 776 verified pHis sites and 2702 verified pHis proteins in 38 eukaryotic and prokaryotic species and 15,378 putative pHis sites and 10,816 putative pHis proteins in 1366 species. HisPhosSite provides rich annotations of pHis sites and proteins and multiple search engines (including motif search and BLAST search) for users to locate pHis sites of interest. HisPhosSite is available at http://reprod.njmu.edu.cn/hisphossite. SIGNIFICANCE: Histidine phosphorylation is involved in a variety of cellular processes as well as cancers, and it has been proved to be more common than previously thought. The HisPhosSite database was developed to collect pHis data from published literatures with experimental evidences. Unification of the identified pHis proteins and sites will give researchers an informative resource for histidine phosphorylation. HisPhosSite has a user-friendly interface with multiple search engines for users to locate pHis sites of interest. 
In addition, the database provides rich structural and functional annotations. HisPhosSite will help future studies and elucidation of the functions of histidine phosphorylation.",HisPhosSite,0.988221288,NA,0,HisPhosSite,0.988221288,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/10/2021 +22140112,"http://www.iiserpune.ac.in/Ã, http://www.actrec.gov.in/histome","HIstome--a relational knowledgebase of human histone proteins and histone modifying enzymes. Histones are abundant nuclear proteins that are essential for the packaging of eukaryotic DNA into chromosomes. Different histone variants, in combination with their modification 'code', control regulation of gene expression in diverse cellular processes. Several enzymes that catalyze the addition and removal of multiple histone modifications have been discovered in the past decade, enabling investigations of their role(s) in normal cellular processes and diverse pathological conditions. This sudden influx of data, however, has resulted in need of an updated knowledgebase that compiles, organizes and presents curated scientific information to the user in an easily accessible format. Here, we present HIstome, a browsable, manually curated, relational database that provides information about human histone proteins, their sites of modifications, variants and modifying enzymes. HIstome is a knowledgebase of 55 human histone proteins, 106 distinct sites of their post-translational modifications (PTMs) and 152 histone-modifying enzymes. Entries have been grouped into 5 types of histones, 8 types of post-translational modifications and 14 types of enzymes that catalyze addition and removal of these modifications. The resource will be useful for epigeneticists, pharmacologists and clinicians. 
HIstome: The Histone Infobase is available online at http://www.iiserpune.ac.in/∼coee/histome/ and http://www.actrec.gov.in/histome/.",HIstome,0.995750725,NA,0,HIstome,0.995750725,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/2/2011 +26212453,http://www.histoneantibodies.com,"An Interactive Database for the Assessment of Histone Antibody Specificity. Access to high-quality antibodies is a necessity for the study of histones and their posttranslational modifications (PTMs). Here we debut the Histone Antibody Specificity Database (http://www.histoneantibodies.com), an online and expanding resource cataloging the behavior of widely used, commercially available histone antibodies by peptide microarray. This interactive web portal provides a critical resource to the biological research community that routinely uses these antibodies as detection reagents for a wide range of applications.",NA,0,Histone Antibody Specificity Database,0.951532856,Histone Antibody Specificity Database,0.951532856,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/23/2015 +26989147,http://www.ncbi.nlm.nih.gov/projects/HistoneDB2.0,"HistoneDB 2.0: a histone database with variants--an integrated resource to explore histones and their variants. . Compaction of DNA into chromatin is a characteristic feature of eukaryotic organisms. The core (H2A, H2B, H3, H4) and linker (H1) histone proteins are responsible for this compaction through the formation of nucleosomes and higher order chromatin aggregates. Moreover, histones are intricately involved in chromatin functioning and provide a means for genome dynamic regulation through specific histone variants and histone post-translational modifications. 'HistoneDB 2.0--with variants' is a comprehensive database of histone protein sequences, classified by histone types and variants. 
All entries in the database are supplemented by rich sequence and structural annotations with many interactive tools to explore and compare sequences of different variants from various organisms. The core of the database is a manually curated set of histone sequences grouped into 30 different variant subsets with variant-specific annotations. The curated set is supplemented by an automatically extracted set of histone sequences from the non-redundant protein database using algorithms trained on the curated set. The interactive web site supports various searching strategies in both datasets: browsing of phylogenetic trees; on-demand generation of multiple sequence alignments with feature annotations; classification of histone-like sequences and browsing of the taxonomic diversity for every histone variant. HistoneDB 2.0 is a resource for the interactive comparative analysis of histone protein sequences and their implications for chromatin function. Database URL: http://www.ncbi.nlm.nih.gov/projects/HistoneDB2.0.",HistoneDB,0.99104923,NA,0,HistoneDB,0.99104923,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/17/2016 +26708988,http://hintdb.hgc.jp/htp,"HitPredict version 4: comprehensive reliability scoring of physical protein-protein interactions from more than 100 species. . HitPredict is a consolidated resource of experimentally identified, physical protein-protein interactions with confidence scores to indicate their reliability. The study of genes and their inter-relationships using methods such as network and pathway analysis requires high quality protein-protein interaction information. Extracting reliable interactions from most of the existing databases is challenging because they either contain only a subset of the available interactions, or a mixture of physical, genetic and predicted interactions. Automated integration of interactions is further complicated by varying levels of accuracy of database content and lack of adherence to standard formats. 
To address these issues, the latest version of HitPredict provides a manually curated dataset of 398 696 physical associations between 70 808 proteins from 105 species. Manual confirmation was used to resolve all issues encountered during data integration. For improved reliability assessment, this version combines a new score derived from the experimental information of the interactions with the original score based on the features of the interacting proteins. The combined interaction score performs better than either of the individual scores in HitPredict as well as the reliability score of another similar database. HitPredict provides a web interface to search proteins and visualize their interactions, and the data can be downloaded for offline analysis. Data usability has been enhanced by mapping protein identifiers across multiple reference databases. Thus, the latest version of HitPredict provides a significantly larger, more reliable and usable dataset of protein-protein interactions from several species for the study of gene groups. Database URL: http://hintdb.hgc.jp/htp.",HitPredict,0.99754715,NA,0,HitPredict,0.99754715,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/26/2015 +25378338,http://www.ncbi.nlm.nih.gov/genome/viruses/retroviruses/hiv-1/interactions,"HIV-1, human interaction database: current status and new features. The 'Human Immunodeficiency Virus Type 1 (HIV-1), Human Interaction Database', available through the National Library of Medicine at http://www.ncbi.nlm.nih.gov/genome/viruses/retroviruses/hiv-1/interactions, serves the scientific community exploring the discovery of novel HIV vaccine candidates and therapeutic targets. 
Each HIV-1 human protein interaction can be retrieved without restriction by web-based downloads and ftp protocols and includes: Reference Sequence (RefSeq) protein accession numbers, National Center for Biotechnology Information Gene identification numbers, brief descriptions of the interactions, searchable keywords for interactions and PubMed identification numbers (PMIDs) of journal articles describing the interactions. In addition to specific HIV-1 protein-human protein interactions, included are interaction effects upon HIV-1 replication resulting when individual human gene expression is blocked using siRNA. A total of 3142 human genes are described participating in 12,786 protein-protein interactions, along with 1316 replication interactions described for each of 1250 human genes identified using small interfering RNA (siRNA). Together the data identifies 4006 human genes involved in 14,102 interactions. With the inclusion of siRNA interactions we introduce a redesigned web interface to enhance viewing, filtering and downloading of the combined data set.",HIV-1),0.726562852,interaction database,0.548142433,HIV-1),0.726562852,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/6/2014 +28358052,http://hivlatency.erc.monash.edu,"HIVed, a knowledgebase for differentially expressed human genes and proteins during HIV infection, replication and latency. Measuring the altered gene expression level and identifying differentially expressed genes/proteins during HIV infection, replication and latency is fundamental for broadening our understanding of the mechanisms of HIV infection and T-cell dysfunction. Such studies are crucial for developing effective strategies for virus eradication from the body. 
Inspired by the availability and enrichment of gene expression data during HIV infection, replication and latency, in this study, we proposed a novel compendium termed HIVed (HIV expression database; http://hivlatency.erc.monash.edu/) that harbours comprehensive functional annotations of proteins, whose genes have been shown to be dysregulated during HIV infection, replication and latency using different experimental designs and measurements. We manually curated a variety of third-party databases for structural and functional annotations of the protein entries in HIVed. With the goal of benefiting HIV related research, we collected a number of biological annotations for all the entries in HIVed besides their expression profile, including basic protein information, Gene Ontology terms, secondary structure, HIV-1 interaction and pathway information. We hope this comprehensive protein-centric knowledgebase can bridge the gap between the understanding of differentially expressed genes and the functions of their protein products, facilitating the generation of novel hypotheses and treatment strategies to fight against the HIV pandemic.",HIVed,0.991903603,database,0.602060437,HIVed,0.991903603,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/30/2017 +28365729,http://portugene.com/HIVoligoDB,"The HIV oligonucleotide database (HIVoligoDB). . The human immunodeficiency virus (HIV) is associated with one of the most widespread infectious disease, the acquired immunodeficiency syndrome (AIDS). The development of antiretroviral drugs and methods for virus detection requires a comprehensive analysis of the HIV genomic diversity, particularly in the binding sites of oligonucleotides. Here, we describe a versatile online database (HIVoligoDB) with oligonucleotides selected for the diagnosis of HIV and treatment of AIDS. Currently, the database provides an interface for visualization, analysis and download of 380 HIV-1 and 65 HIV-2 oligonucleotides annotated according to curated reference genomes. 
The database also allows the selection of the most conserved HIV genomic regions for the development of molecular diagnostic assays and sequence-based candidate therapeutics. http://portugene.com/HIVoligoDB.",HIVoligoDB,0.98352025,The HIV oligonucleotide database,0.841203025,HIVoligoDB,0.98352025,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +22022467,http://crdd.osdd.net/raghava/hivsir,"HIVsirDB: a database of HIV inhibiting siRNAs. Background Human immunodeficiency virus (HIV) is responsible for millions of deaths every year. The current treatment involves the use of multiple antiretroviral agents that may harm patients due to their toxic nature. RNA interference (RNAi) is a potent candidate for the future treatment of HIV, uses short interfering RNA (siRNA/shRNA) for silencing HIV genes. In this study, attempts have been made to create a database HIVsirDB of siRNAs responsible for silencing HIV genes. Descriptions HIVsirDB is a manually curated database of HIV inhibiting siRNAs that provides comprehensive information about each siRNA or shRNA. Information was collected and compiled from literature and public resources. This database contains around 750 siRNAs that includes 75 partially complementary siRNAs differing by one or more bases with the target sites and over 100 escape mutant sequences. HIVsirDB structure contains sixteen fields including siRNA sequence, HIV strain, targeted genome region, efficacy and conservation of target sequences. In order to facilitate user, many tools have been integrated in this database that includes; i) siRNAmap for mapping siRNAs on target sequence, ii) HIVsirblast for BLAST search against database, iii) siRNAalign for aligning siRNAs. Conclusion HIVsirDB is a freely accessible database of siRNAs which can silence or degrade HIV genes. It covers 26 types of HIV strains and 28 cell types. This database will be very useful for developing models for predicting efficacy of HIV inhibiting siRNAs. 
In summary this is a useful resource for researchers working in the field of siRNA based HIV therapy. HIVsirDB database is accessible at http://crdd.osdd.net/raghava/hivsir/.",HIVsirDB,0.950383782,NA,0,HIVsirDB,0.950383782,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/11/2011 +31783725,http://zhaoserver.com.cn/HKPocket/HKPocket.html,"HKPocket: human kinase pocket database for drug design. BACKGROUND:The kinase pocket structural information is important for drug discovery targeting cancer or other diseases. Although some kinase sequence, structure or drug databases have been developed, the databases cannot be directly used in the kinase drug study. Therefore, a comprehensive database of human kinase protein pockets is urgently needed to be developed. RESULTS:Here, we have developed HKPocket, a comprehensive Human Kinase Pocket database. This database provides sequence, structure, hydrophilic-hydrophobic, critical interactions, and druggability information including 1717 pockets from 255 kinases. We further divided these pockets into 91 pocket clusters using structural and position features in each kinase group. The pocket structural information would be useful for preliminary drug screening. Then, the potential drugs can be further selected and optimized by analyzing the sequence conservation, critical interactions, and hydrophobicity of identified drug pockets. HKPocket also provides online visualization and pse files of all identified pockets. CONCLUSION:The HKPocket database would be helpful for drug screening and optimization. Besides, drugs targeting the non-catalytic pockets would cause fewer side effects. 
HKPocket is available at http://zhaoserver.com.cn/HKPocket/HKPocket.html.",HKPocket,0.97285378,Human Kinase Pocket,0.730029374,HKPocket,0.97285378,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/29/2019 +33858848,http://hla-ligand-atlas.org,"HLA Ligand Atlas: a benign reference of HLA-presented peptides to improve T-cell-based cancer immunotherapy. . The human leucocyte antigen (HLA) complex controls adaptive immunity by presenting defined fractions of the intracellular and extracellular protein content to immune cells. Understanding the benign HLA ligand repertoire is a prerequisite to define safe T-cell-based immunotherapies against cancer. Due to the poor availability of benign tissues, if available, normal tissue adjacent to the tumor has been used as a benign surrogate when defining tumor-associated antigens. However, this comparison has proven to be insufficient and even resulted in lethal outcomes. In order to match the tumor immunopeptidome with an equivalent counterpart, we created the HLA Ligand Atlas, the first extensive collection of paired HLA-I and HLA-II immunopeptidomes from 227 benign human tissue samples. This dataset facilitates a balanced comparison between tumor and benign tissues on HLA ligand level. Human tissue samples were obtained from 16 subjects at autopsy, five thymus samples and two ovary samples originating from living donors. HLA ligands were isolated via immunoaffinity purification and analyzed in over 1200 liquid chromatography mass spectrometry runs. Experimentally and computationally reproducible protocols were employed for data acquisition and processing. The initial release covers 51 HLA-I and 86 HLA-II allotypes presenting 90,428 HLA-I- and 142,625 HLA-II ligands. The HLA allotypes are representative for the world population. We observe that immunopeptidomes differ considerably between tissues and individuals on source protein and HLA-ligand level. 
Moreover, we discover 1407 HLA-I ligands from non-canonical genomic regions. Such peptides were previously described in tumors, peripheral blood mononuclear cells (PBMCs), healthy lung tissues and cell lines. In a case study in glioblastoma, we show that potential on-target off-tumor adverse events in immunotherapy can be avoided by comparing tumor immunopeptidomes to the provided multi-tissue reference. Given that T-cell-based immunotherapies, such as CAR-T cells, affinity-enhanced T cell transfer, cancer vaccines and immune checkpoint inhibition, have significant side effects, the HLA Ligand Atlas is the first step toward defining tumor-associated targets with an improved safety profile. The resource provides insights into basic and applied immune-associated questions in the context of cancer immunotherapy, infection, transplantation, allergy and autoimmunity. It is publicly available and can be browsed in an easy-to-use web interface at https://hla-ligand-atlas.org .",HLA,0.571328402,NA,0,HLA,0.571328402,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,4/1/2021 +27189608,http://www.allelefrequencies.net/hla-adr,"A web resource for mining HLA associations with adverse drug reactions: HLA-ADR. . Human leukocyte antigens (HLA) are an important family of genes involved in the immune system. Their primary function is to allow the host immune system to be able to distinguish between self and non-self peptides-e.g. derived from invading pathogens. However, these genes have also been implicated in immune-mediated adverse drug reactions (ADRs), presenting a problem to patients, clinicians and pharmaceutical companies. We have previously developed the Allele Frequency Net Database (AFND) that captures the allelic and haplotype frequencies for these HLA genes across many healthy populations from around the world. 
Here, we report the development and release of the HLA-ADR database that captures data from publications where HLA alleles and haplotypes have been associated with ADRs (e.g. Stevens-Johnson Syndrome/toxic epidermal necrolysis and drug-induced liver injury). HLA-ADR was created by using data obtained through systematic review of the literature and semi-automated literature mining. The database also draws on data already present in AFND allowing users to compare and analyze allele frequencies in both ADR patients and healthy populations. The HLA-ADR database provides clinicians and researchers with a centralized resource from which to investigate immune-mediated ADRs.Database URL: http://www.allelefrequencies.net/hla-adr/.",HLA-ADR,0.913742805,Allele Frequency Net Database,0.807270408,HLA-ADR,0.913742805,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/17/2016 +23584836,http://lysosome.unipg.it,"hLGDB: a database of human lysosomal genes and their regulation. Lysosomes are cytoplasmic organelles present in almost all eukaryotic cells, which play a fundamental role in key aspects of cellular homeostasis such as membrane repair, autophagy, endocitosis and protein metabolism. The characterization of the genes and enzymes constituting the lysosome represents a central issue to be addressed toward a better understanding of the biology of this organelle. In humans, mutations that cause lysosomal enzyme deficiencies result in >50 different disorders and severe pathologies. So far, many experimental efforts using different methodologies have been carried out to identity lysosomal genes. The Human Lysosome Gene Database (hLGDB) is the first resource that provides a comprehensive and accessible census of the human genes belonging to the lysosomal system. This database was developed by collecting and annotating gene lists from many different sources. 
References to the studies that have identified each gene are provided together with cross databases gene related information. Special attention has been given to the regulation of the genes through microRNAs and the transcription factor EB. The hLGDB can be easily queried to retrieve, combine and analyze information on different lists of lysosomal genes and their regulation by microRNA (binding sites predicted by five different algorithms). The hLGDB is an open access dynamic project that will permit in the future to collapse in a unique publicly accessible resource all the available biological information about lysosome genes and their regulation. Database URL: http://lysosome.unipg.it/.",hLGDB,0.994743705,Human Lysosome Gene Database,0.979168455,hLGDB,0.994743705,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/12/2013 +26209309,http://www.metabolicatlas.org,"Human metabolic atlas: an online resource for human metabolism. Human tissue-specific genome-scale metabolic models (GEMs) provide comprehensive understanding of human metabolism, which is of great value to the biomedical research community. To make this kind of data easily accessible to the public, we have designed and deployed the human metabolic atlas (HMA) website (http://www.metabolicatlas.org). This online resource provides comprehensive information about human metabolism, including the results of metabolic network analyses. We hope that it can also serve as an information exchange interface for human metabolism knowledge within the research community. The HMA consists of three major components: Repository, Hreed (Human REaction Entities Database) and Atlas. Repository is a collection of GEMs for specific human cell types and human-related microorganisms in SBML (System Biology Markup Language) format. The current release consists of several types of GEMs: a generic human GEM, 82 GEMs for normal cell types, 16 GEMs for different cancer cell types, 2 curated GEMs and 5 GEMs for human gut bacteria. 
Hreed contains detailed information about biochemical reactions. A web interface for Hreed facilitates an access to the Hreed reaction data, which can be easily retrieved by using specific keywords or names of related genes, proteins, compounds and cross-references. Atlas web interface can be used for visualization of the GEMs collection overlaid on KEGG metabolic pathway maps with a zoom/pan user interface. The HMA is a unique tool for studying human metabolism, ranging in scope from an individual cell, to a specific organ, to the overall human body. This resource is freely available under a Creative Commons Attribution-NonCommercial 4.0 International License.",HMA,0.983201583,human metabolic atlas,0.734993324,HMA,0.983201583,1,32209698,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,NA,7/24/2015 +21450710,http://jilab.biostat.jhsph.edu/database/cgi-bin/hmChIP.pl,"hmChIP: a database and web server for exploring publicly available human and mouse ChIP-seq and ChIP-chip data. Unlabelled hmChIP is a database of genome-wide chromatin immunoprecipitation (ChIP) data in human and mouse. Currently, the database contains 2016 samples from 492 ChIP-seq and ChIP-chip experiments, representing a total of 170 proteins and 11 069 914 protein-DNA interactions. A web server provides interface for database query. Protein-DNA binding intensities can be retrieved from individual samples for user-provided genomic regions. The retrieved intensities can be used to cluster samples and genomic regions to facilitate exploration of combinatorial patterns, cell-type dependencies, and cross-sample variability of protein-DNA interactions. Availability http://jilab.biostat.jhsph.edu/database/cgi-bin/hmChIP.pl.",hmChIP,0.998008847,NA,0,hmChIP,0.998008847,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/30/2011 +24194601,"http://cmbi.bjmu.edu.cn/hmdd, http://202.38.126.151/hmdd/tools/hmdd2.html","HMDD v2.0: a database for experimentally supported human microRNA and disease associations. 
The Human microRNA Disease Database (HMDD; available via the Web site at http://cmbi.bjmu.edu.cn/hmdd and http://202.38.126.151/hmdd/tools/hmdd2.html) is a collection of experimentally supported human microRNA (miRNA) and disease associations. Here, we describe the HMDD v2.0 update that presented several novel options for users to facilitate exploration of the data in the database. In the updated database, miRNA-disease association data were annotated in more details. For example, miRNA-disease association data from genetics, epigenetics, circulating miRNAs and miRNA-target interactions were integrated into the database. In addition, HMDD v2.0 presented more data that were generated based on concepts derived from the miRNA-disease association data, including disease spectrum width of miRNAs and miRNA spectrum width of human diseases. Moreover, we provided users a link to download all the data in the HMDD v2.0 and a link to submit novel data into the database. Meanwhile, we also maintained the old version of HMDD. By keeping data sets up-to-date, HMDD should continue to serve as a valuable resource for investigating the roles of miRNAs in human disease.",HMDD,0.997899234,Human microRNA Disease Database,0.991232157,HMDD,0.997899234,1,NA,30364956,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,11/4/2013 +30364956,http://www.cuilab.cn/hmdd,"HMDD v3.0: a database for experimentally supported human microRNA-disease associations. Comprehensive databases of microRNA-disease associations are continuously demanded in biomedical researches. The recently launched version 3.0 of Human MicroRNA Disease Database (HMDD v3.0) manually collects a significant number of miRNA-disease association entries from literature. Comparing to HMDD v2.0, this new version contains 2-fold more entries. 
Besides, the associations have been more accurately classified based on literature-derived evidence code, which results in six generalized categories (genetics, epigenetics, target, circulation, tissue and other) covering 20 types of detailed evidence code. Furthermore, we added new functionalities like network visualization on the web interface. To exemplify the utility of the database, we compared the disease spectrum width of miRNAs (DSW) and the miRNA spectrum width of human diseases (MSW) between version 3.0 and 2.0 of HMDD. HMDD is freely accessible at http://www.cuilab.cn/hmdd. With accumulating evidence of miRNA-disease associations, HMDD database will keep on growing in the future.",HMDD,0.982956916,Human MicroRNA Disease Database,0.938454998,HMDD,0.982956916,1,NA,24194601,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,1/1/2019 +22139932,http://www.hmtdb.uniba.it:8080/hmdb,"HmtDB, a genomic resource for mitochondrion-based human variability studies. HmtDB (http://www.hmtdb.uniba.it:8080/hmdb) is a open resource created to support population genetics and mitochondrial disease studies. The database hosts human mitochondrial genome sequences annotated with population and variability data, the latter being estimated through the application of the SiteVar software based on site-specific nucleotide and amino acid variability calculations. The annotations are manually curated thus adding value to the quality of the information provided to the end-user. Classifier tools implemented in HmtDB allow the prediction of the haplogroup for any human mitochondrial genome currently stored in HmtDB or externally submitted de novo by an end-user. Haplogroup definition is based on the Phylotree system. 
End-users accessing HmtDB are hence allowed to (i) browse the database through the use of a multi-criterion 'query' system; (ii) analyze their own human mitochondrial sequences via the 'classify' tool (for complete genomes) or by downloading the 'fragment-classifier' tool (for partial sequences); (iii) download multi-alignments with reference genomes as well as variability data.",HmtDB,0.998390019,NA,0,HmtDB,0.998390019,1,NA,27899581,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,12/1/2011 +27899581,http://www.hmtdb.uniba.it,"HmtDB 2016: data update, a better performing query system and human mitochondrial DNA haplogroup predictor. The HmtDB resource hosts a database of human mitochondrial genome sequences from individuals with healthy and disease phenotypes. The database is intended to support both population geneticists as well as clinicians undertaking the task to assess the pathogenicity of specific mtDNA mutations. The wide application of next-generation sequencing (NGS) has provided an enormous volume of high-resolution data at a low price, increasing the availability of human mitochondrial sequencing data, which called for a cogent and significant expansion of HmtDB data content that has more than tripled in the current release. We here describe additional novel features, including: (i) a complete, user-friendly restyling of the web interface, (ii) links to the command-line stand-alone and web versions of the MToolBox package, an up-to-date tool to reconstruct and analyze human mitochondrial DNA from NGS data and (iii) the implementation of the Reconstructed Sapiens Reference Sequence (RSRS) as mitochondrial reference sequence. The overall update renders HmtDB an even more handy and useful resource as it enables a more rapid data access, processing and analysis. 
HmtDB is accessible at http://www.hmtdb.uniba.it/.",HmtDB,0.998296142,NA,0,HmtDB,0.998296142,1,NA,22139932,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,11/28/2016 +30371888,http://www.hmtvar.uniba.it,"HmtVar: a new resource for human mitochondrial variations and pathogenicity data. Interest in human mitochondrial genetic data is constantly increasing among both clinicians and researchers, due to the involvement of mitochondrial DNA (mtDNA) in a number of physiological and pathological processes. Thanks to new sequencing technologies and modern databases, the large amount of information on mtDNA variability may be exploited to gain insights into the relationship between mtDNA variants, phenotypes and diseases. To facilitate this process, we have developed the HmtVar resource, a variant-focused database that allows the exploration of a dataset of over 40 000 human mitochondrial variants. Mitochondrial variation data, initially gathered from the HmtDB platform, are integrated with in-house pathogenicity assessments based on various evaluation criteria and with a set of additional annotations from third-party resources. The result is a comprehensive collection of information of crucial importance for human mitochondrial variation studies and investigation of common and rare diseases in which the mitochondrion may be involved. HmtVar is accessible at https://www.hmtvar.uniba.it and data may be retrieved using either a web interface through the Query page or a state-of-the-art API for programmatic access.",HmtVar,0.997302175,NA,0,HmtVar,0.997302175,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +31139565,http://hncdb.cancerbio.info,"HNCDB: An Integrated Gene and Drug Database for Head and Neck Cancer. Head and neck cancer (HNC) is the sixth most common cancer worldwide. Over the last decade, an enormous amount of well-annotated gene and drug data has accumulated for HNC. However, a comprehensive repository is not yet available. 
Here, we constructed the Head and Neck Cancer Database (HNCDB: http://hncdb.cancerbio.info) using text mining followed by manual curation of the literature to collect reliable information on the HNC-related genes and drugs. The high-throughput gene expression data for HNC were also integrated into HNCDB. HNCDB includes the following three separate but closely related components: ""HNC GENE,"" ""Connectivity Map,"" and ""ANALYSIS."" The ""HNC GENE"" component contains comprehensive information for the 1,173 HNC-related genes manually curated from 2,564 publications. The ""Connectivity Map"" includes information on the potential connections between the 176 drugs manually curated from 2,032 publications and the 1,173 HNC-related genes. The ""ANALYSIS"" component allows users to conduct correlation, differential expression, and survival analyses in the 2,403 samples from 78 HNC gene expression datasets. Taken together, we believe that HNCDB will be of significant benefit for the HNC community and promote further advances for precision medicine research on HNC.",HNCDB,0.998145655,Head and Neck Cancer Database,0.987272012,HNCDB,0.998145655,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/14/2019 +22024348,http://gyanxet.com/hno.html,"HNOCDB: a comprehensive database of genes and miRNAs relevant to head and neck and oral cancer. In spite of the wide prevalence of head, neck and oral cancer, HNOC, there is no integrated database on genes and miRNAs associated with all the carcinoma subtypes of HNOC. The objective is to compile a multilayered and comprehensive database of HNOC as a user-friendly resource for researchers devising novel therapeutic strategies. 
We present HNOCDB, the head, neck and oral cancer database, with the following key features: (i) it tabulates all the different categories of HNOC separately under appropriate subtype-names, and then puts them together in a table headlined All; (ii) the oncogenes/oncomiRs that cause HNOC are listed; their mutations, methylations and polymorphisms loci are marked, and the variations in their expression profiles relative to the normal are recorded; (iii) HNOCDB contains a chromosomal map of HNOC genes and miRNA; (iv) contains references that experimentally validate the reason for the inclusion of the genes and the miRNAs in HNOCDB. HNOCDB is freely accessible for academic and non-profit users via http://gyanxet.com/hno.html.",HNOCDB,0.997382939,NA,0,HNOCDB,0.997382939,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2011 +23175603,"http://autosome.ru/HOCOMOCO/, http://cbrc.kaust.edu.sa/hocomoco","HOCOMOCO: a comprehensive collection of human transcription factor binding sites models. Transcription factor (TF) binding site (TFBS) models are crucial for computational reconstruction of transcription regulatory networks. In existing repositories, a TF often has several models (also called binding profiles or motifs), obtained from different experimental data. Having a single TFBS model for a TF is more pragmatic for practical applications. We show that integration of TFBS data from various types of experiments into a single model typically results in the improved model quality probably due to partial correction of source specific technique bias. We present the Homo sapiens comprehensive model collection (HOCOMOCO, http://autosome.ru/HOCOMOCO/, http://cbrc.kaust.edu.sa/hocomoco/) containing carefully hand-curated TFBS models constructed by integration of binding sequences obtained by both low- and high-throughput methods. 
To construct position weight matrices to represent these TFBS models, we used ChIPMunk software in four computational modes, including newly developed periodic positional prior mode associated with DNA helix pitch. We selected only one TFBS model per TF, unless there was a clear experimental evidence for two rather distinct TFBS models. We assigned a quality rating to each model. HOCOMOCO contains 426 systematically curated TFBS models for 401 human TFs, where 172 models are based on more than one data source.",HOCOMOCO,0.995230079,Homo sapiens comprehensive model collection,0.977648824,HOCOMOCO,0.995230079,1,NA,26586801,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,11/21/2012 +26586801,"http://hocomoco.autosome.ru, http://www.cbrc.kaust.edu.sa/hocomoco10","HOCOMOCO: expansion and enhancement of the collection of transcription factor binding sites models. Models of transcription factor (TF) binding sites provide a basis for a wide spectrum of studies in regulatory genomics, from reconstruction of regulatory networks to functional annotation of transcripts and sequence variants. While TFs may recognize different sequence patterns in different conditions, it is pragmatic to have a single generic model for each particular TF as a baseline for practical applications. Here we present the expanded and enhanced version of HOCOMOCO (http://hocomoco.autosome.ru and http://www.cbrc.kaust.edu.sa/hocomoco10), the collection of models of DNA patterns, recognized by transcription factors. HOCOMOCO now provides position weight matrix (PWM) models for binding sites of 601 human TFs and, in addition, PWMs for 396 mouse TFs. Furthermore, we introduce the largest up to date collection of dinucleotide PWM models for 86 (52) human (mouse) TFs. The update is based on the analysis of massive ChIP-Seq and HT-SELEX datasets, with the validation of the resulting models on in vivo data. 
To facilitate a practical application, all HOCOMOCO models are linked to gene and protein databases (Entrez Gene, HGNC, UniProt) and accompanied by precomputed score thresholds. Finally, we provide command-line tools for PWM and diPWM threshold estimation and motif finding in nucleotide sequences.",HOCOMOCO,0.994640231,NA,0,HOCOMOCO,0.994640231,1,NA,23175603,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,11/19/2015 +21435384,http://hoctar.tigem.it,"HOCTAR database: a unique resource for microRNA target prediction. microRNAs (miRNAs) are the most abundant class of small RNAs in mammals. They play an important role in regulation of gene expression by inducing mRNA cleavage or translational inhibition. Each miRNA targets an average of 100-200 genes by binding, preferentially, to their 3' UTRs by means of partial sequence complementarity. Most miRNAs are localized within transcriptional units, termed host genes, and show similar expression behavior with respect to their corresponding host genes. Considering the impact of miRNA in the regulation of gene expression and their involvement in a growing number of human disorders, it is vital to develop sensitive computational approaches able to identify miRNA target genes. The HOCTAR database (db) is a publicly available resource collecting ranked list of predicted target genes for 290 intragenic miRNAs annotated in human. HOCTARdb is a unique resource that integrates miRNA target prediction genes and transcriptomic data to score putative miRNA targets looking at the expression behavior of their host genes. We demonstrated, by testing 135 known validated target genes (either at the translational or transcriptional level) for different miRNAs, that the miRNA target prediction lists present in HOCTARdb are highly reliable. Moreover, HOCTARdb associates biological roles to each miRNA-controlled transcriptional network by means of Gene Ontology analysis. 
This information is easily accessible through a user-friendly query page. The HOCTARdb is available at http://hoctar.tigem.it/. We believe that a detailed relationship between miRNAs and their target genes and a constant update of the information contained in HOCTARdb will provide an extremely valuable resource to assist the researcher in the discovery of miRNA target genes.",HOCTARdb,0.978390336,NA,0,HOCTARdb,0.978390336,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/22/2011 +34846641,http://bio-bigdata.cn/HODD,"HODD: A Manually Curated Database of Human Ophthalmic Diseases with Symptom Characteristics and Genetic Variants Towards Facilitating Quick and Definite Diagnosis. Ophthalmic diseases are disorders that affect the eyes. Hundreds of causal genes and biological pathways have been reported to be closely correlated with ophthalmic diseases. However, these information are scattered across various resources, which has hindered a thorough and deep understanding of ophthalmic diseases. In the present work, we proposed the Human Ophthalmic Diseases Database (HODD), which currently deposits 730 ophthalmic diseases and 653 related genes and is available at http://bio-bigdata.cn/HODD/ . The disease-related information and genes related to ophthalmic diseases were collected from the several well-known databases. To comprehensively understand the ophthalmic diseases, the basic information was provided for each disease, including disease description, related genes, gene location, ocular and extraocular effect of the disease, protein-protein interaction and disease-associated pathways. All these data were reorganized and made accessible through multiple entrances. We hope that HODD will facilitate studies on ophthalmic diseases. 
The workflow for the construction of the HODD (Human Ophthalmic Diseases Database, http://bio-bigdata.cn/HODD/ ) database.",HODD,0.994872093,Human Ophthalmic Diseases Database,0.989201119,HODD,0.994872093,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/30/2021 +24931751,http://www.carstens-stiftung.de/hombrex,"Geographical and temporal distribution of basic research experiments in homeopathy. The database HomBRex (Homeopathy Basic Research experiments) was established in 2002 to provide an overview of the basic research already done on homeopathy (http://www.carstens-stiftung.de/hombrex). By this means, it facilitates the exploration of the Similia Principle and the working mechanism of homeopathy. Since 2002, the total number of experiments listed has almost doubled. The current review reports the history of basic research in homeopathy as evidenced by publication dates and origin of publications. In July 2013, the database held 1868 entries. Most publications were reported from France (n = 267), followed by Germany (n = 246) and India (n = 237). In the last ten years, the number of publications from Brazil dramatically increased from n = 13 (before 2004) to n = 164 (compared to n = 251 published in France before 2004, and n = 16 between 2004 and 2013). The oldest database entry was from Germany (1832).",HomBRex,0.99716574,Homeopathy Basic Research experiments,0.989604324,HomBRex,0.99716574,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2014 +29760467,http://www.sklod.org/ombc,"The Oral Microbiome Bank of China. The human microbiome project (HMP) promoted further understanding of human oral microbes. However, research on the human oral microbiota has not made as much progress as research on the gut microbiota. Currently, the causal relationship between the oral microbiota and oral diseases remains unclear, and little is known about the link between the oral microbiota and human systemic diseases. 
To further understand the contribution of the oral microbiota in oral diseases and systemic diseases, a Human Oral Microbiome Database (HOMD) was established in the US. The HOMD includes 619 taxa in 13 phyla, and most of the microorganisms are from American populations. Due to individual differences in the microbiome, the HOMD does not reflect the Chinese oral microbial status. Herein, we established a new oral microbiome database-the Oral Microbiome Bank of China (OMBC, http://www.sklod.org/ombc ). Currently, the OMBC includes information on 289 bacterial strains and 720 clinical samples from the Chinese population, along with lab and clinical information. The OMBC is the first curated description of a Chinese-associated microbiome; it provides tools for use in investigating the role of the oral microbiome in health and diseases, and will give the community abundant data and strain information for future oral microbial studies.",HOMD,0.944250524,Human Oral Microbiome Database,0.942544252,HOMD,0.944250524,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/3/2018 +23016940,http://homeodb.zoo.ox.ac.uk,"HomeoDB2: functional expansion of a comparative homeobox gene database for evolutionary developmental biology. Homeobox gene database (HomeoDB), a manually curated database of homeobox genes and their classification, has been well received since its release in 2008. Here, we report HomeoDB2, an expansion and improvement of the original database that provides greater functionality for the user. HomeoDB2 includes all homeobox loci from 10 animal genomes (human, mouse, chicken, frog, zebrafish, amphioxus, nematode, fruitfly, beetle, honeybee) plus tools for downloading sequences, comparing between species and BLAST searching. 
HomeoDB2 provides a resource for studying the dynamics of homeobox gene evolution, and is freely accessible at http://homeodb.zoo.ox.ac.uk.",HomeoDB2,0.997463286,Homeobox gene database,0.984303606,HomeoDB2,0.997463286,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2011 +22165817,http://bio.informatics.iupui.edu/homer,"HOMER: a human organ-specific molecular electronic repository. Background Each organ has a specific function in the body. ""Organ-specificity"" refers to differential expressions of the same gene across different organs. An organ-specific gene/protein is defined as a gene/protein whose expression is significantly elevated in a specific human organ. An ""organ-specific marker"" is defined as an organ-specific gene/protein that is also implicated in human diseases related to the organ. Previous studies have shown that identifying specificity for the organ in which a gene or protein is significantly differentially expressed, can lead to discovery of its function. Most currently available resources for organ-specific genes/proteins either allow users to access tissue-specific expression over a limited range of organs, or do not contain disease information such as disease-organ relationship and disease-gene relationship. Results We designed an integrated Human Organ-specific Molecular Electronic Repository (HOMER, http://bio.informatics.iupui.edu/homer), defining human organ-specific genes/proteins, based on five criteria: 1) comprehensive organ coverage; 2) gene/protein to disease association; 3) disease-organ association; 4) quantification of organ-specificity; and 5) cross-linking of multiple available data sources.HOMER is a comprehensive database covering about 22,598 proteins, 52 organs, and 4,290 diseases integrated and filtered from organ-specific proteins/genes and disease databases like dbEST, TiSGeD, HPA, CTD, and Disease Ontology. 
The database has a Web-based user interface that allows users to find organ-specific genes/proteins by gene, protein, organ or disease, to explore the histogram of an organ-specific gene/protein, and to identify disease-related organ-specific genes by browsing the disease data online.Moreover, the quality of the database was validated with comparison to other known databases and two case studies: 1) an association analysis of organ-specific genes with disease and 2) a gene set enrichment analysis of organ-specific gene expression data. Conclusions HOMER is a new resource for analyzing, identifying, and characterizing organ-specific molecules in association with disease-organ and disease-gene relationships. The statistical method we developed for organ-specific gene identification can be applied to other organism. The current HOMER database can successfully answer a variety of questions related to organ specificity in human diseases and can help researchers in discovering and characterizing organ-specific genes/proteins with disease relevance.",HOMER,0.992719769,Human Organ-specific Molecular,0.831779265,HOMER,0.992719769,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/18/2011 +24137008,http://mips.helmholtz-muenchen.de/HoPaCI,"HoPaCI-DB: host-Pseudomonas and Coxiella interaction database. Bacterial infectious diseases are the result of multifactorial processes affected by the interplay between virulence factors and host targets. The host-Pseudomonas and Coxiella interaction database (HoPaCI-DB) is a publicly available manually curated integrative database (http://mips.helmholtz-muenchen.de/HoPaCI/) of host-pathogen interaction data from Pseudomonas aeruginosa and Coxiella burnetii. The resource provides structured information on 3585 experimentally validated interactions between molecules, bioprocesses and cellular structures extracted from the scientific literature. 
Systematic annotation and interactive graphical representation of disease networks make HoPaCI-DB a versatile knowledge base for biologists and network biology approaches.",HoPaCI-DB,0.997813225,host-Pseudomonas and Coxiella interaction database,0.856298451,HoPaCI-DB,0.997813225,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/16/2013 +28415075,"http://hopbase.org, http://hopbase.cgrb.oregonstate.edu","HopBase: a unified resource for Humulus genomics. . Hop (Humulus lupulus L. var lupulus) is a dioecious plant of worldwide significance, used primarily for bittering and flavoring in brewing beer. Studies on the medicinal properties of several unique compounds produced by hop have led to additional interest from pharmacy and healthcare industries as well as livestock production as a natural antibiotic. Genomic research in hop has resulted a published draft genome and transcriptome assemblies. As research into the genomics of hop has gained interest, there is a critical need for centralized online genomic resources. To support the growing research community, we report the development of an online resource ""HopBase.org."" In addition to providing a gene annotation to the existing Shinsuwase draft genome, HopBase makes available genome assemblies and annotations for both the cultivar ""Teamaker"" and male hop accession number USDA 21422M. These genome assemblies, gene annotations, along with other common data, coupled with a genome browser and BLAST database enable the hop community to enter the genomic age. The HopBase genomic resource is accessible at http://hopbase.org and http://hopbase.cgrb.oregonstate.edu.",HopBase,0.980602145,NA,0,HopBase,0.980602145,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +23585031,http://genome.weizmann.ac.il/horde,"HORDE: comprehensive resource for olfactory receptor genomics. Olfactory receptors (ORs) constitute the largest gene family in the mammalian genome. The existence of these proteins underlies the nature of, and variability in, odorant perception. 
The Human Olfactory Receptor Data Explorer (HORDE, http://genome.weizmann.ac.il/horde/ ) is a free online resource, which presents a complete compendium of all OR genes and pseudogenes in the genome of human and four other vertebrates. HORDE includes three parts: (1) an automated pipeline, which mines OR gene and pseudogene sequences out of complete genomes, and generates gene symbols based on sequence similarity; (2) a card generator that obtains and displays annotative information on individual ORs retrieved from external databases and relevant studies; and (3) a search engine that allows user retrieval of OR information. For human ORs, HORDE specifically addresses the universe of interindividual variation, as obtained from several sources, including whole genome sequences made possible by next-generation sequencing. This encompasses single nucleotide polymorphisms (SNP) and copy number variation (CNV), including deleterious mutational events. HORDE also hosts a number of tools designed specifically to assist in the study of OR evolution and function. In this chapter, we describe the status of HORDE (build #43). We also discuss plans for future enhancements and a road map for HORDE to become a better community-based bioinformatics tool. We highlight HORDE's role as a major research tool in the study of an expanding cohort of OR repertoires.",HORDE,0.991316095,Human Olfactory Receptor Data Explorer,0.980134517,HORDE,0.991316095,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2013 +29315358,"http://lpa.saogabriel.unipampa.edu.br, http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase","HTT-DB: new features and updates. . Horizontal Transfer (HT) of genetic material between species is a common phenomenon among Bacteria and Archaea species and several databases are available for information retrieval and data mining. However, little attention has been given to this phenomenon among eukaryotic species mainly due to the lower proportion of these events. 
In the last years, a vertiginous amount of new HT events involving eukaryotic species was reported in the literature, highlighting the need of a common repository to keep the scientific community up to date and describe overall trends. Recently, we published the first HT database focused on HT of transposable elements among eukaryotes: the Horizontal Transposon Transfer DataBase (http://lpa.saogabriel.unipampa.edu.br: 8080/httdatabase/). Here, we present new features and updates of this unique database: (i) its expansion to include virus-host exchange of genetic material, which we called Horizontal Virus Transfer (HVT) and (ii) the availability of a web server for HT detection, where we implemented the online version of vertical and horizontal inheritance consistence analysis (VHICA), an R package developed for HT detection. These improvements will help researchers to navigate through known HVT cases, take data-informed decision and export figures based on keywords searches. Moreover, the availability of the VHICA as an online tool will make this software easily reachable even for researchers with no or little computation knowledge as well as foster our capability to detect new HT events in a wide variety of taxa. (Database URL: http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase/).",HTT-DB,0.735216126,Horizontal Transposon Transfer DataBase,0.841958195,Horizontal Transposon Transfer DataBase,0.841958195,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +25178365,http://snugenome2.snu.ac.kr/HSDB,"Thoroughbred Horse Single Nucleotide Polymorphism and Expression Database: HSDB. Genetics is important for breeding and selection of horses but there is a lack of well-established horse-related browsers or databases. In order to better understand horses, more variants and other integrated information are needed. Thus, we construct a horse genomic variants database including expression and other information. 
Horse Single Nucleotide Polymorphism and Expression Database (HSDB) (http://snugenome2.snu.ac.kr/HSDB) provides the number of unexplored genomic variants still remaining to be identified in the horse genome including rare variants by using population genome sequences of eighteen horses and RNA-seq of four horses. The identified single nucleotide polymorphisms (SNPs) were confirmed by comparing them with SNP chip data and variants of RNA-seq, which showed a concordance level of 99.02% and 96.6%, respectively. Moreover, the database provides the genomic variants with their corresponding transcriptional profiles from the same individuals to help understand the functional aspects of these variants. The database will contribute to genetic improvement and breeding strategies of Thoroughbreds.",HSDB,0.980438848,Horse Single Nucleotide Polymorphism and Expression Database,0.986536476,Horse Single Nucleotide Polymorphism and Expression Database,0.986536476,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2014 +22080558,http://prism.ccbb.ku.edu.tr/hotregion,"HotRegion: a database of predicted hot spot clusters. Hot spots are energetically important residues at protein interfaces and they are not randomly distributed across the interface but rather clustered. These clustered hot spots form hot regions. Hot regions are important for the stability of protein complexes, as well as providing specificity to binding sites. We propose a database called HotRegion, which provides the hot region information of the interfaces by using predicted hot spot residues, and structural properties of these interface residues such as pair potentials of interface residues, accessible surface area (ASA) and relative ASA values of interface residues of both monomer and complex forms of proteins. Also, the 3D visualization of the interface and interactions among hot spot residues are provided. 
HotRegion is accessible at http://prism.ccbb.ku.edu.tr/hotregion.",HotRegion,0.989635229,NA,0,HotRegion,0.989635229,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/12/2011 +29028885,http://hotresdb.bu.edu,"HoTResDB: host transcriptional response database for viral hemorrhagic fevers. SUMMARY:High-throughput screening of the host transcriptional response to various viral infections provides a wealth of data, but utilization of microarray and next generation sequencing (NGS) data for analysis can be difficult. The Host Transcriptional Response DataBase (HoTResDB), allows visitors to access already processed microarray and NGS data from non-human primate models of viral hemorrhagic fever to better understand the host transcriptional response. AVAILABILITY:HoTResDB is freely available at http://hotresdb.bu.edu.",HoTResDB,0.99197798,Host Transcriptional Response DataBase,0.976372804,HoTResDB,0.99197798,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +32315389,http://niulab.scgrid.cn/HotSpot3D,"HotSpot3D web server: an integrated resource for mutation analysis in protein 3D structures. Motivation HotSpot3D is a widely used software for identifying mutation hotspots on the 3D structures of proteins. To further assist users, we developed a new HotSpot3D web server to make this software more versatile, convenient and interactive. Results The HotSpot3D web server performs data pre-processing, clustering, visualization and log-viewing on one stop. Users can interactively explore each cluster and easily re-visualize the mutational clusters within browsers. We also provide a database that allows users to search and visualize proximal mutations from 33 cancers in the Cancer Genome Atlas. Availability and implementation http://niulab.scgrid.cn/HotSpot3D/. 
Supplementary information Supplementary data are available at Bioinformatics online.",HotSpot3D,0.995569845,NA,0,HotSpot3D,0.995569845,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2020 +"29532461, 33074547",http://cell-innovation.nig.ac.jp/Hpul,"HpBase: A genome database of a sea urchin, Hemicentrotus pulcherrimus. To understand the mystery of life, it is important to accumulate genomic information for various organisms because the whole genome encodes the commands for all the genes. Since the genome of Strongylocentrotus purpratus was sequenced in 2006 as the first sequenced genome in echinoderms, the genomic resources of other North American sea urchins have gradually been accumulated, but no sea urchin genomes are available in other areas, where many scientists have used the local species and reported important results. In this manuscript, we report a draft genome of the sea urchin Hemincentrotus pulcherrimus because this species has a long history as the target of developmental and cell biology in East Asia. The genome of H. pulcherrimus was assembled into 16,251 scaffold sequences with an N50 length of 143 kbp, and approximately 25,000 genes were identified in the genome. The size of the genome and the sequencing coverage were estimated to be approximately 800 Mbp and 100×, respectively. To provide these data and information of annotation, we constructed a database, HpBase (http://cell-innovation.nig.ac.jp/Hpul/). In HpBase, gene searches, genome browsing, and blast searches are available. In addition, HpBase includes the ""recipes"" for experiments from each lab using H. pulcherrimus. These recipes will continue to be updated according to the circumstances of individual scientists and can be powerful tools for experimental biologists and for the community. HpBase is a suitable dataset for evolutionary, developmental, and cell biologists to compare H. 
pulcherrimus genomic information with that of other species and to isolate gene information.",HpBase,0.998306155,NA,0,HpBase,0.998306155,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +27374121,http://www.agbase.msstate.edu/hpi/main.html,"HPIDB 2.0: a curated database for host-pathogen interactions. . Identification and analysis of host-pathogen interactions (HPI) is essential to study infectious diseases. However, HPI data are sparse in existing molecular interaction databases, especially for agricultural host-pathogen systems. Therefore, resources that annotate, predict and display the HPI that underpin infectious diseases are critical for developing novel intervention strategies. HPIDB 2.0 (http://www.agbase.msstate.edu/hpi/main.html) is a resource for HPI data, and contains 45, 238 manually curated entries in the current release. Since the first description of the database in 2010, multiple enhancements to HPIDB data and interface services were made that are described here. Notably, HPIDB 2.0 now provides targeted biocuration of molecular interaction data. As a member of the International Molecular Exchange consortium, annotations provided by HPIDB 2.0 curators meet community standards to provide detailed contextual experimental information and facilitate data sharing. Moreover, HPIDB 2.0 provides access to rapidly available community annotations that capture minimum molecular interaction information to address immediate researcher needs for HPI network analysis. In addition to curation, HPIDB 2.0 integrates HPI from existing external sources and contains tools to infer additional HPI where annotated data are scarce. Compared to other interaction databases, our data collection approach ensures HPIDB 2.0 users access the most comprehensive HPI data from a wide range of pathogens and their hosts (594 pathogen and 70 host species, as of February 2016). 
Improvements also include enhanced search capacity, addition of Gene Ontology functional information, and implementation of network visualization. The changes made to HPIDB 2.0 content and interface ensure that users, especially agricultural researchers, are able to easily access and analyse high quality, comprehensive HPI data. All HPIDB 2.0 data are updated regularly, are publically available for direct download, and are disseminated to other molecular interaction resources.Database URL: http://www.agbase.msstate.edu/hpi/main.html.",HPIDB,0.988542557,NA,0,HPIDB,0.988542557,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/3/2016 +26578596,http://www.hpmcd.org,"HPMCD: the database of human microbial communities from metagenomic datasets and microbial reference genomes. The Human Pan-Microbe Communities (HPMC) database (http://www.hpmcd.org/) provides a manually curated, searchable, metagenomic resource to facilitate investigation of human gastrointestinal microbiota. Over the past decade, the application of metagenome sequencing to elucidate the microbial composition and functional capacity present in the human microbiome has revolutionized many concepts in our basic biology. When sufficient high quality reference genomes are available, whole genome metagenomic sequencing can provide direct biological insights and high-resolution classification. The HPMC database provides species level, standardized phylogenetic classification of over 1800 human gastrointestinal metagenomic samples. This is achieved by combining a manually curated list of bacterial genomes from human faecal samples with over 21000 additional reference genomes representing bacteria, viruses, archaea and fungi with manually curated species classification and enhanced sample metadata annotation. 
A user-friendly, web-based interface provides the ability to search for (i) microbial groups associated with health or disease state, (ii) health or disease states and community structure associated with a microbial group, (iii) the enrichment of a microbial gene or sequence and (iv) enrichment of a functional annotation. The HPMC database enables detailed analysis of human microbial communities and supports research from basic microbiology and immunology to therapeutic development in human health and disease.",HPMC,0.989222422,Human Pan-Microbe Communities,0.749896427,HPMC,0.989222422,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +24217912,http://www.human-phenotype-ontology.org,"The Human Phenotype Ontology project: linking molecular biology and disease through phenotype data. The Human Phenotype Ontology (HPO) project, available at http://www.human-phenotype-ontology.org, provides a structured, comprehensive and well-defined set of 10,088 classes (terms) describing human phenotypic abnormalities and 13,326 subclass relations between the HPO classes. In addition we have developed logical definitions for 46% of all HPO classes using terms from ontologies for anatomy, cell types, function, embryology, pathology and other domains. This allows interoperability with several resources, especially those containing phenotype information on model organisms such as mouse and zebrafish. Here we describe the updated HPO database, which provides annotations of 7,278 human hereditary syndromes listed in OMIM, Orphanet and DECIPHER to classes of the HPO. Various meta-attributes such as frequency, references and negations are associated with each annotation. Several large-scale projects worldwide utilize the HPO for describing phenotype information in their datasets. 
We have therefore generated equivalence mappings to other phenotype vocabularies such as LDDB, Orphanet, MedDRA, UMLS and phenoDB, allowing integration of existing datasets and interoperability with multiple biomedical resources. We have created various ways to access the HPO database content using flat files, a MySQL database, and Web-based tools. All data and documentation on the HPO project can be found online.",HPO,0.964736124,The Human Phenotype Ontology project,0.657915694,HPO,0.964736124,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/11/2013 +33136065,http://bioinfo.bdu.ac.in/hprep,"HPREP: a comprehensive database for human proteome repeats. . Amino acid repeats are found to play important roles in both structures and functions of the proteins. These are commonly found in all kingdoms of life, especially in eukaryotes and a larger fraction of human proteins composed of repeats. Further, the abnormal expansions of shorter repeats cause various diseases to humans. Therefore, the analysis of repeats of the entire human proteome along with functional, mutational and disease information would help to better understand their roles in proteins. To fulfill this need, we developed a web database HPREP (http://bioinfo.bdu.ac.in/hprep) for human proteome repeats using Perl and HTML programming. We identified different categories of well-characterized repeats and domain repeats that are present in the human proteome of UniProtKB/Swiss-Prot by using in-house Perl programming and novel repeats by using the repeat detection T-REKS tool as well as XSTREAM web server. Further, these proteins are annotated with functional, mutational and disease information and grouped according to specific repeat types. The developed database enables the users to search by specific repeat type in order to understand their involvement in proteins. 
Thus, the HPREP database is expected to be a useful resource to gain better insight regarding the different repeats in human proteome and their biological roles.",HPREP,0.984554529,NA,0,HPREP,0.984554529,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/3/2020 +25468930,http://webapp.cabgrid.res.in/protein,"The Halophile protein database. Halophilic archaea/bacteria adapt to different salt concentration, namely extreme, moderate and low. These type of adaptations may occur as a result of modification of protein structure and other changes in different cell organelles. Thus proteins may play an important role in the adaptation of halophilic archaea/bacteria to saline conditions. The Halophile protein database (HProtDB) is a systematic attempt to document the biochemical and biophysical properties of proteins from halophilic archaea/bacteria which may be involved in adaptation of these organisms to saline conditions. In this database, various physicochemical properties such as molecular weight, theoretical pI, amino acid composition, atomic composition, estimated half-life, instability index, aliphatic index and grand average of hydropathicity (Gravy) have been listed. These physicochemical properties play an important role in identifying the protein structure, bonding pattern and function of the specific proteins. This database is comprehensive, manually curated, non-redundant catalogue of proteins. The database currently contains 59 897 proteins properties extracted from 21 different strains of halophilic archaea/bacteria. The database can be accessed through link. Database URL: http://webapp.cabgrid.res.in/protein/",HProtDB,0.989957288,Halophile protein database,0.916799814,HProtDB,0.989957288,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2014 +32707486,http://hpscreg.eu,"Access to stem cell data and registration of pluripotent cell lines: The Human Pluripotent Stem Cell Registry (hPSCreg). 
The value of human pluripotent stem cells (hPSC) in regenerative medicine has yet to reach its full potential. The road from basic research tool to clinically validated PSC-derived cell therapy products is a long and winding one, leading researchers, clinicians, industry and regulators alike into undiscovered territory. All stakeholders must work together to ensure the development of safe and effective cell therapies. Similarly, utilization of hPSC in meaningful and controlled disease modeling and drug screening applications requires information on the quality and suitability of the applied cell lines. Central to these common goals is the complete documentation of hPSC data, including the ethical provenance of the source material, the hPSC line derivation, culture conditions and genetic constitution of the lines. Data surrounding hPSC is scattered amongst diverse sources, including publications, supplemental data, researcher lab books, accredited lab reports, certificates of analyses and public data repositories. Not all of these data sources are publicly accessible nor associated with metadata nor stored in a standard manner, such that data can be easily found and retrieved. The Human Pluripotent Stem Cell Registry (hPSCreg; https://hpscreg.eu/) was started in 2007 to impart provenance and transparency towards hPSC research by registering and collecting standard properties of hPSC lines. 
In this chapter, we present a short primer on the history of stem cell-based products, summarize the ethical and regulatory issues introduced in the course of working with hPSC-derived products and their associated data, and finally present the Human Pluripotent Stem Cell Registry as a valuable resource for all stakeholders in therapies and disease modeling based on hPSC-derived cells.",hPSCreg,0.988741887,Human Pluripotent Stem Cell Registry,0.973995652,hPSCreg,0.988741887,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/27/2020 +24150944,http://bioinfo.protres.ru/hrap,"HRaP: database of occurrence of HomoRepeats and patterns in proteomes. We focus our attention on multiple repeats of one amino acid (homorepeats) and create a new database (named HRaP, at http://bioinfo.protres.ru/hrap/) of occurrence of homorepeats and disordered patterns in different proteomes. HRaP is aimed at understanding the amino acid tandem repeat function in different proteomes. Therefore, the database includes 122 proteomes, 97 eukaryotic and 25 bacterial ones that can be divided into 9 kingdoms and 5 phyla of bacteria. The database includes 1,449,561 protein sequences and 771,786 sequences of proteins with GO annotations. We have determined homorepeats and patterns that are associated with some function. 
Through our web server, the user can do the following: (i) search for proteins with the given homorepeat in 122 proteomes, including GO annotation for these proteins; (ii) search for proteins with the given disordered pattern from the library of disordered patterns constructed on the clustered Protein Data Bank in 122 proteomes, including GO annotations for these proteins; (iii) analyze lengths of homorepeats in different proteomes; (iv) investigate disordered regions in the chosen proteins in 122 proteomes; (v) study the coupling of different homorepeats in one protein; (vi) determine longest runs for each amino acid inside each proteome; and (vii) download the full list of proteins with the given length of a homorepeat.",HRaP,0.996109843,NA,0,HRaP,0.996109843,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2013 +26657893,http://plantgrn.noble.org/hrgrn,"HRGRN: A Graph Search-Empowered Integrative Database of Arabidopsis Signaling Transduction, Metabolism and Gene Regulation Networks. The biological networks controlling plant signal transduction, metabolism and gene regulation are composed of not only tens of thousands of genes, compounds, proteins and RNAs but also the complicated interactions and co-ordination among them. These networks play critical roles in many fundamental mechanisms, such as plant growth, development and environmental response. Although much is known about these complex interactions, the knowledge and data are currently scattered throughout the published literature, publicly available high-throughput data sets and third-party databases. Many 'unknown' yet important interactions among genes need to be mined and established through extensive computational analysis. However, exploring these complex biological interactions at the network level from existing heterogeneous resources remains challenging and time-consuming for biologists. 
Here, we introduce HRGRN, a graph search-empowered integrative database of Arabidopsis signal transduction, metabolism and gene regulatory networks. HRGRN utilizes Neo4j, which is a highly scalable graph database management system, to host large-scale biological interactions among genes, proteins, compounds and small RNAs that were either validated experimentally or predicted computationally. The associated biological pathway information was also specially marked for the interactions that are involved in the pathway to facilitate the investigation of cross-talk between pathways. Furthermore, HRGRN integrates a series of graph path search algorithms to discover novel relationships among genes, compounds, RNAs and even pathways from heterogeneous biological interaction data that could be missed by traditional SQL database search methods. Users can also build subnetworks based on known interactions. The outcomes are visualized with rich text, figures and interactive network graphs on web pages. The HRGRN database is freely available at http://plantgrn.noble.org/hrgrn/.",HRGRN,0.979772747,NA,0,HRGRN,0.979772747,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/12/2015 +23936191,http://mips.helmholtz-muenchen.de/HSC,"HSC-explorer: a curated database for hematopoietic stem cells. HSC-Explorer (http://mips.helmholtz-muenchen.de/HSC/) is a publicly available, integrative database containing detailed information about the early steps of hematopoiesis. The resource aims at providing fast and easy access to relevant information, in particular to the complex network of interacting cell types and molecules, from the wealth of publications in the field through visualization interfaces. It provides structured information on more than 7000 experimentally validated interactions between molecules, bioprocesses and environmental factors. Information is manually derived by critical reading of the scientific literature from expert annotators. 
Hematopoiesis-relevant interactions are accompanied with context information such as model organisms and experimental methods for enabling assessment of reliability and relevance of experimental results. Usage of established vocabularies facilitates downstream bioinformatics applications and to convert the results into complex networks. Several predefined datasets (Selected topics) offer insights into stem cell behavior, the stem cell niche and signaling processes supporting hematopoietic stem cell maintenance. HSC-Explorer provides a versatile web-based resource for scientists entering the field of hematopoiesis enabling users to inspect the associated biological processes through interactive graphical presentation.",HSC-Explorer,0.983055544,NA,0,HSC-Explorer,0.983055544,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/30/2013 +34032471,http://salivaryproteome.nidcr.nih.gov,"The Human Salivary Proteome Wiki: A Community-Driven Research Platform. Saliva has become an attractive body fluid for on-site, remote, and real-time monitoring of oral and systemic health. At the same time, the scientific community needs a saliva-centered information platform that keeps pace with the rapid accumulation of new data and knowledge by annotating, refining, and updating the salivary proteome catalog. We developed the Human Salivary Proteome (HSP) Wiki as a public data platform for researching and retrieving custom-curated data and knowledge on the saliva proteome. The HSP Wiki is dynamically compiled and updated based on published saliva proteome studies and up-to-date protein reference records. It integrates a wide range of available information by funneling in data from established external protein, genome, transcriptome, and glycome databases. In addition, the HSP Wiki incorporates data from human disease-related studies. 
Users can explore the proteome of saliva simply by browsing the database, querying the available data, performing comparisons of data sets, and annotating existing protein entries using a simple, intuitive interface. The annotation process includes both user feedback and curator committee review to ensure the quality and validity of each entry. Here, we present the first overview of features and functions the HSP Wiki offers. As a saliva proteome-centric, publicly accessible database, the HSP Wiki will advance the knowledge of saliva composition and function in health and disease for users across a wide range of disciplines. As a community-based data- and knowledgebase, the HSP Wiki will serve as a worldwide platform to exchange salivary proteome information, inspire novel research ideas, and foster cross-discipline collaborations. The HSP Wiki will pave the way for harnessing the full potential of the salivary proteome for diagnosis, risk prediction, therapy of oral and systemic diseases, and preparedness for emerging infectious diseases.Database URL: https://salivaryproteome.nidcr.nih.gov/.",HSP,0.977086564,Human Salivary Proteome Wiki,0.836235136,HSP,0.977086564,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/25/2021 +22923302,http://pdslab.biochem.iisc.ernet.in/hspir,"HSPIR: a manually annotated heat shock protein information resource. Summary Heat shock protein information resource (HSPIR) is a concerted database of six major heat shock proteins (HSPs), namely, Hsp70, Hsp40, Hsp60, Hsp90, Hsp100 and small HSP. The HSPs are essential for the survival of all living organisms, as they protect the conformations of proteins on exposure to various stress conditions. They are a highly conserved group of proteins involved in diverse physiological functions, including de novo folding, disaggregation and protein trafficking. Moreover, their critical role in the control of disease progression made them a prime target of research. 
Presently, limited information is available on HSPs in reference to their identification and structural classification across genera. To that extent, HSPIR provides manually curated information on sequence, structure, classification, ontology, domain organization, localization and possible biological functions extracted from UniProt, GenBank, Protein Data Bank and the literature. The database offers interactive search with incorporated tools, which enhances the analysis. HSPIR is a reliable resource for researchers exploring structure, function and evolution of HSPs. Availability http://pdslab.biochem.iisc.ernet.in/hspir/",HSPIR,0.99661684,Heat shock protein information resource,0.949629581,HSPIR,0.99661684,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/24/2012 +32090260,http://bioinfo.imtech.res.in/bvs/hspmdb/index.php,"HSPMdb: a computational repository of heat shock protein modulators. . Heat shock proteins (Hsp) are among highly conserved proteins across all domains of life. Though originally discovered as a cellular response to stress, these proteins are also involved in a wide range of cellular functions such as protein refolding, protein trafficking and cellular signalling. A large number of potential Hsp modulators are under clinical trials against various human diseases. As the number of modulators targeting Hsps is growing, there is a need to develop a comprehensive knowledge repository of these findings which is largely scattered. We have thus developed a web-accessible database, HSPMdb, which is a first of its kind manually curated repository of experimentally validated Hsp modulators (activators and inhibitors). The data was collected from 176 research articles and current version of HSPMdb holds 10 223 entries of compounds that are known to modulate activities of five major Hsps (Hsp100, Hsp90, Hsp70, Hsp60 and Hsp40) originated from 15 different organisms (i.e. 
human, yeast, bacteria, virus, mouse, rat, bovine, porcine, canine, chicken, Trypanosoma brucei and Plasmodium falciparum). HSPMdb provides comprehensive information on biological activities as well as the chemical properties of Hsp modulators. The biological activities of modulators are presented as enzymatic activity and cellular activity. Under the enzymatic activity field, parameters such as IC50, EC50, DC50, Ki and KD have been provided. In the cellular activity field, complete information on cellular activities (percentage cell growth inhibition, EC50 and GI50), type of cell viability assays and cell line used has been provided. One of the important features of HSPMdb is that it allows users to screen whether or not their compound of interest has any similarity with the previously known Hsp modulators. We anticipate that HSPMdb would become a valuable resource for the broader scientific community working in the area of chaperone biology and protein misfolding diseases. HSPMdb is freely accessible at http://bioinfo.imtech.res.in/bvs/hspmdb/index.php.",HSPMdb,0.998334646,NA,0,HSPMdb,0.998334646,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24558441,http://htd.cbi.pku.edu.cn,"Human transporter database: comprehensive knowledge and discovery tools in the human transporter genes. Transporters are essential in homeostatic exchange of endogenous and exogenous substances at the systematic, organic, cellular, and subcellular levels. Gene mutations of transporters are often related to pharmacogenetics traits. Recent developments in high throughput technologies on genomics, transcriptomics and proteomics allow in depth studies of transporter genes in normal cellular processes and diverse disease conditions. The flood of high throughput data have resulted in urgent need for an updated knowledgebase with curated, organized, and annotated human transporters in an easily accessible way. 
Using a pipeline with the combination of automated keywords query, sequence similarity search and manual curation on transporters, we collected 1,555 human non-redundant transporter genes to develop the Human Transporter Database (HTD) (http://htd.cbi.pku.edu.cn). Based on the extensive annotations, global properties of the transporter genes were illustrated, such as expression patterns and polymorphisms in relationships with their ligands. We noted that the human transporters were enriched in many fundamental biological processes such as oxidative phosphorylation and cardiac muscle contraction, and significantly associated with Mendelian and complex diseases such as epilepsy and sudden infant death syndrome. Overall, HTD provides a well-organized interface to facilitate research communities to search detailed molecular and genetic information of transporters for development of personalized medicine.",HTD,0.994018674,Transporter Database,0.934127271,HTD,0.994018674,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/18/2014 +32858223,http://bioinfo.life.hust.edu.cn/hTFtarget,"hTFtarget: A Comprehensive Database for Regulations of Human Transcription Factors and Their Targets. Transcription factors (TFs) as key regulators play crucial roles in biological processes. The identification of TF-target regulatory relationships is a key step for revealing functions of TFs and their regulations on gene expression. The accumulated data of chromatin immunoprecipitation sequencing (ChIP-seq) provide great opportunities to discover the TF-target regulations across different conditions. In this study, we constructed a database named hTFtarget, which integrated huge human TF target resources (7190 ChIP-seq samples of 659 TFs and high-confidence binding sites of 699 TFs) and epigenetic modification information to predict accurate TF-target regulations. 
hTFtarget offers the following functions for users to explore TF-target regulations: (1) browse or search general targets of a query TF across datasets; (2) browse TF-target regulations for a query TF in a specific dataset or tissue; (3) search potential TFs for a given target gene or non-coding RNA; (4) investigate co-association between TFs in cell lines; (5) explore potential co-regulations for given target genes or TFs; (6) predict candidate TF binding sites on given DNA sequences; (7) visualize ChIP-seq peaks for different TFs and conditions in a genome browser. hTFtarget provides a comprehensive, reliable and user-friendly resource for exploring human TF-target regulations, which will be very useful for a wide range of users in the TF and gene expression regulation community. hTFtarget is available at http://bioinfo.life.hust.edu.cn/hTFtarget.",hTFtarget,0.996637821,NA,0,hTFtarget,0.996637821,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2020 +22900683,http://www.lbbc.ibb.unesp.br/htri,"HTRIdb: an open-access database for experimentally verified human transcriptional regulation interactions. Background The modeling of interactions among transcription factors (TFs) and their respective target genes (TGs) into transcriptional regulatory networks is important for the complete understanding of regulation of biological processes. In the case of experimentally verified human TF-TG interactions, there is no database at present that explicitly provides such information even though many databases containing human TF-TG interaction data have been available. In an effort to provide researchers with a repository of experimentally verified human TF-TG interactions from which such interactions can be directly extracted, we present here the Human Transcriptional Regulation Interactions database (HTRIdb). 
Description The HTRIdb is an open-access database that can be searched via a user-friendly web interface and the retrieved TF-TG interactions data and the associated protein-protein interactions can be downloaded or interactively visualized as a network through the web version of the popular Cytoscape visualization tool, the Cytoscape Web. Moreover, users can improve the database quality by uploading their own interactions and indicating inconsistencies in the data. So far, HTRIdb has been populated with 284 TFs that regulate 18302 genes, totaling 51871 TF-TG interactions. HTRIdb is freely available at http://www.lbbc.ibb.unesp.br/htri. Conclusions HTRIdb is a powerful user-friendly tool from which human experimentally validated TF-TG interactions can be easily extracted and used to construct transcriptional regulation interaction networks enabling researchers to decipher the regulation of biological processes.",HTRIdb,0.992245674,Human Transcriptional Regulation Interactions database,0.976327971,HTRIdb,0.992245674,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/17/2012 +24122843,http://hts.cancerresearchuk.org/db/public,"HTS-DB: an online resource to publish and query data from functional genomics high-throughput siRNA screening projects. High-throughput screening (HTS) uses technologies such as RNA interference to generate loss-of-function phenotypes on a genomic scale. As these technologies become more popular, many research institutes have established core facilities of expertise to deal with the challenges of large-scale HTS experiments. As the efforts of core facility screening projects come to fruition, focus has shifted towards managing the results of these experiments and making them available in a useful format that can be further mined for phenotypic discovery. The HTS-DB database provides a public view of data from screening projects undertaken by the HTS core facility at the CRUK London Research Institute. 
All projects and screens are described with comprehensive assay protocols, and datasets are provided with complete descriptions of analysis techniques. This format allows users to browse and search data from large-scale studies in an informative and intuitive way. It also provides a repository for additional measurements obtained from screens that were not the focus of the project, such as cell viability, and groups these data so that it can provide a gene-centric summary across several different cell lines and conditions. All datasets from our screens that can be made available can be viewed interactively and mined for further hit lists. We believe that in this format, the database provides researchers with rapid access to results of large-scale experiments that might facilitate their understanding of genes/compounds identified in their own research. DATABASE URL: http://hts.cancerresearchuk.org/db/public.",HTS-DB,0.993688151,NA,0,HTS-DB,0.993688151,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/11/2013 +25940562,http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase,"HTT-DB: horizontally transferred transposable elements database. Motivation Horizontal transfer of transposable (HTT) elements among eukaryotes was discovered in the mid-1980s. As then, >300 new cases have been described. New findings about HTT are revealing the evolutionary impact of this phenomenon on host genomes. In order to provide an up to date, interactive and expandable database for such events, we developed the HTT-DB database. Results HTT-DB allows easy access to most of HTT cases reported along with rich information about each case. Moreover, it allows the user to generate tables and graphs based on searches using Transposable elements and/or host species classification and export them in several formats. Availability and implementation This database is freely available on the web at http://lpa.saogabriel.unipampa.edu.br:8080/httdatabase. 
HTT-DB was developed based on Java and MySQL with all major browsers supported. Tools and software packages used are free for personal or non-profit projects. Contact bdotto82@gmail.com or gabriel.wallau@gmail.com.",HTT-DB,0.982042775,NA,0,HTT-DB,0.982042775,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/4/2015 +33973408,http://humap2.proteincomplexes.org,"hu.MAP 2.0: integration of over 15,000 proteomic experiments builds a global compendium of human multiprotein assemblies. A general principle of biology is the self-assembly of proteins into functional complexes. Characterizing their composition is, therefore, required for our understanding of cellular functions. Unfortunately, we lack knowledge of the comprehensive set of identities of protein complexes in human cells. To address this gap, we developed a machine learning framework to identify protein complexes in over 15,000 mass spectrometry experiments which resulted in the identification of nearly 7,000 physical assemblies. We show our resource, hu.MAP 2.0, is more accurate and comprehensive than previous state of the art high-throughput protein complex resources and gives rise to many new hypotheses, including for 274 completely uncharacterized proteins. Further, we identify 253 promiscuous proteins that participate in multiple complexes pointing to possible moonlighting roles. We have made hu.MAP 2.0 easily searchable in a web interface (http://humap2.proteincomplexes.org/), which will be a valuable resource for researchers across a broad range of interests including systems biology, structural biology, and molecular explanations of disease.",hu.MAP,0.995122343,NA,0,hu.MAP,0.995122343,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +22134927,http://202.38.126.151/hmdd/hubi,"hUbiquitome: a database of experimentally verified ubiquitination cascades in humans. 
Protein ubiquitination is an evolutionarily conserved and functionally diverse post-translational modification achieved through the sequential action of E1-activating enzymes, E2-conjugating enzymes and E3 ligases. A summary of validated ubiquitination substrates have been presented and a prediction of new substrates have been conducted in yeast. However, a systematic summary of human ubiquitination substrates containing experimental evidence and the enzymatic cascade of each substrate is not available. In the present study, hUbiquitome web resource is introduced, a public resource for the retrieval of experimentally verified human ubiquitination enzymes and substrates. hUbiquitome is the first comprehensive database of human ubiquitination cascades. Currently, hUbiquitome has in its repertoire curated data comprising 1 E1 enzyme, 12 E2 enzymes, 138 E3 ligases or complexes, 279 different substrate proteins and 17 deubiquitination enzyme terms. The biological functions of substrates from different kinds of E3s were analyzed using the collected data. The findings show that substrates ubiquitinated by RING (Really Interesting New Gene) E3s are enriched most in apoptosis-related processes, whereas substrates ubiquitinated by other E3s are enriched in gene expression-associated processes. An analysis of the data demonstrates the biological process preferences of the different kinds of E3s. hUbiquitome is the first database to systematically collect experimentally validated ubiquitinated proteins and related ubiquitination cascade enzymes which might be helpful in the field of ubiquitination-modification research. Database URL: http://202.38.126.151/hmdd/hubi/",hUbiquitome,0.99319154,Really Interesting New Gene,0.800637662,hUbiquitome,0.99319154,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/30/2011 +28967693,http://huma.rubi.ru.ac.za,"HUMA: A platform for the analysis of genetic variation in humans. 
The completion of the human genome project at the beginning of the 21st century, along with the rapid advancement of sequencing technologies thereafter, has resulted in exponential growth of biological data. In genetics, this has given rise to numerous variation databases, created to store and annotate the ever-expanding dataset of known mutations. Usually, these databases focus on variation at the sequence level. Few databases focus on the analysis of variation at the 3D level, that is, mapping, visualizing, and determining the effects of variation in protein structures. Additionally, these Web servers seldom incorporate tools to help analyze these data. Here, we present the Human Mutation Analysis (HUMA) Web server and database. HUMA integrates sequence, structure, variation, and disease data into a single, connected database. A user-friendly interface provides click-based data access and visualization, whereas a RESTful Web API provides programmatic access to the data. Tools have been integrated into HUMA to allow initial analyses to be carried out on the server. Furthermore, users can upload their private variation datasets, which are automatically mapped to public data and can be analyzed using the integrated tools. HUMA is freely accessible at https://huma.rubi.ru.ac.za.",HUMA,0.995447814,Analysis,0.605870187,HUMA,0.995447814,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/17/2017 +21752111,http://www.proteinatlas.org,"The Human Protein Atlas as a proteomic resource for biomarker discovery. The analysis of tissue-specific expression at both the gene and protein levels is vital for understanding human biology and disease. Antibody-based proteomics provides a strategy for the systematic generation of antibodies against all human proteins to combine with protein profiling in tissues and cells using tissue microarrays, immunohistochemistry and immunofluorescence. 
The Human Protein Atlas project was launched in 2003 with the aim of creating a map of protein expression patterns in normal cells, tissues and cancer. At present, 11,200 unique proteins corresponding to over 50% of all human protein-encoding genes have been analysed. All protein expression data, including underlying high-resolution images, are published on the free and publically available Human Protein Atlas portal (http://www.proteinatlas.org). This database provides an important source of information for numerous biomedical research projects, including biomarker discovery efforts. Moreover, the global analysis of how our genome is expressed at the protein level has provided basic knowledge on the ubiquitous expression of a large proportion of our proteins and revealed the paucity of cell- and tissue-type-specific proteins.",Human,0.681255281,NA,0,Human,0.681255281,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,8/3/2011 +25348409,http://www.disease-ontology.org,"Disease Ontology 2015 update: an expanded and updated database of human diseases for linking biomedical knowledge through disease data. The current version of the Human Disease Ontology (DO) (http://www.disease-ontology.org) database expands the utility of the ontology for the examination and comparison of genetic variation, phenotype, protein, drug and epitope data through the lens of human disease. DO is a biomedical resource of standardized common and rare disease concepts with stable identifiers organized by disease etiology. The content of DO has had 192 revisions since 2012, including the addition of 760 terms. Thirty-two percent of all terms now include definitions. DO has expanded the number and diversity of research communities and community members by 50+ during the past two years. These community members actively submit term requests, coordinate biomedical resource disease representation and provide expert curation guidance. 
Since the DO 2012 NAR paper, there have been hundreds of term requests and a steady increase in the number of DO listserv members, twitter followers and DO website usage. DO is moving to a multi-editor model utilizing Protégé to curate DO in web ontology language. This will enable closer collaboration with the Human Phenotype Ontology, EBI's Ontology Working Group, Mouse Genome Informatics and the Monarch Initiative among others, and enhance DO's current asserted view and multiple inferred views through reasoning.",NA,0,Human Disease Ontology,0.669468194,Human Disease Ontology,0.669468194,1,30407550,NA,low_prob_best_name,do not remove,conflicting record(s) to be removed,NA,NA,NA,NA,10/27/2014 +26911352,http://www.genome.med.kyoto-u.ac.jp/SnpDB,"Human genetic variation database, a reference database of genetic variations in the Japanese population. Whole-genome and -exome resequencing using next-generation sequencers is a powerful approach for identifying genomic variations that are associated with diseases. However, systematic strategies for prioritizing causative variants from many candidates to explain the disease phenotype are still far from being established, because the population-specific frequency spectrum of genetic variation has not been characterized. Here, we have collected exomic genetic variation from 1208 Japanese individuals through a collaborative effort, and aggregated the data into a prevailing catalog. In total, we identified 156 622 previously unreported variants. The allele frequencies for the majority (88.8%) were lower than 0.5% in allele frequency and predicted to be functionally deleterious. In addition, we have constructed a Japanese-specific major allele reference genome by which the number of unique mapping of the short reads in our data has increased 0.045% on average. Our results illustrate the importance of constructing an ethnicity-specific reference genome for identifying rare variants. 
All the collected data were centralized to a newly developed database to serve as useful resources for exploring pathogenic variations. Public access to the database is available at http://www.genome.med.kyoto-u.ac.jp/SnpDB/.",NA,0,Human genetic variation database,0.794143543,Human genetic variation database,0.794143543,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/25/2016 +23504933,"http://www.humanproteinpedia.org, http://www.hprd.org","Access guide to human proteinpedia. Human Proteinpedia (http://www.humanproteinpedia.org) is a publicly available proteome repository for sharing human protein data derived from multiple experimental platforms. It incorporates diverse features of the human proteome including protein-protein interactions, enzyme-substrate relationships, PTMs, subcellular localization, and expression of proteins in various human tissues and cell lines in diverse biological conditions including diseases. Through a publicly distributed annotation system developed especially for proteomic data, investigators across the globe can upload, view, and edit proteomic data even before they are published. Inclusion of information on investigators and laboratories that generated the data, as well as visualization of tandem mass spectra, stained tissue sections, protein/peptide microarrays, fluorescent micrographs, and western blots, ensures quality of proteomic data assimilated in Human Proteinpedia. Many of the protein annotations submitted to Human Proteinpedia have also been made available to the scientific community through Human Protein Reference Database (http://www.hprd.org), another resource developed by our group. In this protocol, we describe how to submit, edit, and retrieve proteomic data in Human Proteinpedia.",Human Proteinpedia,0.979407251,NA,0,Human Proteinpedia,0.979407251,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2013 +23436708,http://reprod.njmu.edu.cn/htpd,"Scanning of novel cancer/testis proteins by human testis proteomic analysis. 
The testes are where spermatogenesis, the sperm-generating process that is unique to men, occurs. Importantly, human spermatogenesis and tumorigenesis share key similarities. Until now, only a few proteins in the human testis have been identified due to limitations of available technology. In this paper, using an advanced proteomics platform, we have identified 7346 unique proteins within the human testis with a high degree of confidence. Immunohistochemistry data from the Human Protein Atlas database show over 90% (1833/2020) of identified proteins can be detected in the human testis using specific antibodies. To make the data widely available to the scientific community, an online Human Testis Proteome Database (HTPD, http://reprod.njmu.edu.cn/htpd/) was built. Many of the identified human testicular proteins are associated with human infertility, especially human testicular predominantly expressed proteins. We characterized six novel cancer/testis genes (TMPRSS12, TPPP2, PRSS55, DMRT1, PIWIL1, HEMGN), which map to cancer-associated genetic variants positions, in both the cancer and testis tissues using genome-wide analyses. Our results provide a molecular connection between spermatogenesis and tumorigenesis and broaden the range of cancer antigen choice available for immunotherapy.",HTPD,0.979909778,Human Testis Proteome Database,0.981758612,Human Testis Proteome Database,0.981758612,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/6/2013 +32209698,http://www.metabolicatlas.org,"An atlas of human metabolism. . Genome-scale metabolic models (GEMs) are valuable tools to study metabolism and provide a scaffold for the integrative analysis of omics data. Researchers have developed increasingly comprehensive human GEMs, but the disconnect among different model sources and versions impedes further progress. We therefore integrated and extensively curated the most recent human metabolic models to construct a consensus GEM, Human1. 
We demonstrated the versatility of Human1 through the generation and analysis of cell- and tissue-specific models using transcriptomic, proteomic, and kinetic data. We also present an accompanying web portal, Metabolic Atlas (https://www.metabolicatlas.org/), which facilitates further exploration and visualization of Human1 content. Human1 was created using a version-controlled, open-source model development framework to enable community-driven curation and refinement. This framework allows Human1 to be an evolving shared resource for future studies of human health and disease.",Human1,0.937185824,NA,0,Human1,0.937185824,1,26209309,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: INCORRECT NAME,NA,NA,3/24/2020 +33221926,http://webapp.ufz.de/hmgdb,"HumanMetagenomeDB: a public repository of curated and standardized metadata for human metagenomes. Metagenomics became a standard strategy to comprehend the functional potential of microbial communities, including the human microbiome. Currently, the number of metagenomes in public repositories is increasing exponentially. The Sequence Read Archive (SRA) and the MG-RAST are the two main repositories for metagenomic data. These databases allow scientists to reanalyze samples and explore new hypotheses. However, mining samples from them can be a limiting factor, since the metadata available in these repositories is often misannotated, misleading, and decentralized, creating an overly complex environment for sample reanalysis. The main goal of the HumanMetagenomeDB is to simplify the identification and use of public human metagenomes of interest. HumanMetagenomeDB version 1.0 contains metadata of 69 822 metagenomes. We standardized 203 attributes, based on standardized ontologies, describing host characteristics (e.g. sex, age and body mass index), diagnosis information (e.g. cancer, Crohn's disease and Parkinson), location (e.g. country, longitude and latitude), sampling site (e.g. 
gut, lung and skin) and sequencing attributes (e.g. sequencing platform, average length and sequence quality). Further, HumanMetagenomeDB version 1.0 metagenomes encompass 58 countries, 9 main sample sites (i.e. body parts), 58 diagnoses and multiple ages, ranging from just born to 91 years old. The HumanMetagenomeDB is publicly available at https://webapp.ufz.de/hmgdb/.",HumanMetagenomeDB,0.983011484,NA,0,HumanMetagenomeDB,0.983011484,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +30418591,http://www.inetbio.org/humannet,"HumanNet v2: human gene networks for disease research. Human gene networks have proven useful in many aspects of disease research, with numerous network-based strategies developed for generating hypotheses about gene-disease-drug associations. The ability to predict and organize genes most relevant to a specific disease has proven especially important. We previously developed a human functional gene network, HumanNet, by integrating diverse types of omics data using Bayesian statistics framework and demonstrated its ability to retrieve disease genes. Here, we present HumanNet v2 (http://www.inetbio.org/humannet), a database of human gene networks, which was updated by incorporating new data types, extending data sources and improving network inference algorithms. HumanNet now comprises a hierarchy of human gene networks, allowing for more flexible incorporation of network information into studies. HumanNet performs well in ranking disease-linked gene sets with minimal literature-dependent biases. We observe that incorporating model organisms' protein-protein interactions does not markedly improve disease gene predictions, suggesting that many of the disease gene associations are now captured directly in human-derived datasets. 
With an improved interactive user interface for disease network analysis, we expect HumanNet will be a useful resource for network medicine.",HumanNet,0.986103296,NA,0,HumanNet,0.986103296,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30999860,http://webs.iiitd.edu.in/raghava/humcfs,"HumCFS: a database of fragile sites in human chromosomes. Background Fragile sites are the chromosomal regions that are susceptible to breakage, and their frequency varies among the human population. Based on the frequency of fragile site induction, they are categorized as common and rare fragile sites. Common fragile sites are sensitive to replication stress and often rearranged in cancer. Rare fragile sites are the archetypal trinucleotide repeats. Fragile sites are known to be involved in chromosomal rearrangements in tumors. Human miRNA genes are also present at fragile sites. A better understanding of genes and miRNAs lying in the fragile site regions and their association with disease progression is required. Result HumCFS is a manually curated database of human chromosomal fragile sites. HumCFS provides useful information on fragile sites such as coordinates on the chromosome, cytoband, their chemical inducers and frequency of fragile site (rare or common), genes and miRNAs lying in fragile sites. Protein coding genes in the fragile sites were identified by mapping the coordinates of fragile sites with human genome Ensembl (GRCh38/hg38). Genes present in fragile sites were further mapped to DisGenNET database, to understand their possible link with human diseases. Human miRNAs from miRBase was also mapped on fragile site coordinates. In brief, HumCFS provides useful information about 125 human chromosomal fragile sites and their association with 4921 human protein-coding genes and 917 human miRNA's. 
Conclusion User-friendly web-interface of HumCFS and hyper-linking with other resources will help researchers to search for genes, miRNAs efficiently and to intersect the relationship among them. For easy data retrieval and analysis, we have integrated standard web-based tools, such as JBrowse, BLAST etc. Also, the user can download the data in various file formats such as text files, gff3 files and Bed-format files which can be used on UCSC browser. Database URL: http://webs.iiitd.edu.in/raghava/humcfs/.",HumCFS,0.988169968,NA,0,HumCFS,0.988169968,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/18/2019 +34330336,http://arken.nmbu.no,"HumGut: a comprehensive human gut prokaryotic genomes collection filtered by metagenome data. Background A major bottleneck in the use of metagenome sequencing for human gut microbiome studies has been the lack of a comprehensive genome collection to be used as a reference database. Several recent efforts have been made to re-construct genomes from human gut metagenome data, resulting in a huge increase in the number of relevant genomes. In this work, we aimed to create a collection of the most prevalent healthy human gut prokaryotic genomes, to be used as a reference database, including both MAGs from the human gut and ordinary RefSeq genomes. Results We screened > 5,700 healthy human gut metagenomes for the containment of > 490,000 publicly available prokaryotic genomes sourced from RefSeq and the recently announced UHGG collection. This resulted in a pool of > 381,000 genomes that were subsequently scored and ranked based on their prevalence in the healthy human metagenomes. The genomes were then clustered at a 97.5% sequence identity resolution, and cluster representatives (30,691 in total) were retained to comprise the HumGut collection. 
Using the Kraken2 software for classification, we find superior performance in the assignment of metagenomic reads, classifying on average 94.5% of the reads in a metagenome, as opposed to 86% with UHGG and 44% when using standard Kraken2 database. A coarser HumGut collection, consisting of genomes dereplicated at 95% sequence identity-similar to UHGG, classified 88.25% of the reads. HumGut, half the size of standard Kraken2 database and directly comparable to the UHGG size, outperforms them both. Conclusions The HumGut collection contains > 30,000 genomes clustered at a 97.5% sequence identity resolution and ranked by human gut prevalence. We demonstrate how metagenomes from IBD-patients map equally well to this collection, indicating this reference is relevant also for studies well outside the metagenome reference set used to obtain HumGut. All data and metadata, as well as helpful code, are available at http://arken.nmbu.no/~larssn/humgut/ . Video Abstract.",HumGut,0.968026817,NA,0,HumGut,0.968026817,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/31/2021 +22804825,http://hupho.uniroma2.it,"HuPho: the human phosphatase portal. Phosphatases and kinases contribute to the regulation of protein phosphorylation homeostasis in the cell. Phosphorylation is a key post-translational modification underlying the regulation of many cellular processes. Thus, a comprehensive picture of phosphatase function and the identification of their target substrates would aid a systematic approach to a mechanistic description of cell signalling. Here we present a website designed to facilitate the retrieval of information about human protein phosphatases. To this end we developed a search engine to recover and integrate information annotated in several publicly available web resources. In addition we present a text-mining-assisted annotation effort aimed at extracting phosphatase related data reported in the scientific literature. 
The HuPho (human phosphatases) website can be accessed at http://hupho.uniroma2.it.",HuPho,0.995498061,human phosphatase portal,0.886762607,HuPho,0.995498061,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/24/2012 +30703169,http://www.iitm.ac.in/bioinfo/huvarbase,"HuVarBase: A human variant database with comprehensive information at gene and protein levels. Human variant databases could be better exploited if the variant data available in multiple resources is integrated in a single comprehensive resource along with sequence and structural features. Such integration would improve the analyses of variants for disease prediction, prevention or treatment. The HuVarBase (HUmanVARiantdataBASE) assimilates publicly available human variant data at protein level and gene level into a comprehensive resource. Protein level data such as amino acid sequence, secondary structure of the mutant residue, domain, function, subcellular location and post-translational modification are integrated with gene level data such as gene name, chromosome number & genome position, DNA mutation, mutation type origin and rs ID number. Disease class has been added for the disease causing variants. The database is publicly available at https://www.iitm.ac.in/bioinfo/huvarbase. A total of 774,863 variant records, integrated in the HuVarBase, can be searched with options to display, visualize and download the results.",HuVarBase,0.980674505,NA,0,HuVarBase,0.980674505,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/31/2019 +34461244,http://hvdb.dqweilab-sjtu.com/index.php,"HantavirusesDB: Vaccinomics and RNA-based therapeutics database for the potentially emerging human respiratory pandemic agents. Hantaviruses are etiological agents of several severe respiratory illnesses in humans and their human-to-human transmission has been reported. To cope with any potential pandemic, this group of viruses needs further research and a data platform. 
Therefore, herein we developed a database ""HantavirusesDB (HVdb)"", where genomics, proteomics, immune resource, RNAi based therapeutics and information on the 3D structures of druggable targets of the Orthohantaviruses are provided on a single platform. The database allows the researchers to effectively map the therapeutic strategies by designing multi-epitopes subunit vaccine and RNA based therapeutics. Moreover, the ease of the web interface allow the users to retrieve specific information from the database. Because of the high quality and excellent functionality of the HVdb, therapeutic research of Hantaviruses can be accelerated, and data analysis might be a foundation to design better treatment strategies targeting the hantaviruses. The database is accessible at http://hvdb.dqweilab-sjtu.com/index.php.",HVdb,0.98835696,NA,0,HVdb,0.98835696,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/28/2021 +33515030,http://zzdlab.com/hvidb,"HVIDB: a comprehensive database for human-virus protein-protein interactions. While leading to millions of people's deaths every year the treatment of viral infectious diseases remains a huge public health challenge.Therefore, an in-depth understanding of human-virus protein-protein interactions (PPIs) as the molecular interface between a virus and its host cell is of paramount importance to obtain new insights into the pathogenesis of viral infections and development of antiviral therapeutic treatments. However, current human-virus PPI database resources are incomplete, lack annotation and usually do not provide the opportunity to computationally predict human-virus PPIs. Here, we present the Human-Virus Interaction DataBase (HVIDB, http://zzdlab.com/hvidb/) that provides comprehensively annotated human-virus PPI data as well as seamlessly integrates online PPI prediction tools. 
Currently, HVIDB highlights 48 643 experimentally verified human-virus PPIs covering 35 virus families, 6633 virally targeted host complexes, 3572 host dependency/restriction factors as well as 911 experimentally verified/predicted 3D complex structures of human-virus PPIs. Furthermore, our database resource provides tissue-specific expression profiles of 6790 human genes that are targeted by viruses and 129 Gene Expression Omnibus series of differentially expressed genes post-viral infections. Based on these multifaceted and annotated data, our database allows the users to easily obtain reliable information about PPIs of various human viruses and conduct an in-depth analysis of their inherent biological significance. In particular, HVIDB also integrates well-performing machine learning models to predict interactions between the human host and viral proteins that are based on (i) sequence embedding techniques, (ii) interolog mapping and (iii) domain-domain interaction inference. We anticipate that HVIDB will serve as a one-stop knowledge base to further guide hypothesis-driven experimental efforts to investigate human-virus relationships.",HVIDB,0.997510344,Human-Virus Interaction DataBase,0.954682939,HVIDB,0.997510344,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2021 +21901790,http://www.genomed.org/LOVD,"Novel LOVD databases for hereditary breast cancer and colorectal cancer genes in the Chinese population. The Human Variome Project (HVP) is an international consortium of clinicians, geneticists, and researchers from over 30 countries, aiming to facilitate the establishment and maintenance of standards, systems, and infrastructure for the worldwide collection and sharing of all genetic variations effecting human disease. The HVP-China Node will build new and supplement existing databases of genetic diseases. 
As the first effort, we have created a novel variant database of BRCA1 and BRCA2, mismatch repair genes (MMR), and APC genes for breast cancer, Lynch syndrome, and familial adenomatous polyposis (FAP), respectively, in the Chinese population using the Leiden Open Variation Database (LOVD) format. We searched PubMed and some Chinese search engines to collect all the variants of these genes in the Chinese population that have already been detected and reported. There are some differences in the gene variants between the Chinese population and that of other ethnicities. The database is available online at http://www.genomed.org/LOVD/. Our database will appear to users who survey other LOVD databases (e.g., by Google search, or by NCBI GeneTests search). Remote submissions are accepted, and the information is updated monthly.",HVP,0.96673274,Variome,0.536253095,HVP,0.96673274,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME; no name in abstract,NA,NA,10/20/2011 +31524396,http://www.idruglab.com/HybridMolDB/index.php,"HybridMolDB: A Manually Curated Database Dedicated to Hybrid Molecules for Chemical Biology and Drug Discovery. Hybrid-molecule-based drug design is the combination of two or more bioactive molecules into a single chemical entity. This strategy may be used to achieve better affinity and efficacy or improved properties compared with the parent molecules, to interact with two or multiple targets, to reduce undesirable side effects, to decrease drug-drug interactions, or to reduce the emergence of drug resistance. The approach offers the prospect of better drugs for the treatment of many human diseases. Research activity in this area is increasing and has attracted many practitioners worldwide. To accelerate the design and discovery of new hybrid-molecule-based drugs, it is essential to properly collect and annotate experimental data obtained from known hybrid molecules. 
To address this need, we have developed HybridMolDB ( http://www.idruglab.com/HybridMolDB/index.php ), a manually curated database dedicated to hybrid molecules for chemical biology and drug discovery. It contains structures, manually annotated design protocols, pharmacological data, some physicochemical properties, ligand efficiency, drug-likeness, and ADMET characteristics, and the biological targets of known hybrid molecules. HybridMolDB supports a range of query types, including searches by text, protein sequence, chemical structure similarity, and property ranges. The database serves as an open source facilitating the development and/or optimization of related in silico tools for the design and discovery of hybrid-molecule-based drugs and chemical probes.",HybridMolDB,0.995581388,NA,0,HybridMolDB,0.995581388,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/25/2019 +21584190,http://www.trimslabs.com/database/hypodb/index.html,"A database of six eukaryotic hypothetical genes and proteins. Unlabelled Assigning functions to proteins of unknown function is of considerable interest to the proteomic researchers as the genes encoding them are conserved over various species. Here, we describe HypoDB, a database of hypothetical genes and proteins in six eukaryotes. The database was collected and organized based on the number of entries in each chromosome with few annotations. Hypothetical protein database contains information related to gene and protein sequences, chromosome number and location, secondary and tertiary structure related data. Availability The database is available for free at http://www.trimslabs.com/database/hypodb/index.html.",HypoDB,0.997157693,NA,0,HypoDB,0.997157693,1,NA,30152276,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,4/22/2011 +30152276,http://www.bioclues.org/hypo2,"HYPO: A Database of Human Hypothetical Proteins. 
Background There are genes whose function remains obscure as they may not have similarities to known regions in the genome. Such known 'unknown' genes constituting the Open Reading Frames (ORF) that remain in the epigenome are termed as orphan genes and the proteins encoded by them but having no experimental evidence of translation are termed as 'Hypothetical Proteins' (HPs). Objectives We have enhanced our former database of Hypothetical Proteins (HP) in human (HypoDB) with added annotation, application programming interfaces and descriptive features. The database hosts 1000+ manually curated records of the known 'unknown' regions in the human genome. The new updated version of HypoDB with functionalities (Blast, Match) is freely accessible at http://www.bioclues.org/hypo2. Methods The total collection of HPs were checked using experimentally validated sets (from Swiss-Prot) or non-experimentally validated set (TrEMBL) or the complete set (UniProtKB). The database was designed with java at the core backend, integrated with databases, viz. EMBL, PIR, HPRD and those including descriptors for structural databases, interaction and association databases. Results The HypoDB constituted Application Programming Interfaces (API) for implicitly searching resources linking them to other databases like NCBI Link-out in addition to multiple search capabilities along with advanced searches using integrated bio-tools, viz. Match and BLAST were incorporated. Conclusion The HypoDB is perhaps the only open-source HP database with a range of tools for common bioinformatics retrievals and serves as a standby reference to researchers who are interested in finding candidate sequences for their potential experimental work.",HypoDB,0.993183434,NA,0,HypoDB,0.993183434,1,NA,21584190,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +24178989,http://www.hypoxiadb.com,"HypoxiaDB: a database of hypoxia-regulated proteins. 
There has been intense interest in the cellular response to hypoxia, and a large number of differentially expressed proteins have been identified through various high-throughput experiments. These valuable data are scattered, and there have been no systematic attempts to document the various proteins regulated by hypoxia. Compilation, curation and annotation of these data are important in deciphering their role in hypoxia and hypoxia-related disorders. Therefore, we have compiled HypoxiaDB, a database of hypoxia-regulated proteins. It is a comprehensive, manually-curated, non-redundant catalog of proteins whose expressions are shown experimentally to be altered at different levels and durations of hypoxia. The database currently contains 72 000 manually curated entries taken on 3500 proteins extracted from 73 peer-reviewed publications selected from PubMed. HypoxiaDB is distinctive from other generalized databases: (i) it compiles tissue-specific protein expression changes under different levels and duration of hypoxia. Also, it provides manually curated literature references to support the inclusion of the protein in the database and establish its association with hypoxia. (ii) For each protein, HypoxiaDB integrates data on gene ontology, KEGG (Kyoto Encyclopedia of Genes and Genomes) pathway, protein-protein interactions, protein family (Pfam), OMIM (Online Mendelian Inheritance in Man), PDB (Protein Data Bank) structures and homology to other sequenced genomes. (iii) It also provides pre-compiled information on hypoxia-proteins, which otherwise requires tedious computational analysis. This includes information like chromosomal location, identifiers like Entrez, HGNC, Unigene, Uniprot, Ensembl, Vega, GI numbers and Genbank accession numbers associated with the protein. These are further cross-linked to respective public databases augmenting HypoxiaDB to the external repositories. 
(iv) In addition, HypoxiaDB provides an online sequence-similarity search tool for users to compare their protein sequences with HypoxiaDB protein database. We hope that HypoxiaDB will enrich our knowledge about hypoxia-related biology and eventually will lead to the development of novel hypothesis and advancements in diagnostic and therapeutic activities. HypoxiaDB is freely accessible for academic and non-profit users via http://www.hypoxiadb.com.",HypoxiaDB,0.996842146,NA,0,HypoxiaDB,0.996842146,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/31/2013 +25332403,http://i5k.nal.usda.gov,"The i5k Workspace@NAL--enabling genomic data access, visualization and curation of arthropod genomes. The 5000 arthropod genomes initiative (i5k) has tasked itself with coordinating the sequencing of 5000 insect or related arthropod genomes. The resulting influx of data, mostly from small research groups or communities with little bioinformatics experience, will require visualization, dissemination and curation, preferably from a centralized platform. The National Agricultural Library (NAL) has implemented the i5k Workspace@NAL (http://i5k.nal.usda.gov/) to help meet the i5k initiative's genome hosting needs. Any i5k member is encouraged to contact the i5k Workspace with their genome project details. Once submitted, new content will be accessible via organism pages, genome browsers and BLAST search engines, which are implemented via the open-source Tripal framework, a web interface for the underlying Chado database schema. We also implement the Web Apollo software for groups that choose to curate gene models. 
New content will add to the existing body of 35 arthropod species, which include species relevant for many aspects of arthropod genomic research, including agriculture, invasion biology, systematics, ecology and evolution, and developmental research.",i5k,0.991027579,5000 arthropod genomes,0.851789331,i5k,0.991027579,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/20/2014 +27328919,http://p53.iarc.fr,"TP53 Variations in Human Cancers: New Lessons from the IARC TP53 Database and Genomics Data. TP53 gene mutations are one of the most frequent somatic events in cancer. The IARC TP53 Database (http://p53.iarc.fr) is a popular resource that compiles occurrence and phenotype data on TP53 germline and somatic variations linked to human cancer. The deluge of data coming from cancer genomic studies generates new data on TP53 variations and attracts a growing number of database users for the interpretation of TP53 variants. Here, we present the current contents and functionalities of the IARC TP53 Database and perform a systematic analysis of TP53 somatic mutation data extracted from this database and from genomic data repositories. This analysis showed that IARC has more TP53 somatic mutation data than genomic repositories (29,000 vs. 4,000). However, the more complete screening achieved by genomic studies highlighted some overlooked facts about TP53 mutations, such as the presence of a significant number of mutations occurring outside the DNA-binding domain in specific cancer types. We also provide an update on TP53 inherited variants including the ones that should be considered as neutral frequent variations. 
We thus provide an update of current knowledge on TP53 variations in human cancer as well as inform users on the efficient use of the IARC TP53 Database.",IARC,0.700865626,NA,0,IARC,0.700865626,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,7/8/2016 +33929018,http://www.cbrc.kaust.edu.sa/ibd,"IBDDB: a manually curated and text-mining-enhanced database of genes involved in inflammatory bowel disease. . To date, research on inflammatory bowel disease (IBD, encompassing Crohn's disease and ulcerative colitis), a chronic complex disorder, has generated a large amount of data scattered across published literature (1 06 333) listed in PubMed on 14 October 2020, and no dedicated database currently exists that catalogues information on genes associated with IBD. We aimed to manually curate 289 genes that are experimentally validated to be linked with IBD and its known phenotypes. Furthermore, we have developed an integrated platform providing information about different aspects of these genes by incorporating several resources and an extensive text-mined knowledgebase. The curated IBD database (IBDDB) allows the selective display of collated 34 subject-specific concepts (listed as columns) exportable through a user-friendly IBDDB portal. The information embedded in concepts was acquired via text-mining of PubMed (manually cleaned and curated), accompanied by data-mining from varied resources. The user can also explore different biomedical entities and their co-occurrence with other entities (about one million) from 11 curated dictionaries in the indexed PubMed records. This functionality permits the user to generate and cross-examine a new hypothesis that is otherwise not easy to comprehend by just reading the published abstracts and papers. Users can download required information using various file formats and can display information in the form of networks. To our knowledge, no curated database of IBD-related genes is available so far. 
IBDDB is free for academic users and can be accessed at https://www.cbrc.kaust.edu.sa/ibd/.",IBDDB,0.994403943,IBD database,0.644221008,IBDDB,0.994403943,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2021 +23095257,"http://www.itb.cnr.it/ibd, http://www.itb.cnr.it/galaxy","IBDsite: a Galaxy-interacting, integrative database for supporting inflammatory bowel disease high throughput data analysis. Background Inflammatory bowel diseases (IBD) refer to a group of inflammatory conditions concerning colon and small intestine, which cause socially uncomfortable symptoms and often are associated with an increased risk of colon cancer. IBD are complex disorders, which rely on genetic susceptibility, environmental factors, deregulation of the immune system, and host relationship with commensal microbiota. The complexity of these pathologies makes difficult to clearly understand the mechanisms of their onset. Therefore, the study of IBD must be faced exploiting an integrated and multilevel approach, ranging from genes, transcripts and proteins to pathways altered in affected tissues, and carefully considering their regulatory mechanisms, which may intervene in the pathology onset. It is also crucial to have a knowledge base about the symbiotic bacteria that are hosted in the human gut. To date, much data exist regarding IBD and human commensal bacteria, but this information is sparse in literature and no free resource provides a homogeneously and rationally integrated view of biomolecular data related to these pathologies. Methods Human genes altered in IBD have been collected from literature, paying particular interest for the immune system alterations prompted by the interaction with the gut microbiome. This process has been performed manually to assure the reliability of collected data. Heterogeneous metadata from different sources have been automatically formatted and integrated in order to enrich information about these altered genes. 
A user-friendly web interface has been created for easy access to structured data. Tools such as gene clustering coefficients, all-pairs shortest paths and pathway lengths calculation have been developed to provide data analysis support. Moreover, the implemented resource is compliant to the Galaxy framework, allowing the collected data to be exploited in the context of high throughput bioinformatics analysis. Results To fill the lack of a reference resource for 'omics' science analysis in the context of IBD, we developed the IBDsite (available at http://www.itb.cnr.it/ibd), a disease-oriented platform, which collects data related to biomolecular mechanisms involved in the IBD onset. The resource provides a section devoted to human genes identified as altered in IBD, which can be queried at different biomolecular levels and visualised in gene-centred report pages. Furthermore, the system presents information related to the gut microbiota involved in IBD affected patients. The IBDsite is compliant with all Galaxy installations (in particular, it can be accessed from our custom version of Galaxy, http://www.itb.cnr.it/galaxy), in order to facilitate high-throughput data integration and to enable evaluations of the genomic basis of these diseases, complementing the tools embedded in the IBDsite. Conclusions Lots of sparse data exist concerning IBD studies, but no on-line resource homogeneously and rationally integrate and collect them. The IBDsite is an attempt to group available information regarding human genes and microbial aspects related to IBD, by means of a multilevel mining tool. Moreover, it constitutes a knowledge base to filter, annotate and understand new experimental data in order to formulate new scientific hypotheses, thanks to the possibility of integrating genomics aspects by employing the Galaxy framework. 
Discussed use-cases demonstrate that the developed system is useful to infer not trivial knowledge from the existing widespread data or from novel experiments.",IBDsite,0.995800972,NA,0,IBDsite,0.995800972,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/7/2012 +"25378303, 29069517",http://ibeetle-base.uni-goettingen.de,"iBeetle-Base: a database for RNAi phenotypes in the red flour beetle Tribolium castaneum. The iBeetle-Base (http://ibeetle-base.uni-goettingen.de) makes available annotations of RNAi phenotypes, which were gathered in a large scale RNAi screen in the red flour beetle Tribolium castaneum (iBeetle screen). In addition, it provides access to sequence information and links for all Tribolium castaneum genes. The iBeetle-Base contains the annotations of phenotypes of several thousands of genes knocked down during embryonic and metamorphic epidermis and muscle development in addition to phenotypes linked to oogenesis and stink gland biology. The phenotypes are described according to the EQM (entity, quality, modifier) system using controlled vocabularies and the Tribolium morphological ontology (TrOn). Furthermore, images linked to the respective annotations are provided. The data are searchable either for specific phenotypes using a complex 'search for morphological defects' or a 'quick search' for gene names and IDs. The red flour beetle Tribolium castaneum has become an important model system for insect functional genetics and is a representative of the most species rich taxon, the Coleoptera, which comprise several devastating pests. It is used for studying insect typical development, the evolution of development and for research on metabolism and pest control. Besides Drosophila, Tribolium is the first insect model organism where large scale unbiased screens have been performed.",iBeetle-Base,0.997434308,NA,0,iBeetle-Base,0.997434308,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24616562,http://www.nabg-nbaii.res.in/barcode,"Insect barcode information system. 
Unlabelled Insect Barcode Information System called as Insect Barcode Informática (IBIn) is an online database resource developed by the National Bureau of Agriculturally Important Insects, Bangalore. This database provides acquisition, storage, analysis and publication of DNA barcode records of agriculturally important insects, for researchers specifically in India and other countries. It bridges a gap in bioinformatics by integrating molecular, morphological and distribution details of agriculturally important insects. IBIn was developed using PHP/My SQL by using relational database management concept. This database is based on the client- server architecture, where many clients can access data simultaneously. IBIn is freely available on-line and is user-friendly. IBIn allows the registered users to input new information, search and view information related to DNA barcode of agriculturally important insects.This paper provides a current status of insect barcode in India and brief introduction about the database IBIn. Availability http://www.nabg-nbaii.res.in/barcode.",IBIn,0.980620027,Insect Barcode,0.783612788,IBIn,0.980620027,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/19/2014 +22102591,http://www.ncbi.nlm.nih.gov/Structure/ibis/ibis.cgi,"IBIS (Inferred Biomolecular Interaction Server) reports, predicts and integrates multiple types of conserved interactions for proteins. We have recently developed the Inferred Biomolecular Interaction Server (IBIS) and database, which reports, predicts and integrates different types of interaction partners and locations of binding sites in proteins based on the analysis of homologous structural complexes. Here, we highlight several new IBIS features and options. The server's webpage is now redesigned to allow users easier access to data for different interaction types. An entry page is added to give a quick summary of available results and to now accept protein sequence accessions. 
To elucidate the formation of protein complexes, not just binary interactions, IBIS currently presents an expandable interaction network. Previously, IBIS provided annotations for four different types of binding partners: proteins, small molecules, nucleic acids and peptides; in the current version a new protein-ion interaction type has been added. Several options provide easy downloads of IBIS data for all Protein Data Bank (PDB) protein chains and the results for each query. In this study, we show that about one-third of all RefSeq sequences can be annotated with IBIS interaction partners and binding sites. The IBIS server is available at http://www.ncbi.nlm.nih.gov/Structure/ibis/ibis.cgi and updated biweekly.",IBIS,0.99052155,Inferred Biomolecular Interaction Server,0.952923278,IBIS,0.99052155,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/18/2011 +26519466,http://ic4r.org,"Information Commons for Rice (IC4R). Rice is the most important staple food for a large part of the world's human population and also a key model organism for plant research. Here, we present Information Commons for Rice (IC4R; http://ic4r.org), a rice knowledgebase featuring adoption of an extensible and sustainable architecture that integrates multiple omics data through community-contributed modules. Each module is developed and maintained by different committed groups, deals with data collection, processing and visualization, and delivers data on-demand via web services. In the current version, IC4R incorporates a variety of rice data through multiple committed modules, including genome-wide expression profiles derived entirely from RNA-Seq data, resequencing-based genomic variations obtained from re-sequencing data of thousands of rice varieties, plant homologous genes covering multiple diverse plant species, post-translational modifications, rice-related literatures and gene annotations contributed by the rice research community. 
Unlike extant related databases, IC4R is designed for scalability and sustainability and thus also features collaborative integration of rice data and low costs for database update and maintenance. Future directions of IC4R include incorporation of other omics data and association of multiple omics data with agronomically important traits, dedicating to build IC4R into a valuable knowledgebase for both basic and translational researches in rice.",IC4R,0.995473579,Information Commons for Rice,0.858307824,IC4R,0.995473579,1,NA,32619768,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,10/30/2015 +32619768,http://sr4r.ic4r.org,"SR4R: An Integrative SNP Resource for Genomic Breeding and Population Research in Rice. The information commons for rice (IC4R) database is a collection of 18 million single nucleotide polymorphisms (SNPs) identified by resequencing of 5152 rice accessions. Although IC4R offers ultra-high density rice variation map, these raw SNPs are not readily usable for the public. To satisfy different research utilizations of SNPs for population genetics, evolutionary analysis, association studies, and genomic breeding in rice, raw genotypic data of these 18 million SNPs were processed by unified bioinformatics pipelines. The outcomes were used to develop a daughter database of IC4R - SnpReady for Rice (SR4R). SR4R presents four reference SNP panels, including 2,097,405 hapmapSNPs after data filtration and genotype imputation, 156,502 tagSNPs selected from linkage disequilibrium-based redundancy removal, 1180 fixedSNPs selected from genes exhibiting selective sweep signatures, and 38 barcodeSNPs selected from DNA fingerprinting simulation. SR4R thus offers a highly efficient rice variation map that combines reduced SNP redundancy with extensive data describing the genetic diversity of rice populations. 
In addition, SR4R provides rice researchers with a web interface that enables them to browse all four SNP panels, use online toolkits, as well as retrieve the original data and scripts for a variety of population genetics analyses on local computers. SR4R is freely available to academic users at http://sr4r.ic4r.org/.",IC4R,0.975306451,commons for rice,0.746143917,IC4R,0.975306451,1,NA,26519466,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,4/1/2020 +29041922,http://ican.ils.seu.edu.cn,"Institute collection and analysis of Nanobodies (iCAN): a comprehensive database and analysis platform for nanobodies. Background Nanobodies are single-domain antibodies that contain the unique structural and functional properties of naturally-occurring heavy chain in camelidae. As a novel class of antibody, they show many advantages compared with traditional antibodies such as smaller size, higher stability, improved specificity, more easily expressed in microorganisms. These unusual hallmarks make them as promising tools in basic research and clinical practice. Although thousands of nanobodies are known to be published, no single database provides searchable, unified annotation and integrative analysis tools for these various nanobodies. Results Here, we present the database of Institute Collection and Analysis of Nanobodies (iCAN). It is built for the aim that addressing the above gap to expand and accelerate the nanobody research. iCAN, as the first database of nanobody, contains the most comprehensive information to date on nanobodies and related antigens. So far, iCAN incorporates 2391 entries which include 2131 from patents and 260 from publications and provides a simple user interface for researchers to retrieve and view the detailed information of nanobodies. In addition to the data collection, iCAN also provides online bioinformatic tools for sequence analysis and characteristic feature extraction. 
Conclusions In summary, iCAN enables researchers to analyze nanobody features and explore the applications of nanobodies more efficiently. iCAN is freely available at http://ican.ils.seu.edu.cn .",iCAN,0.989729762,Institute Collection and Analysis of Nanobodies,0.950395688,iCAN,0.989729762,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/17/2017 +34907423,http://icav.omicsbio.info,"iCAV: an integrative database of cancer-associated viruses. . To date, various studies have found that the occurrence of cancer may be related to viral infections. Therefore, it is important to explore the relationship between viruses and diseases. The International Agency for Research on Cancer has defined six types of viruses as Class 1 human carcinogens, including Epstein-Barr virus, hepatitis C virus, hepatitis B virus, human T-cell lymphotropic virus, human herpesvirus 8 and human papillomavirus, while Merkel cell polyomavirus is classified as 'probably carcinogenic to humans' (Group 2A). Therefore, in-depth research on these viruses will help clarify their relationship with diseases, and substantial efforts have been made to sequence their genomes. However, there is no complete database documenting these cancer-associated viruses, and researchers are not able to easily access and retrieve the published genomes. In this study, we developed iCAV, a database that integrates the genomes of cancer-related viruses and the corresponding phenotypes. We collected a total of 18 649 genome sequences from seven human disease-related viruses, and each virus was further classified by the associated disease, sample and country. iCAV is a comprehensive resource of cancer-associated viruses that provides browse and download functions for viral genomes. Database URL: http://icav.omicsbio.info/.",iCAV,0.99172157,NA,0,iCAV,0.99172157,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2021 +28557712,"http://ice.ntp.niehs.nih.gov, http://doi.org/10.1289/EHP1759","An Integrated Chemical Environment to Support 21st-Century Toxicology. 
SUMMARY: Access to high-quality reference data is essential for the development, validation, and implementation of in vitro and in silico approaches that reduce and replace the use of animals in toxicity testing. Currently, these data must often be pooled from a variety of disparate sources to efficiently link a set of assay responses and model predictions to an outcome or hazard classification. To provide a central access point for these purposes, the National Toxicology Program Interagency Center for the Evaluation of Alternative Toxicological Methods developed the Integrated Chemical Environment (ICE) web resource. The ICE data integrator allows users to retrieve and combine data sets and to develop hypotheses through data exploration. Open-source computational workflows and models will be available for download and application to local data. ICE currently includes curated in vivo test data, reference chemical information, in vitro assay data (including Tox21TM/ToxCast™ high-throughput screening data), and in silico model predictions. Users can query these data collections focusing on end points of interest such as acute systemic toxicity, endocrine disruption, skin sensitization, and many others. ICE is publicly accessible at https://ice.ntp.niehs.nih.gov. https://doi.org/10.1289/EHP1759.",ICE,0.860381097,Chemical Environment,0.508983597,ICE,0.860381097,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/25/2017 +"22009673, 30407568",http://db-mml.sjtu.edu.cn/ICEberg,"ICEberg: a web-based resource for integrative and conjugative elements found in Bacteria. ICEberg (http://db-mml.sjtu.edu.cn/ICEberg/) is an integrated database that provides comprehensive information about integrative and conjugative elements (ICEs) found in bacteria. ICEs are conjugative self-transmissible elements that can integrate into and excise from a host chromosome. 
An ICE contains three typical modules, integration and excision, conjugation, and regulation modules, that collectively promote vertical inheritance and periodic lateral gene flow. Many ICEs carry likely virulence determinants, antibiotic-resistant factors and/or genes coding for other beneficial traits. ICEberg offers a unique, highly organized, readily explorable archive of both predicted and experimentally supported ICE-relevant data. It currently contains details of 428 ICEs found in representatives of 124 bacterial species, and a collection of >400 directly related references. A broad range of similarity search, sequence alignment, genome context browser, phylogenetic and other functional analysis tools are readily accessible via ICEberg. We propose that ICEberg will facilitate efficient, multi-disciplinary and innovative exploration of bacterial ICEs and be of particular interest to researchers in the broad fields of prokaryotic evolution, pathogenesis, biotechnology and metabolism. The ICEberg database will be maintained, updated and improved regularly to ensure its ongoing maximum utility to the research community.",ICEberg,0.640117407,NA,0,ICEberg,0.640117407,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +26430546,http://www.ciml.univ-mrs.fr/EWBANK_jonathan/software.html,"ICeE an interface for C. elegans experiments. An increasing number of laboratories are using the COPAS Biosort™ to implement high-throughput approaches to tackle diverse biological problems. While providing a powerful tool for generating quantitative data, the utility of the Biosort is currently limited by the absence of resources for data management. We describe a simple electronic database designed to allow easy storage and retrieval of Biosort data for C. elegans, but that has a wide potential application for organizing electronic files and data sets. ICeE is an Open Source application. 
The code and accompanying documentation are freely available via the web at http://www.ciml.univ-mrs.fr/EWBANK_jonathan/software.html.",ICeE,0.880001485,NA,0,ICeE,0.880001485,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2014 +29036693,http://icg.big.ac.cn,"ICG: a wiki-driven knowledgebase of internal control genes for RT-qPCR normalization. Real-time quantitative PCR (RT-qPCR) has become a widely used method for accurate expression profiling of targeted mRNA and ncRNA. Selection of appropriate internal control genes for RT-qPCR normalization is an elementary prerequisite for reliable expression measurement. Here, we present ICG (http://icg.big.ac.cn), a wiki-driven knowledgebase for community curation of experimentally validated internal control genes as well as their associated experimental conditions. Unlike extant related databases that focus on qPCR primers in model organisms (mainly human and mouse), ICG features harnessing collective intelligence in community integration of internal control genes for a variety of species. Specifically, it integrates a comprehensive collection of more than 750 internal control genes for 73 animals, 115 plants, 12 fungi and 9 bacteria, and incorporates detailed information on recommended application scenarios corresponding to specific experimental conditions, which, collectively, are of great help for researchers to adopt appropriate internal control genes for their own experiments. Taken together, ICG serves as a publicly editable and open-content encyclopaedia of internal control genes and accordingly bears broad utility for reliable RT-qPCR normalization and gene expression characterization in both model and non-model organisms.",ICG,0.994124234,NA,0,ICG,0.994124234,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +31600197,http://icite.od.nih.gov,"The NIH Open Citation Collection: A public access, broad coverage resource. 
Citation data have remained hidden behind proprietary, restrictive licensing agreements, which raises barriers to entry for analysts wishing to use the data, increases the expense of performing large-scale analyses, and reduces the robustness and reproducibility of the conclusions. For the past several years, the National Institutes of Health (NIH) Office of Portfolio Analysis (OPA) has been aggregating and enhancing citation data that can be shared publicly. Here, we describe the NIH Open Citation Collection (NIH-OCC), a public access database for biomedical research that is made freely available to the community. This dataset, which has been carefully generated from unrestricted data sources such as MedLine, PubMed Central (PMC), and CrossRef, now underlies the citation statistics delivered in the NIH iCite analytic platform. We have also included data from a machine learning pipeline that identifies, extracts, resolves, and disambiguates references from full-text articles available on the internet. Open citation links are available to the public in a major update of iCite (https://icite.od.nih.gov).",iCite,0.957005501,Open Citation Collection,0.840594471,iCite,0.957005501,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/10/2019 +32075414,"http://cartilage.org/society/icrs-patient-registry/, http://cartilage.org/society/icrs-patient-registry/registry-annual-reports","The First Report of the International Cartilage Regeneration and Joint Preservation Society's Global Registry. Objective The International Cartilage Regeneration and Joint Preservation Society's (ICRS's) global registry, aims to be the best source of information for patients and an unbiased resource of evidence-based medicine for scientists and clinicians working to help those unfortunate enough to suffer the pain and disability associated with articular cartilage lesions. This article constitutes the scientific summary of the reports' main findings. 
Design The article outlines the historical precedents in the development of orthopedic registries from the earliest tumor registries, then local arthroplasty databases that led ultimately to international collaborations between national arthroplasty and soft tissue registries. The ICRS global cartilage registry was designed from the outset as a GDPR (General Data Protection Regulation) compliant, multilingual, multinational cooperative system. It is a web-based user-friendly, live in 11 languages by end 2019, which can be accessed via https://cartilage.org/society/icrs-patient-registry/. Patients and clinicians enter data by smartphone, tablet, or computer on any knee cartilage regeneration and joint preservation treatment, including the use of focal arthroplasty. Knee Injury and Osteoarthritis Outcome Score and Kujala patient-reported outcome measures are collected preoperatively, 6 months, 12 months, and annually for ten years thereafter. EQ-5D data collection will allow cost-effectiveness analysis. Strengths, weaknesses, and future plans are discussed. Results Since inception the registry has 264 users across 50 countries. Major findings are presented and discussed, while the entire first ICRS global registry report is available at https://cartilage.org/society/icrs-patient-registry/registry-annual-reports/. Conclusion. A measure of the maturity of any registry is the publication of its findings in the peer reviewed literature. With the publication of its first report, the ICRS global registry has achieved that milestone.",ICRS,0.605600119,International Cartilage Regeneration,0.595255792,ICRS,0.605600119,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,2/19/2020 +33740463,http://icscb.stemcellinformatics.org,"Integrated Collection of Stem Cell Bank Data, a Data Portal for Standardized Stem Cell Information. The past decade has witnessed an extremely rapid increase in the number of newly established stem cell lines. 
However, due to the lack of a standardized format, data exchange among stem cell line resources has been challenging, and no system can search all stem cell lines across resources worldwide. To solve this problem, we have developed the Integrated Collection of Stem Cell Bank data (ICSCB) (http://icscb.stemcellinformatics.org/), the largest database search portal for stem cell line information, based on the standardized data items and terms of the MIACARM framework. Currently, ICSCB can retrieve >16,000 cell lines from four major data resources in Europe, Japan, and the United States. ICSCB is automatically updated to provide the latest cell line information, and its integrative search helps users collect cell line information for over 1,000 diseases, including many rare diseases worldwide, which has been a formidable task, thereby distinguishing itself from other database search portals.",ICSCB,0.996838212,Integrated Collection of Stem Cell Bank data,0.959731273,ICSCB,0.996838212,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/18/2021 +33137185,http://www.kobic.re.kr/icsdb,"iCSDB: an integrated database of CRISPR screens. High-throughput screening based on CRISPR-Cas9 libraries has become an attractive and powerful technique to identify target genes for functional studies. However, accessibility of public data is limited due to the lack of user-friendly utilities and up-to-date resources covering experiments from third parties. Here, we describe iCSDB, an integrated database of CRISPR screening experiments using human cell lines. We compiled two major sources of CRISPR-Cas9 screening: the DepMap portal and BioGRID ORCS. DepMap portal itself is an integrated database that includes three large-scale projects of CRISPR screening. We additionally aggregated CRISPR screens from BioGRID ORCS that is a collection of screening results from PubMed articles. Currently, iCSDB contains 1375 genome-wide screens across 976 human cell lines, covering 28 tissues and 70 cancer types. 
Importantly, the batch effects from different CRISPR libraries were removed and the screening scores were converted into a single metric to estimate the knockout efficiency. Clinical and molecular information were also integrated to help users to select cell lines of interest readily. Furthermore, we have implemented various interactive tools and viewers to facilitate users to choose, examine and compare the screen results both at the gene and guide RNA levels. iCSDB is available at https://www.kobic.re.kr/icsdb/.",iCSDB,0.994947433,NA,0,iCSDB,0.994947433,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +29040670,"http://ictv.global, http://ictv.global/report","Virus taxonomy: the database of the International Committee on Taxonomy of Viruses (ICTV). The International Committee on Taxonomy of Viruses (ICTV) is charged with the task of developing, refining, and maintaining a universal virus taxonomy. This task encompasses the classification of virus species and higher-level taxa according to the genetic and biological properties of their members; naming virus taxa; maintaining a database detailing the currently approved taxonomy; and providing the database, supporting proposals, and other virus-related information from an open-access, public web site. The ICTV web site (http://ictv.global) provides access to the current taxonomy database in online and downloadable formats, and maintains a complete history of virus taxa back to the first release in 1971. The ICTV has also published the ICTV Report on Virus Taxonomy starting in 1971. This Report provides a comprehensive description of all virus taxa covering virus structure, genome structure, biology and phylogenetics. The ninth ICTV report, published in 2012, is available as an open-access online publication from the ICTV web site. The current, 10th report (http://ictv.global/report/), is being published online, and is replacing the previous hard-copy edition with a completely open access, continuously updated publication. 
No other database or resource exists that provides such a comprehensive, fully annotated compendium of information on virus taxa and taxonomy.",ICTV,0.662099421,on Taxonomy of,0.613117501,ICTV,0.662099421,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +33406221,http://icysmod.omicsbio.info,"iCysMod: an integrative database for protein cysteine modifications in eukaryotes. . As important post-translational modifications, protein cysteine modifications (PCMs) occurring at cysteine thiol group play critical roles in the regulation of various biological processes in eukaryotes. Due to the rapid advancement of high-throughput proteomics technologies, a large number of PCM events have been identified but remain to be curated. Thus, an integrated resource of eukaryotic PCMs will be useful for the research community. In this work, we developed an integrative database for protein cysteine modifications in eukaryotes (iCysMod), which curated and hosted 108 030 PCM events for 85 747 experimentally identified sites on 31 483 proteins from 48 eukaryotes for 8 types of PCMs, including oxidation, S-nitrosylation (-SNO), S-glutathionylation (-SSG), disulfide formation (-SSR), S-sulfhydration (-SSH), S-sulfenylation (-SOH), S-sulfinylation (-SO2H) and S-palmitoylation (-S-palm). Then, browse and search options were provided for accessing the dataset, while various detailed information about the PCM events was well organized for visualization. With human dataset in iCysMod, the sequence features around the cysteine modification sites for each PCM type were analyzed, and the results indicated that various types of PCMs presented distinct sequence recognition preferences. Moreover, different PCMs can crosstalk with each other to synergistically orchestrate specific biological processes, and 37 841 PCM events involved in 119 types of PCM co-occurrences at the same cysteine residues were finally obtained. 
Taken together, we anticipate that the database of iCysMod would provide a useful resource for eukaryotic PCMs to facilitate related researches, while the online service is freely available at http://icysmod.omicsbio.info.",iCysMod,0.997266591,for protein cysteine modifications in eukaryotes,0.853484929,iCysMod,0.997266591,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2021 +34700680,http://portal.imagingdatacommons.cancer.gov,"NCI Imaging Data Commons. Purpose/objective(s)National Cancer Institute (NCI) Cancer Research Data Commons (CRDC) aims to establish a cloud-based data science infrastructure. Imaging Data Commons (IDC) is a component of CRDC supported by the Cancer Moonshot™, which aims to enable access and exploration of de-identified imaging data, and to support integrated analyses with non-imaging data. IDC will interoperate with other components of CRDC, which include repositories of other types of data, such as genomics and proteomics repositories, and computational resources to perform analysis of the data. IDC builds on the strengths of the established efforts such as The Cancer Imaging Archive (TCIA) to collect and share FAIR (Findable Accessible Interoperable Reusable) imaging data. Materials/methods IDC uses a combination of commercially available tools and capabilities provided by Google Cloud Platform (GCP) together with a range of open-source components. While the initial focus is to support clinical radiology and radiotherapy data, IDC aims to provide similar capabilities for brightfield microscopy, multi-channel immunofluorescence and other imaging modalities. Equally important is the ability to support the results of imaging data analysis, such as annotations of regions of interest in the images or various descriptors of image findings. The IDC search portal provides an interface for exploring the data, defining cohorts, and summarizing attributes of the cohort. 
Images can be viewed in the integrated browser-based viewer, which uses DICOMweb to access the IDC data. IDC data is public and contains no Protected Health Information (PHI). As CDRC grows, imaging datasets will be increasingly cross-linked to genomic, proteomic, and clinical data about the subjects. Results The pilot of IDC was released in October 2020, including 28 collections of the TCIA: radiology images related to The Cancer Genome Atlas (TCGA) project, and several collections prioritized to establish the capabilities of IDC in handling image-derived data. DICOM and collection-level metadata is available from the BigQuery tables, and does not require a project configured with billing. The IDC portal is available at https://portal.imagingdatacommons.cancer.gov, and integrates a customized web viewer that supports visualization of both the images and image annotations (specifically, visualization of DICOM Segmentation and Radiotherapy Structure Set is supported, including multiplanar reformatting). IDC also provides documentation and a user forum. Conclusion The IDC pilot available to the cancer research community explores the promise of cloud-hosted public imaging collections co-located with the compute resources and a growing number of tools to support data analysis. Production release of IDC is planned for Fall 2021, and will include all of the public TCIA collections, including those that contain imaging and annotation data from radiotherapy studies and clinical trials.",IDC,0.976822138,NA,0,IDC,0.976822138,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/1/2021 +32941628,http://mdl.shsmu.edu.cn/IDDB,"IDDB: a comprehensive resource featuring genes, variants and characteristics associated with infertility. Infertility is a complex multifactorial disease that affects up to 10% of couples across the world. 
However, many mechanisms of infertility remain unclear due to the lack of studies based on systematic knowledge, leading to ineffective treatment and/or transmission of genetic defects to offspring. Here, we developed an infertility disease database to provide a comprehensive resource featuring various factors involved in infertility. Features in the current IDDB version were manually curated as follows: (i) a total of 307 infertility-associated genes in human and 1348 genes associated with reproductive disorder in 9 model organisms; (ii) a total of 202 chromosomal abnormalities leading to human infertility, including aneuploidies and structural variants; and (iii) a total of 2078 pathogenic variants from infertility patients' samples across 60 different diseases causing infertility. Additionally, the characteristics of clinically diagnosed infertility patients (i.e. causative variants, laboratory indexes and clinical manifestations) were collected. To the best of our knowledge, the IDDB is the first infertility database serving as a systematic resource for biologists to decipher infertility mechanisms and for clinicians to achieve better diagnosis/treatment of patients from disease phenotype to genetic factors. The IDDB is freely available at http://mdl.shsmu.edu.cn/IDDB/.",IDDB,0.976911902,infertility disease database,0.88215218,IDDB,0.976911902,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +22067451,http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL,"IDEAL: Intrinsically Disordered proteins with Extensive Annotations and Literature. IDEAL, Intrinsically Disordered proteins with Extensive Annotations and Literature (http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL/), is a collection of knowledge on experimentally verified intrinsically disordered proteins. 
IDEAL contains manual annotations by curators on intrinsically disordered regions, interaction regions to other molecules, post-translational modification sites, references and structural domain assignments. In particular, IDEAL explicitly describes protean segments that can be transformed from a disordered state to an ordered state. Since in most cases they can act as molecular recognition elements upon binding of partner proteins, IDEAL provides a data resource for functional regions of intrinsically disordered proteins. The information in IDEAL is provided on a user-friendly graphical view and in a computer-friendly XML format.",IDEAL,0.995868742,Intrinsically Disordered proteins with Extensive Annotations and Literature,0.937364954,IDEAL,0.995868742,1,24178034,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/8/2011 +27903890,http://pharos.nih.gov,"Pharos: Collating protein information to shed light on the druggable genome. The 'druggable genome' encompasses several protein families, but only a subset of targets within them have attracted significant research attention and thus have information about them publicly available. The Illuminating the Druggable Genome (IDG) program was initiated in 2014, has the goal of developing experimental techniques and a Knowledge Management Center (KMC) that would collect and organize information about protein targets from four families, representing the most common druggable targets with an emphasis on understudied proteins. Here, we describe two resources developed by the KMC: the Target Central Resource Database (TCRD) which collates many heterogeneous gene/protein datasets and Pharos (https://pharos.nih.gov), a multimodal web interface that presents the data from TCRD. We briefly describe the types and sources of data considered by the KMC and then highlight features of the Pharos interface designed to enable intuitive access to the IDG knowledgebase. 
The aim of Pharos is to encourage 'serendipitous browsing', whereby related, relevant information is made easily discoverable. We conclude by describing two use cases that highlight the utility of Pharos and TCRD.",IDG,0.972324689,Illuminating the Druggable Genome,0.809028856,IDG,0.972324689,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,11/29/2016 +30371881,http://bigd.big.ac.cn/idog,"iDog: an integrated resource for domestic dogs and wild canids. The domestic dog (Canis lupus familiaris) is indisputably one of man's best friends. It is also a fundamental model for many heritable human diseases. Here, we present iDog (http://bigd.big.ac.cn/idog), the first integrated resource dedicated to domestic dogs and wild canids. It incorporates a variety of omics data, including genome sequences assemblies for dhole and wolf, genomic variations extracted from hundreds of dog/wolf whole genomes, phenotype/disease traits curated from dog research communities and public resources, gene expression profiles derived from published RNA-Seq data, gene ontology for functional annotation, homolog gene information for multiple organisms and disease-related literature. Additionally, iDog integrates sequence alignment tools for data analyses and a genome browser for data visualization. iDog will not only benefit the global dog research community, but also provide access to a user-friendly consolidation of dog information to a large number of dog enthusiasts.",iDog,0.997192383,NA,0,iDog,0.997192383,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +29548284,http://liulab.csrc.ac.cn/idpm,"IDPM: an online database for ion distribution in protein molecules. Background Interactions between ions and proteins have been extensively studied, yet most of the studies focus on the ion binding site. The binding mechanism for many ion binding sites can be clearly described from high resolution structures. 
Although knowledge accumulated on a case-by-case basis is valuable, it is also important to study the ion-protein interaction statistically. From experimentally determined structures, it is possible to examine the ion distribution around each amino acid. Such distributions can reveal relation between ions and amino acids, so it is desirable to carry out a systematic survey of 'ion-amino acid' pairing interaction and share the information with a publicly available database. Results The survey in the Protein Data Bank (PDB) revealed that approximately 40% of molecules records contain at least one ion. To reduce the bias resulted from protein redundancy, the statistics were extracted from a non-redundant dataset by excluding the proteins with similar sequences. Based on the structures of protein molecules and the location of ions, the statistical distributions of ions around each proteinogenic amino acid type were investigated and further summarized in a database. To systematically quantify the interactions between ions and each amino acid, the positions of ions were mapped to the coordinate system centered at each neighboring amino acid. It was found that the distribution of ions follows the expected rules governed by the physicochemical interactions in general. Large variations were observed, reflecting the preference in 'ion-amino acid' interactions. The analysis program is written in the Python programming language. The statistical results and program are available from the online database: ion distribution in protein molecules (IDPM) at http://liulab.csrc.ac.cn/idpm/ . Conclusion The spatial distribution of ions around amino acids is documented and analyzed. 
The statistics can be useful for identifying ion types for a given site in biomolecules, and can be potentially used in ion position prediction for given structures.",IDPM,0.976840337,ion distribution in protein molecules,0.878570855,IDPM,0.976840337,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/16/2018 +22681406,http://iedb.org,"The immune epitope database: a historical retrospective of the first decade. As the amount of biomedical information available in the literature continues to increase, databases that aggregate this information continue to grow in importance and scope. The population of databases can occur either through fully automated text mining approaches or through manual curation by human subject experts. We here report our experiences in populating the National Institute of Allergy and Infectious Diseases sponsored Immune Epitope Database and Analysis Resource (IEDB, http://iedb.org), which was created in 2003, and as of 2012 captures the epitope information from approximately 99% of all papers published to date that describe immune epitopes (with the exception of cancer and HIV data). This was achieved using a hybrid model based on automated document categorization and extensive human expert involvement. This task required automated scanning of over 22 million PubMed abstracts followed by classification and curation of over 13 000 references, including over 7000 infectious disease-related manuscripts, over 1000 allergy-related manuscripts, roughly 4000 related to autoimmunity, and 1000 transplant/alloantigen-related manuscripts. The IEDB curation involves an unprecedented level of detail, capturing for each paper the actual experiments performed for each different epitope structure. 
Key to enabling this process was the extensive use of ontologies to ensure rigorous and consistent data representation as well as interoperability with other bioinformatics resources, including the Protein Data Bank, Chemical Entities of Biological Interest, and the NIAID Bioinformatics Resource Centers. A growing fraction of the IEDB data derives from direct submissions by research groups engaged in epitope discovery, and is being facilitated by the implementation of novel data submission tools. The present explosion of information contained in biological databases demands effective query and display capabilities to optimize the user experience. Accordingly, the development of original ways to query the database, on the basis of ontologically driven hierarchical trees, and display of epitope data in aggregate in a biologically intuitive yet rigorous fashion is now at the forefront of the IEDB efforts. We also highlight advances made in the realm of epitope analysis and predictive tools available in the IEDB.",IEDB,0.993056357,Immune Epitope Database and Analysis Resource,0.933946027,IEDB,0.993056357,1,NA,"23734660.0, 33772585.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/1/2012 +"23734660, 33772585",http://www.iedb.org,"Query enhancement through the practical application of ontology: the IEDB and OBI. Ontologies categorize entities, express relationships between them, and provide standardized definitions. Thus, they can be used to present and enforce the specific relationships between database components. The Immune Epitope Database (IEDB, http://www.iedb.org) utilizes the Ontology for Biomedical Investigations (OBI) and several additional ontologies to represent immune epitope mapping experiments. Here, we describe our experiences utilizing this representation in order to provide enhanced database search functionality. 
We applied a simple approach to incorporate the benefits of the information captured in a formal ontology directly into the user web interface, resulting in an improved user experience with minimal changes to the database itself. The integration is easy to maintain, provides standardized terms and definitions, and allows for subsumption queries. In addition to these immediate benefits, our long-term goal is to enable true semantic integration of data and knowledge in the biomedical domain. We describe our progress towards that goal and what we perceive as the main obstacles.",IEDB,0.991275489,Immune Epitope Database,0.88714237,IEDB,0.991275489,2,NA,22681406,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,3/1/2021 +"22610854, 31114900",http://tools.iedb.org,"Immune epitope database analysis resource. The immune epitope database analysis resource (IEDB-AR: http://tools.iedb.org) is a collection of tools for prediction and analysis of molecular targets of T- and B-cell immune responses (i.e. epitopes). Since its last publication in the NAR webserver issue in 2008, a new generation of peptide:MHC binding and T-cell epitope predictive tools have been added. As validated by different labs and in the first international competition for predicting peptide:MHC-I binding, their predictive performances have improved considerably. In addition, a new B-cell epitope prediction tool was added, and the homology mapping tool was updated to enable mapping of discontinuous epitopes onto 3D structures. Furthermore, to serve a wider range of users, the number of ways in which IEDB-AR can be accessed has been expanded. 
Specifically, the predictive tools can be programmatically accessed using a web interface and can also be downloaded as software packages.",IEDB-AR,0.986699712,Immune Epitope Database Analysis Resource,0.965692446,IEDB-AR,0.986699712,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2019 +30380109,http://iekpd.biocuckoo.org,"iEKPD 2.0: an update with rich annotations for eukaryotic protein kinases, protein phosphatases and proteins containing phosphoprotein-binding domains. Here, we described the updated database iEKPD 2.0 (http://iekpd.biocuckoo.org) for eukaryotic protein kinases (PKs), protein phosphatases (PPs) and proteins containing phosphoprotein-binding domains (PPBDs), which are key molecules responsible for phosphorylation-dependent signalling networks and participate in the regulation of almost all biological processes and pathways. In total, iEKPD 2.0 contained 197 348 phosphorylation regulators, including 109 912 PKs, 23 294 PPs and 68 748 PPBD-containing proteins in 164 eukaryotic species. In particular, we provided rich annotations for the regulators of eight model organisms, especially humans, by compiling and integrating the knowledge from 100 widely used public databases that cover 13 aspects, including cancer mutations, genetic variations, disease-associated information, mRNA expression, DNA & RNA elements, DNA methylation, molecular interactions, drug-target relations, protein 3D structures, post-translational modifications, protein expressions/proteomics, subcellular localizations and protein functional annotations. Compared with our previously developed EKPD 1.0 (∼0.5 GB), iEKPD 2.0 contains ∼99.8 GB of data with an ∼200-fold increase in data volume. 
We anticipate that iEKPD 2.0 represents a more useful resource for further study of phosphorylation regulators.",iEKPD,0.997320652,NA,0,iEKPD,0.997320652,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +24923821,"http://cefg.uestc.edu.cn/ifim/, http://cefg.cn/ifim","IFIM: a database of integrated fitness information for microbial genes. . Knowledge of an organism's fitness for survival is important for a complete understanding of microbial genetics and effective drug design. Current essential gene databases provide only binary essentiality data from genome-wide experiments. We therefore developed a new database that Integrates quantitative Fitness Information for Microbial genes (IFIM). The IFIM database currently contains data from 16 experiments and 2186 theoretical predictions. The highly significant correlation between the experiment-derived fitness data and our computational simulations demonstrated that the computer-generated predictions were often as reliable as the experimental data. The data in IFIM can be accessed easily, and the interface allows users to browse through the gene fitness information that it contains. IFIM is the first resource that allows easy access to fitness data of microbial genes. We believe this database will contribute to a better understanding of microbial genetics and will be useful in designing drugs to resist microbial pathogens, especially when experimental data are unavailable. Database URL: http://cefg.uestc.edu.cn/ifim/ or http://cefg.cn/ifim/",IFIM,0.941411674,NA,0,IFIM,0.941411674,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/11/2014 +30967549,http://ifish4u.org,"iFISH is a publically available resource enabling versatile DNA FISH to study genome architecture. DNA fluorescence in situ hybridization (DNA FISH) is a powerful method to study chromosomal organization in single cells. At present, there is a lack of free resources of DNA FISH probes and probe design tools which can be readily applied. 
Here, we describe iFISH, an open-source repository currently comprising 380 DNA FISH probes targeting multiple loci on the human autosomes and chromosome X, as well as a genome-wide database of optimally designed oligonucleotides and a freely accessible web interface ( http://ifish4u.org ) that can be used to design DNA FISH probes. We individually validate 153 probes and take advantage of our probe repository to quantify the extent of intermingling between multiple heterologous chromosome pairs, showing a much higher extent of intermingling in human embryonic stem cells compared to fibroblasts. In conclusion, iFISH is a versatile and expandable resource, which can greatly facilitate the use of DNA FISH in research and diagnostics.",iFISH,0.997145891,NA,0,iFISH,0.997145891,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/9/2019 +21914464,http://www.bioinfo-cbs.org/igd,"IGD: a resource for intronless genes in the human genome. Intronless genes (IGs) fraction varies between 2.7 and 97.7% in eukaryotic genomes. Although many databases on exons and introns exist, there was no curated database for such genes which allowed their study in a concerted manner. Such a database would be useful to identify the functional features and the distribution of these genes across the genome. Here, a new database of IGs in eukaryotes based on GenBank data was described. This database, called IGD (Intronless Gene Database), is a collection of gene sequences that were annotated and curated. The current version of IGD contains 687 human intronless genes with their protein and CDS sequences. Some features of the entries are given in this paper. Data was extracted from GenBank release 183 using a Perl script. Data extraction was followed by a manual curation step. Intronless genes were then analyzed based on their RefSeq annotation and Gene Ontology functional class. IGD represents a useful resource for retrieval and in silico study of intronless genes. 
IGD is available at http://www.bioinfo-cbs.org/igd with comprehensive help and FAQ pages that illustrate the main uses of this resource.",IGD,0.97653389,Intronless Gene Database,0.875607576,IGD,0.97653389,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/2/2011 +22139933,http://igdb.nsclc.ibms.sinica.edu.tw,"IGDB.NSCLC: integrated genomic database of non-small cell lung cancer. Lung cancer is the most common cause of cancer-related mortality with more than 1.4 million deaths per year worldwide. To search for significant somatic alterations in lung cancer, we analyzed, integrated and manually curated various data sets and literatures to present an integrated genomic database of non-small cell lung cancer (IGDB.NSCLC, http://igdb.nsclc.ibms.sinica.edu.tw). We collected data sets derived from hundreds of human NSCLC (lung adenocarcinomas and/or squamous cell carcinomas) to illustrate genomic alterations [chromosomal regions with copy number alterations (CNAs), gain/loss and loss of heterozygosity], aberrant expressed genes and microRNAs, somatic mutations and experimental evidence and clinical information of alterations retrieved from literatures. IGDB.NSCLC provides user friendly interfaces and searching functions to display multiple layers of evidence especially emphasizing on concordant alterations of CNAs with co-localized altered gene expression, aberrant microRNAs expression, somatic mutations or genes with associated clinicopathological features. These significant concordant alterations in NSCLC are graphically or tabularly presented to facilitate and prioritize as the putative cancer targets for pathological and mechanistic studies of lung tumorigenesis and for developing new strategies in clinical interventions.",IGDB.NSCLC,0.982541544,NA,0,IGDB.NSCLC,0.982541544,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2011 +27465544,http://bio.njfu.edu.cn/igdd,"IGDD: a database of intronless genes in dicots. 
Background Intronless genes are a significant characteristic of prokaryotes. Systematic identification and annotation are primary and crucial steps for determining the functions of intronless genes and understanding their occurrence in eukaryotes. Description In this paper, we describe the construction of the Intronless Genes Database in Dicots (IGDD; available at http://bio.njfu.edu.cn/igdd/ ), which contains data for five well-annotated plants including Arabidopsis thaliana, Carica papaya, Populus trichocarpa, Salix suchowensis and Vitis vinifera. Using highly visual settings, IGDD displays the structural and functional annotations, the homolog groups, the syntenic relationships, the expression patterns, and the statistical characteristics of intronless genes. In addition, useful tools such as an advanced search and local BLAST are available through a user-friendly and intuitive web interface. Conclusion In conclusion, the IGDD provides a comprehensive and up-to-date platform for researchers to assist the exploration of intronless genes in dicot plants.",IGDD,0.995845596,Intronless Genes Database in Dicots,0.981970423,IGDD,0.995845596,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/27/2016 +22698731,http://www.adelaide.edu.au/igfmutation,"The insulin-like growth factor mutation database (IGFmdb). Insulin-like growth factors (IGF-I and IGF-II), and insulin are evolutionarily conserved hormonal regulators of eukaryotic growth and development. Through interactions with their cognate receptors, all three molecules can influence cellular growth, proliferation, differentiation, migration, and survival, as well as metabolic processes. As such, perturbations in signaling by IGFs and insulin are a well-documented cause of altered growth, development and survival during both embryonic and post-natal life. 
A key approach in understanding how IGFs and insulin elicit their biological effects has been through identifying structural features of the ligands that influence their receptor interactions. Over the years, the study of many hundreds of specifically engineered IGF and insulin analogues has provided a wealth of knowledge about how specific residues of these ligands contribute to ligand:receptor interactions. Some analogues have even provided the basis for designing therapeutic agents for the treatment of IGF and insulin-related diseases. As the list of IGF and insulin analogues continues to grow we find that, while many have been produced and studied, it would be of considerable value to have a central repository from which information about specific analogues and their receptor binding data were readily available in an easily searchable and comparable format. To address this, we have created the ""Insulin-like growth factor mutation database"" (IGFmdb). The IGFmdb is a web-based curated database of annotated ligand analogues and their receptor binding affinities that can be accessed via http://www.adelaide.edu.au/igfmutation. Currently the IGFmdb contains receptor-binding data for 67 IGF-II analogues that were publicly accessible prior to 2012, as well as 67 IGF-I analogues, including all of those produced and characterised in our laboratory. A small number of these are IGF species homologues. There are also 32 insulin analogues within IGFmdb that were reported within the included IGF analogue studies, representing only a small fraction of existing insulin mutants. 
Future developments of the IGFmdb will incorporate receptor-binding data for all publicly accessible IGF-I analogues and the data will be expanded to include IGF-binding protein (IGFBP) binding affinities.",IGFmdb,0.971408069,The insulin-like growth factor mutation database,0.886121653,IGFmdb,0.971408069,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/13/2012 +26582920,http://gnmdb.csb.pitt.edu,"iGNM 2.0: the Gaussian network model database for biomolecular structural dynamics. Gaussian network model (GNM) is a simple yet powerful model for investigating the dynamics of proteins and their complexes. GNM analysis became a broadly used method for assessing the conformational dynamics of biomolecular structures with the development of a user-friendly interface and database, iGNM, in 2005. We present here an updated version, iGNM 2.0 http://gnmdb.csb.pitt.edu/, which covers more than 95% of the structures currently available in the Protein Data Bank (PDB). Advanced search and visualization capabilities, both 2D and 3D, permit users to retrieve information on inter-residue and inter-domain cross-correlations, cooperative modes of motion, the location of hinge sites and energy localization spots. The ability of iGNM 2.0 to provide structural dynamics data on the large majority of PDB structures and, in particular, on their biological assemblies makes it a useful resource for establishing the bridge between structure, dynamics and function.",iGNM,0.993238688,NA,0,iGNM,0.993238688,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +"27638885, 31584097",http://www.internationalgenome.org,"The international Genome sample resource (IGSR): A worldwide collection of genome variation incorporating the 1000 Genomes Project data. The International Genome Sample Resource (IGSR; http://www.internationalgenome.org) expands in data type and population diversity the resources from the 1000 Genomes Project. 
IGSR represents the largest open collection of human variation data and provides easy access to these resources. IGSR was established in 2015 to maintain and extend the 1000 Genomes Project data, which has been widely used as a reference set of human variation and by researchers developing analysis methods. IGSR has mapped all of the 1000 Genomes sequence to the newest human reference (GRCh38), and will release updated variant calls to ensure maximal usefulness of the existing data. IGSR is collecting new structural variation data on the 1000 Genomes samples from long read sequencing and other technologies, and will collect relevant functional data into a single comprehensive resource. IGSR is extending coverage with new populations sequenced by collaborating groups. Here, we present the new data and analysis that IGSR has made available. We have also introduced a new data portal that increases discoverability of our data-previously only browseable through our FTP site-by focusing on particular samples, populations or data sets of interest.",IGSR,0.970552325,International Genome Sample Resource,0.881040025,IGSR,0.970552325,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +27863956,http://epigenomesportal.ca/ihec,"The International Human Epigenome Consortium Data Portal. The International Human Epigenome Consortium (IHEC) coordinates the production of reference epigenome maps through the characterization of the regulome, methylome, and transcriptome from a wide range of tissues and cell types. To define conventions ensuring the compatibility of datasets and establish an infrastructure enabling data integration, analysis, and sharing, we developed the IHEC Data Portal (http://epigenomesportal.ca/ihec). The portal provides access to >7,000 reference epigenomic datasets, generated from >600 tissues, which have been contributed by seven international consortia: ENCODE, NIH Roadmap, CEEHRC, Blueprint, DEEP, AMED-CREST, and KNIH. 
The portal enhances the utility of these reference maps by facilitating the discovery, visualization, analysis, download, and sharing of epigenomics data. The IHEC Data Portal is the official source to navigate through IHEC datasets and represents a strategy for unifying the distributed data produced by international research consortia.",IHEC,0.982326448,NA,0,IHEC,0.982326448,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2016 +28187703,http://www.tongjidmb.com/human/index.html,"iHMS: a database integrating human histone modification data across developmental stages and tissues. Background Differences in chromatin states are critical to the multiplicity of cell states. Recently genome-wide histone modification maps of diverse human developmental stages and tissues have been charted. Description To facilitate the investigation of epigenetic dynamics and regulatory mechanisms in cellular differentiation processes, we developed iHMS, an integrated human histone modification database that incorporates massive histone modification maps spanning different developmental stages, lineages and tissues ( http://www.tongjidmb.com/human/index.html ). It also includes genome-wide expression data of different conditions, reference gene annotations, GC content and CpG island information. By providing an intuitive and user-friendly query interface, iHMS enables comprehensive query and comparative analysis based on gene names, genomic region locations, histone modification marks and cell types. Moreover, it offers an efficient browser that allows users to visualize and compare multiple genome-wide histone modification maps and related expression profiles across different developmental stages and tissues. Conclusion iHMS is of great helpfulness to understand how global histone modification state transitions impact cellular phenotypes across different developmental stages and tissues in the human genome. 
This extensive catalog of histone modification states thus presents an important resource for epigenetic and developmental studies.",iHMS,0.988777816,NA,0,iHMS,0.988777816,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/11/2017 +"26516188, 30407591",http://ophid.utoronto.ca/iid,"Integrated interactions database: tissue-specific view of the human and model organism interactomes. IID (Integrated Interactions Database) is the first database providing tissue-specific protein-protein interactions (PPIs) for model organisms and human. IID covers six species (S. cerevisiae (yeast), C. elegans (worm), D. melonogaster (fly), R. norvegicus (rat), M. musculus (mouse) and H. sapiens (human)) and up to 30 tissues per species. Users query IID by providing a set of proteins or PPIs from any of these organisms, and specifying species and tissues where IID should search for interactions. If query proteins are not from the selected species, IID enables searches across species and tissues automatically by using their orthologs; for example, retrieving interactions in a given tissue, conserved in human and mouse. Interaction data in IID comprises three types of PPI networks: experimentally detected PPIs from major databases, orthologous PPIs and high-confidence computationally predicted PPIs. Interactions are assigned to tissues where their proteins pairs or encoding genes are expressed. IID is a major replacement of the I2D interaction database, with larger PPI networks (a total of 1,566,043 PPIs among 68,831 proteins), tissue annotations for interactions, and new query, analysis and data visualization capabilities. IID is available at http://ophid.utoronto.ca/iid.",IID,0.992068827,Integrated Interactions Database,0.880897582,IID,0.992068827,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +22984621,http://www.vanderbilt.edu/IIID,"Insect Innate Immunity Database (IIID): an annotation tool for identifying immune genes in insect genomes. The innate immune system is an ancient component of host defense. 
Since innate immunity pathways are well conserved throughout many eukaryotes, immune genes in model animals can be used to putatively identify homologous genes in newly sequenced genomes of non-model organisms. With the initiation of the ""i5k"" project, which aims to sequence 5,000 insect genomes by 2016, many novel insect genomes will soon become publicly available, yet few annotation resources are currently available for insects. Thus, we developed an online tool called the Insect Innate Immunity Database (IIID) to provide an open access resource for insect immunity and comparative biology research (http://www.vanderbilt.edu/IIID). The database provides users with simple exploratory tools to search the immune repertoires of five insect models (including Nasonia), spanning three orders, for specific immunity genes or genes within a particular immunity pathway. As a proof of principle, we used an initial database with only four insect models to annotate potential immune genes in the parasitoid wasp genus Nasonia. Results specify 306 putative immune genes in the genomes of N. vitripennis and its two sister species N. giraulti and N. longicornis. Of these genes, 146 were not found in previous annotations of Nasonia immunity genes. Combining these newly identified immune genes with those in previous annotations, Nasonia possess 489 putative immunity genes, the largest immune repertoire found in insects to date. While these computational predictions need to be complemented with functional studies, the IIID database can help initiate and augment annotations of the immune system in the plethora of insect genomes that will soon become available.",IIID,0.976448039,Insect Innate Immunity Database,0.973630855,IIID,0.976448039,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/12/2012 +25707505,http://syslab.nchu.edu.tw/IIIDB,"IIIDB: a database for isoform-isoform interactions and isoform network modules. 
Background Protein-protein interactions (PPIs) are key to understanding diverse cellular processes and disease mechanisms. However, current PPI databases only provide low-resolution knowledge of PPIs, in the sense that ""proteins"" of currently known PPIs generally refer to ""genes."" It is known that alternative splicing often impacts PPI by either directly affecting protein interacting domains, or by indirectly impacting other domains, which, in turn, impacts the PPI binding. Thus, proteins translated from different isoforms of the same gene can have different interaction partners. Results Due to the limitations of current experimental capacities, little data is available for PPIs at the resolution of isoforms, although such high-resolution data is crucial to map pathways and to understand protein functions. In fact, alternative splicing can often change the internal structure of a pathway by rearranging specific PPIs. To fill the gap, we systematically predicted genome-wide isoform-isoform interactions (IIIs) using RNA-seq datasets, domain-domain interaction and PPIs. Furthermore, we constructed an III database (IIIDB) that is a resource for studying PPIs at isoform resolution. To discover functional modules in the III network, we performed III network clustering, and then obtained 1025 isoform modules. To evaluate the module functionality, we performed the GO/pathway enrichment analysis for each isoform module. Conclusions The IIIDB provides predictions of human protein-protein interactions at the high resolution of transcript isoforms that can facilitate detailed understanding of protein functions and biological pathways. The web interface allows users to search for IIIs or III network modules. 
The IIIDB is freely available at http://syslab.nchu.edu.tw/IIIDB.",IIIDB,0.965002835,NA,0,IIIDB,0.965002835,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/21/2015 +23991755,http://metabolomics.pharm.uconn.edu/iimdb,"In silico enzymatic synthesis of a 400,000 compound biochemical database for nontargeted metabolomics. Current methods of structure identification in mass-spectrometry-based nontargeted metabolomics rely on matching experimentally determined features of an unknown compound to those of candidate compounds contained in biochemical databases. A major limitation of this approach is the relatively small number of compounds currently included in these databases. If the correct structure is not present in a database, it cannot be identified, and if it cannot be identified, it cannot be included in a database. Thus, there is an urgent need to augment metabolomics databases with rationally designed biochemical structures using alternative means. Here we present the In Vivo/In Silico Metabolites Database (IIMDB), a database of in silico enzymatically synthesized metabolites, to partially address this problem. The database, which is available at http://metabolomics.pharm.uconn.edu/iimdb/, includes ~23,000 known compounds (mammalian metabolites, drugs, secondary plant metabolites, and glycerophospholipids) collected from existing biochemical databases plus more than 400,000 computationally generated human phase-I and phase-II metabolites of these known compounds. IIMDB features a user-friendly web interface and a programmer-friendly RESTful web service. Ninety-five percent of the computationally generated metabolites in IIMDB were not found in any existing database. However, 21,640 were identical to compounds already listed in PubChem, HMDB, KEGG, or HumanCyc. Furthermore, the vast majority of these in silico metabolites were scored as biological using BioSM, a software program that identifies biochemical structures in chemical structure space. 
These results suggest that in silico biochemical synthesis represents a viable approach for significantly augmenting biochemical databases for nontargeted metabolomics applications.",IIMDB,0.996076167,In Vivo/In Silico Metabolites Database,0.911021487,IIMDB,0.996076167,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/12/2013 +27081555,http://ijgvd.megabank.tohoku.ac.jp,"iJGVD: an integrative Japanese genome variation database based on whole-genome sequencing. The integrative Japanese Genome Variation Database (iJGVD; http://ijgvd.megabank.tohoku.ac.jp/) provides genomic variation data detected by whole-genome sequencing (WGS) of Japanese individuals. Specifically, the database contains variants detected by WGS of 1,070 individuals who participated in a genome cohort study of the Tohoku Medical Megabank Project. In the first release, iJGVD includes >4,300,000 autosomal single nucleotide variants (SNVs) whose minor allele frequencies are >5.0%.",iJGVD,0.988222253,integrative Japanese Genome Variation Database,0.978778683,iJGVD,0.988222253,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/26/2015 +26340938,http://www.mousephenotype.org,"Beyond knockouts: the International Knockout Mouse Consortium delivers modular and evolving tools for investigating mammalian genes. The International Knockout Mouse Consortium (IKMC; http://www.mousephenotype.org ) has generated mutations in almost every protein-coding mouse gene and is completing the companion Cre driver resource to expand tissue-specific conditional mutagenesis. Accordingly, the IKMC has carried out high-throughput gene trapping and targeting producing conditional mutations in murine embryonic stem cells in more than 18,500 genes, from which at least 4900 mutant mouse lines have been established to date. This resource is currently being upgraded with more powerful tools, such as visualization and manipulation cassettes that can be easily introduced into IKMC alleles for multifaceted functional studies. 
In addition, we discuss how existing IKMC products can be used in combination with CRISPR technology to accelerate genome engineering projects. All information and materials from this extraordinary biological resource together with coordinated phenotyping efforts can be retrieved at www.mousephenotype.org . The comprehensive IKMC knockout resource in combination with an extensive set of modular gene cassettes will continue to enhance functional gene annotation in the future and solidify its impact on biomedical research.",IKMC,0.861618261,International Knockout Mouse Consortium,0.697445065,IKMC,0.861618261,1,24194600,NA,low_prob_best_name,do not remove,do not merge,NA,NA,NA,NA,9/4/2015 +33308175,http://ildgdb.org,"ILDGDB: a manually curated database of genomics, transcriptomics, proteomics and drug information for interstitial lung diseases. Background Interstitial lung diseases (ILDs), a diverse group of diffuse lung diseases, mainly affect the lung parenchyma. The low-throughput 'omics' technologies (genomics, transcriptomics, proteomics) and relative drug information have begun to reshaped our understanding of ILDs, whereas, these data are scattered among massive references and are difficult to be fully exploited. Therefore, we manually mined and summarized these data at a database (ILDGDB, http://ildgdb.org/ ) and will continue to update it in the future. Main body The current version of ILDGDB incorporates 2018 entries representing 20 ILDs and over 600 genes obtained from over 3000 articles in four species. Each entry contains detailed information, including species, disease type, detailed description of gene (e.g. official symbol of gene), and the original reference etc. ILDGDB is free, and provides a user-friendly web page. Users can easily search for genes of interest, view their expression pattern and detailed information, manage genes sets and submit novel ILDs-gene association. 
Conclusion The main principle behind ILDGDB's design is to provide an exploratory platform, with minimum filtering and interpretation, while making the presentation of the data very accessible, which will provide great help for researchers to decipher gene mechanisms and improve the prevention, diagnosis and therapy of ILDs.",ILDGDB,0.998209894,NA,0,ILDGDB,0.998209894,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/11/2020 +29897484,http://14.139.240.55/ildgendb/index.php,"ILDgenDB: integrated genetic knowledge resource for interstitial lung diseases (ILDs). . Interstitial lung diseases (ILDs) are a diverse group of ∼200 acute and chronic pulmonary disorders that are characterized by variable amounts of inflammation, fibrosis and architectural distortion with substantial morbidity and mortality. Inaccurate and delayed diagnoses increase the risk, especially in developing countries. Studies have indicated the significant roles of genetic elements in ILDs pathogenesis. Therefore, the first genetic knowledge resource, ILDgenDB, has been developed with an objective to provide ILDs genetic data and their integrated analyses for the better understanding of disease pathogenesis and identification of diagnostics-based biomarkers. This resource contains literature-curated disease candidate genes (DCGs) enriched with various regulatory elements that have been generated using an integrated bioinformatics workflow of databases searches, literature-mining and DCGs-microRNA (miRNAs)-single nucleotide polymorphisms (SNPs) association analyses. To provide statistical significance to disease-gene association, ILD-specificity index and hypergeomatric test scores were also incorporated. Association analyses of miRNAs, SNPs and pathways responsible for the pathogenesis of different sub-classes of ILDs were also incorporated. Manually verified 299 DCGs and their significant associations with 1932 SNPs, 2966 miRNAs and 9170 miR-polymorphisms were also provided. 
Furthermore, 216 literature-mined and proposed biomarkers were identified. The ILDgenDB resource provides user-friendly browsing and extensive query-based information retrieval systems. Additionally, this resource also facilitates graphical view of predicted DCGs-SNPs/miRNAs and literature associated DCGs-ILDs interactions for each ILD to facilitate efficient data interpretation. Outcomes of analyses suggested the significant involvement of immune system and defense mechanisms in ILDs pathogenesis. This resource may potentially facilitate genetic-based disease monitoring and diagnosis.Database URL: http://14.139.240.55/ildgendb/index.php.",ILDgenDB,0.996150136,NA,0,ILDgenDB,0.996150136,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +27484196,http://ilir.warwick.ac.uk,"iLIR database: A web resource for LIR motif-containing proteins in eukaryotes. Atg8-family proteins are the best-studied proteins of the core autophagic machinery. They are essential for the elongation and closure of the phagophore into a proper autophagosome. Moreover, Atg8-family proteins are associated with the phagophore from the initiation of the autophagic process to, or just prior to, the fusion between autophagosomes with lysosomes. In addition to their implication in autophagosome biogenesis, they are crucial for selective autophagy through their ability to interact with selective autophagy receptor proteins necessary for the specific targeting of substrates for autophagic degradation. In the past few years it has been revealed that Atg8-interacting proteins include not only receptors but also components of the core autophagic machinery, proteins associated with vesicles and their transport, and specific proteins that are selectively degraded by autophagy. Atg8-interacting proteins contain a short linear LC3-interacting region/LC3 recognition sequence/Atg8-interacting motif (LIR/LRS/AIM) motif which is responsible for their interaction with Atg8-family proteins. 
These proteins are referred to as LIR-containing proteins (LIRCPs). So far, many experimental efforts have been carried out to identify new LIRCPs, leading to the characterization of some of them in the past 10 years. Given the need for the identification of LIRCPs in various organisms, we developed the iLIR database ( https://ilir.warwick.ac.uk ) as a freely available web resource, listing all the putative canonical LIRCPs identified in silico in the proteomes of 8 model organisms using the iLIR server, combined with a Gene Ontology (GO) term analysis. Additionally, a curated text-mining analysis of the literature permitted us to identify novel putative LICRPs in mammals that have not previously been associated with autophagy.",iLIR,0.989266038,NA,0,iLIR,0.989266038,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/2/2016 +28806134,http://ilir.uk/virus,"iLIR@viral: A web resource for LIR motif-containing proteins in viruses. Macroautophagy/autophagy has been shown to mediate the selective lysosomal degradation of pathogenic bacteria and viruses (xenophagy), and to contribute to the activation of innate and adaptative immune responses. Autophagy can serve as an antiviral defense mechanism but also as a proviral process during infection. Atg8-family proteins play a central role in the autophagy process due to their ability to interact with components of the autophagy machinery as well as selective autophagy receptors and adaptor proteins. Such interactions are usually mediated through LC3-interacting region (LIR) motifs. So far, only one viral protein has been experimentally shown to have a functional LIR motif, leaving open a vast field for investigation. Here, we have developed the iLIR@viral database ( http://ilir.uk/virus/ ) as a freely accessible web resource listing all the putative canonical LIR motifs identified in viral proteins. Additionally, we used a curated text-mining analysis of the literature to identify novel putative LIR motif-containing proteins (LIRCPs) in viruses. 
We anticipate that iLIR@viral will assist with elucidating the full complement of LIRCPs in viruses.",iLIR@viral,0.916833331,NA,0,iLIR@viral,0.916833331,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/14/2017 +21624339,http://www.ipasvi.roma.it/ita/ILISI,"[A new resource for the bibliography research: project experience ILISI® Index of Italian Literature on Nursing Sciences]. Since July 2008 the ILISI (Index of Italian Literature on Nursing Sciences) elaborated by the IPASVI nursing college of Rome has been available on-line at the page http://www.ipasvi.roma.it/ita/ILISI/ . The aim of this is to make Italian nursing literature more available and to favor bibliographic research. About 3000 articles and 30 nursing journals are available : the necessary software is open source (free) and has been adapted to allow searches by author, topic or word content. Indexation has been carried out by a group of volunteer nurses using a Thesaurus created by the project group. This article describes the aims of the project , how it has been created , the resources employed and the potential of the database. Use of the latter is on the increase: in fact, during the first 12 months of availability , the number of consultations reached 9000.",ILISI,0.987797678,Index of Italian Literature on Nursing Sciences,0.942488663,ILISI,0.987797678,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2011 +32228437,http://www.marmotdb.org,"iMarmot: an integrative platform for comparative and functional genomics of marmots. Background Marmots are large Holarctic rodents with unique biological features, making them potential animal models in various research fields. Due to the rapid accumulation of the genetic data in marmots, a highly integrative database is urgent needed. 
Description iMarmot is freely available on the web at http://www.marmotdb.org/ and currently contains the biological information of 14 marmots, genomic sequence of 6 marmots, syntenic relationship and orthologs among 3 marmots, and expression profiles of several hibernators and plague hosts. To assist with the genomic and transcriptomic analysis, we also integrated a set of analysis and visualization tools, such as KEGG or GO enrichment analysis, PCA, Blast, Muscle, GeneWise, Lastz, and JBrowse. Particularly, one DEGs (differentially expressed genes) module has been implemented in this database to visualize the gene expression changes in hibernators and plague hosts. Conclusion This database will provide comprehensive information and analysis platform for researchers interested in understanding the biological features of marmots.",iMarmot,0.997347414,NA,0,iMarmot,0.997347414,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/30/2020 +29619235,http://imethyl.iwate-megabank.org,"iMETHYL: an integrative database of human DNA methylation, gene expression, and genomic variation. We launched an integrative multi-omics database, iMETHYL (http://imethyl.iwate-megabank.org). iMETHYL provides whole-DNA methylation (~24 million autosomal CpG sites), whole-genome (~9 million single-nucleotide variants), and whole-transcriptome (>14 000 genes) data for CD4+ T-lymphocytes, monocytes, and neutrophils collected from approximately 100 subjects. These data were obtained from whole-genome bisulfite sequencing, whole-genome sequencing, and whole-transcriptome sequencing, making iMETHYL a comprehensive database.",iMETHYL,0.997811019,NA,0,iMETHYL,0.997811019,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/29/2018 +"26173699, 27903896",http://img.jgi.doe.gov/abc,"IMG-ABC: A Knowledge Base To Fuel Discovery of Biosynthetic Gene Clusters and Novel Secondary Metabolites. 
Unlabelled In the discovery of secondary metabolites, analysis of sequence data is a promising exploration path that remains largely underutilized due to the lack of computational platforms that enable such a systematic approach on a large scale. In this work, we present IMG-ABC (https://img.jgi.doe.gov/abc), an atlas of biosynthetic gene clusters within the Integrated Microbial Genomes (IMG) system, which is aimed at harnessing the power of ""big"" genomic data for discovering small molecules. IMG-ABC relies on IMG's comprehensive integrated structural and functional genomic data for the analysis of biosynthetic gene clusters (BCs) and associated secondary metabolites (SMs). SMs and BCs serve as the two main classes of objects in IMG-ABC, each with a rich collection of attributes. A unique feature of IMG-ABC is the incorporation of both experimentally validated and computationally predicted BCs in genomes as well as metagenomes, thus identifying BCs in uncultured populations and rare taxa. We demonstrate the strength of IMG-ABC's focused integrated analysis tools in enabling the exploration of microbial secondary metabolism on a global scale, through the discovery of phenazine-producing clusters for the first time in Alphaproteobacteria. IMG-ABC strives to fill the long-existent void of resources for computational exploration of the secondary metabolism universe; its underlying scalable framework enables traversal of uncovered phylogenetic and chemical structure space, serving as a doorway to a new era in the discovery of novel molecules. Importance IMG-ABC is the largest publicly available database of predicted and experimental biosynthetic gene clusters and the secondary metabolites they produce. The system also includes powerful search and analysis tools that are integrated with IMG's extensive genomic/metagenomic data and analysis tool kits. 
As new research on biosynthetic gene clusters and secondary metabolites is published and more genomes are sequenced, IMG-ABC will continue to expand, with the goal of becoming an essential component of any bioinformatic exploration of the secondary metabolism world.",IMG-ABC,0.986837733,Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters,0.984186777,IMG-ABC,0.986837733,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2016 +27799466,http://img.jgi.doe.gov/vr,"IMG/VR: a database of cultured and uncultured DNA Viruses and retroviruses. Viruses represent the most abundant life forms on the planet. Recent experimental and computational improvements have led to a dramatic increase in the number of viral genome sequences identified primarily from metagenomic samples. As a result of the expanding catalog of metagenomic viral sequences, there exists a need for a comprehensive computational platform integrating all these sequences with associated metadata and analytical tools. Here we present IMG/VR (https://img.jgi.doe.gov/vr/), the largest publicly available database of 3908 isolate reference DNA viruses with 264 413 computationally identified viral contigs from >6000 ecologically diverse metagenomic samples. Approximately half of the viral contigs are grouped into genetically distinct quasi-species clusters. Microbial hosts are predicted for 20 000 viral sequences, revealing nine microbial phyla previously unreported to be infected by viruses. Viral sequences can be queried using a variety of associated metadata, including habitat type and geographic location of the samples, or taxonomic classification according to hallmark viral genes. 
IMG/VR has a user-friendly interface that allows users to interrogate all integrated data and interact by comparing with external sequences, thus serving as an essential resource in the viral genomics community.",IMG/VR,0.986604303,NA,0,IMG/VR,0.986604303,1,33137183,33137183,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,10/30/2016 +33137183,"http://img.jgi.doe.gov/vr, http://genome.jgi.doe.gov/portal/IMG_VR","IMG/VR v3: an integrated ecological and evolutionary framework for interrogating genomes of uncultivated viruses. Viruses are integral components of all ecosystems and microbiomes on Earth. Through pervasive infections of their cellular hosts, viruses can reshape microbial community structure and drive global nutrient cycling. Over the past decade, viral sequences identified from genomes and metagenomes have provided an unprecedented view of viral genome diversity in nature. Since 2016, the IMG/VR database has provided access to the largest collection of viral sequences obtained from (meta)genomes. Here, we present the third version of IMG/VR, composed of 18 373 cultivated and 2 314 329 uncultivated viral genomes (UViGs), nearly tripling the total number of sequences compared to the previous version. These clustered into 935 362 viral Operational Taxonomic Units (vOTUs), including 188 930 with two or more members. UViGs in IMG/VR are now reported as single viral contigs, integrated proviruses or genome bins, and are annotated with a new standardized pipeline including genome quality estimation using CheckV, taxonomic classification reflecting the latest ICTV update, and expanded host taxonomy prediction. The new IMG/VR interface enables users to efficiently browse, search, and select UViGs based on genome features and/or sequence similarity. 
IMG/VR v3 is available at https://img.jgi.doe.gov/vr, and the underlying data are available to download at https://genome.jgi.doe.gov/portal/IMG_VR.",IMG/VR,0.956313595,NA,0,IMG/VR,0.956313595,1,27799466,27799466,low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +23080122,http://www.ebi.ac.uk/imgt/hla,"The IMGT/HLA database. It is 14 years since the IMGT/HLA database was first released, providing the HLA community with a searchable repository of highly curated HLA sequences. The HLA complex is located within the 6p21.3 region of human chromosome 6 and contains more than 220 genes of diverse function. Of these, 21 genes encode proteins of the immune system that are highly polymorphic. The naming of these HLA genes and alleles and their quality control is the responsibility of the World Health Organization Nomenclature Committee for Factors of the HLA System. Through the work of the HLA Informatics Group and in collaboration with the European Bioinformatics Institute, we are able to provide public access to these data through the website http://www.ebi.ac.uk/imgt/hla/. Regular updates to the website ensure that new and confirmatory sequences are dispersed to the HLA community and the wider research and clinical communities. This article describes the latest updates and additional tools added to the IMGT/HLA project.",IMGT/H,0.7965283,NA,0,IMGT/H,0.7965283,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/17/2012 +28025339,http://gene.cqu.edu.cn/iMITEdb,"iMITEdb: the genome-wide landscape of miniature inverted-repeat transposable elements in insects. . Miniature inverted-repeat transposable elements (MITEs) have attracted much attention due to their widespread occurrence and high copy numbers in eukaryotic genomes. However, the systematic knowledge about MITEs in insects and other animals is still lacking. 
In this study, we identified 6012 MITE families from 98 insect species genomes. Comparison of these MITEs with known MITEs in the NCBI non-redundant database and Repbase showed that 5701(∼95%) of 6012 MITE families are novel. The abundance of MITEs varies drastically among different insect species, and significantly correlates with genome size. In general, larger genomes contain more MITEs than small genomes. Furthermore, all identified MITEs were included in a newly constructed database (iMITEdb) (http://gene.cqu.edu.cn/iMITEdb/), which has functions such as browse, search, BLAST and download. Overall, our results not only provide insight on insect MITEs but will also improve assembly and annotation of insect genomes. More importantly, the results presented in this study will promote studies of MITEs function, evolution and application in insects. DATABASE URL: http://gene.cqu.edu.cn/iMITEdb/.",iMITEdb,0.966844749,NA,0,iMITEdb,0.966844749,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/26/2016 +34755873,http://bio-bigdata.hrbmu.edu.cn/ImmReg,"ImmReg: the regulon atlas of immune-related pathways across cancer types. Immune system gene regulation perturbation has been found to be a major cause of the development of various types of cancer. Numbers of mechanisms contribute to gene expression regulation, thus, systematically identification of potential regulons of immune-related pathways is critical to cancer immunotherapy. Here, we comprehensively chart the landscape of transcription factors, microRNAs, RNA binding proteins and long noncoding RNAs regulation in 17 immune-related pathways across 33 cancers. The potential immunology regulons are likely to exhibit higher expressions in immune cells, show expression perturbations in cancer, and are significantly correlated with immune cell infiltrations. We also identify a panel of clinically relevant immunology regulons across cancers. 
Moreover, the regulon atlas of immune-related pathways helps prioritizing cancer-related genes (i.e. ETV7, miR-146a-5p, ZFP36 and HCP5). We further identified two molecular subtypes of glioma (cold and hot tumour phenotypes), which were characterized by differences in immune cell infiltrations, expression of checkpoints, and prognosis. Finally, we developed a user-friendly resource, ImmReg (http://bio-bigdata.hrbmu.edu.cn/ImmReg/), with multiple modules to visualize, browse, and download immunology regulation. Our study provides a comprehensive landscape of immunology regulons, which will shed light on future development of RNA-based cancer immunotherapies.",ImmReg,0.986415982,NA,0,ImmReg,0.986415982,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2021 +34127402,http://bioinfo.vanderbilt.edu/database/Immu-Mela,"Immu-Mela: An open resource for exploring immunotherapy-related multidimensional genomic profiles in melanoma. There are increasing studies aimed to reveal genomic hallmarks predictive of immune checkpoint blockade (ICB) treatment response, which generated a large number of data and provided an unprecedented opportunity to identify response-related features and evaluate their robustness across cohorts. However, those valuable data sets are not easily accessible to the research community. To take full advantage of existing large-scale immuno-genomic profiles, we developed Immu-Mela (http://bioinfo.vanderbilt.edu/database/Immu-Mela/), a multidimensional immuno-genomic portal that provides interactive exploration of associations between ICB responsiveness and multi-omics features in melanoma, including genetic, transcriptomics, immune cells, and single-cell populations. Immu-Mela also enables integrative analysis of any two genomic features. We demonstrated the value of Immu-Mela by identifying known and novel genomic features associated with ICB response. 
In addition, Immu-Mela allows users to upload their data sets (unrestricted to any cancer types) and co-analyze with existing data to identify and validate signatures of interest. Immu-Mela reduces barriers between researchers and complex genomic data, facilitating discoveries in cancer immunotherapy.",Immu-Mela,0.990034401,NA,0,Immu-Mela,0.990034401,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/14/2021 +34456903,http://wap-lab.org:3200/ImmuCellDB,"ImmuCellDB: An Indicative Database of Immune Cell Composition From Different Tissues and Disease Conditions in Mouse and Human. Immune cell composition is highly divergent across different tissues and diseases. A comprehensive resource of tissue immune cells across different conditions in mouse and human will thus provide great understanding of the immune microenvironment of many diseases. Recently, computational methods for estimating immune cell abundance from tissue transcriptome data have been developed and are now widely used. Using these computational tools, large-scale estimation of immune cell composition across tissues and conditions should be possible using gene expression data collected from public databases. In total, 266 tissue types and 706 disease types in humans, as well as 143 tissue types and 61 disease types, and 206 genotypes in mouse had been included in a database we have named ImmuCellDB (http://wap-lab.org:3200/ImmuCellDB/). In ImmuCellDB, users can search and browse immune cell proportions based on tissues, disease or genotype in mouse or humans. Additionally, the variation and correlation of immune cell abundance and gene expression level between different conditions can be compared and viewed in this database. 
We believe that ImmuCellDB provides not only an indicative view of tissue-dependent or disease-dependent immune cell profiles, but also represents an easy way to pre-determine immune cell abundance and gene expression profiles for specific situations.",ImmuCellDB,0.99683398,NA,0,ImmuCellDB,0.99683398,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/12/2021 +25326331,http://immuco.bjmu.edu.cn,"ImmuCo: a database of gene co-expression in immune cells. Current gene co-expression databases and correlation networks do not support cell-specific analysis. Gene co-expression and expression correlation are subtly different phenomena, although both are likely to be functionally significant. Here, we report a new database, ImmuCo (http://immuco.bjmu.edu.cn), which is a cell-specific database that contains information about gene co-expression in immune cells, identifying co-expression and correlation between any two genes. The strength of co-expression of queried genes is indicated by signal values and detection calls, whereas expression correlation and strength are reflected by Pearson correlation coefficients. A scatter plot of the signal values is provided to directly illustrate the extent of co-expression and correlation. In addition, the database allows the analysis of cell-specific gene expression profile across multiple experimental conditions and can generate a list of genes that are highly correlated with the queried genes. Currently, the database covers 18 human cell groups and 10 mouse cell groups, including 20,283 human genes and 20,963 mouse genes. More than 8.6 × 10(8) and 7.4 × 10(8) probe set combinations are provided for querying each human and mouse cell group, respectively. Sample applications support the distinctive advantages of the database.",ImmuCo,0.996915221,NA,0,ImmuCo,0.996915221,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/17/2014 +28124611,http://www.biominingbu.org/immunemir,"ImmunemiR - A Database of Prioritized Immune miRNA Disease Associations and its Interactome. 
Background MicroRNAs are the key regulators of gene expression and their abnormal expression in the immune system may be associated with several human diseases such as inflammation, cancer and autoimmune diseases. Elucidation of miRNA disease association through the interactome will deepen the understanding of its disease mechanisms. A specialized database for immune miRNAs is highly desirable to demonstrate the immune miRNA disease associations in the interactome. Methods miRNAs specific to immune related diseases were retrieved from curated databases such as HMDD, miR2disease and PubMed literature based on MeSH classification of immune system diseases. The additional data such as miRNA target genes, genes coding protein-protein interaction information were compiled from related resources. Further, miRNAs were prioritized to specific immune diseases using random walk ranking algorithm. Results In total 245 immune miRNAs associated with 92 OMIM disease categories were identified from external databases. The resultant data were compiled as ImmunemiR, a database of prioritized immune miRNA disease associations. This database provides both text based annotation information and network visualization of its interactome. Conclusion To our knowledge, ImmunemiR is the first available database to provide a comprehensive repository of human immune disease associated miRNAs with network visualization options of its target genes, protein-protein interactions (PPI) and its disease associations. It is freely available at http://www.biominingbu.org/immunemir/.",ImmunemiR,0.989842415,NA,0,ImmunemiR,0.989842415,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +26362267,http://immunet.princeton.edu,"Interactive Big Data Resource to Elucidate Human Immune Pathways and Diseases. Many functionally important interactions between genes and proteins involved in immunological diseases and processes are unknown. 
The exponential growth in public high-throughput data offers an opportunity to expand this knowledge. To unlock human-immunology-relevant insight contained in the global biomedical research effort, including all public high-throughput datasets, we performed immunological-pathway-focused Bayesian integration of a comprehensive, heterogeneous compendium comprising 38,088 genome-scale experiments. The distillation of this knowledge into immunological networks of functional relationships between molecular entities (ImmuNet), and tools to mine this resource, are accessible to the public at http://immunet.princeton.edu. The predictive capacity of ImmuNet, established by rigorous statistical validation, is easily accessed by experimentalists to generate data-driven hypotheses. We demonstrate the power of this approach through the identification of unique host-virus interaction responses, and we show how ImmuNet complements genetic studies by predicting disease-associated genes. ImmuNet should be widely beneficial for investigating the mechanisms of the human immune system and immunological diseases.",ImmuNet,0.993981779,NA,0,ImmuNet,0.993981779,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/8/2015 +25988315,http://202.85.212.211/Account/ImmuSort.html,"ImmuSort, a database on gene plasticity and electronic sorting for immune cells. Gene expression is highly dynamic and plastic. We present a new immunological database, ImmuSort. Unlike other gene expression databases, ImmuSort provides a convenient way to view global differential gene expression data across thousands of experimental conditions in immune cells. It enables electronic sorting, which is a bioinformatics process to retrieve cell states associated with specific experimental conditions that are mainly based on gene expression intensity. 
A comparison of gene expression profiles reveals other applications, such as the evaluation of immune cell biomarkers and cell subsets, identification of cell specific and/or disease-associated genes or transcripts, comparison of gene expression in different transcript variants and probe set quality evaluation. A plasticity score is introduced to measure gene plasticity. Average rank and marker evaluation scores are used to evaluate biomarkers. The current version includes 31 human and 17 mouse immune cell groups, comprising 10,422 and 3,929 microarrays derived from public databases, respectively. A total of 20,283 human and 20,963 mouse genes are available to query in the database. Examples show the distinct advantages of the database. The database URL is http://202.85.212.211/Account/ImmuSort.html.",ImmuSort,0.995690107,NA,0,ImmuSort,0.995690107,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/19/2015 +28977416,http://ccb-web.cs.uni-saarland.de/imota,"IMOTA: an interactive multi-omics tissue atlas for the analysis of human miRNA-target interactions. Web repositories for almost all 'omics' types have been generated-detailing the repertoire of representatives across different tissues or cell types. A logical next step is the combination of these valuable sources. With IMOTA (interactive multi omics tissue atlas), we developed a database that includes 23 725 relations between miRNAs and 23 tissues, 310 932 relations between mRNAs and the same tissues as well as 63 043 relations between proteins and the 23 tissues in Homo sapiens. IMOTA also contains data on tissue-specific interactions, e.g. information on 331 413 miRNAs and target gene pairs that are jointly expressed in the considered tissues. By using intuitive filter and visualization techniques, it is with minimal effort possible to answer various questions. These include rather general questions but also requests specific for genes, miRNAs or proteins. 
An example for a general task could be 'identify all miRNAs, genes and proteins in the lung that are highly expressed and where experimental evidence proves that the miRNAs target the genes'. An example for a specific request for a gene and a miRNA could for example be 'In which tissues is miR-34c and its target gene BCL2 expressed?'. The IMOTA repository is freely available online at https://ccb-web.cs.uni-saarland.de/imota/.",IMOTA,0.99115777,NA,0,IMOTA,0.99115777,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24194600,http://www.mousephenotype.org,"The International Mouse Phenotyping Consortium Web Portal, a unified point of access for knockout mice and related phenotyping data. The International Mouse Phenotyping Consortium (IMPC) web portal (http://www.mousephenotype.org) provides the biomedical community with a unified point of access to mutant mice and rich collection of related emerging and existing mouse phenotype data. IMPC mouse clinics worldwide follow rigorous highly structured and standardized protocols for the experimentation, collection and dissemination of data. Dedicated 'data wranglers' work with each phenotyping center to collate data and perform quality control of data. An automated statistical analysis pipeline has been developed to identify knockout strains with a significant change in the phenotype parameters. Annotation with biomedical ontologies allows biologists and clinicians to easily find mouse strains with phenotypic traits relevant to their research. Data integration with other resources will provide insights into mammalian gene function and human disease. 
As phenotype data become available for every gene in the mouse, the IMPC web portal will become an invaluable tool for researchers studying the genetic contributions of genes to human diseases.",IMPC,0.863769869,NA,0,IMPC,0.863769869,1,26340938,NA,low_prob_best_name,do not remove,do not merge,NA,NA,NA,NA,11/4/2013 +29531263,http://cb.imsc.res.in/imppat,"IMPPAT: A curated database of Indian Medicinal Plants, Phytochemistry And Therapeutics. Phytochemicals of medicinal plants encompass a diverse chemical space for drug discovery. India is rich with a flora of indigenous medicinal plants that have been used for centuries in traditional Indian medicine to treat human maladies. A comprehensive online database on the phytochemistry of Indian medicinal plants will enable computational approaches towards natural product based drug discovery. In this direction, we present, IMPPAT, a manually curated database of 1742 Indian Medicinal Plants, 9596 Phytochemicals, And 1124 Therapeutic uses spanning 27074 plant-phytochemical associations and 11514 plant-therapeutic associations. Notably, the curation effort led to a non-redundant in silico library of 9596 phytochemicals with standard chemical identifiers and structure information. Using cheminformatic approaches, we have computed the physicochemical, ADMET (absorption, distribution, metabolism, excretion, toxicity) and drug-likeliness properties of the IMPPAT phytochemicals. We show that the stereochemical complexity and shape complexity of IMPPAT phytochemicals differ from libraries of commercial compounds or diversity-oriented synthesis compounds while being similar to other libraries of natural products. Within IMPPAT, we have filtered a subset of 960 potential druggable phytochemicals, of which majority have no significant similarity to existing FDA approved drugs, and thus, rendering them as good candidates for prospective drugs. 
IMPPAT database is openly accessible at: https://cb.imsc.res.in/imppat .",IMPPAT,0.988455057,NA,0,IMPPAT,0.988455057,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/12/2018 +31284879,http://bioinfodbs.kantiana.ru/ImtRDB,"ImtRDB: a database and software for mitochondrial imperfect interspersed repeats annotation. Background Mitochondria is a powerhouse of all eukaryotic cells that have its own circular DNA (mtDNA) encoding various RNAs and proteins. Somatic perturbations of mtDNA are accumulating with age thus it is of great importance to uncover the main sources of mtDNA instability. Recent analyses demonstrated that somatic mtDNA deletions depend on imperfect repeats of various nature between distant mtDNA segments. However, till now there are no comprehensive databases annotating all types of imperfect repeats in numerous species with sequenced complete mitochondrial genome as well as there are no algorithms capable to call all types of imperfect repeats in circular mtDNA. Results We implemented naïve algorithm of pattern recognition by analogy to standard dot-plot construction procedures allowing us to find both perfect and imperfect repeats of four main types: direct, inverted, mirror and complementary. Our algorithm is adapted to specific characteristics of mtDNA such as circularity and an excess of short repeats - it calls imperfect repeats starting from the length of 10 b.p. We constructed interactive web available database ImtRDB depositing perfect and imperfect repeats positions in mtDNAs of more than 3500 Vertebrate species. Additional tools, such as visualization of repeats within a genome, comparison of repeat densities among different genomes and a possibility to download all results make this database useful for many biologists. 
Our first analyses of the database demonstrated that mtDNA imperfect repeats (i) are usually short; (ii) associated with unfolded DNA structures; (iii) four types of repeats positively correlate with each other forming two equivalent pairs: direct and mirror versus inverted and complementary, with identical nucleotide content and similar distribution between species; (iv) abundance of repeats is negatively associated with GC content; (v) dinucleotides GC versus CG are overrepresented on light chain of mtDNA covered by repeats. Conclusions ImtRDB is available at http://bioinfodbs.kantiana.ru/ImtRDB/ . It is accompanied by the software calling all types of interspersed repeats with different level of degeneracy in circular DNA. This database and software can become a very useful tool in various areas of mitochondrial and chloroplast DNA research.",ImtRDB,0.997494876,NA,0,ImtRDB,0.997494876,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/8/2019 +29892515,http://cib.cf.ocha.ac.jp/slc,"iMusta4SLC: Database for the structural property and variations of solute carrier transporters. Membrane transporter proteins play important roles in transport of nutrients into the cell, in transport of waste out of the cell, in maintenance of homeostasis, and in signal transduction. Solute carrier (SLC) transporter is the superfamily, which has the largest number of genes (>400 in humans) in membrane transporter and consists of 52 families. SLC transporters carry a wide variety of substrates such as amino acids, peptides, saccharides, ions, neurotransmitters, lipids, hormones and related materials. Despite the apparent importance for the substrate transport, the information of sequence variation and three-dimensional structures have not been integrated to the level of providing new knowledge on the relationship to, for instance, diseases. 
We, therefore, built a new database named iMusta4SLC, which is available at http://cib.cf.ocha.ac.jp/slc/, that connected the data of structural properties and of pathogenic mutations on human SLC transporters. iMusta4SLC helps to investigate the structural features of pathogenic mutations on SLC transporters. With this database, we found that the mutations at the conserved arginine were frequently involved in diseases, and were located at a border between the membrane and the cytoplasm. Especially in SLC families 2 and 22, the conserved residues formed a large cluster at the border. In SLC2A1, one third of the reported pathogenic missense mutations were found in this conserved cluster.",iMusta4SLC,0.976851185,NA,0,iMusta4SLC,0.976851185,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/27/2018 +22127860,http://indel.bioinfo.sdu.edu.cn,"IndelFR: a database of indels in protein structures and their flanking regions. Insertion/deletion (indel) is one of the most common methods of protein sequence variation. Recent studies showed that indels could affect their flanking regions and they are important for protein function and evolution. Here, we describe the Indel Flanking Region Database (IndelFR, http://indel.bioinfo.sdu.edu.cn), which provides sequence and structure information about indels and their flanking regions in known protein domains. The indels were obtained through the pairwise alignment of homologous structures in SCOP superfamilies. The IndelFR database contains 2,925,017 indels with flanking regions extracted from 373,402 structural alignment pairs of 12,573 non-redundant domains from 1053 superfamilies. IndelFR provides access to information about indels and their flanking regions, including amino acid sequences, lengths, locations, secondary structure constitutions, hydrophilicity/hydrophobicity, domain information, 3D structures and so on. 
IndelFR has already been used for molecular evolution studies and may help to promote future functional studies of indels and their flanking regions.",IndelFR,0.997826949,Indel Flanking Region Database,0.962372184,IndelFR,0.997826949,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2011 +23750084,http://www.indiamed.info,"InDiaMed: A Comprehensive Database of Indian Medicinal plants for Diabetes. Unlabelled According to International Diabetes Federation (IDF), India has 62.4 million people with diabetes and by 2030 it is predicted that the number will rise to 100 million. Studies claim that there are around 410 experimentally proven Indian medicinal plants which have anti-diabetic activity, of which the mechanism of action of 109 plants has been elucidated or reported. So, the need of the hour is to explore the claims of Indian medicinal flora and open up the facets of many Indian plants which are being examined for their beneficial role in diabetes. So, we created a database (InDiaMed) of Indian medicinal plants that captures their role in anti-diabetic activity. InDiaMed's features include chemical, pharmacological, biochemical and geographical information of the medicinal plant, scientifically relevant information of the plant, and the coherent research done on it in the field of diabetes. The database also includes the list of poly-herbal formulations which are used for treatment of diabetes in India. Availability http://www.indiamed.info.",InDiaMed,0.99003005,NA,0,InDiaMed,0.99003005,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/13/2013 +33095885,http://clingen.igib.res.in/indigen,"IndiGenomes: a comprehensive resource of genetic variants from over 1000 Indian genomes. With the advent of next-generation sequencing, large-scale initiatives for mining whole genomes and exomes have been employed to better understand global or population-level genetic architecture. 
India encompasses more than 17% of the world population with extensive genetic diversity, but is under-represented in the global sequencing datasets. This gave us the impetus to perform and analyze the whole genome sequencing of 1029 healthy Indian individuals under the pilot phase of the 'IndiGen' program. We generated a compendium of 55,898,122 single allelic genetic variants from geographically distinct Indian genomes and calculated the allele frequency, allele count, allele number, along with the number of heterozygous or homozygous individuals. In the present study, these variants were systematically annotated using publicly available population databases and can be accessed through a browsable online database named as 'IndiGenomes' http://clingen.igib.res.in/indigen/. The IndiGenomes database will help clinicians and researchers in exploring the genetic component underlying medical conditions. Till date, this is the most comprehensive genetic variant resource for the Indian population and is made freely available for academic utility. The resource has also been accessed extensively by the worldwide community since it's launch.",IndiGenomes,0.907384753,NA,0,IndiGenomes,0.907384753,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +24324765,http://www.cbrc.kaust.edu.sa/indigo,"INDIGO - INtegrated data warehouse of microbial genomes with examples from the red sea extremophiles. Background The next generation sequencing technologies substantially increased the throughput of microbial genome sequencing. To functionally annotate newly sequenced microbial genomes, a variety of experimental and computational methods are used. Integration of information from different sources is a powerful approach to enhance such annotation. 
Functional analysis of microbial genomes, necessary for downstream experiments, crucially depends on this annotation but it is hampered by the current lack of suitable information integration and exploration systems for microbial genomes. Results We developed a data warehouse system (INDIGO) that enables the integration of annotations for exploration and analysis of newly sequenced microbial genomes. INDIGO offers an opportunity to construct complex queries and combine annotations from multiple sources starting from genomic sequence to protein domain, gene ontology and pathway levels. This data warehouse is aimed at being populated with information from genomes of pure cultures and uncultured single cells of Red Sea bacteria and Archaea. Currently, INDIGO contains information from Salinisphaera shabanensis, Haloplasma contractile, and Halorhabdus tiamatea - extremophiles isolated from deep-sea anoxic brine lakes of the Red Sea. We provide examples of utilizing the system to gain new insights into specific aspects on the unique lifestyle and adaptations of these organisms to extreme environments. Conclusions We developed a data warehouse system, INDIGO, which enables comprehensive integration of information from various resources to be used for annotation, exploration and analysis of microbial genomes. It will be regularly updated and extended with new genomes. It is aimed to serve as a resource dedicated to the Red Sea microbes. In addition, through INDIGO, we provide our Automatic Annotation of Microbial Genomes (AAMG) pipeline. The INDIGO web server is freely available at http://www.cbrc.kaust.edu.sa/indigo.",INDIGO,0.95893389,Annotation of Microbial Genomes,0.724162723,INDIGO,0.95893389,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/6/2013 +29047407,http://fmf.igh.cnrs.fr/ISSAID/infevers,"A web-based collection of genotype-phenotype associations in hereditary recurrent fevers from the Eurofever registry. 
Background Hereditary recurrent fevers (HRF) are a group of rare monogenic diseases leading to recurrent inflammatory flares. A large number of variants has been described for the four genes associated with the best known HRF, namely MEFV, NLRP3, MVK, TNFRSF1A. The Infevers database ( http://fmf.igh.cnrs.fr/ISSAID/infevers ) is a large international registry collecting variants reported in these genes. However, no genotype-phenotype associations are provided, but only the clinical phenotype of the first patient(s) described for each mutation. The aim of this study is to develop a registry of genotype-phenotype associations observed in patients with HRF, enrolled and validated in the Eurofever registry. Results Genotype-phenotype associations observed in all the patients with HRF enrolled in the Eurofever registry were retrospectively analyzed. For autosomal dominant diseases (CAPS and TRAPS), all mutations were individually analyzed. For autosomal recessive diseases (FMF and MKD), homozygous and heterozygous combinations were described. Mean age of onset, disease course (recurrent or chronic), mean duration of fever episodes, clinical manifestations associated with fever episodes, atypical manifestations, complications and response to treatment were also studied. Data observed in 751 patients (346 FMF, 133 CAPS, 114 MKD, 158 TRAPS) included in the Eurofever registry and validated by experts were summarized in Tables. A total of 149 variants were described: 46 TNFRSF1A and 27 NLRP3 variants, as well as various combinations of 48 MVK and 28 MEFV variants were available. Conclusions We provide a potentially useful tool for physicians dealing with HRF, namely a registry of genotype-phenotype associations for patients enrolled in the Eurofever registry. 
This tool is complementary to the Infevers database and will be available at the Eurofever and Infevers websites.",Infevers,0.637892187,NA,0,Infevers,0.637892187,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/18/2017 +31142855,http://ibdmdb.org,"Multi-omics of the gut microbial ecosystem in inflammatory bowel diseases. Inflammatory bowel diseases, which include Crohn's disease and ulcerative colitis, affect several million individuals worldwide. Crohn's disease and ulcerative colitis are complex diseases that are heterogeneous at the clinical, immunological, molecular, genetic, and microbial levels. Individual contributing factors have been the focus of extensive research. As part of the Integrative Human Microbiome Project (HMP2 or iHMP), we followed 132 subjects for one year each to generate integrated longitudinal molecular profiles of host and microbial activity during disease (up to 24 time points each; in total 2,965 stool, biopsy, and blood specimens). Here we present the results, which provide a comprehensive view of functional dysbiosis in the gut microbiome during inflammatory bowel disease activity. We demonstrate a characteristic increase in facultative anaerobes at the expense of obligate anaerobes, as well as molecular disruptions in microbial transcription (for example, among clostridia), metabolite pools (acylcarnitines, bile acids, and short-chain fatty acids), and levels of antibodies in host serum. Periods of disease activity were also marked by increases in temporal variability, with characteristic taxonomic, functional, and biochemical shifts. Finally, integrative analysis identified microbial, biochemical, and host factors central to this dysregulation. 
The study's infrastructure resources, results, and data, which are available through the Inflammatory Bowel Disease Multi'omics Database ( http://ibdmdb.org ), provide the most comprehensive description to date of host and microbial activities in inflammatory bowel diseases.",NA,0,Inflammatory Bowel,0.714331329,Inflammatory Bowel,0.714331329,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,5/29/2019 +23180781,http://www.innatedb.com,"InnateDB: systems biology of innate immunity and beyond--recent updates and continuing curation. InnateDB (http://www.innatedb.com) is an integrated analysis platform that has been specifically designed to facilitate systems-level analyses of mammalian innate immunity networks, pathways and genes. In this article, we provide details of recent updates and improvements to the database. InnateDB now contains >196 000 human, mouse and bovine experimentally validated molecular interactions and 3000 pathway annotations of relevance to all mammalian cellular systems (i.e. not just immune relevant pathways and interactions). In addition, the InnateDB team has, to date, manually curated in excess of 18 000 molecular interactions of relevance to innate immunity, providing unprecedented insight into innate immunity networks, pathways and their component molecules. More recently, InnateDB has also initiated the curation of allergy- and asthma-related interactions. Furthermore, we report a range of improvements to our integrated bioinformatics solutions including web service access to InnateDB interaction data using Proteomics Standards Initiative Common Query Interface, enhanced Gene Ontology analysis for innate immunity, and the availability of new network visualizations tools. 
Finally, the recent integration of bovine data makes InnateDB the first integrated network analysis platform for this agriculturally important model organism.",InnateDB,0.998630166,NA,0,InnateDB,0.998630166,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/24/2012 +22120663,http://www.inoh.org,"INOH: ontology-based highly structured database of signal transduction pathways. The Integrating Network Objects with Hierarchies (INOH) database is a highly structured, manually curated database of signal transduction pathways including Mammalia, Xenopus laevis, Drosophila melanogaster, Caenorhabditis elegans and canonical. Since most pathway knowledge resides in scientific articles, the database focuses on curating and encoding textual knowledge into a machine-processable form. We use a hierarchical pathway representation model with a compound graph, and every pathway component in the INOH database is annotated by a set of uniquely developed ontologies. Finally, we developed the Similarity Search using the combination of a compound graph and hierarchical ontologies. The INOH database is to be a good resource for many users who want to analyze a large protein network. INOH ontologies and 73 signal transduction and 29 metabolic pathway diagrams (including over 6155 interactions and 3395 protein entities) are freely available in INOH XML and BioPAX formats. Database URL: http://www.inoh.org/",INOH,0.981793324,Integrating Network Objects with Hierarchies,0.783673547,INOH,0.981793324,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/26/2011 +"22080546, 26657633, 29190397, 33166387",http://www.insdc.org,"The International Nucleotide Sequence Database Collaboration. The members of the International Nucleotide Sequence Database Collaboration (INSDC; http://www.insdc.org) set out to capture, preserve and present globally comprehensive public domain nucleotide sequence information. 
The work of the long-standing collaboration includes the provision of data formats, annotation conventions and routine global data exchange. Among the many developments to INSDC resources in 2011 are the newly launched BioProject database and improved handling of assembly information. In this article, we outline INSDC services and update the reader on developments in 2011.",INSDC,0.978026807,International Nucleotide Sequence Database Collaboration,0.816639259,INSDC,0.978026807,4,23180798,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2021 +26578584,http://www.insect-genome.com,"InsectBase: a resource for insect genomes and transcriptomes. The genomes and transcriptomes of hundreds of insects have been sequenced. However, insect community lacks an integrated, up-to-date collection of insect gene data. Here, we introduce the first release of InsectBase, available online at http://www.insect-genome.com. The database encompasses 138 insect genomes, 116 insect transcriptomes, 61 insect gene sets, 36 gene families of 60 insects, 7544 miRNAs of 69 insects, 96,925 piRNAs of Drosophila melanogaster and Chilo suppressalis, 2439 lncRNA of Nilaparvata lugens, 22,536 pathways of 78 insects, 678,881 untranslated regions (UTR) of 84 insects and 160,905 coding sequences (CDS) of 70 insects. This release contains over 12 million sequences and provides search functionality, a BLAST server, GBrowse, insect pathway construction, a Facebook-like network for the insect community (iFacebook), and phylogenetic analysis of selected genes.",InsectBase,0.997729719,NA,0,InsectBase,0.997729719,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +33507270,http://www.insect-genome.com/Sexdb,"InSexBase: an annotated genomic resource of sex chromosomes and sex-biased genes in insects. . Sex determination and the regulation of sexual dimorphism are among the most fascinating topics in modern biology. 
As the most species-rich group of sexually reproducing organisms on Earth, insects have multiple sex determination systems. Though sex chromosomes and sex-biased genes are well-studied in dozens of insects, their gene sequences are scattered in various databases. Moreover, a shortage of annotation hinders the deep mining of these data. Here, we collected the chromosome-level sex chromosome data of 49 insect species, including 34 X chromosomes, 15 Z chromosomes, 5 W chromosomes and 2 Y chromosomes. We also obtained Y-linked contigs of four insects species-Anopheles gambiae, Drosophila innubila, Drosophila yakuba and Tribolium castaneum. The unannotated chromosome-level sex chromosomes were annotated using a standard pipeline, yielding a total of 123 030 protein-coding genes, 2 159 427 repeat sequences, 894 miRNAs, 1574 rRNAs, 5105 tRNAs, 395 snoRNAs (small nucleolar RNA), 54 snRNAs (small nuclear RNA) and 5959 other ncRNAs (non-coding RNA). In addition, 36 781 sex-biased genes were identified by analyzing 62 RNA-seq (RNA sequencing) datasets. Together with 5707 sex-biased genes from the Drosophila genus collected from the Sex-Associated Gene Database, we obtained a total of 42 488 sex-biased genes from 13 insect species. All these data were deposited into InSexBase, a new user-friendly database of insect sex chromosomes and sex-biased genes. Database URL: http://www.insect-genome.com/Sexdb/.",InSexBase,0.984699667,NA,0,InSexBase,0.984699667,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +23599502,http://instruct.yulab.org,"INstruct: a database of high-quality 3D structurally resolved protein interactome networks. Unlabelled INstruct is a database of high-quality, 3D, structurally resolved protein interactome networks in human and six model organisms. 
INstruct combines the scale of available high-quality binary protein interaction data with the specificity of atomic-resolution structural information derived from co-crystal evidence using a tested interaction interface inference method. Its web interface is designed to allow for flexible search based on standard and organism-specific protein and gene-naming conventions, visualization of protein architecture highlighting interaction interfaces and viewing and downloading custom 3D structurally resolved interactome datasets. Availability INstruct is freely available on the web at http://instruct.yulab.org with all major browsers supported.",INstruct,0.997730315,NA,0,INstruct,0.997730315,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/18/2013 +24234451,"http://www.ebi.ac.uk/intact, http://www.imexconsortium.org","The MIntAct project--IntAct as a common curation platform for 11 molecular interaction databases. IntAct (freely available at http://www.ebi.ac.uk/intact) is an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions. IntAct has developed a sophisticated web-based curation tool, capable of supporting both IMEx- and MIMIx-level curation. This tool is now utilized by multiple additional curation teams, all of whom annotate data directly into the IntAct database. Members of the IntAct team supply appropriate levels of training, perform quality control on entries and take responsibility for long-term data maintenance. Recently, the MINT and IntAct databases decided to merge their separate efforts to make optimal use of limited developer resources and maximize the curation output. All data manually curated by the MINT curators have been moved into the IntAct database at EMBL-EBI and are merged with the existing IntAct dataset. 
Both IntAct and MINT are active contributors to the IMEx consortium (http://www.imexconsortium.org).",IntAct,0.997548819,NA,0,IntAct,0.997548819,1,33206959,"22121220.0, 33206959.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/13/2013 +22121220,"http://www.imexconsortium.org, http://www.ebi.ac.uk/intact","The IntAct molecular interaction database in 2012. IntAct is an open-source, open data molecular interaction database populated by data either curated from the literature or from direct data depositions. Two levels of curation are now available within the database, with both IMEx-level annotation and less detailed MIMIx-compatible entries currently supported. As from September 2011, IntAct contains approximately 275,000 curated binary interaction evidences from over 5000 publications. The IntAct website has been improved to enhance the search process and in particular the graphical display of the results. New data download formats are also available, which will facilitate the inclusion of IntAct's data in the Semantic Web. IntAct is an active contributor to the IMEx consortium (http://www.imexconsortium.org). IntAct source code and data are freely available at http://www.ebi.ac.uk/intact.",IntAct,0.997128367,NA,0,IntAct,0.997128367,1,NA,"24234451.0, 33206959.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/24/2011 +33206959,http://www.ebi.ac.uk/intact,"The IMEx coronavirus interactome: an evolving map of Coronaviridae-host molecular interactions. . The current coronavirus disease of 2019 (COVID-19) pandemic, caused by the severe acute respiratory syndrome coronavirus (SARS-CoV)-2, has spurred a wave of research of nearly unprecedented scale. Among the different strategies that are being used to understand the disease and develop effective treatments, the study of physical molecular interactions can provide fine-grained resolution of the mechanisms behind the virus biology and the human organism response. 
We present a curated dataset of physical molecular interactions focused on proteins from SARS-CoV-2, SARS-CoV-1 and other members of the Coronaviridae family that has been manually extracted by International Molecular Exchange (IMEx) Consortium curators. Currently, the dataset comprises over 4400 binarized interactions extracted from 151 publications. The dataset can be accessed in the standard formats recommended by the Proteomics Standards Initiative (HUPO-PSI) at the IntAct database website (https://www.ebi.ac.uk/intact) and will be continuously updated as research on COVID-19 progresses.",IntAct,0.970127106,NA,0,IntAct,0.970127106,1,24234451,"22121220.0, 24234451.0",low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2020 +24001185,http://www.evocell.org/inTB,"inTB - a data integration platform for molecular and clinical epidemiological analysis of tuberculosis. Background Tuberculosis is currently the second highest cause of death from infectious diseases worldwide. The emergence of multi and extensive drug resistance is threatening to make tuberculosis incurable. There is growing evidence that the genetic diversity of Mycobacterium tuberculosis may have important clinical consequences. Therefore, combining genetic, clinical and socio-demographic data is critical to understand the epidemiology of this infectious disease, and how virulence and other phenotypic traits evolve over time. This requires dedicated bioinformatics platforms, capable of integrating and enabling analyses of this heterogeneous data. Results We developed inTB, a web-based system for integrated warehousing and analysis of clinical, socio-demographic and molecular data for Mycobacterium sp. isolates. 
As a database it can organize and display data from any of the standard genotyping methods (SNP, MIRU-VNTR, RFLP and spoligotype), as well as an extensive array of clinical and socio-demographic variables that are used in multiple countries to characterize the disease. Through the inTB interface it is possible to insert and download data, browse the database and search specific parameters. New isolates are automatically classified into strains according to an internal reference, and data uploaded or typed in is checked for internal consistency. As an analysis framework, the system provides simple, point and click analysis tools that allow multiple types of data plotting, as well as simple ways to download data for external analysis. Individual trees for each genotyping method are available, as well as a super tree combining all of them. The integrative nature of inTB grants the user the ability to generate trees for filtered subsets of data crossing molecular and clinical/socio-demografic information. inTB is built on open source software, can be easily installed locally and easily adapted to other diseases. Its design allows for use by research laboratories, hospitals or public health authorities. The full source code as well as ready to use packages is available at http://www.evocell.org/inTB. Conclusions To the best of our knowledge, this is the only system capable of integrating different types of molecular data with clinical and socio-demographic data, empowering researchers and clinicians with easy to use analysis tools that were not possible before.",inTB,0.991402507,NA,0,inTB,0.991402507,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/30/2013 +22493526,http://introndb.bicpu.edu.in,"IntDb: A comprehensive database for classified introns of saccharomyces & human. Unlabelled Introns (intra-genic) are non-coding regions of several eukaryotic genes. However, their role in regulation of transcription, embryonic development, stimulate gene (HEG) is apparent in recent years. 
Thus current research focuses on mutation in introns and their influence in causing various diseases. Though many available intron databases like YIDB, IDB, ExInt, GISSD, FUGOID, etc. discusses on various aspects of introns but none of them have classified the introns where identification of start intron is found to be important which mainly regulates the various activities of protein at gene level. This lead to an idea for development of ""Intdb""; a database meant for classifying introns as start, middle and stop on the basis of position of specific consensus site. Information provided in IntDb is useful for gene prediction, determination of splicing sites and identification of diseases. In addition, the main focus is on violation of consensus rule and frequency of other deviations observed in classified introns. Further, GC content, length variations according to the biased residues and occurrence of consensus pattern to discover potential role of introns is also emphasized in IntDb. Availability The database is available for free at http://introndb.bicpu.edu.in/",IntDb,0.989881754,NA,0,IntDb,0.989881754,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/17/2012 +22792232,http://www.hmpdacc-resources.org/imgm_hmp,"IMG/M-HMP: a metagenome comparative analysis system for the Human Microbiome Project. The Integrated Microbial Genomes and Metagenomes (IMG/M) resource is a data management system that supports the analysis of sequence data from microbial communities in the integrated context of all publicly available draft and complete genomes from the three domains of life as well as a large number of plasmids and viruses. IMG/M currently contains thousands of genomes and metagenome samples with billions of genes. IMG/M-HMP is an IMG/M data mart serving the US National Institutes of Health (NIH) Human Microbiome Project (HMP), focussed on HMP generated metagenome datasets, and is one of the central resources provided from the HMP Data Analysis and Coordination Center (DACC). 
IMG/M-HMP is available at http://www.hmpdacc-resources.org/imgm_hmp/.",IMG/M,0.961702744,Integrated Microbial Genomes and Metagenomes,0.979933851,Integrated Microbial Genomes and Metagenomes,0.979933851,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/5/2012 +31665416,"http://img.jgi.doe.gov/abc-public, http://gold.jgi.doe.gov","IMG-ABC v.5.0: an update to the IMG/Atlas of Biosynthetic Gene Clusters Knowledgebase. Microbial secondary metabolism is a reservoir of bioactive compounds of immense biotechnological and biomedical potential. The biosynthetic machinery responsible for the production of these secondary metabolites (SMs) (also called natural products) is often encoded by collocated groups of genes called biosynthetic gene clusters (BGCs). High-throughput genome sequencing of both isolates and metagenomic samples combined with the development of specialized computational workflows is enabling systematic identification of BGCs and the discovery of novel SMs. In order to advance exploration of microbial secondary metabolism and its diversity, we developed the largest publicly available database of predicted BGCs combined with experimentally verified BGCs, the Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters (IMG-ABC) (https://img.jgi.doe.gov/abc-public). Here we describe the first major content update of the IMG-ABC knowledgebase, since its initial release in 2015, refreshing the BGC prediction pipeline with the latest version of antiSMASH (v5) as well as presenting the data in the context of underlying environmental metadata sourced from GOLD (https://gold.jgi.doe.gov/). 
This update has greatly improved the quality and expanded the types of predicted BGCs compared to the previous version.",IMG-ABC,0.966223431,Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters,0.978851591,Integrated Microbial Genomes Atlas of Biosynthetic gene Clusters,0.978851591,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +30535108,http://interacdome.princeton.edu,"Systematic domain-based aggregation of protein structures highlights DNA-, RNA- and other ligand-binding positions. Domains are fundamental subunits of proteins, and while they play major roles in facilitating protein-DNA, protein-RNA and other protein-ligand interactions, a systematic assessment of their various interaction modes is still lacking. A comprehensive resource identifying positions within domains that tend to interact with nucleic acids, small molecules and other ligands would expand our knowledge of domain functionality as well as aid in detecting ligand-binding sites within structurally uncharacterized proteins. Here, we introduce an approach to identify per-domain-position interaction 'frequencies' by aggregating protein co-complex structures by domain and ascertaining how often residues mapping to each domain position interact with ligands. We perform this domain-based analysis on ∼91000 co-complex structures, and infer positions involved in binding DNA, RNA, peptides, ions or small molecules across 4128 domains, which we refer to collectively as the InteracDome. Cross-validation testing reveals that ligand-binding positions for 2152 domains are highly consistent and can be used to identify residues facilitating interactions in ∼63-69% of human genes. Our resource of domain-inferred ligand-binding sites should be a great aid in understanding disease etiology: whereas these sites are enriched in Mendelian-associated and cancer somatic mutations, they are depleted in polymorphisms observed across healthy populations. 
The InteracDome is available at http://interacdome.princeton.edu.",InteracDome,0.943789065,NA,0,InteracDome,0.943789065,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +22053089,http://biodev.cea.fr/interevol,"InterEvol database: exploring the structure and evolution of protein complex interfaces. Capturing how the structures of interacting partners evolved at their binding interfaces is a fundamental issue for understanding interactomes evolution. In that scope, the InterEvol database was designed for exploring 3D structures of homologous interfaces of protein complexes. For every chain forming a complex in the protein data bank (PDB), close and remote structural interologs were identified providing essential snapshots for studying interfaces evolution. The database provides tools to retrieve and visualize these structures. In addition, pre-computed multiple sequence alignments of most likely interologs retrieved from a wide range of species can be downloaded to enrich the analysis. The database can be queried either directly by pdb code or keyword but also from the sequence of one or two partners. Interologs multiple sequence alignments can also be recomputed online with tailored parameters using the InterEvolAlign facility. Last, an InterEvol PyMol plugin was developed to improve interactive exploration of structures versus sequence alignments at the interfaces of complexes. Based on a series of automatic methods to extract structural and sequence data, the database will be monthly updated. Structures coordinates and sequence alignments can be queried and downloaded from the InterEvol web interface at http://biodev.cea.fr/interevol/.",InterEvol,0.992820978,NA,0,InterEvol,0.992820978,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/3/2011 +25097383,http://intergenicdb.bioinfoucs.com,"IntergenicDB: a database for intergenic sequences. Unlabelled A whole genome contains not only coding regions, but also non-coding regions. 
These are located between the end of a given coding region and the beginning of the following coding region. For this reason, the information about gene regulation process underlies in intergenic regions. There is no easy way to obtain intergenic regions from current available databases. IntergenicDB was developed to integrate data of intergenic regions and their gene related information from NCBI databases. The main goal of INTERGENICDB is to offer friendly database for intergenic sequences of bacterial genomes. Availability http://intergenicdb.bioinfoucs.com/",IntergenicDB,0.977055252,NA,0,IntergenicDB,0.977055252,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/30/2014 +33502860,http://intermetaldb.biotech.uni.wroc.pl,"InterMetalDB: A Database and Browser of Intermolecular Metal Binding Sites in Macromolecules with Structural Information. InterMetalDB is a free-of-charge database and browser of intermolecular metal binding sites that are present on the interfaces of macromolecules forming larger assemblies based on structural information deposited in Protein Data Bank (PDB). It can be found and freely used at https://intermetaldb.biotech.uni.wroc.pl/. InterMetalDB collects the interfacial binding sites with involvement of metal ions and clusters them on the basis of 50% sequence similarity and the nearest metal environment (5 Å radius). The data are available through the web interface where they can be queried, viewed, and downloaded. Complexity of the query depends on the user, because the questions in the query are connected with each other by a logical AND. InterMetalDB offers several useful options for filtering records including searching for structures by particular parameters such as structure resolution, structure description, and date of deposition. Records can be filtered by coordinated metal ion, number of bound amino acid residues, coordination sphere, and other features. 
InterMetalDB is regularly updated and will continue to be regularly updated with new content in the future. InterMetalDB is a useful tool for all researchers interested in metalloproteins, protein engineering, and metal-driven oligomerization.",InterMetalDB,0.997499466,NA,0,InterMetalDB,0.997499466,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/27/2021 +23180798,http://www.insdc.org,"The International Nucleotide Sequence Database Collaboration. The International Nucleotide Sequence Database Collaboration (INSDC; http://www.insdc.org), one of the longest-standing global alliances of biological data archives, captures, preserves and provides comprehensive public domain nucleotide sequence information. Three partners of the INSDC work in cooperation to establish formats for data and metadata and protocols that facilitate reliable data submission to their databases and support continual data exchange around the world. In this article, the INSDC current status and update for the year of 2012 are presented. Among discussed items of international collaboration meeting in 2012, BioSample database and changes in submission are described as topics.",INSDC,0.902326147,International Nucleotide Sequence Database Collaboration,0.915257774,International Nucleotide Sequence Database Collaboration,0.915257774,1,"22080546.0, 26657633.0, 29190397.0, 33166387.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,11/24/2012 +31838187,http://isaregistries.org,"International Severe Asthma Registry: Mission Statement. Regional and/or national severe asthma registries provide valuable country-specific information. However, they are often limited in scope within the broader definitions of severe asthma, have insufficient statistical power to answer many research questions, lack intraoperability to share lessons learned, and have fundamental differences in data collected, making cross comparisons difficult. 
What is missing is a worldwide registry which brings all severe asthma data together in a cohesive way, under a single umbrella, based on standardized data collection protocols, permitting data to be shared seamlessly. The International Severe Asthma Registry (ISAR; http://isaregistries.org/) is the first global adult severe asthma registry. It is a joint initiative where national registries (both newly created and preexisting) retain ownership of their own data but open their borders and share data with ISAR for ethically approved research purposes. Its strength comes from collection of patient-level, anonymous, longitudinal, real-life, standardized, high-quality data (using a core set of variables) from countries across the world, combined with organizational structure, database experience, inclusivity/openness, and clinical, academic, and database expertise. This gives ISAR sufficient statistical power to answer important research questions, sufficient data standardization to compare across countries and regions, and the structure and expertise necessary to ensure its continuance and the scientific integrity and clinical applicability of its research. ISAR offers a unique opportunity to implement existing knowledge, generate new knowledge, and identify the unknown, therefore promoting new research. The aim of this commentary is to fully describe how ISAR may improve our understanding of severe asthma.",ISAR,0.945667505,International Severe Asthma Registry,0.96681873,International Severe Asthma Registry,0.96681873,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/12/2019 +"22096229, 25428371, 27899635, 30398656, 33156333",http://www.ebi.ac.uk/interpro,"InterPro in 2011: new developments in the family and domain prediction database. 
InterPro (http://www.ebi.ac.uk/interpro/) is a database that integrates diverse information about protein families, domains and functional sites, and makes it freely available to the public via Web-based interfaces and services. Central to the database are diagnostic models, known as signatures, against which protein sequences can be searched to determine their potential function. InterPro has utility in the large-scale analysis of whole genomes and meta-genomes, as well as in characterizing individual protein sequences. Herein we give an overview of new developments in the database and its associated software since 2009, including updates to database content, curation processes and Web and programmatic interfaces.",InterPro,0.995504975,NA,0,InterPro,0.995504975,5,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +22494395,http://www.interstoredb.org,"InterStoreDB: a generic integration resource for genetic and genomic data. Associating phenotypic traits and quantitative trait loci (QTL) to causative regions of the underlying genome is a key goal in agricultural research. InterStoreDB is a suite of integrated databases designed to assist in this process. The individual databases are species independent and generic in design, providing access to curated datasets relating to plant populations, phenotypic traits, genetic maps, marker loci and QTL, with links to functional gene annotation and genomic sequence data. Each component database provides access to associated metadata, including data provenance and parameters used in analyses, thus providing users with information to evaluate the relative worth of any associations identified. The databases include CropStoreDB, for management of population, genetic map, QTL and trait measurement data, SeqStoreDB for sequence-related data and AlignStoreDB, which stores sequence alignment information, and allows navigation between genetic and genomic datasets. 
Genetic maps are visualized and compared using the CMAP tool, and functional annotation from sequenced genomes is provided via an EnsEMBL-based genome browser. This framework facilitates navigation of the multiple biological domains involved in genetics and genomics research in a transparent manner within a single portal. We demonstrate the value of InterStoreDB as a tool for Brassica research. InterStoreDB is available from: http://www.interstoredb.org.",InterStoreDB,0.997662365,NA,0,InterStoreDB,0.997662365,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2012 +23282057,http://compbio.ddns.comp.nus.edu.sg:8080/IntPath,"IntPath--an integrated pathway gene relationship database for model organisms and important pathogens. Background Pathway data are important for understanding the relationship between genes, proteins and many other molecules in living organisms. Pathway gene relationships are crucial information for guidance, prediction, reference and assessment in biochemistry, computational biology, and medicine. Many well-established databases--e.g., KEGG, WikiPathways, and BioCyc--are dedicated to collecting pathway data for public access. However, the effectiveness of these databases is hindered by issues such as incompatible data formats, inconsistent molecular representations, inconsistent molecular relationship representations, inconsistent referrals to pathway names, and incomprehensive data from different databases. Results In this paper, we overcome these issues through extraction, normalization and integration of pathway data from several major public databases (KEGG, WikiPathways, BioCyc, etc). We build a database that not only hosts our integrated pathway gene relationship data for public access but also maintains the necessary updates in the long run. This public repository is named IntPath (Integrated Pathway gene relationship database for model organisms and important pathogens). Four organisms--S. cerevisiae, M. tuberculosis H37Rv, H. Sapiens and M. 
musculus--are included in this version (V2.0) of IntPath. IntPath uses the ""full unification"" approach to ensure no deletion and no introduced noise in this process. Therefore, IntPath contains much richer pathway-gene and pathway-gene pair relationships and much larger number of non-redundant genes and gene pairs than any of the single-source databases. The gene relationships of each gene (measured by average node degree) per pathway are significantly richer. The gene relationships in each pathway (measured by average number of gene pairs per pathway) are also considerably richer in the integrated pathways. Moderate manual curation are involved to get rid of errors and noises from source data (e.g., the gene ID errors in WikiPathways and relationship errors in KEGG). We turn complicated and incompatible xml data formats and inconsistent gene and gene relationship representations from different source databases into normalized and unified pathway-gene and pathway-gene pair relationships neatly recorded in simple tab-delimited text format and MySQL tables, which facilitates convenient automatic computation and large-scale referencing in many related studies. IntPath data can be downloaded in text format or MySQL dump. IntPath data can also be retrieved and analyzed conveniently through web service by local programs or through web interface by mouse clicks. Several useful analysis tools are also provided in IntPath. Conclusions We have overcome in IntPath the issues of compatibility, consistency, and comprehensiveness that often hamper effective use of pathway databases. We have included four organisms in the current release of IntPath. Our methodology and programs described in this work can be easily applied to other organisms; and we will include more model organisms and important pathogens in future releases of IntPath. 
IntPath maintains regular updates and is freely available at http://compbio.ddns.comp.nus.edu.sg:8080/IntPath.",IntPath,0.992235899,Integrated Pathway gene relationship database,0.725374699,IntPath,0.992235899,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/12/2012 +24178034,http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL,"IDEAL in 2014 illustrates interaction networks composed of intrinsically disordered proteins and their binding partners. IDEAL (Intrinsically Disordered proteins with Extensive Annotations and Literature, http://www.ideal.force.cs.is.nagoya-u.ac.jp/IDEAL/) is a collection of intrinsically disordered proteins (IDPs) that cannot adopt stable globular structures under physiological conditions. Since its previous publication in 2012, the number of entries in IDEAL has almost tripled (120 to 340). In addition to the increase in quantity, the quality of IDEAL has been significantly improved. The new IDEAL incorporates the interactions of IDPs and their binding partners more explicitly, and illustrates the protein-protein interaction (PPI) networks and the structures of protein complexes. Redundant experimental data are arranged based on the clustering of Protein Data Bank entries, and similar sequences with the same binding mode are grouped. As a result, the new IDEAL presents more concise and informative experimental data. Nuclear magnetic resonance (NMR) disorder is annotated in a systematic manner, by identifying the regions with large deviations among the NMR models. The ordered/disordered and new domain predictions by DICHOT are available, as well as the domain assignments by HMMER. Some examples of the PPI networks and the highly deviated regions derived from NMR models will be described, together with other advances. 
These enhancements will facilitate deeper understanding of IDPs, in terms of their flexibility, plasticity and promiscuity.",IDEAL,0.991460741,Intrinsically Disordered proteins with Extensive Annotations and Literature,0.993429282,Intrinsically Disordered proteins with Extensive Annotations and Literature,0.993429282,1,22067451,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,10/30/2013 +30949679,http://www.nextgenbioinformatics.org/IntronDB,"IntronDB: a database for eukaryotic intron features. Summary The rate and extent of unbalanced eukaryotic intron changes exhibit dynamic patterns for different lineages of species or certain functional groups of genes with varied spatio-temporal expression modes affected by selective pressure. To date, only a few key conserved splicing signals or regulatory elements have been identified in introns and little is known about the remaining intronic regions. To trace the evolutionary trajectory of spliceosomal introns from available genomes under a unified framework, we present IntronDB, which catalogs ∼50 000 000 introns from over 1000 genomes spanning the major eukaryotic clades in the tree of life. Based on the position of introns relative to coding regions, it categorizes introns into three groups, such as 5'UTR, CDS and 3'UTR and subsequently divides CDS introns into three categories, such as phase 0, phase 1 and phase 2. It provides the quality evaluation for each sequence entry and characterizes the intronic parameters including number, size, sequence composition and positioning information as well as the features for exons and genes, making possible the comparisons between introns and exons. It reports the dinucleotides around the intron boundary and displays the consensus sequence features for all introns, small introns and large introns for each genome. 
By incorporating the taxonomic assignment of genomes, it performs high-level or genome-wide statistical analysis for single feature and coupled features both in a single genome and across multiple genomes. It offers functionalities to browse the data from representative protein-coding transcripts and download the data from all transcripts from protein-coding genes. Availability and implementation http://www.nextgenbioinformatics.org/IntronDB. Supplementary information Supplementary data are available at Bioinformatics online.",IntronDB,0.99790293,NA,0,IntronDB,0.99790293,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2019 +27888793,http://ciencias.medellin.unal.edu.co/gruposdeinvestigacion/prospeccionydisenobiomoleculas/InverPep/public/home_en,"InverPep: A database of invertebrate antimicrobial peptides. Objectives The aim of this work was to construct InverPep, a database specialised in experimentally validated antimicrobial peptides (AMPs) from invertebrates. Methods AMP data contained in InverPep were manually curated from other databases and the scientific literature. MySQL was integrated with the development platform Laravel; this framework allows to integrate programming in PHP with HTML and was used to design the InverPep web page's interface. InverPep contains 18 separated fields, including InverPep code, phylum and species source, peptide name, sequence, peptide length, secondary structure, molar mass, charge, isoelectric point, hydrophobicity, Boman index, aliphatic index and percentage of hydrophobic amino acids. CALCAMPI, an algorithm to calculate the physicochemical properties of multiple peptides simultaneously, was programmed in PERL language. Results To date, InverPep contains 702 experimentally validated AMPs from invertebrate species. All of the peptides contain information associated with their source, physicochemical properties, secondary structure, biological activity and links to external literature. 
Most AMPs in InverPep have a length between 10 and 50 amino acids, a positive charge, a Boman index between 0 and 2 kcal/mol, and 30-50% hydrophobic amino acids. InverPep includes 33 AMPs not reported in other databases. Besides, CALCAMPI and statistical analysis of InverPep data is presented. The InverPep database is available in English and Spanish. Conclusions InverPep is a useful database to study invertebrate AMPs and its information could be used for the design of new peptides. The user-friendly interface of InverPep and its information can be freely accessed via a web-based browser at http://ciencias.medellin.unal.edu.co/gruposdeinvestigacion/prospeccionydisenobiomoleculas/InverPep/public/home_en.",InverPep,0.99586463,NA,0,InverPep,0.99586463,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/19/2016 +24253300,http://invfestdb.uab.cat,"InvFEST, a database integrating information of polymorphic inversions in the human genome. The newest genomic advances have uncovered an unprecedented degree of structural variation throughout genomes, with great amounts of data accumulating rapidly. Here we introduce InvFEST (http://invfestdb.uab.cat), a database combining multiple sources of information to generate a complete catalogue of non-redundant human polymorphic inversions. Due to the complexity of this type of changes and the underlying high false-positive discovery rate, it is necessary to integrate all the available data to get a reliable estimate of the real number of inversions. InvFEST automatically merges predictions into different inversions, refines the breakpoint locations, and finds associations with genes and segmental duplications. In addition, it includes data on experimental validation, population frequency, functional effects and evolutionary history. All this information is readily accessible through a complete and user-friendly web report for each inversion. 
In its current version, InvFEST combines information from 34 different studies and contains 1092 candidate inversions, which are categorized based on internal scores and manual curation. Therefore, InvFEST aims to represent the most reliable set of human inversions and become a central repository to share information, guide future studies and contribute to the analysis of the functional and evolutionary impact of inversions on the human genome.",InvFEST,0.995598376,NA,0,InvFEST,0.995598376,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/18/2013 +23046449,http://bioinfo.hsc.unt.edu/ipad,"IPAD: the Integrated Pathway Analysis Database for Systematic Enrichment Analysis. Background Next-Generation Sequencing (NGS) technologies and Genome-Wide Association Studies (GWAS) generate millions of reads and hundreds of datasets, and there is an urgent need for a better way to accurately interpret and distill such large amounts of data. Extensive pathway and network analysis allow for the discovery of highly significant pathways from a set of disease vs. healthy samples in the NGS and GWAS. Knowledge of activation of these processes will lead to elucidation of the complex biological pathways affected by drug treatment, to patient stratification studies of new and existing drug treatments, and to understanding the underlying anti-cancer drug effects. There are approximately 141 biological human pathway resources as of Jan 2012 according to the Pathguide database. However, most currently available resources do not contain disease, drug or organ specificity information such as disease-pathway, drug-pathway, and organ-pathway associations. Systematically integrating pathway, disease, drug and organ specificity together becomes increasingly crucial for understanding the interrelationships between signaling, metabolic and regulatory pathway, drug action, disease susceptibility, and organ specificity from high-throughput omics data (genomics, transcriptomics, proteomics and metabolomics). 
Results We designed the Integrated Pathway Analysis Database for Systematic Enrichment Analysis (IPAD, http://bioinfo.hsc.unt.edu/ipad), defining inter-association between pathway, disease, drug and organ specificity, based on six criteria: 1) comprehensive pathway coverage; 2) gene/protein to pathway/disease/drug/organ association; 3) inter-association between pathway, disease, drug, and organ; 4) multiple and quantitative measurement of enrichment and inter-association; 5) assessment of enrichment and inter-association analysis with the context of the existing biological knowledge and a ""gold standard"" constructed from reputable and reliable sources; and 6) cross-linking of multiple available data sources.IPAD is a comprehensive database covering about 22,498 genes, 25,469 proteins, 1956 pathways, 6704 diseases, 5615 drugs, and 52 organs integrated from databases including the BioCarta, KEGG, NCI-Nature curated, Reactome, CTD, PharmGKB, DrugBank, PharmGKB, and HOMER. The database has a web-based user interface that allows users to perform enrichment analysis from genes/proteins/molecules and inter-association analysis from a pathway, disease, drug, and organ.Moreover, the quality of the database was validated with the context of the existing biological knowledge and a ""gold standard"" constructed from reputable and reliable sources. Two case studies were also presented to demonstrate: 1) self-validation of enrichment analysis and inter-association analysis on brain-specific markers, and 2) identification of previously undiscovered components by the enrichment analysis from a prostate cancer study. Conclusions IPAD is a new resource for analyzing, identifying, and validating pathway, disease, drug, organ specificity and their inter-associations. The statistical method we developed for enrichment and similarity measurement and the two criteria we described for setting the threshold parameters can be extended to other enrichment applications. 
Enriched pathways, diseases, drugs, organs and their inter-associations can be searched, displayed, and downloaded from our online user interface. The current IPAD database can help users address a wide range of biological pathway related, disease susceptibility related, drug target related and organ specificity related questions in human disease studies.",IPAD,0.993825734,Integrated Pathway Analysis Database for Systematic Enrichment Analysis,0.92183505,IPAD,0.993825734,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/11/2012 +25388589,http://ento.njau.edu.cn/ipath,"iPathCons and iPathDB: an improved insect pathway construction tool and the database. . Insects are one of the most successful animal groups on earth. Some insects, such as the silkworm and honeybee, are beneficial to humans, whereas others are notorious pests of crops. At present, the genomes of 38 insects have been sequenced and made publically available. In addition, the transcriptomes of dozens of insects have been sequenced. As gene data rapidly accumulate, constructing the pathway of molecular interactions becomes increasingly important for entomological research. Here, we developed an improved tool, iPathCons, for knowledge-based construction of pathways from the transcriptomes or the official gene sets of genomes. Considering the high evolution diversity in insects, iPathCons uses a voting system for Kyoto Encyclopedia of Genes and Genomes Orthology assignment. Both stand-alone software and a web server of iPathCons are provided. Using iPathCons, we constructed the pathways of molecular interactions of 52 insects, including 37 genome-sequenced and 15 transcriptome-sequenced ones. These pathways are available in the iPathDB, which provides searches, web server, data downloads, etc. This database will be highly useful for the insect research community. 
Database URL: http://ento.njau.edu.cn/ipath/",iPathCons,0.982198,NA,0,iPathCons,0.982198,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +22140115,http://ipavs.cidms.org,"IPAVS: Integrated Pathway Resources, Analysis and Visualization System. Integrated Pathway Resources, Analysis and Visualization System (iPAVS) is an integrated biological pathway database designed to support pathway discovery in the fields of proteomics, transcriptomics, metabolomics and systems biology. The key goal of IPAVS is to provide biologists access to expert-curated pathways from experimental data belonging to specific biological contexts related to cell types, tissues, organs and diseases. IPAVS currently integrates over 500 human pathways (consisting of 24, 574 interactions) that include metabolic-, signaling- and disease-related pathways, drug-action pathways and several large process maps collated from other pathway resources. IPAVS web interface allows biologists to browse and search pathway resources and provides tools for data import, management, visualization and analysis to support the interpretation of biological data in light of cellular processes. Systems Biology Graphical Notations (SBGN) and Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway notations are used for the visual display of pathway information. The integrated datasets in IPAVS are made available in several standard data formats that can be downloaded. IPAVS is available at: http://ipavs.cidms.org.",IPAVS,0.987295449,Integrated Pathway Resources,0.952554718,IPAVS,0.987295449,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/2/2011 +"23180793, 25414341, 29858800, 31641782",http://www.ebi.ac.uk/ipd,"IPD--the Immuno Polymorphism Database. The Immuno Polymorphism Database (IPD), http://www.ebi.ac.uk/ipd/ is a set of specialist databases related to the study of polymorphic genes in the immune system. 
The IPD project works with specialist groups or nomenclature committees who provide and curate individual sections before they are submitted to IPD for online publication. The IPD project stores all the data in a set of related databases. IPD currently consists of four databases: IPD-KIR, contains the allelic sequences of killer-cell immunoglobulin-like receptors, IPD-MHC, a database of sequences of the major histocompatibility complex of different species; IPD-HPA, alloantigens expressed only on platelets; and IPD-ESTDAB, which provides access to the European Searchable Tumour Cell-Line Database, a cell bank of immunologically characterized melanoma cell lines. The data is currently available online from the website and FTP directory. This article describes the latest updates and additional tools added to the IPD project.",IPD,0.984152168,The Immuno Polymorphism Database,0.944300856,IPD,0.984152168,4,NA,25048120,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/22/2019 +25048120,"http://www.ebi.ac.uk/ipd/imgt/hla/, http://www.ebi.ac.uk/ipd","IMGT/HLA and the Immuno Polymorphism Database. The IMGT/HLA Database (http://www.ebi.ac.uk/ipd/imgt/hla/) was first released over 15 years ago, providing the HLA community with a searchable repository of highly curated HLA sequences. The HLA complex is located within the 6p21.3 region of human chromosome 6 and contains more than 220 genes of diverse function. Many of the genes encode proteins of the immune system and are highly polymorphic, with some genes currently having over 3,000 known allelic variants. The Immuno Polymorphism Database (IPD) (http://www.ebi.ac.uk/ipd/) expands on this model, with a further set of specialist databases related to the study of polymorphic genes in the immune system. The IPD project works with specialist groups or nomenclature committees who provide and curate individual sections before they are submitted to IPD for online publication. 
IPD currently consists of four databases: IPD-KIR contains the allelic sequences of killer-cell immunoglobulin-like receptors; IPD-MHC is a database of sequences of the major histocompatibility complex of different species; IPD-HPA, alloantigens expressed only on platelets; and IPD-ESTDAB, which provides access to the European Searchable Tumour Cell-Line Database, a cell bank of immunologically characterized melanoma cell lines. Through the work of the HLA Informatics Group and in collaboration with the European Bioinformatics Institute we are able to provide public access to this data through the website http://www.ebi.ac.uk/ipd/.",IPD,0.978224158,Immuno Polymorphism Database,0.807653854,IPD,0.978224158,1,NA,"23180793.0, 25414341.0, 29858800.0, 31641782.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2014 +31667505,http://www.ebi.ac.uk/ipd/imgt/hla,"IPD-IMGT/HLA Database. The IPD-IMGT/HLA Database, http://www.ebi.ac.uk/ipd/imgt/hla/, currently contains over 25 000 allele sequence for 45 genes, which are located within the Major Histocompatibility Complex (MHC) of the human genome. This region is the most polymorphic region of the human genome, and the levels of polymorphism seen exceed most other genes. Some of the genes have several thousand variants and are now termed hyperpolymorphic, rather than just simply polymorphic. The IPD-IMGT/HLA Database has provided a stable, highly accessible, user-friendly repository for this information, providing the scientific and medical community access to the many variant sequences of this gene system, that are critical for the successful outcome of transplantation. The number of currently known variants, and dramatic increase in the number of new variants being identified has necessitated a dedicated resource with custom tools for curation and publication. 
The challenge for the database is to continue to provide a highly curated database of sequence variants, while supporting the increased number of submissions and complexity of sequences. In order to do this, traditional methods of accessing and presenting data will be challenged, and new methods will need to be utilized to keep pace with new discoveries.",IPD-IMGT,0.80879631,NA,0,IPD-IMGT,0.80879631,1,26826444,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2020 +26826444,http://www.ebi.ac.uk/ipd/imgt/hla,"The IPD-IMGT/HLA Database - New developments in reporting HLA variation. IPD-IMGT/HLA is a constituent of the Immuno Polymorphism Database (IPD), which was developed to provide a centralised system for the study of polymorphism in genes of the immune system. The IPD project works with specialist groups of nomenclature committees who provide and curate individual sections before they are submitted to IPD for online publication. The primary database within the IPD project is the IPD-IMGT/HLA Database, which provides a locus-specific database for the hyper-polymorphic allele sequences of the genes in the HLA system, also known as the human Major Histocompatibility Complex. The IPD-IMGT/HLA Database was first released over 17 years ago, building on the work of the WHO Nomenclature Committee for Factors of the HLA system that was initiated in 1968. The IPD-IMGT/HLA Database enhanced this work by providing the HLA community with an online, searchable repository of highly curated HLA sequences. Many of the genes encode proteins of the immune system and are hyper polymorphic, with some genes currently having over 4000 known allelic variants. 
Through the work of the HLA Informatics Group and in collaboration with the European Bioinformatics Institute we are able to provide public access to this data through the website, http://www.ebi.ac.uk/ipd/imgt/hla.",IPD-IMGT/HLA,0.737059861,Polymorphism,0.601328015,IPD-IMGT/HLA,0.737059861,1,31667505,NA,low_prob_best_name,do not remove,conflicting record(s) to be removed,NA,NA,NA,NA,1/27/2016 +27899604,http://www.ebi.ac.uk/ipd/mhc,"IPD-MHC 2.0: an improved inter-species database for the study of the major histocompatibility complex. The IPD-MHC Database project (http://www.ebi.ac.uk/ipd/mhc/) collects and expertly curates sequences of the major histocompatibility complex from non-human species and provides the infrastructure and tools to enable accurate analysis. Since the first release of the database in 2003, IPD-MHC has grown and currently hosts a number of specific sections, with more than 7000 alleles from 70 species, including non-human primates, canines, felines, equids, ovids, suids, bovins, salmonids and murids. These sequences are expertly curated and made publicly available through an open access website. The IPD-MHC Database is a key resource in its field, and this has led to an average of 1500 unique visitors and more than 5000 viewed pages per month. As the database has grown in size and complexity, it has created a number of challenges in maintaining and organizing information, particularly the need to standardize nomenclature and taxonomic classification, while incorporating new allele submissions. Here, we describe the latest database release, the IPD-MHC 2.0 and discuss planned developments. 
This release incorporates sequence updates and new tools that enhance database queries and improve the submission procedure by utilizing common tools that are able to handle the varied requirements of each MHC-group.",IPD-MHC,0.969788027,NA,0,IPD-MHC,0.969788027,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/28/2016 +24297255,http://ipfam.org,"iPfam: a database of protein family and domain interactions found in the Protein Data Bank. The database iPfam, available at http://ipfam.org, catalogues Pfam domain interactions based on known 3D structures that are found in the Protein Data Bank, providing interaction data at the molecular level. Previously, the iPfam domain-domain interaction data was integrated within the Pfam database and website, but it has now been migrated to a separate database. This allows for independent development, improving data access and giving clearer separation between the protein family and interactions datasets. In addition to domain-domain interactions, iPfam has been expanded to include interaction data for domain bound small molecule ligands. Functional annotations are provided from source databases, supplemented by the incorporation of Wikipedia articles where available. iPfam (version 1.0) contains >9500 domain-domain and 15 500 domain-ligand interactions. The new website provides access to this data in a variety of ways, including interactive visualizations of the interaction data.",iPfam,0.995806396,NA,0,iPfam,0.995806396,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2013 +31231133,http://www.igb.cnr.it/ipgb,"The Incontinentia Pigmenti Genetic Biobank: study design and cohort profile to facilitate research into a rare disease worldwide. Incontinentia pigmenti (IP; OMIM#308300) is a rare genetic disease resulting in neuroectodermal defects, which can lead to disability. At present, there is neither definitive cure available nor are there any sufficiently reliable insights to predict the severity of the disease. 
We launched the Incontinentia Pigmenti Genetic Biobank (IPGB) project ( http://www.igb.cnr.it/ipgb ) in 2015 to establish a large-scale deposit of biological samples, to provide detailed clinical information about children diagnosed with IP and to facilitate research. We have built a cohort comprising samples of 381 clinically confirmed patients with IP and 633 healthy individuals recruited through IP patients' associations. The collection includes 269 trios, 83 duos, and 95 families with at least two affected members and represents an extensive dataset (200 cooperative medical institutes, 139 in Italy and 61 worldwide) that enables a comprehensive phenotyping. Joining the IPGB guarantees all participants access to the results including the genetic testing of IP and the long-term storage of the samples. The IPGB is the largest IP sample collection and one of the largest rare-disease-oriented collections in the world and will be open to requests for access to data by the national and international scientific community.",IPGB,0.957984984,Incontinentia Pigmenti Genetic Biobank,0.896460912,IPGB,0.957984984,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/23/2019 +26432833,http://www.ippidb.cdithem.fr,"iPPI-DB: an online database of modulators of protein-protein interactions. In order to boost the identification of low-molecular-weight drugs on protein-protein interactions (PPI), it is essential to properly collect and annotate experimental data about successful examples. This provides the scientific community with the necessary information to derive trends about privileged physicochemical properties and chemotypes that maximize the likelihood of promoting a given chemical probe to the most advanced stages of development. 
To this end we have developed iPPI-DB (freely accessible at http://www.ippidb.cdithem.fr), a database that contains the structure, some physicochemical characteristics, the pharmacological data and the profile of the PPI targets of several hundreds modulators of protein-protein interactions. iPPI-DB is accessible through a web application and can be queried according to two general approaches: using physicochemical/pharmacological criteria; or by chemical similarity to a user-defined structure input. In both cases the results are displayed as a sortable and exportable datasheet with links to external databases such as Uniprot, PubMed. Furthermore each compound in the table has a link to an individual ID card that contains its physicochemical and pharmacological profile derived from iPPI-DB data. This includes information about its binding data, ligand and lipophilic efficiencies, location in the PPI chemical space, and importantly similarity with known drugs, and links to external databases like PubChem, and ChEMBL.",iPPI-DB,0.996882915,NA,0,iPPI-DB,0.996882915,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2015 +30397019,http://www.flyrnai.org/tools/iproteindb,"iProteinDB: An Integrative Database of Drosophila Post-translational Modifications. Post-translational modification (PTM) serves as a regulatory mechanism for protein function, influencing their stability, interactions, activity and localization, and is critical in many signaling pathways. The best characterized PTM is phosphorylation, whereby a phosphate is added to an acceptor residue, most commonly serine, threonine and tyrosine in metazoans. As proteins are often phosphorylated at multiple sites, identifying those sites that are important for function is a challenging problem. Considering that any given phosphorylation site might be non-functional, prioritizing evolutionarily conserved phosphosites provides a general strategy to identify the putative functional sites. 
To facilitate the identification of conserved phosphosites, we generated a large-scale phosphoproteomics dataset from Drosophila embryos collected from six closely-related species. We built iProteinDB (https://www.flyrnai.org/tools/iproteindb/), a resource integrating these data with other high-throughput PTM datasets, including vertebrates, and manually curated information for Drosophila At iProteinDB, scientists can view the PTM landscape for any Drosophila protein and identify predicted functional phosphosites based on a comparative analysis of data from closely-related Drosophila species. Further, iProteinDB enables comparison of PTM data from Drosophila to that of orthologous proteins from other model organisms, including human, mouse, rat, Xenopus tropicalis, Danio rerio, and Caenorhabditis elegans.",iProteinDB,0.996118069,NA,0,iProteinDB,0.996118069,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/9/2019 +30252093,http://www.iprox.org,"iProX: an integrated proteome resource. Sharing of research data in public repositories has become best practice in academia. With the accumulation of massive data, network bandwidth and storage requirements are rapidly increasing. The ProteomeXchange (PX) consortium implements a mode of centralized metadata and distributed raw data management, which promotes effective data sharing. To facilitate open access of proteome data worldwide, we have developed the integrated proteome resource iProX (http://www.iprox.org) as a public platform for collecting and sharing raw data, analysis results and metadata obtained from proteomics experiments. The iProX repository employs a web-based proteome data submission process and open sharing of mass spectrometry-based proteomics datasets. Also, it deploys extensive controlled vocabularies and ontologies to annotate proteomics datasets. Users can use a GUI to provide and access data through a fast Aspera-based transfer tool. 
iProX is a full member of the PX consortium; all released datasets are freely accessible to the public. iProX is based on a high availability architecture and has been deployed as part of the proteomics infrastructure of China, ensuring long-term and stable resource support. iProX will facilitate worldwide data analysis and sharing of proteomics experiments.",iProX,0.984180152,NA,0,iProX,0.984180152,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +"28150246, 29145615",http://proteininformationresource.org/iPTMnet,"iPTMnet: Integrative Bioinformatics for Studying PTM Networks. Protein post-translational modification (PTM) is an essential cellular regulatory mechanism, and disruptions in PTM have been implicated in disease. PTMs are an active area of study in many fields, leading to a wealth of PTM information in the scientific literature. There is a need for user-friendly bioinformatics resources that capture PTM information from the literature and support analyses of PTMs and their functional consequences. This chapter describes the use of iPTMnet ( http://proteininformationresource.org/iPTMnet/ ), a resource that integrates PTM information from text mining, curated databases, and ontologies and provides visualization tools for exploring PTM networks, PTM crosstalk, and PTM conservation across species. We present several PTM-related queries and demonstrate how they can be addressed using iPTMnet.",iPTMnet,0.997314811,NA,0,iPTMnet,0.997314811,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24030781,http://IQdb.cbi.pku.edu.cn,"IQdb: an intelligence quotient score-associated gene resource for human intelligence. Intelligence quotient (IQ) is the most widely used phenotype to characterize human cognitive abilities. Recent advances in studies on human intelligence have identified many new susceptibility genes. However, the genetic mechanisms involved in IQ score and the relationship between IQ score and the risk of mental disorders have won little attention. 
To address the genetic complexity of IQ score, we have developed IQdb (http://IQdb.cbi.pku.edu.cn), a publicly available database for exploring IQ-associated human genes. In total, we collected 158 experimental verified genes from literature as a core dataset in IQdb. In addition, 46 genomic regions related to IQ score have been curated from literature. Based on the core dataset and 46 confirmed linked genomic regions, more than 6932 potential IQ-related genes are expanded using data of protein-protein interactions. A systematic gene ranking approach was applied to all the collected and expanded genes to represent the relative importance of all the 7090 genes in IQdb. Our further systematic pathway analysis reveals that IQ-associated genes are significantly enriched in multiple signal events, especially related to cognitive systems. Of the 158 genes in the core dataset, 81 are involved in various psychotic and mental disorders. This comprehensive gene resource illustrates the importance of IQdb to our understanding on human intelligence, and highlights the utility of IQdb for elucidating the functions of IQ-associated genes and the cross-talk mechanisms among cognition-related pathways in some mental disorders for community. Database URL: http://IQdb.cbi.pku.edu.cn.",IQdb,0.993846714,NA,0,IQdb,0.993846714,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/11/2013 +28168018,http://irbas.cesab.org,"IRBAS: An online database to collate, analyze, and synthesize data on the biodiversity and ecology of intermittent rivers worldwide. Key questions dominating contemporary ecological research and management concern interactions between biodiversity, ecosystem processes, and ecosystem services provision in the face of global change. This is particularly salient for freshwater biodiversity and in the context of river drying and flow-regime change. 
Rivers that stop flowing and dry, herein intermittent rivers, are globally prevalent and dynamic ecosystems on which the body of research is expanding rapidly, consistent with the era of big data. However, the data encapsulated by this work remain largely fragmented, limiting our ability to answer the key questions beyond a case-by-case basis. To this end, the Intermittent River Biodiversity Analysis and Synthesis (IRBAS; http://irbas.cesab.org) project has collated, analyzed, and synthesized data from across the world on the biodiversity and environmental characteristics of intermittent rivers. The IRBAS database integrates and provides free access to these data, contributing to the growing, and global, knowledge base on these ubiquitous and important river systems, for both theoretical and applied advancement. The IRBAS database currently houses over 2000 data samples collected from six countries across three continents, primarily describing aquatic invertebrate taxa inhabiting intermittent rivers during flowing hydrological phases. As such, there is room to expand the biogeographic and taxonomic coverage, for example, through addition of data collected during nonflowing and dry hydrological phases. We encourage contributions and provide guidance on how to contribute and access data. Ultimately, the IRBAS database serves as a portal, storage, standardization, and discovery tool, enabling collation, synthesis, and analysis of data to elucidate patterns in river biodiversity and guide management. Contribution creates high visibility for datasets, facilitating collaboration. 
The IRBAS database will grow in content as the study of intermittent rivers continues and data retrieval will allow for networking, meta-analyses, and testing of generalizations across multiple systems, regions, and taxa.",IRBAS,0.990202904,Intermittent River Biodiversity Analysis and Synthesis,0.962772516,IRBAS,0.990202904,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/3/2017 +"22260278, 27679478",http://www.fludb.org,"Influenza research database: an integrated bioinformatics resource for influenza research and surveillance. Background The recent emergence of the 2009 pandemic influenza A/H1N1 virus has highlighted the value of free and open access to influenza virus genome sequence data integrated with information about other important virus characteristics. Design The Influenza Research Database (IRD, http://www.fludb.org) is a free, open, publicly-accessible resource funded by the U.S. National Institute of Allergy and Infectious Diseases through the Bioinformatics Resource Centers program. IRD provides a comprehensive, integrated database and analysis resource for influenza sequence, surveillance, and research data, including user-friendly interfaces for data retrieval, visualization and comparative genomics analysis, together with personal log in-protected 'workbench' spaces for saving data sets and analysis results. IRD integrates genomic, proteomic, immune epitope, and surveillance data from a variety of sources, including public databases, computational algorithms, external research groups, and the scientific literature. Results To demonstrate the utility of the data and analysis tools available in IRD, two scientific use cases are presented. A comparison of hemagglutinin sequence conservation and epitope coverage information revealed highly conserved protein regions that can be recognized by the human adaptive immune system as possible targets for inducing cross-protective immunity. 
Phylogenetic and geospatial analysis of sequences from wild bird surveillance samples revealed a possible evolutionary connection between influenza virus from Delaware Bay shorebirds and Alberta ducks. Conclusions The IRD provides a wealth of integrated data and information about influenza virus to support research of the genetic determinants dictating virus pathogenicity, host range restriction and transmission, and to facilitate development of vaccines, diagnostics, and therapeutics.",IRD,0.993188262,Influenza Research Database,0.962002027,IRD,0.993188262,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/26/2016 +21873645,http://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads/ppiTrim.html,"ppiTrim: constructing non-redundant and up-to-date interactomes. Robust advances in interactome analysis demand comprehensive, non-redundant and consistently annotated data sets. By non-redundant, we mean that the accounting of evidence for every interaction should be faithful: each independent experimental support is counted exactly once, no more, no less. While many interactions are shared among public repositories, none of them contains the complete known interactome for any model organism. In addition, the annotations of the same experimental result by different repositories often disagree. This brings up the issue of which annotation to keep while consolidating evidences that are the same. The iRefIndex database, including interactions from most popular repositories with a standardized protein nomenclature, represents a significant advance in all aspects, especially in comprehensiveness. However, iRefIndex aims to maintain all information/annotation from original sources and requires users to perform additional processing to fully achieve the aforementioned goals. Another issue has to do with protein complexes. 
Some databases represent experimentally observed complexes as interactions with more than two participants, while others expand them into binary interactions using spoke or matrix model. To avoid untested interaction information buildup, it is preferable to replace the expanded protein complexes, either from spoke or matrix models, with a flat list of complex members. To address these issues and to achieve our goals, we have developed ppiTrim, a script that processes iRefIndex to produce non-redundant, consistently annotated data sets of physical interactions. Our script proceeds in three stages: mapping all interactants to gene identifiers and removing all undesired raw interactions, deflating potentially expanded complexes, and reconciling for each interaction the annotation labels among different source databases. As an illustration, we have processed the three largest organismal data sets: yeast, human and fruitfly. While ppiTrim can resolve most apparent conflicts between different labelings, we also discovered some unresolvable disagreements mostly resulting from different annotation policies among repositories. Database URL: http://www.ncbi.nlm.nih.gov/CBBresearch/Yu/downloads/ppiTrim.html.",iRefIndex,0.996739507,NA,0,iRefIndex,0.996739507,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/27/2011 +"24203342, 33125652",http://wodaklab.org/iRefWeb,"Navigating the global protein-protein interaction landscape using iRefWeb. iRefWeb is a bioinformatics resource that offers access to a large collection of data on protein-protein interactions in over a thousand organisms. This collection is consolidated from 14 major public databases that curate the scientific literature. The collection is enhanced with a range of versatile data filters and search options that categorize various types of protein-protein interactions and protein complexes. 
Users of iRefWeb are able to retrieve all curated interactions for a given organism or those involving a given protein (or a list of proteins), narrow down their search results based on different supporting evidence, and assess the reliability of these interactions using various criteria. They may also examine all data and annotations related to any publication that described the interaction-detection experiments. iRefWeb is freely available to the research community worldwide at http://wodaklab.org/iRefWeb .",iRefWeb,0.995616853,NA,0,iRefWeb,0.995616853,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +33942874,http://cobishss0.im.nuk.edu.tw/Human_IRES_Atlas,"Human IRES Atlas: an integrative platform for studying IRES-driven translational regulation in humans. . It is now known that cap-independent translation initiation facilitated by internal ribosome entry sites (IRESs) is vital in selective cellular protein synthesis under stress and different physiological conditions. However, three problems make it hard to understand transcriptome-wide cellular IRES-mediated translation initiation mechanisms: (i) complex interplay between IRESs and other translation initiation-related information, (ii) reliability issue of in silico cellular IRES investigation and (iii) labor-intensive in vivo IRES identification. In this research, we constructed the Human IRES Atlas database for a comprehensive understanding of cellular IRESs in humans. First, currently available and suitable IRES prediction tools (IRESfinder, PatSearch and IRESpy) were used to obtain transcriptome-wide human IRESs. Then, we collected eight genres of translation initiation-related features to help study the potential molecular mechanisms of each of the putative IRESs. Three functional tests (conservation, structural RNA-protein scores and conditional translation efficiency) were devised to evaluate the functionality of the identified putative IRESs. 
Moreover, an easy-to-use interface and an IRES-translation initiation interaction map for each gene transcript were implemented to help understand the interactions between IRESs and translation initiation-related features. Researchers can easily search/browse an IRES of interest using the web interface and deduce testable mechanism hypotheses of human IRES-driven translation initiation based on the integrated results. In summary, Human IRES Atlas integrates putative IRES elements and translation initiation-related experiments for better usage of these data and deduction of mechanism hypotheses. Database URL: http://cobishss0.im.nuk.edu.tw/Human_IRES_Atlas/.",IRES,0.658625901,NA,0,IRES,0.658625901,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,5/1/2021 +32512182,http://reprod.njmu.edu.cn/cgi-bin/iresbase/index.php,"IRESbase: A Comprehensive Database of Experimentally Validated Internal Ribosome Entry Sites. Internal ribosome entry sites (IRESs) are functional RNA elements that can directly recruit ribosomes to an internal position of the mRNA in a cap-independent manner to initiate translation. Recently, IRES elements have attracted much attention for their critical roles in various processes including translation initiation of a new type of RNA, circular RNA (circRNA), with no 5' cap to support classical cap-dependent translation. Thus, an integrative data resource of IRES elements with experimental evidence will be useful for further studies. In this study, we present IRESbase, a comprehensive database of IRESs, by curating the experimentally validated functional minimal IRES elements from literature and annotating their host linear and circular RNAs. The current version of IRESbase contains 1328 IRESs, including 774 eukaryotic IRESs and 554 viral IRESs from 11 eukaryotic organisms and 198 viruses, respectively. 
As IRESbase collects only IRES of minimal length with functional evidence, the median length of IRESs in IRESbase is 174 nucleotides. By mapping IRESs to human circRNAs and long non-coding RNAs (lncRNAs), 2191 circRNAs and 168 lncRNAs were found to contain at least one entire or partial IRES sequence. IRESbase is available at http://reprod.njmu.edu.cn/cgi-bin/iresbase/index.php.",IRESbase,0.987502873,NA,0,IRESbase,0.987502873,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2020 +26473382,http://mirgenedb.org,"A Uniform System for the Annotation of Vertebrate microRNA Genes and the Evolution of the Human microRNAome. Although microRNAs (miRNAs) are among the most intensively studied molecules of the past 20 years, determining what is and what is not a miRNA has not been straightforward. Here, we present a uniform system for the annotation and nomenclature of miRNA genes. We show that less than a third of the 1,881 human miRBase entries, and only approximately 16% of the 7,095 metazoan miRBase entries, are robustly supported as miRNA genes. Furthermore, we show that the human repertoire of miRNAs has been shaped by periods of intense miRNA innovation and that mature gene products show a very different tempo and mode of sequence evolution than star products. We establish a new open access database--MirGeneDB ( http://mirgenedb.org )--to catalog this set of miRNAs, which complements the efforts of miRBase but differs from it by annotating the mature versus star products and by imposing an evolutionary hierarchy upon this curated and consistently named repertoire.",irGeneDB,0.959409177,NA,0,irGeneDB,0.959409177,1,31598695,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,10/14/2015 +27841751,http://www.proteindiffraction.org,"A public database of macromolecular diffraction experiments. 
The low reproducibility of published experimental results in many scientific disciplines has recently garnered negative attention in scientific journals and the general media. Public transparency, including the availability of `raw' experimental data, will help to address growing concerns regarding scientific integrity. Macromolecular X-ray crystallography has led the way in requiring the public dissemination of atomic coordinates and a wealth of experimental data, making the field one of the most reproducible in the biological sciences. However, there remains no mandate for public disclosure of the original diffraction data. The Integrated Resource for Reproducibility in Macromolecular Crystallography (IRRMC) has been developed to archive raw data from diffraction experiments and, equally importantly, to provide related metadata. Currently, the database of our resource contains data from 2920 macromolecular diffraction experiments (5767 data sets), accounting for around 3% of all depositions in the Protein Data Bank (PDB), with their corresponding partially curated metadata. IRRMC utilizes distributed storage implemented using a federated architecture of many independent storage servers, which provides both scalability and sustainability. The resource, which is accessible via the web portal at http://www.proteindiffraction.org, can be searched using various criteria. All data are available for unrestricted access and download. The resource serves as a proof of concept and demonstrates the feasibility of archiving raw diffraction data and associated metadata from X-ray crystallographic studies of biological macromolecules. 
The goal is to expand this resource and include data sets that failed to yield X-ray structures in order to facilitate collaborative efforts that will improve protein structure-determination methods and to ensure the availability of `orphan' data left behind for various reasons by individual investigators and/or extinct structural genomics projects.",IRRMC,0.994473457,Integrated Resource for Reproducibility in Macromolecular Crystallography,0.81369595,IRRMC,0.994473457,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2016 +22592381,http://ir.hgc.jp,"IRView: a database and viewer for protein interacting regions. Unlabelled Protein-protein interactions (PPIs) are mediated through specific regions on proteins. Some proteins have two or more protein interacting regions (IRs) and some IRs are competitively used for interactions with different proteins. IRView currently contains data for 3417 IRs in human and mouse proteins. The data were obtained from different sources and combined with annotated region data from InterPro. Information on non-synonymous single nucleotide polymorphism sites and variable regions owing to alternative mRNA splicing is also included. The IRView web interface displays all IR data, including user-uploaded data, on reference sequences so that the positional relationship between IRs can be easily understood. IRView should be useful for analyzing underlying relationships between the proteins behind the PPI networks. Availability IRView is publicly available on the web at http://ir.hgc.jp/",IRView,0.997845531,NA,0,IRView,0.997845531,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/15/2012 +28322240,"http://commondataelements.ninds.nih.gov/SCI.aspx, http://www.iscos.org.uk/international-sci-data-sets","International spinal cord injury endocrine and metabolic extended data set. 
Objective The objective of this study was to develop the International Spinal Cord Injury (SCI) Endocrine and Metabolic Extended Data Set (ISCIEMEDS) within the framework of the International SCI Data Sets that would facilitate consistent collection and reporting of endocrine and metabolic findings in the SCI population. Setting This study was conducted in an international setting. Methods The ISCIEMEDS was developed by a working group. The initial ISCIEMEDS was revised based on suggestions from members of the International SCI Data Sets Committee, the International Spinal Cord Society (ISCoS) Executive and Scientific Committees, American Spinal Injury Association (ASIA) Board, other interested organizations, societies and individual reviewers. The data set was posted for two months on ISCoS and ASIA websites for comments. Variable names were standardized, and a suggested database structure for the ISCIEMEDS was provided by the Common Data Elements (CDEs) project at the National Institute on Neurological Disorders and Stroke (NINDS) of the US National Institute of Health (NIH), and are available at https://commondataelements.ninds.nih.gov/SCI.aspx#tab=Data_Standards. Results The final ISCIEMEDS contains questions on the endocrine and metabolic conditions related to SCI. Because the information may be collected at any time, the date of data collection is important to determine the time after SCI. ISCIEMEDS includes information on carbohydrate metabolism (6 variables), calcium and bone metabolism (12 variables), thyroid function (9 variables), adrenal function (2 variables), gonadal function (7 variables), pituitary function (6 variables), sympathetic nervous system function (1 variable) and renin-aldosterone axis function (2 variables). 
Conclusion The complete instructions for data collection and the data sheet itself are freely available on the website of ISCoS (http://www.iscos.org.uk/international-sci-data-sets).",ISCIEMEDS,0.977421165,Endocrine and Metabolic Extended Data Set,0.951809481,ISCIEMEDS,0.977421165,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/21/2017 +25378302,http://bioinformatics.sandia.gov/islander,"Islander: a database of precisely mapped genomic islands in tRNA and tmRNA genes. Genomic islands are mobile DNAs that are major agents of bacterial and archaeal evolution. Integration into prokaryotic chromosomes usually occurs site-specifically at tRNA or tmRNA gene (together, tDNA) targets, catalyzed by tyrosine integrases. This splits the target gene, yet sequences within the island restore the disrupted gene; the regenerated target and its displaced fragment precisely mark the endpoints of the island. We applied this principle to search for islands in genomic DNA sequences. Our algorithm identifies tDNAs, finds fragments of those tDNAs in the same replicon and removes unlikely candidate islands through a series of filters. A search for islands in 2168 whole prokaryotic genomes produced 3919 candidates. The website Islander (recently moved to http://bioinformatics.sandia.gov/islander/) presents these precisely mapped candidate islands, the gene content and the island sequence. The algorithm further insists that each island encode an integrase, and attachment site sequence identity is carefully noted; therefore, the database also serves in the study of integrase site-specificity and its evolution.",Islander,0.601636887,NA,0,Islander,0.601636887,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/5/2014 +34189203,http://isoarch.eu,"Collagen stable isotope data from East and Northeast Asia, c. 7000 BC-1000 AD. Stable isotope analysis is routinely used in archaeology to answer questions related to past diets. 
As the technique matures, data from archaeological sites have been generated at an exponential rate over the past several decades, thus provided an invaluable opportunity to examine past dietary practices and subsistence economies in much larger geographical and temporal settings. In Asia, a significant proportion of isotopic data is published in non-English journals or in grey literature, therefore remains largely inaccessible to general researchers. In order to provide easier access to these data, and to encourage future large-scale meta-data analyses in Asia, this collection presents the most comprehensive set of collagen stable isotope data of carbon, nitrogen, and sulfur from East and Northeast Asia (29-51˚N, 96-136˚ E) to date, including sites located within the modern territories of the People's Republic of China, Mongolia, the Russian Federation, and the Republic of Korea. Using academic search engines such as Google Scholar, the Chinese National Knowledge Infrastructure (CNKI), and ScienceON, a total of 3,304 previously published archaeological human and faunal stable isotope data from 136 archaeological sites in East and Northeast Asia, spanning over a period of 8,000 years (c. 7000 BC to AD 1000) are collected. The collated data are deposited on the open-access platform IsoArcH (https://isoarch.eu/) for any interested parties to use.",IsoArcH,0.689917505,NA,0,IsoArcH,0.689917505,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/10/2021 +25848172,http://www.snakebd.com,"ISOB: A Database of Indigenous Snake Species of Bangladesh with respective known venom composition. Unlabelled At present there is no well structured database available for the venomous snakes and venom composition of snakes in the world although venom has immense importance in biomedical research. Searching for a specific venom component from NCBI, PDB or public databases is troublesome, because they contain huge amount of data entries. 
Therefore, we created a database named ""ISOB"" which is a web accessible unique secondary database that represents the first online available bioinformatics resource showing venom composition of snakes. This database provides a comprehensive overview of seventy-eight indigenous snake species covering description of snakes supplemented with structural information of the relevant individual available venom proteins. We strongly believe that this database will contribute significantly in the field of bioinformatics, environmental research, proteomics, drug development and rationale drug designing. Availability The database is freely available at http://www.snakebd.com/.",ISOB,0.992247462,NA,0,ISOB,0.992247462,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/28/2015 +21177658,http://isobase.csail.mit.edu,"IsoBase: a database of functionally related proteins across PPI networks. We describe IsoBase, a database identifying functionally related proteins, across five major eukaryotic model organisms: Saccharomyces cerevisiae, Drosophila melanogaster, Caenorhabditis elegans, Mus musculus and Homo Sapiens. Nearly all existing algorithms for orthology detection are based on sequence comparison. Although these have been successful in orthology prediction to some extent, we seek to go beyond these methods by the integration of sequence data and protein-protein interaction (PPI) networks to help in identifying true functionally related proteins. With that motivation, we introduce IsoBase, the first publicly available ortholog database that focuses on functionally related proteins. The groupings were computed using the IsoRankN algorithm that uses spectral methods to combine sequence and PPI data and produce clusters of functionally related proteins. These clusters compare favorably with those from existing approaches: proteins within an IsoBase cluster are more likely to share similar Gene Ontology (GO) annotation. 
A total of 48,120 proteins were clustered into 12,693 functionally related groups. The IsoBase database may be browsed for functionally related proteins across two or more species and may also be queried by accession numbers, species-specific identifiers, gene name or keyword. The database is freely available for download at http://isobase.csail.mit.edu/.",IsoBase,0.996967256,NA,0,IsoBase,0.996967256,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2011 +25166490,http://isometlin.scripps.edu,"isoMETLIN: a database for isotope-based metabolomics. The METLIN metabolite database has become one of the most widely used resources in metabolomics for making metabolite identifications. However, METLIN is not designed to identify metabolites that have been isotopically labeled. As a result, unbiasedly tracking the transformation of labeled metabolites with isotope-based metabolomics is a challenge. Here, we introduce a new database, called isoMETLIN (http://isometlin.scripps.edu/), that has been developed specifically to identify metabolites incorporating isotopic labels. isoMETLIN enables users to search all computed isotopologues derived from METLIN on the basis of mass-to-charge values and specified isotopes of interest, such as (13)C or (15)N. Additionally, isoMETLIN contains experimental MS/MS data on hundreds of isotopomers. These data assist in localizing the position of isotopic labels within a metabolite. From these experimental MS/MS isotopomer spectra, precursor atoms can be mapped to fragments. The MS/MS spectra of additional isotopomers can then be computationally generated and included within isoMETLIN. Given that isobaric isotopomers cannot be separated chromatographically or by mass but are likely to occur simultaneously in a biological system, we have also implemented a spectral-mixing function in isoMETLIN. 
This functionality allows users to combine MS/MS spectra from various isotopomers in different ratios to obtain a theoretical MS/MS spectrum that matches the MS/MS spectrum from a biological sample. Thus, by searching MS and MS/MS experimental data, isoMETLIN facilitates the identification of isotopologues as well as isotopomers from biological samples and provides a platform to drive the next generation of isotope-based metabolomic studies.",isoMETLIN,0.988883018,NA,0,isoMETLIN,0.988883018,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/19/2014 +27153728,http://mcg.ustc.edu.cn/bsc/isomir/Contacts,"IsomiR Bank: a research resource for tracking IsomiRs. Unlabelled: Next-Generation Sequencing (NGS) technology has revealed that microRNAs (miRNAs) are capable of exhibiting frequent differences from their corresponding mature reference sequences, generating multiple variants: the isoforms of miRNAs (isomiRs). These isomiRs mainly originate via the imprecise and alternative cleavage during the pre-miRNA processing and post-transcriptional modifications that influence miRNA stability, their sub-cellular localization and target selection. Although several tools for the identification of isomiR have been reported, no bioinformatics resource dedicated to gather isomiRs from public NGS data and to provide functional analysis of these isomiRs is available to date. Thus, a free online database, IsomiR Bank has been created to integrate isomiRs detected by our previously published algorithm CPSS. In total, 2727 samples (Small RNA NGS data downloaded from ArrayExpress) from eight species (Arabidopsis thaliana, Drosophila melanogaster, Danio rerio, Homo sapiens, Mus musculus, Oryza sativa, Solanum lycopersicum and Zea mays) are analyzed. At present, 308 919 isomiRs from 4706 mature miRNAs are collected into IsomiR Bank. In addition, IsomiR Bank provides target prediction and enrichment analysis to evaluate the effects of isomiRs on target selection. 
Availability and implementation IsomiR Bank is implemented in PHP/PERL + MySQL + R format and can be freely accessed at http://mcg.ustc.edu.cn/bsc/isomir/Contacts: aoli@ustc.edu.cn or qshi@ustc.edu.cn Supplementary information Supplementary data are available at Bioinformatics online.",IsomiR Bank,0.97647199,NA,0,IsomiR Bank,0.97647199,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/2/2016 +25204646,http://spp1316.uni-wuerzburg.de/bioinformatics/isotopo,"'Isotopo' a database application for facile analysis and management of mass isotopomer data. . The composition of stable-isotope labelled isotopologues/isotopomers in metabolic products can be measured by mass spectrometry and supports the analysis of pathways and fluxes. As a prerequisite, the original mass spectra have to be processed, managed and stored to rapidly calculate, analyse and compare isotopomer enrichments to study, for instance, bacterial metabolism in infection. For such applications, we provide here the database application 'Isotopo'. This software package includes (i) a database to store and process isotopomer data, (ii) a parser to upload and translate different data formats for such data and (iii) an improved application to process and convert signal intensities from mass spectra of (13)C-labelled metabolites such as tertbutyldimethylsilyl-derivatives of amino acids. Relative mass intensities and isotopomer distributions are calculated applying a partial least square method with iterative refinement for high precision data. The data output includes formats such as graphs for overall enrichments in amino acids. The package is user-friendly for easy and robust data management of multiple experiments. The 'Isotopo' software is available at the following web link (section Download): http://spp1316.uni-wuerzburg.de/bioinformatics/isotopo/. 
The package contains three additional files: software executable setup (installer), one data set file (discussed in this article) and one excel file (which can be used to convert data from excel to '.iso' format). The 'Isotopo' software is compatible only with the Microsoft Windows operating system. http://spp1316.uni-wuerzburg.de/bioinformatics/isotopo/.",Isotopo,0.985290567,NA,0,Isotopo,0.985290567,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/9/2014 +21965461,http://www.iss.it/ampp/dati/cont.php?id=233&lang=1&tipo=7,"The new ISSMIC database on in vivo micronucleus and its role in assessing genotoxicity testing strategies. This paper presents a new curated database on in vivo micronucleus mutagenicity results, called ISSMIC. It is freely available at: http://www.iss.it/ampp/dati/cont.php?id=233&lang=1&tipo=7. The experimental results were critically reviewed, and evidence on target cell exposure was considered as well. The inspection of ISSMIC demonstrates that a large proportion of reported negative results in the literature (231 out 566 ISSMIC chemicals) lack a clear-cut, direct demonstration of toxicity at the target cells. Using this updated database, the predictive value of a compilation of Structural Alerts (SA) for in vivo micronucleus recently implemented in the expert system Toxtree was investigated. Individually, most of the SA showed a high Positive Predictivity (∼80%), but the need for further expanding the list of alerts was pointed out as well. The role of in vivo micronucleus in strategies for carcinogenicity prediction was re-evaluated. In agreement with previous analyses, the data point to a low overall correlation with carcinogenicity. In addition, given the cost in animal lives and the time required for the experimentation, in many programs, the in vivo tests are used only to assess in vitro positive results. The ability of in vivo micronucleus to identify real positives (i.e. 
carcinogens) among chemicals positive in Salmonella or among chemicals inducing in vitro chromosomal aberrations was studied. It appears that the in vivo micronucleus test does not have added value and rather impairs the prediction ability of the in vitro tests alone. The overall evidence indicates that in vivo micronucleus--in its present form--cannot be considered an useful tool for routine genotoxicity testing but should be used in targeted mechanistic studies.",ISSMIC,0.998472512,NA,0,ISSMIC,0.998472512,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/30/2011 +31265791,http://doi.org/10.23641/asha.8330009,"Linguistic Materials and Metrics for the Creation of Well-Controlled Swedish Speech Perception Tests. Purpose As factors influencing human word perception are important in the construction of speech perception tests used within the speech and hearing sciences, the purposes of this study were as follows: first, to develop algorithms that can be used to calculate different types of word metrics that influence the speed and accuracy of word perception and, second, to create a database in which those word metrics were calculated for a large set of Swedish words. Method Based on a revision of a large Swedish phonetic dictionary, data and algorithms were developed by which various frequency metrics, word length metrics, semantic metrics, neighborhood metrics, phonotactic metrics, and orthographic transparency metrics were calculated for each word in the dictionary. Of the various word metric algorithms used, some were Swedish language reimplementations of previously published algorithms, and some were developed in this study. Results The results of this study have been gathered in a Swedish word metric database called the AFC-list. The AFC-list consists of 816,404 phonetically transcribed Swedish words, all supplied with the word metric data calculated. The full AFC-list has been made publicly available under the Creative Commons Attribution 4.0 International license. 
Conclusion The results of this study constitute an extensive linguistic resource for the process of selecting test items in new well-controlled speech perception tests in the Swedish language. Supplemental Material https://doi.org/10.23641/asha.8330009.",ist,0.533448696,NA,0,ist,0.533448696,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,7/2/2019 +28592645,http://isvdb.unc.edu,"Inbred Strain Variant Database (ISVdb): A Repository for Probabilistically Informed Sequence Differences Among the Collaborative Cross Strains and Their Founders. The Collaborative Cross (CC) is a panel of recently established multiparental recombinant inbred mouse strains. For the CC, as for any multiparental population (MPP), effective experimental design and analysis benefit from detailed knowledge of the genetic differences between strains. Such differences can be directly determined by sequencing, but until now whole-genome sequencing was not publicly available for individual CC strains. An alternative and complementary approach is to infer genetic differences by combining two pieces of information: probabilistic estimates of the CC haplotype mosaic from a custom genotyping array, and probabilistic variant calls from sequencing of the CC founders. The computation for this inference, especially when performed genome-wide, can be intricate and time-consuming, requiring the researcher to generate nontrivial and potentially error-prone scripts. To provide standardized, easy-to-access CC sequence information, we have developed the Inbred Strain Variant Database (ISVdb). The ISVdb provides, for all the exonic variants from the Sanger Institute mouse sequencing dataset, direct sequence information for CC founders and, critically, the imputed sequence information for CC strains. 
Notably, the ISVdb also: (1) provides predicted variant consequence metadata; (2) allows rapid simulation of F1 populations; and (3) preserves imputation uncertainty, which will allow imputed data to be refined in the future as additional sequencing and genotyping data are collected. The ISVdb information is housed in an SQL database and is easily accessible through a custom online interface (http://isvdb.unc.edu), reducing the analytic burden on any researcher using the CC.",ISVdb,0.981565356,Inbred Strain Variant Database,0.950276415,ISVdb,0.981565356,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/7/2017 +"29036527, 30417254",http://research.bioinformatics.udel.edu/iSyTE,"iSyTE 2.0: a database for expression-based gene discovery in the eye. Although successful in identifying new cataract-linked genes, the previous version of the database iSyTE (integrated Systems Tool for Eye gene discovery) was based on expression information on just three mouse lens stages and was functionally limited to visualization by only UCSC-Genome Browser tracks. To increase its efficacy, here we provide an enhanced iSyTE version 2.0 (URL: http://research.bioinformatics.udel.edu/iSyTE) based on well-curated, comprehensive genome-level lens expression data as a one-stop portal for the effective visualization and analysis of candidate genes in lens development and disease. iSyTE 2.0 includes all publicly available lens Affymetrix and Illumina microarray datasets representing a broad range of embryonic and postnatal stages from wild-type and specific gene-perturbation mouse mutants with eye defects. Further, we developed a new user-friendly web interface for direct access and cogent visualization of the curated expression data, which supports convenient searches and a range of downstream analyses. The utility of these new iSyTE 2.0 features is illustrated through examples of established genes associated with lens development and pathobiology, which serve as tutorials for its application by the end-user. 
iSyTE 2.0 will facilitate the prioritization of eye development and disease-linked candidate genes in studies involving transcriptomics or next-generation sequencing data, linkage analysis and GWAS approaches.",iSyTE,0.93340987,NA,0,iSyTE,0.93340987,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/11/2018 +26653323,http://sites.google.com/a/vt.edu/biomolecular-engineering-lab/software,"iTAP: integrated transcriptomics and phenotype database for stress response of Escherichia coli and Saccharomyces cerevisiae. Background Organisms are subject to various stress conditions, which affect both the organism's gene expression and phenotype. It is critical to understand microbial responses to stress conditions and uncover the underlying molecular mechanisms. To this end, it is necessary to build a database that collects transcriptomics and phenotypic data of microbes growing under various stress factors for in-depth systems biology analysis. Despite of numerous databases that collect gene expression profiles, to our best knowledge, there are few, if any, databases that collect both transcriptomics and phenotype data simultaneously. In light of this, we have developed an open source, web-based database, namely integrated transcriptomics and phenotype (iTAP) database, that records and links the transcriptomics and phenotype data for two model microorganisms, Escherichia coli and Saccharomyces cerevisiae in response to exposure of various stress conditions. Results To collect the data, we chose relevant research papers from the PubMed database containing all the necessary information for data curation including experimental conditions, transcriptomics data, and phenotype data. The transcriptomics data, including the p value and fold change, were obtained through the comparison of test strains against control strains using Gene Expression Omnibus's GEO2R analyzer. 
The phenotype data, including the cell growth rate and the productivity, volumetric rate, and mass-based yield of byproducts, were calculated independently from charts or graphs within the reference papers. Since the phenotype data was never reported in a standardized format, the curation of correlated transcriptomics-phenotype datasets became extremely tedious and time-consuming. Despite the challenges, till now, we successfully correlated 57 and 143 datasets of transcriptomics and phenotype for E. coli and S. cerevisiae, respectively, and applied a regression model within the iTAP database to accurately predict over 93 and 73 % of the growth rates of E. coli and S. cerevisiae, respectively, directly from the transcriptomics data. Conclusion This is the first time that transcriptomics and phenotype data are categorized and correlated in an open-source database. This allows biologists to access the database and utilize it to predict the phenotype of microorganisms from their transcriptomics data. The iTAP database is freely available at https://sites.google.com/a/vt.edu/biomolecular-engineering-lab/software .",iTAP,0.940019906,NA,0,iTAP,0.940019906,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/12/2015 +25058394,http://www.ithanet.eu/db/ithagenes,"IthaGenes: an interactive database for haemoglobin variations and epidemiology. Inherited haemoglobinopathies are the most common monogenic diseases, with millions of carriers and patients worldwide. At present, we know several hundred disease-causing mutations on the globin gene clusters, in addition to numerous clinically important trans-acting disease modifiers encoded elsewhere and a multitude of polymorphisms with relevance for advanced diagnostic approaches. Moreover, new disease-linked variations are discovered every year that are not included in traditional and often functionally limited locus-specific databases. 
This paper presents IthaGenes, a new interactive database of haemoglobin variations, which stores information about genes and variations affecting haemoglobin disorders. In addition, IthaGenes organises phenotype, relevant publications and external links, while embedding the NCBI Sequence Viewer for graphical representation of each variation. Finally, IthaGenes is integrated with the companion tool IthaMaps for the display of corresponding epidemiological data on distribution maps. IthaGenes is incorporated in the ITHANET community portal and is free and publicly available at http://www.ithanet.eu/db/ithagenes.",IthaGenes,0.993503034,NA,0,IthaGenes,0.993503034,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/24/2014 +26248563,http://its2.bioapps.biozentrum.uni-wuerzburg.de,"ITS2 Database V: Twice as Much. The internal transcribed spacer 2 (ITS2) is a well-established marker for phylogenetic analyses in eukaryotes. A reliable resource for reference sequences and their secondary structures is the ITS2 database (http://its2.bioapps.biozentrum.uni-wuerzburg.de/). However, the database was last updated in 2011. Here, we present a major update of the underlying data almost doubling the number of entities. This increases the number of taxa represented within all major eukaryotic clades. Moreover, additional data has been added to underrepresented groups and some new groups have been added. The broader coverage across the tree of life improves phylogenetic analyses and the capability of ITS2 as a DNA barcode.",ITS2,0.728715201,NA,0,ITS2,0.728715201,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/6/2015 +22786784,http://itsonedb.ba.itb.cnr.it,"Reference databases for taxonomic assignment in metagenomics. Metagenomics is providing an unprecedented access to the environmental microbial diversity. The amplicon-based metagenomics approach involves the PCR-targeted sequencing of a genetic locus fitting different features. 
Namely, it must be ubiquitous in the taxonomic range of interest, variable enough to discriminate between different species but flanked by highly conserved sequences, and of suitable size to be sequenced through next-generation platforms. The internal transcribed spacers 1 and 2 (ITS1 and ITS2) of the ribosomal DNA operon and one or more hyper-variable regions of 16S ribosomal RNA gene are typically used to identify fungal and bacterial species, respectively. In this context, reliable reference databases and taxonomies are crucial to assign amplicon sequence reads to the correct phylogenetic ranks. Several resources provide consistent phylogenetic classification of publicly available 16S ribosomal DNA sequences, whereas the state of ribosomal internal transcribed spacers reference databases is notably less advanced. In this review, we aim to give an overview of existing reference resources for both types of markers, highlighting strengths and possible shortcomings of their use for metagenomics purposes. Moreover, we present a new database, ITSoneDB, of well annotated and phylogenetically classified ITS1 sequences to be used as a reference collection in metagenomic studies of environmental fungal communities. ITSoneDB is available for download and browsing at http://itsonedb.ba.itb.cnr.it/.",ITSoneDB,0.997155786,NA,0,ITSoneDB,0.997155786,1,NA,29036529,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,7/10/2012 +29036529,http://itsonedb.cloud.ba.infn.it,"ITSoneDB: a comprehensive collection of eukaryotic ribosomal RNA Internal Transcribed Spacer 1 (ITS1) sequences. A holistic understanding of environmental communities is the new challenge of metagenomics. Accordingly, the amplicon-based or metabarcoding approach, largely applied to investigate bacterial microbiomes, is moving to the eukaryotic world too. 
Indeed, the analysis of metabarcoding data may provide a comprehensive assessment of both bacterial and eukaryotic composition in a variety of environments, including human body. In this respect, whereas hypervariable regions of the 16S rRNA are the de facto standard barcode for bacteria, the Internal Transcribed Spacer 1 (ITS1) of ribosomal RNA gene cluster has shown a high potential in discriminating eukaryotes at deep taxonomic levels. As metabarcoding data analysis rely on the availability of a well-curated barcode reference resource, a comprehensive collection of ITS1 sequences supplied with robust taxonomies, is highly needed. To address this issue, we created ITSoneDB (available at http://itsonedb.cloud.ba.infn.it/) which in its current version hosts 985 240 ITS1 sequences spanning over 134 000 eukaryotic species. Each ITS1 is mapped on the NCBI reference taxonomy with its start and end positions precisely annotated. ITSoneDB has been developed in agreement to the FAIR guidelines by enabling the users to query and download its content through a simple web-interface and access relevant metadata by cross-linking to European Nucleotide Archive.",ITSoneDB,0.99713707,NA,0,ITSoneDB,0.99713707,1,NA,22786784,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +"22674159, 23087376",http://www.iuphar-db.org,"How to use the IUPHAR receptor database to navigate pharmacological data. Today's data-intensive, interdisciplinary research challenges scientists to keep up to date with key experimental techniques and tools reported in the literature. The International Union of Basic and Clinical Pharmacology Database (IUPHAR-DB) goes some way to addressing this need by providing expert-curated information sourced from primary literature and displayed in a user-friendly manner online. 
The database provides a channel for the IUPHAR Nomenclature Committee (NC-IUPHAR) to provide recommendations on the nomenclature of receptors and ion channels, to document their properties and the ligands that are useful for receptor characterization. Here we describe IUPHAR-DB's main features and provide examples of techniques for navigating and exploring the information. The database is freely available online at http://www.iuphar-db.org/.",IUPHAR-DB,0.994823694,of Basic and Clinical Pharmacology Database,0.740390394,IUPHAR-DB,0.994823694,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/18/2012 +29106644,http://iuucd.biocuckoo.org,"iUUCD 2.0: an update with rich annotations for ubiquitin and ubiquitin-like conjugations. Here, we described the updated database iUUCD 2.0 (http://iuucd.biocuckoo.org/) for ubiquitin-activating enzymes (E1s), ubiquitin-conjugating enzymes (E2s), ubiquitin-protein ligases (E3s), deubiquitinating enzymes (DUBs), ubiquitin/ubiquitin-like binding domains (UBDs) and ubiquitin-like domains (ULDs), which act as key regulators in modulating ubiquitin and ubiquitin-like (UB/UBL) conjugations. In total, iUUCD 2.0 contained 136 512 UB/UBL regulators, including 1230 E1s, 5636 E2s, 93 343 E3s, 9548 DUBs, 30 173 UBDs and 11 099 ULDs in 148 eukaryotic species. In particular, we provided rich annotations for regulators of eight model organisms, especially in humans, by compiling and integrating the knowledge from nearly 70 widely used public databases that cover cancer mutations, single nucleotide polymorphisms (SNPs), mRNA expression, DNA and RNA elements, protein-protein interactions, protein 3D structures, disease-associated information, drug-target relations, post-translational modifications, DNA methylation and protein expression/proteomics. Compared with our previously developed UUCD 1.0 (∼0.41 GB), iUUCD 2.0 has a size of ∼32.1 GB of data with a >75-fold increase in data volume. 
We anticipate that iUUCD 2.0 can be a more useful resource for further study of UB/UBL conjugations.",iUUCD,0.997192144,NA,0,iUUCD,0.997192144,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +"24194598, 26531826, 29140473, 31701148",http://jaspar.genereg.net,"JASPAR 2014: an extensively expanded and updated open-access database of transcription factor binding profiles. JASPAR (http://jaspar.genereg.net) is the largest open-access database of matrix-based nucleotide profiles describing the binding preference of transcription factors from multiple species. The fifth major release greatly expands the heart of JASPAR-the JASPAR CORE subcollection, which contains curated, non-redundant profiles-with 135 new curated profiles (74 in vertebrates, 8 in Drosophila melanogaster, 10 in Caenorhabditis elegans and 43 in Arabidopsis thaliana; a 30% increase in total) and 43 older updated profiles (36 in vertebrates, 3 in D. melanogaster and 4 in A. thaliana; a 9% update in total). The new and updated profiles are mainly derived from published chromatin immunoprecipitation-seq experimental datasets. In addition, the web interface has been enhanced with advanced capabilities in browsing, searching and subsetting. Finally, the new JASPAR release is accompanied by a new BioPython package, a new R tool package and a new R/Bioconductor data package to facilitate access for both manual and automated methods.",JASPAR,0.993904173,NA,0,JASPAR,0.993904173,4,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24905498,http://www.addgene.org,"The plant glycosyltransferase clone collection for functional genomics. The glycosyltransferases (GTs) are an important and functionally diverse family of enzymes involved in glycan and glycoside biosynthesis. Plants have evolved large families of GTs which undertake the array of glycosylation reactions that occur during plant development and growth. 
Based on the Carbohydrate-Active enZymes (CAZy) database, the genome of the reference plant Arabidopsis thaliana codes for over 450 GTs, while the rice genome (Oryza sativa) contains over 600 members. Collectively, GTs from these reference plants can be classified into over 40 distinct GT families. Although these enzymes are involved in many important plant specific processes such as cell-wall and secondary metabolite biosynthesis, few have been functionally characterized. We have sought to develop a plant GTs clone resource that will enable functional genomic approaches to be undertaken by the plant research community. In total, 403 (88%) of CAZy defined Arabidopsis GTs have been cloned, while 96 (15%) of the GTs coded by rice have been cloned. The collection resulted in the update of a number of Arabidopsis GT gene models. The clones represent full-length coding sequences without termination codons and are Gateway® compatible. To demonstrate the utility of this JBEI GT Collection, a set of efficient particle bombardment plasmids (pBullet) was also constructed with markers for the endomembrane. The utility of the pBullet collection was demonstrated by localizing all members of the Arabidopsis GT14 family to the Golgi apparatus or the endoplasmic reticulum (ER). Updates to these resources are available at the JBEI GT Collection website http://www.addgene.org/.",JBEI,0.662117004,NA,0,JBEI,0.662117004,1,25392412,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: CLASS,NA,NA,7/9/2014 +31874631,http://jcdb.xtbg.ac.cn,"JCDB: a comprehensive knowledge base for Jatropha curcas, an emerging model for woody energy plants. Background Jatropha curcas is an oil-bearing plant, and has seeds with high oil content (~ 40%). Several advantages, such as easy genetic transformation and short generation duration, have led to the emergence of J. curcas as a model for woody energy plants. 
With the development of high-throughput sequencing, the genome of Jatropha curcas has been sequenced by different groups and a mass of transcriptome data was released. How to integrate and analyze these omics data is crucial for functional genomics research on J. curcas. Results By establishing pipelines for processing novel gene identification, gene function annotation, and gene network construction, we systematically integrated and analyzed a series of J. curcas transcriptome data. Based on these data, we constructed a J. curcas database (JCDB), which not only includes general gene information, gene functional annotation, gene interaction networks, and gene expression matrices but also provides tools for browsing, searching, and downloading data, as well as online BLAST, the JBrowse genome browser, ID conversion, heatmaps, and gene network analysis tools. Conclusions JCDB is the most comprehensive and well annotated knowledge base for J. curcas. We believe it will make a valuable contribution to the functional genomics study of J. curcas. The database is accessible at http://jcdb.xtbg.ac.cn.",JCDB,0.97426182,curcas database,0.846007625,JCDB,0.97426182,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/24/2019 +27899675,http://www.jet2viewer.upmc.fr,"JET2 Viewer: a database of predicted multiple, possibly overlapping, protein-protein interaction sites for PDB structures. The database JET2 Viewer, openly accessible at http://www.jet2viewer.upmc.fr/, reports putative protein binding sites for all three-dimensional (3D) structures available in the Protein Data Bank (PDB). This knowledge base was generated by applying the computational method JET2 at large-scale on more than 20 000 chains. JET2 strategy yields very precise predictions of interacting surfaces and unravels their evolutionary process and complexity. 
JET2 Viewer provides an online intelligent display, including interactive 3D visualization of the binding sites mapped onto PDB structures and suitable files recording JET2 analyses. Predictions were evaluated on more than 15 000 experimentally characterized protein interfaces. This is, to our knowledge, the largest evaluation of a protein binding site prediction method. The overall performance of JET2 on all interfaces are: Sen = 52.52, PPV = 51.24, Spe = 80.05, Acc = 75.89. The data can be used to foster new strategies for protein-protein interactions modulation and interaction surface redesign.",JET2,0.992096901,NA,0,JET2,0.992096901,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +22110030,"http://genome.jgi.doe.gov, http://jgi.doe.gov/fungi","The genome portal of the Department of Energy Joint Genome Institute. The Department of Energy (DOE) Joint Genome Institute (JGI) is a national user facility with massive-scale DNA sequencing and analysis capabilities dedicated to advancing genomics for bioenergy and environmental applications. Beyond generating tens of trillions of DNA bases annually, the Institute develops and maintains data management systems and specialized analytical capabilities to manage and interpret complex genomic data sets, and to enable an expanding community of users around the world to analyze these data in different contexts over the web. The JGI Genome Portal (http://genome.jgi.doe.gov) provides a unified access point to all JGI genomic databases and analytical tools. A user can find all DOE JGI sequencing projects and their status, search for and download assemblies and annotations of sequenced genomes, and interactively explore those genomes and compare them with other sequenced microbes, fungi, plants or metagenomes using specialized systems tailored to each particular class of organisms. 
We describe here the general organization of the Genome Portal and the most recent addition, MycoCosm (http://jgi.doe.gov/fungi), a new integrated fungal genomics resource.",JGI,0.907077233,NA,0,JGI,0.907077233,1,24225321,24225321,low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,11/22/2011 +24225321,http://genome.jgi.doe.gov,"The genome portal of the Department of Energy Joint Genome Institute: 2014 updates. The U.S. Department of Energy (DOE) Joint Genome Institute (JGI), a national user facility, serves the diverse scientific community by providing integrated high-throughput sequencing and computational analysis to enable system-based scientific approaches in support of DOE missions related to clean energy generation and environmental characterization. The JGI Genome Portal (http://genome.jgi.doe.gov) provides unified access to all JGI genomic databases and analytical tools. The JGI maintains extensive data management systems and specialized analytical capabilities to manage and interpret complex genomic data. A user can search, download and explore multiple data sets available for all DOE JGI sequencing projects including their status, assemblies and annotations of sequenced genomes. Here we describe major updates of the Genome Portal in the past 2 years with a specific emphasis on efficient handling of the rapidly growing amount of diverse genomic data accumulated in JGI.",JGI,0.841488481,NA,0,JGI,0.841488481,1,22110030,22110030,low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: CLASS,NA,NA,11/12/2013 +28416714,http://jinglebells.bgu.ac.il,"JingleBells: A Repository of Immune-Related Single-Cell RNA-Sequencing Datasets. Recent advances in single-cell RNA-sequencing (scRNA-seq) technology increase the understanding of immune differentiation and activation processes, as well as the heterogeneity of immune cell types. 
Although the number of available immune-related scRNA-seq datasets increases rapidly, their large size and various formats render them hard for the wider immunology community to use, and read-level data are practically inaccessible to the non-computational immunologist. To facilitate datasets reuse, we created the JingleBells repository for immune-related scRNA-seq datasets ready for analysis and visualization of reads at the single-cell level (http://jinglebells.bgu.ac.il/). To this end, we collected the raw data of publicly available immune-related scRNA-seq datasets, aligned the reads to the relevant genome, and saved aligned reads in a uniform format, annotated for cell of origin. We also added scripts and a step-by-step tutorial for visualizing each dataset at the single-cell level, through the commonly used Integrated Genome Viewer (www.broadinstitute.org/igv/). The uniform scRNA-seq format used in JingleBells can facilitate reuse of scRNA-seq data by computational biologists. It also enables immunologists who are interested in a specific gene to visualize the reads aligned to this gene to estimate cell-specific preferences for splicing, mutation load, or alleles. Thus JingleBells is a resource that will extend the usefulness of scRNA-seq datasets outside the programming aficionado realm.",JingleBells,0.996701837,NA,0,JingleBells,0.996701837,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2017 +"29069501, 33179747",http://jmorp.megabank.tohoku.ac.jp,"jMorp: Japanese Multi Omics Reference Panel. We developed jMorp, a new database containing metabolome and proteome data for plasma obtained from >5000 healthy Japanese volunteers from the Tohoku Medical Megabank Cohort Study, which is available at https://jmorp.megabank.tohoku.ac.jp. Metabolome data were measured by proton nuclear magnetic resonance (NMR) and liquid chromatography-mass spectrometry (LC-MS), while proteome data were obtained by nanoLC-MS. 
We released the concentration distributions of 37 metabolites identified by NMR, distributions of peak intensities of 257 characterized metabolites by LC-MS, and observed frequencies of 256 abundant proteins. Additionally, correlation networks for the metabolites can be observed using an interactive network viewer. Compared with some existing databases, jMorp has some unique features: (i) Metabolome data were obtained using a single protocol in a single institute, ensuring that measurement biases were significantly minimized; (ii) The database contains large-scale data for healthy volunteers with various health records and genome data and (iii) Correlations between metabolites can be easily observed using the graphical viewer. Metabolites data are becoming important intermediate markers for evaluating the health states of humans, and thus jMorp is an outstanding resource for a wide range of researchers, particularly those in the fields of medical science, applied molecular biology, and biochemistry.",jMorp,0.996074796,Japanese Multi Omics Reference Panel,0.883684933,jMorp,0.996074796,2,31240104,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,NA,1/1/2021 +30295851,http://jpostdb.org,"The jPOST environment: an integrated proteomics data repository and database. Rapid progress is being made in mass spectrometry (MS)-based proteomics, yielding an increasing number of larger datasets with higher quality and higher throughput. To integrate proteomics datasets generated from various projects and institutions, we launched a project named jPOST (Japan ProteOme STandard Repository/Database, https://jpostdb.org/) in 2015. Its proteomics data repository, jPOSTrepo, began operations in 2016 and has accepted more than 10 TB of MS-based proteomics datasets in the past two years. In addition, we have developed a new proteomics database named jPOSTdb in which the published raw datasets in jPOSTrepo are reanalyzed using standardized protocol. 
jPOSTdb provides viewers showing the frequency of detected post-translational modifications, the co-occurrence of phosphorylation sites on a peptide and peptide sharing among proteoforms. jPOSTdb also provides basic statistical analysis tools to compare proteomics datasets.",jPOST,0.991378427,Japan ProteOme,0.680601423,jPOST,0.991378427,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +27899654,http://repository.jpostdb.org,"jPOSTrepo: an international standard data repository for proteomes. Major advancements have recently been made in mass spectrometry-based proteomics, yielding an increasing number of datasets from various proteomics projects worldwide. In order to facilitate the sharing and reuse of promising datasets, it is important to construct appropriate, high-quality public data repositories. jPOSTrepo (https://repository.jpostdb.org/) has successfully implemented several unique features, including high-speed file uploading, flexible file management and easy-to-use interfaces. This repository has been launched as a public repository containing various proteomic datasets and is available for researchers worldwide. In addition, our repository has joined the ProteomeXchange consortium, which includes the most popular public repositories such as PRIDE in Europe for MS/MS datasets and PASSEL for SRM datasets in the USA. Later MassIVE was introduced in the USA and accepted into the ProteomeXchange, as was our repository in July 2016, providing important datasets from Asia/Oceania. Accordingly, this repository thus contributes to a global alliance to share and store all datasets from a wide variety of proteomics experiments. 
Thus, the repository is expected to become a major repository, particularly for data collected in the Asia/Oceania region.",jPOSTrepo,0.978089273,NA,0,jPOSTrepo,0.978089273,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +26424080,http://gmo-crl.jrc.ec.europa.eu/jrcgmoamplicons,"JRC GMO-Amplicons: a collection of nucleic acid sequences related to genetically modified organisms. . The DNA target sequence is the key element in designing detection methods for genetically modified organisms (GMOs). Unfortunately this information is frequently lacking, especially for unauthorized GMOs. In addition, patent sequences are generally poorly annotated, buried in complex and extensive documentation and hard to link to the corresponding GM event. Here, we present the JRC GMO-Amplicons, a database of amplicons collected by screening public nucleotide sequence databanks by in silico determination of PCR amplification with reference methods for GMO analysis. The European Union Reference Laboratory for Genetically Modified Food and Feed (EU-RL GMFF) provides these methods in the GMOMETHODS database to support enforcement of EU legislation and GM food/feed control. The JRC GMO-Amplicons database is composed of more than 240 000 amplicons, which can be easily accessed and screened through a web interface. To our knowledge, this is the first attempt at pooling and collecting publicly available sequences related to GMOs in food and feed. The JRC GMO-Amplicons supports control laboratories in the design and assessment of GMO methods, providing inter-alia in silico prediction of primers specificity and GM targets coverage. The new tool can assist the laboratories in the analysis of complex issues, such as the detection and identification of unauthorized GMOs. Notably, the JRC GMO-Amplicons database allows the retrieval and characterization of GMO-related sequences included in patents documentation. 
Finally, it can help annotating poorly described GM sequences and identifying new relevant GMO-related sequences in public databases. The JRC GMO-Amplicons is freely accessible through a web-based portal that is hosted on the EU-RL GMFF website. Database URL: http://gmo-crl.jrc.ec.europa.eu/jrcgmoamplicons/.",JRC GMO-Amplicons,0.748133315,NA,0,JRC GMO-Amplicons,0.748133315,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/30/2015 +26519469,http://juncdb.carmelab.huji.ac.il,"JuncDB: an exon-exon junction database. Intron positions upon the mRNA transcript are sometimes remarkably conserved even across distantly related eukaryotic species. This has made the comparison of intron-exon architectures across orthologous transcripts a very useful tool for studying various evolutionary processes. Moreover, the wide range of functions associated with introns may confer biological meaning to evolutionary changes in gene architectures. Yet, there is currently no database that offers such comparative information. Here, we present JuncDB (http://juncdb.carmelab.huji.ac.il/), an exon-exon junction database dedicated to the comparison of architectures between orthologous transcripts. It covers nearly 40,000 sets of orthologous transcripts spanning 88 eukaryotic species. JuncDB offers a user-friendly interface, access to detailed information, instructive graphical displays of the comparative data and easy ways to download data to a local computer. In addition, JuncDB allows the analysis to be carried out either on specific genes, or at a genome-wide level for any selected group of species.",JuncDB,0.996719241,NA,0,JuncDB,0.996719241,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/30/2015 +28130238,http://jjj.bio.vu.nl/models/experiments,"The JWS online simulation database. Summary JWS Online is a web-based platform for construction, simulation and exchange of models in standard formats. 
We have extended the platform with a database for curated simulation experiments that can be accessed directly via a URL, allowing one-click reproduction of published results. Users can modify the simulation experiments and export them in standard formats. The Simulation database thus lowers the bar on exploring computational models, helps users create valid simulation descriptions and improves the reproducibility of published simulation experiments. Availability and implementation The Simulation Database is available on line at https://jjj.bio.vu.nl/models/experiments/ . Contact jls@sun.ac.za .",JWS,0.891816378,NA,0,JWS,0.891816378,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,5/1/2017 +33645624,http://kaikobase.dna.affrc.go.jp,"An update of KAIKObase, the silkworm genome database. . KAIKObase was established in 2009 as the genome database of the domesticated silkworm Bombyx mori. It provides several gene sets and genetic maps as well as genome annotation obtained from the sequencing project of the International Silkworm Genome Consortium in 2008. KAIKObase has been used widely for silkworm and insect studies even though there are some erroneous predicted genes due to misassembly and gaps in the genome. In 2019, we released a new silkworm genome assembly, showing improvements in gap closure and covering more and longer gene models. Therefore, there is a need to include new genome and new gene models to KAIKObase. In this article, we present the updated contents of KAIKObase and the methods to generate, integrate and analyze the data sets. Database URL: https://kaikobase.dna.affrc.go.jp.",KAIKObase,0.99772799,silkworm genome,0.654908737,KAIKObase,0.99772799,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2021 +27087309,http://kaliumdb.org,"Kalium: a database of potassium channel toxins from scorpion venom. . Kalium (http://kaliumdb.org/) is a manually curated database that accumulates data on potassium channel toxins purified from scorpion venom (KTx). 
This database is an open-access resource, and provides easy access to pages of other databases of interest, such as UniProt, PDB, NCBI Taxonomy Browser, and PubMed. General achievements of Kalium are a strict and easy regulation of KTx classification based on the unified nomenclature supported by researchers in the field, removal of peptides with partial sequence and entries supported by transcriptomic information only, classification of β-family toxins, and addition of a novel λ-family. Molecules presented in the database can be processed by the Clustal Omega server using a one-click option. Molecular masses of mature peptides are calculated and available activity data are compiled for all KTx. We believe that Kalium is not only of high interest to professional toxinologists, but also of general utility to the scientific community.Database URL:http://kaliumdb.org/.",Kalium,0.988748372,NA,0,Kalium,0.988748372,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/17/2016 +30046160,http://wakanmoview.inm.u-toyama.ac.jp/kampo,"KampoDB, database of predicted targets and functional annotations of natural medicines. Natural medicines (i.e., herbal medicines, traditional formulas) are useful for treatment of multifactorial and chronic diseases. Here, we present KampoDB ( http://wakanmoview.inm.u-toyama.ac.jp/kampo/ ), a novel platform for the analysis of natural medicines, which provides various useful scientific resources on Japanese traditional formulas Kampo medicines, constituent herbal drugs, constituent compounds, and target proteins of these constituent compounds. Potential target proteins of these constituent compounds were predicted by docking simulations and machine learning methods based on large-scale omics data (e.g., genome, proteome, metabolome, interactome). 
The current version of KampoDB contains 42 Kampo medicines, 54 crude drugs, 1230 constituent compounds, 460 known target proteins, and 1369 potential target proteins, and has functional annotations for biological pathways and molecular functions. KampoDB is useful for mode-of-action analysis of natural medicines and prediction of new indications for a wide range of diseases.",KampoDB,0.997830033,NA,0,KampoDB,0.997830033,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/25/2018 +24507667,http://www.motorprotein.de/kassiopeia,"Kassiopeia: a database and web application for the analysis of mutually exclusive exomes of eukaryotes. Background Alternative splicing is an important process in higher eukaryotes that allows obtaining several transcripts from one gene. A specific case of alternative splicing is mutually exclusive splicing, in which exactly one exon out of a cluster of neighbouring exons is spliced into the mature transcript. Recently, a new algorithm for the prediction of these exons has been developed based on the preconditions that the exons of the cluster have similar lengths, sequence homology, and conserved splice sites, and that they are translated in the same reading frame. Description In this contribution we introduce Kassiopeia, a database and web application for the generation, storage, and presentation of genome-wide analyses of mutually exclusive exomes. Currently, Kassiopeia provides access to the mutually exclusive exomes of twelve Drosophila species, the thale cress Arabidopsis thaliana, the flatworm Caenorhabditis elegans, and human. Mutually exclusive spliced exons (MXEs) were predicted based on gene reconstructions from Scipio. Based on the standard prediction values, with which 83.5% of the annotated MXEs of Drosophila melanogaster were reconstructed, the exomes contain surprisingly more MXEs than previously supposed and identified. 
The user can search Kassiopeia using BLAST or browse the genes of each species optionally adjusting the parameters used for the prediction to reveal more divergent or only very similar exon candidates. Conclusions We developed a pipeline to predict MXEs in the genomes of several model organisms and a web interface, Kassiopeia, for their visualization. For each gene Kassiopeia provides a comprehensive gene structure scheme, the sequences and predicted secondary structures of the MXEs, and, if available, further evidence for MXE candidates from cDNA/EST data, predictions of MXEs in homologous genes of closely related species, and RNA secondary structure predictions. Kassiopeia can be accessed at http://www.motorprotein.de/kassiopeia.",Kassiopeia,0.946248412,NA,0,Kassiopeia,0.946248412,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/10/2014 +25320561,http://marker.kazusa.or.jp,"Kazusa Marker DataBase: a database for genomics, genetics, and molecular breeding in plants. In order to provide useful genomic information for agronomical plants, we have established a database, the Kazusa Marker DataBase (http://marker.kazusa.or.jp). This database includes information on DNA markers, e.g., SSR and SNP markers, genetic linkage maps, and physical maps, that were developed at the Kazusa DNA Research Institute. Keyword searches for the markers, sequence data used for marker development, and experimental conditions are also available through this database. Currently, 10 plant species have been targeted: tomato (Solanum lycopersicum), pepper (Capsicum annuum), strawberry (Fragaria × ananassa), radish (Raphanus sativus), Lotus japonicus, soybean (Glycine max), peanut (Arachis hypogaea), red clover (Trifolium pratense), white clover (Trifolium repens), and eucalyptus (Eucalyptus camaldulensis). In addition, the number of plant species registered in this database will be increased as our research progresses. 
The Kazusa Marker DataBase will be a useful tool for both basic and applied sciences, such as genomics, genetics, and molecular breeding in crops.",DataBase,0.645969987,Kazusa Marker DataBase,0.685222085,Kazusa Marker DataBase,0.685222085,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/1/2014 +22736877,"http://kb.phenoscape.org, http://zfin.org","500,000 fish phenotypes: The new informatics landscape for evolutionary and developmental biology of the vertebrate skeleton. The rich phenotypic diversity that characterizes the vertebrate skeleton results from evolutionary changes in regulation of genes that drive development. Although relatively little is known about the genes that underlie the skeletal variation among fish species, significant knowledge of genetics and development is available for zebrafish. Because developmental processes are highly conserved, this knowledge can be leveraged for understanding the evolution of skeletal diversity. We developed the Phenoscape Knowledgebase (KB; http://kb.phenoscape.org) to yield testable hypotheses of candidate genes involved in skeletal evolution. We developed a community anatomy ontology for fishes and ontology-based methods to represent complex free-text character descriptions of species in a computable format. With these tools, we populated the KB with comparative morphological data from the literature on over 2,500 teleost fishes (mainly Ostariophysi) resulting in over 500,000 taxon phenotype annotations. The KB integrates these data with similarly structured phenotype data from zebrafish genes (http://zfin.org). Using ontology-based reasoning, candidate genes can be inferred for the phenotypes that vary across taxa, thereby uniting genetic and phenotypic data to formulate evo-devo hypotheses. 
The morphological data in the KB can be browsed, sorted, and aggregated in ways that provide unprecedented possibilities for data mining and discovery.",KB,0.750262052,NA,0,KB,0.750262052,1,23180778,NA,low_prob_best_name,do not remove,do not merge,NA,NA,NA,NA,5/21/2012 +27630202,http://www.kawasakidisease.kr,"Establishment of Kawasaki disease database based on metadata standard. . Kawasaki disease (KD) is a rare disease that occurs predominantly in infants and young children. To identify KD susceptibility genes and to develop a diagnostic test, a specific therapy, or prevention method, collecting KD patients' clinical and genomic data is one of the major issues. For this purpose, Kawasaki Disease Database (KDD) was developed based on the efforts of Korean Kawasaki Disease Genetics Consortium (KKDGC). KDD is a collection of 1292 clinical data and genomic samples of 1283 patients from 13 KKDGC-participating hospitals. Each sample contains the relevant clinical data, genomic DNA and plasma samples isolated from patients' blood, omics data and KD-associated genotype data. Clinical data was collected and saved using the common data elements based on the ISO/IEC 11179 metadata standard. Two genome-wide association study data of total 482 samples and whole exome sequencing data of 12 samples were also collected. In addition, KDD includes the rare cases of KD (16 cases with family history, 46 cases with recurrence, 119 cases with intravenous immunoglobulin non-responsiveness, and 52 cases with coronary artery aneurysm). As the first public database for KD, KDD can significantly facilitate KD studies. All data in KDD can be searchable and downloadable. 
KDD was implemented in PHP, MySQL and Apache, with all major browsers supported.Database URL: http://www.kawasakidisease.kr.",KDD,0.99066178,Kawasaki Disease Database,0.97202076,KDD,0.99066178,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2016 +23584834,"http://www.allelefrequencies.net, http://www.allelefrequencies.net/diseases","A database for curating the associations between killer cell immunoglobulin-like receptors and diseases in worldwide populations. The killer cell immunoglobulin-like receptors (KIR) play a fundamental role in the innate immune system, through their interactions with human leucocyte antigen (HLA) molecules, leading to the modulation of activity in natural killer (NK) cells, mainly related to killing pathogen-infected cells. KIR genes are hugely polymorphic both in the number of genes an individual carries and in the number of alleles identified. We have previously developed the Allele Frequency Net Database (AFND, http://www.allelefrequencies.net), which captures worldwide frequencies of alleles, genes and haplotypes for several immune genes, including KIR genes, in healthy populations, covering >4 million individuals. Here, we report the creation of a new database within AFND, named KIR and Diseases Database (KDDB), capturing a large quantity of data derived from publications in which KIR genes, alleles, genotypes and/or haplotypes have been associated with infectious diseases (e.g. hepatitis C, HIV, malaria), autoimmune disorders (e.g. type I diabetes, rheumatoid arthritis), cancer and pregnancy-related complications. KDDB has been created through an extensive manual curation effort, extracting data on more than a thousand KIR-disease records, comprising >50 000 individuals. 
KDDB thus provides a new community resource for understanding not only how KIR genes are associated with disease, but also, by working in tandem with the large data sets already present in AFND, where particular genes, genotypes or haplotypes are present in worldwide populations or different ethnic groups. We anticipate that KDDB will be an important resource for researchers working in immunogenetics. Database URL: http://www.allelefrequencies.net/diseases/.",KDDB,0.99216032,Allele Frequency Net Database,0.929808199,KDDB,0.99216032,1,"25414323.0, 29858801.0, 33755549.0",NA,NA,NA,do not merge,NA,NA,NA,NA,4/12/2013 +"21882442, 23192552",http://www.genome.jp/kegg,"Knowledge-Based Analysis of Protein Interaction Networks in Neurodegenerative Diseases. The large-scale datasets generated by gene sequencing, proteomics, and other high-throughput experimental technologies are the bases for understanding life as a molecular system and for developing medical, industrial, and other practical applications. In order to facilitate bioinformatics analysis of such large-scale datasets, it is essential to organize our knowledge on higher levels of systemic functions in a computable form, so that it can be used as a reference for inferring molecular systems from the information contained in the building blocks. Thus, we have been developing the KEGG (Kyoto Encyclopedia of Genes and Genomes) database (http://www.genome.jp/kegg/), an integrated resource of about 20 databases (1). The main component is the KEGG PATHWAY database, consisting of manually drawn graphical diagrams of molecular networks, called pathway maps, and representing various cellular processes and organism behaviors. KEGG PATHWAY is a reference database for pathway mapping, which is the process to match, for example, a genomic or transcriptomic content of genes against KEGG reference pathway maps to infer systemic functions of the cell or the organism. 
As part of the KEGG PATHWAY database, we organize disease pathway maps representing our knowledge of causative genes and molecular networks related to them for human diseases, including cancers, immune disorders, neurodegenerative diseases, metabolic disorders, and infectious diseases. Here we focus on neurodegenerative diseases, which were among the first to be made available on the KEGG PATHWAY database. A diverse range of neurodegenerative diseases is commonly characterized by the accumulation of abnormal protein aggregates. Causative genes, including those that produce abnormal proteins, have been identified in various neurodegenerative diseases. The current information is not sufficient to find common molecular mechanisms of the diseases. In this chapter we first present an overview of KEGG, including the KEGG DISEASE and KEGG DRUG databases, and describe the KEGG PATHWAY maps for six neurodegenerative diseases: Alzheimer’s disease (AD), Parkinson’s disease (PD), amyotrophic lateral sclerosis (ALS), Huntington’s disease (HD), dentatorubropallidoluysian atrophy (DRPLA), and prion diseases (PRION). We then present bioinformatics analysis to combine and expand these pathway maps toward identification of common proteins and common interactions, which may lead to a better understanding of common molecular pathogenic mechanisms (2).",KEGG,0.998268664,Kyoto Encyclopedia of Genes and Genomes,0.968456638,KEGG,0.998268664,2,NA,"22080510.0, 26519400.0, 33125081.0, 24214961.0, 26476454.0, 27899662.0, 30321428.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2013 +"26519400, 33125081",http://www.kegg.jp,"KEGG Bioinformatics Resource for Plant Genomics and Metabolomics. In the era of high-throughput biology it is necessary to develop not only elaborate computational methods but also well-curated databases that can be used as reference for data interpretation. KEGG ( http://www.kegg.jp/ ) is such a reference knowledge base with two specific aims. 
One is to compile knowledge on high-level functions of the cell and the organism in terms of the molecular interaction and reaction networks, which is implemented in KEGG pathway maps, BRITE functional hierarchies, and KEGG modules. The other is to expand knowledge on genes and proteins involved in the molecular networks from experimentally observed organisms to other organisms using the concept of orthologs, which is implemented in the KEGG Orthology (KO) system. Thus, KEGG is a generic resource applicable to all organisms and enables interpretation of high-level functions from genomic and molecular data. Here we first present a brief overview of the entire KEGG resource, and then give an introduction of how to use KEGG in plant genomics and metabolomics research.",KEGG,0.998182952,NA,0,KEGG,0.998182952,2,NA,"22080510.0, 21882442.0, 23192552.0, 24214961.0, 26476454.0, 27899662.0, 30321428.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +"24214961, 26476454, 27899662, 30321428","http://www.kegg.jp/, http://www.genome.jp/kegg","Data, information, knowledge and principle: back to metabolism in KEGG. In the hierarchy of data, information and knowledge, computational methods play a major role in the initial processing of data to extract information, but they alone become less effective to compile knowledge from information. The Kyoto Encyclopedia of Genes and Genomes (KEGG) resource (http://www.kegg.jp/ or http://www.genome.jp/kegg/) has been developed as a reference knowledge base to assist this latter process. In particular, the KEGG pathway maps are widely used for biological interpretation of genome sequences and other high-throughput data. The link from genomes to pathways is made through the KEGG Orthology system, a collection of manually defined ortholog groups identified by K numbers. To better automate this interpretation process the KEGG modules defined by Boolean expressions of K numbers have been expanded and improved. 
Once genes in a genome are annotated with K numbers, the KEGG modules can be computationally evaluated revealing metabolic capacities and other phenotypic features. The reaction modules, which represent chemical units of reactions, have been used to analyze design principles of metabolic networks and also to improve the definition of K numbers and associated annotations. For translational bioinformatics, the KEGG MEDICUS resource has been developed by integrating drug labels (package inserts) used in society.",KEGG,0.997738063,Kyoto Encyclopedia of Genes and Genomes,0.966118443,KEGG,0.997738063,4,NA,"22080510.0, 21882442.0, 23192552.0, 26519400.0, 33125081.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +22080510,"http://www.genome.jp/kegg/, http://www.kegg.jp","KEGG for integration and interpretation of large-scale molecular data sets. Kyoto Encyclopedia of Genes and Genomes (KEGG, http://www.genome.jp/kegg/ or http://www.kegg.jp/) is a database resource that integrates genomic, chemical and systemic functional information. In particular, gene catalogs from completely sequenced genomes are linked to higher-level systemic functions of the cell, the organism and the ecosystem. Major efforts have been undertaken to manually create a knowledge base for such systemic functions by capturing and organizing experimental knowledge in computable forms; namely, in the forms of KEGG pathway maps, BRITE functional hierarchies and KEGG modules. Continuous efforts have also been made to develop and improve the cross-species annotation procedure for linking genomes to the molecular networks through the KEGG Orthology system. Here we report KEGG Mapper, a collection of tools for KEGG PATHWAY, BRITE and MODULE mapping, enabling integration and interpretation of large-scale data sets. 
We also report a variant of the KEGG mapping procedure to extend the knowledge base, where different types of data and knowledge, such as disease genes and drug targets, are integrated as part of the KEGG molecular networks. Finally, we describe recent enhancements to the KEGG content, especially the incorporation of disease and drug information used in practice and in society, to support translational bioinformatics.",KEGG,0.995484889,Kyoto Encyclopedia of Genes and Genomes,0.887442343,KEGG,0.995484889,1,NA,"21882442.0, 23192552.0, 26519400.0, 33125081.0, 24214961.0, 26476454.0, 27899662.0, 30321428.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/10/2011 +23193276,http://www.genome.jp/tools/oc,"KEGG OC: a large-scale automatic construction of taxonomy-based ortholog clusters. The identification of orthologous genes in an increasing number of fully sequenced genomes is a challenging issue in recent genome science. Here we present KEGG OC (http://www.genome.jp/tools/oc/), a novel database of ortholog clusters (OCs). The current version of KEGG OC contains 1 176 030 OCs, obtained by clustering 8 357 175 genes in 2112 complete genomes (153 eukaryotes, 1830 bacteria and 129 archaea). The OCs were constructed by applying the quasi-clique-based clustering method to all possible protein coding genes in all complete genomes, based on their amino acid sequence similarities. It is computationally efficient to calculate OCs, which enables to regularly update the contents. KEGG OC has the following two features: (i) It consists of all complete genomes of a wide variety of organisms from three domains of life, and the number of organisms is the largest among the existing databases; and (ii) It is compatible with the KEGG database by sharing the same sets of genes and identifiers, which leads to seamless integration of OCs with useful components in KEGG such as biological pathways, pathway modules, functional hierarchy, diseases and drugs. 
The KEGG OC resources are accessible via OC Viewer that provides an interactive visualization of OCs at different taxonomic levels.",KEGG OC,0.981904417,NA,0,KEGG OC,0.981904417,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2012 +27789704,http://www.igenomed.org/keris,"KERIS: kaleidoscope of gene responses to inflammation between species. A cornerstone of modern biomedical research is the use of animal models to study disease mechanisms and to develop new therapeutic approaches. In order to help the research community to better explore the similarities and differences of genomic response between human inflammatory diseases and murine models, we developed KERIS: kaleidoscope of gene responses to inflammation between species (available at http://www.igenomed.org/keris/). As of June 2016, KERIS includes comparisons of the genomic response of six human inflammatory diseases (burns, trauma, infection, sepsis, endotoxin and acute respiratory distress syndrome) and matched mouse models, using 2257 curated samples from the Inflammation and the Host Response to Injury Glue Grant studies and other representative studies in Gene Expression Omnibus. A researcher can browse, query, visualize and compare the response patterns of genes, pathways and functional modules across different diseases and corresponding murine models. The database is expected to help biologists choosing models when studying the mechanisms of particular genes and pathways in a disease and prioritizing the translation of findings from disease models into clinical studies.",KERIS,0.998085916,NA,0,KERIS,0.998085916,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/26/2016 +31406905,http://www.grap.udl.cat/publicacions/datasets.html,"KFuji RGB-DS database: Fuji apple multi-modal images for fruit detection with color, depth and range-corrected IR data. This article contains data related to the research article entitle ""Multi-modal Deep Learning for Fruit Detection Using RGB-D Cameras and their Radiometric Capabilities"" [1]. 
The development of reliable fruit detection and localization systems is essential for future sustainable agronomic management of high-value crops. RGB-D sensors have shown potential for fruit detection and localization since they provide 3D information with color data. However, the lack of substantial datasets is a barrier for exploiting the use of these sensors. This article presents the KFuji RGB-DS database which is composed by 967 multi-modal images of Fuji apples on trees captured using Microsoft Kinect v2 (Microsoft, Redmond, WA, USA). Each image contains information from 3 different modalities: color (RGB), depth (D) and range corrected IR intensity (S). Ground truth fruit locations were manually annotated, labeling a total of 12,839 apples in all the dataset. The current dataset is publicly available at http://www.grap.udl.cat/publicacions/datasets.html.",KFuji,0.965932965,NA,0,KFuji,0.965932965,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,7/19/2019 +26376976,http://kgcak.big.ac.cn/KGCAK,"KGCAK: a K-mer based database for genome-wide phylogeny and complexity evaluation. Background The K-mer approach, treating genomic sequences as simple characters and counting the relative abundance of each string upon a fixed K, has been extensively applied to phylogeny inference for genome assembly, annotation, and comparison. Results To meet increasing demands for comparing large genome sequences and to promote the use of the K-mer approach, we develop a versatile database, KGCAK ( http://kgcak.big.ac.cn/KGCAK/ ), containing ~8,000 genomes that include genome sequences of diverse life forms (viruses, prokaryotes, protists, animals, and plants) and cellular organelles of eukaryotic lineages. It builds phylogeny based on genomic elements in an alignment-free fashion and provides in-depth data processing enabling users to compare the complexity of genome sequences based on K-mer distribution. 
Conclusion We hope that KGCAK becomes a powerful tool for exploring relationship within and among groups of species in a tree of life based on genomic data.",KGCAK,0.998066127,NA,0,KGCAK,0.998066127,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/16/2015 +23626002,http://biomi.cdc.go.kr/KGVDB,"KGVDB: a population-based genomic map of CNVs tagged by SNPs in Koreans. Summary Despite a growing interest in a correlation between copy number variations (CNVs) and flanking single nucleotide polymorphisms, few databases provide such information. In particular, most information on CNV available so far was obtained in Caucasian and Yoruba populations, and little is known about CNV in Asian populations. This article presents a database that provides CNV regions tagged by single nucleotide polymorphisms in about 4700 Koreans, which were detected under strict quality control, manually curated and experimentally validated. Availability KGVDB is freely available for non-commercial use at http://biomi.cdc.go.kr/KGVDB. Supplementary information Supplementary data are available at Bioinformatics online.",KGVDB,0.986184359,NA,0,KGVDB,0.986184359,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/26/2013 +31180159,http://genomes.vn,"A Vietnamese human genetic variation database. Large scale human genome projects have created tremendous human genome databases for some well-studied populations. Vietnam has about 95 million people (the 14th largest country by population in the world) of which more than 86% are Kinh people. To date, genetic studies for Vietnamese people mostly rely on genetic information from other populations. Building a Vietnamese human genetic variation database is a must for properly interpreting Vietnamese genetic variants. To this end, we sequenced 105 whole genomes and 200 whole exomes of 305 unrelated Kinh Vietnamese (KHV) people. We also included 101 other previously published KHV genomes to build a Vietnamese human genetic variation database of 406 KHV people. 
The KHV database contains 24.81 million variants (22.47 million single nucleotide polymorphisms (SNPs) and 2.34 million indels) of which 0.71 million variants are novel. It includes more than 99.3% of variants with a frequency of >1% in the KHV population. Noticeably, the KHV database revealed 107 variants reported in the human genome mutation database as pathological mutations with a frequency above 1% in the KHV population. The KHV database (available at https://genomes.vn) would be beneficial for genetic studies and medical applications not only for the Vietnamese population but also for other closely related populations.",KHV,0.773043275,NA,0,KHV,0.773043275,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/3/2019 +21492431,http://www.moseslab.csb.utoronto.ca/KID,"A quantitative literature-curated gold standard for kinase-substrate pairs. We describe the Yeast Kinase Interaction Database (KID, http://www.moseslab.csb.utoronto.ca/KID/), which contains high- and low-throughput data relevant to phosphorylation events. KID includes 6,225 low-throughput and 21,990 high-throughput interactions, from greater than 35,000 experiments. By quantitatively integrating these data, we identified 517 high-confidence kinase-substrate pairs that we consider a gold standard. We show that this gold standard can be used to assess published high-throughput datasets, suggesting that it will enable similar rigorous assessments in the future.",KID,0.998009106,Yeast Kinase Interaction Database,0.994851972,KID,0.998009106,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/14/2011 +23193279,http://gemdock.life.nctu.edu.tw/KIDFamMap,"KIDFamMap: a database of kinase-inhibitor-disease family maps for kinase inhibitor selectivity and binding mechanisms. Kinases play central roles in signaling pathways and are promising therapeutic targets for many diseases. Designing selective kinase inhibitors is an emergent and challenging task, because kinases share an evolutionary conserved ATP-binding site. 
KIDFamMap (http://gemdock.life.nctu.edu.tw/KIDFamMap/) is the first database to explore kinase-inhibitor families (KIFs) and kinase-inhibitor-disease (KID) relationships for kinase inhibitor selectivity and mechanisms. This database includes 1208 KIFs, 962 KIDs, 55 603 kinase-inhibitor interactions (KIIs), 35 788 kinase inhibitors, 399 human protein kinases, 339 diseases and 638 disease allelic variants. Here, a KIF can be defined as follows: (i) the kinases in the KIF with significant sequence similarity, (ii) the inhibitors in the KIF with significant topology similarity and (iii) the KIIs in the KIF with significant interaction similarity. The KIIs within a KIF are often conserved on some consensus KIDFamMap anchors, which represent conserved interactions between the kinase subsites and consensus moieties of their inhibitors. Our experimental results reveal that the members of a KIF often possess similar inhibition profiles. The KIDFamMap anchors can reflect kinase conformations types, kinase functions and kinase inhibitor selectivity. We believe that KIDFamMap provides biological insights into kinase inhibitor selectivity and binding mechanisms.",KIDFamMap,0.99645108,NA,0,KIDFamMap,0.99645108,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2012 +31201317,http://hbcreports.med.harvard.edu/fmm,"Multi omics analysis of fibrotic kidneys in two mouse models. Kidney fibrosis represents an urgent unmet clinical need due to the lack of effective therapies and an inadequate understanding of the molecular pathogenesis. We have generated a comprehensive and combined multi-omics dataset (proteomics, mRNA and small RNA transcriptomics) of fibrotic kidneys that is searchable through a user-friendly web application: http://hbcreports.med.harvard.edu/fmm/ . Two commonly used mouse models were utilized: a reversible chemical-induced injury model (folic acid (FA) induced nephropathy) and an irreversible surgically-induced fibrosis model (unilateral ureteral obstruction (UUO)). 
mRNA and small RNA sequencing, as well as 10-plex tandem mass tag (TMT) proteomics were performed with kidney samples from different time points over the course of fibrosis development. The bioinformatics workflow used to process, technically validate, and combine the single omics data will be described. In summary, we present temporal multi-omics data from fibrotic mouse kidneys that are accessible through an interrogation tool (Mouse Kidney Fibromics browser) to provide a searchable transcriptome and proteome for kidney fibrosis researchers.",NA,0,Kidney Fibromics,0.737329566,Kidney Fibromics,0.737329566,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,6/14/2019 +"25115331, 33247931",http://kimosys.org,"KiMoSys: a web-based repository of experimental data for KInetic MOdels of biological SYStems. Background The kinetic modeling of biological systems is mainly composed of three steps that proceed iteratively: model building, simulation and analysis. In the first step, it is usually required to set initial metabolite concentrations, and to assign kinetic rate laws, along with estimating parameter values using kinetic data through optimization when these are not known. Although the rapid development of high-throughput methods has generated much omics data, experimentalists present only a summary of obtained results for publication, the experimental data files are not usually submitted to any public repository, or simply not available at all. In order to automatize as much as possible the steps of building kinetic models, there is a growing requirement in the systems biology community for easily exchanging data in combination with models, which represents the main motivation of KiMoSys development. Description KiMoSys is a user-friendly platform that includes a public data repository of published experimental data, containing concentration data of metabolites and enzymes and flux data. 
It was designed to ensure data management, storage and sharing for a wider systems biology community. This community repository offers a web-based interface and upload facility to turn available data into publicly accessible, centralized and structured-format data files. Moreover, it compiles and integrates available kinetic models associated with the data.KiMoSys also integrates some tools to facilitate the kinetic model construction process of large-scale metabolic networks, especially when the systems biologists perform computational research. Conclusions KiMoSys is a web-based system that integrates a public data and associated model(s) repository with computational tools, providing the systems biology community with a novel application facilitating data storage and sharing, thus supporting construction of ODE-based kinetic models and collaborative research projects.The web application implemented using Ruby on Rails framework is freely available for web access at http://kimosys.org, along with its full documentation.",KiMoSys,0.998547196,NA,0,KiMoSys,0.998547196,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2020 +25414382,http://kin-driver.leloir.org.ar,"Kin-Driver: a database of driver mutations in protein kinases. Somatic mutations in protein kinases (PKs) are frequent driver events in many human tumors, while germ-line mutations are associated with hereditary diseases. Here we present Kin-driver, the first database that compiles driver mutations in PKs with experimental evidence demonstrating their functional role. Kin-driver is a manual expert-curated database that pays special attention to activating mutations (AMs) and can serve as a validation set to develop new generation tools focused on the prediction of gain-of-function driver mutations. It also offers an easy and intuitive environment to facilitate the visualization and analysis of mutations in PKs. 
Because all mutations are mapped onto a multiple sequence alignment, analogue positions between kinases can be identified and tentative new mutations can be proposed for studying by transferring annotation. Finally, our database can also be of use to clinical and translational laboratories, helping them to identify uncommon AMs that can correlate with response to new antitumor drugs. The website was developed using PHP and JavaScript, which are supported by all major browsers; the database was built using MySQL server. Kin-driver is available at: http://kin-driver.leloir.org.ar/",Kin-driver,0.993340989,NA,0,Kin-driver,0.993340989,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/19/2014 +33137204,http://bioinfo.uth.edu/kmd,"KinaseMD: kinase mutations and drug response database. Mutations in kinases are abundant and critical to study signaling pathways and regulatory roles in human disease, especially in cancer. Somatic mutations in kinase genes can affect drug treatment, both sensitivity and resistance, to clinically used kinase inhibitors. Here, we present a newly constructed database, KinaseMD (kinase mutations and drug response), to structurally and functionally annotate kinase mutations. KinaseMD integrates 679 374 somatic mutations, 251 522 network-rewiring events, and 390 460 drug response records curated from various sources for 547 kinases. We uniquely annotate the mutations and kinase inhibitor response in four types of protein substructures (gatekeeper, A-loop, G-loop and αC-helix) that are linked to kinase inhibitor resistance in literature. In addition, we annotate functional mutations that may rewire kinase regulatory network and report four phosphorylation signals (gain, loss, up-regulation and down-regulation). Overall, KinaseMD provides the most updated information on mutations, unique annotations of drug response especially drug resistance and functional sites of kinases. 
KinaseMD is accessible at https://bioinfo.uth.edu/kmd/, having functions for searching, browsing and downloading data. To our knowledge, there has been no systematic annotation of these structural mutations linking to kinase inhibitor response. In summary, KinaseMD is a centralized database for kinase mutations and drug response.",KinaseMD,0.997730494,NA,0,KinaseMD,0.997730494,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +26989151,http://lightning.med.monash.edu/kinetochoreDB2,"KinetochoreDB: a comprehensive online resource for the kinetochore and its related proteins. . KinetochoreDB is an online resource for the kinetochore and its related proteins. It provides comprehensive annotations on 1554 related protein entries in terms of their amino acid sequence, protein domain context, protein 3D structure, predicted intrinsically disordered region, protein-protein interaction, post-translational modification site, functional domain and key metabolic/signaling pathways, integrating several public databases, computational annotations and experimental results. KinetochoreDB provides interactive and customizable search and data display functions that allow users to interrogate the database in an efficient and user-friendly manner. It uses PSI-BLAST searches to retrieve the homologs of all entries and generate multiple sequence alignments that contain important evolutionary information. This knowledgebase also provides annotations of single point mutations for entries with respect to their pathogenicity, which may be useful for generation of new hypotheses on their functions, as well as follow-up studies of human diseases. Database URL: http://lightning.med.monash.edu/kinetochoreDB2/.",KinetochoreDB,0.998664141,NA,0,KinetochoreDB,0.998664141,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/17/2016 +29220464,http://bioinfo.icgeb.res.in/kipho,"KiPho: malaria parasite kinome and phosphatome portal. . 
The Plasmodium kinases and phosphatases play an essential role in the regulation of substrate reversible-phosphorylation and overall cellular homeostasis. Reversible phosphorylation is one of the key post-translational modifications (PTMs) essential for parasite survival. Thus, a complete and comprehensive information of malarial kinases and phosphatases as a single web resource will not only aid in systematic and better understanding of the PTMs, but also facilitate efforts to look for novel drug targets for malaria. In the current work, we have developed KiPho, a comprehensive and one step web-based information resource for Plasmodium kinases and phosphatases. To develop KiPho, we have made use of search methods to retrieve, consolidate and integrate predicted as well as annotated information from several publically available web repositories. Additionally, we have incorporated relevant and manually curated data, which will be updated from time to time with the availability of new information. The KiPho (Malaria Parasite Kinome-Phosphatome) resource is freely available at http://bioinfo.icgeb.res.in/kipho.",KiPho,0.995313227,Malaria Parasite Kinome-Phosphatome,0.971182257,KiPho,0.995313227,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +26656885,http://bdg.hfut.edu.cn/kir/index.html,"Kiwifruit Information Resource (KIR): a comparative platform for kiwifruit genomics. . The Kiwifruit Information Resource (KIR) is dedicated to maintain and integrate comprehensive datasets on genomics, functional genomics and transcriptomics of kiwifruit (Actinidiaceae). KIR serves as a central access point for existing/new genomic and genetic data. KIR also provides researchers with a variety of visualization and analysis tools. Current developments include the updated genome structure of Actinidia chinensis cv. 
Hongyang and its newest genome annotation, putative transcripts, gene expression, physical markers of genetic traits as well as relevant publications based on the latest genome assembly. Nine thousand five hundred and forty-seven new transcripts are detected and 21 132 old transcripts are changed. At the present release, the next-generation transcriptome sequencing data has been incorporated into gene models and splice variants. Protein-protein interactions are also identified based on experimentally determined orthologous interactions. Furthermore, the experimental results reported in peer-reviewed literature are manually extracted and integrated within a well-developed query page. In total, 122 identifications are currently associated, including commonly used gene names and symbols. All KIR datasets are helpful to facilitate a broad range of kiwifruit research topics and freely available to the research community. Database URL: http://bdg.hfut.edu.cn/kir/index.html.",KIR,0.991020302,Kiwifruit Information Resource,0.94024086,KIR,0.991020302,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/9/2015 +32821400,http://kiwifruitgenome.org,"Kiwifruit Genome Database (KGD): a comprehensive resource for kiwifruit genomics. Kiwifruit (Actinidia spp.) plants produce economically important fruits containing abundant, balanced phytonutrients with extraordinarily high vitamin C contents. Since the release of the first kiwifruit reference genome sequence in 2013, large volumes of genome and transcriptome data have been rapidly accumulated for a handful of kiwifruit species. To efficiently store, analyze, integrate, and disseminate these large-scale datasets to the research community, we constructed the Kiwifruit Genome Database (KGD; http://kiwifruitgenome.org/). 
The database currently contains all publicly available genome and gene sequences, gene annotations, biochemical pathways, transcriptome profiles derived from public RNA-Seq datasets, and comparative genomic analysis results such as syntenic blocks and homologous gene pairs between different kiwifruit genome assemblies. A set of user-friendly query interfaces, analysis tools and visualization modules have been implemented in KGD to facilitate translational and applied research in kiwifruit, which include JBrowse, a popular genome browser, and the NCBI BLAST sequence search tool. Other notable tools developed within KGD include a genome synteny viewer and tools for differential gene expression analysis as well as gene ontology (GO) term and pathway enrichment analysis.",KGD,0.986381908,Kiwifruit Genome Database,0.988579522,Kiwifruit Genome Database,0.988579522,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2020 +29097748,http://www.nipgr.res.in/kixbase/home.php,"KIXBASE: A comprehensive web resource for identification and exploration of KIX domains. The KIX domain has emerged in the last two decades as a critical site of interaction for transcriptional assembly, regulation and gene expression. Discovered in 1994, this conserved, triple helical globular domain has been characterised in various coactivator proteins of yeast, mammals and plants, including the p300/CBP (a histone acetyl transferase), MED15 (a subunit of the mediator complex of RNA polymerase II), and RECQL5 helicases. In this work, we describe the first rigorous meta analysis of KIX domains across all forms of life, leading to the development of KIXBASE, a predictive web server and global repository for detection and analysis of KIX domains. To our knowledge, KIXBASE comprises the largest online collection of KIX sequences, enabling assessments at the level of both sequence and structure, incorporating PSIPRED and MUSTER at the backend for further annotation and quality assessment. 
In addition, KIXBASE provides useful information about critical aspects of KIX domains such as their intrinsic disorder, hydrophobicity profiles, functional classification and annotation based on domain architectures. KIXBASE represents a significant enrichment of the currently annotated KIX dataset, especially in the plant kingdom, thus highlighting potential targets for biochemical characterization. The KIX webserver and database are both freely available to the scientific community, at http://www.nipgr.res.in/kixbase/home.php .",KIXBASE,0.981071115,NA,0,KIXBASE,0.981071115,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/2/2017 +26496949,http://klifs.vu-compmedchem.nl,"KLIFS: a structural kinase-ligand interaction database. Protein kinases play a crucial role in cell signaling and are important drug targets in several therapeutic areas. The KLIFS database contains detailed structural kinase-ligand interaction information derived from all (>2900) structures of catalytic domains of human and mouse protein kinases deposited in the Protein Data Bank in order to provide insights into the structural determinants of kinase-ligand binding and selectivity. The kinase structures have been processed in a consistent manner by systematically analyzing the structural features and molecular interaction fingerprints (IFPs) of a predefined set of 85 binding site residues with bound ligands. KLIFS has been completely rebuilt and extended (>65% more structures) since its first release as a data set, including: novel automated annotation methods for (i) the assessment of ligand-targeted subpockets and the analysis of (ii) DFG and (iii) αC-helix conformations; improved and automated protocols for (iv) the generation of sequence/structure alignments, (v) the curation of ligand atom and bond typing for accurate IFP analysis and (vi) weekly database updates. 
KLIFS is now accessible via a website (http://klifs.vu-compmedchem.nl) that provides a comprehensive visual presentation of different types of chemical, biological and structural chemogenomics data, and allows the user to easily access, compare, search and download the data.",KLIFS,0.998473823,NA,0,KLIFS,0.998473823,1,NA,33084889,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/22/2015 +33084889,http://klifs.net,"KLIFS: an overhaul after the first 5 years of supporting kinase research. Kinases are a prime target of drug development efforts with >60 drug approvals in the past two decades. Due to the research into this protein family, a wealth of data has been accumulated that keeps on growing. KLIFS-Kinase-Ligand Interaction Fingerprints and Structures-is a structural database focusing on how kinase inhibitors interact with their targets. The aim of KLIFS is to support (structure-based) kinase research through the systematic collection, annotation, and processing of kinase structures. Now, 5 years after releasing the initial KLIFS website, the database has undergone a complete overhaul with a new website, new logo, and new functionalities. In this article, we start by looking back at how KLIFS has been used by the research community, followed by a description of the renewed KLIFS, and conclude with showcasing the functionalities of KLIFS. Major changes include the integration of approved drugs and inhibitors in clinical trials, extension of the coverage to atypical kinases, and a RESTful API for programmatic access. KLIFS is available at the new domain https://klifs.net.",KLIFS,0.995591462,inase-L,0.635523836,KLIFS,0.995591462,1,NA,26496949,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +25907632,http://mutview.dmb.med.keio.ac.jp,"The KM-parkin-DB: A Sub-set MutationView Database Specialized for PARK2 (PARKIN) Variants. 
We previously isolated PARKIN (PARK2) as a gene responsible for a unique sort of Parkinson disease, namely Autosomal Recessive Juvenile Parkinsonism (ARJP). In this study, we surveyed all the available literature describing PARK2 gene/Parkin protein mutations found in Parkinson disease patients. Only carefully evaluated data were deposited in the graphical database MutationView (http://mutview.dmb.med.keio.ac.jp) to construct KM-parkin-DB, an independent sub-set database. Forty-four articles were selected for data curation regarding clinical information such as ethnic origins, manifested symptoms, onset age, and hereditary patterns as well as mutation details including base changes and zygosity. A total of 366 cases were collected from 39 ethnic origins and 96 pathogenic mutations were found. PARK2 gene mutations were found also in some general Parkinson disease patients. The majority (63%) of mutations in PARK2 were restricted to two particular domains (UBL and RING1) of the Parkin protein. In these domains, two major mutations, a large deletion (DelEx3) and a point mutation (p.Arg275Trp), were located.",KM-parkin-DB,0.96272862,NA,0,KM-parkin-DB,0.96272862,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/3/2015 +23292603,"http://kanaya.naist.jp/KNApSAcK_Family/, http://kanaya.naist.jp/knapsack3d","KNApSAcK-3D: a three-dimensional structure database of plant metabolites. Studies on plant metabolites have attracted significant attention in recent years. Over the past 8 years, we have constructed a unique metabolite database, called KNApSAcK, that contains information on the relationships between metabolites and their expressing organism(s). In the present paper, we introduce KNApSAcK-3D, which contains the three-dimensional (3D) structures of all of the metabolic compounds included in the original KNApSAcK database. 
The 3D structure for each compound was optimized using the Merck Molecular Force Field (MMFF94), and a multiobjective genetic algorithm was used to search extensively for possible conformations and locate the global minimum. The resulting set of structures may be used for docking studies to identify new and potentially unexpected binding sites for target proteins. The 3D structures may also be utilized for more qualitative studies, such as the estimation of biological activities using 3D-QSAR. The database can be accessed via a link from the KNApSAcK Family website (http://kanaya.naist.jp/KNApSAcK_Family/) or directory at http://kanaya.naist.jp/knapsack3d/.",KNApSAcK,0.997961819,NA,0,KNApSAcK,0.997961819,1,NA,22123792,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/3/2013 +22123792,http://kanaya.naist.jp/KNApSAcK_Family,"KNApSAcK family databases: integrated metabolite-plant species databases for multifaceted plant research. A database (DB) describing the relationships between species and their metabolites would be useful for metabolomics research, because it targets systematic analysis of enormous numbers of organic compounds with known or unknown structures in metabolomics. We constructed an extensive species-metabolite DB for plants, the KNApSAcK Core DB, which contains 101,500 species-metabolite relationships encompassing 20,741 species and 50,048 metabolites. We also developed a search engine within the KNApSAcK Core DB for use in metabolomics research, making it possible to search for metabolites based on an accurate mass, molecular formula, metabolite name or mass spectra in several ionization modes. We also have developed databases for retrieving metabolites related to plants used for a range of purposes. In our multifaceted plant usage DB, medicinal/edible plants are related to the geographic zones (GZs) where the plants are used, their biological activities, and formulae of Japanese and Indonesian traditional medicines (Kampo and Jamu, respectively). 
These data are connected to the species-metabolites relationship DB within the KNApSAcK Core DB, keyed via the species names. All databases can be accessed via the website http://kanaya.naist.jp/KNApSAcK_Family/. KNApSAcK WorldMap DB comprises 41,548 GZ-plant pair entries, including 222 GZs and 15,240 medicinal/edible plants. The KAMPO DB consists of 336 formulae encompassing 278 medicinal plants; the JAMU DB consists of 5,310 formulae encompassing 550 medicinal plants. The Biological Activity DB consists of 2,418 biological activities and 33,706 pairwise relationships between medicinal plants and their biological activities. Current statistics of the binary relationships between individual databases were characterized by the degree distribution analysis, leading to a prediction of at least 1,060,000 metabolites within all plants. In the future, the study of metabolomics will need to take this huge number of metabolites into consideration.",KNApSAcK,0.885880351,NA,0,KNApSAcK,0.885880351,1,NA,23292603,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/28/2011 +33147622,http://knindex.pufengdu.org,"KNIndex: a comprehensive database of physicochemical properties for k-tuple nucleotides. . With the development of high-throughput sequencing technology, the genomic sequences increased exponentially over the last decade. In order to decode these new genomic data, machine learning methods were introduced for genome annotation and analysis. Due to the requirement of most machines learning methods, the biological sequences must be represented as fixed-length digital vectors. In this representation procedure, the physicochemical properties of k-tuple nucleotides are important information. However, the values of the physicochemical properties of k-tuple nucleotides are scattered in different resources. 
To facilitate the studies on genomic sequences, we developed the first comprehensive database, namely KNIndex (https://knindex.pufengdu.org), for depositing and visualizing physicochemical properties of k-tuple nucleotides. Currently, the KNIndex database contains 182 properties including one for mononucleotide (DNA), 169 for dinucleotide (147 for DNA and 22 for RNA) and 12 for trinucleotide (DNA). KNIndex database also provides a user-friendly web-based interface for the users to browse, query, visualize and download the physicochemical properties of k-tuple nucleotides. With the built-in conversion and visualization functions, users are allowed to display DNA/RNA sequences as curves of multiple physicochemical properties. We wish that the KNIndex will facilitate the related studies in computational biology.",KNIndex,0.974327445,NA,0,KNIndex,0.974327445,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2021 +31598675,http://www.licpathway.net/KnockTF/index.html,"KnockTF: a comprehensive human gene expression profile database with knockdown/knockout of transcription factors. Transcription factors (TFs) and their target genes have important functions in human diseases and biological processes. Gene expression profile analysis before and after knockdown or knockout is one of the most important strategies for obtaining target genes of TFs and exploring TF functions. Human gene expression profile datasets with TF knockdown and knockout are accumulating rapidly. Based on the urgent need to comprehensively and effectively collect and process these data, we developed KnockTF (http://www.licpathway.net/KnockTF/index.html), a comprehensive human gene expression profile database of TF knockdown and knockout. KnockTF provides a number of resources for human gene expression profile datasets associated with TF knockdown and knockout and annotates TFs and their target genes in a tissue/cell type-specific manner. 
The current version of KnockTF has 570 manually curated RNA-seq and microarray datasets associated with 308 TFs disrupted by different knockdown and knockout techniques and across multiple tissue/cell types. KnockTF collects upstream pathway information of TFs and functional annotation results of downstream target genes. It provides details about TFs binding to promoters, super-enhancers and typical enhancers of target genes. KnockTF constructs a TF-differentially expressed gene network and performs network analyses for genes of interest. KnockTF will help elucidate TF-related functions and potential biological effects.",KnockTF,0.989506125,NA,0,KnockTF,0.989506125,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +34023905,http://www.lbgi.fr/AnnotSV,"AnnotSV and knotAnnotSV: a web server for human structural variations annotations, ranking and analysis. With the dramatic increase of pangenomic analysis, Human geneticists have generated large amount of genomic data including millions of small variants (SNV/indel) but also thousands of structural variations (SV) mainly from next-generation sequencing and array-based techniques. While the identification of the complete SV repertoire of a patient is getting possible, the interpretation of each SV remains challenging. To help identifying human pathogenic SV, we have developed a web server dedicated to their annotation and ranking (AnnotSV) as well as their visualization and interpretation (knotAnnotSV) freely available at the following address: https://www.lbgi.fr/AnnotSV/. A large amount of annotations from >20 sources is integrated in our web server including among others genes, haploinsufficiency, triplosensitivity, regulatory elements, known pathogenic or benign genomic regions, phenotypic data. An ACMG/ClinGen compliant prioritization module allows the scoring and the ranking of SV into 5 SV classes from pathogenic to benign. 
Finally, the visualization interface displays the annotated SV in an interactive way including popups, search fields, filtering options, advanced colouring to highlight pathogenic SV and hyperlinks to the UCSC genome browser or other public databases. This web server is designed for diagnostic and research analysis by providing important resources to the user.",knotAnnotSV,0.989982367,NA,0,knotAnnotSV,0.989982367,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +25361973,http://knotprot.cent.uw.edu.pl,"KnotProt: a database of proteins with knots and slipknots. The protein topology database KnotProt, http://knotprot.cent.uw.edu.pl/, collects information about protein structures with open polypeptide chains forming knots or slipknots. The knotting complexity of the cataloged proteins is presented in the form of a matrix diagram that shows users the knot type of the entire polypeptide chain and of each of its subchains. The pattern visible in the matrix gives the knotting fingerprint of a given protein and permits users to determine, for example, the minimal length of the knotted regions (knot's core size) or the depth of a knot, i.e. how many amino acids can be removed from either end of the cataloged protein structure before converting it from a knot to a different type of knot. In addition, the database presents extensive information about the biological functions, families and fold types of proteins with non-trivial knotting. As an additional feature, the KnotProt database enables users to submit protein or polymer chains and generate their knotting fingerprints.",KnotProt,0.995327413,NA,0,KnotProt,0.995327413,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/31/2014 +23837716,http://dbm.dna.affrc.go.jp/px,"KONAGAbase: a genomic and transcriptomic database for the diamondback moth, Plutella xylostella. Background The diamondback moth (DBM), Plutella xylostella, is one of the most harmful insect pests for crucifer crops worldwide. 
DBM has rapidly evolved high resistance to most conventional insecticides such as pyrethroids, organophosphates, fipronil, spinosad, Bacillus thuringiensis, and diamides. Therefore, it is important to develop genomic and transcriptomic DBM resources for analysis of genes related to insecticide resistance, both to clarify the mechanism of resistance of DBM and to facilitate the development of insecticides with a novel mode of action for more effective and environmentally less harmful insecticide rotation. To contribute to this goal, we developed KONAGAbase, a genomic and transcriptomic database for DBM (KONAGA is the Japanese word for DBM). Description KONAGAbase provides (1) transcriptomic sequences of 37,340 ESTs/mRNAs and 147,370 RNA-seq contigs which were clustered and assembled into 84,570 unigenes (30,695 contigs, 50,548 pseudo singletons, and 3,327 singletons); and (2) genomic sequences of 88,530 WGS contigs with 246,244 degenerate contigs and 106,455 singletons from which 6,310 de novo identified repeat sequences and 34,890 predicted gene-coding sequences were extracted. The unigenes and predicted gene-coding sequences were clustered and 32,800 representative sequences were extracted as a comprehensive putative gene set. These sequences were annotated with BLAST descriptions, Gene Ontology (GO) terms, and Pfam descriptions, respectively. KONAGAbase contains rich graphical user interface (GUI)-based web interfaces for easy and efficient searching, browsing, and downloading sequences and annotation data. Five useful search interfaces consisting of BLAST search, keyword search, BLAST result-based search, GO tree-based search, and genome browser are provided. KONAGAbase is publicly available from our website (http://dbm.dna.affrc.go.jp/px/) through standard web browsers. 
Conclusions KONAGAbase provides DBM comprehensive transcriptomic and draft genomic sequences with useful annotation information with easy-to-use web interfaces, which helps researchers to efficiently search for target sequences such as insect resistance-related genes. KONAGAbase will be continuously updated and additional genomic/transcriptomic resources and analysis tools will be provided for further efficient analysis of the mechanism of insecticide resistance and the development of effective insecticides with a novel mode of action for DBM.",KONAGAbase,0.980475366,NA,0,KONAGAbase,0.980475366,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/9/2013 +32133509,http://coda.nih.go.kr/coda/KRGDB/index.jsp,"KRGDB: the large-scale variant database of 1722 Koreans based on whole genome sequencing. . Since 2012, the Center for Genome Science of the Korea National Institute of Health (KNIH) has been sequencing complete genomes of 1722 Korean individuals. As a result, more than 32 million variant sites have been identified, and a large proportion of the variant sites have been detected for the first time. In this article, we describe the Korean Reference Genome Database (KRGDB) and its genome browser. The current version of our database contains both single nucleotide and short insertion/deletion variants. The DNA samples were obtained from four different origins and sequenced in different sequencing depths (10× coverage of 63 individuals, 20× coverage of 194 individuals, combined 10× and 20× coverage of 135 individuals, 30× coverage of 230 individuals and 30× coverage of 1100 individuals). The major features of the KRGDB are that it contains information on the Korean genomic variant frequency, frequency difference between the Korean and other populations and the variant functional annotation (such as regulatory elements in ENCODE regions and coding variant functions) of the variant sites. 
Additionally, we performed the genome-wide association study (GWAS) between Korean genome variant sites for the 30×230 individuals and three major common diseases (diabetes, hypertension and metabolic syndrome). The association results are displayed on our browser. The KRGDB uses the MySQL database and Apache-Tomcat web server adopted with Java Server Page (JSP) and is freely available at http://coda.nih.go.kr/coda/KRGDB/index.jsp. Availability: http://coda.nih.go.kr/coda/KRGDB/index.jsp.",KRGDB,0.981609166,Korean Reference Genome Database,0.956574035,KRGDB,0.981609166,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +21624162,http://www.e-lico.eu/kupkb,"Developing a kidney and urinary pathway knowledge base. Background Chronic renal disease is a global health problem. The identification of suitable biomarkers could facilitate early detection and diagnosis and allow better understanding of the underlying pathology. One of the challenges in meeting this goal is the necessary integration of experimental results from multiple biological levels for further analysis by data mining. Data integration in the life science is still a struggle, and many groups are looking to the benefits promised by the Semantic Web for data integration. Results We present a Semantic Web approach to developing a knowledge base that integrates data from high-throughput experiments on kidney and urine. A specialised KUP ontology is used to tie the various layers together, whilst background knowledge from external databases is incorporated by conversion into RDF. Using SPARQL as a query mechanism, we are able to query for proteins expressed in urine and place these back into the context of genes expressed in regions of the kidney. Conclusions The KUPKB gives KUP biologists the means to ask queries across many resources in order to aggregate knowledge that is necessary for answering biological questions. 
The Semantic Web technologies we use, together with the background knowledge from the domain's ontologies, allows both rapid conversion and integration of this knowledge base. The KUPKB is still relatively small, but questions remain about scalability, maintenance and availability of the knowledge itself. Availability The KUPKB may be accessed via http://www.e-lico.eu/kupkb.",KUPKB,0.995265484,a kidney and urinary pathway,0.738103741,KUPKB,0.995265484,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/17/2011 +33287903,http://bioinfo.zju.edu.cn/KVarPredDB,"KVarPredDB: a database for predicting pathogenicity of missense sequence variants of keratin genes associated with genodermatoses. Background Germline variants of ten keratin genes (K1, K2, K5, K6A, K6B, K9, K10, K14, K16, and K17) have been reported for causing different types of genodermatoses with an autosomal dominant mode of inheritance. Among all the variants of these ten keratin genes, most of them are missense variants. Unlike pathogenic and likely pathogenic variants, understanding the clinical importance of novel missense variants or variants of uncertain significance (VUS) is the biggest challenge for clinicians or medical geneticists. Functional characterization is the only way to understand the clinical association of novel missense variants or VUS but it is time consuming, costly, and depends on the availability of patient's samples. Existing databases report the pathogenic variants of the keratin genes, but never emphasize the systematic effects of these variants on keratin protein structure and genotype-phenotype correlation. Results To address this need, we developed a comprehensive database KVarPredDB, which contains information of all ten keratin genes associated with genodermatoses. We integrated and curated 400 reported pathogenic missense variants as well as 4629 missense VUS. 
KVarPredDB predicts the pathogenicity of novel missense variants as well as to understand the severity of disease phenotype, based on four criteria; firstly, the difference in physico-chemical properties between the wild type and substituted amino acids; secondly, the loss of inter/intra-chain interactions; thirdly, evolutionary conservation of the wild type amino acids and lastly, the effect of the substituted amino acids in the heptad repeat. Molecular docking simulations based on resolved crystal structures were adopted to predict stability changes and get the binding energy to compare the wild type protein with the mutated one. We use this basic information to determine the structural and functional impact of novel missense variants on the keratin coiled-coil heterodimer. KVarPredDB was built under the integrative web application development framework SSM (SpringBoot, Spring MVC, MyBatis) and implemented in Java, Bootstrap, React-mutation-mapper, MySQL, Tomcat. The website can be accessed through http://bioinfo.zju.edu.cn/KVarPredDB . The genomic variants and analysis results are freely available under the Creative Commons license. Conclusions KVarPredDB provides an intuitive and user-friendly interface with computational analytical investigation for each missense variant of the keratin genes associated with genodermatoses.",KVarPredDB,0.997992098,VarPredDB,0.663375258,KVarPredDB,0.997992098,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/7/2020 +27924012,http://l1base.charite.de,"L1Base 2: more retrotransposition-active LINE-1s, more mammalian genomes. LINE-1 (L1) insertions comprise as much as 17% of the human genome sequence, and similar proportions have been recorded for other mammalian species. Given the established role of L1 retrotransposons in shaping mammalian genomes, it becomes an important task to track and annotate the sources of this activity: full length elements, able to encode the cis and trans acting components of the retrotransposition machinery. 
The L1Base database (http://l1base.charite.de) contains annotated full-length sequences of LINE-1 transposons including putatively active L1s. For the new version of L1Base, a LINE-1 annotation tool, L1Xplorer, has been used to mine potentially active L1 retrotransposons from the reference genome sequences of 17 mammals. The current release of the human genome, GRCh38, contains 146 putatively active L1 elements or full length intact L1 elements (FLIs). The newest versions of the mouse, GRCm38 and the rat, Rnor_6.0, genomes contain 2811 and 492 FLIs, respectively. Most likely reflecting the current level of completeness of the genome project, the latest reference sequence of the common chimpanzee genome, PT 2.19, only contains 19 FLIs. Of note, the current assemblies of the dog, CF 3.1 and the sheep, OA 3.1, genomes contain 264 and 598 FLIs, respectively. Further developments in the new version of L1Base include an updated website with implementation of modern web server technologies. including a more responsive design for an improved user experience, as well as the addition of data sharing capabilities for L1Xplorer annotation.",L1Base,0.945930322,NA,0,L1Base,0.945930322,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/18/2016 +25220766,http://ftp.ncbi.nlm.nih.gov/pub/lu/LabeledIn,"LabeledIn: cataloging labeled indications for human drugs. Drug-disease treatment relationships, i.e., which drug(s) are indicated to treat which disease(s), are among the most frequently sought information in PubMed®. Such information is useful for feeding the Google Knowledge Graph, designing computational methods to predict novel drug indications, and validating clinical information in EMRs. Given the importance and utility of this information, there have been several efforts to create repositories of drugs and their indications. However, existing resources are incomplete. 
Furthermore, they neither label indications in a structured way nor differentiate them by drug-specific properties such as dosage form, and thus do not support computer processing or semantic interoperability. More recently, several studies have proposed automatic methods to extract structured indications from drug descriptions; however, their performance is limited by natural language challenges in disease named entity recognition and indication selection. In response, we report LabeledIn: a human-reviewed, machine-readable and source-linked catalog of labeled indications for human drugs. More specifically, we describe our semi-automatic approach to derive LabeledIn from drug descriptions through human annotations with aids from automatic methods. As the data source, we use the drug labels (or package inserts) submitted to the FDA by drug manufacturers and made available in DailyMed. Our machine-assisted human annotation workflow comprises: (i) a grouping method to remove redundancy and identify representative drug labels to be used for human annotation, (ii) an automatic method to recognize and normalize mentions of diseases in drug labels as candidate indications, and (iii) a two-round annotation workflow for human experts to judge the pre-computed candidates and deliver the final gold standard. In this study, we focused on 250 highly accessed drugs in PubMed Health, a newly developed public web resource for consumers and clinicians on prevention and treatment of diseases. These 250 drugs corresponded to more than 8000 drug labels (500 unique) in DailyMed in which 2950 candidate indications were pre-tagged by an automatic tool. After being reviewed independently by two experts, 1618 indications were selected, and additional 97 (missed by computer) were manually added, with an inter-annotator agreement of 88.35% as measured by the Kappa coefficient. 
Our final annotation results in LabeledIn consist of 7805 drug-disease treatment relationships where drugs are represented as a triplet of ingredient, dose form, and strength. A systematic comparison of LabeledIn with an existing computer-derived resource revealed significant discrepancies, confirming the need to involve humans in the creation of such a resource. In addition, LabeledIn is unique in that it contains detailed textual context of the selected indications in drug labels, making it suitable for the development of advanced computational methods for the automatic extraction of indications from free text. Finally, motivated by the studies on drug nomenclature and medication errors in EMRs, we adopted a fine-grained drug representation scheme, which enables the automatic identification of drugs with indications specific to certain dose forms or strengths. Future work includes expanding our coverage to more drugs and integration with other resources. The LabeledIn dataset and the annotation guidelines are available at http://ftp.ncbi.nlm.nih.gov/pub/lu/LabeledIn/.",LabeledIn,0.984398663,NA,0,LabeledIn,0.984398663,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/23/2014 +32502232,"http://gitlab.com/vejnar/labxdb, http://labxdb.vejnar.org","LabxDB: versatile databases for genomic sequencing and lab management. Summary Experimental laboratory management and data-driven science require centralized software for sharing information, such as lab collections or genomic sequencing datasets. Although database servers such as PostgreSQL can store such information with multiple-user access, they lack user-friendly graphical and programmatic interfaces for easy data access and inputting. We developed LabxDB, a versatile open-source solution for organizing and sharing structured data. We provide several out-of-the-box databases for deployment in the cloud including simple mutant or plasmid collections and purchase-tracking databases. 
We also developed a high-throughput sequencing (HTS) database, LabxDB seq, dedicated to storage of hierarchical sample annotations. Scientists can import their own or publicly available HTS data into LabxDB seq to manage them from production to publication. Using LabxDB's programmatic access (REST API), annotations can be easily integrated into bioinformatics pipelines. LabxDB is modular, offering a flexible framework that scientists can leverage to build new database interfaces adapted to their needs. Availability and implementation LabxDB is available at https://gitlab.com/vejnar/labxdb and https://labxdb.vejnar.org for documentation. LabxDB is licensed under the terms of the Mozilla Public License 2.0. Supplementary information Supplementary data are available at Bioinformatics online.",LabxDB,0.991126418,NA,0,LabxDB,0.991126418,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2020 +22570419,http://homingendonuclease.net,"LAHEDES: the LAGLIDADG homing endonuclease database and engineering server. LAGLIDADG homing endonucleases (LHEs) are DNA cleaving enzymes, also termed 'meganucleases' that are employed as gene-targeting reagents. This use of LHEs requires that their DNA specificity be altered to match sequences in genomic targets. The choice of the most appropriate LHE to target a particular gene is facilitated by the growing number of such enzymes with well-characterized activities and structures. 'LAHEDES' (The LAGLIDADG Homing Endonuclease Database and Engineering Server) provides both an online archive of LHEs with validated DNA cleavage specificities and DNA-binding interactions, as well as a tool for the identification of DNA sequences that might be targeted by various LHEs. Searches can be performed using four separate scoring algorithms and user-defined choices of LHE scaffolds. The webserver subsequently provides information regarding clusters of amino acids that should be interrogated during engineering and selection experiments. 
The webserver is fully open access and can be found at http://homingendonuclease.net.",LAHEDES,0.989177001,LAGLIDADG,0.901116014,LAHEDES,0.989177001,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/8/2012 +23825543,http://biotechlab.fudan.edu.cn/database/lamp,"LAMP: A Database Linking Antimicrobial Peptides. The frequent emergence of drug-resistant bacteria has created an urgent demand for new antimicrobial agents. Traditional methods of novel antibiotic development are almost obsolete. Antimicrobial peptides (AMPs) are now regarded as a potential solution to revive the traditional methods of antibiotic development, although, until now, many AMPs have failed in clinical trials. A comprehensive database of AMPs with information about their antimicrobial activity and cytotoxicity will help promote the process of finding novel AMPs with improved antimicrobial activity and reduced cytotoxicity and eventually accelerate the speed of translating the discovery of new AMPs into clinical or preclinical trials. LAMP, a database linking AMPs, serves as a tool to aid the discovery and design of AMPs as new antimicrobial agents. The current version of LAMP has 5,547 entries, comprising 3,904 natural AMPs and 1,643 synthetic peptides. The database can be queried using either simply keywords or combinatorial conditions searches. Equipped with the detailed antimicrobial activity and cytotoxicity data, the cross-linking and top similar AMPs functions implemented in LAMP will help enhance our current understanding of AMPs and this may speed up the development of new AMPs for medical applications. LAMP is freely available at: http://biotechlab.fudan.edu.cn/database/lamp.",LAMP,0.990910769,NA,0,LAMP,0.990910769,1,NA,23193253,NA,NA,NA,do not merge,NA,NA,NA,6/18/2013 +23193253,http://www.llamp.net,"Library of Apicomplexan Metabolic Pathways: a manually curated database for metabolic pathways of apicomplexan parasites. 
The Library of Apicomplexan Metabolic Pathways (LAMP, http://www.llamp.net) is a web database that provides near complete mapping from genes to the central metabolic functions for some of the prominent intracellular parasites of the phylum Apicomplexa. This phylum includes the causative agents of malaria, toxoplasmosis and theileriosis-diseases with a huge economic and social impact. A number of apicomplexan genomes have been sequenced, but the accurate annotation of gene function remains challenging. We have adopted an approach called metabolic reconstruction, in which genes are systematically assigned to functions within pathways/networks for Toxoplasma gondii, Neospora caninum, Cryptosporidium and Theileria species, and Babesia bovis. Several functions missing from pathways have been identified, where the corresponding gene for an essential process appears to be absent from the current genome annotation. For each species, LAMP contains interactive diagrams of each pathway, hyperlinked to external resources and annotated with detailed information, including the sources of evidence used. We have also developed a section to highlight the overall metabolic capabilities of each species, such as the ability to synthesize or the dependence on the host for a particular metabolite. We expect this new database will become a valuable resource for fundamental and applied research on the Apicomplexa.",LAMP,0.989832759,Library of Apicomplexan Metabolic Pathways,0.983664009,LAMP,0.989832759,1,NA,23825543,NA,NA,NA,do not merge,NA,NA,NA,11/27/2012 +29179110,http://geomorphology.irpi.cnr.it/tools,"LAND-deFeND - An innovative database structure for landslides and floods and their consequences. Information on historical landslides and floods - collectively called ""geo-hydrological hazards - is key to understand the complex dynamics of the events, to estimate the temporal and spatial frequency of damaging events, and to quantify their impact. 
A number of databases on geo-hydrological hazards and their consequences have been developed worldwide at different geographical and temporal scales. Of the few available database structures that can handle information on both landslides and floods some are outdated and others were not designed to store, organize, and manage information on single phenomena or on the type and monetary value of the damages and the remediation actions. Here, we present the LANDslides and Floods National Database (LAND-deFeND), a new database structure able to store, organize, and manage in a single digital structure spatial information collected from various sources with different accuracy. In designing LAND-deFeND, we defined four groups of entities, namely: nature-related, human-related, geospatial-related, and information-source-related entities that collectively can describe fully the geo-hydrological hazards and their consequences. In LAND-deFeND, the main entities are the nature-related entities, encompassing: (i) the ""phenomenon"", a single landslide or local inundation, (ii) the ""event"", which represent the ensemble of the inundations and/or landslides occurred in a conventional geographical area in a limited period, and (iii) the ""trigger"", which is the meteo-climatic or seismic cause (trigger) of the geo-hydrological hazards. LAND-deFeND maintains the relations between the nature-related entities and the human-related entities even where the information is missing partially. The physical model of the LAND-deFeND contains 32 tables, including nine input tables, 21 dictionary tables, and two association tables, and ten views, including specific views that make the database structure compliant with the EC INSPIRE and the Floods Directives. 
The LAND-deFeND database structure is open, and freely available from http://geomorphology.irpi.cnr.it/tools.",LAND-deFeND,0.969560434,LANDslides and Floods National Database,0.881252799,LAND-deFeND,0.969560434,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/24/2017 +32028878,http://zzdlab.com/plappisite/index.php,"PlaPPISite: a comprehensive resource for plant protein-protein interaction sites. BACKGROUND:Protein-protein interactions (PPIs) play very important roles in diverse biological processes. Experimentally validated or predicted PPI data have become increasingly available in diverse plant species. To further explore the biological functions of PPIs, understanding the interaction details of plant PPIs (e.g., the 3D structural contexts of interaction sites) is necessary. By integrating bioinformatics algorithms, interaction details can be annotated at different levels and then compiled into user-friendly databases. In our previous study, we developed AraPPISite, which aimed to provide interaction site information for PPIs in the model plant Arabidopsis thaliana. Considering that the application of AraPPISite is limited to one species, it is very natural that AraPPISite should be evolved into a new database that can provide interaction details of PPIs in multiple plants. DESCRIPTION:PlaPPISite (http://zzdlab.com/plappisite/index.php) is a comprehensive, high-coverage and interaction details-oriented database for 13 plant interactomes. In addition to collecting 121 experimentally verified structures of protein complexes, the complex structures of experimental/predicted PPIs in the 13 plants were also constructed, and the corresponding interaction sites were annotated. For the PPIs whose 3D structures could not be modelled, the associated domain-domain interactions (DDIs) and domain-motif interactions (DMIs) were inferred. 
To facilitate the reliability assessment of predicted PPIs, the source species of interolog templates, GO annotations, subcellular localizations and gene expression similarities are also provided. JavaScript packages were employed to visualize structures of protein complexes, protein interaction sites and protein interaction networks. We also developed an online tool for homology modelling and protein interaction site annotation of protein complexes. All data contained in PlaPPISite are also freely available on the Download page. CONCLUSION:PlaPPISite provides the plant research community with an easy-to-use and comprehensive data resource for the search and analysis of protein interaction details from the 13 important plant species.",laPPISite,0.994788826,NA,0,laPPISite,0.994788826,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/6/2020 +21498547,http://www.lcced.uni-stuttgart.de,"The Laccase Engineering Database: a classification and analysis system for laccases and related multicopper oxidases. Laccases and their homologues form the protein superfamily of multicopper oxidases (MCO). They catalyze the oxidation of many, particularly phenolic substances, and, besides playing an important role in many cellular activities, are of interest in biotechnological applications. The Laccase Engineering Database (LccED, http://www.lcced.uni-stuttgart.de) was designed to serve as a tool for a systematic sequence-based classification and analysis of the diverse multicopper oxidase protein family. More than 2200 proteins were classified into 11 superfamilies and 56 homologous families. For each family, the LccED provides multiple sequence alignments, phylogenetic trees and family-specific HMM profiles. The integration of structures for 14 different proteins allows a comprehensive comparison of sequences and structures to derive biochemical properties. Among the families, the distribution of the proteins regarding different kingdoms was investigated. 
The database was applied to perform a comprehensive analysis by MCO- and laccase-specific patterns. The LccED combines information of sequences and structures of MCOs. It serves as a classification tool to assign new proteins to a homologous family and can be applied to investigate sequence-structure-function relationship and to guide protein engineering. Database URL: http://www.lcced.uni-stuttgart.de.",LccED,0.97930038,The Laccase Engineering Database,0.850810488,LccED,0.97930038,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/15/2011 +22267903,http://lcgbase.big.ac.cn/LCGbase,"LCGbase: A Comprehensive Database for Lineage-Based Co-regulated Genes. Animal genes of different lineages, such as vertebrates and arthropods, are well-organized and blended into dynamic chromosomal structures that represent a primary regulatory mechanism for body development and cellular differentiation. The majority of genes in a genome are actually clustered, which are evolutionarily stable to different extents and biologically meaningful when evaluated among genomes within and across lineages. Until now, many questions concerning gene organization, such as what is the minimal number of genes in a cluster and what is the driving force leading to gene co-regulation, remain to be addressed. Here, we provide a user-friendly database-LCGbase (a comprehensive database for lineage-based co-regulated genes)-hosting information on evolutionary dynamics of gene clustering and ordering within animal kingdoms in two different lineages: vertebrates and arthropods. The database is constructed on a web-based Linux-Apache-MySQL-PHP framework and effective interactive user-inquiry service. Compared to other gene annotation databases with similar purposes, our database has three comprehensible advantages. First, our database is inclusive, including all high-quality genome assemblies of vertebrates and representative arthropod species. 
Second, it is human-centric since we map all gene clusters from other genomes in an order of lineage-ranks (such as primates, mammals, warm-blooded, and reptiles) onto human genome and start the database from well-defined gene pairs (a minimal cluster where the two adjacent genes are oriented as co-directional, convergent, and divergent pairs) to large gene clusters. Furthermore, users can search for any adjacent genes and their detailed annotations. Third, the database provides flexible parameter definitions, such as the distance of transcription start sites between two adjacent genes, which is extendable to genes that flanking the cluster across species. We also provide useful tools for sequence alignment, gene ontology (GO) annotation, promoter identification, gene expression (co-expression), and evolutionary analysis. This database not only provides a way to define lineage-specific and species-specific gene clusters but also facilitates future studies on gene co-regulation, epigenetic control of gene expression (DNA methylation and histone marks), and chromosomal structures in a context of gene clusters and species evolution. LCGbase is freely available at http://lcgbase.big.ac.cn/LCGbase.",LCGbase,0.97235316,NA,0,LCGbase,0.97235316,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/13/2011 +34976312,http://cosbi7.ee.ncku.edu.tw/LCMD,"LCMD: Lung Cancer Metabolome Database. Lung cancer, one of the most common causes of cancer-related death worldwide, has been associated with high treatment cost and imposed great burdens. The 5-year postoperative survival rate of lung cancer (13%) is lower than many other leading cancers indicating the urgent needs to dissect its pathogenic mechanisms and discover specific biomarkers. Although several proteins have been proposed to be potential candidates for the diagnosis of lung cancer, they present low accuracy in clinical settings. Metabolomics has thus emerged as a very promising tool for biomarker discovery. 
To date, many lung cancer-related metabolites have been highlighted in the literature but no database is available for scientists to retrieve this information. Herein, we construct and introduce the first Lung Cancer Metabolome Database (LCMD), a freely available online database depositing 2013 lung cancer-related metabolites identified from 65 mass spectrometry-based lung cancer metabolomics studies. Researchers are able to explore LCMD via two ways. Firstly, by applying various filters in the ""Browse Metabolites"" mode, users can access a list of lung cancer-related metabolites that satisfy the filter specifications. For each metabolite, users can acquire the value of the fold change (cancer/normal), statistical significance (p-value) of the fold change, and the comparative research designs of all the mass spectrometry-based lung cancer metabolomics studies that identify this metabolite. Secondly, by applying various filters in the ""Browse Studies"" mode, users can obtain a list of mass spectrometry-based lung cancer metabolomics studies that satisfy the filter specifications. For each study, users can view the type of studied specimen, mass spectrometry (MS) method, MS data processing software, and differential analysis method, as well as all the identified lung cancer-related metabolites. Furthermore, the overview of each study is clearly illustrated by a graphical summary. The LCMD (http://cosbi7.ee.ncku.edu.tw/LCMD/) is the first database that brings together the meaningful information of lung cancer-related metabolites. The development of the LCMD is envisioned to promote the biomarker discovery of lung cancer.",LCMD,0.993581444,Lung Cancer Metabolome Database,0.985615373,LCMD,0.993581444,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/7/2021 +31780665,http://www.ebi.ac.uk/metabolights/MTBLS999,"A database of high-resolution MS/MS spectra for lichen metabolites. 
While analytical techniques in natural products research massively shifted to liquid chromatography-mass spectrometry, lichen chemistry remains reliant on limited analytical methods, Thin Layer Chromatography being the gold standard. To meet the modern standards of metabolomics within lichenochemistry, we announce the publication of an open access MS/MS library with 250 metabolites, coined LDB for Lichen DataBase, providing a comprehensive coverage of lichen chemodiversity. These were donated by the Berlin Garden and Botanical Museum from the collection of Siegfried Huneck to be analyzed by LC-MS/MS. Spectra at individual collision energies were submitted to MetaboLights (https://www.ebi.ac.uk/metabolights/MTBLS999) while merged spectra were uploaded to the GNPS platform (CCMSLIB00004751209 to CCMSLIB00004751517). Technical validation was achieved by dereplicating three lichen extracts using a Molecular Networking approach, revealing the detection of eleven unique molecules that would have been missed without LDB implementation to the GNPS. From a chemist's viewpoint, this database should help streamlining the isolation of formerly unreported metabolites. From a taxonomist perspective, the LDB offers a versatile tool for the chemical profiling of newly reported species.",LDB,0.982422173,Lichen DataBase,0.793945372,LDB,0.982422173,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2019 +29697370,http://histone.scse.ntu.edu.sg/LDSplitDB,"LDSplitDB: a database for studies of meiotic recombination hotspots in MHC using human genomic data. BACKGROUND:Meiotic recombination happens during the process of meiosis when chromosomes inherited from two parents exchange genetic materials to generate chromosomes in the gamete cells. The recombination events tend to occur in narrow genomic regions called recombination hotspots. Its dysregulation could lead to serious human diseases such as birth defects. 
Although the regulatory mechanism of recombination events is still unclear, DNA sequence polymorphisms have been found to play crucial roles in the regulation of recombination hotspots. METHOD:To facilitate the studies of the underlying mechanism, we developed a database named LDSplitDB which provides an integrative and interactive data mining and visualization platform for the genome-wide association studies of recombination hotspots. It contains the pre-computed association maps of the major histocompatibility complex (MHC) region in the 1000 Genomes Project and the HapMap Phase III datasets, and a genome-scale study of the European population from the HapMap Phase II dataset. Besides the recombination profiles, related data of genes, SNPs and different types of epigenetic modifications, which could be associated with meiotic recombination, are provided for comprehensive analysis. To meet the computational requirement of the rapidly increasing population genomics data, we prepared a lookup table of 400 haplotypes for recombination rate estimation using the well-known LDhat algorithm which includes all possible two-locus haplotype configurations. CONCLUSION:To the best of our knowledge, LDSplitDB is the first large-scale database for the association analysis of human recombination hotspots with DNA sequence polymorphisms. It provides valuable resources for the discovery of the mechanism of meiotic recombination hotspots. The information about MHC in this database could help understand the roles of recombination in human immune system. DATABASE URL: http://histone.scse.ntu.edu.sg/LDSplitDB.",LDSplitDB,0.995853007,NA,0,LDSplitDB,0.995853007,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/20/2018 +29899596,http://lege.ciimar.up.pt,"Cyanobacterial diversity held in microbial biological resource centers as a biotechnological asset: the case study of the newly established LEGE culture collection. 
Cyanobacteria are a well-known source of bioproducts which renders culturable strains a valuable resource for biotechnology purposes. We describe here the establishment of a cyanobacterial culture collection (CC) and present the first version of the strain catalog and its online database (http://lege.ciimar.up.pt/). The LEGE CC holds 386 strains, mainly collected in coastal (48%), estuarine (11%), and fresh (34%) water bodies, for the most part from Portugal (84%). By following the most recent taxonomic classification, LEGE CC strains were classified into at least 46 genera from six orders (41% belong to the Synechococcales), several of them are unique among the phylogenetic diversity of the cyanobacteria. For all strains, primary data were obtained and secondary data were surveyed and reviewed, which can be reached through the strain sheets either in the catalog or in the online database. An overview on the notable biodiversity of LEGE CC strains is showcased, including a searchable phylogenetic tree and images for all strains. With this work, 80% of the LEGE CC strains have now their 16S rRNA gene sequences deposited in GenBank. Also, based in primary data, it is demonstrated that several LEGE CC strains are a promising source of extracellular polymeric substances (EPS). Through a review of previously published data, it is exposed that LEGE CC strains have the potential or actual capacity to produce a variety of biotechnologically interesting compounds, including common cyanotoxins or unprecedented bioactive molecules. Phylogenetic diversity of LEGE CC strains does not entirely reflect chemodiversity. Further bioprospecting should, therefore, account for strain specificity of the valuable cyanobacterial holdings of LEGE CC.",LEGE,0.943019152,NA,0,LEGE,0.943019152,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/6/2018 +31605615,http://www.legoo.org,"LeGOO: An Expertized Knowledge Database for the Model Legume Medicago truncatula. 
Medicago truncatula was proposed, about three decades ago, as a model legume to study the Rhizobium-legume symbiosis. It has now been adopted to study a wide range of biological questions, including various developmental processes (in particular root, symbiotic nodule and seed development), symbiotic (nitrogen-fixing and arbuscular mycorrhizal endosymbioses) and pathogenic interactions, as well as responses to abiotic stress. With a number of tools and resources set up in M. truncatula for omics, genetics and reverse genetics approaches, massive amounts of data have been produced, as well as four genome sequence releases. Many of these data were generated with heterogeneous tools, notably for transcriptomics studies, and are consequently difficult to integrate. This issue is addressed by the LeGOO (for Legume Graph-Oriented Organizer) knowledge base (https://www.legoo.org), which finds the correspondence between the multiple identifiers of the same gene. Furthermore, an important goal of LeGOO is to collect and represent biological information from peer-reviewed publications, whatever the technical approaches used to obtain this information. The information is modeled in a graph-oriented database, which enables flexible representation, with currently over 200,000 relations retrieved from 298 publications. LeGOO also provides the user with mining tools, including links to the Mt5.0 genome browser and associated information (on gene functional annotation, expression, methylome, natural diversity and available insertion mutants), as well as tools to navigate through different model species. 
LeGOO is, therefore, an innovative database that will be useful to the Medicago and legume community to better exploit the wealth of data produced on this model species.",LeGOO,0.99293381,Legume,0.559114456,LeGOO,0.99293381,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +21353266,http://bioinfo.noble.org/manuscript-support/legumedb,"A legume specific protein database (LegProt) improves the number of identified peptides, confidence scores and overall protein identification success rates for legume proteomics. A legume specific protein database (LegProt) has been created containing sequences from seven legume species, i.e., Glycine max, Lotus japonicus, Medicago sativa, Medicago truncatula, Lupinusalbus, Phaseolus vulgaris, and Pisum sativum. The database consists of amino acid sequences translated from predicted gene models and 6-frame translations of tentative consensus (TC) sequences assembled from expressed sequence tags (ESTs) and singleton ESTs. This database was queried using mass spectral data for protein identification and identification success rates were compared to the NCBI nr database. Specifically, Mascot MS/MS ion searches of tandem nano-LC Q-TOFMS/MS mass spectral data showed that relative to the NCBI nr protein database, the LegProt database yielded a 54% increase in the average protein score (i.e., from NCBI nr 480 to LegProt 739) and a 50% increase in the average number of matched peptides (i.e., from NCBI nr 8 to LegProt 12). The overall identification success rate also increased from 88% (NCBI nr) to 93% (LegProt). Mascot peptide mass fingerprinting (PMF) searches of the LegProt database using MALDI-TOFMS data yielded a significant increase in the identification success rate from 19% (NCBI nr) to 34% (LegProt) while the average scores and average number of matched peptides showed insignificant changes. 
The results demonstrate that the LegProt database significantly increases legume protein identification success rates and the confidence levels compared to the commonly used NCBI nr. These improvements are primarily due to the presence of a large number of legume specific TC sequences in the LegProt database that were not found in NCBI nr. The LegProt database is freely available for download (http://bioinfo.noble.org/manuscript-support/legumedb) and will serve as a valuable resource for legume proteomics.",LegProt,0.981720328,legume,0.68427968,LegProt,0.981720328,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/23/2011 +"22110036, 26578557",http://plantgrn.noble.org/LegumeIP,"LegumeIP: an integrative database for comparative genomics and transcriptomics of model legumes. Legumes play a vital role in maintaining the nitrogen cycle of the biosphere. They conduct symbiotic nitrogen fixation through endosymbiotic relationships with bacteria in root nodules. However, this and other characteristics of legumes, including mycorrhization, compound leaf development and profuse secondary metabolism, are absent in the typical model plant Arabidopsis thaliana. We present LegumeIP (http://plantgrn.noble.org/LegumeIP/), an integrative database for comparative genomics and transcriptomics of model legumes, for studying gene function and genome evolution in legumes. LegumeIP compiles gene and gene family information, syntenic and phylogenetic context and tissue-specific transcriptomic profiles. The database holds the genomic sequences of three model legumes, Medicago truncatula, Glycine max and Lotus japonicus plus two reference plant species, A. thaliana and Populus trichocarpa, with annotations based on UniProt, InterProScan, Gene Ontology and the Kyoto Encyclopedia of Genes and Genomes databases. LegumeIP also contains large-scale microarray and RNA-Seq-based gene expression data. Our new database is capable of systematic synteny analysis across M. truncatula, G. max, L. japonicas and A. 
thaliana, as well as construction and phylogenetic analysis of gene families across the five hosted species. Finally, LegumeIP provides comprehensive search and visualization tools that enable flexible queries based on gene annotation, gene family, synteny and relative gene expression.",LegumeIP,0.996482372,NA,0,LegumeIP,0.996482372,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +34768782,http://bioinfo.usu.edu/legumeSSRdb,"LegumeSSRdb: A Comprehensive Microsatellite Marker Database of Legumes for Germplasm Characterization and Crop Improvement. . Microsatellites, or simple sequence repeats (SSRs), are polymorphic loci that play a major role as molecular markers for genome analysis and plant breeding. The legume SSR database is a webserver which contains simple sequence repeats (SSRs) from genomes of 13 legume species. A total of 3,706,276 SSRs are present in the database, 698,509 of which are genic SSRs, and 3,007,772 are non-genic. This webserver is an integrated tool to perform end-to-end marker selection right from generating SSRs to designing and validating primers, visualizing the results and blasting the genomic sequences at one place without juggling between several resources. The user-friendly web interface allows users to browse SSRs based on the genomic region, chromosome, motif type, repeat motif sequence, frequency of motif, and advanced searches allow users to search based on chromosome location range and length of SSR. Users can give their desired flanking region around repeat and obtain the sequence, they can explore the genes in which the SSRs are present or the genes between which the SSRs are bound design custom primers, and perform in silico validation using PCR. An SSR prediction pipeline is implemented where the user can submit their genomic sequence to generate SSRs. This webserver will be frequently updated with more species, in time. 
We believe that legumeSSRdb would be a useful resource for marker-assisted selection and mapping quantitative trait loci (QTLs) to practice genomic selection and improve crop health. The database can be freely accessed at http://bioinfo.usu.edu/legumeSSRdb/.",legumeSSRdb,0.996245384,NA,0,legumeSSRdb,0.996245384,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/21/2021 +25125444,http://biomedinformri.com/leishmicrosat,"LeishMicrosatDB: open source database of repeat sequences detected in six fully sequenced Leishmania genomes. . A Leishmania Microsatellite Database (LeishMicrosatDB) is reported for genome wise mining of microsatellites in six Leishmania species, using in silico techniques. This was created to provide parasitologists a platform to understand the genome characterization, mapping, phylogeny and evolutionary analysis. The present version of the database contains 1,738,669 simple sequence repeats of which 181 s756 repeats are present in compound form. The repeats can be sought in a chromosome using input parameters such as repeat type (mono- hexa), coding status, repeat unit length and repeat sequence motif. The genic repeats have been further hyperlinked with their corresponding locus id, and the database is appended with primer3 plus for primer designing of selected repeats with left and right flanking sequences up to 250 bp. Information on clustering and polymorphic repeats can also be retrieved. This database may also be adopted as a tool to study the relative occurrence and distribution of microsatellites across the parasitic genome. The database can enable a biologist to select markers at desired intervals over the chromosomes, and can be accessed as an open source repository at http://biomedinformri.com/leishmicrosat. 
http://biomedinformri.com/leishmicrosat.",LeishMicrosatDB,0.997771621,Leishmania Microsatellite Database,0.960049748,LeishMicrosatDB,0.997771621,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/14/2014 +24194591,http://caps.ncbs.res.in/lenvardb,"LenVarDB: database of length-variant protein domains. Protein domains are functionally and structurally independent modules, which add to the functional variety of proteins. This array of functional diversity has been enabled by evolutionary changes, such as amino acid substitutions or insertions or deletions, occurring in these protein domains. Length variations (indels) can introduce changes at structural, functional and interaction levels. LenVarDB (freely available at http://caps.ncbs.res.in/lenvardb/) traces these length variations, starting from structure-based sequence alignments in our Protein Alignments organized as Structural Superfamilies (PASS2) database, across 731 structural classification of proteins (SCOP)-based protein domain superfamilies connected to 2 730 625 sequence homologues. Alignment of sequence homologues corresponding to a structural domain is available, starting from a structure-based sequence alignment of the superfamily. Orientation of the length-variant (indel) regions in protein domains can be visualized by mapping them on the structure and on the alignment. Knowledge about location of length variations within protein domains and their visual representation will be useful in predicting changes within structurally or functionally relevant sites, which may ultimately regulate protein function. Non-technical summary: Evolutionary changes bring about natural changes to proteins that may be found in many organisms. Such changes could be reflected as amino acid substitutions or insertions-deletions (indels) in protein sequences. LenVarDB is a database that provides an early overview of observed length variations that were set among 731 protein families and after examining >2 million sequences. 
Indels are followed up to observe if they are close to the active site such that they can affect the activity of proteins. Inclusion of such information can aid the design of bioengineering experiments.",LenVarDB,0.996493101,Protein Alignments organized as Structural Superfamilies,0.726639379,LenVarDB,0.996493101,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/4/2013 +23262288,http://bioinformatics.biol.uoa.gr/LepChorionDB,"LepChorionDB, a database of Lepidopteran chorion proteins and a set of tools useful for the identification of chorion proteins in Lepidopteran proteomes. Chorion proteins of Lepidoptera have a tripartite structure, which consists of a central domain and two, more variable, flanking arms. The central domain is highly conserved and it is used for the classification of chorion proteins into two major classes, A and B. Annotated and unreviewed Lepidopteran chorion protein sequences are available in various databases. A database, named LepChorionDB, was constructed by searching 5 different protein databases using class A and B central domain-specific profile Hidden Markov Models (pHMMs), developed in this work. A total of 413 Lepidopteran chorion proteins from 9 moths and 1 butterfly species were retrieved. These data were enriched and organised in order to populate LepChorionDB, the first relational database, available on the web, containing Lepidopteran chorion proteins grouped in A and B classes. LepChorionDB may provide insights in future functional and evolutionary studies of Lepidopteran chorion proteins and thus, it will be a useful tool for the Lepidopteran scientific community and Lepidopteran genome annotators, since it also provides access to the two pHMMs developed in this work, which may be used to discriminate A and B class chorion proteins. 
LepChorionDB is freely available at http://bioinformatics.biol.uoa.gr/LepChorionDB.",LepChorionDB,0.994472027,NA,0,LepChorionDB,0.994472027,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/20/2012 +29905762,http://leptonet.org.in,"LeptoDB: an integrated database of genomics and proteomics resource of Leptospira. . Leptospirosis is a potentially fatal zoo-anthroponosis caused by pathogenic species of Leptospira belonging to the family of Leptospiraceae, with a worldwide distribution and effect, in terms of its burden and risk to human health. The 'LeptoDB' is a single window dedicated architecture (5 948 311 entries), modeled using heterogeneous data as a core resource for global Leptospira species. LeptoDB facilitates well-structured knowledge of genomics, proteomics and therapeutic aspects with more than 500 assemblies including 17 complete and 496 draft genomes encoding 1.7 million proteins for 23 Leptospira species with more than 250 serovars comprising pathogenic, intermediate and saprophytic strains. Also, it seeks to be a dynamic compendium for therapeutically essential components such as epitope, primers, CRISPR/Cas9 and putative drug targets. Integration of JBrowse provides elaborated locus centric description of sequence or contig. Jmol for structural visualization of protein structures, MUSCLE for interactive multiple sequence alignment annotation and analysis. The data on genomic islands will definitely provide an understanding of virulence and pathogenicity. Phylogenetics analysis integrated suggests the evolutionary division of strains. Easily accessible on a public web server, we anticipate wide use of this metadata on Leptospira for the development of potential therapeutics.Database URL: http://leptonet.org.in.",LeptoDB,0.995939851,NA,0,LeptoDB,0.995939851,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +32128558,http://tdb.ccmb.res.in/LeukmiR,"LeukmiR: a database for miRNAs and their targets in acute lymphoblastic leukemia. . 
Acute lymphoblastic leukemia (ALL) is one of the most common hematological malignancies in children. Recent studies suggest the involvement of multiple microRNAs in the tumorigenesis of various leukemias. However, until now, no comprehensive database exists for miRNAs and their cognate target genes involved specifically in ALL. Therefore, we developed 'LeukmiR' a dynamic database comprising in silico predicted microRNAs, and experimentally validated miRNAs along with the target genes they regulate in mouse and human. LeukmiR is a user-friendly platform with search strings for ALL-associated microRNAs, their sequences, description of target genes, their location on the chromosomes and the corresponding deregulated signaling pathways. For the user query, different search modules exist where either quick search can be carried out using any fuzzy term or by providing exact terms in specific modules. All entries for both human and mouse genomes can be retrieved through multiple options such as miRNA ID, their accession number, sequence, target genes, Ensemble-ID or Entrez-ID. User can also access miRNA: mRNA interaction networks in different signaling pathways, the genomic location of the targeted regions such as 3'UTR, 5'UTR and exons with their gene ontology and disease ontology information in both human and mouse systems. Herein, we also report 51 novel microRNAs which are not described earlier for ALL. Thus, LeukmiR database will be a valuable source of information for researchers to understand and investigate miRNAs and their targets with diagnostic and therapeutic potential in ALL. Database URL: http://tdb.ccmb.res.in/LeukmiR/.",LeukmiR,0.997031927,NA,0,LeukmiR,0.997031927,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +23874394,http://discovery.lifemapsc.com,"LifeMap Discovery™: the embryonic development, stem cells, and regenerative medicine research portal. 
LifeMap Discovery™ provides investigators with an integrated database of embryonic development, stem cell biology and regenerative medicine. The hand-curated reconstruction of cell ontology with stem cell biology; including molecular, cellular, anatomical and disease-related information, provides efficient and easy-to-use, searchable research tools. The database collates in vivo and in vitro gene expression and guides translation from in vitro data to the clinical utility, and thus can be utilized as a powerful tool for research and discovery in stem cell biology, developmental biology, disease mechanisms and therapeutic discovery. LifeMap Discovery is freely available to academic nonprofit institutions at http://discovery.lifemapsc.com.",LifeMap Discovery,0.972304722,NA,0,LifeMap Discovery,0.972304722,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/17/2013 +27493549,http://ligandbox.protein.osaka-u.ac.jp/ligandbox,"LigandBox: A database for 3D structures of chemical compounds. A database for the 3D structures of available compounds is essential for the virtual screening by molecular docking. We have developed the LigandBox database (http://ligandbox.protein.osaka-u.ac.jp/ligandbox/) containing four million available compounds, collected from the catalogues of 37 commercial suppliers, and approved drugs and biochemical compounds taken from KEGG_DRUG, KEGG_COMPOUND and PDB databases. Each chemical compound in the database has several 3D conformers with hydrogen atoms and atomic charges, which are ready to be docked into receptors using docking programs. The 3D conformations were generated using our molecular simulation program package, myPresto. Various physical properties, such as aqueous solubility (LogS) and carcinogenicity have also been calculated to characterize the ADME-Tox properties of the compounds. The Web database provides two services for compound searches: a property/chemical ID search and a chemical structure search. 
The chemical structure search is performed by a descriptor search and a maximum common substructure (MCS) search combination, using our program kcombu. By specifying a query chemical structure, users can find similar compounds among the millions of compounds in the database within a few minutes. Our database is expected to assist a wide range of researchers, in the fields of medical science, chemical biology, and biochemistry, who are seeking to discover active chemical compounds by the virtual screening.",LigandBox,0.997803092,NA,0,LigandBox,0.997803092,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/7/2013 +33709443,http://limonada.univ-reims.fr,"LIMONADA: A database dedicated to the simulation of biological membranes. Cellular membranes are composed of a wide diversity of lipid species in varying proportions and these compositions are representative of the organism, cellular type and organelle to which they belong. Because models of these molecular systems simulated by MD steadily gain in size and complexity, they are increasingly representative of specific compositions and behaviors of biological membranes. Due to the number of lipid species involved, of force fields and topologies and because of the complexity of membrane objects that have been simulated, LIMONADA has been developed as an open database allowing to handle the various aspects of lipid membrane simulation. LIMONADA presents published membrane patches with their simulation files and the cellular membrane it models. Their compositions are then detailed based on the lipid identification from LIPID MAPS database plus the lipid topologies and the force field used. LIMONADA is freely accessible on the web at https://limonada.univ-reims.fr/.",LIMONADA,0.994873464,NA,0,LIMONADA,0.994873464,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/11/2021 +26508761,http://openbis-eln-lims.ethz.ch,"openBIS ELN-LIMS: an open-source database for academic laboratories. 
Unlabelled The open-source platform openBIS (open Biology Information System) offers an Electronic Laboratory Notebook and a Laboratory Information Management System (ELN-LIMS) solution suitable for the academic life science laboratories. openBIS ELN-LIMS allows researchers to efficiently document their work, to describe materials and methods and to collect raw and analyzed data. The system comes with a user-friendly web interface where data can be added, edited, browsed and searched. Availability and implementation The openBIS software, a user guide and a demo instance are available at https://openbis-eln-lims.ethz.ch. The demo instance contains some data from our laboratory as an example to demonstrate the possibilities of the ELN-LIMS (Ottoz et al., 2014). For rapid local testing, a VirtualBox image of the ELN-LIMS is also available.",LIMS,0.736932799,NA,0,LIMS,0.736932799,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/27/2015 +23793747,http://www.bioinfo.tsinghua.edu.cn,"Linc2GO: a human LincRNA function annotation resource based on ceRNA hypothesis. Unlabelled Large numbers of long intergenic non-coding RNA (lincRNA) have been detected through high-throughput sequencing technology. However, currently we still know very little about their functions. Therefore, a lincRNA function annotation database is needed to facilitate the study in this field. In this article, we present Linc2GO, a web resource that aims to provide comprehensive functional annotations for human lincRNA. MicroRNA-mRNA and microRNA-lincRNA interaction data were integrated to generate lincRNA functional annotations based on the 'competing endogenous RNA hypothesis'. To the best of our knowledge, Linc2GO is the first database that makes use of the 'competing endogenous RNA hypothesis' to predict lincRNA functions. 
Availability Freely available at http://www.bioinfo.tsinghua.edu.cn/~liuke/Linc2GO/index.html",Linc2GO,0.995874822,NA,0,Linc2GO,0.995874822,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/22/2013 +31701147,http://lincsportal.ccs.miami.edu/signatures,"LINCS Data Portal 2.0: next generation access point for perturbation-response signatures. The Library of Integrated Network-Based Cellular Signatures (LINCS) is an NIH Common Fund program with the goal of generating a large-scale and comprehensive catalogue of perturbation-response signatures by utilizing a diverse collection of perturbations across many model systems and assay types. The LINCS Data Portal (LDP) has been the primary access point for the compendium of LINCS data and has been widely utilized. Here, we report the first major update of LDP (http://lincsportal.ccs.miami.edu/signatures) with substantial changes in the data architecture and APIs, a completely redesigned user interface, and enhanced curated metadata annotations to support more advanced, intuitive and deeper querying, exploration and analysis capabilities. The cornerstone of this update has been the decision to reprocess all high-level LINCS datasets and make them accessible at the data point level enabling users to directly access and download any subset of signatures across the entire library independent from the originating source, project or assay. Access to the individual signatures also enables the newly implemented signature search functionality, which utilizes the iLINCS platform to identify conditions that mimic or reverse gene set queries. 
A newly designed query interface enables global metadata search with autosuggest across all annotations associated with perturbations, model systems, and signatures.",LINCS,0.992289344,of Integrated Network-Based Cellular Signatures,0.737134829,LINCS,0.992289344,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +"24885522, 27924020, 33219661",http://bioinfo.hrbmu.edu.cn/LincSNP,"LincSNP: a database of linking disease-associated SNPs to human large intergenic non-coding RNAs. Background Genome-wide association studies (GWAS) have successfully identified a large number of single nucleotide polymorphisms (SNPs) that are associated with a wide range of human diseases. However, many of these disease-associated SNPs are located in non-coding regions and have remained largely unexplained. Recent findings indicate that disease-associated SNPs in human large intergenic non-coding RNA (lincRNA) may lead to susceptibility to diseases through their effects on lincRNA expression. There is, therefore, a need to specifically record these SNPs and annotate them as potential candidates for disease. Description We have built LincSNP, an integrated database, to identify and annotate disease-associated SNPs in human lincRNAs. The current release of LincSNP contains approximately 140,000 disease-associated SNPs (or linkage disequilibrium SNPs), which can be mapped to around 5,000 human lincRNAs, together with their comprehensive functional annotations. The database also contains annotated, experimentally supported SNP-lincRNA-disease associations and disease-associated lincRNAs. It provides flexible search options for data extraction and searches can be performed by disease/phenotype name, SNP ID, lincRNA name and chromosome region. In addition, we provide users with a link to download all the data from LincSNP and have developed a web interface for the submission of novel identified SNP-lincRNA-disease associations. 
Conclusions The LincSNP database aims to integrate disease-associated SNPs and human lincRNAs, which will be an important resource for the investigation of the functions and mechanisms of lincRNAs in human disease. The database is available at http://bioinfo.hrbmu.edu.cn/LincSNP.",LincSNP,0.997198164,NA,0,LincSNP,0.997198164,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +34378177,http://linguapix.uni-mannheim.de,"LinguaPix database: A megastudy of picture-naming norms. The major aim of the present megastudy of picture-naming norms was to address the shortcomings of the available picture data sets used in psychological and linguistic research by creating a new database of normed colour images that researchers from around the world can rely upon in their investigations. In order to do this, we employed a new form of normative study, namely a megastudy, whereby 1620 colour photographs of items spanning across 42 semantic categories were named and rated by a group of German speakers. This was done to establish the following linguistic norms: speech onset times (SOT), name agreement, accuracy, familiarity, visual complexity, valence, and arousal. The data, including over 64,000 audio files, were used to create the LinguaPix database of pictures, audio recordings, and linguistic norms, which to our knowledge, is the largest available research tool of its kind ( http://linguapix.uni-mannheim.de ). In this paper, we present the tool and the analysis of the major variables.",LinguaPix,0.955935061,NA,0,LinguaPix,0.955935061,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/10/2021 +29136207,http://www.linkedomics.org,"LinkedOmics: analyzing multi-omics data within and across 32 cancer types. The LinkedOmics database contains multi-omics data and clinical data for 32 cancer types and a total of 11 158 patients from The Cancer Genome Atlas (TCGA) project. 
It is also the first multi-omics database that integrates mass spectrometry (MS)-based global proteomics data generated by the Clinical Proteomic Tumor Analysis Consortium (CPTAC) on selected TCGA tumor samples. In total, LinkedOmics has more than a billion data points. To allow comprehensive analysis of these data, we developed three analysis modules in the LinkedOmics web application. The LinkFinder module allows flexible exploration of associations between a molecular or clinical attribute of interest and all other attributes, providing the opportunity to analyze and visualize associations between billions of attribute pairs for each cancer cohort. The LinkCompare module enables easy comparison of the associations identified by LinkFinder, which is particularly useful in multi-omics and pan-cancer analyses. The LinkInterpreter module transforms identified associations into biological understanding through pathway and network analysis. Using five case studies, we demonstrate that LinkedOmics provides a unique platform for biologists and clinicians to access, analyze and compare cancer multi-omics data within and across tumor types. LinkedOmics is freely available at http://www.linkedomics.org.",LinkedOmics,0.990786731,NA,0,LinkedOmics,0.990786731,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +27794552,http://linkprot.cent.uw.edu.pl,"LinkProt: a database collecting information about biological links. Protein chains are known to fold into topologically complex shapes, such as knots, slipknots or complex lassos. This complex topology of the chain can be considered as an additional feature of a protein, separate from secondary and tertiary structures. Moreover, the complex topology can be defined also as one additional structural level. The LinkProt database (http://linkprot.cent.uw.edu.pl) collects and displays information about protein links - topologically non-trivial structures made by up to four chains and complexes of chains (e.g. in capsids). 
The database presents deterministic links (with loops closed, e.g. by two disulfide bonds), links formed probabilistically and macromolecular links. The structures are classified according to their topology and presented using the minimal surface area method. The database is also equipped with basic tools which allow users to analyze the topology of arbitrary (bio)polymers.",LinkProt,0.987855673,NA,0,LinkProt,0.987855673,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2016 +34415996,http://mahshaaban.shinyapps.io/LINPSAPP,"LINPS: a database for cancer-cell-specific perturbations of biological networks. . Screening for potential cancer therapies using existing large datasets of drug perturbations requires expertise and resources not available to all. This is often a barrier for lab scientists to tap into these valuable resources. To address these issues, one can take advantage of prior knowledge especially those coded in standard formats such as causal biological networks (CBN). Large datasets can be converted into appropriate structures, analyzed once and the results made freely available in easy-to-use formats. We used the Library of Integrated Cellular Signatures to model the cell-specific effect of hundreds of drug treatments on gene expression. These signatures were then used to predict the effect of the treatments on several CBN using the network perturbation amplitudes analysis. We packaged the pre-computed scores in a database with an interactive web interface. The intuitive user-friendly interface can be used to query the database for drug perturbations and quantify their effect on multiple key biological functions in cancer cell lines. In addition to describing the process of building the database and the interface, we provide a realistic use case to explain how to use and interpret the results. 
To sum, we pre-computed cancer-cell-specific perturbation amplitudes of several biological networks and made the output available in a database with an interactive web interface. Database URL https://mahshaaban.shinyapps.io/LINPSAPP/.",LINPS,0.818079352,NA,0,LINPS,0.818079352,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/1/2021 +22112530,http://www.lipabase-pfba-tun.org,"LIPABASE: a database for 'true' lipase family enzymes. Lipase enzymes play an important role in lipid metabolism and are produced by a variety of species. Compared with animal, bacterial and fungal, little is known about plant lipases. Although lipases belong to many different protein families, they have the same architecture, the ?/?-hydrolase fold and a conserved active site signature, the Gly-Xaa-Ser-Xaa-Gly motif. Several studies on enzymatic activity and interfacial activation phenomenon of lipases confirm the presence of consensus sequence and a conserved domain. Lipases can be divided into two main groups: carboxylesterases (EC 3.1.1.1); 'true' lipases (EC 3.1.1.3), which differ in several biochemical features, which allow us to develop a database that regroups all 'true' lipase proprieties to establish relationship between structure and function. LIPABASE is a centralised resource database, which provides information about 'true' lipase from different species. It includes general, taxonomic, physicochemical and molecular data. Access to LIPABASE is free and available at http://www.lipabase-pfba-tun.org.",LIPABASE,0.963444293,NA,0,LIPABASE,0.963444293,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2011 +23667450,http://www.ebi.ac.uk/apweiler-srv/lipidhome,"LipidHome: a database of theoretical lipids optimized for high throughput mass spectrometry lipidomics. Protein sequence databases are the pillar upon which modern proteomics is supported, representing a stable reference space of predicted and validated proteins. 
One example of such resources is UniProt, enriched with both expertly curated and automatic annotations. Taken largely for granted, similar mature resources such as UniProt are not available yet in some other ""omics"" fields, lipidomics being one of them. While having a seasoned community of wet lab scientists, lipidomics lies significantly behind proteomics in the adoption of data standards and other core bioinformatics concepts. This work aims to reduce the gap by developing an equivalent resource to UniProt called 'LipidHome', providing theoretically generated lipid molecules and useful metadata. Using the 'FASTLipid' Java library, a database was populated with theoretical lipids, generated from a set of community agreed upon chemical bounds. In parallel, a web application was developed to present the information and provide computational access via a web service. Designed specifically to accommodate high throughput mass spectrometry based approaches, lipids are organised into a hierarchy that reflects the variety in the structural resolution of lipid identifications. Additionally, cross-references to other lipid related resources and papers that cite specific lipids were used to annotate lipid records. The web application encompasses a browser for viewing lipid records and a 'tools' section where an MS1 search engine is currently implemented. LipidHome can be accessed at http://www.ebi.ac.uk/apweiler-srv/lipidhome.",LipidHome,0.980949104,NA,0,LipidHome,0.980949104,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/7/2013 +29648583,http://lipidpedia.cmdm.tw,"LipidPedia: a comprehensive lipid knowledgebase. Motivation Lipids are divided into fatty acyls, glycerolipids, glycerophospholipids, sphingolipids, saccharolipids, sterols, prenol lipids and polyketides. Fatty acyls and glycerolipids are commonly used as energy storage, whereas glycerophospholipids, sphingolipids, sterols and saccharolipids are common used as components of cell membranes. 
Lipids in fatty acyls, glycerophospholipids, sphingolipids and sterols classes play important roles in signaling. Although more than 36 million lipids can be identified or computationally generated, no single lipid database provides comprehensive information on lipids. Furthermore, the complex systematic or common names of lipids make the discovery of related information challenging. Results Here, we present LipidPedia, a comprehensive lipid knowledgebase. The content of this database is derived from integrating annotation data with full-text mining of 3923 lipids and more than 400 000 annotations of associated diseases, pathways, functions and locations that are essential for interpreting lipid functions and mechanisms from over 1 400 000 scientific publications. Each lipid in LipidPedia also has its own entry containing a text summary curated from the most frequently cited diseases, pathways, genes, locations, functions, lipids and experimental models in the biomedical literature. LipidPedia aims to provide an overall synopsis of lipids to summarize lipid annotations and provide a detailed listing of references for understanding complex lipid functions and mechanisms. Availability and implementation LipidPedia is available at http://lipidpedia.cmdm.tw. Supplementary information Supplementary data are available at Bioinformatics online.",LipidPedia,0.990220785,NA,0,LipidPedia,0.990220785,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2018 +30357370,http://bioinfo5.ugr.es/liqdb,"liqDB: a small-RNAseq knowledge discovery database for liquid biopsy studies. MiRNAs are important regulators of gene expression and are frequently deregulated under pathologic conditions. They are highly stable in bodily fluids which makes them feasible candidates to become minimally invasive biomarkers. In fact, several studies already proposed circulating miRNA-based biomarkers for different types of neoplastic, cardiovascular and degenerative diseases. 
However, many of these studies rely on small RNA sequencing experiments that are based on different RNA extraction and processing protocols, rendering results incomparable. We generated liqDB, a database for liquid biopsy small RNA sequencing profiles that provides users with meaningful information to guide their small RNA liquid biopsy research and to overcome technical and conceptual problems. By means of a user-friendly web interface, miRNA expression profiles from 1607 manually annotated samples can be queried and explored at different levels. Result pages include downloadable expression matrices, differential expression analysis, most stably expressed miRNAs, cluster analysis and relevant visualizations by means of boxplots and heatmaps. We anticipate that liqDB will be a useful tool in liquid biopsy research as it provides a consistently annotated large compilation of experiments together with tools for reproducible analysis, comparison and hypothesis generation. LiqDB is available at http://bioinfo5.ugr.es/liqdb.",liqDB,0.997288704,NA,0,liqDB,0.997288704,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +26546515,"http://legumeinfo.org, http://legumefederation.org","Legume information system (LegumeInfo.org): a key component of a set of federated data resources for the legume family. Legume Information System (LIS), at http://legumeinfo.org, is a genomic data portal (GDP) for the legume family. LIS provides access to genetic and genomic information for major crop and model legumes. With more than two-dozen domesticated legume species, there are numerous specialists working on particular species, and also numerous GDPs for these species. LIS has been redesigned in the last three years both to better integrate data sets across the crop and model legumes, and to better accommodate specialized GDPs that serve particular legume species. 
To integrate data sets, LIS provides genome and map viewers, holds synteny mappings among all sequenced legume species and provides a set of gene families to allow traversal among orthologous and paralogous sequences across the legumes. To better accommodate other specialized GDPs, LIS uses open-source GMOD components where possible, and advocates use of common data templates, formats, schemas and interfaces so that data collected by one legume research community are accessible across all legume GDPs, through similar interfaces and using common APIs. This federated model for the legumes is managed as part of the 'Legume Federation' project (accessible via http://legumefederation.org), which can be thought of as an umbrella project encompassing LIS and other legume GDPs.",LIS,0.903893352,Legume Information System,0.759577766,LIS,0.903893352,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/5/2015 +26444974,http://listeria.um.edu.my,"Development of ListeriaBase and comparative analysis of Listeria monocytogenes. Background Listeria consists of both pathogenic and non-pathogenic species. Reports of similarities between the genomic content between some pathogenic and non-pathogenic species necessitates the investigation of these species at the genomic level to understand the evolution of virulence-associated genes. With Listeria genome data growing exponentially, comparative genomic analysis may give better insights into evolution, genetics and phylogeny of Listeria spp., leading to better management of the diseases caused by them. Description With this motivation, we have developed ListeriaBase, a web Listeria genomic resource and analysis platform to facilitate comparative analysis of Listeria spp. ListeriaBase currently houses 850,402 protein-coding genes, 18,113 RNAs and 15,576 tRNAs from 285 genome sequences of different Listeria strains. An AJAX-based real time search system implemented in ListeriaBase facilitates searching of this huge genomic data. 
Our in-house designed comparative analysis tools such as Pairwise Genome Comparison (PGC) tool allowing comparison between two genomes, Pathogenomics Profiling Tool (PathoProT) for comparing the virulence genes, and ListeriaTree for phylogenic classification, were customized and incorporated in ListeriaBase facilitating comparative genomic analysis of Listeria spp. Interestingly, we identified a unique genomic feature in the L. monocytogenes genomes in our analysis. The Auto protein sequences of the serotype 4 and the non-serotype 4 strains of L. monocytogenes possessed unique sequence signatures that can differentiate the two groups. We propose that the aut gene may be a potential gene marker for differentiating the serotype 4 strains from other serotypes of L. monocytogenes. Conclusions ListeriaBase is a useful resource and analysis platform that can facilitate comparative analysis of Listeria for the scientific communities. We have successfully demonstrated some key utilities of ListeriaBase. The knowledge that we obtained in the analyses of L. monocytogenes may be important for functional works of this human pathogen in future. ListeriaBase is currently available at http://listeria.um.edu.my .",ListeriaBase,0.993109643,NA,0,ListeriaBase,0.993109643,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/6/2015 +33166392,http://www.ncbi.nlm.nih.gov/research/coronavirus,"LitCovid: an open database of COVID-19 literature. Since the outbreak of the current pandemic in 2020, there has been a rapid growth of published articles on COVID-19 and SARS-CoV-2, with about 10,000 new articles added each month. This is causing an increasingly serious information overload, making it difficult for scientists, healthcare professionals and the general public to remain up to date on the latest SARS-CoV-2 and COVID-19 research. Hence, we developed LitCovid (https://www.ncbi.nlm.nih.gov/research/coronavirus/), a curated literature hub, to track up-to-date scientific information in PubMed. 
LitCovid is updated daily with newly identified relevant articles organized into curated categories. To support manual curation, advanced machine-learning and deep-learning algorithms have been developed, evaluated and integrated into the curation workflow. To the best of our knowledge, LitCovid is the first-of-its-kind COVID-19-specific literature resource, with all of its collected articles and curated data freely available. Since its release, LitCovid has been widely used, with millions of accesses by users worldwide for various information needs, such as evidence synthesis, drug discovery and text and data mining, among others.",LitCovid,0.996545017,NA,0,LitCovid,0.996545017,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +21707958,http://mbio-serv2.mbioekol.lu.se/Littorina,"The Littorina sequence database (LSD)--an online resource for genomic data. We present an interactive, searchable expressed sequence tag database for the periwinkle snail Littorina saxatilis, an upcoming model species in evolutionary biology. The database is the result of a hybrid assembly between Sanger and 454 sequences, 1290 and 147,491 sequences respectively. Normalized and non-normalized cDNA was obtained from different ecotypes of L. saxatilis collected in the UK and Sweden. The Littorina sequence database (LSD) contains 26,537 different contigs, of which 2453 showed similarity with annotated proteins in UniProt. Querying the LSD permits the selection of the taxonomic origin of blast hits for each contig, and the search can be restricted to particular taxonomic groups. The database allows access to UniProt annotations, blast output, protein family domains (PFAM) and Gene Ontology. The database will allow users to search for genetic markers and identifying candidate genes or genes for expression analyses. It is open for additional deposition of sequence information for L. saxatilis and other species of the genus Littorina. 
The LSD is available at http://mbio-serv2.mbioekol.lu.se/Littorina/.",LSD,0.87695243,Littorina sequence database,0.878103534,Littorina sequence database,0.878103534,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/28/2011 +33877974,http://live.ece.utexas.edu/research/LIVE_NFLX_II/live_nflx_plus.html,"Towards Perceptually Optimized Adaptive Video Streaming-A Realistic Quality of Experience Database. Measuring Quality of Experience (QoE) and integrating these measurements into video streaming algorithms is a multi-faceted problem that fundamentally requires the design of comprehensive subjective QoE databases and objective QoE prediction models. To achieve this goal, we have recently designed the LIVE-NFLX-II database, a highly-realistic database which contains subjective QoE responses to various design dimensions, such as bitrate adaptation algorithms, network conditions and video content. Our database builds on recent advancements in content-adaptive encoding and incorporates actual network traces to capture realistic network variations on the client device. The new database focuses on low bandwidth conditions which are more challenging for bitrate adaptation algorithms, which often must navigate tradeoffs between rebuffering and video quality. Using our database, we study the effects of multiple streaming dimensions on user experience and evaluate video quality and quality of experience models and analyze their strengths and weaknesses. We believe that the tools introduced here will help inspire further progress on the development of perceptually-optimized client adaptation and video streaming strategies. 
The database is publicly available at http://live.ece.utexas.edu/research/LIVE_NFLX_II/live_nflx_plus.html.",LIVE-NFLX-II,0.79076913,NA,0,LIVE-NFLX-II,0.79076913,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/25/2021 +23601370,http://liveratlas.hupo.org.cn,"LiverAtlas: a unique integrated knowledge database for systems-level research of liver and hepatic disease. Background A large amount of liver-related physiological and pathological data exist in publicly available biological and bibliographic databases, which are usually far from comprehensive or integrated. Data collection, integration and mining processes pose a great challenge to scientific researchers and clinicians interested in the liver. Method To address these problems, we constructed LiverAtlas (http://liveratlas.hupo.org.cn), a comprehensive resource of biomedical knowledge related to the liver and various hepatic diseases by incorporating 53 databases. Results In the present version, LiverAtlas covers data on liver-related genomics, transcriptomics, proteomics, metabolomics and hepatic diseases. Additionally, LiverAtlas provides a wealth of manually curated information, relevant literature citations and cross-references to other databases. Importantly, an expert-confirmed Human Liver Disease Ontology, including relevant information for 227 types of hepatic disease, has been constructed and is used to annotate LiverAtlas data. Furthermore, we have demonstrated two examples of applying LiverAtlas data to identify candidate markers for hepatocellular carcinoma (HCC) at the systems level and to develop a systems biology-based classifier by combining the differential gene expression with topological features of human protein interaction networks to enhance the ability of HCC differential diagnosis. 
Conclusion LiverAtlas is the most comprehensive liver and hepatic disease resource, which helps biologists and clinicians to analyse their data at the systems level and will contribute much to the biomarker discovery and diagnostic performance enhancement for liver diseases.",LiverAtlas,0.997491121,NA,0,LiverAtlas,0.997491121,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/21/2013 +22369201,http://liverome.kobic.re.kr,"Liverome: a curated database of liver cancer-related gene signatures with self-contained context information. Background Hepatocellular carcinoma (HCC) is the fifth most common cancer worldwide. A number of molecular profiling studies have investigated the changes in gene and protein expression that are associated with various clinicopathological characteristics of HCC and generated a wealth of scattered information, usually in the form of gene signature tables. A database of the published HCC gene signatures would be useful to liver cancer researchers seeking to retrieve existing differential expression information on a candidate gene and to make comparisons between signatures for prioritization of common genes. A challenge in constructing such database is that a direct import of the signatures as appeared in articles would lead to a loss or ambiguity of their context information that is essential for a correct biological interpretation of a gene's expression change. This challenge arises because designation of compared sample groups is most often abbreviated, ad hoc, or even missing from published signature tables. Without manual curation, the context information becomes lost, leading to uninformative database contents. Although several databases of gene signatures are available, none of them contains informative form of signatures nor shows comprehensive coverage on liver cancer. Thus we constructed Liverome, a curated database of liver cancer-related gene signatures with self-contained context information. 
Description Liverome's data coverage is more than three times larger than any other signature database, consisting of 143 signatures taken from 98 HCC studies, mostly microarray and proteome, and involving 6,927 genes. The signatures were post-processed into an informative and uniform representation and annotated with an itemized summary so that all context information is unambiguously self-contained within the database. The signatures were further informatively named and meaningfully organized according to ten functional categories for guided browsing. Its web interface enables a straightforward retrieval of known differential expression information on a query gene and a comparison of signatures to prioritize common genes. The utility of Liverome-collected data is shown by case studies in which useful biological insights on HCC are produced. Conclusion Liverome database provides a comprehensive collection of well-curated HCC gene signatures and straightforward interfaces for gene search and signature comparison as well. Liverome is available at http://liverome.kobic.re.kr.",Liverome,0.81696564,NA,0,Liverome,0.81696564,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/30/2011 +29029599,http://liverwiki.hupo.org.cn,"LiverWiki: a wiki-based database for human liver. Background Recent advances in omics technology have produced a large amount of liver-related data. A comprehensive and up-to-date source of liver-related data is needed to allow biologists to access the latest data. However, current liver-related data sources each cover only a specific part of the liver. It is difficult for them to keep pace with the rapid increase of liver-related data available at those data resources. Integrating diverse liver-related data is a critical yet formidable challenge, as it requires sustained human effort. 
Results We present LiverWiki, a first wiki-based database that integrates liver-related genes, homolog genes, gene expressions in microarray datasets and RNA-Seq datasets, proteins, protein interactions, post-translational modifications, associated pathways, diseases, metabolites identified in the metabolomics datasets, and literatures into an easily accessible and searchable resource for community-driven sharing. LiverWiki houses information in a total of 141,897 content pages, including 19,787 liver-related gene pages, 17,077 homolog gene pages, 50,251 liver-related protein pages, 36,122 gene expression pages, 2067 metabolites identified in the metabolomics datasets, 16,366 disease-related molecules, and 227 liver disease pages. Other than assisting users in searching, browsing, reviewing, refining the contents on LiverWiki, the most important contribution of LiverWiki is to allow the community to create and update biological data of liver in visible and editable tables. This integrates newly produced data with existing knowledge. Implemented in mediawiki, LiverWiki provides powerful extensions to support community contributions. Conclusions The main goal of LiverWiki is to provide the research community with comprehensive liver-related data, as well as to allow the research community to share their liver-related data flexibly and efficiently. It also enables rapid sharing new discoveries by allowing the discoveries to be integrated and shared immediately, rather than relying on expert curators. The database is available online at http://liverwiki.hupo.org.cn /.",LiverWiki,0.995711923,NA,0,LiverWiki,0.995711923,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/13/2017 +23452239,http://ljgea.noble.org,"Establishment of the Lotus japonicus Gene Expression Atlas (LjGEA) and its use to explore legume seed maturation. Lotus japonicus is a model species for legume genomics. 
To accelerate legume functional genomics, we developed a Lotus japonicus Gene Expression Atlas (LjGEA), which provides a global view of gene expression in all organ systems of this species, including roots, nodules, stems, petioles, leaves, flowers, pods and seeds. Time-series data covering multiple stages of developing pod and seed are included in the LjGEA. In addition, previously published L. japonicus Affymetrix data are included in the database, making it a 'one-stop shop' for transcriptome analysis of this species. The LjGEA web server (http://ljgea.noble.org/) enables flexible, multi-faceted analyses of the transcriptome. Transcript data may be accessed using the Affymetrix probe identification number, DNA sequence, gene name, functional description in natural language, and GO and KEGG annotation terms. Genes may be discovered through co-expression or differential expression analysis. Users may select a subset of experiments and visualize and compare expression profiles of multiple genes simultaneously. Data may be downloaded in a tabular form compatible with common analytical and visualization software. To illustrate the power of LjGEA, we explored the transcriptome of developing seeds. Genes represented by 36 474 probe sets were expressed at some stage during seed development, and almost half of these genes displayed differential expression during development. Among the latter were 624 transcription factor genes, some of which are orthologs of transcription factor genes that are known to regulate seed development in other species, while most are novel and represent attractive targets for reverse genetics approaches to determine their roles in this important organ.",LjGEA,0.995089054,Lotus japonicus Gene Expression Atlas,0.94991678,LjGEA,0.995089054,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/4/2013 +34726633,http://www.cryst.ehu.es,"Layer groups: Brillouin-zone and crystallographic databases on the Bilbao Crystallographic Server. 
The section of the Bilbao Crystallographic Server (https://www.cryst.ehu.es/) dedicated to subperiodic groups contains crystallographic and Brillouin-zone databases for the layer groups. The crystallographic databases include the generators/general positions (GENPOS), Wyckoff positions (WYCKPOS) and maximal subgroups (MAXSUB). The Brillouin-zone database (LKVEC) offers k-vector tables and Brillouin-zone figures of all 80 layer groups which form the background of the classification of their irreducible representations. The symmetry properties of the wavevectors are described applying the so-called reciprocal-space-group approach and this classification scheme is compared with that of Litvin & Wike [(1991), Character Tables and Compatibility Relations of the Eighty Layer Groups and Seventeen Plane Groups. New York: Plenum Press]. The specification of independent parameter ranges of k vectors in the representation domains of the Brillouin zones provides a solution to the problems of uniqueness and completeness of layer-group representations. The Brillouin-zone figures and k-vector tables are described in detail and illustrated by several examples.",LKVEC,0.784514523,Bilbao Crystallographic Server,0.710330635,LKVEC,0.784514523,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,9/24/2021 +31906602,"http://bio-comp.ucas.ac.cn/llpsdb, http://bio-comp.org.cn/llpsdb","LLPSDB: a database of proteins undergoing liquid-liquid phase separation in vitro. Liquid-liquid phase separation (LLPS) leads to a conversion of homogeneous solution into a dense phase that often resembles liquid droplets, and a dilute phase. An increasing number of investigations have shown that biomolecular condensates formed by LLPS play important roles in both physiology and pathology. It has been suggested the phase behavior of proteins would be not only determined by sequences, but controlled by micro-environmental conditions. 
Here, we introduce LLPSDB (http://bio-comp.ucas.ac.cn/llpsdb or http://bio-comp.org.cn/llpsdb), a web-accessible database providing comprehensive, carefully curated collection of proteins involved in LLPS as well as corresponding experimental conditions in vitro from published literatures. The current release of LLPSDB incorporates 1182 entries with 273 independent proteins and 2394 specific conditions. The database provides a variety of data including biomolecular information (protein sequence, protein modification, nucleic acid, etc.), specific phase separation information (experimental conditions, phase behavior description, etc.) and comprehensive annotations. To our knowledge, LLPSDB is the first available database designed for LLPS related proteins specifically. It offers plenty of valuable resources for exploring the relationship between protein sequence and phase behavior, and will enhance the development of phase separation prediction methods, which may further provide more insights into a comprehensive understanding of LLPS in cellular function and related diseases.",LLPSDB,0.997212887,NA,0,LLPSDB,0.997212887,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24106090,http://www.lm.lncc.br,"Laminin-database v.2.0: an update on laminins in health and neuromuscular disorders. The laminin (LM)-database, hosted at http://www.lm.lncc.br, was published in the NAR database 2011 edition. It was the first database that provided comprehensive information concerning a non-collagenous family of extracellular matrix proteins, the LMs. In its first version, this database contained a large amount of information concerning LMs related to health and disease, with particular emphasis on the haemopoietic system. Users can easily access several tabs for LMs and LM-related molecules, as well as LM nomenclatures and direct links to PubMed. The LM-database version 2.0 integrates data from several publications to achieve a more comprehensive knowledge of LMs in health and disease. 
The novel features include the addition of two new tabs, 'Neuromuscular Disorders' and 'miRNA--LM Relationship'. More specifically, in this updated version, an expanding set of data has been displayed concerning the role of LMs in neuromuscular and neurodegenerative diseases, as well as the putative involvement of microRNAs. Given the importance of LMs in several biological processes, such as cell adhesion, proliferation, differentiation, migration and cell death, this upgraded version expands for users a panoply of information, regarding complex molecular circuitries that involve LMs in health and disease, including neuromuscular and neurodegenerative disorders.",LM)-database,0.866516441,Laminin,0.567927003,LM)-database,0.866516441,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/7/2013 +25776024,http://bicresources.jcbose,"LMPID: a manually curated database of linear motifs mediating protein-protein interactions. . Linear motifs (LMs), used by a subset of all protein-protein interactions (PPIs), bind to globular receptors or domains and play an important role in signaling networks. LMPID (Linear Motif mediated Protein Interaction Database) is a manually curated database which provides comprehensive experimentally validated information about the LMs mediating PPIs from all organisms on a single platform. About 2200 entries have been compiled by detailed manual curation of PubMed abstracts, of which about 1000 LM entries were being annotated for the first time, as compared with the Eukaryotic LM resource. The users can submit their query through a user-friendly search page and browse the data in the alphabetical order of the bait gene names and according to the domains interacting with the LM. LMPID is freely accessible at http://bicresources.jcbose. ac.in/ssaha4/lmpid and contains 1750 unique LM instances found within 1181 baits interacting with 552 prey proteins. 
In summary, LMPID is an attempt to enrich the existing repertoire of resources available for studying the LMs implicated in PPIs and may help in understanding the patterns of LMs binding to a specific domain and develop prediction model to identify novel LMs specific to a domain and further able to predict inhibitors/modulators of PPI of interest.",LMPID,0.998238027,Linear Motif mediated Protein Interaction Database,0.925727314,LMPID,0.998238027,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/16/2015 +33219685,"http://www.bio-bigdata.net/lnc2cancer, http://bio-bigdata.hrbmu.edu.cn/lnc2cancer","Lnc2Cancer 3.0: an updated resource for experimentally supported lncRNA/circRNA cancer associations and web tools based on RNA-seq and scRNA-seq data. An updated Lnc2Cancer 3.0 (http://www.bio-bigdata.net/lnc2cancer or http://bio-bigdata.hrbmu.edu.cn/lnc2cancer) database, which includes comprehensive data on experimentally supported long non-coding RNAs (lncRNAs) and circular RNAs (circRNAs) associated with human cancers. In addition, web tools for analyzing lncRNA expression by high-throughput RNA sequencing (RNA-seq) and single-cell RNA-seq (scRNA-seq) are described. Lnc2Cancer 3.0 was updated with several new features, including (i) Increased cancer-associated lncRNA entries over the previous version. The current release includes 9254 lncRNA-cancer associations, with 2659 lncRNAs and 216 cancer subtypes. (ii) Newly adding 1049 experimentally supported circRNA-cancer associations, with 743 circRNAs and 70 cancer subtypes. (iii) Experimentally supported regulatory mechanisms of cancer-related lncRNAs and circRNAs, involving microRNAs, transcription factors (TF), genetic variants, methylation and enhancers were included. (iv) Appending experimentally supported biological functions of cancer-related lncRNAs and circRNAs including cell growth, apoptosis, autophagy, epithelial mesenchymal transformation (EMT), immunity and coding ability. 
(v) Experimentally supported clinical relevance of cancer-related lncRNAs and circRNAs in metastasis, recurrence, circulation, drug resistance, and prognosis was included. Additionally, two flexible online tools, including RNA-seq and scRNA-seq web tools, were developed to enable fast and customizable analysis and visualization of lncRNAs in cancers. Lnc2Cancer 3.0 is a valuable resource for elucidating the associations between lncRNA, circRNA and cancer.",Lnc2Cancer,0.996302497,NA,0,Lnc2Cancer,0.996302497,1,"26481356.0, 30407549.0","26481356.0, 30407549.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +"26481356, 30407549",http://www.bio-bigdata.net/lnc2cancer,"Lnc2Cancer: a manually curated database of experimentally supported lncRNAs associated with various human cancers. Lnc2Cancer (http://www.bio-bigdata.net/lnc2cancer) is a manually curated database of cancer-associated long non-coding RNAs (lncRNAs) with experimental support that aims to provide a high-quality and integrated resource for exploring lncRNA deregulation in various human cancers. LncRNAs represent a large category of functional RNA molecules that play a significant role in human cancers. A curated collection and summary of deregulated lncRNAs in cancer is essential to thoroughly understand the mechanisms and functions of lncRNAs. Here, we developed the Lnc2Cancer database, which contains 1057 manually curated associations between 531 lncRNAs and 86 human cancers. Each association includes lncRNA and cancer name, the lncRNA expression pattern, experimental techniques, a brief functional description, the original reference and additional annotation information. Lnc2Cancer provides a user-friendly interface to conveniently browse, retrieve and download data. Lnc2Cancer also offers a submission page for researchers to submit newly validated lncRNA-cancer associations. 
With the rapidly increasing interest in lncRNAs, Lnc2Cancer will significantly improve our understanding of lncRNA deregulation in cancer and has the potential to be a timely and valuable resource.",Lnc2Cancer,0.99007107,NA,0,Lnc2Cancer,0.99007107,2,33219685,33219685,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +29069510,http://www.bio-bigdata.com/Lnc2Meth,"Lnc2Meth: a manually curated database of regulatory relationships between long non-coding RNAs and DNA methylation associated with human disease. Lnc2Meth (http://www.bio-bigdata.com/Lnc2Meth/), an interactive resource to identify regulatory relationships between human long non-coding RNAs (lncRNAs) and DNA methylation, is not only a manually curated collection and annotation of experimentally supported lncRNAs-DNA methylation associations but also a platform that effectively integrates tools for calculating and identifying the differentially methylated lncRNAs and protein-coding genes (PCGs) in diverse human diseases. The resource provides: (i) advanced search possibilities, e.g. retrieval of the database by searching the lncRNA symbol of interest, DNA methylation patterns, regulatory mechanisms and disease types; (ii) abundant computationally calculated DNA methylation array profiles for the lncRNAs and PCGs; (iii) the prognostic values for each hit transcript calculated from the patients clinical data; (iv) a genome browser to display the DNA methylation landscape of the lncRNA transcripts for a specific type of disease; (v) tools to re-annotate probes to lncRNA loci and identify the differential methylation patterns for lncRNAs and PCGs with user-supplied external datasets; (vi) an R package (LncDM) to complete the differentially methylated lncRNAs identification and visualization with local computers. 
Lnc2Meth provides a timely and valuable resource that can be applied to significantly expand our understanding of the regulatory relationships between lncRNAs and DNA methylation in various human diseases.",Lnc2Meth,0.998092294,NA,0,Lnc2Meth,0.998092294,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +30476305,http://www.bio-bigdata.net/LncACTdb,"LncACTdb 2.0: an updated database of experimentally supported ceRNA interactions curated from low- and high-throughput experiments. We describe LncACTdb 2.0 (http://www.bio-bigdata.net/LncACTdb/), an updated and significantly expanded database which provides comprehensive information of competing endogenous RNAs (ceRNAs) in different species and diseases. We have updated LncACTdb 2.0 with more data and several new features, including (i) manually curating 2663 experimentally supported ceRNA interactions from >5000 published literatures; (ii) expanding the scope of the database up to 23 species and 213 diseases/phenotypes; (iii) curating more ceRNA types such as circular RNAs and pseudogenes; (iv) identifying and scoring candidate lncRNA-associated ceRNA interactions across 33 cancer types from TCGA data; (v) providing illustration of survival, network and cancer hallmark information for ceRNAs. Furthermore, several flexible online tools including LncACT-Get, LncACT-Function, LncACT-Survival, LncACT-Network and LncACTBrowser have been developed to perform customized analysis, functional analysis, survival analysis, network illustration and genomic visualization. LncACTdb 2.0 also provides newly designed, user-friendly web interfaces to search, browse and download all the data. The BLAST interface is convenient for users to query dataset by inputting custom sequences. The Hot points interface provides users the most studied items by others. 
LncACTdb 2.0 is a continually updated database and will serve as an important resource to explore ceRNAs in physiological and pathological processes.",LncACTdb,0.993309855,NA,0,LncACTdb,0.993309855,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +26787663,http://lncanet.bioinfo-minzhao.org/Contact,"lnCaNet: pan-cancer co-expression network for human lncRNA and cancer genes. Unlabelled Thousands of human long non-coding RNAs (lncRNAs) have been identified in cancers and played important roles in a wide range of tumorigenesis. However, the functions of vast majority of human lncRNAs are still elusive. Emerging studies revealed that the expression level of majority lncRNAs shows discordant expression pattern with their protein-coding gene neighbors in various model organisms. Therefore, it may be useful to infer lncRNAs' potential biological function in cancer development by more comprehensive functional views of co-expressed cancer genes beyond mere physical proximity of genes. To this aim, we performed thorough searches and analyses of the interactions between lncRNA and non-neighboring cancer genes and provide a comprehensive co-expression data resource, LnCaNet. In current version, LnCaNet contains the pre-computed 8 494 907 significant co-expression pairs of 9641 lncRNAs and 2544 well-classified cancer genes in 2922 matched TCGA samples. In detail, we integrated 10 cancer gene lists from public database and calculate the co-expression with all the lncRNAs in 11 TCGA cancer types separately. Based on the resulted 110 co-expression networks, we identified 17 common regulatory pairs related to extracellular space shared in 11 cancers. We expect LnCaNet will enable researcher to explore lncRNA expression pattern, their affected cancer genes and pathways, biological significance in the context of specific cancer types and other useful annotation related to particular kind of lncRNA-cancer gene interaction. 
Availability and implementation http://lncanet.bioinfo-minzhao.org/Contact: m.zhao@uq.edu.au Supplementary information Supplementary data are available at Bioinformatics online.",LnCaNet,0.995760918,NA,0,LnCaNet,0.995760918,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/18/2016 +32820322,http://lncrna2as.cd120.com,"LncAS2Cancer: a comprehensive database for alternative splicing of lncRNAs across human cancers. . Accumulating studies demonstrated that the roles of lncRNAs for tumorigenesis were isoform-dependent and their aberrant splicing patterns in cancers contributed to function specificity. However, there is no existing database focusing on cancer-related alternative splicing of lncRNAs. Here, we developed a comprehensive database called LncAS2Cancer, which collected 5335 bulk RNA sequencing and 1826 single-cell RNA sequencing samples, covering over 30 cancer types. By applying six state-of-the-art splicing algorithms, 50 859 alternative splicing events for 8 splicing types were identified and deposited in the database. In addition, the database contained the following information: (i) splicing patterns of lncRNAs under seven different conditions, such as gene interference, which facilitated to infer potential regulators; (ii) annotation information derived from eight sources and manual curation, to understand the functional impact of affected sequences; (iii) survival analysis to explore potential biomarkers; as well as (iv) a suite of tools to browse, search, visualize and download interesting information. LncAS2Cancer could not only confirm the known cancer-associated lncRNA isoforms but also indicate novel ones. Using the data deposited in LncAS2Cancer, we compared gene model and transcript overlap between lncRNAs and protein-coding genes and discusses how these factors, along with sequencing depth, affected the interpretation of splicing signals. 
Based on recurrent signals and potential confounders, we proposed a reliable score to prioritize splicing events for further elucidation. Together, with the broad collection of lncRNA splicing patterns and annotation, LncAS2Cancer will provide important new insights into the diverse functional roles of lncRNA isoforms in human cancers. LncAS2Cancer is freely available at https://lncrna2as.cd120.com/.",LncAS2Cancer,0.992546678,NA,0,LncAS2Cancer,0.992546678,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +26944085,http://biocc.hrbmu.edu.cn/LNCat,"A comprehensive overview of lncRNA annotation resources. Long noncoding RNAs (lncRNAs) are emerging as a class of important regulators participating in various biological functions and disease processes. With the widespread application of next-generation sequencing technologies, large numbers of lncRNAs have been identified, producing plenty of lncRNA annotation resources in different contexts. However, at present, we lack a comprehensive overview of these lncRNA annotation resources. In this study, we reviewed 24 currently available lncRNA annotation resources referring to > 205 000 lncRNAs in over 50 tissues and cell lines. We characterized these annotation resources from different aspects, including exon structure, expression, histone modification and function. We found many distinct properties among these annotation resources. Especially, these resources showed diverse chromatin signatures, remarkable tissue and cell type dependence and functional specificity. Our results suggested the incompleteness and complementarity of current lncRNA annotations and the necessity of integration of multiple resources to comprehensively characterize lncRNAs. 
Finally, we developed 'LNCat' (lncRNA atlas, freely available at http://biocc.hrbmu.edu.cn/LNCat/), a user-friendly database that provides a genome browser of lncRNA structures, visualization of different resources from multiple angles and download of different combinations of lncRNA annotations, and supports rapid exploration, comparison and integration of lncRNA annotation resources. Overall, our study provides a comprehensive comparison of numerous lncRNA annotations, and can facilitate understanding of lncRNAs in human disease.",LNCat,0.995651901,lncRNA,0.774182886,LNCat,0.995651901,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2017 +30329098,http://bigd.big.ac.cn/lncbook,"LncBook: a curated knowledgebase of human long non-coding RNAs. Long non-coding RNAs (lncRNAs) have significant functions in a wide range of important biological processes. Although the number of known human lncRNAs has dramatically increased, they are poorly annotated, posing great challenges for better understanding their functional significance and elucidating their complex functioning molecular mechanisms. Here, we present LncBook (http://bigd.big.ac.cn/lncbook), a curated knowledgebase of human lncRNAs that features a comprehensive collection of human lncRNAs and systematic curation of lncRNAs by multi-omics data integration, functional annotation and disease association. In the present version, LncBook houses a large number of 270 044 lncRNAs and includes 1867 featured lncRNAs with 3762 lncRNA-function associations. It also integrates an abundance of multi-omics data from expression, methylation, genome variation and lncRNA-miRNA interaction. Also, LncBook incorporates 3772 experimentally validated lncRNA-disease associations and further identifies a total of 97 998 lncRNAs that are putatively disease-associated. 
Collectively, LncBook is dedicated to the integration and curation of human lncRNAs as well as their associated data and thus bears great promise to serve as a valuable knowledgebase for worldwide research communities.",LncBook,0.997623146,NA,0,LncBook,0.997623146,1,NA,31524988,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2019 +31524988,"http://lncrna.big.ac.cn/index.php/Main_Page, http://bigd.big.ac.cn/lncbook","Community Curation and Expert Curation of Human Long Noncoding RNAs with LncRNAWiki and LncBook. In recent years, the number of human long noncoding RNAs (lncRNAs) that have been identified has increased exponentially. However, these lncRNAs are poorly annotated compared to protein-coding genes, posing great challenges for a better understanding of their functional significance and elucidating their complex functioning molecular mechanisms. Here we employ both community and expert curation to yield a comprehensive collection of human lncRNAs and their annotations. Specifically, LncRNAWiki (http://lncrna.big.ac.cn/index.php/Main_Page) uses a wiki-based community curation model, thus showing great promise in dealing with the flood of biological knowledge, while LncBook (http://bigd.big.ac.cn/lncbook) is an expert curation-based database that provides a complement to LncRNAWiki. LncBook features a comprehensive collection of human lncRNAs and a systematic curation of lncRNAs by multi-omics data integration, functional annotation, and disease association. These protocols provide step-by-step instructions on how to browse and search a specific lncRNA and how to obtain a range of related information including expression, methylation, variation, function, and disease association. 
© 2019 by John Wiley & Sons, Inc.",LncBook,0.996044576,NA,0,LncBook,0.996044576,1,NA,30329098,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,9/1/2019 +29961817,http://lnccerbase.it1004.com,"LncCeRBase: a database of experimentally validated human competing endogenous long non-coding RNAs. . Long non-coding RNAs (lncRNAs) are endogenous molecules longer than 200 nucleotides, and lack coding potential. LncRNAs that interact with microRNAs (miRNAs) are known as a competing endogenous RNAs (ceRNAs) and have the ability to regulate the expression of target genes. The ceRNAs play an important role in the initiation and progression of various cancers. However, until now, there is no a database including a collection of experimentally verified, human ceRNAs. We developed the LncCeRBase database, which encompasses 432 lncRNA-miRNA-mRNA interactions, including 130 lncRNAs, 214 miRNAs and 245 genes from 300 publications. In addition, we compiled the signaling pathways associated with the included lncRNA-miRNA-mRNA interactions as a tool to explore their functions. LncCeRBase is useful for understanding the regulatory mechanisms of lncRNA.Database URL: http://lnccerbase.it1004.com.",LncCeRBase,0.998007238,NA,0,LncCeRBase,0.998007238,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +33219686,"http://www.bio-bigdata.net/LnCeCell/, http://bio-bigdata.hrbmu.edu.cn/LnCeCell","LnCeCell: a comprehensive database of predicted lncRNA-associated ceRNA networks at single-cell resolution. Within the tumour microenvironment, cells exhibit different behaviours driven by fine-tuning of gene regulation. Identification of cellular-specific gene regulatory networks will deepen the understanding of disease pathology at single-cell resolution and contribute to the development of precision medicine. 
Here, we describe a database, LnCeCell (http://www.bio-bigdata.net/LnCeCell/ or http://bio-bigdata.hrbmu.edu.cn/LnCeCell/), which aims to document cellular-specific long non-coding RNA (lncRNA)-associated competing endogenous RNA (ceRNA) networks for personalised characterisation of diseases based on the 'One Cell, One World' theory. LnCeCell is curated with cellular-specific ceRNA regulations from >94 000 cells across 25 types of cancers and provides >9000 experimentally supported lncRNA biomarkers, associated with tumour metastasis, recurrence, prognosis, circulation, drug resistance, etc. For each cell, LnCeCell illustrates a global map of ceRNA sub-cellular locations, which have been manually curated from the literature and related data sources, and portrays a functional state atlas for a single cancer cell. LnCeCell also provides several flexible tools to infer ceRNA functions based on a specific cellular background. LnCeCell serves as an important resource for investigating the gene regulatory networks within a single cell and can help researchers understand the regulatory mechanisms underlying complex microbial ecosystems and individual phenotypes.",LnCeCell,0.997025967,NA,0,LnCeCell,0.997025967,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +24926662,http://gyanxet-beta.com/lncedb,"lnCeDB: database of human long noncoding RNA acting as competing endogenous RNA. Unlabelled Long noncoding RNA (lncRNA) influences post-transcriptional regulation by interfering with the microRNA (miRNA) pathways, acting as competing endogenous RNA (ceRNA). These lncRNAs have miRNA responsive elements (MRE) in them, and control endogenous miRNAs available for binding with their target mRNAs, thus reducing the repression of these mRNAs. lnCeDB provides a database of human lncRNAs (from GENCODE 19 version) that can potentially act as ceRNAs. The putative mRNA targets of human miRNAs and the targets mapped to AGO clipped regions are collected from TargetScan and StarBase respectively. 
The lncRNA targets of human miRNAs (up to GENCODE 11) are downloaded from miRCode database. miRNA targets on the rest of the GENCODE 19 lncRNAs are predicted by our algorithm for finding seed-matched target sites. These putative miRNA-lncRNA interactions are mapped to the Ago interacting regions within lncRNAs. To find out the likelihood of an lncRNA-mRNA pair for actually being ceRNA we take recourse to two methods. First, a ceRNA score is calculated from the ratio of the number of shared MREs between the pair with the total number of MREs of the individual candidate gene. Second, the P-value for each ceRNA pair is determined by hypergeometric test using the number of shared miRNAs between the ceRNA pair against the number of miRNAs interacting with the individual RNAs. Typically, in a pair of RNAs being targeted by common miRNA(s), there should be a correlation of expression so that the increase in level of one ceRNA results in the increased level of the other ceRNA. Near-equimolar concentration of the competing RNAs is associated with more profound ceRNA effect. In lnCeDB one can not only browse for lncRNA-mRNA pairs having common targeting miRNAs, but also compare the expression of the pair in 22 human tissues to estimate the chances of the pair for actually being ceRNAs. Availability Downloadable freely from http://gyanxet-beta.com/lncedb/.",lnCeDB,0.99820447,NA,0,lnCeDB,0.99820447,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/13/2014 +27651464,http://bioinfo.life.hust.edu.cn/LNCediting,"LNCediting: a database for functional effects of RNA editing in lncRNAs. RNA editing is a widespread post-transcriptional mechanism that can make a single base change on specific nucleotide sequence in an RNA transcript. RNA editing events can result in missense codon changes and modulation of alternative splicing in mRNA, and modification of regulatory RNAs and their binding sites in noncoding RNAs. 
Recent computational studies accurately detected more than 2 million A-to-I RNA editing sites from next-generation sequencing (NGS). However, the vast majority of these RNA editing sites have unknown functions and are in noncoding regions of the genome. To provide a useful resource for the functional effects of RNA editing in long noncoding RNAs (lncRNAs), we systematically analyzed the A-to-I editing sites in lncRNAs across human, rhesus, mouse, and fly, and observed an appreciable number of RNA editing sites which can significantly impact the secondary structures of lncRNAs and lncRNA-miRNA interactions. All the data were compiled into LNCediting, a user-friendly database (http://bioinfo.life.hust.edu.cn/LNCediting/). LNCediting provides customized tools to predict functional effects of novel editing sites in lncRNAs. We hope that it will become an important resource for exploring functions of RNA editing sites in lncRNAs.",LNCediting,0.996848762,NA,0,LNCediting,0.996848762,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/19/2016 +25308527,http://bioinfo.hrbmu.edu.cn/lncefdb,"A computational frame and resource for understanding the lncRNA-environmental factor associations and prediction of environmental factors implicated in diseases. The complex traits of an organism are associated with a complex interplay between genetic factors (GFs) and environmental factors (EFs). However, compared with protein-coding genes and microRNAs, there is a paucity of computational methods and bioinformatic resource platform for understanding the associations between lncRNA and EF. In this study, we developed a novel computational method to identify potential associations between lncRNA and EF, and released LncEnvironmentDB, a user-friendly web-based database aiming to provide a comprehensive resource platform for lncRNA and EF. Topological analysis of EF-related networks revealed the small world, scale-free and modularity structure. 
We also found that lncRNA and EF significantly enriched interacting miRNAs are functionally more related by analyzing their related diseases, implying that the predicted lncRNA signature of EF can reflect the functional characteristics to some degree. Finally, we developed a random walk with a restart-based computational model (RWREFD) to predict potential disease-related EFs by integrating lncRNA-EF associations and EF-disease associations. The performance of RWREFD was evaluated by experimentally verified EF-disease associations based on leave-one-out cross-validation and achieved an AUC value of 0.71, which is higher than randomization test, indicating that the RWREFD method has a reliable and high accuracy of prediction. To the best of our knowledge, LncEnvironmentDB is the first attempt to predict and house the experimental and predicted associations between lncRNA and EF. LncEnvironmentDB is freely available on the web at http://bioinfo.hrbmu.edu.cn/lncefdb/.",LncEnvironmentDB,0.989758611,NA,0,LncEnvironmentDB,0.989758611,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2014 +31617563,http://www.bio-bigdata.net/LnCeVar,"LnCeVar: a comprehensive database of genomic variations that disturb ceRNA network regulation. LnCeVar (http://www.bio-bigdata.net/LnCeVar/) is a comprehensive database that aims to provide genomic variations that disturb lncRNA-associated competing endogenous RNA (ceRNA) network regulation curated from the published literature and high-throughput data sets. LnCeVar curated 119 501 variation-ceRNA events from thousands of samples and cell lines, including: (i) more than 2000 experimentally supported circulating, drug-resistant and prognosis-related lncRNA biomarkers; (ii) 11 418 somatic mutation-ceRNA events from TCGA and COSMIC; (iii) 112 674 CNV-ceRNA events from TCGA; (iv) 67 066 SNP-ceRNA events from the 1000 Genomes Project. LnCeVar provides a user-friendly searching and browsing interface. 
In addition, as an important supplement of the database, several flexible tools have been developed to aid retrieval and analysis of the data. The LnCeVar-BLAST interface is a convenient way for users to search ceRNAs by interesting sequences. LnCeVar-Function is a tool for performing functional enrichment analysis. LnCeVar-Hallmark identifies dysregulated cancer hallmarks of variation-ceRNA events. LnCeVar-Survival performs COX regression analyses and produces survival curves for variation-ceRNA events. LnCeVar-Network identifies and creates a visualization of dysregulated variation-ceRNA networks. Collectively, LnCeVar will serve as an important resource for investigating the functions and mechanisms of personalized genomic variations that disturb ceRNA network regulation in human diseases.",LnCeVar,0.998445213,NA,0,LnCeVar,0.998445213,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +33045751,http://bigd.big.ac.cn/lncexpdb,"LncExpDB: an expression database of human long non-coding RNAs. Expression profiles of long non-coding RNAs (lncRNAs) across diverse biological conditions provide significant insights into their biological functions, interacting targets as well as transcriptional reliability. However, there lacks a comprehensive resource that systematically characterizes the expression landscape of human lncRNAs by integrating their expression profiles across a wide range of biological conditions. Here, we present LncExpDB (https://bigd.big.ac.cn/lncexpdb), an expression database of human lncRNAs that is devoted to providing comprehensive expression profiles of lncRNA genes, exploring their expression features and capacities, identifying featured genes with potentially important functions, and building interactions with protein-coding genes across various biological contexts/conditions. 
Based on comprehensive integration and stringent curation, LncExpDB currently houses expression profiles of 101 293 high-quality human lncRNA genes derived from 1977 samples of 337 biological conditions across nine biological contexts. Consequently, LncExpDB estimates lncRNA genes' expression reliability and capacities, identifies 25 191 featured genes, and further obtains 28 443 865 lncRNA-mRNA interactions. Moreover, user-friendly web interfaces enable interactive visualization of expression profiles across various conditions and easy exploration of featured lncRNAs and their interacting partners in specific contexts. Collectively, LncExpDB features comprehensive integration and curation of lncRNA expression profiles and thus will serve as a fundamental resource for functional studies on human lncRNAs.",LncExpDB,0.998109341,NA,0,LncExpDB,0.998109341,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +34464437,http://lncexplore.bmi.nycu.edu.tw,"lncExplore: a database of pan-cancer analysis and systematic functional annotation for lncRNAs from RNA-sequencing data. . Over the past few years, with the rapid growth of deep-sequencing technology and the development of computational prediction algorithms, a large number of long non-coding RNAs (lncRNAs) have been identified in various types of human cancers. Therefore, it has become critical to determine how to properly annotate the potential function of lncRNAs from RNA-sequencing (RNA-seq) data and arrange the robust information and analysis into a useful system readily accessible by biological and clinical researchers. In order to produce a collective interpretation of lncRNA functions, it is necessary to integrate different types of data regarding the important functional diversity and regulatory role of these lncRNAs. In this study, we utilized transcriptomic sequencing data to systematically observe and identify lncRNAs and their potential functions from 5034 The Cancer Genome Atlas RNA-seq datasets covering 24 cancers. 
Then, we constructed the 'lncExplore' database that was developed to comprehensively integrate various types of genomic annotation data for collective interpretation. The distinctive features in our lncExplore database include (i) novel lncRNAs verified by both coding potential and translation efficiency score, (ii) pan-cancer analysis for studying the significantly aberrant expression across 24 human cancers, (iii) genomic annotation of lncRNAs, such as cis-regulatory information and gene ontology, (iv) observation of the regulatory roles as enhancer RNAs and competing endogenous RNAs and (v) the findings of the potential lncRNA biomarkers for the user-interested cancers by integrating clinical information and disease specificity score. The lncExplore database is to our knowledge the first public lncRNA annotation database providing cancer-specific lncRNA expression profiles for not only known but also novel lncRNAs, enhancer RNAs annotation and clinical analysis based on pan-cancer analysis. lncExplore provides a more complete pathway to highly efficient, novel and more comprehensive translation of laboratory discoveries into the clinical context and will assist in reinterpreting the biological regulatory function of lncRNAs in cancer research. Database URL http://lncexplore.bmi.nycu.edu.tw.",lncExplore,0.996928811,NA,0,lncExplore,0.996928811,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2021 +29788225,http://biocc.hrbmu.edu.cn/LnChrom,"LnChrom: a resource of experimentally validated lncRNA-chromatin interactions in human and mouse. . Long non-coding RNAs (lncRNAs) constitute an important layer of chromatin regulation that contributes to various biological processes and diseases. By interacting with chromatin, many lncRNAs can regulate that state of chromatin by recruiting chromatin-modifying complexes and thus control large-scale gene expression programs. 
However, the available information on interactions between lncRNAs and chromatin is hidden in a large amount of dispersed literature and has not been extensively collected. We established the LnChrom database, a manually curated resource of experimentally validated lncRNA-chromatin interactions. The current release of LnChrom includes 382 743 interactions in human and mouse. We also manually collected detailed metadata for each interaction pair, including those of chromatin modifying factors, epigenetic marks and disease associations. LnChrom provides a user-friendly interface to facilitate browsing, searching and retrieving of lncRNA-chromatin interaction data. Additionally, a large amount of multi-omics data was integrated into LnChrom to aid in characterizing the effects of lncRNA-chromatin interactions on epigenetic modifications and transcriptional expression. We believe that LnChrom is a timely and valuable resource that can greatly motivate mechanistic research into lncRNAs.Database URL: http://biocc.hrbmu.edu.cn/LnChrom/.",LnChrom,0.996899307,NA,0,LnChrom,0.996899307,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +30371849,http://lncipedia.org,"LNCipedia 5: towards a reference set of human long non-coding RNAs. While long non-coding RNA (lncRNA) research in the past has primarily focused on the discovery of novel genes, today it has shifted towards functional annotation of this large class of genes. With thousands of lncRNA studies published every year, the current challenge lies in keeping track of which lncRNAs are functionally described. This is further complicated by the fact that lncRNA nomenclature is not straightforward and lncRNA annotation is scattered across different resources with their own quality metrics and definition of a lncRNA. To overcome this issue, large scale curation and annotation is needed. Here, we present the fifth release of the human lncRNA database LNCipedia (https://lncipedia.org). 
The most notable improvements include manual literature curation of 2482 lncRNA articles and the use of official gene symbols when available. In addition, an improved filtering pipeline results in a higher quality reference lncRNA gene set.",LNCipedia,0.997884691,NA,0,LNCipedia,0.997884691,1,NA,"23042674.0, 25378313.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +"23042674, 25378313",http://www.lncipedia.org,"LNCipedia: a database for annotated human lncRNA transcript sequences and structures. Here, we present LNCipedia (http://www.lncipedia.org), a novel database for human long non-coding RNA (lncRNA) transcripts and genes. LncRNAs constitute a large and diverse class of non-coding RNA genes. Although several lncRNAs have been functionally annotated, the majority remains to be characterized. Different high-throughput methods to identify new lncRNAs (including RNA sequencing and annotation of chromatin-state maps) have been applied in various studies resulting in multiple unrelated lncRNA data sets. LNCipedia offers 21 488 annotated human lncRNA transcripts obtained from different sources. In addition to basic transcript information and gene structure, several statistics are determined for each entry in the database, such as secondary structure information, protein coding potential and microRNA binding sites. Our analyses suggest that, much like microRNAs, many lncRNAs have a significant secondary structure, in-line with their presumed association with proteins or protein complexes. Available literature on specific lncRNAs is linked, and users or authors can submit articles through a web interface. Protein coding potential is assessed by two different prediction algorithms: Coding Potential Calculator and HMMER. In addition, a novel strategy has been integrated for detecting potentially coding lncRNAs by automatically re-analysing the large body of publicly available mass spectrometry data in the PRIDE database. 
LNCipedia is publicly available and allows users to query and download lncRNA sequences and structures based on different search criteria. The database may serve as a resource to initiate small- and large-scale lncRNA studies. As an example, the LNCipedia content was used to develop a custom microarray for expression profiling of all available lncRNAs.",LNCipedia,0.997421205,NA,0,LNCipedia,0.997421205,2,NA,30371849,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/5/2014 +28751672,http://www.bio-bigdata.com/LNCmap,"The LncRNA Connectivity Map: Using LncRNA Signatures to Connect Small Molecules, LncRNAs, and Diseases. Well characterized the connections among diseases, long non-coding RNAs (lncRNAs) and drugs are important for elucidating the key roles of lncRNAs in biological mechanisms in various biological states. In this study, we constructed a database called LNCmap (LncRNA Connectivity Map), available at http://www.bio-bigdata.com/LNCmap/ , to establish the correlations among diseases, physiological processes, and the action of small molecule therapeutics by attempting to describe all biological states in terms of lncRNA signatures. By reannotating the microarray data from the Connectivity Map database, the LNCmap obtained 237 lncRNA signatures of 5916 instances corresponding to 1262 small molecular drugs. We provided a user-friendly interface for the convenient browsing, retrieval and download of the database, including detailed information and the associations of drugs and corresponding affected lncRNAs. Additionally, we developed two enrichment analysis methods for users to identify candidate drugs for a particular disease by inputting the corresponding lncRNA expression profiles or an associated lncRNA list and then comparing them to the lncRNA signatures in our database. 
Overall, LNCmap could significantly improve our understanding of the biological roles of lncRNAs and provide a unique resource to reveal the connections among drugs, lncRNAs and diseases.",LNCmap,0.997964621,LncRNA Connectivity Map,0.948274367,LNCmap,0.997964621,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/27/2017 +32766766,http://lncR2metasta.wchoda.com,"LncR2metasta: a manually curated database for experimentally supported lncRNAs during various cancer metastatic events. . Mounting evidence has shown the involvement of long non-coding RNAs (lncRNAs) during various cancer metastatic events (abbreviated as CMEs, e.g. cancer cell invasion, intravasation, extravasation, proliferation, etc.) that may cooperatively facilitate malignant tumor spread and cause massive patient deaths. The study of lncRNA-CME associations might help understand lncRNA functions in metastasis and present reliable biomarkers for early dissemination detection and optimized treatment. Therefore, we developed a database named 'lncR2metasta' by manually compiling experimentally supported lncRNAs during various CMEs from existing studies. LncR2metasta documents 1238 associations between 304 lncRNAs and 39 CMEs across 54 human cancer subtypes. Each entry of lncR2metasta contains detailed information on a lncRNA-CME association, including lncRNA symbol, a specific CME, brief description of the association, lncRNA category, lncRNA Entrez or Ensembl ID, lncRNA genomic location and strand, lncRNA experiment, lncRNA expression pattern, detection method, target gene (or pathway) of lncRNA, lncRNA regulatory role on a CME, cancer name and the literature reference. An easy-to-use web interface was deployed in lncR2metasta for its users to easily browse, search and download as well as to submit novel lncRNA-CME associations. LncR2metasta will be a useful resource in cancer research community. 
It is freely available at http://lncR2metasta.wchoda.com.",lncR2metasta,0.978646653,NA,0,lncR2metasta,0.978646653,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +25233092,http://bicresources.jcbose.ac.in/zhumur/lncrbase,"LncRBase: an enriched resource for lncRNA information. Long noncoding RNAs (lncRNAs) are noncoding transcripts longer than 200 nucleotides, which show evidence of pervasive transcription and participate in a plethora of cellular regulatory processes. Although several noncoding transcripts have been functionally annotated as lncRNAs within the genome, not all have been proven to fulfill the criteria for a functional regulator and further analyses have to be done in order to include them in a functional cohort. LncRNAs are being classified and reclassified in an ongoing annotation process, and the challenge is fraught with ambiguity, as newer evidences of their biogenesis and functional implication come into light. In our effort to understand the complexity of this still enigmatic biomolecule, we have developed a new database entitled ""LncRBase"" where we have classified and characterized lncRNAs in human and mouse. It is an extensive resource of human and mouse lncRNA transcripts belonging to fourteen distinct subtypes, with a total of 83,201 entries for mouse and 133,361 entries for human: among these, we have newly annotated 8,507 mouse and 14,813 human non coding RNA transcripts (from UCSC and H-InvDB 8.0) as lncRNAs. We have especially considered protein coding gene loci which act as hosts for non coding transcripts. LncRBase includes different lncRNA transcript variants of protein coding genes within LncRBase. LncRBase provides information about the genomic context of different lncRNA subtypes, their interaction with small non coding RNAs (ncRNAs) viz. piwi interacting RNAs (piRNAs) and microRNAs (miRNAs) and their mode of regulation, via association with diverse other genomic elements. 
Adequate knowledge about genomic origin and molecular features of lncRNAs is essential to understand their functional and behavioral complexities. Overall, LncRBase provides a thorough study on various aspects of lncRNA origin and function and a user-friendly interface to search for lncRNA information. LncRBase is available at http://bicresources.jcbose.ac.in/zhumur/lncrbase.",LncRBase,0.9981637,NA,0,LncRBase,0.9981637,1,NA,33112702,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,9/18/2014 +33112702,http://dibresources.jcbose.ac.in/zhumur/lncrbase2,"LncRBase V.2: an updated resource for multispecies lncRNAs and ClinicLSNP hosting genetic variants in lncRNAs for cancer patients. The recent discovery of long non-coding RNA as a regulatory molecule in the cellular system has altered the concept of the functional aptitude of the genome. Since our publication of the first version of LncRBase in 2014, there has been an enormous increase in the number of annotated lncRNAs of multiple species other than Human and Mouse. LncRBase V.2 hosts information of 549,648 lncRNAs corresponding to six additional species besides Human and Mouse, viz. Rat, Fruitfly, Zebrafish, Chicken, Cow and C.elegans. It provides additional distinct features such as (i) Transcription Factor Binding Site (TFBS) in the lncRNA promoter region, (ii) sub-cellular localization pattern of lncRNAs (iii) lnc-pri-miRNAs (iv) Possible small open reading frames (sORFs) within lncRNA. (v) Manually curated information of interacting target molecules and disease association of lncRNA genes (vi) Distribution of lncRNAs across multiple tissues of all species. Moreover, we have hosted ClinicLSNP within LncRBase V.2. ClinicLSNP has a comprehensive catalogue of lncRNA variants present within breast, ovarian, and cervical cancer inferred from 561 RNA-Seq data corresponding to these cancers. 
Further, we have checked whether these lncRNA variants overlap with (i)Repeat elements,(ii)CGI, (iii)TFBS within lncRNA loci (iv)SNP localization in trait-associated Linkage Disequilibrium(LD) region, (v)predicted the potentially pathogenic variants and (vi)effect of SNP on lncRNA secondary structure. Overall, LncRBaseV.2 is a user-friendly database to survey, search and retrieve information about multi-species lncRNAs. Further, ClinicLSNP will serve as a useful resource for cancer specific lncRNA variants and their related information. The database is freely accessible and available at http://dibresources.jcbose.ac.in/zhumur/lncrbase2/.",LncRBase,0.992285967,NA,0,LncRBase,0.992285967,1,NA,25233092,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/28/2020 +26363021,http://bioinformatics.ustc.edu.cn/lncreg,"LncReg: a reference resource for lncRNA-associated regulatory networks. . Long non-coding RNAs (lncRNAs) are critical in the regulation of various biological processes. In recent years, plethora of lncRNAs have been identified in mammalian genomes through different approaches, and the researchers are constantly reporting the regulatory roles of these lncRNAs, which leads to complexity of literature about particular lncRNAs. Therefore, for the convenience of the researchers, we collected regulatory relationships of the lncRNAs and built a database called 'LncReg'. This database is developed by collecting 1081 validated lncRNA-associated regulatory entries, including 258 non-redundant lncRNAs and 571 non-redundant genes. With regulatory relationships information, LncReg can provide overall perspectives of regulatory networks of lncRNAs and comprehensive data for bioinformatics research, which is useful for understanding the functional roles of lncRNAs. 
Database URL: http://bioinformatics.ustc.edu.cn/lncreg/.",LncReg,0.997745812,NA,0,LncReg,0.997745812,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/10/2015 +25399422,http://www.lncrna2target.org,"LncRNA2Target: a database for differentially expressed genes after lncRNA knockdown or overexpression. Long non-coding RNAs (lncRNAs) have emerged as critical regulators of genes at epigenetic, transcriptional and post-transcriptional levels, yet what genes are regulated by a specific lncRNA remains to be characterized. To assess the effects of the lncRNA on gene expression, an increasing number of researchers profiled the genome-wide or individual gene expression level change after knocking down or overexpressing the lncRNA. Herein, we describe a curated database named LncRNA2Target, which stores lncRNA-to-target genes and is publicly accessible at http://www.lncrna2target.org. A gene was considered as a target of a lncRNA if it is differentially expressed after the lncRNA knockdown or overexpression. LncRNA2Target provides a web interface through which its users can search for the targets of a particular lncRNA or for the lncRNAs that target a particular gene. Both search types are performed either by browsing a provided catalog of lncRNA names or by inserting lncRNA/target gene IDs/names in a search box.",LncRNA2Target,0.983380377,NA,0,LncRNA2Target,0.983380377,1,NA,30380072,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,11/15/2014 +30380072,http://123.59.132.21/lncrna2target,"LncRNA2Target v2.0: a comprehensive database for target genes of lncRNAs in human and mouse. Long non-coding RNAs (lncRNAs) play crucial roles in regulating gene expression, and a growing number of researchers have focused on the identification of target genes of lncRNAs. However, no online repository is available to collect the information on target genes regulated by lncRNAs. 
To make it convenient for researchers to know what genes are regulated by a lncRNA of interest, we developed a database named lncRNA2Target to provide a comprehensive resource of lncRNA target genes in 2015. To update the database this year, we retrieved all new lncRNA-target relationships from papers published from 1 August 2014 to 30 April 2018 and RNA-seq datasets before and after knockdown or overexpression of a specific lncRNA. LncRNA2Target database v2.0 provides a web interface through which its users can search for the targets of a particular lncRNA or for the lncRNAs that target a particular gene, and is freely accessible at http://123.59.132.21/lncrna2target.",LncRNA2Target,0.942110872,NA,0,LncRNA2Target,0.942110872,1,NA,25399422,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,1/1/2019 +25332394,http://lncrnadb.org,"lncRNAdb v2.0: expanding the reference database for functional long noncoding RNAs. Despite the prevalence of long noncoding RNA (lncRNA) genes in eukaryotic genomes, only a small proportion have been examined for biological function. lncRNAdb, available at http://lncrnadb.org, provides users with a comprehensive, manually curated reference database of 287 eukaryotic lncRNAs that have been described independently in the scientific literature. In addition to capturing a great proportion of the recent literature describing functions for individual lncRNAs, lncRNAdb now offers an improved user interface enabling greater accessibility to sequence information, expression data and the literature. The new features in lncRNAdb include the integration of Illumina Body Atlas expression profiles, nucleotide sequence information, a BLAST search tool and easy export of content via direct download or a REST API. 
lncRNAdb is now endorsed by RNAcentral and is in compliance with the International Nucleotide Sequence Database Collaboration.",lncRNAdb,0.996756196,NA,0,lncRNAdb,0.996756196,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/20/2014 +23175614,http://cmbi.bjmu.edu.cn/lncrnadisease,"LncRNADisease: a database for long-non-coding RNA-associated diseases. In this article, we describe a long-non-coding RNA (lncRNA) and disease association database (LncRNADisease), which is publicly accessible at http://cmbi.bjmu.edu.cn/lncrnadisease. In recent years, a large number of lncRNAs have been identified and increasing evidence shows that lncRNAs play critical roles in various biological processes. Therefore, the dysfunctions of lncRNAs are associated with a wide range of diseases. It thus becomes important to understand lncRNAs' roles in diseases and to identify candidate lncRNAs for disease diagnosis, treatment and prognosis. For this purpose, a high-quality lncRNA-disease association database would be extremely beneficial. Here, we describe the LncRNADisease database that collected and curated approximately 480 entries of experimentally supported lncRNA-disease associations, including 166 diseases. LncRNADisease also curated 478 entries of lncRNA interacting partners at various molecular levels, including protein, RNA, miRNA and DNA. Moreover, we annotated lncRNA-disease associations with genomic information, sequences, references and species. We normalized the disease name and the type of lncRNA dysfunction and provided a detailed description for each entry. 
Finally, we developed a bioinformatic method to predict novel lncRNA-disease associations and integrated the method and the predicted associated diseases of 1564 human lncRNAs into the database.",LncRNADisease,0.963101439,and disease association database,0.769336358,LncRNADisease,0.963101439,1,NA,"30285109.0, 31942978.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,11/21/2012 +"30285109, 31942978",http://www.rnanut.net/lncrnadisease,"LncRNADisease 2.0: an updated database of long non-coding RNA-associated diseases. Mounting evidence suggested that dysfunction of long non-coding RNAs (lncRNAs) is involved in a wide variety of diseases. A knowledgebase with systematic collection and curation of lncRNA-disease associations is critically important for further examining their underlying molecular mechanisms. In 2013, we presented the first release of LncRNADisease, representing a database for collection of experimental supported lncRNA-disease associations. Here, we describe an update of the database. The new developments in LncRNADisease 2.0 include (i) an over 40-fold lncRNA-disease association enhancement compared with the previous version; (ii) providing the transcriptional regulatory relationships among lncRNA, mRNA and miRNA; (iii) providing a confidence score for each lncRNA-disease association; (iv) integrating experimentally supported circular RNA disease associations. LncRNADisease 2.0 documents more than 200 000 lncRNA-disease associations. We expect that this database will continue to serve as a valuable source for potential clinical application related to lncRNAs. 
LncRNADisease 2.0 is freely available at http://www.rnanut.net/lncrnadisease/.",LncRNADisease,0.815076832,non-coding RNA disease database,0.798822011,LncRNADisease,0.815076832,2,NA,23175614,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,1/1/2020 +24525374,http://lncRNAMap.mbc.nctu.edu.tw,"lncRNAMap: a map of putative regulatory functions in the long non-coding transcriptome. Background Recent studies have demonstrated the importance of long non-coding RNAs (lncRNAs) in chromatin remodeling, and in transcriptional and post-transcriptional regulation. However, only a few specific lncRNAs are well understood, whereas others are completely uncharacterized. To address this, there is a need for user-friendly platform to studying the putative regulatory functions of human lncRNAs. Description lncRNAMap is an integrated and comprehensive database relating to exploration of the putative regulatory functions of human lncRNAs with two mechanisms of regulation, by encoding siRNAs and by acting as miRNA decoys. To investigate lncRNAs producing siRNAs that regulate protein-coding genes, lncRNAMap integrated small RNAs (sRNAs) that were supported by publicly available deep sequencing data from various sRNA libraries and constructed lncRNA-derived siRNA-target interactions. In addition, lncRNAMap demonstrated that lncRNAs can act as targets for miRNAs that would otherwise regulate protein-coding genes. Previously studies indicated that intergenic lncRNAs (lincRNAs) either positive or negative regulated neighboring genes, therefore, lncRNAMap surveyed neighboring genes within a 1Mb distance from the genomic location of specific lncRNAs and provided the expression profiles of lncRNA and its neighboring genes. The gene expression profiles may supply the relationship between lncRNA and its neighboring genes. 
Conclusions lncRNAMap is a powerful user-friendly platform for the investigation of putative regulatory functions of human lncRNAs with producing siRNAs and acting as miRNA decoy. lncRNAMap is freely available on the web at http://lncRNAMap.mbc.nctu.edu.tw/.",lncRNAMap,0.98099345,NA,0,lncRNAMap,0.98099345,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/23/2014 +30276831,http://lnc.rnanet.org,"LncRNAnet: a comprehensive Sus scrofa lncRNA database. Long noncoding RNAs (lncRNAs) constitute a large class of functional non-coding RNAs that play important roles in many biological processes. Thousands of lncRNAs have been identified in mammals. Pig is an important farm animal and biomedical model. It is essential to create a Sus scrofa lncRNA database to enable further study of the function and evolution of lncRNAs. In this study, we built a systematic S. scrofa lncRNA database named lncRNAnet that contains 53 468 S. scrofa lncRNAs with their sequence characteristics, genomic locations, conservation, overlapping SNPs and QTLs, and transcript abundance across nine tissues in pigs. We also integrated 212 922 human and mouse lncRNAs sequences into lncRNAnet. This database will provide for a systematic S. scrofa lncRNA classification and help investigators browse, search for and analyze lncRNAs as well as do blast searches among human, mouse and pig lncRNAs. Thus, lncRNAnet should improve the understanding of the biological functions of lncRNA. The database is freely accessible at http://lnc.rnanet.org/.",lncRNAnet,0.986113548,NA,0,lncRNAnet,0.986113548,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/2/2018 +29077939,http://bioinfo.life.hust.edu.cn/lncRNASNP2,"lncRNASNP2: an updated database of functional SNPs and mutations in human and mouse lncRNAs. Long non-coding RNAs (lncRNAs) are emerging as important regulators in different biological processes through various ways. 
Because the related data, especially mutations in cancers, increased sharply, we updated the lncRNASNP to version 2 (http://bioinfo.life.hust.edu.cn/lncRNASNP2). lncRNASNP2 provides comprehensive information of SNPs and mutations in lncRNAs, as well as their impacts on lncRNA structure and function. lncRNASNP2 contains 7260238 SNPs on 141353 human lncRNA transcripts and 3921448 SNPs on 117405 mouse lncRNA transcripts. Besides the SNP information in the first version, the following new features were developed to improve the lncRNASNP2. (i) noncoding variants from COSMIC cancer data (859534) in lncRNAs and their effects on lncRNA structure and function; (ii) TCGA cancer mutations (315234) in lncRNAs and their impacts; (iii) lncRNA expression profiling of 20 cancer types in both tumor and its adjacent samples; (iv) expanded lncRNA-associated diseases; (v) optimized the results about lncRNAs structure change induced by variants; (vi) reduced false positives in miRNA and lncRNA interaction results. Furthermore, we developed online tools for users to analyze new variants in lncRNA. We aim to maintain the lncRNASNP as a useful resource for lncRNAs and their variants.",lncRNASNP,0.996916771,NA,0,lncRNASNP,0.996916771,1,NA,25332392,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +25332392,http://bioinfo.life.hust.edu.cn/lncRNASNP,"lncRNASNP: a database of SNPs in lncRNAs and their potential functions in human and mouse. Long non-coding RNAs (lncRNAs) play key roles in various cellular contexts and diseases by diverse mechanisms. With the rapid growth of identified lncRNAs and disease-associated single nucleotide polymorphisms (SNPs), there is a great demand to study SNPs in lncRNAs. Aiming to provide a useful resource about lncRNA SNPs, we systematically identified SNPs in lncRNAs and analyzed their potential impacts on lncRNA structure and function. 
In total, we identified 495,729 and 777,095 SNPs in more than 30,000 lncRNA transcripts in human and mouse, respectively. A large number of SNPs were predicted with the potential to impact on the miRNA-lncRNA interaction. The experimental evidence and conservation of miRNA-lncRNA interaction, as well as miRNA expressions from TCGA were also integrated to prioritize the miRNA-lncRNA interactions and SNPs on the binding sites. Furthermore, by mapping SNPs to GWAS results, we found that 142 human lncRNA SNPs are GWAS tagSNPs and 197,827 lncRNA SNPs are in the GWAS linkage disequilibrium regions. All these data for human and mouse lncRNAs were imported into lncRNASNP database (http://bioinfo.life.hust.edu.cn/lncRNASNP/), which includes two sub-databases lncRNASNP-human and lncRNASNP-mouse. The lncRNASNP database has a user-friendly interface for searching and browsing through the SNP, lncRNA and miRNA sections.",lncRNASNP,0.996575296,NA,0,lncRNASNP,0.996575296,1,NA,29077939,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/20/2014 +24813212,http://lncrnator.ewha.ac.kr,"lncRNAtor: a comprehensive resource for functional investigation of long non-coding RNAs. Motivation A number of long non-coding RNAs (lncRNAs) have been identified by deep sequencing methods, but their molecular and cellular functions are known only for a limited number of lncRNAs. Current databases on lncRNAs are mostly for cataloging purpose without providing in-depth information required to infer functions. A comprehensive resource on lncRNA function is an immediate need. Results We present a database for functional investigation of lncRNAs that encompasses annotation, sequence analysis, gene expression, protein binding and phylogenetic conservation. We have compiled lncRNAs for six species (human, mouse, zebrafish, fruit fly, worm and yeast) from ENSEMBL, HGNC, MGI and lncRNAdb. 
Each lncRNA was analyzed for coding potential and phylogenetic conservation in different lineages. Gene expression data of 208 RNA-Seq studies (4995 samples), collected from GEO, ENCODE, modENCODE and TCGA databases, were used to provide expression profiles in various tissues, diseases and developmental stages. Importantly, we analyzed RNA-Seq data to identify coexpressed mRNAs that would provide ample insights on lncRNA functions. The resulting gene list can be subject to enrichment analysis such as Gene Ontology or KEGG pathways. Furthermore, we compiled protein-lncRNA interactions by collecting and analyzing publicly available CLIP-seq or PAR-CLIP sequencing data. Finally, we explored evolutionarily conserved lncRNAs with correlated expression between human and six other organisms to identify functional lncRNAs. The whole contents are provided in a user-friendly web interface. Availability and implementation lncRNAtor is available at http://lncrnator.ewha.ac.kr/. Supplementary information Supplementary data are available at Bioinformatics online.",lncRNAtor,0.842248619,NA,0,lncRNAtor,0.842248619,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/9/2014 +23846593,http://genome.igib.res.in/lncRNome,"lncRNome: a comprehensive knowledgebase of human long noncoding RNAs. The advent of high-throughput genome scale technologies has enabled us to unravel a large amount of the previously unknown transcriptionally active regions of the genome. Recent genome-wide studies have provided annotations of a large repertoire of various classes of noncoding transcripts. Long noncoding RNAs (lncRNAs) form a major proportion of these novel annotated noncoding transcripts, and presently known to be involved in a number of functionally distinct biological processes. Over 18,000 transcripts are presently annotated as lncRNA, and encompass previously annotated classes of noncoding transcripts including large intergenic noncoding RNA, antisense RNA and processed pseudogenes. 
There is a significant gap in the resources providing a stable annotation, cross-referencing and biologically relevant information. lncRNome has been envisioned with the aim of filling this gap by integrating annotations on a wide variety of biologically significant information into a comprehensive knowledgebase. To the best of our knowledge, lncRNome is one of the largest and most comprehensive resources for lncRNAs. Database URL: http://genome.igib.res.in/lncRNome.",lncRNome,0.995862365,NA,0,lncRNome,0.995862365,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/11/2013 +33045741,http://bio.liclab.net/LncSEA/index.php,"LncSEA: a platform for long non-coding RNA related sets and enrichment analysis. Long non-coding RNAs (lncRNAs) have been proven to play important roles in transcriptional processes and various biological functions. Establishing a comprehensive collection of human lncRNA sets is urgent work at present. Using reference lncRNA sets, enrichment analyses will be useful for analyzing lncRNA lists of interest submitted by users. Therefore, we developed a human lncRNA sets database, called LncSEA, which aimed to document a large number of available resources for human lncRNA sets and provide annotation and enrichment analyses for lncRNAs. LncSEA supports >40 000 lncRNA reference sets across 18 categories and 66 sub-categories, and covers over 50 000 lncRNAs. We not only collected lncRNA sets based on downstream regulatory data sources, but also identified a large number of lncRNA sets regulated by upstream transcription factors (TFs) and DNA regulatory elements by integrating TF ChIP-seq, DNase-seq, ATAC-seq and H3K27ac ChIP-seq data. Importantly, LncSEA provides annotation and enrichment analyses of lncRNA sets associated with upstream regulators and downstream targets. In summary, LncSEA is a powerful platform that provides a variety of types of lncRNA sets for users, and supports lncRNA annotations and enrichment analyses. 
The LncSEA database is freely accessible at http://bio.liclab.net/LncSEA/index.php.",LncSEA,0.996705592,NA,0,LncSEA,0.996705592,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +32193291,http://bio-bigdata.hrbmu.edu.cn/LncSpA,"LncSpA: LncRNA Spatial Atlas of Expression across Normal and Cancer Tissues. Long noncoding RNAs (lncRNA) play important roles in maintaining morphology and function of tissues, and their regulatory effectiveness is closely associated with spatial expression. To provide a comprehensive spatial atlas of expression for lncRNA, we propose LncSpA (http://bio-bigdata.hrbmu.edu.cn/LncSpA) to explore tissue-elevated (TE) lncRNA across human normal and adult and pediatric cancer tissues. In total, 71,131 and 12,007 TE lncRNAs and 634 clinical-related TE lncRNAs were identified across 38 normal and 33 adult cancer tissues. Moreover, 4,688 TE and 413 clinical-related lncRNAs were identified in pediatric cancer. By quick searching or query options, users can obtain eight major types of detailed information for lncRNA via various visualization techniques, including qualitative and quantitative spatial expression in different resources, coexpressed mRNAs, predicted function, known disease association, and the potential to serve as diagnostic or prognostic markers. LncSpA will be a valuable resource to understand lncRNA functions across tissues and cancers, leading to enhanced therapeutic strategies in precision oncology. SIGNIFICANCE: LncSpA is a new interactive resource that provides the spatial expression pattern of lncRNA across thousands of normal and cancer samples representing major tissue types.",LncSpA,0.997582436,NA,0,LncSpA,0.997582436,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/19/2020 +31713618,"http://biocc.hrbmu.edu.cn/LncTarD/, http://bio-bigdata.hrbmu.edu.cn/LncTarD","LncTarD: a manually-curated database of experimentally-supported functional lncRNA-target regulations in human diseases. Long non-coding RNAs (lncRNAs) are associated with human diseases. 
Although lncRNA-disease associations have received significant attention, no online repository is available to collect lncRNA-mediated regulatory mechanisms, key downstream targets, and important biological functions driven by disease-related lncRNAs in human diseases. We thus developed LncTarD (http://biocc.hrbmu.edu.cn/LncTarD/ or http://bio-bigdata.hrbmu.edu.cn/LncTarD), a manually-curated database that provides a comprehensive resource of key lncRNA-target regulations, lncRNA-influenced functions, and lncRNA-mediated regulatory mechanisms in human diseases. LncTarD offers (i) 2822 key lncRNA-target regulations involving 475 lncRNAs and 1039 targets associated with 177 human diseases; (ii) 1613 experimentally-supported functional regulations and 1209 expression associations in human diseases; (iii) important biological functions driven by disease-related lncRNAs in human diseases; (iv) lncRNA-target regulations responsible for drug resistance or sensitivity in human diseases and (v) lncRNA microarray, lncRNA sequence data and transcriptome data of an 11 373 pan-cancer patient cohort from TCGA to help characterize the functional dynamics of these lncRNA-target regulations. LncTarD also provides a user-friendly interface to conveniently browse, search, and download data. LncTarD will be a useful resource platform for the further understanding of functions and molecular mechanisms of lncRNA deregulation in human disease, which will help to identify novel and sensitive biomarkers and therapeutic targets.",LncTarD,0.998159111,NA,0,LncTarD,0.998159111,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +27605101,http://bioinfo.ibp.ac.cn/LncVar,"LncVar: a database of genetic variation associated with long non-coding genes. Motivation Long non-coding RNAs (lncRNAs) are essential in many molecular pathways, and are frequently associated with disease but the mechanisms of most lncRNAs have not yet been characterized. 
Genetic variations, including single nucleotide polymorphisms (SNPs) and structural variations, are widely distributed in the genome, including lncRNA gene regions. As the number of studies on lncRNAs grows rapidly, it is necessary to evaluate the effects of genetic variations on lncRNAs. Results Here, we present LncVar, a database of genetic variation associated with long non-coding genes in six species. We collected lncRNAs from the NONCODE database, and evaluated their conservation. We systematically integrated transcription factor binding sites and m6A modification sites of lncRNAs and provided comprehensive effects of SNPs on transcription and modification of lncRNAs. We collected putatively translated open reading frames (ORFs) in lncRNAs, and identified both synonymous and non-synonymous SNPs in ORFs. We also collected expression quantitative trait loci of lncRNAs from the literature. Furthermore, we identified lncRNAs in CNV regions as prognostic biomarker candidates of cancers and predicted lncRNA gene fusion events from RNA-seq data from cell lines. The LncVar database can be used as a resource to evaluate the effects of the variations on the biological function of lncRNAs. Availability and implementation LncVar is available at http://bioinfo.ibp.ac.cn/LncVar CONTACT: rschen@ibp.ac.cnSupplementary information: Supplementary materials are available at Bioinformatics online.",LncVar,0.995780408,NA,0,LncVar,0.995780408,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/6/2016 +25725059,http://genome.unmc.edu/LocSigDB,"LocSigDB: a database of protein localization signals. . LocSigDB (http://genome.unmc.edu/LocSigDB/) is a manually curated database of experimental protein localization signals for eight distinct subcellular locations; primarily in a eukaryotic cell with brief coverage of bacterial proteins. Proteins must be localized at their appropriate subcellular compartment to perform their desired function. 
Mislocalization of proteins to unintended locations is a causative factor for many human diseases; therefore, collection of known sorting signals will help support many important areas of biomedical research. By performing an extensive literature study, we compiled a collection of 533 experimentally determined localization signals, along with the proteins that harbor such signals. Each signal in the LocSigDB is annotated with its localization, source, PubMed references and is linked to the proteins in UniProt database along with the organism information that contain the same amino acid pattern as the given signal. From LocSigDB webserver, users can download the whole database or browse/search for data using an intuitive query interface. To date, LocSigDB is the most comprehensive compendium of protein localization signals for eight distinct subcellular locations. Database URL: http://genome.unmc.edu/LocSigDB/",LocSigDB,0.997438669,NA,0,LocSigDB,0.997438669,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/27/2015 +23998809,http://genomics.senescence.info/longevity,"LongevityMap: a database of human genetic variants associated with longevity. Understanding the genetic basis of human longevity remains a challenge but could lead to life-extending interventions and better treatments for age-related diseases. Toward this end we developed the LongevityMap (http://genomics.senescence.info/longevity/), the first database of genes, loci, and variants studied in the context of human longevity and healthy ageing. We describe here its content and interface, and discuss how it can help to unravel the genetics of human longevity.",LongevityMap,0.995389521,NA,0,LongevityMap,0.995389521,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/30/2013 +24150937,http://www.weizmann.ac.il/molgen/loqate,"LoQAtE--Localization and Quantitation ATlas of the yeast proteomE. A new tool for multiparametric dissection of single-protein behavior in response to biological perturbations in yeast. 
Living organisms change their proteome dramatically to sustain a stable internal milieu in fluctuating environments. To study the dynamics of proteins during stress, we measured the localization and abundance of the Saccharomyces cerevisiae proteome under various growth conditions and genetic backgrounds using the GFP collection. We created a database (DB) called 'LoQAtE' (Localizaiton and Quantitation Atlas of the yeast proteomE), available online at http://www.weizmann.ac.il/molgen/loqate/, to provide easy access to these data. Using LoQAtE DB, users can get a profile of changes for proteins of interest as well as querying advanced intersections by either abundance changes, primary localization or localization shifts over the tested conditions. Currently, the DB hosts information on 5330 yeast proteins under three external perturbations (DTT, H₂O₂ and nitrogen starvation) and two genetic mutations [in the chaperonin containing TCP1 (CCT) complex and in the proteasome]. Additional conditions will be uploaded regularly. The data demonstrate hundreds of localization and abundance changes, many of which were not detected at the level of mRNA. LoQAtE is designed to allow easy navigation for non-experts in high-content microscopy and data are available for download. These data should open up new perspectives on the significant role of proteins while combating external and internal fluctuations.",LoQAtE,0.994121492,Localizaiton and Quantitation Atlas of the yeast proteomE,0.870577331,LoQAtE,0.994121492,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2013 +28008948,http://lotus.au.dk,"Lotus Base: An integrated information portal for the model legume Lotus japonicus. Lotus japonicus is a well-characterized model legume widely used in the study of plant-microbe interactions. However, datasets from various Lotus studies are poorly integrated and lack interoperability. 
We recognize the need for a comprehensive repository that allows comprehensive and dynamic exploration of Lotus genomic and transcriptomic data. Equally important are user-friendly in-browser tools designed for data visualization and interpretation. Here, we present Lotus Base, which opens to the research community a large, established LORE1 insertion mutant population containing an excess of 120,000 lines, and serves the end-user tightly integrated data from Lotus, such as the reference genome, annotated proteins, and expression profiling data. We report the integration of expression data from the L. japonicus gene expression atlas project, and the development of tools to cluster and export such data, allowing users to construct, visualize, and annotate co-expression gene networks. Lotus Base takes advantage of modern advances in browser technology to deliver powerful data interpretation for biologists. Its modular construction and publicly available application programming interface enable developers to tap into the wealth of integrated Lotus data. Lotus Base is freely accessible at: https://lotus.au.dk.",Lotus Base,0.940765053,NA,0,Lotus Base,0.940765053,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/23/2016 +25819075,http://lotus-db.wbgcas.cn,"LOTUS-DB: an integrative and interactive database for Nelumbo nucifera study. Besides its important significance in plant taxonomy and phylogeny, sacred lotus (Nelumbo nucifera Gaertn.) might also hold the key to the secrets of aging, which attracts crescent attentions from researchers all over the world. The genetic or molecular studies on this species depend on its genome information. In 2013, two publications reported the sequencing of its full genome, based on which we constructed a database named as LOTUS-DB. It will provide comprehensive information on the annotation, gene function and expression for the sacred lotus. 
The information will facilitate users to efficiently query and browse genes, graphically visualize genome and download a variety of complex data information on genome DNA, coding sequence (CDS), transcripts or peptide sequences, promoters and markers. It will accelerate researches on gene cloning, functional identification of sacred lotus, and hence promote the studies on this species and plant genomics as well. Database URL: http://lotus-db.wbgcas.cn",LOTUS-DB,0.744012758,NA,0,LOTUS-DB,0.744012758,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/27/2015 +21520333,http://www.LOVD.nl,"LOVD v.2.0: the next generation in gene variant databases. Locus-Specific DataBases (LSDBs) store information on gene sequence variation associated with human phenotypes and are frequently used as a reference by researchers and clinicians. We developed the Leiden Open-source Variation Database (LOVD) as a platform-independent Web-based LSDB-in-a-Box package. LOVD was designed to be easy to set up and maintain and follows the Human Genome Variation Society (HGVS) recommendations. Here we describe LOVD v.2.0, which adds enhanced flexibility and functionality and has the capacity to store sequence variants in multiple genes per patient. To reduce redundancy, patient and sequence variant data are stored in separate tables. Tables are linked to generate connections between sequence variant data for each gene and every patient. The dynamic structure allows database managers to add custom columns. The database structure supports fast queries and allows storage of sequence variants from high-throughput sequence analysis, as demonstrated by the X-chromosomal Mental Retardation LOVD installation. LOVD contains measures to ensure database security from unauthorized access. Currently, the LOVD Website (http://www.LOVD.nl/) lists 71 public LOVD installations hosting 3,294 gene variant databases with 199,000 variants in 84,000 patients. 
To promote LSDB standardization and thereby database interoperability, we offer free server space and help to establish an LSDB on our Leiden server.",LOVD,0.994573355,Leiden Open-source Variation Database,0.861171469,LOVD,0.994573355,1,NA,21618345,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,2/22/2011 +21618345,http://genome.igib.res.in/mirlovd,miRvar: A comprehensive database for genomic variations in microRNAs. microRNAs are a recently discovered and well studied class of small noncoding functional RNAs. The regulatory role of microRNAs (miRNAs) has been well studied in a wide variety of biological processes but there have been no systematic effort to understand and analyze the genetic variations in miRNA loci and study its functional consequences. We have comprehensively curated genetic variations in miRNA loci in the human genome and established a computational pipeline to assess potential functional consequences of these variants along with methods for systematic curation and reporting of variations in these loci. The data is made available on the Leiden Open (source) Variation Database (LOVD) platform at http://genome.igib.res.in/mirlovd to provide ease of aggregation and analysis and is open for community curation efforts.,LOVD,0.961074889,ource,0.583477616,LOVD,0.961074889,1,NA,21520333,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,2/24/2011 +21720920,http://crr.ugent.be/blp,"The British Lexicon Project: lexical decision data for 28,730 monosyllabic and disyllabic English words. We present a new database of lexical decision times for English words and nonwords, for which two groups of British participants each responded to 14,365 monosyllabic and disyllabic words and the same number of nonwords for a total duration of 16 h (divided over multiple sessions). 
This database, called the British Lexicon Project (BLP), fills an important gap between the Dutch Lexicon Project (DLP; Keuleers, Diependaele, & Brysbaert, Frontiers in Language Sciences. Psychology, 1, 174, 2010) and the English Lexicon Project (ELP; Balota et al., 2007), because it applies the repeated measures design of the DLP to the English language. The high correlation between the BLP and ELP data indicates that a high percentage of variance in lexical decision data sets is systematic variance, rather than noise, and that the results of megastudies are rather robust with respect to the selection and presentation of the stimuli. Because of its design, the BLP makes the same analyses possible as the DLP, offering researchers with a new interesting data set of word-processing times for mixed effects analyses and mathematical modeling. The BLP data are available at http://crr.ugent.be/blp and as Electronic Supplementary Materials.",LP,0.601316571,NA,0,LP,0.601316571,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/1/2012 +24243842,http://www.bacterio.net,"LPSN--list of prokaryotic names with standing in nomenclature. The List of Prokaryotic Names with Standing in Nomenclature (LPSN; http://www.bacterio.net) is a database that lists the names of prokaryotes (Bacteria and Archaea) that have been validly published in the International Journal of Systematic and Evolutionary Microbiology directly or by inclusion in a Validation List, under the Rules of International Code of Nomenclature of Bacteria. Currently there are 15 974 taxa listed. In addition, LPSN has an up-to-date classification of prokaryotes and information on prokaryotic nomenclature and culture collections.",LPSN,0.997364774,List of Prokaryotic Names with Standing in Nomenclature,0.985107541,LPSN,0.997364774,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2013 +24185698,http://www.eplantsenescence.org,"LSD 2.0: an update of the leaf senescence database. 
This manuscript describes an update of the leaf senescence database (LSD) previously featured in the 2011 NAR Database Issue. LSD provides comprehensive information concerning senescence-associated genes (SAGs) and their corresponding mutants. We have made extensive annotations for these SAGs through both manual and computational approaches. Recently, we updated LSD to a new version LSD 2.0 (http://www.eplantsenescence.org/), which contains 5356 genes and 322 mutants from 44 species, an extension from the previous version containing 1145 genes and 154 mutants from 21 species. In the current version, we also included several new features: (i) Primer sequences retrieved based on experimental evidence or designed for high-throughput analysis were added; (ii) More than 100 images of Arabidopsis SAG mutants were added; (iii) Arabidopsis seed information obtained from The Arabidopsis Information Resource (TAIR) was integrated; (iv) Subcellular localization information of SAGs in Arabidopsis mined from literature or generated from the SUBA3 program was presented; (v) Quantitative Trait Loci information was added with links to the original database and (vi) New options such as primer and miRNA search for database query were implemented. The updated database will be a valuable and informative resource for basic research of leaf senescence and for the manipulation of traits of agronomically important plants.",LSD,0.993650754,leaf senescence database,0.934000885,LSD,0.993650754,1,NA,"27987180.0, 31599330.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,10/31/2013 +31599330,http://bigd.big.ac.cn/lsd,"LSD 3.0: a comprehensive resource for the leaf senescence research community. The leaf senescence database (LSD) is a comprehensive resource of senescence-associated genes (SAGs) and their corresponding mutants. 
Through manual curation and extensive annotation, we updated the LSD to a new version LSD 3.0, which contains 5853 genes and 617 mutants from 68 species. To provide sustainable and reliable services for the plant research community, LSD 3.0 (https://bigd.big.ac.cn/lsd/) has been moved to and maintained by the National Genomics Data Center at Beijing Institute of Genomics, Chinese Academy of Sciences. In the current release, we added some new features: (i) Transcriptome data of leaf senescence in poplar were integrated; (ii) Leaf senescence-associated transcriptome data information in Arabidopsis, rice and soybean were included; (iii) Senescence-differentially expressed small RNAs (Sen-smRNA) in Arabidopsis were identified; (iv) Interaction pairs between Sen-smRNAs and senescence-associated transcription factors (Sen-TF) were established; (v) Senescence phenotypes of 90 natural accessions (ecotypes) and 42 images of ecotypes in Arabidopsis were incorporated; (vi) Mutant seed information of SAGs in rice obtained from Kitbase was integrated; (vii) New options of search engines for ecotypes and transcriptome data were implemented. Together, the updated database bears great utility to continue to provide users with useful resources for studies of leaf senescence.",LSD,0.98986119,leaf senescence database,0.935123432,LSD,0.98986119,1,NA,"24185698.0, 27987180.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,1/1/2020 +27987180,"http://www.eplantsenescence.org/, http://psd.cbi.pku.edu.cn","Construction of the Leaf Senescence Database and Functional Assessment of Senescence-Associated Genes. Leaf senescence is the last phase of plant development and a highly coordinated process regulated by a large number of senescence-associated genes (SAGs). 
By broad literature survey, we constructed a leaf senescence database (LSD) in 2011 and updated it to Version 2.0 in 2014 ( http://www.eplantsenescence.org/ and http://psd.cbi.pku.edu.cn/ ) which contains a total of 5357 genes and 324 mutants from 44 species. These SAGs were retrieved based on genetic, genomic, proteomic, physiological, or other experimental evidence and were classified into different categories according to their functions in leaf senescence or morphological phenotype of mutants. To provide comprehensive information for SAGs, we made extensive annotation by both manual and computational approaches. In addition, we predicted putative orthologues of the SAGs in other species. LSD has a user-friendly interface to allow users to make text queries or BLAST searches and to download SAGs sequences for local analysis. Functional analyses of putative SAGs reveal that WRKY75, AZF2, NAC16, and WRKY26 are positive regulators of leaf senescence, while MKP2 and CTR1 perform negative regulation to leaf senescence. This database has been served as a valuable resource for basic research on the function of SAGs and evolution of plant leaf senescence, as well as for the exploration of genetic traits in agronomically important plants.",LSD,0.988338788,leaf senescence database,0.92701484,LSD,0.988338788,1,NA,"24185698.0, 31599330.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,1/1/2017 +25630312,http://www.bcbl.eu/databases/lse,"LSE-Sign: A lexical database for Spanish Sign Language. The LSE-Sign database is a free online tool for selecting Spanish Sign Language stimulus materials to be used in experiments. It contains 2,400 individual signs taken from a recent standardized LSE dictionary, and a further 2,700 related nonsigns. Each entry is coded for a wide range of grammatical, phonological, and articulatory information, including handshape, location, movement, and non-manual elements. 
The database is accessible via a graphically based search facility which is highly flexible both in terms of the search options available and the way the results are displayed. LSE-Sign is available at the following website: http://www.bcbl.eu/databases/lse/.",LSE-Sign,0.994305208,NA,0,LSE-Sign,0.994305208,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2016 +22750101,http://www.vit.ac.in/leprosy/leprosy.htm,"LSHGD: a database for human leprosy susceptible genes. Studies aiming to explore the involvement of host genetic factors to determine susceptibility to develop disease and individual's response to the infection with Mycobacterium leprae have increased in recent years. To address this issue, we have developed a Leprosy Susceptible Human Gene Database (LSHGD) to integrate leprosy and human associated 45 genes by profound literature search. This will serve as a user-friendly and interactive platform to understand the involvement of human polymorphisms (SNPs) in leprosy, independent genetic control over both susceptibility to leprosy and its association with multi-drug resistance of M. leprae. As the first human genetic database in leprosy it aims to provide information about the associated genes, corresponding protein sequences, available three dimensional structures and polymorphism related to leprosy. In conclusion, this will serve as a multifunctional valuable tool and convenient information platform which is freely available at http://www.vit.ac.in/leprosy/leprosy.htm and enables the user to retrieve information of their interest.",LSHGD,0.994387046,Leprosy Susceptible Human Gene Database,0.994129015,LSHGD,0.994387046,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/30/2012 +23193296,http://eeb.princeton.edu/lucapedia,"LUCApedia: a database for the study of ancient life. Organisms represented by the root of the universal evolutionary tree were most likely complex cells with a sophisticated protein translation system and a DNA genome encoding hundreds of genes. 
The growth of bioinformatics data from taxonomically diverse organisms has made it possible to infer the likely properties of early life in greater detail. Here we present LUCApedia, (http://eeb.princeton.edu/lucapedia), a unified framework for simultaneously evaluating multiple data sets related to the Last Universal Common Ancestor (LUCA) and its predecessors. This unification is achieved by mapping eleven such data sets onto UniProt, KEGG and BioCyc IDs. LUCApedia may be used to rapidly acquire evidence that a certain gene or set of genes is ancient, to examine the early evolution of metabolic pathways, or to test specific hypotheses related to ancient life by corroborating them against the rest of the database.",LUCApedia,0.997837782,NA,0,LUCApedia,0.997837782,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2012 +32499815,http://luluseqdb.umk.pl/basic/web/index.php,"LuluDB-The Database Created Based on Small RNA, Transcriptome, and Degradome Sequencing Shows the Wide Landscape of Non-coding and Coding RNA in Yellow Lupine (Lupinus luteus L.) Flowers and Pods. Yellow lupine (Lupinus luteus L.) belongs to a legume family that benefits from symbiosis with nitrogen-fixing bacteria. Its seeds are rich in protein, which makes it a valuable food source for animals and humans. Yellow lupine is also the model plant for basic research on nodulation or abscission of organs. Nevertheless, the knowledge about the molecular regulatory mechanisms of its generative development is still incomplete. The RNA-Seq technique is becoming more prominent in high-throughput identification and expression profiling of both coding and non-coding RNA sequences. However, the huge amount of data generated with this method may discourage other scientific groups from making full use of them. To overcome this inconvenience, we have created a database containing analysis-ready information about non-coding and coding L. luteus RNA sequences (LuluDB). 
LuluDB was created on the basis of RNA-Seq analysis of small RNA, transcriptome, and degradome libraries obtained from yellow lupine cv. Taper flowers, pod walls, and seeds in various stages of development, flower pedicels, and pods undergoing abscission or maintained on the plant. It contains sequences of miRNAs and phased siRNAs identified in L. luteus, information about their expression in individual samples, and their target sequences. LuluDB also contains identified lncRNAs and protein-coding RNA sequences with their organ expression and annotations to widely used databases like GO, KEGG, NCBI, Rfam, Pfam, etc. The database also provides sequence homology search by BLAST using, e.g., an unknown sequence as a query. To present the full capabilities offered by our database, we performed a case study concerning transcripts annotated as DCL 1-4 (DICER LIKE 1-4) homologs involved in small non-coding RNA biogenesis and identified miRNAs that most likely regulate DCL1 and DCL2 expression in yellow lupine. LuluDB is available at http://luluseqdb.umk.pl/basic/web/index.php.",LuluDB,0.996738493,NA,0,LuluDB,0.996738493,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/15/2020 +34936882,http://www.lungmap.net/cell-cards,"A census of the lung: CellCards from LungMAP. The human lung plays vital roles in respiration, host defense, and basic physiology. Recent technological advancements such as single-cell RNA sequencing and genetic lineage tracing have revealed novel cell types and enriched functional properties of existing cell types in lung. The time has come to take a new census. Initiated by members of the NHLBI-funded LungMAP Consortium and aided by experts in the lung biology community, we synthesized current data into a comprehensive and practical cellular census of the lung. 
Identities of cell types in the normal lung are captured in individual cell cards with delineation of function, markers, developmental lineages, heterogeneity, regenerative potential, disease links, and key experimental tools. This publication will serve as the starting point of a live, up-to-date guide for lung research at https://www.lungmap.net/cell-cards/. We hope that Lung CellCards will promote the community-wide effort to establish, maintain, and restore respiratory health.",Lung CellCards,0.907546788,NA,0,Lung CellCards,0.907546788,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/21/2021 +32618424,http://bmm-lab.github.io/LymphoAtlas,"LymphoAtlas: a dynamic and integrated phosphoproteomic resource of TCR signaling in primary T cells reveals ITSN2 as a regulator of effector functions. T-cell receptor (TCR) ligation-mediated protein phosphorylation regulates the activation, cellular responses, and fates of T cells. Here, we used time-resolved high-resolution phosphoproteomics to identify, quantify, and characterize the phosphorylation dynamics of thousands of phosphorylation sites in primary T cells during the first 10 min after TCR stimulation. Bioinformatic analysis of the data revealed a coherent orchestration of biological processes underlying T-cell activation. In particular, functional modules associated with cytoskeletal remodeling, transcription, translation, and metabolic processes were mobilized within seconds after TCR engagement. Among proteins whose phosphorylation was regulated by TCR stimulation, we demonstrated, using a fast-track gene inactivation approach in primary lymphocytes, that the ITSN2 adaptor protein regulated T-cell effector functions. This resource, called LymphoAtlas, represents an integrated pipeline to further decipher the organization of the signaling network encoding T-cell activation. 
LymphoAtlas is accessible to the community at: https://bmm-lab.github.io/LymphoAtlas.",LymphoAtlas,0.957420588,NA,0,LymphoAtlas,0.957420588,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2020 +26708986,http://www.lymphos.org,"LymPHOS 2.0: an update of a phosphosite database of primary human T cells. . LymPHOS is a web-oriented database containing peptide and protein sequences and spectrometric information on the phosphoproteome of primary human T-Lymphocytes. Current release 2.0 contains 15 566 phosphorylation sites from 8273 unique phosphopeptides and 4937 proteins, which correspond to a 45-fold increase over the original database description. It now includes quantitative data on phosphorylation changes after time-dependent treatment with activators of the TCR-mediated signal transduction pathway. Sequence data quality has also been improved with the use of multiple search engines for database searching. LymPHOS can be publicly accessed at http://www.lymphos.org. Database URL: http://www.lymphos.org.",LymPHOS,0.99777627,NA,0,LymPHOS,0.99777627,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/26/2015 +24270788,http://lynx.ci.uchicago.edu)--a,"Lynx: a database and knowledge extraction engine for integrative medicine. We have developed Lynx (http://lynx.ci.uchicago.edu)--a web-based database and a knowledge extraction engine, supporting annotation and analysis of experimental data and generation of weighted hypotheses on molecular mechanisms contributing to human phenotypes and disorders of interest. Its underlying knowledge base (LynxKB) integrates various classes of information from >35 public databases and private collections, as well as manually curated data from our group and collaborators. 
Lynx provides advanced search capabilities and a variety of algorithms for enrichment analysis and network-based gene prioritization to assist the user in extracting meaningful knowledge from LynxKB and experimental data, whereas its service-oriented architecture provides public access to LynxKB and its analytical tools via user-friendly web services and interfaces.",Lynx,0.996176362,NA,0,Lynx,0.996176362,1,NA,26590263,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,11/21/2013 +26590263,http://lynx.ci.uchicago.edu,"Lynx: a knowledge base and an analytical workbench for integrative medicine. Lynx (http://lynx.ci.uchicago.edu) is a web-based database and a knowledge extraction engine. It supports annotation and analysis of high-throughput experimental data and generation of weighted hypotheses regarding genes and molecular mechanisms contributing to human phenotypes or conditions of interest. Since the last release, the Lynx knowledge base (LynxKB) has been periodically updated with the latest versions of the existing databases and supplemented with additional information from public databases. These additions have enriched the data annotations provided by Lynx and improved the performance of Lynx analytical tools. Moreover, the Lynx analytical workbench has been supplemented with new tools for reconstruction of co-expression networks and feature-and-network-based prioritization of genetic factors and molecular mechanisms. These developments facilitate the extraction of meaningful knowledge from experimental data and LynxKB. 
The Service Oriented Architecture provides public access to LynxKB and its analytical tools via user-friendly web services and interfaces.",Lynx,0.980447114,NA,0,Lynx,0.980447114,1,NA,24270788,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,11/20/2015 +24002112,http://prodata.swmed.edu/M2S/mut2seq.cgi,"M2SG: mapping human disease-related genetic variants to protein sequences and genomic loci. Summary Online Mendelian Inheritance in Man (OMIM) is a manually curated compendium of human genetic variants and the corresponding phenotypes, mostly human diseases. Instead of directly documenting the native sequences for gene entries, OMIM links its entries to protein and DNA sequences in other databases. However, because of the existence of gene isoforms and errors in OMIM records, mapping a specific OMIM mutation to its corresponding protein sequence is not trivial. Combining computer programs and extensive manual curation of OMIM full-text descriptions and original literature, we mapped 98% of OMIM amino acid substitutions (AASs) and all SwissProt Variant (SwissVar) disease-related AASs to reference sequences and confidently mapped 99.96% of all AASs to the genomic loci. Based on the results, we developed an online database and interactive web server (M2SG) to (i) retrieve the mapped OMIM and SwissVar variants for a given protein sequence; and (ii) obtain related proteins and mutations for an input disease phenotype. This database will be useful for analyzing sequences, understanding the effect of mutations, identifying important genetic variations and designing experiments on a protein of interest. 
Availability and implementation The database and web server are freely available at http://prodata.swmed.edu/M2S/mut2seq.cgi.",M2SG,0.962027684,Mendelian Inheritance in Man,0.855303322,M2SG,0.962027684,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/3/2013 +33906563,http://m6add.edbc.org,"M6ADD: a comprehensive database of m6A modifications in diseases. N6-methyladenosine (m6A) modification is an important regulatory factor affecting diseases, including multiple cancers and it is a developing direction for targeted disease therapy. Here, we present the M6ADD (m6A-diseases database) database, a public data resource containing manually curated data on potential m6A-disease associations for which some experimental evidence is available; the related high-throughput sequencing data are also provided and analysed by using different computational methods. To give researchers a tool to query the m6A modification data, the M6ADD was designed as a web-based comprehensive resource focusing on the collection, storage and online analysis of m6A modifications, aimed at exploring the associations between m6A modification and gene disorders and diseases. The M6ADD includes 222 experimentally confirmed m6A-disease associations, involving 59 diseases from a review of more than 2000 published papers. The M6ADD also includes 409,229 m6A-disease associations obtained by computational and statistical methods from 30 high-throughput sequencing datasets. In addition, we provide data on 5239 potential m6A regulatory proteins related to 24 cancers based on network analysis prediction methods. In addition, we have developed a tool to explore the function of m6A-modified genes through the protein-protein interaction networks. 
The M6ADD can be accessed at http://m6add.edbc.org/.",M6ADD,0.994489074,m6A-diseases database,0.816149268,M6ADD,0.994489074,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/27/2021 +29036329,http://m6avar.renlab.org,"m6AVar: a database of functional variants involved in m6A modification. Identifying disease-causing variants among a large number of single nucleotide variants (SNVs) is still a major challenge. Recently, N6-methyladenosine (m6A) has become a research hotspot because of its critical roles in many fundamental biological processes and a variety of diseases. Therefore, it is important to evaluate the effect of variants on m6A modification, in order to gain a better understanding of them. Here, we report m6AVar (http://m6avar.renlab.org), a comprehensive database of m6A-associated variants that potentially influence m6A modification, which will help to interpret variants by m6A function. The m6A-associated variants were derived from three different m6A sources including miCLIP/PA-m6A-seq experiments (high confidence), MeRIP-Seq experiments (medium confidence) and transcriptome-wide predictions (low confidence). Currently, m6AVar contains 16 132 high, 71 321 medium and 326 915 low confidence level m6A-associated variants. We also integrated the RBP-binding regions, miRNA-targets and splicing sites associated with variants to help users investigate the effect of m6A-associated variants on post-transcriptional regulation. Because it integrates the data from genome-wide association studies (GWAS) and ClinVar, m6AVar is also a useful resource for investigating the relationship between the m6A-associated variants and disease. Overall, m6AVar will serve as a useful resource for annotating variants and identifying disease-causing variants.",m6AVar,0.985386446,NA,0,m6AVar,0.985386446,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +23658631,http://mabscessus.um.edu.my,"MabsBase: a Mycobacterium abscessus genome and annotation database. 
Summary Mycobacterium abscessus is a rapidly growing non-tuberculous mycobacterial species that has been associated with a wide spectrum of human infections. As the classification and biology of this organism is still not well understood, comparative genomic analysis on members of this species may provide further insights on their taxonomy, phylogeny, pathogenicity and other information that may contribute to better management of infections. The MabsBase described in this paper is a user-friendly database providing access to whole-genome sequences of newly discovered M. abscessus strains as well as resources for whole-genome annotations and computational predictions, to support the expanding scientific community interested in M. abscessus research. The MabsBase is freely available at http://mabscessus.um.edu.my.",MabsBase,0.996929765,NA,0,MabsBase,0.996929765,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/29/2013 +25536965,http://mace.sookmyung.ac.kr,"MACE: mutation-oriented profiling of chemical response and gene expression in cancers. Summary The mutational status of specific cancer lineages can affect the sensitivity to or resistance against cancer drugs. The MACE database provides web-based interactive tools for interpreting large chemical screening and gene expression datasets of cancer cell lines in terms of mutation and lineage categories. GI50 data of chemicals against individual NCI60 cell lines were normalized and organized to statistically identify mutation- or lineage-specific chemical responses. Similarly, DNA microarray data on NCI60 cell lines were processed to analyze mutation- or lineage-specific gene expression signatures. A combined analysis of GI50 and gene expression data to find potential associations between chemicals and genes is also a capability of this system. This database will provide extensive, systematic information to identify lineage- or mutation-specific anticancer agents and related gene targets. 
Availability and implementation The MACE web database is available at http://mace.sookmyung.ac.kr/. Supplementary information Supplementary data are available at Bioinformatics online. Contact yoonsj@sookmyung.ac.kr.",MACE,0.964918494,NA,0,MACE,0.964918494,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/22/2014 +22058127,http://www.ebi.ac.uk/thornton-srv/databases/MACiE,"MACiE: exploring the diversity of biochemical reactions. MACiE (which stands for Mechanism, Annotation and Classification in Enzymes) is a database of enzyme reaction mechanisms, and can be accessed from http://www.ebi.ac.uk/thornton-srv/databases/MACiE/. This article presents the release of Version 3 of MACiE, which not only extends the dataset to 335 entries, covering 182 of the EC sub-subclasses with a crystal structure available (~90%), but also incorporates greater chemical and structural detail. This version of MACiE represents a shift in emphasis for new entries, from non-homologous representatives covering EC reaction space to enzymes with mechanisms of interest to our users and collaborators with a view to exploring the chemical diversity of life. We present new tools for exploring the data in MACiE and comparing entries as well as new analyses of the data and new searches, many of which can now be accessed via dedicated Perl scripts.",MACiE,0.995728016,Annotation,0.672201276,MACiE,0.995728016,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/3/2011 +22954629,http://www.maconda.bham.ac.uk,"MaConDa: a publicly accessible mass spectrometry contaminants database. Unlabelled Mass spectrometry is widely used in bioanalysis, including the fields of metabolomics and proteomics, to simultaneously measure large numbers of molecules in complex biological samples. Contaminants routinely occur within these samples, for example, originating from the solvents or plasticware. 
Identification of these contaminants is crucial to enable their removal before data analysis, in particular to maintain the validity of conclusions drawn from uni- and multivariate statistical analyses. Although efforts have been made to report contaminants within mass spectra, this information is fragmented and its accessibility is relatively limited. In response to the needs of the bioanalytical community, here we report the creation of an extensive manually well-annotated database of currently known small molecule contaminants. Availability The Mass spectrometry Contaminants Database (MaConDa) is freely available and accessible through all major browsers or by using the MaConDa web service http://www.maconda.bham.ac.uk.",MaConDa,0.993165344,Mass spectrometry Contaminants Database,0.910215296,MaConDa,0.993165344,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/6/2012 +32367112,http://big.cdu.edu.cn/macsnvdb,"MACSNVdb: a high-quality SNV database for interspecies genetic divergence investigation among macaques. . Macaques are the most widely used non-human primates in biomedical research. The genetic divergence between these animal models is responsible for their phenotypic differences in response to certain diseases. However, the macaque single nucleotide polymorphism resources mainly focused on rhesus macaque (Macaca mulatta), which hinders the broad research and biomedical application of other macaques. In order to overcome these limitations, we constructed a database named MACSNVdb that focuses on the interspecies genetic diversity among macaque genomes. MACSNVdb is a web-enabled database comprising ~74.51 million high-quality non-redundant single nucleotide variants (SNVs) identified among 20 macaque individuals from six species groups (muttla, fascicularis, sinica, arctoides, silenus, sylvanus). In addition to individual SNVs, MACSNVdb also allows users to browse and retrieve groups of user-defined SNVs. 
In particular, users can retrieve non-synonymous SNVs that may have deleterious effects on protein structure or function within macaque orthologs of human disease and drug-target genes. Besides position, alleles and flanking sequences, MACSNVdb integrated additional genomic information including SNV annotations and gene functional annotations. MACSNVdb will facilitate biomedical researchers to discover molecular mechanisms of diverse responses to diseases as well as primatologist to perform population genetic studies. We will continue updating MACSNVdb with newly available sequencing data and annotation to keep the resource up to date. Database URL: http://big.cdu.edu.cn/macsnvdb/.",MACSNVdb,0.99785769,NA,0,MACSNVdb,0.99785769,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +31353404,http://madb.ethz.ch,"Mammalian Annotation Database for improved annotation and functional classification of Omics datasets from less well-annotated organisms. . Next-generation sequencing technologies and the availability of an increasing number of mammalian and other genomes allow gene expression studies, particularly RNA sequencing, in many non-model organisms. However, incomplete genome annotation and assignments of genes to functional annotation databases can lead to a substantial loss of information in downstream data analysis. To overcome this, we developed Mammalian Annotation Database tool (MAdb, https://madb.ethz.ch) to conveniently provide homologous gene information for selected mammalian species. The assignment between species is performed in three steps: (i) matching official gene symbols, (ii) using ortholog information contained in Ensembl Compara and (iii) pairwise BLAST comparisons of all transcripts. In addition, we developed a new tool (AnnOverlappeR) for the reliable assignment of the National Center for Biotechnology Information (NCBI) and Ensembl gene IDs. 
The gene lists translated to gene IDs of well-annotated species such as a human can be used for improved functional annotation with relevant tools based on Gene Ontology and molecular pathway information. We tested the MAdb on a published RNA-seq data set for the pig and showed clearly improved overrepresentation analysis results based on the assigned human homologous gene identifiers. Using the MAdb revealed a similar list of human homologous genes and functional annotation results regardless of whether starting with gene IDs from NCBI or Ensembl. The MAdb database is accessible via a web interface and a Galaxy application.",MAdb,0.965469927,Mammalian Annotation Database tool,0.926022192,MAdb,0.965469927,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +31665439,http://magen.whu.edu.cn,"MaGenDB: a functional genomics hub for Malvaceae plants. Malvaceae is a family of flowering plants containing many economically important plant species including cotton, cacao and durian. Recently, the genomes of several Malvaceae species have been decoded, and many omics data were generated for individual species. However, no integrative database of multiple species, enabling users to jointly compare and analyse relevant data, is available for Malvaceae. Thus, we developed a user-friendly database named MaGenDB (http://magen.whu.edu.cn) as a functional genomics hub for the plant community. We collected the genomes of 13 Malvaceae species, and comprehensively annotated genes from different perspectives including functional RNA/protein element, gene ontology, KEGG orthology, and gene family. We processed 374 sets of diverse omics data with the ENCODE pipelines and integrated them into a customised genome browser, and designed multiple dynamic charts to present gene/RNA/protein-level knowledge such as dynamic expression profiles and functional elements. We also implemented a smart search system for efficiently mining genes. 
In addition, we constructed a functional comparison system to help comparative analysis between genes on multiple features in one species or across closely related species. This database and associated tools will allow users to quickly retrieve large-scale functional information for biological discovery.",MaGenDB,0.993712246,NA,0,MaGenDB,0.993712246,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24143056,http://www.tnaugenomics.com/mango/index.php,"MAGICdb - Mango Genetic stocks Identification and Characterisation database. MAGICdb is a unique database that integrates the morphological, fruit quality and the marker data of most popular and widely cultivated commercially important mango cultivars. The main objective of MAGICdb is to provide the end users with an integrated dataset of each mango variety cultivated widely in Tamil Nadu. MAGICdb structure is categorized in to three domains namely Morphological Data Search, Fruit Quality Search and Marker Search which in further contains details on Tree Character, Bearing Habit, Season of fruiting, Number of inflorescence/Sq.m, Percentage of hermaphrodite flower(%), Fruit set percentage(%), Number of fruits/ tree, Fruit weight (g) and, Yield (Kg/ tree). This database is equipped with a user friendly interface enabling the users to retrieve the information with ease. Database is available at http://www.tnaugenomics.com/mango/index.php.",MAGICdb,0.998173594,Mango Genetic stocks,0.772508487,MAGICdb,0.998173594,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/23/2013 +28943872,http://www.mahmi.org,"In Silico Screening of the Human Gut Metaproteome Identifies Th17-Promoting Peptides Encrypted in Proteins of Commensal Bacteria. Scientific studies focused on the role of the human microbiome over human health have generated billions of gigabits of genetic information during the last decade. 
Nowadays integration of all this information in public databases and development of pipelines allowing us to biotechnologically exploit this information are urgently needed. Prediction of the potential bioactivity of the products encoded by the human gut microbiome, or metaproteome, is the first step for identifying proteins responsible for the molecular interaction between microorganisms and the immune system. We have recently published the Mechanism of Action of the Human Microbiome (MAHMI) database (http://www.mahmi.org), conceived as a resource compiling peptide sequences with a potential immunomodulatory activity. Fifteen out of the 300 hundred million peptides contained in the MAHMI database were synthesized. These peptides were identified as being encrypted in proteins produced by gut microbiota members, they do not contain cleavage points for the major intestinal endoproteases and displayed high probability to have immunomodulatory bioactivity. The bacterial peptides FR-16 and LR-17 encrypted in proteins from Bifidobacterium longum DJ010A and Bifidobacterium fragilis YCH46 respectively, showed the higher immune modulation capability over human peripheral blood mononuclear cells. Both peptides modulated the immune response toward increases in the Th17 and decreases in the Th1 cell response, together with an induction of IL-22 production. These results strongly suggest the combined use of bioinformatics and in vitro tools as a first stage in the screening of bioactive peptides encrypted in the human gut metaproteome.",MAHMI,0.993577361,Mechanism of Action of the Human Microbiome,0.958739316,MAHMI,0.993577361,1,NA,28077565,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,9/8/2017 +28077565,http://mahmi.org,"MAHMI database: a comprehensive MetaHit-based resource for the study of the mechanism of action of the human microbiota. . 
The Mechanism of Action of the Human Microbiome (MAHMI) database is a unique resource that provides comprehensive information about the sequence of potential immunomodulatory and antiproliferative peptides encrypted in the proteins produced by the human gut microbiota. Currently, MAHMI database contains over 300 hundred million peptide entries, with detailed information about peptide sequence, sources and potential bioactivity. The reference peptide data section is curated manually by domain experts. The in silico peptide data section is populated automatically through the systematic processing of publicly available exoproteomes of the human microbiome. Bioactivity prediction is based on the global alignment of the automatically processed peptides with experimentally validated immunomodulatory and antiproliferative peptides, in the reference section. MAHMI provides researchers with a comparative tool for inspecting the potential immunomodulatory or antiproliferative bioactivity of new amino acidic sequences and identifying promising peptides to be further investigated. Moreover, researchers are welcome to submit new experimental evidence on peptide bioactivity, namely, empiric and structural data, as a proactive, expert means to keep the database updated and improve the implemented bioactivity prediction method. Bioactive peptides identified by MAHMI have a huge biotechnological potential, including the manipulation of aberrant immune responses and the design of new functional ingredients/foods based on the genetic sequences of the human microbiome. Hopefully, the resources provided by MAHMI will be useful to those researching gastrointestinal disorders of autoimmune and inflammatory nature, such as Inflammatory Bowel Diseases. MAHMI database is routinely updated and is available free of charge. 
Database URL: http://mahmi.org/.",MAHMI,0.988386333,Mechanism of Action of the Human Microbiome,0.950187612,MAHMI,0.988386333,1,NA,28943872,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/10/2017 +"26432828, 26519406, 30407532",http://www.maizegdb.org,"MaizeGDB update: new tools, data and interface for the maize model organism database. MaizeGDB is a highly curated, community-oriented database and informatics service to researchers focused on the crop plant and model organism Zea mays ssp. mays. Although some form of the maize community database has existed over the last 25 years, there have only been two major releases. In 1991, the original maize genetics database MaizeDB was created. In 2003, the combined contents of MaizeDB and the sequence data from ZmDB were made accessible as a single resource named MaizeGDB. Over the next decade, MaizeGDB became more sequence driven while still maintaining traditional maize genetics datasets. This enabled the project to meet the continued growing and evolving needs of the maize research community, yet the interface and underlying infrastructure remained unchanged. In 2015, the MaizeGDB team completed a multi-year effort to update the MaizeGDB resource by reorganizing existing data, upgrading hardware and infrastructure, creating new tools, incorporating new data types (including diversity data, expression data, gene models, and metabolic pathways), and developing and deploying a modern interface. In addition to coordinating a data resource, the MaizeGDB team coordinates activities and provides technical support to the maize research community. MaizeGDB is accessible online at http://www.maizegdb.org.",MaizeGDB,0.995507419,and,0.607942879,MaizeGDB,0.995507419,3,NA,21961731,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,1/1/2019 +21961731,http://MaizeArrayAnnot.bi.up.ac.za,"Maize microarray annotation database. 
Background Microarray technology has matured over the past fifteen years into a cost-effective solution with established data analysis protocols for global gene expression profiling. The Agilent-016047 maize 44 K microarray was custom-designed from EST sequences, but only reporter sequences with EST accession numbers are publicly available. The following information is lacking: (a) reporter - gene model match, (b) number of reporters per gene model, (c) potential for cross hybridization, (d) sense/antisense orientation of reporters, (e) position of reporter on B73 genome sequence (for eQTL studies), and (f) functional annotations of genes represented by reporters. To address this, we developed a strategy to annotate the Agilent-016047 maize microarray, and built a publicly accessible annotation database. Description Genomic annotation of the 42,034 reporters on the Agilent-016047 maize microarray was based on BLASTN results of the 60-mer reporter sequences and their corresponding ESTs against the maize B73 RefGen v2 ""Working Gene Set"" (WGS) predicted transcripts and the genome sequence. The agreement between the EST, WGS transcript and gDNA BLASTN results were used to assign the reporters into six genomic annotation groups. These annotation groups were: (i) ""annotation by sense gene model"" (23,668 reporters), (ii) ""annotation by antisense gene model"" (4,330); (iii) ""annotation by gDNA"" without a WGS transcript hit (1,549); (iv) ""annotation by EST"", in which case the EST from which the reporter was designed, but not the reporter itself, has a WGS transcript hit (3,390); (v) ""ambiguous annotation"" (2,608); and (vi) ""inconclusive annotation"" (6,489). 
Functional annotations of reporters were obtained by BLASTX and Blast2GO analysis of corresponding WGS transcripts against GenBank.The annotations are available in the Maize Microarray Annotation Database http://MaizeArrayAnnot.bi.up.ac.za/, as well as through a GBrowse annotation file that can be uploaded to the MaizeGDB genome browser as a custom track.The database was used to re-annotate lists of differentially expressed genes reported in case studies of published work using the Agilent-016047 maize microarray. Up to 85% of reporters in each list could be annotated with confidence by a single gene model, however up to 10% of reporters had ambiguous annotations. Overall, more than 57% of reporters gave a measurable signal in tissues as diverse as anthers and leaves. Conclusions The Maize Microarray Annotation Database will assist users of the Agilent-016047 maize microarray in (i) refining gene lists for global expression analysis, and (ii) confirming the annotation of candidate genes before functional studies.",MaizeGDB,0.842145503,NA,0,MaizeGDB,0.842145503,1,NA,"26432828.0, 26519406.0, 30407532.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,10/1/2011 +33193550,http://maizemine.maizegdb.org,"MaizeMine: A Data Mining Warehouse for the Maize Genetics and Genomics Database. MaizeMine is the data mining resource of the Maize Genetics and Genome Database (MaizeGDB; http://maizemine.maizegdb.org). It enables researchers to create and export customized annotation datasets that can be merged with their own research data for use in downstream analyses. MaizeMine uses the InterMine data warehousing system to integrate genomic sequences and gene annotations from the Zea mays B73 RefGen_v3 and B73 RefGen_v4 genome assemblies, Gene Ontology annotations, single nucleotide polymorphisms, protein annotations, homologs, pathways, and precomputed gene expression levels based on RNA-seq data from the Z. 
mays B73 Gene Expression Atlas. MaizeMine also provides database cross references between genes of alternative gene sets from Gramene and NCBI RefSeq. MaizeMine includes several search tools, including a keyword search, built-in template queries with intuitive search menus, and a QueryBuilder tool for creating custom queries. The Genomic Regions search tool executes queries based on lists of genome coordinates, and supports both the B73 RefGen_v3 and B73 RefGen_v4 assemblies. The List tool allows you to upload identifiers to create custom lists, perform set operations such as unions and intersections, and execute template queries with lists. When used with gene identifiers, the List tool automatically provides gene set enrichment for Gene Ontology (GO) and pathways, with a choice of statistical parameters and background gene sets. With the ability to save query outputs as lists that can be input to new queries, MaizeMine provides limitless possibilities for data integration and meta-analysis.",MaizeMine,0.995152235,Maize Genetics and Genome Database,0.688180787,MaizeMine,0.995152235,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2020 +21841810,http://2d.bjmu.edu.cn,"Proteome reference map and regulation network of neonatal rat cardiomyocyte. Aim To study and establish a proteome reference map and regulation network of neonatal rat cardiomyocyte. Methods Cultured cardiomyocytes of neonatal rats were used. All proteins expressed in the cardiomyocytes were separated and identified by two-dimensional polyacrylamide gel electrophoresis (2-DE) and matrix-assisted laser desorption/ionization-time of flight mass spectrometry (MALDI-TOF MS). Biological networks and pathways of the neonatal rat cardiomyocytes were analyzed using the Ingenuity Pathway Analysis (IPA) program (www.ingenuity.com). A 2-DE database was made accessible on-line by Make2ddb package on a web server. Results More than 1000 proteins were separated on 2D gels, and 148 proteins were identified. 
The identified proteins were used for the construction of an extensible markup language-based database. Biological networks and pathways were constructed to analyze the functions associate with cardiomyocyte proteins in the database. The 2-DE database of rat cardiomyocyte proteins can be accessed at http://2d.bjmu.edu.cn. Conclusion A proteome reference map and regulation network of the neonatal rat cardiomyocytes have been established, which may serve as an international platform for storage, analysis and visualization of cardiomyocyte proteomic data.",Make2ddb,0.640331,NA,0,Make2ddb,0.640331,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,8/15/2011 +23584832,http://www.malacards.org,"MalaCards: an integrated compendium for diseases and their annotation. Comprehensive disease classification, integration and annotation are crucial for biomedical discovery. At present, disease compilation is incomplete, heterogeneous and often lacking systematic inquiry mechanisms. We introduce MalaCards, an integrated database of human maladies and their annotations, modeled on the architecture and strategy of the GeneCards database of human genes. MalaCards mines and merges 44 data sources to generate a computerized card for each of 16 919 human diseases. Each MalaCard contains disease-specific prioritized annotations, as well as inter-disease connections, empowered by the GeneCards relational database, its searches and GeneDecks set analyses. First, we generate a disease list from 15 ranked sources, using disease-name unification heuristics. 
Next, we use four schemes to populate MalaCards sections: (i) directly interrogating disease resources, to establish integrated disease names, synonyms, summaries, drugs/therapeutics, clinical features, genetic tests and anatomical context; (ii) searching GeneCards for related publications, and for associated genes with corresponding relevance scores; (iii) analyzing disease-associated gene sets in GeneDecks to yield affiliated pathways, phenotypes, compounds and GO terms, sorted by a composite relevance score and presented with GeneCards links; and (iv) searching within MalaCards itself, e.g. for additional related diseases and anatomical context. The latter forms the basis for the construction of a disease network, based on shared MalaCards annotations, embodying associations based on etiology, clinical features and clinical conditions. This broadly disposed network has a power-law degree distribution, suggesting that this might be an inherent property of such networks. Work in progress includes hierarchical malady classification, ontological mapping and disease set analyses, striving to make MalaCards an even more effective tool for biomedical research. Database URL: http://www.malacards.org/",MalaCards,0.994350731,NA,0,MalaCards,0.994350731,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/12/2013 +32766702,http://mfga.uni-muenster.de,"The Male Fertility Gene Atlas: a web tool for collecting and integrating OMICS data in the context of male infertility. Study question How can one design and implement a system that provides a comprehensive overview of research results in the field of epi-/genetics of male infertility and germ cells? 
Summary answer Working at the interface of literature search engines and raw data repositories, the newly developed Male Fertility Gene Atlas (MFGA) provides a system that can represent aggregated results from scientific publications in a standardized way and perform advanced searches, for example based on the conditions (phenotypes) and genes related to male infertility. What is known already PubMed and Google Scholar are established search engines for research literature. Additionally, repositories like Gene Expression Omnibus and Sequence Read Archive provide access to raw data. Selected processed data can be accessed by visualization tools like the ReproGenomics Viewer. Study design, size, duration The MFGA was developed in a time frame of 18 months under a rapid prototyping approach. Participants/materials, setting, methods In the context of the Clinical Research Unit 'Male Germ Cells' (CRU326), a group of around 50 domain experts in the fields of male infertility and germ cells helped to develop the requirements engineering and feedback loops. They provided a set of 39 representative and heterogeneous publications to establish a basis for the system requirements. Main results and the role of chance The MFGA is freely available online at https://mfga.uni-muenster.de. To date, it contains 115 data sets corresponding to 54 manually curated publications and provides an advanced search function based on study conditions, meta-information and genes, whereby it returns the publications' exact tables and figures that fit the search request as well as a list of the most frequently investigated genes in the result set. Currently, study data for 31 different tissue types, 32 different cell types and 20 conditions are available. Also, ∼8000 and ∼1000 distinct genes have been found to be mentioned in at least 10 and 15 of the publications, respectively. Large scale data Not applicable because no novel data were produced. 
Limitations, reasons for caution For the most part, the content of the system currently includes the selected publications from the development process. However, a structured process for the prospective literature search and inclusion into the MFGA has been defined and is currently implemented. Wider implications of the findings The technical implementation of the MFGA allows for accommodating a wide range of heterogeneous data from aggregated research results. This implementation can be transferred to other diseases to establish comparable systems and generally support research in the medical field. Study funding/competing interest(s)This work was carried out within the frame of the German Research Foundation (DFG) Clinical Research Unit 'Male Germ Cells: from Genes to Function' (CRU326). The authors declare no conflicts of interest.",MFGA,0.8441058,Male Fertility Gene Atlas,0.886184371,Male Fertility Gene Atlas,0.886184371,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/1/2020 +27974320,"http://hpcwebapps.cit.nih.gov/ESBL/Database/MetabolicEnzymes/MetabolicEnzymeDatabase.html, http://hpcwebapps.cit.nih.gov/ESBL/Database/MetabolicEnzymes","From 20th century metabolic wall charts to 21st century systems biology: database of mammalian metabolic enzymes. The organization of the mammalian genome into gene subsets corresponding to specific functional classes has provided key tools for systems biology research. Here, we have created a web-accessible resource called the Mammalian Metabolic Enzyme Database (https://hpcwebapps.cit.nih.gov/ESBL/Database/MetabolicEnzymes/MetabolicEnzymeDatabase.html) keyed to the biochemical reactions represented on iconic metabolic pathway wall charts created in the previous century. Overall, we have mapped 1,647 genes to these pathways, representing ~7 percent of the protein-coding genome. To illustrate the use of the database, we apply it to the area of kidney physiology. 
In so doing, we have created an additional database (Database of Metabolic Enzymes in Kidney Tubule Segments: https://hpcwebapps.cit.nih.gov/ESBL/Database/MetabolicEnzymes/), mapping mRNA abundance measurements (mined from RNA-Seq studies) for all metabolic enzymes to each of 14 renal tubule segments. We carry out bioinformatics analysis of the enzyme expression pattern among renal tubule segments and mine various data sources to identify vasopressin-regulated metabolic enzymes in the renal collecting duct.",NA,0,Mammalian Metabolic Enzyme Database,0.845961971,Mammalian Metabolic Enzyme Database,0.845961971,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/14/2016 +26912953,http://www.iitm.ac.in/bioinfo/mmndb,"Mammalian Mitochondrial ncRNA Database. Unlabelled Mammalian Mitochondrial ncRNA is a web-based database, which provides specific information on non-coding RNA in mammals. This database includes easy searching, comparing with BLAST and retrieving information on predicted structure and its function about mammalian ncRNAs. Availability The database is available for free at http://www.iitm.ac.in/bioinfo/mmndb/.",Unlabelled Mammalian Mitochondrial ncRNA,0.79049837,Mammalian Mitochondrial ncRNA Database,0.941759574,Mammalian Mitochondrial ncRNA Database,0.941759574,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/30/2015 +30820574,http://msgp.pt,"MSGP: the first database of the protein components of the mammalian stress granules. . In response to different stress stimuli, cells transiently form stress granules (SGs) in order to protect themselves and re-establish homeostasis. Besides these important cellular functions, SGs are now being implicated in different human diseases, such as neurodegenerative disorders and cancer. 
SGs are ribonucleoprotein granules, constituted by a variety of different types of proteins, RNAs, factors involved in translation and signaling molecules, being capable of regulating mRNA translation to facilitate stress response. However, until now a complete list of the SG components has not been available. Therefore, we aimer at identifying and linting in an open access database all the proteins described so far as components of SGs. The identification was made through an exhaustive search of studies listed in PubMed and double checked. Moreover, for each identified protein several details were also gathered from public databases, such as the molecular function, the cell types in which they were detected, the type of stress stimuli used to induce SG formation and the reference of the study describing the recruitment of the component to SGs. Expression levels in the context of different neurodegenerative diseases were also obtained and are also described in the database. The Mammalian Stress Granules Proteome is available at https://msgp.pt/, being a new and unique open access online database, the first to list all the protein components of the SGs identified so far. The database constitutes an important and valuable tool for researchers in this research area of growing interest.",NA,0,Mammalian Stress Granules Proteome,0.95231396,Mammalian Stress Granules Proteome,0.95231396,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +31648227,http://manet.illinois.edu,"MANET 3.0: Hierarchy and modularity in evolving metabolic networks. Enzyme recruitment is a fundamental evolutionary driver of modern metabolism. We see evidence of recruitment at work in the metabolic Molecular Ancestry Networks (MANET) database, an online resource that integrates data from KEGG, SCOP and structural phylogenomic reconstruction. The database, which was introduced in 2006, traces the deep history of the structural domains of enzymes in metabolic pathways. 
Here we release version 3.0 of MANET, which updates data from KEGG and SCOP, links enzyme and PDB information with PDBsum, and traces evolutionary information of domains defined at fold family level of SCOP classification in metabolic subnetwork diagrams. Compared to SCOP folds used in the previous versions, fold families are cohesive units of functional similarity that are highly conserved at sequence level and offer a 10-fold increase of data entries. We surveyed enzymatic, functional and catalytic site distributions among superkingdoms showing that ancient enzymatic innovations followed a biphasic temporal pattern of diversification typical of module innovation. We grouped enzymatic activities of MANET into a hierarchical system of subnetworks and mesonetworks matching KEGG classification. The evolutionary growth of these modules of metabolic activity was studied using bipartite networks and their one-mode projections at enzyme, subnetwork and mesonetwork levels of organization. Evolving metabolic networks revealed patterns of enzyme sharing that transcended mesonetwork boundaries and supported the patchwork model of metabolic evolution. We also explored the scale-freeness, randomness and small-world properties of evolving networks as possible organizing principles of network growth and diversification. The network structure shows an increase in hierarchical modularity and scale-free behavior as metabolic networks unfold in evolutionary time. Remarkably, this evolutionary constraint on structure was stronger at lower levels of metabolic organization. Evolving metabolic structure reveals a 'principle of granularity', an evolutionary increase of the cohesiveness of lower-level parts of a hierarchical system. 
MANET is available at http://manet.illinois.edu.",MANET,0.982270757,metabolic Molecular Ancestry Networks,0.82753098,MANET,0.982270757,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/24/2019 +27924046,http://manteia.igbmc.fr,"New data and features for advanced data mining in Manteia. Manteia is an integrative database available online at http://manteia.igbmc.fr which provides a large array of OMICs data related to the development of the mouse, chicken, zebrafish and human. The system is designed to use different types of data together in order to perform advanced datamining, test hypotheses or provide candidate genes involved in biological processes or responsible for human diseases. In this new version of the database, Manteia has been enhanced with new expression data originating from microarray and next generation sequencing experiments. In addition, the system includes new statistics tools to analyze lists of genes in order to compare their functions and highlight their specific features. One of the main novelties of this release is the integration of a machine learning tool called Lookalike that we have developed to analyze the different datasets present in the system in order to identify new disease genes. This tool identifies the key features of known disease genes to provide and rank new candidates with similar properties from the genome. It is also designed to highlight and take into account the specificities of a disease in order to increase the accuracy of its predictions.",Manteia,0.997795522,NA,0,Manteia,0.997795522,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/24/2016 +24558125,http://mantra.tigem.it,"Mantra 2.0: an online collaborative resource for drug mode of action and repurposing by network analysis. Summary Elucidation of molecular targets of a compound [mode of action (MoA)] and its off-targets is a crucial step in drug development. 
We developed an online collaborative resource (MANTRA 2.0) that supports this process by exploiting similarities between drug-induced transcriptional profiles. Drugs are organized in a network of nodes (drugs) and edges (similarities) highlighting 'communities' of drugs sharing a similar MoA. A user can upload gene expression profiles before and after drug treatment in one or multiple cell types. An automated processing pipeline transforms the gene expression profiles into a unique drug 'node' embedded in the drug-network. Visual inspection of the neighbouring drugs and communities helps in revealing its MoA and to suggest new applications of known drugs (drug repurposing). MANTRA 2.0 allows storing and sharing user-generated network nodes, thus making MANTRA 2.0 a collaborative ever-growing resource. Availability and implementation The web tool is freely available for academic use at http://mantra.tigem.it.",MANTRA,0.991984367,NA,0,MANTRA,0.991984367,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/20/2014 +33903708,http://stablab.uniroma2.it/MAP,"The microRNA analysis portal is a next-generation tool for exploring and analyzing miRNA-focused data in the literature. MicroRNAs constitute a class of noncoding small RNAs involved in the posttranscriptional regulation of many biological pathways. In recent years, microRNAs have also been associated with regulation across kingdoms, demonstrating that exogenous miRNAs can function in mammals in a fashion similar to mammalian miRNAs. The growing interest in microRNAs and the increasing amount of literature and molecular and biomedical data available make it difficult to identify records of interest and keep up to date with novel findings. For these reasons, we developed the microRNA Analysis Portal (MAP). MAP selects relevant miRNA-focused articles from PubMed, links biomedical and molecular data and applies bioinformatics modules. 
At the time of this writing, MAP represents the richest, most complete and integrated database focused on microRNAs. MAP also integrates an updated version of MirCompare (2.0), a computational platform used for selecting plant microRNAs on the basis of their ability to regulate mammalian genes. Both MAP and MirCompare functionalities were used to predict that microRNAs from Moringa oleifera have putative roles across kingdoms by regulating human genes coding for proteins of the immune system. Starting from a selection of 94 human microRNAs, MirCompare selected 6 Moringa oleifera functional homologs. The subsequent prediction of human targets and areas of functional enrichment highlighted the central involvement of these genes in regulating immune system processes, particularly the host-virus interaction processes in hepatitis B, cytomegalovirus, papillomavirus and coronavirus. This case of use showed how MAP can help to perform complex queries without any computational background. MAP is available at http://stablab.uniroma2.it/MAP .",MAP,0.872308294,microRNA Analysis Portal,0.776325062,MAP,0.872308294,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/26/2021 +22121218,http://genome.ufl.edu/mapperdb,"The MAPPER2 Database: a multi-genome catalog of putative transcription factor binding sites. The mapper(2) Database (http://genome.ufl.edu/mapperdb) is a component of mapper(2), a web-based system for the analysis of transcription factor binding sites in multiple genomes. The database contains predicted binding sites identified in the promoters of all human, mouse and Drosophila genes using 1017 probabilistic models representing over 600 different transcription factors. In this article we outline the current contents of the database and we describe its web-based user interface in detail. We then discuss ongoing work to extend the database contents to experimental data and to add analysis capabilities. 
Finally, we provide information about recent improvements to the hardware and software platform that mapper(2) is based on.",mapper(2,0.977444768,NA,0,mapper(2,0.977444768,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/24/2011 +24391364,http://www.mapsdatabase.com,"MAPS Database: Medicinal plant Activities, Phytochemical and Structural Database. Unlabelled Drug development from natural sources is an important and fast developing area. Natural sources (plants) have been used to cure a range of diseases for Thousands of years. Different online medicinal plant databases provide information about classifications, activities, phytochemicals and structure of phytochemicals in different formats. These databases do not cover all aspects of medicinal plants. MAPS (Medicinal plant Activities, Phytochemicals & structural database) has been constructed with uniqueness that it combines all information in one web resource and additionally provides test targets on which particular plant found to be effective with reference to the original paper as well. MAPS database is user friendly information resource, including the data of > 500 medicinal plants. This database includes phytochemical constituents, their structure in mol format, different activities possessed by the medicinal plant with the targets reported in literature. Availability http://www.mapsdatabase.com.",MAPS,0.955033183,Medicinal plant Activities,0.944907701,MAPS,0.955033183,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/6/2013 +33685383,http://lncrnapipe.cimap.res.in,"lncRNADetector: a bioinformatics pipeline for long non-coding RNA identification and MAPslnc: a repository of medicinal and aromatic plant lncRNAs. Long non-coding RNAs (lncRNAs) are an emerging class of non-coding RNAs and potent regulatory elements in the living cells. High throughput RNA sequencing analyses have generated a tremendous amount of transcript sequence data. 
A large proportion of these transcript sequences does not code for proteins and are known as non-coding RNAs. Among them, lncRNAs are a unique class of transcripts longer than 200 nucleotides with diverse biological functions and regulatory mechanisms. Recent emerging studies and next-generation sequencing technologies show a substantial amount of lncRNAs within the plant genome, which are yet to be identified. The computational identification of lncRNAs from these transcripts is a challenging task due to the involvement of a series of filtering steps. We have developed lncRNADetector, a bioinformatics pipeline for the identification of novel lncRNAs, especially from medicinal and aromatic plant (MAP) species. The lncRNADetector has been utilized to analyse and identify more than 88,459 lncRNAs from 21 species of MAPs. To provide a knowledge resource for the plant research community towards elucidating the diversity of biological roles of lncRNAs, the information generated about MAP lncRNAs (post-filtering steps) through lncRNADetector has been stored and organized in MAPslnc database (MAPslnc, https://lncrnapipe.cimap.res.in). The lncRNADetector web server and MAPslnc database have been developed in order to facilitate researchers for accurate identification of lncRNAs from the next-generation sequencing data of different organisms for downstream studies. To the best of our knowledge no such MAPslnc database is available till date.",MAPslnc,0.99292849,NA,0,MAPslnc,0.99292849,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/18/2021 +29897419,http://www.mardy.net,"MARDy: Mycology Antifungal Resistance Database. Summary The increase of antifungal drug resistance is a major global human health concern and threatens agriculture and food security; in order to tackle these concerns, it is important to understand the mechanisms that cause antifungal resistance. 
The curated Mycology Antifungal Resistance Database (MARDy) is a web-service of antifungal drug resistance mechanisms, including amino acid substitutions, tandem repeat sequences and genome ploidy. MARDy is implemented on a Linux, Apache, MySQL and PHP web development platform and includes a local installation of BLASTn of the database of curated genes. Availability and implementation MARDy can be accessed at http://www.mardy.net and is free to use. The complete database can be retrieved, ordered by organism, gene and drug. Missing or new mycological antifungal resistance data can be relayed to the development team through a contribute entry form. Updates and news will be publicized via a dedicated Twitter feed: @MARDYfungi.",MARDy,0.99679625,Mycology Antifungal Resistance Database,0.973570075,MARDy,0.99679625,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2018 +24125644,http://marinegenomics.oist.jp,"MarinegenomicsDB: an integrated genome viewer for community-based annotation of genomes. We constructed a web-based genome annotation platform, MarinegenomicsDB, to integrate genome data from various marine organisms including the pearl oyster Pinctada fucata and the coral Acropora digitifera. This newly developed viewer application provides open access to published data and a user-friendly environment for community-based manual gene annotation. Development on a flexible framework enables easy expansion of the website on demand. To date, more than 2000 genes have been annotated using this system. In the future, the website will be expanded to host a wider variety of data, more species, and different types of genome-wide analyses. The website is available at the following URL: http://marinegenomics.oist.jp.",MarinegenomicsDB,0.994072795,NA,0,MarinegenomicsDB,0.994072795,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2013 +25428892,http://59.163.192.83/ltrdb/index.html,"Genome-wide development of transposable elements-based markers in foxtail millet and construction of an integrated database. 
Transposable elements (TEs) are major components of plant genome and are reported to play significant roles in functional genome diversity and phenotypic variations. Several TEs are highly polymorphic for insert location in the genome and this facilitates development of TE-based markers for various genotyping purposes. Considering this, a genome-wide analysis was performed in the model plant foxtail millet. A total of 30,706 TEs were identified and classified as DNA transposons (24,386), full-length Copia type (1,038), partial or solo Copia type (10,118), full-length Gypsy type (1,570), partial or solo Gypsy type (23,293) and Long- and Short-Interspersed Nuclear Elements (3,659 and 53, respectively). Further, 20,278 TE-based markers were developed, namely Retrotransposon-Based Insertion Polymorphisms (4,801, ∼24%), Inter-Retrotransposon Amplified Polymorphisms (3,239, ∼16%), Repeat Junction Markers (4,451, ∼22%), Repeat Junction-Junction Markers (329, ∼2%), Insertion-Site-Based Polymorphisms (7,401, ∼36%) and Retrotransposon-Microsatellite Amplified Polymorphisms (57, 0.2%). A total of 134 Repeat Junction Markers were screened in 96 accessions of Setaria italica and 3 wild Setaria accessions of which 30 showed polymorphism. Moreover, an open access database for these developed resources was constructed (Foxtail millet Transposable Elements-based Marker Database; http://59.163.192.83/ltrdb/index.html). Taken together, this study would serve as a valuable resource for large-scale genotyping applications in foxtail millet and related grass species.",NA,0,Marker Database,0.663708647,Marker Database,0.663708647,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/26/2014 +33245771,http://markerdb.ca,"MarkerDB: an online database of molecular biomarkers. MarkerDB is a freely available electronic database that attempts to consolidate information on all known clinical and a selected set of pre-clinical molecular biomarkers into a single resource. 
The database includes four major types of molecular biomarkers (chemical, protein, DNA [genetic] and karyotypic) and four biomarker categories (diagnostic, predictive, prognostic and exposure). MarkerDB provides information such as: biomarker names and synonyms, associated conditions or pathologies, detailed disease descriptions, detailed biomarker descriptions, biomarker specificity, sensitivity and ROC curves, standard reference values (for protein and chemical markers), variants (for SNP or genetic markers), sequence information (for genetic and protein markers), molecular structures (for protein and chemical markers), tissue or biofluid sources (for protein and chemical markers), chromosomal location and structure (for genetic and karyotype markers), clinical approval status and relevant literature references. Users can browse the data by conditions, condition categories, biomarker types, biomarker categories or search by sequence similarity through the advanced search function. Currently, the database contains 142 protein biomarkers, 1089 chemical biomarkers, 154 karyotype biomarkers and 26 374 genetic markers. These are categorized into 25 560 diagnostic biomarkers, 102 prognostic biomarkers, 265 exposure biomarkers and 6746 predictive biomarkers or biomarker panels. Collectively, these markers can be used to detect, monitor or predict 670 specific human conditions which are grouped into 27 broad condition categories. MarkerDB is available at https://markerdb.ca.",MarkerDB,0.996113002,NA,0,MarkerDB,0.996113002,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +24330312,http://marmal-aid.org,"Marmal-aid--a database for Infinium HumanMethylation450. Background DNA methylation is indispensible for normal human genome function. Currently there is an increasingly large number of DNA methylomic data being released in the public domain allowing for an opportunity to investigate the relationships between the DNA methylome, genome function, and human phenotypes. 
The Illumina450K is one of the most popular platforms for assessing DNA methylation with over 10,000 samples available in the public domain. However, accessing all this data requires downloading each individual experiment and due to inconsistent annotation, accessing the right data can be a challenge. Description Here we introduce 'Marmal-aid', the first standardised database for DNA methylation (freely available at http://marmal-aid.org). In Marmal-aid, the majority of publicly available Illumina HumanMethylation450 data is incorporated into a single repository allowing for re-processing of data including normalisation and imputation of missing values. The database is accessible in two ways: (1) Using an R package to allow for incorporation into existing analysis pipelines which can then be easily queried to gain insight into the functionality of certain CpG sites. This is aimed at a bioinformatician with experience in R. (2) Using a graphical interface allowing general biologists to query a pre-defined set of tissues (currently 15) providing a reference database of the methylation state in these tissues for the 450,000 CpG sites profiled by the Illumina HumanMethylation450. Conclusion Marmal-aid is the largest publicly available Illumina HumanMethylation450 methylation database combining Illumina HumanMethylation450 data from a number of sources into a single location with a single common annotation format. This allows for automated extraction using the R package and inclusion into existing analysis pipelines. Marmal-aid also provides a easy to use GUI to visualise methylation data in user defined genomic regions for various reference tissues.",Marmal-aid,0.976534307,NA,0,Marmal-aid,0.976534307,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/12/2013 +24167507,http://gator.masc-proteomics.org,"MASCP gator: an overview of the Arabidopsis proteomic aggregation portal. 
A key challenge in the area of bioinformatics in the coming decades is the ability to manage the wealth of information that is being generated from the variety of high throughput methodologies currently being undertaken in laboratories across the world. While these approaches have made available large volumes of data to the research community, less attention has been given to the problem of how to intuitively present the data to enable greater biological insights. Recently, an attempt was made to tackle this problem in the area of Arabidopsis proteomics. The model plant has been the target of countless proteomics surveys producing an exhaustive array of data and online repositories. The MASCP Gator is an aggregation portal for proteomic data currently being produced by the community and unites a large collection of specialized resources to a single portal (http://gator.masc-proteomics.org/). Here we describe the latest additions, upgrades and features to this resource further expanding its role into protein modifications and genome sequence variations.",MASCP Gator,0.840257004,NA,0,MASCP Gator,0.840257004,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/23/2013 +33125077,http://www.aiddlab.com/MASI,"MASI: microbiota-active substance interactions database. Xenobiotic and host active substances interact with gut microbiota to influence human health and therapeutics. Dietary, pharmaceutical, herbal and environmental substances are modified by microbiota with altered bioavailabilities, bioactivities and toxic effects. Xenobiotics also affect microbiota with health implications. Knowledge of these microbiota and active substance interactions is important for understanding microbiota-regulated functions and therapeutics. Established microbiota databases provide useful information about the microbiota-disease associations, diet and drug interventions, and microbiota modulation of drugs. 
However, there is insufficient information on the active substances modified by microbiota and the abundance of gut bacteria in humans. Only ∼7% drugs are covered by the established databases. To complement these databases, we developed MASI, Microbiota-Active Substance Interactions database, for providing the information about the microbiota alteration of various substances, substance alteration of microbiota, and the abundance of gut bacteria in humans. These include 1,051 pharmaceutical, 103 dietary, 119 herbal, 46 probiotic, 142 environmental substances interacting with 806 microbiota species linked to 56 diseases and 784 microbiota-disease associations. MASI covers 11 215 bacteria-pharmaceutical, 914 bacteria-herbal, 309 bacteria-dietary, 753 bacteria-environmental substance interactions and the abundance profiles of 259 bacteria species in 3465 patients and 5334 healthy individuals. MASI is freely accessible at http://www.aiddlab.com/MASI.",MASI,0.987803578,microbiota-active substance interactions database,0.924483865,MASI,0.987803578,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +22545773,http://bat.infspire.org/databases/masivedb,"MASiVEdb: the Sirevirus Plant Retrotransposon Database. Background Sireviruses are an ancient genus of the Copia superfamily of LTR retrotransposons, and the only one that has exclusively proliferated within plant genomes. Based on experimental data and phylogenetic analyses, Sireviruses have successfully infiltrated many branches of the plant kingdom, extensively colonizing the genomes of grass species. Notably, it was recently shown that they have been a major force in the make-up and evolution of the maize genome, where they currently occupy ~21% of the nuclear content and ~90% of the Copia population. It is highly likely, therefore, that their life dynamics have been fundamental in the genome composition and organization of a plethora of plant hosts. 
To assist studies into their impact on plant genome evolution and also facilitate accurate identification and annotation of transposable elements in sequencing projects, we developed MASiVEdb (Mapping and Analysis of SireVirus Elements Database), a collective and systematic resource of Sireviruses in plants. Description Taking advantage of the increasing availability of plant genomic sequences, and using an updated version of MASiVE, an algorithm specifically designed to identify Sireviruses based on their highly conserved genome structure, we populated MASiVEdb (http://bat.infspire.org/databases/masivedb/) with data on 16,243 intact Sireviruses (total length >158Mb) discovered in 11 fully-sequenced plant genomes. MASiVEdb is unlike any other transposable element database, providing a multitude of highly curated and detailed information on a specific genus across its hosts, such as complete set of coordinates, insertion age, and an analytical breakdown of the structure and gene complement of each element. All data are readily available through basic and advanced query interfaces, batch retrieval, and downloadable files. A purpose-built system is also offered for detecting and visualizing similarity between user sequences and Sireviruses, as well as for coding domain discovery and phylogenetic analysis. Conclusion MASiVEdb is currently the most comprehensive directory of Sireviruses, and as such complements other efforts in cataloguing plant transposable elements and elucidating their role in host genome evolution. 
Such insights will gradually deepen, as we plan to further improve MASiVEdb by phylogenetically mapping Sireviruses into families, by including data on fragments and solo LTRs, and by incorporating elements from newly-released genomes.",MASiVEdb,0.994499087,Mapping and Analysis of SireVirus Elements Database,0.726713588,MASiVEdb,0.994499087,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/30/2012 +34177338,"http://webs2.kazusa.or.jp/massbase/, http://webs2.kazusa.or.jp/km2","MassBase: A large-scaled depository of mass spectrometry datasets for metabolome analysis. Depository of low-molecular-weight compounds or metabolites detected in various organisms in a non-targeted manner is indispensable for metabolomics research. Due to the diverse chemical compounds, various mass spectrometry (MS) setups with state-of-the-art technologies have been used. Over the past two decades, we have analyzed various biological samples by using gas chromatography-mass spectrometry, liquid chromatography-mass spectrometry, or capillary electrophoresis-mass spectrometry, and archived the datasets in the depository MassBase (http://webs2.kazusa.or.jp/massbase/). As the format of MS datasets depends on the MS setup used, we converted each raw binary dataset of the mass chromatogram to text file format, and thereafter, information of the chromatograph peak was extracted in the text file from the converted file. In total, the depository comprises 46,493 datasets, of which 38,750 belong to the plant species and 7,743 are authentic or mixed chemicals as well as other sources (microorganisms, animals, and foods), as on August 1, 2020. All files in the depository can be downloaded in bulk from the website. 
Mass chromatograms of 90 plant species obtained by LC-Fourier transform ion cyclotron resonance MS or Orbitrap MS, which detect the ionized molecules with high accuracy allowing speculation of chemical compositions, were converted to text files by the software PowerGet, and the chemical annotation of each peak was added. The processed datasets were deposited in the annotation database KomicMarket2 (http://webs2.kazusa.or.jp/km2/). The archives provide fundamental resources for comparative metabolomics and functional genomics, which may result in deeper understanding of living organisms.",MassBase,0.987924576,NA,0,MassBase,0.987924576,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2021 +31189922,http://www.2dmatpedia.org,"2DMatPedia, an open computational database of two-dimensional materials from top-down and bottom-up approaches. Two-dimensional (2D) materials have been a hot research topic in the last decade, due to novel fundamental physics in the reduced dimension and appealing applications. Systematic discovery of functional 2D materials has been the focus of many studies. Here, we present a large dataset of 2D materials, with more than 6,000 monolayer structures, obtained from both top-down and bottom-up discovery procedures. First, we screened all bulk materials in the database of Materials Project for layered structures by a topology-based algorithm and theoretically exfoliated them into monolayers. Then, we generated new 2D materials by chemical substitution of elements in known 2D materials by others from the same group in the periodic table. The structural, electronic and energetic properties of these 2D materials are consistently calculated, to provide a starting point for further material screening, data mining, data analysis and artificial intelligence applications. 
We present the details of computational methodology, data record and technical validation of our publicly available data ( http://www.2dmatpedia.org/ ).",MatPedia,0.677466869,NA,0,MatPedia,0.677466869,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,6/12/2019 +31586405,http://www.pepchem.org/matrisomedb,"MatrisomeDB: the ECM-protein knowledge database. The extracellular matrix (ECM) is a complex and dynamic meshwork of cross-linked proteins that supports cell polarization and functions and tissue organization and homeostasis. Over the past few decades, mass-spectrometry-based proteomics has emerged as the method of choice to characterize the composition of the ECM of normal and diseased tissues. Here, we present a new release of MatrisomeDB, a searchable collection of curated proteomic data from 17 studies on the ECM of 15 different normal tissue types, six cancer types (different grades of breast cancers, colorectal cancer, melanoma, and insulinoma) and other diseases including vascular defects and lung and liver fibroses. MatrisomeDB (http://www.pepchem.org/matrisomedb) was built by retrieving raw mass spectrometry data files and reprocessing them using the same search parameters and criteria to allow for a more direct comparison between the different studies. The present release of MatrisomeDB includes 847 human and 791 mouse ECM proteoforms and over 350 000 human and 600 000 mouse ECM-derived peptide-to-spectrum matches. For each query, a hierarchically-clustered tissue distribution map, a peptide coverage map, and a list of post-translational modifications identified, are generated. 
MatrisomeDB is the most complete collection of ECM proteomic data to date and allows the building of a comprehensive ECM atlas.",MatrisomeDB,0.997006476,the ECM-protein knowledge database,0.631314657,MatrisomeDB,0.997006476,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +25378329,http://matrixdb.ibcp.fr,"MatrixDB, the extracellular matrix interaction database: updated content, a new navigator and expanded functionalities. MatrixDB (http://matrixdb.ibcp.fr) is a freely available database focused on interactions established by extracellular proteins and polysaccharides. It is an active member of the International Molecular Exchange (IMEx) consortium and has adopted the PSI-MI standards for annotating and exchanging interaction data, either at the MIMIx or IMEx level. MatrixDB content has been updated by curation and by importing extracellular interaction data from other IMEx databases. Other major changes include the creation of a new website and the development of a novel graphical navigator, iNavigator, to build and expand interaction networks. Filters may be applied to build sub-networks based on a list of biomolecules, a specified interaction detection method and/or an expression level by tissue, developmental stage, and health state (UniGene data). Any molecule of the network may be selected and its partners added to the network at any time. Networks may be exported under Cytoscape and tabular formats and as images, and may be saved for subsequent re-use.",MatrixDB,0.997984529,the extracellular matrix interaction database,0.829914348,MatrixDB,0.997984529,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2014 +31679514,http://www.mavedb.org,"MaveDB: an open-source platform to distribute and interpret data from multiplexed assays of variant effect. Multiplex assays of variant effect (MAVEs), such as deep mutational scans and massively parallel reporter assays, test thousands of sequence variants in a single experiment. 
Despite the importance of MAVE data for basic and clinical research, there is no standard resource for their discovery and distribution. Here, we present MaveDB ( https://www.mavedb.org ), a public repository for large-scale measurements of sequence variant impact, designed for interoperability with applications to interpret these datasets. We also describe the first such application, MaveVis, which retrieves, visualizes, and contextualizes variant effect maps. Together, the database and applications will empower the community to mine these powerful datasets.",MaveDB,0.996891856,NA,0,MaveDB,0.996891856,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/4/2019 +22301388,http://www.biochem.mpg.de/maxqb,"Analysis of high accuracy, quantitative proteomics data in the MaxQB database. MS-based proteomics generates rapidly increasing amounts of precise and quantitative information. Analysis of individual proteomic experiments has made great strides, but the crucial ability to compare and store information across different proteome measurements still presents many challenges. For example, it has been difficult to avoid contamination of databases with low quality peptide identifications, to control for the inflation in false positive identifications when combining data sets, and to integrate quantitative data. Although, for example, the contamination with low quality identifications has been addressed by joint analysis of deposited raw data in some public repositories, we reasoned that there should be a role for a database specifically designed for high resolution and quantitative data. Here we describe a novel database termed MaxQB that stores and displays collections of large proteomics projects and allows joint analysis and comparison. We demonstrate the analysis tools of MaxQB using proteome data of 11 different human cell lines and 28 mouse tissues. The database-wide false discovery rate is controlled by adjusting the project specific cutoff scores for the combined data sets. 
The 11 cell line proteomes together identify proteins expressed from more than half of all human genes. For each protein of interest, expression levels estimated by label-free quantification can be visualized across the cell lines. Similarly, the expression rank order and estimated amount of each protein within each proteome are plotted. We used MaxQB to calculate the signal reproducibility of the detected peptides for the same proteins across different proteomes. Spearman rank correlation between peptide intensity and detection probability of identified proteins was greater than 0.8 for 64% of the proteome, whereas a minority of proteins have negative correlation. This information can be used to pinpoint false protein identifications, independently of peptide database scores. The information contained in MaxQB, including high resolution fragment spectra, is accessible to the community via a user-friendly web interface at http://www.biochem.mpg.de/maxqb.",MaxQB,0.992410779,NA,0,MaxQB,0.992410779,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/2/2012 +"23118485, 25398900, 30462302",http://mbgd.genome.ad.jp,"MBGD update 2013: the microbial genome database for exploring the diversity of microbial world. The microbial genome database for comparative analysis (MBGD, available at http://mbgd.genome.ad.jp/) is a platform for microbial genome comparison based on orthology analysis. As its unique feature, MBGD allows users to conduct orthology analysis among any specified set of organisms; this flexibility allows MBGD to adapt to a variety of microbial genomic study. Reflecting the huge diversity of microbial world, the number of microbial genome projects now becomes several thousands. To efficiently explore the diversity of the entire microbial genomic data, MBGD now provides summary pages for pre-calculated ortholog tables among various taxonomic groups. 
For some closely related taxa, MBGD also provides the conserved synteny information (core genome alignment) pre-calculated using the CoreAligner program. In addition, efficient incremental updating procedure can create extended ortholog table by adding additional genomes to the default ortholog table generated from the representative set of genomes. Combining with the functionalities of the dynamic orthology calculation of any specified set of organisms, MBGD is an efficient and flexible tool for exploring the microbial genome diversity.",MBGD,0.990653872,microbial genome database for comparative analysis,0.900049647,MBGD,0.990653872,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +29186510,"http://www.ebi.ac.uk, http://www.ebi.ac.uk/services","The European Bioinformatics Institute in 2017: data coordination and integration. The European Bioinformatics Institute (EMBL-EBI) supports life-science research throughout the world by providing open data, open-source software and analytical tools, and technical infrastructure (https://www.ebi.ac.uk). We accommodate an increasingly diverse range of data types and integrate them, so that biologists in all disciplines can explore life in ever-increasing detail. We maintain over 40 data resources, many of which are run collaboratively with partners in 16 countries (https://www.ebi.ac.uk/services). Submissions continue to increase exponentially: our data storage has doubled in less than two years to 120 petabytes. Recent advances in cellular imaging and single-cell sequencing techniques are generating a vast amount of high-dimensional data, bringing to light new cell types and new perspectives on anatomy. Accordingly, one of our main focus areas is integrating high-quality information from bioimaging, biobanking and other types of molecular data. 
This is reflected in our deep involvement in Open Targets, stewarding of plant phenotyping standards (MIAPPE) and partnership in the Human Cell Atlas data coordination platform, as well as the 2017 launch of the Omics Discovery Index. This update gives a birds-eye view of EMBL-EBI's approach to data integration and service development as genomics begins to enter the clinic.",MBL-EBI,0.829999638,NA,0,MBL-EBI,0.829999638,1,"24271396.0, 31701143.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: CLASS,NA,NA,1/1/2018 +22547615,"http://www.mbled.uni-stuttgart.de, http://www.lahey.org/Studies","Systematic analysis of metallo-β-lactamases using an automated database. Metallo-β-lactamases (MBLs) are enzymes that hydrolyze β-lactam antibiotics, resulting in bacterial resistance to these drugs. These proteins have caused concerns due to their facile transference, broad substrate spectra, and the absence of clinically useful inhibitors. To facilitate the classification, nomenclature, and analysis of MBLs, an automated database system was developed, the Metallo-β-Lactamase Engineering Database (MBLED) (http://www.mbled.uni-stuttgart.de). It contains information on MBLs retrieved from the NCBI peptide database while strictly following the nomenclature by Jacoby and Bush (http://www.lahey.org/Studies/) and the generally accepted class B β-lactamase (BBL) standard numbering scheme for MBLs. The database comprises 597 MBL protein sequences and enables systematic analyses of these sequences. A systematic analysis employing the database resulted in the generation of mutation profiles of assigned IMP- and VIM-type MBLs, the identification of five MBL protein entries from the NCBI peptide database that were inconsistent with the Jacoby and Bush nomenclature, and the identification of 15 new IMP candidates and 9 new VIM candidates. 
Furthermore, the database was used to identify residues with high mutation frequencies and variability (mutation hot spots) that were unexpectedly distant from the active site located in the ββ sandwich: positions 208 and 266 in the IMP family and positions 215 and 258 in the VIM family. We expect that the MBLED will be a valuable tool for systematically cataloguing and analyzing the increasing number of MBLs being reported.",MBLED,0.99639684,Lactamase Engineering Database,0.869486794,MBLED,0.99639684,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/30/2012 +34729303,http://www.combio-lezhang.online/MCDB/index_html,"MCDB: A comprehensive curated mitotic catastrophe database for retrieval, protein sequence alignment, and target prediction. Mitotic catastrophe (MC) is a form of programmed cell death induced by mitotic process disorders, which is very important in tumor prevention, development, and drug resistance. Because rapidly increased data for MC is vigorously promoting the tumor-related biomedical and clinical study, it is urgent for us to develop a professional and comprehensive database to curate MC-related data. Mitotic Catastrophe Database (MCDB) consists of 1214 genes/proteins and 5014 compounds collected and organized from more than 8000 research articles. Also, MCDB defines the confidence level, classification criteria, and uniform naming rules for MC-related data, which greatly improves data reliability and retrieval convenience. Moreover, MCDB develops protein sequence alignment and target prediction functions. The former can be used to predict new potential MC-related genes and proteins, and the latter can facilitate the identification of potential target proteins of unknown MC-related compounds. In short, MCDB is such a proprietary, standard, and comprehensive database for MC-relate data that will facilitate the exploration of MC from chemists to biologists in the fields of medicinal chemistry, molecular biology, bioinformatics, oncology and so on. 
The MCDB is distributed on http://www.combio-lezhang.online/MCDB/index_html/.",MCDB,0.984939098,Mitotic Catastrophe Database,0.963184257,MCDB,0.984939098,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/7/2021 +24214963,http://www.genomeindia.org/biocuration,"Manually curated database of rice proteins. 'Manually Curated Database of Rice Proteins' (MCDRP) available at http://www.genomeindia.org/biocuration is a unique curated database based on published experimental data. Semantic integration of scientific data is essential to gain a higher level of understanding of biological systems. Since the majority of scientific data is available as published literature, text mining is an essential step before the data can be integrated and made available for computer-based search in various databases. However, text mining is a tedious exercise and thus, there is a large gap in the data available in curated databases and published literature. Moreover, data in an experiment can be perceived from several perspectives, which may not reflect in the text-based curation. In order to address such issues, we have demonstrated the feasibility of digitizing the experimental data itself by creating a database on rice proteins based on in-house developed data curation models. Using these models data of individual experiments have been digitized with the help of universal ontologies. Currently, the database has data for over 1800 rice proteins curated from >4000 different experiments of over 400 research articles. Since every aspect of the experiment such as gene name, plant type, tissue and developmental stage has been digitized, experimental data can be rapidly accessed and integrated.",MCDRP,0.995357215,Manually Curated Database of Rice Proteins,0.991914093,MCDRP,0.995357215,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/7/2013 +30057343,http://bioinformatics.cau.edu.cn/MCENet,"MCENet: A database for maize conditional co-expression network and network characterization collaborated with multi-dimensional omics levels. 
Maize (Zea mays) is the most widely grown grain crop in the world, playing important roles in agriculture and industry. However, the functions of maize genes remain largely unknown. High-quality genome-wide transcriptome datasets provide important biological knowledge which has been widely and successfully used in plants not only by measuring gene expression levels but also by enabling co-expression analysis for predicting gene functions and modules related to agronomic traits. Recently, thousands of maize transcriptomic data are available across different inbred lines, development stages, tissues, and treatments, or even across different tissue sections and cell lines. Here, we integrated 701 transcriptomic and 108 epigenomic data and studied the different conditional networks with multi-dimensional omics levels. We constructed a searchable, integrative, one-stop online platform, the maize conditional co-expression network (MCENet) platform. MCENet provides 10 global/conditional co-expression networks, 5 network accessional analysis toolkits (i.e., Network Search, Network Remodel, Module Finder, Network Comparison, and Dynamic Expression View) and multiple network functional support toolkits (e.g., motif and module enrichment analysis). We hope that our database might help plant research communities to identify maize functional genes or modules that regulate important agronomic traits. MCENet is publicly accessible at http://bioinformatics.cau.edu.cn/MCENet/.",MCENet,0.991767049,NA,0,MCENet,0.991767049,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/18/2018 +25432973,http://maize.jcvi.org/cellgenomics,"A maize database resource that captures tissue-specific and subcellular-localized gene expression, via fluorescent tags and confocal imaging (Maize Cell Genomics Database). Maize is a global crop and a powerful system among grain crops for genetic and genomic studies. 
However, the development of novel biological tools and resources to aid in the functional identification of gene sequences is greatly needed. Towards this goal, we have developed a collection of maize marker lines for studying native gene expression in specific cell types and subcellular compartments using fluorescent proteins (FPs). To catalog FP expression, we have developed a public repository, the Maize Cell Genomics (MCG) Database, (http://maize.jcvi.org/cellgenomics), to organize a large data set of confocal images generated from the maize marker lines. To date, the collection represents major subcellular structures and also developmentally important progenitor cell populations. The resource is available to the research community, for example to study protein localization or interactions under various experimental conditions or mutant backgrounds. A subset of the marker lines can also be used to induce misexpression of target genes through a transactivation system. For future directions, the image repository can be expanded to accept new image submissions from the research community, and to perform customized large-scale computational image analysis. This community resource will provide a suite of new tools for gaining biological insights by following the dynamics of protein expression at the subcellular, cellular and tissue levels.",MCG,0.953158339,Maize Cell Genomics,0.678850925,MCG,0.953158339,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/27/2014 +28481982,http://friedmanlab.weizmann.ac.il/McPAS-TCR,"McPAS-TCR: a manually curated catalogue of pathology-associated T cell receptor sequences. Motivation While growing numbers of T cell receptor (TCR) repertoires are being mapped by high-throughput sequencing, existing methods do not allow for computationally connecting a given TCR sequence to its target antigen, or relating it to a specific pathology. 
As an alternative, a manually-curated database can relate TCR sequences with their cognate antigens and associated pathologies based on published experimental data. Results We present McPAS-TCR, a manually curated database of TCR sequences associated with various pathologies and antigens based on published literature. Our database currently contains more than 5000 sequences of TCRs associated with various pathologic conditions (including pathogen infections, cancer and autoimmunity) and their respective antigens in humans and in mice. A web-based tool allows for searching the database based on different criteria, and for finding annotated sequences from the database in users' data. The McPAS-TCR website assembles information from a large number of studies that is very hard to dissect otherwise. Initial analyses of the data provide interesting insights on pathology-associated TCR sequences. Availability and implementation Free access at http://friedmanlab.weizmann.ac.il/McPAS-TCR/ . Contact nir.friedman@weizmann.ac.il.",McPAS-TCR,0.978659749,NA,0,McPAS-TCR,0.978659749,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2017 +33780471,http://mcpdb.mbi.ucla.edu,"MCPdb: The bacterial microcompartment database. Bacterial microcompartments are organelle-like structures composed entirely of proteins. They have evolved to carry out several distinct and specialized metabolic functions in a wide variety of bacteria. Their outer shell is constructed from thousands of tessellating protein subunits, encapsulating enzymes that carry out the internal metabolic reactions. The shell proteins are varied, with single, tandem and permuted versions of the PF00936 protein family domain comprising the primary structural component of their polyhedral architecture, which is reminiscent of a viral capsid. 
While considerable amounts of structural and biophysical data have been generated in the last 15 years, the existing functionalities of current resources have limited our ability to rapidly understand the functional and structural properties of microcompartments (MCPs) and their diversity. In order to make the remarkable structural features of bacterial microcompartments accessible to a broad community of scientists and non-specialists, we developed MCPdb: The Bacterial Microcompartment Database (https://mcpdb.mbi.ucla.edu/). MCPdb is a comprehensive resource that categorizes and organizes known microcompartment protein structures and their larger assemblies. To emphasize the critical roles symmetric assembly and architecture play in microcompartment function, each structure in the MCPdb is validated and annotated with respect to: (1) its predicted natural assembly state (2) tertiary structure and topology and (3) the metabolic compartment type from which it derives. The current database includes 163 structures and is available to the public with the anticipation that it will serve as a growing resource for scientists interested in understanding protein-based metabolic organelles in bacteria.",MCPdb,0.99681139,The bacterial microcompartment database,0.613576069,MCPdb,0.99681139,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/29/2021 +27069559,http://spellchecker.mfldclin.edu,"MD-CTS: An integrated terminology reference of clinical and translational medicine. New vocabularies are rapidly evolving in the literature relative to the practice of clinical medicine and translational research. To provide integrated access to new terms, we developed a mobile and desktop online reference-Marshfield Dictionary of Clinical and Translational Science (MD-CTS). It is the first public resource that comprehensively integrates Wiktionary (word definition), BioPortal (ontology), Wiki (image reference), and Medline abstract (word usage) information. 
MD-CTS is accessible at http://spellchecker.mfldclin.edu/. The website provides a broadened capacity for the wider clinical and translational science community to keep pace with newly emerging scientific vocabulary. An initial evaluation using 63 randomly selected biomedical words suggests that online references generally provided better coverage (73%-95%) than paper-based dictionaries (57-71%).",MD-CTS,0.988920406,of Clinical and,0.767214457,MD-CTS,0.988920406,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/2/2016 +30764761,http://csc.columbusstate.edu/carroll/MDB,"MultiDomainBenchmark: a multi-domain query and subject database suite. Background Genetic sequence database retrieval benchmarks play an essential role in evaluating the performance of sequence searching tools. To date, all phylogenetically diverse benchmarks known to the authors include only query sequences with single protein domains. Domains are the primary building blocks of protein structure and function. Independently, each domain can fulfill a single function, but most proteins (>80% in Metazoa) exist as multi-domain proteins. Multiple domain units combine in various arrangements or architectures to create different functions and are often under evolutionary pressures to yield new ones. Thus, it is crucial to create gold standards reflecting the multi-domain complexity of real proteins to more accurately evaluate sequence searching tools. Description This work introduces MultiDomainBenchmark (MDB), a database suite of 412 curated multi-domain queries and 227,512 target sequences, representing at least 5108 species and 1123 phylogenetically divergent protein families, their relevancy annotation, and domain location. Here, we use the benchmark to evaluate the performance of two commonly used sequence searching tools, BLAST/PSI-BLAST and HMMER. Additionally, we introduce a novel classification technique for multi-domain proteins to evaluate how well an algorithm recovers a domain architecture. 
Conclusion MDB is publicly available at http://csc.columbusstate.edu/carroll/MDB/ .",MDB,0.958996102,NA,0,MDB,0.958996102,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/14/2019 +26513174,http://mdp.unimore.it,"MDP, a database linking drug response data to genomic information, identifies dasatinib and statins as a combinatorial strategy to inhibit YAP/TAZ in cancer cells. Targeted anticancer therapies represent the most effective pharmacological strategies in terms of clinical responses. In this context, genetic alteration of several oncogenes represents an optimal predictor of response to targeted therapy. Integration of large-scale molecular and pharmacological data from cancer cell lines promises to be effective in the discovery of new genetic markers of drug sensitivity and of clinically relevant anticancer compounds. To define novel pharmacogenomic dependencies in cancer, we created the Mutations and Drugs Portal (MDP, http://mdp.unimore.it), a web accessible database that combines the cell-based NCI60 screening of more than 50,000 compounds with genomic data extracted from the Cancer Cell Line Encyclopedia and the NCI60 DTP projects. MDP can be queried for drugs active in cancer cell lines carrying mutations in specific cancer genes or for genetic markers associated to sensitivity or resistance to a given compound. As proof of performance, we interrogated MDP to identify both known and novel pharmacogenomics associations and unveiled an unpredicted combination of two FDA-approved compounds, namely statins and Dasatinib, as an effective strategy to potently inhibit YAP/TAZ in cancer cells.",MDP,0.993027329,and Drugs Portal,0.741629084,MDP,0.993027329,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2015 +31240103,http://mdr.xieslab.org,"MDR: an integrative DNA N6-methyladenine and N4-methylcytosine modification database for Rosaceae. Eukaryotic DNA methylation has been receiving increasing attention for its crucial epigenetic regulatory function. 
The recently developed single-molecule real-time (SMRT) sequencing technology provides an efficient way to detect DNA N6-methyladenine (6mA) and N4-methylcytosine (4mC) modifications at a single-nucleotide resolution. The family Rosaceae contains horticultural plants with a wide range of economic importance. However, little is currently known regarding the genome-wide distribution patterns and functions of 6mA and 4mC modifications in the Rosaceae. In this study, we present an integrated DNA 6mA and 4mC modification database for the Rosaceae (MDR, http://mdr.xieslab.org). MDR, the first repository for displaying and storing DNA 6mA and 4mC methylomes from SMRT sequencing data sets for Rosaceae, includes meta and statistical information, methylation densities, Gene Ontology enrichment analyses, and genome search and browse for methylated sites in NCBI. MDR provides important information regarding DNA 6mA and 4mC methylation and may help users better understand epigenetic modifications in the family Rosaceae.",MDR,0.993409554,NA,0,MDR,0.993409554,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/15/2019 +30584170,http://www.mdsgene.org,"MDSGene: Closing Data Gaps in Genotype-Phenotype Correlations of Monogenic Parkinson's Disease. Given the rapidly increasing number of reported movement disorder genes and clinical-genetic desciptions of mutation carriers, the International Parkinson's Disease and Movement Disorder Society Gene Database (MDSGene) initiative has been launched in 2016 and grown to become a large international project (http://www.mdsgene.org). MDSGene currently contains >1150 variants described in ∼5700 movement disorder patients in almost 1000 publications including monogenic forms of PD clinically resembling idiopathic (PARK-PINK1, PARK-Parkin, PARK-DJ-1, PARK-SNCA, PARK-VPS35, PARK-LRRK2), as well as of atypical PD (PARK-SYNJ1, PARK-DNAJC6, PARK-ATP13A2, PARK-FBXO7). Inclusion of genes is based on standardized published criteria for determining causation. 
Clinical and genetic information can be filtered according to demographic, clinical or genetic criteria and summary statistics are automatically generated by the MDSGene online tool. Despite MDSGene's novel approach and features, it also faces several challenges: i) The criteria for designating genes as causative will require further refinement, as well as time and support to replace the faulty list of 'PARKs'. ii) MDSGene has uncovered extensive clinical data gaps. iii) The quickly growing body of clinical and genetic data require a large number of experts worldwide posing logistic challenges. iv) MDSGene currently captures published data only, i.e., a small fraction of the available information on monogenic PD available. Thus, an important future aim is to extend MDSGene to unpublished cases in order to provide the broad data base to the PD community that is necessary to comprehensively inform genetic counseling, therapeutic approaches and clinical trials, as well as basic and clinical research studies in monogenic PD.",MDSGene,0.993123114,Database,0.699059725,MDSGene,0.993123114,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +33084905,http://das.chenlulab.com,"MeDAS: a Metazoan Developmental Alternative Splicing database. Alternative splicing is widespread throughout eukaryotic genomes and greatly increases transcriptomic diversity. Many alternative isoforms have functional roles in developmental processes and are precisely temporally regulated. To facilitate the study of alternative splicing in a developmental context, we created MeDAS, a Metazoan Developmental Alternative Splicing database. MeDAS is an added-value resource that re-analyses publicly archived RNA-seq libraries to provide quantitative data on alternative splicing events as they vary across the time course of development. It has broad temporal and taxonomic scope and is intended to assist the user in identifying trends in alternative splicing throughout development. 
To create MeDAS, we re-analysed a curated set of 2232 Illumina polyA+ RNA-seq libraries that chart detailed time courses of embryonic and post-natal development across 18 species with a taxonomic range spanning the major metazoan lineages from Caenorhabditis elegans to human. MeDAS is freely available at https://das.chenlulab.com both as raw data tables and as an interactive browser allowing searches by species, tissue, or genomic feature (gene, transcript or exon ID and sequence). Results will provide details on alternative splicing events identified for the queried feature and can be visualised at the gene-, transcript- and exon-level as time courses of expression and inclusion levels, respectively.",MeDAS,0.996930599,Metazoan Developmental Alternative Splicing database,0.747435543,MeDAS,0.996930599,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +25098325,http://mediadb.systemsbiology.net,"MediaDB: a database of microbial growth conditions in defined media. Isolating pure microbial cultures and cultivating them in the laboratory on defined media is used to more fully characterize the metabolism and physiology of organisms. However, identifying an appropriate growth medium for a novel isolate remains a challenging task. Even organisms with sequenced and annotated genomes can be difficult to grow, despite our ability to build genome-scale metabolic networks that connect genomic data with metabolic function. The scientific literature is scattered with information about defined growth media used successfully for cultivating a wide variety of organisms, but to date there exists no centralized repository to inform efforts to cultivate less characterized organisms by bridging the gap between genomic data and compound composition for growth media. Here we present MediaDB, a manually curated database of defined media that have been used for cultivating organisms with sequenced genomes, with an emphasis on organisms with metabolic network models. 
The database is accessible online, can be queried by keyword searches or downloaded in its entirety, and can generate exportable individual media formulation files. The data assembled in MediaDB facilitate comparative studies of organism growth media, serve as a starting point for formulating novel growth media, and contribute to formulating media for in silico investigation of metabolic networks. MediaDB is freely available for public use at https://mediadb.systemsbiology.net.",MediaDB,0.991332531,NA,0,MediaDB,0.991332531,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/6/2014 +22701463,http://phospho.medicago.wisc.edu,"Medicago PhosphoProtein Database: a repository for Medicago truncatula phosphoprotein data. The ability of legume crops to fix atmospheric nitrogen via a symbiotic association with soil rhizobia makes them an essential component of many agricultural systems. Initiation of this symbiosis requires protein phosphorylation-mediated signaling in response to rhizobial signals named Nod factors. Medicago truncatula (Medicago) is the model system for studying legume biology, making the study of its phosphoproteome essential. Here, we describe the Medicago PhosphoProtein Database (MPPD; http://phospho.medicago.wisc.edu), a repository built to house phosphoprotein, phosphopeptide, and phosphosite data specific to Medicago. Currently, the MPPD holds 3,457 unique phosphopeptides that contain 3,404 non-redundant sites of phosphorylation on 829 proteins. Through the web-based interface, users are allowed to browse identified proteins or search for proteins of interest. Furthermore, we allow users to conduct BLAST searches of the database using both peptide sequences and phosphorylation motifs as queries. The data contained within the database are available for download to be investigated at the user's discretion. 
The MPPD will be updated continually with novel phosphoprotein and phosphopeptide identifications, with the intent of constructing an unparalleled compendium of large-scale Medicago phosphorylation data.",MPPD,0.981770674,Medicago PhosphoProtein Database,0.982081629,Medicago PhosphoProtein Database,0.982081629,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/11/2012 +25432968,http://www.MedicagoGenome.org,"MTGD: The Medicago truncatula genome database. Medicago truncatula, a close relative of alfalfa (Medicago sativa), is a model legume used for studying symbiotic nitrogen fixation, mycorrhizal interactions and legume genomics. J. Craig Venter Institute (JCVI; formerly TIGR) has been involved in M. truncatula genome sequencing and annotation since 2002 and has maintained a web-based resource providing data to the community for this entire period. The website (http://www.MedicagoGenome.org) has seen major updates in the past year, where it currently hosts the latest version of the genome (Mt4.0), associated data and legacy project information, presented to users via a rich set of open-source tools. A JBrowse-based genome browser interface exposes tracks for visualization. Mutant gene symbols originally assembled and curated by the Frugoli lab are now hosted at JCVI and tie into our community annotation interface, Medicago EuCAP (to be integrated soon with our implementation of WebApollo). Literature pertinent to M. truncatula is indexed and made searchable via the Textpresso search engine. The site also implements MedicMine, an instance of InterMine that offers interconnectivity with other plant 'mines' such as ThaleMine and PhytoMine, and other model organism databases (MODs). In addition to these new features, we continue to provide keyword- and locus identifier-based searches served via a Chado-backed Tripal Instance, a BLAST search interface and bulk downloads of data sets from the iPlant Data Store (iDS). 
Finally, we maintain an E-mail helpdesk, facilitated by a JIRA issue tracking system, where we receive and respond to questions about the website and requests for specific data sets from the community.",MedicMine,0.649071693,Medicago truncatula genome database,0.925092287,Medicago truncatula genome database,0.925092287,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/28/2014 +30381914,http://bif.uohyd.ac.in/medserver,"MedPServer: A database for identification of therapeutic targets and novel leads pertaining to natural products. Natural products have been the source of treatment for various human diseases from time immemorial. Interests in natural product-based scaffolds for the discovery of modern drugs have grown in recent years. However, research on exploring the traditional medicinal systems for modern therapeutics is severely limited due to our incomplete understanding of the therapeutic mechanism of action. One possible solution is to develop computational approaches, based on ligand- and structure-based screening tools, for fast and plausible target identification, leading to elucidation of the therapeutic mechanism. In the present work, we present two methods based on shape-based and pharmacophore search to predict targets of natural products and elucidate their mechanism, and to identify natural product-based leads. These methods were tested on an in-house developed database of medicinal plants that include information from a largely unexplored North-East region of India, known as one of the twelve mega biodiversity regions. However, depending on the choice of the lead molecules, any existing databases can be used for screening. MedPServer is an open access resource available at http://bif.uohyd.ac.in/medserver/.",MedPServer,0.69970268,NA,0,MedPServer,0.69970268,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/28/2018 +29145608,http://medreader.org,"MeDReaders: a database for transcription factors that bind to methylated DNA. 
Understanding the molecular principles governing interactions between transcription factors (TFs) and DNA targets is one of the main subjects for transcriptional regulation. Recently, emerging evidence demonstrated that some TFs could bind to DNA motifs containing highly methylated CpGs both in vitro and in vivo. Identification of such TFs and elucidation of their physiological roles now become an important stepping-stone toward understanding the mechanisms underlying the methylation-mediated biological processes, which have crucial implications for human disease and disease development. Hence, we constructed a database, named as MeDReaders, to collect information about methylated DNA binding activities. A total of 731 TFs, which could bind to methylated DNA sequences, were manually curated in human and mouse studies reported in the literature. In silico approaches were applied to predict methylated and unmethylated motifs of 292 TFs by integrating whole genome bisulfite sequencing (WGBS) and ChIP-Seq datasets in six human cell lines and one mouse cell line extracted from ENCODE and GEO database. MeDReaders database will provide a comprehensive resource for further studies and aid related experiment designs. The database implemented unified access for users to most TFs involved in such methylation-associated binding actives. The website is available at http://medreader.org/.",MeDReaders,0.957522929,NA,0,MeDReaders,0.957522929,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +35424258,http://cb.imsc.res.in/mefsat,"MeFSAT: a curated natural product database specific to secondary metabolites of medicinal fungi. Fungi are a rich source of secondary metabolites which constitutes a valuable and diverse chemical space of natural products. Medicinal fungi have been used in traditional medicine to treat human ailments for centuries. To date, there is no devoted resource on secondary metabolites and therapeutic uses of medicinal fungi. 
Such a dedicated resource compiling dispersed information on medicinal fungi across published literature will facilitate ongoing efforts towards natural product based drug discovery. Here, we present the first comprehensive manually curated database on Medicinal Fungi Secondary metabolites And Therapeutics (MeFSAT) that compiles information on 184 medicinal fungi, 1830 secondary metabolites and 149 therapeutics uses. Importantly, MeFSAT contains a non-redundant in silico natural product library of 1830 secondary metabolites along with information on their chemical structures, computed physicochemical properties, drug-likeness properties, predicted ADMET properties, molecular descriptors and predicted human target proteins. By comparing the physicochemical properties of secondary metabolites in MeFSAT with other small molecules collections, we find that fungal secondary metabolites have high stereochemical complexity and shape complexity similar to other natural product libraries. Based on multiple scoring schemes, we have filtered a subset of 228 drug-like secondary metabolites in MeFSAT database. By constructing and analyzing chemical similarity networks, we show that the chemical space of secondary metabolites in MeFSAT is highly diverse. The compiled information in MeFSAT database is openly accessible at: https://cb.imsc.res.in/mefsat/.",MeFSAT,0.988593876,Medicinal Fungi Secondary metabolites And Therapeutics,0.926579752,MeFSAT,0.988593876,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/12/2021 +29745830,http://www.bi.cs.titech.ac.jp/megadock-web,"MEGADOCK-Web: an integrated database of high-throughput structure-based protein-protein interaction predictions. BACKGROUND:Protein-protein interactions (PPIs) play several roles in living cells, and computational PPI prediction is a major focus of many researchers. The three-dimensional (3D) structure and binding surface are important for the design of PPI inhibitors. 
Therefore, rigid body protein-protein docking calculations for two protein structures are expected to allow elucidation of PPIs different from known complexes in terms of 3D structures because known PPI information is not explicitly required. We have developed rapid PPI prediction software based on protein-protein docking, called MEGADOCK. In order to fully utilize the benefits of computational PPI predictions, it is necessary to construct a comprehensive database to gather prediction results and their predicted 3D complex structures and to make them easily accessible. Although several databases exist that provide predicted PPIs, the previous databases do not contain a sufficient number of entries for the purpose of discovering novel PPIs. RESULTS:In this study, we constructed an integrated database of MEGADOCK PPI predictions, named MEGADOCK-Web. MEGADOCK-Web provides more than 10 times the number of PPI predictions than previous databases and enables users to conduct PPI predictions that cannot be found in conventional PPI prediction databases. In MEGADOCK-Web, there are 7528 protein chains and 28,331,628 predicted PPIs from all possible combinations of those proteins. Each protein structure is annotated with PDB ID, chain ID, UniProt AC, related KEGG pathway IDs, and known PPI pairs. Additionally, MEGADOCK-Web provides four powerful functions: 1) searching precalculated PPI predictions, 2) providing annotations for each predicted protein pair with an experimentally known PPI, 3) visualizing candidates that may interact with the query protein on biochemical pathways, and 4) visualizing predicted complex structures through a 3D molecular viewer. CONCLUSION:MEGADOCK-Web provides a huge amount of comprehensive PPI predictions based on docking calculations with biochemical pathways and enables users to easily and quickly assess PPI feasibilities by archiving PPI predictions. 
MEGADOCK-Web also promotes the discovery of new PPIs and protein functions and is freely available for use at http://www.bi.cs.titech.ac.jp/megadock-web/ .",MEGADOCK,0.884224534,NA,0,MEGADOCK,0.884224534,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/8/2018 +28791657,http://sedufau.shinyapps.io/megalex,"MEGALEX: A megastudy of visual and auditory word recognition. Using the megastudy approach, we report a new database (MEGALEX) of visual and auditory lexical decision times and accuracy rates for tens of thousands of words. We collected visual lexical decision data for 28,466 French words and the same number of pseudowords, and auditory lexical decision data for 17,876 French words and the same number of pseudowords (synthesized tokens were used for the auditory modality). This constitutes the first large-scale database for auditory lexical decision, and the first database to enable a direct comparison of word recognition in different modalities. Different regression analyses were conducted to illustrate potential ways to exploit this megastudy database. First, we compared the proportions of variance accounted for by five word frequency measures. Second, we conducted item-level regression analyses to examine the relative importance of the lexical variables influencing performance in the different modalities (visual and auditory). Finally, we compared the similarities and differences between the two modalities. All data are freely available on our website ( https://sedufau.shinyapps.io/megalex/ ) and are searchable at www.lexique.org , inside the Open Lexique search engine.",MEGALEX,0.995782244,NA,0,MEGALEX,0.995782244,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2018 +"27899569, 31722416",http://megares.meglab.org,"MEGARes: an antimicrobial resistance database for high throughput sequencing. Antimicrobial resistance has become an imminent concern for public health. 
As methods for detection and characterization of antimicrobial resistance move from targeted culture and polymerase chain reaction to high throughput metagenomics, appropriate resources for the analysis of large-scale data are required. Currently, antimicrobial resistance databases are tailored to smaller-scale, functional profiling of genes using highly descriptive annotations. Such characteristics do not facilitate the analysis of large-scale, ecological sequence datasets such as those produced with the use of metagenomics for surveillance. In order to overcome these limitations, we present MEGARes (https://megares.meglab.org), a hand-curated antimicrobial resistance database and annotation structure that provides a foundation for the development of high throughput acyclical classifiers and hierarchical statistical analysis of big data. MEGARes can be browsed as a stand-alone resource through the website or can be easily integrated into sequence analysis pipelines through download. Also via the website, we provide documentation for AmrPlusPlus, a user-friendly Galaxy pipeline for the analysis of high throughput sequencing data that is pre-packaged for use with the MEGARes database.",MEGARes,0.933923781,NA,0,MEGARes,0.933923781,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +25566299,http://meiosis.ibcas.ac.cn,"MeioBase: a comprehensive database for meiosis. Meiosis is a special type of cell division process necessary for the sexual reproduction of all eukaryotes. The ever expanding meiosis research calls for an effective and specialized database that is not readily available yet. To fill this gap, we have developed a knowledge database MeioBase (http://meiosis.ibcas.ac.cn), which is comprised of two core parts, Resources and Tools. 
In the Resources part, a wealth of meiosis data collected by curation and manual review from published literatures and biological databases are integrated and organized into various sections, such as Cytology, Pathway, Species, Interaction, and Expression. In the Tools part, some useful tools have been integrated into MeioBase, such as Search, Download, Blast, Comparison, My Favorites, Submission, and Advice. With a simplified and efficient web interface, users are able to search against the database with gene model IDs or keywords, and batch download the data for local investigation. We believe that MeioBase can greatly facilitate the researches related to meiosis.",MeioBase,0.997380674,NA,0,MeioBase,0.997380674,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/16/2014 +34485275,http://mcg.ustc.edu.cn/bsc/meiosis/index.html,"MeiosisOnline: A Manually Curated Database for Tracking and Predicting Genes Associated With Meiosis. Meiosis, an essential step in gametogenesis, is the key event in sexually reproducing organisms. Thousands of genes have been reported to be involved in meiosis. Therefore, a specialist database is much needed for scientists to know about the function of these genes quickly and to search for genes with potential roles in meiosis. Here, we developed ""MeiosisOnline,"" a publicly accessible, comprehensive database of known functional genes and potential candidates in meiosis (https://mcg.ustc.edu.cn/bsc/meiosis/index.html). A total of 2,052 meiotic genes were manually curated from literature resource and were classified into different categories. Annotation information was provided for both meiotic genes and predicted candidates, including basic information, function, protein-protein interaction (PPI), and expression data. On the other hand, 165 mouse genes were predicted as potential candidates in meiosis using the ""Greed AUC Stepwise"" algorithm. 
Thus, MeiosisOnline provides the most updated and detailed information of experimental verified and predicted genes in meiosis. Furthermore, the searching tools and friendly interface of MeiosisOnline will greatly help researchers in studying meiosis in an easy and efficient way.",MeiosisOnline,0.993370068,NA,0,MeiosisOnline,0.993370068,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/13/2021 +24828308,http://prime.psc.riken.jp/meko,"Metabolomic Characterization of Knockout Mutants in Arabidopsis: Development of a Metabolite Profiling Database for Knockout Mutants in Arabidopsis. Despite recent intensive research efforts in functional genomics, the functions of only a limited number of Arabidopsis (Arabidopsis thaliana) genes have been determined experimentally, and improving gene annotation remains a major challenge in plant science. As metabolite profiling can characterize the metabolomic phenotype of a genetic perturbation in the plant metabolism, it provides clues to the function(s) of genes of interest. We chose 50 Arabidopsis mutants, including a set of characterized and uncharacterized mutants, that resemble wild-type plants. We performed metabolite profiling of the plants using gas chromatography-mass spectrometry. To make the data set available as an efficient public functional genomics tool for hypothesis generation, we developed the Metabolite Profiling Database for Knock-Out Mutants in Arabidopsis (MeKO). It allows the evaluation of whether a mutation affects metabolism during normal plant growth and contains images of mutants, data on differences in metabolite accumulation, and interactive analysis tools. Nonprocessed data, including chromatograms, mass spectra, and experimental metadata, follow the guidelines set by the Metabolomics Standards Initiative and are freely downloadable. Proof-of-concept analysis suggests that MeKO is highly useful for the generation of hypotheses for genes of interest and for improving gene annotation. 
MeKO is publicly available at http://prime.psc.riken.jp/meko/.",MeKO,0.985758662,Metabolite Profiling Database for Knock-O,0.698294673,MeKO,0.985758662,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/14/2014 +31504189,http://melad.ddtmlab.org,"MeLAD: an integrated resource for metalloenzyme-ligand associations. Motivation Metalloenzymes are attractive targets for therapeutic intervention owing to their central roles in various biological processes and pathological situations. The fast-growing body of structural data on metalloenzyme-ligand interactions is facilitating efficient drug discovery targeting metalloenzymes. However, there remains a shortage of specific databases that can provide centralized, interconnected information exclusive to metalloenzyme-ligand associations. Results We created a Metalloenzyme-Ligand Association Database (MeLAD), which is designed to provide curated structural data and information exclusive to metalloenzyme-ligand interactions, and more uniquely, present expanded associations that are represented by metal-binding pharmacophores (MBPs), metalloenzyme structural similarity (MeSIM) and ligand chemical similarity (LigSIM). MeLAD currently contains 6086 structurally resolved interactions of 1416 metalloenzymes with 3564 ligands, of which classical metal-binding, non-classical metal-binding, non-metal-binding and metal water-bridging interactions account for 63.0%, 2.3%, 34.4% and 0.3%, respectively. A total of 263 monodentate, 191 bidentate and 15 tridentate MBP chemotypes were included in MeLAD, which are linked to different active site metal ions and coordination modes. 3726 and 52 740 deductive metalloenzyme-ligand associations by MeSIM and LigSIM analyses, respectively, were included in MeLAD. An online server is provided for users to conduct metalloenzyme profiling prediction for small molecules of interest. MeLAD is searchable by multiple criteria, e.g. 
metalloenzyme name, ligand identifier, functional class, bioinorganic class, metal ion and metal-containing cofactor, which will serve as a valuable, integrative data source to foster metalloenzyme related research, particularly involved in drug discovery targeting metalloenzymes. Availability and implementation MeLAD is accessible at https://melad.ddtmlab.org. Supplementary information Supplementary data are available at Bioinformatics online.",MeLAD,0.996227562,Metalloenzyme-Ligand Association Database,0.90846928,MeLAD,0.996227562,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2020 +23875173,"http://genesetdb.auckland.ac.nz/melanomadb/about.html, http://www.biomatters.com/apps/melanoma-profiler-for-research","MelanomaDB: A Web Tool for Integrative Analysis of Melanoma Genomic Information to Identify Disease-Associated Molecular Pathways. Despite on-going research, metastatic melanoma survival rates remain low and treatment options are limited. Researchers can now access a rapidly growing amount of molecular and clinical information about melanoma. This information is becoming difficult to assemble and interpret due to its dispersed nature, yet as it grows it becomes increasingly valuable for understanding melanoma. Integration of this information into a comprehensive resource to aid rational experimental design and patient stratification is needed. As an initial step in this direction, we have assembled a web-accessible melanoma database, MelanomaDB, which incorporates clinical and molecular data from publically available sources, which will be regularly updated as new information becomes available. 
This database allows complex links to be drawn between many different aspects of melanoma biology: genetic changes (e.g., mutations) in individual melanomas revealed by DNA sequencing, associations between gene expression and patient survival, data concerning drug targets, biomarkers, druggability, and clinical trials, as well as our own statistical analysis of relationships between molecular pathways and clinical parameters that have been produced using these data sets. The database is freely available at http://genesetdb.auckland.ac.nz/melanomadb/about.html. A subset of the information in the database can also be accessed through a freely available web application in the Illumina genomic cloud computing platform BaseSpace at http://www.biomatters.com/apps/melanoma-profiler-for-research. The MelanomaDB database illustrates dysregulation of specific signaling pathways across 310 exome-sequenced melanomas and in individual tumors and identifies the distribution of somatic variants in melanoma. We suggest that MelanomaDB can provide a context in which to interpret the tumor molecular profiles of individual melanoma patients relative to biological information and available drug therapies.",MelanomaDB,0.994151652,NA,0,MelanomaDB,0.994151652,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/16/2013 +25380778,http://www.melgene.org,"A Web-based database of genetic association studies in cutaneous melanoma enhanced with network-driven data exploration tools. . The publicly available online database MelGene provides a comprehensive, regularly updated, collection of data from genetic association studies in cutaneous melanoma (CM), including random-effects meta-analysis results of all eligible polymorphisms. The updated database version includes data from 192 publications with information on 1114 significantly associated polymorphisms across 280 genes, along with new front-end and back-end capabilities. Various types of relationships between data are calculated and visualized as networks. 
We constructed 13 different networks containing the polymorphisms and the genes included in MelGene. We explored the derived network representations under the following questions: (i) are there nodes that deserve consideration regarding their network connectivity characteristics? (ii) What is the relation of either the genome-wide or nominally significant CM polymorphisms/genes with the ones highlighted by the network representation? We show that our network approach using the MelGene data reveals connections between statistically significant genes/ polymorphisms and other genes/polymorphisms acting as 'hubs' in the reconstructed networks. To the best of our knowledge, this is the first database containing data from a comprehensive field synopsis and systematic meta-analyses of genetic polymorphisms in CM that provides user-friendly tools for in-depth molecular network visualization and exploration. The proposed network connections highlight potentially new loci requiring further investigation of their relation to melanoma risk. Database URL: http://www.melgene.org.",MelGene,0.995092273,NA,0,MelGene,0.995092273,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/7/2014 +29795526,http://melonomics.net,"An improved assembly and annotation of the melon (Cucumis melo L.) reference genome. We report an improved assembly (v3.6.1) of the melon (Cucumis melo L.) genome and a new genome annotation (v4.0). The optical mapping approach allowed correcting the order and the orientation of 21 previous scaffolds and permitted to correctly define the gap-size extension along the 12 pseudomolecules. A new comprehensive annotation was also built in order to update the previous annotation v3.5.1, released more than six years ago. Using an integrative annotation pipeline, based on exhaustive RNA-Seq collections and ad-hoc transposable element annotation, we identified 29,980 protein-coding loci. 
Compared to the previous version, the v4.0 annotation improved gene models in terms of completeness of gene structure, UTR regions definition, intron-exon junctions and reduction of fragmented genes. More than 8,000 new genes were identified, one third of them being well supported by RNA-Seq data. To make all the new resources easily exploitable and completely available for the scientific community, a redesigned Melonomics genomic platform was released at http://melonomics.net . The resources produced in this work considerably increase the reliability of the melon genome assembly and resolution of the gene models paving the way for further studies in melon and related species.",Melonomics,0.821713448,NA,0,Melonomics,0.821713448,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/24/2018 +27510400,http://membranome.org,"Membranome: a database for proteome-wide analysis of single-pass membrane proteins. The Membranome database was developed to assist analysis and computational modeling of single-pass (bitopic) transmembrane (TM) proteins and their complexes by providing structural information about these proteins on a genomic scale. 
The database currently collects data on >6000 bitopic proteins from Homo sapiens, Arabidopsis thaliana, Dictyostelium discoideum, Saccharomyces cerevisiae, Escherichia coli and Methanocaldococcus jannaschii It presents the following data: (i) hierarchical classification of bitopic proteins into 15 functional classes, 689 structural superfamilies and 1404 families; (ii) 446 complexes of bitopic proteins with known three-dimensional (3D) structures classified into 129 families; (iii) computationally generated three-dimensional models of TM α-helices positioned in membranes; (iv) amino acid sequences, domain architecture, functional annotation and available experimental structures of bitopic proteins; (v) TM topology and intracellular localization, (vi) physical interactions between proteins from the database along with links to other resources. The database is freely accessible at http://membranome.org There is a variety of options for browsing, sorting, searching and retrieval of the content, including downloadable coordinate files of TM domains with calculated membrane boundaries.",Membranome,0.99290514,NA,0,Membranome,0.99290514,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/10/2016 +33119751,http://memmorf.hegelab.org,"The MemMoRF database for recognizing disordered protein regions interacting with cellular membranes. Protein and lipid membrane interactions play fundamental roles in a large number of cellular processes (e.g. signalling, vesicle trafficking, or viral invasion). A growing number of examples indicate that such interactions can also rely on intrinsically disordered protein regions (IDRs), which can form specific reversible interactions not only with proteins but also with lipids. We named IDRs involved in such membrane lipid-induced disorder-to-order transition as MemMoRFs, in an analogy to IDRs exhibiting disorder-to-order transition upon interaction with protein partners termed Molecular Recognition Features (MoRFs). 
Currently, both the experimental detection and computational characterization of MemMoRFs are challenging, and information about these regions are scattered in the literature. To facilitate the related investigations we generated a comprehensive database of experimentally validated MemMoRFs based on manual curation of literature and structural data. To characterize the dynamics of MemMoRFs, secondary structure propensity and flexibility calculated from nuclear magnetic resonance chemical shifts were incorporated into the database. These data were supplemented by inclusion of sentences from papers, functional data and disease-related information. The MemMoRF database can be accessed via a user-friendly interface at https://memmorf.hegelab.org, potentially providing a central resource for the characterization of disordered regions in transmembrane and membrane-associated proteins.",MemMoRF,0.971484363,NA,0,MemMoRF,0.971484363,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +30418645,http://memprotmd.bioch.ox.ac.uk,"The MemProtMD database: a resource for membrane-embedded protein structures and their lipid interactions. Integral membrane proteins fulfil important roles in many crucial biological processes, including cell signalling, molecular transport and bioenergetic processes. Advancements in experimental techniques are revealing high resolution structures for an increasing number of membrane proteins. Yet, these structures are rarely resolved in complex with membrane lipids. In 2015, the MemProtMD pipeline was developed to allow the automated lipid bilayer assembly around new membrane protein structures, released from the Protein Data Bank (PDB). To make these data available to the scientific community, a web database (http://memprotmd.bioch.ox.ac.uk) has been developed. 
Simulations and the results of subsequent analysis can be viewed using a web browser, including interactive 3D visualizations of the assembled bilayer and 2D visualizations of lipid contact data and membrane protein topology. In addition, ensemble analyses are performed to detail conserved lipid interaction information across proteins, families and for the entire database of 3506 PDB entries. Proteins may be searched using keywords, PDB or Uniprot identifier, or browsed using classification systems, such as Pfam, Gene Ontology annotation, mpstruc or the Transporter Classification Database. All files required to run further molecular simulations of proteins in the database are provided.",MemProtMD,0.975355506,NA,0,MemProtMD,0.975355506,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +31157825,http://menda.cqmu.edu.cn:8080/index.php,"MENDA: a comprehensive curated resource of metabolic characterization in depression. Depression is a seriously disabling psychiatric disorder with a significant burden of disease. Metabolic abnormalities have been widely reported in depressed patients and animal models. However, there are few systematic efforts that integrate meaningful biological insights from these studies. Herein, available metabolic knowledge in the context of depression was integrated to provide a systematic and panoramic view of metabolic characterization. After screening more than 10 000 citations from five electronic literature databases and five metabolomics databases, we manually curated 5675 metabolite entries from 464 studies, including human, rat, mouse and non-human primate, to develop a new metabolite-disease association database, called MENDA (http://menda.cqmu.edu.cn:8080/index.php). The standardized data extraction process was used for data collection, a multi-faceted annotation scheme was developed, and a user-friendly search engine and web interface were integrated for database access. 
To facilitate data analysis and interpretation based on MENDA, we also proposed a systematic analytical framework, including data integration and biological function analysis. Case studies were provided that identified the consistently altered metabolites using the vote-counting method, and that captured the underlying molecular mechanism using pathway and network analyses. Collectively, we provided a comprehensive curation of metabolic characterization in depression. Our model of a specific psychiatry disorder may be replicated to study other complex diseases.",MENDA,0.996490777,NA,0,MENDA,0.996490777,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2020 +33822911,http://www.moreiralab.com/resources/mensadb,"MENSAdb: a thorough structural analysis of membrane protein dimers. . Membrane proteins (MPs) are key players in a variety of different cellular processes and constitute the target of around 60% of all Food and Drug Administration-approved drugs. Despite their importance, there is still a massive lack of relevant structural, biochemical and mechanistic information mainly due to their localization within the lipid bilayer. To help fulfil this gap, we developed the MEmbrane protein dimer Novel Structure Analyser database (MENSAdb). This interactive web application summarizes the evolutionary and physicochemical properties of dimeric MPs to expand the available knowledge on the fundamental principles underlying their formation. Currently, MENSAdb contains features of 167 unique MPs (63% homo- and 37% heterodimers) and brings insights into the conservation of residues, accessible solvent area descriptors, average B-factors, intermolecular contacts at 2.5 Å and 4.0 Å distance cut-offs, hydrophobic contacts, hydrogen bonds, salt bridges, π-π stacking, T-stacking and cation-π interactions. 
The regular update and organization of all these data into a unique platform will allow a broad community of researchers to collect and analyse a large number of features efficiently, thus facilitating their use in the development of prediction models associated with MPs. Database URL: http://www.moreiralab.com/resources/mensadb.",MENSAdb,0.992488444,MEmbrane protein dimer Novel Structure Analyser database,0.932648856,MENSAdb,0.992488444,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2021 +23219992,http://mgrc.kribb.re.kr:8080/MENT,"MENT: methylation and expression database of normal and tumor tissues. Integrated analysis of DNA methylation and gene expression can reveal specific epigenetic patterns that are important during carcinogenesis. We built an integrated database of DNA methylation and gene expression termed MENT (Methylation and Expression database of Normal and Tumor tissues) to provide researchers information on both DNA methylation and gene expression in diverse cancers. It contains integrated data of DNA methylation, gene expression, correlation of DNA methylation and gene expression in paired samples, and clinicopathological conditions gathered from the GEO (Gene Expression Omnibus) and TCGA (The Cancer Genome Atlas). A user-friendly interface allows users to search for differential DNA methylation by either 'gene search' or 'dataset search'. The 'gene search' returns which conditions are differentially methylated in a gene of interest, while 'dataset search' returns which genes are differentially methylated in a condition of interest based on filtering options such as direction, DM (differential methylation value), and p-value. MENT is the first database which provides both DNA methylation and gene expression information in diverse normal and tumor tissues. Its user-friendly interface allows users to easily search and view both DNA methylation and gene expression patterns. 
MENT is freely available at http://mgrc.kribb.re.kr:8080/MENT/.",MENT,0.987894893,Methylation and Expression database of,0.812570736,MENT,0.987894893,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/7/2012 +26450962,http://mepd.cos.uni-heidelberg.de,"MEPD: medaka expression pattern database, genes and more. The Medaka Expression Pattern Database (MEPD; http://mepd.cos.uni-heidelberg.de/) is designed as a repository of medaka expression data for the scientific community. In this update we present two main improvements. First, we have changed the previous clone-centric view for in situ data to a gene-centric view. This is possible because now we have linked all the data present in MEPD to the medaka gene annotation in ENSEMBL. In addition, we have also connected the medaka genes in MEPD to their corresponding orthologous gene in zebrafish, again using the ENSEMBL database. Based on this, we provide a link to the Zebrafish Model Organism Database (ZFIN) to allow researches to compare expression data between these two fish model organisms. As a second major improvement, we have modified the design of the database to enable it to host regulatory elements, promoters or enhancers, expression patterns in addition to gene expression. The combination of gene expression, by traditional in situ, and regulatory element expression, typically by fluorescence reporter gene, within the same platform assures consistency in terms of annotation. In our opinion, this will allow researchers to uncover new insights between the expression domain of genes and their regulatory landscape.",MEPD,0.996673008,Medaka Expression Pattern Database,0.981669056,MEPD,0.996673008,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/7/2015 +31231773,http://mepmirdb.cn/mepmirdb/index.html,"MepmiRDB: a medicinal plant microRNA database. . MicroRNAs (miRNAs) have been recognized as a key regulator in plant development and metabolism. 
Recent reports showed that the miRNAs of medicinal plants not only act as a critical modulator in secondary metabolism but also had a great potential of performing cross-kingdom gene regulation. Although several plant miRNA repositories have been publicly available, no miRNA database specific for medicinal plants has been reported to date. Here, we report the first version of MepmiRDB (medicinal plant microRNA database), which is freely accessible at http://mepmirdb.cn/mepmirdb/index.html. This database accommodates thousands of miRNA candidates belonging to 29 medicinal plant species. The miRNA information on sequences, expression patterns and regulatory networks has been included in the functional modules of the database. Specifically, the 'Sequence' module provides the sequences of the mature miRNAs and their precursors, and the structure information of the precursors. Moreover, the processing and small RNA accumulation signals on the miRNA precursors are also included in the 'Sequence' module. The organ/growth condition-specific expression information of the mature miRNAs has been stored in the 'Expression' module. The 'Interaction' module offers the information of the degradome-validated miRNA-target pairs of eight plant species. The 'Search' module enables users to search for the miRNAs by plant species and miRNA families, or by sequences. All data in this database are available for download. Taken together, the functional modules of MepmiRDB ensure its importance and timeliness for mechanistic and functional studies on the medicinal plant miRNAs.",MepmiRDB,0.99640429,medicinal plant microRNA database,0.712243244,MepmiRDB,0.99640429,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +"29145643, 32920969",http://www.ebi.ac.uk/merops,"The MEROPS database of proteolytic enzymes, their substrates and inhibitors in 2017 and a comparison with peptidases in the PANTHER database. 
The MEROPS database (http://www.ebi.ac.uk/merops/) is an integrated source of information about peptidases, their substrates and inhibitors. The hierarchical classification is: protein-species, family, clan, with an identifier at each level. The MEROPS website moved to the EMBL-EBI in 2017, requiring refactoring of the code-base and services provided. The interface to sequence searching has changed and the MEROPS protein sequence libraries can be searched at the EMBL-EBI with HMMER, FastA and BLASTP. Cross-references have been established between MEROPS and the PANTHER database at both the family and protein-species level, which will help to improve curation and coverage between the resources. Because of the increasing size of the MEROPS sequence collection, in future only sequences of characterized proteins, and from completely sequenced genomes of organisms of evolutionary, medical or commercial significance will be added. As an example, peptidase homologues in four proteomes from the Asgard superphylum of Archaea have been identified and compared to other archaean, bacterial and eukaryote proteomes. This has given insights into the origins and evolution of peptidase families, including an expansion in the number of proteasome components in Asgard archaeotes and as organisms increase in complexity. Novel structures for proteasome complexes in archaea are postulated.",MEROPS,0.995625496,NA,0,MEROPS,0.995625496,2,NA,"22086950.0, 23584835.0, 24157837.0, 26527717.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/3/2020 +"22086950, 23584835, 24157837, 26527717",http://merops.sanger.ac.uk,"MEROPS: the database of proteolytic enzymes, their substrates and inhibitors. Peptidases, their substrates and inhibitors are of great relevance to biology, medicine and biotechnology. The MEROPS database (http://merops.sanger.ac.uk) aims to fulfil the need for an integrated source of information about these. 
The database has hierarchical classifications in which homologous sets of peptidases and protein inhibitors are grouped into protein species, which are grouped into families, which are in turn grouped into clans. The database has been expanded to include proteolytic enzymes other than peptidases. Special identifiers for peptidases from a variety of model organisms have been established so that orthologues can be detected in other species. A table of predicted active-site residue and metal ligand positions and the residue ranges of the peptidase domains in orthologues has been added to each peptidase summary. New displays of tertiary structures, which can be rotated or have the surfaces displayed, have been added to the structure pages. New indexes for gene names and peptidase substrates have been made available. Among the enhancements to existing features are the inclusion of small-molecule inhibitors in the tables of peptidase-inhibitor interactions, a table of known cleavage sites for each protein substrate, and tables showing the substrate-binding preferences of peptidases derived from combinatorial peptide substrate libraries.",MEROPS,0.980729818,NA,0,MEROPS,0.980729818,4,NA,"29145643.0, 32920969.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/2/2015 +21668943,http://www.cbib.u-bordeaux2.fr/MERYB/index.php,"MeRy-B: a web knowledgebase for the storage, visualization, analysis and annotation of plant NMR metabolomic profiles. Background Improvements in the techniques for metabolomics analyses and growing interest in metabolomic approaches are resulting in the generation of increasing numbers of metabolomic profiles. Platforms are required for profile management, as a function of experimental design, and for metabolite identification, to facilitate the mining of the corresponding data. 
Various databases have been created, including organism-specific knowledgebases and analytical technique-specific spectral databases. However, there is currently no platform meeting the requirements for both profile management and metabolite identification for nuclear magnetic resonance (NMR) experiments. Description MeRy-B, the first platform for plant (1)H-NMR metabolomic profiles, is designed (i) to provide a knowledgebase of curated plant profiles and metabolites obtained by NMR, together with the corresponding experimental and analytical metadata, (ii) for queries and visualization of the data, (iii) to discriminate between profiles with spectrum visualization tools and statistical analysis, (iv) to facilitate compound identification. It contains lists of plant metabolites and unknown compounds, with information about experimental conditions, the factors studied and metabolite concentrations for several plant species, compiled from more than one thousand annotated NMR profiles for various organs or tissues. Conclusion MeRy-B manages all the data generated by NMR-based plant metabolomics experiments, from description of the biological source to identification of the metabolites and determinations of their concentrations. It is the first database allowing the display and overlay of NMR metabolomic profiles selected through queries on data or metadata. MeRy-B is available from http://www.cbib.u-bordeaux2.fr/MERYB/index.php.",MeRy-B,0.987240215,NA,0,MeRy-B,0.987240215,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/13/2011 +21177657,http://konulab.fen.bilkent.edu.tr/mirna,"mESAdb: microRNA expression and sequence analysis database. microRNA expression and sequence analysis database (http://konulab.fen.bilkent.edu.tr/mirna/) (mESAdb) is a regularly updated database for the multivariate analysis of sequences and expression of microRNAs from multiple taxa. 
mESAdb is modular and has a user interface implemented in PHP and JavaScript and coupled with statistical analysis and visualization packages written for the R language. The database primarily comprises mature microRNA sequences and their target data, along with selected human, mouse and zebrafish expression data sets. mESAdb analysis modules allow (i) mining of microRNA expression data sets for subsets of microRNAs selected manually or by motif; (ii) pair-wise multivariate analysis of expression data sets within and between taxa; and (iii) association of microRNA subsets with annotation databases, HUGE Navigator, KEGG and GO. The use of existing and customized R packages facilitates future addition of data sets and analysis tools. Furthermore, the ability to upload and analyze user-specified data sets makes mESAdb an interactive and expandable analysis tool for microRNA sequence and expression data.",mESAdb,0.995116591,microRNA expression and sequence analysis database,0.857367776,mESAdb,0.995116591,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2011 +35559777,http://aliayadi.github.io/MESOCOSM-database,"MESOCOSM: A mesocosm database management system for environmental nanosafety. Engineered nanomaterials (ENMs) are intentionally designed and produced by humans to revolutionize the manufacturing sector, such as electronic goods, paints, tires, clothes, cosmetic products, and biomedicine. With the spread of these ENMs in our daily lives, scientific research have generated a huge amount of data related to their potential impacts on human and environment health. To date, these data are gathered in databases mainly focused on the (eco)toxicity and occupational exposure to ENMs. These databases are therefore not suitable to build well-informed environmental exposure scenarios covering the life cycle of ENMs. 
In this paper, we report the construction of one of the first centralized mesocosm database management system for environmental nanosafety (called MESOCOSM) containing experimental data collected from mesocosm experiments suited for understanding and quantifying both the environmental hazard and exposure. The database, which is publicly available through https://aliayadi.github.io/MESOCOSM-database/, contains 5200 entities covering tens of unique experiments investigating Ag, CeO2, CuO, TiO2-based ENMs as well as nano-enabled products. These entities are divided into different groups i.e. physicochemical properties of ENMS, environmental, exposure and hazard endpoints, and other general information about the mesocosm testing, resulting in more than forty parameters in the database. The MESOCOSM database is equipped with a powerful application, consisting of a graphical user interface (GUI), allowing users to manage and search data using complex queries without relying on programmers. MESOCOSM aims to predict and explain ENMs behavior and fate in different ecosystems as well as their potential impacts on the environment at different stages of the nanoproducts lifecycle. MESOCOSM is expected to benefit the nanosafety community by providing a continuous source of critical information and additional characterization factors for predicting ENMs interactions with the environment and their risks.",MESOCOSM,0.996743441,NA,0,MESOCOSM,0.996743441,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/22/2020 +31836897,http://mesophotic.org,"Mesophotic.org: a repository for scientific information on mesophotic ecosystems. . Mesophotic coral ecosystems (MCEs) and temperate mesophotic ecosystems (TMEs) occur at depths of roughly 30-150 m depth and are characterized by the presence of photosynthetic organisms despite reduced light availability. 
Exploration of these ecosystems dates back several decades, but our knowledge remained extremely limited until about a decade ago, when a renewed interest resulted in the establishment of a rapidly growing research community. Here, we present the 'mesophotic.org' database, a comprehensive and curated repository of scientific literature on mesophotic ecosystems. Through both manually curated and automatically extracted metadata, the repository facilitates rapid retrieval of available information about particular topics (e.g. taxa or geographic regions), exploration of spatial/temporal trends in research and identification of knowledge gaps. The repository can be queried to comprehensively obtain available data to address large-scale questions and guide future research directions. Overall, the 'mesophotic.org' repository provides an independent and open-source platform for the ever-growing research community working on MCEs and TMEs to collate and expedite our understanding of the occurrence, composition and functioning of these ecosystems. Database URL: http://mesophotic.org/.",mesophotic.org,0.972254475,NA,0,mesophotic.org,0.972254475,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +25378335,http://compgenomics.utsa.edu/methylation,"MeT-DB: a database of transcriptome methylation in mammalian cells. Methyltranscriptome is an exciting new area that studies the mechanisms and functions of methylation in transcripts. The MethylTranscriptome DataBase (MeT-DB, http://compgenomics.utsa.edu/methylation/) is the first comprehensive resource for N6-methyladenosine (m(6)A) in mammalian transcriptome. It includes a database that records publicaly available data sets from methylated RNA immunoprecipitation sequencing (MeRIP-Seq), a recently developed technology for interrogating m(6)A methyltranscriptome. 
MeT-DB includes ∼ 300 k m(6)A methylation sites in 74 MeRIP-Seq samples from 22 different experimental conditions predicted by exomePeak and MACS2 algorithms. To explore this rich information, MeT-DB also provides a genome browser to query and visualize context-specific m(6)A methylation under different conditions. MeT-DB also includes the binding site data of microRNA, splicing factor and RNA binding proteins in the browser window for comparison with m(6)A sites and for exploring the potential functions of m(6)A. Analysis of differential m(6)A methylation and the related differential gene expression under two conditions is also available in the browser. A global perspective of the genome-wide distribution of m(6)A methylation in all the data is provided in circular ideograms, which also act as a navigation portal. The query results and the entire data set can be exported to assist publication and additional analysis.",MeT-DB,0.995145404,MethylTranscriptome DataBase,0.968732161,MeT-DB,0.995145404,1,NA,33835460,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/6/2014 +33835460,http://compgenomics.utsa.edu/MeTDB,"MeT-DB V2.0: Elucidating Context-Specific Functions of N6-Methyl-Adenosine Methyltranscriptome. N6-methyladenosine (m6A) is the most prevalent posttranscriptional modification in eukaryotes and plays a pivotal role in various biological processes. A knowledge base with the systematic collection and curation of context specific transcriptome-wide methylations is critical for elucidating their biological functions as well as for developing bioinformatics tools. In this chapter, we present a comprehensive platform MeT-DB V2.0 for elucidating context-specific functions of N6-methyl-adenosine methyltranscriptome. Met-DB V2.0 database contains context specific m6A peaks and single-base sites predicted from 185 samples for 7 species from 26 independent studies. 
Moreover, it is also integrated with a new database for targets of m6A readers, erasers and writers and expanded with more collections of functional data. The Met-DB V2.0 web interface and genome browser provide more friendly, powerful, and informative ways to query and visualize the data. More importantly, MeT-DB V2.0 offers for the first time a series of tools specifically designed for understanding m6A functions. The MeT-DB V2.0 web server is freely available at: http://compgenomics.utsa.edu/MeTDB and www.xjtlu.edu.cn/metdb2 .",MeT-DB,0.983205318,NA,0,MeT-DB,0.983205318,1,NA,25378335,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +29126312,"http://compgenomics.utsa.edu/MeTDB/, http://www.xjtlu.edu.cn/metdb2","MeT-DB V2.0: elucidating context-specific functions of N6-methyl-adenosine methyltranscriptome. Methyltranscriptome is an exciting new area that studies the mechanisms and functions of methylation in transcripts. A knowledge base with the systematic collection and curation of context specific transcriptome-wide methylations is critical for elucidating their biological functions as well as for developing bioinformatics tools. Since its inception in 2014, the Met-DB (Liu, H., Flores, M.A., Meng, J., Zhang, L., Zhao, X., Rao, M.K., Chen, Y. and Huang, Y. (2015) MeT-DB: a database of transcriptome methylation in mammalian cells. Nucleic Acids Res., 43, D197-D203), has become an important resource for methyltranscriptome, especially in the N6-methyl-adenosine (m6A) research community. Here, we report Met-DB v2.0, the significantly improved second version of Met-DB, which is entirely redesigned to focus more on elucidating context-specific m6A functions. Met-DB v2.0 has a major increase in context-specific m6A peaks and single-base sites predicted from 185 samples for 7 species from 26 independent studies. 
Moreover, it is also integrated with a new database for targets of m6A readers, erasers and writers and expanded with more collections of functional data. The redesigned Met-DB v2.0 web interface and genome browser provide more friendly, powerful, and informative ways to query and visualize the data. More importantly, MeT-DB v2.0 offers for the first time a series of tools specifically designed for understanding m6A functions. Met-DB V2.0 will be a valuable resource for m6A methyltranscriptome research. The Met-DB V2.0 database is available at http://compgenomics.utsa.edu/MeTDB/ and http://www.xjtlu.edu.cn/metdb2.",Met-DB,0.965683778,NA,0,Met-DB,0.965683778,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +23521697,http://www.lmmd.org/online_services/metaadedb,"Adverse drug events: database construction and in silico prediction. Adverse drug events (ADEs) are the harms associated with uses of given medications at normal dosages, which are crucial for a drug to be approved in clinical use or continue to stay on the market. Many ADEs are not identified in trials until the drug is approved for clinical use, which results in adverse morbidity and mortality. To date, millions of ADEs have been reported around the world. Methods to avoid or reduce ADEs are an important issue for drug discovery and development. Here, we reported a comprehensive database of adverse drug events (namely MetaADEDB), which included more than 520,000 drug-ADE associations among 3059 unique compounds (including 1330 drugs) and 13,200 ADE items by data integration and text mining. All compounds and ADEs were annotated with the most commonly used concepts defined in Medical Subject Headings (MeSH). Meanwhile, a computational method, namely the phenotypic network inference model (PNIM), was developed for prediction of potential ADEs based on the database. 
The area under the receive operating characteristic curve (AUC) is more than 0.9 by 10-fold cross validation, while the AUC value was 0.912 for an external validation set extracted from the US-FDA Adverse Events Reporting System, which indicated that the prediction capability of the method was reliable. MetaADEDB is accessible free of charge at http://www.lmmd.org/online_services/metaadedb/. The database and the method provide us a useful tool to search for known side effects or predict potential side effects for a given drug or compound.",MetaADEDB,0.991302609,NA,0,MetaADEDB,0.991302609,1,NA,33306787,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,4/8/2013 +33306787,http://lmmd.ecust.edu.cn/metaadedb,"MetaADEDB 2.0: a comprehensive database on adverse drug events. Summary MetaADEDB is an online database we developed to integrate comprehensive information on adverse drug events (ADEs). The first version of MetaADEDB was released in 2013 and has been widely used by researchers. However, it has not been updated for more than seven years. Here, we reported its second version by collecting more and newer data from the U.S. FDA Adverse Event Reporting System (FAERS) and Canada Vigilance Adverse Reaction Online Database, in addition to the original three sources. The new version consists of 744 709 drug-ADE associations between 8498 drugs and 13 193 ADEs, which has an over 40% increase in drug-ADE associations compared to the previous version. Meanwhile, we developed a new and user-friendly web interface for data search and analysis. We hope that MetaADEDB 2.0 could provide a useful tool for drug safety assessment and related studies in drug discovery and development. Availability and implementation The database is freely available at: http://lmmd.ecust.edu.cn/metaadedb/. 
Supplementary information Supplementary data are available at Bioinformatics online.",MetaADEDB,0.990036309,NA,0,MetaADEDB,0.990036309,1,NA,23521697,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,8/1/2021 +22139927,http://MetaDatabase.Org,"MetaBase--the wiki-database of biological databases. Biology is generating more data than ever. As a result, there is an ever increasing number of publicly available databases that analyse, integrate and summarize the available data, providing an invaluable resource for the biological community. As this trend continues, there is a pressing need to organize, catalogue and rate these resources, so that the information they contain can be most effectively exploited. MetaBase (MB) (http://MetaDatabase.Org) is a community-curated database containing more than 2000 commonly used biological databases. Each entry is structured using templates and can carry various user comments and annotations. Entries can be searched, listed, browsed or queried. The database was created using the same MediaWiki technology that powers Wikipedia, allowing users to contribute on many different levels. The initial release of MB was derived from the content of the 2007 Nucleic Acids Research (NAR) Database Issue. Since then, approximately 100 databases have been manually collected from the literature, and users have added information for over 240 databases. MB is synchronized annually with the static Molecular Biology Database Collection provided by NAR. To date, there have been 19 significant contributors to the project; each one is listed as an author here to highlight the community aspect of the project.",MetaBase (MB,0.764882758,NA,0,MetaBase (MB,0.764882758,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,12/1/2011 +26322134,http://minedatabase.mcs.anl.gov,"MINEs: open access databases of computationally predicted enzyme promiscuity products for untargeted metabolomics. 
BACKGROUND:In spite of its great promise, metabolomics has proven difficult to execute in an untargeted and generalizable manner. Liquid chromatography-mass spectrometry (LC-MS) has made it possible to gather data on thousands of cellular metabolites. However, matching metabolites to their spectral features continues to be a bottleneck, meaning that much of the collected information remains uninterpreted and that new metabolites are seldom discovered in untargeted studies. These challenges require new approaches that consider compounds beyond those available in curated biochemistry databases. DESCRIPTION:Here we present Metabolic In silico Network Expansions (MINEs), an extension of known metabolite databases to include molecules that have not been observed, but are likely to occur based on known metabolites and common biochemical reactions. We utilize an algorithm called the Biochemical Network Integrated Computational Explorer (BNICE) and expert-curated reaction rules based on the Enzyme Commission classification system to propose the novel chemical structures and reactions that comprise MINE databases. Starting from the Kyoto Encyclopedia of Genes and Genomes (KEGG) COMPOUND database, the MINE contains over 571,000 compounds, of which 93% are not present in the PubChem database. However, these MINE compounds have on average higher structural similarity to natural products than compounds from KEGG or PubChem. MINE databases were able to propose annotations for 98.6% of a set of 667 MassBank spectra, 14% more than KEGG alone and equivalent to PubChem while returning far fewer candidates per spectra than PubChem (46 vs. 1715 median candidates). Application of MINEs to LC-MS accurate mass data enabled the identity of an unknown peak to be confidently predicted. CONCLUSIONS:MINE databases are freely accessible for non-commercial use via user-friendly web-tools at http://minedatabase.mcs.anl.gov and developer-friendly APIs. 
MINEs improve metabolomics peak identification as compared to general chemical databases whose results include irrelevant synthetic compounds. Furthermore, MINEs complement and expand on previous in silico generated compound databases that focus on human metabolism. We are actively developing the database; future versions of this resource will incorporate transformation rules for spontaneous chemical reactions and more advanced filtering and prioritization of candidate structures. Graphical abstractMINE database construction and access methods. The process of constructing a MINE database from the curated source databases is depicted on the left. The methods for accessing the database are shown on the right.",MINEs,0.649246573,Metabolic In silico Network Expansions,0.687488483,Metabolic In silico Network Expansions,0.687488483,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/28/2015 +23935057,http://www.metabolicmine.org,"metabolicMine: an integrated genomics, genetics and proteomics data warehouse for common metabolic disease research. Common metabolic and endocrine diseases such as diabetes affect millions of people worldwide and have a major health impact, frequently leading to complications and mortality. In a search for better prevention and treatment, there is ongoing research into the underlying molecular and genetic bases of these complex human diseases, as well as into the links with risk factors such as obesity. Although an increasing number of relevant genomic and proteomic data sets have become available, the quantity and diversity of the data make their efficient exploitation challenging. Here, we present metabolicMine, a data warehouse with a specific focus on the genomics, genetics and proteomics of common metabolic diseases. Developed in collaboration with leading UK metabolic disease groups, metabolicMine integrates data sets from a range of experiments and model organisms alongside tools for exploring them. 
The current version brings together information covering genes, proteins, orthologues, interactions, gene expression, pathways, ontologies, diseases, genome-wide association studies and single nucleotide polymorphisms. Although the emphasis is on human data, key data sets from mouse and rat are included. These are complemented by interoperation with the RatMine rat genomics database, with a corresponding mouse version under development by the Mouse Genome Informatics (MGI) group. The web interface contains a number of features including keyword search, a library of Search Forms, the QueryBuilder and list analysis tools. This provides researchers with many different ways to analyse, view and flexibly export data. Programming interfaces and automatic code generation in several languages are supported, and many of the features of the web interface are available through web services. The combination of diverse data sets integrated with analysis tools and a powerful query system makes metabolicMine a valuable research resource. The web interface makes it accessible to first-time users, whereas the Application Programming Interface (API) and web services provide convenient data access and tools for bioinformaticians. metabolicMine is freely available online at http://www.metabolicmine.org Database URL: http://www.metabolicmine.org.",metabolicMine,0.990483761,NA,0,metabolicMine,0.990483761,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/9/2013 +"23060735, 23109552, 23630246",http://www.ebi.ac.uk/metabolights,"MetaboLights: towards a new COSMOS of metabolomics data management. Exciting funding initiatives are emerging in Europe and the US for metabolomics data production, storage, dissemination and analysis. This is based on a rich ecosystem of resources around the world, which has been build during the past ten years, including but not limited to resources such as MassBank in Japan and the Human Metabolome Database in Canada. 
Now, the European Bioinformatics Institute has launched MetaboLights, a database for metabolomics experiments and the associated metadata (http://www.ebi.ac.uk/metabolights). It is the first comprehensive, cross-species, cross-platform metabolomics database maintained by one of the major open access data providers in molecular biology. In October, the European COSMOS consortium will start its work on Metabolomics data standardization, publication and dissemination workflows. The NIH in the US is establishing 6-8 metabolomics services cores as well as a national metabolomics repository. This communication reports about MetaboLights as a new resource for Metabolomics research, summarises the related developments and outlines how they may consolidate the knowledge management in this third large omics field next to proteomics and genomics.",MetaboLights,0.997964621,NA,0,MetaboLights,0.997964621,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/29/2013 +25905099,http://metabolonote.kazusa.or.jp,"Metabolonote: a wiki-based database for managing hierarchical metadata of metabolome analyses. Metabolomics - technology for comprehensive detection of small molecules in an organism - lags behind the other ""omics"" in terms of publication and dissemination of experimental data. Among the reasons for this are difficulty precisely recording information about complicated analytical experiments (metadata), existence of various databases with their own metadata descriptions, and low reusability of the published data, resulting in submitters (the researchers who generate the data) being insufficiently motivated. To tackle these issues, we developed Metabolonote, a Semantic MediaWiki-based database designed specifically for managing metabolomic metadata. 
We also defined a metadata and data description format, called ""Togo Metabolome Data"" (TogoMD), with an ID system that is required for unique access to each level of the tree-structured metadata such as study purpose, sample, analytical method, and data analysis. Separation of the management of metadata from that of data and permission to attach related information to the metadata provide advantages for submitters, readers, and database developers. The metadata are enriched with information such as links to comparable data, thereby functioning as a hub of related data resources. They also enhance not only readers' understanding and use of data but also submitters' motivation to publish the data. The metadata are computationally shared among other systems via APIs, which facilitate the construction of novel databases by database developers. A permission system that allows publication of immature metadata and feedback from readers also helps submitters to improve their metadata. Hence, this aspect of Metabolonote, as a metadata preparation tool, is complementary to high-quality and persistent data repositories such as MetaboLights. A total of 808 metadata for analyzed data obtained from 35 biological species are published currently. Metabolonote and related tools are available free of cost at http://metabolonote.kazusa.or.jp/.",Metabolonote,0.994843602,Metabolome,0.64512068,Metabolonote,0.994843602,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/7/2015 +22086948,http://metacrop.ipk-gatersleben.de,"MetaCrop 2.0: managing and exploring information about crop plant metabolism. MetaCrop is a manually curated repository of high-quality data about plant metabolism, providing different levels of detail from overview maps of primary metabolism to kinetic data of enzymes. It contains information about seven major crop plants with high agronomical importance and two model plants. MetaCrop is intended to support research aimed at the improvement of crops for both nutrition and industrial use. 
It can be accessed via web, web services and an add-on to the Vanted software. Here, we present several novel developments of the MetaCrop system and the extended database content. MetaCrop is now available in version 2.0 at http://metacrop.ipk-gatersleben.de.",MetaCrop,0.99725759,NA,0,MetaCrop,0.99725759,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2011 +29059334,"http://MetaCyc.org, http://BioCyc.org","The MetaCyc database of metabolic pathways and enzymes. MetaCyc (https://MetaCyc.org) is a comprehensive reference database of metabolic pathways and enzymes from all domains of life. It contains more than 2570 pathways derived from >54 000 publications, making it the largest curated collection of metabolic pathways. The data in MetaCyc is strictly evidence-based and richly curated, resulting in an encyclopedic reference tool for metabolism. MetaCyc is also used as a knowledge base for generating thousands of organism-specific Pathway/Genome Databases (PGDBs), which are available in the BioCyc (https://BioCyc.org) and other PGDB collections. This article provides an update on the developments in MetaCyc during the past two years, including the expansion of data and addition of new features.",MetaCyc,0.99694097,NA,0,MetaCyc,0.99694097,1,NA,22102576,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2018 +22102576,"http://metacyc.org/, http://biocyc.org","The MetaCyc database of metabolic pathways and enzymes and the BioCyc collection of pathway/genome databases. The MetaCyc database (http://metacyc.org/) provides a comprehensive and freely accessible resource for metabolic pathways and enzymes from all domains of life. The pathways in MetaCyc are experimentally determined, small-molecule metabolic pathways and are curated from the primary scientific literature. MetaCyc contains more than 1800 pathways derived from more than 30,000 publications, and is the largest curated collection of metabolic pathways currently available. 
Most reactions in MetaCyc pathways are linked to one or more well-characterized enzymes, and both pathways and enzymes are annotated with reviews, evidence codes and literature citations. BioCyc (http://biocyc.org/) is a collection of more than 1700 organism-specific Pathway/Genome Databases (PGDBs). Each BioCyc PGDB contains the full genome and predicted metabolic network of one organism. The network, which is predicted by the Pathway Tools software using MetaCyc as a reference database, consists of metabolites, enzymes, reactions and metabolic pathways. BioCyc PGDBs contain additional features, including predicted operons, transport systems and pathway-hole fillers. The BioCyc website and Pathway Tools software offer many tools for querying and analysis of PGDBs, including Omics Viewers and comparative analysis. New developments include a zoomable web interface for diagrams; flux-balance analysis model generation from PGDBs; web services; and a new tool called Web Groups.",MetaCyc,0.994340897,NA,0,MetaCyc,0.994340897,1,NA,29059334,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/18/2011 +24850854,http://bioinfo.hrbmu.edu.cn/MetaImprint,"MetaImprint: an information repository of mammalian imprinted genes. Genomic imprinting is a complex genetic and epigenetic phenomenon that plays important roles in mammalian development and diseases. Mammalian imprinted genes have been identified widely by experimental strategies or predicted using computational methods. Systematic information for these genes would be necessary for the identification of novel imprinted genes and the analysis of their regulatory mechanisms and functions. Here, a well-designed information repository, MetaImprint (http://bioinfo.hrbmu.edu.cn/MetaImprint), is presented, which focuses on the collection of information concerning mammalian imprinted genes. 
The current version of MetaImprint incorporates 539 imprinted genes, including 255 experimentally confirmed genes, and their detailed research courses from eight mammalian species. MetaImprint also hosts genome-wide genetic and epigenetic information of imprinted genes, including imprinting control regions, single nucleotide polymorphisms, non-coding RNAs, DNA methylation and histone modifications. Information related to human diseases and functional annotation was also integrated into MetaImprint. To facilitate data extraction, MetaImprint supports multiple search options, such as by gene ID and disease name. Moreover, a configurable Imprinted Gene Browser was developed to visualize the information on imprinted genes in a genomic context. In addition, an Epigenetic Changes Analysis Tool is provided for online analysis of DNA methylation and histone modification differences of imprinted genes among multiple tissues and cell types. MetaImprint provides a comprehensive information repository of imprinted genes, allowing researchers to investigate systematically the genetic and epigenetic regulatory mechanisms of imprinted genes and their functions in development and diseases.",MetaImprint,0.993931651,NA,0,MetaImprint,0.993931651,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/21/2014 +"23155064, 29077942",http://metalweb.cerm.unifi.it,"MetalPDB: a database of metal sites in biological macromolecular structures. We present here MetalPDB (freely accessible at http://metalweb.cerm.unifi.it), a novel resource aimed at conveying the information available on the three-dimensional (3D) structures of metal-binding biological macromolecules in a consistent and effective manner. This is achieved through the systematic and automated representation of metal-binding sites in proteins and nucleic acids by way of Minimal Functional Sites (MFSs). 
MFSs are 3D templates that describe the local environment around the metal(s) independently of the larger context of the macromolecular structure embedding the site(s), and are the central objects of MetalPDB design. MFSs are grouped into equistructural (broadly defined as sites found in corresponding positions in similar structures) and equivalent sites (equistructural sites that contain the same metals), allowing users to easily analyse similarities and variations in metal-macromolecule interactions, and to link them to functional information. The web interface of MetalPDB allows access to a comprehensive overview of metal-containing biological structures, providing a basis to investigate the basic principles governing the properties of these systems. MetalPDB is updated monthly in an automated manner.",MetalPDB,0.997881174,NA,0,MetalPDB,0.997881174,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24475242,http://mmdb.aori.u-tokyo.ac.jp,"MetaMetaDB: a database and analytic system for investigating microbial habitability. MetaMetaDB (http://mmdb.aori.u-tokyo.ac.jp/) is a database and analytic system for investigating microbial habitability, i.e., how a prokaryotic group can inhabit different environments. The interaction between prokaryotes and the environment is a key issue in microbiology because distinct prokaryotic communities maintain distinct ecosystems. Because 16S ribosomal RNA (rRNA) sequences play pivotal roles in identifying prokaryotic species, a system that comprehensively links diverse environments to 16S rRNA sequences of the inhabitant prokaryotes is necessary for the systematic understanding of the microbial habitability. However, existing databases are biased to culturable prokaryotes and exhibit limitations in the comprehensiveness of the data because most prokaryotes are unculturable. 
Recently, metagenomic and 16S rRNA amplicon sequencing approaches have generated abundant 16S rRNA sequence data that encompass unculturable prokaryotes across diverse environments; however, these data are usually buried in large databases and are difficult to access. In this study, we developed MetaMetaDB (Meta-Metagenomic DataBase), which comprehensively and compactly covers 16S rRNA sequences retrieved from public datasets. Using MetaMetaDB, users can quickly generate hypotheses regarding the types of environments a prokaryotic group may be adapted to. We anticipate that MetaMetaDB will improve our understanding of the diversity and evolution of prokaryotes.",MetaMetaDB,0.99579674,Metagenomic DataBase,0.651920029,MetaMetaDB,0.99579674,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/27/2014 +25861770,http://fgfr.ibms.sinic.aedu.tw/MetaMirClust,"MetaMirClust: Discovery and Exploration of Evolutionarily Conserved miRNA Clusters. Recent emerging studies suggest that a substantial fraction of microRNA (miRNA) genes is likely to form clusters in terms of evolutionary conservation and biological implications, posing a significant challenge for the research community and shifting the bottleneck of scientific discovery from miRNA singletons to miRNA clusters. In addition, the advance in molecular sequencing technique such as next-generation sequencing (NGS) has facilitated researchers to comprehensively characterize miRNAs with low abundance on genome-wide scale in multiple species. Taken together, a large scale, cross-species survey of grouped miRNAs based on genomic location would be valuable for investigating their biological functions and regulations in an evolutionary perspective. 
In the present chapter, we describe the application of effective and efficient bioinformatics tools on the identification of clustered miRNAs and illustrate how to use the recently developed Web-based database, MetaMirClust (http://fgfr.ibms.sinic.aedu.tw/MetaMirClust) to discover evolutionarily conserved pattern of miRNA clusters across metazoans.",MetaMirClust,0.966021895,NA,0,MetaMirClust,0.966021895,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2016 +34156446,http://metamorf.hb.univ-amu.fr,"MetamORF: a repository of unique short open reading frames identified by both experimental and computational approaches for gene and metagene analyses. . The development of high-throughput technologies revealed the existence of non-canonical short open reading frames (sORFs) on most eukaryotic ribonucleic acids. They are ubiquitous genetic elements conserved across species and suspected to be involved in numerous cellular processes. MetamORF (https://metamorf.hb.univ-amu.fr/) aims to provide a repository of unique sORFs identified in the human and mouse genomes with both experimental and computational approaches. By gathering publicly available sORF data, normalizing them and summarizing redundant information, we were able to identify a total of 1 162 675 unique sORFs. Despite the usual characterization of ORFs as short, upstream or downstream, there is currently no clear consensus regarding the definition of these categories. Thus, the data have been reprocessed using a normalized nomenclature. MetamORF enables new analyses at locus, gene, transcript and ORF levels, which should offer the possibility to address new questions regarding sORF functions in the future. The repository is available through an user-friendly web interface, allowing easy browsing, visualization, filtering over multiple criteria and export possibilities. 
sORFs can be searched starting from a gene, a transcript and an ORF ID, looking in a genome area or browsing the whole repository for a species. The database content has also been made available through track hubs at UCSC Genome Browser. Finally, we demonstrated an enrichment of genes harboring upstream ORFs among genes expressed in response to reticular stress. Database URL  https://metamorf.hb.univ-amu.fr/.",MetamORF,0.997968137,NA,0,MetamORF,0.997968137,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2021 +26527720,http://www.metanetx.org,"MetaNetX/MNXref--reconciliation of metabolites and biochemical reactions to bring together genome-scale metabolic networks. MetaNetX is a repository of genome-scale metabolic networks (GSMNs) and biochemical pathways from a number of major resources imported into a common namespace of chemical compounds, reactions, cellular compartments--namely MNXref--and proteins. The MetaNetX.org website (http://www.metanetx.org/) provides access to these integrated data as well as a variety of tools that allow users to import their own GSMNs, map them to the MNXref reconciliation, and manipulate, compare, analyze, simulate (using flux balance analysis) and export the resulting GSMNs. MNXref and MetaNetX are regularly updated and freely available.",MetaNetX,0.997939885,NA,0,MetaNetX,0.997939885,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/2/2015 +33156326,"http://www.metanetx.org/, http://rdf.metanetx.org","MetaNetX/MNXref: unified namespace for metabolites and biochemical reactions in the context of metabolic models. MetaNetX/MNXref is a reconciliation of metabolites and biochemical reactions providing cross-links between major public biochemistry and Genome-Scale Metabolic Network (GSMN) databases. The new release brings several improvements with respect to the quality of the reconciliation, with particular attention dedicated to preserving the intrinsic properties of GSMN models. 
The MetaNetX website (https://www.metanetx.org/) provides access to the full database and online services. A major improvement is for mapping of user-provided GSMNs to MXNref, which now provides diagnostic messages about model content. In addition to the website and flat files, the resource can now be accessed through a SPARQL endpoint (https://rdf.metanetx.org).",MetaNetX/MNXref,0.987576434,NA,0,MetaNetX/MNXref,0.987576434,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +29630066,http://microbe.directory,"The Microbe Directory: An annotated, searchable inventory of microbes' characteristics. The Microbe Directory is a collective research effort to profile and annotate more than 7,500 unique microbial species from the MetaPhlAn2 database that includes bacteria, archaea, viruses, fungi, and protozoa. By collecting and summarizing data on various microbes' characteristics, the project comprises a database that can be used downstream of large-scale metagenomic taxonomic analyses, allowing one to interpret and explore their taxonomic classifications to have a deeper understanding of the microbial ecosystem they are studying. Such characteristics include, but are not limited to: optimal pH, optimal temperature, Gram stain, biofilm-formation, spore-formation, antimicrobial resistance, and COGEM class risk rating. The database has been manually curated by trained student-researchers from Weill Cornell Medicine and CUNY-Hunter College, and its analysis remains an ongoing effort with open-source capabilities so others can contribute. Available in SQL, JSON, and CSV (i.e. Excel) formats, the Microbe Directory can be queried for the aforementioned parameters by a microorganism's taxonomy. In addition to the raw database, The Microbe Directory has an online counterpart ( https://microbe.directory/) that provides a user-friendly interface for storage, retrieval, and analysis into which other microbial database projects could be incorporated. 
The Microbe Directory was primarily designed to serve as a resource for researchers conducting metagenomic analyses, but its online web interface should also prove useful to any individual who wishes to learn more about any particular microbe.",MetaPhlAn2,0.87283659,NA,0,MetaPhlAn2,0.87283659,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,1/5/2018 +25288655,http://metaprox.uwaterloo.ca,"MetaProx: the database of metagenomic proximons. . MetaProx is the database of metagenomic proximons: a searchable repository of proximon objects conceived with two specific goals. The first objective is to accelerate research involving metagenomic functional interactions by providing a database of metagenomic operon candidates. Proximons represent a special subset of directons (series of contiguous co-directional genes) where each member gene is in close proximity to its neighbours with respect to intergenic distance. As a result, proximons represent significant operon candidates where some subset of proximons is the set of true metagenomic operons. Proximons are well suited for the inference of metagenomic functional networks because predicted functional linkages do not rely on homology-dependent information that is frequently unavailable in metagenomic scenarios. The second objective is to explore representations for semistructured biological data that can offer an alternative to the traditional relational database approach. In particular, we use a serialized object implementation and advocate a Data as Data policy where the same serialized objects can be used at all levels (database, search tool and saved user file) without conversion or the use of human-readable markups. MetaProx currently includes 4,210,818 proximons consisting of 8 \,926,993 total member genes. 
Database URL: http://metaprox.uwaterloo.ca.",MetaProx,0.996964276,NA,0,MetaProx,0.996964276,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/6/2014 +24203705,http://metaref.org,"MetaRef: a pan-genomic database for comparative and community microbial genomics. Microbial genome sequencing is one of the longest-standing areas of biological database development, but high-throughput, low-cost technologies have increased its throughput to an unprecedented number of new genomes per year. Several thousand microbial genomes are now available, necessitating new approaches to organizing information on gene function, phylogeny and microbial taxonomy to facilitate downstream biological interpretation. MetaRef, available at http://metaref.org, is a novel online resource systematically cataloguing a comprehensive pan-genome of all microbial clades with sequenced isolates. It organizes currently available draft and finished bacterial and archaeal genomes into quality-controlled clades, reports all core and pan gene families at multiple levels in the resulting taxonomy, and it annotates families' conservation, phylogeny and consensus functional information. MetaRef also provides a comprehensive non-redundant reference gene catalogue for metagenomic studies, including the abundance and prevalence of all gene families in the >700 shotgun metagenomic samples of the Human Microbiome Project. This constitutes a systematic mapping of clade-specific microbial functions within the healthy human microbiome across multiple body sites and can be used as reference for identifying potential functional biomarkers in disease-associate microbiomes. 
MetaRef provides all information both as an online browsable resource and as downloadable sequences and tabular data files that can be used for subsequent offline studies.",MetaRef,0.996002674,NA,0,MetaRef,0.996002674,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2013 +33238004,http://www.introni.it/Metastasis/metastasis.html,"MetaTropismDB: a database of organ-specific metastasis induced by human cancer cell lines in mouse models. . The organotropism is the propensity of metastatic cancer cells to colonize preferably certain distant organs, resulting in a non-random distribution of metastases. In order to shed light on this behaviour, several studies were performed by the injection of human cancer cell lines into immunocompromised mouse models. However, the information about these experiments is spread in the literature. For each xenograft experiment reported in the literature, we annotated both the experimental conditions and outcomes, including details on inoculated human cell lines, mouse models, injection methods, sites of metastasis, organs not colonized, rate of metastasis, latency time, overall survival and the involved genes. We created MetaTropismDB, a freely available database collecting hand-curated data useful to highlight the mechanisms of organ-specific metastasis. Currently, it stores the results of 513 experiments in which injections of 219 human cell lines have been carried out in mouse models. Notably, 296 genes involved in organotropic metastases have been collected. This specialized database allows the researchers to compare the current results about organotropism and plan future experiments in order to identify which tumour molecular signatures establish if and where the metastasis will develop. 
Database URL:  http://www.introni.it/Metastasis/metastasis.html.",MetaTropismDB,0.996033907,NA,0,MetaTropismDB,0.996033907,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2020 +29967752,http://microbiology.se/software/metaxa2,"A reference cytochrome c oxidase subunit I database curated for hierarchical classification of arthropod metabarcoding data. Metabarcoding is a popular application which warrants continued methods optimization. To maximize barcoding inferences, hierarchy-based sequence classification methods are increasingly common. We present methods for the construction and curation of a database designed for hierarchical classification of a 157 bp barcoding region of the arthropod cytochrome c oxidase subunit I (COI) locus. We produced a comprehensive arthropod COI amplicon dataset including annotated arthropod COI sequences and COI sequences extracted from arthropod whole mitochondrion genomes, the latter of which provided the only source of representation for Zoraptera, Callipodida and Holothyrida. The database contains extracted sequences of the target amplicon from all major arthropod clades, including all insect orders, all arthropod classes and Onychophora, Tardigrada and Mollusca outgroups. During curation, we extracted the COI region of interest from approximately 81 percent of the input sequences, corresponding to 73 percent of the genus-level diversity found in the input data. Further, our analysis revealed a high degree of sequence redundancy within the NCBI nucleotide database, with a mean of approximately 11 sequence entries per species in the input data. The curated, low-redundancy database is included in the Metaxa2 sequence classification software (http://microbiology.se/software/metaxa2/). Using this database with the Metaxa2 classifier, we performed a cross-validation analysis to characterize the relationship between the Metaxa2 reliability score, an estimate of classification confidence, and classification error probability. 
We used this analysis to select a reliability score threshold which minimized error. We then estimated classification sensitivity, false discovery rate and overclassification, the propensity to classify sequences from taxa not represented in the reference database. Our work will help researchers design and evaluate classification databases and conduct metabarcoding on arthropods and alternate taxa.",Metaxa2,0.596945107,NA,0,Metaxa2,0.596945107,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/26/2018 +26255309,http://proteomics.ysu.edu/secretomes/animal/index.php,"MetazSecKB: the human and animal secretome and subcellular proteome knowledgebase. . The subcellular location of a protein is a key factor in determining the molecular function of the protein in an organism. MetazSecKB is a secretome and subcellular proteome knowledgebase specifically designed for metazoan, i.e. human and animals. The protein sequence data, consisting of over 4 million entries with 121 species having a complete proteome, were retrieved from UniProtKB. Protein subcellular locations including secreted and 15 other subcellular locations were assigned based on either curated experimental evidence or prediction using seven computational tools. The protein or subcellular proteome data can be searched and downloaded using several different types of identifiers, gene name or keyword(s), and species. BLAST search and community annotation of subcellular locations are also supported. Our primary analysis revealed that the proteome sizes, secretome sizes and other subcellular proteome sizes vary tremendously in different animal species. The proportions of secretomes vary from 3 to 22% (average 8%) in metazoa species. The proportions of other major subcellular proteomes ranged approximately 21-43% (average 31%) in cytoplasm, 20-37% (average 30%) in nucleus, 3-19% (average 12%) as plasma membrane proteins and 3-9% (average 6%) in mitochondria. 
We also compared the protein families in secretomes of different primates. The Gene Ontology and protein family domain analysis of human secreted proteins revealed that these proteins play important roles in regulation of human structure development, signal transduction, immune systems and many other biological processes. Database URL: http://proteomics.ysu.edu/secretomes/animal/index.php.",MetazSecKB,0.997505963,NA,0,MetazSecKB,0.997505963,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/8/2015 +25604335,http://metanogen.biotech.uni.wroc.pl,"Methanogenic archaea database containing physiological and biochemical characteristics. The methanogenic archaea are a group of micro-organisms that have developed a unique metabolic pathway for obtaining energy. There are 150 characterized species in this group; however, novel species continue to be discovered. Since methanogens are considered a crucial part of the carbon cycle in the anaerobic ecosystem, characterization of these micro-organisms is important for understanding anaerobic ecology. A methanogens database (MDB; http://metanogen.biotech.uni.wroc.pl/), including physiological and biochemical characteristics of methanogens, was constructed based on the descriptions of isolated type strains. Analysis of the data revealed that methanogens are able to grow from 0 to 122 °C. Methanogens growing at the same temperature may have very different growth rates. There is no clear correlation between the optimal growth temperature and the DNA G+C content. The following substrate preferences are observed in the database: 74.5% of archaea species utilize H2+CO2, 33% utilize methyl compounds and 8.5% utilize acetate. Utilization of methyl compounds (mainly micro-organisms belonging to the genera Methanosarcina and Methanolobus ) is seldom accompanied by an ability to utilize H2+CO2. Very often, data for described species are incomplete, especially substrate preferences. 
Additional research leading to completion of missing information and development of standards, especially for substrate utilization, would be very helpful.",MDB,0.949380875,Methanogenic archaea,0.958338092,Methanogenic archaea,0.958338092,1,29624889,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: INCORRECT NAME,NA,NA,1/20/2015 +29161430,http://bigd.big.ac.cn/methbank,"MethBank 3.0: a database of DNA methylomes across a variety of species. MethBank (http://bigd.big.ac.cn/methbank) is a database that integrates high-quality DNA methylomes across a variety of species and provides an interactive browser for visualization of methylation data. Here, we present an updated implementation of MethBank (version 3.0) by incorporating more DNA methylomes from multiple species and equipping with more enhanced functionalities for data annotation and more friendly web interfaces for data presentation, search and visualization. MethBank 3.0 features large-scale integration of high-quality methylomes, involving 34 consensus reference methylomes derived from a large number of human samples, 336 single-base resolution methylomes from different developmental stages and/or tissues of five plants, and 18 single-base resolution methylomes from gametes and early embryos at multiple stages of two animals. Additionally, it is enhanced by improving the functionalities for data annotation, which accordingly enables systematic identification of methylation sites closely associated with age, sites with constant methylation levels across different ages, differentially methylated promoters, age-specific differentially methylated cytosines/regions, and methylated CpG islands. Moreover, MethBank provides tools to estimate human methylation age online and to identify differentially methylated promoters, respectively. 
Taken together, MethBank is upgraded with significant improvements and advances over the previous version, which is of great help for deciphering DNA methylation regulatory mechanisms for epigenetic studies.",MethBank,0.994774401,NA,0,MethBank,0.994774401,1,NA,25294826,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +25294826,http://dnamethylome.org,"MethBank: a database integrating next-generation sequencing single-base-resolution DNA methylation programming data. DNA methylation plays crucial roles during embryonic development. Here we present MethBank (http://dnamethylome.org), a DNA methylome programming database that integrates the genome-wide single-base nucleotide methylomes of gametes and early embryos in different model organisms. Unlike extant relevant databases, MethBank incorporates the whole-genome single-base-resolution methylomes of gametes and early embryos at multiple different developmental stages in zebrafish and mouse. MethBank allows users to retrieve methylation levels, differentially methylated regions, CpG islands, gene expression profiles and genetic polymorphisms for a specific gene or genomic region. Moreover, it offers a methylome browser that is capable of visualizing high-resolution DNA methylation profiles as well as other related data in an interactive manner and thus is of great helpfulness for users to investigate methylation patterns and changes of gametes and early embryos at different developmental stages. Ongoing efforts are focused on incorporation of methylomes and related data from other organisms. 
Together, MethBank features integration and visualization of high-resolution DNA methylation data as well as other related data, enabling identification of potential DNA methylation signatures in different developmental stages and accordingly providing an important resource for the epigenetic and developmental studies.",MethBank,0.993737161,NA,0,MethBank,0.993737161,1,NA,29161430,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/7/2014 +29433427,http://cgma.scu.edu.cn/MethCNA,"MethCNA: a database for integrating genomic and epigenomic data in human cancer. Background The integration of DNA methylation and copy number alteration data promises to provide valuable insight into the underlying molecular mechanisms responsible for cancer initiation and progression. However, the generation and processing of these datasets are costly and time-consuming if carried out separately. The Illumina Infinium HumanMethylation450 BeadChip, initially designed for the evaluation of DNA methylation levels, allows copy number variant calling using bioinformatics tools. Results A substantial amount of Infinium HumanMethylation450 data across various cancer types has been accumulated in recent years and is a valuable resource for large-scale data analysis. Here we present MethCNA, a comprehensive database for genomic and epigenomic data integration in human cancer. In the current release, MethCNA contains about 10,000 tumor samples representing 37 cancer types. All raw array data were collected from The Cancer Genome Atlas and NCBI Gene Expression Omnibus database and analyzed using a pipeline that integrated multiple computational resources and tools. The normalized copy number aberration data and DNA methylation alterations were obtained. We provide a user-friendly web-interface for data mining and visualization. 
Conclusions The Illumina Infinium HumanMethylation450 BeadChip enables the interrogation and integration of both genomic and epigenomic data from exactly the same DNA specimen, and thus can aid in distinguishing driver from passenger mutations in cancer. We expect MethCNA will enable researchers to explore DNA methylation and copy number alteration patterns, identify key oncogenic drivers in cancer, and assist in the development of targeted therapies. MethCNA is publicly available online at http://cgma.scu.edu.cn/MethCNA .",MethCNA,0.993689835,NA,0,MethCNA,0.993689835,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/13/2018 +25398901,http://MethHC.mbc.nctu.edu.tw,"MethHC: a database of DNA methylation and gene expression in human cancer. We present MethHC (http://MethHC.mbc.nctu.edu.tw), a database comprising a systematic integration of a large collection of DNA methylation data and mRNA/microRNA expression profiles in human cancer. DNA methylation is an important epigenetic regulator of gene transcription, and genes with high levels of DNA methylation in their promoter regions are transcriptionally silent. Increasing numbers of DNA methylation and mRNA/microRNA expression profiles are being published in different public repositories. These data can help researchers to identify epigenetic patterns that are important for carcinogenesis. MethHC integrates data such as DNA methylation, mRNA expression, DNA methylation of microRNA gene and microRNA expression to identify correlations between DNA methylation and mRNA/microRNA expression from TCGA (The Cancer Genome Atlas), which includes 18 human cancers in more than 6000 samples, 6548 microarrays and 12 567 RNA sequencing data.",MethHC,0.997448325,NA,0,MethHC,0.997448325,1,NA,33270889,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,11/14/2014 +33270889,http://awi.cuhk.edu.cn/Ã,"MethHC 2.0: information repository of DNA methylation and gene expression in human cancer. 
DNA methylation is an important epigenetic regulator in gene expression and has several roles in cancer and disease progression. MethHC version 2.0 (MethHC 2.0) is an integrated and web-based resource focusing on the aberrant methylomes of human diseases, specifically cancer. This paper presents an updated implementation of MethHC 2.0 by incorporating additional DNA methylomes and transcriptomes from several public repositories, including 33 human cancers, over 50 118 microarray and RNA sequencing data from TCGA and GEO, and accumulating up to 3586 manually curated data from >7000 collected published literature with experimental evidence. MethHC 2.0 has also been equipped with enhanced data annotation functionality and a user-friendly web interface for data presentation, search, and visualization. Provided features include clinical-pathological data, mutation and copy number variation, multiplicity of information (gene regions, enhancer regions, and CGI regions), and circulating tumor DNA methylation profiles, available for research such as biomarker panel design, cancer comparison, diagnosis, prognosis, therapy study and identifying potential epigenetic biomarkers. MethHC 2.0 is now available at http://awi.cuhk.edu.cn/∼MethHC.",MethHC,0.976259843,NA,0,MethHC,0.976259843,1,NA,25398901,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,1/1/2021 +30380113,http://bioinfo-csi.nus.edu.sg/methmotif,"MethMotif: an integrative cell specific database of transcription factor binding motifs coupled with DNA methylation profiles. Several recent studies have portrayed DNA methylation as a new player in the recruitment of transcription factors (TF) within chromatin, highlighting a need to connect TF binding sites (TFBS) with their respective DNA methylation profiles. However, current TFBS databases are restricted to DNA binding motif sequences. 
Here, we present MethMotif, a two-dimensional TFBS database that records TFBS position weight matrices along with cell type specific CpG methylation information computed from a combination of ChIP-seq and whole genome bisulfite sequencing datasets. Integrating TFBS motifs with TFBS DNA methylation better portrays the features of DNA loci recognised by TFs. In particular, we found that DNA methylation patterns within TFBS can be cell specific (e.g. MAFF). Furthermore, for a given TF, different DNA methylation profiles are associated with different DNA binding motifs (e.g. REST). To date, MethMotif database records over 500 TFBSs computed from over 2000 ChIP-seq datasets in 11 different cell types. MethMotif portal is accessible through an open source web interface (https://bioinfo-csi.nus.edu.sg/methmotif) that allows users to intuitively explore the entire dataset and perform both single, and batch queries.",MethMotif,0.995933473,NA,0,MethMotif,0.995933473,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +27924023,http://sysbio.sysu.edu.cn/methsmrt,"MethSMRT: an integrative database for DNA N6-methyladenine and N4-methylcytosine generated by single-molecular real-time sequencing. DNA methylation is an important type of epigenetic modifications, where 5- methylcytosine (5mC), 6-methyadenine (6mA) and 4-methylcytosine (4mC) are the most common types. Previous efforts have been largely focused on 5mC, providing invaluable insights into epigenetic regulation through DNA methylation. Recently developed single-molecule real-time (SMRT) sequencing technology provides a unique opportunity to detect the less studied DNA 6mA and 4mC modifications at single-nucleotide resolution. With a rapidly increased amount of SMRT sequencing data generated, there is an emerging demand to systematically explore DNA 6mA and 4mC modifications from these data sets. MethSMRT is the first resource hosting DNA 6mA and 4mC methylomes. 
All the data sets were processed using the same analysis pipeline with the same quality control. The current version of the database provides a platform to store, browse, search and download epigenome-wide methylation profiles of 156 species, including seven eukaryotes such as Arabidopsis, C. elegans, Drosophila, mouse and yeast, as well as 149 prokaryotes. It also offers a genome browser to visualize the methylation sites and related information such as single nucleotide polymorphisms (SNP) and genomic annotation. Furthermore, the database provides a quick summary of statistics of methylome of 6mA and 4mC and predicted methylation motifs for each species. MethSMRT is publicly available at http://sysbio.sysu.edu.cn/methsmrt/ without use restriction.",MethSMRT,0.995354295,NA,0,MethSMRT,0.995354295,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/18/2016 +22140101,http://epigenomics.columbia.edu/methylomedb/index.html,"MethylomeDB: a database of DNA methylation profiles of the brain. MethylomeDB (http://epigenomics.columbia.edu/methylomedb/index.html) is a new database containing genome-wide brain DNA methylation profiles. DNA methylation is an important epigenetic mark in the mammalian brain. In human studies, aberrant DNA methylation alterations have been associated with various neurodevelopmental and neuropsychiatric disorders such as schizophrenia, and depression. In this database, we present methylation profiles of carefully selected non-psychiatric control, schizophrenia, and depression samples. We also include data on one mouse forebrain sample specimen to allow for cross-species comparisons. In addition to our DNA methylation data generated in-house, we have and will continue to include published DNA methylation data from other research groups with the focus on brain development and function. Users can view the methylation data at single-CpG resolution with the option of wiggle and microarray formats. They can also download methylation data for individual samples. 
MethylomeDB offers an important resource for research into brain function and behavior. It provides the first source of comprehensive brain methylome data, encompassing whole-genome DNA methylation profiles of human and mouse brain specimens that facilitate cross-species comparative epigenomic investigations, as well as investigations of schizophrenia and depression methylomes.",MethylomeDB,0.997244477,NA,0,MethylomeDB,0.997244477,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/2/2011 +30150755,"http://xcmsonline-mrm.scripps.edu/, http://metlin.scripps.edu","XCMS-MRM and METLIN-MRM: a cloud library and public resource for targeted analysis of small molecules. We report XCMS-MRM and METLIN-MRM ( http://xcmsonline-mrm.scripps.edu/ and http://metlin.scripps.edu/ ), a cloud-based data-analysis platform and a public multiple-reaction monitoring (MRM) transition repository for small-molecule quantitative tandem mass spectrometry. This platform provides MRM transitions for more than 15,500 molecules and facilitates data sharing across different instruments and laboratories.",METLIN-MRM,0.899676859,NA,0,METLIN-MRM,0.899676859,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: URL scramble,NA,NA,8/27/2018 +23066841,"http://www.metnetonline.org, http://www.metnetonline.org/tutorial","MetNet Online: a novel integrated resource for plant systems biology. Background Plants are important as foods, pharmaceuticals, biorenewable chemicals, fuel resources, bioremediation tools and general tools for recombinant technology. The study of plant biological pathways is advanced by easy access to integrated data sources. Today, various plant data sources are scattered throughout the web, making it increasingly complicated to build comprehensive datasets. Results MetNet Online is a web-based portal that provides access to a regulatory and metabolic plant pathway database. The database and portal integrate Arabidopsis, soybean (Glycine max) and grapevine (Vitis vinifera) data. 
Pathways are enriched with known or predicted information on sub cellular location. MetNet Online enables pathways, interactions and entities to be browsed or searched by multiple categories such as sub cellular compartment, pathway ontology, and GO term. In addition to this, the ""My MetNet"" feature allows registered users to bookmark content and track, import and export customized lists of entities. Users can also construct custom networks using existing pathways and/or interactions as building blocks. Conclusion The site can be reached at http://www.metnetonline.org. Extensive video tutorials on how to use the site are available through http://www.metnetonline.org/tutorial/.",MetNet,0.945627868,NA,0,MetNet,0.945627868,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/15/2012 +31197322,http://metosite.uma.es,"MetOSite: an integrated resource for the study of methionine residues sulfoxidation. Motivation The oxidation of protein-bound methionine to form methionine sulfoxide has traditionally been regarded as an oxidative damage. However, growing evidences support the view of this reversible reaction also as a regulatory post-translational modification. Thus, the oxidation of methionine residues has been reported to have multiple and varied implications for protein function. However, despite the importance of this modification and the abundance of reports, all these data are scattered in the literature. No database/resource on methionine sulfoxidation exists currently. Since this information is useful to gain further insights into the redox regulation of cellular proteins, we have created a primary database of experimentally confirmed sulfoxidation sites. Results MetOSite currently contains 7242 methionine sulfoxide sites found in 3562 different proteins from 23 species, with Homo sapiens, Arabidopsis thaliana and Bacillus cereus as the main contributors. 
Each collected site has been classified according to the effect of its sulfoxidation on the biological properties of the modified protein. Thus, MetOSite documents cases where the sulfoxidation of methionine leads to (i) gain of activity, (ii) loss of activity, (iii) increased protein-protein interaction susceptibility, (iv) decreased protein-protein interaction susceptibility, (v) changes in protein stability and (vi) changes in subcellular location. Availability and implementation MetOSite is available at https://metosite.uma.es.",MetOSite,0.994946599,NA,0,MetOSite,0.994946599,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2019 +26106450,http://www-metrabase.ch.cam.ac.uk,"Metrabase: a cheminformatics and bioinformatics database for small molecule transporter data analysis and (Q)SAR modeling. Abstract Both metabolism and transport are key elements defining the bioavailability and biological activity of molecules, i.e. their adverse and therapeutic effects. Structured and high quality experimental data stored in a suitable container, such as a relational database, facilitates easy computational processing and thus allows for high quality information/knowledge to be efficiently inferred by computational analyses. Our aim was to create a freely accessible database that would provide easy access to data describing interactions between proteins involved in transport and xenobiotic metabolism and their small molecule substrates and modulators. We present Metrabase, an integrated cheminformatics and bioinformatics resource containing curated data related to human transport and metabolism of chemical compounds. Its primary content includes over 11,500 interaction records involving nearly 3,500 small molecule substrates and modulators of transport proteins and, currently to a much smaller extent, cytochrome P450 enzymes. Data was manually extracted from the published literature and supplemented with data integrated from other available resources. 
Metrabase version 1.0 is freely available under a CC BY-SA 4.0 license at http://www-metrabase.ch.cam.ac.uk.",Metrabase,0.998185456,NA,0,Metrabase,0.998185456,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/23/2015 +23019219,http://metscout.mpg.de,"METscout: a pathfinder exploring the landscape of metabolites, enzymes and transporters. METscout (http://metscout.mpg.de) brings together metabolism and gene expression landscapes. It is a MySQL relational database linking biochemical pathway information with 3D patterns of gene expression determined by robotic in situ hybridization in the E14.5 mouse embryo. The sites of expression of ∼1500 metabolic enzymes and of ∼350 solute carriers (SLCs) were included and are accessible as single cell resolution images and in the form of semi-quantitative image abstractions. METscout provides several graphical web-interfaces allowing navigation through complex anatomical and metabolic information. Specifically, the database shows where in the organism each of the many metabolic reactions take place and where SLCs transport metabolites. To link enzymatic reactions and transport, the KEGG metabolic reaction network was extended to include metabolite transport. This network in conjunction with spatial expression pattern of the network genes allows for a tracing of metabolic reactions and transport processes across the entire body of the embryo.",METscout,0.998384833,NA,0,METscout,0.998384833,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/27/2012 +28968812,http://www.bio-annotation.cn/MetSigDis,"MetSigDis: a manually curated resource for the metabolic signatures of diseases. Complex diseases cannot be understood only on the basis of single gene, single mRNA transcript or single protein but the effect of their collaborations. The combination consequence in molecular level can be captured by the alterations of metabolites. 
With the rapidly developing of biomedical instruments and analytical platforms, a large number of metabolite signatures of complex diseases were identified and documented in the literature. Biologists' hardship in the face of this large amount of papers recorded metabolic signatures of experiments' results calls for an automated data repository. Therefore, we developed MetSigDis aiming to provide a comprehensive resource of metabolite alterations in various diseases. MetSigDis is freely available at http://www.bio-annotation.cn/MetSigDis/. By reviewing hundreds of publications, we collected 6849 curated relationships between 2420 metabolites and 129 diseases across eight species involving Homo sapiens and model organisms. All of these relationships were used in constructing a metabolite disease network (MDN). This network displayed scale-free characteristics according to the degree distribution (power-law distribution with R2 = 0.909), and the subnetwork of MDN for interesting diseases and their related metabolites can be visualized in the Web. The common alterations of metabolites reflect the metabolic similarity of diseases, which is measured using Jaccard index. We observed that metabolite-based similar diseases are inclined to share semantic associations of Disease Ontology. A human disease network was then built, where a node represents a disease, and an edge indicates similarity of pair-wise diseases. The network validated the observation that linked diseases based on metabolites should have more overlapped genes.",MetSigDis,0.997604191,NA,0,MetSigDis,0.997604191,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +33382886,http://www.healthdisparityinformatics.com/MetSRR,"Creating a Metabolic Syndrome Research Resource using the National Health and Nutrition Examination Survey. . Metabolic syndrome (MetS) is multifaceted. Risk factors include visceral adiposity, dyslipidemia, hyperglycemia, hypertension and environmental stimuli. 
MetS leads to an increased risk of cardiovascular disease, type 2 diabetes and stroke. Comparative studies, however, have identified heterogeneity in the pathology of MetS across groups though the etiology of these differences has yet to be elucidated. The Metabolic Syndrome Research Resource (MetSRR) described in this report is a curated database that provides access to MetS-associated biological and ancillary data and pools current and potential biomarkers of MetS extracted from relevant National Health and Nutrition Examination Survey (NHANES) data from 1999-2016. Each potential biomarker was selected following the review of over 100 peer-reviewed articles. MetSRR includes 28 demographics, survey and known MetS-related variables, including 9 curated categorical variables and 42 potentially novel biomarkers. All measures are captured from over 90 000 individuals. This biocuration effort provides increased access to curated MetS-related data and will serve as a hypothesis-generating tool to aid in novel biomarker discovery. In addition, MetSRR provides the ability to generate and export ethnic group-/race-, sex- and age-specific curated datasets, thus broadening participation in research efforts to identify clinically evaluative MetS biomarkers for disparate populations. Although there are other databases, such as BioM2MetDisease, designed to explore metabolic diseases through analysis of miRNAs and disease phenotypes, MetSRR is the only MetS-specific database designed to explore etiology of MetS across groups, through the biocuration of demographic, biological samples and biometric data. Database URL:  http://www.healthdisparityinformatics.com/MetSRR.",MetSRR,0.996502876,Metabolic Syndrome Research Resource,0.966591418,MetSRR,0.996502876,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +29036655,http://mfib.enzim.ttk.mta.hu,"MFIB: a repository of protein complexes with mutual folding induced by binding. 
Motivation It is commonplace that intrinsically disordered proteins (IDPs) are involved in crucial interactions in the living cell. However, the study of protein complexes formed exclusively by IDPs is hindered by the lack of data and such analyses remain sporadic. Systematic studies benefited other types of protein-protein interactions paving a way from basic science to therapeutics; yet these efforts require reliable datasets that are currently lacking for synergistically folding complexes of IDPs. Results Here we present the Mutual Folding Induced by Binding (MFIB) database, the first systematic collection of complexes formed exclusively by IDPs. MFIB contains an order of magnitude more data than any dataset used in corresponding studies and offers a wide coverage of known IDP complexes in terms of flexibility, oligomeric composition and protein function from all domains of life. The included complexes are grouped using a hierarchical classification and are complemented with structural and functional annotations. MFIB is backed by a firm development team and infrastructure, and together with possible future community collaboration it will provide the cornerstone for structural and functional studies of IDP complexes. Availability and implementation MFIB is freely accessible at http://mfib.enzim.ttk.mta.hu/. The MFIB application is hosted by Apache web server and was implemented in PHP. To enrich querying features and to enhance backend performance a MySQL database was also created. Contact simon.istvan@ttk.mta.hu, meszaros.balint@ttk.mta.hu. Supplementary information Supplementary data are available at Bioinformatics online.",MFIB,0.981060028,Mutual Folding Induced by,0.731679062,MFIB,0.981060028,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2017 +26173767,http://mfmd.pasteur.ma,"Mediterranean Founder Mutation Database (MFMD): Taking Advantage from Founder Mutations in Genetics Diagnosis, Genetic Diversity and Migration History of the Mediterranean Population. 
The Mediterranean basin has been the theater of migration crossroads followed by settlement of several societies and cultures in prehistoric and historical times, with important consequences on genetic and genomic determinisms. Here, we present the Mediterranean Founder Mutation Database (MFMD), established to offer web-based access to founder mutation information in the Mediterranean population. Mutation data were collected from the literature and other online resources and systematically reviewed and assembled into this database. The information provided for each founder mutation includes DNA change, amino-acid change, mutation type and mutation effect, as well as mutation frequency and coalescence time when available. Currently, the database contains 383 founder mutations found in 210 genes related to 219 diseases. We believe that MFMD will help scientists and physicians to design more rapid and less expensive genetic diagnostic tests. Moreover, the coalescence time of founder mutations gives an overview about the migration history of the Mediterranean population. MFMD can be publicly accessed from http://mfmd.pasteur.ma.",MFMD,0.991956189,Mediterranean Founder Mutation Database,0.945347416,MFMD,0.991956189,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/30/2015 +26656948,http://metagenomics.anl.gov,"The MG-RAST metagenomics database and portal in 2015. MG-RAST (http://metagenomics.anl.gov) is an open-submission data portal for processing, analyzing, sharing and disseminating metagenomic datasets. The system currently hosts over 200,000 datasets and is continuously updated. The volume of submissions has increased 4-fold over the past 24 months, now averaging 4 terabasepairs per month. In addition to several new features, we report changes to the analysis workflow and the technologies used to scale the pipeline up to the required throughput levels. 
To show possible uses for the data from MG-RAST, we present several examples integrating data and analyses from MG-RAST into popular third-party analysis tools or sequence alignment tools.",MG-RAST,0.996288791,NA,0,MG-RAST,0.996288791,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/9/2015 +29069466,http://ccg.vital-it.ch/mga,"MGA repository: a curated data resource for ChIP-seq and other genome annotated data. The Mass Genome Annotation (MGA) repository is a resource designed to store published next generation sequencing data and other genome annotation data (such as gene start sites, SNPs, etc.) in a completely standardised format. Each sample has undergone local processing in order the meet the strict MGA format requirements. The original data source, the reformatting procedure and the biological characteristics of the samples are described in an accompanying documentation file manually edited by data curators. 10 model organisms are currently represented: Homo sapiens, Mus musculus, Danio rerio, Drosophila melanogaster, Apis mellifera, Caenorhabditis elegans, Arabidopsis thaliana, Zea mays, Saccharomyces cerevisiae and Schizosaccharomyces pombe. As of today, the resource contains over 24 000 samples. In conjunction with other tools developed by our group (the ChIP-Seq and SSA servers), it allows users to carry out a great variety of analysis task with MGA samples, such as making aggregation plots and heat maps for selected genomic regions, finding peak regions, generating custom tracks for visualizing genomic features in a UCSC genome browser window, or downloading chromatin data in a table format suitable for local processing with more advanced statistical analysis software such as R. 
Home page: http://ccg.vital-it.ch/mga/.",MGA,0.93783768,Genome,0.568572879,MGA,0.93783768,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +30841849,http://mgap.ohsu.edu,"mGAP: the macaque genotype and phenotype resource, a framework for accessing and interpreting macaque variant data, and identifying new models of human disease. Background Non-human primates (NHPs), particularly macaques, serve as critical and highly relevant pre-clinical models of human disease. The similarity in human and macaque natural disease susceptibility, along with parallel genetic risk alleles, underscores the value of macaques in the development of effective treatment strategies. Nonetheless, there are limited genomic resources available to support the exploration and discovery of macaque models of inherited disease. Notably, there are few public databases tailored to searching NHP sequence variants, and no other database making use of centralized variant calling, or providing genotype-level data and predicted pathogenic effects for each variant. Results The macaque Genotype And Phenotype (mGAP) resource is the first public website providing searchable, annotated macaque variant data. The mGAP resource includes a catalog of high confidence variants, derived from whole genome sequence (WGS). The current mGAP release at time of publication (1.7) contains 17,087,212 variants based on the sequence analysis of 293 rhesus macaques. A custom pipeline was developed to enable annotation of the macaque variants, leveraging human data sources that include regulatory elements (ENCODE, RegulomeDB), known disease- or phenotype-associated variants (GRASP), predicted impact (SIFT, PolyPhen2), and sequence conservation (Phylop, PhastCons). Currently mGAP includes 2767 variants that are identical to alleles listed in the human ClinVar database, of which 276 variants, spanning 258 genes, are identified as pathogenic. 
An additional 12,472 variants are predicted as high impact (SnpEff) and 13,129 are predicted as damaging (PolyPhen2). In total, these variants are predicted to be associated with more than 2000 human disease or phenotype entries reported in OMIM (Online Mendelian Inheritance in Man). Importantly, mGAP also provides genotype-level data for all subjects, allowing identification of specific individuals harboring alleles of interest. Conclusions The mGAP resource provides variant and genotype data from hundreds of rhesus macaques, processed in a consistent manner across all subjects ( https://mgap.ohsu.edu ). Together with the extensive variant annotations, mGAP presents unprecedented opportunity to investigate potential genetic associations with currently characterized disease models, and to uncover new macaque models based on parallels with human risk alleles.",mGAP,0.988449275,macaque Genotype And Phenotype,0.870370652,mGAP,0.988449275,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/6/2019 +"22075990, 23175610, 24285300, 25348401, 26578600, 27899570, 29092072, 30407599, 33231642",http://www.informatics.jax.org,"The Mouse Genome Database (MGD): comprehensive resource for genetics and genomics of the laboratory mouse. The Mouse Genome Database (MGD, http://www.informatics.jax.org) is the international community resource for integrated genetic, genomic and biological data about the laboratory mouse. Data in MGD are obtained through loads from major data providers and experimental consortia, electronic submissions from laboratories and from the biomedical literature. MGD maintains a comprehensive, unified, non-redundant catalog of mouse genome features generated by distilling gene predictions from NCBI, Ensembl and VEGA. MGD serves as the authoritative source for the nomenclature of mouse genes, mutations, alleles and strains. MGD is the primary source for evidence-supported functional annotations for mouse genes and gene products using the Gene Ontology (GO). 
MGD provides full annotation of phenotypes and human disease associations for mouse models (genotypes) using terms from the Mammalian Phenotype Ontology and disease names from the Online Mendelian Inheritance in Man (OMIM) resource. MGD is freely accessible online through our website, where users can browse and search interactively, access data in bulk using Batch Query or BioMart, download data files or use our web services Application Programming Interface (API). Improvements to MGD include expanded genome feature classifications, inclusion of new mutant allele sets and phenotype associations and extensions of GO to include new relationships and a new stream of annotations via phylogenetic-based approaches.",MGD,0.99467206,Mouse Genome Database,0.976606995,MGD,0.99467206,9,"23110975.0, 29761459.0, 34698891.0",NA,NA,NA,do not merge,NA,NA,NA,NA,1/1/2021 +26424083,http://bioinfo.ahu.edu.cn:8080/Melanoma/index.jsp,"MGDB: a comprehensive database of genes involved in melanoma. . The Melanoma Gene Database (MGDB) is a manually curated catalog of molecular genetic data relating to genes involved in melanoma. The main purpose of this database is to establish a network of melanoma related genes and to facilitate the mechanistic study of melanoma tumorigenesis. The entries describing the relationships between melanoma and genes in the current release were manually extracted from PubMed abstracts, which contains cumulative to date 527 human melanoma genes (422 protein-coding and 105 non-coding genes). Each melanoma gene was annotated in seven different aspects (General Information, Expression, Methylation, Mutation, Interaction, Pathway and Drug). In addition, manually curated literature references have also been provided to support the inclusion of the gene in MGDB and establish its association with melanoma. MGDB has a user-friendly web interface with multiple browse and search functions. 
We hoped MGDB will enrich our knowledge about melanoma genetics and serve as a useful complement to the existing public resources. Database URL: http://bioinfo.ahu.edu.cn:8080/Melanoma/index.jsp.",MGDB,0.996766806,Melanoma Gene Database,0.976466978,MGDB,0.996766806,1,NA,21904429,NA,NA,NA,do not merge,NA,NA,NA,9/30/2015 +21904429,http://mpgdb.ibioinformatics.org/mpgdb.php,Mycobacteriophage genome database. Unlabelled Mycobacteriophage genome database (MGDB) is an exclusive repository of the 64 completely sequenced mycobacteriophages with annotated information. It is a comprehensive compilation of the various gene parameters captured from several databases pooled together to empower mycobacteriophage researchers. The MGDB (Version No.1.0) comprises of 6086 genes from 64 mycobacteriophages classified into 72 families based on ACLAME database. Manual curation was aided by information available from public databases which was enriched further by analysis. Its web interface allows browsing as well as querying the classification. The main objective is to collect and organize the complexity inherent to mycobacteriophage protein classification in a rational way. The other objective is to browse the existing and new genomes and describe their functional annotation. Availability The database is available for free at http://mpgdb.ibioinformatics.org/mpgdb.php.,MGDB,0.991700888,Unlabelled Mycobacteriophage genome database,0.943340257,MGDB,0.991700888,1,NA,26424083,NA,NA,NA,do not merge,NA,NA,NA,8/2/2011 +23860041,http://mgdd.pasteur.ma,"The Moroccan Genetic Disease Database (MGDD): a database for DNA variations related to inherited disorders and disease susceptibility. National and ethnic mutation databases provide comprehensive information about genetic variations reported in a population or an ethnic group. In this paper, we present the Moroccan Genetic Disease Database (MGDD), a catalogue of genetic data related to diseases identified in the Moroccan population. 
We used the PubMed, Web of Science and Google Scholar databases to identify available articles published until April 2013. The Database is designed and implemented on a three-tier model using Mysql relational database and the PHP programming language. To date, the database contains 425 mutations and 208 polymorphisms found in 301 genes and 259 diseases. Most Mendelian diseases in the Moroccan population follow autosomal recessive mode of inheritance (74.17%) and affect endocrine, nutritional and metabolic physiology. The MGDD database provides reference information for researchers, clinicians and health professionals through a user-friendly Web interface. Its content should be useful to improve researches in human molecular genetics, disease diagnoses and design of association studies. MGDD can be publicly accessed at http://mgdd.pasteur.ma.",MGDD,0.994052202,Moroccan Genetic Disease Database,0.972358629,MGDD,0.994052202,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/17/2013 +22606288,http://resource.ibab.ac.in/MGEx-Udb,"MGEx-Udb: a mammalian uterus database for expression-based cataloguing of genes across conditions, including endometriosis and cervical cancer. Background Gene expression profiling of uterus tissue has been performed in various contexts, but a significant amount of the data remains underutilized as it is not covered by the existing general resources. Methodology/principal findings We curated 2254 datasets from 325 uterus related mass scale gene expression studies on human, mouse, rat, cow and pig species. We then computationally derived a 'reliability score' for each gene's expression status (transcribed/dormant), for each possible combination of conditions and locations, based on the extent of agreement or disagreement across datasets. The data and derived information has been compiled into the Mammalian Gene Expression Uterus database (MGEx-Udb, http://resource.ibab.ac.in/MGEx-Udb/). 
The database can be queried with gene names/IDs, sub-tissue locations, as well as various conditions such as the cervical cancer, endometrial cycles and disorders, and experimental treatments. Accordingly, the output would be a) transcribed and dormant genes listed for the queried condition/location, or b) expression profile of the gene of interest in various uterine conditions. The results also include the reliability score for the expression status of each gene. MGEx-Udb also provides information related to Gene Ontology annotations, protein-protein interactions, transcripts, promoters, and expression status by other sequencing techniques, and facilitates various other types of analysis of the individual genes or co-expressed gene clusters. Conclusions/significance In brief, MGEx-Udb enables easy cataloguing of co-expressed genes and also facilitates bio-marker discovery for various uterine conditions.",MGEx-Udb,0.998166392,Mammalian Gene Expression Uterus database,0.957160369,MGEx-Udb,0.998166392,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/11/2012 +"23110975, 29761459, 34698891",http://www.informatics.jax.org,"Manual Gene Ontology annotation workflow at the Mouse Genome Informatics Database. The Mouse Genome Database, the Gene Expression Database and the Mouse Tumor Biology database are integrated components of the Mouse Genome Informatics (MGI) resource (http://www.informatics.jax.org). The MGI system presents both a consensus view and an experimental view of the knowledge concerning the genetics and genomics of the laboratory mouse. From genotype to phenotype, this information resource integrates information about genes, sequences, maps, expression analyses, alleles, strains and mutant phenotypes. Comparative mammalian data are also presented particularly in regards to the use of the mouse as a model for the investigation of molecular and genetic components of human diseases. 
These data are collected from literature curation as well as downloads of large datasets (SwissProt, LocusLink, etc.). MGI is one of the founding members of the Gene Ontology (GO) and uses the GO for functional annotation of genes. Here, we discuss the workflow associated with manual GO annotation at MGI, from literature collection to display of the annotations. Peer-reviewed literature is collected mostly from a set of journals available electronically. Selected articles are entered into a master bibliography and indexed to one of eight areas of interest such as 'GO' or 'homology' or 'phenotype'. Each article is then either indexed to a gene already contained in the database or funneled through a separate nomenclature database to add genes. The master bibliography and associated indexing provide information for various curator-reports such as 'papers selected for GO that refer to genes with NO GO annotation'. Once indexed, curators who have expertise in appropriate disciplines enter pertinent information. MGI makes use of several controlled vocabularies that ensure uniform data encoding, enable robust analysis and support the construction of complex queries. These vocabularies range from pick-lists to structured vocabularies such as the GO. All data associations are supported with statements of evidence as well as access to source publications.",MGI,0.983090103,Mouse Genome Informatics,0.812098155,MGI,0.983090103,3,"22075990.0, 23175610.0, 24285300.0, 25348401.0, 26578600.0, 27899570.0, 29092072.0, 30407599.0, 33231642.0",NA,NA,NA,do not merge,NA,NA,NA,NA,10/26/2021 +31696235,http://www.ebi.ac.uk/metagenomics,"MGnify: the microbiome analysis resource in 2020. MGnify (http://www.ebi.ac.uk/metagenomics) provides a free to use platform for the assembly, analysis and archiving of microbiome data derived from sequencing microbial populations that are present in particular environments. 
Over the past 2 years, MGnify (formerly EBI Metagenomics) has more than doubled the number of publicly available analysed datasets held within the resource. Recently, an updated approach to data analysis has been unveiled (version 5.0), replacing the previous single pipeline with multiple analysis pipelines that are tailored according to the input data, and that are formally described using the Common Workflow Language, enabling greater provenance, reusability, and reproducibility. MGnify's new analysis pipelines offer additional approaches for taxonomic assertions based on ribosomal internal transcribed spacer regions (ITS1/2) and expanded protein functional annotations. Biochemical pathways and systems predictions have also been added for assembled contigs. MGnify's growing focus on the assembly of metagenomic data has also seen the number of datasets it has assembled and analysed increase six-fold. The non-redundant protein database constructed from the proteins encoded by these assemblies now exceeds 1 billion sequences. Meanwhile, a newly developed contig viewer provides fine-grained visualisation of the assembled contigs and their enriched annotations.",MGnify,0.997318149,NA,0,MGnify,0.997318149,1,26582919,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2020 +32386298,http://research.nhgri.nih.gov/mnemiopsis,"The Mnemiopsis Genome Project Portal: integrating new gene expression resources and improving data visualization. . Following the completion of the genome sequencing and gene prediction of Mnemiopsis leidyi, a lobate ctenophore that is native to the coastal waters of the western Atlantic Ocean, we developed and implemented the Mnemiopsis Genome Project Portal (MGP Portal), a comprehensive Web-based data portal for navigating the genome sequence and gene annotations. 
In the years following the first release of the MGP Portal, it has become evident that the inclusion of data from significant published studies on Mnemiopsis has been critical to its adoption as the centralized resource for this emerging model organism. With this most recent update, the Portal has significantly expanded to include in situ images, temporal developmental expression profiles and single-cell expression data. Recent enhancements also include implementations of an updated BLAST interface, new graphical visualization tools and updates to gene pages that integrate all new data types. Database URL: https://research.nhgri.nih.gov/mnemiopsis/.",MGP Portal,0.925392497,Mnemiopsis Genome Project Portal,0.882159429,MGP Portal,0.925392497,1,24773765,NA,low_prob_best_name,do not remove,conflicting record(s) to be removed,NA,NA,NA,NA,1/1/2020 +27167218,http://47.88.84.236/Miasdb,"MiasDB: A Database of Molecular Interactions Associated with Alternative Splicing of Human Pre-mRNAs. Alternative splicing (AS) is pervasive in human multi-exon genes and is a major contributor to expansion of the transcriptome and proteome diversity. The accurate recognition of alternative splice sites is regulated by information contained in networks of protein-protein and protein-RNA interactions. However, the mechanisms leading to splice site selection are not fully understood. Although numerous databases have been built to describe AS, molecular interaction databases associated with AS have only recently emerged. In this study, we present a new database, MiasDB, that provides a description of molecular interactions associated with human AS events. This database covers 938 interactions between human splicing factors, RNA elements, transcription factors, kinases and modified histones for 173 human AS events. 
Every entry includes the interaction partners, interaction type, experimental methods, AS type, tissue specificity or disease-relevant information, a simple description of the functionally tested interaction in the AS event and references. The database can be queried easily using a web server (http://47.88.84.236/Miasdb). We display some interaction figures for several genes. With this database, users can view the regulation network describing AS events for 12 given genes.",MiasDB,0.997192025,NA,0,MiasDB,0.997192025,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/11/2016 +31612915,http://mibig.secondarymetabolites.org,"MIBiG 2.0: a repository for biosynthetic gene clusters of known function. Fueled by the explosion of (meta)genomic data, genome mining of specialized metabolites has become a major technology for drug discovery and studying microbiome ecology. In these efforts, computational tools like antiSMASH have played a central role through the analysis of Biosynthetic Gene Clusters (BGCs). Thousands of candidate BGCs from microbial genomes have been identified and stored in public databases. Interpreting the function and novelty of these predicted BGCs requires comparison with a well-documented set of BGCs of known function. The MIBiG (Minimum Information about a Biosynthetic Gene Cluster) Data Standard and Repository was established in 2015 to enable curation and storage of known BGCs. Here, we present MIBiG 2.0, which encompasses major updates to the schema, the data, and the online repository itself. Over the past five years, 851 new BGCs have been added. Additionally, we performed extensive manual data curation of all entries to improve the annotation quality of our repository. We also redesigned the data schema to ensure the compliance of future annotations. Finally, we improved the user experience by adding new features such as query searches and a statistics page, and enabled direct link-outs to chemical structure databases. 
The repository is accessible online at https://mibig.secondarymetabolites.org/.",MIBiG,0.996939003,Minimum Information about a Biosynthetic Gene Cluster,0.971989518,MIBiG,0.996939003,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +25022454,http://www.polyu.edu.hk/bmi/dipp,"Disease-specific target gene expression profiling of molecular imaging probes: database development and clinical validation. . Molecular imaging probes can target abnormal gene expression patterns in patients and allow early diagnosis of disease. For selecting a suitable imaging probe, the current Molecular Imaging and Contrast Agent Database (MICAD) provides descriptive and qualitative information on imaging probe characteristics and properties. However, MICAD does not support linkage with the expression profiles of target genes. The proposed Disease-specific Imaging Probe Profiling (DIPP) database quantitatively archives and presents the gene expression profiles of targets across different diseases, anatomic regions, and subcellular locations, providing an objective reference for selecting imaging probes. The DIPP database was validated with a clinical positron emission tomography (PET) study on lung cancer and an in vitro study on neuroendocrine cancer. The retrieved records show that choline kinase beta and glucose transporters were positively and significantly associated with lung cancer among the targets of 11C-choline and [18F]fluoro-2-deoxy-2-d-glucose (FDG), respectively. Their significant overexpressions corresponded to the findings that the uptake rate of FDG increased with tumor size but that of 11C-choline remained constant. Validated with the in vitro study, the expression profiles of disease-associated targets can indicate the eligibility of patients for clinical trials of the treatment probe. 
A Web search tool of the DIPP database is available at http://www.polyu.edu.hk/bmi/dipp/.",MICAD,0.960550686,Molecular Imaging and,0.806334178,MICAD,0.960550686,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2014 +24536078,http://www.cdfd.org.in/micas,"MICdb3.0: a comprehensive resource of microsatellite repeats from prokaryotic genomes. The MICdb is a comprehensive relational database of perfect microsatellites extracted from completely sequenced and annotated genomes of bacteria and archaea. The current version MICdb3.0 is an updated and revised version of MICdb2.0. As compared with the previous version MICdb2.0, the current release is significantly improved in terms of much larger coverage of genomes, improved presentation of queried results, user-friendly administration module to manage Simple Sequence Repeat (SSR) data such as addition of new genomes, deletion of obsolete data, etc., and also removal of certain features deemed to be redundant. The new web-interface to the database called Microsatellite Analysis Server (MICAS) version 3.0 has been improved by the addition of powerful high-quality visualization tools to view the query results in the form of pie charts and bar graphs. All the query results and graphs can be exported in different formats so that the users can use them for further analysis. MICAS3.0 is also equipped with a unique genome comparison module using which users can do pair-wise comparison of genomes with regard to their microsatellite distribution. The advanced search module can be used to filter the repeats based on certain criteria such as filtering repeats of a particular motif/repeat size, extracting repeats of coding/non-coding regions, sort repeats, etc. The MICdb database has, therefore, been made portable to be administered by a person with the necessary administrative privileges. The MICdb3.0 database and analysis server can be accessed for free from www.cdfd.org.in/micas. 
Database URL: http://www.cdfd.org.in/micas.",MICdb,0.989687264,NA,0,MICdb,0.989687264,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/17/2014 +26286928,http://micrhode.sb-roscoff.fr,"MicRhoDE: a curated database for the analysis of microbial rhodopsin diversity and evolution. . Microbial rhodopsins are a diverse group of photoactive transmembrane proteins found in all three domains of life and in viruses. Today, microbial rhodopsin research is a flourishing research field in which new understandings of rhodopsin diversity, function and evolution are contributing to broader microbiological and molecular knowledge. Here, we describe MicRhoDE, a comprehensive, high-quality and freely accessible database that facilitates analysis of the diversity and evolution of microbial rhodopsins. Rhodopsin sequences isolated from a vast array of marine and terrestrial environments were manually collected and curated. To each rhodopsin sequence are associated related metadata, including predicted spectral tuning of the protein, putative activity and function, taxonomy for sequences that can be linked to a 16S rRNA gene, sampling date and location, and supporting literature. The database currently covers 7857 aligned sequences from more than 450 environmental samples or organisms. Based on a robust phylogenetic analysis, we introduce an operational classification system with multiple phylogenetic levels ranging from superclusters to species-level operational taxonomic units. An integrated pipeline for online sequence alignment and phylogenetic tree construction is also provided. With a user-friendly interface and integrated online bioinformatics tools, this unique resource should be highly valuable for upcoming studies of the biogeography, diversity, distribution and evolution of microbial rhodopsins. 
Database URL: http://micrhode.sb-roscoff.fr.",MicRhoDE,0.990796208,NA,0,MicRhoDE,0.990796208,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/18/2015 +33329482,http://gsbios.com/index/experimental/dts,"Human Gut Microbiome-Based Knowledgebase as a Biomarker Screening Tool to Improve the Predicted Probability for Colorectal Cancer. Colorectal cancer (CRC) is a common clinical malignancy globally ranked as the fourth leading cause of cancer mortality. Some microbes are known to contribute to adenoma-carcinoma transition and possess diagnostic potential. Advances in high-throughput sequencing technology and functional studies have provided significant insights into the landscape of the gut microbiome and the fundamental roles of its components in carcinogenesis. Integration of scattered knowledge is highly beneficial for future progress. In this study, literature review and information extraction were performed, with the aim of integrating the available data resources and facilitating comparative research. A knowledgebase of the human CRC microbiome was compiled to facilitate understanding of diagnosis, and the global signatures of CRC microbes, sample types, algorithms, differential microorganisms and various panels of markers plus their diagnostic performance were evaluated based on statistical and phylogenetic analyses. Additionally, prospects about current changelings and solution strategies were outlined for identifying future research directions. 
This type of data integration strategy presents an effective platform for inquiry and comparison of relevant information, providing a tool for further study about CRC-related microbes and exploration of factors promoting clinical transformation (available at: http://gsbios.com/index/experimental/dts_ mben?id=1).",NA,0,Microbiome,0.517021239,Microbiome,0.517021239,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/19/2020 +31042284,http://knights-lab.github.io/MLRepo,"Microbiome Learning Repo (ML Repo): A public repository of microbiome regression and classification tasks. . The use of machine learning in high-dimensional biological applications, such as the human microbiome, has grown exponentially in recent years, but algorithm developers often lack the domain expertise required for interpretation and curation of the heterogeneous microbiome datasets. We present Microbiome Learning Repo (ML Repo, available at https://knights-lab.github.io/MLRepo/), a public, web-based repository of 33 curated classification and regression tasks from 15 published human microbiome datasets. We highlight the use of ML Repo in several use cases to demonstrate its wide application, and we expect it to be an important resource for algorithm developers.",ML Repo,0.96120888,Microbiome Learning Repo,0.96743991,Microbiome Learning Repo,0.96743991,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/1/2019 +23178820,"http://bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp, http://pha-bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp","MicrobPad MD: microbial pathogen diagnostic methods database. Medical pathogens induce infections, illnesses and sometimes serious medical conditions in the infected hosts. Diagnosis of these pathogens is important for proper treatment and investigation of pathogenesis processes. Molecular techniques have been developed for facilitating accurate, sensitive and low-cost diagnosis of these pathogens. 
Based on these techniques, diagnostic devices have been developed for a number of pathogens. More devices are needed for comprehensive coverage of medical pathogens. To facilitate the development of these devices, a database with integrated information about diagnostic methods, targets, and primers/probes for the known bacterial, fungal and viral pathogens is needed. We developed the microbial pathogen diagnostic methods database MicrobPad MD (http://bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp or http://pha-bidd.nus.edu.sg/group/MicrobPad/MicrobPad.asp) to provide comprehensive information about the molecular diagnostic techniques, targets, primers/probes, detection procedures and conditions, and tested diagnostic accuracies and limit of diagnosis for 314 bacterial, fungal and viral species from 61 genera. While available, additional information such as pathogen strains and hosts, tissue distribution or habitats, cultivation methods, biochemical characteristics, virulence factors, morphology, diseases, symptoms, treatment and prevention methods are provided. Our Database covers 242 gene targets, 700 primers/probes, 340 virulence factors, and 261 diseases. Cross-links to the NCBI genome and SwissProt/UniProt databases are provided.",MicrobPad MD,0.995625615,microbial pathogen diagnostic methods database,0.982669552,MicrobPad MD,0.995625615,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2012 +25392421,http://microkit.biocuckoo.org,"MiCroKiTS 4.0: a database of midbody, centrosome, kinetochore, telomere and spindle. We reported an updated database of MiCroKiTS 4.0 (http://microkit.biocuckoo.org) for proteins temporally and spatially localized in distinct subcellular positions including midbody, centrosome, kinetochore, telomere and mitotic spindle during cell division/mitosis. The database was updated from our previously developed database of MiCroKit 3.0, which contained 1489 proteins mostly forming super-complexes at midbody, centrosome and kinetochore from seven eukaryotes. 
Since the telomere and spindle apparatus are critical for cell division, the proteins localized at the two positions were also integrated. From the scientific literature, we curated 1872 experimentally identified proteins which at least locate in one of the five positions from eight species. Then the ortholog detection was performed to identify potential MiCroKiTS proteins from 144 eukaryotic organisms, which contains 66, 45 and 33 species of animals, fungi and plants, respectively. In total, 87,983 unique proteins with corresponding localization information were integrated into the database. The primary references of experimentally identified localizations were provided and the fluorescence microscope figures for the localizations of human proteins were shown. The orthologous relations between predicted and experimental localizations were also present. Taken together, we anticipate the database can serve as a useful resource for further analyzing the molecular mechanisms during cell division.",MiCroKiTS,0.991478801,NA,0,MiCroKiTS,0.991478801,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +31667690,http://www.microndata.net,"A simple, web-based repository for the management, access and analysis of micrographic images. Microscopy is advancing at a rapid pace, enabling high-speed, high-resolution analyses to be conducted in a wide range of cellular contexts. For example, the capacity to quickly capture high-resolution images from multiple optical sections over multiple channels with confocal microscopy has allowed researchers to gain deeper understanding of tissue morphology via techniques such as three-dimensional rendering, as have more recent advances such as lattice light sheet microscopy and superresolution structured illumination microscopy. With this, though, comes the challenge of storing, curating, analysing and sharing data. 
While there are ways in which this has been attempted previously, few approaches have provided a central repository in which all of these different aspects of microscopy can be seamlessly integrated. Here, we describe a web-based storage and analysis platform called Microndata, that enables relatively straightforward storage, annotation, tracking, analysis and multi-user access to micrographs. This easy to use tool will simplify and harmonise laboratory work flows, and, importantly, will provide a central storage repository that is readily accessed, even after the researcher responsible for capturing the images has left the laboratory. Microndata is open-source software, available at http://www.microndata.net/.",Microndata,0.973953187,NA,0,Microndata,0.973953187,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/30/2019 +33418085,"http://www.liwzlab.cn/microphenodb, http://lilab2.sysu.edu.cn/microphenodb","MicroPhenoDB Associates Metagenomic Data with Pathogenic Microbes, Microbial Core Genes, and Human Disease Phenotypes. Microbes play important roles in human health and disease. The interaction between microbes and hosts is a reciprocal relationship, which remains largely under-explored. Current computational resources lack manually and consistently curated data to connect metagenomic data to pathogenic microbes, microbial core genes, and disease phenotypes. We developed the MicroPhenoDB database by manually curating and consistently integrating microbe-disease association data. MicroPhenoDB provides 5677 non-redundant associations between 1781 microbes and 542 human disease phenotypes across more than 22 human body sites. MicroPhenoDB also provides 696,934 relationships between 27,277 unique clade-specific core genes and 685 microbes. Disease phenotypes are classified and described using the Experimental Factor Ontology (EFO). A refined score model was developed to prioritize the associations based on evidential metrics. 
The sequence search option in MicroPhenoDB enables rapid identification of existing pathogenic microbes in samples without running the usual metagenomic data processing and assembly. MicroPhenoDB offers data browsing, searching, and visualization through user-friendly web interfaces and web service application programming interfaces. MicroPhenoDB is the first database platform to detail the relationships between pathogenic microbes, core genes, and disease phenotypes. It will accelerate metagenomic data analysis and assist studies in decoding microbes related to human diseases. MicroPhenoDB is available through http://www.liwzlab.cn/microphenodb and http://lilab2.sysu.edu.cn/microphenodb.",MicroPhenoDB,0.990126431,NA,0,MicroPhenoDB,0.990126431,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +25425035,http://www4a.biotec.or.th/micropir2,"microPIR2: a comprehensive database for human-mouse comparative study of microRNA-promoter interactions. microRNA (miRNA)-promoter interaction resource (microPIR) is a public database containing over 15 million predicted miRNA target sites located within human promoter sequences. These predicted targets are presented along with their related genomic and experimental data, making the microPIR database the most comprehensive repository of miRNA promoter target sites. Here, we describe major updates of the microPIR database including new target predictions in the mouse genome and revised human target predictions. The updated database (microPIR2) now provides ∼80 million human and 40 million mouse predicted target sites. In addition to being a reference database, microPIR2 is a tool for comparative analysis of target sites on the promoters of human-mouse orthologous genes. In particular, this new feature was designed to identify potential miRNA-promoter interactions conserved between species that could be stronger candidates for further experimental validation. 
We also incorporated additional supporting information to microPIR2 such as nuclear and cytoplasmic localization of miRNAs and miRNA-disease association. Extra search features were also implemented to enable various investigations of targets of interest. Database URL: http://www4a.biotec.or.th/micropir2",microPIR,0.996132895,promoter interaction resource,0.824124861,microPIR,0.996132895,1,NA,22439011,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,11/25/2014 +22439011,http://www4a.biotec.or.th/micropir,"microPIR: an integrated database of microRNA target sites within human promoter sequences. Background microRNAs are generally understood to regulate gene expression through binding to target sequences within 3'-UTRs of mRNAs. Therefore, computational prediction of target sites is usually restricted to these gene regions. Recent experimental studies though have suggested that microRNAs may alternatively modulate gene expression by interacting with promoters. A database of potential microRNA target sites in promoters would stimulate research in this field leading to more understanding of complex microRNA regulatory mechanism. Methodology We developed a database hosting predicted microRNA target sites located within human promoter sequences and their associated genomic features, called microPIR (microRNA-Promoter Interaction Resource). microRNA seed sequences were used to identify perfect complementary matching sequences in the human promoters and the potential target sites were predicted using the RNAhybrid program. >15 million target sites were identified which are located within 5000 bp upstream of all human genes, on both sense and antisense strands. The experimentally confirmed argonaute (AGO) binding sites and EST expression data including the sequence conservation across vertebrate species of each predicted target are presented for researchers to appraise the quality of predicted target sites. 
The microPIR database integrates various annotated genomic sequence databases, e.g. repetitive elements, transcription factor binding sites, CpG islands, and SNPs, offering users the facility to extensively explore relationships among target sites and other genomic features. Furthermore, functional information of target genes including gene ontologies, KEGG pathways, and OMIM associations are provided. The built-in genome browser of microPIR provides a comprehensive view of multidimensional genomic data. Finally, microPIR incorporates a PCR primer design module to facilitate experimental validation. Conclusions The proposed microPIR database is a useful integrated resource of microRNA-promoter target interactions for experimental microRNA researchers and computational biologists to study the microRNA regulation through gene promoter. The database can be freely accessed from: http://www4a.biotec.or.th/micropir.",microPIR,0.981774867,microRNA-Promoter Interaction Resource,0.708554874,microPIR,0.981774867,1,NA,25425035,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,3/16/2012 +"23193269, 27899624, 28968784",http://www.genoscope.cns.fr/agc/microscope,"MicroScope--an integrated microbial resource for the curation and comparative analysis of genomic and metabolic data. MicroScope is an integrated platform dedicated to both the methodical updating of microbial genome annotation and to comparative analysis. The resource provides data from completed and ongoing genome projects (automatic and expert annotations), together with data sources from post-genomic experiments (i.e. transcriptomics, mutant collections) allowing users to perfect and improve the understanding of gene functions. MicroScope (http://www.genoscope.cns.fr/agc/microscope) combines tools and graphical interfaces to analyse genomes and to perform the manual curation of gene annotations in a comparative context. 
Since its first publication in January 2006, the system (previously named MaGe for Magnifying Genomes) has been continuously extended both in terms of data content and analysis tools. The last update of MicroScope was published in 2009 in the Database journal. Today, the resource contains data for >1600 microbial genomes, of which ∼300 are manually curated and maintained by biologists (1200 personal accounts today). Expert annotations are continuously gathered in the MicroScope database (∼50 000 a year), contributing to the improvement of the quality of microbial genomes annotations. Improved data browsing and searching tools have been added, original tools useful in the context of expert annotation have been developed and integrated and the website has been significantly redesigned to be more user-friendly. Furthermore, in the context of the European project Microme (Framework Program 7 Collaborative Project), MicroScope is becoming a resource providing for the curation and analysis of both genomic and metabolic data. An increasing number of projects are related to the study of environmental bacterial (meta)genomes that are able to metabolize a large variety of chemical compounds that may be of high industrial interest.",MicroScope,0.975439906,NA,0,MicroScope,0.975439906,3,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2019 +28365734,http://www.midasfieldguide.org,"MiDAS 2.0: an ecosystem-specific taxonomy and online database for the organisms of wastewater treatment systems expanded for anaerobic digester groups. . Wastewater is increasingly viewed as a resource, with anaerobic digester technology being routinely implemented for biogas production. Characterising the microbial communities involved in wastewater treatment facilities and their anaerobic digesters is considered key to their optimal design and operation. Amplicon sequencing of the 16S rRNA gene allows high-throughput monitoring of these systems. 
The MiDAS field guide is a public resource providing amplicon sequencing protocols and an ecosystem-specific taxonomic database optimized for use with wastewater treatment facility samples. The curated taxonomy endeavours to provide a genus-level-classification for abundant phylotypes and the online field guide links this identity to published information regarding their ecology, function and distribution. This article describes the expansion of the database resources to cover the organisms of the anaerobic digester systems fed primary sludge and surplus activated sludge. The updated database includes descriptions of the abundant genus-level-taxa in influent wastewater, activated sludge and anaerobic digesters. Abundance information is also included to allow assessment of the role of emigration in the ecology of each phylotype. MiDAS is intended as a collaborative resource for the progression of research into the ecology of wastewater treatment, by providing a public repository for knowledge that is accessible to all interested in these biotechnologically important systems. http://www.midasfieldguide.org.",MiDAS,0.995514631,NA,0,MiDAS,0.995514631,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +34363073,http://mik.bicnirrh.res.in,"Male Infertility Knowledgebase: decoding the genetic and disease landscape. . Male infertility is a multifactorial condition that contributes to around one-third of cases of infertility worldwide. Several chromosomal aberrations, single-gene and polygenic associations with male factor defects have been reported. These defects manifest as sperm number or sperm quality defects leading to infertility. However, in almost 40% of cases, the genetic etiology of male infertility remains unexplained. Understanding the causal genetic factors is crucial for effective patient management and counseling. 
Integrating the vast amount of available omics data on male infertility is a first step towards understanding, delineating and prioritizing genes associated with the different male reproductive disorders. The Male Infertility Knowledgebase (MIK) is a manually curated repository developed to boost research on the elusive genetic etiology of male infertility. It integrates information on ∼17 000 genes, their associated pathways, gene ontology, diseases and gene and sequence-based analysis tools. In addition, it also incorporates information on reported chromosomal aberrations and syndromic associations with male infertility. Disease enrichment of genes in MIK indicate a shared genetic etiology between cancer, male and female infertility disorders. While the genes involved in cancer pathways were found to be common causal factors for sperm number and sperm quality defects, the interleukin pathways were found to be shared and enriched between male factor defects and non-reproductive conditions like cardiovascular diseases, metabolic diseases, etc. Disease information in MIK can be explored further to identify high-risk conditions associated with male infertility and delineate shared genetic etiology. Utility of the knowledgebase in predicting novel genes is illustrated by identification of 149 novel candidates for cryptorchidism using gene prioritization and network analysis. MIK will serve as a platform for review of genetic information on male infertility, identification pleiotropic genes, prediction of novel candidate genes for the different male infertility diseases and for portending future high-risk diseases associated with male infertility. Database URL: http://mik.bicnirrh.res.in/.",MIK,0.993417561,Male Infertility Knowledgebase,0.925598693,MIK,0.993417561,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2021 +34900127,http://www.sysbio.org.cn/mikb,"MIKB: A manually curated and comprehensive knowledge base for myocardial infarction. 
Myocardial infarction knowledge base (MIKB; http://www.sysbio.org.cn/mikb/; latest update: December 31, 2020) is an open-access and manually curated database dedicated to integrating knowledge about MI to improve the efficiency of translational MI research. MIKB is an updated and expanded version of our previous MI Risk Knowledge Base (MIRKB), which integrated MI-related risk factors and risk models for providing help in risk assessment or diagnostic prediction of MI. The updated MIRKB includes 9701 records with 2054 single factors, 209 combined factors, 243 risk models, 37 MI subtypes and 3406 interactions between single factors and MIs collected from 4817 research articles. The expanded functional module, i.e. MIGD, is a database including not only MI associated genetic variants, but also the other multi-omics factors and the annotations for their functional alterations. The goal of MIGD is to provide a multi-omics level understanding of the molecular pathogenesis of MI. MIGD includes 1782 omics factors, 28 MI subtypes and 2347 omics factor-MI interactions as well as 1253 genes and 6 chromosomal alterations collected from 2647 research articles. The functions of MI associated genes and their interaction with drugs were analyzed. MIKB will be continuously updated and optimized to provide precision and comprehensive knowledge for the study of heterogeneous and personalized MI.",MIKB,0.997301161,Myocardial infarction knowledge base,0.9902739,MIKB,0.997301161,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/16/2021 +28490127,http://mbpdb.nws.oregonstate.edu,"Milk bioactive peptide database: A comprehensive database of milk protein-derived bioactive peptides and novel visualization. During processing and digestion, milk proteins are disassembled into peptides with an array of biological functions, including antimicrobial, angiotensin-converting enzyme inhibition, antioxidant, opioid, and immunomodulation. 
These functions are summarized in numerous reviews, yet information on which peptides have which functions remains scattered across hundreds of research articles. We systematically searched the literature for all instances of bioactive peptides derived from milk proteins from any mammalian source. The data were compiled into a comprehensive database, which can be used to search for specific functions, peptides, or proteins (http://mbpdb.nws.oregonstate.edu). To review this large dataset, the bioactive peptides reported in the literature were visually mapped on the parent protein sequences, providing information on sites with highest abundance of bioactive peptides.",NA,0,Milk bioactive peptide database,0.679362587,Milk bioactive peptide database,0.679362587,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/12/2017 +22053087,http://immunet.cn/mimodb,"MimoDB 2.0: a mimotope database and beyond. Mimotopes are peptides with affinities to given targets. They are readily obtained through biopanning against combinatorial peptide libraries constructed by phage display and other display technologies such as mRNA display, ribosome display, bacterial display and yeast display. Mimotopes have been used to infer the protein interaction sites and networks; they are also ideal candidates for developing new diagnostics, therapeutics and vaccines. However, such valuable peptides are not collected in the central data resources such as UniProt and NCBI GenPept due to their 'unnatural' short sequences. The MimoDB database is an information portal to biopanning results of random libraries. In version 2.0, it has 15,633 peptides collected from 849 papers and grouped into 1818 sets. Besides the core data on panning experiments and their results, broad background information on target, template, library and structure is included. An accompanied benchmark has also been compiled for bioinformaticians to develop and evaluate their new models, algorithms and programs. 
In addition, the MimoDB database provides tools for simple and advanced searches, structure visualization, BLAST and alignment view on the fly. The experimental biologists can easily use the database as a virtual control to exclude possible target-unrelated peptides. The MimoDB database is freely available at http://immunet.cn/mimodb.",MimoDB,0.995547354,NA,0,MimoDB,0.995547354,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/3/2011 +22096233,http://www.minas.uzh.ch,"MINAS--a database of Metal Ions in Nucleic AcidS. Correctly folded into the respective native 3D structure, RNA and DNA are responsible for uncountable key functions in any viable organism. In order to exert their function, metal ion cofactors are closely involved in folding, structure formation and, e.g. in ribozymes, also the catalytic mechanism. The database MINAS, Metal Ions in Nucleic AcidS (http://www.minas.uzh.ch), compiles the detailed information on innersphere, outersphere and larger coordination environment of >70,000 metal ions of 36 elements found in >2000 structures of nucleic acids contained today in the PDB and NDB. MINAS is updated monthly with new structures and offers a multitude of search functions, e.g. the kind of metal ion, metal-ligand distance, innersphere and outersphere ligands defined by element or functional group, residue, experimental method, as well as PDB entry-related information. The results of each search can be saved individually for later use with so-called miniPDB files containing the respective metal ion together with the coordination environment within a 15 Å radius. 
MINAS thus offers a unique way to explore the coordination geometries and ligands of metal ions together with the respective binding pockets in nucleic acids.",MINAS,0.994471371,Metal Ions in Nucleic AcidS,0.86654482,MINAS,0.994471371,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/16/2011 +22096227,"http://mint.bio.uniroma2.it/mint/, http://mint.bio.uniroma2.it/mint/download.do","MINT, the molecular interaction database: 2012 update. The Molecular INTeraction Database (MINT, http://mint.bio.uniroma2.it/mint/) is a public repository for protein-protein interactions (PPI) reported in peer-reviewed journals. The database grows steadily over the years and at September 2011 contains approximately 235,000 binary interactions captured from over 4750 publications. The web interface allows the users to search, visualize and download interactions data. MINT is one of the members of the International Molecular Exchange consortium (IMEx) and adopts the Molecular Interaction Ontology of the Proteomics Standard Initiative (PSI-MI) standards for curation and data exchange. MINT data are freely accessible and downloadable at http://mint.bio.uniroma2.it/mint/download.do. We report here the growth of the database, the major changes in curation policy and a new algorithm to assign a confidence to each interaction.",MINT,0.995954633,Molecular INTeraction Database,0.968848467,MINT,0.995954633,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/16/2011 +30268942,http://mipanda.org,"MiPanda: A Resource for Analyzing and Visualizing Next-Generation Sequencing Transcriptomics Data. The Michigan Portal for the Analysis of NGS data portal (http://mipanda.org) is an open-access online resource that provides the scientific community with access to the results of a large-scale computational analysis of thousands of high-throughput RNA sequencing (RNA-seq) samples. The portal provides access to gene expression profiles, enabling users to interrogate expression of genes across myriad normal and cancer tissues and cell lines. 
From these data, tissue- and cancer-specific expression patterns can be identified. Gene-gene coexpression profiles can also be interrogated. The current portal contains data for over 20,000 RNA-seq samples and will be continually updated.",MiPanda,0.688638806,NA,0,MiPanda,0.688638806,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/27/2018 +29109711,http://proteininformatics.org/mkumar/mipepbase,"miPepBase: A Database of Experimentally Verified Peptides Involved in Molecular Mimicry. Autoimmune diseases emerge due to several reasons, of which molecular mimicry i.e., similarity between the host's and pathogen's interacting peptides is an important reason. In the present study we have reported a database of only experimentally verified peptide sequences, which exhibit molecular mimicry. The database is named as miPepBase (Mimicry Peptide Database) and contains comprehensive information about mimicry proteins and peptides of both host (and model organism) and pathogen. It also provides information about physicochemical properties of protein and mimicry peptides, which might be helpful in predicting the nature of protein and optimization of protein expression. The miPepBase can be searched using a keyword or, by autoimmune disease(s) or by a combination of host and pathogen taxonomic group or their name. To facilitate the search of proteins and/or epitope in miPepBase, which is similar to the user's interest, BLAST search tool is also incorporated. miPepBase is an open access database and available at http://proteininformatics.org/mkumar/mipepbase.",miPepBase,0.993406117,Mimicry Peptide Database,0.776584784,miPepBase,0.993406117,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/23/2017 +22080560,http://bioinfo.iitk.ac.in/MIPModDB,"MIPModDB: a central resource for the superfamily of major intrinsic proteins. The channel proteins belonging to the major intrinsic proteins (MIP) superfamily are diverse and are found in all forms of life. 
Water-transporting aquaporin and glycerol-specific aquaglyceroporin are the prototype members of the MIP superfamily. MIPs have also been shown to transport other neutral molecules and gases across the membrane. They have internal homology and possess conserved sequence motifs. By analyzing a large number of publicly available genome sequences, we have identified more than 1000 MIPs from diverse organisms. We have developed a database MIPModDB which will be a unified resource for all MIPs. For each MIP entry, this database contains information about the source, gene structure, sequence features, substitutions in the conserved NPA motifs, structural model, the residues forming the selectivity filter and channel radius profile. For selected set of MIPs, it is possible to derive structure-based sequence alignment and evolutionary relationship. Sequences and structures of selected MIPs can be downloaded from MIPModDB database which is freely available at http://bioinfo.iitk.ac.in/MIPModDB.",MIPModDB,0.992227316,NA,0,MIPModDB,0.992227316,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/12/2011 +23044546,http://microrna.osumc.edu/mireditar,"miR-EdiTar: a database of predicted A-to-I edited miRNA target sites. Motivation A-to-I RNA editing is an important mechanism that consists of the conversion of specific adenosines into inosines in RNA molecules. Its dysregulation has been associated to several human diseases including cancer. Recent work has demonstrated a role for A-to-I editing in microRNA (miRNA)-mediated gene expression regulation. In fact, edited forms of mature miRNAs can target sets of genes that differ from the targets of their unedited forms. The specific deamination of mRNAs can generate novel binding sites in addition to potentially altering existing ones. Results This work presents miR-EdiTar, a database of predicted A-to-I edited miRNA binding sites. 
The database contains predicted miRNA binding sites that could be affected by A-to-I editing and sites that could become miRNA binding sites as a result of A-to-I editing. Availability miR-EdiTar is freely available online at http://microrna.osumc.edu/mireditar. Contact alessandro.lagana@osumc.edu or carlo.croce@osumc.edu Supplementary information Supplementary data are available at Bioinformatics online.",miR-EdiTar,0.902254691,NA,0,miR-EdiTar,0.902254691,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/7/2012 +30649247,http://www.insect-genome.com/miR-pathway,"miR+Pathway: the integration and visualization of miRNA and KEGG pathways. miRNAs represent a type of noncoding small molecule RNA. Many studies have shown that miRNAs are widely involved in the regulation of various pathways. The key to fully understanding the regulatory function of miRNAs is the determination of the pathways in which the miRNAs participate. However, the major pathway databases such as KEGG only include information regarding protein-coding genes. Here, we redesigned a pathway database (called miR+Pathway) by integrating and visualizing the 8882 human experimentally validated miRNA-target interactions (MTIs) and 150 KEGG pathways. This database is freely accessible at http://www.insect-genome.com/miR-pathway. Researchers can intuitively determine the pathways and the genes in the pathways that are regulated by miRNAs as well as the miRNAs that target the pathways. To determine the pathways in which targets of a certain miRNA or multiple miRNAs are enriched, we performed a KEGG analysis miRNAs by using the hypergeometric test. In addition, miR+Pathway provides information regarding MTIs, PubMed IDs and the experimental verification method. 
Users can retrieve pathways regulated by an miRNA or a gene by inputting its names.",miR+Pathway,0.936659026,NA,0,miR+Pathway,0.936659026,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/1/2020 +32436932,http://ccsm.uth.edu/miRactDB,"miRactDB characterizes miRNA-gene relation switch between normal and cancer tissues across pan-cancer. . It has been increasingly accepted that microRNA (miRNA) can both activate and suppress gene expression, directly or indirectly, under particular circumstances. Yet, a systematic study on the switch in their interaction pattern between activation and suppression and between normal and cancer conditions based on multi-omics evidences is not available. We built miRactDB, a database for miRNA-gene interaction, at https://ccsm.uth.edu/miRactDB, to provide a versatile resource and platform for annotation and interpretation of miRNA-gene relations. We conducted a comprehensive investigation on miRNA-gene interactions and their biological implications across tissue types in both tumour and normal conditions, based on TCGA, CCLE and GTEx databases. We particularly explored the genetic and epigenetic mechanisms potentially contributing to the positive correlation, including identification of miRNA binding sites in the gene coding sequence (CDS) and promoter regions of partner genes. Integrative analysis based on this resource revealed that top-ranked genes derived from TCGA tumour and adjacent normal samples share an overwhelming part of biological processes, which are quite different than those from CCLE and GTEx. The most active miRNAs predicted to target CDS and promoter regions are largely overlapped. These findings corroborate that adjacent normal tissues might have undergone significant molecular transformations towards oncogenesis before phenotypic and histological change; and there probably exists a small yet critical set of miRNAs that profoundly influence various cancer hallmark processes. 
miRactDB provides a unique resource for the cancer and genomics communities to screen, prioritize and rationalize their candidates of miRNA-gene interactions, in both normal and cancer scenarios.",miRactDB,0.998288214,NA,0,miRactDB,0.998288214,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +28049134,http://mirandb.ir,"miRandb: a resource of online services for miRNA research. Recent discovery of thousands of small and large noncoding RNAs, in parallel to technical improvements enabling scientists to study the transcriptome in much higher depth, has resulted in massive data generation. This burst of information prompts the development of easily accessible resources for storage, retrieval and analysis of raw and processed data, and hundreds of Web-based tools dedicated to these tasks have been made available. However, the increasing number and diversity of bioinformatics tools, each covering a specific and specialized area, as well as their redundancies, represent potential sources of complication for end users. To overcome these issues, we are introducing an easy-to-follow classification of microRNA (miRNA)-related bioinformatics tools for biologists interested in studying this important class of small noncoding RNAs. We also developed our miRNA database miRNA algorithmic network database (miRandb) that is a meta-database, which presents a survey of > 180 Web-based miRNA databases. These include miRNA sequence, discovery, target prediction, target validation, expression and regulation, functions and their roles in diseases, interactions in cellular pathways and networks and deep sequencing. miRandb recapitulates the diverse possibilities and facilitates that access to the different categories of miRNA resources. Researchers can easily select the category of miRNA information and desired organism, in result eligible databases with their features are presented. 
This database introducing an easy-to-follow classification of available resources that can facilitate selection of appropriate resources for miRNA-related bioinformatics tools. Finally, we described current shortages and future necessities that assist researchers to use these tools easily. Our database is accessible at http://mirandb.ir.",miRandb,0.993180573,miRNA algorithmic network database,0.859572917,miRandb,0.993180573,1,NA,30963485,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,3/1/2018 +30963485,http://miRandb.ir,"miRandb: A Metadatabase of Online Resources of miRNA and miRNA Targets. MicroRNA (miRNA) studies deliver numerous types of information including miRNA identification, sequence of miRNAs, target prediction, roles in diseases, and interactions in signaling pathways. Considering the different types of miRNA data, the number of miRNA databases has been increasing quickly. While resources have been planned to simplify miRNA analysis, scientists are facing the challenging task of choosing the most proper tool to retrieve related information. In this chapter, we introduce the use of miRandb, a resource that we have established to present an outline of different types of miRNA online resources and to simplify finding the right miRNA information that scientists need for their research. miRandb offers a user-friendly platform to find related information about any miRNA data among more than 188 present miRNA databases. miRandb has an easy procedure, and information can be retrieved by miRNA category resources. Each database comprises numerous kinds of information including database activity, description, main and unique features, organism, URL, publication, category, published year, citations per year, last update, and relative popularity. miRandb provides several opportunities and facilitates access to diverse classes of microRNA resources. 
miRandb is available at http://miRandb.ir .",miRandb,0.989271283,NA,0,miRandb,0.989271283,1,NA,28049134,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +29036351,http://mirandola.iit.cnr.it,"miRandola 2017: a curated knowledge base of non-invasive biomarkers. miRandola (http://mirandola.iit.cnr.it/) is a database of extracellular non-coding RNAs (ncRNAs) that was initially published in 2012, foreseeing the relevance of ncRNAs as non-invasive biomarkers. An increasing amount of experimental evidence shows that ncRNAs are frequently dysregulated in diseases. Further, ncRNAs have been discovered in different extracellular forms, such as exosomes, which circulate in human body fluids. Thus, miRandola 2017 is an effort to update and collect the accumulating information on extracellular ncRNAs that is spread across scientific publications and different databases. Data are manually curated from 314 articles that describe miRNAs, long non-coding RNAs and circular RNAs. Fourteen organisms are now included in the database, and associations of ncRNAs with 25 drugs, 47 sample types and 197 diseases. miRandola also classifies extracellular RNAs based on their extracellular form: Argonaute2 protein, exosome, microvesicle, microparticle, membrane vesicle, high density lipoprotein and circulating. We also implemented a new web interface to improve the user experience.",miRandola,0.992442071,NA,0,miRandola,0.992442071,1,NA,23094086,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +23094086,http://atlas.dmi.unict.it/mirandola/index.html,"miRandola: extracellular circulating microRNAs database. MicroRNAs are small noncoding RNAs that play an important role in the regulation of various biological processes through their interaction with cellular messenger RNAs. They are frequently dysregulated in cancer and have shown great potential as tissue-based markers for cancer classification and prognostication. 
microRNAs are also present in extracellular human body fluids such as serum, plasma, saliva, and urine. Most of circulating microRNAs are present in human plasma and serum cofractionate with the Argonaute2 (Ago2) protein. However, circulating microRNAs have been also found in membrane-bound vesicles such as exosomes. Since microRNAs circulate in the bloodstream in a highly stable, extracellular form, they may be used as blood-based biomarkers for cancer and other diseases. A knowledge base of extracellular circulating miRNAs is a fundamental tool for biomedical research. In this work, we present miRandola, a comprehensive manually curated classification of extracellular circulating miRNAs. miRandola is connected to miRò, the miRNA knowledge base, allowing users to infer the potential biological functions of circulating miRNAs and their connections with phenotypes. The miRandola database contains 2132 entries, with 581 unique mature miRNAs and 21 types of samples. miRNAs are classified into four categories, based on their extracellular form: miRNA-Ago2 (173 entries), miRNA-exosome (856 entries), miRNA-HDL (20 entries) and miRNA-circulating (1083 entries). miRandola is available online at: http://atlas.dmi.unict.it/mirandola/index.html.",miRandola,0.990178049,extracellular circulating microRNAs,0.69080558,miRandola,0.990178049,1,NA,29036351,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/19/2012 +30423142,http://mirbase.org,"miRBase: from microRNA sequences to function. miRBase catalogs, names and distributes microRNA gene sequences. The latest release of miRBase (v22) contains microRNA sequences from 271 organisms: 38 589 hairpin precursors and 48 860 mature microRNAs. We describe improvements to the database and website to provide more information about the quality of microRNA gene annotations, and the cellular functions of their products. 
We have collected 1493 small RNA deep sequencing datasets and mapped a total of 5.5 billion reads to microRNA sequences. The read mapping patterns provide strong support for the validity of between 20% and 65% of microRNA annotations in different well-studied animal genomes, and evidence for the removal of >200 sequences from the database. To improve the availability of microRNA functional information, we are disseminating Gene Ontology terms annotated against miRBase sequences. We have also used a text-mining approach to search for microRNA gene names in the full-text of open access articles. Over 500 000 sentences from 18 542 papers contain microRNA names. We score these sentences for functional information and link them with 12 519 microRNA entries. The sentences themselves, and word clouds built from them, provide effective summaries of the functional information about specific microRNAs. miRBase is publicly and freely available at http://mirbase.org/.",miRBase,0.998149395,NA,0,miRBase,0.998149395,1,NA,24275495,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +24275495,http://www.mirbase.org,"miRBase: annotating high confidence microRNAs using deep sequencing data. We describe an update of the miRBase database (http://www.mirbase.org/), the primary microRNA sequence repository. The latest miRBase release (v20, June 2013) contains 24 521 microRNA loci from 206 species, processed to produce 30 424 mature microRNA products. The rate of deposition of novel microRNAs and the number of researchers involved in their discovery continue to increase, driven largely by small RNA deep sequencing experiments. In the face of these increases, and a range of microRNA annotation methods and criteria, maintaining the quality of the microRNA sequence data set is a significant challenge. Here, we describe recent developments of the miRBase database to address this issue. 
In particular, we describe the collation and use of deep sequencing data sets to assign levels of confidence to miRBase entries. We now provide a high confidence subset of miRBase entries, based on the pattern of mapped reads. The high confidence microRNA data set is available alongside the complete microRNA collection at http://www.mirbase.org/. We also describe embedding microRNA-specific Wikipedia pages on the miRBase website to encourage the microRNA community to contribute and share textual and functional information.",miRBase,0.997651339,NA,0,miRBase,0.997651339,1,NA,30423142,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/25/2013 +23325619,http://mircancer.ecu.edu,"miRCancer: a microRNA-cancer association database constructed by text mining on literature. Motivation Research interests in microRNAs have increased rapidly in the past decade. Many studies have showed that microRNAs have close relationships with various human cancers, and they potentially could be used as cancer indicators in diagnosis or as a suppressor for treatment purposes. There are several databases that contain microRNA-cancer associations predicted by computational methods but few from empirical results. Despite the fact that abundant experiments investigating microRNA expressions in cancer cells have been carried out, the results have remain scattered in the literature. We propose to extract microRNA-cancer associations by text mining and store them in a database called miRCancer. Results The text mining is based on 75 rules we have constructed, which represent the common sentence structures typically used to state microRNA expressions in cancers. The microRNA-cancer association database, miRCancer, is updated regularly by running the text mining algorithm against PubMed. All miRNA-cancer associations are confirmed manually after automatic extraction. 
miRCancer currently documents 878 relationships between 236 microRNAs and 79 human cancers through the processing of >26 000 published articles. Availability miRCancer is freely available on the web at http://mircancer.ecu.edu/",miRCancer,0.985522926,NA,0,miRCancer,0.985522926,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/16/2013 +29036653,http://mircarta.cs.uni-saarland.de,"miRCarta: a central repository for collecting miRNA candidates. The continuous increase of available biological data as consequence of modern high-throughput technologies poses new challenges for analysis techniques and database applications. Especially for miRNAs, one class of small non-coding RNAs, many algorithms have been developed to predict new candidates from next-generation sequencing data. While the amount of publications describing novel miRNA candidates keeps steadily increasing, the current gold standard database for miRNAs - miRBase - has not been updated since June 2014. As a result, publications describing new miRNA candidates in the last three to five years might have a substantial overlap of candidates without noticing. With miRCarta we implemented a database to collect novel miRNA candidates and augment the information provided by miRBase. In the first stage, miRCarta is thought to be a highly sensitive collection of potential miRNA candidates with a high degree of analysis functionality, annotations and details on each miRNA. We added-besides the full content of the miRBase-12,857 human miRNA precursors to miRCarta. Users can match their own predictions to the entries of miRCarta to reduce potential redundancies in their studies. miRCarta provides the most comprehensive collection of human miRNAs and miRNA candidates to form a basis for further refinement and validation studies. 
The database is freely accessible at https://mircarta.cs.uni-saarland.de/.",miRCarta,0.995174766,NA,0,miRCarta,0.995174766,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +"25378301, 31504780",http://mirdb.org,"miRDB: an online resource for microRNA target prediction and functional annotations. MicroRNAs (miRNAs) are small non-coding RNAs that are extensively involved in many physiological and disease processes. One major challenge in miRNA studies is the identification of genes regulated by miRNAs. To this end, we have developed an online resource, miRDB (http://mirdb.org), for miRNA target prediction and functional annotations. Here, we describe recently updated features of miRDB, including 2.1 million predicted gene targets regulated by 6709 miRNAs. In addition to presenting precompiled prediction data, a new feature is the web server interface that allows submission of user-provided sequences for miRNA target prediction. In this way, users have the flexibility to study any custom miRNAs or target genes of interest. Another major update of miRDB is related to functional miRNA annotations. Although thousands of miRNAs have been identified, many of the reported miRNAs are not likely to play active functional roles or may even have been falsely identified as miRNAs from high-throughput studies. To address this issue, we have performed combined computational analyses and literature mining, and identified 568 and 452 functional miRNAs in humans and mice, respectively. These miRNAs, as well as associated functional annotations, are presented in the FuncMir Collection in miRDB.",miRDB,0.996351361,NA,0,miRDB,0.996351361,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +29194489,http://ophid.utoronto.ca/mirDIP,"mirDIP 4.1-integrative database of human microRNA target predictions. MicroRNAs are important regulators of gene expression, achieved by binding to the gene to be regulated. 
Even with modern high-throughput technologies, it is laborious and expensive to detect all possible microRNA targets. For this reason, several computational microRNA-target prediction tools have been developed, each with its own strengths and limitations. Integration of different tools has been a successful approach to minimize the shortcomings of individual databases. Here, we present mirDIP v4.1, providing nearly 152 million human microRNA-target predictions, which were collected across 30 different resources. We also introduce an integrative score, which was statistically inferred from the obtained predictions, and was assigned to each unique microRNA-target interaction to provide a unified measure of confidence. We demonstrate that integrating predictions across multiple resources does not cumulate prediction bias toward biological processes or pathways. mirDIP v4.1 is freely available at http://ophid.utoronto.ca/mirDIP/.",mirDIP,0.971328497,NA,0,mirDIP,0.971328497,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +27799474,http://www.wzgenomics.cn/mirdnmr,"mirDNMR: a gene-centered database of background de novo mutation rates in human. De novo germline mutations (DNMs) are the rarest genetic variants proven to cause a considerable number of sporadic genetic diseases, such as autism spectrum disorders, epileptic encephalopathy, schizophrenia, congenital heart disease, type 1 diabetes, and hearing loss. However, it is difficult to accurately assess the cause of DNMs and identify disease-causing genes from the considerable number of DNMs in probands. A common method to this problem is to identify genes that harbor significantly more DNMs than expected by chance, with accurate background DNM rate (DNMR) required. Therefore, in this study, we developed a novel database named mirDNMR for the collection of gene-centered background DNMRs obtained from different methods and population variation data. 
The database has the following functions: (i) browse and search the background DNMRs of each gene predicted by four different methods, including GC content (DNMR-GC), sequence context (DNMR-SC), multiple factors (DNMR-MF) and local DNA methylation level (DNMR-DM); (ii) search variant frequencies in publicly available databases, including ExAC, ESP6500, UK10K, 1000G and dbSNP and (iii) investigate the DNM burden to prioritize candidate genes based on the four background DNMRs using three statistical methods (TADA, Binomial and Poisson test). As a case study, we successfully employed our database in candidate gene prioritization for a sporadic complex disease: intellectual disability. In conclusion, mirDNMR (https://www.wzgenomics.cn/mirdnmr/) can be widely used to identify the genetic basis of sporadic genetic diseases.",mirDNMR,0.996765435,NA,0,mirDNMR,0.996765435,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/30/2016 +31404401,http://mirdrn.ncu.edu.tw/mirdrn,"miRDRN-miRNA disease regulatory network: a tool for exploring disease and tissue-specific microRNA regulatory networks. Background MicroRNA (miRNA) regulates cellular processes by acting on specific target genes, and cellular processes proceed through multiple interactions often organized into pathways among genes and gene products. Hundreds of miRNAs and their target genes have been identified, as are many miRNA-disease associations. These, together with huge amounts of data on gene annotation, biological pathways, and protein-protein interactions are available in public databases. Here, using such data we built a database and web service platform, miRNA disease regulatory network (miRDRN), for users to construct disease and tissue-specific miRNA-protein regulatory networks, with which they may explore disease related molecular and pathway associations, or find new ones, and possibly discover new modes of drug action. 
Methods Data on disease-miRNA association, miRNA-target association and validation, gene-tissue association, gene-tumor association, biological pathways, human protein interaction, gene ID, gene ontology, gene annotation, and product were collected from publicly available databases and integrated. A large set of miRNA target-specific regulatory sub-pathways (RSPs) having the form (T, G 1, G 2) was built from the integrated data and stored, where T is a miRNA-associated target gene, G 1 (G 2) is a gene/protein interacting with T (G 1). Each sequence (T, G 1, G 2) was assigned a p-value weighted by the participation of the three genes in molecular interactions and reaction pathways. Results A web service platform, miRDRN (http://mirdrn.ncu.edu.tw/mirdrn/), was built. The database part of miRDRN currently stores 6,973,875 p-valued RSPs associated with 116 diseases in 78 tissue types built from 207 diseases-associated miRNA regulating 389 genes. miRDRN also provides facilities for the user to construct disease and tissue-specific miRNA regulatory networks from RSPs it stores, and to download and/or visualize parts or all of the product. User may use miRDRN to explore a single disease, or a disease-pair to gain insights on comorbidity. 
As demonstrations, miRDRN was applied: to explore the single disease colorectal cancer (CRC), in which 26 novel potential CRC target genes were identified; to study the comorbidity of the disease-pair Alzheimer's disease-Type 2 diabetes, in which 18 novel potential comorbid genes were identified; and, to explore possible causes that may shed light on recent failures of late-phase trials of anti-AD, BACE1 inhibitor drugs, in which genes downstream to BACE1 whose suppression may affect signal transduction were identified.",miRDRN,0.996445715,miRNA,0.549448252,miRDRN,0.996445715,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/6/2019 +22276777,http://mirdsnp.ccr.buffalo.edu,"miRdSNP: a database of disease-associated SNPs and microRNA target sites on 3'UTRs of human genes. Background Single nucleotide polymorphisms (SNPs) can lead to the susceptibility and onset of diseases through their effects on gene expression at the posttranscriptional level. Recent findings indicate that SNPs could create, destroy, or modify the efficiency of miRNA binding to the 3'UTR of a gene, resulting in gene dysregulation. With the rapidly growing number of published disease-associated SNPs (dSNPs), there is a strong need for resources specifically recording dSNPs on the 3'UTRs and their nucleotide distance from miRNA target sites. We present here miRdSNP, a database incorporating three important areas of dSNPs, miRNA target sites, and diseases. Description miRdSNP provides a unique database of dSNPs on the 3'UTRs of human genes manually curated from PubMed. The current release includes 786 dSNP-disease associations for 630 unique dSNPs and 204 disease types. miRdSNP annotates genes with experimentally confirmed targeting by miRNAs and indexes miRNA target sites predicted by TargetScan and PicTar as well as potential miRNA target sites newly generated by dSNPs. A robust web interface and search tools are provided for studying the proximity of miRNA binding sites to dSNPs in relation to human diseases. 
Searches can be dynamically filtered by gene name, miRBase ID, target prediction algorithm, disease, and any nucleotide distance between dSNPs and miRNA target sites. Results can be viewed at the sequence level showing the annotated locations for miRNA target sites and dSNPs on the entire 3'UTR sequences. The integration of dSNPs with the UCSC Genome browser is also supported. Conclusion miRdSNP provides a comprehensive data source of dSNPs and robust tools for exploring their distance from miRNA target sites on the 3'UTRs of human genes. miRdSNP enables researchers to further explore the molecular mechanism of gene dysregulation for dSNPs at posttranscriptional level. miRdSNP is freely available on the web at http://mirdsnp.ccr.buffalo.edu.",miRdSNP,0.998023212,NA,0,miRdSNP,0.998023212,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/25/2012 +25889518,http://www.mirecdb.org,"miREC: a database of miRNAs involved in the development of endometrial cancer. Background Endometrial cancer (EC) is the most frequently diagnosed gynecological malignancy and the fourth most common cancer diagnosis overall among women. As with many other forms of cancer, it has been shown that certain miRNAs are differentially expressed in EC and these miRNAs are believed to play important roles as regulators of processes involved in the development of the disease. With the rapidly growing number of studies of miRNA expression in EC, there is a need to organize the data, combine the findings from experimental studies of EC with information from various miRNA databases, and make the integrated information easily accessible for the EC research community. Findings The miREC database is an organized collection of data and information about miRNAs shown to be differentially expressed in EC. The database can be used to map connections between miRNAs and their target genes in order to identify specific miRNAs that are potentially important for the development of EC. 
The aim of the miREC database is to integrate all available information about miRNAs and target genes involved in the development of endometrial cancer, and to provide a comprehensive, up-to-date, and easily accessible source of knowledge regarding the role of miRNAs in the development of EC. Database URL: http://www.mirecdb.org . Conclusions Several databases have been published that store information about all miRNA targets that have been predicted or experimentally verified to date. It would be a time-consuming task to navigate between these different data sources and literature to gather information about a specific disease, such as endometrial cancer. The miREC database is a specialized data repository that, in addition to miRNA target information, keeps track of the differential expression of genes and miRNAs potentially involved in endometrial cancer development. By providing flexible search functions it becomes easy to search for EC-associated genes and miRNAs from different starting points, such as differential expression and genomic loci (based on genomic aberrations).",miREC,0.907016993,NA,0,miREC,0.907016993,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/28/2015 +34349127,http://ncrnaome.osumc.edu/miredibase,"MiREDiBase, a manually curated database of validated and putative editing events in microRNAs. MicroRNAs (miRNAs) are regulatory small non-coding RNAs that function as translational repressors. MiRNAs are involved in most cellular processes, and their expression and function are presided by several factors. Amongst, miRNA editing is an epitranscriptional modification that alters the original nucleotide sequence of selected miRNAs, possibly influencing their biogenesis and target-binding ability. A-to-I and C-to-U RNA editing are recognized as the canonical types, with the A-to-I type being the predominant one. 
Albeit some bioinformatics resources have been implemented to collect RNA editing data, it still lacks a comprehensive resource explicitly dedicated to miRNA editing. Here, we present MiREDiBase, a manually curated catalog of editing events in miRNAs. The current version includes 3,059 unique validated and putative editing sites from 626 pre-miRNAs in humans and three primates. Editing events in mature human miRNAs are supplied with miRNA-target predictions and enrichment analysis, while minimum free energy structures are inferred for edited pre-miRNAs. MiREDiBase represents a valuable tool for cell biology and biomedical research and will be continuously updated and expanded at https://ncrnaome.osumc.edu/miredibase .",MiREDiBase,0.997606039,NA,0,MiREDiBase,0.997606039,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/4/2021 +26243198,http://bnet.egr.vcu.edu/miRegulome,"miRegulome: a knowledge-base of miRNA regulomics and analysis. Unlabelled miRNAs regulate post transcriptional gene expression by targeting multiple mRNAs and hence can modulate multiple signalling pathways, biological processes, and patho-physiologies. Therefore, understanding of miRNA regulatory networks is essential in order to modulate the functions of a miRNA. The focus of several existing databases is to provide information on specific aspects of miRNA regulation. However, an integrated resource on the miRNA regulome is currently not available to facilitate the exploration and understanding of miRNA regulomics. miRegulome attempts to bridge this gap. The current version of miRegulome v1.0 provides details on the entire regulatory modules of miRNAs altered in response to chemical treatments and transcription factors, based on validated data manually curated from published literature. 
Modules of miRegulome (upstream regulators, downstream targets, miRNA regulated pathways, functions, diseases, etc) are hyperlinked to an appropriate external resource and are displayed visually to provide a comprehensive understanding. Four analysis tools are incorporated to identify relationships among different modules based on user specified datasets. miRegulome and its tools are helpful in understanding the biology of miRNAs and will also facilitate the discovery of biomarkers and therapeutics. With added features in upcoming releases, miRegulome will be an essential resource to the scientific community. Availability http://bnet.egr.vcu.edu/miRegulome.",miRegulome,0.996385574,NA,0,miRegulome,0.996385574,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/5/2015 +21984757,http://cmbi.bjmu.edu.cn/miren,"miREnvironment database: providing a bridge for microRNAs, environmental factors and phenotypes. Unlabelled The interaction between genetic factors and environmental factors has critical roles in determining the phenotype of an organism. In recent years, a number of studies have reported that the dysfunctions on microRNA (miRNAs), environmental factors and their interactions have strong effects on phenotypes and even may result in abnormal phenotypes and diseases, whereas there has been no a database linking miRNAs, environmental factors and phenotypes. Such a resource platform is believed to be of great value in the understanding of miRNAs, environmental factors, especially drugs and diseases. In this study, we constructed the miREnvironment database, which contains a comprehensive collection and curation of experimentally supported interactions among miRNAs, environmental factors and phenotypes. The names of miRNAs, phenotypes, environmental factors, conditions of environmental factors, samples, species, evidence and references were further annotated. miREnvironment represents a biomedical resource for researches on miRNAs, environmental factors and diseases. 
Availability http://cmbi.bjmu.edu.cn/miren. Contact cuiqinghua@hsc.pku.edu.cn.",miREnvironment,0.989526451,NA,0,miREnvironment,0.989526451,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/7/2011 +34052284,http://www.physio.wzw.tum.de/mirev,"miREV: An Online Database and Tool to Uncover Potential Reference RNAs and Biomarkers in Small-RNA Sequencing Data Sets from Extracellular Vesicles Enriched Samples. Extracellular vesicles (EVs) are nano-sized, membrane-enclosed vesicles released by cells for intercellular communication. EVs are involved in pathological processes and miRNAs in EVs have gained interest as easily accessible biomolecules in liquid biopsies for diagnostic purposes. To validate potential miRNA biomarker, transcriptome analyses must be carried out to detect suitable reference miRNAs. miREV is a database with over 400 miRNA sequencing data sets and helps the researcher to find suitable reference miRNAs for their individual experimental setup. The researcher can put together a specific sample set in miREV, which is similar to his own experimental concept in order to find the most suitable references. This allows to run validation experiments without having to carry out a complex and costly transcriptome analysis priorly. Additional read count tables of each generated sample set are downloadable for further analysis. miREV is freely available at https://www.physio.wzw.tum.de/mirev/.",miREV,0.99435091,NA,0,miREV,0.99435091,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/28/2021 +22013167,http://bioinfo.amu.edu.pl/mirex,"mirEX: a platform for comparative exploration of plant pri-miRNA expression data. mirEX is a comprehensive platform for comparative analysis of primary microRNA expression data. RT-qPCR-based gene expression profiles are stored in a universal and expandable database scheme and wrapped by an intuitive user-friendly interface. A new way of accessing gene expression data in mirEX includes a simple mouse operated querying system and dynamic graphs for data mining analyses. 
In contrast to other publicly available databases, the mirEX interface allows a simultaneous comparison of expression levels between various microRNA genes in diverse organs and developmental stages. Currently, mirEX integrates information about the expression profile of 190 Arabidopsis thaliana pri-miRNAs in seven different developmental stages: seeds, seedlings and various organs of mature plants. Additionally, by providing RNA structural models, publicly available deep sequencing results, experimental procedure details and careful selection of auxiliary data in the form of web links, mirEX can function as a one-stop solution for Arabidopsis microRNA information. A web-based mirEX interface can be accessed at http://bioinfo.amu.edu.pl/mirex.",mirEX,0.982228279,NA,0,mirEX,0.982228279,1,NA,26141515,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,10/19/2011 +26141515,http://www.combio.pl/mirex,"mirEX 2.0 - an integrated environment for expression profiling of plant microRNAs. Background MicroRNAs are the key post-transcriptional regulators of gene expression in development and stress responses. Thus, precisely quantifying the level of each particular microRNA is of utmost importance when studying the biology of any organism. Description The mirEX 2.0 web portal ( http://www.combio.pl/mirex ) provides a comprehensive platform for the exploration of microRNA expression data based on quantitative Real Time PCR and NGS sequencing experiments, covering various developmental stages, from wild-type to mutant plants. The portal includes mature and pri-miRNA expression levels detected in three plant species (Arabidopsis thaliana, Hordeum vulgare and Pellia endiviifolia), and in A. thaliana miRNA biogenesis pathway mutants. In total, the database contains information about the expression of 461 miRNAs representing 268 families. 
The data can be explored through the use of advanced web tools, including (i) a graphical query builder system allowing a combination of any given species, developmental stages and tissues, (ii) a modular presentation of the results in the form of thematic windows, and (iii) a number of user-friendly utilities such as a community-building discussion system and extensive tutorial documentation (e.g., tooltips, exemplary videos and presentations). All data contained within the mirEX 2.0 database can be downloaded for use in further applications in a context-based way from the result windows or from a dedicated web page. Conclusions The mirEX 2.0 portal provides the plant research community with easily accessible data and powerful tools for application in multi-conditioned analyses of miRNA expression from important plant species in different biological and developmental backgrounds.",mirEX,0.959979296,NA,0,mirEX,0.959979296,1,NA,22013167,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,6/16/2015 +"25858286, 28439836","http://mirgate.bioinfo.cnio.es, http://mirgate.bioinfo.cnio.es/API","miRGate: a curated database of human, mouse and rat miRNA-mRNA targets. MicroRNAs (miRNAs) are small non-coding elements involved in the post-transcriptional down-regulation of gene expression through base pairing with messenger RNAs (mRNAs). Through this mechanism, several miRNA-mRNA pairs have been described as critical in the regulation of multiple cellular processes, including early embryonic development and pathological conditions. Many of these pairs (such as miR-15 b/BCL2 in apoptosis or BART-6/BCL6 in diffuse large B-cell lymphomas) were experimentally discovered and/or computationally predicted. Available tools for target prediction are usually based on sequence matching, thermodynamics and conservation, among other approaches. 
Nevertheless, the main issue on miRNA-mRNA pair prediction is the little overlapping results among different prediction methods, or even with experimentally validated pairs lists, despite the fact that all rely on similar principles. To circumvent this problem, we have developed miRGate, a database containing novel computational predicted miRNA-mRNA pairs that are calculated using well-established algorithms. In addition, it includes an updated and complete dataset of sequences for both miRNA and mRNAs 3'-Untranslated region from human (including human viruses), mouse and rat, as well as experimentally validated data from four well-known databases. The underlying methodology of miRGate has been successfully applied to independent datasets providing predictions that were convincingly validated by functional assays. miRGate is an open resource available at http://mirgate.bioinfo.cnio.es. For programmatic access, we have provided a representational state transfer web service application programming interface that allows accessing the database at http://mirgate.bioinfo.cnio.es/API/ Database URL: http://mirgate.bioinfo.cnio.es",miRGate,0.99535054,NA,0,miRGate,0.99535054,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +31598695,http://mirgenedb.org,"MirGeneDB 2.0: the metazoan microRNA complement. Small non-coding RNAs have gained substantial attention due to their roles in animal development and human disorders. Among them, microRNAs are special because individual gene sequences are conserved across the animal kingdom. In addition, unique and mechanistically well understood features can clearly distinguish bona fide miRNAs from the myriad other small RNAs generated by cells. However, making this distinction is not a common practice and, thus, not surprisingly, the heterogeneous quality of available miRNA complements has become a major concern in microRNA research. 
We addressed this by extensively expanding our curated microRNA gene database - MirGeneDB - to 45 organisms, encompassing a wide phylogenetic swath of animal evolution. By consistently annotating and naming 10,899 microRNA genes in these organisms, we show that previous microRNA annotations contained not only many false positives, but surprisingly lacked >2000 bona fide microRNAs. Indeed, curated microRNA complements of closely related organisms are very similar and can be used to reconstruct ancestral miRNA repertoires. MirGeneDB represents a robust platform for microRNA-based research, providing deeper and more significant insights into the biology and evolution of miRNAs as well as biomedical and biomarker research. MirGeneDB is publicly and freely available at http://mirgenedb.org/.",MirGeneDB,0.99370259,NA,0,MirGeneDB,0.99370259,1,26473382,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,NA,1/1/2020 +29220447,http://www.miriad-database.org,"MiRIAD update: using alternative polyadenylation, protein interaction network analysis and additional species to enhance exploration of the role of intragenic miRNAs and their host genes. . http://www.miriad-database.org.",MiRIAD,0.840786457,NA,0,MiRIAD,0.840786457,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2017 +28108447,http://guanlab.ccmb.med.umich.edu/mirmine,"miRmine: a database of human miRNA expression profiles. Motivation MicroRNAs (miRNAs) are small non-coding RNAs that are involved in post-transcriptional regulation of gene expression. In this high-throughput sequencing era, a tremendous amount of RNA-seq data is accumulating, and full utilization of publicly available miRNA data is an important challenge. These data are useful to determine expression values for each miRNA, but quantification pipelines are in a primitive stage and still evolving; there are many factors that affect expression values significantly. 
Results We used 304 high-quality microRNA sequencing (miRNA-seq) datasets from NCBI-SRA and calculated expression profiles for different tissues and cell-lines. In each miRNA-seq dataset, we found an average of more than 500 miRNAs with higher than 5x coverage, and we explored the top five highly expressed miRNAs in each tissue and cell-line. This user-friendly miRmine database has options to retrieve expression profiles of single or multiple miRNAs for a specific tissue or cell-line, either normal or with disease information. Results can be displayed in multiple interactive, graphical and downloadable formats. Availability and implementation http://guanlab.ccmb.med.umich.edu/mirmine. Contact bharatpa@umich.edu. Supplementary information Supplementary data are available at Bioinformatics online.",miRmine,0.975829124,NA,0,miRmine,0.975829124,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/1/2017 +31792281,http://mirna-coadread.omics.si,"The integrative knowledge base for miRNA-mRNA expression in colorectal cancer. ""miRNA colorectal cancer"" (https://mirna-coadread.omics.si/) is a freely available web application for studying microRNA and mRNA expression and their correlation in colorectal cancer. To the best of our knowledge, ""miRNA colorectal cancer"" has the largest knowledge base of miRNA-target gene expressions and correlations in colorectal cancer, based on the largest available sample size from the same source of data. Data from high-throughput molecular profiling of 295 colon and rectum adenocarcinoma samples from The Cancer Genome Atlas was analyzed and integrated into our knowledge base. The objective of developing this web application was to help researchers to discover the behavior and role of miRNA-target gene interactions in colorectal cancer. For this purpose, results of differential expression and correlation analyses of miRNA and mRNA data collected in our knowledge base are available through web forms. 
To validate our knowledge base experimentally, we selected genes FN1, TGFB2, RND3, ZEB1 and ZEB2 and miRNAs hsa-miR-200a/b/c-3p, hsa-miR-141-3p and hsa-miR-429. Both approaches revealed a negative correlation between miRNA hsa-miR-200b/c-3p and its target gene FN1 and between hsa-miR-200a-3p and its target TGFB2, thus supporting the usefulness of the developed knowledge base.",miRNA,0.692498068,NA,0,miRNA,0.692498068,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,12/2/2019 +28365733,http://www.mirnalyze.in,"miRnalyze: an interactive database linking tool to unlock intuitive microRNA regulation of cell signaling pathways. . The various pathophysiological processes occurring in living systems are known to be orchestrated by delicate interplays and cross-talks between different genes and their regulators. Among the various regulators of genes, there is a class of small non-coding RNA molecules known as microRNAs. Although, the relative simplicity of miRNAs and their ability to modulate cellular processes make them attractive therapeutic candidates, their presence in large numbers make it challenging for experimental researchers to interpret the intricacies of the molecular processes they regulate. Most of the existing bioinformatic tools fail to address these challenges. Here, we present a new web resource 'miRnalyze' that has been specifically designed to directly identify the putative regulation of cell signaling pathways by miRNAs. The tool integrates miRNA-target predictions with signaling cascade members by utilizing TargetScanHuman 7.1 miRNA-target prediction tool and the KEGG pathway database, and thus provides researchers with in-depth insights into modulation of signal transduction pathways by miRNAs. miRnalyze is capable of identifying common miRNAs targeting more than one gene in the same signaling pathway-a feature that further increases the probability of modulating the pathway and downstream reactions when using miRNA modulators. 
Additionally, miRnalyze can sort miRNAs according to the seed-match types and TargetScan Context ++ score, thus providing a hierarchical list of most valuable miRNAs. Furthermore, in order to provide users with comprehensive information regarding miRNAs, genes and pathways, miRnalyze also links to expression data of miRNAs (miRmine) and genes (TiGER) and proteome abundance (PaxDb) data. To validate the capability of the tool, we have documented the correlation of miRnalyze's prediction with experimental confirmation studies. http://www.mirnalyze.in.",miRnalyze,0.994418919,NA,0,miRnalyze,0.994418919,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +25877638,http://bioinfo.life,"An update of miRNASNP database for better SNP selection by GWAS data, miRNA expression and online tools. MicroRNAs (miRNAs) are key regulators of gene expression involved in a broad range of biological processes. MiRNASNP aims to provide single nucleotide polymorphisms (SNPs) in miRNAs and genes that may impact miRNA biogenesis and/or miRNA target binding. Advanced miRNA research provided abundant data about miRNA expression, validated targets and related phenotypic variants. In miRNASNP v2.0, we have updated our previous database with several new data and features, including: (i) expression level and expression correlation of miRNAs and target genes in different tissues, (ii) linking SNPs to the results of genome-wide association studies, (iii) integrating experimentally validated miRNA:mRNA interactions, (iv) adding multiple filters to prioritize functional SNPs. In addition, as a supplement of the database, we have set up three flexible online tools to analyse the influence of novel variants on miRNA:mRNA binding. A new nice web interface was designed for miRNASNP v2.0 allowing users to browse, search and download. We aim to maintain the miRNASNP as a solid resource for function, genetics and disease studies of miRNA-related SNPs. Database URL: http://bioinfo.life. 
hust.edu.cn/miRNASNP2/",miRNASNP,0.998172939,NA,0,miRNASNP,0.998172939,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/15/2015 +32990748,http://bioinfo.life.hust.edu.cn/miRNASNP,"miRNASNP-v3: a comprehensive database for SNPs and disease-related variations in miRNAs and miRNA targets. MicroRNAs (miRNAs) related single-nucleotide variations (SNVs), including single-nucleotide polymorphisms (SNPs) and disease-related variations (DRVs) in miRNAs and miRNA-target binding sites, can affect miRNA functions and/or biogenesis, thus to impact on phenotypes. miRNASNP is a widely used database for miRNA-related SNPs and their effects. Here, we updated it to miRNASNP-v3 (http://bioinfo.life.hust.edu.cn/miRNASNP/) with tremendous number of SNVs and new features, especially the DRVs data. We analyzed the effects of 7 161 741 SNPs and 505 417 DRVs on 1897 pre-miRNAs (2630 mature miRNAs) and 3'UTRs of 18 152 genes. miRNASNP-v3 provides a one-stop resource for miRNA-related SNVs research with the following functions: (i) explore associations between miRNA-related SNPs/DRVs and diseases; (ii) browse the effects of SNPs/DRVs on miRNA-target binding; (iii) functional enrichment analysis of miRNA target gain/loss caused by SNPs/DRVs; (iv) investigate correlations between drug sensitivity and miRNA expression; (v) inquire expression profiles of miRNAs and their targets in cancers; (vi) browse the effects of SNPs/DRVs on pre-miRNA secondary structure changes; and (vii) predict the effects of user-defined variations on miRNA-target binding or pre-miRNA secondary structure. miRNASNP-v3 is a valuable and long-term supported resource in functional variation screening and miRNA function studies.",miRNASNP-v3,0.997226487,NA,0,miRNASNP-v3,0.997226487,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +"22135287, 24243848",http://mirnest.amu.edu.pl,"miRNEST database: an integrative approach in microRNA search and annotation. 
Despite accumulating data on animal and plant microRNAs and their functions, existing public miRNA resources usually collect miRNAs from a very limited number of species. A lot of microRNAs, including those from model organisms, remain undiscovered. As a result there is a continuous need to search for new microRNAs. We present miRNEST (http://mirnest.amu.edu.pl), a comprehensive database of animal, plant and virus microRNAs. The core part of the database is built from our miRNA predictions conducted on Expressed Sequence Tags of 225 animal and 202 plant species. The miRNA search was performed based on sequence similarity and as many as 10,004 miRNA candidates in 221 animal and 199 plant species were discovered. Out of them only 299 have already been deposited in miRBase. Additionally, miRNEST has been integrated with external miRNA data from literature and 13 databases, which includes miRNA sequences, small RNA sequencing data, expression, polymorphisms and targets data as well as links to external miRNA resources, whenever applicable. All this makes miRNEST a considerable miRNA resource in a sense of number of species (544) that integrates a scattered miRNA data into a uniform format with a user-friendly web interface.",miRNEST,0.990006804,NA,0,miRNEST,0.990006804,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2013 +"27742822, 31691816",http://mpd.bioinf.uni-sb.de,"miRPathDB: a new dictionary on microRNAs and target pathways. In the last decade, miRNAs and their regulatory mechanisms have been intensively studied and many tools for the analysis of miRNAs and their targets have been developed. We previously presented a dictionary on single miRNAs and their putative target pathways. Since then, the number of miRNAs has tripled and the knowledge on miRNAs and targets has grown substantially. This, along with changes in pathway resources such as KEGG, leads to an improved understanding of miRNAs, their target genes and related pathways. 
Here, we introduce the miRNA Pathway Dictionary Database (miRPathDB), freely accessible at https://mpd.bioinf.uni-sb.de/ With the database we aim to complement available target pathway web-servers by providing researchers easy access to the information which pathways are regulated by a miRNA, which miRNAs target a pathway and how specific these regulations are. The database contains a large number of miRNAs (2595 human miRNAs), different miRNA target sets (14 773 experimentally validated target genes as well as 19 281 predicted targets genes) and a broad selection of functional biochemical categories (KEGG-, WikiPathways-, BioCarta-, SMPDB-, PID-, Reactome pathways, functional categories from gene ontology (GO), protein families from Pfam and chromosomal locations totaling 12 875 categories). In addition to Homo sapiens, also Mus musculus data are stored and can be compared to human target pathways.",miRPathDB,0.997528672,miRNA Pathway Dictionary Database,0.964912802,miRPathDB,0.997528672,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +25527833,http://www.microrna.gr/mirpub,"mirPub: a database for searching microRNA publications. Summary Identifying, amongst millions of publications available in MEDLINE, those that are relevant to specific microRNAs (miRNAs) of interest based on keyword search faces major obstacles. References to miRNA names in the literature often deviate from standard nomenclature for various reasons, since even the official nomenclature evolves. For instance, a single miRNA name may identify two completely different molecules or two different names may refer to the same molecule. mirPub is a database with a powerful and intuitive interface, which facilitates searching for miRNA literature, addressing the aforementioned issues. To provide effective search services, mirPub applies text mining techniques on MEDLINE, integrates data from several curated databases and exploits data from its user community following a crowdsourcing approach. 
Other key features include an interactive visualization service that illustrates intuitively the evolution of miRNA data, tag clouds summarizing the relevance of publications to particular diseases, cell types or tissues and access to TarBase 6.0 data to oversee genes related to miRNA publications. Availability and implementation mirPub is freely available at http://www.microrna.gr/mirpub/. Contact vergoulis@imis.athena-innovation.gr or dalamag@imis.athena-innovation.gr Supplementary information Supplementary data are available at Bioinformatics online.",mirPub,0.996326447,NA,0,mirPub,0.996326447,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/20/2014 +23173617,http://cmbi.bjmu.edu.cn/mirsnp,"MirSNP, a database of polymorphisms altering miRNA target sites, identifies miRNA-related SNPs in GWAS SNPs and eQTLs. Background Numerous single nucleotide polymorphisms (SNPs) associated with complex diseases have been identified by genome-wide association studies (GWAS) and expression quantitative trait loci (eQTLs) studies. However, few of these SNPs have explicit biological functions. Recent studies indicated that the SNPs within the 3'UTR regions of susceptibility genes could affect complex traits/diseases by affecting the function of miRNAs. These 3'UTR SNPs are functional candidates and therefore of interest to GWAS and eQTL researchers. Description We developed a publicly available online database, MirSNP (http://cmbi.bjmu.edu.cn/mirsnp), which is a collection of human SNPs in predicted miRNA-mRNA binding sites. We identified 414,510 SNPs that might affect miRNA-mRNA binding. Annotations were added to these SNPs to predict whether a SNP within the target site would decrease/break or enhance/create an miRNA-mRNA binding site. By applying MirSNP database to three brain eQTL data sets, we identified four unreported SNPs (rs3087822, rs13042, rs1058381, and rs1058398), which might affect miRNA binding and thus affect the expression of their host genes in the brain. 
We also applied the MirSNP database to our GWAS for schizophrenia: seven predicted miRNA-related SNPs (p < 0.0001) were found in the schizophrenia GWAS. Our findings identified the possible functions of these SNP loci, and provide the basis for subsequent functional research. Conclusion MirSNP could identify the putative miRNA-related SNPs from GWAS and eQTLs researches and provide the direction for subsequent functional researches.",MirSNP,0.993562758,NA,0,MirSNP,0.993562758,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/23/2012 +24244721,http://mudshark.brookes.ac.uk/MirStress,"Meta-analysis using a novel database, miRStress, reveals miRNAs that are frequently associated with the radiation and hypoxia stress-responses. Organisms are often exposed to environmental pressures that affect homeostasis, so it is important to understand the biological basis of stress-response. Various biological mechanisms have evolved to help cells cope with potentially cytotoxic changes in their environment. miRNAs are small non-coding RNAs which are able to regulate mRNA stability. It has been suggested that miRNAs may tip the balance between continued cytorepair and induction of apoptosis in response to stress. There is a wealth of data in the literature showing the effect of environmental stress on miRNAs, but it is scattered in a large number of disparate publications. Meta-analyses of this data would produce added insight into the molecular mechanisms of stress-response. To facilitate this we created and manually curated the miRStress database, which describes the changes in miRNA levels following an array of stress types in eukaryotic cells. Here we describe this database and validate the miRStress tool for analysing miRNAs that are regulated by stress. To validate the database we performed a cross-species analysis to identify miRNAs that respond to radiation. 
The analysis tool confirms miR-21 and miR-34a as frequently deregulated in response to radiation, but also identifies novel candidates as potentially important players in this stress response, including miR-15b, miR-19b, and miR-106a. Similarly, we used the miRStress tool to analyse hypoxia-responsive miRNAs. The most frequently deregulated miRNAs were miR-210 and miR-21, as expected. Several other miRNAs were also found to be associated with hypoxia, including miR-181b, miR-26a/b, miR-106a, miR-213 and miR-192. Therefore the miRStress tool has identified miRNAs with hitherto unknown or under-appreciated roles in the response to specific stress types. The miRStress tool, which can be used to uncover new insight into the biological roles of miRNAs, and also has the potential to unearth potential biomarkers for therapeutic response, is freely available at http://mudshark.brookes.ac.uk/MirStress.",miRStress,0.982590735,NA,0,miRStress,0.982590735,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/14/2013 +23200141,http://www.isical.ac.in,"miRT: a database of validated transcription start sites of human microRNAs. MicroRNAs (miRNAs) are small endogenous non-coding RNAs of about 22 nt in length that take crucial roles in many biological processes. These short RNAs regulate the expression of mRNAs by binding to their 3'-UTRs or by translational repression. Many of the current studies focus on how mature miRNAs regulate mRNAs, however, very limited knowledge is available regarding their transcriptional loci. It is known that primary miRNAs (pri-miRs) are first transcribed from the DNA, followed by the formation of precursor miRNAs (pre-miRs) by endonuclease activity, which finally produces the mature miRNAs. Till date, many of the pre-miRs and mature miRNAs have been experimentally verified. But unfortunately, identification of the loci of pri-miRs, promoters and associated transcription start sites (TSSs) are still in progress. 
TSSs of only about 40% of the known mature miRNAs in human have been reported. This information, albeit limited, may be useful for further study of the regulation of miRNAs. In this paper, we provide a novel database of validated miRNA TSSs, miRT, by collecting data from several experimental studies that validate miRNA TSSs and are available for full download. We present miRT as a web server and it is also possible to convert the TSS loci between different genome built. miRT might be a valuable resource for advanced research on miRNA regulation, which is freely accessible at: http://www.isical.ac.in/~bioinfo_miu/miRT/miRT.php.",miRT,0.972974837,NA,0,miRT,0.972974837,1,29201145,NA,low_prob_best_name,do not remove,do not merge,NA,NA,NA,NA,9/29/2012 +31647101,http://miRTarBase.cuhk.edu.cn,"miRTarBase 2020: updates to the experimentally validated microRNA-target interaction database. MicroRNAs (miRNAs) are small non-coding RNAs (typically consisting of 18-25 nucleotides) that negatively control expression of target genes at the post-transcriptional level. Owing to the biological significance of miRNAs, miRTarBase was developed to provide comprehensive information on experimentally validated miRNA-target interactions (MTIs). To date, the database has accumulated >13,404 validated MTIs from 11,021 articles from manual curations. In this update, a text-mining system was incorporated to enhance the recognition of MTI-related articles by adopting a scoring system. In addition, a variety of biological databases were integrated to provide information on the regulatory network of miRNAs and its expression in blood. Not only targets of miRNAs but also regulators of miRNAs are provided to users for investigating the up- and downstream regulations of miRNAs. Moreover, the number of MTIs with high-throughput experimental evidence increased remarkably (validated by CLIP-seq technology). 
In conclusion, these improvements promote the miRTarBase as one of the most comprehensively annotated and experimentally validated miRNA-target interaction databases. The updated version of miRTarBase is now available at http://miRTarBase.cuhk.edu.cn/.",miRTarBase,0.997143626,NA,0,miRTarBase,0.997143626,1,NA,"24304892.0, 26590260.0, 29126174.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2020 +24304892,http://mirtarbase.mbc.nctu.edu.tw,"miRTarBase update 2014: an information resource for experimentally validated miRNA-target interactions. MicroRNAs (miRNAs) are small non-coding RNA molecules capable of negatively regulating gene expression to control many cellular mechanisms. The miRTarBase database (http://mirtarbase.mbc.nctu.edu.tw/) provides the most current and comprehensive information of experimentally validated miRNA-target interactions. The database was launched in 2010 with data sources for >100 published studies in the identification of miRNA targets, molecular networks of miRNA targets and systems biology, and the current release (2013, version 4) includes significant expansions and enhancements over the initial release (2010, version 1). This article reports the current status of and recent improvements to the database, including (i) a 14-fold increase to miRNA-target interaction entries, (ii) a miRNA-target network, (iii) expression profile of miRNA and its target gene, (iv) miRNA target-associated diseases and (v) additional utilities including an upgrade reminder and an error reporting/user feedback system.",miRTarBase,0.9938097,NA,0,miRTarBase,0.9938097,1,NA,"31647101.0, 26590260.0, 29126174.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,12/4/2013 +"26590260, 29126174",http://miRTarBase.mbc.nctu.edu.tw,"miRTarBase 2016: updates to the experimentally validated miRNA-target interactions database. 
MicroRNAs (miRNAs) are small non-coding RNAs of approximately 22 nucleotides, which negatively regulate the gene expression at the post-transcriptional level. This study describes an update of the miRTarBase (http://miRTarBase.mbc.nctu.edu.tw/) that provides information about experimentally validated miRNA-target interactions (MTIs). The latest update of the miRTarBase expanded it to identify systematically Argonaute-miRNA-RNA interactions from 138 crosslinking and immunoprecipitation sequencing (CLIP-seq) data sets that were generated by 21 independent studies. The database contains 4966 articles, 7439 strongly validated MTIs (using reporter assays or western blots) and 348 007 MTIs from CLIP-seq. The number of MTIs in the miRTarBase has increased around 7-fold since the 2014 miRTarBase update. The miRNA and gene expression profiles from The Cancer Genome Atlas (TCGA) are integrated to provide an effective overview of this exponential growth in the miRNA experimental data. These improvements make the miRTarBase one of the more comprehensively annotated, experimentally validated miRNA-target interactions databases and motivate additional miRNA research efforts.",miRTarBase,0.993033171,NA,0,miRTarBase,0.993033171,2,NA,"24304892.0, 31647101.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +29077896,"http://mcube.nju.edu.cn/jwang/lab/soft/mirtrans/, http://120.27.239.192/mirtrans","mirTrans: a resource of transcriptional regulation on microRNAs for human cell lines. The cell-specific information of transcriptional regulation on microRNAs (miRNAs) is crucial to the precise understanding of gene regulations in various physiological and pathological processes existed in different tissues and cell types. 
The database, mirTrans, provides comprehensive information about cell-specific transcription of miRNAs including the transcriptional start sites (TSSs) of miRNAs, transcription factor (TF) to miRNA regulations and miRNA promoter sequences. mirTrans also maps the experimental H3K4me3 and DHS (DNase-I hypersensitive site) marks within miRNA promoters and expressed sequence tags (ESTs) within transcribed regions. The current version of database covers 35 259 TSSs and over 2.3 million TF-miRNA regulations for 1513 miRNAs in a total of 54 human cell lines. These cell lines span most of the biological systems, including circulatory system, digestive system and nervous system. Information for both the intragenic miRNAs and intergenic miRNAs is offered. Particularly, the quality of miRNA TSSs and TF-miRNA regulations is evaluated by literature curation. 23 447 TSS records and 2148 TF-miRNA regulations are supported by special experiments as a result of literature curation. EST coverage is also used to evaluate the accuracy of miRNA TSSs. Interface of mirTrans is friendly designed and convenient to make downloads (http://mcube.nju.edu.cn/jwang/lab/soft/mirtrans/ or http://120.27.239.192/mirtrans/).",mirTrans,0.996958256,NA,0,mirTrans,0.996958256,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +30874795,http://mirtrondb.cp.utfpr.edu.br,"mirtronDB: a mirtron knowledge base. Motivation Mirtrons arise from short introns with atypical cleavage by using the splicing mechanism. In the current literature, there is no repository centralizing and organizing the data available to the public. To fill this gap, we developed mirtronDB, the first knowledge database dedicated to mirtron, and it is available at http://mirtrondb.cp.utfpr.edu.br/. MirtronDB currently contains a total of 1407 mirtron precursors and 2426 mirtron mature sequences in 18 species. Results Through a user-friendly interface, users can now browse and search mirtrons by organism, organism group, type and name. 
MirtronDB is a specialized resource that provides free and user-friendly access to knowledge on mirtron data. Availability and implementation MirtronDB is available at http://mirtrondb.cp.utfpr.edu.br/. Supplementary information Supplementary data are available at Bioinformatics online.",MirtronDB,0.997429311,NA,0,MirtronDB,0.997429311,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2019 +30065744,http://mirvit.ipsp.cnr.it,"miRVIT: A Novel miRNA Database and Its Application to Uncover Vitis Responses to Flavescence dorée Infection. Micro(mi)RNAs play crucial roles in plant developmental processes and in defense responses to biotic and abiotic stresses. In the last years, many works on small RNAs in grapevine (Vitis spp.) were published, and several conserved and putative novel grapevine-specific miRNAs were identified. In order to reorganize the high quantity of available data, we produced ""miRVIT,"" the first database of all novel grapevine miRNA candidates characterized so far, and still not deposited in miRBase. To this aim, each miRNA accession was renamed, repositioned in the last version of the grapevine genome, and compared with all the novel and conserved miRNAs detected in grapevine. Conserved and novel miRNAs cataloged in miRVIT were then used for analyzing Vitis vinifera plants infected by Flavescence dorée (FD), one of the most severe phytoplasma diseases affecting grapevine. The analysis of small RNAs from healthy, recovered (plants showing spontaneous and stable remission of symptoms), and FD-infected ""Barbera"" grapevines showed that FD altered the expression profiles of several miRNAs, including those involved in cell development and photosynthesis, jasmonate signaling, and disease resistance response. The application of miRVIT in a biological context confirmed the effectiveness of the followed approach, especially for the identification of novel miRNA candidates in grapevine. miRVIT database is available at http://mirvit.ipsp.cnr.it. 
Highlights: The application of the newly produced database of grapevine novel miRNAs to the analysis of plants infected by Flavescence dorée reveals key roles of miRNAs in photosynthesis and jasmonate signaling.",miRVIT,0.992824743,NA,0,miRVIT,0.992824743,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/17/2018 +"21605702, 25055920",http://mirwalk.uni-hd.de,"miRWalk--database: prediction of possible miRNA binding sites by ""walking"" the genes of three genomes. MicroRNAs are small, non-coding RNA molecules that can complementarily bind to the mRNA 3'-UTR region to regulate the gene expression by transcriptional repression or induction of mRNA degradation. Increasing evidence suggests a new mechanism by which miRNAs may regulate target gene expression by binding in promoter and amino acid coding regions. Most of the existing databases on miRNAs are restricted to mRNA 3'-UTR region. To address this issue, we present miRWalk, a comprehensive database on miRNAs, which hosts predicted as well as validated miRNA binding sites, information on all known genes of human, mouse and rat. All mRNAs, mitochondrial genes and 10 kb upstream flanking regions of all known genes of human, mouse and rat were analyzed by using a newly developed algorithm named 'miRWalk' as well as with eight already established programs for putative miRNA binding sites. An automated and extensive text-mining search was performed on PubMed database to extract validated information on miRNAs. Combined information was put into a MySQL database. miRWalk presents predicted and validated information on miRNA-target interaction. Such a resource enables researchers to validate new targets of miRNA not only on 3'-UTR, but also on the other regions of all known genes. The 'Validated Target module' is updated every month and the 'Predicted Target module' is updated every 6 months. 
miRWalk is freely available at http://mirwalk.uni-hd.de/.",miRWalk,0.998156488,NA,0,miRWalk,0.998156488,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2014 +25953081,http://guanlab.ccmb.med.umich.edu/misomine,"MIsoMine: a genome-scale high-resolution data portal of expression, function and networks at the splice isoform level in the mouse. Products of multiexon genes, especially in higher organisms, are a mixture of isoforms with different or even opposing functions, and therefore need to be treated separately. However, most studies and available resources such as Gene Ontology provide only gene-level function annotations, and therefore lose the differential information at the isoform level. Here we report MIsoMine, a high-resolution portal to multiple levels of functional information of alternatively spliced isoforms in the mouse. This data portal provides tissue-specific expression patterns and co-expression networks, along with such previously published functional genomic data as protein domains, predicted isoform-level functions and functional relationships. The core utility of MIsoMine is allowing users to explore a preprocessed, quality-controlled set of RNA-seq data encompassing diverse tissues and cell lineages. Tissue-specific co-expression networks were established, allowing a 2D ranking of isoforms and tissues by co-expression patterns. The results of the multiple isoforms of the same gene are presented in parallel to facilitate direct comparison, with cross-talking to prioritized functions at the isoform level. MIsoMine provides the first isoform-level resolution effort at genome-scale. We envision that this data portal will be a valuable resource for exploring functional genomic data, and will complement the existing functionalities of the mouse genome informatics database and the gene expression database for the laboratory mouse. 
Database URL: http://guanlab.ccmb.med.umich.edu/misomine/",MIsoMine,0.996668458,NA,0,MIsoMine,0.996668458,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/7/2015 +23864220,http://www.mispred.com,"MisPred: a resource for identification of erroneous protein sequences in public databases. Correct prediction of the structure of protein-coding genes of higher eukaryotes is still a difficult task; therefore, public databases are heavily contaminated with mispredicted sequences. The high rate of misprediction has serious consequences because it significantly affects the conclusions that may be drawn from genome-scale sequence analyses of eukaryotic genomes. Here we present the MisPred database and computational pipeline that provide efficient means for the identification of erroneous sequences in public databases. The MisPred database contains a collection of abnormal, incomplete and mispredicted protein sequences from 19 metazoan species identified as erroneous by MisPred quality control tools in the UniProtKB/Swiss-Prot, UniProtKB/TrEMBL, NCBI/RefSeq and EnsEMBL databases. Major releases of the database are automatically generated and updated regularly. The database (http://www.mispred.com) is easily accessible through a simple web interface coupled to a powerful query engine and a standard web service. The content is completely or partially downloadable in a variety of formats. DATABASE URL: http://www.mispred.com.",MisPred,0.997501791,NA,0,MisPred,0.997501791,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/17/2013 +33502607,http://missense3d.bc.ic.ac.uk,"Missense3D-DB web catalogue: an atom-based analysis and repository of 4M human protein-coding genetic variants. The interpretation of human genetic variation is one of the greatest challenges of modern genetics. New approaches are urgently needed to prioritize variants, especially those that are rare or lack a definitive clinical interpretation. We examined 10,136,597 human missense genetic variants from GnomAD, ClinVar and UniProt. 
We were able to perform large-scale atom-based mapping and phenotype interpretation of 3,960,015 of these variants onto 18,874 experimental and 84,818 in house predicted three-dimensional coordinates of the human proteome. We demonstrate that 14% of amino acid substitutions from the GnomAD database that could be structurally analysed are predicted to affect protein structure (n = 568,548, of which 566,439 rare or extremely rare) and may, therefore, have a yet unknown disease-causing effect. The same is true for 19.0% (n = 6266) of variants of unknown clinical significance or conflicting interpretation reported in the ClinVar database. The results of the structural analysis are available in the dedicated web catalogue Missense3D-DB ( http://missense3d.bc.ic.ac.uk/ ). For each of the 4 M variants, the results of the structural analysis are presented in a friendly concise format that can be included in clinical genetic reports. A detailed report of the structural analysis is also available for the non-experts in structural biology. Population frequency and predictions from SIFT and PolyPhen are included for a more comprehensive variant interpretation. This is the first large-scale atom-based structural interpretation of human genetic variation and offers geneticists and the biomedical community a new approach to genetic variant interpretation.",Missense3D-DB,0.892035206,NA,0,Missense3D-DB,0.892035206,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/27/2021 +29155944,http://fgrtools.hms.harvard.edu/MIST,"Molecular Interaction Search Tool (MIST): an integrated resource for mining gene and protein interaction data. Model organism and human databases are rich with information about genetic and physical interactions. These data can be used to interpret and guide the analysis of results from new studies and develop new hypotheses. Here, we report the development of the Molecular Interaction Search Tool (MIST; http://fgrtools.hms.harvard.edu/MIST/). 
The MIST database integrates biological interaction data from yeast, nematode, fly, zebrafish, frog, rat and mouse model systems, as well as human. For individual or short gene lists, the MIST user interface can be used to identify interacting partners based on protein-protein and genetic interaction (GI) data from the species of interest as well as inferred interactions, known as interologs, and to view a corresponding network. The data, interologs and search tools at MIST are also useful for analyzing 'omics datasets. In addition to describing the integrated database, we also demonstrate how MIST can be used to identify an appropriate cut-off value that balances false positive and negative discovery, and present use-cases for additional types of analysis. Altogether, the MIST database and search tools support visualization and navigation of existing protein and GI data, as well as comparison of new and existing data.",MIST,0.989567836,Molecular Interaction Search Tool,0.924612188,MIST,0.989567836,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +31754718,http://mistdb.com,"MiST 3.0: an updated microbial signal transduction database with an emphasis on chemosensory systems. Bacteria and archaea employ dedicated signal transduction systems that modulate gene expression, second-messenger turnover, quorum sensing, biofilm formation, motility, host-pathogen and beneficial interactions. The updated MiST database provides a comprehensive classification of microbial signal transduction systems. This update is a result of a substantial scaling to accommodate constantly growing microbial genomic data. More than 125 000 genomes, 516 million genes and almost 100 million unique protein sequences are currently stored in the database. For each bacterial and archaeal genome, MiST 3.0 provides a complete signal transduction profile, thus facilitating theoretical and experimental studies on signal transduction and gene regulation. 
New software infrastructure and distributed pipeline implemented in MiST 3.0 enable regular genome updates based on the NCBI RefSeq database. A novel MiST feature is the integration of unique profile HMMs to link complex chemosensory systems with corresponding chemoreceptors in bacterial and archaeal genomes. The data can be explored online or via RESTful API (freely available at https://mistdb.com).",MiST,0.747567356,NA,0,MiST,0.747567356,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +28608363,http://misynpat.org,"MiSynPat: An integrated knowledge base linking clinical, genetic, and structural data for disease-causing mutations in human mitochondrial aminoacyl-tRNA synthetases. Numerous mutations in each of the mitochondrial aminoacyl-tRNA synthetases (aaRSs) have been implicated in human diseases. The mutations are autosomal and recessive and lead mainly to neurological disorders, although with pleiotropic effects. The processes and interactions that drive the etiology of the disorders associated with mitochondrial aaRSs (mt-aaRSs) are far from understood. The complexity of the clinical, genetic, and structural data requires concerted, interdisciplinary efforts to understand the molecular biology of these disorders. Toward this goal, we designed MiSynPat, a comprehensive knowledge base together with an ergonomic Web server designed to organize and access all pertinent information (sequences, multiple sequence alignments, structures, disease descriptions, mutation characteristics, original literature) on the disease-linked human mt-aaRSs. With MiSynPat, a user can also evaluate the impact of a possible mutation on sequence-conservation-structure in order to foster the links between basic and clinical researchers and to facilitate future diagnosis. The proposed integrated view, coupled with research on disease-related mt-aaRSs, will help to reveal new functions for these enzymes and to open new vistas in the molecular biology of the cell. 
The purpose of MiSynPat, freely available at http://misynpat.org, is to constitute a reference and a converging resource for scientists and clinicians.",MiSynPat,0.997369468,NA,0,MiSynPat,0.997369468,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/27/2017 +24170808,http://mitobreak.portugene.com,"MitoBreak: the mitochondrial DNA breakpoints database. Mitochondrial DNA (mtDNA) rearrangements are key events in the development of many diseases. Investigations of mtDNA regions affected by rearrangements (i.e. breakpoints) can lead to important discoveries about rearrangement mechanisms and can offer important clues about the causes of mitochondrial diseases. Here, we present the mitochondrial DNA breakpoints database (MitoBreak; http://mitobreak.portugene.com), a free, web-accessible comprehensive list of breakpoints from three classes of somatic mtDNA rearrangements: circular deleted (deletions), circular partially duplicated (duplications) and linear mtDNAs. Currently, MitoBreak contains >1400 mtDNA rearrangements from seven species (Homo sapiens, Mus musculus, Rattus norvegicus, Macaca mulatta, Drosophila melanogaster, Caenorhabditis elegans and Podospora anserina) and their associated phenotypic information collected from nearly 400 publications. The database allows researchers to perform multiple types of data analyses through user-friendly interfaces with full or partial datasets. It also permits the download of curated data and the submission of new mtDNA rearrangements. 
For each reported case, MitoBreak also documents the precise breakpoint positions, junction sequences, disease or associated symptoms and links to the related publications, providing a useful resource to study the causes and consequences of mtDNA structural alterations.",MitoBreak,0.998078406,mitochondrial DNA breakpoints database,0.974964321,MitoBreak,0.998078406,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2013 +33174596,http://www.broadinstitute.org/mitocarta,"MitoCarta3.0: an updated mitochondrial proteome now with sub-organelle localization and pathway annotations. The mammalian mitochondrial proteome is under dual genomic control, with 99% of proteins encoded by the nuclear genome and 13 originating from the mitochondrial DNA (mtDNA). We previously developed MitoCarta, a catalogue of over 1000 genes encoding the mammalian mitochondrial proteome. This catalogue was compiled using a Bayesian integration of multiple sequence features and experimental datasets, notably protein mass spectrometry of mitochondria isolated from fourteen murine tissues. Here, we introduce MitoCarta3.0. Beginning with the MitoCarta2.0 inventory, we performed manual review to remove 100 genes and introduce 78 additional genes, arriving at an updated inventory of 1136 human genes. We now include manually curated annotations of sub-mitochondrial localization (matrix, inner membrane, intermembrane space, outer membrane) as well as assignment to 149 hierarchical 'MitoPathways' spanning seven broad functional categories relevant to mitochondria. MitoCarta3.0, including sub-mitochondrial localization and MitoPathway annotations, is freely available at http://www.broadinstitute.org/mitocarta and should serve as a continued community resource for mitochondrial biology and medicine.",MitoCarta,0.984350145,NA,0,MitoCarta,0.984350145,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +29129553,http://clingen.igib.res.in/mitoepigenome,"MitoepigenomeKB a comprehensive resource for human mitochondrial epigenetic data. 
Epigenetic modifications in the mitochondrial genome has been an emerging area of interest in the recent years in the field of mitochondrial biology. The renewed interest in the area has been largely fueled by a number of reports in the recent years suggesting the presence of epigenetic modifications in human mitochondrial genome and their associations with exposure to environmental factors and human diseases and or traits. Nevertheless there has been no systematic effort to curate, organize this information to enable cross-comparison between studies and datasets. We compiled 62 datasets from 9 studies on the epigenetic modifications in human mitochondrial genome to create a comprehensive catalog. This catalog is available as a user friendly interface - mitoepigenomeKB, where the data could be searched, browsed or visualized. The resource is available at URL: http://clingen.igib.res.in/mitoepigenome/. We hope mitoepigenomeKB would emerge as a central resource for datasets on epigenetic modifications in human mitochondria and would serve as the starting point to understanding the biology of human mitochondrial epigenome.",mitoepigenomeKB,0.912198901,NA,0,mitoepigenomeKB,0.912198901,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/9/2017 +"23955518, 29668970",http://mitofish.aori.u-tokyo.ac.jp,"MitoFish and MitoAnnotator: a mitochondrial genome database of fish with an accurate and automatic annotation pipeline. Mitofish is a database of fish mitochondrial genomes (mitogenomes) that includes powerful and precise de novo annotations for mitogenome sequences. Fish occupy an important position in the evolution of vertebrates and the ecology of the hydrosphere, and mitogenomic sequence data have served as a rich source of information for resolving fish phylogenies and identifying new fish species. 
The importance of a mitogenomic database continues to grow at a rapid pace as massive amounts of mitogenomic data are generated with the advent of new sequencing technologies. A severe bottleneck seems likely to occur with regard to mitogenome annotation because of the overwhelming pace of data accumulation and the intrinsic difficulties in annotating sequences with degenerating transfer RNA structures, divergent start/stop codons of the coding elements, and the overlapping of adjacent elements. To ease this data backlog, we developed an annotation pipeline named MitoAnnotator. MitoAnnotator automatically annotates a fish mitogenome with a high degree of accuracy in approximately 5 min; thus, it is readily applicable to data sets of dozens of sequences. MitoFish also contains re-annotations of previously sequenced fish mitogenomes, enabling researchers to refer to them when they find annotations that are likely to be erroneous or while conducting comparative mitogenomic analyses. For users who need more information on the taxonomy, habitats, phenotypes, or life cycles of fish, MitoFish provides links to related databases. MitoFish and MitoAnnotator are freely available at http://mitofish.aori.u-tokyo.ac.jp/ (last accessed August 28, 2013); all of the data can be batch downloaded, and the annotation pipeline can be used via a web interface.",MitoFish,0.996900797,NA,0,MitoFish,0.996900797,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2018 +23585830,http://mitolsdb.igib.res.in,"MitoLSDB: a comprehensive resource to study genotype to phenotype correlations in human mitochondrial DNA variations. Human mitochondrial DNA (mtDNA) encodes a set of 37 genes which are essential structural and functional components of the electron transport chain. Variations in these genes have been implicated in a broad spectrum of diseases and are extensively reported in literature and various databases. 
In this study, we describe MitoLSDB, an integrated platform to catalogue disease association studies on mtDNA (http://mitolsdb.igib.res.in). The main goal of MitoLSDB is to provide a central platform for direct submissions of novel variants that can be curated by the Mitochondrial Research Community. MitoLSDB provides access to standardized and annotated data from literature and databases encompassing information from 5231 individuals, 675 populations and 27 phenotypes. This platform is developed using the Leiden Open (source) Variation Database (LOVD) software. MitoLSDB houses information on all 37 genes in each population amounting to 132397 variants, 5147 unique variants. For each variant its genomic location as per the Revised Cambridge Reference Sequence, codon and amino acid change for variations in protein-coding regions, frequency, disease/phenotype, population, reference and remarks are also listed. MitoLSDB curators have also reported errors documented in literature which includes 94 phantom mutations, 10 NUMTs, six documentation errors and one artefactual recombination. MitoLSDB is the largest repository of mtDNA variants systematically standardized and presented using the LOVD platform. We believe that this is a good starting resource to curate mtDNA variants and will facilitate direct submissions enhancing data coverage, annotation in context of pathogenesis and quality control by ensuring non-redundancy in reporting novel disease associated variants.",MitoLSDB,0.996946871,NA,0,MitoLSDB,0.996946871,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/9/2013 +"22121219, 26432830, 30398659",http://mitominer.mrc-mbu.cam.ac.uk,"MitoMiner: a data warehouse for mitochondrial proteomics data. MitoMiner (http://mitominer.mrc-mbu.cam.ac.uk/) is a data warehouse for the storage and analysis of mitochondrial proteomics data gathered from publications of mass spectrometry and green fluorescent protein tagging studies. 
In MitoMiner, these data are integrated with data from UniProt, Gene Ontology, Online Mendelian Inheritance in Man, HomoloGene, Kyoto Encyclopaedia of Genes and Genomes and PubMed. The latest release of MitoMiner stores proteomics data sets from 46 studies covering 11 different species from eumetazoa, viridiplantae, fungi and protista. MitoMiner is implemented by using the open source InterMine data warehouse system, which provides a user interface allowing users to upload data for analysis, personal accounts to store queries and results and enables queries of any data in the data model. MitoMiner also provides lists of proteins for use in analyses, including the new MitoMiner mitochondrial proteome reference sets that specify proteins with substantial experimental evidence for mitochondrial localization. As further mitochondrial proteomics data sets from normal and diseased tissue are published, MitoMiner can be used to characterize the variability of the mitochondrial proteome between tissues and investigate how changes in the proteome may contribute to mitochondrial dysfunction and mitochondrial-associated diseases such as cancer, neurodegenerative diseases, obesity, diabetes, heart failure and the ageing process.",MitoMiner,0.994106472,NA,0,MitoMiner,0.994106472,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +24561221,http://compubio.in/mitosatplant,"MitoSatPlant: mitochondrial microsatellites database of viridiplantae. Microsatellites also known as simple sequence repeats (SSRs) consist of 1-6 nucleotide long repeating units. The importance of mitochondrial SSRs (mtSSRs) in fields like population genetics, plant phylogenetics and genome mapping motivated us to develop MitoSatPlant, a repository of plant mtSSRs. It contains information for perfect, imperfect and compound SSRs mined from 92 mitochondrial genomes of green plants, available at NCBI (as of 1 Feb 2014). A total of 72,798 SSRs were found, of which PCR primers were designed for 72,495 SSRs. 
Among all sequences, tetranucleotide repeats (26,802) were found to be most abundant whereas hexanucleotide repeats (2751) were detected with least frequency. MitoSatPlant was developed using SQL server 2008 and can be accessed through a front end designed in ASP.Net. It is an easy to use, user-friendly database and will prove to be a useful resource for plant scientists. To the best of our knowledge MitoSatPlant is the only database available for plant mtSSRs and can be freely accessed at http://compubio.in/mitosatplant/.",MitoSatPlant,0.997031987,NA,0,MitoSatPlant,0.997031987,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/19/2014 +34266386,http://www.mitotox.org,"MitoTox: a comprehensive mitochondrial toxicity database. Background Mitochondria play essential roles in regulating cellular functions. Some drug treatments and molecular interventions have been reported to have off-target effects damaging mitochondria and causing severe side effects. The development of a database for the management of mitochondrial toxicity-related molecules and their targets is important for further analyses. Results To correlate chemical, biological and mechanistic information on clinically relevant mitochondria-related toxicity, a comprehensive mitochondrial toxicity database (MitoTox) was developed. MitoTox is an electronic repository that integrates comprehensive information about mitochondria-related toxins and their targets. Information and data related to mitochondrial toxicity originate from various sources, including scientific journals and other electronic databases. These resources were manually verified and extracted into MitoTox. The database currently contains over 1400 small-molecule compounds, 870 mitochondrial targets, and more than 4100  mitochondrial toxin-target associations. 
Each MitoTox data record contains over 30 fields, including biochemical properties, therapeutic classification, target proteins, toxicological data, mechanistic information, clinical side effects, and references. Conclusions MitoTox provides a fully searchable database with links to references and other databases. Potential applications of MitoTox include toxicity classification, prediction, reference and education. MitoTox is available online at http://www.mitotox.org .",MitoTox,0.988955975,toxicity database,0.584199995,MitoTox,0.988955975,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/15/2021 +22123747,http://www.caspur.it/mitozoa,"MitoZoa 2.0: a database resource and search tools for comparative and evolutionary analyses of mitochondrial genomes in Metazoa. The MITOchondrial genome database of metaZOAns (MitoZoa) is a public resource for comparative analyses of metazoan mitochondrial genomes (mtDNA) at both the sequence and genomic organizational levels. The main characteristics of the MitoZoa database are the careful revision of mtDNA entry annotations and the possibility of retrieving gene order and non-coding region (NCR) data in appropriate formats. The MitoZoa retrieval system enables basic and complex queries at various taxonomic levels using different search menus. MitoZoa 2.0 has been enhanced in several aspects, including: a re-annotation pipeline to check the correctness of protein-coding gene predictions; a standardized annotation of introns and of precursor ORFs whose functionality is post-transcriptionally recovered by RNA editing or programmed translational frameshifting; updates of taxon-related fields and a BLAST sequence similarity search tool. Database novelties and the definition of standard mtDNA annotation rules, together with the user-friendly retrieval system and the BLAST service, make MitoZoa a valuable resource for comparative and evolutionary analyses as well as a reference database to assist in the annotation of novel mtDNA sequences. 
MitoZoa is freely accessible at http://www.caspur.it/mitozoa.",MitoZoa,0.997121572,MITOchondrial genome database of metaZOAns,0.70377599,MitoZoa,0.997121572,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2011 +25347823,http://bio.scu.edu.cn:8085/MitProNet,"MitProNet: A knowledgebase and analysis platform of proteome, interactome and diseases for mammalian mitochondria. Mitochondrion plays a central role in diverse biological processes in most eukaryotes, and its dysfunctions are critically involved in a large number of diseases and the aging process. A systematic identification of mitochondrial proteomes and characterization of functional linkages among mitochondrial proteins are fundamental in understanding the mechanisms underlying biological functions and human diseases associated with mitochondria. Here we present a database MitProNet which provides a comprehensive knowledgebase for mitochondrial proteome, interactome and human diseases. First an inventory of mammalian mitochondrial proteins was compiled by widely collecting proteomic datasets, and the proteins were classified by machine learning to achieve a high-confidence list of mitochondrial proteins. The current version of MitProNet covers 1124 high-confidence proteins, and the remainders were further classified as middle- or low-confidence. An organelle-specific network of functional linkages among mitochondrial proteins was then generated by integrating genomic features encoded by a wide range of datasets including genomic context, gene expression profiles, protein-protein interactions, functional similarity and metabolic pathways. The functional-linkage network should be a valuable resource for the study of biological functions of mitochondrial proteins and human mitochondrial diseases. Furthermore, we utilized the network to predict candidate genes for mitochondrial diseases using prioritization algorithms. 
All proteins, functional linkages and disease candidate genes in MitProNet were annotated according to the information collected from their original sources including GO, GEO, OMIM, KEGG, MIPS, HPRD and so on. MitProNet features a user-friendly graphic visualization interface to present functional analysis of linkage networks. As an up-to-date database and analysis platform, MitProNet should be particularly helpful in comprehensive studies of complicated biological mechanisms underlying mitochondrial functions and human mitochondrial diseases. MitProNet is freely accessible at http://bio.scu.edu.cn:8085/MitProNet.",MitProNet,0.994459689,NA,0,MitProNet,0.994459689,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/27/2014 +23071556,http://mdd.psych.ac.cn,"MK4MDD: a multi-level knowledge base and analysis platform for major depressive disorder. Background Major depressive disorder (MDD) is a complex neuropsychiatric syndrome with high heterogeneity. There are different levels of biological components that underlie MDD and interact with each other. To uncover the disease mechanism, large numbers of studies at different levels have been conducted. There is a growing need to integrate data from multiple levels of research into a database to provide a systematic review of current research results. The cross level integration will also help bridge gaps of different research levels for further understanding on MDD. So far, there has been no such effort for MDD. Descriptions We offer researchers a Multi-level Knowledge base for MDD (MK4MDD) to study the interesting interplay of components in the pathophysiological cascade of MDD from genetic variations to diagnostic syndrome. MK4MDD contains 2,341 components and 5,206 relationships between components based on reported experimental results obtained by diligent literature reading with manual curation. All components were well classified with careful curation and supplementary annotation. 
The powerful search and visualization tools make all data in MK4MDD form a cross-linked network to be applied to a broad range of both basic and applied research. Conclusions MK4MDD aims to provide researchers with a central knowledge base and analysis platform for MDD etiological and pathophysiological mechanisms research. MK4MDD is freely available at http://mdd.psych.ac.cn.",MK4MDD,0.993700966,evel Knowledge base for MDD,0.929511756,MK4MDD,0.993700966,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/5/2012 +33126250,http://mlodis.phasep.pro,"MloDisDB: a manually curated database of the relations between membraneless organelles and diseases. . Cells are compartmentalized by numerous membrane-bounded organelles and membraneless organelles (MLOs) to ensure temporal and spatial regulation of various biological processes. A number of MLOs, such as nucleoli, nuclear speckles and stress granules, exist as liquid droplets within the cells and arise from the condensation of proteins and RNAs via liquid-liquid phase separation (LLPS). By concentrating certain proteins and RNAs, MLOs accelerate biochemical reactions and protect cells during stress, and dysfunction of MLOs is associated with various pathological processes. With the development in this field, more and more relations between the MLOs and diseases have been described; however, these results have not been made available in a centralized resource. Herein, we build MloDisDB, a database which aims to gather the relations between MLOs and diseases from dispersed literature. In addition, the relations between LLPS and diseases were included as well. Currently, MloDisDB contains 771 curated entries from 607 publications; each entry in MloDisDB contains detailed information about the MLO, the disease and the functional factor in the relation. Furthermore, an efficient and user-friendly interface for users to search, browse and download all entries was provided. 
MloDisDB is the first comprehensive database of the relations between MLOs and diseases so far, and the database is freely accessible at http://mlodis.phasep.pro/.",MloDisDB,0.996973872,NA,0,MloDisDB,0.996973872,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +28293068,http://14.139.229.199/home.aspx,"A web-based microsatellite database for the Magnaporthe oryzae genome. Microsatellites have been widely utilized for molecular marker development. Codominant and multiallelic nature of these simple repeats have several advantages over other types of molecular markers. Their broad applicability in the area of molecular biology like gene mapping, genome characterization, genome evolution, and gene regulation has been reported in various crop plants, animals and fungi. Considering these benefits of the SSR markers, a MMDB (Magnaporthe oryzae Microsatellite Database) was developed to help in understanding about the pathogen and its diversity at strains level of a particular geographic region, which can help us to make a proper utilization of blast resistance genes in the region. This microsatellite database is based on whole genome sequence of two M. oryzae isolates, RML-29 (2665 SSRs from 43037792 bp) and RP-2421 (3169 SSRs from 45510614 bp). Although, first M. oryzae genome (70-15) was sequenced in 2005, but this sequenced isolate is not a true field isolate of M. oryzae. Therefore, MMDB has great potential in the study of diversification and characterization of M. oryzae and other related fungi. Availability http://14.139.229.199/home.aspx.",MMDB,0.978604794,Magnaporthe oryzae Microsatellite Database,0.971214314,MMDB,0.978604794,1,NA,"22135289.0, 24319143.0",NA,NA,NA,do not merge,NA,NA,NA,11/29/2016 +22135289,http://www.ncbi.nlm.nih.gov/structure,"MMDB: 3D structures and macromolecular interactions. Close to 60% of protein sequences tracked in comprehensive databases can be mapped to a known three-dimensional (3D) structure by standard sequence similarity searches. 
Potentially, a great deal can be learned about proteins or protein families of interest from considering 3D structure, and to this day 3D structure data may remain an underutilized resource. Here we present enhancements in the Molecular Modeling Database (MMDB) and its data presentation, specifically pertaining to biologically relevant complexes and molecular interactions. MMDB is tightly integrated with NCBI's Entrez search and retrieval system, and mirrors the contents of the Protein Data Bank. It links protein 3D structure data with sequence data, sequence classification resources and PubChem, a repository of small-molecule chemical structures and their biological activities, facilitating access to 3D structure data not only for structural biologists, but also for molecular biologists and chemists. MMDB provides a complete set of detailed and pre-computed structural alignments obtained with the VAST algorithm, and provides visualization tools for 3D structure and structure/sequence alignment via the molecular graphics viewer Cn3D. MMDB can be accessed at http://www.ncbi.nlm.nih.gov/structure.",MMDB,0.99548922,Molecular Modeling Database,0.961676583,MMDB,0.99548922,1,NA,"24319143.0, 28293068.0",NA,NA,NA,merge only:,NA,NA,"22135289.0, 24319143.0",12/1/2011 +24319143,http://www.ncbi.nlm.nih.gov/Structure,"MMDB and VAST+: tracking structural similarities between macromolecular complexes. The computational detection of similarities between protein 3D structures has become an indispensable tool for the detection of homologous relationships, the classification of protein families and functional inference. Consequently, numerous algorithms have been developed that facilitate structure comparison, including rapid searches against a steadily growing collection of protein structures. 
To this end, NCBI's Molecular Modeling Database (MMDB), which is based on the Protein Data Bank (PDB), maintains a comprehensive and up-to-date archive of protein structure similarities computed with the Vector Alignment Search Tool (VAST). These similarities have been recorded on the level of single proteins and protein domains, comprising in excess of 1.5 billion pairwise alignments. Here we present VAST+, an extension to the existing VAST service, which summarizes and presents structural similarity on the level of biological assemblies or macromolecular complexes. VAST+ simplifies structure neighboring results and shows, for macromolecular complexes tracked in MMDB, lists of similar complexes ranked by the extent of similarity. VAST+ replaces the previous VAST service as the default presentation of structure neighboring data in NCBI's Entrez query and retrieval system. MMDB and VAST+ can be accessed via http://www.ncbi.nlm.nih.gov/Structure.",MMDB,0.983329371,Molecular Modeling Database,0.892572984,MMDB,0.983329371,1,NA,"22135289.0, 28293068.0",NA,NA,NA,merge only:,NA,NA,"22135289.0, 24319143.0",12/6/2013 +32159764,http://biodb.swu.edu.cn/mmdb,"MMHub, a database for the mulberry metabolome. . Mulberry is an important economic crop plant and traditional medicine. It contains a huge array of bioactive metabolites such as flavonoids, amino acids, alkaloids and vitamins. Consequently, mulberry has received increasing attention in recent years. MMHub (version 1.0) is the first open public repository of mass spectra of small chemical compounds (<1000 Da) in mulberry leaves. The database contains 936 electrospray ionization tandem mass spectrometry (ESI-MS2) data and lists the specific distribution of compounds in 91 mulberry resources with two biological duplicates. ESI-MS2 data were obtained under non-standardized and independent experimental conditions. 
In total, 124 metabolites were identified or tentatively annotated and details of 90 metabolites with associated chemical structures have been deposited in the database. Supporting information such as PubChem compound information, molecular formula and metabolite classification are also provided in the MS2 spectral tag library. The MMHub provides important and comprehensive metabolome data for scientists working with mulberry. This information will be useful for the screening of quality resources and specific metabolites of mulberry. Database URL: https://biodb.swu.edu.cn/mmdb/.",MMHub,0.995018721,NA,0,MMHub,0.995018721,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +22139941,http://mmmdb.iab.keio.ac.jp,"MMMDB: Mouse Multiple Tissue Metabolome Database. The Mouse Multiple Tissue Metabolome Database (MMMDB) provides comprehensive and quantitative metabolomic information for multiple tissues from single mice. Manually curated databases that integrate literature-based individual metabolite information have been available so far. However, data sets on the absolute concentration of a single metabolite integrated from multiple resources are often difficult to be used when different metabolomic studies are compared because the relative balance of the multiple metabolite concentrations in the metabolic pathways as a snapshot of a dynamic system is more important than the absolute concentration of a single metabolite. We developed MMMDB by performing non-targeted analyses of cerebra, cerebella, thymus, spleen, lung, liver, kidney, heart, pancreas, testis and plasma using capillary electrophoresis time-of-flight mass spectrometry and detected 428 non-redundant features from which 219 metabolites were successfully identified. Quantified concentrations of the individual metabolites and the corresponding processed raw data; for example, the electropherograms and mass spectra with their annotations, such as isotope and fragment information, are stored in the database. 
MMMDB is designed to normalize users' data, which can be submitted online and used to visualize overlaid electropherograms. Thus, MMMDB allows newly measured data to be compared with the other data in the database. MMMDB is available at: http://mmmdb.iab.keio.ac.jp.",MMMDB,0.997343063,Mouse Multiple Tissue Metabolome Database,0.988576792,MMMDB,0.997343063,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2011 +27509041,http://clri.res.in/subramanian/databases/mmpi/index.php,"MMpI: A WideRange of Available Compounds of Matrix Metalloproteinase Inhibitors. Matrix metalloproteinases (MMPs) are a family of zinc-dependent proteinases involved in the regulation of the extracellular signaling and structural matrix environment of cells and tissues. MMPs are considered as promising targets for the treatment of many diseases. Therefore, creation of database on the inhibitors of MMP would definitely accelerate the research activities in this area due to its implication in above-mentioned diseases and associated limitations in the first and second generation inhibitors. In this communication, we report the development of a new MMpI database which provides resourceful information for all researchers working in this field. It is a web-accessible, unique resource that contains detailed information on the inhibitors of MMP including small molecules, peptides and MMP Drug Leads. The database contains entries of ~3000 inhibitors including ~72 MMP Drug Leads and ~73 peptide based inhibitors. This database provides the detailed molecular and structural details which are necessary for the drug discovery and development. The MMpI database contains physical properties, 2D and 3D structures (mol2 and pdb format files) of inhibitors of MMP. Other data fields are hyperlinked to PubChem, ChEMBL, BindingDB, DrugBank, PDB, MEROPS and PubMed. The database has extensive searching facility with MMpI ID, IUPAC name, chemical structure and with the title of research article. 
The MMP inhibitors provided in MMpI database are optimized using Python-based Hierarchical Environment for Integrated Xtallography (Phenix) software. MMpI Database is unique and it is the only public database that contains and provides the complete information on the inhibitors of MMP. Database URL: http://clri.res.in/subramanian/databases/mmpi/index.php.",MMpI,0.985122144,NA,0,MMpI,0.985122144,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/10/2016 +31352145,http://mmrdb.org,"MMRdb: Measles, mumps, and rubella viruses database and analysis resource. Measles, mumps, and rubella viruses are well known human pathogens that cause mild to severe illnesses. Despite the existence of MMR vaccines since 1971, outbreaks have been largely documented even in highly vaccinated populations. There is a pressing need to develop a resource to monitor genetic and antigenic variations among these viruses. Here, we introduced MMRdb, a web central database and analysis resource for measles, mumps, and rubella viruses. Users can search viruses at gene level and obtain sequence information based on gene product, geographic location, year, or host. The MMRdb also catalogs experimentally verified B cells and T cells antigenic epitopes data. A set of computation tools such as multiple sequence alignment, Geo Chart, and sequence similarity BLAST search has been implemented in a user-friendly database. The main features of this database will assist researchers in monitoring genetics and antigenic variations, tracking geographic spread with regards of sequence information, and facilitate the development of diagnostics, vaccines, and immunotherapeutics. Database URL: http://mmrdb.org.",MMRdb,0.9871943,NA,0,MMRdb,0.9871943,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/26/2019 +32833025,http://www.rna-society.org/mndr,"MNDR v3.0: mammal ncRNA-disease repository with increased coverage and annotation. Many studies have indicated that non-coding RNA (ncRNA) dysfunction is closely related to numerous diseases. 
Recently, accumulated ncRNA-disease associations have made related databases insufficient to meet the demands of biomedical research. The constant updating of ncRNA-disease resources has become essential. Here, we have updated the mammal ncRNA-disease repository (MNDR, http://www.rna-society.org/mndr/) to version 3.0, containing more than one million entries, four-fold increment in data compared to the previous version. Experimental and predicted circRNA-disease associations have been integrated, increasing the number of categories of ncRNAs to five, and the number of mammalian species to 11. Moreover, ncRNA-disease related drug annotations and associations, as well as ncRNA subcellular localizations and interactions, were added. In addition, three ncRNA-disease (miRNA/lncRNA/circRNA) prediction tools were provided, and the website was also optimized, making it more practical and user-friendly. In summary, MNDR v3.0 will be a valuable resource for the investigation of disease mechanisms and clinical treatment strategies.",MNDR,0.994394898,mammal ncRNA-disease repository,0.987229202,MNDR,0.994394898,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +24773765,http://research.nhgri.nih.gov/mnemiopsis,"A customized Web portal for the genome of the ctenophore Mnemiopsis leidyi. Background Mnemiopsis leidyi is a ctenophore native to the coastal waters of the western Atlantic Ocean. A number of studies on Mnemiopsis have led to a better understanding of many key biological processes, and these studies have contributed to the emergence of Mnemiopsis as an important model for evolutionary and developmental studies. Recently, we sequenced, assembled, annotated, and performed a preliminary analysis on the 150-megabase genome of the ctenophore, Mnemiopsis. 
This sequencing effort has produced the first set of whole-genome sequencing data on any ctenophore species and is amongst the first wave of projects to sequence an animal genome de novo solely using next-generation sequencing technologies. Description The Mnemiopsis Genome Project Portal (http://research.nhgri.nih.gov/mnemiopsis/) is intended both as a resource for obtaining genomic information on Mnemiopsis through an intuitive and easy-to-use interface and as a model for developing customized Web portals that enable access to genomic data. The scope of data available through this Portal goes well beyond the sequence data available through GenBank, providing key biological information not available elsewhere, such as pathway and protein domain analyses; it also features a customized genome browser for data visualization. Conclusions We expect that the availability of these data will allow investigators to advance their own research projects aimed at understanding phylogenetic diversity and the evolution of proteins that play a fundamental role in metazoan development. The overall approach taken in the development of this Web site can serve as a viable model for disseminating data from whole-genome sequencing projects, framed in a way that best-serves the specific needs of the scientific community.",Mnemiopsis,0.595096231,NA,0,Mnemiopsis,0.595096231,1,32386298,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,4/28/2014 +22146221,"http://minimotifminer.org, http://mnm.engr.uconn.edu","Minimotif Miner 3.0: database expansion and significantly improved reduction of false-positive predictions from consensus sequences. Minimotif Miner (MnM available at http://minimotifminer.org or http://mnm.engr.uconn.edu) is an online database for identifying new minimotifs in protein queries. Minimotifs are short contiguous peptide sequences that have a known function in at least one protein. 
Here we report the third release of the MnM database which has now grown 60-fold to approximately 300,000 minimotifs. Since short minimotifs are by their nature not very complex we also summarize a new set of false-positive filters and linear regression scoring that vastly enhance minimotif prediction accuracy on a test data set. This online database can be used to predict new functions in proteins and causes of disease.",MnM,0.994092762,Minimotif Miner,0.651109982,MnM,0.994092762,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/6/2011 +25725060,http://morus.swu.edu.cn/mntedb,"MnTEdb, a collective resource for mulberry transposable elements. . Mulberry has been used as an economically important food crop for the domesticated silkworm for thousands of years, resulting in one of the oldest and well-known plant-herbivore interactions. The genome of Morus notabilis has now been sequenced and there is an opportunity to mine the transposable element (TE) data. To better understand the roles of TEs in structural, functional and evolutionary dynamics of the mulberry genome, a specific, comprehensive and user-friendly web-based database, MnTEdb, was constructed. It was built based on a detailed and accurate identification of all TEs in mulberry. A total of 5925 TEs belonging to 13 superfamilies and 1062 families were deposited in this database. MnTEdb enables users to search, browse and download the mulberry TE sequences. Meanwhile, data mining tools, including BLAST, GetORF, HMMER, Sequence Extractor and JBrowse were also integrated into MnTEdb. MnTEdb will assist researchers to efficiently take advantage of our newly annotated TEs, which facilitate their studies in the origin, amplification and evolution of TEs, as well as the comparative analysis among the different species. 
Database URL: http://morus.swu.edu.cn/mntedb/",MnTEdb,0.992904842,NA,0,MnTEdb,0.992904842,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/27/2015 +"22661649, 25361972, 32696355",http://mobidb.bio.unipd.it,"MobiDB: a comprehensive database of intrinsic protein disorder annotations. Motivation Disordered protein regions are key to the function of numerous processes within an organism and to the determination of a protein's biological role. The most common source for protein disorder annotations, DisProt, covers only a fraction of the available sequences. Alternatively, the Protein Data Bank (PDB) has been mined for missing residues in X-ray crystallographic structures. Herein, we provide a centralized source for data on different flavours of disorder in protein structures, MobiDB, building on and expanding the content provided by already existing sources. In addition to the DisProt and PDB X-ray structures, we have added experimental information from NMR structures and five different flavours of two disorder predictors (ESpritz and IUpred). These are combined into a weighted consensus disorder used to classify disordered regions into flexible and constrained disorder. Users are encouraged to submit manual annotations through a submission form. MobiDB features experimental annotations for 17 285 proteins, covering the entire PDB and predictions for the SwissProt database, with 565 200 annotated sequences. Depending on the disorder flavour, 6-20% of the residues are predicted as disordered. Availability The database is freely available at http://mobidb.bio.unipd.it/. Contact silvio.tosatto@unipd.it.",MobiDB,0.997653842,NA,0,MobiDB,0.997653842,3,NA,33237329,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2020 +33237329,http://mobidb.org,"MobiDB: intrinsically disordered proteins in 2021. The MobiDB database (URL: https://mobidb.org/) provides predictions and annotations for intrinsically disordered proteins. 
Here, we report recent developments implemented in MobiDB version 4, regarding the database format, with novel types of annotations and an improved update process. The new website includes a re-designed user interface, a more effective search engine and advanced API for programmatic access. The new database schema gives more flexibility for the users, as well as simplifying the maintenance and updates. In addition, the new entry page provides more visualisation tools including customizable feature viewer and graphs of the residue contact maps. MobiDB v4 annotates the binding modes of disordered proteins, whether they undergo disorder-to-order transitions or remain disordered in the bound state. In addition, disordered regions undergoing liquid-liquid phase separation or post-translational modifications are defined. The integrated information is presented in a simplified interface, which enables faster searches and allows large customized datasets to be downloaded in TSV, Fasta or JSON formats. An alternative advanced interface allows users to drill deeper into features of interest. A new statistics page provides information at database and proteome levels. The new MobiDB version presents state-of-the-art knowledge on disordered proteins and improves data accessibility for both computational and experimental users.",MobiDB,0.989758193,NA,0,MobiDB,0.989758193,1,NA,"22661649.0, 25361972.0, 32696355.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +27822553,http://caporaso-lab.github.io/mockrobiota,"mockrobiota: a Public Resource for Microbiome Bioinformatics Benchmarking. . Mock communities are an important tool for validating, optimizing, and comparing bioinformatics methods for microbial community analysis. We present mockrobiota, a public resource for sharing, validating, and documenting mock community data resources, available at http://caporaso-lab.github.io/mockrobiota/. 
The materials contained in mockrobiota include data set and sample metadata, expected composition data (taxonomy or gene annotations or reference sequences for mock community members), and links to raw data (e.g., raw sequence data) for each mock community data set. mockrobiota does not supply physical sample materials directly, but the data set metadata included for each mock community indicate whether physical sample materials are available. At the time of this writing, mockrobiota contains 11 mock community data sets with known species compositions, including bacterial, archaeal, and eukaryotic mock communities, analyzed by high-throughput marker gene sequencing. IMPORTANCE The availability of standard and public mock community data will facilitate ongoing method optimizations, comparisons across studies that share source data, and greater transparency and access and eliminate redundancy. These are also valuable resources for bioinformatics teaching and training. This dynamic resource is intended to expand and evolve to meet the changing needs of the omics community.",mockrobiota,0.997430325,NA,0,mockrobiota,0.997430325,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2016 +22856649,http://bl210.caspur.it/MODEL-DB/MODEL-DB_web/MODindex.php.Operating,"A resource for benchmarking the usefulness of protein structure models. Background Increasingly, biologists and biochemists use computational tools to design experiments to probe the function of proteins and/or to engineer them for a variety of different purposes. The most effective strategies rely on the knowledge of the three-dimensional structure of the protein of interest. However it is often the case that an experimental structure is not available and that models of different quality are used instead. On the other hand, the relationship between the quality of a model and its appropriate use is not easy to derive in general, and so far it has been analyzed in detail only for specific application. 
Results This paper describes a database and related software tools that allow testing of a given structure based method on models of a protein representing different levels of accuracy. The comparison of the results of a computational experiment on the experimental structure and on a set of its decoy models will allow developers and users to assess which is the specific threshold of accuracy required to perform the task effectively. Conclusions The ModelDB server automatically builds decoy models of different accuracy for a given protein of known structure and provides a set of useful tools for their analysis. Pre-computed data for a non-redundant set of deposited protein structures are available for analysis and download in the ModelDB database. IMPLEMENTATION, AVAILABILITY AND REQUIREMENTS: Project name: A resource for benchmarking the usefulness of protein structure models. Project home page: http://bl210.caspur.it/MODEL-DB/MODEL-DB_web/MODindex.php.Operating system(s): Platform independent. Programming language: Perl-BioPerl (program); mySQL, Perl DBI and DBD modules (database); php, JavaScript, Jmol scripting (web server). Other requirements: Java Runtime Environment v1.4 or later, Perl, BioPerl, CPAN modules, HHsearch, Modeller, LGA, NCBI Blast package, DSSP, Speedfill (Surfnet) and PSAIA. License: Free. Any restrictions to use by non-academics: No.",ModelDB,0.958948493,NA,0,ModelDB,0.958948493,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/2/2012 +32986834,http://modelseed.org/biochem,"The ModelSEED Biochemistry Database for the integration of metabolic annotations and the reconstruction, comparison and analysis of metabolic models for plants, fungi and microbes. For over 10 years, ModelSEED has been a primary resource for the construction of draft genome-scale metabolic models based on annotated microbial or plant genomes. Now being released, the biochemistry database serves as the foundation of biochemical data underlying ModelSEED and KBase. 
The biochemistry database embodies several properties that, taken together, distinguish it from other published biochemistry resources by: (i) including compartmentalization, transport reactions, charged molecules and proton balancing on reactions; (ii) being extensible by the user community, with all data stored in GitHub; and (iii) design as a biochemical 'Rosetta Stone' to facilitate comparison and integration of annotations from many different tools and databases. The database was constructed by combining chemical data from many resources, applying standard transformations, identifying redundancies and computing thermodynamic properties. The ModelSEED biochemistry is continually tested using flux balance analysis to ensure the biochemical network is modeling-ready and capable of simulating diverse phenotypes. Ontologies can be designed to aid in comparing and reconciling metabolic reconstructions that differ in how they represent various metabolic pathways. ModelSEED now includes 33,978 compounds and 36,645 reactions, available as a set of extensible files on GitHub, and available to search at https://modelseed.org/biochem and KBase.",ModelSEED,0.991726339,NA,0,ModelSEED,0.991726339,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +27504011,http://modem.hzau.edu.cn,"MODEM: multi-omics data envelopment and mining in maize. . MODEM is a comprehensive database of maize multidimensional omics data, including genomic, transcriptomic, metabolic and phenotypic information from the cellular to individual plant level. This initial release contains approximately 1.06 M high quality SNPs for 508 diverse inbred lines obtained by combining variations from RNA sequencing on whole kernels (15 days after pollination) of 368 lines and a 50 K array for all 508 individuals. 
As all of these data were derived from the same diverse panel of lines, the database also allows various types of genetic mapping (including characterization of phenotypic QTLs, pQTLs; expression QTLs, eQTLs and metabolic QTLs, mQTLs). MODEM is thus designed to promote a better understanding of maize genetic architecture and deep functional annotation of the complex maize genome (and potentially those of other crop plants) and to explore the genotype-phenotype relationships and regulation of maize kernel development at multiple scales, which is also comprehensive for developing novel methods. MODEM is additionally designed to link with other databases to make full use of current resources, and it provides visualization tools for easy browsing. All of the original data and the related mapping results are freely available for easy query and download. This platform also provides helpful tools for general analyses and will be continually updated with additional materials, features and public data related to maize genetics or regulation as they become available.Database URL: (http://modem.hzau.edu.cn).",MODEM,0.9967134,NA,0,MODEM,0.9967134,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/7/2016 +21856757,http://www.modencode.org,"The modENCODE Data Coordination Center: lessons in harvesting comprehensive experimental details. The model organism Encyclopedia of DNA Elements (modENCODE) project is a National Human Genome Research Institute (NHGRI) initiative designed to characterize the genomes of Drosophila melanogaster and Caenorhabditis elegans. A Data Coordination Center (DCC) was created to collect, store and catalog modENCODE data. An effective DCC must gather, organize and provide all primary, interpreted and analyzed data, and ensure the community is supplied with the knowledge of the experimental conditions, protocols and verification checks used to generate each primary data set. 
We present here the design principles of the modENCODE DCC, and describe the ramifications of collecting thorough and deep metadata for describing experiments, including the use of a wiki for capturing protocol and reagent information, and the BIR-TAB specification for linking biological samples to experimental results. modENCODE data can be found at http://www.modencode.org.",modENCODE,0.994001627,model organism Encyclopedia of DNA Elements,0.97074911,modENCODE,0.994001627,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/19/2011 +29284660,http://epic.gs.washington.edu/modERN,"The ModERN Resource: Genome-Wide Binding Profiles for Hundreds of Drosophila and Caenorhabditis elegans Transcription Factors. To develop a catalog of regulatory sites in two major model organisms, Drosophila melanogaster and Caenorhabditis elegans, the modERN (model organism Encyclopedia of Regulatory Networks) consortium has systematically assayed the binding sites of transcription factors (TFs). Combined with data produced by our predecessor, modENCODE (Model Organism ENCyclopedia Of DNA Elements), we now have data for 262 TFs identifying 1.23 M sites in the fly genome and 217 TFs identifying 0.67 M sites in the worm genome. Because sites from different TFs are often overlapping and tightly clustered, they fall into 91,011 and 59,150 regions in the fly and worm, respectively, and these binding sites span as little as 8.7 and 5.8 Mb in the two organisms. Clusters with large numbers of sites (so-called high occupancy target, or HOT regions) predominantly associate with broadly expressed genes, whereas clusters containing sites from just a few factors are associated with genes expressed in tissue-specific patterns. 
All of the strains expressing GFP-tagged TFs are available at the stock centers, and the chromatin immunoprecipitation sequencing data are available through the ENCODE Data Coordinating Center and also through a simple interface (http://epic.gs.washington.edu/modERN/) that facilitates rapid accessibility of processed data sets. These data will facilitate a vast number of scientific inquiries into the function of individual TFs in key developmental, metabolic, and defense and homeostatic regulatory pathways, as well as provide a broader perspective on how individual TFs work together in local networks and globally across the life spans of these two key model organisms.",ModERN,0.978945613,Model Organism ENCyclopedia,0.767016259,ModERN,0.978945613,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/28/2017 +29618898,"http://cib.res.in, http://cib.res.in","An Integrated Molecular Database on Indian Insects. MOlecular Database on Indian Insects (MODII) is an online database linking several databases like Insect Pest Info, Insect Barcode Information System (IBIn), Insect Whole Genome sequence, Other Genomic Resources of National Bureau of Agricultural Insect Resources (NBAIR), Whole Genome sequencing of Honey bee viruses, Insecticide resistance gene database and Genomic tools. This database was developed with a holistic approach for collecting information about phenomic and genomic information of agriculturally important insects. This insect resource database is available online for free at http://cib.res.in. Availability http://cib.res.in/.",MODII,0.976052999,MOlecular Database on Indian Insects,0.967991948,MODII,0.976052999,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/28/2018 +22080565,http://intermine.modencode.org,"modMine: flexible access to modENCODE data. 
In an effort to comprehensively characterize the functional elements within the genomes of the important model organisms Drosophila melanogaster and Caenorhabditis elegans, the NHGRI model organism Encyclopaedia of DNA Elements (modENCODE) consortium has generated an enormous library of genomic data along with detailed, structured information on all aspects of the experiments. The modMine database (http://intermine.modencode.org) described here has been built by the modENCODE Data Coordination Center to allow the broader research community to (i) search for and download data sets of interest among the thousands generated by modENCODE; (ii) access the data in an integrated form together with non-modENCODE data sets; and (iii) facilitate fine-grained analysis of the above data. The sophisticated search features are possible because of the collection of extensive experimental metadata by the consortium. Interfaces are provided to allow both biologists and bioinformaticians to exploit these rich modENCODE data sets now available via modMine.",modMine,0.931703269,NA,0,modMine,0.931703269,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/12/2011 +"23118484, 29106616",http://modomics.genesilico.pl,"MODOMICS: a database of RNA modification pathways--2013 update. MODOMICS is a database of RNA modifications that provides comprehensive information concerning the chemical structures of modified ribonucleosides, their biosynthetic pathways, RNA-modifying enzymes and location of modified residues in RNA sequences. In the current database version, accessible at http://modomics.genesilico.pl, we included new features: a census of human and yeast snoRNAs involved in RNA-guided RNA modification, a new section covering the 5'-end capping process, and a catalogue of 'building blocks' for chemical synthesis of a large variety of modified nucleosides. The MODOMICS collections of RNA modifications, RNA-modifying enzymes and modified RNAs have been also updated. 
A number of newly identified modified ribonucleosides and more than one hundred functionally and structurally characterized proteins from various organisms have been added. In the RNA sequences section, snRNAs and snoRNAs with experimentally mapped modified nucleosides have been added and the current collection of rRNA and tRNA sequences has been substantially enlarged. To facilitate literature searches, each record in MODOMICS has been cross-referenced to other databases and to selected key publications. New options for database searching and querying have been implemented, including a BLAST search of protein sequences and a PARALIGN search of the collected nucleic acid sequences.",MODOMICS,0.997817814,NA,0,MODOMICS,0.997817814,2,NA,33835459,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +33835459,http://www.genesilico.pl/modomics,"MODOMICS: An Operational Guide to the Use of the RNA Modification Pathways Database. MODOMICS is an established database of RNA modifications that provides comprehensive information concerning chemical structures of modified ribonucleosides, their biosynthetic pathways, the location of modified residues in RNA sequences, and RNA-modifying enzymes. This chapter covers the resources available on MODOMICS web server and the basic steps that can be undertaken by the user to explore them. MODOMICS is available at http://www.genesilico.pl/modomics .",MODOMICS,0.996208847,Modification Pathways Database,0.882010448,MODOMICS,0.996208847,1,NA,"23118484.0, 29106616.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +34510194,http://modb.ytu.edu.cn,"MODB: a comprehensive mitochondrial genome database for Mollusca. . 
Mollusca is the largest marine phylum, comprising about 23% of all named marine organisms, Mollusca systematics are still in flux, and an increase in human activities has affected Molluscan reproduction and development, strongly impacting diversity and classification. Therefore, it is necessary to explore the mitochondrial genome of Mollusca. The Mollusca mitochondrial database (MODB) was established for the Life and Health Big Data Center of Yantai University. This database is dedicated to collecting, sorting and sharing basic information regarding mollusks, especially their mitochondrial genome information. We also integrated a series of analysis and visualization tools, such as BLAST, MUSCLE, GENEWISE and LASTZ. In particular, a phylogenetic tree was implemented in this database to visualize the evolutionary relationships between species. The original version contains 616 species whose mitochondrial genomes have been sequenced. The database provides comprehensive information and analysis platform for researchers interested in understanding the biological characteristics of mollusks. Database URL: http://modb.ytu.edu.cn/.",MODB,0.971826156,Mollusca mitochondrial database,0.986387879,Mollusca mitochondrial database,0.986387879,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2021 +33219670,http://mgbase.qnlm.ac,"MolluscDB: an integrated functional and evolutionary genomics database for the hyper-diverse animal phylum Mollusca. Mollusca represents the second largest animal phylum but remains poorly explored from a genomic perspective. While the recent increase in genomic resources holds great promise for a deep understanding of molluscan biology and evolution, access and utilization of these resources still pose a challenge. 
Here, we present the first comprehensive molluscan genomics database, MolluscDB (http://mgbase.qnlm.ac), which compiles and integrates current molluscan genomic/transcriptomic resources and provides convenient tools for multi-level integrative and comparative genomic analyses. MolluscDB enables a systematic view of genomic information from various aspects, such as genome assembly statistics, genome phylogenies, fossil records, gene information, expression profiles, gene families, transcription factors, transposable elements and mitogenome organization information. Moreover, MolluscDB offers valuable customized datasets or resources, such as gene coexpression networks across various developmental stages and adult tissues/organs, core gene repertoires inferred for major molluscan lineages, and macrosynteny analysis for chromosomal evolution. MolluscDB presents an integrative and comprehensive genomics platform that will allow the molluscan community to cope with ever-growing genomic resources and will expedite new scientific discoveries for understanding molluscan biology and evolution.",MolluscDB,0.998527646,NA,0,MolluscDB,0.998527646,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +23143105,http://monarchbase.umassmed.edu,"MonarchBase: the monarch butterfly genome database. The monarch butterfly (Danaus plexippus) is emerging as a model organism to study the mechanisms of circadian clocks and animal navigation, and the genetic underpinnings of long-distance migration. The initial assembly of the monarch genome was released in 2011, and the biological interpretation of the genome focused on the butterfly's migration biology. To make the extensive data associated with the genome accessible to the general biological and lepidopteran communities, we established MonarchBase (available at http://monarchbase.umassmed.edu). The database is an open-access, web-available portal that integrates all available data associated with the monarch butterfly genome. 
Moreover, MonarchBase provides access to an updated version of genome assembly (v3) upon which all data integration is based. These include genes with systematic annotation, as well as other molecular resources, such as brain expressed sequence tags, migration expression profiles and microRNAs. MonarchBase utilizes a variety of retrieving methods to access data conveniently and for integrating biological interpretations.",MonarchBase,0.996606827,NA,0,MonarchBase,0.996606827,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/9/2012 +21880229,http://www.kovlerdiabetescenter.org/registry,"Creation of the Web-based University of Chicago Monogenic Diabetes Registry: using technology to facilitate longitudinal study of rare subtypes of diabetes. Background Monogenic diabetes is a group of disorders caused by mutations in any one of a number of genes. Although a monogenic diagnosis--estimated to represent as much as 2% of all diabetes patients--can have a transformational impact on treatment, the majority of monogenic cases remain unidentified and little is known about their natural history. We thus created the first United States Monogenic Diabetes Registry (http://www.kovlerdiabetescenter.org/registry/) for individuals with either neonatal diabetes diagnosed before 1 year of age or with a phenotype suggestive of maturity-onset diabetes of the young. Methods Inclusion criteria and consent documents are viewable on our Web site, which allows secure collection of contact information to facilitate telephone consent and enrollment. Relevant medical, family, and historical data are collected longitudinally from a variety of sources and stored in our Web-accessible secure database. Results We have enrolled well over 700 subjects in the registry so far, with steady recruitment of those diagnosed under 1 year of age and increasing enrollment of those diagnosed later in life. Initially, participants were mostly self-referred but are increasingly being referred by their physicians. 
Comprehensive survey and medical records data are collected at enrollment, with ongoing collection of longitudinal data. Associated private Facebook and email discussion groups that we established have already fostered active participation. Conclusions Our early success with the Monogenic Diabetes Registry demonstrates the effectiveness of low-cost Web-based tools, including surveys, the Research Electronic Data Capture database program, and discussion groups, for efficient enrollment and support of rare patients, and collection and maintenance of their data.",NA,0,Monogenic Diabetes Registry,0.641969562,Monogenic Diabetes Registry,0.641969562,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2011 +33909069,http://www.bio.iitb.ac.in/mbpd,"Monosaccharide biosynthesis pathways database. A distinctive feature of glycans vis-à-vis proteins and nucleic acids is its structural complexity, which arises from the huge repertoire of monosaccharides, isomeric linkages and branching. A very large number of monosaccharides have so far been discovered in natural glycans. Experimentally, pathways for the biosynthesis have been characterized completely for 55 monosaccharides and partially for a few more. However, there is no single platform, which provides information about monosaccharide biosynthesis pathways and associated enzymes We have gathered 572 experimentally characterized enzymes of 66 biosynthesis pathways from literature and set up a first of its kind database called the Monosaccharide Biosynthesis Pathways Database http://www.bio.iitb.ac.in/mbpd/). Annotations such as the reaction catalyzed, substrate specificity, biosynthesis pathway and PubMed IDs are provided for all the enzymes in the database. Sequence homologs of the experimentally characterized enzymes found in nearly 13,000 completely sequenced genomes from Bacteria and Archaea have also been included in the database. 
This platform will help in the deduction of evolutionary relationships among enzymes such as aminotransferases, nucleotidyltransferases, acetyltransferases and SDR family enzymes. It can also facilitate experimental studies such as direct enzyme assays to validate putative annotations, establish structure-function relationship, expression profiling to determine the function, determine the phenotypic consequences of gene knock-out/knock-in and complementation studies.",NA,0,Monosaccharide Biosynthesis Pathways Database,0.936623167,Monosaccharide Biosynthesis Pathways Database,0.936623167,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2021 +30944327,http://www.ebi.ac.uk/metabolights/MTBLS142,"Collected mass spectrometry data on monoterpene indole alkaloids from natural product chemistry research. This Data Descriptor announces the submission to public repositories of the monoterpene indole alkaloid database (MIADB), a cumulative collection of 172 tandem mass spectrometry (MS/MS) spectra from multiple research projects conducted in eight natural product chemistry laboratories since the 1960s. All data have been annotated and organized to promote reuse by the community. Being a unique collection of these complex natural products, these data can be used to guide the dereplication and targeting of new related monoterpene indole alkaloids within complex mixtures when applying computer-based approaches, such as molecular networking. Each spectrum has its own accession number from CCMSLIB00004679916 to CCMSLIB00004680087 on the GNPS. 
The MIADB is available for download from MetaboLights under the identifier: MTBLS142 ( https://www.ebi.ac.uk/metabolights/MTBLS142 ).",MIADB,0.96922636,monoterpene indole alkaloid database,0.976412416,monoterpene indole alkaloid database,0.976412416,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/3/2019 +30371819,http://moondb.hb.univ-amu.fr,"MoonDB 2.0: an updated database of extreme multifunctional and moonlighting proteins. MoonDB 2.0 (http://moondb.hb.univ-amu.fr/) is a database of predicted and manually curated extreme multifunctional (EMF) and moonlighting proteins, i.e. proteins that perform multiple unrelated functions. We have previously shown that such proteins can be predicted through the analysis of their molecular interaction subnetworks, their functional annotations and their association to distinct groups of proteins that are involved in unrelated functions. In MoonDB 2.0, we updated the set of human EMF proteins (238 proteins), using the latest functional annotations and protein-protein interaction networks. Furthermore, for the first time, we applied our method to four additional model organisms - mouse, fly, worm and yeast - and identified 54 novel EMF proteins in these species. In addition to novel predictions, this update contains 63 human and yeast proteins that were manually curated from literature, including descriptions of moonlighting functions and associated references. Importantly, MoonDB's interface was fully redesigned and improved, and its entries are now cross-referenced in the UniProt Knowledgebase (UniProtKB). MoonDB will be updated once a year with the novel EMF candidates calculated from the latest available protein interactions and functional annotations.",MoonDB,0.993003726,NA,0,MoonDB,0.993003726,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +25324305,http://www.moonlightingproteins.org,"MoonProt: a database for proteins that are known to moonlight. 
Moonlighting proteins comprise a class of multifunctional proteins in which a single polypeptide chain performs multiple biochemical functions that are not due to gene fusions, multiple RNA splice variants or pleiotropic effects. The known moonlighting proteins perform a variety of diverse functions in many different cell types and species, and information about their structures and functions is scattered in many publications. We have constructed the manually curated, searchable, internet-based MoonProt Database (http://www.moonlightingproteins.org) with information about the over 200 proteins that have been experimentally verified to be moonlighting proteins. The availability of this organized information provides a more complete picture of what is currently known about moonlighting proteins. The database will also aid researchers in other fields, including determining the functions of genes identified in genome sequencing projects, interpreting data from proteomics projects and annotating protein sequence and structural databases. In addition, information about the structures and functions of moonlighting proteins can be helpful in understanding how novel protein functional sites evolved on an ancient protein scaffold, which can also help in the design of proteins with novel functions.",MoonProt,0.995451093,NA,0,MoonProt,0.995451093,1,NA,"29126295.0, 33245761.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/16/2014 +"29126295, 33245761",http://moonlightingproteins.org,"MoonProt 2.0: an expansion and update of the moonlighting proteins database. MoonProt 2.0 (http://moonlightingproteins.org) is an updated, comprehensive and open-access database storing expert-curated annotations for moonlighting proteins. Moonlighting proteins contain two or more physiologically relevant distinct functions performed by a single polypeptide chain. 
Here, we describe developments in the MoonProt website and database since our previous report in the Database Issue of Nucleic Acids Research. For this V 2.0 release, we expanded the number of proteins annotated to 370 and modified several dozen protein annotations with additional or updated information, including more links to protein structures in the Protein Data Bank, compared with the previous release. The new entries include more examples from humans and several model organisms, more proteins involved in disease, and proteins with different combinations of functions. The updated web interface includes a search function using BLAST to enable users to search the database for proteins that share amino acid sequence similarity with a protein of interest. The updated website also includes additional background information about moonlighting proteins and an expanded list of links to published articles about moonlighting proteins.",MoonProt,0.958251595,NA,0,MoonProt,0.958251595,2,NA,25324305,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +"24350770, 25404128",http://moped.proteinspire.org,"MOPED enables discoveries through consistently processed proteomics data. The Model Organism Protein Expression Database (MOPED, http://moped.proteinspire.org) is an expanding proteomics resource to enable biological and biomedical discoveries. MOPED aggregates simple, standardized and consistently processed summaries of protein expression and metadata from proteomics (mass spectrometry) experiments from human and model organisms (mouse, worm, and yeast). The latest version of MOPED adds new estimates of protein abundance and concentration as well as relative (differential) expression data. MOPED provides a new updated query interface that allows users to explore information by organism, tissue, localization, condition, experiment, or keyword. 
MOPED supports the Human Proteome Project's efforts to generate chromosome- and diseases-specific proteomes by providing links from proteins to chromosome and disease information as well as many complementary resources. MOPED supports a new omics metadata checklist to harmonize data integration, analysis, and use. MOPED's development is driven by the user community, which spans 90 countries and guides future development that will transform MOPED into a multiomics resource. MOPED encourages users to submit data in a simple format. They can use the metadata checklist to generate a data publication for this submission. As a result, MOPED will provide even greater insights into complex biological processes and systems and enable deeper and more comprehensive biological and biomedical discoveries.",MOPED,0.996681213,Model Organism Protein Expression Database,0.981790811,MOPED,0.996681213,2,24910945,24910945,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/17/2014 +24910945,"http://moped.proteinspire.org, http://delsaglobal.org","MOPED 2.5--an integrated multi-omics resource: multi-omics profiling expression database now includes transcriptomics data. Multi-omics data-driven scientific discovery crucially rests on high-throughput technologies and data sharing. Currently, data are scattered across single omics repositories, stored in varying raw and processed formats, and are often accompanied by limited or no metadata. The Multi-Omics Profiling Expression Database (MOPED, http://moped.proteinspire.org ) version 2.5 is a freely accessible multi-omics expression database. Continual improvement and expansion of MOPED is driven by feedback from the Life Sciences Community. In order to meet the emergent need for an integrated multi-omics data resource, MOPED 2.5 now includes gene relative expression data in addition to protein absolute and relative expression data from over 250 large-scale experiments. 
To facilitate accurate integration of experiments and increase reproducibility, MOPED provides extensive metadata through the Data-Enabled Life Sciences Alliance (DELSA Global, http://delsaglobal.org ) metadata checklist. MOPED 2.5 has greatly increased the number of proteomics absolute and relative expression records to over 500,000, in addition to adding more than four million transcriptomics relative expression records. MOPED has an intuitive user interface with tabs for querying different types of omics expression data and new tools for data visualization. Summary information including expression data, pathway mappings, and direct connection between proteins and genes can be viewed on Protein and Gene Details pages. These connections in MOPED provide a context for multi-omics expression data exploration. Researchers are encouraged to submit omics data which will be consistently processed into expression summaries. MOPED as a multi-omics data resource is a pivotal public database, interdisciplinary knowledge resource, and platform for multi-omics understanding.",MOPED,0.971850872,Multi-Omics Profiling Expression Database,0.936902801,MOPED,0.971850872,1,"24350770.0, 25404128.0","24350770.0, 25404128.0",low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,6/1/2014 +30858555,http://morcvd.sblab-nsit.net/About,"MorCVD: A Unified Database for Host-Pathogen Protein-Protein Interactions of Cardiovascular Diseases Related to Microbes. Microbe induced cardiovascular diseases (CVDs) are less studied at present. Host-pathogen interactions (HPIs) between human proteins and microbial proteins associated with CVD can be found dispersed in existing molecular interaction databases. MorCVD database is a curated resource that combines 23,377 protein interactions between human host and 432 unique pathogens involved in CVDs in a single intuitive web application. 
It covers endocarditis, myocarditis, pericarditis and 16 other microbe induced CVDs. The HPI information has been compiled, curated, and presented in a freely accessible web interface ( http://morcvd.sblab-nsit.net/About ). Apart from organization, enrichment of the HPI data was done by adding hyperlinked protein ID, PubMed, gene ontology records. For each protein in the database, drug target and interactors (same as well as different species) information has been provided. The database can be searched by disease, protein ID, pathogen name or interaction detection method. Interactions detected by more than one method can also be listed. The information can be presented in tabular form or downloaded. A comprehensive help file has been developed to explain the various options available. Hence, MorCVD acts as a unified resource for retrieval of HPI data for researchers in CVD and microbiology.",MorCVD,0.99740231,NA,0,MorCVD,0.99740231,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/11/2019 +24923822,http://morus.swu.edu.cn/morusdb,"MorusDB: a resource for mulberry genomics and genome biology. . Mulberry is an important cultivated plant that has received the attention of biologists interested in sericulture and plant-insect interaction. Morus notabilis, a wild mulberry species with a minimal chromosome number is an ideal material for whole-genome sequencing and assembly. The genome and transcriptome of M. notabilis were sequenced and analyzed. In this article, a web-based and open-access database, the Morus Genome Database (MorusDB), was developed to enable easy-to-access and data mining. The MorusDB provides an integrated data source and an easy accession of mulberry large-scale genomic sequencing and assembly, predicted genes and functional annotations, expressed sequence tags (ESTs), transposable elements (TEs), Gene Ontology (GO) terms, horizontal gene transfers between mulberry and silkworm and ortholog and paralog groups. Transcriptome sequencing data for M. 
notabilis root, leaf, bark, winter bud and male flower can also be searched and downloaded. Furthermore, MorusDB provides an analytical workbench with some built-in tools and pipelines, such as BLAST, Search GO, Mulberry GO and Mulberry GBrowse, to facilitate genomic studies and comparative genomics. The MorusDB provides important genomic resources for scientists working with mulberry and other Moraceae species, which include many important fruit crops. Designed as a basic platform and accompanied by the SilkDB, MorusDB strives to be a comprehensive platform for the silkworm-mulberry interaction studies. Database URL: http://morus.swu.edu.cn/morusdb.",MorusDB,0.993190169,Morus Genome Database,0.793358922,MorusDB,0.993190169,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/11/2014 +29206899,http://mosaic.cs.umn.edu,"MOSAIC: a chemical-genetic interaction data repository and web resource for exploring chemical modes of action. Summary:Chemical-genomic approaches that map interactions between small molecules and genetic perturbations offer a promising strategy for functional annotation of uncharacterized bioactive compounds. We recently developed a new high-throughput platform for mapping chemical-genetic (CG) interactions in yeast that can be scaled to screen large compound collections, and we applied this system to generate CG interaction profiles for more than 13 000 compounds. When integrated with the existing global yeast genetic interaction network, CG interaction profiles can enable mode-of-action prediction for previously uncharacterized compounds as well as discover unexpected secondary effects for known drugs. To facilitate future analysis of these valuable data, we developed a public database and web interface named MOSAIC. The website provides a convenient interface for querying compounds, bioprocesses (Gene Ontology terms) and genes for CG information including direct CG interactions, bioprocesses and gene-level target predictions. 
MOSAIC also provides access to chemical structure information of screened molecules, chemical-genomic profiles and the ability to search for compounds sharing structural and functional similarity. This resource will be of interest to chemical biologists for discovering new small molecule probes with specific modes-of-action as well as computational biologists interested in analysing CG interaction networks. Availability and implementation:MOSAIC is available at http://mosaic.cs.umn.edu. Contact:hisyo@riken.jp, yoshidam@riken.jp, charlie.boone@utoronto.ca or chadm@umn.edu. Supplementary information:Supplementary data are available at Bioinformatics online.",MOSAIC,0.996641636,NA,0,MOSAIC,0.996641636,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2018 +32911083,"http://mosaicbase.com/, http://49.4.21.8:8000","MosaicBase: A Knowledgebase of Postzygotic Mosaic Variants in Noncancer Disease-related and Healthy Human Individuals. Mosaic variants resulting from postzygotic mutations are prevalent in the human genome and play important roles in human diseases. However, except for cancer-related variants, there is no collection of postzygotic mosaic variants in noncancer disease-related and healthy individuals. Here, we present MosaicBase, a comprehensive database that includes 6698 mosaic variants related to 266 noncancer diseases and 27,991 mosaic variants identified in 422 healthy individuals. Genomic and phenotypic information of each variant was manually extracted and curated from 383 publications. MosaicBase supports the query of variants with Online Mendelian Inheritance in Man (OMIM) entries, genomic coordinates, gene symbols, or Entrez IDs. We also provide an integrated genome browser for users to easily access mosaic variants and their related annotations for any genomic region. 
By analyzing the variants collected in MosaicBase, we find that mosaic variants that directly contribute to disease phenotype show features distinct from those of variants in individuals with mild or no phenotypes, in terms of their genomic distribution, mutation signatures, and fraction of mutant cells. MosaicBase will not only assist clinicians in genetic counseling and diagnosis but also provide a useful resource to understand the genomic baseline of postzygotic mutations in the general human population. MosaicBase is publicly available at http://mosaicbase.com/ or http://49.4.21.8:8000.",MosaicBase,0.997231185,NA,0,MosaicBase,0.997231185,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2020 +23970545,http://rna.bgsu.edu/motifs,"Automated classification of RNA 3D motifs and the RNA 3D Motif Atlas. The analysis of atomic-resolution RNA three-dimensional (3D) structures reveals that many internal and hairpin loops are modular, recurrent, and structured by conserved non-Watson-Crick base pairs. Structurally similar loops define RNA 3D motifs that are conserved in homologous RNA molecules, but can also occur at nonhomologous sites in diverse RNAs, and which often vary in sequence. To further our understanding of RNA motif structure and sequence variability and to provide a useful resource for structure modeling and prediction, we present a new method for automated classification of internal and hairpin loop RNA 3D motifs and a new online database called the RNA 3D Motif Atlas. To classify the motif instances, a representative set of internal and hairpin loops is automatically extracted from a nonredundant list of RNA-containing PDB files. Their structures are compared geometrically, all-against-all, using the FR3D program suite. The loops are clustered into motif groups, taking into account geometric similarity and structural annotations and making allowance for a variable number of bulged bases. 
The automated procedure that we have implemented identifies all hairpin and internal loop motifs previously described in the literature. All motif instances and motif groups are assigned unique and stable identifiers and are made available in the RNA 3D Motif Atlas (http://rna.bgsu.edu/motifs), which is automatically updated every four weeks. The RNA 3D Motif Atlas provides an interactive user interface for exploring motif diversity and tools for programmatic data access.",Motif,0.707507372,NA,0,Motif,0.707507372,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,8/22/2013 +25145340,http://mouseidgenes.helmholtz-muenchen.de,"Mouse IDGenes: a reference database for genetic interactions in the developing mouse brain. . The study of developmental processes in the mouse and other vertebrates includes the understanding of patterning along the anterior-posterior, dorsal-ventral and medial- lateral axis. Specifically, neural development is also of great clinical relevance because several human neuropsychiatric disorders such as schizophrenia, autism disorders or drug addiction and also brain malformations are thought to have neurodevelopmental origins, i.e. pathogenesis initiates during childhood and adolescence. Impacts during early neurodevelopment might also predispose to late-onset neurodegenerative disorders, such as Parkinson's disease. The neural tube develops from its precursor tissue, the neural plate, in a patterning process that is determined by compartmentalization into morphogenetic units, the action of local signaling centers and a well-defined and locally restricted expression of genes and their interactions. While public databases provide gene expression data with spatio-temporal resolution, they usually neglect the genetic interactions that govern neural development. Here, we introduce Mouse IDGenes, a reference database for genetic interactions in the developing mouse brain. 
The database is highly curated and offers detailed information about gene expressions and the genetic interactions at the developing mid-/hindbrain boundary. To showcase the predictive power of interaction data, we infer new Wnt/β-catenin target genes by machine learning and validate one of them experimentally. The database is updated regularly. Moreover, it can easily be extended by the research community. Mouse IDGenes will contribute as an important resource to the research on mouse brain development, not exclusively by offering data retrieval, but also by allowing data input. http://mouseidgenes.helmholtz-muenchen.de.",Mouse IDGenes,0.925365269,NA,0,Mouse IDGenes,0.925365269,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/20/2014 +31559753,http://mouseliver.com,"[Mouse liver proteome database]. The liver is the metabolic center of mammalian body. Systematic study on liver's proteome expression under different physiological and pathological conditions helps us understand the functional mechanisms of the liver. With the rapid development of liquid chromatography tandem mass spectrometry technique, numerous studies on liver physiology and pathology features produced a large number of proteomics data. In this paper, 834 proteomics experiments of mouse liver were systematically collected and the mouse liver proteome database (Mouse Liver Portal, http://mouseliver.com) was established. The Mouse Liver Portal contains the liver's proteomics data under different physiology and pathology conditions, such as different gender, age, circadian rhythm, cell type and different phase of partial hepatectomy, non-alcoholic fatty liver. This portal provides the changes in proteins' expression in different conditions of the liver, differently expressed proteins and the biological processes which they are involved in, potential signal transduction and regulatory networks. 
As the most comprehensive mouse liver proteome database, it can provide important resources and clues for liver biology research.",Mouse Liver Portal,0.730381191,NA,0,Mouse Liver Portal,0.730381191,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/1/2019 +31825307,http://touchscreencognition.org,"MouseBytes, an open-access high-throughput pipeline and database for rodent touchscreen-based cognitive assessment. . Open Science has changed research by making data accessible and shareable, contributing to replicability to accelerate and disseminate knowledge. However, for rodent cognitive studies the availability of tools to share and disseminate data is scarce. Automated touchscreen-based tests enable systematic cognitive assessment with easily standardised outputs that can facilitate data dissemination. Here we present an integration of touchscreen cognitive testing with an open-access database public repository (mousebytes.ca), as well as a Web platform for knowledge dissemination (https://touchscreencognition.org). We complement these resources with the largest dataset of age-dependent high-level cognitive assessment of mouse models of Alzheimer's disease, expanding knowledge of affected cognitive domains from male and female mice of three strains. We envision that these new platforms will enhance sharing of protocols, data availability and transparency, allowing meta-analysis and reuse of mouse cognitive data to increase the replicability/reproducibility of datasets.",MouseBytes,0.869805098,NA,0,MouseBytes,0.869805098,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/11/2019 +26527726,http://www.inetbio.org/mousenet,"MouseNet v2: a database of gene networks for studying the laboratory mouse and eight other model vertebrates. Laboratory mouse, Mus musculus, is one of the most important animal tools in biomedical research. Functional characterization of the mouse genes, hence, has been a long-standing goal in mammalian and human genetics. 
Although large-scale knockout phenotyping is under progress by international collaborative efforts, a large portion of mouse genome is still poorly characterized for cellular functions and associations with disease phenotypes. A genome-scale functional network of mouse genes, MouseNet, was previously developed in context of MouseFunc competition, which allowed only limited input data for network inferences. Here, we present an improved mouse co-functional network, MouseNet v2 (available at http://www.inetbio.org/mousenet), which covers 17 714 genes (>88% of coding genome) with 788 080 links, along with a companion web server for network-assisted functional hypothesis generation. The network database has been substantially improved by large expansion of genomics data. For example, MouseNet v2 database contains 183 co-expression networks inferred from 8154 public microarray samples. We demonstrated that MouseNet v2 is predictive for mammalian phenotypes as well as human diseases, which suggests its usefulness in discovery of novel disease genes and dissection of disease pathways. Furthermore, MouseNet v2 database provides functional networks for eight other vertebrate models used in various research fields.",MouseNet,0.958441854,NA,0,MouseNet,0.958441854,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/2/2015 +24194596,http://proteinformatics.charite.de/mppd,"MP:PD--a data base of internal packing densities, internal packing defects and internal waters of helical membrane proteins. The membrane protein packing database (MP:PD) (http://proteinformatics.charite.de/mppd) is a database of helical membrane proteins featuring internal atomic packing densities, cavities and waters. Membrane proteins are not tightly packed but contain a considerable number of internal cavities that differ in volume, polarity and solvent accessibility as well as in their filling with internal water. Internal cavities are supposed to be regions of high physical compressibility. 
By serving as mobile hydrogen bonding donors or acceptors, internal waters likely facilitate transition between different functional states. Despite these distinct functional roles, internal cavities of helical membrane proteins are not well characterized, mainly because most internal waters are not resolved by crystal structure analysis. Here we combined various computational biophysical techniques to characterize internal cavities, reassign positions of internal waters and calculate internal packing densities of all available helical membrane protein structures and stored them in MP:PD. The database can be searched using keywords and entries can be downloaded. Each entry can be visualized in Provi, a Jmol-based protein viewer that provides an integrated display of low energy waters alongside membrane planes, internal packing density, hydrophobic cavities and hydrogen bonds.",MP:PD,0.98293969,membrane protein packing database,0.938239947,MP:PD,0.98293969,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/4/2013 +29697364,http://tanlab.ucdenver.edu/IMPACT,"IMPACT web portal: oncology database integrating molecular profiles with actionable therapeutics. BACKGROUND:With the advancement of next generation sequencing technology, researchers are now able to identify important variants and structural changes in DNA and RNA in cancer patient samples. With this information, we can now correlate specific variants and/or structural changes with actionable therapeutics known to inhibit these variants. We introduce the creation of the IMPACT Web Portal, a new online resource that connects molecular profiles of tumors to approved drugs, investigational therapeutics and pharmacogenetics associated drugs. RESULTS:IMPACT Web Portal contains a total of 776 drugs connected to 1326 target genes and 435 target variants, fusion, and copy number alterations. The online IMPACT Web Portal allows users to search for various genetic alterations and connects them to three levels of actionable therapeutics. 
The results are categorized into 3 levels: Level 1 contains approved drugs separated into two groups; Level 1A contains approved drugs with variant specific information while Level 1B contains approved drugs with gene level information. Level 2 contains drugs currently in oncology clinical trials. Level 3 provides pharmacogenetic associations between approved drugs and genes. CONCLUSION:IMPACT Web Portal allows for sequencing data to be linked to actionable therapeutics for translational and drug repurposing research. The IMPACT Web Portal online resource allows users to query genes and variants to approved and investigational drugs. We envision that this resource will be a valuable database for personalized medicine and drug repurposing. IMPACT Web Portal is freely available for non-commercial use at http://tanlab.ucdenver.edu/IMPACT .",MPACT,0.66159296,NA,0,MPACT,0.66159296,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,4/20/2018 +"22102583, 29136208, 31696236",http://phenome.jax.org,"Mouse Phenome Database (MPD). The Mouse Phenome Project was launched a decade ago to complement mouse genome sequencing efforts by promoting new phenotyping initiatives under standardized conditions and collecting the data in a central public database, the Mouse Phenome Database (MPD; http://phenome.jax.org). MPD houses a wealth of strain characteristics data to facilitate the use of the laboratory mouse in translational research for human health and disease, helping alleviate problems involving experimentation in humans that cannot be done practically or ethically. Data sets are voluntarily contributed by researchers from a variety of institutions and settings, or in some cases, retrieved by MPD staff from public sources. 
MPD maintains a growing collection of standardized reference data that assists investigators in selecting mouse strains for research applications; houses treatment/control data for drug studies and other interventions; offers a standardized platform for discovering genotype-phenotype relationships; and provides tools for hypothesis testing. MPD improvements and updates since our last NAR report are presented, including the addition of new tools and features to facilitate navigation and data mining as well as the acquisition of new data (phenotypic, genotypic and gene expression).",MPD,0.996881326,Mouse Phenome Database,0.988143757,MPD,0.996881326,3,NA,29917040,NA,NA,NA,do not merge,NA,NA,NA,1/1/2020 +29917040,http://data.mypathogen.org,"MPD: a pathogen genome and metagenome database. . Advances in high-throughput sequencing have led to unprecedented growth in the amount of available genome sequencing data, especially for bacterial genomes, which has been accompanied by a challenge for the storage and management of such huge datasets. To facilitate bacterial research and related studies, we have developed the Mypathogen database (MPD), which provides access to users for searching, downloading, storing and sharing bacterial genomics data. The MPD represents the first pathogenic database for microbial genomes and metagenomes, and currently covers pathogenic microbial genomes (6604 genera, 11 071 species, 41 906 strains) and metagenomic data from host, air, water and other sources (28 816 samples). The MPD also functions as a management system for statistical and storage data that can be used by different organizations, thereby facilitating data sharing among different organizations and research groups. A user-friendly local client tool is provided to maintain the steady transmission of big sequencing data. 
The MPD is a useful tool for analysis and management in genomic research, especially for clinical Centers for Disease Control and epidemiological studies, and is expected to contribute to advancing knowledge on pathogenic bacteria genomes and metagenomes.Database URL: http://data.mypathogen.org.",MPD,0.986540794,pathogen,0.605002284,MPD,0.986540794,1,NA,"22102583.0, 29136208.0, 31696236.0",NA,NA,NA,do not merge,NA,NA,NA,1/1/2018 +27681445,http://bioinform.info,"MPD3: a useful medicinal plants database for drug designing. Medicinal plants are the main natural pools for the discovery and development of new drugs. In the modern era of computer-aided drug designing (CADD), there is need of prompt efforts to design and construct useful database management system that allows proper data storage, retrieval and management with user-friendly interface. An inclusive database having information about classification, activity and ready-to-dock library of medicinal plant's phytochemicals is therefore required to assist the researchers in the field of CADD. The present work was designed to merge activities of phytochemicals from medicinal plants, their targets and literature references into a single comprehensive database named as Medicinal Plants Database for Drug Designing (MPD3). The newly designed online and downloadable MPD3 contains information about more than 5000 phytochemicals from around 1000 medicinal plants with 80 different activities, more than 900 literature references and 200 plus targets. The designed database is deemed to be very useful for the researchers who are engaged in medicinal plants research, CADD and drug discovery/development with ease of operation and increased efficiency. The designed MPD3 is a comprehensive database which provides most of the information related to the medicinal plants at a single platform. 
MPD3 is freely available at: http://bioinform.info .",MPD3,0.968090475,Medicinal Plants Database for,0.855982542,MPD3,0.968090475,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/28/2016 +28104956,http://pranag.physics.iisc.ernet.in/mpdb,"MPDB: Molecular Pathways Brain Database. Molecular Pathways Brain Database (MPDB), is a novel database for molecular information of the brain pathways and is an initiative to provide an organized platform for researchers in the field of neuro-informatics. The database currently has information from 1850 molecules for three different sensory pathways namely olfactory transduction, photo transduction and long-term potentiation. The usefulness of the database is demonstrated by an analysis of the olfactory transduction pathway which helps understand their olfactory specifity and further indicates that some of the molecules have evolved independently among these organisms as per the need of time and function. The database is available for free at http://pranag.physics.iisc.ernet.in/mpdb/.",MPDB,0.995024717,Molecular Pathways Brain Database,0.991229546,MPDB,0.995024717,1,NA,34362451,NA,NA,NA,do not merge,NA,NA,NA,4/10/2016 +34362451,http://www.medicinalplantbd.com,"MPDB 2.0: a large scale and integrated medicinal plant database of Bangladesh. Objective MPDB 2.0 is built to be the continuation of MPDB 1.0, to serve as a more comprehensive data repertoire for Bangladeshi medicinal plants, and to provide a user-friendly interface for researchers, health practitioners, drug developers, and students who wish to study the various medicinal & nutritive plants scattered around Bangladesh and the underlying phytochemicals contributing to their efficacy in Bangladeshi folk medicine. 
Results MPDB 2.0 database ( https://www.medicinalplantbd.com/ ) comprises a collection of more than five hundred Bangladeshi medicinal plants, alongside a record of their corresponding scientific, family, and local names together with their utilized parts, information regarding ailments, active compounds, and PubMed ID of related publications. While medicinal plants are not limited to the borders of any country, Bangladesh and its Southeast Asian neighbors do boast a huge collection of potent medicinal plants with considerable folk-medicinal history compared to most other countries in the world. Development of MPDB 2.0 has been highly focused upon human diseases, albeit many of the plants indexed here can serve in developing biofuel (e.g.: Jatropha curcas used in biofuel) or bioremediation technologies (e.g.: Amaranthus cruentus helps to reduce cadmium level in soil) or nutritive diets (Terminalia chebula can be used in nutritive diets) or cosmetics (Aloe vera used in cosmetics), etc.",MPDB,0.990668297,NA,0,MPDB,0.990668297,1,NA,28104956,NA,NA,NA,do not merge,NA,NA,NA,8/6/2021 +30092360,"http://www.mpds-diabetes.in, http://www.way2drug.com/passonline","Molecular property diagnostic suite for diabetes mellitus (MPDSDM): An integrated web portal for drug discovery and drug repurposing. Molecular Property Diagnostic Suite - Diabetes Mellitus (MPDSDM) is a Galaxy-based, open source disease-specific web portal for diabetes. It consists of three modules namely (i) data library (ii) data processing and (iii) data analysis tools. The data library (target library and literature) module provide extensive and curated information about the genes involved in type 1 and type 2 diabetes onset and progression stage (available at http://www.mpds-diabetes.in). The database also contains information on drug targets, biomarkers, therapeutics and associated genes specific to type 1, and type 2 diabetes. 
A unique MPDS identification number has been assigned for each gene involved in diabetes mellitus and the corresponding card contains chromosomal data, gene information, protein UniProt ID, functional domains, druggability and related pathway information. One of the objectives of the web portal is to have an open source data repository that contains all information on diabetes and use this information for developing therapeutics to cure diabetes. We also make an attempt for computational drug repurposing for the validated diabetes targets. We performed virtual screening of 1455 FDA approved drugs on selected 20 type 1 and type 2 diabetes proteins using docking protocol and their biological activity was predicted using ""PASS Online"" server (http://www.way2drug.com/passonline) towards anti-diabetic activity, resulted in the identification of 41 drug molecules. Five drug molecules (which are earlier known for anti-malarial/microbial, anti-viral, anti-cancer, anti-pulmonary activities) were proposed to have a better repurposing potential for type 2 anti-diabetic activity and good binding affinity towards type 2 diabetes target proteins.",MPDSDM,0.995594293,Molecular property diagnostic suite for diabetes mellitus,0.949859989,MPDSDM,0.995594293,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/6/2018 +21349870,http://biolinfo.org/mpid-t2,"MPID-T2: a database for sequence-structure-function analyses of pMHC and TR/pMHC structures. Unlabelled Sequence-structure-function information is critical in understanding the mechanism of pMHC and TR/pMHC binding and recognition. A database for sequence-structure-function information on pMHC and TR/pMHC interactions, MHC-Peptide Interaction Database-TR version 2 (MPID-T2), is now available augmented with the latest PDB and IMGT/3Dstructure-DB data, advanced features and new parameters for the analysis of pMHC and TR/pMHC structures. Availability http://biolinfo.org/mpid-t2. 
Contact shoba.ranganathan@mq.edu.au Supplementary information Supplementary data are available at Bioinformatics online.",MPID-T2,0.942223958,MHC-Peptide Interaction Database-T,0.729910028,MPID-T2,0.942223958,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/23/2011 +33514395,http://mutanome.lerner.ccf.org,"My personal mutanome: a computational genomic medicine platform for searching network perturbing alleles linking genotype to phenotype. Massive genome sequencing data have inspired new challenges in personalized treatments and facilitated oncological drug discovery. We present a comprehensive database, My Personal Mutanome (MPM), for accelerating the development of precision cancer medicine protocols. MPM contains 490,245 mutations from over 10,800 tumor exomes across 33 cancer types in The Cancer Genome Atlas mapped to 94,563 structure-resolved/predicted protein-protein interaction interfaces (""edgetic"") and 311,022 functional sites (""nodetic""), including ligand-protein binding sites and 8 types of protein posttranslational modifications. In total, 8884 survival results and 1,271,132 drug responses are obtained for these mapped interactions. MPM is available at https://mutanome.lerner.ccf.org .",MPM,0.994437575,My Personal Mutanome,0.963305771,MPM,0.994437575,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/29/2021 +23894139,http://bioinformatics.biol.uoa.gr/mpMoRFsDB,"mpMoRFsDB: a database of molecular recognition features in membrane proteins. Summary Molecular recognition features (MoRFs) are small, intrinsically disordered regions in proteins that undergo a disorder-to-order transition on binding to their partners. MoRFs are involved in protein-protein interactions and may function as the initial step in molecular recognition. The aim of this work was to collect, organize and store all membrane proteins that contain MoRFs. Membrane proteins constitute ∼30% of fully sequenced proteomes and are responsible for a wide variety of cellular functions. 
MoRFs were classified according to their secondary structure, after interacting with their partners. We identified MoRFs in transmembrane and peripheral membrane proteins. The position of transmembrane protein MoRFs was determined in relation to a protein's topology. All information was stored in a publicly available mySQL database with a user-friendly web interface. A Jmol applet is integrated for visualization of the structures. mpMoRFsDB provides valuable information related to disorder-based protein-protein interactions in membrane proteins. Availability http://bioinformatics.biol.uoa.gr/mpMoRFsDB",mpMoRFsDB,0.948040187,NA,0,mpMoRFsDB,0.948040187,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/26/2013 +34156447,http://bis.zju.edu.cn/mppi,"mPPI: a database extension to visualize structural interactome in a one-to-many manner. . Protein-protein interaction (PPI) databases with structural information are useful to investigate biological functions at both systematic and atomic levels. However, most existing PPI databases only curate binary interactome. From the perspective of the display and function of PPI, as well as the structural binding interface, the related database and resources are summarized. We developed a database extension, named mPPI, for PPI structural visualization. Comparing with the existing structural interactomes that curate resolved PPI conformation in pairs, mPPI can visualize target protein and its multiple interactors simultaneously, which facilitates multi-target drug discovery and structure prediction of protein macro-complexes. By employing a protein-protein docking algorithm, mPPI largely extends the coverage of structural interactome from experimentally resolved complexes. mPPI is designed to be a customizable and convenient plugin for PPI databases. It possesses wide potential applications for various PPI databases, and it has been used for a neurodegenerative disease-related PPI database as demonstration. 
Scripts and implementation guidelines of mPPI are documented at the database tool website. Database URL  http://bis.zju.edu.cn/mppi/.",mPPI,0.952142298,NA,0,mPPI,0.952142298,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/1/2021 +34147352,"http://www.ncbi.nlm.nih.gov/geo/, http://www.ufrgs.br/mpsbase","MPSBase: Comprehensive repository of differentially expressed genes for mucopolysaccharidoses. Mucopolysaccharidoses (MPS) are lysosomal storage diseases (LSDs) caused by the deficiency of enzymes essential for the metabolism of extracellular matrix components called glycosaminoglycans (GAGs). To understand the physiopathology and alterations due to the lysosomal accumulation resulting from enzymatic deficiencies and their secondary outcomes can improve the diagnosis and treatment of rare genetic diseases. This work presents a database for differentially expressed genes from different public MPS data. We developed our database, including 13 studies previously deposited in the GEO (https://www.ncbi.nlm.nih.gov/geo/). The website is hosted in the UFRGS data processing center (CPD) and is available at . The site was constructed in PHP, and the analyses were performed in R. The organisms represented by the datasets are Canis lupus familiaris, Homo sapiens, Mus musculus, and Rattus norvegicus. The user can search for the differentially expressed genes and ontologies by species, MPS type, or tissue type. For each comparison, a heatmap with the 50 top differentially expressed genes is available as well as dot plots for the 30 top ontologies divided by biological process, cellular component, KEGG pathways, and molecular function. This data is also fully available in tables. There are 54 possible comparisons involving about 5000 to 10,000 genes each. This website is the only specific database for MPS with filtering and presenting their results in a one-click approach to the best of our knowledge. 
The development of such analytical and automated strategies accessible to health professionals is essential for fostering MPS research. The MPSBase is a web user-friendly, comprehensive repository of differentially expressed genes and ontologies regarding the MPS data.",MPSBase,0.996857762,NA,0,MPSBase,0.996857762,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/15/2021 +32337573,http://www.iitm.ac.in/bioinfo/mptherm,"MPTherm: database for membrane protein thermodynamics for understanding folding and stability. The functions of membrane proteins (MPs) are attributed to their structure and stability. Factors influencing the stability of MPs differ from globular proteins due to the presence of membrane spanning regions. Thermodynamic data of MPs aid to understand the relationship among their structure, stability and function. Although a wealth of experimental data on thermodynamics of MPs are reported in the literature, there is no database available explicitly for MPs. In this work, we have developed a database for MP thermodynamics, MPTherm, which contains more than 7000 thermodynamic data from about 320 MPs. Each entry contains protein sequence and structural information, membrane topology, experimental conditions, thermodynamic parameters such as melting temperature, free energy, enthalpy etc. and literature information. MPTherm assists users to retrieve the data by using different search and display options. We have also provided the sequence and structure visualization as well as cross-links to UniProt and PDB databases. MPTherm database is freely available at http://www.iitm.ac.in/bioinfo/mptherm/. It is implemented in HTML, PHP, MySQL and JavaScript, and supports the latest versions of major browsers, such as Firefox, Chrome and Opera. 
MPTherm would serve as an effective resource for understanding the stability of MPs, development of prediction tools and identifying drug targets for diseases associated with MPs.",MPTherm,0.997999251,NA,0,MPTherm,0.997999251,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2021 +31231774,http://bioinfo.life.hust.edu.cn/mrvc,"Mr.Vc: a database of microarray and RNA-seq of Vibrio cholerae. . Gram-negative bacterium Vibrio cholerae is the causative agent of cholera, a life-threatening diarrheal disease. During its infectious cycle, V. cholerae routinely switches niches between aquatic environment and host gastrointestinal tract, in which V. cholerae modulates its transcriptome pattern accordingly for better survival and proliferation. A comprehensive resource for V. cholerae transcriptome will be helpful for cholera research, including prevention, diagnosis and intervention strategies. In this study, we constructed a microarray and RNA-seq database of V. cholerae (Mr.Vc), containing gene transcriptional expression data of 145 experimental conditions of V. cholerae from various sources, covering 25 937 entries of differentially expressed genes. In addition, we collected relevant information including gene annotation, operons they may belong to and possible interaction partners of their protein products. With Mr.Vc, users can easily find transcriptome data they are interested in, such as the experimental conditions in which a gene of interest was differentially expressed in, or all genes that were differentially expressed in an experimental condition. We believe that Mr.Vc database is a comprehensive data repository dedicated to V. cholerae and could be a useful resource for all researchers in related fields. Mr.Vc is available for free at http://bioinfo.life.hust.edu.cn/mrvc.",Mr.Vc,0.98936215,NA,0,Mr.Vc,0.98936215,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +23619930,http://www3.imperial.ac.uk/bioinfsupport/resources/software/mridb,"MRIdb: medical image management for biobank research. 
Clinical picture archiving and communications systems provide convenient, efficient access to digital medical images from multiple modalities but can prove challenging to deploy, configure and use. MRIdb is a self-contained image database, particularly suited to the storage and management of magnetic resonance imaging data sets for population phenotyping. It integrates a mature image archival system with an intuitive web-based user interface that provides visualisation and export functionality. In addition, utilities for auditing, data migration and system monitoring are included in a virtual machine image that is easily deployed with minimal configuration. The result is a freely available turnkey solution, designed to support epidemiological and imaging genetics research. It allows the management of patient data sets in a secure, scalable manner without requiring the installation of any bespoke software on end users' workstations. MRIdb is an open-source software, available for download at http://www3.imperial.ac.uk/bioinfsupport/resources/software/mridb .",MRIdb,0.995113969,NA,0,MRIdb,0.995113969,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2013 +33683131,http://mrmassaydb.proteincentre.com,"An Update on MRMAssayDB: A Comprehensive Resource for Targeted Proteomics Assays in the Community. Precise multiplexed quantification of proteins in biological samples can be achieved by targeted proteomics using multiple or parallel reaction monitoring (MRM/PRM). Combined with internal standards, the method achieves very good repeatability and reproducibility enabling excellent protein quantification and allowing longitudinal and cohort studies. A laborious part of performing such experiments lies in the preparation steps dedicated to the development and validation of individual protein assays. 
Several public repositories host information on targeted proteomics assays, including NCI's Clinical Proteomic Tumor Analysis Consortium assay portals, PeptideAtlas SRM Experiment Library, SRMAtlas, PanoramaWeb, and PeptideTracker, with all offering varying levels of details. We introduced MRMAssayDB in 2018 as an integrated resource for targeted proteomics assays. The Web-based application maps and links the assays from the repositories, includes comprehensive up-to-date protein and sequence annotations, and provides multiple visualization options on the peptide and protein level. We have extended MRMAssayDB with more assays and extensive annotations. Currently it contains >828 000 assays covering >51 000 proteins from 94 organisms, of which >17 000 proteins are present in >2400 biological pathways, and >48 000 mapping to >21 000 Gene Ontology terms. This is an increase of about four times the number of assays since introduction. We have expanded annotations of interaction, biological pathways, and disease associations. A newly added visualization module for coupled molecular structural annotation browsing allows the user to interactively examine peptide sequence and any known PTMs and disease mutations, and map all to available protein 3D structures. Because of its integrative approach, MRMAssayDB enables a holistic view of suitable proteotypic peptides and commonly used transitions in empirical data. Availability: http://mrmassaydb.proteincentre.com.",MRMAssayDB,0.997641921,NA,0,MRMAssayDB,0.997641921,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/8/2021 +27899620,http://MRPrimerV.com,"MRPrimerV: a database of PCR primers for RNA virus detection. Many infectious diseases are caused by viral infections, and in particular by RNA viruses such as MERS, Ebola and Zika. To understand viral disease, detection and identification of these viruses are essential. 
Although PCR is widely used for rapid virus identification due to its low cost and high sensitivity and specificity, very few online database resources have compiled PCR primers for RNA viruses. To effectively detect viruses, the MRPrimerV database (http://MRPrimerV.com) contains 152 380 247 PCR primer pairs for detection of 1818 viruses, covering 7144 coding sequences (CDSs), representing 100% of the RNA viruses in the most up-to-date NCBI RefSeq database. Due to rigorous similarity testing against all human and viral sequences, every primer in MRPrimerV is highly target-specific. Because MRPrimerV ranks CDSs by the penalty scores of their best primer, users need only use the first primer pair for a single-phase PCR or the first two primer pairs for two-phase PCR. Moreover, MRPrimerV provides the list of genome neighbors that can be detected using each primer pair, covering 22 192 variants of 532 RefSeq RNA viruses. We believe that the public availability of MRPrimerV will facilitate viral metagenomics studies aimed at evaluating the variability of viruses, as well as other scientific tasks.",MRPrimerV,0.993523359,NA,0,MRPrimerV,0.993523359,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2016 +22961451,http://mrtdd.mbc.nctu.edu.tw,"Identifying cancer highly-expressed membrane receptors for targeted drug delivery. Currently, the accompanying side effects of anti-cancer drugs owing to incorrect delivery to normal tissues should be reduced. We present a database (MRTDD) with identified cancer highly-expressed membrane receptors (CHMRs) which can be used in targeted drug delivery. To evaluate the probability of occurrence of incorrect delivery, we calculate tissue index for each CHMR and expect to identify good candidates. The information provided includes: (1) genomic annotations; (2) gene expression profiles of membrane receptors in cancer tissue vs. corresponding normal tissue, normal tissues of body and cancer cell-lines; (3) available antibody services of manufacturers. 
MRTDD is available at http://mrtdd.mbc.nctu.edu.tw/.",MRTDD,0.996869192,NA,0,MRTDD,0.996869192,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2012 +22449400,http://cluster.physics.iisc.ernet.in/sms,"SMS 2.0: an updated database to study the structural plasticity of short peptide fragments in non-redundant proteins. The function of a protein molecule is greatly influenced by its three-dimensional (3D) structure and therefore structure prediction will help identify its biological function. We have updated Sequence, Motif and Structure (SMS), the database of structurally rigid peptide fragments, by combining amino acid sequences and the corresponding 3D atomic coordinates of non-redundant (25%) and redundant (90%) protein chains available in the Protein Data Bank (PDB). SMS 2.0 provides information pertaining to the peptide fragments of length 5-14 residues. The entire dataset is divided into three categories, namely, same sequence motifs having similar, intermediate or dissimilar 3D structures. Further, options are provided to facilitate structural superposition using the program structural alignment of multiple proteins (STAMP) and the popular JAVA plug-in (Jmol) is deployed for visualization. In addition, functionalities are provided to search for the occurrences of the sequence motifs in other structural and sequence databases like PDB, Genome Database (GDB), Protein Information Resource (PIR) and Swiss-Prot. The updated database along with the search engine is available over the World Wide Web through the following URL http://cluster.physics.iisc.ernet.in/sms/.",MS,0.642874658,Motif and,0.628312707,MS,0.642874658,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,2/1/2012 +28854643,http://tdb.ccmb.res.in/msdb,"MSDB: A Comprehensive Database of Simple Sequence Repeats. Microsatellites, also known as Simple Sequence Repeats (SSRs), are short tandem repeats of 1-6 nt motifs present in all genomes, particularly eukaryotes. 
Besides their usefulness as genome markers, SSRs have been shown to perform important regulatory functions, and variations in their length at coding regions are linked to several disorders in humans. Microsatellites show a taxon-specific enrichment in eukaryotic genomes, and some may be functional. MSDB (Microsatellite Database) is a collection of >650 million SSRs from 6,893 species including Bacteria, Archaea, Fungi, Plants, and Animals. This database is by far the most exhaustive resource to access and analyze SSR data of multiple species. In addition to exploring data in a customizable tabular format, users can view and compare the data of multiple species simultaneously using our interactive plotting system. MSDB is developed using the Django framework and MySQL. It is freely available at http://tdb.ccmb.res.in/msdb.",MSDB,0.971259832,Microsatellite Database,0.94480021,MSDB,0.971259832,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/1/2017 +29106642,http://www.bio-bigdata.com/msdd,"MSDD: a manually curated database of experimentally supported associations among miRNAs, SNPs and human diseases. The MiRNA SNP Disease Database (MSDD, http://www.bio-bigdata.com/msdd/) is a manually curated database that provides comprehensive experimentally supported associations among microRNAs (miRNAs), single nucleotide polymorphisms (SNPs) and human diseases. SNPs in miRNA-related functional regions such as mature miRNAs, promoter regions, pri-miRNAs, pre-miRNAs and target gene 3'-UTRs, collectively called 'miRSNPs', represent a novel category of functional molecules. miRSNPs can lead to miRNA and its target gene dysregulation, and resulting in susceptibility to or onset of human diseases. A curated collection and summary of miRSNP-associated diseases is essential for a thorough understanding of the mechanisms and functions of miRSNPs. 
Here, we describe MSDD, which currently documents 525 associations among 182 human miRNAs, 197 SNPs, 153 genes and 164 human diseases through a review of more than 2000 published papers. Each association incorporates information on the miRNAs, SNPs, miRNA target genes and disease names, SNP locations and alleles, the miRNA dysfunctional pattern, experimental techniques, a brief functional description, the original reference and additional annotation. MSDD provides a user-friendly interface to conveniently browse, retrieve, download and submit novel data. MSDD will significantly improve our understanding of miRNA dysfunction in disease, and thus, MSDD has the potential to serve as a timely and valuable resource.",MSDD,0.997410993,MiRNA SNP Disease Database,0.985756731,MSDD,0.997410993,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +"25542617, 26919060",http://mseqdr.org,"Mitochondrial Disease Sequence Data Resource (MSeqDR): a global grass-roots consortium to facilitate deposition, curation, annotation, and integrated analysis of genomic data for the mitochondrial disease clinical and research communities. Success rates for genomic analyses of highly heterogeneous disorders can be greatly improved if a large cohort of patient data is assembled to enhance collective capabilities for accurate sequence variant annotation, analysis, and interpretation. Indeed, molecular diagnostics requires the establishment of robust data resources to enable data sharing that informs accurate understanding of genes, variants, and phenotypes. The ""Mitochondrial Disease Sequence Data Resource (MSeqDR) Consortium"" is a grass-roots effort facilitated by the United Mitochondrial Disease Foundation to identify and prioritize specific genomic data analysis needs of the global mitochondrial disease clinical and research community. 
A central Web portal (https://mseqdr.org) facilitates the coherent compilation, organization, annotation, and analysis of sequence data from both nuclear and mitochondrial genomes of individuals and families with suspected mitochondrial disease. This Web portal provides users with a flexible and expandable suite of resources to enable variant-, gene-, and exome-level sequence analysis in a secure, Web-based, and user-friendly fashion. Users can also elect to share data with other MSeqDR Consortium members, or even the general public, either by custom annotation tracks or through the use of a convenient distributed annotation system (DAS) mechanism. A range of data visualization and analysis tools are provided to facilitate user interrogation and understanding of genomic, and ultimately phenotypic, data of relevance to mitochondrial biology and disease. Currently available tools for nuclear and mitochondrial gene analyses include an MSeqDR GBrowse instance that hosts optimized mitochondrial disease and mitochondrial DNA (mtDNA) specific annotation tracks, as well as an MSeqDR locus-specific database (LSDB) that curates variant data on more than 1300 genes that have been implicated in mitochondrial disease and/or encode mitochondria-localized proteins. MSeqDR is integrated with a diverse array of mtDNA data analysis tools that are both freestanding and incorporated into an online exome-level dataset curation and analysis resource (GEM.app) that is being optimized to support needs of the MSeqDR community. In addition, MSeqDR supports mitochondrial disease phenotyping and ontology tools, and provides variant pathogenicity assessment features that enable community review, feedback, and integration with the public ClinVar variant annotation resource. A centralized Web-based informed consent process is being developed, with implementation of a Global Unique Identifier (GUID) system to integrate data deposited on a given individual from different sources. 
Community-based data deposition into MSeqDR has already begun. Future efforts will enhance capabilities to incorporate phenotypic data that enhance genomic data analyses. MSeqDR will fill the existing void in bioinformatics tools and centralized knowledge that are necessary to enable efficient nuclear and mtDNA genomic data interpretation by a range of shareholders across both clinical diagnostic and research settings. Ultimately, MSeqDR is focused on empowering the global mitochondrial disease community to better define and explore mitochondrial diseases.",MSeqDR,0.998155057,Mitochondrial Disease Sequence Data Resource,0.910063028,MSeqDR,0.998155057,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/21/2016 +26486520,http://MSGene.bioinfo-minzhao.org,"An evidence-based knowledgebase of metastasis suppressors to identify key pathways relevant to cancer metastasis. Metastasis suppressor genes (MS genes) are genes that play important roles in inhibiting the process of cancer metastasis without preventing growth of the primary tumor. Identification of these genes and understanding their functions are critical for investigation of cancer metastasis. Recent studies on cancer metastasis have identified many new susceptibility MS genes. However, the comprehensive illustration of diverse cellular processes regulated by metastasis suppressors during the metastasis cascade is lacking. Thus, the relationship between MS genes and cancer risk is still unclear. To unveil the cellular complexity of MS genes, we have constructed MSGene (http://MSGene.bioinfo-minzhao.org/), the first literature-based gene resource for exploring human MS genes. In total, we manually curated 194 experimentally verified MS genes and mapped to 1448 homologous genes from 17 model species. Follow-up functional analyses associated 194 human MS genes with epithelium/tissue morphogenesis and epithelia cell proliferation. 
In addition, pathway analysis highlights the prominent role of MS genes in activation of platelets and coagulation system in tumor metastatic cascade. Moreover, global mutation pattern of MS genes across multiple cancers may reveal common cancer metastasis mechanisms. All these results illustrate the importance of MSGene to our understanding on cell development and cancer metastasis.",MSGene,0.992402017,NA,0,MSGene,0.992402017,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/21/2015 +21546393,http://www.broadinstitute.org/msigdb,"Molecular signatures database (MSigDB) 3.0. Motivation Well-annotated gene sets representing the universe of the biological processes are critical for meaningful and insightful interpretation of large-scale genomic data. The Molecular Signatures Database (MSigDB) is one of the most widely used repositories of such sets. Results We report the availability of a new version of the database, MSigDB 3.0, with over 6700 gene sets, a complete revision of the collection of canonical pathways and experimental signatures from publications, enhanced annotations and upgrades to the web site. Availability and implementation MSigDB is freely available for non-commercial use at http://www.broadinstitute.org/msigdb.",MSigDB,0.988693297,Molecular signatures database,0.879078257,MSigDB,0.988693297,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/5/2011 +29145625,http://tardis.cgu.edu.tw/msignaturedb,"mSignatureDB: a database for deciphering mutational signatures in human cancers. Cancer is a genetic disease caused by somatic mutations; however, the understanding of the causative biological processes generating these mutations is limited. A cancer genome bears the cumulative effects of mutational processes during tumor development. Deciphering mutational signatures in cancer is a new topic in cancer research. The Wellcome Trust Sanger Institute (WTSI) has categorized 30 reference signatures in the COSMIC database based on the analyses of ∼10 000 sequencing datasets from TCGA and ICGC. 
Large cohorts and bioinformatics skills are required to perform the same analysis as WTSI. The quantification of known signatures in custom cohorts is not possible under the current framework of the COSMIC database, which motivates us to construct a database for mutational signatures in cancers and make such analyses more accessible to general researchers. mSignatureDB (http://tardis.cgu.edu.tw/msignaturedb) integrates R packages and in-house scripts to determine the contributions of the published signatures in 15 780 individual tumors from 73 TCGA/ICGC cancer projects, making comparison of signature patterns within and between projects become possible. mSignatureDB also allows users to perform signature analysis on their own datasets, quantifying contributions of signatures at sample resolution, which is a unique feature of mSignatureDB not available in other related databases.",mSignatureDB,0.991612613,NA,0,mSignatureDB,0.991612613,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +32777102,http://mskkp.org,"The Musculoskeletal Knowledge Portal: Making Omics Data Useful to the Broader Scientific Community. The development of high-throughput genotyping technologies and large biobank collections, complemented with rapid methodological advances in statistical genetics, has enabled hypothesis-free genome-wide association studies (GWAS), which have identified hundreds of genetic variants across many loci associated with musculoskeletal conditions. Similarly, basic scientists have valuable molecular cellular and animal data based on musculoskeletal disease that would be enhanced by being able to determine the human translation of their findings. By integrating these large-scale human genomic musculoskeletal datasets with complementary evidence from model organisms, new and existing genetic loci can be statistically fine-mapped to plausibly causal variants, candidate genes, and biological pathways. 
Genes and pathways identified using this approach can be further prioritized as drug targets, including side-effect profiling and the potential for new indications. To bring together these big data, and to realize the vision of creating a knowledge portal, the International Federation of Musculoskeletal Research Societies (IFMRS) established a working group to collaborate with scientists from the Broad Institute to create the Musculoskeletal Knowledge Portal (MSK-KP)(http://mskkp.org/). The MSK consolidates omics datasets from humans, cellular experiments, and model organisms into a central repository that can be accessed by researchers. The vision of the MSK-KP is to enable better understanding of the biological mechanisms underlying musculoskeletal disease and apply this knowledge to identify and develop new disease interventions. © 2020 American Society for Bone and Mineral Research (ASBMR).",MSK-KP,0.953117967,Musculoskeletal Knowledge Portal,0.928650388,MSK-KP,0.953117967,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/1/2020 +22491796,http://decrypthon.igbmc.fr/msv3d,"MSV3d: database of human MisSense Variants mapped to 3D protein structure. The elucidation of the complex relationships linking genotypic and phenotypic variations to protein structure is a major challenge in the post-genomic era. We present MSV3d (Database of human MisSense Variants mapped to 3D protein structure), a new database that contains detailed annotation of missense variants of all human proteins (20 199 proteins). The multi-level characterization includes details of the physico-chemical changes induced by amino acid modification, as well as information related to the conservation of the mutated residue and its position relative to functional features in the available or predicted 3D model. 
Major releases of the database are automatically generated and updated regularly in line with the dbSNP (database of Single Nucleotide Polymorphism) and SwissVar releases, by exploiting the extensive Décrypthon computational grid resources. The database (http://decrypthon.igbmc.fr/msv3d) is easily accessible through a simple web interface coupled to a powerful query engine and a standard web service. The content is completely or partially downloadable in XML or flat file formats. Database URL: http://decrypthon.igbmc.fr/msv3d.",MSV3d,0.996012549,Database of human MisSense Variants,0.791921243,MSV3d,0.996012549,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/3/2012 +"25332399, 26302176",http://tumor.informatics.jax.org,"Mouse Tumor Biology (MTB): a database of mouse models for human cancer. The Mouse Tumor Biology (MTB; http://tumor.informatics.jax.org) database is a unique online compendium of mouse models for human cancer. MTB provides online access to expertly curated information on diverse mouse models for human cancer and interfaces for searching and visualizing data associated with these models. The information in MTB is designed to facilitate the selection of strains for cancer research and is a platform for mining data on tumor development and patterns of metastases. MTB curators acquire data through manual curation of peer-reviewed scientific literature and from direct submissions by researchers. Data in MTB are also obtained from other bioinformatics resources including PathBase, the Gene Expression Omnibus and ArrayExpress. 
Recent enhancements to MTB improve the association between mouse models and human genes commonly mutated in a variety of cancers as identified in large-scale cancer genomics studies, provide new interfaces for exploring regions of the mouse genome associated with cancer phenotypes and incorporate data and information related to Patient-Derived Xenograft models of human cancers.",MTB,0.995452483,Mouse Tumor Biology Database,0.991739839,MTB,0.995452483,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/21/2015 +21584191,http://www.bicjbtdrc-mgims.in/MTB-PCDB,"MTB-PCDB: Mycobacterium tuberculosis proteome comparison database. Unlabelled The Mycobacterium tuberculosis Proteome Comparison Database (MTB-PCDB) is an online database providing integrated access to proteome sequence comparison data for five strains of Mycobacterium tuberculosis (H37Rv, H37Ra, CDC 1551, F11 and KZN 1435) sequenced completely so far. MTB-PCDB currently hosts 40252 protein sequence comparison data obtained through inter-strain proteome comparison of five different strains of MTB. 2373 proteins were found to be identical in all 5 strains using MTB H(37)Rv as reference strain. To enable wide use of this data, MTB-PCDB provides a set of tools for searching, browsing, analyzing and downloading the data. By bringing together, M. tuberculosis proteome comparison among virulent & avirulent strains and also drug susceptible & drug resistance strains MTB-PCDB provides a unique discovery platform for comparative proteomics among these strains which may give insights into the discovery & development of TB drugs, vaccines and biomarkers. Availability The database is available for free at http://www.bicjbtdrc-mgims.in/MTB-PCDB/",MTB-PCDB,0.996515731,The Mycobacterium tuberculosis Proteome Comparison Database,0.917592347,MTB-PCDB,0.996515731,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/22/2011 +30738202,http://ab-openlab.csir.res.in/cgi-bin/gb2/gbrowse,"MtBrowse: An integrative genomics browser for human mitochondrial DNA. 
The human mitochondrion is a unique semi-autonomous organelle with a genome of its own and also requires nuclear encoded components to carry out its functions. In addition to being the powerhouse of the cell, mitochondria plays a central role in several metabolic pathways. It is therefore challenging to delineate the cause-effect relationship in context of mitochondrial dysfunction. Several studies implicate mutations in mitochondrial DNA (mtDNA) in various complex diseases. The human mitochondrial DNA (mtDNA) encodes a set of 37 genes, 13 protein coding, 22 tRNAs and two ribosomal RNAs, which are essential structural and functional components of the electron transport chain. As mentioned above, variations in these genes have been implicated in a broad spectrum of diseases and are extensively reported in literature and various databases. A large number of databases and prediction methods have been published to elucidate the role of human mitochondrial DNA in various disease phenotypes. However, there is no centralized resource to visualize this genotype-phenotype data. Towards this, we have developed MtBrowse: an integrative genomics browser for human mtDNA. As of now, MtBrowse has four categories - Gene, Disease, Reported variation and Variation prediction. These categories have 105 tracks and house data on mitochondrial reference genes, around 600 variants reported in literature with respect to various disease phenotypes and predictions for potential pathogenic variations in protein-coding genes. MtBrowse also hosts genomic variation data from over 5000 individuals on 22 disease phenotypes. MtBrowse may be accessed at http://ab-openlab.csir.res.in/cgi-bin/gb2/gbrowse.",MtBrowse,0.99495405,NA,0,MtBrowse,0.99495405,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/7/2019 +21880546,http://bmi.icmr.org.in/mtbsd/MtbSD.php,"MtbSD--a comprehensive structural database for Mycobacterium tuberculosis. 
The Mycobacterium tuberculosis Structural Database (MtbSD) (http://bmi.icmr.org.in/mtbsd/MtbSD.php) is a relational database for the study of protein structures of M. tuberculosis. It currently holds information on description, reaction catalyzed and domains involved, active sites, structural homologues and similarities between bound and cognate ligands, for all the 857 protein structures that are available for M. tb proteins. The database will be a valuable resource for TB researchers to select the appropriate protein-ligand complex of a given protein for molecular modelling, docking, virtual screening and structure-based drug designing.",MtbSD,0.997164249,Mycobacterium tuberculosis Structural Database,0.987535059,MtbSD,0.997164249,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/30/2011 +22209237,http://ccbb.jnu.ac.in/Tb,"MTCID: a database of genetic polymorphisms in clinical isolates of Mycobacterium tuberculosis. Tuberculosis (TB) is a major cause of morbidity and mortality throughout the world, particularly in developing countries. The response of the patients and treatment outcome depends, in addition to diagnosis, appropriate and timely treatment and host factors, on the virulence of Mycobacterium tuberculosis and genetic polymorphism prevalent in clinical isolates of the bacterium. A number of studies have been carried out to characterize clinical isolates of M. tuberculosis obtained from TB patients. However, the data is scattered in a large number of publications. Though attempts have been made to catalog the observed variations, there is no database that has been developed for cataloging, storing and dissemination of genetic polymorphism information. MTCID (M. tuberculosis clinical isolate genetic polymorphism database) is an attempt to provide a comprehensive repository to store, access and disseminate single nucleotide polymorphism (SNPs) and spoligotyping profiles of M. tuberculosis. 
It can be used to automatically upload the information available with a user that adds to the existing database at the backend. Besides it may also aid in maintaining clinical profiles of TB and treatment of patients. The database has 'search' features and is available at http://ccbb.jnu.ac.in/Tb.",MTCID,0.993752122,uberculosis clinical isolate genetic polymorphism database,0.893513964,MTCID,0.993752122,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/29/2011 +26822098,http://mtd.cbi.ac.cn,"MTD: a mammalian transcriptomic database to explore gene expression and regulation. A systematic transcriptome survey is essential for the characterization and comprehension of the molecular basis underlying phenotypic variations. Recently developed RNA-seq methodology has facilitated efficient data acquisition and information mining of transcriptomes in multiple tissues/cell lines. Current mammalian transcriptomic databases are either tissue-specific or species-specific, and they lack in-depth comparative features across tissues and species. Here, we present a mammalian transcriptomic database (MTD) that is focused on mammalian transcriptomes, and the current version contains data from humans, mice, rats and pigs. Regarding the core features, the MTD browses genes based on their neighboring genomic coordinates or joint KEGG pathway and provides expression information on exons, transcripts and genes by integrating them into a genome browser. We developed a novel nomenclature for each transcript that considers its genomic position and transcriptional features. The MTD allows a flexible search of genes or isoforms with user-defined transcriptional characteristics and provides both table-based descriptions and associated visualizations. To elucidate the dynamics of gene expression regulation, the MTD also enables comparative transcriptomic analysis in both intraspecies and interspecies manner. The MTD thus constitutes a valuable resource for transcriptomic and evolutionary studies. 
The MTD is freely accessible at http://mtd.cbi.ac.cn.",MTD,0.986400823,mammalian transcriptomic database,0.553143755,MTD,0.986400823,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/27/2016 +22309450,http://bioinformatics.cau.edu.cn/MtTransporter,"Medicago truncatula transporter database: a comprehensive database resource for M. truncatula transporters. Background Medicago truncatula has been chosen as a model species for genomic studies. It is closely related to an important legume, alfalfa. Transporters are a large group of membrane-spanning proteins. They deliver essential nutrients, eject waste products, and assist the cell in sensing environmental conditions by forming a complex system of pumps and channels. Although studies have effectively characterized individual M. truncatula transporters in several databases, until now there has been no available systematic database that includes all transporters in M. truncatula. Description The M. truncatula transporter database (MTDB) contains comprehensive information on the transporters in M. truncatula. Based on the TransportTP method, we have presented a novel prediction pipeline. A total of 3,665 putative transporters have been annotated based on International Medicago Genome Annotated Group (IMGAG) V3.5 V3 and the M. truncatula Gene Index (MTGI) V10.0 releases and assigned to 162 families according to the transporter classification system. These families were further classified into seven types according to their transport mode and energy coupling mechanism. Extensive annotations referring to each protein were generated, including basic protein function, expressed sequence tag (EST) mapping, genome locus, three-dimensional template prediction, transmembrane segment, and domain annotation. A chromosome distribution map and text-based Basic Local Alignment Search Tools were also created. In addition, we have provided a way to explore the expression of putative M. truncatula transporter genes under stress treatments. 
Conclusions In summary, the MTDB enables the exploration and comparative analysis of putative transporters in M. truncatula. A user-friendly web interface and regular updates make MTDB valuable to researchers in related fields. The MTDB is freely available now to all users at http://bioinformatics.cau.edu.cn/MtTransporter/.",MTDB,0.993324598,truncatula transporter database,0.971297204,MTDB,0.993324598,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/6/2012 +34245304,http://medicago.toulouse.inrae.fr/MtExpress,"MtExpress, a Comprehensive and Curated RNAseq-based Gene Expression Atlas for the Model Legume Medicago truncatula. Although RNA sequencing (RNAseq) has been becoming the main transcriptomic approach in the model legume Medicago truncatula, there is currently no genome-wide gene expression atlas covering the whole set of RNAseq data published for this species. Nowadays, such a tool is highly valuable to provide a global view of gene expression in a wide range of conditions and tissues/organs. Here, we present MtExpress, a gene expression atlas that compiles an exhaustive set of published M. truncatula RNAseq data (https://medicago.toulouse.inrae.fr/MtExpress). MtExpress makes use of recent releases of M. truncatula genome sequence and annotation, as well as up-to-date tools to perform mapping, quality control, statistical analysis and normalization of RNAseq data. MtExpress combines semi-automated pipelines with manual re-labeling and organization of samples to produce an attractive and user-friendly interface, fully integrated with other available Medicago genomic resources. Importantly, MtExpress is highly flexible, in terms of both queries, e.g. allowing searches with gene names and orthologous gene IDs from Arabidopsis and other legume species, and outputs, to customize visualization and redirect gene study to relevant Medicago webservers. Thanks to its semi-automated pipeline, MtExpress will be frequently updated to follow the rapid pace of M. 
truncatula RNAseq data publications, as well as the constant improvement of genome annotation. MtExpress also hosts legacy GeneChip expression data originally stored in the Medicago Gene Expression Atlas, as a very valuable and complementary resource.",MtExpress,0.993226945,NA,0,MtExpress,0.993226945,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2021 +26490638,http://mtibase.sysu.edu.cn,"MtiBase: a database for decoding microRNA target sites located within CDS and 5'UTR regions from CLIP-Seq and expression profile datasets. . MicroRNAs (miRNAs) play an important role in the regulation of gene expression. Previous studies on miRNA functions mainly focused on their target sites in the 3' untranslated regions (UTRs) of mRNAs. However, increasing evidence has revealed that miRNAs can also induce mRNA degradation and mediate translational repression via complementary interactions with the coding sequence (CDS) and 5'UTR of mRNAs. In this study, we developed a novel database, MtiBase, to facilitate the comprehensive exploration of CDS- and 5'UTR-located miRNA target sites identified from cross-linking immunoprecipitation sequencing (CLIP-Seq) datasets and to uncover their regulatory effects on mRNA stability and translation from expression profile datasets. By integrating 61 Argonaute protein-binding CLIP-Seq datasets and miRNA target sites predicted by five commonly used programs, we identified approximately 4 400 000 CDS-located and 470 000 5'UTR-located miRNA target sites. Moreover, we evaluated the regulatory effects of miRNAs on mRNA stability and translation using the data from 222 gene expression profiles, and 28 ribosome-protected fragment sequencing, and six pulsed stable isotope labeling with amino acids in culture. Finally, the effects of SNPs on the functions of miRNA target sites were systematically evaluated. Our study provides a useful tool for functional studies of miRNAs in regulating physiology and pathology. 
Database URL: http://mtibase.sysu.edu.cn.",MtiBase,0.993986785,NA,0,MtiBase,0.993986785,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2015 +30813887,http://bioinfodbs.kantiana.ru/mtProtEvol,"mtProtEvol: the resource presenting molecular evolution analysis of proteins involved in the function of Vertebrate mitochondria. BACKGROUND:Heterotachy is the variation in the evolutionary rate of aligned sites in different parts of the phylogenetic tree. It occurs mainly due to epistatic interactions among the substitutions, which are highly complex and make it difficult to study protein evolution. The vast majority of computational evolutionary approaches for studying these epistatic interactions or their evolutionary consequences in proteins require high computational time. However, recently, it has been shown that the evolution of residue solvent accessibility (RSA) is tightly linked with changes in protein fitness and intra-protein epistatic interactions. This provides a computationally fast alternative, based on comparison of evolutionary rates of amino acid replacements with the rates of RSA evolutionary changes in order to recognize any shifts in epistatic interaction. RESULTS:Based on RSA information, data randomization and phylogenetic approaches, we constructed a software pipeline, which can be used to analyze the evolutionary consequences of intra-protein epistatic interactions with relatively low computational time. We analyzed the evolution of 512 protein families tightly linked to mitochondrial function in Vertebrates and created ""mtProtEvol"", the web resource with data on protein evolution. In strict agreement with lifespan and metabolic rate data, we demonstrated that different functional categories of mitochondria-related proteins subjected to selection on accelerated and decelerated RSA rates in rodents and primates. 
For example, accelerated RSA evolution in rodents has been shown for Krebs cycle enzymes, respiratory chain and reactive oxygen species metabolism, while in primates these functions are stress-response, translation and mtDNA integrity. Decelerated RSA evolution in rodents has been demonstrated for translational machinery and oxidative stress response components. CONCLUSIONS:mtProtEvol is an interactive resource focused on evolutionary analysis of epistatic interactions in protein families involved in Vertebrata mitochondria function and available at http://bioinfodbs.kantiana.ru/mtProtEvol /. This resource and the devised software pipeline may be useful tool for researchers in area of protein evolution.",mtProtEvol,0.955864847,NA,0,mtProtEvol,0.955864847,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/26/2019 +32079733,http://mtsspdb.noble.org,"MtSSPdb: The Medicago truncatula Small Secreted Peptide Database. A growing number of small secreted peptides (SSPs) in plants are recognized as important regulatory molecules with roles in processes such as growth, development, reproduction, stress tolerance, and pathogen defense. Recent discoveries further implicate SSPs in regulating root nodule development, which is of particular significance for legumes. SSP-coding genes are frequently overlooked, because genome annotation pipelines generally ignore small open reading frames, which are those most likely to encode SSPs. Also, SSP-coding small open reading frames are often expressed at low levels or only under specific conditions, and thus are underrepresented in non-tissue-targeted or non-condition-optimized RNA-sequencing projects. We previously identified 4,439 SSP-encoding genes in the model legume Medicago truncatula To support systematic characterization and annotation of these putative SSP-encoding genes, we developed the M. truncatula Small Secreted Peptide Database (MtSSPdb; https://mtsspdb.noble.org/). MtSSPdb currently hosts (1) a compendium of M. 
truncatula SSP candidates with putative function and family annotations; (2) a large-scale M. truncatula RNA-sequencing-based gene expression atlas integrated with various analytical tools, including differential expression, coexpression, and pathway enrichment analyses; (3) an online plant SSP prediction tool capable of analyzing protein sequences at the genome scale using the same protocol as for the identification of SSP genes; and (4) information about a library of synthetic peptides and root and nodule phenotyping data from synthetic peptide screens in planta. These datasets and analytical tools make MtSSPdb a unique and valuable resource for the plant research community. MtSSPdb also has the potential to become the most complete database of SSPs in plants.",MtSSPdb,0.998309374,truncatula Small Secreted Peptide Database,0.937849825,MtSSPdb,0.998309374,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/20/2020 +29869221,http://hdl.handle.net/2374.MIA/6067,"Miami University deception detection database. In the present work, we introduce the Miami University Deception Detection Database (MU3D), a free resource containing 320 videos of target individuals telling truths and lies. Eighty (20 Black female, 20 Black male, 20 White female, and 20 White male) different targets were recorded speaking honestly and dishonestly about their social relationships. Each target generated four different videos (i.e., positive truth, negative truth, positive lie, negative lie), yielding 320 videos fully crossing target race, target gender, statement valence, and statement veracity. These videos were transcribed by trained research assistants and evaluated by naïve raters. Descriptive analyses of the video characteristics (e.g., length) and subjective ratings (e.g., target attractiveness) are provided. The stimuli and an information codebook can be accessed free of charge for academic research purposes from http://hdl.handle.net/2374.MIA/6067 . 
The MU3D offers scholars the ability to conduct research using standardized stimuli that can aid in building more comprehensive theories of interpersonal sensitivity, enhance replication among labs, facilitate the use of signal detection analyses, and promote consideration of race, gender, and their interactive effects in deception detection research.",MU3D,0.996364331,Miami University Deception Detection Database,0.954041362,MU3D,0.996364331,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2019 +24465676,http://reprod.njmu.edu.cn/mUbiSiDa,"mUbiSiDa: a comprehensive database for protein ubiquitination sites in mammals. Motivation Protein ubiquitination is one of the important post-translational modifications by attaching ubiquitin to specific lysine (K) residues in target proteins, and plays important regulatory roles in many cell processes. Recent studies indicated that abnormal protein ubiquitination have been implicated in many diseases by degradation of many key regulatory proteins including tumor suppressor, oncoprotein, and cell cycle regulator. The detailed information of protein ubiquitination sites is useful for scientists to investigate the mechanism of many cell activities and related diseases. Results In this study we established mUbiSida for mammalian Ubiquitination Site Database, which provides a scientific community with a comprehensive, freely and high-quality accessible resource of mammalian protein ubiquitination sites. In mUbiSida, we deposited about 35,494 experimentally validated ubiquitinated proteins with 110,976 ubiquitination sites from five species. The mUbiSiDa can also provide blast function to predict novel protein ubiquitination sites in other species by blast the query sequence in the deposit sequences in mUbiSiDa. The mUbiSiDa was designed to be a widely used tool for biologists and biomedical researchers with a user-friendly interface, and facilitate the further research of protein ubiquitination, biological networks and functional proteomics. 
The mUbiSiDa database is freely available at http://reprod.njmu.edu.cn/mUbiSiDa.",mUbiSiDa,0.997054636,mammalian Ubiquitination Site Database,0.863494439,mUbiSiDa,0.997054636,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/17/2014 +25559128,http://mufold.org/mufolddb.php,"MUFOLD-DB: a processed protein structure database for protein structure prediction and analysis. Background Protein structure data in Protein Data Bank (PDB) are widely used in studies of protein function and evolution and in protein structure prediction. However, there are two main barriers in large-scale usage of PDB data: 1) PDB data are highly redundant in terms of sequence and structure similarity; and 2) many PDB files have issues due to inconsistency of data and standards as well as missing residues, so that automated retrieval and analysis are often difficult. Description To address these issues, we have created MUFOLD-DB http://mufold.org/mufolddb.php, a web-based database, to collect and process the weekly PDB files thereby providing users with non-redundant, cleaned and partially-predicted structure data. For each of the non-redundant sequences, we annotate the SCOP domain classification and predict structures of missing regions by loop modelling. In addition, evolutional information, secondary structure, disorder region, and processed three-dimensional structure are computed and visualized to help users better understand the protein. Conclusions MUFOLD-DB integrates processed PDB sequence and structure data and multiple computational results, provides a friendly interface for users to retrieve, browse and download these data, and offers several useful functionalities to facilitate users' data operation.",MUFOLD-DB,0.985240579,NA,0,MUFOLD-DB,0.985240579,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/16/2014 +24253302,http://wallace.uab.es/multitask,"MultitaskProtDB: a database of multitasking proteins. 
We have compiled MultitaskProtDB, available online at http://wallace.uab.es/multitask, to provide a repository where the many multitasking proteins found in the literature can be stored. Multitasking or moonlighting is the capability of some proteins to execute two or more biological functions. Usually, multitasking proteins are experimentally revealed by serendipity. This ability of proteins to perform multitasking functions helps us to understand one of the ways used by cells to perform many complex functions with a limited number of genes. Even so, the study of this phenomenon is complex because, among other things, there is no database of moonlighting proteins. The existence of such a tool facilitates the collection and dissemination of these important data. This work reports the database, MultitaskProtDB, which is designed as a friendly user web page containing >288 multitasking proteins with their NCBI and UniProt accession numbers, canonical and additional biological functions, monomeric/oligomeric states, PDB codes when available and bibliographic references. This database also serves to gain insight into some characteristics of multitasking proteins such as frequencies of the different pairs of functions, phylogenetic conservation and so forth.",MultitaskProtDB,0.998039424,NA,0,MultitaskProtDB,0.998039424,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/18/2013 +29136215,http://wallace.uab.es/multitaskII,"MultitaskProtDB-II: an update of a database of multitasking/moonlighting proteins. Multitasking, or moonlighting, is the capability of some proteins to execute two or more biological functions. MultitaskProtDB-II is a database of multifunctional proteins that has been updated. In the previous version, the information contained was: NCBI and UniProt accession numbers, canonical and additional biological functions, organism, monomeric/oligomeric states, PDB codes and bibliographic references. 
In the present update, the number of entries has been increased from 288 to 694 moonlighting proteins. MultitaskProtDB-II is continually being curated and updated. The new database also contains the following information: GO descriptors for the canonical and moonlighting functions, three-dimensional structure (for those proteins lacking PDB structure, a model was made using Itasser and Phyre), the involvement of the proteins in human diseases (78% of human moonlighting proteins) and whether the protein is a target of a current drug (48% of human moonlighting proteins). These numbers highlight the importance of these proteins for the analysis and explanation of human diseases and target-directed drug design. Moreover, 25% of the proteins of the database are involved in virulence of pathogenic microorganisms, largely in the mechanism of adhesion to the host. This highlights their importance for the mechanism of microorganism infection and vaccine design. MultitaskProtDB-II is available at http://wallace.uab.es/multitaskII.",MultitaskProtDB-I,0.860877524,NA,0,MultitaskProtDB-I,0.860877524,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2018 +31652812,http://leb.snu.ac.kr/mmdb,"Introducing Murine Microbiome Database (MMDB): A Curated Database with Taxonomic Profiling of the Healthy Mouse Gastrointestinal Microbiome. . The gut microbiota modulates overall metabolism, the immune system and brain development of the host. The majority of mammalian gut microbiota consists of bacteria. Among various model animals, the mouse has been most widely used in pre-clinical biological experiments. The significant compositional differences in taxonomic profiles among different mouse strains due to gastrointestinal locations, genotypes and vendors have been well documented. However, details of such variations are yet to be elucidated. 
This study compiled and analyzed 16S rRNA gene-based taxonomic profiles of 554 healthy mouse samples from 14 different projects to construct a comprehensive database of the microbiome of a healthy mouse gastrointestinal tract. The database, named Murine Microbiome Database, should provide researchers with useful taxonomic information and better biological insight about how each taxon, such as genus and species, is associated with locations in the gastrointestinal tract, genotypes and vendors. The database is freely accessible over the Internet at http://leb.snu.ac.kr/mmdb/.",MMDB,0.96417357,Murine Microbiome Database,0.973936637,Murine Microbiome Database,0.973936637,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/23/2019 +30837356,http://bioinfnrcb.byethost7.com/nrcbbio,"MusatransSSRDB (a transcriptome derived SSR database) - An advanced tool for banana improvement. . Availability of transcriptome datasets for use in accelerated molecular-based breeding in Musa species is limited. Illumina Hiseq technology was employed to determine differential gene expression between the contrasting cultivars for three different stresses (Eumusae leaf spot -Mycosphaerella eumusae, root lesion nematode - Pratylenchus coffeae and moisture deficit stress) under challenged and unchallenged conditions. An average of 34.72 million of reads was assembled into ~47629 contigs, and ~5,466 simple sequence repeats (SSR) from each library were identified. GO annotation and KEGG pathway analysis were carried for all the transcripts and the SSR, SNPs were also detected. Based on this information, a MusatransSSRDB has been developed. Currently, the database consists of 32,800 SSRs with the unique information like putative function of the SSR-containing genes and their metabolic pathway and expression profiling under various stress conditions. 
This database provides information on in silico polymorphic SSRs (2830 SSRs) between the contrasting cultivars for each stress and within stress. Information on in silico polymorphic SSRs specific to differentially expressed genes under challenged condition for each stress can also be accessed. This database facilitates the retrieval of results by navigating the tabs for cultivars, stress and polymorphism. This database was developed using HTML, Java and PHP; datasets are stored in MySQL database and accessible in the public domain (http://bioinfnrcb.byethost7.com/nrcbbio/). This unique information facilitates the banana breeder to select the SSR primers based on specific objectives. MusatransSSRDB along with other genomics databases will facilitate the genetic dissection and breeding for complex traits in banana. Thus, this database is a step forward in economizing cost, time, manpower and other resources. Keywords.",MusatransSSRDB,0.983545184,NA,0,MusatransSSRDB,0.983545184,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2019 +30907069,http://tubulinmutations.bio.uci.edu,"""The Missing Link"": The Tubulin Mutation Database Connects Over 1500 Missense Mutations With Phenotypes Across Eukaryotes. As outlined in their recent paper (A Tubulin Mutation Database: A Resource for the Cytoskeletal Community), Catherine Pham and Naomi Morrissette from the University of California, Irvine, scoured the literature and catalogued data for 489 point mutations for 𝛂-tubulin, 729 for β-tubulin, and 343 for 𝛄, ẟ, 𝛆, and 𝛇 tubulins to create the tubulin mutation database (http://tubulinmutations.bio.uci.edu). The database is a searchable catalog of missense mutations and phenotypes that is expected to grow with biannual updates. Data entries regarding the species and isoform, as well as links to available sequences and the original study which characterized the mutant are intuitively displayed and color coded (Pham & Morrissette, 2019). 
This database represents a unique opportunity for clinicians and cell biologists to rapidly connect sequence data to mutant phenotypes and gather primary literature which promises to facilitate discoveries on topics including microtubule dynamics, antimitotic drug use and resistance, and evolution. We expect that many researchers will find this tool of great use to their research. This article is protected by copyright. All rights reserved.",NA,0,Mutation,0.560572505,Mutation,0.560572505,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,2/1/2019 +26590264,http://www.mutationaligner.org,"MutationAligner: a resource of recurrent mutation hotspots in protein domains in cancer. The MutationAligner web resource, available at http://www.mutationaligner.org, enables discovery and exploration of somatic mutation hotspots identified in protein domains in currently (mid-2015) more than 5000 cancer patient samples across 22 different tumor types. Using multiple sequence alignments of protein domains in the human genome, we extend the principle of recurrence analysis by aggregating mutations in homologous positions across sets of paralogous genes. Protein domain analysis enhances the statistical power to detect cancer-relevant mutations and links mutations to the specific biological functions encoded in domains. We illustrate how the MutationAligner database and interactive web tool can be used to explore, visualize and analyze mutation hotspots in protein domains across genes and tumor types. We believe that MutationAligner will be an important resource for the cancer research community by providing detailed clues for the functional importance of particular mutations, as well as for the design of functional genomics experiments and for decision support in precision medicine. 
MutationAligner is slated to be periodically updated to incorporate additional analyses and new data from cancer genomics projects.",MutationAligner,0.997395098,NA,0,MutationAligner,0.997395098,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2015 +22730453,http://www.MuteinDB.org,"MuteinDB: the mutein database linking substrates, products and enzymatic reactions directly with genetic variants of enzymes. Mutational events as well as the selection of the optimal variant are essential steps in the evolution of living organisms. The same principle is used in laboratory to extend the natural biodiversity to obtain better catalysts for applications in biomanufacturing or for improved biopharmaceuticals. Furthermore, single mutation in genes of drug-metabolizing enzymes can also result in dramatic changes in pharmacokinetics. These changes are a major cause of patient-specific drug responses and are, therefore, the molecular basis for personalized medicine. MuteinDB systematically links laboratory-generated enzyme variants (muteins) and natural isoforms with their biochemical properties including kinetic data of catalyzed reactions. Detailed information about kinetic characteristics of muteins is available in a systematic way and searchable for known mutations and catalyzed reactions as well as their substrates and known products. MuteinDB is broadly applicable to any known protein and their variants and makes mutagenesis and biochemical data searchable and comparable in a simple and easy-to-use manner. For the import of new mutein data, a simple, standardized, spreadsheet-based data format has been defined. To demonstrate the broad applicability of the MuteinDB, first data sets have been incorporated for selected cytochrome P450 enzymes as well as for nitrilases and peroxidases. 
Database URL: http://www.MuteinDB.org.",MuteinDB,0.990509152,NA,0,MuteinDB,0.990509152,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/21/2012 +31588509,http://www.innovebioinfo.com/Databases/Mutationdb_About.php,"MutEx: a multifaceted gateway for exploring integrative pan-cancer genomic data. Somatic mutation and gene expression dysregulation are considered two major tumorigenesis factors. While independent investigations of either factor pervade, studies of associations between somatic mutations and gene expression changes have been sporadic and nonsystematic. Utilizing genomic data collected from 11 315 subjects of 33 distinct cancer types, we constructed MutEx, a pan-cancer integrative genomic database. This database records the relationships among gene expression, somatic mutation and survival data for cancer patients. MutEx can be used to swiftly explore the relationship between these genomic/clinic features within and across cancer types and, more importantly, search for corroborating evidence for hypothesis inception. Our database also incorporated Gene Ontology and several pathway databases to enhance functional annotation, and elastic net and a gene expression composite score to aid in survival analysis. To demonstrate the usability of MutEx, we provide several application examples, including top somatic mutations associated with the most extensive expression dysregulation in breast cancer, differential mutational burden downstream of DNA mismatch repair gene mutations and composite gene expression score-based survival difference in breast cancer. MutEx can be accessed at http://www.innovebioinfo.com/Databases/Mutationdb_About.php.",MutEx,0.993316352,NA,0,MutEx,0.993316352,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2020 +29401218,http://www.iitm.ac.in/bioinfo/MutHTP,"MutHTP: mutations in human transmembrane proteins. 
Motivation:Existing sources of experimental mutation data do not consider the structural environment of amino acid substitutions and distinguish between soluble and membrane proteins. They also suffer from a number of further limitations, including data redundancy, lack of disease classification, incompatible information content, and ambiguous annotations (e.g. the same mutation being annotated as disease and benign). Results:We have developed a novel database, MutHTP, which contains information on 183 395 disease-associated and 17 827 neutral mutations in human transmembrane proteins. For each mutation site MutHTP provides a description of its location with respect to the membrane protein topology, structural environment (if available) and functional features. Comprehensive visualization, search, display and download options are available. Availability and implementation:The database is publicly available at http://www.iitm.ac.in/bioinfo/MutHTP/. The website is implemented using HTML, PHP and javascript and supports recent versions of all major browsers, such as Firefox, Chrome and Opera. Supplementary information:Supplementary data are available at Bioinformatics online.",MutHTP,0.994622827,mutations in human transmembrane proteins,0.955130294,MutHTP,0.994622827,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2018 +27907895,http://zhaobioinfo.org/mutLBSgeneDB,"mutLBSgeneDB: mutated ligand binding site gene DataBase. Mutations at the ligand binding sites (LBSs) can influence protein structure stability, binding affinity with small molecules, and drug resistance in cancer patients. Our recent analysis revealed that ligand binding residues had a significantly higher mutation rate than other parts of the protein. 
Here, we built mutLBSgeneDB (mutated Ligand Binding Site gene DataBase) available at http://zhaobioinfo.org/mutLBSgeneDB We collected and curated over 2300 genes (mutLBSgenes) having ∼12 000 somatic mutations at ∼10 000 LBSs across 16 cancer types and selected 744 drug targetable genes (targetable_mutLBSgenes) by incorporating kinases, transcription factors, pharmacological genes, and cancer driver genes. We analyzed LBS mutation information, differential gene expression network, drug response correlation with gene expression, and protein stability changes for all mutLBSgenes using integrated genetic, genomic, transcriptomic, proteomic, network and functional information. We calculated and compared the binding affinities of 20 carefully selected genes with their drugs in wild type and mutant forms. mutLBSgeneDB provides a user-friendly web interface for searching and browsing through seven categories of annotations: Gene summary, Mutated information, Protein structure related information, Differential gene expression and gene-gene network, Phenotype information, Pharmacological information, and Conservation information. mutLBSgeneDB provides a useful resource for functional genomics, protein structure, drug and disease research communities.",mutLBSgeneDB,0.983370721,mutated ligand binding site gene DataBase,0.615566436,mutLBSgeneDB,0.983370721,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/7/2016 +33600011,http://brb.nci.nih.gov/splicing,"MutSpliceDB: A database of splice sites variants with RNA-seq based evidence on effects on splicing. Splice site variants may lead to transcript alterations, causing exons inclusion, exclusion, truncation, or intron retention. Interpreting the consequences of a specific splice site variant is not straightforward, especially if the variant is located outside of the canonical splice sites. 
We developed MutSpliceDB: https://brb.nci.nih.gov/splicing, a public resource to facilitate the interpretation of splice sites variants effects on splicing based on manually reviewed RNA-seq BAM files from samples with splice site variants.",MutSpliceDB,0.996531308,NA,0,MutSpliceDB,0.996531308,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2021 +30053238,http://lms.snu.edu.in/mutTCPDB/index.php,"mutTCPdb: a comprehensive database for genomic variants of a tropical country neglected disease-tropical calcific pancreatitis. Tropical calcific pancreatitis (TCP) is a juvenile, non-alcoholic form of chronic pancreatitis with its exclusive presence in tropical regions associated with the low economic status. TCP initiates in the childhood itself and then proliferates silently. mutTCPdb is a manually curated and comprehensive disease specific single nucleotide variant (SNV) database. Extensive search strategies were employed to create a repository while SNV information was collected from published articles. Several existing databases such as the dbSNP, Uniprot, miRTarBase2.0, HGNC, PFAM, KEGG, PROSITE, MINT, BIOGRID 3.4 and Ensemble Genome Browser 87 were queried to collect information specific to the gene. mutTCPdb is running on the XAMPP web server with MYSQL database in the backend for data storage and management. Currently, the mutTCPdb enlists 100 variants of all 11 genes identified in TCP, out of which 45 are non-synonymous (missense, nonsense, deletions and insertions), 46 are present in non-coding regions (UTRs, promoter region and introns) and 9 are synonymous variants. The database is highly curated for disease-specific gene variants and provides complete information on function, transcript information, pathways, interactions, miRNAs and PubMed references along with remarks. 
It is an informative portal for clinicians and researchers for a better understanding of the disease, as it may help in identifying novel targets and diagnostic markers, hence, can be a source to improve the strategies for TCP management.Database URL: http://lms.snu.edu.in/mutTCPDB/index.php.",mutTCPdb,0.996738315,NA,0,mutTCPdb,0.996738315,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +"24311565, 29106611",http://bioinformatics.charite.de/mvoc,"mVOC: a database of microbial volatiles. Scents are well known to be emitted from flowers and animals. In nature, these volatiles are responsible for inter- and intra-organismic communication, e.g. attraction and defence. Consequently, they influence and improve the establishment of organisms and populations in ecological niches by acting as single compounds or in mixtures. Despite the known wealth of volatile organic compounds (VOCs) from species of the plant and animal kingdom, in the past, less attention has been focused on volatiles of microorganisms. Although fast and affordable sequencing methods facilitate the detection of microbial diseases, however, the analysis of signature or fingerprint volatiles will be faster and easier. Microbial VOCs (mVOCs) are presently used as marker to detect human diseases, food spoilage or moulds in houses. Furthermore, mVOCs exhibited antagonistic potential against pathogens in vitro, but their biological roles in the ecosystems remain to be investigated. Information on volatile emission from bacteria and fungi is presently scattered in the literature, and no public and up-to-date collection on mVOCs is available. To address this need, we have developed mVOC, a database available online at http://bioinformatics.charite.de/mvoc.",mVOC,0.945702612,NA,0,mVOC,0.945702612,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +29177508,http://mvp.medgenius.info,"MVP: a microbe-phage interaction database. 
Phages invade microbes, accomplish host lysis and are of vital importance in shaping the community structure of environmental microbiota. More importantly, most phages have very specific hosts; they are thus ideal tools to manipulate environmental microbiota at species-resolution. The main purpose of MVP (Microbe Versus Phage) is to provide a comprehensive catalog of phage-microbe interactions and assist users to select phage(s) that can target (and potentially to manipulate) specific microbes of interest. We first collected 50 782 viral sequences from various sources and clustered them into 33 097 unique viral clusters based on sequence similarity. We then identified 26 572 interactions between 18 608 viral clusters and 9245 prokaryotes (i.e. bacteria and archaea); we established these interactions based on 30 321 evidence entries that we collected from published datasets, public databases and re-analysis of genomic and metagenomic sequences. Based on these interactions, we calculated the host range for each of the phage clusters and accordingly grouped them into subgroups such as 'species-', 'genus-' and 'family-' specific phage clusters. MVP is equipped with a modern, responsive and intuitive interface, and is freely available at: http://mvp.medgenius.info.",MVP,0.808669806,Microbe Versus,0.708312586,MVP,0.808669806,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +26166372,http://bioinf.xmu.edu.cn/MVsCarta,"MVsCarta: A protein database of matrix vesicles to aid understanding of biomineralization. Matrix vesicles (MVs) are membranous nanovesicles released by chondrocytes, osteoblasts, and odontoblasts. They play a critical role in modulating mineralization. Here, we present a manually curated database of MV proteins, namely MVsCara to provide comprehensive information on MVs of protein components. 
In the current version, the database contains 2,713 proteins of six organisms identified in bone, cartilage, tooth tissues, and cells capable of producing a mineralized bone matrix. The MVsCarta database is now freely assessed at http://bioinf.xmu.edu.cn/MVsCarta. The search and browse methods were developed for better retrieval of data. In addition, bioinformatic tools like Gene Ontology (GO) analysis, network visualization and protein-protein interaction analysis were implemented for a functional understanding of MVs components. Similar database hasn't been reported yet. We believe that this free web-based database might serve as a useful repository to elucidate the novel function and regulation of MVs during mineralization, and to stimulate the advancement of MV studies.",MVsCarta,0.977910161,NA,0,MVsCarta,0.977910161,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/1/2015 +28454513,http://bicresources.jcbose.ac.in/ssaha4/mycbase,"MYCbase: a database of functional sites and biochemical properties of Myc in both normal and cancer cells. Background Myc is an essential gene having multiple functions such as in cell growth, differentiation, apoptosis, genomic stability, angiogenesis, and disease biology. A large number of researchers dedicated to Myc biology are generating a substantial amount of data in normal and cancer cells/tissues including Burkitt's lymphoma and ovarian cancer. Results MYCbase ( http://bicresources.jcbose.ac.in/ssaha4/mycbase ) is a collection of experimentally supported functional sites in Myc that can influence the biological cellular processes. The functional sites were compiled according to their role which includes mutation, methylation pattern, post-translational modifications, protein-protein interactions (PPIs), and DNA interactions. In addition, biochemical properties of Myc are also compiled, which includes metabolism/pathway, protein abundance, and modulators of protein-protein interactions. 
The OMICS data related to Myc- like gene expression, proteomics expression using mass-spectrometry and miRNAs targeting Myc were also compiled in MYCbase. The mutation and pathway data from the MYCbase were analyzed to look at the patterns and distributions across different diseases. There were few proteins/genes found common in Myc-protein interactions and Myc-DNA binding, and these can play a significant role in transcriptional feedback loops. Conclusion In this report, we present a comprehensive integration of relevant information regarding Myc in the form of MYCbase. The data compiled in MYCbase provides a reliable data resource for functional sites at the residue level and biochemical properties of Myc in various cancers.",MYCbase,0.986929297,NA,0,MYCbase,0.986929297,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/28/2017 +24592289,http://mycobacteriarv.igib.res.in,"Integrative immunoinformatics for Mycobacterial diseases in R platform. The sequencing of genomes of the pathogenic Mycobacterial species causing pulmonary and extrapulmonary tuberculosis, leprosy and other atypical mycobacterial infections, offer immense opportunities for discovering new therapeutics and identifying new vaccine candidates. Enhanced RV, which uses additional algorithms to Reverse Vaccinology (RV), has increased potential to reduce likelihood of undesirable features including allergenicity and immune cross reactivity to host. The starting point for MycobacRV database construction includes collection of known vaccine candidates and a set of predicted vaccine candidates identified from the whole genome sequences of 22 mycobacterium species and strains pathogenic to human and one non-pathogenic Mycobacterium tuberculosis H37Ra strain. These predicted vaccine candidates are the adhesins and adhesin-like proteins obtained using SPAAN at Pad > 0.6 and screening for putative extracellular or surface localization characteristics using PSORTb v.3.0 at very stringent cutoff. 
Subsequently, these protein sequences were analyzed through 21 publicly available algorithms to obtain Orthologs, Paralogs, BetaWrap Motifs, Transmembrane Domains, Signal Peptides, Conserved Domains, and similarity to human proteins, T cell epitopes, B cell epitopes, Discotopes and potential Allergens predictions. The Enhanced RV information was analysed in R platform through scripts following well structured decision trees to derive a set of nonredundant 233 most probable vaccine candidates. Additionally, the degree of conservation of potential epitopes across all orthologs has been obtained with reference to the M. tuberculosis H37Rv strain, the most commonly used strain in M. tuberculosis studies. Utilities for the vaccine candidate search and analysis of epitope conservation across the orthologs with reference to M. tuberculosis H37Rv strain are available in the mycobacrvR package in R platform accessible from the ""Download"" tab of MycobacRV webserver. MycobacRV an immunoinformatics database of known and predicted mycobacterial vaccine candidates has been developed and is freely available at http://mycobacteriarv.igib.res.in.",MycobacRV,0.986873567,NA,0,MycobacRV,0.986873567,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/15/2014 +33653882,http://msrdb.org,"A Mycobacterial Systems Resource for the Research Community. . Functional characterization of bacterial proteins lags far behind the identification of new protein families. 
This is especially true for bacterial species that are more difficult to grow and genetically manipulate than model systems such as Escherichia coli and Bacillus subtilis To facilitate functional characterization of mycobacterial proteins, we have established a Mycobacterial Systems Resource (MSR) using the model organism Mycobacterium smegmatis This resource focuses specifically on 1,153 highly conserved core genes that are common to many mycobacterial species, including Mycobacterium tuberculosis, in order to provide the most relevant information and resources for the mycobacterial research community. The MSR includes both biological and bioinformatic resources. The biological resource includes (i) an expression plasmid library of 1,116 genes fused to a fluorescent protein for determining protein localization; (ii) a library of 569 precise deletions of nonessential genes; and (iii) a set of 843 CRISPR-interference (CRISPRi) plasmids specifically targeted to silence expression of essential core genes and genes for which a precise deletion was not obtained. The bioinformatic resource includes information about individual genes and a detailed assessment of protein localization. We anticipate that integration of these initial functional analyses and the availability of the biological resource will facilitate studies of these core proteins in many Mycobacterium species, including the less experimentally tractable pathogens M. abscessus, M. avium, M. kansasii, M. leprae, M. marinum, M. tuberculosis, and M. ulceransIMPORTANCE Diseases caused by mycobacterial species result in millions of deaths per year globally, and present a substantial health and economic burden, especially in immunocompromised patients. Difficulties inherent in working with mycobacterial pathogens have hampered the development and application of high-throughput genetics that can inform genome annotations and subsequent functional assays. 
To facilitate mycobacterial research, we have created a biological and bioinformatic resource (https://msrdb.org/) using Mycobacterium smegmatis as a model organism. The resource focuses specifically on 1,153 proteins that are highly conserved across the mycobacterial genus and, therefore, likely perform conserved mycobacterial core functions. Thus, functional insights from the MSR will apply to all mycobacterial species. We believe that the availability of this mycobacterial systems resource will accelerate research throughout the mycobacterial research community.",MSR,0.835704486,Mycobacterial Systems Resource,0.923476199,Mycobacterial Systems Resource,0.923476199,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/2/2021 +25754864,http://mycoclap.fungalgenomics.ca,"mycoCLAP, the database for characterized lignocellulose-active proteins of fungal origin: resource and text mining curation support. . Enzymes active on components of lignocellulosic biomass are used for industrial applications ranging from food processing to biofuels production. These include a diverse array of glycoside hydrolases, carbohydrate esterases, polysaccharide lyases and oxidoreductases. Fungi are prolific producers of these enzymes, spurring fungal genome sequencing efforts to identify and catalogue the genes that encode them. To facilitate the functional annotation of these genes, biochemical data on over 800 fungal lignocellulose-degrading enzymes have been collected from the literature and organized into the searchable database, mycoCLAP (http://mycoclap.fungalgenomics.ca). First implemented in 2011, and updated as described here, mycoCLAP is capable of ranking search results according to closest biochemically characterized homologues: this improves the quality of the annotation, and significantly decreases the time required to annotate novel sequences. 
The database is freely available to the scientific community, as are the open source applications based on natural language processing developed to support the manual curation of mycoCLAP. Database URL: http://mycoclap.fungalgenomics.ca.",mycoCLAP,0.991243362,NA,0,mycoCLAP,0.991243362,1,NA,21622642,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,3/8/2015 +21622642,http://mycoCLAP.fungalgenomics.ca,"Curation of characterized glycoside hydrolases of fungal origin. Fungi produce a wide range of extracellular enzymes to break down plant cell walls, which are composed mainly of cellulose, lignin and hemicellulose. Among them are the glycoside hydrolases (GH), the largest and most diverse family of enzymes active on these substrates. To facilitate research and development of enzymes for the conversion of cell-wall polysaccharides into fermentable sugars, we have manually curated a comprehensive set of characterized fungal glycoside hydrolases. Characterized glycoside hydrolases were retrieved from protein and enzyme databases, as well as literature repositories. A total of 453 characterized glycoside hydrolases have been cataloged. They come from 131 different fungal species, most of which belong to the phylum Ascomycota. These enzymes represent 46 different GH activities and cover 44 of the 115 CAZy GH families. In addition to enzyme source and enzyme family, available biochemical properties such as temperature and pH optima, specific activity, kinetic parameters and substrate specificities were recorded. To simplify comparative studies, enzyme and species abbreviations have been standardized, Gene Ontology terms assigned and reference to supporting evidence provided. The annotated genes have been organized in a searchable, online database called mycoCLAP (Characterized Lignocellulose-Active Proteins of fungal origin). 
It is anticipated that this manually curated collection of biochemically characterized fungal proteins will be used to enhance functional annotation of novel GH genes. Database URL: http://mycoCLAP.fungalgenomics.ca/.",mycoCLAP,0.783426881,Characterized,0.764837186,mycoCLAP,0.783426881,1,NA,25754864,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,NA,5/26/2011 +24297253,http://jgi.doe.gov/fungi,"MycoCosm portal: gearing up for 1000 fungal genomes. MycoCosm is a fungal genomics portal (http://jgi.doe.gov/fungi), developed by the US Department of Energy Joint Genome Institute to support integration, analysis and dissemination of fungal genome sequences and other 'omics' data by providing interactive web-based tools. MycoCosm also promotes and facilitates user community participation through the nomination of new species of fungi for sequencing, and the annotation and analysis of resulting data. By efficiently filling gaps in the Fungal Tree of Life, MycoCosm will help address important problems associated with energy and the environment, taking advantage of growing fungal genomics resources.",MycoCosm,0.99669534,NA,0,MycoCosm,0.99669534,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2013 +23275726,http://www.bicjbtdrc-mgims.in/MycoProtease-DB,"MycoProtease-DB: Useful resource for Mycobacterium tuberculosis complex and nontuberculous mycobacterial proteases. Unlabelled MycoProtease-DB is an online MS SQL and CGI-PERL driven relational database that domiciles protease information of Mycobacterium tuberculosis (MTB) complex and Nontuberculous Mycobacteria (NTM), whose complete genome sequence is available. 
Our effort is to provide comprehensive information on proteases of 5 strains of Mycobacterium tuberculosis (H(37)Rv, H(37)Ra, CDC1551, F11 and KZN 1435), 3 strains of Mycobacterium bovis (AF2122/97, BCG Pasteur 1173P2 and BCG Tokyo 172) and 4 strains of NTM (Mycobacterium avium 104, Mycobacterium smegmatis MC2 155, Mycobacterium avium paratuberculosis K-10 and Nocardia farcinica IFM 10152) at gene, protein and structural level. MycoProtease-DB currently hosts 1324 proteases, which include 906 proteases from MTB complex with 237distinct proteases & 418 from NTM with 404 distinct proteases. Flexible database design and easy expandability & retrieval of information are the main features of MycoProtease-DB. All the data were validated with various online resources and published literatures for reliable serving as comprehensive resources of various Mycobacterial proteases. Availability The Database is publicly available at http://www.bicjbtdrc-mgims.in/MycoProtease-DB/",MycoProtease-DB,0.981270339,NA,0,MycoProtease-DB,0.981270339,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/8/2012 +22563442,http://mycorrdb.uohbif.in,"MycoRRdb: a database of computationally identified regulatory regions within intergenic sequences in mycobacterial genomes. The identification of regulatory regions for a gene is an important step towards deciphering the gene regulation. Regulatory regions tend to be conserved under evolution that facilitates the application of comparative genomics to identify such regions. The present study is an attempt to make use of this attribute to identify regulatory regions in the Mycobacterium species followed by the development of a database, MycoRRdb. It consist the regulatory regions identified within the intergenic distances of 25 mycobacterial species. MycoRRdb allows to retrieve the identified intergenic regulatory elements in the mycobacterial genomes. 
In addition to the predicted motifs, it also allows user to retrieve the Reciprocal Best BLAST Hits across the mycobacterial genomes. It is a useful resource to understand the transcriptional regulatory mechanism of mycobacterial species. This database is first of its kind which specifically addresses cis-regulatory regions and also comprehensive to the mycobacterial species. Database URL: http://mycorrdb.uohbif.in.",MycoRRdb,0.989182293,NA,0,MycoRRdb,0.989182293,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/26/2012 +23952586,http://www.bicnbu.in/mycosec,"A database for Mycobacterium secretome analysis: 'MycoSec' to accelerate global health research. Abstract Members of the genus Mycobacterium are notorious for their pathogenesis. Investigations from various perspectives have identified the pathogenic strategies employed by these lethal pathogens. Secretomes are believed to play crucial roles in host cell recognition and cross-talks, in cellular attachment, and in triggering other functions related to host pathogen interactions. However, a proper idea of the mycobacterial secretomes and their mechanism of functionality still remains elusive. In the present study, we have developed a comprehensive database of potential mycobacterial secretomes (MycoSec) using pre-existing algorithms for secretome prediction for researchers interested in this particular field. The database provides a platform for retrieval and analysis of identified secretomes in all finished genomes of the family Mycobacteriaceae. The database contains valuable information regarding secretory signal peptides (Sec type), lipoprotein signal peptides (Lipo type), and Twin arginine (RR/KR) signal peptides (TAT type), prevalent in mycobacteria. Information pertaining to COG analysis, codon usage, and gene expression of the predicted secretomes has also been incorporated in the database. 
MycoSec promises to be a useful repertoire providing a plethora of information regarding mycobacterial secretomes and may well be a platform to speed global health research. MycoSec is freely accessible at http://www.bicnbu.in/mycosec .",MycoSec,0.997377336,Mycobacterium,0.634468615,MycoSec,0.997377336,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/16/2013 +34025934,http://139.59.12.92,"Computational modeling and bioinformatic analyses of functional mutations in drug target genes in Mycobacterium tuberculosis. Tuberculosis (TB) continues to be the leading cause of deaths due to its persistent drug resistance and the consequent ineffectiveness of anti-TB treatment. Recent years witnessed huge amount of sequencing data, revealing mutations responsible for drug resistance. However, the lack of an up-to-date repository remains a barrier towards utilization of these data and identifying major mutations-associated with resistance. Amongst all mutations, non-synonymous mutations alter the amino acid sequence of a protein and have a much greater effect on pathogenicity. Hence, this type of gene mutation is of prime interest of the present study. The purpose of this study is to develop an updated database comprising almost all reported substitutions within the Mycobacterium tuberculosis (M.tb) drug target genes rpoB, inhA, katG, pncA, gyrA and gyrB. Various bioinformatics prediction tools were used to assess the structural and biophysical impacts of the resistance causing non-synonymous single nucleotide polymorphisms (nsSNPs) at the molecular level. This was followed by evaluating the impact of these mutations on binding affinity of the drugs to target proteins. We have developed a comprehensive online resource named MycoTRAP-DB (Mycobacterium tuberculosis Resistance Associated Polymorphisms Database) that connects mutations in genes with their structural, functional and pathogenic implications on protein. This database is accessible at http://139.59.12.92. 
This integrated platform would enable comprehensive analysis and prioritization of SNPs for the development of improved diagnostics and antimycobacterial medications. Moreover, our study puts forward secondary mutations that can be important for prognostic assessments of drug-resistance mechanism and actionable anti-TB drugs.",MycoTRAP-DB,0.982121785,Mycobacterium tuberculosis Resistance Associated,0.798473769,MycoTRAP-DB,0.982121785,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/19/2021 +25378328,http://mympn.crg.eu,"MyMpn: a database for the systems biology model organism Mycoplasma pneumoniae. MyMpn (http://mympn.crg.eu) is an online resource devoted to studying the human pathogen Mycoplasma pneumoniae, a minimal bacterium causing lower respiratory tract infections. Due to its small size, its ability to grow in vitro, and the amount of data produced over the past decades, M. pneumoniae is an interesting model organisms for the development of systems biology approaches for unicellular organisms. Our database hosts a wealth of omics-scale datasets generated by hundreds of experimental and computational analyses. These include data obtained from gene expression profiling experiments, gene essentiality studies, protein abundance profiling, protein complex analysis, metabolic reactions and network modeling, cell growth experiments, comparative genomics and 3D tomography. In addition, the intuitive web interface provides access to several visualization and analysis tools as well as to different data search options. The availability and--even more relevant--the accessibility of properly structured and organized data are of up-most importance when aiming to understand the biology of an organism on a global scale. 
Therefore, MyMpn constitutes a unique and valuable new resource for the large systems biology and microbiology community.",MyMpn,0.998210311,NA,0,MyMpn,0.998210311,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2014 +34527188,http://myodata.bio.unipd.it,"MyoData: An expression knowledgebase at single cell/nucleus level for the discovery of coding-noncoding RNA functional interactions in skeletal muscle. Non-coding RNAs represent the largest part of transcribed mammalian genomes and prevalently exert regulatory functions. Long non-coding RNAs (lncRNAs) and microRNAs (miRNAs) can modulate the activity of each other. Skeletal muscle is the most abundant tissue in mammals. It is composed of different cell types with myofibers that represent the smallest complete contractile system. Considering that lncRNAs and miRNAs are more cell type-specific than coding RNAs, to understand their function it is imperative to evaluate their expression and action within single myofibers. In this database, we collected gene expression data for coding and non-coding genes in single myofibers and used them to produce interaction networks based on expression correlations. Since biological pathways are more informative than networks based on gene expression correlation, to understand how altered genes participate in the studied phenotype, we integrated KEGG pathways with miRNAs and lncRNAs. The database also integrates single nucleus gene expression data on skeletal muscle in different patho-physiological conditions. We demonstrated that these networks can serve as a framework from which to dissect new miRNA and lncRNA functions to experimentally validate. Some interactions included in the database have been previously experimentally validated using high throughput methods. These can be the basis for further functional studies. 
Using database information, we demonstrate the involvement of miR-149, -214 and let-7e in mitochondria shaping; the ability of the lncRNA Pvt1 to mitigate the action of miR-27a via sponging; and the regulatory activity of miR-214 on Sox6 and Slc16a3. The MyoData is available at https://myodata.bio.unipd.it.",MyoData,0.9587152,NA,0,MyoData,0.9587152,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/26/2021 +32393257,http://www.sys-myo.com/myominer,"MyoMiner: explore gene co-expression in normal and pathological muscle. Background High-throughput transcriptomics measures mRNA levels for thousands of genes in a biological sample. Most gene expression studies aim to identify genes that are differentially expressed between different biological conditions, such as between healthy and diseased states. However, these data can also be used to identify genes that are co-expressed within a biological condition. Gene co-expression is used in a guilt-by-association approach to prioritize candidate genes that could be involved in disease, and to gain insights into the functions of genes, protein relations, and signaling pathways. Most existing gene co-expression databases are generic, amalgamating data for a given organism regardless of tissue-type. Methods To study muscle-specific gene co-expression in both normal and pathological states, publicly available gene expression data were acquired for 2376 mouse and 2228 human striated muscle samples, and separated into 142 categories based on species (human or mouse), tissue origin, age, gender, anatomic part, and experimental condition. Co-expression values were calculated for each category to create the MyoMiner database. Results Within each category, users can select a gene of interest, and the MyoMiner web interface will return all correlated genes. For each co-expressed gene pair, adjusted p-value and confidence intervals are provided as measures of expression correlation strength. 
A standardized expression-level scatterplot is available for every gene pair r-value. MyoMiner has two extra functions: (a) a network interface for creating a 2-shell correlation network, based either on the most highly correlated genes or from a list of genes provided by the user with the option to include linked genes from the database and (b) a comparison tool from which the users can test whether any two correlation coefficients from different conditions are significantly different. Conclusions These co-expression analyses will help investigators to delineate the tissue-, cell-, and pathology-specific elements of muscle protein interactions, cell signaling and gene regulation. Changes in co-expression between pathologic and healthy tissue may suggest new disease mechanisms and help define novel therapeutic targets. Thus, MyoMiner is a powerful muscle-specific database for the discovery of genes that are associated with related functions based on their co-expression. MyoMiner is freely available at https://www.sys-myo.com/myominer.",MyoMiner,0.994202495,NA,0,MyoMiner,0.994202495,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/11/2020 +32451429,http://www.myomirdb.in,"MyomirDB: A unified database and server platform for muscle atrophy myomiRs, coregulatory networks and regulons. Muscular atrophy or muscle loss is a multifactorial clinical condition during many critical illnesses like cancer, cardiovascular diseases, diabetes, pulmonary diseases etc. leading to fatigue and weakness and contributes towards a decreased quality of life. The proportion of older adults (>65 y) in the overall population is also growing and aging is another important factor causing muscle loss. Some muscle miRNAs (myomiRs) and their target genes have even been proposed as potential diagnostic, therapeutic and predictive markers for muscular atrophy. 
MyomirDB (http://www.myomirdb.in/) is a unique resource that provides a comprehensive, curated, user- friendly and detailed compilation of various miRNA bio-molecular interactions; miRNA-Transcription Factor-Target Gene co-regulatory networks and ~8000 tripartite regulons associated with 247 myomiRs which have been experimentally validated to be associated with various muscular atrophy conditions. For each database entry, MyomirDB compiles source organism, muscle atrophic condition, experiment duration, its level of expression, fold change, tissue of expression, experimental validation, disease and drug association, tissue-specific expression level, Gene Ontology and KEGG pathway associations. The web resource is a unique server platform which uses in-house scripts to construct miRNA-Transcription Factor-Target Gene co-regulatory networks and extract tri-partite regulons also called Feed Forward Loops. These unique features helps to offer mechanistic insights in disease pathology. Hence, MyomirDB is a unique platform for researchers working in this area to explore, fetch, compare and analyse atrophy associated miRNAs, their co-regulatory networks and FFL regulons.",MyomirDB,0.99731797,NA,0,MyomirDB,0.99731797,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/25/2020 +23189029,http://caps.ncbs.res.in/myosinome,"Myosinome: a database of myosins from select eukaryotic genomes to facilitate analysis of sequence-structure-function relationships. Myosins are one of the largest protein superfamilies with 24 classes. They have conserved structural features and catalytic domains yet show huge variation at different domains resulting in a variety of functions. Myosins are molecules driving various kinds of cellular processes and motility until the level of organisms. These are ATPases that utilize the chemical energy released by ATP hydrolysis to bring about conformational changes leading to a motor function. 
Myosins are important as they are involved in almost all cellular activities ranging from cell division to transcriptional regulation. They are crucial due to their involvement in many congenital diseases symptomatized by muscular malfunctions, cardiac diseases, deafness, neural and immunological dysfunction, and so on, many of which lead to death at an early age. We present Myosinome, a database of selected myosin classes (myosin II, V, and VI) from five model organisms. This knowledge base provides the sequences, phylogenetic clustering, domain architectures of myosins and molecular models, structural analyses, and relevant literature of their coiled-coil domains. In the current version of Myosinome, information about 71 myosin sequences belonging to three myosin classes (myosin II, V, and VI) in five model organisms (Homo Sapiens, Mus musculus, D. melanogaster, C. elegans and S. cereviseae) identified using bioinformatics surveys are presented, and several of them are yet to be functionally characterized. As these proteins are involved in congenital diseases, such a database would be useful in short-listing candidates for gene therapy and drug development. The database can be accessed from http://caps.ncbs.res.in/myosinome.",Myosinome,0.969296992,NA,0,Myosinome,0.969296992,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/12/2012 +34389843,http://nabe.denglab.org,"Nabe: an energetic database of amino acid mutations in protein-nucleic acid binding interfaces. . Protein-nucleic acid complexes play essential roles in regulating transcription, translation, DNA replication, repair and recombination, RNA processing and translocation. Site-directed mutagenesis has been extremely useful in understanding the principles of protein-DNA and protein-RNA interactions, and experimentally determined mutagenesis data are prerequisites for designing effective algorithms for predicting the binding affinity change upon mutation. 
However, a vital challenge in this area is the lack of sufficient public experimentally recognized mutation data, which leads to difficulties in developing computational prediction methods. In this article, we present Nabe, an integrated database of amino acid mutations and their effects on the binding free energy in protein-DNA and protein-RNA interactions for which binding affinities have been experimentally determined. Compared with existing databases and data sets, Nabe is the largest protein-nucleic acid mutation database, containing 2506 mutations in 473 protein-DNA and protein-RNA complexes, and of that 1751 are alanine mutations in 405 protein-nucleic acid complexes. For researchers to conveniently utilize the data, Nabe assembles protein-DNA and protein-RNA benchmark databases by adopting the data-processing procedures in the majority of models. To further facilitate users to query data, Nabe provides a searchable and graphical web page. Database URL: http://nabe.denglab.org.",Nabe,0.985496402,NA,0,Nabe,0.985496402,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2021 +26912952,http://nabic.rda.go.kr/DNAchip,"NABIC Microarray: an integrated database of high throughput data for gene expression profiles. Unlabelled The National Agricultural Biotechnology Information Center (NABIC) in Korea constructed a web-based database to provide information about gene expression profiles identified in the microorganism, plants, and animals. The deposited archive of NABIC microarray database consists of metadata spreadsheet, matrix spreadsheet, and raw data files. It provides three major functions such as microarray search, viewer and download option of raw data. An information table of five fields (i.e., ownership, basic, series, samples, and protocols) shows the specific description of data for selected DNA microarray. 
Availability The database is available online for free at http://nabic.rda.go.kr/DNAchip.",NABIC,0.985369205,NA,0,NABIC,0.985369205,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/30/2015 +28086860,http://nadh.ice.mpg.de,"Nicotiana attenuata Data Hub (NaDH): an integrative platform for exploring genomic, transcriptomic and metabolomic data in wild tobacco. Background Nicotiana attenuata (coyote tobacco) is an ecological model for studying plant-environment interactions and plant gene function under real-world conditions. During the last decade, large amounts of genomic, transcriptomic and metabolomic data have been generated with this plant which has provided new insights into how native plants interact with herbivores, pollinators and microbes. However, an integrative and open access platform that allows for the efficient mining of these -omics data remained unavailable until now. Description We present the Nicotiana attenuata Data Hub (NaDH) as a centralized platform for integrating and visualizing genomic, phylogenomic, transcriptomic and metabolomic data in N. attenuata. The NaDH currently hosts collections of predicted protein coding sequences of 11 plant species, including two recently sequenced Nicotiana species, and their functional annotations, 222 microarray datasets from 10 different experiments, a transcriptomic atlas based on 20 RNA-seq expression profiles and a metabolomic atlas based on 895 metabolite spectra analyzed by mass spectrometry. We implemented several visualization tools, including a modified version of the Electronic Fluorescent Pictograph (eFP) browser, co-expression networks and the Interactive Tree Of Life (iTOL) for studying gene expression divergence among duplicated homologous. In addition, the NaDH allows researchers to query phylogenetic trees of 16,305 gene families and provides tools for analyzing their evolutionary history. 
Furthermore, we also implemented tools to identify co-expressed genes and metabolites, which can be used for predicting the functions of genes. Using the transcription factor NaMYB8 as an example, we illustrate that the tools and data in NaDH can facilitate identification of candidate genes involved in the biosynthesis of specialized metabolites. Conclusion The NaDH provides interactive visualization and data analysis tools that integrate the expression and evolutionary history of genes in Nicotiana, which can facilitate rapid gene discovery and comparative genomic analysis. Because N. attenuata shares many genome-wide features with other Nicotiana species including cultivated tobacco, and hence NaDH can be a resource for exploring the function and evolution of genes in Nicotiana species in general. The NaDH can be accessed at: http://nadh.ice.mpg.de/ .",NaDH,0.970333695,Nicotiana attenuata Data Hub,0.910567932,NaDH,0.970333695,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/13/2017 +25172923,http://www.naked-mole-rat.org,"The Naked Mole Rat Genome Resource: facilitating analyses of cancer and longevity-related adaptations. Motivation The naked mole rat (Heterocephalus glaber) is an exceptionally long-lived and cancer-resistant rodent native to East Africa. Although its genome was previously sequenced, here we report a new assembly sequenced by us with substantially higher N50 values for scaffolds and contigs. Results We analyzed the annotation of this new improved assembly and identified candidate genomic adaptations which may have contributed to the evolution of the naked mole rat's extraordinary traits, including in regions of p53, and the hyaluronan receptors CD44 and HMMR (RHAMM). 
Furthermore, we developed a freely available web portal, the Naked Mole Rat Genome Resource (http://www.naked-mole-rat.org), featuring the data and results of our analysis, to assist researchers interested in the genome and genes of the naked mole rat, and also to facilitate further studies on this fascinating species.",NA,0,Naked Mole Rat Genome Resource,0.875342856,Naked Mole Rat Genome Resource,0.875342856,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/28/2014 +26896846,http://bsbe.iiti.ac.in/bsbe/naldb/HOME.php,"NALDB: nucleic acid ligand database for small molecules targeting nucleic acid. . Nucleic acid ligand database (NALDB) is a unique database that provides detailed information about the experimental data of small molecules that were reported to target several types of nucleic acid structures. NALDB is the first ligand database that contains ligand information for all type of nucleic acid. NALDB contains more than 3500 ligand entries with detailed pharmacokinetic and pharmacodynamic information such as target name, target sequence, ligand 2D/3D structure, SMILES, molecular formula, molecular weight, net-formal charge, AlogP, number of rings, number of hydrogen bond donor and acceptor, potential energy along with their Ki, Kd, IC50 values. All these details at single platform would be helpful for the development and betterment of novel ligands targeting nucleic acids that could serve as a potential target in different diseases including cancers and neurological disorders. With maximum 255 conformers for each ligand entry, our database is a multi-conformer database and can facilitate the virtual screening process. NALDB provides powerful web-based search tools that make database searching efficient and simplified using option for text as well as for structure query. NALDB also provides multi-dimensional advanced search tool which can screen the database molecules on the basis of molecular properties of ligand provided by database users. 
A 3D structure visualization tool has also been included for 3D structure representation of ligands. NALDB offers an inclusive pharmacological information and the structurally flexible set of small molecules with their three-dimensional conformers that can accelerate the virtual screening and other modeling processes and eventually complement the nucleic acid-based drug discovery research. NALDB can be routinely updated and freely available on bsbe.iiti.ac.in/bsbe/naldb/HOME.php. Database URL: http://bsbe.iiti.ac.in/bsbe/naldb/HOME.php.",NALDB,0.99670428,Nucleic acid ligand database,0.987690696,NALDB,0.99670428,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/20/2016 +33103271,http://nandesyn.single-cell.cn,"The NanDeSyn database for Nannochloropsis systems and synthetic biology. Nannochloropsis species, unicellular industrial oleaginous microalgae, are model organisms for microalgal systems and synthetic biology. To facilitate community-based annotation and mining of the rapidly accumulating functional genomics resources, we have initiated an international consortium and present a comprehensive multi-omics resource database named Nannochloropsis Design and Synthesis (NanDeSyn; http://nandesyn.single-cell.cn). Via the Tripal toolkit, it features user-friendly interfaces hosting genomic resources with gene annotations and transcriptomic and proteomic data for six Nannochloropsis species, including two updated genomes of Nannochloropsis oceanica IMET1 and Nannochloropsis salina CCMP1776. Toolboxes for search, Blast, synteny view, enrichment analysis, metabolic pathway analysis, a genome browser, etc. are also included. In addition, functional validation of genes is indicated based on phenotypes of mutants and relevant bibliography. Furthermore, epigenomic resources are also incorporated, especially for sequencing of small RNAs including microRNAs and circular RNAs. 
Such comprehensive and integrated landscapes of Nannochloropsis genomics and epigenomics will promote and accelerate community efforts in systems and synthetic biology of these industrially important microalgae.",NanDeSyn,0.989316845,Nannochloropsis Design and Synthesis,0.926788456,NanDeSyn,0.989316845,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2020 +24098075,http://www.nanomaterialregistry.org,"The Nanomaterial Registry: facilitating the sharing and analysis of data in the diverse nanomaterial community. The amount of data being generated in the nanotechnology research space is significant, and the coordination, sharing, and downstream analysis of the data is complex and consistently deliberated. The complexities of the data are due in large part to the inherently complicated characteristics of nanomaterials. Also, testing protocols and assays used for nanomaterials are diverse and lacking standardization. The Nanomaterial Registry has been developed to address such challenges as the need for standard methods, data formatting, and controlled vocabularies for data sharing. The Registry is an authoritative, web-based tool whose purpose is to simplify the community's level of effort in assessing nanomaterial data from environmental and biological interaction studies. Because the Registry is meant to be an authoritative resource, all data-driven content is systematically archived and reviewed by subject-matter experts. To support and advance nanomaterial research, a set of minimal information about nanomaterials (MIAN) has been developed and is foundational to the Registry data model. The MIAN has been used to create evaluation and similarity criteria for nanomaterials that are curated into the Registry. The Registry is a publicly available resource that is being built through collaborations with many stakeholder groups in the nanotechnology community, including industry, regulatory, government, and academia. 
Features of the Registry website (http://www.nanomaterialregistry.org) currently include search, browse, side-by-side comparison of nanomaterials, compliance ratings based on the quality and quantity of data, and the ability to search for similar nanomaterials within the Registry. This paper is a modification and extension of a proceedings paper for the Institute of Electrical and Electronics Engineers.",Nanomaterial,0.530851781,Nanomaterial Registry,0.508820176,Nanomaterial,0.530851781,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,9/16/2013 +23874618,http://nanominer.cs.tut.fi,"NanoMiner - integrative human transcriptomics data resource for nanoparticle research. The potential impact of nanoparticles on the environment and on human health has attracted considerable interest worldwide. The amount of transcriptomics data, in which tissues and cell lines are exposed to nanoparticles, increases year by year. In addition to the importance of the original findings, this data can have value in broader context when combined with other previously acquired and published results. In order to facilitate the efficient usage of the data, we have developed the NanoMiner web resource (http://nanominer.cs.tut.fi/), which contains 404 human transcriptome samples exposed to various types of nanoparticles. All the samples in NanoMiner have been annotated, preprocessed and normalized using standard methods that ensure the quality of the data analyses and enable the users to utilize the database systematically across the different experimental setups and platforms. 
With NanoMiner it is possible to 1) search and plot the expression profiles of one or several genes of interest, 2) cluster the samples within the datasets, 3) find differentially expressed genes in various nanoparticle studies, 4) detect the nanoparticles causing differential expression of selected genes, 5) analyze enriched Kyoto Encyclopedia of Genes and Genomes (KEGG) pathways and Gene Ontology (GO) terms for the detected genes and 6) search the expression values and differential expressions of the genes belonging to a specific KEGG pathway or Gene Ontology. In sum, NanoMiner database is a valuable collection of microarray data which can be also used as a data repository for future analyses.",NanoMiner,0.984119594,NA,0,NanoMiner,0.984119594,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/12/2013 +21928249,http://serviziweb.ulss12.ve.it/firbabo,"Development of a website and biobank database for the Nanosized Cancer Polymarker Biochip Project: a Multicenter Italian Experience. The Nanosized Cancer Polymarker Biochip Project (RBLA03S4SP) funded by an Italian MIUR-FIRB grant (Italian Ministry of University and Research - Investment Funds for Basic Research) has led to the creation of a free-access dynamic website, available at the web address https://serviziweb.ulss12.ve.it/firbabo, and of a centralized database with password-restricted access. The project network is composed of 9 research units (RUs) and has been active since 2005. The aim of the FIRB project was the design, production and validation of optoelectronic and chemoelectronic biosensors for the simultaneous detection of a novel class of cancer biomarkers associated with immunoglobulins of the M class (IgM) for early diagnosis of cancer. Biomarker immune complexes (BM-ICs) were assessed on samples of clinical cases and matched controls for breast, colorectal, liver, ovarian and prostate malignancies. 
This article describes in detail the architecture of the project website, the central database application, and the biobank developed for the FIRB Nanosized Cancer Polymarker Biochip Project. The article also illustrates many unique aspects that should be considered when developing a database within a multidisciplinary scenario. The main deliverables of the project were numerous, including the development of an online database which archived 1400 case report forms (700 cases and 700 matched controls) and more than 2700 experimental results relative to the BM-ICs assayed. The database also allowed for the traceability and retrieval of 21,000 aliquots archived in the centralized bank and stored as backup in the RUs, and for the development of a centralized biological bank in the coordinating unit with 6300 aliquots of serum. The constitution of the website and biobank database enabled optimal coordination of the RUs involved, highlighting the importance of sharing samples and scientific data in a multicenter setting for the achievement of the project goals.",RBLA0,0.827072576,Nanosized Cancer Polymarker Biochip Project,0.891584954,Nanosized Cancer Polymarker Biochip Project,0.891584954,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2011 +28641017,http://african-compounds.org/nanpdb,"NANPDB: A Resource for Natural Products from Northern African Sources. Natural products (NPs) are often regarded as sources of drugs or drug leads or simply as a ""source of inspiration"" for the discovery of novel drugs. We have built the Northern African Natural Products Database (NANPDB) by collecting information on ∼4500 NPs, covering literature data for the period from 1962 to 2016. The data cover compounds isolated mainly from plants, with contributions from some endophyte, animal (e.g., coral), fungal, and bacterial sources. The compounds were identified from 617 source species, belonging to 146 families. 
Computed physicochemical properties, often used to predict drug metabolism and pharmacokinetics, as well as predicted toxicity information, have been included for each compound in the data set. This is the largest collection of annotated natural compounds produced by native organisms from Northern Africa. While the database includes well-known drugs and drug leads, the medical potential of a majority of the molecules is yet to be investigated. The database could be useful for drug discovery efforts, analysis of the bioactivity of selected compounds, or the discovery of synthesis routes toward secondary metabolites. The current version of NANPDB is available at http://african-compounds.org/nanpdb/ .",NANPDB,0.997249413,Northern African Natural Products Database,0.983017099,NANPDB,0.997249413,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/22/2017 +23996831,http://naps.nencki.gov.pl,"The Nencki Affective Picture System (NAPS): introduction to a novel, standardized, wide-range, high-quality, realistic picture database. Selecting appropriate stimuli to induce emotional states is essential in affective research. Only a few standardized affective stimulus databases have been created for auditory, language, and visual materials. Numerous studies have extensively employed these databases using both behavioral and neuroimaging methods. However, some limitations of the existing databases have recently been reported, including limited numbers of stimuli in specific categories or poor picture quality of the visual stimuli. In the present article, we introduce the Nencki Affective Picture System (NAPS), which consists of 1,356 realistic, high-quality photographs that are divided into five categories (people, faces, animals, objects, and landscapes). Affective ratings were collected from 204 mostly European participants. The pictures were rated according to the valence, arousal, and approach-avoidance dimensions using computerized bipolar semantic slider scales. 
Normative ratings for the categories are presented for each dimension. Validation of the ratings was obtained by comparing them to ratings generated using the Self-Assessment Manikin and the International Affective Picture System. In addition, the physical properties of the photographs are reported, including luminance, contrast, and entropy. The new database, with accompanying ratings and image parameters, allows researchers to select a variety of visual stimulus materials specific to their experimental questions of interest. The NAPS system is freely accessible to the scientific community for noncommercial use by request at http://naps.nencki.gov.pl .",NAPS,0.940910459,Nencki Affective Picture System,0.825295125,NAPS,0.940910459,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/1/2014 +30626175,"http://academic.oup.com/nar, http://www.oxfordjournals.org/nar/database/c","The 26th annual Nucleic Acids Research database issue and Molecular Biology Database Collection. The 2019 Nucleic Acids Research (NAR) Database Issue contains 168 papers spanning molecular biology. Among them, 64 are new and another 92 are updates describing resources that appeared in the Issue previously. The remaining 12 are updates on databases most recently published elsewhere. This Issue contains two Breakthrough articles, on the Virtual Metabolic Human (VMH) database which links human and gut microbiota metabolism with diet and disease, and Vibrism DB, a database of mouse brain anatomy and gene (co-)expression with sophisticated visualization and session sharing. Major returning nucleic acid databases include RNAcentral, miRBase and LncRNA2Target. Protein sequence databases include UniProtKB, InterPro and Pfam, while wwPDB and RCSB cover protein structure. STRING and KEGG update in the section on metabolism and pathways. Microbial genomes are covered by IMG/M and resources for human and model organism genomics include Ensembl, UCSC Genome Browser, GENCODE and Flybase. 
Genomic variation and disease are well-covered by GWAS Catalog, PopHumanScan, OMIM and COSMIC, CADD being another major newcomer. Major new proteomics resources reporting here include iProX and jPOSTdb. The entire database issue is freely available online on the NAR website (https://academic.oup.com/nar). The NAR online Molecular Biology Database Collection has been updated, reviewing 506 entries, adding 66 new resources and eliminating 147 discontinued URLs, bringing the current total to 1613 databases. It is available at http://www.oxfordjournals.org/nar/database/c.",NAR,0.830088238,Acids,0.523489654,NAR,0.830088238,1,"29316735.0, 31906604.0, 29316735.0, 31906604.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: CLASS,NA,NA,1/1/2019 +31640730,http://nard.macrogen.com,"NARD: whole-genome reference panel of 1779 Northeast Asians improves imputation accuracy of rare and low-frequency variants. Here, we present the Northeast Asian Reference Database (NARD), including whole-genome sequencing data of 1779 individuals from Korea, Mongolia, Japan, China, and Hong Kong. NARD provides the genetic diversity of Korean (n = 850) and Mongolian (n = 384) ancestries that were not present in the 1000 Genomes Project Phase 3 (1KGP3). We combined and re-phased the genotypes from NARD and 1KGP3 to construct a union set of haplotypes. This approach established a robust imputation reference panel for Northeast Asians, which yields the greatest imputation accuracy of rare and low-frequency variants compared with the existing panels. NARD imputation panel is available at https://nard.macrogen.com/ .",NARD,0.994531229,Northeast Asian Reference Database,0.981074795,NARD,0.994531229,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2019 +26507856,http://nbdb.bii.a-star.edu.sg,"Nucleotide binding database NBDB--a collection of sequence motifs with specific protein-ligand interactions. 
NBDB database describes protein motifs, elementary functional loops (EFLs) that are involved in binding of nucleotide-containing ligands and other biologically relevant cofactors/coenzymes, including ATP, AMP, ATP, GMP, GDP, GTP, CTP, PAP, PPS, FMN, FAD(H), NAD(H), NADP, cAMP, cGMP, c-di-AMP and c-di-GMP, ThPP, THD, F-420, ACO, CoA, PLP and SAM. The database is freely available online at http://nbdb.bii.a-star.edu.sg. In total, NBDB contains data on 249 motifs that work in interactions with 24 ligands. Sequence profiles of EFL motifs were derived de novo from nonredundant Uniprot proteome sequences. Conserved amino acid residues in the profiles interact specifically with distinct chemical parts of nucleotide-containing ligands, such as nitrogenous bases, phosphate groups, ribose, nicotinamide, and flavin moieties. Each EFL profile in the database is characterized by a pattern of corresponding ligand-protein interactions found in crystallized ligand-protein complexes. NBDB database helps to explore the determinants of nucleotide and cofactor binding in different protein folds and families. NBDB can also detect fragments that match to profiles of particular EFLs in the protein sequence provided by user. Comprehensive information on sequence, structures, and interactions of EFLs with ligands provides a foundation for experimental and computational efforts on design of required protein functions.",NBDB,0.986459076,Nucleotide binding database,0.770806229,NBDB,0.986459076,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/26/2015 +30576482,http://integbio.jp/rdf,"NBDC RDF portal: a comprehensive repository for semantic data in life sciences. . In the life sciences, researchers increasingly want to access multiple databases in an integrated way. However, different databases currently use different formats and vocabularies, hindering the proper integration of heterogeneous life science data. 
Adopting the Resource Description Framework (RDF) has the potential to address such issues by improving database interoperability, leading to advances in automatic data processing. Based on this idea, we have advised many Japanese database development groups to expose their databases in RDF. To further promote such activities, we have developed an RDF-based life science dataset repository called the National Bioscience Database Center (NBDC) RDF portal. All the datasets in this repository have been reviewed by the NBDC to ensure interoperability and queryability. As of July 2018, the service includes 21 RDF datasets, comprising over 45.5 billion triples. It provides SPARQL endpoints for all datasets, useful metadata and the ability to download RDF files. The NBDC RDF portal can be accessed at https://integbio.jp/rdf/.",NBDC,0.63131541,NA,0,NBDC,0.63131541,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2018 +33359127,http://nbigv.org,"NBIGV-DB: A dedicated database of non-B cell derived immunoglobulin variable region. Immunoglobulins (Ig) are important immune molecules that possess highly diverse variable region sequences enabling antigen recognition. According to classical immune theory, B lymphocytes have been considered the only source of Ig production (B-Igs). However, accumulating evidence have suggested that Igs are also produced by many non-B cells (non-B Igs), including epithelial cells, neurons, germ cells, as well as myeloid cells of hemopoietic system. Besides acting as bona fide antibodies, Non-B Igs have alternative cellular functions, such as promotion of cell survival, adhesion and migration. More importantly, Unlike the unlimited sequence diversity of B-Igs, the non-B Igs exhibit conserved V(D)J patterns across the same lineages. 
To support the analysis and comparison of variable region sequences from Igs, produced by B and non-B cells, we established a database (NBIGV) constituted by a non-B Ig variable region repertoire, which includes 727,989 VHDJH and VκJκ recombination sequences of non-B Igs sequenced from mouse samples. Upon database search, users can view, browse and investigate the variable region sequences of non-B Igs according to respective mice strains and tissues as well as Ig classes. Moreover, users can easily download selected sequences and/or compare sequences of interest with known non-B Ig sequences present in the database using NCBI-BLAST algorithms. Additionally, our database integrates a submission page and supplementary sample information. The NBIGV database may serve as a valuable resource for sequence analyses of Non-B Igs. NBIGV database is freely available at http://nbigv.org.",NBIGV,0.983493745,NA,0,NBIGV,0.983493745,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/23/2020 +32117995,http://nc2eye.bio-data.cn,"Nc2Eye: A Curated ncRNAomics Knowledgebase for Bridging Basic and Clinical Research in Eye Diseases. Eye diseases (EDs) represent a group of disorders affecting the visual system, most of which can lead to visual impairment and blindness. Accumulating evidence reveals that non-coding RNAs (ncRNAs) are closely associated with a wide variety of EDs. However, abundant associations between ncRNAs and EDs are scattered across the published literature, obstructing a global view of ncRNA-ED associations. A public resource of high-quality manually curated ncRNAomics knowledge associated with EDs remains unavailable. To address this gap, we thus developed Nc2Eye (http://nc2eye.bio-data.cn/), which is the first knowledgebase dedicated to providing a comprehensive ncRNAomics resource for bridging basic and clinical research in EDs. 
Through a comprehensive review of more than 2400 published papers, Nc2Eye catalogs 7088 manually curated ncRNA-ED associations involving 4363 ncRNAs across eight species. We also provide detailed descriptions and annotation information for each ncRNA-disease association such as ncRNA categories, experimental methods, expression pattern and related clinical drugs. To further expand the pathogenic ncRNAs, we also collected more than 90 high-throughput EDs-related transcriptome datasets. Furthermore, a user-friendly interface was constructed for convenient and flexible data browsing, querying, and retrieving. We believe that Nc2Eye is a timely and valuable knowledgebase for significantly improving and useful for discovery of new diagnostic and therapeutic biomarkers.",Nc2Eye,0.997420222,NA,0,Nc2Eye,0.997420222,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/14/2020 +24393765,http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/DISEASE,"NCBI disease corpus: a resource for disease name recognition and concept normalization. Information encoded in natural language in biomedical literature publications is only useful if efficient and reliable ways of accessing and analyzing that information are available. Natural language processing and text mining tools are therefore essential for extracting valuable information, however, the development of powerful, highly effective tools to automatically detect central biomedical concepts such as diseases is conditional on the availability of annotated corpora. This paper presents the disease name and concept annotations of the NCBI disease corpus, a collection of 793 PubMed abstracts fully annotated at the mention and concept level to serve as a research resource for the biomedical natural language processing community. Each PubMed abstract was manually annotated by two annotators with disease mentions and their corresponding concepts in Medical Subject Headings (MeSH®) or Online Mendelian Inheritance in Man (OMIM®). 
Manual curation was performed using PubTator, which allowed the use of pre-annotations as a pre-step to manual annotations. Fourteen annotators were randomly paired and differing annotations were discussed for reaching a consensus in two annotation phases. In this setting, a high inter-annotator agreement was observed. Finally, all results were checked against annotations of the rest of the corpus to assure corpus-wide consistency. The public release of the NCBI disease corpus contains 6892 disease mentions, which are mapped to 790 unique disease concepts. Of these, 88% link to a MeSH identifier, while the rest contain an OMIM identifier. We were able to link 91% of the mentions to a single disease concept, while the rest are described as a combination of concepts. In order to help researchers use the corpus to design and test disease identification methods, we have prepared the corpus as training, testing and development sets. To demonstrate its utility, we conducted a benchmarking experiment where we compared three different knowledge-based disease normalization methods with a best performance in F-measure of 63.7%. These results show that the NCBI disease corpus has the potential to significantly improve the state-of-the-art in disease name recognition and normalization research, by providing a high-quality gold standard thus enabling the development of machine-learning based approaches for such tasks. The NCBI disease corpus, guidelines and other associated resources are available at: http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/DISEASE/.",NCBI,0.96602881,NA,0,NCBI,0.96602881,1,NA,22737589,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,1/3/2014 +22737589,http://www.ncbi.nlm.nih.gov,"Fungal genome resources at NCBI. The National Center for Biotechnology Information (NCBI) is well known for the nucleotide sequence archive, GenBank and sequence analysis tool BLAST. 
However, NCBI integrates many types of biomolecular data from variety of sources and makes it available to the scientific community as interactive web resources as well as organized releases of bulk data. These tools are available to explore and compare fungal genomes. Searching all databases with Fungi [organism] at http://www.ncbi.nlm.nih.gov/ is the quickest way to find resources of interest with fungal entries. Some tools though are resources specific and can be indirectly accessed from a particular database in the Entrez system. These include graphical viewers and comparative analysis tools such as TaxPlot, TaxMap and UniGene DDD (found via UniGene Homepage). Gene and BioProject pages also serve as portals to external data such as community annotation websites, BioGrid and UniProt. There are many different ways of accessing genomic data at NCBI. Depending on the focus and goal of research projects or the level of interest, a user would select a particular route for accessing genomic databases and resources. This review article describes methods of accessing fungal genome data and provides examples that illustrate the use of analysis tools.",NCBI,0.624692917,NA,0,NCBI,0.624692917,1,23193287,24393765,low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: CLASS,NA,NA,9/1/2011 +22139910,http://www.ncbi.nlm.nih.gov/taxonomy,"The NCBI Taxonomy database. The NCBI Taxonomy database (http://www.ncbi.nlm.nih.gov/taxonomy) is the standard nomenclature and classification repository for the International Nucleotide Sequence Database Collaboration (INSDC), comprising the GenBank, ENA (EMBL) and DDBJ databases. It includes organism names and taxonomic lineages for each of the sequences represented in the INSDC's nucleotide and protein sequence databases. 
The taxonomy database is manually curated by a small group of scientists at the NCBI who use the current taxonomic literature to maintain a phylogenetic taxonomy for the source organisms represented in the sequence databases. The taxonomy database is a central organizing hub for many of the resources at the NCBI, and provides a means for clustering elements within other domains of NCBI web site, for internal linking between domains of the Entrez system and for linking out to taxon-specific external resources on the web. Our primary purpose is to index the domain of sequences as conveniently as possible for our user community.",NCBI Taxonomy,0.87996386,NA,0,NCBI Taxonomy,0.87996386,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2011 +28961690,http://www.jianglab.cn/ncDR,"ncDR: a comprehensive resource of non-coding RNAs involved in drug resistance. Summary As a promising field of individualized therapy, non-coding RNA pharmacogenomics promotes the understanding of different individual responses to certain drugs and acts as a reasonable reference for clinical treatment. However, relevant information is scattered across the published literature, which is inconvenient for researchers to explore non-coding RNAs that are involved in drug resistance. To address this, we systemically identified validated and predicted drug resistance-associated microRNAs and long non-coding RNAs through manual curation and computational analysis. Subsequently, we constructed an omnibus repository named ncDR, which furnishes a user-friendly interface that allows for convenient browsing, visualization, querying and downloading of data. Given the rapidly increasing interest in precision medicine, ncDR will significantly improve our understanding of the roles of regulatory non-coding RNAs in drug resistance and has the potential to be a timely and valuable resource. Availability and implementation http://www.jianglab.cn/ncDR/. Contact jiangwei@hrbmu.edu.cn or lw2247@yeah.net. 
Supplementary information Supplementary data are available at Bioinformatics online.",ncDR,0.99832958,NA,0,ncDR,0.99832958,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2017 +23322530,http://www.ncdr.com,"The NCDR CathPCI Registry: a US national perspective on care and outcomes for percutaneous coronary intervention. Aims The NCDR CathPCI Registry collects detailed clinical, process-of-care and outcomes data for patients undergoing coronary angiography and percutaneous coronary intervention (PCI) in the USA. The registry contributes to quality of care by providing data feedback on a wide range of performance metrics to participating centres and by facilitating local and national quality improvement efforts. Interventions No treatments are mandated, participating centres receive routine quality-of-care and outcomes performance feedback reports and access to a quality dashboard for personalized performance reports. Population Patients undergoing cardiac catheterization and PCI are retrospectively identified. No informed consent is required, as data are anonymised. From inception in 1998, more than 12 million records have been submitted from 1577 participating US centres. Baseline data Approximately 250 fields encompassing patient demographics, medical history and risk factors, hospital presentation, initial cardiac status, procedural details, medications, laboratory values, and in-hospital outcomes. Linkages with outside sources of data have permitted longitudinal outcomes assessment in some cases. Centre personnel enter the data into the registry, in some cases facilitated by software vendors. There are non-financial incentives for centre participation. Data completeness is noteworthy with most fields missing at rates less than 5%. A comprehensive data quality program is employed to enhance data validity. Endpoints Main outcome measures include quality process metrics and in-hospital patient outcomes. 
Data are available for research by application to: http://www.ncdr.com.",NCDR CathPCI,0.706379139,NA,0,NCDR CathPCI,0.706379139,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/15/2013 +32105730,http://www.jianglab.cn/ncEP,"ncEP: A Manually Curated Database for Experimentally Validated ncRNA-encoded Proteins or Peptides. Noncoding RNAs (ncRNAs), such as lncRNAs, circRNAs and pri-miRNAs, play important roles in physiological and pathological processes. Recently, it was demonstrated that they could encode proteins or peptides. However, relevant information is scattered across numerous published articles, which is inconvenient for the exploration of ncRNA translation by researchers. In this study, we presented an ncEP database, which records the low-throughput experimentally validated (LTEV) proteins or peptides encoded by ncRNAs, from published articles. Collectively, ncEP contains 80 entries including 74 proteins or peptides, 22 lncRNAs, 11 circRNAs, 9 pri-miRNAs and 37 other ncRNAs across 18 species from more than 50 articles of over 2000 candidate articles. We have provided a user-friendly interface for users to search, browse, visualize, download and submit data. In summary, ncEP provides a relatively comprehensive repository of the LTEV proteins or peptides encoded by ncRNAs and will enrich the knowledge for translation process. ncEP is freely available at http://www.jianglab.cn/ncEP/.",ncEP,0.997201324,NA,0,ncEP,0.997201324,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/24/2020 +26516186,http://ncg.kcl.ac.uk,"NCG 5.0: updates of a manually curated repository of cancer genes and associated properties from cancer mutational screenings. The Network of Cancer Genes (NCG, http://ncg.kcl.ac.uk/) is a manually curated repository of cancer genes derived from the scientific literature. 
Due to the increasing amount of cancer genomic data, we have introduced a more robust procedure to extract cancer genes from published cancer mutational screenings and two curators independently reviewed each publication. NCG release 5.0 (August 2015) collects 1571 cancer genes from 175 published studies that describe 188 mutational screenings of 13 315 cancer samples from 49 cancer types and 24 primary sites. In addition to collecting cancer genes, NCG also provides information on the experimental validation that supports the role of these genes in cancer and annotates their properties (duplicability, evolutionary origin, expression profile, function and interactions with proteins and miRNAs).",NCG,0.989955544,of,0.724374533,NCG,0.989955544,1,NA,"22080562.0, 24608173.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/29/2015 +24608173,http://bio.ieo.eu/ncg,"NCG 4.0: the network of cancer genes in the era of massive mutational screenings of cancer genomes. NCG 4.0 is the latest update of the Network of Cancer Genes, a web-based repository of systems-level properties of cancer genes. In its current version, the database collects information on 537 known (i.e. experimentally supported) and 1463 candidate (i.e. inferred using statistical methods) cancer genes. Candidate cancer genes derive from the manual revision of 67 original publications describing the mutational screening of 3460 human exomes and genomes in 23 different cancer types. For all 2000 cancer genes, duplicability, evolutionary origin, expression, functional annotation, interaction network with other human proteins and with microRNAs are reported. In addition to providing a substantial update of cancer-related information, NCG 4.0 also introduces two new features. 
The first is the annotation of possible false-positive cancer drivers, defined as candidate cancer genes inferred from large-scale screenings whose association with cancer is likely to be spurious. The second is the description of the systems-level properties of 64 human microRNAs that are causally involved in cancer progression (oncomiRs). Owing to the manual revision of all information, NCG 4.0 constitutes a complete and reliable resource on human coding and non-coding genes whose deregulation drives cancer onset and/or progression. NCG 4.0 can also be downloaded as a free application for Android smart phones. Database URL: http://bio.ieo.eu/ncg/.",NCG,0.973342955,NA,0,NCG,0.973342955,1,NA,"22080562.0, 26516186.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,3/7/2014 +22080562,http://bio.ifom-ieo-campus.it/ncg,"Network of Cancer Genes (NCG 3.0): integration and analysis of genetic and network properties of cancer genes. The identification of a constantly increasing number of genes whose mutations are causally implicated in tumor initiation and progression (cancer genes) requires the development of tools to store and analyze them. The Network of Cancer Genes (NCG 3.0) collects information on 1494 cancer genes that have been found mutated in 16 different cancer types. These genes were collected from the Cancer Gene Census as well as from 18 whole exome and 11 whole-genome screenings of cancer samples. For each cancer gene, NCG 3.0 provides a summary of the gene features and the cross-reference to other databases. In addition, it describes duplicability, evolutionary origin, orthology, network properties, interaction partners, microRNA regulation and functional roles of cancer genes and of all genes that are related to them. This integrated network of information can be used to better characterize cancer genes in the context of the system in which they act. 
The data can also be used to identify novel candidates that share the same properties of known cancer genes and may therefore play a similar role in cancer. NCG 3.0 is freely available at http://bio.ifom-ieo-campus.it/ncg.",NCG,0.929202914,Network of Cancer Genes,0.920580149,NCG,0.929202914,1,NA,"24608173.0, 26516186.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/12/2011 +32487016,http://www.jianglab.cn/ncRI,"ncRI: a manually curated database for experimentally validated non-coding RNAs in inflammation. Background Inflammation has been considered to be central to the onset, progression, and outcome of infectious diseases, especially as one of the hallmarks of cancer. Non-coding RNAs (ncRNAs), such as miRNAs and lncRNAs, have emerged as vital regulators in control of immune and inflammatory processes, and also play important roles in the inflammatory disease and immunotherapy. Results In this study, we presented a database ncRI, which documented experimentally verified ncRNAs in inflammatory diseases, from published articles. Each entry contained the detailed information about ncRNA name, inflammatory diseases, mechanism, experimental techniques (e.g., microarray, RNA-seq, qRT-PCR), experimental samples (cell line and/or tissue), expression patterns of ncRNA (up-regulated or down-regulated), reference information (PubMed ID, year of publication, title of paper) and so on. Collectively, ncRI recorded 11,166 entries that include 1976 miRNAs, 1377 lncRNAs and 107 other ncRNAs across 3 species (human, mouse, and rat) from more than 2000 articles. All these data are free for users to search, browse and download. Conclusion In summary, the presented database ncRI provides a relatively comprehensive credible repository about ncRNAs and their roles in inflammatory diseases, and will be helpful for research on immunotherapy. 
The ncRI is now freely available to all users at http://www.jianglab.cn/ncRI/.",ncRI,0.994199097,NA,0,ncRI,0.994199097,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2020 +25540777,http://ncrnadb.scienze.univr.it/ncrnadb,"Comprehensive reconstruction and visualization of non-coding regulatory networks in human. Research attention has been powered to understand the functional roles of non-coding RNAs (ncRNAs). Many studies have demonstrated their deregulation in cancer and other human disorders. ncRNAs are also present in extracellular human body fluids such as serum and plasma, giving them a great potential as non-invasive biomarkers. However, non-coding RNAs have been relatively recently discovered and a comprehensive database including all of them is still missing. Reconstructing and visualizing the network of ncRNAs interactions are important steps to understand their regulatory mechanism in complex systems. This work presents ncRNA-DB, a NoSQL database that integrates ncRNAs data interactions from a large number of well established on-line repositories. The interactions involve RNA, DNA, proteins, and diseases. ncRNA-DB is available at http://ncrnadb.scienze.univr.it/ncrnadb/. It is equipped with three interfaces: web based, command-line, and a Cytoscape app called ncINetView. By accessing only one resource, users can search for ncRNAs and their interactions, build a network annotated with all known ncRNAs and associated diseases, and use all visual and mining features available in Cytoscape.",ncRNA-DB,0.996957827,NA,0,ncRNA-DB,0.996957827,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/10/2014 +31410488,http://ibi.hzau.edu.cn/ncRNA-eQTL,"ncRNA-eQTL: a database to systematically evaluate the effects of SNPs on non-coding RNA expression across cancer types. Numerous studies indicate that non-coding RNAs (ncRNAs) have critical functions across biological processes, and single-nucleotide polymorphisms (SNPs) could contribute to diseases or traits through influencing ncRNA expression. 
However, the associations between SNPs and ncRNA expression are largely unknown. Therefore, genome-wide expression quantitative trait loci (eQTL) analysis to assess the effects of SNPs on ncRNA expression, especially in multiple cancer types, will help to understand how risk alleles contribute toward tumorigenesis and cancer development. Using genotype data and expression profiles of ncRNAs of >8700 samples from The Cancer Genome Atlas (TCGA), we developed a computational pipeline to systematically identify ncRNA-related eQTLs (ncRNA-eQTLs) across 33 cancer types. We identified a total of 6 133 278 and 721 122 eQTL-ncRNA pairs in cis-eQTL and trans-eQTL analyses, respectively. Further survival analyses identified 8312 eQTLs associated with patient survival times. Furthermore, we linked ncRNA-eQTLs to genome-wide association study (GWAS) data and found 262 332 ncRNA-eQTLs overlapping with known disease- and trait-associated loci. Finally, a user-friendly database, ncRNA-eQTL (http://ibi.hzau.edu.cn/ncRNA-eQTL), was developed for free searching, browsing and downloading of all ncRNA-eQTLs. We anticipate that such an integrative and comprehensive resource will improve our understanding of the mechanistic basis of human complex phenotypic variation, especially for ncRNA- and cancer-related studies.",ncRNA-eQTL,0.982337432,NA,0,ncRNA-eQTL,0.982337432,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +31637139,http://www.biomed-bigdata.com:50020/index.html,"ncRNA2MetS: a manually curated database for non-coding RNAs associated with metabolic syndrome. Metabolic syndrome is a cluster of the most dangerous heart attack risk factors (diabetes and raised fasting plasma glucose, abdominal obesity, high cholesterol and high blood pressure), and has become a major global threat to human health. 
A number of studies have demonstrated that hundreds of non-coding RNAs, including miRNAs and lncRNAs, are involved in metabolic syndrome-related diseases such as obesity, type 2 diabetes mellitus, hypertension, etc. However, these research results are distributed in a large number of literature, which is not conducive to analysis and use. There is an urgent need to integrate these relationship data between metabolic syndrome and non-coding RNA into a specialized database. To address this need, we developed a metabolic syndrome-associated non-coding RNA database (ncRNA2MetS) to curate the associations between metabolic syndrome and non-coding RNA. Currently, ncRNA2MetS contains 1,068 associations between five metabolic syndrome traits and 627 non-coding RNAs (543 miRNAs and 84 lncRNAs) in four species. Each record in ncRNA2MetS database represents a pair of disease-miRNA (lncRNA) association consisting of non-coding RNA category, miRNA (lncRNA) name, name of metabolic syndrome trait, expressive patterns of non-coding RNA, method for validation, specie involved, a brief introduction to the association, the article referenced, etc. We also developed a user-friendly website so that users can easily access and download all data. In short, ncRNA2MetS is a complete and high-quality data resource for exploring the role of non-coding RNA in the pathogenesis of metabolic syndrome and seeking new treatment options. The website is freely available at http://www.biomed-bigdata.com:50020/index.html.",ncRNA2MetS,0.990252088,metabolic syndrome-associated non,0.63908056,ncRNA2MetS,0.990252088,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/15/2019 +33275967,http://www.liwzlab.cn/ncrnavar,"ncRNAVar: A Manually Curated Database for Identification of Noncoding RNA Variants Associated with Human Diseases. 
While variants of noncoding RNAs (ncRNAs) have been experimentally validated as a new class of biomarkers and drug targets, the discovery and interpretation of relationships between ncRNA variants and human diseases become important and challenging. Here we present ncRNAVar (http://www.liwzlab.cn/ncrnavar/), the first database that provides association data between validated ncRNA variants and human diseases through manual curation on 2650 publications and computational annotation. ncRNAVar contains 4565 associations between 711 human disease phenotypes and 3112 variants from 2597 ncRNAs. Each association was reviewed by professional curators, incorporated with valuable annotation and cross references, and designated with an association score by our refined score model. ncRNAVar offers web applications including association prioritization, network visualization, and relationship mapping. ncRNAVar, presenting a landscape of ncRNA variants in human diseases and a useful resource for subsequent software development, will improve our insight of relationships between ncRNA variants and human health.",ncRNAVar,0.998307347,NA,0,ncRNAVar,0.998307347,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +27152146,http://purl.obolibrary.org/obo/ncro.owl,"The Non-Coding RNA Ontology (NCRO): a comprehensive resource for the unification of non-coding RNA biology. In recent years, sequencing technologies have enabled the identification of a wide range of non-coding RNAs (ncRNAs). Unfortunately, annotation and integration of ncRNA data has lagged behind their identification. Given the large quantity of information being obtained in this area, there emerges an urgent need to integrate what is being discovered by a broad range of relevant communities. 
To this end, the Non-Coding RNA Ontology (NCRO) is being developed to provide a systematically structured and precisely defined controlled vocabulary for the domain of ncRNAs, thereby facilitating the discovery, curation, analysis, exchange, and reasoning of data about structures of ncRNAs, their molecular and cellular functions, and their impacts upon phenotypes. The goal of NCRO is to serve as a common resource for annotations of diverse research in a way that will significantly enhance integrative and comparative analysis of the myriad resources currently housed in disparate sources. It is our belief that the NCRO ontology can perform an important role in the comprehensive unification of ncRNA biology and, indeed, fill a critical gap in both the Open Biological and Biomedical Ontologies (OBO) Library and the National Center for Biomedical Ontology (NCBO) BioPortal. Our initial focus is on the ontological representation of small regulatory ncRNAs, which we see as the first step in providing a resource for the annotation of data about all forms of ncRNAs. The NCRO ontology is free and open to all users, accessible at: http://purl.obolibrary.org/obo/ncro.owl.",NCRO,0.979602456,The Non-Coding RNA Ontology,0.890178517,NCRO,0.979602456,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/4/2016 +32122231,http://lilab2.sysu.edu.cn/ncrpheno,"ncRPheno: a comprehensive database platform for identification and validation of disease related noncoding RNAs. Noncoding RNAs (ncRNAs) play critical roles in many critical biological processes and have become a novel class of potential targets and bio-markers for disease diagnosis, therapy, and prognosis. Annotating and analysing ncRNA-disease association data are essential but challenging. Current computational resources lack comprehensive database platforms to consistently interpret and prioritize ncRNA-disease association data for biomedical investigation and application. 
Here, we present the ncRPheno database platform (http://lilab2.sysu.edu.cn/ncrpheno), which comprehensively integrates and annotates ncRNA-disease association data and provides novel searches, visualizations, and utilities for association identification and validation. ncRPheno contains 482,751 non-redundant associations between 14,494 ncRNAs and 3,210 disease phenotypes across 11 species with supporting evidence in the literature. A scoring model was refined to prioritize the associations based on evidential metrics. Moreover, ncRPheno provides user-friendly web interfaces, novel visualizations, and programmatic access to enable easy exploration, analysis, and utilization of the association data. A case study through ncRPheno demonstrated a comprehensive landscape of ncRNAs dysregulation associated with 22 cancers and uncovered 821 cancer-associated common ncRNAs. As a unique database platform, ncRPheno outperforms the existing similar databases in terms of data coverage and utilities, and it will assist studies in encoding ncRNAs associated with phenotypes ranging from genetic disorders to complex diseases. Abbreviations APIs: application programming interfaces; circRNA: circular RNA; ECO: Evidence & Conclusion Ontology; EFO: Experimental Factor Ontology; FDR: false discovery rate; GO: Gene Ontology; GWAS: genome wide association studies; HPO: Human Phenotype Ontology; ICGC: International Cancer Genome Consortium; lncRNA: long noncoding RNA; miRNA: micro RNA; ncRNA: noncoding RNA; NGS: next generation sequencing; OMIM: Online Mendelian Inheritance in Man; piRNA: piwi-interacting RNA; snoRNA: small nucleolar RNA; TCGA: The Cancer Genome Atlas.",ncRPheno,0.996450782,Online Mendelian Inheritance in Man,0.861822203,ncRPheno,0.996450782,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/26/2020 +24185695,http://ndbserver.rutgers.edu,"The Nucleic Acid Database: new features and capabilities. 
The Nucleic Acid Database (NDB) (http://ndbserver.rutgers.edu) is a web portal providing access to information about 3D nucleic acid structures and their complexes. In addition to primary data, the NDB contains derived geometric data, classifications of structures and motifs, standards for describing nucleic acid features, as well as tools and software for the analysis of nucleic acids. A variety of search capabilities are available, as are many different types of reports. This article describes the recent redesign of the NDB Web site with special emphasis on new RNA-derived data and annotations and their implementation and integration into the search capabilities.",NDB,0.992138386,Nucleic Acid Database,0.922201002,NDB,0.992138386,1,NA,33021634,NA,NA,NA,do not merge,NA,NA,NA,10/31/2013 +33021634,http://ndb.rice.edu,"The Nucleome Data Bank: web-based resources to simulate and analyze the three-dimensional genome. We introduce the Nucleome Data Bank (NDB), a web-based platform to simulate and analyze the three-dimensional (3D) organization of genomes. The NDB enables physics-based simulation of chromosomal structural dynamics through the MEGABASE + MiChroM computational pipeline. The input of the pipeline consists of epigenetic information sourced from the Encode database; the output consists of the trajectories of chromosomal motions that accurately predict Hi-C and fluorescence insitu hybridization data, as well as multiple observations of chromosomal dynamics in vivo. As an intermediate step, users can also generate chromosomal sub-compartment annotations directly from the same epigenetic input, without the use of any DNA-DNA proximity ligation data. Additionally, the NDB freely hosts both experimental and computational structural genomics data. Besides being able to perform their own genome simulations and download the hosted data, users can also analyze and visualize the same data through custom-designed web-based tools. 
In particular, the one-dimensional genetic and epigenetic data can be overlaid onto accurate 3D structures of chromosomes, to study the spatial distribution of genetic and epigenetic features. The NDB aims to be a shared resource to biologists, biophysicists and all genome scientists. The NDB is available at https://ndb.rice.edu.",NDB,0.968252078,Nucleome Data Bank,0.928795207,NDB,0.968252078,1,NA,24185695,low_prob_best_name,do not remove,NA,do not merge,NA,NA,NA,1/1/2021 +24297257,http://nectarmutation.org,"NECTAR: a database of codon-centric missense variant annotations. NECTAR (Non-synonymous Enriched Coding muTation ARchive; http://nectarmutation.org) is a database and web application to annotate disease-related and functionally important amino acids in human proteins. A number of tools are available to facilitate the interpretation of DNA variants identified in diagnostic or research sequencing. These typically identify previous reports of DNA variation at a given genomic location, predict its effects on transcript and protein sequence and may predict downstream functional consequences. Previous reports and functional annotations are typically linked by the genomic location of the variant observed. NECTAR collates disease-causing variants and functionally important amino acid residues from a number of sources. Importantly, rather than simply linking annotations by a shared genomic location, NECTAR annotates variants of interest with details of previously reported variation affecting the same codon. This provides a much richer data set for the interpretation of a novel DNA variant. NECTAR also identifies functionally equivalent amino acid residues in evolutionarily related proteins (paralogues) and, where appropriate, transfers annotations between them. As well as accessing these data through a web interface, users can upload batches of variants in variant call format (VCF) for annotation on-the-fly. 
The database is freely available to download from the ftp site: ftp://ftp.nectarmutation.org.",NECTAR,0.995319307,Non-synonymous Enriched Coding muTation ARchive,0.943871379,NECTAR,0.995319307,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2013 +24214996,http://mips.helmholtz-muenchen.de/proj/ppi/negatome,"Negatome 2.0: a database of non-interacting proteins derived by literature mining, manual annotation and protein structure analysis. Knowledge about non-interacting proteins (NIPs) is important for training the algorithms to predict protein-protein interactions (PPIs) and for assessing the false positive rates of PPI detection efforts. We present the second version of Negatome, a database of proteins and protein domains that are unlikely to engage in physical interactions (available online at http://mips.helmholtz-muenchen.de/proj/ppi/negatome). Negatome is derived by manual curation of literature and by analyzing three-dimensional structures of protein complexes. The main methodological innovation in Negatome 2.0 is the utilization of an advanced text mining procedure to guide the manual annotation process. Potential non-interactions were identified by a modified version of Excerbt, a text mining tool based on semantic sentence analysis. Manual verification shows that nearly a half of the text mining results with the highest confidence values correspond to NIP pairs. Compared to the first version the contents of the database have grown by over 300%.",Negatome,0.987791359,NA,0,Negatome,0.987791359,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2013 +27017950,http://neisseria.um.edu.my,"NeisseriaBase: a specialised Neisseria genomic resource and analysis platform. Background. The gram-negative Neisseria is associated with two of the most potent human epidemic diseases: meningococcal meningitis and gonorrhoea. In both cases, disease is caused by bacteria colonizing human mucosal membrane surfaces. 
Overall, the genus shows great diversity and genetic variation mainly due to its ability to acquire and incorporate genetic material from a diverse range of sources through horizontal gene transfer. Although a number of databases exist for the Neisseria genomes, they are mostly focused on the pathogenic species. In this present study we present the freely available NeisseriaBase, a database dedicated to the genus Neisseria encompassing the complete and draft genomes of 15 pathogenic and commensal Neisseria species. Methods. The genomic data were retrieved from National Center for Biotechnology Information (NCBI) and annotated using the RAST server which were then stored into the MySQL database. The protein-coding genes were further analyzed to obtain information such as calculation of GC content (%), predicted hydrophobicity and molecular weight (Da) using in-house Perl scripts. The web application was developed following the secure four-tier web application architecture: (1) client workstation, (2) web server, (3) application server, and (4) database server. The web interface was constructed using PHP, JavaScript, jQuery, AJAX and CSS, utilizing the model-view-controller (MVC) framework. The in-house developed bioinformatics tools implemented in NeisseraBase were developed using Python, Perl, BioPerl and R languages. Results. Currently, NeisseriaBase houses 603,500 Coding Sequences (CDSs), 16,071 RNAs and 13,119 tRNA genes from 227 Neisseria genomes. The database is equipped with interactive web interfaces. Incorporation of the JBrowse genome browser in the database enables fast and smooth browsing of Neisseria genomes. NeisseriaBase includes the standard BLAST program to facilitate homology searching, and for Virulence Factor Database (VFDB) specific homology searches, the VFDB BLAST is also incorporated into the database. 
In addition, NeisseriaBase is equipped with in-house designed tools such as the Pairwise Genome Comparison tool (PGC) for comparative genomic analysis and the Pathogenomics Profiling Tool (PathoProT) for the comparative pathogenomics analysis of Neisseria strains. Discussion. This user-friendly database not only provides access to a host of genomic resources on Neisseria but also enables high-quality comparative genome analysis, which is crucial for the expanding scientific community interested in Neisseria research. This database is freely available at http://neisseria.um.edu.my.",NeisseriaBase,0.990827322,NA,0,NeisseriaBase,0.990827322,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/17/2016 +22139919,http://nematode.net,"Nematode.net update 2011: addition of data sets and tools featuring next-generation sequencing data. Nematode.net (http://nematode.net) has been a publicly available resource for studying nematodes for over a decade. In the past 3 years, we reorganized Nematode.net to provide more user-friendly navigation through the site, a necessity due to the explosion of data from next-generation sequencing platforms. Organism-centric portals containing dynamically generated data are available for over 56 different nematode species. Next-generation data has been added to the various data-mining portals hosted, including NemaBLAST and NemaBrowse. The NemaPath metabolic pathway viewer builds associations using KOs, rather than ECs to provide more accurate and fine-grained descriptions of proteins. Two new features for data analysis and comparative genomics have been added to the site. NemaSNP enables the user to perform population genetics studies in various nematode populations using next-generation sequencing data. 
HelmCoP (Helminth Control and Prevention) as an independent component of Nematode.net provides an integrated resource for storage, annotation and comparative genomics of helminth genomes to aid in learning more about nematode genomes, as well as drug, pesticide, vaccine and drug target discovery. With this update, Nematode.net will continue to realize its original goal to disseminate diverse bioinformatic data sets and provide analysis tools to the broad scientific community in a useful and user-friendly manner.",Nematode.net,0.99133563,NA,0,Nematode.net,0.99133563,1,NA,25392426,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,12/1/2011 +25392426,"http://www.helminth.net, http://nematode.net","Helminth.net: expansions to Nematode.net and an introduction to Trematode.net. Helminth.net (http://www.helminth.net) is the new moniker for a collection of databases: Nematode.net and Trematode.net. Within this collection we provide services and resources for parasitic roundworms (nematodes) and flatworms (trematodes), collectively known as helminths. For over a decade we have provided resources for studying nematodes via our veteran site Nematode.net (http://nematode.net). In this article, (i) we provide an update on the expansions of Nematode.net that hosts omics data from 84 species and provides advanced search tools to the broad scientific community so that data can be mined in a useful and user-friendly manner and (ii) we introduce Trematode.net, a site dedicated to the dissemination of data from flukes, flatworm parasites of the class Trematoda, phylum Platyhelminthes. Trematode.net is an independent component of Helminth.net and currently hosts data from 16 species, with information ranging from genomic, functional genomic data, enzymatic pathway utilization to microbiome changes associated with helminth infections. 
The databases' interface, with a sophisticated query engine as a backbone, is intended to allow users to search for multi-factorial combinations of species' omics properties. This report describes updates to Nematode.net since its last description in NAR, 2012, and also introduces and presents its new sibling site, Trematode.net.",Nematode.net,0.755417025,NA,0,Nematode.net,0.755417025,1,NA,22139919,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,11/11/2014 +22419844,http://bif.uohyd.ac.in/nemedplant/orhttp://202.41.85.11/nemedplant,"NeMedPlant: a database of therapeutic applications and chemical constituents of medicinal plants from north-east region of India. Unlabelled The North-East region of India is one of the twelve mega biodiversity region, containing many rare and endangered species. A curated database of medicinal and aromatic plants from the regions called NeMedPlant is developed. The database contains traditional, scientific and medicinal information about plants and their active constituents, obtained from scholarly literature and local sources. The database is cross-linked with major biochemical databases and analytical tools. The integrated database provides resource for investigations into hitherto unexplored medicinal plants and serves to speed up the discovery of natural productsbased drugs. Availability The database is available for free at http://bif.uohyd.ac.in/nemedplant/orhttp://202.41.85.11/nemedplant/",NeMedPlant,0.994859576,NA,0,NeMedPlant,0.994859576,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/28/2012 +22833564,http://prodata.swmed.edu/LRNes,"NESdb: a database of NES-containing CRM1 cargoes. The leucine-rich nuclear export signal (NES) is the only known class of targeting signal that directs macromolecules out of the cell nucleus. NESs are short stretches of 8-15 amino acids with regularly spaced hydrophobic residues that bind the export karyopherin CRM1. 
NES-containing proteins are involved in numerous cellular and disease processes. We compiled a database named NESdb that contains 221 NES-containing CRM1 cargoes that were manually curated from the published literature. Each NESdb entry is annotated with information about sequence and structure of both the NES and the cargo protein, as well as information about experimental evidence of NES-mapping and CRM1-mediated nuclear export. NESdb will be updated regularly and will serve as an important resource for nuclear export signals. NESdb is freely available to nonprofit organizations at http://prodata.swmed.edu/LRNes.",NESdb,0.996034861,NA,0,NESdb,0.996034861,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/25/2012 +34630517,http://rbc-dsai-iitm.github.io/NetGenes,"NetGenes: A Database of Essential Genes Predicted Using Features From Interaction Networks. Essential gene prediction models built so far are heavily reliant on sequence-based features, and the scope of network-based features has been narrow. Previous work from our group demonstrated the importance of using network-based features for predicting essential genes with high accuracy. Here, we apply our approach for the prediction of essential genes to organisms from the STRING database and host the results in a standalone website. Our database, NetGenes, contains essential gene predictions for 2,700+ bacteria predicted using features derived from STRING protein-protein functional association networks. Housing a total of over 2.1 million genes, NetGenes offers various features like essentiality scores, annotations, and feature vectors for each gene. NetGenes database is available from https://rbc-dsai-iitm.github.io/NetGenes/.",NetGenes,0.995349348,NA,0,NetGenes,0.995349348,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/23/2021 +25527095,http://www.netgestalt.org,"Empowering biologists with multi-omics data: colorectal cancer as a paradigm. 
Motivation Recent completion of the global proteomic characterization of The Cancer Genome Atlas (TCGA) colorectal cancer (CRC) cohort resulted in the first tumor dataset with complete molecular measurements at DNA, RNA and protein levels. Using CRC as a paradigm, we describe the application of the NetGestalt framework to provide easy access and interpretation of multi-omics data. Results The NetGestalt CRC portal includes genomic, epigenomic, transcriptomic, proteomic and clinical data for the TCGA CRC cohort, data from other CRC tumor cohorts and cell lines, and existing knowledge on pathways and networks, giving a total of more than 17 million data points. The portal provides features for data query, upload, visualization and integration. These features can be flexibly combined to serve various needs of the users, maximizing the synergy among omics data, human visualization and quantitative analysis. Using three case studies, we demonstrate that the portal not only provides user-friendly data query and visualization but also enables efficient data integration within a single omics data type, across multiple omics data types, and over biological networks. Availability and implementation The NetGestalt CRC portal can be freely accessed at http://www.netgestalt.org. Contact bing.zhang@vanderbilt.edu Supplementary information Supplementary data are available at Bioinformatics online.",NetGestalt,0.99514091,NA,0,NetGestalt,0.99514091,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/18/2014 +21959865,http://www.netpath.org/netslim,"NetSlim: high-confidence curated signaling maps. We previously developed NetPath as a resource for comprehensive manually curated signal transduction pathways. The pathways in NetPath contain a large number of molecules and reactions which can sometimes be difficult to visualize or interpret given their complexity. 
To overcome this potential limitation, we have developed a set of more stringent curation and inclusion criteria for pathway reactions to generate high-confidence signaling maps. NetSlim is a new resource that contains this 'core' subset of reactions for each pathway for easy visualization and manipulation. The pathways in NetSlim are freely available at http://www.netpath.org/netslim.",NetPath,0.996446371,NA,0,NetPath,0.996446371,1,NA,"30084000.0, 21996254.0, 27139435.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,9/29/2011 +"21996254, 27139435",http://www.netpath.org,"A comprehensive curated resource for follicle stimulating hormone signaling. Background Follicle stimulating hormone (FSH) is an important hormone responsible for growth, maturation and function of the human reproductive system. FSH regulates the synthesis of steroid hormones such as estrogen and progesterone, proliferation and maturation of follicles in the ovary and spermatogenesis in the testes. FSH is a glycoprotein heterodimer that binds and acts through the FSH receptor, a G-protein coupled receptor. Although online pathway repositories provide information about G-protein coupled receptor mediated signal transduction, the signaling events initiated specifically by FSH are not cataloged in any public database in a detailed fashion. Findings We performed comprehensive curation of the published literature to identify the components of FSH signaling pathway and the molecular interactions that occur upon FSH receptor activation. Our effort yielded 64 reactions comprising 35 enzyme-substrate reactions, 11 molecular association events, 11 activation events and 7 protein translocation events that occur in response to FSH receptor activation. We also cataloged 265 genes, which were differentially expressed upon FSH stimulation in normal human reproductive tissues. 
Conclusions We anticipate that the information provided in this resource will provide better insights into the physiological role of FSH in reproductive biology, its signaling mediators and aid in further research in this area. The curated FSH pathway data is freely available through NetPath (http://www.netpath.org), a pathway resource developed previously by our group.",NetPath,0.888923526,NA,0,NetPath,0.888923526,2,30084000,"21959865.0, 30084000.0",low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,5/3/2016 +30084000,"http://www.netpath.org, http://www.netpath.org/pathways?path_id=NetPath_172","A network map of netrin receptor UNC5B-mediated signaling. UNC-5 Homolog B (UNC5B) is a member of the dependence receptor family. This family of receptors can induce two opposite intracellular signaling cascades depending on the presence or absence of the ligand and is thus capable of driving two opposing processes. UNC5B signaling has been implicated in several cancers, where it induces cell death in the absence of its ligand Netrin-1 and promotes cell survival in its presence. In addition, inhibition of Netrin-1 ligand has been reported to decrease invasiveness and angiogenesis in tumors. UNC5B signaling pathway has also been reported to be involved in several processes such as neural development, developmental angiogenesis and inflammatory processes. However, literature pertaining to UNC5B signaling is scarce and scattered. Considering the importance of UNC5B signaling, we developed a resource of signaling events mediated by UNC5B. Using data mined from published literature, we compiled an integrated pathway map consisting of 88 UNC5B-mediated signaling events and 55 proteins. 
These signaling events include 27 protein-protein interaction events, 33 catalytic events involving various post-translational modifications, 9 events of UNC5B-mediated protein activation/inhibition, 27 gene regulation events and 2 events of translocation. This pathway resource has been made available to the research community through NetPath ( http://www.netpath.org /), a manually curated resource of signaling pathways (Database URL: http://www.netpath.org/pathways?path_id=NetPath_172 ). The current resource provides a foundation for the understanding of UNC5B-mediated cellular responses. The development of resource will serve researchers to explore the mechanisms of UNC-5B signaling in cancers.",NetPath,0.885460615,NA,0,NetPath,0.885460615,1,"21996254.0, 27139435.0","21959865.0, 21996254.0, 27139435.0",low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,8/6/2018 +23203867,http://ophid.utoronto.ca/networx,"NetwoRx: connecting drugs to networks and phenotypes in Saccharomyces cerevisiae. Drug modes of action are complex and still poorly understood. The set of known drug targets is widely acknowledged to be biased and incomplete, and so gives only limited insight into the system-wide effects of drugs. But a high-throughput assay unique to yeast-barcode-based chemogenomic screens-can measure the individual drug response of every yeast deletion mutant in parallel. NetwoRx (http://ophid.utoronto.ca/networx) is the first resource to store data from these extremely valuable yeast chemogenomics experiments. In total, NetwoRx stores data on 5924 genes and 466 drugs. In addition, we applied data-mining approaches to identify yeast pathways, functions and phenotypes that are targeted by particular drugs, compute measures of drug-drug similarity and construct drug-phenotype networks. 
These data are all available to search or download through NetwoRx; users can search by drug name, gene name or gene set identifier. We also set up automated analysis routines in NetwoRx; users can query new gene sets against the entire collection of drug profiles and retrieve the drugs that target them. We demonstrate with use case examples how NetwoRx can be applied to target specific phenotypes, repurpose drugs using mode of action analysis, investigate bipartite networks and predict new drugs that affect yeast aging.",NetwoRx,0.998048842,NA,0,NetwoRx,0.998048842,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +26183225,http://genetics-db.neu.edu.tr,"Near East University Genetic Mutation Database (NEU-GD): The first mutation database of Northern Cyprus. The health care system is negatively affected by the genetic disorders that lead to an increasing rate of morbidity and neonatal deaths and affect adults as well. These create a substantial government's psychosocial and economic burden on clinicians, patients and their families with the advancement in the field of genetics. There has been a tremendous increase in the rate in which diseases associated with variant DNA sequences are being sought and identified. The goal behind the creation of Near East University Genetic Mutation Database (NEU-GD) is to map and apprehend the patterns of common genetic diversity in the human genetic makeup in order to accelerate the search for the genetic causes of human disease. NEU-GD will allow scientists to generate extraordinarily useful information such as allelic variations among population, and description of the genetic blueprint of mutations occurring in human beings. In this communication we report the construction of the first genetic mutation database for the people belonging to different ethnic groups living in North Cyprus (http://genetics-db.neu.edu.tr/). 
Therefore NEU-GD can serve as an important tool available online for molecular genetic testing of inherited disorder and persuade for further investigation of novel genetic disorders in North Cyprus population.",NEU-GD,0.990502596,Near East University Genetic Mutation Database,0.902750194,NEU-GD,0.990502596,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/14/2015 +23286825,http://bioschool.iitd.ac.in/NeuroDNet,"NeuroDNet - an open source platform for constructing and analyzing neurodegenerative disease networks. Background Genetic networks control cellular functions. Aberrations in normal cellular function are caused by mutations in genes that disrupt the fine tuning of genetic networks and cause disease or disorder. However, the large number of signalling molecules, genes and proteins that constitute such networks, and the consequent complexity of interactions, has restrained progress in research elucidating disease mechanisms. Hence, carrying out a systematic analysis of how diseases alter the character of these networks is important. We illustrate this through our work on neurodegenerative disease networks. We created a database, NeuroDNet, which brings together relevant information about signalling molecules, genes and proteins, and their interactions, for constructing neurodegenerative disease networks. Description NeuroDNet is a database with interactive tools that enables the creation of interaction networks for twelve neurodegenerative diseases under one portal for interrogation and analyses. It is the first of its kind, which enables the construction and analysis of neurodegenerative diseases through protein interaction networks, regulatory networks and Boolean networks. The database has a three-tier architecture - foundation, function and interface. The foundation tier contains the human genome data with 23857 protein-coding genes linked to more than 300 genes reported in clinical studies of neurodegenerative diseases. 
The database architecture was designed to retrieve neurodegenerative disease information seamlessly through the interface tier using specific functional information. Features of this database enable users to extract, analyze and display information related to a disease in many different ways. Conclusions The application of NeuroDNet was illustrated using three case studies. Through these case studies, the construction and analyses of a PPI network for angiogenin protein in amyotrophic lateral sclerosis, a signal-gene-protein interaction network for presenilin protein in Alzheimer's disease and a Boolean network for a mammalian cell cycle was demonstrated. NeuroDNet is accessible at http://bioschool.iitd.ac.in/NeuroDNet/.",NeuroDNet,0.995239019,NA,0,NeuroDNet,0.995239019,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/3/2013 +24229347,http://chibi.ubc.ca/neurogem,"NeuroGeM, a knowledgebase of genetic modifiers in neurodegenerative diseases. Background Neurodegenerative diseases (NDs) are characterized by the progressive loss of neurons in the human brain. Although the majority of NDs are sporadic, evidence is accumulating that they have a strong genetic component. Therefore, significant efforts have been made in recent years to not only identify disease-causing genes but also genes that modify the severity of NDs, so-called genetic modifiers. To date there exists no compendium that lists and cross-links genetic modifiers of different NDs. Description In order to address this need, we present NeuroGeM, the first comprehensive knowledgebase providing integrated information on genetic modifiers of nine different NDs in the model organisms D. melanogaster, C. elegans, and S. cerevisiae. NeuroGeM cross-links curated genetic modifier information from the different NDs and provides details on experimental conditions used for modifier identification, functional annotations, links to homologous proteins and color-coded protein-protein interaction networks to visualize modifier interactions. 
We demonstrate how this database can be used to generate new understanding through meta-analysis. For instance, we reveal that the Drosophila genes DnaJ-1, thread, Atx2, and mub are generic modifiers that affect multiple if not all NDs. Conclusion As the first compendium of genetic modifiers, NeuroGeM will assist experimental and computational scientists in their search for the pathophysiological mechanisms underlying NDs. http://chibi.ubc.ca/neurogem.",NeuroGeM,0.99263829,NA,0,NeuroGeM,0.99263829,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/14/2013 +28651363,http://neurommsig.scai.fraunhofer.de,"Multimodal mechanistic signatures for neurodegenerative diseases (NeuroMMSig): a web server for mechanism enrichment. Motivation The concept of a 'mechanism-based taxonomy of human disease' is currently replacing the outdated paradigm of diseases classified by clinical appearance. We have tackled the paradigm of mechanism-based patient subgroup identification in the challenging area of research on neurodegenerative diseases. Results We have developed a knowledge base representing essential pathophysiology mechanisms of neurodegenerative diseases. Together with dedicated algorithms, this knowledge base forms the basis for a 'mechanism-enrichment server' that supports the mechanistic interpretation of multiscale, multimodal clinical data. Availability and implementation NeuroMMSig is available at http://neurommsig.scai.fraunhofer.de/. Contact martin.hofmann-apitius@scai.fraunhofer.de. Supplementary information Supplementary data are available at Bioinformatics online.",NeuroMMSig,0.995421886,Multimodal mechanistic signatures for neurodegenerative diseases,0.627636355,NeuroMMSig,0.995421886,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2017 +30684219,http://yu-mbl-muscledb.com/NeuroMuscleDB,"NeuroMuscleDB: a Database of Genes Associated with Muscle Development, Neuromuscular Diseases, Ageing, and Neurodegeneration. 
Skeletal muscle is a highly complex, heterogeneous tissue that serves a multitude of biological functions in living organisms. With the advent of methods, such as microarrays, transcriptome analysis, and proteomics, studies have been performed at the genome level to gain insight of changes in the expression profiles of genes during different stages of muscle development and of associated diseases. In the present study, a database was conceived for the straightforward retrieval of information on genes involved in skeletal muscle formation, neuromuscular diseases (NMDs), ageing, and neurodegenerative disorders (NDs). The resulting database named NeuroMuscleDB ( http://yu-mbl-muscledb.com/NeuroMuscleDB ) is the result of a wide literature survey, database searches, and data curation. NeuroMuscleDB contains information of genes in Homo sapiens, Mus musculus, and Bos Taurus, and their promoter sequences and specified roles at different stages of muscle development and in associated myopathies. The database contains information on ~ 1102 genes, 6030 mRNAs, and 5687 proteins, and embedded analytical tools that can be used to perform tasks related to gene sequence usage. The authors believe NeuroMuscleDB provides a platform for obtaining desired information on genes related to myogenesis and their associations with various diseases (NMDs, ageing, and NDs). NeuroMuscleDB is freely available on the web at http://yu-mbl-muscledb.com/NeuroMuscleDB and supports all major browsers.",NeuroMuscleDB,0.997726917,NA,0,NeuroMuscleDB,0.997726917,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/25/2019 +21821666,http://proteomics.ucsd.edu/Software/NeuroPedia.html,"NeuroPedia: neuropeptide database and spectral library. Summary Neuropeptides are essential for cell-cell communication in neurological and endocrine physiological processes in health and disease. 
While many neuropeptides have been identified in previous studies, the resulting data has not been structured to facilitate further analysis by tandem mass spectrometry (MS/MS), the main technology for high-throughput neuropeptide identification. Many neuropeptides are difficult to identify when searching MS/MS spectra against large protein databases because of their atypical lengths (e.g. shorter/longer than common tryptic peptides) and lack of tryptic residues to facilitate peptide ionization/fragmentation. NeuroPedia is a neuropeptide encyclopedia of peptide sequences (including genomic and taxonomic information) and spectral libraries of identified MS/MS spectra of homolog neuropeptides from multiple species. Searching neuropeptide MS/MS data against known NeuroPedia sequences will improve the sensitivity of database search tools. Moreover, the availability of neuropeptide spectral libraries will also enable the utilization of spectral library search tools, which are known to further improve the sensitivity of peptide identification. These will also reinforce the confidence in peptide identifications by enabling visual comparisons between new and previously identified neuropeptide MS/MS spectra. Availability http://proteomics.ucsd.edu/Software/NeuroPedia.html Contact bandeira@ucsd.edu Supplementary information Supplementary materials are available at Bioinformatics online.",NeuroPedia,0.996082842,NA,0,NeuroPedia,0.996082842,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/5/2011 +25931458,http://isyslab.info/NeuroPep,"NeuroPep: a comprehensive resource of neuropeptides. Neuropeptides play a variety of roles in many physiological processes and serve as potential therapeutic targets for the treatment of some nervous-system disorders. In recent years, there has been a tremendous increase in the number of identified neuropeptides. 
Therefore, we have developed NeuroPep, a comprehensive resource of neuropeptides, which holds 5949 non-redundant neuropeptide entries originating from 493 organisms belonging to 65 neuropeptide families. In NeuroPep, the number of neuropeptides in invertebrates and vertebrates is 3455 and 2406, respectively. It is currently the most complete neuropeptide database. We extracted entries deposited in UniProt, the database (www.neuropeptides.nl) and NeuroPedia, and used text mining methods to retrieve entries from the MEDLINE abstracts and full text articles. All the entries in NeuroPep have been manually checked. 2069 of the 5949 (35%) neuropeptide sequences were collected from the scientific literature. Moreover, NeuroPep contains detailed annotations for each entry, including source organisms, tissue specificity, families, names, post-translational modifications, 3D structures (if available) and literature references. Information derived from these peptide sequences such as amino acid compositions, isoelectric points, molecular weight and other physicochemical properties of peptides are also provided. A quick search feature allows users to search the database with keywords such as sequence, name, family, etc., and an advanced search page helps users to combine queries with logical operators like AND/OR. In addition, user-friendly web tools like browsing, sequence alignment and mapping are also integrated into the NeuroPep database. Database URL: http://isyslab.info/NeuroPep",NeuroPep,0.99364078,NA,0,NeuroPep,0.99364078,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/29/2015 +22039101,http://newt-omics.mpi-bn.mpg.de,"Newt-omics: a comprehensive repository for omics data from the newt Notophthalmus viridescens. Notophthalmus viridescens, a member of the salamander family is an excellent model organism to study regenerative processes due to its unique ability to replace lost appendages and to repair internal organs. 
Molecular insights into regenerative events have been severely hampered by the lack of genomic, transcriptomic and proteomic data, as well as an appropriate database to store such novel information. Here, we describe 'Newt-omics' (http://newt-omics.mpi-bn.mpg.de), a database, which enables researchers to locate, retrieve and store data sets dedicated to the molecular characterization of newts. Newt-omics is a transcript-centred database, based on an Expressed Sequence Tag (EST) data set from the newt, covering ~50,000 Sanger sequenced transcripts and a set of high-density microarray data, generated from regenerating hearts. Newt-omics also contains a large set of peptides identified by mass spectrometry, which was used to validate 13,810 ESTs as true protein coding. Newt-omics is open to implement additional high-throughput data sets without changing the database structure. Via a user-friendly interface Newt-omics allows access to a huge set of molecular data without the need for prior bioinformatical expertise.",Newt-omics,0.960447629,NA,0,Newt-omics,0.960447629,1,25740498,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,10/27/2011 +25740498,http://newt-omics.mpi-bn.mpg.de,"Data mining in newt-omics, the repository for omics data from the newt. Salamanders are an excellent model organism to study regenerative processes due to their unique ability to regenerate lost appendages or organs. Straightforward bioinformatics tools to analyze and take advantage of the growing number of ""omics"" studies performed in salamanders were lacking so far. To overcome this limitation, we have generated a comprehensive data repository for the red-spotted newt Notophthalmus viridescens, named newt-omics, merging omics style datasets on the transcriptome and proteome level including expression values and annotations. 
The resource is freely available via a user-friendly Web-based graphical user interface ( http://newt-omics.mpi-bn.mpg.de) that allows access and queries to the database without prior bioinformatical expertise. The repository is updated regularly, incorporating new published datasets from omics technologies.",newt-omics,0.924527243,NA,0,newt-omics,0.924527243,1,22039101,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,1/1/2015 +24271398,http://www.nexontology.org)-an,"NeXO Web: the NeXO ontology database and visualization platform. The Network-extracted Ontology (NeXO) is a gene ontology inferred directly from large-scale molecular networks. While most ontologies are constructed through manual expert curation, NeXO uses a principled computational approach which integrates evidence from hundreds of thousands of individual gene and protein interactions to construct a global hierarchy of cellular components and processes. Here, we describe the development of the NeXO Web platform (http://www.nexontology.org)-an online database and graphical user interface for visualizing, browsing and performing term enrichment analysis using NeXO and the gene ontology. The platform applies state-of-the-art web technology and visualization techniques to provide an intuitive framework for investigating biological machinery captured by both data-driven and manually curated ontologies.",NeXO,0.978267789,NA,0,NeXO,0.978267789,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/23/2013 +"22139911, 25593349, 27899619, 31724716",http://www.nextprot.org,"neXtProt: a knowledge platform for human proteins. neXtProt (http://www.nextprot.org/) is a new human protein-centric knowledge platform. Developed at the Swiss Institute of Bioinformatics (SIB), it aims to help researchers answer questions relevant to human proteins. 
To achieve this goal, neXtProt is built on a corpus containing both curated knowledge originating from the UniProtKB/Swiss-Prot knowledgebase and carefully selected and filtered high-throughput data pertinent to human proteins. This article presents an overview of the database and the data integration process. We also lay out the key future directions of neXtProt that we consider the necessary steps to make neXtProt the one-stop-shop for all research projects focusing on human proteins.",neXtProt,0.98649776,NA,0,neXtProt,0.98649776,4,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +21884625,http://nfiregulome.ccr.buffalo.edu,"The NFI-Regulome Database: A tool for annotation and analysis of control regions of genes regulated by Nuclear Factor I transcription factors. Background Genome annotation plays an essential role in the interpretation and use of genome sequence information. While great strides have been made in the annotation of coding regions of genes, less success has been achieved in the annotation of the regulatory regions of genes, including promoters, enhancers/silencers, and other regulatory elements. One reason for this disparity in annotated information is that coding regions can be assessed using high-throughput techniques such as EST sequencing, while annotation of regulatory regions often requires a gene-by-gene approach. Results The NFI-Regulome database http://nfiregulome.ccr.buffalo.edu was designed to promote easy annotation of the regulatory regions of genes that contain binding sites for the NFI (Nuclear Factor I) family of transcription factors, using data from the published literature. Binding sites are annotated together with the sequence of the gene, obtained from the UCSC Genome site, and the locations of all binding sites for multiple genes can be displayed in a number of formats designed to facilitate inter-gene comparisons. 
Classes of genes based on expression pattern, disease involvement, or types of binding sites present can be readily compared in order to assess common ""architectural"" structures in the regulatory regions. Conclusions The NFI-Regulome database allows rapid display of the relative locations and number of transcription factor binding sites of individual or defined sets of genes that contain binding sites for NFI transcription factors. This database may in the future be expanded into a distributed database structure including other families of transcription factors. Such databases may be useful for identifying common regulatory structures in genes essential for organ development, tissue-specific gene expression or those genes related to specific diseases.",NFI-Regulome,0.677506616,Database,0.495260537,NFI-Regulome,0.677506616,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/20/2011 +23601383,http://www.fao.org/infoods/biodiversity/index_en.stm,"FAO/INFOODS food composition database for biodiversity. Nutrient content can vary as much between different varieties of the same foods, as they do among different foods. Knowledge of varietal differences can therefore mean the difference between nutrient adequacy and inadequacy. The FAO/INFOODS food composition database for biodiversity has been developed with analytical data for foods described at the level of variety, cultivar and breed, and for underutilized and wild foods. It contains 6411 food entries and values for 451 components together with the bibliographic references and other information. The database is in MS Excel format and can be downloaded free-of-charge from the INFOODS website http://www.fao.org/infoods/biodiversity/index_en.stm. It is intended to annually publish new editions, making these data available for national and regional food composition databases. 
This database could be used to raise the awareness, promote and investigate food biodiversity and help to better estimate nutrient intakes.",NFOODS,0.797613025,NA,0,NFOODS,0.797613025,1,23993619,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,9/13/2012 +33514746,http://nelumbo.biocloud.net,"Nelumbo genome database, an integrative resource for gene expression and variants of Nelumbo nucifera. Sacred lotus (Nelumbo nucifera, or lotus) is one of the most widely grown aquatic plant species with important uses, such as in water gardening and in vegetable and herbal medicine. A public genomic database of lotus would facilitate studies of lotus and other aquatic plant species. Here, we constructed an integrative database: the Nelumbo Genome Database (NGD, http://nelumbo.biocloud.net ). This database is a collection of the most updated lotus genome assembly and contains information on both gene expression in different tissues and coexpression networks. In the NGD, we also integrated genetic variants and key traits from our 62 newly sequenced lotus cultivars and 26 previously reported cultivars, which are valuable for lotus germplasm studies. As applications including BLAST, BLAT, Primer, Annotation Search, Variant and Trait Search are deployed, users can perform sequence analyses and gene searches via the NGD. Overall, the valuable genomic resources provided in the NGD will facilitate future studies on population genetics and molecular breeding of lotus.",NGD,0.874260724,Nelumbo Genome Database,0.871381362,NGD,0.874260724,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/29/2021 +22517761,http://bioinfo.mc.vanderbilt.edu/NGS/index.html,"NGS catalog: A database of next generation sequencing studies in humans. 
Next generation sequencing (NGS) technologies have been rapidly applied in biomedical and biological research since its advent only a few years ago, and they are expected to advance at an unprecedented pace in the following years. To provide the research community with a comprehensive NGS resource, we have developed the database Next Generation Sequencing Catalog (NGS Catalog, http://bioinfo.mc.vanderbilt.edu/NGS/index.html), a continually updated database that collects, curates and manages available human NGS data obtained from published literature. NGS Catalog deposits publication information of NGS studies and their mutation characteristics (SNVs, small insertions/deletions, copy number variations, and structural variants), as well as mutated genes and gene fusions detected by NGS. Other functions include user data upload, NGS general analysis pipelines, and NGS software. NGS Catalog is particularly useful for investigators who are new to NGS but would like to take advantage of these powerful technologies for their own research. Finally, based on the data deposited in NGS Catalog, we summarized features and findings from whole exome sequencing, whole genome sequencing, and transcriptome sequencing studies for human diseases or traits.",NGS,0.890904665,Next Generation Sequencing Catalog,0.871557927,NGS,0.890904665,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/19/2012 +"24271385, 27794041",http://bioinfo2.ugr.es/NGSmethDB,"NGSmethDB: an updated genome resource for high quality, single-cytosine resolution methylomes. The updated release of 'NGSmethDB' (http://bioinfo2.ugr.es/NGSmethDB) is a repository for single-base whole-genome methylome maps for the best-assembled eukaryotic genomes. Short-read data sets from NGS bisulfite-sequencing projects of cell lines, fresh and pathological tissues are first pre-processed and aligned to the corresponding reference genome, and then the cytosine methylation levels are profiled. 
One major improvement is the application of a unique bioinformatics protocol to all data sets, thereby assuring the comparability of all values with each other. We implemented stringent quality controls to minimize important error sources, such as sequencing errors, bisulfite failures, clonal reads or single nucleotide variants (SNVs). This leads to reliable and high-quality methylomes, all obtained under uniform settings. Another significant improvement is the detection in parallel of SNVs, which might be crucial for many downstream analyses (e.g. SNVs and differential-methylation relationships). A next-generation methylation browser allows fast and smooth scrolling and zooming, thus speeding data download/upload, at the same time requiring fewer server resources. Several data mining tools allow the comparison/retrieval of methylation levels in different tissues or genome regions. NGSmethDB methylomes are also available as native tracks through a UCSC hub, which allows comparison with a wide range of third-party annotations, in particular phenotype or disease annotations.",NGSmethDB,0.998551369,NA,0,NGSmethDB,0.998551369,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/27/2016 +34452955,http://databases.lovd.nl/shared/genes/SDHB,"International initiative for a curated SDHB variant database improving the diagnosis of hereditary paraganglioma and pheochromocytoma. BackgroundSDHB is one of the major genes predisposing to paraganglioma/pheochromocytoma (PPGL). Identifying pathogenic SDHB variants in patients with PPGL is essential to the management of patients and relatives due to the increased risk of recurrences, metastases and the emergence of non-PPGL tumours. In this context, the 'NGS and PPGL (NGSnPPGL) Study Group' initiated an international effort to collect, annotate and classify SDHB variants and to provide an accurate, expert-curated and freely available SDHB variant database. Methods A total of 223 distinct SDHB variants from 737 patients were collected worldwide. 
Using multiple criteria, each variant was first classified according to a 5-tier grouping based on American College of Medical Genetics and NGSnPPGL standardised recommendations and was then manually reviewed by a panel of experts in the field. Results This multistep process resulted in 23 benign/likely benign, 149 pathogenic/likely pathogenic variants and 51 variants of unknown significance (VUS). Expert curation reduced by half the number of variants initially classified as VUS. Variant classifications are publicly accessible via the Leiden Open Variation Database system (https://databases.lovd.nl/shared/genes/SDHB). Conclusion This international initiative by a panel of experts allowed us to establish a consensus classification for 223 SDHB variants that should be used as a routine tool by geneticists in charge of PPGL laboratory diagnosis. This accurate classification of SDHB genetic variants will help to clarify the diagnosis of hereditary PPGL and to improve the clinical care of patients and relatives with PPGL.",NGSnPPGL,0.645407125,NA,0,NGSnPPGL,0.645407125,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,8/27/2021 +25392405,http://nhprtr.org,"Tissue-specific transcriptome sequencing analysis expands the non-human primate reference transcriptome resource (NHPRTR). The non-human primate reference transcriptome resource (NHPRTR, available online at http://nhprtr.org/) aims to generate comprehensive RNA-seq data from a wide variety of non-human primates (NHPs), from lemurs to hominids. In the 2012 Phase I of the NHPRTR project, 19 billion fragments or 3.8 terabases of transcriptome sequences were collected from pools of ∼ 20 tissues in 15 species and subspecies. Here we describe a major expansion of NHPRTR by adding 10.1 billion fragments of tissue-specific RNA-seq data. For this effort, we selected 11 of the original 15 NHP species and subspecies and constructed total RNA libraries for the same ∼ 15 tissues in each. 
The sequence quality is such that 88% of the reads align to human reference sequences, allowing us to compute the full list of expression abundance across all tissues for each species, using the reads mapped to human genes. This update also includes improved transcript annotations derived from RNA-seq data for rhesus and cynomolgus macaques, two of the most commonly used NHP models and additional RNA-seq data compiled from related projects. Together, these comprehensive reference transcriptomes from multiple primates serve as a valuable community resource for genome annotation, gene dynamics and comparative functional analysis.",NHPRTR,0.997961,non-human primate reference transcriptome resource,0.973988083,NHPRTR,0.997961,1,23203872,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/11/2014 +24501396,http://www.css.cornell.edu/faculty/buckley/nifh.htm,"A comprehensive aligned nifH gene database: a multipurpose tool for studies of nitrogen-fixing bacteria. We describe a nitrogenase gene sequence database that facilitates analysis of the evolution and ecology of nitrogen-fixing organisms. The database contains 32 954 aligned nitrogenase nifH sequences linked to phylogenetic trees and associated sequence metadata. The database includes 185 linked multigene entries including full-length nifH, nifD, nifK and 16S ribosomal RNA (rRNA) gene sequences. Evolutionary analyses enabled by the multigene entries support an ancient horizontal transfer of nitrogenase genes between Archaea and Bacteria and provide evidence that nifH has a different history of horizontal gene transfer from the nifDK enzyme core. Further analyses show that lineages in nitrogenase cluster I and cluster III have different rates of substitution within nifD, suggesting that nifD is under different selection pressure in these two lineages. 
Finally, we find that the genetic divergence of nifH and 16S rRNA genes does not correlate well at sequence dissimilarity values used commonly to define microbial species, as strains having <3% sequence dissimilarity in their 16S rRNA genes can have up to 23% dissimilarity in nifH. The nifH database has a number of uses including phylogenetic and evolutionary analyses, the design and assessment of primers/probes and the evaluation of nitrogenase sequence diversity. Database URL: http://www.css.cornell.edu/faculty/buckley/nifh.htm.",nifH,0.961427331,NA,0,nifH,0.961427331,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/5/2014 +26013919,http://molossinus.lab.nig.ac.jp/msmdb,"NIG_MoG: a mouse genome navigator for exploring intersubspecific genetic polymorphisms. The National Institute of Genetics Mouse Genome database (NIG_MoG; http://molossinus.lab.nig.ac.jp/msmdb/) primarily comprises the whole-genome sequence data of two inbred mouse strains, MSM/Ms and JF1/Ms. These strains were established at NIG and originated from the Japanese subspecies Mus musculus molossinus. NIG_MoG provides visualized genome polymorphism information, browsing single-nucleotide polymorphisms and short insertions and deletions in the genomes of MSM/Ms and JF1/Ms with respect to C57BL/6J (whose genome is predominantly derived from the West European subspecies M. m. domesticus). This allows users, especially wet-lab biologists, to intuitively recognize intersubspecific genome divergence in these mouse strains using visual data. The database also supports the in silico screening of bacterial artificial chromosome (BAC) clones that contain genomic DNA from MSM/Ms and the standard classical laboratory strain C57BL/6N. 
NIG_MoG is thus a valuable navigator for exploring mouse genome polymorphisms and BAC clones that are useful for studies of gene function and regulation based on intersubspecific genome divergence.",NIG_MoG,0.989151716,of Genetics Mouse Genome database,0.914421072,NIG_MoG,0.989151716,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/27/2015 +32090261,http://bioinfo.imtech.res.in/manojk/nipahvr,"NipahVR: a resource of multi-targeted putative therapeutics and epitopes for the Nipah virus. . Nipah virus (NiV) is an emerging and priority pathogen from the Paramyxoviridae family with a high fatality rate. It causes various diseases such as respiratory ailments and encephalitis and poses a great threat to humans and livestock. Despite various efforts, there is no approved antiviral treatment available. Therefore, to expedite and assist the research, we have developed an integrative resource NipahVR (http://bioinfo.imtech.res.in/manojk/nipahvr/) for the multi-targeted putative therapeutics and epitopes for NiV. It is structured into different sections, i.e. genomes, codon usage, phylogenomics, molecular diagnostic primers, therapeutics (siRNAs, sgRNAs, miRNAs) and vaccine epitopes (B-cell, CTL, MHC-I and -II binders). Most decisively, potentially efficient therapeutic regimens targeting different NiV proteins and genes were anticipated and projected. We hope this computational resource would be helpful in developing combating strategies against this deadly pathogen. Database URL: http://bioinfo.imtech.res.in/manojk/nipahvr/.",NipahVR,0.995369494,NA,0,NipahVR,0.995369494,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +27530928,http://nldb.hgc.jp,"NLDB: a database for 3D protein-ligand interactions in enzymatic reactions. NLDB (Natural Ligand DataBase; URL: http://nldb.hgc.jp ) is a database of automatically collected and predicted 3D protein-ligand interactions for the enzymatic reactions of metabolic pathways registered in KEGG. 
Structural information about these reactions is important for studying the molecular functions of enzymes, however a large number of the 3D interactions are still unknown. Therefore, in order to complement such missing information, we predicted protein-ligand complex structures, and constructed a database of the 3D interactions in reactions. NLDB provides three different types of data resources; the natural complexes are experimentally determined protein-ligand complex structures in PDB, the analog complexes are predicted based on known protein structures in a complex with a similar ligand, and the ab initio complexes are predicted by docking simulations. In addition, NLDB shows the known polymorphisms found in human genome on protein structures. The database has a flexible search function based on various types of keywords, and an enrichment analysis function based on a set of KEGG compound IDs. NLDB will be a valuable resource for experimental biologists studying protein-ligand interactions in specific reactions, and for theoretical researchers wishing to undertake more precise simulations of interactions.",NLDB,0.996441364,Natural Ligand DataBase,0.982187194,NLDB,0.996441364,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/16/2016 +29106588,http://rostlab.org/services/nlsdb,"NLSdb-major update for database of nuclear localization signals and nuclear export signals. NLSdb is a database collecting nuclear export signals (NES) and nuclear localization signals (NLS) along with experimentally annotated nuclear and non-nuclear proteins. NES and NLS are short sequence motifs related to protein transport out of and into the nucleus. The updated NLSdb now contains 2253 NLS and introduces 398 NES. The potential sets of novel NES and NLS have been generated by a simple 'in silico mutagenesis' protocol. We started with motifs annotated by experiments. In step 1, we increased specificity such that no known non-nuclear protein matched the refined motif. 
In step 2, we increased the sensitivity trying to match several different families with a motif. We then iterated over steps 1 and 2. The final set of 2253 NLS motifs matched 35% of 8421 experimentally verified nuclear proteins (up from 21% for the previous version) and none of 18 278 non-nuclear proteins. We updated the web interface providing multiple options to search protein sequences for NES and NLS motifs, and to evaluate your own signal sequences. NLSdb can be accessed via Rostlab services at: https://rostlab.org/services/nlsdb/.",NLSdb,0.997089744,NA,0,NLSdb,0.997089744,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +21216786,http://libaio.biol.mcgill.ca/lps-annotate.html,"LPS-annotate: complete annotation of compositionally biased regions in the protein knowledgebase. Compositional bias (i.e. a skew in the composition of a biological sequence towards a subset of residue types) can occur at a wide variety of scales, from compositional biases of whole genomes, down to short regions in individual protein and gene-DNA sequences that are compositionally biased (CB regions). Such CB regions are made from a subset of residue types that are strewn along the length of the region in an irregular way. Here, we have developed the database server LPS-annotate, for the analysis of such CB regions, and protein disorder in protein sequences. The algorithm defines compositional bias through a thorough search for lowest-probability subsequences (LPSs) (i.e., the least likely sequence regions in terms of composition). Users can (i) initially annotate CB regions in input protein or nucleotide sequences of interest, and then (ii) query a database of greater than 1,500,000 pre-calculated protein-CB regions, for investigation of further functional hypotheses and inferences, about the specific CB regions that were discovered, and their protein disorder propensities. 
We demonstrate how a user can search for CB regions of similar compositional bias and protein disorder, with a worked example. We show that our annotations substantially augment the CB-region annotations that already exist in the UniProt database, with more comprehensive annotation of more complex CB regions. Our analysis indicates tens of thousands of CB regions that do not comprise globular domains or transmembrane domains, and that do not have a propensity to protein disorder, indicating a large cohort of protein-CB regions of biophysically uncharacterized types. This server and database is a conceptually novel addition to the workbench of tools now available to molecular biologists to generate hypotheses and inferences about the proteins that they are investigating. It can be accessed at http://libaio.biol.mcgill.ca/lps-annotate.html. Database URL: http://libaio.biol.mcgill.ca/lps-annotate.html.",nnotate,0.606898546,NA,0,nnotate,0.606898546,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/6/2011 +"23161694, 25361968",http://www.genenames.org,"Genenames.org: the HGNC resources in 2013. The HUGO Gene Nomenclature Committee situated at the European Bioinformatics Institute assigns unique symbols and names to human genes. Since 2011, the data within our database has expanded largely owing to an increase in naming pseudogenes and non-coding RNA genes, and we now have >33,500 approved symbols. Our gene families and groups have also increased to nearly 500, with ∼45% of our gene entries associated to at least one family or group. We have also redesigned the HUGO Gene Nomenclature Committee website http://www.genenames.org creating a constant look and feel across the site and improving usability and readability for our users. The site provides a public access portal to our database with no restrictions imposed on access or the use of the data. 
Within this article, we review our online resources and data with particular emphasis on the updates to our website.",HUGO,0.656705081,Nomenclature,0.687022626,Nomenclature,0.687022626,2,"27799471.0, 23245209.0, 30304474.0",34310736,low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,10/31/2014 +34310736,http://allergen.org,"Newly defined allergens in the WHO/IUIS Allergen Nomenclature Database during 01/2019-03/2021. The WHO/IUIS Allergen Nomenclature Database (http://allergen.org) provides up-to-date expert-reviewed data on newly discovered allergens and their unambiguous nomenclature to allergen researchers worldwide. This review discusses the 106 allergens that were accepted by the Allergen Nomenclature Sub-Committee between 01/2019 and 03/2021. Information about protein family membership, patient cohorts, and assays used for allergen characterization is summarized. A first allergenic fungal triosephosphate isomerase, Asp t 36, was discovered in Aspergillus terreus. Plant allergens contained 1 contact, 38 respiratory, and 16 food allergens. Can s 4 from Indian hemp was identified as the first allergenic oxygen-evolving enhancer protein 2 and Cic a 1 from chickpeas as the first allergenic group 4 late embryogenesis abundant protein. Among the animal allergens were 19 respiratory, 28 food, and 3 venom allergens. Important discoveries include Rap v 2, an allergenic paramyosin in molluscs, and Sal s 4 and Pan h 4, allergenic fish tropomyosins. Paramyosins and tropomyosins were previously known mainly as arthropod allergens. Collagens from barramundi, Lat c 6, and salmon, Sal s 6, were the first members from the collagen superfamily added to the database. In summary, the addition of 106 new allergens to the previously listed 930 allergens reflects the continuous linear growth of the allergen database. 
In addition, 17 newly described allergen sources were included.",NA,0,Nomenclature,0.619582832,Nomenclature,0.619582832,1,NA,"23161694.0, 25361968.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,8/5/2021 +23125372,http://nonb.abcc.ncifcrf.gov,"Non-B DB v2.0: a database of predicted non-B DNA-forming motifs and its associated tools. The non-B DB, available at http://nonb.abcc.ncifcrf.gov, catalogs predicted non-B DNA-forming sequence motifs, including Z-DNA, G-quadruplex, A-phased repeats, inverted repeats, mirror repeats, direct repeats and their corresponding subsets: cruciforms, triplexes and slipped structures, in several genomes. Version 2.0 of the database revises and re-implements the motif discovery algorithms to better align with accepted definitions and thresholds for motifs, expands the non-B DNA-forming motifs coverage by including short tandem repeats and adds key visualization tools to compare motif locations relative to other genomic annotations. Non-B DB v2.0 extends the ability for comparative genomics by including re-annotation of the five organisms reported in non-B DB v1.0, human, chimpanzee, dog, macaque and mouse, and adds seven additional organisms: orangutan, rat, cow, pig, horse, platypus and Arabidopsis thaliana. Additionally, the non-B DB v2.0 provides an overall improved graphical user interface and faster query performance.",non-B,0.956009358,NA,0,non-B,0.956009358,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/3/2012 +23203872,http://nhprtr.org,"The non-human primate reference transcriptome resource (NHPRTR) for comparative functional genomics. RNA-based next-generation sequencing (RNA-Seq) provides a tremendous amount of new information regarding gene and transcript structure, expression and regulation. 
This is particularly true for non-coding RNAs where whole transcriptome analyses have revealed that much of the genome is transcribed and that many non-coding transcripts have widespread functionality. However, uniform resources for raw, cleaned and processed RNA-Seq data are sparse for most organisms and this is especially true for non-human primates (NHPs). Here, we describe a large-scale RNA-Seq data and analysis infrastructure, the NHP reference transcriptome resource (http://nhprtr.org); it presently hosts data from 12 species of primates, to be expanded to 15 species/subspecies spanning great apes, old world monkeys, new world monkeys and prosimians. Data are collected for each species using pools of RNA from comparable tissues. We provide data access in advance of its deposition at NCBI, as well as browsable tracks of alignments against the human genome using the UCSC genome browser. This resource will continue to host additional RNA-Seq data, alignments and assemblies as they are generated over the coming years and provide a key resource for the annotation of NHP genomes as well as informing primate studies on evolution, reproduction, infection, immunity and pharmacology.",NHPRTR,0.710883155,non-human primate reference transcriptome resource,0.920451568,non-human primate reference transcriptome resource,0.920451568,1,25392405,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,11/29/2012 +24573879,http://nonatobase.ufsc.br,"NONATObase: a database for Polychaeta (Annelida) from the Southwestern Atlantic Ocean. Networks can greatly advance data sharing attitudes by providing organized and useful data sets on marine biodiversity in a friendly and shared scientific environment. NONATObase, the interactive database on polychaetes presented herein, will provide new macroecological and taxonomic insights of the Southwestern Atlantic region. 
The database was developed by the NONATO network, a team of South American researchers, who integrated available information on polychaetes from between 5°N and 80°S in the Atlantic Ocean and near the Antarctic. The guiding principle of the database is to keep free and open access to data based on partnerships. Its architecture consists of a relational database integrated in the MySQL and PHP framework. Its web application allows access to the data from three different directions: species (qualitative data), abundance (quantitative data) and data set (reference data). The database has built-in functionality, such as the filter of data on user-defined taxonomic levels, characteristics of site, sample, sampler, and mesh size used. Considering that there are still many taxonomic issues related to poorly known regional fauna, a scientific committee was created to work out consistent solutions to current misidentifications and equivocal taxonomy status of some species. Expertise from this committee will be incorporated by NONATObase continually. The use of quantitative data was possible by standardization of a sample unit. All data, maps of distribution and references from a data set or a specified query can be visualized and exported to a commonly used data format in statistical analysis or reference manager software. The NONATO network has initialized with NONATObase, a valuable resource for marine ecologists and taxonomists. The database is expected to grow in functionality as it comes in useful, particularly regarding the challenges of dealing with molecular genetic data and tools to assess the effects of global environment change. Database URL: http://nonatobase.ufsc.br/.",NONATObase,0.994081736,NA,0,NONATObase,0.994081736,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/25/2014 +"24285305, 26586799, 29140524","http://www.bioinfo.org/noncode/, http://www.noncode.org","NONCODEv4: exploring the world of long non-coding RNA genes. 
NONCODE (http://www.bioinfo.org/noncode/) is an integrated knowledge database dedicated to non-coding RNAs (excluding tRNAs and rRNAs). Non-coding RNAs (ncRNAs) have been implied in diseases and identified to play important roles in various biological processes. Since NONCODE version 3.0 was released 2 years ago, discovery of novel ncRNAs has been promoted by high-throughput RNA sequencing (RNA-Seq). In this update of NONCODE, we expand the ncRNA data set by collection of newly identified ncRNAs from literature published in the last 2 years and integration of the latest version of RefSeq and Ensembl. Particularly, the number of long non-coding RNA (lncRNA) has increased sharply from 73 327 to 210 831. Owing to similar alternative splicing pattern to mRNAs, the concept of lncRNA genes was put forward to help systematic understanding of lncRNAs. The 56 018 and 46 475 lncRNA genes were generated from 95 135 and 67 628 lncRNAs for human and mouse, respectively. Additionally, we present expression profile of lncRNA genes by graphs based on public RNA-seq data for human and mouse, as well as predict functions of these lncRNA genes. The improvements brought to the database also include an incorporation of an ID conversion tool from RefSeq or Ensembl ID to NONCODE ID and a service of lncRNA identification. NONCODE is also accessible through http://www.noncode.org/.",NONCODE,0.997087181,NA,0,NONCODE,0.997087181,3,NA,"22135294.0, 33196801.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,1/1/2018 +"22135294, 33196801",http://www.noncode.org,"NONCODE v3.0: integrative annotation of long noncoding RNAs. Facilitated by the rapid progress of high-throughput sequencing technology, a large number of long noncoding RNAs (lncRNAs) have been identified in mammalian transcriptomes over the past few years. 
LncRNAs have been shown to play key roles in various biological processes such as imprinting control, circuitry controlling pluripotency and differentiation, immune responses and chromosome dynamics. Notably, a growing number of lncRNAs have been implicated in disease etiology. With the increasing number of published lncRNA studies, the experimental data on lncRNAs (e.g. expression profiles, molecular features and biological functions) have accumulated rapidly. In order to enable a systematic compilation and integration of this information, we have updated the NONCODE database (http://www.noncode.org) to version 3.0 to include the first integrated collection of expression and functional lncRNA data obtained from re-annotated microarray studies in a single database. NONCODE has a user-friendly interface with a variety of search or browse options, a local Genome Browser for visualization and a BLAST server for sequence-alignment search. In addition, NONCODE provides a platform for the ongoing collation of ncRNAs reported in the literature. All data in NONCODE are open to users, and can be downloaded through the website or obtained through the SOAP API and DAS services.",NONCODE,0.990737677,NA,0,NONCODE,0.990737677,2,NA,"24285305.0, 26586799.0, 29140524.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,1/1/2021 +21491493,http://recerca.upc.edu/imem/index.htm,"Integrating the intrinsic conformational preferences of noncoded α-amino acids modified at the peptide bond into the noncoded amino acids database. 
Recently, we reported a database (Noncoded Amino acids Database; http://recerca.upc.edu/imem/index.htm) that was built to compile information about the intrinsic conformational preferences of nonproteinogenic residues determined by quantum mechanical calculations, as well as bibliographic information about their synthesis, physical and spectroscopic characterization, the experimentally established conformational propensities, and applications (Revilla-López et al., J Phys Chem B 2010;114:7413-7422). The database initially contained the information available for α-tetrasubstituted α-amino acids. In this work, we extend NCAD to three families of compounds, which can be used to engineer peptides and proteins incorporating modifications at the--NHCO--peptide bond. Such families are: N-substituted α-amino acids, thio-α-amino acids, and diamines and diacids used to build retropeptides. The conformational preferences of these compounds have been analyzed and described based on the information captured in the database. In addition, we provide an example of the utility of the database and of the compounds it compiles in protein and peptide engineering. Specifically, the symmetry of a sequence engineered to stabilize the 3(10)-helix with respect to the α-helix has been broken without perturbing significantly the secondary structure through targeted replacements using the information contained in the database.",NCAD,0.637544513,Noncoded Amino acids Database,0.975250321,Noncoded Amino acids Database,0.975250321,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/12/2011 +26721496,"http://www.noncode.org/, http://www.bioinfo.org","NONCODEv4: Annotation of Noncoding RNAs with Emphasis on Long Noncoding RNAs. The rapid development of high-throughput sequencing technologies and bioinformatics algorithms now enables detection and profiling of a large number of noncoding transcripts. 
Long noncoding RNAs (lncRNAs), which are longer than 200 nucleotides, are accumulating with important roles involved in biological processes and tissue physiology. In this chapter, we describe the use of NONCODEv4, a database that provides a comprehensive catalog of noncoding RNAs with particularly detailed annotations for lncRNAs. NONCODEv4 stores more than half a million transcripts, of which more than 200,000 are lncRNAs. NONCODEv4 raises the concept of lncRNA genes and explores their expression and functions based on public transcriptome data. NONCODEv4 also integrates a series of online tools and has an easy-to-use web interface. NONCODEv4 is available at http://www.noncode.org/ http://www.bioinfo.org/ noncode.",NONCODEv4,0.99214226,NA,0,NONCODEv4,0.99214226,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2016 +32111231,http://www.ncdtcdb.cn:8080/NoncoRNA,"NoncoRNA: a database of experimentally supported non-coding RNAs and drug targets in cancer. NoncoRNA (http://www.ncdtcdb.cn:8080/NoncoRNA/) is a manually curated database of experimentally supported non-coding RNAs (ncRNAs) and drug target associations that aim to potentially provide a high-quality data resource for exploring drug sensitivity/resistance-related ncRNAs in various human cancers. ncRNAs are RNA molecules that do not encode proteins, but are involved in gene regulation and cellular functions in a variety of human diseases, including neurodegenerative diseases and cancers. Here, we developed NoncoRNA which contained 8233 entries between 5568 ncRNAs and 154 drugs in 134 cancers. Each entry in the NoncoRNA contains detailed information on the ncRNAs, drugs, and cancers, the ncRNA expression pattern and experimental detection techniques, drug response and other targets, literature references, and other information. NoncoRNA offers a user-friendly, open access web interface to easily browse, search, and download data. 
NoncoRNA also provides a submission page for researchers to submit newly validated ncRNA-drug-cancer associations. NoncoRNA might serve as an immeasurable resource for understanding the roles of ncRNAs in cancer therapy.",NoncoRNA,0.997252762,NA,0,NoncoRNA,0.997252762,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/28/2020 +29082924,http://bioinfo.lifl.fr/NRP,"Norine: A powerful resource for novel nonribosomal peptide discovery. Since its first release in 2008, Norine remains the unique resource completely devoted to nonribosomal peptides (NRPs). They are very attractive microbial secondary metabolites, displaying a remarkable diversity of structure and functions. Norine (http://bioinfo.lifl.fr/NRP) includes a database now containing more than 1160 annotated peptides and user-friendly interfaces enabling the querying of the database, through the annotations or the structure of the peptides. Dedicated tools are associated for structural comparison of the compounds and prediction of their biological activities. In this paper, we start by describing the knowledgebase and the dedicated tools. We then present some user cases to show how useful Norine is for the discovery of novel nonribosomal peptides.",Norine,0.987097681,NA,0,Norine,0.987097681,1,NA,31691799,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,6/1/2016 +31691799,http://bioinfo.cristal.univ-lille.fr/norine,"Norine: update of the nonribosomal peptide resource. Norine, the unique resource dedicated to nonribosomal peptides (NRPs), is now updated with a new pipeline to automate massive sourcing and enhance annotation. External databases are mined to extract NRPs that are not yet in Norine. To maintain a high data quality, successive filters are applied to automatically validate the NRP annotations and only validated data is inserted in the database. External databases were also used to complete annotations of NRPs already in Norine. 
Besides, annotation consistency inside Norine and between Norine and external sources have reported annotation errors. Some can be corrected automatically, while others need manual curation. This new approach led to the insertion of 539 new NRPs and the addition or correction of annotations of nearly all Norine entries. Two new tools to analyse the chemical structures of NRPs (rBAN) and to infer a molecular formula from the mass-to-charge ratio of an NRP (Kendrick Formula Predictor) were also integrated. Norine is freely accessible from the following URL: https://bioinfo.cristal.univ-lille.fr/norine/.",Norine,0.944806576,NA,0,Norine,0.944806576,1,NA,29082924,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2020 +22581809,http://www.nirs.go.jp/db/anzendb/NORMDB/ENG/index.php,"Database of the radioactivity of norm used as industrial raw materials. Most ores used as industrial raw materials are imported mainly because Japan has poor natural resources. The activity concentrations in these materials should be investigated to evaluate the radiation exposure of workers. In this study, imported industrial raw materials were collected, and the activity concentrations in these resources were measured by using inductively coupled plasma mass spectrometry and gamma ray spectrometry. Furthermore,  a database of activity concentrations of NORMs was developed by referring to the measured results as well as referring to the literature, and a database on the web was published. The purpose of the database is to relieve anxieties among the general public and to provide extensive data regarding NORM for researchers and regulators. The database provides more than 900 activity concentrations in worldwide NORMs at no fee. 
(NORM database; http://www.nirs.go.jp/db/anzendb/NORMDB/ENG/index.php).",NORM,0.674789657,NA,0,NORM,0.674789657,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/11/2012 +27285615,http://nepiac.nehu.ac.in/index.php,"Northeast India Helminth Parasite Information Database (NEIHPID): Knowledge Base for Helminth Parasites. Most metazoan parasites that invade vertebrate hosts belong to three phyla: Platyhelminthes, Nematoda and Acanthocephala. Many of the parasitic members of these phyla are collectively known as helminths and are causative agents of many debilitating, deforming and lethal diseases of humans and animals. The North-East India Helminth Parasite Information Database (NEIHPID) project aimed to document and characterise the spectrum of helminth parasites in the north-eastern region of India, providing host, geographical distribution, diagnostic characters and image data. The morphology-based taxonomic data are supplemented with information on DNA sequences of nuclear, ribosomal and mitochondrial gene marker regions that aid in parasite identification. In addition, the database contains raw next generation sequencing (NGS) data for 3 foodborne trematode parasites, with more to follow. The database will also provide study material for students interested in parasite biology. Users can search the database at various taxonomic levels (phylum, class, order, superfamily, family, genus, and species), or by host, habitat and geographical location. Specimen collection locations are noted as co-ordinates in a MySQL database and can be viewed on Google maps, using Google Maps JavaScript API v3. 
The NEIHPID database has been made freely available at http://nepiac.nehu.ac.in/index.php.",NEIHPID,0.965815055,Northeast India Helminth Parasite Information Database,0.971249071,Northeast India Helminth Parasite Information Database,0.971249071,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/10/2016 +25931459,http://www.nii.ac.in/novptmenzy.html,"novPTMenzy: a database for enzymes involved in novel post-translational modifications. With the recent discoveries of novel post-translational modifications (PTMs) which play important roles in signaling and biosynthetic pathways, identification of such PTM catalyzing enzymes by genome mining has been an area of major interest. Unlike well-known PTMs like phosphorylation, glycosylation, SUMOylation, no bioinformatics resources are available for enzymes associated with novel and unusual PTMs. Therefore, we have developed the novPTMenzy database which catalogs information on the sequence, structure, active site and genomic neighborhood of experimentally characterized enzymes involved in five novel PTMs, namely AMPylation, Eliminylation, Sulfation, Hydroxylation and Deamidation. Based on a comprehensive analysis of the sequence and structural features of these known PTM catalyzing enzymes, we have created Hidden Markov Model profiles for the identification of similar PTM catalyzing enzymatic domains in genomic sequences. We have also created predictive rules for grouping them into functional subfamilies and deciphering their mechanistic details by structure-based analysis of their active site pockets. These analytical modules have been made available as user friendly search interfaces of novPTMenzy database. It also has a specialized analysis interface for some PTMs like AMPylation and Eliminylation. The novPTMenzy database is a unique resource that can aid in discovery of unusual PTM catalyzing enzymes in newly sequenced genomes. 
Database URL: http://www.nii.ac.in/novptmenzy.html",novPTMenzy,0.971780777,NA,0,novPTMenzy,0.971780777,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/29/2015 +23203877,http://crdd.osdd.net/raghava/npact,"NPACT: Naturally Occurring Plant-based Anti-cancer Compound-Activity-Target database. Plant-derived molecules have been highly valued by biomedical researchers and pharmaceutical companies for developing drugs, as they are thought to be optimized during evolution. Therefore, we have collected and compiled a central resource Naturally Occurring Plant-based Anti-cancer Compound-Activity-Target database (NPACT, http://crdd.osdd.net/raghava/npact/) that gathers the information related to experimentally validated plant-derived natural compounds exhibiting anti-cancerous activity (in vitro and in vivo), to complement the other databases. It currently contains 1574 compound entries, and each record provides information on their structure, manually curated published data on in vitro and in vivo experiments along with reference for users referral, inhibitory values (IC(50)/ED(50)/EC(50)/GI(50)), properties (physical, elemental and topological), cancer types, cell lines, protein targets, commercial suppliers and drug likeness of compounds. NPACT can easily be browsed or queried using various options, and an online similarity tool has also been made available. Further, to facilitate retrieval of existing data, each record is hyperlinked to similar databases like SuperNatural, Herbal Ingredients' Targets, Comparative Toxicogenomics Database, PubChem and NCI-60 GI(50) data.",NPACT,0.997277677,Naturally Occurring Plant-based Anti-cancer Compound-Activity-Target database,0.987839665,NPACT,0.997277677,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +33306802,http://www.organchem.csdb.cn/scdb/NPBS,"NPBS database: a chemical data resource with relational data between natural products and biological sources. . 
NPBS (Natural Products & Biological Sources) database is a chemical data resource with relational data between natural products and biological sources, manually curated from literatures of natural product researches. The relational data link a specific species and all the natural products derived from it and contrarily link a specific natural product and all the biological sources. The biological sources cover diverse species of plant, bacterial, fungal and marine organisms; the natural molecules have proper chemical structure data and computable molecular properties and all the relational data have corresponding references. NPBS database provides a wider choice of biological sources and can be used for dereplication to prevent re-isolation and re-characterization of already known natural products. Database URL: http://www.organchem.csdb.cn/scdb/NPBS.",NPBS,0.996700525,Natural Products & Biological Sources,0.790207267,NPBS,0.996700525,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +28184254,http://silver.sejong.ac.kr/npcare,"NPCARE: database of natural products and fractional extracts for cancer regulation. Background Natural products have increasingly attracted much attention as a valuable resource for the development of anticancer medicines due to the structural novelty and good bioavailability. This necessitates a comprehensive database for the natural products and the fractional extracts whose anticancer activities have been verified. Description NPCARE (http://silver.sejong.ac.kr/npcare) is a publicly accessible online database of natural products and fractional extracts for cancer regulation. At NPCARE, one can explore 6578 natural compounds and 2566 fractional extracts isolated from 1952 distinct biological species including plants, marine organisms, fungi, and bacteria whose anticancer activities were validated with 1107 cell lines for 34 cancer types. 
Each entry in NPCARE is annotated with the cancer type, genus and species names of the biological resource, the cell line used for demonstrating the anticancer activity, PubChem ID, and a wealth of information about the target gene or protein. Besides the augmentation of plant entries up to 743 genus and 197 families, NPCARE is further enriched with the natural products and the fractional extracts of diverse non-traditional biological resources. Conclusions NPCARE is anticipated to serve as a dominant gateway for the discovery of new anticancer medicines due to the inclusion of a large number of the fractional extracts as well as the natural compounds isolated from a variety of biological resources.",NPCARE,0.992175639,NA,0,NPCARE,0.992175639,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/5/2017 +25178289,http://npcdb.snu.ac.kr,"Native Pig and Chicken Breed Database: NPCDB. Indigenous (native) breeds of livestock have higher disease resistance and adaptation to the environment due to high genetic diversity. Even though their extinction rate is accelerated due to the increase of commercial breeds, natural disaster, and civil war, there is a lack of well-established databases for the native breeds. Thus, we constructed the native pig and chicken breed database (NPCDB) which integrates available information on the breeds from around the world. It is a nonprofit public database aimed to provide information on the genetic resources of indigenous pig and chicken breeds for their conservation. The NPCDB (http://npcdb.snu.ac.kr/) provides the phenotypic information and population size of each breed as well as its specific habitat. In addition, it provides information on the distribution of genetic resources across the country. 
The database will contribute to understanding of the breed's characteristics such as disease resistance and adaptation to environmental changes as well as the conservation of indigenous genetic resources.",NPCDB,0.993397549,Native Pig and Chicken Breed Database,0.977203727,NPCDB,0.993397549,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2014 +26656949,http://npidb.belozersky.msu.ru,"An updated version of NPIDB includes new classifications of DNA-protein complexes and their families. The recent upgrade of nucleic acid-protein interaction database (NPIDB, http://npidb.belozersky.msu.ru/) includes a newly elaborated classification of complexes of protein domains with double-stranded DNA and a classification of families of related complexes. Our classifications are based on contacting structural elements of both DNA: the major groove, the minor groove and the backbone; and protein: helices, beta-strands and unstructured segments. We took into account both hydrogen bonds and hydrophobic interaction. The analyzed material contains 1942 structures of protein domains from 748 PDB entries. We have identified 97 interaction modes of individual protein domain-DNA complexes and 17 DNA-protein interaction classes of protein domain families. We analyzed the sources of diversity of DNA-protein interaction modes in different complexes of one protein domain family. The observed interaction mode is sometimes influenced by artifacts of crystallization or diversity in secondary structure assignment. The interaction classes of domain families are more stable and thus possess more biological sense than a classification of single complexes. Integration of the classification into NPIDB allows the user to browse the database according to the interacting structural elements of DNA and protein molecules. 
For each family, we present average DNA shape parameters in contact zones with domains of the family.",NPIDB,0.997779801,nucleic acid-protein interaction database,0.971818737,NPIDB,0.997779801,1,23193292,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,12/9/2015 +24217916,http://www.bioinfo.org/NPInter,"NPInter v2.0: an updated database of ncRNA interactions. NPInter (http://www.bioinfo.org/NPInter) is a database that integrates experimentally verified functional interactions between noncoding RNAs (excluding tRNAs and rRNAs) and other biomolecules (proteins, RNAs and genomic DNAs). Extensive studies on ncRNA interactions have shown that ncRNAs could act as part of enzymatic or structural complexes, gene regulators or other functional elements. With the development of high-throughput biotechnology, such as cross-linking immunoprecipitation and high-throughput sequencing (CLIP-seq), the number of known ncRNA interactions, especially those formed by protein binding, has grown rapidly in recent years. In this work, we updated NPInter to version 2.0 by collecting ncRNA interactions from recent literature and related databases, expanding the number of entries to 201 107 covering 18 species. In addition, NPInter v2.0 incorporated a service for the BLAST alignment search as well as visualization of interactions.",NPInter,0.993997335,NA,0,NPInter,0.993997335,1,NA,31670377,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,11/11/2013 +31670377,http://bigdata.ibp.ac.cn/npinter,"NPInter v4.0: an integrated database of ncRNA interactions. Noncoding RNAs (ncRNAs) play crucial regulatory roles in a variety of biological circuits. To document regulatory interactions between ncRNAs and biomolecules, we previously created the NPInter database (http://bigdata.ibp.ac.cn/npinter). 
Since the last version of NPInter was issued, a rapidly growing number of studies have reported novel interactions and accumulated numerous high-throughput interactome data. We have therefore updated NPInter to its fourth edition in which are integrated 600 000 new experimentally identified ncRNA interactions. ncRNA-DNA interactions derived from ChIRP-seq data and circular RNA interactions have been included in the database. Additionally, disease associations were annotated to the interacting molecules. The database website has also been redesigned with a more user-friendly interface and several additional functional modules. Overall, NPInter v4.0 now provides more comprehensive data and services for researchers working on ncRNAs and their interactions with other biomolecules.",NPInter,0.984493434,NA,0,NPInter,0.984493434,1,NA,24217916,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,1/1/2020 +30354114,http://nr-dbind.drugdesign.fr,"Nuclear Receptors Database Including Negative Data (NR-DBIND): A Database Dedicated to Nuclear Receptors Binding Data Including Negative Data and Pharmacological Profile. Nuclear receptors (NRs) are transcription factors that regulate gene expression in various physiological processes through their interactions with small hydrophobic molecules. They constitute an important class of targets for drugs and endocrine disruptors and are widely studied for both health and environment concerns. Since the integration of negative data can be critical for accurate modeling of ligand activity profiles, we manually collected and annotated NRs interaction data (positive and negative) through a sharp review of the corresponding literature. 15 116 positive and negative interactions data are provided for 28 NRs together with 593 PDB structures in the freely available Nuclear Receptors Database Including Negative Data ( http://nr-dbind.drugdesign.fr ). 
The NR-DBIND contains the most extensive information about interaction data on NRs, which should bring valuable information to chemists, biologists, pharmacologists and toxicologists.",NR-DBIND,0.99506779,Nuclear Receptors Database Including Negative Data,0.977114015,NR-DBIND,0.99506779,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2018 +25262355,http://proline.biochem.iisc.ernet.in/NRICHD,"NrichD database: sequence databases enriched with computationally designed protein-like sequences aid in remote homology detection. NrichD (http://proline.biochem.iisc.ernet.in/NRICHD/) is a database of computationally designed protein-like sequences, augmented into natural sequence databases that can perform hops in protein sequence space to assist in the detection of remote relationships. Establishing protein relationships in the absence of structural evidence or natural 'intermediately related sequences' is a challenging task. Recently, we have demonstrated that the computational design of artificial intermediary sequences/linkers is an effective approach to fill naturally occurring voids in protein sequence space. Through a large-scale assessment we have demonstrated that such sequences can be plugged into commonly employed search databases to improve the performance of routinely used sequence search methods in detecting remote relationships. Since it is anticipated that such data sets will be employed to establish protein relationships, two databases that have already captured these relationships at the structural and functional domain level, namely, the SCOP database and the Pfam database, have been 'enriched' with these artificial intermediary sequences. NrichD database currently contains 3,611,010 artificial sequences that have been generated between 27,882 pairs of families from 374 SCOP folds. The data sets are freely available for download. 
Additional features include the design of artificial sequences between any two protein families of interest to the user.",NrichD,0.996541321,NA,0,NrichD,0.996541321,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/27/2014 +24666037,http://nrlist.drugdesign.fr,"NRLiSt BDB, the manually curated nuclear receptors ligands and structures benchmarking database. Nuclear receptors (NRs) constitute an important class of drug targets. We created the most exhaustive NR-focused benchmarking database to date, the NRLiSt BDB (NRs ligands and structures benchmarking database). The 9905 compounds and 339 structures of the NRLiSt BDB are ready for structure-based and ligand-based virtual screening. In the present study, we detail the protocol used to generate the NRLiSt BDB and its features. We also give some examples of the errors that we found in ChEMBL that convinced us to manually review all original papers. Since extensive and manually curated experimental data about NR ligands and structures are provided in the NRLiSt BDB, it should become a powerful tool to assess the performance of virtual screening methods on NRs, to assist the understanding of NR's function and modulation, and to support the discovery of new drugs targeting NRs. NRLiSt BDB is freely available online at http://nrlist.drugdesign.fr .",NRLiSt BDB,0.936061025,NA,0,NRLiSt BDB,0.936061025,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/25/2014 +27899613,http://www.bio-bigdata.net/nsdna,"NSDNA: a manually curated database of experimentally supported ncRNAs associated with nervous system diseases. The Nervous System Disease NcRNAome Atlas (NSDNA) (http://www.bio-bigdata.net/nsdna/) is a manually curated database that provides comprehensive experimentally supported associations about nervous system diseases (NSDs) and noncoding RNAs (ncRNAs). NSDs represent a common group of disorders, some of which are characterized by high morbidity and disabilities. 
The pathogenesis of NSDs at the molecular level remains poorly understood. ncRNAs are a large family of functionally important RNA molecules. Increasing evidence shows that diverse ncRNAs play a critical role in various NSDs. Mining and summarizing NSD-ncRNA association data can help researchers discover useful information. Hence, we developed an NSDNA database that documents 24 713 associations between 142 NSDs and 8593 ncRNAs in 11 species, curated from more than 1300 articles. This database provides a user-friendly interface for browsing and searching and allows for data downloading flexibility. In addition, NSDNA offers a submission page for researchers to submit novel NSD-ncRNA associations. It represents an extremely useful and valuable resource for researchers who seek to understand the functions and molecular mechanisms of ncRNA involved in NSDs.",NSDNA,0.996830682,Nervous System Disease NcRNAome Atlas,0.991699129,NSDNA,0.996830682,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +22369214,http://nsltpdb.life.nthu.edu.tw,"Construction and analysis of a plant non-specific lipid transfer protein database (nsLTPDB). Background Plant non-specific lipid transfer proteins (nsLTPs) are small and basic proteins. Recently, nsLTPs have been reported involved in many physiological functions such as mediating phospholipid transfer, participating in plant defence activity against bacterial and fungal pathogens, and enhancing cell wall extension in tobacco. However, the lipid transfer mechanism of nsLTPs is still unclear, and comprehensive information of nsLTPs is difficult to obtain. Methods In this study, we identified 595 nsLTPs from 121 different species and constructed an nsLTPs database--nsLTPDB--which comprises the sequence information, structures, relevant literatures, and biological data of all plant nsLTPs http://nsltpdb.life.nthu.edu.tw/. 
Results Meanwhile, bioinformatics and statistics methods were implemented to develop a classification method for nsLTPs based on the patterns of the eight highly-conserved cysteine residues, and to suggest strict Prosite-styled patterns for Type I and Type II nsLTPs. The pattern of Type I is C X2 V X5-7 C [V, L, I] × Y [L, A, V] X8-13 CC × G X12 D × [Q, K, R] X2 CXC X16-21 P X2 C X13-15C, and that of Type II is C X4 L X2 C X9-11 P [S, T] X2 CC X5 Q X2-4 C[L, F]C X2 [A, L, I] × [D, N] P X10-12 [K, R] X4-5 C X3-4 P X0-2 C. Moreover, we referred the Prosite-styled patterns to the experimental mutagenesis data that previously established by our group, and found that the residues with higher conservation played an important role in the structural stability or lipid binding ability of nsLTPs. Conclusions Taken together, this research has suggested potential residues that might be essential to modulate the structural and functional properties of plant nsLTPs. Finally, we proposed some biologically important sites of the nsLTPs, which are described by using a new Prosite-styled pattern that we defined.",nsLTPDB,0.937543948,plant non-specific lipid transfer protein database,0.758426607,nsLTPDB,0.937543948,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/17/2012 +23084778,http://www.nsort.org/db,"NSort/DB: an intranuclear compartment protein database. Distinct substructures within the nucleus are associated with a wide variety of important nuclear processes. Structures such as chromatin and nuclear pores have specific roles, while others such as Cajal bodies are more functionally varied. Understanding the roles of these membraneless intra-nuclear compartments requires extensive data sets covering nuclear and compartment-associated proteins. NSort/DB is a database providing access to intra- or sub-nuclear compartment associations for the mouse nuclear proteome. 
Based on resources ranging from large-scale curated data sets to detailed experiments, this data set provides a high-quality set of annotations of non-exclusive association of nuclear proteins with structures such as promyelocytic leukaemia bodies and chromatin. The database is searchable by protein identifier or compartment, and has a documented web service API. The search interface, web service and data download are all freely available online at http://www.nsort.org/db/. Availability of this data set will enable systematic analyses of the protein complements of nuclear compartments, improving our understanding of the diverse functional repertoire of these structures.",NSort/DB,0.925428107,NA,0,NSort/DB,0.925428107,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/25/2012 +23330984,http://nubbe.iq.unesp.br/nubbeDB.html,"Development of a natural products database from the biodiversity of Brazil. We describe herein the design and development of an innovative tool called the NuBBE database (NuBBEDB), a new Web-based database, which incorporates several classes of secondary metabolites and derivatives from the biodiversity of Brazil. This natural product database incorporates botanical, chemical, pharmacological, and toxicological compound information. The NuBBEDB provides specialized information to the worldwide scientific community and can serve as a useful tool for studies on the multidisciplinary interfaces related to chemistry and biology, including virtual screening, dereplication, metabolomics, and medicinal chemistry. The NuBBEDB site is at http://nubbe.iq.unesp.br/nubbeDB.html .",NuBBEDB,0.996761322,NuBBE database,0.589097545,NuBBEDB,0.996761322,1,NA,28775335,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,1/18/2013 +28775335,http://nubbe.iq.unesp.br/portal/nubbedb.html,"NuBBEDB: an updated database to uncover chemical and biological information from Brazilian biodiversity. 
The intrinsic value of biodiversity extends beyond species diversity, genetic heritage, ecosystem variability and ecological services, such as climate regulation, water quality, nutrient cycling and the provision of reproductive habitats it is also an inexhaustible source of molecules and products beneficial to human well-being. To uncover the chemistry of Brazilian natural products, the Nuclei of Bioassays, Ecophysiology and Biosynthesis of Natural Products Database (NuBBEDB) was created as the first natural product library from Brazilian biodiversity. Since its launch in 2013, the NuBBEDB has proven to be an important resource for new drug design and dereplication studies. Consequently, continuous efforts have been made to expand its contents and include a greater diversity of natural sources to establish it as a comprehensive compendium of available biogeochemical information about Brazilian biodiversity. The content in the NuBBEDB is freely accessible online (https://nubbe.iq.unesp.br/portal/nubbedb.html) and provides validated multidisciplinary information, chemical descriptors, species sources, geographic locations, spectroscopic data (NMR) and pharmacological properties. Herein, we report the latest advancements concerning the interface, content and functionality of the NuBBEDB. We also present a preliminary study on the current profile of the compounds present in Brazilian territory.",NuBBEDB,0.995596101,of Natural Products Database,0.788312316,NuBBEDB,0.995596101,1,NA,23330984,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,8/3/2017 +23193292,http://npidb.belozersky.msu.ru,"NPIDB: Nucleic acid-Protein Interaction DataBase. The Nucleic acid-Protein Interaction DataBase (http://npidb.belozersky.msu.ru/) contains information derived from structures of DNA-protein and RNA-protein complexes extracted from the Protein Data Bank (3846 complexes in October 2012). 
It provides a web interface and a set of tools for extracting biologically meaningful characteristics of nucleoprotein complexes. The content of the database is updated weekly. The current version of the Nucleic acid-Protein Interaction DataBase is an upgrade of the version published in 2007. The improvements include a new web interface, new tools for calculation of intermolecular interactions, a classification of SCOP families that contains DNA-binding protein domains and data on conserved water molecules on the DNA-protein interface.",NA,0,Nucleic acid-Protein Interaction DataBase,0.976899055,Nucleic acid-Protein Interaction DataBase,0.976899055,1,26656949,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,11/27/2012 +24316579,"http://www.oxfordjournals.org/nar/database/c/, http://nar.oxfordjournals.org","The 2014 Nucleic Acids Research Database Issue and an updated NAR online Molecular Biology Database Collection. The 2014 Nucleic Acids Research Database Issue includes descriptions of 58 new molecular biology databases and recent updates to 123 databases previously featured in NAR or other journals. For convenience, the issue is now divided into eight sections that reflect major subject categories. Among the highlights of this issue are six databases of the transcription factor binding sites in various organisms and updates on such popular databases as CAZy, Database of Genomic Variants (DGV), dbGaP, DrugBank, KEGG, miRBase, Pfam, Reactome, SEED, TCDB and UniProt. There is a strong block of structural databases, which includes, among others, the new RNA Bricks database, updates on PDBe, PDBsum, ArchDB, Gene3D, ModBase, Nucleic Acid Database and the recently revived iPfam database. An update on the NCBI's MMDB describes VAST+, an improved tool for protein structure comparison. 
Two articles highlight the development of the Structural Classification of Proteins (SCOP) database: one describes SCOPe, which automates assignment of new structures to the existing SCOP hierarchy; the other one describes the first version of SCOP2, with its more flexible approach to classifying protein structures. This issue also includes a collection of articles on bacterial taxonomy and metagenomics, which includes updates on the List of Prokaryotic Names with Standing in Nomenclature (LPSN), Ribosomal Database Project (RDP), the Silva/LTP project and several new metagenomics resources. The NAR online Molecular Biology Database Collection, http://www.oxfordjournals.org/nar/database/c/, has been expanded to 1552 databases. The entire Database Issue is freely available online on the Nucleic Acids Research website (http://nar.oxfordjournals.org/).",NA,0,Nucleic Acids,0.610987711,Nucleic Acids,0.610987711,1,"21177655.0, 25593347.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: CLASS,NA,NA,12/6/2013 +30335176,http://bigd.big.ac.cn/nucmap,"NucMap: a database of genome-wide nucleosome positioning map across species. Dynamics of nucleosome positioning affects chromatin state, transcription and all other biological processes occurring on genomic DNA. While MNase-Seq has been used to depict nucleosome positioning map in eukaryote in the past years, nucleosome positioning data is increasing dramatically. To facilitate the usage of published data across studies, we developed a database named nucleosome positioning map (NucMap, http://bigd.big.ac.cn/nucmap). NucMap includes 798 experimental data from 477 samples across 15 species. With a series of functional modules, users can search profile of nucleosome positioning at the promoter region of each gene across all samples and make enrichment analysis on nucleosome positioning data in all genomic regions. Nucleosome browser was built to visualize the profiles of nucleosome positioning. 
Users can also visualize multiple sources of omics data with the nucleosome browser and make side-by-side comparisons. All processed data in the database are freely available. NucMap is the first comprehensive nucleosome positioning platform and it will serve as an important resource to facilitate the understanding of chromatin regulation.",NucMap,0.996892318,nucleosome positioning map,0.91362164,NucMap,0.996892318,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +34120586,http://compbio-zhanglab.org/NUCOME,"NUCOME: A comprehensive database of nucleosome organization referenced landscapes in mammalian genomes. Background Nucleosome organization is involved in many regulatory activities in various organisms. However, studies integrating nucleosome organization in mammalian genomes are very limited mainly due to the lack of comprehensive data quality control (QC) assessment and uneven data quality of public data sets. Results The NUCOME is a database focused on filtering qualified nucleosome organization referenced landscapes covering various cell types in human and mouse based on QC metrics. The filtering strategy guarantees the quality of nucleosome organization referenced landscapes and exempts users from redundant data set selection and processing. The NUCOME database provides standardized, qualified data source and informative nucleosome organization features at a whole-genome scale and on the level of individual loci. Conclusions The NUCOME provides valuable data resources for integrative analyses focus on nucleosome organization. The NUCOME is freely available at http://compbio-zhanglab.org/NUCOME .",NUCOME,0.996816218,NA,0,NUCOME,0.996816218,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/13/2021 +23196988,http://shark.abl.ku.edu/nurbs,"NURBS: a database of experimental and predicted nuclear receptor binding sites of mouse. Summary Nuclear receptors (NRs) are a class of transcription factors playing important roles in various biological processes. 
An NR often impacts numerous genes and different NRs share overlapped target networks. To fulfil the need for a database incorporating binding sites of different NRs at various conditions for easy comparison and visualization to improve our understanding of NR binding mechanisms, we have developed NURBS, a database for experimental and predicted nuclear receptor binding sites of mouse (NURBS). NURBS currently contains binding sites across the whole-mouse genome of 8 NRs identified in 40 chromatin immunoprecipitation with massively parallel DNA sequencing experiments. All datasets are processed using a widely used procedure and same statistical criteria to ensure the binding sites derived from different datasets are comparable. NURBS also provides predicted binding sites using NR-HMM, a Hidden Markov Model (HMM) model. Availability The GBrowse-based user interface of NURBS is freely accessible at http://shark.abl.ku.edu/nurbs/. NR-HMM and all results can be downloaded for free at the website. Contact jwfang@ku.edu",NURBS,0.996146083,NA,0,NURBS,0.996146083,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +29739837,http://nvertx.kahikai.org,"NvERTx: a gene expression database to compare embryogenesis and regeneration in the sea anemone Nematostella vectensis. . For over a century, researchers have been comparing embryogenesis and regeneration hoping that lessons learned from embryonic development will unlock hidden regenerative potential. This problem has historically been a difficult one to investigate because the best regenerative model systems are poor embryonic models and vice versa. Recently, however, there has been renewed interest in this question, as emerging models have allowed researchers to investigate these processes in the same organism. This interest has been further fueled by the advent of high-throughput transcriptomic analyses that provide virtual mountains of data. 
Here, we present Nematostella vectensis Embryogenesis and Regeneration Transcriptomics (NvERTx), a platform for comparing gene expression during embryogenesis and regeneration. NvERTx consists of close to 50 transcriptomic data sets spanning embryogenesis and regeneration in Nematostella These data were used to perform a robust de novo transcriptome assembly, with which users can search, conduct BLAST analyses, and plot the expression of multiple genes during these two developmental processes. The site is also home to the results of gene clustering analyses, to further mine the data and identify groups of co-expressed genes. The site can be accessed at http://nvertx.kahikai.org.",NvERTx,0.99687469,Nematostella vectensis Embryogenesis and Regeneration Transcriptomics,0.891611405,NvERTx,0.99687469,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/17/2018 +33442735,http://oglcnac.org,"O-GlcNAcAtlas: A database of experimentally identified O-GlcNAc sites and proteins. O-linked β-N-acetylglucosamine (O-GlcNAc) is a post-translational modification (i.e., O-GlcNAcylation) on the serine/threonine residues of proteins. As a unique intracellular monosaccharide modification, protein O-GlcNAcylation plays important roles in almost all biochemical processes examined. Aberrant O-GlcNAcylation underlies the etiologies of a number of chronic diseases. With the tremendous improvement of techniques, thousands of proteins along with their O-GlcNAc sites have been reported. However, until now, there are few databases dedicated to accommodate the rapid accumulation of such information. Thus, O-GlcNAcAtlas is created to integrate all experimentally identified O-GlcNAc sites and proteins. O-GlcNAcAtlas consists of two datasets (Dataset-I and Dataset-II, for unambiguously identified sites and ambiguously identified sites, respectively), representing a total number of 4571 O-GlcNAc modified proteins from all species studied from 1984 to 31 Dec 2019. 
For each protein, comprehensive information (including species, sample type, gene symbol, modified peptides and/or modification sites, site mapping methods and literature references) is provided. To solve the heterogeneity among the data collected from different sources, the sequence identity of these reported O-GlcNAc peptides are mapped to the UniProtKB protein entries. To our knowledge, O-GlcNAcAtlas is a highly comprehensive and rigorously curated database encapsulating all O-GlcNAc sites and proteins identified in the past 35 years. We expect that O-GlcNAcAtlas will be a useful resource to facilitate O-GlcNAc studies and computational analyses of protein O-GlcNAcylation. The public version of the web interface to the O-GlcNAcAtlas can be found at http://oglcnac.org/.",O-GlcNAcAtlas,0.984077953,NA,0,O-GlcNAcAtlas,0.984077953,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2021 +34655133,http://opig.stats.ox.ac.uk/webapps/oas,"Observed Antibody Space: A diverse database of cleaned, annotated, and translated unpaired and paired antibody sequences. The antibody repertoires of individuals and groups have been used to explore disease states, understand vaccine responses, and drive therapeutic development. The arrival of B-cell receptor repertoire sequencing has enabled researchers to get a snapshot of these antibody repertoires, and as more data are generated, increasingly in-depth studies are possible. However, most publicly available data only exist as raw FASTQ files, making the data hard to access, process, and compare. The Observed Antibody Space (OAS) database was created in 2018 to offer clean, annotated, and translated repertoire data. In this paper, we describe an update to OAS that has been driven by the increasing volume of data and the appearance of paired (VH/VL) sequence data. OAS is now accessible via a new web server, with standardized search parameters and a new sequence-based search option. 
The new database provides both nucleotides and amino acids for every sequence, with additional sequence annotations to make the data Minimal Information about Adaptive Immune Receptor Repertoire compliant, and comments on potential problems with the sequence. OAS now contains 25 new studies, including severe acute respiratory syndrome coronavirus 2 data and paired sequencing data. The new database is accessible at http://opig.stats.ox.ac.uk/webapps/oas/, and all data are freely available for download.",OAS,0.967805743,Antibody Space,0.754744515,OAS,0.967805743,1,NA,30217829,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/29/2021 +30217829,http://antibodymap.org,"Observed Antibody Space: A Resource for Data Mining Next-Generation Sequencing of Antibody Repertoires. Abs are immune system proteins that recognize noxious molecules for elimination. Their sequence diversity and binding versatility have made Abs the primary class of biopharmaceuticals. Recently, it has become possible to query their immense natural diversity using next-generation sequencing of Ig gene repertoires (Ig-seq). However, Ig-seq outputs are currently fragmented across repositories and tend to be presented as raw nucleotide reads, which means nontrivial effort is required to reuse the data for analysis. To address this issue, we have collected Ig-seq outputs from 55 studies, covering more than half a billion Ab sequences across diverse immune states, organisms (primarily human and mouse), and individuals. We have sorted, cleaned, annotated, translated, and numbered these sequences and make the data available via our Observed Antibody Space (OAS) resource at http://antibodymap.org The data within OAS will be regularly updated with newly released Ig-seq datasets. 
We believe OAS will facilitate data mining of immune repertoires for improved understanding of the immune system and development of better biotherapeutics.",OAS,0.866781175,Antibody,0.6304847,OAS,0.866781175,1,NA,34655133,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,9/14/2018 +29201145,http://www.isical.ac.in,"OCDD: an obesity and co-morbid disease database. Background Obesity is a medical condition that is known for increased body mass index (BMI). It is also associated with chronic low level inflammation. Obesity disrupts the immune-metabolic homeostasis by changing the secretion of adipocytes. This affects the end-organs, and gives rise to several diseases including type 2 diabetes, asthma, non-alcoholic fatty liver diseases and cancers. These diseases are known as co-morbid diseases. Several studies have explored the underlying molecular mechanisms of developing obesity associated comorbid diseases. To understand the development and progression of diseases associated with obesity, we need a detailed scenario of gene interactions and the distribution of the responsible genes in human system. Results Obesity and Co-morbid Disease Database (OCDD) is designed for relating obesity and its co-morbid diseases using literature mining, and computational and systems biology approaches. OCDD is aimed to investigate the genes associated with comorbidity. Several existing databases have been used to extract molecular interactions and functional annotations of each gene. The degree of co-morbid associations has been measured and made available to the users. The database is available at http://www.isical.ac.in/~systemsbiology/OCDD/home.php. Conclusions The main objective of the database is to derive the relations among the genes that are involved in both obesity and its co-morbid diseases. 
Functional annotation of common genes, gene interaction networks and key driver analyses have made the database a valuable and comprehensive resource for investigating the causal links between obesity and co-morbid diseases.",OCDD,0.967281461,Obesity and Co-morbid Disease Database,0.974759728,Obesity and Co-morbid Disease Database,0.974759728,1,23200141,NA,low_prob_best_name,do not remove,do not merge,NA,NA,NA,NA,11/21/2017 +28365722,http://ocappidb.uca.works,"OCaPPI-Db: an oligonucleotide probe database for pathogen identification through hybridization capture. . The detection and identification of bacterial pathogens involved in acts of bio- and agroterrorism are essential to avoid pathogen dispersal in the environment and propagation within the population. Conventional molecular methods, such as PCR amplification, DNA microarrays or shotgun sequencing, are subject to various limitations when assessing environmental samples, which can lead to inaccurate findings. We developed a hybridization capture strategy that uses a set of oligonucleotide probes to target and enrich biomarkers of interest in environmental samples. Here, we present Oligonucleotide Capture Probes for Pathogen Identification Database (OCaPPI-Db), an online capture probe database containing a set of 1,685 oligonucleotide probes allowing for the detection and identification of 30 biothreat agents up to the species level. This probe set can be used in its entirety as a comprehensive diagnostic tool or can be restricted to a set of probes targeting a specific pathogen or virulence factor according to the user's needs. : http://ocappidb.uca.works.",OCaPPI-Db,0.996962115,Oligonucleotide Capture Probes for Pathogen Identification Database,0.991928976,OCaPPI-Db,0.996962115,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +34241085,http://oscar.as.uky.edu,"OCELOT: An infrastructure for data-driven research to discover and design crystalline organic semiconductors. 
Materials design and discovery are often hampered by the slow pace and materials and human costs associated with Edisonian trial-and-error screening approaches. Recent advances in computational power, theoretical methods, and data science techniques, however, are being manifest in a convergence of these tools to enable in silico materials discovery. Here, we present the development and deployment of computational materials data and data analytic approaches for crystalline organic semiconductors. The OCELOT (Organic Crystals in Electronic and Light-Oriented Technologies) infrastructure, consisting of a Python-based OCELOT application programming interface and OCELOT database, is designed to enable rapid materials exploration. The database contains a descriptor-based schema for high-throughput calculations that have been implemented on more than 56 000 experimental crystal structures derived from 47 000 distinct molecular structures. OCELOT is open-access and accessible via a web-user interface at https://oscar.as.uky.edu.",OCELOT,0.996991932,Organic Crystals in Electronic and,0.949127361,OCELOT,0.996991932,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +32974523,http://ocins.cftri.com/ocins,"Ocins database: a database of bug-busters from Bifidobacterium, Lactobacillus, and Enterococcus. The ocins are antimicrobial polypeptides produced by probiotic microbes, such as Lactobacillus , Enterococcus , Streptococcus , Leuconostoc and Bifidobacterium . They are produced in response to stress and for the self-defense of the bacterium. It is indispensable to understand their mechanistic characteristics, structures, and functions, if the food industry is to reduce contamination levels and produce germfree foods. Databases of the ocins that are readily accessible to the food industry are scarce, but urgently required. 
Therefore, we established a very useful, unique, and a simple ocin database, which not merely provides information about ocins, but also directs their utilisation in the food industry. The database includes information about each ocin, its amino acid sequence, molecular weight, and isoelectric point. The database also possesses all the currently known ocin (probiotic origin only) sequences and structures, target organisms, and relevant to food industries (aqua culture, dairy and meat industries), which is hard to obtain in other databases. The database is free for public and accessed at http://ocins.cftri.com/ocins/.",Ocins,0.64978832,NA,0,Ocins,0.64978832,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/13/2019 +25604238,http://ocri-genomics.org/ocsESTdb,"ocsESTdb: a database of oil crop seed EST sequences for comparative analysis and investigation of a global metabolic network and oil accumulation metabolism. Background Oil crop seeds are important sources of fatty acids (FAs) for human and animal nutrition. Despite their importance, there is a lack of an essential bioinformatics resource on gene transcription of oil crops from a comparative perspective. In this study, we developed ocsESTdb, the first database of expressed sequence tag (EST) information on seeds of four large-scale oil crops with an emphasis on global metabolic networks and oil accumulation metabolism that target the involved unigenes. Description A total of 248,522 ESTs and 106,835 unigenes were collected from the cDNA libraries of rapeseed (Brassica napus), soybean (Glycine max), sesame (Sesamum indicum) and peanut (Arachis hypogaea). These unigenes were annotated by a sequence similarity search against databases including TAIR, NR protein database, Gene Ontology, COG, Swiss-Prot, TrEMBL and Kyoto Encyclopedia of Genes and Genomes (KEGG). 
Five genome-scale metabolic networks that contain different numbers of metabolites and gene-enzyme reaction-association entries were analysed and constructed using Cytoscape and yEd programs. Details of unigene entries, deduced amino acid sequences and putative annotation are available from our database to browse, search and download. Intuitive and graphical representations of EST/unigene sequences, functional annotations, metabolic pathways and metabolic networks are also available. ocsESTdb will be updated regularly and can be freely accessed at http://ocri-genomics.org/ocsESTdb/ . Conclusion ocsESTdb may serve as a valuable and unique resource for comparative analysis of acyl lipid synthesis and metabolism in oilseed plants. It also may provide vital insights into improving oil content in seeds of oil crop species by transcriptional reconstruction of the metabolic network.",ocsESTdb,0.997286081,NA,0,ocsESTdb,0.997286081,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/21/2015 +26995712,http://csbg.cnb.csic.es/odcs,"Rare disease relations through common genes and protein interactions. ODCs (Orphan Disease Connections), available at http://csbg.cnb.csic.es/odcs, is a novel resource to explore potential molecular relations between rare diseases. These molecular relations have been established through the integration of disease susceptibility genes and human protein-protein interactions. The database currently contains 54,941 relations between 3032 diseases.",ODCs,0.995768189,Orphan Disease Connections,0.98984336,ODCs,0.995768189,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/16/2016 +31662803,http://db.cger.nies.go.jp/dataset/ODIAC,"The Open-source Data Inventory for Anthropogenic Carbon dioxide (CO2), version 2016 (ODIAC2016): A global, monthly fossil-fuel CO2 gridded emission data product for tracer transport simulations and surface flux inversions. 
The Open-source Data Inventory for Anthropogenic CO2 (ODIAC) is a global high-spatial resolution gridded emission data product that distributes carbon dioxide (CO2) emissions from fossil fuel combustion. The emission spatial distributions are estimated at a 1×1 km spatial resolution over land using power plant profiles (emission intensity and geographical location) and satellite-observed nighttime lights. This paper describes the year 2016 version of the ODIAC emission data product (ODIAC2016) and presents analyses that help guiding data users, especially for atmospheric CO2 tracer transport simulations and flux inversion analysis. Since the original publication in 2011, we have made modifications to our emission modeling framework in order to deliver a comprehensive global gridded emission data product. Major changes from the 2011 publication are 1) the use of emissions estimates made by the Carbon Dioxide Information Analysis Center (CDIAC) at the Oak Ridge National Laboratory (ORNL) by fuel type (solid, liquid, gas, cement manufacturing, gas flaring and international aviation and marine bunkers), 2) the use of multiple spatial emission proxies by fuel type such as nightlight data specific to gas flaring and ship/aircraft fleet tracks and 3) the inclusion of emission temporal variations. Using global fuel consumption data, we extrapolated the CDIAC emissions estimates for the recent years and produced the ODIAC2016 emission data product that covers 2000-2015. Our emission data can be viewed as an extended version of CDIAC gridded emission data product, which should allow data users to impose global fossil fuel emissions in more comprehensive manner than original CDIAC product. Our new emission modeling framework allows us to produce future versions of ODIAC emission data product with a timely update. Such capability has become more significant given the CDIAC/ORNL's shutdown. 
ODIAC data product could play an important role to support carbon cycle science, especially modeling studies with space-based CO2 data collected near real time by ongoing carbon observing missions such as Japanese Greenhouse Observing SATellite (GOSAT), NASA's Orbiting Carbon Observatory 2 (OCO-2) and upcoming future missions. The ODIAC emission data product including the latest version of the ODIAC emission data (ODIAC2017, 2000-2016), is distributed from http://db.cger.nies.go.jp/dataset/ODIAC/ with a DOI.",ODIAC,0.977021754,Open-source Data Inventory for Anthropogenic CO2,0.870113115,ODIAC,0.977021754,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/18/2018 +32324748,http://www.odobd.org,"OdoBD: An online database for the dragonflies and damselflies of Bangladesh. Combining scientific data over a long-time period is necessary for generating large-scale datasets, which are an essential component of comparative analysis for understanding evolutionary processes. Furthermore, monitoring temporal and spatial distributions of animals at a global and regional scale is essential for studying climate change driven extinction risks. Regional and global datasets focusing on different animal groups are on the rise to meet such challenges. Although being one of the earliest and best-known insect groups, the data on Odonata remains rudimentary and dispersed, especially in the South Asian region. Bangladesh, being located within a biodiversity hotspot, possesses a large number of odonate species and many of them are endemic to the South Asian region. We have developed an online database for the Odonata of Bangladesh by compiling and digitizing data from our last four years of field studies, from previously published research articles and field guides, and also by collecting data from citizen scientists. 
The Odonata of Bangladesh database (accessible at http://www.odobd.org) contains phenotypic, genotypic, photographic, taxonomic, biogeographic and faunistic data of the Odonata of Bangladesh. The database will be a valuable resource for understanding diversity, distributions, extinction risks and conservation planning of the Odonata of Bangladesh. Finally, phenotypic, spatial and temporal data of Odonata of Bangladesh datasets can be integrated with other regional datasets for analyzing macroevolutionary trends and to monitor the effect of climate change on odonates.",OdoBD,0.832681954,NA,0,OdoBD,0.832681954,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/23/2020 +24909981,http://www.ceams-carsm.ca/en/MASS,"Montreal Archive of Sleep Studies: an open-access resource for instrument benchmarking and exploratory research. Manual processing of sleep recordings is extremely time-consuming. Efforts to automate this process have shown promising results, but automatic systems are generally evaluated on private databases, not allowing accurate cross-validation with other systems. In lacking a common benchmark, the relative performances of different systems are not compared easily and advances are compromised. To address this fundamental methodological impediment to sleep study, we propose an open-access database of polysomnographic biosignals. To build this database, whole-night recordings from 200 participants [97 males (aged 42.9 ± 19.8 years) and 103 females (aged 38.3 ± 18.9 years); age range: 18-76 years] were pooled from eight different research protocols performed in three different hospital-based sleep laboratories. All recordings feature a sampling frequency of 256 Hz and an electroencephalography (EEG) montage of 4-20 channels plus standard electro-oculography (EOG), electromyography (EMG), electrocardiography (ECG) and respiratory signals. 
Access to the database can be obtained through the Montreal Archive of Sleep Studies (MASS) website (http://www.ceams-carsm.ca/en/MASS), and requires only affiliation with a research institution and prior approval by the applicant's local ethical review board. Providing the research community with access to this free and open sleep database is expected to facilitate the development and cross-validation of sleep analysis automation systems. It is also expected that such a shared resource will be a catalyst for cross-centre collaborations on difficult topics such as improving inter-rater agreement on sleep stage scoring.",NA,0,of,0.834123671,of,0.834123671,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,6/9/2014 +22009674,http://darcsite.genzentrum.lmu.de/darc,"The DARC site: a database of aligned ribosomal complexes. The ribosome is a highly dynamic machine responsible for protein synthesis within the cell. Cryo-electron microscopy (cryo-EM) and X-ray crystallography structures of ribosomal particles, alone and in complex with diverse ligands (protein factors, RNAs and small molecules), have revealed the dynamic nature of the ribosome and provided much needed insight into translation and its regulation. In the past years, there has been exponential growth in the deposition of cryo-EM maps into the Electron Microscopy Data Bank (EMDB) as well as atomic structures into the Protein Data Bank (PDB). Unfortunately, the deposited ribosomal particles usually have distinct orientations with respect to one another, which complicate the comparison of the available structures. To simplify this, we have developed a Database of Aligned Ribosomal Complexes, the DARC site (http://darcsite.genzentrum.lmu.de/darc/), which houses the available cryo-EM maps and atomic coordinates of ribosomal particles from the EMDB and PDB aligned within a common coordinate system. 
An easy-to-use, searchable interface allows users to access and download >130 cryo-EM maps and >300 atomic models in the format of brix and pdb files, respectively. The aligned coordinate system substantially simplifies direct visualization of conformational changes in the ribosome, such as subunit rotation and head-swiveling, as well as direct comparison of bound ligands, such as antibiotics or translation factors.",DARC,0.665185809,of Aligned Ribosomal Complexes,0.849380463,of Aligned Ribosomal Complexes,0.849380463,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/18/2011 +25305456,http://www.epregistry.com.br,"First report on the antibody verification of HLA-DR, HLA-DQ and HLA-DP epitopes recorded in the HLA Epitope Registry. The International Registry of Antibody-Defined HLA Epitopes (http://www.epregistry.com.br) has been recently established as a tool to understand humoral responses to HLA mismatches. These epitopes can be structurally defined as eplets by three-dimensional molecular modeling and amino acid sequence differences between HLA antigens. A major goal is to identify HLA eplets that have been verified experimentally with informative antibodies. This report addresses class II epitopes encoded by genes in the HLA-D region. Our analysis included reviews of many publications about epitope specificity of class II reactive human and murine monoclonal antibodies and informative alloantibodies from HLA sensitized patients as well as our own antibody testing results. As of July 1, 2014, 24 HLA-DRB1/3/4/5, 15 DQB, 3 DQA and 8 DPB antibody-verified epitopes have been identified and recorded. 
The Registry is still a work-in-progress and will become a useful resource for HLA professionals interested in histocompatibility testing at the epitope level and investigating antibody responses to HLA mismatches in transplant patients.",NA,0,of Antibody-Defined,0.614211764,of Antibody-Defined,0.614211764,1,30458204,NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,10/13/2014 +22086951,http://genome.ucsc.edu,"The UCSC Genome Browser database: extensions and updates 2011. The University of California Santa Cruz Genome Browser (http://genome.ucsc.edu) offers online public access to a growing database of genomic sequence and annotations for a wide variety of organisms. The Browser is an integrated tool set for visualizing, comparing, analyzing and sharing both publicly available and user-generated genomic data sets. In the past year, the local database has been updated with four new species assemblies, and we anticipate another four will be released by the end of 2011. Further, a large number of annotation tracks have been either added, updated by contributors, or remapped to the latest human reference genome. Among these are new phenotype and disease annotations, UCSC genes, and a major dbSNP update, which required new visualization methods. Growing beyond the local database, this year we have introduced 'track data hubs', which allow the Genome Browser to provide access to remotely located sets of annotations. This feature is designed to significantly extend the number and variety of annotation tracks that are publicly available for visualization and analysis from within our site. 
We have also introduced several usability features including track search and a context-sensitive menu of options available with a right-click anywhere on the Browser's image.",NA,0,of California Santa Cruz Genome Browser,0.76435371,of California Santa Cruz Genome Browser,0.76435371,1,"23155063.0, 24270787.0, 25428374.0, 27899642.0, 30407534.0, 26590259.0, 33221922.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,11/15/2011 +34048545,http://research.nibmg.ac.in/dbcares/dbgenvoc,"dbGENVOC: database of GENomic Variants of Oral Cancer, with special reference to India. . Oral cancer is highly prevalent in India and is the most frequent cancer type among Indian males. It is also very common in southeast Asia. India has participated in the International Cancer Genome Consortium (ICGC) and some national initiatives to generate large-scale genomic data on oral cancer patients and analyze to identify associations and systematically catalog the associated variants. We have now created an open, web-accessible database of these variants found significantly associated with Indian oral cancer patients, with a user-friendly interface to enable easy mining. We have value added to this database by including relevant data collated from various sources on other global populations, thereby providing opportunities of comparative geographical and/or ethnic analyses. Currently, no other database of similar nature is available on oral cancer. We have developed Database of GENomic Variants of Oral Cancer, a browsable online database framework for storage, retrieval and analysis of large-scale data on genomic variants and make it freely accessible to the scientific community. Presently, the web-accessible database allows potential users to mine data on ∼24 million clinically relevant somatic and germline variants derived from exomes (n = 100) and whole genomes (n = 5) of Indian oral cancer patients; all generated by us. 
Variant data from The Cancer Genome Atlas and data manually curated from peer-reviewed publications were also incorporated into the database for comparative analyses. It allows users to query the database by a single gene, multiple genes, multiple variant sites, genomic region, patient ID and pathway identities. Database URL: http://research.nibmg.ac.in/dbcares/dbgenvoc/.",dbGENVOC,0.722545326,of GENomic Variants of,0.726372351,of GENomic Variants of,0.726372351,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,5/1/2021 +35033323,http://umid-aiims.icmr.org.in,"Development of the first DNA database and identification portal for identification of Unidentified bodies in India - UMID. Identifying missing persons and unidentified dead bodies is a well-documented global problem in recent years. To curb this issue, countries such as the USA, UK, and Australia already have well-established DNA databases. Considering the alarming number of unidentified/unclaimed dead bodies reported in India every year, it is evident that the current practices are not sufficient to establish their identities. Forensic medicine professionals are ethically, morally, and dutybound to collect information about missing and unidentified persons and work with the government agencies to determine their identity. Concerning the social and public interest, we have developed the first-ever identification portal and DNA database of unidentified dead bodies autopsied at the Department of Forensic Medicine and Toxicology, AIIMS, New Delhi, India. After the investigation officer's informed consent, biological samples from unidentified dead bodies and a detailed phenotypic description, anthropological data and other visual characteristics of the deceased are recorded at the time of autopsy. 
This information is uploaded on our database which is available for public access, and the genotypic information generated through STR analysis is only available for internal usage.Claimants (biological relatives) may browse through the URL (https://umid-aiims.icmr.org.in/), and if they wish to claim an unidentified dead body, they may approach as per the given guidelines. The DNA profiles generated include a total of 16 STRs (15 autosomal tetranucleotide microsatellite STRs and 1 Sex Chromosome Specific STR). The claimant's STR profile is run through the questioned database to look for a potential match. If positive, the investigating officer of that particular case is informed for further necessary action. Until December 2020, our database consisted the information of 255 individuals and two unidentified cadavers were identified. This project's success can also lead to a pioneering National DNA database of unidentified and missing persons in India.",UMID,0.578501821,of Unidentified bodies in India,0.900330096,of Unidentified bodies in India,0.900330096,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,12/9/2021 +33247934,http://ogda.ytu.edu.cn,"OGDA: a comprehensive organelle genome database for algae. . Algae are the oldest taxa on Earth, with an evolutionary relationship that spans prokaryotes (Cyanobacteria) and eukaryotes. A long evolutionary history has led to high algal diversity. Their organelle DNAs are characterized by uniparental inheritance and a compact genome structure compared with nuclear genomes; thus, they are efficient molecular tools for the analysis of gene structure, genome structure, organelle function and evolution. However, an integrated organelle genome database for algae, which could enable users to both examine and use relevant data, has not previously been developed. 
Therefore, to provide an organelle genome platform for algae, we have developed a user-friendly database named Organelle Genome Database for Algae (OGDA, http://ogda.ytu.edu.cn/). OGDA contains organelle genome data either retrieved from several public databases or sequenced in our laboratory (Laboratory of Genetics and Breeding of Marine Organism [MOGBL]), which are continuously updated. The first release of OGDA contains 1055 plastid genomes and 755 mitochondrial genomes. Additionally, a variety of applications have been integrated into this platform to analyze the structural characteristics, collinearity and phylogeny of organellar genomes for algae. This database represents a useful tool for users, enabling the rapid retrieval and analysis of information related to organellar genomes for biological discovery.",OGDA,0.98520869,Organelle Genome Database for Algae,0.973292843,OGDA,0.98520869,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2020 +26827236,http://www.bioinfo-cbs.org/ogdd,"OGDD (Olive Genetic Diversity Database): a microsatellite markers' genotypes database of worldwide olive trees for cultivar identification and virgin olive oil traceability. . Olive (Olea europaea), whose importance is mainly due to nutritional and health features, is one of the most economically significant oil-producing trees in the Mediterranean region. Unfortunately, the increasing market demand towards virgin olive oil could often result in its adulteration with less expensive oils, which is a serious problem for the public and quality control evaluators of virgin olive oil. Therefore, to avoid frauds, olive cultivar identification and virgin olive oil authentication have become a major issue for the producers and consumers of quality control in the olive chain. Presently, genetic traceability using SSR is the cost effective and powerful marker technique that can be employed to resolve such problems. 
However, to identify an unknown monovarietal virgin olive oil cultivar, a reference system has become necessary. Thus, an Olive Genetic Diversity Database (OGDD) (http://www.bioinfo-cbs.org/ogdd/) is presented in this work. It is a genetic, morphologic and chemical database of worldwide olive tree and oil having a double function. In fact, besides being a reference system generated for the identification of unkown olive or virgin olive oil cultivars based on their microsatellite allele size(s), it provides users additional morphological and chemical information for each identified cultivar. Currently, OGDD is designed to enable users to easily retrieve and visualize biologically important information (SSR markers, and olive tree and oil characteristics of about 200 cultivars worldwide) using a set of efficient query interfaces and analysis tools. It can be accessed through a web service from any modern programming language using a simple hypertext transfer protocol call. The web site is implemented in java, JavaScript, PHP, HTML and Apache with all major browsers supported. Database URL: http://www.bioinfo-cbs.org/ogdd/.",OGDD,0.991602957,Olive Genetic Diversity Database,0.950038569,OGDD,0.991602957,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/30/2016 +27799467,http://ogee.medgenius.info,"OGEE v2: an update of the online gene essentiality database with special focus on differentially essential genes in human cancer cell lines. OGEE is an Online GEne Essentiality database. To enhance our understanding of the essentiality of genes, in OGEE we collected experimentally tested essential and non-essential genes, as well as associated gene properties known to contribute to gene essentiality. We focus on large-scale experiments, and complement our data with text-mining results. 
We organized tested genes into data sets according to their sources, and tagged those with variable essentiality statuses across data sets as conditionally essential genes, intending to highlight the complex interplay between gene functions and environments/experimental perturbations. Developments since the last public release include increased numbers of species and gene essentiality data sets, inclusion of non-coding essential sequences and genes with intermediate essentiality statuses. In addition, we included 16 essentiality data sets from cancer cell lines, corresponding to 9 human cancers; with OGEE, users can easily explore the shared and differentially essential genes within and between cancer types. These genes, especially those derived from cell lines that are similar to tumor samples, could reveal the oncogenic drivers, paralogous gene expression pattern and chromosomal structure of the corresponding cancer types, and can be further screened to identify targets for cancer therapy and/or new drug development. OGEE is freely available at http://ogee.medgenius.info.",OGEE,0.996867657,NA,0,OGEE,0.996867657,1,NA,"22075992.0, 33084874.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/30/2016 +22075992,http://ogeedb.embl.de,"OGEE: an online gene essentiality database. OGEE is an Online GEne Essentiality database. Its main purpose is to enhance our understanding of the essentiality of genes. This is achieved by collecting not only experimentally tested essential and non-essential genes, but also associated gene features such as expression profiles, duplication status, conservation across species, evolutionary origins and involvement in embryonic development. We focus on large-scale experiments and complement our data with text-mining results. Genes are organized into data sets according to their sources. 
Genes with variable essentiality status across data sets are tagged as conditionally essential, highlighting the complex interplay between gene functions and environments. Linked tools allow the user to compare gene essentiality among different gene groups, or compare features of essential genes to non-essential genes, and visualize the results. OGEE is freely available at http://ogeedb.embl.de.",OGEE,0.996816039,NA,0,OGEE,0.996816039,1,NA,"27799467.0, 33084874.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/10/2011 +33084874,http://v3.ogee.info,"OGEE v3: Online GEne Essentiality database with increased coverage of organisms and human cell lines. OGEE is an Online GEne Essentiality database. Gene essentiality is not a static and binary property, rather a context-dependent and evolvable property in all forms of life. In OGEE we collect not only experimentally tested essential and non-essential genes, but also associated gene properties that contributes to gene essentiality. We tagged conditionally essential genes that show variable essentiality statuses across datasets to highlight complex interplays between gene functions and environmental/experimental perturbations. OGEE v3 contains gene essentiality datasets for 91 species; almost doubled from 48 species in previous version. To accommodate recent advances on human cancer essential genes (as known as tumor dependency genes) that could serve as targets for cancer treatment and/or drug development, we expanded the collection of human essential genes from 16 cell lines in previous to 581. These human cancer cell lines were tested with high-throughput experiments such as CRISPR-Cas9 and RNAi; in total, 150 of which were tested by both techniques. 
We also included factors known to contribute to gene essentiality for these cell lines, such as genomic mutation, methylation and gene expression, along with extensive graphical visualizations for ease of understanding of these factors. OGEE v3 can be accessible freely at https://v3.ogee.info.",OGEE,0.996505618,NA,0,OGEE,0.996505618,1,NA,"22075992.0, 27799467.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +30535146,http://ogob.ie,"Comparative Analysis of Oomycete Genome Evolution Using the Oomycete Gene Order Browser (OGOB). The oomycetes are a class of microscopic, filamentous eukaryotes within the stramenopiles-alveolates-rhizaria eukaryotic supergroup. They include some of the most destructive pathogens of animals and plants, such as Phytophthora infestans, the causative agent of late potato blight. Despite the threat they pose to worldwide food security and natural ecosystems, there is a lack of tools and databases available to study oomycete genetics and evolution. To this end, we have developed the Oomycete Gene Order Browser (OGOB), a curated database that facilitates comparative genomic and syntenic analyses of oomycete species. OGOB incorporates genomic data for 20 oomycete species including functional annotations and a number of bioinformatics tools. OGOB hosts a robust set of orthologous oomycete genes for evolutionary analyses. Here, we present the structure and function of OGOB as well as a number of comparative genomic analyses we have performed to better understand oomycete genome evolution. We analyze the extent of oomycete gene duplication and identify tandem gene duplication as a driving force of the expansion of secreted oomycete genes. We identify core genes that are present and microsyntenically conserved (termed syntenologs) in oomycete lineages and identify the degree of microsynteny between each pair of the 20 species housed in OGOB. 
Consistent with previous comparative synteny analyses between a small number of oomycete species, our results reveal an extensive degree of microsyntenic conservation amongst genes with housekeeping functions within the oomycetes. OGOB is available at https://ogob.ie.",OGOB,0.957013845,Oomycete Gene Order Browser,0.890639731,OGOB,0.957013845,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +33581334,"http://www.oglyp.org/, http://www.oglyp.org/download.php","OGP: A Repository of Experimentally Characterized O-glycoproteins to Facilitate Studies on O-glycosylation. Numerous studies on cancers, biopharmaceuticals, and clinical trials have necessitated comprehensive and precise analysis of protein O-glycosylation. However, the lack of updated and convenient databases deters the storage of and reference to emerging O-glycoprotein data. To resolve this issue, an O-glycoprotein repository named OGP was established in this work. It was constructed with a collection of O-glycoprotein data from different sources. OGP contains 9354 O-glycosylation sites and 11,633 site-specific O-glycans mapping to 2133 O-glycoproteins, and it is the largest O-glycoprotein repository thus far. Based on the recorded O-glycosylation sites, an O-glycosylation site prediction tool was developed. Moreover, an OGP-based website is already available (https://www.oglyp.org/). The website comprises four specially designed and user-friendly modules: statistical analysis, database search, site prediction, and data submission. The first version of OGP repository and the website allow users to obtain various O-glycoprotein-related information, such as protein accession Nos., O-glycosylation sites, O-glycopeptide sequences, site-specific O-glycan structures, experimental methods, and potential O-glycosylation sites. O-glycosylation data mining can be performed efficiently on this website, which will greatly facilitate related studies. 
In addition, the database is accessible from OGP website (https://www.oglyp.org/download.php).",OGP,0.993771553,NA,0,OGP,0.993771553,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/10/2021 +31566225,http://ogrdb.airr-community.org,"OGRDB: a reference database of inferred immune receptor genes. High-throughput sequencing of the adaptive immune receptor repertoire (AIRR-seq) is providing unprecedented insights into the immune response to disease and into the development of immune disorders. The accurate interpretation of AIRR-seq data depends on the existence of comprehensive germline gene reference sets. Current sets are known to be incomplete and unrepresentative of the degree of polymorphism and diversity in human and animal populations. A key issue is the complexity of the genomic regions in which they lie, which, because of the presence of multiple repeats, insertions and deletions, have not proved tractable with short-read whole genome sequencing. Recently, tools and methods for inferring such gene sequences from AIRR-seq datasets have become available, and a community approach has been developed for the expert review and publication of such inferences. Here, we present OGRDB, the Open Germline Receptor Database (https://ogrdb.airr-community.org), a public resource for the submission, review and publication of previously unknown receptor germline sequences together with supporting evidence.",OGRDB,0.995836377,Germline Receptor Database,0.822012579,OGRDB,0.995836377,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +27234245,http://qtaro.abr.affrc.go.jp/ogro,"OGRO: The Overview of functionally characterized Genes in Rice online database. Background The high-quality sequence information and rich bioinformatics tools available for rice have contributed to remarkable advances in functional genomics. 
To facilitate the application of gene function information to the study of natural variation in rice, we comprehensively searched for articles related to rice functional genomics and extracted information on functionally characterized genes. Results As of 31 March 2012, 702 functionally characterized genes were annotated. This number represents about 1.6% of the predicted loci in the Rice Annotation Project Database. The compiled gene information is organized to facilitate direct comparisons with quantitative trait locus (QTL) information in the Q-TARO database. Comparison of genomic locations between functionally characterized genes and the QTLs revealed that QTL clusters were often co-localized with high-density gene regions, and that the genes associated with the QTLs in these clusters were different genes, suggesting that these QTL clusters are likely to be explained by tightly linked but distinct genes. Information on the functionally characterized genes compiled during this study is now available in the O verview of Functionally Characterized G enes in R ice O nline database (OGRO) on the Q-TARO website ( http://qtaro.abr.affrc.go.jp/ogro ). The database has two interfaces: a table containing gene information, and a genome viewer that allows users to compare the locations of QTLs and functionally characterized genes. Conclusions OGRO on Q-TARO will facilitate a candidate-gene approach to identifying the genes responsible for QTLs. Because the QTL descriptions in Q-TARO contain information on agronomic traits, such comparisons will also facilitate the annotation of functionally characterized genes in terms of their effects on traits important for rice breeding. 
The increasing amount of information on rice gene function being generated from mutant panels and other types of studies will make the OGRO database even more valuable in the future.",OGRO,0.748835266,NA,0,OGRO,0.748835266,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/24/2012 +31612943,http://ohnologs.curie.fr,"OHNOLOGS v2: a comprehensive resource for the genes retained from whole genome duplication in vertebrates. All vertebrates including human have evolved from an ancestor that underwent two rounds of whole genome duplication (2R-WGD). In addition, teleost fish underwent an additional third round of genome duplication (3R-WGD). The genes retained from these genome duplications, so-called ohnologs, have been instrumental in the evolution of vertebrate complexity, development and susceptibility to genetic diseases. However, the identification of vertebrate ohnologs has been challenging, due to lineage specific genome rearrangements since 2R- and 3R-WGD. We previously identified vertebrate ohnologs using a novel synteny comparison across multiple genomes. Here, we refine and apply this approach on 27 vertebrate genomes to identify ohnologs from both 2R- and 3R-WGD, while taking into account the phylogenetically biased sampling of available species. We assemble vertebrate ohnolog pairs and families in an expanded OHNOLOGS v2 database. We find that teleost fish have retained more 2R-WGD ohnologs than mammals and sauropsids, and that these 2R-ohnologs have retained significantly more ohnologs from the subsequent 3R-WGD than genes without 2R-ohnologs. Interestingly, species with fewer extant genes, such as sauropsids, have retained similar or higher proportions of ohnologs. 
OHNOLOGS v2 should allow deeper evolutionary genomic analysis of the impact of WGD on vertebrates and can be freely accessed at http://ohnologs.curie.fr.",OHNOLOGS,0.941329122,NA,0,OHNOLOGS,0.941329122,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +21819938,http://www.genolevures.org,"The Génolevures database. The Génolevures online database (URL: http://www.genolevures.org) stores and provides the data and results obtained by the Génolevures Consortium through several campaigns of genome annotation of the yeasts in the Saccharomycotina subphylum (hemiascomycetes). This database is dedicated to large-scale comparison of these genomes, storing not only the different chromosomal elements detected in the sequences, but also the logical relations between them. The database is divided into a public part, accessible to anyone through Internet, and a private part where the Consortium members make genome annotations with our Magus annotation system; this system is used to annotate several related genomes in parallel. The public database is widely consulted and offers structured data, organized using a REST web site architecture that allows for automated requests. The implementation of the database, as well as its associated tools and methods, is evolving to cope with the influx of genome sequences produced by Next Generation Sequencing (NGS).",olevures,0.587894917,NA,0,olevures,0.587894917,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,6/30/2011 +22102592,http://oligogenome.stanford.edu,"The Human OligoGenome Resource: a database of oligonucleotide capture probes for resequencing target regions across the human genome. Recent exponential growth in the throughput of next-generation DNA sequencing platforms has dramatically spurred the use of accessible and scalable targeted resequencing approaches. This includes candidate region diagnostic resequencing and novel variant validation from whole genome or exome sequencing analysis. 
We have previously demonstrated that selective genomic circularization is a robust in-solution approach for capturing and resequencing thousands of target human genome loci such as exons and regulatory sequences. To facilitate the design and production of customized capture assays for any given region in the human genome, we developed the Human OligoGenome Resource (http://oligogenome.stanford.edu/). This online database contains over 21 million capture oligonucleotide sequences. It enables one to create customized and highly multiplexed resequencing assays of target regions across the human genome and is not restricted to coding regions. In total, this resource provides 92.1% in silico coverage of the human genome. The online server allows researchers to download a complete repository of oligonucleotide probes and design customized capture assays to target multiple regions throughout the human genome. The website has query tools for selecting and evaluating capture oligonucleotides from specified genomic regions.",OligoGenome,0.700042248,Human OligoGenome Resource,0.658058562,OligoGenome,0.700042248,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/18/2011 +29688352,http://www.mccordresearch.com.au,"OliveNet™: a comprehensive library of compounds from Olea europaea. . Accumulated epidemiological, clinical and experimental evidence has indicated the beneficial health effects of the Mediterranean diet, which is typified by the consumption of virgin olive oil (VOO) as a main source of dietary fat. At the cellular level, compounds derived from various olive (Olea europaea), matrices, have demonstrated potent antioxidant and anti-inflammatory effects, which are thought to account, at least in part, for their biological effects. 
Research efforts are expanding into the characterization of compounds derived from Olea europaea, however, the considerable diversity and complexity of the vast array of chemical compounds have made their precise identification and quantification challenging. As such, only a relatively small subset of olive-derived compounds has been explored for their biological activity and potential health effects to date. Although there is adequate information describing the identification or isolation of olive-derived compounds, these are not easily searchable, especially when attempting to acquire chemical or biological properties. Therefore, we have created the OliveNet™ database containing a comprehensive catalogue of compounds identified from matrices of the olive, including the fruit, leaf and VOO, as well as in the wastewater and pomace accrued during oil production. From a total of 752 compounds, chemical analysis was sufficient for 676 individual compounds, which have been included in the database. The database is curated and comprehensively referenced containing information for the 676 compounds, which are divided into 13 main classes and 47 subclasses. Importantly, with respect to current research trends, the database includes 222 olive phenolics, which are divided into 13 subclasses. To our knowledge, OliveNet™ is currently the only curated open access database with a comprehensive collection of compounds associated with Olea europaea.Database URL: https://www.mccordresearch.com.au.",OliveNetâ,0.963371992,NA,0,OliveNetâ,0.963371992,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2018 +"25399418, 29106550, 33174605",http://omabrowser.org,"The OMA orthology database in 2015: function predictions, better plant support, synteny view and other improvements. The Orthologous Matrix (OMA) project is a method and associated database inferring evolutionary relationships amongst currently 1706 complete proteomes (i.e. 
the protein sequence associated for every protein-coding gene in all genomes). In this update article, we present six major new developments in OMA: (i) a new web interface; (ii) Gene Ontology function predictions as part of the OMA pipeline; (iii) better support for plant genomes and in particular homeologs in the wheat genome; (iv) a new synteny viewer providing the genomic context of orthologs; (v) statically computed hierarchical orthologous groups subsets downloadable in OrthoXML format; and (vi) possibility to export parts of the all-against-all computations and to combine them with custom data for 'client-side' orthology prediction. OMA can be accessed through the OMA Browser and various programmatic interfaces at http://omabrowser.org.",OMA,0.997215748,Orthologous Matrix,0.856745347,OMA,0.997215748,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +28182744,http://omdb.diracmaterials.org,"Organic materials database: An open-access online database for data mining. We present an organic materials database (OMDB) hosting thousands of Kohn-Sham electronic band structures, which is freely accessible online at http://omdb.diracmaterials.org. The OMDB focus lies on electronic structure, density of states and other properties for purely organic and organometallic compounds that are known to date. The electronic band structures are calculated using density functional theory for the crystal structures contained in the Crystallography Open Database. The OMDB web interface allows users to retrieve materials with specified target properties using non-trivial queries about their electronic structure. We illustrate the use of the OMDB and how it can become an organic part of search and prediction of novel functional materials via data mining techniques. 
As a specific example, we provide data mining results for metals and semiconductors, which are known to be rare in the class of organic materials.",OMDB,0.992427528,Organic materials database,0.874303088,OMDB,0.992427528,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/9/2017 +32556221,http://occupationalcohorts.net,"The OMEGA-NET International Inventory of Occupational Cohorts. In a recent count of cohort studies in Europe capturing information on occupation and/or occupational exposures, we estimated that there are more than 60 major studies with some type of occupational information that enrolled over 30 million persons. With few exceptions there have been no large-scale analyses systematically combining cohorts from this extraordinary resource. We present the development of an inventory of cohorts with occupational information in Europe and internationally and describe the online interactive tool with detailed information on existing cohorts. The OMEGA-NET inventory can be accessed at http://occupationalcohorts.net/ includes cohorts, case-control studies nested within cohorts and intervention studies that are active or can substantiate that their data are potentially accessible; that include data on occupation and/or industry or at least one occupational exposure; and that have at least one follow-up, either already conducted or planned. We expect that this open access inventory will be an important prerequisite for use of this resource of existing studies for research and policy development.",OMEGA-NET,0.988661706,NA,0,OMEGA-NET,0.988661706,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2020 +25024350,http://omictools.com,"OMICtools: an informative directory for multi-omic data analysis. . Recent advances in 'omic' technologies have created unprecedented opportunities for biological research, but current software and database resources are extremely fragmented. 
OMICtools is a manually curated metadatabase that provides an overview of more than 4400 web-accessible tools related to genomics, transcriptomics, proteomics and metabolomics. All tools have been classified by omic technologies (next-generation sequencing, microarray, mass spectrometry and nuclear magnetic resonance) associated with published evaluations of tool performance. Information about each tool is derived either from a diverse set of developers, the scientific literature or from spontaneous submissions. OMICtools is expected to serve as a useful didactic resource not only for bioinformaticians but also for experimental researchers and clinicians. Database URL: http://omictools.com/.",OMICtools,0.998182535,NA,0,OMICtools,0.998182535,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/14/2014 +25428349,http://omim.org,"OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders. Online Mendelian Inheritance in Man, OMIM(®), is a comprehensive, authoritative and timely research resource of curated descriptions of human genes and phenotypes and the relationships between them. The new official website for OMIM, OMIM.org (http://omim.org), was launched in January 2011. OMIM is based on the published peer-reviewed biomedical literature and is used by overlapping and diverse communities of clinicians, molecular biologists and genome scientists, as well as by students and teachers of these disciplines. Genes and phenotypes are described in separate entries and are given unique, stable six-digit identifiers (MIM numbers). OMIM entries have a structured free-text format that provides the flexibility necessary to describe the complex and nuanced relationships between genes and genetic phenotypes in an efficient manner. OMIM also has a derivative table of genes and genetic phenotypes, the Morbid Map. OMIM.org has enhanced search capabilities such as genome coordinate searching and thesaurus-enhanced search term options. 
Phenotypic series have been created to facilitate viewing genetic heterogeneity of phenotypes. Clinical synopsis features are enhanced with UMLS, Human Phenotype Ontology and Elements of Morphology terms and image links. All OMIM data are available for FTP download and through an API. MIMmatch is a novel outreach feature to disseminate updates and encourage collaboration.",OMIM,0.977684259,Online Mendelian Inheritance in Man,0.973285995,OMIM,0.977684259,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/26/2014 +33749993,http://omnipathdb.org,"Integrated intra- and intercellular signaling knowledge for multicellular omics analysis. Molecular knowledge of biological processes is a cornerstone in omics data analysis. Applied to single-cell data, such analyses provide mechanistic insights into individual cells and their interactions. However, knowledge of intercellular communication is scarce, scattered across resources, and not linked to intracellular processes. To address this gap, we combined over 100 resources covering interactions and roles of proteins in inter- and intracellular signaling, as well as transcriptional and post-transcriptional regulation. We added protein complex information and annotations on function, localization, and role in diseases for each protein. The resource is available for human, and via homology translation for mouse and rat. The data are accessible via OmniPath's web service (https://omnipathdb.org/), a Cytoscape plug-in, and packages in R/Bioconductor and Python, providing access options for computational and experimental scientists. We created workflows with tutorials to facilitate the analysis of cell-cell interactions and affected downstream intracellular signaling processes. 
OmniPath provides a single access point to knowledge spanning intra- and intercellular processes for data analysis, as we demonstrate in applications studying SARS-CoV-2 infection and ulcerative colitis.",OmniPath,0.994131088,NA,0,OmniPath,0.994131088,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2021 +27515825,http://www.scfbio-iitd.res.in/software/onco/NavSite/index.htm,"Onco-Regulon: an integrated database and software suite for site specific targeting of transcription factors of cancer genes. . Transcription factors (TFs) bind at multiple sites in the genome and regulate expression of many genes. Regulating TF binding in a gene specific manner remains a formidable challenge in drug discovery because the same binding motif may be present at multiple locations in the genome. Here, we present Onco-Regulon (http://www.scfbio-iitd.res.in/software/onco/NavSite/index.htm), an integrated database of regulatory motifs of cancer genes clubbed with Unique Sequence-Predictor (USP) a software suite that identifies unique sequences for each of these regulatory DNA motifs at the specified position in the genome. USP works by extending a given DNA motif, in 5'→3', 3' →5' or both directions by adding one nucleotide at each step, and calculates the frequency of each extended motif in the genome by Frequency Counter programme. This step is iterated till the frequency of the extended motif becomes unity in the genome. Thus, for each given motif, we get three possible unique sequences. Closest Sequence Finder program predicts off-target drug binding in the genome. Inclusion of DNA-Protein structural information further makes Onco-Regulon a highly informative repository for gene specific drug development. 
We believe that Onco-Regulon will help researchers to design drugs which will bind to an exclusive site in the genome with no off-target effects, theoretically.Database URL: http://www.scfbio-iitd.res.in/software/onco/NavSite/index.htm.",Onco-Regulon,0.983626167,NA,0,Onco-Regulon,0.983626167,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/10/2016 +30445567,http://www.oncobase.biols.ac.cn,"OncoBase: a platform for decoding regulatory somatic mutations in human cancers. Whole-exome and whole-genome sequencing have revealed millions of somatic mutations associated with different human cancers, and the vast majority of them are located outside of coding sequences, making it challenging to directly interpret their functional effects. With the rapid advances in high-throughput sequencing technologies, genome-scale long-range chromatin interactions were detected, and distal target genes of regulatory elements were determined using three-dimensional (3D) chromatin looping. Herein, we present OncoBase (http://www.oncobase.biols.ac.cn/), an integrated database for annotating 81 385 242 somatic mutations in 68 cancer types from more than 120 cancer projects by exploring their roles in distal interactions between target genes and regulatory elements. OncoBase integrates local chromatin signatures, 3D chromatin interactions in different cell types and reconstruction of enhancer-target networks using state-of-the-art algorithms. It employs informative visualization tools to display the integrated local and 3D chromatin signatures and effects of somatic mutations on regulatory elements. Enhancer-promoter interactions estimated from chromatin interactions are integrated into a network diffusion system that quantitatively prioritizes somatic mutations and target genes from a large pool. 
Thus, OncoBase is a useful resource for the functional annotation of regulatory noncoding regions and systematically benchmarking the regulatory effects of embedded noncoding somatic mutations in human carcinogenesis.",OncoBase,0.998138845,NA,0,OncoBase,0.998138845,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +24651967,http://bioinfo.au.tsinghua.edu.cn/oncomirdb/Contact,"OncomiRDB: a database for the experimentally verified oncogenic and tumor-suppressive microRNAs. Summary MicroRNAs (miRNAs), a class of small regulatory RNAs, play important roles in cancer initiation, progression and therapy. MiRNAs are found to regulate diverse cancer-related processes by targeting a large set of oncogenic and tumor-suppressive genes. To establish a high-confidence reference resource for studying the miRNA-regulated target genes and cellular processes in cancer, we manually curated 2259 entries of cancer-related miRNA regulations with direct experimental evidence from ∼9000 abstracts, covering more than 300 miRNAs and 829 target genes across 25 cancer tissues. A web-based portal named oncomiRDB, which provides both graphical and text-based interfaces, was developed for easily browsing and searching all the annotations. It should be a useful resource for both the computational analysis and experimental study on miRNA regulatory networks and functions in cancer. Availability and implementation http://bioinfo.au.tsinghua.edu.cn/oncomirdb/Contact jgu@tsinghua.edu.cn Supplementary information Supplementary data are available at Bioinformatics online.",oncomiRDB,0.994694889,NA,0,oncomiRDB,0.994694889,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/20/2014 +24428888,http://tdb.ccmb.res.in/OncomiRdbB/index.htm,"OncomiRdbB: a comprehensive database of microRNAs and their targets in breast cancer. Background Given the estimate that 30% of our genes are controlled by microRNAs, it is essential that we understand the precise relationship between microRNAs and their targets. 
OncomiRs are microRNAs (miRNAs) that have been frequently shown to be deregulated in cancer. However, although several oncomiRs have been identified and characterized, there is as yet no comprehensive compilation of this data which has rendered it underutilized by cancer biologists. There is therefore an unmet need in generating bioinformatic platforms to speed the identification of novel therapeutic targets. Description We describe here OncomiRdbB, a comprehensive database of oncomiRs mined from different existing databases for mouse and humans along with novel oncomiRs that we have validated in human breast cancer samples. The database also lists their respective predicted targets, identified using miRanda, along with their IDs, sequences, chromosome location and detailed description. This database facilitates querying by search strings including microRNA name, sequence, accession number, target genes and organisms. The microRNA networks and their hubs with respective targets at 3'UTR, 5'UTR and exons of different pathway genes were also deciphered using the 'R' algorithm. Conclusion OncomiRdbB is a comprehensive and integrated database of oncomiRs and their targets in breast cancer with multiple query options which will help enhance both understanding of the biology of breast cancer and the development of new and innovative microRNA based diagnostic tools and targets of therapeutic significance. OncomiRdbB is freely available for download through the URL link http://tdb.ccmb.res.in/OncomiRdbB/index.htm.",OncomiRdbB,0.993847489,NA,0,OncomiRdbB,0.993847489,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/15/2014 +29657279,http://rna.sysu.edu.cn/onconcrna,"oncoNcRNA: A Web Portal for Exploring the Non-Coding RNAs with Oncogenic Potentials in Human Cancers. . Non-coding RNAs (ncRNAs) have been shown to contribute to tumorigenesis and progression. However, the functions of the majority of ncRNAs remain unclear. 
Through integrating published large-scale somatic copy number alterations (SCNAs) data from various human cancer types, we have developed oncoNcRNA, a user-friendly web portal to explore ncRNAs with oncogenic potential in human cancers. The portal characterizes the SCNAs of over 58,000 long non-coding RNAs (lncRNAs), 34,000 piwi-interacting RNAs (piRNAs), 2700 microRNAs (miRNAs), 600 transfer RNAs (tRNAs) and 400 small nucleolar RNAs (snoRNAs) in 64 human cancer types. It enables researchers to rapidly and intuitively analyze the oncogenic potential of ncRNAs of interest. Indeed, we have discovered a large number of ncRNAs which are frequently amplified or deleted within and across tumor types. Moreover, we built a web-based tool, Correlations, to explore the relationships between gene expression and copy number from ~10,000 tumor samples in 36 cancer types identified by The Cancer Genome Atlas (TCGA). oncoNcRNA is a valuable tool for investigating the function and clinical relevance of ncRNAs in human cancers. oncoNcRNA is freely available at http://rna.sysu.edu.cn/onconcrna/.",oncoNcRNA,0.992065489,NA,0,oncoNcRNA,0.992065489,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/7/2017 +29186335,http://oncoppi.emory.edu,"The OncoPPi Portal: an integrative resource to explore and prioritize protein-protein interactions for cancer target discovery. Motivation:As cancer genomics initiatives move toward comprehensive identification of genetic alterations in cancer, attention is now turning to understanding how interactions among these genes lead to the acquisition of tumor hallmarks. Emerging pharmacological and clinical data suggest a highly promising role of cancer-specific protein-protein interactions (PPIs) as druggable cancer targets. However, large-scale experimental identification of cancer-related PPIs remains challenging, and currently available resources to explore oncogenic PPI networks are limited. 
Results:Recently, we have developed a PPI high-throughput screening platform to detect PPIs between cancer-associated proteins in the context of cancer cells. Here, we present the OncoPPi Portal, an interactive web resource that allows investigators to access, manipulate and interpret a high-quality cancer-focused network of PPIs experimentally detected in cancer cell lines. To facilitate prioritization of PPIs for further biological studies, this resource combines network connectivity analysis, mutual exclusivity analysis of genomic alterations, cellular co-localization of interacting proteins and domain-domain interactions. Estimates of PPI essentiality allow users to evaluate the functional impact of PPI disruption on cancer cell proliferation. Furthermore, connecting the OncoPPi network with the approved drugs and compounds in clinical trials enables discovery of new tumor dependencies to inform strategies to interrogate undruggable targets like tumor suppressors. The OncoPPi Portal serves as a resource for the cancer research community to facilitate discovery of cancer targets and therapeutic development. Availability and implementation:The OncoPPi Portal is available at http://oncoppi.emory.edu. Contact:andrey.ivanov@emory.edu or hfu@emory.edu.",OncoPPi,0.987840295,NA,0,OncoPPi,0.987840295,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2018 +32597311,http://bioinformatics.zju.edu.cn/OncotRF,"OncotRF: an online resource for exploration of tRNA-derived fragments in human cancers. Transfer RNA-derived fragments (tRFs) are a new class of small non-coding RNAs whose biological roles in cancers are not well understood. Emerging evidence suggests that tRFs are involved in gene regulation at multiple levels. In this study, we constructed an integrative database, OncotRF (http://bioinformatics.zju.edu.cn/OncotRF), for in silico exploration of tRF functions, and identification of diagnostic and prognostic biomarkers in cancers. 
The database contains an analysis pipeline for tRF identification and characterization, analysis results of 11,211 small RNA sequencing samples and 8,776 RNA sequencing samples, and clinicopathologic annotation data from The Cancer Genome Atlas (TCGA). The results include: tRF identification and quantification across 33 cancers, abnormally expressed tRFs and genes, tRF-gene correlations, tRF-gene networks, survival analyses, and tRF-related functional enrichment analyses. Users are also able to identify differentially expressed tRFs, predict their functions, and assess the relevance of the tRF expression levels to the clinical outcome according to user-defined groups. Additionally, an online Kaplan-Meier plotter is available in OncotRF for plotting survival curves according to user-defined groups. OncotRF will be a valuable online database and functional annotation tool for researchers studying the roles, functions, and mechanisms of tRFs in human cancers.",OncotRF,0.998194695,NA,0,OncotRF,0.998194695,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/28/2020 +31195415,http://www.prostata-ca.net,"[The Onkonet database: taking stock of an Internet-based, multi-centre database on surgical prostate cancer treatment]. Background  The Onkonet database has been developed and coordinated by the Berliner Tumorzentrum e. V. (http://www.prostata-ca.net) and contains data on pre-, peri- and postoperative parameters of radical prostatectomy documented since January 2005. With its user-friendly interface and its integrated benchmarking tool, the main goal of Onkonet was to outline and improve the surgical care of prostate cancer patients in Germany. This study aimed to analyse all Onkonet data documented from the beginning of the project until June 2018. We focused on the completeness and plausibility of data to investigate and define the possibilities and limits of further analyses. 
Patients and methods  All patients who underwent radical prostatectomy in one of the urological clinics participating in this project until June 2018 were included in this retrospective study. The completeness of all documented patient data was analysed using Excel 2013. The statistical analysis was descriptive. Results  A total of 21 474 patients were documented in Onkonet. 58,6 % (12 591) of them had a complete dataset including date of birth, date of surgery, dates of hospitalisation and discharge, initial PSA value, Gleason score of the biopsy, clinical T stage, pathological T stage, pathological Gleason score, as well as information on the surgical technique. Mean completeness of pre-operative parameters was 26,8 %, of hospitalisation parameters 64,5 %, and of pathological parameters 58,1 %. Amongst these, the documentation of the pathological T stage was complete in 80,1 %, documentation of N stage in 78,8 %, of M stage in 74,8 %, of pathological Gleason Score in 78,7 %, and of R1 status in 78,7 %. Completeness of follow-up data was 8,1 %, with PSA data being available in 27,2 %, continence data in 23,0 %, and potency data in 13,9 %. Conclusions  Comprising 21 474 documented patients and over 200 parameters, Onkonet is one of the most comprehensive clinical registers for the documentation of prostate cancer patients in Germany. The data analysis showed that the limitations of such a database are mainly due to the high number of parameters and the high susceptibility to errors due to manual data submission.",Onkonet,0.994386017,NA,0,Onkonet,0.994386017,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/13/2019 +26637529,http://www.onrldb.org,"ONRLDB--manually curated database of experimentally validated ligands for orphan nuclear receptors: insights into new drug discovery. . Orphan nuclear receptors are potential therapeutic targets. 
The Orphan Nuclear Receptor Ligand Binding Database (ONRLDB) is an interactive, comprehensive and manually curated database of small molecule ligands targeting orphan nuclear receptors. Currently, ONRLDB consists of ∼11,000 ligands, of which ∼6500 are unique. All entries include information for the ligand, such as EC50 and IC50, number of aromatic rings and rotatable bonds, XlogP, hydrogen donor and acceptor count, molecular weight (MW) and structure. ONRLDB is a cross-platform database, where either the cognate small molecule modulators of a receptor or the cognate receptors to a ligand can be searched. The database can be searched using three methods: text search, advanced search or similarity search. Substructure search, cataloguing tools, and clustering tools can be used to perform advanced analysis of the ligand based on chemical similarity fingerprints, hierarchical clustering, binning partition and multidimensional scaling. These tools, together with the Tree function provided, deliver an interactive platform and a comprehensive resource for identification of common and unique scaffolds. As demonstrated, ONRLDB is designed to allow selection of ligands based on various properties and for designing novel ligands or to improve the existing ones. Database URL: http://www.onrldb.org/.",ONRLDB,0.997567415,Orphan Nuclear Receptor Ligand Binding Database,0.977192277,ONRLDB,0.997567415,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/4/2015 +24271386,http://bhapp.c2b2.columbia.edu/OnTheFly/index.php,"OnTheFly: a database of Drosophila melanogaster transcription factors and their binding sites. We present OnTheFly (http://bhapp.c2b2.columbia.edu/OnTheFly/index.php), a database comprising a systematic collection of transcription factors (TFs) of Drosophila melanogaster and their DNA-binding sites. TFs predicted in the Drosophila melanogaster genome are annotated and classified and their structures, obtained via experiment or homology models, are provided. 
All known preferred TF DNA-binding sites obtained from the B1H, DNase I and SELEX methodologies are presented. DNA shape parameters predicted for these sites are obtained from a high throughput server or from crystal structures of protein-DNA complexes where available. An important feature of the database is that all DNA-binding domains and their binding sites are fully annotated in a eukaryote using structural criteria and evolutionary homology. OnTheFly thus provides a comprehensive view of TFs and their binding sites that will be a valuable resource for deciphering non-coding regulatory DNA.",OnTheFly,0.997192144,NA,0,OnTheFly,0.997192144,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/22/2013 +26519402,http://www.plantontology.org,"The Plant Ontology: A Tool for Plant Genomics. The use of controlled, structured vocabularies (ontologies) has become a critical tool for scientists in the post-genomic era of massive datasets. Adoption and integration of common vocabularies and annotation practices enables cross-species comparative analyses and increases data sharing and reusability. The Plant Ontology (PO; http://www.plantontology.org/ ) describes plant anatomy, morphology, and the stages of plant development, and offers a database of plant genomics annotations associated to the PO terms. The scope of the PO has grown from its original design covering only rice, maize, and Arabidopsis, and now includes terms to describe all green plants from angiosperms to green algae.This chapter introduces how the PO and other related ontologies are constructed and organized, including languages and software used for ontology development, and provides an overview of the key features. Detailed instructions illustrate how to search and browse the PO database and access the associated annotation data. 
Users are encouraged to provide input on the ontology through the online term request form and contribute datasets for integration in the PO database.",NA,0,Ontology,0.690171361,Ontology,0.690171361,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2016 +31500643,http://www.lovd.nl/OPA1,"OPA1: 516 unique variants and 831 patients registered in an updated centralized Variome database. BACKGROUND:The dysfunction of OPA1, a dynamin GTPase involved in mitochondrial fusion, is responsible for a large spectrum of neurological disorders, each of which includes optic neuropathy. The database dedicated to OPA1 ( https://www.lovd.nl/OPA1 ), created in 2005, has now evolved towards a centralized and more reliable database using the Global Variome shared Leiden Open-source Variation Database (LOVD) installation. RESULTS:The updated OPA1 database, which registers all the patients from our center as well as those reported in the literature, now covers a total of 831 patients: 697 with isolated dominant optic atrophy (DOA), 47 with DOA ""plus"", and 83 with asymptomatic or unclassified DOA. It comprises 516 unique OPA1 variants, of which more than 80% (414) are considered pathogenic. Full clinical data for 118 patients are documented using the Human Phenotype Ontology, a standard vocabulary for referencing phenotypic abnormalities. Contributors may now make online submissions of phenotypes related to OPA1 mutations, giving clinical and molecular descriptions together with detailed ophthalmological and neurological data, according to an international thesaurus. 
CONCLUSIONS:The evolution of the OPA1 database towards the LOVD, using unified nomenclature, should ensure its interoperability with other databases and prove useful for molecular diagnoses based on gene-panel sequencing, large-scale mutation statistics, and genotype-phenotype correlations.",OPA1,0.989455819,NA,0,OPA1,0.989455819,1,NA,25243597,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,9/10/2019 +25243597,http://opa1.mitodyn.org,"Improved locus-specific database for OPA1 mutations allows inclusion of advanced clinical data. Autosomal-dominant optic atrophy (ADOA) is the most common inherited optic neuropathy, due to mutations in the optic atrophy 1 gene (OPA1) in about 60%-80% of cases. At present, the clinical heterogeneity of patients carrying OPA1 variants renders genotype-phenotype correlations difficulty. Since 2005, when we published the first locus-specific database (LSDB) dedicated to OPA1, a large amount of new clinical and genetic knowledge has emerged, prompting us to update this database. We have used the Leiden Open-Source Variation Database to develop a clinico-biological database, aiming to add clinical phenotypes related to OPA1 variants. As a first step, we validated this new database by registering several patients previously reported in the literature, as well as new patients from our own institution. Contributors may now make online submissions of clinical and molecular descriptions of phenotypes due to OPA1 variants, including detailed ophthalmological and neurological data, with due respect to patient anonymity. 
The updated OPA1 LSDB (http://opa1.mitodyn.org/) should prove useful for molecular diagnoses, large-scale variant statistics, and genotype-phenotype correlations in ADOA studies.",OPA1,0.913838148,NA,0,OPA1,0.913838148,1,NA,31500643,low_prob_best_name,remove,NA,"merge all ""dup name"" IDs",FALSE POS: PARTIAL NAME,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,12/1/2014 +31831730,http://www.odonatephenotypicdatabase.org,"The odonate phenotypic database, a new open data resource for comparative studies of an old insect order. We present The Odonate Phenotypic Database (OPD): an online data resource of dragonfly and damselfly phenotypes (Insecta: Odonata). Odonata is a relatively small insect order that currently consists of about 6400 species belonging to 32 families. The database consists of multiple morphological, life-history and behavioral traits, and biogeographical information collected from literature sources. We see taxon-specific phenotypic databases from Odonata and other organismal groups as becoming an increasing valuable resource in comparative studies. Our database has phenotypic records for 1011 of all 6400 known odonate species. The database is accessible at http://www.odonatephenotypicdatabase.org/, and a static version with an information file about the variables in the database is archived at Dryad.",OPD,0.9961472,Odonate Phenotypic Database,0.971511943,OPD,0.9961472,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/12/2019 +33361798,http://octad.org,"OCTAD: an open workspace for virtually screening therapeutics targeting precise cancer patient groups using gene expression features. As the field of precision medicine progresses, treatments for patients with cancer are starting to be tailored to their molecular as well as their clinical features. The emerging cancer subtypes defined by these molecular features require that dedicated resources be used to assist the discovery of drug candidates for preclinical evaluation. 
Voluminous gene expression profiles of patients with cancer have been accumulated in public databases, enabling the creation of cancer-specific expression signatures. Meanwhile, large-scale gene expression profiles of cellular responses to chemical compounds have also recently became available. By matching the cancer-specific expression signature to compound-induced gene expression profiles from large drug libraries, researchers can prioritize small molecules that present high potency to reverse expression of signature genes for further experimental testing of their efficacy. This approach has proven to be an efficient and cost-effective way to identify efficacious drug candidates. However, the success of this approach requires multiscale procedures, imposing considerable challenges to many labs. To address this, we developed Open Cancer TherApeutic Discovery (OCTAD; http://octad.org ): an open workspace for virtually screening compounds targeting precise groups of patients with cancer using gene expression features. Its database includes 19,127 patient tissue samples covering more than 50 cancer types and expression profiles for 12,442 distinct compounds. The program is used to perform deep-learning-based reference tissue selection, disease gene expression signature creation, drug reversal potency scoring and in silico validation. OCTAD is available as a web portal and a standalone R package to allow experimental and computational scientists to easily navigate the tool.",OCTAD,0.955712438,Open Cancer TherApeutic Discovery,0.961413613,Open Cancer TherApeutic Discovery,0.961413613,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/23/2020 +33045747,http://genetics.opentargets.org,"Open Targets Genetics: systematic identification of trait-associated genes using large-scale genetics and functional genomics. 
Open Targets Genetics (https://genetics.opentargets.org) is an open-access integrative resource that aggregates human GWAS and functional genomics data including gene expression, protein abundance, chromatin interaction and conformation data from a wide range of cell types and tissues to make robust connections between GWAS-associated loci, variants and likely causal genes. This enables systematic identification and prioritisation of likely causal variants and genes across all published trait-associated loci. In this paper, we describe the public resources we aggregate, the technology and analyses we use, and the functionality that the portal offers. Open Targets Genetics can be searched by variant, gene or study/phenotype. It offers tools that enable users to prioritise causal variants and genes at disease-associated loci and access systematic cross-disease and disease-molecular trait colocalization analysis across 92 cell types and tissues including the eQTL Catalogue. Data visualizations such as Manhattan-like plots, regional plots, credible sets overlap between studies and PheWAS plots enable users to explore GWAS signals in depth. The integrated data is made available through the web portal, for bulk download and via a GraphQL API, and the software is open source. Applications of this integrated data include identification of novel targets for drug discovery and drug repurposing.",Open Targets Genetics,0.843843058,Targets,0.592151999,Open Targets Genetics,0.843843058,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +32780568,http://openprot.org,"How to Illuminate the Dark Proteome Using the Multi-omic OpenProt Resource. Ten of thousands of open reading frames (ORFs) are hidden within genomes. These alternative ORFs, or small ORFs, have eluded annotations because they are either small or within unsuspected locations. They are found in untranslated regions or overlap a known coding sequence in messenger RNA and anywhere in a ""non-coding"" RNA. 
Serendipitous discoveries have highlighted these ORFs' importance in biological functions and pathways. With their discovery came the need for deeper ORF annotation and large-scale mining of public repositories to gather supporting experimental evidence. OpenProt, accessible at https://openprot.org/, is the first proteogenomic resource enforcing a polycistronic model of annotation across an exhaustive transcriptome for 10 species. Moreover, OpenProt reports experimental evidence cumulated across a re-analysis of 114 mass spectrometry and 87 ribosome profiling datasets. The multi-omics OpenProt resource also includes the identification of predicted functional domains and evaluation of conservation for all predicted ORFs. The OpenProt web server provides two query interfaces and one genome browser. The query interfaces allow for exploration of the coding potential of genes or transcripts of interest as well as custom downloads of all information contained in OpenProt. © 2020 The Authors. Basic Protocol 1: Using the Search interface Basic Protocol 2: Using the Downloads interface.",OpenProt,0.995658994,NA,0,OpenProt,0.995658994,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2020 +26496950,http://opentein.hgc.jp,"OpenTein: a database of digital whole-slide images of stem cell-derived teratomas. Human stem cells are promising sources for regenerative therapy. To ensure safety of future therapeutic applications, the differentiation potency of stem cells has to be tested and be widely opened to the public. The potency is generally assessed by teratoma formation comprising differentiated cells from all three germ layers, and the teratomas can be inspected through high-quality digital images. The teratoma assay, however, lacks consistency in transplantation protocols and even in interpretation, which needs community-based efforts for improving the assay quality. 
Here, we have developed a novel database OpenTein (Open Teratoma Investigation, http://opentein.hgc.jp/) to archive and freely distribute high-resolution whole-slide images and relevant records. OpenTein has been designed as a searchable, zoomable and annotatable web-based repository system. We have deposited 468 images of teratomas derived by our transplantation of human stem cells, and users can freely access and process such digital teratoma images. Approximately, the current version of OpenTein responds within 11.2 min for processing 2.03 gigapixel teratoma images. Our system offers valuable tools and resources in the new era of stem cell biology.",OpenTein,0.988018394,Teratoma Investigation,0.906912729,OpenTein,0.988018394,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2015 +21890895,"http://opm.phar.umich.edu, http://opm.phar.umich.edu/server.php","OPM database and PPM web server: resources for positioning of proteins in membranes. The Orientations of Proteins in Membranes (OPM) database is a curated web resource that provides spatial positions of membrane-bound peptides and proteins of known three-dimensional structure in the lipid bilayer, together with their structural classification, topology and intracellular localization. OPM currently contains more than 1200 transmembrane and peripheral proteins and peptides from approximately 350 organisms that represent approximately 3800 Protein Data Bank entries. Proteins are classified into classes, superfamilies and families and assigned to 21 distinct membrane types. Spatial positions of proteins with respect to the lipid bilayer are optimized by the PPM 2.0 method that accounts for the hydrophobic, hydrogen bonding and electrostatic interactions of the proteins with the anisotropic water-lipid environment described by the dielectric constant and hydrogen-bonding profiles. The OPM database is freely accessible at http://opm.phar.umich.edu. 
Data can be sorted, searched or retrieved using the hierarchical classification, source organism, localization in different types of membranes. The database offers downloadable coordinates of proteins and peptides with membrane boundaries. A gallery of protein images and several visualization tools are provided. The database is supplemented by the PPM server (http://opm.phar.umich.edu/server.php) which can be used for calculating spatial positions in membranes of newly determined proteins structures or theoretical models.",OPM,0.993578792,Orientations of Proteins in Membranes,0.970261431,OPM,0.993578792,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/2/2011 +30760842,http://ssr.icar.gov.in/index.php,"Development and validation of whole genome-wide and genic microsatellite markers in oil palm (Elaeis guineensis Jacq.): First microsatellite database (OpSatdb). The availability of large expressed sequence tag (EST) and whole genome databases of oil palm enabled the development of a data base of microsatellite markers. For this purpose, an EST database consisting of 40,979 EST sequences spanning 27 Mb and a chromosome-wise whole genome databases were downloaded. A total of 3,950 primer pairs were identified and developed from EST sequences. The tri and tetra nucleotide repeat motifs were most prevalent (each 24.75%) followed by di-nucleotide repeat motifs. Whole genome-wide analysis found a total of 245,654 SSR repeats across the 16 chromosomes of oil palm, of which 38,717 were compound microsatellite repeats. A web application, OpSatdb, the first microsatellite database of oil palm, was developed using the PHP and MySQL database ( https://ssr.icar.gov.in/index.php ). It is a simple and systematic web-based search engine for searching SSRs based on repeat motif type, repeat type, and primer details. High synteny was observed between oil palm and rice genomes. 
The mapping of ESTs having SSRs by Blast2GO resulted in the identification of 19.2% sequences with gene ontology (GO) annotations. Randomly, a set of ten genic SSRs and five genomic SSRs were used for validation and genetic diversity on 100 genotypes belonging to the world oil palm genetic resources. The grouping pattern was observed to be broadly in accordance with the geographical origin of the genotypes. The identified genic and genome-wide SSRs can be effectively useful for various genomic applications of oil palm, such as genetic diversity, linkage map construction, mapping of QTLs, marker-assisted selection, and comparative population studies.",OpSatdb,0.978444099,NA,0,OpSatdb,0.978444099,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/13/2019 +23272737,http://www.optimas-bioenergy.org/optimas_dw,"OPTIMAS-DW: a comprehensive transcriptomics, metabolomics, ionomics, proteomics and phenomics data resource for maize. Background Maize is a major crop plant, grown for human and animal nutrition, as well as a renewable resource for bioenergy. When looking at the problems of limited fossil fuels, the growth of the world's population or the world's climate change, it is important to find ways to increase the yield and biomass of maize and to study how it reacts to specific abiotic and biotic stress situations. Within the OPTIMAS systems biology project maize plants were grown under a large set of controlled stress conditions, phenotypically characterised and plant material was harvested to analyse the effect of specific environmental conditions or developmental stages. Transcriptomic, metabolomic, ionomic and proteomic parameters were measured from the same plant material allowing the comparison of results across different omics domains. A data warehouse was developed to store experimental data as well as analysis results of the performed experiments. 
Description The OPTIMAS Data Warehouse (OPTIMAS-DW) is a comprehensive data collection for maize and integrates data from different data domains such as transcriptomics, metabolomics, ionomics, proteomics and phenomics. Within the OPTIMAS project, a 44K oligo chip was designed and annotated to describe the functions of the selected unigenes. Several treatment- and plant growth stage experiments were performed and measured data were filled into data templates and imported into the data warehouse by a Java based import tool. A web interface allows users to browse through all stored experiment data in OPTIMAS-DW including all data domains. Furthermore, the user can filter the data to extract information of particular interest. All data can be exported into different file formats for further data analysis and visualisation. The data analysis integrates data from different data domains and enables the user to find answers to different systems biology questions. Finally, maize specific pathway information is provided. Conclusions With OPTIMAS-DW a data warehouse for maize was established, which is able to handle different data domains, comprises several analysis results that will support researchers within their work and supports systems biological research in particular. The system is available at http://www.optimas-bioenergy.org/optimas_dw.",OPTIMAS-DW,0.971398151,NA,0,OPTIMAS-DW,0.971398151,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/29/2012 +29913065,http://www.optobase.org,"OptoBase: A Web Platform for Molecular Optogenetics. OptoBase is an online platform for molecular optogenetics. At its core is a hand-annotated and ontology-supported database that aims to cover all existing optogenetic switches and publications, which is further complemented with a collection of convenient optogenetics-related web tools. 
OptoBase is meant both for expert optogeneticists to easily keep track of the field, as well as for all researchers who find optogenetics inviting as a powerful tool to address their biological questions of interest. It is available at https://www.optobase.org . This work also presents OptoBase-based analysis of the trends in molecular optogenetics.",OptoBase,0.9975577,NA,0,OptoBase,0.9975577,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/3/2018 +21735248,http://www.utwente.nl/choir/orchestra,"ORchestra: an online reference database of OR/MS literature in health care. We introduce the categorized reference database ORchestra, which is available online at http://www.utwente.nl/choir/orchestra/.",ORchestra,0.99726969,NA,0,ORchestra,0.99726969,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/7/2011 +21245031,http://lab.fhes.tn.edu.tw/est,"OrchidBase: a collection of sequences of the transcriptome derived from orchids. Orchids are one of the most ecological and evolutionarily significant plants, and the Orchidaceae is one of the most abundant families of the angiosperms. Genetic databases will be useful not only for gene discovery but also for future genomic annotation. For this purpose, OrchidBase was established from 37,979,342 sequence reads collected from 11 in-house Phalaenopsis orchid cDNA libraries. Among them, 41,310 expressed sequence tags (ESTs) were obtained by using Sanger sequencing, whereas 37,908,032 reads were obtained by using next-generation sequencing (NGS) including both Roche 454 and Solexa Illumina sequencers. These reads were assembled into 8,501 contigs and 76,116 singletons, resulting in 84,617 non-redundant transcribed sequences with an average length of 459 bp. The analysis pipeline of the database is an automated system written in Perl and C#, and consists of the following components: automatic pre-processing of EST reads, assembly of raw sequences, annotation of the assembled sequences and storage of the analyzed information in SQL databases. 
A web application was implemented with HTML and a Microsoft .NET Framework C# program for browsing and querying the database, creating dynamic web pages on the client side, analyzing gene ontology (GO) and mapping annotated enzymes to KEGG pathways. The online resources for putative annotation can be searched either by text or by using BLAST, and the results can be explored on the website and downloaded. Consequently, the establishment of OrchidBase will provide researchers with a high-quality genetic resource for data mining and facilitate efficient experimental studies on orchid biology and biotechnology. The OrchidBase database is freely available at http://lab.fhes.tn.edu.tw/est.",OrchidBase,0.997832179,NA,0,OrchidBase,0.997832179,1,NA,34384382,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,1/17/2011 +34384382,http://orchidbase.itps.ncku.edu.tw,"OrchidBase 4.0: a database for orchid genomics and molecular biology. Background The Orchid family is the largest families of the monocotyledons and an economically important ornamental plant worldwide. Given the pivotal role of this plant to humans, botanical researchers and breeding communities should have access to valuable genomic and transcriptomic information of this plant. Previously, we established OrchidBase, which contains expressed sequence tags (ESTs) from different tissues and developmental stages of Phalaenopsis as well as biotic and abiotic stress-treated Phalaenopsis. The database includes floral transcriptomic sequences from 10 orchid species across all the five subfamilies of Orchidaceae. Description Recently, the whole-genome sequences of Apostasia shenzhenica, Dendrobium catenatum, and Phalaenopsis equestris were de novo assembled and analyzed. These datasets were used to develop OrchidBase 4.0, including genomic and transcriptomic data for these three orchid species. 
OrchidBase 4.0 offers information for gene annotation, gene expression with fragments per kilobase of transcript per millions mapped reads (FPKM), KEGG pathways and BLAST search. In addition, assembled genome sequences and location of genes and miRNAs could be visualized by the genome browser. The online resources in OrchidBase 4.0 can be accessed by browsing or using BLAST. Users can also download the assembled scaffold sequences and the predicted gene and protein sequences of these three orchid species. Conclusions OrchidBase 4.0 is the first database that contain the whole-genome sequences and annotations of multiple orchid species. OrchidBase 4.0 is available at http://orchidbase.itps.ncku.edu.tw/.",OrchidBase,0.992340922,NA,0,OrchidBase,0.992340922,1,NA,21245031,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,8/12/2021 +23324169,http://orchidstra.abrc.sinica.edu.tw,"Orchidstra: an integrated orchid functional genomics database. A specialized orchid database, named Orchidstra (URL: http://orchidstra.abrc.sinica.edu.tw), has been constructed to collect, annotate and share genomic information for orchid functional genomics studies. The Orchidaceae is a large family of Angiosperms that exhibits extraordinary biodiversity in terms of both the number of species and their distribution worldwide. Orchids exhibit many unique biological features; however, investigation of these traits is currently constrained due to the limited availability of genomic information. Transcriptome information for five orchid species and one commercial hybrid has been included in the Orchidstra database. Altogether, these comprise >380,000 non-redundant orchid transcript sequences, of which >110,000 are protein-coding genes. Sequences from the transcriptome shotgun assembly (TSA) were obtained either from output reads from next-generation sequencing technologies assembled into contigs, or from conventional cDNA library approaches. 
An annotation pipeline using Gene Ontology, KEGG and Pfam was built to assign gene descriptions and functional annotation to protein-coding genes. Deep sequencing of small RNA was also performed for Phalaenopsis aphrodite to search for microRNAs (miRNAs), extending the information archived for this species to miRNA annotation, precursors and putative target genes. The P. aphrodite transcriptome information was further used to design probes for an oligonucleotide microarray, and expression profiling analysis was carried out. The intensities of hybridized probes derived from microarray assays of various tissues were incorporated into the database as part of the functional evidence. In the future, the content of the Orchidstra database will be expanded with transcriptome data and genomic information from more orchid species.",Orchidstra,0.991553485,NA,0,Orchidstra,0.991553485,1,NA,28111366,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/16/2013 +28111366,http://orchidstra2.abrc.sinica.edu.tw,"Orchidstra 2.0-A Transcriptomics Resource for the Orchid Family. Orchidaceae, the orchid family, encompasses more than 25,000 species and five subfamilies. Due to their beautiful and exotic flowers, distinct biological and ecological features, orchids have aroused wide interest among both researchers and the general public. We constructed the Orchidstra database, a resource for orchid transcriptome assembly and gene annotations. The Orchistra database has been under active development since 2013. To accommodate the increasing amount of orchid transcriptome data and house more comprehensive information, Orchidstra 2.0 has been built with a new database system to store the annotations of 510,947 protein-coding genes and 161,826 noncoding transcripts, covering 18 orchid species belonging to 12 genera in five subfamilies of Orchidaceae. 
We have improved the N50 size of protein-coding genes, provided new functional annotations (including protein-coding gene annotations, protein domain/family information, pathways analysis, Gene Ontology term assignments, orthologous genes across orchid species, cross-links to the database of model species, and miRNA information), and improved the user interface with better website performance. We also provide new database functionalities for database searching and sequence retrieval. Moreover, the Orchidstra 2.0 database incorporates detailed RNA-Seq gene expression data from various tissues and developmental stages in different orchid species. The database will be useful for gene prediction and gene family studies, and for exploring gene expression in orchid species. The Orchidstra 2.0 database is freely accessible at http://orchidstra2.abrc.sinica.edu.tw.",Orchidstra,0.983537138,NA,0,Orchidstra,0.983537138,1,NA,23324169,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2017 +32693783,http://order.jic.ac.uk,"The oilseed rape developmental expression resource: a resource for the investigation of gene expression dynamics during the floral transition in oilseed rape. Background Transcriptome time series can be used to track the expression of genes during development, allowing the timing, intensity, and dynamics of genetic programmes to be determined. Furthermore, time series analysis can reveal causal relationships between genes, leading to an understanding of how the regulatory networks are rewired during development. Due to its impact on yield, a developmental transition of agricultural interest in crops is the switch from vegetative to floral growth. We previously reported the collection of genome-wide gene expression data during the floral transition in the allopolyploid crop Brassica napus (oilseed rape, OSR). 
To provide the OSR research community with easy access to this dataset, we have developed the Oilseed Rape Developmental Expression Resource (ORDER; http://order.jic.ac.uk ). Results ORDER enables users to search for genes of interest and plot expression patterns during the floral transition in both a winter and a spring variety of OSR. We illustrate the utility of ORDER using two case studies: the first investigating the interaction between transcription factors, the second comparing genes that mediate the vernalisation response between OSR and radish (Raphanus sativus L.). All the data is downloadable and the generic website platform underlying ORDER, called AionPlot, is made freely and openly available to facilitate the dissemination of other time series datasets. Conclusions ORDER provides the OSR research community with access to a dataset focused on a period of OSR development important for yield. AionPlot, the platform on which ORDER is built, will allow researchers from all fields to share similar time series datasets.",ORDER,0.97873052,Oilseed Rape Developmental Expression Resource,0.972284428,ORDER,0.97873052,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/21/2020 +26578589,http://www.oreganno.org,"ORegAnno 3.0: a community-driven resource for curated regulatory annotation. The Open Regulatory Annotation database (ORegAnno) is a resource for curated regulatory annotation. It contains information about regulatory regions, transcription factor binding sites, RNA binding sites, regulatory variants, haplotypes, and other regulatory elements. ORegAnno differentiates itself from other regulatory resources by facilitating crowd-sourced interpretation and annotation of regulatory observations from the literature and highly curated resources. It contains a comprehensive annotation scheme that aims to describe both the elements and outcomes of regulatory events. 
Moreover, ORegAnno assembles these disparate data sources and annotations into a single, high quality catalogue of curated regulatory information. The current release is an update of the database previously featured in the NAR Database Issue, and now contains 1 948 307 records, across 18 species, with a combined coverage of 334 215 080 bp. Complete records, annotation, and other associated data are available for browsing and download at http://www.oreganno.org/.",ORegAnno,0.998606801,Open Regulatory Annotation database,0.859214735,ORegAnno,0.998606801,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +25313158,http://mips.helmholtz-muenchen.de/Organ_System_Heterogeneity,"Organ system heterogeneity DB: a database for the visualization of phenotypes at the organ system level. Perturbations of mammalian organisms including diseases, drug treatments and gene perturbations in mice affect organ systems differently. Some perturbations impair relatively few organ systems while others lead to highly heterogeneous or systemic effects. Organ System Heterogeneity DB (http://mips.helmholtz-muenchen.de/Organ_System_Heterogeneity/) provides information on the phenotypic effects of 4865 human diseases, 1667 drugs and 5361 genetically modified mouse models on 26 different organ systems. Disease symptoms, drug side effects and mouse phenotypes are mapped to the System Organ Class (SOC) level of the Medical Dictionary of Regulatory Activities (MedDRA). Then, the organ system heterogeneity value, a measurement of the systemic impact of a perturbation, is calculated from the relative frequency of phenotypic features across all SOCs. For perturbations of interest, the database displays the distribution of phenotypic effects across organ systems along with the heterogeneity value and the distance between organ system distributions. 
In this way, it allows, in an easy and comprehensible fashion, the comparison of the phenotypic organ system distributions of diseases, drugs and their corresponding genetically modified mouse models of associated disease genes and drug targets. The Organ System Heterogeneity DB is thus a platform for the visualization and comparison of organ system level phenotypic effects of drugs, diseases and genes.",NA,0,Organ System Heterogeneity,0.808443427,Organ System Heterogeneity,0.808443427,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/13/2014 +31724725,http://rnabiology.ircm.qc.ca/oRNAment,"oRNAment: a database of putative RNA binding protein target sites in the transcriptomes of model species. Protein-RNA interactions are essential for controlling most aspects of RNA metabolism, including synthesis, processing, trafficking, stability and degradation. In vitro selection methods, such as RNAcompete and RNA Bind-n-Seq, have defined the consensus target motifs of hundreds of RNA-binding proteins (RBPs). However, readily available information about the distribution features of these motifs across full transcriptomes was hitherto lacking. Here, we introduce oRNAment (o RNA motifs enrichment in transcriptomes), a database that catalogues the putative motif instances of 223 RBPs, encompassing 453 motifs, in a transcriptome-wide fashion. The database covers 525 718 complete coding and non-coding RNA species across the transcriptomes of human and four prominent model organisms: Caenorhabditis elegans, Danio rerio, Drosophila melanogaster and Mus musculus. The unique features of oRNAment include: (i) hosting of the most comprehensive mapping of RBP motif instances to date, with 421 133 612 putative binding sites described across five species; (ii) options for the user to filter the data according to a specific threshold; (iii) a user-friendly interface and efficient back-end allowing the rapid querying of the data through multiple angles (i.e. 
transcript, RBP, or sequence attributes) and (iv) generation of several interactive data visualization charts describing the results of user queries. oRNAment is freely available at http://rnabiology.ircm.qc.ca/oRNAment/.",oRNAment,0.992074907,NA,0,oRNAment,0.992074907,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +"23180791, 30395283",http://www.orthodb.org,"OrthoDB: a hierarchical catalog of animal, fungal and bacterial orthologs. The concept of orthology provides a foundation for formulating hypotheses on gene and genome evolution, and thus forms the cornerstone of comparative genomics, phylogenomics and metagenomics. We present the update of OrthoDB-the hierarchical catalog of orthologs (http://www.orthodb.org). From its conception, OrthoDB promoted delineation of orthologs at varying resolution by explicitly referring to the hierarchy of species radiations, now also adopted by other resources. The current release provides comprehensive coverage of animals and fungi representing 252 eukaryotic species, and is now extended to prokaryotes with the inclusion of 1115 bacteria. Functional annotations of orthologous groups are provided through mapping to InterPro, GO, OMIM and model organism phenotypes, with cross-references to major resources including UniProt, NCBI and FlyBase. Uniquely, OrthoDB provides computed evolutionary traits of orthologs, such as gene duplicability and loss profiles, divergence rates, sibling groups, and now extended with exon-intron architectures, syntenic orthologs and parent-child trees. The interactive web interface allows navigation along the species phylogenies, complex queries with various identifiers, annotation keywords and phrases, as well as with gene copy-number profiles and sequence homology searches. 
With the explosive growth of available data, OrthoDB also provides mapping of newly sequenced genomes and transcriptomes to the current orthologous groups.",OrthoDB,0.997942567,NA,0,OrthoDB,0.997942567,2,25428351,"25428351.0, 27899580.0, 33196836.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +"27899580, 33196836",http://orthodb.org,"OrthoDB v9.1: cataloging evolutionary and functional annotations for animal, fungal, plant, archaeal, bacterial and viral orthologs. OrthoDB is a comprehensive catalog of orthologs, genes inherited by extant species from a single gene in their last common ancestor. In 2016 OrthoDB reached its 9th release, growing to over 22 million genes from over 5000 species, now adding plants, archaea and viruses. In this update we focused on usability of this fast-growing wealth of data: updating the user and programmatic interfaces to browse and query the data, and further enhancing the already extensive integration of available gene functional annotations. Collating functional annotations from over 100 resources, and enabled us to propose descriptive titles for 87% of ortholog groups. Additionally, OrthoDB continues to provide computed evolutionary annotations and to allow user queries by sequence homology. The OrthoDB resource now enables users to generate publication-quality comparative genomics charts, as well as to upload, analyze and interactively explore their own private data. OrthoDB is available from http://orthodb.org.",OrthoDB,0.997299731,NA,0,OrthoDB,0.997299731,2,NA,"25428351.0, 23180791.0, 30395283.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +25428351,"http://www.orthodb.org, http://www.orthodb.org/software","OrthoDB v8: update of the hierarchical catalog of orthologs and the underlying free software. Orthology, refining the concept of homology, is the cornerstone of evolutionary comparative studies. 
With the ever-increasing availability of genomic data, inference of orthology has become instrumental for generating hypotheses about gene functions crucial to many studies. This update of the OrthoDB hierarchical catalog of orthologs (http://www.orthodb.org) covers 3027 complete genomes, including the most comprehensive set of 87 arthropods, 61 vertebrates, 227 fungi and 2627 bacteria (sampling the most complete and representative genomes from over 11,000 available). In addition to the most extensive integration of functional annotations from UniProt, InterPro, GO, OMIM, model organism phenotypes and COG functional categories, OrthoDB uniquely provides evolutionary annotations including rates of ortholog sequence divergence, copy-number profiles, sibling groups and gene architectures. We re-designed the entirety of the OrthoDB website from the underlying technology to the user interface, enabling the user to specify species of interest and to select the relevant orthology level by the NCBI taxonomy. The text searches allow use of complex logic with various identifiers of genes, proteins, domains, ontologies or annotation keywords and phrases. Gene copy-number profiles can also be queried. This release comes with the freely available underlying ortholog clustering pipeline (http://www.orthodb.org/software).",OrthoDB,0.997169971,NA,0,OrthoDB,0.997169971,1,"23180791.0, 30395283.0","27899580.0, 33196836.0, 23180791.0, 30395283.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/26/2014 +30380106,http://lbgi.fr/orthoinspectorv3,"OrthoInspector 3.0: open portal for comparative genomics. OrthoInspector is one of the leading software suites for orthology relations inference. 
In this paper, we describe a major redesign of the OrthoInspector online resource along with a significant increase in the number of species: 4753 organisms are now covered across the three domains of life, making OrthoInspector the most exhaustive orthology resource to date in terms of covered species (excluding viruses). The new website integrates original data exploration and visualization tools in an ergonomic interface. Distributions of protein orthologs are represented by heatmaps summarizing their evolutionary histories, and proteins with similar profiles can be directly accessed. Two novel tools have been implemented for comparative genomics: a phylogenetic profile search that can be used to find proteins with a specific presence-absence profile and investigate their functions and, inversely, a GO profiling tool aimed at deciphering evolutionary histories of molecular functions, processes or cell components. In addition to the re-designed website, the OrthoInspector resource now provides a REST interface for programmatic access. OrthoInspector 3.0 is available at http://lbgi.fr/orthoinspectorv3.",OrthoInspector,0.992683411,NA,0,OrthoInspector,0.992683411,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +23203876,http://www.pathogenomics.sfu.ca/ortholugedb,"OrtholugeDB: a bacterial and archaeal orthology resource for improved comparative genomic analysis. Prediction of orthologs (homologous genes that diverged because of speciation) is an integral component of many comparative genomics methods. Although orthologs are more likely to have similar function versus paralogs (genes that diverged because of duplication), recent studies have shown that their degree of functional conservation is variable. Also, there are inherent problems with several large-scale ortholog prediction approaches. To address these issues, we previously developed Ortholuge, which uses phylogenetic distance ratios to provide more precise ortholog assessments for a set of predicted orthologs. 
However, the original version of Ortholuge required manual intervention and was not easily accessible; therefore, we now report the development of OrtholugeDB, available online at http://www.pathogenomics.sfu.ca/ortholugedb. OrtholugeDB provides ortholog predictions for completely sequenced bacterial and archaeal genomes from NCBI based on reciprocal best Basic Local Alignment Search Tool hits, supplemented with further evaluation by the more precise Ortholuge method. The OrtholugeDB web interface facilitates user-friendly and flexible ortholog analysis, from single genes to genomes, plus flexible data download options. We compare Ortholuge with similar methods, showing how it may more consistently identify orthologs with conserved features across a wide range of taxonomic distances. OrtholugeDB facilitates rapid, and more accurate, bacterial and archaeal comparative genomic analysis and large-scale ortholog predictions.",OrtholugeDB,0.973012209,NA,0,OrtholugeDB,0.973012209,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/29/2012 +24723423,http://www.orthomam.univ-montp2.fr,"OrthoMaM v8: a database of orthologous exons and coding sequences for comparative genomics in mammals. Comparative genomic studies extensively rely on alignments of orthologous sequences. Yet, selecting, gathering, and aligning orthologous exons and protein-coding sequences (CDS) that are relevant for a given evolutionary analysis can be a difficult and time-consuming task. In this context, we developed OrthoMaM, a database of ORTHOlogous MAmmalian Markers describing the evolutionary dynamics of orthologous genes in mammalian genomes using a phylogenetic framework. Since its first release in 2007, OrthoMaM has regularly evolved, not only to include newly available genomes but also to incorporate up-to-date software in its analytic pipeline. 
This eighth release integrates the 40 complete mammalian genomes available in Ensembl v73 and provides alignments, phylogenies, evolutionary descriptor information, and functional annotations for 13,404 single-copy orthologous CDS and 6,953 long exons. The graphical interface allows to easily explore OrthoMaM to identify markers with specific characteristics (e.g., taxa availability, alignment size, %G+C, evolutionary rate, chromosome location). It hence provides an efficient solution to sample preprocessed markers adapted to user-specific needs. OrthoMaM has proven to be a valuable resource for researchers interested in mammalian phylogenomics, evolutionary genomics, and has served as a source of benchmark empirical data sets in several methodological studies. OrthoMaM is available for browsing, query and complete or filtered downloads at http://www.orthomam.univ-montp2.fr/.",OrthoMaM,0.997223616,NA,0,OrthoMaM,0.997223616,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/9/2014 +23203890,http://www.genoportal.org/PSSP/index.do,"OrysPSSP: a comparative platform for small secreted proteins from rice and other plants. Plants have large diverse families of small secreted proteins (SSPs) that play critical roles in the processes of development, differentiation, defense, flowering, stress response, symbiosis, etc. Oryza sativa is one of the major crops worldwide and an excellent model for monocotyledonous plants. However, there had not been any effort to systematically analyze rice SSPs. Here, we constructed a comparative platform, OrysPSSP (http://www.genoportal.org/PSSP/index.do), involving >100 000 SSPs from rice and 25 plant species. OrysPSSP is composed of a core SSP database and a dynamic web interface that integrates a variety of user tools and resources. The current release (v0530) of core SSP database contains a total of 101 048 predicted SSPs, which were generated through a rigid computation/curation pipeline. 
The web interface consists of eight different modules, providing users with rich resources/functions, e.g. browsing SSP by chromosome, searching and filtering SSP, validating SSP with omics data, comparing SSP among multiple species and querying core SSP database with BLAST. Some cases of application are discussed to demonstrate the utility of OrysPSSP. OrysPSSP serves as a comprehensive resource to explore SSP on the genome scale and across the phylogeny of plant species.",OrysPSSP,0.995186687,NA,0,OrysPSSP,0.995186687,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +33661371,"http://viewer.shigen.info/oryzagenome21detail/index.xhtml, http://shigen.nig.ac.jp/rice/oryzabase","OryzaGenome2.1: Database of Diverse Genotypes in Wild Oryza Species. Background OryzaGenome ( http://viewer.shigen.info/oryzagenome21detail/index.xhtml ), a feature within Oryzabase ( https://shigen.nig.ac.jp/rice/oryzabase/ ), is a genomic database for wild Oryza species that provides comparative and evolutionary genomics approaches for the rice research community. Results Here we release OryzaGenome2.1, the first major update of OryzaGenome. The main feature in this version is the inclusion of newly sequenced genotypes and their meta-information, giving a total of 217 accessions of 19 wild Oryza species (O. rufipogon, O. barthii, O. longistaminata, O. meridionalis, O. glumaepatula, O. punctata, O. minuta, O. officinalis, O. rhizomatis, O. eichingeri, O. latifolia, O. alta, O. grandiglumis, O. australiensis, O. brachyantha, O. granulata, O. meyeriana, O. ridleyi, and O. longiglumis). These 19 wild species belong to 9 genome types (AA, BB, CC, BBCC, CCDD, EE, FF, GG, and HHJJ), representing wide genomic diversity in the genus. Using the genotype information, we analyzed the genome diversity of Oryza species. Other features of OryzaGenome facilitate the use of information on single nucleotide polymorphisms (SNPs) between O. sativa and its wild progenitor O. 
rufipogon in rice research, including breeding as well as basic science. For example, we provide Variant Call Format (VCF) files for genome-wide SNPs of 33 O. rufipogon accessions against the O. sativa reference genome, IRGSP1.0. In addition, we provide a new SNP Effect Table function, allowing users to identify SNPs or small insertion/deletion polymorphisms in the 33 O. rufipogon accessions and to search for the effect of these polymorphisms on protein function if they reside in the coding region (e.g., are missense or nonsense mutations). Furthermore, the SNP Viewer for 446 O. rufipogon accessions was updated by implementing new tracks for possible selective sweep regions and highly mutated regions that were potentially exposed to selective pressures during the process of domestication. Conclusion OryzaGenome2.1 focuses on comparative genomic analysis of diverse wild Oryza accessions collected around the world and on the development of resources to speed up the identification of critical trait-related genes, especially from O. rufipogon. It aims to promote the use of genotype information from wild accessions in rice breeding and potential future crop improvements. Diverse genotypes will be a key resource for evolutionary studies in Oryza, including polyploid biology.",OryzaGenome,0.993803144,NA,0,OryzaGenome,0.993803144,1,NA,26578696,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,3/4/2021 +26578696,http://viewer.shigen.info/oryzagenome,"OryzaGenome: Genome Diversity Database of Wild Oryza Species. The species in the genus Oryza, encompassing nine genome types and 23 species, are a rich genetic resource and may have applications in deeper genomic analyses aiming to understand the evolution of plant genomes. With the advancement of next-generation sequencing (NGS) technology, a flood of Oryza species reference genomes and genomic variation information has become available in recent years. 
This genomic information, combined with the comprehensive phenotypic information that we are accumulating in our Oryzabase, can serve as an excellent genotype-phenotype association resource for analyzing rice functional and structural evolution, and the associated diversity of the Oryza genus. Here we integrate our previous and future phenotypic/habitat information and newly determined genotype information into a united repository, named OryzaGenome, providing the variant information with hyperlinks to Oryzabase. The current version of OryzaGenome includes genotype information of 446 O. rufipogon accessions derived by imputation and of 17 accessions derived by imputation-free deep sequencing. Two variant viewers are implemented: SNP Viewer as a conventional genome browser interface and Variant Table as a text-based browser for precise inspection of each variant one by one. Portable VCF (variant call format) file or tab-delimited file download is also available. Following these SNP (single nucleotide polymorphism) data, reference pseudomolecules/scaffolds/contigs and genome-wide variation information for almost all of the closely and distantly related wild Oryza species from the NIG Wild Rice Collection will be available in future releases. All of the resources can be accessed through http://viewer.shigen.info/oryzagenome/.",OryzaGenome,0.958561838,NA,0,OryzaGenome,0.958561838,1,NA,33661371,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/16/2015 +21486466,http://oryzapg.iab.keio.ac.jp,"OryzaPG-DB: rice proteome database based on shotgun proteogenomics. Background Proteogenomics aims to utilize experimental proteome information for refinement of genome annotation. 
Since mass spectrometry-based shotgun proteomics approaches provide large-scale peptide sequencing data with high throughput, a data repository for shotgun proteogenomics would represent a valuable source of gene expression evidence at the translational level for genome re-annotation. Description Here, we present OryzaPG-DB, a rice proteome database based on shotgun proteogenomics, which incorporates the genomic features of experimental shotgun proteomics data. This version of the database was created from the results of 27 nanoLC-MS/MS runs on a hybrid ion trap-orbitrap mass spectrometer, which offers high accuracy for analyzing tryptic digests from undifferentiated cultured rice cells. Peptides were identified by searching the product ion spectra against the protein, cDNA, transcript and genome databases from Michigan State University, and were mapped to the rice genome. Approximately 3200 genes were covered by these peptides and 40 of them contained novel genomic features. Users can search, download or navigate the database per chromosome, gene, protein, cDNA or transcript and download the updated annotations in standard GFF3 format, with visualization in PNG format. In addition, the database scheme of OryzaPG was designed to be generic and can be reused to host similar proteogenomic information for other species. OryzaPG is the first proteogenomics-based database of the rice proteome, providing peptide-based expression profiles, together with the corresponding genomic origin, including the annotation of novelty for each peptide. Conclusions The OryzaPG database was constructed and is freely available at http://oryzapg.iab.keio.ac.jp/.",OryzaPG,0.995268643,NA,0,OryzaPG,0.995268643,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/12/2011 +31086734,http://biokb.ncpsb.org/osteoporosis,"OsteoporosAtlas: a human osteoporosis-related gene database. 
Background Osteoporosis is a common, complex disease of bone with a strong heritable component, characterized by low bone mineral density, microarchitectural deterioration of bone tissue and an increased risk of fracture. Due to limited drug selection for osteoporosis and increasing morbidity, mortality of osteoporotic fractures, osteoporosis has become a major health burden in aging societies. Current researches for identifying specific loci or genes involved in osteoporosis contribute to a greater understanding of the pathogenesis of osteoporosis and the development of better diagnosis, prevention and treatment strategies. However, little is known about how most causal genes work and interact to influence osteoporosis. Therefore, it is greatly significant to collect and analyze the studies involved in osteoporosis-related genes. Unfortunately, the information about all these osteoporosis-related genes is scattered in a large amount of extensive literature. Currently, there is no specialized database for easily accessing relevant information about osteoporosis-related genes and miRNAs. Methods We extracted data from literature abstracts in PubMed by text-mining and manual curation. Moreover, a local MySQL database containing all the data was developed with PHP on a Windows server. Results OsteoporosAtlas (http://biokb.ncpsb.org/osteoporosis/), the first specialized database for easily accessing relevant information such as osteoporosis-related genes and miRNAs, was constructed and served for researchers. OsteoporosAtlas enables users to retrieve, browse and download osteoporosis-related genes and miRNAs. Gene ontology and pathway analyses were integrated into OsteoporosAtlas. It currently includes 617 human encoding genes, 131 human non-coding miRNAs, and 128 functional roles. 
We think that OsteoporosAtlas will be an important bioinformatics resource to facilitate a better understanding of the pathogenesis of osteoporosis and developing better diagnosis, prevention and treatment strategies.",OsteoporosAtlas,0.993293166,NA,0,OsteoporosAtlas,0.993293166,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/26/2019 +22768977,http://www.eumicrobedb.org/transcripts,"Oomycete Transcriptomics Database: a resource for oomycete transcriptomes. Background Oomycete pathogens have attracted significant attention in recent years due to their economic impact. With improving sequencing technologies, large amounts of oomycete transcriptomics data are now available which have great biological utility. A known bottleneck with next generation sequencing data however lies with their analysis, interpretation, organization, storage and visualization. A number of efforts have been made in this respect resulting in development of a myriad of resources. Most of the existing NGS browsers work as standalone applications that need processed data to be uploaded to the browser locally for visualization. At the same time, several oomycete EST databases such as PFGD, ESTAP and SPC, are not available anymore, so there is an immediate need for a database resource that can store and disseminate this legacy information in addition to NGS data. Description Oomycetes Transcriptomics Database is an integrated transcriptome and EST data resource for oomycete pathogens. The database currently stores processed ABI SOLiD transcript sequences from Phytophthora sojae and its host soybean (P. sojae mycelia, healthy soybean and P. sojae-infected soybean) as well as Illumina transcript sequences from five Hyaloperonospora arabidopsidis libraries. In addition to those resources, it has also a complete set of Sanger EST sequences from P. sojae, P. infestans and H. arabidopsidis grown under various conditions. 
A web-based transcriptome browser was created for visualization of assembled transcripts, their mapping to the reference genome, expression profiling and depth of read coverage for particular locations on the genome. The transcriptome browser merges EST-derived contigs with NGS-derived assembled transcripts on the fly and displays the consensus. OTD possesses strong query features and the database interacts with the VBI Microbial Database as well as the Phytophthora Transcriptomics Database. Conclusion Oomycete Transcriptomics Database provides access to NGS transcript and EST data for oomycete pathogens and soybean. The OTD browser is a light weight transcriptome browser that displays the raw read alignment as well as the transcript assembly and expression information quantitatively. The query features offer a wide variety of options including querying data from the VBI microbial database and the Phytophthora transcriptomics database. The database is publicly available at http://www.eumicrobedb.org/transcripts/.",OTD,0.98326385,Oomycete Transcriptomics Database,0.776904251,OTD,0.98326385,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/6/2012 +29069459,http://overgenedb.amu.edu.pl,"OverGeneDB: a database of 5' end protein coding overlapping genes in human and mouse genomes. Gene overlap plays various regulatory functions on transcriptional and post-transcriptional levels. Most current studies focus on protein-coding genes overlapping with non-protein-coding counterparts, the so called natural antisense transcripts. Considerably less is known about the role of gene overlap in the case of two protein-coding genes. Here, we provide OverGeneDB, a database of human and mouse 5' end protein-coding overlapping genes. The database contains 582 human and 113 mouse gene pairs that are transcribed using overlapping promoters in at least one analyzed library. 
Gene pairs were identified based on the analysis of the transcription start site (TSS) coordinates in 73 human and 10 mouse organs, tissues and cell lines. Beside TSS data, resources for 26 human lung adenocarcinoma cell lines also contain RNA-Seq and ChIP-Seq data for seven histone modifications and RNA Polymerase II activity. The collected data revealed that the overlap region is rarely conserved between the studied species and tissues. In ∼50% of the overlapping genes, transcription started explicitly in the overlap regions. In the remaining half of overlapping genes, transcription was initiated both from overlapping and non-overlapping TSSs. OverGeneDB is accessible at http://overgenedb.amu.edu.pl.",OverGeneDB,0.993383467,NA,0,OverGeneDB,0.993383467,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +34420978,http://hdsheep.cer.auckland.ac.nz,"A Multi-Omic Huntington's Disease Transgenic Sheep-Model Database for Investigating Disease Pathogenesis. Background The pathological mechanism of cellular dysfunction and death in Huntington's disease (HD) is not well defined. Our transgenic HD sheep model (OVT73) was generated to investigate these mechanisms and for therapeutic testing. One particular cohort of animals has undergone focused investigation resulting in a large interrelated multi-omic dataset, with statistically significant changes observed comparing OVT73 and control 'omic' profiles and reported in literature. Objective Here we make this dataset publicly available for the advancement of HD pathogenic mechanism discovery. Methods To enable investigation in a user-friendly format, we integrated seven multi-omic datasets from a cohort of 5-year-old OVT73 (n = 6) and control (n = 6) sheep into a single database utilising the programming language R. It includes high-throughput transcriptomic, metabolomic and proteomic data from blood, brain, and other tissues. 
Results We present the 'multi-omic' HD sheep database as a queryable web-based platform that can be used by the wider HD research community (https://hdsheep.cer.auckland.ac.nz/). The database is supported with a suite of simple automated statistical analysis functions for rapid exploratory analyses. We present examples of its use that validates the integrity relative to results previously reported. The data may also be downloaded for user determined analysis. Conclusion We propose the use of this online database as a hypothesis generator and method to confirm/refute findings made from patient samples and alternate model systems, to expand our understanding of HD pathogenesis. Importantly, additional tissue samples are available for further investigation of this cohort.",OVT73,0.766715392,Disease,0.640458047,OVT73,0.766715392,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,1/1/2021 +24174541,http://pmite.hzau.edu.cn/django/mite,"P-MITE: a database for plant miniature inverted-repeat transposable elements. Miniature inverted-repeat transposable elements (MITEs) are prevalent in eukaryotic species including plants. MITE families vary dramatically and usually cannot be identified based on homology. In this study, we de novo identified MITEs from 41 plant species, using computer programs MITE Digger, MITE-Hunter and/or Repetitive Sequence with Precise Boundaries (RSPB). MITEs were found in all, but one (Cyanidioschyzon merolae), species. Combined with the MITEs identified previously from the rice genome, >2.3 million sequences from 3527 MITE families were obtained from 41 plant species. In general, higher plants contain more MITEs than lower plants, with a few exceptions such as papaya, with only 538 elements. The largest number of MITEs is found in apple, with 237 302 MITE sequences. The number of MITE sequences in a genome is significantly correlated with genome size. 
A series of databases (plant MITE databases, P-MITE), available online at http://pmite.hzau.edu.cn/django/mite/, was constructed to host all MITE sequences from the 41 plant genomes. The databases are available for sequence similarity searches (BLASTN), and MITE sequences can be downloaded by family or by genome. The databases can be used to study the origin and amplification of MITEs, MITE-derived small RNAs and roles of MITEs on gene and genome evolution.",P-MITE,0.894164824,plant MITE databases,0.776570714,P-MITE,0.894164824,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/29/2013 +29855811,http://p-pal.di.uminho.pt/tools,"Procura-PALavras (P-PAL): A Web-based interface for a new European Portuguese lexical database. In this article, we present Procura-PALavras (P-PAL), a Web-based interface for a new European Portuguese (EP) lexical database. Based on a contemporary printed corpus of over 227 million words, P-PAL provides a broad range of word attributes and statistics, including several measures of word frequency (e.g., raw counts, per-million word frequency, logarithmic Zipf scale), morpho-syntactic information (e.g., parts of speech [PoSs], grammatical gender and number, dominant PoS, and frequency and relative frequency of the dominant PoS), as well as several lexical and sublexical orthographic (e.g., number of letters; consonant-vowel orthographic structure; density and frequency of orthographic neighbors; orthographic Levenshtein distance; orthographic uniqueness point; orthographic syllabification; and trigram, bigram, and letter type and token frequencies), and phonological measures (e.g., pronunciation, number of phonemes, stress, density and frequency of phonological neighbors, transposed and phonographic neighbors, syllabification, and biphone and phone type and token frequencies) for ~53,000 lemmatized and ~208,000 nonlemmatized EP word forms. 
To obtain these metrics, researchers can choose between two word queries in the application: (i) analyze words previously selected for specific attributes and/or lexical and sublexical characteristics, or (ii) generate word lists that meet word requirements defined by the user in the menu of analyses. For the measures it provides and the flexibility it allows, P-PAL will be a key resource to support research in all cognitive areas that use EP verbal stimuli. P-PAL is freely available at http://p-pal.di.uminho.pt/tools .",P-PAL,0.997003108,Procura-PALavras,0.787817964,P-PAL,0.997003108,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2018 +24243849,http://p3db.org,"P³DB 3.0: From plant phosphorylation sites to protein networks. In the past few years, the Plant Protein Phosphorylation Database (P(3)DB, http://p3db.org) has become one of the most significant in vivo data resources for studying plant phosphoproteomics. We have substantially updated P(3)DB with respect to format, new datasets and analytic tools. In the P(3)DB 3.0, there are altogether 47 923 phosphosites in 16 477 phosphoproteins curated across nine plant organisms from 32 studies, which have met our multiple quality standards for acquisition of in vivo phosphorylation site data. Centralized by these phosphorylation data, multiple related data and annotations are provided, including protein-protein interaction (PPI), gene ontology, protein tertiary structures, orthologous sequences, kinase/phosphatase classification and Kinase Client Assay (KiC Assay) data--all of which provides context for the phosphorylation event. In addition, P(3)DB 3.0 incorporates multiple network viewers for the above features, such as PPI network, kinase-substrate network, phosphatase-substrate network, and domain co-occurrence network to help study phosphorylation from a systems point of view. 
Furthermore, the new P(3)DB reflects a community-based design through which users can share datasets and automate data depository processes for publication purposes. Each of these new features supports the goal of making P(3)DB a comprehensive, systematic and interactive platform for phosphoproteomics research.",P(3)DB,0.997485409,Plant Protein Phosphorylation Database,0.983375771,P(3)DB,0.997485409,1,28150236,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/15/2013 +25324303,http://www.p2cs.org,"P2CS: updates of the prokaryotic two-component systems database. The P2CS database (http://www.p2cs.org/) is a comprehensive resource for the analysis of Prokaryotic Two-Component Systems (TCSs). TCSs are comprised of a receptor histidine kinase (HK) and a partner response regulator (RR) and control important prokaryotic behaviors. The latest incarnation of P2CS includes 164,651 TCS proteins, from 2758 sequenced prokaryotic genomes. Several important new features have been added to P2CS since it was last described. Users can search P2CS via BLAST, adding hits to their cart, and homologous proteins can be aligned using MUSCLE and viewed using Jalview within P2CS. P2CS also provides phylogenetic trees based on the conserved signaling domains of the RRs and HKs from entire genomes. HK and RR trees are annotated with gene organization and domain architecture, providing insights into the evolutionary origin of the contemporary gene set. The majority of TCSs are encoded by adjacent HK and RR genes, however, 'orphan' unpaired TCS genes are also abundant and identifying their partner proteins is challenging. P2CS now provides paired HK and RR trees with proteins from the same genetic locus indicated. 
This allows the appraisal of evolutionary relationships across entire TCSs and in some cases the identification of candidate partners for orphan TCS proteins.",P2CS,0.982351462,prokaryotic two-component systems,0.689270794,P2CS,0.982351462,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/16/2014 +23153078,http://www.p2tf.org,"P2TF: a comprehensive resource for analysis of prokaryotic transcription factors. Background Transcription factors (TFs) are DNA-binding proteins that regulate gene expression by activating or repressing transcription. Some have housekeeping roles, while others regulate the expression of specific genes in response to environmental change. The majority of TFs are multi-domain proteins, and they can be divided into families according to their domain organisation. There is a need for user-friendly, rigorous and consistent databases to allow researchers to overcome the inherent variability in annotation between genome sequences. Description P2TF (Predicted Prokaryotic Transcription Factors) is an integrated and comprehensive database relating to transcription factor proteins. The current version of the database contains 372,877 TFs from 1,987 completely sequenced prokaryotic genomes and 43 metagenomes. The database provides annotation, classification and visualisation of TF genes and their genetic context, providing researchers with a one-stop shop in which to investigate TFs. The P2TF database analyses TFs in both predicted proteomes and reconstituted ORFeomes, recovering approximately 3% more TF proteins than just screening predicted proteomes. Users are able to search the database with sequence or domain architecture queries, and resulting hits can be aligned to investigate evolutionary relationships and conservation of residues. To increase utility, all searches can be filtered by taxonomy, TF genes can be added to the P2TF cart, and gene lists can be exported for external analysis in a variety of formats. 
Conclusions P2TF is an open resource for biologists, allowing exploration of all TFs within prokaryotic genomes and metagenomes. The database enables a variety of analyses, and results are presented for user exploration as an interactive web interface, which provides different ways to access and download the data. The database is freely available at http://www.p2tf.org/.",P2TF,0.991170208,Predicted Prokaryotic Transcription Factors,0.982642752,P2TF,0.991170208,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2012 +28150236,http://p3db.org,"Bioinformatics Analysis of Protein Phosphorylation in Plant Systems Biology Using P3DB. Protein phosphorylation is one of the most pervasive protein post-translational modification events in plant cells. It is involved in many plant biological processes, such as plant growth, organ development, and plant immunology, by regulating or switching signaling and metabolic pathways. High-throughput experimental methods like mass spectrometry can easily characterize hundreds to thousands of phosphorylation events in a single experiment. With the increasing volume of the data sets, Plant Protein Phosphorylation DataBase (P3DB, http://p3db.org ) provides a comprehensive, systematic, and interactive online platform to deposit, query, analyze, and visualize these phosphorylation events in many plant species. It stores the protein phosphorylation sites in the context of identified mass spectra, phosphopeptides, and phosphoproteins contributed from various plant proteome studies. In addition, P3DB associates these plant phosphorylation sites to protein physicochemical information in the protein charts and tertiary structures, while various protein annotations from hierarchical kinase phosphatase families, protein domains, and gene ontology are also added into the database. P3DB not only provides rich information, but also interconnects and provides visualization of the data in networks, in systems biology context. 
Currently, P3DB includes the KiC (Kinase Client) assay network, the protein-protein interaction network, the kinase-substrate network, the phosphatase-substrate network, and the protein domain co-occurrence network. All of these are available to query for and visualize existing phosphorylation events. Although P3DB only hosts experimentally identified phosphorylation data, it provides a plant phosphorylation prediction model for any unknown queries on the fly. P3DB is an entry point to the plant phosphorylation community to deposit and visualize any customized data sets within this systems biology framework. Nowadays, P3DB has become one of the major bioinformatics platforms of protein phosphorylation in plant biology.",P3DB,0.995969633,Plant Protein Phosphorylation DataBase,0.941485636,P3DB,0.995969633,1,24243849,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2017 +34332522,http://clinicaltrials.gov/ct2/show/record/NCT04675918?cond=pediatric+cardiac+arrest&draw=2&rank=10,"Pediatric In-Hospital Cardiac Arrest International Registry (PACHIN): protocol for a prospective international multicenter register of cardiac arrest in children. Background and aims Cardiac arrest (CA) in children is a major public health problem. Thanks to advances in cardiopulmonary resuscitation (CPR) guidelines and teaching skills, results in children have improved. However, pediatric CA has a very high mortality. In the treatment of in-hospital CA there are still multiple controversies. The objective of this study is to develop a multicenter and international registry of in-hospital pediatric cardiac arrest including the diversity of management in different clinical and social contexts. Participation in this register will enable the evaluation of the diagnosis of CA, CPR and post-resuscitation care and its influence in survival and neurological prognosis. Methods An intrahospital CA data recording protocol has been designed following the Utstein model. 
Database is hosted according to European legislation regarding patient data protection. It is drafted in English and Spanish. Invitation to participate has been sent to Spanish, European and Latinamerican hospitals. Variables included, asses hospital characteristics, the resuscitation team, patient's demographics and background, CPR, post-resuscitation care, mortality, survival and long-term evolution. Survival at hospital discharge will be evaluated as a primary outcome and survival with good neurological status as a secondary outcome, analyzing the different factors involved in them. The study design is prospective, observational registry of a cohort of pediatric CA. Conclusions This study represents the development of a registry of in-hospital CA in childhood. Its development will provide access to CPR data in different hospital settings and will allow the analysis of current controversies in the treatment of pediatric CA and post-resuscitation care. The results may contribute to the development of further international recommendations. Trial register: ClinicalTrials.gov Identifier: NCT04675918. Registered 19 December 2020 - Retrospectively registered, https://clinicaltrials.gov/ct2/show/record/NCT04675918?cond=pediatric+cardiac+arrest&draw=2&rank=10.",PACHIN,0.991595972,Pediatric In-Hospital Cardiac Arrest International Registry,0.979042335,PACHIN,0.991595972,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/31/2021 +30134653,http://chemyang.ccnu.edu.cn/ccb/database/PADFrag,"PADFrag: A Database Built for the Exploration of Bioactive Fragment Space for Drug Discovery. Structural analyses of drugs and pesticides can enable the identification of new bioactive compounds with novel and diverse scaffolds as well as improve our understanding of the bioactive fragment space. 
The Pesticide And Drug Fragments (PADFrag) database is a unique bioinformatic-cheminformatic cross-referencing resource that combines detailed bioactive fragment data and potential targets with a strong focus on quantitative, analytic, and molecular-scale information for the exploration of bioactive fragment space for drug discovery ( http://chemyang.ccnu.edu.cn/ccb/database/PADFrag/ ). The main applications of PADFrag are the analysis of the privileged structures within known bioactive molecules, ab initio molecule library design, and core fragment discovery for fragment-based drug design. Other potential applications include prediction of fragment interactions and general pharmaceutical research.",PADFrag,0.997416735,Pesticide And Drug Fragments,0.979545602,PADFrag,0.997416735,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/6/2018 +23046413,http://bio.informatics.iupui.edu/PAGED,"PAGED: a pathway and gene-set enrichment database to enable molecular phenotype discoveries. Background Over the past decade, pathway and gene-set enrichment analysis has evolved into the study of high-throughput functional genomics. Owing to poorly annotated and incomplete pathway data, researchers have begun to combine pathway and gene-set enrichment analysis as well as network module-based approaches to identify crucial relationships between different molecular mechanisms. Methods To meet the new challenge of molecular phenotype discovery, in this work, we have developed an integrated online database, the Pathway And Gene Enrichment Database (PAGED), to enable comprehensive searches for disease-specific pathways, gene signatures, microRNA targets, and network modules by integrating gene-set-based prior knowledge as molecular patterns from multiple levels: the genome, transcriptome, post-transcriptome, and proteome. Results The online database we developed, PAGED http://bio.informatics.iupui.edu/PAGED is by far the most comprehensive public compilation of gene sets. 
In its current release, PAGED contains a total of 25,242 gene sets, 61,413 genes, 20 organisms, and 1,275,560 records from five major categories. Beyond its size, the advantage of PAGED lies in the explorations of relationships between gene sets as gene-set association networks (GSANs). Using colorectal cancer expression data analysis as a case study, we demonstrate how to query this database resource to discover crucial pathways, gene signatures, and gene network modules specific to colorectal cancer functional genomics. Conclusions This integrated online database lays a foundation for developing tools beyond third-generation pathway analysis approaches on for discovering molecular phenotypes, especially for disease-associated pathway/gene-set enrichment analysis.",PAGED,0.99276948,Pathway And Gene Enrichment Database,0.900361799,PAGED,0.99276948,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/11/2012 +24312499,http://bioinf.xmu.edu.cn/PaGenBase,"PaGenBase: a pattern gene database for the global and dynamic understanding of gene function. Pattern genes are a group of genes that have a modularized expression behavior under serial physiological conditions. The identification of pattern genes will provide a path toward a global and dynamic understanding of gene functions and their roles in particular biological processes or events, such as development and pathogenesis. In this study, we present PaGenBase, a novel repository for the collection of tissue- and time-specific pattern genes, including specific genes, selective genes, housekeeping genes and repressed genes. The PaGenBase database is now freely accessible at http://bioinf.xmu.edu.cn/PaGenBase/. In the current version (PaGenBase 1.0), the database contains 906,599 pattern genes derived from the literature or from data mining of more than 1,145,277 gene expression profiles in 1,062 distinct samples collected from 11 model organisms. Four statistical parameters were used to quantitatively evaluate the pattern genes. 
Moreover, three methods (quick search, advanced search and browse) were designed for rapid and customized data retrieval. The potential applications of PaGenBase are also briefly described. In summary, PaGenBase will serve as a resource for the global and dynamic understanding of gene function and will facilitate high-level investigations in a variety of fields, including the study of development, pathogenesis and novel drug discovery.",PaGenBase,0.997129798,NA,0,PaGenBase,0.997129798,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/2/2013 +26072489,http://discovery.informatics.iupui.edu/PAGER,"PAGER: constructing PAGs and new PAG-PAG relationships for network biology. In this article, we described a new database framework to perform integrative ""gene-set, network, and pathway analysis"" (GNPA). In this framework, we integrated heterogeneous data on pathways, annotated list, and gene-sets (PAGs) into a PAG electronic repository (PAGER). PAGs in the PAGER database are organized into P-type, A-type and G-type PAGs with a three-letter-code standard naming convention. The PAGER database currently compiles 44 313 genes from 5 species including human, 38 663 PAGs, 324 830 gene-gene relationships and two types of 3 174 323 PAG-PAG regulatory relationships-co-membership based and regulatory relationship based. To help users assess each PAG's biological relevance, we developed a cohesion measure called Cohesion Coefficient (CoCo), which is capable of disambiguating between biologically significant PAGs and random PAGs with an area-under-curve performance of 0.98. PAGER database was set up to help users to search and retrieve PAGs from its online web interface. PAGER enable advanced users to build PAG-PAG regulatory networks that provide complementary biological insights not found in gene set analysis or individual gene network analysis. 
We provide a case study using cancer functional genomics data sets to demonstrate how integrative GNPA help improve network biology data coverage and therefore biological interpretability. The PAGER database can be accessible openly at http://discovery.informatics.iupui.edu/PAGER/.",PAGER,0.964891553,NA,0,PAGER,0.964891553,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/1/2015 +33245774,http://discovery.informatics.uab.edu/PAGER-CoV,"PAGER-CoV: a comprehensive collection of pathways, annotated gene-lists and gene signatures for coronavirus disease studies. PAGER-CoV (http://discovery.informatics.uab.edu/PAGER-CoV/) is a new web-based database that can help biomedical researchers interpret coronavirus-related functional genomic study results in the context of curated knowledge of host viral infection, inflammatory response, organ damage, and tissue repair. The new database consists of 11 835 PAGs (Pathways, Annotated gene-lists, or Gene signatures) from 33 public data sources. Through the web user interface, users can search by a query gene or a query term and retrieve significantly matched PAGs with all the curated information. Users can navigate from a PAG of interest to other related PAGs through either shared PAG-to-PAG co-membership relationships or PAG-to-PAG regulatory relationships, totaling 19 996 993. Users can also retrieve enriched PAGs from an input list of COVID-19 functional study result genes, customize the search data sources, and export all results for subsequent offline data analysis. In a case study, we performed a gene set enrichment analysis (GSEA) of a COVID-19 RNA-seq data set from the Gene Expression Omnibus database. Compared with the results using the standard PAGER database, PAGER-CoV allows for more sensitive matching of known immune-related gene signatures. 
We expect PAGER-CoV to be invaluable for biomedical researchers to find molecular biology mechanisms and tailored therapeutics to treat COVID-19 patients.",PAGER-CoV,0.984723121,NA,0,PAGER-CoV,0.984723121,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +25336619,http://www.paidb.re.kr,"PAIDB v2.0: exploration and analysis of pathogenicity and resistance islands. Pathogenicity is a complex multifactorial process confounded by the concerted activity of genetic regions associated with virulence and/or resistance determinants. Pathogenicity islands (PAIs) and resistance islands (REIs) are key to the evolution of pathogens and appear to play complimentary roles in the process of bacterial infection. While PAIs promote disease development, REIs give a fitness advantage to the host against multiple antimicrobial agents. The Pathogenicity Island Database (PAIDB, http://www.paidb.re.kr) has been the only database dedicated to providing comprehensive information on all reported PAIs and candidate PAIs in prokaryotic genomes. In this study, we present PAIDB v2.0, whose functionality is extended to incorporate REIs. PAIDB v2.0 contains 223 types of PAIs with 1331 accessions, and 88 types of REIs with 108 accessions. With an improved detection scheme, 2673 prokaryotic genomes were analyzed to locate candidate PAIs and REIs. With additional quantitative and qualitative advancements in database content and detection accuracy, PAIDB will continue to facilitate pathogenomic studies of both pathogenic and non-pathogenic organisms.",PAIDB,0.989453137,Pathogenicity Island Database,0.901366401,PAIDB,0.989453137,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/21/2014 +30239681,http://palmxplore.mpob.gov.my,"PalmXplore: oil palm gene database. . A set of Elaeis guineensis genes had been generated by combining two gene prediction pipelines: Fgenesh++ developed by Softberry and Seqping by the Malaysian Palm Oil Board. 
PalmXplore was developed to provide a scalable data repository and a user-friendly search engine system to efficiently store, manage and retrieve the oil palm gene sequences and annotations. Information deposited in PalmXplore includes predicted genes, their genomic coordinates, as well as the annotations derived from external databases, such as Pfam, Gene Ontology and Kyoto Encyclopedia of Genes and Genomes. Information about genes related to important traits, such as those involved in fatty acid biosynthesis (FAB) and disease resistance, is also provided. The system offers Basic Local Alignment Search Tool homology search, where the results can be downloaded or visualized in the oil palm genome browser (MYPalmViewer). PalmXplore is regularly updated offering new features, improvements to genome annotation and new genomic sequences. The system is freely accessible at http://palmxplore.mpob.gov.my.",PalmXplore,0.977348566,NA,0,PalmXplore,0.977348566,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +29106626,http://pseudomonas.umaryland.edu,"PAMDB: a comprehensive Pseudomonas aeruginosa metabolome database. The Pseudomonas aeruginosaMetabolome Database (PAMDB, http://pseudomonas.umaryland.edu) is a searchable, richly annotated metabolite database specific to P. aeruginosa. P. aeruginosa is a soil organism and significant opportunistic pathogen that adapts to its environment through a versatile energy metabolism network. Furthermore, P. aeruginosa is a model organism for the study of biofilm formation, quorum sensing, and bioremediation processes, each of which are dependent on unique pathways and metabolites. The PAMDB is modelled on the Escherichia coli (ECMDB), yeast (YMDB) and human (HMDB) metabolome databases and contains >4370 metabolites and 938 pathways with links to over 1260 genes and proteins. 
The database information was compiled from electronic databases, journal articles and mass spectrometry (MS) metabolomic data obtained in our laboratories. For each metabolite entered, we provide detailed compound descriptions, names and synonyms, structural and physiochemical information, nuclear magnetic resonance (NMR) and MS spectra, enzymes and pathway information, as well as gene and protein sequences. The database allows extensive searching via chemical names, structure and molecular weight, together with gene, protein and pathway relationships. The PAMBD and its future iterations will provide a valuable resource to biologists, natural product chemists and clinicians in identifying active compounds, potential biomarkers and clinical diagnostics.",PAMBD,0.997342527,Pseudomonas aeruginosaMetabolome Database,0.988374015,PAMBD,0.997342527,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +30874591,http://bioinfo.icgeb.res.in/pamirdb,"PAmiRDB: A web resource for plant miRNAs targeting viruses. MicroRNAs (miRNAs) have emerged to be essential constituents of host antiviral-defense mechanisms. The miRNA mediated antiviral mechanism was first experimentally established in animals, which proved that host miRNAs regulate viral gene expression by targeting the animal virus mRNAs. There are comparatively fewer reports about such interactions in plants, however, artificial miRNA studies prove that miRNAs play similar antiviral role in plants too. To explore the extent of this phenomenon in plant genomes, and in the absence of any publicly available resource for prediction of plant miRNAs targeting viruses, we were motivated to predict such interactions of plant miRNAs and viral genes. The intriguing results of the predictions are compiled as a database, which we have named as PAmiRDB. 
The current version of PAmiRDB includes more than 2600 plant miRNAs and their specific interactions with corresponding targets in approximately 500 viral species (predominantly from the major plant-infecting virus families of geminiviruses and potyviruses). PAmiRDB is a database of known plant miRNAs and their predicted targets in virus genomes. The innovative database query-interface enables global and comprehensive investigation of such predicted interactions between host miRNAs and viral genes. The database integrated-tools also helps researchers to design experiments to confirm such interactions. PAmiRDB is available at http://bioinfo.icgeb.res.in/pamirdb.",PAmiRDB,0.994146883,NA,0,PAmiRDB,0.994146883,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/15/2019 +31373607,http://db.cngb.org/pird,"PIRD: Pan Immune Repertoire Database. Motivation T and B cell receptors (TCRs and BCRs) play a pivotal role in the adaptive immune system by recognizing an enormous variety of external and internal antigens. Understanding these receptors is critical for exploring the process of immunoreaction and exploiting potential applications in immunotherapy and antibody drug design. Although a large number of samples have had their TCR and BCR repertoires sequenced using high-throughput sequencing in recent years, very few databases have been constructed to store these kinds of data. To resolve this issue, we developed a database. Results We developed a database, the Pan Immune Repertoire Database (PIRD), located in China National GeneBank (CNGBdb), to collect and store annotated TCR and BCR sequencing data, including from Homo sapiens and other species. In addition to data storage, PIRD also provides functions of data visualization and interactive online analysis. Additionally, a manually curated database of TCRs and BCRs targeting known antigens (TBAdb) was also deposited in PIRD. 
Availability and implementation PIRD can be freely accessed at https://db.cngb.org/pird.",PIRD,0.95986867,Pan Immune Repertoire Database,0.967511244,Pan Immune Repertoire Database,0.967511244,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/1/2020 +30203047,http://bioinfo.life.hust.edu.cn/Pancan-meQTL,"Pancan-meQTL: a database to systematically evaluate the effects of genetic variants on methylation in human cancer. DNA methylation is an important epigenetic mechanism for regulating gene expression. Aberrant DNA methylation has been observed in various human diseases, including cancer. Single-nucleotide polymorphisms can contribute to tumor initiation, progression and prognosis by influencing DNA methylation, and DNA methylation quantitative trait loci (meQTL) have been identified in physiological and pathological contexts. However, no database has been developed to systematically analyze meQTLs across multiple cancer types. Here, we present Pancan-meQTL, a database to comprehensively provide meQTLs across 23 cancer types from The Cancer Genome Atlas by integrating genome-wide genotype and DNA methylation data. In total, we identified 8 028 964 cis-meQTLs and 965 050 trans-meQTLs. Among these, 23 432 meQTLs are associated with patient overall survival times. Furthermore, we identified 2 214 458 meQTLs that overlap with known loci identified through genome-wide association studies. Pancan-meQTL provides a user-friendly web interface (http://bioinfo.life.hust.edu.cn/Pancan-meQTL/) that is convenient for browsing, searching and downloading data of interest. This database is a valuable resource for investigating the roles of genetics and epigenetics in cancer.",Pancan-meQTL,0.99280415,NA,0,Pancan-meQTL,0.99280415,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +29036324,http://bioinfo.life.hust.edu.cn/PancanQTL,"PancanQTL: systematic identification of cis-eQTLs and trans-eQTLs in 33 cancer types. 
Expression quantitative trait locus (eQTL) analysis, which links variations in gene expression to genotypes, is essential to understanding gene regulation and to interpreting disease-associated loci. Currently identified eQTLs are mainly in samples of blood and other normal tissues. However, no database comprehensively provides eQTLs in large number of cancer samples. Using the genotype and expression data of 9196 tumor samples in 33 cancer types from The Cancer Genome Atlas (TCGA), we identified 5 606 570 eQTL-gene pairs in the cis-eQTL analysis and 231 210 eQTL-gene pairs in the trans-eQTL analysis. We further performed survival analysis and identified 22 212 eQTLs associated with patient overall survival. Furthermore, we linked the eQTLs to genome-wide association studies (GWAS) data and identified 337 131 eQTLs that overlap with existing GWAS loci. We developed PancanQTL, a user-friendly database (http://bioinfo.life.hust.edu.cn/PancanQTL/), to store cis-eQTLs, trans-eQTLs, survival-associated eQTLs and GWAS-related eQTLs to enable searching, browsing and downloading. PancanQTL could help the research community understand the effects of inherited variants in tumorigenesis and development.",PancanQTL,0.996932268,NA,0,PancanQTL,0.996932268,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +33294866,http://www.pancreatlas.org,"Pancreatlas: Applying an Adaptable Framework to Map the Human Pancreas in Health and Disease. Human tissue phenotyping generates complex spatial information from numerous imaging modalities, yet images typically become static figures for publication, and original data and metadata are rarely available. While comprehensive image maps exist for some organs, most resources have limited support for multiplexed imaging or have non-intuitive user interfaces. 
Therefore, we built a Pancreatlas resource that integrates several technologies into a unique interface, allowing users to access richly annotated web pages, drill down to individual images, and deeply explore data online. The current version of Pancreatlas contains over 800 unique images acquired by whole-slide scanning, confocal microscopy, and imaging mass cytometry, and is available at https://www.pancreatlas.org. To create this human pancreas-specific biological imaging resource, we developed a React-based web application and Python-based application programming interface, collectively called Flexible Framework for Integrating and Navigating Data (FFIND), which can be adapted beyond Pancreatlas to meet countless imaging or other structured data-management needs.",Pancreatlas,0.996841073,NA,0,Pancreatlas,0.996841073,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/5/2020 +30349509,http://www.bioinfo.iicb.res.in/pangfr-hm,"PanGFR-HM: A Dynamic Web Resource for Pan-Genomic and Functional Profiling of Human Microbiome With Comparative Features. The conglomerate of microorganisms inhabiting various body-sites of human, known as the human microbiome, is one of the key determinants of human health and disease. Comprehensive pan-genomic and functional analysis approach for human microbiome components can enrich our understanding about impact of microbiome on human health. By utilizing this approach we developed PanGFR-HM (http://www.bioinfo.iicb.res.in/pangfr-hm/) - a novel dynamic web-resource that integrates genomic and functional characteristics of 1293 complete microbial genomes available from Human Microbiome Project. The resource allows users to explore genomic/functional diversity and genome-based phylogenetic relationships between human associated microbial genomes, not provided by any other resource. The key features implemented here include pan-genome and functional analysis of organisms based on taxonomy or body-site, and comparative analysis between groups of organisms. 
The first feature can also identify probable gene-loss events and significantly over/under represented KEGG/COG categories within pan-genome. The unique second feature can perform comparative genomic, functional and pathways analysis between 4 groups of microbes. The dynamic nature of this resource enables users to define parameters for orthologous clustering and to select any set of organisms for analysis. As an application for comparative feature of PanGFR-HM, we performed a comparative analysis with 67 Lactobacillus genomes isolated from human gut, oral cavity and urogenital tract, and therefore characterized the body-site specific genes, enzymes and pathways. Altogether, PanGFR-HM, being unique in its content and functionality, is expected to provide a platform for microbiome-based comparative functional and evolutionary genomics.",PanGFR-HM,0.976677001,NA,0,PanGFR-HM,0.976677001,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/8/2018 +25102069,http://panoramaweb.org,"Panorama: a targeted proteomics knowledge base. Panorama is a web application for storing, sharing, analyzing, and reusing targeted assays created and refined with Skyline,1 an increasingly popular Windows client software tool for targeted proteomics experiments. Panorama allows laboratories to store and organize curated results contained in Skyline documents with fine-grained permissions, which facilitates distributed collaboration and secure sharing of published and unpublished data via a web-browser interface. It is fully integrated with the Skyline workflow and supports publishing a document directly to a Panorama server from the Skyline user interface. Panorama captures the complete Skyline document information content in a relational database schema. Curated results published to Panorama can be aggregated and exported as chromatogram libraries. These libraries can be used in Skyline to pick optimal targets in new experiments and to validate peak identification of target peptides. 
Panorama is open-source and freely available. It is distributed as part of LabKey Server,2 an open source biomedical research data management system. Laboratories and organizations can set up Panorama locally by downloading and installing the software on their own servers. They can also request freely hosted projects on https://panoramaweb.org , a Panorama server maintained by the Department of Genome Sciences at the University of Washington.",Panorama,0.978761613,NA,0,Panorama,0.978761613,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/18/2014 +29487113,http://panoramaweb.org/public.url,"Panorama Public: A Public Repository for Quantitative Data Sets Processed in Skyline. To address the growing need for a centralized, community resource of published results processed with Skyline, and to provide reviewers and readers immediate visual access to the data behind published conclusions, we present Panorama Public (https://panoramaweb.org/public.url), a repository of Skyline documents supporting published results. Panorama Public is built on Panorama, an open source data management system for mass spectrometry data processed with the Skyline targeted mass spectrometry environment. The Panorama web application facilitates viewing, sharing, and disseminating results contained in Skyline documents via a web-browser. Skyline users can easily upload their documents to a Panorama server and allow other researchers to explore uploaded results in the Panorama web-interface through a variety of familiar summary graphs as well as annotated views of the chromatographic peaks processed with Skyline. This makes Panorama ideal for sharing targeted, quantitative results contained in Skyline documents with collaborators, reviewers, and the larger proteomics community. 
The Panorama Public repository employs the full data visualization capabilities of Panorama which facilitates sharing results with reviewers during manuscript review.",Panorama Public,0.976007561,NA,0,Panorama Public,0.976007561,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/27/2018 +21731755,http://www4a.biotec.or.th/PASNP,"PanSNPdb: the Pan-Asian SNP genotyping database. The HUGO Pan-Asian SNP consortium conducted the largest survey to date of human genetic diversity among Asians by sampling 1,719 unrelated individuals among 71 populations from China, India, Indonesia, Japan, Malaysia, the Philippines, Singapore, South Korea, Taiwan, and Thailand. We have constructed a database (PanSNPdb), which contains these data and various new analyses of them. PanSNPdb is a research resource in the analysis of the population structure of Asian peoples, including linkage disequilibrium patterns, haplotype distributions, and copy number variations. Furthermore, PanSNPdb provides an interactive comparison with other SNP and CNV databases, including HapMap3, JSNP, dbSNP and DGV and thus provides a comprehensive resource of human genetic diversity. The information is accessible via a widely accepted graphical interface used in many genetic variation databases. Unrestricted access to PanSNPdb and any associated files is available at: http://www4a.biotec.or.th/PASNP.",PanSNPdb,0.997955024,sian,0.570886314,PanSNPdb,0.997955024,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/23/2011 +"26578592, 27899595, 30407594",http://pantherdb.org,"PANTHER version 10: expanded protein families and functions, and analysis tools. PANTHER (Protein Analysis THrough Evolutionary Relationships, http://pantherdb.org) is a widely used online resource for comprehensive protein evolutionary and functional classification, and includes tools for large-scale biological data analysis. 
Recent development has been focused in three main areas: genome coverage, functional information ('annotation') coverage and accuracy, and improved genomic data analysis tools. The latest version of PANTHER, 10.0, includes almost 5000 new protein families (for a total of over 12 000 families), each with a reference phylogenetic tree including protein-coding genes from 104 fully sequenced genomes spanning all kingdoms of life. Phylogenetic trees now include inference of horizontal transfer events in addition to speciation and gene duplication events. Functional annotations are regularly updated using the models generated by the Gene Ontology Phylogenetic Annotation Project. For the data analysis tools, PANTHER has expanded the number of different 'functional annotation sets' available for functional enrichment testing, allowing analyses to access all Gene Ontology annotations--updated monthly from the Gene Ontology database--in addition to the annotations that have been inferred through evolutionary relationships. The Prowler (data browser) has been updated to enable users to more efficiently browse the entire database, and to create custom gene lists using the multiple axes of classification in PANTHER.",PANTHER,0.998102546,Protein Analysis Through Evolutionary Relationships,0.987256037,PANTHER,0.998102546,3,NA,"23193289.0, 23868073.0, 33290554.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +"23868073, 33290554",http://www.pantherdb.org,"Large-scale gene function analysis with the PANTHER classification system. The PANTHER (protein annotation through evolutionary relationship) classification system (http://www.pantherdb.org/) is a comprehensive system that combines gene function, ontology, pathways and statistical analysis tools that enable biologists to analyze large-scale, genome-wide data from sequencing, proteomics or gene expression experiments. 
The system is built with 82 complete genomes organized into gene families and subfamilies, and their evolutionary relationships are captured in phylogenetic trees, multiple sequence alignments and statistical models (hidden Markov models or HMMs). Genes are classified according to their function in several different ways: families and subfamilies are annotated with ontology terms (Gene Ontology (GO) and PANTHER protein class), and sequences are assigned to PANTHER pathways. The PANTHER website includes a suite of tools that enable users to browse and query gene functions, and to analyze large-scale experimental data with a number of statistical tests. It is widely used by bench scientists, bioinformaticians, computer scientists and systems biologists. In the 2013 release of PANTHER (v.8.0), in addition to an update of the data content, we redesigned the website interface to improve both user experience and the system's analytical capability. This protocol provides a detailed description of how to analyze genome-wide experimental data with the PANTHER classification system.",PANTHER,0.997525811,protein annotation through evolutionary relationship,0.985444486,PANTHER,0.997525811,2,NA,"23193289.0, 26578592.0, 27899595.0, 30407594.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +23193289,http://pantherdb.org-have,"PANTHER in 2013: modeling the evolution of gene function, and other gene attributes, in the context of phylogenetic trees. The data and tools in PANTHER-a comprehensive, curated database of protein families, trees, subfamilies and functions available at http://pantherdb.org-have undergone continual, extensive improvement for over a decade. Here, we describe the current PANTHER process as a whole, as well as the website tools for analysis of user-uploaded data. 
The main goals of PANTHER remain essentially unchanged: the accurate inference (and practical application) of gene and protein function over large sequence databases, using phylogenetic trees to extrapolate from the relatively sparse experimental information from a few model organisms. Yet the focus of PANTHER has continually shifted toward more accurate and detailed representations of evolutionary events in gene family histories. The trees are now designed to represent gene family evolution, including inference of evolutionary events, such as speciation and gene duplication. Subfamilies are still curated and used to define HMMs, but gene ontology functional annotations can now be made at any node in the tree, and are designed to represent gain and loss of function by ancestral genes during evolution. Finally, PANTHER now includes stable database identifiers for inferred ancestral genes, which are used to associate inferred gene attributes with particular genes in the common ancestral genomes of extant species.",PANTHER,0.996814907,NA,0,PANTHER,0.996814907,1,NA,"26578592.0, 27899595.0, 30407594.0, 23868073.0, 33290554.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/27/2012 +31733062,http://paramecium.i2bc.paris-saclay.fr,"ParameciumDB 2019: integrating genomic data across the genus for functional and evolutionary biology. ParameciumDB (https://paramecium.i2bc.paris-saclay.fr) is a community model organism database for the genome and genetics of the ciliate Paramecium. ParameciumDB development relies on the GMOD (www.gmod.org) toolkit. The ParameciumDB web site has been publicly available since 2006 when the P. tetraurelia somatic genome sequence was released, revealing that a series of whole genome duplications punctuated the evolutionary history of the species. The genome is linked to available genetic data and stocks. ParameciumDB has undergone major changes in its content and website since the last update published in 2011. 
Genomes from multiple Paramecium species, especially from the P. aurelia complex, are now included in ParameciumDB. A new modern web interface accompanies this transition to a database for the whole Paramecium genus. Gene pages have been enriched with orthology relationships, among the Paramecium species and with a panel of model organisms across the eukaryotic tree. This update also presents expert curation of Paramecium mitochondrial genomes.",ParameciumDB,0.996724844,NA,0,ParameciumDB,0.996724844,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24923818,http://crdd.osdd.net/raghava/parapep,"ParaPep: a web resource for experimentally validated antiparasitic peptide sequences and their structures. . ParaPep is a repository of antiparasitic peptides, which provides comprehensive information related to experimentally validated antiparasitic peptide sequences and their structures. The data were collected and compiled from published research papers, patents and from various databases. The current release of ParaPep holds 863 entries among which 519 are unique peptides. In addition to peptides having natural amino acids, ParaPep also consists of peptides having d-amino acids and chemically modified residues. In ParaPep, most of the peptides have been evaluated for growth inhibition of various species of Plasmodium, Leishmania and Trypanosoma. We have provided comprehensive information about these peptides that include peptide sequence, chemical modifications, stereochemistry, antiparasitic activity, origin, nature of peptide, assay types, type of parasite, mode of action and hemolytic activity. Structures of peptides consisting of natural, as well as modified amino acids have been determined using state-of-the-art software, PEPstr. To facilitate users, various user-friendly web tools, for data fetching, analysis and browsing, have been integrated. We hope that ParaPep will be advantageous in designing therapeutic peptides against parasitic diseases. 
Database URL: http://crdd.osdd.net/raghava/parapep/",ParaPep,0.996673524,NA,0,ParaPep,0.996673524,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/12/2014 +21593080,http://www2.cancer.ucl.ac.uk/Parkinson_Db2,"ParkDB: a Parkinson's disease gene expression database. Parkinson's disease (PD) is a common, adult-onset, neuro-degenerative disorder characterized by the degeneration of cardinal motor signs mainly due to the loss of dopaminergic neurons in the substantia nigra. To date, researchers still have limited understanding of the key molecular events that provoke neurodegeneration in this disease. Here, we present ParkDB, the first queryable database dedicated to gene expression in PD. ParkDB contains a complete set of re-analyzed, curated and annotated microarray datasets. This resource enables scientists to identify and compare expression signatures involved in PD and dopaminergic neuron differentiation under different biological conditions and across species. Database URL: http://www2.cancer.ucl.ac.uk/Parkinson_Db2/",ParkDB,0.997004271,NA,0,ParkDB,0.997004271,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/18/2011 +26612862,"http://mmb.irbbarcelona.org/BIGNASim/, http://mmb.irbbarcelona.org/BIGNASim/SuppMaterial","BIGNASim: a NoSQL database structure and analysis portal for nucleic acids simulation data. Molecular dynamics simulation (MD) is, just behind genomics, the bioinformatics tool that generates the largest amounts of data, and that is using the largest amount of CPU time in supercomputing centres. MD trajectories are obtained after months of calculations, analysed in situ, and in practice forgotten. Several projects to generate stable trajectory databases have been developed for proteins, but no equivalence exists in the nucleic acids world. We present here a novel database system to store MD trajectories and analyses of nucleic acids. The initial data set available consists mainly of the benchmark of the new molecular dynamics force-field, parmBSC1. 
It contains 156 simulations, with over 120 μs of total simulation time. A deposition protocol is available to accept the submission of new trajectory data. The database is based on the combination of two NoSQL engines, Cassandra for storing trajectories and MongoDB to store analysis results and simulation metadata. The analyses available include backbone geometries, helical analysis, NMR observables and a variety of mechanical analyses. Individual trajectories and combined meta-trajectories can be downloaded from the portal. The system is accessible through http://mmb.irbbarcelona.org/BIGNASim/. Supplementary Material is also available on-line at http://mmb.irbbarcelona.org/BIGNASim/SuppMaterial/.",parmBSC1,0.789278269,NA,0,parmBSC1,0.789278269,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,11/26/2015 +23448274,"http://hi.ustc.edu.cn:8080/PASmiR, http://pcsb.ahau.edu.cn:8080/PASmiR","PASmiR: a literature-curated database for miRNA molecular regulation in plant response to abiotic stress. Background Over 200 published studies of more than 30 plant species have reported a role for miRNAs in regulating responses to abiotic stresses. However, data from these individual reports has not been collected into a single database. The lack of a curated database of stress-related miRNAs limits research in this field, and thus a cohesive database system should necessarily be constructed for data deposit and further application. Description PASmiR, a literature-curated and web-accessible database, was developed to provide detailed, searchable descriptions of miRNA molecular regulation in different plant abiotic stresses. PASmiR currently includes data from ~200 published studies, representing 1038 regulatory relationships between 682 miRNAs and 35 abiotic stresses in 33 plant species. PASmiR's interface allows users to retrieve miRNA-stress regulatory entries by keyword search using plant species, abiotic stress, and miRNA identifier. 
Each entry upon keyword query contains detailed regulation information for a specific miRNA, including species name, miRNA identifier, stress name, miRNA expression pattern, detection method for miRNA expression, a reference literature, and target gene(s) of the miRNA extracted from the corresponding reference or miRBase. Users can also contribute novel regulatory entries by using a web-based submission page. The PASmiR database is freely accessible from the two URLs of http://hi.ustc.edu.cn:8080/PASmiR, and http://pcsb.ahau.edu.cn:8080/PASmiR. Conclusion The PASmiR database provides a solid platform for collection, standardization, and searching of miRNA-abiotic stress regulation data in plants. As such this database will be a comprehensive repository for miRNA regulatory mechanisms involved in plant response to abiotic stresses for the plant stress physiology community.",PASmiR,0.993088782,NA,0,PASmiR,0.993088782,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2013 +33363449,http://musaelab.ca/pass-database,"PASS: A Multimodal Database of Physical Activity and Stress for Mobile Passive Body/ Brain-Computer Interface Research. With the burgeoning of wearable devices and passive body/brain-computer interfaces (B/BCIs), automated stress monitoring in everyday settings has gained significant attention recently, with applications ranging from serious games to clinical monitoring. With mobile users, however, challenges arise due to other overlapping (and potentially confounding) physiological responses (e.g., due to physical activity) that may mask the effects of stress, as well as movement artifacts that can be introduced in the measured signals. For example, the classical increase in heart rate can no longer be attributed solely to stress and could be caused by the activity itself. This makes the development of mobile passive B/BCIs challenging. In this paper, we introduce PASS, a multimodal database of Physical Activity and StresS collected from 48 participants. 
Participants performed tasks of varying stress levels at three different activity levels and provided quantitative ratings of their perceived stress and fatigue levels. To manipulate stress, two video games (i.e., a calm exploration game and a survival game) were used. Peripheral physical activity (electrocardiography, electrodermal activity, breathing, skin temperature) as well as cerebral activity (electroencephalography) were measured throughout the experiment. A complete description of the experimental protocol is provided and preliminary analyses are performed to investigate the physiological reactions to stress in the presence of physical activity. The PASS database, including raw data and subjective ratings has been made available to the research community at http://musaelab.ca/pass-database/. It is hoped that this database will help advance mobile passive B/BCIs for use in everyday settings.",PASS,0.929875195,NA,0,PASS,0.929875195,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/8/2020 +22123743,http://caps.ncbs.res.in/pass2,"PASS2 version 4: an update to the database of structure-based sequence alignments of structural domain superfamilies. Accurate structure-based sequence alignments of distantly related proteins are crucial in gaining insight about protein domains that belong to a superfamily. The PASS2 database provides alignments of proteins related at the superfamily level and are characterized by low sequence identity. We thus report an automated, updated version of the superfamily alignment database known as PASS2.4, consisting of 1961 superfamilies and 10,569 protein domains, which is in direct correspondence with SCOP (1.75) database. Database organization, improved methods for efficient structure-based sequence alignments and the analysis of extreme distantly related proteins within superfamilies formed the focus of this update. 
Alignment of family-specific functional residues can be realized using such alignments and is shown using one superfamily as an example. The database of alignments and other related features can be accessed at http://caps.ncbs.res.in/pass2/.",PASS2.4,0.720472276,NA,0,PASS2.4,0.720472276,1,26553811,NA,low_prob_best_name,do not remove,conflicting record(s) to be removed,NA,NA,NA,NA,11/28/2011 +31733064,http://ophid.utoronto.ca/pathDIP,"pathDIP 4: an extended pathway annotations and enrichment analysis resource for human, model organisms and domesticated species. PathDIP was introduced to increase proteome coverage of literature-curated human pathway databases. PathDIP 4 now integrates 24 major databases. To further reduce the number of proteins with no curated pathway annotation, pathDIP integrates pathways with physical protein-protein interactions (PPIs) to predict significant physical associations between proteins and curated pathways. For human, it provides pathway annotations for 5366 pathway orphans. Integrated pathway annotation now includes six model organisms and ten domesticated animals. A total of 6401 core and ortholog pathways have been curated from the literature or by annotating orthologs of human proteins in the literature-curated pathways. Extended pathways are the result of combining these pathways with protein-pathway associations that are predicted using organism-specific PPIs. Extended pathways expand proteome coverage from 81 088 to 120 621 proteins, making pathDIP 4 the largest publicly available pathway database for these organisms and providing a necessary platform for comprehensive pathway-enrichment analysis. PathDIP 4 users can customize their search and analysis by selecting organism, identifier and subset of pathways. Enrichment results and detailed annotations for input list can be obtained in different formats and views. 
To support automated bioinformatics workflows, Java, R and Python APIs are available for batch pathway annotation and enrichment analysis. PathDIP 4 is publicly available at http://ophid.utoronto.ca/pathDIP.",PathDIP,0.994651794,NA,0,PathDIP,0.994651794,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +34521345,http://pathfams.uwaterloo.ca,"PathFams: statistical detection of pathogen-associated protein domains. Background A substantial fraction of genes identified within bacterial genomes encode proteins of unknown function. Identifying which of these proteins represent potential virulence factors, and mapping their key virulence determinants, is a challenging but important goal. Results To facilitate virulence factor discovery, we performed a comprehensive analysis of 17,929 protein domain families within the Pfam database, and scored them based on their overrepresentation in pathogenic versus non-pathogenic species, taxonomic distribution, relative abundance in metagenomic datasets, and other factors. Conclusions We identify pathogen-associated domain families, candidate virulence factors in the human gut, and eukaryotic-like mimicry domains with likely roles in virulence. Furthermore, we provide an interactive database called PathFams to allow users to explore pathogen-associated domains as well as identify pathogen-associated domains and domain architectures in user-uploaded sequences of interest. PathFams is freely available at https://pathfams.uwaterloo.ca .",PathFams,0.991846502,NA,0,PathFams,0.991846502,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/14/2021 +31160594,http://patho.phenomebrowser.net,"PathoPhenoDB, linking human pathogens to their phenotypes in support of infectious disease research. Understanding the relationship between the pathophysiology of infectious disease, the biology of the causative agent and the development of therapeutic and diagnostic approaches is dependent on the synthesis of a wide range of types of information. 
Provision of a comprehensive and integrated disease phenotype knowledgebase has the potential to provide novel and orthogonal sources of information for the understanding of infectious agent pathogenesis, and support for research on disease mechanisms. We have developed PathoPhenoDB, a database containing pathogen-to-phenotype associations. PathoPhenoDB relies on manual curation of pathogen-disease relations, on ontology-based text mining as well as manual curation to associate host disease phenotypes with infectious agents. Using Semantic Web technologies, PathoPhenoDB also links to knowledge about drug resistance mechanisms and drugs used in the treatment of infectious diseases. PathoPhenoDB is accessible at http://patho.phenomebrowser.net/ , and the data are freely available through a public SPARQL endpoint.",PathoPhenoDB,0.995027721,henomebrowser,0.623123646,PathoPhenoDB,0.995027721,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/3/2019 +24727366,http://www.pathoplant.de/expression_analysis.php,"'In silico expression analysis', a novel PathoPlant web tool to identify abiotic and biotic stress conditions associated with specific cis-regulatory sequences. Using bioinformatics, putative cis-regulatory sequences can be easily identified using pattern recognition programs on promoters of specific gene sets. The abundance of predicted cis-sequences is a major challenge to associate these sequences with a possible function in gene expression regulation. To identify a possible function of the predicted cis-sequences, a novel web tool designated 'in silico expression analysis' was developed that correlates submitted cis-sequences with gene expression data from Arabidopsis thaliana. The web tool identifies the A. thaliana genes harbouring the sequence in a defined promoter region and compares the expression of these genes with microarray data. The result is a hierarchy of abiotic and biotic stress conditions to which these genes are most likely responsive. 
When testing the performance of the web tool, known cis-regulatory sequences were submitted to the 'in silico expression analysis' resulting in the correct identification of the associated stress conditions. When using a recently identified novel elicitor-responsive sequence, a WT-box (CGACTTTT), the 'in silico expression analysis' predicts that genes harbouring this sequence in their promoter are most likely Botrytis cinerea induced. Consistent with this prediction, the strongest induction of a reporter gene harbouring this sequence in the promoter is observed with B. cinerea in transgenic A. thaliana. DATABASE URL: http://www.pathoplant.de/expression_analysis.php.",PathoPlant,0.994976938,NA,0,PathoPlant,0.994976938,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/10/2014 +27625390,http://pathoyeastract.org,"The PathoYeastract database: an information system for the analysis of gene and genomic transcription regulation in pathogenic yeasts. We present the PATHOgenic YEAst Search for Transcriptional Regulators And Consensus Tracking (PathoYeastract - http://pathoyeastract.org) database, a tool for the analysis and prediction of transcription regulatory associations at the gene and genomic levels in the pathogenic yeasts Candida albicans and C. glabrata Upon data retrieval from hundreds of publications, followed by curation, the database currently includes 28 000 unique documented regulatory associations between transcription factors (TF) and target genes and 107 DNA binding sites, considering 134 TFs in both species. Following the structure used for the YEASTRACT database, PathoYeastract makes available bioinformatics tools that enable the user to exploit the existing information to predict the TFs involved in the regulation of a gene or genome-wide transcriptional response, while ranking those TFs in order of their relative importance. 
Each search can be filtered based on the selection of specific environmental conditions, experimental evidence or positive/negative regulatory effect. Promoter analysis tools and interactive visualization tools for the representation of TF regulatory networks are also provided. The PathoYeastract database further provides simple tools for the prediction of gene and genomic regulation based on orthologous regulatory associations described for other yeast species, a comparative genomics setup for the study of cross-species evolution of regulatory networks.",PathoYeastract,0.99473387,Search for Transcriptional Regulators And,0.811140098,PathoYeastract,0.99473387,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/12/2016 +25591449,http://proteomeview.hupo.org.cn/PathPPI/PathPPI.html,"PathPPI: an integrated dataset of human pathways and protein-protein interactions. Integration of pathway and protein-protein interaction (PPI) data can provide more information that could lead to new biological insights. PPIs are usually represented by a simple binary model, whereas pathways are represented by more complicated models. We developed a series of rules for transforming protein interactions from pathway to binary model, and the protein interactions from seven pathway databases, including PID, BioCarta, Reactome, NetPath, INOH, SPIKE and KEGG, were transformed based on these rules. These pathway-derived binary protein interactions were integrated with PPIs from other five PPI databases including HPRD, IntAct, BioGRID, MINT and DIP, to develop integrated dataset (named PathPPI). More detailed interaction type and modification information on protein interactions can be preserved in PathPPI than other existing datasets. Comparison analysis results indicate that most of the interaction overlaps values (O AB) among these pathway databases were less than 5%, and these databases must be used conjunctively. 
The PathPPI data was provided at http://proteomeview.hupo.org.cn/PathPPI/PathPPI.html.",PathPPI,0.955567122,NA,0,PathPPI,0.955567122,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/15/2015 +31647099,http://www.pathwaycommons.org,"Pathway Commons 2019 Update: integration, analysis and exploration of pathway data. Pathway Commons (https://www.pathwaycommons.org) is an integrated resource of publicly available information about biological pathways including biochemical reactions, assembly of biomolecular complexes, transport and catalysis events and physical interactions involving proteins, DNA, RNA, and small molecules (e.g. metabolites and drug compounds). Data is collected from multiple providers in standard formats, including the Biological Pathway Exchange (BioPAX) language and the Proteomics Standards Initiative Molecular Interactions format, and then integrated. Pathway Commons provides biologists with (i) tools to search this comprehensive resource, (ii) a download site offering integrated bulk sets of pathway data (e.g. tables of interactions and gene sets), (iii) reusable software libraries for working with pathway information in several programming languages (Java, R, Python and Javascript) and (iv) a web service for programmatically querying the entire dataset. Visualization of pathways is supported using the Systems Biological Graphical Notation (SBGN). Pathway Commons currently contains data from 22 databases with 4794 detailed human biochemical processes (i.e. pathways) and ∼2.3 million interactions. To enhance the usability of this large resource for end-users, we develop and maintain interactive web applications and training materials that enable pathway exploration and advanced analysis.",Pathway Commons,0.995383561,NA,0,Pathway Commons,0.995383561,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +30395323,http://www.patlas.site,"Plasmid ATLAS: plasmid visual analytics and identification in high-throughput sequencing data. 
Plasmid ATLAS (pATLAS, http://www.patlas.site) provides an easy-to-use web accessible database with visual analytics tools to explore the relationships of plasmids available in NCBI's RefSeq database. pATLAS has two main goals: (i) to provide an easy way to search for plasmids deposited in NCBI RefSeq and their associated metadata; (ii) to visualize the relationships of plasmids in a graph, allowing the exploration of plasmid evolution. pATLAS allows searching by plasmid name, bacterial host taxa, antibiotic resistance and virulence genes, plasmid families, and by sequence length and similarity. pATLAS is also able to represent in the plasmid network, plasmid sets identified by external pipelines using mapping, mash screen or assembly from high-throughput sequencing data. By representing the identified hits within the network of relationships between plasmids, allowing the possibility of removing redundant results, and by taking advantage of the browsing capabilities of pATLAS, users can more easily interpret the pipelines' results. All these analyses can be saved to a JSON file for sharing and future re-evaluation. Furthermore, by offering a REST-API, the pATLAS database and network display are easily accessible by other interfaces or pipelines.",pATLAS,0.996197641,NA,0,pATLAS,0.996197641,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +"24225323, 31667520",http://www.patricbrc.org,"PATRIC, the bacterial bioinformatics database and analysis resource. The Pathosystems Resource Integration Center (PATRIC) is the all-bacterial Bioinformatics Resource Center (BRC) (http://www.patricbrc.org). A joint effort by two of the original National Institute of Allergy and Infectious Diseases-funded BRCs, PATRIC provides researchers with an online resource that stores and integrates a variety of data types [e.g. genomics, transcriptomics, protein-protein interactions (PPIs), three-dimensional protein structures and sequence typing data] and associated metadata. 
Datatypes are summarized for individual genomes and across taxonomic levels. All genomes in PATRIC, currently more than 10,000, are consistently annotated using RAST, the Rapid Annotations using Subsystems Technology. Summaries of different data types are also provided for individual genes, where comparisons of different annotations are available, and also include available transcriptomic data. PATRIC provides a variety of ways for researchers to find data of interest and a private workspace where they can store both genomic and gene associations, and their own private data. Both private and public data can be analyzed together using a suite of tools to perform comparative genomic or transcriptomic analysis. PATRIC also includes integrated information related to disease and PPIs. All the data and integrated analysis and visualization tools are freely available. This manuscript describes updates to the PATRIC since its initial report in the 2007 NAR Database Issue.",PATRIC,0.997826576,Center,0.560106337,PATRIC,0.997826576,2,21712250,25273106,NA,NA,conflicting record(s) to be removed,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2020 +25273106,http://patricbrc.org,"Curation, integration and visualization of bacterial virulence factors in PATRIC. Motivation We've developed a highly curated bacterial virulence factor (VF) library in PATRIC (Pathosystems Resource Integration Center, www.patricbrc.org) to support infectious disease research. Although several VF databases are available, there is still a need to incorporate new knowledge found in published experimental evidence and integrate these data with other information known for these specific VF genes, including genomic and other omics data. This integration supports the identification of VFs, comparative studies and hypothesis generation, which facilitates the understanding of virulence and pathogenicity. 
Results We have manually curated VFs from six prioritized NIAID (National Institute of Allergy and Infectious Diseases) category A-C bacterial pathogen genera, Mycobacterium, Salmonella, Escherichia, Shigella, Listeria and Bartonella, using published literature. This curated information on virulence has been integrated with data from genomic functional annotations, trancriptomic experiments, protein-protein interactions and disease information already present in PATRIC. Such integration gives researchers access to a broad array of information about these individual genes, and also to a suite of tools to perform comparative genomic and transcriptomics analysis that are available at PATRIC. Availability and implementation All tools and data are freely available at PATRIC (http://patricbrc.org). Supplementary information Supplementary data are available at Bioinformatics online.",PATRIC,0.997595549,NA,0,PATRIC,0.997595549,1,NA,"24225323.0, 31667520.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,9/30/2014 +"23093593, 28053164",http://pave.niaid.nih.gov,"The Papillomavirus Episteme: a central resource for papillomavirus sequence data and analysis. The goal of the Papillomavirus Episteme (PaVE) is to provide an integrated resource for the analysis of papillomavirus (PV) genome sequences and related information. The PaVE is a freely accessible, web-based tool (http://pave.niaid.nih.gov) created around a relational database, which enables storage, analysis and exchange of sequence information. From a design perspective, the PaVE adopts an Open Source software approach and stresses the integration and reuse of existing tools. Reference PV genome sequences have been extracted from publicly available databases and reannotated using a custom-created tool. To date, the PaVE contains 241 annotated PV genomes, 2245 genes and regions, 2004 protein sequences and 47 protein structures, which users can explore, analyze or download. 
The PaVE provides scientists with the data and tools needed to accelerate scientific progress for the study and treatment of diseases caused by PVs.",PaVE,0.990848164,Papillomavirus Episteme,0.861218005,PaVE,0.990848164,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/5/2016 +22535208,http://pax-db.org,"PaxDb, a database of protein abundance averages across all three domains of life. Although protein expression is regulated both temporally and spatially, most proteins have an intrinsic, ""typical"" range of functionally effective abundance levels. These extend from a few molecules per cell for signaling proteins, to millions of molecules for structural proteins. When addressing fundamental questions related to protein evolution, translation and folding, but also in routine laboratory work, a simple rough estimate of the average wild type abundance of each detectable protein in an organism is often desirable. Here, we introduce a meta-resource dedicated to integrating information on absolute protein abundance levels; we place particular emphasis on deep coverage, consistent post-processing and comparability across different organisms. Publicly available experimental data are mapped onto a common namespace and, in the case of tandem mass spectrometry data, re-processed using a standardized spectral counting pipeline. By aggregating and averaging over the various samples, conditions and cell-types, the resulting integrated data set achieves increased coverage and a high dynamic range. We score and rank each contributing, individual data set by assessing its consistency against externally provided protein-network information, and demonstrate that our weighted integration exhibits more consistency than the data sets individually. The current PaxDb-release 2.1 (at http://pax-db.org/) presents whole-organism data as well as tissue-resolved data, and covers 85,000 proteins in 12 model organisms. 
All values can be seamlessly compared across organisms via pre-computed orthology relationships.",PaxDb,0.987990916,NA,0,PaxDb,0.987990916,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/24/2012 +31950190,http://www.sysbio.org.cn/pcalistdb,"PCaLiStDB: a lifestyle database for precision prevention of prostate cancer. . The interaction between genes, lifestyles and environmental factors makes the genesis and progress of prostate cancer (PCa) very heterogeneous. Positive lifestyle is important to the prevention and controlling of PCa. To investigate the relationship between PCa and lifestyle at systems level, we established a PCa related lifestyle database (PCaLiStDB) and collected the PCa-related lifestyles including foods, nutrients, life habits and social and environmental factors as well as associated genes and physiological and biochemical indexes together with the disease phenotypes and drugs. Data format standardization was implemented for the future Lifestyle-Wide Association Studies of PCa (PCa_LWAS). Currently, 2290 single-factor lifestyles and 856 joint effects of two or more lifestyles were collected. Among these, 394 are protective factors, 556 are risk factors, 45 are no-influencing factors, 52 are factors with contradictory views and 1977 factors are lacking effective literatures support. PCaLiStDB is expected to facilitate the prevention and control of PCa, as well as the promotion of mechanistic study of lifestyles on PCa. Database URL: http://www.sysbio.org.cn/pcalistdb/.",PCaLiStDB,0.995350957,PCa,0.603868067,PCaLiStDB,0.995350957,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +32810235,"http://pcat.zhenglab.info, http://www.pedtranscriptome.org","PCAT: an integrated portal for genomic and preclinical testing data of pediatric cancer patient-derived xenograft models. 
Although cancer is the leading cause of disease-related mortality in children, the relative rarity of pediatric cancers poses a significant challenge for developing novel therapeutics to further improve prognosis. Patient-derived xenograft (PDX) models, which are usually developed from high-risk tumors, are a useful platform to study molecular driver events, identify biomarkers and prioritize therapeutic agents. Here, we develop PDX for Childhood Cancer Therapeutics (PCAT), a new integrated portal for pediatric cancer PDX models. Distinct from previously reported PDX portals, PCAT is focused on pediatric cancer models and provides intuitive interfaces for querying and data mining. The current release comprises 324 models and their associated clinical and genomic data, including gene expression, mutation and copy number alteration. Importantly, PCAT curates preclinical testing results for 68 models and 79 therapeutic agents manually collected from individual agent testing studies published since 2008. To facilitate comparisons of patterns between patient tumors and PDX models, PCAT curates clinical and molecular data of patient tumors from the TARGET project. In addition, PCAT provides access to gene fusions identified in nearly 1000 TARGET samples. PCAT was built using R-shiny and MySQL. The portal can be accessed at http://pcat.zhenglab.info or http://www.pedtranscriptome.org.",PCAT,0.967368126,PDX for Childhood Cancer Therapeutics,0.790382832,PCAT,0.967368126,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +22800569,http://strees.protres.ru,"Modeling of folds and folding pathways for some protein families of (α + β)- and (α/β)-classes. In this paper, updated structural trees for α/β-proteins containing five- and seven-segment (α/β)-motifs are represented. Novel structural motifs occurring in some families of (α + β)- and (α/β)-proteins are also characterized. 
Databases of these proteins have been compiled from the Protein Data Bank (PDB) and Structural Classification of Proteins (SCOP) and the corresponding structural trees have been constructed. The classification of these proteins has been developed and organized as an extension of the PCBOST database, which is available at http://strees.protres.ru . In total, the updated Protein Classification Based on Structural Trees database contains 11 structural trees, 106 levels, 635 folds, 4911 proteins and domains, and 14,202 PDB entries.",PCBOST,0.969115496,Protein Classification Based on Structural Trees,0.917069605,PCBOST,0.969115496,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/16/2012 +24839966,http://www.pancreaticcancerdatabase.org,"Pancreatic Cancer Database: an integrative resource for pancreatic cancer. Pancreatic cancer is the fourth leading cause of cancer-related death in the world. The etiology of pancreatic cancer is heterogeneous with a wide range of alterations that have already been reported at the level of the genome, transcriptome, and proteome. The past decade has witnessed a large number of experimental studies using high-throughput technology platforms to identify genes whose expression at the transcript or protein levels is altered in pancreatic cancer. Based on expression studies, a number of molecules have also been proposed as potential biomarkers for diagnosis and prognosis of this deadly cancer. Currently, there are no repositories which provide an integrative view of multiple Omics data sets from published research on pancreatic cancer. Here, we describe the development of a web-based resource, Pancreatic Cancer Database (http://www.pancreaticcancerdatabase.org), as a unified platform for pancreatic cancer research. PCD contains manually curated information pertaining to quantitative alterations in miRNA, mRNA, and proteins obtained from small-scale as well as high-throughput studies of pancreatic cancer tissues and cell lines. 
We believe that PCD will serve as an integrative platform for scientific community involved in pancreatic cancer research.",PCD,0.993127882,Pancreatic Cancer Database,0.967810631,PCD,0.993127882,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/19/2014 +"22674824, 27613420",http://pcddb.cryst.bbk.ac.uk,"Circular dichroism spectral data and metadata in the Protein Circular Dichroism Data Bank (PCDDB): a tutorial guide to accession and deposition. The Protein Circular Dichroism Data Bank (PCDDB) is a web-based resource containing circular dichroism (CD) and synchrotron radiation circular dichroism spectral and associated metadata located at http://pcddb.cryst.bbk.ac.uk. This resource provides a freely available, user-friendly means of accessing validated CD spectra and their associated experimental details and metadata, thereby enabling broad usage of this material and new developments across the structural biology, chemistry, and bioinformatics communities. The resource also enables researchers utilizing CD as an experimental technique to have a means of storing their data at a secure site from which it is easily retrievable, thereby making their results publicly accessible, a current requirement of many grant-funding agencies world-wide, as well as meeting the data-sharing requirements for journal publications. 
This tutorial provides extensive information on searching, accessing, and downloading procedures for those who wish to utilize the data available in the data bank, and detailed information on deposition procedures for creating and validating entries, including comprehensive explanations of their contents and formats, for those who wish to include their data in the data bank.",PCDDB,0.998229384,Protein Circular Dichroism Data Bank,0.960739791,PCDDB,0.998229384,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/8/2016 +23282181,http://h-invitational.jp/hinv/pcdq,"PCDq: human protein complex database with quality index which summarizes different levels of evidences of protein complexes predicted from h-invitational protein-protein interactions integrative dataset. Background Proteins interact with other proteins or biomolecules in complexes to perform cellular functions. Existing protein-protein interaction (PPI) databases and protein complex databases for human proteins are not organized to provide protein complex information or facilitate the discovery of novel subunits. Data integration of PPIs focused specifically on protein complexes, subunits, and their functions. Predicted candidate complexes or subunits are also important for experimental biologists. Description Based on integrated PPI data and literature, we have developed a human protein complex database with a complex quality index (PCDq), which includes both known and predicted complexes and subunits. We integrated six PPI data (BIND, DIP, MINT, HPRD, IntAct, and GNP_Y2H), and predicted human protein complexes by finding densely connected regions in the PPI networks. They were curated with the literature so that missing proteins were complemented and some complexes were merged, resulting in 1,264 complexes comprising 9,268 proteins with 32,198 PPIs. The evidence level of each subunit was assigned as a categorical variable. 
This indicated whether it was a known subunit, and a specific function was inferable from sequence or network analysis. To summarize the categories of all the subunits in a complex, we devised a complex quality index (CQI) and assigned it to each complex. We examined the proportion of consistency of Gene Ontology (GO) terms among protein subunits of a complex. Next, we compared the expression profiles of the corresponding genes and found that many proteins in larger complexes tend to be expressed cooperatively at the transcript level. The proportion of duplicated genes in a complex was evaluated. Finally, we identified 78 hypothetical proteins that were annotated as subunits of 82 complexes, which included known complexes. Of these hypothetical proteins, after our prediction had been made, four were reported to be actual subunits of the assigned protein complexes. Conclusions We constructed a new protein complex database PCDq including both predicted and curated human protein complexes. CQI is a useful source of experimentally confirmed information about protein complexes and subunits. The predicted protein complexes can provide functional clues about hypothetical proteins. PCDq is freely available at http://h-invitational.jp/hinv/pcdq/.",PCDq,0.992855191,complex,0.558051407,PCDq,0.992855191,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/12/2012 +28053167,http://bis.zju.edu.cn/pcernadb/index.jsp,"PceRBase: a database of plant competing endogenous RNA. Competition for microRNA (miRNA) binding between RNA molecules has emerged as a novel mechanism for the regulation of eukaryotic gene expression. Competing endogenous RNA (ceRNA) can act as decoys for miRNA binding, thereby forming a ceRNA network by regulating the abundance of other RNA transcripts which share the same or similar microRNA response elements. 
Although this type of RNA cross talk was first described in Arabidopsis, and was subsequently shown to be active in animal models, there is no database collecting potential ceRNA data for plants. We have developed a Plant ceRNA database (PceRBase, http://bis.zju.edu.cn/pcernadb/index.jsp) which contains potential ceRNA target-target, and ceRNA target-mimic pairs from 26 plant species. For example, in Arabidopsis lyrata, 311 candidate ceRNAs are identified which could affect 2646 target-miRNA-target interactions. Predicted pairing structure between miRNAs and their target mRNA transcripts, expression levels of ceRNA pairs and associated GO annotations are also stored in the database. A web interface provides convenient browsing and searching for specific genes of interest. Tools are available for the visualization and enrichment analysis of genes in the ceRNA networks. Moreover, users can use PceRBase to predict novel competing mimic-target and target-target interactions from their own data.",PceRBase,0.983400321,ceRNA database,0.803619842,PceRBase,0.983400321,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/7/2016 +24569397,http://crdd.osdd.net/raghava/pcmdb,"PCMdb: pancreatic cancer methylation database. Pancreatic cancer is the fifth most aggressive malignancy and urgently requires new biomarkers to facilitate early detection. For providing impetus to the biomarker discovery, we have developed Pancreatic Cancer Methylation Database (PCMDB, http://crdd.osdd.net/raghava/pcmdb/), a comprehensive resource dedicated to methylation of genes in pancreatic cancer. Data was collected and compiled manually from published literature. PCMdb has 65907 entries for methylation status of 4342 unique genes. In PCMdb, data was compiled for both cancer cell lines (53565 entries for 88 cell lines) and cancer tissues (12342 entries for 3078 tissue samples). 
Among these entries, 47.22% entries reported a high level of methylation for the corresponding genes while 10.87% entries reported low level of methylation. PCMdb covers five major subtypes of pancreatic cancer; however, most of the entries were compiled for adenocarcinomas (88.38%) and mucinous neoplasms (5.76%). A user-friendly interface has been developed for data browsing, searching and analysis. We anticipate that PCMdb will be helpful for pancreatic cancer biomarker discovery.",PCMdb,0.994843006,Pancreatic Cancer Methylation Database,0.990767524,PCMdb,0.994843006,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/26/2014 +28011869,http://pcomdb.lowtem.hokudai.ac.jp/proteins/top,"PCoM-DB Update: A Protein Co-Migration Database for Photosynthetic Organisms. The identification of protein complexes is important for the understanding of protein structure and function and the regulation of cellular processes. We used blue-native PAGE and tandem mass spectrometry to identify protein complexes systematically, and built a web database, the protein co-migration database (PCoM-DB, http://pcomdb.lowtem.hokudai.ac.jp/proteins/top), to provide prediction tools for protein complexes. PCoM-DB provides migration profiles for any given protein of interest, and allows users to compare them with migration profiles of other proteins, showing the oligomeric states of proteins and thus identifying potential interaction partners. The initial version of PCoM-DB (launched in January 2013) included protein complex data for Synechocystis whole cells and Arabidopsis thaliana thylakoid membranes. Here we report PCoM-DB version 2.0, which includes new data sets and analytical tools. Additional data are included from whole cells of the pelagic marine picocyanobacterium Prochlorococcus marinus, the thermophilic cyanobacterium Thermosynechococcus elongatus, the unicellular green alga Chlamydomonas reinhardtii and the bryophyte Physcomitrella patens. 
The Arabidopsis protein data now include data for intact mitochondria, intact chloroplasts, chloroplast stroma and chloroplast envelopes. The new tools comprise a multiple-protein search form and a heat map viewer for protein migration profiles. Users can compare migration profiles of a protein of interest among different organelles or compare migration profiles among different proteins within the same sample. For Arabidopsis proteins, users can compare migration profiles of a protein of interest with putative homologous proteins from non-Arabidopsis organisms. The updated PCoM-DB will help researchers find novel protein complexes and estimate their evolutionary changes in the green lineage.",PCoM-DB,0.992738867,protein co-migration database,0.720944236,PCoM-DB,0.992738867,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +31725861,http://pcosbase.org,"PCOSBase: a manually curated database of polycystic ovarian syndrome. . Polycystic ovarian syndrome (PCOS) is one of the main causes of infertility and affects 5-20% women of reproductive age. Despite the increased prevalence of PCOS, the mechanisms involved in its pathogenesis and pathophysiology remains unclear. The expansion of omics on studying the mechanisms of PCOS has lead into vast amounts of proteins related to PCOS resulting to a challenge in collating and depositing this deluge of data into one place. A knowledge-based repository named as PCOSBase was developed to systematically store all proteins related to PCOS. These proteins were compiled from various online databases and published expression studies. Rigorous criteria were developed to identify those that were highly related to PCOS. They were manually curated and analysed to provide additional information on gene ontologies, pathways, domains, tissue localizations and diseases that associate with PCOS. Other proteins that might interact with PCOS-related proteins identified from this study were also included. 
Currently, 8185 PCOS-related proteins were identified and assigned to 13 237 gene ontology vocabulary, 1004 pathways, 7936 domains, 29 disease classes, 1928 diseases, 91 tissues and 320 472 interactions. All publications related to PCOS are also indexed in PCOSBase. Data entries are searchable in the main page, search, browse and datasets tabs. Protein advanced search is provided to search for specific proteins. To date, PCOSBase has the largest collection of PCOS-related proteins. PCOSBase aims to become a self-contained database that can be used to further understand the PCOS pathogenesis and towards the identification of potential PCOS biomarkers. Database URL: http://pcosbase.org.",PCOSBase,0.996945679,NA,0,PCOSBase,0.996945679,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +26578565,http://pcoskb.bicnirrh.res.in,"PCOSKB: A KnowledgeBase on genes, diseases, ontology terms and biochemical pathways associated with PolyCystic Ovary Syndrome. Polycystic ovary syndrome (PCOS) is one of the major causes of female subfertility worldwide and ≈ 7-10% of women in reproductive age are affected by it. The affected individuals exhibit varying types and levels of comorbid conditions, along with the classical PCOS symptoms. Extensive studies on PCOS across diverse ethnic populations have resulted in a plethora of information on dysregulated genes, gene polymorphisms and diseases linked to PCOS. However, efforts have not been taken to collate and link these data. Our group, for the first time, has compiled PCOS-related information available through scientific literature; cross-linked it with molecular, biochemical and clinical databases and presented it as a user-friendly, web-based online knowledgebase for the benefit of the scientific and clinical community. 
Manually curated information on associated genes, single nucleotide polymorphisms, diseases, gene ontology terms and pathways along with supporting reference literature has been collated and included in PCOSKB (http://pcoskb.bicnirrh.res.in).",PCOSKB,0.997247696,NA,0,PCOSKB,0.997247696,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +32895427,http://www.pcoskb.bicnirrh.res.in,"PCOSKBR2: a database of genes, diseases, pathways, and networks associated with polycystic ovary syndrome. PolyCystic Ovary Syndrome KnowledgeBase (PCOSKBR2) is a manually curated database with information on 533 genes, 145 SNPs, 29 miRNAs, 1,150 pathways, and 1,237 diseases associated with PCOS. This data has been retrieved based on evidence gleaned by critically reviewing literature and related records available for PCOS in databases such as KEGG, DisGeNET, OMIM, GO, Reactome, STRING, and dbSNP. Since PCOS is associated with multiple genes and comorbidities, data mining algorithms for comorbidity prediction and identification of enriched pathways and hub genes are integrated in PCOSKBR2, making it an ideal research platform for PCOS. PCOSKBR2 is freely accessible at http://www.pcoskb.bicnirrh.res.in/ .",PCOSKBR2,0.997740662,PolyCystic Ovary Syndrome KnowledgeBase,0.975534484,PCOSKBR2,0.997740662,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/7/2020 +33997360,http://p450.biodesign.ac.cn,"PCPD: Plant cytochrome P450 database and web-based tools for structural construction and ligand docking. Plant cytochrome P450s play key roles in the diversification and functional modification of plant natural products. Although over 200,000 plant P450 gene sequences have been recorded, only seven crystalized P450 genes severely hampered the functional characterization, gene mining and engineering of important P450s. Here, we combined Rosetta homologous modeling and MD-based refinement to construct a high-resolution P450 structure prediction process (PCPCM), which was applied to 181 plant P450s with identified functions. 
Furthermore, we constructed a ligand docking process (PCPLD) that can be applied for plant P450s virtual screening. 10 examples of virtual screening indicated the process can reduce about 80% screening space for next experimental verification. Finally, we constructed a plant P450 database (PCPD: http://p450.biodesign.ac.cn/), which includes the sequences, structures and functions of the 181 plant P450s, and a web service based on PCPCM and PCPLD. Our study not only developed methods for the P450-specific structure analysis, but also introduced a universal approach that can assist the mining and functional analysis of P450 enzymes.",PCPD,0.983033021,Plant cytochrome P450 database,0.944938992,PCPD,0.983033021,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/24/2021 +28365721,http://bdg.hfut.edu.cn/pcppi/index.html,"PCPPI: a comprehensive database for the prediction of Penicillium-crop protein-protein interactions. . Penicillium expansum , the causal agent of blue mold, is one of the most prevalent post-harvest pathogens, infecting a wide range of crops after harvest. In response, crops have evolved various defense systems to protect themselves against this and other pathogens. Penicillium -crop interaction is a multifaceted process and mediated by pathogen- and host-derived proteins. Identification and characterization of the inter-species protein-protein interactions (PPIs) are fundamental to elucidating the molecular mechanisms underlying infection processes between P. expansum and plant crops. Here, we have developed PCPPI, the Penicillium -Crop Protein-Protein Interactions database, which is constructed based on the experimentally determined orthologous interactions in pathogen-plant systems and available domain-domain interactions (DDIs) in each PPI. Thus far, it stores information on 9911 proteins, 439 904 interactions and seven host species, including apple, kiwifruit, maize, pear, rice, strawberry and tomato. 
Further analysis through the gene ontology (GO) annotation indicated that proteins with more interacting partners tend to execute the essential function. Significantly, semantic statistics of the GO terms also provided strong support for the accuracy of our predicted interactions in PCPPI. We believe that all the PCPPI datasets are helpful to facilitate the study of pathogen-crop interactions and freely available to the research community. : http://bdg.hfut.edu.cn/pcppi/index.html.",PCPPI,0.994418144,Penicillium -Crop Protein-Protein Interactions,0.860982812,PCPPI,0.994418144,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +29040761,http://systemsbiology.cau.edu.cn/chromstates,"PCSD: a plant chromatin state database. Genome-wide maps of chromatin states have become a powerful representation of genome annotation and regulatory activity. We collected public and in-house plant epigenomic data sets and applied a Hidden Markov Model to define chromatin states, which included 290 553 (36 chromatin states), 831 235 (38 chromatin states) and 3 936 844 (26 chromatin states) segments across the whole genome of Arabidopsis thaliana, Oryza sativa and Zea mays, respectively. We constructed a Plant Chromatin State Database (PCSD, http://systemsbiology.cau.edu.cn/chromstates) to integrate detailed information about chromatin states, including the features and distribution of states, segments in states and related genes with segments. The self-organization mapping (SOM) results for these different chromatin signatures and UCSC Genome Browser for visualization were also integrated into the PCSD database. We further provided differential SOM maps between two epigenetic marks for chromatin state comparison and custom tools for new data analysis. The segments and related genes in SOM maps can be searched and used for motif and GO analysis, respectively. In addition, multi-species integration can be used to discover conserved features at the epigenomic level. 
In summary, our PCSD database integrated the identified chromatin states with epigenetic features and may be beneficial for communities to discover causal functions hidden in plant chromatin.",PCSD,0.967814763,Plant Chromatin State Database,0.940431786,PCSD,0.967814763,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +25551368,http://bioinfo.hrbmu.edu.cn/pd_ngsatlas,"PD_NGSAtlas: a reference database combining next-generation sequencing epigenomic and transcriptomic data for psychiatric disorders. Background Psychiatric disorders such as schizophrenia (SZ) and bipolar disorder (BP) are projected to lead the global disease burden within the next decade. Several lines of evidence suggest that epigenetic- or genetic-mediated dysfunction is frequently present in these disorders. To date, the inheritance patterns have been complicated by the problem of integrating epigenomic and transcriptomic factors that have yet to be elucidated. Therefore, there is a need to build a comprehensive database for storing epigenomic and transcriptomic data relating to psychiatric disorders. Description We have developed the PD_NGSAtlas, which focuses on the efficient storage of epigenomic and transcriptomic data based on next-generation sequencing and on the quantitative analyses of epigenetic and transcriptional alterations involved in psychiatric disorders. The current release of the PD_NGSAtlas contains 43 DNA methylation profiles and 37 transcription profiles detected by MeDIP-Seq and RNA-Seq, respectively, in two distinct brain regions and peripheral blood of SZ, BP and non-psychiatric controls. In addition to these data that were generated in-house, we have included, and will continue to include, published DNA methylation and gene expression data from other research groups, with a focus on psychiatric disorders. 
A flexible query engine has been developed for the acquisition of methylation profiles and transcription profiles for special genes or genomic regions of interest of the selected samples. Furthermore, the PD_NGSAtlas offers online tools for identifying aberrantly methylated and expressed events involved in psychiatric disorders. A genome browser has been developed to provide integrative and detailed views of multidimensional data in a given genomic context, which can help researchers understand molecular mechanisms from epigenetic and transcriptional perspectives. Moreover, users can download the methylation and transcription data for further analyses. Conclusions The PD_NGSAtlas aims to provide storage of epigenomic and transcriptomic data as well as quantitative analyses of epigenetic and transcriptional alterations involved in psychiatric disorders. The PD_NGSAtlas will be a valuable data resource and will enable researchers to investigate the pathophysiology and aetiology of disease in detail. The database is available at http://bioinfo.hrbmu.edu.cn/pd_ngsatlas/.",PD_NGSAtlas,0.995458275,NA,0,PD_NGSAtlas,0.995458275,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/31/2014 +29864163,http://quatstruct.rcsb.org,"Investigation of protein quaternary structure via stoichiometry and symmetry information. The Protein Data Bank (PDB) is the single worldwide archive of experimentally-determined three-dimensional (3D) structures of proteins and nucleic acids. As of January 2017, the PDB housed more than 125,000 structures and was growing by more than 11,000 structures annually. Since the 3D structure of a protein is vital to understand the mechanisms of biological processes, diseases, and drug design, correct oligomeric assembly information is of critical importance. Unfortunately, the biologically relevant oligomeric form of a 3D structure is not directly obtainable by X-ray crystallography, whilst in solution methods (NMR or single particle EM) it is known from the experiment. 
Instead, this information may be provided by the PDB Depositor as metadata coming from additional experiments, be inferred by sequence-sequence comparisons with similar proteins of known oligomeric state, or predicted using software, such as PISA (Proteins, Interfaces, Structures and Assemblies) or EPPIC (Evolutionary Protein Protein Interface Classifier). Despite significant efforts by professional PDB Biocurators during data deposition, there remain a number of structures in the archive with incorrect quaternary structure descriptions (or annotations). Further investigation is, therefore, needed to evaluate the correctness of quaternary structure annotations. In this study, we aim to identify the most probable oligomeric states for proteins represented in the PDB. Our approach evaluated the performance of four independent prediction methods, including text mining of primary publications, inference from homologous protein structures, and two computational methods (PISA and EPPIC). Aggregating predictions to give consensus results outperformed all four of the independent prediction methods, yielding 83% correct, 9% wrong, and 8% inconclusive predictions, when tested with a well-curated benchmark dataset. We have developed a freely-available web-based tool to make this approach accessible to researchers and PDB Biocurators (http://quatstruct.rcsb.org/).",PDB,0.9718527,Data Bank,0.765477806,PDB,0.9718527,1,NA,"28296894.0, 32558264.0, 34303324.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: CLASS,NA,NA,6/4/2018 +34303324,http://pitgroup.org/amyloid,"On the border of the amyloidogenic sequences: prefix analysis of the parallel beta sheets in the PDB_Amyloid collection. The Protein Data Bank (PDB) today contains more than 174,000 entries with the 3-dimensional structures of biological macromolecules. 
Using the rich resources of this repository, it is possible identifying subsets with specific, interesting properties for different applications. Our research group prepared an automatically updated list of amyloid- and probably amyloidogenic molecules, the PDB_Amyloid collection, which is freely available at the address http://pitgroup.org/amyloid. This resource applies exclusively the geometric properties of the steric structures for identifying amyloids. In the present contribution, we analyze the starting (i.e., prefix) subsequences of the characteristic, parallel beta-sheets of the structures in the PDB_Amyloid collection, and identify further appearances of these length-5 prefix subsequences in the whole PDB data set. We have identified this way numerous proteins, whose normal or irregular functions involve amyloid formation, structural misfolding, or anti-coagulant properties, simply by containing these prefixes: including the T-cell receptor (TCR), bound with the major histocompatibility complexes MHC-1 and MHC-2; the p53 tumor suppressor protein; a mycobacterial RNA polymerase transcription initialization complex; the human bridging integrator protein BIN-1; and the tick anti-coagulant peptide TAP.",PDB,0.981093069,Data Bank,0.642776877,PDB,0.981093069,1,NA,"28296894.0, 29864163.0, 32558264.0",NA,NA,NA,do not merge,NA,NA,noting predicted name incorrect,7/26/2021 +28296894,http://wwpdb.org,"Impact of genetic variation on three dimensional structure and function of proteins. The Protein Data Bank (PDB; http://wwpdb.org) was established in 1971 as the first open access digital data resource in biology with seven protein structures as its initial holdings. The global PDB archive now contains more than 126,000 experimentally determined atomic level three-dimensional (3D) structures of biological macromolecules (proteins, DNA, RNA), all of which are freely accessible via the Internet. 
Knowledge of the 3D structure of the gene product can help in understanding its function and role in disease. Of particular interest in the PDB archive are proteins for which 3D structures of genetic variant proteins have been determined, thus revealing atomic-level structural differences caused by the variation at the DNA level. Herein, we present a systematic and qualitative analysis of such cases. We observe a wide range of structural and functional changes caused by single amino acid differences, including changes in enzyme activity, aggregation propensity, structural stability, binding, and dissociation, some in the context of large assemblies. Structural comparison of wild type and mutated proteins, when both are available, provide insights into atomic-level structural differences caused by the genetic variation.",PDB,0.993884663,Protein Data Bank,0.784306149,PDB,0.993884663,1,"25540181.0, 27450113.0","29864163.0, 32558264.0, 34303324.0",NA,NA,merge on record with best name prob,merge only:,NA,NA,"28296894, 32558264",3/15/2017 +32558264,http://pdb101.rcsb.org,"Insights from 20 years of the Molecule of the Month. For 20 years, Molecule of the Month articles have highlighted the functional stories of 3D structures found in the Protein Data Bank (PDB). The PDB is the primary archive of atomic structures of biological molecules, currently providing open access to more than 150,000 structures studied by researchers around the world. The wealth of knowledge embodied in this resource is remarkable, with structures that allow exploration of nearly any biomolecular topic, including the basic science of genetic mechanisms, mechanisms of photosynthesis and bioenergetics, and central biomedical topics like cancer therapy and the fight against infectious disease. 
The central motivation behind the Molecule of the Month is to provide a user-friendly introduction to this rich body of data, charting a path for users to get started with finding and exploring the many available structures. The Molecule of the Month and related materials are updated regularly at the education portal PDB-101 (http://pdb101.rcsb.org/), offering an ongoing resource for molecular biology educators and students around the world.",PDB,0.919216275,NA,0,PDB,0.919216275,1,NA,"28296894.0, 29864163.0, 34303324.0",low_prob_best_name,do not remove,NA,merge only:,NA,NA,"28296894, 32558264",6/17/2020 +32404014,http://bioinfo.bdu.ac.in/pb3,"PDB-2-PBv3.0: An updated protein block database. Our protein block (PB) sequence database PDB-2-PBv1.0 provides PB sequences and dihedral angles for 74,297 protein structures comprising of 103,252 protein chains of Protein Data Bank (PDB) as on 2011. Since there are a lot of practical applications of PB and also as the size of PDB database increases, it becomes necessary to provide the PB sequences for all PDB protein structures. The current updated PDB-2-PBv3.0 contains PB sequences for 147,602 PDB structures comprising of 400,355 protein chains as on October 2019. When compared to our previous version PDB-2-PBv1.0, the current PDB-2-PBv3.0 contains 2- and 4-fold increase in the number of protein structures and chains, respectively. Notably, it provides PB information for any protein chain, regardless of the missing atom records of protein structure data in PDB. It includes protein interaction information with DNA and RNA along with their corresponding functional classes from Nucleic Acid Database (NDB) and PDB. Now, the updated version allows the user to download multiple PB records by parameter search and/or by a given list. 
This database is freely accessible at http://bioinfo.bdu.ac.in/pb3.",PDB-2-PBv3.0,0.936054283,NA,0,PDB-2-PBv3.0,0.936054283,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/1/2020 +25301850,http://www.pdbbind-cn.org,"PDB-wide collection of binding data: current status of the PDBbind database. Motivation Molecular recognition between biological macromolecules and organic small molecules plays an important role in various life processes. Both structural information and binding data of biomolecular complexes are indispensable for depicting the underlying mechanism in such an event. The PDBbind database was created to collect experimentally measured binding data for the biomolecular complexes throughout the Protein Data Bank (PDB). It thus provides the linkage between structural information and energetic properties of biomolecular complexes, which is especially desirable for computational studies or statistical analyses. Results Since its first public release in 2004, the PDBbind database has been updated on an annual basis. The latest release (version 2013) provides experimental binding affinity data for 10,776 biomolecular complexes in PDB, including 8302 protein-ligand complexes and 2474 other types of complexes. In this article, we will describe the current methods used for compiling PDBbind and the updated status of this database. We will also review some typical applications of PDBbind published in the scientific literature. Availability and implementation All contents of this database are freely accessible at the PDBbind-CN Web server at http://www.pdbbind-cn.org/. Contact wangrx@mail.sioc.ac.cn. Supplementary information Supplementary data are available at Bioinformatics online.",PDBbind,0.995774388,NA,0,PDBbind,0.995774388,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/9/2014 +26476444,http://pdbe.org,"PDBe: improved accessibility of macromolecular structure data from PDB and EMDB. 
The Protein Data Bank in Europe (http://pdbe.org) accepts and annotates depositions of macromolecular structure data in the PDB and EMDB archives and enriches, integrates and disseminates structural information in a variety of ways. The PDBe website has been redesigned based on an analysis of user requirements, and now offers intuitive access to improved and value-added macromolecular structure information. Unique value-added information includes lists of reviews and research articles that cite or mention PDB entries as well as access to figures and legends from full-text open-access publications that describe PDB entries. A powerful new query system not only shows all the PDB entries that match a given query, but also shows the 'best structures' for a given macromolecule, ligand complex or sequence family using data-quality information from the wwPDB validation reports. A PDBe RESTful API has been developed to provide unified access to macromolecular structure data available in the PDB and EMDB archives as well as value-added annotations, e.g. regarding structure quality and up-to-date cross-reference information from the SIFTS resource. Taken together, these new developments facilitate unified access to macromolecular structure data in an intuitive way for non-expert users and support expert users in analysing macromolecular structure data.",PDBe,0.99717021,NA,0,PDBe,0.99717021,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/17/2015 +31584092,http://pdbe-kb.org,"PDBe-KB: a community-driven resource for structural and functional annotations. The Protein Data Bank in Europe-Knowledge Base (PDBe-KB, https://pdbe-kb.org) is a community-driven, collaborative resource for literature-derived, manually curated and computationally predicted structural and functional annotations of macromolecular structure data, contained in the Protein Data Bank (PDB). 
The goal of PDBe-KB is two-fold: (i) to increase the visibility and reduce the fragmentation of annotations contributed by specialist data resources, and to make these data more findable, accessible, interoperable and reusable (FAIR) and (ii) to place macromolecular structure data in their biological context, thus facilitating their use by the broader scientific community in fundamental and applied research. Here, we describe the guidelines of this collaborative effort, the current status of contributed data, and the PDBe-KB infrastructure, which includes the data exchange format, the deposition system for added value annotations, the distributable database containing the assembled data, and programmatic access endpoints. We also describe a series of novel web-pages-the PDBe-KB aggregated views of structure data-which combine information on macromolecular structures from many PDB entries. We have recently released the first set of pages in this series, which provide an overview of available structural and functional information for a protein of interest, referenced by a UniProtKB accession.",PDBe-KB,0.995546019,Protein Data Bank in Europe-Knowledge Base,0.907250391,PDBe-KB,0.995546019,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +26615193,http://pdbflex.org,"PDBFlex: exploring flexibility in protein structures. The PDBFlex database, available freely and with no login requirements at http://pdbflex.org, provides information on flexibility of protein structures as revealed by the analysis of variations between depositions of different structural models of the same protein in the Protein Data Bank (PDB). PDBFlex collects information on all instances of such depositions, identifying them by a 95% sequence identity threshold, performs analysis of their structural differences and clusters them according to their structural similarities for easy analysis. 
The PDBFlex contains tools and viewers enabling in-depth examination of structural variability including: 2D-scaling visualization of RMSD distances between structures of the same protein, graphs of average local RMSD in the aligned structures of protein chains, graphical presentation of differences in secondary structure and observed structural disorder (unresolved residues), difference distance maps between all sets of coordinates and 3D views of individual structures and simulated transitions between different conformations, the latter displayed using JSMol visualization software.",PDBFlex,0.998265803,NA,0,PDBFlex,0.998265803,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2015 +"21976737, 27789697",http://pdbj.org,"Protein Data Bank Japan (PDBj): maintaining a structural data archive and resource description framework format. The Protein Data Bank Japan (PDBj, http://pdbj.org) is a member of the worldwide Protein Data Bank (wwPDB) and accepts and processes the deposited data of experimentally determined macromolecular structures. While maintaining the archive in collaboration with other wwPDB partners, PDBj also provides a wide range of services and tools for analyzing structures and functions of proteins, which are summarized in this article. To enhance the interoperability of the PDB data, we have recently developed PDB/RDF, PDB data in the Resource Description Framework (RDF) format, along with its ontology in the Web Ontology Language (OWL) based on the PDB mmCIF Exchange Dictionary. Being in the standard format for the Semantic Web, the PDB/RDF data provide a means to integrate the PDB with other biological information resources.",PDBj,0.996294558,Protein Data Bank Japan,0.880288279,PDBj,0.996294558,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/26/2016 +23203988,http://pdbtm.enzim.hu,"PDBTM: Protein Data Bank of transmembrane proteins after 8 years. 
The PDBTM database (available at http://pdbtm.enzim.hu), the first comprehensive and up-to-date transmembrane protein selection of the Protein Data Bank, was launched in 2004. The database was created and has been continuously updated by the TMDET algorithm that is able to distinguish between transmembrane and non-transmembrane proteins using their 3D atomic coordinates only. The TMDET algorithm can locate the spatial positions of transmembrane proteins in lipid bilayer as well. During the last 8 years not only the size of the PDBTM database has been steadily growing from ∼400 to 1700 entries but also new structural elements have been identified, in addition to the well-known α-helical bundle and β-barrel structures. Numerous 'exotic' transmembrane protein structures have been solved since the first release, which has made it necessary to define these new structural elements, such as membrane loops or interfacial helices in the database. This article reports the new features of the PDBTM database that have been added since its first release, and our current efforts to keep the database up-to-date and easy to use so that it may continue to serve as a fundamental resource for the scientific community.",PDBTM,0.995859504,Protein Data Bank of transmembrane proteins,0.802712626,PDBTM,0.995859504,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/30/2012 +26504143,http://biomine.ece.ualberta.ca/PDID,"PDID: database of molecular-level putative protein-drug interactions in the structural human proteome. Motivation Many drugs interact with numerous proteins besides their intended therapeutic targets and a substantial portion of these interactions is yet to be elucidated. Protein-Drug Interaction Database (PDID) addresses incompleteness of these data by providing access to putative protein-drug interactions that cover the entire structural human proteome. 
Results PDID covers 9652 structures from 3746 proteins and houses 16 800 putative interactions generated from close to 1.1 million accurate, all-atom structure-based predictions for several dozens of popular drugs. The predictions were generated with three modern methods: ILbind, SMAP and eFindSite. They are accompanied by propensity scores that quantify likelihood of interactions and coordinates of the putative location of the binding drugs in the corresponding protein structures. PDID complements the current databases that focus on the curated interactions and the BioDrugScreen database that relies on docking to find putative interactions. Moreover, we also include experimentally curated interactions which are linked to their sources: DrugBank, BindingDB and Protein Data Bank. Our database can be used to facilitate studies related to polypharmacology of drugs including repurposing and explaining side effects of drugs. Availability and implementation PDID database is freely available at http://biomine.ece.ualberta.ca/PDID/.",PDID,0.99190706,Protein-Drug Interaction Database,0.975201716,PDID,0.99190706,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/26/2015 +32103267,http://drosophila.biomedtzc.cn,"Predicted Drosophila Interactome Resource and web tool for functional interpretation of differentially expressed genes. . Drosophila melanogaster is a well-established model organism that is widely used in genetic studies. This species enjoys the availability of a wide range of research tools, well-annotated reference databases and highly similar gene circuitry to other insects. To facilitate molecular mechanism studies in Drosophila, we present the Predicted Drosophila Interactome Resource (PDIR), a database of high-quality predicted functional gene interactions. These interactions were inferred from evidence in 10 public databases providing information for functional gene interactions from diverse perspectives. 
The current version of PDIR includes 102 835 putative functional associations with balanced sensitivity and specificity, which are expected to cover 22.56% of all Drosophila protein interactions. This set of functional interactions is a good reference for hypothesis formulation in molecular mechanism studies. At the same time, these interactions also serve as a high-quality reference interactome for gene set linkage analysis (GSLA), which is a web tool for the interpretation of the potential functional impacts of a set of changed genes observed in transcriptomics analyses. In a case study, we show that the PDIR/GSLA system was able to produce a more comprehensive and concise interpretation of the collective functional impact of multiple simultaneously changed genes compared with the widely used gene set annotation tools, including PANTHER and David. PDIR and its associated GSLA service can be accessed at http://drosophila.biomedtzc.cn.",PDIR,0.982636213,Predicted Drosophila Interactome Resource,0.975733578,PDIR,0.982636213,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +33304468,http://ageing.shinyapps.io/pdmethdb,"PDmethDB: A curated Parkinson's disease associated methylation information database. Parkinson's disease (PD) is the second most common neurodegenerative disease, of which the histopathological hallmark is the formation of Lewy bodies consisting of α-synuclein as the major component. α-Synuclein can sequester DNA Methyltransferase 1 (DNMT1), the maintenance DNA methylation enzyme, from the nucleus and into the cytoplasm, leading to global DNA hypomethylation in human brain. As DNA methylation is a major epigenetic modification that regulates gene expression and there is no specific database storing PD associated methylation information, PDmethDB (Parkinson's Disease Methylation Database) aims to curate PD associated methylation information from literature to facilitate the study of the relationship between PD and methylation. 
Currently, PDmethDB contains 97,077 PD methylation associated entries among 12,308 molecules, 37,944 CpG sites, 31 tissues and 3 species through a review of about 1600 published papers. This includes information concerning the gene/molecule name, CpG site, methylation alteration, expression alteration, tissue, PMID, experimental method, and a brief description about the entry. PDmethDB provides a user-friendly interface to search, browse, download and submit data. PDmethDB supports browsing by molecule, species, tissue, gene region, methylation alteration and experimental methods. PDmethDB also shows the entry gene interaction network including protein-protein interactions and miRNA-targets interactions with a highlight of PD associated genes from DisGeNET database. PDmethDB aims to facilitate the understanding of the relationship between PD and methylation. Database URL: https://ageing.shinyapps.io/pdmethdb/.",PDmethDB,0.997210741,Parkinson's Disease Methylation Database,0.937413712,PDmethDB,0.997210741,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2020 +30115014,http://pdumbase.gdcb.iastate.edu,"PdumBase: a transcriptome database and research tool for Platynereis dumerilii and early development of other metazoans. Background The marine polychaete annelid Platynereis dumerilii has recently emerged as a prominent organism for the study of development, evolution, stem cells, regeneration, marine ecology, chronobiology and neurobiology within metazoans. Its phylogenetic position within the spiralian/ lophotrochozoan clade, the comparatively high conservation of ancestral features in the Platynereis genome, and experimental access to any stage within its life cycle, make Platynereis an important model for elucidating the complex regulatory and functional molecular mechanisms governing early development, later organogenesis, and various features of its larval and adult life. 
High resolution RNA-seq gene expression data obtained from specific developmental stages can be used to dissect early developmental mechanisms. However, the potential for discovery of these mechanisms relies on tools to search, retrieve, and compare genome-wide information within Platynereis, and across other metazoan taxa. Results To facilitate exploration and discovery by the broader scientific community, we have developed a web-based, searchable online research tool, PdumBase, featuring the first comprehensive transcriptome database for Platynereis dumerilii during early stages of development (2 h ~ 14 h). Our database also includes additional stages over the P. dumerilii life cycle and provides access to the expression data of 17,213 genes (31,806 transcripts) along with annotation information sourced from Swiss-Prot, Gene Ontology, KEGG pathways, Pfam domains, TmHMM, SingleP, and EggNOG orthology. Expression data for each gene includes the stage, the normalized FPKM, the raw read counts, and information that can be leveraged for statistical analyses of differential gene expression and the construction of genome-wide co-expression networks. In addition, PdumBase offers early stage transcriptome expression data from five further species as a valuable resource for investigators interested in comparing early development in different organisms. To understand conservation of Platynereis gene models and to validate gene annotation, most Platynereis gene models include a comprehensive phylogenetic analysis across 18 species representing diverse metazoan taxa. Conclusions PdumBase represents the first online resource for the early developmental transcriptome of Platynereis dumerilii. It serves as a research platform for discovery and exploration of gene expression during early stages, throughout the Platynereis life cycle, and enables comparison to other model organisms. 
PdumBase is freely available at http://pdumbase.gdcb.iastate.edu .",PdumBase,0.998220384,NA,0,PdumBase,0.998220384,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/16/2018 +29743053,http://www.picb.ac.cn/PDXliver,"PDXliver: a database of liver cancer patient derived xenograft mouse models. Background Liver cancer is the second leading cause of cancer-related deaths and characterized by heterogeneity and drug resistance. Patient-derived xenograft (PDX) models have been widely used in cancer research because they reproduce the characteristics of original tumors. However, the current studies of liver cancer PDX mice are scattered and the number of available PDX models are too small to represent the heterogeneity of liver cancer patients. To improve this situation and to complement available PDX models related resources, here we constructed a comprehensive database, PDXliver, to integrate and analyze liver cancer PDX models. Description Currently, PDXliver contains 116 PDX models from Chinese liver cancer patients, 51 of them were established by the in-house PDX platform and others were curated from the public literatures. These models are annotated with complete information, including clinical characteristics of patients, genome-wide expression profiles, germline variations, somatic mutations and copy number alterations. Analysis of expression subtypes and mutated genes show that PDXliver represents the diversity of human patients. Another feature of PDXliver is storing drug response data of PDX mice, which makes it possible to explore the association between molecular profiles and drug sensitivity. All data can be accessed via the Browse and Search pages. Additionally, two tools are provided to interactively visualize the omics data of selected PDXs or to compare two groups of PDXs. Conclusion As far as we known, PDXliver is the first public database of liver cancer PDX models. 
We hope that this comprehensive resource will accelerate the utility of PDX models and facilitate liver cancer research. The PDXliver database is freely available online at: http://www.picb.ac.cn/PDXliver/.",PDXliver,0.993864596,NA,0,PDXliver,0.993864596,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/9/2018 +29699484,http://www.actrec.gov.in:8080/pdzscape,"PDZscape: a comprehensive PDZ-protein database. PDZ-containing proteins comprise one of the most widely distributed protein families playing major role in localization and membrane receptor clustering. They are hence important regulators of signal transduction in cellular pathways. Although knowledge on these proteins has increased exponentially, the existing database 'PDZBase' is limited by presence of only 339 proteins as it dates back to 2004 when very little data was available. Thus, lack of exclusive information on this protein family led us to develop PDZscape. 'PDZscape' encompasses the complete available information on 58,648 PDZ-containing proteins with their known and putative binding partners on one platform. It has a user-friendly web interface that can be easily queried with external protein identifiers. With unique integration of prominent databases including NCBI, UniProtKB, Swiss-Prot, Pubmed, PDB, STRING, IntAct, KEGG, Pfam and Protein Mutant Database, it provides detailed information on PDZ interactome apart from the customized BLAST option. Most importantly, this database encompasses the mutations and diseases associated with PDZ containing proteins manually curated by our group, thus making it a comprehensive compilation. It also features tools to query the database using sequence (PDZ-Blast) and to find if protein of interest is a PDZ-binding protein. 
PDZscape is freely available at http://www.actrec.gov.in:8080/pdzscape .",PDZBase,0.993412837,NA,0,PDZBase,0.993412837,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/25/2018 +24174539,http://pedb.vib.be,"pE-DB: a database of structural ensembles of intrinsically disordered and of unfolded proteins. The goal of pE-DB (http://pedb.vib.be) is to serve as an openly accessible database for the deposition of structural ensembles of intrinsically disordered proteins (IDPs) and of denatured proteins based on nuclear magnetic resonance spectroscopy, small-angle X-ray scattering and other data measured in solution. Owing to the inherent flexibility of IDPs, solution techniques are particularly appropriate for characterizing their biophysical properties, and structural ensembles in agreement with these data provide a convenient tool for describing the underlying conformational sampling. Database entries consist of (i) primary experimental data with descriptions of the acquisition methods and algorithms used for the ensemble calculations, and (ii) the structural ensembles consistent with these data, provided as a set of models in a Protein Data Bank format. PE-DB is open for submissions from the community, and is intended as a forum for disseminating the structural ensembles and the methodologies used to generate them. While the need to represent the IDP structures is clear, methods for determining and evaluating the structural ensembles are still evolving. The availability of the pE-DB database is expected to promote the development of new modeling methods and leads to a better understanding of how function arises from disordered states.",pE-DB,0.995654404,NA,0,pE-DB,0.995654404,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/29/2013 +29216377,http://hpc-bioinformatics.cineca.it/peach,"PeachVar-DB: A Curated Collection of Genetic Variations for the Interactive Analysis of Peach Genome Data. 
Applying next-generation sequencing (NGS) technologies to species of agricultural interest has the potential to accelerate the understanding and exploration of genetic resources. The storage, availability and maintenance of huge quantities of NGS-generated data remains a major challenge. The PeachVar-DB portal, available at http://hpc-bioinformatics.cineca.it/peach, is an open-source catalog of genetic variants present in peach (Prunus persica L. Batsch) and wild-related species of Prunus genera, annotated from 146 samples publicly released on the Sequence Read Archive (SRA). We designed a user-friendly web-based interface of the database, providing search tools to retrieve single nucleotide polymorphism (SNP) and InDel variants, along with useful statistics and information. PeachVar-DB results are linked to the Genome Database for Rosaceae (GDR) and the Phytozome database to allow easy access to other external useful plant-oriented resources. In order to extend the genetic diversity covered by the PeachVar-DB further, and to allow increasingly powerful comparative analysis, we will progressively integrate newly released data.",PeachVar-DB,0.993295565,NA,0,PeachVar-DB,0.993295565,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +22712730,http://bioinfolab.muohio.edu/txid3818v1,"PeanutDB: an integrated bioinformatics web portal for Arachis hypogaea transcriptomics. Background The peanut (Arachis hypogaea) is an important crop cultivated worldwide for oil production and food sources. Its complex genetic architecture (e.g., the large and tetraploid genome possibly due to unique cross of wild diploid relatives and subsequent chromosome duplication: 2n = 4x = 40, AABB, 2800 Mb) presents a major challenge for its genome sequencing and makes it a less-studied crop. Without a doubt, transcriptome sequencing is the most effective way to harness the genome structure and gene expression dynamics of this non-model species that has a limited genomic resource. 
Description With the development of next generation sequencing technologies such as 454 pyro-sequencing and Illumina sequencing by synthesis, the transcriptomics data of peanut is rapidly accumulated in both the public databases and private sectors. Integrating 187,636 Sanger reads (103,685,419 bases), 1,165,168 Roche 454 reads (333,862,593 bases) and 57,135,995 Illumina reads (4,073,740,115 bases), we generated the first release of our peanut transcriptome assembly that contains 32,619 contigs. We provided EC, KEGG and GO functional annotations to these contigs and detected SSRs, SNPs and other genetic polymorphisms for each contig. Based on both open-source and our in-house tools, PeanutDB presents many seamlessly integrated web interfaces that allow users to search, filter, navigate and visualize easily the whole transcript assembly, its annotations and detected polymorphisms and simple sequence repeats. For each contig, sequence alignment is presented in both bird's-eye view and nucleotide level resolution, with colorfully highlighted regions of mismatches, indels and repeats that facilitate close examination of assembly quality, genetic polymorphisms, sequence repeats and/or sequencing errors. Conclusion As a public genomic database that integrates peanut transcriptome data from different sources, PeanutDB (http://bioinfolab.muohio.edu/txid3818v1) provides the Peanut research community with an easy-to-use web portal that will definitely facilitate genomics research and molecular breeding in this less-studied crop.",PeanutDB,0.997910798,NA,0,PeanutDB,0.997910798,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/19/2012 +"24163255, 29059374",http://www.pancreasexpression.org,"The pancreatic expression database: recent extensions and updates. The Pancreatic Expression Database (PED, http://www.pancreasexpression.org) is the only device currently available for mining of pancreatic cancer literature data. 
It brings together the largest collection of multidimensional pancreatic data from the literature including genomic, proteomic, microRNA, methylomic and transcriptomic profiles. PED allows the user to ask specific questions on the observed levels of deregulation among a broad range of specimen/experimental types including healthy/patient tissue and body fluid specimens, cell lines and murine models as well as related treatments/drugs data. Here we provide an update to PED, which has been previously featured in the Database issue of this journal. Briefly, PED data content has been substantially increased and expanded to cover methylomics studies. We introduced an extensive controlled vocabulary that records specific details on the samples and added data from large-scale meta-analysis studies. The web interface has been improved/redesigned with a quick search option to rapidly extract information about a gene/protein of interest and an upload option allowing users to add their own data to PED. We added a user guide and implemented integrated graphical tools to overlay and visualize retrieved information. Interoperability with biomart-compatible data sets was significantly improved to allow integrative queries with pancreatic cancer data.",PED,0.995105187,Pancreatic Expression Database,0.927990806,PED,0.995105187,2,NA,"30364952.0, 33305318.0, 34252246.0",NA,NA,NA,do not merge,NA,NA,NA,1/1/2018 +30364952,http://bigd.big.ac.cn/ped,"Plant editosome database: a curated database of RNA editosome in plants. RNA editing plays an important role in plant development and growth, enlisting a number of editing factors in the editing process and accordingly revealing the diversity of plant editosomes for RNA editing. However, there is no resource available thus far that integrates editosome data for a variety of plants. 
Here, we present Plant Editosome Database (PED; http://bigd.big.ac.cn/ped), a curated database of RNA editosome in plants that is dedicated to the curation, integration and standardization of plant editosome data. Unlike extant relevant databases, PED incorporates high-quality editosome data manually curated from related publications and organelle genome annotations. In the current version, PED integrates a complete collection of 98 RNA editing factors and 20 836 RNA editing events, covering 203 organelle genes and 1621 associated species. In addition, it contains functional effects of editing factors in regulating plant phenotypes and includes detailed experimental evidence. Together, PED serves as an important resource to help researchers investigate the RNA editing process across a wide range of plants and thus would be of broad utility for the global plant research community.",PED,0.992514034,Plant Editosome Database,0.983514047,PED,0.992514034,1,NA,"33305318.0, 34252246.0, 24163255.0, 29059374.0",NA,NA,NA,do not merge,NA,NA,NA,1/1/2019 +"33305318, 34252246",http://proteinensemble.org,"PED in 2021: a major update of the protein ensemble database for intrinsically disordered proteins. The Protein Ensemble Database (PED) (https://proteinensemble.org), which holds structural ensembles of intrinsically disordered proteins (IDPs), has been significantly updated and upgraded since its last release in 2016. The new version, PED 4.0, has been completely redesigned and reimplemented with cutting-edge technology and now holds about six times more data (162 versus 24 entries and 242 versus 60 structural ensembles) and a broader representation of state of the art ensemble generation methods than the previous version. The database has a completely renewed graphical interface with an interactive feature viewer for region-based annotations, and provides a series of descriptors of the qualitative and quantitative properties of the ensembles. 
High quality of the data is guaranteed by a new submission process, which combines both automatic and manual evaluation steps. A team of biocurators integrate structured metadata describing the ensemble generation methodology, experimental constraints and conditions. A new search engine allows the user to build advanced queries and search all entry fields including cross-references to IDP-related resources such as DisProt, MobiDB, BMRB and SASBDB. We expect that the renewed PED will be useful for researchers interested in the atomic-level understanding of IDP function, and promote the rational, structure-based design of IDP-targeting drugs.",PED,0.985365828,Protein Ensemble Database,0.779749259,PED,0.985365828,2,NA,"30364952.0, 24163255.0, 29059374.0",NA,NA,NA,do not merge,NA,NA,NA,7/1/2021 +29126123,http://www.unimd.org/pedam,"PedAM: a database for Pediatric Disease Annotation and Medicine. There is a significant number of children around the world suffering from the consequence of the misdiagnosis and ineffective treatment for various diseases. To facilitate the precision medicine in pediatrics, a database namely the Pediatric Disease Annotations & Medicines (PedAM) has been built to standardize and classify pediatric diseases. The PedAM integrates both biomedical resources and clinical data from Electronic Medical Records to support the development of computational tools, by which enables robust data analysis and integration. It also uses disease-manifestation (D-M) integrated from existing biomedical ontologies as prior knowledge to automatically recognize text-mined, D-M-specific syntactic patterns from 774 514 full-text articles and 8 848 796 abstracts in MEDLINE. Additionally, disease connections based on phenotypes or genes can be visualized on the web page of PedAM. 
Currently, the PedAM contains standardized 8528 pediatric disease terms (4542 unique disease concepts and 3986 synonyms) with eight annotation fields for each disease, including definition synonyms, gene, symptom, cross-reference (Xref), human phenotypes and its corresponding phenotypes in the mouse. The database PedAM is freely accessible at http://www.unimd.org/pedam/.",PedAM,0.973055482,Pediatric Disease Annotations & Medicines,0.695434553,PedAM,0.973055482,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +24857432,http://pediamecum.es,"[Pediamécum: one year of experience]. In 2011, the Spanish Association of Pediatrics decided to support the most ambitious project of its newly created Committee for Medicinal Products: Pediamécum. This is the first free on-line database with information on medicinal products for pediatric use in Spain. The web page http://pediamecum.es/ started on December 17 December 2012. One year later, Pediamécum includes 580 registered drugs. The website achieved more than one million page views by the end of 2013. Because of the first anniversary of Pediamécum, a survey was performed to request the feeling of users. Four hundred eighty-three responses were obtained. Ninety-five percent believed that it is easy to navigate through the web, and 74% said that their doubts about the use of medicines in children were always resolved. The overall rating of Pediamécum is 7.5/10. The aims of Pediamécum are being accomplished; which is reflected essentially due to it becoming a useful tool for all professionals who care for children in their daily clinical practice.",PediamÃ,0.916845679,NA,0,PediamÃ,0.916845679,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,5/22/2014 +26073932,http://pedican.bioinfo-minzhao.org,"Pedican: an online gene resource for pediatric cancers with literature evidence. 
Pediatric cancer (PC), that is cancer occurring in children, is the leading cause of death among children worldwide, with an incidence of 175,000 per year. Elucidating the genetic abnormalities and underlying cellular mechanisms may provide less toxic curative treatments. Therefore, it is important to understand the pathology of pediatric cancer at the genetic, genomic and epigenetic level. To unveil the cellular complexity of PC, we have developed a database of pediatric cancers (Pedican), the first literature-based pediatric gene data resource by comprehensive literature curation and data integration. In the current release, Pedican contains 735 human genes, 88 gene fusion and 24 chromosome abnormal events curated from 2245 PubMed abstracts. Pedican provides detailed annotations for each gene, such as Entrez gene information, involved pathways, protein-protein interactions, mutations, gene expression, methylation sites, TF regulation, and post-translational modification. Additionally Pedican has a user-friendly web interface, which allows sophisticated text query, sequence searches, and browsing by highlighted literature evidence and hundreds of cancer types. Overall, our curated pediatric cancer-related gene list maps the genomic and cellular landscape for various pediatric cancers, providing a valuable resource for further experiment design. The Pedican is available at http://pedican.bioinfo-minzhao.org/.",Pedican,0.98098731,of pediatric cancers,0.786508222,Pedican,0.98098731,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/15/2015 +26048622,http://www.pediatricmri.nih.gov,"The diffusion tensor imaging (DTI) component of the NIH MRI study of normal brain development (PedsDTI). The NIH MRI Study of normal brain development sought to characterize typical brain development in a population of infants, toddlers, children and adolescents/young adults, covering the socio-economic and ethnic diversity of the population of the United States. 
The study began in 1999 with data collection commencing in 2001 and concluding in 2007. The study was designed with the final goal of providing a controlled-access database; open to qualified researchers and clinicians, which could serve as a powerful tool for elucidating typical brain development and identifying deviations associated with brain-based disorders and diseases, and as a resource for developing computational methods and image processing tools. This paper focuses on the DTI component of the NIH MRI study of normal brain development. In this work, we describe the DTI data acquisition protocols, data processing steps, quality assessment procedures, and data included in the database, along with database access requirements. For more details, visit http://www.pediatricmri.nih.gov. This longitudinal DTI dataset includes raw and processed diffusion data from 498 low resolution (3 mm) DTI datasets from 274 unique subjects, and 193 high resolution (2.5 mm) DTI datasets from 152 unique subjects. Subjects range in age from 10 days (from date of birth) through 22 years. Additionally, a set of age-specific DTI templates are included. This forms one component of the larger NIH MRI study of normal brain development which also includes T1-, T2-, proton density-weighted, and proton magnetic resonance spectroscopy (MRS) imaging data, and demographic, clinical and behavioral data.",PedsDTI,0.868158847,NIH MRI study of normal brain development,0.851746321,PedsDTI,0.868158847,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/3/2015 +34504668,http://combio.snu.ac.kr/pen,"Protein-gene Expression Nexus: Comprehensive characterization of human cancer cell lines with proteogenomic analysis. Researchers have gained new therapeutic insights using multi-omics platform approaches to study DNA, RNA, and proteins of comprehensively characterized human cancer cell lines. 
To improve our understanding of the molecular features associated with oncogenic modulation in cancer, we proposed a proteogenomic database for human cancer cell lines, called Protein-gene Expression Nexus (PEN). We have expanded the characterization of cancer cell lines to include genetic, mRNA, and protein data of 145 cancer cell lines from various public studies. PEN contains proteomic and phosphoproteomic data on 4,129,728 peptides, 13,862 proteins, 7,138 phosphorylation site-associated genomic variations, 117 studies, and 12 cancer. We analyzed functional characterizations along with the integrated datasets, such as cis/trans association for copy number alteration (CNA), single amino acid variation for coding genes, post-translation modification site variation for Single Amino Acid Variation, and novel peptide expression for noncoding regions and fusion genes. PEN provides a user-friendly interface for searching, browsing, and downloading data and also supports the visualization of genome-wide association between CNA and expression, novel peptide landscape, mRNA-protein abundance, and functional annotation. Together, this dataset and PEN data portal provide a resource to accelerate cancer research using model cancer cell lines. PEN is freely accessible at http://combio.snu.ac.kr/pen.",PEN,0.994843125,Protein-gene Expression Nexus,0.95734026,PEN,0.994843125,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/17/2021 +22701464,"http://www.ebi.ac.uk/pride, http://gator.masc-proteomics.org","pep2pro: the high-throughput proteomics data processing, analysis, and visualization tool. The pep2pro database was built to support effective high-throughput proteome data analysis. Its database schema allows the coherent integration of search results from different database-dependent search algorithms and filtering of the data including control for unambiguous assignment of peptides to proteins. 
The capacity of the pep2pro database has been exploited in data analysis of various Arabidopsis proteome datasets. The diversity of the datasets and the associated scientific questions required thorough querying of the data. This was supported by the relational format structure of the data that links all information on the sample, spectrum, search database, and algorithm to peptide and protein identifications and their post-translational modifications. After publication of datasets they are made available on the pep2pro website at www.pep2pro.ethz.ch. Further, the pep2pro data analysis pipeline also handles data export do the PRIDE database (http://www.ebi.ac.uk/pride) and data retrieval by the MASCP Gator (http://gator.masc-proteomics.org/). The utility of pep2pro will continue to be used for analysis of additional datasets and as a data warehouse. The capacity of the pep2pro database for proteome data analysis has now also been made publicly available through the release of pep2pro4all, which consists of a database schema and a script that will populate the database with mass spectrometry data provided in mzIdentML format.",pep2pro,0.995340317,NA,0,pep2pro,0.995340317,1,"21063943.0, 23203882.0, 30395289.0",NA,NA,NA,do not merge,NA,NA,NA,NA,6/11/2012 +29455297,http://www.pep725.eu,"Pan European Phenological database (PEP725): a single point of access for European data. The Pan European Phenology (PEP) project is a European infrastructure to promote and facilitate phenological research, education, and environmental monitoring. The main objective is to maintain and develop a Pan European Phenological database (PEP725) with an open, unrestricted data access for science and education. PEP725 is the successor of the database developed through the COST action 725 ""Establishing a European phenological data platform for climatological applications"" working as a single access point for European-wide plant phenological data. 
So far, 32 European meteorological services and project partners from across Europe have joined and supplied data collected by volunteers from 1868 to the present for the PEP725 database. Most of the partners actively provide data on a regular basis. The database presently holds almost 12 million records, about 46 growing stages and 265 plant species (including cultivars), and can be accessed via http://www.pep725.eu/ . Users of the PEP725 database have studied a diversity of topics ranging from climate change impact, plant physiological question, phenological modeling, and remote sensing of vegetation to ecosystem productivity.",PEP725,0.994008164,Pan European Phenological database,0.893914139,PEP725,0.994008164,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/18/2018 +29982280,http://huanglab.phys.hust.edu.cn/pepbdb,"PepBDB: a comprehensive structural database of biological peptide-protein interactions. Summary A structural database of peptide-protein interactions is important for drug discovery targeting peptide-mediated interactions. Although some peptide databases, especially for special types of peptides, have been developed, a comprehensive database of cleaned peptide-protein complex structures is still not available. Such cleaned structures are valuable for docking and scoring studies in structure-based drug design. Here, we have developed PepBDB-a curated Peptide Binding DataBase of biological complex structures from the Protein Data Bank (PDB). PepBDB presents not only cleaned structures but also extensive information about biological peptide-protein interactions, and allows users to search the database with a variety of options and interactively visualize the search results. 
Availability and implementation PepBDB is available at http://huanglab.phys.hust.edu.cn/pepbdb/.",PepBDB,0.992508531,Peptide Binding DataBase,0.660811494,PepBDB,0.992508531,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +23696878,http://wukong.tongji.edu.cn/pepid,"PEpiD: a prostate epigenetic database in mammals. Epigenetic mechanisms play key roles in initiation and progression of prostate cancer by changing gene expression. The Prostate Epigenetic Database (PEpiD: http://wukong.tongji.edu.cn/pepid) archives the three extensively characterized epigenetic mechanisms DNA methylation, histone modification, and microRNA implicated in prostate cancer of human, mouse, and rat. PEpiD uses a distinct color scheme to present the three types of epigenetic data and provides a user-friendly interface for flexible query. The retrieved information includes Refseq ID, gene symbol, gene alias, genomic loci of epigenetic changes, tissue source, experimental method, and supportive references. The change of histone modification (hyper or hypo) and the corresponding gene expression change (up or down) are also indicated. A graphic view of DNA methylation with exon-intron structure and predicted CpG islands is provided as well. Moreover, the prostate-related ENCODE tracks (DNA methylation, histone modifications, chromatin remodelers), and other key transcription factors with reported roles in prostate are displayed in the browser as well. The reversibility of epigenetic aberrations has made them potential markers for diagnosis and prognosis, and targets for treatment of cancers. This curated information will improve our understanding of epigenetic mechanisms of gene regulation in prostate cancer, and serve as an important resource for epigenetic research in prostate cancer.",PEpiD,0.998079658,Prostate Epigenetic Database,0.979941408,PEpiD,0.998079658,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/16/2013 +27819351,http://crdd.osdd.net/raghava/peplife,"PEPlife: A Repository of the Half-life of Peptides. 
Short half-life is one of the key challenges in the field of therapeutic peptides. Various studies have reported enhancement in the stability of peptides using methods like chemical modifications, D-amino acid substitution, cyclization, replacement of labile aminos acids, etc. In order to study this scattered data, there is a pressing need for a repository dedicated to the half-life of peptides. To fill this lacuna, we have developed PEPlife (http://crdd.osdd.net/raghava/peplife), a manually curated resource of experimentally determined half-life of peptides. PEPlife contains 2229 entries covering 1193 unique peptides. Each entry provides detailed information of the peptide, like its name, sequence, half-life, modifications, the experimental assay for determining half-life, biological nature and activity of the peptide. We also maintain SMILES and structures of peptides. We have incorporated web-based modules to offer user-friendly data searching and browsing in the database. PEPlife integrates numerous tools to perform various types of analysis such as BLAST, Smith-Waterman algorithm, GGSEARCH, Jalview and MUSTANG. PEPlife would augment the understanding of different factors that affect the half-life of peptides like modifications, sequence, length, route of delivery of the peptide, etc. We anticipate that PEPlife will be useful for the researchers working in the area of peptide-based therapeutics.",PEPlife,0.99723953,NA,0,PEPlife,0.99723953,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/7/2016 +33647438,http://peptherdia.herokuapp.com,"PepTherDia: database and structural composition analysis of approved peptide therapeutics and diagnostics. As of 2020, there were >100 approved peptides with therapeutic or diagnostic applications. However, a complete database providing information on marketed peptides is not freely available, making the peptide chemists' job of designing future peptide drug candidates challenging. 
Unlike the rules for small-molecule drugs, there is no general set of guidelines for designing a successful peptide-based drug. In this review, together with our freely available database (PepTherDia, http://peptherdia.herokuapp.com), we provide insights into what a successful peptide therapeutic or diagnostic agent looks like and lay the foundation for establishing a set of rules to help future medicinal chemists to design peptide candidates with increased approval rates.",PepTherDia,0.833288401,NA,0,PepTherDia,0.833288401,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/26/2021 +24939129,http://www.peptideatlas.org,"Using PeptideAtlas, SRMAtlas, and PASSEL: Comprehensive Resources for Discovery and Targeted Proteomics. PeptideAtlas, SRMAtlas, and PASSEL are Web-accessible resources to support discovery and targeted proteomics research. PeptideAtlas is a multi-species compendium of shotgun proteomic data provided by the scientific community; SRMAtlas is a resource of high-quality, complete proteome SRM assays generated in a consistent manner for the targeted identification and quantification of proteins; and PASSEL is a repository that compiles and represents selected reaction monitoring data, all in an easy-to-use interface. The databases are generated from native mass spectrometry data files that are analyzed in a standardized manner including statistical validation of the results. Each resource offers search functionalities and can be queried by user-defined constraints; the query results are provided in tables or are graphically displayed. PeptideAtlas, SRMAtlas, and PASSEL are publicly available freely via the Web site http://www.peptideatlas.org. 
In this protocol, we describe the use of these resources, we highlight how to submit, search, collate and download data.",PeptideAtlas,0.972864509,NA,0,PeptideAtlas,0.972864509,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/17/2014 +24406170,http://peptisite.ucsd.edu,"PeptiSite: a structural database of peptide binding sites in 4D. We developed PeptiSite, a comprehensive and reliable database of biologically and structurally characterized peptide-binding sites, in which each site is represented by an ensemble of its complexes with protein, peptide and small molecule partners. The unique features of the database include: (1) the ensemble site representation that provides a fourth dimension to the otherwise three dimensional data, (2) comprehensive characterization of the binding site architecture that may consist of a multimeric protein assembly with cofactors and metal ions and (3) analysis of consensus interaction motifs within the ensembles and identification of conserved determinants of these interactions. Currently the database contains 585 proteins with 650 peptide-binding sites. http://peptisite.ucsd.edu/ link allows searching for the sites of interest and interactive visualization of the ensembles using the ActiveICM web-browser plugin. This structural database for protein-peptide interactions enables understanding of structural principles of these interactions and may assist the development of an efficient peptide docking benchmark.",PeptiSite,0.997222483,NA,0,PeptiSite,0.997222483,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/6/2014 +31629694,http://bioinformatics.biol.uoa.gr/db=permemdb,"PerMemDB: A database for eukaryotic peripheral membrane proteins. The majority of all proteins in cells interact with membranes either permanently or temporarily. Peripheral membrane proteins form transient complexes with membrane proteins and/or lipids, via non-covalent interactions and are of outmost importance, due to numerous cellular functions in which they participate. 
In an effort to collect data regarding this heterogeneous group of proteins we designed and constructed a database, called PerMemDB. PerMemDB is currently the most complete and comprehensive repository of data for eukaryotic peripheral membrane proteins deposited in UniProt or predicted with the use of MBPpred - a computational method that specializes in the detection of proteins that interact non-covalently with membrane lipids, via membrane binding domains. The first version of the database contains 231,770 peripheral membrane proteins from 1009 organisms. All entries have cross-references to other databases, literature references and annotation regarding their interactions with other proteins. Moreover, additional sequence annotation of the characteristic domains that allow these proteins to interact with membranes is available, due to the application of MBPpred. Through the web interface of PerMemDB, users can browse the contents of the database, submit advanced text searches and BLAST queries against the protein sequences deposited in PerMemDB. We expect this repository to serve as a source of information that will allow the scientific community to gain a deeper understanding of the evolution and function of peripheral membrane proteins via the enhancement of proteome-wide analyses. The database is available at: http://bioinformatics.biol.uoa.gr/db=permemdb.",PerMemDB,0.997093499,NA,0,PerMemDB,0.997093499,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/17/2019 +31259547,http://permm.phar.umich.edu,"PerMM: A Web Tool and Database for Analysis of Passive Membrane Permeability and Translocation Pathways of Bioactive Molecules. The PerMM web server and database were developed for quantitative analysis and visualization of passive translocation of bioactive molecules across lipid membranes. 
The server is the first physics-based web tool that calculates membrane binding energies and permeability coefficients of diverse molecules through artificial and natural membranes (phospholipid bilayers, PAMPA-DS, blood-brain barrier, and Caco-2/MDCK cell membranes). It also visualizes the transmembrane translocation pathway as a sequence of translational and rotational positions of a permeant as it moves across the lipid bilayer, along with the corresponding changes in solvation energy. The server can be applied for prediction of permeability coefficients of compounds with diverse chemical scaffolds to facilitate selection and optimization of potential drug leads. The complementary PerMM database allows comparison of computationally and experimentally determined permeability coefficients for more than 500 compounds in different membrane systems. The website and database are freely accessible at https://permm.phar.umich.edu/ .",PerMM,0.995338023,NA,0,PerMM,0.995338023,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2019 +23180785,http://peroxibase.toulouse.inra.fr,"PeroxiBase: a database for large-scale evolutionary analysis of peroxidases. The PeroxiBase (http://peroxibase.toulouse.inra.fr/) is a specialized database devoted to peroxidases' families, which are major actors of stress responses. In addition to the increasing number of sequences and the complete modification of the Web interface, new analysis tools and functionalities have been developed since the previous publication in the NAR database issue. Nucleotide sequences and graphical representation of the gene structure can now be included for entries containing genomic cross-references. An expert semi-automatic annotation strategy is being developed to generate new entries from genomic sequences and from EST libraries. Plus, new internal and automatic controls have been included to improve the quality of the entries. 
To compare gene structure organization among families' members, two new tools are available, CIWOG to detect common introns and GECA to visualize gene structure overlaid with sequence conservation. The multicriteria search tool was greatly improved to allow simple and combined queries. After such requests or a BLAST search, different analysis processes are suggested, such as multiple alignments with ClustalW or MAFFT, a platform for phylogenetic analysis and GECA's display in association with a phylogenetic tree. Finally, we updated our family specific profiles implemented in the PeroxiScan tool and made new profiles to consider new sub-families.",PeroxiBase,0.998095334,NA,0,PeroxiBase,0.998095334,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/24/2012 +33080028,http://dianalab.e-ce.uth.gr/peryton,"Peryton: a manual collection of experimentally supported microbe-disease associations. We present Peryton (https://dianalab.e-ce.uth.gr/peryton/), a database of experimentally supported microbe-disease associations. Its first version constitutes a novel resource hosting more than 7900 entries linking 43 diseases with 1396 microorganisms. Peryton's content is exclusively sustained by manual curation of biomedical articles. Diseases and microorganisms are provided in a systematic, standardized manner using reference resources to create database dictionaries. Information about the experimental design, study cohorts and the applied high- or low-throughput techniques is meticulously annotated and catered to users. Several functionalities are provided to enhance user experience and enable ingenious use of Peryton. One or more microorganisms and/or diseases can be queried at the same time. Advanced filtering options and direct text-based filtering of results enable refinement of returned information and the conducting of tailored queries suitable to different research questions. 
Peryton also provides interactive visualizations to effectively capture different aspects of its content and results can be directly downloaded for local storage and downstream analyses. Peryton will serve as a valuable source, enabling scientists of microbe-related disease fields to form novel hypotheses but, equally importantly, to assist in cross-validation of findings.",Peryton,0.997871995,NA,0,Peryton,0.997871995,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +23084601,http://bejerano.stanford.edu/pesnpdb,"PESNPdb: a comprehensive database of SNPs studied in association with pre-eclampsia. Pre-eclampsia is a pregnancy specific disorder that can be life threatening for mother and child. Multiple studies have been carried out in an attempt to identify SNPs that contribute to the genetic susceptibility of the disease. Here we describe PESNPdb (http://bejerano.stanford.edu/pesnpdb), a database aimed at centralizing SNP and study details investigated in association with pre-eclampsia. We also describe a Placenta Disorders ontology that utilizes information from PESNPdb. The main focus of PESNPdb is to help researchers study the genetic complexity of pre-eclampsia through a user-friendly interface that encourages community participation.",PESNPdb,0.998070478,NA,0,PESNPdb,0.998070478,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/18/2012 +27936097,http://petmbase.org,"PeTMbase: A Database of Plant Endogenous Target Mimics (eTMs). MicroRNAs (miRNA) are small endogenous RNA molecules, which regulate target gene expression at post-transcriptional level. Besides, miRNA activity can be controlled by a newly discovered regulatory mechanism called endogenous target mimicry (eTM). In target mimicry, eTMs bind to the corresponding miRNAs to block the binding of specific transcript leading to increase mRNA expression. Thus, miRNA-eTM-target-mRNA regulation modules involving a wide range of biological processes; an increasing need for a comprehensive eTM database arose. 
Except miRSponge with limited number of Arabidopsis eTM data no available database and/or repository was developed and released for plant eTMs yet. Here, we present an online plant eTM database, called PeTMbase (http://petmbase.org), with a highly efficient search tool. To establish the repository a number of identified eTMs was obtained utilizing from high-throughput RNA-sequencing data of 11 plant species. Each transcriptome libraries is first mapped to corresponding plant genome, then long non-coding RNA (lncRNA) transcripts are characterized. Furthermore, additional lncRNAs retrieved from GREENC and PNRD were incorporated into the lncRNA catalog. Then, utilizing the lncRNA and miRNA sources a total of 2,728 eTMs were successfully predicted. Our regularly updated database, PeTMbase, provides high quality information regarding miRNA:eTM modules and will aid functional genomics studies particularly, on miRNA regulatory networks.",PeTMbase,0.988303959,NA,0,PeTMbase,0.988303959,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/9/2016 +25198774,http://pfaldb.jnu.ac.in,"PfalDB: an integrated drug target and chemical database for Plasmodium falciparum. Plasmodium falciparum is one of the deadliest protozoan parasite species among those that cause malaria. Uncontrolled use of antimalarial drugs has resulted in evolutionary selection pressure favoring high levels of resistance to antimalarials; currently P.falciparum shows resistance to all classes of antimalarials. Therefore it is essential to identify novel drug targets, and design selective anti-malarials which can overcome resistance. While many drug targets are freely available in various public domain resources, a single comprehensive source of data containing easily searchable and retrievable information is currently lacking. 
To facilitate the total integration and mining of data emerging from different drug consortia and also to prioritize drug targets for structure-based drug design, an open-access, inclusive comprehensive database for Plasmodium falciparum was established. Meta data of known/modeled structures along with binding site parameters of drug targets have been included in the database. Additionally, chemical compounds showing a positive inhibitory assay against Plasmodium falciparum or known drug targets have also been provided. The database is accessible at http://pfaldb.jnu.ac.in. The database provides diverse information regarding the structure, sequence, stage specific gene expression, pathway, action mechanism, essentiality and druggability for each drug target, and literature to assess the validation status of individual drug targets. It also includes information on individual anti-malarials with their activity and bioassay.",PfalDB,0.975739539,NA,0,PfalDB,0.975739539,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2014 +24288371,"http://pfam.sanger.ac.uk/, http://pfam.janelia.org","Pfam: the protein families database. Pfam, available via servers in the UK (http://pfam.sanger.ac.uk/) and the USA (http://pfam.janelia.org/), is a widely used database of protein families, containing 14 831 manually curated entries in the current release, version 27.0. Since the last update article 2 years ago, we have generated 1182 new families and maintained sequence coverage of the UniProt Knowledgebase (UniProtKB) at nearly 80%, despite a 50% increase in the size of the underlying sequence database. Since our 2012 article describing Pfam, we have also undertaken a comprehensive review of the features that are provided by Pfam over and above the basic family data. For each feature, we determined the relevance, computational burden, usage statistics and the functionality of the feature in a website context. 
As a consequence of this review, we have removed some features, enhanced others and developed new ones to meet the changing demands of computational biology. Here, we describe the changes to Pfam content. Notably, we now provide family alignments based on four different representative proteome sequence data sets and a new interactive DNA search interface. We also discuss the mapping between Pfam and known 3D structures.",Pfam,0.990840673,the protein families database,0.763704188,Pfam,0.990840673,1,NA,"26673716.0, 30357350.0, 33125078.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/27/2013 +"26673716, 30357350, 33125078",http://pfam.xfam.org,"The Pfam protein families database: towards a more sustainable future. In the last two years the Pfam database (http://pfam.xfam.org) has undergone a substantial reorganisation to reduce the effort involved in making a release, thereby permitting more frequent releases. Arguably the most significant of these changes is that Pfam is now primarily based on the UniProtKB reference proteomes, with the counts of matched sequences and species reported on the website restricted to this smaller set. Building families on reference proteomes sequences brings greater stability, which decreases the amount of manual curation required to maintain them. It also reduces the number of sequences displayed on the website, whilst still providing access to many important model organisms. Matches to the full UniProtKB database are, however, still available and Pfam annotations for individual UniProtKB sequences can still be retrieved. Some Pfam entries (1.6%) which have no matches to reference proteomes remain; we are working with UniProt to see if sequences from them can be incorporated into reference proteomes. Pfam-B, the automatically-generated supplement to Pfam, has been removed. The current release (Pfam 29.0) includes 16 295 entries and 559 clans. 
The facility to view the relationship between families within a clan has been improved by the introduction of a new tool.",Pfam,0.862235963,The,0.562504411,Pfam,0.862235963,3,NA,24288371,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +30733462,http://lee.kias.re.kr,"PFDB: A standardized protein folding database with temperature correction. We constructed a standardized protein folding kinetics database (PFDB) in which the logarithmic rate constants of all listed proteins are calculated at the standard temperature (25 °C). A temperature correction based on the Eyring-Kramers equation was introduced for proteins whose folding kinetics were originally measured at temperatures other than 25 °C. We verified the temperature correction by comparing the logarithmic rate constants predicted and experimentally observed at 25 °C for 14 different proteins, and the results demonstrated improvement of the quality of the database. PFDB consists of 141 (89 two-state and 52 non-two-state) single-domain globular proteins, which has the largest number among the currently available databases of protein folding kinetics. PFDB is thus intended to be used as a standard for developing and testing future predictive and theoretical studies of protein folding. PFDB can be accessed from the following link: http://lee.kias.re.kr/~bala/PFDB .",PFDB,0.996777177,protein folding kinetics database,0.818020368,PFDB,0.996777177,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/7/2019 +25828689,http://pfr2.sb-roscoff.fr,"PFR²: a curated database of planktonic foraminifera 18S ribosomal DNA as a resource for studies of plankton ecology, biogeography and evolution. Planktonic foraminifera (Rhizaria) are ubiquitous marine pelagic protists producing calcareous shells with conspicuous morphology. They play an important role in the marine carbon cycle, and their exceptional fossil record serves as the basis for biochronostratigraphy and past climate reconstructions. 
A major worldwide sampling effort over the last two decades has resulted in the establishment of multiple large collections of cryopreserved individual planktonic foraminifera samples. Thousands of 18S rDNA partial sequences have been generated, representing all major known morphological taxa across their worldwide oceanic range. This comprehensive data coverage provides an opportunity to assess patterns of molecular ecology and evolution in a holistic way for an entire group of planktonic protists. We combined all available published and unpublished genetic data to build PFR(2), the Planktonic foraminifera Ribosomal Reference database. The first version of the database includes 3322 reference 18S rDNA sequences belonging to 32 of the 47 known morphospecies of extant planktonic foraminifera, collected from 460 oceanic stations. All sequences have been rigorously taxonomically curated using a six-rank annotation system fully resolved to the morphological species level and linked to a series of metadata. The PFR(2) website, available at http://pfr2.sb-roscoff.fr, allows downloading the entire database or specific sections, as well as the identification of new planktonic foraminiferal sequences. Its novel, fully documented curation process integrates advances in morphological and molecular taxonomy. It allows for an increase in its taxonomic resolution and assures that integrity is maintained by including a complete contingency tracking of annotations and assuring that the annotations remain internally consistent.",PFR(2,0.927200019,Planktonic foraminifera Ribosomal Reference,0.830445404,PFR(2,0.927200019,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,4/15/2015 +20672376,http://pfs.nus.edu.sg,"pfSNP: An integrated potentially functional SNP resource that facilitates hypotheses generation through knowledge syntheses. Currently, >14,000,000 single nucleotide polymorphisms (SNPs) are reported. 
Identifying phenotype-affecting SNPs among these many SNPs pose significant challenges. Although several Web resources are available that can inform about the functionality of SNPs, these resources are mainly annotation databases and are not very comprehensive. In this article, we present a comprehensive, well-annotated, integrated pfSNP (potentially functional SNPs) Web resource (http://pfs.nus.edu.sg/), which is aimed to facilitate better hypothesis generation through knowledge syntheses mediated by better data integration and a user-friendly Web interface. pfSNP integrates >40 different algorithms/resources to interrogate >14,000,000 SNPs from the dbSNP database for SNPs of potential functional significance based on previous published reports, inferred potential functionality from genetic approaches as well as predicted potential functionality from sequence motifs. Its query interface has the user-friendly ""auto-complete, prompt-as-you-type"" feature and is highly customizable, facilitating different combination of queries using Boolean-logic. Additionally, to facilitate better understanding of the results and aid in hypotheses generation, gene/pathway-level information with text clouds highlighting enriched tissues/pathways as well as detailed-related information are also provided on the results page. Hence, the pfSNP resource will be of great interest to scientists focusing on association studies as well as those interested to experimentally address the functionality of SNPs.",pfSNP,0.996151626,SNPs,0.563585818,pfSNP,0.996151626,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2011 +27824078,http://wgmlstdb.imst.nsysu.edu.tw,"PGAdb-builder: A web service tool for creating pan-genome allele database for molecular fine typing. With the advance of next generation sequencing techniques, whole genome sequencing (WGS) is expected to become the optimal method for molecular subtyping of bacterial isolates. 
To use WGS as a general subtyping method for disease outbreak investigation and surveillance, the layout of WGS-based typing must be comparable among laboratories. Whole genome multilocus sequence typing (wgMLST) is an approach that achieves this requirement. To apply wgMLST as a standard subtyping approach, a pan-genome allele database (PGAdb) for the population of a bacterial organism must first be established. We present a free web service tool, PGAdb-builder (http://wgmlstdb.imst.nsysu.edu.tw), for the construction of bacterial PGAdb. The effectiveness of PGAdb-builder was tested by constructing a pan-genome allele database for Salmonella enterica serovar Typhimurium, with the database being applied to create a wgMLST tree for a panel of epidemiologically well-characterized S. Typhimurium isolates. The performance of the wgMLST-based approach was as high as that of the SNP-based approach in Leekitcharoenphon's study used for discerning among epidemiologically related and non-related isolates.",PGAdb-builder,0.779213417,NA,0,PGAdb-builder,0.779213417,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/8/2016 +21765097,http://nwrce.org/pgat,"PGAT: a multistrain analysis resource for microbial genomes. Motivation The Prokaryotic-genome Analysis Tool (PGAT) is a web-based database application for comparing gene content and sequence across multiple microbial genomes facilitating the discovery of genetic differences that may explain observed phenotypes. PGAT supports database queries to identify genes that are present or absent in user-selected genomes, comparison of sequence polymorphisms in sets of orthologous genes, multigenome display of regions surrounding a query gene, comparison of the distribution of genes in metabolic pathways and manual community annotation. Availability and implementation The PGAT website may be accessed at http://nwrce.org/pgat. 
Contact mbrittna@uw.edu.",PGAT,0.996497869,Prokaryotic-genome Analysis Tool,0.875871877,PGAT,0.996497869,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/15/2011 +30245835,http://pineapple.angiosperms.org/pineapple/html/index.html,"PGD: Pineapple Genomics Database. Pineapple occupies an important phylogenetic position as its reference genome is a model for studying the evolution the Bromeliaceae family and the crassulacean acid metabolism (CAM) photosynthesis. Here, we developed a pineapple genomics database (PGD, http://pineapple.angiosperms.org/pineapple/html/index.html) as a central online platform for storing and integrating genomic, transcriptomic, function annotation and genetic marker data for pineapple (Ananas comosus (L.) Merr.). The PGD currently hosts significant search tools and available datasets for researchers to study comparative genomics, gene expression, gene co-expression molecular marker, and gene annotation of A. comosus (L). PGD also performed a series of additional pages for a genomic browser that visualizes genomic data interactively, bulk data download, a detailed user manual, and data integration information. PGD was developed with the capacity to integrate future data resources, and will be used as a long-term and open access database to facilitate the study of the biology, distribution, and the evolution of pineapple and the relative plant species. An email-based helpdesk is also available to offer support with the website and requests of specific datasets from the research community.",PGD,0.995591462,Pineapple Genomics Database,0.916814101,PGD,0.995591462,1,NA,27616775,NA,NA,NA,do not merge,NA,NA,NA,9/17/2018 +27616775,http://pangolin-genome.um.edu.my,"PGD: a pangolin genome hub for the research community. . Pangolins (order Pholidota) are the only mammals covered by scales. 
We have recently sequenced and analyzed the genomes of two critically endangered Asian pangolin species, namely the Malayan pangolin (Manis javanica) and the Chinese pangolin (Manis pentadactyla). These complete genome sequences will serve as reference sequences for future research to address issues of species conservation and to advance knowledge in mammalian biology and evolution. To further facilitate the global research effort in pangolin biology, we developed the Pangolin Genome Database (PGD), as a future hub for hosting pangolin genomic and transcriptomic data and annotations, and with useful analysis tools for the research community. Currently, the PGD provides the reference pangolin genome and transcriptome data, gene sequences and functional information, expressed transcripts, pseudogenes, genomic variations, organ-specific expression data and other useful annotations. We anticipate that the PGD will be an invaluable platform for researchers who are interested in pangolin and mammalian research. We will continue updating this hub by including more data, annotation and analysis tools particularly from our research consortium.Database URL: http://pangolin-genome.um.edu.my.",PGD,0.98055391,Pangolin Genome Database,0.940320601,PGD,0.98055391,1,NA,30245835,NA,NA,NA,do not merge,NA,NA,NA,9/11/2016 +27987164,http://pgdbj,"Plant Genome DataBase Japan (PGDBj). A portal website that integrates a variety of information related to genomes of model and crop plants from databases (DBs) and the literature was generated. This website, named the Plant Genome DataBase Japan (PGDBj, http://pgdbj. jp/en/ ), is comprised of three component DBs and a cross-search engine which provides a seamless search over their contents. One of the three component DBs is the Ortholog DB, which provides gene cluster information based on the amino acid sequence similarity. 
Over 1,000,000 amino acid sequences of 40 Viridiplantae species were collected from the public DNA DBs, and plant genome DBs such as TAIR and RAP-DB were subjected to reciprocal BLAST searches for clustering. Another component DB is the Plant Resource DB for genomic- and bio-resources. This DB also integrates the SABRE DB, which provides cDNA and genome sequence resources maintained in the RIKEN BioResource Center and National BioResource Projects Japan. The third component DB of PGDBj is the DNA Marker DB, which manually or automatically collects curated information on DNA markers, quantitative trait loci (QTL), and related genetic linkage maps, from the literature and external DBs. By combining these component DBs and a cross-search engine, PGDBj serves as a useful platform to study genetic systems for both fundamental and applied researches for a wide range of plant species.",PGDBj,0.997523487,Plant Genome DataBase Japan,0.973046175,PGDBj,0.997523487,1,NA,24363285,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2017 +24363285,http://pgdbj.jp/?ln=en,"Plant Genome DataBase Japan (PGDBj): a portal website for the integration of plant genome-related databases. The Plant Genome DataBase Japan (PGDBj, http://pgdbj.jp/?ln=en) is a portal website that aims to integrate plant genome-related information from databases (DBs) and the literature. The PGDBj is comprised of three component DBs and a cross-search engine, which provides a seamless search over the contents of the DBs. The three DBs are as follows. (i) The Ortholog DB, providing gene cluster information based on the amino acid sequence similarity. Over 500,000 amino acid sequences of 20 Viridiplantae species were subjected to reciprocal BLAST searches and clustered. Sequences from plant genome DBs (e.g. TAIR10 and RAP-DB) were also included in the cluster with a direct link to the original DB. 
(ii) The Plant Resource DB, integrating the SABRE DB, which provides cDNA and genome sequence resources accumulated and maintained in the RIKEN BioResource Center and National BioResource Projects. (iii) The DNA Marker DB, providing manually or automatically curated information of DNA markers, quantitative trait loci and related linkage maps, from the literature and external DBs. As the PGDBj targets various plant species, including model plants, algae, and crops important as food, fodder and biofuel, researchers in the field of basic biology as well as a wide range of agronomic fields are encouraged to perform searches using DNA sequences, gene names, traits and phenotypes of interest. The PGDBj will return the search results from the component DBs and various types of linked external DBs.",PGDBj,0.985787213,Plant Genome DataBase Japan,0.963867758,PGDBj,0.985787213,1,NA,27987164,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,12/19/2013 +"23180799, 27987177",http://chibba.agtec.uga.edu/duplication,"PGDD: a database of gene and genome duplication in plants. Genome duplication (GD) has permanently shaped the architecture and function of many higher eukaryotic genomes. The angiosperms (flowering plants) are outstanding models in which to elucidate consequences of GD for higher eukaryotes, owing to their propensity for chromosomal duplication or even triplication in a few cases. Duplicated genome structures often require both intra- and inter-genome alignments to unravel their evolutionary history, also providing the means to deduce both obvious and otherwise-cryptic orthology, paralogy and other relationships among genes. The burgeoning sets of angiosperm genome sequences provide the foundation for a host of investigations into the functional and evolutionary consequences of gene and GD. 
To provide genome alignments from a single resource based on uniform standards that have been validated by empirical studies, we built the Plant Genome Duplication Database (PGDD; freely available at http://chibba.agtec.uga.edu/duplication/), a web service providing synteny information in terms of colinearity between chromosomes. At present, PGDD contains data for 26 plants including bryophytes and chlorophyta, as well as angiosperms with draft genome sequences. In addition to the inclusion of new genomes as they become available, we are preparing new functions to enhance PGDD.",PGDD,0.996008992,Plant Genome Duplication Database,0.988166434,PGDD,0.996008992,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +31584086,"http://www.pgghan.org, http://www.hanchinesegenomes.org","PGG.Han: the Han Chinese genome database and analysis platform. As the largest ethnic group in the world, the Han Chinese population is nonetheless underrepresented in global efforts to catalogue the genomic variability of natural populations. Here, we developed the PGG.Han, a population genome database to serve as the central repository for the genomic data of the Han Chinese Genome Initiative (Phase I). In its current version, the PGG.Han archives whole-genome sequences or high-density genome-wide single-nucleotide variants (SNVs) of 114 783 Han Chinese individuals (a.k.a. the Han100K), representing geographical sub-populations covering 33 of the 34 administrative divisions of China, as well as Singapore. The PGG.Han provides: (i) an interactive interface for visualization of the fine-scale genetic structure of the Han Chinese population; (ii) genome-wide allele frequencies of hierarchical sub-populations; (iii) ancestry inference for individual samples and controlling population stratification based on nested ancestry informative markers (AIMs) panels; (iv) population-structure-aware shared control data for genotype-phenotype association studies (e.g. 
GWASs) and (v) a Han-Chinese-specific reference panel for genotype imputation. Computational tools are implemented into the PGG.Han, and an online user-friendly interface is provided for data analysis and results visualization. The PGG.Han database is freely accessible via http://www.pgghan.org or https://www.hanchinesegenomes.org.",PGG.Han,0.980565399,NA,0,PGG.Han,0.980565399,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +29112749,http://www.pggpopulation.org,"PGG.Population: a database for understanding the genomic diversity and genetic ancestry of human populations. There are a growing number of studies focusing on delineating genetic variations that are associated with complex human traits and diseases due to recent advances in next-generation sequencing technologies. However, identifying and prioritizing disease-associated causal variants relies on understanding the distribution of genetic variations within and among populations. The PGG.Population database documents 7122 genomes representing 356 global populations from 107 countries and provides essential information for researchers to understand human genomic diversity and genetic ancestry. These data and information can facilitate the design of research studies and the interpretation of results of both evolutionary and medical studies involving human populations. The database is carefully maintained and constantly updated when new data are available. We included miscellaneous functions and a user-friendly graphical interface for visualization of genomic diversity, population relationships (genetic affinity), ancestral makeup, footprints of natural selection, and population history etc. Moreover, PGG.Population provides a useful feature for users to analyze data and visualize results in a dynamic style via online illustration. 
The long-term ambition of the PGG.Population, together with the joint efforts from other researchers who contribute their data to our database, is to create a comprehensive depository of geographic and ethnic variation of human genome, as well as a platform bringing influence on future practitioners of medicine and clinical investigators. PGG.Population is available at https://www.pggpopulation.org.",PGG.Population,0.989457592,NA,0,PGG.Population,0.989457592,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +31640808,http://www.pggsnv.org,"PGG.SNV: understanding the evolutionary and medical implications of human single nucleotide variations in diverse populations. Despite the tremendous growth of the DNA sequencing data in the last decade, our understanding of the human genome is still in its infancy. To understand the implications of genetic variants in the light of population genetics and molecular evolution, we developed a database, PGG.SNV ( https://www.pggsnv.org ), which gives much higher weight to previously under-investigated indigenous populations in Asia. PGG.SNV archives 265 million SNVs across 220,147 present-day genomes and 1018 ancient genomes, including 1009 newly sequenced genomes, representing 977 global populations. Moreover, estimation of population genetic diversity and evolutionary parameters is available in PGG.SNV, a unique feature compared with other databases.",PGG.SNV,0.993890333,NA,0,PGG.SNV,0.993890333,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/22/2019 +29223505,http://www.pakmutation.com,"Pakistan Genetic Mutation Database (PGMD); A centralized Pakistani mutome data source. The development and advancement of next generation sequencing have not only sped up the process of identifying rare variants, but have also enabled scientists to explore all variants in a single individual. The Pakistani population has a high ratio of first degree consanguinity, which is why it is a rich source for various kinds of genetic disorders. 
Due to the heterogeneous composition of Pakistani population, the likelihood of genetic heterogeneity for each disorder is high. Therefore, the compilation and organization of such vast genetic data is necessary to facilitate access for analysis and interpretation to researchers and medical geneticists. The increased research on Pakistani ethnic families for disease gene identification has revealed many mutations, which has led us to develop a Pakistani mutome database entitled ""Pakistan Genetic Mutation Database (PGMD)"". In PGMD, the medico-genetic information about diseases are mainly compiled into Syndromic and Non-syndromic disorders. It is a public database, which can be freely accessed from http://www.pakmutation.com. At present, we have registered more than 1000 mutations, reported in about 130 different kinds of genetic disorders. Practically, PGMD will assist researchers, clinicians, and geneticists in genetic counseling and screening of population-specific mutations, which will also aid in personalized healthcare.",PGMD,0.981539965,Pakistan Genetic Mutation Database,0.853109753,PGMD,0.981539965,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/7/2017 +28365761,http://phagesdb.org,"PhagesDB: the actinobacteriophage database. The Actinobacteriophage Database (PhagesDB) is a comprehensive, interactive, database-backed website that collects and shares information related to the discovery, characterization and genomics of viruses that infect Actinobacterial hosts. To date, more than 8000 bacteriophages-including over 1600 with sequenced genomes-have been entered into the database. PhagesDB plays a crucial role in organizing the discoveries of phage biologists around the world-including students in the SEA-PHAGES program-and has been cited in over 50 peer-reviewed articles. Availability and implementation http://phagesdb.org/. 
Contact gfh@pitt.edu.",PhagesDB,0.997792363,Actinobacteriophage Database,0.878269814,PhagesDB,0.997792363,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2017 +21624156,http://purl.bioontology.org/ontology/PHARE,"Integration and publication of heterogeneous text-mined relationships on the Semantic Web. Background Advances in Natural Language Processing (NLP) techniques enable the extraction of fine-grained relationships mentioned in biomedical text. The variability and the complexity of natural language in expressing similar relationships causes the extracted relationships to be highly heterogeneous, which makes the construction of knowledge bases difficult and poses a challenge in using these for data mining or question answering. Results We report on the semi-automatic construction of the PHARE relationship ontology (the PHArmacogenomic RElationships Ontology) consisting of 200 curated relations from over 40,000 heterogeneous relationships extracted via text-mining. These heterogeneous relations are then mapped to the PHARE ontology using synonyms, entity descriptions and hierarchies of entities and roles. Once mapped, relationships can be normalized and compared using the structure of the ontology to identify relationships that have similar semantics but different syntax. We compare and contrast the manual procedure with a fully automated approach using WordNet to quantify the degree of integration enabled by iterative curation and refinement of the PHARE ontology. The result of such integration is a repository of normalized biomedical relationships, named PHARE-KB, which can be queried using Semantic Web technologies such as SPARQL and can be visualized in the form of a biological network. Conclusions The PHARE ontology serves as a common semantic framework to integrate more than 40,000 relationships pertinent to pharmacogenomics. The PHARE ontology forms the foundation of a knowledge base named PHARE-KB. 
Once populated with relationships, PHARE-KB (i) can be visualized in the form of a biological network to guide human tasks such as database curation and (ii) can be queried programmatically to guide bioinformatics applications such as the prediction of molecular interactions. PHARE is available at http://purl.bioontology.org/ontology/PHARE.",PHARE-KB,0.872125208,NA,0,PHARE-KB,0.872125208,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/17/2011 +22748168,"http://www.i-pharm.org/, http://biomart.i-pharm.org","Rational drug repositioning guided by an integrated pharmacological network of protein, disease and drug. Background The process of drug discovery and development is time-consuming and costly, and the probability of success is low. Therefore, there is rising interest in repositioning existing drugs for new medical indications. When successful, this process reduces the risk of failure and costs associated with de novo drug development. However, in many cases, new indications of existing drugs have been found serendipitously. Thus there is a clear need for establishment of rational methods for drug repositioning. Results In this study, we have established a database we call ""PharmDB"" which integrates data associated with disease indications, drug development, and associated proteins, and known interactions extracted from various established databases. To explore linkages of known drugs to diseases of interest from within PharmDB, we designed the Shared Neighborhood Scoring (SNS) algorithm. And to facilitate exploration of tripartite (Drug-Protein-Disease) network, we developed a graphical data visualization software program called phExplorer, which allows us to browse PharmDB data in an interactive and dynamic manner. We validated this knowledge-based tool kit, by identifying a potential application of a hypertension drug, benzthiazide (TBZT), to induce lung cancer cell death. 
Conclusions By combining PharmDB, an integrated tripartite database, with Shared Neighborhood Scoring (SNS) algorithm, we developed a knowledge platform to rationally identify new indications for known FDA approved drugs, which can be customized to specific projects using manual curation. The data in PharmDB is open access and can be easily explored with phExplorer and accessed via BioMart web service (http://www.i-pharm.org/, http://biomart.i-pharm.org/).",PharmDB,0.995387569,NA,0,PharmDB,0.995387569,1,26555441,NA,NA,NA,do not merge,NA,NA,NA,NA,7/2/2012 +26555441,"http://pharmdb-k.org, http://biomart.i-pharm.org","PharmDB-K: Integrated Bio-Pharmacological Network Database for Traditional Korean Medicine. Despite the growing attention given to Traditional Medicine (TM) worldwide, there is no well-known, publicly available, integrated bio-pharmacological Traditional Korean Medicine (TKM) database for researchers in drug discovery. In this study, we have constructed PharmDB-K, which offers comprehensive information relating to TKM-associated drugs (compound), disease indication, and protein relationships. To explore the underlying molecular interaction of TKM, we integrated fourteen different databases, six Pharmacopoeias, and literature, and established a massive bio-pharmacological network for TKM and experimentally validated some cases predicted from the PharmDB-K analyses. Currently, PharmDB-K contains information about 262 TKMs, 7,815 drugs, 3,721 diseases, 32,373 proteins, and 1,887 side effects. One of the unique sets of information in PharmDB-K includes 400 indicator compounds used for standardization of herbal medicine. Furthermore, we are operating PharmDB-K via phExplorer (a network visualization software) and BioMart (a data federation framework) for convenient search and analysis of the TKM network. 
Database URL: http://pharmdb-k.org, http://biomart.i-pharm.org.",PharmDB-K,0.986358921,NA,0,PharmDB-K,0.986358921,1,22748168,NA,NA,NA,do not merge,NA,NA,NA,NA,11/10/2015 +34387941,http://www.pharmgkb.org,"PharmGKB, an Integrated Resource of Pharmacogenomic Knowledge. The Pharmacogenomics Knowledgebase (PharmGKB) is an integrated online knowledge resource for the understanding of how genetic variation contributes to variation in drug response. Our focus includes not only pharmacogenomic information useful for clinical implementation (e.g., drug dosing guidelines and annotated drug labels), but also information to catalyze scientific research and drug discovery (e.g., variant-drug annotations and drug-centered pathways). As of April 2021, the annotated content of PharmGKB spans 715 drugs, 1761 genes, 227 diseases, 165 clinical guidelines, and 784 drug labels. We have manually curated data from more than 9000 published papers to generate the content of PharmGKB. Recently, we have also implemented an automated natural language processing (NLP) tool to broaden our coverage of the pharmacogenomic literature. This article contains a basic protocol describing how to navigate the PharmGKB website to retrieve information on how genes and genetic variations affect drug efficacy and toxicity. It also includes a protocol on how to use PharmGKB to facilitate interpretation of findings for a pharmacogenomic variant genotype or metabolizer phenotype. PharmGKB is freely available at http://www.pharmgkb.org. © 2021 Wiley Periodicals LLC. 
Basic Protocol 1: Navigating the homepage of PharmGKB and searching by drug Basic Protocol 2: Using PharmGKB to facilitate interpretation of pharmacogenomic variant genotypes or metabolizer phenotypes.",PharmGKB,0.997299695,Pharmacogenomics Knowledgebase,0.730918014,PharmGKB,0.997299695,1,NA,22564364,NA,NA,NA,do not merge,NA,NA,NA,8/1/2021 +22564364,http://bioai4core.fulton.asu.edu/snpshot,"A SNPshot of PubMed to associate genetic variants with drugs, diseases, and adverse reactions. Motivation Genetic factors determine differences in pharmacokinetics, drug efficacy, and drug responses between individuals and sub-populations. Wrong dosages of drugs can lead to severe adverse drug reactions in individuals whose drug metabolism drastically differs from the ""assumed average"". Databases such as PharmGKB are excellent sources of pharmacogenetic information on enzymes, genetic variants, and drug response affected by changes in enzymatic activity. Here, we seek to aid researchers, database curators, and clinicians in their search for relevant information by automatically extracting these data from literature. Approach We automatically populate a repository of information on genetic variants, relations to drugs, occurrence in sub-populations, and associations with disease. We mine textual data from PubMed abstracts to discover such genotype-phenotype associations, focusing on SNPs that can be associated with variations in drug response. The overall repository covers relations found between genes, variants, alleles, drugs, diseases, adverse drug reactions, populations, and allele frequencies. We cross-reference these data to EntrezGene, PharmGKB, PubChem, and others. Results The performance regarding entity recognition and relation extraction yields a precision of 90-92% for the major entity types (gene, drug, disease), and 76-84% for relations involving these types. 
Comparison of our repository to PharmGKB reveals a coverage of 93% of gene-drug associations in PharmGKB and 97% of the gene-variant mappings based on 180,000 PubMed abstracts. Availability http://bioai4core.fulton.asu.edu/snpshot.",PharmGKB,0.992674232,NA,0,PharmGKB,0.992674232,1,NA,34387941,NA,NA,NA,do not merge,NA,NA,noting predicted name incorrect,4/30/2012 +31584089,http://db.phasep.pro,"PhaSepDB: a database of liquid-liquid phase separation related proteins. It's widely appreciated that liquid-liquid phase separation (LLPS) underlies the formation of membraneless organelles, which function to concentrate proteins and nucleic acids. In the past few decades, major efforts have been devoted to identify the phase separation associated proteins and elucidate their functions. To better utilize the knowledge dispersed in published literature, we developed PhaSepDB (http://db.phasep.pro/), a manually curated database of phase separation associated proteins. Currently, PhaSepDB includes 2914 non-redundant proteins localized in different organelles curated from published literature and database. PhaSepDB provides protein summary, publication reference and sequence features of phase separation associated proteins. The sequence features which reflect the LLPS behavior are also available for other human protein candidates. The online database provides a convenient interface for the research community to easily browse, search and download phase separation associated proteins. As a centralized resource, we believe PhaSepDB will facilitate the future study of phase separation.",PhaSepDB,0.996137023,NA,0,PhaSepDB,0.996137023,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +31612960,http://phasepro.elte.hu,"PhaSePro: the database of proteins driving liquid-liquid phase separation. Membraneless organelles (MOs) are dynamic liquid condensates that host a variety of specific cellular processes, such as ribosome biogenesis or RNA degradation. 
MOs form through liquid-liquid phase separation (LLPS), a process that relies on multivalent weak interactions of the constituent proteins and other macromolecules. Since the first discoveries of certain proteins being able to drive LLPS, it emerged as a general mechanism for the effective organization of cellular space that is exploited in all kingdoms of life. While numerous experimental studies report novel cases, the computational identification of LLPS drivers is lagging behind, and many open questions remain about the sequence determinants, composition, regulation and biological relevance of the resulting condensates. Our limited ability to overcome these issues is largely due to the lack of a dedicated LLPS database. Therefore, here we introduce PhaSePro (https://phasepro.elte.hu), an openly accessible, comprehensive, manually curated database of experimentally validated LLPS driver proteins/protein regions. It not only provides a wealth of information on such systems, but improves the standardization of data by introducing novel LLPS-specific controlled vocabularies. PhaSePro can be accessed through an appealing, user-friendly interface and thus has definite potential to become the central resource in this dynamically developing field.",PhaSePro,0.996815085,NA,0,PhaSePro,0.996815085,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +27026615,http://phekb.org,"PheKB: a catalog and workflow for creating electronic phenotype algorithms for transportability. Objective Health care generated data have become an important source for clinical and genomic research. Often, investigators create and iteratively refine phenotype algorithms to achieve high positive predictive values (PPVs) or sensitivity, thereby identifying valid cases and controls. 
These algorithms achieve the greatest utility when validated and shared by multiple health care systems.Materials and Methods We report the current status and impact of the Phenotype KnowledgeBase (PheKB, http://phekb.org), an online environment supporting the workflow of building, sharing, and validating electronic phenotype algorithms. We analyze the most frequent components used in algorithms and their performance at authoring institutions and secondary implementation sites. Results As of June 2015, PheKB contained 30 finalized phenotype algorithms and 62 algorithms in development spanning a range of traits and diseases. Phenotypes have had over 3500 unique views in a 6-month period and have been reused by other institutions. International Classification of Disease codes were the most frequently used component, followed by medications and natural language processing. Among algorithms with published performance data, the median PPV was nearly identical when evaluated at the authoring institutions (n = 44; case 96.0%, control 100%) compared to implementation sites (n = 40; case 97.5%, control 100%). Discussion These results demonstrate that a broad range of algorithms to mine electronic health record data from different health systems can be developed with high PPV, and algorithms developed at one site are generally transportable to others. Conclusion By providing a central repository, PheKB enables improved development, transportability, and validity of algorithms for research-grade phenotypes using health care generated data.",PheKB,0.997518778,Phenotype KnowledgeBase,0.822092161,PheKB,0.997518778,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/28/2016 +33245779,http://phelige.com,"PheLiGe: an interactive database of billions of human genotype-phenotype associations. Genome-wide association studies have provided a vast array of publicly available SNP × phenotype association results. 
However, they are often in disparate repositories and formats, making downstream analyses difficult and time consuming. PheLiGe (https://phelige.com) is a database that provides easy access to such results via a web interface. The underlying database currently stores >75 billion genotype-phenotype associations from 7347 genome-wide and 1.2 million region-wide (e.g. cis-eQTL) association scans. The web interface allows for investigation of regional genotype-phenotype associations across many phenotypes, giving insights into the biological function affected by the variant in question. Furthermore, PheLiGe can compare regional patterns of association between different traits. This analysis can ascertain whether a co-association is due to pleiotropy or linkage. Moreover, comparison of association patterns for a complex trait of interest and gene expression and protein levels can implicate causal genes.",PheLiGe,0.99728775,NA,0,PheLiGe,0.99728775,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +21507258,http://phemadb.sourceforge.net,"PheMaDB: a solution for storage, retrieval, and analysis of high throughput phenotype data. Background OmniLog™ phenotype microarrays (PMs) have the capability to measure and compare the growth responses of biological samples upon exposure to hundreds of growth conditions such as different metabolites and antibiotics over a time course of hours to days. In order to manage the large amount of data produced from the OmniLog™ instrument, PheMaDB (Phenotype Microarray DataBase), a web-based relational database, was designed. PheMaDB enables efficient storage, retrieval and rapid analysis of the OmniLog™ PM data. Description PheMaDB allows the user to quickly identify records of interest for data analysis by filtering with a hierarchical ordering of Project, Strain, Phenotype, Replicate, and Temperature. 
PheMaDB then provides various statistical analysis options to identify specific growth pattern characteristics of the experimental strains, such as: outlier analysis, negative controls analysis (signal/background calibration), bar plots, pearson's correlation matrix, growth curve profile search, k-means clustering, and a heat map plot. This web-based database management system allows for both easy data sharing among multiple users and robust tools to phenotype organisms of interest. Conclusions PheMaDB is an open source system standardized for OmniLog™ PM data. PheMaDB could facilitate the banking and sharing of phenotype data. The source code is available for download at http://phemadb.sourceforge.net.",PheMaDB,0.986041546,Phenotype,0.610215828,PheMaDB,0.986041546,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/20/2011 +23378291,http://phenodb.net,"PhenoDB: a new web-based tool for the collection, storage, and analysis of phenotypic features. To interpret whole exome/genome sequence data for clinical and research purposes, comprehensive phenotypic information, knowledge of pedigree structure, and results of previous clinical testing are essential. With these requirements in mind and to meet the needs of the Centers for Mendelian Genomics project, we have developed PhenoDB (http://phenodb.net), a secure, Web-based portal for entry, storage, and analysis of phenotypic and other clinical information. The phenotypic features are organized hierarchically according to the major headings and subheadings of the Online Mendelian Inheritance in Man (OMIM®) clinical synopses, with further subdivisions according to structure and function. Every string allows for a free-text entry. All of the approximately 2,900 features use the preferred term from Elements of Morphology and are fully searchable and mapped to the Human Phenotype Ontology and Elements of Morphology. 
The PhenoDB allows for ascertainment of relevant information from a case in a family or cohort, which is then searchable by family, OMIM number, phenotypic feature, mode of inheritance, genes screened, and so on. The database can also be used to format phenotypic data for submission to dbGaP for appropriately consented individuals. PhenoDB was built using Django, an open source Web development tool, and is freely available through the Johns Hopkins McKusick-Nathans Institute of Genetic Medicine (http://phenodb.net).",PhenoDB,0.997127712,Mendelian Inheritance in,0.610693395,PhenoDB,0.997127712,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/4/2013 +29370821,http://mips.helmholtz-muenchen.de/phenodis,"PhenoDis: a comprehensive database for phenotypic characterization of rare cardiac diseases. BACKGROUND:Thoroughly annotated data resources are a key requirement in phenotype dependent analysis and diagnosis of diseases in the area of precision medicine. Recent work has shown that curation and systematic annotation of human phenome data can significantly improve the quality and selectivity for the interpretation of inherited diseases. We have therefore developed PhenoDis, a comprehensive, manually annotated database providing symptomatic, genetic and imprinting information about rare cardiac diseases. RESULTS:PhenoDis includes 214 rare cardiac diseases from Orphanet and 94 more from OMIM. For phenotypic characterization of the diseases, we performed manual annotation of diseases with articles from the biomedical literature. Detailed description of disease symptoms required the use of 2247 different terms from the Human Phenotype Ontology (HPO). Diseases listed in PhenoDis frequently cover a broad spectrum of symptoms with 28% from the branch of 'cardiovascular abnormality' and others from areas such as neurological (11.5%) and metabolism (6%). 
We collected extensive information on the frequency of symptoms in respective diseases as well as on disease-associated genes and imprinting data. The analysis of the abundance of symptoms in patient studies revealed that most of the annotated symptoms (71%) are found in less than half of the patients of a particular disease. Comprehensive and systematic characterization of symptoms including their frequency is a pivotal prerequisite for computer based prediction of diseases and disease causing genetic variants. To this end, PhenoDis provides in-depth annotation for a complete group of rare diseases, including information on pathogenic and likely pathogenic genetic variants for 206 diseases as listed in ClinVar. We integrated all results in an online database ( http://mips.helmholtz-muenchen.de/phenodis/ ) with multiple search options and provide the complete dataset for download. CONCLUSION:PhenoDis provides a comprehensive set of manually annotated rare cardiac diseases that enables computational approaches for disease prediction via decision support systems and phenotype-driven strategies for the identification of disease causing genes.",PhenoDis,0.995657861,NA,0,PhenoDis,0.995657861,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/25/2018 +31228159,http://phenogen.org,"Networking in Biology: The Hybrid Rat Diversity Panel. One of the most fruitful resources for systems genetic studies of nonhuman mammals is a panel of inbred strains that exhibits significant genetic diversity between strains but genetic stability (isogenicity) within strains. These characteristics allow for fine mapping of complex phenotypes (QTLs) and provide statistical power to identify loci which contribute nominally to the phenotype. This type of resource also allows the planning and performance of investigations using the same genetic backgrounds over several generations of the test animals. 
Often, rats are preferred over mice for physiologic and behavioral studies because of their larger size and more distinguishable anatomy (particularly for their central nervous system). The Hybrid Rat Diversity Panel (HRDP) is a panel of inbred rat strains, which combines two recombinant inbred panels (the HXB/BXH, 30 strains; the LEXF/FXLE, 34 strains and 35 more strains of inbred rats which were selected for genetic diversity, based on their fully sequenced genomes and/or thorough genotyping). The genetic diversity and statistical power of this panel for mapping studies rivals or surpasses currently available panels in mouse. The genetic stability of this panel makes it particularly suitable for collection of high-throughput omics data as relevant technology becomes available for engaging in truly integrative systems biology. The PhenoGen website ( http://phenogen.org ) is the repository for the initial transcriptome data, making the raw data, the processed data, and the analysis results, e.g., organ-specific protein coding and noncoding transcripts, isoform analysis, expression quantitative trait loci, and co-expression networks, available to the research public. The data sets and tools being developed will complement current efforts to analyze the human transcriptome and its genetic controls (the Genotype-Tissue Expression Project (GTEx)) and allow for dissection of genetic networks that predispose to particular phenotypes and gene-by-environment interactions that are difficult or even impossible to study in humans. The HRDP is an essential population for exploring truly integrative systems genetics.",PhenoGen,0.95278728,Rat,0.555455029,PhenoGen,0.95278728,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +22009677,http://phenom.ccbr.utoronto.ca,"PhenoM: a database of morphological phenotypes caused by mutation of essential genes in Saccharomyces cerevisiae. 
About one-fifth of the genes in the budding yeast are essential for haploid viability and cannot be functionally assessed using standard genetic approaches such as gene deletion. To facilitate genetic analysis of essential genes, we and others have assembled collections of yeast strains expressing temperature-sensitive (ts) alleles of essential genes. To explore the phenotypes caused by essential gene mutation we used a panel of genetically engineered fluorescent markers to explore the morphology of cells in the ts strain collection using high-throughput microscopy. Here, we describe the design and implementation of an online database, PhenoM (Phenomics of yeast Mutants), for storing, retrieving, visualizing and data mining the quantitative single-cell measurements extracted from micrographs of the ts mutant cells. PhenoM allows users to rapidly search and retrieve raw images and their quantified morphological data for genes of interest. The database also provides several data-mining tools, including a PhenoBlast module for phenotypic comparison between mutant strains and a Gene Ontology module for functional enrichment analysis of gene sets showing similar morphological alterations. The current PhenoM version 1.0 contains 78,194 morphological images and 1,909,914 cells covering six subcellular compartments or structures for 775 ts alleles spanning 491 essential genes. PhenoM is freely available at http://phenom.ccbr.utoronto.ca/.",PhenoM,0.993650556,Phenomics of yeast Mutants,0.973869509,PhenoM,0.993650556,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/18/2011 +26251998,http://phenomecentral.org,"PhenomeCentral: a portal for phenotypic and genotypic matchmaking of patients with rare genetic diseases. The discovery of disease-causing mutations typically requires confirmation of the variant or gene in multiple unrelated individuals, and a large number of rare genetic diseases remain unsolved due to difficulty identifying second families. 
To enable the secure sharing of case records by clinicians and rare disease scientists, we have developed the PhenomeCentral portal (https://phenomecentral.org). Each record includes a phenotypic description and relevant genetic information (exome or candidate genes). PhenomeCentral identifies similar patients in the database based on semantic similarity between clinical features, automatically prioritized genes from whole-exome data, and candidate genes entered by the users, enabling both hypothesis-free and hypothesis-driven matchmaking. Users can then contact other submitters to follow up on promising matches. PhenomeCentral incorporates data for over 1,000 patients with rare genetic diseases, contributed by the FORGE and Care4Rare Canada projects, the US NIH Undiagnosed Diseases Program, the EU Neuromics and ANDDIrare projects, as well as numerous independent clinicians and scientists. Though the majority of these records have associated exome data, most lack a molecular diagnosis. PhenomeCentral has already been used to identify causative mutations for several patients, and its ability to find matching patients and diagnose these diseases will grow with each additional patient that is entered.",PhenomeCentral,0.995991707,NA,0,PhenomeCentral,0.995991707,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/31/2015 +22255115,http://faculty.neu.edu.cn/bmie/han/PhenOMIM,"PhenOMIM: an OMIM-based secondary database purported for phenotypic comparison. Phenotypic comparison may provide crucial information for obtaining insights into molecular interactions underlying various diseases. However, few attempts have been made to systematically analyze the phenotypes of hereditary disorders, mainly owing to the poor quality of text descriptions and lack of a unified system of descriptors. Here we present a secondary database, PHENOMIM, for translating the phenotypic data obtained from the Online Mendelian Inheritance in Man (OMIM) database into a structured form. 
Moreover, a web interface has also been developed for visualizing the data and related information from the OMIM and PhenOMIM databases. The data is freely available online for reviewing and commenting purposes and can be found at http://faculty.neu.edu.cn/bmie/han/PhenOMIM/.",PHENOMIM,0.989852965,NA,0,PHENOMIM,0.989852965,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2011 +25632109,http://rgd.mcw.edu,"PhenoMiner: a quantitative phenotype database for the laboratory rat, Rattus norvegicus. Application in hypertension and renal disease. . Rats have been used extensively as animal models to study physiological and pathological processes involved in human diseases. Numerous rat strains have been selectively bred for certain biological traits related to specific medical interests. Recently, the Rat Genome Database (http://rgd.mcw.edu) has initiated the PhenoMiner project to integrate quantitative phenotype data from the PhysGen Program for Genomic Applications and the National BioResource Project in Japan as well as manual annotations from biomedical literature. PhenoMiner, the search engine for these integrated phenotype data, facilitates mining of data sets across studies by searching the database with a combination of terms from four different ontologies/vocabularies (Rat Strain Ontology, Clinical Measurement Ontology, Measurement Method Ontology and Experimental Condition Ontology). In this study, salt-induced hypertension was used as a model to retrieve blood pressure records of Brown Norway, Fawn-Hooded Hypertensive (FHH) and Dahl salt-sensitive (SS) rat strains. The records from these three strains served as a basis for comparing records from consomic/congenic/mutant offspring derived from them. We examined the cardiovascular and renal phenotypes of consomics derived from FHH and SS, and of SS congenics and mutants. 
The availability of quantitative records across laboratories in one database, such as these provided by PhenoMiner, can empower researchers to make the best use of publicly available data. Database URL: http://rgd.mcw.edu.",PhenoMiner,0.969527185,Genome Database,0.76131473,PhenoMiner,0.969527185,1,"21296746.0, 21321022.0, 21478484.0, 23255149.0, 23603846.0, 23794737.0, 23881287.0, 25355511.0, 27602200.0, 27736745.0, 29761460.0, 31713623.0, 34741192.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: CLASS,NA,NA,1/28/2015 +31642469,http://www.biosino.org/PhenoModifier,"PhenoModifier: a genetic modifier database for elucidating the genetic basis of human phenotypic variation. From clinical observations to large-scale sequencing studies, the phenotypic impact of genetic modifiers is evident. To better understand the full spectrum of the genetic contribution to human disease, concerted efforts are needed to construct a useful modifier resource for interpreting the information from sequencing data. Here, we present the PhenoModifier (https://www.biosino.org/PhenoModifier), a manually curated database that provides a comprehensive overview of human genetic modifiers. By manually curating over ten thousand published articles, 3078 records of modifier information were entered into the current version of PhenoModifier, related to 288 different disorders, 2126 genetic modifier variants and 843 distinct modifier genes. To help users probe further into the mechanism of their interested modifier genes, we extended the yeast genetic interaction data and yeast quantitative trait loci to the human and we also integrated GWAS data into the PhenoModifier to assist users in evaluating all possible phenotypes associated with a modifier allele. As the first comprehensive resource of human genetic modifiers, PhenoModifier provides a more complete spectrum of genetic factors contributing to human phenotypic variation. 
The portal has a broad scientific and clinical scope, spanning activities relevant to variant interpretation for research purposes as well as clinical decision making.",PhenoModifier,0.996493459,NA,0,PhenoModifier,0.996493459,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +28748223,http://www.phenoplasm.org,"PhenoPlasm: a database of disruption phenotypes for malaria parasite genes. Two decades after the first Plasmodium transfection, attempts have been made to disrupt more than 3,151 genes in malaria parasites, across five Plasmodium species. While results from rodent malaria transfections have been curated and systematised, empowering large-scale analysis, phenotypic data from human malaria parasite transfections currently exists as individual reports scattered across a the literature. To facilitate systematic analysis of published experimental genetic data across Plasmodium species, we have built PhenoPlasm ( http://www.phenoplasm.org), a database of phenotypes generated by transfection experiments in all Plasmodium parasites. The site provides a simple interface linking citation-backed Plasmodium reverse-genetic phenotypes to gene IDs. The database has been populated with phenotypic data on 367 P. falciparum genes, curated from 176 individual publications, as well as existing data on rodent Plasmodium species from RMgmDB and PlasmoGEM. This is the first time that all available data on P. falciparum transfection experiments has been brought together in a single place. These data are presented using ortholog mapping to allow a researcher interested in a gene in one species to see results across other Plasmodium species. The collaborative nature of the database enables any researcher to add new phenotypes as they are discovered. 
As an example of database utility, we use the currently available datasets to identify RAP (RNA-binding domain abundant in Apicomplexa)-domain containing proteins as crucial to parasite survival.",PhenoPlasm,0.98693198,NA,0,PhenoPlasm,0.98693198,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/24/2017 +21554668,http://bioweb.supagro.inra.fr/phenopsis,"PHENOPSIS DB: an information system for Arabidopsis thaliana phenotypic data in an environmental context. Background Renewed interest in plant×environment interactions has risen in the post-genomic era. In this context, high-throughput phenotyping platforms have been developed to create reproducible environmental scenarios in which the phenotypic responses of multiple genotypes can be analysed in a reproducible way. These platforms benefit hugely from the development of suitable databases for storage, sharing and analysis of the large amount of data collected. In the model plant Arabidopsis thaliana, most databases available to the scientific community contain data related to genetic and molecular biology and are characterised by an inadequacy in the description of plant developmental stages and experimental metadata such as environmental conditions. Our goal was to develop a comprehensive information system for sharing of the data collected in PHENOPSIS, an automated platform for Arabidopsis thaliana phenotyping, with the scientific community. Description PHENOPSIS DB is a publicly available (URL: http://bioweb.supagro.inra.fr/phenopsis/) information system developed for storage, browsing and sharing of online data generated by the PHENOPSIS platform and offline data collected by experimenters and experimental metadata. It provides modules coupled to a Web interface for (i) the visualisation of environmental data of an experiment, (ii) the visualisation and statistical analysis of phenotypic data, and (iii) the analysis of Arabidopsis thaliana plant images. 
Conclusions Firstly, data stored in the PHENOPSIS DB are of interest to the Arabidopsis thaliana community, particularly in allowing phenotypic meta-analyses directly linked to environmental conditions on which publications are still scarce. Secondly, data or image analysis modules can be downloaded from the Web interface for direct usage or as the basis for modifications according to new requirements. Finally, the structure of PHENOPSIS DB provides a useful template for the development of other similar databases related to genotype×environment interactions.",PHENOPSIS,0.950341225,NA,0,PHENOPSIS,0.950341225,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/9/2011 +31307376,http://phenpath.biocomp.unibo.it,"PhenPath: a tool for characterizing biological functions underlying different phenotypes. Background Many diseases are associated with complex patterns of symptoms and phenotypic manifestations. Parsimonious explanations aim at reconciling the multiplicity of phenotypic traits with the perturbation of one or few biological functions. For this, it is necessary to characterize human phenotypes at the molecular and functional levels, by exploiting gene annotations and known relations among genes, diseases and phenotypes. This characterization makes it possible to implement tools for retrieving functions shared among phenotypes, co-occurring in the same patient and facilitating the formulation of hypotheses about the molecular causes of the disease. Results We introduce PhenPath, a new resource consisting of two parts: PhenPathDB and PhenPathTOOL. The former is a database collecting the human genes associated with the phenotypes described in Human Phenotype Ontology (HPO) and OMIM Clinical Synopses. Phenotypes are then associated with biological functions and pathways by means of NET-GE, a network-based method for functional enrichment of sets of genes. The present version considers only phenotypes related to diseases. 
PhenPathDB collects information for 18 OMIM Clinical synopses and 7137 HPO phenotypes, related to 4292 diseases and 3446 genes. Enrichment of Gene Ontology annotations endows some 87.7, 86.9 and 73.6% of HPO phenotypes with Biological Process, Molecular Function and Cellular Component terms, respectively. Furthermore, 58.8 and 77.8% of HPO phenotypes are also enriched for KEGG and Reactome pathways, respectively. Based on PhenPathDB, PhenPathTOOL analyzes user-defined sets of phenotypes retrieving diseases, genes and functional terms which they share. This information can provide clues for interpreting the co-occurrence of phenotypes in a patient. Conclusions The resource allows finding molecular features useful to investigate diseases characterized by multiple phenotypes, and by this, it can help researchers and physicians in identifying molecular mechanisms and biological functions underlying the concomitant manifestation of phenotypes. The resource is freely available at http://phenpath.biocomp.unibo.it .",PhenPath,0.98982805,NA,0,PhenPath,0.98982805,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/16/2019 +25414340,http://www.phi-base.org,"The Pathogen-Host Interactions database (PHI-base): additions and future developments. Rapidly evolving pathogens cause a diverse array of diseases and epidemics that threaten crop yield, food security as well as human, animal and ecosystem health. To combat infection greater comparative knowledge is required on the pathogenic process in multiple species. The Pathogen-Host Interactions database (PHI-base) catalogues experimentally verified pathogenicity, virulence and effector genes from bacterial, fungal and protist pathogens. Mutant phenotypes are associated with gene information. The included pathogens infect a wide range of hosts including humans, animals, plants, insects, fish and other fungi. 
The current version, PHI-base 3.6, available at http://www.phi-base.org, stores information on 2875 genes, 4102 interactions, 110 host species, 160 pathogenic species (103 plant, 3 fungal and 54 animal infecting species) and 181 diseases drawn from 1243 references. Phenotypic and gene function information has been obtained by manual curation of the peer-reviewed literature. A controlled vocabulary consisting of nine high-level phenotype terms permits comparisons and data analysis across the taxonomic space. PHI-base phenotypes were mapped via their associated gene information to reference genomes available in Ensembl Genomes. Virulence genes and hotspots can be visualized directly in genome browsers. Future plans for PHI-base include development of tools facilitating community-led curation and inclusion of the corresponding host target(s).",PHI-base,0.987774253,Pathogen-Host Interactions database,0.932421378,PHI-base,0.987774253,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2014 +23497033,http://www.phibiotics.org,"phiBIOTICS: catalogue of therapeutic enzybiotics, relevant research studies and practical applications. Background The incidence of bacterial infections in humans along with the growing problem of antibiotic resistance is a major public health concern worldwide. Therefore it is necessary to develop novel therapeutic agents to control microbial pathogens. In this regard, enzybiotics, lytic enzymes endowed with the capacity to degrade bacterial cell wall, are a very promising group of alternative antimicrobials. Description Numerous experimental studies have confirmed unique therapeutic capabilities of enzybiotics and hence they are worth of wider attention of the medical community. In order to summarize the state of current knowledge of enzybiotics, we have developed phiBIOTICS, an information portal about known and studied therapeutic enzybiotics. 
phiBIOTICS contains information on chemical and biological properties of enzybiotics together with compendium of facts retrieved from research studies, where enzybiotics were applied. Our auxiliary phiBiScan program utility is dedicated for prediction of novel potential enzybiotics. Conclusions phiBIOTICS presents a solid body of knowledge about all studied therapeutic enzybiotics to date. The database brings high-value information on outcomes of applied research and pre-clinical trials of these prospective antimicrobial agents. This information which was scattered in research papers with heterogeneous quality and relevance is now available in the form of manually curated database. phiBIOTICS and phiBiScan are freely accessible at http://www.phibiotics.org/.",phiBIOTICS,0.983398318,NA,0,phiBIOTICS,0.983398318,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/6/2013 +26225242,http://cadd.pharmacy.nankai.edu.cn/phin,"PhIN: A Protein Pharmacology Interaction Network Database. Network pharmacology is a new and hot concept in drug discovery for its ability to investigate the complexity of polypharmacology, and becomes more and more important in drug development. Here we report a protein pharmacology interaction network database (PhIN), aiming to assist multitarget drug discovery by providing comprehensive and flexible network pharmacology analysis. Overall, PhIN contains 1,126,060 target-target interaction pairs in terms of shared compounds and 3,428,020 pairs in terms of shared scaffolds, which involve 12,419,700 activity data, 9,414 targets, 314 viral targets, 652 pathways, 1,359,400 compounds, and 309,556 scaffolds. Using PhIN, users can obtain interacting target networks within or across human pathways, between human and virus, by defining the number of shared compounds or scaffolds under an activity cutoff. We expect PhIN to be a useful tool for multitarget drug development. 
PhIN is freely available at http://cadd.pharmacy.nankai.edu.cn/phin/.",PhIN,0.994730279,protein pharmacology interaction network database,0.937448184,PhIN,0.994730279,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/18/2015 +30055873,http://www.scbit.org/phopepmass/index.html,"PhoPepMass: A database and search tool assisting human phosphorylation peptide identification from mass spectrometry data. Protein phosphorylation, one of the most important protein post-translational modifications, is involved in various biological processes, and the identification of phosphorylation peptides (phosphopeptides) and their corresponding phosphorylation sites (phosphosites) will facilitate the understanding of the molecular mechanism and function of phosphorylation. Mass spectrometry (MS) provides a high-throughput technology that enables the identification of large numbers of phosphosites. PhoPepMass is designed to assist human phosphopeptide identification from MS data based on a specific database of phophopeptide masses and a multivariate hypergeometric matching algorithm. It contains 244,915 phosphosites from several public sources. Moreover, the accurate masses of peptides and fragments with phosphosites were calculated. It is the first database that provides a systematic resource for the query of phosphosites on peptides and their corresponding masses. This allows researchers to search certain proteins of which phosphosites have been reported, to browse detailed phosphopeptide and fragment information, to match masses from MS analyses with defined threshold to the corresponding phosphopeptide, and to compare proprietary phosphopeptide discovery results with results from previous studies. Additionally, a database search software is created and a ""two-stage search strategy"" is suggested to identify phosphopeptides from tandem mass spectra of proteomics data. We expect PhoPepMass to be a useful tool and a source of reference for proteomics researchers. 
PhoPepMass is available at https://www.scbit.org/phopepmass/index.html.",PhoPepMass,0.996143401,NA,0,PhoPepMass,0.996143401,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/19/2018 +23172287,http://phosphat.mpimp-golm.mpg.de,"PhosPhAt goes kinases--searchable protein kinase target information in the plant phosphorylation site database PhosPhAt. Reversible phosphorylation is a key mechanism for regulating protein function. Thus it is of high interest to know which kinase can phosphorylate which proteins. Comprehensive information about phosphorylation sites in Arabidopsis proteins is hosted within the PhosPhAt database (http://phosphat.mpimp-golm.mpg.de). However, our knowledge of the kinases that phosphorylate those sites is dispersed throughout the literature and very difficult to access, particularly for investigators seeking to interpret large scale and high-throughput experiments. Therefore, we aimed to compile information on kinase-substrate interactions and kinase-specific regulatory information and make this available via a new functionality embedded in PhosPhAt. Our approach involved systematic surveying of the literature for regulatory information on the members of the major kinase families in Arabidopsis thaliana, such as CDPKs, MPK(KK)s, AGC kinases and SnRKs, as well as individual kinases from other families. To date, we have researched more than 4450 kinase-related publications, which collectively contain information on about 289 kinases. Users can now query the PhosPhAt database not only for experimental and predicted phosphorylation sites of individual proteins, but also for known substrates for a given kinase or kinase family. 
Further developments include addition of new phosphorylation sites and visualization of clustered phosphorylation events, known as phosphorylation hotspots.",PhosPhAt,0.993283868,NA,0,PhosPhAt,0.993283868,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2012 +23674503,http://www.phosphogrid.org,"The PhosphoGRID Saccharomyces cerevisiae protein phosphorylation site database: version 2.0 update. PhosphoGRID is an online database that curates and houses experimentally verified in vivo phosphorylation sites in the Saccharomyces cerevisiae proteome (www.phosphogrid.org). Phosphosites are annotated with specific protein kinases and/or phosphatases, along with the condition(s) under which the phosphorylation occurs and/or the effects on protein function. We report here an updated data set, including nine additional high-throughput (HTP) mass spectrometry studies. The version 2.0 data set contains information on 20 177 unique phosphorylated residues, representing a 4-fold increase from version 1.0, and includes 1614 unique phosphosites derived from focused low-throughput (LTP) studies. The overlap between HTP and LTP studies represents only ∼3% of the total unique sites, but importantly 45% of sites from LTP studies with defined function were discovered in at least two independent HTP studies. The majority of new phosphosites in this update occur on previously documented proteins, suggesting that coverage of phosphoproteins in the yeast proteome is approaching saturation. We will continue to update the PhosphoGRID data set, with the expectation that the integration of information from LTP and HTP studies will enable the development of predictive models of phosphorylation-based signaling networks. Database URL: http://www.phosphogrid.org/",PhosphoGRID,0.998093545,NA,0,PhosphoGRID,0.998093545,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/13/2013 +24227675,http://www.phosphonetworks.org,"PhosphoNetworks: a database for human phosphorylation networks. 
Summary Phosphorylation plays an important role in cellular signal transduction. Current phosphorylation-related databases often focus on the phosphorylation sites, which are mainly determined by mass spectrometry. Here, we present PhosphoNetworks, a phosphorylation database built on a high-resolution map of phosphorylation networks. This high-resolution map of phosphorylation networks provides not only the kinase-substrate relationships (KSRs), but also the specific phosphorylation sites on which the kinases act on the substrates. The database contains the most comprehensive dataset for KSRs, including the relationships from a recent high-throughput project for identification of KSRs using protein microarrays, as well as known KSRs curated from the literature. In addition, the database also includes several analytical tools for dissecting phosphorylation networks. PhosphoNetworks is expected to play a prominent role in proteomics and phosphorylation-related disease research. Availability and implementation http://www.phosphonetworks.org",PhosphoNetworks,0.997868598,NA,0,PhosphoNetworks,0.997868598,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/13/2013 +22135298,http://www.phosphosite.org,"PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. PhosphoSitePlus (http://www.phosphosite.org) is an open, comprehensive, manually curated and interactive resource for studying experimentally observed post-translational modifications, primarily of human and mouse proteins. It encompasses 1,30,000 non-redundant modification sites, primarily phosphorylation, ubiquitinylation and acetylation. The interface is designed for clarity and ease of navigation. From the home page, users can launch simple or complex searches and browse high-throughput data sets by disease, tissue or cell line. 
Searches can be restricted by specific treatments, protein types, domains, cellular components, disease, cell types, cell lines, tissue and sequences or motifs. A few clicks of the mouse will take users to substrate pages or protein pages with sites, sequences, domain diagrams and molecular visualization of side-chains known to be modified; to site pages with information about how the modified site relates to the functions of specific proteins and cellular processes and to curated information pages summarizing the details from one record. PyMOL and Chimera scripts that colorize reactive groups on residues that are modified can be downloaded. Features designed to facilitate proteomic analyses include downloads of modification sites, kinase-substrate data sets, sequence logo generators, a Cytoscape plugin and BioPAX download to enable pathway visualization of the kinase-substrate interactions in PhosphoSitePlus®.",PhosphoSitePlus,0.998392642,NA,0,PhosphoSitePlus,0.998392642,1,25514926,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,12/1/2011 +33104790,http://phycocosm.jgi.doe.gov,"PhycoCosm, a comparative algal genomics resource. Algae are a diverse, polyphyletic group of photosynthetic eukaryotes spanning nearly all eukaryotic lineages of life and collectively responsible for ∼50% of photosynthesis on Earth. Sequenced algal genomes, critical to understanding their complex biology, are growing in number and require efficient tools for analysis. PhycoCosm (https://phycocosm.jgi.doe.gov) is an algal multi-omics portal, developed by the US Department of Energy Joint Genome Institute to support analysis and distribution of algal genome sequences and other 'omics' data. 
PhycoCosm provides integration of genome sequence and annotation for >100 algal genomes with available multi-omics data and interactive web-based tools to enable algal research in bioenergy and the environment, encouraging community engagement and data exchange, and fostering new sequencing projects that will further these research goals.",PhycoCosm,0.996997952,NA,0,PhycoCosm,0.996997952,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +24275491,http://phylomedb.org,"PhylomeDB v4: zooming into the plurality of evolutionary histories of a genome. Phylogenetic trees representing the evolutionary relationships of homologous genes are the entry point for many evolutionary analyses. For instance, the use of a phylogenetic tree can aid in the inference of orthology and paralogy relationships, and in the detection of relevant evolutionary events such as gene family expansions and contractions, horizontal gene transfer, recombination or incomplete lineage sorting. Similarly, given the plurality of evolutionary histories among genes encoded in a given genome, there is a need for the combined analysis of genome-wide collections of phylogenetic trees (phylomes). Here, we introduce a new release of PhylomeDB (http://phylomedb.org), a public repository of phylomes. Currently, PhylomeDB hosts 120 public phylomes, comprising >1.5 million maximum likelihood trees and multiple sequence alignments. In the current release, phylogenetic trees are annotated with taxonomic, protein-domain arrangement, functional and evolutionary information. PhylomeDB is also a major source for phylogeny-based predictions of orthology and paralogy, covering >10 million proteins across 1059 sequenced species. 
Here we describe newly implemented PhylomeDB features, and discuss a benchmark of the orthology predictions provided by the database, the impact of proteome updates and the use of the phylome approach in the analysis of newly sequenced genomes and transcriptomes.",PhylomeDB,0.996954501,NA,0,PhylomeDB,0.996954501,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/25/2013 +26117828,http://www.bioinfo.org/phyloNoncode,"Evolutionary annotation of conserved long non-coding RNAs in major mammalian species. Mammalian genomes contain tens of thousands of long non-coding RNAs (lncRNAs) that have been implicated in diverse biological processes. However, the lncRNA transcriptomes of most mammalian species have not been established, limiting the evolutionary annotation of these novel transcripts. Based on RNA sequencing data from six tissues of nine species, we built comprehensive lncRNA catalogs (4,142-42,558 lncRNAs) covering the major mammalian species. Compared to protein- coding RNAs, expression of lncRNAs exhibits striking lineage specificity. Notably, although 30%-99% human lncRNAs are conserved across different species on DNA locus level, only 20%-27% of these conserved lncRNA loci are detected to transcription, which represents a stark contrast to the proportion of conserved protein-coding genes (48%-80%). This finding provides a valuable resource for experimental scientists to study the mechanisms of lncRNAs. Moreover, we constructed lncRNA expression phylogenetic trees across nine mammals and demonstrated that lncRNA expression profiles can reliably determine phylogenic placement in a manner similar to their coding counterparts. Our data also reveal that the evolutionary rate of lncRNA expression varies among tissues and is significantly higher than those for protein-coding genes. 
To streamline the processes of browsing lncRNAs and detecting their evolutionary statuses, we integrate all the data produced in this study into a database named PhyloNONCODE (http://www.bioinfo.org/phyloNoncode). Our work starts to place mammalian lncRNAs in an evolutionary context and represent a rich resource for comparative and functional analyses of this critical layer of genome.",PhyloNONCODE,0.804293454,NA,0,PhyloNONCODE,0.804293454,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/27/2015 +24771669,http://g2im.u-clermont1.fr/phylopdb,"PhylOPDb: a 16S rRNA oligonucleotide probe database for prokaryotic identification. In recent years, high-throughput molecular tools have led to an exponential growth of available 16S rRNA gene sequences. Incorporating such data, molecular tools based on target-probe hybridization were developed to monitor microbial communities within complex environments. Unfortunately, only a few 16S rRNA gene-targeted probe collections were described. Here, we present PhylOPDb, an online resource for a comprehensive phylogenetic oligonucleotide probe database. PhylOPDb provides a convivial and easy-to-use web interface to browse both regular and explorative 16S rRNA-targeted probes. Such probes set or subset could be used to globally monitor known and unknown prokaryotic communities through various techniques including DNA microarrays, polymerase chain reaction (PCR), fluorescent in situ hybridization (FISH), targeted gene capture or in silico rapid sequence identification. PhylOPDb contains 74 003 25-mer probes targeting 2178 genera including Bacteria and Archaea. Database URL: http://g2im.u-clermont1.fr/phylopdb/",PhylOPDb,0.997332573,NA,0,PhylOPDb,0.997332573,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/26/2014 +26980519,http://www.compsysbio.org/phylopro,"PhyloPro2.0: a database for the dynamic exploration of phylogenetically conserved proteins and their domain architectures across the Eukarya. . 
PhyloPro is a database and accompanying web-based application for the construction and exploration of phylogenetic profiles across the Eukarya. In this update article, we present six major new developments in PhyloPro: (i) integration of Pfam-A domain predictions for all proteins; (ii) new summary heatmaps and detailed level views of domain conservation; (iii) an interactive, network-based visualization tool for exploration of domain architectures and their conservation; (iv) ability to browse based on protein functional categories (GOSlim); (v) improvements to the web interface to enhance drill down capability from the heatmap view; and (vi) improved coverage including 164 eukaryotes and 12 reference species. In addition, we provide improved support for downloading data and images in a variety of formats. Among the existing tools available for phylogenetic profiles, PhyloPro provides several innovative domain-based features including a novel domain adjacency visualization tool. These are designed to allow the user to identify and compare proteins with similar domain architectures across species and thus develop hypotheses about the evolution of lineage-specific trajectories. Database URL: http://www.compsysbio.org/phylopro/.",PhyloPro,0.998065591,NA,0,PhyloPro,0.998065591,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/15/2016 +29624889,http://metanogen.biotech.uni.wroc.pl,"PhyMet2 : a database and toolkit for phylogenetic and metabolic analyses of methanogens. The vast biodiversity of the microbial world and how little is known about it, has already been revealed by extensive metagenomics analyses. Our rudimentary knowledge of microbes stems from difficulties concerning their isolation and culture in laboratory conditions, which is necessary for describing their phenotype, among other things, for biotechnological purposes. An important component of the understudied ecosystems is methanogens, archaea producing a potent greenhouse-effect gas methane. 
Therefore, we created PhyMet2 , the first database that combines descriptions of methanogens and their culturing conditions with genetic information. The database contains a set of utilities that facilitate interactive data browsing, data comparison, phylogeny exploration and searching for sequence homologues. The most unique feature of the database is the web server MethanoGram, which can be used to significantly reduce the time and cost of searching for the optimal culturing conditions of methanogens by predicting them based on 16S RNA sequences. The database will aid many researchers in exploring the world of methanogens and their applications in biotechnological processes. PhyMet2 with the MethanoGram predictor is available at http://metanogen.biotech.uni.wroc.pl.",PhyMet2,0.994540513,NA,0,PhyMet2,0.994540513,1,25604335,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,NA,6/1/2018 +26441671,http://physiome.jp,"Databases for multilevel biophysiology research available at Physiome.jp. Physiome.jp (http://physiome.jp) is a portal site inaugurated in 2007 to support model-based research in physiome and systems biology. At Physiome.jp, several tools and databases are available to support construction of physiological, multi-hierarchical, large-scale models. There are three databases in Physiome.jp, housing mathematical models, morphological data, and time-series data. In late 2013, the site was fully renovated, and in May 2015, new functions were implemented to provide information infrastructure to support collaborative activities for developing models and performing simulations within the database framework. 
This article describes updates to the databases implemented since 2013, including cooperation among the three databases, interactive model browsing, user management, version management of models, management of parameter sets, and interoperability with applications.",Physiome.jp,0.977062356,NA,0,Physiome.jp,0.977062356,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/9/2015 +23750083,http://www.gbpuat-cbsh.ac.in/departments/bi/database/phytodiabcare/HOME%20PAGE/Home%20page.html,"Phyto diab care: Phytoremedial database for antidiabetics. Unlabelled Diabetes, a chronic disease debilitating to normal healthy lifestyle, onsets due to insufficient amount of insulin production or ineffective utilization of the amount produced. Although, pharmaceutical research has brought up remedial drugs and numerous candidates in various phases of clinical trials, off-target effects and unwanted physiological actions are a constant source of concern and contra indicatory in case of diabetic patients. Here we present a phytoremedial database, Phyto Diab Care, broadly applicable to any known anti-diabetic medicinal plant and phytochemicals sourced from them. Utilization of the traditional medicine knowledge for combating diabetes without creating unwanted physiological actions is our major emphasis. Data collected from peer-reviewed publications and phytochemicals were added to the customizable database by means of an extended relational design. The strength of this resource is in providing rapid retrieval of data from large volumes of text at a high degree of accuracy. Enhanced web interface allows multi-criteria based information filtering. Furthermore, the availability of 2D and 3D structures from molecular docking studies with any efficacy on the insulin signaling pathway makes the resource searchable and comparable in an intuitive manner. Phyto Diab Care compendium is publicly available and can be found in online. 
Availability http://www.gbpuat-cbsh.ac.in/departments/bi/database/phytodiabcare/HOME%20PAGE/Home%20page.html.",Phyto,0.879301965,NA,0,Phyto,0.879301965,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,4/13/2013 +25740460,http://phytoref.fr,"PhytoREF: a reference database of the plastidial 16S rRNA gene of photosynthetic eukaryotes with curated taxonomy. Photosynthetic eukaryotes have a critical role as the main producers in most ecosystems of the biosphere. The ongoing environmental metabarcoding revolution opens the perspective for holistic ecosystems biological studies of these organisms, in particular the unicellular microalgae that often lack distinctive morphological characters and have complex life cycles. To interpret environmental sequences, metabarcoding necessarily relies on taxonomically curated databases containing reference sequences of the targeted gene (or barcode) from identified organisms. To date, no such reference framework exists for photosynthetic eukaryotes. In this study, we built the PhytoREF database that contains 6490 plastidial 16S rDNA reference sequences that originate from a large diversity of eukaryotes representing all known major photosynthetic lineages. We compiled 3333 amplicon sequences available from public databases and 879 sequences extracted from plastidial genomes, and generated 411 novel sequences from cultured marine microalgal strains belonging to different eukaryotic lineages. A total of 1867 environmental Sanger 16S rDNA sequences were also included in the database. Stringent quality filtering and a phylogeny-based taxonomic classification were applied for each 16S rDNA sequence. The database mainly focuses on marine microalgae, but sequences from land plants (representing half of the PhytoREF sequences) and freshwater taxa were also included to broaden the applicability of PhytoREF to different aquatic and terrestrial habitats. 
PhytoREF, accessible via a web interface (http://phytoref.fr), is a new resource in molecular ecology to foster the discovery, assessment and monitoring of the diversity of photosynthetic eukaryotes using high-throughput sequencing.",PhytoREF,0.99713105,NA,0,PhytoREF,0.99713105,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/6/2015 +24870500,http://www.lea.esalq.usp.br/phytoseiidae,"Phytoseiidae database: a website for taxonomic and distributional information on phytoseiid mites (Acari). This paper announces a database on the taxonomy and distribution of mites of the family Phytoseiidae Berlese, which is available online at http://www.lea.esalq.usp.br/phytoseiidae/. Synthesis of species diversity per genus, subfamily and country are given. Information about use of the database is provided.",NA,0,Phytoseiidae database,0.598889927,Phytoseiidae database,0.598889927,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/15/2014 +30576486,http://phytotypedb.bio.unipd.it,"PhytoTypeDB: a database of plant protein inter-cultivar variability and function. . Despite a fast-growing number of available plant genomes, available computational resources are poorly integrated and provide only limited access to the underlying data. Most existing databases focus on DNA/RNA data or specific gene families, with less emphasis on protein structure, function and variability. In particular, despite the economic importance of many plant accessions, there are no straightforward ways to retrieve or visualize information on their differences. To fill this gap, we developed PhytoTypeDB (http://phytotypedb.bio.unipd.it/), a scalable database containing plant protein annotations and genetic variants from resequencing of different accessions. The database content is generated by an integrated pipeline, exploiting state-of-the-art methods for protein characterization requiring only the proteome reference sequence and variant calling files. 
Protein names for unknown proteins are inferred by homology for over 95% of the entries. Single-nucleotide variants are visualized along with protein annotation in a user-friendly web interface. The server offers an effective querying system, which allows to compare variability among different species and accessions, to generate custom data sets based on shared functional features or to perform sequence searches. A documented set of exposed RESTful endpoints make the data accessible programmatically by third-party clients.",PhytoTypeDB,0.998071671,NA,0,PhytoTypeDB,0.998071671,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +22110026,http://www.phytozome.net,"Phytozome: a comparative platform for green plant genomics. The number of sequenced plant genomes and associated genomic resources is growing rapidly with the advent of both an increased focus on plant genomics from funding agencies, and the application of inexpensive next generation sequencing. To interact with this increasing body of data, we have developed Phytozome (http://www.phytozome.net), a comparative hub for plant genome and gene family data and analysis. Phytozome provides a view of the evolutionary history of every plant gene at the level of sequence, gene structure, gene family and genome organization, while at the same time providing access to the sequences and functional annotations of a growing number (currently 25) of complete plant genomes, including all the land plants and selected algae sequenced at the Joint Genome Institute, as well as selected species sequenced elsewhere. Through a comprehensive plant genome database and web portal, these data and analyses are available to the broader plant science research community, providing powerful comparative genomics tools that help to link model systems with other plants of economic and ecological importance.",Phytozome,0.998088956,NA,0,Phytozome,0.998088956,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/22/2011 +23060433,http://www.drpias.net,"Dr. 
PIAS 2.0: an update of a database of predicted druggable protein-protein interactions. Druggable Protein-protein Interaction Assessment System (Dr. PIAS) is a database of druggable protein-protein interactions (PPIs) predicted by our support vector machine (SVM)-based method. Since the first publication of this database, Dr. PIAS has been updated to version 2.0. PPI data have been increased considerably, from 71,500 to 83,324 entries. As the new positive instances in our method, 4 PPIs and 10 tertiary structures have been added. This addition increases the prediction accuracy of our SVM classifier in comparison with the previous classifier, despite the number of added PPIs and structures is small. We have introduced the novel concept of 'similar positives' of druggable PPIs, which will help researchers discover small compounds that can inhibit predicted druggable PPIs. Dr. PIAS will aid the effective search for druggable PPIs from a mine of interactome data being rapidly accumulated. Dr. PIAS 2.0 is available at http://www.drpias.net.",PIAS,0.920802236,Druggable Protein-protein Interaction Assessment System,0.894154012,PIAS,0.920802236,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/10/2012 +21801404,http://www-cryst.bioc.cam.ac.uk/piccolo,"Comprehensive, atomic-level characterization of structurally characterized protein-protein interactions: the PICCOLO database. Background Structural studies are increasingly providing huge amounts of information on multi-protein assemblies. Although a complete understanding of cellular processes will be dependent on an explicit characterization of the intermolecular interactions that underlie these assemblies and mediate molecular recognition, these are not well described by standard representations. Results Here we present PICCOLO, a comprehensive relational database capturing the details of structurally characterized protein-protein interactions. 
Interactions are described at the level of interacting pairs of atoms, residues and polypeptide chains, with the physico-chemical nature of the interactions being characterized. Distance and angle terms are used to distinguish 12 different interaction types, including van der Waals contacts, hydrogen bonds and hydrophobic contacts. The explicit aim of PICCOLO is to underpin large-scale analyses of the properties of protein-protein interfaces. This is exemplified by an analysis of residue propensity and interface contact preferences derived from a much larger data set than previously reported. However, PICCOLO also supports detailed inspection of particular systems of interest. Conclusions The current PICCOLO database comprises more than 260 million interacting atom pairs from 38,202 protein complexes. A web interface for the database is available at http://www-cryst.bioc.cam.ac.uk/piccolo.",PICCOLO,0.993021965,NA,0,PICCOLO,0.993021965,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/29/2011 +29077937,http://pickles.hart-lab.org,"PICKLES: the database of pooled in-vitro CRISPR knockout library essentiality screens. The adaptation of CRISPR/Cas9 systems for pooled library genetic knockout screens in mammalian cells has substantially advanced the state of the art in human functional genomics. Screening panels of cell lines for genes whose knockout imposes a significant fitness defect has dramatically expanded our catalog of high-confidence essential genes, and has already proven useful in identifying tumor-specific essential genes for the development of targeted therapies. However, nonexperts currently lack an easy to use way to access this data and to identify whether their genes of interest are essential across different genetic backgrounds. The volume of screening data is expected to grow massively, making the problem more intractable. 
Here we describe PICKLES, the database of Pooled In vitro CRISPR Knockout Library Essentiality Screens, where end users can display and download raw or normalized essentiality profiles for more that 18 000 protein-coding genes across more than 50 cell lines. An additional data set with 15,000 genes targeted by pooled library shRNA in over 100 cell lines is also included. Researchers can see at a glance the relative fitness defect and tissue specificity of their genes of interest, generate and save figures locally, and download all raw data. The database is available at http://pickles.hart-lab.org.",PICKLES,0.996508896,CRISPR Knockout,0.593691432,PICKLES,0.996508896,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +23826978,http://bioinformatics.psb.ugent.be/pico-plaza,"pico-PLAZA, a genome database of microbial photosynthetic eukaryotes. With the advent of next generation genome sequencing, the number of sequenced algal genomes and transcriptomes is rapidly growing. Although a few genome portals exist to browse individual genome sequences, exploring complete genome information from multiple species for the analysis of user-defined sequences or gene lists remains a major challenge. pico-PLAZA is a web-based resource (http://bioinformatics.psb.ugent.be/pico-plaza/) for algal genomics that combines different data types with intuitive tools to explore genomic diversity, perform integrative evolutionary sequence analysis and study gene functions. Apart from homologous gene families, multiple sequence alignments, phylogenetic trees, Gene Ontology, InterPro and text-mining functional annotations, different interactive viewers are available to study genome organization using gene collinearity and synteny information. Different search functions, documentation pages, export functions and an extensive glossary are available to guide non-expert scientists. 
To illustrate the versatility of the platform, different case studies are presented demonstrating how pico-PLAZA can be used to functionally characterize large-scale EST/RNA-Seq data sets and to perform environmental genomics. Functional enrichments analysis of 16 Phaeodactylum tricornutum transcriptome libraries offers a molecular view on diatom adaptation to different environments of ecological relevance. Furthermore, we show how complementary genomic data sources can easily be combined to identify marker genes to study the diversity and distribution of algal species, for example in metagenomes, or to quantify intraspecific diversity from environmental strains.",pico-PLAZA,0.990684529,NA,0,pico-PLAZA,0.990684529,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/4/2013 +34111777,http://biodb.sdau.edu.cn/PID/index.php,"PID: An integrative and comprehensive platform of plant intron. Intron is a non-coding sequence of a broken gene and participates in important biological processes, such as transcription regulation, alternative splicing, and nuclear export. With the development of plant genomes, a comprehensive platform for intron analysis in plants must be established. Plant Intron Database (PID), a publicly available searchable database, was developed to efficiently store, query, analyze, and integrate intron resources in plants. The information of intron, exon, and gene can be searched by key words in PID. Users cannot only view intron length distribution pie chart and 5' and 3' splice site sequence feature maps in a statistical interface but can also browse intron information in a graphical visualization interface through JBrowse. ViroBlast for sequence homology searches, Intron detection and sequence interception tools were also provided. PID contains annotated genes from 118 sequenced plants, 24,782,048 introns, 30,843,049 exons, and 414 visual maps. This tool will greatly accelerate research on the distribution, length characteristics, and functions of introns in plants. 
PID is accessible at http://biodb.sdau.edu.cn/PID/index.php.",PID,0.98083353,Plant Intron Database,0.959512129,PID,0.98083353,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/5/2021 +23607573,http://www.pid-net.org,"The German national registry for primary immunodeficiencies (PID). In 2009, a federally funded clinical and research consortium (PID-NET, http://www.pid-net.org) established the first national registry for primary immunodeficiencies (PID) in Germany. The registry contains clinical and genetic information on PID patients and is set up within the framework of the existing European Database for Primary Immunodeficiencies, run by the European Society for Primary Immunodeficiencies. Following the example of other national registries, a central data entry clerk has been employed to support data entry at the participating centres. Regulations for ethics approvals have presented a major challenge for participation of individual centres and have led to a delay in data entry in some cases. Data on 630 patients, entered into the European registry between 2004 and 2009, were incorporated into the national registry. From April 2009 to March 2012, the number of contributing centres increased from seven to 21 and 738 additional patients were reported, leading to a total number of 1368 patients, of whom 1232 were alive. The age distribution of living patients differs significantly by gender, with twice as many males than females among children, but 15% more women than men in the age group 30 years and older. 
The diagnostic delay between onset of symptoms and diagnosis has decreased for some PID over the past 20 years, but remains particularly high at a median of 4 years in common variable immunodeficiency (CVID), the most prevalent PID.",PID-NET,0.746310925,NA,0,PID-NET,0.746310925,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/1/2013 +27742820,"http://probes.pw.usda.gov/piece, http://aegilops.wheat.ucdavis.edu/piece","PIECE 2.0: an update for the plant gene structure comparison and evolution database. PIECE (Plant Intron Exon Comparison and Evolution) is a web-accessible database that houses intron and exon information of plant genes. PIECE serves as a resource for biologists interested in comparing intron-exon organization and provides valuable insights into the evolution of gene structure in plant genomes. Recently, we updated PIECE to a new version, PIECE 2.0 (http://probes.pw.usda.gov/piece or http://aegilops.wheat.ucdavis.edu/piece). PIECE 2.0 contains annotated genes from 49 sequenced plant species as compared to 25 species in the previous version. In the current version, we also added several new features: (i) a new viewer was developed to show phylogenetic trees displayed along with the structure of individual genes; (ii) genes in the phylogenetic tree can now be also grouped according to KOG (The annotation of Eukaryotic Orthologous Groups) and KO (KEGG Orthology) in addition to Pfam domains; (iii) information on intronless genes are now included in the database; (iv) a statistical summary of global gene structure information for each species and its comparison with other species was added; and (v) an improved GSDraw tool was implemented in the web server to enhance the analysis and display of gene structure. 
The updated PIECE 2.0 database will be a valuable resource for the plant research community for the study of gene structure and evolution.",PIECE,0.998071253,Plant Intron Exon Comparison and Evolution,0.978025717,PIECE,0.998071253,1,NA,23180792,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,10/13/2016 +23180792,http://wheat.pw.usda.gov/piece,"PIECE: a database for plant gene structure comparison and evolution. Gene families often show degrees of differences in terms of exon-intron structures depending on their distinct evolutionary histories. Comparative analysis of gene structures is important for understanding their evolutionary and functional relationships within plant species. Here, we present a comparative genomics database named PIECE (http://wheat.pw.usda.gov/piece) for Plant Intron and Exon Comparison and Evolution studies. The database contains all the annotated genes extracted from 25 sequenced plant genomes. These genes were classified based on Pfam motifs. Phylogenetic trees were pre-constructed for each gene category. PIECE provides a user-friendly interface for different types of searches and a graphical viewer for displaying a gene structure pattern diagram linked to the resulting bootstrapped dendrogram for each gene family. The gene structure evolution of orthologous gene groups was determined using the GLOOME, Exalign and GECA software programs that can be accessed within the database. PIECE also provides a web server version of the software, GSDraw, for drawing schematic diagrams of gene structures. PIECE is a powerful tool for comparing gene sequences and provides valuable insights into the evolution of gene structure in plant genomes.",PIECE,0.997208118,NA,0,PIECE,0.997208118,1,NA,27742820,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,11/24/2012 +25270086,http://pigd.ahau.edu.cn,"PIGD: a database for intronless genes in the Poaceae. 
Background Intronless genes are a feature of prokaryotes; however, they are widespread and unequally distributed among eukaryotes and represent an important resource to study the evolution of gene architecture. Although many databases on exons and introns exist, there is currently no cohesive database that collects intronless genes in plants into a single database. Description In this study, we present the Poaceae Intronless Genes Database (PIGD), a user-friendly web interface to explore information on intronless genes from different plants. Five Poaceae species, Sorghum bicolor, Zea mays, Setaria italica, Panicum virgatum and Brachypodium distachyon, are included in the current release of PIGD. Gene annotations and sequence data were collected and integrated from different databases. The primary focus of this study was to provide gene descriptions and gene product records. In addition, functional annotations, subcellular localization prediction and taxonomic distribution are reported. PIGD allows users to readily browse, search and download data. BLAST and comparative analyses are also provided through this online database, which is available at http://pigd.ahau.edu.cn/. Conclusion PIGD provides a solid platform for the collection, integration and analysis of intronless genes in the Poaceae. As such, this database will be useful for subsequent bio-computational analysis in comparative genomics and evolutionary studies.",PIGD,0.992457569,Poaceae Intronless Genes Database,0.982717186,PIGD,0.992457569,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2014 +27478368,http://caps.ncbs.res.in/pimadb,"PIMADb: A Database of Protein-Protein Interactions in Huge Macromolecular Assemblies. Protein-protein interactions play a very important role in the process of cellular functionality. Intricate details about the interactions between the proteins in a macromolecular assembly are important to understand the function and significance of protein complexes. 
We are reporting about a database of protein-protein interactions in huge macromolecular assemblies (PIMADb) that records the intrinsic details of 189,532 interchain interactions in 40,049 complexes from the Protein Data Bank. These details include the results of the quantification and analysis of all the interactions in the complex. The availability of interprotomer interaction networks can enable the design of point mutation experiments. PIMADb can be accessed from the URL: http://caps.ncbs.res.in/pimadb.",PIMADb,0.994888186,protein interactions in huge macromolecular assemblies,0.905600566,PIMADb,0.994888186,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/19/2016 +30147056,http://pimbase.kalis-amts.de,"Data-Driven Assessment of Potentially Inappropriate Medication in the Elderly. Multimorbid patients taking polypharmacy represent a growing population at high risk for inappropriate prescribing. Various lists for identifying potentially inappropriate medication are spread across scientific journals and difficult to access. To address this ongoing need, a new database named PIMBase is developed which integrates these well-known lists and unifies their rating scales. The analysis of the pharmacovigilance data reveals the benefits of combining the lists. PIMBase is meant to be a web-based system and starting point for the data-driven assessment of polypharmacy to identify inappropriate medication and to improve the quality of prescribing. PIMBase is available at https://pimbase.kalis-amts.de.",PIMBase,0.997270882,NA,0,PIMBase,0.997270882,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +22067443,http://cbg.garvan.unsw.edu.au/pina,"PINA v2.0: mining interactome modules. The Protein Interaction Network Analysis (PINA) platform is a comprehensive web resource, which includes a database of unified protein-protein interaction data integrated from six manually curated public databases, and a set of built-in tools for network construction, filtering, analysis and visualization. 
The second version of PINA enhances its utility for studies of protein interactions at a network level, by including multiple collections of interaction modules identified by different clustering approaches from the whole network of protein interactions ('interactome') for six model organisms. All identified modules are fully annotated by enriched Gene Ontology terms, KEGG pathways, Pfam domains and the chemical and genetic perturbations collection from MSigDB. Moreover, a new tool is provided for module enrichment analysis in addition to simple query function. The interactome data are also available on the web site for further bioinformatics analysis. PINA is freely accessible at http://cbg.garvan.unsw.edu.au/pina/.",PINA,0.992636144,Protein Interaction Network Analysis,0.964843237,PINA,0.992636144,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2011 +28096778,http://app.bioelm.com,"PineElm_SSRdb: a microsatellite marker database identified from genomic, chloroplast, mitochondrial and EST sequences of pineapple (Ananas comosus (L.) Merrill). Background Simple Sequence Repeats or microsatellites are resourceful molecular genetic markers. There are only few reports of SSR identification and development in pineapple. Complete genome sequence of pineapple available in the public domain can be used to develop numerous novel SSRs. Therefore, an attempt was made to identify SSRs from genomic, chloroplast, mitochondrial and EST sequences of pineapple which will help in deciphering genetic makeup of its germplasm resources. Results A total of 359511 SSRs were identified in pineapple (356385 from genome sequence, 45 from chloroplast sequence, 249 in mitochondrial sequence and 2832 from EST sequences). The list of EST-SSR markers and their details are available in the database. Conclusions PineElm_SSRdb is an open source database available for non-commercial academic purpose at http://app.bioelm.com/ with a mapping tool which can develop circular maps of selected marker set. 
This database will be of immense use to breeders, researchers and graduates working on Ananas spp. and to others working on cross-species transferability of markers, investigating diversity, mapping and DNA fingerprinting.",PineElm_SSRdb,0.885082662,NA,0,PineElm_SSRdb,0.885082662,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/24/2016 +34107869,http://pinir.ncl.res.in,"PINIR: a comprehensive information resource for Pin-II type protease inhibitors. Background Serine protease inhibitors belonging to the Potato type-II Inhibitor family Protease Inhibitors (Pin-II type PIs) are essential plant defense molecules. They are characterized by multiple inhibitory repeat domains, conserved disulfide bond pattern, and a tripeptide reactive center loop. These features of Pin-II type PIs make them potential molecules for protein engineering and designing inhibitors for agricultural and therapeutic applications. However, the diversity in these PIs remains unexplored due to the lack of annotated protein sequences and their functional attributes in the available databases. Results We have developed a database, PINIR (Pin-II type PIs Information Resource), by systematic collection and manual annotation of 415 Pin-II type PI protein sequences. For each PI, the number and position for signature sequences are specified: 695 domains, 75 linkers, 63 reactive center loops, and 10 disulfide bond patterns are identified and mapped. Database analysis revealed novel subcategories of PIs, species-correlated occurrence of inhibitory domains, reactive center loops, and disulfide bond patterns. By analyzing linker regions, we predict that alternative processing at linker regions could generate PI variants in the Solanaceae family. Conclusion PINIR ( https://pinir.ncl.res.in ) provides a web interface for browsing and analyzing the protein sequences of Pin-II type PIs. 
Information about signature sequences, spatio-temporal expression, biochemical properties, gene sequences, and literature references are provided. Analysis of PINIR depicts conserved species-specific features of Pin-II type PI protein sequences. Diversity in the sequence of inhibitory domains and reactive loops directs potential applications to engineer Pin-II type PIs. The PINIR database will serve as a comprehensive information resource for further research into Pin-II type PIs.",PINIR,0.995020032,Pin-II type PIs Information Resource,0.970687181,PINIR,0.995020032,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/9/2021 +25252779,http://www.pip-db.org,"PIP-DB: the Protein Isoelectric Point database. Unlabelled A protein's isoelectric point or pI corresponds to the solution pH at which its net surface charge is zero. Since the early days of solution biochemistry, the pI has been recorded and reported, and thus literature reports of pI abound. The Protein Isoelectric Point database (PIP-DB) has collected and collated these data to provide an increasingly comprehensive database for comparison and benchmarking purposes. A web application has been developed to warehouse this database and provide public access to this unique resource. PIP-DB is a web-enabled SQL database with an HTML GUI front-end. PIP-DB is fully searchable across a range of properties. Availability and implementation The PIP-DB database and documentation are available at http://www.pip-db.org.",PIP-DB,0.989458632,Protein Isoelectric Point database,0.94516476,PIP-DB,0.989458632,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/23/2014 +23396298,http://cabindb.iasri.res.in/pigeonpea,"PIPEMicroDB: microsatellite database and primer generation tool for pigeonpea genome. Molecular markers play a significant role for crop improvement in desirable characteristics, such as high yield, resistance to disease and others that will benefit the crop in long term. Pigeonpea (Cajanus cajan L.) 
is the recently sequenced legume by global consortium led by ICRISAT (Hyderabad, India) and been analysed for gene prediction, synteny maps, markers, etc. We present PIgeonPEa Microsatellite DataBase (PIPEMicroDB) with an automated primer designing tool for pigeonpea genome, based on chromosome wise as well as location wise search of primers. Total of 123 387 Short Tandem Repeats (STRs) were extracted from pigeonpea genome, available in public domain using MIcroSAtellite tool (MISA). The database is an online relational database based on 'three-tier architecture' that catalogues information of microsatellites in MySQL and user-friendly interface is developed using PHP. Search for STRs may be customized by limiting their location on chromosome as well as number of markers in that range. This is a novel approach and is not been implemented in any of the existing marker database. This database has been further appended with Primer3 for primer designing of selected markers with left and right flankings of size up to 500 bp. This will enable researchers to select markers of choice at desired interval over the chromosome. Furthermore, one can use individual STRs of a targeted region over chromosome to narrow down location of gene of interest or linked Quantitative Trait Loci (QTLs). Although it is an in silico approach, markers' search based on characteristics and location of STRs is expected to be beneficial for researchers. Database URL: http://cabindb.iasri.res.in/pigeonpea/",PIPEMicroDB,0.981419305,PIgeonPEa Microsatellite DataBase,0.70065501,PIPEMicroDB,0.981419305,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/8/2013 +"25425034, 30371818",http://www.regulatoryrna.org/database/piRNA,"piRBase: a web resource assisting piRNA functional study. piRNAs are a class of small RNAs that is most abundantly expressed in the animal germ line. 
Presently, substantial research is going on to reveal the functions of piRNAs in the epigenetic and post-transcriptional regulation of transposons and genes. A piRNA database for collection, annotation and structuring of these data will be a valuable contribution to the field, and we have therefore developed the piRBase platform which integrates various piRNA-related high-throughput data. piRBase has the largest collection of piRNAs among existing databases, and contains at present 77 million piRNA sequences from nine organisms. Repeat-derived and gene-derived piRNAs, which possibly participate in the regulation of the corresponding elements, have been given particular attention. Furthermore, epigenetic data and reported piRNA targets were also collected. To our knowledge, this is the first piRNA database that systematically integrates epigenetic and post-transcriptional regulation data to support piRNA functional analysis. We believe that piRBase will contribute to a better understanding of the piRNA functions. Database URL: http://www.regulatoryrna.org/database/piRNA/",piRBase,0.997035384,NA,0,piRBase,0.997035384,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +33330918,http://njmu-edu.cn:3838/piRNA-eQTL,"Systematic evaluation of the effects of genetic variants on PIWI-interacting RNA expression across 33 cancer types. PIWI-interacting RNAs (piRNAs) are an emerging class of non-coding RNAs involved in tumorigenesis. Expression quantitative trait locus (eQTL) analysis has been demonstrated to help reveal the genetic mechanism of single nucleotide polymorphisms (SNPs) in cancer etiology. However, there are no databases that have been constructed to provide an eQTL analysis between SNPs and piRNA expression. In this study, we collected genotyping and piRNA expression data for 10 997 samples across 33 cancer types from The Cancer Genome Atlas (TCGA). 
Using linear regression cis-eQTL analysis with adjustment of appropriate covariates, we identified millions of SNP-piRNA pairs in tumor (76 924 831) and normal (24 431 061) tissues. Further, we performed differential expression and survival analyses, and linked the eQTLs to genome-wide association study (GWAS) data to comprehensively decipher the functional roles of identified cis-piRNA eQTLs. Finally, we developed a user-friendly database, piRNA-eQTL (http://njmu-edu.cn:3838/piRNA-eQTL/), to help users query, browse and download corresponding eQTL results. In summary, piRNA-eQTL could serve as an important resource to assist the research community in understanding the roles of genetic variants and piRNAs in the development of cancers.",piRNA-eQTL,0.997409006,NA,0,piRNA-eQTL,0.997409006,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +24997126,http://bicresources.jcbose.ac.in/zhumur/pirnaquest,"piRNAQuest: searching the piRNAome for silencers. Background PIWI-interacting RNA (piRNA) is a novel and emerging class of small non-coding RNA (sncRNA). Ranging in length from 26-32 nucleotides, this sncRNA is a potent player in guiding the vital regulatory processes within a cellular system. Inspite of having such a wide role within cellular systems, piRNAs are not well organized and classified, so that a researcher can pool out the biologically relevant information concerning this class. Description Here we present piRNAQuest- a unified and comprehensive database of 41749 human, 890078 mouse and 66758 rat piRNAs obtained from NCBI and different small RNA sequence experiments. This database provides piRNA annotation based on their localization in gene, intron, intergenic, CDS, 5/UTR, 3/UTR and repetitive regions which has not been done so far. We have also annotated piRNA clusters and have elucidated characteristic motifs within them. 
We have looked for the presence of piRNAs and piRNA clusters in pseudogenes, which are known to regulate the expression of protein coding transcripts by generating small RNAs. All these will help researchers progress towards solving the unanswered queries on piRNA biogenesis and their mode of action. Further, expression profile for piRNA in different tissues and from different developmental stages has been provided. In addition, we have provided several tools like 'homology search', 'dynamic cluster search' and 'pattern search'. Overall, piRNAQuest will serve as a useful resource for exploring human, mouse and rat piRNAome. The database is freely accessible and available at http://bicresources.jcbose.ac.in/zhumur/pirnaquest/. Conclusion piRNAs play a remarkable role in stem cell self-renewal and various vital processes of developmental biology. Although researchers are mining different features on piRNAs, the exact regulatory mechanism is still fuzzy. Thus, understanding the true potential of these small regulatory molecules with respect to their origin, localization and mode of biogenesis is crucial. piRNAQuest will provide us with a better insight on piRNA origin and function which will help to explore the true potential of these sncRNAs.",piRNAQuest,0.996235311,NA,0,piRNAQuest,0.996235311,1,NA,34965192,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,7/4/2014 +34965192,http://dibresources.jcbose.ac.in/zhumur/pirnaquest2,"piRNAQuest V.2: an updated resource for searching through the piRNAome of multiple species. PIWI interacting RNAs (piRNAs) have emerged as important gene regulators in recent times. Since the release of our first version of piRNAQuest in 2014, lots of novel piRNAs have been annotated in different species other than human, mouse and rat. Such new developments in piRNA research have led us to develop an updated database piRNAQuest V.2. 
It consists of 92,77,689 piRNA entries for 25 new species of different phylum along with human, mouse and rat. Besides providing primary piRNA features which include their genomic location, with further information on piRNAs overlapping with repeat elements, pseudogenes and syntenic regions, etc., the novel features of this version includes (i) density based cluster prediction, (ii) piRNA expression profile across various healthy and disease systems and (iii) piRNA target prediction. The concept of density-based piRNA cluster identification is robust as it does not consider parametric distribution in its model. The piRNA expression profile for 21 disease systems including cancer have been hosted in addition to 32 tissue specific piRNA expression profile for various species. Further, the piRNA target prediction section includes both predicted and curated piRNA targets within eight disease systems and developmental stages of mouse testis. Further, users can visualize the piRNA-target duplex structure and the ping-pong signature pattern for all the ping-pong piRNA partners in different species. Overall, piRNAQuest V.2 is an updated user-friendly database which will serve as a useful resource to survey, search and retrieve information on piRNAs for multiple species. This freely accessible database is available at http://dibresources.jcbose.ac.in/zhumur/pirnaquest2.",piRNAQuest,0.991596937,NA,0,piRNAQuest,0.991596937,1,NA,24997126,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,12/31/2021 +30357353,http://cosbi6.ee.ncku.edu.tw/piRTarBase,"piRTarBase: a database of piRNA targeting sites and their roles in gene regulation. PIWI-interacting RNAs (piRNAs) are a class of small noncoding RNAs that guard animal genomes against mutation by silencing transposons. In addition, recent studies have reported that piRNAs silence various endogenous genes. 
Tens of thousands of distinct piRNAs made in animals do not pair well to transposons and currently the functions and targets of piRNAs are largely unexplored. piRTarBase provides a user-friendly interface to access both predicted and experimentally identified piRNA targeting sites in Caenorhabditis elegans. The user can input genes of interest and retrieve a list of piRNA targeting sites on the input genes. Alternatively, the user can input a piRNA and retrieve a list of its mRNA targets. Additionally, piRTarBase integrates published mRNA and small RNA sequencing data, which will help users identify biologically relevant targeting events. Importantly, our analyses suggest that the piRNA sites found by both predictive and experimental approaches are more likely to exhibit silencing effects on their targets than each method alone. Taken together, piRTarBase offers an integrative platform that will help users to identify functional piRNA target sites by evaluating various information. piRTarBase is freely available for academic use at http://cosbi6.ee.ncku.edu.tw/piRTarBase/.",piRTarBase,0.99358052,NA,0,piRTarBase,0.99358052,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30053269,http://pitdb.org,"PITDB: a database of translated genomic elements. PITDB is a freely available database of translated genomic elements (TGEs) that have been observed in PIT (proteomics informed by transcriptomics) experiments. In PIT, a sample is analyzed using both RNA-seq transcriptomics and proteomic mass spectrometry. Transcripts assembled from RNA-seq reads are used to create a library of sample-specific amino acid sequences against which the acquired mass spectra are searched, permitting detection of any TGE, not just those in canonical proteome databases. At the time of writing, PITDB contains over 74 000 distinct TGEs from four species, supported by more than 600 000 peptide spectrum matches. 
The database, accessible via http://pitdb.org, provides supporting evidence for each TGE, often from multiple experiments and an indication of the confidence in the TGE's observation and its type, ranging from known protein (exact match to a UniProt protein sequence), through multiple types of protein variant including various splice isoforms, to a putative novel molecule. PITDB's modern web interface allows TGEs to be viewed individually or by species or experiment, and downloaded for further analysis. PITDB is for bench scientists seeking to share their PIT results, for researchers investigating novel genome products in model organisms and for those wishing to construct proteomes for lesser studied species.",PITDB,0.992001295,NA,0,PITDB,0.992001295,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +30805645,http://compbio.clemson.edu/pkad,"PKAD: a database of experimentally measured pKa values of ionizable groups in proteins. . Ionizable residues play key roles in many biological phenomena including protein folding, enzyme catalysis and binding. We present PKAD, a database of experimentally measured pKas of protein residues reported in the literature or taken from existing databases. The database contains pKa data for 1350 residues in 157 wild-type proteins and for 232 residues in 45 mutant proteins. Most of these values are for Asp, Glu, His and Lys amino acids. The database is available as downloadable file as well as a web server (http://compbio.clemson.edu/pkad). The PKAD database can be used as a benchmarking source for development and improvement of pKa's prediction methods. 
The web server provides additional information taken from the corresponding structures and amino acid sequences, which allows for easy search and grouping of the experimental pKas according to various biophysical characteristics, amino acid type and others.",PKAD,0.989784598,NA,0,PKAD,0.989784598,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +"29662024, 32679723",http://www.icoa.fr/pkidb,"PKIDB: A Curated, Annotated and Updated Database of Protein Kinase Inhibitors in Clinical Trials. . The number of protein kinase inhibitors (PKIs) approved worldwide continues to grow steadily, with 39 drugs approved in the period between 2001 and January 2018. PKIs on the market have been the subject of many reviews, and structure-property relationships specific to this class of drugs have been inferred. However, the large number of PKIs under development is often overlooked. In this paper, we present PKIDB (Protein Kinase Inhibitor Database), a monthly-updated database gathering approved PKIs as well as PKIs currently in clinical trials. The database compiles currently 180 inhibitors ranging from phase 0 to 4 clinical trials along with annotations extracted from seven public resources. The distribution and property ranges of standard physicochemical properties are presented. They can be used as filters to better prioritize compound selection for future screening campaigns. Interestingly, more than one-third of the kinase inhibitors violate at least one Lipinski's rule. A Principal Component Analysis (PCA) reveals that Type-II inhibitors are mapped to a distinct chemical space as compared to orally administrated drugs as well as to other types of kinase inhibitors. Using a Principal Moment of Inertia (PMI) analysis, we show that PKIs under development tend to explore new shape territories as compared to approved PKIs. In order to facilitate the analysis of the protein space, the kinome tree has been annotated with all protein kinases being targeted by PKIs. 
Finally, we analyzed the pipeline of the pharmaceutical companies having PKIs on the market or still under development. We hope that this work will assist researchers in the kinase field in identifying and designing the next generation of kinase inhibitors for still untargeted kinases. The PKIDB database is freely accessible from a website at http://www.icoa.fr/pkidb and can be easily browsed through a user-friendly spreadsheet-like interface.",PKIDB,0.987339139,Protein Kinase Inhibitor Database,0.97000488,PKIDB,0.987339139,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/15/2020 +22559792,http://cadd.suda.edu.cn/admet,"ADMET evaluation in drug discovery. 11. PharmacoKinetics Knowledge Base (PKKB): a comprehensive database of pharmacokinetic and toxic properties for drugs. Good and extensive experimental ADMET (absorption, distribution, metabolism, excretion, and toxicity) data is critical for developing reliable in silico ADMET models. Here we develop a PharmacoKinetics Knowledge Base (PKKB) to compile comprehensive information about ADMET properties into a single electronic repository. We incorporate more than 10 000 experimental ADMET measurements of 1685 drugs into the PKKB. The ADMET properties in the PKKB include octanol/water partition coefficient, solubility, dissociation constant, intestinal absorption, Caco-2 permeability, human bioavailability, plasma protein binding, blood-plasma partitioning ratio, volume of distribution, metabolism, half-life, excretion, urinary excretion, clearance, toxicity, half lethal dose in rat or mouse, etc. The PKKB provides the most extensive collection of freely available data for ADMET properties up to date. All these ADMET properties, as well as the pharmacological information and the calculated physiochemical properties are integrated into a web-based information system. 
Eleven separated data sets for octanol/water partition coefficient, solubility, blood-brain partitioning, intestinal absorption, Caco-2 permeability, human oral bioavailability, and P-glycoprotein inhibitors have been provided for free download and can be used directly for ADMET modeling. The PKKB is available online at http://cadd.suda.edu.cn/admet.",PKKB,0.895442531,PharmacoKinetics Knowledge Base,0.882700513,PKKB,0.895442531,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/15/2012 +31103066,http://pat.nichd.nih.gov,"The Placental Atlas Tool (PAT): A collaborative research and discovery platform for the placental research community. Introduction The placenta is one of the least understood, yet arguably one of the most important organs for human health and development. While there have been numerous research efforts dedicated to understanding the placenta's critical role, these studies and the data they produced remain separated and largely disparate. In order to facilitate placental research, the Eunice Kennedy Shriver National Institute of Child and Human Development (NICHD) released in October 2018 the Placental Atlas Tool (PAT) (https://pat.nichd.nih.gov/), an internet-based platform offering users a centralized placental database of molecular datasets, analytic tools, and images. Methods PAT is a cloud-based system developed by the business requirements defined by NICHD leadership and extramural placental researchers. PAT employs a metadata-driven web interface to provide curated placental datasets and images, enriched with structured, descriptive metadata to enhance data discoverability. PAT also incorporates open source molecular data analytical tools to provide a flexible analytics workflow for placental researchers. Results PAT launched with 426 analyzable molecular placental datasets consisting of over 12,500 samples from 10 distinct species, all systematically annotated and processed for enhanced research utility. 
828 placental images, consisting of 7 imaging modalities across 47 species, and nearly 300 annotated linked publications supplement the datasets to facilitate knowledge integration and hypothesis generation across disparate molecular studies. Discussion PAT will maximize the NICHD's investment in placental research by reinforcing open scientific inquiry, facilitating reuse of datasets, promoting novel research and testing of new hypotheses and analytic methods, and facilitating education of new researchers.",PAT,0.751513183,Placental Atlas Tool,0.817075777,Placental Atlas Tool,0.817075777,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/1/2019 +30266409,"http://systbio.cau.edu.cn/plad/index.php, http://zzdlab.com/plad/index.php","PlaD: A Transcriptomics Database for Plant Defense Responses to Pathogens, Providing New Insights into Plant Immune System. High-throughput transcriptomics technologies have been widely used to study plant transcriptional reprogramming during the process of plant defense responses, and a large quantity of gene expression data have been accumulated in public repositories. However, utilization of these data is often hampered by the lack of standard metadata annotation. In this study, we curated 2444 public pathogenesis-related gene expression samples from the model plant Arabidopsis and three major crops (maize, rice, and wheat). We organized the data into a user-friendly database termed as PlaD. Currently, PlaD contains three key features. First, it provides large-scale curated data related to plant defense responses, including gene expression and gene functional annotation data. Second, it provides the visualization of condition-specific expression profiles. Third, it allows users to search co-regulated genes under the infections of various pathogens. Using PlaD, we conducted a large-scale transcriptome analysis to explore the global landscape of gene expression in the curated data. 
We found that only a small fraction of genes were differentially expressed under multiple conditions, which might be explained by their tendency of having more network connections and shorter network distances in gene networks. Collectively, we hope that PlaD can serve as an important and comprehensive knowledgebase to the community of plant sciences, providing insightful clues to better understand the molecular mechanisms underlying plant immune responses. PlaD is freely available at http://systbio.cau.edu.cn/plad/index.php or http://zzdlab.com/plad/index.php.",PlaD,0.996840239,NA,0,PlaD,0.996840239,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2018 +27924044,http://www.systembioinfo.org/plamom,"PlaMoM: a comprehensive database compiles plant mobile macromolecules. In plants, various phloem-mobile macromolecules including noncoding RNAs, mRNAs and proteins are suggested to act as important long-distance signals in regulating crucial physiological and morphological transition processes such as flowering, plant growth and stress responses. Given recent advances in high-throughput sequencing technologies, numerous mobile macromolecules have been identified in diverse plant species from different plant families. However, most of the identified mobile macromolecules are not annotated in current versions of species-specific databases and are only available as non-searchable datasheets. To facilitate study of the mobile signaling macromolecules, we compiled the PlaMoM (Plant Mobile Macromolecules) database, a resource that provides convenient and interactive search tools allowing users to retrieve, to analyze and also to predict mobile RNAs/proteins. Each entry in the PlaMoM contains detailed information such as nucleotide/amino acid sequences, ortholog partners, related experiments, gene functions and literature. For the model plant Arabidopsis thaliana, protein-protein interactions of mobile transcripts are presented as interactive molecular networks. 
Furthermore, PlaMoM provides a built-in tool to identify potential RNA mobility signals such as tRNA-like structures. The current version of PlaMoM compiles a total of 17 991 mobile macromolecules from 14 plant species/ecotypes from published data and literature. PlaMoM is available at http://www.systembioinfo.org/plamom/.",PlaMoM,0.996463239,Plant Mobile Macromolecules,0.947892308,PlaMoM,0.996463239,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/24/2016 +30101318,http://planc-te.cp.utfpr.edu.br,"PlaNC-TE: a comprehensive knowledgebase of non-coding RNAs and transposable elements in plants. Transposable elements (TEs) play an essential role in the genetic variability of eukaryotic species. In plants, they may comprise up to 90% of the total genome. Non-coding RNAs (ncRNAs) are known to control gene expression and regulation. Although the relationship between ncRNAs and TEs is known, obtaining the organized data for sequenced genomes is not straightforward. In this study, we describe the PlaNC-TE (http://planc-te.cp.utfpr.edu.br), a user-friendly portal harboring a knowledgebase created by integrating and analysing plant ncRNA-TE data. We identified a total of 14 350 overlaps between ncRNAs and TEs in 40 plant genomes. The database allows users to browse, search and download all ncRNA and TE data analysed. Overall, PlaNC-TE not only organizes data and provides insights about the relationship between ncRNA and TEs in plants but also helps improve genome annotation strategies. Moreover, this is the first database to provide resources to broadly investigate functions and mechanisms involving TEs and ncRNAs in plants.",PlaNC-TE,0.996604753,NA,0,PlaNC-TE,0.996604753,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +23688397,http://planex.plantbioinformatics.org,"PLANEX: the plant co-expression database. Background The PLAnt co-EXpression database (PLANEX) is a new internet-based database for plant gene analysis. 
PLANEX (http://planex.plantbioinformatics.org) contains publicly available GeneChip data obtained from the Gene Expression Omnibus (GEO) of the National Center for Biotechnology Information (NCBI). PLANEX is a genome-wide co-expression database, which allows for the functional identification of genes from a wide variety of experimental designs. It can be used for the characterization of genes for functional identification and analysis of a gene's dependency among other genes. Gene co-expression databases have been developed for other species, but gene co-expression information for plants is currently limited. Description We constructed PLANEX as a list of co-expressed genes and functional annotations for Arabidopsis thaliana, Glycine max, Hordeum vulgare, Oryza sativa, Solanum lycopersicum, Triticum aestivum, Vitis vinifera and Zea mays. PLANEX reports Pearson's correlation coefficients (PCCs; r-values) that distribute from a gene of interest for a given microarray platform set corresponding to a particular organism. To support PCCs, PLANEX performs an enrichment test of Gene Ontology terms and Cohen's Kappa value to compare functional similarity for all genes in the co-expression database. PLANEX draws a cluster network with co-expressed genes, which is estimated using the k-mean method. To construct PLANEX, a variety of datasets were interpreted by the IBM supercomputer Advanced Interactive eXecutive (AIX) in a supercomputing center. Conclusion PLANEX provides a correlation database, a cluster network and an interpretation of enrichment test results for eight plant species. A typical co-expressed gene generates lists of co-expression data that contain hundreds of genes of interest for enrichment analysis. Also, co-expressed genes can be identified and cataloged in terms of comparative genomics by using the 'Co-expression gene compare' feature. 
This type of analysis will help interpret experimental data and determine whether there is a common term among genes of interest.",PLANEX,0.997026294,PLAnt co-EXpression database,0.942242801,PLANEX,0.997026294,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/20/2013 +23426257,http://planform.daniel-lobo.com,"Planform: an application and database of graph-encoded planarian regenerative experiments. Summary Understanding the mechanisms governing the regeneration capabilities of many organisms is a fundamental interest in biology and medicine. An ever-increasing number of manipulation and molecular experiments are attempting to discover a comprehensive model for regeneration, with the planarian flatworm being one of the most important model species. Despite much effort, no comprehensive, constructive, mechanistic models exist yet, and it is now clear that computational tools are needed to mine this huge dataset. However, until now, there is no database of regenerative experiments, and the current genotype-phenotype ontologies and databases are based on textual descriptions, which are not understandable by computers. To overcome these difficulties, we present here Planform (Planarian formalization), a manually curated database and software tool for planarian regenerative experiments, based on a mathematical graph formalism. The database contains more than a thousand experiments from the main publications in the planarian literature. The software tool provides the user with a graphical interface to easily interact with and mine the database. The presented system is a valuable resource for the regeneration community and, more importantly, will pave the way for the application of novel artificial intelligence tools to extract knowledge from this dataset. 
Availability The database and software tool are freely available at http://planform.daniel-lobo.com.",Planform,0.994146824,lanarian,0.698465049,Planform,0.994146824,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/19/2013 +"26578570, 30496475",http://planmine.mpi-cbg.de,"PlanMine--a mineable resource of planarian biology and biodiversity. Planarian flatworms are in the midst of a renaissance as a model system for regeneration and stem cells. Besides two well-studied model species, hundreds of species exist worldwide that present a fascinating diversity of regenerative abilities, tissue turnover rates, reproductive strategies and other life history traits. PlanMine (http://planmine.mpi-cbg.de/) aims to accomplish two primary missions: First, to provide an easily accessible platform for sharing, comparing and value-added mining of planarian sequence data. Second, to catalyze the comparative analysis of the phenotypic diversity amongst planarian species. Currently, PlanMine houses transcriptomes independently assembled by our lab and community contributors. Detailed assembly/annotation statistics, a custom-developed BLAST viewer and easy export options enable comparisons at the contig and assembly level. Consistent annotation of all transcriptomes by an automated pipeline, the integration of published gene expression information and inter-relational query tools provide opportunities for mining planarian gene sequences and functions. For inter-species comparisons, we include transcriptomes of, so far, six planarian species, along with images, expert-curated information on their biology and pre-calculated cross-species sequence homologies. 
PlanMine is based on the popular InterMine system in order to make the rich biology of planarians accessible to the general life sciences research community.",PlanMine,0.994910002,NA,0,PlanMine,0.994910002,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +34403192,http://plantcyc.org,"Plant Metabolic Network 15: A resource of genome-wide metabolism databases for 126 plants and algae. To understand and engineer plant metabolism, we need a comprehensive and accurate annotation of all metabolic information across plant species. As a step towards this goal, we generated genome-scale metabolic pathway databases of 126 algal and plant genomes, ranging from model organisms to crops to medicinal plants (https://plantcyc.org). Of these, 104 have not been reported before. We systematically evaluated the quality of the databases, which revealed that our semi-automated validation pipeline dramatically improves the quality. We then compared the metabolic content across the 126 organisms using multiple correspondence analysis and found that Brassicaceae, Poaceae, and Chlorophyta appeared as metabolically distinct groups. To demonstrate the utility of this resource, we used recently published sorghum transcriptomics data to discover previously unreported trends of metabolism underlying drought tolerance. We also used single-cell transcriptomics data from the Arabidopsis root to infer cell type-specific metabolic pathways. This work shows the quality and quantity of our resource and demonstrates its wide-ranging utility in integrating metabolism with other areas of plant biology.",NA,0,Plant Metabolic Network,0.702312887,Plant Metabolic Network,0.702312887,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/27/2021 +24980131,http://www.plantrdnadatabase.com,"Plant rDNA database: update and new features. . 
The Plant rDNA database (www.plantrdnadatabase.com) is an open access online resource providing detailed information on numbers, structures and positions of 5S and 18S-5.8S-26S (35S) ribosomal DNA loci. The data have been obtained from >600 publications on plant molecular cytogenetics, mostly based on fluorescent in situ hybridization (FISH). This edition of the database contains information on 1609 species derived from 2839 records, which means an expansion of 55.76 and 94.45%, respectively. It holds the data for angiosperms, gymnosperms, bryophytes and pteridophytes available as of June 2013. Information from publications reporting data for a single rDNA (either 5S or 35S alone) and annotation regarding transcriptional activity of 35S loci now appears in the database. Preliminary analyses suggest greater variability in the number of rDNA loci in gymnosperms than in angiosperms. New applications provide ideograms of the species showing the positions of rDNA loci as well as a visual representation of their genome sizes. We have also introduced other features to boost the usability of the Web interface, such as an application for convenient data export and a new section with rDNA-FISH-related information (mostly detailing protocols and reagents). In addition, we upgraded and/or proofread tabs and links and modified the website for a more dynamic appearance. This manuscript provides a synopsis of these changes and developments. http://www.plantrdnadatabase.com.",Plant rDNA,0.623982986,Plant rDNA database,0.521373582,Plant rDNA,0.623982986,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/30/2014 +"27799469, 31680153",http://plantreactome.gramene.org,"Plant Reactome: a resource for plant pathways and comparative analysis. Plant Reactome (http://plantreactome.gramene.org/) is a free, open-source, curated plant pathway database portal, provided as part of the Gramene project. 
The database provides intuitive bioinformatics tools for the visualization, analysis and interpretation of pathway knowledge to support genome annotation, genome analysis, modeling, systems biology, basic research and education. Plant Reactome employs the structural framework of a plant cell to show metabolic, transport, genetic, developmental and signaling pathways. We manually curate molecular details of pathways in these domains for reference species Oryza sativa (rice) supported by published literature and annotation of well-characterized genes. Two hundred twenty-two rice pathways, 1025 reactions associated with 1173 proteins, 907 small molecules and 256 literature references have been curated to date. These reference annotations were used to project pathways for 62 model, crop and evolutionarily significant plant species based on gene homology. Database users can search and browse various components of the database, visualize curated baseline expression of pathway-associated genes provided by the Expression Atlas and upload and analyze their Omics datasets. The database also offers data access via Application Programming Interfaces (APIs) and in various standardized pathway formats, such as SBML and BioPAX.",Plant Reactome,0.934845229,NA,0,Plant Reactome,0.934845229,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +33137192,http://gong_lab.hzau.edu.cn/Plant_imputeDB,"Plant-ImputeDB: an integrated multiple plant reference panel database for genotype imputation. Genotype imputation is a process that estimates missing genotypes in terms of the haplotypes and genotypes in a reference panel. It can effectively increase the density of single nucleotide polymorphisms (SNPs), boost the power to identify genetic association and promote the combination of genetic studies. However, there has been a lack of high-quality reference panels for most plants, which greatly hinders the application of genotype imputation. 
Here, we developed Plant-ImputeDB (http://gong_lab.hzau.edu.cn/Plant_imputeDB/), a comprehensive database with reference panels of 12 plant species for online genotype imputation, SNP and block search and free download. By integrating genotype data and whole-genome resequencing data of plants from various studies and databases, the current Plant-ImputeDB provides high-quality reference panels of 12 plant species, including ∼69.9 million SNPs from 34 244 samples. It also provides an easy-to-use online tool with the option of two popular tools specifically designed for genotype imputation. In addition, Plant-ImputeDB accepts submissions of different types of genomic variations, and provides free and open access to all publicly available data in support of related research worldwide. In general, Plant-ImputeDB may serve as an important resource for plant genotype imputation and greatly facilitate the research on plant genetic research.",Plant-ImputeDB,0.997689724,NA,0,Plant-ImputeDB,0.997689724,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +25435546,http://plant-pras.riken.jp,"Plant-PrAS: a database of physicochemical and structural properties and novel functional regions in plant proteomes. Arabidopsis thaliana is an important model species for studies of plant gene functions. Research on Arabidopsis has resulted in the generation of high-quality genome sequences, annotations and related post-genomic studies. The amount of annotation, such as gene-coding regions and structures, is steadily growing in the field of plant research. In contrast to the genomics resource of animals and microorganisms, there are still some difficulties with characterization of some gene functions in plant genomics studies. The acquisition of information on protein structure can help elucidate the corresponding gene function because proteins encoded in the genome possess highly specific structures and functions. 
In this study, we calculated multiple physicochemical and secondary structural parameters of protein sequences, including length, hydrophobicity, the amount of secondary structure, the number of intrinsically disordered regions (IDRs) and the predicted presence of transmembrane helices and signal peptides, using a total of 208,333 protein sequences from the genomes of six representative plant species, Arabidopsis thaliana, Glycine max (soybean), Populus trichocarpa (poplar), Oryza sativa (rice), Physcomitrella patens (moss) and Cyanidioschyzon merolae (alga). Using the PASS tool and the Rosetta Stone method, we annotated the presence of novel functional regions in 1,732 protein sequences that included unannotated sequences from the Arabidopsis and rice proteomes. These results were organized into the Plant Protein Annotation Suite database (Plant-PrAS), which can be freely accessed online at http://plant-pras.riken.jp/.",Plant-PrAS,0.983476034,Plant Protein Annotation Suite database,0.848614266,Plant-PrAS,0.983476034,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2014 +31612325,http://bioinformatics.cimap.res.in/sharma/PlantAFP,"PlantAFP: a curated database of plant-origin antifungal peptides. Emerging infectious diseases (EIDs) are a severe problem caused by fungi in human and plant species across the world. They pose a worldwide threat to food security as well as human health. Fungal infections are increasing now day by day worldwide, and the current antimycotic drugs are not effective due to the emergence of resistant strains. Therefore, it is an urgent need for the finding of new plant-origin antifungal peptides (PhytoAFPs). Huge numbers of peptides were extracted from different plant species which play a protective role against fungal infection. Hundreds of plant-origin peptides with antifungal activity have already been reported. So there is a requirement of a dedicated platform which systematically catalogs plant-origin peptides along with their antifungal properties. 
PlantAFP database is a resource of experimentally verified plant-origin antifungal peptides, collected from research articles, patents, and public databases. The current release of PlantAFP database contains 2585 peptide entries among which 510 are unique peptides. Each entry provides comprehensive information of a peptide that includes its peptide sequence, peptide name, peptide class, length of the peptide, molecular mass, antifungal activity, and origin of peptides. Besides this primary information, PlantAFP stores peptide sequences in SMILES format. In order to facilitate the user, many tools have been integrated into this database that includes BLAST search, peptide search, SMILES search, and peptide-mapping is also included in the database. PlantAFP database is accessible at http://bioinformatics.cimap.res.in/sharma/PlantAFP/.",PlantAFP,0.990319967,NA,0,PlantAFP,0.990319967,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/14/2019 +33546584,http://www.machado.cnptia.embrapa.br/plantannot,"Plant Co-expression Annotation Resource: a web server for identifying targets for genetically modified crop breeding pipelines. The development of genetically modified crops (GM) includes the discovery of candidate genes through bioinformatics analysis using genomics data, gene expression, and others. Proteins of unknown function (PUFs) are interesting targets for GM crops breeding pipelines for the novelty associated with such targets and also to avoid copyright protection. One method of inferring the putative function of PUFs is by relating them to factors of interest such as abiotic stresses using orthology and co-expression networks, in a guilt-by-association manner. In this regard, we have downloaded, analyzed, and processed genomics data of 53 angiosperms, totaling 1,862,010 genes and 2,332,974 RNA. Diamond and InterproScan were used to discover 72,266 PUFs for all organisms. RNA-seq datasets related to abiotic stresses were downloaded from NCBI/GEO. 
The RNA-seq data was used as input to the LSTrAP software to construct co-expression networks. LSTrAP also created clusters of transcripts with correlated expression, whose members are more probably related to the molecular mechanisms associated with abiotic stresses in the plants. Orthologous groups were created (OrhtoMCL) using all 2,332,974 proteins in order to associate PUFs to abiotic stress-related clusters of co-expression and therefore infer their function in a guilt-by-association manner. A freely available web resource named ""Plant Co-expression Annotation Resource"" ( https://www.machado.cnptia.embrapa.br/plantannot ), Plantannot, was created to provide indexed queries to search for PUF putatively associated with abiotic stresses. The web interface also allows browsing, querying, and retrieving of public genomics data from 53 plants. We hope Plantannot to be useful for researchers trying to obtain novel GM crops resistant to climate change hazards.",Plantannot,0.995641232,Plant Co-expression Annotation Resource,0.853625529,Plantannot,0.995641232,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/5/2021 +25125445,http://cys.bios.niu.edu/plantcazyme,"PlantCAZyme: a database for plant carbohydrate-active enzymes. . PlantCAZyme is a database built upon dbCAN (database for automated carbohydrate active enzyme annotation), aiming to provide pre-computed sequence and annotation data of carbohydrate active enzymes (CAZymes) to plant carbohydrate and bioenergy research communities. The current version contains data of 43,790 CAZymes of 159 protein families from 35 plants (including angiosperms, gymnosperms, lycophyte and bryophyte mosses) and chlorophyte algae with fully sequenced genomes. 
Useful features of the database include: (i) a BLAST server and a HMMER server that allow users to search against our pre-computed sequence data for annotation purpose, (ii) a download page to allow batch downloading data of a specific CAZyme family or species and (iii) protein browse pages to provide an easy access to the most comprehensive sequence and annotation data. http://cys.bios.niu.edu/plantcazyme/",PlantCAZyme,0.998365402,NA,0,PlantCAZyme,0.998365402,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/14/2014 +31725858,http://bis.zju.edu.cn/plantcircnet/index.php,"PlantCircNet: a database for plant circRNA-miRNA-mRNA regulatory networks. . Circular RNA (circRNA) is a novel type of endogenous noncoding RNA with covalently closed loop structures, which are widely expressed in various tissues and have functional implications in cellular processes. Acting as competing endogenous RNAs (ceRNAs), circRNAs are important regulators of miRNA activities. The identification of these circRNAs underlines the increasing complexity of ncRNA-mediated regulatory networks. However, more biological evidence is required to infer direct circRNA-miRNA associations while little attention has been paid to circRNAs in plants as compared to the abundant research in mammals. PlantCircNet is presented as an integrated database that provides visualized plant circRNA-miRNA-mRNA regulatory networks containing identified circRNAs in eight model plants. The bioinformatics integration of data from multiple sources reveals circRNA-miRNA-mRNA regulatory networks and helps identify mechanisms underlying metabolic effects of circRNAs. An enrichment analysis tool was implemented to detect significantly overrepresented Gene Ontology categories of miRNA targets. The genomic annotations, sequences and isoforms of circRNAs were also investigated. PlantCircNet provides a user-friendly interface for querying detailed information of specific plant circRNAs. 
The database may serve as a resource to facilitate plant circRNA research. Several circRNAs were identified to play potential regulatory roles in flower development and response to environmental stress from regulatory networks related with miR156a and AT5G59720, respectively. This present research indicated that circRNAs could be involved in diverse biological processes. Database URL: http://bis.zju.edu.cn/plantcircnet/index.php.",PlantCircNet,0.997203588,NA,0,PlantCircNet,0.997203588,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +26400163,http://plantdhs.org,"PlantDHS: a database for DNase I hypersensitive sites in plants. Gene expression is regulated by orchestrated binding of regulatory proteins to promoters and other cis-regulatory DNA elements (CREs). Several plant databases have been developed for mapping promoters or DNA motifs associated with promoters. However, there is a lack of databases that allow investigation for all CREs. Here we present PlantDHS (http://plantdhs.org), a plant DNase I hypersensitive site (DHS) database that integrates histone modification, RNA sequencing, nucleosome positioning/occupancy, transcription factor binding sites, and genomic sequence within an easily navigated user interface. DHSs are indicative of all CREs, including promoters, enhancers, silencers, insulators and transcription factor binding sites; all of which play immense roles in global gene expression regulation. PlantDHS provides a platform to predict all CREs associated with individual genes from three model plant species, including Arabidopsis thaliana, Brachypodium distachyon and rice (Oryza sativa). PlantDHS is especially valuable in the detection of distant CREs that are located away from promoters.",PlantDHS,0.996344864,NA,0,PlantDHS,0.996344864,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/22/2015 +26887375,http://bioinfo-tool.cp.utfpr.edu.br/plantemirdb,"PlanTE-MIR DB: a database for transposable element-related microRNAs in plant genomes. 
Transposable elements (TEs) comprise a major fraction of many plant genomes and are known to drive their organization and evolution. Several studies show that these repetitive elements have a prominent role in shaping noncoding regions of the genome such as microRNA (miRNA) loci, which are components of post-transcriptional regulation mechanisms. Although some studies have reported initial formation of miRNA loci from TE sequences, especially in model plants, the approaches that were used did not employ systems that would allow results to be delivered by a user-friendly database. In this study, we identified 152 precursor miRNAs overlapping TEs in 10 plant species. PlanTE-MIR DB was designed to assemble this data and deliver it to the scientific community interested in miRNA origin, evolution, and regulation pathways. Users can browse the database through a web interface and search for entries using various parameters. This resource is cross-referenced with repetitive element (Repbase Update) and miRNA (miRBase) repositories, where sequences can be checked for further analysis. All data in PlanTE-MIR DB are publicly available for download in several file formats to facilitate their understanding and use. The database is hosted at http://bioinfo-tool.cp.utfpr.edu.br/plantemirdb/ .",PlanTE-MIR DB,0.85333695,NA,0,PlanTE-MIR DB,0.85333695,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/18/2016 +28158643,http://plantomics.mind.meiji.ac.jp/PlantExpress,"PlantExpress: A Database Integrating OryzaExpress and ArthaExpress for Single-species and Cross-species Gene Expression Network Analyses with Microarray-Based Transcriptome Data. Publicly available microarray-based transcriptome data on plants are remarkably valuable in terms of abundance and variation of samples, particularly for Oryza sativa (rice) and Arabidopsis thaliana (Arabidopsis). 
Here, we introduce the web database PlantExpress (http://plantomics.mind.meiji.ac.jp/PlantExpress/) as a platform for gene expression network (GEN) analysis with the public microarray data of rice and Arabidopsis. PlantExpress has two functional modes. The single-species mode is specialized for GEN analysis within one of the species, while the cross-species mode is optimized for comparative GEN analysis between the species. The single-species mode for rice is the new version of OryzaExpress, which we have maintained since 2006. The single-species mode for Arabidopsis, named ArthaExpress, was newly developed. PlantExpress stores data obtained from three microarrays, the Affymetrix Rice Genome Array, the Agilent Rice Gene Expression 4x44K Microarray, and the Affymetrix Arabidopsis ATH1 Genome Array, with respective totals of 2,678, 1,206, and 10,940 samples. This database employs a ‘MyList’ function with which users may save lists of arbitrary genes and samples (experimental conditions) to use in analyses. In cross-species mode, the MyList function allows performing comparative GEN analysis between rice and Arabidopsis. In addition, the gene lists saved in MyList can be directly exported to the PODC database, which provides information and a platform for comparative GEN analysis based on RNA-seq data and knowledge-based functional annotation of plant genes. PlantExpress will facilitate understanding the biological functions of plant genes.",PlantExpress,0.993935406,NA,0,PlantExpress,0.993935406,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +22080512,http://www.plantmetabolomics.org,"Plantmetabolomics.org: mass spectrometry-based Arabidopsis metabolomics--database and tools update. The PlantMetabolomics (PM) database (http://www.plantmetabolomics.org) contains comprehensive targeted and untargeted mass spectrum metabolomics data for Arabidopsis mutants across a variety of metabolomics platforms. 
The database allows users to generate hypotheses about the changes in metabolism for mutants with genes of unknown function. Version 2.0 of PlantMetabolomics.org currently contains data for 140 mutant lines along with the morphological data. A web-based data analysis wizard allows researchers to select preprocessing and data-mining procedures to discover differences between mutants. This community resource enables researchers to formulate models of the metabolic network of Arabidopsis and enhances the research community's ability to formulate testable hypotheses concerning gene functions. PM features new web-based tools for data-mining analysis, visualization tools and enhanced cross links to other databases. The database is publicly available. PM aims to provide a hypothesis building platform for the researchers interested in any of the mutant lines or metabolites.",PlantMetabolomics,0.962467134,NA,0,PlantMetabolomics,0.962467134,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/10/2011 +22058132,http://bis.zju.edu.cn/pnatdb,"PlantNATsDB: a comprehensive database of plant natural antisense transcripts. Natural antisense transcripts (NATs), as one type of regulatory RNAs, occur prevalently in plant genomes and play significant roles in physiological and pathological processes. Although their important biological functions have been reported widely, a comprehensive database is lacking up to now. Consequently, we constructed a plant NAT database (PlantNATsDB) involving approximately 2 million NAT pairs in 69 plant species. GO annotation and high-throughput small RNA sequencing data currently available were integrated to investigate the biological function of NATs. PlantNATsDB provides various user-friendly web interfaces to facilitate the presentation of NATs and an integrated, graphical network browser to display the complex networks formed by different NATs. 
Moreover, a 'Gene Set Analysis' module based on GO annotation was designed to dig out the statistical significantly overrepresented GO categories from the specific NAT network. PlantNATsDB is currently the most comprehensive resource of NATs in the plant kingdom, which can serve as a reference database to investigate the regulatory function of NATs. The PlantNATsDB is freely available at http://bis.zju.edu.cn/pnatdb/.",PlantNATsDB,0.995334884,NAT database,0.576344937,PlantNATsDB,0.995334884,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/3/2011 +26112452,http://bioinfolab.miamioh.edu/plantordb,"PlantOrDB: a genome-wide ortholog database for land plants and green algae. Background Genes with different functions are originally generated from some ancestral genes by gene duplication, mutation and functional recombination. It is widely accepted that orthologs are homologous genes evolved from speciation events while paralogs are homologous genes resulted from gene duplication events.With the rapid increase of genomic data, identifying and distinguishing these genes among different species is becoming an important part of functional genomics research. Description Using 35 plant and 6 green algal genomes from Phytozome v9, we clustered 1,291,670 peptide sequences into 49,355 homologous gene families in terms of sequence similarity. For each gene family, we have generated a peptide sequence alignment and phylogenetic tree, and identified the speciation/duplication events for every node within the tree. For each node, we also identified and highlighted diagnostic characters that facilitate appropriate addition of a new query sequence into the existing phylogenetic tree and sequence alignment of its best matched gene family. Based on a desired species or subgroup of all species, users can view the phylogenetic tree, sequence alignment and diagnostic characters for a given gene family selectively. 
PlantOrDB not only allows users to identify orthologs or paralogs from phylogenetic trees, but also provides all orthologs that are built using Reciprocal Best Hit (RBH) pairwise alignment method. Users can upload their own sequences to find the best matched gene families, and visualize their query sequences within the relevant phylogenetic trees and sequence alignments. Conclusion PlantOrDB ( http://bioinfolab.miamioh.edu/plantordb ) is a genome-wide ortholog database for land plants and green algae. PlantOrDB offers highly interactive visualization, accurate query classification and powerful search functions useful for functional genomic research.",PlantOrDB,0.997664332,NA,0,PlantOrDB,0.997664332,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/26/2015 +30395277,http://PlantPAN.itps.ncku.edu.tw,"PlantPAN3.0: a new and updated resource for reconstructing transcriptional regulatory networks from ChIP-seq experiments in plants. The Plant Promoter Analysis Navigator (PlantPAN; http://PlantPAN.itps.ncku.edu.tw/) is an effective resource for predicting regulatory elements and reconstructing transcriptional regulatory networks for plant genes. In this release (PlantPAN 3.0), 17 230 TFs were collected from 78 plant species. To explore regulatory landscapes, genomic locations of TFBSs have been captured from 662 public ChIP-seq samples using standard data processing. A total of 1 233 999 regulatory linkages were identified from 99 regulatory factors (TFs, histones and other DNA-binding proteins) and their target genes across seven species. Additionally, this new version added 2449 matrices extracted from ChIP-seq peaks for cis-regulatory element prediction. 
In addition to integrated ChIP-seq data, four major improvements were provided for more comprehensive information of TF binding events, including (i) 1107 experimentally verified TF matrices from the literature, (ii) gene regulation network comparison between two species, (iii) 3D structures of TFs and TF-DNA complexes and (iv) condition-specific co-expression networks of TFs and their target genes extended to four species. The PlantPAN 3.0 can not only be efficiently used to investigate critical cis- and trans-regulatory elements in plant promoters, but also to reconstruct high-confidence relationships among TF-targets under specific conditions.",PlantPAN,0.996323383,Plant Promoter Analysis Navigator,0.927395006,PlantPAN,0.996323383,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +21418024,http://www.plantpis.ba.itb.cnr.it,"PlantPIs--an interactive web resource on plant protease inhibitors. PlantPIs is a web querying system for a database collection of plant protease inhibitors data. Protease inhibitors in plants are naturally occurring proteins that inhibit the function of endogenous and exogenous proteases. In this paper the design and development of a web framework providing a clear and very flexible way of querying plant protease inhibitors data is reported. The web resource is based on a relational database, containing data of plants protease inhibitors publicly accessible, and a graphical user interface providing all the necessary browsing tools, including a data exporting function. PlantPIs contains information extracted principally from MEROPS database, filtered, annotated and compared with data stored in other protein and gene public databases, using both automated techniques and domain expert evaluations. The data are organized to allow a flexible and easy way to access stored information. 
The database is accessible at http://www.plantpis.ba.itb.cnr.it/.",PlantPIs,0.9976331,NA,0,PlantPIs,0.9976331,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2011 +28111365,"http://probes.pw.usda.gov/plantrgdb, http://aegilops.wheat.ucdavis.edu/plantrgdb","PlantRGDB: A Database of Plant Retrocopied Genes. RNA-based gene duplication, known as retrocopy, plays important roles in gene origination and genome evolution. The genomes of many plants have been sequenced, offering an opportunity to annotate and mine the retrocopies in plant genomes. However, comprehensive and unified annotation of retrocopies in these plants is still lacking. In this study I constructed the PlantRGDB (Plant Retrocopied Gene DataBase), the first database of plant retrocopies, to provide a putatively complete centralized list of retrocopies in plant genomes. The database is freely accessible at http://probes.pw.usda.gov/plantrgdb or http://aegilops.wheat.ucdavis.edu/plantrgdb. It currently integrates 49 plant species and 38,997 retrocopies along with characterization information. PlantRGDB provides a user-friendly web interface for searching, browsing and downloading the retrocopies in the database. PlantRGDB also offers graphical viewer-integrated sequence information for displaying the structure of each retrocopy. The attributes of the retrocopies of each species are reported using a browse function. In addition, useful tools, such as an advanced search and BLAST, are available to search the database more conveniently. In conclusion, the database will provide a web platform for obtaining valuable insight into the generation of retrocopies and will supplement research on gene duplication and genome evolution in plants.",PlantRGDB,0.995906234,Plant Retrocopied Gene DataBase,0.938694141,PlantRGDB,0.995906234,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +23066098,http://plantrna.ibmp.cnrs.fr,"PlantRNA, a database for tRNAs of photosynthetic eukaryotes. 
PlantRNA database (http://plantrna.ibmp.cnrs.fr/) compiles transfer RNA (tRNA) gene sequences retrieved from fully annotated plant nuclear, plastidial and mitochondrial genomes. The set of annotated tRNA gene sequences has been manually curated for maximum quality and confidence. The novelty of this database resides in the inclusion of biological information relevant to the function of all the tRNAs entered in the library. This includes 5'- and 3'-flanking sequences, A and B box sequences, region of transcription initiation and poly(T) transcription termination stretches, tRNA intron sequences, aminoacyl-tRNA synthetases and enzymes responsible for tRNA maturation and modification. Finally, data on mitochondrial import of nuclear-encoded tRNAs as well as the bibliome for the respective tRNAs and tRNA-binding proteins are also included. The current annotation concerns complete genomes from 11 organisms: five flowering plants (Arabidopsis thaliana, Oryza sativa, Populus trichocarpa, Medicago truncatula and Brachypodium distachyon), a moss (Physcomitrella patens), two green algae (Chlamydomonas reinhardtii and Ostreococcus tauri), one glaucophyte (Cyanophora paradoxa), one brown alga (Ectocarpus siliculosus) and a pennate diatom (Phaeodactylum tricornutum). The database will be regularly updated and implemented with new plant genome annotations so as to provide extensive information on tRNA biology to the research community.",PlantRNA,0.962213755,NA,0,PlantRNA,0.962213755,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/12/2012 +26527721,"http://pgsb.helmholtz-muenchen.de/plant/index.jsp, http://transplantdb.eu","PGSB PlantsDB: updates to the database framework for comparative plant genome research. PGSB (Plant Genome and Systems Biology: formerly MIPS) PlantsDB (http://pgsb.helmholtz-muenchen.de/plant/index.jsp) is a database framework for the comparative analysis and visualization of plant genome data. 
The resource has been updated with new data sets and types as well as specialized tools and interfaces to address user demands for intuitive access to complex plant genome data. In its latest incarnation, we have re-worked both the layout and navigation structure and implemented new keyword search options and a new BLAST sequence search functionality. Actively involved in corresponding sequencing consortia, PlantsDB has dedicated special efforts to the integration and visualization of complex triticeae genome data, especially for barley, wheat and rye. We enhanced CrowsNest, a tool to visualize syntenic relationships between genomes, with data from the wheat sub-genome progenitor Aegilops tauschii and added functionality to the PGSB RNASeqExpressionBrowser. GenomeZipper results were integrated for the genomes of barley, rye, wheat and perennial ryegrass and interactive access is granted through PlantsDB interfaces. Data exchange and cross-linking between PlantsDB and other plant genome databases is stimulated by the transPLANT project (http://transplantdb.eu/).",PlantsDB,0.988374114,Plant Genome and Systems Biology,0.978462391,PlantsDB,0.988374114,1,NA,23203886,NA,NA,NA,do not merge,NA,NA,NA,11/2/2015 +23203886,http://mips.helmholtz-muenchen.de/plant/genomes.jsp,"MIPS PlantsDB: a database framework for comparative plant genome research. The rapidly increasing amount of plant genome (sequence) data enables powerful comparative analyses and integrative approaches and also requires structured and comprehensive information resources. Databases are needed for both model and crop plant organisms and both intuitive search/browse views and comparative genomics tools should communicate the data to researchers and help them interpret it. MIPS PlantsDB (http://mips.helmholtz-muenchen.de/plant/genomes.jsp) was initially described in NAR in 2007 [Spannagl,M., Noubibou,O., Haase,D., Yang,L., Gundlach,H., Hindemitt, T., Klee,K., Haberer,G., Schoof,H. and Mayer,K.F. 
(2007) MIPSPlantsDB-plant database resource for integrative and comparative plant genome research. Nucleic Acids Res., 35, D834-D840] and was set up from the start to provide data and information resources for individual plant species as well as a framework for integrative and comparative plant genome research. PlantsDB comprises database instances for tomato, Medicago, Arabidopsis, Brachypodium, Sorghum, maize, rice, barley and wheat. Building up on that, state-of-the-art comparative genomics tools such as CrowsNest are integrated to visualize and investigate syntenic relationships between monocot genomes. Results from novel genome analysis strategies targeting the complex and repetitive genomes of triticeae species (wheat and barley) are provided and cross-linked with model species. The MIPS Repeat Element Database (mips-REdat) and Catalog (mips-REcat) as well as tight connections to other databases, e.g. via web services, are further important components of PlantsDB.",PlantsDB,0.96487546,Element Database,0.734058082,PlantsDB,0.96487546,1,NA,26527721,low_prob_best_name,do not remove,NA,do not merge,NA,NA,NA,11/29/2012 +24174544,http://planttfdb.cbi.pku.edu.cn,"PlantTFDB 3.0: a portal for the functional and evolutionary study of plant transcription factors. With the aim to provide a resource for functional and evolutionary study of plant transcription factors (TFs), we updated the plant TF database PlantTFDB to version 3.0 (http://planttfdb.cbi.pku.edu.cn). After refining the TF classification pipeline, we systematically identified 129 288 TFs from 83 species, of which 67 species have genome sequences, covering main lineages of green plants. 
Besides the abundant annotation provided in the previous version, we generated more annotations for identified TFs, including expression, regulation, interaction, conserved elements, phenotype information, expert-curated descriptions derived from UniProt, TAIR and NCBI GeneRIF, as well as references to provide clues for functional studies of TFs. To help identify evolutionary relationship among identified TFs, we assigned 69 450 TFs into 3924 orthologous groups, and constructed 9217 phylogenetic trees for TFs within the same families or same orthologous groups, respectively. In addition, we set up a TF prediction server in this version for users to identify TFs from their own sequences.",PlantTFDB,0.997673213,NA,0,PlantTFDB,0.997673213,1,NA,27924042,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/29/2013 +27924042,"http://planttfdb.cbi.pku.edu.cn/, http://plantregmap.cbi.pku.edu.cn","PlantTFDB 4.0: toward a central hub for transcription factors and regulatory interactions in plants. With the goal of providing a comprehensive, high-quality resource for both plant transcription factors (TFs) and their regulatory interactions with target genes, we upgraded plant TF database PlantTFDB to version 4.0 (http://planttfdb.cbi.pku.edu.cn/). In the new version, we identified 320 370 TFs from 165 species, presenting a more comprehensive genomic TF repertoires of green plants. Besides updating the pre-existing abundant functional and evolutionary annotation for identified TFs, we generated three new types of annotation which provide more directly clues to investigate functional mechanisms underlying: (i) a set of high-quality, non-redundant TF binding motifs derived from experiments; (ii) multiple types of regulatory elements identified from high-throughput sequencing data; (iii) regulatory interactions curated from literature and inferred by combining TF binding motifs and regulatory elements. 
In addition, we upgraded previous TF prediction server, and set up four novel tools for regulation prediction and functional enrichment analyses. Finally, we set up a novel companion portal PlantRegMap (http://plantregmap.cbi.pku.edu.cn) for users to access the regulation resource and analysis tools conveniently.",PlantTFDB,0.996748269,NA,0,PlantTFDB,0.996748269,1,NA,24174544,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/24/2016 +28592293,http://genome.lcqb.upmc.fr/plasmobase,"Plasmobase: a comparative database of predicted domain architectures for Plasmodium genomes. Background With the availability of complete genome sequences of both human and non-human Plasmodium parasites, it is now possible to use comparative genomics to look for orthology across Plasmodium species and for species specific genes. This comparative analyses could provide important clues for the development of new strategies to prevent and treat malaria in humans, however, the number of functionally annotated proteins is still low for all Plasmodium species. In the context of genomes that are hard to annotate because of sequence divergence, such as Plasmodium, domain co-occurrence becomes particularly important to trust predictions. In particular, domain architecture prediction can be used to improve the performance of existing annotation methods since homologous proteins might share their architectural context. Results Plasmobase is a unique database designed for the comparative study of Plasmodium genomes. Domain architecture reconstruction in Plasmobase relies on DAMA, the state-of-the-art method in architecture prediction, while domain annotation is realised with CLADE, a novel annotation tool based on a multi-source strategy. Plasmobase significantly increases the Pfam domain coverage of all Plasmodium genomes, it proposes new domain architectures as well as new domain families that have never been reported before for these genomes. 
It proposes a visualization of domain architectures and allows for an easy comparison among architectures within Plasmodium species and with other species, described in UniProt. Conclusions Plasmobase is a valuable new resource for domain annotation in Plasmodium genomes. Its graphical presentation of protein sequences, based on domain architectures, will hopefully be of interest for comparative genomic studies. It should help to discover species-specific genes, possibly underlying important phenotypic differences between parasites, and orthologous gene families for deciphering the biology of these complex and important Apicomplexan organisms. In conclusion, Plasmobase is a flexible and rich site where any biologist can find something of his/her own interest. Availability Plasmobase is accessible at http://genome.lcqb.upmc.fr/plasmobase/ .",Plasmobase,0.995240629,NA,0,Plasmobase,0.995240629,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/7/2017 +25593348,http://plasmogem.sanger.ac.uk,"PlasmoGEM, a database supporting a community resource for large-scale experimental genetics in malaria parasites. The Plasmodium Genetic Modification (PlasmoGEM) database (http://plasmogem.sanger.ac.uk) provides access to a resource of modular, versatile and adaptable vectors for genome modification of Plasmodium spp. parasites. PlasmoGEM currently consists of >2000 plasmids designed to modify the genome of Plasmodium berghei, a malaria parasite of rodents, which can be requested by non-profit research organisations free of charge. PlasmoGEM vectors are designed with long homology arms for efficient genome integration and carry gene specific barcodes to identify individual mutants. They can be used for a wide array of applications, including protein localisation, gene interaction studies and high-throughput genetic screens. 
The vector production pipeline is supported by a custom software suite that automates both the vector design process and quality control by full-length sequencing of the finished vectors. The PlasmoGEM web interface allows users to search a database of finished knock-out and gene tagging vectors, view details of their designs, download vector sequence in different formats and view available quality control data as well as suggested genotyping strategies. We also make gDNA library clones and intermediate vectors available for researchers to produce vectors for themselves.",PlasmoGEM,0.99704349,Plasmodium Genetic Modification,0.977940926,PlasmoGEM,0.99704349,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2015 +25378306,http://lcgbase.big.ac.cn/plastid-LCGbase,"Plastid-LCGbase: a collection of evolutionarily conserved plastid-associated gene pairs. Plastids carry their own genetic material that encodes a variable set of genes that are limited in number but functionally important. Aside from orthology, the lineage-specific order and orientation of these genes are also relevant. Here, we develop a database, Plastid-LCGbase (http://lcgbase.big.ac.cn/plastid-LCGbase/), which focuses on organizational variability of plastid genes and genomes from diverse taxonomic groups. The current Plastid-LCGbase contains information from 470 plastid genomes and exhibits several unique features. First, through a genome-overview page generated from OrganellarGenomeDRAW, it displays general arrangement of all plastid genes (circular or linear). Second, it shows patterns and modes of all paired plastid genes and their physical distances across user-defined lineages, which are facilitated by a step-wise stratification of taxonomic groups. 
Third, it divides the paired genes into three categories (co-directionally-paired genes or CDPGs, convergently-paired genes or CPGs and divergently-paired genes or DPGs) and three patterns (separation, overlap and inclusion) and provides basic statistics for each species. Fourth, the gene pairing scheme is expandable, where neighboring genes can also be included in species-/lineage-specific comparisons. We hope that Plastid-LCGbase facilitates gene variation (insertion-deletion, translocation and rearrangement) and transcription-level studies of plastid genomes.",Plastid-LCGbase,0.993730698,NA,0,Plastid-LCGbase,0.993730698,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/5/2014 +"25324309, 27987171, 29069403",http://bioinformatics.psb.ugent.be/plaza,"PLAZA 3.0: an access point for plant comparative genomics. Comparative sequence analysis has significantly altered our view on the complexity of genome organization and gene functions in different kingdoms. PLAZA 3.0 is designed to make comparative genomics data for plants available through a user-friendly web interface. Structural and functional annotation, gene families, protein domains, phylogenetic trees and detailed information about genome organization can easily be queried and visualized. Compared with the first version released in 2009, which featured nine organisms, the number of integrated genomes is more than four times higher, and now covers 37 plant species. The new species provide a wider phylogenetic range as well as a more in-depth sampling of specific clades, and genomes of additional crop species are present. The functional annotation has been expanded and now comprises data from Gene Ontology, MapMan, UniProtKB/Swiss-Prot, PlnTFDB and PlantTFDB. Furthermore, we improved the algorithms to transfer functional annotation from well-characterized plant genomes to other species. 
The additional data and new features make PLAZA 3.0 (http://bioinformatics.psb.ugent.be/plaza/) a versatile and comprehensible resource for users wanting to explore genome information to study different aspects of plant biology, both in model and non-model organisms.",PLAZA,0.996272564,Comparative Genomic Database,0.86294961,PLAZA,0.996272564,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +26746174,http://bioinf.mind.meiji.ac.jp/phapt,"Pleurochrysome: A Web Database of Pleurochrysis Transcripts and Orthologs Among Heterogeneous Algae. Pleurochrysis is a coccolithophorid genus, which belongs to the Coccolithales in the Haptophyta. The genus has been used extensively for biological research, together with Emiliania in the Isochrysidales, to understand distinctive features between the two coccolithophorid-including orders. However, molecular biological research on Pleurochrysis such as elucidation of the molecular mechanism behind coccolith formation has not made great progress at least in part because of lack of comprehensive gene information. To provide such information to the research community, we built an open web database, the Pleurochrysome (http://bioinf.mind.meiji.ac.jp/phapt/), which currently stores 9,023 unique gene sequences (designated as UNIGENEs) assembled from expressed sequence tag sequences of P. haptonemofera as core information. The UNIGENEs were annotated with gene sequences sharing significant homology, conserved domains, Gene Ontology, KEGG Orthology, predicted subcellular localization, open reading frames and orthologous relationship with genes of 10 other algal species, a cyanobacterium and the yeast Saccharomyces cerevisiae. This sequence and annotation information can be easily accessed via several search functions. Besides fundamental functions such as BLAST and keyword searches, this database also offers search functions to explore orthologous genes in the 12 organisms and to seek novel genes. 
The Pleurochrysome will promote molecular biological and phylogenetic research on coccolithophorids and other haptophytes by helping scientists mine data from the primary transcriptome of P. haptonemofera.",Pleurochrysome,0.995230675,NA,0,Pleurochrysome,0.995230675,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/7/2016 +22084198,http://www.plexdb.org,"PLEXdb: gene expression resources for plants and plant pathogens. PLEXdb (http://www.plexdb.org), in partnership with community databases, supports comparisons of gene expression across multiple plant and pathogen species, promoting individuals and/or consortia to upload genome-scale data sets to contrast them to previously archived data. These analyses facilitate the interpretation of structure, function and regulation of genes in economically important plants. A list of Gene Atlas experiments highlights data sets that give responses across different developmental stages, conditions and tissues. Tools at PLEXdb allow users to perform complex analyses quickly and easily. The Model Genome Interrogator (MGI) tool supports mapping gene lists onto corresponding genes from model plant organisms, including rice and Arabidopsis. MGI predicts homologies, displays gene structures and supporting information for annotated genes and full-length cDNAs. The gene list-processing wizard guides users through PLEXdb functions for creating, analyzing, annotating and managing gene lists. Users can upload their own lists or create them from the output of PLEXdb tools, and then apply diverse higher level analyses, such as ANOVA and clustering. PLEXdb also provides methods for users to track how gene expression changes across many different experiments using the Gene OscilloScope. 
This tool can identify interesting expression patterns, such as up-regulation under diverse conditions or checking any gene's suitability as a steady-state control.",PLEXdb,0.997321129,NA,0,PLEXdb,0.997321129,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/13/2011 +28529077,http://plmd.biocuckoo.org,"PLMD: An updated data resource of protein lysine modifications. Post-translational modifications (PTMs) occurring at protein lysine residues, or protein lysine modifications (PLMs), play critical roles in regulating biological processes. Due to the explosive expansion of the amount of PLM substrates and the discovery of novel PLM types, here we greatly updated our previous studies, and presented a much more integrative resource of protein lysine modification database (PLMD). In PLMD, we totally collected and integrated 284,780 modification events in 53,501 proteins across 176 eukaryotes and prokaryotes for up to 20 types of PLMs, including ubiquitination, acetylation, sumoylation, methylation, succinylation, malonylation, glutarylation, glycation, formylation, hydroxylation, butyrylation, propionylation, crotonylation, pupylation, neddylation, 2-hydroxyisobutyrylation, phosphoglycerylation, carboxylation, lipoylation and biotinylation. Using the data set, a motif-based analysis was performed for each PLM type, and the results demonstrated that different PLM types preferentially recognize distinct sequence motifs for the modifications. Moreover, various PLMs synergistically orchestrate specific cellular biological processes by mutual crosstalks with each other, and we totally found 65,297 PLM events involved in 90 types of PLM co-occurrences on the same lysine residues. Finally, various options were provided for accessing the data, while original references and other annotations were also present for each PLM substrate. Taken together, we anticipated the PLMD database can serve as a useful resource for further researches of PLMs. 
PLMD 3.0 was implemented in PHP + MySQL and freely available at http://plmd.biocuckoo.org.",PLMD,0.97490716,protein lysine modification database,0.703303804,PLMD,0.97490716,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/3/2017 +28203705,http://bioinformatics.caf.ac.cn/Pln24NT,"Pln24NT: a web resource for plant 24-nt siRNA producing loci. Abstract In plants, 24 nucleotide small interfering RNAs (24-nt siRNAs) account for a large percentage of the total siRNA pool, and they play an important role in guiding plant-specific RNA-directed DNA methylation (RdDM), which transcriptionally silences transposon elements, transgenes, repetitive sequences and some endogenous genes. Several loci in plant genomes produce clusters of 24-nt RNAs, and these loci are receiving increasing attention from the research community. However, at present there is no bioinformatics resource dedicated to 24-nt siRNA loci and their derived 24-nt siRNAs. Thus, in this study, Pln24NT, a freely available web resource, was created to centralize 24-nt siRNA loci and 24-nt siRNA information, including fundamental locus information, expression profiles and annotation of transposon elements, from next-generation sequencing (NGS) data for 10 popular plant species. An intuitive web interface was also developed for convenient searching and browsing, and analytical tools were included to help users flexibly analyze their own siRNA NGS data. Pln24NT will help the plant research community to discover and characterize 24-nt siRNAs, and may prove useful for studying the roles of siRNA in RNA-directed DNA methylation in plants. Availability and implementation http://bioinformatics.caf.ac.cn/Pln24NT . Contact suxh@caf.ac.cn. Supplementary information Supplementary data are available at Bioinformatics online.",Pln24NT,0.996295124,NA,0,Pln24NT,0.996295124,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2017 +23476021,http://chualab.rockefeller.edu/gbrowse2/homepage.html,"PLncDB: plant long non-coding RNA database. 
Summary Plant long non-coding RNA database (PLncDB) attempts to provide the following functions related to long non-coding RNAs (lncRNAs): (i) Genomic information for a large number of lncRNAs collected from various resources; (ii) an online genome browser for plant lncRNAs based on a platform similar to that of the UCSC Genome Browser; (iii) Integration of transcriptome datasets derived from various samples including different tissues, developmental stages, mutants and stress treatments; and (iv) A list of epigenetic modification datasets and small RNA datasets. Currently, our PLncDB provides a comprehensive genomic view of Arabidopsis lncRNAs for the plant research community. This database will be regularly updated with new plant genome when available so as to greatly facilitate future investigations on plant lncRNAs. Availability PLncDB is freely accessible at http://chualab.rockefeller.edu/gbrowse2/homepage.html and all results can be downloaded for free at the website.",PLncDB,0.997522697,Plant long non-coding RNA database,0.98458527,PLncDB,0.997522697,1,NA,33079992,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,3/7/2013 +33079992,http://plncdb.tobaccodb.org,"PLncDB V2.0: a comprehensive encyclopedia of plant long noncoding RNAs. Long noncoding RNAs (lncRNAs) are transcripts longer than 200 nucleotides with little or no protein coding potential. The expanding list of lncRNAs and accumulating evidence of their functions in plants have necessitated the creation of a comprehensive database for lncRNA research. However, currently available plant lncRNA databases have some deficiencies, including the lack of lncRNA data from some model plants, uneven annotation standards, a lack of visualization for expression patterns, and the absence of epigenetic information. 
To overcome these problems, we upgraded our Plant Long noncoding RNA Database (PLncDB, http://plncdb.tobaccodb.org/), which was based on a uniform annotation pipeline. PLncDB V2.0 currently contains 1 246 372 lncRNAs for 80 plant species based on 13 834 RNA-Seq datasets, integrating lncRNA information from four other resources including EVLncRNAs, RNAcentral and etc. Expression patterns and epigenetic signals can be visualized using multiple tools (JBrowse, eFP Browser and EPexplorer). Targets and regulatory networks for lncRNAs are also provided for function exploration. In addition, PLncDB V2.0 is hierarchical and user-friendly and has five built-in search engines. We believe PLncDB V2.0 is useful for the plant lncRNA community and data mining studies and provides a comprehensive resource for data-driven lncRNA research in plants.",PLncDB,0.996884823,Plant Long noncoding RNA Database,0.983137565,PLncDB,0.996884823,1,NA,23476021,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,1/1/2021 +26211629,http://bioinformatics.ahau.edu.cn/PLNlncRbase,"PLNlncRbase: A resource for experimentally identified lncRNAs in plants. Accumulating published reports have confirmed the critical biological role (e.g., cell differentiation, gene regulation, stress response) for plant long non-coding RNAs (lncRNAs). However, a literature-derived database with the aim of lncRNA curation, data deposit and further distribution remains still absent for this particular lncRNA clade. PLNlncRbase has been designed as an easy-to-use resource to provide detailed information for experimentally identified plant lncRNAs. In the current version, PLNlncRbase has manually collected data from nearly 200 published literature, covering a total of 1187 plant lncRNAs in 43 plant species. The user can retrieve plant lncRNA entries from a well-organized interface through a keyword search by using the name of plant species or a lncRNA identifier. 
Each entry upon a query will be returned with detailed information for a specific plant lncRNA, including the species name, a lncRNA identifier, a brief description of the potential biological role, the lncRNA sequence, the lncRNA classification, an expression pattern of the lncRNA, the tissue/developmental stage/condition for lncRNA expression, the detection method for lncRNA expression, a reference literature, and the potential target gene(s) of the lncRNA extracted from the original reference. This database will be regularly updated to greatly facilitate future investigations of plant lncRNAs pertaining to their biological significance. The PLNlncRbase database is now freely available at http://bioinformatics.ahau.edu.cn/PLNlncRbase.",PLNlncRbase,0.997170389,NA,0,PLNlncRbase,0.997170389,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/23/2015 +32190163,http://www.insight-group.org/variants/databases,"The Prospective Lynch Syndrome Database reports enable evidence-based personal precision health care. The aims of the Prospective Lynch Syndrome Database (PLSD) are to provide empirical prospectively observed data on the incidences of cancer in different organs, survival following cancer and the effects of interventions in carriers of pathogenic variants of the mismatch repair genes (path_MMR) categorized by age, gene and gender. Although PLSD is assumption-free, as with any study the ascertainment procedures used to identify the study cohort will introduce selection biases which have to be declared and considered in detail in order to provide robust and valid results. This paper provides a commentary on the methods used and considers how results from the PLSD reports should be interpreted. A number of the results from PLSD were novel and some in conflict with previous assumptions. 
Notably, colonoscopic surveillance did not prevent colo-rectal cancer, survival after colo-rectal, endometrial and ovarian cancer was good, no survival gain was observed with more frequent colonoscopy, new causes of cancer-related death were observed in survivors of first cancers due to later cancers in other organs, variants in the different MMR genes caused distinct multi-cancer syndromes characterized by different penetrance and phenotypes. The www.PLSD.eu website together with the InSiGHT database website (https://www.insight-group.org/variants/databases/) now facilitate evidence-based personalized precision health care for individual carriers at increased risk of cancer. The arguments are summarized in a final discussion on how to conceptualize current knowledge for the different practical purposes of treating cancers, genetic counselling and prevention, and for understanding /research on carcinogenetic mechanisms.",PLSD,0.991903663,Prospective Lynch Syndrome Database,0.954046357,PLSD,0.991903663,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/14/2020 +30380090,http://ccb-microbe.cs.uni-saarland.de/plsdb,"PLSDB: a resource of complete bacterial plasmids. The study of bacterial isolates or communities requires the analysis of the therein included plasmids in order to provide an extensive characterization of the organisms. Plasmids harboring resistance and virulence factors are of especial interest as they contribute to the dissemination of antibiotic resistance. As the number of newly sequenced bacterial genomes is growing a comprehensive resource is required which will allow to browse and filter the available plasmids, and to perform sequence analyses. Here, we present PLSDB, a resource containing 13 789 plasmid records collected from the NCBI nucleotide database. The web server provides an interactive view of all obtained plasmids with additional meta information such as sequence characteristics, sample-related information and taxonomy. 
Moreover, nucleotide sequence data can be uploaded to search for short nucleotide sequences (e.g. specific genes) in the plasmids, to compare a given plasmid to the records in the collection or to determine whether a sample contains one or multiple of the known plasmids (containment analysis). The resource is freely accessible under https://ccb-microbe.cs.uni-saarland.de/plsdb/.",PLSDB,0.992310107,NA,0,PLSDB,0.992310107,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +31738435,http://pmbd.genome-mining.cn/home,"PMBD: a Comprehensive Plastics Microbial Biodegradation Database. . Since the invention over a hundred years ago, plastics have been used in many applications, and they are involved in every aspect of our lives. The extensive usage of plastics results in a tremendous amount of waste, which has become a severe burden on the environment. Several degradation approaches exist in nature to cope with ever-increasing plastic waste. Among these approaches, biodegradation by microorganisms has emerged as a natural way, which is favored by many environmentally conscious societies. To facilitate the study on biodegradation of plastics, we developed an online resource, Plastics Microbial Biodegradation Database (PMBD), to gather and present the information about microbial biodegradation of plastics. In this database, 949 microorganisms-plastics relationships and 79 genes involved in the biodegradation of plastics were manually collected and confirmed through literature searching. In addition, more than 8000 automatically annotated enzyme sequences, which were predicted to be involved in the plastics biodegradation, were extracted from the TrEMBL section of the UniProt database. The PMBD database is presented with a website at http://pmbd.genome-mining.cn/home. Data may be accessed through browsing or searching. 
Also included on the website are a sequence alignment tool and a function prediction tool.",PMBD,0.987427726,Plastics Microbial Biodegradation Database,0.977427036,PMBD,0.987427726,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +27733507,http://www.sesame-bioinfo.org/PMDBase,"PMDBase: a database for studying microsatellite DNA and marker development in plants. Microsatellite DNAs (or SSRs) are important genomic components involved in many important biological functions. SSRs have been extensively exploited as molecular markers for diverse applications including genetic diversity, linkage/association mapping of gene/QTL, marker-assisted selection, variety identification and evolution analysis. However, a comprehensive database or web service for studying microsatellite DNAs and marker development in plants is lacking. Here, we developed a database, PMDBase, which integrates large amounts of microsatellite DNAs from genome sequenced plant species and includes a web service for microsatellite DNAs identification. In PMDBase, 26 230 099 microsatellite DNAs were identified spanning 110 plant species. Up to three pairs of primers were supplied for every microsatellite DNA. For 81 species, genomic features of the microsatellite DNAs (genic or non-genic) were supplied with the corresponding genes or transcripts from public databases. Microsatellite DNAs can be explored through browsing and searching modules with a user-friendly web interface and customized software. Furthermore, we developed MISAweb and embedded Primer3web to help users to identify microsatellite DNAs and design corresponding primers in their own genomic sequences online. All datasets of microsatellite DNAs can be downloaded conveniently. 
PMDBase will be updated regularly with new available genome data and can be accessed freely via the address http://www.sesame-bioinfo.org/PMDBase.",PMDBase,0.997673035,NA,0,PMDBase,0.997673035,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/12/2016 +33554247,http://easybioai.com/PMIDB,"Prediction and collection of protein-metabolite interactions. . Interactions between proteins and small molecule metabolites play vital roles in regulating protein functions and controlling various cellular processes. The activities of metabolic enzymes, transcription factors, transporters and membrane receptors can all be mediated through protein-metabolite interactions (PMIs). Compared with the rich knowledge of protein-protein interactions, little is known about PMIs. To the best of our knowledge, no existing database has been developed for collecting PMIs. The recent rapid development of large-scale mass spectrometry analysis of biomolecules has led to the discovery of large amounts of PMIs. Therefore, we developed the PMI-DB to provide a comprehensive and accurate resource of PMIs. A total of 49 785 entries were manually collected in the PMI-DB, corresponding to 23 small molecule metabolites, 9631 proteins and 4 species. Unlike other databases that only provide positive samples, the PMI-DB provides non-interaction between proteins and metabolites, which not only reduces the experimental cost for biological experimenters but also facilitates the construction of more accurate algorithms for researchers using machine learning. To show the convenience of the PMI-DB, we developed a deep learning-based method to predict PMIs in the PMI-DB and compared it with several methods. The experimental results show that the area under the curve and area under the precision-recall curve of our method are 0.88 and 0.95, respectively. 
Overall, the PMI-DB provides a user-friendly interface for browsing the biological functions of metabolites/proteins of interest, and experimental techniques for identifying PMIs in different species, which provides important support for furthering the understanding of cellular processes. The PMI-DB is freely accessible at http://easybioai.com/PMIDB.",PMI-DB,0.978619933,NA,0,PMI-DB,0.978619933,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2021 +31602478,http://www.pmiren.com,"PmiREN: a comprehensive encyclopedia of plant miRNAs. MicroRNAs (miRNAs) are small non-coding RNA molecules that function as diverse endogenous gene regulators at the post-transcriptional level. In the past two decades, as research effort on miRNA identification, function and evolution has soared, so has the demand for miRNA databases. However, the current plant miRNA databases suffer from several typical drawbacks, including a lack of entries for many important species, uneven annotation standards across different species, abundant questionable entries, and limited annotation. To address these issues, we developed a knowledge-based database called Plant miRNA Encyclopedia (PmiREN, http://www.pmiren.com/), which was based on uniform processing of sequenced small RNA libraries using miRDeep-P2, followed by manual curation using newly updated plant miRNA identification criteria, and comprehensive annotation. PmiREN currently contains 16,422 high confidence novel miRNA loci in 88 plant species and 3,966 retrieved from miRBase. For every miRNA entry, information on precursor sequence, precursor secondary structure, expression pattern, clusters and synteny in the genome, potential targets supported by Parallel Analysis of RNA Ends (PARE) sequencing, and references is attached whenever possible. PmiREN is hierarchically accessible and has eight built-in search engines. 
We believe PmiREN is useful for plant miRNA cataloguing and data mining, therefore a resource for data-driven miRNA research in plants.",PmiREN,0.997984946,Plant miRNA Encyclopedia,0.926905349,PmiREN,0.997984946,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +27789569,http://pmkb.weill.cornell.edu,"The cancer precision medicine knowledge base for structured clinical-grade mutations and interpretations. Objective This paper describes the Precision Medicine Knowledge Base (PMKB; https://pmkb.weill.cornell.edu ), an interactive online application for collaborative editing, maintenance, and sharing of structured clinical-grade cancer mutation interpretations. Materials and methods PMKB was built using the Ruby on Rails Web application framework. Leveraging existing standards such as the Human Genome Variation Society variant description format, we implemented a data model that links variants to tumor-specific and tissue-specific interpretations. Key features of PMKB include support for all major variant types, standardized authentication, distinct user roles including high-level approvers, and detailed activity history. A REpresentational State Transfer (REST) application-programming interface (API) was implemented to query the PMKB programmatically. Results At the time of writing, PMKB contains 457 variant descriptions with 281 clinical-grade interpretations. The EGFR, BRAF, KRAS, and KIT genes are associated with the largest numbers of interpretable variants. PMKB's interpretations have been used in over 1500 AmpliSeq tests and 750 whole-exome sequencing tests. The interpretations are accessed either directly via the Web interface or programmatically via the existing API. Discussion An accurate and up-to-date knowledge base of genomic alterations of clinical significance is critical to the success of precision medicine programs. The open-access, programmatically accessible PMKB represents an important attempt at creating such a resource in the field of oncology. 
Conclusion The PMKB was designed to help collect and maintain clinical-grade mutation interpretations and facilitate reporting for clinical cancer genomic testing. The PMKB was also designed to enable the creation of clinical cancer genomics automated reporting pipelines via an API.",PMKB,0.992120802,Precision Medicine Knowledge Base,0.967980176,PMKB,0.992120802,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2017 +23624946,http://www.proteinmodelportal.org,"The Protein Model Portal--a comprehensive resource for protein structure and model information. The Protein Model Portal (PMP) has been developed to foster effective use of 3D molecular models in biomedical research by providing convenient and comprehensive access to structural information for proteins. Both experimental structures and theoretical models for a given protein can be searched simultaneously and analyzed for structural variability. By providing a comprehensive view on structural information, PMP offers the opportunity to apply consistent assessment and validation criteria to the complete set of structural models available for proteins. PMP is an open project so that new methods developed by the community can contribute to PMP, for example, new modeling servers for creating homology models and model quality estimation servers for model validation. The accuracy of participating modeling servers is continuously evaluated by the Continuous Automated Model EvaluatiOn (CAMEO) project. The PMP offers a unique interface to visualize structural coverage of a protein combining both theoretical models and experimental structures, allowing straightforward assessment of the model quality and hence their utility. The portal is updated regularly and actively developed to include latest methods in the field of computational structural biology. 
Database URL: http://www.proteinmodelportal.org.",PMP,0.941905677,Protein Model,0.727932513,PMP,0.941905677,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/26/2013 +28862395,http://pmsdn.hms.harvard.edu,"Phelan-McDermid syndrome data network: Integrating patient reported outcomes with clinical notes and curated genetic reports. The heterogeneity of patient phenotype data are an impediment to the research into the origins and progression of neuropsychiatric disorders. This difficulty is compounded in the case of rare disorders such as Phelan-McDermid Syndrome (PMS) by the paucity of patient clinical data. PMS is a rare syndromic genetic cause of autism and intellectual deficiency. In this paper, we describe the Phelan-McDermid Syndrome Data Network (PMS_DN), a platform that facilitates research into phenotype-genotype correlation and progression of PMS by: a) integrating knowledge of patient phenotypes extracted from Patient Reported Outcomes (PRO) data and clinical notes-two heterogeneous, underutilized sources of knowledge about patient phenotypes-with curated genetic information from the same patient cohort and b) making this integrated knowledge, along with a suite of statistical tools, available free of charge to authorized investigators on a Web portal https://pmsdn.hms.harvard.edu. PMS_DN is a Patient Centric Outcomes Research Initiative (PCORI) where patients and their families are involved in all aspects of the management of patient data in driving research into PMS. To foster collaborative research, PMS_DN also makes patient aggregates from this knowledge available to authorized investigators using distributed research networks such as the PCORnet PopMedNet. PMS_DN is hosted on a scalable cloud based environment and complies with all patient data privacy regulations. 
As of October 31, 2016, PMS_DN integrates high-quality knowledge extracted from the clinical notes of 112 patients and curated genetic reports of 176 patients with preprocessed PRO data from 415 patients.",PMS_DN,0.993797079,Phelan-McDermid syndrome data network,0.926657796,PMS_DN,0.993797079,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2017 +23725466,http://pmted.agrinome.org,"PMTED: a plant microRNA target expression database. Background MicroRNAs (miRNAs) are identified in nearly all plants where they play important roles in development and stress responses by target mRNA cleavage or translation repression. MiRNAs exert their functions by sequence complementation with target genes and hence their targets can be predicted using bioinformatics algorithms. In the past two decades, microarray technology has been employed to study genes involved in important biological processes such as biotic response, abiotic response, and specific tissues and developmental stages, many of which are miRNA targets. Despite their value in assisting research work for plant biologists, miRNA target genes are difficult to access without pre-processing and assistance of necessary analytical and visualization tools because they are embedded in a large body of microarray data that are scattered around in public databases. Description Plant MiRNA Target Expression Database (PMTED) is designed to retrieve and analyze expression profiles of miRNA targets represented in the plethora of existing microarray data that are manually curated. It provides a Basic Information query function for miRNAs and their target sequences, gene ontology, and differential expression profiles. It also provides searching and browsing functions for a global Meta-network among species, bioprocesses, conditions, and miRNAs, meta-terms curated from well annotated microarray experiments. Networks are displayed through a Cytoscape Web-based graphical interface. 
In addition to conserved miRNAs, PMTED provides a target prediction portal for user-defined novel miRNAs and corresponding target expression profile retrieval. Hypotheses that are suggested by miRNA-target networks should provide starting points for further experimental validation. Conclusions PMTED exploits value-added microarray data to study the contextual significance of miRNA target genes and should assist functional investigation for both miRNAs and their targets. PMTED will be updated over time and is freely available for non-commercial use at http://pmted.agrinome.org.",PMTED,0.991327643,Plant MiRNA Target Expression Database,0.971420904,PMTED,0.991327643,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/3/2013 +34393586,http://www.pndb.fr).In,"Kakila database: Towards a FAIR community approved database of cetacean presence in the waters of the Guadeloupe Archipelago, based on citizen science. Background In the French West Indies, more than 20 species of cetaceans have been observed over the last decades. The recognition of this hotspot of biodiversity of marine mammals, observed in the French Exclusive Economic Zone of the West Indies, motivated the French government to create in 2010 a marine protected area (MPA) dedicated to the conservation of marine mammals: the Agoa Sanctuary. Threats that cetacean populations face are multiple, but well-documented. Cetacean conservation can only be achieved if relevant and reliable data are available, starting by occurrence data. In the Guadeloupe Archipelago and in addition to some data collected by the Agoa Sanctuary, occurrence data are mainly available through the contribution of citizen science and of local stakeholders (i.e. non-profit organisations (NPO) and whale-watchers). However, no observation network has been coordinated and no standards exist for cetacean presence data collection and management. 
New information In recent years, several whale watchers and NPOs regularly collected cetacean observation data around the Guadeloupe Archipelago. Our objective was to gather datasets from three Guadeloupean whale watchers, two NPOs and the Agoa Sanctuary, that agreed to share their data. These heterogeneous data went through a careful process of curation and standardisation in order to create a new extended database, using a newly-designed metadata set. This aggregated dataset contains a total of 4,704 records of 21 species collected in the Guadeloupe Archipelago from 2000 to 2019. The database was called Kakila (""who is there?"" in Guadeloupean Creole). The Kakila database was developed following the FAIR principles with the ultimate objective of ensuring sustainability. All these data were transferred into the PNDB repository (Pöle National de Données de Biodiversité, Biodiversity French Data Hub, https://www.pndb.fr).In the Agoa Sanctuary and surrounding waters, marine mammals have to interact with increasing anthropogenic pressure from growing human activities. In this context, the Kakila database fulfils the need for an organised system to structure marine mammal occurrences collected by multiple local stakeholders with a common objective: contribute to the knowledge and conservation of cetaceans living in the French Antilles waters. Much needed data analysis will enable us to identify high cetacean presence areas, to document the presence of rarer species and to determine areas of possible negative interactions with anthropogenic activities.",PNDB,0.970943987,NA,0,PNDB,0.970943987,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,7/22/2021 +25398903,http://structuralbiology.cau.edu.cn/PNRD,"PNRD: a plant non-coding RNA database. The first ncRNA found was an alanine tRNA in baker's yeast, and the first detected microRNAs (miRNAs) promoted ncRNA research to a whole new level. 
Research on ncRNAs in animals has focused on the medical field, while in plant scientists are more concerned with improving agronomic traits. In 2010, we constructed a plant miRNA database named PMRD to meet the demand for miRNA research in plants. To provide a way to do fundamental research on plant ncRNAs and take full advantage of tremendous public resources, we designed an updated platform called plant ncRNA database (PNRD) based on its predecessor PMRD, which is accessible at http://structuralbiology.cau.edu.cn/PNRD. We collected a total of 25739 entries of 11 different types of ncRNAs from 150 plant species. Targets of miRNAs were extended to 178138 pairs in 46 species, while the number of miRNA expression profiles reached 35. Improvements in PNRD are not only the larger amounts of data, but also better service, such as a more user-friendly interface, more multifunctional and browsing options and more background data for users to download. We also integrated currently prevalent technologies and toolkits to strengthen the capability of the database and provide a one-stop service for scientific users.",PNRD,0.991778493,plant ncRNA database,0.92003082,PNRD,0.991778493,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/14/2014 +24092884,http://podb.nibb.ac.jp/Organellome,"The Plant Organelles Database 3 (PODB3) update 2014: integrating electron micrographs and new options for plant organelle research. The Plant Organelles Database 2 (PODB2), which was first launched in 2006 as PODB, provides static image and movie data of plant organelles, protocols for plant organelle research and external links to relevant websites. PODB2 has facilitated plant organellar research and the understanding of plant organelle dynamics. To provide comprehensive information on plant organelles in more detail, PODB2 was updated to PODB3 (http://podb.nibb.ac.jp/Organellome/). PODB3 contains two additional components: the electron micrograph database and the perceptive organelles database. 
Through the electron micrograph database, users can examine the subcellular and/or suborganellar structures in various organs of wild-type and mutant plants. The perceptive organelles database provides information on organelle dynamics in response to external stimuli. In addition to the extra components, the user interface for access has been enhanced in PODB3. The data in PODB3 are directly submitted by plant researchers and can be freely downloaded for use in further analysis. PODB3 contains all the information included in PODB2, and the volume of data and protocols deposited in PODB3 continue to grow steadily. We welcome contributions of data from all plant researchers to enhance the utility and comprehensiveness of PODB3.",PODB,0.986917019,Plant Organelles Database,0.934489107,PODB,0.986917019,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/3/2013 +25505034,http://bioinf.mind.meiji.ac.jp/podc,"Plant Omics Data Center: an integrated web repository for interspecies gene expression networks with NLP-based curation. Comprehensive integration of large-scale omics resources such as genomes, transcriptomes and metabolomes will provide deeper insights into broader aspects of molecular biology. For better understanding of plant biology, we aim to construct a next-generation sequencing (NGS)-derived gene expression network (GEN) repository for a broad range of plant species. So far we have incorporated information about 745 high-quality mRNA sequencing (mRNA-Seq) samples from eight plant species (Arabidopsis thaliana, Oryza sativa, Solanum lycopersicum, Sorghum bicolor, Vitis vinifera, Solanum tuberosum, Medicago truncatula and Glycine max) from the public short read archive, digitally profiled the entire set of gene expression profiles, and drawn GENs by using correspondence analysis (CA) to take advantage of gene expression similarities. 
In order to understand the evolutionary significance of the GENs from multiple species, they were linked according to the orthology of each node (gene) among species. In addition to other gene expression information, functional annotation of the genes will facilitate biological comprehension. Currently we are improving the given gene annotations with natural language processing (NLP) techniques and manual curation. Here we introduce the current status of our analyses and the web database, PODC (Plant Omics Data Center; http://bioinf.mind.meiji.ac.jp/podc/), now open to the public, providing GENs, functional annotations and additional comprehensive omics resources.",PODC,0.992418885,Center,0.597645581,PODC,0.992418885,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/11/2014 +28966675,http://bioinfo.ahau.edu.cn/pogd,"A database for orphan genes in Poaceae. Orphan genes refer to a group of protein-coding genes lacking recognizable homologs in the other organisms. Extensive studies have demonstrated that numerous newly sequenced genomes contain a significant number of orphan genes, which have important roles in plant's responses to the environment. Due to a lack of phylogenetic conservation, the origin of orphan genes and their functions are currently not well defined. In the present study, a Poaceae orphan genes database (POGD; http://bioinfo.ahau.edu.cn/pogd) was established to serve as a user-friendly web interface for entry browsing, searching and downloading orphan genes from various plants. Four Poaceae species, including Brachypodium distachyon, Oryza sativa, Sorghum bicolor and Zea mays, are included in the current version of POGD. The database provides gene descriptions (chromosome strands, physical location), gene product records (protein length, isoelectric point, molecular weight as well as gene and protein sequences) and functional annotations (cellular role, gene ontology category, subcellular localization prediction). 
Basic Local Alignment Search Tool and comparative analyses were also provided on the website. POGD will serve as a comprehensive and reliable repository, which will help uncover regulatory mechanisms of orphan genes and may assist in the development of comparative genomics in plant biology.",POGD,0.997819841,Poaceae orphan genes database,0.986579394,POGD,0.997819841,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/9/2017 +24198250,http://pogo.ece.drexel.edu,"POGO-DB--a database of pairwise-comparisons of genomes and conserved orthologous genes. POGO-DB (http://pogo.ece.drexel.edu/) provides an easy platform for comparative microbial genomics. POGO-DB allows users to compare genomes using pre-computed metrics that were derived from extensive computationally intensive BLAST comparisons of >2000 microbes. These metrics include (i) average protein sequence identity across all orthologs shared by two genomes, (ii) genomic fluidity (a measure of gene content dissimilarity), (iii) number of 'orthologs' shared between two genomes, (iv) pairwise identity of the 16S ribosomal RNA genes and (v) pairwise identity of an additional 73 marker genes present in >90% prokaryotes. Users can visualize these metrics against each other in a 2D plot for exploratory analysis of genome similarity and of how different aspects of genome similarity relate to each other. The results of these comparisons are fully downloadable. In addition, users can download raw BLAST results for all or user-selected comparisons. Therefore, we provide users with full flexibility to carry out their own downstream analyses, by creating easy access to data that would normally require heavy computational resources to generate. 
POGO-DB should prove highly useful for researchers interested in comparative microbiology and benefit the microbiome/metagenomic communities by providing the information needed to select suitable phylogenetic marker genes within particular lineages.",POGO-DB,0.998100138,NA,0,POGO-DB,0.998100138,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/5/2013 +24340041,http://pogs.uoregon.edu,"POGs2: a web portal to facilitate cross-species inferences about protein architecture and function in plants. The Putative orthologous Groups 2 Database (POGs2) (http://pogs.uoregon.edu/) integrates information about the inferred proteomes of four plant species (Arabidopsis thaliana, Zea mays, Orza sativa, and Populus trichocarpa) in a display that facilitates comparisons among orthologs and extrapolation of annotations among species. A single-page view collates key functional data for members of each Putative Orthologous Group (POG): graphical representations of InterPro domains, predicted and established intracellular locations, and imported gene descriptions. The display incorporates POGs predicted by two different algorithms as well as gene trees, allowing users to evaluate the validity of POG memberships. The web interface provides ready access to sequences and alignments of POG members, as well as sequences, alignments, and domain architectures of closely-related paralogs. A simple and flexible search interface permits queries by BLAST and by any combination of gene identifier, keywords, domain names, InterPro identifiers, and intracellular location. The concurrent display of domain architectures for orthologous proteins highlights errors in gene models and false-negatives in domain predictions. 
The POGs2 layout is also useful for exploring candidate genes identified by transposon tagging, QTL mapping, map-based cloning, and proteomics, and for navigating between orthologous groups that belong to the same gene family.",POGs2,0.997185946,Putative orthologous Groups 2 Database,0.964314751,POGs2,0.997185946,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/10/2013 +33186585,http://polarprotdb.enzim.hu,"PolarProtDb: A Database of Transmembrane and Secreted Proteins showing Apical-Basal Polarity. Most cells in multicellular organisms are somehow asymmetric, polarized: maintaining separate membrane domains. Typical examples are the epithelial cells (apical-basal polarization), neurons (dendritic-axonal domains), or migratory cells (with a leading and a trailing edge). Here we present the most comprehensive database containing experimentally verified mammalian proteins that display polarized sorting or secretion, focusing on epithelial polarity. In addition to the source cells or tissues, homology-based inferences and transmembrane topology (if applicable) are all provided. PolarProtDb also offers a detailed interface displaying all information that may be relevant for trafficking: including post-translational modifications (glycosylations and phosphorylations), known or predicted short linear motifs conserved across orthologs, as well as potential interaction partners. Data on polarized sorting has so far been scattered across myriads of publications, hence difficult to access. This information can help researchers in several areas, such as scanning for potential entry points of viral agents like COVID-19. PolarProtDb shall be a useful resource to design future experiments as well as for comparative analyses. 
The database is available at http://polarprotdb.enzim.hu.",PolarProtDb,0.996842086,NA,0,PolarProtDb,0.996842086,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/10/2020 +21993301,http://polbase.neb.com,"Polbase: a repository of biochemical, genetic and structural information about DNA polymerases. Polbase (http://polbase.neb.com) is a freely accessible database of DNA polymerases and related references. It has been developed in a collaborative model with experts whose contributions reflect their varied backgrounds in genetics, structural biology and biochemistry. Polbase is designed to compile detailed results of polymerase experimentation, presenting them in a dynamic view to inform further research. After validation, results from references are displayed in context with relevant experimental details and are always traceable to their source publication. Polbase is connected to other resources, including PubMed, UniProt and the RCSB Protein Data Bank, to provide multi-faceted views of polymerase knowledge. In addition to a simple web interface, Polbase data is exposed for custom analysis by external software. With the contributions of many polymerase investigators, Polbase has become a powerful research tool covering most important aspects of polymerases, from sequence and structure to biochemistry.",Polbase,0.996193171,NA,0,Polbase,0.996193171,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/12/2011 +29069441,http://www.polya-db.org/v3,"PolyA_DB 3 catalogs cleavage and polyadenylation sites identified by deep sequencing in multiple genomes. PolyA_DB is a database cataloging cleavage and polyadenylation sites (PASs) in several genomes. Previous versions were based mainly on expressed sequence tags (ESTs), which had a limited amount and could lead to inaccurate PAS identification due to the presence of internal A-rich sequences in transcripts. Here, we present an updated version of the database based solely on deep sequencing data. 
First, PASs are mapped by the 3' region extraction and deep sequencing (3'READS) method, ensuring unequivocal PAS identification. Second, a large volume of data based on diverse biological samples increases PAS coverage by 3.5-fold over the EST-based version and provides PAS usage information. Third, strand-specific RNA-seq data are used to extend annotated 3' ends of genes to obtain more thorough annotations of alternative polyadenylation (APA) sites. Fourth, conservation information of PAS across mammals sheds light on significance of APA sites. The database (URL: http://www.polya-db.org/v3) currently holds PASs in human, mouse, rat and chicken, and has links to the UCSC genome browser for further visualization and for integration with other genomic data.",PolyA_DB,0.997724459,NA,0,PolyA_DB,0.997724459,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +31617559,http://polyasite.unibas.ch,"PolyASite 2.0: a consolidated atlas of polyadenylation sites from 3' end sequencing. Generated by 3' end cleavage and polyadenylation at alternative polyadenylation (poly(A)) sites, alternative terminal exons account for much of the variation between human transcript isoforms. More than a dozen protocols have been developed so far for capturing and sequencing RNA 3' ends from a variety of cell types and species. In previous studies, we have used these data to uncover novel regulatory signals and cell type-specific isoforms. Here we present an update of the PolyASite (https://polyasite.unibas.ch) resource of poly(A) sites, constructed from publicly available human, mouse and worm 3' end sequencing datasets by enforcing uniform quality measures, including the flagging of putative internal priming sites. Through integrated processing of all data, we identified and clustered sites that are closely spaced and share polyadenylation signals, as these are likely the result of stochastic variations in processing. 
For each cluster, we identified the representative - most frequently processed - site and estimated the relative use in the transcriptome across all samples. We have established a modern web portal for efficient finding, exploration and export of data. Database generation is fully automated, greatly facilitating incorporation of new datasets and the updating of underlying genome resources.",PolyASite,0.985653996,NA,0,PolyASite,0.985653996,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +"22080514, 24163105",http://compbio.uthsc.edu/miRSNP,"PolymiRTS Database 2.0: linking polymorphisms in microRNA target sites with human diseases and complex traits. The polymorphism in microRNA target site (PolymiRTS) database aims to identify single-nucleotide polymorphisms (SNPs) that affect miRNA targeting in human and mouse. These polymorphisms can disrupt the regulation of gene expression by miRNAs and are candidate genetic variants responsible for transcriptional and phenotypic variation. The database is therefore organized to provide links between SNPs in miRNA target sites, cis-acting expression quantitative trait loci (eQTLs), and the results of genome-wide association studies (GWAS) of human diseases. Here, we describe new features that have been integrated in the PolymiRTS database, including: (i) polymiRTSs in genes associated with human diseases and traits in GWAS, (ii) polymorphisms in target sites that have been supported by a variety of experimental methods and (iii) polymorphisms in miRNA seed regions. A large number of newly identified microRNAs and SNPs, recently published mouse phenotypes, and human and mouse eQTLs have also been integrated into the database. 
The PolymiRTS database is available at http://compbio.uthsc.edu/miRSNP/.",PolymiRTS,0.997374415,polymorphism in microRNA target site,0.918009511,PolymiRTS,0.997374415,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/24/2013 +26980520,http://lightning.med.monash.edu/polyq2,"PolyQ 2.0: an improved version of PolyQ, a database of human polyglutamine proteins. . Proteins with expanded polyglutamine (polyQ) repeats are involved in human neurodegenerative diseases, via a gain-of-function mechanism of neuronal toxicity involving protein conformational changes that result in the formation and deposition of β-sheet-rich aggregates. Aggregation is dependent on the context and properties of the host protein, such as domain context and location of the repeat tract. In order to explore this relationship in greater detail, here we describe PolyQ 2.0, an updated database that provides a comprehensive knowledgebase for human polyQ proteins. Compared with the previous PolyQ database, our new database provides a variety of substantial updates including detailed biological annotations and search options. Biological annotations in terms of domain context information, protein structural and functional annotation, single point mutations, predicted disordered regions, protein-protein interaction partners, metabolic/signaling pathways, post-translational modification sites and evolutionary information are made available. Several new database functionalities have also been provided, including search using multiple/combinatory keywords, and submission of new data entries. Also, several third-party plug-ins are employed to enhance data visualization in PolyQ 2.0. In PolyQ 2.0 the proteins are reclassified into 3 new categories and contain 9 reviewed disease-associated polyQ proteins, 105 reviewed non-disease polyQ proteins and 146 un-reviewed polyQ proteins (reviewed by UniProt curators). 
We envisage that this updated database will be a useful resource for functional and structural investigation of human polyQ proteins. Database URL: http://lightning.med.monash.edu/polyq2/.",PolyQ,0.976776481,NA,0,PolyQ,0.976776481,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/15/2016 +23151233,http://polysac3db.cermav.cnrs.fr,"PolySac3DB: an annotated data base of 3 dimensional structures of polysaccharides. Background Polysaccharides are ubiquitously present in the living world. Their structural versatility makes them important and interesting components in numerous biological and technological processes ranging from structural stabilization to a variety of immunologically important molecular recognition events. The knowledge of polysaccharide three-dimensional (3D) structure is important in studying carbohydrate-mediated host-pathogen interactions, interactions with other bio-macromolecules, drug design and vaccine development as well as material science applications or production of bio-ethanol. Description PolySac3DB is an annotated database that contains the 3D structural information of 157 polysaccharide entries that have been collected from an extensive screening of scientific literature. They have been systematically organized using standard names in the field of carbohydrate research into 18 categories representing polysaccharide families. Structure-related information includes the saccharides making up the repeat unit(s) and their glycosidic linkages, the expanded 3D representation of the repeat unit, unit cell dimensions and space group, helix type, diffraction diagram(s) (when applicable), experimental and/or simulation methods used for structure description, link to the abstract of the publication, reference and the atomic coordinate files for visualization and download. The database is accompanied by a user-friendly graphical user interface (GUI). 
It features interactive displays of polysaccharide structures and customized search options for beginners and experts, respectively. The site also serves as an information portal for polysaccharide structure determination techniques. The web-interface also references external links where other carbohydrate-related resources are available. Conclusion PolySac3DB is established to maintain information on the detailed 3D structures of polysaccharides. All the data and features are available via the web-interface utilizing the search engine and can be accessed at http://polysac3db.cermav.cnrs.fr.",PolySac3DB,0.961255148,NA,0,PolySac3DB,0.961255148,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/14/2012 +22509333,http://crdd.osdd.net/raghava/polysacdb,"PolysacDB: a database of microbial polysaccharide antigens and their antibodies. Vaccines based on microbial cell surface polysaccharides have long been considered as attractive means to control infectious diseases. To realize this goal, detailed systematic information about the antigenic polysaccharide is necessary. However, only a few databases that provide limited knowledge in this area are available. This paper describes PolysacDB, a manually curated database of antigenic polysaccharides. We collected and compiled comprehensive information from literature and web resources about antigenic polysaccharides of microbial origin. The current version of the database has 1,554 entries of 149 different antigenic polysaccharides from 347 different microbes. Each entry provides comprehensive information about an antigenic polysaccharide, i.e., its origin, function, protocols for its conjugation to carriers, antibodies produced, details of assay systems, specificities of antibodies, proposed epitopes involved and antibody utilities. For convenience to the user, we have integrated web interface for searching, advanced searching and browsing data in database. 
This database will be useful for researchers working on polysaccharide-based vaccines. It is freely available from the URL: http://crdd.osdd.net/raghava/polysacdb/.",PolysacDB,0.988787591,NA,0,PolysacDB,0.988787591,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/11/2012 +24855436,http://polytraits.lifewatchgreece.eu,"Polytraits: A database on biological traits of marine polychaetes. The study of ecosystem functioning - the role which organisms play in an ecosystem - is becoming increasingly important in marine ecological research. The functional structure of a community can be represented by a set of functional traits assigned to behavioural, reproductive and morphological characteristics. The collection of these traits from the literature is however a laborious and time-consuming process, and gaps of knowledge and restricted availability of literature are a common problem. Trait data are not yet readily being shared by research communities, and even if they are, a lack of trait data repositories and standards for data formats leads to the publication of trait information in forms which cannot be processed by computers. This paper describes Polytraits (http://polytraits.lifewatchgreece.eu), a database on biological traits of marine polychaetes (bristle worms, Polychaeta: Annelida). At present, the database contains almost 20,000 records on morphological, behavioural and reproductive characteristics of more than 1,000 marine polychaete species, all referenced by literature sources. All data can be freely accessed through the project website in different ways and formats, both human-readable and machine-readable, and have been submitted to the Encyclopedia of Life for archival and integration with trait information from other sources.",Polytraits,0.996589065,NA,0,Polytraits,0.996589065,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/17/2014 +25361970,http://www.pombase.org,"PomBase 2015: updates to the fission yeast database. 
PomBase (http://www.pombase.org) is the model organism database for the fission yeast Schizosaccharomyces pombe. PomBase provides a central hub for the fission yeast community, supporting both exploratory and hypothesis-driven research. It provides users easy access to data ranging from the sequence level, to molecular and phenotypic annotations, through to the display of genome-wide high-throughput studies. Recent improvements to the site extend annotation specificity, improve usability and allow for monthly data updates. Both in-house curators and community researchers provide manually curated data to PomBase. The genome browser provides access to published high-throughput data sets and the genomes of three additional Schizosaccharomyces species (Schizosaccharomyces cryophilus, Schizosaccharomyces japonicus and Schizosaccharomyces octosporus).",PomBase,0.994939148,NA,0,PomBase,0.994939148,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/31/2014 +33560568,http://www.pompevariantdatabase.nl,"Update of the Pompe variant database for the prediction of clinical phenotypes: Novel disease-associated variants, common sequence variants, and results from newborn screening. Pompe disease is an inherited disorder caused by disease-associated variants in the acid α-glucosidase gene (GAA). The Pompe disease GAA variant database (http://www.pompevariantdatabase.nl) is a curated, open-source, disease-specific database, and lists disease-associated GAA variants, in silico predictions, and clinical phenotypes reported until 2016. Here, we provide an update to include 226 disease-associated variants that were published until 2020. We also listed 148 common GAA sequence variants that do not cause Pompe disease. GAA variants with unknown severity that were identified only in newborn screening programs were listed as a new feature to indicate the reason why phenotypes were still unknown. Expression studies were performed for common missense variants to predict their severity. 
The updated Pompe disease GAA variant database now includes 648 disease-associated variants, 26 variants from newborn screening, and 237 variants with unknown severity. Regular updates of the Pompe disease GAA variant database will be required to improve genetic counseling and the study of genotype-phenotype relationships.",NA,0,Pompe,0.462291837,Pompe,0.462291837,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,12/21/2020 +30335169,http://pophumanscan.uab.cat,"PopHumanScan: the online catalog of human genome adaptation. Since the migrations that led humans to colonize Earth, our species has faced frequent adaptive challenges that have left signatures in the landscape of genetic variation and that we can identify in our today's genomes. Here, we (i) perform an outlier approach on eight different population genetic statistics for 22 non-admixed human populations of the Phase III of the 1000 Genomes Project to detect selective sweeps at different historical ages, as well as events of recurrent positive selection in the human lineage; and (ii) create PopHumanScan, an online catalog that compiles and annotates all candidate regions under selection to facilitate their validation and thoroughly analysis. Well-known examples of human genetic adaptation published elsewhere are included in the catalog, as well as hundreds of other attractive candidates that will require further investigation. Designed as a collaborative database, PopHumanScan aims to become a central repository to share information, guide future studies and help advance our understanding of how selection has modeled our genomes as a response to changes in the environment or lifestyle of human populations. 
PopHumanScan is open and freely available at https://pophumanscan.uab.cat.",PopHumanScan,0.995905161,NA,0,PopHumanScan,0.995905161,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +27515999,"http://bioinformatics.caf.ac.cn/PoplarGene, http://124.127.201.25/PoplarGene","PoplarGene: poplar gene network and resource for mining functional information for genes from woody plants. Poplar is not only an important resource for the production of paper, timber and other wood-based products, but it has also emerged as an ideal model system for studying woody plants. To better understand the biological processes underlying various traits in poplar, e.g., wood development, a comprehensive functional gene interaction network is highly needed. Here, we constructed a genome-wide functional gene network for poplar (covering ~70% of the 41,335 poplar genes) and created the network web service PoplarGene, offering comprehensive functional interactions and extensive poplar gene functional annotations. PoplarGene incorporates two network-based gene prioritization algorithms, neighborhood-based prioritization and context-based prioritization, which can be used to perform gene prioritization in a complementary manner. Furthermore, the co-functional information in PoplarGene can be applied to other woody plant proteomes with high efficiency via orthology transfer. In addition to poplar gene sequences, the webserver also accepts Arabidopsis reference gene as input to guide the search for novel candidate functional genes in PoplarGene. We believe that PoplarGene (http://bioinformatics.caf.ac.cn/PoplarGene and http://124.127.201.25/PoplarGene) will greatly benefit the research community, facilitating studies of poplar and other woody plants.",PoplarGene,0.995907903,NA,0,PoplarGene,0.995907903,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/12/2016 +21366916,http://www.popoolation.at/pgt,"PoPoolation DB: a user-friendly web-based database for the retrieval of natural polymorphisms in Drosophila. 
Background The enormous potential of natural variation for the functional characterization of genes has been neglected for a long time. Only since recently, functional geneticists are starting to account for natural variation in their analyses. With the new sequencing technologies it has become feasible to collect sequence information for multiple individuals on a genomic scale. In particular sequencing pooled DNA samples has been shown to provide a cost-effective approach for characterizing variation in natural populations. While a range of software tools have been developed for mapping these reads onto a reference genome and extracting SNPs, linking this information to population genetic estimators and functional information still poses a major challenge to many researchers. Results We developed PoPoolation DB a user-friendly integrated database. Popoolation DB links variation in natural populations with functional information, allowing a wide range of researchers to take advantage of population genetic data. PoPoolation DB provides the user with population genetic parameters (Watterson's θ or Tajima's π), Tajima's D, SNPs, allele frequencies and indels in regions of interest. The database can be queried by gene name, chromosomal position, or a user-provided query sequence or GTF file. We anticipate that PoPoolation DB will be a highly versatile tool for functional geneticists as well as evolutionary biologists. 
Conclusions PoPoolation DB, available at http://www.popoolation.at/pgt, provides an integrated platform for researchers to investigate natural polymorphism and associated functional annotations from UCSC and Flybase genome browsers, population genetic estimators and RNA-seq information.",PoPoolation,0.903602421,NA,0,PoPoolation,0.903602421,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/2/2011 +28830355,http://www.ars.usda.gov/Services/docs.htm?docid=6065,"The porcine translational research database: a manually curated, genomics and proteomics-based research resource. Background The use of swine in biomedical research has increased dramatically in the last decade. Diverse genomic- and proteomic databases have been developed to facilitate research using human and rodent models. Current porcine gene databases, however, lack the robust annotation to study pig models that are relevant to human studies and for comparative evaluation with rodent models. Furthermore, they contain a significant number of errors due to their primary reliance on machine-based annotation. To address these deficiencies, a comprehensive literature-based survey was conducted to identify certain selected genes that have demonstrated function in humans, mice or pigs. Results The process identified 13,054 candidate human, bovine, mouse or rat genes/proteins used to select potential porcine homologs by searching multiple online sources of porcine gene information. The data in the Porcine Translational Research Database (( http://www.ars.usda.gov/Services/docs.htm?docid=6065 ) is supported by >5800 references, and contains 65 data fields for each entry, including >9700 full length (5' and 3') unambiguous pig sequences, >2400 real time PCR assays and reactivity information on >1700 antibodies. 
It also contains gene and/or protein expression data for >2200 genes and identifies and corrects 8187 errors (gene duplications artifacts, mis-assemblies, mis-annotations, and incorrect species assignments) for 5337 porcine genes. Conclusions This database is the largest manually curated database for any single veterinary species and is unique among porcine gene databases in regard to linking gene expression to gene function, identifying related gene pathways, and connecting data with other porcine gene databases. This database provides the first comprehensive description of three major Super-families or functionally related groups of proteins (Cluster of Differentiation (CD) Marker genes, Solute Carrier Superfamily, ATP binding Cassette Superfamily), and a comparative description of porcine microRNAs.",NA,0,porcine translational research database,0.834548388,porcine translational research database,0.834548388,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/22/2017 +21472892,http://www.lovd.nl/porcn,"Mutation update for the PORCN gene. Mutations in the PORCN gene were first identified in Goltz-Gorlin syndrome patients in 2007. Since then, several reports have been published describing a large variety of genetic defects resulting in the Goltz-Gorlin syndrome, and mutations or deletions were also reported in angioma serpiginosum, the pentalogy of Cantrell and Limb-Body Wall Complex. Here we present a review of the published mutations in the PORCN gene to date and report on seven new mutations together with the corresponding clinical data. Based on the review we have created a Web-based locus-specific database that lists all identified variants and allows the inclusion of future reports. The database is based on the Leiden Open (source) Variation Database (LOVD) software, and is accessible online at http://www.lovd.nl/porcn. 
At present, the database contains 106 variants, representing 68 different mutations, scattered along the whole coding sequence of the PORCN gene, and 12 large gene rearrangements, which brings up to 80 the number of unique mutations identified in Goltz-Gorlin syndrome patients.",PORCN,0.596062005,NA,0,PORCN,0.596062005,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/21/2011 +24285306,http://porteco.org,"PortEco: a resource for exploring bacterial biology through high-throughput data and analysis tools. PortEco (http://porteco.org) aims to collect, curate and provide data and analysis tools to support basic biological research in Escherichia coli (and eventually other bacterial systems). PortEco is implemented as a 'virtual' model organism database that provides a single unified interface to the user, while integrating information from a variety of sources. The main focus of PortEco is to enable broad use of the growing number of high-throughput experiments available for E. coli, and to leverage community annotation through the EcoliWiki and GONUTS systems. Currently, PortEco includes curated data from hundreds of genome-wide RNA expression studies, from high-throughput phenotyping of single-gene knockouts under hundreds of annotated conditions, from chromatin immunoprecipitation experiments for tens of different DNA-binding factors and from ribosome profiling experiments that yield insights into protein expression. Conditions have been annotated with a consistent vocabulary, and data have been consistently normalized to enable users to find, compare and interpret relevant experiments. PortEco includes tools for data analysis, including clustering, enrichment analysis and exploration via genome browsers. 
PortEco search and data analysis tools are extensively linked to the curated gene, metabolic pathway and regulation content at its sister site, EcoCyc.",PortEco,0.997819364,NA,0,PortEco,0.997819364,1,22064863,NA,NA,NA,do not merge,NA,NA,NA,NA,11/26/2013 +25404129,"http://possum.cbrc.jp/PoSSuM/, http://possum.cbrc.jp/PoSSuM/drug_search","PoSSuM v.2.0: data update and a new function for investigating ligand analogs and target proteins of small-molecule drugs. PoSSuM (http://possum.cbrc.jp/PoSSuM/) is a database for detecting similar small-molecule binding sites on proteins. Since its initial release in 2011, PoSSuM has grown to provide information related to 49 million pairs of similar binding sites discovered among 5.5 million known and putative binding sites. This enlargement of the database is expected to enhance opportunities for biological and pharmaceutical applications, such as predictions of new functions and drug discovery. In this release, we have provided a new service named PoSSuM drug search (PoSSuMds) at http://possum.cbrc.jp/PoSSuM/drug_search/, in which we selected 194 approved drug compounds retrieved from ChEMBL, and detected their known binding pockets and pockets that are similar to them. Users can access and download all of the search results via a new web interface, which is useful for finding ligand analogs as well as potential target proteins. Furthermore, PoSSuMds enables users to explore the binding pocket universe within PoSSuM. Additionally, we have improved the web interface with new functions, including sortable tables and a viewer for visualizing and downloading superimposed pockets.",PoSSuM,0.997448564,PoSSuM drug search,0.722460806,PoSSuM,0.997448564,1,NA,22135290,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/17/2014 +22135290,http://possum.cbrc.jp/PoSSuM,"PoSSuM: a database of similar protein-ligand binding and putative pockets. 
Numerous potential ligand-binding sites are available today, along with hundreds of thousands of known binding sites observed in the PDB. Exhaustive similarity search for such vastly numerous binding site pairs is useful to predict protein functions and to enable rapid screening of target proteins for drug design. Existing databases of ligand-binding sites offer databases of limited scale. For example, SitesBase covers only ~33,000 known binding sites. Inferring protein function and drug discovery purposes, however, demands a much more comprehensive database including known and putative-binding sites. Using a novel algorithm, we conducted a large-scale all-pairs similarity search for 1.8 million known and potential binding sites in the PDB, and discovered over 14 million similar pairs of binding sites. Here, we present the results as a relational database Pocket Similarity Search using Multiple-sketches (PoSSuM) including all the discovered pairs with annotations of various types. PoSSuM enables rapid exploration of similar binding sites among structures with different global folds as well as similar ones. Moreover, PoSSuM is useful for predicting the binding ligand for unbound structures, which provides important clues for characterizing protein structures with unclear functions. The PoSSuM database is freely available at http://possum.cbrc.jp/PoSSuM/.",PoSSuM,0.947280467,NA,0,PoSSuM,0.947280467,1,NA,25404129,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,NA,12/1/2011 +28053162,http://POSTAR.ncrnalab.org,"POSTAR: a platform for exploring post-transcriptional regulation coordinated by RNA-binding proteins. We present POSTAR (http://POSTAR.ncrnalab.org), a resource of POST-trAnscriptional Regulation coordinated by RNA-binding proteins (RBPs). Precise characterization of post-transcriptional regulatory maps has accelerated dramatically in the past few years. 
Based on new studies and resources, POSTAR supplies the largest collection of experimentally probed (∼23 million) and computationally predicted (approximately 117 million) RBP binding sites in the human and mouse transcriptomes. POSTAR annotates every transcript and its RBP binding sites using extensive information regarding various molecular regulatory events (e.g., splicing, editing, and modification), RNA secondary structures, disease-associated variants, and gene expression and function. Moreover, POSTAR provides a friendly, multi-mode, integrated search interface, which helps users to connect multiple RBP binding sites with post-transcriptional regulatory events, phenotypes, and diseases. Based on our platform, we were able to obtain novel insights into post-transcriptional regulation, such as the putative association between CPSF6 binding, RNA structural domains, and Li-Fraumeni syndrome SNPs. In summary, POSTAR represents an early effort to systematically annotate post-transcriptional regulatory maps and explore the putative roles of RBPs in human diseases.",POSTAR,0.967771292,NA,0,POSTAR,0.967771292,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/5/2016 +30239819,http://lulab.life.tsinghua.edu.cn/postar,"POSTAR2: deciphering the post-transcriptional regulatory logics. Post-transcriptional regulation of RNAs is critical to the diverse range of cellular processes. The volume of functional genomic data focusing on post-transcriptional regulation logics continues to grow in recent years. 
In the current database version, POSTAR2 (http://lulab.life.tsinghua.edu.cn/postar), we included the following new features and data: updated ∼500 CLIP-seq datasets (∼1200 CLIP-seq datasets in total) from six species, including human, mouse, fly, worm, Arabidopsis and yeast; added a new module 'Translatome', which is derived from Ribo-seq datasets and contains ∼36 million open reading frames (ORFs) in the genomes from the six species; updated and unified post-transcriptional regulation and variation data. Finally, we improved web interfaces for searching and visualizing protein-RNA interactions with multi-layer information. Meanwhile, we also merged our CLIPdb database into POSTAR2. POSTAR2 will help researchers investigate the post-transcriptional regulatory logics coordinated by RNA-binding proteins and translational landscape of cellular RNAs.",POSTAR2,0.963751674,NA,0,POSTAR2,0.963751674,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +24304897,http://www.plasmaproteomedatabase.org,"Plasma Proteome Database as a resource for proteomics research: 2014 update. Plasma Proteome Database (PPD; http://www.plasmaproteomedatabase.org/) was initially described in the year 2005 as a part of Human Proteome Organization's (HUPO's) pilot initiative on Human Plasma Proteome Project. Since then, improvements in proteomic technologies and increased throughput have led to identification of a large number of novel plasma proteins. To keep up with this increase in data, we have significantly enriched the proteomic information in PPD. This database currently contains information on 10,546 proteins detected in serum/plasma of which 3784 have been reported in two or more studies. The latest version of the database also incorporates mass spectrometry-derived data including experimentally verified proteotypic peptides used for multiple reaction monitoring assays. 
Other novel features include published plasma/serum concentrations for 1278 proteins along with a separate category of plasma-derived extracellular vesicle proteins. As plasma proteins have become a major thrust in the field of biomarkers, we have enabled a batch-based query designated Plasma Proteome Explorer, which will permit the users in screening a list of proteins or peptides against known plasma proteins to assess novelty of their data set. We believe that PPD will facilitate both clinical and basic research by serving as a comprehensive reference of plasma proteins in humans and accelerate biomarker discovery and translation efforts.",PPD,0.996921718,Plasma Proteome Database,0.985280633,PPD,0.996921718,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/3/2013 +27987179,http://ppdb.agr.gifu-u.ac.jp,"Plant Promoter Database (PPDB). ppdb ( http://ppdb.agr.gifu-u.ac.jp ) is a web-based plant promoter database that provides promoter information of each gene in genomes of Arabidopsis, rice, poplar, and Physcomitrella patens. In this database, recognition of a promoter structure is achieved by annotating genome sequences with our sequence lists of bioinformatically identified octamers for core promoter structure (TATA boxes, Initiators, Y Patches, GA and CA Elements) and regulatory element groups (REGs), together with information of transcription start sites (TSSs) that have been experimentally identified. Our promoter elements are octamer sequences that show strongly biased localization profiles in the promoter region, extracted by the local distribution of short sequence (LDSS) analysis. 
In addition, REGs are linked with the information of the PLACE database and also with their physiological roles that are predicted using large-scale gene expression data.",PPDB,0.988334405,Plant Promoter Database,0.942410922,PPDB,0.988334405,1,24194597,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2017 +24194597,http://ppdb.agr.gifu-u.ac.jp,"ppdb: plant promoter database version 3.0. ppdb (http://ppdb.agr.gifu-u.ac.jp) is a plant promoter database that provides information on transcription start sites (TSSs), core promoter structure (TATA boxes, Initiators, Y Patches, GA and CA elements) and regulatory element groups (REGs) as putative and comprehensive transcriptional regulatory elements. Since the last report in this journal, the database has been updated in three areas to version 3.0. First, new genomes have been included in the database, and now ppdb provides information on Arabidopsis thaliana, rice, Physcomitrella patens and poplar. Second, new TSS tag data (34 million) from A. thaliana, determined by a high throughput sequencer, has been added to give a ∼200-fold increase in TSS data compared with version 1.0. This results in a much higher coverage of ∼27,000 A. thaliana genes and finer positioning of promoters even for genes with low expression levels. Third, microarray data-based predictions have been appended as REG annotations which inform their putative physiological roles.",ppdb,0.985931218,plant promoter database,0.84149456,ppdb,0.985931218,1,27987179,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/4/2013 +30804701,http://ppeao.ird.fr,"The PPEAO experimental fishing dataset: Fish from West African estuaries, lagoons and reservoirs. Background This paper describes a dataset of fish, crustacean and mollusc occurrences extracted from the ""Experimental Fishing"" section of the IRD's PPEAO information system. 
PPEAO stands for ""Fish communities and artisanal fisheries of West African estuarine, lagoon and freshwater ecosystems"". This database contains information collected using two different methods: experimental fishing and surveys of the artisanal fisheries that exploit these ecosystems. The database is accessible at http://ppeao.ird.fr. New information The current dataset is available on GBIF.org at 10.15468/ra4voa. It comprises the occurrences of 314 fish, crustacean and mollusc taxa collected in experimental sampling surveys of different aquatic ecosystems in West Africa between 1979 and 2013. Different types of fishing gear were used including purse seines, gill nets and fyke nets. The taxa were identified by IRD scientists or by scientific partners well trained in systematics. Most taxa were identified at species level (97% of cases). This dataset is the result of 213 fishing surveys, 5,362 fishing hauls and 31,709 occurrences (28,428 of fish taxa and 3,281 of crustaceans and molluscs). The number of individuals per species and per haul is included and 80% of occurrences are geolocated.",PPEAO,0.983579457,NA,0,PPEAO,0.983579457,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/14/2019 +30217145,http://ciliates.ihb.ac.cn/database/home,"Pseudocohnilembus persalinus genome database - the first genome database of facultative scuticociliatosis pathogens. Background Pseudocohnilembus persalinus, a unicellular ciliated protozoan, is one of commonest facultative pathogens. We sequenced the macronuclear genome of P. persalinus in 2015, which provided new insights into its pathogenicity. Results Here, we present the P. persalinus genome database (PPGD) ( http://ciliates.ihb.ac.cn/database/home/#pp ), the first genome database for the scuticociliatosis pathogens. PPGD integrates P. persalinus macronuclear genomic and transcriptomic data, including genome sequence, transcript, gene expression data, and gene annotation, as well as relevant information on its biology, morphology and taxonomy. 
The database also provides functions for visualizing, analyzing, and downloading the data. Conclusion PPGD is a useful resource for studying scuticociliates or scuticociliatosis. We will continue to update the PPGD by integrating more data and aim to integrate the PPGD with other ciliate databases to build a comprehensive ciliate genome database.",PPGD,0.992584169,persalinus genome database,0.981605089,PPGD,0.992584169,1,29351734,NA,NA,NA,do not merge,NA,NA,NA,NA,9/14/2018 +27551106,http://biodev.cea.fr/interevol/ppi4dock,"PPI4DOCK: large scale assessment of the use of homology models in free docking over more than 1000 realistic targets. Motivation Protein-protein docking methods are of great importance for understanding interactomes at the structural level. It has become increasingly appealing to use not only experimental structures but also homology models of unbound subunits as input for docking simulations. So far we are missing a large scale assessment of the success of rigid-body free docking methods on homology models. Results We explored how we could benefit from comparative modelling of unbound subunits to expand docking benchmark datasets. Starting from a collection of 3157 non-redundant, high X-ray resolution heterodimers, we developed the PPI4DOCK benchmark containing 1417 docking targets based on unbound homology models. Rigid-body docking by Zdock showed that for 1208 cases (85.2%), at least one correct decoy was generated, emphasizing the efficiency of rigid-body docking in generating correct assemblies. Overall, the PPI4DOCK benchmark contains a large set of realistic cases and provides new ground for assessing docking and scoring methodologies. 
Availability and implementation Benchmark sets can be downloaded from http://biodev.cea.fr/interevol/ppi4dock/ CONTACT: guerois@cea.frSupplementary information: Supplementary data are available at Bioinformatics online.",PPI4DOCK,0.968132639,NA,0,PPI4DOCK,0.968132639,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/22/2016 +26620522,http://comp-sysbio.org/ppim,"PPIM: A Protein-Protein Interaction Database for Maize. Maize (Zea mays) is one of the most important crops worldwide. To understand the biological processes underlying various traits of the crop (e.g. yield and response to stress), a detailed protein-protein interaction (PPI) network is highly demanded. Unfortunately, there are very few such PPIs available in the literature. Therefore, in this work, we present the Protein-Protein Interaction Database for Maize (PPIM), which covers 2,762,560 interactions among 14,000 proteins. The PPIM contains not only accurately predicted PPIs but also those molecular interactions collected from the literature. The database is freely available at http://comp-sysbio.org/ppim with a user-friendly powerful interface. We believe that the PPIM resource can help biologists better understand the maize crop.",PPIM,0.989891842,Protein-Protein Interaction Database for Maize,0.972055906,PPIM,0.989891842,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/30/2015 +30127348,http://www.cup.edu.in:99/ppins/home.php,"PPInS: a repository of protein-protein interaction sitesbase. Protein-Protein Interaction Sitesbase (PPInS), a high-performance database of protein-protein interacting interfaces, is presented. The atomic level information of the molecular interaction happening amongst various protein chains in protein-protein complexes (as reported in the Protein Data Bank [PDB]) together with their evolutionary information in Structural Classification of Proteins (SCOPe release 2.06), is made available in PPInS. 
Total 32468 PDB files representing X-ray crystallized multimeric protein-protein complexes with structural resolution better than 2.5 Å had been shortlisted to demarcate the protein-protein interaction interfaces (PPIIs). A total of 111857 PPIIs with ~32.24 million atomic contact pairs (ACPs) were generated and made available on a web server for on-site analysis and downloading purpose. All these PPIIs and protein-protein interacting patches (PPIPs) involved in them, were also analyzed in terms of a number of residues contributing in patch formation, their hydrophobic nature, amount of surface area they contributed in binding, and their homo and heterodimeric nature, to describe the diversity of information covered in PPInS. It was observed that 42.37% of total PPIPs were made up of 6-20 interacting residues, 53.08% PPIPs had interface area ≤1000 Å2 in PPII formation, 82.64% PPIPs were reported with hydrophobicity score of ≤10, and 73.26% PPIPs were homologous to each other with the sequence similarity score ranging from 75-100%. A subset ""Non-Redundant Database (NRDB)"" of the PPInS containing 2265 PPIIs, with over 1.8 million ACPs corresponding to the 1931 protein-protein complexes (PDBs), was also designed by removing structural redundancies at the level of SCOP superfamily (SCOP release 1.75). The web interface of the PPInS ( http://www.cup.edu.in:99/ppins/home.php ) offers an easy-to-navigate, intuitive and user-friendly environment, and can be accessed by providing PDB ID, SCOP superfamily ID, and protein sequence.",PPInS,0.995528519,Protein-Protein Interaction Sitesbase,0.959092394,PPInS,0.995528519,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/20/2018 +21786137,http://protein.cau.edu.cn/ppira,"Prediction of protein-protein interactions between Ralstonia solanacearum and Arabidopsis thaliana. Ralstonia solanacearum is a devastating bacterial pathogen that has an unusually wide host range. R. 
solanacearum, together with Arabidopsis thaliana, has become a model system for studying the molecular basis of plant-pathogen interactions. Protein-protein interactions (PPIs) play a critical role in the infection process, and some PPIs can initiate a plant defense response. However, experimental investigations have rarely addressed such PPIs. Using two computational methods, the interolog and the domain-based methods, we predicted 3,074 potential PPIs between 119 R. solanacearum and 1,442 A. thaliana proteins. Interestingly, we found that the potential pathogen-targeted proteins are more important in the A. thaliana PPI network. To facilitate further studies, all predicted PPI data were compiled into a database server called PPIRA (http://protein.cau.edu.cn/ppira/). We hope that our work will provide new insights for future research addressing the pathogenesis of R. solanacearum.",PPIRA,0.925762713,NA,0,PPIRA,0.925762713,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/24/2011 +34716373,http://ppmdb.easyomics.org,"PlantPathMarks (PPMdb): an interactive hub for pathways-based markers in plant genomes. ABSTRACT: Over the past decade, the problem of finding an efficient gene-targeting marker set or signature for plant trait characterization has remained challenging. Many databases focusing on pathway mining have been released with one major deficiency, as they lack to develop marker sets that target only genes controlling a specific pathway or certain biological process. Herein, we present the PlantPathMarks database (PPMdb) as a comprehensive, web-based, user-friendly, and interactive hub for pathway-based markers in plant genomes. Based on our newly developed pathway gene set mining approach, two novel pathway-based marker systems called pathway gene-targeted markers (PGTMs) and pathway microsatellite-targeted markers (PMTMs) were developed as a novel class of annotation-based markers. 
In the PPMdb database, 2,690,742 pathway-based markers reflecting 9,894 marker panels were developed across 82 plant genomes. The markers include 691,555 PGTMs and 1,999,187 PMTMs. Across these genomes, 165,378 enzyme-coding genes were mapped against 126 KEGG reference pathway maps. PPMdb is furnished with three interactive visualization tools (Map Browse, JBrowse and Species Comparison) to visualize, map, and compare the developed markers over their KEGG reference pathway maps. All the stored marker panels can be freely downloaded. PPMdb promises to create a radical shift in the paradigm of the area of molecular marker research. The use of PPMdb as a mega-tool represents an impediment for non-bioinformatician plant scientists and breeders. PPMdb is freely available at http://ppmdb.easyomics.org .",PPMdb,0.994303644,PlantPathMarks,0.944572687,PPMdb,0.994303644,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/29/2021 +31337335,http://pptdb.cgu.edu.tw,"Pathogenic Protist Transmembranome database (PPTdb): a web-based platform for searching and analysis of protist transmembrane proteins. Background Pathogenic protist membrane transporter proteins play important roles not only in exchanging molecules into and out of cells but also in acquiring nutrients and biosynthetic compounds from their hosts. Currently, there is no centralized protist membrane transporter database published, which makes system-wide comparisons and studies of host-pathogen membranomes difficult to achieve. Results We analyzed over one million protein sequences from 139 protists with full or partial genome sequences. Putative transmembrane proteins were annotated by primary sequence alignments, conserved secondary structural elements, and functional domains. We have constructed the PPTdb (Pathogenic Protist Transmembranome database), a comprehensive membrane transporter protein portal for pathogenic protists and their human hosts. 
The PPTdb is a web-based database with a user-friendly searching and data querying interface, including hierarchical transporter classification (TC) numbers, protein sequences, functional annotations, conserved functional domains, batch sequence retrieving and downloads. The PPTdb also serves as an analytical platform to provide useful comparison/mining tools, including transmembrane ability evaluation, annotation of unknown proteins, informative visualization charts, and iterative functional mining of host-pathogen transporter proteins. Conclusions The PPTdb collected putative protist transporter proteins and offers a user-friendly data retrieving interface. Moreover, a pairwise functional comparison ability can provide useful information for identifying functional uniqueness of each protist. Finally, the host and non-host protein similarity search can fulfill the needs of comprehensive studies of protists and their hosts. The PPTdb is freely accessible at http://pptdb.cgu.edu.tw .",PPTdb,0.99392122,Pathogenic Protist Transmembranome database,0.98263188,PPTdb,0.99392122,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/24/2019 +28651001,http://14.139.229.199/PpTFDB/Home.aspx,"PpTFDB: A pigeonpea transcription factor database for exploring functional genomics in legumes. Pigeonpea (Cajanus cajan L.), a diploid legume crop, is a member of the tribe Phaseoleae. This tribe is descended from the millettioid (tropical) clade of the subfamily Papilionoideae, which includes many important legume crop species such as soybean (Glycine max), mung bean (Vigna radiata), cowpea (Vigna ungiculata), and common bean (Phaseolus vulgaris). It plays major role in food and nutritional security, being rich source of proteins, minerals and vitamins. We have developed a comprehensive Pigeonpea Transcription Factors Database (PpTFDB) that encompasses information about 1829 putative transcription factors (TFs) and their 55 TF families. 
PpTFDB provides a comprehensive information about each of the identified TFs that includes chromosomal location, protein physicochemical properties, sequence data, protein functional annotation, simple sequence repeats (SSRs) with primers derived from their motifs, orthology with related legume crops, and gene ontology (GO) assignment to respective TFs. (PpTFDB: http://14.139.229.199/PpTFDB/Home.aspx) is a freely available and user friendly web resource that facilitates users to retrieve the information of individual members of a TF family through a set of query interfaces including TF ID or protein functional annotation. In addition, users can also get the information by browsing interfaces, which include browsing by TF Categories and by, GO Categories. This PpTFDB will serve as a promising central resource for researchers as well as breeders who are working towards crop improvement of legume crops.",PpTFDB,0.990379,Pigeonpea Transcription Factors Database,0.94807246,PpTFDB,0.990379,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/26/2017 +23193267,http://ssu-rrna.org,"The Protist Ribosomal Reference database (PR2): a catalog of unicellular eukaryote small sub-unit rRNA sequences with curated taxonomy. The interrogation of genetic markers in environmental meta-barcoding studies is currently seriously hindered by the lack of taxonomically curated reference data sets for the targeted genes. The Protist Ribosomal Reference database (PR(2), http://ssu-rrna.org/) provides a unique access to eukaryotic small sub-unit (SSU) ribosomal RNA and DNA sequences, with curated taxonomy. The database mainly consists of nuclear-encoded protistan sequences. However, metazoans, land plants, macrosporic fungi and eukaryotic organelles (mitochondrion, plastid and others) are also included because they are useful for the analysis of high-troughput sequencing data sets. Introns and putative chimeric sequences have been also carefully checked. 
Taxonomic assignation of sequences consists of eight unique taxonomic fields. In total, 136 866 sequences are nuclear encoded, 45 708 (36 501 mitochondrial and 9657 chloroplastic) are from organelles, the remaining being putative chimeric sequences. The website allows the users to download sequences from the entire and partial databases (including representative sequences after clustering at a given level of similarity). Different web tools also allow searches by sequence similarity. The presence of both rRNA and rDNA sequences, taking into account introns (crucial for eukaryotic sequences), a normalized eight terms ranked-taxonomy and updates of new GenBank releases were made possible by a long-term collaboration between experts in taxonomy and computer scientists.",PR(2,0.98287158,Protist Ribosomal Reference database,0.980404947,PR(2,0.98287158,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2012 +23055619,http://pri.hgc.jp,"PRD: A protein-RNA interaction database. Unlabelled Although protein-RNA interactions (PRIs) are involved in various important cellular processes, compiled data on PRIs are still limited. This contrasts with protein-protein interactions, which have been intensively recorded in public databases and subjected to network level analysis. Here, we introduce PRD, an online database of PRIs, dispersed across several sources, including scientific literature. Currently, over 10,000 interactions have been stored in PRD using PSI-MI 2.5, which is a standard model for describing detailed molecular interactions, with an emphasis on gene level data. Users can browse all recorded interactions and execute flexible keyword searches against the database via a web interface. Our database is not only a reference of PRIs, but will also be a valuable resource for studying characteristics of PRI networks. 
Availability PRD can be freely accessed at http://pri.hgc.jp/",PRD,0.985122204,NA,0,PRD,0.985122204,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/3/2012 +22589183,http://bioinfo.montp.cnrs.fr/?r=repeatDB,"PRDB: Protein Repeat DataBase. Rapidly increasing genomic data present new challenges for scientists: making sense of millions of amino acid sequences requires a systematic approach and information about their 3D structure, function, and evolution. Over the last decade, numerous studies demonstrated the fundamental importance of protein tandem repeats and their involvement in human diseases. Bioinformatics analysis of these regions requires special computer programs and databases, since the conventional approaches predominantly developed for globular domains have limited success. To perform a global comparative analysis of protein tandem repeats, we developed the Protein Tandem Repeat DataBase (PRDB). PRDB is a curated database that includes the protein tandem repeats found in sequence databanks by the T-REKS program. The database is available at http://bioinfo.montp.cnrs.fr/?r=repeatDB.",PRDB,0.969944358,Protein Tandem Repeat DataBase,0.902798961,PRDB,0.969944358,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/1/2012 +26200753,http://pregi.bi.up.ac.za/index.php,"Pre_GI: a global map of ontological links between horizontally transferred genomic islands in bacterial and archaeal genomes. The Predicted Genomic Islands database (Pre_GI) is a comprehensive repository of prokaryotic genomic islands (islands, GIs) freely accessible at http://pregi.bi.up.ac.za/index.php. Pre_GI, Version 2015, catalogues 26 744 islands identified in 2407 bacterial/archaeal chromosomes and plasmids. It provides an easy-to-use interface which allows users the ability to query against the database with a variety of fields, parameters and associations. 
Pre_GI is constructed to be a web-resource for the analysis of ontological roads between islands and cartographic analysis of the global fluxes of mobile genetic elements through bacterial and archaeal taxonomic borders. Comparison of newly identified islands against Pre_GI presents an alternative avenue to identify their ontology, origin and relative time of acquisition. Pre_GI aims to aid research on horizontal transfer events and materials through providing data and tools for holistic investigation of migration of genes through ecological niches and taxonomic boundaries.",Pre_GI,0.997470955,Predicted Genomic Islands database,0.986275991,Pre_GI,0.997470955,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/17/2015 +29530937,http://public.synergylab.cn/pair,"Predicted Arabidopsis Interactome Resource and Gene Set Linkage Analysis: A Transcriptomic Analysis Resource. An advanced functional understanding of omics data is important for elucidating the design logic of physiological processes in plants and effectively controlling desired traits in plants. We present the latest versions of the Predicted Arabidopsis Interactome Resource (PAIR) and of the gene set linkage analysis (GSLA) tool, which enable the interpretation of an observed transcriptomic change (differentially expressed genes [DEGs]) in Arabidopsis (Arabidopsis thaliana) with respect to its functional impact for biological processes. PAIR version 5.0 integrates functional association data between genes in multiple forms and infers 335,301 putative functional interactions. GSLA relies on this high-confidence inferred functional association network to expand our perception of the functional impacts of an observed transcriptomic change. GSLA then interprets the biological significance of the observed DEGs using established biological concepts (annotation terms), describing not only the DEGs themselves but also their potential functional impacts. 
This unique analytical capability can help researchers gain deeper insights into their experimental results and highlight prospective directions for further investigation. We demonstrate the utility of GSLA with two case studies in which GSLA uncovered how molecular events may have caused physiological changes through their collective functional influence on biological processes. Furthermore, we showed that typical annotation-enrichment tools were unable to produce similar insights to PAIR/GSLA. The PAIR version 5.0-inferred interactome and GSLA Web tool both can be accessed at http://public.synergylab.cn/pair/.",AIR,0.706697822,Predicted Arabidopsis Interactome Resource,0.918171836,Predicted Arabidopsis Interactome Resource,0.918171836,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/12/2018 +29425804,http://peve.med.u-tokai.ac.jp,"Systematic survey of non-retroviral virus-like elements in eukaryotic genomes. Endogenous viral elements (EVEs) are viral sequences that are endogenized in the host cell. Recently, several eukaryotic genomes have been shown to contain EVEs. To improve the understanding of EVEs in eukaryotes, we have developed a system for detecting EVE-like sequences in eukaryotes and conducted a large-scale nucleotide sequence similarity search using all available eukaryotic and viral genome assembly sequences (excluding those from retroviruses) stored in the National Center for Biotechnology Information genome database (as of August 14, 2017). We found that 3856 of 7007 viral genomes were similar to 4098 of 4102 eukaryotic genomes. For those EVE-like sequences, we constructed a database, Predicted Endogenous Viral Elements (pEVE, http://peve.med.u-tokai.ac.jp) which provides comprehensive search results summarized from an evolutionary viewpoint. A comparison of EVE-like sequences among closely related species may be useful to avoid false-positive hits. 
We believe that our search system and database will facilitate studies on EVEs.",pEVE,0.907541066,Predicted Endogenous Viral Elements,0.975142624,Predicted Endogenous Viral Elements,0.975142624,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/6/2018 +25558364,http://www.predicts.org.uk,"The PREDICTS database: a global database of how local terrestrial biodiversity responds to human impacts. Biodiversity continues to decline in the face of increasing anthropogenic pressures such as habitat destruction, exploitation, pollution and introduction of alien species. Existing global databases of species' threat status or population time series are dominated by charismatic species. The collation of datasets with broad taxonomic and biogeographic extents, and that support computation of a range of biodiversity indicators, is necessary to enable better understanding of historical declines and to project - and avert - future declines. We describe and assess a new database of more than 1.6 million samples from 78 countries representing over 28,000 species, collated from existing spatial comparisons of local-scale biodiversity exposed to different intensities and types of anthropogenic pressures, from terrestrial sites around the world. The database contains measurements taken in 208 (of 814) ecoregions, 13 (of 14) biomes, 25 (of 35) biodiversity hotspots and 16 (of 17) megadiverse countries. The database contains more than 1% of the total number of all species described, and more than 1% of the described species within many taxonomic groups - including flowering plants, gymnosperms, birds, mammals, reptiles, amphibians, beetles, lepidopterans and hymenopterans. The dataset, which is still being added to, is therefore already considerably larger and more representative than those used by previous quantitative models of biodiversity trends and responses. 
The database is being assembled as part of the PREDICTS project (Projecting Responses of Ecological Diversity In Changing Terrestrial Systems - http://www.predicts.org.uk). We make site-level summary data available alongside this article. The full database will be publicly available in 2015.",PREDICTS,0.901059151,NA,0,PREDICTS,0.901059151,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/2/2014 +25725063,http://server.cs.ucf.edu/predrem,"PreDREM: a database of predicted DNA regulatory motifs from 349 human cell and tissue samples. . PreDREM is a database of DNA regulatory motifs and motifs modules predicted from DNase I hypersensitive sites in 349 human cell and tissue samples. It contains 845-1325 predicted motifs in each sample, which result in a total of 2684 non-redundant motifs. In comparison with seven large collections of known motifs, more than 84% of the 2684 predicted motifs are similar to the known motifs, and 54-76% of the known motifs are similar to the predicted motifs. PreDREM also stores 43 663-20 13 288 motif modules in each sample, which provide the cofactor motifs of each predicted motif. Compared with motifs of known interacting transcription factor (TF) pairs in eight resources, on average, 84% of motif pairs corresponding to known interacting TF pairs are included in the predicted motif modules. Through its web interface, PreDREM allows users to browse motif information by tissues, datasets, individual non-redundant motifs, etc. Users can also search motifs, motif modules, instances of motifs and motif modules in given genomic regions, tissue or cell types a motif occurs, etc. PreDREM thus provides a useful resource for the understanding of cell- and tissue-specific gene regulation in the human genome. 
Database URL: http://server.cs.ucf.edu/predrem/.",PreDREM,0.996430159,NA,0,PreDREM,0.996430159,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/27/2015 +23193263,http://bhapp.c2b2.columbia.edu/PrePPI,"PrePPI: a structure-informed database of protein-protein interactions. PrePPI (http://bhapp.c2b2.columbia.edu/PrePPI) is a database that combines predicted and experimentally determined protein-protein interactions (PPIs) using a Bayesian framework. Predicted interactions are assigned probabilities of being correct, which are derived from calculated likelihood ratios (LRs) by combining structural, functional, evolutionary and expression information, with the most important contribution coming from structure. Experimentally determined interactions are compiled from a set of public databases that manually collect PPIs from the literature and are also assigned LRs. A final probability is then assigned to every interaction by combining the LRs for both predicted and experimentally determined interactions. The current version of PrePPI contains ∼2 million PPIs that have a probability more than ∼0.1 of which ∼60 000 PPIs for yeast and ∼370 000 PPIs for human are considered high confidence (probability > 0.5). The PrePPI database constitutes an integrated resource that enables users to examine aggregate information on PPIs, including both known and potentially novel interactions, and that provides structural models for many of the PPIs.",PrePPI,0.997049689,NA,0,PrePPI,0.997049689,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2012 +"23161682, 29156057",http://prgdb.org,"PRGdb 2.0: towards a community-based database model for the analysis of R-genes in plants. The Plant Resistance Genes database (PRGdb; http://prgdb.org) is a comprehensive resource on resistance genes (R-genes), a major class of genes in plant genomes that convey disease resistance against pathogens. 
Initiated in 2009, the database has grown more than 6-fold to recently include annotation derived from recent plant genome sequencing projects. Release 2.0 currently hosts useful biological information on a set of 112 known and 104 310 putative R-genes present in 233 plant species and conferring resistance to 122 different pathogens. Moreover, the website has been completely redesigned with the implementation of Semantic MediaWiki technologies, which makes our repository freely accessed and easily edited by any scientists. To this purpose, we encourage plant biologist experts to join our annotation effort and share their knowledge on resistance-gene biology with the rest of the scientific community.",PRGdb,0.998317122,Plant Resistance Genes database,0.982363117,PRGdb,0.998317122,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +33216897,http://rat.biomedtzc.cn,"Predicted rat interactome database and gene set linkage analysis. . Rattus norvegicus, or the rat, has been widely used as animal models for a diversity of human diseases in the last 150 years. The rat, as a disease model, has the advantage of relatively large body size and highly similar physiology to humans. In drug discovery, rat models are routinely used in drug efficacy and toxicity assessments. To facilitate molecular pharmacology studies in rats, we present the predicted rat interactome database (PRID), which is a database of high-quality predicted functional gene interactions with balanced sensitivity and specificity. PRID integrates functional gene association data from 10 public databases and infers 305 939 putative functional associations, which are expected to include 13.02% of all rat protein interactions, and 52.59% of these function associations may represent protein interactions. 
This set of functional interactions may not only facilitate hypothesis formulation in molecular mechanism studies, but also serve as a reference interactome for users to perform gene set linkage analysis (GSLA), which is a web-based tool to infer the potential functional impacts of a set of changed genes observed in transcriptomics analyses. In a case study, we show that GSLA based on PRID may provide more precise and informative annotations for investigators to understand the physiological mechanisms underlying a phenotype and lead investigators to testable hypotheses for further studies. Widely used functional annotation tools such as Gene Ontology (GO) analysis, and Database for Annotation, Visualization and Integrated Discovery (DAVID) did not provide similar insights. Database URL: http://rat.biomedtzc.cn.",PRID,0.985948801,predicted rat interactome database,0.924220908,PRID,0.985948801,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2020 +"21063943, 23203882, 30395289",http://www.ebi.ac.uk/pride,"PRIDE and ""Database on Demand"" as valuable tools for computational proteomics. The Proteomics Identifications Database (PRIDE, http://www.ebi.ac.uk/pride ) provides users with the ability to explore and compare mass spectrometry-based proteomics experiments that reveal details of the protein expression found in a broad range of taxonomic groups, tissues, and disease states. A PRIDE experiment typically includes identifications of proteins, peptides, and protein modifications. Additionally, many of the submitted experiments also include the mass spectra that provide the evidence for these identifications. Finally, one of the strongest advantages of PRIDE in comparison with other proteomics repositories is the amount of metadata it contains, a key point to put the above-mentioned data in biological and/or technical context. Several informatics tools have been developed in support of the PRIDE database. 
The most recent one is called ""Database on Demand"" (DoD), which allows custom sequence databases to be built in order to optimize the results from search engines. We describe the use of DoD in this chapter. Additionally, in order to show the potential of PRIDE as a source for data mining, we also explore complex queries using federated BioMart queries to integrate PRIDE data with other resources, such as Ensembl, Reactome, or UniProt.",PRIDE,0.996937911,Proteomics Identifications Database,0.96215572,PRIDE,0.996937911,3,22701464,26527722,NA,NA,do not merge,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2019 +26527722,http://www.ebi.ac.uk/pride/archive,"2016 update of the PRIDE database and its related tools. The PRoteomics IDEntifications (PRIDE) database is one of the world-leading data repositories of mass spectrometry (MS)-based proteomics data. Since the beginning of 2014, PRIDE Archive (http://www.ebi.ac.uk/pride/archive/) is the new PRIDE archival system, replacing the original PRIDE database. Here we summarize the developments in PRIDE resources and related tools since the previous update manuscript in the Database Issue in 2013. PRIDE Archive constitutes a complete redevelopment of the original PRIDE, comprising a new storage backend, data submission system and web interface, among other components. PRIDE Archive supports the most-widely used PSI (Proteomics Standards Initiative) data standard formats (mzML and mzIdentML) and implements the data requirements and guidelines of the ProteomeXchange Consortium. The wide adoption of ProteomeXchange within the community has triggered an unprecedented increase in the number of submitted data sets (around 150 data sets per month). We outline some statistics on the current PRIDE Archive data contents. We also report on the status of the PRIDE related stand-alone tools: PRIDE Inspector, PRIDE Converter 2 and the ProteomeXchange submission tool. 
Finally, we will give a brief update on the resources under development 'PRIDE Cluster' and 'PRIDE Proteomes', which provide a complementary view and quality-scored information of the peptide and protein identification data available in PRIDE Archive.",PRIDE,0.993364791,PRoteomics IDEntifications,0.859798441,PRIDE,0.993364791,1,NA,"21063943.0, 23203882.0, 30395289.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/2/2015 +23292601,http://prime.psc.riken.jp,"PRIMe Update: innovative content for plant metabolomics and integration of gene expression and metabolite accumulation. PRIMe (http://prime.psc.riken.jp/), the Platform for RIKEN Metabolomics, is a website that was designed and implemented to support research and analyses ranging from metabolomics to transcriptomics. To achieve functional genomics and annotation of unknown metabolites, we established the following PRIMe contents: MS2T, a library comprising >1 million entries of untargeted tandem mass spectrometry (MS/MS) data of plant metabolites; AtMetExpress LC-MS, a database of transcriptomics and metabolomics approaches in Arabidopsis developmental stages (AtMetExpress Development LC-MS) and a data set of the composition of secondary metabolites among 20 Arabidopsis ecotypes (AtMetExpress 20 ecotypes LC-MS); and ReSpect, hybrid reference MS/MS data resources (acquisitions and literature). PRIMeLink is a new web application that allows access to the innovative data resources of PRIMe. The MS2T library was generated from a set of MS/MS spectra acquired using the automatic data acquisition function of mass spectrometry. To increase the understanding of mechanisms driving variations in metabolic profiles among plant tissues, we further provided the AtMetExpress Development LC-MS database in PRIMe, facilitating the investigation of relationships between gene expression and metabolite accumulation. 
This information platform therefore provides an integrative analysis resource by linking Arabidopsis transcriptome and metabolome data. Moreover, we developed the ReSpect database, a plant-specific MS/MS data resource, which allows users to identify candidate structures from the suite of complex phytochemical structures. Finally, we integrated the three databases into PRIMeLink and established a walk-through link between transcriptome and metabolome information. PRIMeLink offers a bi-directional searchable function, from the gene and the metabolite perspective, to search for targets seamlessly and effectively.",PRIMe,0.993307292,NA,0,PRIMe,0.993307292,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/3/2013 +22086960,http://pga.mgh.harvard.edu/primerbank,"PrimerBank: a PCR primer database for quantitative gene expression analysis, 2012 update. Optimization of primer sequences for polymerase chain reaction (PCR) and quantitative PCR (qPCR) and reaction conditions remains an experimental challenge. We have developed a resource, PrimerBank, which contains primers that can be used for PCR and qPCR under stringent and allele-invariant amplification conditions. A distinguishing feature of PrimerBank is the experimental validation of primer pairs covering most known mouse genes. Here, we describe a major update of PrimerBank that includes the design of new primers covering 17,076 and 18,086 genes for the human and mouse species, respectively. As a result of this update, PrimerBank contains 497,156 primers (an increase of 62% from the previous version) that cover 36,928 human and mouse genes, corresponding to around 94% of all known protein-coding gene sequences. An updated algorithm based on our previous approach was used to design new primers using current genomic information available from the National Center for Biotechnology Information (NCBI). PrimerBank primers work under uniform PCR conditions, and can be used for high-throughput or genome-wide qPCR. 
Because of their broader linear dynamic range and greater sensitivity, qPCR approaches are used to reanalyze changes in expression suggested by exploratory technologies such as microarrays and RNA-Seq. The primers and all experimental validation data can be freely accessed from the PrimerBank website, http://pga.mgh.harvard.edu/primerbank/.",PrimerBank,0.995503724,NA,0,PrimerBank,0.995503724,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2011 +23772554,http://primos.fh-hagenberg.at,"PRIMOS: an integrated database of reassessed protein-protein interactions providing web-based access to in silico validation of experimentally derived data. Steady improvements in proteomics present a bioinformatic challenge to retrieve, store, and process the accumulating and often redundant amount of information. In particular, a large-scale comparison and analysis of protein-protein interaction (PPI) data requires tools for data interpretation as well as validation. At this juncture, the Protein Interaction and Molecule Search (PRIMOS) platform represents a novel web portal that unifies six primary PPI databases (BIND, Biomolecular Interaction Network Database; DIP, Database of Interacting Proteins; HPRD, Human Protein Reference Database; IntAct; MINT, Molecular Interaction Database; and MIPS, Munich Information Center for Protein Sequences) into a single consistent repository, which currently includes more than 196,700 redundancy-removed PPIs. PRIMOS supports three advanced search strategies centering on disease-relevant PPIs, on inter- and intra-organismal crosstalk relations (e.g., pathogen-host interactions), and on highly connected protein nodes analysis (""hub"" identification). The main novelties distinguishing PRIMOS from other secondary PPI databases are the reassessment of known PPIs, and the capacity to validate personal experimental data by our peer-reviewed, homology-based validation. 
This article focuses on definite PRIMOS use cases (presentation of embedded biological concepts, example applications) to demonstrate its broad functionality and practical value. PRIMOS is publicly available at http://primos.fh-hagenberg.at.",PRIMOS,0.99499613,Protein Interaction and Molecule Search,0.874199007,PRIMOS,0.99499613,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2013 +22363733,http://libaio.biol.mcgill.ca/prion,"PrionHome: a database of prions and other sequences relevant to prion phenomena. Prions are units of propagation of an altered state of a protein or proteins; prions can propagate from organism to organism, through cooption of other protein copies. Prions contain no necessary nucleic acids, and are important both as both pathogenic agents, and as a potential force in epigenetic phenomena. The original prions were derived from a misfolded form of the mammalian Prion Protein PrP. Infection by these prions causes neurodegenerative diseases. Other prions cause non-Mendelian inheritance in budding yeast, and sometimes act as diseases of yeast. We report the bioinformatic construction of the PrionHome, a database of >2000 prion-related sequences. The data was collated from various public and private resources and filtered for redundancy. The data was then processed according to a transparent classification system of prionogenic sequences (i.e., sequences that can make prions), prionoids (i.e., proteins that propagate like prions between individual cells), and other prion-related phenomena. There are eight PrionHome classifications for sequences. The first four classifications are derived from experimental observations: prionogenic sequences, prionoids, other prion-related phenomena, and prion interactors. The second four classifications are derived from sequence analysis: orthologs, paralogs, pseudogenes, and candidate-prionogenic sequences. 
Database entries list: supporting information for PrionHome classifications, prion-determinant areas (where relevant), and disordered and compositionally-biased regions. Also included are literature references for the PrionHome classifications, transcripts and genomic coordinates, and structural data (including comparative models made for the PrionHome from manually curated alignments). We provide database usage examples for both vertebrate and fungal prion contexts. Using the database data, we have performed a detailed analysis of the compositional biases in known budding-yeast prionogenic sequences, showing that the only abundant bias pattern is for asparagine bias with subsidiary serine bias. We anticipate that this database will be a useful experimental aid and reference resource. It is freely available at: http://libaio.biol.mcgill.ca/prion.",PrionHome,0.987491906,NA,0,PrionHome,0.987491906,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/20/2012 +31161204,http://prismoid.erc.monash.edu,"PRISMOID: a comprehensive 3D structure database for post-translational modifications and mutations with functional impact. Post-translational modifications (PTMs) play very important roles in various cell signaling pathways and biological process. Due to PTMs' extremely important roles, many major PTMs have been studied, while the functional and mechanical characterization of major PTMs is well documented in several databases. However, most currently available databases mainly focus on protein sequences, while the real 3D structures of PTMs have been largely ignored. Therefore, studies of PTMs 3D structural signatures have been severely limited by the deficiency of the data. Here, we develop PRISMOID, a novel publicly available and free 3D structure database for a wide range of PTMs. 
PRISMOID represents an up-to-date and interactive online knowledge base with specific focus on 3D structural contexts of PTMs sites and mutations that occur on PTMs and in the close proximity of PTM sites with functional impact. The first version of PRISMOID encompasses 17 145 non-redundant modification sites on 3919 related protein 3D structure entries pertaining to 37 different types of PTMs. Our entry web page is organized in a comprehensive manner, including detailed PTM annotation on the 3D structure and biological information in terms of mutations affecting PTMs, secondary structure features and per-residue solvent accessibility features of PTM sites, domain context, predicted natively disordered regions and sequence alignments. In addition, high-definition JavaScript packages are employed to enhance information visualization in PRISMOID. PRISMOID equips a variety of interactive and customizable search options and data browsing functions; these capabilities allow users to access data via keyword, ID and advanced options combination search in an efficient and user-friendly way. A download page is also provided to enable users to download the SQL file, computational structural features and PTM sites' data. We anticipate PRISMOID will swiftly become an invaluable online resource, assisting both biologists and bioinformaticians to conduct experiments and develop applications supporting discovery efforts in the sequence-structural-functional relationship of PTMs and providing important insight into mutations and PTM sites interaction mechanisms. The PRISMOID database is freely accessible at http://prismoid.erc.monash.edu/. The database and web interface are implemented in MySQL, JSP, JavaScript and HTML with all major browsers supported.",PRISMOID,0.997668028,NA,0,PRISMOID,0.997668028,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2020 +32542382,http://www.biosequencing.cn/PRMdb,"PRMdb: A Repository of Predicted RNA Modifications in Plants. 
Evidence is mounting that RNA modifications play essential roles in posttranscriptional regulation of gene expression. So far, over 150 RNA modifications catalyzed by distinct enzymes have been documented. In plants, genome-wide identification of RNA modifications is largely limited to the model species Arabidopsis thaliana, while lacking in diverse non-model plants. Here, we present PRMdb, a plant RNA modification database, based on the analysis of thousands of RNA-seq, degradome-seq and small RNA-seq data from a wide range of plant species using the well-documented tool HAMR (high-throughput analysis of modified ribonucleotide). PRMdb provides a user-friendly interface that enables easy browsing and searching of the tRNA and mRNA modification data. We show that PRMdb collects high-confidence RNA modifications including novel RNA modification sites that can be validated by genomic PCR and reverse transcription PCR. In summary, PRMdb provides a valuable web resource for deciphering the epitranscriptomes in diverse plant species and will facilitate functional studies of RNA modifications in plants. RPMdb is available via http://www.biosequencing.cn/PRMdb/.",PRMdb,0.995993018,high-throughput analysis of modified,0.833429269,PRMdb,0.995993018,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2020 +21031599,http://bioinfo.hr/pro-mine,"PRO-MINE: A bioinformatics repository and analytical tool for TARDBP mutations. TDP-43 is a multifunctional RNA-binding protein found to be a major protein component of intracellular inclusions found in neurodegenerative disorders such as Fronto Temporal Lobar Degeneration, Amyotrophic Lateral Sclerosis, and Alzheimer Disease. PRO-MINE (PROtein Mutations In NEurodegeneration) is a database populated with manually curated data from the literature regarding all TDP-43/TDP43/TARDBP gene disease-associated mutations identified to date. 
A web server interface has been developed to query the database and to provide tools for the analysis of already reported or novel TDP-43 gene mutations. As is usually the case with genetic association studies, assessing the potential impact of identified mutations is of crucial importance, and in order to avoid prediction biases it is essential to compare the prediction results. However, in most cases mutations have to be submitted separately to various prediction tools and the individual results manually merged together afterwards. The implemented web server aims to overcome the problem by providing simultaneous access to several prediction tools and by displaying the results into a single output. Furthermore, the results are displayed together in a comprehensive output for a more convenient analysis and are enriched with additional information about mutations. In addition, our web server can also display the mutation(s) of interest within an alignment of annotated TDP-43 protein sequences from different vertebrate species. In this way, the degree of sequence conservation where the mutation(s) occur can be easily tracked and visualized. The web server is freely available to researchers and can be accessed at http://bioinfo.hr/pro-mine.",PRO-MINE,0.97720556,PROtein Mutations In NEurodegeneration,0.793561556,PRO-MINE,0.97720556,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2011 +28171531,http://lin.uestc.edu.cn/database/pro54db,"Pro54DB: a database for experimentally verified sigma-54 promoters. Summary In prokaryotes, the σ54 promoters are unique regulatory elements and have attracted much attention because they are in charge of the transcription of carbon and nitrogen-related genes and participate in numerous ancillary processes and environmental responses. 
All findings on σ54 promoters are favorable for a better understanding of their regulatory mechanisms in gene transcription and an accurate discovery of genes missed by the wet experimental evidences. In order to provide an up-to-date, interactive and extensible database for σ54 promoter, a free and easy accessed database called Pro54DB (σ54 promoter database) was built to collect information of σ54 promoter. In the current version, it has stored 210 experimental-confirmed σ54 promoters with 297 regulated genes in 43 species manually extracted from 133 publications, which is helpful for researchers in fields of bioinformatics and molecular biology. Availability and implementation Pro54DB is freely available on the web at http://lin.uestc.edu.cn/database/pro54db with all major browsers supported. Contacts greatchen@ncst.edu.cn or hlin@uestc.edu.cn",Pro54DB,0.984204948,NA,0,Pro54DB,0.984204948,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2017 +25097386,http://bicmku.in/ProADD,"ProADD: A database on Protein Aggregation Diseases. Unlabelled ProADD, a database for protein aggregation diseases, is developed to organize the data under a single platform to facilitate easy access for researchers. Diseases caused due to protein aggregation and the proteins involved in each of these diseases are integrated. The database helps in classification of proteins involved in the protein aggregation diseases based on sequence and structural analysis. Analysis of proteins can be done to mine patterns prevailing among the aggregating proteins. Availability http://bicmku.in/ProADD.",ProADD,0.992834508,NA,0,ProADD,0.992834508,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/30/2014 +26586809,http://www.probebase.net,"probeBase--an online resource for rRNA-targeted oligonucleotide probes and primers: new features 2016. probeBase http://www.probebase.net is a manually maintained and curated database of rRNA-targeted oligonucleotide probes and primers. 
Contextual information and multiple options for evaluating in silico hybridization performance against the most recent rRNA sequence databases are provided for each oligonucleotide entry, which makes probeBase an important and frequently used resource for microbiology research and diagnostics. Here we present a major update of probeBase, which was last featured in the NAR Database Issue 2007. This update describes a complete remodeling of the database architecture and environment to accommodate computationally efficient access. Improved search functions, sequence match tools and data output now extend the opportunities for finding suitable hierarchical probe sets that target an organism or taxon at different taxonomic levels. To facilitate the identification of complementary probe sets for organisms represented by short rRNA sequence reads generated by amplicon sequencing or metagenomic analysis with next generation sequencing technologies such as Illumina and IonTorrent, we introduce a novel tool that recovers surrogate near full-length rRNA sequences for short query sequences and finds matching oligonucleotides in probeBase.",probeBase,0.996019483,NA,0,probeBase,0.996019483,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/19/2015 +22268964,http://probis.cmm.ki.si/database,"ProBiS-database: precalculated binding site similarities and local pairwise alignments of PDB structures. ProBiS-Database is a searchable repository of precalculated local structural alignments in proteins detected by the ProBiS algorithm in the Protein Data Bank. Identification of functionally important binding regions of the protein is facilitated by structural similarity scores mapped to the query protein structure. PDB structures that have been aligned with a query protein may be rapidly retrieved from the ProBiS-Database, which is thus able to generate hypotheses concerning the roles of uncharacterized proteins. 
Presented with uncharacterized protein structure, ProBiS-Database can discern relationships between such a query protein and other better known proteins in the PDB. Fast access and a user-friendly graphical interface promote easy exploration of this database of over 420 million local structural alignments. The ProBiS-Database is updated weekly and is freely available online at http://probis.cmm.ki.si/database.",ProBiS-Database,0.990155792,NA,0,ProBiS-Database,0.990155792,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/7/2012 +34319727,http://probis-dock-database.insilab.org,"ProBiS-Dock Database: A Web Server and Interactive Web Repository of Small Ligand-Protein Binding Sites for Drug Design. We have developed a new system, ProBiS-Dock, which can be used to determine the different types of protein binding sites for small ligands. The binding sites identified this way are then used to construct a new binding site database, the ProBiS-Dock Database, that allows for the ranking of binding sites according to their utility for drug development. The newly constructed database currently has more than 1.4 million binding sites and offers the possibility to investigate potential drug targets originating from different biological species. The interactive ProBiS-Dock Database, a web server and repository that consists of all small-molecule ligand binding sites in all of the protein structures in the Protein Data Bank, is freely available at http://probis-dock-database.insilab.org. The ProBiS-Dock Database will be regularly updated to keep pace with the growth of the Protein Data Bank, and our anticipation is that it will be useful in drug discovery.",ProBiS-Dock,0.990430673,NA,0,ProBiS-Dock,0.990430673,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/28/2021 +27153608,http://probonto.org,"ProbOnto: ontology and knowledge base of probability distributions. Motivation Probability distributions play a central role in mathematical and statistical modelling. 
The encoding, annotation and exchange of such models could be greatly simplified by a resource providing a common reference for the definition of probability distributions. Although some resources exist, no suitably detailed and complex ontology exists nor any database allowing programmatic access. Results ProbOnto, is an ontology-based knowledge base of probability distributions, featuring more than 80 uni- and multivariate distributions with their defining functions, characteristics, relationships and re-parameterization formulas. It can be used for model annotation and facilitates the encoding of distribution-based models, related functions and quantities. Availability and implementation http://probonto.org Contact mjswat@ebi.ac.uk Supplementary information Supplementary data are available at Bioinformatics online.",ProbOnto,0.962836146,NA,0,ProbOnto,0.962836146,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/3/2016 +32119071,http://web.iitm.ac.in/bioinfo2/procaff,"ProCaff: protein-carbohydrate complex binding affinity database. MOTIVATION:Protein-carbohydrate interactions perform several cellular and biological functions and their structure and function are mainly dictated by their binding affinity. Although plenty of experimental data on binding affinity are available, there is no reliable and comprehensive database in the literature. RESULTS:We have developed a database on binding affinity of protein-carbohydrate complexes, ProCaff, which contains 3122 entries on dissociation constant (Kd), Gibbs free energy change (ΔG), experimental conditions, sequence, structure and literature information. Additional features include the options to search, display, visualization, download and upload the data. AVAILABILITY AND IMPLEMENTATION:The database is freely available at http://web.iitm.ac.in/bioinfo2/procaff/. The website is implemented using HTML and PHP and supports recent versions of major browsers such as Chrome, Firefox, IE10 and Opera. 
CONTACT:gromiha@iitm.ac.in. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",ProCaff,0.978805065,carbohydrate complex binding affinity database,0.665413107,ProCaff,0.978805065,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2020 +31598690,http://www.procarbdb.science/procarb,"ProCarbDB: a database of carbohydrate-binding proteins. Carbohydrate-binding proteins play crucial roles across all organisms and viruses. The complexity of carbohydrate structures, together with inconsistencies in how their 3D structures are reported, has led to difficulties in characterizing the protein-carbohydrate interfaces. In order to better understand protein-carbohydrate interactions, we have developed an open-access database, ProCarbDB, which, unlike the Protein Data Bank (PDB), clearly distinguishes between the complete carbohydrate ligands and their monomeric units. ProCarbDB is a comprehensive database containing over 5200 3D X-ray crystal structures of protein-carbohydrate complexes. In ProCarbDB, the complete carbohydrate ligands are annotated and all their interactions are displayed. Users can also select any protein residue in the proximity of the ligand to inspect its interactions with the carbohydrate ligand and with other neighbouring protein residues. Where available, additional curated information on the binding affinity of the complex and the effects of mutations on the binding have also been provided in the database. We believe that ProCarbDB will be an invaluable resource for understanding protein-carbohydrate interfaces. The ProCarbDB web server is freely available at http://www.procarbdb.science/procarb.",ProCarbDB,0.995657146,NA,0,ProCarbDB,0.995657146,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +29136200,http://www.prodoric2.de,"PRODORIC2: the bacterial gene regulation database in 2018. Bacteria adapt to changes in their environment via differential gene expression mediated by DNA binding transcriptional regulators. 
The PRODORIC2 database hosts one of the largest collections of DNA binding sites for prokaryotic transcription factors. It is the result of the thoroughly redesigned PRODORIC database. PRODORIC2 is more intuitive and user-friendly. Besides significant technical improvements, the new update offers more than 1000 new transcription factor binding sites and 110 new position weight matrices for genome-wide pattern searches with the Virtual Footprint tool. Moreover, binding sites deduced from high-throughput experiments were included. Data for 6 new bacterial species including bacteria of the Rhodobacteraceae family were added. Finally, a comprehensive collection of sigma- and transcription factor data for the nosocomial pathogen Clostridium difficile is now part of the database. PRODORIC2 is publicly available at http://www.prodoric2.de.",PRODORIC2,0.997216105,NA,0,PRODORIC2,0.997216105,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24270047,http://profileDB.-microdiscovery.de,"ProfileDB: a resource for proteomics and cross-omics biomarker discovery. The increasing size and complexity of high-throughput datasets pose a growing challenge for researchers. Often very different (cross-omics) techniques with individual data analysis pipelines are employed making a unified biomarker discovery strategy and a direct comparison of different experiments difficult and time consuming. Here we present the comprehensive web-based application ProfileDB. The application is designed to integrate data from different high-throughput 'omics' data types (Transcriptomics, Proteomics, Metabolomics) with clinical parameters and prior knowledge on pathways and ontologies. Beyond data storage, ProfileDB provides a set of dedicated tools for study inspection and data visualization. The user can gain insights into a complex experiment with just a few mouse clicks. We will demonstrate the application by presenting typical use cases for the identification of proteomics biomarkers. 
All presented analyses can be reproduced using the public ProfileDB web server. The ProfileDB application is available by standard browser (Firefox 18+, Internet Explorer Version 9+) technology via http://profileDB.-microdiscovery.de/ (login and pass-word: profileDB). The installation contains several public datasets including different cross-'omics' experiments. This article is part of a Special Issue entitled: Biomarkers: A Proteomic Challenge.",ProfileDB,0.997333407,NA,0,ProfileDB,0.997333407,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2013 +24225322,http://www.progenetix.org,"Progenetix: 12 years of oncogenomic data curation. DNA copy number aberrations (CNAs) can be found in the majority of cancer genomes and are crucial for understanding the potential mechanisms underlying tumor initiation and progression. Since the first release in 2001, the Progenetix project (http://www.progenetix.org) has provided a reference resource dedicated to provide the most comprehensive collection of genome-wide CNA profiles. Reflecting the application of comparative genomic hybridization techniques to tens of thousands of cancer genomes, over the past 12 years our data curation efforts have resulted in a more than 60-fold increase in the number of cancer samples presented through Progenetix. In addition, new data exploration tools and visualization options have been added. In particular, the gene-specific CNA frequency analysis should facilitate the assignment of cancer genes to related cancer types. In addition, the new user file processing interface allows users to take advantage of the online tools, including various data representation options for proprietary data pre-publication. 
In this update article, we report recent improvements of the database in terms of content, user interface and online tools.",Progenetix,0.996031821,NA,0,Progenetix,0.996031821,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/12/2013 +28053165,http://progenomes.embl.de,"proGenomes: a resource for consistent functional and taxonomic annotations of prokaryotic genomes. The availability of microbial genomes has opened many new avenues of research within microbiology. This has been driven primarily by comparative genomics approaches, which rely on accurate and consistent characterization of genomic sequences. It is nevertheless difficult to obtain consistent taxonomic and integrated functional annotations for defined prokaryotic clades. Thus, we developed proGenomes, a resource that provides user-friendly access to currently 25 038 high-quality genomes whose sequences and consistent annotations can be retrieved individually or by taxonomic clade. These genomes are assigned to 5306 consistent and accurate taxonomic species clusters based on previously established methodology. proGenomes also contains functional information for almost 80 million protein-coding genes, including a comprehensive set of general annotations and more focused annotations for carbohydrate-active enzymes and antibiotic resistance genes. Additionally, broad habitat information is provided for many genomes. All genomes and associated information can be downloaded by user-selected clade or multiple habitat-specific sets of representative genomes. We expect that the availability of high-quality genomes with comprehensive functional annotations will promote advances in clinical microbial genomics, functional evolution and other subfields of microbiology. 
proGenomes is available at http://progenomes.embl.de.",proGenomes,0.984422565,NA,0,proGenomes,0.984422565,1,31647096,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,10/24/2016 +31647096,http://progenomes.embl.de,"proGenomes2: an improved database for accurate and consistent habitat, taxonomic and functional annotations of prokaryotic genomes. Microbiology depends on the availability of annotated microbial genomes for many applications. Comparative genomics approaches have been a major advance, but consistent and accurate annotations of genomes can be hard to obtain. In addition, newer concepts such as the pan-genome concept are still being implemented to help answer biological questions. Hence, we present proGenomes2, which provides 87 920 high-quality genomes in a user-friendly and interactive manner. Genome sequences and annotations can be retrieved individually or by taxonomic clade. Every genome in the database has been assigned to a species cluster and most genomes could be accurately assigned to one or multiple habitats. In addition, general functional annotations and specific annotations of antibiotic resistance genes and single nucleotide variants are provided. In short, proGenomes2 provides threefold more genomes, enhanced habitat annotations, updated taxonomic and functional annotation and improved linkage to the NCBI BioSample database. The database is available at http://progenomes.embl.de/.",proGenomes2,0.9957847,NA,0,proGenomes2,0.9957847,1,28053165,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2020 +22039152,http://www.proglycprot.org,"ProGlycProt: a repository of experimentally characterized prokaryotic glycoproteins. ProGlycProt (http://www.proglycprot.org/) is an open access, manually curated, comprehensive repository of bacterial and archaeal glycoproteins with at least one experimentally validated glycosite (glycosylated residue). 
To facilitate maximum information at one point, the database is arranged under two sections: (i) ProCGP-the main data section consisting of 95 entries with experimentally characterized glycosites and (ii) ProUGP-a supplementary data section containing 245 entries with experimentally identified glycosylation but uncharacterized glycosites. Every entry in the database is fully cross-referenced and enriched with available published information about source organism, coding gene, protein, glycosites, glycosylation type, attached glycan, associated oligosaccharyl/glycosyl transferases (OSTs/GTs), supporting references, and applicable additional information. Interestingly, ProGlycProt contains as many as 174 entries for which information is unavailable or the characterized glycosites are unannotated in Swiss-Prot release 2011_07. The website supports a dedicated structure gallery of homology models and crystal structures of characterized glycoproteins in addition to two new tools developed in view of emerging information about prokaryotic sequons (conserved sequences of amino acids around glycosites) that are never or rarely seen in eukaryotic glycoproteins. ProGlycProt provides an extensive compilation of experimentally identified glycosites (334) and glycoproteins (340) of prokaryotes that could serve as an information resource for research and technology applications in glycobiology.",ProGlycProt,0.997674525,NA,0,ProGlycProt,0.997674525,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2011 +31620779,http://bigd.big.ac.cn/padsarsenal,"PADS Arsenal: a database of prokaryotic defense systems related genes. Defense systems are vital weapons for prokaryotes to resist heterologous DNA and survive from the constant invasion of viruses, and they are widely used in biochemistry investigation and antimicrobial drug research. So far, numerous types of defense systems have been discovered, but there is no comprehensive defense systems database to organize prokaryotic defense gene datasets. 
To fill this gap, we unveil the prokaryotic antiviral defense system (PADS) Arsenal (https://bigd.big.ac.cn/padsarsenal), a public database dedicated to gathering, storing, analyzing and visualizing prokaryotic defense gene datasets. The initial version of PADS Arsenal integrates 18 distinctive categories of defense system with the annotation of 6 600 264 genes retrieved from 63,701 genomes across 33 390 species of archaea and bacteria. PADS Arsenal provides various ways to retrieve defense systems related genes information and visualize them with multifarious function modes. Moreover, an online analysis pipeline is integrated into PADS Arsenal to facilitate annotation and evolutionary analysis of defense genes. PADS Arsenal can also visualize the dynamic variation information of defense genes from pan-genome analysis. Overall, PADS Arsenal is a state-of-the-art open comprehensive resource to accelerate the research of prokaryotic defense systems.",PADS,0.94088002,prokaryotic antiviral defense system,0.965218917,prokaryotic antiviral defense system,0.965218917,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +33539888,http://lin-group.cn/database/ppd,"PPD: A Manually Curated Database for Experimentally Verified Prokaryotic Promoters. As a key region, promoter plays a key role in transcription regulation. A eukaryotic promoter database called EPD has been constructed to store eukaryotic POL II promoters. Although there are some promoter databases for specific prokaryotic species or specific promoter type, such as RegulonDB for Escherichia coli K-12, DBTBS for Bacillus subtilis and Pro54DB for sigma 54 promoter, because of the diversity of prokaryotes and the development of sequencing technology, huge amounts of prokaryotic promoters are scattered in numerous published articles, which is inconvenient for researchers to explore the process of gene regulation in prokaryotes. 
In this study, we constructed a Prokaryotic Promoter Database (PPD), which records the experimentally validated promoters in prokaryotes, from published articles. Up to now, PPD has stored 129,148 promoters across 63 prokaryotic species manually extracted from published papers. We provided a friendly interface for users to browse, search, blast, visualize, submit and download data. The PPD will provide relatively comprehensive resources of prokaryotic promoter for the study of prokaryotic gene transcription. The PPD is freely available and easy accessed at http://lin-group.cn/database/ppd/.",PPD,0.750178277,Prokaryotic Promoter Database,0.956242067,Prokaryotic Promoter Database,0.956242067,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/2/2021 +25382819,http://vulcan.cs.uga.edu/prokino,"ProKinO: a unified resource for mining the cancer kinome. Protein kinases represent a large and diverse family of evolutionarily related proteins that are abnormally regulated in human cancers. Although genome sequencing studies have revealed thousands of variants in protein kinases, translating ""big"" genomic data into biological knowledge remains a challenge. Here, we describe an ontological framework for integrating and conceptualizing diverse forms of information related to kinase activation and regulatory mechanisms in a machine readable, human understandable form. We demonstrate the utility of this framework in analyzing the cancer kinome, and in generating testable hypotheses for experimental studies. Through the iterative process of aggregate ontology querying, hypothesis generation and experimental validation, we identify a novel mutational hotspot in the αC-β4 loop of the kinase domain and demonstrate the functional impact of the identified variants in epidermal growth factor receptor (EGFR) constitutive activity and inhibitor sensitivity. 
We provide a unified resource for the kinase and cancer community, ProKinO, housed at http://vulcan.cs.uga.edu/prokino.",ProKinO,0.995589495,NA,0,ProKinO,0.995589495,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2015 +21781326,http://nucleix.mbu.iisc.ernet.in/prombase,"PromBase: a web resource for various genomic features and predicted promoters in prokaryotic genomes. Background As more and more genomes are being sequenced, an overview of their genomic features and annotation of their functional elements, which control the expression of each gene or transcription unit of the genome, is a fundamental challenge in genomics and bioinformatics. Findings Relative stability of DNA sequence has been used to predict promoter regions in 913 microbial genomic sequences with GC-content ranging from 16.6% to 74.9%. Irrespective of the genome GC-content the relative stability based promoter prediction method has already been proven to be robust in terms of recall and precision. The predicted promoter regions for the 913 microbial genomes have been accumulated in a database called PromBase. Promoter search can be carried out in PromBase either by specifying the gene name or the genomic position. Each predicted promoter region has been assigned to a reliability class (low, medium, high, very high and highest) based on the difference between its average free energy and the downstream region. The recall and precision values for each class are shown graphically in PromBase. In addition, PromBase provides detailed information about base composition, CDS and CG/TA skews for each genome and various DNA sequence dependent structural properties (average free energy, curvature and bendability) in the vicinity of all annotated translation start sites (TLS). Conclusion PromBase is a database, which contains predicted promoter regions and detailed analysis of various genomic features for 913 microbial genomes. 
PromBase can serve as a valuable resource for comparative genomics study and help the experimentalist to rapidly access detailed information on various genomic features and putative promoter regions in any given genome. This database is freely accessible for academic and non- academic users via the worldwide web http://nucleix.mbu.iisc.ernet.in/prombase/.",PromBase,0.990113139,NA,0,PromBase,0.990113139,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/22/2011 +33196798,http://bioinformatics.charite.de/promiscuous2,"PROMISCUOUS 2.0: a resource for drug-repositioning. The development of new drugs for diseases is a time-consuming, costly and risky process. In recent years, many drugs could be approved for other indications. This repurposing process allows to effectively reduce development costs, time and, ultimately, save patients' lives. During the ongoing COVID-19 pandemic, drug repositioning has gained widespread attention as a fast opportunity to find potential treatments against the newly emerging disease. In order to expand this field to researchers with varying levels of experience, we made an effort to open it to all users (meaning novices as well as experts in cheminformatics) by significantly improving the entry-level user experience. The browsing functionality can be used as a global entry point to collect further information with regards to small molecules (∼1 million), side-effects (∼110 000) or drug-target interactions (∼3 million). The drug-repositioning tab for small molecules will also suggest possible drug-repositioning opportunities to the user by using structural similarity measurements for small molecules using two different approaches. Additionally, using information from the Promiscuous 2.0 Database, lists of candidate drugs for given indications were precomputed, including a section dedicated to potential treatments for COVID-19. All the information is interconnected by a dynamic network-based visualization to identify new indications for available compounds. 
Promiscuous 2.0 is unique in its functionality and is publicly available at http://bioinformatics.charite.de/promiscuous2.",Promiscuous,0.892922521,NA,0,Promiscuous,0.892922521,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +32358997,http://ccrcc.cptac-network-view.org,"ProNetView-ccRCC: A Web-Based Portal to Interactively Explore Clear Cell Renal Cell Carcinoma Proteogenomics Networks. To better understand the molecular basis of cancer, the NCI's Clinical Proteomics Tumor Analysis Consortium (CPTAC) has been performing comprehensive large-scale proteogenomic characterizations of multiple cancer types. Gene and protein regulatory networks are subsequently being derived based on these proteogenomic profiles, which serve as tools to gain systems-level understanding of the molecular regulatory factories underlying these diseases. On the other hand, it remains a challenge to effectively visualize and navigate the resulting network models, which capture higher order structures in the proteogenomic profiles. There is a pressing need to have a new open community resource tool for intuitive visual exploration, interpretation, and communication of these gene/protein regulatory networks by the cancer research community. In this work, ProNetView-ccRCC (http://ccrcc.cptac-network-view.org/), an interactive web-based network exploration portal for investigating phosphopeptide co-expression network inferred based on the CPTAC clear cell renal cell carcinoma (ccRCC) phosphoproteomics data is introduced. ProNetView-ccRCC enables quick, user-intuitive visual interactions with the ccRCC tumor phosphoprotein co-expression network comprised of 3614 genes, as well as 30 functional pathway-enriched network modules. 
Users can interact with the network portal and can conveniently query for association between abundance of each phosphopeptide in the network and clinical variables such as tumor grade.",ProNetView-ccRCC,0.936549442,NA,0,ProNetView-ccRCC,0.936549442,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/27/2020 +26089836,http://tagc.univ-mrs.fr/pronto,"PrOnto database : GO term functional dissimilarity inferred from biological data. Moonlighting proteins are defined by their involvement in multiple, unrelated functions. The computational prediction of such proteins requires a formal method of assessing the similarity of cellular processes, for example, by identifying dissimilar Gene Ontology terms. While many measures of Gene Ontology term similarity exist, most depend on abstract mathematical analyses of the structure of the GO tree and do not necessarily represent the underlying biology. Here, we propose two metrics of GO term functional dissimilarity derived from biological information, one based on the protein annotations and the other on the interactions between proteins. They have been collected in the PrOnto database, a novel tool which can be of particular use for the identification of moonlighting proteins. The database can be queried via an web-based interface which is freely available at http://tagc.univ-mrs.fr/pronto.",PrOnto,0.986176848,NA,0,PrOnto,0.986176848,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/3/2015 +22096236,http://operons.ibt.unam.mx/OperonPredictor,"ProOpDB: Prokaryotic Operon DataBase. The Prokaryotic Operon DataBase (ProOpDB, http://operons.ibt.unam.mx/OperonPredictor) constitutes one of the most precise and complete repositories of operon predictions now available. Using our novel and highly accurate operon identification algorithm, we have predicted the operon structures of more than 1200 prokaryotic genomes. 
ProOpDB offers diverse alternatives by which a set of operon predictions can be retrieved including: (i) organism name, (ii) metabolic pathways, as defined by the KEGG database, (iii) gene orthology, as defined by the COG database, (iv) conserved protein domains, as defined by the Pfam database, (v) reference gene and (vi) reference operon, among others. In order to limit the operon output to non-redundant organisms, ProOpDB offers an efficient method to select the most representative organisms based on a precompiled phylogenetic distances matrix. In addition, the ProOpDB operon predictions are used directly as the input data of our Gene Context Tool to visualize their genomic context and retrieve the sequence of their corresponding 5' regulatory regions, as well as the nucleotide or amino acid sequences of their genes.",ProOpDB,0.997804239,Prokaryotic Operon DataBase,0.958016768,ProOpDB,0.997804239,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/16/2011 +33388027,http://bioinfo.dcc.ufmg.br/propedia,"Propedia: a database for protein-peptide identification based on a hybrid clustering algorithm. Background Protein-peptide interactions play a fundamental role in a wide variety of biological processes, such as cell signaling, regulatory networks, immune responses, and enzyme inhibition. Peptides are characterized by low toxicity and small interface areas; therefore, they are good targets for therapeutic strategies, rational drug planning and protein inhibition. Approximately 10% of the ethical pharmaceutical market is protein/peptide-based. Furthermore, it is estimated that 40% of protein interactions are mediated by peptides. Despite the fast increase in the volume of biological data, particularly on sequences and structures, there remains a lack of broad and comprehensive protein-peptide databases and tools that allow the retrieval, characterization and understanding of protein-peptide recognition and consequently support peptide design. 
Results We introduce Propedia, a comprehensive and up-to-date database with a web interface that permits clustering, searching and visualizing of protein-peptide complexes according to varied criteria. Propedia comprises over 19,000 high-resolution structures from the Protein Data Bank including structural and sequence information from protein-peptide complexes. The main advantage of Propedia over other peptide databases is that it allows a more comprehensive analysis of similarity and redundancy. It was constructed based on a hybrid clustering algorithm that compares and groups peptides by sequences, interface structures and binding sites. Propedia is available through a graphical, user-friendly and functional interface where users can retrieve, and analyze complexes and download each search data set. We performed case studies and verified that the utility of Propedia scores to rank promissing interacting peptides. In a study involving predicting peptides to inhibit SARS-CoV-2 main protease, we showed that Propedia scores related to similarity between different peptide complexes with SARS-CoV-2 main protease are in agreement with molecular dynamics free energy calculation. Conclusions Propedia is a database and tool to support structure-based rational design of peptides for special purposes. Protein-peptide interactions can be useful to predict, classifying and scoring complexes or for designing new molecules as well. Propedia is up-to-date as a ready-to-use webserver with a friendly and resourceful interface and is available at: https://bioinfo.dcc.ufmg.br/propedia.",Propedia,0.98778069,NA,0,Propedia,0.98778069,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/2/2021 +26450949,http://propepper.net,"ProPepper: a curated database for identification and analysis of peptide and immune-responsive epitope composition of cereal grain protein families. . 
ProPepper is a database that contains prolamin proteins identified from true grasses (Poaceae), their peptides obtained with single- and multi-enzyme in silico digestions as well as linear T- and B-cell-specific epitopes that are responsible for wheat-related food disorders. The integrated database and analysis platform contains datasets that are collected from multiple public databases (UniprotKB, IEDB, NCBI GenBank), manually curated and annotated, and interpreted in three main data tables: Protein-, Peptide- and Epitope list views that are cross-connected by unique identifications. Altogether 21 genera and 80 different species are represented. Currently, the database contains 2146 unique and complete protein sequences related to 2618 GenBank entries and 35 657 unique peptide sequences that are a result of 575 110 unique digestion events obtained by in silico digestion methods involving six proteolytic enzymes and their combinations. The interface allows advanced global and parametric search functions along with a download option, with direct connections to the relevant public databases. Database URL: https://propepper.net.",ProPepper,0.996425271,NA,0,ProPepper,0.996425271,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/8/2015 +22102570,http://proportal.mit.edu,"ProPortal: a resource for integrated systems biology of Prochlorococcus and its phage. ProPortal (http://proportal.mit.edu/) is a database containing genomic, metagenomic, transcriptomic and field data for the marine cyanobacterium Prochlorococcus. Our goal is to provide a source of cross-referenced data across multiple scales of biological organization--from the genome to the ecosystem--embracing the full diversity of ecotypic variation within this microbial taxon, its sister group, Synechococcus and phage that infect them. The site currently contains the genomes of 13 Prochlorococcus strains, 11 Synechococcus strains and 28 cyanophage strains that infect one or both groups. 
Cyanobacterial and cyanophage genes are clustered into orthologous groups that can be accessed by keyword search or through a genome browser. Users can also identify orthologous gene clusters shared by cyanobacterial and cyanophage genomes. Gene expression data for Prochlorococcus ecotypes MED4 and MIT9313 allow users to identify genes that are up or downregulated in response to environmental stressors. In addition, the transcriptome in synchronized cells grown on a 24-h light-dark cycle reveals the choreography of gene expression in cells in a 'natural' state. Metagenomic sequences from the Global Ocean Survey from Prochlorococcus, Synechococcus and phage genomes are archived so users can examine the differences between populations from diverse habitats. Finally, an example of cyanobacterial population data from the field is included.",ProPortal,0.995003283,NA,0,ProPortal,0.995003283,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/18/2011 +22102581,http://prorepeat.bioinformatics.nl,"ProRepeat: an integrated repository for studying amino acid tandem repeats in proteins. ProRepeat (http://prorepeat.bioinformatics.nl/) is an integrated curated repository and analysis platform for in-depth research on the biological characteristics of amino acid tandem repeats. ProRepeat collects repeats from all proteins included in the UniProt knowledgebase, together with 85 completely sequenced eukaryotic proteomes contained within the RefSeq collection. It contains non-redundant perfect tandem repeats, approximate tandem repeats and simple, low-complexity sequences, covering the majority of the amino acid tandem repeat patterns found in proteins. The ProRepeat web interface allows querying the repeat database using repeat characteristics like repeat unit and length, number of repetitions of the repeat unit and position of the repeat in the protein. 
Users can also search for repeats by the characteristics of repeat containing proteins, such as entry ID, protein description, sequence length, gene name and taxon. ProRepeat offers powerful analysis tools for finding biological interesting properties of repeats, such as the strong position bias of leucine repeats in the N-terminus of eukaryotic protein sequences, the differences of repeat abundance among proteomes, the functional classification of repeat containing proteins and GC content constrains of repeats' corresponding codons.",ProRepeat,0.997152328,NA,0,ProRepeat,0.997152328,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/18/2011 +29575358,http://protabank.org,"ProtaBank: A repository for protein design and engineering data. We present ProtaBank, a repository for storing, querying, analyzing, and sharing protein design and engineering data in an actively maintained and updated database. ProtaBank provides a format to describe and compare all types of protein mutational data, spanning a wide range of properties and techniques. It features a user-friendly web interface and programming layer that streamlines data deposition and allows for batch input and queries. The database schema design incorporates a standard format for reporting protein sequences and experimental data that facilitates comparison of results across different data sets. A suite of analysis and visualization tools are provided to facilitate discovery, to guide future designs, and to benchmark and train new predictive tools and algorithms. ProtaBank will provide a valuable resource to the protein engineering community by storing and safeguarding newly generated data, allowing for fast searching and identification of relevant data from the existing literature, and exploring correlations between disparate data sets. ProtaBank invites researchers to contribute data to the database to make it accessible for search and analysis. 
ProtaBank is available at https://protabank.org.",ProtaBank,0.99739188,NA,0,ProtaBank,0.99739188,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/30/2018 +33010159,http://cadd.zju.edu.cn/protacdb,"PROTAC-DB: an online database of PROTACs. Proteolysis-targeting chimeras (PROTACs), which selectively degrade targeted proteins by the ubiquitin-proteasome system, have emerged as a novel therapeutic technology with potential advantages over traditional inhibition strategies. In the past few years, this technology has achieved substantial progress and two PROTACs have been advanced into phase I clinical trials. However, this technology is still maturing and the design of PROTACs remains a great challenge. In order to promote the rational design of PROTACs, we present PROTAC-DB, a web-based open-access database that integrates structural information and experimental data of PROTACs. Currently, PROTAC-DB consists of 1662 PROTACs, 202 warheads (small molecules that target the proteins of interest), 65 E3 ligands (small molecules capable of recruiting E3 ligases) and 806 linkers, as well as their chemical structures, biological activities, and physicochemical properties. Except the biological activities of warheads and E3 ligands, PROTAC-DB also provides the degradation capacities, binding affinities and cellular activities for PROTACs. PROTAC-DB can be queried with two general searching approaches: text-based (target name, compound name or ID) and structure-based. In addition, for the convenience of users, a filtering tool for the searching results based on the physicochemical properties of compounds is also offered. PROTAC-DB is freely accessible at http://cadd.zju.edu.cn/protacdb/.",PROTAC-DB,0.975279614,NA,0,PROTAC-DB,0.975279614,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +31844049,http://protargetminer.genexplain.com,"ProTargetMiner as a proteome signature library of anticancer molecules for functional discovery. 
Deconvolution of targets and action mechanisms of anticancer compounds is fundamental in drug development. Here, we report on ProTargetMiner as a publicly available expandable proteome signature library of anticancer molecules in cancer cell lines. Based on 287 A549 adenocarcinoma proteomes affected by 56 compounds, the main dataset contains 7,328 proteins and 1,307,859 refined protein-drug pairs. These proteomic signatures cluster by compound targets and action mechanisms. The targets and mechanistic proteins are deconvoluted by partial least square modeling, provided through the website http://protargetminer.genexplain.com. For 9 molecules representing the most diverse mechanisms and the common cancer cell lines MCF-7, RKO and A549, deep proteome datasets are obtained. Combining data from the three cell lines highlights common drug targets and cell-specific differences. The database can be easily extended and merged with new compound signatures. ProTargetMiner serves as a chemical proteomics resource for the cancer research community, and can become a valuable tool in drug discovery.",ProTargetMiner,0.99520582,NA,0,ProTargetMiner,0.99520582,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/16/2019 +22110041,http://pcidb.russelllab.org,"ProtChemSI: a network of protein-chemical structural interactions. Progress in structure determination methods means that the set of experimentally determined 3D structures of proteins in complex with small molecules is growing exponentially. ProtChemSI exploits and extends this useful set of structures by both collecting and annotating the existing data as well as providing models of potential complexes inferred by protein or chemical structure similarity. The database currently includes 7704 proteins from 1803 organisms, 11,324 chemical compounds and 202, 289 complexes including 178,974 predicted. 
It is publicly available at http://pcidb.russelllab.org.",ProtChemSI,0.998405278,NA,0,ProtChemSI,0.998405278,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2011 +29377907,http://profiles.bs.ipm.ir/softwares/protdatatherm,"ProtDataTherm: A database for thermostability analysis and engineering of proteins. Protein thermostability engineering is a powerful tool to improve resistance of proteins against high temperatures and thereafter broaden their applications. For efficient protein thermostability engineering, different thermostability-classified data sources including sequences and 3D structures are needed for different protein families. However, no data source is available providing such data easily. It is the first release of ProtDataTherm database for analysis and engineering of protein thermostability which contains more than 14 million protein sequences categorized based on their thermal stability and protein family. This database contains data needed for better understanding protein thermostability and stability engineering. Providing categorized protein sequences and structures as psychrophilic, mesophilic and thermophilic makes this database useful for the development of new tools in protein stability prediction. This database is available at http://profiles.bs.ipm.ir/softwares/protdatatherm. As a proof of concept, the thermostability that improves mutations were suggested for one sample protein belonging to one of protein families with more than 20 mesophilic and thermophilic sequences and with known experimentally measured ΔT of mutations available within ProTherm database.",ProtDataTherm,0.960978508,NA,0,ProtDataTherm,0.960978508,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/29/2018 +25252780,http://tarshish.md.biu.ac.il/Ã,"PHI-DAC: protein homology database through dihedral angle conservation. Unlabelled Finding related conformations in the Protein Data Bank is essential in many areas of bioscience. 
To assist this task, we designed a dihedral angle database for searching protein segment homologs. The search engine relies on encoding of the protein coordinates into text characters representing amino acid sequence, φ and ψ dihedral angles. The search engine is advantageous owing to its high speed and interactive nature and is expected to assist scientists in discovering conformation homologs and evolutionary kinship. The search engine is fast, with query times lasting a few seconds, and freely available at http://tarshish.md.biu.ac.il/∼samsona. Supplementary information Supplementary data are available at Bioinformatics online.",NA,0,protein homology database,0.7521066,protein homology database,0.7521066,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,9/23/2014 +30985146,http://massive.ucsd.edu/ProteoSAFe/protein_explorer_splash.jsp,"ProteinExplorer: A Repository-Scale Resource for Exploration of Protein Detection in Public Mass Spectrometry Data Sets. High-throughput tandem mass spectrometry has enabled the detection and identification of over 75% of all proteins predicted to result in translated gene products in the human genome. In fact, the galloping rate of data acquisition and sharing of mass spectrometry data has led to the current availability of many tens of terabytes of public data in thousands of human data sets. The systematic reanalysis of these public data sets has been used to build a community-scale spectral library of 2.1 million precursors for over 1 million unique sequences from over 19,000 proteins (including spectra of synthetic peptides). However, it has remained challenging to find and inspect spectra of peptides covering functional protein regions or matching novel proteins. 
ProteinExplorer addresses these challenges with an intuitive interface mapping tens of millions of identifications to functional sites on nearly all human proteins while maintaining provenance for every identification back to the original data set and data file. Additionally, ProteinExplorer facilitates the selection and inspection of HPP-compliant peptides whose spectra can be matched to spectra of synthetic peptides and already includes HPP-compliant evidence for 107 missing (PE2, PE3, and PE4) and 23 dubious (PE5) proteins. Finally, ProteinExplorer allows users to rate spectra and to contribute to a community library of peptides entitled PrEdict (Protein Existance dictionary) mapping to novel proteins but whose preliminary identities have not yet been fully established with community-scale false discovery rates and synthetic peptide spectra. ProteinExplorer can be now be accessed at https://massive.ucsd.edu/ProteoSAFe/protein_explorer_splash.jsp .",ProteinExplorer,0.992175162,NA,0,ProteinExplorer,0.992175162,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/15/2018 +29040688,http://iomics.ugent.be/tabloidproteome,"The online Tabloid Proteome: an annotated database of protein associations. A complete knowledge of the proteome can only be attained by determining the associations between proteins, along with the nature of these associations (e.g. physical contact in protein-protein interactions, participation in complex formation or different roles in the same pathway). Despite extensive efforts in elucidating direct protein interactions, our knowledge on the complete spectrum of protein associations remains limited. We therefore developed a new approach that detects protein associations from identifications obtained after re-processing of large-scale, public mass spectrometry-based proteomics data. 
Our approach infers protein association based on the co-occurrence of proteins across many different proteomics experiments, and provides information that is almost completely complementary to traditional direct protein interaction studies. We here present a web interface to query and explore the associations derived from this method, called the online Tabloid Proteome. The online Tabloid Proteome also integrates biological knowledge from several existing resources to annotate our derived protein associations. The online Tabloid Proteome is freely available through a user-friendly web interface, which provides intuitive navigation and data exploration options for the user at http://iomics.ugent.be/tabloidproteome.",NA,0,Proteome,0.495420039,Proteome,0.495420039,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2018 +27789699,http://isoelectricpointdb.org,"Proteome-pI: proteome isoelectric point database. Proteome-pI is an online database containing information about predicted isoelectric points for 5029 proteomes calculated using 18 methods. The isoelectric point, the pH at which a particular molecule carries no net electrical charge, is an important parameter for many analytical biochemistry and proteomics techniques, especially for 2D gel electrophoresis (2D-PAGE), capillary isoelectric focusing, liquid chromatography-mass spectrometry and X-ray protein crystallography. The database, available at http://isoelectricpointdb.org allows the retrieval of virtual 2D-PAGE plots and the development of customised fractions of proteome based on isoelectric point and molecular weight. Moreover, Proteome-pI facilitates statistical comparisons of the various prediction methods as well as biological investigation of protein isoelectric point space in all kingdoms of life. For instance, using Proteome-pI data, it is clear that Eukaryotes, which evolved tight control of homeostasis, encode proteins with pI values near the cell pH. 
In contrast, Archaea living frequently in extreme environments can possess proteins with a wide range of isoelectric points. The database includes various statistics and tools for interactive browsing, searching and sorting. Apart from data for individual proteomes, datasets corresponding to major protein databases such as UniProtKB/TrEMBL and the NCBI non-redundant (nr) database have also been precalculated and made available in CSV format.",Proteome-pI,0.946856538,NA,0,Proteome-pI,0.946856538,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/26/2016 +25414335,http://proteomescout.wustl.edu,"ProteomeScout: a repository and analysis resource for post-translational modifications and proteins. ProteomeScout (https://proteomescout.wustl.edu) is a resource for the study of proteins and their post-translational modifications (PTMs) consisting of a database of PTMs, a repository for experimental data, an analysis suite for PTM experiments, and a tool for visualizing the relationships between complex protein annotations. The PTM database is a compendium of public PTM data, coupled with user-uploaded experimental data. ProteomeScout provides analysis tools for experimental datasets, including summary views and subset selection, which can identify relationships within subsets of data by testing for statistically significant enrichment of protein annotations. Protein annotations are incorporated in the ProteomeScout database from external resources and include terms such as Gene Ontology annotations, domains, secondary structure and non-synonymous polymorphisms. These annotations are available in the database download, in the analysis tools and in the protein viewer. The protein viewer allows for the simultaneous visualization of annotations in an interactive web graphic, which can be exported in Scalable Vector Graphics (SVG) format. 
Finally, quantitative data measurements associated with public experiments are also easily viewable within protein records, allowing researchers to see how PTMs change across different contexts. ProteomeScout should prove useful for protein researchers and should benefit the proteomics community by providing a stable repository for PTM experiments.",ProteomeScout,0.997024179,NA,0,ProteomeScout,0.997024179,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2014 +33995478,http://www.proteomexchange.org,"Construction of Unified Human Antimicrobial and Immunomodulatory Peptide Database and Examination of Antimicrobial and Immunomodulatory Peptides in Alzheimer's Disease Using Network Analysis of Proteomics Datasets. The reanalysis of genomics and proteomics datasets by bioinformatics approaches is an appealing way to examine large amounts of reliable data. This can be especially true in cases such as Alzheimer's disease, where the access to biological samples, along with well-defined patient information can be challenging. Considering the inflammatory part of Alzheimer's disease, our aim was to examine the presence of antimicrobial and immunomodulatory peptides in human proteomic datasets deposited in the publicly available proteomics database ProteomeXchange (http://www.proteomexchange.org/). First, a unified, comprehensive human antimicrobial and immunomodulatory peptide database, containing all known human antimicrobial and immunomodulatory peptides was constructed and used along with the datasets containing high-quality proteomics data originating from the examination of Alzheimer's disease and control groups. A throughout network analysis was carried out, and the enriched GO functions were examined. Less than 1% of all identified proteins in the brain were antimicrobial and immunomodulatory peptides, but the alterations characteristic of Alzheimer's disease could be recapitulated with their analysis. 
Our data emphasize the key role of the innate immune system and blood clotting in the development of Alzheimer's disease. The central role of antimicrobial and immunomodulatory peptides suggests their utilization as potential targets for mechanistic studies and future therapies.",ProteomeXchange,0.762454331,NA,0,ProteomeXchange,0.762454331,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,4/28/2021 +26316313,http://cancerproteomics.uio.no,"Consolidation of proteomics data in the Cancer Proteomics database. Cancer is a class of diseases characterized by abnormal cell growth and one of the major reasons for human deaths. Proteins are involved in the molecular mechanisms leading to cancer, furthermore they are affected by anti-cancer drugs, and protein biomarkers can be used to diagnose certain cancer types. Therefore, it is important to explore the proteomics background of cancer. In this report, we developed the Cancer Proteomics database to re-interrogate published proteome studies investigating cancer. The database is divided in three sections related to cancer processes, cancer types, and anti-cancer drugs. Currently, the Cancer Proteomics database contains 9778 entries of 4118 proteins extracted from 143 scientific articles covering all three sections: cell death (cancer process), prostate cancer (cancer type) and platinum-based anti-cancer drugs including carboplatin, cisplatin, and oxaliplatin (anti-cancer drugs). The detailed information extracted from the literature includes basic information about the articles (e.g., PubMed ID, authors, journal name, publication year), information about the samples (type, study/reference, prognosis factor), and the proteomics workflow (Subcellular fractionation, protein, and peptide separation, mass spectrometry, quantification). Useful annotations such as hyperlinks to UniProt and PubMed were included. In addition, many filtering options were established as well as export functions. 
The database is freely available at http://cancerproteomics.uio.no.",Proteomics,0.714998305,NA,0,Proteomics,0.714998305,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/26/2015 +"29106664, 31665479",http://www.ProteomicsDB.org,"ProteomicsDB. ProteomicsDB (https://www.ProteomicsDB.org) is a protein-centric in-memory database for the exploration of large collections of quantitative mass spectrometry-based proteomics data. ProteomicsDB was first released in 2014 to enable the interactive exploration of the first draft of the human proteome. To date, it contains quantitative data from 78 projects totalling over 19k LC-MS/MS experiments. A standardized analysis pipeline enables comparisons between multiple datasets to facilitate the exploration of protein expression across hundreds of tissues, body fluids and cell lines. We recently extended the data model to enable the storage and integrated visualization of other quantitative omics data. This includes transcriptomics data from e.g. NCBI GEO, protein-protein interaction information from STRING, functional annotations from KEGG, drug-sensitivity/selectivity data from several public sources and reference mass spectra from the ProteomeTools project. The extended functionality transforms ProteomicsDB into a multi-purpose resource connecting quantification and meta-data for each protein. The rich user interface helps researchers to navigate all data sources in either a protein-centric or multi-protein-centric manner. Several options are available to download data manually, while our application programming interface enables accessing quantitative data systematically.",ProteomicsDB,0.997059524,NA,0,ProteomicsDB,0.997059524,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +21536137,http://www.proteopedia.org,"Proteopedia: a status report on the collaborative, 3D web-encyclopedia of proteins and other biomolecules. 
Proteopedia is a collaborative, 3D web-encyclopedia of protein, nucleic acid and other biomolecule structures. Created as a means for communicating biomolecule structures to a diverse scientific audience, Proteopedia (http://www.proteopedia.org) presents structural annotation in an intuitive, interactive format and allows members of the scientific community to easily contribute their own annotations. Here, we provide a status report on Proteopedia by describing advances in the web resource since its inception three and a half years ago, focusing on features of potential direct use to the scientific community. We discuss its progress as a collaborative 3D-encyclopedia of structures as well as its use as a complement to scientific publications and PowerPoint presentations. We also describe Proteopedia's use for 3D visualization in structure-related pedagogy.",Proteopedia,0.995484591,NA,0,Proteopedia,0.995484591,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/23/2011 +27115628,http://www.abren.net/protherm,"Applications of Protein Thermodynamic Database for Understanding Protein Mutant Stability and Designing Stable Mutants. Protein stability is the free energy difference between unfolded and folded states of a protein, which lies in the range of 5-25 kcal/mol. Experimentally, protein stability is measured with circular dichroism, differential scanning calorimetry, and fluorescence spectroscopy using thermal and denaturant denaturation methods. These experimental data have been accumulated in the form of a database, ProTherm, thermodynamic database for proteins and mutants. It also contains sequence and structure information of a protein, experimental methods and conditions, and literature information. Different features such as search, display, and sorting options and visualization tools have been incorporated in the database. ProTherm is a valuable resource for understanding/predicting the stability of proteins and it can be accessed at http://www.abren.net/protherm/ . 
ProTherm has been effectively used to examine the relationship among thermodynamics, structure, and function of proteins. We describe the recent progress on the development of methods for understanding/predicting protein stability, such as (1) general trends on mutational effects on stability, (2) relationship between the stability of protein mutants and amino acid properties, (3) applications of protein three-dimensional structures for predicting their stability upon point mutations, (4) prediction of protein stability upon single mutations from amino acid sequence, and (5) prediction methods for addressing double mutants. A list of online resources for predicting has also been provided.",ProTherm,0.994540989,NA,0,ProTherm,0.994540989,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2016 +"33196841, 34826364",http://web.iitm.ac.in/bioinfo2/prothermdb/index.html,"ProThermDB: thermodynamic database for proteins and mutants revisited after 15 years. ProThermDB is an updated version of the thermodynamic database for proteins and mutants (ProTherm), which has ∼31 500 data on protein stability, an increase of 84% from the previous version. It contains several thermodynamic parameters such as melting temperature, free energy obtained with thermal and denaturant denaturation, enthalpy change and heat capacity change along with experimental methods and conditions, sequence, structure and literature information. Besides, the current version of the database includes about 120 000 thermodynamic data obtained for different organisms and cell lines, which are determined by recent high throughput proteomics techniques using whole-cell approaches. In addition, we provided a graphical interface for visualization of mutations at sequence and structure levels. ProThermDB is cross-linked with other relevant databases, PDB, UniProt, PubMed etc. It is freely available at https://web.iitm.ac.in/bioinfo2/prothermdb/index.html without any login requirements. 
It is implemented in Python, HTML and JavaScript, and supports the latest versions of major browsers, such as Firefox, Chrome and Safari.",ProThermDB,0.998294175,NA,0,ProThermDB,0.998294175,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2021 +25911153,http://www.protobug.cs.huji.ac.il,"ProtoBug: functional families from the complete proteomes of insects. ProtoBug (http://www.protobug.cs.huji.ac.il) is a database and resource of protein families in Arthropod genomes. ProtoBug platform presents the relatedness of complete proteomes from 17 insects as well as a proteome of the crustacean, Daphnia pulex. The represented proteomes from insects include louse, bee, beetle, ants, flies and mosquitoes. Based on an unsupervised clustering method, protein sequences were clustered into a hierarchical tree, called ProtoBug. ProtoBug covers about 300,000 sequences that are partitioned to families. At the default setting, all sequences are partitioned to ∼20,000 families (excluding singletons). From the species perspective, each of the 18 analysed proteomes is composed of 5000-8000 families. In the regime of the advanced operational mode, the ProtoBug provides rich navigation capabilities for touring the hierarchy of the families at any selected resolution. A proteome viewer shows the composition of sequences from any of the 18 analysed proteomes. Using functional annotation from an expert system (Pfam) we assigned domains, families and repeats by 4400 keywords that cover 73% of the sequences. A strict inference protocol is applied for expanding the functional knowledge. Consequently, secured annotations were associated with 81% of the proteins, and with 70% of the families (≥10 proteins each). ProtoBug is a database and webtool with rich visualization and navigation tools. The properties of each family in relation to other families in the ProtoBug tree, and in view of the taxonomy composition are reported. 
Furthermore, the user can paste its own sequences to find relatedness to any of the ProtoBug families. The database and the navigation tools are the basis for functional discoveries that span 350 million years of evolution of Arthropods. ProtoBug is available with no restriction at: www.protobug.cs.huji.ac.il. Database URL: www.protobug.cs.huji.ac.il",ProtoBug,0.979299188,NA,0,ProtoBug,0.979299188,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/24/2015 +28498885,http://www.iitm.ac.in/bioinfo/PROXiMATE,"PROXiMATE: a database of mutant protein-protein complex thermodynamics and kinetics. Summary We have developed PROXiMATE, a database of thermodynamic data for more than 6000 missense mutations in 174 heterodimeric protein-protein complexes, supplemented with interaction network data from STRING database, solvent accessibility, sequence, structural and functional information, experimental conditions and literature information. Additional features include complex structure visualization, search and display options, download options and a provision for users to upload their data. Availability and implementation The database is freely available at http://www.iitm.ac.in/bioinfo/PROXiMATE/ . The website is implemented in Python, and supports recent versions of major browsers such as IE10, Firefox, Chrome and Opera. Contact gromiha@iitm.ac.in. Supplementary information Supplementary data are available at Bioinformatics online.",PROXiMATE,0.997301996,NA,0,PROXiMATE,0.997301996,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2017 +31796964,http://scbb.ihbt.res.in/PRP,"Plant Regulomics Portal (PRP): a comprehensive integrated regulatory information and analysis portal for plant genomes. . Gene regulation is a highly complex and networked phenomenon where multiple tiers of control determine the cell state in a spatio-temporal manner. Among these, the transcription factors, DNA and histone modifications, and post-transcriptional control by small RNAs like miRNAs serve as major regulators. 
An understanding of the integrative and spatio-temporal impact of these regulatory factors can provide better insights into the state of a 'cell system'. Yet, there are limited resources available to this effect. Therefore, we hereby report an integrative information portal (Plant Regulomics Portal; PRP) for plants for the first time. The portal has been developed by integrating a huge amount of curated data from published sources, RNA-, methylome- and sRNA/miRNA sequencing, histone modifications and repeats, gene ontology, digital gene expression and characterized pathways. The key features of the portal include a regulatory search engine for fetching numerous analytical outputs and tracks of the abovementioned regulators and also a genome browser for integrated visualization of the search results. It also has numerous analytical features for analyses of transcription factors (TFs) and sRNA/miRNA, spot-specific methylation, gene expression and interactions and details of pathways for any given genomic element. It can also provide information on potential RdDM regulation, while facilitating enrichment analysis, generation of visually rich plots and downloading of data in a selective manner. Visualization of intricate biological networks is an important feature which utilizes the Neo4j Graph database making analysis of relationships and long-range system viewing possible. Till date, PRP hosts 571-GB processed data for four plant species namely Arabidopsis thaliana, Oryza sativa subsp. japonica, Zea mays and Glycine max. Database URL: https://scbb.ihbt.res.in/PRP.",PRP,0.96961385,Plant Regulomics Portal,0.915672481,PRP,0.96961385,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +22669905,http://pocket.uchicago.edu/psc,"PSC: protein surface classification. We recently proposed to classify proteins by their functional surfaces. 
Using the structural attributes of functional surfaces, we inferred the pairwise relationships of proteins and constructed an expandable database of protein surface classification (PSC). As the functional surface(s) of a protein is the local region where the protein performs its function, our classification may reflect the functional relationships among proteins. Currently, PSC contains a library of 1974 surface types that include 25,857 functional surfaces identified from 24,170 bound structures. The search tool in PSC empowers users to explore related surfaces that share similar local structures and core functions. Each functional surface is characterized by structural attributes, which are geometric, physicochemical or evolutionary features. The attributes have been normalized as descriptors and integrated to produce a profile for each functional surface in PSC. In addition, binding ligands are recorded for comparisons among homologs. PSC allows users to exploit related binding surfaces to reveal the changes in functionally important residues on homologs that have led to functional divergence during evolution. The substitutions at the key residues of a spatial pattern may determine the functional evolution of a protein. In PSC (http://pocket.uchicago.edu/psc/), a pool of changes in residues on similar functional surfaces is provided.",PSC,0.840825796,protein surface classification,0.706926028,PSC,0.840825796,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/4/2012 +22080505,http://idp1.force.cs.is.nagoya-u.ac.jp/pscdb,"PSCDB: a database for protein structural change upon ligand binding. Proteins are flexible molecules that undergo structural changes to function. The Protein Data Bank contains multiple entries for identical proteins determined under different conditions, e.g. with and without a ligand molecule, which provides important information for understanding the structural changes related to protein functions. 
We gathered 839 protein structural pairs of ligand-free and ligand-bound states from monomeric or homo-dimeric proteins, and constructed the Protein Structural Change DataBase (PSCDB). In the database, we focused on whether the motions were coupled with ligand binding. As a result, the protein structural changes were classified into seven classes, i.e. coupled domain motion (59 structural changes), independent domain motion (70), coupled local motion (125), independent local motion (135), burying ligand motion (104), no significant motion (311) and other type motion (35). PSCDB provides lists of each class. On each entry page, users can view detailed information about the motion, accompanied by a morphing animation of the structural changes. PSCDB is available at http://idp1.force.cs.is.nagoya-u.ac.jp/pscdb/.",PSCDB,0.963445306,Protein Structural Change DataBase,0.849862774,PSCDB,0.963445306,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/10/2011 +32345779,http://bicresources.jcbose.ac.in,"PSCRIdb: A database of regulatory interactions and networks of pluripotent stem cell lines. . Pluripotency in stem cells is regulated by a complex network between the transcription factors, signaling molecules, mRNAs, and epigenetic regulators like non-coding RNAs. Different pluripotent stem cell (PSC) lines were isolated and characterized to study the regulatory network topology to understand the mechanism that control developmental potential of pluripotent cells. PSCRIdb is a manually curated database of regulatory interactions including protein-protein, protein-DNA, gene-gene, and miRNA-mRNA interactions in mouse and human pluripotent stem cells including embryonic stem cells and embryonic carcinoma cells. At present, 22 different mouse and human pluripotent stem-cell-line-specific regulatory interactions are compiled in the database. 
Detailed information of the four types of interaction data are presented in tabular format and graphical network view in Cytoscape layout. The database is available at http://bicresources.jcbose.ac.in/ ssaha4/pscridb. The database contains 3037 entries of experimentally validated molecular interactions that can be useful for systematic study of pluripotency integrating multi-omics data. In summary, the database can be a useful resource for identification of regulatory networks present in different pluripotent stem cell lines.",PSCRIdb,0.996715069,NA,0,PSCRIdb,0.996715069,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +34122478,http://forestry.fafu.edu.cn/db/SDX,"PSDX: A Comprehensive Multi-Omics Association Database of Populus trichocarpa With a Focus on the Secondary Growth in Response to Stresses. Populus trichocarpa (P. trichocarpa) is a model tree for the investigation of wood formation. In recent years, researchers have generated a large number of high-throughput sequencing data in P. trichocarpa. However, no comprehensive database that provides multi-omics associations for the investigation of secondary growth in response to diverse stresses has been reported. Therefore, we developed a public repository that presents comprehensive measurements of gene expression and post-transcriptional regulation by integrating 144 RNA-Seq, 33 ChIP-seq, and six single-molecule real-time (SMRT) isoform sequencing (Iso-seq) libraries prepared from tissues subjected to different stresses. All the samples from different studies were analyzed to obtain gene expression, co-expression network, and differentially expressed genes (DEG) using unified parameters, which allowed comparison of results from different studies and treatments. In addition to gene expression, we also identified and deposited pre-processed data about alternative splicing (AS), alternative polyadenylation (APA) and alternative transcription initiation (ATI). 
The post-transcriptional regulation, differential expression, and co-expression network datasets were integrated into a new P. trichocarpa Stem Differentiating Xylem (PSDX) database (http://forestry.fafu.edu.cn/db/SDX), which further highlights gene families of RNA-binding proteins and stress-related genes. The PSDX also provides tools for data query, visualization, a genome browser, and the BLAST option for sequence-based query. Much of the data is also available for bulk download. The availability of PSDX contributes to the research related to the secondary growth in response to stresses in P. trichocarpa, which will provide new insights that can be useful for the improvement of stress tolerance in woody plants.",PSDX,0.995595694,trichocarpa Stem Differentiating,0.907250769,PSDX,0.995595694,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/20/2021 +23396300,http://pseudomap.mbc.nctu.edu.tw,"pseudoMap: an innovative and comprehensive resource for identification of siRNA-mediated mechanisms in human transcribed pseudogenes. RNA interference (RNAi) is a gene silencing process within living cells, which is controlled by the RNA-induced silencing complex with a sequence-specific manner. In flies and mice, the pseudogene transcripts can be processed into short interfering RNAs (siRNAs) that regulate protein-coding genes through the RNAi pathway. Following these findings, we construct an innovative and comprehensive database to elucidate siRNA-mediated mechanism in human transcribed pseudogenes (TPGs). To investigate TPG producing siRNAs that regulate protein-coding genes, we mapped the TPGs to small RNAs (sRNAs) that were supported by publicly deep sequencing data from various sRNA libraries and constructed the TPG-derived siRNA-target interactions. In addition, we also presented that TPGs can act as a target for miRNAs that actually regulate the parental gene. 
To enable the systematic compilation and updating of these results and additional information, we have developed a database, pseudoMap, capturing various types of information, including sequence data, TPG and cognate annotation, deep sequencing data, RNA-folding structure, gene expression profiles, miRNA annotation and target prediction. As our knowledge, pseudoMap is the first database to demonstrate two mechanisms of human TPGs: encoding siRNAs and decoying miRNAs that target the parental gene. pseudoMap is freely accessible at http://pseudomap.mbc.nctu.edu.tw/. Database URL: http://pseudomap.mbc.nctu.edu.tw/",pseudoMap,0.987089634,NA,0,pseudoMap,0.987089634,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/8/2013 +26578582,http://www.pseudomonas.com,"Enhanced annotations and features for comparing thousands of Pseudomonas genomes in the Pseudomonas genome database. The Pseudomonas Genome Database (http://www.pseudomonas.com) is well known for the application of community-based annotation approaches for producing a high-quality Pseudomonas aeruginosa PAO1 genome annotation, and facilitating whole-genome comparative analyses with other Pseudomonas strains. To aid analysis of potentially thousands of complete and draft genome assemblies, this database and analysis platform was upgraded to integrate curated genome annotations and isolate metadata with enhanced tools for larger scale comparative analysis and visualization. Manually curated gene annotations are supplemented with improved computational analyses that help identify putative drug targets and vaccine candidates or assist with evolutionary studies by identifying orthologs, pathogen-associated genes and genomic islands. The database schema has been updated to integrate isolate metadata that will facilitate more powerful analysis of genomes across datasets in the future. 
We continue to place an emphasis on providing high-quality updates to gene annotations through regular review of the scientific literature and using community-based approaches including a major new Pseudomonas community initiative for the assignment of high-quality gene ontology terms to genes. As we further expand from thousands of genomes, we plan to provide enhancements that will aid data visualization and analysis arising from whole-genome comparative studies including more pan-genome and population-based approaches.",Pseudomonas Genome,0.462780903,NA,0,Pseudomonas Genome,0.462780903,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/17/2015 +31599098,http://big.cdu.edu.cn/psmd,"PSMD: An extensive database for pan-species microsatellite investigation and marker development. Microsatellites are widely distributed throughout nearly all genomes which have been extensively exploited as powerful genetic markers for diverse applications due to their high polymorphisms. Their length variations are involved in gene regulation and implicated in numerous genetic diseases even in cancers. Although much effort has been devoted in microsatellite database construction, the existing microsatellite databases still had some drawbacks, such as limited number of species, unfriendly export format, missing marker development, lack of compound microsatellites and absence of gene annotation, which seriously restricted researchers to perform downstream analysis. In order to overcome the above limitations, we developed PSMD (Pan-Species Microsatellite Database, http://big.cdu.edu.cn/psmd/) as a web-based database to facilitate researchers to easily identify microsatellites, exploit reliable molecular markers and compare microsatellite distribution pattern on genome-wide scale. In current release, PSMD comprises 678,106,741 perfect microsatellites and 43,848,943 compound microsatellites from 18,408 organisms, which covered almost all species with available genomic data. 
In addition to interactive browse interface, PSMD also offers a flexible filter function for users to quickly gain desired microsatellites from large data sets. PSMD allows users to export GFF3 formatted file and CSV formatted statistical file for downstream analysis. We also implemented an online tool for analysing occurrence of microsatellites with user-defined parameters. Furthermore, Primer3 was embedded to help users to design high-quality primers with customizable settings. To our knowledge, PSMD is the most extensive resource which is likely to be adopted by scientists engaged in biological, medical, environmental and agricultural research.",PSMD,0.98416996,Pan-Species Microsatellite Database,0.979983436,PSMD,0.98416996,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2019 +26759061,http://www.bio-bigdata.com/Psmir,"Psmir: a database of potential associations between small molecules and miRNAs. miRNAs are key post-transcriptional regulators of many essential biological processes, and their dysregulation has been validated in almost all human cancers. Restoring aberrantly expressed miRNAs might be a novel therapeutics. Recently, many studies have demonstrated that small molecular compounds can affect miRNA expression. Thus, prediction of associations between small molecules and miRNAs is important for investigation of miRNA-targeted drugs. Here, we analyzed 39 miRNA-perturbed gene expression profiles, and then calculated the similarity of transcription responses between miRNA perturbation and drug treatment to predict drug-miRNA associations. At the significance level of 0.05, we obtained 6501 candidate associations between 1295 small molecules and 25 miRNAs, which included 624 FDA approved drugs. Finally, we constructed the Psmir database to store all potential associations and the related materials. 
In a word, Psmir served as a valuable resource for dissecting the biological significance in small molecules' effects on miRNA expression, which will facilitate developing novel potential therapeutic targets or treatments for human cancers. Psmir is supported by all major browsers, and is freely available at http://www.bio-bigdata.com/Psmir/.",Psmir,0.9962219,NA,0,Psmir,0.9962219,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/13/2016 +"26602691, 33313828",http://db.psort.org,"PSORTdb: expanding the bacteria and archaea protein subcellular localization database to better reflect diversity in cell envelope structures. Protein subcellular localization (SCL) is important for understanding protein function, genome annotation, and has practical applications such as identification of potential vaccine components or diagnostic/drug targets. PSORTdb (http://db.psort.org) comprises manually curated SCLs for proteins which have been experimentally verified (ePSORTdb), as well as pre-computed SCL predictions for deduced proteomes from bacterial and archaeal complete genomes available from NCBI (cPSORTdb). We now report PSORTdb 3.0. It features improvements increasing user-friendliness, and further expands both ePSORTdb and cPSORTdb with a focus on improving protein SCL data in cases where it is most difficult-proteins associated with non-classical Gram-positive/Gram-negative/Gram-variable cell envelopes. ePSORTdb data curation was expanded, including adding in additional cell envelope localizations, and incorporating markers for cPSORTdb to automatically computationally identify if new genomes to be analysed fall into certain atypical cell envelope categories (i.e. Deinococcus-Thermus, Thermotogae, Corynebacteriales/Corynebacterineae, including Mycobacteria). The number of predicted proteins in cPSORTdb has increased from 3,700,000 when PSORTdb 2.0 was released to over 13,000,000 currently. 
PSORTdb 3.0 will be of wider use to researchers studying a greater diversity of monoderm or diderm microbes, including medically, agriculturally and industrially important species that have non-classical outer membranes or other cell envelope features.",PSORTdb,0.997708738,NA,0,PSORTdb,0.997708738,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +25514926,http://www.phosphosite.org,"PhosphoSitePlus, 2014: mutations, PTMs and recalibrations. PhosphoSitePlus(®) (PSP, http://www.phosphosite.org/), a knowledgebase dedicated to mammalian post-translational modifications (PTMs), contains over 330,000 non-redundant PTMs, including phospho, acetyl, ubiquityl and methyl groups. Over 95% of the sites are from mass spectrometry (MS) experiments. In order to improve data reliability, early MS data have been reanalyzed, applying a common standard of analysis across over 1,000,000 spectra. Site assignments with P > 0.05 were filtered out. Two new downloads are available from PSP. The 'Regulatory sites' dataset includes curated information about modification sites that regulate downstream cellular processes, molecular functions and protein-protein interactions. The 'PTMVar' dataset, an intersect of missense mutations and PTMs from PSP, identifies over 25,000 PTMVars (PTMs Impacted by Variants) that can rewire signaling pathways. The PTMVar data include missense mutations from UniPROTKB, TCGA and other sources that cause over 2000 diseases or syndromes (MIM) and polymorphisms, or are associated with hundreds of cancers. PTMVars include 18 548 phosphorlyation sites, 3412 ubiquitylation sites, 2316 acetylation sites, 685 methylation sites and 245 succinylation sites.",PSP,0.959233403,NA,0,PSP,0.959233403,1,22135298,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,12/16/2014 +30587128,http://syslab5.nchu.edu.tw/PSRN,"Plant stress RNA-seq Nexus: a stress-specific transcriptome database in plant cells. 
Background Abiotic and biotic stresses severely affect the growth and reproduction of plants and crops. Determining the critical molecular mechanisms and cellular processes in response to stresses will provide biological insight for addressing both climate change and food crises. RNA sequencing (RNA-Seq) is a revolutionary tool that has been used extensively in plant stress research. However, no existing large-scale RNA-Seq database has been designed to provide information on the stress-specific differentially expressed transcripts that occur across diverse plant species and various stresses. Results We have constructed a comprehensive database, the plant stress RNA-Seq nexus (PSRN), which includes 12 plant species, 26 plant-stress RNA-Seq datasets, and 937 samples. All samples are assigned to 133 stress-specific subsets, which are constructed into 254 subset pairs, a comparison between selected two subsets, for stress-specific differentially expressed transcript identification. Conclusions PSRN is an open resource for intuitive data exploration, providing expression profiles of coding-transcript/lncRNA and identifying which transcripts are differentially expressed between different stress-specific subsets, in order to support researchers generating new biological insights and hypotheses in molecular breeding or evolution. PSRN is freely available at http://syslab5.nchu.edu.tw/PSRN .",PSRN,0.994711161,plant stress RNA-Seq nexus,0.983671112,PSRN,0.994711161,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/27/2018 +34059664,http://www.pssrd.info,"Comprehensive analysis of SSRs and database construction using all complete gene-coding sequences in major horticultural and representative plants. Simple sequence repeats (SSRs) are one of the most important genetic markers and widely exist in most species. Here, we identified 249,822 SSRs from 3,951,919 genes in 112 plants. Then, we conducted a comprehensive analysis of these SSRs and constructed a plant SSR database (PSSRD). 
Interestingly, more SSRs were found in lower plants than in higher plants, showing that lower plants needed to adapt to early extreme environments. Four specific enriched functional terms in the lower plant Chlamydomonas reinhardtii were detected when it was compared with seven other higher plants. In addition, Guanylate_cyc existed in more genes of lower plants than of higher plants. In our PSSRD, we constructed an interactive plotting function in the chart interface, and users can easily view the detailed information of SSRs. All SSR information, including sequences, primers, and annotations, can be downloaded from our database. Moreover, we developed Web SSR Finder and Batch SSR Finder tools, which can be easily used for identifying SSRs. Our database was developed using PHP, HTML, JavaScript, and MySQL, which are freely available at http://www.pssrd.info/ . We conducted an analysis of the Myb gene families and flowering genes as two applications of the PSSRD. Further analysis indicated that whole-genome duplication and whole-genome triplication played a major role in the expansion of the Myb gene families. These SSR markers in our database will greatly facilitate comparative genomics and functional genomics studies in the future.",PSSRD,0.96689924,SSR database,0.615605434,PSSRD,0.96689924,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/1/2021 +25964630,"http://www.psygenet.org/, http://opendatacommons.org/licenses/odbl/1.0","PsyGeNET: a knowledge platform on psychiatric disorders and their genes. Unlabelled PsyGeNET (Psychiatric disorders and Genes association NETwork) is a knowledge platform for the exploratory analysis of psychiatric diseases and their associated genes. PsyGeNET is composed of a database and a web interface supporting data search, visualization, filtering and sharing. PsyGeNET integrates information from DisGeNET and data extracted from the literature by text mining, which has been curated by domain experts. 
It currently contains 2642 associations between 1271 genes and 37 psychiatric disease concepts. In its first release, PsyGeNET is focused on three psychiatric disorders: major depression, alcohol and cocaine use disorders. PsyGeNET represents a comprehensive, open access resource for the analysis of the molecular mechanisms underpinning psychiatric disorders and their comorbidities. Availability and implementation The PysGeNET platform is freely available at http://www.psygenet.org/. The PsyGeNET database is made available under the Open Database License (http://opendatacommons.org/licenses/odbl/1.0/). Contact lfurlong@imim.es Supplementary information Supplementary data are available at Bioinformatics online.",PsyGeNET,0.997668445,Psychiatric disorders and Genes association NETwork,0.906647378,PsyGeNET,0.997668445,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/11/2015 +31809863,http://psymukb.net,"PsyMuKB: An Integrative De Novo Variant Knowledge Base for Developmental Disorders. De novo variants (DNVs) are one of the most significant contributors to severe early-onset genetic disorders such as autism spectrum disorder, intellectual disability, and other developmental and neuropsychiatric (DNP) disorders. Presently, a plethora of DNVs have been identified using next-generation sequencing, and many efforts have been made to understand their impact at the gene level. However, there has been little exploration of the effects at the isoform level. The brain contains a high level of alternative splicing and regulation, and exhibits a more divergent splicing program than other tissues. Therefore, it is crucial to explore variants at the transcriptional regulation level to better interpret the mechanisms underlying DNP disorders. To facilitate a better usage and improve the isoform-level interpretation of variants, we developed NeuroPsychiatric Mutation Knowledge Base (PsyMuKB). 
It contains a comprehensive, carefully curated list of DNVs with transcriptional and translational annotations to enable identification of isoform-specific mutations. PsyMuKB allows a flexible search of genes or variants and provides both table-based descriptions and associated visualizations, such as expression, transcript genomic structures, protein interactions, and the mutation sites mapped on the protein structures. It also provides an easy-to-use web interface, allowing users to rapidly visualize the locations and characteristics of mutations and the expression patterns of the impacted genes and isoforms. PsyMuKB thus constitutes a valuable resource for identifying tissue-specific DNVs for further functional studies of related disorders. PsyMuKB is freely accessible at http://psymukb.net.",PsyMuKB,0.996618569,NeuroPsychiatric Mutation Knowledge Base,0.886189427,PsyMuKB,0.996618569,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2019 +23162083,http://lilab.ecust.edu.cn/ptid,"PTID: an integrated web resource and computational tool for agrochemical discovery. Summary Although in silico drug discovery approaches are crucial for the development of pharmaceuticals, their potential advantages in agrochemical industry have not been realized. The challenge for computer-aided methods in agrochemical arena is a lack of sufficient information for both pesticides and their targets. Therefore, it is important to establish such knowledge repertoire that contains comprehensive pesticides' profiles, which include physicochemical properties, environmental fates, toxicities and mode of actions. Here, we present an integrated platform called Pesticide-Target interaction database (PTID), which comprises a total of 1347 pesticides with rich annotation of ecotoxicological and toxicological data as well as 13 738 interactions of pesticide-target and 4245 protein terms via text mining. 
Additionally, through the integration of ChemMapper, an in-house computational approach to polypharmacology, PTID can be used as a computational platform to identify pesticides targets and design novel agrochemical products. Availability http://lilab.ecust.edu.cn/ptid/. Contact hlli@ecust.edu.cn; xhqian@ecust.edu.cn Supplementary information Supplementary data are available at Bioinformatics online.",PTID,0.993905127,Pesticide-Target interaction database,0.957429435,PTID,0.993905127,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/18/2012 +24857970,http://www.dsimb.inserm.fr/dsimb_tools/PTM-SD,"PTM-SD: a database of structurally resolved and annotated posttranslational modifications in proteins. . Posttranslational modifications (PTMs) define covalent and chemical modifications of protein residues. They play important roles in modulating various biological functions. Current PTM databases contain important sequence annotations but do not provide informative 3D structural resource about these modifications. Posttranslational modification structural database (PTM-SD) provides access to structurally solved modified residues, which are experimentally annotated as PTMs. It combines different PTM information and annotation gathered from other databases, e.g. Protein DataBank for the protein structures and dbPTM and PTMCuration for fine sequence annotation. PTM-SD gives an accurate detection of PTMs in structural data. PTM-SD can be browsed by PDB id, UniProt accession number, organism and classic PTM annotation. Advanced queries can also be performed, i.e. detailed PTM annotations, amino acid type, secondary structure, SCOP class classification, PDB chain length and number of PTMs by chain. Statistics and analyses can be computed on a selected dataset of PTMs. Each PTM entry is detailed in a dedicated page with information on the protein sequence, local conformation with secondary structure and Protein Blocks. 
PTM-SD gives valuable information on observed PTMs in protein 3D structure, which is of great interest for studying sequence-structure- function relationships at the light of PTMs, and could provide insights for comparative modeling and PTM predictions protocols. Database URL: PTM-SD can be accessed at http://www.dsimb.inserm.fr/dsimb_tools/PTM-SD/.",PTM-SD,0.924359113,Posttranslational modification structural database,0.737245253,PTM-SD,0.924359113,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/24/2014 +26043787,http://gcode.kaist.ac.kr/ptmsnp,"Detection and analysis of disease-associated single nucleotide polymorphism influencing post-translational modification. Post-translational modification (PTM) plays a crucial role in biological functions and corresponding disease developments. Discovering disease-associated non-synonymous SNPs (nsSNPs) altering PTM sites can help to estimate the various PTM candidates involved in diseases, therefore, an integrated analysis between SNPs, PTMs and diseases is necessary. However, only a few types of PTMs affected by nsSNPs have been studied without considering disease-association until now. In this study, we developed a new database called PTM-SNP which contains a comprehensive collection of human nsSNPs that affect PTM sites, together with disease information. Total 179,325 PTM-SNPs were collected by aligning missense SNPs and stop-gain SNPs on PTM sites (position 0) or their flanking region (position -7 to 7). Disease-associated SNPs from GWAS catalogs were also matched with detected PTM-SNP to find disease associated PTM-SNPs. Our result shows PTM-SNPs are highly associated with diseases, compared with other nsSNP sites and functional classes including near gene, intron and so on. PTM-SNP can provide an insight about discovering important PTMs involved in the diseases easily through the web site. 
PTM-SNP is freely available at http://gcode.kaist.ac.kr/ptmsnp.",PTM-SNP,0.989071167,NA,0,PTM-SNP,0.989071167,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/29/2015 +"23193284, 25361965",http://ptmcode.embl.de,"PTMcode: a database of known and predicted functional associations between post-translational modifications in proteins. Post-translational modifications (PTMs) are involved in the regulation and structural stabilization of eukaryotic proteins. The combination of individual PTM states is a key to modulate cellular functions as became evident in a few well-studied proteins. This combinatorial setting, dubbed the PTM code, has been proposed to be extended to whole proteomes in eukaryotes. Although we are still far from deciphering such a complex language, thousands of protein PTM sites are being mapped by high-throughput technologies, thus providing sufficient data for comparative analysis. PTMcode (http://ptmcode.embl.de) aims to compile known and predicted PTM associations to provide a framework that would enable hypothesis-driven experimental or computational analysis of various scales. In its first release, PTMcode provides PTM functional associations of 13 different PTM types within proteins in 8 eukaryotes. They are based on five evidence channels: a literature survey, residue co-evolution, structural proximity, PTMs at the same residue and location within PTM highly enriched protein regions (hotspots). PTMcode is presented as a protein-based searchable database with an interactive web interface providing the context of the co-regulation of nearly 75 000 residues in >10 000 proteins.",PTMcode,0.961656034,NA,0,PTMcode,0.961656034,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/31/2014 +30244175,http://ptmd.biocuckoo.org,"PTMD: A Database of Human Disease-associated Post-translational Modifications. 
Various posttranslational modifications (PTMs) participate in nearly all aspects of biological processes by regulating protein functions, and aberrant states of PTMs are frequently implicated in human diseases. Therefore, an integral resource of PTM-disease associations (PDAs) would be a great help for both academic research and clinical use. In this work, we reported PTMD, a well-curated database containing PTMs that are associated with human diseases. We manually collected 1950 known PDAs in 749 proteins for 23 types of PTMs and 275 types of diseases from the literature. Database analyses show that phosphorylation has the largest number of disease associations, whereas neurologic diseases have the largest number of PTM associations. We classified all known PDAs into six classes according to the PTM status in diseases and demonstrated that the upregulation and presence of PTM events account for a predominant proportion of disease-associated PTM events. By reconstructing a disease-gene network, we observed that breast cancers have the largest number of associated PTMs and AKT1 has the largest number of PTMs connected to diseases. Finally, the PTMD database was developed with detailed annotations and can be a useful resource for further analyzing the relations between PTMs and human diseases. PTMD is freely accessible at http://ptmd.biocuckoo.org.",PTMD,0.985867262,NA,0,PTMD,0.985867262,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2018 +23911837,http://www.PTP-central.org,"PTP-central: a comprehensive resource of protein tyrosine phosphatases in eukaryotic genomes. Reversible tyrosine phosphorylation is a fundamental signaling mechanism controlling a diversity of cellular processes. Whereas protein tyrosine kinases have long been implicated in many diseases, aberrant protein tyrosine phosphatase (PTP) activity is also increasingly being associated with a wide spectrum of conditions. 
PTPs are now regarded as key regulators of biochemical processes instead of simple ""off"" switches operating in tyrosine kinase signaling pathways. Despite the central importance that PTPs play in the cell's biochemistry, the tyrosine phosphatomes of most species remain uncharted. Here we present a highly sensitive and specific sequence-based method for the automatic classification of PTPs. As proof of principle we re-annotated the human tyrosine phosphatome, and discovered four new PTP genes that had not been reported before. Our method and the predicted tyrosine phosphatomes of 65 eukaryotic genomes are accessible online through the user-friendly PTP-central resource (http://www.PTP-central.org/), where users can also submit their own sequences for prediction. PTP-central is a comprehensive and continually developing resource that currently integrates the predicted tyrosine phosphatomes with structural data and genetic association disease studies, as well as homology relationships. PTP-central thus fills an important void for the systematic study of PTPs, both in model organisms and from an evolutionary perspective.",PTP-central,0.99302128,NA,0,PTP-central,0.99302128,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/31/2013 +29939244,http://www.nipgr.res.in/PtRFdb,"PtRFdb: a database for plant transfer RNA-derived fragments. . Transfer RNA-derived fragments (tRFs) represent a novel class of small RNAs (sRNAs) generated through endonucleolytic cleavage of both mature and precursor transfer RNAs (tRNAs). These 14-28 nt length tRFs that have been extensively studied in animal kingdom are to be explored in plants. In this study, we introduce a database of plant tRFs named PtRFdb (www.nipgr.res.in/PtRFdb), for the scientific community. We analyzed a total of 1344 sRNA sequencing datasets of 10 different plant species and identified a total of 5607 unique tRFs (758 tRF-1, 2269 tRF-3 and 2580 tRF-5), represented by 487 765 entries. 
In PtRFdb, detailed and comprehensive information is available for each tRF entry. Apart from the core information consisting of the tRF type, anticodon, source organism, tissue, sequence and the genomic location; additional information like PubMed identifier (PMID), Sample accession number (GSM), sequence length and frequency relevant to the tRFs may be of high utility to the user. Two different types of search modules (Basic Search and Advanced Search), sequence similarity search (by BLAST) and Browse option with data download facility for each search is provided in this database. We believe that PtRFdb is a unique database of its kind and it will be beneficial in the validation and further characterization of plant tRFs.Database URL: http://www.nipgr.res.in/PtRFdb/.",PtRFdb,0.996222079,NA,0,PtRFdb,0.996222079,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +25392416,http://www.megabionet.org/aspd,"PubAngioGen: a database and knowledge for angiogenesis and related diseases. Angiogenesis is the process of generating new blood vessels based on existing ones, which is involved in many diseases including cancers, cardiovascular diseases and diabetes mellitus. Recently, great efforts have been made to explore the mechanisms of angiogenesis in various diseases and many angiogenic factors have been discovered as therapeutic targets in anti- or pro-angiogenic drug development. However, the resulted information is sparsely distributed and no systematical summarization has been made. In order to integrate these related results and facilitate the researches for the community, we conducted manual text-mining from published literature and built a database named as PubAngioGen (http://www.megabionet.org/aspd/). 
Our online application displays a comprehensive network for exploring the connection between angiogenesis and diseases at multilevels including protein-protein interaction, drug-target, disease-gene and signaling pathways among various cells and animal models recorded through text-mining. To enlarge the scope of the PubAngioGen application, our database also links to other common resources including STRING, DrugBank and OMIM databases, which will facilitate understanding the underlying molecular mechanisms of angiogenesis and drug development in clinical therapy.",PubAngioGen,0.991985381,NA,0,PubAngioGen,0.991985381,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +"22140110, 24198245, 26400175, 27899599, 30371825, 33151290",http://pubchem.ncbi.nlm.nih.gov,"PubChem's BioAssay Database. PubChem (http://pubchem.ncbi.nlm.nih.gov) is a public repository for biological activity data of small molecules and RNAi reagents. The mission of PubChem is to deliver free and easy access to all deposited data, and to provide intuitive data analysis tools. The PubChem BioAssay database currently contains 500,000 descriptions of assay protocols, covering 5000 protein targets, 30,000 gene targets and providing over 130 million bioactivity outcomes. PubChem's bioassay data are integrated into the NCBI Entrez information retrieval system, thus making PubChem data searchable and accessible by Entrez queries. Also, as a repository, PubChem constantly optimizes and develops its deposition system answering many demands of both high- and low-volume depositors. The PubChem information platform allows users to search, review and download bioassay description and data. The PubChem platform also enables researchers to collect, compare and analyze biological test results through web-based and programmatic tools. 
In this work, we provide an update for the PubChem BioAssay resource, including information content growth, data model extension and new developments of data submission, retrieval, analysis and download tools.",PubChem,0.99054718,NA,0,PubChem,0.99054718,6,29718389,28346087,NA,NA,do not merge,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +28346087,http://www.ncbi.nlm.nih.gov/pcassay,"PubChem BioAssay: A Decade's Development toward Open High-Throughput Screening Data Sharing. High-throughput screening (HTS) is now routinely conducted for drug discovery by both pharmaceutical companies and screening centers at academic institutions and universities. Rapid advance in assay development, robot automation, and computer technology has led to the generation of terabytes of data in screening laboratories. Despite the technology development toward HTS productivity, fewer efforts were devoted to HTS data integration and sharing. As a result, the huge amount of HTS data was rarely made available to the public. To fill this gap, the PubChem BioAssay database ( https://www.ncbi.nlm.nih.gov/pcassay/ ) was set up in 2004 to provide open access to the screening results tested on chemicals and RNAi reagents. With more than 10 years' development and contributions from the community, PubChem has now become the largest public repository for chemical structures and biological data, which provides an information platform to worldwide researchers supporting drug development, medicinal chemistry study, and chemical biology research. This work presents a review of the HTS data content in the PubChem BioAssay database and the progress of data deposition to stimulate knowledge discovery and data sharing. 
It also provides a description of the database's data standard and basic utilities facilitating information access and use for new users.",PubChem,0.990066409,NA,0,PubChem,0.990066409,1,NA,"22140110.0, 24198245.0, 26400175.0, 27899599.0, 30371825.0, 33151290.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/13/2017 +28481528,http://pubchemqc.riken.jp,"PubChemQC Project: A Large-Scale First-Principles Electronic Structure Database for Data-Driven Chemistry. Large-scale molecular databases play an essential role in the investigation of various subjects such as the development of organic materials, in silico drug design, and data-driven studies with machine learning. We have developed a large-scale quantum chemistry database based on first-principles methods. Our database currently contains the ground-state electronic structures of 3 million molecules based on density functional theory (DFT) at the B3LYP/6-31G* level, and we successively calculated 10 low-lying excited states of over 2 million molecules via time-dependent DFT with the B3LYP functional and the 6-31+G* basis set. To select the molecules calculated in our project, we referred to the PubChem Project, which was used as the source of the molecular structures in short strings using the InChI and SMILES representations. Accordingly, we have named our quantum chemistry database project ""PubChemQC"" ( http://pubchemqc.riken.jp/ ) and placed it in the public domain. In this paper, we show the fundamental features of the PubChemQC database and discuss the techniques used to construct the data set for large-scale quantum chemistry calculations. 
We also present a machine learning approach to predict the electronic structure of molecules as an example to demonstrate the suitability of the large-scale quantum chemistry database.",PubChemQC,0.988919353,NA,0,PubChemQC,0.988919353,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/19/2017 +21980353,http://www.pubmed2ensembl.org,"pubmed2ensembl: a resource for mining the biological literature on genes. Background The last two decades have witnessed a dramatic acceleration in the production of genomic sequence information and publication of biomedical articles. Despite the fact that genome sequence data and publications are two of the most heavily relied-upon sources of information for many biologists, very little effort has been made to systematically integrate data from genomic sequences directly with the biological literature. For a limited number of model organisms dedicated teams manually curate publications about genes; however for species with no such dedicated staff many thousands of articles are never mapped to genes or genomic regions. Methodology/principal findings To overcome the lack of integration between genomic data and biological literature, we have developed pubmed2ensembl (http://www.pubmed2ensembl.org), an extension to the BioMart system that links over 2,000,000 articles in PubMed to nearly 150,000 genes in Ensembl from 50 species. We use several sources of curated (e.g., Entrez Gene) and automatically generated (e.g., gene names extracted through text-mining on MEDLINE records) sources of gene-publication links, allowing users to filter and combine different data sources to suit their individual needs for information extraction and biological discovery. 
In addition to extending the Ensembl BioMart database to include published information on genes, we also implemented a scripting language for automated BioMart construction and a novel BioMart interface that allows text-based queries to be performed against PubMed and PubMed Central documents in conjunction with constraints on genomic features. Finally, we illustrate the potential of pubmed2ensembl through typical use cases that involve integrated queries across the biomedical literature and genomic data. Conclusion/significance By allowing biologists to find the relevant literature on specific genomic regions or sets of functionally related genes more easily, pubmed2ensembl offers a much-needed genome informatics inspired solution to accessing the ever-increasing biomedical literature.",pubmed2ensembl,0.975213031,NA,0,pubmed2ensembl,0.975213031,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/29/2011 +29718389,http://pubchem.ncbi.nlm.nih.gov,"An update on PUG-REST: RESTful interface for programmatic access to PubChem. PubChem (https://pubchem.ncbi.nlm.nih.gov) is one of the largest open chemical information resources available. It currently receives millions of unique users per month on average, serving as a key resource for many research fields such as cheminformatics, chemical biology, medicinal chemistry, and drug discovery. PubChem provides multiple programmatic access routes to its data and services. One of them is PUG-REST, a Representational State Transfer (REST)-like web service interface to PubChem. On average, PUG-REST receives more than a million requests per day from tens of thousands of unique users. The present paper provides an update on PUG-REST since our previous paper published in 2015. This includes access to new kinds of data (e.g. 
concise bioactivity data, table of contents headings, etc.), full implementation of synchronous fast structure search, support for assay data retrieval using accession identifiers in response to the deprecation of NCBI's GI numbers, data exchange between PUG-REST and NCBI's E-Utilities through the List Gateway, implementation of dynamic traffic control through throttling, and enhanced usage policies. In addition, example Perl scripts are provided, which the user can easily modify, run, or translate into another scripting language.",PUG-REST,0.995439017,NA,0,PUG-REST,0.995439017,1,"22140110.0, 24198245.0, 26400175.0, 27899599.0, 30371825.0, 33151290.0",NA,NA,NA,do not merge,NA,NA,NA,NA,7/1/2018 +29316735,"http://academic.oup.com/nar, http://www.oxfordjournals.org/nar/database/c","The 2018 Nucleic Acids Research database issue and the online molecular biology database collection. The 2018 Nucleic Acids Research Database Issue contains 181 papers spanning molecular biology. Among them, 82 are new and 84 are updates describing resources that appeared in the Issue previously. The remaining 15 cover databases most recently published elsewhere. Databases in the area of nucleic acids include 3DIV for visualisation of data on genome 3D structure and RNArchitecture, a hierarchical classification of RNA families. Protein databases include the established SMART, ELM and MEROPS while GPCRdb and the newcomer STCRDab cover families of biomedical interest. In the area of metabolism, HMDB and Reactome both report new features while PULDB appears in NAR for the first time. This issue also contains reports on genomics resources including Ensembl, the UCSC Genome Browser and ENCODE. Update papers from the IUPHAR/BPS Guide to Pharmacology and DrugBank are highlights of the drug and drug target section while a number of proteomics databases including proteomicsDB are also covered. 
The entire Database Issue is freely available online on the Nucleic Acids Research website (https://academic.oup.com/nar). The NAR online Molecular Biology Database Collection has been updated, reviewing 138 entries, adding 88 new resources and eliminating 47 discontinued URLs, bringing the current total to 1737 databases. It is available at http://www.oxfordjournals.org/nar/database/c/.",PULDB,0.894896626,NA,0,PULDB,0.894896626,1,"30626175.0, 31906604.0, 30626175.0, 31906604.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: CLASS,NA,NA,1/1/2018 +31949184,http://pulmondb.liigh.unam.mx,"PulmonDB: a curated lung disease gene expression database. Chronic Obstructive Pulmonary Disease (COPD) and Idiopathic Pulmonary Fibrosis (IPF) have contrasting clinical and pathological characteristics and interesting whole-genome transcriptomic profiles. However, data from public repositories are difficult to reprocess and reanalyze. Here, we present PulmonDB, a web-based database (http://pulmondb.liigh.unam.mx/) and R library that facilitates exploration of gene expression profiles for these diseases by integrating transcriptomic data and curated annotation from different sources. We demonstrated the value of this resource by presenting the expression of already well-known genes of COPD and IPF across multiple experiments and the results of two differential expression analyses in which we successfully identified differences and similarities. With this first version of PulmonDB, we create a new hypothesis and compare the two diseases from a transcriptomics perspective.",PulmonDB,0.997862995,NA,0,PulmonDB,0.997862995,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/16/2020 +22424087,http://cwtung.kmu.edu.tw/pupdb,"PupDB: a database of pupylated proteins. Background Prokaryotic ubiquitin-like protein (Pup), the firstly identified post-translational protein modifier in prokaryotes, is an important signal for the selective degradation of proteins. 
Recently, large-scale proteomics technology has been applied to identify a large number of pupylated proteins. The development of a database for managing pupylated proteins and pupylation sites is important for further analyses. Description A database named PupDB is constructed by collecting experimentally identified pupylated proteins and pupylation sites from published studies and integrating the information of pupylated proteins with corresponding structures and functional annotations. PupDB is a web-based database with tools for browses and searches of pupylated proteins and interactive displays of protein structures and pupylation sites. Conclusions The structured and searchable database PupDB is expected to provide a useful resource for further analyzing the substrate specificity, identifying pupylated proteins in other organisms and developing computational tools for predicting pupylation sites. PupDB is freely available at http://cwtung.kmu.edu.tw/pupdb.",PupDB,0.998188615,NA,0,PupDB,0.998188615,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/16/2012 +27789703,http://dmk-brain.ecn.uiowa.edu/pVOGs,"Prokaryotic Virus Orthologous Groups (pVOGs): a resource for comparative genomics and protein family annotation. Viruses are the most abundant and diverse biological entities on earth, and while most of this diversity remains completely unexplored, advances in genome sequencing have provided unprecedented glimpses into the virosphere. The Prokaryotic Virus Orthologous Groups (pVOGs, formerly called Phage Orthologous Groups, POGs) resource has aided in this task over the past decade by using automated methods to keep pace with the rapid increase in genomic data. The uses of pVOGs include functional annotation of viral proteins, identification of genes and viruses in uncharacterized DNA samples, phylogenetic analysis, large-scale comparative genomics projects, and more. 
The pVOGs database represents a comprehensive set of orthologous gene families shared across multiple complete genomes of viruses that infect bacterial or archaeal hosts (viruses of eukaryotes will be added at a future date). The pVOGs are constructed within the Clusters of Orthologous Groups (COGs) framework that is widely used for orthology identification in prokaryotes. Since the previous release of the POGs, the size has tripled to nearly 3000 genomes and 300 000 proteins, and the number of conserved orthologous groups doubled to 9518. User-friendly webpages are available, including multiple sequence alignments and HMM profiles for each VOG. These changes provide major improvements to the pVOGs database, at a time of rapid advances in virus genomics. The pVOGs database is hosted jointly at the University of Iowa at http://dmk-brain.ecn.uiowa.edu/pVOGs and the NCBI at ftp://ftp.ncbi.nlm.nih.gov/pub/kristensen/pVOGs/home.html.",pVOGs,0.934616673,Prokaryotic Virus Orthologous Groups,0.87238429,pVOGs,0.934616673,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/26/2016 +32542363,http://www.scfbio-iitd.res.in/PvP01,"PvP01-DB: computational structural and functional characterization of soluble proteome of PvP01 strain of Plasmodium vivax. . Despite Plasmodium vivax being the main offender in the majority of malarial infections, very little information is available about its adaptation and development in humans. Its capability for activating relapsing infections through its dormant liver stage and resistance to antimalarial drugs makes it as one of the major challenges in eradicating malaria. Noting the immediate necessity for the availability of a comprehensive and reliable structural and functional repository for P. 
vivax proteome, here we developed a web resource for the new reference genome, PvP01, furnishing information on sequence, structure, functions, active sites and metabolic pathways compiled and predicted using some of the state-of-the-art methods in respective fields. The PvP01 web resource comprises organized data on the soluble proteome consisting of 3664 proteins in blood and liver stages of malarial cycle. The current public resources represent only 163 proteins of soluble proteome of PvP01, with complete information about their molecular function, biological process and cellular components. Also, only 46 proteins of P. vivax have experimentally determined structures. In this milieu of extreme scarcity of structural and functional information, PvP01 web resource offers meticulously validated structures of 3664 soluble proteins. The sequence and structure-based functional characterization led to a quantum leap from 163 proteins available presently to whole soluble proteome offered through PvP01 web resource. We believe PvP01 web resource will serve the researchers in identifying novel protein drug targets and in accelerating the development of structure-based new drug candidates to combat malaria. Database Availability: http://www.scfbio-iitd.res.in/PvP01.",PvP01,0.951967716,NA,0,PvP01,0.951967716,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +30307523,http://www.nipgr.res.in/PVsiRNAdb,"PVsiRNAdb: a database for plant exclusive virus-derived small interfering RNAs. . Ribonucleic acids (RNA) interference mechanism has been proved to be an important regulator of both transcriptional and post-transcription controls of gene expression during biotic and abiotic stresses in plants. Virus-derived small interfering RNAs (vsiRNAs) are established components of the RNA silencing mechanism for incurring anti-viral resistance in plants. 
Some databases like siRNAdb, HIVsirDB and VIRsiRNAdb are available online pertaining to siRNAs as well as vsiRNAs generated during viral infection in humans; however, currently there is a lack of repository for plant exclusive vsiRNAs. We have developed `PVsiRNAdb (http://www.nipgr.res.in/PVsiRNAdb)', a manually curated plant-exclusive database harboring information related to vsiRNAs found in different virus-infected plants collected by exhaustive data mining of published literature so far. This database contains a total of 322 214 entries and 282 549 unique sequences of vsiRNAs. In PVsiRNAdb, detailed and comprehensive information is available for each vsiRNA sequence. Apart from the core information consisting of plant, tissue, virus name and vsiRNA sequence, additional information of each vsiRNAs (map position, length, coordinates, strand information and predicted structure) may be of high utility to the user. Different types of search and browse modules with three different tools namely BLAST, Smith-Waterman Align and Mapping are provided at PVsiRNAdb. Thus, this database being one of its kind will surely be of much use to molecular biologists for exploring the complex viral genetics and genomics, viral-host interactions and beneficial to the scientific community and can prove to be very advantageous in the field of agriculture for producing viral resistance transgenic crops.",PVsiRNAdb,0.97544,NA,0,PVsiRNAdb,0.97544,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +27465131,http://www.multiomics.in/PvTFDB,"PvTFDB: a Phaseolus vulgaris transcription factors database for expediting functional genomics in legumes. . The common bean [Phaseolus vulgaris (L.)] is one of the essential proteinaceous vegetables grown in developing countries. However, its production is challenged by low yields caused by numerous biotic and abiotic stress conditions. 
Regulatory transcription factors (TFs) symbolize a key component of the genome and are the most significant targets for producing stress tolerant crop and hence functional genomic studies of these TFs are important. Therefore, here we have constructed a web-accessible TFs database for P. vulgaris, called PvTFDB, which contains 2370 putative TF gene models in 49 TF families. This database provides a comprehensive information for each of the identified TF that includes sequence data, functional annotation, SSRs with their primer sets, protein physical properties, chromosomal location, phylogeny, tissue-specific gene expression data, orthologues, cis-regulatory elements and gene ontology (GO) assignment. Altogether, this information would be used in expediting the functional genomic studies of a specific TF(s) of interest. The objectives of this database are to understand functional genomics study of common bean TFs and recognize the regulatory mechanisms underlying various stress responses to ease breeding strategy for variety production through a couple of search interfaces including gene ID, functional annotation and browsing interfaces including by family and by chromosome. This database will also serve as a promising central repository for researchers as well as breeders who are working towards crop improvement of legume crops. In addition, this database provide the user unrestricted public access and the user can download entire data present in the database freely.Database URL: http://www.multiomics.in/PvTFDB/.",PvTFDB,0.984973252,NA,0,PvTFDB,0.984973252,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/27/2016 +33002111,http://pydish.bio.info.hiroshima-cu.ac.jp,"PyDISH: database and analysis tools for heme porphyrin distortion in heme proteins. . Heme participates in a wide range of biological functions such as oxygen transport, electron transport, oxygen reduction, transcriptional regulation and so on. 
While the mechanism of each function has been investigated for many heme proteins, the origin of the diversity of the heme functions is still unclear and a crucial scientific issue. We have constructed a database of heme proteins, named Python-based database and analyzer for DIStortion of Heme porphyrin (PyDISH), which also contains some analysis tools. The aim of PyDISH is to integrate the information on the structures of hemes and heme proteins and the functions of heme proteins. This database will provide the structure-function relationships focusing on heme porphyrin distortion and lead to the elucidation of the origin of the functional diversity of heme proteins. In addition, the insights obtained from the database can be used for the design of protein function. PyDISH contains the structural data of more than 13 000 hemes extracted from the Protein Data Bank, including heme porphyrin distortion, axial ligands coordinating to the heme and the orientation of the propionate sidechains of heme. PyDISH also has information about the protein domains, including Uniprot ID, protein fold by CATH ID, organism, coordination distance and so on. The analytical tools implemented in PyDISH allow users to not only browse and download the data but also analyze the structures of heme porphyrin by using the analytical tools implemented in PyDISH. PyDISH users will be able to utilize the obtained results for the design of protein function. Database URL: http://pydish.bio.info.hiroshima-cu.ac.jp/.",PyDISH,0.98944056,DIStortion of Heme porphyrin,0.780717987,PyDISH,0.98944056,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2020 +25392411,http://dunbrack2.fccc.edu/pyigclassify,"PyIgClassify: a database of antibody CDR structural classifications. Classification of the structures of the complementarity determining regions (CDRs) of antibodies is critically important for antibody structure prediction and computational design. 
We have previously performed a clustering of antibody CDR conformations and defined a systematic nomenclature consisting of the CDR, length and an integer starting from the largest to the smallest cluster in the data set (e.g. L1-11-1). We present PyIgClassify (for Python-based immunoglobulin classification; available at http://dunbrack2.fccc.edu/pyigclassify/), a database and web server that provides access to assignments of all CDR structures in the PDB to our classification system. The database includes assignments to the IMGT germline V regions for heavy and light chains for several species. For humanized antibodies, the assignment of the frameworks is to human germlines and the CDRs to the germlines of mice or other species sources. The database can be searched by PDB entry, cluster identifier and IMGT germline group (e.g. human IGHV1). The entire database is downloadable so that users may filter the data as needed for antibody structure analysis, prediction and design.",PyIgClassify,0.995087743,NA,0,PyIgClassify,0.995087743,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +24715219,http://quail.anatomyportal.org,"The quail anatomy portal. The Japanese quail is a widely used model organism for the study of embryonic development; however, anatomical resources are lacking. The Quail Anatomy Portal (QAP) provides 22 detailed three-dimensional (3D) models of quail embryos during development from embryonic day (E)1 to E15 generated using optical projection tomography. The 3D models provided can be virtually sectioned to investigate anatomy. Furthermore, using the 3D nature of the models, we have generated a tool to assist in the staging of quail samples. Volume renderings of each stage are provided and can be rotated to allow visualization from multiple angles allowing easy comparison of features both between stages in the database and between images or samples in the laboratory. 
The use of JavaScript, PHP and HTML ensure the database is accessible to users across different operating systems, including mobile devices, facilitating its use in the laboratory.The QAP provides a unique resource for researchers using the quail model. The ability to virtually section anatomical models throughout development provides the opportunity for researchers to virtually dissect the quail and also provides a valuable tool for the education of students and researchers new to the field. DATABASE URL: http://quail.anatomyportal.org (For review username: demo, password: quail123).",QAP,0.926871697,Quail Anatomy Portal,0.80078907,QAP,0.926871697,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/7/2014 +21965557,http://nabg.iasri.res.in:8080/qlic-rice,"QlicRice: a web interface for abiotic stress responsive QTL and loci interaction channels in rice. The QlicRice database is designed to host publicly accessible, abiotic stress responsive quantitative trait loci (QTLs) in rice (Oryza sativa) and their corresponding sequenced gene loci. It provides a platform for the data mining of abiotic stress responsive QTLs, as well as browsing and annotating associated traits, their location on a sequenced genome, mapped expressed sequence tags (ESTs) and tissue and growth stage-specific expressions on the whole genome. Information on QTLs related to abiotic stresses and their corresponding loci from a genomic perspective has not yet been integrated on an accessible, user-friendly platform. QlicRice offers client-responsive architecture to retrieve meaningful biological information--integrated and named 'Qlic Search'--embedded in a query phrase autocomplete feature, coupled with multiple search options that include trait names, genes and QTL IDs. A comprehensive physical and genetic map and vital statistics have been provided in a graphical manner for deciphering the position of QTLs on different chromosomes. 
A convenient and intuitive user interface have been designed to help users retrieve associations to agronomically important QTLs on abiotic stress response in rice. Database URL: http://nabg.iasri.res.in:8080/qlic-rice/.",QlicRice,0.997935176,NA,0,QlicRice,0.997935176,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/30/2011 +30380102,http://qphos.cancerbio.info,"qPhos: a database of protein phosphorylation dynamics in humans. Temporal and spatial protein phosphorylation dynamically orchestrates a broad spectrum of biological processes and plays various physiological and pathological roles in diseases and cancers. Recent advancements in high-throughput proteomics techniques greatly promoted the profiling and quantification of phosphoproteome. However, although several comprehensive databases have reserved the phosphorylated proteins and sites, a resource for phosphorylation quantification still remains to be constructed. In this study, we developed the qPhos (http://qphos.cancerbio.info) database to integrate and host the data on phosphorylation dynamics. A total of 3 537 533 quantification events for 199 071 non-redundant phosphorylation sites on 18 402 proteins under 484 conditions were collected through exhaustive curation of published literature. The experimental details, including sample materials, conditions and methods, were recorded. Various annotations, such as protein sequence and structure properties, potential upstream kinases and their inhibitors, were systematically integrated and carefully organized to present details about the quantified phosphorylation sites. Various browse and search functions were implemented for the user-defined filtering of samples, conditions and proteins. Furthermore, the qKinAct service was developed to dissect the kinase activity profile from user-submitted quantitative phosphoproteome data through annotating the kinase activity-related phosphorylation sites. 
Taken together, the qPhos database provides a comprehensive resource for protein phosphorylation dynamics to facilitate related investigations.",qPhos,0.993786156,NA,0,qPhos,0.993786156,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +31868683,http://rpq-qpn.ca/en,"The Quebec Parkinson Network: A Researcher-Patient Matching Platform and Multimodal Biorepository. Background Genetic, biologic and clinical data suggest that Parkinson's disease (PD) is an umbrella for multiple disorders with clinical and pathological overlap, yet with different underlying mechanisms. To better understand these and to move towards neuroprotective treatment, we have established the Quebec Parkinson Network (QPN), an open-access patient registry, and data and bio-samples repository. Objective To present the QPN and to perform preliminary analysis of the QPN data. Methods A total of 1,070 consecutively recruited PD patients were included in the analysis. Demographic and clinical data were analyzed, including comparisons between males and females, PD patients with and without RBD, and stratified analyses comparing early and late-onset PD and different age groups. Results QPN patients exhibit a male:female ratio of 1.8:1, an average age-at-onset of 58.6 years, an age-at-diagnosis of 60.4 years, and average disease duration of 8.9 years. REM-sleep behavior disorder (RBD) was more common among men, and RBD was associated with other motor and non-motor symptoms including dyskinesia, fluctuations, postural hypotension and hallucinations. Older patients had significantly higher rates of constipation and cognitive impairment, and longer disease duration was associated with higher rates of dyskinesia, fluctuations, freezing of gait, falls, hallucinations and cognitive impairment. Since QPN's creation, over 60 studies and 30 publications have included patients and data from the QPN. Conclusions The QPN cohort displays typical PD demographics and clinical features. 
These data are open-access upon application (http://rpq-qpn.ca/en/), and will soon include genetic, imaging and bio-samples. We encourage clinicians and researchers to perform studies using these resources.",QPN,0.877652884,Network,0.656310976,QPN,0.877652884,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +28977518,http://biodb.swu.edu.cn/qprimerdb,"qPrimerDB: a thermodynamics-based gene-specific qPCR primer database for 147 organisms. Real-time quantitative polymerase chain reaction (qPCR) is one of the most important methods for analyzing the expression patterns of target genes. However, successful qPCR experiments rely heavily on the use of high-quality primers. Various qPCR primer databases have been developed to address this issue, but these databases target only a few important organisms. Here, we developed the qPrimerDB database, founded on an automatic gene-specific qPCR primer design and thermodynamics-based validation workflow. The qPrimerDB database is the most comprehensive qPCR primer database available to date, with a web front-end providing gene-specific and pre-computed primer pairs across 147 important organisms, including human, mouse, zebrafish, yeast, thale cress, rice and maize. In this database, we provide 3331426 of the best primer pairs for each gene, based on primer pair coverage, as well as 47760359 alternative gene-specific primer pairs, which can be conveniently batch downloaded. The specificity and efficiency was validated for qPCR primer pairs for 66 randomly selected genes, in six different organisms, through qPCR assays and gel electrophoresis. The qPrimerDB database represents a valuable, timesaving resource for gene expression analysis. This resource, which will be routinely updated, is publically accessible at http://biodb.swu.edu.cn/qprimerdb.",qPrimerDB,0.992642581,NA,0,qPrimerDB,0.992642581,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +34559210,http://qsdb.org,"QSDB-a graphical Quorum Sensing Database. . 
The human microbiome is largely shaped by the chemical interactions of its microbial members, which includes cross-talk via shared signals or quenching of the signalling of other species. Quorum sensing is a process that allows microbes to coordinate their behaviour in dependence of their population density and to adjust gene expression accordingly. We present the Quorum Sensing Database (QSDB), a comprehensive database of all published sensing and quenching relations between organisms and signalling molecules of the human microbiome, as well as an interactive web interface that allows browsing the database, provides graphical depictions of sensing mechanisms as Systems Biology Graphical Notation diagrams and links to other databases. Database URL: QSDB (Quorum Sensing DataBase) is freely available via an interactive web interface and as a downloadable csv file at http://qsdb.org.",QSDB,0.994050586,Quorum Sensing Database,0.967420578,QSDB,0.994050586,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/24/2021 +33003203,http://qsidb.lbci.net,"QSIdb: quorum sensing interference molecules. . Quorum sensing interference (QSI), the disruption and manipulation of quorum sensing (QS) in the dynamic control of bacteria populations could be widely applied in synthetic biology to realize dynamic metabolic control and develop potential clinical therapies. Conventionally, limited QSI molecules (QSIMs) were developed based on molecular structures or for specific QS receptors, which are in short supply for various interferences and manipulations of QS systems. In this study, we developed QSIdb (http://qsidb.lbci.net/), a specialized repository of 633 reported QSIMs and 73 073 expanded QSIMs including both QS agonists and antagonists. We have collected all reported QSIMs in literatures focused on the modifications of N-acyl homoserine lactones, natural QSIMs and synthetic QS analogues. 
Moreover, we developed a pipeline with SMILES-based similarity assessment algorithms and docking-based validations to mine potential QSIMs from existing 138 805 608 compounds in the PubChem database. In addition, we proposed a new measure, pocketedit, for assessing the similarities of active protein pockets or QSIMs crosstalk, and obtained 273 possible potential broad-spectrum QSIMs. We provided user-friendly browsing and searching facilities for easy data retrieval and comparison. QSIdb could assist the scientific community in understanding QS-related therapeutics, manipulating QS-based genetic circuits in metabolic engineering, developing potential broad-spectrum QSIMs and expanding new ligands for other receptors.",QSIdb,0.99794662,quorum sensing interference molecules,0.608476996,QSIdb,0.99794662,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +31598699,http://mulinlab.org/qtlbase,"QTLbase: an integrative resource for quantitative trait loci across multiple human molecular phenotypes. Recent advances in genome sequencing and functional genomic profiling have promoted many large-scale quantitative trait locus (QTL) studies, which connect genotypes with tissue/cell type-specific cellular functions from transcriptional to post-translational level. However, no comprehensive resource can perform QTL lookup across multiple molecular phenotypes and investigate the potential cascade effect of functional variants. We developed a versatile resource, named QTLbase, for interpreting the possible molecular functions of genetic variants, as well as their tissue/cell-type specificity. 
Overall, QTLbase has five key functions: (i) curating and compiling genome-wide QTL summary statistics for 13 human molecular traits from 233 independent studies; (ii) mapping QTL-relevant tissue/cell types to 78 unified terms according to a standard anatomogram; (iii) normalizing variant and trait information uniformly, yielding >170 million significant QTLs; (iv) providing a rich web client that enables phenome- and tissue-wise visualization; and (v) integrating the most comprehensive genomic features and functional predictions to annotate the potential QTL mechanisms. QTLbase provides a one-stop shop for QTL retrieval and comparison across multiple tissues and multiple layers of molecular complexity, and will greatly help researchers interrogate the biological mechanism of causal variants and guide the direction of functional validation. QTLbase is freely available at http://mulinlab.org/qtlbase.",QTLbase,0.997860312,NA,0,QTLbase,0.997860312,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +"23180796, 26602686",http://www.animalgenome.org/QTLdb,"Animal QTLdb: an improved database tool for livestock animal QTL/association data dissemination in the post-genome era. The Animal QTL database (QTLdb; http://www.animalgenome.org/QTLdb) is designed to house all publicly available QTL and single-nucleotide polymorphism/gene association data on livestock animal species. An earlier version was published in the Nucleic Acids Research Database issue in 2007. Since then, we have continued our efforts to develop new and improved database tools to allow more data types, parameters and functions. Our efforts have transformed the Animal QTLdb into a tool that actively serves the research community as a quality data repository and more importantly, a provider of easily accessible tools and functions to disseminate QTL and gene association information. The QTLdb has been heavily used by the livestock genomics community since its first public release in 2004. 
To date, there are 5920 cattle, 3442 chicken, 7451 pigs, 753 sheep and 88 rainbow trout data points in the database, and at least 290 publications that cite use of the database. The rapid advancement in genomic studies of cattle, chicken, pigs, sheep and other livestock animals has presented us with challenges, as well as opportunities for the QTLdb to meet the evolving needs of the research community. Here, we report our progress over the recent years and highlight new functions and services available to the general public.",QTLdb,0.998483792,Animal QTL database,0.808126822,QTLdb,0.998483792,2,30407520,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/23/2015 +21656910,http://proteome.moffitt.org/QUAD,"A database of reaction monitoring mass spectrometry assays for elucidating therapeutic response in cancer. Purpose The Quantitative Assay Database (QuAD), http://proteome.moffitt.org/QUAD/, facilitates widespread implementation of quantitative mass spectrometry in cancer biology and clinical research through sharing of methods and reagents for monitoring protein expression and modification. Experimental design Liquid chromatography coupled to multiple reaction monitoring (LC-MRM) mass spectrometry assays are developed using SDS-PAGE fractionated lysates from cancer cell lines. Pathway maps created using GeneGO Metacore provide the biological relationships between proteins and illustrate concepts for multiplexed analysis; each protein can be selected to examine assay development at the protein and peptide levels. Results The coupling of SDS-PAGE and multiple reaction monitoring mass spectrometry screening has been used to detect 876 peptides from 218 cancer-related proteins in model systems including colon, lung, melanoma, leukemias, and myeloma, which has led to the development of 95 quantitative assays including stable-isotope-labeled peptide standards. Methods are published online and peptide standards are made available to the research community. 
Protein expression measurements for heat shock proteins, including a comparison with ELISA and monitoring response to the HSP90 inhibitor, 17-(dimethylaminoethylamino)-17-demethoxygeldanamycin (17-DMAG), are used to illustrate the components of the QuAD and its potential utility. Conclusions and clinical relevance This resource enables quantitative assessment of protein components of signaling pathways and biological processes and holds promise for systematic investigation of treatment responses in cancer.",QuAD,0.991928503,The Quantitative Assay Database,0.939238191,QuAD,0.991928503,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/8/2011 +22726705,http://applications.bioanalysis.org/quartetsdb,"QuartetS-DB: a large-scale orthology database for prokaryotes and eukaryotes inferred by evolutionary evidence. Background The concept of orthology is key to decoding evolutionary relationships among genes across different species using comparative genomics. QuartetS is a recently reported algorithm for large-scale orthology detection. Based on the well-established evolutionary principle that gene duplication events discriminate paralogous from orthologous genes, QuartetS has been shown to improve orthology detection accuracy while maintaining computational efficiency. Description QuartetS-DB is a new orthology database constructed using the QuartetS algorithm. The database provides orthology predictions among 1621 complete genomes (1365 bacterial, 92 archaeal, and 164 eukaryotic), covering more than seven million proteins and four million pairwise orthologs. It is a major source of orthologous groups, containing more than 300,000 groups of orthologous proteins and 236,000 corresponding gene trees. The database also provides over 500,000 groups of inparalogs. In addition to its size, a distinguishing feature of QuartetS-DB is the ability to allow users to select a cutoff value that modulates the balance between prediction accuracy and coverage of the retrieved pairwise orthologs. 
The database is accessible at https://applications.bioanalysis.org/quartetsdb. Conclusions QuartetS-DB is one of the largest orthology resources available to date. Because its orthology predictions are underpinned by evolutionary evidence obtained from sequenced genomes, we expect its accuracy to continue to increase in future releases as the genomes of additional species are sequenced.",QuartetS-DB,0.972562802,NA,0,QuartetS-DB,0.972562802,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/22/2012 +23180797,http://quorumpeps.ugent.be,"Quorumpeps database: chemical space, microbial origin and functionality of quorum sensing peptides. Quorum-sensing (QS) peptides are biologically attractive molecules, with a wide diversity of structures and prone to modifications altering or presenting new functionalities. Therefore, the Quorumpeps database (http://quorumpeps.ugent.be) is developed to give a structured overview of the QS oligopeptides, describing their microbial origin (species), functionality (method, result and receptor), peptide links and chemical characteristics (3D-structure-derived physicochemical properties). The chemical diversity observed within this group of QS signalling molecules can be used to develop new synthetic bio-active compounds.",Quorumpeps,0.993253469,NA,0,Quorumpeps,0.993253469,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/24/2012 +"22121227, 27899586",http://rloop.bii.a-star.edu.sg,"Quantitative model of R-loop forming structures reveals a novel level of RNA-DNA interactome complexity. R-loop is the structure co-transcriptionally formed between nascent RNA transcript and DNA template, leaving the non-transcribed DNA strand unpaired. This structure can be involved in the hyper-mutation and dsDNA breaks in mammalian immunoglobulin (Ig) genes, oncogenes and neurodegenerative disease related genes. R-loops have not been studied at the genome scale yet. 
To identify the R-loops, we developed a computational algorithm and mapped R-loop forming sequences (RLFS) onto 66,803 sequences defined by UCSC as 'known' genes. We found that ∼59% of these transcribed sequences contain at least one RLFS. We created R-loopDB (http://rloop.bii.a-star.edu.sg/), the database that collects all RLFS identified within over half of the human genes and links to the UCSC Genome Browser for information integration and visualisation across a variety of bioinformatics sources. We found that many oncogenes and tumour suppressors (e.g. Tp53, BRCA1, BRCA2, Kras and Ptprd) and neurodegenerative diseases related genes (e.g. ATM, Park2, Ptprd and GLDC) could be prone to significant R-loop formation. Our findings suggest that R-loops provide a novel level of RNA-DNA interactome complexity, playing key roles in gene expression controls, mutagenesis, recombination process, chromosomal rearrangement, alternative splicing, DNA-editing and epigenetic modifications. RLFSs could be used as a novel source of prospective therapeutic targets.",R-loopDB,0.997868508,NA,0,R-loopDB,0.997868508,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +30010730,http://www.picb.ac.cn/RabGTD,"RabGTD: a comprehensive database of rabbit genome and transcriptome. . The rabbit is a very important species for both biomedical research and agriculture animal breeding. They are not only the most-used experimental animals for the production of antibodies, but also widely used for studying a variety of human diseases. Here we developed RabGTD, the first comprehensive rabbit database containing both genome and transcriptome data generated by next-generation sequencing. Genomic variations coming from 79 samples were identified and annotated, including 33 samples of wild rabbits and 46 samples of domestic rabbits with diverse populations. Gene expression profiles of 86 tissue samples were complied, including those from the most commonly used models for hyperlipidemia and atherosclerosis. 
RabGTD is a web-based and open-access resource, which also provides convenient functions and friendly interfaces of searching, browsing and downloading for users to explore the big data.Database URL: http://www.picb.ac.cn/RabGTD/.",RabGTD,0.996257365,NA,0,RabGTD,0.996257365,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +22140215,http://www2.chi.unsw.edu.au/rac,"RAC: Repository of Antibiotic resistance Cassettes. Antibiotic resistance in bacteria is often due to acquisition of resistance genes associated with different mobile genetic elements. In Gram-negative bacteria, many resistance genes are found as part of small mobile genetic elements called gene cassettes, generally found integrated into larger elements called integrons. Integrons carrying antibiotic resistance gene cassettes are often associated with mobile elements and here are designated 'mobile resistance integrons' (MRIs). More than one cassette can be inserted in the same integron to create arrays that contribute to the spread of multi-resistance. In many sequences in databases such as GenBank, only the genes within cassettes, rather than whole cassettes, are annotated and the same gene/cassette may be given different names in different entries, hampering analysis. We have developed the Repository of Antibiotic resistance Cassettes (RAC) website to provide an archive of gene cassettes that includes alternative gene names from multiple nomenclature systems and allows the community to contribute new cassettes. RAC also offers an additional function that allows users to submit sequences containing cassettes or arrays for annotation using the automatic annotation system Attacca. Attacca recognizes features (gene cassettes, integron regions) and identifies cassette arrays as patterns of features and can also distinguish minor cassette variants that may encode different resistance phenotypes (aacA4 cassettes and bla cassettes-encoding β-lactamases). 
Gaps in annotations are manually reviewed and those found to correspond to novel cassettes are assigned unique names. While there are other websites dedicated to integrons or antibiotic resistance genes, none includes a complete list of antibiotic resistance gene cassettes in MRI or offers consistent annotation and appropriate naming of all of these cassettes in submitted sequences. RAC thus provides a unique resource for researchers, which should reduce confusion and improve the quality of annotations of gene cassettes in integrons associated with antibiotic resistance. DATABASE URL: http://www2.chi.unsw.edu.au/rac.",RAC,0.979894936,Repository of Antibiotic resistance Cassettes,0.828436719,RAC,0.979894936,1,NA,29373760,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,12/2/2011 +29373760,http://mara.spokade.com,"Automated annotation of mobile antibiotic resistance in Gram-negative bacteria: the Multiple Antibiotic Resistance Annotator (MARA) and database. Background Multiresistance in Gram-negative bacteria is often due to acquisition of several different antibiotic resistance genes, each associated with a different mobile genetic element, that tend to cluster together in complex conglomerations. Accurate, consistent annotation of resistance genes, the boundaries and fragments of mobile elements, and signatures of insertion, such as DR, facilitates comparative analysis of complex multiresistance regions and plasmids to better understand their evolution and how resistance genes spread. Objectives To extend the Repository of Antibiotic resistance Cassettes (RAC) web site, which includes a database of 'features', and the Attacca automatic DNA annotation system, to encompass additional resistance genes and all types of associated mobile elements. Methods Antibiotic resistance genes and mobile elements were added to RAC, from existing registries where possible. 
Attacca grammars were extended to accommodate the expanded database, to allow overlapping features to be annotated and to identify and annotate features such as composite transposons and DR. Results The Multiple Antibiotic Resistance Annotator (MARA) database includes antibiotic resistance genes and selected mobile elements from Gram-negative bacteria, distinguishing important variants. Sequences can be submitted to the MARA web site for annotation. A list of positions and orientations of annotated features, indicating those that are truncated, DR and potential composite transposons is provided for each sequence, as well as a diagram showing annotated features approximately to scale. Conclusions The MARA web site (http://mara.spokade.com) provides a comprehensive database for mobile antibiotic resistance in Gram-negative bacteria and accurately annotates resistance genes and associated mobile elements in submitted sequences to facilitate comparative analysis.",RAC,0.924949805,Repository of Antibiotic resistance Cassettes,0.901370605,RAC,0.924949805,1,NA,22140215,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,4/1/2018 +24163250,http://RNAedit.com,"RADAR: a rigorously annotated database of A-to-I RNA editing. We present RADAR--a rigorously annotated database of A-to-I RNA editing (available at http://RNAedit.com). The identification of A-to-I RNA editing sites has been dramatically accelerated in the past few years by high-throughput RNA sequencing studies. RADAR includes a comprehensive collection of A-to-I RNA editing sites identified in humans (Homo sapiens), mice (Mus musculus) and flies (Drosophila melanogaster), together with extensive manually curated annotations for each editing site. 
RADAR also includes an expandable listing of tissue-specific editing levels for each editing site, which will facilitate the assignment of biological functions to specific editing sites.",RADAR--a,0.97970136,NA,0,RADAR--a,0.97970136,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/25/2013 +32338561,http://biokb.ncpsb.org/radatlas,"RadAtlas 1.0: a knowledgebase focusing on radiation-associated genes. Purpose: Ionizing radiation has very complex biological effects, such as inducing damage to DNA and proteins, ionizing water molecules to produce toxic free radicals, and triggering genetic and somatic effects. Understanding the biomolecular response mechanism of radiation is very important for the prevention and treatment of radiation diseases. However, function information of these radiation-associated genes is hidden in numbers of scientific papers and databases, making it difficult to understand the response mechanism of ionizing radiation.Materials and methods: We collected radiation-associated genes by literature and database mining. Literature and database mining was performed on the basis of biomedical literature from PubMed and gene expression datasets from GEO respectively.Results: We built an ionizing radiation related knowledgebase RadAtlas 1.0 (http://biokb.ncpsb.org/radatlas), which contains 598 radiation-associated genes compiled from literature mining, and 611 potential radiation-associated genes collected from gene expression datasets by differential gene expression analysis. We also provide a user-friendly web interface that offers multiple search methods.Conclusions: RadAtlas collected a large amount of information about genes, biological processes, and pathways related to ionizing radiation. It is the first attempt to provide a comprehensive catalog of radiation-associated genes with literature evidence and potential radiation-associated genes with differential expression evidence. 
We believe that RadAtlas would be a helpful tool to understand the response mechanism to ionizing radiation.",RadAtlas,0.993727684,NA,0,RadAtlas,0.993727684,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/12/2020 +25228593,http://www.bioapp.org/RADB,"RADB: a database of rheumatoid arthritis-related polymorphisms. . Rheumatoid arthritis (RA) is an autoimmune disease that has a complex genetic basis. Therefore, it is important to explore the genetic background of RA. The extensive recent application of polymorphic genetic markers, especially single nucleotide polymorphisms, has presented us with a large quantity of genetic data. In this study, we developed the Database of Rheumatoid Arthritis-related Polymorphisms (RADB), to integrate all the RA-related genetic polymorphisms and provide a useful resource for researchers. We manually extracted the RA-related polymorphisms from 686 published reports, including RA susceptibility loci, polymorphisms associated with particular clinical features of RA, polymorphisms associated with drug response in RA and polymorphisms associated with a higher risk of cardiovascular disease in RA. Currently, RADB V1.0 contains 3235 polymorphisms that are associated with 636 genes and refer to 68 countries. The detailed information extracted from the literature includes basic information about the articles (e.g., PubMed ID, title and abstract), population information (e.g., country, geographic area and sample size) and polymorphism information (e.g., polymorphism name, gene, genotype, odds ratio and 95% confidence interval, P-value and risk allele). Meanwhile, useful annotations, such as hyperlinks to dbSNP, GenBank, UCSC, Gene Ontology and Kyoto Encyclopedia of Genes and Genomes pathway, are included. In addition, a tool for meta-analysis was developed to summarize the results of multiple studies. The database is freely available at http://www.bioapp.org/RADB. 
Database URL: http://www.bioapp.org/RADB.",RADB,0.987689435,Database of Rheumatoid Arthritis-related Polymorphisms,0.919500697,RADB,0.987689435,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/15/2014 +23239846,http://bioinfo.bti.cornell.edu/radish,"RadishBase: a database for genomics and genetics of radish. Radish is an economically important vegetable crop. During the past several years, large-scale genomics and genetics resources have been accumulated for this species. To store, query, analyze and integrate these radish resources efficiently, we have developed RadishBase (http://bioinfo.bti.cornell.edu/radish), a genomics and genetics database of radish. Currently the database contains radish mitochondrial genome sequences, expressed sequence tag (EST) and unigene sequences and annotations, biochemical pathways, EST-derived single nucleotide polymorphism (SNP) and simple sequence repeat (SSR) markers, and genetic maps. RadishBase is designed to enable users easily to retrieve and visualize biologically important information through a set of efficient query interfaces and analysis tools, including the BLAST search and unigene annotation query interfaces, and tools to classify unigenes functionally, to identify enriched gene ontology (GO) terms and to visualize genetic maps. A database containing radish pathways predicted from unigene sequences is also included in RadishBase. The tools and interfaces in RadishBase allow efficient mining of recently released and continually expanding large-scale radish genomics and genetics data sets, including the radish genome sequences and RNA-seq data sets.",RadishBase,0.997795284,NA,0,RadishBase,0.997795284,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/13/2012 +30329070,"http://lipid-raft-database.di.uq.edu.au/, http://raftprot.org","RaftProt V2: understanding membrane microdomain function through lipid raft proteomes. 
Cellular membranes feature dynamic submicrometer-scale lateral domains termed lipid rafts, membrane rafts or glycosphingolipid-enriched microdomains (GEM). Numerous proteomics studies have been conducted on the lipid raft proteome, however, interpretation of individual studies is limited by potential undefined contaminant proteins. To enable integrated analyses, we previously developed RaftProt (http://lipid-raft-database.di.uq.edu.au/), a searchable database of mammalian lipid raft-associated proteins. Despite being a highly used resource, further developments in annotation and utilities were required. Here, we present RaftProt V2 (http://raftprot.org), an improved update of RaftProt. Besides the addition of new datasets and re-mapping of all entries to both UniProt and UniRef IDs, we have implemented a stringent annotation based on experimental evidence level to assist in identification of possible contaminant proteins. RaftProt V2 allows for simultaneous search of multiple proteins/experiments at the cell/tissue type and UniRef/Gene level, where correlations, interactions or overlaps can be investigated. The web-interface has been completely re-designed to enable interactive data and subset selection, correlation analysis and network visualization. Overall, RaftProt aims to advance our understanding of lipid raft function through integrative analysis of datasets collected from diverse tissue and conditions. Database URL: http://raftprot.org.",RaftProt,0.997017503,NA,0,RaftProt,0.997017503,1,NA,25392410,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +25392410,http://lipid-raft-database.di.uq.edu.au,"RaftProt: mammalian lipid raft proteome database. RaftProt (http://lipid-raft-database.di.uq.edu.au/) is a database of mammalian lipid raft-associated proteins as reported in high-throughput mass spectrometry studies. 
Lipid rafts are specialized membrane microdomains enriched in cholesterol and sphingolipids thought to act as dynamic signalling and sorting platforms. Given their fundamental roles in cellular regulation, there is a plethora of information on the size, composition and regulation of these membrane microdomains, including a large number of proteomics studies. To facilitate the mining and analysis of published lipid raft proteomics studies, we have developed a searchable database RaftProt. In addition to browsing the studies, performing basic queries by protein and gene names, searching experiments by cell, tissue and organisms; we have implemented several advanced features to facilitate data mining. To address the issue of potential bias due to biochemical preparation procedures used, we have captured the lipid raft preparation methods and implemented advanced search option for methodology and sample treatment conditions, such as cholesterol depletion. Furthermore, we have identified a list of high confidence proteins, and enabled searching only from this list of likely bona fide lipid raft proteins. Given the apparent biological importance of lipid raft and their associated proteins, this database would constitute a key resource for the scientific community.",RaftProt,0.996259689,mammalian lipid raft proteome database,0.892292529,RaftProt,0.996259689,1,NA,30329070,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/11/2014 +24803509,http://www.rna-society.org/raid,"RAID: a comprehensive resource for human RNA-associated (RNA-RNA/RNA-protein) interaction. Transcriptomic analyses have revealed an unexpected complexity in the eukaryote transcriptome, which includes not only protein-coding transcripts but also an expanding catalog of noncoding RNAs (ncRNAs). Diverse coding and noncoding RNAs (ncRNAs) perform functions through interaction with each other in various cellular processes. 
In this project, we have developed RAID (http://www.rna-society.org/raid), an RNA-associated (RNA-RNA/RNA-protein) interaction database. RAID intends to provide the scientific community with all-in-one resources for efficient browsing and extraction of the RNA-associated interactions in human. This version of RAID contains more than 6100 RNA-associated interactions obtained by manually reviewing more than 2100 published papers, including 4493 RNA-RNA interactions and 1619 RNA-protein interactions. Each entry contains detailed information on an RNA-associated interaction, including RAID ID, RNA/protein symbol, RNA/protein categories, validated method, expressing tissue, literature references (Pubmed IDs), and detailed functional description. Users can query, browse, analyze, and manipulate RNA-associated (RNA-RNA/RNA-protein) interaction. RAID provides a comprehensive resource of human RNA-associated (RNA-RNA/RNA-protein) interaction network. Furthermore, this resource will help in uncovering the generic organizing principles of cellular function network.",RAID,0.7922194,NA,0,RAID,0.7922194,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/6/2014 +28077569,http://rth.dk/resources/rain,"RAIN: RNA-protein Association and Interaction Networks. . Protein association networks can be inferred from a range of resources including experimental data, literature mining and computational predictions. These types of evidence are emerging for non-coding RNAs (ncRNAs) as well. However, integration of ncRNAs into protein association networks is challenging due to data heterogeneity. Here, we present a database of ncRNA-RNA and ncRNA-protein interactions and its integration with the STRING database of protein-protein interactions. These ncRNA associations cover four organisms and have been established from curated examples, experimental data, interaction predictions and automatic literature mining. 
RAIN uses an integrative scoring scheme to assign a confidence score to each interaction. We demonstrate that RAIN outperforms the underlying microRNA-target predictions in inferring ncRNA interactions. RAIN can be operated through an easily accessible web interface and all interaction data can be downloaded.Database URL: http://rth.dk/resources/rain.",RAIN,0.991763949,RNA-protein Association and Interaction Networks,0.843281314,RAIN,0.991763949,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/10/2017 +26478709,http://ipt.biodiversity.aq/resource.do?r=rams,"The Register of Antarctic Marine Species (RAMS): a ten-year appraisal. The Register of Antarctic Marine Species (RAMS) is a marine species database that manages an authoritative taxonomic list of species occurring in the Southern Ocean. RAMS links with several other initiatives managing biogeographic or genomics information. The current paper aims to briefly present RAMS and provides an updated snapshot of its contents, in the form of a DarwinCore checklist (available through http://ipt.biodiversity.aq/resource.do?r=rams) and illustrative barplots. Moreover, this article presents a ten year appraisal (since the creation of RAMS). This appraisal first focuses on RAMS bibliometrics. We observed that RAMS was cited (Google Scholar) in 50 distinct publications among which 32 were peer-reviewed in 18 different journals. Three journals (Antarctic Science, Polar Biology, ZooKeys) represent almost 40% of these peer-review publications. The second appraisal focuses on the evolution of new RAMS records. We observed an important decrease in data additions since 2011. As a case study, we focused on an original dataset for a specific group (Asteroidea, Echinodermata). It appears that around one hundred species of asteroids are lacking in RAMS despite the relatively high availability of these data. 
This suggests that the users' community (or collaborative projects such as AquaRES) could be helpful in order to maintain the RAMS database over the long term.",RAMS,0.954435706,The Register of Antarctic Marine Species,0.92820104,RAMS,0.954435706,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/30/2015 +23299411,http://rapdb.dna.affrc.go.jp,"Rice Annotation Project Database (RAP-DB): an integrative and interactive database for rice genomics. The Rice Annotation Project Database (RAP-DB, http://rapdb.dna.affrc.go.jp/) has been providing a comprehensive set of gene annotations for the genome sequence of rice, Oryza sativa (japonica group) cv. Nipponbare. Since the first release in 2005, RAP-DB has been updated several times along with the genome assembly updates. Here, we present our newest RAP-DB based on the latest genome assembly, Os-Nipponbare-Reference-IRGSP-1.0 (IRGSP-1.0), which was released in 2011. We detected 37,869 loci by mapping transcript and protein sequences of 150 monocot species. To provide plant researchers with highly reliable and up to date rice gene annotations, we have been incorporating literature-based manually curated data, and 1,626 loci currently incorporate literature-based annotation data, including commonly used gene names or gene symbols. Transcriptional activities are shown at the nucleotide level by mapping RNA-Seq reads derived from 27 samples. We also mapped the Illumina reads of a Japanese leading japonica cultivar, Koshihikari, and a Chinese indica cultivar, Guangluai-4, to the genome and show alignments together with the single nucleotide polymorphisms (SNPs) and gene functional annotations through a newly developed browser, Short-Read Assembly Browser (S-RAB). We have developed two satellite databases, Plant Gene Family Database (PGFD) and Integrative Database of Cereal Gene Phylogeny (IDCGP), which display gene family and homologous gene relationships among diverse plant species. 
RAP-DB and the satellite databases offer simple and user-friendly web interfaces, enabling plant and genome researchers to access the data easily and facilitating a broad range of plant research topics.",RAP-DB,0.992282307,Rice Annotation Project Database,0.925042021,RAP-DB,0.992282307,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/7/2013 +21729256,"http://202.141.47.181/rasond/, http://www.aiims.edu/RAS.html","RASOnD-a comprehensive resource and search tool for RAS superfamily oncogenes from various species. Background The Ras superfamily plays an important role in the control of cell signalling and division. Mutations in the Ras genes convert them into active oncogenes. The Ras oncogenes form a major thrust of global cancer research as they are involved in the development and progression of tumors. This has resulted in the exponential growth of data on Ras superfamily across different public databases and in literature. However, no dedicated public resource is currently available for data mining and analysis on this family. The present database was developed to facilitate straightforward accession, retrieval and analysis of information available on Ras oncogenes from one particular site. Description We have developed the RAS Oncogene Database (RASOnD) as a comprehensive knowledgebase that provides integrated and curated information on a single platform for oncogenes of Ras superfamily. RASOnD encompasses exhaustive genomics and proteomics data existing across diverse publicly accessible databases. This resource presently includes overall 199,046 entries from 101 different species. It provides a search tool to generate information about their nucleotide and amino acid sequences, single nucleotide polymorphisms, chromosome positions, orthologies, motifs, structures, related pathways and associated diseases. We have implemented a number of user-friendly search interfaces and sequence analysis tools. 
At present the user can (i) browse the data (ii) search any field through a simple or advance search interface and (iii) perform a BLAST search and subsequently CLUSTALW multiple sequence alignment by selecting sequences of Ras oncogenes. The Generic gene browser, GBrowse, JMOL for structural visualization and TREEVIEW for phylograms have been integrated for clear perception of retrieved data. External links to related databases have been included in RASOnD. Conclusions This database is a resource and search tool dedicated to Ras oncogenes. It has utility to cancer biologists and cell molecular biologists as it is a ready source for research, identification and elucidation of the role of these oncogenes. The data generated can be used for understanding the relationship between the Ras oncogenes and their association with cancer. The database updated monthly is freely accessible online at http://202.141.47.181/rasond/ and http://www.aiims.edu/RAS.html.",RASOnD,0.995079184,RAS Oncogene Database,0.942642281,RASOnD,0.995079184,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/5/2011 +33068412,http://rasp.zhanglab.net,"RASP: an atlas of transcriptome-wide RNA secondary structure probing data. RNA molecules fold into complex structures that are important across many biological processes. Recent technological developments have enabled transcriptome-wide probing of RNA secondary structure using nucleases and chemical modifiers. These approaches have been widely applied to capture RNA secondary structure in many studies, but gathering and presenting such data from very different technologies in a comprehensive and accessible way has been challenging. Existing RNA structure probing databases usually focus on low-throughput or very specific datasets. Here, we present a comprehensive RNA structure probing database called RASP (RNA Atlas of Structure Probing) by collecting 161 deduplicated transcriptome-wide RNA secondary structure probing datasets from 38 papers. 
RASP covers 18 species across animals, plants, bacteria, fungi, and also viruses, and categorizes 18 experimental methods including DMS-seq, SHAPE-Seq, SHAPE-MaP, and icSHAPE, etc. Specially, RASP curates the up-to-date datasets of several RNA secondary structure probing studies for the RNA genome of SARS-CoV-2, the RNA virus that caused the on-going COVID-19 pandemic. RASP also provides a user-friendly interface to query, browse, and visualize RNA structure profiles, offering a shortcut to accessing RNA secondary structures grounded in experimental data. The database is freely available at http://rasp.zhanglab.net.",RASP,0.992025733,RNA Atlas of Structure Probing,0.901401392,RASP,0.992025733,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +"30453895, 32009518",http://www.niehs.nih.gov/ratemirs,"RATEmiRs: the rat atlas of tissue-specific and enriched miRNAs database. Background MicroRNAs (miRNAs) regulate gene expression and have been targeted as indicators of environmental/toxicologic stressors. Using the data from our deep sequencing of miRNAs in an extensive sampling of rat tissues, we developed a database called RATEmiRs for the Rat Atlas of Tissue-specific and Enriched miRNAs to allow users to dynamically determine mature-, iso- and pre-miR expression abundance, enrichment and specificity in rat tissues and organs. Results Illumina sequencing count data from mapped reads and meta data from the miRNA body atlas consisting of 21 and 23 tissues (14 organs) of toxicologic interest from 12 to 13 week old male and female Sprague Dawley rats respectively, were managed in a relational database with a user-friendly query interface. Data-driven pipelines are available to tailor the identification of tissue-enriched (TE) and tissue-specific (TS) miRNAs. Data-driven organ-specific (OS) pipelines reveal miRNAs that are expressed predominately in a given organ. A user-driven approach is also available to assess the tissue expression of user-specified miRNAs. 
Using one tissue vs other tissues and tissue(s) of an organ vs other organs, we illustrate the utility of RATEmiRs to facilitate the identification of candidate miRNAs. As a use case example, RATEmiRs revealed two TS miRNAs in the liver: rno-miR-122-3p and rno-miR-122-5p. When liver is compared to just the brain tissues for example, rno-miR-192-5p, rno-miR-193-3p, rno-miR-203b-3p, rno-miR-3559-5p, rno-miR-802-3p and rno-miR-802-5p are also detected as abundantly expressed in liver. As another example, 55 miRNAs from the RATEmiRs query of ileum vs brain tissues overlapped with miRNAs identified from the same comparison of tissues in an independent, publicly available dataset of 10 week old male rat microarray data suggesting that these miRNAs are likely not age-specific, platform-specific nor pipeline-dependent. Lastly, we identified 10 miRNAs that have conserved tissue/organ-specific expression between the rat and human species. Conclusions RATEmiRs provides a new platform for identification of TE, TS and OS miRNAs in a broad array of rat tissues. RATEmiRs is available at: https://www.niehs.nih.gov/ratemirs.",RATEmiRs,0.978475153,Rat Atlas of Tissue-specific and Enriched miRNAs,0.862209494,RATEmiRs,0.978475153,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/12/2020 +24158836,http://hinv.jp/hinv/rav,"RAvariome: a genetic risk variants database for rheumatoid arthritis based on assessment of reproducibility between or within human populations. Rheumatoid arthritis (RA) is a common autoimmune inflammatory disease of the joints and is caused by both genetic and environmental factors. In the past six years, genome-wide association studies (GWASs) have identified many risk variants associated with RA. However, not all associations reported from GWASs are reproduced when tested in follow-up studies. To establish a reliable set of RA risk variants, we systematically classified common variants identified in GWASs by the degree of reproducibility among independent studies. 
We collected comprehensive genetic associations from 90 papers of GWASs and meta-analysis. The genetic variants were assessed according to the statistical significance and reproducibility between or within nine geographical populations. As a result, 82 and 19 single nucleotide polymorphisms (SNPs) were confirmed as intra- and inter-population-reproduced variants, respectively. Interestingly, majority of the intra-population-reproduced variants from European and East Asian populations were not common in two populations, but their nearby genes appeared to be the components of common pathways. Furthermore, a tool to predict the individual's genetic risk of RA was developed to facilitate personalized medicine and preventive health care. For further clinical researches, the list of reliable genetic variants of RA and the genetic risk prediction tool are provided by open access database RAvariome. DATABASE URL: http://hinv.jp/hinv/rav/.",RAvariome,0.715638518,NA,0,RAvariome,0.715638518,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/23/2013 +29039006,"http://www.brachypodium.org, http://brachy.bmep.riken.jp/ver.1/index.pl","Information Resources for Functional Genomics Studies in Brachypodium distachyon. Online tools and databases play an essential role in the promotion of functional genomics studies. Several resources for information regarding Brachypodium distachyon (Brachypodium) are available on the Web. In this chapter, we focus on recently published resources for Brachypodium research. The Brachypodium.org website ( http://www.brachypodium.org /) is an information portal that provides links to various genomic resources regarding Brachypodium, including genome annotation and re-sequencing datasets of accessions. 
RIKEN Full-length cDNA Database (RBFLDB, http://brachy.bmep.riken.jp/ver.1/index.pl ) is a web-accessible database that provides information of Brachypodium full-length cDNAs (FLcDNAs) collected in RIKEN and updated gene structures of Brachypodium based on the FLcDNA sequences as well as results of comparative analyses with available sequence resources for Triticeae crops, wheat, and barley. We introduce the functionalities and availability of these important information resources. Furthermore, we also present brief descriptions of useful online tools that facilitate Brachypodium functional genomics studies.",RBFLDB,0.992151543,RIKEN Full-length cDNA Database,0.988669369,RBFLDB,0.992151543,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +26635394,http://www.rbp-var.biols.ac.cn,"RBP-Var: a database of functional variants involved in regulation mediated by RNA-binding proteins. Transcription factors bind to the genome by forming specific contacts with the primary DNA sequence; however, RNA-binding proteins (RBPs) have greater scope to achieve binding specificity through the RNA secondary structure. It has been revealed that single nucleotide variants (SNVs) that alter RNA structure, also known as RiboSNitches, exhibit 3-fold greater local structure changes than replicates of the same DNA sequence, demonstrated by the fact that depletion of RiboSNitches could result in the alteration of specific RNA shapes at thousands of sites, including 3' UTRs, binding sites of microRNAs and RBPs. However, the network between SNVs and post-transcriptional regulation remains unclear. Here, we developed RBP-Var, a database freely available at http://www.rbp-var.biols.ac.cn/, which provides annotation of functional variants involved in post-transcriptional interaction and regulation. RBP-Var provides an easy-to-use web interface that allows users to rapidly find whether SNVs of interest can transform the secondary structure of RNA and identify RBPs whose binding may be subsequently disrupted. 
RBP-Var integrates DNA and RNA biology to understand how various genetic variants and post-transcriptional mechanisms cooperate to orchestrate gene expression. In summary, RBP-Var is useful in selecting candidate SNVs for further functional studies and exploring causal SNVs underlying human diseases.",RBP-Var,0.991189197,NA,0,RBP-Var,0.991189197,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/3/2015 +33196814,http://RBP2GO.DKFZ.de,"RBP2GO: a comprehensive pan-species database on RNA-binding proteins, their interactions and functions. RNA-protein complexes have emerged as central players in numerous key cellular processes with significant relevance in health and disease. To further deepen our knowledge of RNA-binding proteins (RBPs), multiple proteome-wide strategies have been developed to identify RBPs in different species leading to a large number of studies contributing experimentally identified as well as predicted RBP candidate catalogs. However, the rapid evolution of the field led to an accumulation of isolated datasets, hampering the access and comparison of their valuable content. Moreover, tools to link RBPs to cellular pathways and functions were lacking. Here, to facilitate the efficient screening of the RBP resources, we provide RBP2GO (https://RBP2GO.DKFZ.de), a comprehensive database of all currently available proteome-wide datasets for RBPs across 13 species from 53 studies including 105 datasets identifying altogether 22 552 RBP candidates. These are combined with the information on RBP interaction partners and on the related biological processes, molecular functions and cellular compartments. 
RBP2GO offers a user-friendly web interface with an RBP scoring system and powerful advanced search tools allowing forward and reverse searches connecting functions and RBPs to stimulate new research directions.",RBP2GO,0.996937677,NA,0,RBP2GO,0.996937677,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +29931156,http://rbpmetadb.yubiolab.org,"RBPMetaDB: a comprehensive annotation of mouse RNA-Seq datasets with perturbations of RNA-binding proteins. . RNA-binding proteins (RBPs) may play a critical role in gene regulation in various diseases or biological processes by controlling post-transcriptional events such as polyadenylation, splicing and mRNA stabilization via binding activities to RNA molecules. Owing to the importance of RBPs in gene regulation, a great number of studies have been conducted, resulting in a large amount of RNA-Seq datasets. However, these datasets usually do not have structured organization of metadata, which limits their potentially wide use. To bridge this gap, the metadata of a comprehensive set of publicly available mouse RNA-Seq datasets with perturbed RBPs were collected and integrated into a database called RBPMetaDB. This database contains 292 mouse RNA-Seq datasets for a comprehensive list of 187 RBPs. These RBPs account for only ∼10% of all known RBPs annotated in Gene Ontology, indicating that most are still unexplored using high-throughput sequencing. This negative information provides a great pool of candidate RBPs for biologists to conduct future experimental studies. In addition, we found that DNA-binding activities are significantly enriched among RBPs in RBPMetaDB, suggesting that prior studies of these DNA- and RNA-binding factors focus more on DNA-binding activities instead of RNA-binding activities. This result reveals the opportunity to efficiently reuse these data for investigation of the roles of their RNA-binding activities. A web application has also been implemented to enable easy access and wide use of RBPMetaDB. 
It is expected that RBPMetaDB will be a great resource for improving understanding of the biological roles of RBPs.Database URL: http://rbpmetadb.yubiolab.org.",RBPMetaDB,0.994099975,NA,0,RBPMetaDB,0.994099975,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +22608002,http://www.juit.ac.in/attachments/jsr/rcdb/homenew.html,"RCDB: Renal Cancer Gene Database. Background Renal cell carcinoma or RCC is one of the common and most lethal urological cancers, with 40% of the patients succumbing to death because of metastatic progression of the disease. Treatment of metastatic RCC remains highly challenging because of its resistance to chemotherapy as well as radiotherapy, besides surgical resection. Whereas RCC comprises tumors with differing histological types, clear cell RCC remains the most common. A major problem in the clinical management of patients presenting with localized ccRCC is the inability to determine tumor aggressiveness and accurately predict the risk of metastasis following surgery. As a measure to improve the diagnosis and prognosis of RCC, researchers have identified several molecular markers through a number of techniques. However the wealth of information available is scattered in literature and not easily amenable to data-mining. To reduce this gap, this work describes a comprehensive repository called Renal Cancer Gene Database, as an integrated gateway to study renal cancer related data. Findings Renal Cancer Gene Database is a manually curated compendium of 240 protein-coding and 269 miRNA genes contributing to the etiology and pathogenesis of various forms of renal cell carcinomas. The protein coding genes have been classified according to the kind of gene alteration observed in RCC. RCDB also includes the miRNAsdysregulated in RCC, along with the corresponding information regarding the type of RCC and/or metastatic or prognostic significance. While some of the miRNA genes showed an association with other types of cancers few were unique to RCC. 
Users can query the database using keywords, category and chromosomal location of the genes. The knowledgebase can be freely accessed via a user-friendly web interface at http://www.juit.ac.in/attachments/jsr/rcdb/homenew.html. Conclusions It is hoped that this database would serve as a useful complement to the existing public resources and as a good starting point for researchers and physicians interested in RCC genetics.",RCDB,0.995647073,Renal Cancer Gene Database,0.962979174,RCDB,0.995647073,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/18/2012 +21586118,http://rcgdb.bioinf.uni-sb.de/MutomeWeb,"The Roche Cancer Genome Database 2.0. Background Cancer is a disease of genome alterations that arise through the acquisition of multiple somatic DNA sequence mutations. Some of these mutations can be critical for the development of a tumor and can be useful to characterize tumor types or predict outcome. Description We have constructed an integrated biological information system termed the Roche Cancer Genome Database (RCGDB) combining different human mutation databases already publicly available. This data is further extended by hand-curated information from publications.The current version of the RCGDB provides a user-friendly graphical interface that gives access to the data in different ways: (1) Single interactive search by genes, samples, cell lines, diseases, as well as pathways, (2) batch searches for genes and cell lines, (3) customized searches for regularly occurring requests, and (4) an advanced query interface enabling the user to query for samples and mutations by various filter criteria. Conclusion The interfaces of the presented database enable the user to search and view mutations in an intuitive and straight-forward manner. 
The database is freely accessible at http://rcgdb.bioinf.uni-sb.de/MutomeWeb/.",RCGDB,0.987567484,Roche Cancer Genome Database,0.956028092,RCGDB,0.987567484,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/17/2011 +22207818,http://www.infosyslab.fr/archaeocyatha,"Cambrian archaeocyathan metazoans: revision of morphological characters and standardization of genus descriptions to establish an online identification tool. Archaeocyatha represent the oldest calcified sponges and the first metazoans to build bioconstructions in association with calcimicrobes. They are a key group in biology, evolutionary studies, biostratigraphy, paleoecology and paleogeography of the early Cambrian times. The establishing of a new standardized terminology for archaeocyathans description has permitted the creation of the first knowledge base in English including descriptions of all archaeocyathan genera. This base, using the XPER² software package, is an integral part of the -Archaeocyatha- a knowledge base website, freely available at url http://www.infosyslab.fr/archaeocyatha. The website is composed of common information about Archaeocyatha, general remarks about the knowledge base, the description of the 307 genera recognized with images of type-specimens of type-species for each genus, as well as additional morphological data, an interactive free access key and its user guide.The automatic analysis and comparison of the digitized descriptions have identified some genera with highly similar morphology. These results are a great help for future taxonomic revisions and suggest a number of possible synonymies that require further study.",rchaeocyatha,0.695955813,NA,0,rchaeocyatha,0.695955813,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/28/2011 +23457042,http://www.bioinfo.mochsl.org.br/rcpedia,"RCPedia: a database of retrocopied genes. Motivation Retrocopies are copies of mature RNAs that are usually devoid of regulatory sequences and introns. 
They have routinely been classified as processed pseudo-genes with little or no biological relevance. However, recent findings have revealed functional roles for retrocopies, as well as their high frequency in some organisms, such as primates. Despite their increasing importance, there is no user-friendly and publicly available resource for the study of retrocopies. Results Here, we present RCPedia, an integrative and user-friendly database designed for the study of retrocopied genes. RCPedia contains a complete catalogue of the retrocopies that are known to be present in human and five other primate genomes, their genomic context, inter-species conservation and gene expression data. RCPedia also offers a streamlined data representation and an efficient query system. Availability and implementation RCPedia is available at http://www.bioinfo.mochsl.org.br/rcpedia.",RCPedia,0.993256867,NA,0,RCPedia,0.993256867,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2013 +"23193259, 25428375",http://www.rcsb.org,"The RCSB Protein Data Bank: new resources for research and education. The Research Collaboratory for Structural Bioinformatics Protein Data Bank (RCSB PDB) develops tools and resources that provide a structural view of biology for research and education. The RCSB PDB web site (http://www.rcsb.org) uses the curated 3D macromolecular data contained in the PDB archive to offer unique methods to access, report and visualize data. Recent activities have focused on improving methods for simple and complex searches of PDB data, creating specialized access to chemical component data and providing domain-based structural alignments. New educational resources are offered at the PDB-101 educational view of the main web site such as Author Profiles that display a researcher's PDB entries in a timeline. To promote different kinds of access to the RCSB PDB, Web Services have been expanded, and an RCSB PDB Mobile application for the iPhone/iPad has been released. 
These improvements enable new opportunities for analyzing and understanding structure data.",RCSB PDB,0.994570589,Data Bank,0.701618314,RCSB PDB,0.994570589,2,NA,27794042,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/26/2014 +27794042,http://rcsb.org,"The RCSB protein data bank: integrative view of protein, gene and 3D structural information. The Research Collaboratory for Structural Bioinformatics Protein Data Bank (RCSB PDB, http://rcsb.org), the US data center for the global PDB archive, makes PDB data freely available to all users, from structural biologists to computational biologists and beyond. New tools and resources have been added to the RCSB PDB web portal in support of a 'Structural View of Biology.' Recent developments have improved the User experience, including the high-speed NGL Viewer that provides 3D molecular visualization in any web browser, improved support for data file download and enhanced organization of website pages for query, reporting and individual structure exploration. Structure validation information is now visible for all archival entries. PDB data have been integrated with external biological resources, including chromosomal position within the human genome; protein modifications; and metabolic pathways. PDB-101 educational materials have been reorganized into a searchable website and expanded to include new features such as the Geis Digital Archive.",RCSB PDB,0.987921262,Data Bank,0.828813583,RCSB PDB,0.987921262,1,NA,"23193259.0, 25428375.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/27/2016 +26912951,http://rabies.mscwbif.org/home.html,"RDIS: The Rabies Disease Information System. Unlabelled Rabies is a deadly viral disease causing acute inflammation or encephalitis of the brain in human beings and other mammals. Therefore, it is of interest to collect information related to the disease from several sources including known literature databases for further analysis and interpretation. 
Hence, we describe the development of a database called the Rabies Disease Information System (RDIS) for this purpose. The online database describes the etiology, epidemiology, pathogenesis and pathology of the disease using diagrammatic representations. It provides information on several carriers of the rabies viruses like dog, bat, fox and civet, and their distributions around the world. Information related to the urban and sylvatic cycles of transmission of the virus is also made available. The database also contains information related to available diagnostic methods and vaccines for human and other animals. This information is of use to medical, veterinary and paramedical practitioners, students, researchers, pet owners, animal lovers, livestock handlers, travelers and many others. Availability The database is available for free http://rabies.mscwbif.org/home.html.",RDIS,0.970782856,The Rabies Disease Information System,0.871801784,RDIS,0.970782856,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/30/2015 +24288368,http://rdp.cme.msu.edu,"Ribosomal Database Project: data and tools for high throughput rRNA analysis. Ribosomal Database Project (RDP; http://rdp.cme.msu.edu/) provides the research community with aligned and annotated rRNA gene sequence data, along with tools to allow researchers to analyze their own rRNA gene sequences in the RDP framework. RDP data and tools are utilized in fields as diverse as human health, microbial ecology, environmental microbiology, nucleic acid chemistry, taxonomy and phylogenetics. In addition to aligned and annotated collections of bacterial and archaeal small subunit rRNA genes, RDP now includes a collection of fungal large subunit rRNA genes. RDP tools, including Classifier and Aligner, have been updated to work with this new fungal collection. 
The use of high-throughput sequencing to characterize environmental microbial populations has exploded in the past several years, and as sequence technologies have improved, the sizes of environmental datasets have increased. With release 11, RDP is providing an expanded set of tools to facilitate analysis of high-throughput data, including both single-stranded and paired-end reads. In addition, most tools are now available as open source packages for download and local use by researchers with high-volume needs or who would like to develop custom analysis pipelines.",RDP,0.996806602,Ribosomal Database Project,0.967247033,RDP,0.996806602,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2013 +"31691815, 31802127, 32486891",http://reactome.org,"The reactome pathway knowledgebase. The Reactome Knowledgebase (https://reactome.org) provides molecular details of signal transduction, transport, DNA replication, metabolism and other cellular processes as an ordered network of molecular transformations in a single consistent data model, an extended version of a classic metabolic map. Reactome functions both as an archive of biological processes and as a tool for discovering functional relationships in data such as gene expression profiles or somatic mutation catalogs from tumor cells. To extend our ability to annotate human disease processes, we have implemented a new drug class and have used it initially to annotate drugs relevant to cardiovascular disease. Our annotation model depends on external domain experts to identify new areas for annotation and to review new content. New web pages facilitate recruitment of community experts and allow those who have contributed to Reactome to identify their contributions and link them to their ORCID records. 
To improve visualization of our content, we have implemented a new tool to automatically lay out the components of individual reactions with multiple options for downloading the reaction diagrams and associated data, and a new display of our event hierarchy that will facilitate visual interpretation of pathway analysis results.",Reactome,0.940237164,NA,0,Reactome,0.940237164,3,29145629,"22012987.0, 24243840.0, 26087747.0",low_prob_best_name,do not remove,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,6/2/2020 +"22012987, 24243840, 26087747",http://www.reactome.org,"The Reactome BioMart. Reactome is an open source, expert-authored, manually curated and peer-reviewed database of reactions, pathways and biological processes. We provide an intuitive web-based user interface to pathway knowledge and a suite of data analysis tools. The Reactome BioMart provides biologists and bioinformaticians with a single web interface for performing simple or elaborate queries of the Reactome database, aggregating data from different sources and providing an opportunity to integrate experimental and computational results with information relating to biological pathways. Database URL: http://www.reactome.org.",Reactome,0.799830675,NA,0,Reactome,0.799830675,3,NA,"31691815.0, 31802127.0, 32486891.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,NA,6/19/2015 +29145629,http://reactome.org,"The Reactome Pathway Knowledgebase. The Reactome Knowledgebase (https://reactome.org) provides molecular details of signal transduction, transport, DNA replication, metabolism, and other cellular processes as an ordered network of molecular transformations-an extended version of a classic metabolic map, in a single consistent data model. Reactome functions both as an archive of biological processes and as a tool for discovering unexpected functional relationships in data such as gene expression profiles or somatic mutation catalogues from tumor cells. 
To support the continued brisk growth in the size and complexity of Reactome, we have implemented a graph database, improved performance of data analysis tools, and designed new data structures and strategies to boost diagram viewer performance. To make our website more accessible to human users, we have improved pathway display and navigation by implementing interactive Enhanced High Level Diagrams (EHLDs) with an associated icon library, and subpathway highlighting and zooming, in a simplified and reorganized web site with adaptive design. To encourage re-use of our content, we have enabled export of pathway diagrams as 'PowerPoint' files.",Reactome Knowledgebase,0.694057018,NA,0,Reactome Knowledgebase,0.694057018,1,"31691815.0, 31802127.0, 32486891.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,1/1/2018 +30020436,http://realdb.algaegenome.org,"realDB: a genome and transcriptome resource for the red algae (phylum Rhodophyta). . With over 6000 species in seven classes, red algae (Rhodophyta) have diverse economic, ecological, experimental and evolutionary values. However, red algae are usually absent or rare in comparative analyses because genomic information of this phylum is often under-represented in various comprehensive genome databases. To improve the accessibility to the ome data and omics tools for red algae, we provided 10 genomes and 27 transcriptomes representing all seven classes of Rhodophyta. Three genomes and 18 transcriptomes were de novo assembled and annotated in this project. User-friendly BLAST suit, Jbrowse tools and search system were developed for online analyses. Detailed introductions to red algae taxonomy and the sequencing status are also provided. 
In conclusion, realDB (realDB.algaegenome.org) provides a platform covering the most genome and transcriptome data for red algae and a suite of tools for online analyses, and will attract both red algal biologists and those working on plant ecology, evolution and development.Database URL: http://realdb.algaegenome.org/.",realDB,0.993008554,NA,0,realDB,0.993008554,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +25378308,http://rebase.neb.com,"REBASE--a database for DNA restriction and modification: enzymes, genes and genomes. REBASE is a comprehensive and fully curated database of information about the components of restriction-modification (RM) systems. It contains fully referenced information about recognition and cleavage sites for both restriction enzymes and methyltransferases as well as commercial availability, methylation sensitivity, crystal and sequence data. All genomes that are completely sequenced are analyzed for RM system components, and with the advent of PacBio sequencing, the recognition sequences of DNA methyltransferases (MTases) are appearing rapidly. Thus, Type I and Type III systems can now be characterized in terms of recognition specificity merely by DNA sequencing. The contents of REBASE may be browsed from the web http://rebase.neb.com and selected compilations can be downloaded by FTP (ftp.neb.com). Monthly updates are also available via email.",REBASE,0.997635245,NA,0,REBASE,0.997635245,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/5/2014 +27821657,http://www.lovd.nl/LDLR,"The UCL low-density lipoprotein receptor gene variant database: pathogenicity update. Background Familial hypercholesterolaemia (OMIM 143890) is most frequently caused by variations in the low-density lipoprotein receptor (LDLR) gene. Predicting whether novel variants are pathogenic may not be straightforward, especially for missense and synonymous variants. 
In 2013, the Association of Clinical Genetic Scientists published guidelines for the classification of variants, with categories 1 and 2 representing clearly not or unlikely pathogenic, respectively, 3 representing variants of unknown significance (VUS), and 4 and 5 representing likely to be or clearly pathogenic, respectively. Here, we update the University College London (UCL) LDLR variant database according to these guidelines. Methods PubMed searches and alerts were used to identify novel LDLR variants for inclusion in the database. Standard in silico tools were used to predict potential pathogenicity. Variants were designated as class 4/5 only when the predictions from the different programs were concordant and as class 3 when predictions were discordant. Results The updated database (http://www.lovd.nl/LDLR) now includes 2925 curated variants, representing 1707 independent events. All 129 nonsense variants, 337 small frame-shifting and 117/118 large rearrangements were classified as 4 or 5. Of the 795 missense variants, 115 were in classes 1 and 2, 605 in class 4 and 75 in class 3. 111/181 intronic variants, 4/34 synonymous variants and 14/37 promoter variants were assigned to classes 4 or 5. Overall, 112 (7%) of reported variants were class 3. Conclusions This study updates the LDLR variant database and identifies a number of reported VUS where additional family and in vitro studies will be required to confirm or refute their pathogenicity.",NA,0,receptor gene,0.58098039,receptor gene,0.58098039,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,11/7/2016 +33238002,http://cosylab.iiitd.edu.in/recipedb,"RecipeDB: a resource for exploring recipes. . Cooking is the act of turning nature into the culture, which has enabled the advent of the omnivorous human diet. The cultural wisdom of processing raw ingredients into delicious dishes is embodied in their cuisines. 
Recipes thus are the cultural capsules that encode elaborate cooking protocols for evoking sensory satiation as well as providing nourishment. As we stand on the verge of an epidemic of diet-linked disorders, it is eminently important to investigate the culinary correlates of recipes to probe their association with sensory responses as well as consequences for nutrition and health. RecipeDB (https://cosylab.iiitd.edu.in/recipedb) is a structured compilation of recipes, ingredients and nutrition profiles interlinked with flavor profiles and health associations. The repertoire comprises of meticulous integration of 118 171 recipes from cuisines across the globe (6 continents, 26 geocultural regions and 74 countries), cooked using 268 processes (heat, cook, boil, simmer, bake, etc.), by blending over 20 262 diverse ingredients, which are further linked to their flavor molecules (FlavorDB), nutritional profiles (US Department of Agriculture) and empirical records of disease associations obtained from MEDLINE (DietRx). This resource is aimed at facilitating scientific explorations of the culinary space (recipe, ingredient, cooking processes/techniques, dietary styles, etc.) linked to taste (flavor profile) and health (nutrition and disease associations) attributes seeking for divergent applications. Database URL:  https://cosylab.iiitd.edu.in/recipedb.",RecipeDB,0.99755621,NA,0,RecipeDB,0.99755621,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2020 +34844637,http://rna.recount.bio,"recount3: summaries and queries for large-scale RNA-seq expression and splicing. We present recount3, a resource consisting of over 750,000 publicly available human and mouse RNA sequencing (RNA-seq) samples uniformly processed by our new Monorail analysis pipeline. To facilitate access to the data, we provide the recount3 and snapcount R/Bioconductor packages as well as complementary web resources. 
Using these tools, data can be downloaded as study-level summaries or queried for specific exon-exon junctions, genes, samples, or other features. Monorail can be used to process local and/or private data, allowing results to be directly compared to any study in recount3. Taken together, our tools help biologists maximize the utility of publicly available RNA-seq data, especially to improve their understanding of newly collected data. recount3 is available from http://rna.recount.bio .",recount3,0.966189086,NA,0,recount3,0.966189086,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/29/2021 +22139942,http://recountdb.cbrc.jp,"RecountDB: a database of mapped and count corrected transcribed sequences. The field of gene expression analysis continues to benefit from next-generation sequencing generated data, which enables transcripts to be measured with unmatched accuracy and resolution. But the high-throughput reads from these technologies also contain many errors, which can compromise the ability to accurately detect and quantify rare transcripts. Fortunately, techniques exist to ameliorate the affects of sequencer error. We present RecountDB, a secondary database derived from primary data in NCBI's short read archive. RecountDB holds sequence counts from RNA-seq and 5' capped transcription start site experiments, corrected and mapped to the relevant genome. Via a searchable and browseable interface users can obtain corrected data in formats useful for transcriptomic analysis. The database is currently populated with 2265 entries from 45 organisms and continuously growing. RecountDB is publicly available at: http://recountdb.cbrc.jp.",RecountDB,0.995889306,NA,0,RecountDB,0.995889306,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2011 +28529082,http://expression.ic4r.org,"Rice Expression Database (RED): An integrated RNA-Seq-derived gene expression database for rice. 
Rice is one of the most important stable food as well as a monocotyledonous model organism for the plant research community. Here, we present RED (Rice Expression Database; http://expression.ic4r.org), an integrated database of rice gene expression profiles derived entirely from RNA-Seq data. RED features a comprehensive collection of 284 high-quality RNA-Seq experiments, integrates a large number of gene expression profiles and covers a wide range of rice growth stages as well as various treatments. Based on massive expression profiles, RED provides a list of housekeeping and tissue-specific genes and dynamically constructs co-expression networks for gene(s) of interest. Besides, it provides user-friendly web interfaces for querying, browsing and visualizing expression profiles of concerned genes. Together, as a core resource in BIG Data Center, RED bears great utility for characterizing the function of rice genes and better understanding important biological processes and mechanisms underlying complex agronomic traits in rice.",RED,0.91165034,Rice Expression Database,0.904367864,RED,0.91165034,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/4/2017 +30329093,http://redfly.ccr.buffalo.edu,"REDfly: the transcriptional regulatory element database for Drosophila. The REDfly database provides a comprehensive curation of experimentally-validated Drosophila transcriptional cis-regulatory elements and includes information on DNA sequence, experimental evidence, patterns of regulated gene expression, and more. Now in its thirteenth year, REDfly has grown to over 23 000 records of tested reporter gene constructs and 2200 tested transcription factor binding sites. Recent developments include the start of curation of predicted cis-regulatory modules in addition to experimentally-verified ones, improved search and filtering, and increased interaction with the authors of curated papers. 
An expanded data model that will capture information on temporal aspects of gene regulation, regulation in response to environmental and other non-developmental cues, sexually dimorphic gene regulation, and non-endogenous (ectopic) aspects of reporter gene expression is under development and expected to be in place within the coming year. REDfly is freely accessible at http://redfly.ccr.buffalo.edu, and news about database updates and new features can be followed on Twitter at @REDfly_database.",REDfly,0.997037947,NA,0,REDfly,0.997037947,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +29696033,http://srv00.recas.ba.infn.it/redidb/index.html,"REDIdb 3.0: A Comprehensive Collection of RNA Editing Events in Plant Organellar Genomes. RNA editing is an important epigenetic mechanism by which genome-encoded transcripts are modified by substitutions, insertions and/or deletions. It was first discovered in kinetoplastid protozoa followed by its reporting in a wide range of organisms. In plants, RNA editing occurs mostly by cytidine (C) to uridine (U) conversion in translated regions of organelle mRNAs and tends to modify affected codons restoring evolutionary conserved aminoacid residues. RNA editing has also been described in non-protein coding regions such as group II introns and structural RNAs. Despite its impact on organellar transcriptome and proteome complexity, current primary databases still do not provide a specific field for RNA editing events. To overcome these limitations, we developed REDIdb a specialized database for RNA editing modifications in plant organelles. Hereafter we describe its third release containing more than 26,000 events in a completely novel web interface to accommodate RNA editing in its genomics, biological and evolutionary context through whole genome maps and multiple sequence alignments. 
REDIdb is freely available at http://srv00.recas.ba.infn.it/redidb/index.html.",REDIdb,0.993994772,NA,0,REDIdb,0.993994772,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/11/2018 +27587585,http://srv00.recas.ba.infn.it/atlas,"REDIportal: a comprehensive database of A-to-I RNA editing events in humans. RNA editing by A-to-I deamination is the prominent co-/post-transcriptional modification in humans. It is carried out by ADAR enzymes and contributes to both transcriptomic and proteomic expansion. RNA editing has pivotal cellular effects and its deregulation has been linked to a variety of human disorders including neurological and neurodegenerative diseases and cancer. Despite its biological relevance, many physiological and functional aspects of RNA editing are yet elusive. Here, we present REDIportal, available online at http://srv00.recas.ba.infn.it/atlas/, the largest and comprehensive collection of RNA editing in humans including more than 4.5 millions of A-to-I events detected in 55 body sites from thousands of RNAseq experiments. REDIportal embeds RADAR database and represents the first editing resource designed to answer functional questions, enabling the inspection and browsing of editing levels in a variety of human samples, tissues and body sites. In contrast with previous RNA editing databases, REDIportal comprises its own browser (JBrowse) that allows users to explore A-to-I changes in their genomic context, empathizing repetitive elements in which RNA editing is prominent.",REDIportal,0.996106863,NA,0,REDIportal,0.996106863,1,NA,33104797,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,9/1/2016 +33104797,http://srv00.recas.ba.infn.it/atlas/index.html,"REDIportal: millions of novel A-to-I RNA editing events from thousands of RNAseq experiments. RNA editing is a relevant epitranscriptome phenomenon able to increase the transcriptome and proteome diversity of eukaryotic organisms. 
ADAR mediated RNA editing is widespread in humans in which millions of A-to-I changes modify thousands of primary transcripts. RNA editing has pivotal roles in the regulation of gene expression or modulation of the innate immune response or functioning of several neurotransmitter receptors. Massive transcriptome sequencing has fostered the research in this field. Nonetheless, different aspects of the RNA editing biology are still unknown and need to be elucidated. To support the study of A-to-I RNA editing we have updated our REDIportal catalogue raising its content to about 16 millions of events detected in 9642 human RNAseq samples from the GTEx project by using a dedicated pipeline based on the HPC version of the REDItools software. REDIportal now allows searches at sample level, provides overviews of RNA editing profiles per each RNAseq experiment, implements a Gene View module to look at individual events in their genic context and hosts the CLAIRE database. Starting from this novel version, REDIportal will start collecting non-human RNA editing changes for comparative genomics investigations. The database is freely available at http://srv00.recas.ba.infn.it/atlas/index.html.",REDIportal,0.995939016,NA,0,REDIportal,0.995939016,1,NA,27587585,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +22833525,http://biocomputer.bio.cuhk.edu.hk/RedoxDB,"RedoxDB--a curated database for experimentally verified protein oxidative modification. Summary Redox regulation and signaling, which are involved in various cellular processes, have become one of the research focuses in the past decade. Cysteine thiol groups are particularly susceptible to post-translational modification, and their reversible oxidation is of critical role in redox regulation and signaling. 
With the tremendous improvement of techniques, hundreds of redox proteins along with their redox-sensitive cysteines have been reported, and the number is still fast growing. However, until now there is no database to accommodate the rapid accumulation of information on protein oxidative modification. Here we present RedoxDB-a manually curated database for experimentally validated redox proteins. RedoxDB (version 1.0) consists of two datasets (A and B, for proteins with or without verified modified cysteines, respectively) and includes 2157 redox proteins containing 2203 cysteine residues with oxidative modification. For each modified cysteine, the exact position, modification type and flanking sequence are provided. Additional information, including gene name, organism, sequence, literature references and links to UniProt and PDB, is also supplied. The database supports several functions including data search, blast and browsing. Bulk download of the entire dataset is also available. We expect that RedoxDB will be useful for both experimental studies and computational analyses of protein oxidative modification. Availability The database is freely available at: http://biocomputer.bio.cuhk.edu.hk/RedoxDB. Contact djguo@cuhk.edu.hk Supplementary information Supplementary data are available at Bioinformatics Online.",RedoxDB,0.996839046,NA,0,RedoxDB,0.996839046,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/25/2012 +35111182,http://integrape.eu/resources/genes-genomes,"The Grape Gene Reference Catalogue as a Standard Resource for Gene Selection and Genetic Improvement. Effective crop improvement, whether through selective breeding or biotech strategies, is largely dependent on the cumulative knowledge of a species' pangenome and its containing genes. Acquiring this knowledge is specially challenging in grapevine, one of the oldest fruit crops grown worldwide, which is known to have more than 30,000 genes. 
Well-established research communities studying model organisms have created and maintained, through public and private funds, a diverse range of online tools and databases serving as repositories of genomes and gene function data. The lack of such resources for the non-model, but economically important, Vitis vinifera species has driven the need for a standardised collection of genes within the grapevine community. In an effort led by the Integrape COST Action CA17111, we have recently developed the first grape gene reference catalogue, where genes are ascribed to functional data, including their accession identifiers from different genome-annotation versions (https://integrape.eu/resources/genes-genomes/). We present and discuss this gene repository together with a validation-level scheme based on varied supporting evidence found in current literature. The catalogue structure and online submission form provided permits community curation. Finally, we present the Gene Cards tool, developed within the Vitis Visualization (VitViz) platform, to visualize the data collected in the catalogue and link gene function with tissue-specific expression derived from public transcriptomic data. This perspective article aims to present these resources to the community as well as highlight their potential use, in particular for plant-breeding applications.",NA,0,Reference,0.612519324,Reference,0.612519324,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2021 +28850115,http://refex.dbcls.jp,"RefEx, a reference gene expression dataset as a web tool for the functional analysis of genes. Gene expression data are exponentially accumulating; thus, the functional annotation of such sequence data from metadata is urgently required. However, life scientists have difficulty utilizing the available data due to its sheer magnitude and complicated access. 
We have developed a web tool for browsing reference gene expression pattern of mammalian tissues and cell lines measured using different methods, which should facilitate the reuse of the precious data archived in several public databases. The web tool is called Reference Expression dataset (RefEx), and RefEx allows users to search by the gene name, various types of IDs, chromosomal regions in genetic maps, gene family based on InterPro, gene expression patterns, or biological categories based on Gene Ontology. RefEx also provides information about genes with tissue-specific expression, and the relative gene expression values are shown as choropleth maps on 3D human body images from BodyParts3D. Combined with the newly incorporated Functional Annotation of Mammals (FANTOM) dataset, RefEx provides insight regarding the functional interpretation of unfamiliar genes. RefEx is publicly available at http://refex.dbcls.jp/.",RefEx,0.944570541,Reference Expression dataset,0.787088712,RefEx,0.944570541,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/29/2017 +28438161,http://p4d-info.nig.ac.jp/refolddb,"REFOLDdb: a new and sustainable gateway to experimental protocols for protein refolding. Background More than 7000 papers related to ""protein refolding"" have been published to date, with approximately 300 reports each year during the last decade. Whilst some of these papers provide experimental protocols for protein refolding, a survey in the structural life science communities showed a necessity for a comprehensive database for refolding techniques. We therefore have developed a new resource - ""REFOLDdb"" that collects refolding techniques into a single, searchable repository to help researchers develop refolding protocols for proteins of interest. Results We based our resource on the existing REFOLD database, which has not been updated since 2009. 
We redesigned the data format to be more concise, allowing consistent representations among data entries compared with the original REFOLD database. The remodeled data architecture enhances the search efficiency and improves the sustainability of the database. After an exhaustive literature search we added experimental refolding protocols from reports published 2009 to early 2017. In addition to this new data, we fully converted and integrated existing REFOLD data into our new resource. REFOLDdb contains 1877 entries as of March 17th, 2017, and is freely available at http://p4d-info.nig.ac.jp/refolddb/ . Conclusion REFOLDdb is a unique database for the life sciences research community, providing annotated information for designing new refolding protocols and customizing existing methodologies. We envisage that this resource will find wide utility across broad disciplines that rely on the production of pure, active, recombinant proteins. Furthermore, the database also provides a useful overview of the recent trends and statistics in refolding technology development.",REFOLDdb,0.993212402,NA,0,REFOLDdb,0.993212402,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/24/2017 +"24259432, 26553804, 29112715, 33270901",http://www.ncbi.nlm.nih.gov/refseq,"RefSeq: an update on mammalian reference sequences. The National Center for Biotechnology Information (NCBI) Reference Sequence (RefSeq) database is a collection of annotated genomic, transcript and protein sequence records derived from data in public sequence archives and from computation, curation and collaboration (http://www.ncbi.nlm.nih.gov/refseq/). We report here on growth of the mammalian and human subsets, changes to NCBI's eukaryotic annotation pipeline and modifications affecting transcript and protein records. 
Recent changes to NCBI's eukaryotic genome annotation pipeline provide higher throughput, and the addition of RNAseq data to the pipeline results in a significant expansion of the number of transcripts and novel exons annotated on mammalian RefSeq genomes. Recent annotation changes include reporting supporting evidence for transcript records, modification of exon feature annotation and the addition of a structured report of gene and sequence attributes of biological interest. We also describe a revised protein annotation policy for alternatively spliced transcripts with more divergent predicted proteins and we summarize the current status of the RefSeqGene project.",RefSeq,0.996263564,Prokaryotic Genome Annotation Pipeline,0.812073847,RefSeq,0.996263564,4,NA,"22121212.0, 24316578.0, 25510495.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +22121212,http://www.ncbi.nlm.nih.gov/RefSeq,"NCBI Reference Sequences (RefSeq): current status, new features and genome annotation policy. The National Center for Biotechnology Information (NCBI) Reference Sequence (RefSeq) database is a collection of genomic, transcript and protein sequence records. These records are selected and curated from public sequence archives and represent a significant reduction in redundancy compared to the volume of data archived by the International Nucleotide Sequence Database Collaboration. The database includes over 16,00 organisms, 2.4 × 0(6) genomic records, 13 × 10(6) proteins and 2 × 10(6) RNA records spanning prokaryotes, eukaryotes and viruses (RefSeq release 49, September 2011). The RefSeq database is maintained by a combined approach of automated analyses, collaboration and manual curation to generate an up-to-date representation of the sequence, its features, names and cross-links to related sources of information. 
We report here on recent growth, the status of curating the human RefSeq data set, more extensive feature annotation and current policy for eukaryotic genome annotation via the NCBI annotation pipeline. More information about the resource is available online (see http://www.ncbi.nlm.nih.gov/RefSeq/).",RefSeq,0.988386571,Sequences,0.715523958,RefSeq,0.988386571,1,NA,"24316578.0, 25510495.0, 24259432.0, 26553804.0, 29112715.0, 33270901.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/24/2011 +"24316578, 25510495",http://www.ncbi.nlm.nih.gov/genome,"RefSeq microbial genomes database: new representation and annotation strategy. The source of the microbial genomic sequences in the RefSeq collection is the set of primary sequence records submitted to the International Nucleotide Sequence Database public archives. These can be accessed through the Entrez search and retrieval system at http://www.ncbi.nlm.nih.gov/genome. Next-generation sequencing has enabled researchers to perform genomic sequencing at rates that were unimaginable in the past. Microbial genomes can now be sequenced in a matter of hours, which has led to a significant increase in the number of assembled genomes deposited in the public archives. This huge increase in DNA sequence data presents new challenges for the annotation, analysis and visualization bioinformatics tools. New strategies have been developed for the annotation and representation of reference genomes and sequence variations derived from population studies and clinical outbreaks.",RefSeq,0.96413511,NA,0,RefSeq,0.96413511,2,NA,"22121212.0, 24259432.0, 26553804.0, 29112715.0, 33270901.0",low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,12/15/2014 +31075273,http://reftss.clst.riken.jp,"refTSS: A Reference Data Set for Human and Mouse Transcription Start Sites. 
Transcription starts at genomic positions called transcription start sites (TSSs), producing RNAs, and is mainly regulated by genomic elements and transcription factors binding around these TSSs. This indicates that TSSs may be a better unit to integrate various data sources related to transcriptional events, including regulation and production of RNAs. However, although several TSS datasets and promoter atlases are available, a comprehensive reference set that integrates all known TSSs is lacking. Thus, we constructed a reference dataset of TSSs (refTSS) for the human and mouse genomes by collecting publicly available TSS annotations and promoter resources, such as FANTOM5, DBTSS, EPDnew, and ENCODE. The data set consists of genomic coordinates of TSS peaks, their gene annotations, quality check results, and conservation between human and mouse. We also developed a web interface to browse the refTSS (http://reftss.clst.riken.jp/). Users can access the resource for collecting and integrating data and information about transcriptional regulation and transcription products.",refTSS,0.954385519,NA,0,refTSS,0.954385519,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/8/2019 +26975833,http://REGene.bioinfo-minzhao.org,"REGene: a literature-based knowledgebase of animal regeneration that bridge tissue regeneration and cancer. Regeneration is a common phenomenon across multiple animal phyla. Regeneration-related genes (REGs) are critical for fundamental cellular processes such as proliferation and differentiation. Identification of REGs and elucidating their functions may help to further develop effective treatment strategies in regenerative medicine. So far, REGs have been largely identified by small-scale experimental studies and a comprehensive characterization of the diverse biological processes regulated by REGs is lacking. 
Therefore, there is an ever-growing need to integrate REGs at the genomics, epigenetics, and transcriptome level to provide a reference list of REGs for regeneration and regenerative medicine research. Towards achieving this, we developed the first literature-based database called REGene (REgeneration Gene database). In the current release, REGene contains 948 human (929 protein-coding and 19 non-coding genes) and 8445 homologous genes curated from gene ontology and extensive literature examination. Additionally, the REGene database provides detailed annotations for each REG, including: gene expression, methylation sites, upstream transcription factors, and protein-protein interactions. An analysis of the collected REGs reveals strong links to a variety of cancers in terms of genetic mutation, protein domains, and cellular pathways. We have prepared a web interface to share these regeneration genes, supported by refined browsing and searching functions at http://REGene.bioinfo-minzhao.org/.",REGene,0.984254122,REgeneration Gene database,0.634848428,REGene,0.984254122,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/15/2016 +24912499,http://mgrc.kribb.re.kr/regnet,"REGNET: mining context-specific human transcription networks using composite genomic information. Background Genome-wide expression profiles reflect the transcriptional networks specific to the given cell context. However, most statistical models try to estimate the average connectivity of the networks from a collection of gene expression data, and are unable to characterize the context-specific transcriptional regulations. We propose an approach for mining context-specific transcription networks from a large collection of gene expression fold-change profiles and composite gene-set information. Results Using a composite gene-set analysis method, we combine the information of transcription factor binding sites, Gene Ontology or pathway gene sets and gene expression fold-change profiles for a variety of cell conditions. 
We then collected all the significant patterns and constructed a database of context-specific transcription networks for human (REGNET). As a result, context-specific roles of transcription factors as well as their functional targets are readily explored. To validate the approach, nine predicted targets of E2F1 in HeLa cells were tested using chromatin immunoprecipitation assay. Among them, five (Gadd45b, Dusp6, Mll5, Bmp2 and E2f3) were successfully bound by E2F1. c-JUN and the EMT transcription networks were also validated from literature. Conclusions REGNET is a useful tool for exploring the ternary relationships among the transcription factors, their functional targets and the corresponding cell conditions. It is able to provide useful clues for novel cell-specific transcriptional regulations. The REGNET database is available at http://mgrc.kribb.re.kr/regnet.",REGNET,0.995660305,NA,0,REGNET,0.995660305,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/9/2014 +26424082,http://www.regnetworkweb.org,"RegNetwork: an integrated database of transcriptional and post-transcriptional regulatory networks in human and mouse. . Transcriptional and post-transcriptional regulation of gene expression is of fundamental importance to numerous biological processes. Nowadays, an increasing amount of gene regulatory relationships have been documented in various databases and literature. However, to more efficiently exploit such knowledge for biomedical research and applications, it is necessary to construct a genome-wide regulatory network database to integrate the information on gene regulatory relationships that are widely scattered in many different places. Therefore, in this work, we build a knowledge-based database, named 'RegNetwork', of gene regulatory networks for human and mouse by collecting and integrating the documented regulatory interactions among transcription factors (TFs), microRNAs (miRNAs) and target genes from 25 selected databases. 
Moreover, we also inferred and incorporated potential regulatory relationships based on transcription factor binding site (TFBS) motifs into RegNetwork. As a result, RegNetwork contains a comprehensive set of experimentally observed or predicted transcriptional and post-transcriptional regulatory relationships, and the database framework is flexibly designed for potential extensions to include gene regulatory networks for other organisms in the future. Based on RegNetwork, we characterized the statistical and topological properties of genome-wide regulatory networks for human and mouse, we also extracted and interpreted simple yet important network motifs that involve the interplays between TF-miRNA and their targets. In summary, RegNetwork provides an integrated resource on the prior information for gene regulatory relationships, and it enables us to further investigate context-specific transcriptional and post-transcriptional regulatory interactions based on domain-specific experimental data. Database URL: http://www.regnetworkweb.org.",RegNetwork,0.983025074,NA,0,RegNetwork,0.983025074,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/30/2015 +24771658,http://csb.cse.yzu.edu.tw/RegPhos2,"RegPhos 2.0: an updated resource to explore protein kinase-substrate phosphorylation networks in mammals. Protein phosphorylation catalyzed by kinases plays crucial roles in regulating a variety of intracellular processes. Owing to an increasing number of in vivo phosphorylation sites that have been identified by mass spectrometry (MS)-based proteomics, the RegPhos, available online at http://csb.cse.yzu.edu.tw/RegPhos2/, was developed to explore protein phosphorylation networks in human. In this update, we not only enhance the data content in human but also investigate kinase-substrate phosphorylation networks in mouse and rat. 
The experimentally validated phosphorylation sites as well as their catalytic kinases were extracted from public resources, and MS/MS phosphopeptides were manually curated from research articles. RegPhos 2.0 aims to provide a more comprehensive view of intracellular signaling networks by integrating the information of metabolic pathways and protein-protein interactions. A case study shows that analyzing the phosphoproteome profile of time-dependent cell activation obtained from Liquid chromatography-mass spectrometry (LC-MS/MS) analysis, the RegPhos deciphered not only the consistent scheme in B cell receptor (BCR) signaling pathway but also novel regulatory molecules that may involve in it. With an attempt to help users efficiently identify the candidate biomarkers in cancers, 30 microarray experiments, including 39 cancerous versus normal cells, were analyzed for detecting cancer-specific expressed genes coding for kinases and their substrates. Furthermore, this update features an improved web interface to facilitate convenient access to the exploration of phosphorylation networks for a group of genes/proteins. Database URL: http://csb.cse.yzu.edu.tw/RegPhos2/",RegPhos,0.992758036,NA,0,RegPhos,0.992758036,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/25/2014 +"24060102, 24175918",http://regprecise.lbl.gov,"Comparative genomics of metabolic capacities of regulons controlled by cis-regulatory RNA motifs in bacteria. Background In silico comparative genomics approaches have been efficiently used for functional prediction and reconstruction of metabolic and regulatory networks. Riboswitches are metabolite-sensing structures often found in bacterial mRNA leaders controlling gene expression on transcriptional or translational levels.An increasing number of riboswitches and other cis-regulatory RNAs have been recently classified into numerous RNA families in the Rfam database. 
High conservation of these RNA motifs provides a unique advantage for their genomic identification and comparative analysis. Results A comparative genomics approach implemented in the RegPredict tool was used for reconstruction and functional annotation of regulons controlled by RNAs from 43 Rfam families in diverse taxonomic groups of Bacteria. The inferred regulons include ~5200 cis-regulatory RNAs and more than 12000 target genes in 255 microbial genomes. All predicted RNA-regulated genes were classified into specific and overall functional categories. Analysis of taxonomic distribution of these categories allowed us to establish major functional preferences for each analyzed cis-regulatory RNA motif family. Overall, most RNA motif regulons showed predictable functional content in accordance with their experimentally established effector ligands. Our results suggest that some RNA motifs (including thiamin pyrophosphate and cobalamin riboswitches that control the cofactor metabolism) are widespread and likely originated from the last common ancestor of all bacteria. However, many more analyzed RNA motifs are restricted to a narrow taxonomic group of bacteria and likely represent more recent evolutionary innovations. Conclusions The reconstructed regulatory networks for major known RNA motifs substantially expand the existing knowledge of transcriptional regulation in bacteria. The inferred regulons can be used for genetic experiments, functional annotations of genes, metabolic reconstruction and evolutionary analysis. The obtained genome-wide collection of reference RNA motif regulons is available in the RegPrecise database (http://regprecise.lbl.gov/).",RegPrecise,0.986235499,NA,0,RegPrecise,0.986235499,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2013 +23547897,http://regtransbase.lbl.gov,"RegTransBase--a database of regulatory sequences and interactions based on literature: a resource for investigating transcriptional regulation in prokaryotes. 
Background Due to the constantly growing number of sequenced microbial genomes, comparative genomics has been playing a major role in the investigation of regulatory interactions in bacteria. Regulon inference mostly remains a field of semi-manual examination since absence of a knowledgebase and informatics platform for automated and systematic investigation restricts opportunities for computational prediction. Additionally, confirming computationally inferred regulons by experimental data is critically important. Description RegTransBase is an open-access platform with a user-friendly web interface publicly available at http://regtransbase.lbl.gov. It consists of two databases - a manually collected hierarchical regulatory interactions database based on more than 7000 scientific papers which can serve as a knowledgebase for verification of predictions, and a large set of curated by experts transcription factor binding sites used in regulon inference by a variety of tools. RegTransBase captures the knowledge from published scientific literature using controlled vocabularies and contains various types of experimental data, such as: the activation or repression of transcription by an identified direct regulator; determination of the transcriptional regulatory function of a protein (or RNA) directly binding to DNA or RNA; mapping of binding sites for a regulatory protein; characterization of regulatory mutations. Analysis of the data collected from literature resulted in the creation of Putative Regulons from Experimental Data that are also available in RegTransBase. Conclusions RegTransBase is a powerful user-friendly platform for the investigation of regulation in prokaryotes. 
It uses a collection of validated regulatory sequences that can be easily extracted and used to infer regulatory interactions by comparative genomics techniques thus assisting researchers in the interpretation of transcriptional regulation data.",RegTransBase,0.996879399,NA,0,RegTransBase,0.996879399,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/2/2013 +24599084,http://cord-db.org,"The CO-Regulation Database (CORD): a tool to identify coordinately expressed genes. Background Meta-analysis of gene expression array databases has the potential to reveal information about gene function. The identification of gene-gene interactions may be inferred from gene expression information but such meta-analysis is often limited to a single microarray platform. To address this limitation, we developed a gene-centered approach to analyze differential expression across thousands of gene expression experiments and created the CO-Regulation Database (CORD) to determine which genes are correlated with a queried gene. Results Using the GEO and ArrayExpress database, we analyzed over 120,000 group by group experiments from gene microarrays to determine the correlating genes for over 30,000 different genes or hypothesized genes. CORD output data is presented for sample queries with focus on genes with well-known interaction networks including p16 (CDKN2A), vimentin (VIM), MyoD (MYOD1). CDKN2A, VIM, and MYOD1 all displayed gene correlations consistent with known interacting genes. Conclusions We developed a facile, web-enabled program to determine gene-gene correlations across different gene expression microarray platforms. Using well-characterized genes, we illustrate how CORD's identification of co-expressed genes contributes to a better understanding a gene's potential function. 
The website is found at http://cord-db.org.",ORD,0.55172354,Regulation Database,0.558999516,Regulation Database,0.558999516,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/5/2014 +25880930,http://www.bioinformatics.org/regulator,"REGULATOR: a database of metazoan transcription factors and maternal factors for developmental studies. Background Genes encoding transcription factors that constitute gene-regulatory networks and maternal factors accumulating in egg cytoplasm are two classes of essential genes that play crucial roles in developmental processes. Transcription factors control the expression of their downstream target genes by interacting with cis-regulatory elements. Maternal factors initiate embryonic developmental programs by regulating the expression of zygotic genes and various other events during early embryogenesis. Results This article documents the transcription factors of 77 metazoan species as well as human and mouse maternal factors. We improved the previous method using a statistical approach adding Gene Ontology information to Pfam based identification of transcription factors. This method detects previously un-discovered transcription factors. The novel features of this database are: (1) It includes both transcription factors and maternal factors, although the number of species, in which maternal factors are listed, is limited at the moment. (2) Ontological representation at the cell, tissue, organ, and system levels has been specially designed to facilitate development studies. This is the unique feature in our database and is not available in other transcription factor databases. Conclusions A user-friendly web interface, REGULATOR ( http://www.bioinformatics.org/regulator/ ), which can help researchers to efficiently identify, validate, and visualize the data analyzed in this study, are provided. 
Using this web interface, users can browse, search, and download detailed information on species of interest, genes, transcription factor families, or developmental ontology terms.",REGULATOR,0.697461545,NA,0,REGULATOR,0.697461545,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/10/2015 +26876983,http://www.lerner.ccf.org/cancerbio/heemers/RAAR/search,"Regulators of Androgen Action Resource: a one-stop shop for the comprehensive study of androgen receptor action. . Androgen receptor (AR) is a ligand-activated transcription factor that is the main target for treatment of non-organ-confined prostate cancer (CaP). Failure of life-prolonging AR-targeting androgen deprivation therapy is due to flexibility in steroidogenic pathways that control intracrine androgen levels and variability in the AR transcriptional output. Androgen biosynthesis enzymes, androgen transporters and AR-associated coregulators are attractive novel CaP treatment targets. These proteins, however, are characterized by multiple transcript variants and isoforms, are subject to genomic alterations, and are differentially expressed among CaPs. Determining their therapeutic potential requires evaluation of extensive, diverse datasets that are dispersed over multiple databases, websites and literature reports. Mining and integrating these datasets are cumbersome, time-consuming tasks and provide only snapshots of relevant information. To overcome this impediment to effective, efficient study of AR and potential drug targets, we developed the Regulators of Androgen Action Resource (RAAR), a non-redundant, curated and user-friendly searchable web interface. RAAR centralizes information on gene function, clinical relevance, and resources for 55 genes that encode proteins involved in biosynthesis, metabolism and transport of androgens and for 274 AR-associated coregulator genes. 
Data in RAAR are organized in two levels: (i) Information pertaining to production of androgens is contained in a 'pre-receptor level' database, and coregulator gene information is provided in a 'post-receptor level' database, and (ii) an 'other resources' database contains links to additional databases that are complementary to and useful to pursue further the information provided in RAAR. For each of its 329 entries, RAAR provides access to more than 20 well-curated publicly available databases, and thus, access to thousands of data points. Hyperlinks provide direct access to gene-specific entries in the respective database(s). RAAR is a novel, freely available resource that provides fast, reliable and easy access to integrated information that is needed to develop alternative CaP therapies. Database URL: http://www.lerner.ccf.org/cancerbio/heemers/RAAR/search/.",RAAR,0.942830563,Regulators of Androgen Action Resource,0.965198304,Regulators of Androgen Action Resource,0.965198304,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/13/2016 +26527724,http://regulondb.ccg.unam.mx,"RegulonDB version 9.0: high-level integration of gene regulation, coexpression, motif clustering and beyond. RegulonDB (http://regulondb.ccg.unam.mx) is one of the most useful and important resources on bacterial gene regulation,as it integrates the scattered scientific knowledge of the best-characterized organism, Escherichia coli K-12, in a database that organizes large amounts of data. Its electronic format enables researchers to compare their results with the legacy of previous knowledge and supports bioinformatics tools and model building. Here, we summarize our progress with RegulonDB since our last Nucleic Acids Research publication describing RegulonDB, in 2013. 
In addition to maintaining curation up-to-date, we report a collection of 232 interactions with small RNAs affecting 192 genes, and the complete repertoire of 189 Elementary Genetic Sensory-Response units (GENSOR units), integrating the signal, regulatory interactions, and metabolic pathways they govern. These additions represent major progress to a higher level of understanding of regulated processes. We have updated the computationally predicted transcription factors, which total 304 (184 with experimental evidence and 120 from computational predictions); we updated our position-weight matrices and have included tools for clustering them in evolutionary families. We describe our semiautomatic strategy to accelerate curation, including datasets from high-throughput experiments, a novel coexpression distance to search for 'neighborhood' genes to known operons and regulons, and computational developments.",RegulonDB,0.998138189,NA,0,RegulonDB,0.998138189,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/2/2015 +31665499,http://remap.univ-amu.fr,"ReMap 2020: a database of regulatory regions from an integrative analysis of Human and Arabidopsis DNA-binding sequencing experiments. ReMap (http://remap.univ-amu.fr) aims to provide the largest catalogs of high-quality regulatory regions resulting from a large-scale integrative analysis of hundreds of transcription factors and regulators from DNA-binding experiments in Human and Arabidopsis (Arabidopsis thaliana). In this 2020 update of ReMap we have collected, analyzed and retained after quality control 2764 new human ChIP-seq and 208 ChIP-exo datasets available from public sources. The updated human atlas totalize 5798 datasets covering a total of 1135 transcriptional regulators (TRs) with a catalog of 165 million (M) peaks. This ReMap update comes with two unique Arabidopsis regulatory catalogs. First, a catalog of 372 Arabidopsis TRs across 2.6M peaks as a result of the integration of 509 ChIP-seq and DAP-seq datasets. 
Second, a catalog of 33 histone modifications and variants across 4.5M peaks from the integration of 286 ChIP-seq datasets. All catalogs are made available through track hubs at Ensembl and UCSC Genome Browsers. Additionally, this update comes with a new web framework providing an interactive user-interface, including improved search features. Finally, full programmatically access to the underlying data is available using a RESTful API together with a new R Shiny interface for a TRs binding enrichment analysis tool.",ReMap,0.994606137,NA,0,ReMap,0.994606137,1,NA,29126285,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2020 +29126285,http://remap.cisreg.eu,"ReMap 2018: an updated atlas of regulatory regions from an integrative analysis of DNA-binding ChIP-seq experiments. With this latest release of ReMap (http://remap.cisreg.eu), we present a unique collection of regulatory regions in human, as a result of a large-scale integrative analysis of ChIP-seq experiments for hundreds of transcriptional regulators (TRs) such as transcription factors, transcriptional co-activators and chromatin regulators. In 2015, we introduced the ReMap database to capture the genome regulatory space by integrating public ChIP-seq datasets, covering 237 TRs across 13 million (M) peaks. In this release, we have extended this catalog to constitute a unique collection of regulatory regions. Specifically, we have collected, analyzed and retained after quality control a total of 2829 ChIP-seq datasets available from public sources, covering a total of 485 TRs with a catalog of 80M peaks. Additionally, the updated database includes new search features for TR names as well as aliases, including cell line names and the ability to navigate the data directly within genome browsers via public track hubs. Finally, full access to this catalog is available online together with a TR binding enrichment analysis tool. 
ReMap 2018 provides a significant update of the ReMap database, providing an in depth view of the complexity of the regulatory landscape in human.",ReMap,0.989697456,NA,0,ReMap,0.989697456,1,NA,31665499,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +34042771,http://remedy.mssm.edu,"Introducing a Platform for Integrating and Sharing Stem Cell Research Data. Advancements in regenerative medicine have highlighted the need for increased standardization and sharing of stem cell products to help drive these innovative interventions toward public availability and to increase collaboration in the scientific community. Although numerous attempts and numerous databases have been made to store this data, there is still a lack of a platform that incorporates heterogeneous stem cell information into a harmonized project-based framework. The aim of the platform described in this study, ReMeDy, is to provide an intelligent informatics solution which integrates diverse stem cell product characteristics with study subject and omics information. In the resulting platform, heterogeneous data is validated using predefined ontologies and stored in a relational database. In this initial feasibility study, testing of the ReMeDy functionality was performed using published, publically-available induced pluripotent stem cell projects conducted in in vitro, preclinical and intervention evaluations. It demonstrated the robustness of ReMeDy for storing diverse iPSC data, by seamlessly harmonizing diverse common data elements, and the potential utility of this platform for driving knowledge generation from the aggregation of this shared data. Next steps include increasing the number of curated projects by developing a crowdsourcing framework for data upload and an automated pipeline for metadata abstraction. 
The database is publically accessible at https://remedy.mssm.edu/.",ReMeDy,0.99511075,NA,0,ReMeDy,0.99511075,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +28011754,http://renaldb.uni-frankfurt.de,"Logic programming to infer complex RNA expression patterns from RNA-seq data. To meet the increasing demand in the field, numerous long noncoding RNA (lncRNA) databases are available. Given many lncRNAs are specifically expressed in certain cell types and/or time-dependent manners, most lncRNA databases fall short of providing such profiles. We developed a strategy using logic programming to handle the complex organization of organs, their tissues and cell types as well as gender and developmental time points. To showcase this strategy, we introduce 'RenalDB' (http://renaldb.uni-frankfurt.de), a database providing expression profiles of RNAs in major organs focusing on kidney tissues and cells. RenalDB uses logic programming to describe complex anatomy, sample metadata and logical relationships defining expression, enrichment or specificity. We validated the content of RenalDB with biological experiments and functionally characterized two long intergenic noncoding RNAs: LOC440173 is important for cell growth or cell survival, whereas PAXIP1-AS1 is a regulator of cell death. We anticipate RenalDB will be used as a first step toward functional studies of lncRNAs in the kidney.",RenalDB,0.996478856,NA,0,RenalDB,0.996478856,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2018 +23389821,http://www.renaltube.com,"RenalTube: a network tool for clinical and genetic diagnosis of primary tubulopathies. Unlabelled The main purpose was to build a database while facilitating access to genotyping in order to improve the clinical and molecular knowledge of primary tubulopathies. Three tertiary referral centers of Spain collect clinical data through the site http://www.renaltube.com , while offering the analysis of 22 genes corresponding to 23 primary tubulopathies. 
There are three ways of collaboration: option 1 consists of adding patients to the database with clinical and biochemical information and requesting for genetic study, option 2 requires the payment of a fee for genetic analysis exclusively, and option 3 allows the enrollment of patients with a previously confirmed mutation. After 2 years of activity, RenalTube has collected data from 222 patients, the majority from Spain and Latin America (85.3 %). The most common tubulopathies are distal renal tubular acidosis (22.5 %) and classical Bartter syndrome (19.3 %) followed by familial hypomagnesemia with hypercalciuria and nephrocalcinosis (15.7 %) and Gitelman syndrome (15 %). Option 1 is the collaborating method preferred by doctors (62.3 %) followed by option 3 (36.3 %). Conclusion RenalTube is a network-based registry that can be easily reached and filled out worldwide. A web-based approach with a multilateral collaboration scheme enhances the recruitment of data and promotes the understanding of underlying mechanisms of rare inherited diseases, defines more accurate diagnostic and follow-up criteria, develops new molecular techniques and will improve the overall care of the patients.",RenalTube,0.996694028,NA,0,RenalTube,0.996694028,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/7/2013 +33237313,http://repeatsdb.org,"RepeatsDB in 2021: improved data and extended classification for protein tandem repeat structures. The RepeatsDB database (URL: https://repeatsdb.org/) provides annotations and classification for protein tandem repeat structures from the Protein Data Bank (PDB). Protein tandem repeats are ubiquitous in all branches of the tree of life. The accumulation of solved repeat structures provides new possibilities for classification and detection, but also increasing the need for annotation. Here we present RepeatsDB 3.0, which addresses these challenges and presents an extended classification scheme. 
The major conceptual change compared to the previous version is the hierarchical classification combining top levels based solely on structural similarity (Class > Topology > Fold) with two new levels (Clan > Family) requiring sequence similarity and describing repeat motifs in collaboration with Pfam. Data growth has been addressed with improved mechanisms for browsing the classification hierarchy. A new UniProt-centric view unifies the increasingly frequent annotation of structures from identical or similar sequences. This update of RepeatsDB aligns with our commitment to develop a resource that extracts, organizes and distributes specialized information on tandem repeat protein structures.",RepeatsDB,0.997238636,NA,0,RepeatsDB,0.997238636,1,NA,"24311564.0, 27899671.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +"24311564, 27899671",http://repeatsdb.bio.unipd.it,"RepeatsDB: a database of tandem repeat protein structures. RepeatsDB (http://repeatsdb.bio.unipd.it/) is a database of annotated tandem repeat protein structures. Tandem repeats pose a difficult problem for the analysis of protein structures, as the underlying sequence can be highly degenerate. Several repeat types haven been studied over the years, but their annotation was done in a case-by-case basis, thus making large-scale analysis difficult. We developed RepeatsDB to fill this gap. Using state-of-the-art repeat detection methods and manual curation, we systematically annotated the Protein Data Bank, predicting 10,745 repeat structures. In all, 2797 structures were classified according to a recently proposed classification schema, which was expanded to accommodate new findings. In addition, detailed annotations were performed in a subset of 321 proteins. These annotations feature information on start and end positions for the repeat regions and units. RepeatsDB is an ongoing effort to systematically classify and annotate structural protein repeats in a consistent way. 
It provides users with the possibility to access and download high-quality datasets either interactively or programmatically through web services.",RepeatsDB,0.994790673,NA,0,RepeatsDB,0.994790673,2,NA,33237313,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,11/29/2016 +32345346,http://repicmod.uchicago.edu/repic,"REPIC: a database for exploring the N6-methyladenosine methylome. The REPIC (RNA EPItranscriptome Collection) database records about 10 million peaks called from publicly available m6A-seq and MeRIP-seq data using our unified pipeline. These data were collected from 672 samples of 49 studies, covering 61 cell lines or tissues in 11 organisms. REPIC allows users to query N6-methyladenosine (m6A) modification sites by specific cell lines or tissue types. In addition, it integrates m6A/MeRIP-seq data with 1418 histone ChIP-seq and 118 DNase-seq data tracks from the ENCODE project in a modern genome browser to present a comprehensive atlas of m6A methylation sites, histone modification sites, and chromatin accessibility regions. REPIC is accessible at https://repicmod.uchicago.edu/repic.",REPIC,0.998654127,RNA EPItranscriptome Collection,0.978067458,REPIC,0.998654127,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/28/2020 +26322066,http://reprolive.eez.csic.es,"ReprOlive: a database with linked data for the olive tree (Olea europaea L.) reproductive transcriptome. Plant reproductive transcriptomes have been analyzed in different species due to the agronomical and biotechnological importance of plant reproduction. Here we presented an olive tree reproductive transcriptome database with samples from pollen and pistil at different developmental stages, and leaf and root as control vegetative tissues http://reprolive.eez.csic.es). It was developed from 2,077,309 raw reads to 1,549 Sanger sequences. 
Using a pre-defined workflow based on open-source tools, sequences were pre-processed, assembled, mapped, and annotated with expression data, descriptions, GO terms, InterPro signatures, EC numbers, KEGG pathways, ORFs, and SSRs. Tentative transcripts (TTs) were also annotated with the corresponding orthologs in Arabidopsis thaliana from TAIR and RefSeq databases to enable Linked Data integration. It results in a reproductive transcriptome comprising 72,846 contigs with average length of 686 bp, of which 63,965 (87.8%) included at least one functional annotation, and 55,356 (75.9%) had an ortholog. A minimum of 23,568 different TTs was identified and 5,835 of them contain a complete ORF. The representative reproductive transcriptome can be reduced to 28,972 TTs for further gene expression studies. Partial transcriptomes from pollen, pistil, and vegetative tissues as control were also constructed. ReprOlive provides free access and download capability to these results. Retrieval mechanisms for sequences and transcript annotations are provided. Graphical localization of annotated enzymes into KEGG pathways is also possible. Finally, ReprOlive has included a semantic conceptualisation by means of a Resource Description Framework (RDF) allowing a Linked Data search for extracting the most updated information related to enzymes, interactions, allergens, structures, and reactive oxygen species.",ReprOlive,0.997865617,NA,0,ReprOlive,0.997865617,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/11/2015 +25183748,http://www.iom.edu/Activities/Veterans/TwinsStudy.aspx,"Cohort Profile: The National Academy of Sciences-National Research Council Twin Registry (NAS-NRC Twin Registry). The National Academy of Sciences-National Research Council Twin Registry (NAS-NRC Twin Registry) is a comprehensive registry of White male twin pairs born in the USA between 1917 and 1927, both of the twins having served in the military. The purpose was medical research and ultimately improved clinical care. 
The cohort was assembled in the early 1960s with identification of approximately 16,000 twin pairs, review of service records, a brief mailed questionnaire assessing zygosity, and a health survey largely comparable to questionnaires used at that time with Scandinavian twin registries. Subsequent large-scale data collection occurred in 1974, 1985 and 1998, repeating the health survey and including information on education, employment history and earnings. Self-reported data have been supplemented with mortality, disability and medical data through record linkage. Potential collaborators should access the study website [http://www.iom.edu/Activities/Veterans/TwinsStudy.aspx] or e-mail the Medical Follow-up Agency at [Twins@nas.edu]. Questionnaire data are being prepared for future archiving with the National Archive of Computerized Data on Aging (NACDA) at the Inter-University Consortium for Political and Social Research (ICPSR), University of Michigan, MI.",NA,0,Research Council Twin Registry,0.605523437,Research Council Twin Registry,0.605523437,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,9/1/2014 +28862214,http://ridb.subdic-bioinformatics-nitrr.in,"Respiratory cancer database: An open access database of respiratory cancer gene and miRNA. Aims Respiratory cancer database (RespCanDB) is a genomic and proteomic database of cancer of respiratory organ. It also includes the information of medicinal plants used for the treatment of various respiratory cancers with structure of its active constituents as well as pharmacological and chemical information of drug associated with various respiratory cancers. Materials and methods Data in RespCanDB has been manually collected from published research article and from other databases. Data has been integrated using MySQL an object-relational database management system. MySQL manages all data in the back-end and provides commands to retrieve and store the data into the database. 
The web interface of database has been built in ASP. Results and conclusions RespCanDB is expected to contribute to the understanding of scientific community regarding respiratory cancer biology as well as developments of new way of diagnosing and treating respiratory cancer. Currently, the database consist the oncogenomic information of lung cancer, laryngeal cancer, and nasopharyngeal cancer. Data for other cancers, such as oral and tracheal cancers, will be added in the near future. The URL of RespCanDB is http://ridb.subdic-bioinformatics-nitrr.in/.",RespCanDB,0.997904003,Respiratory cancer database,0.9777426,RespCanDB,0.997904003,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2017 +30794542,http://www.dsimb.inserm.fr/respire,"Repository of Enriched Structures of Proteins Involved in the Red Blood Cell Environment (RESPIRE). The Red Blood Cell (RBC) is a metabolically-driven cell vital for processes such a gas transport and homeostasis. RBC possesses at its surface exposing antigens proteins that are critical in blood transfusion. Due to their importance, numerous studies address the cell function as a whole but more and more details of RBC structure and protein content are now studied using massive state-of-the art characterisation techniques. Yet, the resulting information is frequently scattered in many scientific articles, in many databases and specialized web servers. To provide a more compendious view of erythrocytes and of their protein content, we developed a dedicated database called RESPIRE that aims at gathering a comprehensive and coherent ensemble of information and data about proteins in RBC. This cell-driven database lists proteins found in erythrocytes. For a given protein entry, initial data are processed from external portals and enriched by using state-of-the-art bioinformatics methods. 
As structural information is extremely useful to understand protein function and predict the impact of mutations, a strong effort has been put on the prediction of protein structures with a special treatment for membrane proteins. Browsing the database is available through text search for reference gene names or protein identifiers, through pre-defined queries or via hyperlinks. The RESPIRE database provides valuable information and unique annotations that should be useful to a wide audience of biologists, clinicians and structural biologists. Database URL: http://www.dsimb.inserm.fr/respire.",RESPIRE,0.993819237,of Proteins Involved in the Red Blood Cell Environment,0.857962569,RESPIRE,0.993819237,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/22/2019 +24939193,http://www.retinogenetics.org,"'RetinoGenetics': a comprehensive mutation database for genes related to inherited retinal degeneration. . Inherited retinal degeneration (IRD), a leading cause of human blindness worldwide, is exceptionally heterogeneous with clinical heterogeneity and genetic variety. During the past decades, tremendous efforts have been made to explore the complex heterogeneity, and massive mutations have been identified in different genes underlying IRD with the significant advancement of sequencing technology. In this study, we developed a comprehensive database, 'RetinoGenetics', which contains informative knowledge about all known IRD-related genes and mutations for IRD. 'RetinoGenetics' currently contains 4270 mutations in 186 genes, with detailed information associated with 164 phenotypes from 934 publications and various types of functional annotations. Then extensive annotations were performed to each gene using various resources, including Gene Ontology, KEGG pathways, protein-protein interaction, mutational annotations and gene-disease network. 
Furthermore, by using the search functions, convenient browsing ways and intuitive graphical displays, 'RetinoGenetics' could serve as a valuable resource for unveiling the genetic basis of IRD. Taken together, 'RetinoGenetics' is an integrative, informative and updatable resource for IRD-related genetic predispositions. Database URL: http://www.retinogenetics.org/.",RetinoGenetics,0.990538726,NA,0,RetinoGenetics,0.990538726,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/17/2014 +24739306,http://retrogenedb.amu.edu.pl,"RetrogeneDB--a database of animal retrogenes. Retrocopies of protein-coding genes, reverse transcribed and inserted into the genome copies of mature RNA, have commonly been categorized as pseudogenes with no biological importance. However, recent studies showed that they play important role in the genomes evolution and shaping interspecies differences. Here, we present RetrogeneDB, a database of retrocopies in 62 animal genomes. RetrogeneDB contains information about retrocopies, their genomic localization, parental genes, ORF conservation, and expression. To our best knowledge, this is the most complete retrocopies database providing information for dozens of species previously never analyzed in the context of protein-coding genes retroposition. The database is available at http://retrogenedb.amu.edu.pl.",RetrogeneDB,0.99268949,NA,0,RetrogeneDB,0.99268949,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/16/2014 +30321422,http://retrorules.org,"RetroRules: a database of reaction rules for engineering biology. RetroRules is a database of reaction rules for metabolic engineering (https://retrorules.org). Reaction rules are generic descriptions of chemical reactions that can be used in retrosynthesis workflows in order to enumerate all possible biosynthetic routes connecting a target molecule to its precursors. 
The use of such rules is becoming increasingly important in the context of synthetic biology applied to de novo pathway discovery and in systems biology to discover underground metabolism due to enzyme promiscuity. Here, we provide for the first time a complete set containing >400 000 stereochemistry-aware reaction rules extracted from public databases and expressed in the community-standard SMARTS (SMIRKS) format, augmented by a rule representation at different levels of specificity (the atomic environment around the reaction center). Such numerous representations of reactions expand natural chemical diversity by predicting de novo reactions of promiscuous enzymes.",RetroRules,0.970715165,NA,0,RetroRules,0.970715165,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +22415763,http://www.rettdatabasenetwork.org,"Rett networked database: an integrated clinical and genetic network of Rett syndrome databases. Rett syndrome (RTT) is a neurodevelopmental disorder with one principal phenotype and several distinct, atypical variants (Zappella, early seizure onset and congenital variants). Mutations in MECP2 are found in most cases of classic RTT but at least two additional genes, CDKL5 and FOXG1, can underlie some (usually variant) cases. There is only limited correlation between genotype and phenotype. The Rett Networked Database (http://www.rettdatabasenetwork.org/) has been established to share clinical and genetic information. Through an ""adaptor"" process of data harmonization, a set of 293 clinical items and 16 genetic items was generated; 62 clinical and 7 genetic items constitute the core dataset; 23 clinical items contain longitudinal information. The database contains information on 1838 patients from 11 countries (December 2011), with or without mutations in known genes. These numbers can expand indefinitely. Data are entered by a clinician in each center who supervises accuracy. 
This network was constructed to make available pooled international data for the study of RTT natural history and genotype-phenotype correlation and to indicate the proportion of patients with specific clinical features and mutations. We expect that the network will serve for the recruitment of patients into clinical trials and for developing quality measures to drive up standards of medical management.",NA,0,Rett Networked Database,0.598606253,Rett Networked Database,0.598606253,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/13/2012 +34224878,http://reva.gao-lab.org,"REVA as A Well-curated Database for Human Expression-modulating Variants. More than 90% of disease- and trait-associated human variants are noncoding. By systematically screening multiple large-scale studies, we compiled REVA, a manually curated database for over 11.8 million experimentally tested noncoding variants with expression-modulating potentials. We provided 2424 functional annotations that could be used to pinpoint the plausible regulatory mechanism of these variants. We further benchmarked multiple state-of-the-art computational tools and found that their limited sensitivity remains a serious challenge for effective large-scale analysis. REVA provides high-quality experimentally tested expression-modulating variants with extensive functional annotations, which will be useful for users in the noncoding variant community. REVA is freely available at http://reva.gao-lab.org.",REVA,0.989323854,NA,0,REVA,0.989323854,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/3/2021 +30622655,"http://repeatexplorer-elixir.cerit-sc.cz/)Ã, http://repeatexplorer.org","Systematic survey of plant LTR-retrotransposons elucidates phylogenetic relationships of their polyprotein domains and provides a reference for element classification. Background Plant LTR-retrotransposons are classified into two superfamilies, Ty1/copia and Ty3/gypsy. 
They are further divided into an enormous number of families which are, due to the high diversity of their nucleotide sequences, usually specific to a single or a group of closely related species. Previous attempts to group these families into broader categories reflecting their phylogenetic relationships were limited either to analyzing a narrow range of plant species or to analyzing a small numbers of elements. Furthermore, there is no reference database that allows for similarity based classification of LTR-retrotransposons. Results We have assembled a database of retrotransposon encoded polyprotein domains sequences extracted from 5410 Ty1/copia elements and 8453 Ty3/gypsy elements sampled from 80 species representing major groups of green plants (Viridiplantae). Phylogenetic analysis of the three most conserved polyprotein domains (RT, RH and INT) led to dividing Ty1/copia and Ty3/gypsy retrotransposons into 16 and 14 lineages respectively. We also characterized various features of LTR-retrotransposon sequences including additional polyprotein domains, extra open reading frames and primer binding sites, and found that the occurrence and/or type of these features correlates with phylogenies inferred from the three protein domains. Conclusions We have established an improved classification system applicable to LTR-retrotransposons from a wide range of plant species. This system reflects phylogenetic relationships as well as distinct sequence and structural features of the elements. A comprehensive database of retrotransposon protein domains (REXdb) that reflects this classification provides a reference for efficient and unified annotation of LTR-retrotransposons in plant genomes. 
Access to REXdb related tools is implemented in the RepeatExplorer web server (https://repeatexplorer-elixir.cerit-sc.cz/) or using a standalone version of REXdb that can be downloaded seaparately from RepeatExplorer web page (http://repeatexplorer.org/).",REXdb,0.975555122,retrotransposon,0.663883686,REXdb,0.975555122,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/3/2019 +23125362,"http://rfam.sanger.ac.uk, http://rfam.janelia.org","Rfam 11.0: 10 years of RNA families. The Rfam database (available via the website at http://rfam.sanger.ac.uk and through our mirror at http://rfam.janelia.org) is a collection of non-coding RNA families, primarily RNAs with a conserved RNA secondary structure, including both RNA genes and mRNA cis-regulatory elements. Each family is represented by a multiple sequence alignment, predicted secondary structure and covariance model. Here we discuss updates to the database in the latest release, Rfam 11.0, including the introduction of genome-based alignments for large families, the introduction of the Rfam Biomart as well as other user interface improvements. Rfam is available under the Creative Commons Zero license.",Rfam,0.996679723,NA,0,Rfam,0.996679723,1,NA,"29112718.0, 29927072.0, 33211869.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/3/2012 +"29112718, 29927072, 33211869",http://rfam.org,"Rfam 13.0: shifting to a genome-centric resource for non-coding RNA families. The Rfam database is a collection of RNA families in which each family is represented by a multiple sequence alignment, a consensus secondary structure, and a covariance model. In this paper we introduce Rfam release 13.0, which switches to a new genome-centric approach that annotates a non-redundant set of reference genomes with RNA families. We describe new web interface features including faceted text search and R-scape secondary structure visualizations. 
We discuss a new literature curation workflow and a pipeline for building families based on RNAcentral. There are 236 new families in release 13.0, bringing the total number of families to 2687. The Rfam website is http://rfam.org.",Rfam,0.992107749,NA,0,Rfam,0.992107749,3,NA,23125362,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +"21296746, 21321022, 21478484, 23255149, 23603846, 23794737, 23881287, 25355511, 27602200, 27736745, 29761460, 31713623, 34741192",http://rgd.mcw.edu,"RGD: a comparative genomics platform. The Rat Genome Database (RGD) (http://rgd.mcw.edu) provides a comprehensive platform for comparative genomics and genetics research. RGD houses gene, QTL and polymorphic marker data for rat, mouse and human and provides easy access to data through sophisticated searches, disease portals, interactive pathway diagrams and rat and human genome browsers.",RGD,0.993250608,Rat Genome Database,0.930061355,RGD,0.993250608,13,25632109,27009807,NA,NA,conflicting record(s) to be removed,"merge all ""dup name"" IDs",NA,NA,NA,11/5/2021 +27009807,"http://rgd.mcw.edu/, http://rgd.mcw.edu","The Disease Portals, disease-gene annotation and the RGD disease ontology at the Rat Genome Database. . The Rat Genome Database (RGD;http://rgd.mcw.edu/) provides critical datasets and software tools to a diverse community of rat and non-rat researchers worldwide. To meet the needs of the many users whose research is disease oriented, RGD has created a series of Disease Portals and has prioritized its curation efforts on the datasets important to understanding the mechanisms of various diseases. Gene-disease relationships for three species, rat, human and mouse, are annotated to capture biomarkers, genetic associations, molecular mechanisms and therapeutic targets. 
To generate gene-disease annotations more effectively and in greater detail, RGD initially adopted the MEDIC disease vocabulary from the Comparative Toxicogenomics Database and adapted it for use by expanding this framework with the addition of over 1000 terms to create the RGD Disease Ontology (RDO). The RDO provides the foundation for, at present, 10 comprehensive disease area-related dataset and analysis platforms at RGD, the Disease Portals. Two major disease areas are the focus of data acquisition and curation efforts each year, leading to the release of the related Disease Portals. Collaborative efforts to realize a more robust disease ontology are underway. Database URL:http://rgd.mcw.edu.",RGD,0.984376073,Rat Genome Database,0.897326604,RGD,0.984376073,1,NA,"21296746.0, 21321022.0, 21478484.0, 23255149.0, 23603846.0, 23794737.0, 23881287.0, 25355511.0, 27602200.0, 27736745.0, 29761460.0, 31713623.0, 34741192.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,3/23/2016 +25252782,http://rged.wall-eva.net,"Renal Gene Expression Database (RGED): a relational database of gene expression profiles in kidney disease. . We present a bioinformatics database named Renal Gene Expression Database (RGED), which contains comprehensive gene expression data sets from renal disease research. The web-based interface of RGED allows users to query the gene expression profiles in various kidney-related samples, including renal cell lines, human kidney tissues and murine model kidneys. Researchers can explore certain gene profiles, the relationships between genes of interests and identify biomarkers or even drug targets in kidney diseases. The aim of this work is to provide a user-friendly utility for the renal disease research community to query expression profiles of genes of their own interest without the requirement of advanced computational skills. Website is implemented in PHP, R, MySQL and Nginx and freely available from http://rged.wall-eva.net. 
http://rged.wall-eva.net.",RGED,0.994495869,Renal Gene Expression Database,0.971099293,RGED,0.994495869,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/24/2014 +23193278,http://rgkbase.big.ac.cn/RGKbase,"The Rice Genome Knowledgebase (RGKbase): an annotation database for rice comparative genomics and evolutionary biology. Over the past 10 years, genomes of cultivated rice cultivars and their wild counterparts have been sequenced although most efforts are focused on genome assembly and annotation of two major cultivated rice (Oryza sativa L.) subspecies, 93-11 (indica) and Nipponbare (japonica). To integrate information from genome assemblies and annotations for better analysis and application, we now introduce a comparative rice genome database, the Rice Genome Knowledgebase (RGKbase, http://rgkbase.big.ac.cn/RGKbase/). RGKbase is built to have three major components: (i) integrated data curation for rice genomics and molecular biology, which includes genome sequence assemblies, transcriptomic and epigenomic data, genetic variations, quantitative trait loci (QTLs) and the relevant literature; (ii) User-friendly viewers, such as Gbrowse, GeneBrowse and Circos, for genome annotations and evolutionary dynamics and (iii) Bioinformatic tools for compositional and synteny analyses, gene family classifications, gene ontology terms and pathways and gene co-expression networks. RGKbase current includes data from five rice cultivars and species: Nipponbare (japonica), 93-11 (indica), PA64s (indica), the African rice (Oryza glaberrima) and a wild rice species (Oryza brachyantha). 
We are also constantly introducing new datasets from variety of public efforts, such as two recent releases-sequence data from ∼1000 rice varieties, which are mapped into the reference genome, yielding ample high-quality single-nucleotide polymorphisms and insertions-deletions.",RGKbase,0.996097326,Rice Genome Knowledgebase,0.938942921,RGKbase,0.996097326,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2012 +27789701,http://www.rhea-db.org,"Updates in Rhea - an expert curated resource of biochemical reactions. Rhea (http://www.rhea-db.org) is a comprehensive and non-redundant resource of expert-curated biochemical reactions designed for the functional annotation of enzymes and the description of metabolic networks. Rhea describes enzyme-catalyzed reactions covering the IUBMB Enzyme Nomenclature list as well as additional reactions, including spontaneously occurring reactions, using entities from the ChEBI (Chemical Entities of Biological Interest) ontology of small molecules. Here we describe developments in Rhea since our last report in the database issue of Nucleic Acids Research. These include the first implementation of a simple hierarchical classification of reactions, improved coverage of the IUBMB Enzyme Nomenclature list and additional reactions through continuing expert curation, and the development of a new website to serve this improved dataset.",Rhea,0.994225621,NA,0,Rhea,0.994225621,1,30272209,"30272209.0, 22135291.0, 25332395.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/26/2016 +30272209,"http://www.rhea-db.org, http://sparql.rhea-db.org/sparql","Updates in Rhea: SPARQLing biochemical reaction data. Rhea (http://www.rhea-db.org) is a comprehensive and non-redundant resource of over 11 000 expert-curated biochemical reactions that uses chemical entities from the ChEBI ontology to represent reaction participants. 
Originally designed as an annotation vocabulary for the UniProt Knowledgebase (UniProtKB), Rhea also provides reaction data for a range of other core knowledgebases and data repositories including ChEBI and MetaboLights. Here we describe recent developments in Rhea, focusing on a new resource description framework representation of Rhea reaction data and an SPARQL endpoint (https://sparql.rhea-db.org/sparql) that provides access to it. We demonstrate how federated queries that combine the Rhea SPARQL endpoint and other SPARQL endpoints such as that of UniProt can provide improved metabolite annotation and support integrative analyses that link the metabolome through the proteome to the transcriptome and genome. These developments will significantly boost the utility of Rhea as a means to link chemistry and biology for a more holistic understanding of biological systems and their function in health and disease.",Rhea,0.992570519,NA,0,Rhea,0.992570519,1,27789701,"27789701.0, 22135291.0, 25332395.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2019 +"22135291, 25332395",http://www.ebi.ac.uk/rhea,"Rhea--a manually curated resource of biochemical reactions. Rhea (http://www.ebi.ac.uk/rhea) is a comprehensive resource of expert-curated biochemical reactions. Rhea provides a non-redundant set of chemical transformations for use in a broad spectrum of applications, including metabolic network reconstruction and pathway inference. Rhea includes enzyme-catalyzed reactions (covering the IUBMB Enzyme Nomenclature list), transport reactions and spontaneously occurring reactions. Rhea reactions are described using chemical species from the Chemical Entities of Biological Interest ontology (ChEBI) and are stoichiometrically balanced for mass and charge. 
They are extensively manually curated with links to source literature and other public resources on metabolism including enzyme and pathway databases. This cross-referencing facilitates the mapping and reconciliation of common reactions and compounds between distinct resources, which is a common first step in the reconstruction of genome scale metabolic networks and models.",Rhea,0.982805073,Entities of,0.594281514,Rhea,0.982805073,2,NA,"27789701.0, 30272209.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/20/2014 +33994075,http://www.rheference.org,"A Review of the Literature Organized Into a New Database: RHeference. Hundreds of articles containing heterogeneous data describe D variants or add to the knowledge of known alleles. Data can be difficult to find despite existing online blood group resources and genetic and literature databases. We have developed a modern, elaborate database for D variants, thanks to an extensive literature search with meticulous curation of 387 peer-reviewed articles and 80 abstracts from major conferences and other sources. RHeference contains entries for 710 RHD alleles, 11 RHCE alleles, 30 phenotype descriptions (preventing data loss from historical sources), 35 partly characterized alleles, 3 haplotypes, and 16 miscellaneous entries. The entries include molecular, phenotypic, serological, alloimmunization, haplotype, geographical, and other data, detailed for each source. The main characteristics are summarized for each entry. The sources for all information are included and easily accessible through doi and PMID links. Overall, the database contains more than 10,000 individual pieces of data. We have set up the database architecture based on our previous expertise on database setup and biocuration for other topics, using modern technologies such as the Django framework, BioPython, Bootstrap, and Jquery. 
This architecture allows an easy access to data and enables simple and complex queries: combining multiple mutations, keywords, or any of the characteristics included in the database. RHeference provides a complement to existing resources and will continue to grow as our knowledge expands and new articles are published. The database url is http://www.rheference.org/.",RHeference,0.994272649,NA,0,RHeference,0.994272649,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/20/2021 +22965133,http://www.rhesusbase.org,"RhesusBase: a knowledgebase for the monkey research community. Although the rhesus macaque is a unique model for the translational study of human diseases, currently its use in biomedical research is still in its infant stage due to error-prone gene structures and limited annotations. Here, we present RhesusBase for the monkey research community (http://www.rhesusbase.org). We performed strand-specific RNA-Seq studies in 10 macaque tissues and generated 1.2 billion 90-bp paired-end reads, covering >97.4% of the putative exon in macaque transcripts annotated by Ensembl. We found that at least 28.7% of the macaque transcripts were previously mis-annotated, mainly due to incorrect exon-intron boundaries, incomplete untranslated regions (UTRs) and missed exons. Compared with the previous gene models, the revised transcripts show clearer sequence motifs near splicing junctions and the end of UTRs, as well as cleaner patterns of exon-intron distribution for expression tags and cross-species conservation scores. Strikingly, 1292 exon-intron boundary revisions between coding exons corrected the previously mis-annotated open reading frames. The revised gene models were experimentally verified in randomly selected cases. We further integrated functional genomics annotations from >60 categories of public and in-house resources and developed an online accessible database. 
User-friendly interfaces were developed to update, retrieve, visualize and download the RhesusBase meta-data, providing a 'one-stop' resource for the monkey research community.",RhesusBase,0.997771919,NA,0,RhesusBase,0.997771919,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/10/2012 +34022814,http://bioinfor.kib.ac.cn/RPGD,"The Rhododendron Plant Genome Database (RPGD): a comprehensive online omics database for Rhododendron. Background The genus Rhododendron L. has been widely cultivated for hundreds of years around the world. Members of this genus are known for great ornamental and medicinal value. Owing to advances in sequencing technology, genomes and transcriptomes of members of the Rhododendron genus have been sequenced and published by various laboratories. With increasing amounts of omics data available, a centralized platform is necessary for effective storage, analysis, and integration of these large-scale datasets to ensure consistency, independence, and maintainability. Results Here, we report our development of the Rhododendron Plant Genome Database (RPGD; http://bioinfor.kib.ac.cn/RPGD/ ), which represents the first comprehensive database of Rhododendron genomics information. It includes large amounts of omics data, including genome sequence assemblies for R. delavayi, R. williamsianum, and R. simsii, gene expression profiles derived from public RNA-Seq data, functional annotations, gene families, transcription factor identification, gene homology, simple sequence repeats, and chloroplast genome. Additionally, many useful tools, including BLAST, JBrowse, Orthologous Groups, Genome Synteny Browser, Flanking Sequence Finder, Expression Heatmap, and Batch Download were integrated into the platform. Conclusions RPGD is designed to be a comprehensive and helpful platform for all Rhododendron researchers. 
Believe that RPGD will be an indispensable hub for Rhododendron studies.",RPGD,0.988055825,Rhododendron Plant Genome Database,0.988815002,Rhododendron Plant Genome Database,0.988815002,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/22/2021 +33685493,http://www.ribocirc.com,"riboCIRC: a comprehensive database of translatable circRNAs. riboCIRC is a translatome data-oriented circRNA database specifically designed for hosting, exploring, analyzing, and visualizing translatable circRNAs from multi-species. The database provides a comprehensive repository of computationally predicted ribosome-associated circRNAs; a manually curated collection of experimentally verified translated circRNAs; an evaluation of cross-species conservation of translatable circRNAs; a systematic de novo annotation of putative circRNA-encoded peptides, including sequence, structure, and function; and a genome browser to visualize the context-specific occupant footprints of circRNAs. It represents a valuable resource for the circRNA research community and is publicly available at http://www.ribocirc.com .",riboCIRC,0.996479809,NA,0,riboCIRC,0.996479809,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/8/2021 +30726866,http://ribod.iiserkol.ac.in,"RiboD: a comprehensive database for prokaryotic riboswitches. SUMMARY:Riboswitches are cis-regulatory non-coding genomic segments that control the expression of downstream genes by undergoing conformational change upon ligand binding. We present a comprehensive database of prokaryotic riboswitches that allows the user to search for riboswitches using multiple criteria, extract information about riboswitch location and gene/operon it regulates. RiboD provides a very useful resource that can be utilized for the better understanding of riboswitch-based gene regulation in bacteria and archaea. 
AVAILABILITY AND IMPLEMENTATION:RiboD can be freely accessed on the web at http://ribod.iiserkol.ac.in/.",RiboD,0.977628946,NA,0,RiboD,0.977628946,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/1/2019 +27189556,http://ribodb.univ-lyon1.fr,"RiboDB Database: A Comprehensive Resource for Prokaryotic Systematics. Ribosomal proteins (r-proteins) are increasingly used as an alternative to ribosomal rRNA for prokaryotic systematics. However, their routine use is difficult because r-proteins are often not or wrongly annotated in complete genome sequences, and there is currently no dedicated exhaustive database of r-proteins. RiboDB aims at fulfilling this gap. This weekly updated comprehensive database allows the fast and easy retrieval of r-protein sequences from publicly available complete prokaryotic genome sequences. The current version of RiboDB contains 90 r-proteins from 3,750 prokaryotic complete genomes encompassing 38 phyla/major classes and 1,759 different species. RiboDB is accessible at http://ribodb.univ-lyon1.fr and through ACNUC interfaces.",RiboDB,0.997285545,NA,0,RiboDB,0.997285545,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/5/2016 +27845739,http://bioinformatics.fafu.edu.cn,"Impact of SNPs on Protein Phosphorylation Status in Rice (Oryza sativa L.). . Single nucleotide polymorphisms (SNPs) are widely used in functional genomics and genetics research work. The high-quality sequence of rice genome has provided a genome-wide SNP and proteome resource. However, the impact of SNPs on protein phosphorylation status in rice is not fully understood. In this paper, we firstly updated rice SNP resource based on the new rice genome Ver. 7.0, then systematically analyzed the potential impact of Non-synonymous SNPs (nsSNPs) on the protein phosphorylation status. There were 3,897,312 SNPs in Ver. 7.0 rice genome, among which 9.9% was nsSNPs. Whilst, a total 2,508,261 phosphorylated sites were predicted in rice proteome. 
Interestingly, we observed that 150,197 (39.1%) nsSNPs could influence protein phosphorylation status, among which 52.2% might induce changes of protein kinase (PK) types for adjacent phosphorylation sites. We constructed a database, SNP_rice, to deposit the updated rice SNP resource and phosSNPs information. It was freely available to academic researchers at http://bioinformatics.fafu.edu.cn. As a case study, we detected five nsSNPs that potentially influenced heterotrimeric G proteins phosphorylation status in rice, indicating that genetic polymorphisms showed impact on the signal transduction by influencing the phosphorylation status of heterotrimeric G proteins. The results in this work could be a useful resource for future experimental identification and provide interesting information for better rice breeding.",rice,0.868407339,NA,0,rice,0.868407339,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/11/2016 +24147765,http://ricedb.plantenergy.uwa.edu.au,"Rice DB: an Oryza Information Portal linking annotation, subcellular location, function, expression, regulation, and evolutionary information for rice and Arabidopsis. Omics research in Oryza sativa (rice) relies on the use of multiple databases to obtain different types of information to define gene function. We present Rice DB, an Oryza information portal that is a functional genomics database, linking gene loci to comprehensive annotations, expression data and the subcellular location of encoded proteins. Rice DB has been designed to integrate the direct comparison of rice with Arabidopsis (Arabidopsis thaliana), based on orthology or 'expressology', thus using and combining available information from two pre-eminent plant models. 
To establish Rice DB, gene identifiers (more than 40 types) and annotations from a variety of sources were compiled, functional information based on large-scale and individual studies was manually collated, hundreds of microarrays were analysed to generate expression annotations, and the occurrences of potential functional regulatory motifs in promoter regions were calculated. A range of computational subcellular localization predictions were also run for all putative proteins encoded in the rice genome, and experimentally confirmed protein localizations have been collated, curated and linked to functional studies in rice. A single search box allows anything from gene identifiers (for rice and/or Arabidopsis), motif sequences, subcellular location, to keyword searches to be entered, with the capability of Boolean searches (such as AND/OR). To demonstrate the utility of Rice DB, several examples are presented including a rice mitochondrial proteome, which draws on a variety of sources for subcellular location data within Rice DB. Comparisons of subcellular location, functional annotations, as well as transcript expression in parallel with Arabidopsis reveals examples of conservation between rice and Arabidopsis, using Rice DB (http://ricedb.plantenergy.uwa.edu.au).",Rice,0.762478769,NA,0,Rice,0.762478769,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/29/2013 +24279809,http://www.ricearray.org,"The Rice Oligonucleotide Array Database: an atlas of rice gene expression. Background Microarray technologies facilitate high-throughput gene expression analysis. However, the diversity of platforms for rice gene expression analysis hinders efficient analysis. Tools to broadly integrate microarray data from different platforms are needed. Results In this study, we developed the Rice Oligonucleotide Array Database (ROAD, http://www.ricearray.org) to explore gene expression across 1,867 publicly available rice microarray hybridizations. 
The ROAD's user-friendly web interface and variety of visualization tools facilitate the extraction of gene expression profiles using gene and microarray element identifications. The ROAD supports meta-analysis of genes expressed in different tissues and at developmental stages. Co-expression analysis tool provides information on co-regulation between genes under general, abiotic and biotic stress conditions. Additionally, functional analysis tools, such as Gene Ontology and KEGG (Kyoto Encyclopedia of Genes and Genomes) Orthology, are embedded in the ROAD. These tools facilitate the identification of meaningful biological patterns in a list of query genes. Conclusions The Rice Oligonucleotide Array Database provides comprehensive gene expression profiles for all rice genes, and will be a useful resource for researchers of rice and other grass species.",ROAD,0.928594371,Rice Oligonucleotide Array Database,0.973416291,Rice Oligonucleotide Array Database,0.973416291,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/19/2012 +21216747,http://agri-trait.dna.affrc.go.jp,"Rice TOGO Browser: A platform to retrieve integrated information on rice functional and applied genomics. The Rice TOGO Browser is an online public resource designed to facilitate integration and visualization of mapping data of bacterial artificial chromosome (BAC)/P1-derived artificial chromosome (PAC) clones, genes, restriction fragment length polymorphism (RFLP)/simple sequence repeat (SSR) markers and phenotype data represented as quantitative trait loci (QTLs) onto the genome sequence, and to provide a platform for more efficient utilization of genome information from the point of view of applied genomics as well as functional genomics. 
Three search options, namely keyword search, region search and trait search, generate various types of data in a user-friendly interface with three distinct viewers, a chromosome viewer, an integrated map viewer and a sequence viewer, thereby providing the opportunity to view the position of genes and/or QTLs at the chromosomal level and to retrieve any sequence information in a user-defined genome region. Furthermore, the gene list, marker list and genome sequence in a specified region delineated by RFLP/SSR markers and any sequences designed as primers can be viewed and downloaded to support forward genetics approaches. An additional feature of this database is the graphical viewer for BLAST search to reveal information not only for regions with significant sequence similarity but also for regions adjacent to those with similarity but with no hits between sequences. An easy to use and intuitive user interface can help a wide range of users in retrieving integrated mapping information including agronomically important traits on the rice genome sequence. The database can be accessed at http://agri-trait.dna.affrc.go.jp/.",Rice TOGO Browser,0.827372536,NA,0,Rice TOGO Browser,0.827372536,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/6/2011 +28025342,http://syslab3.nchu.edu.tw/rice,"RiceATM: a platform for identifying the association between rice agronomic traits and miRNA expression. . MicroRNAs (miRNAs) are known to play critical roles in plant development and stress-response regulation, and they frequently display multi-targeting characteristics. The control of defined rice phenotypes occurs through multiple genes; however, evidence demonstrating the relationship between agronomic traits and miRNA expression profiles is lacking. In this study, we investigated eight yield-related traits in 187 local rice cultivars and profiled the expression levels of 193 miRNAs in these cultivars using microarray analyses. 
By integrating the miRBase database, the rice annotation project database, and the miRanda and psRNATarget web servers, we constructed a database (RiceATM) that can be employed to investigate the association between rice agronomic traits and miRNA expression. The functions of this platform include phenotype selection, sample grouping, microarray data pretreatment, statistical analysis and target gene predictions. To demonstrate the utility of RiceATM, we used the database to identify four miRNAs associated with the heading date and validated their expression trends in the cultivars with early or late heading date by real-time PCR. RiceATM is a useful tool for researchers seeking to characterize the role of certain miRNAs for a specific phenotype and discover potential biomarkers for breeding or functional studies.Database URL: http://syslab3.nchu.edu.tw/rice/.",RiceATM,0.990825713,NA,0,RiceATM,0.990825713,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/26/2016 +27515824,http://ricebase.org,"Ricebase: a breeding and genetics platform for rice, integrating individual molecular markers, pedigrees and whole-genome-based data. . Ricebase (http://ricebase.org) is an integrative genomic database for rice (Oryza sativa) with an emphasis on combining datasets in a way that maintains the key links between past and current genetic studies. Ricebase includes DNA sequence data, gene annotations, nucleotide variation data and molecular marker fragment size data. Rice research has benefited from early adoption and extensive use of simple sequence repeat (SSR) markers; however, the majority of rice SSR markers were developed prior to the latest rice pseudomolecule assembly. Interpretation of new research using SNPs in the context of literature citing SSRs requires a common coordinate system. A new pipeline, using a stepwise relaxation of stringency, was used to map SSR primers onto the latest rice pseudomolecule assembly. 
The SSR markers and experimentally assayed amplicon sizes are presented in a relational database with a web-based front end, and are available as a track loaded in a genome browser with links connecting the browser and database. The combined capabilities of Ricebase link genetic markers, genome context, allele states across rice germplasm and potentially user curated phenotypic interpretations as a community resource for genetic discovery and breeding in rice.",Ricebase,0.991965771,NA,0,Ricebase,0.991965771,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/10/2016 +24280345,http://www.gramene.org/pathway,"A genome scale metabolic network for rice and accompanying analysis of tryptophan, auxin and serotonin biosynthesis regulation under biotic stress. Background Functional annotations of large plant genome projects mostly provide information on gene function and gene families based on the presence of protein domains and gene homology, but not necessarily in association with gene expression or metabolic and regulatory networks. These additional annotations are necessary to understand the physiology, development and adaptation of a plant and its interaction with the environment. Results RiceCyc is a metabolic pathway networks database for rice. It is a snapshot of the substrates, metabolites, enzymes, reactions and pathways of primary and intermediary metabolism in rice. RiceCyc version 3.3 features 316 pathways and 6,643 peptide-coding genes mapped to 2,103 enzyme-catalyzed and 87 protein-mediated transport reactions. The initial functional annotations of rice genes with InterPro, Gene Ontology, MetaCyc, and Enzyme Commission (EC) numbers were enriched with annotations provided by KEGG and Gramene databases. The pathway inferences and the network diagrams were first predicted based on MetaCyc reference networks and plant pathways from the Plant Metabolic Network, using the Pathologic module of Pathway Tools. 
This was enriched by manually adding metabolic pathways and gene functions specifically reported for rice. The RiceCyc database is hierarchically browsable from pathway diagrams to the associated genes, metabolites and chemical structures. Through the integrated tool OMICs Viewer, users can upload transcriptomic, proteomic and metabolomic data to visualize expression patterns in a virtual cell. RiceCyc, along with additional species-specific pathway databases hosted in the Gramene project, facilitates comparative pathway analysis. Conclusions Here we describe the RiceCyc network development and discuss its contribution to rice genome annotations. As a case study to demonstrate the use of RiceCyc network as a discovery environment we carried out an integrated bioinformatic analysis of rice metabolic genes that are differentially regulated under diurnal photoperiod and biotic stress treatments. The analysis of publicly available rice transcriptome datasets led to the hypothesis that the complete tryptophan biosynthesis and its dependent metabolic pathways including serotonin biosynthesis are induced by taxonomically diverse pathogens while also being under diurnal regulation. The RiceCyc database is available online for free access at http://www.gramene.org/pathway/.",RiceCyc,0.990686595,NA,0,RiceCyc,0.990686595,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/29/2013 +23180784,http://ricefrend.dna.affrc.go.jp,"RiceFREND: a platform for retrieving coexpressed gene networks in rice. Similarity of gene expression across a wide range of biological conditions can be efficiently used in characterization of gene function. We have constructed a rice gene coexpression database, RiceFREND (http://ricefrend.dna.affrc.go.jp/), to identify gene modules with similar expression profiles and provide a platform for more accurate prediction of gene functions. 
Coexpression analysis of 27 201 genes was performed against 815 microarray data derived from expression profiling of various organs and tissues at different developmental stages, mature organs throughout the growth from transplanting until harvesting in the field and plant hormone treatment conditions, using a single microarray platform. The database is provided with two search options, namely, 'single guide gene search' and 'multiple guide gene search' to efficiently retrieve information on coexpressed genes. A user-friendly web interface facilitates visualization and interpretation of gene coexpression networks in HyperTree, Cytoscape Web and Graphviz formats. In addition, analysis tools for identification of enriched Gene Ontology terms and cis-elements provide clue for better prediction of biological functions associated with the coexpressed genes. These features allow users to clarify gene functions and gene regulatory networks that could lead to a more thorough understanding of many complex agronomic traits.",RiceFREND,0.970509291,NA,0,RiceFREND,0.970509291,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/24/2012 +28964253,http://14.139.229.201,"RiceMetaSys for salt and drought stress responsive genes in rice: a web interface for crop improvement. Background Genome-wide microarray has enabled development of robust databases for functional genomics studies in rice. However, such databases do not directly cater to the needs of breeders. Here, we have attempted to develop a web interface which combines the information from functional genomic studies across different genetic backgrounds with DNA markers so that they can be readily deployed in crop improvement. In the current version of the database, we have included drought and salinity stress studies since these two are the major abiotic stresses in rice. 
Results RiceMetaSys, a user-friendly and freely available web interface provides comprehensive information on salt responsive genes (SRGs) and drought responsive genes (DRGs) across genotypes, crop development stages and tissues, identified from multiple microarray datasets. 'Physical position search' is an attractive tool for those using QTL based approach for dissecting tolerance to salt and drought stress since it can provide the list of SRGs and DRGs in any physical interval. To identify robust candidate genes for use in crop improvement, the 'common genes across varieties' search tool is useful. Graphical visualization of expression profiles across genes and rice genotypes has been enabled to facilitate the user and to make the comparisons more impactful. Simple Sequence Repeat (SSR) search in the SRGs and DRGs is a valuable tool for fine mapping and marker assisted selection since it provides primers for survey of polymorphism. An external link to intron specific markers is also provided for this purpose. Bulk retrieval of data without any limit has been enabled in case of locus and SSR search. Conclusions The aim of this database is to facilitate users with a simple and straight-forward search options for identification of robust candidate genes from among thousands of SRGs and DRGs so as to facilitate linking variation in expression profiles to variation in phenotype. Database URL: http://14.139.229.201.",RiceMetaSys,0.956623197,NA,0,RiceMetaSys,0.956623197,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/30/2017 +23986771,http://ricephylogenomics.ucdavis.edu/cellwalls/gh,"Construction of a rice glycoside hydrolase phylogenomic database and identification of targets for biofuel research. Glycoside hydrolases (GH) catalyze the hydrolysis of glycosidic bonds in cell wall polymers and can have major effects on cell wall architecture. 
Taking advantage of the massive datasets available in public databases, we have constructed a rice phylogenomic database of GHs (http://ricephylogenomics.ucdavis.edu/cellwalls/gh/). This database integrates multiple data types including the structural features, orthologous relationships, mutant availability, and gene expression patterns for each GH family in a phylogenomic context. The rice genome encodes 437 GH genes classified into 34 families. Based on pairwise comparison with eight dicot and four monocot genomes, we identified 138 GH genes that are highly diverged between monocots and dicots, 57 of which have diverged further in rice as compared with four monocot genomes scanned in this study. Chromosomal localization and expression analysis suggest a role for both whole-genome and localized gene duplications in expansion and diversification of GH families in rice. We examined the meta-profiles of expression patterns of GH genes in twenty different anatomical tissues of rice. Transcripts of 51 genes exhibit tissue or developmental stage-preferential expression, whereas, seventeen other genes preferentially accumulate in actively growing tissues. When queried in RiceNet, a probabilistic functional gene network that facilitates functional gene predictions, nine out of seventeen genes form a regulatory network with the well-characterized genes involved in biosynthesis of cell wall polymers including cellulose synthase and cellulose synthase-like genes of rice. Two-thirds of the GH genes in rice are up regulated in response to biotic and abiotic stress treatments indicating a role in stress adaptation. Our analyses identify potential GH targets for cell wall modification.",RiceNet,0.55981946,NA,0,RiceNet,0.55981946,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,8/26/2013 +25489177,http://nabic.rda.go.kr/gere/rice/geneticMap,"RiceQTLPro: an integrated database for quantitative trait loci marker mapping in rice plant. 
Unlabelled The National Agricultural Biotechnology Information Center (NABIC) in South Korea reconstructed a RiceQTLPro database for gene positional analysis and structure prediction of the chromosomes. This database is an integrated web-based system providing information about quantitative trait loci (QTL) markers in rice plant. The RiceQTLPro has the three main features namely, (1) QTL markers list, (2) searching of markers using keyword, and (3) searching of marker position on the rice chromosomes. This updated database provides 112 QTL markers information with 817 polymorphic markers on each of the 12 chromosomes in rice. Availability The database is available for free at http://nabic.rda.go.kr/gere/rice/geneticMap/",RiceQTLPro,0.997575998,NA,0,RiceQTLPro,0.997575998,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/30/2014 +22645600,http://www.bioinformatics2.wsu.edu/RiceRBP,"RiceRBP: A Resource for Experimentally Identified RNA Binding Proteins in Oryza sativa. RNA binding proteins (RBPs) play an important role not only in nuclear gene expression, but also in cytosolic events, including RNA transport, localization, translation, and stability. Although over 200 RBPs are predicted from the Arabidopsis genome alone, relatively little is known about these proteins in plants as many exhibit no homology to known RBPs in other eukaryotes. Furthermore, RBPs likely have low expression levels making them difficult to identify and study. As part of our continuing efforts to understand plant cytosolic gene expression and the factors involved, we employed a combination of affinity chromatography and proteomic techniques to enrich for low abundance RBPs in developing rice seed. Our results have been compiled into RiceRBP (http://www.bioinformatics2.wsu.edu/RiceRBP), a database that contains 257 experimentally identified proteins, many of which have not previously been predicted to be RBPs. 
For each of the identified proteins, RiceRBP provides information on transcript and protein sequence, predicted protein domains, details of the experimental identification, and whether antibodies have been generated for public use. In addition, tools are available to analyze expression patterns for the identified genes, view phylogentic relationships and search for orthologous proteins. RiceRBP is a valuable tool for the community in the study of plant RBPs.",RiceRBP,0.996419489,NA,0,RiceRBP,0.996419489,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/14/2012 +23660286,http://www.nipgr.res.in/RiceSRTFDB.html,"RiceSRTFDB: a database of rice transcription factors containing comprehensive expression, cis-regulatory element and mutant information to facilitate gene function analysis. Rice is one of the most important crop plants, representing the staple food for more than half the world's population. However, its productivity is challenged by various stresses, including drought and salinity. Transcription factors (TFs) represent a regulatory component of the genome and are the most important targets for engineering stress tolerance. Here, we constructed a database, RiceSRTFDB, which provides comprehensive expression information for rice TFs during drought and salinity stress conditions and various stages of development. This information will be useful to identify the target TF(s) involved in stress response at a particular stage of development. The curated information for cis-regulatory elements present in their promoters has also been provided, which will be important to study the binding proteins. In addition, we have provided the available mutants and their phenotype information for rice TFs. All these information have been integrated in the database to facilitate the selection of target TFs of interest for functional analysis. This database aims to accelerate functional genomics research of rice TFs and understand the regulatory mechanisms underlying abiotic stress responses. 
Database URL: http://www.nipgr.res.in/RiceSRTFDB.html",RiceSRTFDB,0.988805354,NA,0,RiceSRTFDB,0.988805354,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/9/2013 +24136999,http://ricewiki.big.ac.cn,"RiceWiki: a wiki-based database for community curation of rice genes. Rice is the most important staple food for a large part of the world's human population and also a key model organism for biological studies of crops as well as other related plants. Here we present RiceWiki (http://ricewiki.big.ac.cn), a wiki-based, publicly editable and open-content platform for community curation of rice genes. Most existing related biological databases are based on expert curation; with the exponentially exploding volume of rice knowledge and other relevant data, however, expert curation becomes increasingly laborious and time-consuming to keep knowledge up-to-date, accurate and comprehensive, struggling with the flood of data and requiring a large number of people getting involved in rice knowledge curation. Unlike extant relevant databases, RiceWiki features harnessing collective intelligence in community curation of rice genes, quantifying users' contributions in each curated gene and providing explicit authorship for each contributor in any given gene, with the aim to exploit the full potential of the scientific community for rice knowledge curation. Based on community curation, RiceWiki bears the potential to make it possible to build a rice encyclopedia by and for the scientific community that harnesses community intelligence for collaborative knowledge curation, covers all aspects of biological knowledge and keeps evolving with novel knowledge.",RiceWiki,0.996466279,NA,0,RiceWiki,0.996466279,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/16/2013 +23180765,http://ricexpro.dna.affrc.go.jp,"RiceXPro version 3.0: expanding the informatics resource for rice transcriptome. 
A wide range of resources on gene expression profiling enhance various strategies in plant molecular biology particularly in characterization of gene function. We have updated our gene expression profile database, RiceXPro (http://ricexpro.dna.affrc.go.jp/), to provide more comprehensive information on the transcriptome of rice encompassing the entire growth cycle and various experimental conditions. The gene expression profiles are currently grouped into three categories, namely, 'field/development' with 572 data corresponding to 12 data sets, 'plant hormone' with 143 data corresponding to 13 data sets and 'cell- and tissue-type' comprising of 38 microarray data. In addition to the interface for retrieving expression information of a gene/genes in each data set, we have incorporated an interface for a global approach in searching an overall view of the gene expression profiles from multiple data sets within each category. Furthermore, we have also added a BLAST search function that enables users to explore expression profile of a gene/genes with similarity to a query sequence. Therefore, the updated version of RiceXPro can be used more efficiently to survey the gene expression signature of rice in sufficient depth and may also provide clues on gene function of other cereal crops.",RiceXPro,0.97669971,NA,0,RiceXPro,0.97669971,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/23/2012 +29989091,http://server.malab.cn/Ricyer/index.html,"RicyerDB: A Database For Collecting Rice Yield-related Genes with Biological Analysis. The Rice Yield-related Database (RicyerDB) was created to complement with related research of influence rice (Oryza sativa L.) yield in multiple traits by manually curating the related databases and literature, and genomics and proteomics information that could be useful for comprehensive understanding of the rice biology. RicyerDB provides a more valuable resource in which to efficiently investigate, browse and analyze yield-related genes. 
The whole data set can be easily queried and downloaded through the webpage. In addition, RicyerDB also constructed a protein-protein interaction network with biological analysis. The combined rice database opens a new path to facilitate researchers achieving information on rice gene in terms of their effects on traits important for rice breeding. The web server is freely available at: http://server.malab.cn/Ricyer/index.html.",RicyerDB,0.99671818,Rice Yield-related Database,0.840729028,RicyerDB,0.99671818,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/22/2018 +27377064,"http://rid.ncifcrf.gov, http://home.ncifcrf.gov/hivdrp/resources.htm","Retrovirus Integration Database (RID): a public database for retroviral insertion sites into host genomes. Unlabelled The NCI Retrovirus Integration Database is a MySql-based relational database created for storing and retrieving comprehensive information about retroviral integration sites, primarily, but not exclusively, HIV-1. The database is accessible to the public for submission or extraction of data originating from experiments aimed at collecting information related to retroviral integration sites including: the site of integration into the host genome, the virus family and subtype, the origin of the sample, gene exons/introns associated with integration, and proviral orientation. Information about the references from which the data were collected is also stored in the database. Tools are built into the website that can be used to map the integration sites to UCSC genome browser, to plot the integration site patterns on a chromosome, and to display provirus LTRs in their inserted genome sequence. The website is robust, user friendly, and allows users to query the database and analyze the data dynamically. 
Availability https://rid.ncifcrf.gov ; or http://home.ncifcrf.gov/hivdrp/resources.htm .",RID,0.911651358,Retrovirus Integration Database,0.863141191,RID,0.911651358,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/4/2016 +21938212,http://115.111.90.196/ridsdb/index.php,"RiDs db: Repeats in diseases database. Unlabelled The non-coding fraction of the human genome, which is approximately 98%, is mainly constituted by repeats. Transpositions, expansions and deletions of these repeat elements contribute to a number of diseases. None of the available databases consolidates information on both tandem and interspersed repeats with the flexibility of FASTA based homology search with reference to disease genes. Repeats in diseases database (RiDs db) is a web accessible relational database, which aids analysis of repeats associated with Mendelian disorders. It is a repository of disease genes, which can be searched by FASTA program or by limitedor free- text keywords. Unlike other databases, RiDs db contains the sequences of these genes with access to corresponding information on both interspersed and tandem repeats contained within them, on a unified platform. Comparative analysis of novel or patient sequences with the reference sequences in RiDs db using FASTA search will indicate change in structure of repeats, if any, with a particular disorder. This database also provides links to orthologs in model organisms such as zebrafish, mouse and Drosophila. Availability The database is available for free at http://115.111.90.196/ridsdb/index.php.",RiDs db,0.977813244,Repeats in diseases database,0.806841683,RiDs db,0.977813244,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/6/2011 +32849839,http://www.rigdb.cn,"RIGD: A Database for Intronless Genes in the Rosaceae. Most eukaryotic genes are interrupted by one or more introns, and only prokaryotic genomes are composed of mainly single-exon genes without introns. 
Due to the absence of introns, intronless genes in eukaryotes have become important materials for comparative genomics and evolutionary biology. There is currently no cohesive database that collects intronless genes in plants into a single database, although many databases on exons and introns exist. In this study, we constructed the Rosaceae Intronless Genes Database (RIGD), a user-friendly web interface to explore and collect information on intronless genes from different plants. Six Rosaceae species, Pyrus bretschneideri, Pyrus communis, Malus domestica, Prunus persica, Prunus mume, and Fragaria vesca, are included in the current release of the RIGD. Sequence data and gene annotation were collected from different databases and integrated. The main purpose of this study is to provide gene sequence data. In addition, attribute analysis, functional annotations, subcellular localization prediction, and GO analysis are reported. The RIGD allows users to browse, search, and download data with ease. Blast and comparative analyses are also provided through this online database, which is available at http://www.rigdb.cn/.",RIGD,0.99081533,Rosaceae Intronless Genes Database,0.965823472,RIGD,0.99081533,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/7/2020 +29040625,http://rise.zhanglab.net,"RISE: a database of RNA interactome from sequencing experiments. We present RISE (http://rise.zhanglab.net), a database of RNA Interactome from Sequencing Experiments. RNA-RNA interactions (RRIs) are essential for RNA regulation and function. RISE provides a comprehensive collection of RRIs that mainly come from recent transcriptome-wide sequencing-based experiments like PARIS, SPLASH, LIGR-seq, and MARIO, as well as targeted studies like RIA-seq, RAP-RNA and CLASH. It also includes interactions aggregated from other primary databases and publications. The RISE database currently contains 328,811 RNA-RNA interactions mainly in human, mouse and yeast. 
While most existing RNA databases mainly contain interactions of miRNA targeting, notably, more than half of the RRIs in RISE are among mRNA and long non-coding RNAs. We compared different RRI datasets in RISE and found limited overlaps in interactions resolved by different techniques and in different cell lines. It may suggest technology preference and also dynamic natures of RRIs. We also analyzed the basic features of the human and mouse RRI networks and found that they tend to be scale-free, small-world, hierarchical and modular. The analysis may nominate important RNAs or RRIs for further investigation. Finally, RISE provides a Circos plot and several table views for integrative visualization, with extensive molecular and functional annotations to facilitate exploration of biological functions for any RRI of interest.",RISE,0.989733219,NA,0,RISE,0.989733219,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +23144556,http://bioinformatics.towson.edu/RKN,"RKN Lethal DB: A database for the identification of Root Knot Nematode (Meloidogyne spp.) candidate lethal genes. Unlabelled Root Knot nematode (RKN; Meloidogyne spp.) is one of the most devastating parasites that infect the roots of hundreds of plant species. RKN cannot live independently from their hosts and are the biggest contributors to the loss of the world's primary foods. RNAi gene silencing studies have demonstrated that there are fewer galls and galls are smaller when RNAi constructs targeted to silence certain RKN genes are expressed in plant roots. We conducted a comparative genomics analysis, comparing RKN genes of six species: Meloidogyne Arenaria, Meloidogyne Chitwoodi, Meloidogyne Hapla, Meloidogyne Incognita, Meloidogyne Javanica, and Meloidogyne Paranaensis to that of the free living nematode Caenorhabditis elegans, to identify candidate genes that will be lethal to RKN when silenced or mutated. 
Our analysis yielded a number of such candidate lethal genes in RKN, some of which have been tested and proven to be effective in soybean roots. A web based database was built to house and allow scientists to search the data. This database will be useful to scientists seeking to identify candidate genes as targets for gene silencing to confer resistance in plants to RKN. Availability The database can be accessed from http://bioinformatics.towson.edu/RKN/",RKN,0.919945776,NA,0,RKN,0.919945776,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/1/2012 +26464443,http://mirlab.sysu.edu.cn/rmbase,"RMBase: a resource for decoding the landscape of RNA modifications from high-throughput sequencing data. Although more than 100 different types of RNA modifications have been characterized across all living organisms, surprisingly little is known about the modified positions and their functions. Recently, various high-throughput modification sequencing methods have been developed to identify diverse post-transcriptional modifications of RNA molecules. In this study, we developed a novel resource, RMBase (RNA Modification Base, http://mirlab.sysu.edu.cn/rmbase/), to decode the genome-wide landscape of RNA modifications identified from high-throughput modification data generated by 18 independent studies. The current release of RMBase includes ∼ 9500 pseudouridine (Ψ) modifications generated from Pseudo-seq and CeU-seq sequencing data, ∼ 1000 5-methylcytosines (m(5)C) predicted from Aza-IP data, ∼ 124 200 N6-Methyladenosine (m(6)A) modifications discovered from m(6)A-seq and ∼ 1210 2'-O-methylations (2'-O-Me) identified from RiboMeth-seq data and public resources. Moreover, RMBase provides a comprehensive listing of other experimentally supported types of RNA modifications by integrating various resources. It provides web interfaces to show thousands of relationships between RNA modification sites and microRNA target sites. 
It can also be used to illustrate the disease-related SNPs residing in the modification sites/regions. RMBase provides a genome browser and a web-based modTool to query, annotate and visualize various RNA modifications. This database will help expand our understanding of potential functions of RNA modifications.",RMBase,0.992835402,RNA Modification Base,0.927216482,RMBase,0.992835402,1,NA,29040692,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,10/12/2015 +29040692,http://rna.sysu.edu.cn/rmbase,"RMBase v2.0: deciphering the map of RNA modifications from epitranscriptome sequencing data. More than 100 distinct chemical modifications to RNA have been characterized so far. However, the prevalence, mechanisms and functions of various RNA modifications remain largely unknown. To provide transcriptome-wide landscapes of RNA modifications, we developed the RMBase v2.0 (http://rna.sysu.edu.cn/rmbase/), which is a comprehensive database that integrates epitranscriptome sequencing data for the exploration of post-transcriptional modifications of RNAs and their relationships with miRNA binding events, disease-related single-nucleotide polymorphisms (SNPs) and RNA-binding proteins (RBPs). RMBase v2.0 was expanded with ∼600 datasets and ∼1 397 000 modification sites from 47 studies among 13 species, which represents an approximately 10-fold expansion when compared with the previous release. It contains ∼1 373 000 N6-methyladenosines (m6A), ∼5400 N1-methyladenosines (m1A), ∼9600 pseudouridine (Ψ) modifications, ∼1000 5-methylcytosine (m5C) modifications, ∼5100 2'-O-methylations (2'-O-Me), and ∼2800 modifications of other modification types. Moreover, we built a new module called 'Motif' that provides the visualized logos and position weight matrices (PWMs) of the modification motifs. We also constructed a novel module termed 'modRBP' to study the relationships between RNA modifications and RBPs. 
Additionally, we developed a novel web-based tool named 'modMetagene' to plot the metagenes of RNA modification along a transcript model. This database will help researchers investigate the potential functions and mechanisms of RNA modifications.",RMBase,0.990800381,NA,0,RMBase,0.990800381,1,NA,26464443,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,1/1/2018 +"22976082, 30053264",http://rmdb.stanford.edu,"An RNA Mapping DataBase for curating RNA structure mapping experiments. Summary We have established an RNA mapping database (RMDB) to enable structural, thermodynamic and kinetic comparisons across single-nucleotide-resolution RNA structure mapping experiments. The volume of structure mapping data has greatly increased since the development of high-throughput sequencing techniques, accelerated software pipelines and large-scale mutagenesis. For scientists wishing to infer relationships between RNA sequence/structure and these mapping data, there is a need for a database that is curated, tagged with error estimates and interfaced with tools for sharing, visualization, search and meta-analysis. Through its on-line front-end, the RMDB allows users to explore single-nucleotide-resolution mapping data in heat-map, bar-graph and colored secondary structure graphics; to leverage these data to generate secondary structure hypotheses; and to download the data in standardized and computer-friendly files, including the RDAT and community-consensus SNRNASM formats. At the time of writing, the database houses 53 entries, describing more than 2848 experiments of 1098 RNA constructs in several solution conditions and is growing rapidly. Availability Freely available on the web at http://rmdb.stanford.edu. Contact rhiju@stanford.edu. 
Supplementary information Supplementary data are available at Bioinformatics Online.",RMDB,0.97517405,Mapping DataBase,0.709348813,RMDB,0.97517405,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +21464840,http://biomedinformri.org/calp,"A comparative protein function analysis databaseof different Leishmania strains. A complete understanding of different protein functional families and template information opens new avenues for novel drug development. Protein identification and analysis software performs a central role in the investigation of proteins and leads to the development of refined database for description of proteins of different Leishmania strains. There are certain databases for different strains that lack template information and functional family annotation. Rajendra Memorial Research Institute of Medical Sciences (RMRIMS) has developed a web-based unique database to provide information about functional families of different proteins and its template information in different Leishmania species. Based on the template information users can model the tertiary structure of protein. The database facilitates significant relationship between template information and possible protein functional families assigned to different proteins by SVMProt. This database is designed to provide comprehensive descriptions of certain important proteins found in four different species of Leishmania i.e. L. donovani, L. infantum, L. major and L. braziliensis. A specific characterization information table provides information related to species and specific functional families. This database aims to be a resource for scientists working on proteomics. The database is freely available at http://biomedinformri.org/calp/.",RMRIMS,0.633491713,Research,0.512701094,RMRIMS,0.633491713,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/2/2011 +33021671,http://rmvar.renlab.org,"RMVar: an updated database of functional variants involved in RNA modifications. 
Distinguishing the few disease-related variants from a massive number of passenger variants is a major challenge. Variants affecting RNA modifications that play critical roles in many aspects of RNA metabolism have recently been linked to many human diseases, such as cancers. Evaluating the effect of genetic variants on RNA modifications will provide a new perspective for understanding the pathogenic mechanism of human diseases. Previously, we developed a database called 'm6AVar' to host variants associated with m6A, one of the most prevalent RNA modifications in eukaryotes. To host all RNA modification (RM)-associated variants, here we present an updated version of m6AVar renamed RMVar (http://rmvar.renlab.org). In this update, RMVar contains 1 678 126 RM-associated variants for 9 kinds of RNA modifications, namely m6A, m6Am, m1A, pseudouridine, m5C, m5U, 2'-O-Me, A-to-I and m7G, at three confidence levels. Moreover, RBP binding regions, miRNA targets, splicing events and circRNAs were integrated to assist investigations of the effects of RM-associated variants on posttranscriptional regulation. In addition, disease-related information was integrated from ClinVar and other genome-wide association studies (GWAS) to investigate the relationship between RM-associated variants and diseases. We expect that RMVar may boost further functional studies on genetic variants affecting RNA modifications.",RMVar,0.992782235,NA,0,RMVar,0.992782235,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +24220091,http://iimcb.genesilico.pl/rnabricks,"RNA Bricks--a database of RNA 3D motifs and their interactions. The RNA Bricks database (http://iimcb.genesilico.pl/rnabricks), stores information about recurrent RNA 3D motifs and their interactions, found in experimentally determined RNA structures and in RNA-protein complexes. In contrast to other similar tools (RNA 3D Motif Atlas, RNA Frabase, Rloom) RNA motifs, i.e. 
'RNA bricks' are presented in the molecular environment, in which they were determined, including RNA, protein, metal ions, water molecules and ligands. All nucleotide residues in RNA bricks are annotated with structural quality scores that describe real-space correlation coefficients with the electron density data (if available), backbone geometry and possible steric conflicts, which can be used to identify poorly modeled residues. The database is also equipped with an algorithm for 3D motif search and comparison. The algorithm compares spatial positions of backbone atoms of the user-provided query structure and of stored RNA motifs, without relying on sequence or secondary structure information. This enables the identification of local structural similarities among evolutionarily related and unrelated RNA molecules. Besides, the search utility enables searching 'RNA bricks' according to sequence similarity, and makes it possible to identify motifs with modified ribonucleotide residues at specific positions.",RNA Bricks,0.949595173,NA,0,RNA Bricks,0.949595173,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/12/2013 +31950189,http://rnacossmos.com,"RNA CoSSMos 2.0: an improved searchable database of secondary structure motifs in RNA three-dimensional structures. . The RNA Characterization of Secondary Structure Motifs, RNA CoSSMos, database is a freely accessible online database that allows users to identify secondary structure motifs among RNA 3D structures and explore their structural features. RNA CoSSMos 2.0 now requires two closing base pairs for all RNA loop motifs to create a less redundant database of secondary structures. Furthermore, RNA CoSSMos 2.0 represents an upgraded database with new features that summarize search findings and aid in the search for 3D structural patterns among RNA secondary structure motifs. Previously, users were limited to viewing search results individually, with no built-in tools to compare search results. 
RNA CoSSMos 2.0 provides two new features, allowing users to summarize, analyze and compare their search result findings. A function has been added to the website that calculates the average and representative structures of the search results. Additionally, users can now view a summary page of their search results that reports percentages of each structural feature found, including sugar pucker, glycosidic linkage, hydrogen bonding patterns and stacking interactions. Other upgrades include a newly embedded NGL structural viewer, the option to download the clipped structure coordinates in *.pdb format and improved NMR structure results. RNA CoSSMos 2.0 is no longer simply a search engine for a structure database; it now has the capability of analyzing, comparing and summarizing search results. Database URL: http://rnacossmos.com.",CoSSMos,0.890264094,RNA Characterization of Secondary Structure Motifs,0.966837181,RNA Characterization of Secondary Structure Motifs,0.966837181,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +22127861,http://cossmos.slu.edu,"RNA CoSSMos: Characterization of Secondary Structure Motifs--a searchable database of secondary structure motifs in RNA three-dimensional structures. RNA secondary structure is important for designing therapeutics, understanding protein-RNA binding and predicting tertiary structure of RNA. Several databases and downloadable programs exist that specialize in the three-dimensional (3D) structure of RNA, but none focus specifically on secondary structural motifs such as internal, bulge and hairpin loops. The RNA Characterization of Secondary Structure Motifs (RNA CoSSMos) database is a freely accessible and searchable online database and website of 3D characteristics of secondary structure motifs. 
To create the RNA CoSSMos database, 2156 Protein Data Bank (PDB) files were searched for internal, bulge and hairpin loops, and each loop's structural information, including sugar pucker, glycosidic linkage, hydrogen bonding patterns and stacking interactions, was included in the database. False positives were defined, identified and reclassified or omitted from the database to ensure the most accurate results possible. Users can search via general PDB information, experimental parameters, sequence and specific motif and by specific structural parameters in the subquery page after the initial search. Returned results for each search can be viewed individually or a complete set can be downloaded into a spreadsheet to allow for easy comparison. The RNA CoSSMos database is automatically updated weekly and is available at http://cossmos.slu.edu.",RNA CoSSMos,0.782637835,RNA Characterization of Secondary Structure Motifs,0.77531596,RNA CoSSMos,0.782637835,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/29/2011 +29222504,http://structurome.bb.iastate.edu,"RNAStructuromeDB: A genome-wide database for RNA structural inference. RNA plays important roles in almost every aspect of biology, and every aspect of RNA biology is influenced by its folding. This is a particularly important consideration in the era of high-throughput sequencing, when the discovery of novel transcripts far outpaces our knowledge of their functions. To gain a comprehensive picture of biology requires a structural framework for making functional inferences on RNA. To this end we have developed the RNA Structurome Database ( https://structurome.bb.iastate.edu ), a comprehensive repository of RNA secondary structural information that spans the entire human genome. Here, we compile folding information for every base pair of the genome that may be transcribed: coding, noncoding, and intergenic regions, as well as repetitive elements, telomeres, etc. 
This was done by fragmenting the GRCh38 reference genome into 154,414,320 overlapping sequence fragments and, for each fragment, calculating a set of metrics based on the sequence's folding properties. These data will facilitate a wide array of investigations: e.g. discovery of structured regulatory elements in differential gene expression data or noncoding RNA discovery, as well as allow genome-scale analyses of RNA folding.",NA,0,RNA Structurome Database,0.584489805,RNA Structurome Database,0.584489805,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/8/2017 +22345621,http://medicalgenomics.org/rna_seq_atlas,"RNA-Seq Atlas--a reference database for gene expression profiling in normal tissue by next-generation sequencing. Motivation Next-generation sequencing technology enables an entirely new perspective for clinical research and will speed up personalized medicine. In contrast to microarray-based approaches, RNA-Seq analysis provides a much more comprehensive and unbiased view of gene expression. Although the perspective is clear and the long-term success of this new technology obvious, bioinformatics resources making these data easily available especially to the biomedical research community are still evolving. Results We have generated RNA-Seq Atlas, a web-based repository of RNA-Seq gene expression profiles and query tools. The website offers open and easy access to RNA-Seq gene expression profiles and tools to both compare tissues and find genes with specific expression patterns. To enlarge the scope of the RNA-Seq Atlas, the data were linked to common functional and genetic databases, in particular offering information on the respective gene, signaling pathway analysis and evaluation of biological functions by means of gene ontologies. Additionally, data were linked to several microarray gene profiles, including BioGPS normal tissue profiles and NCI60 cancer cell line expression data. 
Our data search interface allows an integrative detailed comparison between our RNA-Seq data and the microarray information. This is the first database providing data mining tools and open access to large scale RNA-Seq expression profiles. Its applications will be versatile, as it will be beneficial in identifying tissue specific genes and expression profiles, comparison of gene expression profiles among diverse tissues, but also systems biology approaches linking tissue function to gene expression changes. Availability and implementation http://medicalgenomics.org/rna_seq_atlas.",RNA-Seq Atlas,0.958900797,NA,0,RNA-Seq Atlas,0.958900797,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/17/2012 +31799597,http://bio-bigdata.hrbmu.edu.cn/RNAactDrug,"RNAactDrug: a comprehensive database of RNAs associated with drug sensitivity from multi-omics data. Drug sensitivity has always been at the core of individualized cancer chemotherapy. However, we have been overwhelmed by large-scale pharmacogenomic data in the era of next-generation sequencing technology, which makes it increasingly challenging for researchers, especially those without bioinformatic experience, to perform data integration, exploration and analysis. To bridge this gap, we developed RNAactDrug, a comprehensive database of RNAs associated with drug sensitivity from multi-omics data, which allows users to explore drug sensitivity and RNA molecule associations directly. It provides association data between drug sensitivity and RNA molecules including mRNAs, long non-coding RNAs (lncRNAs) and microRNAs (miRNAs) at four molecular levels (expression, copy number variation, mutation and methylation) from integrated analysis of three large-scale pharmacogenomic databases (GDSC, CellMiner and CCLE). RNAactDrug currently stores more than 4 924 200 associations of RNA molecules and drug sensitivity at four molecular levels covering more than 19 770 mRNAs, 11 119 lncRNAs, 438 miRNAs and 4155 drugs. 
A user-friendly interface enriched with various browsing sections augmented with advance search facility for querying the database is offered for users retrieving. RNAactDrug provides a comprehensive resource for RNA molecules acting in drug sensitivity, and it could be used to prioritize drug sensitivity-related RNA molecules, further promoting the identification of clinically actionable biomarkers in drug sensitivity and drug development more cost-efficiently by making this knowledge accessible to both basic researchers and clinical practitioners. Database URL: http://bio-bigdata.hrbmu.edu.cn/RNAactDrug.",RNAactDrug,0.996623337,NA,0,RNAactDrug,0.996623337,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +"25352543, 27794554, 30395267, 33106848",http://rnacentral.org,"RNAcentral: an international database of ncRNA sequences. The field of non-coding RNA biology has been hampered by the lack of availability of a comprehensive, up-to-date collection of accessioned RNA sequences. Here we present the first release of RNAcentral, a database that collates and integrates information from an international consortium of established RNA sequence databases. The initial release contains over 8.1 million sequences, including representatives of all major functional classes. A web portal (http://rnacentral.org) provides free access to data, search functionality, cross-references, source code and an integrated genome browser for selected species.",RNAcentral,0.993771434,NA,0,RNAcentral,0.993771434,4,NA,25593347,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,1/1/2021 +25593347,"http://www.oxfordjournals.org/nar/database/a/, http://nar.oxfordjournals.org","The 2015 Nucleic Acids Research Database Issue and molecular biology database collection. The 2015 Nucleic Acids Research Database Issue contains 172 papers that include descriptions of 56 new molecular biology databases, and updates on 115 databases whose descriptions have been previously published in NAR or other journals. 
Following the classification that has been introduced last year in order to simplify navigation of the entire issue, these articles are divided into eight subject categories. This year's highlights include RNAcentral, an international community portal to various databases on noncoding RNA; ValidatorDB, a validation database for protein structures and their ligands; SASBDB, a primary repository for small-angle scattering data of various macromolecular complexes; MoonProt, a database of 'moonlighting' proteins, and two new databases of protein-protein and other macromolecular complexes, ComPPI and the Complex Portal. This issue also includes an unusually high number of cancer-related databases and other databases dedicated to genomic basics of disease and potential drugs and drug targets. The size of NAR online Molecular Biology Database Collection, http://www.oxfordjournals.org/nar/database/a/, remained approximately the same, following the addition of 74 new resources and removal of 77 obsolete web sites. The entire Database Issue is freely available online on the Nucleic Acids Research web site (http://nar.oxfordjournals.org/).",RNAcentral,0.746270657,Molecular Biology Database Collection,0.612197742,RNAcentral,0.746270657,1,"21177655.0, 21177655.0, 24316579.0","25352543.0, 27794554.0, 30395267.0, 33106848.0",low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,1/1/2015 +22700939,http://www.rnaiatlas.ethz.ch,"RNAiAtlas: a database for RNAi (siRNA) libraries and their specificity. Large-scale RNA interference (RNAi) experiments, especially the ones based on short-interfering RNA (siRNA) technology became increasingly popular over the past years. For such knock-down/screening purposes, different companies offer sets of oligos/reagents targeting the whole genome or a subset of it for various organisms. 
Obviously, the sequence (and structure) of the corresponding oligos is a key factor in obtaining reliable results in these large-scale studies and the companies use a variety of (often not fully public) algorithms to design them. Nevertheless, as the genome annotations are still continuously changing, oligos may become obsolete, so siRNA reagents should be periodically re-annotated according to the latest version of the sequence database (which of course has serious consequences also on the interpretation of the screening results). In our article, we would like to introduce a new software/database tool, the RNAiAtlas. It has been created for exploration, analysis and distribution of large scale RNAi libraries (currently limited to the human genome) with their latest annotation (including former history) but in addition it contains also specific on-target analysis results (design quality, side effects, off-targets). Database URL: http://www.rnaiatlas.ethz.ch.",RNAiAtlas,0.844358683,NA,0,RNAiAtlas,0.844358683,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/14/2012 +22411954,http://rnaimmuno.ibch.poznan.pl,"RNAimmuno: a database of the nonspecific immunological effects of RNA interference and microRNA reagents. The RNAimmuno database was created to provide easy access to information regarding the nonspecific effects generated in cells by RNA interference triggers and microRNA regulators. Various RNAi and microRNA reagents, which differ in length and structure, often cause non-sequence-specific immune responses, in addition to triggering the intended sequence-specific effects. The activation of the cellular sensors of foreign RNA or DNA may lead to the induction of type I interferon and proinflammatory cytokine release. Subsequent changes in the cellular transcriptome and proteome may result in adverse effects, including cell death during therapeutic treatments or the misinterpretation of experimental results in research applications. 
The manually curated RNAimmuno database gathers the majority of the published data regarding the immunological side effects that are caused in investigated cell lines, tissues, and model organisms by different reagents. The database is accessible at http://rnaimmuno.ibch.poznan.pl and may be helpful in the further application and development of RNAi- and microRNA-based technologies.",RNAimmuno,0.986062527,NA,0,RNAimmuno,0.986062527,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/12/2012 +31906603,"http://www.rna-society.org/rnainter/, http://www.rna-society.org/raid","RNAInter in 2020: RNA interactome repository with increased coverage and annotation. Research on RNA-associated interactions has exploded in recent years, and increasing numbers of studies are not limited to RNA-RNA and RNA-protein interactions but also include RNA-DNA/compound interactions. To facilitate the development of the interactome and promote understanding of the biological functions and molecular mechanisms of RNA, we updated RAID v2.0 to RNAInter (RNA Interactome Database), a repository for RNA-associated interactions that is freely accessible at http://www.rna-society.org/rnainter/ or http://www.rna-society.org/raid/. Compared to RAID v2.0, new features in RNAInter include (i) 8-fold more interaction data and 94 additional species; (ii) more definite annotations organized, including RNA editing/localization/modification/structure and homology interaction; (iii) advanced functions including fuzzy/batch search, interaction network and RNA dynamic expression and (iv) four embedded RNA interactome tools: RIscoper, IntaRNA, PRIdictor and DeepBind. Consequently, RNAInter contains >41 million RNA-associated interaction entries, involving more than 450 thousand unique molecules, including RNA, protein, DNA and compound. 
Overall, RNAInter provides a comprehensive RNA interactome resource for researchers and paves the way to investigate the regulatory landscape of cellular RNAs.",RNAInter,0.994603395,NA,0,RNAInter,0.994603395,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +27543076,http://www.rna-society.org/rnalocate,"RNALocate: a resource for RNA subcellular localizations. Increasing evidence has revealed that RNA subcellular localization is a very important feature for deeply understanding RNA's biological functions after being transported into intra- or extra-cellular regions. RNALocate is a web-accessible database that aims to provide a high-quality RNA subcellular localization resource and facilitate future researches on RNA function or structure. The current version of RNALocate documents more than 37 700 manually curated RNA subcellular localization entries with experimental evidence, involving more than 21 800 RNAs with 42 subcellular localizations in 65 species, mainly including Homo sapiens, Mus musculus and Saccharomyces cerevisiae etc. Besides, RNA homology, sequence and interaction data have also been integrated into RNALocate. Users can access these data through online search, browse, blast and visualization tools. In conclusion, RNALocate will be of help in elucidating the entirety of RNA subcellular localization, and developing new prediction methods. The database is available at http://www.rna-society.org/rnalocate/.",RNALocate,0.997115135,NA,0,RNALocate,0.997115135,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/19/2016 +23155061,http://iimcb.genesilico.pl/rnapathwaysdb,"RNApathwaysDB--a database of RNA maturation and decay pathways. Many RNA molecules undergo complex maturation, involving e.g. excision from primary transcripts, removal of introns, post-transcriptional modification and polyadenylation. The level of mature, functional RNAs in the cell is controlled not only by the synthesis and maturation but also by degradation, which proceeds via many different routes. 
The systematization of data about RNA metabolic pathways and enzymes taking part in RNA maturation and degradation is essential for the full understanding of these processes. RNApathwaysDB, available online at http://iimcb.genesilico.pl/rnapathwaysdb, is an online resource about maturation and decay pathways involving RNA as the substrate. The current release presents information about reactions and enzymes that take part in the maturation and degradation of tRNA, rRNA and mRNA, and describes pathways in three model organisms: Escherichia coli, Saccharomyces cerevisiae and Homo sapiens. RNApathwaysDB can be queried with keywords, and sequences of protein enzymes involved in RNA processing can be searched with BLAST. Options for data presentation include pathway graphs and tables with enzymes and literature data. Structures of macromolecular complexes involving RNA and proteins that act on it are presented as 'potato models' using DrawBioPath-a new javascript tool.",RNApathwaysDB,0.971205235,NA,0,RNApathwaysDB,0.971205235,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/15/2012 +29069520,http://iimcb.genesilico.pl/RNArchitecture,"RNArchitecture: a database and a classification system of RNA families, with a focus on structural information. RNArchitecture is a database that provides a comprehensive description of relationships between known families of structured non-coding RNAs, with a focus on structural similarities. The classification is hierarchical and similar to the system used in the SCOP and CATH databases of protein structures. Its central level is Family, which builds on the Rfam catalog and gathers closely related RNAs. Consensus structures of Families are described with a reduced secondary structure representation. Evolutionarily related Families are grouped into Superfamilies. Similar structures are further grouped into Architectures. 
The highest level, Class, organizes families into very broad structural categories, such as simple or complex structured RNAs. Some groups at different levels of the hierarchy are currently labeled as 'unclassified'. The classification is expected to evolve as new data become available. For each Family with an experimentally determined three-diemsional (3D) structure(s), a representative one is provided. RNArchitecture also presents theoretical models of RNA 3D structure and is open for submission of structural models by users. Compared to other databases, RNArchitecture is unique in its focus on structure-based RNA classification, and in providing a platform for storing RNA 3D structure predictions. RNArchitecture can be accessed at http://iimcb.genesilico.pl/RNArchitecture/.",RNArchitecture,0.993474245,NA,0,RNArchitecture,0.993474245,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +26323714,http://rnaseqmetadb.ece.tamu.edu,"RNASeqMetaDB: a database and web server for navigating metadata of publicly available mouse RNA-Seq datasets. Unlabelled Gene targeting is a protocol for introducing a mutation to a specific gene in an organism. Because of the importance of in vivo assessment of gene function and modeling of human diseases, this technique has been widely adopted to generate a large number of mutant mouse models. Due to the recent breakthroughs in high-throughput sequencing technologies, RNA-Seq experiments have been performed on many of these mouse models, leading to hundreds of publicly available datasets. To facilitate the reuse of these datasets, we collected the associated metadata and organized them in a database called RNASeqMetaDB. The metadata were manually curated to ensure annotation consistency. We developed a web server to allow easy database navigation and data querying. Users can search the database using multiple parameters like genes, diseases, tissue types, keywords and associated publications in order to find datasets that match their interests. 
Summary statistics of the metadata are also presented on the web server showing interesting global patterns of RNA-Seq studies. Availability and implementation Freely available on the web at http://rnaseqmetadb.ece.tamu.edu.",RNASeqMetaDB,0.962606609,NA,0,RNASeqMetaDB,0.962606609,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/30/2015 +32608478,http://rnawre.bio2db.com,"RNAWRE: a resource of writers, readers and erasers of RNA modifications. . RNA modifications are involved in various kinds of cellular biological processes. Accumulated evidences have demonstrated that the functions of RNA modifications are determined by the effectors that can catalyze, recognize and remove RNA modifications. They are called 'writers', 'readers' and 'erasers'. The identification of RNA modification effectors will be helpful for understanding the regulatory mechanisms and biological functions of RNA modifications. In this work, we developed a database called RNAWRE that specially deposits RNA modification effectors. The current version of RNAWRE stored 2045 manually curated writers, readers and erasers for the six major kinds of RNA modifications, namely Cap, m1A, m6A, m5C, ψ and Poly A. The main modules of RNAWRE not only allow browsing and downloading the RNA modification effectors but also support the BLAST search of the potential RNA modification effectors in other species. We hope that RNAWRE will be helpful for the researches on RNA modifications. Database URL: http://rnawre.bio2db.com.",RNAWRE,0.990312815,NA,0,RNAWRE,0.990312815,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +24917120,http://rth.dk/resources/rnannotator/susscr102/version1.02,"Structured RNAs and synteny regions in the pig genome. Background Annotating mammalian genomes for noncoding RNAs (ncRNAs) is nontrivial since far from all ncRNAs are known and the computational models are resource demanding. 
Currently, the human genome holds the best mammalian ncRNA annotation, a result of numerous efforts by several groups. However, a more direct strategy is desired for the increasing number of sequenced mammalian genomes of which some, such as the pig, are relevant as disease models and production animals. Results We present a comprehensive annotation of structured RNAs in the pig genome. Combining sequence and structure similarity search as well as class specific methods, we obtained a conservative set with a total of 3,391 structured RNA loci of which 1,011 and 2,314, respectively, hold strong sequence and structure similarity to structured RNAs in existing databases. The RNA loci cover 139 cis-regulatory element loci, 58 lncRNA loci, 11 conflicts of annotation, and 3,183 ncRNA genes. The ncRNA genes comprise 359 miRNAs, 8 ribozymes, 185 rRNAs, 638 snoRNAs, 1,030 snRNAs, 810 tRNAs and 153 ncRNA genes not belonging to the here fore mentioned classes. When running the pipeline on a local shuffled version of the genome, we obtained no matches at the highest confidence level. Additional analysis of RNA-seq data from a pooled library from 10 different pig tissues added another 165 miRNA loci, yielding an overall annotation of 3,556 structured RNA loci. This annotation represents our best effort at making an automated annotation. To further enhance the reliability, 571 of the 3,556 structured RNAs were manually curated by methods depending on the RNA class while 1,581 were declared as pseudogenes. We further created a multiple alignment of pig against 20 representative vertebrates, from which RNAz predicted 83,859 de novo RNA loci with conserved RNA structures. 528 of the RNAz predictions overlapped with the homology based annotation or novel miRNAs. We further present a substantial synteny analysis which includes 1,004 lineage specific de novo RNA loci and 4 ncRNA loci in the known annotation specific for Laurasiatheria (pig, cow, dolphin, horse, cat, dog, hedgehog). 
Conclusions We have obtained one of the most comprehensive annotations for structured ncRNAs of a mammalian genome, which is likely to play central roles in both health modelling and production. The core annotation is available in Ensembl 70 and the complete annotation is available at http://rth.dk/resources/rnannotator/susscr102/version1.02.",RNAz,0.746822834,NA,0,RNAz,0.746822834,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME; no name in abstract,NA,NA,6/10/2014 +33046717,http://lod.proconsortium.org,"Protein ontology on the semantic web for knowledge discovery. The Protein Ontology (PRO) provides an ontological representation of protein-related entities, ranging from protein families to proteoforms to complexes. Protein Ontology Linked Open Data (LOD) exposes, shares, and connects knowledge about protein-related entities on the Semantic Web using Resource Description Framework (RDF), thus enabling integration with other Linked Open Data for biological knowledge discovery. For example, proteins (or variants thereof) can be retrieved on the basis of specific disease associations. As a community resource, we strive to follow the Findability, Accessibility, Interoperability, and Reusability (FAIR) principles, disseminate regular updates of our data, support multiple methods for accessing, querying and downloading data in various formats, and provide documentation both for scientists and programmers. PRO Linked Open Data can be browsed via faceted browser interface and queried using SPARQL via YASGUI. RDF data dumps are also available for download. Additionally, we developed RESTful APIs to support programmatic data access. We also provide W3C HCLS specification compliant metadata description for our data. 
The PRO Linked Open Data is available at https://lod.proconsortium.org/ .",RO,0.523091912,NA,0,RO,0.523091912,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/12/2020 +27940610,"http://cgm.sjtu.edu.cn/3kricedb/, http://www.rmbreeding.cn/pan3k","RPAN: rice pan-genome browser for ∼3000 rice genomes. A pan-genome is the union of the gene sets of all the individuals of a clade or a species and it provides a new dimension of genome complexity with the presence/absence variations (PAVs) of genes among these genomes. With the progress of sequencing technologies, pan-genome study is becoming affordable for eukaryotes with large-sized genomes. The Asian cultivated rice, Oryza sativa L., is one of the major food sources for the world and a model organism in plant biology. Recently, the 3000 Rice Genome Project (3K RGP) sequenced more than 3000 rice genomes with a mean sequencing depth of 14.3×, which provided a tremendous resource for rice research. In this paper, we present a genome browser, Rice Pan-genome Browser (RPAN), as a tool to search and visualize the rice pan-genome derived from 3K RGP. RPAN contains a database of the basic information of 3010 rice accessions, including genomic sequences, gene annotations, PAV information and gene expression data of the rice pan-genome. At least 12 000 novel genes absent in the reference genome were included. RPAN also provides multiple search and visualization functions. RPAN can be a rich resource for rice biology and rice breeding. It is available at http://cgm.sjtu.edu.cn/3kricedb/ or http://www.rmbreeding.cn/pan3k.",RPAN,0.977469802,Rice Pan-genome Browser,0.900623749,RPAN,0.977469802,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/10/2016 +26026167,http://bioinformatics.ustc.edu.cn/rpdb,"RPdb: a database of experimentally verified cellular reprogramming records. 
Unlabelled Many cell lines can be reprogrammed to other cell lines by forced expression of a few transcription factors or by specifically designed culture methods, which have attracted a great interest in the field of regenerative medicine and stem cell research. Plenty of cell lines have been used to generate induced pluripotent stem cells (IPSCs) by expressing a group of genes and microRNAs. These IPSCs can differentiate into somatic cells to promote tissue regeneration. Similarly, many somatic cells can be directly reprogrammed to other cells without a stem cell state. All these findings are helpful in searching for new reprogramming methods and understanding the biological mechanism inside. However, to the best of our knowledge, there is still no database dedicated to integrating the reprogramming records. We built RPdb (cellular reprogramming database) to collect cellular reprogramming information and make it easy to access. All entries in RPdb are manually extracted from more than 2000 published articles, which is helpful for researchers in regenerative medicine and cell biology. Availability and implementation RPdb is freely available on the web at http://bioinformatics.ustc.edu.cn/rpdb with all major browsers supported. Contact aoli@ustc.edu.cn Supplementary information Supplementary data are available at Bioinformatics online.",RPdb,0.979530334,reprogramming database,0.584708124,RPdb,0.979530334,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/29/2015 +26433228,http://sysbio.sysu.edu.cn/rpfdb/index.html,"RPFdb: a database for genome wide information of translated mRNA generated from ribosome profiling. Translational control is crucial in the regulation of gene expression and deregulation of translation is associated with a wide range of cancers and human diseases. Ribosome profiling is a technique that provides genome wide information of mRNA in translation based on deep sequencing of ribosome protected mRNA fragments (RPF). 
RPFdb is a comprehensive resource for hosting, analyzing and visualizing RPF data, available at www.rpfdb.org or http://sysbio.sysu.edu.cn/rpfdb/index.html. The current version of database contains 777 samples from 82 studies in 8 species, processed and reanalyzed by a unified pipeline. There are two ways to query the database: by keywords of studies or by genes. The outputs are presented in three levels. (i) Study level: including meta information of studies and reprocessed data for gene expression of translated mRNAs; (ii) Sample level: including global perspective of translated mRNA and a list of the most translated mRNA of each sample from a study; (iii) Gene level: including normalized sequence counts of translated mRNA on different genomic location of a gene from multiple samples and studies. To explore rich information provided by RPF, RPFdb also provides a genome browser to query and visualize context-specific translated mRNA. Overall our database provides a simple way to search, analyze, compare, visualize and download RPF data sets.",RPFdb,0.996379256,NA,0,RPFdb,0.996379256,1,NA,30335166,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,10/3/2015 +30335166,"http://www.rpfdb.org, http://sysbio.sysu.edu.cn/rpfdb","RPFdb v2.0: an updated database for genome-wide information of translated mRNA generated from ribosome profiling. RPFdb (http://www.rpfdb.org or http://sysbio.sysu.edu.cn/rpfdb) is a public database for hosting, analyzing and visualizing ribosome profiling (ribo-seq) data. Since its initial release in 2015, the amount of new ribo-seq data has been considerably enlarged with the increasing popularity of ribo-seq technique. 
Here, we describe an updated version, RPFdb v2.0, which brings significant data expansion, feature improvements, and functionality optimization: (i) RPFdb v2.0 currently hosts 2884 ribo-seq datasets from 293 studies, covering 29 different species, in comparison with 777 datasets from 82 studies and 8 species in the previous version; (ii) A refined analysis pipeline with multi-step quality controls has been applied to improve the pre-processing and alignment of ribo-seq data; (iii) New functional modules have been added to provide actively translated open reading frames (ORFs) information for each ribo-seq data; (iv) More features have been made available to increase database usability. With these additions and enhancements, RPFdb v2.0 will represent a more valuable and comprehensive database for the gene regulation community.",RPFdb,0.996272862,NA,0,RPFdb,0.996272862,1,NA,26433228,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,1/1/2019 +34496744,http://zhaoserver.com.cn/RPocket/RPocket.html,"RPocket: an intuitive database of RNA pocket topology information with RNA-ligand data resources. Background RNA regulates a variety of biological functions by interacting with other molecules. The ligand often binds in the RNA pocket to trigger structural changes or functions. Thus, it is essential to explore and visualize the RNA pocket to elucidate the structural and recognition mechanism for the RNA-ligand complex formation. Results In this work, we developed one user-friendly bioinformatics tool, RPocket. This database provides geometrical size, centroid, shape, secondary structure element for RNA pocket, RNA-ligand interaction information, and functional sites. We extracted 240 RNA pockets from 94 non-redundant RNA-ligand complex structures. We developed RPDescriptor to calculate the pocket geometrical property quantitatively. 
The geometrical information was then subjected to RNA-ligand binding analysis by incorporating the sequence, secondary structure, and geometrical combinations. This new approach takes advantage of both the atom-level precision of the structure and the nucleotide-level tertiary interactions. The results show that the higher-level topological pattern indeed improves the tertiary structure prediction. We also proposed a potential mechanism for RNA-ligand complex formation. The electrostatic interactions are responsible for long-range recognition, while the Van der Waals and hydrophobic contacts for short-range binding and optimization. These interaction pairs can be considered as distance constraints to guide complex structural modeling and drug design. Conclusion RPocket database would facilitate RNA-ligand engineering to regulate the complex formation for biological or medical applications. RPocket is available at http://zhaoserver.com.cn/RPocket/RPocket.html .",RPocket,0.952259183,NA,0,RPocket,0.952259183,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/8/2021 +29028888,http://huanglab.phys.hust.edu.cn/RRDbenchmark,"RRDB: a comprehensive and non-redundant benchmark for RNA-RNA docking and scoring. Motivation:With the discovery of more and more noncoding RNAs and their versatile functions, RNA-RNA interactions have received increased attention. Therefore, determination of their complex structures is valuable to understand the molecular mechanism of the interactions. Given the high cost of experimental methods, computational approaches like molecular docking have played an important role in the determination of complex structures, in which a benchmark is critical for the development of docking algorithms. Results:Meeting the need, we have developed the first comprehensive and nonredundant RNA-RNA docking benchmark (RRDB). The diverse dataset of 123 targets consists of 78 unbound-unbound and 45 bound-unbound (or unbound-bound) test cases. 
The dataset was classified into three groups according to the interface conformational changes between bound and unbound structures: 47 'easy', 38 'medium' and 38 'difficult' targets. A docking test with the benchmark using ZDOCK 2.1 demonstrated the challenging nature of the RNA-RNA docking problem and the important value of the present benchmark. The bound and unbound cases of the benchmark will be beneficial for the development and optimization of docking and scoring algorithms for RNA-RNA interactions. Availability and implementation:The benchmark is available at http://huanglab.phys.hust.edu.cn/RRDbenchmark/. Contact:huangsy@hust.edu.cn. Supplementary information:Supplementary data are available at Bioinformatics online.",RRDB,0.973243475,RNA-RNA docking benchmark,0.873065448,RRDB,0.973243475,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/1/2018 +25414355,http://rrndb.umms.med.umich.edu,"rrnDB: improved tools for interpreting rRNA gene abundance in bacteria and archaea and a new foundation for future development. Microbiologists utilize ribosomal RNA genes as molecular markers of taxonomy in surveys of microbial communities. rRNA genes are often co-located as part of an rrn operon, and multiple copies of this operon are present in genomes across the microbial tree of life. rrn copy number variability provides valuable insight into microbial life history, but introduces systematic bias when measuring community composition in molecular surveys. Here we present an update to the ribosomal RNA operon copy number database (rrnDB), a publicly available, curated resource for copy number information for bacteria and archaea. The redesigned rrnDB (http://rrndb.umms.med.umich.edu/) brings a substantial increase in the number of genomes described, improved curation, mapping of genomes to both NCBI and RDP taxonomies, and refined tools for querying and analyzing these data. 
With these changes, the rrnDB is better positioned to remain a comprehensive resource under the torrent of microbial genome sequencing. The enhanced rrnDB will contribute to the analysis of molecular surveys and to research linking genomic characteristics to life history.",rrnDB,0.994640529,ribosomal RNA operon copy number database,0.776851568,rrnDB,0.994640529,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2014 +24285297,http://rsnp.psych.ac.cn,"rSNPBase: a database for curated regulatory SNPs. In recent years, human regulatory SNPs (rSNPs) have been widely studied. Here, we present database rSNPBase, freely available at http://rsnp.psych.ac.cn/, to provide curated rSNPs that analyses the regulatory features of all SNPs in the human genome with reference to experimentally supported regulatory elements. In contrast with previous SNP functional annotation databases, rSNPBase is characterized by several unique features. (i) To improve reliability, all SNPs in rSNPBase are annotated with reference to experimentally supported regulatory elements. (ii) rSNPBase focuses on rSNPs involved in a wide range of regulation types, including proximal and distal transcriptional regulation and post-transcriptional regulation, and identifies their potentially regulated genes. (iii) Linkage disequilibrium (LD) correlations between SNPs were analysed so that the regulatory feature is annotated to SNP-set rather than a single SNP. (iv) rSNPBase provides the spatio-temporal labels and experimental eQTL labels for SNPs. 
In summary, rSNPBase provides more reliable, comprehensive and user-friendly regulatory annotations on rSNPs and will assist researchers in selecting candidate SNPs for further genetic studies and in exploring causal SNPs for in-depth molecular mechanisms of complex phenotypes.",rSNPBase,0.998359084,NA,0,rSNPBase,0.998359084,1,NA,29140525,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,11/26/2013 +29140525,http://rsnp3.psych.ac.cn,"rSNPBase 3.0: an updated database of SNP-related regulatory elements, element-gene pairs and SNP-based gene regulatory networks. Here, we present the updated rSNPBase 3.0 database (http://rsnp3.psych.ac.cn), which provides human SNP-related regulatory elements, element-gene pairs and SNP-based regulatory networks. This database is the updated version of the SNP regulatory annotation database rSNPBase and rVarBase. In comparison to the last two versions, there are both structural and data adjustments in rSNPBase 3.0: (i) The most significant new feature is the expansion of analysis scope from SNP-related regulatory elements to include regulatory element-target gene pairs (E-G pairs), therefore it can provide SNP-based gene regulatory networks. (ii) Web function was modified according to data content and a new network search module is provided in the rSNPBase 3.0 in addition to the previous regulatory SNP (rSNP) search module. The two search modules support data query for detailed information (related-elements, element-gene pairs, and other extended annotations) on specific SNPs and SNP-related graphic networks constructed by interacting transcription factors (TFs), miRNAs and genes. (3) The type of regulatory elements was modified and enriched. 
To our best knowledge, the updated rSNPBase 3.0 is the first data tool supports SNP functional analysis from a regulatory network prospective, it will provide both a comprehensive understanding and concrete guidance for SNP-related regulatory studies.",rSNPBase,0.997623026,NA,0,rSNPBase,0.997623026,1,NA,24285297,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,1/1/2018 +31872320,http://bioinformatics.fafu.edu.cn/RSRS,"Rice Stress-Resistant SNP Database. Background Rice (Oryza sativa L.) yield is limited inherently by environmental stresses, including biotic and abiotic stresses. Thus, it is of great importance to perform in-depth explorations on the genes that are closely associated with the stress-resistant traits in rice. The existing rice SNP databases have made considerable contributions to rice genomic variation information but none of them have a particular focus on integrating stress-resistant variation and related phenotype data into one web resource. Results Rice Stress-Resistant SNP database (http://bioinformatics.fafu.edu.cn/RSRS) mainly focuses on SNPs specific to biotic and abiotic stress-resistant ability in rice, and presents them in a unified web resource platform. The Rice Stress-Resistant SNP (RSRS) database contains over 9.5 million stress-resistant SNPs and 797 stress-resistant candidate genes in rice, which were detected from more than 400 stress-resistant rice varieties. We incorporated the SNPs function, genome annotation and phenotype information into this database. Besides, the database has a user-friendly web interface for users to query, browse and visualize a specific SNP efficiently. RSRS database allows users to query the SNP information and their relevant annotations for individual variety or more varieties. The search results can be visualized graphically in a genome browser or displayed in formatted tables. Users can also align SNPs between two or more rice accessions. 
Conclusion RSRS database shows great utility for scientists to further characterize the function of variants related to environmental stress-resistant ability in rice.",RSRS,0.890815914,Rice Stress-Resistant SNP database,0.872037998,RSRS,0.890815914,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/23/2019 +32382747,http://taolab.nwafu.edu.cn/rsvdb,"RSVdb: a comprehensive database of transcriptome RNA structure. . RNA fulfills a crucial regulatory role in cells by folding into a complex RNA structure. To date, a chemical compound, dimethyl sulfate (DMS), has been developed to probe the RNA structure at the transcriptome level effectively. We proposed a database, RSVdb (https://taolab.nwafu.edu.cn/rsvdb/), for the browsing and visualization of transcriptome RNA structures. RSVdb, including 626 225 RNAs with validated DMS reactivity from 178 samples in eight species, supports four main functions: information retrieval, research overview, structure prediction and resource download. Users can search for species, studies, transcripts and genes of interest; browse the quality control of sequencing data and statistical charts of RNA structure information; preview and perform online prediction of RNA structures in silico and under DMS restraint of different experimental treatments and download RNA structure data for species and studies. Together, RSVdb provides a reference for RNA structure and will support future research on the function of RNA structure at the transcriptome level.",RSVdb,0.997209489,NA,0,RSVdb,0.997209489,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2021 +29155231,http://tools.ibg.deu.edu.tr/rtfa,"RTFAdb: A database of computationally predicted associations between retrotransposons and transcription factors in the human and mouse genomes. In recent years, retrotransposons have gained increasing attention as a source of binding motifs for transcription factors (TFs). 
Despite the substantial roles of these mobile genetic elements in the regulation of gene expression, a comprehensive resource enabling the investigation of retrotransposon species that are bound by TFs is still lacking. Herein, I introduce for the first time a novel database called RTFAdb, which allows exploring computationally predicted associations between retrotransposons and TFs in diverse cell lines and tissues of human and mouse. My database, using over 3.000 TF ChIP-seq binding profiles collected from human and mouse samples, makes possible searching more than 1.500 retrotransposon species in the binding sites of a total of 596 TFs. RTFAdb is freely available at http://tools.ibg.deu.edu.tr/rtfa/ and has the potential to offer novel insights into mammalian transcriptional networks by providing an additional layer of information regarding the regulatory roles of retrotransposons.",RTFAdb,0.981455028,NA,0,RTFAdb,0.981455028,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2017 +26503253,http://rv.psych.ac.cn,"rVarBase: an updated database for regulatory features of human variants. We present here the rVarBase database (http://rv.psych.ac.cn), an updated version of the rSNPBase database, to provide reliable and detailed regulatory annotations for known and novel human variants. This update expands the database to include additional types of human variants, such as copy number variations (CNVs) and novel variants, and include additional types of regulatory features. Now rVarBase annotates variants in three dimensions: chromatin states of the surrounding regions, overlapped regulatory elements and variants' potential target genes. Two new types of regulatory elements (lncRNAs and miRNA target sites) have been introduced to provide additional annotation. 
Detailed information about variants' overlapping transcription factor binding sites (TFBSs) (often less than 15 bp) within experimentally supported TF-binding regions (∼ 150 bp) is provided, along with the binding motifs of matched TF families. Additional types of extended variants and variant-associated phenotypes were also added. In addition to the enrichment in data content, an element-centric search module was added, and the web interface was refined. In summary, rVarBase hosts more types of human variants and includes more types of up-to-date regulatory information to facilitate in-depth functional research and to provide practical clues for experimental design.",rVarBase,0.998465121,NA,0,rVarBase,0.998465121,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/25/2015 +26746786,http://rvs.u.hpc.mssm.edu,"Integrating 400 million variants from 80,000 human samples with extensive annotations: towards a knowledge base to analyze disease cohorts. Background Data from a plethora of high-throughput sequencing studies is readily available to researchers, providing genetic variants detected in a variety of healthy and disease populations. While each individual cohort helps gain insights into polymorphic and disease-associated variants, a joint perspective can be more powerful in identifying polymorphisms, rare variants, disease-associations, genetic burden, somatic variants, and disease mechanisms. Description We have set up a Reference Variant Store (RVS) containing variants observed in a number of large-scale sequencing efforts, such as 1000 Genomes, ExAC, Scripps Wellderly, UK10K; various genotyping studies; and disease association databases. RVS holds extensive annotations pertaining to affected genes, functional impacts, disease associations, and population frequencies. RVS currently stores 400 million distinct variants observed in more than 80,000 human samples. 
Conclusions RVS facilitates cross-study analysis to discover novel genetic risk factors, gene-disease associations, potential disease mechanisms, and actionable variants. Due to its large reference populations, RVS can also be employed for variant filtration and gene prioritization. Availability A web interface to public datasets and annotations in RVS is available at https://rvs.u.hpc.mssm.edu/.",RVS,0.919026732,NA,0,RVS,0.919026732,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/8/2016 +22099701,http://bioinfo.cis.nctu.edu.tw/samotifbase,"Structural alphabet motif discovery and a structural motif database. This study proposes a general framework for structural motif discovery. The framework is based on a modular design in which the system components can be modified or replaced independently to increase its applicability to various studies. It is a two-stage approach that first converts protein 3D structures into structural alphabet sequences, and then applies a sequence motif-finding tool to these sequences to detect conserved motifs. We named the structural motif database we built the SA-Motifbase, which provides the structural information conserved at different hierarchical levels in SCOP. For each motif, SA-Motifbase presents its 3D view; alphabet letter preference; alphabet letter frequency distribution; and the significance. SA-Motifbase is available at http://bioinfo.cis.nctu.edu.tw/samotifbase/.",SA-Motifbase,0.899282444,NA,0,SA-Motifbase,0.899282444,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/17/2011 +24214988,http://opig.stats.ox.ac.uk/webapps/sabdab,"SAbDab: the structural antibody database. Structural antibody database (SAbDab; http://opig.stats.ox.ac.uk/webapps/sabdab) is an online resource containing all the publicly available antibody structures annotated and presented in a consistent fashion. 
The data are annotated with several properties including experimental information, gene details, correct heavy and light chain pairings, antigen details and, where available, antibody-antigen binding affinity. The user can select structures, according to these attributes as well as structural properties such as complementarity determining region loop conformation and variable domain orientation. Individual structures, datasets and the complete database can be downloaded.",SAbDab,0.997705543,Structural antibody database,0.975666851,SAbDab,0.997705543,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2013 +22102587,http://sabio.h-its.org,"SABIO-RK--database for biochemical reaction kinetics. SABIO-RK (http://sabio.h-its.org/) is a web-accessible database storing comprehensive information about biochemical reactions and their kinetic properties. SABIO-RK offers standardized data manually extracted from the literature and data directly submitted from lab experiments. The database content includes kinetic parameters in relation to biochemical reactions and their biological sources with no restriction on any particular set of organisms. Additionally, kinetic rate laws and corresponding equations as well as experimental conditions are represented. All the data are manually curated and annotated by biological experts, supported by automated consistency checks. SABIO-RK can be accessed via web-based user interfaces or automatically via web services that allow direct data access by other tools. Both interfaces support the export of the data together with its annotations in SBML (Systems Biology Markup Language), e.g. for import in modelling tools.",SABIO-RK,0.997967561,NA,0,SABIO-RK,0.997967561,1,NA,29092055,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/18/2011 +29092055,http://sabiork.h-its.org,"SABIO-RK: an updated resource for manually curated biochemical reaction kinetics. 
SABIO-RK (http://sabiork.h-its.org/) is a manually curated database containing data about biochemical reactions and their reaction kinetics. The data are primarily extracted from scientific literature and stored in a relational database. The content comprises both naturally occurring and alternatively measured biochemical reactions and is not restricted to any organism class. The data are made available to the public by a web-based search interface and by web services for programmatic access. In this update we describe major improvements and extensions of SABIO-RK since our last publication in the database issue of Nucleic Acid Research (2012). (i) The website has been completely revised and (ii) allows now also free text search for kinetics data. (iii) Additional interlinkages with other databases in our field have been established; this enables users to gain directly comprehensive knowledge about the properties of enzymes and kinetics beyond SABIO-RK. (iv) Vice versa, direct access to SABIO-RK data has been implemented in several systems biology tools and workflows. (v) On request of our experimental users, the data can be exported now additionally in spreadsheet formats. (vi) The newly established SABIO-RK Curation Service allows to respond to specific data requirements.",SABIO-RK,0.994631102,NA,0,SABIO-RK,0.994631102,1,NA,22102587,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2018 +24323624,http://sabre.epd.brc.riken.jp/SABRE2.html,"SABRE2: a database connecting plant EST/full-length cDNA clones with Arabidopsis information. The SABRE (Systematic consolidation of Arabidopsis and other Botanical REsources) database cross-searches plant genetic resources through publicly available Arabidopsis information. In SABRE, plant expressed sequence tag (EST)/cDNA clones are related to TAIR (The Arabidoposis Information Resource) gene models and their annotations through sequence similarity. 
By entering a keyword, SABRE searches and retrieves TAIR gene models and annotations, together with homologous gene clones from various plant species. SABRE thus facilitates using TAIR annotations of Arabidopsis genes for research on homologous genes from other model plants. To expand the application range of SABRE to crop breeding, we have recently upgraded SABRE to SABRE2 (http://sabre.epd.brc.riken.jp/SABRE2.html), by newly adding six model plants (including the major crops barley, soybean, tomato and wheat), and by improving the retrieval interface. The present version has integrated information on >1.5 million plant EST/cDNA clones from the National BioResource Project (NBRP) of Japan. All clones are actual experimental resources from 14 plant species (Arabidoposis, barley, cassava, Chinese cabbage, lotus, morning glory, poplar, Physcomitrella patens, Striga hermonthica, soybean, Thellungiella halophila, tobacco, tomato and wheat), and are available from the core facilities of the NBRP. SABRE2 is thus a useful tool that can contribute towards the improvement of important crop breeds by connecting basic research and crop breeding.",SABRE,0.995861411,Systematic consolidation of Arabidopsis and,0.888674723,SABRE,0.995861411,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/9/2013 +29774137,http://teeqrani1.wixsite.com/sapd,"Saudi anti-human cancer plants database (SACPD): A collection of plants with anti-human cancer activities. Several anticancer drugs have been developed from natural products such as plants. Successful experiments in inhibiting the growth of human cancer cell lines using Saudi plants were published over the last three decades. Up to date, there is no Saudi anticancer plants database as a comprehensive source for the interesting data generated from these experiments. Therefore, there was a need for creating a database to collect, organize, search and retrieve such data. 
As a result, the current paper describes the generation of the Saudi anti-human cancer plants database (SACPD). The database contains most of the reported information about the naturally growing Saudi anticancer plants. SACPD comprises the scientific and local names of 91 plant species that grow naturally in Saudi Arabia. These species belong to 38 different taxonomic families. In Addition, 18 species that represent16 family of medicinal plants and are intensively sold in the local markets in Saudi Arabia were added to the database. The website provides interesting details, including plant part containing the anticancer bioactive compounds, plants locations and cancer/cell type against which they exhibit their anticancer activity. Our survey revealed that breast, liver and leukemia were the most studied cancer cell lines in Saudi Arabia with percentages of 27%, 19% and 15%, respectively. The current SACPD represents a nucleus around which more development efforts can expand to accommodate all future submissions about new Saudi plant species with anticancer activities. SACPD will provide an excellent starting point for researchers and pharmaceutical companies who are interested in developing new anticancer drugs. SACPD is available online at https://teeqrani1.wixsite.com/sapd.",SACPD,0.991069496,Saudi anti-human cancer plants database,0.969169753,SACPD,0.991069496,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +30380119,http://bioinfo.life.hust.edu.cn/SAGD,"SAGD: a comprehensive sex-associated gene database from transcriptomes. Many animal species present sex differences. Sex-associated genes (SAGs), which have female-biased or male-biased expression, have major influences on the remarkable sex differences in important traits such as growth, reproduction, disease resistance and behaviors. However, the SAGs resulting in the vast majority of phenotypic sex differences are still unknown. 
To provide a useful resource for the functional study of SAGs, we manually curated public RNA-seq datasets with paired female and male biological replicates from the same condition and systematically re-analyzed the datasets using standardized methods. We identified 27,793 female-biased SAGs and 64,043 male-biased SAGs from 2,828 samples of 21 species, including human, chimpanzee, macaque, mouse, rat, cow, horse, chicken, zebrafish, seven fly species and five worm species. All these data were cataloged into SAGD, a user-friendly database of SAGs (http://bioinfo.life.hust.edu.cn/SAGD) where users can browse SAGs by gene, species, drug and dataset. In SAGD, the expression, annotation, targeting drugs, homologs, ontology and related RNA-seq datasets of SAGs are provided to help researchers to explore their functions and potential applications in agriculture and human health.",SAGD,0.992830694,NA,0,SAGD,0.992830694,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +32621601,http://sampgr.org.cn,"SAGER: a database of Symbiodiniaceae and Algal Genomic Resource. . Symbiodiniaceae dinoflagellates are essential endosymbionts of reef building corals and some other invertebrates. Information of their genome structure and function is critical for understanding coral symbiosis and bleaching. With the rapid development of sequencing technology, genome draft assemblies of several Symbiodiniaceae species and diverse marine algal genomes have become publicly available but spread in multiple separate locations. Here, we present a Symbiodiniaceae and Algal Genomic Resource Database (SAGER), a user-friendly online repository for integrating existing genomic data of Symbiodiniaceae species and diverse marine algal gene sets from MMETSP and PhyloDB databases. Relevant algal data are included to facilitate comparative analyses. The database is freely accessible at http://sampgr.org.cn. 
It provides comprehensive tools for studying gene function, expression and comparative genomics, including search tools to identify gene information from Symbiodiniaceae species, and BLAST tool to find orthologs from marine algae and protists. Moreover, SAGER integrates transcriptome datasets derived from diverse culture conditions of corresponding Symbiodiniaceae species. SAGER was developed with the capacity to incorporate future Symbiodiniaceae and algal genome and transcriptome data, and will serve as an open-access and sustained platform providing genomic and molecular tools that can be conveniently used to study Symbiodiniaceae and other marine algae. Database URL: http://sampgr.org.cn.",SAGER,0.995087028,Symbiodiniaceae and Algal Genomic Resource Database,0.986229761,SAGER,0.995087028,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +25881271,http://me.lzu.edu.cn/stpd,"The salinity tolerant poplar database (STPD): a comprehensive database for studying tree salt-tolerant adaption and poplar genomics. Background Soil salinity is a significant factor that impairs plant growth and agricultural productivity, and numerous efforts are underway to enhance salt tolerance of economically important plants. Populus species are widely cultivated for diverse uses. Especially, they grow in different habitats, from salty soil to mesophytic environment, and are therefore used as a model genus for elucidating physiological and molecular mechanisms of stress tolerance in woody plants. Description The Salinity Tolerant Poplar Database (STPD) is an integrative database for salt-tolerant poplar genome biology. Currently the STPD contains Populus euphratica genome and its related genetic resources. P. euphratica, with a preference of the salty habitats, has become a valuable genetic resource for the exploitation of tolerance characteristics in trees. 
This database contains curated data including genomic sequence, genes and gene functional information, non-coding RNA sequences, transposable elements, simple sequence repeats and single nucleotide polymorphisms information of P. euphratica, gene expression data between P. euphratica and Populus tomentosa, and whole-genome alignments between Populus trichocarpa, P. euphratica and Salix suchowensis. The STPD provides useful searching and data mining tools, including GBrowse genome browser, BLAST servers and genome alignments viewer, which can be used to browse genome regions, identify similar sequences and visualize genome alignments. Datasets within the STPD can also be downloaded to perform local searches. Conclusions A new Salinity Tolerant Poplar Database has been developed to assist studies of salt tolerance in trees and poplar genomics. The database will be continuously updated to incorporate new genome-wide data of related poplar species. This database will serve as an infrastructure for researches on the molecular function of genes, comparative genomics, and evolution in closely related species as well as promote advances in molecular breeding within Populus. The STPD can be accessed at http://me.lzu.edu.cn/stpd/ .",STPD,0.778577745,Salinity Tolerant Poplar Database,0.957052559,Salinity Tolerant Poplar Database,0.957052559,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/17/2015 +28651544,http://www.salmobase.org,"SalmoBase: an integrated molecular data resource for Salmonid species. Background Salmonids are ray-finned fishes which constitute 11 genera and at least 70 species including Atlantic salmon, whitefishes, graylings, rainbow trout, and char. The common ancestor of all Salmonidae experienced a whole genome duplication (WGD) ~80 million years ago, resulting in an autotetraploid genome. Genomic rediplodization is still going on in salmonid species, providing an unique system for studying evolutionary consequences of whole genome duplication. 
In recent years, high quality genome sequences of Atlantic salmon and Rainbow trout has been established, due to their scientific and commercial values. In this paper we introduce SalmoBase ( http://www.salmobase.org/ ), a tool for making molecular resources for salmonids public available in a framework of visualizations and analytic tools. Results SalmoBase has been developed as a part of the ELIXIR.NO project. Currently, SalmoBase contains molecular resources for Atlantic salmon and Rainbow trout. Data can be accessed through BLAST, Genome Browser (GBrowse), Genetic Variation Browser (GVBrowse) and Gene Expression Browser (GEBrowse). Conclusions To the best of our knowledge, SalmoBase is the first database which integrates salmonids data and allow users to study salmonids in an integrated framework. The database and its tools (e.g., comparative genomics tools, synteny browsers) will be expanded as additional public resources describing other Salmonidae genomes become available.",SalmoBase,0.996645689,NA,0,SalmoBase,0.996645689,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/26/2017 +22120661,http://genomicasalmones.dim.uchile.cl,"SalmonDB: a bioinformatics resource for Salmo salar and Oncorhynchus mykiss. SalmonDB is a new multiorganism database containing EST sequences from Salmo salar, Oncorhynchus mykiss and the whole genome sequence of Danio rerio, Gasterosteus aculeatus, Tetraodon nigroviridis, Oryzias latipes and Takifugu rubripes, built with core components from GMOD project, GOPArc system and the BioMart project. The information provided by this resource includes Gene Ontology terms, metabolic pathways, SNP prediction, CDS prediction, orthologs prediction, several precalculated BLAST searches and domains. It also provides a BLAST server for matching user-provided sequences to any of the databases and an advanced query tool (BioMart) that allows easy browsing of EST databases with user-defined criteria. 
These tools make SalmonDB database a valuable resource for researchers searching for transcripts and genomic information regarding S. salar and other salmonid species. The database is expected to grow in the near feature, particularly with the S. salar genome sequencing project. Database URL: http://genomicasalmones.dim.uchile.cl/",SalmonDB,0.996498466,NA,0,SalmonDB,0.996498466,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/26/2011 +23055621,http://www.salmonellabase.com,"SALMONELLABASE - An online database of druggable targets of Salmonella species. Unlabelled Salmonellosis is one of the most common and widely distributed food borne diseases caused by Salmonella serovars. The emergence of multi drug resistant strains has become a threatening public health problem and targeting unique effectors of this pathogen can be considered as a powerful strategy for drug design. SalmonellaBase is an online web portal serving as an integrated source of information about Salmonella serovars with the data required for the structural and functional studies and the analysis of druggable targets in Salmonella. We have identified several target proteins, which helps in the pathogenicity of the organism and predicted their structures. The database will have the information on completely sequenced genomes of Salmonella species with the complete set of protein sequences of the respective strains, determined structures, predicted protein structures and biochemical pathways of the respective strains. In addition, we have provided information about name and source of the protein, Uniprot and Protein Data Bank codes and literature information. Furthermore, SalmonellaBase is linked to related databases and other resources. We have set up a web interface with different search and display options so that users have the ability to get the data in several ways. SalmonellaBase is a freely available database. 
Availability http://www.salmonellabase.com/",SalmonellaBase,0.992680967,NA,0,SalmonellaBase,0.992680967,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/3/2012 +29057095,http://salmonet.org,"SalmoNet, an integrated network of ten Salmonella enterica strains reveals common and distinct pathways to host adaptation. Salmonella enterica is a prominent bacterial pathogen with implications on human and animal health. Salmonella serovars could be classified as gastro-intestinal or extra-intestinal. Genome-wide comparisons revealed that extra-intestinal strains are closer relatives of gastro-intestinal strains than to each other indicating a parallel evolution of this trait. Given the complexity of the differences, a systems-level comparison could reveal key mechanisms enabling extra-intestinal serovars to cause systemic infections. Accordingly, in this work, we introduce a unique resource, SalmoNet, which combines manual curation, high-throughput data and computational predictions to provide an integrated network for Salmonella at the metabolic, transcriptional regulatory and protein-protein interaction levels. SalmoNet provides the networks separately for five gastro-intestinal and five extra-intestinal strains. As a multi-layered, multi-strain database containing experimental data, SalmoNet is the first dedicated network resource for Salmonella. It comprehensively contains interactions between proteins encoded in Salmonella pathogenicity islands, as well as regulatory mechanisms of metabolic processes with the option to zoom-in and analyze the interactions at specific loci in more detail. Application of SalmoNet is not limited to strain comparisons as it also provides a Salmonella resource for biochemical network modeling, host-pathogen interaction studies, drug discovery, experimental validation of novel interactions, uncovering new pathological mechanisms from emergent properties and epidemiological studies. 
SalmoNet is available at http://salmonet.org.",SalmoNet,0.995892107,NA,0,SalmoNet,0.995892107,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/18/2017 +"26097510, 33952332",http://sancdb.rubi.ru.ac.za,"SANCDB: a South African natural compound database. Background Natural products (NPs) are important to the drug discovery process. NP research efforts are expanding world-wide and South Africa is no exception to this. While freely-accessible small molecule databases, containing compounds isolated from indigenous sources, have been established in a number of other countries, there is currently no such online database in South Africa. Description The current research presents a South African natural compound database, named SANCDB. This is a curated and fully-referenced database containing compound information for 600 natural products extracted directly from journal articles, book chapters and theses. There is a web interface to the database, which is simple and easy to use, while allowing for compounds to be searched by a number of different criteria. Being fully referenced, each compound page contains links to the original referenced work from which the information was obtained. Further, the website provides a submission pipeline, allowing researchers to deposit compounds from their own research into the database. Conclusions SANCDB is currently the only web-based NP database in Africa. It aims to provide a useful resource for the in silico screening of South African NPs for drug discovery purposes. The database is supported by a submission pipeline to allow growth by entries from researchers. As such, we currently present SANCDB the starting point of a platform for a community-driven, curated database to further natural products research in South Africa. 
SANCDB is freely available at https://sancdb.rubi.ru.ac.za/.",SANCDB,0.994873464,South African Natural Compounds Database,0.975358397,SANCDB,0.994873464,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/5/2021 +32096105,http://psico.fcep.urv.cat/exp/files/SANDchild.xlsx,"Spanish affective normative data for 1,406 words rated by children and adolescents (SANDchild). Most research on the relationship between emotion and language in children relies on the use of words whose affective properties have been assessed by adults. To overcome this limitation, in the current study we introduce SANDchild, the Spanish affective database for children. This dataset reports ratings in the valence and the arousal dimensions for a large corpus of 1406 Spanish words rated by a large sample of 1276 children and adolescents from four different age groups (7, 9, 11 and 13 years old). We observed high inter-rater reliabilities for both valence and arousal in the four age groups. However, some age differences were found. In this sense, ratings for both valence and arousal decreased with age. Furthermore, the youngest children consider more words to be positive than adolescents. We also found sex differences in valence scores since boys gave higher valence ratings than girls, while girls considered more words to be negative than boys. The norms provided in this database will allow us to further extend our knowledge on the acquisition, development and processing of emotional language from childhood to adolescence. The complete database can be downloaded from https://psico.fcep.urv.cat/exp/files/SANDchild.xlsx .",SANDchild,0.992918432,NA,0,SANDchild,0.992918432,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2020 +33892308,http://webs.iiitd.edu.in/raghava/sapdb,"SAPdb: A database of short peptides and the corresponding nanostructures formed by self-assembly. 
Nanostructures generated by self-assembly of peptides yield nanomaterials that have many therapeutic applications, including drug delivery and biomedical engineering, due to their low cytotoxicity and higher uptake by targeted cells owing to their high affinity and specificity towards cell surface receptors. Despite the promising implications of this rapidly expanding field, there is no dedicated resource to study peptide nanostructures. This study endeavours to create a repository of short peptides, which may prove to be the best models to study ordered nanostructures formed by peptide self-assembly. SAPdb has a repertoire of 1049 entries of experimentally validated nanostructures formed by the self-assembly of small peptides. It consists of 328 tripeptides, 701 dipeptides, and 20 single amino acids with some conjugate partners. Each entry encompasses comprehensive information about the peptide, such as chemical modifications, the type of nanostructure formed, experimental conditions like pH, temperature, solvent required for the self-assembly, etc. Our analysis indicates that peptides containing aromatic amino acids favour the formation of self-assembling nanostructures. Additionally, we observed that these peptides form different nanostructures under different experimental conditions. SAPdb provides this comprehensive information in a hassle-free tabulated manner at a glance. User-friendly browsing, searching, and analysis modules have been integrated for easy data retrieval, data comparison, and examination of properties. We anticipate SAPdb to be a valuable repository for researchers engaged in the burgeoning arena of nanobiotechnology. It is freely available at https://webs.iiitd.edu.in/raghava/sapdb.",SAPdb,0.994092166,NA,0,SAPdb,0.994092166,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/10/2021 +32709339,"http://47.92.73.208:8082/, http://cpu-smsd.com","A mass spectrometry database for identification of saponins in plants. 
Saponins constitute an important class of secondary metabolites of the plant kingdom. Here, we present a mass spectrometry-based database for rapid and easy identification of saponins henceforth referred to as saponin mass spectrometry database (SMSD). With a total of 4196 saponins, 214 of which were obtained from commercial sources. Through liquid chromatography-tandem high-resolution/mass spectrometry (HR/MS) analysis under negative ion mode, the fragmentation behavior for all parent fragment ions almost conformed to successive losses of sugar moieties, α-dissociation and McLafferty rearrangement of aglycones in high-energy collision induced dissociation. The saccharide moieties produced sugar fragment ions from m/z (monosaccharide) to m/z (polysaccharides). The parent and sugar fragment ions of other saponins were predicted using the above mentioned fragmentation pattern. The SMSD is freely accessible at http://47.92.73.208:8082/ or http://cpu-smsd.com (preferrably using google). It provides three search modes (""CLASSIFY"", ""SEARCH"" and ""METABOLITE""). Under the ""CLASSIFY"" function, saponins are classified with high predictive accuracies from all metabolites by establishment of logistic regression model through their mass data from HR/MS input as a csv file, where the first column is ID and the second column is mass. For the ""SEARCH"" function, saponins are searched against parent ions with certain mass tolerance in ""MS Ion Search"". Then, daughter ions with certain mass tolerance are input into ""MS/MS Ion Search"". The optimal candidates were screened out according to the match count and match rate values in comparison with fragment data in database. Additionally, another logistic regression model completely differentiated between parent and sugar fragment ions. This function designed in front web is conducive to search and recheck. 
With the ""METABOLITE"" function, saponins are searched using their common names, where both full and partial name searches are supported. With these modes, saponins of diverse chemical composition can be explored, grouped and identified with a high degree of predictive accuracy. This specialized database would aid in the identification of saponins in complex matrices particular in the study of traditional Chinese medicines or plant metabolomics.",SMSD,0.893061325,saponin mass spectrometry database,0.897874147,saponin mass spectrometry database,0.897874147,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/3/2020 +29625201,http://bioinfo.imtech.res.in/manojk/sarna,"saRNAdb: Resource of Small Activating RNAs for Up-regulating the Gene Expression. RNA activation (RNAa) is the process of enhancing selective gene expression at transcriptional level using double-stranded RNAs, targeting gene promoter. These RNA molecules are usually 21 nucleotides long and termed as small activating RNAs (saRNAs). They are involved in gene regulation, epigenetics, gain-of-function studies and have potential therapeutic applications for various diseases especially cancer. RNAa is opposite to RNA interference in functionality; however, both processes share some protein machinery. There are many RNA interference centered online resources but no one for saRNAs; therefore, we developed ""saRNAdb"" database (http://bioinfo.imtech.res.in/manojk/sarna/). It contains 2150 manually curated saRNA entries with detailed information about their nucleotide sequences, activities, corresponding target gene, promoter and other experimental data. Besides, saRNA-promoter binding location, predicted saRNA features, tools (off-target, map) and RNAa-related proteins with their interacting partners are provided. 
saRNAdb is expected to assist in RNA research especially for nucleic acid-based therapeutics development.",saRNAdb,0.994530654,NA,0,saRNAdb,0.994530654,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/3/2018 +33416848,http://sars3d.com,"SARS-CoV-2 3D database: understanding the coronavirus proteome and evaluating possible drug targets. The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) is a rapidly growing infectious disease, widely spread with high mortality rates. Since the release of the SARS-CoV-2 genome sequence in March 2020, there has been an international focus on developing target-based drug discovery, which also requires knowledge of the 3D structure of the proteome. Where there are no experimentally solved structures, our group has created 3D models with coverage of 97.5% and characterized them using state-of-the-art computational approaches. Models of protomers and oligomers, together with predictions of substrate and allosteric binding sites, protein-ligand docking, SARS-CoV-2 protein interactions with human proteins, impacts of mutations, and mapped solved experimental structures are freely available for download. These are implemented in SARS CoV-2 3D, a comprehensive and user-friendly database, available at https://sars3d.com/. This provides essential information for drug discovery, both to evaluate targets and design new potential therapeutics.",SARS CoV-2,0.557417756,NA,0,SARS CoV-2,0.557417756,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/1/2021 +33553941,http://sarscovidb.org,"SARSCOVIDB-A New Platform for the Analysis of the Molecular Impact of SARS-CoV-2 Viral Infection. The COVID-19 pandemic caused by the new coronavirus (SARS-CoV-2) has become a global emergency issue for public health. This threat has led to an acceleration in related research and, consequently, an unprecedented volume of clinical and experimental data that include changes in gene expression resulting from infection. 
The SARS-CoV-2 infection database (SARSCOVIDB: https://sarscovidb.org/) was created to mitigate the difficulties related to this scenario. The SARSCOVIDB is an online platform that aims to integrate all differential gene expression data, at messenger RNA and protein levels, helping to speed up analysis and research on the molecular impact of COVID-19. The database can be searched from different experimental perspectives and presents all related information from published data, such as viral strains, hosts, methodological approaches (proteomics or transcriptomics), genes/proteins, and samples (clinical or experimental). All information was taken from 24 articles related to analyses of differential gene expression out of 5,554 COVID-19/SARS-CoV-2-related articles published so far. The database features 12,535 genes whose expression has been identified as altered due to SARS-CoV-2 infection. Thus, the SARSCOVIDB is a new resource to support the health workers and the scientific community in understanding the pathogenesis and molecular impact caused by SARS-CoV-2.",SARSCOVIDB,0.995992124,infection database,0.97165823,SARSCOVIDB,0.995992124,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/21/2021 +26527728,http://crdd.osdd.net/raghava/satpdb,"SATPdb: a database of structurally annotated therapeutic peptides. SATPdb (http://crdd.osdd.net/raghava/satpdb/) is a database of structurally annotated therapeutic peptides, curated from 22 public domain peptide databases/datasets including 9 of our own. The current version holds 19192 unique experimentally validated therapeutic peptide sequences having length between 2 and 50 amino acids. It covers peptides having natural, non-natural and modified residues. These peptides were systematically grouped into 10 categories based on their major function or therapeutic property like 1099 anticancer, 10585 antimicrobial, 1642 drug delivery and 1698 antihypertensive peptides. 
We assigned or annotated structure of these therapeutic peptides using structural databases (Protein Data Bank) and state-of-the-art structure prediction methods like I-TASSER, HHsearch and PEPstrMOD. In addition, SATPdb facilitates users in performing various tasks that include: (i) structure and sequence similarity search, (ii) peptide browsing based on their function and properties, (iii) identification of moonlighting peptides and (iv) searching of peptides having desired structure and therapeutic activities. We hope this database will be useful for researchers working in the field of peptide-based therapeutics.",SATPdb,0.998526931,NA,0,SATPdb,0.998526931,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/2/2015 +24504151,http://www.bioafrica.net/regadb,"Southern African Treatment Resistance Network (SATuRN) RegaDB HIV drug resistance and clinical management database: supporting patient management, surveillance and research in southern Africa. Substantial amounts of data have been generated from patient management and academic exercises designed to better understand the human immunodeficiency virus (HIV) epidemic and design interventions to control it. A number of specialized databases have been designed to manage huge data sets from HIV cohort, vaccine, host genomic and drug resistance studies. Besides databases from cohort studies, most of the online databases contain limited curated data and are thus sequence repositories. HIV drug resistance has been shown to have a great potential to derail the progress made thus far through antiretroviral therapy. Thus, a lot of resources have been invested in generating drug resistance data for patient management and surveillance purposes. Unfortunately, most of the data currently available relate to subtype B even though >60% of the epidemic is caused by HIV-1 subtype C. 
A consortium of clinicians, scientists, public health experts and policy markers working in southern Africa came together and formed a network, the Southern African Treatment and Resistance Network (SATuRN), with the aim of increasing curated HIV-1 subtype C and tuberculosis drug resistance data. This article describes the HIV-1 data curation process using the SATuRN Rega database. The data curation is a manual and time-consuming process done by clinical, laboratory and data curation specialists. Access to the highly curated data sets is through applications that are reviewed by the SATuRN executive committee. Examples of research outputs from the analysis of the curated data include trends in the level of transmitted drug resistance in South Africa, analysis of the levels of acquired resistance among patients failing therapy and factors associated with the absence of genotypic evidence of drug resistance among patients failing therapy. All these studies have been important for informing first- and second-line therapy. This database is a free password-protected open source database available on www.bioafrica.net. Database URL: http://www.bioafrica.net/regadb/",SATuRN,0.765131068,African Treatment Resistance Network,0.749985605,SATuRN,0.765131068,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/6/2014 +33177514,http://doi.org/10.35115/37n9-5738,"SAVI, in silico generation of billions of easily synthesizable compounds through expert-system type rules. We have made available a database of over 1 billion compounds predicted to be easily synthesizable, called Synthetically Accessible Virtual Inventory (SAVI). They have been created by a set of transforms based on an adaptation and extension of the CHMTRN/PATRAN programming languages describing chemical synthesis expert knowledge, which originally stem from the LHASA project. 
The chemoinformatics toolkit CACTVS was used to apply a total of 53 transforms to about 150,000 readily available building blocks (enamine.net). Only single-step, two-reactant syntheses were calculated for this database even though the technology can execute multi-step reactions. The possibility to incorporate scoring systems in CHMTRN allowed us to subdivide the database of 1.75 billion compounds in sets according to their predicted synthesizability, with the most-synthesizable class comprising 1.09 billion synthetic products. Properties calculated for all SAVI products show that the database should be well-suited for drug discovery. It is being made publicly available for free download from https://doi.org/10.35115/37n9-5738.",SAVI,0.993400097,Synthetically Accessible Virtual Inventory,0.951206068,SAVI,0.993400097,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2020 +21948792,http://bugs.sgul.ac.uk/bugsbase,"BμG@Sbase--a microbial gene expression and comparative genomic database. The reducing cost of high-throughput functional genomic technologies is creating a deluge of high volume, complex data, placing the burden on bioinformatics resources and tool development. The Bacterial Microarray Group at St George's (BμG@S) has been at the forefront of bacterial microarray design and analysis for over a decade and while serving as a hub of a global network of microbial research groups has developed BμG@Sbase, a microbial gene expression and comparative genomic database. BμG@Sbase (http://bugs.sgul.ac.uk/bugsbase/) is a web-browsable, expertly curated, MIAME-compliant database that stores comprehensive experimental annotation and multiple raw and analysed data formats. Consistent annotation is enabled through a structured set of web forms, which guide the user through the process following a set of best practices and controlled vocabulary. 
The database currently contains 86 expertly curated publicly available data sets (with a further 124 not yet published) and full annotation information for 59 bacterial microarray designs. The data can be browsed and queried using an explorer-like interface; integrating intuitive tree diagrams to present complex experimental details clearly and concisely. Furthermore the modular design of the database will provide a robust platform for integrating other data types beyond microarrays into a more Systems analysis based future.",Sbase,0.948678176,Bacterial Microarray,0.667608842,Sbase,0.948678176,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,9/24/2011 +29059366,http://sbcddb.moffitt.org,"SBCDDB: Sleeping Beauty Cancer Driver Database for gene discovery in mouse models of human cancers. Large-scale oncogenomic studies have identified few frequently mutated cancer drivers and hundreds of infrequently mutated drivers. Defining the biological context for rare driving events is fundamentally important to increasing our understanding of the druggable pathways in cancer. Sleeping Beauty (SB) insertional mutagenesis is a powerful gene discovery tool used to model human cancers in mice. Our lab and others have published a number of studies that identify cancer drivers from these models using various statistical and computational approaches. Here, we have integrated SB data from primary tumor models into an analysis and reporting framework, the Sleeping Beauty Cancer Driver DataBase (SBCDDB, http://sbcddb.moffitt.org), which identifies drivers in individual tumors or tumor populations. Unique to this effort, the SBCDDB utilizes a single, scalable, statistical analysis method that enables data to be grouped by different biological properties. This allows for SB drivers to be evaluated (and re-evaluated) under different contexts. 
The SBCDDB provides visual representations highlighting the spatial attributes of transposon mutagenesis and couples this functionality with analysis of gene sets, enabling users to interrogate relationships between drivers. The SBCDDB is a powerful resource for comparative oncogenomic analyses with human cancer genomics datasets for driver prioritization.",SBCDDB,0.994983792,Sleeping Beauty Cancer Driver Database,0.762524348,SBCDDB,0.994983792,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24563838,http://aftol.umn.edu,"Research and teaching with the AFTOL SBD: an informatics resource for fungal subcellular and biochemical data. The Structural and Biochemical Database (SBD), developed as part of the US NSF-funded Assembling the Fungal Tree of Life (AFTOL), is a multi-investigator project. It is a major resource to present and manage morphological and biochemical information on Fungi and serves as a phyloinformatics tool for the scientific community. It also is an important resource for teaching mycology. The database, available at http://aftol.umn.edu, includes new and previously published subcellular data on Fungi, supplemented with images and literature links. Datasets automatically combined in NEXUS format from the site permit independent and combined (with molecular data) phylogenetic analyses. Character lists, a major feature of the site, serve as primary reference documents of subcellular and biochemical characters that distinguish taxa across the major fungal lineages. The character lists illustrated with images and drawings are informative for evolutionary and developmental biologists as well as educators, students and the public. Fungal Subcellular Ontology (FSO), developed as part of this effort is a primary initiative to provide a controlled vocabulary describing subcellular structures unique to Fungi. FSO establishes a full complement of terms that provide an operating ontological framework for the database. 
Examples are provided for using the database for teaching.",SBD,0.975190625,Structural and Biochemical Database,0.948247939,SBD,0.975190625,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/29/2013 +21472436,http://sbkb.org,"The Structural Biology Knowledgebase: a portal to protein structures, sequences, functions, and methods. The Protein Structure Initiative's Structural Biology Knowledgebase (SBKB, URL: http://sbkb.org ) is an open web resource designed to turn the products of the structural genomics and structural biology efforts into knowledge that can be used by the biological community to understand living systems and disease. Here we will present examples on how to use the SBKB to enable biological research. For example, a protein sequence or Protein Data Bank (PDB) structure ID search will provide a list of related protein structures in the PDB, associated biological descriptions (annotations), homology models, structural genomics protein target status, experimental protocols, and the ability to order available DNA clones from the PSI:Biology-Materials Repository. A text search will find publication and technology reports resulting from the PSI's high-throughput research efforts. Web tools that aid in research, including a system that accepts protein structure requests from the community, will also be described. Created in collaboration with the Nature Publishing Group, the Structural Biology Knowledgebase monthly update also provides a research library, editorials about new research advances, news, and an events calendar to present a broader view of structural genomics and structural biology.",SBKB,0.985803445,Structural Biology Knowledgebase,0.919640875,SBKB,0.985803445,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/7/2011 +26647370,http://webapp.cabgrid.res.in/sbmdb,"SBMDb: first whole genome putative microsatellite DNA marker database of sugarbeet for bioenergy and industrial applications. . 
DNA marker plays important role as valuable tools to increase crop productivity by finding plausible answers to genetic variations and linking the Quantitative Trait Loci (QTL) of beneficial trait. Prior approaches in development of Short Tandem Repeats (STR) markers were time consuming and inefficient. Recent methods invoking the development of STR markers using whole genomic or transcriptomics data has gained wide importance with immense potential in developing breeding and cultivator improvement approaches. Availability of whole genome sequences and in silico approaches has revolutionized bulk marker discovery. We report world's first sugarbeet whole genome marker discovery having 145 K markers along with 5 K functional domain markers unified in common platform using MySQL, Apache and PHP in SBMDb. Embedded markers and corresponding location information can be selected for desired chromosome, location/interval and primers can be generated using Primer3 core, integrated at backend. Our analyses revealed abundance of 'mono' repeat (76.82%) over 'di' repeats (13.68%). Highest density (671.05 markers/Mb) was found in chromosome 1 and lowest density (341.27 markers/Mb) in chromosome 6. Current investigation of sugarbeet genome marker density has direct implications in increasing mapping marker density. This will enable present linkage map having marker distance of ∼2 cM, i.e. from 200 to 2.6 Kb, thus facilitating QTL/gene mapping. We also report e-PCR-based detection of 2027 polymorphic markers in panel of five genotypes. These markers can be used for DUS test of variety identification and MAS/GAS in variety improvement program. The present database presents wide source of potential markers for developing and implementing new approaches for molecular breeding required to accelerate industrious use of this crop, especially for sugar, health care products, medicines and color dye. 
Identified markers will also help in improvement of bioenergy trait of bioethanol and biogas production along with reaping advantage of crop efficiency in terms of low water and carbon footprint especially in era of climate change. Database URL: http://webapp.cabgrid.res.in/sbmdb/.",SBMDb,0.995680153,NA,0,SBMDb,0.995680153,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/8/2015 +26590403,http://sbrblood.nhgri.nih.gov,"SBR-Blood: systems biology repository for hematopoietic cells. Extensive research into hematopoiesis (the development of blood cells) over several decades has generated large sets of expression and epigenetic profiles in multiple human and mouse blood cell types. However, there is no single location to analyze how gene regulatory processes lead to different mature blood cells. We have developed a new database framework called hematopoietic Systems Biology Repository (SBR-Blood), available online at http://sbrblood.nhgri.nih.gov, which allows user-initiated analyses for cell type correlations or gene-specific behavior during differentiation using publicly available datasets for array- and sequencing-based platforms from mouse hematopoietic cells. SBR-Blood organizes information by both cell identity and by hematopoietic lineage. The validity and usability of SBR-Blood has been established through the reproduction of workflows relevant to expression data, DNA methylation, histone modifications and transcription factor occupancy profiles.",SBR-Blood,0.985119432,hematopoietic Systems Biology Repository,0.887098688,SBR-Blood,0.985119432,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2015 +"21398668, 25300483",http://bioinfo-pharma.u-strasbg.fr/scPDB,"sc-PDB: a database for identifying variations and multiplicity of 'druggable' binding sites in proteins. Background The sc-PDB database is an annotated archive of druggable binding sites extracted from the Protein Data Bank. 
It contains all-atoms coordinates for 8166 protein-ligand complexes, chosen for their geometrical and physico-chemical properties. The sc-PDB provides a functional annotation for proteins, a chemical description for ligands and the detailed intermolecular interactions for complexes. The sc-PDB now includes a hierarchical classification of all the binding sites within a functional class. Method The sc-PDB entries were first clustered according to the protein name indifferent of the species. For each cluster, we identified dissimilar sites (e.g. catalytic and allosteric sites of an enzyme). SCOPE AND APPLICATIONS: The classification of sc-PDB targets by binding site diversity was intended to facilitate chemogenomics approaches to drug design. In ligand-based approaches, it avoids comparing ligands that do not share the same binding site. In structure-based approaches, it permits to quantitatively evaluate the diversity of the binding site definition (variations in size, sequence and/or structure). Availability The sc-PDB database is freely available at: http://bioinfo-pharma.u-strasbg.fr/scPDB.",sc-PDB,0.996898383,NA,0,sc-PDB,0.996898383,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/9/2014 +24991975,http://bioinfo-pharma.u-strasbg.fr/scPDBFrag,"sc-PDB-Frag: a database of protein-ligand interaction patterns for Bioisosteric replacements. Bioisosteric replacement plays an important role in medicinal chemistry by keeping the biological activity of a molecule while changing either its core scaffold or substituents, thereby facilitating lead optimization and patenting. Bioisosteres are classically chosen in order to keep the main pharmacophoric moieties of the substructure to replace. However, notably when changing a scaffold, no attention is usually paid as whether all atoms of the reference scaffold are equally important for binding to the desired target. 
We herewith propose a novel database for bioisosteric replacement (scPDBFrag), capitalizing on our recently published structure-based approach to scaffold hopping, focusing on interaction pattern graphs. Protein-bound ligands are first fragmented and the interaction of the corresponding fragments with their protein environment computed-on-the-fly. Using an in-house developed graph alignment tool, interaction patterns graphs can be compared, aligned, and sorted by decreasing similarity to any reference. In the herein presented sc-PDB-Frag database ( http://bioinfo-pharma.u-strasbg.fr/scPDBFrag ), fragments, interaction patterns, alignments, and pairwise similarity scores have been extracted from the sc-PDB database of 8077 druggable protein-ligand complexes and further stored in a relational database. We herewith present the database, its Web implementation, and procedures for identifying true bioisosteric replacements based on conserved interaction patterns.",sc-PDB-Frag,0.918389161,NA,0,sc-PDB-Frag,0.918389161,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/17/2014 +33010177,http://easybioai.com/sc2disease,"SC2disease: a manually curated database of single-cell transcriptome for human diseases. SC2disease (http://easybioai.com/sc2disease/) is a manually curated database that aims to provide a comprehensive and accurate resource of gene expression profiles in various cell types for different diseases. With the development of single-cell RNA sequencing (scRNA-seq) technologies, uncovering cellular heterogeneity of different tissues for different diseases has become feasible by profiling transcriptomes across cell types at the cellular level. In particular, comparing gene expression profiles between different cell types and identifying cell-type-specific genes in various diseases offers new possibilities to address biological and medical questions. 
However, systematic, hierarchical and vast databases of gene expression profiles in human diseases at the cellular level are lacking. Thus, we reviewed the literature prior to March 2020 for studies which used scRNA-seq to study diseases with human samples, and developed the SC2disease database to summarize all the data by different diseases, tissues and cell types. SC2disease documents 946 481 entries, corresponding to 341 cell types, 29 tissues and 25 diseases. Each entry in the SC2disease database contains comparisons of differentially expressed genes between different cell types, tissues and disease-related health status. Furthermore, we reanalyzed gene expression matrix by unified pipeline to improve the comparability between different studies. For each disease, we also compare cell-type-specific genes with the corresponding genes of lead single nucleotide polymorphisms (SNPs) identified in genome-wide association studies (GWAS) to implicate cell type specificity of the traits.",SC2disease,0.997551847,NA,0,SC2disease,0.997551847,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +34046592,"http://structurome.bb.iastate.edu/sars-cov-2, http://structurome.bb.iastate.edu/sars-cov-2-global-model-comparisons","A map of the SARS-CoV-2 RNA structurome. SARS-CoV-2 has exploded throughout the human population. To facilitate efforts to gain insights into SARS-CoV-2 biology and to target the virus therapeutically, it is essential to have a roadmap of likely functional regions embedded in its RNA genome. In this report, we used a bioinformatics approach, ScanFold, to deduce the local RNA structural landscape of the SARS-CoV-2 genome with the highest likelihood of being functional. We recapitulate previously-known elements of RNA structure and provide a model for the folding of an essential frameshift signal. 
Our results find that SARS-CoV-2 is greatly enriched in unusually stable and likely evolutionarily ordered RNA structure, which provides a large reservoir of potential drug targets for RNA-binding small molecules. Results are enhanced via the re-analyses of publicly-available genome-wide biochemical structure probing datasets that are broadly in agreement with our models. Additionally, ScanFold was updated to incorporate experimental data as constraints in the analysis to facilitate comparisons between ScanFold and other RNA modelling approaches. Ultimately, ScanFold was able to identify eight highly structured/conserved motifs in SARS-CoV-2 that agree with experimental data, without explicitly using these data. All results are made available via a public database (the RNAStructuromeDB: https://structurome.bb.iastate.edu/sars-cov-2) and model comparisons are readily viewable at https://structurome.bb.iastate.edu/sars-cov-2-global-model-comparisons.",ScanFold,0.918983519,NA,0,ScanFold,0.918983519,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,5/22/2021 +28984188,http://bioinfo.wilmer.jhu.edu/ScaPD,"ScaPD: a database for human scaffold proteins. Background Scaffold proteins play a critical role in an increasing number of biological signaling processes, including simple tethering mechanism, regulating selectivity in pathways, shaping cellular behaviors. While many databases document the signaling pathways, few databases are devoted to the scaffold proteins that medicate signal transduction. Results Here, we have developed a user-friendly database, ScaPD, to describe computationally predicted, experimentally validated scaffold proteins and associated signaling pathways. It currently contains 273 scaffold proteins and 1118 associated signaling pathways. The database allows users to search, navigate and download the scaffold protein-mediated signaling networks. 
Conclusions Manually curated and predicted scaffold protein data will be a foundation for further investigation of the scaffold protein in the signal transduction. With maintained up-to-date data, ScaPD ( http://bioinfo.wilmer.jhu.edu/ScaPD ) will be a valuable resource for understanding how individual signaling pathways are regulated.",ScaPD,0.998022795,NA,0,ScaPD,0.998022795,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/3/2017 +32487193,http://www.stomachcancerdb.org,"SCDb: an integrated database of stomach cancer. Background Stomach cancer (SC) is a type of cancer, which is derived from the stomach mucous membrane. As there are non-specific symptoms or no noticeable symptoms observed at the early stage, newly diagnosed SC cases usually reach an advanced stage and are thus difficult to cure. Therefore, in this study, we aimed to develop an integrated database of SC. Methods SC-related genes were identified through literature mining and by analyzing the publicly available microarray datasets. Using the RNA-seq, miRNA-seq and clinical data downloaded from The Cancer Genome Atlas (TCGA), the Kaplan-Meier (KM) survival curves for all the SC-related genes were generated and analyzed. The miRNAs (miRanda, miRTarget2, PicTar, PITA and TargetScan databases), SC-related miRNAs (HMDD and miR2Disease databases), single nucleotide polymorphisms (SNPs, dbSNP database), and SC-related SNPs (ClinVar database) were also retrieved from the indicated databases. Moreover, gene_disease (OMIM and GAD databases), copy number variation (CNV, DGV database), methylation (PubMeth database), drug (WebGestalt database), and transcription factor (TF, TRANSFAC database) analyses were performed for the differentially expressed genes (DEGs). Results In total, 9990 SC-related genes (including 8347 up-regulated genes and 1643 down-regulated genes) were identified, among which, 65 genes were further confirmed as SC-related genes by performing enrichment analysis. 
Besides this, 457 miRNAs, 20 SC-related miRNAs, 1570 SNPs, 108 SC-related SNPs, 419 TFs, 44,605 CNVs, 3404 drug-associated genes, 63 genes with methylation, and KM survival curves of 20,264 genes were obtained. By integrating these datasets, an integrated database of stomach cancer, designated as SCDb, (available at http://www.stomachcancerdb.org/) was established. Conclusions As a comprehensive resource for human SC, SCDb database will be very useful for performing SC-related research in future, and will thus promote the understanding of the pathogenesis of SC.",SCDb,0.972417712,NA,0,SCDb,0.972417712,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/2/2020 +31611909,http://scdevdb.deepomics.org,"SCDevDB: A Database for Insights Into Single-Cell Gene Expression Profiles During Human Developmental Processes. Single-cell RNA-seq studies profile thousands of cells in developmental processes. Current databases for human single-cell expression atlas only provide search and visualize functions for a selected gene in specific cell types or subpopulations. These databases are limited to technical properties or visualization of single-cell RNA-seq data without considering the biological relations of their collected cell groups. Here, we developed a database to investigate single-cell gene expression profiling during different developmental pathways (SCDevDB). In this database, we collected 10 human single-cell RNA-seq datasets, split these datasets into 176 developmental cell groups, and constructed 24 different developmental pathways. SCDevDB allows users to search the expression profiles of the interested genes across different developmental pathways. 
It also provides lists of differentially expressed genes during each developmental pathway, T-distributed stochastic neighbor embedding maps showing the relationships between developmental stages based on these differentially expressed genes, Gene Ontology, and Kyoto Encyclopedia of Genes and Genomes analysis results of these differentially expressed genes. This database is freely available at https://scdevdb.deepomics.org.",SCDevDB,0.997864783,NA,0,SCDevDB,0.997864783,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/26/2019 +27800578,http://www.scenegrammarlab.com/research/scegram-database,"SCEGRAM: An image database for semantic and syntactic inconsistencies in scenes. Our visual environment is not random, but follows compositional rules according to what objects are usually found where. Despite the growing interest in how such semantic and syntactic rules - a scene grammar - enable effective attentional guidance and object perception, no common image database containing highly-controlled object-scene modifications has been publically available. Such a database is essential in minimizing the risk that low-level features drive high-level effects of interest, which is being discussed as possible source of controversial study results. To generate the first database of this kind - SCEGRAM - we took photographs of 62 real-world indoor scenes in six consistency conditions that contain semantic and syntactic (both mild and extreme) violations as well as their combinations. Importantly, always two scenes were paired, so that an object was semantically consistent in one scene (e.g., ketchup in kitchen) and inconsistent in the other (e.g., ketchup in bathroom). Low-level salience did not differ between object-scene conditions and was generally moderate. Additionally, SCEGRAM contains consistency ratings for every object-scene condition, as well as object-absent scenes and object-only images. 
Finally, a cross-validation using eye-movements replicated previous results of longer dwell times for both semantic and syntactic inconsistencies compared to consistent controls. In sum, the SCEGRAM image database is the first to contain well-controlled semantic and syntactic object-scene inconsistencies that can be used in a broad range of cognitive paradigms (e.g., verbal and pictorial priming, change detection, object identification, etc.) including paradigms addressing developmental aspects of scene grammar. SCEGRAM can be retrieved for research purposes from http://www.scenegrammarlab.com/research/scegram-database/ .",SCEGRAM,0.992693186,NA,0,SCEGRAM,0.992693186,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2017 +22140105,http://ural.wustl.edu/ScerTF,"ScerTF: a comprehensive database of benchmarked position weight matrices for Saccharomyces species. Saccharomyces cerevisiae is a primary model for studies of transcriptional control, and the specificities of most yeast transcription factors (TFs) have been determined by multiple methods. However, it is unclear which position weight matrices (PWMs) are most useful; for the roughly 200 TFs in yeast, there are over 1200 PWMs in the literature. To address this issue, we created ScerTF, a comprehensive database of 1226 motifs from 11 different sources. We identified a single matrix for each TF that best predicts in vivo data by benchmarking matrices against chromatin immunoprecipitation and TF deletion experiments. We also used in vivo data to optimize thresholds for identifying regulatory sites with each matrix. To correct for biases from different methods, we developed a strategy to combine matrices. These aligned matrices outperform the best available matrix for several TFs. We used the matrices to predict co-occurring regulatory elements in the genome and identified many known TF combinations. In addition, we predict new combinations and provide evidence of combinatorial regulation from gene expression data. 
The database is available through a web interface at http://ural.wustl.edu/ScerTF. The site allows users to search the database with a regulatory site or matrix to identify the TFs most likely to bind the input sequence.",ScerTF,0.989538968,NA,0,ScerTF,0.989538968,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/2/2011 +23161692,http://SchistoDB.net,"SchistoDB: an updated genome resource for the three key schistosomes of humans. The new release of SchistoDB (http://SchistoDB.net) provides a rich resource of genomic data for key blood flukes (genus Schistosoma) which cause disease in hundreds of millions of people worldwide. SchistoDB integrates whole-genome sequence and annotation of three species of the genus and provides enhanced bioinformatics analyses and data-mining tools. A simple, yet comprehensive web interface provided through the Strategies Web Development Kit is available for the mining and visualization of the data. Genomic scale data can be queried based on BLAST searches, annotation keywords and gene ID searches, gene ontology terms, sequence motifs, protein characteristics and phylogenetic relationships. Search strategies can be saved within a user's profile for future retrieval and may also be shared with other researchers using a unique web address.",SchistoDB,0.998987973,NA,0,SchistoDB,0.998987973,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2012 +34514416,http://thecailab.com/scissor,"SCISSOR™: a single-cell inferred site-specific omics resource for tumor microenvironment association study. Tumor tissues are heterogeneous with different cell types in tumor microenvironment, which play an important role in tumorigenesis and tumor progression. Several computational algorithms and tools have been developed to infer the cell composition from bulk transcriptome profiles. 
However, they ignore the tissue specificity and thus a new resource for tissue-specific cell transcriptomic reference is needed for inferring cell composition in tumor microenvironment and exploring their association with clinical outcomes and tumor omics. In this study, we developed SCISSOR™ (https://thecailab.com/scissor/), an online open resource to fulfill that demand by integrating five orthogonal omics data of >6031 large-scale bulk samples, patient clinical outcomes and 451 917 high-granularity tissue-specific single-cell transcriptomic profiles of 16 cancer types. SCISSOR™ provides five major analysis modules that enable flexible modeling with adjustable parameters and dynamic visualization approaches. SCISSOR™ is valuable as a new resource for promoting tumor heterogeneity and tumor-tumor microenvironment cell interaction research, by delineating cells in the tissue-specific tumor microenvironment and characterizing their associations with tumor omics and clinical outcomes.",SCISSORâ,0.996242762,NA,0,SCISSORâ,0.996242762,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/9/2021 +25097385,http://bioinformatics.towson.edu/Soybean_SCN_proteins_2D_Gel_DB/Gel1.aspx,"SCNProDB: A database for the identification of soybean cyst nematode proteins. Soybean cyst nematode (Heterodera glycines, SCN) is the most destructive pathogen of soybean around the world. Crop rotation and resistant cultivars are used to mitigate the damage of SCN, but these approaches are not completely successful because of the varied SCN populations. Thus, the limitations of these practices with soybean dictate investigation of other avenues of protection of soybean against SCN, perhaps through genetically engineering of broad resistance to SCN. For better understanding of the consequences of genetic manipulation, elucidation of SCN protein composition at the subunit level is necessary. 
We have conducted studies to determine the composition of SCN proteins using a proteomics approach in our laboratory using twodimensional polyacrylamide gel electrophoresis (2D-PAGE) to separate SCN proteins and to characterize the proteins further using mass spectrometry. Our analysis resulted in the identification of several hundred proteins. In this investigation, we developed a web based database (SCNProDB) containing protein information obtained from our previous published studies. This database will be useful to scientists who wish to develop SCN resistant soybean varieties through genetic manipulation and breeding efforts. The database is freely accessible from: http://bioinformatics.towson.edu/Soybean_SCN_proteins_2D_Gel_DB/Gel1.aspx.",SCNProDB,0.972986102,NA,0,SCNProDB,0.972986102,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/30/2014 +31724711,http://scop.mrc-lmb.cam.ac.uk,"The SCOP database in 2020: expanded classification of representative family and superfamily domains of known protein structures. The Structural Classification of Proteins (SCOP) database is a classification of protein domains organised according to their evolutionary and structural relationships. We report a major effort to increase the coverage of structural data, aiming to provide classification of almost all domain superfamilies with representatives in the PDB. We have also improved the database schema, provided a new API and modernised the web interface. This is by far the most significant update in coverage since SCOP 1.75 and builds on the advances in schema from the SCOP 2 prototype. 
The database is accessible from http://scop.mrc-lmb.cam.ac.uk.",SCOP,0.993221243,Structural Classification of Proteins,0.824369984,SCOP,0.993221243,1,NA,31906604,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,1/1/2020 +31906604,"http://academic.oup.com/nar, http://www.oxfordjournals.org/nar/database/c","The 27th annual Nucleic Acids Research database issue and molecular biology database collection. The 2020 Nucleic Acids Research Database Issue contains 148 papers spanning molecular biology. They include 59 papers reporting on new databases and 79 covering recent changes to resources previously published in the issue. A further ten papers are updates on databases most recently published elsewhere. This issue contains three breakthrough articles: AntiBodies Chemically Defined (ABCD) curates antibody sequences and their cognate antigens; SCOP returns with a new schema and breaks away from a purely hierarchical structure; while the new Alliance of Genome Resources brings together a number of Model Organism databases to pool knowledge and tools. Major returning nucleic acid databases include miRDB and miRTarBase. Databases for protein sequence analysis include CDD, DisProt and ELM, alongside no fewer than four newcomers covering proteins involved in liquid-liquid phase separation. In metabolism and signaling, Pathway Commons, Reactome and Metabolights all contribute papers. PATRIC and MicroScope update in microbial genomes while human and model organism genomics resources include Ensembl, Ensembl genomes and UCSC Genome Browser. Immune-related proteins are covered by updates from IPD-IMGT/HLA and AFND, as well as newcomers VDJbase and OGRDB. Drug design is catered for by updates from the IUPHAR/BPS Guide to Pharmacology and the Therapeutic Target Database. The entire Database Issue is freely available online on the Nucleic Acids Research website (https://academic.oup.com/nar). 
The NAR online Molecular Biology Database Collection has been revised, updating 305 entries, adding 65 new resources and eliminating 125 discontinued URLs; so bringing the current total to 1637 databases. It is available at http://www.oxfordjournals.org/nar/database/c/.",SCOP,0.867926598,AntiBodies Chemically,0.559194303,SCOP,0.867926598,1,"29316735.0, 30626175.0, 29316735.0, 30626175.0",31724711,low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: CLASS,NA,NA,1/1/2020 +32508104,http://iomics.ugent.be/scop3p,"Scop3P: A Comprehensive Resource of Human Phosphosites within Their Full Context. Protein phosphorylation is a key post-translational modification in many biological processes and is associated to human diseases such as cancer and metabolic disorders. The accurate identification, annotation, and functional analysis of phosphosites are therefore crucial to understand their various roles. Phosphosites are mainly analyzed through phosphoproteomics, which has led to increasing amounts of publicly available phosphoproteomics data. Several resources have been built around the resulting phosphosite information, but these are usually restricted to the protein sequence and basic site metadata. What is often missing from these resources, however, is context, including protein structure mapping, experimental provenance information, and biophysical predictions. We therefore developed Scop3P: a comprehensive database of human phosphosites within their full context. Scop3P integrates sequences (UniProtKB/Swiss-Prot), structures (PDB), and uniformly reprocessed phosphoproteomics data (PRIDE) to annotate all known human phosphosites. Furthermore, these sites are put into biophysical context by annotating each phosphoprotein with per-residue structural propensity, solvent accessibility, disordered probability, and early folding information. 
Scop3P, available at https://iomics.ugent.be/scop3p, presents a unique resource for visualization and analysis of phosphosites and for understanding of phosphosite structure-function relationships.",Scop3P,0.996602729,NA,0,Scop3P,0.996602729,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/18/2020 +"24304899, 27914894",http://scop.berkeley.edu,"SCOPe: Structural Classification of Proteins--extended, integrating SCOP and ASTRAL data and classification of new structures. Structural Classification of Proteins-extended (SCOPe, http://scop.berkeley.edu) is a database of protein structural relationships that extends the SCOP database. SCOP is a manually curated ordering of domains from the majority of proteins of known structure in a hierarchy according to structural and evolutionary relationships. Development of the SCOP 1.x series concluded with SCOP 1.75. The ASTRAL compendium provides several databases and tools to aid in the analysis of the protein structures classified in SCOP, particularly through the use of their sequences. SCOPe extends version 1.75 of the SCOP database, using automated curation methods to classify many structures released since SCOP 1.75. We have rigorously benchmarked our automated methods to ensure that they are as accurate as manual curation, though there are many proteins to which our methods cannot be applied. SCOPe is also partially manually curated to correct some errors in SCOP. SCOPe aims to be backward compatible with SCOP, providing the same parseable files and a history of changes between all stable SCOP and SCOPe releases. SCOPe also incorporates and updates the ASTRAL database. 
The latest release of SCOPe, 2.03, contains 59 514 Protein Data Bank (PDB) entries, increasing the number of structures classified in SCOP by 55% and including more than 65% of the protein structures in the PDB.",SCOPe,0.985311985,Structural Classification of Proteins-extended,0.87680452,SCOPe,0.985311985,2,NA,26553811,NA,NA,NA,conflicting record(s) to be removed,NA,NA,NA,11/30/2016 +26553811,http://caps.ncbs.res.in/pass2,"PASS2 database for the structure-based sequence alignment of distantly related SCOP domain superfamilies: update to version 5 and added features. Structure-based sequence alignment is an essential step in assessing and analysing the relationship of distantly related proteins. PASS2 is a database that records such alignments for protein domain superfamilies and has been constantly updated periodically. This update of the PASS2 version, named as PASS2.5, directly corresponds to the SCOPe 2.04 release. All SCOPe structural domains that share less than 40% sequence identity, as defined by the ASTRAL compendium of protein structures, are included. The current version includes 1977 superfamilies and has been assembled utilizing the structure-based sequence alignment protocol. Such an alignment is obtained initially through MATT, followed by a refinement through the COMPARER program. The JOY program has been used for structural annotations of such alignments. In this update, we have automated the protocol and focused on inclusion of new features such as mapping of GO terms, absolutely conserved residues among the domains in a superfamily and inclusion of PDBs, that are absent in SCOPe 2.04, using the HMM profiles from the alignments of the superfamily members and are provided as a separate list. We have also implemented a more user-friendly manner of data presentation and options for downloading more features. 
PASS2.5 version is available at http://caps.ncbs.res.in/pass2/.",SCOPe,0.92723459,NA,0,SCOPe,0.92723459,1,22123743,"24304899.0, 27914894.0",low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,11/8/2015 +29045713,http://single-cell.clst.riken.jp,"SCPortalen: human and mouse single-cell centric database. Published single-cell datasets are rich resources for investigators who want to address questions not originally asked by the creators of the datasets. The single-cell datasets might be obtained by different protocols and diverse analysis strategies. The main challenge in utilizing such single-cell data is how we can make the various large-scale datasets to be comparable and reusable in a different context. To challenge this issue, we developed the single-cell centric database 'SCPortalen' (http://single-cell.clst.riken.jp/). The current version of the database covers human and mouse single-cell transcriptomics datasets that are publicly available from the INSDC sites. The original metadata was manually curated and single-cell samples were annotated with standard ontology terms. Following that, common quality assessment procedures were conducted to check the quality of the raw sequence. Furthermore, primary data processing of the raw data followed by advanced analyses and interpretation have been performed from scratch using our pipeline. In addition to the transcriptomics data, SCPortalen provides access to single-cell image files whenever available. The target users of SCPortalen are all researchers interested in specific cell types or population heterogeneity. 
Through the web interface of SCPortalen users are easily able to search, explore and download the single-cell datasets of their interests.",SCPortalen,0.995376956,NA,0,SCPortalen,0.995376956,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +22067445,http://dcv.uhnres.utoronto.ca/SCRIPDB,"SCRIPDB: a portal for easy access to syntheses, chemicals and reactions in patents. The patent literature is a rich catalog of biologically relevant chemicals; many public and commercial molecular databases contain the structures disclosed in patent claims. However, patents are an equally rich source of metadata about bioactive molecules, including mechanism of action, disease class, homologous experimental series, structural alternatives, or the synthetic pathways used to produce molecules of interest. Unfortunately, this metadata is discarded when chemical structures are deposited separately in databases. SCRIPDB is a chemical structure database designed to make this metadata accessible. SCRIPDB provides the full original patent text, reactions and relationships described within any individual patent, in addition to the molecular files common to structural databases. We discuss how such information is valuable in medical text mining, chemical image analysis, reaction extraction and in silico pharmaceutical lead optimization. SCRIPDB may be searched by exact chemical structure, substructure or molecular similarity and the results may be restricted to patents describing synthetic routes. SCRIPDB is available at http://dcv.uhnres.utoronto.ca/SCRIPDB.",SCRIPDB,0.997730494,NA,0,SCRIPDB,0.997730494,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2011 +29617941,http://www.firmiana.org/responders,"A reference peptide database for proteome quantification based on experimental mass spectrum response curves. Motivation Mass spectrometry (MS) based quantification of proteins/peptides has become a powerful tool in biological research with high sensitivity and throughput. 
The accuracy of quantification, however, has been problematic as not all peptides are suitable for quantification. Several methods and tools have been developed to identify peptides that response well in mass spectrometry and they are mainly based on predictive models, and rarely consider the linearity of the response curve, limiting the accuracy and applicability of the methods. An alternative solution is to select empirically superior peptides that offer satisfactory MS response intensity and linearity in a wide dynamic range of peptide concentration. Results We constructed a reference database for proteome quantification based on experimental mass spectrum response curves. The intensity and dynamic range of over 2 647 773 transitions from 121 318 peptides were obtained from a set of dilution experiments, covering 11 040 gene products. These transitions and peptides were evaluated and presented in a database named SCRIPT-MAP. We showed that the best-responder (BR) peptide approach for quantification based on SCRIPT-MAP database is robust, repeatable and accurate in proteome-scale protein quantification. This study provides a reference database as well as a peptides/transitions selection method for quantitative proteomics. Availability and implementation SCRIPT-MAP database is available at http://www.firmiana.org/responders/. Supplementary information Supplementary data are available at Bioinformatics online.",SCRIPT-MAP,0.988087222,NA,0,SCRIPT-MAP,0.988087222,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2018 +29961821,http://sda.denglab.org,"SDADB: a functional annotation database of protein structural domains. . Annotating functional terms with individual domains is essential for understanding the functions of full-length proteins. We describe SDADB, a functional annotation database for structural domains. SDADB provides associations between gene ontology (GO) terms and SCOP domains calculated with an integrated framework. 
GO annotations are assigned probabilities of being correct, which are estimated with a Bayesian network by taking advantage of structural neighborhood mappings, SCOP-InterPro domain mapping information, position-specific scoring matrices (PSSMs) and sequence homolog features, with the most substantial contribution coming from high-coverage structure-based domain-protein mappings. The domain-protein mappings are computed using large-scale structure alignment. SDADB contains ontological terms with probabilistic scores for more than 214 000 distinct SCOP domains. It also provides additional features include 3D structure alignment visualization, GO hierarchical tree view, search, browse and download options.Database URL: http://sda.denglab.org.",SDADB,0.996021926,NA,0,SDADB,0.996021926,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +30714194,http://sdred.biocatnet.de,"The Short-chain Dehydrogenase/Reductase Engineering Database (SDRED): A classification and analysis system for a highly diverse enzyme family. The Short-chain Dehydrogenases/Reductases Engineering Database (SDRED) covers one of the largest known protein families (168 150 proteins). Assignment to the superfamilies of Classical and Extended SDRs was achieved by global sequence similarity and by identification of family-specific sequence motifs. Two standard numbering schemes were established for Classical and Extended SDRs that allow for the determination of conserved amino acid residues, such as cofactor specificity determining positions or superfamily specific sequence motifs. The comprehensive sequence dataset of the SDRED facilitates the refinement of family-specific sequence motifs. The glycine-rich motifs for Classical and Extended SDRs were refined to improve the precision of superfamily classification. In each superfamily, the majority of sequences formed a tightly connected sequence network and belonged to a large homologous family. 
Despite their different sequence motifs and their different sequence length, the two sequence networks of Classical and Extended SDRs are not separate, but connected by edges at a threshold of 40% sequence similarity, indicating that all SDRs belong to a large, connected network. The SDRED is accessible at https://sdred.biocatnet.de/.",SDRED,0.985853091,Short-chain Dehydrogenases/Reductases Engineering Database,0.874342166,SDRED,0.985853091,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/25/2019 +32611314,http://bioinfo.dcc.ufmg.br/glutantbase,"Glutantβase: a database for improving the rational design of glucose-tolerant β-glucosidases. Β-glucosidases are key enzymes used in second-generation biofuel production. They act in the last step of the lignocellulose saccharification, converting cellobiose in glucose. However, most of the β-glucosidases are inhibited by high glucose concentrations, which turns it a limiting step for industrial production. Thus, β-glucosidases have been targeted by several studies aiming to understand the mechanism of glucose tolerance, pH and thermal resistance for constructing more efficient enzymes. In this paper, we present a database of β-glucosidase structures, called Glutantβase. Our database includes 3842 GH1 β-glucosidase sequences collected from UniProt. We modeled the sequences by comparison and predicted important features in the 3D-structure of each enzyme. Glutantβase provides information about catalytic and conserved amino acids, residues of the coevolution network, protein secondary structure, and residues located in the channel that guides to the active site. We also analyzed the impact of beneficial mutations reported in the literature, predicted in analogous positions, for similar enzymes. We suggested these mutations based on six previously described mutants that showed high catalytic activity, glucose tolerance, or thermostability (A404V, E96K, H184F, H228T, L441F, and V174C). 
Then, we used molecular docking to verify the impact of the suggested mutations in the affinity of protein and ligands (substrate and product). Our results suggest that only mutations based on the H228T mutant can reduce the affinity for glucose (product) and increase affinity for cellobiose (substrate), which indicates an increment in the resistance to product inhibition and agrees with computational and experimental results previously reported in the literature. More resistant β-glucosidases are essential to saccharification in industrial applications. However, thermostable and glucose-tolerant β-glucosidases are rare, and their glucose tolerance mechanisms appear to be related to multiple and complex factors. We gather here, a set of information, and made predictions aiming to provide a tool for supporting the rational design of more efficient β-glucosidases. We hope that Glutantβase can help improve second-generation biofuel production. Glutantβase is available at http://bioinfo.dcc.ufmg.br/glutantbase .",se,0.917423546,NA,0,se,0.917423546,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,7/1/2020 +24907201,http://seabase.core.cli.mbl.edu,"SeaBase: a multispecies transcriptomic resource and platform for gene network inference. Marine and aquatic animals are extraordinarily useful as models for identifying mechanisms of development and evolution, regeneration, resistance to cancer, longevity and symbiosis, among many other areas of research. This is due to the great diversity of these organisms and their wide-ranging capabilities. Genomics tools are essential for taking advantage of these ""free lessons"" of nature. However, genomics and transcriptomics are challenging in emerging model systems. Here, we present SeaBase, a tool for helping to meet these needs. Specifically, SeaBase provides a platform for sharing and searching transcriptome data. 
More importantly, SeaBase will support a growing number of tools for inferring gene network mechanisms. The first dataset available on SeaBase is a developmental transcriptomic profile of the sea anemone Nematostella vectensis (Anthozoa, Cnidaria). Additional datasets are currently being prepared and we are aiming to expand SeaBase to include user-supplied data for any number of marine and aquatic organisms, thereby supporting many potentially new models for gene network studies. SeaBase can be accessed online at: http://seabase.core.cli.mbl.edu.",SeaBase,0.991243899,NA,0,SeaBase,0.991243899,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/6/2014 +23193298,http://db-mml.sjtu.edu.cn/SecReT4,"SecReT4: a web-based bacterial type IV secretion system resource. SecReT4 (http://db-mml.sjtu.edu.cn/SecReT4/) is an integrated database providing comprehensive information of type IV secretion systems (T4SSs) in bacteria. T4SSs are versatile assemblages that promote genetic exchange and/or effector translocation with consequent impacts on pathogenesis and genome plasticity. T4SSs have been implicated in conjugation, DNA uptake and release and effector translocation. The effectors injected into eukaryotic target cells can lead to alteration of host cellular processes during infection. SecReT4 offers a unique, highly organized, readily exploreable archive of known and putative T4SSs and cognate effectors in bacteria. It currently contains details of 10 752 core components mapping to 808 T4SSs and 1884 T4SS effectors found in representatives of 289 bacterial species, as well as a collection of more than 900 directly related references. A broad range of similarity search, sequence alignment, phylogenetic, primer design and other functional analysis tools are readily accessible via SecReT4. 
We propose that SecReT4 will facilitate efficient investigation of large numbers of these systems, recognition of diverse patterns of sequence-, gene- and/or functional conservation and an improved understanding of the biological roles and significance of these versatile molecular machines. SecReT4 will be regularly updated to ensure its ongoing maximum utility to the research community.",SecReT4,0.996587932,NA,0,SecReT4,0.996587932,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2012 +25640659,http://db-mml.sjtu.edu.cn/SecReT6,"SecReT6: a web-based resource for type VI secretion systems found in bacteria. SecReT6 (http://db-mml.sjtu.edu.cn/SecReT6/) is an integrated database providing comprehensive information on type VI secretion systems (T6SSs) in bacteria. T6SSs are a class of sophisticated cell contact-dependent apparatuses involved in mediating antagonistic or synergistic communications between bacteria and/or bacteria and eukaryotes. These apparatuses have recently been found to be widely distributed among Gram-negative bacterial species. SecReT6 offers a unique, readily explorable archive of known and putative T6SSs, and cognate effectors found in bacteria. It currently contains data on 11 167 core T6SS components mapping to 906 T6SSs found in 498 bacterial strains representing 240 species, as well as a collection of over 600 directly relevant references. Also collated and archived were 1340 diverse candidate secreted effectors which were experimentally shown and/or predicted to be delivered by T6SSs into target eukaryotic and/or prokaryotic cells as well as 196 immunity proteins. A broad range of T6SS gene cluster detection and comparative analysis tools are readily accessible via SecReT6, which may aid identification of effectors and immunity proteins around the T6SS core components. 
This database will be regularly updated to ensure its ongoing maximal utility and relevance to the scientific research community.",SecReT6,0.997667551,NA,0,SecReT6,0.997667551,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2015 +30371817,http://www.licpathway.net/sedb,"SEdb: a comprehensive human super-enhancer database. Super-enhancers are important for controlling and defining the expression of cell-specific genes. With research on human disease and biological processes, human H3K27ac ChIP-seq datasets are accumulating rapidly, creating the urgent need to collect and process these data comprehensively and efficiently. More importantly, many studies showed that super-enhancer-associated single nucleotide polymorphisms (SNPs) and transcription factors (TFs) strongly influence human disease and biological processes. Here, we developed a comprehensive human super-enhancer database (SEdb, http://www.licpathway.net/sedb) that aimed to provide a large number of available resources on human super-enhancers. The database was annotated with potential functions of super-enhancers in the gene regulation. The current version of SEdb documented a total of 331 601 super-enhancers from 542 samples. Especially, unlike existing super-enhancer databases, we manually curated and classified 410 available H3K27ac samples from >2000 ChIP-seq samples from NCBI GEO/SRA. Furthermore, SEdb provides detailed genetic and epigenetic annotation information on super-enhancers. Information includes common SNPs, motif changes, expression quantitative trait locus (eQTL), risk SNPs, transcription factor binding sites (TFBSs), CRISPR/Cas9 target sites and Dnase I hypersensitivity sites (DHSs) for in-depth analyses of super-enhancers. 
SEdb will help elucidate super-enhancer-related functions and find potential biological effects.",SEdb,0.99522537,comprehensive human super-enhancer database,0.846477594,SEdb,0.99522537,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +29069402,http://biocc.hrbmu.edu.cn/SEECancer,"SEECancer: a resource for somatic events in evolution of cancer genome. Cancer cells progressively evolve from a premalignant to a malignant state, which is driven by accumulating somatic alterations that confer normal cells a fitness advantage. Improvements in high-throughput sequencing techniques have led to an increase in construction of tumor phylogenetics and identification of somatic driver events that specifically occurred in different tumor progression stages. Here, we developed the SEECancer database (http://biocc.hrbmu.edu.cn/SEECancer), which aims to present the comprehensive cancer evolutionary stage-specific somatic events (including early-specific, late-specific, relapse-specific, metastasis-specific, drug-resistant and drug-induced genomic events) and their temporal orders. By manually curating over 10 000 published articles, 1231 evolutionary stage-specific genomic events and 5772 temporal orders involving 82 human cancers and 23 tissue origins were collected and deposited in the SEECancer database. Each entry contains the somatic event, evolutionary stage, cancer type, detection approach and relevant evidence. SEECancer provides a user-friendly interface for browsing, searching and downloading evolutionary stage-specific somatic events and temporal relationships in various cancers. 
With increasing attention on cancer genome evolution, the necessary information in SEECancer will facilitate understanding of cancer etiology and development of evolutionary therapeutics, and help clinicians to discover biomarkers for monitoring tumor progression.",SEECancer,0.996404111,NA,0,SEECancer,0.996404111,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24293654,"http://pubseed.theseed.org/, http://rast.nmpdr.org","The SEED and the Rapid Annotation of microbial genomes using Subsystems Technology (RAST). In 2004, the SEED (http://pubseed.theseed.org/) was created to provide consistent and accurate genome annotations across thousands of genomes and as a platform for discovering and developing de novo annotations. The SEED is a constantly updated integration of genomic data with a genome database, web front end, API and server scripts. It is used by many scientists for predicting gene functions and discovering new pathways. In addition to being a powerful database for bioinformatics research, the SEED also houses subsystems (collections of functionally related protein families) and their derived FIGfams (protein families), which represent the core of the RAST annotation engine (http://rast.nmpdr.org/). When a new genome is submitted to RAST, genes are called and their annotations are made by comparison to the FIGfam collection. If the genome is made public, it is then housed within the SEED and its proteins populate the FIGfam collection. This annotation cycle has proven to be a robust and scalable solution to the problem of annotating the exponentially increasing number of genomes. To date, >12 000 users worldwide have annotated >60 000 distinct genomes using RAST. 
Here we describe the interconnectedness of the SEED database and RAST, the RAST annotation pipeline and updates to both resources.",SEED,0.982991219,NA,0,SEED,0.982991219,1,NA,23110173,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/29/2013 +23110173,http://www.theseed.org/servers,"SEED servers: high-performance access to the SEED genomes, annotations, and metabolic models. The remarkable advance in sequencing technology and the rising interest in medical and environmental microbiology, biotechnology, and synthetic biology resulted in a deluge of published microbial genomes. Yet, genome annotation, comparison, and modeling remain a major bottleneck to the translation of sequence information into biological knowledge, hence computational analysis tools are continuously being developed for rapid genome annotation and interpretation. Among the earliest, most comprehensive resources for prokaryotic genome analysis, the SEED project, initiated in 2003 as an integration of genomic data and analysis tools, now contains >5,000 complete genomes, a constantly updated set of curated annotations embodied in a large and growing collection of encoded subsystems, a derived set of protein families, and hundreds of genome-scale metabolic models. Until recently, however, maintaining current copies of the SEED code and data at remote locations has been a pressing issue. To allow high-performance remote access to the SEED database, we developed the SEED Servers (http://www.theseed.org/servers): four network-based servers intended to expose the data in the underlying relational database, support basic annotation services, offer programmatic access to the capabilities of the RAST annotation server, and provide access to a growing collection of metabolic models that support flux balance analysis. 
The SEED servers offer open access to regularly updated data, the ability to annotate prokaryotic genomes, the ability to create metabolic reconstructions and detailed models of metabolism, and access to hundreds of existing metabolic models. This work offers and supports a framework upon which other groups can build independent research efforts. Large integrations of genomic data represent one of the major intellectual resources driving research in biology, and programmatic access to the SEED data will provide significant utility to a broad collection of potential users.",SEED,0.908952594,NA,0,SEED,0.908952594,1,NA,24293654,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/24/2012 +25352728,http://www.gbpuat-cbsh.ac.in/departments/bi/database/seed_pro_nutra_care,"Seed Pro-Nutra Care: A tool for characterization of seed storage proteins and database of bioactive peptides having potential health benefits. Unlabelled Seed storage proteins, the major food proteins, possess unique physicochemical characteristics which determine their nutritional importance and influence their utilization by humans. Here, we describe a database driven tool named Seed Pro-Nutra Care which comprises a systematic compendium of seed storage proteins and their bioactive peptides influencing several vital organ systems for maintenance of health. Seed Pro-Nutra Careis an integrated resource on seed storage protein. This resource help in the (I) Characterization of proteins whether they belong to seed storage protein group or not. (II) Identification the bioactive peptides with their sequences using peptide name (III) Determination of physico chemical properties of seed storage proteins. (IV) Epitope identification and mapping (V) Allergenicity prediction and characterization. 
Seed Pro-Nutra Care is a compilation of data on bioactive peptides present in seed storage proteins from our own collections and other published and unpublished sources. The database provides an information resource of a variety of seed related biological information and its use for nutritional and biomedical application. Availability http://www.gbpuat-cbsh.ac.in/departments/bi/database/seed_pro_nutra_care/",Seed,0.841600955,NA,0,Seed,0.841600955,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,9/30/2014 +23087698,http://www.chernobylproteomics.sav.sk,"Seeds in Chernobyl: the database on proteome response on radioactive environment. Two serious nuclear accidents during the last quarter century (Chernobyl, 1986 and Fukushima, 2011) contaminated large agricultural areas with radioactivity. The database ""Seeds in Chernobyl"" (http://www.chernobylproteomics.sav.sk) contains the information about the abundances of hundreds of proteins from on-going investigation of mature and developing seed harvested from plants grown in radioactive Chernobyl area. This database provides a useful source of information concerning the response of the seed proteome to permanently increased level of ionizing radiation in a user-friendly format.",Seeds,0.803177357,NA,0,Seeds,0.803177357,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/10/2012 +29228298,http://www.seedstor.ac.uk,"SeedStor: A Germplasm Information Management System and Public Database. SeedStor (https://www.seedstor.ac.uk) acts as the publicly available database for the seed collections held by the Germplasm Resources Unit (GRU) based at the John Innes Centre, Norwich, UK. The GRU is a national capability supported by the Biotechnology and Biological Sciences Research Council (BBSRC). 
The GRU curates germplasm collections of a range of temperate cereal, legume and Brassica crops and their associated wild relatives, as well as precise genetic stocks, near-isogenic lines and mapping populations. With >35,000 accessions, the GRU forms part of the UK's plant conservation contribution to the Multilateral System (MLS) of the International Treaty for Plant Genetic Resources for Food and Agriculture (ITPGRFA) for wheat, barley, oat and pea. SeedStor is a fully searchable system that allows our various collections to be browsed species by species through to complicated multipart phenotype criteria-driven queries. The results from these searches can be downloaded for later analysis or used to order germplasm via our shopping cart. The user community for SeedStor is the plant science research community, plant breeders, specialist growers, hobby farmers and amateur gardeners, and educationalists. Furthermore, SeedStor is much more than a database; it has been developed to act internally as a Germplasm Information Management System that allows team members to track and process germplasm requests, determine regeneration priorities, handle cost recovery and Material Transfer Agreement paperwork, manage the Seed Store holdings and easily report on a wide range of the aforementioned tasks.",SeedStor,0.993587971,NA,0,SeedStor,0.993587971,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +22171328,http://www.bios.unc.edu/research/genomic_software/seeQTL,"seeQTL: a searchable database for human eQTLs. Summary seeQTL is a comprehensive and versatile eQTL database, including various eQTL studies and a meta-analysis of HapMap eQTL information. The database presents eQTL association results in a convenient browser, using both segmented local-association plots and genome-wide Manhattan plots. Availability and implementation seeQTL is freely available for non-commercial use at http://www.bios.unc.edu/research/genomic_software/seeQTL/. 
Contact fred_wright@unc.edu; kxia@bios.unc.edu Supplementary information Supplementary data are available at Bioinformatics online.",seeQTL,0.995602727,NA,0,seeQTL,0.995602727,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/13/2011 +29309507,http://bioinfo.life.hust.edu.cn/SEGreg,"SEGreg: a database for human specifically expressed genes and their regulations in cancer and normal tissue. Human specifically expressed genes (SEGs) usually serve as potential biomarkers for disease diagnosis and treatment. However, the regulation underlying their specific expression remains to be revealed. In this study, we constructed SEG regulation database (SEGreg; available at http://bioinfo.life.hust.edu.cn/SEGreg) for showing SEGs and their transcription factors (TFs) and microRNA (miRNA) regulations under different physiological conditions, which include normal tissue, cancer tissue and cell line. In total, SEGreg collected 6387, 1451, 4506 and 5320 SEGs from expression profiles of 34 cancer types and 55 tissues of The Cancer Genome Atlas, Cancer Cell Line Encyclopedia, Human Body Map and Genotype-Tissue Expression databases/projects, respectively. The cancer or tissue corresponding expressed miRNAs and TFs were identified from miRNA and gene expression profiles, and their targets were collected from several public resources. Then the regulatory networks of all SEGs were constructed and integrated into SEGreg. Through a user-friendly interface, users can browse and search SEGreg by gene name, data source, tissue, cancer type and regulators. In summary, SEGreg is a specialized resource to explore SEGs and their regulations, which provides clues to reveal the mechanisms of carcinogenesis and biological processes.",SEGreg,0.997626861,SEG regulation database,0.98023203,SEGreg,0.997626861,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2019 +32785571,http://intranet.fm.usp.br/sela,"SELAdb: A database of exonic variants in a Brazilian population referred to a quaternary medical center in São Paulo. 
Objectives High-throughput sequencing of genomes, exomes, and disease-focused gene panels is becoming increasingly common for molecular diagnostics. However, identifying a single clinically relevant pathogenic variant among thousands of genetic polymorphisms is a challenging task. Publicly available genomic databases are useful resources to filter out common genetic variants present in the population and enable the identification of each disease-causing variant. Based on our experience applying these technologies at Hospital das Clínicas da Faculdade de Medicina da Universidade de São Paulo (HCFMUSP), São Paulo, Brazil, we recognized that the Brazilian population is not adequately represented in widely available genomic databases. Methods Here, we took advantage of our 5-year experience as a high-throughput sequencing core facility focused on individuals with putative genetic disorders to build a genomic database that may serve as a more accurate reference for our patient population: SELAdb. Results/conclusions Currently, our database comprises a final cohort of 523 unrelated individuals, including patients or family members managed by different clinics of HCFMUSP. We compared SELAdb with other publicly available genomic databases and demonstrated that this population is very heterogeneous, largely resembling Latin American individuals of mixed origin, rather than individuals of pure European ancestry. Interestingly, exclusively through SELAdb, we identified a spectrum of known and potentially novel pathogenic variants in genes associated with highly penetrant Mendelian disorders, illustrating that pathogenic variants circulating in the Brazilian population that is treated in our clinics are underrepresented in other population databases. 
SELAdb is freely available for public consultation at: http://intranet.fm.usp.br/sela.",SELAdb,0.976938367,das,0.545053124,SELAdb,0.976938367,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/10/2020 +24225318,http://selectome.unil.ch,"Selectome update: quality control and computational improvements to a database of positive selection. Selectome (http://selectome.unil.ch/) is a database of positive selection, based on a branch-site likelihood test. This model estimates the number of nonsynonymous substitutions (dN) and synonymous substitutions (dS) to evaluate the variation in selective pressure (dN/dS ratio) over branches and over sites. Since the original release of Selectome, we have benchmarked and implemented a thorough quality control procedure on multiple sequence alignments, aiming to provide minimum false-positive results. We have also improved the computational efficiency of the branch-site test implementation, allowing larger data sets and more frequent updates. Release 6 of Selectome includes all gene trees from Ensembl for Primates and Glires, as well as a large set of vertebrate gene trees. A total of 6810 gene trees have some evidence of positive selection. Finally, the web interface has been improved to be more responsive and to facilitate searches and browsing.",Selectome,0.979073048,NA,0,Selectome,0.979073048,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/12/2013 +24194593,http://www.selenodb.org,"SelenoDB 2.0: annotation of selenoprotein genes in animals and their genetic diversity in humans. SelenoDB (http://www.selenodb.org) aims to provide high-quality annotations of selenoprotein genes, proteins and SECIS elements. Selenoproteins are proteins that contain the amino acid selenocysteine (Sec) and the first release of the database included annotations for eight species. Since the release of SelenoDB 1.0 many new animal genomes have been sequenced. The annotations of selenoproteins in new genomes usually contain many errors in major databases. 
For this reason, we have now fully annotated selenoprotein genes in 58 animal genomes. We provide manually curated annotations for human selenoproteins, whereas we use an automatic annotation pipeline to annotate selenoprotein genes in other animal genomes. In addition, we annotate the homologous genes containing cysteine (Cys) instead of Sec. Finally, we have surveyed genetic variation in the annotated genes in humans. We use exon capture and resequencing approaches to identify single-nucleotide polymorphisms in more than 50 human populations around the world. We thus present a detailed view of the genetic divergence of Sec- and Cys-containing genes in animals and their diversity in humans. The addition of these datasets into the second release of the database provides a valuable resource for addressing medical and evolutionary questions in selenium biology.",SelenoDB,0.994665325,NA,0,SelenoDB,0.994665325,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/4/2013 +23044550,http://skr3.nlm.nih.gov/SemMedDB,"SemMedDB: a PubMed-scale repository of biomedical semantic predications. Summary Effective access to the vast biomedical knowledge present in the scientific literature is challenging. Semantic relations are increasingly used in knowledge management applications supporting biomedical research to help address this challenge. We describe SemMedDB, a repository of semantic predications (subject-predicate-object triples) extracted from the entire set of PubMed citations. We propose the repository as a knowledge resource that can assist in hypothesis generation and literature-based discovery in biomedicine as well as in clinical decision-making support. Availability and implementation The SemMedDB repository is available as a MySQL database for non-commercial use at http://skr3.nlm.nih.gov/SemMedDB. An UMLS Metathesaurus license is required. 
Contact kilicogluh@mail.nih.gov.",SemMedDB,0.998119652,NA,0,SemMedDB,0.998119652,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/8/2012 +22419780,http://SEQanswers.com,"SEQanswers: an open access community for collaboratively decoding genomes. Summary The affordability of high-throughput sequencing has created an unprecedented surge in the use of genomic data in basic, translational and clinical research. The rapid evolution of sequencing technology, coupled with its broad adoption across biology and medicine, necessitates fast, collaborative interdisciplinary discussion. SEQanswers provides a real-time knowledge-sharing resource to address this need, covering experimental and computational aspects of sequencing and sequence analysis. Developers of popular analysis tools are among the >4000 active members, and ~40 peer-reviewed publications have referenced SEQanswers. Availability The SEQanswers community is freely accessible at http://SEQanswers.com/",SEQanswers,0.993383706,NA,0,SEQanswers,0.993383706,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/13/2012 +30202870,http://education.knoweng.org/sequenceng,SequencEnG: an interactive knowledge base of sequencing techniques. Summary Next-generation sequencing (NGS) techniques are revolutionizing biomedical research by providing powerful methods for generating genomic and epigenomic profiles. The rapid progress is posing an acute challenge to students and researchers to stay acquainted with the numerous available methods. We have developed an interactive online educational resource called Sequencing Techniques Engine for Genomics (SequencEnG) to provide a tree-structured knowledge base of 66 different sequencing techniques and step-by-step NGS data analysis pipelines comparing popular tools. SequencEnG is designed to facilitate barrier-free learning of current NGS techniques and provides a user-friendly interface for searching through experimental and analysis methods. 
Availability and implementation SequencEnG is part of the project Knowledge Engine for Genomics (KnowEnG) and is freely available at http://education.knoweng.org/sequenceng/.,SequencEnG,0.989631832,Sequencing Techniques Engine for Genomics,0.744912343,SequencEnG,0.989631832,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2019 +22086956,"http://SEQanswers.com/, http://wiki.SEQanswers.com","The SEQanswers wiki: a wiki database of tools for high-throughput sequencing analysis. Recent advances in sequencing technology have created unprecedented opportunities for biological research. However, the increasing throughput of these technologies has created many challenges for data management and analysis. As the demand for sophisticated analyses increases, the development time of software and algorithms is outpacing the speed of traditional publication. As technologies continue to be developed, methods change rapidly, making publications less relevant for users. The SEQanswers wiki (SEQwiki) is a wiki database that is actively edited and updated by the members of the SEQanswers community (http://SEQanswers.com/). The wiki provides an extensive catalogue of tools, technologies and tutorials for high-throughput sequencing (HTS), including information about HTS service providers. It has been implemented in MediaWiki with the Semantic MediaWiki and Semantic Forms extensions to collect structured data, providing powerful navigation and reporting features. Within 2 years, the community has created pages for over 500 tools, with approximately 400 literature references and 600 web links. This collaborative effort has made SEQwiki the most comprehensive database of HTS tools anywhere on the web. The wiki includes task-focused mini-reviews of commonly used tools, and a growing collection of more than 100 HTS service providers. 
SEQwiki is available at: http://wiki.SEQanswers.com/.",SEQwiki,0.989065051,SEQanswers,0.585303307,SEQwiki,0.989065051,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/15/2011 +27307138,http://www.seriport.in,"A comprehensive view of the web-resources related to sericulture. . Recent progress in the field of sequencing and analysis has led to a tremendous spike in data and the development of data science tools. One of the outcomes of this scientific progress is development of numerous databases which are gaining popularity in all disciplines of biology including sericulture. As economically important organism, silkworms are studied extensively for their numerous applications in the field of textiles, biomaterials, biomimetics, etc. Similarly, host plants, pests, pathogens, etc. are also being probed to understand the seri-resources more efficiently. These studies have led to the generation of numerous seri-related databases which are extremely helpful for the scientific community. In this article, we have reviewed all the available online resources on silkworm and its related organisms, including databases as well as informative websites. We have studied their basic features and impact on research through citation count analysis, finally discussing the role of emerging sequencing and analysis technologies in the field of seri-data science. As an outcome of this review, a web portal named SeriPort, has been created which will act as an index for the various sericulture-related databases and web resources available in cyberspace.Database URL: http://www.seriport.in/.",SeriPort,0.92696017,NA,0,SeriPort,0.92696017,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/15/2016 +28539606,http://ncgr.ac.cn/SesameFG,"SesameFG: an integrated database for the functional genomics of sesame. Sesame (Sesamum indicum L.) has high oil content, a small diploid genome and a short growth period, making it an attractive species for genetic studies on oilseed crops. 
With the advancement of next-generation sequencing technology, genomics and functional genomics research of sesame has developed quickly in the last few years, and large amounts of data have been generated. However, these results are distributed in many different publications, and there is a lack of integration. To promote functional genomics research of sesame, we collected genetic information combined with comprehensive phenotypic information and integrated them in the web-based database named SesameFG. The current version of SesameFG contains phenotypic information on agronomic traits of 705 sesame accessions, de novo assembled genomes of three sesame varieties, massive numbers of identified SNPs, gene expression profiles of five tissues, gene families, candidate genes for the important agronomic traits and genomic-SSR markers. All phenotypic and genotypic information in SesameFG is available for online queries and can be downloaded freely. SesameFG provides useful search functions and data mining tools, including Genome Browser and local BLAST services. SesameFG is freely accessible at http://ncgr.ac.cn/SesameFG/. SesameFG provides valuable resources and tools for functional genomics research and the molecular breeding of sesame.",SesameFG,0.989373446,NA,0,SesameFG,0.989373446,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/24/2017 +"23180763, 25392407, 31740968",http://seva.cnb.csic.es,"The Standard European Vector Architecture (SEVA): a coherent platform for the analysis and deployment of complex prokaryotic phenotypes. The 'Standard European Vector Architecture' database (SEVA-DB, http://seva.cnb.csic.es) was conceived as a user-friendly, web-based resource and a material clone repository to assist in the choice of optimal plasmid vectors for de-constructing and re-constructing complex prokaryotic phenotypes. 
The SEVA-DB adopts simple design concepts that facilitate the swapping of functional modules and the extension of genome engineering options to microorganisms beyond typical laboratory strains. Under the SEVA standard, every DNA portion of the plasmid vectors is minimized, edited for flaws in their sequence and/or functionality, and endowed with physical connectivity through three inter-segment insulators that are flanked by fixed, rare restriction sites. Such a scaffold enables the exchangeability of multiple origins of replication and diverse antibiotic selection markers to shape a frame for their further combination with a large variety of cargo modules that can be used for varied end-applications. The core collection of constructs that are available at the SEVA-DB has been produced as a starting point for the further expansion of the formatted vector platform. We argue that adoption of the SEVA format can become a shortcut to fill the phenomenal gap between the existing power of DNA synthesis and the actual engineering of predictable and efficacious bacteria.",SEVA-DB,0.994396701,Standard European Vector Architecture,0.724981114,SEVA-DB,0.994396701,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +29892516,"http://sevens.cbrc.jp, http://sevens.chem.aoyama.ac.jp","SEVENS: a database for comprehensive GPCR genes obtained from genomes: -Update to 68 eukaryotes. We report the development of the SEVENS database, which contains information on G-protein coupled receptor (GPCR) genes that are identified with high confidence levels (A, B, C, and D) from various eukaryotic genomes, by using a pipeline comprising bioinformatics softwares, including a gene finder, a sequence alignment tool, a motif and domain assignment tool, and a transmembrane helix predictor. SEVENS compiles detailed information on GPCR genes, such as chromosomal mapping position, phylogenetic tree, sequence similarity to known genes, and protein function described by motif/domain and transmembrane helices. 
They are presented in a user-friendly interface. Because of the comprehensive gene findings from genomes, SEVENS contains a larger data set than that of previous databases and enables the performance of a genome-scale overview of all the GPCR genes. We surveyed the complete genomes of 68 eukaryotes, and found that there were between 6 and 3,470 GPCR genes for each genome (Level A data). Within these genes, the number of receptors for various molecules, including biological amines, peptides, and lipids, were conserved in mammals, birds, and fishes, whereas the numbers of odorant receptors and pheromone receptors were highly diverse in mammals. SEVENS is freely available at http://sevens.cbrc.jp or http://sevens.chem.aoyama.ac.jp.",SEVENS,0.997551143,NA,0,SEVENS,0.997551143,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/27/2018 +24090431,http://gene.sfari.org/autdb/GS_Home.do,"SFARI Gene 2.0: a community-driven knowledgebase for the autism spectrum disorders (ASDs). New technologies enabling genome-wide interrogation have led to a large and rapidly growing number of autism spectrum disorder (ASD) candidate genes. Although encouraging, the volume and complexity of these data make it challenging for scientists, particularly non-geneticists, to comprehensively evaluate available evidence for individual genes. Described here is the Gene Scoring module within SFARI Gene 2.0 (https://gene.sfari.org/autdb/GS_Home.do), a platform developed to enable systematic community driven assessment of genetic evidence for individual genes with regard to ASD.",SFARI,0.879653931,NA,0,SFARI,0.879653931,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,10/3/2013 +24712981,http://bioinformatics.cau.edu.cn/SFGD,"SFGD: a comprehensive platform for mining functional information from soybean transcriptome data and its use in identifying acyl-lipid metabolism pathways. Background Soybean (Glycine max L.) 
is one of the world's most important leguminous crops producing high-quality protein and oil. Increasing the relative oil concentration in soybean seeds is many researchers' goal, but a complete analysis platform of functional annotation for the genes involved in the soybean acyl-lipid pathway is still lacking. Following the success of soybean whole-genome sequencing, functional annotation has become a major challenge for the scientific community. Whole-genome transcriptome analysis is a powerful way to predict genes with biological functions. It is essential to build a comprehensive analysis platform for integrating soybean whole-genome sequencing data, the available transcriptome data and protein information. This platform could also be used to identify acyl-lipid metabolism pathways. Description In this study, we describe our construction of the Soybean Functional Genomics Database (SFGD) using Generic Genome Browser (Gbrowse) as the core platform. We integrated microarray expression profiling with 255 samples from 14 groups' experiments and mRNA-seq data with 30 samples from four groups' experiments, including spatial and temporal transcriptome data for different soybean development stages and environmental stresses. The SFGD includes a gene co-expression regulatory network containing 23,267 genes and 1873 miRNA-target pairs, and a group of acyl-lipid pathways containing 221 enzymes and more than 1550 genes. The SFGD also provides some key analysis tools, i.e. BLAST search, expression pattern search and cis-element significance analysis, as well as gene ontology information search and single nucleotide polymorphism display. Conclusion The SFGD is a comprehensive database integrating genome and transcriptome data, and also for soybean acyl-lipid metabolism pathways. 
It provides useful toolboxes for biologists to improve the accuracy and robustness of soybean functional genomics analysis, further improving understanding of gene regulatory networks for effective crop improvement. The SFGD is publically accessible at http://bioinformatics.cau.edu.cn/SFGD/, with all data available for downloading.",SFGD,0.987578392,Soybean Functional Genomics Database,0.935117344,SFGD,0.987578392,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/8/2014 +"24271399, 25501940",http://sfld.rbvi.ucsf.edu,"The Structure-Function Linkage Database. The Structure-Function Linkage Database (SFLD, http://sfld.rbvi.ucsf.edu/) is a manually curated classification resource describing structure-function relationships for functionally diverse enzyme superfamilies. Members of such superfamilies are diverse in their overall reactions yet share a common ancestor and some conserved active site features associated with conserved functional attributes such as a partial reaction. Thus, despite their different functions, members of these superfamilies 'look alike', making them easy to misannotate. To address this complexity and enable rational transfer of functional features to unknowns only for those members for which we have sufficient functional information, we subdivide superfamily members into subgroups using sequence information, and lastly into families, sets of enzymes known to catalyze the same reaction using the same mechanistic strategy. Browsing and searching options in the SFLD provide access to all of these levels. The SFLD offers manually curated as well as automatically classified superfamily sets, both accompanied by search and download options for all hierarchical levels. Additional information includes multiple sequence alignments, tab-separated files of functional and other attributes, and sequence similarity networks. 
The latter provide a new and intuitively powerful way to visualize functional trends mapped to the context of sequence similarity.",SFLD,0.998137712,Structure-Function Linkage Database,0.984608392,SFLD,0.998137712,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/12/2014 +"22110037, 23487186, 24265222, 27252399, 29140510, 32128557",http://www.yeastgenome.org,"Saccharomyces Genome Database: the genomics resource of budding yeast. The Saccharomyces Genome Database (SGD, http://www.yeastgenome.org) is the community resource for the budding yeast Saccharomyces cerevisiae. The SGD project provides the highest-quality manually curated information from peer-reviewed literature. The experimental results reported in the literature are extracted and integrated within a well-developed database. These data are combined with quality high-throughput results and provided through Locus Summary pages, a powerful query engine and rich genome browser. The acquisition, integration and retrieval of these data allow SGD to facilitate experimental design and analysis by providing an encyclopedia of the yeast genome, its chromosomal features, their functions and interactions. Public access to these data is provided to researchers and educators via web pages designed for optimal ease of use.",SGD,0.991999149,Saccharomyces Genome Database,0.977382439,SGD,0.991999149,6,NA,26631132,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2020 +26631132,http://yeastgenome.org,"The Saccharomyces Genome Database: A Tool for Discovery. The Saccharomyces Genome Database (SGD) is the main community repository of information for the budding yeast, Saccharomyces cerevisiae. The SGD has collected published results on chromosomal features, including genes and their products, and has become an encyclopedia of information on the biology of the yeast cell. This information includes gene and gene product function, phenotype, interactions, regulation, complexes, and pathways. 
All information has been integrated into a unique web resource, accessible via http://yeastgenome.org. The website also provides custom tools to allow useful searches and visualization of data. The experimentally defined functions of genes, mutant phenotypes, and sequence homologies archived in the SGD provide a platform for understanding many fields of biological research. The mission of SGD is to provide public access to all published experimental results on yeast to aid life science students, educators, and researchers. As such, the SGD has become an essential tool for the design of experiments and for the analysis of experimental results.",SGD,0.98170195,The Saccharomyces Genome Database,0.976465479,SGD,0.98170195,1,NA,"22110037.0, 23487186.0, 24265222.0, 27252399.0, 29140510.0, 32128557.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,12/2/2015 +25428362,http://solgenomics.net,"The Sol Genomics Network (SGN)--from genotype to phenotype to breeding. The Sol Genomics Network (SGN, http://solgenomics.net) is a web portal with genomic and phenotypic data, and analysis tools for the Solanaceae family and close relatives. SGN hosts whole genome data for an increasing number of Solanaceae family members including tomato, potato, pepper, eggplant, tobacco and Nicotiana benthamiana. The database also stores loci and phenotype data, which researchers can upload and edit with user-friendly web interfaces. Tools such as BLAST, GBrowse and JBrowse for browsing genomes, expression and map data viewers, a locus community annotation system and a QTL analysis tools are available. A new tool was recently implemented to improve Virus-Induced Gene Silencing (VIGS) constructs called the SGN VIGS tool. 
With the growing genomic and phenotypic data in the database, SGN is now advancing to develop new web-based breeding tools and implement the code and database structure for other species or clade-specific databases.",SGN,0.983287116,Sol Genomics Network,0.855045378,SGN,0.983287116,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/26/2014 +23730305,http://systems.genetics.ucla.edu,"The systems genetics resource: a web application to mine global data for complex disease traits. The Systems Genetics Resource (SGR) (http://systems.genetics.ucla.edu) is a new open-access web application and database that contains genotypes and clinical and intermediate phenotypes from both human and mouse studies. The mouse data include studies using crosses between specific inbred strains and studies using the Hybrid Mouse Diversity Panel. SGR is designed to assist researchers studying genes and pathways contributing to complex disease traits, including obesity, diabetes, atherosclerosis, heart failure, osteoporosis, and lipoprotein metabolism. Over the next few years, we hope to add data relevant to deafness, addiction, hepatic steatosis, toxin responses, and vascular injury. The intermediate phenotypes include expression array data for a variety of tissues and cultured cells, metabolite levels, and protein levels. Pre-computed tables of genetic loci controlling intermediate and clinical phenotypes, as well as phenotype correlations, are accessed via a user-friendly web interface. The web site includes detailed protocols for all of the studies. Data from published studies are freely available; unpublished studies have restricted access during their embargo period.",SGR,0.993947387,Systems Genetics Resource,0.927626471,SGR,0.993947387,1,NA,24364888,NA,NA,NA,do not merge,NA,NA,NA,5/20/2013 +24364888,http://bioinformatics.towson.edu/strawberry/Default.aspx,"SGR: an online genomic resource for the woodland strawberry. 
Background Fragaria vesca, a diploid strawberry species commonly known as the alpine or woodland strawberry, is a versatile experimental plant system and an emerging model for the Rosaceae family. An ancestral F. vesca genome contributed to the genome of the octoploid dessert strawberry (F. ×ananassa), and the extant genome exhibits synteny with other commercially important members of the Rosaceae family such as apple and peach. To provide a molecular description of floral organ and fruit development at the resolution of specific tissues and cell types, RNAs from flowers and early developmental stage fruit tissues of the inbred F. vesca line YW5AF7 were extracted and the resulting cDNA libraries sequenced using an Illumina HiSeq2000. To enable easy access as well as mining of this two-dimensional (stage and tissue) transcriptome dataset, a web-based database, the Strawberry Genomic Resource (SGR), was developed. Description SGR is a web accessible database that contains sample description, sample statistics, gene annotation, and gene expression analysis. This information can be accessed publicly from a web-based interface at http://bioinformatics.towson.edu/strawberry/Default.aspx. The SGR website provides user friendly search and browse capabilities for all the data stored in the database. Users are able to search for genes using a gene ID or description or obtain differentially expressed genes by entering different comparison parameters. Search results can be downloaded in a tabular format compatible with Microsoft excel application. Aligned reads to individual genes and exon/intron structures are displayed using the genome browser, facilitating gene re-annotation by individual users. Conclusions The SGR database was developed to facilitate dissemination and data mining of extensive floral and fruit transcriptome data in the woodland strawberry. 
It enables users to mine the data in different ways to study different pathways or biological processes during reproductive development.",SGR,0.919245839,NA,0,SGR,0.919245839,1,NA,23730305,low_prob_best_name,do not remove,NA,do not merge,NA,NA,NA,12/23/2013 +23724943,http://www.sgwhc.org,"Advancing sex and gender competency in medicine: sex & gender women's health collaborative. Research conducted to date has deepened our understanding of sex and gender differences in the etiology, diagnosis, treatment, and outcomes for many conditions that affect both women and men. The Sex and Gender Women's Health Collaborative (SGWHC) is supported by the coordinated efforts of our founding partners: the American Medical Women's Association, the American College of Women's Health Physicians and Society for Women's Health Research to address the gaps in medical education with regard to sex and gender competency in the care of women. The SGWHC initiated and continues to build a novel digital resource library of sex and gender specific materials to be adopted and adapted into medical education and clinical practice, residing @ http://www.sgwhc.org. This article presents a case for the inclusion of sex and gender focused content into medical curricula and describes a means for students, faculty, and practitioners to access a centralized, interactive repository for these resources.",SGWHC,0.941945419,Sex and Gender Women's Health Collaborative,0.814800464,SGWHC,0.941945419,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/1/2013 +28361715,http://bal.ym.edu.tw/SheddomeDB,"SheddomeDB: the ectodomain shedding database for membrane-bound shed markers. Background A number of membrane-anchored proteins are known to be released from cell surface via ectodomain shedding. The cleavage and release of membrane proteins has been shown to modulate various cellular processes and disease pathologies. 
Numerous studies revealed that cell membrane molecules of diverse functional groups are subjected to proteolytic cleavage, and the released soluble form of proteins may modulate various signaling processes. Therefore, in addition to the secreted protein markers that undergo secretion through the secretory pathway, the shed membrane proteins may comprise an additional resource of noninvasive and accessible biomarkers. In this context, identifying the membrane-bound proteins that will be shed has become important in the discovery of clinically noninvasive biomarkers. Nevertheless, a data repository for biological and clinical researchers to review the shedding information, which is experimentally validated, for membrane-bound protein shed markers is still lacking. Results In this study, the database SheddomeDB was developed to integrate publicly available data of the shed membrane proteins. A comprehensive literature survey was performed to collect the membrane proteins that were verified to be cleaved or released in the supernatant by immunological-based validation experiments. From 436 studies on shedding, 401 validated shed membrane proteins were included, among which 199 shed membrane proteins have not been annotated or validated yet by existing cleavage databases. SheddomeDB attempted to provide a comprehensive shedding report, including the regulation of shedding machinery and the related function or diseases involved in the shedding events. In addition, our published tool ShedP was embedded into SheddomeDB to support researchers for predicting the shedding event on unknown or unrecorded membrane proteins. Conclusions To the best of our knowledge, SheddomeDB is the first database for the identification of experimentally validated shed membrane proteins and currently may provide the most number of membrane proteins for reviewing the shedding information. 
The database included membrane-bound shed markers associated with numerous cellular processes and diseases, and some of these markers are potential novel markers because they are not annotated or validated yet in other databases. SheddomeDB may provide a useful resource for discovering membrane-bound shed markers. The interactive web of SheddomeDB is publicly available at http://bal.ym.edu.tw/SheddomeDB/ .",SheddomeDB,0.996616364,NA,0,SheddomeDB,0.996616364,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/14/2017 +24952385,http://shrimpgpat.sc.mahidol.ac.th,"ShrimpGPAT: a gene and protein annotation tool for knowledge sharing and gene discovery in shrimp. Background Although captured and cultivated marine shrimp constitute highly important seafood in terms of both economic value and production quantity, biologists have little knowledge of the shrimp genome and this partly hinders their ability to improve shrimp aquaculture. To help improve this situation, the Shrimp Gene and Protein Annotation Tool (ShrimpGPAT) was conceived as a community-based annotation platform for the acquisition and updating of full-length complementary DNAs (cDNAs), Expressed Sequence Tags (ESTs), transcript contigs and protein sequences of penaeid shrimp and their decapod relatives and for in-silico functional annotation and sequence analysis. Description ShrimpGPAT currently holds quality-filtered, molecular sequences of 14 decapod species (~500,000 records for six penaeid shrimp and eight other decapods). The database predominantly comprises transcript sequences derived by both traditional EST Sanger sequencing and more recently by massive-parallel sequencing technologies. The analysis pipeline provides putative functions in terms of sequence homologs, gene ontologies and protein-protein interactions. 
Data retrieval can be conducted easily either by a keyword text search or by a sequence query via BLAST, and users can save records of interest for later investigation using tools such as multiple sequence alignment and BLAST searches against pre-defined databases. In addition, ShrimpGPAT provides space for community insights by allowing functional annotation with tags and comments on sequences. Community-contributed information will allow for continuous database enrichment, for improvement of functions and for other aspects of sequence analysis. Conclusions ShrimpGPAT is a new, free and easily accessed service for the shrimp research community that provides a comprehensive and up-to-date database of quality-filtered decapod gene and protein sequences together with putative functional prediction and sequence analysis tools. An important feature is its community-based functional annotation capability that allows the research community to contribute knowledge and insights about the properties of molecular sequences for better, shared, functional characterization of shrimp genes. Regularly updated and expanded with data on more decapods, ShrimpGPAT is publicly available at http://shrimpgpat.sc.mahidol.ac.th/.",ShrimpGPAT,0.997447491,Shrimp Gene and Protein Annotation Tool,0.836638801,ShrimpGPAT,0.997447491,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/21/2014 +27297221,http://forge.info.univ-angers.fr,"sHSPdb: a database for the analysis of small Heat Shock Proteins. Background small Heat Shock Proteins (sHSP) is a wide proteins family. SHSP are found in all kingdoms and they play critical roles in plant stress tolerance mechanisms (as well as in pathogenic microorganisms and are implicated in human diseases). Results sHSPdb (small Heat Shock Proteins database) is an integrated resource containing non-redundant, full-length and curated sequences of sHSP, classified on the basis of amino acids motifs and physico-chemical properties. 
sHSPdb gathers data about sHSP defined by various databases (Uniprot, PFAM, CDD, InterPro). It provides a browser interface for retrieving information from the whole database and a search interface using various criteria for retrieving a refined subset of entries. Physicochemical properties, amino acid composition and combinations are calculated for each entry. sHSPdb provides automatic statistical analysis of all sHSP properties. Among various possibilities, sHSPdb allows BLAST searches, alignment of selected sequences and submission of sequences. Conclusions sHSPdb is a new database containing information about sHSP from all kingdoms. sHSPdb provides a classification of sHSP, as well as tools and data for the analysis of the structure - function relationships of sHSP. Data are mainly related to various physico-chemical properties of the amino acids sequences of sHSP. sHSPdb is accessible at http://forge.info.univ-angers.fr/~gh/Shspdb/index.php .",sHSPdb,0.981707454,Heat Shock Proteins database,0.912849665,sHSPdb,0.981707454,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/13/2016 +24146757,http://mlg.hit.edu.cn/SIDD,"SIDD: a semantically integrated database towards a global view of human disease. Background A number of databases have been developed to collect disease-related molecular, phenotypic and environmental features (DR-MPEs), such as genes, non-coding RNAs, genetic variations, drugs, phenotypes and environmental factors. However, each of current databases focused on only one or two DR-MPEs. There is an urgent demand to develop an integrated database, which can establish semantic associations among disease-related databases and link them to provide a global view of human disease at the biological level. This database, once developed, will facilitate researchers to query various DR-MPEs through disease, and investigate disease mechanisms from different types of data. 
Methodology To establish an integrated disease-associated database, disease vocabularies used in different databases are mapped to Disease Ontology (DO) through semantic match. 4,284 and 4,186 disease terms from Medical Subject Headings (MeSH) and Online Mendelian Inheritance in Man (OMIM) respectively are mapped to DO. Then, the relationships between DR-MPEs and diseases are extracted and merged from different source databases for reducing the data redundancy. Conclusions A semantically integrated disease-associated database (SIDD) is developed, which integrates 18 disease-associated databases, for researchers to browse multiple types of DR-MPEs in a view. A web interface allows easy navigation for querying information through browsing a disease ontology tree or searching a disease term. Furthermore, a network visualization tool using Cytoscape Web plugin has been implemented in SIDD. It enhances the SIDD usage when viewing the relationships between diseases and DR-MPEs. The current version of SIDD (Jul 2013) documents 4,465,131 entries relating to 139,365 DR-MPEs, and to 3,824 human diseases. The database can be freely accessed from: http://mlg.hit.edu.cn/SIDD.",SIDD,0.992121279,NA,0,SIDD,0.992121279,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/11/2013 +23203869,http://pdbe.org/sifts,"SIFTS: Structure Integration with Function, Taxonomy and Sequences resource. The Structure Integration with Function, Taxonomy and Sequences resource (SIFTS; http://pdbe.org/sifts) is a close collaboration between the Protein Data Bank in Europe (PDBe) and UniProt. The two teams have developed a semi-automated process for maintaining up-to-date cross-reference information to UniProt entries, for all protein chains in the PDB entries present in the UniProt database. This process is carried out for every weekly PDB release and the information is stored in the SIFTS database. 
The SIFTS process includes cross-references to other biological resources such as Pfam, SCOP, CATH, GO, InterPro and the NCBI taxonomy database. The information is exported in XML format, one file for each PDB entry, and is made available by FTP. Many bioinformatics resources use SIFTS data to obtain cross-references between the PDB and other biological databases so as to provide their users with up-to-date information.",SIFTS,0.997229233,Structure Integration with Function,0.943572327,SIFTS,0.997229233,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +22784567,http://sysbio.kribb.re.kr/sigcs,"SigCS base: an integrated genetic information resource for human cerebral stroke. Background To understand how stroke risk factors mechanistically contribute to stroke, the genetic components regulating each risk factor need to be integrated and evaluated with respect to biological function and through pathway-based algorithms. This resource will provide information to researchers studying the molecular and genetic causes of stroke in terms of genomic variants, genes, and pathways. Methods Reported genetic variants, gene structure, phenotypes, and literature information regarding stroke were collected and extracted from publicly available databases describing variants, genome, proteome, functional annotation, and disease subtypes. Stroke related candidate pathways and etiologic genes that participate significantly in risk were analyzed in terms of canonical pathways in public biological pathway databases. These efforts resulted in a relational database of genetic signals of cerebral stroke, SigCS base, which implements an effective web retrieval system. Results The current version of SigCS base documents 1943 non-redundant genes with 11472 genetic variants and 165 non-redundant pathways. 
The web retrieval system of SigCS base consists of two principal search flows, including: 1) a gene-based variant search using gene table browsing or a keyword search, and, 2) a pathway-based variant search using pathway table browsing. SigCS base is freely accessible at http://sysbio.kribb.re.kr/sigcs. Conclusions SigCS base is an effective tool that can assist researchers in the identification of the genetic factors associated with stroke by utilizing existing literature information, selecting candidate genes and variants for experimental studies, and examining the pathways that contribute to the pathophysiological mechanisms of stroke.",SigCS base,0.819766919,NA,0,SigCS base,0.819766919,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/14/2011 +26490957,http://bioinfo.imtech.res.in/manojk/sigmol,"SigMol: repertoire of quorum sensing signaling molecules in prokaryotes. Quorum sensing is a widespread phenomenon in prokaryotes that helps them to communicate among themselves and with eukaryotes. It is driven through quorum sensing signaling molecules (QSSMs) in a density dependent manner that assists in numerous biological functions like biofilm formation, virulence factors secretion, swarming motility, bioluminescence, etc. Despite immense implications, dedicated resources of QSSMs are lacking. Therefore, we have developed SigMol (http://bioinfo.imtech.res.in/manojk/sigmol), a specialized repository of these molecules in prokaryotes. SigMol harbors information on QSSMs pertaining to different quorum sensing signaling systems namely acylated homoserine lactones (AHLs), diketopiperazines (DKPs), 4-hydroxy-2-alkylquinolines (HAQs), diffusible signal factors (DSFs), autoinducer-2 (AI-2) and others. Database contains 1382: entries of 182: unique signaling molecules from 215: organisms. It encompasses biological as well as chemical aspects of signaling molecules. 
Biological information includes genes, preliminary bioassays, identification assays and applications, while chemical detail comprises of IUPAC name, SMILES and structure. We have provided user-friendly browsing and searching facilities for easy data retrieval and comparison. We have gleaned information of diverse QSSMs reported in literature at a single platform 'SigMol'. This comprehensive resource will assist the scientific community in understanding intraspecies, interspecies or interkingdom networking and further help to unfold different facets of quorum sensing and related therapeutics.",SigMol,0.986595631,NA,0,SigMol,0.986595631,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/20/2015 +27097230,http://signafish.org,"SignaFish: A Zebrafish-Specific Signaling Pathway Resource. Understanding living systems requires an in-depth knowledge of the signaling networks that drive cellular homeostasis, regulate intercellular communication, and contribute to cell fates during development. Several resources exist to provide high-throughput data sets or manually curated interaction information from human or invertebrate model organisms. We previously developed SignaLink, a uniformly curated, multi-layered signaling resource containing information for human and for the model organisms nematode Caenorhabditis elegans and fruit fly Drosophila melanogaster. Until now, the use of the SignaLink database for zebrafish pathway analysis was limited. To overcome this limitation, we created SignaFish ( http://signafish.org ), a fish-specific signaling resource, built using the concept of SignaLink. SignaFish contains more than 200 curation-based signaling interactions, 132 further interactions listed in other resources, and it also lists potential miRNA-based regulatory connections for seven major signaling pathways. From the SignaFish website, users can reach other web resources, such as ZFIN. 
SignaFish provides signaling or signaling-related interactions that can be examined for each gene or downloaded for each signaling pathway. We believe that the SignaFish resource will serve as a novel navigating point for experimental design and evaluation for the zebrafish community and for researchers focusing on nonmodel fish species, such as cyclids.",SignaFish,0.99351877,Pathway,0.51736623,SignaFish,0.99351877,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/20/2016 +23715991,http://signalink.org,"Uniform curation protocol of metazoan signaling pathways to predict novel signaling components. A relatively large number of signaling databases available today have strongly contributed to our understanding of signaling pathway properties. However, pathway comparisons both within and across databases are currently severely hampered by the large variety of data sources and the different levels of detail of their information content (on proteins and interactions). In this chapter, we present a protocol for a uniform curation method of signaling pathways, which intends to overcome this insufficiency. This uniformly curated database called SignaLink ( http://signalink.org ) allows us to systematically transfer pathway annotations between different species, based on orthology, and thereby to predict novel signaling pathway components. Thus, this method enables the compilation of a comprehensive signaling map of a given species and identification of new potential drug targets in humans. We strongly believe that the strict curation protocol we have established to compile a signaling pathway database can also be applied for the compilation of other (e.g., metabolic) databases. 
Similarly, the detailed guide to the orthology-based prediction of novel signaling components across species may also be utilized for predicting components of other biological processes.",SignaLink,0.985477149,NA,0,SignaLink,0.985477149,1,NA,23331499,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2013 +23331499,http://SignaLink.org,"SignaLink 2 - a signaling pathway resource with multi-layered regulatory networks. Background Signaling networks in eukaryotes are made up of upstream and downstream subnetworks. The upstream subnetwork contains the intertwined network of signaling pathways, while the downstream regulatory part contains transcription factors and their binding sites on the DNA as well as microRNAs and their mRNA targets. Currently, most signaling and regulatory databases contain only a subsection of this network, making comprehensive analyses highly time-consuming and dependent on specific data handling expertise. The need for detailed mapping of signaling systems is also supported by the fact that several drug development failures were caused by undiscovered cross-talk or regulatory effects of drug targets. We previously created a uniformly curated signaling pathway resource, SignaLink, to facilitate the analysis of pathway cross-talks. Here, we present SignaLink 2, which significantly extends the coverage and applications of its predecessor. Description We developed a novel concept to integrate and utilize different subsections (i.e., layers) of the signaling network. The multi-layered (onion-like) database structure is made up of signaling pathways, their pathway regulators (e.g., scaffold and endocytotic proteins) and modifier enzymes (e.g., phosphatases, ubiquitin ligases), as well as transcriptional and post-transcriptional regulators of all of these components. The user-friendly website allows the interactive exploration of how each signaling protein is regulated. 
The customizable download page enables the analysis of any user-specified part of the signaling network. Compared to other signaling resources, distinctive features of SignaLink 2 are the following: 1) it involves experimental data not only from humans but from two invertebrate model organisms, C. elegans and D. melanogaster; 2) combines manual curation with large-scale datasets; 3) provides confidence scores for each interaction; 4) operates a customizable download page with multiple file formats (e.g., BioPAX, Cytoscape, SBML). Non-profit users can access SignaLink 2 free of charge at http://SignaLink.org. Conclusions With SignaLink 2 as a single resource, users can effectively analyze signaling pathways, scaffold proteins, modifier enzymes, transcription factors and miRNAs that are important in the regulation of signaling processes. This integrated resource allows the systems-level examination of how cross-talks and signaling flow are regulated, as well as provide data for cross-species comparisons and drug discovery analyses.",SignaLink,0.975684285,NA,0,SignaLink,0.975684285,1,NA,23715991,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/18/2013 +"26467481, 28654729, 31665520",http://signor.uniroma2.it,"SIGNOR: a database of causal relationships between biological entities. Assembly of large biochemical networks can be achieved by confronting new cell-specific experimental data with an interaction subspace constrained by prior literature evidence. The SIGnaling Network Open Resource, SIGNOR (available on line at http://signor.uniroma2.it), was developed to support such a strategy by providing a scaffold of prior experimental evidence of causal relationships between biological entities. The core of SIGNOR is a collection of approximately 12,000 manually-annotated causal relationships between over 2800 human proteins participating in signal transduction. Other entities annotated in SIGNOR are complexes, chemicals, phenotypes and stimuli. 
The information captured in SIGNOR can be represented as a signed directed graph illustrating the activation/inactivation relationships between signalling entities. Each entry is associated to the post-translational modifications that cause the activation/inactivation of the target proteins. More than 4900 modified residues causing a change in protein concentration or activity have been curated and linked to the modifying enzymes (about 351 human kinases and 94 phosphatases). Additional modifications such as ubiquitinations, sumoylations, acetylations and their effect on the modified target proteins are also annotated. This wealth of structured information can support experimental approaches based on multi-parametric analysis of cell systems after physiological or pathological perturbations and to assemble large logic models.",SIGNOR,0.967104197,SIGnaling Network Open Resource,0.931980938,SIGNOR,0.967104197,3,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +33045745,http://health.tsinghua.edu.cn/silencerdb,"SilencerDB: a comprehensive database of silencers. Gene regulatory elements, including promoters, enhancers, silencers, etc., control transcriptional programs in a spatiotemporal manner. Though these elements are known to be able to induce either positive or negative transcriptional control, the community has been mostly studying enhancers which amplify transcription initiation, with less emphasis given to silencers which repress gene expression. To facilitate the study of silencers and the investigation of their potential roles in transcriptional control, we developed SilencerDB (http://health.tsinghua.edu.cn/silencerdb/), a comprehensive database of silencers by manually curating silencers from 2300 published articles. The current version, SilencerDB 1.0, contains (1) 33 060 validated silencers from experimental methods, and (ii) 5 045 547 predicted silencers from state-of-the-art machine learning methods. 
The functionality of SilencerDB includes (a) standardized categorization of silencers in a tree-structured class hierarchy based on species, organ, tissue and cell line and (b) comprehensive annotations of silencers with the nearest gene and potential regulatory genes. SilencerDB, to the best of our knowledge, is the first comprehensive database at this scale dedicated to silencers, with reliable annotations and user-friendly interactive database features. We believe this database has the potential to enable advanced understanding of silencers in regulatory mechanisms and to empower researchers to devise diverse applications of silencers in disease development.",SilencerDB,0.997530401,NA,0,SilencerDB,0.997530401,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +31642484,http://silkdb.bioinfotoolkits.net,"SilkDB 3.0: visualizing and exploring multiple levels of data for silkworm. SilkDB is an open-accessibility database and powerful platform that provides comprehensive information on the silkworm (Bombyx mori) genome. Since SilkDB 2.0 was released 10 years ago, vast quantities of data about multiple aspects of the silkworm have been generated, including genome, transcriptome, Hi-C and pangenome. To visualize data at these different biological levels, we present SilkDB 3.0 (https://silkdb.bioinfotoolkits.net), a visual analytic tool for exploring silkworm data through an interactive user interface. The database contains a high-quality chromosome-level assembly of the silkworm genome, and its coding sequences and gene sets are more accurate than those in the previous version. SilkDB 3.0 provides a view of the information for each gene at the levels of sequence, protein structure, gene family, orthology, synteny, genome organization and gives access to gene expression information, genetic variation and genome interaction map. A set of visualization tools are available to display the abundant information in the above datasets. 
With an improved interactive user interface for the integration of large data sets, the updated SilkDB 3.0 database will be a valuable resource for the silkworm and insect research community.",SilkDB,0.996998668,NA,0,SilkDB,0.996998668,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +28365723,http://silkpathdb.swu.edu.cn,"SilkPathDB: a comprehensive resource for the study of silkworm pathogens. . Silkworm pathogens have been heavily impeding the development of sericultural industry and play important roles in lepidopteran ecology, and some of which are used as biological insecticides. Rapid advances in studies on the omics of silkworm pathogens have produced a large amount of data, which need to be brought together centrally in a coherent and systematic manner. This will facilitate the reuse of these data for further analysis. We have collected genomic data for 86 silkworm pathogens from 4 taxa (fungi, microsporidia, bacteria and viruses) and from 4 lepidopteran hosts, and developed the open-access Silkworm Pathogen Database (SilkPathDB) to make this information readily available. The implementation of SilkPathDB involves integrating Drupal and GBrowse as a graphic interface for a Chado relational database which houses all of the datasets involved. The genomes have been assembled and annotated for comparative purposes and allow the search and analysis of homologous sequences, transposable elements, protein subcellular locations, including secreted proteins, and gene ontology. We believe that the SilkPathDB will aid researchers in the identification of silkworm parasites, understanding the mechanisms of silkworm infections, and the developmental ecology of silkworm parasites (gene expression) and their hosts. 
http://silkpathdb.swu.edu.cn.",SilkPathDB,0.989426136,Pathogen,0.592897296,SilkPathDB,0.989426136,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +"23193283, 24293649",http://www.arb-silva.de,"The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. SILVA (from Latin silva, forest, http://www.arb-silva.de) is a comprehensive web resource for up to date, quality-controlled databases of aligned ribosomal RNA (rRNA) gene sequences from the Bacteria, Archaea and Eukaryota domains and supplementary online services. The referred database release 111 (July 2012) contains 3 194 778 small subunit and 288 717 large subunit rRNA gene sequences. Since the initial description of the project, substantial new features have been introduced, including advanced quality control procedures, an improved rRNA gene aligner, online tools for probe and primer evaluation and optimized browsing, searching and downloading on the website. Furthermore, the extensively curated SILVA taxonomy and the new non-redundant SILVA datasets provide an ideal reference for high-throughput classification of data from next-generation sequencing approaches.",SILVA,0.996267796,NA,0,SILVA,0.996267796,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2013 +24165881,http://mips.gsf.de/simap,"SIMAP--the database of all-against-all protein sequence similarities and annotations with new interfaces and increased coverage. The Similarity Matrix of Proteins (SIMAP, http://mips.gsf.de/simap/) database has been designed to massively accelerate computationally expensive protein sequence analysis tasks in bioinformatics. It provides pre-calculated sequence similarities interconnecting the entire known protein sequence universe, complemented by pre-calculated protein features and domains, similarity clusters and functional annotations. SIMAP covers all major public protein databases as well as many consistently re-annotated metagenomes from different repositories. 
As of September 2013, SIMAP contains >163 million proteins corresponding to ~70 million non-redundant sequences. SIMAP uses the sensitive FASTA search heuristics, the Smith-Waterman alignment algorithm, the InterPro database of protein domain models and the BLAST2GO functional annotation algorithm. SIMAP assists biologists by facilitating the interactive exploration of the protein sequence universe. Web-Service and DAS interfaces allow connecting SIMAP with any other bioinformatic tool and resource. All-against-all protein sequence similarity matrices of project-specific protein collections are generated on request. Recent improvements allow SIMAP to cover the rapidly growing sequenced protein sequence universe. New Web-Service interfaces enhance the connectivity of SIMAP. Novel tools for interactive extraction of protein similarity networks have been added. Open access to SIMAP is provided through the web portal; the portal also contains instructions and links for software access and flat file downloads.",SIMAP,0.967703819,Similarity Matrix of Proteins,0.952235826,SIMAP,0.967703819,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/27/2013 +22080561,http://www.GABI-Kat.de,"GABI-Kat SimpleSearch: new features of the Arabidopsis thaliana T-DNA mutant database. T-DNA insertion mutants are very valuable for reverse genetics in Arabidopsis thaliana. Several projects have generated large sequence-indexed collections of T-DNA insertion lines, of which GABI-Kat is the second largest resource worldwide. User access to the collection and its Flanking Sequence Tags (FSTs) is provided by the front end SimpleSearch (http://www.GABI-Kat.de). Several significant improvements have been implemented recently. The database now relies on the TAIRv10 genome sequence and annotation dataset. All FSTs have been newly mapped using an optimized procedure that leads to improved accuracy of insertion site predictions.
A fraction of the collection with weak FST yield was re-analysed by generating new FSTs. Along with newly found predictions for older sequences about 20,000 new FSTs were included in the database. Information about groups of FSTs pointing to the same insertion site that is found in several lines but is real only in a single line are included, and many problematic FST-to-line links have been corrected using new wet-lab data. SimpleSearch currently contains data from ~71,000 lines with predicted insertions covering 62.5% of the 27,206 nuclear protein coding genes, and offers insertion allele-specific data from 9545 confirmed lines that are available from the Nottingham Arabidopsis Stock Centre.",SimpleSearch,0.99120003,NA,0,SimpleSearch,0.99120003,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/12/2011 +30721533,http://fornerislab.unipv.it/SiMPLOD,"SiMPLOD, a Structure-Integrated Database of Collagen Lysyl Hydroxylase (LH/PLOD) Enzyme Variants. PLOD genes encode for procollagen lysyl hydroxylase enzymes (LH/PLOD), a family of proteins essential for collagen biosynthesis. Several mutations affect these genes, causing severe disorders, such as Ehlers-Danlos and Bruck syndrome, as well a connective tissue disease with phenotype resembling osteogenesis imperfecta caused by lack of LH3 functions. The recently determined three-dimensional (3D) structures of the full-length human LH3/PLOD3 isoform, together with the structure of a fragment of a viral LH/PLOD homolog, are now allowing molecular mapping of the numerous disease-causing mutations, providing insights often suitable for the interpretation of the resulting disease phenotypes. However, the added value of molecular structure interpretation is affected by the limited accessibility of complex molecular data to scientific communities lacking direct expertise in structural biology. 
In this work, we present a Structurally-integrated database for Mutations of PLOD genes (SiMPLOD), a publicly-available manually-curated online database with an embedded molecular viewer interface for the visualization and interpretation of LH/PLOD mutations on available molecular models. Each SiMPLOD entry is accompanied by manual annotations extrapolated from literature references and comments about the localization of the amino acid variants on the molecular structure. Additional links to the appropriate online resources for clinically-relevant as well as biochemical data are also provided in a standardized format. The web application is available at http://fornerislab.unipv.it/SiMPLOD. © 2019 American Society for Bone and Mineral Research.",SiMPLOD,0.995890737,Structurally-integrated database for Mutations of PLOD genes,0.912969387,SiMPLOD,0.995890737,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/12/2019 +25480115,http://ocri-genomics.org/Sinbase,"Sinbase: an integrated database to study genomics, genetics and comparative genomics in Sesamum indicum. Sesame (Sesamum indicum L.) is an ancient and important oilseed crop grown widely in tropical and subtropical areas. It belongs to the gigantic order Lamiales, which includes many well-known or economically important species, such as olive (Olea europaea), leonurus (Leonurus japonicus) and lavender (Lavandula spica), many of which have important pharmacological properties. Despite their importance, genetic and genomic analyses on these species have been insufficient due to a lack of reference genome information. The now available S. indicum genome will provide an unprecedented opportunity for studying both S. indicum genetic traits and comparative genomics. To deliver S. indicum genomic information to the worldwide research community, we designed Sinbase, a web-based database with comprehensive sesame genomic, genetic and comparative genomic information.
Sinbase includes sequences of assembled sesame pseudomolecular chromosomes, protein-coding genes (27,148), transposable elements (372,167) and non-coding RNAs (1,748). In particular, Sinbase provides unique and valuable information on colinear regions with various plant genomes, including Arabidopsis thaliana, Glycine max, Vitis vinifera and Solanum lycopersicum. Sinbase also provides a useful search function and data mining tools, including a keyword search and local BLAST service. Sinbase will be updated regularly with new features, improvements to genome annotation and new genomic sequences, and is freely accessible at http://ocri-genomics.org/Sinbase/.",Sinbase,0.989667535,NA,0,Sinbase,0.989667535,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/4/2014 +23203982,http://sines.eimb.ru,"SINEBase: a database and tool for SINE analysis. SINEBase (http://sines.eimb.ru) integrates the revisited body of knowledge about short interspersed elements (SINEs). A set of formal definitions concerning SINEs was introduced. All available sequence data were screened through these definitions and the genetic elements misidentified as SINEs were discarded. As a result, 175 SINE families have been recognized in animals, flowering plants and green algae. These families were classified by the modular structure of their nucleotide sequences and the frequencies of different patterns were evaluated. These data formed the basis for the database of SINEs. The SINEBase website can be used in two ways: first, to explore the database of SINE families, and second, to analyse candidate SINE sequences using specifically developed tools. This article presents an overview of the database and the process of SINE identification and analysis.",SINEBase,0.997263968,NA,0,SINEBase,0.997263968,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/30/2012 +33507271,http://v2.sinex.cl,"SinEx DB 2.0 update 2020: database for eukaryotic single-exon coding sequences. . 
Single-exon coding sequences (CDSs), also known as 'single-exon genes' (SEGs), are defined as nuclear, protein-coding genes that lack introns in their CDSs. They have been studied not only to determine their origin and evolution but also because their expression has been linked to several types of human cancers and neurological/developmental disorders, and many exhibit tissue-specific transcription. We developed SinEx DB that houses DNA and protein sequence information of SEGs from 10 mammalian genomes including human. SinEx DB includes their functional predictions (KOG (euKaryotic Orthologous Groups)) and the relative distribution of these functions within species. Here, we report SinEx 2.0, a major update of SinEx DB that includes information of the occurrence, distribution and functional prediction of SEGs from 60 completely sequenced eukaryotic genomes, representing animals, fungi, protists and plants. The information is stored in a relational database built with MySQL Server 5.7, and the complete dataset of SEG sequences and their GO (Gene Ontology) functional assignations are available for downloading. SinEx DB 2.0 was built with a novel pipeline that helps disambiguate single-exon isoforms from SEGs. SinEx DB 2.0 is the largest available database for SEGs and provides a rich source of information for advancing our understanding of the evolution, function of SEGs and their associations with disorders including cancers and neurological and developmental diseases. Database URL: http://v2.sinex.cl/.",SinEx,0.876471639,NA,0,SinEx,0.876471639,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2021 +26818131,http://crdd.osdd.net/servers/sirnamod,"siRNAmod: A database of experimentally validated chemically modified siRNAs. Small interfering RNA (siRNA) technology has vast potential for functional genomics and development of therapeutics. 
However, it faces many obstacles predominantly instability of siRNAs due to nuclease digestion and subsequently biologically short half-life. Chemical modifications in siRNAs provide means to overcome these shortcomings and improve their stability and potency. Despite enormous utility bioinformatics resource of these chemically modified siRNAs (cm-siRNAs) is lacking. Therefore, we have developed siRNAmod, a specialized databank for chemically modified siRNAs. Currently, our repository contains a total of 4894 chemically modified-siRNA sequences, comprising 128 unique chemical modifications on different positions with various permutations and combinations. It incorporates important information on siRNA sequence, chemical modification, their number and respective position, structure, simplified molecular input line entry system canonical (SMILES), efficacy of modified siRNA, target gene, cell line, experimental methods, reference etc. It is developed and hosted using Linux Apache MySQL PHP (LAMP) software bundle. Standard user-friendly browse, search facility and analysis tools are also integrated. It would assist in understanding the effect of chemical modifications and further development of stable and efficacious siRNAs for research as well as therapeutics. siRNAmod is freely available at: http://crdd.osdd.net/servers/sirnamod.",siRNAmod,0.993242323,NA,0,siRNAmod,0.993242323,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/28/2016 +34014674,http://sistematx.ufpb.br,"The SistematX Web Portal of Natural Products: An Update. Natural products and their secondary metabolites are promising starting points for the development of drug prototypes and new drugs, as many current treatments for numerous diseases are directly or indirectly related to such compounds. State-of-the-art, curated, integrated, and frequently updated databases of secondary metabolites are thus highly relevant to drug discovery. 
The SistematX Web Portal, introduced in 2018, is undergoing development to address this need and documents crucial information about plant secondary metabolites, including the exact location of the species from which the compounds were isolated. SistematX also allows registered users to log in to the data management area and gain access to administrative pages. This study reports recent updates and modifications to the SistematX Web Portal, including a batch download option, the generation and visualization of 1H and 13C nuclear magnetic resonance spectra, and the calculation of physicochemical (drug-like and lead-like) properties and biological activity profiles. The SistematX Web Portal is freely available at http://sistematx.ufpb.br.",SistematX,0.941165566,NA,0,SistematX,0.941165566,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/20/2021 +26800248,http://lfz.corefacility.ca/sistr-app,"The Salmonella In Silico Typing Resource (SISTR): An Open Web-Accessible Tool for Rapidly Typing and Subtyping Draft Salmonella Genome Assemblies. For nearly 100 years serotyping has been the gold standard for the identification of Salmonella serovars. Despite the increasing adoption of DNA-based subtyping approaches, serotype information remains a cornerstone in food safety and public health activities aimed at reducing the burden of salmonellosis. At the same time, recent advances in whole-genome sequencing (WGS) promise to revolutionize our ability to perform advanced pathogen characterization in support of improved source attribution and outbreak analysis. We present the Salmonella In Silico Typing Resource (SISTR), a bioinformatics platform for rapidly performing simultaneous in silico analyses for several leading subtyping methods on draft Salmonella genome assemblies. 
In addition to performing serovar prediction by genoserotyping, this resource integrates sequence-based typing analyses for: Multi-Locus Sequence Typing (MLST), ribosomal MLST (rMLST), and core genome MLST (cgMLST). We show how phylogenetic context from cgMLST analysis can supplement the genoserotyping analysis and increase the accuracy of in silico serovar prediction to over 94.6% on a dataset comprised of 4,188 finished genomes and WGS draft assemblies. In addition to allowing analysis of user-uploaded whole-genome assemblies, the SISTR platform incorporates a database comprising over 4,000 publicly available genomes, allowing users to place their isolates in a broader phylogenetic and epidemiological context. The resource incorporates several metadata driven visualizations to examine the phylogenetic, geospatial and temporal distribution of genome-sequenced isolates. As sequencing of Salmonella isolates at public health laboratories around the world becomes increasingly common, rapid in silico analysis of minimally processed draft genome assemblies provides a powerful approach for molecular epidemiology in support of public health investigations. Moreover, this type of integrated analysis using multiple sequence-based methods of sub-typing allows for continuity with historical serotyping data as we transition towards the increasing adoption of genomic analyses in epidemiology. The SISTR platform is freely available on the web at https://lfz.corefacility.ca/sistr-app/.",SISTR,0.992295837,Salmonella In Silico Typing Resource,0.952685045,SISTR,0.992295837,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/22/2016 +"22139920, 28110602",http://www-bionet.sscc.ru/sitex,"SitEx: a computer system for analysis of projections of protein functional sites on eukaryotic genes. 
Search of interrelationships between the structural-functional protein organization and exon structure of encoding gene provides insights into issues concerned with the function, origin and evolution of genes and proteins. The functions of proteins and their domains are defined mostly by functional sites. The relation of the exon-intron structure of the gene to the protein functional sites has been little studied. Development of resources containing data on projections of protein functional sites on eukaryotic genes is needed. We have developed SitEx, a database that contains information on functional site amino acid positions in the exon structure of encoding gene. SitEx is integrated with the BLAST and 3DExonScan programs. BLAST is used for searching sequence similarity between the query protein and polypeptides encoded by single exons stored in SitEx. The 3DExonScan program is used for searching for structural similarity of the given protein with these polypeptides using superimpositions. The developed computer system allows users to analyze the coding features of functional sites by taking into account the exon structure of the gene, to detect the exons involved in shuffling in protein evolution, also to design protein-engineering experiments. SitEx is accessible at http://www-bionet.sscc.ru/sitex/. Currently, it contains information about 9994 functional sites presented in 2021 proteins described in proteomes of 17 organisms.",SitEx,0.971261322,NA,0,SitEx,0.971261322,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/23/2017 +30593925,http://www.pasteur-guadeloupe.fr:8081/SITVIT2,"Macro-geographical specificities of the prevailing tuberculosis epidemic as seen through SITVIT2, an updated version of the Mycobacterium tuberculosis genotyping database. 
In order to provide a global overview of genotypic, epidemiologic, demographic, phylogeographical, and drug resistance characteristics related to the prevailing tuberculosis (TB) epidemic, we hereby report an update of the 6th version of the international genotyping database SITVIT2. We also make all the available information accessible through a dedicated website (available at http://www.pasteur-guadeloupe.fr:8081/SITVIT2). Thanks to the public release of SITVIT2 which is currently the largest international multimarker genotyping database with a compilation of 111,635 clinical isolates from 169 countries of patient origin (131 countries of isolation, representing 1032 cities), our major aim is to highlight macro- and micro-geographical cleavages and phylogeographical specificities of circulating Mycobacterium tuberculosis complex (MTBC) clones worldwide. For this purpose, we retained strains typed by the most commonly used PCR-based methodology for TB genotyping, i.e., spoligotyping based on the polymorphism of the direct repeat (DR) locus, 5-loci Exact Tandem Repeats (ETRs), and MIRU-VNTR minisatellites used in 12-, 15-, or 24-loci formats. We describe the SITVIT2 database and integrated online applications that permit to interrogate the database using easy drop-down menus to draw maps, graphics and tables versus a long list of parameters and variables available for individual clinical isolates (year and place of isolation, origin, sex, and age of patient, drug-resistance, etc.). Available tools further allow to generate phylogenetical snapshot of circulating strains as Lineage-specific WebLogos, as well as minimum spanning trees of their genotypes in conjunction with their geographical distribution, drug-resistance, demographic, and epidemiologic characteristics instantaneously; whereas online statistical analyses let a user to pinpoint phylogeographical specificities of circulating MTBC lineages and conclude on actual demographic trends. 
Available associated information on gender (n = 18,944), age (n = 16,968), drug resistance (n = 19,606), and HIV serology (n = 2673), allowed to draw some important conclusions on TB geo-epidemiology; e.g. a positive correlation exists between certain Mycobacterium tuberculosis lineages (such as CAS and Beijing) and drug resistance (p-value<.001), while other lineages (such as LAM, X, and BOV) are more frequently associated with HIV-positive serology (p-value<.001). Besides, availability of information on the year of isolation of strains (range 1759-2012), also allowed to make tentative correlations between drug resistance information and lineages - portraying probable evolution trends over time and space. To conclude, the present approach of geographical mapping of predominant clinical isolates of tubercle bacilli causing the bulk of the disease both at country and regional level in conjunction with epidemiologic and demographic characteristics allows to shed new light on TB geo-epidemiology in relation with the continued waves of peopling and human migration.",SITVIT2,0.996629322,NA,0,SITVIT2,0.996629322,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/26/2018 +22365971,http://www.pasteur-guadeloupe.fr:8081/SITVIT_ONLINE,"SITVITWEB--a publicly available international multimarker database for studying Mycobacterium tuberculosis genetic diversity and molecular epidemiology. Among various genotyping methods to study Mycobacterium tuberculosis complex (MTC) genotypic polymorphism, spoligotyping and mycobacterial interspersed repetitive units-variable number of DNA tandem repeats (MIRU-VNTRs) have recently gained international approval as robust, fast, and reproducible typing methods generating data in a portable format.
Spoligotyping constituted the backbone of a publicly available database SpolDB4 released in 2006; nonetheless this method possesses a low discriminatory power when used alone and should be ideally used in conjunction with a second typing method such as MIRU-VNTRs for high-resolution epidemiological studies. We hereby describe a publicly available international database named SITVITWEB which incorporates such multimarker data allowing to have a global vision of MTC genetic diversity worldwide based on 62,582 clinical isolates corresponding to 153 countries of patient origin (105 countries of isolation). We report a total of 7105 spoligotype patterns (corresponding to 58,180 clinical isolates) - grouped into 2740 shared-types or spoligotype international types (SIT) containing 53,816 clinical isolates and 4364 orphan patterns. Interestingly, only 7% of the MTC isolates worldwide were orphans whereas more than half of SITed isolates (n=27,059) were restricted to only 24 most prevalent SITs. The database also contains a total of 2379 MIRU patterns (from 8161 clinical isolates) from 87 countries of patient origin (35 countries of isolation); these were grouped in 847 shared-types or MIRU international types (MIT) containing 6626 isolates and 1533 orphan patterns. Lastly, data on 5-locus exact tandem repeats (ETRs) were available on 4626 isolates from 59 countries of patient origin (22 countries of isolation); a total of 458 different VNTR patterns were observed - split into 245 shared-types or VNTR International Types (VIT) containing 4413 isolates) and 213 orphan patterns. Datamining of SITVITWEB further allowed to update rules defining MTC genotypic lineages as well to have a new insight into MTC population structure and worldwide distribution at country, sub-regional and continental levels. 
At evolutionary level, the data compiled may be useful to distinguish the occasional convergent evolution of genotypes versus specific evolution of sublineages essentially influenced by adaptation to the host. This database is publicly available at: http://www.pasteur-guadeloupe.fr:8081/SITVIT_ONLINE.",SITVITWEB,0.995786428,NA,0,SITVITWEB,0.995786428,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/17/2012 +25309735,http://skatebase.org,"SkateBase, an elasmobranch genome project and collection of molecular resources for chondrichthyan fishes. Chondrichthyan fishes are a diverse class of gnathostomes that provide a valuable perspective on fundamental characteristics shared by all jawed and limbed vertebrates. Studies of phylogeny, species diversity, population structure, conservation, and physiology are accelerated by genomic, transcriptomic and protein sequence data. These data are widely available for many sarcopterygii (coelacanth, lungfish and tetrapods) and actinoptergii (ray-finned fish including teleosts) taxa, but limited for chondrichthyan fishes. In this study, we summarize available data for chondrichthyes and describe resources for one of the largest projects to characterize one of these fish, Leucoraja erinacea, the little skate. SkateBase ( http://skatebase.org) serves as the skate genome project portal linking data, research tools, and teaching resources.",SkateBase,0.982891858,NA,0,SkateBase,0.982891858,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/12/2014 +30481257,http://phenome.manchester.ac.uk,"SkeletalVis: an exploration and meta-analysis data portal of cross-species skeletal transcriptomics data. Motivation Skeletal diseases are prevalent in society, but improved molecular understanding is required to formulate new therapeutic strategies. Large and increasing quantities of available skeletal transcriptomics experiments give the potential for mechanistic insight of both fundamental skeletal biology and skeletal disease. 
However, no current repository provides access to processed, readily interpretable analysis of this data. To address this, we have developed SkeletalVis, an exploration portal for skeletal gene expression experiments. Results The SkeletalVis data portal provides an exploration and comparison platform for analysed skeletal transcriptomics data. It currently hosts 287 analysed experiments with 739 perturbation responses with comprehensive downstream analysis. We demonstrate its utility in identifying both known and novel relationships between skeletal expression signatures. SkeletalVis provides users with a platform to explore the wealth of available expression data, develop consensus signatures and the ability to compare gene signatures from new experiments to the analysed data to facilitate meta-analysis. Availability and implementation The SkeletalVis data portal is freely accessible at http://phenome.manchester.ac.uk. Supplementary information Supplementary data are available at Bioinformatics online.",SkeletalVis,0.980601847,NA,0,SkeletalVis,0.980601847,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2019 +27580923,http://101.200.211.232/skeletongenetics,"Skeleton Genetics: a comprehensive database for genes and mutations related to genetic skeletal disorders. . Genetic skeletal disorders (GSD) involving the skeletal system arises through disturbances in the complex processes of skeletal development, growth and homeostasis and remain a diagnostic challenge because of their clinical heterogeneity and genetic variety. Over the past decades, tremendous effort platforms have been made to explore the complex heterogeneity, and massive new genes and mutations have been identified in different GSD, but the information supplied by literature is still limited and it is hard to meet the further needs of scientists and clinicians. 
In this study, combined with Nosology and Classification of genetic skeletal disorders, we developed the first comprehensive and annotated genetic skeletal disorders database, named 'SkeletonGenetics', which contains information about all GSD-related knowledge including 8225 mutations in 357 genes, with detailed information associated with 481 clinical diseases (2260 clinical phenotype) classified in 42 groups defined by molecular, biochemical and/or radiographic criteria from 1698 publications. Further annotations were performed to each entry including Gene Ontology, pathways analysis, protein-protein interaction, mutation annotations, disease-disease clustering and gene-disease networking. Furthermore, using concise search methods, intuitive graphical displays, convenient browsing functions and constantly updatable features, 'SkeletonGenetics' could serve as a central and integrative database for unveiling the genetic and pathways pre-dispositions of GSD.Database URL: http://101.200.211.232/skeletongenetics/.",SkeletonGenetics,0.990358194,NA,0,SkeletonGenetics,0.990358194,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/31/2016 +22859501,http://life.bsc.es/pid/mutation_database,"SKEMPI: a Structural Kinetic and Energetic database of Mutant Protein Interactions and its use in empirical models. Motivation Empirical models for the prediction of how changes in sequence alter protein-protein binding kinetics and thermodynamics can garner insights into many aspects of molecular biology. However, such models require empirical training data and proper validation before they can be widely applied. Previous databases contained few stabilizing mutations and no discussion of their inherent biases or how this impacts model construction or validation. Results We present SKEMPI, a database of 3047 binding free energy changes upon mutation assembled from the scientific literature, for protein-protein heterodimeric complexes with experimentally determined structures. 
This represents over four times more data than previously collected. Changes in 713 association and dissociation rates and 127 enthalpies and entropies were also recorded. The existence of biases towards specific mutations, residues, interfaces, proteins and protein families is discussed in the context of how the data can be used to construct predictive models. Finally, a cross-validation scheme is presented which is capable of estimating the efficacy of derived models on future data in which these biases are not present. Availability The database is available online at http://life.bsc.es/pid/mutation_database/.",SKEMPI,0.998165965,NA,0,SKEMPI,0.998165965,1,NA,30020414,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,8/1/2012 +30020414,http://life.bsc.es/pid/skempi2,"SKEMPI 2.0: an updated benchmark of changes in protein-protein binding energy, kinetics and thermodynamics upon mutation. Motivation Understanding the relationship between the sequence, structure, binding energy, binding kinetics and binding thermodynamics of protein-protein interactions is crucial to understanding cellular signaling, the assembly and regulation of molecular complexes, the mechanisms through which mutations lead to disease, and protein engineering. Results We present SKEMPI 2.0, a major update to our database of binding free energy changes upon mutation for structurally resolved protein-protein interactions. This version now contains manually curated binding data for 7085 mutations, an increase of 133%, including changes in kinetics for 1844 mutations, enthalpy and entropy changes for 443 mutations, and 440 mutations, which abolish detectable binding. Availability and implementation The database is available as supplementary data and at https://life.bsc.es/pid/skempi2/. 
Supplementary information Supplementary data are available at Bioinformatics online.",SKEMPI,0.997190654,NA,0,SKEMPI,0.997190654,1,NA,22859501,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,2/1/2019 +28194231,http://cwtung.kmu.edu.tw/skinsensdb,"SkinSensDB: a curated database for skin sensitization assays. Skin sensitization is an important toxicological endpoint for chemical hazard determination and safety assessment. Prediction of chemical skin sensitizer had traditionally relied on data from rodent models. The development of the adverse outcome pathway (AOP) and associated alternative in vitro assays have reshaped the assessment of skin sensitizers. The integration of multiple assays as key events in the AOP has been shown to have improved prediction performance. Current computational models to predict skin sensitization mainly based on in vivo assays without incorporating alternative in vitro assays. However, there are few freely available databases integrating both the in vivo and the in vitro skin sensitization assays for development of AOP-based skin sensitization prediction models. To facilitate the development of AOP-based prediction models, a skin sensitization database named SkinSensDB has been constructed by curating data from published AOP-related assays. In addition to providing datasets for developing computational models, SkinSensDB is equipped with browsing and search tools which enable the assessment of new compounds for their skin sensitization potentials based on data from structurally similar compounds. SkinSensDB is publicly available at http://cwtung.kmu.edu.tw/skinsensdb.",SkinSensDB,0.995361865,NA,0,SkinSensDB,0.995361865,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/31/2017 +30165538,http://sunlab.cpy.cuhk.edu.hk/SKmDB,"SKmDB: an integrated database of next generation sequencing information in skeletal muscle. 
Motivation Skeletal muscles have indispensable functions and also possess prominent regenerative ability. The rapid emergence of Next Generation Sequencing (NGS) data in recent years offers us an unprecedented perspective to understand gene regulatory networks governing skeletal muscle development and regeneration. However, the data from public NGS database are often in raw data format or processed with different procedures, causing obstacles to make full use of them. Results We provide SKmDB, an integrated database of NGS information in skeletal muscle. SKmDB not only includes all NGS datasets available in the human and mouse skeletal muscle tissues and cells, but also provide preliminary data analyses including gene/isoform expression levels, gene co-expression subnetworks, as well as assembly of putative lincRNAs, typical and super enhancers and transcription factor hotspots. Users can efficiently search, browse and visualize the information with the well-designed user interface and server side. SKmDB thus will offer wet lab biologists useful information to study gene regulatory mechanisms in the field of skeletal muscle development and regeneration. Availability and implementation Freely available on the web at http://sunlab.cpy.cuhk.edu.hk/SKmDB. Supplementary information Supplementary data are available at Bioinformatics online.",SKmDB,0.996431947,NA,0,SKmDB,0.996431947,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2019 +31494246,http://sliceit.soic.iupui.edu,"SliceIt: A genome-wide resource and visualization tool to design CRISPR/Cas9 screens for editing protein-RNA interaction sites in the human genome. Several protein-RNA cross linking protocols have been established in recent years to delineate the molecular interaction of an RNA Binding Protein (RBP) and its target RNAs. However, functional dissection of the role of the RBP binding sites in modulating the post-transcriptional fate of the target RNA remains challenging. 
CRISPR/Cas9 genome editing system is being commonly employed to perturb both coding and noncoding regions in the genome. With the advancements in genome-scale CRISPR/Cas9 screens, it is now possible to not only perturb specific binding sites but also probe the global impact of protein-RNA interaction sites across cell types. Here, we present SliceIt (http://sliceit.soic.iupui.edu/), a database of in silico sgRNA (single guide RNA) library to facilitate conducting such high throughput screens. SliceIt comprises of ~4.8 million unique sgRNAs with an estimated range of 2-8 sgRNAs designed per RBP binding site, for eCLIP experiments of >100 RBPs in HepG2 and K562 cell lines from the ENCODE project. SliceIt provides a user friendly environment, developed using advanced search engine framework, Elasticsearch. It is available in both table and genome browser views facilitating the easy navigation of RBP binding sites, designed sgRNAs, exon expression levels across 53 human tissues along with prevalence of SNPs and GWAS hits on binding sites. Exon expression profiles enable examination of locus specific changes proximal to the binding sites. Users can also upload custom tracks of various file formats directly onto genome browser, to navigate additional genomic features in the genome and compare with other types of omics profiles. All the binding site-centric information is dynamically accessible via ""search by gene"", ""search by coordinates"" and ""search by RBP"" options and readily available to download. Validation of the sgRNA library in SliceIt was performed by selecting RBP binding sites in Lipt1 gene and designing sgRNAs. Effect of CRISPR/Cas9 perturbations on the selected binding sites in HepG2 cell line, was confirmed based on altered proximal exon expression levels using qPCR, further supporting the utility of the resource to design experiments for perturbing protein-RNA interaction networks. 
Thus, SliceIt provides a one-stop repertoire of guide RNA library to perturb RBP binding sites, along with several layers of functional information to design both low and high throughput CRISPR/Cas9 screens, for studying the phenotypes and diseases associated with RBP binding sites.",SliceIt,0.996555865,NA,0,SliceIt,0.996555865,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/5/2019 +27010673,http://zoulab.dalton.missouri.edu/SM-TF,"SM-TF: A structural database of small molecule-transcription factor complexes. Transcription factors (TFs) are the proteins involved in the transcription process, ensuring the correct expression of specific genes. Numerous diseases arise from the dysfunction of specific TFs. In fact, over 30 TFs have been identified as therapeutic targets of about 9% of the approved drugs. In this study, we created a structural database of small molecule-transcription factor (SM-TF) complexes, available online at http://zoulab.dalton.missouri.edu/SM-TF. The 3D structures of the co-bound small molecule and the corresponding binding sites on TFs are provided in the database, serving as a valuable resource to assist structure-based drug design related to TFs. Currently, the SM-TF database contains 934 entries covering 176 TFs from a variety of species. The database is further classified into several subsets by species and organisms. The entries in the SM-TF database are linked to the UniProt database and other sequence-based TF databases. Furthermore, the druggable TFs from human and the corresponding approved drugs are linked to the DrugBank. © 2016 Wiley Periodicals, Inc.",SM-TF,0.909217815,NA,0,SM-TF,0.909217815,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/24/2016 +23220571,http://bioinfo.hrbmu.edu.cn/SM2miR,"SM2miR: a database of the experimentally validated small molecules' effects on microRNA expression. Unlabelled The inappropriate expression of microRNAs (miRNAs) is closely related with disease diagnosis, prognosis and therapy response. 
Recently, many studies have demonstrated that bioactive small molecules (or drugs) can regulate miRNA expression, which indicates that targeting miRNAs with small molecules is a new therapy for human diseases. In this study, we established the SM2miR database, which recorded 2925 relationships between 151 small molecules and 747 miRNAs in 17 species after manual curation from nearly 2000 articles. Each entry contains the detailed information about small molecules, miRNAs and evidences of their relationships, such as species, miRBase Accession number, DrugBank Accession number, PubChem Compound Identifier (CID), expression pattern of miRNA, experimental method, tissues or conditions for detection. SM2miR database has a user-friendly interface to retrieve by miRNA or small molecule. In addition, we offered a submission page. Thus, SM2miR provides a fairly comprehensive repository about the influences of small molecules on miRNA expression, which will promote the development of miRNA therapeutics. Availability SM2miR is freely available at http://bioinfo.hrbmu.edu.cn/SM2miR/.",SM2miR,0.988238767,NA,0,SM2miR,0.988238767,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/5/2012 +24531082,http://cefg.uestc.edu.cn/smal,"SMAL: A Resource of Spontaneous Mutation Accumulation Lines. Mutation is the ultimate source of genetic variation and evolution. Mutation accumulation (MA) experiments are an alternative approach to study de novo mutation events directly. We have constructed a resource of Spontaneous Mutation Accumulation Lines (SMAL; http://cefg.uestc.edu.cn/smal), which contains all the current publicly available MA lines identified by high-throughput sequencing. We have relocated and mapped the mutations based on the most recent genome annotations. A total of 5,608 single base mutations and 540 other mutations were obtained and are recorded in the current version of the SMAL database. The integrated data in SMAL provide detailed information that can be used in new theoretical analyses. 
We believe that the SMAL resource will help researchers better understand the processes of genetic variation and the incidence of disease.",SMAL,0.99170814,Spontaneous Mutation Accumulation Lines,0.987832281,SMAL,0.99170814,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/14/2014 +29040681,http://smart.embl.de,"20 years of the SMART protein domain annotation resource. SMART (Simple Modular Architecture Research Tool) is a web resource (http://smart.embl.de) for the identification and annotation of protein domains and the analysis of protein domain architectures. SMART version 8 contains manually curated models for more than 1300 protein domains, with approximately 100 new models added since our last update article (1). The underlying protein databases were synchronized with UniProt (2), Ensembl (3) and STRING (4), doubling the total number of annotated domains and other protein features to more than 200 million. In its 20th year, the SMART analysis results pages have been streamlined again and its information sources have been updated. SMART's vector based display engine has been extended to all protein schematics in SMART and rewritten to use the latest web technologies. The internal full text search engine has been redesigned and updated, resulting in greatly increased search speed.",SMART,0.996265411,Simple Modular Architecture Research Tool,0.979393108,SMART,0.996265411,1,NA,34648133,NA,NA,NA,do not merge,NA,NA,NA,1/1/2018 +34648133,http://smart.omicstudio.cloud,"SMART v1.0: A Database for Small Molecules with Functional Implications in Plants. We developed SMART v1.0 ( http://smart.omicstudio.cloud ), the first database for small molecules with functional implications in plants. The SMART database is devoted to providing and managing small molecules and their associated structural data, chemoinformatic data, protein targets, pathways and induced phenotype/function information. 
Currently, SMART v1.0 encompasses 1218 unique small molecules which are involved in multiple biological pathways. SMART v1.0 is featured with user-friendly interfaces, through which pathway-centered visualization of small molecules can be efficiently performed, and multiple types of searches (i.e., text search, structure similarity search and sequence similarity search) can be conveniently conducted. SMART v1.0 is also specifically designed to be a small molecule-sharing database, allowing users to release their newly discovered small molecules to public via the Contribute webpage. The SMART database will facilitate the comprehensive understanding of small molecules in complex biological processes in plants.",SMART,0.978860319,NA,0,SMART,0.978860319,1,NA,29040681,NA,NA,NA,do not merge,NA,NA,NA,10/14/2021 +"29062930, 29156309",http://www.secondarymetabolites.org,"The secondary metabolite bioinformatics portal: Computational tools to facilitate synthetic biology of secondary metabolite production. Natural products are among the most important sources of lead molecules for drug discovery. With the development of affordable whole-genome sequencing technologies and other 'omics tools, the field of natural products research is currently undergoing a shift in paradigms. While, for decades, mainly analytical and chemical methods gave access to this group of compounds, nowadays genomics-based methods offer complementary approaches to find, identify and characterize such molecules. This paradigm shift also resulted in a high demand for computational tools to assist researchers in their daily work. In this context, this review gives a summary of tools and databases that currently are available to mine, identify and characterize natural product biosynthesis pathways and their producers based on 'omics data. 
A web portal called Secondary Metabolite Bioinformatics Portal (SMBP at http://www.secondarymetabolites.org) is introduced to provide a one-stop catalog and links to these bioinformatics resources. In addition, an outlook is presented how the existing tools and those to be developed will influence synthetic biology approaches in the natural products field.",SMBP,0.996750757,Secondary Metabolite Bioinformatics Portal,0.986019856,SMBP,0.996750757,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2017 +26138588,http://smedgd.stowers.org,"SmedGD 2.0: The Schmidtea mediterranea genome database. Planarians have emerged as excellent models for the study of key biological processes such as stem cell function and regulation, axial polarity specification, regeneration, and tissue homeostasis among others. The most widely used organism for these studies is the free-living flatworm Schmidtea mediterranea. In 2007, the Schmidtea mediterranea Genome Database (SmedGD) was first released to provide a much needed resource for the small, but growing planarian community. SmedGD 1.0 has been a depository for genome sequence, a draft assembly, and related experimental data (e.g., RNAi phenotypes, in situ hybridization images, and differential gene expression results). We report here a comprehensive update to SmedGD (SmedGD 2.0) that aims to expand its role as an interactive community resource. The new database includes more recent, and up-to-date transcription data, provides tools that enhance interconnectivity between different genome assemblies and transcriptomes, including next-generation assemblies for both the sexual and asexual biotypes of S. mediterranea. SmedGD 2.0 (http://smedgd.stowers.org) not only provides significantly improved gene annotations, but also tools for data sharing, attributes that will help both the planarian and biomedical communities to more efficiently mine the genomics and transcriptomics of S. 
mediterranea.",SmedGD,0.994868353,Schmidtea mediterranea Genome Database,0.978626414,SmedGD,0.994868353,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/17/2015 +24163098,http://www.smmrna.org,"SMMRNA: a database of small molecule modulators of RNA. We have developed SMMRNA, an interactive database, available at http://www.smmrna.org, with special focus on small molecule ligands targeting RNA. Currently, SMMRNA consists of ∼770 unique ligands along with structural images of RNA molecules. Each ligand in the SMMRNA contains information such as Kd, Ki, IC50, ΔTm, molecular weight (MW), hydrogen donor and acceptor count, XlogP, number of rotatable bonds, number of aromatic rings and 2D and 3D structures. These parameters can be explored using text search, advanced search, substructure and similarity-based analysis tools that are embedded in SMMRNA. A structure editor is provided for 3D visualization of ligands. Advance analysis can be performed using substructure and OpenBabel-based chemical similarity fingerprints. Upload facility for both RNA and ligands is also provided. The physicochemical properties of the ligands were further examined using OpenBabel descriptors, hierarchical clustering, binning partition and multidimensional scaling. We have also generated a 3D conformation database of ligands to support the structure and ligand-based screening. SMMRNA provides comprehensive resource for further design, development and refinement of small molecule modulators for selective targeting of RNA molecules.",SMMRNA,0.997671962,NA,0,SMMRNA,0.997671962,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/24/2013 +26452014,http://www.cert.ucr.edu,"Development of a database for chemical mechanism assignments for volatile organic emissions. Unlabelled The development of a database for making model species assignments when preparing total organic gas (TOG) emissions input for atmospheric models is described. 
This database currently has assignments of model species for 12 different gas-phase chemical mechanisms for over 1700 chemical compounds and covers over 3000 chemical categories used in five different anthropogenic TOG profile databases or output by two different biogenic emissions models. This involved developing a unified chemical classification system, assigning compounds to mixtures, assigning model species for the mechanisms to the compounds, and making assignments for unknown, unassigned, and nonvolatile mass. The comprehensiveness of the assignments, the contributions of various types of speciation categories to current profile and total emissions data, inconsistencies with existing undocumented model species assignments, and remaining speciation issues and areas of needed work are also discussed. The use of the system to prepare input for SMOKE, the Speciation Tool, and for biogenic models is described in the supplementary materials. The database, associated programs and files, and a users manual are available online at http://www.cert.ucr.edu/~carter/emitdb . Implications Assigning air quality model species to the hundreds of emitted chemicals is a necessary link between emissions data and modeling effects of emissions on air quality. This is not easy and makes it difficult to implement new and more chemically detailed mechanisms in models. If done incorrectly, it is similar to errors in emissions speciation or the chemical mechanism used. Nevertheless, making such assignments is often an afterthought in chemical mechanism development and emissions processing, and existing assignments are usually undocumented and have errors and inconsistencies. 
This work is designed to address some of these problems.",SMOKE,0.694417238,NA,0,SMOKE,0.694417238,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME; no name in abstract,NA,NA,10/1/2015 +30674925,http://med.unr.edu/physio/transcriptome,"Smooth Muscle Transcriptome Browser: offering genome-wide references and expression profiles of transcripts expressed in intestinal SMC, ICC, and PDGFRα+ cells. Transcriptome data on the quantitative numbers of transcriptional variants expressed in primary cells offer essential clues into specific cellular functions and biological processes. We have previously collected transcriptomes from primary smooth muscle cells (SMC), interstitial cells of Cajal (ICC), and PDGFRα+ cells (fibroblast-like cells) isolated from murine jejunal and colonic smooth muscle and/or mucosal tissues as well as transcriptomes from the associated tissues (jejunal smooth muscle, colonic smooth muscle, and colonic mucosa). In this study, we have built the Smooth Muscle Transcriptome Browser (SMTB), https://med.unr.edu/physio/transcriptome , a web-based, graphical user interface that offers genetic references and expression profiles of all transcripts expressed at both the cellular (SMC, ICC, and PDGFRα+ cells) and tissue level (smooth muscle and mucosal tissue). This browser brings new insights into the cellular and biological functions of the cell types in gastrointestinal smooth muscle biology.",SMTB,0.983972132,Smooth Muscle Transcriptome Browser,0.98817302,Smooth Muscle Transcriptome Browser,0.98817302,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/23/2019 +24203708,http://www.smpdb.ca,"SMPDB 2.0: big improvements to the Small Molecule Pathway Database. The Small Molecule Pathway Database (SMPDB, http://www.smpdb.ca) is a comprehensive, colorful, fully searchable and highly interactive database for visualizing human metabolic, drug action, drug metabolism, physiological activity and metabolic disease pathways. 
SMPDB contains >600 pathways with nearly 75% of its pathways not found in any other database. All SMPDB pathway diagrams are extensively hyperlinked and include detailed information on the relevant tissues, organs, organelles, subcellular compartments, protein cofactors, protein locations, metabolite locations, chemical structures and protein quaternary structures. Since its last release in 2010, SMPDB has undergone substantial upgrades and significant expansion. In particular, the total number of pathways in SMPDB has grown by >70%. Additionally, every previously entered pathway has been completely redrawn, standardized, corrected, updated and enhanced with additional molecular or cellular information. Many SMPDB pathways now include transporter proteins as well as much more physiological, tissue, target organ and reaction compartment data. Thanks to the development of a standardized pathway drawing tool (called PathWhiz) all SMPDB pathways are now much more easily drawn and far more rapidly updated. PathWhiz has also allowed all SMPDB pathways to be saved in a BioPAX format. Significant improvements to SMPDB's visualization interface now make the browsing, selection, recoloring and zooming of pathways far easier and far more intuitive. Because of its utility and breadth of coverage, SMPDB is now integrated into several other databases including HMDB and DrugBank.",SMPDB,0.997425482,Small Molecule Pathway Database,0.985691021,SMPDB,0.997425482,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2013 +34536568,http://bigdata.ibp.ac.cn/SmProt,"SmProt: A Reliable Repository with Comprehensive Annotation of Small Proteins Identified from Ribosome Profiling. Small proteins specifically refer to proteins consisting of less than 100 amino acids translated from small open reading frames (sORFs), which were usually missed in previous genome annotation. The significance of small proteins has been revealed in current years, along with the discovery of their diverse functions. 
However, systematic annotation of small proteins is still insufficient. SmProt was specially developed to provide valuable information on small proteins for scientific community. Here we present the update of SmProt, which emphasizes reliability of translated sORFs, genetic variants in translated sORFs, disease-specific sORF translation events or sequences, and remarkably increased data volume. More components such as non-ATG translation initiation, function, and new sources are also included. SmProt incorporated 638,958 unique small proteins curated from 3,165,229 primary records, which were computationally predicted from 419 ribosome profiling (Ribo-seq) datasets or collected from literature and other sources from 370 cell lines or tissues in 8 species (Homo sapiens, Mus musculus, Rattus norvegicus, Drosophila melanogaster, Danio rerio, Saccharomyces cerevisiae, Caenorhabditis elegans, and Escherichia coli). In addition, small protein families identified from human microbiomes were also collected. All datasets in SmProt are free to access, and available for browse, search, and bulk downloads at http://bigdata.ibp.ac.cn/SmProt/.",SmProt,0.99719584,NA,0,SmProt,0.99719584,1,NA,28137767,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,8/1/2021 +28137767,http://bioinfo.ibp.ac.cn/SmProt,"SmProt: a database of small proteins encoded by annotated coding and non-coding RNA loci. Small proteins is the general term for proteins with length shorter than 100 amino acids. Identification and functional studies of small proteins have advanced rapidly in recent years, and several studies have shown that small proteins play important roles in diverse functions including development, muscle contraction and DNA repair. Identification and characterization of previously unrecognized small proteins may contribute in important ways to cell biology and human health. 
Current databases are generally somewhat deficient in that they have either not collected small proteins systematically, or contain only predictions of small proteins in a limited number of tissues and species. Here, we present a specifically designed web-accessible database, small proteins database (SmProt, http://bioinfo.ibp.ac.cn/SmProt), which is a database documenting small proteins. The current release of SmProt incorporates 255 010 small proteins computationally or experimentally identified in 291 cell lines/tissues derived from eight popular species. The database provides a variety of data including basic information (sequence, location, gene name, organism, etc.) as well as specific information (experiment, function, disease type, etc.). To facilitate data extraction, SmProt supports multiple search options, including species, genome location, gene name and their aliases, cell lines/tissues, ORF type, gene type, PubMed ID and SmProt ID. SmProt also incorporates a service for the BLAST alignment search and provides a local UCSC Genome Browser. Additionally, SmProt defines a high-confidence set of small proteins and predicts the functions of the small proteins.",SmProt,0.989004731,small proteins database,0.90862302,SmProt,0.989004731,1,NA,34536568,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,7/1/2018 +27899672,http://swissmodel.expasy.org/repository,"The SWISS-MODEL Repository-new features and functionality. SWISS-MODEL Repository (SMR) is a database of annotated 3D protein structure models generated by the automated SWISS-MODEL homology modeling pipeline. It currently holds >400 000 high quality models covering almost 20% of Swiss-Prot/UniProtKB entries. In this manuscript, we provide an update of features and functionalities which have been implemented recently. We address improvements in target coverage, model quality estimates, functional annotations and improved in-page visualization.
We also introduce a new update concept which includes regular updates of an expanded set of core organism models and UniProtKB-based targets, complemented by user-driven on-demand update of individual models. With the new release of the modeling pipeline, SMR has implemented a REST-API and adopted an open licencing model for accessing model coordinates, thus enabling bulk download for groups of targets fostering re-use of models in other contexts. SMR can be accessed at https://swissmodel.expasy.org/repository.",SMR,0.991144598,SWISS-MODEL Repository,0.87782962,SMR,0.991144598,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2016 +23129220,http://www.nitrosation.org,"SNObase, a database for S-nitrosation modification. S-Nitros(yl)ation is a ubiquitous redox-based post-translational modification of protein cysteine thiols by nitric oxide or its derivatives, which transduces the bioactivity of nitric oxide (NO) by regulation of protein conformation, activity, stability, localization and protein-protein interactions. These years, more and more S-nitrosated proteins were identified in physiological and pathological processes and the number is still growing. Here we developed a database named SNObase ( http://www.nitrosation.org ), which collected S-nitrosation targets extracted from literatures up to June 1st, 2012. SNObase contained 2561 instances, and provided information about S-nitrosation targets, sites, biological model, related diseases, trends of S-nitrosation level and effects of S-nitrosation on protein function. With SNObase, we did functional analysis for all the SNO targets: In the gene ontology (GO) biological process category, some processes were discovered to be related to S-nitrosation (""response to drug"", ""regulation of cell motion"") besides the previously reported related processes. In the GO cellular component category, cytosol and mitochondrion were both enriched. 
From the KEGG pathway enrichment results, we found SNO targets were enriched in different diseases, which suggests possible significant roles of S-nitrosation in the progress of these diseases. This SNObase means to be a database with precise, comprehensive and easily accessible information, an environment to help researchers integrate data with comparison and relevancy analysis between different groups or works, and also an SNO knowledgebase offering feasibility for systemic and global analysis of S-nitrosation in interdisciplinary studies.",SNObase,0.997782171,NA,0,SNObase,0.997782171,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2012 +24148649,http://snoopy.med.miyazaki-u.ac.jp,"snOPY: a small nucleolar RNA orthological gene database. Background Small nucleolar RNAs (snoRNAs) are a class of non-coding RNAs that guide the modification of specific nucleotides in ribosomal RNAs (rRNAs) and small nuclear RNAs (snRNAs). Although most non-coding RNAs undergo post-transcriptional modifications prior to maturation, the functional significance of these modifications remains unknown. Here, we introduce the snoRNA orthological gene database (snOPY) as a tool for studying RNA modifications. Findings snOPY provides comprehensive information about snoRNAs, snoRNA gene loci, and target RNAs. It also contains data for orthologues from various species, which enables users to analyze the evolution of snoRNA genes. In total, 13,770 snoRNA genes, 10,345 snoRNA gene loci, and 133 target RNAs have been registered. Users can search and access the data efficiently using a simple web interface with a series of internal links. snOPY is freely available on the web at http://snoopy.med.miyazaki-u.ac.jp. Conclusions snOPY is the database that provides information about the small nucleolar RNAs and their orthologues. 
It will help users to study RNA modifications and snoRNA gene evolution.",snOPY,0.990186676,snoRNA orthological gene database,0.855695571,snOPY,0.990186676,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/23/2013 +25075616,http://bioinfo.hrbmu.edu.cn/SNP_lincTFBS,"SNP@lincTFBS: an integrated database of polymorphisms in human LincRNA transcription factor binding sites. Large intergenic non-coding RNAs (lincRNAs) are a new class of functional transcripts, and aberrant expression of lincRNAs was associated with several human diseases. The genetic variants in lincRNA transcription factor binding sites (TFBSs) can change lincRNA expression, thereby affecting the susceptibility to human diseases. To identify and annotate these functional candidates, we have developed a database SNP@lincTFBS, which is devoted to the exploration and annotation of single nucleotide polymorphisms (SNPs) in potential TFBSs of human lincRNAs. We identified 6,665 SNPs in 6,614 conserved TFBSs of 2,423 human lincRNAs. In addition, with ChIPSeq dataset, we identified 139,576 SNPs in 304,517 transcription factor peaks of 4,813 lincRNAs. We also performed comprehensive annotation for these SNPs using 1000 Genomes Project datasets across 11 populations. Moreover, one of the distinctive features of SNP@lincTFBS is the collection of disease-associated SNPs in the lincRNA TFBSs and SNPs in the TFBSs of disease-associated lincRNAs. The web interface enables both flexible data searches and downloads. Quick search can be query of lincRNA name, SNP identifier, or transcription factor name. SNP@lincTFBS provides significant advances in identification of disease-associated lincRNA variants and improved convenience to interpret the discrepant expression of lincRNAs. 
The SNP@lincTFBS database is available at http://bioinfo.hrbmu.edu.cn/SNP_lincTFBS.",SNP@lincTFBS,0.949685822,NA,0,SNP@lincTFBS,0.949685822,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/30/2014 +31511885,http://gong_lab.hzau.edu.cn/SNP2APA,"SNP2APA: a database for evaluating effects of genetic variants on alternative polyadenylation in human cancers. Alternative polyadenylation (APA) is an important post-transcriptional regulation that recognizes different polyadenylation signals (PASs), resulting in transcripts with different 3' untranslated regions, thereby influencing a series of biological processes and functions. Recent studies have revealed that some single nucleotide polymorphisms (SNPs) could contribute to tumorigenesis and development through dysregulating APA. However, the associations between SNPs and APA in human cancers remain largely unknown. Here, using genotype and APA data of 9082 samples from The Cancer Genome Atlas (TCGA) and The Cancer 3'UTR Altas (TC3A), we systematically identified SNPs affecting APA events across 32 cancer types and defined them as APA quantitative trait loci (apaQTLs). As a result, a total of 467 942 cis-apaQTLs and 30 721 trans-apaQTLs were identified. By integrating apaQTLs with survival and genome-wide association studies (GWAS) data, we further identified 2154 apaQTLs associated with patient survival time and 151 342 apaQTLs located in GWAS loci. In addition, we designed an online tool to predict the effects of SNPs on PASs by utilizing PAS motif prediction tool. Finally, we developed SNP2APA, a user-friendly and intuitive database (http://gong_lab.hzau.edu.cn/SNP2APA/) for data browsing, searching, and downloading.
SNP2APA will significantly improve our understanding of genetic variants and APA in human cancers.",SNP2APA,0.91174376,NA,0,SNP2APA,0.91174376,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +26949480,http://apps.icbi.georgetown.edu/snp2structure,"SNP2Structure: A Public and Versatile Resource for Mapping and Three-Dimensional Modeling of Missense SNPs on Human Protein Structures. One of the long-standing challenges in biology is to understand how non-synonymous single nucleotide polymorphisms (nsSNPs) change protein structure and further affect their function. While it is impractical to solve all the mutated protein structures experimentally, it is quite feasible to model the mutated structures in silico. Toward this goal, we built a publicly available structure database resource (SNP2Structure, https://apps.icbi.georgetown.edu/snp2structure) focusing on missense mutations, msSNP. Compared with web portals with similar aims, SNP2Structure has the following major advantages. First, our portal offers direct comparison of two related 3D structures. Second, the protein models include all interacting molecules in the original PDB structures, so users are able to determine regions of potential interaction changes when a protein mutation occurs. Third, the mutated structures are available to download locally for further structural and functional analysis. Fourth, we used Jsmol package to display the protein structure that has no system compatibility issue. SNP2Structure provides reliable, high quality mapping of nsSNPs to 3D protein structures enabling researchers to explore the likely functional impact of human disease-causing mutations.",SNP2Structure,0.994531825,NA,0,SNP2Structure,0.994531825,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/30/2015 +27899579,http://ccg.vital-it.ch/snp2tfbs,"SNP2TFBS - a database of regulatory SNPs affecting predicted transcription factor binding site affinity. 
SNP2TFBS is a computational resource intended to support researchers investigating the molecular mechanisms underlying regulatory variation in the human genome. The database essentially consists of a collection of text files providing specific annotations for human single nucleotide polymorphisms (SNPs), namely whether they are predicted to abolish, create or change the affinity of one or several transcription factor (TF) binding sites. A SNP's effect on TF binding is estimated based on a position weight matrix (PWM) model for the binding specificity of the corresponding factor. These data files are regenerated at regular intervals by an automatic procedure that takes as input a reference genome, a comprehensive SNP catalogue and a collection of PWMs. SNP2TFBS is also accessible over a web interface, enabling users to view the information provided for an individual SNP, to extract SNPs based on various search criteria, to annotate uploaded sets of SNPs or to display statistics about the frequencies of binding sites affected by selected SNPs. Homepage: http://ccg.vital-it.ch/snp2tfbs/.",SNP2TFBS,0.997906208,NA,0,SNP2TFBS,0.997906208,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +22210871,http://www.rostlab.org/services/snpdbe,"SNPdbe: constructing an nsSNP functional impacts database. Unlabelled Many existing databases annotate experimentally characterized single nucleotide polymorphisms (SNPs). Each non-synonymous SNP (nsSNP) changes one amino acid in the gene product (single amino acid substitution;SAAS). This change can either affect protein function or be neutral in that respect. Most polymorphisms lack experimental annotation of their functional impact. Here, we introduce SNPdbe-SNP database of effects, with predictions of computationally annotated functional impacts of SNPs. Database entries represent nsSNPs in dbSNP and 1000 Genomes collection, as well as variants from UniProt and PMD. SAASs come from >2600 organisms; 'human' being the most prevalent. 
The impact of each SAAS on protein function is predicted using the SNAP and SIFT algorithms and augmented with experimentally derived function/structure information and disease associations from PMD, OMIM and UniProt. SNPdbe is consistently updated and easily augmented with new sources of information. The database is available as an MySQL dump and via a web front end that allows searches with any combination of organism names, sequences and mutation IDs. Availability http://www.rostlab.org/services/snpdbe.",SNPdbe,0.988785923,NA,0,SNPdbe,0.988785923,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/30/2011 +22140107,http://www.SNPedia.com,"SNPedia: a wiki supporting personal genome annotation, interpretation and analysis. SNPedia (http://www.SNPedia.com) is a wiki resource of the functional consequences of human genetic variation as published in peer-reviewed studies. Online since 2006 and freely available for personal use, SNPedia has focused on the medical, phenotypic and genealogical associations of single nucleotide polymorphisms. Entries are formatted to allow associations to be assigned to single genotypes as well as sets of genotypes (genosets). In this article, we discuss the growth of this resource and its use by affiliated software to create personal genome reports.",SNPedia,0.99539566,NA,0,SNPedia,0.99539566,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/2/2011 +22075996,http://snpeffect.switchlab.org,"SNPeffect 4.0: on-line prediction of molecular and structural effects of protein-coding variants. Single nucleotide variants (SNVs) are, together with copy number variation, the primary source of variation in the human genome and are associated with phenotypic variation such as altered response to drug treatment and susceptibility to disease. Linking structural effects of non-synonymous SNVs to functional outcomes is a major issue in structural bioinformatics. 
The SNPeffect database (http://snpeffect.switchlab.org) uses sequence- and structure-based bioinformatics tools to predict the effect of protein-coding SNVs on the structural phenotype of proteins. It integrates aggregation prediction (TANGO), amyloid prediction (WALTZ), chaperone-binding prediction (LIMBO) and protein stability analysis (FoldX) for structural phenotyping. Additionally, SNPeffect holds information on affected catalytic sites and a number of post-translational modifications. The database contains all known human protein variants from UniProt, but users can now also submit custom protein variants for a SNPeffect analysis, including automated structure modeling. The new meta-analysis application allows plotting correlations between phenotypic features for a user-selected set of variants.",SNPeffect,0.994509757,NA,0,SNPeffect,0.994509757,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/10/2011 +22544707,http://www.snp-nexus.org,"SNPnexus: a web server for functional annotation of novel and publicly known genetic variants (2012 update). Broader functional annotation of single nucleotide variations is a valuable mean for prioritizing targets in further disease studies and large-scale genotyping projects. We originally developed SNPnexus to assess the potential significance of known and novel SNPs on the major transcriptome, proteome, regulatory and structural variation models in order to identify the phenotypically important variants. Being committed to providing continuous support to the scientific community, we have substantially improved SNPnexus over time by incorporating a broader range of variations such as insertions/deletions, block substitutions, IUPAC codes submission and region-based analysis, expanding the query size limit, and most importantly including additional categories for the assessment of functional impact. 
SNPnexus provides a comprehensive set of annotations for genomic variation data by characterizing related functional consequences at the transcriptome/proteome levels of seven major annotation systems with in-depth analysis of potential deleterious effects, inferring physical and cytogenetic mapping, reporting information on HapMap genotype/allele data, finding overlaps with potential regulatory elements, structural variations and conserved elements, and retrieving links with previously reported genetic disease studies. SNPnexus has a user-friendly web interface with an improved query structure, enhanced functional annotation categories and flexible output presentation making it practically useful for biologists. SNPnexus is freely available at http://www.snp-nexus.org.",SNPnexus,0.9974401,NA,0,SNPnexus,0.9974401,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/28/2012 +22135417,http://lambchop.ads.uga.edu/snpxge2/index.php,"SNPxGE(2): a database for human SNP-coexpression associations. Motivation Recently, gene-coexpression relationships have been found to be often conditional and dynamic. Many studies have suggested that single nucleotide polymorphisms (SNPs) have impacts on gene expression variations in human populations. Results The SNPxGE(2) database contains the computationally predicted human SNP-coexpression associations, i.e. the differential coexpression between two genes is associated with the genotypes of an SNP. These data were generated from a large-scale association study that was based on the HapMap phase I data, which covered 269 individuals from 4 human populations, 556 873 SNPs and 15 000 gene expression profiles. In order to reduce the computational cost, the SNP-coexpression associations were assessed using gap/substitution models, proven to have a comparable power to logistic regression models. 
The results, at a false discovery rate (FDR) cutoff of 0.1, consisted of 44 769 and 50 792 SNP-coexpression associations based on single and pooled populations, respectively, and can be queried in the SNPxGE(2) database via either gene symbol or reference SNP ID. For each reported association, a detailed information page is provided. Availability http://lambchop.ads.uga.edu/snpxge2/index.php Contact wyp1125@uga.edu, rrekaya@uga.edu.",SNPxGE(2,0.970167935,NA,0,SNPxGE(2,0.970167935,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/30/2011 +25802363,"http://www.isham.org/, http://its.mycologylab.org","International Society of Human and Animal Mycology (ISHAM)-ITS reference DNA barcoding database--the quality controlled standard tool for routine identification of human and animal pathogenic fungi. Human and animal fungal pathogens are a growing threat worldwide leading to emerging infections and creating new risks for established ones. There is a growing need for a rapid and accurate identification of pathogens to enable early diagnosis and targeted antifungal therapy. Morphological and biochemical identification methods are time-consuming and require trained experts. Alternatively, molecular methods, such as DNA barcoding, a powerful and easy tool for rapid monophasic identification, offer a practical approach for species identification and less demanding in terms of taxonomical expertise. However, its wide-spread use is still limited by a lack of quality-controlled reference databases and the evolving recognition and definition of new fungal species/complexes. 
An international consortium of medical mycology laboratories was formed aiming to establish a quality controlled ITS database under the umbrella of the ISHAM working group on ""DNA barcoding of human and animal pathogenic fungi."" A new database, containing 2800 ITS sequences representing 421 fungal species, providing the medical community with a freely accessible tool at http://www.isham.org/ and http://its.mycologylab.org/ to rapidly and reliably identify most agents of mycoses, was established. The generated sequences included in the new database were used to evaluate the variation and overall utility of the ITS region for the identification of pathogenic fungi at intra-and interspecies level. The average intraspecies variation ranged from 0 to 2.25%. This highlighted selected pathogenic fungal species, such as the dermatophytes and emerging yeast, for which additional molecular methods/genetic markers are required for their reliable identification from clinical and veterinary specimens.",SHAM)-ITS,0.766970932,Society of Human and Animal Mycology,0.842032149,Society of Human and Animal Mycology,0.842032149,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/22/2015 +"23180788, 26578591",http://compbio.uthsc.edu/SomamiR,"SomamiR: a database for somatic mutations impacting microRNA function in cancer. Whole-genome sequencing of cancers has begun to identify thousands of somatic mutations that distinguish the genomes of normal tissues from cancers. While many germline mutations within microRNAs (miRNAs) and their targets have been shown to alter miRNA function in cancers and have been associated with cancer risk, the impact of somatic mutations on miRNA function has received relatively little attention. 
Here, we have created the SomamiR database (http://compbio.uthsc.edu/SomamiR/) to provide a comprehensive resource that integrates several types of data for use in investigating the impact of somatic and germline mutations on miRNA function in cancer. The database contains somatic mutations that may create or disrupt miRNA target sites and integrates these somatic mutations with germline mutations within the same target sites, genome-wide and candidate gene association studies of cancer and functional annotations that link genes containing mutations with cancer. Additionally, the database contains a collection of germline and somatic mutations in miRNAs and their targets that have been experimentally shown to impact miRNA function and have been associated with cancer.",SomamiR,0.985346377,NA,0,SomamiR,0.985346377,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +"26527729, 29140531, 30485709",http://www.sorfs.org,"sORFs.org: a repository of small ORFs identified by ribosome profiling. With the advent of ribosome profiling, a next generation sequencing technique providing a ""snap-shot'' of translated mRNA in a cell, many short open reading frames (sORFs) with ribosomal activity were identified. Follow-up studies revealed the existence of functional peptides, so-called micropeptides, translated from these 'sORFs', indicating a new class of bio-active peptides. Over the last few years, several micropeptides exhibiting important cellular functions were discovered. However, ribosome occupancy does not necessarily imply an actual function of the translated peptide, leading to the development of various tools assessing the coding potential of sORFs. Here, we introduce sORFs.org (http://www.sorfs.org), a novel database for sORFs identified using ribosome profiling. Starting from ribosome profiling, sORFs.org identifies sORFs, incorporates state-of-the-art tools and metrics and stores results in a public database. 
Two query interfaces are provided, a default one enabling quick lookup of sORFs and a BioMart interface providing advanced query and export possibilities. At present, sORFs.org harbors 263 354 sORFs that demonstrate ribosome occupancy, originating from three different cell lines: HCT116 (human), E14_mESC (mouse) and S2 (fruit fly). sORFs.org aims to provide an extensive sORFs database accessible to researchers with limited bioinformatics knowledge, thus enabling easy integration into personal projects.",sORFs.org,0.976624405,NA,0,sORFs.org,0.976624405,3,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/28/2018 +27352859,http://structuralbiology.cau.edu.cn/sorghum/index.html,"SorghumFDB: sorghum functional genomics database with multidimensional network analysis. . Sorghum (Sorghum bicolor [L.] Moench) has excellent agronomic traits and biological properties, such as heat and drought-tolerance. It is a C4 grass and potential bioenergy-producing plant, which makes it an important crop worldwide. With the sorghum genome sequence released, it is essential to establish a sorghum functional genomics data mining platform. We collected genomic data and some functional annotations to construct a sorghum functional genomics database (SorghumFDB). SorghumFDB integrated knowledge of sorghum gene family classifications (transcription regulators/factors, carbohydrate-active enzymes, protein kinases, ubiquitins, cytochrome P450, monolignol biosynthesis related enzymes, R-genes and organelle-genes), detailed gene annotations, miRNA and target gene information, orthologous pairs in the model plants Arabidopsis, rice and maize, gene loci conversions and a genome browser. We further constructed a dynamic network of multidimensional biological relationships, comprised of the co-expression data, protein-protein interactions and miRNA-target pairs. 
We took effective measures to combine the network, gene set enrichment and motif analyses to determine the key regulators that participate in related metabolic pathways, such as the lignin pathway, which is a major biological process in bioenergy-producing plants.Database URL: http://structuralbiology.cau.edu.cn/sorghum/index.html.",SorghumFDB,0.993632734,NA,0,SorghumFDB,0.993632734,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/26/2016 +21575179,http://sorgo.genouest.org/index.php,"SORGOdb: Superoxide Reductase Gene Ontology curated DataBase. Background Superoxide reductases (SOR) catalyse the reduction of superoxide anions to hydrogen peroxide and are involved in the oxidative stress defences of anaerobic and facultative anaerobic organisms. Genes encoding SOR were discovered recently and suffer from annotation problems. These genes, named sor, are short and the transfer of annotations from previously characterized neelaredoxin, desulfoferrodoxin, superoxide reductase and rubredoxin oxidase has been heterogeneous. Consequently, many sor remain anonymous or mis-annotated. Description SORGOdb is an exhaustive database of SOR that proposes a new classification based on domain architecture. SORGOdb supplies a simple user-friendly web-based database for retrieving and exploring relevant information about the proposed SOR families. The database can be queried using an organism name, a locus tag or phylogenetic criteria, and also offers sequence similarity searches using BlastP. Genes encoding SOR have been re-annotated in all available genome sequences (prokaryotic and eukaryotic (complete and in draft) genomes, updated in May 2010). Conclusions SORGOdb contains 325 non-redundant and curated SOR, from 274 organisms. It proposes a new classification of SOR into seven different classes and allows biologists to explore and analyze sor in order to establish correlations between the class of SOR and organism phenotypes. 
SORGOdb is freely available at http://sorgo.genouest.org/index.php.",SORGOdb,0.995990634,Superoxide Reductase Gene,0.695761979,SORGOdb,0.995990634,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/16/2011 +26744602,http://sorgsd.big.ac.cn,"SorGSD: a sorghum genome SNP database. Background Sorghum (Sorghum bicolor) is one of the most important cereal crops globally and a potential energy plant for biofuel production. In order to explore genetic gain for a range of important quantitative traits, such as drought and heat tolerance, grain yield, stem sugar accumulation, and biomass production, via the use of molecular breeding and genomic selection strategies, knowledge of the available genetic variation and the underlying sequence polymorphisms, is required. Results Based on the assembled and annotated genome sequences of Sorghum bicolor (v2.1) and the recently published sorghum re-sequencing data, ~62.9 M SNPs were identified among 48 sorghum accessions and included in a newly developed sorghum genome SNP database SorGSD (http://sorgsd.big.ac.cn). The diverse panel of 48 sorghum lines can be classified into four groups, improved varieties, landraces, wild and weedy sorghums, and a wild relative Sorghum propinquum. SorGSD has a web-based query interface to search or browse SNPs from individual accessions, or to compare SNPs among several lines. The query results can be visualized as text format in tables, or rendered as graphics in a genome browser. Users may find useful annotation from query results including type of SNPs such as synonymous or non-synonymous SNPs, start, stop of splice variants, chromosome locations, and links to the annotation on Phytozome (www.phytozome.net) sorghum genome database. In addition, general information related to sorghum research such as online sorghum resources and literature references can also be found on the website. All the SNP data and annotations can be freely download from the website.
Conclusions SorGSD is a comprehensive web-portal providing a database of large-scale genome variation across all racial types of cultivated sorghum and wild relatives. It can serve as a bioinformatics platform for a range of genomics and molecular breeding activities for sorghum and for other C4 grasses.",SorGSD,0.9974702,NA,0,SorGSD,0.9974702,1,NA,34344425,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/7/2016 +34344425,http://ngdc.cncb.ac.cn/sorgsd,"SorGSD: updating and expanding the sorghum genome science database with new contents and tools. Background As the fifth major cereal crop originated from Africa, sorghum (Sorghum bicolor) has become a key C4 model organism for energy plant research. With the development of high-throughput detection technologies for various omics data, much multi-dimensional and multi-omics information has been accumulated for sorghum. Integrating this information may accelerate genetic research and improve molecular breeding for sorghum agronomic traits. Results We updated the Sorghum Genome SNP Database (SorGSD) by adding new data, new features and renamed it to Sorghum Genome Science Database (SorGSD). In comparison with the original version SorGSD, which contains SNPs from 48 sorghum accessions mapped to the reference genome BTx623 (v2.1), the new version was expanded to 289 sorghum lines with both single nucleotide polymorphisms (SNPs) and small insertions/deletions (INDELs), which were aligned to the newly assembled and annotated sorghum genome BTx623 (v3.1). Moreover, phenotypic data and panicle pictures of critical accessions were provided in the new version. We implemented new tools including ID Conversion, Homologue Search and Genome Browser for analysis and updated the general information related to sorghum research, such as online sorghum resources and literature references. 
In addition, we deployed a new database infrastructure and redesigned a new user interface as one of the Genome Variation Map databases. The new version SorGSD is freely accessible online at http://ngdc.cncb.ac.cn/sorgsd/ . Conclusions SorGSD is a comprehensive integration with large-scale genomic variation, phenotypic information and incorporates online data analysis tools for data mining, genome navigation and analysis. We hope that SorGSD could provide a valuable resource for sorghum researchers to find variations they are interested in and generate customized high-throughput datasets for further analysis.",SorGSD,0.997315943,Sorghum Genome SNP Database,0.930164422,SorGSD,0.997315943,1,NA,26744602,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,8/3/2021 +33264401,http://soybase.org,"A new decade and new data at SoyBase, the USDA-ARS soybean genetics and genomics database. SoyBase, a USDA genetic and genomics database, holds professionally curated soybean genetic and genomic data, which is integrated and made accessible to researchers and breeders. The site holds several reference genome assemblies, as well as genetic maps, thousands of mapped traits, expression and epigenetic data, pedigree information, and extensive variant and genotyping data sets. SoyBase displays include genetic, genomic, and epigenetic maps of the soybean genome. Gene expression data is presented in the genome viewer as heat maps and pictorial and tabular displays in gene report pages. Millions of sequence variants have been added, representing variations across various collections of cultivars. This variant data is explorable using new interactive tools to visualize the distribution of those variants across the genome, between selected accessions. SoyBase holds several reference-quality soybean genome assemblies, accessible via various query tools and browsers, including a new visualization system for exploring the soybean pan-genome. 
SoyBase also serves as a nexus of announcements pertinent to the greater soybean research community. The database also includes a soybean-specific anatomic and biochemical trait ontology. The database can be accessed at https://soybase.org.",SoyBase,0.984504819,NA,0,SoyBase,0.984504819,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +28499913,http://proteome.dc.affrc.go.jp/Soybean,"Integration of gel-based and gel-free proteomic data for functional analysis of proteins through Soybean Proteome Database. The Soybean Proteome Database (SPD) stores data on soybean proteins obtained with gel-based and gel-free proteomic techniques. The database was constructed to provide information on proteins for functional analyses. The majority of the data is focused on soybean (Glycine max 'Enrei'). The growth and yield of soybean are strongly affected by environmental stresses such as flooding. The database was originally constructed using data on soybean proteins separated by two-dimensional polyacrylamide gel electrophoresis, which is a gel-based proteomic technique. Since 2015, the database has been expanded to incorporate data obtained by label-free mass spectrometry-based quantitative proteomics, which is a gel-free proteomic technique. Here, the portions of the database consisting of gel-free proteomic data are described. The gel-free proteomic database contains 39,212 proteins identified in 63 sample sets, such as temporal and organ-specific samples of soybean plants grown under flooding stress or non-stressed conditions. In addition, data on organellar proteins identified in mitochondria, nuclei, and endoplasmic reticulum are stored. Furthermore, the database integrates multiple omics data such as genomics, transcriptomics, metabolomics, and proteomics. The SPD database is accessible at http://proteome.dc.affrc.go.jp/Soybean/. Biological significance The Soybean Proteome Database stores data obtained from both gel-based and gel-free proteomic techniques. 
The gel-free proteomic database comprises 39,212 proteins identified in 63 sample sets, such as different organs of soybean plants grown under flooding stress or non-stressed conditions in a time-dependent manner. In addition, organellar proteins identified in mitochondria, nuclei, and endoplasmic reticulum are stored in the gel-free proteomics database. A total of 44,704 proteins, including 5490 proteins identified using a gel-based proteomic technique, are stored in the SPD. It accounts for approximately 80% of all predicted proteins from genome sequences, though there are over lapped proteins. Based on the demonstrated application of data stored in the database for functional analyses, it is suggested that these data will be useful for analyses of biological mechanisms in soybean. Furthermore, coupled with recent advances in information and communication technology, the usefulness of this database would increase in the analyses of biological mechanisms.",SPD,0.971009135,Soybean Proteome Database,0.979806413,Soybean Proteome Database,0.979806413,1,22661982,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,5/10/2017 +31337332,http://pgl.gnu.ac.kr/soy_vcf2genome,"Soybean-VCF2Genomes: a database to identify the closest accession in soybean germplasm collection. Background The development of next generation sequencer (NGS) and the analytical methods allowed the researchers to profile their samples more precisely and easier than before. Especially for agriculture, the certification of the genomic background of their plant materials would be important for the reliability of seed market and stable yield as well as for quarantine procedure. However, the analysis of NGS data is still difficult for non-computational researchers or breeders to verify their samples because majority of current softwares for NGS analysis require users to access unfamiliar Linux environment. 
Main body Here, we developed a web-application, ""Soybean-VCF2Genomes"", http://pgl.gnu.ac.kr/soy_vcf2genome/ to map single sample variant call format (VCF) file against known soybean germplasm collection for identification of the closest soybean accession. Based on principal component analysis (PCA), we simplified genotype matrix for lowering computational burden while maintaining accurate clustering. With our web-application, users can simply upload single sample VCF file created by more than 10x resequencing strategy to find the closest samples along with linkage dendrogram of the reference genotype matrix. Conclusion The information of the closest soybean cultivar will allow breeders to estimate relative germplasmic position of their query sample to determine soybean breeding strategies. Moreover, our VCF2Genomes scheme can be extended to other plant species where the whole genome sequences of core collection are publicly available.",Soybean-VCF2Genomes,0.917709383,NA,0,Soybean-VCF2Genomes,0.917709383,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/24/2019 +24618044,http://nclab.hit.edu.cn/SoyFN,"SoyFN: a knowledge database of soybean functional networks. Many databases for soybean genomic analysis have been built and made publicly available, but few of them contain knowledge specifically targeting the omics-level gene-gene, gene-microRNA (miRNA) and miRNA-miRNA interactions. Here, we present SoyFN, a knowledge database of soybean functional gene networks and miRNA functional networks. SoyFN provides user-friendly interfaces to retrieve, visualize, analyze and download the functional networks of soybean genes and miRNAs. In addition, it incorporates much information about KEGG pathways, gene ontology annotations and 3'-UTR sequences as well as many useful tools including SoySearch, ID mapping, Genome Browser, eFP Browser and promoter motif scan. 
SoyFN is a schema-free database that can be accessed as a Web service from any modern programming language using a simple Hypertext Transfer Protocol call. The Web site is implemented in Java, JavaScript, PHP, HTML and Apache, with all major browsers supported. We anticipate that this database will be useful for members of research communities both in soybean experimental science and bioinformatics. Database URL: http://nclab.hit.edu.cn/SoyFN.",SoyFN,0.997227907,NA,0,SoyFN,0.997227907,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/10/2014 +"24136998, 27987168",http://soykb.org,"Soybean knowledge base (SoyKB): a web resource for integration of soybean translational genomics and molecular breeding. Soybean Knowledge Base (http://soykb.org) is a comprehensive web resource developed for bridging soybean translational genomics and molecular breeding research. It provides information for six entities including genes/proteins, microRNAs/sRNAs, metabolites, single nucleotide polymorphisms, plant introduction lines and traits. It also incorporates many multi-omics datasets including transcriptomics, proteomics, metabolomics and molecular breeding data, such as quantitative trait loci, traits and germplasm information. Soybean Knowledge Base has a new suite of tools such as In Silico Breeding Program for soybean breeding, which includes a graphical chromosome visualizer for ease of navigation. It integrates quantitative trait loci, traits and germplasm information along with genomic variation data, such as single nucleotide polymorphisms, insertions, deletions and genome-wide association studies data, from multiple soybean cultivars and Glycine soja.",SoyKB,0.996702418,Soybean Knowledge Base,0.942847088,SoyKB,0.996702418,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +23423175,http://bioinformatics.towson.edu/Soybean_Seed_Proteins_2D_Gel_DB/Home.aspx,"SoyProDB: A database for the identification of soybean seed proteins. 
Unlabelled Soybean continues to serve as a rich and inexpensive source of protein for humans and animals. A substantial amount of information has been reported on the genotypic variation and beneficial genetic manipulation of soybeans. For better understanding of the consequences of genetic manipulation, elucidation of soybean protein composition is necessary, because of its direct relationship to phenotype. We have conducted studies to determine the composition of storage, allergen and anti-nutritional proteins in cultivated soybean using a combined proteomics approach. Two-dimensional polyacrylamide gel electrophoresis (2DPAGE) was implemented for the separation of proteins along with matrix-assisted laser desorption/ionization time of flight mass spectrometry (MALDI-TOF-MS) and liquid chromatography mass spectrometry (LC-MS/MS) for the identification of proteins. Our analysis resulted in the identification of several proteins, and a web based database named soybean protein database (SoyProDB) was subsequently built to house and allow scientists to search the data. This database will be useful to scientists who wish to genetically alter soybean with higher quality storage proteins, and also helpful for consumers to get a greater understanding about proteins that compose soy products available in the market. The database is freely accessible. Availability http://bioinformatics.towson.edu/Soybean_Seed_Proteins_2D_Gel_DB/Home.aspx.",SoyProDB,0.943566799,soybean protein database,0.737450778,SoyProDB,0.943566799,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/6/2013 +33231322,http://artemis.cyverse.org/soykb_dev/SoyTD,"Soybean transporter database: A comprehensive database for identification and exploration of natural variants in soybean transporter genes. Transporters, a class of membrane proteins that facilitate exchange of solutes including diverse molecules and ions across the cellular membrane, are vital component for the survival of all organisms. 
Understanding plant transporters is important to get insight of the basic cellular processes, physiology, and molecular mechanisms including nutrient uptake, signaling, response to external stress, and many more. In this regard, extensive analysis of transporters predicted in soybean and other plant species was performed. In addition, an integrated database for soybean transporter protein, SoyTD, was developed that will facilitate the identification, classification, and extensive characterization of transporter proteins by integrating expression, gene ontology, conserved domain and motifs, gene structure organization, and chromosomal distribution features. A comprehensive analysis was performed to identify highly confident transporters by integrating various prediction tools. Initially, 7541 transmembrane (TM) proteins were predicted in the soybean genome; out of these, 3306 non-redundant transporter genes carrying two or more transmembrane domains were selected for further analysis. The identified transporter genes were classified according to a standard transporter classification (TC) system. Comparative analysis of transporter genes among 47 plant genomes provided insights into expansion and duplication of transporter genes in land plants. The whole genome resequencing (WGRS) and tissue-specific transcriptome datasets of soybean were integrated to investigate the natural variants and expression profile associated with transporter(s) of interest. Overall, SoyTD provides a comprehensive interface to study genetic and molecular function of soybean transporters. SoyTD is publicly available at http://artemis.cyverse.org/soykb_dev/SoyTD/.",SoyTD,0.986389339,Soybean transporter database,0.780514371,SoyTD,0.986389339,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/14/2020 +29733404,http://www.lisanwanglab.org/SPAR,"SPAR: small RNA-seq portal for analysis of sequencing experiments. 
The introduction of new high-throughput small RNA sequencing protocols that generate large-scale genomics datasets along with increasing evidence of the significant regulatory roles of small non-coding RNAs (sncRNAs) have highlighted the urgent need for tools to analyze and interpret large amounts of small RNA sequencing data. However, it remains challenging to systematically and comprehensively discover and characterize sncRNA genes and specifically-processed sncRNA products from these datasets. To fill this gap, we present Small RNA-seq Portal for Analysis of sequencing expeRiments (SPAR), a user-friendly web server for interactive processing, analysis, annotation and visualization of small RNA sequencing data. SPAR supports sequencing data generated from various experimental protocols, including smRNA-seq, short total RNA sequencing, microRNA-seq, and single-cell small RNA-seq. Additionally, SPAR includes publicly available reference sncRNA datasets from our DASHR database and from ENCODE across 185 human tissues and cell types to produce highly informative small RNA annotations across all major small RNA types and other features such as co-localization with various genomic features, precursor transcript cleavage patterns, and conservation. SPAR allows the user to compare the input experiment against reference ENCODE/DASHR datasets. SPAR currently supports analyses of human (hg19, hg38) and mouse (mm10) sequencing data. SPAR is freely available at https://www.lisanwanglab.org/SPAR.",SPAR,0.965488374,Small RNA-seq Portal for Analysis of sequencing,0.857351972,SPAR,0.965488374,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2018 +31713629,http://www.spatialomics.org/SpatialDB,"SpatialDB: a database for spatially resolved transcriptomes. Spatially resolved transcriptomic techniques allow the characterization of spatial organization of cells in tissues, which revolutionize the studies of tissue function and disease pathology. 
New strategies for detecting spatial gene expression patterns are emerging, and spatially resolved transcriptomic data are accumulating rapidly. However, it is not convenient for biologists to exploit these data due to the diversity of strategies and complexity in data analysis. Here, we present SpatialDB, the first manually curated database for spatially resolved transcriptomic techniques and datasets. The current version of SpatialDB contains 24 datasets (305 sub-datasets) from 5 species generated by 8 spatially resolved transcriptomic techniques. SpatialDB provides a user-friendly web interface for visualization and comparison of spatially resolved transcriptomic data. To further explore these data, SpatialDB also provides spatially variable genes and their functional enrichment annotation. SpatialDB offers a repository for research community to investigate the spatial cellular structure of tissues, and may bring new insights into understanding the cellular microenvironment in disease. SpatialDB is freely available at https://www.spatialomics.org/SpatialDB.",SpatialDB,0.995859206,NA,0,SpatialDB,0.995859206,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +22661982,http://proteome.dc.affrc.go.jp/Soybean,"Soybean Proteome Database 2012: update on the comprehensive data repository for soybean proteomics. The Soybean Proteome Database (SPD) was created to provide a data repository for functional analyses of soybean responses to flooding stress, thought to be a major constraint for establishment and production of this plant. Since the last publication of the SPD, we thoroughly enhanced the contents of database, particularly protein samples and their annotations from several organelles. The current release contains 23 reference maps of soybean (Glycine max cv. Enrei) proteins collected from several organs, tissues, and organelles including the maps for plasma membrane, cell wall, chloroplast, and mitochondrion, which were analyzed by two-dimensional polyacrylamide gels. 
Furthermore, the proteins analyzed with gel-free proteomics technique have been added and are available online. In addition to protein fluctuations under flooding, those of salt and drought stress have been included in the current release. A case analysis employing a portion of those newly released data was conducted, and the results will be shown. An 'omics table has also been provided to reveal relationships among mRNAs, proteins, and metabolites with a unified temporal-profile tag in order to facilitate retrieval of the data based on the temporal profiles. An intuitive user interface based on dynamic HTML enables users to browse the network as well as the profiles of the multiple ""omes"" in an integrated fashion. The SPD is available at: http://proteome.dc.affrc.go.jp/Soybean/",SPD,0.99296691,Soybean Proteome Database,0.987338126,SPD,0.99296691,1,28499913,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,5/30/2012 +32761141,http://spdatabase.com:2080,"SPDB: a specialized database and web-based analysis platform for swine pathogens. . The rapid and accurate diagnosis of swine diseases is indispensable for reducing their negative impacts on the pork industry. Next-generation sequencing (NGS) is a promising diagnostic tool for swine diseases. To support the application of NGS in the diagnosis of swine disease, we established the Swine Pathogen Database (SPDB). The SPDB represents the first comprehensive and highly specialized database and analysis platform for swine pathogens. The current version features an online genome search tool, which now contains 26 148 genomes of swine, swine pathogens and phylogenetically related species. This database offers a comprehensive bioinformatics analysis pipeline for the identification of 4403 swine pathogens and their related species in clinical samples, based on targeted 16S rRNA gene sequencing and metagenomic NGS data. 
The SPDB provides a powerful and user-friendly service for veterinarians and researchers to support the applications of NGS in swine disease research. Database URL: http://spdatabase.com:2080/.",SPDB,0.982828304,Swine Pathogen Database,0.946259499,SPDB,0.982828304,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +26005672,http://alpha.dmi.unict.it/spectra,"SPECTRA: An Integrated Knowledge Base for Comparing Tissue and Tumor-Specific PPI Networks in Human. Protein-protein interaction (PPI) networks available in public repositories usually represent relationships between proteins within the cell. They ignore the specific set of tissues or tumors where the interactions take place. Indeed, proteins can form tissue-selective complexes, while they remain inactive in other tissues. For these reasons, a great attention has been recently paid to tissue-specific PPI networks, in which nodes are proteins of the global PPI network whose corresponding genes are preferentially expressed in specific tissues. In this paper, we present SPECTRA, a knowledge base to build and compare tissue or tumor-specific PPI networks. SPECTRA integrates gene expression and protein interaction data from the most authoritative online repositories. We also provide tools for visualizing and comparing such networks, in order to identify the expression and interaction changes of proteins across tissues, or between the normal and pathological states of the same tissue. SPECTRA is available as a web server at http://alpha.dmi.unict.it/spectra.",SPECTRA,0.996715844,NA,0,SPECTRA,0.996715844,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/8/2015 +22821489,"http://www.spectrabank.org, http://bioinfo.thep.lu.se/speclust.html","SpectraBank: an open access tool for rapid microbial identification by MALDI-TOF MS fingerprinting. 
MALDI-TOF MS has proved to be an accurate, rapid, and cost-effective technique for microbial identification in which the spectral fingerprint of an unknown strain can be compared to a database of spectra from reference strains. Most of the existing databases are private and often costly to access, and little spectral information is shared among researchers. The objective of the present communication is to introduce the SpectraBank database (http://www.spectrabank.org), which provides open access MALDI-TOF mass spectra from a variety of microorganisms. This work aims to familiarize readers with the SpectraBank database, from the sample preparation, data collection, and data analysis to how the spectral reference data can be used for microbial species identification. The database currently includes more than 200 MALDI-TOF MS spectra from more than 70 bacterial species and links to the freely available web-based application SPECLUST (http://bioinfo.thep.lu.se/speclust.html) to allow comparisons of the obtained peak mass lists and evaluate phyloproteomic relationships. The SpectraBank database is intended to be expanded by the addition of new spectra from microbial strains, obtained in our laboratory and by other researchers.",SpectraBank,0.972535014,NA,0,SpectraBank,0.972535014,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/1/2012 +23193286,http://mcg.ustc.edu.cn/sdap1/spermgenes,"SpermatogenesisOnline 1.0: a resource for spermatogenesis based on manual literature curation and genome-wide data mining. Human infertility affects 10-15% of couples, half of which is attributed to the male partner. Abnormal spermatogenesis is a major cause of male infertility. Characterizing the genes involved in spermatogenesis is fundamental to understand the mechanisms underlying this biological process and in developing treatments for male infertility. 
Although many genes have been implicated in spermatogenesis, no dedicated bioinformatic resource for spermatogenesis is available. We have developed such a database, SpermatogenesisOnline 1.0 (http://mcg.ustc.edu.cn/sdap1/spermgenes/), using manual curation from 30 233 articles published before 1 May 2012. It provides detailed information for 1666 genes reported to participate in spermatogenesis in 37 organisms. Based on the analysis of these genes, we developed an algorithm, Greed AUC Stepwise (GAS) model, which predicted 762 genes to participate in spermatogenesis (GAS probability >0.5) based on genome-wide transcriptional data in Mus musculus testis from the ArrayExpress database. These predicted and experimentally verified genes were annotated, with several identical spermatogenesis-related GO terms being enriched for both classes. Furthermore, protein-protein interaction analysis indicates direct interactions of predicted genes with the experimentally verified ones, which supports the reliability of GAS. The strategy (manual curation and data mining) used to develop SpermatogenesisOnline 1.0 can be easily extended to other biological processes.",SpermatogenesisOnline,0.914023519,NA,0,SpermatogenesisOnline,0.914023519,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/28/2012 +25269378,http://pranag.physics.iisc.ernet.in/SPGDB,"Streptococcus pneumoniae Genome Database (SPGDB): a database for strain specific comparative analysis of Streptococcus pneumoniae genes and proteins. Streptococcus pneumoniae causes pneumonia, septicemia and meningitis. S. pneumoniae is responsible for significant mortality both in children and in the elderly. In recent years, the whole genome sequencing of various S. pneumoniae strains have increased manifold and there is an urgent need to provide organism specific annotations to the scientific community. 
This prompted us to develop the Streptococcus pneumoniae Genome Database (SPGDB) to integrate and analyze the completely sequenced and available S. pneumoniae genome sequences. Further, links to several tools are provided to compare the pool of gene and protein sequences, and proteins structure across different strains of S. pneumoniae. SPGDB aids in the analysis of phenotypic variations as well as to perform extensive genomics and evolutionary studies with reference to S. pneumoniae. The database will be updated at regular intervals and is freely accessible through the URL: http://pranag.physics.iisc.ernet.in/SPGDB/.",SPGDB,0.997126877,Streptococcus pneumoniae Genome Database,0.974226971,SPGDB,0.997126877,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/28/2014 +31211398,http://spinachbase.org,"SpinachBase: a central portal for spinach genomics. . Spinach (Spinacia oleracea L.) is a nutritious vegetable enriched with many essential minerals and vitamins. A reference spinach genome has been recently released, and additional spinach genomic resources are being rapidly developed. Therefore, there is an urgent need of a central database to store, query, analyze and integrate various resources of spinach genomic data. To this end, we developed SpinachBase (http://spinachbase.org), which provides centralized public accesses to genomic data as well as analytical tools to assist research and breeding in spinach. The database currently stores the spinach reference genome sequence, and sequences and comprehensive functional annotations of protein-coding genes predicted from the genome. The database also contains gene expression profiles derived from RNA-Seq experiments as well as highly co-expressed genes and genetic variants called from transcriptome sequences of 120 cultivated and wild Spinacia accessions. Biochemical pathways have been predicted from spinach protein-coding genes and are available through a pathway database (SpinachCyc) within SpinachBase. 
SpinachBase provides a suite of analysis and visualization tools including a genome browser, sequence similarity searches with BLAST, functional enrichment and functional classification analyses and functions to query and retrieve gene sequences and annotations.",SpinachBase,0.997433722,NA,0,SpinachBase,0.997433722,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +27148975,http://222.73.98.124/spinachdb,"SpinachDB: A Well-Characterized Genomic Database for Gene Family Classification and SNP Information of Spinach. Spinach (Spinacia oleracea L.), which originated in central and western Asia, belongs to the family Amaranthaceae. Spinach is one of most important leafy vegetables with a high nutritional value as well as being a perfect research material for plant sex chromosome models. As the completion of genome assembly and gene prediction of spinach, we developed SpinachDB (http://222.73.98.124/spinachdb) to store, annotate, mine and analyze genomics and genetics datasets efficiently. In this study, all of 21702 spinach genes were annotated. A total of 15741 spinach genes were catalogued into 4351 families, including identification of a substantial number of transcription factors. To construct a high-density genetic map, a total of 131592 SSRs and 1125743 potential SNPs located in 548801 loci of spinach genome were identified in 11 cultivated and wild spinach cultivars. The expression profiles were also performed with RNA-seq data using the FPKM method, which could be used to compare the genes. Paralogs in spinach and the orthologous genes in Arabidopsis, grape, sugar beet and rice were identified for comparative genome analysis. Finally, the SpinachDB website contains seven main sections, including the homepage; the GBrowse map that integrates genome, genes, SSR and SNP marker information; the Blast alignment service; the gene family classification search tool; the orthologous and paralogous gene pairs search tool; and the download and useful contact information. 
SpinachDB will be continually expanded to include newly generated robust genomics and genetics data sets along with the associated data mining and analysis tools.",SpinachDB,0.997738004,NA,0,SpinachDB,0.997738004,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/5/2016 +22397686,http://pathod.cdc.go.kr/spiroestdb,"SpiroESTdb: a transcriptome database and online tool for sparganum expressed sequences tags. Background Sparganum (plerocercoid of Spirometra erinacei) is a parasite that possesses the remarkable ability to survive by successfully modifying its physiology and morphology to suit various hosts and can be found in various tissues, even the nervous system. However, surprisingly little is known about the molecular function of genes that are expressed during the course of the parasite life cycle. To begin to decipher the molecular processes underlying gene function, we constructed a database of expressed sequence tags (ESTs) generated from sparganum. Findings SpiroESTdb is a web-based information resource that is built upon the annotation and curation of 5,655 ESTs data. SpiroESTdb provides an integrated platform for expressed sequence data, expression dynamics, functional genes, genetic markers including single nucleotide polymorphisms and tandem repeats, gene ontology and KEGG pathway information. Moreover, SpiroESTdb supports easy access to gene pages, such as (i) curation and query forms, (ii) in silico expression profiling and (iii) BLAST search tools. Comprehensive descriptions of the sparganum content of all sequenced data are available, including summary reports. The contents of SpiroESTdb can be viewed and downloaded from the web (http://pathod.cdc.go.kr/spiroestdb). Conclusions This integrative web-based database of sequence data, functional annotations and expression profiling data will serve as a useful tool to help understand and expand the characterization of parasitic infections. 
It can also be used to identify potential industrial drug targets and vaccine candidate genes.",SpiroESTdb,0.997175336,NA,0,SpiroESTdb,0.997175336,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/8/2012 +26220682,http://spirpro.sbi.kmutt.ac.th,"SpirPro: A Spirulina proteome database and web-based tools for the analysis of protein-protein interactions at the metabolic level in Spirulina (Arthrospira) platensis C1. Background Spirulina (Arthrospira) platensis is the only cyanobacterium that in addition to being studied at the molecular level and subjected to gene manipulation, can also be mass cultivated in outdoor ponds for commercial use as a food supplement. Thus, encountering environmental changes, including temperature stresses, is common during the mass production of Spirulina. The use of cyanobacteria as an experimental platform, especially for photosynthetic gene manipulation in plants and bacteria, is becoming increasingly important. Understanding the mechanisms and protein-protein interaction networks that underlie low- and high-temperature responses is relevant to Spirulina mass production. To accomplish this goal, high-throughput techniques such as OMICs analyses are used. Thus, large datasets must be collected, managed and subjected to information extraction. Therefore, databases including (i) proteomic analysis and protein-protein interaction (PPI) data and (ii) domain/motif visualization tools are required for potential use in temperature response models for plant chloroplasts and photosynthetic bacteria. Descriptions A web-based repository was developed including an embedded database, SpirPro, and tools for network visualization. Proteome data were analyzed integrated with protein-protein interactions and/or metabolic pathways from KEGG. The repository provides various information, ranging from raw data (2D-gel images) to associated results, such as data from interaction and/or pathway analyses. 
This integration allows in silico analyses of protein-protein interactions affected at the metabolic level and, particularly, analyses of interactions between and within the affected metabolic pathways under temperature stresses for comparative proteomic analysis. The developed tool, which is coded in HTML with CSS/JavaScript and depicted in Scalable Vector Graphics (SVG), is designed for interactive analysis and exploration of the constructed network. SpirPro is publicly available on the web at http://spirpro.sbi.kmutt.ac.th . Conclusions SpirPro is an analysis platform containing an integrated proteome and PPI database that provides the most comprehensive data on this cyanobacterium at the systematic level. As an integrated database, SpirPro can be applied in various analyses, such as temperature stress response networking analysis in cyanobacterial models and interacting domain-domain analysis between proteins of interest.",SpirPro,0.993724525,NA,0,SpirPro,0.993724525,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/29/2015 +23118479,http://www.caspur.it/SpliceAidF,"SpliceAid-F: a database of human splicing factors and their RNA-binding sites. A comprehensive knowledge of all the factors involved in splicing, both proteins and RNAs, and of their interaction network is crucial for reaching a better understanding of this process and its functions. A large part of relevant information is buried in the literature or collected in various different databases. By hand-curated screenings of literature and databases, we retrieved experimentally validated data on 71 human RNA-binding splicing regulatory proteins and organized them into a database called 'SpliceAid-F' (http://www.caspur.it/SpliceAidF/). For each splicing factor (SF), the database reports its functional domains, its protein and chemical interactors and its expression data. 
Furthermore, we collected experimentally validated RNA-SF interactions, including relevant information on the RNA-binding sites, such as the genes where these sites lie, their genomic coordinates, the splicing effects, the experimental procedures used, as well as the corresponding bibliographic references. We also collected information from experiments showing no RNA-SF binding, at least in the assayed conditions. In total, SpliceAid-F contains 4227 interactions, 2590 RNA-binding sites and 1141 'no-binding' sites, including information on cellular contexts and conditions where binding was tested. The data collected in SpliceAid-F can provide significant information to explain an observed splicing pattern as well as the effect of mutations in functional regulatory elements.",SpliceAid-F,0.996136576,NA,0,SpliceAid-F,0.996136576,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/30/2012 +22139928,http://cmbi.bjmu.edu.cn/sdisease,"SpliceDisease database: linking RNA splicing and disease. RNA splicing is an important aspect of gene regulation in many organisms. Splicing of RNA is regulated by complicated mechanisms involving numerous RNA-binding proteins and the intricate network of interactions among them. Mutations in cis-acting splicing elements or its regulatory proteins have been shown to be involved in human diseases. Defects in pre-mRNA splicing process have emerged as a common disease-causing mechanism. Therefore, a database integrating RNA splicing and disease associations would be helpful for understanding not only the RNA splicing but also its contribution to disease. In SpliceDisease database, we manually curated 2337 splicing mutation disease entries involving 303 genes and 370 diseases, which have been supported experimentally in 898 publications. 
The SpliceDisease database provides information including the change of the nucleotide in the sequence, the location of the mutation on the gene, the reference Pubmed ID and detailed description for the relationship among gene mutations, splicing defects and diseases. We standardized the names of the diseases and genes and provided links for these genes to NCBI and UCSC genome browser for further annotation and genomic sequences. For the location of the mutation, we give direct links of the entry to the respective position/region in the genome browser. The users can freely browse, search and download the data in SpliceDisease at http://cmbi.bjmu.edu.cn/sdisease.",SpliceDisease,0.996948481,NA,0,SpliceDisease,0.996948481,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2011 +23118483,http://spliceosomedb.ucsc.edu,"Spliceosome database: a tool for tracking components of the spliceosome. The spliceosome is the extremely complex macromolecular machine responsible for pre-mRNA splicing. It assembles from five U-rich small nuclear RNAs (snRNAs) and over 200 proteins in a highly dynamic fashion. One important challenge to studying the spliceosome is simply keeping track of all these proteins, a situation further complicated by the variety of names and identifiers that exist in the literature for them. To facilitate studies of the spliceosome and its components, we created a database of spliceosome-associated proteins and snRNAs, which is available at http://spliceosomedb.ucsc.edu and can be queried through a simple browser interface. In the database, we cataloged the various names, orthologs and gene identifiers of spliceosome proteins to navigate the complex nomenclature of spliceosome proteins. We also provide links to gene and protein records for the spliceosome components in other databases. 
To navigate spliceosome assembly dynamics, we created tools to compare the association of spliceosome proteins with complexes that form at specific stages of spliceosome assembly based on a compendium of mass spectrometry experiments that identified proteins in purified splicing complexes. Together, the information in the database provides an easy reference for spliceosome components and will support future modeling of spliceosome structure and dynamics.",NA,0,Spliceosome,0.603129983,Spliceosome,0.603129983,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/30/2012 +24273012,http://lbbc.inca.gov.br/spliceprot,"SpliceProt: a protein sequence repository of predicted human splice variants. The mechanism of alternative splicing in the transcriptome may increase the proteome diversity in eukaryotes. In proteomics, several studies aim to use protein sequence repositories to annotate MS experiments or to detect differentially expressed proteins. However, the available protein sequence repositories are not designed to fully detect protein isoforms derived from mRNA splice variants. To foster knowledge for the field, here we introduce SpliceProt, a new protein sequence repository of transcriptome experimental data used to investigate for putative splice variants in human proteomes. Current version of SpliceProt contains 159√ɬÉ√ǬÇ√ɬÇ√Ǭ†719 non-redundant putative polypeptide sequences. The assessment of the potential of SpliceProt in detecting new protein isoforms resulting from alternative splicing was performed by using publicly available proteomics data. We detected 173 peptides hypothetically derived from splice variants, which 54 of them are not present in UniprotKB/TrEMBL sequence repository. In comparison to other protein sequence repositories, SpliceProt contains a greater number of unique peptides and is able to detect more splice variants. Therefore, SpliceProt provides a solution for the annotation of proteomics experiments regarding splice isofoms. 
The repository files containing the translated sequences of the predicted splice variants and a visualization tool are freely available at http://lbbc.inca.gov.br/spliceprot.",SpliceProt,0.991778851,NA,0,SpliceProt,0.991778851,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/1/2014 +25405079,http://www.bioinformatics-brazil.org/splooce,"Identification of rare alternative splicing events in MS/MS data reveals a significant fraction of alternative translation initiation sites. Integration of transcriptome data is a crucial step for the identification of rare protein variants in mass-spectrometry (MS) data with important consequences for all branches of biotechnology research. Here, we used Splooce, a database of splicing variants recently developed by us, to search MS data derived from a variety of human tumor cell lines. More than 800 new protein variants were identified whose corresponding MS spectra were specific to protein entries from Splooce. Although the types of splicing variants (exon skipping, alternative splice sites and intron retention) were found at the same frequency as in the transcriptome, we observed a large variety of modifications at the protein level induced by alternative splicing events. Surprisingly, we found that 40% of all protein modifications induced by alternative splicing led to the use of alternative translation initiation sites. Other modifications include frameshifts in the open reading frame and inclusion or deletion of peptide sequences. To make the dataset generated here available to the community in a more effective form, the Splooce portal (http://www.bioinformatics-brazil.org/splooce) was modified to report the alternative splicing events supported by MS data.",Splooce,0.992536426,NA,0,Splooce,0.992536426,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/13/2014 +31672983,http://www.signalingpathways.org,"The Signaling Pathways Project, an integrated 'omics knowledgebase for mammalian cellular signaling pathways. 
Mining of integrated public transcriptomic and ChIP-Seq (cistromic) datasets can illuminate functions of mammalian cellular signaling pathways not yet explored in the research literature. Here, we designed a web knowledgebase, the Signaling Pathways Project (SPP), which incorporates community classifications of signaling pathway nodes (receptors, enzymes, transcription factors and co-nodes) and their cognate bioactive small molecules. We then mapped over 10,000 public transcriptomic or cistromic experiments to their pathway node or biosample of study. To enable prediction of pathway node-gene√ɬÉ√ǬÇ√ɬÇ√Ǭ†target transcriptional regulatory relationships through SPP, we generated consensus 'omics signatures, or consensomes, which ranked genes based on measures of their significant differential expression or promoter occupancy across transcriptomic or cistromic experiments mapped to a specific node family. Consensomes were validated using alignment with canonical literature knowledge, gene√ɬÉ√ǬÇ√ɬÇ√Ǭ†target-level integration of transcriptomic and cistromic data points, and in bench experiments confirming previously uncharacterized node-gene target regulatory relationships. To expose the SPP knowledgebase to researchers, a web browser interface was designed that accommodates numerous routine data mining strategies. SPP is freely accessible at https://www.signalingpathways.org .",SPP,0.947210044,The Signaling Pathways Project,0.80841283,SPP,0.947210044,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/31/2019 +22701460,http://www.seed-proteome.com,"The seed proteome web portal. The Seed Proteome Web Portal (SPWP; http://www.seed-proteome.com/) gives access to information both on quantitative seed proteomic data and on seed-related protocols. Firstly, the SPWP provides access to the 475 different Arabidopsis seed proteins annotated from two dimensional electrophoresis (2DE) maps. 
Quantitative data are available for each protein according to their accumulation profile during the germination process. These proteins can be retrieved either in list format or directly on scanned 2DE maps. These proteomic data reveal that 40% of seed proteins maintain a stable abundance over germination, up to radicle protrusion. During sensu stricto germination (24√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâh upon imbibition) about 50% of the proteins display quantitative variations, exhibiting an increased abundance (35%) or a decreasing abundance (15%). Moreover, during radicle protrusion (24-48√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâh upon imbibition), 41% proteins display quantitative variations with an increased (23%) or a decreasing abundance (18%). In addition, an analysis of the seed proteome revealed the importance of protein post-translational modifications as demonstrated by the poor correlation (r(2)√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ=√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ0.29) between the theoretical (predicted from Arabidopsis genome) and the observed protein isoelectric points. Secondly, the SPWP is a relevant technical resource for protocols specifically dedicated to Arabidopsis seed proteome studies. Concerning 2D electrophoresis, the user can find efficient procedures for sample preparation, electrophoresis coupled with gel analysis, and protein identification by mass spectrometry, which we have routinely used during the last 12√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâyears. Particular applications such as the detection of oxidized proteins or de novo synthesized proteins radiolabeled by [(35)S]-methionine are also given in great details. Future developments of this portal will include proteomic data from studies such as dormancy release and protein turnover through de novo protein synthesis analyses during germination.",SPWP,0.99272126,Seed Proteome Web Portal,0.962555528,SPWP,0.99272126,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/11/2012 +25805861,http://srd.genouest.org,"SRD: a Staphylococcus regulatory RNA database. 
An overflow of regulatory RNAs (sRNAs) was identified in a wide range of bacteria. We designed and implemented a new resource for the hundreds of sRNAs identified in Staphylococci, with primary focus on the human pathogen Staphylococcus aureus. The ""Staphylococcal Regulatory RNA Database"" (SRD, http://srd.genouest.org/) compiled all published data in a single interface including genetic locations, sequences and other features. SRD proposes novel and simplified identifiers for Staphylococcal regulatory RNAs (srn) based on the sRNA's genetic location in S. aureus strain N315 which served as a reference. From a set of 894 sequences and after an in-depth cleaning, SRD provides a list of 575 srn exempt of redundant sequences. For each sRNA, their experimental support(s) is provided, allowing the user to individually assess their validity and significance. RNA-seq analysis performed on strains N315, NCTC8325, and Newman allowed us to provide further details, upgrade the initial annotation, and identified 159 RNA-seq independent transcribed sRNAs. The lists of 575 and 159 sRNAs sequences were used to predict the number and location of srns in 18 S. aureus strains and 10 other Staphylococci. A comparison of the srn contents within 32 Staphylococcal genomes revealed a poor conservation between species. In addition, sRNA structure predictions obtained with MFold are accessible. A BLAST server and the intaRNA program, which is dedicated to target prediction, were implemented. SRD is the first sRNA database centered on a genus; it is a user-friendly and scalable device with the possibility to submit new sequences that should spread in the literature.",SRD,0.972883582,Staphylococcal Regulatory RNA Database,0.963276863,SRD,0.972883582,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/24/2015 +30306862,http://science.cmb.ac.lk/tools/slflora,"Development of an Information System of Structures and Force Field Parameters of Chemical Compounds from Sri Lankan Flora. 
Background Sri Lanka offers a huge diversity of flora with a large proportion of those being endemic to the island. Both the endemic and native plants species serve as a rich bank of phytochemicals. Method In this study, ""Sri Lankan Flora"" an online web-based information system of phytochemical compounds isolated from the flora of Sri Lanka was proposed. Results The database contained 3D structures of those compounds, calculated quantitativestructure- activity relationship (QSAR) data and the GROMOS 54a7 force field parameters for each and every compound. The manually curated chemical structures, activities and force field parameters provide a possible direct avenue for computer-aided drug discovery. The present study is a continuing project with a wider goal of building up a database, not only for assisting the computeraided drug designing process, but also for other chemical applications, as the database includes structural, physical, chemical and dynamic properties of chemical compounds of the flora of Sri Lanka. The database is freely accessible at http://science.cmb.ac.lk/tools/slflora.",Sri Lankan Flora,0.628779441,NA,0,Sri Lankan Flora,0.628779441,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +27453469,http://www.srmatlas.org,"Human SRMAtlas: A Resource of Targeted Assays to Quantify the Complete Human Proteome. The ability to reliably and reproducibly measure any protein of the human proteome in any tissue or cell type would be transformative for understanding systems-level properties as well as specific pathways in physiology and disease. Here, we describe the generation and verification of a compendium of highly specific assays that enable quantification of 99.7% of the√ɬÉ√ǬÇ√ɬÇ√Ǭ†20,277 annotated human proteins by the widely accessible, sensitive, and robust targeted mass spectrometric method selected reaction monitoring, SRM. 
This human SRMAtlas provides definitive coordinates that conclusively identify the respective peptide in biological samples. We report data on 166,174 proteotypic peptides providing multiple, independent assays to quantify any human protein and numerous spliced variants, non-synonymous mutations, and post-translational modifications. The data are freely accessible as a resource at http://www.srmatlas.org/, and we demonstrate its utility by examining the network response to inhibition of cholesterol synthesis in liver cells and to docetaxel in prostate cancer lines.",SRMAtlas,0.915497303,NA,0,SRMAtlas,0.915497303,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,7/21/2016 +26503244,http://ccb1.bmi.ac.cn/srnatarbase,"sRNATarBase 3.0: an updated database for sRNA-target interactions in bacteria. Bacterial sRNAs are a class of small regulatory RNAs of about 40-500 nt in length; they play multiple biological roles through binding to their target mRNAs or proteins. Therefore, elucidating sRNA targets is very important. However, only targets of a few sRNAs have been described. To facilitate sRNA functional studies such as developing sRNA target prediction models, we updated the sRNATarBase database, which was initially developed in 2010. The new version (recently moved to http://ccb1.bmi.ac.cn/srnatarbase/) contains 771 sRNA-target entries manually collected from 213 papers, and 23 290 and 11 750 predicted targets from sRNATarget and sTarPicker, respectively. Among the 771 entries, 475 and 17 were involved in validated sRNA-mRNA and sRNA-protein interactions, respectively, while 279 had no reported interactions. We also presented detailed information for 316 binding regions of sRNA-target mRNA interactions and related mutation experiments, as well as new features, including NCBI sequence viewer, sRNA regulatory network, target prediction-based GO and pathway annotations, and error report system. 
The new version provides a comprehensive annotation of validated sRNA-target interactions, and will be a useful resource for bacterial sRNA studies.",sRNATarBase,0.988428116,NA,0,sRNATarBase,0.988428116,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/25/2015 +22493538,http://www.molgenv.com/ssa_mirnas_db_home.php,"Ssa miRNAs DB: Online repository of in silico predicted miRNAs in Salmo salar. Unlabelled The Atlantic salmon (Salmo salar) is a very valuable commercial salmonid species. As with other aquaculture species, intensive aquaculture of Atlantic salmon often faces disease problems especially in early life stages which can limit stable production of the species. 'Ssa miRNAs DB', a bioinformatics and manually curated database, aims at providing a comprehensive resource of microRNA in Altantic salmon, with a user friendly interface for a convenient retrieval of each entry by microRNA ID or target gene. The current version of Ssa miRNAs DB involved the prediction of 41 and 266 homologous and novel microRNAs, respectively. Availability The database is available for free at http://www.molgenv.com/ssa_mirnas_db_home.php.",Ssa miRNAs DB,0.706260227,miRNAs,0.592867792,Ssa miRNAs DB,0.706260227,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/31/2012 +27412095,http://ssbd.qbic.riken.jp,"SSBD: a database of quantitative data of spatiotemporal dynamics of biological phenomena. Motivation Rapid advances in live-cell imaging analysis and mathematical modeling have produced a large amount of quantitative data on spatiotemporal dynamics of biological objects ranging from molecules to organisms. There is now a crucial need to bring these large amounts of quantitative biological dynamics data together centrally in a coherent and systematic manner. This will facilitate the reuse of this data for further analysis. Results We have developed the Systems Science of Biological Dynamics database (SSBD) to store and share quantitative biological dynamics data. 
SSBD currently provides 311 sets of quantitative data for single molecules, nuclei and whole organisms in a wide variety of model organisms from Escherichia coli to Mus musculus The data are provided in Biological Dynamics Markup Language format and also through a REST API. In addition, SSBD provides 188 sets of time-lapse microscopy images from which the quantitative data were obtained and software tools for data visualization and analysis. Availability and implementation SSBD is accessible at http://ssbd.qbic.riken.jp CONTACT: sonami@riken.jp.",SSBD,0.980243762,Systems Science of Biological Dynamics database,0.951881438,SSBD,0.980243762,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/13/2016 +28420402,http://cefg.uestc.edu.cn/sser,"SSER: Species specific essential reactions database. Background Essential reactions are vital components of cellular networks. They are the foundations of synthetic biology and are potential candidate targets for antimetabolic drug design. Especially if a single reaction is catalyzed by multiple enzymes, then inhibiting the reaction would be a better option than targeting the enzymes or the corresponding enzyme-encoding gene. The existing databases such as BRENDA, BiGG, KEGG, Bio-models, Biosilico, and many others offer useful and comprehensive information on biochemical reactions. But none of these databases especially focus on essential reactions. Therefore, building a centralized repository for this class of reactions would be of great value. Description Here, we present a species-specific essential reactions database (SSER). The current version comprises essential biochemical and transport reactions of twenty-six organisms which are identified via flux balance analysis (FBA) combined with manual curation on experimentally validated metabolic network models. 
Quantitative data on the number of essential reactions, number of the essential reactions associated with their respective enzyme-encoding genes and shared essential reactions across organisms are the main contents of the database. Conclusion SSER would be a prime source to obtain essential reactions data and related gene and metabolite information and it can significantly facilitate the metabolic network models reconstruction and analysis, and drug target discovery studies. Users can browse, search, compare and download the essential reactions of organisms of their interest through the website http://cefg.uestc.edu.cn/sser .",SSER,0.930679306,Species specific essential reactions database,0.902112281,SSER,0.930679306,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/19/2017 +21292827,http://www.ssfa-gphr.de,"Research resource: Update and extension of a glycoprotein hormone receptors web application. The SSFA-GPHR (Sequence-Structure-Function-Analysis of Glycoprotein Hormone Receptors) database provides a comprehensive set of mutation data for the glycoprotein hormone receptors (covering the lutropin, the FSH, and the TSH receptors). Moreover, it provides a platform for comparison and investigation of these homologous receptors and helps in understanding protein malfunctions associated with several diseases. Besides extending the data set (> 1100 mutations), the database has been completely redesigned and several novel features and analysis tools have been added to the web site. These tools allow the focused extraction of semiquantitative mutant data from the GPHR subtypes and different experimental approaches. Functional and structural data of the GPHRs are now linked interactively at the web interface, and new tools for data visualization (on three-dimensional protein structures) are provided. 
The interpretation of functional findings is supported by receptor morphings simulating intramolecular changes during the activation process, which thus help to trace the potential function of each amino acid and provide clues to the local structural environment, including potentially relocated spatial counterpart residues. Furthermore, double and triple mutations are newly included to allow the analysis of their functional effects related to their spatial interrelationship in structures or homology models. A new important feature is the search option and data visualization by interactive and user-defined snake-plots. These new tools allow fast and easy searches for specific functional data and thereby give deeper insights in the mechanisms of hormone binding, signal transduction, and signaling regulation. The web application ""Sequence-Structure-Function-Analysis of GPHRs"" is accessible on the internet at http://www.ssfa-gphr.de/.",SSFA-GPHR,0.962571482,Structure-Function-Analysis of Glycoprotein Hormone Receptors,0.797694047,SSFA-GPHR,0.962571482,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/3/2011 +22759918,http://sskb.umn.edu,"Text-mining applied to autoimmune disease research: the Sj√ɬÉ√ǬÉ√ɬÇ√Ǭ∂gren's syndrome knowledge base. Background Sj√ɬÉ√ǬÉ√ɬÇ√Ǭ∂gren's syndrome is a tissue-specific autoimmune disease that affects exocrine tissues, especially salivary glands and lacrimal glands. Despite a large body of evidence gathered over the past 60 years, significant gaps still exist in our understanding of Sj√ɬÉ√ǬÉ√ɬÇ√Ǭ∂gren's syndrome. The goal of this study was to develop a database that collects and organizes gene and protein expression data from the existing literature for comparative analysis with future gene expression and proteomic studies of Sj√ɬÉ√ǬÉ√ɬÇ√Ǭ∂gren's syndrome. 
Description To catalog the existing knowledge in the field, we used text mining to generate the Sj√ɬÉ√ǬÉ√ɬÇ√Ǭ∂gren's Syndrome Knowledge Base (SSKB) of published gene/protein data, which were extracted from PubMed using text mining of over 7,700 abstracts and listing approximately 500 potential genes/proteins. The raw data were manually evaluated to remove duplicates and false-positives and assign gene names. The data base was manually curated to 477 entries, including 377 potential functional genes, which were used for enrichment and pathway analysis using gene ontology and KEGG pathway analysis. Conclusions The Sj√ɬÉ√ǬÉ√ɬÇ√Ǭ∂gren's syndrome knowledge base ( http://sskb.umn.edu) can form the foundation for an informed search of existing knowledge in the field as new potential therapeutic targets are identified by conventional or high throughput experimental techniques.",SSKB,0.872170428,ren's Syndrome Knowledge Base,0.860038102,SSKB,0.872170428,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/3/2012 +27402679,http://fantom.gsc.riken.jp/5/sstar,"FANTOM5 transcriptome catalog of cellular states based on Semantic MediaWiki. . The Functional Annotation of the Mammalian Genome project (FANTOM5) mapped transcription start sites (TSSs) and measured their activities in a diverse range of biological samples. The FANTOM5 project generated a large data set; including detailed information about the profiled samples, the uncovered TSSs at high base-pair resolution on the genome, their transcriptional initiation activities, and further information of transcriptional regulation. Data sets to explore transcriptome in individual cellular states encoded in the mammalian genomes have been enriched by a series of additional analysis, based on the raw experimental data, along with the progress of the research activities. 
To make the heterogeneous data set accessible and useful for investigators, we developed a web-based database called Semantic catalog of Samples, Transcription initiation And Regulators (SSTAR). SSTAR utilizes the open source wiki software MediaWiki along with the Semantic MediaWiki (SMW) extension, which provides flexibility to model, store, and display a series of data sets produced during the course of the FANTOM5 project. Our use of SMW demonstrates the utility of the framework for dissemination of large-scale analysis results. SSTAR is a case study in handling biological data generated from a large-scale research project in terms of maintenance and growth alongside research activities.Database URL: http://fantom.gsc.riken.jp/5/sstar/.",SSTAR,0.966379583,Functional Annotation of the Mammalian Genome project,0.852594042,SSTAR,0.966379583,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/9/2016 +33408242,http://www.stjude.cloud,"St. Jude Cloud: A Pediatric Cancer Genomic Data-Sharing Ecosystem. Effective data sharing is key to accelerating research to improve diagnostic precision, treatment efficacy, and long-term survival in pediatric cancer and other childhood catastrophic diseases. We present St. Jude Cloud (https://www.stjude.cloud), a cloud-based data-sharing ecosystem for accessing, analyzing, and visualizing genomic data from >10,000 pediatric patients with cancer and long-term survivors, and >800 pediatric sickle cell patients. Harmonized genomic data totaling 1.25 petabytes are freely available, including 12,104 whole genomes, 7,697 whole exomes, and 2,202 transcriptomes. The resource is expanding rapidly, with regular data uploads from St. Jude's prospective clinical genomics programs. Three interconnected apps within the ecosystem-Genomics Platform, Pediatric Cancer Knowledgebase, and Visualization Community-enable simultaneously performing advanced data analysis in the cloud and enhancing the Pediatric Cancer knowledgebase. 
We demonstrate the value of the ecosystem through use cases that classify 135 pediatric cancer subtypes by gene expression profiling and map mutational signatures across 35 pediatric cancer subtypes. SIGNIFICANCE: To advance research and treatment of pediatric cancer, we developed St. Jude Cloud, a data-sharing ecosystem for accessing >1.2 petabytes of raw genomic data from >10,000 pediatric patients and survivors, innovative analysis workflows, integrative multiomics visualizations, and a knowledgebase of published data contributed by the global pediatric cancer community.This article is highlighted in the In This Issue feature, p. 995.",St,0.97610724,NA,0,St,0.97610724,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/6/2021 +32976581,http://stab.comp-sysbio.org,"STAB: a spatio-temporal cell atlas of the human brain. The human brain is the most complex organ consisting of billions of neuronal and non-neuronal cells that are organized into distinct anatomical and functional regions. Elucidating the cellular and transcriptome architecture underlying the brain is crucial for understanding brain functions and brain disorders. Thanks to the single-cell RNA sequencing technologies, it is becoming possible to dissect the cellular compositions of the brain. Although great effort has been made to explore the transcriptome architecture of the human brain, a comprehensive database with dynamic cellular compositions and molecular characteristics of the human brain during the lifespan is still not available. Here, we present STAB (a Spatio-Temporal cell Atlas of the human Brain), a database consists of single-cell transcriptomes across multiple brain regions and developmental periods. Right now, STAB contains single-cell gene expression profiling of 42 cell subtypes across 20 brain regions and 11 developmental periods. 
With STAB, the landscape of cell types and their regional heterogeneity and temporal dynamics across the human brain can be clearly seen, which can help to understand both the development of the normal human brain and the etiology of neuropsychiatric disorders. STAB is available at http://stab.comp-sysbio.org.",STAB,0.990528941,Spatio-Temporal cell Atlas of the human Brain,0.935739333,STAB,0.990528941,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +30602089,http://stadium.pmrc.re.kr,"STADIUM: Species-Specific tRNA Adaptive Index Compendium. Due to the increasing interest in synonymous codons, several codon bias-related terms were introduced. As one measure of them, the tRNA adaptation index (tAI) was invented about a decade ago. The tAI is a measure of translational efficiency for a gene and is calculated based on the abundance of intracellular tRNA and the binding strength between a codon and a tRNA. The index has been widely used in various fields of molecular evolution, genetics, and pharmacology. Afterwards, an improved version of the index, named specific tRNA adaptation index (stAI), was developed by adapting tRNA copy numbers in species. Although a subsequently developed webserver (stAIcalc) provided tools that calculated stAI values, it was not available to access pre-calculated values. In addition to about 100 species in stAIcalc, we calculated stAI values for whole coding sequences in 148 species. To enable easy access to this index, we constructed a novel web database, named STADIUM (Species-specific tRNA adaptive index compendium). STADIUM provides not only the stAI value of each gene but also statistics based on pathway-based classification. The database is expected to help researchers who have interests in codon optimality and the role of synonymous codons. 
STADIUM is freely available at http://stadium.pmrc.re.kr.",STADIUM,0.983587086,Specific tRNA Adaptive Index Compendium,0.638744718,STADIUM,0.983587086,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/28/2018 +32719467,http://coralsnp.science.psu.edu/galaxy,"STAGdb: a 30K SNP genotyping array and Science Gateway for Acropora corals and their dinoflagellate symbionts. Standardized identification of genotypes is necessary in animals that reproduce asexually and form large clonal populations such as coral. We developed a high-resolution hybridization-based genotype array coupled with an analysis workflow and database for the most speciose genus of coral, Acropora, and their symbionts. We designed the array to co-analyze host and symbionts based on bi-allelic single nucleotide polymorphisms (SNP) markers identified from genomic data of the two Caribbean Acropora species as well as their dominant dinoflagellate symbiont, Symbiodinium 'fitti'. SNPs were selected to resolve multi-locus genotypes of host (called genets) and symbionts (called strains), distinguish host populations and determine ancestry of coral hybrids between Caribbean acroporids. Pacific acroporids can also be genotyped using a subset of the SNP loci and additional markers enable the detection of symbionts belonging to the genera Breviolum, Cladocopium, and Durusdinium. Analytic tools to produce multi-locus genotypes of hosts based on these SNP markers were combined in a workflow called the Standard Tools for Acroporid Genotyping (STAG). The STAG workflow and database are contained within a customized Galaxy environment (https://coralsnp.science.psu.edu/galaxy/), which allows for consistent identification of host genet and symbiont strains and serves as a template for the development of arrays for additional coral genera. STAG data can be used to track temporal and spatial changes of sampled genets necessary for restoration planning and can be applied to downstream genomic analyses. 
Using STAG, we uncover bi-directional hybridization between and population structure within Caribbean acroporids and detect a cryptic Acroporid species in the Pacific.",STAGdb,0.743933082,NA,0,STAGdb,0.743933082,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/27/2020 +24578355,http://staphylococcus.um.edu.my,"StaphyloBase: a specialized genomic resource for the staphylococcal research community. With the advent of high-throughput sequencing technologies, many staphylococcal genomes have been sequenced. Comparative analysis of these strains will provide better understanding of their biology, phylogeny, virulence and taxonomy, which may contribute to better management of diseases caused by staphylococcal pathogens. We developed StaphyloBase with the goal of having a one-stop genomic resource platform for the scientific community to access, retrieve, download, browse, search, visualize and analyse the staphylococcal genomic data and annotations. We anticipate this resource platform will facilitate the analysis of staphylococcal genomic data, particularly in comparative analyses. StaphyloBase currently has a collection of 754 032 protein-coding sequences (CDSs), 19 258 rRNAs and 15 965 tRNAs from 292 genomes of different staphylococcal species. Information about these features is also included, such as putative functions, subcellular localizations and gene/protein sequences. Our web implementation supports diverse query types and the exploration of CDS- and RNA-type information in detail using an AJAX-based real-time search system. JBrowse has also been incorporated to allow rapid and seamless browsing of staphylococcal genomes. The Pairwise Genome Comparison tool is designed for comparative genomic analysis, for example, to reveal the relationships between two user-defined staphylococcal genomes. 
A newly designed Pathogenomics Profiling Tool (PathoProT) is also included in this platform to facilitate comparative pathogenomics analysis of staphylococcal strains. In conclusion, StaphyloBase offers access to a range of staphylococcal genomic resources as well as analysis tools for comparative analyses. Database URL: http://staphylococcus.um.edu.my/.",StaphyloBase,0.997347653,NA,0,StaphyloBase,0.997347653,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/26/2014 +30963486,http://sfold.wadsworth.org,"Sfold Tools for MicroRNA Target Prediction. Computational prediction of miRNA binding sites on target mRNAs facilitates experimental investigation of miRNA functions. In this chapter, we describe STarMir and STarMirDB, two application modules of the Sfold RNA package. STarMir is a Web server for performing miRNA binding site predictions for mRNA and target sequences submitted by users. STarMirDB is a database of precomputed transcriptome-scale predictions. Both STarMir and STarMirDB provide comprehensive sequence, thermodynamic, and target structure features, a logistic probability as a measure of confidence for each predicted site, and a publication-quality diagram of the predicted miRNA-target hybrid. In addition, STarMir now offers a new quantitative score to address combined regulatory effects of multiple seed and seedless sites. This score provides a quantitative measure of the overall regulatory effects of both seed and seedless sites on the target. STarMir and STarMirDB are freely available to all through the Sfold Web application server at http://sfold.wadsworth.org .",STarMir,0.942518592,NA,0,STarMir,0.942518592,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: URL scramble,NA,NA,1/1/2019 +30994884,http://mobiosd-hub.com/starpep,"Graph-based data integration from bioactive peptide databases of pharmaceutical interest: toward an organized collection enabling visual network analysis. 
Motivation Bioactive peptides have gained great attention in the academy and pharmaceutical industry since they play an important role in human health. However, the increasing number of bioactive peptide databases is causing the problem of data redundancy and duplicated efforts. Even worse is the fact that the available data is non-standardized and often dirty with data entry errors. Therefore, there is a need for a unified view that enables a more comprehensive analysis of the information on this topic residing at different sites. Results After collecting web pages from a large variety of bioactive peptide databases, we organized the web content into an integrated graph database (starPepDB) that holds a total of 71√ɬÉ√ǬÇ√ɬÇ√Ǭ†310 nodes and 348√ɬÉ√ǬÇ√ɬÇ√Ǭ†505 relationships. In this graph structure, there are 45√ɬÉ√ǬÇ√ɬÇ√Ǭ†120 nodes representing peptides, and the rest of the nodes are connected to peptides for describing metadata. Additionally, to facilitate a better understanding of the integrated data, a software tool (starPep toolbox) has been developed for supporting visual network analysis in a user-friendly way; providing several functionalities such as peptide retrieval and filtering, network construction and visualization, interactive exploration and exporting data options. Availability and implementation Both starPepDB and starPep toolbox are freely available at http://mobiosd-hub.com/starpep/. Supplementary information Supplementary data are available at Bioinformatics online.",starPepDB,0.923639512,NA,0,starPepDB,0.923639512,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/1/2019 +26582925,http://start2fold.eu,"Start2Fold: a database of hydrogen/deuterium exchange data on protein folding and stability. 
Proteins fulfil a wide range of tasks in cells; understanding how they fold into complex three-dimensional (3D) structures and how these structures remain stable while retaining sufficient dynamics for functionality is essential for the interpretation of overall protein behaviour. Since the 1950's, solvent exchange-based methods have been the most powerful experimental means to obtain information on the folding and stability of proteins. Considerable expertise and care were required to obtain the resulting datasets, which, despite their importance and intrinsic value, have never been collected, curated and classified. Start2Fold is an openly accessible database (http://start2fold.eu) of carefully curated hydrogen/deuterium exchange (HDX) data extracted from the literature that is open for new submissions from the community. The database entries contain (i) information on the proteins investigated and the underlying experimental procedures and (ii) the classification of the residues based on their exchange protection levels, also allowing for the instant visualization of the relevant residue groups on the 3D structures of the corresponding proteins. By providing a clear hierarchical framework for the easy sharing, comparison and (re-)interpretation of HDX data, Start2Fold intends to promote a better understanding of how the protein sequence encodes folding and structure as well as the development of new computational methods predicting protein folding and stability.",Start2Fold,0.996267363,NA,0,Start2Fold,0.996267363,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +25157689,http://statdb.bic.nus.edu.sg,"STATdb: a specialised resource for the STATome. Signal transducers and activators of transcription (STAT) proteins are key signalling molecules in metazoans, implicated in various cellular processes. 
Increased research in the field has resulted in the accumulation of STAT sequence and structure data, which are scattered across various public databases, missing extensive functional annotations, and prone to effort redundancy because of the dearth of community sharing. Therefore, there is a need to integrate the existing sequence, structure and functional data into a central repository, one that is enriched with annotations and provides a platform for community contributions. Herein, we present STATdb (publicly available at http://statdb.bic.nus.edu.sg/), the first integrated resource for STAT sequences comprising 1540 records representing the known STATome, enriched with existing structural and functional information from various databases and literature and including manual annotations. STATdb provides advanced features for data visualization, analysis and prediction, and community contributions. A key feature is a meta-predictor to characterise STAT sequences based on a novel classification that integrates STAT domain architecture, lineage and function. A curation policy workflow has been devised for regulated and structured community contributions, with an update policy for the seamless integration of new data and annotations.",STATdb,0.997418284,NA,0,STATdb,0.997418284,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/26/2014 +29087479,http://opig.stats.ox.ac.uk/webapps/stcrdab,"STCRDab: the structural T-cell receptor database. The Structural T-cell Receptor Database (STCRDab; http://opig.stats.ox.ac.uk/webapps/stcrdab) is an online resource that automatically collects and curates TCR structural data from the Protein Data Bank. For each entry, the database provides annotations, such as the √ɬÉ√Ǭé√ɬÇ√Ǭ±/√ɬÉ√Ǭé√ɬÇ√Ǭ≤ or √ɬÉ√Ǭé√ɬÇ√Ǭ≥/√ɬÉ√Ǭé√ɬÇ√Ǭ¥ chain pairings, major histocompatibility complex details, and where available, antigen binding affinities. 
In addition, the orientation between the variable domains and the canonical forms of the complementarity-determining region loops are also provided. Users can select, view, and download individual or bulk sets of structures based on these criteria. Where available, STCRDab also finds antibody structures that are similar to TCRs, helping users explore the relationship between TCRs and antibodies.",STCRDab,0.99811368,Structural T-cell Receptor Database,0.990144104,STCRDab,0.99811368,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +22121217,http://discovery.hsci.harvard.edu,"The Stem Cell Discovery Engine: an integrated repository and analysis system for cancer stem cell comparisons. Mounting evidence suggests that malignant tumors are initiated and maintained by a subpopulation of cancerous cells with biological properties similar to those of normal stem cells. However, descriptions of stem-like gene and pathway signatures in cancers are inconsistent across experimental systems. Driven by a need to improve our understanding of molecular processes that are common and unique across cancer stem cells (CSCs), we have developed the Stem Cell Discovery Engine (SCDE)-an online database of curated CSC experiments coupled to the Galaxy analytical framework. The SCDE allows users to consistently describe, share and compare CSC data at the gene and pathway level. Our initial focus has been on carefully curating tissue and cancer stem cell-related experiments from blood, intestine and brain to create a high quality resource containing 53 public studies and 1098 assays. The experimental information is captured and stored in the multi-omics Investigation/Study/Assay (ISA-Tab) format and can be queried in the data repository. A linked Galaxy framework provides a comprehensive, flexible environment populated with novel tools for gene list comparisons against molecular signatures in GeneSigDB and MSigDB, curated experiments in the SCDE and pathways in WikiPathways. 
The SCDE is available at http://discovery.hsci.harvard.edu.",SCDE,0.878304124,Stem Cell Discovery Engine,0.896048009,Stem Cell Discovery Engine,0.896048009,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/24/2011 +27643925,http://www.cbligand.org/StemCellCKB,"StemCellCKB: An Integrated Stem Cell-Specific Chemogenomics KnowledgeBase for Target Identification and Systems-Pharmacology Research. Given the capacity of self-renewal and multilineage differentiation, stem cells are promising sources for use in regenerative medicines as well as in the clinical treatment of certain hematological malignancies and degenerative diseases. Complex networks of cellular signaling pathways largely determine stem cell fate and function. Small molecules that modulate these pathways can provide important biological and pharmacological insights. However, it is still challenging to identify the specific protein targets of these compounds, to explore the changes in stem cell phenotypes induced by compound treatment and to ascertain compound mechanisms of action. To facilitate stem cell related small molecule study and provide a better understanding of the associated signaling pathways, we have constructed a comprehensive domain-specific chemogenomics resource, called StemCellCKB ( http://www.cbligand.org/StemCellCKB/ ). This new cloud-computing platform describes the chemical molecules, genes, proteins, and signaling pathways implicated in stem cell regulation. StemCellCKB is also implemented with web applications designed specifically to aid in the identification of stem cell relevant protein targets, including TargetHunter, a machine-learning algorithm for predicting small molecule targets based on molecular fingerprints, and HTDocking, a high-throughput docking module for target prediction and systems-pharmacology analyses. We have systematically tested StemCellCKB to verify data integrity. 
Target-prediction accuracy has also been validated against the reported known target/compound associations. This proof-of-concept example demonstrates that StemCellCKB can (1) accurately predict the macromolecular targets of existing stem cell modulators and (2) identify novel small molecules capable of probing stem cell signaling mechanisms, for use in systems-pharmacology studies. StemCellCKB facilitates the exploration and exchange of stem cell chemogenomics data among members of the broader research community.",StemCellCKB,0.995615423,NA,0,StemCellCKB,0.995615423,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/7/2016 +29045725,http://stemmapper.sysbiolab.eu,"StemMapper: a curated gene expression database for stem cell lineage analysis. Transcriptomic data have become a fundamental resource for stem cell (SC) biologists as well as for a wider research audience studying SC-related processes such as aging, embryonic development and prevalent diseases including cancer, diabetes and neurodegenerative diseases. Access and analysis of the growing amount of freely available transcriptomics datasets for SCs, however, are not trivial tasks. Here, we present StemMapper, a manually curated gene expression database and comprehensive resource for SC research, built on integrated data for different lineages of human and mouse SCs. It is based on careful selection, standardized processing and stringent quality control of relevant transcriptomics datasets to minimize artefacts, and includes currently over 960 transcriptomes covering a broad range of SC types. Each of the integrated datasets was individually inspected and manually curated. StemMapper's user-friendly interface enables fast querying, comparison, and interactive visualization of quality-controlled SC gene expression data in a comprehensive manner. A proof-of-principle analysis discovering novel putative astrocyte/neural SC lineage markers exemplifies the utility of the integrated data resource. 
We believe that StemMapper can open the way for new insights and advances in SC research by greatly simplifying the access and analysis of SC transcriptomic data. StemMapper is freely accessible at http://stemmapper.sysbiolab.eu.",StemMapper,0.99310112,NA,0,StemMapper,0.99310112,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +23314754,http://caps.ncbs.res.in/stifdb2,"STIFDB2: an updated version of plant stress-responsive transcription factor database with additional stress signals, stress-responsive transcription factor binding sites and stress-responsive genes in Arabidopsis and rice. Understanding the principles of abiotic and biotic stress responses, tolerance and adaptation remains important in plant physiology research to develop better varieties of crop plants. Better understanding of plant stress response mechanisms and application of knowledge derived from integrated experimental and bioinformatics approaches are gaining importance. Earlier, we showed that compiling a database of stress-responsive transcription factors and their corresponding target binding sites in the form of Hidden Markov models at promoter, untranslated and upstream regions of stress-up-regulated genes from expression analysis can help in elucidating various aspects of the stress response in Arabidopsis. In addition to the extensive content in the first version, STIFDB2 is now updated with 15 stress signals, 31 transcription factors and 5,984 stress-responsive genes from three species (Arabidopsis thaliana, Oryza sativa subsp. japonica and Oryza sativa subsp. indica). We have employed an integrated biocuration and genomic data mining approach to characterize the data set of transcription factors and consensus binding sites from literature mining and stress-responsive genes from the Gene Expression Omnibus. 
STIFDB2 currently has 38,798 associations of stress signals, stress-responsive genes and transcription factor binding sites predicted using the Stress-responsive Transcription Factor (STIF) algorithm, along with various functional annotation data. As a unique plant stress regulatory genomics data platform, STIFDB2 can be utilized for targeted as well as high-throughput experimental and computational studies to unravel principles of the stress regulome in dicots and gramineae. STIFDB2 is available from the URL: http://caps.ncbs.res.in/stifdb2.",STIFDB2,0.995890498,NA,0,STIFDB2,0.995890498,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/10/2013 +29218589,http://stimul.cognitivestudies.ru,"StimulStat: A lexical database for Russian. In this article, we present StimulStat - a lexical database for the Russian language in the form of a web application. The database contains more than 52,000 of the most frequent Russian lemmas and more than 1.7 million word forms derived from them. These lemmas and forms are characterized according to more than 70 properties that were demonstrated to be relevant for psycholinguistic research, including frequency, length, phonological and grammatical properties, orthographic and phonological neighborhood frequency and size, grammatical ambiguity, homonymy and polysemy. Some properties were retrieved from various dictionaries and are presented collectively in a searchable form for the first time, the others were computed specifically for the database. The database can be accessed freely at http://stimul.cognitivestudies.ru .",StimulStat,0.99712956,NA,0,StimulStat,0.99712956,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2018 +"22075997, 24293645, 26590256",http://stitch.embl.de,"STITCH 3: zooming in on protein-chemical interactions. To facilitate the study of interactions between proteins and chemicals, we have created STITCH, an aggregated database of interactions connecting over 300,000 chemicals and 2.6 million proteins from 1133 organisms. 
Compared to the previous version, the number of chemicals with interactions and the number of high-confidence interactions both increase 4-fold. The database can be accessed interactively through a web interface, displaying interactions in an integrated network view. It is also available for computational studies through downloadable files and an API. As an extension in the current version, we offer the option to switch between two levels of detail, namely whether stereoisomers of a given compound are shown as a merged entity or as separate entities. Separate display of stereoisomers is necessary, for example, for carbohydrates and chiral drugs. Combining the isomers increases the coverage, as interaction databases and publications found through text mining will often refer to compounds without specifying the stereoisomer. The database is accessible at http://stitch.embl.de/.",STITCH,0.979809999,Tool for Interacting Chemicals,0.714140986,STITCH,0.979809999,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2015 +32934277,http://www.stonemod.org,"StoneMod: a database for kidney stone modulatory proteins with experimental evidence. Better understanding of molecular mechanisms for kidney stone formation is required to improve management of kidney stone disease with better therapeutic outcome. Recent kidney stone research has indicated critical roles of a group of proteins, namely 'stone modulators', in promotion or inhibition of the stone formation. Nevertheless, such information is currently dispersed and difficult to obtain. Herein, we present the kidney stone modulator database (StoneMod), which is a curated resource by obtaining necessary information of such stone modulatory proteins, which can act as stone promoters or inhibitors, with experimental evidence from previously published studies. 
Currently, the StoneMod database contains 10, 16, 13, 8 modulatory proteins that affect calcium oxalate crystallization, crystal growth, crystal aggregation, and crystal adhesion on renal tubular cells, respectively. Informative details of each modulatory protein and PubMed links to the published articles are provided. Additionally, hyperlinks to other protein/gene databases (e.g., UniProtKB, Swiss-Prot, Human Protein Atlas, PeptideAtlas, and Ensembl) are made available for the users to obtain additional in-depth information of each protein. Moreover, this database provides a user-friendly web interface, in which the users can freely access to the information and/or submit their data to deposit or update. Database URL: https://www.stonemod.org .",StoneMod,0.996549025,kidney stone modulator database,0.977263463,StoneMod,0.996549025,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/15/2020 +23284744,http://strap.nci.nih.gov,"StRAP: an integrated resource for profiling high-throughput cancer genomic data from stress response studies. The increasing availability and maturity of DNA microarray technology has led to an explosion of cancer profiling studies for identifying cancer biomarkers, and predicting treatment response. Uncovering complex relationships, however, remains the most challenging task as it requires compiling and efficiently querying data from various sources. Here, we describe the Stress Response Array Profiler (StRAP), an open-source, web-based resource for storage, profiling, visualization, and sharing of cancer genomic data. StRAP houses multi-cancer microarray data with major emphasis on radiotherapy studies, and takes a systems biology approach towards the integration, comparison, and cross-validation of multiple cancer profiling studies. The database is a comprehensive platform for comparative analysis of gene expression data. For effective use of arrays, we provide user-friendly and interactive visualization tools that can display the data and query results. 
StRAP is web-based, platform-independent, and freely accessible at http://strap.nci.nih.gov/.",StRAP,0.99467355,Stress Response Array Profiler,0.947469604,StRAP,0.99467355,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/17/2012 +27138013,http://streptococcus.um.edu.my,"StreptoBase: An Oral Streptococcus mitis Group Genomic Resource and Analysis Platform. The oral streptococci are spherical Gram-positive bacteria categorized under the phylum Firmicutes which are among the most common causative agents of bacterial infective endocarditis (IE) and are also important agents in septicaemia in neutropenic patients. The Streptococcus mitis group is comprised of 13 species including some of the most common human oral colonizers such as S. mitis, S. oralis, S. sanguinis and S. gordonii as well as species such as S. tigurinus, S. oligofermentans and S. australis that have only recently been classified and are poorly understood at present. We present StreptoBase, which provides a specialized free resource focusing on the genomic analyses of oral species from the mitis group. It currently hosts 104 S. mitis group genomes including 27 novel mitis group strains that we sequenced using the high throughput Illumina HiSeq technology platform, and provides a comprehensive set of genome sequences for analyses, particularly comparative analyses and visualization of both cross-species and cross-strain characteristics of S. mitis group bacteria. StreptoBase incorporates sophisticated in-house designed bioinformatics web tools such as Pairwise Genome Comparison (PGC) tool and Pathogenomic Profiling Tool (PathoProT), which facilitate comparative pathogenomics analysis of Streptococcus strains. Examples are provided to demonstrate how StreptoBase can be employed to compare genome structure of different S. mitis group bacteria and putative virulence genes profile across multiple streptococcal strains. 
In conclusion, StreptoBase offers access to a range of streptococci genomic resources as well as analysis tools and will be an invaluable platform to accelerate research in streptococci. Database URL: http://streptococcus.um.edu.my.",StreptoBase,0.997655153,NA,0,StreptoBase,0.997655153,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/3/2016 +26615197,http://www.pharmaceutical-bioinformatics.org/streptomedb,"StreptomeDB 2.0--an extended resource of natural products produced by streptomycetes. Over the last decades, the genus Streptomyces has stirred huge interest in the scientific community as a source of bioactive compounds. The majority of all known antibiotics is isolated from these bacterial strains, as well as a variety of other drugs such as antitumor agents, immunosuppressants and antifungals. To the best of our knowledge, StreptomeDB was the first database focusing on compounds produced by streptomycetes. The new version presented herein represents a major step forward: its content has been increased to over 4000 compounds and more than 2500 host organisms. In addition, we have extended the background information and included hundreds of new manually curated references to literature. The latest update features a unique scaffold-based navigation system, which enables the exploration of the chemical diversity of StreptomeDB on a structural basis. We have included a phylogenetic tree, based on 16S rRNA sequences, which comprises more than two-thirds of the included host organisms. It enables visualizing the frequency, appearance, and persistence of compounds and scaffolds in an evolutionary context. Additionally, we have included predicted MS- and NMR-spectra of thousands of compounds for assignment of experimental data. 
The database is freely accessible via http://www.pharmaceutical-bioinformatics.org/streptomedb.",StreptomeDB,0.997493088,NA,0,StreptomeDB,0.997493088,1,NA,"23193280.0, 33051671.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,11/28/2015 +33051671,http://www.pharmbioinf.uni-freiburg.de/streptomedb,"StreptomeDB 3.0: an updated compendium of streptomycetes natural products. Antimicrobial resistance is an emerging global health threat necessitating the rapid development of novel antimicrobials. Remarkably, the vast majority of currently available antibiotics are natural products (NPs) isolated from streptomycetes, soil-dwelling bacteria of the genus Streptomyces. However, there is still a huge reservoir of streptomycetes NPs which remains pharmaceutically untapped and a compendium thereof could serve as a source of inspiration for the rational design of novel antibiotics. Initially released in 2012, StreptomeDB (http://www.pharmbioinf.uni-freiburg.de/streptomedb) is the first and only public online database that enables the interactive phylogenetic exploration of streptomycetes and their isolated or mutasynthesized NPs. In this third release, there are substantial improvements over its forerunners, especially in terms of data content. For instance, about 2500 unique NPs were newly annotated through manual curation of about 1300 PubMed-indexed articles, published in the last five years since the second release. To increase interoperability, StreptomeDB entries were hyperlinked to several spectral, (bio)chemical and chemical vendor databases, and also to a genome-based NP prediction server. Moreover, predicted pharmacokinetic and toxicity profiles were added. 
Lastly, some recent real-world use cases of StreptomeDB are highlighted, to illustrate its applicability in life sciences.",StreptomeDB,0.997068405,NA,0,StreptomeDB,0.997068405,1,NA,"23193280.0, 26615197.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,1/1/2021 +23193280,http://www.pharmaceutical-bioinformatics.de/streptomedb,"StreptomeDB: a resource for natural compounds isolated from Streptomyces species. Bacteria from the genus Streptomyces are very important for the production of natural bioactive compounds such as antibiotic, antitumour or immunosuppressant drugs. Around two-thirds of all known natural antibiotics are produced by these bacteria. An enormous quantity of crucial data related to this genus has been generated and published, but so far no freely available and comprehensive database exists. Here, we present StreptomeDB (http://www.pharmaceutical-bioinformatics.de/streptomedb/). To the best of our knowledge, this is the largest database of natural products isolated from Streptomyces. It contains >2400 unique and diverse compounds from >1900 different Streptomyces strains and substrains. In addition to names and molecular structures of the compounds, information about source organisms, references, biological role, activities and synthesis routes (e.g. polyketide synthase derived and non-ribosomal peptides derived) is included. Data can be accessed through queries on compound names, chemical structures or organisms. Extraction from the literature was performed through automatic text mining of thousands of articles from PubMed, followed by manual curation. 
All annotated compound structures can be downloaded from the website and applied for in silico screenings for identifying new active molecules with undiscovered properties.",StreptomeDB,0.996224344,NA,0,StreptomeDB,0.996224344,1,NA,"26615197.0, 33051671.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,11/28/2012 +28974472,http://csgenomics.ahau.edu.cn/Stress2TF,"Stress2TF: a manually curated database of TF regulation in plant response to stress. Considerable studies demonstrate that plant transcription factors (TFs) play key regulatory roles in abiotic/biotic stress conditions, such as drought and pathogen attack. However, there is no effort dedicated to curate experimentally validated stress-TF regulatory relationships from these individual reports into a central database, which put an obstacle in the exploration of stress-TF regulations in plants. To address this issue, we presented a literature-curated database 'Stress2TF' that currently documented 1533 regulatory relationships between 71 abiotic/biotic stresses and 558 TFs in 47 plant species. Each entry in Stress2TF contains detailed information about a stress-TF relationship such as plant name, stress name, TF and brief description of stress-TF relationship. Stress2TF provided a user-friendly interface for entry browse, search and download. In addition, a submission page and several useful tools (e.g., BLAST, network visualization) were integrated. Stress2TF may be a valuable resource for the research of stress-TF regulatory mechanisms in plants. Stress2TF is available at http://csgenomics.ahau.edu.cn/Stress2TF.",Stress2TF,0.992926578,NA,0,Stress2TF,0.992926578,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/30/2017 +"23203871, 25352553, 27924014, 30476243, 33237311",http://string-db.org,"STRING v9.1: protein-protein interaction networks, with increased coverage and integration. 
Complete knowledge of all direct and indirect interactions between proteins in a given cell would represent an important milestone towards a comprehensive description of cellular mechanisms and functions. Although this goal is still elusive, considerable progress has been made-particularly for certain model organisms and functional systems. Currently, protein interactions and associations are annotated at various levels of detail in online resources, ranging from raw data repositories to highly formalized pathway databases. For many applications, a global view of all the available interaction data is desirable, including lower-quality data and/or computational predictions. The STRING database (http://string-db.org/) aims to provide such a global perspective for as many organisms as feasible. Known and predicted associations are scored and integrated, resulting in comprehensive protein networks covering >1100 organisms. Here, we describe the update to version 9.1 of STRING, introducing several improvements: (i) we extend the automated mining of scientific texts for interaction information, to now also include full-text articles; (ii) we entirely re-designed the algorithm for transferring interactions from one model organism to the other; and (iii) we provide users with statistical information on any functional enrichment observed in their networks.",STRING,0.995607018,NA,0,STRING,0.995607018,5,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +28888135,"http://www.ncbi.nlm.nih.gov/bioproject/380127, http://strider.online","STRSeq: A catalog of sequence diversity at human identification Short Tandem Repeat loci. The STR Sequencing Project (STRSeq) was initiated to facilitate the description of sequence-based alleles at the Short Tandem Repeat (STR) loci targeted in human identification assays. This international collaborative effort, which has been endorsed by the ISFG DNA Commission, provides a framework for communication among laboratories. 
The initial data used to populate the project are the aggregate alleles observed in targeted sequencing studies across four laboratories: National Institute of Standards and Technology (N=1786), Kings College London (N=1043), University of North Texas Health Sciences Center (N=839), and University of Santiago de Compostela (N=944), for a total of 4612 individuals. STRSeq data are maintained as GenBank records at the U.S. National Center for Biotechnology Information (NCBI), which participates in a daily data exchange with the DNA DataBank of Japan (DDBJ) and the European Nucleotide Archive (ENA). Each GenBank record contains the observed sequence of a STR region, annotation (""bracketing"") of the repeat region and flanking region polymorphisms, information regarding the sequencing assay and data quality, and backward compatible length-based allele designation. STRSeq GenBank records are organized within a BioProject at NCBI (https://www.ncbi.nlm.nih.gov/bioproject/380127), which is sub-divided into: commonly used autosomal STRs, alternate autosomal STRs, Y-chromosomal STRs, and X-chromosomal STRs. Each of these categories is further divided into locus-specific BioProjects. The BioProject hierarchy facilitates access to the GenBank records by browsing, BLAST searching, or ftp download. Future plans include user interface tools at strseq.nist.gov, a pathway for submission of additional allele records by laboratories performing population sample sequencing and interaction with the STRidER web portal for quality control (http://strider.online).",STRSeq,0.99774313,Sequencing Project,0.543064018,STRSeq,0.99774313,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2017 +25886721,http://bcc.ee.cityu.edu.hk/data/EGFR.html,"EGFR Mutant Structural Database: computationally predicted 3D structures and the corresponding binding free energies with gefitinib and erlotinib. 
Background Epidermal growth factor receptor (EGFR) mutation-induced drug resistance has caused great difficulties in the treatment of non-small-cell lung cancer (NSCLC). However, structural information is available for just a few EGFR mutants. In this study, we created an EGFR Mutant Structural Database (freely available at http://bcc.ee.cityu.edu.hk/data/EGFR.html ), including the 3D EGFR mutant structures and their corresponding binding free energies with two commonly used inhibitors (gefitinib and erlotinib). Results We collected the information of 942 NSCLC patients belonging to 112 mutation types. These mutation types are divided into five groups (insertion, deletion, duplication, modification and substitution), and substitution accounts for 61.61% of the mutation types and 54.14% of all the patients. Among all the 942 patients, 388 cases experienced a mutation at residue site 858 with leucine replaced by arginine (L858R), making it the most common mutation type. Moreover, 36 (32.14%) mutation types occur at exon 19, and 419 (44.48%) patients carried a mutation at exon 21. In this study, we predicted the EGFR mutant structures using Rosetta with the collected mutation types. In addition, Amber was employed to refine the structures followed by calculating the binding free energies of mutant-drug complexes. Conclusions The EGFR Mutant Structural Database provides resources of 3D structures and the binding affinity with inhibitors, which can be used by other researchers to study NSCLC further and by medical doctors as reference for NSCLC treatment.",EGFR,0.57483536,Structural Database,0.652144313,Structural Database,0.652144313,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/14/2015 +27188311,http://tesla.pcbi.upenn.edu/strucuturesurfer,"A comprehensive database of high-throughput sequencing-based RNA secondary structure probing data (Structure Surfer). 
Background RNA molecules fold into complex three-dimensional shapes, guided by the pattern of hydrogen bonding between nucleotides. This pattern of base pairing, known as RNA secondary structure, is critical to their cellular function. Recently several diverse methods have been developed to assay RNA secondary structure on a transcriptome-wide scale using high-throughput sequencing. Each approach has its own strengths and caveats, however there is no widely available tool for visualizing and comparing the results from these varied methods. Methods To address this, we have developed Structure Surfer, a database and visualization tool for inspecting RNA secondary structure in six transcriptome-wide data sets from human and mouse ( http://tesla.pcbi.upenn.edu/strucuturesurfer/ ). The data sets were generated using four different high-throughput sequencing based methods. Each one was analyzed with a scoring pipeline specific to its experimental design. Users of Structure Surfer have the ability to query individual loci as well as detect trends across multiple sites. Results Here, we describe the included data sets and their differences. We illustrate the database's function by examining known structural elements and we explore example use cases in which combined data is used to detect structural trends. Conclusions In total, Structure Surfer provides an easy-to-use database and visualization interface for allowing users to interrogate the currently available transcriptome-wide RNA secondary structure information for mammals.",Structure Surfer,0.915590485,NA,0,Structure Surfer,0.915590485,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/17/2016 +23335498,http://www.sts.org/quality-research-patient-safety/research/publications-and-research/access-data-sts-national-database,"The society of thoracic surgeons national database. 
Aims The Society of Thoracic Surgeons (STS) National Database collects detailed clinical information on patients undergoing adult cardiac, paediatric and congenital cardiac, and general thoracic surgical operations. These data are used to support risk-adjusted, nationally benchmarked performance assessment and feedback; voluntary public reporting; quality improvement initiatives; guideline development; appropriateness determination; shared decision making; research using cross-sectional and longitudinal registry linkages; comparative effectiveness studies; government collaborations including postmarket surveillance; regulatory compliance and reimbursement strategies. Interventions All database participants receive feedback reports which they may voluntarily share with their hospitals or payers, or publicly report. STS analyses are regularly used as the basis for local, regional and national quality improvement efforts. Population More than 90% of adult cardiac programmes in the USA participate, as do the majority of paediatric cardiac programmes, and general thoracic participation continues to increase. Since the inception of the Database in 1989, more than 5 million patient records have been submitted. Baseline data Each of the three subspecialty databases includes several hundred variables that characterise patient demographics, diagnosis, medical history, clinical risk factors and urgency of presentation, operative details and postoperative course including adverse outcomes. Data capture Data are entered by trained data abstractors and by the care team, using detailed data specifications for each element. Data quality Quality and consistency checks assure accurate and complete data, missing data are rare, and audits are performed annually of selected participant sites. Endpoints All major outcomes are reported including complications, status at discharge and mortality. 
Data access Applications for STS Database participants to use aggregate national data for research are available at http://www.sts.org/quality-research-patient-safety/research/publications-and-research/access-data-sts-national-database.",STS,0.908651332,Surgeons,0.600629091,STS,0.908651332,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/18/2013 +"27779618, 27779621",http://studyforrest.org,"A studyforrest extension, retinotopic mapping and localization of higher visual areas. The studyforrest (http://studyforrest.org) dataset is likely the largest neuroimaging dataset on natural language and story processing publicly available today. In this article, along with a companion publication, we present an update of this dataset that extends its scope to vision and multi-sensory research. 15 participants of the original cohort volunteered for a series of additional studies: a clinical examination of visual function, a standard retinotopic mapping procedure, and a localization of higher visual areas-such as the fusiform face area. The combination of this update, the previous data releases for the dataset, and the companion publication, which includes neuroimaging and eye tracking data from natural stimulation with a motion picture, form an extremely versatile and comprehensive resource for brain imaging research-with almost six hours of functional neuroimaging data across five different stimulation paradigms for each participant. Furthermore, we describe employed paradigms and present results that document the quality of the data for the purpose of characterising major properties of participants' visual processing stream.",studyforrest,0.994259179,NA,0,studyforrest,0.994259179,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/25/2016 +"23180787, 25161662",http://suba.plantenergy.uwa.edu.au,"SUBA3: a database for integrating experimentation and prediction to define the SUBcellular location of proteins in Arabidopsis. 
The subcellular location database for Arabidopsis proteins (SUBA3, http://suba.plantenergy.uwa.edu.au) combines manual literature curation of large-scale subcellular proteomics, fluorescent protein visualization and protein-protein interaction (PPI) datasets with subcellular targeting calls from 22 prediction programs. More than 14 500 new experimental locations have been added since its first release in 2007. Overall, nearly 650 000 new calls of subcellular location for 35 388 non-redundant Arabidopsis proteins are included (almost six times the information in the previous SUBA version). A re-designed interface makes the SUBA3 site more intuitive and easier to use than earlier versions and provides powerful options to search for PPIs within the context of cell compartmentation. SUBA3 also includes detailed localization information for reference organelle datasets and incorporates green fluorescent protein (GFP) images for many proteins. To determine as objectively as possible where a particular protein is located, we have developed SUBAcon, a Bayesian approach that incorporates experimental localization and targeting prediction data to best estimate a protein's location in the cell. The probabilities of subcellular location for each protein are provided and displayed as a pictographic heat map of a plant cell in SUBA3.",SUBA3,0.984873056,subcellular location database for Arabidopsis proteins,0.961620086,SUBA3,0.984873056,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/12/2014 +27899614,http://suba.live,"SUBA4: the interactive data analysis centre for Arabidopsis subcellular protein locations. The SUBcellular location database for Arabidopsis proteins (SUBA4, http://suba.live) is a comprehensive collection of manually curated published data sets of large-scale subcellular proteomics, fluorescent protein visualization, protein-protein interaction (PPI) as well as subcellular targeting calls from 22 prediction programs. 
SUBA4 contains an additional 35 568 localizations totalling more than 60 000 experimental protein location claims as well as 37 new suborganellar localization categories. The experimental PPI data has been expanded to 26 327 PPI pairs including 856 PPI localizations from experimental fluorescent visualizations. The new SUBA4 user interface enables users to choose quickly from the filter categories: 'subcellular location', 'protein properties', 'protein-protein interaction' and 'affiliations' to build complex queries. This allows substantial expansion of search parameters into 80 annotation types comprising 1 150 204 new annotations to study metadata associated with subcellular localization. The 'BLAST' tab contains a sequence alignment tool to enable a sequence fragment from any species to find the closest match in Arabidopsis and retrieve data on subcellular location. Using the location consensus SUBAcon, the SUBA4 toolbox delivers three novel data services allowing interactive analysis of user data to provide relative compartmental protein abundances and proximity relationship analysis of PPI and coexpression partners from a submitted list of Arabidopsis gene identifiers.",SUBA4,0.997400373,SUBcellular location database for Arabidopsis proteins,0.988059592,SUBA4,0.997400373,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +35113396,"http://suba.live/, http://crop-pal.org","Subcellular Proteomics as a Unified Approach of Experimental Localizations and Computed Prediction Data for Arabidopsis and Crop Plants. In eukaryotic organisms, subcellular protein location is critical in defining protein function and understanding sub-functionalization of gene families. Some proteins have defined locations, whereas others have low specificity targeting and complex accumulation patterns. There is no single approach that can be considered entirely adequate for defining the in vivo location of all proteins. 
By combining evidence from different approaches, the strengths and weaknesses of different technologies can be estimated, and a location consensus can be built. The Subcellular Location of Proteins in Arabidopsis database ( http://suba.live/ ) combines experimental data sets that have been reported in the literature and is analyzing these data to provide useful tools for biologists to interpret their own data. Foremost among these tools is a consensus classifier (SUBAcon) that computes a proposed location for all proteins based on balancing the experimental evidence and predictions. Further tools analyze sets of proteins to define the abundance of cellular structures. Extending these types of resources to plant crop species has been complex due to polyploidy, gene family expansion and contraction, and the movement of pathways and processes within cells across the plant kingdom. The Crop Proteins of Annotated Location database ( http://crop-pal.org/ ) has developed a range of subcellular location resources including a species-specific voting consensus for 12 plant crop species that offers collated evidence and filters for current crop proteomes akin to SUBA. Comprehensive cross-species comparison of these data shows that the sub-cellular proteomes (subcellulomes) depend only to some degree on phylogenetic relationship and are more conserved in major biosynthesis than in metabolic pathways. Together SUBA and cropPAL created reference subcellulomes for plants as well as species-specific subcellulomes for cross-species data mining. 
These data collections are increasingly used by the research community to provide a subcellular protein location layer, inform models of compartmented cell function and protein-protein interaction network, guide future molecular crop breeding strategies, or simply answer a specific question-where is my protein of interest inside the cell?",NA,0,Subcellular Location of Proteins in Arabidopsis,0.81757238,Subcellular Location of Proteins in Arabidopsis,0.81757238,1,NA,NA,low_prob_best_name,do not remove,NA,NA,TRUE POS: two resources; name and URL of first will be correct; second is lost,NA,NA,1/1/2021 +"22096228, 24178028, 26433225, 29788229",http://subtiwiki.uni-goettingen.de,"SubtiWiki--a comprehensive community resource for the model organism Bacillus subtilis. In the post-genomic era, most components of a cell are known and they can be quantified by large-scale functional genomics approaches. However, genome annotation is the bottleneck that hampers our understanding of living cells and organisms. Up-to-date functional annotation is of special importance for model organisms that provide a frame of reference for studies with other relevant organisms. We have generated a Wiki-type database for the Gram-positive model bacterium Bacillus subtilis, SubtiWiki (http://subtiwiki.uni-goettingen.de/). This Wiki is centered around the individual genes and gene products of B. subtilis and provides information on each aspect of gene function and expression as well as protein activity and its control. SubtiWiki is accompanied by two companion databases SubtiPathways and SubtInteract that provide graphical representations of B. subtilis metabolism and its regulation and of protein-protein interactions, respectively. The diagrams of both databases are easily navigatable using the popular Google maps API, and they are extensively linked with the SubtiWiki gene pages. 
Moreover, each gene/gene product was assigned to one or more functional categories and transcription factor regulons. Pages for the specific categories and regulons provide a rapid overview of functionally related genes/proteins. Today, SubtiWiki can be regarded as one of the most complete inventories of knowledge on a living organism in one single resource.",SubtiWiki,0.997424006,NA,0,SubtiWiki,0.997424006,4,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +26578555,http://sugarbind.expasy.org,"SugarBindDB, a resource of glycan-mediated host-pathogen interactions. The SugarBind Database (SugarBindDB) covers knowledge of glycan binding of human pathogen lectins and adhesins. It is a curated database; each glycan-protein binding pair is associated with at least one published reference. The core data element of SugarBindDB is a set of three inseparable components: the pathogenic agent, a lectin/adhesin and a glycan ligand. Each entity (agent, lectin or ligand) is described by a range of properties that are summarized in an entity-dedicated page. Several search, navigation and visualisation tools are implemented to investigate the functional role of glycans in pathogen binding. The database is cross-linked to protein and glycan-relaled resources such as UniProtKB and UniCarbKB. It is tightly bound to the latter via a substructure search tool that maps each ligand to full structures where it occurs. Thus, a glycan-lectin binding pair of SugarBindDB can lead to the identification of a glycan-mediated protein-protein interaction, that is, a lectin-glycoprotein interaction, via substructure search and the knowledge of site-specific glycosylation stored in UniCarbKB. 
SugarBindDB is accessible at: http://sugarbind.expasy.org.",SugarBindDB,0.997786748,NA,0,SugarBindDB,0.997786748,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +27749924,http://abims.sb-roscoff.fr/sulfatlas,"Matching the Diversity of Sulfated Biomolecules: Creation of a Classification Database for Sulfatases Reflecting Their Substrate Specificity. Sulfatases cleave sulfate groups from various molecules and constitute a biologically and industrially important group of enzymes. However, the number of sulfatases whose substrate has been characterized is limited in comparison to the huge diversity of sulfated compounds, yielding functional annotations of sulfatases particularly prone to flaws and misinterpretations. In the context of the explosion of genomic data, a classification system allowing a better prediction of substrate specificity and for setting the limit of functional annotations is urgently needed for sulfatases. Here, after an overview on the diversity of sulfated compounds and on the known sulfatases, we propose a classification database, SulfAtlas (http://abims.sb-roscoff.fr/sulfatlas/), based on sequence homology and composed of four families of sulfatases. The formylglycine-dependent sulfatases, which constitute the largest family, are also divided by phylogenetic approach into 73 subfamilies, each subfamily corresponding to either a known specificity or to an uncharacterized substrate. SulfAtlas summarizes information about the different families of sulfatases. Within a family a web page displays the list of its subfamilies (when they exist) and the list of EC numbers. The family or subfamily page shows some descriptors and a table with all the UniProt accession numbers linked to the databases UniProt, ExplorEnz, and PDB.",SulfAtlas,0.992132127,NA,0,SulfAtlas,0.992132127,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/17/2016 +25300487,http://bioinformatics.charite.de/supernatural,"Super Natural II--a database of natural products. 
Natural products play a significant role in drug discovery and development. Many topological pharmacophore patterns are common between natural products and commercial drugs. A better understanding of the specific physicochemical and structural features of natural products is important for corresponding drug development. Several encyclopedias of natural compounds have been composed, but the information remains scattered or not freely available. The first version of the Supernatural database containing ∼ 50,000 compounds was published in 2006 to face these challenges. Here we present a new, updated and expanded version of natural product database, Super Natural II (http://bioinformatics.charite.de/supernatural), comprising ∼ 326,000 molecules. It provides all corresponding 2D structures, the most important structural and physicochemical properties, the predicted toxicity class for ∼ 170,000 compounds and the vendor information for the vast majority of compounds. The new version allows a template-based search for similar compounds as well as a search for compound names, vendors, specific physical properties or any substructures. Super Natural II also provides information about the pathways associated with synthesis and degradation of the natural products, as well as their mechanism of action with respect to structurally similar drugs and their target proteins.",Super Natural II,0.813094119,NA,0,Super Natural II,0.813094119,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/9/2014 +26578594,http://sea.edbc.org,"SEA: a super-enhancer archive. Super-enhancers are large clusters of transcriptional enhancers regarded as having essential roles in driving the expression of genes that control cell identity during development and tumorigenesis. The construction of a genome-wide super-enhancer database is urgently needed to better understand super-enhancer-directed gene expression regulation for a given biology process.
Here, we present a specifically designed web-accessible database, Super-Enhancer Archive (SEA, http://sea.edbc.org). SEA focuses on integrating super-enhancers in multiple species and annotating their potential roles in the regulation of cell identity gene expression. The current release of SEA incorporates 83 996 super-enhancers computationally or experimentally identified in 134 cell types/tissues/diseases, including human (75 439, three of which were experimentally identified), mouse (5879, five of which were experimentally identified), Drosophila melanogaster (1774) and Caenorhabditis elegans (904). To facilitate data extraction, SEA supports multiple search options, including species, genome location, gene name, cell type/tissue and super-enhancer name. The response provides detailed (epi)genetic information, incorporating cell type specificity, nearby genes, transcriptional factor binding sites, CRISPR/Cas9 target sites, evolutionary conservation, SNPs, H3K27ac, DNA methylation, gene expression and TF ChIP-seq data. Moreover, analytical tools and a genome browser were developed for users to explore super-enhancers and their roles in defining cell identity and disease processes in depth.",SEA,0.812459141,Super-Enhancer Archive,0.932689333,Super-Enhancer Archive,0.932689333,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/17/2015 +30094004,http://www.krill.le.ac.uk,"The Euphausia superba transcriptome database, SuperbaSE: An online, open resource for researchers. Antarctic krill (Euphausia superba) is a crucial component of the Southern Ocean ecosystem, acting as the major link between primary production and higher trophic levels with an annual predator demand of up to 470 million tonnes. It also acts as an ecosystem engineer, affecting carbon sequestration and recycling iron and nitrogen, and has increasing importance as a commercial product in the aquaculture and health industries.
Here we describe the creation of a de novo assembled head transcriptome for E. superba. As an example of its potential as a molecular resource, we relate its exploitation in identifying and characterizing numerous genes related to the circadian clock in E. superba, including the major components of the central feedback loop. We have made the transcriptome openly accessible for a wider audience of ecologists, molecular biologists, evolutionary geneticists, and others in a user-friendly format at SuperbaSE, hosted at http://www.krill.le.ac.uk.",SuperbaSE,0.992503345,NA,0,SuperbaSE,0.992503345,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/28/2017 +29140469,http://cheminfo.charite.de/superdrug2,"SuperDRUG2: a one stop resource for approved/marketed drugs. Regular monitoring of drug regulatory agency web sites and similar resources for information on new drug approvals and changes to legal status of marketed drugs is impractical. It requires navigation through several resources to find complete information about a drug as none of the publicly accessible drug databases provide all features essential to complement in silico drug discovery. Here, we propose SuperDRUG2 (http://cheminfo.charite.de/superdrug2) as a comprehensive knowledge-base of approved and marketed drugs. We provide the largest collection of drugs (containing 4587 active pharmaceutical ingredients) which include small molecules, biological products and other drugs. The database is intended to serve as a one-stop resource providing data on: chemical structures, regulatory details, indications, drug targets, side-effects, physicochemical properties, pharmacokinetics and drug-drug interactions. We provide a 3D-superposition feature that facilitates estimation of the fit of a drug in the active site of a target with a known ligand bound to it. 
Apart from multiple other search options, we introduced pharmacokinetics simulation as a unique feature that allows users to visualise the 'plasma concentration versus time' profile for a given dose of drug with few other adjustable parameters to simulate the kinetics in a healthy individual and poor or extensive metabolisers.",SuperDRUG2,0.978730261,NA,0,SuperDRUG2,0.978730261,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +"25414345, 30445555",http://supfam.org,"The SUPERFAMILY 1.75 database in 2014: a doubling of data. We present updates to the SUPERFAMILY 1.75 (http://supfam.org) online resource and protein sequence collection. The hidden Markov model library that provides sequence homology to SCOP structural domains remains unchanged at version 1.75. In the last 4 years SUPERFAMILY has more than doubled its holding of curated complete proteomes over all cellular life, from 1400 proteomes reported previously in 2010 up to 3258 at present. Outside of the main sequence collection, SUPERFAMILY continues to provide domain annotation for sequences provided by other resources such as: UniProt, Ensembl, PDB, much of JGI Phytozome and selected subcollections of NCBI RefSeq. Despite this growth in data volume, SUPERFAMILY now provides users with an expanded and daily updated phylogenetic tree of life (sTOL). This tree is built with genomic-scale domain annotation data as before, but constantly updated when new species are introduced to the sequence library. Our Gene Ontology and other functional and phenotypic annotations previously reported have stood up to critical assessment by the function prediction community. 
We have now introduced these data in an integrated manner online at the level of an individual sequence, and--in the case of whole genomes--with enrichment analysis against a taxonomically defined background.",SUPERFAMILY,0.993269384,NA,0,SUPERFAMILY,0.993269384,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +25404137,http://superfly.crg.eu,"SuperFly: a comparative database for quantified spatio-temporal gene expression patterns in early dipteran embryos. We present SuperFly (http://superfly.crg.eu), a relational database for quantified spatio-temporal expression data of segmentation genes during early development in different species of dipteran insects (flies, midges and mosquitoes). SuperFly has a special focus on emerging non-drosophilid model systems. The database currently includes data of high spatio-temporal resolution for three species: the vinegar fly Drosophila melanogaster, the scuttle fly Megaselia abdita and the moth midge Clogmia albipunctata. At this point, SuperFly covers up to 9 genes and 16 time points per species, with a total of 1823 individual embryos. It provides an intuitive web interface, enabling the user to query and access original embryo images, quantified expression profiles, extracted positions of expression boundaries and integrated datasets, plus metadata and intermediate processing steps. SuperFly is a valuable new resource for the quantitative comparative study of gene expression patterns across dipteran species. Moreover, it provides an interesting test set for systems biologists interested in fitting mathematical gene network models to data. Both of these aspects are essential ingredients for progress toward a more quantitative and mechanistic understanding of developmental evolution.",SuperFly,0.996205926,NA,0,SuperFly,0.996205926,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2014 +24271391,http://bioinformatics.charite.de/superpain,"SuperPain--a resource on pain-relieving compounds targeting ion channels. 
Pain is more than an unpleasant sensory experience associated with actual or potential tissue damage: it is the most common reason for physician consultation and often dramatically affects quality of life. The management of pain is often difficult and new targets are required for more effective and specific treatment. SuperPain (http://bioinformatics.charite.de/superpain/) is freely available database for pain-stimulating and pain-relieving compounds, which bind or potentially bind to ion channels that are involved in the transmission of pain signals to the central nervous system, such as TRPV1, TRPM8, TRPA1, TREK1, TRESK, hERG, ASIC, P2X and voltage-gated sodium channels. The database consists of ∼8700 ligands, which are characterized by experimentally measured binding affinities. Additionally, 100 000 putative ligands are included. Moreover, the database provides 3D structures of receptors and predicted ligand-binding poses. These binding poses and a structural classification scheme provide hints for the design of new analgesic compounds. A user-friendly graphical interface allows similarity searching, visualization of ligands docked into the receptor, etc.",SuperPain,0.974839449,NA,0,SuperPain,0.974839449,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/22/2013 +34656056,http://tcm.charite.de/supertcm,"SuperTCM: A biocultural database combining biological pathways and historical linguistic data of Chinese Materia Medica for drug development. Aim of the study Botanicals used in Traditional Chinese Medicine (TCM) are a rich source for drug discovery and provide models for multi-component drug development. To facilitate the studies of the actions of TCM drugs and expand their applications, a comprehensive database is urgently required. Methods One online resource connects all the relevant data from multiple scientific sources and languages.
Drug information from published TCM databases and the official Chinese Pharmacopoeia as well as specialized meta-websites such as Kew's Medicinal Plant Names Service was integrated on a higher level. Results Our database, SuperTCM, covers the aspects of TCM derived from medicinal plants, encompassing pharmacological recipes up to chemical compounds. It provides the information for 6516 TCM drugs (or ""herbs"") with 5372 botanical species, 55,772 active ingredients against 543 targets in 254 KEGG pathways associated with 8634 diseases. SuperTCM is freely available at http://tcm.charite.de/supertcm.",SuperTCM,0.990243733,NA,0,SuperTCM,0.990243733,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/15/2021 +26582922,http://www.surechembl.org,"SureChEMBL: a large-scale, chemically annotated patent document database. SureChEMBL is a publicly available large-scale resource containing compounds extracted from the full text, images and attachments of patent documents. The data are extracted from the patent literature according to an automated text and image-mining pipeline on a daily basis. SureChEMBL provides access to a previously unavailable, open and timely set of annotated compound-patent associations, complemented with sophisticated combined structure and keyword-based search capabilities against the compound repository and patent document corpus; given the wealth of knowledge hidden in patent documents, analysis of SureChEMBL data has immediate applications in drug discovery, medicinal chemistry and other commercial areas of chemical science. Currently, the database contains 17 million compounds extracted from 14 million patent documents. Access is available through a dedicated web-based interface and data downloads at: https://www.surechembl.org/.",SureChEMBL,0.998425841,NA,0,SureChEMBL,0.998425841,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +25894527,http://wlab.ethz.ch/cspa,"A mass spectrometric-derived cell surface protein atlas. 
Cell surface proteins are major targets of biomedical research due to their utility as cellular markers and their extracellular accessibility for pharmacological intervention. However, information about the cell surface protein repertoire (the surfaceome) of individual cells is only sparsely available. Here, we applied the Cell Surface Capture (CSC) technology to 41 human and 31 mouse cell types to generate a mass-spectrometry derived Cell Surface Protein Atlas (CSPA) providing cellular surfaceome snapshots at high resolution. The CSPA is presented in form of an easy-to-navigate interactive database, a downloadable data matrix and with tools for targeted surfaceome rediscovery (http://wlab.ethz.ch/cspa). The cellular surfaceome snapshots of different cell types, including cancer cells, resulted in a combined dataset of 1492 human and 1296 mouse cell surface glycoproteins, providing experimental evidence for their cell surface expression on different cell types, including 136 G-protein coupled receptors and 75 membrane receptor tyrosine-protein kinases. Integrated analysis of the CSPA reveals that the concerted biological function of individual cell types is mainly guided by quantitative rather than qualitative surfaceome differences. The CSPA will be useful for the evaluation of drug targets, for the improved classification of cell types and for a better understanding of the surfaceome and its concerted biological functions in complex signaling microenvironments.",CSPA,0.637170613,Surface Protein Atlas,0.705307941,Surface Protein Atlas,0.705307941,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,4/20/2015 +26249811,http://www.ebi.ac.uk/thornton-srv/databases/SurvCurv,"SurvCurv database and online survival analysis platform update. Unlabelled Understanding the biology of ageing is an important and complex challenge. Survival experiments are one of the primary approaches for measuring changes in ageing. 
Here, we present a major update to SurvCurv, a database and online resource for survival data in animals. As well as a substantial increase in data and additions to existing graphical and statistical survival analysis features, SurvCurv now includes extended mathematical mortality modelling functions and survival density plots for more advanced representation of groups of survival cohorts. Availability and implementation The database is freely available at https://www.ebi.ac.uk/thornton-srv/databases/SurvCurv/. All data are published under the Creative Commons Attribution License. Contact matthias.ziehm@ebi.ac.uk. Supplementary information Supplementary data are available at Bioinformatics online.",SurvCurv,0.998165548,NA,0,SurvCurv,0.998165548,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/6/2015 +24066126,http://bioinformatica.mty.itesm.mx/SurvExpress,"SurvExpress: an online biomarker validation tool and database for cancer gene expression data using survival analysis. Validation of multi-gene biomarkers for clinical outcomes is one of the most important issues for cancer prognosis. An important source of information for virtual validation is the high number of available cancer datasets. Nevertheless, assessing the prognostic performance of a gene expression signature along datasets is a difficult task for Biologists and Physicians and also time-consuming for Statisticians and Bioinformaticians. Therefore, to facilitate performance comparisons and validations of survival biomarkers for cancer outcomes, we developed SurvExpress, a cancer-wide gene expression database with clinical outcomes and a web-based tool that provides survival analysis and risk assessment of cancer datasets. The main input of SurvExpress is only the biomarker gene list. We generated a cancer database collecting more than 20,000 samples and 130 datasets with censored clinical information covering tumors over 20 tissues. 
We implemented a web interface to perform biomarker validation and comparisons in this database, where a multivariate survival analysis can be accomplished in about one minute. We show the utility and simplicity of SurvExpress in two biomarker applications for breast and lung cancer. Compared to other tools, SurvExpress is the largest, most versatile, and quickest free tool available. SurvExpress web can be accessed in http://bioinformatica.mty.itesm.mx/SurvExpress (a tutorial is included). The website was implemented in JSP, JavaScript, MySQL, and R.",SurvExpress,0.972360611,NA,0,SurvExpress,0.972360611,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/16/2013 +32813752,http://svad.mbc.nctu.edu.tw,"SVAD: A genetic database curates non-ischemic sudden cardiac death-associated variants. Sudden cardiac death (SCD) is an important cause of mortality worldwide. It accounts for approximately half of all deaths from cardiovascular disease. While coronary artery disease and acute myocardial infarction account for the majority of SCD in the elderly population, inherited cardiac diseases (inherited CDs) comprise a substantial proportion of younger SCD victims with a significant genetic component. Currently, the use of next-generation sequencing enables the rapid analysis to investigate relationships between genetic variants and inherited CDs causing SCD. Genetic contribution to risk has been considered an alternate predictor of SCD. In the past years, large numbers of SCD susceptibility variants were reported, but these results are scattered in numerous publications. Here, we present the SCD-associated Variants Annotation Database (SVAD) to facilitate the interpretation of variants and to meet the needs of data integration. SVAD contains data from a broad screening of scientific literature. It was constructed to provide a comprehensive collection of genetic variants along with integrated information regarding their effects. 
At present, SVAD has accumulated 2,292 entries within 1,239 variants by manually surveying pertinent literature, and approximately one-third of the collected variants are pathogenic/likely-pathogenic following the ACMG guidelines. To the best of our knowledge, SVAD is the most comprehensive database that can provide integrated information on the associated variants in various types of inherited CDs. SVAD represents a valuable source of variant information based on scientific literature and benefits clinicians and researchers, and it is now available on http://svad.mbc.nctu.edu.tw/.",SVAD,0.986849755,SCD-associated Variants Annotation Database,0.878880084,SVAD,0.986849755,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/19/2020 +24223973,http://simtk.org/home/sweetlead,"SWEETLEAD: an in silico database of approved drugs, regulated chemicals, and herbal isolates for computer-aided drug discovery. In the face of drastically rising drug discovery costs, strategies promising to reduce development timelines and expenditures are being pursued. Computer-aided virtual screening and repurposing approved drugs are two such strategies that have shown recent success. Herein, we report the creation of a highly-curated in silico database of chemical structures representing approved drugs, chemical isolates from traditional medicinal herbs, and regulated chemicals, termed the SWEETLEAD database. The motivation for SWEETLEAD stems from the observance of conflicting information in publicly available chemical databases and the lack of a highly curated database of chemical structures for the globally approved drugs. A consensus building scheme surveying information from several publicly accessible databases was employed to identify the correct structure for each chemical. Resulting structures are filtered for the active pharmaceutical ingredient, standardized, and differing formulations of the same drug were combined in the final database. 
The publically available release of SWEETLEAD (https://simtk.org/home/sweetlead) provides an important tool to enable the successful completion of computer-aided repurposing and drug discovery campaigns.",SWEETLEAD,0.981075227,NA,0,SWEETLEAD,0.981075227,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2013 +28832569,http://swefreq.nbis.se,"SweGen: a whole-genome data resource of genetic variability in a cross-section of the Swedish population. Here we describe the SweGen data set, a comprehensive map of genetic variation in the Swedish population. These data represent a basic resource for clinical genetics laboratories as well as for sequencing-based association studies by providing information on genetic variant frequencies in a cohort that is well matched to national patient cohorts. To select samples for this study, we first examined the genetic structure of the Swedish population using high-density SNP-array data from a nation-wide cohort of over 10√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ000 Swedish-born individuals included in the Swedish Twin Registry. A total of 1000 individuals, reflecting a cross-section of the population and capturing the main genetic structure, were selected for whole-genome sequencing. Analysis pipelines were developed for automated alignment, variant calling and quality control of the sequencing data. This resulted in a genome-wide collection of aggregated variant frequencies in the Swedish population that we have made available to the scientific community through the website https://swefreq.nbis.se. A total of 29.2 million single-nucleotide variants and 3.8 million indels were detected in the 1000 samples, with 9.9 million of these variants not present in current databases. Each sample contributed with an average of 7199 individual-specific variants. In addition, an average of 8645 larger structural variants (SVs) were detected per individual, and we demonstrate that the population frequencies of these SVs can be used for efficient filtering analyses. 
Finally, our results show that the genetic diversity within Sweden is substantial compared with the diversity among continental European populations, underscoring the relevance of establishing a local reference data set.",SweGen,0.992595315,NA,0,SweGen,0.992595315,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/23/2017 +28961249,http://scbt.sastra.edu/swisnfdb/index.php,"SWI/SNF Infobase-An exclusive information portal for SWI/SNF remodeling complex subunits. Chromatin remodeling complexes facilitate the access of condensed genomic DNA during transcription, replication, and repair, by altering the histone-DNA contacts in the nucleosome structures. SWI/SNF (SWItch/Sucrose Non-Fermentable) family of ATP dependent chromatin remodeling complexes have been documented for their tumour suppressor function. Recent studies have reported the high frequency of cancer causing mutations in this protein family. There exist multiple subunits for this complex and can form context-dependent sub-complexes. The cataloguing of individual subunits of this complex is essential for understanding their specific functions and their mechanism of action during chromatin remodeling. This would also facilitate further studies to characterize cancer causing mutations in SWI/SNF subunits. In the current study, a database containing information on the subunits of SWI/SNF-√ɬÉ√Ǭé√ɬÇ√Ǭ± (BRG1/BRM-Associated Factors (BAF)) and SWI/SNF-√ɬÉ√Ǭé√ɬÇ√Ǭ≤ (Polybromo-Associated BAF (PBAF)) sub classes of SWI/SNF family has been curated and catalogued. The database hosts information on 27 distinct SWI/SNF subunits from 20 organisms spanning a wide evolutionary range of eukaryotes. A non-redundant set of 522 genes coding for SWI/SNF subunits have been documented in the database. A detailed annotation on each subunit, including basic protein/gene information, protein sequence, functional domains, homologs and missense mutations of human proteins have been provided with a user-friendly graphical interface. 
The SWI/SNF Infobase presented here, would be a first of its kind exclusive information portal on SWI/SNF complex subunits and would be a valuable resource for the research community working on chromatin remodeling. The database is available at http://scbt.sastra.edu/swisnfdb/index.php.",SWI/SNF Infobase,0.784500964,NA,0,SWI/SNF Infobase,0.784500964,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/29/2017 +23161688,http://www.swissbioisostere.ch,"SwissBioisostere: a database of molecular replacements for ligand design. The SwissBioisostere database (http://www.swissbioisostere.ch) contains information on molecular replacements and their performance in biochemical assays. It is meant to provide researchers in drug discovery projects with ideas for bioisosteric modifications of their current lead molecule, as well as to give interested scientists access to the details on particular molecular replacements. As of August 2012, the database contains 21,293,355 datapoints corresponding to 5,586,462 unique replacements that have been measured in 35,039 assays against 1948 molecular targets representing 30 target classes. The accessible data were created through detection of matched molecular pairs and mining bioactivity data in the ChEMBL database. The SwissBioisostere database is hosted by the Swiss Institute of Bioinformatics and available via a web-based interface.",SwissBioisostere,0.985951066,NA,0,SwissBioisostere,0.985951066,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2012 +25943471,http://www.swisslipids.org,"The SwissLipids knowledgebase for lipid biology. Motivation Lipids are a large and diverse group of biological molecules with roles in membrane formation, energy storage and signaling. Cellular lipidomes may contain tens of thousands of structures, a staggering degree of complexity whose significance is not yet fully understood. 
High-throughput mass spectrometry-based platforms provide a means to study this complexity, but the interpretation of lipidomic data and its integration with prior knowledge of lipid biology suffers from a lack of appropriate tools to manage the data and extract knowledge from it. Results To facilitate the description and exploration of lipidomic data and its integration with prior biological knowledge, we have developed a knowledge resource for lipids and their biology-SwissLipids. SwissLipids provides curated knowledge of lipid structures and metabolism which is used to generate an in silico library of feasible lipid structures. These are arranged in a hierarchical classification that links mass spectrometry analytical outputs to all possible lipid structures, metabolic reactions and enzymes. SwissLipids provides a reference namespace for lipidomic data publication, data exploration and hypothesis generation. The current version of SwissLipids includes over 244√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ000 known and theoretically possible lipid structures, over 800 proteins, and curated links to published knowledge from over 620 peer-reviewed publications. We are continually updating the SwissLipids hierarchy with new lipid categories and new expert curated knowledge. Availability SwissLipids is freely available at http://www.swisslipids.org/. Contact alan.bridge@isb-sib.ch Supplementary information Supplementary data are available at Bioinformatics online.",SwissLipids,0.992622912,NA,0,SwissLipids,0.992622912,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/5/2015 +26339475,http://swisspalm.epfl.ch,"SwissPalm: Protein Palmitoylation database. Protein S-palmitoylation is a reversible post-translational modification that regulates many key biological processes, although the full extent and functions of protein S-palmitoylation remain largely unexplored. 
Recent developments of new chemical methods have allowed the establishment of palmitoyl-proteomes of a variety of cell lines and tissues from different species.√ɬÉ√ǬÇ√ɬÇ√Ǭ† As the amount of information generated by these high-throughput studies is increasing, the field requires centralization and comparison of this information. Here we present SwissPalm ( http://swisspalm.epfl.ch), our open, comprehensive, manually curated resource to study protein S-palmitoylation. It currently encompasses more than 5000 S-palmitoylated protein hits from seven species, and contains more than 500 specific sites of S-palmitoylation. SwissPalm also provides curated information and filters that increase the confidence in true positive hits, and integrates predictions of S-palmitoylated cysteine scores, orthologs and isoform multiple alignments. Systems analysis of the palmitoyl-proteome screens indicate that 10% or more of the human proteome is susceptible to S-palmitoylation. Moreover, ontology and pathway analyses of the human palmitoyl-proteome reveal that key biological functions involve this reversible lipid modification. Comparative analysis finally shows a strong crosstalk between S-palmitoylation and other post-translational modifications. Through the compilation of data and continuous updates, SwissPalm will provide a powerful tool to unravel the global importance of protein S-palmitoylation.",SwissPalm,0.997897446,Protein Palmitoylation database,0.7939366,SwissPalm,0.997897446,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/16/2015 +23180783,http://swissregulon.unibas.ch,"SwissRegulon, a database of genome-wide annotations of regulatory sites: recent updates. Identification of genomic regulatory elements is essential for understanding the dynamics of cellular processes. This task has been substantially facilitated by the availability of genome sequences for many species and high-throughput data of transcripts and transcription factor (TF) binding. 
However, rigorous computational methods are necessary to derive accurate genome-wide annotations of regulatory sites from such data. SwissRegulon (http://swissregulon.unibas.ch) is a database containing genome-wide annotations of regulatory motifs, promoters and TF binding sites (TFBSs) in promoter regions across model organisms. Its binding site predictions were obtained with rigorous Bayesian probabilistic methods that operate on orthologous regions from related genomes, and use explicit evolutionary models to assess the evidence of purifying selection on each site. New in the current version of SwissRegulon is a curated collection of 190 mammalian regulatory motifs associated with √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº340 TFs, and TFBS annotations across a curated set of √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº35 000 promoters in both human and mouse. Predictions of TFBSs for Saccharomyces cerevisiae have also been significantly extended and now cover 158 of yeast's √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº180 TFs. All data are accessible through both an easily navigable genome browser with search functions, and as flat files that can be downloaded for further analysis.",SwissRegulon,0.99072659,NA,0,SwissRegulon,0.99072659,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/24/2012 +23104376,http://www.swisssidechain.ch,"SwissSidechain: a molecular and structural database of non-natural sidechains. Amino acids form the building blocks of all proteins. Naturally occurring amino acids are restricted to a few tens of sidechains, even when considering post-translational modifications and rare amino acids such as selenocysteine and pyrrolysine. However, the potential chemical diversity of amino acid sidechains is nearly infinite. Exploiting this diversity by using non-natural sidechains to expand the building blocks of proteins and peptides has recently found widespread applications in biochemistry, protein engineering and drug design. Despite these applications, there is currently no unified online bioinformatics resource for non-natural sidechains. 
With the SwissSidechain database (http://www.swisssidechain.ch), we offer a central and curated platform about non-natural sidechains for researchers in biochemistry, medicinal chemistry, protein engineering and molecular modeling. SwissSidechain provides biophysical, structural and molecular data for hundreds of commercially available non-natural amino acid sidechains, both in l- and d-configurations. The database can be easily browsed by sidechain names, families or physico-chemical properties. We also provide plugins to seamlessly insert non-natural sidechains into peptides and proteins using molecular visualization software, as well as topologies and parameters compatible with molecular mechanics software.",SwissSidechain,0.996189356,NA,0,SwissSidechain,0.996189356,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/26/2012 +33459764,http://switches.ncbs.res.in,"SWITCHES: Searchable web interface for topologies of CHEmical switches. . Bistable biochemical switches are key motifs in cellular state decisions and long-term storage of cellular 'memory'. There are a few known biological switches that have been well characterized, however these examples are insufficient for systematic surveys of properties of these important systems. Here we present a resource of all possible bistable biochemical reaction networks with up to 6 reactions between 3 molecules, and 3 reactions between 4 molecules. Over 35,000 reaction topologies were constructed by identifying unique combinations of reactions between a fixed number of molecules. Then, these topologies were populated with rates within a biologically realistic range. The Searchable Web Interface for Topologies of CHEmical Switches (SWITCHES, https://switches.ncbs.res.in) provides a bistability and parameter analysis of over 7 million models from this systematic survey of chemical reaction space. 
This database will be useful for theoreticians interested in analyzing stability in chemical systems and also experimentalists for creating robust synthetic biological switches. Freely available on the web at https://switches.ncbs.res.in. Website implemented in PHP, MariaDB, Graphviz, and Apache, with all major browsers supported.",SWITCHES,0.99647893,for topologies of,0.699981242,SWITCHES,0.99647893,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/18/2021 +23550212,http://switches.elm.eu.org,"The switches.ELM resource: a compendium of conditional regulatory interaction interfaces. Short linear motifs (SLiMs) are protein interaction sites that play an important role in cell regulation by controlling protein activity, localization, and local abundance. The functionality of a SLiM can be modulated in a context-dependent manner to induce a gain, loss, or exchange of binding partners, which will affect the function of the SLiM-containing protein. As such, these conditional interactions underlie molecular decision-making in cell signaling. We identified multiple types of pre- and posttranslational switch mechanisms that can regulate the function of a SLiM and thereby control its interactions. The collected examples of experimentally characterized SLiM-based switch mechanisms were curated in the freely accessible switches.ELM resource (http://switches.elm.eu.org). On the basis of these examples, we defined and integrated rules to analyze SLiMs for putative regulatory switch mechanisms. We applied these rules to known validated SLiMs, providing evidence that more than half of these are likely to be pre- or posttranslationally regulated. In addition, we showed that posttranslationally modified sites are enriched around SLiMs, which enables cooperative and integrative regulation of protein interaction interfaces. 
We foresee switches.ELM complementing available resources to extend our knowledge of the molecular mechanisms underlying cell signaling.",switches.ELM,0.973136947,NA,0,switches.ELM,0.973136947,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/2/2013 +21423723,http://www.swmd.co.in,"Seaweed metabolite database (SWMD): A database of natural compounds from marine algae. Unlabelled The cataloguing of marine chemicals is a fundamental aspect for bioprospecting. This has applications in the development of drugs from marine sources. A publicly accessible database that provides comprehensive information about these compounds is therefore helpful. The Seaweed Metabolite Database (SWMD) is designed to provide information about the known compounds and their biological activity described in the literature. Geographical origin of the seaweed, extraction method and the chemical descriptors of each the compounds are recorded to enable effective chemo-informatics analysis. Crosslinks to other databases are also introduced to facilitate the access of information about 3D Structure by X-ray and NMR activity, drug properties and related literature for each compound. This database currently contains entries for 517 compounds encompassing 25 descriptive fields mostly from the Red algae of the genus Laurencia (Ceramiales, Rhodomelaceae). The customized search engine of this database will enable wildcard querying, which includes Accession Number, Compound type, Seaweed Binomial name, IUPAC name, SMILES notation or InChI. Availability The database is available for free at http://www.swmd.co.in.",SWMD,0.968675633,Seaweed metabolite database,0.964039514,SWMD,0.968675633,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/22/2011 +23497177,http://www.SymbioGBR.org,"SymbioGBR: a web-based database of Symbiodinium associated with cnidarian hosts on the Great Barrier Reef. 
Background The algal endosymbionts (genus Symbiodinium) associated with scleractinian corals (and other reef invertebrates) have received a lot of research attention in the past decade, particularly as certain host-symbiont associations appear more affected by increasing seawater temperatures than others. With the rapid accumulation of information on the diversity of Symbiodinium, it is becoming increasingly difficult to compare newly acquired Symbiodinium data with existing data to detect patterns of host-symbiont specificity on broader spatial scales. The lack of a general consensus on the classification of Symbiodinium species coupled with the variety of different markers used to identify the genus Symbiodinium (ITS1, ITS2, LSU D1/D2, chloroplast 23S rDNA and psbA minicircle) further complicate direct comparison. Description The SymbioGBR database compiles all currently available Symbiodinium sequences and associated host information of data collected from the Great Barrier Reef into a single relational database that is accessible via a user-friendly, searchable web-based application (http://www.SymbioGBR.org). SymbioGBR allows users to query Symbiodinium types or sequences sourced from various genetic markers (e.g. ITS1, ITS2, LSU D1/D2 and chloroplast 23S) and invertebrate host species to explore their reported associations. In addition, as the database includes sequence information of multiple genetic markers, it allows cross-referencing between conventional (e.g. ITS2 region) and novel markers that exhibit low intragenomic variability (e.g. psbA region). Finally, the database is based on the collection details of individual specimens. Such host-symbiont associations can be assessed quantitatively and viewed in relation to their environmental and geographic context. Conclusions The SymbioGBR database provides a comprehensive overview of Symbiodinium diversity and host-associations on the Great Barrier Reef. 
It provides a quick, user-friendly means to compare newly acquired data on Symbiodinium (e.g. raw sequences or characterized Symbiodinium types) with previous data on the diversity of invertebrate host-symbiont associations on the GBR. The inclusion of psbAncr sequence information allows for validation of widely used ITS1/ITS2 markers and their ability to accurately identify relevant sequences. Most importantly, centralization of sequence information from multiple genetic markers will aid the classification of Symbiodinium species diversity and allow researchers to easily compare patterns of host-Symbiodinium associations.",SymbioGBR,0.994127393,NA,0,SymbioGBR,0.994127393,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/13/2013 +26607947,http://symbiogenomesdb.uv.es,"SymbioGenomesDB: a database for the integration and access to knowledge on host-symbiont relationships. . Symbiotic relationships occur naturally throughout the tree of life, either in a commensal, mutualistic or pathogenic manner. The genomes of multiple organisms involved in symbiosis are rapidly being sequenced and becoming available, especially those from the microbial world. Currently, there are numerous databases that offer information on specific organisms or models, but none offer a global understanding on relationships between organisms, their interactions and capabilities within their niche, as well as their role as part of a system, in this case, their role in symbiosis. We have developed the SymbioGenomesDB as a community database resource for laboratories which intend to investigate and use information on the genetics and the genomics of organisms involved in these relationships. The ultimate goal of SymbioGenomesDB is to host and support the growing and vast symbiotic-host relationship information, to uncover the genetic basis of such associations. 
SymbioGenomesDB maintains a comprehensive organization of information on genomes of symbionts from diverse hosts throughout the Tree of Life, including their sequences, their metadata and their genomic features. This catalog of relationships was generated using computational tools, custom R scripts and manual integration of data available in public literature. As a highly curated and comprehensive systems database, SymbioGenomesDB provides web access to all the information of symbiotic organisms, their features and links to the central database NCBI. Three different tools can be found within the database to explore symbiosis-related organisms, their genes and their genomes. Also, we offer an orthology search for one or multiple genes in one or multiple organisms within symbiotic relationships, and every table, graph and output file is downloadable and easy to parse for further analysis. The robust SymbioGenomesDB will be constantly updated to cope with all the data being generated and included in major databases, in order to serve as an important, useful and timesaving tool. Database URL: http://symbiogenomesdb.uv.es.",SymbioGenomesDB,0.992349923,NA,0,SymbioGenomesDB,0.992349923,1,32055857,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/25/2015 +32055857,http://symbiogenomesdb.uv.es,"An update on the Symbiotic Genomes Database (SymGenDB): a collection of metadata, genomic, genetic and protein sequences, orthologs and metabolic networks of symbiotic organisms. . The Symbiotic Genomes Database (SymGenDB; http://symbiogenomesdb.uv.es/) is a public resource of manually curated associations between organisms involved in symbiotic relationships, maintaining a catalog of completely sequenced/finished bacterial genomes exclusively. It originally consisted of three modules where users could search for the bacteria involved in a specific symbiotic relationship, their genomes and their genes (including their orthologs). 
In this update, we present an additional module that includes a representation of the metabolic network of each organism included in the database, as Directed Acyclic Graphs (MetaDAGs). This module provides unique opportunities to explore the metabolism of each individual organism and/or to evaluate the shared and joint metabolic capabilities of the organisms of the same genera included in our listing, allowing users to construct predictive analyses of metabolic associations and complementation between systems. We also report a ~25% increase in manually curated content in the database, i.e. bacterial genomes and their associations, with a final count of 2328 bacterial genomes associated to 498 hosts. We describe new querying possibilities for all the modules, as well as new display features for the MetaDAGs module, providing a relevant range of content and utility. This update continues to improve SymGenDB and can help elucidate the mechanisms by which organisms depend on each other.",SymGenDB,0.998103142,Symbiotic Genomes Database,0.95245223,SymGenDB,0.998103142,1,26607947,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2020 +30380087,"http://www.symmap.org/, http://www.bioinfo.org/symmap","SymMap: an integrative database of traditional Chinese medicine enhanced by symptom mapping. Recently, the pharmaceutical industry has heavily emphasized phenotypic drug discovery (PDD), which relies primarily on knowledge about phenotype changes associated with diseases. Traditional Chinese medicine (TCM) provides a massive amount of information on natural products and the clinical symptoms they are used to treat, which are the observable disease phenotypes that are crucial for clinical diagnosis and treatment. Curating knowledge of TCM symptoms and their relationships to herbs and diseases will provide both candidate leads and screening directions for evidence-based PDD programs. 
Therefore, we present SymMap, an integrative database of traditional Chinese medicine enhanced by symptom mapping. We manually curated 1717 TCM symptoms and related them to 499 herbs and 961 symptoms used in modern medicine based on a committee of 17 leading experts practicing TCM. Next, we collected 5235 diseases associated with these symptoms, 19 595 herbal constituents (ingredients) and 4302 target genes, and built a large heterogeneous network containing all of these components. Thus, SymMap integrates TCM with modern medicine in common aspects at both the phenotypic and molecular levels. Furthermore, we inferred all pairwise relationships among SymMap components using statistical tests to give pharmaceutical scientists the ability to rank and filter promising results to guide drug discovery. The SymMap database can be accessed at http://www.symmap.org/ and https://www.bioinfo.org/symmap.",SymMap,0.99069941,NA,0,SymMap,0.99069941,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +29316788,http://synbiohub.org,"SynBioHub: A Standards-Enabled Design Repository for Synthetic Biology. The SynBioHub repository ( https://synbiohub.org ) is an open-source software project that facilitates the sharing of information about engineered biological systems. SynBioHub provides computational access for software and data integration, and a graphical user interface that enables users to search for and share designs in a Web browser. By connecting to relevant repositories (e.g., the iGEM repository, JBEI ICE, and other instances of SynBioHub), the software allows users to browse, upload, and download data in various standard formats, regardless of their location or representation. SynBioHub also provides a central reference point for other resources to link to, delivering design information in a standardized format using the Synthetic Biology Open Language (SBOL). 
The adoption and use of SynBioHub, a community-driven effort, has the potential to overcome the reproducibility challenge across laboratories by helping to address the current lack of information about published designs.",SynBioHub,0.997284293,NA,0,SynBioHub,0.997284293,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/30/2018 +25627341,http://bioinformatics.ac.cn/synbiolgdb,"SynBioLGDB: a resource for experimentally validated logic gates in synthetic biology. Synthetic biologists have developed DNA/molecular modules that perform genetic logic operations in living cells to track key moments in a cell's life or change the fate of a cell. Increasing evidence has also revealed that diverse genetic logic gates capable of generating a Boolean function play critically important roles in synthetic biology. Basic genetic logic gates have been designed to combine biological science with digital logic. SynBioLGDB (http://bioinformatics.ac.cn/synbiolgdb/) aims to provide the synthetic biology community with a useful resource for efficient browsing and visualization of genetic logic gates. The current version of SynBioLGDB documents more than 189 genetic logic gates with experimental evidence involving 80 AND gates and 16 NOR gates, etc. in three species (Human, Escherichia coli and Bacillus clausii). SynBioLGDB provides a user-friendly interface through which conveniently to query and browse detailed information about these genetic logic gates. SynBioLGDB will enable more comprehensive understanding of the connection of genetic logic gates to execute complex cellular functions in living cells.",SynBioLGDB,0.994347095,NA,0,SynBioLGDB,0.994347095,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/28/2015 +32442307,http://SYNERGxDB.ca,"SYNERGxDB: an integrative pharmacogenomic portal to identify synergistic drug combinations for precision oncology. Drug-combination data portals have recently been introduced to mine huge amounts of pharmacological data with the aim of improving current chemotherapy strategies. 
However, these portals have only been investigated for isolated datasets, and molecular profiles of cancer cell lines are lacking. Here we developed a cloud-based pharmacogenomics portal called SYNERGxDB (http://SYNERGxDB.ca/) that integrates multiple high-throughput drug-combination studies with molecular and pharmacological profiles of a large panel of cancer cell lines. This portal enables the identification of synergistic drug combinations through harmonization and unified computational analysis. We integrated nine of the largest drug combination datasets from both academic groups and pharmaceutical companies, resulting in 22 507 unique drug combinations (1977 unique compounds) screened against 151 cancer cell lines. This data compendium includes metabolomics, gene expression, copy number and mutation profiles of the cancer cell lines. In addition, SYNERGxDB provides analytical tools to discover effective therapeutic combinations and predictive biomarkers across cancer, including specific types. Combining molecular and pharmacological profiles, we systematically explored the large space of univariate predictors of drug synergism. SYNERGxDB constitutes a comprehensive resource that opens new avenues of research for exploring the mechanism of action for drug synergy with the potential of identifying new treatment strategies for cancer patients.",SYNERGxDB,0.997662961,NA,0,SYNERGxDB,0.997662961,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2020 +31171447,"http://syngoportal.org, http://geneontology.org","SynGO: An Evidence-Based, Expert-Curated Knowledge Base for the Synapse. Synapses are fundamental information-processing units of the brain, and synaptic dysregulation is central to many brain disorders (""synaptopathies""). However, systematic annotation of synaptic genes and ontology of synaptic processes are currently lacking. 
We established SynGO, an interactive knowledge base that accumulates available research about synapse biology using Gene Ontology (GO) annotations to novel ontology terms: 87 synaptic locations and 179 synaptic processes. SynGO annotations are exclusively based on published, expert-curated evidence. Using 2,922 annotations for 1,112 genes, we show that synaptic genes are exceptionally well conserved and less tolerant to mutations than other genes. Many SynGO terms are significantly overrepresented among gene variations associated with intelligence, educational attainment, ADHD, autism, and bipolar disorder and among de novo variants associated with neurodevelopmental disorders, including schizophrenia. SynGO is a public, universal reference for synapse research and an online analysis platform for interpretation of large-scale -omics data (https://syngoportal.org and http://geneontology.org).",SynGO,0.996233463,NA,0,SynGO,0.996233463,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/3/2019 +26516187,http://histone.sce.ntu.edu.sg/SynLethDB,"SynLethDB: synthetic lethality database toward discovery of selective and sensitive anticancer drug targets. Synthetic lethality (SL) is a type of genetic interaction between two genes such that simultaneous perturbations of the two genes result in cell death or a dramatic decrease of cell viability, while a perturbation of either gene alone is not lethal. SL reflects the biologically endogenous difference between cancer cells and normal cells, and thus the inhibition of SL partners of genes with cancer-specific mutations could selectively kill cancer cells but spare normal cells. Therefore, SL is emerging as a promising anticancer strategy that could potentially overcome the drawbacks of traditional chemotherapies by reducing severe side effects. Researchers have developed experimental technologies and computational prediction methods to identify SL gene pairs on human and a few model species. 
However, there has not been a comprehensive database dedicated to collecting SL pairs and related knowledge. In this paper, we propose a comprehensive database, SynLethDB (http://histone.sce.ntu.edu.sg/SynLethDB/), which contains SL pairs collected from biochemical assays, other related databases, computational predictions and text mining results on human and four model species, i.e. mouse, fruit fly, worm and yeast. For each SL pair, a confidence score was calculated by integrating individual scores derived from different evidence sources. We also developed a statistical analysis module to estimate the druggability and sensitivity of cancer cells upon drug treatments targeting human SL partners, based on large-scale genomic data, gene expression profiles and drug sensitivity profiles on more than 1000 cancer cell lines. To help users access and mine the wealth of the data, we developed other practical functionalities, such as search and filtering, orthology search, gene set enrichment analysis. Furthermore, a user-friendly web interface has been implemented to facilitate data analysis and interpretation. With the integrated data sets and analytics functionalities, SynLethDB would be a useful resource for biomedical research community and pharmaceutical industry.",SynLethDB,0.998530388,NA,0,SynLethDB,0.998530388,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/29/2015 +31189880,http://SynMICdb.dkfz.de,"A pan-cancer analysis of synonymous mutations. Synonymous mutations have been viewed as silent mutations, since they only affect the DNA and mRNA, but not the amino acid sequence of the resulting protein. Nonetheless, recent studies suggest their significant impact on splicing, RNA stability, RNA folding, translation or co-translational protein folding. Hence, we compile 659194 synonymous mutations found in human cancer and characterize their properties. 
We provide the user-friendly, comprehensive resource for synonymous mutations in cancer, SynMICdb ( http://SynMICdb.dkfz.de ), which also contains orthogonal information about gene annotation, recurrence, mutation loads, cancer association, conservation, alternative events, impact on mRNA structure and a SynMICdb score. Notably, synonymous and missense mutations are depleted at the 5'-end of the coding sequence as well as at the ends of internal exons independent of mutational signatures. For patient-derived synonymous mutations in the oncogene KRAS, we indicate that single point mutations can have a relevant impact on expression as well as on mRNA secondary structure.",SynMICdb√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√ǬÉ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√ǬÇ,0.970528483,NA,0,SynMICdb√ɬÉ√ǬÉ√ɬÇ√ǬÉ√ɬÉ√ǬÇ√ɬÇ√ǬÉ√ɬÉ√ǬÉ√ɬÇ√ǬÇ√ɬÉ√ǬÇ√ɬÇ√ǬÇ,0.970528483,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,6/12/2019 +31728519,http://syntdb.amu.edu.pl,"SyntDB: defining orthologues of human long noncoding RNAs across primates. SyntDB (http://syntdb.amu.edu.pl/) is a collection of data on long noncoding RNAs (lncRNAs) and their evolutionary relationships in twelve primate species, including humans. This is the first database dedicated to primate lncRNAs, thousands of which are uniquely stored in SyntDB. The lncRNAs were predicted with our computational pipeline using publicly available RNA-Seq data spanning diverse tissues and organs. Most of the species included in SyntDB still lack lncRNA annotations in public resources. In addition to providing users with unique sets of lncRNAs and their characteristics, SyntDB provides data on orthology relationships between the lncRNAs of humans and other primates, which are not available on this scale elsewhere. 
Keeping in mind that only a small fraction of currently known human lncRNAs have been functionally characterized and that lncRNA conservation is frequently used to identify the most relevant lncRNAs for functional studies, we believe that SyntDB will contribute to ongoing research aimed at deciphering the biological roles of lncRNAs.",SyntDB,0.997279763,NA,0,SyntDB,0.997279763,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +34515387,http://synwiki.uni-goettingen.de,"SynWiki: Functional annotation of the first artificial organism Mycoplasma mycoides JCVI-syn3A. The new field of synthetic biology aims at the creation of artificially designed organisms. A major breakthrough in the field was the generation of the artificial synthetic organism Mycoplasma mycoides JCVI-syn3A. This bacterium possesses only 452 protein-coding genes, the smallest number for any organism that is viable independent of a host cell. However, about one third of the proteins have no known function indicating major gaps in our understanding of simple living cells. To facilitate the investigation of the components of this minimal bacterium, we have generated the database SynWiki (http://synwiki.uni-goettingen.de/). SynWiki is based on a relational database and gives access to published information about the genes and proteins of M. mycoides JCVI-syn3A. To gain a better understanding of the functions of the genes and proteins of the artificial bacteria, protein-protein interactions that may provide clues for the protein functions are included in an interactive manner. 
SynWiki is an important tool for the synthetic biology community that will support the comprehensive understanding of a minimal cell as well as the functional annotation of so far uncharacterized proteins.",SynWiki,0.996189475,NA,0,SynWiki,0.996189475,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/20/2021 +34663591,http://sepsis.gxbsidra.org/dm3/geneBrowser/list,"SysInflam HuDB, a Web Resource for Mining Human Blood Cells Transcriptomic Data Associated with Systemic Inflammatory Responses to Sepsis. Sepsis develops after a dysregulated host inflammatory response to a systemic infection. Identification of sepsis biomarkers has been challenging because of the multifactorial causes of disease susceptibility and progression. Public transcriptomic data are a valuable resource for mechanistic discoveries and cross-studies concordance of heterogeneous diseases. Nonetheless, the approach requires structured methodologies and effective visualization tools for meaningful data interpretation. Currently, no such database exists for sepsis or systemic inflammatory diseases in human. Hence we curated SysInflam HuDB (http://sepsis.gxbsidra.org/dm3/geneBrowser/list), a unique collection of human blood transcriptomic datasets associated with systemic inflammatory responses to sepsis. The transcriptome collection and the associated clinical metadata are integrated onto a user-friendly and Web-based interface that allows the simultaneous exploration, visualization, and interpretation of multiple datasets stemming from different study designs. To date, the collection encompasses 62 datasets and 5719 individual profiles. Concordance of gene expression changes with the associated literature was assessed, and additional analyses are presented to showcase database utility. 
Combined with custom data visualization at the group and individual levels, SysInflam HuDB facilitates the identification of specific human blood gene signatures in response to infection (e.g., patients with sepsis versus healthy control subjects) and the delineation of major genetic drivers associated with inflammation onset and progression under various conditions.",SysInflam HuDB,0.983198355,NA,0,SysInflam HuDB,0.983198355,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2021 +24705204,http://lifecenter.sgst.cn/SysPTM,"SysPTM 2.0: an updated systematic resource for post-translational modification. Post-translational modifications (PTMs) of proteins play essential roles in almost all cellular processes, and are closely related to physiological activity and disease development of living organisms. The development of tandem mass spectrometry (MS/MS) has resulted in a rapid increase of PTMs identified on proteins from different species. The collection and systematic ordering of PTM data should provide invaluable information for understanding cellular processes and signaling pathways regulated by PTMs. For this original purpose we developed SysPTM, a systematic resource installed with comprehensive PTM data and a suite of web tools for annotation of PTMs in 2009. Four years later, there has been a significant advance with the generation of PTM data and, consequently, more sophisticated analysis requirements have to be met. Here we submit an updated version of SysPTM 2.0 (http://lifecenter.sgst.cn/SysPTM/), with almost doubled data content, enhanced web-based analysis tools of PTMBlast, PTMPathway, PTMPhylog, PTMCluster. Moreover, a new session SysPTM-H is constructed to graphically represent the combinatorial histone PTMs and dynamic regulation of histone modifying enzymes, and a new tool PTMGO is added for functional annotation and enrichment analysis. 
SysPTM 2.0 not only facilitates resourceful annotation of PTM sites but allows systematic investigation of PTM functions by the user. Database URL: http://lifecenter.sgst.cn/SysPTM/.",SysPTM,0.994190574,NA,0,SysPTM,0.994190574,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/3/2014 +22807998,http://lifecenter.sgst.cn/SyStemCell,"SyStemCell: a database populated with multiple levels of experimental data from stem cell differentiation research. Elucidation of the mechanisms of stem cell differentiation is of great scientific interest. Increasing evidence suggests that stem cell differentiation involves changes at multiple levels of biological regulation, which together orchestrate the complex differentiation process; many related studies have been performed to investigate the various levels of regulation. The resulting valuable data, however, remain scattered. Most of the current stem cell-relevant databases focus on a single level of regulation (mRNA expression) from limited stem cell types; thus, a unifying resource would be of great value to compile the multiple levels of research data available. Here we present a database for this purpose, SyStemCell, deposited with multi-level experimental data from stem cell research. The database currently covers seven levels of stem cell differentiation-associated regulatory mechanisms, including DNA CpG 5-hydroxymethylcytosine/methylation, histone modification, transcript products, microRNA-based regulation, protein products, phosphorylation proteins and transcription factor regulation, all of which have been curated from 285 peer-reviewed publications selected from PubMed. The database contains 43,434 genes, recorded as 942,221 gene entries, for four organisms (Homo sapiens, Mus musculus, Rattus norvegicus, and Macaca mulatta) and various stem cell sources (e.g., embryonic stem cells, neural stem cells and induced pluripotent stem cells). 
Data in SyStemCell can be queried by Entrez gene ID, symbol, alias, or browsed by specific stem cell type at each level of genetic regulation. An online analysis tool is integrated to assist researchers to mine potential relationships among different regulations, and the potential usage of the database is demonstrated by three case studies. SyStemCell is the first database to bridge multi-level experimental information of stem cell studies, which can become an important reference resource for stem cell researchers. The database is available at http://lifecenter.sgst.cn/SyStemCell/.",SyStemCell,0.995095372,NA,0,SyStemCell,0.995095372,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/13/2012 +28985418,http://systemhcatlas.org,"The SysteMHC Atlas project. Mass spectrometry (MS)-based immunopeptidomics investigates the repertoire of peptides presented at the cell surface by major histocompatibility complex (MHC) molecules. The broad clinical relevance of MHC-associated peptides, e.g. in precision medicine, provides a strong rationale for the large-scale generation of immunopeptidomic datasets and recent developments in MS-based peptide analysis technologies now support the generation of the required data. Importantly, the availability of diverse immunopeptidomic datasets has resulted in an increasing need to standardize, store and exchange this type of data to enable better collaborations among researchers, to advance the field more efficiently and to establish quality measures required for the meaningful comparison of datasets. Here we present the SysteMHC Atlas (https://systemhcatlas.org), a public database that aims at collecting, organizing, sharing, visualizing and exploring immunopeptidomic data generated by MS. 
The Atlas includes raw mass spectrometer output files collected from several laboratories around the globe, a catalog of context-specific datasets of MHC class I and class II peptides, standardized MHC allele-specific peptide spectral libraries consisting of consensus spectra calculated from repeat measurements of the same peptide sequence, and links to other proteomics and immunology databases. The SysteMHC Atlas project was created and will be further expanded using a uniform and open computational pipeline that controls the quality of peptide identifications and peptide annotations. Thus, the SysteMHC Atlas disseminates quality controlled immunopeptidomic information to the public domain and serves as a community resource toward the generation of a high-quality comprehensive map of the human immunopeptidome and the support of consistent measurement of immunopeptidomic sample cohorts.",SysteMHC,0.961411715,NA,0,SysteMHC,0.961411715,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +27451428,http://www.szdb.org,"SZDB: A Database for Schizophrenia Genetic Research. Schizophrenia (SZ) is a debilitating brain disorder with a complex genetic architecture. Genetic studies, especially recent genome-wide association studies (GWAS), have identified multiple variants (loci) conferring risk to SZ. However, how to efficiently extract meaningful biological information from bulk genetic findings of SZ remains a major challenge. There is a pressing need to integrate multiple layers of data from various sources, eg, genetic findings from GWAS, copy number variations (CNVs), association and linkage studies, gene expression, protein-protein interaction (PPI), co-expression, expression quantitative trait loci (eQTL), and Encyclopedia of DNA Elements (ENCODE) data, to provide a comprehensive resource to facilitate the translation of genetic findings into SZ molecular diagnosis and mechanism study. 
Here we developed the SZDB database (http://www.szdb.org/), a comprehensive resource for SZ research. SZ genetic data, gene expression data, network-based data, brain eQTL data, and SNP function annotation information were systematically extracted, curated and deposited in SZDB. In-depth analyses and systematic integration were performed to identify top prioritized SZ genes and enriched pathways. Multiple types of data from various layers of SZ research were systematically integrated and deposited in SZDB. In-depth data analyses and integration identified top prioritized SZ genes and enriched pathways. We further showed that genes implicated in SZ are highly co-expressed in human brain and proteins encoded by the prioritized SZ risk genes are significantly interacted. The user-friendly SZDB provides high-confidence candidate variants and genes for further functional characterization. More important, SZDB provides convenient online tools for data search and browse, data integration, and customized data analyses.",SZDB,0.994548321,NA,0,SZDB,0.994548321,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2017 +27733502,http://bioinfo.uth.edu/SZGR,"SZGR 2.0: a one-stop shop of schizophrenia candidate genes. SZGR 2.0 is a comprehensive resource of candidate variants and genes for schizophrenia, covering genetic, epigenetic, transcriptomic, translational and many other types of evidence. By systematic review and curation of multiple lines of evidence, we included almost all variants and genes that have ever been reported to be associated with schizophrenia. In particular, we collected ~4200 common variants reported in genome-wide association studies, ~1000 de novo mutations discovered by large-scale sequencing of family samples, 215 genes spanning rare and replication copy number variations, 99 genes overlapping with linkage regions, 240 differentially expressed genes, 4651 differentially methylated genes and 49 genes as antipsychotic drug targets. 
To facilitate interpretation, we included various functional annotation data, especially brain eQTL, methylation QTL, brain expression featured in deep categorization of brain areas and developmental stages and brain-specific promoter and enhancer annotations. Furthermore, we conducted cross-study, cross-data type and integrative analyses of the multidimensional data deposited in SZGR 2.0, and made the data and results available through a user-friendly interface. In summary, SZGR 2.0 provides a one-stop shop of schizophrenia variants and genes and their function and regulation, providing an important resource in the schizophrenia and other mental disease community. SZGR 2.0 is available at https://bioinfo.uth.edu/SZGR/.",SZGR,0.997519851,NA,0,SZGR,0.997519851,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/12/2016 +34679164,http://www.bioinsilico.org/T-ARDIS,"Mining drug-target and drug-adverse drug reaction databases to identify target-adverse drug reaction relationships. . The level of attrition on drug discovery, particularly at advanced stages, is very high due to unexpected adverse drug reactions (ADRs) caused by drug candidates, and thus, being able to predict undesirable responses when modulating certain protein targets would contribute to the development of safer drugs and have important economic implications. On the one hand, there are a number of databases that compile information of drug-target interactions. On the other hand, there are a number of public resources that compile information on drugs and ADR. It is therefore possible to link target and ADRs using drug entities as connecting elements. Here, we present T-ARDIS (Target-Adverse Reaction Database Integrated Search) database, a resource that provides comprehensive information on proteins and associated ADRs. By combining the information from drug-protein and drug-ADR databases, we statistically identify significant associations between proteins and ADRs. 
Besides describing the relationship between proteins and ADRs, T-ARDIS provides detailed description about proteins along with the drug and adverse reaction information. Currently T-ARDIS contains over 3000 ADR and 248 targets for a total of more 17 000 pairwise interactions. Each entry can be retrieved through multiple search terms including target Uniprot ID, gene name, adverse effect and drug name. Ultimately, the T-ARDIS database has been created in response to the increasing interest in identifying early in the drug development pipeline potentially problematic protein targets whose modulation could result in ADRs. Database URL: http://www.bioinsilico.org/T-ARDIS.",T-ARDIS,0.996051848,Target-Adverse Reaction Database Integrated Search,0.962873194,T-ARDIS,0.996051848,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2021 +23406793,http://bws.iis.sinica.edu.tw/THOD,"T-HOD: a literature-based candidate gene database for hypertension, obesity and diabetes. Researchers are finding it more and more difficult to follow the changing status of disease candidate genes due to the exponential increase in gene mapping studies. The Text-mined Hypertension, Obesity and Diabetes candidate gene database (T-HOD) is developed to help trace existing research on three kinds of cardiovascular diseases: hypertension, obesity and diabetes, with the last disease categorized into Type 1 and Type 2, by regularly and semiautomatically extracting HOD-related genes from newly published literature. Currently, there are 837, 835 and 821 candidate genes recorded in T-HOD for hypertension, obesity and diabetes, respectively. T-HOD employed the state-of-art text-mining technologies, including a gene/disease identification system and a disease-gene relation extraction system, which can be used to affirm the association of genes with three diseases and provide more evidence for further studies. 
The primary inputs of T-HOD are the three kinds of diseases, and the output is a list of disease-related genes that can be ranked based on their number of appearance, protein-protein interactions and single-nucleotide polymorphisms. Unlike manually constructed disease gene databases, the content of T-HOD is regularly updated by our text-mining system and verified by domain experts. The interface of T-HOD facilitates easy browsing for users and allows T-HOD curators to verify data efficiently. We believe that T-HOD can help life scientists in search for more disease candidate genes in a less time- and effort-consuming manner. Database URL: http://bws.iis.sinica.edu.tw/THOD.",T-HOD,0.995550072,Text-mined Hypertension,0.958812918,T-HOD,0.995550072,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/12/2013 +31624839,http://tpsic.igcz.poznan.pl,"T-psi-C: user friendly database of tRNA sequences and structures. tRNAs have been widely studied for their role as genetic code decoders in the ribosome during translation, but have recently received new attention due to the discovery of novel roles beyond decoding, often in connection with human diseases. Yet, existing tRNA databases have not been updated for more than a decade, so they do not contain this new functional information and have not kept pace with the rate of discovery in this field. Therefore, a regularly updated database that contains information about newly discovered characteristics of tRNA molecules and can be regularly updated is strongly needed. Here, we report the creation of the T-psi-C database (http://tpsic.igcz.poznan.pl), an up-to-date collection of tRNA sequences that contains data obtained from high-throughput tRNA sequencing, e.g. all isoacceptors and isodecoders for human HEK293 cells. This database also contains 3D tRNA structures obtained from Protein Data Bank and generated using homology modeling. 
The T-psi-C database can be continuously updated by any member of the scientific community, and contains its own application programming interface (API), which allows users to retrieve or upload data in JSON format. Altogether, T-psi-C is user-friendly, easy to develop and an up-to-date source of knowledge about tRNAs.",T-psi-C,0.996630514,NA,0,T-psi-C,0.996630514,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +28807036,http://ttime.mlatlab.org,"T-Time: A data repository of T cell and calcium release-activated calcium channel activation imagery. Background A fundamental understanding of live-cell dynamics is necessary in order to advance scientific techniques and personalized medicine. For this understanding to be possible, image processing techniques, probes, tracking algorithms and many other methodologies must be improved. Currently there are no large open-source datasets containing live-cell imaging to act as a standard for the community. As a result, researchers cannot evaluate their methodologies on an independent benchmark or leverage such a dataset to formulate scientific questions. Findings Here we present T-Time, the largest free and publicly available data set of T cell phase contrast imagery designed with the intention of furthering live-cell dynamics research. T-Time consists of over 40 GB of imagery data, and includes annotations derived from these images using a custom T cell identification and tracking algorithm. The data set contains 71 time-lapse sequences containing T cell movement and calcium release activated calcium channel activation, along with 50 time-lapse sequences of T cell activation and T reg interactions. The database includes a user-friendly web interface, summary information on the time-lapse images, and a mechanism for users to download tailored image datasets for their own research. T-Time is freely available on the web at http://ttime.mlatlab.org . Conclusions T-Time is a novel data set of T cell images and associated metadata. 
It allows users to study T cell interaction and activation.",T-Time,0.980422139,NA,0,T-Time,0.980422139,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/15/2017 +34531327,http://d-lab.arna.cnrs.fr/t1tadb,"T1TAdb: the database of type I toxin-antitoxin systems. Type I toxin-antitoxin (T1TA) systems constitute a large class of genetic modules with antisense RNA (asRNA)-mediated regulation of gene expression. They are widespread in bacteria and consist of an mRNA coding for a toxic protein and a noncoding asRNA that acts as an antitoxin preventing the synthesis of the toxin by directly base-pairing to its cognate mRNA. The co- and post-transcriptional regulation of T1TA systems is intimately linked to RNA sequence and structure, therefore it is essential to have an accurate annotation of the mRNA and asRNA molecules to understand this regulation. However, most T1TA systems have been identified by means of bioinformatic analyses solely based on the toxin protein sequences, and there is no central repository of information on their specific RNA features. Here we present the first database dedicated to type I TA systems, named T1TAdb. It is an open-access web database (https://d-lab.arna.cnrs.fr/t1tadb) with a collection of ~1900 loci in ~500 bacterial strains in which a toxin-coding sequence has been previously identified. RNA molecules were annotated with a bioinformatic procedure based on key determinants of the mRNA structure and the genetic organization of the T1TA loci. Besides RNA and protein secondary structure predictions, T1TAdb also identifies promoter, ribosome-binding, and mRNA-asRNA interaction sites. It also includes tools for comparative analysis, such as sequence similarity search and computation of structural multiple alignments, which are annotated with covariation information. 
To our knowledge, T1TAdb represents the largest collection of features, sequences, and structural annotations on this class of genetic modules.",T1TAdb,0.98944664,NA,0,T1TAdb,0.98944664,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/16/2021 +23846596,http://tcm.zju.edu.cn/t2d,"T2D@ZJU: a knowledgebase integrating heterogeneous connections associated with type 2 diabetes mellitus. Type 2 diabetes mellitus (T2D), affecting >90% of the diabetic patients, is one of the major threats to human health. A comprehensive understanding of the mechanisms of T2D at molecular level is essential to facilitate the related translational research. Here, we introduce a comprehensive and up-to-date knowledgebase for T2D, i.e. T2D@ZJU. T2D@ZJU contains three levels of heterogeneous connections associated with T2D, which is retrieved from pathway databases, protein-protein interaction databases and literature, respectively. In current release, T2D@ZJU contains 1078 T2D related entities such as proteins, protein complexes, drugs and others together with their corresponding relationships, which include 3069 manually curated connections, 14,893 protein-protein interactions and 26,716 relationships identified by text-mining technology. Moreover, T2D@ZJU provides a user-friendly web interface for users to browse and search data. A Cytoscape Web-based interactive network browser is available to visualize the corresponding network relationships between T2D-related entities. The functionality of T2D@ZJU is shown by means of several case studies. Database URL: http://tcm.zju.edu.cn/t2d.",T2D@ZJU,0.931308525,NA,0,T2D@ZJU,0.931308525,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/11/2013 +30846808,http://tacco.life.nctu.edu.tw,"TACCO, a Database Connecting Transcriptome Alterations, Pathway Alterations and Clinical Outcomes in Cancers. Because of innumerable cancer sequencing projects, abundant transcriptome expression profiles together with survival data are available from the same patients. 
Although some expression signatures for prognosis or pathologic staging have been identified from these data, systematically discovering such kind of expression signatures remains a challenge. To address this, we developed TACCO (Transcriptome Alterations in CanCer Omnibus), a database for identifying differentially expressed genes and altered pathways in cancer. TACCO also reveals miRNA cooperative regulations and supports construction of models for prognosis. The resulting signatures have great potential for patient stratification and treatment decision-making in future clinical applications. TACCO is freely available at http://tacco.life.nctu.edu.tw/ .",TACCO,0.997243583,Transcriptome Alterations in CanCer Omnibus,0.932588655,TACCO,0.997243583,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/7/2019 +29106666,http://bioinfo-mml.sjtu.edu.cn/TADB2,"TADB 2.0: an updated database of bacterial type II toxin-antitoxin loci. TADB2.0 (http://bioinfo-mml.sjtu.edu.cn/TADB2/) is an updated database that provides comprehensive information about bacterial type II toxin-antitoxin (TA) loci. Compared with the previous version, the database refined and the new data schema is employed. With the aid of text mining and manual curation, it recorded 6193 type II TA loci in 870 replicons of bacteria and archaea, including 105 experimentally validated TA loci. In addition, the newly developed tool TAfinder combines the homolog searches and the operon structure detection, allowing the prediction for type II TA pairs in bacterial genome sequences. It also helps to investigate the genomic context of predicted TA loci for putative virulence factors, antimicrobial resistance determinants and mobile genetic elements via alignments to the specific public databases. Additionally, the module TAfinder-Compare allows comparing the presence of the given TA loci across the close relative genomes. 
With the recent updates, TADB2.0 might provide better support for understanding the important roles of type II TA systems in the prokaryotic life activities.",TADB2.0,0.957761908,NA,0,TADB2.0,0.957761908,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +30871473,http://dna.cs.miami.edu/TADKB,"TADKB: Family classification and a knowledge base of topologically associating domains. Background Topologically associating domains (TADs) are considered the structural and functional units of the genome. However, there is a lack of an integrated resource for TADs in the literature where researchers can obtain family classifications and detailed information about TADs. Results We built an online knowledge base TADKB integrating knowledge for TADs in eleven cell types of human and mouse. For each TAD, TADKB provides the predicted three-dimensional (3D) structures of chromosomes and TADs, and detailed annotations about the protein-coding genes and long non-coding RNAs (lncRNAs) existent in each TAD. Besides the 3D chromosomal structures inferred by population Hi-C, the single-cell haplotype-resolved chromosomal 3D structures of 17 GM12878 cells are also integrated in TADKB. A user can submit query gene/lncRNA ID/sequence to search for the TAD(s) that contain(s) the query gene or lncRNA. We also classified TADs into families. To achieve that, we used the TM-scores between reconstructed 3D structures of TADs as structural similarities and the Pearson's correlation coefficients between the fold enrichment of chromatin states as functional similarities. All of the TADs in one cell type were clustered based on structural and functional similarities respectively using the spectral clustering algorithm with various predefined numbers of clusters. 
We have compared the overlapping TADs from structural and functional clusters and found that most of the TADs in the functional clusters with depleted chromatin states are clustered into one or two structural clusters. This novel finding indicates a connection between the 3D structures of TADs and their DNA functions in terms of chromatin states. Conclusion TADKB is available at http://dna.cs.miami.edu/TADKB/ .",TADKB,0.996571302,NA,0,TADKB,0.996571302,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/14/2019 +"22140109, 29220077",http://arabidopsis.org,"The Arabidopsis Information Resource (TAIR): improved gene annotation and new tools. The Arabidopsis Information Resource (TAIR, http://arabidopsis.org) is a genome database for Arabidopsis thaliana, an important reference organism for many fundamental aspects of biology as well as basic and applied plant biology research. TAIR serves as a central access point for Arabidopsis data, annotates gene function and expression patterns using controlled vocabulary terms, and maintains and updates the A. thaliana genome assembly and annotation. TAIR also provides researchers with an extensive set of visualization and analysis tools. Recent developments include several new genome releases (TAIR8, TAIR9 and TAIR10) in which the A. thaliana assembly was updated, pseudogenes and transposon genes were re-annotated, and new data from proteomics and next generation transcriptome sequencing were incorporated into gene models and splice variants. 
Other highlights include progress on functional annotation of the genome and the release of several new tools including Textpresso for Arabidopsis which provides the capability to carry out full text searches on a large body of research literature.",TAIR,0.991281509,Arabidopsis Information Resource,0.796741247,TAIR,0.991281509,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/8/2017 +21383924,http://antibiotics.toku-e.com,"The Antimicrobial Index: a comprehensive literature-based antimicrobial database and reference work. Although the ever-growing usage of antimicrobials in the fields of medicine, pharmacology, and microbiology have undoubtedly allowed for unprecedented advances in the scientific world, these advances are nevertheless accompanied by unprecedented challenges. Sharp increases in antibiotic usages have led to inefficient and wasteful usage practices. Bacterial resistances have dramatically increased and therefore hindered the effectiveness of traditional antibiotics, thus forcing many life-science professionals to turn to plant extracts and synthetic chemicals [1]. The Antimicrobial Index (TAMI) seeks to alleviate some of these mounting difficulties through the collection and centralization of relevant antimicrobial susceptibility data from journals. Data compiled for antimicrobials include: method of action, physical properties, resistance genes, side effects, and minimal inhibitory concentrations (MIC50, MIC90 and/or ranges). TAMI currently contains data on 960 antimicrobials and over 24,000 microorganisms (3,500 unique strains) which were collected from over 400 pieces of published literature. Volume and scope of the index have been and will continue to increase and it is hoped that such an index will further foster international cooperation and communication of antimicrobial-related knowledge. 
TAMI can be accessed at: http://antibiotics.toku-e.com/.",TAMI,0.986702323,The Antimicrobial Index,0.871445251,TAMI,0.986702323,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/22/2011 +26208906,http://bioinformatics.mdanderson.org/main/TANRIC:Overview,"TANRIC: An Interactive Open Platform to Explore the Function of lncRNAs in Cancer. Long noncoding RNAs (lncRNA) have emerged as essential players in cancer biology. Using recent large-scale RNA-seq datasets, especially those from The Cancer Genome Atlas (TCGA), we have developed ""The Atlas of Noncoding RNAs in Cancer"" (TANRIC; http://bioinformatics.mdanderson.org/main/TANRIC:Overview), a user-friendly, open-access web resource for interactive exploration of lncRNAs in cancer. It characterizes the expression profiles of lncRNAs in large patient cohorts of 20 cancer types, including TCGA and independent datasets (>8,000 samples overall). TANRIC enables researchers to rapidly and intuitively analyze lncRNAs of interest (annotated lncRNAs or any user-defined ones) in the context of clinical and other molecular data, both within and across tumor types. Using TANRIC, we have identified a large number of lncRNAs with potential biomedical significance, many of which show strong correlations with established therapeutic targets and biomarkers across tumor types or with drug sensitivity across cell lines. TANRIC represents a valuable tool for investigating the function and clinical relevance of lncRNAs in cancer, greatly facilitating lncRNA-related biologic discoveries and clinical applications.",TANRIC,0.997347057,he Atlas of Noncoding RNAs in Cancer,0.930311513,TANRIC,0.997347057,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/24/2015 +28280852,"http://cvc.dfci.harvard.edu/tantigen/, http://projects.met-hilab.org/tadb","TANTIGEN: a comprehensive database of tumor T cell antigens. Tumor T cell antigens are both diagnostically and therapeutically valuable molecules. 
A large number of new peptides are examined as potential tumor epitopes each year, yet there is no infrastructure for storing and accessing the results of these experiments. We have retroactively cataloged more than 1000 tumor peptides from 368 different proteins, and implemented a web-accessible infrastructure for storing and accessing these experimental results. All peptides in TANTIGEN are labeled as one of the four categories: (1) peptides measured in vitro to bind the HLA, but not reported to elicit either in vivo or in vitro T cell response, (2) peptides found to bind the HLA and to elicit an in vitro T cell response, (3) peptides shown to elicit in vivo tumor rejection, and (4) peptides processed and naturally presented as defined by physical detection. In addition to T cell response, we also annotate peptides that are naturally processed HLA binders, e.g., peptides eluted from HLA in mass spectrometry studies. TANTIGEN provides a rich data resource for tumor-associated epitope and neoepitope discovery studies and is freely available at http://cvc.dfci.harvard.edu/tantigen/ or http://projects.met-hilab.org/tadb (mirror).",TANTIGEN,0.998069942,NA,0,TANTIGEN,0.998069942,1,NA,33849445,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,3/9/2017 +33849445,http://projects.met-hilab.org/tadb,"TANTIGEN 2.0: a knowledge base of tumor T cell antigens and epitopes. We previously developed TANTIGEN, a comprehensive online database cataloging more than 1000 T cell epitopes and HLA ligands from 292 tumor antigens. In TANTIGEN 2.0, we significantly expanded coverage in both immune response targets (T cell epitopes and HLA ligands) and tumor antigens. It catalogs 4,296 antigen variants from 403 unique tumor antigens and more than 1500 T cell epitopes and HLA ligands. We also included neoantigens, a class of tumor antigens generated through mutations resulting in new amino acid sequences in tumor antigens. 
TANTIGEN 2.0 contains validated TCR sequences specific for cognate T cell epitopes and tumor antigen gene/mRNA/protein expression information in major human cancers extracted by Human Pathology Atlas. TANTIGEN 2.0 is a rich data resource for tumor antigens and their associated epitopes and neoepitopes. It hosts a set of tailored data analytics tools tightly integrated with the data to form meaningful analysis workflows. It is freely available at http://projects.met-hilab.org/tadb .",TANTIGEN,0.997013807,adb,0.65444386,TANTIGEN,0.997013807,1,NA,28280852,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,4/14/2021 +"22135297, 29156006",http://www.microrna.gr/tarbase,"TarBase 6.0: capturing the exponential growth of miRNA targets with experimental support. As the relevant literature and the number of experiments increase at a super linear rate, databases that curate and collect experimentally verified microRNA (miRNA) targets have gradually emerged. These databases attempt to provide efficient access to this wealth of experimental data, which is scattered in thousands of manuscripts. Aim of TarBase 6.0 (http://www.microrna.gr/tarbase) is to face this challenge by providing a significant increase of available miRNA targets derived from all contemporary experimental techniques (gene specific and high-throughput), while incorporating a powerful set of tools in a user-friendly interface. TarBase 6.0 hosts detailed information for each miRNA-gene interaction, ranging from miRNA- and gene-related facts to information specific to their interaction, the experimental validation methodologies and their outcomes. All database entries are enriched with function-related data, as well as general information derived from external databases such as UniProt, Ensembl and RefSeq. DIANA microT miRNA target prediction scores and the relevant prediction details are available for each interaction. 
TarBase 6.0 hosts the largest collection of manually curated experimentally validated miRNA-gene interactions (more than 65,000 targets), presenting a 16.5-175-fold increase over other available manually curated databases.",TarBase,0.995145559,NA,0,TarBase,0.995145559,2,"25416803.0, 27603020.0",NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,1/1/2018 +33985427,http://www.biosequencing.cn/TarDB,"TarDB: an online database for plant miRNA targets and miRNA-triggered phased siRNAs. Background In plants, microRNAs (miRNAs) are pivotal regulators of plant development and stress responses. Different computational tools and web servers have been developed for plant miRNA target prediction; however, in silico prediction normally contains false positive results. In addition, many plant miRNA target prediction servers lack information for miRNA-triggered phased small interfering RNAs (phasiRNAs). Creating a comprehensive and relatively high-confidence plant miRNA target database is much needed. Results Here, we report TarDB, an online database that collects three categories of relatively high-confidence plant miRNA targets: (i) cross-species conserved miRNA targets; (ii) degradome/PARE (Parallel Analysis of RNA Ends) sequencing supported miRNA targets; (iii) miRNA-triggered phasiRNA loci. TarDB provides a user-friendly interface that enables users to easily search, browse and retrieve miRNA targets and miRNA initiated phasiRNAs in a broad variety of plants. TarDB has a comprehensive collection of reliable plant miRNA targets containing previously unreported miRNA targets and miRNA-triggered phasiRNAs even in the well-studied model species. Most of these novel miRNA targets are relevant to lineage-specific or species-specific miRNAs. TarDB data is freely available at http://www.biosequencing.cn/TarDB . 
Conclusions In summary, TarDB serves as a useful web resource for exploring relatively high-confidence miRNA targets and miRNA-triggered phasiRNAs in plants.",TarDB,0.990120292,NA,0,TarDB,0.990120292,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/13/2021 +34514504,http://www.citdbase.org,"Contraceptive and Infertility Target DataBase: a contraceptive drug development tool for targeting and analysis of human reproductive specific tissues†. The long and challenging drug development process begins with discovery biology for the selection of an appropriate target for a specific indication. Target is a broad term that can be applied to a range of biological entities such as proteins, genes, and ribonucleic acids (RNAs). Although there are numerous databases available for mining biological entities, publicly available searchable, downloadable databases to aid in target selection for a specific disease or indication (e.g., developing contraceptives and infertility treatments) are limited. We report the development of the Contraceptive and Infertility Target DataBase (https://www.citdbase.org), which provides investigators an interface to mine existing transcriptomic and proteomic resources to identify high-quality contraceptive/infertility targets. The development of similar databases is applicable to the identification of targets for other diseases and conditions.",NA,0,Target DataBase,0.72213304,Target DataBase,0.72213304,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,12/1/2021 +29106651,http://target.sbg.qb.fcen.uba.ar/patho,"Target-Pathogen: a structural bioinformatic approach to prioritize drug targets in pathogens. Available genomic data for pathogens has created new opportunities for drug discovery and development to fight them, including new resistant and multiresistant strains. In particular structural data must be integrated with both, gene information and experimental results. 
In this sense, there is a lack of an online resource that allows genome wide-based data consolidation from diverse sources together with thorough bioinformatic analysis that allows easy filtering and scoring for fast target selection for drug discovery. Here, we present Target-Pathogen database (http://target.sbg.qb.fcen.uba.ar/patho), designed and developed as an online resource that allows the integration and weighting of protein information such as: function, metabolic role, off-targeting, structural properties including druggability, essentiality and omic experiments, to facilitate the identification and prioritization of candidate drug targets in pathogens. We include in the database 10 genomes of some of the most relevant microorganisms for human health (Mycobacterium tuberculosis, Mycobacterium leprae, Klebsiella pneumoniae, Plasmodium vivax, Toxoplasma gondii, Leishmania major, Wolbachia bancrofti, Trypanosoma brucei, Shigella dysenteriae and Schistosoma Smanosoni) and show its applicability. New genomes can be uploaded upon request.",Target-Pathogen,0.984828025,NA,0,Target-Pathogen,0.984828025,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +24013925,http://app1.bioinformatics.mdanderson.org/tarhub/_design/basic/index.html,"targetHub: a programmable interface for miRNA-gene interactions. Motivation With the expansion of high-throughput technologies, understanding different kinds of genome-level data is a common task. MicroRNA (miRNA) is increasingly profiled using high-throughput technologies (microarrays or next-generation sequencing). The downstream analysis of miRNA targets can be difficult. Although there are many databases and algorithms to predict miRNA targets, there are few tools to integrate miRNA-gene interaction data into high-throughput genomic analyses. Results We present targetHub, a CouchDB database of miRNA-gene interactions. TargetHub provides a programmer-friendly interface to access miRNA targets. 
The Web site provides RESTful access to miRNA-gene interactions with an assortment of gene and miRNA identifiers. It can be a useful tool to integrate miRNA target interaction data directly into high-throughput bioinformatics analyses. Availability TargetHub is available on the web at http://app1.bioinformatics.mdanderson.org/tarhub/_design/basic/index.html.",targetHub,0.947265625,NA,0,targetHub,0.947265625,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/6/2013 +21408081,http://targetmine.nibio.go.jp,"TargetMine, an integrated data warehouse for candidate gene prioritisation and target discovery. Prioritising candidate genes for further experimental characterisation is a non-trivial challenge in drug discovery and biomedical research in general. An integrated approach that combines results from multiple data types is best suited for optimal target selection. We developed TargetMine, a data warehouse for efficient target prioritisation. TargetMine utilises the InterMine framework, with new data models such as protein-DNA interactions integrated in a novel way. It enables complicated searches that are difficult to perform with existing tools and it also offers integration of custom annotations and in-house experimental data. We proposed an objective protocol for target prioritisation using TargetMine and set up a benchmarking procedure to evaluate its performance. The results show that the protocol can identify known disease-associated genes with high precision and coverage. A demonstration version of TargetMine is available at http://targetmine.nibio.go.jp/.",TargetMine,0.979291916,NA,0,TargetMine,0.979291916,1,NA,26989145,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,3/8/2011 +26989145,http://targetmine.mizuguchilab.org,"An integrative data analysis platform for gene set analysis and knowledge discovery in a data warehouse framework. . 
Data analysis is one of the most critical and challenging steps in drug discovery and disease biology. A user-friendly resource to visualize and analyse high-throughput data provides a powerful medium for both experimental and computational biologists to understand vastly different biological data types and obtain a concise, simplified and meaningful output for better knowledge discovery. We have previously developed TargetMine, an integrated data warehouse optimized for target prioritization. Here we describe how upgraded and newly modelled data types in TargetMine can now survey the wider biological and chemical data space, relevant to drug discovery and development. To enhance the scope of TargetMine from target prioritization to broad-based knowledge discovery, we have also developed a new auxiliary toolkit to assist with data analysis and visualization in TargetMine. This toolkit features interactive data analysis tools to query and analyse the biological data compiled within the TargetMine data warehouse. The enhanced system enables users to discover new hypotheses interactively by performing complicated searches with no programming and obtaining the results in an easy to comprehend output format. Database URL: http://targetmine.mizuguchilab.org.",TargetMine,0.974033773,NA,0,TargetMine,0.974033773,1,NA,21408081,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob DOES NOT RESOLVE,3/17/2016 +27337171,http://www.herbbol.org:8001/tarnet,"TarNet: An Evidence-Based Database for Natural Medicine Research. Background Complex diseases seriously threaten human health. Drug discovery approaches based on ""single genes, single drugs, and single targets"" are limited in targeting complex diseases. The development of new multicomponent drugs for complex diseases is imperative, and the establishment of a suitable solution for drug group-target protein network analysis is a key scientific problem that must be addressed. 
Herbal medicines have formed the basis of sophisticated systems of traditional medicine and have given rise to some key drugs that remain in use today. The search for new molecules is currently taking a different route, whereby scientific principles of ethnobotany and ethnopharmacognosy are being used by chemists in the discovery of different sources and classes of compounds. Results In this study, we developed TarNet, a manually curated database and platform of traditional medicinal plants with natural compounds that includes potential bio-target information. We gathered information on proteins that are related to or affected by medicinal plant ingredients and data on protein-protein interactions (PPIs). TarNet includes in-depth information on both plant-compound-protein relationships and PPIs. Additionally, TarNet can provide researchers with network construction analyses of biological pathways and protein-protein interactions (PPIs) associated with specific diseases. Researchers can upload a gene or protein list mapped to our PPI database that has been manually curated to generate relevant networks. Multiple functions are accessible for network topological calculations, subnetwork analyses, pathway analyses, and compound-protein relationships. Conclusions TarNet will serve as a useful analytical tool that will provide information on medicinal plant compound-affected proteins (potential targets) and system-level analyses for systems biology and network pharmacology researchers. TarNet is freely available at http://www.herbbol.org:8001/tarnet, and detailed tutorials on the program are also available.",TarNet,0.9843418,NA,0,TarNet,0.9843418,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/23/2016 +24371150,http://bioinfo.jit.edu.cn/tasiRNADatabase,"tasiRNAdb: a database of ta-siRNA regulatory pathways. 
Summary In plants, many trans-acting small interfering RNA (ta-siRNA) regulatory pathways have been identified as significant components of the gene networks involved in development, metabolism, responses to biotic and abiotic stresses and DNA methylation at the TAS locus. To obtain a more comprehensive understanding on the nature of ta-siRNA regulatory pathways, we developed a freely accessible resource, tasiRNAdb, to serve as a repository for the sequences of ta-siRNA regulatory pathway-related microRNAs, TASs, ta-siRNAs and ta-siRNA targets, and for the cascading relations among them. With 583 pathways from 18 species, tasiRNAdb is the largest resource for known ta-siRNA regulatory pathways currently available. tasiRNAdb also provides a tool named TasExpAnalysis that was developed to map user-submitted small RNA and degradome libraries to a stored/input TAS and to perform sRNA phasing analysis and TAS cleavage analysis. Availability The database of plant ta-siRNA regulatory pathways is available at http://bioinfo.jit.edu.cn/tasiRNADatabase/.",tasiRNAdb,0.994892418,NA,0,tasiRNAdb,0.994892418,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/25/2013 +29234333,http://webtom.cabgrid.res.in/wheatssr,"Putative Microsatellite DNA Marker-Based Wheat Genomic Resource for Varietal Improvement and Management. Wheat fulfills 20% of global caloric requirement. World needs 60% more wheat for 9 billion population by 2050 but climate change with increasing temperature is projected to affect wheat productivity adversely. Trait improvement and management of wheat germplasm requires genomic resource. Simple Sequence Repeats (SSRs) being highly polymorphic and ubiquitously distributed in the genome, can be a marker of choice but there is no structured marker database with options to generate primer pairs for genotyping on desired chromosome/physical location. Previously associated markers with different wheat trait are also not available in any database. 
Limitations of in vitro SSR discovery can be overcome by genome-wide in silico mining of SSR. Triticum aestivum SSR database (TaSSRDb) is an integrated online database with three-tier architecture, developed using PHP and MySQL and accessible at http://webtom.cabgrid.res.in/wheatssr/. For genotyping, Primer3 standalone code computes primers on user request. Chromosome-wise SSR calling for all the three sub genomes along with choice of motif types is provided in addition to the primer generation for desired marker. We report here a database of highest number of SSRs (476,169) from complex, hexaploid wheat genome (~17 GB) along with previously reported 268 SSR markers associated with 11 traits. Highest (116.93 SSRs/Mb) and lowest (74.57 SSRs/Mb) SSR densities were found on 2D and 3A chromosome, respectively. To obtain homozygous locus, e-PCR was done. Such 30 loci were randomly selected for PCR validation in panel of 18 wheat Advance Varietal Trial (AVT) lines. TaSSRDb can be a valuable genomic resource tool for linkage mapping, gene/QTL (Quantitative trait locus) discovery, diversity analysis, traceability and variety identification. Varietal specific profiling and differentiation can supplement DUS (Distinctiveness, Uniformity, and Stability) testing, EDV (Essentially Derived Variety)/IV (Initial Variety) disputes, seed purity and hybrid wheat testing. All these are required in germplasm management as well as also in the endeavor of wheat productivity.",TaSSRDb,0.996534109,Triticum,0.721190155,TaSSRDb,0.996534109,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2017 +32027495,http://cactus.nci.nih.gov/download/tautomer,"Tautomer Database: A Comprehensive Resource for Tautomerism Analyses. We report a database of tautomeric structures that contains 2819 tautomeric tuples extracted from 171 publications. 
Each tautomeric entry has been annotated with experimental conditions reported in the respective publication, plus bibliographic details, structural identifiers (e.g., NCI/CADD identifiers FICTS, FICuS, uuuuu, and Standard InChI), and chemical information (e.g., SMILES, molecular weight). The majority of tautomeric tuples found were pairs; the remaining 10% were triples, quadruples, or quintuples, amounting to a total number of structures of 5977. The types of tautomerism were mainly prototropic tautomerism (79%), followed by ring-chain (13%) and valence tautomerism (8%). The experimental conditions reported in the publications included about 50 pure solvents and 9 solvent mixtures with 26 unique spectroscopic or nonspectroscopic methods. 1H and 13C NMR were the most frequently used methods. A total of 77 different tautomeric transform rules (SMIRKS) are covered by at least one example tuple in the database. This database is freely available as a spreadsheet at https://cactus.nci.nih.gov/download/tautomer/.",NA,0,tautomeric,0.663365901,tautomeric,0.663365901,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/10/2020 +26131021,http://bioinfo.au-kbc.org.in/taxane/Taxkb,"TaxKB: a knowledge base for new taxane-related drug discovery. Background Taxanes are naturally occurring compounds which belong to a powerful group of chemotherapeutic drugs with anticancer properties. Their current use, clinical efficacy, and unique mechanism of action indicate their potentiality for cancer drug discovery and development thereby promising to reduce the high economy associated with cancer worldwide. Extensive research has been carried out on taxanes with the aim to combat issues of drug resistance, side effects, limited natural supply, and also to increase the therapeutic index of these molecules. 
These efforts have led to the isolation of many naturally occurring compounds belonging to this family (more than 350 different kinds), and the synthesis of semisynthetic analogs of the naturally existing molecules (>500), and has also led to the characterization of many (>1000) of them. A web-based database system on clinically exploitable taxanes, providing a link between the structure and the pharmacological property of these molecules could help to reduce the druggability gap for these molecules. Results Taxane knowledge base (TaxKB, http://bioinfo.au-kbc.org.in/taxane/Taxkb/), is an online multi-tier relational database that currently holds data on 42 parameters of 250 natural and 503 semisynthetic analogs of taxanes. This database provides researchers with much-needed information necessary for drug development. TaxKB enables the user to search data on the structure, drug-likeness, and physicochemical properties of both natural and synthetic taxanes with a ""General Search"" option in addition to a ""Parameter Specific Search."" It displays 2D structure and allows the user to download the 3D structure (a PDB file) of taxanes that can be viewed with any molecular visualization tool. The ultimate aim of TaxKB is to provide information on Absorption, Distribution, Metabolism, and Excretion/Toxicity (ADME/T) as well as data on bioavailability and target interaction properties of candidate anticancer taxanes, ahead of expensive clinical trials. Conclusion This first web-based single-information portal will play a central role and help researchers to move forward in taxane-based cancer drug research.",TaxKB,0.994036555,Taxane knowledge base,0.832966849,TaxKB,0.994036555,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/28/2015 +31120982,http://depot.tbportals.niaid.nih.gov,"TB DEPOT (Data Exploration Portal): A multi-domain tuberculosis data analysis resource. 
The NIAID TB Portals Program (TBPP) established a unique and growing database repository of socioeconomic, geographic, clinical, laboratory, radiological, and genomic data from patient cases of drug-resistant tuberculosis (DR-TB). Currently, there are 2,428 total cases from nine country sites (Azerbaijan, Belarus, Moldova, Georgia, Romania, China, India, Kazakhstan, and South Africa), 1,611 (66%) of which are multidrug- or extensively-drug resistant and 1,185 (49%), 863 (36%), and 952 (39%) of which contain X-ray, computed tomography (CT) scan, and genomic data, respectively. We introduce the Data Exploration Portal (TB DEPOT, https://depot.tbportals.niaid.nih.gov) to visualize and analyze these multi-domain data. The TB DEPOT leverages the TBPP integration of clinical, socioeconomic, genomic, and imaging data into standardized formats and enables user-driven, repeatable, and reproducible analyses. It furthers the TBPP goals to provide a web-enabled analytics platform to countries with a high burden of multidrug-resistant TB (MDR-TB) but limited IT resources and inaccessible data, and enables the reusability of data, in conformity with the NIH's Findable, Accessible, Interoperable, and Reusable (FAIR) principles. TB DEPOT provides access to ""analysis-ready"" data and the ability to generate and test complex clinically-oriented hypotheses instantaneously with minimal statistical background and data processing skills. TB DEPOT is also promising for enhancing medical training and furnishing well annotated, hard to find, MDR-TB patient cases. 
TB DEPOT, as part of TBPP, further fosters collaborative research efforts to better understand drug-resistant tuberculosis and aid in the development of novel diagnostics and personalized treatment regimens.",TB DEPOT,0.832982928,NA,0,TB DEPOT,0.832982928,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/23/2019 +27387194,http://camellia.ahau.edu.cn/TBC2health,"TBC2health: a database of experimentally validated health-beneficial effects of tea bioactive compounds. Tea is one of the most consumed beverages in the world. Considerable studies show the exceptional health benefits (e.g. antioxidation, cancer prevention) of tea owing to its various bioactive components. However, data from these extensively published papers had not been made available in a central database. To lay a foundation in improving the understanding of healthy tea functions, we established a TBC2health database that currently documents 1338 relationships between 497 tea bioactive compounds and 206 diseases (or phenotypes) manually culled from over 300 published articles. Each entry in TBC2health contains comprehensive information about a bioactive relationship that can be accessed in three aspects: (i) compound information, (ii) disease (or phenotype) information and (iii) evidence and reference. Using the curated bioactive relationships, a bipartite network was reconstructed and the corresponding network (or sub-network) visualization and topological analyses are provided for users. This database has a user-friendly interface for entry browse, search and download. In addition, TBC2health provides a submission page and several useful tools (e.g. BLAST, molecular docking) to facilitate use of the database. Consequently, TBC2health can serve as a valuable bioinformatics platform for the exploration of beneficial effects of tea on human health. 
TBC2health is freely available at http://camellia.ahau.edu.cn/TBC2health.",TBC2health,0.996583238,NA,0,TBC2health,0.996583238,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/1/2017 +29520288,http://camellia.ahau.edu.cn/TBC2target,"TBC2target: A Resource of Predicted Target Genes of Tea Bioactive Compounds. Tea is one of the most popular non-alcoholic beverages consumed worldwide. Numerous bioactive constituents of tea were confirmed to possess healthy benefits via the mechanisms of regulating gene expressions or protein activities. However, a complete interacting profile between tea bioactive compounds (TBCs) and their target genes is lacking, which put an obstacle in the study of healthy function of tea. To fill this gap, we developed a database of target genes of TBCs (TBC2target, http://camellia.ahau.edu.cn/TBC2target) based on a pharmacophore mapping approach. In TBC2target, 6,226 interactions between 240 TBCs and 673 target genes were documented. TBC2target contains detailed information about each interacting entry, such as TBC, CAS number, PubChem CID, source of compound (e.g., green, black), compound type, target gene(s) of TBC, gene symbol, gene ID, ENSEMBL ID, PDB ID, TBC bioactivity and the reference. Using the TBC-target associations, we constructed a bipartite network and provided users the global network and local sub-network visualization and topological analyses. The entire database is free for online browsing, searching and downloading. In addition, TBC2target provides a BLAST search function to facilitate use of the database. 
The particular strengths of TBC2target are the inclusion of the comprehensive TBC-target interactions, and the capacity to visualize and analyze the interacting networks, which may help uncovering the beneficial effects of tea on human health as a central resource in tea health community.",TBC2target,0.991942734,NA,0,TBC2target,0.991942734,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/22/2018 +32882008,http://tbdb.io,"TBDB: a database of structurally annotated T-box riboswitch:tRNA pairs. T-box riboswitches constitute a large family of tRNA-binding leader sequences that play a central role in gene regulation in many gram-positive bacteria. Accurate inference of the tRNA binding to T-box riboswitches is critical to predict their cis-regulatory activity. However, there is no central repository of information on the tRNA binding specificities of T-box riboswitches, and de novo prediction of binding specificities requires advanced knowledge of computational tools to annotate riboswitch secondary structure features. Here, we present the T-box Riboswitch Annotation Database (TBDB, https://tbdb.io), an open-access database with a collection of 23,535 T-box riboswitch sequences, spanning the major phyla of 3,632 bacterial species. Among structural predictions, the TBDB also identifies specifier sequences, cognate tRNA binding partners, and downstream regulatory targets. To our knowledge, the TBDB presents the largest collection of feature, sequence, and structural annotations carried out on this important family of regulatory RNA.",TBDB,0.995206952,T-box Riboswitch Annotation Database,0.944775608,TBDB,0.995206952,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +29297316,http://icg.nsc.ru/TBEVHostDB,"A database of human genes and a gene network involved in response to tick-borne encephalitis virus infection. BACKGROUND:Tick-borne encephalitis is caused by the neurotropic, positive-sense RNA virus, tick-borne encephalitis virus (TBEV). 
TBEV infection can lead to a variety of clinical manifestations ranging from slight fever to severe neurological illness. Very little is known about genetic factors predisposing to severe forms of disease caused by TBEV. The aims of the study were to compile a catalog of human genes involved in response to TBEV infection and to rank genes from the catalog based on the number of neighbors in the network of pairwise interactions involving these genes and TBEV RNA or proteins. RESULTS:Based on manual review and curation of scientific publications a catalog comprising 140 human genes involved in response to TBEV infection was developed. To provide access to data on all genes, the TBEVhostDB web resource ( http://icg.nsc.ru/TBEVHostDB/ ) was created. We reconstructed a network formed by pairwise interactions between TBEV virion itself, viral RNA and viral proteins and 140 genes/proteins from TBEVHostDB. Genes were ranked according to the number of interactions in the network. Two genes/proteins (CCR5 and IFNAR1) that had maximal number of interactions were revealed. It was found that the subnetworks formed by CCR5 and IFNAR1 and their neighbors were a fragments of two key pathways functioning during the course of tick-borne encephalitis: (1) the attenuation of interferon-I signaling pathway by the TBEV NS5 protein that targeted peptidase D; (2) proinflammation and tissue damage pathway triggered by chemokine receptor CCR5 interacting with CD4, CCL3, CCL4, CCL2. Among nine genes associated with severe forms of TBEV infection, three genes/proteins (CCR5, IL10, ARID1B) were found to have protein-protein interactions within the network, and two genes/proteins (IFNL3 and the IL10, that was just mentioned) were up- or down-regulated in response to TBEV infection. Based on this finding, potential mechanisms for participation of CCR5, IL10, ARID1B, and IFNL3 in the host response to TBEV infection were suggested. 
CONCLUSIONS:A database comprising 140 human genes involved in response to TBEV infection was compiled and the TBEVHostDB web resource, providing access to all genes was created. This is the first effort of integrating and unifying data on genetic factors that may predispose to severe forms of diseases caused by TBEV. The TBEVHostDB could potentially be used for assessment of risk factors for severe forms of tick-borne encephalitis and for the design of personalized pharmacological strategies for the treatment of TBEV infection.",TBEVhostDB,0.947500706,NA,0,TBEVhostDB,0.947500706,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/28/2017 +31838261,http://TBPortals.niaid.nih.gov,"Comparative analysis of genomic variability for drug-resistant strains of Mycobacterium tuberculosis: The special case of Belarus. Mycobacterium tuberculosis (M.tb) is the leading cause of death from an infectious disease. Drug resistant tuberculosis (DR-TB) threatens to exacerbate challenges in diagnostics and treatment. It is important to monitor strains circulating in countries with heavy burden of DR-TB, to make informed decisions about treatment, and because in these countries there is an elevated probability that DR-TB may advance to the totally drug resistant form. The TB Portals Program (TBPP, https://TBPortals.niaid.nih.gov) formed a global network of participating institutions and hospitals collecting and analyzing de-identified clinical, imaging and socioeconomic data, augmenting these with genomic sequencing results. TB Portals database includes complete M.tb genomes, with the information about spoligotypes, strains, and genomic variants related to drug resistance. Within the framework of TB Portals, we created Data Exploration Portal (DEPOT), to facilitate visualization and statistical analysis of user-defined cohorts from the entire TB Portals database. 
A continuing TB Portals research objective is to actively monitor and examine genomic variability that may account for observed differences in DR-TB incident rates and/or difficulties with diagnosis and treatment. Our analysis identified that several genomic variants implicated in drug resistance or improved fitness of the pathogen, were significantly more frequent in M.tb strains circulating in Belarus in comparison with other countries. Further studies are necessary to reveal whether the corresponding genomic variants may explain unusually high burden of drug-resistant M.tb in Belarus and suggest improvements for diagnostic and drug therapies.",TBPP,0.786532521,NA,0,TBPP,0.786532521,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/12/2019 +24408216,http://genome.igib.res.in/tbvar,"tbvar: A comprehensive genome variation resource for Mycobacterium tuberculosis. Mycobacterium tuberculosis, along with closely related species, commonly known as M. tuberculosis complex (MTBC), causes tuberculosis in humans and other organisms. Tuberculosis is a disease with high morbidity and mortality, especially in the third world. The genetic variability between clinical isolates of MTBC has been poorly understood, although recent years have seen the re-sequencing of a large number of clinical isolates of MTBC from around the world. The availability of genomic data of multiple isolates in public domain would potentially offer a unique opportunity toward understanding the variome of the organism and the functional consequences of the variations. This nevertheless has been limited by the lack of systematic curation and analysis of data sets available in public domain. In this report, we have re-analyzed re-sequencing data sets corresponding to >450 isolates of MTBC available in public domain to create a comprehensive variome map of MTBC comprising >29 000 single nucleotide variations. 
Using a systematic computational pipeline, we have annotated potential functional variants and drug-resistance-associated variants from the variome. We have made available this data set as a searchable database. Apart from a user-friendly interface, the database also has a novel option to annotate variants from clinical re-sequencing data sets of MTBC. To the best of our knowledge, tbvar is the largest and most comprehensive genome variation resources for MTBC. Database URL: http://genome.igib.res.in/tbvar/",tbvar,0.995915771,NA,0,tbvar,0.995915771,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/9/2014 +30053266,http://tc3a.org,"TC3A: The Cancer 3' UTR Atlas. Widespread alternative polyadenylation (APA) occurs during enhanced cellular proliferation and transformation. Recently, we demonstrated that CFIm25-mediated 3' UTR shortening through APA promotes glioblastoma tumor growth in vitro and in vivo, further underscoring its significance to tumorigenesis. Here, we report The Cancer 3' UTR Atlas (TC3A), a comprehensive resource of APA usage for 10,537 tumors across 32 cancer types. These APA events represent potentially novel prognostic biomarkers and may uncover novel mechanisms for the regulation of cancer driver genes. TC3A is built on top of the now de facto standard cBioPortal. Therefore, the large community of existing cBioPortal users and clinical researchers will find TC3A familiar and immediately usable. TC3A is currently fully functional and freely available at http://tc3a.org.",TC3A,0.990756714,UTR Atlas,0.958735685,TC3A,0.990756714,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +27168721,http://tccr.unmc.edu,"Thyroid Cancer and Tumor Collaborative Registry (TCCR). A multicenter, web-based Thyroid Cancer and Tumor Collaborative Registry (TCCR, http://tccr.unmc.edu) allows for the collection and management of various data on thyroid cancer (TC) and thyroid nodule (TN) patients. 
The TCCR is coupled with OpenSpecimen, an open-source biobank management system, to annotate biospecimens obtained from the TCCR subjects. The demographic, lifestyle, physical activity, dietary habits, family history, medical history, and quality of life data are provided and may be entered into the registry by subjects. Information on diagnosis, treatment, and outcome is entered by the clinical personnel. The TCCR uses advanced technical and organizational practices, such as (i) metadata-driven software architecture (design); (ii) modern standards and best practices for data sharing and interoperability (standardization); (iii) Agile methodology (project management); (iv) Software as a Service (SaaS) as a software distribution model (operation); and (v) the confederation principle as a business model (governance). This allowed us to create a secure, reliable, user-friendly, and self-sustainable system for TC and TN data collection and management that is compatible with various end-user devices and easily adaptable to a rapidly changing environment. Currently, the TCCR contains data on 2,261 subjects and data on more than 28,000 biospecimens. Data and biological samples collected by the TCCR are used in developing diagnostic, prevention, treatment, and survivorship strategies against TC.",TCCR,0.975648627,Thyroid Cancer and Tumor Collaborative Registry,0.948505716,TCCR,0.975648627,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/3/2016 +"24225317, 26546518",http://www.tcdb.org,"The transporter classification database. The Transporter Classification Database (TCDB; http://www.tcdb.org) serves as a common reference point for transport protein research. The database contains more than 10,000 non-redundant proteins that represent all currently recognized families of transmembrane molecular transport systems. 
Proteins in TCDB are organized in a five level hierarchical system, where the first two levels are the class and subclass, the second two are the family and subfamily, and the last one is the transport system. Superfamilies that contain multiple families are included as hyperlinks to the five tier TC hierarchy. TCDB includes proteins from all types of living organisms and is the only transporter classification system that is both universal and recognized by the International Union of Biochemistry and Molecular Biology. It has been expanded by manual curation, contains extensive text descriptions providing structural, functional, mechanistic and evolutionary information, is supported by unique software and is interconnected to many other relevant databases. TCDB is of increasing usefulness to the international scientific community and can serve as a model for the expansion of database technologies. This manuscript describes an update of the database descriptions previously featured in NAR database issues.",TCDB,0.996765494,The Transporter Classification Database,0.959318292,TCDB,0.996765494,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/5/2015 +28086803,http://labs.oicr.on.ca/boutros-lab/tcdd-transcriptomics,"Compendium of TCDD-mediated transcriptomic response datasets in mammalian model systems. Background 2,3,7,8-tetrachlorodibenzo-p-dioxin (TCDD) is the most potent congener of the dioxin class of environmental contaminants. Exposure to TCDD causes a wide range of toxic outcomes, ranging from chloracne to acute lethality. The severity of toxicity is highly dependent on the aryl hydrocarbon receptor (AHR). Binding of TCDD to the AHR leads to changes in transcription of numerous genes. Studies evaluating the transcriptional changes brought on by TCDD may provide valuable insight into the role of the AHR in human health and disease. 
We therefore compiled a collection of transcriptomic datasets that can be used to aid the scientific community in better understanding the transcriptional effects of ligand-activated AHR. Results Specifically, we have created a datasets package - TCDD.Transcriptomics - for the R statistical environment, consisting of 63 unique experiments comprising 377 samples, including various combinations of 3 species (human derived cell lines, mouse and rat), 4 tissue types (liver, kidney, white adipose tissue and hypothalamus) and a wide range of TCDD exposure times and doses. These datasets have been fully standardized using consistent preprocessing and annotation packages (available as of September 14, 2015). To demonstrate the utility of this R package, a subset of ""AHR-core"" genes were evaluated across the included datasets. Ahrr, Nqo1 and members of the Cyp family were significantly induced following exposure to TCDD across the studies as expected while Aldh3a1 was induced specifically in rat liver. Inmt was altered only in liver tissue and primarily by rat-AHR. Conclusions Analysis of the ""AHR-core"" genes demonstrates a continued need for studies surrounding the impact of AHR-activity on the transcriptome; genes believed to be consistently regulated by ligand-activated AHR show surprisingly little overlap across species and tissues. Until now, a comprehensive assessment of the transcriptome across these studies was challenging due to differences in array platforms, processing methods and annotation versions. 
We believe that this package, which is freely available for download ( http://labs.oicr.on.ca/boutros-lab/tcdd-transcriptomics ) will prove to be a highly beneficial resource to the scientific community evaluating the effects of TCDD exposure as well as the variety of functions of the AHR.",TCDD.Transcriptomics,0.805303514,NA,0,TCDD.Transcriptomics,0.805303514,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,1/13/2017 +31015229,http://tcea.tmu.edu.tw,"The Cancer Editome Atlas: A Resource for Exploratory Analysis of the Adenosine-to-Inosine RNA Editome in Cancer. Increasing evidence has suggested a role for adenosine-to-inosine RNA editing in carcinogenesis. However, the clinical utility of RNA editing remains limited because functions of the vast majority of editing events remain largely unexplored. To help the cancer research community investigate functional consequences of individual editing events, we have developed a user-friendly bioinformatic resource, The Cancer Editome Atlas (TCEA; http://tcea.tmu.edu.tw). TCEA characterizes >192 million editing events at >4.6 million editing sites from approximately 11,000 samples across 33 cancer types in The Cancer Genome Atlas. Clinical information, miRNA expression, and alteration in miRNA targeting modulated through RNA editing are also integrated into TCEA. TCEA supports several modules to search, analyze, and visualize the cancer editome, providing a solid basis for investigating the oncogenic mechanisms of RNA editing and expediting the identification of therapeutic targets in cancer. 
SIGNIFICANCE: This user-friendly bioinformatic resource reduces the barrier to analyzing the huge and complex cancer RNA editome that cancer researchers face and facilitates the identification of novel therapeutic targets in cancer.",TCEA,0.980338752,The Cancer Editome Atlas,0.965569031,TCEA,0.980338752,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/23/2019 +34273956,http://viroblast.pungentdb.org.cn/TCM-Blast/viroblast.php,"TCM-Blast for traditional Chinese medicine genome alignment with integrated resources. The traditional Chinese medicine (TCM) genome project aims to reveal the genetic information and regulatory network of herbal medicines, and to clarify their molecular mechanisms in the prevention and treatment of human diseases. Moreover, the TCM genome could provide the basis for the discovery of the functional genes of active ingredients in TCM, and for the breeding and improvement of TCM. The traditional Chinese Medicine Basic Local Alignment Search Tool (TCM-Blast) is a web interface for TCM protein and DNA sequence similarity searches. It contains approximately 40G of genome data on TCMs, including protein and DNA sequence for 36 TCMs with high medical value.The development of a publicly accessible TCM genome alignment database hosted on the TCM-Blast website ( http://viroblast.pungentdb.org.cn/TCM-Blast/viroblast.php ) has expanded to query multiple sequence databases to obtain TCM genome data, and provide user-friendly output for easy analysis and browsing of BLAST results. The genome sequencing of TCMs helps to elucidate the biosynthetic pathways of important secondary metabolites and provides an essential resource for gene discovery studies and molecular breeding. The TCMs genome provides a valuable resource for the investigation of novel bioactive compounds and drugs from these TCMs under the guidance of TCM clinical practice. 
Our database could be expanded to other TCMs after the determination of their genome data.",TCM-Blast,0.966936181,traditional Chinese Medicine Basic,0.723005056,TCM-Blast,0.966936181,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/17/2021 +28588237,http://mesh.tcm.microbioinformatics.org,"TCM-Mesh: The database and analytical system for network pharmacology analysis for TCM preparations. With the advancement of systems biology research, we have already seen great progress in pharmacology studies, especially in network pharmacology. Network pharmacology has been proven to be effective for establishing the ""compounds-proteins/genes-diseases"" network, and revealing the regulation principles of small molecules in a high-throughput manner, thus would be very effective for the analysis of drug combinations, especially for TCM preparations. In this work, we have proposed the TCM-Mesh system, which records TCM-related information collected from various resources and could serve for network pharmacology analysis for TCM preparations in a high-throughput manner (http://mesh.tcm.microbioinformatics.org/). Currently, the database contains 6,235 herbs, 383,840 compounds, 14,298 genes, 6,204 diseases, 144,723 gene-disease associations, 3,440,231 pairs of gene interactions, 163,221 side effect records and 71 toxic records, and web-based software construct a network between herbs and treated diseases, which will help to understand the underlying mechanisms for TCM preparations at molecular levels. We have used 1,293 FDA-approved drugs, as well as compounds from an herbal material Panax ginseng and a patented drug Liuwei Dihuang Wan (LDW) for evaluating our database. 
By comparison of different databases, as well as checking against literature, we have demonstrated the completeness, effectiveness, and accuracy of our database.",TCM-Mesh,0.989953801,NA,0,TCM-Mesh,0.989953801,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/6/2017 +"23203875, 29106634",http://www.megabionet.org/tcmid,"TCMID: Traditional Chinese Medicine integrative database for herb molecular mechanism analysis. As an alternative to modern western medicine, Traditional Chinese Medicine (TCM) is receiving increasingly attention worldwide. Great efforts have been paid to TCM's modernization, which tries to bridge the gap between TCM and modern western medicine. As TCM and modern western medicine share a common aspect at molecular level that the compound(s) perturb human's dysfunction network and restore human normal physiological condition, the relationship between compounds (in herb, refer to ingredients) and their targets (proteins) should be the key factor to connect TCM and modern medicine. Accordingly, we construct this Traditional Chinese Medicine Integrated Database (TCMID, http://www.megabionet.org/tcmid/), which records TCM-related information collected from different resources and through text-mining method. To enlarge the scope of the TCMID, the data have been linked to common drug and disease databases, including Drugbank, OMIM and PubChem. Currently, our TCMID contains ∼47 000 prescriptions, 8159 herbs, 25 210 compounds, 6828 drugs, 3791 diseases and 17 521 related targets, which is the largest data set for related field.
Our web-based software displays a network for integrative relationships between herbs and their treated diseases, the active ingredients and their targets, which will facilitate the study of combination therapy and understanding of the underlying mechanisms for TCM at molecular level.",TCMID,0.992202723,Traditional Chinese Medicine Integrated Database,0.9660478,TCMID,0.992202723,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +32351388,http://tcmio.xielab.net,"TCMIO: A Comprehensive Database of Traditional Chinese Medicine on Immuno-Oncology. Advances in immuno-oncology (IO) are making immunotherapy a powerful tool for cancer treatment. With the discovery of an increasing number of IO targets, many herbs or ingredients from traditional Chinese medicine (TCM) have shown immunomodulatory function and antitumor effects via targeting the immune system. However, knowledge of underlying mechanisms is limited due to the complexity of TCM, which has multiple ingredients acting on multiple targets. To address this issue, we present TCMIO, a comprehensive database of Traditional Chinese Medicine on Immuno-Oncology, which can be used to explore the molecular mechanisms of TCM in modulating the cancer immune microenvironment. Over 120,000 small molecules against 400 IO targets were extracted from public databases and the literature. These ligands were further mapped to the chemical ingredients of TCM to identify herbs that interact with the IO targets. Furthermore, we applied a network inference-based approach to identify the potential IO targets of natural products in TCM. All of these data, along with cheminformatics and bioinformatics tools, were integrated into the publicly accessible database. Chemical structure mining tools are provided to explore the chemical ingredients and ligands against IO targets. Herb-ingredient-target networks can be generated online, and pathway enrichment analysis for TCM or prescription is available. 
This database is functional for chemical ingredient structure mining and network analysis for TCM. We believe that this database provides a comprehensive resource for further research on the exploration of the mechanisms of TCM in cancer immunity and TCM-inspired identification of novel drug leads for cancer immunotherapy. TCMIO can be publicly accessed at http://tcmio.xielab.net.",TCMIO,0.994452953,NA,0,TCMIO,0.994452953,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/15/2020 +24735618,http://sm.nwsuaf.edu.cn/lsp/tcmsp.php,"TCMSP: a database of systems pharmacology for drug discovery from herbal medicines. Background Modern medicine often clashes with traditional medicine such as Chinese herbal medicine because of the little understanding of the underlying mechanisms of action of the herbs. In an effort to promote integration of both sides and to accelerate the drug discovery from herbal medicines, an efficient systems pharmacology platform that represents ideal information convergence of pharmacochemistry, ADME properties, drug-likeness, drug targets, associated diseases and interaction networks, are urgently needed. Description The traditional Chinese medicine systems pharmacology database and analysis platform (TCMSP) was built based on the framework of systems pharmacology for herbal medicines. It consists of all the 499 Chinese herbs registered in the Chinese pharmacopoeia with 29,384 ingredients, 3,311 targets and 837 associated diseases. Twelve important ADME-related properties like human oral bioavailability, half-life, drug-likeness, Caco-2 permeability, blood-brain barrier and Lipinski's rule of five are provided for drug screening and evaluation. TCMSP also provides drug targets and diseases of each active compound, which can automatically establish the compound-target and target-disease networks that let users view and analyze the drug action mechanisms. 
It is designed to fuel the development of herbal medicines and to promote integration of modern medicine and traditional medicine for drug discovery and development. Conclusions The particular strengths of TCMSP are the composition of the large number of herbal entries, and the ability to identify drug-target networks and drug-disease networks, which will help revealing the mechanisms of action of Chinese herbs, uncovering the nature of TCM theory and developing new herb-oriented drugs. TCMSP is freely available at http://sm.nwsuaf.edu.cn/lsp/tcmsp.php.",TCMSP,0.992832065,traditional Chinese medicine systems pharmacology database,0.870840984,TCMSP,0.992832065,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/16/2014 +27789689,http://tcofdb.org,"TcoF-DB v2: update of the database of human and mouse transcription co-factors and transcription factor interactions. Transcription factors (TFs) play a pivotal role in transcriptional regulation, making them crucial for cell survival and important biological functions. For the regulation of transcription, interactions of different regulatory proteins known as transcription co-factors (TcoFs) and TFs are essential in forming necessary protein complexes. Although TcoFs themselves do not bind DNA directly, their influence on transcriptional regulation and initiation, although indirect, has been shown to be significant, with the functionality of TFs strongly influenced by the presence of TcoFs. In the TcoF-DB v2 database, we collect information on TcoFs. In this article, we describe updates and improvements implemented in TcoF-DB v2. TcoF-DB v2 provides several new features that enables exploration of the roles of TcoFs. The content of the database has significantly expanded, and is enriched with information from Gene Ontology, biological pathways, diseases and molecular signatures. TcoF-DB v2 now includes many more TFs; has substantially increased the number of human TcoFs to 958, and now includes information on mouse (418 new TcoFs). 
TcoF-DB v2 enables the exploration of information on TcoFs and allows investigations into their influence on transcriptional regulation in humans and mice. TcoF-DB v2 can be accessed at http://tcofdb.org/.",TcoF-DB,0.943981087,NA,0,TcoF-DB,0.943981087,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/26/2016 +29092939,http://tcpaportal.org,"Explore, Visualize, and Analyze Functional Cancer Proteomic Data Using the Cancer Proteome Atlas. Reverse-phase protein arrays (RPPA) represent a powerful functional proteomic approach to elucidate cancer-related molecular mechanisms and to develop novel cancer therapies. To facilitate community-based investigation of the large-scale protein expression data generated by this platform, we have developed a user-friendly, open-access bioinformatic resource, The Cancer Proteome Atlas (TCPA, http://tcpaportal.org), which contains two separate web applications. The first one focuses on RPPA data of patient tumors, which contains >8,000 samples of 32 cancer types from The Cancer Genome Atlas and other independent patient cohorts. The second application focuses on the RPPA data of cancer cell lines and contains >650 independent cell lines across 19 lineages. Many of these cell lines have publicly available, high-quality DNA, RNA, and drug screening data. TCPA provides various analytic and visualization modules to help cancer researchers explore these datasets and generate testable hypotheses in an effective and intuitive manner. Cancer Res; 77(21); e51-54. ©2017 AACR.",TCPA,0.990851581,The Cancer Proteome Atlas,0.966235052,TCPA,0.990851581,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2017 +31240309,http://tcr3d.ibbr.umd.edu,"TCR3d: The T cell receptor structural repertoire database. Summary T cell receptors (TCRs) are critical molecules of the adaptive immune system, capable of recognizing diverse antigens, including peptides, lipids and small molecules, and represent a rapidly growing class of therapeutics. 
Determining the structural and mechanistic basis of TCR targeting of antigens is a major challenge, as each individual has a vast and diverse repertoire of TCRs. Despite shared general recognition modes, diversity in TCR sequence and recognition represents a challenge to predictive modeling and computational techniques being developed to predict antigen specificity and mechanistic basis of TCR targeting. To this end, we have developed the TCR3d database, a resource containing all known TCR structures, with a particular focus on antigen recognition. TCR3d provides key information on antigen binding mode, interface features, loop sequences and germline gene usage. Users can interactively view TCR complex structures, search sequences of interest against known structures and sequences, and download curated datasets of structurally characterized TCR complexes. This database is updated on a weekly basis, and can serve the community as a centralized resource for those studying T cell receptors and their recognition. Availability and implementation The TCR3d database is available at https://tcr3d.ibbr.umd.edu/.",TCR3d,0.996341427,T cell receptor structural repertoire database,0.668063611,TCR3d,0.996341427,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2019 +33156327,"http://juniper.health.unm.edu/tcrd/, http://pharos.nih.gov","TCRD and Pharos 2021: mining the human proteome for disease biology. In 2014, the National Institutes of Health (NIH) initiated the Illuminating the Druggable Genome (IDG) program to identify and improve our understanding of poorly characterized proteins that can potentially be modulated using small molecules or biologics. Two resources produced from these efforts are: The Target Central Resource Database (TCRD) (http://juniper.health.unm.edu/tcrd/) and Pharos (https://pharos.nih.gov/), a web interface to browse the TCRD. 
The ultimate goal of these resources is to highlight and facilitate research into currently understudied proteins, by aggregating a multitude of data sources, and ranking targets based on the amount of data available, and presenting data in machine learning ready format. Since the 2017 release, both TCRD and Pharos have produced two major releases, which have incorporated or expanded an additional 25 data sources. Recently incorporated data types include human and viral-human protein-protein interactions, protein-disease and protein-phenotype associations, and drug-induced gene signatures, among others. These aggregated data have enabled us to generate new visualizations and content sections in Pharos, in order to empower users to find new areas of study in the druggable genome.",TCRD,0.885566056,Central Resource Database,0.841031889,TCRD,0.885566056,1,NA,NA,low_prob_best_name,do not remove,NA,NA,TRUE POS: two resources; name and URL of first will be correct; second is lost,NA,NA,1/1/2021 +32990749,http://bioinfo.life.hust.edu.cn/TCRdb,"TCRdb: a comprehensive database for T-cell receptor sequences with powerful search function. T cells and the T-cell receptor (TCR) repertoire play pivotal roles in immune response and immunotherapy. TCR sequencing (TCR-Seq) technology has enabled accurate profiling TCR repertoire and currently a large number of TCR-Seq data are available in public. Based on the urgent need to effectively re-use these data, we developed TCRdb, a comprehensive human TCR sequences database, by a uniform pipeline to characterize TCR sequences on TCR-Seq data. TCRdb contains more than 277 million highly reliable TCR sequences from over 8265 TCR-Seq samples across hundreds of tissues/clinical conditions/cell types. 
The unique features of TCRdb include: (i) comprehensive and reliable sequences for TCR repertoire in different samples generated by a strict and uniform pipeline of TCRdb; (ii) powerful search function, allowing users to identify their interested TCR sequences in different conditions; (iii) categorized sample metadata, enabling comparison of TCRs in different sample types; (iv) interactive data visualization charts, describing the TCR repertoire in TCR diversity, length distribution and V-J gene utilization. The TCRdb database is freely available at http://bioinfo.life.hust.edu.cn/TCRdb/ and will be a useful resource in the research and application community of T cell immunology.",TCRdb,0.997715533,NA,0,TCRdb,0.997715533,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +"22116064, 31680154",http://tdrtargets.org,"TDR Targets: a chemogenomics resource for neglected diseases. The TDR Targets Database (http://tdrtargets.org) has been designed and developed as an online resource to facilitate the rapid identification and prioritization of molecular targets for drug development, focusing on pathogens responsible for neglected human diseases. The database integrates pathogen specific genomic information with functional data (e.g. expression, phylogeny, essentiality) for genes collected from various sources, including literature curation. This information can be browsed and queried using an extensive web interface with functionalities for combining, saving, exporting and sharing the query results. Target genes can be ranked and prioritized using numerical weights assigned to the criteria used for querying. In this report we describe recent updates to the TDR Targets database, including the addition of new genomes (specifically helminths), and integration of chemical structure, property and bioactivity information for biological ligands, drugs and inhibitors and cheminformatic tools for querying and visualizing these chemical data. 
These changes greatly facilitate exploration of linkages (both known and predicted) between genes and small molecules, yielding insight into whether particular proteins may be druggable, effectively allowing the navigation of chemical space in a genomics context.",TDR Targets,0.930215985,NA,0,TDR Targets,0.930215985,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +34154643,http://tehub.org,"TE Hub: A community-oriented space for sharing and connecting tools, data, resources, and methods for transposable element annotation. Transposable elements (TEs) play powerful and varied evolutionary and functional roles, and are widespread in most eukaryotic genomes. Research into their unique biology has driven the creation of a large collection of databases, software, classification systems, and annotation guidelines. The diversity of available TE-related methods and resources raises compatibility concerns and can be overwhelming to researchers and communicators seeking straightforward guidance or materials. To address these challenges, we have initiated a new resource, TE Hub, that provides a space where members of the TE community can collaborate to document and create resources and methods. The space consists of (1) a website organized with an open wiki framework, https://tehub.org , (2) a conversation framework via a Twitter account and a Slack channel, and (3) bi-monthly Hub Update video chats on the platform's development. In addition to serving as a centralized repository and communication platform, TE Hub lays the foundation for improved integration, standardization, and effectiveness of diverse tools and protocols. 
We invite the TE community, both novices and experts in TE identification and analysis, to join us in expanding our community-oriented resource.",TE Hub,0.932434916,NA,0,TE Hub,0.932434916,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/21/2021 +32248093,http://daooshee.github.io/TE141K,"TE141K: Artistic Text Benchmark for Text Effect Transfer. Text effects are combinations of visual elements such as outlines, colors and textures of text, which can dramatically improve its artistry. Although text effects are extensively utilized in the design industry, they are usually created by human experts due to their extreme complexity; this is laborious and not practical for normal users. In recent years, some efforts have been made toward automatic text effect transfer; however, the lack of data limits the capabilities of transfer models. To address this problem, we introduce a new text effects dataset, TE141K1 1.Project page: https://daooshee.github.io/TE141K/. with 141,081 text effect/glyph pairs in total. Our dataset consists of 152 professionally designed text effects rendered on glyphs, including English letters, Chinese characters, and Arabic numerals. To the best of our knowledge, this is the largest dataset for text effect transfer to date. Based on this dataset, we propose a baseline approach called text effect transfer GAN (TET-GAN), which supports the transfer of all 152 styles in one model and can efficiently extend to new styles. Finally, we conduct a comprehensive comparison in which 14 style transfer models are benchmarked. Experimental results demonstrate the superiority of TET-GAN both qualitatively and quantitatively and indicate that our dataset is effective and challenging.",TE141K1,0.987350821,Text Benchmark for,0.68329291,TE141K1,0.987350821,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/2/2021 +25224438,http://pcsb.ahau.edu.cn:8080/TCDB/index.jsp,"TMDB: a literature-curated database for small molecular compounds found from tea. 
Background Tea is one of the most consumed beverages worldwide. The healthy effects of tea are attributed to a wealthy of different chemical components from tea. Thousands of studies on the chemical constituents of tea had been reported. However, data from these individual reports have not been collected into a single database. The lack of a curated database of related information limits research in this field, and thus a cohesive database system should necessarily be constructed for data deposit and further application. Description The Tea Metabolome database (TMDB), a manually curated and web-accessible database, was developed to provide detailed, searchable descriptions of small molecular compounds found in Camellia spp. esp. in the plant Camellia sinensis and compounds in its manufactured products (different kinds of tea infusion). TMDB is currently the most complete and comprehensive curated collection of tea compounds data in the world. It contains records for more than 1393 constituents found in tea with information gathered from 364 published books, journal articles, and electronic databases. It also contains experimental 1H NMR and 13C NMR data collected from the purified reference compounds or collected from other database resources such as HMDB. TMDB interface allows users to retrieve tea compounds entries by keyword search using compound name, formula, occurrence, and CAS register number. Each entry in the TMDB contains an average of 24 separate data fields including its original plant species, compound structure, formula, molecular weight, name, CAS registry number, compound types, compound uses including healthy benefits, reference literatures, NMR, MS data, and the corresponding ID from databases such as HMDB and Pubmed. Users can also contribute novel regulatory entries by using a web-based submission page. The TMDB database is freely accessible from the URL of http://pcsb.ahau.edu.cn:8080/TCDB/index.jsp. 
The TMDB is designed to address the broad needs of tea biochemists, natural products chemists, nutritionists, and members of tea related research community. Conclusion The TMDB database provides a solid platform for collection, standardization, and searching of compounds information found in tea. As such this database will be a comprehensive repository for tea biochemistry and tea health research community.",TMDB,0.973765627,Tea Metabolome database,0.978793991,Tea Metabolome database,0.978793991,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/16/2014 +34154536,http://www.teaas.cn/index.php,"TeaAS: a comprehensive database for alternative splicing in tea plants (Camellia sinensis). Alternative splicing (AS) increases the diversity of transcripts and proteins through the selection of different splice sites and plays an important role in the growth, development and stress tolerance of plants. With the release of the reference genome of the tea plant (Camellia sinensis) and the development of transcriptome sequencing, researchers have reported the existence of AS in tea plants. However, there is a lack of a platform, centered on different RNA-seq datasets, that provides comprehensive information on AS.To facilitate access to information on AS and reveal the molecular function of AS in tea plants, we established the first comprehensive AS database for tea plants (TeaAS, http://www.teaas.cn/index.php ). In this study, 3.96 Tb reads from 66 different RNA-seq datasets were collected to identify AS events. TeaAS supports four methods of retrieval of AS information based on gene ID, gene name, annotation (non-redundant/Kyoto encyclopedia of genes and genomes/gene ontology annotation or chromosomal location) and RNA-seq data. It integrates data pertaining to genome annotation, type of AS event, transcript sequence, and isoforms expression levels from 66 RNA-seq datasets. 
The AS events resulting from different environmental conditions and that occurring in varied tissue types, and the expression levels of specific transcripts can be clearly identified through this online database. Moreover, it also provides two useful tools, Basic Local Alignment Search Tool and Generic Genome Browser, for sequence alignment and visualization of gene structure.The features of the TeaAS database make it a comprehensive AS bioinformatics platform for researchers, as well as a reference for studying AS events in woody crops. It could also be helpful for revealing the novel biological functions of AS in gene regulation in tea plants.",TeaAS,0.994054417,plants,0.769802034,TeaAS,0.994054417,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/21/2021 +32620074,http://teacon.wchoda.com,"TeaCoN: a database of gene co-expression network for tea plant (Camellia sinensis). Background Tea plant (Camellia sinensis) is one of the world's most important beverage crops due to its numerous secondary metabolites conferring tea quality and health effects. However, only a small fraction of tea genes (especially for those metabolite-related genes) have been functionally characterized to date. A cohesive bioinformatics platform is thus urgently needed to aid in the functional determination of the remaining genes. Description TeaCoN, a database of gene co-expression network for tea plant, was established to provide genome-wide associations in gene co-expression to survey gene modules (i.e., co-expressed gene sets) for a function of interest. TeaCoN featured a comprehensive collection of 261 high-quality RNA-Seq experiments that covered a wide range of tea tissues as well as various treatments for tea plant. In the current version of TeaCoN, 31,968 (94% coverage of the genome) tea gene models were documented. 
Users can retrieve detailed co-expression information for gene(s) of interest in four aspects: 1) co-expressed genes with the corresponding Pearson correlation coefficients (PCC-values) and statistical P-values, 2) gene information (gene ID, description, symbol, alias, chromosomal location, GO and KEGG annotation), 3) expression profile heatmap of co-expressed genes across seven main tea tissues (e.g., leaf, bud, stem, root), and 4) network visualization of co-expressed genes. We also implemented a gene co-expression analysis, BLAST search function, GO and KEGG enrichment analysis, and genome browser to facilitate use of the database. Conclusion The TeaCoN project can serve as a beneficial platform for candidate gene screening and functional exploration of important agronomical traits in tea plant. TeaCoN is freely available at http://teacon.wchoda.com .",TeaCoN,0.997057378,NA,0,TeaCoN,0.997057378,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/3/2020 +32159215,http://indianteagenome.in:8080/teamid,"TeaMiD: a comprehensive database of simple sequence repeat markers of tea. . Tea is a highly cross-pollinated, woody, perennial tree. High heterozygosity combined with a long gestational period makes conventional breeding a cumbersome process. Therefore, marker-assisted breeding is a better alternative approach when compared with conventional breeding. Considering the large genome size of tea (~3 Gb), information about simple sequence repeat (SSR) is scanty. Thus, we have taken advantage of the recently published tea genomes to identify large numbers of SSR markers in the tea. Besides the genomic sequences, we identified SSRs from the other publicly available sequences such as RNA-seq, GSS, ESTs and organelle genomes (chloroplasts and mitochondrial) and also searched published literature to catalog validated set of tea SSR markers. The complete exercise yielded a total of 935 547 SSRs. 
Out of the total, 82 SSRs were selected for validation among a diverse set of tea genotypes. Six primers (each with four to six alleles, an average of five alleles per locus) out of the total 27 polymorphic primers were used for a diversity analysis in 36 tea genotypes with mean polymorphic information content of 0.61-0.76. Finally, using all the information generated in this study, we have developed a user-friendly database (TeaMiD; http://indianteagenome.in:8080/teamid/) that hosts SSR from all the six resources including three nuclear genomes of tea and transcriptome sequences of 17 Camellia wild species. Database URL: http://indianteagenome.in:8080/teamid/.",TeaMiD,0.993912682,NA,0,TeaMiD,0.993912682,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +34000890,http://telemedicine.cimt.dk,"The hospital telemedicine TELEMED database: Providing information on evidence-based telemedicine services to hospital managers and healthcare professionals. Background Increased use of telemedicine in the healthcare system is a political goal in Denmark. Although the number of hospital patients using interventions such as the video consultation has increased in recent years only a small proportion of the outpatient and inpatient visits involve telemedicine. The TELEMED database (https://telemedicine.cimt.dk/) has been launched at the Center for Innovative Medical Technology in Denmark to ensure that hospital managers and healthcare professionals have access to information about telemedicine services and their effectiveness. This article describes the development and the content of the TELEMED database. Methods A structured literature search was made in the PubMed Database for randomised controlled trials or observational studies with a control group that investigated the effect of telemedicine interventions for hospital patients. Data were extracted from each article on the clinical effectiveness, patient perceptions, economic effects and implementation challenges. 
As the database should only provide inspiration to healthcare professionals regarding possibilities for use of telemedicine, the risk of bias in the studies was not assessed. Results The literature search resulted in 2825 hits. Based on full text assessment, 331 articles were included for data extraction and assessment. These articles present telemedicine services used in 22 different medical specialities. Forty-eight percent of the studies found a positive, statistically significant clinical effect, while 47% showed no statistically significant difference. In 48% of the studies, patients' experiences were examined and of these 68% found positive patient experiences. Fifty-four percent of the articles included information on the economic effects and, of these, 51% found reduction in healthcare utilization. In the majority of studies between two and four types of implementation challenges were found.Conclusions and recommendations: The TELEMED database provides an easily accessible overview of existing evidence-based telemedicine services for use by hospital managers and health professionals, who whish to to implement telemedicine. The database is freely available and expected to be continuously improved and broadened over time.",TELEMED,0.973653436,telemedicine TELEMED,0.615303091,TELEMED,0.973653436,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/18/2021 +29776332,http://www.cancertelsys.org/telnet,"TelNet - a database for human and yeast genes involved in telomere maintenance. Background The ends of linear chromosomes, the telomeres, comprise repetitive DNA sequences in complex with proteins that protects them from being processed by the DNA repair machinery. Cancer cells need to counteract the shortening of telomere repeats during replication for their unlimited proliferation by reactivating the reverse transcriptase telomerase or by using the alternative lengthening of telomeres (ALT) pathway. 
The different telomere maintenance (TM) mechanisms appear to involve hundreds of proteins but their telomere repeat length related activities are only partly understood. Currently, a database that integrates information on TM relevant genes is missing. Description To provide a resource for studies that dissect TM features, we here introduce the TelNet database at http://www.cancertelsys.org/telnet/ . It offers a comprehensive compilation of more than 2000 human and 1100 yeast genes linked to telomere maintenance. These genes were annotated in terms of TM mechanism, associated specific functions and orthologous genes, a TM significance score and information from peer-reviewed literature. This TM information can be retrieved via different search and view modes and evaluated for a set of genes as demonstrated for an exemplary application. Conclusion TelNet supports the annotation of genes identified from bioinformatics analysis pipelines to reveal possible connections with TM networks. We anticipate that TelNet will be a helpful resource for researchers that study telomeres.",TelNet,0.987919092,NA,0,TelNet,0.987919092,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/18/2018 +25792605,http://songyanglab.sysu.edu.cn/telopin,"TeloPIN: a database of telomeric proteins interaction network in mammalian cells. . Interaction network surrounding telomeres has been intensively studied during the past two decades. However, no specific resource by integrating telomere interaction information data is currently available. To facilitate the understanding of the molecular interaction network by which telomeres are associated with biological process and diseases, we have developed TeloPIN (Telomeric Proteins Interaction Network) database (http://songyanglab.sysu.edu.cn/telopin/), a novel database that points to provide comprehensive information on protein-protein, protein-DNA and protein-RNA interaction of telomeres. 
TeloPIN database contains four types of interaction data, including (i) protein--protein interaction (PPI) data, (ii) telomeric proteins ChIP-seq data, (iii) telomere-associated proteins data and (iv) telomeric repeat-containing RNAs (TERRA)-interacting proteins data. By analyzing these four types of interaction data, we found that 358 and 199 proteins have more than one type of interaction information in human and mouse cells, respectively. We also developed table browser and TeloChIP genome browser to help researchers with better integrated visualization of interaction data from different studies. The current release of TeloPIN database includes 1111 PPI, eight telomeric protein ChIP-seq data sets, 1391 telomere-associated proteins and 183 TERRA-interacting proteins from 92 independent studies in mammalian cells. The interaction information provided by TeloPIN database will greatly expand our knowledge of telomeric proteins interaction network.",TeloPIN,0.992713809,Telomeric Proteins Interaction Network,0.977430391,TeloPIN,0.992713809,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/18/2015 +32727974,http://togodb.org/db/tempura,"TEMPURA: Database of Growth TEMPeratures of Usual and RAre Prokaryotes. . Growth temperature is one of the most representative biological parameters for characterizing living organisms. Prokaryotes have been isolated from various temperature environments and show wide diversity in their growth temperatures. We herein constructed a database of growth TEMPeratures of Usual and RAre prokaryotes (TEMPURA, http://togodb.org/db/tempura), which contains the minimum, optimum, and maximum growth temperatures of 8,639 prokaryotic strains. Growth temperature information is linked with taxonomy IDs, phylogenies, and genomic information. 
TEMPURA provides useful information to researchers working on biotechnological applications of extremophiles and their biomolecules as well as those performing fundamental studies on the physiological diversity of prokaryotes.",TEMPURA,0.997982502,growth TEMPeratures of Usual and RAre prokaryotes,0.942272703,TEMPURA,0.997982502,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +26578693,http://tenor.dna.affrc.go.jp,"TENOR: Database for Comprehensive mRNA-Seq Experiments in Rice. Here we present TENOR (Transcriptome ENcyclopedia Of Rice, http://tenor.dna.affrc.go.jp), a database that encompasses large-scale mRNA sequencing (mRNA-Seq) data obtained from rice under a wide variety of conditions. Since the elucidation of the ability of plants to adapt to various growing conditions is a key issue in plant sciences, it is of great interest to understand the regulatory networks of genes responsible for environmental changes. We used mRNA-Seq and performed a time-course transcriptome analysis of rice, Oryza sativa L. (cv. Nipponbare), under 10 abiotic stress conditions (high salinity; high and low phosphate; high, low and extremely low cadmium; drought; osmotic; cold; and flood) and two plant hormone treatment conditions (ABA and jasmonic acid). A large number of genes that were responsive to abiotic stresses and plant hormones were detected by differential expression analysis. Furthermore, several responsive genes were found to encode transcription factors that could control the transcriptional network of stress responses, but the timing of the induction of these genes was not uniform across conditions. A significant number of cis-regulatory elements were enriched in the promoter regions of the responsive genes and were shared among conditions. These data suggest that some key components of gene regulation networks are shared between different stress signaling pathways. 
All the resources (novel genes identified from mRNA-Seq data, expression profiles, co-expressed genes and cis-regulatory elements) can be searched for and are available in TENOR.",TENOR,0.997442484,Transcriptome ENcyclopedia Of Rice,0.994375892,TENOR,0.997442484,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/16/2015 +33311384,http://www.tenuipalpidae.ibilce.unesp.br,"A newly available database of an important family of phytophagous mites: Tenuipalpidae Database. This paper announces a database on the taxonomy, distribution and host plants of mites of the family Tenuipalpidae Berlese (Acari: Tetranychoidea), available online at https://www.tenuipalpidae.ibilce.unesp.br/. In the Tenuipalpidae Database the recorded world distribution and range of host plants are provided for each tenuipalpid species, including synonyms, with a list of all relevant publications.",Tenuipalpidae,0.925811052,NA,0,Tenuipalpidae,0.925811052,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/29/2020 +30052772,http://tse.idies.jhu.edu,"The Terabase Search Engine: a large-scale relational database of short-read sequences. Motivation DNA sequencing archives have grown to enormous scales in recent years, and thousands of human genomes have already been sequenced. The size of these data sets has made searching the raw read data infeasible without high-performance data-query technology. Additionally, it is challenging to search a repository of short-read data using relational logic and to apply that logic across samples from multiple whole-genome sequencing samples. Results We have built a compact, efficiently-indexed database that contains the raw read data for over 250 human genomes, encompassing trillions of bases of DNA, and that allows users to search these data in real-time. The Terabase Search Engine enables retrieval from this database of all the reads for any genomic location in a matter of seconds. 
Users can search using a range of positions or a specific sequence that is aligned to the genome on the fly. Availability and implementation Public access to the Terabase Search Engine database is available at http://tse.idies.jhu.edu. Supplementary information Supplementary data are available at Bioinformatics online.",Terabase,0.869710565,NA,0,Terabase,0.869710565,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/1/2019 +32286817,http://terokit.qmclab.com,"TeroKit: A Database-Driven Web Server for Terpenome Research. Natural products are the major resource of drug discovery, and terpenoids represent the largest family of natural products. Terpenome is defined as all terpenoid-like and terpenoid-derived natural compounds, including the terpenoids, steroids, and their derivatives. Herein, aiming to navigate the chemical and biological space of terpenome, the first comprehensive database dedicated to terpenome research has been developed by collecting over 110 000 terpenome molecules from various resources, distributed in 14 351 species, belonging to 1109 families, and showing activity against 1366 biological targets. Much of the publically available information or computationally predicted properties for each terpenome molecule is annotated and integrated into TeroKit (http://terokit.qmclab.com/), serving as free Web server for academic use. Moreover, several practical toolkits, such as target profiling and conformer generation modules, are also implemented to facilitate the drug discovery of terpenome.",TeroKit,0.997192562,NA,0,TeroKit,0.997192562,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/20/2020 +31728526,http://webapp.ufz.de/tmdb,"TerrestrialMetagenomeDB: a public repository of curated and standardized metadata for terrestrial metagenomes. Microbiome studies focused on the genetic potential of microbial communities (metagenomics) became standard within microbial ecology. 
MG-RAST and the Sequence Read Archive (SRA), the two main metagenome repositories, contain over 202 858 public available metagenomes and this number has increased exponentially. However, mining databases can be challenging due to misannotated, misleading and decentralized data. The main goal of TerrestrialMetagenomeDB is to make it easier for scientists to find terrestrial metagenomes of interest that could be compared with novel datasets in meta-analyses. We defined terrestrial metagenomes as those that do not belong to marine environments. Further, we curated the database using text mining to assign potential descriptive keywords that better contextualize environmental aspects of terrestrial metagenomes, such as biomes and materials. TerrestrialMetagenomeDB release 1.0 includes 15 022 terrestrial metagenomes from SRA and MG-RAST. Together, the downloadable data amounts to 68 Tbp. In total, 199 terrestrial terms were divided into 14 categories. These metagenomes span 83 countries, 30 biomes and 7 main source materials. The TerrestrialMetagenomeDB is publicly available at https://webapp.ufz.de/tmdb.",TerrestrialMetagenomeDB,0.959326208,NA,0,TerrestrialMetagenomeDB,0.959326208,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +23482072,http://tfgd.ihb.ac.cn,"Tetrahymena functional genomics database (TetraFGD): an integrated resource for Tetrahymena functional genomics. The ciliated protozoan Tetrahymena thermophila is a useful unicellular model organism for studies of eukaryotic cellular and molecular biology. Researches on T. thermophila have contributed to a series of remarkable basic biological principles. After the macronuclear genome was sequenced, substantial progress has been made in functional genomics research on T. thermophila, including genome-wide microarray analysis of the T. thermophila life cycle, a T. thermophila gene network analysis based on the microarray data and transcriptome analysis by deep RNA sequencing. 
To meet the growing demands for the Tetrahymena research community, we integrated these data to provide a public access database: Tetrahymena functional genomics database (TetraFGD). TetraFGD contains three major resources, including the RNA-Seq transcriptome, microarray and gene networks. The RNA-Seq data define gene structures and transcriptome, with special emphasis on exon-intron boundaries; the microarray data describe gene expression of 20 time points during three major stages of the T. thermophila life cycle; the gene network data identify potential gene-gene interactions of 15 049 genes. The TetraFGD provides user-friendly search functions that assist researchers in accessing gene models, transcripts, gene expression data and gene-gene relationships. In conclusion, the TetraFGD is an important functional genomic resource for researchers who focus on the Tetrahymena or other ciliates. Database URL: http://tfgd.ihb.ac.cn/",TetraFGD,0.996410549,Tetrahymena functional genomics database,0.951898682,TetraFGD,0.996410549,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/12/2013 +30810209,http://ciliate.ihb.ac.cn,"Tetrahymena Comparative Genomics Database (TCGD): a community resource for Tetrahymena. . Ciliates are a large and diverse group of unicellular organisms characterized by having the following two distinct type of nuclei within a single cell: micronucleus (MIC) and macronucleus (MAC). Although the genomes of several ciliates in different groups have been sequenced, comparative genomics data for multiple species within a ciliate genus are not yet available. Here we collected the genome information and comparative genomics analysis results for 10 species in the Tetrahymena genus, including the previously sequenced model organism Tetrahymena thermophila and 9 newly sequenced species, and constructed a genus-level comparative analysis platform, the Tetrahymena Comparative Genomics Database (TCGD). 
Genome sequences, transcriptomic data, gene models, functional annotation, ortholog groups and synteny maps were built into this database and a user-friendly interface was developed for searching, visualizing and analyzing these data. In summary, the TCGD (http://ciliate.ihb.ac.cn) will be an important and useful resource for the ciliate research community.",TCGD,0.902827978,Tetrahymena Comparative Genomics Database,0.969320416,Tetrahymena Comparative Genomics Database,0.969320416,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +34534667,http://cb.imsc.res.in/texas,"Network biology approach to human tissue-specific chemical exposome. Human exposure to environmental chemicals is a major contributor to the global disease burden. To characterize the external exposome it is important to assess its chemical components and to study their impact on human health. Biomonitoring studies measure the body burden of environmental chemicals detected in biospecimens from a wide range of the population. The detection of these chemicals in biospecimens (and, hence, human tissues) is considered an important biomarker of human exposure. However, there is no readily available resource that compiles such exposure data for human tissues from published literature, and no studies that explore the patterns in the associations between tissue-specific exposures and human diseases. We present Human Tissue-specific Exposome Atlas (TExAs), a compilation of 380 environmental chemicals detected across 27 human tissues. TExAs is accessible via a user friendly webserver: https://cb.imsc.res.in/texas. We compare the chemicals in TExAs with 55 global chemical regulations, guidelines, and inventories, which represent several categories of the external exposome of humans. Further to understand the potential implications on human health of chemicals detected across human tissues, we employ a network biology approach and explore possible chemical exposure-disease associations. 
Ensuing analyses reveal the possibilities of disease comorbidities and demonstrate the application of network biology in unraveling complex disease associations due to chemical exposure.",TExAs,0.994255507,Human Tissue-specific Exposome Atlas,0.91863317,TExAs,0.994255507,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/15/2021 +27899608,http://tfbsbank.co.uk,"TFBSbank: a platform to dissect the big data of protein-DNA interaction in human and model species. Genome-wide transcription factors (TFs) binding data has been extensively generated in the past few years, which poses a great challenge to data interpretation. Therefore, comprehensive and dedicated functional annotation databases for TF-DNA interaction are in great demands to manage, explore and utilize those invaluable data resources. Here, we constructed a platform 'TFBSbank' which houses the annotation of 1870 chromatin immunoprecipitation (ChIP) datasets of 585 TFs in five species (human, mouse, fly, worm and yeast). There are mainly five functional modules in TFBSbank aimed at characterizing ChIP peaks, identifying putative targets, predicting TF responsive enhancers, revealing potential cofactors/collaborators and discovering enriched TF motifs. TFBSbank has two distinctive features compared to the existing databases. Firstly, we provided putative cofactors/collaborators analysis (for Drosophila melanogaster), as they are crucial for the in vivo functions of TFs. Additionally, this database predicted the enrichment of both known and de novo motifs based on ChIP data. TFBSbank is freely accessible at http://tfbsbank.co.uk.",TFBSbank,0.99741137,NA,0,TFBSbank,0.99741137,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +31665425,http://tfbsshape.usc.edu,"TFBSshape: an expanded motif database for DNA shape features of transcription factor binding sites. TFBSshape (https://tfbsshape.usc.edu) is a motif database for analyzing structural profiles of transcription factor binding sites (TFBSs). 
The main rationale for this database is to be able to derive mechanistic insights in protein-DNA readout modes from sequencing data without available structures. We extended the quantity and dimensionality of TFBSshape, from mostly in vitro to in vivo binding and from unmethylated to methylated DNA. This new release of TFBSshape improves its functionality and launches a responsive and user-friendly web interface for easy access to the data. The current expansion includes new entries from the most recent collections of transcription factors (TFs) from the JASPAR and UniPROBE databases, methylated TFBSs derived from in vitro high-throughput EpiSELEX-seq binding assays and in vivo methylated TFBSs from the MeDReaders database. TFBSshape content has increased to 2428 structural profiles for 1900 TFs from 39 different species. The structural profiles for each TFBS entry now include 13 shape features and minor groove electrostatic potential for standard DNA and four shape features for methylated DNA. We improved the flexibility and accuracy for the shape-based alignment of TFBSs and designed new tools to compare methylated and unmethylated structural profiles of TFs and methods to derive DNA shape-preserving nucleotide mutations in TFBSs.",TFBSshape,0.996661127,NA,0,TFBSshape,0.996661127,1,NA,24214955,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2020 +24214955,http://rohslab.cmb.usc.edu/TFBSshape,"TFBSshape: a motif database for DNA shape features of transcription factor binding sites. Transcription factor binding sites (TFBSs) are most commonly characterized by the nucleotide preferences at each position of the DNA target. Whereas these sequence motifs are quite accurate descriptions of DNA binding specificities of transcription factors (TFs), proteins recognize DNA as a three-dimensional object. 
DNA structural features refine the description of TF binding specificities and provide mechanistic insights into protein-DNA recognition. Existing motif databases contain extensive nucleotide sequences identified in binding experiments based on their selection by a TF. To utilize DNA shape information when analysing the DNA binding specificities of TFs, we developed a new tool, the TFBSshape database (available at http://rohslab.cmb.usc.edu/TFBSshape/), for calculating DNA structural features from nucleotide sequences provided by motif databases. The TFBSshape database can be used to generate heat maps and quantitative data for DNA structural features (i.e., minor groove width, roll, propeller twist and helix twist) for 739 TF datasets from 23 different species derived from the motif databases JASPAR and UniPROBE. As demonstrated for the basic helix-loop-helix and homeodomain TF families, our TFBSshape database can be used to compare, qualitatively and quantitatively, the DNA binding specificities of closely related TFs and, thus, uncover differential DNA binding specificities that are not apparent from nucleotide sequence alone.",TFBSshape,0.972820997,NA,0,TFBSshape,0.972820997,1,NA,31665425,low_prob_best_name,do not remove,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/7/2013 +"34037703, 34113986",http://lcbb.swjtu.edu.cn/tfcancer,"TFcancer: a manually curated database of transcription factors associated with human cancer. . Transcription factors (TFs) are critical regulation elements and its dysregulation can lead to a variety of cancers. However, currently, there are no such online resources for large-scale collection, storage and analysis of TF-cancer associations in those cancers. 
To fill this gap, we present a database called TFcancer (http://lcbb.swjtu.edu.cn/tfcancer/), which contains 3,136 experimentally supported associations between 364 TFs and 33 TCGA cancers by manually curating more than 1,800 literature. TFcancer mainly concentrates on four aspects: TF expression, molecular alteration, regulatory relationships between TFs and target genes, and biological processes and signaling pathways of TFs in cancers. TFcancer not only provides a user-friendly interface for browsing and searching but also allows flexible data downloading and user data submitting. It is believed that TFcancer is a helpful and valuable resource for researchers who seek to understand the functions and molecular mechanisms of TFs involved in human cancers. The TFcancer are freely available at http://lcbb.swjtu.edu.cn/tfcancer/. Supplementary data are available at Bioinformatics online.",TFcancer,0.99685061,NA,0,TFcancer,0.99685061,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/26/2021 +"23180794, 29087517",http://tfclass.bioinf.med.uni-goettingen.de,"TFClass: an expandable hierarchical classification of human transcription factors. TFClass (http://tfclass.bioinf.med.uni-goettingen.de/) provides a comprehensive classification of human transcription factors based on their DNA-binding domains. Transcription factors constitute a large functional family of proteins directly regulating the activity of genes. Most of them are sequence-specific DNA-binding proteins, thus reading out the information encoded in cis-regulatory DNA elements of promoters, enhancers and other regulatory regions of a genome. TFClass is a database that classifies human transcription factors by a six-level classification schema, four of which are abstractions according to different criteria, while the fifth level represents TF genes and the sixth individual gene products. Altogether, nine superclasses have been identified, comprising 40 classes and 111 families. 
Counted by genes, 1558 human TFs have been classified so far or >2900 different TFs when including their isoforms generated by alternative splicing or protein processing events. With this classification, we hope to provide a basis for deciphering protein-DNA recognition codes; moreover, it can be used for constructing expanded transcriptional networks by inferring additional TF-target gene relations.",TFClass,0.997942924,NA,0,TFClass,0.997942924,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +25053252,http://grassius.org,"The Maize TFome--development of a transcription factor open reading frame collection for functional genomics. Establishing the architecture of the gene regulatory networks (GRNs) responsible for controlling the transcription of all genes in an organism is a natural development that follows elucidation of the genome sequence. Reconstruction of the GRN requires the availability of a series of molecular tools and resources that so far have been limited to a few model organisms. One such resource consists of collections of transcription factor (TF) open reading frames (ORFs) cloned into vectors that facilitate easy expression in plants or microorganisms. In this study, we describe the development of a publicly available maize TF ORF collection (TFome) of 2034 clones corresponding to 2017 unique gene models in recombination-ready vectors that make possible the facile mobilization of the TF sequences into a number of different expression vectors. The collection also includes several hundred co-regulators (CoREGs), which we classified into well-defined families, and for which we propose here a standard nomenclature, as we have previously done for TFs. We describe the strategies employed to overcome the limitations associated with cloning ORFs from a genome that remains incompletely annotated, with a partial full-length cDNA set available, and with many TF/CoREG genes lacking experimental support. 
In many instances this required the combination of genome-wide expression data with gene synthesis approaches. The strategies developed will be valuable for developing similar resources for other agriculturally important plants. Information on all the clones generated is available through the GRASSIUS knowledgebase (http://grassius.org/).",TFome,0.858155866,TF ORF collection,0.775202051,TFome,0.858155866,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: CLASS,NA,NA,8/26/2014 +22434841,http://ciliate.org,"Tetrahymena Genome Database Wiki: a community-maintained model organism database. When funding for Tetrahymena Genome Database (TGD) ended in 2006, no further updates were made to this important community resource and the main database was taken offline in 2008. We have restored and updated this important resource for use by the Tetrahymena research community. We have also retooled the TGD website (now TGD Wiki) to allow members of the community to directly update the information presented for each gene, including gene names, descriptions and Gene Ontology annotations, from a web browser. Maintenance of genome annotations by the authors generating and publishing primary data, rather than dedicated scientific curators, is a viable alternative for the upkeep of genomes, particularly for organisms with smaller research communities. By combining simple, intuitive displays with the powerful search functions made possible by its underlying relational database, TGD Wiki has been designed to maximize participation by bench scientists in the development of their community bioinformatics resource. DATABASE URL: http://ciliate.org.",TGD,0.996270736,Tetrahymena Genome Database,0.989497207,TGD,0.996270736,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/20/2012 +21253873,http://tged.ihb.ac.cn,"Tetrahymena Gene Expression Database (TGED): a resource of microarray data and co-expression analyses for Tetrahymena. Tetrahymena thermophila is a model eukaryotic organism. 
Functional genomic analyses in Tetrahymena present rich opportunities to address fundamental questions of cell and molecular biology. The Tetrahymena Gene Expression Database (TGED; available at http://tged.ihb.ac.cn) is the first expression database of a ciliated protozoan. It covers three major physiological and developmental states: growth, starvation, and conjugation, and can be accessed through a user-friendly web interface. The gene expression profiles and candidate co-expressed genes for each gene can be retrieved using Gene ID or Gene description searches. Descriptions of standardized methods of sample preparation and the opportunity to add new Tetrahymena microarray data will be of great interest to the Tetrahymena research community. TGED is intended to be a resource for all members of the scientific research community who are interested in Tetrahymena and other ciliates.",TGED,0.988756279,Tetrahymena Gene Expression Database,0.987045392,TGED,0.988756279,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/21/2011 +24466070,http://59.163.192.91/tomato2,"Tomato genomic resources database: an integrated repository of useful tomato genomic information for basic and applied research. Tomato Genomic Resources Database (TGRD) allows interactive browsing of tomato genes, micro RNAs, simple sequence repeats (SSRs), important quantitative trait loci and Tomato-EXPEN 2000 genetic map altogether or separately along twelve chromosomes of tomato in a single window. The database is created using sequence of the cultivar Heinz 1706. High quality single nucleotide polymorphic (SNP) sites between the genes of Heinz 1706 and the wild tomato S. pimpinellifolium LA1589 are also included. Genes are classified into different families. 5'-upstream sequences (5'-US) of all the genes and their tissue-specific expression profiles are provided. Sequences of the microRNA loci and their putative target genes are catalogued. Genes and 5'-US show presence of SSRs and SNPs. 
SSRs located in the genomic, genic and 5'-US can be analysed separately for the presence of any particular motif. Primer sequences for all the SSRs and flanking sequences for all the genic SNPs have been provided. TGRD is a user-friendly web-accessible relational database and uses CMAP viewer for graphical scanning of all the features. Integration and graphical presentation of important genomic information will facilitate better and easier use of tomato genome. TGRD can be accessed as an open source repository at http://59.163.192.91/tomato2/.",TGRD,0.996569216,Tomato Genomic Resources Database,0.988132167,TGRD,0.996569216,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/21/2014 +32829394,http://psd.uohyd.ac.in/tgv,"Reanalysis of genome sequences of tomato accessions and its wild relatives: development of Tomato Genomic Variation (TGV) database integrating SNPs and INDELs polymorphisms. Motivation Facilitated by technological advances and expeditious decrease in the sequencing costs, whole-genome sequencing is increasingly implemented to uncover variations in cultivars/accessions of many crop plants. In tomato (Solanum lycopersicum), the availability of the genome sequence, followed by the resequencing of tomato cultivars and its wild relatives, has provided a prodigious resource for the improvement of traits. A high-quality genome resequencing of 84 tomato accessions and wild relatives generated a dataset that can be used as a resource to identify agronomically important alleles across the genome. Converting this dataset into a searchable database, including information about the influence of single-nucleotide polymorphisms (SNPs) on protein function, provides valuable information about the genetic variations. The database will assist in searching for functional variants of a gene for introgression into tomato cultivars. 
Results A recent release of better-quality tomato genome reference assembly SL3.0, and new annotation ITAG3.2 of SL3.0, dropped 3857 genes, added 4900 novel genes and updated 20 766 genes. Using the above version, we remapped the data from the tomato lines resequenced under the '100 tomato genome resequencing project' on new tomato genome assembly SL3.0 and made an online searchable Tomato Genomic Variations (TGVs) database. The TGV contains information about SNPs and insertion/deletion events and expands it by functional annotation of variants with new ITAG3.2 using SIFT4G software. This database with search function assists in inferring the influence of SNPs on the function of a target gene. This database can be used for selecting SNPs, which can be potentially deployed for improving tomato traits. Availability and implementation TGV is freely available at http://psd.uohyd.ac.in/tgv.",TGV,0.91157198,Genomic,0.657844663,TGV,0.91157198,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2020 +28013278,"http://apps.araport.org/thalemine/, http://www.araport.org","ThaleMine: A Warehouse for Arabidopsis Data Integration and Discovery. ThaleMine (https://apps.araport.org/thalemine/) is a comprehensive data warehouse that integrates a wide array of genomic information of the model plant Arabidopsis thaliana. The data collection currently includes the latest structural and functional annotation from the Araport11 update, the Col-0 genome sequence, RNA-seq and array expression, co-expression, protein interactions, homologs, pathways, publications, alleles, germplasm and phenotypes. The data are collected from a wide variety of public resources. Users can browse gene-specific data through Gene Report pages, identify and create gene lists based on experiments or indexed keywords, and run GO enrichment analysis to investigate the biological significance of selected gene sets. 
Developed by the Arabidopsis Information Portal project (Araport, https://www.araport.org/), ThaleMine uses the InterMine software framework, which builds well-structured data, and provides powerful data query and analysis functionality. The warehoused data can be accessed by users via graphical interfaces, as well as programmatically via web-services. Here we describe recent developments in ThaleMine including new features and extensions, and discuss future improvements. InterMine has been broadly adopted by the model organism research community including nematode, rat, mouse, zebrafish, budding yeast, the modENCODE project, as well as being used for human data. ThaleMine is the first InterMine developed for a plant model. As additional new plant InterMines are developed by the legume and other plant research communities, the potential of cross-organism integrative data analysis will be further enabled.",ThaleMine,0.986150086,NA,0,ThaleMine,0.986150086,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +21520336,http://ccg.murdoch.edu.au/thalind,"ThalInd, a β-thalassemia and hemoglobinopathies database for India: defining a model country-specific and disease-centric bioinformatics resource. Web-based informatics resources for genetic disorders have evolved from genome-wide databases like OMIM and HGMD to Locus Specific databases (LSDBs) and National and Ethnic Mutation Databases (NEMDBs). However, with the increasing amenability of genetic disorders to diagnosis and better management, many previously underreported conditions are emerging as disorders of public health significance. In turn, the greater emphasis on noncommunicable disorders has generated a demand for comprehensive and relevant disease-based information from end-users, including clinicians, patients, genetic epidemiologists, health administrators and policymakers. To accommodate these demands, country-specific and disease-centric resources are required to complement the existing LSDBs and NEMDBs. 
Currently available preconfigured Web-based software applications can be customized for this purpose. The present article describes the formulation and construction of a Web-based informatics resource for β-thalassemia and other hemoglobinopathies, initially for use in India, a multiethnic, multireligious country with a population approaching 1,200 million. The resource ThalInd (http://ccg.murdoch.edu.au/thalind) has been created using the LOVD system, an open source platform-independent database system. The system has been customized to incorporate and accommodate data pertinent to molecular genetics, population genetics, genotype-phenotype correlations, disease burden, and infrastructural assessment. Importantly, the resource also has been aligned with the administrative health system and demographic resources of the country.",ThalInd,0.982127786,NA,0,ThalInd,0.982127786,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/23/2011 +29157087,http://thanatos.biocuckoo.org,"THANATOS: an integrative data resource of proteins and post-translational modifications in the regulation of autophagy. Macroautophagy/autophagy is a highly conserved process for degrading cytoplasmic contents, determines cell survival or death, and regulates the cellular homeostasis. Besides ATG proteins, numerous regulators together with various post-translational modifications (PTMs) are also involved in autophagy. In this work, we collected 4,237 experimentally identified proteins regulated in autophagy and cell death pathways from the literature. Then we computationally identified potential orthologs of known proteins, and developed a comprehensive database of The Autophagy, Necrosis, ApopTosis OrchestratorS (THANATOS, http://thanatos.biocuckoo.org ), containing 191,543 proteins potentially associated with autophagy and cell death pathways in 164 eukaryotes. 
We performed an evolutionary analysis of ATG genes, and observed that ATGs required for the autophagosome formation are highly conserved across eukaryotes. Further analyses revealed that known cancer genes and drug targets were overrepresented in human autophagy proteins, which were significantly associated in a number of signaling pathways and human diseases. By reconstructing a human kinase-substrate phosphorylation network for ATG proteins, our results confirmed that phosphorylation play a critical role in regulating autophagy. In total, we mapped 65,015 known sites of 11 types of PTMs to collected proteins, and revealed that all types of PTM substrates were enriched in human autophagy. In addition, we observed multiple types of PTM regulators such as protein kinases and ubiquitin E3 ligases or adaptors were significantly associated with human autophagy, and again the results emphasized the importance of PTM regulations in autophagy. We anticipated THANATOS can be a useful resource for further studies.",THANATOS,0.99623239,Necrosis,0.979467452,THANATOS,0.99623239,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +27905880,http://pachterlab.github.io/lair,"The Lair: a resource for exploratory analysis of published RNA-Seq data. Increased emphasis on reproducibility of published research in the last few years has led to the large-scale archiving of sequencing data. While this data can, in theory, be used to reproduce results in papers, it is difficult to use in practice. We introduce a series of tools for processing and analyzing RNA-Seq data in the Sequence Read Archive, that together have allowed us to build an easily extendable resource for analysis of data underlying published papers. Our system makes the exploration of data easily accessible and usable without technical expertise. 
Our database and associated tools can be accessed at The Lair: http://pachterlab.github.io/lair .",The,0.761562884,NA,0,The,0.761562884,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,12/1/2016 +22102572,http://psb.kobic.re.kr/STAP/refinement,"STAP Refinement of the NMR database: a database of 2405 refined solution NMR structures. According to several studies, some nuclear magnetic resonance (NMR) structures are of lower quality, less reliable and less suitable for structural analysis than high-resolution X-ray crystallographic structures. We present a public database of 2405 refined NMR solution structures [statistical torsion angle potentials (STAP) refinement of the NMR database, http://psb.kobic.re.kr/STAP/refinement] from the Protein Data Bank (PDB). A simulated annealing protocol was employed to obtain refined structures with target potentials, including the newly developed STAP. The refined database was extensively analysed using various quality indicators from several assessment programs to determine the nuclear Overhauser effect (NOE) completeness, Ramachandran appearance, χ(1)-χ(2) rotamer normality, various parameters for protein stability and other indicators. Most quality indicators are improved in our protocol mainly due to the inclusion of the newly developed knowledge-based potentials. This database can be used by the NMR structure community for further development of research and validation tools, structure-related studies and modelling in many fields of research.",NA,0,the,0.630721748,the,0.630721748,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,11/18/2011 +27173523,http://crrd.mcw.edu,"The Chinchilla Research Resource Database: resource for an otolaryngology disease model. . The long-tailed chinchilla (Chinchilla lanigera) is an established animal model for diseases of the inner and middle ear, among others. 
In particular, chinchilla is commonly used to study diseases involving viral and bacterial pathogens and polymicrobial infections of the upper respiratory tract and the ear, such as otitis media. The value of the chinchilla as a model for human diseases prompted the sequencing of its genome in 2012 and the more recent development of the Chinchilla Research Resource Database (http://crrd.mcw.edu) to provide investigators with easy access to relevant datasets and software tools to enhance their research. The Chinchilla Research Resource Database contains a complete catalog of genes for chinchilla and, for comparative purposes, human. Chinchilla genes can be viewed in the context of their genomic scaffold positions using the JBrowse genome browser. In contrast to the corresponding records at NCBI, individual gene reports at CRRD include functional annotations for Disease, Gene Ontology (GO) Biological Process, GO Molecular Function, GO Cellular Component and Pathway assigned to chinchilla genes based on annotations from the corresponding human orthologs. Data can be retrieved via keyword and gene-specific searches. Lists of genes with similar functional attributes can be assembled by leveraging the hierarchical structure of the Disease, GO and Pathway vocabularies through the Ontology Search and Browser tool. Such lists can then be further analyzed for commonalities using the Gene Annotator (GA) Tool. All data in the Chinchilla Research Resource Database is freely accessible and downloadable via the CRRD FTP site or using the download functions available in the search and analysis tools. 
The Chinchilla Research Resource Database is a rich resource for researchers using, or considering the use of, chinchilla as a model for human disease.Database URL: http://crrd.mcw.edu.",CRRD,0.799337149,The Chinchilla Research Resource Database,0.876183919,The Chinchilla Research Resource Database,0.876183919,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/12/2016 +21265623,http://era.bfs.de,"The European radiobiological archives: online access to data from radiobiological experiments. For financial and ethical reasons, the large-scale radiobiological animal studies conducted over the past 50 years are, to a large extent, unrepeatable experiments. It is therefore important to retain the primary data from these experiments to allow reanalysis, reinterpretation and re-evaluation of results from, for example, carcinogenicity studies, in the light of new knowledge in radiation biology. Consequently, there is an imperative need to keep these data available for the research community. The European Radiobiological Archives (ERA) were developed to fulfill this task. ERA has become a unique archive, including information from almost all European long-term studies carried out between the 1960s and the 1990s. The legacy database was originally developed in a manner that precluded online use. Therefore, strong efforts were made to transform it into a version that is available online through the web. This went together with quality assurance measures, including first the estimation of the rate of non-systematic errors in data entry, which at 2% proved to be very low. Second, every data set was compared against two external sources of information. Standardization of terminology and histopathology is a prerequisite for meaningful comparison of data across studies and analysis of potential carcinogenic effects. 
Standardization is particularly critical for the construction of a database that includes data from different studies evaluated by pathologists in different laboratories. A harmonized pathology nomenclature with modern standard pathology terms was introduced. As far as possible, references for the various studies were directly linked to the studies themselves. Further, a direct link to the JANUS database was established. ERA is now in a position where it has the potential to become a worldwide radiobiological research tool. ERA can be accessed at no cost at https://era.bfs.de. An ID and password can be obtained from the curators at era@bfs.de .",NA,0,The European Radiobiological Archives,0.877208258,The European Radiobiological Archives,0.877208258,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/25/2011 +26896848,http://mgfd.ahau.edu.cn,"MGFD: the maize gene families database. . Most gene families are transcription factor (TF) families, which have fundamental roles in almost all biological processes (development, growth and response to environmental factors) and have been employed to manipulate various types of metabolic, developmental and stress response pathways in plants. Maize (Zea mays) is one of the most important cereal crops in the world due its importance to human nutrition and health. Thus, identifying and annotating all the gene families in maize is an important primary step in defining their functions and understanding their roles in the regulation of diverse biological processes. In this study, we identified 96 predicted maize gene families and systematically characterized all 5826 of the genes in those families. We have also developed a comprehensive database of maize gene families (the MGFD). 
To further explore the functions of these gene families, we extensively annotated the genes, including such basic information as protein sequence features, gene structure, Gene Ontology classifications, phylogenetic relationships and expression profiles. The MGFD has a user-friendly web interface with multiple browse and search functions, as well as data downloading. The MGFD is freely available to users at http://mgfd.ahau.edu.cn/. Database URL: http://mgfd.ahau.edu.cn/.",MGFD,0.702373147,the maize gene families database,0.861386001,the maize gene families database,0.861386001,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/20/2016 +26123534,http://www.sanger.ac.uk/resources/mouse/genomes,"The Mouse Genomes Project: a repository of inbred laboratory mouse strain genomes. The Mouse Genomes Project was initiated in 2009 with the goal of using next-generation sequencing technologies to catalogue molecular variation in the common laboratory mouse strains, and a selected set of wild-derived inbred strains. The initial sequencing and survey of sequence variation in 17 inbred strains was completed in 2011 and included comprehensive catalogue of single nucleotide polymorphisms, short insertion/deletions, larger structural variants including their fine scale architecture and landscape of transposable element variation, and genomic sites subject to post-transcriptional alteration of RNA. From this beginning, the resource has expanded significantly to include 36 fully sequenced inbred laboratory mouse strains, a refined and updated data processing pipeline, and new variation querying and data visualisation tools which are available on the project's website ( http://www.sanger.ac.uk/resources/mouse/genomes/ ). The focus of the project is now the completion of de novo assembled chromosome sequences and strain-specific gene structures for the core strains. 
We discuss how the assembled chromosomes will power comparative analysis, data access tools and future directions of mouse genetics.",NA,0,The Mouse Genomes Project,0.571499872,The Mouse Genomes Project,0.571499872,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/30/2015 +32427908,http://yorku.ca/ocdp,"The Ontario Climate Data Portal, a user-friendly portal of Ontario-specific climate projections. An easily accessible climate data portal, http://yorku.ca/ocdp, was developed and officially launched in 2018 to disseminate a super ensemble of high-resolution regional climate change projections for the province of Ontario, Canada. The spatial resolution is ~10√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâkm√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ√ɬÉ√ǬÉ√ɬÇ√Ǭó√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ~10√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâkm and temporal resolution is one day, UTC. The data covers 120 years from 1981 to 2100. This user-friendly portal provides users with thousands of static and interactive maps, decadal variation trend lines, summary tables, reports and terabytes of bias-corrected downscaled data. The data portal was generated with an emphasis on interactive visualization of climate change information for researchers and the public to understand to what extent climate could change locally under different emission scenarios in the future. This paper presents an introduction to the portal structure and functions, the large extent of the datasets available and the data development methodology.",NA,0,The Ontario Climate Data Portal,0.631285894,The Ontario Climate Data Portal,0.631285894,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/19/2020 +26852673,http://thebadb.bioinfo-minzhao.org,"Multi-tissue transcriptomics for construction of a comprehensive gene resource for the terrestrial snail Theba pisana. The land snail Theba pisana is native to the Mediterranean region but has become one of the most abundant invasive species worldwide. 
Here, we present three transcriptomes of this agriculture pest derived from three tissues: the central nervous system, hepatopancreas (digestive gland), and foot muscle. Sequencing of the three tissues produced 339,479,092 high quality reads and a global de novo assembly generated a total of 250,848 unique transcripts (unigenes). BLAST analysis mapped 52,590 unigenes to NCBI non-redundant protein databases and further functional analysis annotated 21,849 unigenes with gene ontology. We report that T. pisana transcripts have representatives in all functional classes and a comparison of differentially expressed transcripts amongst all three tissues demonstrates enormous differences in their potential metabolic activities. The genes differentially expressed include those with sequence similarity to those genes associated with multiple bacterial diseases and neurological diseases. To provide a valuable resource that will assist functional genomics study, we have implemented a user-friendly web interface, ThebaDB (http://thebadb.bioinfo-minzhao.org/). This online database allows for complex text queries, sequence searches, and data browsing by enriched functional terms and KEGG mapping.",ThebaDB,0.965065837,NA,0,ThebaDB,0.965065837,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/8/2016 +23993102,http://www.broadinstitute.org/ctrp,"An interactive resource to identify cancer genetic and lineage dependencies targeted by small molecules. The high rate of clinical response to protein-kinase-targeting drugs matched to cancer patients with specific genomic alterations has prompted efforts to use cancer cell line (CCL) profiling to identify additional biomarkers of small-molecule sensitivities. 
We have quantitatively measured the sensitivity of 242 genomically characterized CCLs to an Informer Set of 354 small molecules that target many nodes in cell circuitry, uncovering protein dependencies that: (1) associate with specific cancer-genomic alterations and (2) can be targeted by small molecules. We have created the Cancer Therapeutics Response Portal (http://www.broadinstitute.org/ctrp) to enable users to correlate genetic features to sensitivity in individual lineages and control for confounding factors of CCL profiling. We report a candidate dependency, associating activating mutations in the oncogene √ɬÉ√Ǭé√ɬÇ√Ǭ≤-catenin with sensitivity to the Bcl-2 family antagonist, navitoclax. The resource can be used to develop novel therapeutic hypotheses and to accelerate discovery of drugs matched to patients by their cancer genotype and lineage.",NA,0,Therapeutics,0.518288732,Therapeutics,0.518288732,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,8/1/2013 +33095862,http://biosig.unimelb.edu.au/thermomutdb,"ThermoMutDB: a thermodynamic database for missense mutations. Proteins are intricate, dynamic structures, and small changes in their amino acid sequences can lead to large effects on their folding, stability and dynamics. To facilitate the further development and evaluation of methods to predict these changes, we have developed ThermoMutDB, a manually curated database containing >14,669 experimental data of thermodynamic parameters for wild type and mutant proteins. This represents an increase of 83% in unique mutations over previous databases and includes thermodynamic information on 204 new proteins. During manual curation we have also corrected annotation errors in previously curated entries. Associated with each entry, we have included information on the unfolding Gibbs free energy and melting temperature change, and have associated entries with available experimental structural information. 
ThermoMutDB supports users to contribute to new data points and programmatic access to the database via a RESTful API. ThermoMutDB is freely available at: http://biosig.unimelb.edu.au/thermomutdb.",ThermoMutDB,0.998117328,NA,0,ThermoMutDB,0.998117328,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +28759605,http://crdd.osdd.net/raghava/thpdb,"THPdb: Database of FDA-approved peptide and protein therapeutics. THPdb (http://crdd.osdd.net/raghava/thpdb/) is a manually curated repository of Food and Drug Administration (FDA) approved therapeutic peptides and proteins. The information in THPdb has been compiled from 985 research publications, 70 patents and other resources like DrugBank. The current version of the database holds a total of 852 entries, providing comprehensive information on 239 US-FDA approved therapeutic peptides and proteins and their 380 drug variants. The information on each peptide and protein includes their sequences, chemical properties, composition, disease area, mode of activity, physical appearance, category or pharmacological class, pharmacodynamics, route of administration, toxicity, target of activity, etc. In addition, we have annotated the structure of most of the protein and peptides. A number of user-friendly tools have been integrated to facilitate easy browsing and data analysis. To assist scientific community, a web interface and mobile App have also been developed.",THPdb,0.998677254,NA,0,THPdb,0.998677254,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/31/2017 +33258964,http://thairicestarch.kku.ac.th,"ThRSDB: a database of Thai rice starch composition, molecular structure and functionality. . As starch properties can affect end product quality in many ways, rice starch from Thai domesticated cultivars and landraces has been the focus of increasing research interest. Increasing knowledge in this area creates a high demand from the research community for better organized information. 
The Thai Rice Starch Database (ThRSDB) is an online database containing data extensively curated from original research articles on Thai rice starch composition, molecular structure and functionality. The key aim of the ThRSDB is to facilitate accessibility to dispersed rice starch information for, but not limited to, both research and industrial users. Currently, 373 samples from 191 different Thai rice cultivars have been collected from 39 published articles. The ThRSDB includes the search functions necessary for accessing data together with a user-friendly web interface and interactive visualization tools. We have also demonstrated how the collected data can be efficiently used to observe the relationships between starch parameters and rice cultivars through correlation analysis and Partial Least Squares Discriminant Analysis. Database URL: http://thairicestarch.kku.ac.th.",ThRSDB,0.989187896,Thai Rice Starch Database,0.958283556,ThRSDB,0.989187896,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2020 +23515433,http://tiara.gmi.ac.kr,"TIARA genome database: update 2013. The Total Integrated Archive of short-Read and Array (TIARA; http://tiara.gmi.ac.kr) database stores and integrates human genome data generated from multiple technologies including next-generation sequencing and high-resolution comparative genomic hybridization array. The TIARA genome browser is a powerful tool for the analysis of personal genomic information by exploring genomic variants such as SNPs, indels and structural variants simultaneously. As of September 2012, the TIARA database provides raw data and variant information for 13 sequenced whole genomes, 16 sequenced transcriptomes and 33 high resolution array assays. Sequencing reads are available at a depth of ~30√ɬÉ√ǬÉ√ɬÇ√Ǭó for whole genomes and 50√ɬÉ√ǬÉ√ɬÇ√Ǭó for transcriptomes. Information on genomic variants includes a total of ~9.56 million SNPs, 23 025 of which are non-synonymous SNPs, and ~1.19 million indels. 
In this update, by adding high coverage sequencing of additional human individuals, the TIARA genome database now provides an extensive record of rare variants in humans. Following TIARA's fundamentally integrative approach, new transcriptome sequencing data are matched with whole-genome sequencing data in the genome browser. Users can here observe, for example, the expression levels of human genes with allele-specific quantification. Improvements to the TIARA genome browser include the intuitive display of new complex and large-scale data sets.",TIARA,0.992577295,Total Integrated Archive of short-Read and Array,0.969094998,TIARA,0.992577295,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/20/2013 +24675620,http://www.chengfeng.info/tibs_database.html,"TIBS: a web database to browse gene expression in irritable bowel syndrome. Irritable bowel syndrome (IBS) is a chronic functional gastrointestinal disorder. Its symptoms include chronic abdominal pain, bloating gas, diarrhea and constipation. Many IBS patients also have psychological symptoms like depression or anxiety. These unpleasant symptoms significantly lower patients√ɬÉ√Ǭó√ɬÇ√Ǭ≥ quality of life. The prevalence of IBS in Europe and North America is about 10-15% of the population, which makes IBS a disorder with a high social cost. The pathophysiology of IBS is considered to be multifactorial and the exact cause of the disease remains poorly understood. Recently, a genome-wide expression microarray technique has been applied to investigate the possible mechanisms of IBS. However, a user-friendly database that allows scientists without bioinformatics background to query gene expression levels in these data sets and compare gene expression patterns across different tissues has not yet been established. Therefore, we have integrated four public expression microarray data (320 samples) from the Gene Expression Omnibus (GEO) and ArrayExpress databases into an online database called Transcriptome of Irritable Bowel Syndrome (TIBS). 
The gene expression change in IBS patients compared to healthy volunteers or UC patients in jejunum, sigmoid colon, rectum, and descending colon can be queried by gene symbols. Users can compare gene expression levels of IBS patients across these tissues. Sex difference of gene expression in IBS patients was also shown in the database. The current version of TIBS database contains 42,400 annotated gene probe sets represented on the Affymetrix Human Genome U133 plus 2.0 platform. TIBS will be an invaluable resource for a better understanding of the pathogenesis of IBS at the molecular level and for drug development. The TIBS database is available online at http://www.chengfeng.info/tibs_database.html.",TIBS,0.980226338,Transcriptome of Irritable Bowel Syndrome,0.862030645,TIBS,0.980226338,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/24/2014 +34244719,http://www.ieom-tm.com/tidb,"TIDB: a comprehensive database of trained immunity. . Trained immunity is a newly emerging concept that defines the ability of the innate immune system to form immune memory and provide long-lasting protection against previously encountered antigens. Accumulating evidence reveals that trained immunity not only has broad benefits to host defense but is also harmful to the host in chronic inflammatory diseases. However, all trained immunity-related information is scattered in the literature and thus is difficult to access. Here, we describe Trained Immunity DataBase (TIDB), a comprehensive database that provides well-studied trained immunity-related genes from human, rat and mouse as well as the related literature evidence. Moreover, TIDB also provides three modules to analyze the function of the trained-immunity-related genes of interest, including Reactome pathway over-representation analysis, Gene Ontology enrichment analysis and protein-protein interaction subnetwork reconstruction. We believe TIDB will help developing valuable strategies for vaccine design and immune-mediated disease therapy. 
Database URL: http://www.ieom-tm.com/tidb.",TIDB,0.992214262,Trained Immunity DataBase,0.935802007,TIDB,0.992214262,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +33729437,http://isomir.ccr.cancer.gov,"Tumor IsomiR Encyclopedia (TIE): a pancancer database of miRNA isoforms. . MicroRNAs (miRNAs) are master regulators of gene expression in cancers. Their sequence variants or isoforms (isomiRs) are highly abundant and possess unique functions. Given their short sequence length and high heterogeneity, mapping isomiRs can be challenging; without adequate depth and data aggregation, low frequency events are often disregarded. To address these challenges, we present the Tumor IsomiR Encyclopedia (TIE): a dynamic database of isomiRs from over 10,000 adult and pediatric tumor samples in The Cancer Genome Atlas (TCGA) and The Therapeutically Applicable Research to Generate Effective Treatments (TARGET) projects. A key novelty of TIE is its ability to annotate heterogeneous isomiR sequences and aggregate the variants obtained across all datasets. Results can be browsed online or downloaded as spreadsheets. Here we show analysis of isomiRs of miR-21 and miR-30a to demonstrate the utility of TIE. TIE search engine and data is freely available to use at https://isomir.ccr.cancer.gov/.",TIE,0.982593015,Tumor IsomiR Encyclopedia,0.978378109,TIE,0.982593015,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/17/2021 +23197656,"http://www.jcvi.org/tigrfams, http://www.jcvi.org/genome-properties","TIGRFAMs and Genome Properties in 2013. TIGRFAMs, available online at http://www.jcvi.org/tigrfams is a database of protein family definitions. Each entry features a seed alignment of trusted representative sequences, a hidden Markov model (HMM) built from that alignment, cutoff scores that let automated annotation pipelines decide which proteins are members, and annotations for transfer onto member proteins. 
Most TIGRFAMs models are designated equivalog, meaning they assign a specific name to proteins conserved in function from a common ancestral sequence. Models describing more functionally heterogeneous families are designated subfamily or domain, and assign less specific but more widely applicable annotations. The Genome Properties database, available at http://www.jcvi.org/genome-properties, specifies how computed evidence, including TIGRFAMs HMM results, should be used to judge whether an enzymatic pathway, a protein complex or another type of molecular subsystem is encoded in a genome. TIGRFAMs and Genome Properties content are developed in concert because subsystems reconstruction for large numbers of genomes guides selection of seed alignment sequences and cutoff values during protein family construction. Both databases specialize heavily in bacterial and archaeal subsystems. At present, 4284 models appear in TIGRFAMs, while 628 systems are described by Genome Properties. Content derives both from subsystem discovery work and from biocuration of the scientific literature.",TIGRFAMs,0.994315922,NA,0,TIGRFAMs,0.994315922,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2012 +23766369,http://www-cryst.bioc.cam.ac.uk/timbal,"TIMBAL v2: update of a database holding small molecules modulating protein-protein interactions. TIMBAL is a database holding molecules of molecular weight <1200 Daltons that modulate protein-protein interactions. Since its first release, the database has been extended to cover 50 known protein-protein interactions drug targets, including protein complexes that can be stabilized by small molecules with therapeutic effect. The resource contains 14 890 data points for 6896 distinct small molecules. UniProt codes and Protein Data Bank entries are also included. 
Database URL: http://www-cryst.bioc.cam.ac.uk/timbal",TIMBAL,0.996844351,NA,0,TIMBAL,0.996844351,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/13/2013 +28387841,http://www.timetree.org,"TimeTree: A Resource for Timelines, Timetrees, and Divergence Times. Evolutionary information on species divergence times is fundamental to studies of biodiversity, development, and disease. Molecular dating has enhanced our understanding of the temporal patterns of species divergences over the last five decades, and the number of studies is increasing quickly due to an exponential growth in the available collection of molecular sequences from diverse species and large number of genes. Our TimeTree resource is a public knowledge-base with the primary focus to make available all species divergence times derived using molecular sequence data to scientists, educators, and the general public in a consistent and accessible format. Here, we report a major expansion of the TimeTree resource, which more than triples the number of species (>97,000) and more than triples the number of studies assembled (>3,000). Furthermore, scientists can access not only the divergence time between two species or higher taxa, but also a timetree of a group of species and a timeline that traces a species' evolution through time. The new timetree and timeline visualizations are integrated with display of events on earth and environmental history over geological time, which will lead to broader and better understanding of the interplay of the change in the biosphere with the diversity of species on Earth. The next generation TimeTree resource is publicly available online at http://www.timetree.org.",TimeTree,0.989791155,NA,0,TimeTree,0.989791155,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2017 +21495663,http://mmg.rcsi.ie:8080/tin,"TIN-a combinatorial compound collection of synthetically feasible multicomponent synthesis products. 
The synthetic feasibility of any compound library used for virtual screening is critical to the drug discovery process. TIN, a recursive acronym for 'TIN Is Not commercial', is a virtual combinatorial database enumeration of diversity-orientated multicomponent syntheses (MCR). Using a 'one-pot' synthetic technique, 12 unique small molecule scaffolds were developed, predominantly styrylisoxazoles and bis-acetylenic ketones, with extensive derivatization potential. Importantly, the scaffolds were accessible in a single operation from commercially available sources containing R-groups which were then linked combinatorially. This resulted in a combinatorial database of over 28 million product structures, each of which is synthetically feasible. These structures can be accessed through a free Web-based 2D structure search engine or downloaded in SMILES, MOL2, and SDF formats. Subsets include a 10% diversity subset, a drug-like subset, and a lead-like subset that are also freely available for download and virtual screening ( http://mmg.rcsi.ie:8080/tin ).",TIN,0.984829128,NA,0,TIN,0.984829128,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/15/2011 +24930145,http://cwtung.kmu.edu.tw/tipdb,"TIPdb-3D: the three-dimensional structure database of phytochemicals from Taiwan indigenous plants. . The rich indigenous and endemic plants in Taiwan serve as a resourceful bank for biologically active phytochemicals. Based on our TIPdb database curating bioactive phytochemicals from Taiwan indigenous plants, this study presents a three-dimensional (3D) chemical structure database named TIPdb-3D to support the discovery of novel pharmacologically active compounds. The Merck Molecular Force Field (MMFF94) was used to generate 3D structures of phytochemicals in TIPdb. The 3D structures could facilitate the analysis of 3D quantitative structure-activity relationship, the exploration of chemical space and the identification of potential pharmacologically active compounds using protein-ligand docking. 
Database URL: http://cwtung.kmu.edu.tw/tipdb.",TIPdb,0.994740725,NA,0,TIPdb,0.994740725,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/13/2014 +23698860,http://biocomputing.it/tips,"TiPs: a database of therapeutic targets in pathogens and associated tools. Motivation The need for new drugs and new targets is particularly compelling in an era that is witnessing an alarming increase of drug resistance in human pathogens. The identification of new targets of known drugs is a promising approach, which has proven successful in several cases. Here, we describe a database that includes information on 5153 putative drug-target pairs for 150 human pathogens derived from available drug-target crystallographic complexes. Availability and implementation The TiPs database is freely available at http://biocomputing.it/tips. Contact anna.tramontano@uniroma1.it or allegra.via@uniroma1.it.",TiPs,0.989953816,NA,0,TiPs,0.989953816,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/21/2013 +33179754,http://tisch.comp-genomics.org,"TISCH: a comprehensive web resource enabling interactive single-cell transcriptome visualization of tumor microenvironment. Cancer immunotherapy targeting co-inhibitory pathways by checkpoint blockade shows remarkable efficacy in a variety of cancer types. However, only a minority of patients respond to treatment due to the stochastic heterogeneity of tumor microenvironment (TME). Recent advances in single-cell RNA-seq technologies enabled comprehensive characterization of the immune system heterogeneity in tumors but posed computational challenges on integrating and utilizing the massive published datasets to inform immunotherapy. Here, we present Tumor Immune Single Cell Hub (TISCH, http://tisch.comp-genomics.org), a large-scale curated database that integrates single-cell transcriptomic profiles of nearly 2 million cells from 76 high-quality tumor datasets across 27 cancer types. 
All the data were uniformly processed with a standardized workflow, including quality control, batch effect removal, clustering, cell-type annotation, malignant cell classification, differential expression analysis and functional enrichment analysis. TISCH provides interactive gene expression visualization across multiple datasets at the single-cell level or cluster level, allowing systematic comparison between different cell-types, patients, tissue origins, treatment and response groups, and even different cancer-types. In summary, TISCH provides a user-friendly interface for systematically visualizing, searching and downloading gene expression atlas in the TME from multiple cancer types, enabling fast, flexible and comprehensive exploration of the TME.",TISCH,0.996230185,Tumor Immune Single Cell Hub,0.991766587,TISCH,0.996230185,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +24203712,http://tisdb.human.cornell.edu,"TISdb: a database for alternative translation initiation in mammalian cells. Proper selection of the translation initiation site (TIS) on mRNAs is crucial for the production of desired protein products. Recent studies using ribosome profiling technology uncovered a surprising variety of potential TIS sites in addition to the annotated start codon. The prevailing alternative translation reshapes the landscape of the proteome in terms of diversity and complexity. To identify the hidden coding potential of the transcriptome in mammalian cells, we developed global translation initiation sequencing (GTI-Seq) that maps genome-wide TIS positions at nearly a single nucleotide resolution. To facilitate studies of alternative translation, we created a database of alternative TIS sites identified from human and mouse cell lines based on multiple GTI-Seq replicates. The TISdb, available at http://tisdb.human.cornell.edu, includes 6991 TIS sites from 4961 human genes and 9973 TIS sites from 5668 mouse genes. 
The TISdb website provides a simple browser interface for query of high-confidence TIS sites and their associated open reading frames. The output of search results provides a user-friendly visualization of TIS information in the context of transcript isoforms. Together, the information in the database provides an easy reference for alternative translation in mammalian cells and will support future investigation of novel translational products.",TISdb,0.993791044,NA,0,TISdb,0.993791044,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/6/2013 +29036590,http://zhaobioinfo.org/TissGDB,"TissGDB: tissue-specific gene database in cancer. Tissue-specific gene expression is critical in understanding biological processes, physiological conditions, and disease. The identification and appropriate use of tissue-specific genes (TissGenes) will provide important insights into disease mechanisms and organ-specific therapeutic targets. To better understand the tissue-specific features for each cancer type and to advance the discovery of clinically relevant genes or mutations, we built TissGDB (Tissue specific Gene DataBase in cancer) available at http://zhaobioinfo.org/TissGDB. We collected and curated 2461 tissue specific genes (TissGenes) across 22 tissue types that matched the 28 cancer types of The Cancer Genome Atlas (TCGA) from three representative tissue-specific gene expression resources: The Human Protein Atlas (HPA), Tissue-specific Gene Expression and Regulation (TiGER), and Genotype-Tissue Expression (GTEx). For these 2461 TissGenes, we performed gene expression, somatic mutation, and prognostic marker-based analyses across 28 cancer types using TCGA data. Our analyses identified hundreds of TissGenes, including genes that universally kept or lost tissue-specific gene expression, with other features: cancer type-specific isoform expression, fusion with oncogenes or tumor suppressor genes, and markers for protective or risk prognosis. 
TissGDB provides seven categories of annotations: TissGeneSummary, TissGeneExp, TissGene-miRNA, TissGeneMut, TissGeneNet, TissGeneProg, TissGeneClin.",TissGDB,0.99716121,Tissue specific Gene DataBase in cancer,0.886347724,TissGDB,0.99716121,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +31982380,http://hive.biochemistry.gwu.edu/review/tissue_codon,"TissueCoCoPUTs: Novel Human Tissue-Specific Codon and Codon-Pair Usage Tables√ɬÉ√ǬÇ√ɬÇ√Ǭ†Based on Differential Tissue Gene Expression. Protein expression in multicellular organisms varies widely across tissues. Codon usage in the transcriptome of each tissue is derived from genomic codon usage and the relative expression level of each gene. We created a comprehensive computational resource that houses tissue-specific codon, codon-pair, and dinucleotide usage data for 51 Homo sapiens tissues (TissueCoCoPUTs: https://hive.biochemistry.gwu.edu/review/tissue_codon), using transcriptome data from the Broad Institute Genotype-Tissue Expression (GTEx) portal. Distances between tissue-specific codon and codon-pair frequencies were used to generate a dendrogram based on the unique patterns of codon and codon-pair usage in each tissue that are clearly distinct from the genomic distribution. This novel resource may be useful in unraveling the relationship between codon usage and tRNA abundance, which could be critical in determining translation kinetics and efficiency across tissues. Areas of investigation such as biotherapeutic development, tissue-specific genetic engineering, and genetic disease prediction will greatly benefit from this resource.",TissueCoCoPUTs,0.951715197,NA,0,TissueCoCoPUTs,0.951715197,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/23/2020 +"23193266, 27899616",http://netbio.bgu.ac.il/tissuenet,"The TissueNet database of human tissue protein-protein interactions. Knowledge of protein-protein interactions (PPIs) is important for identifying the functions of proteins and the processes they are involved in. 
Although data of human PPIs are easily accessible through several public databases, these databases do not specify the human tissues in which these PPIs take place. The TissueNet database of human tissue PPIs (http://netbio.bgu.ac.il/tissuenet/) associates each interaction with human tissues that express both pair mates. This was achieved by integrating current data of experimentally detected PPIs with extensive data of gene and protein expression across 16 main human tissues. Users can query TissueNet using a protein and retrieve its PPI partners per tissue, or using a PPI and retrieve the tissues expressing both pair mates. The graphical representation of the output highlights tissue-specific and tissue-wide PPIs. Thus, TissueNet provides a unique platform for assessing the roles of human proteins and their interactions across tissues.",TissueNet,0.973481178,NA,0,TissueNet,0.973481178,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/29/2016 +29617745,http://tissues.jensenlab.org,"TISSUES 2.0: an integrative web resource on mammalian tissue expression. . Physiological and molecular similarities between organisms make it possible to translate findings from simpler experimental systems√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭîmodel organisms√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭîinto more complex ones, such as human. This translation facilitates the understanding of biological processes under normal or disease conditions. Researchers aiming to identify the similarities and differences between organisms at the molecular level need resources collecting multi-organism tissue expression data. We have developed a database of gene√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭìtissue associations in human, mouse, rat and pig by integrating multiple sources of evidence: transcriptomics covering all four species and proteomics (human only), manually curated and mined from the scientific literature. Through a scoring scheme, these associations are made comparable across all sources of evidence and across organisms. 
Furthermore, the scoring produces a confidence score assigned to each of the associations. The TISSUES database (version 2.0) is publicly accessible through a user-friendly web interface and as part of the STRING app for Cytoscape. In addition, we analyzed the agreement between datasets, across and within organisms, and identified that the agreement is mainly affected by the quality of the datasets rather than by the technologies used or organisms compared. http://tissues.jensenlab.org/",TISSUES,0.994719326,NA,0,TISSUES,0.994719326,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +32696292,http://tlpdb.cftri.com,"TLPdb: A Resource for Thaumatin-Like Proteins. Antifungal proteins and peptides have drawn the attention of numerous plant biologists and Clinicians, owing to their potential value in protecting commercial crops as well as preventing fungal infections in humans. Various proteins and peptides, such as glucanases, chitinases, chitinase-like proteins, lectins, peroxidases, defensins, and lipid transfer proteins have antifungal activities. Thaumatin is a protein from a West African plant Thaumatococcus danielli that is sweet in taste but does not exhibit antifungal activities. Despite the structural similarities between thaumatins and thaumatin-like proteins (TLPs), TLPs are not sweet in taste, unlike thaumatins. We developed a thaumatin-like protein database of various organisms. TLPs are pathogenesis-related proteins (PR) with molecular masses of 20-26√ɬÉ√ǬÇ√ɬÇ√Ǭ†kDa. The amino acid residues of TLPs involved in an antifungal activity remain obscure and make it hard to receive comprehensive information on TLPs. The biggest problem in the wine industry is white haze, an undesirable feature of high-quality wine. Hence, the problem may be figured out with the easy accessibility of amino acid sequences and to generate infest resistant crops. 
Overall, we aimed to produce a freely accessible TLP database ( https://tlpdb.cftri.com ) that would provide substantive information in understanding the mechanistic facet of TLPs. Briefly, TLPdb contains sequences, structures, and amino acid compositions of validated, published TLP protein sequences (from the plant, fungal as well as animal sources). Thus, this work may yield valuable information that may be useful in understanding the mechanistic aspects of TLP activity and in the evolution of antifungal proteins and fungal resistant crops. TLPdb is a comprehensive thaumatin-like protein resource database of various organisms. The database can serve as a unique Bioinformatics tool for understanding the TLPs. This further may help in understanding and the development of fungal resistant crops. TLPdb is freely available at https://tlpdb.cftri.com .",TLPdb,0.996521711,NA,0,TLPdb,0.996521711,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2020 +31490686,http://dna.engr.latech.edu,"TMB Library of Nucleosome Simulations. Nucleosomes are the fundamental building blocks of chromatin, the biomaterial that houses the genome in all higher organisms. A nucleosome consists of 145-147 base pairs of DNA wrapped 1.7 times around eight histones. Given a four-letter code (A, C, G, T), there are approximately 4147 or 1088 oligonucleotides that can form a nucleosome. Comparative, rather than comprehensive, studies are required. Here we introduce the TMB Library of nucleosome simulations and present a meta-analysis of over 20 √ɬÉ√Ǭé√ɬÇ√Ǭºs of all atom molecular dynamics simulations representing 518 different realizations of the nucleosome. The TMB Library serves as a reference for future comparative, on-demand simulations of nucleosomes and a demonstration of iBIOMES Lite as a tool for managing a laboratory's simulation library. For every simulation, dewatered trajectories, RMSD, and DNA helical parameter data are provided through iBIOMES Lite in a Web browser and a file browser format. 
A novel view of nucleosomal DNA emerges from our meta-analysis of the TMB Library. DNA conformation is restricted to a specific left-handed superhelix, but the range of conformations observed for individual bases and base pairs is not more restricted nor more highly deformed than DNA free in solution. With the exception of Roll, mean DNA helical parameter values obtained from simulations of nucleosomes are largely within the range of thermal motion of DNA free in solution. The library provides evidence of DNA kinking in the nucleosome and clearly demonstrates the effects of DNA sequence on the gross structure and dynamics of nucleosomes. These effects and mispositioning of the 601 super strong nucleosome positioning sequence can be detected in short simulations (10 ns). Collectively, the results provide a basis for comparative simulation studies of nucleosomes and extend our understanding of the binding of proteins and drugs to nucleosomal DNA. The TMB Library can be found at http://dna.engr.latech.edu/~tmbshare/ .",TMB,0.958310604,NA,0,TMB,0.958310604,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/24/2019 +21177659,http://bio-cluster.iis.sinica.edu.tw/TMPad,"TMPad: an integrated structural database for helix-packing folds in transmembrane proteins. √ɬÉ√Ǭé√ɬÇ√Ǭ±-helical transmembrane (TM) proteins play an important role in many critical and diverse biological processes, and specific associations between TM helices are important determinants for membrane protein folding, dynamics and function. In order to gain insights into the above phenomena, it is necessary to investigate different types of helix-packing modes and interactions. However, such information is difficult to obtain because of the experimental impediment and a lack of a well-annotated source of helix-packing folds in TM proteins. 
We have developed the TMPad (TransMembrane Protein Helix-Packing Database) which addresses the above issues by integrating experimentally observed helix-helix interactions and related structural information of membrane proteins. Specifically, the TMPad offers pre-calculated geometric descriptors at the helix-packing interface including residue backbone/side-chain contacts, interhelical distances and crossing angles, helical translational shifts and rotational angles. The TMPad also includes the corresponding sequence, topology, lipid accessibility, ligand-binding information and supports structural classification, schematic diagrams and visualization of the above structural features of TM helix-packing. Through detailed annotations and visualizations of helix-packing, this online resource can serve as an information gateway for deciphering the relationship between helix-helix interactions and higher levels of organization in TM protein structure and function. The website of the TMPad is freely accessible to the public at http://bio-cluster.iis.sinica.edu.tw/TMPad.",TMPad,0.989770174,TransMembrane Protein Helix-Packing Database,0.981935009,TMPad,0.989770174,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2011 +28365741,http://www.dsimb.inserm.fr/TMPL,"TMPL: a database of experimental and theoretical transmembrane protein models positioned in the lipid bilayer. . Knowing the position of protein structures within the membrane is crucial for fundamental and applied research in the field of molecular biology. Only few web resources propose coordinate files of oriented transmembrane proteins, and these exclude predicted structures, although they represent the largest part of the available models. In this article, we present TMPL (http://www.dsimb.inserm.fr/TMPL/), a database of transmembrane protein structures (√ɬÉ√Ǭé√ɬÇ√Ǭ±-helical and √ɬÉ√Ǭé√ɬÇ√Ǭ≤-sheet) positioned in the lipid bilayer. 
It is the first database to include theoretical models of transmembrane protein structures, making it a large repository with more than 11 000 entries. The TMPL database also contains experimentally solved protein structures, which are available as either atomistic or coarse-grained models. A unique feature of TMPL is the possibility for users to update the database by uploading, through an intuitive web interface, the membrane assignments they can obtain with our recent OREMPRO web server.",TMPL,0.995930076,NA,0,TMPL,0.995930076,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +25932650,http://bioinfo.hrbmu.edu.cn/TMREC,"TMREC: A Database of Transcription Factor and MiRNA Regulatory Cascades in Human Diseases. Over the past decades, studies have reported that the combinatorial regulation of transcription factors (TFs) and microRNAs (miRNAs) is essential for the appropriate execution of biological events and developmental processes. Dysregulations of these regulators often cause diseases. However, there are no available resources on the regulatory cascades of TFs and miRNAs in the context of human diseases. To fulfill this vacancy, we established the TMREC database in this study. First, we integrated curated transcriptional and post-transcriptional regulations to construct the TF and miRNA regulatory network. Next, we identified all linear paths using the Breadth First Search traversal method. Finally, we used known disease-related genes and miRNAs to measure the strength of association between cascades and diseases. Currently, TMREC consists of 74,248 cascades and 25,194 cascade clusters, involving in 412 TFs, 266 miRNAs and 545 diseases. With the expanding of experimental support regulation data, we will regularly update the database. TMREC aims to help experimental biologists to comprehensively analyse gene expression regulation, to understand the aetiology and to predict novel therapeutic targets. 
TMREC is freely available at http://bioinfo.hrbmu.edu.cn/TMREC/.",TMREC,0.997089624,NA,0,TMREC,0.997089624,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2015 +25378311,http://bioinformatics.sandia.gov/tmrna,"The tmRNA website. The transfer-messenger RNA (tmRNA) and its partner protein SmpB act together in resolving problems arising when translating bacterial ribosomes reach the end of mRNA with no stop codon. Their genes have been found in nearly all bacterial genomes and in some organelles. The tmRNA Website serves tmRNA sequences, alignments and feature annotations, and has recently moved to http://bioinformatics.sandia.gov/tmrna/. New features include software used to find the sequences, an update raising the number of unique tmRNA sequences from 492 to 1716, and a database of SmpB sequences which are served along with the tmRNA sequence from the same organism.",tmRNA Website,0.91520524,NA,0,tmRNA Website,0.91520524,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/5/2014 +33655207,http://lmc.uab.es/tmsnp,"TMSNP: a web server to predict pathogenesis of missense mutations in the transmembrane region of membrane proteins. The massive amount of data generated from genome sequencing brings tons of newly identified mutations, whose pathogenic/non-pathogenic effects need to be evaluated. This has given rise to several mutation predictor tools that, in general, do not consider the specificities of the various protein groups. We aimed to develop a predictor tool dedicated to membrane proteins, under the premise that their specific structural features and environment would give different responses to mutations compared to globular proteins. For this purpose, we created TMSNP, a database that currently contains information from 2624 pathogenic and 196√ɬÉ√ǬÇ√ɬÇ√Ǭ†705 non-pathogenic reported mutations located in the transmembrane region of membrane proteins. 
By computing various conservation parameters on these mutations in combination with annotations, we trained a machine-learning model able to classify mutations as pathogenic or not. TMSNP (freely available at√ɬÉ√ǬÇ√ɬÇ√Ǭ†http://lmc.uab.es/tmsnp/) improves considerably the prediction power of commonly used mutation predictors trained with globular proteins.",TMSNP,0.996496975,NA,0,TMSNP,0.996496975,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/23/2021 +34517763,"http://tncentral.proteininformationresource.org/, http://tncentral.ncc.unesp.br","TnCentral: a Prokaryotic Transposable Element Database and Web Portal for Transposon Analysis. We describe here the structure and organization of TnCentral (https://tncentral.proteininformationresource.org/ [or the mirror link at https://tncentral.ncc.unesp.br/]), a web resource for prokaryotic transposable elements (TE). TnCentral currently contains √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº400 carefully annotated TE, including transposons from the Tn3, Tn7, Tn402, and Tn554 families; compound transposons; integrons; and associated insertion sequences (IS). These TE carry passenger genes, including genes conferring resistance to over 25 classes of antibiotics and nine types of heavy metal, as well as genes responsible for pathogenesis in plants, toxin/antitoxin gene pairs, transcription factors, and genes involved in metabolism. Each TE has its own entry page, providing details about its transposition genes, passenger genes, and other sequence features required for transposition, as well as a graphical map of all features. TnCentral content can be browsed and queried through text- and sequence-based searches with a graphic output. We describe three use cases, which illustrate how the search interface, results tables, and entry pages can be used to explore and compare TE. TnCentral also includes downloadable software to facilitate user-driven identification, with manual annotation, of certain types of TE in genomic sequences. 
Through the TnCentral homepage, users can also access TnPedia, which provides comprehensive reviews of the major TE families, including an extensive general section and specialized sections with descriptions of insertion sequence and transposon families. TnCentral and TnPedia are intuitive resources that can be used by clinicians and scientists to assess TE diversity in clinical, veterinary, and environmental samples. IMPORTANCE The ability of bacteria to undergo rapid evolution and adapt to changing environmental circumstances drives the public health crisis of multiple antibiotic resistance, as well as outbreaks of disease in economically important agricultural crops and animal husbandry. Prokaryotic transposable elements (TE) play a critical role in this. Many carry ""passenger genes"" (not required for the transposition process) conferring resistance to antibiotics or heavy metals or causing disease in plants and animals. Passenger genes are spread by normal TE transposition activities and by insertion into plasmids, which then spread via conjugation within and across bacterial populations. Thus, an understanding of TE composition and transposition mechanisms is key to developing strategies to combat bacterial pathogenesis. Toward this end, we have developed TnCentral, a bioinformatics resource dedicated to describing and exploring the structural and functional features of prokaryotic TE whose use is intuitive and accessible to users with or without bioinformatics expertise.",TnCentral,0.997506559,NA,0,TnCentral,0.997506559,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/14/2021 +32265943,http://astrobiology.botany.wisc.edu/astrobotany-toast,"Test of Arabidopsis Space Transcriptome: A Discovery Environment to Explore Multiple Plant Biology Spaceflight Experiments. 
Recent advances in the routine access to space along with increasing opportunities to perform plant growth experiments on board the International Space Station have led to an ever-increasing body of transcriptomic, proteomic, and epigenomic data from plants experiencing spaceflight. These datasets hold great promise to help understand how plant biology reacts to this unique environment. However, analyses that mine across such expanses of data are often complex to implement, being impeded by the sheer number of potential comparisons that are possible. Complexities in how the output of these multiple parallel analyses can be presented to the researcher in an accessible and intuitive form provides further barriers to such research. Recent developments in computational systems biology have led to rapid advances in interactive data visualization environments designed to perform just such tasks. However, to date none of these tools have been tailored to the analysis of the broad-ranging plant biology spaceflight data. We have therefore developed the Test Of Arabidopsis Space Transcriptome (TOAST) database (https://astrobiology.botany.wisc.edu/astrobotany-toast) to address this gap in our capabilities. TOAST is a relational database that uses the Qlik database management software to link plant biology, spaceflight-related omics datasets, and their associated metadata. This environment helps visualize relationships across multiple levels of experiments in an easy to use gene-centric platform. TOAST draws on data from The US National Aeronautics and Space Administration's (NASA's) GeneLab and other data repositories and also connects results to a suite of web-based analytical tools to facilitate further investigation of responses to spaceflight and related stresses. 
The TOAST graphical user interface allows for quick comparisons between plant spaceflight experiments using real-time, gene-specific queries, or by using functional gene ontology, Kyoto Encyclopedia of Genes and Genomes pathway, or other filtering systems to explore genetic networks of interest. Testing of the database shows that TOAST confirms patterns of gene expression already highlighted in the literature, such as revealing the modulation of oxidative stress-related responses across multiple plant spaceflight experiments. However, this data exploration environment can also drive new insights into patterns of spaceflight responsive gene expression. For example, TOAST analyses highlight changes to mitochondrial function as likely shared responses in many plant spaceflight experiments.",TOAST,0.984416008,Test Of Arabidopsis Space Transcriptome,0.949631318,TOAST,0.984416008,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/4/2020 +26719120,http://tomatoma.nbrp.jp,"TOMATOMA Update: Phenotypic and Metabolite Information in the Micro-Tom Mutant Resource. TOMATOMA (http://tomatoma.nbrp.jp/) is a tomato mutant database providing visible phenotypic data of tomato mutant lines generated by ethylmethane sulfonate (EMS) treatment or √ɬÉ√Ǭé√ɬÇ√Ǭ≥-ray irradiation in the genetic background of Micro-Tom, a small and rapidly growing variety. To increase mutation efficiency further, mutagenized M3 seeds were subjected to a second round of EMS treatment; M3M1 populations were generated. These plants were self-pollinated, and 4,952 lines of M3M2 mutagenized seeds were generated. We checked for visible phenotypes in the M3M2 plants, and 618 mutant lines with 1,194 phenotypic categories were identified. In addition to the phenotypic information, we investigated Brix values and carotenoid contents in the fruits of individual mutants. Of 466 samples from 171 mutant lines, Brix values and carotenoid contents were between 3.2% and 11.6% and 6.9 and 37.3 √ɬÉ√ǬÇ√ɬÇ√Ǭµg g(-1) FW, respectively. 
This metabolite information concerning the mutant fruits would be useful in breeding programs as well as for the elucidation of metabolic regulation. Researchers are able to browse and search this phenotypic and metabolite information and order seeds of individual mutants via TOMATOMA. Our new Micro-Tom double-mutagenized populations and the metabolic information could provide a valuable genetic toolkit to accelerate tomato research and potential breeding programs.",TOMATOMA,0.994724512,NA,0,TOMATOMA,0.994724512,1,NA,21258066,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,12/30/2015 +21258066,http://tomatoma.nbrp.jp/index.jsp,"TOMATOMA: a novel tomato mutant database distributing Micro-Tom mutant collections. The tomato is an excellent model for studies of plants bearing berry-type fruits and for experimental studies of the Solanaceae family of plants due to its conserved genetic organization. In this study, a comprehensive mutant tomato population was generated in the background of Micro-Tom, a dwarf, rapid-growth variety. In this and previous studies, a family including 8,598 and 6,422 M(2) mutagenized lines was produced by ethylmethane sulfonate (EMS) mutagenesis and √ɬÉ√Ǭé√ɬÇ√Ǭ≥-ray irradiation, and this study developed and investigated these M(2) plants for alteration of visible phenotypes. A total of 9,183 independent M(2) families comprising 91,830 M(2) plants were inspected for phenotypic alteration, and 1,048 individual mutants were isolated. Subsequently, the observed mutant phenotypes were classified into 15 major categories and 48 subcategories. Overall, 1,819 phenotypic categories were found in 1,048 mutants. Of these mutants, 549 were pleiotropic, whereas 499 were non-pleiotropic. Multiple different mutant alleles per locus were found in the mutant libraries, suggesting that the mutagenized populations were nearly saturated. 
Additionally, genetic analysis of backcrosses indicated the successful inheritance of the mutations in BC(1)F(2) populations, confirming the reproducibility in the morphological phenotyping of the M(2) plants. To integrate and manage the visible phenotypes of mutants and other associated data, we developed the in silico database TOMATOMA, a relational system interfacing modules between mutant line names and phenotypic categories. TOMATOMA is a freely accessible database, and these mutant recourses are available through the TOMATOMA (http://tomatoma.nbrp.jp/index.jsp).",TOMATOMA,0.9822613,NA,0,TOMATOMA,0.9822613,1,NA,26719120,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/21/2011 +33969254,http://metabolites.in/tomato-fruits,"TOMATOMET: A metabolome database consists of 7118 accurate mass values detected in mature fruits of 25 tomato cultivars. The total number of low-molecular-weight compounds in the plant kingdom, most of which are secondary metabolites, is hypothesized to be over one million, although only a limited number of plant compounds have been characterized. Untargeted analysis, especially using mass spectrometry (MS), has been useful for understanding the plant metabolome; however, due to the limited availability of authentic compounds for MS-based identification, the identities of most of the ion peaks detected by MS remain unknown. Accurate mass values of peaks obtained by high accuracy mass measurement and, if available, MS/MS fragmentation patterns provide abundant annotation for each peak. Here, we carried out an untargeted analysis of compounds in the mature fruit of 25 tomato cultivars using liquid chromatography-Orbitrap MS for accurate mass measurement, followed by manual curation to construct the metabolome database TOMATOMET (http://metabolites.in/tomato-fruits/). 
The database contains 7,118 peaks with accurate mass values, in which 1,577 ion peaks are annotated as members of a chemical group. Remarkably, 71% of the mass values are not found in the accurate masses detected previously in Arabidopsis thaliana, Medicago truncatula or Jatropha curcas, indicating significant chemical diversity among plant species that remains to be solved. Interestingly, substantial chemical diversity exists also among tomato cultivars, indicating that chemical profiling from distinct cultivars contributes towards understanding the metabolome, even in a single organ of a species, and can prioritize some desirable metabolic targets for further applications such as breeding.",TOMATOMET,0.975067317,NA,0,TOMATOMET,0.975067317,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/29/2021 +28111364,http://bioinf.mind.meiji.ac.jp/tomatomics,"TOMATOMICS: A Web Database for Integrated Omics Information in Tomato. Solanum lycopersicum (tomato) is an important agronomic crop and a major model fruit-producing plant. To facilitate basic and applied research, comprehensive experimental resources and omics information on tomato are available following their development. Mutant lines and cDNA clones from a dwarf cultivar, Micro-Tom, are two of these genetic resources. Large-scale sequencing data for ESTs and full-length cDNAs from Micro-Tom continue to be gathered. In conjunction with information on the reference genome sequence of another cultivar, Heinz 1706, the Micro-Tom experimental resources have facilitated comprehensive functional analyses. To enhance the efficiency of acquiring omics information for tomato biology, we have integrated the information on the Micro-Tom experimental resources and the Heinz 1706 genome sequence. 
We have also inferred gene structure by comparison of sequences between the genome of Heinz 1706 and the transcriptome, which are comprised of Micro-Tom full-length cDNAs and Heinz 1706 RNA-seq data stored in the KaFTom and Sequence Read Archive databases. In order to provide large-scale omics information with streamlined connectivity we have developed and maintain a web database TOMATOMICS (http://bioinf.mind.meiji.ac.jp/tomatomics/). In TOMATOMICS, access to the information on the cDNA clone resources, full-length mRNA sequences, gene structures, expression profiles and functional annotations of genes is available through search functions and the genome browser, which has an intuitive graphical interface.",TOMATOMICS,0.993693531,NA,0,TOMATOMICS,0.993693531,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +25392424,http://topdb.enzim.ttk.mta.hu,"Expediting topology data gathering for the TOPDB database. The Topology Data Bank of Transmembrane Proteins (TOPDB, http://topdb.enzim.ttk.mta.hu) contains experimentally determined topology data of transmembrane proteins. Recently, we have updated TOPDB from several sources and utilized a newly developed topology prediction algorithm to determine the most reliable topology using the results of experiments as constraints. In addition to collecting the experimentally determined topology data published in the last couple of years, we gathered topographies defined by the TMDET algorithm using 3D structures from the PDBTM. Results of global topology analysis of various organisms as well as topology data generated by high throughput techniques, like the sequential positions of N- or O-glycosylations were incorporated into the TOPDB database. Moreover, a new algorithm was developed to integrate scattered topology data from various publicly available databases and a new method was introduced to measure the reliability of predicted topologies. 
We show that reliability values highly correlate with the per protein topology accuracy of the utilized prediction method. Altogether, more than 52,000 new topology data and more than 2600 new transmembrane proteins have been collected since the last public release of the TOPDB database.",TOPDB,0.997721473,Topology Data Bank of Transmembrane Proteins,0.98918283,TOPDB,0.997721473,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +27153630,http://topdom.enzim.hu,"TOPDOM: database of conservatively located domains and motifs in proteins. Unlabelled The TOPDOM database-originally created as a collection of domains and motifs located consistently on the same side of the membranes in √ɬÉ√Ǭé√ɬÇ√Ǭ±-helical transmembrane proteins-has been updated and extended by taking into consideration consistently localized domains and motifs in globular proteins, too. By taking advantage of the recently developed CCTOP algorithm to determine the type of a protein and predict topology in case of transmembrane proteins, and by applying a thorough search for domains and motifs as well as utilizing the most up-to-date version of all source databases, we managed to reach a 6-fold increase in the size of the whole database and a 2-fold increase in the number of transmembrane proteins. Availability and implementation TOPDOM database is available at http://topdom.enzim.hu The webpage utilizes the common Apache, PHP5 and MySQL software to provide the user interface for accessing and searching the database. The database itself is generated on a high performance computer. Contact tusnady.gabor@ttk.mta.hu Supplementary information Supplementary data are available at Bioinformatics online.",TOPDOM,0.992206335,NA,0,TOPDOM,0.992206335,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/12/2016 +25332401,http://clipserve.clip.ubc.ca/topfind,"Proteome TopFIND 3.0 with TopFINDer and PathFINDer: database and analysis tools for the association of protein termini to pre- and post-translational events. 
The knowledgebase TopFIND is an analysis platform focussed on protein termini, their origin, modification and hence their role on protein structure and function. Here, we present a major update to TopFIND, version 3, which includes a 70% increase in the underlying data to now cover a 90,696 proteins, 165,044 N-termini, 130,182 C-termini, 14,382 cleavage sites and 33,209 substrate cleavages in H. sapiens, M. musculus, A. thaliana, S. cerevisiae and E. coli. New features include the mapping of protein termini and cleavage entries across protein isoforms and significantly, the mapping of protein termini originating from alternative transcription and alternative translation start sites. Furthermore, two analysis tools for complex data analysis based on the TopFIND resource are now available online: TopFINDer, the TopFIND ExploRer, characterizes and annotates proteomics-derived N- or C-termini sets for their origin, sequence context and implications for protein structure and function. Neo-termini are also linked to associated proteases. PathFINDer identifies indirect connections between a protease and list of substrates or termini thus supporting the evaluation of complex proteolytic processes in vivo. To demonstrate the utility of the tools, a recent N-terminomics data set of inflamed murine skin has been re-analyzed. In re-capitulating the major findings originally performed manually, this validates the utility of these new resources. The point of entry for the resource is http://clipserve.clip.ubc.ca/topfind from where the graphical interface, all application programming interfaces (API) and the analysis tools are freely accessible.",TopFIND,0.99184984,NA,0,TopFIND,0.99184984,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/20/2014 +29432422,http://crdd.osdd.net/raghava/topicalpdb,"TopicalPdb: A database of topically delivered peptides. TopicalPdb (http://crdd.osdd.net/raghava/topicalpdb/) is a repository of experimentally verified topically delivered peptides. 
Data was manually collected from research articles. The current release of TopicalPdb consists of 657 entries, which includes peptides delivered through the skin (462 entries), eye (173 entries), and nose (22 entries). Each entry provides comprehensive information related to these peptides like the source of origin, nature of peptide, length, N- and C-terminal modifications, mechanism of penetration, type of assays, cargo and biological properties of peptides, etc. In addition to natural peptides, TopicalPdb contains information of peptides having non-natural, chemically modified residues and D-amino acids. Besides this primary information, TopicalPdb stores predicted tertiary structures as well as peptide sequences in SMILE format. Tertiary structures of peptides were predicted using state-of-art method PEPstrMod. In order to assist users, a number of web-based tools have been integrated that includes keyword search, data browsing, similarity search and structural similarity. We believe that TopicalPdb is a unique database of its kind and it will be very useful in designing peptides for non-invasive topical delivery.",TopicalPdb,0.998002231,NA,0,TopicalPdb,0.998002231,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/12/2018 +34522848,http://toppcell.cchmc.org,"An interactive single cell web portal identifies gene and cell networks in COVID-19 host responses. Numerous studies have provided single-cell transcriptome profiles of host responses to SARS-CoV-2 infection. Critically lacking however is a data mine that allows users to compare and explore cell profiles to gain insights and develop new hypotheses. To accomplish this, we harmonized datasets from COVID-19 and other control condition blood, bronchoalveolar lavage, and tissue samples, and derived a compendium of gene signature modules per cell type, subtype, clinical condition, and compartment. 
We demonstrate approaches to interacting with, exploring, and functional evaluating these modules via a new interactive web portal ToppCell (http://toppcell.cchmc.org/). As examples, we develop three hypotheses: (1) alternatively-differentiated monocyte-derived macrophages form a multicelllar signaling cascade that drives T√ɬÉ√ǬÇ√ɬÇ√Ǭ†cell recruitment and activation; (2) COVID-19-generated platelet subtypes exhibit dramatically altered potential to adhere, coagulate, and thrombose; and (3) extrafollicular B maturation is driven by a multilineage cell activation network that expresses an ensemble of genes strongly associated with risk for developing post-viral autoimmunity.",ToppCell,0.97753334,NA,0,ToppCell,0.97753334,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/10/2021 +23093603,http://iomics.ugent.be/toppr,"The Online Protein Processing Resource (TOPPR): a database and analysis platform for protein processing events. We here present The Online Protein Processing Resource (TOPPR; http://iomics.ugent.be/toppr/), an online database that contains thousands of published proteolytically processed sites in human and mouse proteins. These cleavage events were identified with COmbinded FRActional DIagonal Chromatography proteomics technologies, and the resulting database is provided with full data provenance. Indeed, TOPPR provides an interactive visual display of the actual fragmentation mass spectrum that led to each identification of a reported processed site, complete with fragment ion annotations and search engine scores. Apart from warehousing and disseminating these data in an intuitive manner, TOPPR also provides an online analysis platform, including methods to analyze protease specificity and substrate-centric analyses. 
Concretely, TOPPR supports three ways to retrieve data: (i) the retrieval of all substrates for one or more cellular stimuli or assays; (ii) a substrate search by UniProtKB/Swiss-Prot accession number, entry name or description; and (iii) a motif search that retrieves substrates matching a user-defined protease specificity profile. The analysis of the substrates is supported through the presence of a variety of annotations, including predicted secondary structure, known domains and experimentally obtained 3D structure where available. Across substrates, substrate orthologs and conserved sequence stretches can also be shown, with iceLogo visualization provided for the latter.",TOPPR,0.997798244,The Online Protein Processing Resource,0.95862323,TOPPR,0.997798244,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/23/2012 +33360695,http://www.rxnfinder.org/toxindb,"A data-driven integrative platform for computational prediction of toxin biotransformation with a case study. Recently, biogenic toxins have received increasing attention owing to their high contamination levels in feed and food as well as in the environment. However, there is a lack of an integrative platform for seamless linking of data-driven computational methods with 'wet' experimental validations. To this end, we constructed a novel platform that integrates the technical aspects of toxin biotransformation methods. First, a biogenic toxin database termed ToxinDB (http://www.rxnfinder.org/toxindb/), containing multifaceted data on more than 4836 toxins, was built. Next, more than 8000 biotransformation reaction rules were extracted from over 300,000 biochemical reactions extracted from ~580,000 literature reports curated by more than 100 people over the past decade. Based on these reaction rules, a toxin biotransformation prediction model was constructed. 
Finally, the global chemical space of biogenic toxins was constructed, comprising ~550,000 toxins and putative toxin metabolites, of which 94.7% of the metabolites have not been previously reported. Additionally, we performed a case study to investigate citrinin metabolism in Trichoderma, and a novel metabolite was identified with the assistance of the biotransformation prediction tool of ToxinDB. This unique integrative platform will assist exploration of the 'dark matter' of a toxin's metabolome and promote the discovery of detoxification enzymes.",ToxinDB,0.996215522,NA,0,ToxinDB,0.996215522,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/11/2020 +29385404,http://toxsign.genouest.org,"TOXsIgN: a cross-species repository for toxicogenomic signatures. Motivation:At the same time that toxicologists express increasing concern about reproducibility in this field, the development of dedicated databases has already smoothed the path toward improving the storage and exchange of raw toxicogenomic data. Nevertheless, none provides access to analyzed and interpreted data as originally reported in scientific publications. Given the increasing demand for access to this information, we developed TOXsIgN, a repository for TOXicogenomic sIgNatures. Results:The TOXsIgN repository provides a flexible environment that facilitates online submission, storage and retrieval of toxicogenomic signatures by the scientific community. It currently hosts 754 projects that describe more than 450 distinct chemicals and their 8491 associated signatures. It also provides users with a working environment containing a powerful search engine as well as bioinformatics/biostatistics modules that enable signature comparisons or enrichment analyses. Availability and implementation:The TOXsIgN repository is freely accessible at http://toxsign.genouest.org. Website implemented in Python, JavaScript and MongoDB, with all major browsers supported. 
Supplementary information:Supplementary data are available at Bioinformatics online.",TOXsIgN,0.996063232,NA,0,TOXsIgN,0.996063232,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2018 +21546359,http://tparvadb.ilri.cgiar.org,"TparvaDB: a database to support Theileria parva vaccine development. We describe the development of TparvaDB, a comprehensive resource to facilitate research towards development of an East Coast fever vaccine, by providing an integrated user-friendly database of all genome and related data currently available for Theileria parva. TparvaDB is based on the Generic Model Organism Database (GMOD) platform. It contains a complete reference genome sequence, Expressed Sequence Tags (ESTs), Massively Parallel Signature Sequencing (MPSS) expression tag data and related information from both public and private repositories. The Artemis annotation workbench provides online annotation functionality. TparvaDB represents a resource that will underpin and promote ongoing East Coast fever vaccine development and biological research. Database URL: http://tparvadb.ilri.cgiar.org.",TparvaDB,0.998618424,NA,0,TparvaDB,0.998618424,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/4/2011 +30913342,http://tpia.teaplant.org,"Tea Plant Information Archive: a comprehensive genomics and bioinformatics platform for tea plant. Tea is the world's widely consumed nonalcohol beverage with essential economic and health benefits. Confronted with the increasing large-scale omics-data set particularly the genome sequence released in tea plant, the construction of a comprehensive knowledgebase is urgently needed to facilitate the utilization of these data sets towards molecular breeding. We hereby present the first integrative and specially designed web-accessible database, Tea Plant Information Archive (TPIA; http://tpia.teaplant.org). 
The current release of TPIA employs the comprehensively annotated tea plant genome as framework and incorporates with abundant well-organized transcriptomes, gene expressions (across species, tissues and stresses), orthologs and characteristic metabolites determining tea quality. It also hosts massive transcription factors, polymorphic simple sequence repeats, single nucleotide polymorphisms, correlations, manually curated functional genes and globally collected germplasm information. A variety of versatile analytic tools (e.g. JBrowse, blast, enrichment analysis, etc.) are established helping users to perform further comparative, evolutionary and functional analysis. We show a case application of TPIA that provides novel and interesting insights into the phytochemical content variation of section Thea of genus Camellia under a well-resolved phylogenetic framework. The constructed knowledgebase of tea plant will serve as a central gateway for global tea community to better understand the tea plant biology that largely benefits the whole tea industry.",TPIA,0.981803596,Tea Plant Information Archive,0.938457757,TPIA,0.981803596,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/11/2019 +23547943,http://tracerdatabase.embl.de,"TRACER: a resource to study the regulatory architecture of the mouse genome. Background Mammalian genes are regulated through the action of multiple regulatory elements, often distributed across large regions. The mechanisms that control the integration of these diverse inputs into specific gene expression patterns are still poorly understood. New approaches enabling the dissection of these mechanisms in vivo are needed. Results Here, we describe TRACER (http://tracerdatabase.embl.de), a resource that centralizes information from a large on-going functional exploration of the mouse genome with different transposon-associated regulatory sensors. 
Hundreds of insertions have been mapped to specific genomic positions, and their corresponding regulatory potential has been documented by analysis of the expression of the reporter sensor gene in mouse embryos. The data can be easily accessed and provides information on the regulatory activities present in a large number of genomic regions, notably in gene-poor intervals that have been associated with human diseases. Conclusions TRACER data enables comparisons with the expression pattern of neighbouring genes, activity of surrounding regulatory elements or with other genomic features, revealing the underlying regulatory architecture of these loci. TRACER mouse lines can also be requested for in vivo transposition and chromosomal engineering, to analyse further regions of interest.",TRACER,0.993061364,NA,0,TRACER,0.993061364,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/2/2013 +34407614,http://tapdata.org.cn,"Tracking Air Pollution in China: Near Real-Time PM2.5 Retrievals from Multisource Data Fusion. Air pollution has altered the Earth's radiation balance, disturbed the ecosystem, and increased human morbidity and mortality. Accordingly, a full-coverage high-resolution air pollutant data set with timely updates and historical long-term records is essential to support both research and environmental management. Here, for the first time, we develop a near real-time air pollutant database known as Tracking Air Pollution in China (TAP, http://tapdata.org.cn/) that combines information from multiple data sources, including ground observations, satellite aerosol optical depth (AOD), operational chemical transport model simulations, and other ancillary data such as meteorological fields, land use data, population, and elevation. Daily full-coverage PM2.5 data at a spatial resolution of 10 km is our first near real-time product. 
The TAP PM2.5 is estimated based on a two-stage machine learning model coupled with the synthetic minority oversampling technique and a tree-based gap-filling method. Our model has an averaged out-of-bag cross-validation R2 of 0.83 for different years, which is comparable to those of other studies, but improves its performance at high pollution levels and fills the gaps in missing AOD on daily scale. The full coverage and near real-time updates of the daily PM2.5 data allow us to track the day-to-day variations in PM2.5 concentrations over China in a timely manner. The long-term records of PM2.5 data since 2000 will also support policy assessments and health impact studies. The TAP PM2.5 data are publicly available through our website for sharing with the research and policy communities.",TAP,0.955015858,Tracking Air Pollution in China,0.963049769,Tracking Air Pollution in China,0.963049769,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/18/2021 +30165582,http://cbdm-01.zdv.uni-mainz.de,Traitpedia: a collaborative effort to gather species traits. Summary Traitpedia is a collaborative database aimed to collect binary traits in a tabular form for a growing number of species. Availability and implementation Traitpedia can be accessed from http://cbdm-01.zdv.uni-mainz.de/~munoz/traitpedia. Supplementary information Supplementary data are available at Bioinformatics online.,Traitpedia,0.997839808,NA,0,Traitpedia,0.997839808,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2019 +33074314,http://www.biosino.org/transcirc,"TransCirc: an interactive database for translatable circular RNAs based on multi-omics evidence. TransCirc (https://www.biosino.org/transcirc/) is a specialized database that provide comprehensive evidences supporting the translation potential of circular RNAs (circRNAs). This database was generated by integrating various direct and indirect evidences to predict coding potential of each human circRNA and the putative translation products. 
Seven types of evidences for circRNA translation were included: (i) ribosome/polysome binding evidences supporting the occupancy of ribosomes onto circRNAs; (ii) experimentally mapped translation initiation sites on circRNAs; (iii) internal ribosome entry site on circRNAs; (iv) published N-6-methyladenosine modification data in circRNA that promote translation initiation; (v) lengths of the circRNA specific open reading frames; (vi) sequence composition scores from a machine learning prediction of all potential open reading frames; (vii) mass spectrometry data that directly support the circRNA encoded peptides across back-splice junctions. TransCirc provides a user-friendly searching/browsing interface and independent lines of evidences to predicte how likely a circRNA can be translated. In addition, several flexible tools have been developed to aid retrieval and analysis of the data. TransCirc can serve as an important resource for investigating the translation capacity of circRNAs and the potential circRNA-encoded peptides, and can be expanded to include new evidences or additional species in the future.",TransCirc,0.997567892,NA,0,TransCirc,0.997567892,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +29363422,http://matsui-lab.riken.jp/rubber,"Construction of Par√ɬÉ√ǬÉ√ɬÇ√Ǭ° rubber tree genome and multi-transcriptome database accelerates rubber researches. Background Natural rubber is an economically important material. Currently the Par√ɬÉ√ǬÉ√ɬÇ√Ǭ° rubber tree, Hevea brasiliensis is the main commercial source. Little is known about rubber biosynthesis at the molecular level. Next-generation sequencing (NGS) technologies brought draft genomes of three rubber cultivars and a variety of RNA sequencing (RNA-seq) data. However, no current genome or transcriptome databases (DB) are organized by gene. Results A gene-oriented database is a valuable support for rubber research. Based on our original draft genome sequence of H. 
brasiliensis RRIM600, we constructed a rubber tree genome and transcriptome DB. Our DB provides genome information including gene functional annotations and multi-transcriptome data of RNA-seq, full-length cDNAs including PacBio Isoform sequencing (Iso-Seq), ESTs and genome wide transcription start sites (TSSs) derived from CAGE technology. Using our original and publically available RNA-seq data, we calculated co-expressed genes for identifying functionally related gene sets and/or genes regulated by the same transcription factor (TF). Users can access multi-transcriptome data through both a gene-oriented web page and a genome browser. For the gene searching system, we provide keyword search, sequence homology search and gene expression search; users can also select their expression threshold easily. Conclusion The rubber genome and transcriptome DB provides rubber tree genome sequence and multi-transcriptomics data. This DB is useful for comprehensive understanding of the rubber transcriptome. This will assist both industrial and academic researchers for rubber and economically important close relatives such as R. communis, M. esculenta and J. curcas. The Rubber Transcriptome DB release 2017.03 is accessible at http://matsui-lab.riken.jp/rubber/ .",NA,0,Transcriptome,0.769528151,Transcriptome,0.769528151,1,NA,"22359434.0, 23539303.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,1/19/2018 +23539303,"http://genome.ucsc.edu/, http://www.weizmann.ac.il/complex/compphys/software/Amit/primers/batch_query_qpcr_primers.htm","An accessible database for mouse and human whole transcriptome qPCR primers. Motivation Real time quantitative polymerase chain reaction (qPCR) is an important tool in quantitative studies of DNA and RNA molecules; especially in transcriptome studies, where different primer combinations allow identification of specific transcripts such as splice variants or precursor messenger RNA. 
Several softwares that implement various rules for optimal primer design are available. Nevertheless, as designing qPCR primers needs to be done manually, the repeated task is tedious, time consuming and prone to errors. Results We used a set of rules to automatically design all possible exon-exon and intron-exon junctions in the human and mouse transcriptomes. The resulting database is included as a track in the UCSC genome browser, making it widely accessible and easy to use. Availability The database is available from the UCSC genome browser (http://genome.ucsc.edu/), track name 'Whole Transcriptome qPCR Primers' for the hg19 (Human) and mm10 (Mouse) genome versions. Batch query is available in the following: http://www.weizmann.ac.il/complex/compphys/software/Amit/primers/batch_query_qpcr_primers.htm Contact amit.zeisel@weizmann.ac.il or eytan.domany@weizmann.ac.il Supplementary information Supplementary data are available at Bioinformatics online.",NA,0,Transcriptome,0.716399908,Transcriptome,0.716399908,1,NA,"22359434.0, 29363422.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: CLASS,NA,NA,3/28/2013 +22359434,http://www.bioingenios.ira.cinvestav.mx:81/Joomla,"The capsicum transcriptome DB: a ""hot"" tool for genomic research. Chili pepper (Capsicum annuum) is an economically important crop with no available public genome sequence. We describe a genomic resource to facilitate Capsicum annuum research. A collection of Expressed Sequence Tags (ESTs) derived from five C. annuum organs (root, stem, leaf, flower and fruit) were sequenced using the Sanger method and multiple leaf transcriptomes were deeply sampled using with GS-pyrosequencing. A hybrid assembly of 1,324,516 raw reads yielded 32,314 high quality contigs as validated by coverage and identity analysis with existing pepper sequences. 
Overall, 75.5% of the contigs had significant sequence similarity to entries in nucleic acid and protein databases; 23% of the sequences have not been previously reported for C. annuum and expand sequence resources for this species. A MySQL database and a user-friendly Web interface were constructed with search-tools that permit queries of the ESTs including sequence, functional annotation, Gene Ontology classification, metabolic pathways, and assembly information. The Capsicum Transcriptome DB is free available from http://www.bioingenios.ira.cinvestav.mx:81/Joomla/",NA,0,Transcriptome,0.684604645,Transcriptome,0.684604645,1,NA,"23539303.0, 29363422.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,1/6/2012 +34000296,http://genome.ucsc.edu/s/vollmers/IAMA,"Generation of an isoform-level transcriptome atlas of macrophage activation. RNA-seq is routinely used to measure gene expression changes in response to cell perturbation. Genes upregulated or downregulated following some perturbation are designated as genes of interest, and their most expressed isoform(s) would then be selected for follow-up experimentation. However, because of its need to fragment RNA molecules, RNA-seq is limited in its ability to capture gene isoforms and their expression patterns. This lack of isoform-specific data means that isoforms would be selected based on annotation databases that are incomplete, not tissue specific, or do not provide key information on expression levels. As a result, minority or nonexistent isoforms might be selected for follow-up, leading to loss in valuable resources and time. There is therefore a great need to comprehensively identify gene isoforms along with their corresponding levels of expression. 
Using the long-read nanopore-based R2C2 method, which does not fragment RNA molecules, we generated an Isoform-level transcriptome Atlas of Macrophage Activation that identifies full-length isoforms in primary human monocyte-derived macrophages. Macrophages are critical innate immune cells important for recognizing pathogens through binding of pathogen-associated molecular patterns to toll-like receptors, culminating in the initiation of host defense pathways. We characterized isoforms for most moderately-to-highly expressed genes in resting and toll-like receptor-activated monocyte-derived macrophages, identified isoforms differentially expressed between conditions, and validated these isoforms by RT-qPCR. We compiled these data into a user-friendly data portal within the UCSC Genome Browser (https://genome.ucsc.edu/s/vollmers/IAMA). Our atlas represents a valuable resource for innate immune research, providing unprecedented isoform information for primary human macrophages.",NA,0,transcriptome Atlas of Macrophage,0.631804083,transcriptome Atlas of Macrophage,0.631804083,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2021 +22292669,http://tagc.univ-mrs.fr/tbrowser,"TranscriptomeBrowser 3.0: introducing a new compendium of molecular interactions and a new visualization tool for the study of gene regulatory networks. Background Deciphering gene regulatory networks by in silico approaches is a crucial step in the study of the molecular perturbations that occur in diseases. The development of regulatory maps is a tedious process requiring the comprehensive integration of various evidences scattered over biological databases. Thus, the research community would greatly benefit from having a unified database storing known and predicted molecular interactions. 
Furthermore, given the intrinsic complexity of the data, the development of new tools offering integrated and meaningful visualizations of molecular interactions is necessary to help users drawing new hypotheses without being overwhelmed by the density of the subsequent graph. Results We extend the previously developed TranscriptomeBrowser database with a set of tables containing 1,594,978 human and mouse molecular interactions. The database includes: (i) predicted regulatory interactions (computed by scanning vertebrate alignments with a set of 1,213 position weight matrices), (ii) potential regulatory interactions inferred from systematic analysis of ChIP-seq experiments, (iii) regulatory interactions curated from the literature, (iv) predicted post-transcriptional regulation by micro-RNA, (v) protein kinase-substrate interactions and (vi) physical protein-protein interactions. In order to easily retrieve and efficiently analyze these interactions, we developed In-teractomeBrowser, a graph-based knowledge browser that comes as a plug-in for Transcriptome-Browser. The first objective of InteractomeBrowser is to provide a user-friendly tool to get new insight into any gene list by providing a context-specific display of putative regulatory and physical interactions. To achieve this, InteractomeBrowser relies on a ""cell compartments-based layout"" that makes use of a subset of the Gene Ontology to map gene products onto relevant cell compartments. This layout is particularly powerful for visual integration of heterogeneous biological information and is a productive avenue in generating new hypotheses. The second objective of InteractomeBrowser is to fill the gap between interaction databases and dynamic modeling. It is thus compatible with the network analysis software Cytoscape and with the Gene Interaction Network simulation software (GINsim). 
We provide examples underlying the benefits of this visualization tool for large gene set analysis related to thymocyte differentiation. Conclusions The InteractomeBrowser plugin is a powerful tool to get quick access to a knowledge database that includes both predicted and validated molecular interactions. InteractomeBrowser is available through the TranscriptomeBrowser framework and can be found at: http://tagc.univ-mrs.fr/tbrowser/. Our database is updated on a regular basis.",TranscriptomeBrowser,0.988476872,NA,0,TranscriptomeBrowser,0.988476872,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/31/2012 +22786849,http://www.nursa.org/transcriptomine,"Transcriptomine, a web resource for nuclear receptor signaling transcriptomes. The nuclear receptor (NR) superfamily of ligand-regulated transcription factors directs ligand- and tissue-specific transcriptomes in myriad developmental, metabolic, immunological, and reproductive processes. The NR signaling field has generated a wealth of genome-wide expression data points, but due to deficits in their accessibility, annotation, and integration, the full potential of these studies has not yet been realized. We searched public gene expression databases and MEDLINE for global transcriptomic datasets relevant to NRs, their ligands, and coregulators. We carried out extensive, deep reannotation of the datasets using controlled vocabularies for RNA Source and regulating molecule and resolved disparate gene identifiers to official gene symbols to facilitate comparison of fold changes and their significance across multiple datasets. We assembled these data points into a database, Transcriptomine (http://www.nursa.org/transcriptomine), that allows for multiple, menu-driven querying strategies of this transcriptomic ""superdataset,"" including single and multiple genes, Gene Ontology terms, disease terms, and uploaded custom gene lists. 
Experimental variables such as regulating molecule, RNA Source, as well as fold-change and P value cutoff values can be modified, and full data records can be either browsed or downloaded for downstream analysis. We demonstrate the utility of Transcriptomine as a hypothesis generation and validation tool using in silico and experimental use cases. Our resource empowers users to instantly and routinely mine the collective biology of millions of previously disparate transcriptomic data points. By incorporating future transcriptome-wide datasets in the NR signaling field, we anticipate Transcriptomine developing into a powerful resource for the NR- and other signal transduction research communities.",Transcriptomine,0.981516123,NA,0,Transcriptomine,0.981516123,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/10/2012 +24334957,http://bioinformatics.charite.de/transformer,"The Transformer database: biotransformation of xenobiotics. As the number of prescribed drugs is constantly rising, drug-drug interactions are an important issue. The simultaneous administration of several drugs can cause severe adverse effects based on interactions with the same metabolizing enzyme(s). The Transformer database (http://bioinformatics.charite.de/transformer) contains integrated information on the three phases of biotransformation (modification, conjugation and excretion) of 3000 drugs and >350 relevant food ingredients (e.g. grapefruit juice) and herbs, which are catalyzed by 400 proteins. A total of 100,000 interactions were found through text mining and manual validation. The 3D structures of 200 relevant proteins are included. The database enables users to search for drugs with a visual display of known interactions with phase I (Cytochrome P450) and phase II enzymes, transporters, food and herbs. For each interaction, PubMed references are given. To detect mutual impairments of drugs, the drug-cocktail tool displays interactions between selected drugs. 
By choosing the indication for a drug, the tool offers suggestions for alternative medications to avoid metabolic conflicts. Drug interactions can also be visualized in an interactive network view. Additionally, prodrugs, including their mechanisms of activation, and further information on enzymes of biotransformation, including 3D models, can be viewed.",Transformer,0.906411409,NA,0,Transformer,0.906411409,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/10/2013 +32277449,http://wwwmgs.bionet.nsc.ru/mgs/dbases/tgp/home.html,"Choice of the Promoter for Tissue and Developmental Stage-Specific Gene Expression. Transgenic technologies belong to important tools of reverse genetics and biotechnology in plants. Targeted genetic modifications can reveal functions of genes of interest, change metabolic and regulatory pathways, or result in accumulation of valuable proteins or metabolites. However, to be efficient in targeted genetic modification, the chimeric gene construct should be designed properly. In particular, the promoters used to control transgene expression need to be carefully chosen. Most promoters in widely used vectors belong to strong and constitutively expressed variants. However, in many cases transgene expression has to be restricted to certain tissue, stage of development, or response to some internal or external stimuli. In turn, a large variety of tissue-specific promoters have been studied and information on their characteristics may be recovered from the literature. An appropriate promoter may be selected and used in genetic construct to optimize the transgene transcription pattern. We have previously designed the TGP database (TransGene Promoters, http://wwwmgs.bionet.nsc.ru/mgs/dbases/tgp/home.html ) collecting information from the publications in this field. 
Here we review the wide range of noncanonical tissue-specific and developmentally regulated promoters that might be used for transgene expression control.",TGP,0.893220305,TransGene Promoters,0.957293236,TransGene Promoters,0.957293236,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +29106630,http://www.translatomedb.net,"TranslatomeDB: a comprehensive database and cloud-based analysis platform for translatome sequencing data. Translation is a key regulatory step, linking transcriptome and proteome. Two major methods of translatome investigations are RNC-seq (sequencing of translating mRNA) and Ribo-seq (ribosome profiling). To facilitate the investigation of translation, we built a comprehensive database TranslatomeDB (http://www.translatomedb.net/) which provides collection and integrated analysis of published and user-generated translatome sequencing data. The current version includes 2453 Ribo-seq, 10 RNC-seq and their 1394 corresponding mRNA-seq datasets in 13 species. The database emphasizes the analysis functions in addition to the dataset collections. Differential gene expression (DGE) analysis can be performed between any two datasets of same species and type, both on transcriptome and translatome levels. The translation indices translation ratios, elongation velocity index and translational efficiency can be calculated to quantitatively evaluate translational initiation efficiency and elongation velocity, respectively. All datasets were analyzed using a unified, robust, accurate and experimentally-verifiable pipeline based on the FANSe3 mapping algorithm and edgeR for DGE analyzes. TranslatomeDB also allows users to upload their own datasets and utilize the identical unified pipeline to analyze their data. 
We believe that our TranslatomeDB is a comprehensive platform and knowledgebase on translatome and proteome research, releasing the biologists from complex searching, analyzing and comparing huge sequencing data without needing local computational power.",TranslatomeDB,0.996909559,NA,0,TranslatomeDB,0.996909559,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +30380112,"http://translocatome.linkgroup.hu, http://comppi.linkgroup.hu","Translocatome: a novel resource for the analysis of protein translocation between cellular organelles. Here we present Translocatome, the first dedicated database of human translocating proteins (URL: http://translocatome.linkgroup.hu). The core of the Translocatome database is the manually curated data set of 213 human translocating proteins listing the source of their experimental validation, several details of their translocation mechanism, their local compartmentalized interactome, as well as their involvement in signalling pathways and disease development. In addition, using the well-established and widely used gradient boosting machine learning tool, XGBoost, Translocatome provides translocation probability values for 13√ɬÉ√ǬÇ√ɬÇ√Ǭ†066 human proteins identifying 1133 and 3268 high- and low-confidence translocating proteins, respectively. The database has user-friendly search options with a UniProt autocomplete quick search and advanced search for proteins filtered by their localization, UniProt identifiers, translocation likelihood or data complexity. Download options of search results, manually curated and predicted translocating protein sets are available on its website. The update of the database is helped by its manual curation framework and connection to the previously published ComPPI compartmentalized protein-protein interaction database√ɬÉ√ǬÇ√ɬÇ√Ǭ†(http://comppi.linkgroup.hu). 
As shown by the application examples of merlin (NF2) and tumor protein 63 (TP63) Translocatome allows a better comprehension of protein translocation as a systems biology phenomenon and can be used as a discovery-tool in the protein translocation field.",Translocatome,0.997318149,NA,0,Translocatome,0.997318149,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30371815,http://www.cuilab.cn/transmir,"TransmiR v2.0: an updated transcription factor-microRNA regulation database. MicroRNAs (miRNAs) are important post-transcriptional regulators of gene expression and play vital roles in various biological processes. It has been reported that aberrant regulation of miRNAs was associated with the development and progression of various diseases, but the underlying mechanisms are not fully deciphered. Here, we described our updated TransmiR v2.0 database for more comprehensive information about transcription factor (TF)-miRNA regulations. 3730 TF-miRNA regulations among 19 species from 1349 reports were manually curated by surveying >8000 publications, and more than 1.7 million tissue-specific TF-miRNA regulations were further incorporated based on ChIP-seq data. Besides, we constructed a 'Predict' module to query the predicted TF-miRNA regulations in human based on binding motifs of TFs. To facilitate the community, we provided a 'Network' module to visualize TF-miRNA regulations for each TF and miRNA, or for a specific disease. An 'Enrichment analysis' module was also included to predict TFs that are likely to regulate a miRNA list of interest. In conclusion, with improved data coverage and webserver functionalities, TransmiR v2.0 would be a useful resource for investigating the regulation of miRNAs. 
TransmiR v2.0 is freely accessible at http://www.cuilab.cn/transmir.",TransmiR,0.97742933,NA,0,TransmiR,0.97742933,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +31831861,http://plantomics.mind.meiji.ac.jp/nashi,"TRANSNAP: a web database providing comprehensive information on Japanese pear transcriptome. Japanese pear (Pyrus pyrifolia) is a major fruit tree in the family Rosaceae and is bred for fruit production. To promote the development of breeding strategies and molecular research for Japanese pear, we sequenced the transcripts of Japanese pear variety 'Hosui'. To exhaustively collect information of total gene expression, RNA samples from various organs and stages of Japanese pear were sequenced by three technologies, single-molecule real-time (SMRT) sequencing, 454 pyrosequencing, and Sanger sequencing. Using all those reads, we determined comprehensive reference sequences of Japanese pear. Then, their protein sequences were predicted, and biological functional annotations were assigned. Finally, we developed a web database, TRANSNAP (http://plantomics.mind.meiji.ac.jp/nashi), which is the first web resource of Japanese pear omics information. This database provides highly reliable information via a user-friendly web interface: the reference sequences, gene functional annotations, and gene expression profiles from microarray experiments. In addition, based on sequence comparisons among Japanese, Chinese and European pears, similar protein sequences among the pears and species-specific proteins in Japanese pear can be quickly and efficiently identified. 
TRANSNAP will aid molecular research and breeding in Japanese pear, and its information is available for comparative analysis among other pear species and families.",TRANSNAP,0.997132599,NA,0,TRANSNAP,0.997132599,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/12/2019 +27899676,http://www.membranetransport.org/transportDB2,"TransportDB 2.0: a database for exploring membrane transporters in sequenced genomes from all domains of life. All cellular life contains an extensive array of membrane transport proteins. The vast majority of these transporters have not been experimentally characterized. We have developed a bioinformatic pipeline to identify and annotate complete sets of transporters in any sequenced genome. This pipeline is now fully automated enabling it to better keep pace with the accelerating rate of genome sequencing. This manuscript describes TransportDB 2.0 (http://www.membranetransport.org/transportDB2/), a completely updated version of TransportDB, which provides access to the large volumes of data generated by our automated transporter annotation pipeline. The TransportDB 2.0 web portal has been rebuilt to utilize contemporary JavaScript libraries, providing a highly interactive interface to the annotation information, and incorporates analysis tools that enable users to query the database on a number of levels. For example, TransportDB 2.0 includes tools that allow users to select annotated genomes of interest from the thousands of species held in the database and compare their complete transporter complements.",TransportDB,0.979824901,NA,0,TransportDB,0.979824901,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +30184150,http://www.licpathway.net/TRCirc,"TRCirc: a resource for transcriptional regulation information of circRNAs. In recent years, high-throughput genomic technologies like chromatin immunoprecipitation sequencing (ChIp-seq) and transcriptome sequencing (RNA-seq) have been becoming both more refined and less expensive, making them more accessible. 
Many circular RNAs (circRNAs) that originate from back-spliced exons have been identified in various cell lines across different species. However, the regulatory mechanism for transcription of circRNAs remains unclear. Therefore, there is an urgent need to construct a database detailing the transcriptional regulation of circRNAs. TRCirc (http://www.licpathway.net/TRCirc) provides a resource for efficient retrieval, browsing and visualization of transcriptional regulation information of circRNAs. The current version of TRCirc documents 92√ɬÉ√ǬÇ√ɬÇ√Ǭ†375 circRNAs and 161 transcription factors (TFs) from more than 100 cell types and together represent more than 765√ɬÉ√ǬÇ√ɬÇ√Ǭ†000 TF-circRNA regulatory relationships. Furthermore, TRCirc provides other regulatory information about transcription of circRNAs, including their expression, methylation levels, H3K27ac signals in regulation regions and super-enhancers associated with circRNAs. TRCirc provides a convenient, user-friendly interface to search, browse and visualize detailed information about these circRNAs.",TRCirc,0.998382151,NA,0,TRCirc,0.998382151,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2019 +30119164,http://www.trials-network.org/treasure,"Methodology of a new inflammatory arthritis registry: TReasure. Background/aim:The TReasure registry, created in 2017, is an observational multicenter cohort that includes inflammatory arthritis patients. This article reviews the methodology and objectives of the TReasure registry established to collect data from rheumatoid arthritis (RA) and spondyloarthritis (SpA) patients. Methodology:Fifteen rheumatology centers in Turkey will contribute data to the TReasure database. The actual proprietor of the database is the Hacettepe Rheumatology Association (HRD) and Hacettepe Financial Enterprises. Pharmaceutical companies that operate in Turkey (in alphabetical or er), Abbvie, Amgen, BMS, Celltrion Healthcare, Novartis, Pfizer, Roche, and UCB, support the TReasure registry. 
TReasure is a web-based database to which users connect through a URL (https://www.trials-network.org/treasure) with their unique identifier and passwords provided for data entry and access. TReasure records demographic and clinical features, comorbidities, radiology and laboratory results, measures of disease activity, and treatment data. Discussion:TReasure will provide us with various types of data, such as a cross-sectional view of the current nationwide status of the patients currently receiving these treatments, and retrospective data as much as allowed by the participating centers√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭô records. Finally, a high-quality prospective dataset will be built over the ensuing years from patients with a new diagnosis of RA or SpA.",TReasure,0.990983844,NA,0,TReasure,0.990983844,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/16/2018 +25604253,http://umd.be/TREAT_DMD,"The TREAT-NMD DMD Global Database: analysis of more than 7,000 Duchenne muscular dystrophy mutations. Analyzing the type and frequency of patient-specific mutations that give rise to Duchenne muscular dystrophy (DMD) is an invaluable tool for diagnostics, basic scientific research, trial planning, and improved clinical care. Locus-specific databases allow for the collection, organization, storage, and analysis of genetic variants of disease. Here, we describe the development and analysis of the TREAT-NMD DMD Global database (http://umd.be/TREAT_DMD/). We analyzed genetic data for 7,149 DMD mutations held within the database. A total of 5,682 large mutations were observed (80% of total mutations), of which 4,894 (86%) were deletions (1 exon or larger) and 784 (14%) were duplications (1 exon or larger). There were 1,445 small mutations (smaller than 1 exon, 20% of all mutations), of which 358 (25%) were small deletions and 132 (9%) small insertions and 199 (14%) affected the splice sites. Point mutations totalled 756 (52% of small mutations) with 726 (50%) nonsense mutations and 30 (2%) missense mutations. 
Finally, 22 (0.3%) mid-intronic mutations were observed. In addition, mutations were identified within the database that would potentially benefit from novel genetic therapies for DMD including stop codon read-through therapies (10% of total mutations) and exon skipping therapy (80% of deletions and 55% of total mutations).",TREAT-NMD DMD,0.639145579,NA,0,TREAT-NMD DMD,0.639145579,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/17/2015 +24194607,http://www.treefam.org,"TreeFam v9: a new website, more species and orthology-on-the-fly. TreeFam (http://www.treefam.org) is a database of phylogenetic trees inferred from animal genomes. For every TreeFam family we provide homology predictions together with the evolutionary history of the genes. Here we describe an update of the TreeFam database. The TreeFam project was resurrected in 2012 and has seen two releases since. The latest release (TreeFam 9) was made available in March 2013. It has orthology predictions and gene trees for 109 species in 15,736 families covering √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº2.2 million sequences. With release 9 we made modifications to our production pipeline and redesigned our website with improved gene tree visualizations and Wikipedia integration. Furthermore, we now provide an HMM-based sequence search that places a user-provided protein sequence into a TreeFam gene tree and provides quick orthology prediction. The tool uses Mafft and RAxML for the fast insertion into a reference alignment and tree, respectively. Besides the aforementioned technical improvements, we present a new approach to visualize gene trees and alternative displays that focuses on showing homology information from a species tree point of view. From release 9 onwards, TreeFam is now hosted at the EBI.",TreeFam,0.990964532,NA,0,TreeFam,0.990964532,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/4/2013 +25413576,http://www.treeshrewdb.org,"Tree shrew database (TreeshrewDB): a genomic knowledge base for the Chinese tree shrew. 
The tree shrew (Tupaia belangeri) is a small mammal with a close relationship to primates and it has been proposed as an alternative experimental animal to primates in biomedical research. The recent release of a high-quality Chinese tree shrew genome enables more researchers to use this species as the model animal in their studies. With the aim to making the access to an extensively annotated genome database straightforward and easy, we have created the Tree shrew Database (TreeshrewDB). This is a web-based platform that integrates the currently available data from the tree shrew genome, including an updated gene set, with a systematic functional annotation and a mRNA expression pattern. In addition, to assist with automatic gene sequence analysis, we have integrated the common programs Blast, Muscle, GBrowse, GeneWise and codeml, into TreeshrewDB. We have also developed a pipeline for the analysis of positive selection. The user-friendly interface of TreeshrewDB, which is available at http://www.treeshrewdb.org, will undoubtedly help in many areas of biological research into the tree shrew.",TreeshrewDB,0.991575599,Tree shrew database,0.932710469,TreeshrewDB,0.991575599,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2014 +23284086,http://treetfdb.bmep.riken.jp/index.pl,"TreeTFDB: an integrative database of the transcription factors from six economically important tree crops for functional predictions and comparative and functional genomics. Crop plants, whose productivity is affected by a wide range of growing and environmental conditions, are grown for economic purposes. Transcription factors (TFs) play central role in regulation of many biological processes, including plant development and responses to environmental stimuli, by activating or repressing spatiotemporal gene expression. 
Here, we describe the TreeTFDB (http://treetfdb.bmep.riken.jp/index.pl) that houses the TF repertoires of six economically important tree crop species: Jatropha curcas, papaya, cassava, poplar, castor bean and grapevine. Among these, the TF repertoire of J. curcas has not been reported by any other TF databases. In addition to their basic information, such as sequence and domain features, domain alignments, gene ontology assignment and sequence comparison, information on available full-length cDNAs, identity and positions of all types of known cis-motifs found in the promoter regions, gene expression data are provided. With its newly designed and friendly interface and its unique features, TreeTFDB will enable research community to predict the functions and provide access to available genetic resources for performing comparative and functional genomics of the crop TFs, either individually or at whole family level, in a comprehensive and convenient manner.",TreeTFDB,0.998396933,NA,0,TreeTFDB,0.998396933,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/2/2013 +32976578,http://shiny.imbei.uni-mainz.de:3838/trend-db,"TREND-DB-a transcriptome-wide atlas of the dynamic landscape of alternative polyadenylation. Alternative polyadenylation (APA) profoundly expands the transcriptome complexity. Perturbations of APA can disrupt biological processes, ultimately resulting in devastating disorders. A major challenge in identifying mechanisms and consequences of APA (and its perturbations) lies in the complexity of RNA 3' end processing, involving poorly conserved RNA motifs and multi-component complexes consisting of far more than 50 proteins. This is further complicated in that RNA 3' end maturation is closely linked to transcription, RNA processing and even epigenetic (histone/DNA/RNA) modifications. 
Here, we present TREND-DB (http://shiny.imbei.uni-mainz.de:3838/trend-db), a resource cataloging the dynamic landscape of APA after depletion of >170 proteins involved in various facets of transcriptional, co- and post-transcriptional gene regulation, epigenetic modifications and further processes. TREND-DB visualizes the dynamics of transcriptome 3' end diversification (TREND) in a highly interactive manner; it provides a global APA network map and allows interrogating genes affected by specific APA-regulators and vice versa. It also permits condition-specific functional enrichment analyses of APA-affected genes, which suggest wide biological and clinical relevance across all RNAi conditions. The implementation of the UCSC Genome Browser provides additional customizable layers of gene regulation accounting for individual transcript isoforms (e.g. epigenetics, miRNA-binding sites and RNA-binding proteins). TREND-DB thereby fosters disentangling the role of APA for various biological programs, including potential disease mechanisms, and helps identify their diagnostic and therapeutic potential.",TREND-DB,0.993754983,NA,0,TREND-DB,0.993754983,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +29145635,http://combio.pl/trex)-the,"tRex: A Web Portal for Exploration of tRNA-Derived Fragments in Arabidopsis thaliana. tRNA-derived fragments (tRFs) constitute a new class of short regulatory RNAs that are a product of nascent or mature tRNA processing. tRF sequences have been identified in all domains of life; however, most published research pertains to human, yeast and some bacterial organisms. Despite growing interest in plant tRFs and accumulating evidence of their function in plant development and stress responses, no public, web-based repository dedicated to these molecules is currently available. Here, we introduce tRex (http://combio.pl/trex)-the first comprehensive data-driven online resource specifically dedicated to tRFs in the model plant Arabidopsis thaliana. 
The portal is based on verified Arabidopsis tRNA annotation and includes in-house-generated and publicly available small RNA sequencing experiments from various tissues, ecotypes, genotypes and stress conditions. The provided web-based tools are designed in a user-friendly manner and allow for seamless exploration of the data that are presented in the form of dynamic tables and cumulative coverage profiles. The tRex database is connected to external genomic and citation resources, which makes it a one-stop solution for Arabidopsis tRF-related research.",tRex,0.994673848,NA,0,tRex,0.994673848,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +25392422,http://genome.bioch.virginia.edu/trfdb,"tRFdb: a database for transfer RNA fragments. We have created tRFdb, the first database of transfer RNA fragments (tRFs), available at http://genome.bioch.virginia.edu/trfdb/. With over 100 small RNA libraries analyzed, the database currently contains the sequences and read counts of the three classes of tRFs for eight species: R. sphaeroides, S. pombe, D. melanogaster, C. elegans, Xenopus, zebra fish, mouse and human, for a total of 12,877 tRFs. The database can be searched by tRF ID or tRF sequence, and the results can be limited by organism. The search results show the genome coordinates and names of the tRNAs the sequence may derive from, and there are links for the sequence of the tRF and parental tRNA, and links for the read counts in all the corresponding small RNA libraries. As a case study for how this database may be used, we have shown that a certain class of tRFs, tRF-1s, is highly upregulated in B-cell malignancies.",tRFdb,0.998105168,NA,0,tRFdb,0.998105168,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +33035346,http://trftarget.net,"tRFtarget: a database for transfer RNA-derived fragment targets. Transfer RNA-derived fragments (tRFs) are a new class of small non-coding RNAs and play important roles in biological and physiological processes. 
Prediction of tRF target genes and binding sites is crucial in understanding the biological functions of tRFs in the molecular mechanisms of human diseases. We developed a publicly accessible web-based database, tRFtarget (http://trftarget.net), for tRF target prediction. It contains the computationally predicted interactions between tRFs and mRNA transcripts using the two state-of-the-art prediction tools RNAhybrid and IntaRNA, including location of the binding sites on the target, the binding region, and free energy of the binding stability with graphic illustration. tRFtarget covers 936 tRFs and 135 thousand predicted targets in eight species. It allows researchers to search either target genes by tRF IDs or tRFs by gene symbols/transcript names. We also integrated the manually curated experimental evidence of the predicted interactions into the database. Furthermore, we provided a convenient link to the DAVID√ɬÉ√ǬÇ√ɬÇ√Ç¬Æ web server to perform downstream functional pathway analysis and gene ontology annotation on the predicted target genes. This database provides useful information for the scientific community to experimentally validate tRF target genes and facilitate the investigation of the molecular functions and mechanisms of tRFs.",tRFtarget,0.997647464,NA,0,tRFtarget,0.997647464,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +31432762,"http://hanlab.uth.edu/tRic/, http://bioinfo.life.hust.edu.cn/tRic","tRic: a user-friendly data portal to explore the expression landscape of tRNAs in human cancers. Transfer RNAs (tRNAs) play critical roles in human cancer. Currently, no database provides the expression landscape and clinical relevance of tRNAs across a variety of human cancers. Utilizing miRNA-seq data from The Cancer Genome Atlas, we quantified the relative expression of tRNA genes and merged them into the codon level and amino level across 31 cancer types. 
The expression of tRNAs is associated with clinical features of patient smoking history and overall survival, and disease stage, subtype, and grade. We further analysed codon frequency and amino acid frequency for each protein coding gene and linked alterations of tRNA expression with protein translational efficiency. We include these data resources in a user-friendly data portal, tRic (tRNA in cancer, https://hanlab.uth.edu/tRic/ or http://bioinfo.life.hust.edu.cn/tRic/), which can be of significant interest to the research community.",tRic,0.994563282,NA,0,tRic,0.994563282,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/25/2019 +29045755,http://bioinformatics.psb.ugent.be/triforc,"The TriForC database: a comprehensive up-to-date resource of plant triterpene biosynthesis. Triterpenes constitute a large and important class of plant natural products with diverse structures and functions. Their biological roles range from membrane structural components over plant hormones to specialized plant defence compounds. Furthermore, triterpenes have great potential for a variety of commercial applications such as vaccine adjuvants, anti-cancer drugs, food supplements and agronomic agents. Their biosynthesis is carried out through complicated, branched pathways by multiple enzyme types that include oxidosqualene cyclases, cytochrome P450s, and UDP-glycosyltransferases. Given that the number of characterized triterpene biosynthesis enzymes has been growing fast recently, the need for a database specifically focusing on triterpene enzymology became eminent. Here, we present the TriForC database (http://bioinformatics.psb.ugent.be/triforc/), encompassing a comprehensive catalogue of triterpene biosynthesis enzymes. 
This highly interlinked database serves as a user-friendly access point to versatile data sets of enzyme and compound features, enabling the scanning of a complete catalogue of experimentally validated triterpene enzymes, their substrates and products, as well as the pathways they constitute in various plant species. The database can be accessed by direct browsing or through convenient search tools including keyword, BLAST, plant species and substructure options. This database will facilitate gene mining and creating genetic toolboxes for triterpene synthetic biology.",TriForC,0.973249376,NA,0,TriForC,0.973249376,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2018 +23071747,http://www.trpchannel.org,"TRIP database 2.0: a manually curated information hub for accessing TRP channel interaction network. Transient receptor potential (TRP) channels are a family of Ca(2+)-permeable cation channels that play a crucial role in biological and disease processes. To advance TRP channel research, we previously created the TRIP (TRansient receptor potential channel-Interacting Protein) Database, a manually curated database that compiles scattered information on TRP channel protein-protein interactions (PPIs). However, the database needs to be improved for information accessibility and data utilization. Here, we present the TRIP Database 2.0 (http://www.trpchannel.org) in which many helpful, user-friendly web interfaces have been developed to facilitate knowledge acquisition and inspire new approaches to studying TRP channel functions: 1) the PPI information found in the supplementary data of referred articles was curated; 2) the PPI summary matrix enables users to intuitively grasp overall PPI information; 3) the search capability has been expanded to retrieve information from 'PubMed' and 'PIE the search' (a specialized search engine for PPI-related articles); and 4) the PPI data are available as sif files for network visualization and analysis using 'Cytoscape'. 
Therefore, our TRIP Database 2.0 is an information hub that works toward advancing data-driven TRP channel research.",TRIP,0.917790532,TRansient receptor potential,0.710879376,TRIP,0.917790532,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/11/2012 +25187689,http://www.gbpuat-cbsh.ac.in/departments/bi/database/tripath,"TRIPATH: A Biological Genetic and Genomic Database of Three Economically Important Fungal Pathogen of Wheat - Rust: Smut: Bunt. Unlabelled Wheat, the major source of vegetable protein in human diet, provides staple food globally for a large proportion of the human population. With higher protein content than other major cereals, wheat has great socio- economic importance. Nonetheless for wheat, three important fungal pathogens i.e. rust, smut and bunt are major cause of significant yield losses throughout the world. Researchers are putting up a strong fight against devastating wheat pathogens, and have made progress in tracking and controlling disease outbreaks from East Africa to South Asia. The aim of the present work hence was to develop a fungal pathogens database dedicated to wheat, gathering information about different pathogen species and linking them to their biological classification, distribution and control. Towards this end, we developed an open access database Tripath: A biological, genetic and genomic database of economically important wheat fungal pathogens - rust: smut: bunt. Data collected from peer-reviewed publications and fungal pathogens were added to the customizable database through an extended relational design. The strength of this resource is in providing rapid retrieval of information from large volumes of text at a high degree of accuracy. Database TRIPATH is freely accessible. 
Availability http://www.gbpuat-cbsh.ac.in/departments/bi/database/tripath/",TRIPATH,0.990485728,of,0.707948923,TRIPATH,0.990485728,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/22/2014 +32047897,http://bio.licpathway.net/TRlnc,"TRlnc: a comprehensive database for human transcriptional regulatory information of lncRNAs. Long noncoding RNAs (lncRNAs) have been proven to play important roles in transcriptional processes and biological functions. With the increasing study of human diseases and biological processes, information in human H3K27ac ChIP-seq, ATAC-seq and DNase-seq datasets is accumulating rapidly, resulting in an urgent need to collect and process data to identify transcriptional regulatory regions of lncRNAs. We therefore developed a comprehensive database for human regulatory information of lncRNAs (TRlnc, http://bio.licpathway.net/TRlnc), which aimed to collect available resources of transcriptional regulatory regions of lncRNAs and to annotate and illustrate their potential roles in the regulation of lncRNAs in a cell type-specific manner. The current version of TRlnc contains 8√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ683√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ028 typical enhancers/super-enhancers and 32√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ348√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ244 chromatin accessibility regions associated with 91√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ906 human lncRNAs. These regions are identified from over 900 human H3K27ac ChIP-seq, ATAC-seq and DNase-seq samples. Furthermore, TRlnc provides the detailed genetic and epigenetic annotation information within transcriptional regulatory regions (promoter, enhancer/super-enhancer and chromatin accessibility regions) of lncRNAs, including common SNPs, risk SNPs, eQTLs, linkage disequilibrium SNPs, transcription factors, methylation sites, histone modifications and 3D chromatin interactions. 
It is anticipated that the use of TRlnc will help users to gain in-depth and useful insights into the transcriptional regulatory mechanisms of lncRNAs.",TRlnc,0.99749732,for human regulatory information of lncRNAs,0.931201637,TRlnc,0.99749732,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2021 +24822057,http://trna.ie.niigata-u.ac.jp,"tRNADB-CE: tRNA gene database well-timed in the era of big sequence data. The tRNA gene data base curated by experts ""tRNADB-CE"" (http://trna.ie.niigata-u.ac.jp) was constructed by analyzing 1,966 complete and 5,272 draft genomes of prokaryotes, 171 viruses', 121 chloroplasts', and 12 eukaryotes' genomes plus fragment sequences obtained by metagenome studies of environmental samples. 595,115 tRNA genes in total, and thus two times of genes compiled previously, have been registered, for which sequence, clover-leaf structure, and results of sequence-similarity and oligonucleotide-pattern searches can be browsed. To provide collective knowledge with help from experts in tRNA researches, we added a column for enregistering comments to each tRNA. By grouping bacterial tRNAs with an identical sequence, we have found high phylogenetic preservation of tRNA sequences, especially at the phylum level. Since many species-unknown tRNAs from metagenomic sequences have sequences identical to those found in species-known prokaryotes, the identical sequence group (ISG) can provide phylogenetic markers to investigate the microbial community in an environmental ecosystem. This strategy can be applied to a huge amount of short sequences obtained from next-generation sequencers, as showing that tRNADB-CE is a well-timed database in the era of big sequence data. 
It is also discussed that batch-learning self-organizing-map with oligonucleotide composition is useful for efficient knowledge discovery from big sequence data.",tRNADB-CE,0.994249242,NA,0,tRNADB-CE,0.994249242,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/1/2014 +23161680,"http://tropgenedb.cirad.fr, http://southgreen.cirad.fr","TropGeneDB, the multi-tropical crop information system updated and extended. TropGeneDB (http://tropgenedb.cirad.fr) was created to store genetic, molecular and phenotypic data on tropical crop species. The most common data stored in TropGeneDB are molecular markers, quantitative trait loci, genetic and physical maps, genetic diversity, phenotypic diversity studies and information on genetic resources (geographic origin, parentage, collection). TropGeneDB is organized on a crop basis with currently nine public modules (banana, cocoa, coconut, coffee, cotton, oil palm, rice, rubber tree, sugarcane). Crop-specific Web consultation interfaces have been designed to allow quick consultations and personalized complex queries. TropGeneDB is a component of the South Green Bioinformatics Platform (http://southgreen.cirad.fr/).",TropGeneDB,0.997986138,NA,0,TropGeneDB,0.997986138,1,27987169,27987169,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/17/2012 +27987169,http://tropgenedb.cirad.fr,"Using TropGeneDB: A Database Containing Data on Molecular Markers, QTLs, Maps, Genotypes, and Phenotypes for Tropical Crops. TropGeneDB ( http://tropgenedb.cirad.fr ) is a web database that manages genomic, genetic, and phenotypic information on tropical crops. It is organized on a crop basis with currently nine public modules: banana, cocoa, coconut, coffee, cotton, oil palm, rice, rubber tree, and sugarcane. TropGeneDB contains data on molecular markers, quantitative trait loci (QTLs), genetic and physical maps, genotyping and phenotyping studies, and information on genetic resources (geographic origin, parentage, collection). 
Crop-specific web interfaces have been designed to allow quick consultations as well as personalized complex queries.",TropGeneDB,0.997896314,NA,0,TropGeneDB,0.997896314,1,23161680,23161680,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2017 +25025376,http://bioinf.hutton.ac.uk/tropiTree,"tropiTree: an NGS-based EST-SSR resource for 24 tropical tree species. The development of genetic tools for non-model organisms has been hampered by cost, but advances in next-generation sequencing (NGS) have created new opportunities. In ecological research, this raises the prospect for developing molecular markers to simultaneously study important genetic processes such as gene flow in multiple non-model plant species within complex natural and anthropogenic landscapes. Here, we report the use of bar-coded multiplexed paired-end Illumina NGS for the de novo development of expressed sequence tag-derived simple sequence repeat (EST-SSR) markers at low cost for a range of 24 tree species. Each chosen tree species is important in complex tropical agroforestry systems where little is currently known about many genetic processes. An average of more than 5,000 EST-SSRs was identified for each of the 24 sequenced species, whereas prior to analysis 20 of the species had fewer than 100 nucleotide sequence citations. To make results available to potential users in a suitable format, we have developed an open-access, interactive online database, tropiTree (http://bioinf.hutton.ac.uk/tropiTree), which has a range of visualisation and search facilities, and which is a model for the efficient presentation and application of NGS data.",tropiTree,0.987223446,NA,0,tropiTree,0.987223446,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/15/2014 +26066708,http://www.grnpedia.org/trrust,"TRRUST: a reference database of human transcriptional regulatory interactions. The reconstruction of transcriptional regulatory networks (TRNs) is a long-standing challenge in human genetics. 
Numerous computational methods have been developed to infer regulatory interactions between human transcriptional factors (TFs) and target genes from high-throughput data, and their performance evaluation requires gold-standard interactions. Here we present a database of literature-curated human TF-target interactions, TRRUST (transcriptional regulatory relationships unravelled by sentence-based text-mining, http://www.grnpedia.org/trrust), which currently contains 8,015 interactions between 748 TF genes and 1,975 non-TF genes. A sentence-based text-mining approach was employed for efficient manual curation of regulatory interactions from approximately 20 million Medline abstracts. To the best of our knowledge, TRRUST is the largest publicly available database of literature-curated human TF-target interactions to date. TRRUST also has several useful features: i) information about the mode-of-regulation; ii) tests for target modularity of a query TF; iii) tests for TF cooperativity of a query target; iv) inferences about cooperating TFs of a query TF; and v) prioritizing associated pathways and diseases with a query TF. We observed high enrichment of TF-target pairs in TRRUST for top-scored interactions inferred from high-throughput data, which suggests that TRRUST provides a reliable benchmark for the computational reconstruction of human TRNs.",TRRUST,0.997621059,regulatory relationships,0.536692739,TRRUST,0.997621059,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/12/2015 +25300491,http://www.metexplore.fr/trypanocyc,"TrypanoCyc: a community-led biochemical pathways database for Trypanosoma brucei. The metabolic network of a cell represents the catabolic and anabolic reactions that interconvert small molecules (metabolites) through the activity of enzymes, transporters and non-catalyzed chemical reactions. 
Our understanding of individual metabolic networks is increasing as we learn more about the enzymes that are active in particular cells under particular conditions and as technologies advance to allow detailed measurements of the cellular metabolome. Metabolic network databases are of increasing importance in allowing us to contextualise data sets emerging from transcriptomic, proteomic and metabolomic experiments. Here we present a dynamic database, TrypanoCyc (http://www.metexplore.fr/trypanocyc/), which describes the generic and condition-specific metabolic network of Trypanosoma brucei, a parasitic protozoan responsible for human and animal African trypanosomiasis. In addition to enabling navigation through the BioCyc-based TrypanoCyc interface, we have also implemented a network-based representation of the information through MetExplore, yielding a novel environment in which to visualise the metabolism of this important parasite.",TrypanoCyc,0.996331513,NA,0,TrypanoCyc,0.996331513,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/9/2014 +32380213,http://trypindb.biomedinformri.com,"TrypInDB: A searchable online resource of small molecule inhibitors against Trypanosoma sp. African Trypanosomiasis and American Trypanosomiasis are the diseases affecting more than thousands of people yearly and more than twenty-five million people risk acquiring the disease. The treatment for the disease is generally expensive, and most of the available drugs are of high-toxicity and cause fatal side-effects. Hence, there is a constant need for finding new treatment strategies for Trypanosomiasis. Combination therapy and repurposing or redesigning of existing inhibitors for new drugs are of high importance to address these hurdles, particularly the drug resistance. Hence, here we report TrypInDB, a searchable online resource of small molecule inhibitors having a varying degree of activity towards Trypanosoma sp. 
Information of about >14,000 small molecules from >700 published research articles was collected and made as an easy-to-search database. Four major sets of information were made available for each collected inhibitors viz., General information (activity values; source of the inhibitors; enzyme targets; etc.,), Structural information, Toxicity information, and Literature information. More than 25 different information about each inhibitor were collected or predicted and made accessible for searching. The database is designed to be queried easily with multiple-field filters with the provisions to perform sub-structure search and similar FDA approved drug searches. The database supports the easy export of queried records and structure in multiple formats. In addition, the TrypInDB is actively integrated into LeishInDB. We believe that the scope of TrypInDB permits the research community to exploit the available data for repurposing the inhibitors as well as for the investigation of new therapeutics. Database URL: http://trypindb.biomedinformri.com/.",TrypInDB,0.996800661,NA,0,TrypInDB,0.996800661,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/5/2020 +28158179,http://trypsNetDB.org,"TrypsNetDB: An integrated framework for the functional characterization of trypanosomatid proteins. Trypanosomatid parasites cause serious infections in humans and production losses in livestock. Due to the high divergence from other eukaryotes, such as humans and model organisms, the functional roles of many trypanosomatid proteins cannot be predicted by homology-based methods, rendering a significant portion of their proteins as uncharacterized. Recent technological advances have led to the availability of multiple systematic and genome-wide datasets on trypanosomatid parasites that are informative regarding the biological role(s) of their proteins. 
Here, we report TrypsNetDB (http://trypsNetDB.org), a web-based resource for the functional annotation of 16 different species/strains of trypanosomatid parasites. The database not only visualizes the network context of the queried protein(s) in an intuitive way but also examines the response of the represented network in more than 50 different biological contexts and its enrichment for various biological terms and pathways, protein sequence signatures, and potential RNA regulatory elements. The interactome core of the database, as of Jan 23, 2017, contains 101,187 interactions among 13,395 trypanosomatid proteins inferred from 97 genome-wide and focused studies on the interactome of these organisms.",TrypsNetDB,0.998068571,NA,0,TrypsNetDB,0.998068571,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/3/2017 +27543790,http://gb.whu.edu.cn/TSCD,"Comprehensive characterization of tissue-specific circular RNAs in the human and mouse genomes. Circular RNA (circRNA) is a group of RNA family generated by RNA circularization, which was discovered ubiquitously across different species and tissues. However, there is no global view of tissue specificity for circRNAs to date. Here we performed the comprehensive analysis to characterize the features of human and mouse tissue-specific (TS) circRNAs. We identified in total 302√ɬÉ√ǬÇ√ɬÇ√Ǭ†853 TS circRNAs in the human and mouse genome, and showed that the brain has the highest abundance of TS circRNAs. We further confirmed the existence of circRNAs by reverse transcription polymerase chain reaction (RT-PCR). We also characterized the genomic location and conservation of these TS circRNAs and showed that the majority of TS circRNAs are generated from exonic regions. To further understand the potential functions of TS circRNAs, we identified microRNAs and RNA binding protein, which might bind to TS circRNAs. This process suggested their involvement in development and organ differentiation. 
Finally, we constructed an integrated database TSCD (Tissue-Specific CircRNA Database: http://gb.whu.edu.cn/TSCD) to deposit the features of TS circRNAs. This study is the first comprehensive view of TS circRNAs in human and mouse, which shed light on circRNA functions in organ development and disorders.",TSCD,0.990743339,Tissue-Specific CircRNA Database,0.976619937,TSCD,0.990743339,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2017 +21253872,http://tsdb.cbi.pku.edu.cn,"TSdb: a database of transporter substrates linking metabolic pathways and transporter systems on a genome scale via their shared substrates. TSdb ( http://tsdb.cbi.pku.edu.cn ) is the first manually curated central repository that stores formatted information on the substrates of transporters. In total, 37608 transporters with 15075 substrates from 884 organisms were curated from UniProt functional annotation. A unique feature of TSdb is that all the substrates are mapped to identifiers from the KEGG Ligand compound database. Thus, TSdb links current metabolic pathway schema with compound transporter systems via the shared compounds in the pathways. Furthermore, all the transporter substrates in TSdb are classified according to their biochemical properties, biological roles and subcellular localizations. In addition to the functional annotation of transporters, extensive compound annotation that includes inhibitor information from the KEGG Ligand and BRENDA databases has been integrated, making TSdb a useful source for the discovery of potential inhibitory mechanisms linking transporter substrates and metabolic enzymes. User-friendly web interfaces are designed for easy access, query and download of the data. Text and BLAST searches against all transporters in the database are provided. 
We will regularly update the substrate data with evidence from new publications.",TSdb,0.996435046,NA,0,TSdb,0.996435046,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/21/2011 +31680168,http://bioinfo.uth.edu/TSEADB,"TSEA-DB: a trait-tissue association map for human complex traits and diseases. Assessing the causal tissues of human traits and diseases is important for better interpreting trait-associated genetic variants, understanding disease etiology, and improving treatment strategies. Here, we present a reference database for trait-associated tissue specificity based on genome-wide association study (GWAS) results, named Tissue-Specific Enrichment Analysis DataBase (TSEA-DB, available at https://bioinfo.uth.edu/TSEADB/). We collected GWAS summary statistics data for a wide range of human traits and diseases followed by rigorous quality control. The current version of TSEA-DB includes 4423 data sets from the UK Biobank (UKBB) and 596 from other resources (GWAS Catalog and literature mining), totaling 5019 unique GWAS data sets and 15 770 trait-associated gene sets. TSEA-DB aims to provide reference tissue(s) enriched with the genes from GWAS. To this end, we systematically performed a tissue-specific enrichment analysis using our recently developed tool deTS and gene expression profiles from two reference tissue panels: the GTEx panel (47 tissues) and the ENCODE panel (44 tissues). The comprehensive trait-tissue association results can be easily accessed, searched, visualized, analyzed, and compared across the studies and traits through our web site. TSEA-DB represents one of the many timely and comprehensive approaches in exploring human trait-tissue association.",TSEA-DB,0.995504111,Tissue-Specific Enrichment Analysis DataBase,0.810658699,TSEA-DB,0.995504111,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +"23066107, 26590405",http://bioinfo.mc.vanderbilt.edu/TSGene,"TSGene: a web resource for tumor suppressor genes. 
Tumor suppressor genes (TSGs) are guardian genes that play important roles in controlling cell proliferation processes such as cell-cycle checkpoints and inducing apoptosis. Identification of these genes and understanding their functions are critical for further investigation of tumorigenesis. So far, many studies have identified numerous TSGs and illustrated their functions in various types of tumors or normal samples. Furthermore, accumulating evidence has shown that non-coding RNAs can act as TSGs to prevent the tumorigenesis processes. Therefore, there is a growing demand to integrate TSGs with large-scale experimental evidence (e.g. gene expression and epigenetic signatures) to provide a comprehensive resource for further investigation of TSGs and their molecular mechanisms in cancer. To achieve this goal, we first developed a comprehensive literature-based database called TSGene (tumor suppressor gene database), freely available at http://bioinfo.mc.vanderbilt.edu/TSGene/. In the current release, TSGene contains 716 human (637 protein-coding and 79 non-coding genes), 628 mouse and 567 rat TSGs curated from UniProtKB, the Tumor Associated Gene database and 5795 PubMed abstracts. Additionally, the TSGene provides detailed annotations for each TSG, such as cancer mutations, gene expressions, methylation sites, TF regulations and protein-protein interactions.",TSGene,0.985564858,Tumor Suppressor Gene database,0.982487644,TSGene,0.985564858,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2015 +24889152,http://bioeng.swjtu.edu.cn/TSmiR,"Genome-wide survey of tissue-specific microRNA and transcription factor regulatory networks in 12 tissues. Tissue-specific miRNAs (TS miRNA) specifically expressed in particular tissues play an important role in tissue identity, differentiation and function. However, transcription factor (TF) and TS miRNA regulatory networks across multiple tissues have not been systematically studied. 
Here, we manually extracted 116 TS miRNAs and systematically investigated the regulatory network of TF-TS miRNA in 12 human tissues. We identified 2,347 TF-TS miRNA regulatory relations and revealed that most TF binding sites tend to enrich close to the transcription start site of TS miRNAs. Furthermore, we found TS miRNAs were regulated widely by non-tissue specific TFs and the tissue-specific expression level of TF have a close relationship with TF-genes regulation. Finally, we describe TSmiR (http://bioeng.swjtu.edu.cn/TSmiR), a novel and web-searchable database that houses interaction maps of TF-TS miRNA in 12 tissues. Taken together, these observations provide a new suggestion to better understand the regulatory network and mechanisms of TF-TS miRNAs underlying different tissues.",TSmiR,0.990324497,NA,0,TSmiR,0.990324497,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/3/2014 +30223042,http://biopharm.zju.edu.cn/tsnadb,"TSNAdb: A Database for Tumor-specific Neoantigens from Immunogenomics Data Analysis. Tumor-specific neoantigens have attracted much attention since they can be used as biomarkers to predict therapeutic effects of immune checkpoint blockade therapy and as potential targets for cancer immunotherapy. In this study, we developed a comprehensive tumor-specific neoantigen database (TSNAdb v1.0), based on pan-cancer immunogenomic analyses of somatic mutation data and human leukocyte antigen (HLA) allele information for 16 tumor types with 7748 tumor samples from The Cancer Genome Atlas (TCGA) and The Cancer Immunome Atlas (TCIA). We predicted binding affinities between mutant/wild-type peptides and HLA class I molecules by NetMHCpan v2.8/v4.0, and presented detailed information of 3,707,562/1,146,961 potential neoantigens generated by somatic mutations of all tumor samples. 
Moreover, we employed recurrent mutations in combination with highly frequent HLA alleles to predict potential shared neoantigens across tumor patients, which would facilitate the discovery of putative targets for neoantigen-based cancer immunotherapy. TSNAdb is freely available at http://biopharm.zju.edu.cn/tsnadb.",TSNAdb,0.982816732,tumor-specific neoantigen database,0.929707067,TSNAdb,0.982816732,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2018 +33068436,http://www.tsrbase.org,"tsRBase: a comprehensive database for expression and function of tsRNAs in multiple species. tRNA-derived small RNAs (tsRNAs) are a class of novel small RNAs, ubiquitously present in prokaryotes and eukaryotes. It has been reported that tsRNAs exhibit spatiotemporal expression patterns and can function as regulatory molecules in many biological processes. Current tsRNA databases only cover limited organisms and ignore tsRNA functional characteristics. Thus, integrating more relevant tsRNA information is helpful for further exploration. Here, we present a tsRNA database, named tsRBase, which integrates the expression pattern and functional information of tsRNAs in multiple species. In tsRBase, we identified 121 942 tsRNAs by analyzing more than 14 000 publicly available small RNA-seq data covering 20 species. This database collects samples from different tissues/cell-lines, or under different treatments and genetic backgrounds, thus helps depict specific expression patterns of tsRNAs under different conditions. Importantly, to enrich our understanding of biological significance, we collected tsRNAs experimentally validated from published literatures, obtained protein-binding tsRNAs from CLIP/RIP-seq data, and identified targets of tsRNAs from CLASH and CLEAR-CLIP data. Taken together, tsRBase is the most comprehensive and systematic tsRNA repository, exhibiting all-inclusive information of tsRNAs from diverse data sources of multiple species. 
tsRBase is freely available at http://www.tsrbase.org.",tsRBase,0.994211316,NA,0,tsRBase,0.994211316,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +27924015,http://tstmp.enzim.ttk.mta.hu,"TSTMP: target selection for structural genomics of human transmembrane proteins. The TSTMP database is designed to help the target selection of human transmembrane proteins for structural genomics projects and structure modeling studies. Currently, there are only 60 known 3D structures among the polytopic human transmembrane proteins and about a further 600 could be modeled using existing structures. Although there are a great number of human transmembrane protein structures left to be determined, surprisingly only a small fraction of these proteins have 'selected' (or above) status according to the current version the TargetDB/TargetTrack database. This figure is even worse regarding those transmembrane proteins that would contribute the most to the structural coverage of the human transmembrane proteome. The database was built by sorting out proteins from the human transmembrane proteome with known structure and searching for suitable model structures for the remaining proteins by combining the results of a state-of-the-art transmembrane specific fold recognition algorithm and a sequence similarity search algorithm. Proteins were searched for homologues among the human transmembrane proteins in order to select targets whose successful structure determination would lead to the best structural coverage of the human transmembrane proteome. The pipeline constructed for creating the TSTMP database guarantees to keep the database up-to-date. The database is available at http://tstmp.enzim.ttk.mta.hu.",TSTMP,0.996672034,NA,0,TSTMP,0.996672034,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/18/2016 +32512488,http://vit.ac.in/ttrmdb,"TTRMDB: A database for structural and functional analysis on the impact of SNPs over transthyretin (TTR) using bioinformatic tools. 
Hereditary Transthyretin-associated amyloidosis (ATTR) is an autosomal dominant protein-folding disorder with adult-onset caused by mutation of transthyretin (TTR). TTR is characterized by extracellular deposition of amyloid, leading to loss of autonomy and finally, death. More than 100 distinct mutations in TTR gene have been reported from variable age of onset, clinical expression and penetrance data. Besides, the cure for the disease remains still obscure. Further, the prioritizing of mutations concerning the characteristic features governing the stability and pathogenicity of TTR mutant proteins remains unanswered, to date and thus, a complex state of study for researchers. Herein, we provide a full report encompassing the effects of every reported mutant model of TTR protein about the stability, functionality and pathogenicity using various computational tools. In addition, the results obtained from our study were used to create TTRMDB (Transthyretin mutant database), which could be easy access to researchers at http://vit.ac.in/ttrmdb.",TTRMDB,0.990589499,Transthyretin mutant database,0.962966555,TTRMDB,0.990589499,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/25/2020 +25324314,http://ttsmi.bii.a-star.edu.sg,"The TTSMI database: a catalog of triplex target DNA sites associated with genes and regulatory elements in the human genome. A triplex target DNA site (TTS), a stretch of DNA that is composed of polypurines, is able to form a triple-helix (triplex) structure with triplex-forming oligonucleotides (TFOs) and is able to influence the site-specific modulation of gene expression and/or the modification of genomic DNA. The co-localization of a genomic TTS with gene regulatory signals and functional genome structures suggests that TFOs could potentially be exploited in antigene strategies for the therapy of cancers and other genetic diseases. 
Here, we present the TTS Mapping and Integration (TTSMI; http://ttsmi.bii.a-star.edu.sg) database, which provides a catalog of unique TTS locations in the human genome and tools for analyzing the co-localization of TTSs with genomic regulatory sequences and signals that were identified using next-generation sequencing techniques and/or predicted by computational models. TTSMI was designed as a user-friendly tool that facilitates (i) fast searching/filtering of TTSs using several search terms and criteria associated with sequence stability and specificity, (ii) interactive filtering of TTSs that co-localize with gene regulatory signals and non-B DNA structures, (iii) exploration of dynamic combinations of the biological signals of specific TTSs and (iv) visualization of a TTS simultaneously with diverse annotation tracks via the UCSC genome browser.",TTSMI,0.997173548,TTS Mapping and,0.7016765,TTSMI,0.997173548,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/16/2014 +24816183,http://tuberq.proteinq.com.ar,"TuberQ: a Mycobacterium tuberculosis protein druggability database. In 2012 an estimated 8.6 million people developed tuberculosis (TB) and 1.3 million died from the disease [including 320√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√Ǭâ000 deaths among human immunodeficiency virus (HIV)-positive people]. There is an urgent need for new anti-TB drugs owing to the following: the fact that current treatments have severe side effects, the increasing emergence of multidrug-resistant strains of Mycobacterium tuberculosis (Mtb), the negative drug-drug interactions with certain HIV (or other disease) treatments and the ineffectiveness against dormant Mtb. In this context we present here the TuberQ database, a novel resource for all researchers working in the field of drug development in TB. 
The main feature of TuberQ is to provide a druggability analysis of Mtb proteins in a consistent and effective manner, contributing to a better selection of potential drug targets for screening campaigns and the analysis of targets for structure-based drug design projects. The structural druggability analysis is combined with features related to the characteristics of putative inhibitor binding pockets and with functional and biological data of proteins. The structural analysis is performed on all available unique Mtb structures and high-quality structural homology-based models. This information is shown in an interactive manner, depicting the protein structure, the pockets and the associated characteristics for each protein. TuberQ also provides information about gene essentiality information, as determined from whole cell-based knockout experiments, and expression information obtained from microarray experiments done in different stress-related conditions. We hope that TuberQ will be a powerful tool for researchers working in TB and eventually will lead to the identification of novel putative targets and progresses in therapeutic activities. Database URL: http://tuberq.proteinq.com.ar/",TuberQ,0.99642241,NA,0,TuberQ,0.99642241,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/8/2014 +23594715,http://www.ncrnalab.com/TUMIR,"TUMIR: an experimentally supported database of microRNA deregulation in various cancers. Background MicroRNAs were found to play an important role in cancers and several literatures exist to describe the relationship between microRNA and cancer, but the expression pattern was still faintly. There is a need for a comprehensive collection and summary of the interactions under experimental support. Description TUMIR (http://www.ncrnalab.com/TUMIR/), a manually extracted database of experimentally supported microRNA-cancer relationship, aims at providing a large, high-quality, validated comprehensive resource of microRNA deregulation in various cancers. 
The current version includes a systematic literature search to May-1-2012 using PubMed database, contains data extracted from 205 literatures and 1163 entries describing a regulatory interaction between human microRNAs and cancers. Each entry in the database contains the details of microRNA name, the disease name, case number, control number, p value, the experimentally validated targets, sample type, and a brief description of patients' clinic pathologic parameters mentioned in the same paper. The website has several extensive external links to the related websites and any requests can be made by emailing to tumir_pumc@163.com. Conclusion TUMIR is an open access website and will be an accurate clue for the researchers who are interested in better understanding the relationship between miRNAs and cancer.",TUMIR,0.997981846,NA,0,TUMIR,0.997981846,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/17/2013 +22523575,http://crdd.osdd.net/raghava/tumorhope,"TumorHoPe: a database of tumor homing peptides. Background Cancer is responsible for millions of immature deaths every year and is an economical burden on developing countries. One of the major challenges in the present era is to design drugs that can specifically target tumor cells not normal cells. In this context, tumor homing peptides have drawn much attention. These peptides are playing a vital role in delivering drugs in tumor tissues with high specificity. In order to provide service to scientific community, we have developed a database of tumor homing peptides called TumorHoPe. Description TumorHoPe is a manually curated database of experimentally validated tumor homing peptides that specifically recognize tumor cells and tumor associated microenvironment, i.e., angiogenesis. These peptides were collected and compiled from published papers, patents and databases. Current release of TumorHoPe contains 744 peptides. 
Each entry provides comprehensive information of a peptide that includes its sequence, target tumor, target cell, techniques of identification, peptide receptor, etc. In addition, we have derived various types of information from these peptide sequences that include secondary/tertiary structure, amino acid composition, and physicochemical properties of peptides. Peptides in this database have been found to target different types of tumors that include breast, lung, prostate, melanoma, colon, etc. These peptides have some common motifs including RGD (Arg-Gly-Asp) and NGR (Asn-Gly-Arg) motifs, which specifically recognize tumor angiogenic markers. TumorHoPe has been integrated with many web-based tools like simple/complex search, database browsing and peptide mapping. These tools allow a user to search tumor homing peptides based on their amino acid composition, charge, polarity, hydrophobicity, etc. Conclusion TumorHoPe is a unique database of its kind, which provides comprehensive information about experimentally validated tumor homing peptides and their target cells. This database will be very useful in designing peptide-based drugs and drug-delivery system. It is freely available at http://crdd.osdd.net/raghava/tumorhope/.",TumorHoPe,0.990183711,NA,0,TumorHoPe,0.990183711,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/16/2012 +33993461,http://i.uestc.edu.cn/tupdb,"TUPDB: Target-Unrelated Peptide Data Bank. The isolation of target-unrelated peptides (TUPs) through biopanning remains as a major problem of phage display selection experiments. These TUPs do not have any actual affinity toward targets of interest, which tend to be mistakenly identified as target-binding peptides. Therefore, an information portal for storing TUP data is urgently needed. 
Here, we present a TUP data bank (TUPDB), which is a comprehensive, manually curated database of approximately 73 experimentally verified TUPs and 1963 potential TUPs collected from TUPScan, the BDB database, and public research articles. The TUPScan tool has been integrated in TUPDB to facilitate TUP analysis. We believe that TUPDB can help identify and remove TUPs in future reports in the biopanning community. The database is of great importance to improving the quality of phage display-based epitope mapping and promoting the development of vaccines, diagnostics, and therapeutics. The TUPDB database is available at http://i.uestc.edu.cn/tupdb .",TUPDB,0.994650185,Target-Unrelated Peptide Data Bank,0.885041343,TUPDB,0.994650185,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/16/2021 +31931895,http://twinsmxofficial.unam.mx,"TwinsMX: Uncovering the Basis of Health and Disease in the Mexican Population. TwinsMX is a national twin registry in Mexico recently created with institutional support from the Universidad Nacional Autónoma de México. It aims to serve as a platform to advance epidemiological and genetic research in the country and to disentangle the genetic and environmental contributions to health and disease in the admixed Mexican population. Here, we describe our recruitment and data collection strategies and discuss both the progress to date and future directions. More information about the registry is available on our website: https://twinsmxofficial.unam.mx/ (content in Spanish).",TwinsMX,0.978481114,NA,0,TwinsMX,0.978481114,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/1/2019 +25935546,http://www.e-bioinformatics.net/ucare,"u-CARE: user-friendly Comprehensive Antibiotic resistance Repository of Escherichia coli. Background and aims Despite medical advancements, Escherichia coli-associated infections remain a major public health concern and although an abundant information about E. 
coli and its antibiotic resistance mechanisms is available, no effective tool exists that integrates gene and genomic data in context to drug resistance, thus raising a need to develop a repository that facilitates integration and assimilation of factors governing drug resistance in E. coli. Descriptions User-friendly Comprehensive Antibiotic resistance Repository of Escherichia coli (u-CARE) is a manually curated catalogue of 52 antibiotics with reported resistance, 107 genes, transcription factors and single nucleotide polymorphism (SNPs) involved in multiple drug resistance of this pathogen. Each gene page provides detailed information about its resistance mechanisms, while antibiotic page consists of summary, chemical description and structural descriptors with links to external public databases like GO, CDD, DEG, Ecocyc, KEGG, Drug Bank, PubChem and UniProt. Moreover, the database integrates this reductive information to holistic data such as strain-specific and segment-specific pathogenic islands and operons. In addition, the database offers rich user interface for the visualisation and retrieval of information using various search criteria such as sequence, keyword, image and class search. Conclusions u-CARE is aimed to cater to the needs of researchers working in the field of antimicrobial drug resistance with minimal knowledge of bioinformatics. This database is also intended as a guide book to medical practitioners to avoid use of antibiotics against which resistance has already been reported in E. coli. The database is available from: http://www.e-bioinformatics.net/ucare.",u-CARE,0.98301971,Comprehensive Antibiotic resistance Repository of Escherichia coli,0.747757421,u-CARE,0.98301971,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/2/2015 +30601939,http://ubihub.thesgc.org,"UbiHub: a data hub for the explorers of ubiquitination pathways. 
Motivation Protein ubiquitination plays a central role in important cellular machineries such as protein degradation or chromatin-mediated signaling. With the recent discovery of the first potent ubiquitin-specific protease inhibitors, and the maturation of proteolysis targeting chimeras as promising chemical tools to exploit the ubiquitin-proteasome system, protein target classes associated with ubiquitination pathways are becoming the focus of intense drug-discovery efforts. Results We have developed UbiHub, an online resource that can be used to visualize a diverse array of biological, structural and chemical data on phylogenetic trees of human protein families involved in ubiquitination signaling, including E3 ligases and deubiquitinases. This interface can inform target prioritization and drug design, and serves as a navigation tool for medicinal chemists, structural and cell biologists exploring ubiquitination pathways. Availability and implementation https://ubihub.thesgc.org.",UbiHub,0.998238921,NA,0,UbiHub,0.998238921,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2019 +27114492,http://csb.cse.yzu.edu.tw/UbiNet,"UbiNet: an online resource for exploring the functional associations and regulatory networks of protein ubiquitylation. . Protein ubiquitylation catalyzed by E3 ubiquitin ligases are crucial in the regulation of many cellular processes. Owing to the high throughput of mass spectrometry-based proteomics, a number of methods have been developed for the experimental determination of ubiquitylation sites, leading to a large collection of ubiquitylation data. However, there exist no resources for the exploration of E3-ligase-associated regulatory networks of for ubiquitylated proteins in humans. Therefore, the UbiNet database was developed to provide a full investigation of protein ubiquitylation networks by incorporating experimentally verified E3 ligases, ubiquitylated substrates and protein-protein interactions (PPIs). 
To date, UbiNet has accumulated 43 948 experimentally verified ubiquitylation sites from 14 692 ubiquitylated proteins of humans. Additionally, we have manually curated 499 E3 ligases as well as two E1 activating and 46 E2 conjugating enzymes. To delineate the regulatory networks among E3 ligases and ubiquitylated proteins, a total of 430 530 PPIs were integrated into UbiNet for the exploration of ubiquitylation networks with an interactive network viewer. A case study demonstrated that UbiNet was able to decipher a scheme for the ubiquitylation of tumor proteins p63 and p73 that is consistent with their functions. Although the essential role of Mdm2 in p53 regulation is well studied, UbiNet revealed that Mdm2 and additional E3 ligases might be implicated in the regulation of other tumor proteins by protein ubiquitylation. Moreover, UbiNet could identify potential substrates for a specific E3 ligase based on PPIs and substrate motifs. With limited knowledge about the mechanisms through which ubiquitylated proteins are regulated by E3 ligases, UbiNet offers users an effective means for conducting preliminary analyses of protein ubiquitylation. The UbiNet database is now freely accessible via http://csb.cse.yzu.edu.tw/UbiNet/ The content is regularly updated with the literature and newly released data.Database URL: http://csb.cse.yzu.edu.tw/UbiNet/.",UbiNet,0.996777952,NA,0,UbiNet,0.996777952,1,NA,33693667,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,4/25/2016 +33693667,http://awi.cuhk.edu.cn,"UbiNet 2.0: a verified, classified, annotated and updated database of E3 ubiquitin ligase-substrate interactions. . Ubiquitination is an important post-translational modification, which controls protein turnover by labeling malfunctional and redundant proteins for proteasomal degradation, and also serves intriguing non-proteolytic regulatory functions. 
E3 ubiquitin ligases, whose substrate specificity determines the recognition of target proteins of ubiquitination, play crucial roles in ubiquitin-proteasome system. UbiNet 2.0 is an updated version of the database UbiNet. It contains 3332 experimentally verified E3-substrate interactions (ESIs) in 54 organisms and rich annotations useful for investigating the regulation of ubiquitination and the substrate specificity of E3 ligases. Based on the accumulated ESIs data, the recognition motifs in substrates for each E3 were also identified and a functional enrichment analysis was conducted on the collected substrates. To facilitate the research on ESIs with different categories of E3 ligases, UbiNet 2.0 performed strictly evidence-based classification of the E3 ligases in the database based on their mechanisms of ubiquitin transfer and substrate specificity. The platform also provides users with an interactive tool that can visualize the ubiquitination network of a group of self-defined proteins, displaying ESIs and protein-protein interactions in a graphical manner. The tool can facilitate the exploration of inner regulatory relationships mediated by ubiquitination among proteins of interest. In summary, UbiNet 2.0 is a user-friendly web-based platform that provides comprehensive as well as updated information about experimentally validated ESIs and a visualized tool for the construction of ubiquitination regulatory networks available at http://awi.cuhk.edu.cn/~ubinet/index.php.",UbiNet,0.988490701,NA,0,UbiNet,0.988490701,1,NA,27114492,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URLs DO NOT RESOLVE,3/1/2021 +26199991,http://seiwertlab.uchicago.edu/UCDB,"Ulcerative Colitis Database: An Integrated Database and Toolkit for Gene Function and Medication Involved in Ulcerative Colitis. Background Over the last decade, a massive amount of well-annotated genomic data has been accumulated on the pathogenesis and therapies for ulcerative colitis (UC). 
However, a comprehensive repository is not available yet. Methods Ulcerative Colitis Database (UCDB) was constructed using text mining followed by manually curating on the literature to collect the reliable information of UC-related genes, drugs, and susceptibility loci. UC DNA microarray data were collected. R packages were used to implement gene expression analysis toolkit. Results UCDB includes 4 separate but closely related components: ""UC GENE,"" ""UC DRUG,"" ""UC LOCUS,"" and ""UC ANALYSIS."" The UC GENE contains comprehensive information for 1151 UC-related genes manually curated from 2919 publications. The UC DRUG includes information for 248 drugs manually curated from 2344 publications. ""UC LOCUS"" includes 110 UC susceptibility SNP loci, which were collected from 12 Genome-Wide Association Studies. A comprehensive expression quantitative trait loci browser was also implemented. The UC ANALYSIS is an expression analysis toolkit for 37 UC expression array data sets, which contains 1098 samples. The toolkit can be used to do gene expression correlation, clustering, differentially expressed, and Gene Set Enrichment Analysis (GSEA). Conclusions UCDB provides a comprehensive collection of well-curated UC-related genes and drugs, and straightforward interfaces for gene expression analyses. UCDB is a useful leading resource for both basic and clinical research and will benefit UC community worldwide. UCDB is freely accessible at http://seiwertlab.uchicago.edu/UCDB.",UCDB,0.994101167,Ulcerative Colitis Database,0.981074442,UCDB,0.994101167,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/1/2015 +22881376,http://grenada.lumc.nl/LOVD2/UCL-Heart/home.php?select_db=LDLR,"Low-density lipoprotein receptor gene familial hypercholesterolemia variant database: update and pathological assessment. Familial hypercholesterolemia (FH) is caused predominately by variants in the low-density lipoprotein receptor gene (LDLR). 
We report here an update of the UCL LDLR variant database to include variants reported in the literature and in-house between 2008 and 2010, transfer of the database to LOVDv.2.0 platform (https://grenada.lumc.nl/LOVD2/UCL-Heart/home.php?select_db=LDLR) and pathogenicity analysis. The database now contains over 1288 different variants reported in FH patients: 55% exonic substitutions, 22% exonic small rearrangements (<100 bp), 11% large rearrangements (>100 bp), 2% promoter variants, 10% intronic variants and 1 variant in the 3' untranslated sequence. The distribution and type of newly reported variants closely matches that of the 2008 database, and we have used these variants (n= 223) as a representative sample to assess the utility of standard open access software (PolyPhen, SIFT, refined SIFT, Neural Network Splice Site Prediction Tool, SplicePort and NetGene2) and additional analyses (Single Amino Acid Polymorphism database, analysis of conservation and structure and Mutation Taster) for pathogenicity prediction. In combination, these techniques have enabled us to assign with confidence pathogenic predictions to 8/8 in-frame small rearrangements and 8/9 missense substitutions with previously discordant results from PolyPhen and SIFT analysis. Overall, we conclude that 79% of the reported variants are likely to be disease causing.",UCL LDLR,0.977101341,lipoprotein receptor gene familial hypercholesterolemia variant database,0.635834286,UCL LDLR,0.977101341,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/1/2012 +23226127,http://umcd.humanconnectomeproject.org,"The UCLA multimodal connectivity database: a web-based platform for brain connectivity matrix sharing and analysis. Brain connectomics research has rapidly expanded using functional MRI (fMRI) and diffusion-weighted MRI (dwMRI). A common product of these varied analyses is a connectivity matrix (CM). A CM stores the connection strength between any two regions (""nodes"") in a brain network. 
This format is useful for several reasons: (1) it is highly distilled, with minimal data size and complexity, (2) graph theory can be applied to characterize the network's topology, and (3) it retains sufficient information to capture individual differences such as age, gender, intelligence quotient (IQ), or disease state. Here we introduce the UCLA Multimodal Connectivity Database (http://umcd.humanconnectomeproject.org), an openly available website for brain network analysis and data sharing. The site is a repository for researchers to publicly share CMs derived from their data. The site also allows users to select any CM shared by another user, compute graph theoretical metrics on the site, visualize a report of results, or download the raw CM. To date, users have contributed over 2000 individual CMs, spanning different imaging modalities (fMRI, dwMRI) and disorders (Alzheimer's, autism, Attention Deficit Hyperactive Disorder). To demonstrate the site's functionality, whole brain functional and structural connectivity matrices are derived from 60 subjects' (ages 26-45) resting state fMRI (rs-fMRI) and dwMRI data and uploaded to the site. The site is utilized to derive graph theory global and regional measures for the rs-fMRI and dwMRI networks. Global and nodal graph theoretical measures between functional and structural networks exhibit low correspondence. This example demonstrates how this tool can enhance the comparability of brain networks from different imaging modalities and studies. 
The existence of this connectivity-based repository should foster broader data sharing and enable larger-scale meta-analyses comparing networks across imaging modality, age group, and disease state.",NA,0,UCLA Multimodal Connectivity Database,0.900062852,UCLA Multimodal Connectivity Database,0.900062852,1,26311606,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,11/28/2012 +23193254,http://ccg.vital-it.ch/UCNEbase,"UCNEbase--a database of ultraconserved non-coding elements and genomic regulatory blocks. UCNEbase (http://ccg.vital-it.ch/UCNEbase) is a free, web-accessible information resource on the evolution and genomic organization of ultra-conserved non-coding elements (UCNEs). It currently covers 4351 such elements in 18 different species. The majority of UCNEs are supposed to be transcriptional regulators of key developmental genes. As most of them occur as clusters near potential target genes, the database is organized along two hierarchical levels: individual UCNEs and ultra-conserved genomic regulatory blocks (UGRBs). UCNEbase introduces a coherent nomenclature for UCNEs reflecting their respective associations with likely target genes. Orthologous and paralogous UCNEs share components of their names and are systematically cross-linked. Detailed synteny maps between the human and other genomes are provided for all UGRBs. UCNEbase is managed by a relational database system and can be accessed by a variety of web-based query pages. As it relies on the UCSC genome browser as visualization platform, a large part of its data content is also available as browser viewable custom track files. UCNEbase is potentially useful to any computational, experimental or evolutionary biologist interested in conserved non-coding DNA elements in vertebrates.",UCNEbase,0.997873008,NA,0,UCNEbase,0.997873008,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/27/2012 +27899642,http://genome.ucsc.edu,"The UCSC Genome Browser database: 2017 update. 
Since its 2001 debut, the University of California, Santa Cruz (UCSC) Genome Browser (http://genome.ucsc.edu/) team has provided continuous support to the international genomics and biomedical communities through a web-based, open source platform designed for the fast, scalable display of sequence alignments and annotations landscaped against a vast collection of quality reference genome assemblies. The browser's publicly accessible databases are the backbone of a rich, integrated bioinformatics tool suite that includes a graphical interface for data queries and downloads, alignment programs, command-line utilities and more. This year's highlights include newly designed home and gateway pages; a new 'multi-region' track display configuration for exon-only, gene-only and custom regions visualization; new genome browsers for three species (brown kiwi, crab-eating macaque and Malayan flying lemur); eight updated genome assemblies; extended support for new data types such as CRAM, RNA-seq expression data and long-range chromatin interaction pairs; and the unveiling of a new supported mirror site in Japan.",UCSC,0.904486577,Genome Browser,0.54243503,UCSC,0.904486577,1,"22086951.0, 23155063.0, 24270787.0, 25428374.0, 30407534.0, 26590259.0, 33221922.0","24270787.0, 25685613.0, 23109555.0, 25392408.0",low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,11/29/2016 +25685613,http://genome.ucsc.edu/ebolaPortal,"The UCSC Ebola Genome Portal. . With the Ebola epidemic raging out of control in West Africa, there has been a flurry of research into the Ebola virus, resulting in the generation of much genomic data. 
In response to the clear need for tools that integrate multiple strands of research around molecular sequences, we have created the University of California Santa Cruz (UCSC) Ebola Genome Browser, an adaptation of our popular UCSC Genome Browser web tool, which can be used to view the Ebola virus genome sequence from GenBank and nearly 30 annotation tracks generated by mapping external data to the reference sequence. Significant annotations include a multiple alignment comprising 102 Ebola genomes from the current outbreak, 56 from previous outbreaks, and 2 Marburg genomes as an outgroup; a gene track curated by NCBI; protein annotations curated by UniProt and antibody-binding epitopes curated by IEDB. We have extended the Genome Browser's multiple alignment color-coding scheme to distinguish mutations resulting from non-synonymous coding changes, synonymous changes, or changes in untranslated regions. Our Ebola Genome portal at http://genome.ucsc.edu/ebolaPortal/ links to the Ebola virus Genome Browser and an aggregate of useful information, including a collection of Ebola antibodies we are curating.",UCSC,0.776047568,Genome,0.4657709,UCSC,0.776047568,1,NA,"24270787.0, 27899642.0, 23109555.0, 25392408.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,11/7/2014 +"23109555, 25392408",http://genome-cancer.ucsc.edu,"The UCSC Cancer Genomics Browser: update 2013. The UCSC Cancer Genomics Browser (https://genome-cancer.ucsc.edu/) is a set of web-based tools to display, investigate and analyse cancer genomics data and its associated clinical information. The browser provides whole-genome to base-pair level views of several different types of genomics data, including some next-generation sequencing platforms. The ability to view multiple datasets together allows users to make comparisons across different data and cancer types. 
Biological pathways, collections of genes, genomic or clinical information can be used to sort, aggregate and zoom into a group of samples. We currently display an expanding set of data from various sources, including 201 datasets from 22 TCGA (The Cancer Genome Atlas) cancers as well as data from Cancer Cell Line Encyclopedia and Stand Up To Cancer. New features include a completely redesigned user interface with an interactive tutorial and updated documentation. We have also added data downloads, additional clinical heatmap features, and an updated Tumor Image Browser based on Google Maps. New security features allow authenticated users access to private datasets hosted by several different consortia through the public website.",UCSC,0.761313081,Genomics Browser,0.679223597,UCSC,0.761313081,2,NA,"24270787.0, 25685613.0, 27899642.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,11/11/2014 +24270787,"http://genome.ucsc.edu, http://genome-euro.ucsc.edu","The UCSC Genome Browser database: 2014 update. The University of California Santa Cruz (UCSC) Genome Browser (http://genome.ucsc.edu) offers online public access to a growing database of genomic sequence and annotations for a large collection of organisms, primarily vertebrates, with an emphasis on the human and mouse genomes. The Browser's web-based tools provide an integrated environment for visualizing, comparing, analysing and sharing both publicly available and user-generated genomic data sets. As of September 2013, the database contained genomic sequence and a basic set of annotation 'tracks' for ∼90 organisms. Significant new annotations include a 60-species multiple alignment conservation track on the mouse, updated UCSC Genes tracks for human and mouse, and several new sets of variation and ENCODE data. 
New software tools include a Variant Annotation Integrator that returns predicted functional effects of a set of variants uploaded as a custom track, an extension to UCSC Genes that displays haplotype alleles for protein-coding genes and an expansion of data hubs that includes the capability to display remotely hosted user-provided assembly sequence in addition to annotation data. To improve European access, we have added a Genome Browser mirror (http://genome-euro.ucsc.edu) hosted at Bielefeld University in Germany.",UCSC,0.757500827,Cruz,0.51517272,UCSC,0.757500827,1,"22086951.0, 23155063.0, 25428374.0, 27899642.0, 30407534.0, 26590259.0, 33221922.0","25685613.0, 27899642.0, 23109555.0, 25392408.0",low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,11/21/2013 +22080555,http://archaea.ucsc.edu,"The UCSC Archaeal Genome Browser: 2012 update. The UCSC Archaeal Genome Browser (http://archaea.ucsc.edu) offers a graphical web-based resource for exploration and discovery within archaeal and other selected microbial genomes. By bringing together existing gene annotations, gene expression data, multiple-genome alignments, pre-computed sequence comparisons and other specialized analysis tracks, the genome browser is a powerful aggregator of varied genomic information. The genome browser environment maintains the current look-and-feel of the vertebrate UCSC Genome Browser, but also integrates archaeal and bacterial-specific tracks with a few graphic display enhancements. The browser currently contains 115 archaeal genomes, plus 31 genomes of viruses known to infect archaea. 
Some of the recently developed or enhanced tracks visualize data from published high-throughput RNA-sequencing studies, the NCBI Conserved Domain Database, sequences from pre-genome sequencing studies, predicted gene boundaries from three different protein gene prediction algorithms, tRNAscan-SE gene predictions with RNA secondary structures and CRISPR locus predictions. We have also developed a companion resource, the Archaeal COG Browser, to provide better search and display of arCOG gene function classifications, including their phylogenetic distribution among available archaeal genomes.",arCOG,0.772361755,UCSC Archaeal Genome Browser,0.798626341,UCSC Archaeal Genome Browser,0.798626341,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/12/2011 +30407534,http://genome.ucsc.edu,"The UCSC Genome Browser database: 2019 update. The UCSC Genome Browser (https://genome.ucsc.edu) is a graphical viewer for exploring genome annotations. For almost two decades, the Browser has provided visualization tools for genetics and molecular biology and continues to add new data and features. This year, we added a new tool that lets users interactively arrange existing graphing tracks into new groups. Other software additions include new formats for chromosome interactions, a ChIP-Seq peak display for track hubs and improved support for HGVS. On the annotation side, we have added gnomAD, TCGA expression, RefSeq Functional elements, GTEx eQTLs, CRISPR Guides, SNPpedia and created a 30-way primate alignment on the human genome. Nine assemblies now have RefSeq-mapped gene models.",UCSC Genome,0.559859134,Genome Browser,0.479805579,UCSC Genome,0.559859134,1,"22086951.0, 23155063.0, 24270787.0, 25428374.0, 27899642.0, 26590259.0, 33221922.0",NA,low_prob_best_name,remove,conflicting record(s) to be removed,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2019 +"26590259, 33221922",http://genome.ucsc.edu,"The UCSC Genome Browser database: 2016 update. 
For the past 15 years, the UCSC Genome Browser (http://genome.ucsc.edu/) has served the international research community by offering an integrated platform for viewing and analyzing information from a large database of genome assemblies and their associated annotations. The UCSC Genome Browser has been under continuous development since its inception with new data sets and software features added frequently. Some release highlights of this year include new and updated genome browsers for various assemblies, including bonobo and zebrafish; new gene annotation sets; improvements to track and assembly hub support; and a new interactive tool, the ""Data Integrator"", for intersecting data from multiple tracks. We have greatly expanded the data sets available on the most recent human assembly, hg38/GRCh38, to include updated gene prediction sets from GENCODE, more phenotype- and disease-associated variants from ClinVar and ClinGen, more genomic regulatory data, and a new multiple genome alignment.",UCSC Genome Browser,0.851264405,UCSC Genome Browser,0.851264405,UCSC Genome Browser,0.851264405,2,"22086951.0, 23155063.0, 24270787.0, 25428374.0, 27899642.0, 30407534.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,1/1/2021 +25428374,http://genome.ucsc.edu,"The UCSC Genome Browser database: 2015 update. Launched in 2001 to showcase the draft human genome assembly, the UCSC Genome Browser database (http://genome.ucsc.edu) and associated tools continue to grow, providing a comprehensive resource of genome assemblies and annotations to scientists and students worldwide. Highlights of the past year include the release of a browser for the first new human genome reference assembly in 4 years in December 2013 (GRCh38, UCSC hg38), a watershed comparative genomics annotation (100-species multiple alignment and conservation) and a novel distribution mechanism for the browser (GBiB: Genome Browser in a Box). 
We created browsers for new species (Chinese hamster, elephant shark, minke whale), 'mined the web' for DNA sequences and expanded the browser display with stacked color graphs and region highlighting. As our user community increasingly adopts the UCSC track hub and assembly hub representations for sharing large-scale genomic annotation data sets and genome sequencing projects, our menu of public data hubs has tripled.",UCSC Genome,0.747623608,UCSC Genome Browser database,0.78975298,UCSC Genome Browser database,0.78975298,1,"22086951.0, 23155063.0, 24270787.0, 27899642.0, 30407534.0, 26590259.0, 33221922.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,11/26/2014 +26590254,http://mammoth.bcm.tmc.edu/uet,"UET: a database of evolutionarily-predicted functional determinants of protein sequences that cluster as functional sites in protein structures. The structure and function of proteins underlie most aspects of biology and their mutational perturbations often cause disease. To identify the molecular determinants of function as well as targets for drugs, it is central to characterize the important residues and how they cluster to form functional sites. The Evolutionary Trace (ET) achieves this by ranking the functional and structural importance of the protein sequence positions. ET uses evolutionary distances to estimate functional distances and correlates genotype variations with those in the fitness phenotype. Thus, ET ranks are worse for sequence positions that vary among evolutionarily closer homologs but better for positions that vary mostly among distant homologs. This approach identifies functional determinants, predicts function, guides the mutational redesign of functional and allosteric specificity, and interprets the action of coding sequence variations in proteins, people and populations. 
Now, the UET database offers pre-computed ET analyses for the protein structure databank, and on-the-fly analysis of any protein sequence. A web interface retrieves ET rankings of sequence positions and maps results to a structure to identify functionally important regions. This UET database integrates several ways of viewing the results on the protein sequence or structure and can be found at http://mammoth.bcm.tmc.edu/uet/.",UET,0.958269477,NA,0,UET,0.958269477,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/20/2015 +32548865,http://www.immunologicaltoolbox.co.uk,"The UK Veterinary Immunological Toolbox Website: promoting vaccine research by facilitating communication and removing reagent barriers. Using the best animal models to study immune responses against specific pathogens or vaccines can dramatically accelerate our understanding. Veterinary species are well studied, particularly livestock, to reduce their disease burden. They have also proven to be powerful models, especially for zoonotic pathogens and novel vaccination strategies. A prerequisite for any model selection is having the right quality and range of species-specific immunological reagents. To help promote the widest possible use of veterinary species, an open access website (https://www.immunologicaltoolbox.co.uk) has been created as a central community annotated hub for veterinary immunological reagents. The website is also the portal into services offered by the UK Immunological Toolbox project that includes antibody generation, sequencing and recombinant expression. The funding for this effort is linked into sustainable sources, but ultimate success relies on community engagement to continually increase the quality and quantity of information. 
It is hoped that as more users and reagent owners engage, it will become an essential resource for researchers, veterinarians and clinicians alike by removing barriers that prevent the use of the most informative animal models.",UK Immunological Toolbox,0.771031559,NA,0,UK Immunological Toolbox,0.771031559,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/29/2020 +26311606,http://umcd.humanconnectomeproject.org,"Connected brains and minds--The UMCD repository for brain connectivity matrices. We describe the USC Multimodal Connectivity Database (http://umcd.humanconnectomeproject.org), an interactive web-based platform for brain connectivity matrix sharing and analysis. The site enables users to download connectivity matrices shared by other users, upload matrices from their own published studies, or select a specific matrix and perform a real-time graph theory-based analysis and visualization of network properties. The data shared on the site span a broad spectrum of functional and structural brain connectivity information from humans across the entire age range (fetal to age 89), representing an array of different neuropsychiatric and neurodegenerative disease populations (autism spectrum disorder, ADHD, and APOE-4 carriers). An analysis combining 7 different datasets shared on the site illustrates the diversity of the data and the potential for yielding deeper insight by assessing new connectivity matrices with respect to population-wide network properties represented in the UMCD.",UMCD,0.88225615,Connectivity,0.573278666,UMCD,0.88225615,1,23226127,NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,8/24/2015 +"23161690, 24665023",http://p53.fr,"The TP53 website: an integrative resource centre for the TP53 mutation database and TP53 mutant analysis. A novel resource centre for TP53 mutations and mutants has been developed (http://p53.fr). TP53 gene dysfunction can be found in the majority of human cancer types. 
The potential use of TP53 mutation as a biomarker for clinical studies or exposome analysis has led to the publication of thousands of reports describing the TP53 gene status in >10,000 tumours. The UMD TP53 mutation database was created in 1990 and has been regularly updated. The 2012 release of the database has been carefully curated, and all suspicious reports have been eliminated. It is available either as a flat file that can be easily manipulated or as novel multi-platform analytical software that has been designed to analyse various aspects of TP53 mutations. Several tools to ascertain TP53 mutations are also available for download. We have developed TP53MULTLoad, a manually curated database providing comprehensive details on the properties of 2549 missense TP53 mutants. More than 100,000 entries have been arranged in 39 different activity fields, such as change of transactivation on various promoters, apoptosis or growth arrest. For several hot spot mutants, multiple gain of function activities are also included. The database can be easily browsed via a graphical user interface.",UMD,0.597225606,NA,0,UMD,0.597225606,2,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,6/1/2014 +24599579,http://www.umd.be/APC,"The UMD-APC database, a model of nation-wide knowledge base: update with data from 3,581 variations. Familial adenomatous polyposis (FAP) is a rare autosomal-inherited disease that highly predisposes to colorectal cancer, characterized by a diffuse duodenal and colorectal polyposis associated with various extradigestive tumors and linked to germline mutations within the APC gene. A French consortium of laboratories involved in APC mutation screening has progressively improved the description of the variation spectrum, inferred functional significance of nontruncating variations, and delineated phenotypic characteristics of the disease. The current version of the UMD-APC database is described here. 
The total number of variations has risen to 5,453 representing 1,473 distinct variations. The published records initially registered into the database were extended with 3,581 germline variations found through genetic testing performed by the eight licensed laboratories belonging to the French APC network. Sixty six of 149 variations of previously unknown significance have now been classified as (likely) causal or neutral. The database is available on the Internet (http://www.umd.be/APC/) and updated twice per year according to the consensus rules of the network. The UMD-APC database is thus expected to facilitate functional classification of rare synonymous, nonsynonymous, and intronic mutations and consequently improve genetic counseling and medical care in FAP families.",UMD-APC,0.840977097,NA,0,UMD-APC,0.840977097,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/7/2014 +32454857,http://jafarilab.com/unaprod,"UNaProd: A Universal Natural Product Database for Materia Medica of Iranian Traditional Medicine. Background Iranian traditional medicine (ITM) is a holistic medical system that uses a wide range of medicinal substances to treat disease. Reorganization and standardization of the data on ITM concepts is a necessity for optimal use of this rich source. In an initial step towards this goal, we created a database of ITM materia medica. Main Body. Primarily based on Makhzan al-Advieh, which is the most recent encyclopedia of materia medica in ITM with the largest number of monographs, a database of natural medicinal substances was created using both text mining methods and manual editing. UNaProd, a Universal Natural Product database for materia medica of ITM, is currently host to 2696 monographs, from herbal to animal to mineral compounds in 16 diverse attributes such as origin and scientific name. 
Currently, systems biology, and more precisely systems medicine and pharmacology, can be an aid in providing rationalizations for many traditional medicines and elucidating a great deal of knowledge they can offer to guide future research in medicine. Conclusions A database of materia medica is a stepping stone in creating a systems pharmacology platform of ITM that encompasses the relationships between the drugs, their targets, and diseases. UNaProd is hyperlinked to IrGO and CMAUP databases for Mizaj and molecular features, respectively, and it is freely available at http://jafarilab.com/unaprod/.",UNaProd,0.996470153,NA,0,UNaProd,0.996470153,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/13/2020 +27813701,http://barley.gambrinus.ueb.cas.cz,"UNcleProt (Universal Nuclear Protein database of barley): The first nuclear protein database that distinguishes proteins from different phases of the cell cycle. Proteins are the most abundant component of the cell nucleus, where they perform a plethora of functions, including the assembly of long DNA molecules into condensed chromatin, DNA replication and repair, regulation of gene expression, synthesis of RNA molecules and their modification. Proteins are important components of nuclear bodies and are involved in the maintenance of the nuclear architecture, transport across the nuclear envelope and cell division. Given their importance, the current poor knowledge of plant nuclear proteins and their dynamics during the cell's life and division is striking. Several factors hamper the analysis of the plant nuclear proteome, but the most critical seems to be the contamination of nuclei by cytosolic material during their isolation. With the availability of an efficient protocol for the purification of plant nuclei, based on flow cytometric sorting, contamination by cytoplasmic remnants can be minimized. Moreover, flow cytometry allows the separation of nuclei in different stages of the cell cycle (G1, S, and G2). 
This strategy has led to the identification of large number of nuclear proteins from barley (Hordeum vulgare), thus triggering the creation of a dedicated database called UNcleProt, http://barley.gambrinus.ueb.cas.cz/ .",UNcleProt,0.993122756,Universal Nuclear Protein database of barley,0.863325749,UNcleProt,0.993122756,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/4/2016 +21398669,http://www.unicarb-db.org,"UniCarb-DB: a database resource for glycomic discovery. Unlabelled Glycosylation is one of the most important post-translational modifications of proteins, known to be involved in pathogen recognition, innate immune response and protection of epithelial membranes. However, when compared to the tools and databases available for the processing of high-throughput proteomic data, the glycomic domain is severely lacking. While tools to assist the analysis of mass spectrometry (MS) and HPLC are continuously improving, there are few resources available to support liquid chromatography (LC)-MS/MS techniques for glycan structure profiling. Here, we present a platform for presenting oligosaccharide structures and fragment data characterized by LC-MS/MS strategies. The database is annotated with high-quality datasets and is designed to extend and reinforce those standards and ontologies developed by existing glycomics databases. Availability http://www.unicarb-db.org",UniCarb,0.640729487,NA,0,UniCarb,0.640729487,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/12/2011 +28150237,"http://unicarbkb.org, http://confluence.unicarbkb.org","Navigating the Glycome Space and Connecting the Glycoproteome. UniCarbKB ( http://unicarbkb.org ) is a comprehensive resource for mammalian glycoprotein and annotation data. In particular, the database provides information on the oligosaccharides characterized from a glycoprotein at either the global or site-specific level. 
This evidence is accumulated from a peer-reviewed and manually curated collection of information on oligosaccharides derived from membrane and secreted glycoproteins purified from biological fluids and/or tissues. This information is further supplemented with experimental method descriptions that summarize important sample preparation and analytical strategies. A new release of UniCarbKB is published every three months, each includes a collection of curated data and improvements to database functionality. In this Chapter, we outline the objectives of UniCarbKB, and describe a selection of step-by-step workflows for navigating the information available. We also provide a short description of web services available and future plans for improving data access. The information presented in this Chapter supplements content available in our knowledgebase including regular updates on interface improvements, new features, and revisions to the database content ( http://confluence.unicarbkb.org ).",UniCarbKB,0.998963594,NA,0,UniCarbKB,0.998963594,1,24234447,"21898825.0, 24234447.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2017 +24234447,http://unicarbkb.org,"UniCarbKB: building a knowledge platform for glycoproteomics. The UniCarb KnowledgeBase (UniCarbKB; http://unicarbkb.org) offers public access to a growing, curated database of information on the glycan structures of glycoproteins. UniCarbKB is an international effort that aims to further our understanding of structures, pathways and networks involved in glycosylation and glyco-mediated processes by integrating structural, experimental and functional glycoscience information. This initiative builds upon the success of the glycan structure database GlycoSuiteDB, together with the informatic standards introduced by EUROCarbDB, to provide a high-quality and updated resource to support glycomics and glycoproteomics research. 
UniCarbKB provides comprehensive information concerning glycan structures, and published glycoprotein information including global and site-specific attachment information. For the first release over 890 references, 3740 glycan structure entries and 400 glycoproteins have been curated. Further, 598 protein glycosylation sites have been annotated with experimentally confirmed glycan structures from the literature. Among these are 35 glycoproteins, 502 structures and 60 publications previously not included in GlycoSuiteDB. This article provides an update on the transformation of GlycoSuiteDB (featured in previous NAR Database issues and hosted by ExPASy since 2009) to UniCarbKB and its integration with UniProtKB and GlycoMod. Here, we introduce a refactored database, supported by substantial new curated data collections and intuitive user-interfaces that improve database searching.",UniCarbKB,0.998551776,NA,0,UniCarbKB,0.998551776,1,28150237,"21898825.0, 28150237.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,11/13/2013 +21898825,http://www.unicarbkb.org,"UniCarbKB: putting the pieces together for glycomics research. Despite the success of several international initiatives the glycosciences still lack a managed infrastructure that contributes to the advancement of research through the provision of comprehensive structural and experimental glycan data collections. UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. The knowledgebase will offer a freely accessible and information-rich resource supported by querying interfaces, annotation technologies and the adoption of common standards to integrate structural, experimental and functional data. 
The UniCarbKB framework endeavors to support the growth of glycobioinformatics and the dissemination of knowledge through the provision of an open and unified portal to encourage the sharing of data. In order to achieve this, the framework is committed to the development of tools and procedures that support data annotation, and expanding interoperability through cross-referencing of existing databases. Database URL: http://www.unicarbkb.org.",UniCarbKB,0.998428583,NA,0,UniCarbKB,0.998428583,1,NA,"24234447.0, 28150237.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,9/19/2011 +"22218860, 24214987",http://www.unihi.org,"The integration and annotation of the human interactome in the UniHI Database. In recent years, remarkable progress has been made toward the systematic charting of human protein interactions. The utilization of the generated interaction data remained however challenging for biomedical researchers due to lack of integration of currently available resources. To facilitate the direct access and analysis of the human interactome, we have developed the Unified Human Interactome (UniHI) database. It provides researchers with a user-friendly Web-interface and integrates interaction data from 12 major resources in its latest version, establishing one of the largest catalogs for human PPIs worldwide. At present, UniHI houses over 250,000 distinct interactions between 22,300 unique proteins and is publically available at http://www.unihi.org.",UniHI,0.993736625,Unified Human Interactome,0.957470024,UniHI,0.993736625,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2013 +33174598,http://www.unilectin.eu,"LectomeXplore, an update of UniLectin for the discovery of carbohydrate-binding proteins based on a new lectin classification. Lectins are non-covalent glycan-binding proteins mediating cellular interactions but their annotation in newly sequenced organisms is lacking. 
The limited size of functional domains and the low level of sequence similarity challenge usual bioinformatics tools. The identification of lectin domains in proteomes requires the manual curation of sequence alignments based on structural folds. A new lectin classification is proposed. It is built on three levels: (i) 35 lectin domain folds, (ii) 109 classes of lectins sharing at least 20% sequence similarity and (iii) 350 families of lectins sharing at least 70% sequence similarity. This information is compiled in the UniLectin platform that includes the previously described UniLectin3D database of curated lectin 3D structures. Since its first release, UniLectin3D has been updated with 485 additional 3D structures. The database is now complemented by two additional modules: PropLec containing predicted √ɬÉ√Ǭé√ɬÇ√Ǭ≤-propeller lectins and LectomeXplore including predicted lectins from sequences of the NBCI-nr and UniProt for every curated lectin class. UniLectin is accessible at https://www.unilectin.eu/.",UniLectin,0.981569886,NA,0,UniLectin,0.981569886,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +30239928,http://www.unilectin.eu/unilectin3D,"UniLectin3D, a database of carbohydrate binding proteins with curated information on 3D structures and interacting ligands. Lectins, and related receptors such as adhesins and toxins, are glycan-binding proteins from all origins that decipher the glycocode, i.e. the structural information encoded in the conformation of complex carbohydrates present on the surface of all cells. Lectins are still poorly classified and annotated, but since their functions are based on ligand recognition, their 3D-structures provide a solid foundation for characterization. UniLectin3D is a curated database that classifies lectins on origin and fold, with cross-links to literature, other databases in glycosciences and functional data such as known specificity. 
The database provides detailed information on lectins, their bound glycan ligands, and features their interactions using the Protein-Ligand Interaction Profiler (PLIP) server. Special care was devoted to the description of the bound glycan ligands with the use of simple graphical representation and numerical format for cross-linking to other databases in glycoscience. We conceived the design of the database architecture and the navigation tools to account for all organisms, as well as to search for oligosaccharide epitopes complexed within specified binding sites. UniLectin3D is accessible at https://www.unilectin.eu/unilectin3D.",UniLectin3D,0.978693748,NA,0,UniLectin3D,0.978693748,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +22102589,http://www.unipathway.org,"UniPathway: a resource for the exploration and annotation of metabolic pathways. UniPathway (http://www.unipathway.org) is a fully manually curated resource for the representation and annotation of metabolic pathways. UniPathway provides explicit representations of enzyme-catalyzed and spontaneous chemical reactions, as well as a hierarchical representation of metabolic pathways. This hierarchy uses linear subpathways as the basic building block for the assembly of larger and more complex pathways, including species-specific pathway variants. All of the pathway data in UniPathway has been extensively cross-linked to existing pathway resources such as KEGG and MetaCyc, as well as sequence resources such as the UniProt KnowledgeBase (UniProtKB), for which UniPathway provides a controlled vocabulary for pathway annotation. 
We introduce here the basic concepts underlying the UniPathway resource, with the aim of allowing users to fully exploit the information provided by UniPathway.",UniPathway,0.997355998,NA,0,UniPathway,0.997355998,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/18/2011 +25378322,http://uniprobe.org,"UniPROBE, update 2015: new tools and content for the online database of protein-binding microarray data on protein-DNA interactions. The Universal PBM Resource for Oligonucleotide Binding Evaluation (UniPROBE) serves as a convenient source of information on published data generated using universal protein-binding microarray (PBM) technology, which provides in vitro data about the relative DNA-binding preferences of transcription factors for all possible sequence variants of a length k ('k-mers'). The database displays important information about the proteins and displays their DNA-binding specificity data in terms of k-mers, position weight matrices and graphical sequence logos. This update to the database documents the growth of UniPROBE since the last update 4 years ago, and introduces a variety of new features and tools, including a new streamlined pipeline that facilitates data deposition by universal PBM data generators in the research community, a tool that generates putative nonbinding (i.e. negative control) DNA sequences for one or more proteins and novel motifs obtained by analyzing the PBM data using the BEEML-PBM algorithm for motif inference. The UniPROBE database is available at http://uniprobe.org.",UniPROBE,0.997834285,Universal PBM Resource for Oligonucleotide Binding Evaluation,0.908042654,UniPROBE,0.997834285,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/5/2014 +"22102590, 23161681, 24253303, 25348405, 26519399, 30395287",http://www.uniprot.org,"Reorganizing the protein space at the Universal Protein Resource (UniProt). 
The mission of UniProt is to support biological research by providing a freely accessible, stable, comprehensive, fully classified, richly and accurately annotated protein sequence knowledgebase, with extensive cross-references and querying interfaces. UniProt is comprised of four major components, each optimized for different uses: the UniProt Archive, the UniProt Knowledgebase, the UniProt Reference Clusters and the UniProt Metagenomic and Environmental Sequence Database. A key development at UniProt is the provision of complete, reference and representative proteomes. UniProt is updated and distributed every 4 weeks and can be accessed online for searches or download at http://www.uniprot.org.",UniProt,0.997342706,Universal Protein Resource,0.747594476,UniProt,0.997342706,6,"21447597.0, 33237286.0",27899622,NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2019 +27899622,"http://sparql.uniprot.org/, http://www.uniprot.org","UniProt: the universal protein knowledgebase. The UniProt knowledgebase is a large resource of protein sequences and associated detailed annotation. The database contains over 60 million sequences, of which over half a million sequences have been curated by experts who critically review experimental and predicted data for each protein. The remainder are automatically annotated based on rule systems that rely on the expert curated knowledge. Since our last update in 2014, we have more than doubled the number of reference proteomes to 5631, giving a greater coverage of taxonomic diversity. We implemented a pipeline to remove redundant highly similar proteomes that were causing excessive redundancy in UniProt. The initial run of this pipeline reduced the number of sequences in UniProt by 47 million. 
For our users interested in the accessory proteomes, we have made available sets of pan proteome sequences that cover the diversity of sequences for each species that is found in its strains and sub-strains. To help interpretation of genomic variants, we provide tracks of detailed protein information for the major genome browsers. We provide a SPARQL endpoint that allows complex queries of the more than 22 billion triples of data in UniProt (http://sparql.uniprot.org/). UniProt resources can be accessed via the website at http://www.uniprot.org/.",UniProt,0.995713353,NA,0,UniProt,0.995713353,1,NA,"22102590.0, 23161681.0, 24253303.0, 25348405.0, 26519399.0, 30395287.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/29/2016 +23603847,http://pfam.sanger.ac.uk,"The challenge of increasing Pfam coverage of the human proteome. It is a worthy goal to completely characterize all human proteins in terms of their domains. Here, using the Pfam database, we asked how far we have progressed in this endeavour. Ninety per cent of proteins in the human proteome matched at least one of 5494 manually curated Pfam-A families. In contrast, human residue coverage by Pfam-A families was <45%, with 9418 automatically generated Pfam-B families adding a further 10%. Even after excluding predicted signal peptide regions and short regions (<50 consecutive residues) unlikely to harbour new families, for √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº38% of the human protein residues, there was no information in Pfam about conservation and evolutionary relationship with other protein regions. This uncovered portion of the human proteome was found to be distributed over almost 25 000 distinct protein regions. Comparison with proteins in the UniProtKB database suggested that the human regions that exhibited similarity to thousands of other sequences were often either divergent elements or N- or C-terminal extensions of existing families. 
Thirty-four per cent of regions, on the other hand, matched fewer than 100 sequences in UniProtKB. Most of these did not appear to share any relationship with existing Pfam-A families, suggesting that thousands of new families would need to be generated to cover them. Also, these latter regions were particularly rich in amino acid compositional bias such as the one associated with intrinsic disorder. This could represent a significant obstacle toward their inclusion into new Pfam families. Based on these observations, a major focus for increasing Pfam coverage of the human proteome will be to improve the definition of existing families. New families will also be built, prioritizing those that have been experimentally functionally characterized. Database URL: http://pfam.sanger.ac.uk/",UniProtKB,0.856226146,NA,0,UniProtKB,0.856226146,1,NA,"22465017.0, 33724838.0, 21447597.0, 33237286.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME,NA,NA,4/19/2013 +22465017,http://www.uniprot.org/program/Toxins,"The UniProtKB/Swiss-Prot Tox-Prot program: A central hub of integrated venom protein data. Animal toxins are of interest to a wide range of scientists, due to their numerous applications in pharmacology, neurology, hematology, medicine, and drug research. This, and to a lesser extent the development of new performing tools in transcriptomics and proteomics, has led to an increase in toxin discovery. In this context, providing publicly available data on animal toxins has become essential. The UniProtKB/Swiss-Prot Tox-Prot program (http://www.uniprot.org/program/Toxins) plays a crucial role by providing such an access to venom protein sequences and functions from all venomous species. This program has up to now curated more than 5000 venom proteins to the high-quality standards of UniProtKB/Swiss-Prot (release 2012_02). Proteins targeted by these toxins are also available in the knowledgebase. 
This paper describes in details the type of information provided by UniProtKB/Swiss-Prot for toxins, as well as the structured format of the knowledgebase.",UniProtKB,0.763967097,NA,0,UniProtKB,0.763967097,1,NA,"23603847.0, 33724838.0, 21447597.0, 33237286.0",low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: PARTIAL NAME,NA,NA,3/23/2012 +33724838,http://macpepdb.mpc.rub.de,"MaCPepDB: A Database to Quickly Access All Tryptic Peptides of the UniProtKB. Protein sequence databases play a crucial role in the majority of the currently applied mass-spectrometry-based proteomics workflows. Here UniProtKB serves as one of the major sources, as it combines the information of several smaller databases and enriches the entries with additional biological information. For the identification of peptides in a sample by tandem mass spectra, as generated by data-dependent acquisition, protein sequence databases provide the basis for most spectrum identification search engines. In addition, for targeted proteomics approaches like selected reaction monitoring (SRM) and parallel reaction monitoring (PRM), knowledge of the peptide sequences, their masses, and whether they are unique for a protein is essential. Because most bottom-up proteomics approaches use trypsin to cleave the proteins in a sample, the tryptic peptides contained in a protein database are of great interest. We present a database, called MaCPepDB (mass-centric peptide database), that consists of the complete tryptic digest of the Swiss-Prot and TrEMBL parts of UniProtKB. This database is especially designed to not only allow queries of peptide sequences and return the respective information about connected proteins and thus whether a peptide is unique but also allow queries of specific masses of peptides or precursors of MS/MS spectra. Furthermore, posttranslational modifications can be considered in a query as well as different mass deviations for posttranslational modifications. 
Hence the database can be used by a sequence query not only to, for example, check in which proteins of the UniProt database a tryptic peptide can be found but also to find possibly interfering peptides in PRM/SRM experiments using the mass query. The complete database contains currently 5√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬØ939√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬØ244√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬØ990 peptides from 185√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬØ561√ɬÉ√Ǭ¢√ɬÇ√ǬÄ√ɬÇ√ǬØ610 proteins (UniProt version 2020_03), for which a single query usually takes less than 1 s. For easy exploration of the data, a web interface was developed. A REST application programming interface (API) for programmatic and workflow access is also available at https://macpepdb.mpc.rub.de.",UniProtKB,0.99742651,NA,0,UniProtKB,0.99742651,1,NA,"22465017.0, 23603847.0, 21447597.0, 33237286.0",NA,NA,NA,do not merge,NA,NA,noting predicted name incorrect,3/16/2021 +"21447597, 33237286",http://www.uniprot.org,"UniProt Knowledgebase: a hub of integrated protein data. The UniProt Knowledgebase (UniProtKB) acts as a central hub of protein knowledge by providing a unified view of protein sequence and functional information. Manual and automatic annotation procedures are used to add data directly to the database while extensive cross-referencing to more than 120 external databases provides access to additional relevant information in more specialized data collections. UniProtKB also integrates a range of data from other resources. All information is attributed to its original source, allowing users to trace the provenance of all data. The UniProt Consortium is committed to using and promoting common data exchange formats and technologies, and UniProtKB data is made freely available in a range of formats to facilitate integration with other databases. 
Database URL: http://www.uniprot.org/",UniProtKB,0.997420847,NA,0,UniProtKB,0.997420847,2,"22102590.0, 23161681.0, 24253303.0, 25348405.0, 26519399.0, 30395287.0","22465017.0, 23603847.0, 33724838.0",NA,NA,merge on record with best name prob,do not merge,NA,NA,should be merged with Uniprot; should be merged if URLs are done first,1/1/2021 +30371820,http://unite.ut.ee,"The UNITE database for molecular identification of fungi: handling dark taxa and parallel taxonomic classifications. UNITE (https://unite.ut.ee/) is a web-based database and sequence management environment for the molecular identification of fungi. It targets the formal fungal barcode-the nuclear ribosomal internal transcribed spacer√ɬÉ√ǬÇ√ɬÇ√Ǭ†(ITS) region-and offers all √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº1 000√ɬÉ√ǬÇ√ɬÇ√Ǭ†000 public fungal ITS sequences for reference. These are clustered into √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº459√ɬÉ√ǬÇ√ɬÇ√Ǭ†000 species hypotheses and assigned digital object identifiers (DOIs) to promote unambiguous reference across studies. In-house and web-based third-party sequence curation and annotation have resulted in more than 275√ɬÉ√ǬÇ√ɬÇ√Ǭ†000 improvements to the data over the past 15 years. UNITE serves as a data provider for a range of metabarcoding software pipelines and regularly exchanges data with all major fungal sequence databases and other community resources. Recent improvements include redesigned handling of unclassifiable species hypotheses, integration with the taxonomic backbone of the Global Biodiversity Information Facility, and support for an unlimited number of parallel taxonomic classification systems.",UNITE,0.997397006,NA,0,UNITE,0.997397006,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +23314752,http://univio.psc.riken.jp,"UniVIO: a multiple omics database with hormonome and transcriptome data from rice. Plant hormones play important roles as signaling molecules in the regulation of growth and development by controlling the expression of downstream genes. 
Since the hormone signaling system represents a complex network involving functional cross-talk through the mutual regulation of signaling and metabolism, a comprehensive and integrative analysis of plant hormone concentrations and gene expression is important for a deeper understanding of hormone actions. We have developed a database named Uniformed Viewer for Integrated Omics (UniVIO: http://univio.psc.riken.jp/), which displays hormone-metabolome (hormonome) and transcriptome data in a single formatted (uniformed) heat map. At the present time, hormonome and transcriptome data obtained from 14 organ parts of rice plants at the reproductive stage and seedling shoots of three gibberellin signaling mutants are included in the database. The hormone concentration and gene expression data can be searched by substance name, probe ID, gene locus ID or gene description. A correlation search function has been implemented to enable users to obtain information of correlated substance accumulation and gene expression. In the correlation search, calculation method, range of correlation coefficient and plant samples can be selected freely.",UniVIO,0.993773353,Uniformed Viewer for Integrated Omics,0.9183596,UniVIO,0.993773353,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/10/2013 +24163100,http://cbdm.mdc-berlin.de/tools/uorfdb,"uORFdb--a comprehensive literature database on eukaryotic uORF biology. Approximately half of all human transcripts contain at least one upstream translational initiation site that precedes the main coding sequence (CDS) and gives rise to an upstream open reading frame (uORF). We generated uORFdb, publicly available at http://cbdm.mdc-berlin.de/tools/uorfdb, to serve as a comprehensive literature database on eukaryotic uORF biology. Upstream ORFs affect downstream translation by interfering with the unrestrained progression of ribosomes across the transcript leader sequence. 
Although the first uORF-related translational activity was observed >30 years ago, and an increasing number of studies link defective uORF-mediated translational control to the development of human diseases, the features that determine uORF-mediated regulation of downstream translation are not well understood. The uORFdb was manually curated from all uORF-related literature listed at the PubMed database. It categorizes individual publications by a variety of denominators including taxon, gene and type of study. Furthermore, the database can be filtered for multiple structural and functional uORF-related properties to allow convenient and targeted access to the complex field of eukaryotic uORF biology.",uORFdb,0.997731447,NA,0,uORFdb,0.997731447,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/24/2013 +32168374,http://uorflight.whu.edu.cn,"uORFlight: a vehicle toward uORF-mediated translational regulation mechanisms in eukaryotes. . Upstream open reading frames (uORFs) are prevalent in eukaryotic mRNAs. They act as a translational control element for precisely tuning the expression of the downstream major open reading frame (mORF). uORF variation has been clearly associated with several human diseases. In contrast, natural uORF variants in plants have not ever been identified or linked with any phenotypic changes. The paucity of such evidence encouraged us to generate this database-uORFlight (http://uorflight.whu.edu.cn). It facilitates the exploration of uORF variation among different splicing models of Arabidopsis and rice genes. Most importantly, users can evaluate uORF frequency among different accessions at the population scale and find out the causal single nucleotide polymorphism (SNP) or insertion/deletion (INDEL), which can be associated with phenotypic variation through database mining or simple experiments. Such information will help to make hypothesis of uORF function in plant development or adaption to changing environments on the basis of the cognate mORF function. 
This database also curates plant uORF relevant literature into distinct groups. To be broadly interesting, our database expands uORF annotation into more species of fungus (Botrytis cinerea and Saccharomyces cerevisiae), plant (Brassica napus, Glycine max, Gossypium raimondii, Medicago truncatula, Solanum lycopersicum, Solanum tuberosum, Triticum aestivum and Zea mays), metazoan (Caenorhabditis elegans and Drosophila melanogaster) and vertebrate (Homo sapiens, Mus musculus and Danio rerio). Therefore, uORFlight will light up the runway toward how uORF genetic variation determines phenotypic diversity and advance our understanding of translational control mechanisms in eukaryotes.",uORFlight,0.997313261,NA,0,uORFlight,0.997313261,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +21876203,http://122.70.220.102/biomarker,"A tool for biomarker discovery in the urinary proteome: a manually curated human and animal urine protein biomarker database. Urine is an important source of biomarkers. A single proteomics assay can identify hundreds of differentially expressed proteins between disease and control samples; however, the ability to select biomarker candidates with the most promise for further validation study remains difficult. A bioinformatics tool that allows accurate and convenient comparison of all of the existing related studies can markedly aid the development of this area. In this study, we constructed the Urinary Protein Biomarker (UPB) database to collect existing studies of urinary protein biomarkers from published literature. To ensure the quality of data collection, all literature was manually curated. The website (http://122.70.220.102/biomarker) allows users to browse the database by disease categories and search by protein IDs in bulk. 
Researchers can easily determine whether a biomarker candidate has already been identified by another group for the same disease or for other diseases, which allows for the confidence and disease specificity of their biomarker candidate to be evaluated. Additionally, the pathophysiological processes of the diseases can be studied using our database with the hypothesis that diseases that share biomarkers may have the same pathophysiological processes. Because of the natural relationship between urinary proteins and the urinary system, this database may be especially suitable for studying the pathogenesis of urological diseases. Currently, the database contains 553 and 275 records compiled from 174 and 31 publications of human and animal studies, respectively. We found that biomarkers identified by different proteomic methods had a poor overlap with each other. The differences between sample preparation and separation methods, mass spectrometers, and data analysis algorithms may be influencing factors. Biomarkers identified from animal models also overlapped poorly with those from human samples, but the overlap rate was not lower than that of human proteomics studies. Therefore, it is not clear how well the animal models mimic human diseases.",UPB,0.950364868,Urinary Protein Biomarker,0.927442985,UPB,0.950364868,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,8/29/2011 +24843289,http://www.gbif.es:8080/ipt/resource.do?r=germoplasma-urjc,"URJC GB dataset: Community-based seed bank of Mediterranean high-mountain and semi-arid plant species at Universidad Rey Juan Carlos (Spain). The Germplasm Bank of Universidad Rey Juan Carlos was created in 2008 and currently holds 235 accessions and 96 species. This bank focuses on the conservation of wild-plant communities and aims to conserve ex situ a representative sample of the plant biodiversity present in a habitat, emphasizing priority ecosystems identified by the Habitats Directive. 
It is also used to store plant material for research and teaching purposes. The collection consists of three subcollections, two representative of typical habitats in the center of the Iberian Peninsula: high-mountain pastures (psicroxerophylous pastures) and semi-arid habitats (gypsophylic steppes), and a third representative of the genus Lupinus. The high-mountain subcollection currently holds 153 accessions (63 species), the semi-arid subcollection has 76 accessions (29 species,) and the Lupinus subcollection has 6 accessions (4 species). All accessions are stored in a freezer at -18 °C in Kilner jars with silica gel. The Germplasm Bank of Universidad Rey Juan Carlos follows a quality control protocol which describes the workflow performed with seeds from seed collection to storage. All collectors are members of research groups with great experience in species identification. Herbarium specimens associated with seed accessions are preserved and 63% of the records have been georreferenced with GPS and radio points. The dataset provides unique information concerning the location of populations of plant species that form part of the psicroxerophylous pastures and gypsophylic steppes of Central Spain as well as populations of genus Lupinus in the Iberian Peninsula. It also provides relevant information concerning mean seed weight and seed germination values under specific incubation conditions. This dataset has already been used by researchers of the Area of Biodiversity and Conservation of URJC as a source of information for the design and implementation of experimental designs in these plant communities. Since they are all active subcollections in continuous growth, data is updated regularly every six months and the latest version can be accessed through the GBIF data portal at http://www.gbif.es:8080/ipt/resource.do?r=germoplasma-urjc. 
This paper describes the URJC Germplasm Bank and its associated dataset with the aim of disseminating the dataset and explaining how it was derived.",URJC GB,0.922845423,NA,0,URJC GB,0.922845423,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,3/25/2014 +27242032,http://server3.lpm.org.ru/urs,"URS DataBase: universe of RNA structures and their motifs. . The Universe of RNA Structures DataBase (URSDB) stores information obtained from all RNA-containing PDB entries (2935 entries in October 2015). The content of the database is updated regularly. The database consists of 51 tables containing indexed data on various elements of the RNA structures. The database provides a web interface allowing user to select a subset of structures with desired features and to obtain various statistical data for a selected subset of structures or for all structures. In particular, one can easily obtain statistics on geometric parameters of base pairs, on structural motifs (stems, loops, etc.) or on different types of pseudoknots. The user can also view and get information on an individual structure or its selected parts, e.g. RNA-protein hydrogen bonds. URSDB employs a new original definition of loops in RNA structures. That definition fits both pseudoknot-free and pseudoknotted secondary structures and coincides with the classical definition in case of pseudoknot-free structures. To our knowledge, URSDB is the first database supporting searches based on topological classification of pseudoknots and on extended loop classification.Database URL: http://server3.lpm.org.ru/urs/.",URSDB,0.997525334,Universe of RNA Structures DataBase,0.987289786,URSDB,0.997525334,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/30/2016 +23172288,http://uucd.biocuckoo.org,"UUCD: a family-based database of ubiquitin and ubiquitin-like conjugation. 
In this work, we developed a family-based database of UUCD (http://uucd.biocuckoo.org) for ubiquitin and ubiquitin-like conjugation, which is one of the most important post-translational modifications responsible for regulating a variety of cellular processes, through a similar E1 (ubiquitin-activating enzyme)-E2 (ubiquitin-conjugating enzyme)-E3 (ubiquitin-protein ligase) enzyme thioester cascade. Although extensive experimental efforts have been taken, an integrative data resource is still not available. From the scientific literature, 26 E1s, 105 E2s, 1003 E3s and 148 deubiquitination enzymes (DUBs) were collected and classified into 1, 3, 19 and 7 families, respectively. To computationally characterize potential enzymes in eukaryotes, we constructed 1, 1, 15 and 6 hidden Markov model (HMM) profiles for E1s, E2s, E3s and DUBs at the family level, separately. Moreover, the ortholog searches were conducted for E3 and DUB families without HMM profiles. Then the UUCD database was developed with 738 E1s, 2937 E2s, 46 631 E3s and 6647 DUBs of 70 eukaryotic species. The detailed annotations and classifications were also provided. The online service of UUCD was implemented in PHP + MySQL + JavaScript + Perl.",UUCD,0.997053325,NA,0,UUCD,0.997053325,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/20/2012 +30614601,"http://databases.lovd.nl/shared/genes, http://www.uvogene.com","UVEOGENE: An SNP database for investigations on genetic factors associated with uveitis and their relationship with other systemic autoimmune diseases. Uveitis is an intraocular inflammatory disease which can lead to serious visual impairment. Genetic factors have been shown to be involved in its development. However, few databases have focused on the information of associations between single nucleotide polymorphisms (SNPs) and uveitis. 
To discover the exact genetic background of uveitis, we developed an SNP database specific for uveitis, ""UVEOGENE,"" which includes 370 genes and 918 SNPs covering 14 uveitis entities and 40 populations from 286 PubMed English-language papers. Stratification analyses by gender, HLA status, and different clinical features were also extracted from the publications. As a result, 371 associations were judged as ""statistically significant."" These associations were also shared with Global Variome shared Leiden Open Variation Database (LOVD) (https://databases.lovd.nl/shared/genes). Based on these associations, we investigated the genetic relationship among three widely studied uveitis entities including Behcet's disease (BD), Vogt-Koyanagi-Harada (VKH) disease, and acute anterior uveitis (AAU). Furthermore, ""UVEOGENE"" can be used as a reliable and informative resource to identify similarities as well as differences in the genetic susceptibility among uveitis and other autoimmune diseases. UVEOGENE is freely accessible at http://www.uvogene.com.",UVEOGENE,0.992076588,NA,0,UVEOGENE,0.992076588,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/16/2019 +31021279,http://biokb.ncpsb.org/UVGD,"UVGD 1.0: a gene-centric database bridging ultraviolet radiation and molecular biology effects in organisms. Objectives: Exposing to ultraviolet for a certain time will trigger some significant molecular biology effects in an organism. In the past few decades, varied ultraviolet-associated biological effects as well as their related genes, have been discovered under biologists' efforts. However, information about ultraviolet-related genes is dispersed in thousands of scientific papers, and there is still no study emphasizing on the systematic collection of ultraviolet-related genes. Methods: We collected ultraviolet-related genes and built this gene-centric database UVGD based on literature mining and manual curation. 
Literature mining was based on the ultraviolet-related abstracts downloaded from PubMed, and we obtained sentences in which ultraviolet keywords and genes co-occur at single-sentence level by using bio-entity recognizer. After that, manual curation was implemented in order to identify whether the genes are related to ultraviolet or not. Results: We built the ultraviolet-related knowledge base UVGD 1.0 (URL: http://biokb.ncpsb.org/UVGD/ ), which contains 663 ultraviolet-related genes, together with 17 associated biological processes, 117 associated phenotypes, and 2628 MeSH terms. Conclusion: UVGD is helpful to understand the ultraviolet-related biological processes in organisms and we believe it would be useful for biologists to study the responding mechanisms to ultraviolet.",UVGD,0.993201971,NA,0,UVGD,0.993201971,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/13/2019 +25361969,http://bmi-tokai.jp/VaDE,"VaDE: a manually curated database of reproducible associations between various traits and human genomic polymorphisms. Genome-wide association studies (GWASs) have identified numerous single nucleotide polymorphisms (SNPs) associated with the development of common diseases. However, it is clear that genetic risk factors of common diseases are heterogeneous among human populations. Therefore, we developed a database of genomic polymorphisms that are reproducibly associated with disease susceptibilities, drug responses and other traits for each human population: 'VarySysDB Disease Edition' (VaDE; http://bmi-tokai.jp/VaDE/). SNP-trait association data were obtained from the National Human Genome Research Institute GWAS (NHGRI GWAS) catalog and RAvariome, and we added detailed information of sample populations by curating original papers. In addition, we collected and curated original papers, and registered the detailed information of SNP-trait associations in VaDE. 
Then, we evaluated reproducibility of associations in each population by counting the number of significantly associated studies. VaDE provides literature-based SNP-trait association data and functional genomic region annotation for SNP functional research. SNP functional annotation data included experimental data of the ENCODE project, H-InvDB transcripts and the 1000 Genome Project. A user-friendly web interface was developed to assist quick search, easy download and fast swapping among viewers. We believe that our database will contribute to the future establishment of personalized medicine and increase our understanding of genetic factors underlying diseases.",VaDE,0.989877224,NA,0,VaDE,0.989877224,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/31/2014 +25392418,http://ncbr.muni.cz/ValidatorDB,"ValidatorDB: database of up-to-date validation results for ligands and non-standard residues from the Protein Data Bank. Following the discovery of serious errors in the structure of biomacromolecules, structure validation has become a key topic of research, especially for ligands and non-standard residues. ValidatorDB (freely available at http://ncbr.muni.cz/ValidatorDB) offers a new step in this direction, in the form of a database of validation results for all ligands and non-standard residues from the Protein Data Bank (all molecules with seven or more heavy atoms). Model molecules from the wwPDB Chemical Component Dictionary are used as reference during validation. ValidatorDB covers the main aspects of validation of annotation, and additionally introduces several useful validation analyses. The most significant is the classification of chirality errors, allowing the user to distinguish between serious issues and minor inconsistencies. Other such analyses are able to report, for example, completely erroneous ligands, alternate conformations or complete identity with the model molecules. 
All results are systematically classified into categories, and statistical evaluations are performed. In addition to detailed validation reports for each molecule, ValidatorDB provides summaries of the validation results for the entire PDB, for sets of molecules sharing the same annotation (three-letter code) or the same PDB entry, and for user-defined selections of annotations or PDB entries.",ValidatorDB,0.997314692,NA,0,ValidatorDB,0.997314692,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +23093589,http://validness.ym.edu.tw,"ValidNESs: a database of validated leucine-rich nuclear export signals. ValidNESs (http://validness.ym.edu.tw/) is a new database for experimentally validated leucine-rich nuclear export signal (NES)-containing proteins. The therapeutic potential of the chromosomal region maintenance 1 (CRM1)-mediated nuclear export pathway and disease relevance of its cargo proteins has gained recognition in recent years. Unfortunately, only about one-third of known CRM1 cargo proteins are accessible in a single database since the last compilation in 2003. CRM1 cargo proteins are often recognized by a classical NES (leucine-rich NES), but this signal is notoriously difficult to predict from sequence alone. Fortunately, a recently developed prediction method, NESsential, is able to identify good candidates in some cases, enabling valuable hints to be gained by in silico prediction, but until now it has not been available through a web interface. We present ValidNESs, an integrated, up-to-date database holding 221 NES-containing proteins, combined with a web interface to prediction by NESsential.",ValidNESs,0.977603436,NA,0,ValidNESs,0.977603436,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/22/2012 +31263870,http://ncbr.muni.cz/ValTrendsDB,"ValTrendsDB: bringing Protein Data Bank validation information closer to the user. SUMMARY:Structures in PDB tend to contain errors. 
This is a very serious issue for authors that rely on such potentially problematic data. The community of structural biologists develops validation methods as countermeasures, which are also included in the PDB deposition system. But how are these validation efforts influencing the structure quality of subsequently published data? Which quality aspects are improving, and which remain problematic? We developed ValTrendsDB, a database that provides the results of an extensive exploratory analysis of relationships between quality criteria, size and metadata of biomacromolecules. Key input data are sourced from PDB. The discovered trends are presented via precomputed information-rich plots. ValTrendsDB also supports the visualization of a set of user-defined structures on top of general quality trends. Therefore, ValTrendsDB enables users to see the quality of structures published by selected author, laboratory or journal, discover quality outliers, etc. ValTrendsDB is updated weekly. AVAILABILITY AND IMPLEMENTATION:Freely accessible at http://ncbr.muni.cz/ValTrendsDB. The web interface was implemented in JavaScript. The database was implemented in C++. SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.",ValTrendsDB,0.964570105,NA,0,ValTrendsDB,0.964570105,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2019 +33095866,http://www.licpathway.net/VARAdb,"VARAdb: a comprehensive variation annotation database for human. With the study of human diseases and biological processes increasing, a large number of non-coding variants have been identified and facilitated. The rapid accumulation of genetic and epigenomic information has resulted in an urgent need to collect and process data to explore the regulation of non-coding variants. Here, we developed a comprehensive variation annotation database for human (VARAdb, http://www.licpathway.net/VARAdb/), which specifically considers non-coding variants. 
VARAdb provides annotation information for 577,283,813 variations and novel variants, prioritizes variations based on scores using nine annotation categories, and supports pathway downstream analysis. Importantly, VARAdb integrates a large amount of genetic and epigenomic data into five annotation sections, which include 'Variation information', 'Regulatory information', 'Related genes', 'Chromatin accessibility' and 'Chromatin interaction'. The detailed annotation information consists of motif changes, risk SNPs, LD SNPs, eQTLs, clinical variant-drug-gene pairs, sequence conservation, somatic mutations, enhancers, super enhancers, promoters, transcription factors, chromatin states, histone modifications, chromatin accessibility regions and chromatin interactions. This database is a user-friendly interface to query, browse and visualize variations and related annotation information. VARAdb is a useful resource for selecting potential functional variations and interpreting their effects on human diseases and biological processes.",VARAdb,0.997256637,comprehensive variation annotation database for human,0.841432224,VARAdb,0.997256637,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +29112736,http://varcards.biols.ac.cn,"VarCards: an integrated genetic and clinical database for coding variants in the human genome. A growing number of genomic tools and databases were developed to facilitate the interpretation of genomic variants, particularly in coding regions. However, these tools are separately available in different online websites or databases, making it challenging for general clinicians, geneticists and biologists to obtain the first-hand information regarding some particular variants and genes of interest. Starting with coding regions and splice sties, we artificially generated all possible single nucleotide variants (n = 110 154 363) and cataloged all reported insertion and deletions (n = 1 223 370). 
We then annotated these variants with respect to functional consequences from more than 60 genomic data sources to develop a database, named VarCards (http://varcards.biols.ac.cn/), by which users can conveniently search, browse and annotate the variant- and gene-level implications of given variants, including the following information: (i) functional effects; (ii) functional consequences through different in silico algorithms; (iii) allele frequencies in different populations; (iv) disease- and phenotype-related knowledge; (v) general meaningful gene-level information; and (vi) drug-gene interactions. As a case study, we successfully employed VarCards in interpretation of de novo mutations in autism spectrum disorders. In conclusion, VarCards provides an intuitive interface of necessary information for researchers to prioritize candidate variations and genes.",VarCards,0.997416556,NA,0,VarCards,0.997416556,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2018 +29575684,http://www.lovd.nl/PTCH1,"New mutations and an updated database for the patched-1 (PTCH1) gene. Background Basal cell nevus syndrome (BCNS) is an autosomal dominant disorder characterized by multiple basal cell carcinomas (BCCs), maxillary keratocysts, and cerebral calcifications. BCNS most commonly is caused by a germline mutation in the patched-1 (PTCH1) gene. PTCH1 mutations are also described in patients with holoprosencephaly. Methods We have established a locus-specific database for the PTCH1 gene using the Leiden Open Variation Database (LOVD). We included 117 new PTCH1 variations, in addition to 331 previously published unique PTCH1 mutations. These new mutations were found in 141 patients who had a positive PTCH1 mutation analysis in either the VU University Medical Centre (VUMC) or Maastricht University Medical Centre (MUMC) between 1995 and 2015. Results The database contains 331 previously published unique PTCH1 mutations and 117 new PTCH1 variations. 
Conclusion We have established a locus-specific database for the PTCH1 gene using the Leiden Open Variation Database (LOVD). The database provides an open collection for both clinicians and researchers and is accessible online at http://www.lovd.nl/PTCH1.",NA,0,Variation,0.619382262,Variation,0.619382262,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,3/25/2018 +21367872,"http://www.postgresql.org, http://vancouvershortr.sourceforge.net","Human variation database: an open-source database template for genomic discovery. Motivation Current public variation databases are based upon collaboratively pooling data into a single database with a single interface available to the public. This gives little control to the collaborator to mine the database and requires that they freely share their data with the owners of the repository. We aim to provide an alternative mechanism: providing the source code and application programming interface (API) of a database, enabling researchers to set up local versions without investing heavily in the development of the resource and allowing for confidential information to remain secure. Results We describe an open-source database that can be installed easily at any research facility for the storage and analysis of thousands of next-generation sequencing variations. This database is built using PostgreSQL 8.4 (The PostgreSQL Global Development Group. postgres 8.4: http://www.postgresql.org) and provides a novel method for collating and searching across the reported results from thousands of next-generation sequence samples, as well as rapidly accessing vital information on the origin of the samples. The schema of the database makes rapid and insightful queries simple and enables easy annotation of novel or known genetic variations. A modular and cross-platform Java API is provided to perform common functions, such as generation of standard experimental reports and graphical summaries of modifications to genes. 
Included libraries allow adopters of the database to quickly develop their own queries. Availability The software is available for download through the Vancouver Short Read Analysis Package on Sourceforge, http://vancouvershortr.sourceforge.net. Instructions for use and deployment are provided on the accompanying wiki pages. Contact afejes@bcgsc.ca.",NA,0,variation database,0.602989346,variation database,0.602989346,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/2/2011 +27899678,http://www.ncbi.nlm.nih.gov/genome/viruses/variation,"Virus Variation Resource - improved response to emergent viral outbreaks. The Virus Variation Resource is a value-added viral sequence data resource hosted by the National Center for Biotechnology Information. The resource is located at http://www.ncbi.nlm.nih.gov/genome/viruses/variation/ and includes modules for seven viral groups: influenza virus, Dengue virus, West Nile virus, Ebolavirus, MERS coronavirus, Rotavirus A and Zika virus Each module is supported by pipelines that scan newly released GenBank records, annotate genes and proteins and parse sample descriptors and then map them to controlled vocabulary. These processes in turn support a purpose-built search interface where users can select sequences based on standardized gene, protein and metadata terms. Once sequences are selected, a suite of tools for downloading data, multi-sequence alignment and tree building supports a variety of user directed activities. This manuscript describes a series of features and functionalities recently added to the Virus Variation Resource.",Virus,0.544467032,Variation Resource,0.622275452,Variation Resource,0.622275452,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/28/2016 +22903802,http://structure.bmc.lu.se/VariBench,"VariBench: a benchmark database for variations. Several computational methods have been developed for predicting the effects of rapidly expanding variation data. 
Comparison of the performance of tools has been very difficult as the methods have been trained and tested with different datasets. Until now, unbiased and representative benchmark datasets have been missing. We have developed a benchmark database suite, VariBench, to overcome this problem. VariBench contains datasets of experimentally verified high-quality variation data carefully chosen from literature and relevant databases. It provides the mapping of variation position to different levels (protein, RNA and DNA sequences, protein three-dimensional structure), along with identifier mapping to relevant databases. VariBench contains the first benchmark datasets for variation effect analysis, a field which is of high importance and where many developments are currently going on. VariBench datasets can be used, for example, to test performance of prediction tools as well as to train novel machine learning-based tools. New datasets will be included and the community is encouraged to submit high-quality datasets to the service. VariBench is freely available at http://structure.bmc.lu.se/VariBench.",VariBench,0.997050226,NA,0,VariBench,0.997050226,1,NA,32016318,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,10/11/2012 +32016318,"http://structure.bmc.lu.se/VariBench/, http://structure.bmc.lu.se/VariBench","Variation benchmark datasets: update, criteria, quality and applications. . Development of new computational methods and testing their performance has to be carried out using experimental data. Only in comparison to existing knowledge can method performance be assessed. For that purpose, benchmark datasets with known and verified outcome are needed. High-quality benchmark datasets are valuable and may be difficult, laborious and time consuming to generate. VariBench and VariSNP are the two existing databases for sharing variation benchmark datasets used mainly for variation interpretation. 
They have been used for training and benchmarking predictors for various types of variations and their effects. VariBench was updated with 419 new datasets from 109 papers containing altogether 329 014 152 variants; however, there is plenty of redundancy between the datasets. VariBench is freely available at http://structure.bmc.lu.se/VariBench/. The contents of the datasets vary depending on information in the original source. The available datasets have been categorized into 20 groups and subgroups. There are datasets for insertions and deletions, substitutions in coding and non-coding region, structure mapped, synonymous and benign variants. Effect-specific datasets include DNA regulatory elements, RNA splicing, and protein property for aggregation, binding free energy, disorder and stability. Then there are several datasets for molecule-specific and disease-specific applications, as well as one dataset for variation phenotype effects. Variants are often described at three molecular levels (DNA, RNA and protein) and sometimes also at the protein structural level including relevant cross references and variant descriptions. The updated VariBench facilitates development and testing of new methods and comparison of obtained performances to previously published methods. We compared the performance of the pathogenicity/tolerance predictor PON-P2 to several benchmark studies, and show that such comparisons are feasible and useful, however, there may be limitations due to lack of provided details and shared data. Database URL: http://structure.bmc.lu.se/VariBench.",VariBench,0.993677557,NA,0,VariBench,0.993677557,1,NA,22903802,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2020 +31705629,http://varicarta.msl.ubc.ca,"VariCarta: A Comprehensive Database of Harmonized Genomic Variants Found in Autism Spectrum Disorder Sequencing Studies. 
Recent years have seen a boom in the application of the next-generation sequencing technology to the study of human disorders, including Autism Spectrum Disorder (ASD), where the focus has been on identifying rare, possibly causative genomic variants in ASD individuals. Because of the high genetic heterogeneity of ASD, a large number of subjects is needed to establish evidence for a variant or gene ASD-association, thus aggregating data across cohorts and studies is necessary. However, methodological inconsistencies and subject overlap across studies complicate data aggregation. Here we present VariCarta, a web-based database developed to address these challenges by collecting, reconciling, and consistently cataloging literature-derived genomic variants found in ASD subjects using ongoing semi-manual curation. The careful manual curation combined with a robust data import pipeline rectifies errors, converts variants into a standardized format, identifies and harmonizes cohort overlaps, and documents data provenance. The harmonization aspect is especially important since it prevents the potential double counting of variants, which can lead to inflation of gene-based evidence for ASD-association. The database currently contains 170,416 variant events from 10,893 subjects, collected across 61 publications, and reconciles 16,202 variants that have been reported in literature multiple times. VariCarta is freely accessible at http://varicarta.msl.ubc.ca. Autism Res 2019, 12: 1728-1736. © 2019 International Society for Autism Research, Wiley Periodicals, Inc. LAY SUMMARY: The search for genetic factors underlying Autism Spectrum Disorder (ASD) yielded numerous studies reporting potentially causative genomic variants found in ASD individuals. However, methodological differences and subject overlap across studies complicate the assembly of these data, diminishing its utility and accessibility.
We developed VariCarta, a web-based database that aggregates carefully curated, annotated, and harmonized literature-derived variants identified in individuals with ASD using ongoing semi-manual curation.",VariCarta,0.995876968,NA,0,VariCarta,0.995876968,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/9/2019 +23493258,http://depts.washington.edu/sokurel/variome,"Microbial variome database: point mutations, adaptive or not, in bacterial core genomes. Analysis of genetic differences (gene presence/absence and nucleotide polymorphisms) among strains of a bacterial species is crucial to understanding molecular mechanisms of bacterial pathogenesis and selecting targets for novel antibacterial therapeutics. However, lack of genome-wide association studies on large and epidemiologically well-defined strain collections from the same species makes it difficult to identify the genes under positive selection and define adaptive polymorphisms in those genes. To address this need and to overcome existing limitations, we propose to create a ""microbial variome""--a species-specific resource database of genomic variations based on molecular evolutionary analysis. Here, we present prototype variome databases of Escherichia coli and Salmonella enterica subspecies enterica (http://depts.washington.edu/sokurel/variome, last accessed March 26, 2013). The prototypes currently include the point mutations data of core protein-coding genes from completely sequenced genomes of 22 E. coli and 17 S. enterica strains. These publicly available databases allow for single- and multiple-field sorting, filtering, and searching of the gene variability data and the potential adaptive significance. 
Such resource databases would immensely help experimental research, clinical diagnostics, epidemiology, and environmental control of human pathogens.",microbial,0.575602502,variome,0.622333229,variome,0.622333229,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,3/14/2013 +24198712,http://data.canadensys.net/vascan,"Database of Vascular Plants of Canada (VASCAN): a community contributed taxonomic checklist of all vascular plants of Canada, Saint Pierre and Miquelon, and Greenland. The Database of Vascular Plants of Canada or VASCAN (http://data.canadensys.net/vascan) is a comprehensive and curated checklist of all vascular plants reported in Canada, Greenland (Denmark), and Saint Pierre and Miquelon (France). VASCAN was developed at the Université de Montréal Biodiversity Centre and is maintained by a group of editors and contributors. For every core taxon in the checklist (species, subspecies, or variety), VASCAN provides the accepted scientific name, the accepted French and English vernacular names, and their synonyms/alternatives in Canada, as well as the distribution status (native, introduced, ephemeral, excluded, extirpated, doubtful or absent) of the plant for each province or territory, and the habit (tree, shrub, herb and/or vine) of the plant in Canada. For reported hybrids (nothotaxa or hybrid formulas) VASCAN also provides the hybrid parents, except if the parents of the hybrid do not occur in Canada. All taxa are linked to a classification. VASCAN refers to a source for all name, classification and distribution information. All data have been released to the public domain under a CC0 waiver and are available through Canadensys and the Global Biodiversity Information Facility (GBIF).
VASCAN is a service to the scientific community and the general public, including administrations, companies, and non-governmental organizations.",VASCAN,0.996923745,Database of Vascular Plants of Canada,0.964604948,VASCAN,0.996923745,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/24/2013 +27053566,http://www.violinet.org/vaxar,"Vaxar: A Web-Based Database of Laboratory Animal Responses to Vaccinations and Its Application in the Meta-Analysis of Different Animal Responses to Tuberculosis Vaccinations. Animal models are indispensable for vaccine research and development. However, choosing which species to use and designing a vaccine study that is optimized for that species is often challenging. Vaxar (http://www.violinet.org/vaxar/) is a web-based database and analysis system that stores manually curated data regarding vaccine-induced responses in animals. To date, Vaxar encompasses models from 35 animal species including rodents, rabbits, ferrets, primates, and birds. These 35 species have been used to study more than 1300 experimentally tested vaccines for 164 pathogens and diseases significant to humans and domestic animals. The responses to vaccines by animals in more than 1500 experimental studies are recorded in Vaxar; these data can be used for systematic meta-analysis of various animal responses to a particular vaccine. For example, several variables, including animal strain, animal age, and the dose or route of either vaccination or challenge, might affect host response outcomes. Vaxar can also be used to identify variables that affect responses to different vaccines in a specific animal model. All data stored in Vaxar are publically available for web-based queries and analyses. Overall Vaxar provides a unique systematic approach for understanding vaccine-induced host immunity.",Vaxar,0.983407974,NA,0,Vaxar,0.983407974,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/1/2016 +30887928,http://www.ddg-pharmfac.net/vaxi,"VaxiJen Dataset of Bacterial Immunogens: An Update. 
BACKGROUND:Identifying immunogenic proteins is the first stage in vaccine design and development. VaxiJen is the most widely used and highly cited server for immunogenicity prediction. As the developers of VaxiJen, we are obliged to update and improve it regularly. Here, we present an updated dataset of bacterial immunogens containing 317 experimentally proven immunogenic proteins of bacterial origin, of which 60% have been reported during the last 10 years. METHODS:PubMed was searched for papers containing data for novel immunogenic proteins tested on humans till March 2017. Corresponding protein sequences were collected from NCBI and UniProtKB. The set was curated manually for multiple protein fragments, isoforms, and duplicates. RESULTS:The final curated dataset consists of 306 immunogenic proteins tested on humans derived from 47 bacterial microorganisms. Certain proteins have several isoforms. All were considered, and the total protein sequences in the set are 317. The updated set contains 206 new immunogens, compared to the previous VaxiJen bacterial dataset. The average number of immunogens per species is 6.7. The set also contains 12 fusion proteins and 41 peptide fragments and epitopes. The dataset includes the names of bacterial microorganisms, protein names, and protein sequences in FASTA format. CONCLUSION:Currently, the updated VaxiJen bacterial dataset is the best known manually-curated compilation of bacterial immunogens. It is freely available at http://www.ddg-pharmfac.net/vaxi jen/dataset. It can easily be downloaded, searched, and processed. 
When combined with an appropriate negative dataset, this update could also serve as a training set, allowing enhanced prediction of the potential immunogenicity of unknown protein sequences.",VaxiJen,0.989937842,NA,0,VaxiJen,0.989937842,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30465539,"http://bigd.big.ac.cn/vcg/, http://bigd.big.ac.cn/gvm","[Database resources of the reference genome and genetic variation maps for the Chinese population]. With the implementation of the international human genome project and 1000 genome project, hundreds of Chinese individual genome sequences have been published. Establishing a high-precision Chinese population reference genome and identifying the unique genome variations are fundamental for future precision medicine research in China. To further meet the needs of scientific management and deep mining on the rapidly growing Chinese genomic data, Beijing Institute of Genomics, Chinese Academy of Sciences, has developed a Virtual Chinese Genome Database (VCGDB, http://bigd.big.ac.cn/vcg/) and Genome Variation Map (GVM, http://bigd.big.ac.cn/gvm/) based on the public whole genome sequencing data, which provides the worldwide services of data retrieval, sharing, downloading and online analysis. This paper presents the brief introduction of characteristics and functions of the two databases, as well as their future development and application prospects, aiming to provide useful information for the promotion and development of the reference genome and genome variation map database in China.",VCGDB,0.950244635,Virtual Chinese Genome Database,0.920584655,VCGDB,0.950244635,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/1/2018 +31588507,http://vdjdb.cdr3.net,"VDJdb in 2019: database extension, new analysis infrastructure and a T-cell receptor motif compendium. Here, we report an update of the VDJdb database with a substantial increase in the number of T-cell receptor (TCR) sequences and their cognate antigens. 
The update further provides a new database infrastructure featuring two additional analysis modes that facilitate database querying and real-world data analysis. The increased yield of TCR specificity identification methods and the overall increase in the number of studies in the field has allowed us to expand the database more than 5-fold. Furthermore, several new analysis methods are included. For example, batch annotation of TCR repertoire sequencing samples allows for annotating large datasets on-line. Using recently developed bioinformatic methods for TCR motif mining, we have built a reduced set of high-quality TCR motifs that can be used for both training TCR specificity predictors and matching against TCRs of interest. These additions enhance the versatility of the VDJdb in the task of exploring T-cell antigen specificities. The database is available at https://vdjdb.cdr3.net.",VDJdb,0.996914685,NA,0,VDJdb,0.996914685,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +"22135296, 25510499",http://www.vectorbase.org,"VectorBase: improvements to a bioinformatics resource for invertebrate vector genomics. VectorBase (http://www.vectorbase.org) is a NIAID-supported bioinformatics resource for invertebrate vectors of human pathogens. It hosts data for nine genomes: mosquitoes (three Anopheles gambiae genomes, Aedes aegypti and Culex quinquefasciatus), tick (Ixodes scapularis), body louse (Pediculus humanus), kissing bug (Rhodnius prolixus) and tsetse fly (Glossina morsitans). Hosted data range from genomic features and expression data to population genetics and ontologies. We describe improvements and integration of new data that expand our taxonomic coverage. Releases are bi-monthly and include the delivery of preliminary data for emerging genomes. Frequent updates of the genome browser provide VectorBase users with increasing options for visualizing their own high-throughput data. 
One major development is a new population biology resource for storing genomic variations, insecticide resistance data and their associated metadata. It takes advantage of improved ontologies and controlled vocabularies. Combined, these new features ensure timely release of multiple types of data in the public domain while helping overcome the bottlenecks of bioinformatics and annotation by engaging with our user community.",VectorBase,0.996112347,NA,0,VectorBase,0.996112347,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/15/2014 +27634949,http://verdant.iplantcollaborative.org/plastidDB,"Verdant: automated annotation, alignment and phylogenetic analysis of whole chloroplast genomes. Motivation Chloroplast genomes are now produced in the hundreds for angiosperm phylogenetics projects, but current methods for annotation, alignment and tree estimation still require some manual intervention reducing throughput and increasing analysis time for large chloroplast systematics projects. Results Verdant is a web-based software suite and database built to take advantage a novel annotation program, annoBTD. Using annoBTD, Verdant provides accurate annotation of chloroplast genomes without manual intervention. Subsequent alignment and tree estimation can incorporate newly annotated and publically available plastomes and can accommodate a large number of taxa. Verdant sharply reduces the time required for analysis of assembled chloroplast genomes and removes the need for pipelines and software on personal hardware. Availability and implementation Verdant is available at: http://verdant.iplantcollaborative.org/plastidDB/ It is implemented in PHP, Perl, MySQL, Javascript, HTML and CSS with all major browsers supported. 
Contact mrmckain@gmail.comSupplementary information: Supplementary data are available at Bioinformatics online.",Verdant,0.98544848,NA,0,Verdant,0.98544848,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/14/2016 +28365718,http://genomics.cicbiogune.es/VerSeDa/index.php,"VerSeDa: vertebrate secretome database. . Based on the current tools, de novo secretome (full set of proteins secreted by an organism) prediction is a time consuming bioinformatic task that requires a multifactorial analysis in order to obtain reliable in silico predictions. Hence, to accelerate this process and offer researchers a reliable repository where secretome information can be obtained for vertebrates and model organisms, we have developed VerSeDa (Vertebrate Secretome Database). This freely available database stores information about proteins that are predicted to be secreted through the classical and non-classical mechanisms, for the wide range of vertebrate species deposited at the NCBI, UCSC and ENSEMBL sites. To our knowledge, VerSeDa is the only state-of-the-art database designed to store secretome data from multiple vertebrate genomes, thus, saving an important amount of time spent in the prediction of protein features that can be retrieved from this repository directly. VerSeDa is freely available at http://genomics.cicbiogune.es/VerSeDa/index.php.",VerSeDa,0.998133659,vertebrate secretome database,0.940622234,VerSeDa,0.998133659,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2017 +21245417,http://www.verygene.com,"VeryGene: linking tissue-specific genes to diseases, drugs, and beyond for knowledge discovery. In addition to many other genes, tissue-specific genes (TSGs) represent a set of genes of great importance for human physiology. However, the links among TSGs, diseases, and potential therapeutic agents are often missing, hidden, or too scattered to find. There is a need to establish a knowledgebase for researchers to share this and additional information in order to speed up discovery and clinical practice. 
As an initiative toward systems biology, the VeryGene web server was developed to fill this gap. A significant effort has been made to integrate TSGs from two large-scale data analyses with respective information on subcellular localization, Gene Ontology, Reactome, KEGG pathway, Mouse Genome Informatics (MGI) Mammalian Phenotype, disease association, and targeting drugs. The current release carefully selected 3,960 annotated TSGs derived from 127 normal human tissues and cell types, including 5,672 gene-disease and 2,171 drug-target relationships. In addition to being a specialized source for TSGs, VeryGene can be used as a discovery tool by generating novel inferences. Some inherently useful but hidden relations among genes, diseases, drugs, and other important aspects can be inferred to form testable hypotheses. VeryGene is available online at http://www.verygene.com.",VeryGene,0.988496959,NA,0,VeryGene,0.988496959,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/18/2011 +30395310,http://www.microvesicles.org,"Vesiclepedia 2019: a compendium of RNA, proteins, lipids and metabolites in extracellular vesicles. Extracellular vesicles (EVs) are membranous vesicles that are released by both prokaryotic and eukaryotic cells into the extracellular microenvironment. EVs can be categorised as exosomes, ectosomes or shedding microvesicles and apoptotic bodies based on the mode of biogenesis. EVs contain biologically active cargo of nucleic acids, proteins, lipids and metabolites that can be altered based on the precise state of the cell. Vesiclepedia (http://www.microvesicles.org) is a web-based compendium of RNA, proteins, lipids and metabolites that are identified in EVs from both published and unpublished studies. Currently, Vesiclepedia contains data obtained from 1254 EV studies, 38 146 RNA entries, 349 988 protein entries and 639 lipid/metabolite entries. Vesiclepedia is publicly available and allows users to query and download EV cargo based on different search criteria.
The mode of EV isolation and characterization, the biophysical and molecular properties and EV-METRIC are listed in the database aiding biomedical scientists in assessing the quality of the EV preparation and the corresponding data obtained. In addition, FunRich-based Vesiclepedia plugin is incorporated aiding users in data analysis.",Vesiclepedia,0.996926308,NA,0,Vesiclepedia,0.996926308,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +27242836,http://vespucci.colombos.fmach.it,"VESPUCCI: Exploring Patterns of Gene Expression in Grapevine. Large-scale transcriptional studies aim to decipher the dynamic cellular responses to a stimulus, like different environmental conditions. In the era of high-throughput omics biology, the most used technologies for these purposes are microarray and RNA-Seq, whose data are usually required to be deposited in public repositories upon publication. Such repositories have the enormous potential to provide a comprehensive view of how different experimental conditions lead to expression changes, by comparing gene expression across all possible measured conditions. Unfortunately, this task is greatly impaired by differences among experimental platforms that make direct comparisons difficult. In this paper, we present the Vitis Expression Studies Platform Using COLOMBOS Compendia Instances (VESPUCCI), a gene expression compendium for grapevine which was built by adapting an approach originally developed for bacteria, and show how it can be used to investigate complex gene expression patterns. We integrated nearly all publicly available microarray and RNA-Seq expression data: 1608 gene expression samples from 10 different technological platforms. Each sample has been manually annotated using a controlled vocabulary developed ad hoc to ensure both human readability and computational tractability. 
Expression data in the compendium can be visually explored using several tools provided by the web interface or can be programmatically accessed using the REST interface. VESPUCCI is freely accessible at http://vespucci.colombos.fmach.it.",VESPUCCI,0.992614388,Vitis Expression Studies Platform Using COLOMBOS Compendia Instances,0.96687065,VESPUCCI,0.992614388,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/10/2016 +26581408,http://vetbiobase.igbb.msstate.edu,"ERAIZDA: a model for holistic annotation of animal infectious and zoonotic diseases. . There is an urgent need for a unified resource that integrates trans-disciplinary annotations of emerging and reemerging animal infectious and zoonotic diseases. Such data integration will provide wonderful opportunity for epidemiologists, researchers and health policy makers to make data-driven decisions designed to improve animal health. Integrating emerging and reemerging animal infectious and zoonotic disease data from a large variety of sources into a unified open-access resource provides more plausible arguments to achieve better understanding of infectious and zoonotic diseases. We have developed a model for interlinking annotations of these diseases. These diseases are of particular interest because of the threats they pose to animal health, human health and global health security. We demonstrated the application of this model using brucellosis, an infectious and zoonotic disease. Preliminary annotations were deposited into VetBioBase database (http://vetbiobase.igbb.msstate.edu). This database is associated with user-friendly tools to facilitate searching, retrieving and downloading of disease-related information. 
Database URL: http://vetbiobase.igbb.msstate.edu.",VetBioBase,0.985843897,NA,0,VetBioBase,0.985843897,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/18/2015 +31837751,"http://vetcot.org/index.php/home/identification-and-verification-process/, http://vetcot.org/index.php/home/registry-use-materials","VetCOT: The Veterinary Trauma Registry. The goals of the Veterinary Committee on Trauma (VetCOT) trauma registry are to (1) inform improvement of veterinary and human trauma patient care and (2) design clinical and preclinical trials that could inform go/no go decisions for interventional strategies and tools. The VetCOT registry was established in 2013, and includes all trauma cases that present to Veterinary Trauma Centers. Veterinary Trauma Centers are well-resourced veterinary hospitals that are initially identified, then subsequently verified, by the American College of Veterinary and Emergency Critical Care VetCOT (http://vetcot.org/index.php/home/identification-and-verification-process/). As of June 2019, there are > 40,000 dog and cat cases in the registry, 3 publications and 9 ongoing projects utilizing data from the registry. Application materials to utilize VetCOT registry data is available on the VetCOT website (http://vetcot.org/index.php/home/registry-use-materials/).",VetCOT,0.961757398,Trauma Registry,0.660841862,VetCOT,0.961757398,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/25/2019 +"22067448, 26578559",http://www.mgc.ac.cn/VFs,"VFDB 2012 update: toward the genetic diversity and molecular evolution of bacterial virulence factors. The virulence factor database (VFDB, http://www.mgc.ac.cn/VFs/) has served as a comprehensive repository of bacterial virulence factors (VFs) for >7 years. Bacterial virulence is an exciting and dynamic field, due to the availability of complete sequences of bacterial genomes and increasing sophisticated technologies for manipulating bacteria and bacterial genomes. 
The intricacy of virulence mechanisms offers a challenge, and there exists a clear need to decipher the 'language' used by VFs more effectively. In this article, we present the recent major updates of VFDB in an attempt to summarize some of the most important virulence mechanisms by comparing different compositions and organizations of VFs from various bacterial pathogens, identifying core components and phylogenetic clades and shedding new light on the forces that shape the evolutionary history of bacterial pathogenesis. In addition, the 2012 release of VFDB provides an improved user interface.",VFDB,0.998255268,virulence factor database,0.889513955,VFDB,0.998255268,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2015 +27511743,http://vhldb.bio.unipd.it,"VHLdb: A database of von Hippel-Lindau protein interactors and mutations. Mutations in von Hippel-Lindau tumor suppressor protein (pVHL) predispose to develop tumors affecting specific target organs, such as the retina, epididymis, adrenal glands, pancreas and kidneys. Currently, more than 400 pVHL interacting proteins are either described in the literature or predicted in public databases. This data is scattered among several different sources, slowing down the comprehension of pVHL's biological role. Here we present VHLdb, a novel database collecting available interaction and mutation data on pVHL to provide novel integrated annotations. In VHLdb, pVHL interactors are organized according to two annotation levels, manual and automatic. Mutation data are easily accessible and a novel visualization tool has been implemented. A user-friendly feedback function to improve database content through community-driven curation is also provided. VHLdb presently contains 478 interactors, of which 117 have been manually curated, and 1,074 mutations. This makes it the largest available database for pVHL-related information.
VHLdb is available from URL: http://vhldb.bio.unipd.it/.",VHLdb,0.995007813,NA,0,VHLdb,0.995007813,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/11/2016 +22160653,http://dna.korea.ac.kr/vhot,"vHoT: a database for predicting interspecies interactions between viral microRNA and host genomes. Some viruses have been reported to transcribe microRNAs, implying complex relationships between the host and the pathogen at the post-transcriptional level through microRNAs in virus-infected cells. Although many computational algorithms have been developed for microRNA target prediction, few have been designed exclusively to find cellular or viral mRNA targets of viral microRNAs in a user-friendly manner. To address this, we introduce the viral microRNA host target (vHoT) database for predicting interspecies interactions between viral microRNA and host genomes. vHoT supports target prediction of 271 viral microRNAs from human, mouse, rat, rhesus monkey, cow, and virus genomes. vHoT is freely available at http://dna.korea.ac.kr/vhot.",vHoT,0.990057766,viral microRNA host target,0.892197204,vHoT,0.990057766,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/8/2011 +30371824,http://vibrism.neuroinf.jp,"ViBrism DB: an interactive search and viewer platform for 2D/3D anatomical images of gene expression and co-expression networks. Understanding anatomical structures and biological functions based on gene expression is critical in a systemic approach to address the complexity of the mammalian brain, where >25 000 genes are expressed in a precise manner. Co-expressed genes are thought to regulate cell type- or region-specific brain functions. Thus, well-designed data acquisition and visualization systems for profiling combinatorial gene expression in relation to anatomical structures are crucial. 
To this purpose, using our techniques of microtomy-based gene expression measurements and WebGL-based visualization programs, we mapped spatial expression densities of genome-wide transcripts to the 3D coordinates of mouse brains at four post-natal stages, and built a database, ViBrism DB (http://vibrism.neuroinf.jp/). With the DB platform, users can access a total of 172 022 expression maps of transcripts, including coding, non-coding and lncRNAs in the whole context of 3D magnetic resonance (MR) images. Co-expression of transcripts is represented in the image space and in topological network graphs. In situ hybridization images and anatomical area maps are browsable in the same space of 3D expression maps using a new browser-based 2D/3D viewer, BAH viewer. Created images are shareable using URLs, including scene-setting parameters. The DB has multiple links and is expandable by community activity.",ViBrism,0.936161578,NA,0,ViBrism,0.936161578,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2019 +30365026,http://www.phidias.us/victors,"Victors: a web-based knowledge base of virulence factors in human and animal pathogens. Virulence factors (VFs) are molecules that allow microbial pathogens to overcome host defense mechanisms and cause disease in a host. It is critical to study VFs for better understanding microbial pathogenesis and host defense mechanisms. Victors (http://www.phidias.us/victors) is a novel, manually curated, web-based integrative knowledge base and analysis resource for VFs of pathogens that cause infectious diseases in human and animals. Currently, Victors contains 5296 VFs obtained via manual annotation from peer-reviewed publications, with 4648, 179, 105 and 364 VFs originating from 51 bacterial, 54 viral, 13 parasitic and 8 fungal species, respectively. Our data analysis identified many VF-specific patterns. 
Within the global VF pool, cytoplasmic proteins were more common, while adhesins were less common compared to findings on protective vaccine antigens. Many VFs showed homology with host proteins and the human proteins interacting with VFs represented the hubs of human-pathogen interactions. All Victors data are queriable with a user-friendly web interface. The VFs can also be searched by a customized BLAST sequence similarity searching program. These VFs and their interactions with the host are represented in a machine-readable Ontology of Host-Pathogen Interactions. Victors supports the 'One Health' research as a vital source of VFs in human and animal pathogens.",Victors,0.988581896,NA,0,Victors,0.988581896,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +30407009,http://vietherb.com.vn,"VIETHERB: A Database for Vietnamese Herbal Species. Vietnam carries a highly diverse practice of traditional medicine in which various combinations of herbs have been widely used as remedies for many types of diseases. Poor hand-written records and current text-based databases, however, perplex the process of conventionalizing and evaluating canonical therapeutic effects. In efforts to reorganize the valuable information, we provide the VIETHERB database ( http://vietherb.com.vn/ ) for herbs documented in Vietnamese traditional medicines. This database is constructed with confidence to provide users with information on herbs and other side information including metabolites, diseases, morphologies, and geographical locations for each individual species. Our data in this release consist of 2,881 species, 10,887 metabolites, 458 geographical locations, and 8,046 therapeutic effects. The numbers of species-metabolite, species-therapeutic effect, species-morphology, and species-distribution binary relationships are 17,602, 2,718, 11,943, and 16,089, respectively. The information on Vietnamese herbal species can be easily accessed or queried using their scientific names. 
Searching for species sharing side information can be simply done by clicking on the data. The database primarily serves as an open source facilitating users in studies of modernizing traditional medicine, computer-aided drug design, conservation of endangered plants, and other relevant experimental sciences.",VIETHERB,0.988221526,NA,0,VIETHERB,0.988221526,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/3/2018 +31004550,http://www.psb.ugent.be/PlantPTMViewer,"The Plant PTM Viewer, a central resource for exploring plant protein modifications. Post-translational modifications (PTMs) of proteins are central in any kind of cellular signaling. Modern mass spectrometry technologies enable comprehensive identification and quantification of various PTMs. Given the increased numbers and types of mapped protein modifications, a database is necessary that simultaneously integrates and compares site-specific information for different PTMs, especially in plants for which the available PTM data are poorly catalogued. Here, we present the Plant PTM Viewer (http://www.psb.ugent.be/PlantPTMViewer), an integrative PTM resource that comprises approximately 370 000 PTM sites for 19 types of protein modifications in plant proteins from five different species. The Plant PTM Viewer provides the user with a protein sequence overview in which the experimentally evidenced PTMs are highlighted together with an estimate of the confidence by which the modified peptides and, if possible, the actual modification sites were identified and with functional protein domains or active site residues. The PTM sequence search tool can query PTM combinations in specific protein sequences, whereas the PTM BLAST tool searches for modified protein sequences to detect conserved PTMs in homologous sequences. Taken together, these tools help to assume the role and potential interplay of PTMs in specific proteins or within a broader systems biology context.
The Plant PTM Viewer is an open repository that√ɬÉ√ǬÇ√ɬÇ√Ǭ†allows the submission of mass spectrometry-based PTM data to remain at pace with future PTM plant studies.",Viewer,0.740627408,NA,0,Viewer,0.740627408,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,5/13/2019 +26644460,http://viggs.dna.affrc.go.jp,"The Vigna Genome Server, 'VigGS': A Genomic Knowledge Base of the Genus Vigna Based on High-Quality, Annotated Genome Sequence of the Azuki Bean, Vigna angularis (Willd.) Ohwi & Ohashi. The genus Vigna includes legume crops such as cowpea, mungbean and azuki bean, as well as >100 wild species. A number of the wild species are highly tolerant to severe environmental conditions including high-salinity, acid or alkaline soil; drought; flooding; and pests and diseases. These features of the genus Vigna make it a good target for investigation of genetic diversity in adaptation to stressful environments; however, a lack of genomic information has hindered such research in this genus. Here, we present a genome database of the genus Vigna, Vigna Genome Server ('VigGS', http://viggs.dna.affrc.go.jp), based on the recently sequenced azuki bean genome, which incorporates annotated exon-intron structures, along with evidence for transcripts and proteins, visualized in GBrowse. VigGS also facilitates user construction of multiple alignments between azuki bean genes and those of six related dicot species. In addition, the database displays sequence polymorphisms between azuki bean and its wild relatives and enables users to design primer sequences targeting any variant site. VigGS offers a simple keyword search in addition to sequence similarity searches using BLAST and BLAT. To incorporate up to date genomic information, VigGS automatically receives newly deposited mRNA sequences of pre-set species from the public database once a week. 
Users can refer to not only gene structures mapped on the azuki bean genome on GBrowse but also relevant literature of the genes. VigGS will contribute to genomic research into plant biotic and abiotic stresses and to the future development of new stress-tolerant crops.",VigGS,0.995423436,Vigna Genome Server,0.905400942,VigGS,0.995423436,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/7/2015 +"23568467, 24259431",http://www.violinet.org,"Databases and in silico tools for vaccine design. In vaccine design, databases and in silico tools play different but complementary roles. Databases collect experimentally verified vaccines and vaccine components, and in silico tools provide computational methods to predict and design new vaccines and vaccine components. Vaccine-related databases include databases of vaccines and vaccine components. In the USA, the Food and Drug Administration (FDA) maintains a database of licensed human vaccines, and the US Department of Agriculture keeps a database of licensed animal vaccines. Databases of vaccine clinical trials and vaccines in research also exist. The important vaccine components include vaccine antigens, vaccine adjuvants, vaccine vectors, and -vaccine preservatives. The vaccine antigens can be whole proteins or immune epitopes. Various in silico vaccine design tools are also available. The Vaccine Investigation and Online Information Network (VIOLIN; http://www.violinet.org ) is a comprehensive vaccine database and analysis system. The VIOLIN database includes various types of vaccines and vaccine components. VIOLIN also includes Vaxign, a Web-based in silico vaccine design program based on the reverse vaccinology strategy. Vaccine information and resources can be integrated with Vaccine Ontology (VO). 
This chapter introduces databases and in silico tools that facilitate vaccine design, especially those in the VIOLIN system.",VIOLIN,0.977441937,Vaccine Investigation and,0.620449245,VIOLIN,0.977441937,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/19/2013 +22735743,http://vipdb.cgu.edu.tw,"VIP DB--a viral protein domain usage and distribution database. During the viral infection and replication processes, viral proteins are highly regulated and may interact with host proteins. However, the functions and interaction partners of many viral proteins have yet to be explored. Here, we compiled a VIral Protein domain DataBase (VIP DB) to associate viral proteins with putative functions and interaction partners. We systematically assign domains and infer the functions of proteins and their protein interaction partners from their domain annotations. A total of 2,322 unique domains that were identified from 2,404 viruses are used as a starting point to correlate GO classification, KEGG metabolic pathway annotation and domain-domain interactions. Of the unique domains, 42.7% have GO records, 39.6% have at least one domain-domain interaction record and 26.3% can also be found in either mammals or plants. This database provides a resource to help virologists identify potential roles for viral protein. All of the information is available at http://vipdb.cgu.edu.tw.",VIP DB,0.981878147,Protein domain DataBase,0.806038454,VIP DB,0.981878147,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/24/2012 +31283070,http://genomeinterpretation.org/vipdb,"VIPdb, a genetic Variant Impact Predictor Database. Genome sequencing identifies vast number of genetic variants. Predicting these variants' molecular and clinical effects is one of the preeminent challenges in human genetics. Accurate prediction of the impact of genetic variants improves our understanding of how genetic information is conveyed to molecular and cellular functions, and is an essential step towards precision medicine. 
Over one hundred tools/resources have been developed specifically for this purpose. We summarize these tools as well as their characteristics, in the genetic Variant Impact Predictor Database (VIPdb). This database will help researchers and clinicians explore appropriate tools, and inform the development of improved methods. VIPdb can be browsed and downloaded at https://genomeinterpretation.org/vipdb.",VIPdb,0.984589517,genetic Variant Impact Predictor Database,0.94663018,VIPdb,0.984589517,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/17/2019 +"30265627, 33313778",http://viperdb.scripps.edu,"VIPERdb: A Tool for Virus Research. The VIrus Particle ExploreR database (VIPERdb) ( http://viperdb.scripps.edu ) is a database and web portal for primarily icosahedral virus capsid structures that integrates structure-derived information with visualization and analysis tools accessed through a set of web interfaces. Our aim in developing VIPERdb is to provide comprehensive structure-derived information on viruses comprising simple to detailed attributes such as size (diameter), architecture ( T number), genome type, taxonomy, intersubunit association energies, and surface-accessible residues. In addition, a number of web-based tools are provided to enable users to interact with the structures and compare and contrast structure-derived properties between different viruses. Recently, we have constructed a series of data visualizations using modern JavaScript charting libraries such as Google Charts that allow users to explore trends and gain insights based on the various data available in the database. Furthermore, we now include helical viruses and nonicosahedral capsids by implementing modified procedures for data curation and analysis. 
This article provides an up-to-date overview of VIPERdb, describing various data and tools that are currently available and how to use them to facilitate structure-based bioinformatics analysis of virus capsids.",VIPERdb,0.994908571,Particle ExploreR data base,0.844930937,VIPERdb,0.994908571,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +30593617,http://www.viprbrc.org/brc/home.spg?decorator=flavi_hcv,"Hepatitis C Virus Database and Bioinformatics Analysis Tools in the Virus Pathogen Resource (ViPR). The Virus Pathogen Resource (ViPR; www.viprbrc.org ) is a US National Institute of Allergy and Infectious Diseases (NIAID)-sponsored Bioinformatics Resource Center providing bioinformatics support for major human viral pathogens. The hepatitis C virus (HCV) portal of ViPR facilitates basic research and development of diagnostics and therapeutics for HCV, by providing a comprehensive collection of HCV-related data integrated from various sources, a growing suite of analysis and visualization tools for data mining and hypothesis generation, and personal Workbench spaces for data storage and sharing. This chapter introduces the data and functionality provided by the ViPR HCV portal. It describes example workflows for (1) searching HCV genome and protein sequences, (2) conducting phylogenetic analysis, and (3) analyzing sequence variations using pattern search for amino acid substitutions in proteins, single nucleotide variation calculation, metadata-driven comparison, and sequence feature variant type analysis. All data and tools are freely available via the ViPR HCV portal at https://www.viprbrc.org/brc/home.spg?decorator=flavi_hcv .",ViPR,0.996145308,Virus Pathogen Resource,0.91619873,ViPR,0.996145308,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +33594411,"http://viralhostrangedb.pasteur.cloud, http://gitlab.pasteur.fr/hub/viralhostrangedb","Viral Host Range database, an online tool for recording, analyzing and disseminating virus-host interactions. . 
Viruses are ubiquitous in the living world, and their ability to infect more than one host defines their host range. However, information about which virus infects which host, and about which host is infected by which virus, is not readily available. We developed a web-based tool called the Viral Host Range database to record, analyze and disseminate experimental host range data for viruses infecting archaea, bacteria and eukaryotes. The ViralHostRangeDB application is available from https://viralhostrangedb.pasteur.cloud. Its source code is freely available from the Gitlab hub of Institut Pasteur (https://gitlab.pasteur.fr/hub/viralhostrangedb).",ViralHostRangeDB,0.884413302,Viral Host Range database,0.955648327,Viral Host Range database,0.955648327,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/17/2021 +34601118,http://jsjds.hzau.edu.cn/MBPC/ViPGD/index.php/home/index,"G-quadruplexes in genomes of viruses infecting eukaryotes or prokaryotes are under different selection pressures from hosts. G-quadruplexes in viral genomes can be applied as the targets of antiviral therapies, which has attracted wide interest. However, it is still not clear whether the pervasive number of such elements in the viral world is the result of natural selection for functionality. In this study, we identified putative quadruplex-forming sequences (PQSs) across the known viral genomes and analyzed the abundance, structural stability, and conservation of viral PQSs. A Viral Putative G-quadruplex Database (http://jsjds.hzau.edu.cn/MBPC/ViPGD/index.php/home/index) was constructed to collect the details of each viral PQS, which provides guidance for selecting the desirable PQS. The PQS with two putative G-tetrads (G2-PQS) was significantly enriched in both eukaryotic viruses and prokaryotic viruses, whereas the PQSs with three putative G-tetrads (G3-PQS) were only enriched in eukaryotic viruses and depleted in prokaryotic viruses. 
The structural stability of PQSs in prokaryotic viruses was significantly lower than that in eukaryotic viruses. Conservation analysis showed that the G2-PQS, instead of G3-PQS, was highly conserved within the genus. This suggested that the G2-quadruplex might play an important role in viral biology, and the difference in the occurrence of G-quadruplex between eukaryotic viruses and prokaryotic viruses may result from the different selection pressures from hosts.",NA,0,Viral Putative G-quadruplex,0.673970756,Viral Putative G-quadruplex,0.673970756,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/30/2021 +25274736,http://www.rna-society.org/virbase,"ViRBase: a resource for virus-host ncRNA-associated interactions. Increasing evidence reveals that diverse non-coding RNAs (ncRNAs) play critically important roles in viral infection. Viruses can use diverse ncRNAs to manipulate both cellular and viral gene expression to establish a host environment conducive to the completion of the viral life cycle. Many host cellular ncRNAs can also directly or indirectly influence viral replication and even target virus genomes. ViRBase (http://www.rna-society.org/virbase) aims to provide the scientific community with a resource for efficient browsing and visualization of virus-host ncRNA-associated interactions and interaction networks in viral infection. The current version of ViRBase documents more than 12,000 viral and cellular ncRNA-associated virus-virus, virus-host, host-virus and host-host interactions involving more than 460 non-redundant ncRNAs and 4400 protein-coding genes from between more than 60 viruses and 20 hosts. Users can query, browse and manipulate these virus-host ncRNA-associated interactions. 
ViRBase will be of help in uncovering the generic organizing principles of cellular virus-host ncRNA-associated interaction networks in viral infection.",ViRBase,0.998200536,NA,0,ViRBase,0.998200536,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/1/2014 +32509450,http://www.vitiligoinfores.com,"VIRdb: a comprehensive database for interactive analysis of genes/proteins involved in the pathogenesis of vitiligo. Vitiligo is a chronic asymptomatic disorder affecting melanocytes from the basal layer of the epidermis which leads to a patchy loss of skin color. Even though it is one of the neglected disease conditions, people suffering from vitiligo are more prone to psychological disorders. As of now, various studies have been done in order to project auto-immune implications as the root cause. To understand the complexity of vitiligo, we propose the Vitiligo Information Resource (VIRdb) that integrates both the drug-target and systems approach to produce a comprehensive repository entirely devoted to vitiligo, along with curated information at both protein level and gene level along with potential therapeutics leads. These 25,041 natural compounds are curated from Natural Product Activity and Species Source Database. VIRdb is an attempt to accelerate the drug discovery process and laboratory trials for vitiligo through the computationally derived potential drugs. It is an exhaustive resource consisting of 129 differentially expressed genes, which are validated through gene ontology and pathway enrichment analysis. We also report 22 genes through enrichment analysis which are involved in the regulation of epithelial cell differentiation. At the protein level, 40 curated protein target molecules along with their natural hits that are derived through virtual screening. We also demonstrate the utility of the VIRdb by exploring the Protein-Protein Interaction Network and Gene-Gene Interaction Network of the target proteins and differentially expressed genes. 
For maintaining the quality and standard of the data in the VIRdb, the gold standard in bioinformatics toolkits like Cytoscape, Schr√ɬÉ√ǬÉ√ɬÇ√Ǭ∂dinger's GLIDE, along with the server installation of MATLAB, are used for generating results. VIRdb can be accessed through ""http://www.vitiligoinfores.com/"".",VIRdb,0.994836926,Vitiligo Information Resource,0.84693079,VIRdb,0.994836926,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/21/2020 +25392406,http://virhostnet.prabi.fr,"VirHostNet 2.0: surfing on the web of virus/host molecular interactions data. VirHostNet release 2.0 (http://virhostnet.prabi.fr) is a knowledgebase dedicated to the network-based exploration of virus-host protein-protein interactions. Since the previous VirhostNet release (2009), a second run of manual curation was performed to annotate the new torrent of high-throughput protein-protein interactions data from the literature. This resource is shared publicly, in PSI-MI TAB 2.5 format, using a PSICQUIC web service. The new interface of VirHostNet 2.0 is based on Cytoscape web library and provides a user-friendly access to the most complete and accurate resource of virus-virus and virus-host protein-protein interactions as well as their projection onto their corresponding host cell protein interaction networks. We hope that the VirHostNet 2.0 system will facilitate systems biology and gene-centered analysis of infectious diseases and will help to identify new molecular targets for antiviral drugs design. This resource will also continue to help worldwide scientists to improve our knowledge on molecular mechanisms involved in the antiviral response mediated by the cell and in the viral strategies selected by viruses to hijack the host immune system.",VirHostNet,0.99125731,NA,0,VirHostNet,0.99125731,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/11/2014 +23219434,http://www.violinet.org/virmugendb,"Systematic annotation and analysis of ""virmugens""-virulence factors whose mutants can be used as live attenuated vaccines. 
Live attenuated vaccines are usually generated by mutation of genes encoding virulence factors. ""Virmugen"" is coined here to represent a gene that encodes for a virulent factor of a pathogen and has been proven feasible in animal models to make a live attenuated vaccine by knocking out this gene. Not all virulence factors are virmugens. VirmugenDB is a web-based virmugen database (http://www.violinet.org/virmugendb). Currently, VirmugenDB includes 225 virmugens that have been verified to be valuable for vaccine development against 57 bacterial, viral, and protozoan pathogens. Bioinformatics analysis has revealed significant patterns in virmugens. For example, 10 Gram-negative and 1 Gram-positive bacterial aroA genes are virmugens. A sequence analysis has revealed at least 50% of identities in the protein sequences of the 10 Gram-negative bacterial aroA virmugens. As a pathogen case study, Brucella virmugens were analyzed. Out of 15 verified Brucella virmugens, 6 are related to carbohydrate or nucleotide transport and metabolism, and 2 involving cell membrane biogenesis. In addition, 54 virmugens from 24 viruses and 12 virmugens from 4 parasites are also stored in VirmugenDB. Virmugens tend to involve metabolism of nutrients (e.g., amino acids, carbohydrates, and nucleotides) and cell membrane formation. Host genes whose expressions were regulated by virmugen mutation vaccines or wild type virulent pathogens have also been annotated and systematically compared. The bioinformatics annotation and analysis of virmugens helps to elucidate enriched virmugen profiles and the mechanisms of protective immunity, and further supports rational vaccine design.",VirmugenDB,0.994162917,NA,0,VirmugenDB,0.994162917,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/6/2012 +23734609,http://vammpire.pharmchem.uni-frankfurt.de,VAMMPIRE: a matched molecular pairs database for structure-based drug design and optimization. 
Structure-based optimization to improve the affinity of a lead compound is an established approach in drug discovery. Knowledge-based databases holding molecular replacements can be supportive in the optimization process. We introduce a strategy to relate the substitution effect within matched molecular pairs (MMPs) to the atom environment within the cocrystallized protein-ligand complex. Virtually Aligned Matched Molecular Pairs Including Receptor Environment (VAMMPIRE) database and the supplementary web interface ( http://vammpire.pharmchem.uni-frankfurt.de ) provide valuable information for structure-based lead optimization.,VAMMPIRE,0.947511351,Virtually Aligned Matched Molecular Pairs Including Receptor Environment,0.959377536,Virtually Aligned Matched Molecular Pairs Including Receptor Environment,0.959377536,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/18/2013 +24304891,http://www.ncbi.nlm.nih.gov/genomes/VirusVariation,"Virus Variation Resource--recent updates and future directions. Virus Variation (http://www.ncbi.nlm.nih.gov/genomes/VirusVariation/) is a comprehensive, web-based resource designed to support the retrieval and display of large virus sequence datasets. The resource includes a value added database, a specialized search interface and a suite of sequence data displays. Virus-specific sequence annotation and database loading pipelines produce consistent protein and gene annotation and capture sequence descriptors from sequence records then map these metadata to a controlled vocabulary. The database supports a metadata driven, web-based search interface where sequences can be selected using a variety of biological and clinical criteria. Retrieved sequences can then be downloaded in a variety of formats or analyzed using a suite of tools and displays. Over the past 2 years, the pre-existing influenza and Dengue virus resources have been combined into a single construct and West Nile virus added to the resultant resource. 
A number of improvements were incorporated into the sequence annotation and database loading pipelines, and the virus-specific search interfaces were updated to support more advanced functions. Several new features have also been added to the sequence download options, and a new multiple sequence alignment viewer has been incorporated into the resource tool set. Together these enhancements should support enhanced usability and the inclusion of new viruses in the future.",Virus Variation,0.927988728,Variation Resource,0.64340649,Virus Variation,0.927988728,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/4/2013 +32349124,http://www.computationalbiology.cn/ViruscircBase/home.html,"VirusCircBase: a database of virus circular RNAs. Circular RNAs (circRNAs) are covalently closed long noncoding RNAs critical in diverse cellular activities and multiple human diseases. Several cancer-related viral circRNAs have been identified in double-stranded DNA viruses (dsDNA), yet no systematic study about the viral circRNAs has been reported. Herein, we have performed a systematic survey of 11√ɬÉ√ǬÇ√ɬÇ√Ǭ†924 circRNAs from 23 viral species by computational prediction of viral circRNAs from viral-infection-related RNA sequencing data. Besides the dsDNA viruses, our study has also revealed lots of circRNAs in single-stranded RNA viruses and retro-transcribing viruses, such as the Zika virus, the Influenza A virus, the Zaire ebolavirus, and the Human immunodeficiency virus 1. Most viral circRNAs had reverse complementary sequences or repeated sequences at the flanking sequences of the back-splice sites. Most viral circRNAs only expressed in a specific cell line or tissue in a specific species. Functional enrichment analysis indicated that the viral circRNAs from dsDNA viruses were involved in KEGG pathways associated with cancer. 
All viral circRNAs presented in the current study were stored and organized in VirusCircBase, which is freely available at http://www.computationalbiology.cn/ViruscircBase/home.html and is the first virus circRNA database. VirusCircBase forms the fundamental atlas for the further exploration and investigation of viral circRNAs in the context of public health.",VirusCircBase,0.996753037,NA,0,VirusCircBase,0.996753037,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2021 +29308007,http://yaulab.math.tsinghua.edu.cn/VirusDB,"Virus Database and Online Inquiry System Based on Natural Vectors. We construct a virus database called VirusDB (http://yaulab.math.tsinghua.edu.cn/VirusDB/) and an online inquiry system to serve people who are interested in viral classification and prediction. The database stores all viral genomes, their corresponding natural vectors, and the classification information of the single/multiple-segmented viral reference sequences downloaded from National Center for Biotechnology Information. The online inquiry system serves the purpose of computing natural vectors and their distances based on submitted genomes, providing an online interface for accessing and using the database for viral classification and prediction, and back-end processes for automatic and manual updating of database content to synchronize with GenBank. Submitted genomes data in FASTA format will be carried out and the prediction results with 5 closest neighbors and their classifications will be returned by email. Considering the one-to-one correspondence between sequence and natural vector, time efficiency, and high accuracy, natural vector is a significant advance compared with alignment methods, which makes VirusDB a useful database in further research.",VirusDB,0.994696617,NA,0,VirusDB,0.994696617,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/17/2017 +28025349,http://www.virusite.org,"viruSITE-integrated database for viral genomics. . 
Viruses are the most abundant biological entities and the reservoir of most of the genetic diversity in the Earth's biosphere. Viral genomes are very diverse, generally short in length and compared to other organisms carry only few genes. viruSITE is a novel database which brings together high-value information compiled from various resources. viruSITE covers the whole universe of viruses and focuses on viral genomes, genes and proteins. The database contains information on virus taxonomy, host range, genome features, sequential relatedness as well as the properties and functions of viral genes and proteins. All entries in the database are linked to numerous information resources. The above-mentioned features make viruSITE a comprehensive knowledge hub in the field of viral genomics.The web interface of the database was designed so as to offer an easy-to-navigate, intuitive and user-friendly environment. It provides sophisticated text searching and a taxonomy-based browsing system. viruSITE also allows for an alternative approach based on sequence search. A proprietary genome browser generates a graphical representation of viral genomes. In addition to retrieving and visualising data, users can perform comparative genomics analyses using a variety of tools.Database URL: http://www.virusite.org/.",viruSITE,0.993772805,NA,0,viruSITE,0.993772805,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/26/2016 +25217587,http://virusmentha.uniroma2.it,"VirusMentha: a new resource for virus-host protein interactions. Viral infections often cause diseases by perturbing several cellular processes in the infected host. Viral proteins target host proteins and either form new complexes or modulate the formation of functional host complexes. Describing and understanding the perturbation of the host interactome following viral infection is essential for basic virology and for the development of antiviral therapies. 
In order to provide a general overview of such interactions, a few years ago we developed VirusMINT. We have now extended the scope and coverage of VirusMINT and established VirusMentha, a new virus-virus and virus-host interaction resource build on the detailed curation protocols of the IMEx consortium and on the integration strategies developed for mentha. VirusMentha is regularly and automatically updated every week by capturing, via the PSICQUIC protocol, interactions curated by five different databases that are part of the IMEx consortium. VirusMentha can be freely browsed at http://virusmentha.uniroma2.it/ and its complete data set is available for download.",VirusMentha,0.996016741,NA,0,VirusMentha,0.996016741,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/12/2014 +33811468,http://virusms.erc.monash.edu,"Resourcing, annotating, and analysing synthetic peptides of SARS-CoV-2 for immunopeptidomics and other immunological studies. SARS-CoV-2 has caused a significant ongoing pandemic worldwide. A number of studies have examined the T cell mediated immune responses against SARS-CoV-2, identifying potential T cell epitopes derived from the SARS-CoV-2 proteome. Such studies will aid in identifying targets for vaccination and immune monitoring. In this study, we applied tandem mass spectrometry and proteomic techniques to a library of √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº40,000 synthetic peptides, in order to generate a large dataset of SARS-CoV-2 derived peptide MS/MS spectra. On this basis, we built an online knowledgebase, termed virusMS (https://virusms.erc.monash.edu/), to document, annotate and analyse these synthetic peptides and their spectral information. VirusMS incorporates a user-friendly interface to facilitate searching, browsing and downloading the database content. 
Detailed annotations of the peptides, including experimental information, peptide modifications, predicted peptide-HLA (human leukocyte antigen) binding affinities, and peptide MS/MS spectral data, are provided in virusMS.",virusMS,0.953528702,NA,0,virusMS,0.953528702,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,4/14/2021 +33045721,"http://gmql.eu/virusurf/, http://gmql.eu/virusurf_gisaid","ViruSurf: an integrated database to investigate viral sequences. ViruSurf, available at http://gmql.eu/virusurf/, is a large public database of viral sequences and integrated and curated metadata from heterogeneous sources (RefSeq, GenBank, COG-UK and NMDC); it also exposes computed nucleotide and amino acid variants, called from original sequences. A GISAID-specific ViruSurf database, available at http://gmql.eu/virusurf_gisaid/, offers a subset of these functionalities. Given the current pandemic outbreak, SARS-CoV-2 data are collected from the four sources; but ViruSurf contains other virus species harmful to humans, including SARS-CoV, MERS-CoV, Ebola and Dengue. The database is centered on sequences, described from their biological, technological and organizational dimensions. In addition, the analytical dimension characterizes the sequence in terms of its annotations and variants. The web interface enables expressing complex search queries in a simple way; arbitrary search queries can freely combine conditions on attributes from the four dimensions, extracting the resulting sequences. Several example queries on the database confirm and possibly improve results from recent research papers; results can be recomputed over time and upon selected populations. Effective search over large and curated sequence data may enable faster responses to future threats that could arise from new viruses.",ViruSurf,0.998281777,NA,0,ViruSurf,0.998281777,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +33367605,http://virxicon.cs.put.poznan.pl,"Virxicon: A Lexicon Of Viral Sequences. . 
Viruses are the most abundant biological entities and constitute a large reservoir of genetic diversity. In recent years, knowledge about them has increased significantly as a result of dynamic development in life sciences and rapid technological progress. This knowledge is scattered across various data repositories, making a comprehensive analysis of viral data difficult. In response to the need for gathering a comprehensive knowledge of viruses and viral sequences, we developed Virxicon, a lexicon of all experimentally-acquired sequences for RNA and DNA viruses. The ability to quickly obtain data for entire viral groups, searching sequences by levels of taxonomic hierarchy-according to the Baltimore classification and ICTV taxonomy-and tracking the distribution of viral data and its growth over time are unique features of our database compared to the other tools. Virxicon is a publicly available resource, updated weekly. It has an intuitive web interface and can be freely accessed at http://virxicon.cs.put.poznan.pl/. Supplementary data are available at Bioinformatics online.",Virxicon,0.996595085,NA,0,Virxicon,0.996595085,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/26/2020 +31598702,http://bioinfo.uth.edu/VISDB,"VISDB: a manually curated database of viral integration sites in the human genome. Virus integration into the human genome occurs frequently and represents a key driving event in human disease. Many studies have reported viral integration sites (VISs) proximal to structural or functional regions of the human genome. Here, we systematically collected and manually curated all VISs reported in the literature and publicly available data resources to construct the Viral Integration Site DataBase (VISDB, https://bioinfo.uth.edu/VISDB). Genomic information including target genes, nearby genes, nearest transcription start site, chromosome fragile sites, CpG islands, viral sequences and target sequences were integrated to annotate VISs. 
We further curated VIS-involved oncogenes and tumor suppressor genes, virus-host interactions involved in non-coding RNA (ncRNA), target gene and microRNA expression in five cancers, among others. Moreover, we developed tools to visualize single integration events, VIS clusters, DNA elements proximal to VISs and virus-host interactions involved in ncRNA. The current version of VISDB contains a total of 77 632 integration sites of five DNA viruses and four RNA retroviruses. VISDB is currently the only active comprehensive VIS database, which provides broad usability for the study of disease, virus related pathophysiology, virus biology, host-pathogen interactions, sequence motif discovery and pattern recognition, molecular evolution and adaption, among others.",VISDB,0.994756818,Viral,0.535345972,VISDB,0.994756818,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +25538713,http://pathways.cgrb.oregonstate.edu,"VitisCyc: a metabolic pathway knowledgebase for grapevine (Vitis vinifera). We have developed VitisCyc, a grapevine-specific metabolic pathway database that allows researchers to (i) search and browse the database for its various components such as metabolic pathways, reactions, compounds, genes and proteins, (ii) compare grapevine metabolic networks with other publicly available plant metabolic networks, and (iii) upload, visualize and analyze high-throughput data such as transcriptomes, proteomes, metabolomes etc. using OMICs-Viewer tool. VitisCyc is based on the genome sequence of the nearly homozygous genotype PN40024 of Vitis vinifera ""Pinot Noir"" cultivar with 12X v1 annotations and was built on BioCyc platform using Pathway Tools software and MetaCyc reference database. Furthermore, VitisCyc was enriched for plant-specific pathways and grape-specific metabolites, reactions and pathways. 
Currently VitisCyc harbors 68 super pathways, 362 biosynthesis pathways, 118 catabolic pathways, 5 detoxification pathways, 36 energy related pathways and 6 transport pathways, 10,908 enzymes, 2912 enzymatic reactions, 31 transport reactions and 2024 compounds. VitisCyc, as a community resource, can aid in the discovery of candidate genes and pathways that are regulated during plant growth and development, and in response to biotic and abiotic stress signals generated from a plant's immediate environment. VitisCyc version 3.18 is available online at http://pathways.cgrb.oregonstate.edu.",VitisCyc,0.997620523,NA,0,VitisCyc,0.997620523,1,26973684,NA,NA,NA,do not merge,NA,NA,NA,NA,12/9/2014 +"32550548, 34530999",http://vitivar.igib.res.in,"VitiVar: A locus specific database of vitiligo associated genes and variations. Vitiligo is the most common skin pigmentation disorder which affects around 1% of the population worldwide. The disease has complex pathogenesis and is of multifactorial etiology, that finally culminates in patchy depigmentation of skin. Genetic contribution to the disease is well studied, however the information about multiple associated genes and contributing variations are scattered across the literature. To address this complex disorder affecting the skin, we systematically cataloged the genes and variations by creating a Locus Specific Database for vitiligo called, ""VitiVar"". This comprehensive resource houses manually curated 322 genes and 254 variations, from 202 articles indexed in PubMed. We applied an integrative approach to stratify genes and variations to facilitate dissection of vitiligo pathogenesis by layering it with expression status in specific constituent cell types of skin and in-house vitiligo expression data. Finally, we were able to demonstrate the utility of VitiVar by generating a vitiligo interactome using GeneMANIA and overlaying the vitiligo and cell type specific information. 
This interaction network yielded 20 new genes (apart from 322 VitiVar genes) of which we were able to prioritize IFI27 and IFI6 for further validation. This, thereby makes VitiVar a comprehensive integrative platform in unravelling disease biology by providing meaningful leads for functional interrogation. VitiVar is freely accessible to the research community for prioritizing and validating the candidate genes and variations (http://vitivar.igib.res.in/).",VitiVar,0.99736613,NA,0,VitiVar,0.99736613,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/11/2019 +31274965,http://vmsshare.nist.gov,"Visual mass-spec share (vMS-Share): a new public web-based mass spectrometry visualization and data mining repository. . Herein we introduce the Visual Mass-Spec Share (vMS-Share), a new public mass spectrometric (MS) repository and data mining website/resource freely accessible at https://vmsshare.nist.gov. vMS-Share is a web-based application developed for instant visualization of raw MS data with integrated display of metadata optimized for the sharing of proteomics and metabolomics experimental results. Each MS-based identification is linked to a given experiment and the entire experimental data can then be viewed using the link associated with a given peptide and/or small molecule. Interactive and user-friendly visualizations are provided to the user via variety of easily accessible search filters.",vMS-Share,0.993173659,Visual Mass-Spec Share,0.818651617,vMS-Share,0.993173659,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2019 +23161674,http://proteinformatics.charite.de/voronoia4rna,"Voronoia4RNA--a database of atomic packing densities of RNA structures and their complexes. Voronoia4RNA (http://proteinformatics.charite.de/voronoia4rna/) is a structural database storing precalculated atomic volumes, atomic packing densities (PDs) and coordinates of internal cavities for currently 1869 RNAs and RNA-protein complexes. Atomic PDs are a measure for van der Waals interactions. 
Regions of low PD, containing water-sized internal cavities, refer to local structure flexibility or compressibility. RNA molecules build up the skeleton of large molecular machineries such as ribosomes or form smaller flexible structures such as riboswitches. The wealth of structural data on RNAs and their complexes allows setting up representative data sets and analysis of their structural features. We calculated atomic PDs from atomic volumes determined by the Voronoi cell method and internal cavities analytically by Delaunay triangulation. Reference internal PD values were derived from a non-redundant sub-data set of buried atoms. Comparison of internal PD values shows that RNA is more tightly packed than proteins. Finally, the relation between structure size, resolution and internal PD of the Voronoia4RNA entries is discussed. RNA, protein structures and their complexes can be visualized by the Jmol-based viewer Provi. Variations in PD are depicted by a color code. Internal cavities are represented by their molecular boundaries or schematically as balls.",Voronoia4RNA,0.995378757,NA,0,Voronoia4RNA,0.995378757,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/17/2012 +21769196,http://www.vpdb.bicpu.edu.in,"VPDB: Viral Protein Structural Database. Unlabelled Viral Protein Database is an interactive database for three dimensional viral proteins. Our aim is to provide a comprehensive resource to the community of structural virology, with an emphasis on the description of derived data from structural biology. Currently, VPDB includes ~1,670 viral protein structures from >277 viruses with more than 465 virus strains. The whole database can be easily accessed through the user convenience text search. Interactivity has been enhanced by using Jmol, WebMol and Strap to visualize the viral protein molecular structure. 
Availability The database is available for free at http://www.vpdb.bicpu.edu.in.",VPDB,0.997562647,Viral Protein Structural Database,0.964751055,VPDB,0.997562647,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/6/2011 +31245720,http://vigs.noble.org,"Virus-induced gene silencing database for phenomics and functional genomics in Nicotiana benthamiana. Virus-induced gene silencing (VIGS) is an important forward and reverse genetics method for the study of gene function in many plant species, especially Nicotiana benthamiana. However, despite the widespread use of VIGS, a searchable database compiling the phenotypes observed with this method is lacking. Such a database would allow researchers to know the phenotype associated with the silencing of a large number of individual genes without experimentation. We have developed a VIGS phenomics and functional genomics database (VPGD) that has DNA sequence information derived from over 4,000 N. benthamiana VIGS clones along with the associated silencing phenotype for approximately 1,300 genes. The VPGD has a built-in BLAST search feature that provides silencing phenotype information of specific genes. In addition, a keyword-based search function could be used to find a specific phenotype of interest with the corresponding gene, including its Gene Ontology descriptions. Query gene sequences from other plant species that have not been used for VIGS can also be searched for their homologs and silencing phenotype in N. benthamiana. VPGD is useful for identifying gene function not only in N. benthamiana but also in related Solanaceae plants such as tomato and potato. The database is accessible at http://vigs.noble.org.",VPGD,0.986272037,and,0.526194394,VPGD,0.986272037,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,4/23/2018 +22715306,http://www/vptd.in,"Database for vegetable phytochemicals and their mechanism of action. 
Unlabelled In an endeavor to screen bioactive compounds present in vegetables with effective mechanism using in silico method lead us to develop a vegetable phytochemicals and their target database (VPTD). The VPTD is a unique bioinformatics resource that compiles information about phytochemicals from vegetables and their mechanism. VPTD contains 2496 phytochemicals from 27 vegetables, their 3D images and their 1337 possible biological mechanism. Each phytochemical contain records of seven data fields providing detailed information on name, source, amount present, structure and mechanistic information. This information has been manually extracted and manually verified from numerous sources, including other electronic databases, textbooks and scientific journals. VPTD is fully searchable and supports extensive text search. The main focus of the VPTD is on providing possible mechanism of phytochemicals, which will help in discovery of potential drugs from one of the common bioresource-vegetable. VPTD is freely available. Availability The database is available for free at http://www/vptd.in.",VPTD,0.991008282,vegetable phytochemicals and their target database,0.914648957,VPTD,0.991008282,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,5/31/2012 +33094321,http://vptmdb.com:8787/VPTMdb,"VPTMdb: a viral posttranslational modification database. . In viruses, posttranslational modifications (PTMs) are essential for their life cycle. Recognizing viral PTMs is very important for a better understanding of the mechanism of viral infections and finding potential drug targets. However, few studies have investigated the roles of viral PTMs in virus-human interactions using comprehensive viral PTM datasets. To fill this gap, we developed the first comprehensive viral posttranslational modification database (VPTMdb) for collecting systematic information of PTMs in human viruses and infected host cells. 
The VPTMdb contains 1240 unique viral PTM sites with 8 modification types from 43 viruses (818 experimentally verified PTM sites manually extracted from 150 publications and 422 PTMs extracted from SwissProt) as well as 13 650 infected cells' PTMs extracted from seven global proteomics experiments in six human viruses. The investigation of viral PTM sequences motifs showed that most viral PTMs have the consensus motifs with human proteins in phosphorylation and five cellular kinase families phosphorylate more than 10 viral species. The analysis of protein disordered regions presented that more than 50% glycosylation sites of double-strand DNA viruses are in the disordered regions, whereas single-strand RNA and retroviruses prefer ordered regions. Domain-domain interaction analysis indicating potential roles of viral PTMs play in infections. The findings should make an important contribution to the field of virus-human interaction. Moreover, we created a novel sequence-based classifier named VPTMpre to help users predict viral protein phosphorylation sites. VPTMdb online web server (http://vptmdb.com:8787/VPTMdb/) was implemented for users to download viral PTM data and predict phosphorylation sites of interest.",VPTMdb,0.996834993,viral posttranslational modification database,0.67337137,VPTMdb,0.996834993,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/1/2021 +24341535,http://vtcdb.adelaide.edu.au/Home.aspx,"VTCdb: a gene co-expression database for the crop species Vitis vinifera (grapevine). Background Gene expression datasets in model plants such as Arabidopsis have contributed to our understanding of gene function and how a single underlying biological process can be governed by a diverse network of genes. The accumulation of publicly available microarray data encompassing a wide range of biological and environmental conditions has enabled the development of additional capabilities including gene co-expression analysis (GCA). 
GCA is based on the understanding that genes encoding proteins involved in similar and/or related biological processes may exhibit comparable expression patterns over a range of experimental conditions, developmental stages and tissues. We present an open access database for the investigation of gene co-expression networks within the cultivated grapevine, Vitis vinifera. Description The new gene co-expression database, VTCdb (http://vtcdb.adelaide.edu.au/Home.aspx), offers an online platform for transcriptional regulatory inference in the cultivated grapevine. Using condition-independent and condition-dependent approaches, grapevine co-expression networks were constructed using the latest publicly available microarray datasets from diverse experimental series, utilising the Affymetrix Vitis vinifera GeneChip (16 K) and the NimbleGen Grape Whole-genome microarray chip (29 K), thus making it possible to profile approximately 29,000 genes (95% of the predicted grapevine transcriptome). Applications available with the online platform include the use of gene names, probesets, modules or biological processes to query the co-expression networks, with the option to choose between Affymetrix or Nimblegen datasets and between multiple co-expression measures. Alternatively, the user can browse existing network modules using interactive network visualisation and analysis via CytoscapeWeb. To demonstrate the utility of the database, we present examples from three fundamental biological processes (berry development, photosynthesis and flavonoid biosynthesis) whereby the recovered sub-networks reconfirm established plant gene functions and also identify novel associations. 
Conclusions Together, we present valuable insights into grapevine transcriptional regulation by developing network models applicable to researchers in their prioritisation of gene candidates, for on-going study of biological processes related to grapevine development, metabolism and stress responses.",VTCdb,0.994380951,NA,0,VTCdb,0.994380951,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/16/2013 +24267744,http://phenoscape.org,"The vertebrate taxonomy ontology: a framework for reasoning across model organism and species phenotypes. Background A hierarchical taxonomy of organisms is a prerequisite for semantic integration of biodiversity data. Ideally, there would be a single, expansive, authoritative taxonomy that includes extinct and extant taxa, information on synonyms and common names, and monophyletic supraspecific taxa that reflect our current understanding of phylogenetic relationships. Description As a step towards development of such a resource, and to enable large-scale integration of phenotypic data across vertebrates, we created the Vertebrate Taxonomy Ontology (VTO), a semantically defined taxonomic resource derived from the integration of existing taxonomic compilations, and freely distributed under a Creative Commons Zero (CC0) public domain waiver. The VTO includes both extant and extinct vertebrates and currently contains 106,947 taxonomic terms, 22 taxonomic ranks, 104,736 synonyms, and 162,400 cross-references to other taxonomic resources. Key challenges in constructing the VTO included (1) extracting and merging names, synonyms, and identifiers from heterogeneous sources; (2) structuring hierarchies of terms based on evolutionary relationships and the principle of monophyly; and (3) automating this process as much as possible to accommodate updates in source taxonomies. 
Conclusions The VTO is the primary source of taxonomic information used by the Phenoscape Knowledgebase (http://phenoscape.org/), which integrates genetic and evolutionary phenotype data across both model and non-model vertebrates. The VTO is useful for inferring phenotypic changes on the vertebrate tree of life, which enables queries for candidate genes for various episodes in vertebrate evolution.",VTO,0.888742566,Vertebrate Taxonomy Ontology,0.715373244,VTO,0.888742566,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/22/2013 +25614757,http://www.polebio.lrsv.ups-tlse.fr/WallProtDB,"WallProtDB, a database resource for plant cell wall proteomics. Background During the last fifteen years, cell wall proteomics has become a major research field with the publication of more than 50 articles describing plant cell wall proteomes. The WallProtDB database has been designed as a tool to facilitate the inventory, the interpretation of cell wall proteomics data and the comparisons between cell wall proteomes. Results WallProtDB (http://www.polebio.lrsv.ups-tlse.fr/WallProtDB/) presently contains 2170 proteins and ESTs identified experimentally in 36 cell wall proteomics studies performed on 11 different plant species. Two criteria have to be met for entering WallProtDB. First one is related to the identification of proteins. Only proteins identified in plant with available genomic or ESTs data are considered to ensure unambiguous identification. Second criterion is related to the difficulty to obtain clean cell wall fractions. Indeed, since cell walls constitute an open compartment difficult to isolate, numerous proteins predicted to be intracellular and/or having functions inside the cell have been identified in cell wall extracts. Then, except proteins predicted to be plasma membrane proteins, only proteins having a predicted signal peptide and no known intracellular retention signal are included in the database. 
In addition, WallProtDB contains information about the strategies used to obtain cell wall protein extracts and to identify proteins by mass spectrometry and bioinformatics. Mass spectrometry data are included when available. All the proteins of WallProtDB are linked to ProtAnnDB, another database, which contains structural and functional bioinformatics annotations of proteins as well as links to other databases (Aramemnon, CAZy, Planet, Phytozome). A list of references in the cell wall proteomics field is also provided. Conclusions WallProtDB aims at becoming a cell wall proteome reference database. It can be updated at any time on request and provide a support for sharing cell wall proteomics data and literature references with researchers interested in plant cell wall biology.",WallProtDB,0.996273875,NA,0,WallProtDB,0.996273875,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/16/2015 +31504823,http://waltzdb.switchlab.org,"WALTZ-DB 2.0: an updated database containing structural information of experimentally determined amyloid-forming peptides. Transition of soluble proteins into insoluble amyloid fibrils is driven by self-propagating short sequence stretches. However, accurate prediction of aggregation determinants remains challenging. Here, we describe WALTZ-DB 2.0, an updated and significantly expanded open-access database providing information on experimentally determined amyloid-forming hexapeptide sequences (http://waltzdb.switchlab.org/). We have updated WALTZ-DB 2.0 with new entries, including: (i) experimental validation of an in-house developed dataset of 229 hexapeptides, using electron microscopy and Thioflavin-T binding assays; (ii) manual curation of 98 amyloid-forming peptides isolated from literature. Furthermore, the content has been expanded by adding novel structural information for peptide entries, including sequences of the previous version. 
Using a computational methodology developed in the Switch lab, we have generated 3D-models of the putative amyloid fibril cores of WALTZ-DB 2.0 entries. Structural models, coupled with information on the energetic contributions and fibril core stabilities, can be accessed through individual peptide entries. Customized filtering options for subset selections and new modelling graphical features were added to upgrade online accessibility, providing a user-friendly interface for browsing, downloading and updating. WALTZ-DB 2.0 remains the largest open-access repository for amyloid fibril formation determinants and will continue to enhance the development of new approaches focused on accurate prediction of aggregation prone sequences.",WALTZ-DB,0.969778025,NA,0,WALTZ-DB,0.969778025,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2020 +26452372,http://waspatlas.com,"WaspAtlas: a Nasonia vitripennis gene database and analysis platform. . Nasonia vitripennis is a parasitoid wasp which is becoming an important model organism for parasitism, epigenetics, evolutionary and developmental genetics. WaspAtlas is a new gene database in which we have compiled annotation data from all available N. vitripennis releases along with a wealth of transcriptomic data, methylation data and original analyses and annotations to form a comprehensive resource to aid the study of Nasonia. WaspAtlas allows users to explore gene structure and function, to compare expression data across sexes, tissues, developmental stages and conditions, and to explore published data relating to gene(s) of interest. WaspAtlas is easy to navigate and the database is easily searchable through the web interface. Detailed illustrations are provided for splice variants, protein domain predictions and the results of analyses. 
The website also functions as an analysis platform analysis for Nasonia, providing a set of tools designed to perform common analyses including GO term overrepresentation and RNAi off-target prediction. WaspAtlas will act as a hub for published data relating to Nasonia genes, and will be continually updated with new data to reflect the state of Nasonia-omics research. Database URL: http://waspatlas.com.",WaspAtlas,0.996073008,NA,0,WaspAtlas,0.996073008,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/9/2015 +34848704,http://apiaceae.njau.edu.cn/waterdropwortdb,"Integrative genome, transcriptome, microRNA, and degradome analysis of water dropwort (Oenanthe javanica) in response to water stress. Water dropwort (Liyang Baiqin, Oenanthe javanica (BI.) DC.) is an aquatic perennial plant from the Apiaceae family with abundant protein, dietary fiber, vitamins, and minerals. It usually grows in wet soils and can even grow in water. Here, whole-genome sequencing of O. javanica via HiSeq 2000 sequencing technology was reported for the first time. The genome size was 1.28 Gb, including 42,270 genes, of which 93.92% could be functionally annotated. An online database of the whole-genome sequences of water dropwort, Water dropwortDB, was established to share the results and facilitate further research on O. javanica (database homepage: http://apiaceae.njau.edu.cn/waterdropwortdb ). Water dropwortDB offers whole-genome and transcriptome sequences and a Basic Local Alignment Search Tool. Comparative analysis with other species showed that the evolutionary relationship between O. javanica and Daucus carota was the closest. Twenty-five gene families of O. javanica were found to be expanded, and some genetic factors (such as genes and miRNAs) related to phenotypic and anatomic differentiation in O. javanica under different water conditions were further investigated. Two miRNA and target gene pairs (miR408 and Oja15472, miR171 and Oja47040) were remarkably regulated by water stress. 
The obtained reference genome of O. javanica provides important information for future work, thus making in-depth genetic breeding and gene editing possible. The present study also provides a foundation for the understanding of the O. javanica response to water stress, including morphological, anatomical, and genetic differentiation.",Water dropwortDB,0.96038208,NA,0,Water dropwortDB,0.96038208,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2021 +27893392,http://ece.uwaterloo.ca,"Waterloo Exploration Database: New Challenges for Image Quality Assessment Models. The great content diversity of real-world digital images poses a grand challenge to image quality assessment (IQA) models, which are traditionally designed and validated on a handful of commonly used IQA databases with very limited content variation. To test the generalization capability and to facilitate the wide usage of IQA techniques in real-world applications, we establish a large-scale database named the Waterloo Exploration Database, which in its current state contains 4744 pristine natural images and 94 880 distorted images created from them. Instead of collecting the mean opinion score for each image via subjective testing, which is extremely difficult if not impossible, we present three alternative test criteria to evaluate the performance of IQA models, namely, the pristine/distorted image discriminability test, the listwise ranking consistency test, and the pairwise preference consistency test (P-test). We compare 20 well-known IQA models using the proposed criteria, which not only provide a stronger test in a more challenging testing environment for existing models, but also demonstrate the additional benefits of using the proposed database. For example, in the P-test, even for the best performing no-reference IQA model, more than 6 million failure cases against the model are ""discovered"" automatically out of over 1 billion test pairs. 
Furthermore, we discuss how the new database may be exploited using innovative approaches in the future, to reveal the weaknesses of existing IQA models, to provide insights on how to improve the models, and to shed light on how the next-generation IQA models may be developed. The database and codes are made publicly available at: https://ece.uwaterloo.ca/~k29ma/exploration/.",Waterloo,0.555541277,NA,0,Waterloo,0.555541277,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/22/2016 +25466819,http://bioclaims.uib.es,"White adipose tissue reference network: a knowledge resource for exploring health-relevant relations. Optimal health is maintained by interaction of multiple intrinsic and environmental factors at different levels of complexity-from molecular, to physiological, to social. Understanding and quantification of these interactions will aid design of successful health interventions. We introduce the reference network concept as a platform for multi-level exploration of biological relations relevant for metabolic health, by integration and mining of biological interactions derived from public resources and context-specific experimental data. A White Adipose Tissue Health Reference Network (WATRefNet) was constructed as a resource for discovery and prioritization of mechanism-based biomarkers for white adipose tissue (WAT) health status and the effect of food and drug compounds on WAT health status. The WATRefNet (6,797 nodes and 32,171 edges) is based on (1) experimental data obtained from 10 studies addressing different adiposity states, (2) seven public knowledge bases of molecular interactions, (3) expert's definitions of five physiologically relevant processes key to WAT health, namely WAT expandability, Oxidative capacity, Metabolic state, Oxidative stress and Tissue inflammation, and (4) a collection of relevant biomarkers of these processes identified by BIOCLAIMS ( http://bioclaims.uib.es ). 
The WATRefNet comprehends multiple layers of biological complexity as it contains various types of nodes and edges that represent different biological levels and interactions. We have validated the reference network by showing overrepresentation with anti-obesity drug targets, pathology-associated genes and differentially expressed genes from an external disease model dataset. The resulting network has been used to extract subnetworks specific to the above-mentioned expert-defined physiological processes. Each of these process-specific signatures represents a mechanistically supported composite biomarker for assessing and quantifying the effect of interventions on a physiological aspect that determines WAT health status. Following this principle, five anti-diabetic drug interventions and one diet intervention were scored for the match of their expression signature to the five biomarker signatures derived from the WATRefNet. This confirmed previous observations of successful intervention by dietary lifestyle and revealed WAT-specific effects of drug interventions. The WATRefNet represents a sustainable knowledge resource for extraction of relevant relationships such as mechanisms of action, nutrient intervention targets and biomarkers and for assessment of health effects for support of health claims made on food products.",WATRefNet,0.994038701,White Adipose Tissue Health Reference Network,0.908846239,WATRefNet,0.994038701,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/3/2014 +33756618,http://wcolite.com,"WCO-Lite version 1.1: an online nomenclatural catalogue of harvestmen of the world (Arachnida, Opiliones) curated in TaxonWorks. The ""World Catalogue of Opiliones"" (WCO) is a collaborative effort to comprehensively index the Earth's species of harvestmen. This paper announces one component of the WCO, ""WCO-Lite"" a website available at https://wcolite.com/. 
WCO-Lite provides a graphic user interface for a second component of the WCO, ""Opiliones of the World"", a database on the taxonomy of the harvestmen curated in TaxonWorks (TW). WCO-Lite interfaces include: (1) a checklist of all valid taxa of the arachnid Opiliones, exhaustive up to December 2018; (2) a taxonomic tree; (3) a search engine comprising two modules; and (4) a counter of species diversity for each taxon. An e-Book companion was launched simultaneously with WCO-Lite version 1.1 on September 12, 2020 to account for the formal publication of mandatory nomenclatural changes and availability of taxonomic names. The collective components of the WCO are also being summarized in a forthcoming conventional paper-form catalogue, currently in manuscript stage.",WCO-Lite,0.967464912,orld Catalogue of Opiliones,0.777871438,WCO-Lite,0.967464912,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/15/2021 +33216899,http://publish.plantnet-project.org/project/wildcofdb_en,"WCSdb: a database of wild Coffea species. . Coffee is a beverage enjoyed by millions of people worldwide and an important commodity for millions of people. Beside the two cultivated species (Coffea arabica and Coffea canephora), the 139 wild coffee species/taxa belonging to the Coffea genus are largely unknown to coffee scientists and breeders although these species may be crucial for future coffee crop development to face climate changes. Here we present the Wild Coffee Species database (WCSdb) hosted by Pl@ntNet platform (http://publish.plantnet-project.org/project/wildcofdb_en), providing information for 141 coffee species/taxa, for which 84 contain a photo gallery and 82 contain sequencing data (genotyping-by-sequencing, chloroplast or whole genome sequences). The objective of this database is to better understand and characterize the species (identification, morphology, biochemical compounds, genetic diversity and sequence data) in order to better protect and promote them. 
http://publish.plantnet-project.org/project/wildcofdb_en.",WCSdb,0.997819245,Wild Coffee Species database,0.988351196,WCSdb,0.997819245,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/1/2020 +31161214,"http://www.wdspdb.com/wdsp/, http://wu.scbb.pkusz.edu.cn/wdsp","WDSPdb: an updated resource for WD40 proteins. Summary The WD40-repeat proteins are a large family of scaffold molecules that assemble complexes in various cellular processes. Obtaining their structures is the key to understanding their interaction details. We present WDSPdb 2.0, a significantly updated resource providing accurately predicted secondary and tertiary structures and featured sites annotations. Based on an optimized pipeline, WDSPdb 2.0 contains about 600 thousand entries, an increase of 10-fold, and integrates more than 37 000 variants from sources of ClinVar, Cosmic, 1000 Genomes, ExAC, IntOGen, cBioPortal and IntAct. In addition, the web site is largely improved for visualization, exploring and data downloading. Availability and implementation http://www.wdspdb.com/wdsp/ or http://wu.scbb.pkusz.edu.cn/wdsp/. Supplementary information Supplementary data are available at Bioinformatics online.",WDSPdb,0.998449087,NA,0,WDSPdb,0.998449087,1,NA,25348404,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,11/1/2019 +25348404,http://wu.scbb.pkusz.edu.cn/wdsp,"WDSPdb: a database for WD40-repeat proteins. WD40-repeat proteins, as one of the largest protein families, often serve as platforms to assemble functional complexes through the hotspot residues on their domain surfaces, and thus play vital roles in many biological processes. Consequently, it is highly required for researchers who study WD40 proteins and protein-protein interactions to obtain structural information of WD40 domains. 
Systematic identification of WD40-repeat proteins, including prediction of their secondary structures, tertiary structures and potential hotspot residues responsible for protein-protein interactions, may constitute a valuable resource upon this request. To achieve this goal, we developed a specialized database WDSPdb (http://wu.scbb.pkusz.edu.cn/wdsp/) to provide these details of WD40-repeat proteins based on our recently published method WDSP. The WDSPdb contains 63,211 WD40-repeat proteins identified from 3383 species, including most well-known model organisms. To better serve the community, we implemented a user-friendly interactive web interface to browse, search and download the secondary structures, 3D structure models and potential hotspot residues provided by WDSPdb.",WDSPdb,0.994846165,NA,0,WDSPdb,0.994846165,1,NA,31161214,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,10/27/2014 +32221380,http://weislab.com/WeiDOCK/?page=PKPD,"WeiBI (web-based platform): Enriching integrated interaction network with increased coverage and functional proteins from genome-wide experimental OMICS data. Many molecular system biology approaches recognize various interactions and functional associations of proteins that occur in cellular processing. Further understanding of the characterization technique reveals noteworthy information. These types of known and predicted interactions, gained through multiple resources, are thought to be important for experimental data to satisfy comprehensive and quality needs. The current work proposes the ""WeiBI (WeiBiologicalInteractions)"" database that clarifies direct and indirect partnerships associated with biological interactions. This database contains information concerning protein's functional partnerships and interactions along with their integration into a statistical model that can be computationally predicted for humans. 
This novel approach in WeiBI version 1.0 collects information using an improved algorithm by transferring interactions between more than 115570 entries, allowing statistical analysis with the automated background for the given inputs for functional enrichment. This approach also allows the input of an entity's list from a database along with the visualization of subsets as an interaction network and successful performance of the enrichment analysis for a gene set. This wisely improved algorithm is user-friendly, and its accessibility and higher accuracy make it the best database for exploring interactions among genomes' network and reflects the importance of this study. The proposed server ""WeiBI"" is accessible at http://weislab.com/WeiDOCK/?page=PKPD.",WeiBI,0.996359855,WeiBiologicalInteractions,0.583205014,WeiBI,0.996359855,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/27/2020 +32392583,http://m6a2target.canceromics.org,"M6A2Target: a comprehensive database for targets of m6A writers, erasers and readers. . N6-methyladenosine (m6A) is the most abundant posttranscriptional modification in mammalian mRNA molecules and has a crucial function in the regulation of many fundamental biological processes. The m6A modification is a dynamic and reversible process regulated by a series of writers, erasers and readers (WERs). Different WERs might have different functions, and even the same WER might function differently in different conditions, which are mostly due to different downstream genes being targeted by the WERs. Therefore, identification of the targets of WERs is particularly important for elucidating this dynamic modification. However, there is still no public repository to host the known targets of WERs. Therefore, we developed the m6A WER target gene database (m6A2Target) to provide a comprehensive resource of the targets of m6A WERs. 
M6A2Target provides a user-friendly interface to present WER targets in two different modules: 'Validated Targets', referred to as WER targets identified from low-throughput studies, and 'Potential Targets', including WER targets analyzed from high-throughput studies. Compared to other existing m6A-associated databases, m6A2Target is the first specific resource for m6A WER target genes. M6A2Target is freely accessible at http://m6a2target.canceromics.org.",M6A2Target,0.923408449,WER target gene database,0.948100317,WER target gene database,0.948100317,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/1/2021 +27789692,http://weram.biocuckoo.org,"WERAM: a database of writers, erasers and readers of histone acetylation and methylation in eukaryotes. In this work, we developed a database WERAM (http://weram.biocuckoo.org/) for histone acetyltransferases, histone deacetylases, histone methyltransferases, histone demethylases and acetyl- or methyl-binding proteins, which catalyze, remove and recognize histone acetylation and methylation sites as 'writers', 'erasers' and 'readers', and synergistically determine the 'histone code'. From the scientific literature, we totally collected over 580 experimentally identified histone regulators from eight model organisms, including Homo sapiens, Mus musculus, Rattus norvegicus, Drosophila melanogaster, Caenorhabditis elegans, Arabidopsis thaliana, Schizosaccharomyces pombe and Saccharomyces cerevisiae We also collected ∼900 site-specific regulator-histone relations from the eight species. According to the experimental evidence, known histone regulators were classified into distinct families. To computationally detect more proteins in eukaryotes, we constructed hidden Markov model (HMM) profiles for histone regulator families. For families without HMM profiles, we also conducted orthologous searches. 
Totally, WERAM database contained more than 20 thousand non-redundant histone regulators from 148 eukaryotes. The detailed annotations and classification information of histone regulators were provided, together with site-specific histone substrates if available.",WERAM,0.997139871,NA,0,WERAM,0.997139871,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/26/2016 +26078228,http://me.lzu.edu.cn/woodformation,"An integrated database of wood-formation related genes in plants. Wood, which consists mainly of plant cell walls, is an extremely important resource in daily lives. Genes whose products participate in the processes of cell wall and wood formation are therefore major subjects of plant science research. The Wood-Formation Related Genes database (WFRGdb, http://me.lzu.edu.cn/woodformation/) serves as a data resource center for genes involved in wood formation. To create this database, we collected plant genome data published in other online databases and predicted all cell wall and wood formation related genes using BLAST and HMMER. To date, 47 gene families and 33 transcription factors from 57 genomes (28 herbaceous, 22 woody and 7 non-vascular plants) have been covered and more than 122,000 genes have been checked and recorded. To provide easy access to these data, we have developed several search methods, which make it easy to download targeted genes or groups of genes free of charge in FASTA format. Sequence and phylogenetic analyses are also available online. WFRGdb brings together cell wall and wood formation related genes from all available plant genomes, and provides an integrative platform for gene inquiry, downloading and analysis. 
This database will therefore be extremely useful for those who focuses on cell wall and wood research.",WFRGdb,0.997528315,Wood-Formation Related Genes database,0.967793643,WFRGdb,0.997528315,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/16/2015 +28018331,http://wgmlst.imst.nsysu.edu.tw,"Construction of a Pan-Genome Allele Database of Salmonella enterica Serovar Enteritidis for Molecular Subtyping and Disease Cluster Identification. We built a pan-genome allele database with 395 genomes of Salmonella enterica serovar Enteritidis and developed computer tools for analysis of whole genome sequencing (WGS) data of bacterial isolates for disease cluster identification. A web server (http://wgmlst.imst.nsysu.edu.tw) was set up with the database and the tools, allowing users to upload WGS data to generate whole genome multilocus sequence typing (wgMLST) profiles and to perform cluster analysis of wgMLST profiles. The usefulness of the database in disease cluster identification was demonstrated by analyzing a panel of genomes from 55 epidemiologically well-defined S. Enteritidis isolates provided by the Minnesota Department of Health. The wgMLST-based cluster analysis revealed distinct clades that were concordant with the epidemiologically defined outbreaks. Thus, using a common pan-genome allele database, wgMLST can be a promising WGS-based subtyping approach for disease surveillance and outbreak investigation across laboratories.",wgMLST,0.924707353,NA,0,wgMLST,0.924707353,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/15/2016 +33181826,http://animal.nwsuaf.edu.cn/code/index.php/Wheat,"WGVD: an integrated web-database for wheat genome variation and selective signatures. . Bread wheat is one of the most important crops worldwide. 
With the release of the complete wheat reference genome and the development of next-generation sequencing technology, a mass of genomic data from bread wheat and its progenitors has been yield and has provided genomic resources for wheat genetics research. To conveniently and effectively access and use these data, we established Wheat Genome Variation Database, an integrated web-database including genomic variations from whole-genome resequencing and exome-capture data for bread wheat and its progenitors, as well as selective signatures during the process of wheat domestication and improvement. In this version, WGVD contains 7 346 814 single nucleotide polymorphisms (SNPs) and 1 044 400 indels focusing on genic regions and upstream or downstream regions. We provide allele frequency distribution patterns of these variations for 5 ploidy wheat groups or 17 worldwide bread wheat groups, the annotation of the variant types and the genotypes of all individuals for 2 versions of bread wheat reference genome (IWGSC RefSeq v1.0 and IWGSC RefSeq v2.0). Selective footprints for Aegilops tauschii, wild emmer, domesticated emmer, bread wheat landrace and bread wheat variety are evaluated with two statistical tests (FST and Pi) based on SNPs from whole-genome resequencing data. In addition, we provide the Genome Browser to visualize the genomic variations, the selective footprints, the genotype patterns and the read coverage depth, and the alignment tool Blast to search the homologous regions between sequences. All of these features of WGVD will promote wheat functional studies and wheat breeding. http://animal.nwsuaf.edu.cn/code/index.php/Wheat.",WGVD,0.993578374,Wheat Genome Variation Database,0.973856658,WGVD,0.993578374,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2020 +26705106,http://wheat.pw.usda.gov/WheatExp,"WheatExp: an RNA-seq expression database for polyploid wheat. 
Background For functional genomics studies, it is important to understand the dynamic expression profiles of transcribed genes in different tissues, stages of development and in response to environmental stimuli. The proliferation in the use of next-generation sequencing technologies by the plant research community has led to the accumulation of large volumes of expression data. However, analysis of these datasets is complicated by the frequent occurrence of polyploidy among economically-important crop species. In addition, processing and analyzing such large volumes of sequence data is a technical and time-consuming task, limiting their application in functional genomics studies, particularly for smaller laboratories which lack access to high-powered computing infrastructure. Wheat is a good example of a young polyploid species with three similar genomes (97 % identical among homoeologous genes), rapidly accumulating RNA-seq datasets and a large research community. Description We present WheatExp, an expression database and visualization tool to analyze and compare homoeologue-specific transcript profiles across a broad range of tissues from different developmental stages in polyploid wheat. Beginning with publicly-available RNA-seq datasets, we developed a pipeline to distinguish between homoeologous transcripts from annotated genes in tetraploid and hexaploid wheat. Data from multiple studies is processed and compiled into a database which can be queried either by BLAST or by searching for a known gene of interest by name or functional domain. Expression data of multiple genes can be displayed side-by-side across all expression datasets providing immediate access to a comprehensive panel of expression data for specific subsets of wheat genes. 
Conclusions The development of a publicly accessible expression database hosted on the GrainGenes website - http://wheat.pw.usda.gov/WheatExp/ - coupled with a simple and readily-comparable visualization tool will empower the wheat research community to use RNA-seq data and to perform functional analyses of target genes. The presented expression data is homoeologue-specific allowing for the analysis of relative contributions from each genome to the overall expression of a gene, a critical consideration for breeding applications. Our approach can be expanded to other polyploid species by adjusting sequence mapping parameters according to the specific divergence of their genomes.",WheatExp,0.978051126,NA,0,WheatExp,0.978051126,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/24/2015 +"22009731, 26519407",http://www.wheatgenome.info,"WheatGenome.info: an integrated database and portal for wheat genome information. Bread wheat (Triticum aestivum) is one of the most important crop plants, globally providing staple food for a large proportion of the human population. However, improvement of this crop has been limited due to its large and complex genome. Advances in genomics are supporting wheat crop improvement. We provide a variety of web-based systems hosting wheat genome and genomic data to support wheat research and crop improvement. WheatGenome.info is an integrated database resource which includes multiple web-based applications. These include a GBrowse2-based wheat genome viewer with BLAST search portal, TAGdb for searching wheat second-generation genome sequence data, wheat autoSNPdb, links to wheat genetic maps using CMap and CMap3D, and a wheat genome Wiki to allow interaction between diverse wheat genome sequencing activities. This system includes links to a variety of wheat genome resources hosted at other research organizations. 
This integrated database aims to accelerate wheat genome research and is freely accessible via the web interface at http://www.wheatgenome.info/.",WheatGenome.info,0.941735923,NA,0,WheatGenome.info,0.941735923,2,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2016 +23175606,http://wholecellkb.stanford.edu,"WholeCellKB: model organism databases for comprehensive whole-cell models. Whole-cell models promise to greatly facilitate the analysis of complex biological behaviors. Whole-cell model development requires comprehensive model organism databases. WholeCellKB (http://wholecellkb.stanford.edu) is an open-source web-based software program for constructing model organism databases. WholeCellKB provides an extensive and fully customizable data model that fully describes individual species including the structure and function of each gene, protein, reaction and pathway. We used WholeCellKB to create WholeCellKB-MG, a comprehensive database of the Gram-positive bacterium Mycoplasma genitalium using over 900 sources. WholeCellKB-MG is extensively cross-referenced to existing resources including BioCyc, KEGG and UniProt. WholeCellKB-MG is freely accessible through a web-based user interface as well as through a RESTful web service.",WholeCellKB,0.995857,NA,0,WholeCellKB,0.995857,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/21/2012 +26573482,http://widde.toulouse.inra.fr,"WIDDE: a Web-Interfaced next generation database for genetic diversity exploration, with a first application in cattle. Background The advent and democratization of next generation sequencing and genotyping technologies lead to a huge amount of data for the characterization of population genetic diversity in model and non model-species. However, efficient storage, management, cross-analyzing and exploration of such dense genotyping datasets remain challenging. 
This is particularly true for the bovine species where many SNP datasets have been generated in various cattle populations with different genotyping tools. Description We developed WIDDE, a Web-Interfaced Next Generation Database that stands as a generic tool applicable to a wide range of species and marker types ( http://widde.toulouse.inra.fr). As a first illustration, we hereby describe its first version dedicated to cattle biodiversity, which includes a large and evolving cattle genotyping dataset for over 750,000 SNPs available on 129 (89 public) different cattle populations representative of the world-wide bovine genetic diversity and on 7 outgroup bovid species. This version proposes an optional marker and individual filtering step, an export of genotyping data in different popular formats, and an exploration of genetic diversity through a principal component analysis. Users can also explore their own genotyping data together with data from WIDDE, assign their samples to WIDDE populations based on distance assignment method and supervised clustering, and estimate their ancestry composition relative to the populations represented in the database. Conclusion The cattle version of WIDDE represents to our knowledge the first database dedicated to cattle biodiversity and SNP genotyping data that will be very useful for researchers interested in this field. As a generic tool applicable to a wide range of marker types, WIDDE is overall intended to the genetic diversity exploration of any species and will be extended to other species shortly. The structure makes it easy to include additional output formats and new tools dedicated to genetic diversity exploration.",WIDDE,0.997149587,NA,0,WIDDE,0.997149587,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/14/2015 +23209562,http://severus.dbmi.pitt.edu/wiki-pi,"Wiki-pi: a web-server of annotated human protein-protein interactions to aid in discovery of protein function. 
Protein-protein interactions (PPIs) are the basis of biological functions. Knowledge of the interactions of a protein can help understand its molecular function and its association with different biological processes and pathways. Several publicly available databases provide comprehensive information about individual proteins, such as their sequence, structure, and function. There also exist databases that are built exclusively to provide PPIs by curating them from published literature. The information provided in these web resources is protein-centric, and not PPI-centric. The PPIs are typically provided as lists of interactions of a given gene with links to interacting partners; they do not present a comprehensive view of the nature of both the proteins involved in the interactions. A web database that allows search and retrieval based on biomedical characteristics of PPIs is lacking, and is needed. We present Wiki-Pi (read Wiki-π), a web-based interface to a database of human PPIs, which allows users to retrieve interactions by their biomedical attributes such as their association to diseases, pathways, drugs and biological functions. Each retrieved PPI is shown with annotations of both of the participant proteins side-by-side, creating a basis to hypothesize the biological function facilitated by the interaction. Conceptually, it is a search engine for PPIs analogous to PubMed for scientific literature. Its usefulness in generating novel scientific hypotheses is demonstrated through the study of IGSF21, a little-known gene that was recently identified to be associated with diabetic retinopathy. Using Wiki-Pi, we infer that its association to diabetic retinopathy may be mediated through its interactions with the genes HSPB1, KRAS, TMSB4X and DGKD, and that it may be involved in cellular response to external stimuli, cytoskeletal organization and regulation of molecular activity. 
The website also provides a wiki-like capability allowing users to describe or discuss an interaction. Wiki-Pi is available publicly and freely at http://severus.dbmi.pitt.edu/wiki-pi/.",Wiki-Pi,0.983535866,NA,0,Wiki-Pi,0.983535866,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2012 +22702248,http://www.wikicell.org,"WikiCell: a unified resource platform for human transcriptomics research. Here we present a database, WikiCell, as a portal for a unified view of the human transcriptome. At present, WikiCell consists of Expressed Sequenced Tags (ESTs), and users can access, curate, and submit database data by interactive mode, and also can browse, query, upload, and download sequences. Researchers can utilize the transcriptome model based on a human taxonomy graph. The sequences in each model are sorted by attributes such as physiological and pathological samples. The Genbank EST data format are conserved. Gene information is provided, including housekeeping genes, taxonomy location, and gene ontology (GO) description. We believe that WikiCell provides a useful resource for defining expression pattern and tissue differentiation based on human taxonomy mode. It can be accessed at http://www.wikicell.org/.",WikiCell,0.997400999,NA,0,WikiCell,0.997400999,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/1/2012 +26989148,http://www.wikidata.org,"Wikidata as a semantic framework for the Gene Wiki initiative. . Open biological data are distributed over many resources making them challenging to integrate, to update and to disseminate quickly. Wikidata is a growing, open community database which can serve this purpose and also provides tight integration with Wikipedia. In order to improve the state of biological data, facilitate data management and dissemination, we imported all human and mouse genes, and all human and mouse proteins into Wikidata. 
In total, 59,721 human genes and 73,355 mouse genes have been imported from NCBI and 27,306 human proteins and 16,728 mouse proteins have been imported from the Swissprot subset of UniProt. As Wikidata is open and can be edited by anybody, our corpus of imported data serves as the starting point for integration of further data by scientists, the Wikidata community and citizen scientists alike. The first use case for these data is to populate Wikipedia Gene Wiki infoboxes directly from Wikidata with the data integrated above. This enables immediate updates of the Gene Wiki infoboxes as soon as the data in Wikidata are modified. Although Gene Wiki pages are currently only on the English language version of Wikipedia, the multilingual nature of Wikidata allows for usage of the data we imported in all 280 different language Wikipedias. Apart from the Gene Wiki infobox use case, a SPARQL endpoint and exporting functionality to several standard formats (e.g. JSON, XML) enable use of the data by scientists. In summary, we created a fully open and extensible data resource for human and mouse molecular biology and biochemistry data. This resource enriches all the Wikipedias with structured information and serves as a new linking hub for the biological semantic web. Database URL: https://www.wikidata.org/.",Wikidata,0.992722154,NA,0,Wikidata,0.992722154,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/17/2016 +33211851,http://www.wikipathways.org,"WikiPathways: connecting communities. WikiPathways (https://www.wikipathways.org) is a biological pathway database known for its collaborative nature and open science approaches. With the core idea of the scientific community developing and curating biological knowledge in pathway models, WikiPathways lowers all barriers for accessing and using its content. Increasingly more content creators, initiatives, projects and tools have started using WikiPathways. 
Central in this growth and increased use of WikiPathways are the various communities that focus on particular subsets of molecular pathways such as for rare diseases and lipid metabolism. Knowledge from published pathway figures helps prioritize pathway development, using optical character and named entity recognition. We show the growth of WikiPathways over the last three years, highlight the new communities and collaborations of pathway authors and curators, and describe various technologies to connect to external resources and initiatives. The road toward a sustainable, community-driven pathway database goes through integration with other resources such as Wikidata and allowing more use, curation and redistribution of WikiPathways content.",WikiPathways,0.994370461,NA,0,WikiPathways,0.994370461,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/1/2021 +22075991,http://en.wikipedia.org/wiki/Portal:Gene_Wiki,"The Gene Wiki in 2011: community intelligence applied to human gene annotation. The Gene Wiki is an open-access and openly editable collection of Wikipedia articles about human genes. Initiated in 2008, it has grown to include articles about more than 10,000 genes that, collectively, contain more than 1.4 million words of gene-centric text with extensive citations back to the primary scientific literature. This growing body of useful, gene-centric content is the result of the work of thousands of individuals throughout the scientific community. Here, we describe recent improvements to the automated system that keeps the structured data presented on Gene Wiki articles in sync with the data from trusted primary databases. We also describe the expanding contents, editors and users of the Gene Wiki. Finally, we introduce a new automated system, called WikiTrust, which can effectively compute the quality of Wikipedia articles, including Gene Wiki articles, at the word level. 
All articles in the Gene Wiki can be freely accessed and edited at Wikipedia, and additional links and information can be found at the project's Wikipedia portal page: http://en.wikipedia.org/wiki/Portal:Gene_Wiki.",WikiTrust,0.861291409,NA,0,WikiTrust,0.861291409,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: INCORRECT NAME,NA,NA,11/10/2011 +30208844,http://webofmicrobes.org,"Web of microbes (WoM): a curated microbial exometabolomics database for linking chemistry and microbes. Background As microbiome research becomes increasingly prevalent in the fields of human health, agriculture and biotechnology, there exists a need for a resource to better link organisms and environmental chemistries. Exometabolomics experiments now provide assertions of the metabolites present within specific environments and how the production and depletion of metabolites is linked to specific microbes. This information could be broadly useful, from comparing metabolites across environments, to predicting competition and exchange of metabolites between microbes, and to designing stable microbial consortia. Here, we introduce Web of Microbes (WoM; freely available at: http://webofmicrobes.org ), the first exometabolomics data repository and visualization tool. Description WoM provides manually curated, direct biochemical observations on the changes to metabolites in an environment after exposure to microorganisms. 
The web interface displays a number of key features: (1) the metabolites present in a control environment prior to inoculation or microbial activation, (2) heatmap-like displays showing metabolite increases or decreases resulting from microbial activities, (3) a metabolic web displaying the actions of multiple organisms on a specified metabolite pool, (4) metabolite interaction scores indicating an organism's interaction level with its environment, potential for metabolite exchange with other organisms and potential for competition with other organisms, and (5) downloadable datasets for integration with other types of -omics datasets. Conclusion We anticipate that Web of Microbes will be a useful tool for the greater research community by making available manually curated exometabolomics results that can be used to improve genome annotations and aid in the interpretation and construction of microbial communities.",WoM,0.97369504,NA,0,WoM,0.97369504,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/12/2018 +32431267,http://www.wwarn.org/tools-resources/literature-reviews/wwarn-clinical-trials-publication-library,"The WorldWide Antimalarial Resistance Network Clinical Trials Publication Library: A Live, Open-Access Database of Plasmodium Treatment Efficacy Trials. Parasite resistance to antimalarial drugs poses a serious threat to malaria control. The WorldWide Antimalarial Resistance Network (WWARN) aims to provide a collaborative platform to support the global malaria research effort. Here, we describe the ""WWARN clinical trials publication library,"" an open-access, up-to-date resource to streamline the synthesis of antimalarial safety and efficacy data. A series of iteratively refined database searches were conducted to identify prospective clinical trials assessing antimalarial drug efficacy with at least 28 days of follow-up. 
Of approximately 45,000 articles screened, 1,221 trials published between 1946 and 2018 were identified, representing 2,339 treatment arms and 323,819 patients. In trials from endemic locations, 75.7% (787/1,040) recruited patients with Plasmodium falciparum, 17.0% (177/1,040) Plasmodium vivax, 6.9% (72/1,040) both, and 0.4% (4/1,040) other Plasmodium species; 57.2% (585/1,022) of trials included under-fives and 5.3% (55/1,036) included pregnant women. In Africa, there has been a marked increase in both P. falciparum and P. vivax studies over the last two decades. The WHO-recommended artemisinin-based combination therapies alone or with a gametocidal drug were assessed in 39.5% (705/1,783) of P. falciparum treatment arms and 10.5% (45/429) of P. vivax arms, increasing to 78.0% (266/341) and 22.9% (27/118), respectively, in the last five years. The library is a comprehensive, open-access tool that can be used by the malaria community to explore the collective knowledge on antimalarial efficacy (available at https://www.wwarn.org/tools-resources/literature-reviews/wwarn-clinical-trials-publication-library). It is the first of its kind in the field of global infectious diseases, and lessons learnt in its creation can be adapted to other infectious diseases.",WWARN,0.904815594,WorldWide Antimalarial Resistance Network,0.951693782,WorldWide Antimalarial Resistance Network,0.951693782,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,5/7/2020 +23172286,http://so.qbic.riken.jp/wddd,"WDDD: Worm Developmental Dynamics Database. During animal development, cells undergo dynamic changes in position and gene expression. A collection of quantitative information about morphological dynamics under a wide variety of gene perturbations would provide a rich resource for understanding the molecular mechanisms of development. 
Here, we created a database, the Worm Developmental Dynamics Database (http://so.qbic.riken.jp/wddd/), which stores a collection of quantitative information about cell division dynamics in early Caenorhabditis elegans embryos with single genes silenced by RNA-mediated interference. The information contains the three-dimensional coordinate values of the outlines of nuclear regions and the dynamics of the outlines over time. The database provides free access to 50 sets of quantitative data for wild-type embryos and 136 sets of quantitative data for RNA-mediated interference embryos corresponding to 72 of the 97 essential embryonic genes on chromosome III. The database also provides sets of four-dimensional differential interference contrast microscopy images on which the quantitative data were based. The database will provide a novel opportunity for the development of computational methods to obtain fresh insights into the mechanisms of development. The quantitative information and microscopy images can be synchronously viewed through a web browser, which is designed for easy access by experimental biologists.",WDDD,0.479633838,Worm Developmental Dynamics Database,0.896067631,Worm Developmental Dynamics Database,0.896067631,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/20/2012 +31642470,http://wormbase.org,"WormBase: a modern Model Organism Information Resource. WormBase (https://wormbase.org/) is a mature Model Organism Information Resource supporting researchers using the nematode Caenorhabditis elegans as a model system for studies across a broad range of basic biological processes. Toward this mission, WormBase efforts are arranged in three primary facets: curation, user interface and architecture. In this update, we describe progress in each of these three areas. 
In particular, we discuss the status of literature curation and recently added data, detail new features of the web interface and options for users wishing to conduct data mining workflows, and discuss our efforts to build a robust and scalable architecture by leveraging commercial cloud offerings. We conclude with a description of WormBase's role as a founding member of the nascent Alliance of Genome Resources.",WormBase,0.997976899,NA,0,WormBase,0.997976899,1,NA,"22067452.0, 24194605.0, 29069413.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2020 +"22067452, 24194605, 29069413",http://www.wormbase.org,"WormBase 2012: more genomes, more data, new website. Since its release in 2000, WormBase (http://www.wormbase.org) has grown from a small resource focusing on a single species and serving a dedicated research community, to one now spanning 15 species essential to the broader biomedical and agricultural research fields. To enhance the rate of curation, we have automated the identification of key data in the scientific literature and use similar methodology for data extraction. To ease access to the data, we are collaborating with journals to link entities in research publications to their report pages at WormBase. To facilitate discovery, we have added new views of the data, integrated large-scale datasets and expanded descriptions of models for human disease. Finally, we have introduced a dramatic overhaul of the WormBase website for public beta testing. Designed to balance complexity and usability, the new site is species-agnostic, highly customizable, and interactive. Casual users and developers alike will be able to leverage the public RESTful application programming interface (API) to generate custom data mining solutions and extensions to the site. 
We report on the growth of our database and on our work in keeping pace with the growing demand for data, efforts to anticipate the requirements of users and new collaborations with the larger science community.",WormBase,0.99719429,NA,0,WormBase,0.99719429,3,NA,31642470,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2018 +27899279,http://parasite.wormbase.org,"WormBase ParaSite - a comprehensive resource for helminth genomics. The number of publicly available parasitic worm genome sequences has increased dramatically in the past three years, and research interest in helminth functional genomics is now quickly gathering pace in response to the foundation that has been laid by these collective efforts. A systematic approach to the organisation, curation, analysis and presentation of these data is clearly vital for maximising the utility of these data to researchers. We have developed a portal called WormBase ParaSite (http://parasite.wormbase.org) for interrogating helminth genomes on a large scale. Data from over 100 nematode and platyhelminth species are integrated, adding value by way of systematic and consistent functional annotation (e.g. protein domains and Gene Ontology terms), gene expression analysis (e.g. alignment of life-stage specific transcriptome data sets), and comparative analysis (e.g. orthologues and paralogues). We provide several ways of exploring the data, including genome browsers, genome and gene summary pages, text search, sequence search, a query wizard, bulk downloads, and programmatic interfaces. 
In this review, we provide an overview of the back-end infrastructure and analysis behind WormBase ParaSite, and the displays and tools available to users for interrogating helminth genomic data.",WormBase ParaSite,0.946643819,NA,0,WormBase ParaSite,0.946643819,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/27/2016 +23180786,"http://www.wormqtl.org, http://www.rqtl.org","WormQTL--public archive and analysis web portal for natural variation data in Caenorhabditis spp. Here, we present WormQTL (http://www.wormqtl.org), an easily accessible database enabling search, comparative analysis and meta-analysis of all data on variation in Caenorhabditis spp. Over the past decade, Caenorhabditis elegans has become instrumental for molecular quantitative genetics and the systems biology of natural variation. These efforts have resulted in a valuable amount of phenotypic, high-throughput molecular and genotypic data across different developmental worm stages and environments in hundreds of C. elegans strains. WormQTL provides a workbench of analysis tools for genotype-phenotype linkage and association mapping based on but not limited to R/qtl (http://www.rqtl.org). All data can be uploaded and downloaded using simple delimited text or Excel formats and are accessible via a public web user interface for biologists and R statistic and web service interfaces for bioinformaticians, based on open source MOLGENIS and xQTL workbench software. WormQTL welcomes data submissions from other worm researchers.",WormQTL,0.998097301,NA,0,WormQTL,0.998097301,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/24/2012 +24217915,http://www.wormqtl-hd.org,"WormQTLHD--a web database for linking human disease to natural variation data in C. elegans. Interactions between proteins are highly conserved across species. As a result, the molecular basis of multiple diseases affecting humans can be studied in model organisms that offer many alternative experimental opportunities. 
One such organism-Caenorhabditis elegans-has been used to produce much molecular quantitative genetics and systems biology data over the past decade. We present WormQTL(HD) (Human Disease), a database that quantitatively and systematically links expression Quantitative Trait Loci (eQTL) findings in C. elegans to gene-disease associations in man. WormQTL(HD), available online at http://www.wormqtl-hd.org, is a user-friendly set of tools to reveal functionally coherent, evolutionary conserved gene networks. These can be used to predict novel gene-to-gene associations and the functions of genes underlying the disease of interest. We created a new database that links C. elegans eQTL data sets to human diseases (34 337 gene-disease associations from OMIM, DGA, GWAS Central and NHGRI GWAS Catalogue) based on overlapping sets of orthologous genes associated to phenotypes in these two species. We utilized QTL results, high-throughput molecular phenotypes, classical phenotypes and genotype data covering different developmental stages and environments from WormQTL database. All software is available as open source, built on MOLGENIS and xQTL workbench.",WormQTL(HD,0.97672087,NA,0,WormQTL(HD,0.97672087,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,11/11/2013 +29982559,http://nksingh.nationalprof.in,"A database of wild rice germplasm of Oryza rufipogon species complex from different agro-climatic zones of India. . Rice is a staple food for the people of Asia that supplies more than 50% of the food energy globally. It is widely accepted that the crop domestication process has left behind substantial useful genetic diversity in their wild progenitor species that has huge potential for developing crop varieties with enhanced resistance to an array of biotic and abiotic stresses. 
In this context, Oryza rufipogon, Oryza nivara and their intermediate types wild rice germplasm/s collected from diverse agro-climatic regions would provide a rich repository of genes and alleles that could be utilized for rice improvement using genomics-assisted breeding. Here we present a database of detailed information on 614 such diverse wild rice accessions collected from different agro-climatic zones of India, including 46 different morphological descriptors, complete passport data and DNA fingerprints. The information has been stored in a web-based database entitled 'Indian Wild Rice (IWR) Database'. The information provided in the IWR Database will be useful for the rice geneticists and breeders for improvement of rice cultivars for yield, quality and resilience to climate change.Database URL: http://nksingh.nationalprof.in: 8080/iwrdb/index.jsp.",WR,0.70587492,NA,0,WR,0.70587492,1,NA,NA,low_prob_best_name,remove,NA,NA,FALSE POS: PARTIAL NAME,NA,NA,1/1/2018 +29533231,http://validate.wwpdb.org,"Worldwide Protein Data Bank validation information: usage and trends. Realising the importance of assessing the quality of the biomolecular structures deposited in the Protein Data Bank (PDB), the Worldwide Protein Data Bank (wwPDB) partners established Validation Task Forces to obtain advice on the methods and standards to be used to validate structures determined by X-ray crystallography, nuclear magnetic resonance spectroscopy and three-dimensional electron cryo-microscopy. The resulting wwPDB validation pipeline is an integral part of the wwPDB OneDep deposition, biocuration and validation system. The wwPDB Validation Service webserver (https://validate.wwpdb.org) can be used to perform checks prior to deposition. Here, it is shown how validation metrics can be combined to produce an overall score that allows the ranking of macromolecular structures and domains in search results. 
The ValTrendsDB database provides users with a convenient way to access and analyse validation information and other properties of X-ray crystal structures in the PDB, including investigating trends in and correlations between different structure properties and validation metrics.",wwPDB,0.983193755,Worldwide,0.737224817,wwPDB,0.983193755,1,NA,27450113,NA,NA,NA,do not merge,NA,NA,NA,3/2/2018 +27450113,http://wwpdb.org,"The archiving and dissemination of biological structure data. The global Protein Data Bank (PDB) was the first open-access digital archive in biology. The history and evolution of the PDB are described, together with the ways in which molecular structural biology data and information are collected, curated, validated, archived, and disseminated by the members of the Worldwide Protein Data Bank organization (wwPDB; http://wwpdb.org). Particular emphasis is placed on the role of community in establishing the standards and policies by which the PDB archive is managed day-to-day.",wwPDB,0.968952253,Worldwide Protein Data Bank organization,0.829005563,wwPDB,0.968952253,1,"25540181.0, 28296894.0",29533231,low_prob_best_name,do not remove,merge on record with best name prob,do not merge,NA,NA,NA,7/21/2016 +24013926,http://bioinformatics.snu.ac.kr/xdb,"The Xeno-glycomics database (XDB): a relational database of qualitative and quantitative pig glycome repertoire. Summary In recent years, the improvement of mass spectrometry-based glycomics techniques (i.e. highly sensitive, quantitative and high-throughput analytical tools) has enabled us to obtain a large dataset of glycans. Here we present a database named Xeno-glycomics database (XDB) that contains cell- or tissue-specific pig glycomes analyzed with mass spectrometry-based techniques, including a comprehensive pig glycan information on chemical structures, mass values, types and relative quantities. 
It was designed as a user-friendly web-based interface that allows users to query the database according to pig tissue/cell types or glycan masses. This database will contribute in providing qualitative and quantitative information on glycomes characterized from various pig cells/organs in xenotransplantation and might eventually provide new targets in the √ɬÉ√Ǭé√ɬÇ√Ǭ±1,3-galactosyltransferase gene-knock out pigs era. Availability The database can be accessed on the web at http://bioinformatics.snu.ac.kr/xdb.",XDB,0.954009195,Xeno-glycomics database,0.933108858,XDB,0.954009195,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,9/6/2013 +"23125366, 25313157",http://www.xenbase.org,"Xenbase: expansion and updates of the Xenopus model organism database. Xenbase (http://www.xenbase.org) is a model organism database that provides genomic, molecular, cellular and developmental biology content to biomedical researchers working with the frog, Xenopus and Xenopus data to workers using other model organisms. As an amphibian Xenopus serves as a useful evolutionary bridge between invertebrates and more complex vertebrates such as birds and mammals. Xenbase content is collated from a variety of external sources using automated and semi-automated pipelines then processed via a combination of automated and manual annotation. A link-matching system allows for the wide variety of synonyms used to describe biological data on unique features, such as a gene or an anatomical entity, to be used by the database in an equivalent manner. 
Recent updates to the database include the Xenopus laevis genome, a new Xenopus tropicalis genome build, epigenomic data, collections of RNA and protein sequences associated with genes, more powerful gene expression searches, a community and curated wiki, an extensive set of manually annotated gene expression patterns and a new database module that contains data on over 700 antibodies that are useful for exploring Xenopus cell and developmental biology.",Xenbase,0.995507419,Xenopus model organism,0.689142883,Xenbase,0.995507419,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/13/2014 +27899583,http://www.xtalkdb.org,"XTalkDB: a database of signaling pathway crosstalk. Analysis of signaling pathways and their crosstalk is a cornerstone of systems biology. Thousands of papers have been published on these topics. Surprisingly, there is no database that carefully and explicitly documents crosstalk between specific pairs of signaling pathways. We have developed XTalkDB (http://www.xtalkdb.org) to fill this very important gap. XTalkDB contains curated information for 650 pairs of pathways from over 1600 publications. In addition, the database reports the molecular components (e.g. proteins, hormones, microRNAs) that mediate crosstalk between a pair of pathways and the species and tissue in which the crosstalk was observed. The XTalkDB website provides an easy-to-use interface for scientists to browse crosstalk information by querying one or more pathways or molecules of interest.",XTalkDB,0.99725914,NA,0,XTalkDB,0.99725914,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +22481888,http://www.xylella.lncc.br,"Xylella fastidiosa comparative genomic database is an information resource to explore the annotation, genomic features, and biology of different strains. 
The Xylella fastidiosa comparative genomic database is a scientific resource with the aim to provide a user-friendly interface for accessing high-quality manually curated genomic annotation and comparative sequence analysis, as well as for identifying and mapping prophage-like elements, a marked feature of Xylella genomes. Here we describe a database and tools for exploring the biology of this important plant pathogen. The hallmarks of this database are the high quality genomic annotation, the functional and comparative genomic analysis and the identification and mapping of prophage-like elements. It is available from web site http://www.xylella.lncc.br.",Xylella,0.679942429,NA,0,Xylella,0.679942429,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/1/2012 +22325123,http://www.yadamp.unisa.it,"YADAMP: yet another database of antimicrobial peptides. This work presents an antimicrobial peptide database (YADAMP) based on an extensive literature search. This database is focused primarily on bacteria, with detailed information for 2133 peptides active against bacteria. YADAMP was created to facilitate access to critical information on antimicrobial peptides (AMPs). The main difference between YADAMP and other web databases of AMPs is the explicit presence of antimicrobial activity against the most common bacterial strains. YADAMP allows complex queries, easily accessible through a web interface. Peptide information can be retrieved based on peptide name, number of amino acids, net charge, hydrophobic percentage, sequence motif, structure and activity against bacteria. YADAMP is suitable for reviewing information on AMPs and for structure-function analyses of peptides. The database can be accessed via a web-based browser at http://www.yadamp.unisa.it.",YADAMP,0.996951342,antimicrobial peptide database,0.960818076,YADAMP,0.996951342,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/9/2012 +24082050,http://www.ycelldeath.com/yapoptosis,"yApoptosis: yeast apoptosis database. 
In the past few years, programmed cell death (PCD) has become a popular research area due to its fundamental aspects and its links to human diseases. Yeast has been used as a model for studying PCD, since the discovery of morphological markers of apoptotic cell death in yeast in 1997. Increasing knowledge in identification of components and molecular pathways created a need for organization of information. To meet the demands from the research community, we have developed a curated yeast apoptosis database, yApoptosis. The database structurally collects an extensively curated set of apoptosis, PCD and related genes, their genomic information, supporting literature and relevant external links. A web interface including necessary functions is provided to access and download the data. In addition, we included several networks where the apoptosis genes or proteins are involved, and present them graphically and interactively to facilitate rapid visualization. We also promote continuous inputs and curation by experts. yApoptosis is a highly specific resource for sharing information online, which supports researches and studies in the field of yeast apoptosis and cell death. DATABASE URL: http://www.ycelldeath.com/yapoptosis/.",yApoptosis,0.986070871,NA,0,yApoptosis,0.986070871,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/29/2013 +30048518,http://cosbi4.ee.ncku.edu.tw/YARG,"YARG: A repository for arsenic-related genes in yeast. Arsenic is a toxic metalloid. Moderate levels of arsenic exposure from drinking water can cause various human health problems such as skin lesions, circulatory disorders and cancers. Thus, arsenic toxicity is a key focus area for environmental and toxicological investigations. Many arsenic-related genes in yeast have been identified by experimental strategies such as phenotypic screening and transcriptional profiling. These identified arsenic-related genes are valuable information for studying arsenic toxicity. 
However, the literature about these identified arsenic-related genes is widely dispersed and cannot be easily acquired by researchers. This prompts us to develop YARG (Yeast Arsenic-Related Genes) database, which comprehensively collects 3396 arsenic-related genes in the literature. For each arsenic-related gene, the number and types of experimental evidence (phenotypic screening and/or transcriptional profiling) are provided. Users can use both search and browse modes to query arsenic-related genes in YARG. We used two case studies to show that YARG can return biologically meaningful arsenic-related information for the query gene(s). We believe that YARG is a useful resource for arsenic toxicity research. YARG is available at http://cosbi4.ee.ncku.edu.tw/YARG/.",YARG,0.993278623,Yeast Arsenic-Related Genes,0.992494863,YARG,0.993278623,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/26/2018 +30546860,http://cadd.pharmacy.nankai.edu.cn/yatcm/home,"YaTCM: Yet another Traditional Chinese Medicine Database for Drug Discovery. Traditional Chinese Medicine (TCM) has a long history of widespread clinical applications, especially in East Asia, and is becoming frequently used in Western countries. However, owing to extreme complicacy in both chemical ingredients and mechanism of action, a deep understanding of TCM is still difficult. To accelerate the modernization and popularization of TCM, a single comprehensive database is required, containing a wealth of TCM-related information and equipped with complete analytical tools. Here we present YaTCM (Yet another Traditional Chinese Medicine database), a free web-based toolkit, which provides comprehensive TCM information and is furnished with analysis tools. 
YaTCM allows a user to (1) identify the potential ingredients that are crucial to TCM herbs through similarity search and substructure search, (2) investigate the mechanism of action for TCM or prescription through pathway analysis and network pharmacology analysis, (3) predict potential targets for TCM molecules by multi-voting chemical similarity ensemble approach, and (4) explore functionally similar herb pairs. All these functions can lead to one systematic network for visualization of TCM recipes, herbs, ingredients, definite or putative protein targets, pathways, and diseases. This web service would help in uncovering the mechanism of action of TCM, revealing the essence of TCM theory and then promoting the drug discovery process. YaTCM is freely available at http://cadd.pharmacy.nankai.edu.cn/yatcm/home.",YaTCM,0.994157434,NA,0,YaTCM,0.994157434,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/23/2018 +27392072,"http://cosbi.ee.ncku.edu.tw/YCRD/, http://cosbi2.ee.ncku.edu.tw/YCRD","YCRD: Yeast Combinatorial Regulation Database. In eukaryotes, the precise transcriptional control of gene expression is typically achieved through combinatorial regulation using cooperative transcription factors (TFs). Therefore, a database which provides regulatory associations between cooperative TFs and their target genes is helpful for biologists to study the molecular mechanisms of transcriptional regulation of gene expression. Because there is no such kind of databases in the public domain, this prompts us to construct a database, called Yeast Combinatorial Regulation Database (YCRD), which deposits 434,197 regulatory associations between 2535 cooperative TF pairs and 6243 genes. The comprehensive collection of more than 2500 cooperative TF pairs was retrieved from 17 existing algorithms in the literature. The target genes of a cooperative TF pair (e.g. 
TF1-TF2) are defined as the common target genes of TF1 and TF2, where a TF's experimentally validated target genes were downloaded from YEASTRACT database. In YCRD, users can (i) search the target genes of a cooperative TF pair of interest, (ii) search the cooperative TF pairs which regulate a gene of interest and (iii) identify important cooperative TF pairs which regulate a given set of genes. We believe that YCRD will be a valuable resource for yeast biologists to study combinatorial regulation of gene expression. YCRD is available at http://cosbi.ee.ncku.edu.tw/YCRD/ or http://cosbi2.ee.ncku.edu.tw/YCRD/.",YCRD,0.992400229,Yeast Combinatorial Regulation Database,0.966890701,YCRD,0.992400229,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,7/8/2016 +26061870,http://www.semanticgen.net/ydhs,"Human Chromosome Y and Haplogroups; introducing YDHS Database. Background As the high throughput sequencing efforts generate more biological information, scientists from different disciplines are interpreting the polymorphisms that make us unique. In addition, there is an increasing trend in general public to research their own genealogy, find distant relatives and to know more about their biological background. Commercial vendors are providing analyses of mitochondrial and Y-chromosomal markers for such purposes. Clearly, an easy-to-use free interface to the existing data on the identified variants would be in the interest of general public and professionals less familiar with the field. Here we introduce a novel metadatabase YDHS that aims to provide such an interface for Y-chromosomal DNA (Y-DNA) haplogroups and sequence variants. Methods The database uses ISOGG Y-DNA tree as the source of mutations and haplogroups and by using genomic positions of the mutations the database links them to genes and other biological entities. YDHS contains analysis tools for deeper Y-SNP analysis. Results YDHS addresses the shortage of Y-DNA related databases. 
We have tested our database using a set of different cases from literature ranging from infertility to autism. The database is at http://www.semanticgen.net/ydhs Conclusions Y-chromosomal DNA (Y-DNA) haplogroups and sequence variants have not been in the scientific limelight, excluding certain specialized fields like forensics, mainly because there is not much freely available information or it is scattered in different sources. However, as we have demonstrated Y-SNPs do play a role in various cases on the haplogroup level and it is possible to create a free Y-DNA dedicated bioinformatics resource.",YDHS,0.994329572,NA,0,YDHS,0.994329572,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/10/2015 +34285772,http://cosbi7.ee.ncku.edu.tw/YPIBP,"YPIBP: A repository for phosphoinositide-binding proteins in yeast. Phosphoinositides (PIs) are a family of eight lipids consisting of phosphatidylinositol (PtdIns) and its seven phosphorylated forms. PIs have important regulatory functions in the cell including lipid signaling, protein transport, and membrane trafficking. Yeast has been recognized as a eukaryotic model system to study lipid-protein interactions. Hundreds of yeast PI-binding proteins have been identified, but this research knowledge remains scattered. Besides, the complete PI-binding spectrum and potential PI-binding domains have not been interlinked. No comprehensive databases are available to support the lipid-protein interaction research on phosphoinositides. Here we constructed the first knowledgebase of Yeast Phosphoinositide-Binding Proteins (YPIBP), a repository consisting of 679 PI-binding proteins collected from high-throughput proteome-array and lipid-array studies, QuickGO, and a rigorous literature mining. The YPIBP also contains protein domain information in categories of lipid-binding domains, lipid-related domains and other domains. 
The YPIBP provides search and browse modes along with two enrichment analyses (PI-binding enrichment analysis and domain enrichment analysis). An interactive visualization is given to summarize the PI-domain-protein interactome. Finally, three case studies were given to demonstrate the utility of YPIBP. The YPIBP knowledgebase consolidates the present knowledge and provides new insights of the PI-binding proteins by bringing comprehensive and in-depth interaction network of the PI-binding proteins. YPIBP is available at http://cosbi7.ee.ncku.edu.tw/YPIBP/.",YPIBP,0.980853081,Yeast Phosphoinositide-Binding Proteins,0.98663541,Yeast Phosphoinositide-Binding Proteins,0.98663541,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,6/24/2021 +23110448,http://genome.jouy.inra.fr/yeastip,"YeastIP: a database for identification and phylogeny of Saccharomycotina yeasts. With the advances in sequencing techniques, identification of ascomycetous yeasts to the species level and phylogeny reconstruction increasingly require curated and updated taxonomic information. A specific database with nucleotide sequences of the most common markers used for yeast taxonomy and phylogeny and a user-friendly interface allowing identification, taxonomy and phylogeny of yeasts species was developed. By 1 September 2012, the YeastIP database contained all the described Saccharomycotina species for which sequences used for taxonomy and phylogeny, such as D1/D2 rDNA and ITS, are available. The database interface was developed to provide a maximum of relevant information and data mining tools, including the following features: (1) the blast n program for the sequences of the YeastIP database; (2) easy retrieval of selected sequences; (3) display of the available markers for each selected group of species; and (4) a tool to concatenate marker sequences, including those provided by the user. The concatenation tool allows phylogeny reconstruction through a direct link to the Phylogeny.fr platform. 
YeastIP is thus a unique database in that it provides taxonomic information and guides users in their taxonomic analyses. YeastIP facilitates multigenic analysis to encourage good practice in ascomycetous yeast phylogeny (URL: http://genome.jouy.inra.fr/yeastip.).",YeastIP,0.997585833,NA,0,YeastIP,0.997585833,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/17/2012 +24165882,http://www.inetbio.org/yeastnet,"YeastNet v3: a public database of data-specific and integrated functional gene networks for Saccharomyces cerevisiae. Saccharomyces cerevisiae, i.e. baker's yeast, is a widely studied model organism in eukaryote genetics because of its simple protocols for genetic manipulation and phenotype profiling. The high abundance of publicly available data that has been generated through diverse 'omics' approaches has led to the use of yeast for many systems biology studies, including large-scale gene network modeling to better understand the molecular basis of the cellular phenotype. We have previously developed a genome-scale gene network for yeast, YeastNet v2, which has been used for various genetics and systems biology studies. Here, we present an updated version, YeastNet v3 (available at http://www.inetbio.org/yeastnet/), that significantly improves the prediction of gene-phenotype associations. The extended genome in YeastNet v3 covers up to 5818 genes (√ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº99% of the coding genome) wired by 362 512 functional links. YeastNet v3 provides a new web interface to run the tools for network-guided hypothesis generations. YeastNet v3 also provides edge information for all data-specific networks (√ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº2 million functional links) as well as the integrated networks. 
Therefore, users can construct alternative versions of the integrated network by applying their own data integration algorithm to the same data-specific links.",YeastNet,0.946244299,NA,0,YeastNet,0.946244299,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,10/27/2013 +24170807,http://www.yeastract.com,"The YEASTRACT database: an upgraded information system for the analysis of gene and genomic transcription regulation in Saccharomyces cerevisiae. The YEASTRACT (http://www.yeastract.com) information system is a tool for the analysis and prediction of transcription regulatory associations in Saccharomyces cerevisiae. Last updated in June 2013, this database contains over 200,000 regulatory associations between transcription factors (TFs) and target genes, including 326 DNA binding sites for 113 TFs. All regulatory associations stored in YEASTRACT were revisited and new information was added on the experimental conditions in which those associations take place and on whether the TF is acting on its target genes as activator or repressor. Based on this information, new queries were developed allowing the selection of specific environmental conditions, experimental evidence or positive/negative regulatory effect. This release further offers tools to rank the TFs controlling a gene or genome-wide response by their relative importance, based on (i) the percentage of target genes in the data set; (ii) the enrichment of the TF regulon in the data set when compared with the genome; or (iii) the score computed using the TFRank system, which selects and prioritizes the relevant TFs by walking through the yeast regulatory network. 
We expect that with the new data and services made available, the system will continue to be instrumental for yeast biologists and systems biology researchers.",YEASTRACT,0.997687936,NA,0,YEASTRACT,0.997687936,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,10/28/2013 +25591325,http://yersinia.um.edu.my,"YersiniaBase: a genomic resource and analysis platform for comparative analysis of Yersinia. Background Yersinia is a Gram-negative bacteria that includes serious pathogens such as the Yersinia pestis, which causes plague, Yersinia pseudotuberculosis, Yersinia enterocolitica. The remaining species are generally considered non-pathogenic to humans, although there is evidence that at least some of these species can cause occasional infections using distinct mechanisms from the more pathogenic species. With the advances in sequencing technologies, many genomes of Yersinia have been sequenced. However, there is currently no specialized platform to hold the rapidly-growing Yersinia genomic data and to provide analysis tools particularly for comparative analyses, which are required to provide improved insights into their biology, evolution and pathogenicity. Description To facilitate the ongoing and future research of Yersinia, especially those generally considered non-pathogenic species, a well-defined repository and analysis platform is needed to hold the Yersinia genomic data and analysis tools for the Yersinia research community. Hence, we have developed the YersiniaBase, a robust and user-friendly Yersinia resource and analysis platform for the analysis of Yersinia genomic data. YersiniaBase has a total of twelve species and 232 genome sequences, of which the majority are Yersinia pestis. In order to smooth the process of searching genomic data in a large database, we implemented an Asynchronous JavaScript and XML (AJAX)-based real-time searching system in YersiniaBase. 
Besides incorporating existing tools, which include JavaScript-based genome browser (JBrowse) and Basic Local Alignment Search Tool (BLAST), YersiniaBase also has in-house developed tools: (1) Pairwise Genome Comparison tool (PGC) for comparing two user-selected genomes; (2) Pathogenomics Profiling Tool (PathoProT) for comparative pathogenomics analysis of Yersinia genomes; (3) YersiniaTree for constructing phylogenetic tree of Yersinia. We ran analyses based on the tools and genomic data in YersiniaBase and the preliminary results showed differences in virulence genes found in Yersinia pestis and Yersinia pseudotuberculosis compared to other Yersinia species, and differences between Yersinia enterocolitica subsp. enterocolitica and Yersinia enterocolitica subsp. palearctica. Conclusions YersiniaBase offers free access to wide range of genomic data and analysis tools for the analysis of Yersinia. YersiniaBase can be accessed at http://yersinia.um.edu.my .",YersiniaBase,0.989195168,NA,0,YersiniaBase,0.989195168,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1/16/2015 +22102575,http://yetfasco.ccbr.utoronto.ca,"YeTFaSCo: a database of evaluated yeast transcription factor sequence specificities. The yeast Saccharomyces cerevisiae is a prevalent system for the analysis of transcriptional networks. As a result, multiple DNA-binding sequence specificities (motifs) have been derived for most yeast transcription factors (TFs). However, motifs from different studies are often inconsistent with each other, making subsequent analyses complicated and confusing. Here, we have created YeTFaSCo (The Yeast Transcription Factor Specificity Compendium, http://yetfasco.ccbr.utoronto.ca/), an extensive collection of S. cerevisiae TF specificities. YeTFaSCo differs from related databases by being more comprehensive (including 1709 motifs for 256 proteins or protein complexes), and by evaluating the motifs using multiple objective quality metrics. 
The metrics include correlation between motif matches and ChIP-chip data, gene expression patterns, and GO terms, as well as motif agreement between different studies. YeTFaSCo also features an index of 'expert-curated' motifs, each associated with a confidence assessment. In addition, the database website features tools for motif analysis, including a sequence scanning function and precomputed genome-browser tracks of motif occurrences across the entire yeast genome. Users can also search the database for motifs that are similar to a query motif.",YeTFaSCo,0.996380448,The Yeast Transcription Factor Specificity Compendium,0.867617818,YeTFaSCo,0.996380448,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/18/2011 +23134687,http://me.lzu.edu.cn/yak,"The Yak genome database: an integrative database for studying yak biology and high-altitude adaption. Background The yak (Bos grunniens) is a long-haired bovine that lives at high altitudes and is an important source of milk, meat, fiber and fuel. The recent sequencing, assembly and annotation of its genome are expected to further our understanding of the means by which it has adapted to life at high altitudes and its ecologically important traits. Description The Yak Genome Database (YGD) is an internet-based resource that provides access to genomic sequence data and predicted functional information concerning the genes and proteins of Bos grunniens. The curated data stored in the YGD includes genome sequences, predicted genes and associated annotations, non-coding RNA sequences, transposable elements, single nucleotide variants, and three-way whole-genome alignments between human, cattle and yak. YGD offers useful searching and data mining tools, including the ability to search for genes by name or using function keywords as well as GBrowse genome browsers and/or BLAST servers, which can be used to visualize genome regions and identify similar sequences. Sequence data from the YGD can also be downloaded to perform local searches. 
Conclusions A new yak genome database (YGD) has been developed to facilitate studies on high-altitude adaption and bovine genomics. The database will be continuously updated to incorporate new information such as transcriptome data and population resequencing data. The YGD can be accessed at http://me.lzu.edu.cn/yak.",YGD,0.857634743,Yak Genome Database,0.853846967,YGD,0.857634743,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,11/7/2012 +32738156,http://yeast.biomedtzc.cn,"Predicted yeast interactome and network-based interpretation of transcriptionally changed genes. Saccharomyces cerevisiae, budding yeast, is a widely used model organism and research tool in genetics studies. Many efforts have been directed at constructing a high-quality comprehensive molecular interaction network to elucidate the design logic of the gene circuitries in this classic model organism. In this work, we present the yeast interactome resource (YIR), which includes 22,238 putative functional gene interactions inferred from functional gene association data integrated from 10 databases focusing on diverse functional perspectives. These putative functional gene interactions are expected to cover 18.84% of yeast protein interactions, and 38.49% may represent protein interactions. Based on the YIR, a gene set linkage analysis (GSLA) web tool was developed to annotate the potential functional impacts of a set of transcriptionally changed genes. In a case study, we show that the YIR/GSLA system produced more extensive and concise annotations compared with widely used gene set annotation tools, including PANTHER and DAVID. Both YIR and GSLA are accessible through the website http://yeast.biomedtzc.cn.",YIR,0.990129635,yeast interactome resource,0.962886065,YIR,0.990129635,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,8/11/2020 +"23203880, 25398902",http://ngs.ym.edu.tw/ym500,"YM500: a small RNA sequencing (smRNA-seq) database for microRNA research. 
MicroRNAs (miRNAs) are small RNAs √ɬÉ√Ǭ¢√ɬÇ√Ǭà√ɬÇ√Ǭº22 nt in length that are involved in the regulation of a variety of physiological and pathological processes. Advances in high-throughput small RNA sequencing (smRNA-seq), one of the next-generation sequencing applications, have reshaped the miRNA research landscape. In this study, we established an integrative database, the YM500 (http://ngs.ym.edu.tw/ym500/), containing analysis pipelines and analysis results for 609 human and mice smRNA-seq results, including public data from the Gene Expression Omnibus (GEO) and some private sources. YM500 collects analysis results for miRNA quantification, for isomiR identification (incl. RNA editing), for arm switching discovery, and, more importantly, for novel miRNA predictions. Wetlab validation on >100 miRNAs confirmed high correlation between miRNA profiling and RT-qPCR results (R = 0.84). This database allows researchers to search these four different types of analysis results via our interactive web interface. YM500 allows researchers to define the criteria of isomiRs, and also integrates the information of dbSNP to help researchers distinguish isomiRs from SNPs. A user-friendly interface is provided to integrate miRNA-related information and existing evidence from hundreds of sequencing datasets. The identified novel miRNAs and isomiRs hold the potential for both basic research and biotech applications.",YM500,0.994998276,NA,0,YM500,0.994998276,2,27899625,NA,NA,NA,merge on record with best name prob,NA,NA,NA,NA,11/14/2014 +27899625,http://ngs.ym.edu.tw/ym500,"YM500v3: a database for small RNA sequencing in human cancer research. We previously presented the YM500 database, which contains >8000 small RNA sequencing (smRNA-seq) data sets and integrated analysis results for various cancer miRNome studies. 
In the updated YM500v3 database (http://ngs.ym.edu.tw/ym500/) presented herein, we not only focus on miRNAs but also on other functional small non-coding RNAs (sncRNAs), such as PIWI-interacting RNAs (piRNAs), tRNA-derived fragments (tRFs), small nuclear RNAs (snRNAs) and small nucleolar RNAs (snoRNAs). There is growing knowledge of the role of sncRNAs in gene regulation and tumorigenesis. We have also incorporated >10 000 cancer-related RNA-seq and >3000 more smRNA-seq data sets into the YM500v3 database. Furthermore, there are two main new sections, 'Survival' and 'Cancer', in this updated version. The 'Survival' section provides the survival analysis results in all cancer types or in a user-defined group of samples for a specific sncRNA. The 'Cancer' section provides the results of differential expression analyses, miRNA-gene interactions and cancer miRNA-related pathways. In the 'Expression' section, sncRNA expression profiles across cancer and sample types are newly provided. Cancer-related sncRNAs hold potential for both biotech applications and basic research.",YM500v3,0.976454541,NA,0,YM500v3,0.976454541,1,"23203880.0, 25398902.0",NA,low_prob_best_name,do not remove,merge on record with best name prob,NA,NA,NA,NA,11/29/2016 +"22064855, 27899612",http://www.ymdb.ca,"YMDB: the Yeast Metabolome Database. The Yeast Metabolome Database (YMDB, http://www.ymdb.ca) is a richly annotated 'metabolomic' database containing detailed information about the metabolome of Saccharomyces cerevisiae. Modeled closely after the Human Metabolome Database, the YMDB contains >2000 metabolites with links to 995 different genes/proteins, including enzymes and transporters. The information in YMDB has been gathered from hundreds of books, journal articles and electronic databases. 
In addition to its comprehensive literature-derived data, the YMDB also contains an extensive collection of experimental intracellular and extracellular metabolite concentration data compiled from detailed Mass Spectrometry (MS) and Nuclear Magnetic Resonance (NMR) metabolomic analyses performed in our lab. This is further supplemented with thousands of NMR and MS spectra collected on pure, reference yeast metabolites. Each metabolite entry in the YMDB contains an average of 80 separate data fields including comprehensive compound description, names and synonyms, structural information, physico-chemical data, reference NMR and MS spectra, intracellular/extracellular concentrations, growth conditions and substrates, pathway information, enzyme data, gene/protein sequence data, as well as numerous hyperlinks to images, references and other public databases. Extensive searching, relational querying and data browsing tools are also provided that support text, chemical structure, spectral, molecular weight and gene/protein sequence queries. Because of S. cervesiae's importance as a model organism for biologists and as a biofactory for industry, we believe this kind of database could have considerable appeal not only to metabolomics researchers, but also to yeast biologists, systems biologists, the industrial fermentation industry, as well as the beer, wine and spirit industry.",YMDB,0.991479576,Yeast Metabolome Database,0.972250591,YMDB,0.991479576,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/28/2016 +25522035,http://cosbi3.ee.ncku.edu.tw/yna,"The Yeast Nucleosome Atlas (YNA) database: an integrative gene mining platform for studying chromatin structure and its regulation in yeast. Background Histone modification and remodeling play crucial roles in regulating gene transcription. These post-translational modifications of histones function in a combinatorial fashion and can be recognized by specific histone-binding proteins, thus regulating gene transcription. 
Therefore, understanding the combinatorial patterns of the histone code is vital to understanding the associated biological processes. However, most of the datasets regarding histone modification and chromatin regulation are scattered across various studies, and no comprehensive search and query tool has yet been made available to retrieve genes bearing specific histone modification patterns and regulatory proteins. Description For this reason, we developed the Yeast Nucleosome Atlas database, or the YNA database, which integrates the available experimental data on nucleosome occupancy, histone modifications, the binding occupancy of regulatory proteins, and gene expression data, and provides the genome-wide gene miner to retrieve genes with a specific combination of these chromatin-related datasets. Moreover, the biological significance analyzer, which analyzes the enrichments of histone modifications, binding occupancy, transcription rate, and functionality of the retrieved genes, was constructed to help researchers to gain insight into the correlation among chromatin regulation and transcription. Conclusions Compared to previously established genome browsing databases, YNA provides a powerful gene mining and retrieval interface, and is an investigation tool that can assist users to generate testable hypotheses for studying chromatin regulation during transcription. YNA is available online at http://cosbi3.ee.ncku.edu.tw/yna/.",YNA,0.981317759,Yeast Nucleosome Atlas,0.915188857,YNA,0.981317759,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,12/8/2014 +25024351,http://www.ystrexdb.com,"yStreX: yeast stress expression database. . Over the past decade genome-wide expression analyses have been often used to study how expression of genes changes in response to various environmental stresses. 
Many of these studies (such as effects of oxygen concentration, temperature stress, low pH stress, osmotic stress, depletion or limitation of nutrients, addition of different chemical compounds, etc.) have been conducted in the unicellular Eukaryal model, yeast Saccharomyces cerevisiae. However, the lack of a unifying or integrated, bioinformatics platform that would permit efficient and rapid use of all these existing data remain an important issue. To facilitate research by exploiting existing transcription data in the field of yeast physiology, we have developed the yStreX database. It is an online repository of analyzed gene expression data from curated data sets from different studies that capture genome-wide transcriptional changes in response to diverse environmental transitions. The first aim of this online database is to facilitate comparison of cross-platform and cross-laboratory gene expression data. Additionally, we performed different expression analyses, meta-analyses and gene set enrichment analyses; and the results are also deposited in this database. Lastly, we constructed a user-friendly Web interface with interactive visualization to provide intuitive access and to display the queried data for users with no background in bioinformatics. Database URL: http://www.ystrexdb.com.",yStreX,0.943920732,yeast stress expression database,0.765314773,yStreX,0.943920732,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,7/14/2014 +24608172,http://cosbi3.ee.ncku.edu.tw/YTRP,"YTRP: a repository for yeast transcriptional regulatory pathways. Regulatory targets of transcription factors (TFs) can be identified by the TF perturbation experiments, which reveal the expression changes owing to the perturbation (deletion or overexpression) of TFs. But the identified targets of a given TF consist of both direct and indirect regulatory targets. 
It has been shown that most of the TFPE-identified regulatory targets are indirect, indicating that TF-gene regulation is mainly through transcriptional regulatory pathways (TRPs) consisting of intermediate TFs. Without identification of these TRPs, it is not easy to understand how a TF regulates its indirect targets. Because there is no such database depositing the potential TRPs for Saccharomyces cerevisiae now, this motivates us to construct the YTRP (Yeast Transcriptional Regulatory Pathway) database. For each TF-gene regulatory pair under different experimental conditions, all possible TRPs in two underlying networks (constructed using experimentally verified TF-gene binding pairs and TF-gene regulatory pairs from the literature) for the specified experimental conditions were automatically enumerated by TRP mining procedures developed from the graph theory. The enumerated TRPs of a TF-gene regulatory pair provide experimentally testable hypotheses for the molecular mechanisms behind a TF and its regulatory target. YTRP is available online at http://cosbi3.ee.ncku.edu.tw/YTRP/. We believe that the TRPs deposited in this database will greatly improve the usefulness of TFPE data for yeast biologists to study the regulatory mechanisms between a TF and its knocked-out targets. Database URL: http://cosbi3.ee.ncku.edu.tw/YTRP/.",YTRP,0.992604494,Yeast Transcriptional Regulatory Pathway,0.94507375,YTRP,0.992604494,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/7/2014 +23411719,http://www.myogenesisdb.org/YY1TargetDB,"YY1TargetDB: an integral information resource for Yin Yang 1 target loci. Yin Yang 1 (YY1), a ubiquitously expressed transcription factor, plays a critical role in regulating cell development, differentiation, cellular proliferation and tumorigenesis. Previous studies identified many YY1-regulated target genes in both human and mouse. 
Emerging global mapping by Chromatin ImmnoPrecipitation (ChIP)-based high-throughput experiments indicate that YY1 binds to a vast number of loci genome-wide. However, the information is widely scattered in many disparate poorly cross-indexed literatures; a large portion was only published recently by the ENCODE consortium with limited annotation. A centralized database, which annotates and organizes YY1-binding loci and target motifs in a systematic way with easy access, will be valuable resources for the research community. We therefore implemented a web-based YY1 Target loci Database (YY1TargetDB). This database contains YY1-binding loci (binding peaks) from ChIP-seq and ChIP-on-chip experiments, computationally predicated YY1 and cofactor motifs within each locus. It also collects the experimentally verified YY1-binding motifs from individual researchers. The current version of YY1TargetDB contains 92 314 binding loci identified by ChIP-based experiments; 157 200 YY1-binding motifs in which 42 are experimentally verified and 157 158 are computationally predicted; and 130 759 binding motifs for 47 cofactors. Database URL: http://www.myogenesisdb.org/YY1TargetDB.",YY1TargetDB,0.982896606,Target loci Database,0.670763016,YY1TargetDB,0.982896606,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,2/14/2013 +22428748,http://zebase.bio.purdue.edu,"ZeBase: an open-source relational database for zebrafish laboratories. Abstract ZeBase is an open-source relational database for zebrafish inventory. It is designed for the recording of genetic, breeding, and survival information of fish lines maintained in a single- or multi-laboratory environment. Users can easily access ZeBase through standard web-browsers anywhere on a network. Convenient search and reporting functions are available to facilitate routine inventory work; such functions can also be automated by simple scripting. Optional barcode generation and scanning are also built-in for easy access to the information related to any fish. 
Further information of the database and an example implementation can be found at http://zebase.bio.purdue.edu.",ZeBase,0.995802045,NA,0,ZeBase,0.995802045,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,3/1/2012 +24578356,http://genome.igib.res.in/twiki,"The Zebrafish GenomeWiki: a crowdsourcing approach to connect the long tail for zebrafish gene annotation. A large repertoire of gene-centric data has been generated in the field of zebrafish biology. Although the bulk of these data are available in the public domain, most of them are not readily accessible or available in nonstandard formats. One major challenge is to unify and integrate these widely scattered data sources. We tested the hypothesis that active community participation could be a viable option to address this challenge. We present here our approach to create standards for assimilation and sharing of information and a system of open standards for database intercommunication. We have attempted to address this challenge by creating a community-centric solution for zebrafish gene annotation. The Zebrafish GenomeWiki is a 'wiki'-based resource, which aims to provide an altruistic shared environment for collective annotation of the zebrafish genes. The Zebrafish GenomeWiki has features that enable users to comment, annotate, edit and rate this gene-centric information. The credits for contributions can be tracked through a transparent microattribution system. In contrast to other wikis, the Zebrafish GenomeWiki is a 'structured wiki' or rather a 'semantic wiki'. The Zebrafish GenomeWiki implements a semantically linked data structure, which in the future would be amenable to semantic search. Database URL: http://genome.igib.res.in/twiki.",Zebrafish GenomeWiki,0.791018561,NA,0,Zebrafish GenomeWiki,0.791018561,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,2/26/2014 +32931381,http://konulab.shinyapps.io/zenofishdb,"ZenoFishDb v1.1: A Database for Xenotransplantation Studies in Zebrafish. 
Rapidly accumulating literature has proven feasibility of the zebrafish xenograft models in cancer research. Nevertheless, online databases for searching the current zebrafish xenograft literature are in great demand. Herein, we have developed a manually curated database, called ZenoFishDb v1.1 (https://konulab.shinyapps.io/zenofishdb), based on R Shiny platform aiming to provide searchable information on ever increasing collection of zebrafish studies for cancer cell line transplantation and patient-derived xenografts (PDXs). ZenoFishDb v1.1 user interface contains four modules: DataTable, Visualization, PDX Details, and PDX Charts. The DataTable and Visualization pages represent xenograft study details, including injected cell lines, PDX injections, molecular modifications of cell lines, zebrafish strains, as well as technical aspects of the xenotransplantation procedures in table, bar, and/or pie chart formats. The PDX Details module provides comprehensive information on the patient details in table format and can be searched and visualized. Overall, ZenoFishDb v1.1 enables researchers to effectively search, list, and visualize different technical and biological attributes of zebrafish xenotransplantation studies particularly focusing on the new trends that make use of reporters, RNA interference, overexpression, or mutant gene constructs of transplanted cancer cells, stem cells, and PDXs, as well as distinguished host modifications.",ZenoFishDb,0.99299258,NA,0,ZenoFishDb,0.99299258,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/15/2020 +"23074187, 27899582, 28838067, 33170210",http://zfin.org,"ZFIN, the Zebrafish Model Organism Database: increased support for mutants and transgenics. ZFIN, the Zebrafish Model Organism Database (http://zfin.org), is the central resource for zebrafish genetic, genomic, phenotypic and developmental data. 
ZFIN curators manually curate and integrate comprehensive data involving zebrafish genes, mutants, transgenics, phenotypes, genotypes, gene expressions, morpholinos, antibodies, anatomical structures and publications. Integrated views of these data, as well as data gathered through collaborations and data exchanges, are provided through a wide selection of web-based search forms. Among the vertebrate model organisms, zebrafish are uniquely well suited for rapid and targeted generation of mutant lines. The recent rapid production of mutants and transgenic zebrafish is making management of data associated with these resources particularly important to the research community. Here, we describe recent enhancements to ZFIN aimed at improving our support for mutant and transgenic lines, including (i) enhanced mutant/transgenic search functionality; (ii) more expressive phenotype curation methods; (iii) new downloads files and archival data access; (iv) incorporation of new data loads from laboratories undertaking large-scale generation of mutant or transgenic lines and (v) new GBrowse tracks for transgenic insertions, genes with antibodies and morpholinos.",ZFIN,0.995070577,Zebrafish Model Organism Database,0.974461385,ZFIN,0.995070577,4,26097180,"26097180.0, 30407545.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2021 +26097180,"http://zfin.org, http://zebrafishmine.org","ZFIN, The zebrafish model organism database: Updates and new directions. The Zebrafish Model Organism Database (ZFIN; http://zfin.org) is the central resource for genetic and genomic data from zebrafish (Danio rerio) research. ZFIN staff curate detailed information about genes, mutants, genotypes, reporter lines, sequences, constructs, antibodies, knockdown reagents, expression patterns, phenotypes, gene product function, and orthology from publications. 
Researchers can submit mutant, transgenic, expression, and phenotype data directly to ZFIN and use the ZFIN Community Wiki to share antibody and protocol information. Data can be accessed through topic-specific searches, a new site-wide search, and the data-mining resource ZebrafishMine (http://zebrafishmine.org). Data download and web service options are also available. ZFIN collaborates with major bioinformatics organizations to verify and integrate genomic sequence data, provide nomenclature support, establish reciprocal links, and participate in the development of standardized structured vocabularies (ontologies) used for data annotation and searching. ZFIN-curated gene, function, expression, and phenotype data are available for comparative exploration at several multi-species resources. The use of zebrafish as a model for human disease is increasing. ZFIN is supporting this growing area with three major projects: adding easy access to computed orthology data from gene pages, curating details of the gene expression pattern changes in mutant fish, and curating zebrafish models of human diseases.",ZFIN,0.994089166,Zebrafish Model Organism Database,0.985861821,ZFIN,0.994089166,1,"23074187.0, 27899582.0, 28838067.0, 33170210.0","30407545.0, 23074187.0, 27899582.0, 28838067.0, 33170210.0",NA,NA,merge on record with best name prob,"merge all ""dup name"" IDs",NA,NA,NA,7/8/2015 +30407545,"http://zfin.org/, http://alliancegenome.org","The Zebrafish Information Network: new support for non-coding genes, richer Gene Ontology annotations and the Alliance of Genome Resources. The Zebrafish Information Network (ZFIN) (https://zfin.org/) is the database for the model organism, zebrafish (Danio rerio). ZFIN expertly curates, organizes and provides a wide array of zebrafish genetic and genomic data, including genes, alleles, transgenic lines, gene expression, gene function, mutant phenotypes, orthology, human disease models, nomenclature and reagents. 
New features at ZFIN include increased support for genomic regions and for non-coding genes, and support for more expressive Gene Ontology annotations. ZFIN has recently taken over maintenance of the zebrafish reference genome sequence as part of the Genome Reference Consortium. ZFIN is also a founding member of the Alliance of Genome Resources, a collaboration of six model organism databases (MODs) and the Gene Ontology Consortium (GO). The recently launched Alliance portal (https://alliancegenome.org) provides a unified, comparative view of MOD, GO, and human data, and facilitates foundational and translational biomedical research.",ZFIN,0.989727378,The Zebrafish Information Network,0.800031294,ZFIN,0.989727378,1,NA,"26097180.0, 23074187.0, 27899582.0, 28838067.0, 33170210.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,NA,1/1/2019 +22067444,http://zfishbook.org,"zfishbook: connecting you to a world of zebrafish revertible mutants. zfishbook is an internet-based openly accessible database of revertible protein trap gene-breaking transposon (GBT) insertional mutants in the zebrafish, Danio rerio. In these lines, a monomeric red fluorescent protein (mRFP) is encoded by an artificial 3' exon, resulting in a translational fusion to endogenous loci. The natural transparency of the zebrafish embryo and larvae greatly facilitates the expression annotation of tagged loci using new capillary-based SCORE imaging methods. Molecular annotation of each line is facilitated by cloning methods such as 5'-Rapid Amplification of cDNA Ends (RACE) and inverse polymerase chain reaction (PCR). zfishbook (http://zfishbook.org) represents a central hub for molecular, expression and mutational information about GBT lines from the International Zebrafish Protein Trap Consortium (IZPTC) that includes researchers from around the globe. zfishbook is open to community-wide contributions including expression and functional annotation. 
zfishbook also represents a central location for information on how to obtain these lines from diverse members of the IZPTC and integration within other zebrafish community databases including Zebrafish Information Network (ZFIN), Ensembl and National Center for Biotechnology Information.",zfishbook,0.99548614,Zebrafish,0.59932977,zfishbook,0.99548614,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/8/2011 +26065909,http://genome.igib.res.in/zflncRNApedia,"zflncRNApedia: A Comprehensive Online Resource for Zebrafish Long Non-Coding RNAs. Recent transcriptome annotation using deep sequencing approaches have annotated a large number of long non-coding RNAs in zebrafish, a popular model organism for human diseases. These studies characterized lncRNAs in critical developmental stages as well as adult tissues. Each of the studies has uncovered a distinct set of lncRNAs, with minor overlaps. The availability of the raw RNA-Seq datasets in public domain encompassing critical developmental time-points and adult tissues provides us with a unique opportunity to understand the spatiotemporal expression patterns of lncRNAs. In the present report, we created a catalog of lncRNAs in zebrafish, derived largely from the three annotation sets, as well as manual curation of literature to compile a total of 2,267 lncRNA transcripts in zebrafish. The lncRNAs were further classified based on the genomic context and relationship with protein coding gene neighbors into 4 categories. Analysis revealed a total of 86 intronic, 309 promoter associated, 485 overlapping and 1,386 lincRNAs. We created a comprehensive resource which houses the annotation of lncRNAs as well as associated information including expression levels, promoter epigenetic marks, genomic variants and retroviral insertion mutants. The resource also hosts a genome browser where the datasets could be browsed in the genome context. 
To the best of our knowledge, this is the first comprehensive resource providing a unified catalog of lncRNAs in zebrafish. The resource is freely available at URL: http://genome.igib.res.in/zflncRNApedia.",zflncRNApedia,0.602053881,NA,0,zflncRNApedia,0.602053881,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,6/11/2015 +21276248,"http://bindr.gdcb.iastate.edu/ZFNGenome, http://www.zincfingers.org","ZFNGenome: a comprehensive resource for locating zinc finger nuclease target sites in model organisms. Background Zinc Finger Nucleases (ZFNs) have tremendous potential as tools to facilitate genomic modifications, such as precise gene knockouts or gene replacements by homologous recombination. ZFNs can be used to advance both basic research and clinical applications, including gene therapy. Recently, the ability to engineer ZFNs that target any desired genomic DNA sequence with high fidelity has improved significantly with the introduction of rapid, robust, and publicly available techniques for ZFN design such as the Oligomerized Pool ENgineering (OPEN) method. The motivation for this study is to make resources for genome modifications using OPEN-generated ZFNs more accessible to researchers by creating a user-friendly interface that identifies and provides quality scores for all potential ZFN target sites in the complete genomes of several model organisms. Description ZFNGenome is a GBrowse-based tool for identifying and visualizing potential target sites for OPEN-generated ZFNs. ZFNGenome currently includes a total of more than 11.6 million potential ZFN target sites, mapped within the fully sequenced genomes of seven model organisms; S. cerevisiae, C. reinhardtii, A. thaliana, D. melanogaster, D. rerio, C. elegans, and H. sapiens and can be visualized within the flexible GBrowse environment. Additional model organisms will be included in future updates. 
ZFNGenome provides information about each potential ZFN target site, including its chromosomal location and position relative to transcription initiation site(s). Users can query ZFNGenome using several different criteria (e.g., gene ID, transcript ID, target site sequence). Tracks in ZFNGenome also provide ""uniqueness"" and ZiFOpT (Zinc Finger OPEN Targeter) ""confidence"" scores that estimate the likelihood that a chosen ZFN target site will function in vivo. ZFNGenome is dynamically linked to ZiFDB, allowing users access to all available information about zinc finger reagents, such as the effectiveness of a given ZFN in creating double-stranded breaks. Conclusions ZFNGenome provides a user-friendly interface that allows researchers to access resources and information regarding genomic target sites for engineered ZFNs in seven model organisms. This genome-wide database of potential ZFN target sites should greatly facilitate the utilization of ZFNs in both basic and clinical research.ZFNGenome is freely available at: http://bindr.gdcb.iastate.edu/ZFNGenome or at the Zinc Finger Consortium website: http://www.zincfingers.org/.",ZFNGenome,0.933807492,inc Finger,0.642919501,ZFNGenome,0.933807492,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,1/28/2011 +23203887,http://zifdb.msi.umn.edu,"Zinc Finger Database (ZiFDB) v2.0: a comprehensive database of C√ɬÉ√Ǭ¢√ɬÇ√ǬÇ√ɬÇ√ǬÇH√ɬÉ√Ǭ¢√ɬÇ√ǬÇ√ɬÇ√Ç¬Ç zinc fingers and engineered zinc finger arrays. ZiFDB (Zinc Finger Database, http://zifdb.msi.umn.edu) is a web-accessible database that houses information on individual C(2)H(2) zinc fingers (ZFs) and engineered zinc finger arrays (ZFAs). ZiFDB serves as a resource for biologists interested in engineering ZFAs for use as sequence-specific DNA-binding reagents. 
Here, we describe four new features of ZiFDB: (i) the database allows users to input new ZFs and ZFAs; (ii) a shadow database temporarily stores user-submitted data, pending approval by the database curator and subsequent loading into the persistent database; (iii) ZiFDB contains 181 Context-Dependent Assembly (CoDA) ZFAs, which were generated by this newly described ZFA engineering platform; and (iv) the database also now contains 319 F1F2 CoDA units and 334 F2F3 CoDA units that can be used to construct CoDA arrays. In total, the new release of ZiFDB contains 1226 ZFs and 1123 ZFAs.",ZiFDB,0.996439636,Zinc Finger Database,0.955517188,ZiFDB,0.996439636,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,11/29/2012 +29197720,http://test5.bicpu.edu.in,"ZikaBase: An integrated ZIKV- Human Interactome Map database. Re-emergence of ZIKV has caused infections in more than 1.5 million people. The molecular mechanism and pathogenesis of ZIKV is not well explored due to unavailability of adequate model and lack of publically accessible resources to provide information of ZIKV-Human protein interactome map till today. This study made an attempt to curate the ZIKV-Human interaction proteins from published literatures and RNA-Seq data. 11 direct interaction, 12 associated genes are retrieved from literatures and 3742 Differentially Expressed Genes (DEGs) are obtained from RNA-Seq analysis. The genes have been analyzed to construct the ZIKV-Human Interactome Map. The importance of the study has been illustrated by the enrichment analysis and observed that direct interaction and associated genes are enriched in viral entry into host cell. Also, ZIKV infection modulates 32% signal and 27% immune system pathways. 
The integrated database, ZikaBase has been developed to help the virology research community and accessible at https://test5.bicpu.edu.in.",ZikaBase,0.957984567,NA,0,ZikaBase,0.957984567,1,NA,NA,low_prob_best_name,do not remove,NA,NA,NA,NA,NA,12/1/2017 +31512145,http://zikavid.org,"ZIKAVID-Zika virus infection database: a new platform to analyze the molecular impact of Zika virus infection. The recent outbreak of Zika virus (ZIKV) in Brazil and other countries globally demonstrated the relevance of ZIKV studies. During and after this outbreak, there was an intense increase in scientific production on ZIKV infections, especially toward alterations promoted by the infection and related to clinical outcomes. Considering this massive amount of new data, mainly thousands of genes and proteins whose expression is impacted by ZIKV infection, the ZIKA Virus Infection Database (ZIKAVID) was created. ZIKAVID is an online database that comprises all genes or proteins, and associated information, for which expression was experimentally measured and found to be altered after ZIKV infection. The database, available at https://zikavid.org, contains 16,984 entries of gene expression measurements from a total of 7348 genes. It allows users to easily perform searches for different experimental hosts (cell lines, tissues, and animal models), ZIKV strains (African, Asian, and Brazilian), and target molecules (messenger RNA [mRNA] and protein), among others, used in differential expression studies regarding ZIKV infection. 
In this way, the ZIKAVID will serve as an additional and important resource to improve the characterization of the molecular impact and pathogenesis associated with ZIKV infection.",ZIKAVID,0.995752037,ZIKA Virus Infection Database,0.853707194,ZIKAVID,0.995752037,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,9/11/2019 +23180778,"http://research.nhgri.nih.gov/ZInC/, http://zfin.org","The Zebrafish Insertion Collection (ZInC): a web based, searchable collection of zebrafish mutations generated by DNA insertion. ZInC (Zebrafish Insertional Collection, http://research.nhgri.nih.gov/ZInC/) is a web-searchable interface of insertional mutants in zebrafish. Over the last two decades, the zebrafish has become a popular model organism for studying vertebrate development as well as for modeling human diseases. To facilitate such studies, we are generating a genome-wide knockout resource that targets every zebrafish protein-coding gene. All mutant fish are freely available to the scientific community through the Zebrafish International Resource Center (ZIRC). To assist researchers in finding mutant and insertion information, we developed a comprehensive database with a web front-end, the ZInC. It can be queried using multiple types of input such as ZFIN (Zebrafish Information Network) IDs, UniGene accession numbers and gene symbols from zebrafish, human and mouse. In the future, ZInC may include data from other insertional mutation projects as well. ZInC cross-references all integration data with the ZFIN (http://zfin.org/).",ZInC,0.982900321,The Zebrafish Insertion Collection,0.843595777,ZInC,0.982900321,1,22736877,NA,NA,NA,do not merge,NA,NA,NA,NA,11/24/2012 +"30419167, 33759118",http://www.ZINClick.org,"ZINClick v.18: Expanding Chemical Space of 1,2,3-Triazoles. 
In the last years, we have investigated the click-chemical space covered by molecules containing the triazole ring and generated a database of 1,2,3-triazoles called ZINClick, starting from literature-reported alkynes and azides synthesizable in no more than three synthetic steps from commercially available products. This combinatorial database contains millions of 1,4-disubstituted 1,2,3-triazoles that are easily synthesizable. The library is regularly updated and can be freely downloaded from http://www.ZINClick.org . In this communication, the new implementation of ZINClick will be discussed as well as our new strategy for clustering the chemical space covered by 1,4-disubstituted 1,2,3-triazoles around their availability: from direct purchase to different degrees of synthetic feasibility of the compounds.",ZINClick,0.997613192,NA,0,ZINClick,0.997613192,2,NA,24451008,NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/1/2021 +24451008,http://www.symech.it/ZINClick,"ZINClick: a database of 16 million novel, patentable, and readily synthesizable 1,4-disubstituted triazoles. Since Professors Sharpless, Finn, and Kolb first introduced the concept of ""click reactions"" in 2001 as powerful tools in drug discovery, 1,4-disubstituted-1,2,3-triazoles have become important in medicinal chemistry due to the simultaneous discovery by Sharpless, Fokin, and Meldal of a perfect click 1,3-dipolar cycloaddition reaction between azides and alkynes catalyzed by copper salts. Because of their chemical features, these triazoles are proposed to be aggressive pharmacophores that participate in drug-receptor interactions while maintaining an excellent chemical and metabolic profile. Surprisingly, no virtual libraries of 1,4-disubstituted-1,2,3-triazoles have been generated for the systematic investigation of the click-chemical space. 
In this manuscript, a database of triazoles called ZINClick is generated from literature-reported alkynes and azides that can be synthesized within three steps from commercially available products. This combinatorial database contains over 16 million 1,4-disubstituted-1,2,3-triazoles that are easily synthesizable, new, and patentable! The structural diversity of ZINClick ( http://www.symech.it/ZINClick ) will be explored. ZINClick will also be compared to other available databases, and its application during the design of novel bioactive molecules containing triazole nuclei will be discussed.",ZINClick,0.996718585,NA,0,ZINClick,0.996718585,1,NA,"30419167.0, 33759118.0",NA,NA,NA,"merge all ""dup name"" IDs",NA,NA,URL assoc with best name prob RESOLVES SATISFACTORILY,1/31/2014 +22102568,http://www.geneontology.org,"The Gene Ontology: enhancements for 2011. The Gene Ontology (GO) (http://www.geneontology.org) is a community bioinformatics resource that represents gene product function through the use of structured, controlled vocabularies. The number of GO annotations of gene products has increased due to curation efforts among GO Consortium (GOC) groups, including focused literature-based annotation and ortholog-based functional inference. The GO ontologies continue to expand and improve as a result of targeted ontology development, including the introduction of computable logical definitions and development of new tools for the streamlined addition of terms to the ontology. The GOC continues to support its user community through the use of e-mail lists, social media and web-based resources.",NA,0,NA,0,NA,0,1,"23161678.0, 25428369.0",22857741,low_prob_best_name,remove,conflicting record(s) to be removed,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME; name is blank,NA,NA,11/18/2011 +22857741,http://www.gib.fi.upm.es/eMIR2,"e-MIR2: a public online inventory of medical informatics resources. 
Background Over the past years, the number of available informatics resources in medicine has grown exponentially. While specific inventories of such resources have already begun to be developed for Bioinformatics (BI), comparable inventories are as yet not available for the Medical Informatics (MI) field, so that locating and accessing them currently remains a difficult and time-consuming task. Description We have created a repository of MI resources from the scientific literature, providing free access to its contents through a web-based service. We define informatics resources as all those elements that constitute, serve to define or are used by informatics systems, ranging from architectures or development methodologies to terminologies, vocabularies, databases or tools. Relevant information describing the resources is automatically extracted from manuscripts published in top-ranked MI journals. We used a pattern matching approach to detect the resources' names and their main features. Detected resources are classified according to three different criteria: functionality, resource type and domain. To facilitate these tasks, we have built three different classification schemas by following a novel approach based on folksonomies and social tagging. We adopted the terminology most frequently used by MI researchers in their publications to create the concepts and hierarchical relationships belonging to the classification schemas. The classification algorithm identifies the categories associated with resources and annotates them accordingly. The database is then populated with this data after manual curation and validation. Conclusions We have created an online repository of MI resources to assist researchers in locating and accessing the most suitable resources to perform specific tasks. The database contains 609 resources at the time of writing and is available at http://www.gib.fi.upm.es/eMIR2. 
We are continuing to expand the number of available resources by taking into account further publications as well as suggestions from users and resource developers.",NA,0,NA,0,NA,0,1,NA,22102568,low_prob_best_name,remove,NA,conflicting record(s) to be removed,FALSE POS: INCORRECT NAME; name is blank,NA,NA,8/2/2012 diff --git a/data/ner_metrics/combined_test_stats.csv b/data/ner_metrics/combined_test_stats.csv new file mode 100644 index 0000000..af9a40d --- /dev/null +++ b/data/ner_metrics/combined_test_stats.csv @@ -0,0 +1,16 @@ +model,precision,recall,f1,loss +bert-base-uncased,0.6987951807228916,0.7073170731707317,0.703030303030303,0.0188343171699307 +dmis-lab/biobert-v1.1,0.6926406926406926,0.6504065040650406,0.670859538784067,0.0229609798769888 +kamalkraj/bioelectra-base-discriminator-pubmed,0.6047430830039525,0.6219512195121951,0.6132264529058116,0.0160053862916538 +kamalkraj/bioelectra-base-discriminator-pubmed-pmc,0.6752136752136753,0.6422764227642277,0.6583333333333333,0.0137734971463214 +allenai/biomed_roberta_base,0.6812749003984063,0.6951219512195121,0.6881287726358147,0.0163840041865114 +allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169,0.6857142857142857,0.6829268292682927,0.684317718940937,0.0116135302002658 +allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500,0.6891385767790262,0.7479674796747967,0.7173489278752437,0.0131652721768477 +bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12,0.6652173913043479,0.6219512195121951,0.6428571428571429,0.0192509487758798 +bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12,0.5665399239543726,0.6056910569105691,0.5854616895874264,0.0201863757253158 +giacomomiolo/electramed_base_scivocab_1M,0.6695278969957081,0.6341463414634146,0.651356993736952,0.0158931310102343 +cambridgeltl/SapBERT-from-PubMedBERT-fulltext,0.6744186046511628,0.5894308943089431,0.6290672451193059,0.0144474344063928 
+cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token,0.6784140969162996,0.6260162601626016,0.6511627906976745,0.0113123779560555 +allenai/scibert_scivocab_uncased,0.6563706563706564,0.6910569105691057,0.6732673267326733,0.0138629199667775 +microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract,0.6381322957198443,0.6666666666666666,0.6520874751491054,0.0102324817198677 +microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext,0.7361111111111112,0.6463414634146342,0.6883116883116882,0.0137343333288299 diff --git a/data/ner_metrics/combined_train_stats.csv b/data/ner_metrics/combined_train_stats.csv new file mode 100644 index 0000000..775e616 --- /dev/null +++ b/data/ner_metrics/combined_train_stats.csv @@ -0,0 +1,151 @@ +epoch,train_precision,train_recall,train_f1,train_loss,val_precision,val_recall,val_f1,val_loss,model_name +0,0.5521628498727735,0.6304474142940151,0.5887140531741725,0.0046897814139905,0.504424778761062,0.5229357798165137,0.5135135135135136,0.0068885108832126,bert-base-uncased +1,0.7656338028169014,0.78965717606043,0.7774599542334095,0.0024609949299427,0.5695652173913044,0.6009174311926605,0.5848214285714286,0.0054005900254616,bert-base-uncased +2,0.8741258741258742,0.8715862870424173,0.8728542333430318,0.0011862297937177,0.6091370558375635,0.5504587155963303,0.5783132530120483,0.0064260511381479,bert-base-uncased +3,0.8960367604824814,0.906449738524114,0.9012131715771232,0.0007172614444632,0.5601851851851852,0.555045871559633,0.5576036866359447,0.0074264993622469,bert-base-uncased +4,0.9268897149938042,0.8692620569436375,0.8971514242878561,0.00068014876889,0.6791044776119403,0.4174311926605504,0.5170454545454546,0.010867114870612,bert-base-uncased +5,0.9414492753623188,0.9436374201045904,0.942542077771329,0.0003930213130249,0.6138613861386139,0.5688073394495413,0.5904761904761905,0.0087288946356588,bert-base-uncased 
+6,0.9371816638370118,0.9622312608948286,0.9495412844036698,0.0002947577666443,0.6404494382022472,0.5229357798165137,0.5757575757575757,0.0101727037377154,bert-base-uncased +7,0.9802325581395348,0.979662986635677,0.979947689625109,0.0001225889699001,0.7052023121387283,0.5596330275229358,0.6240409207161125,0.0116527687261203,bert-base-uncased +8,0.9906432748538012,0.9843114468332363,0.987467210725736,7.697805419025228e-05,0.6820512820512821,0.6100917431192661,0.6440677966101696,0.0105551072466858,bert-base-uncased +9,0.9802325581395348,0.979662986635677,0.979947689625109,0.0001494408509868,0.6839080459770115,0.5458715596330275,0.6071428571428571,0.0122198300579419,bert-base-uncased +0,0.6281179138321995,0.6438117373619988,0.6358680057388809,0.0039769837476846,0.5576036866359447,0.555045871559633,0.5563218390804598,0.0060009443416045,dmis-lab/biobert-v1.1 +1,0.7678085916258837,0.8204532248692621,0.7932584269662921,0.0023407716372105,0.6042553191489362,0.6513761467889908,0.6269315673289182,0.0056743119180632,dmis-lab/biobert-v1.1 +2,0.8863499699338545,0.8564787914003487,0.8711583924349883,0.0012184520054125,0.5897435897435898,0.6330275229357798,0.6106194690265486,0.0061104087704174,dmis-lab/biobert-v1.1 +3,0.8943577430972389,0.8657757117954678,0.8798346619427222,0.0010237175829338,0.56,0.5137614678899083,0.5358851674641149,0.0074517680309677,dmis-lab/biobert-v1.1 +4,0.9334126040428062,0.9122603137710632,0.922715251248898,0.0005286679887375,0.6333333333333333,0.6100917431192661,0.6214953271028038,0.0086744221536299,dmis-lab/biobert-v1.1 +5,0.9378953421506612,0.9477048227774548,0.9427745664739884,0.0003897858330123,0.6035242290748899,0.6284403669724771,0.6157303370786517,0.0095610353724063,dmis-lab/biobert-v1.1 +6,0.97610513739546,0.9494479953515398,0.9625920471281296,0.0002792014398714,0.6292682926829268,0.591743119266055,0.6099290780141844,0.0114098240804965,dmis-lab/biobert-v1.1 
+7,0.98022105875509,0.979081929110982,0.9796511627906976,0.0001260129651615,0.6442307692307693,0.6146788990825688,0.6291079812206573,0.0108255702003729,dmis-lab/biobert-v1.1 +8,0.9578587699316627,0.9773387565368972,0.9675007190106416,0.0002701500492631,0.5477178423236515,0.6055045871559633,0.5751633986928105,0.0092899184338211,dmis-lab/biobert-v1.1 +9,0.9786743515850144,0.9866356769320164,0.9826388888888888,0.000155054891968,0.56,0.5779816513761468,0.5688487584650114,0.0110502665181859,dmis-lab/biobert-v1.1 +0,0.4691061787642471,0.4543869843114468,0.4616292798110979,0.0062803071457183,0.4747474747474747,0.4311926605504587,0.4519230769230769,0.0078590019570233,kamalkraj/bioelectra-base-discriminator-pubmed +1,0.6574412532637076,0.7315514235909355,0.6925192519251926,0.0037448749958312,0.5377777777777778,0.555045871559633,0.5462753950338601,0.0062933543499989,kamalkraj/bioelectra-base-discriminator-pubmed +2,0.7613095238095238,0.7431725740848344,0.7521317259629521,0.002391500999978,0.632768361581921,0.5137614678899083,0.5670886075949368,0.007801973784808,kamalkraj/bioelectra-base-discriminator-pubmed +3,0.7838957963291888,0.7693201626961069,0.7765395894428152,0.0019003164857382,0.6258503401360545,0.4220183486238532,0.504109589041096,0.0093831900560941,kamalkraj/bioelectra-base-discriminator-pubmed +4,0.8561682774303582,0.8750726321905868,0.8655172413793104,0.0012710651303067,0.5875706214689266,0.4770642201834862,0.5265822784810126,0.0093757008300879,kamalkraj/bioelectra-base-discriminator-pubmed +5,0.8787541713014461,0.9180708890180128,0.8979823813583404,0.0007755627394604,0.5980861244019139,0.573394495412844,0.5854800936768149,0.0085590263971915,kamalkraj/bioelectra-base-discriminator-pubmed +6,0.8840262582056893,0.9389889599070308,0.9106790645252184,0.000596215127637,0.59,0.5412844036697247,0.5645933014354066,0.0096907748184793,kamalkraj/bioelectra-base-discriminator-pubmed 
+7,0.8972677595628415,0.9540964555490994,0.9248099127006478,0.0004917199856848,0.6190476190476191,0.536697247706422,0.5749385749385749,0.0097437202569451,kamalkraj/bioelectra-base-discriminator-pubmed +8,0.9074889867841408,0.957582800697269,0.9318631608707943,0.0004322081671558,0.6285714285714286,0.5045871559633027,0.5597964376590332,0.0100404861542091,kamalkraj/bioelectra-base-discriminator-pubmed +9,0.9143798024149288,0.968041836141778,0.9404459497600904,0.0003838363853623,0.6424581005586593,0.5275229357798165,0.5793450881612091,0.0101108695022188,kamalkraj/bioelectra-base-discriminator-pubmed +0,0.7976190476190477,0.1167925624636839,0.203750633552965,0.0039364484687374,0.8235294117647058,0.128440366972477,0.2222222222222222,0.0046765255347753,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +1,0.6521511017838405,0.7222545031958164,0.6854149434794596,0.0022318721390251,0.5569620253164557,0.6055045871559633,0.5802197802197802,0.0030825376833001,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +2,0.8207951070336391,0.7797791981406159,0.7997616209773539,0.001148490275184,0.6766467065868264,0.518348623853211,0.5870129870129871,0.0034743362249663,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +3,0.904639175257732,0.8158047646717025,0.8579285059578369,0.0007540579780346,0.7851851851851852,0.4862385321100917,0.6005665722379604,0.0042694865573699,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +4,0.8415841584158416,0.7408483439860546,0.7880098887515451,0.0009474008796308,0.7372881355932204,0.3990825688073394,0.5178571428571429,0.0045897143248182,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +5,0.9463741620962828,0.9023823358512492,0.9238548483045806,0.0003726831361611,0.6492146596858639,0.5688073394495413,0.6063569682151589,0.0040680365726495,kamalkraj/bioelectra-base-discriminator-pubmed-pmc 
+6,0.9579326923076924,0.926205694363742,0.9418020679468242,0.0003010669865615,0.634020618556701,0.5642201834862385,0.5970873786407768,0.0042219984864529,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +7,0.9659701492537314,0.9401510749564208,0.9528857479387516,0.0002348045712192,0.6170212765957447,0.5321100917431193,0.5714285714285714,0.0047107137865028,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +8,0.9715302491103204,0.9517722254503196,0.9615497505136484,0.0002065590537426,0.6302083333333334,0.555045871559633,0.5902439024390244,0.0045790973817929,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +9,0.9746909947027664,0.9622312608948286,0.968421052631579,0.0001963171961574,0.6197916666666666,0.5458715596330275,0.5804878048780487,0.0046136955229135,kamalkraj/bioelectra-base-discriminator-pubmed-pmc +0,0.5823250920568122,0.6432306798373039,0.6112644947542794,0.0047873769855605,0.5598086124401914,0.536697247706422,0.5480093676814988,0.006821020031706,allenai/biomed_roberta_base +1,0.7273762094479226,0.7425915165601394,0.7349051178838412,0.0025512914174013,0.605,0.555045871559633,0.5789473684210525,0.0048299089741284,allenai/biomed_roberta_base +2,0.8276820455808782,0.8651946542707728,0.8460227272727273,0.0015586193077212,0.6559633027522935,0.6559633027522935,0.6559633027522935,0.0049602510564279,allenai/biomed_roberta_base +3,0.9316923076923076,0.8797210923881464,0.904961147638972,0.0008306945981727,0.7354838709677419,0.5229357798165137,0.6112600536193029,0.0066258030951408,allenai/biomed_roberta_base +4,0.9145496535796768,0.9203951191167926,0.9174630755864468,0.0005289694210432,0.6614583333333334,0.5825688073394495,0.6195121951219513,0.0063566985132078,allenai/biomed_roberta_base +5,0.9201369081574444,0.937245787332946,0.9286125503742084,0.0005684297260739,0.6747572815533981,0.6376146788990825,0.6556603773584906,0.0077568634072891,allenai/biomed_roberta_base 
+6,0.9399538106235564,0.94596165020337,0.9429481610194034,0.0004142621845314,0.703125,0.6192660550458715,0.6585365853658536,0.0077284222844844,allenai/biomed_roberta_base +7,0.9730363423212192,0.9645554909936084,0.9687773562882988,0.0002357544045919,0.6572769953051644,0.6422018348623854,0.6496519721577727,0.0077270249099596,allenai/biomed_roberta_base +8,0.9835873388042204,0.9750145264381174,0.9792821709950392,0.0001039359219022,0.6633165829145728,0.6055045871559633,0.6330935251798561,0.0090775635594931,allenai/biomed_roberta_base +9,0.9888758782201406,0.9814061592097618,0.9851268591426072,8.536639519332081e-05,0.6847290640394089,0.6376146788990825,0.6603325415676959,0.0090358798020483,allenai/biomed_roberta_base +0,0.5700655933214073,0.5554909936083672,0.5626839317245438,0.0046403548366969,0.5434782608695652,0.4587155963302752,0.4975124378109452,0.0070160641917027,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +1,0.7484383872799546,0.7658338175479372,0.7570361860999426,0.0023922672038798,0.5707547169811321,0.555045871559633,0.5627906976744187,0.0046140137800158,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +2,0.8672102504368084,0.8651946542707728,0.8662012798138453,0.0013379092282889,0.645933014354067,0.6192660550458715,0.6323185011709601,0.0050376498973211,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +3,0.9299323909035032,0.8791400348634515,0.9038231780167264,0.0007673812743426,0.6722222222222223,0.555045871559633,0.6080402010050252,0.0059753659476812,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +4,0.9442761962447002,0.905868680999419,0.924673784104389,0.0004946950195896,0.6863905325443787,0.5321100917431193,0.599483204134367,0.0070260252463273,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +5,0.9481743227326266,0.9355026147588612,0.9417958467388124,0.0002983262247949,0.675531914893617,0.5825688073394495,0.6256157635467979,0.0078283891444488,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 
+6,0.9469026548672568,0.9325973271353863,0.939695550351288,0.00036740589761,0.6648936170212766,0.573394495412844,0.6157635467980296,0.0084916663942679,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +7,0.9702797202797204,0.9674607786170832,0.9688681990107652,0.0001456996560918,0.6616161616161617,0.6009174311926605,0.6298076923076923,0.0084550161314102,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +8,0.9753231492361928,0.9645554909936084,0.9699094361671048,0.0001343652934558,0.6458333333333334,0.5688073394495413,0.6048780487804879,0.0095776653344308,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +9,0.9824355971896956,0.9750145264381174,0.978710994459026,8.899824410690069e-05,0.7023809523809523,0.5412844036697247,0.6113989637305699,0.0096863085429784,allenai/dsp_roberta_base_dapt_biomed_tapt_chemprot_4169 +0,0.5576005453306067,0.4753050552004648,0.5131744040150565,0.0048140960023986,0.562874251497006,0.4311926605504587,0.4883116883116883,0.0073224126871746,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +1,0.7309392265193371,0.768739105171412,0.7493627867459643,0.0024784670477163,0.5981308411214953,0.5871559633027523,0.5925925925925924,0.0046508018321429,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +2,0.842344906089926,0.8599651365485182,0.851063829787234,0.0014746889980903,0.6764705882352942,0.6330275229357798,0.6540284360189574,0.0051444524500626,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +3,0.9409061063690084,0.8326554328878559,0.8834771886559803,0.0009044378520399,0.7133757961783439,0.5137614678899083,0.5973333333333334,0.0073484473172217,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +4,0.8964479229379891,0.8651946542707728,0.8805440567711414,0.0010224139348878,0.6683417085427136,0.6100917431192661,0.6378896882494005,0.0058538193001228,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 
+5,0.9317386231038508,0.9279488669378269,0.929839883551674,0.0004644622576715,0.6741071428571429,0.6926605504587156,0.6832579185520362,0.0063442418803998,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +6,0.9551357733175916,0.9401510749564208,0.947584187408492,0.0002460268728551,0.7094972067039106,0.5825688073394495,0.6397984886649875,0.0073186380194294,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +7,0.9617196702002356,0.9488669378268448,0.9552500731207956,0.0002619934736991,0.6790123456790124,0.5045871559633027,0.5789473684210527,0.0088641496637277,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +8,0.970798569725864,0.946542707728065,0.9585172109443952,0.0002653486265628,0.6559139784946236,0.5596330275229358,0.603960396039604,0.0085492354499785,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +9,0.9803012746234068,0.9831493317838466,0.9817232375979112,7.685106226843406e-05,0.68,0.6238532110091743,0.6507177033492824,0.0094380391595992,allenai/dsp_roberta_base_dapt_biomed_tapt_rct_500 +0,0.5914332784184514,0.6257989540964556,0.6081309994353472,0.0042432136229796,0.5602094240837696,0.4908256880733945,0.5232273838630807,0.0070669199787796,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +1,0.7581267217630854,0.799535153980244,0.7782805429864253,0.0022520124980303,0.583710407239819,0.591743119266055,0.5876993166287016,0.0054150229440822,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +2,0.8958837772397095,0.8599651365485182,0.877557070856804,0.0011157987084341,0.6302083333333334,0.555045871559633,0.5902439024390244,0.0071145912739806,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +3,0.9248075784487862,0.9076118535735038,0.9161290322580644,0.0006286785639453,0.5518867924528302,0.536697247706422,0.5441860465116279,0.0064743001040859,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 
+4,0.957888493475682,0.9384079023823358,0.9480481361901968,0.0003709253487995,0.6542553191489362,0.5642201834862385,0.6059113300492611,0.0089066058001033,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +5,0.9730046948356808,0.9633933759442184,0.9681751824817516,0.0001877257038992,0.6111111111111112,0.555045871559633,0.5817307692307692,0.0082724127211034,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +6,0.9853629976580796,0.977919814061592,0.9816272965879264,0.0001193045960637,0.6256410256410256,0.5596330275229358,0.5907990314769975,0.0097863351869678,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +7,0.9912126537785588,0.9831493317838466,0.9871645274212368,8.370675835591888e-05,0.6269430051813472,0.555045871559633,0.5888077858880778,0.0100637732164334,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +8,0.9935672514619884,0.9872167344567112,0.9903818128825416,6.94573819148248e-05,0.640625,0.5642201834862385,0.6,0.0103273813379368,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +9,0.9941588785046728,0.988959907030796,0.9915525779201864,6.459210033169161e-05,0.6288659793814433,0.5596330275229358,0.5922330097087379,0.0106192142344438,bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 +0,0.4484629294755877,0.2882045322486926,0.350902016271666,0.0033133041788363,0.44,0.2522935779816513,0.3206997084548105,0.0043410746058305,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +1,0.6689802455953017,0.7280650784427658,0.6972732331663885,0.0017820175691781,0.5154185022026432,0.536697247706422,0.5258426966292136,0.0033373893238604,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +2,0.7942555685814772,0.7873329459616502,0.79077910709075,0.0008109566457909,0.5798816568047337,0.4495412844036697,0.5064599483204134,0.0041059975673516,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 
+3,0.9233073696824444,0.89540964555491,0.9091445427728616,0.0003581836238635,0.6402439024390244,0.481651376146789,0.5497382198952879,0.0048548033452019,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +4,0.9638922888616892,0.915165601394538,0.9388971684053652,0.0002073908883148,0.6602564102564102,0.4724770642201835,0.5508021390374331,0.0055874938037819,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +5,0.9631760644418872,0.9726902963393376,0.9679098005203816,0.0001112927935551,0.6082474226804123,0.5412844036697247,0.5728155339805825,0.0056629163625005,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +6,0.9866123399301512,0.9848925043579314,0.9857516719976736,6.293504879153596e-05,0.5614035087719298,0.4403669724770642,0.493573264781491,0.0064927516863323,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +7,0.9982517482517482,0.9953515398024404,0.996799534477742,2.6688667656031623e-05,0.6,0.4954128440366973,0.542713567839196,0.0068902641785545,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +8,0.9976744186046512,0.9970947123765252,0.997384481255449,2.0824767083221625e-05,0.5792349726775956,0.4862385321100917,0.5286783042394015,0.006838324220063,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +9,0.9965177016831108,0.9976757699012202,0.997096399535424,1.828364274482976e-05,0.6127167630057804,0.4862385321100917,0.5421994884910486,0.0073069504175621,bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 +0,0.0,0.0,0.0,0.0095988869519629,0.0,0.0,0.0,0.0104230207462723,giacomomiolo/electramed_base_scivocab_1M +1,0.5862068965517241,0.7210923881464265,0.6466909848879625,0.0048840309623657,0.4898785425101215,0.555045871559633,0.5204301075268817,0.0071006233965118,giacomomiolo/electramed_base_scivocab_1M +2,0.7542888765910348,0.7919814061592098,0.7726757369614512,0.0030746205137189,0.5439560439560439,0.4541284403669725,0.495,0.0085968866210001,giacomomiolo/electramed_base_scivocab_1M 
+3,0.8885595732068761,0.8710052295177223,0.8796948356807511,0.0012375014150852,0.6709677419354839,0.4770642201834862,0.5576407506702412,0.0090500863286881,giacomomiolo/electramed_base_scivocab_1M +4,0.9062870699881376,0.8878558977338756,0.8969768124449662,0.0008266761937267,0.6167664670658682,0.4724770642201835,0.535064935064935,0.0100244664175149,giacomomiolo/electramed_base_scivocab_1M +5,0.947878787878788,0.9087739686228936,0.9279145654108572,0.0005704462216193,0.6594202898550725,0.4174311926605504,0.5112359550561798,0.010945601921636,giacomomiolo/electramed_base_scivocab_1M +6,0.9687684148497347,0.9552585705984892,0.9619660620245758,0.0003341106962642,0.6104651162790697,0.481651376146789,0.5384615384615385,0.0100675556056488,giacomomiolo/electramed_base_scivocab_1M +7,0.9795201872440024,0.9726902963393376,0.9760932944606412,0.0002492334776596,0.601123595505618,0.4908256880733945,0.5404040404040404,0.0101791875562272,giacomomiolo/electramed_base_scivocab_1M +8,0.9835873388042204,0.9750145264381174,0.9792821709950392,0.0001905567116384,0.6395348837209303,0.5045871559633027,0.564102564102564,0.0106736922170966,giacomomiolo/electramed_base_scivocab_1M +9,0.9859649122807016,0.979662986635677,0.982803847274847,0.0001753078257811,0.6470588235294118,0.5045871559633027,0.5670103092783505,0.0113328824652573,giacomomiolo/electramed_base_scivocab_1M +0,0.5419744795164539,0.4689134224288204,0.502803738317757,0.0063274960057693,0.5197368421052632,0.3623853211009174,0.427027027027027,0.0080671308409924,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +1,0.7776456599286563,0.7600232423009878,0.7687334704672347,0.0029335557792868,0.5891089108910891,0.5458715596330275,0.5666666666666665,0.0058505675271869,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +2,0.8660170523751523,0.8262638001162115,0.8456735057983943,0.0014343030650106,0.6900584795321637,0.5412844036697247,0.6066838046272494,0.0068634276226825,cambridgeltl/SapBERT-from-PubMedBERT-fulltext 
+3,0.8999397227245328,0.8675188843695526,0.8834319526627219,0.0009068784543881,0.6631578947368421,0.5779816513761468,0.6176470588235293,0.0073557823377016,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +4,0.9196428571428572,0.8977338756536897,0.9085563069685384,0.0006870968349184,0.6649746192893401,0.6009174311926605,0.6313253012048192,0.0071391487570228,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +5,0.9404761904761904,0.9180708890180128,0.9291384886798,0.0005131261170731,0.7325581395348837,0.5779816513761468,0.6461538461538462,0.0083849933558107,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +6,0.9445093457943924,0.9395700174317256,0.942033207107486,0.0003301090060639,0.657608695652174,0.555045871559633,0.6019900497512438,0.0086274215173034,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +7,0.9408009286128846,0.9418942475305055,0.9413472706155632,0.0002901702726176,0.7027027027027027,0.5963302752293578,0.6451612903225806,0.0088604635252872,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +8,0.9612634088200238,0.937245787332946,0.9491026772580172,0.0002986379767696,0.7142857142857143,0.5045871559633027,0.5913978494623656,0.0103566944670511,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +9,0.958139534883721,0.957582800697269,0.957861086893345,0.0002102202228761,0.6825396825396826,0.591743119266055,0.6339066339066338,0.0087432921796929,cambridgeltl/SapBERT-from-PubMedBERT-fulltext +0,0.0,0.0,0.0,0.0042977830930931,0.0,0.0,0.0,0.004928735568403,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +1,0.6140524492825334,0.7210923881464265,0.6632816675574559,0.0024507541487296,0.5353982300884956,0.555045871559633,0.5450450450450449,0.003452050848864,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +2,0.7798329355608592,0.7594421847762929,0.7695025022078303,0.0012145310549612,0.6335403726708074,0.4678899082568807,0.5382585751978892,0.003274657871001,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token 
+3,0.9124365482233504,0.8355607205113306,0.8723081589323627,0.00063642124111,0.7417218543046358,0.5137614678899083,0.6070460704607047,0.0038993824810649,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +4,0.9430255402750491,0.8367228355607205,0.8866995073891625,0.0005314021824917,0.752,0.4311926605504587,0.5481049562682216,0.0041466917162044,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +5,0.9377722464219042,0.8756536897152818,0.9056490384615384,0.0003917765766779,0.7463768115942029,0.4724770642201835,0.5786516853932585,0.0045643610736498,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +6,0.944043321299639,0.9116792562463684,0.927579071829737,0.0002602005716026,0.7383720930232558,0.5825688073394495,0.6512820512820513,0.0039257632228187,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +7,0.9507507507507508,0.9198140615920976,0.93502658003544,0.0002430901594411,0.6885245901639344,0.5779816513761468,0.6284289276807979,0.0046609281472718,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +8,0.9496445497630333,0.9314352120859966,0.9404517453798767,0.00019185290051,0.6864864864864865,0.5825688073394495,0.630272952853598,0.004766400079601,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +9,0.9705351773902584,0.9378268448576408,0.953900709219858,0.0001413802582454,0.7755102040816326,0.5229357798165137,0.6246575342465753,0.0052055220406215,cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token +0,0.717948717948718,0.7484020918070889,0.7328591749644382,0.0031955419200411,0.6328502415458938,0.6009174311926605,0.6164705882352941,0.005711731120335,allenai/scibert_scivocab_uncased +1,0.7489733059548255,0.8477629285299244,0.7953120741346417,0.0023905360940815,0.5836734693877551,0.6559633027522935,0.6177105831533476,0.0058496741089934,allenai/scibert_scivocab_uncased 
+2,0.8653846153846154,0.8890180127832655,0.877042132416165,0.0013450047500216,0.5972222222222222,0.591743119266055,0.5944700460829493,0.0060111101125725,allenai/scibert_scivocab_uncased +3,0.9252625077208152,0.8704241719930274,0.8970059880239521,0.0006752553373334,0.7241379310344828,0.481651376146789,0.578512396694215,0.0078581955637603,allenai/scibert_scivocab_uncased +4,0.9366197183098592,0.927367809413132,0.931970802919708,0.0004994666898228,0.6923076923076923,0.536697247706422,0.6046511627906976,0.0103736563917034,allenai/scibert_scivocab_uncased +5,0.97045191193511,0.9732713538640324,0.9718595880475775,0.0001770448414088,0.6802030456852792,0.6146788990825688,0.6457831325301204,0.009053939469875,allenai/scibert_scivocab_uncased +6,0.9671015314804312,0.9907030796048808,0.978760045924225,0.0002125826466123,0.6355555555555555,0.6559633027522935,0.6455981941309255,0.007684387669612,allenai/scibert_scivocab_uncased +7,0.9883788495061012,0.9883788495061012,0.9883788495061012,7.901200220420109e-05,0.7068965517241379,0.5642201834862385,0.6275510204081632,0.0092123861317164,allenai/scibert_scivocab_uncased +8,0.975287356321839,0.9860546194073212,0.9806414331118172,0.0001552618573908,0.6410256410256411,0.573394495412844,0.6053268765133173,0.0101882727110653,allenai/scibert_scivocab_uncased +9,0.9901105293775452,0.988959907030796,0.9895348837209302,7.341772612822887e-05,0.6631016042780749,0.5688073394495413,0.6123456790123457,0.0108710827771574,allenai/scibert_scivocab_uncased +0,0.6692913385826772,0.6914584543869843,0.6801943412403544,0.0035245931576909,0.6178010471204188,0.5412844036697247,0.5770171149144254,0.0063274969288613,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +1,0.7967884828349945,0.8361417780360255,0.8159909271335414,0.0020619224844843,0.6196581196581197,0.6651376146788991,0.6415929203539824,0.005346596059658,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract 
+2,0.9082297217288336,0.8913422428820453,0.8997067448680353,0.0008745780567279,0.5859030837004405,0.6100917431192661,0.597752808988764,0.0070683217395876,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +3,0.9482653682288495,0.905287623474724,0.9262782401902496,0.0005154847780517,0.6170212765957447,0.5321100917431193,0.5714285714285714,0.0066811043473605,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +4,0.9747356051703878,0.9639744334689134,0.9693251533742332,0.0002260759151331,0.616822429906542,0.6055045871559633,0.611111111111111,0.0075186132095181,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +5,0.99241097489784,0.9877977919814062,0.99009900990099,9.539393686891082e-05,0.6358974358974359,0.5688073394495413,0.6004842615012106,0.0083990592482643,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +6,0.9976717112922002,0.9959325973271352,0.9968013957545798,4.32987623201092e-05,0.6190476190476191,0.5963302752293578,0.6074766355140186,0.0090674257710886,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +7,0.99883788495061,0.99883788495061,0.99883788495061,2.705632749543762e-05,0.6333333333333333,0.6100917431192661,0.6214953271028038,0.009224439539069,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +8,1.0,0.99883788495061,0.9994186046511628,2.2143565664153625e-05,0.6584158415841584,0.6100917431192661,0.6333333333333334,0.009582703115414,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +9,1.0,0.999418942475305,0.999709386806161,2.0765882220028515e-05,0.6551724137931034,0.6100917431192661,0.6318289786223278,0.0097550437897622,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract +0,0.4650735294117647,0.147007553747821,0.2233995584988962,0.0035692179663449,0.3934426229508196,0.110091743119266,0.1720430107526881,0.0042907621604032,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext 
+1,0.6830287206266319,0.7600232423009878,0.7194719471947195,0.0018313721020789,0.6045454545454545,0.6100917431192661,0.6073059360730593,0.0026835869245517,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +2,0.8198573127229488,0.8012783265543288,0.8104613576256244,0.000997333282738,0.6720430107526881,0.573394495412844,0.6188118811881188,0.0029079594309083,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +3,0.9095873786407768,0.8710052295177223,0.889878302166815,0.0005521403110332,0.6593406593406593,0.5504587155963303,0.6,0.0029003086204354,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +4,0.9442355889724312,0.8756536897152818,0.908652396744046,0.0003623299420098,0.7151515151515152,0.5412844036697247,0.6161879895561357,0.0034349323071252,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +5,0.953125,0.9215572341661824,0.9370753323485967,0.000220633798018,0.7241379310344828,0.5779816513761468,0.6428571428571428,0.0037050262147274,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +6,0.947953216374269,0.9418942475305055,0.9449140192363742,0.0001649376506742,0.6804123711340206,0.6055045871559633,0.6407766990291263,0.0036620883575568,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +7,0.9674748669426376,0.9506101104009296,0.958968347010551,0.0001203091757792,0.7159090909090909,0.5779816513761468,0.6395939086294417,0.0039900518873204,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +8,0.9717314487632508,0.9587449157466588,0.9651945013161742,0.0001000739438328,0.7094972067039106,0.5825688073394495,0.6397984886649875,0.0040397230679026,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext +9,0.9747058823529412,0.9628123184195236,0.968722595732242,9.233257107796712e-05,0.7166666666666667,0.591743119266055,0.6482412060301508,0.0040915355650493,microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 
0000000..e896a06 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +markers = + slow: mark a test as slow \ No newline at end of file diff --git a/renv.lock b/renv.lock new file mode 100644 index 0000000..47f9769 --- /dev/null +++ b/renv.lock @@ -0,0 +1,657 @@ +{ + "R": { + "Version": "4.0.3", + "Repositories": [ + { + "Name": "CRAN", + "URL": "https://cran.rstudio.com" + } + ] + }, + "Packages": { + "MASS": { + "Package": "MASS", + "Version": "7.3-53", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d1bc1c8e9c0ace57ec9ffea01021d45f" + }, + "Matrix": { + "Package": "Matrix", + "Version": "1.2-18", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "08588806cba69f04797dab50627428ed" + }, + "R6": { + "Package": "R6", + "Version": "2.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "470851b6d5d0ac559e9d01bb352b4021" + }, + "RColorBrewer": { + "Package": "RColorBrewer", + "Version": "1.1-3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "45f0398006e83a5b10b72a90663d8d8c" + }, + "Rcpp": { + "Package": "Rcpp", + "Version": "1.0.9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e9c08b94391e9f3f97355841229124f2" + }, + "RgoogleMaps": { + "Package": "RgoogleMaps", + "Version": "1.4.5.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bffdcd07380ea0a95228588f8aa93734" + }, + "V8": { + "Package": "V8", + "Version": "4.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ebee37dadb0a8f5086663825d2c33076" + }, + "argparse": { + "Package": "argparse", + "Version": "2.1.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "46a8d86acfcbf3af639d6066bc7cc33f" + }, + "askpass": { + "Package": "askpass", + "Version": "1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e8a22846fff485f0be3770c2da758713" + }, + "base64enc": { + "Package": "base64enc", + "Version": "0.1-3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": 
"543776ae6848fde2f48ff3816d0628bc" + }, + "bigD": { + "Package": "bigD", + "Version": "0.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "93637e906f3fe962413912c956eb44db" + }, + "bit": { + "Package": "bit", + "Version": "4.0.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f36715f14d94678eea9933af927bc15d" + }, + "bit64": { + "Package": "bit64", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9fe98599ca456d6552421db0d6772d8f" + }, + "bitops": { + "Package": "bitops", + "Version": "1.0-7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b7d8d8ee39869c18d8846a184dd8a1af" + }, + "bslib": { + "Package": "bslib", + "Version": "0.3.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "56ae7e1987b340186a8a5a157c2ec358" + }, + "cli": { + "Package": "cli", + "Version": "3.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0d297d01734d2bcea40197bd4971a764" + }, + "clipr": { + "Package": "clipr", + "Version": "0.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3f038e5ac7f41d4ac41ce658c85e3042" + }, + "colorspace": { + "Package": "colorspace", + "Version": "2.0-3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bb4341986bc8b914f0f0acf2e4a3f2f7" + }, + "commonmark": { + "Package": "commonmark", + "Version": "1.8.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b6e3e947d1d7ebf3d2bdcea1bde63fe7" + }, + "cpp11": { + "Package": "cpp11", + "Version": "0.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fa53ce256cd280f468c080a58ea5ba8c" + }, + "crayon": { + "Package": "crayon", + "Version": "1.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8dc45fd8a1ee067a92b85ef274e66d6a" + }, + "curl": { + "Package": "curl", + "Version": "4.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "022c42d49c28e95d69ca60446dbabf88" + }, + "digest": { + "Package": "digest", + "Version": "0.6.29", + 
"Source": "Repository", + "Repository": "CRAN", + "Hash": "cf6b206a045a684728c3267ef7596190" + }, + "dplyr": { + "Package": "dplyr", + "Version": "1.0.9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f0bda1627a7f5d3f9a0b5add931596ac" + }, + "ellipsis": { + "Package": "ellipsis", + "Version": "0.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077" + }, + "europepmc": { + "Package": "europepmc", + "Version": "0.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7de048ca9ffe0677e3b0caec1c7f837f" + }, + "evaluate": { + "Package": "evaluate", + "Version": "0.15", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "699a7a93d08c962d9f8950b2d7a227f1" + }, + "fansi": { + "Package": "fansi", + "Version": "1.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "83a8afdbe71839506baa9f90eebad7ec" + }, + "farver": { + "Package": "farver", + "Version": "2.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8106d78941f34855c440ddb946b8f7a5" + }, + "fastmap": { + "Package": "fastmap", + "Version": "1.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "77bd60a6157420d4ffa93b27cf6a58b8" + }, + "findpython": { + "Package": "findpython", + "Version": "1.0.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "73cb09f8220dfed388bd89d180f0b922" + }, + "forcats": { + "Package": "forcats", + "Version": "0.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9d95bc88206321cd1bc98480ecfd74bb" + }, + "fs": { + "Package": "fs", + "Version": "1.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7c89603d81793f0d5486d91ab1fc6f1d" + }, + "generics": { + "Package": "generics", + "Version": "0.1.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "15e9634c0fcd294799e9b2e929ed1b86" + }, + "ggmap": { + "Package": "ggmap", + "Version": "3.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": 
"c4e0c8c8576ef44121a7447fb6958627" + }, + "ggplot2": { + "Package": "ggplot2", + "Version": "3.3.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0fb26d0674c82705c6b701d1a61e02ea" + }, + "glue": { + "Package": "glue", + "Version": "1.6.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4f2596dfb05dac67b9dc558e5c6fba2e" + }, + "gt": { + "Package": "gt", + "Version": "0.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d100be8d1f54dc589cc8d63366839287" + }, + "gtable": { + "Package": "gtable", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ac5c6baf7822ce8732b343f14c072c4d" + }, + "highr": { + "Package": "highr", + "Version": "0.9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8eb36c8125038e648e5d111c0d7b2ed4" + }, + "hms": { + "Package": "hms", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5b8a2dd0fdbe2ab4f6081e6c7be6dfca" + }, + "htmltools": { + "Package": "htmltools", + "Version": "0.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "526c484233f42522278ab06fb185cb26" + }, + "httr": { + "Package": "httr", + "Version": "1.4.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "57557fac46471f0dbbf44705cc6a5c8c" + }, + "isoband": { + "Package": "isoband", + "Version": "0.2.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7ab57a6de7f48a8dc84910d1eca42883" + }, + "jpeg": { + "Package": "jpeg", + "Version": "0.1-10", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "031a0b683d001a7519202f0628fc0358" + }, + "jquerylib": { + "Package": "jquerylib", + "Version": "0.1.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5aab57a3bd297eee1c1d862735972182" + }, + "jsonlite": { + "Package": "jsonlite", + "Version": "1.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d07e729b27b372429d42d24d503613a0" + }, + "juicyjuice": { + "Package": "juicyjuice", + "Version": 
"0.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3bcd11943da509341838da9399e18bce" + }, + "knitr": { + "Package": "knitr", + "Version": "1.39", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "029ab7c4badd3cf8af69016b2ba27493" + }, + "labeling": { + "Package": "labeling", + "Version": "0.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3d5108641f47470611a32d0bdf357a72" + }, + "lattice": { + "Package": "lattice", + "Version": "0.20-41", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fbd9285028b0263d76d18c95ae51a53d" + }, + "lifecycle": { + "Package": "lifecycle", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a6b6d352e3ed897373ab19d8395c98d0" + }, + "magrittr": { + "Package": "magrittr", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7ce2733a9826b3aeb1775d56fd305472" + }, + "maps": { + "Package": "maps", + "Version": "3.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b3d98a967ec17c80f795719529812fa0" + }, + "mgcv": { + "Package": "mgcv", + "Version": "1.8-33", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "eb7b6439bc6d812eed2cddba5edc6be3" + }, + "mime": { + "Package": "mime", + "Version": "0.12", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "18e9c28c1d3ca1560ce30658b22ce104" + }, + "munsell": { + "Package": "munsell", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6dfe8bf774944bd5595785e3229d8771" + }, + "nlme": { + "Package": "nlme", + "Version": "3.1-149", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7c24ab3a1e3afe50388eb2d893aab255" + }, + "openssl": { + "Package": "openssl", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b9621e75c0652041002a19609fb23c5a" + }, + "pillar": { + "Package": "pillar", + "Version": "1.7.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": 
"51dfc97e1b7069e9f7e6f83f3589c22e" + }, + "pkgconfig": { + "Package": "pkgconfig", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "01f28d4278f15c76cddbea05899c5d6f" + }, + "plyr": { + "Package": "plyr", + "Version": "1.8.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9c17c6ee41639ebdc1d7266546d3b627" + }, + "png": { + "Package": "png", + "Version": "0.1-8", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bd54ba8a0a5faded999a7aab6e46b374" + }, + "prettyunits": { + "Package": "prettyunits", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "95ef9167b75dde9d2ccc3c7528393e7e" + }, + "progress": { + "Package": "progress", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061" + }, + "purrr": { + "Package": "purrr", + "Version": "0.3.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "97def703420c8ab10d8f0e6c72101e02" + }, + "rappdirs": { + "Package": "rappdirs", + "Version": "0.3.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5e3c5dc0b071b21fa128676560dbe94d" + }, + "readr": { + "Package": "readr", + "Version": "2.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9c59de1357dc209868b5feb5c9f0fe2f" + }, + "renv": { + "Package": "renv", + "Version": "0.14.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "30e5eba91b67f7f4d75d31de14bbfbdc" + }, + "rlang": { + "Package": "rlang", + "Version": "1.0.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "04884d9a75d778aca22c7154b8333ec9" + }, + "rmarkdown": { + "Package": "rmarkdown", + "Version": "2.14", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "31b60a882fabfabf6785b8599ffeb8ba" + }, + "sass": { + "Package": "sass", + "Version": "0.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f37c0028d720bab3c513fd65d28c7234" + }, + "scales": { + "Package": "scales", + 
"Version": "1.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6e8750cdd13477aa440d453da93d5cac" + }, + "sp": { + "Package": "sp", + "Version": "1.5-1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7b6b37e9d8e3295ccd5a73ac6d62db98" + }, + "stringi": { + "Package": "stringi", + "Version": "1.7.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bba431031d30789535745a9627ac9271" + }, + "stringr": { + "Package": "stringr", + "Version": "1.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0759e6b6c0957edb1311028a49a35e76" + }, + "sys": { + "Package": "sys", + "Version": "3.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b227d13e29222b4574486cfcbde077fa" + }, + "tibble": { + "Package": "tibble", + "Version": "3.1.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "08415af406e3dd75049afef9552e7355" + }, + "tidyr": { + "Package": "tidyr", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d8b95b7fee945d7da6888cf7eb71a49c" + }, + "tidyselect": { + "Package": "tidyselect", + "Version": "1.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "17f6da8cfd7002760a859915ce7eef8f" + }, + "tinytex": { + "Package": "tinytex", + "Version": "0.40", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e7b654da5e77bc4e5435a966329cd25f" + }, + "triebeard": { + "Package": "triebeard", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "847a9d113b78baca4a9a8639609ea228" + }, + "tzdb": { + "Package": "tzdb", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e" + }, + "urltools": { + "Package": "urltools", + "Version": "1.7.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e86a704261a105f4703f653e05defa3e" + }, + "utf8": { + "Package": "utf8", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": 
"c9c462b759a5cc844ae25b5942654d13" + }, + "vctrs": { + "Package": "vctrs", + "Version": "0.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8b54f22e2a58c4f275479c92ce041a57" + }, + "viridisLite": { + "Package": "viridisLite", + "Version": "0.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "55e157e2aa88161bdb0754218470d204" + }, + "vroom": { + "Package": "vroom", + "Version": "1.5.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "976507b5a105bc3bdf6a5a5f29e0684f" + }, + "withr": { + "Package": "withr", + "Version": "2.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c0e49a9760983e81e55cdd9be92e7182" + }, + "xfun": { + "Package": "xfun", + "Version": "0.30", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e83f48136b041845e50a6658feffb197" + }, + "xml2": { + "Package": "xml2", + "Version": "1.3.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "40682ed6a969ea5abfd351eb67833adc" + }, + "yaml": { + "Package": "yaml", + "Version": "2.3.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "458bb38374d73bf83b1bb85e353da200" + } + } +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..51dbd18 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,23 @@ +datasets == 1.18.3 +kaleido == 0.2.1 +nltk == 3.6.1 +numpy == 1.19 +pandas == 1.3.5 +plotly == 5.1.0 +pyyaml +scikit-learn == 0.24.1 +seqeval == 1.2.2 +snakemake == 7.1.1 +torch == 1.9.0 +transformers == 4.16.2 +tqdm == 4.63.0 +pycountry == 22.3.5 +pytest == 6.2.4 +flake8 == 3.9.2 +pylint == 2.8.2 +mypy == 0.812 +pytest-flake8 == 1.0.7 +pytest-pylint == 0.18.0 +pytest-mypy == 0.8.1 +requests == 2.27.1 +urllib3 == 1.26.8 \ No newline at end of file diff --git a/running_pipeline.ipynb b/running_pipeline.ipynb new file mode 100644 index 0000000..269036a --- /dev/null +++ b/running_pipeline.ipynb @@ -0,0 +1 @@ 
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"private_outputs":true,"authorship_tag":"ABX9TyM0qmq/11ZbcbKfRqcLv4cZ"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"gpuClass":"standard"},"cells":[{"cell_type":"markdown","source":["# Running Training and Prediction Pipeline\n","---\n","This notebook provides all the commands to reproduce the results of training the models, and prediction on the full corpus.\n","\n","This process does not have to be done to update the inventory, but simply to reproduce the reported results (this is the process used to produce them in the first place).\n","\n","This pipeline has the following steps:\n","\n","* Split the manually curated datasets\n","* Train all models on the classification and NER tasks\n","* Select the best model for each task\n","* Evaluate all models for each task on their test sets\n","* Perform classification of full corpus\n","* Run NER model on predicted biodata resource papers\n","* Extract URLs from predicted positives\n","* Process the predicted names\n","* Perform automated initial deduplication\n","* Flag the inventory for selective manual review\n","\n","### ***Warning***:\n","\n","Running the full pipeline trains many models, and their \"checkpoint\" files are quite large (~0.5GB per model, ~15GB in total). Simply running prediction requires far fewer resources, including storage space.\n","\n","### Other use-cases\n","\n","If you want to compare a new model to the previously compared models, you can add another row to `config/models_info.tsv`. This pipeline will train this model and compare it to the others.
If the other trained model checkpoint files are still present from a previous run, they will not be re-trained during the process.\n","\n","# Setup\n","---\n","### Mount Drive\n","\n","First, mount Google Drive to have access to files necessary for the run:\n"],"metadata":{"id":"x4whPVjZZa7x"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"BmwESzXcjXTb"},"outputs":[],"source":["from google.colab import drive\n","drive.mount('/content/drive')\n","%cd /content/drive/MyDrive/GitHub/inventory_2022/"]},{"cell_type":"markdown","source":["Run the make target to install Python and R dependencies."],"metadata":{"id":"6a7pMnIVbKXE"}},{"cell_type":"code","source":["! make setup"],"metadata":{"id":"iBMUW3C0YIz4"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Obtaining Fine-tuned models\n","\n","All fine-tuned models have been archived. They can be optionally downloaded using the following cell. This cell also splits the training data first so that Snakemake will not automatically retrain the models (the training data is an input to the models, so if it is split after downloading, Snakemake will think the models are out of date)."],"metadata":{"id":"UXM8YkuDMOCI"}},{"cell_type":"code","source":["# Split the labeled data sets\n","! snakemake -s snakemake/train_predict.smk --configfile config/train_predict.yml -c 1 --until split_classif_data\n","! snakemake -s snakemake/train_predict.smk --configfile config/train_predict.yml -c 1 --until split_ner_data\n","\n","# Create output directory\n","! mkdir -p out/\n","\n","# Download models (may take several minutes)\n","! git lfs install\n","! git clone https://huggingface.co/globalbiodata/inventory_2022_all_models\n","\n","# Move models to proper directory and delete unused files\n","! mv inventory_2022_all_models/classification_models/ out/classif_train_out\n","! mv inventory_2022_all_models/ner_models/ out/ner_train_out\n","! rm -rf inventory_2022_all_models\n","! 
rm -rf out/classif_train_out/best\n","! rm -rf out/ner_train_out/best"],"metadata":{"id":"0UjJBuKpMzCZ"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Running the pipeline\n","---\n","Now, we are ready to run the pipeline\n","\n","## Previewing what has to be done.\n","\n","The following can be run to get a preview of what has to be done."],"metadata":{"id":"XG8imhT0bms7"}},{"cell_type":"code","source":["! make dryrun_reproduction"],"metadata":{"id":"L6sCA8z9nQWZ"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Run it\n","\n","The following cell will run the entire pipeline described above. It takes a while, even with GPU acceleration. Without GPU it will take a very long time, if it is able to finish at all."],"metadata":{"id":"BIyIBNEGcC_u"}},{"cell_type":"code","source":["! make train_and_predict"],"metadata":{"id":"zFSmOvuUnSPE"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Selective Manual Review\n","\n","After running the initial pipeline, the inventory has been flagged for selective manual review.\n","\n","The file to be reviewed is located at:\n","\n","`out/original_query/for_manual_review/predictions.csv`\n","\n","Review the flagged columns according to the instruction sheet ([doi: 10.5281/zenodo.7768363](https://doi.org/10.5281/zenodo.7768363)), then place the manually reviewed file in the following folder:\n","\n","`out/original_query/manually_reviewed/`\n","\n","The file must still be named `predictions.csv`\n"],"metadata":{"id":"eMe39pCwPAoH"}},{"cell_type":"markdown","source":["# Processing Manual Review\n","\n","Next, further processing is performed on the manually reviewed inventory.\n","\n","If you simply want to reproduce the original results (without manually reviewing the inventory or training models), you can copy the files that would have been generated to this point with the following commands. 
Otherwise, skip this code chunk."],"metadata":{"id":"Cqbody4wPyUM"}},{"cell_type":"code","source":["! mkdir -p out/original_query\n","! mkdir -p out/original_query/manually_reviewed/\n","! cp data/manually_reviewed_inventory.csv out/original_query/manually_reviewed/predictions.csv\n","! mkdir -p out/classif_train_out/combined_train_stats/\n","! cp data/classif_metrics/combined_train_stats.csv out/classif_train_out/combined_train_stats/combined_stats.csv\n","! mkdir -p out/classif_train_out/combined_test_stats/\n","! cp data/classif_metrics/combined_test_stats.csv out/classif_train_out/combined_test_stats/combined_stats.csv\n","! mkdir -p out/ner_train_out/combined_train_stats/\n","! cp data/ner_metrics/combined_train_stats.csv out/ner_train_out/combined_train_stats/combined_stats.csv\n","! mkdir -p out/ner_train_out/combined_test_stats/\n","! cp data/ner_metrics/combined_test_stats.csv out/ner_train_out/combined_test_stats/combined_stats.csv"],"metadata":{"id":"Iya2L4ecM2IG"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["To also skip processing of the manually reviewed file (such as getting metadata from EuropePMC), and just perform data analysis on the original output files, run the following code chunk:"],"metadata":{"id":"Y5ujIDs9NjW8"}},{"cell_type":"code","source":["! mkdir -p out/original_query/processed_countries/\n","! cp data/final_inventory_2022.csv out/original_query/processed_countries/predictions.csv"],"metadata":{"id":"ghybzbpKNu_-"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["Final analysis of the inventory compares the resources found to those in re3data and FAIRsharing. FAIRsharing requires login credentials to use their API. 
Before proceeding, please create an account at FAIRsharing, and enter your email address and password in the file `config/fairsharing_login.json`."],"metadata":{"id":"DviDeEJVK-eR"}},{"cell_type":"markdown","source":["After manually reviewing the inventory or running the above code chunks to copy the previous files, final processing is performed with the below code chunk:"],"metadata":{"id":"cPk1Ym3MPMXe"}},{"cell_type":"code","source":["! make process_manually_reviewed_original"],"metadata":{"id":"xPoZb6piP7fd"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Results\n","---\n","Once the pipeline is complete, there are a few important output files:\n","\n","\n","## Final inventory\n","\n","The final inventory, including names, URLs, and metadata is found in the file:\n","* `out/original_query/processed_countries/predictions.csv`\n","\n","## Model training stats\n","\n","The per-epoch training statistics for all models are in the files:\n","\n","* `out/classif_train_out/combined_train_stats/combined_stats.csv`\n","* `out/ner_train_out/combined_train_stats/combined_stats.csv`\n","\n","## Test set evaluation\n","\n","Performance measures of the trained models on the test set are located in the files:\n","\n","* `out/classif_train_out/combined_test_stats/combined_stats.csv`\n","* `out/ner_train_out/combined_test_stats/combined_stats.csv`\n","\n","## Selected models\n","The names of the best models are in the files:\n","\n","* `out/classif_train_out/best/best_checkpt.txt`\n","* `out/ner_train_out/best/best_checkpt.txt`\n","\n","## Figures and analyses\n","\n","Figures showing the model performances on the validation sets are present:\n","\n","* `analysis/figures/class_val_set_performances.png`\n","* `analysis/figures/class_val_set_performances.svg`\n","* `analysis/figures/ner_val_set_performances.png`\n","* `analysis/figures/ner_val_set_performances.svg`\n","\n","There are tables of all models' performance on the validation and test 
sets:\n","\n","* `analysis/figures/combined_classification_table.docx`\n","* `analysis/figures/combined_ner_table.docx`\n","\n","Figures of location data are also output:\n","\n","* `analysis/figures/author_countries.png`\n","* `analysis/figures/ip_coordinates.png`\n","* `analysis/figures/ip_countries.png`\n","\n","Figures/table on text mining potential:\n","\n","* `analysis/figures/text_mining_potential_plot.png`\n","* `analysis/figures/text_mining_potential_plot.svg`\n","* `analysis/figures/text_mining_potential.csv`\n","\n","Comparisons to re3data and FAIRsharing:\n","\n","* `analysis/inventory_re3data_fairsharing_summary.csv`\n","* `analysis/venn_diagram_sets.csv`\n","\n","Finally, some stats on the inventory are saved:\n","\n","* `analysis/analysed_metadata.txt`"],"metadata":{"id":"RsXV-FmxccZN"}}]} \ No newline at end of file diff --git a/snakemake/README.md b/snakemake/README.md new file mode 100644 index 0000000..3ab4c2c --- /dev/null +++ b/snakemake/README.md @@ -0,0 +1,47 @@ +# Snakemake Pipelines + +Snakemake is used to organize the various steps involved in reproducing the original results or updating the inventory by defining workflows. This allows for easy execution of many steps in a specified order to obtain the desired outputs. + +This directory contains both workflow files, and a file of rules (steps) shared by multiple workflows. + +```sh +. +├── README.md +├── shared_rules.smk # File with shared rules +├── train_predict.smk # Workflow for reproducing results +└── update_inventory.smk # Workflow for updating inventory +``` + +These files should not need to be edited, since they just capture the workflow logic, while configurations are separate and present in [config/](../config/). + +However, if any of these files get edited, it can be nice to format them. 
This makes them have consistent formatting, and can spot some syntax errors: + +```sh +# To format the Snakefiles +$ snakefmt *.smk +``` + + +## `shared_rules.smk` + +The rules in this file are imported for use in the other workflows by having the line `include: "shared_rules.smk"` in the workflows. Shared rules are modularized so as to adhere to D.R.Y. (Don't Repeat Yourself) as much as possible. These are mostly downstream steps, since those occur both when reproducing the original results and when updating the inventory. + +Note that the rules in this file often specify values obtained from the config file. For instance: + +```python +infile=config["query_out_dir"] + "/query_results.csv", +``` + +Each snakemake workflow will be utilizing a different config file. So the value of `config["query_out_dir"]` will vary based on what workflow is using the rule. That also means that the config file of each workflow using these rules must have the appropriate keys (*e.g.* "query_out_dir"). + +## `train_predict.smk` + +This is the workflow used to obtain the results in the manuscript, and can be run to reproduce our results. Instructions on how to do so are present in the [root README.md](../README.md). + +Configurations for this workflow are present in [train_predict.yml](../config/train_predict.yml) and [models_info.tsv](../config/models_info.tsv) + +## `update_inventory.smk` + +This workflow can be used to update the inventory. Instructions on how to do so are present in the [root README.md](../README.md). + +Configurations for this workflow are present in [update_inventory.yml](../config/update_inventory.yml). 
\ No newline at end of file diff --git a/snakemake/shared_rules.smk b/snakemake/shared_rules.smk new file mode 100644 index 0000000..e74310c --- /dev/null +++ b/snakemake/shared_rules.smk @@ -0,0 +1,177 @@ +# Predict classification of entire corpus +rule classify_papers: + input: + model=config["classif_train_outdir"] + "/best/best_checkpt.txt", + infile=config["query_out_dir"] + "/query_results.csv", + output: + config["classif_out_dir"] + "/predictions.csv", + params: + out_dir=config["classif_out_dir"], + shell: + """ + python3 src/class_predict.py \ + -o {params.out_dir} \ + -i {input.infile} \ + -c "$(< {input.model})" + """ + + +# Keep only predicted biodata resources (drop 'not-bio-resource' rows) +rule filter_positives: + input: + config["classif_out_dir"] + "/predictions.csv", + output: + config["classif_out_dir"] + "/predicted_positives.csv", + shell: + """ + grep -v 'not-bio-resource' {input} > {output} + """ + + +# Predict NER on predicted biodata resource papers +rule ner_predict: + input: + infile=config["classif_out_dir"] + "/predicted_positives.csv", + model=config["ner_train_outdir"] + "/best/best_checkpt.txt", + output: + config["ner_out_dir"] + "/predictions.csv", + params: + out_dir=config["ner_out_dir"], + shell: + """ + python3 src/ner_predict.py \ + -o {params.out_dir} \ + -i {input.infile} \ + -c "$(< {input.model})" + """ + + +# Extract URLs from title and abstract +rule extract_urls: + input: + config["ner_out_dir"] + "/predictions.csv", + output: + config["extract_url_dir"] + "/predictions.csv", + params: + out_dir=config["extract_url_dir"], + max_urls=config["max_urls"], + shell: + """ + python3 src/url_extractor.py \ + -o {params.out_dir} \ + -x {params.max_urls} \ + {input} + """ + + +# Process predicted resource names +rule process_names: + input: + config["extract_url_dir"] + "/predictions.csv", + output: + config["processed_names_dir"] + "/predictions.csv", + params: + out_dir=config["processed_names_dir"], + shell: + """ + python3 src/process_names.py \ + 
-o {params.out_dir} \ + {input} + """ + + +# Flag rows for manual review +rule flag_for_review: + input: + config["initial_dedupe_dir"] + "/predictions.csv", + output: + config["for_manual_review_dir"] + "/predictions.csv", + params: + out_dir=config["for_manual_review_dir"], + min_prob=config["min_best_name_prob"], + new_dir=config["manually_reviewed_dir"], + shell: + """ + python3 src/flag_for_review.py \ + -o {params.out_dir} \ + -p {params.min_prob} \ + {input} + + mkdir -p {params.new_dir} + echo "Inventory flagged for manual review." + echo "Once manual review is finished place file in {params.new_dir}." + """ + + +# Process manual review +rule process_manual_review: + input: + config["manually_reviewed_dir"] + "/predictions.csv", + output: + config["processed_manual_review"] + "/predictions.csv", + params: + out_dir=config["processed_manual_review"], + shell: + """ + python3 src/process_manual_review.py \ + -o {params.out_dir} \ + {input} + """ + + +# Check URL http statuses +rule check_urls: + input: + config["processed_manual_review"] + "/predictions.csv", + output: + config["check_url_dir"] + "/predictions.csv", + params: + out_dir=config["check_url_dir"], + chunk_size=config["chunk_size"], + num_tries=config["num_tries"], + backoff=config["backoff"], + shell: + """ + python3 src/check_urls.py \ + -s {params.chunk_size} \ + -n {params.num_tries} \ + -b {params.backoff} \ + -o {params.out_dir} \ + {input} + """ + + +# Get additional metadata from EuropePMC +rule get_epmc_meta: + input: + config["check_url_dir"] + "/predictions.csv", + output: + config["epmc_meta_dir"] + "/predictions.csv", + params: + out_dir=config["epmc_meta_dir"], + chunk_size=config["epmc_chunk_size"], + shell: + """ + python3 src/get_meta.py \ + -s {params.chunk_size} \ + -o {params.out_dir} \ + {input} + """ + + +# Process country information +rule process_countries: + input: + config["epmc_meta_dir"] + "/predictions.csv", + output: + config["processed_countries"] + 
"/predictions.csv", + params: + out_dir=config["processed_countries"], + out_format=config["country_format"], + shell: + """ + python3 src/process_countries.py \ + -o {params.out_dir} \ + -f {params.out_format} \ + {input} + """ diff --git a/snakemake/train_predict.smk b/snakemake/train_predict.smk new file mode 100644 index 0000000..1b9d268 --- /dev/null +++ b/snakemake/train_predict.smk @@ -0,0 +1,495 @@ +import pandas as pd + + +include: "shared_rules.smk" + + +# Import tab separated file containing the configurations +# used for training each model. +model_df = pd.read_table(config["models"]).set_index("model", drop=True) +model_df = model_df.fillna("") + + +rule all: + input: + config["for_manual_review_dir"] + "/predictions.csv", + config["classif_train_outdir"] + "/combined_train_stats/combined_stats.csv", + config["classif_train_outdir"] + "/combined_test_stats/combined_stats.csv", + config["ner_train_outdir"] + "/combined_train_stats/combined_stats.csv", + config["ner_train_outdir"] + "/combined_test_stats/combined_stats.csv", + + +rule all_analysis: + input: + config["processed_countries"] + "/predictions.csv", + config["figures_dir"] + "/class_val_set_performances.svg", + config["figures_dir"] + "/class_val_set_performances.png", + config["figures_dir"] + "/ner_val_set_performances.svg", + config["figures_dir"] + "/ner_val_set_performances.png", + config["figures_dir"] + "/combined_classification_table.docx", + config["figures_dir"] + "/combined_ner_table.docx", + config["figures_dir"] + "/ip_coordinates.png", + config["figures_dir"] + "/ip_countries.png", + config["figures_dir"] + "/author_countries.png", + config["analysis_dir"] + "/analysed_metadata.txt", + config["analysis_dir"] + "/inventory_re3data_fairsharing_summary.csv", + config["analysis_dir"] + "/venn_diagram_sets.csv", + config["figures_dir"] + "/text_mining_potential.csv", + config["figures_dir"] + "/text_mining_potential_plot.png", + config["figures_dir"] + 
"/text_mining_potential_plot.svg", + config["figures_dir"] + "/inventory_funders.csv", + config["figures_dir"] + "/funders_geo_counts.csv", + config["figures_dir"] + "/funder_countries.png", + + +# Run EuropePMC query +rule query_epmc: + output: + query_results=config["query_out_dir"] + "/query_results.csv", + date_file1=config["query_out_dir"] + "/last_query_dates.txt", + date_file2=config["last_date_dir"] + "/last_query_dates.txt", + params: + out_dir=config["query_out_dir"], + begin_date=config["initial_query_start"], + end_date=config["initial_query_end"], + query=config["query_string"], + shell: + """ + python3 src/query_epmc.py \ + -o {params.out_dir} \ + --from-date {params.begin_date} \ + --to-date {params.end_date} \ + {params.query} + + cp {output.date_file1} {output.date_file2} + """ + + +# Split curated classification set into train, val, and test +rule split_classif_data: + input: + config["classif_data"], + output: + config["classif_splits_dir"] + "/train_paper_classif.csv", + config["classif_splits_dir"] + "/val_paper_classif.csv", + config["classif_splits_dir"] + "/test_paper_classif.csv", + params: + out_dir=config["classif_splits_dir"], + splits=config["split_ratios"], + shell: + """ + python3 src/class_data_generator.py \ + -o {params.out_dir} \ + --splits {params.splits} \ + -r \ + {input} + """ + + +# Train each classifier +rule train_classif: + input: + train=config["classif_splits_dir"] + "/train_paper_classif.csv", + val=config["classif_splits_dir"] + "/val_paper_classif.csv", + output: + config["classif_train_outdir"] + "/{model}/checkpt.pt", + config["classif_train_outdir"] + "/{model}/train_stats.csv", + params: + out_dir=config["classif_train_outdir"] + "/{model}", + metric=config["class_criteria_metric"], + epochs=config["classif_epochs"], + hf_model=lambda w: model_df.loc[w.model, "hf_name"], + batch_size=lambda w: model_df.loc[w.model, "batch_size"], + learn_rate=lambda w: model_df.loc[w.model, "learning_rate"], + weight_decay=lambda 
w: model_df.loc[w.model, "weight_decay"], + scheduler_flag=lambda w: model_df.loc[w.model, "scheduler"], + log: + config["classif_log_dir"] + "/{model}.log", + benchmark: + config["classif_benchmark_dir"] + "/{model}.txt" + shell: + """ + (python3 src/class_train.py \ + -c {params.metric} \ + -m {params.hf_model} \ + -ne {params.epochs} \ + -t {input.train} \ + -v {input.val} \ + -o {params.out_dir} \ + -batch {params.batch_size} \ + -rate {params.learn_rate} \ + -decay {params.weight_decay} \ + -r \ + {params.scheduler_flag} + )2> {log} + """ + + +# Combine training stats of all models +rule combine_classifier_stats: + input: + expand( + "{d}/{model}/train_stats.csv", + d=config["classif_train_outdir"], + model=model_df.index, + ), + output: + config["classif_train_outdir"] + "/combined_train_stats/combined_stats.csv", + params: + out_dir=config["classif_train_outdir"] + "/combined_train_stats", + shell: + """ + python3 src/combine_stats.py \ + -o {params.out_dir} \ + {input} + """ + + +# Select best trained classifier based on validation set +rule find_best_classifier: + input: + expand( + "{d}/{model}/checkpt.pt", + d=config["classif_train_outdir"], + model=model_df.index, + ), + output: + config["classif_train_outdir"] + "/best/best_checkpt.txt", + params: + out_dir=config["classif_train_outdir"] + "/best", + metric=config["class_criteria_metric"], + shell: + """ + python3 src/model_picker.py \ + -o {params.out_dir} \ + -m {params.metric} \ + {input} + """ + + +# Evaluate classification models on test set +rule evaluate_classifiers_on_test_set: + input: + infile=config["classif_splits_dir"] + "/test_paper_classif.csv", + model=config["classif_train_outdir"] + "/{model}/checkpt.pt", + output: + config["classif_train_outdir"] + "/{model}/test_set_evaluation/metrics.csv", + params: + outdir=config["classif_train_outdir"] + "/{model}/test_set_evaluation", + shell: + """ + python3 src/class_final_eval.py \ + -o {params.outdir} \ + -t {input.infile} \ + -c 
{input.model} + """ + + +# Combine training stats of all article classification models on test set +rule combine_classifier_test_stats: + input: + expand( + "{d}/{model}/test_set_evaluation/metrics.csv", + d=config["classif_train_outdir"], + model=model_df.index, + ), + output: + config["classif_train_outdir"] + "/combined_test_stats/combined_stats.csv", + params: + out_dir=config["classif_train_outdir"] + "/combined_test_stats", + shell: + """ + python3 src/combine_stats.py \ + -o {params.out_dir} \ + {input} + """ + + +# Split curated NER set into train, val, and test +rule split_ner_data: + input: + config["ner_data"], + output: + config["ner_splits_dir"] + "/train_ner.csv", + config["ner_splits_dir"] + "/val_ner.csv", + config["ner_splits_dir"] + "/test_ner.csv", + config["ner_splits_dir"] + "/train_ner.pkl", + config["ner_splits_dir"] + "/val_ner.pkl", + config["ner_splits_dir"] + "/test_ner.pkl", + params: + out_dir=config["ner_splits_dir"], + splits=config["split_ratios"], + shell: + """ + python3 src/ner_data_generator.py \ + -o {params.out_dir} \ + --splits {params.splits} \ + -r \ + {input} + """ + + +# Train each NER model +rule train_ner: + input: + train=config["ner_splits_dir"] + "/train_ner.pkl", + val=config["ner_splits_dir"] + "/val_ner.pkl", + output: + config["ner_train_outdir"] + "/{model}/checkpt.pt", + config["ner_train_outdir"] + "/{model}/train_stats.csv", + params: + out_dir=config["ner_train_outdir"] + "/{model}", + metric=config["ner_criteria_metric"], + epochs=config["ner_epochs"], + hf_model=lambda w: model_df.loc[w.model, "hf_name"], + batch_size=lambda w: model_df.loc[w.model, "batch_size"], + learn_rate=lambda w: model_df.loc[w.model, "learning_rate"], + weight_decay=lambda w: model_df.loc[w.model, "weight_decay"], + scheduler_flag=lambda w: model_df.loc[w.model, "scheduler"], + log: + config["ner_log_dir"] + "/{model}.log", + benchmark: + config["ner_benchmark_dir"] + "/{model}.txt" + shell: + """ + (python3 src/ner_train.py \ + -c 
{params.metric} \ + -m {params.hf_model} \ + -ne {params.epochs} \ + -t {input.train} \ + -v {input.val} \ + -o {params.out_dir} \ + -batch {params.batch_size} \ + -rate {params.learn_rate} \ + -decay {params.weight_decay} \ + -r \ + {params.scheduler_flag} + )2> {log} + """ + + +# Combine training stats of all NER models +rule combine_ner_stats: + input: + expand( + "{d}/{model}/train_stats.csv", + d=config["ner_train_outdir"], + model=model_df.index, + ), + output: + config["ner_train_outdir"] + "/combined_train_stats/combined_stats.csv", + params: + out_dir=config["ner_train_outdir"] + "/combined_train_stats", + shell: + """ + python3 src/combine_stats.py \ + -o {params.out_dir} \ + {input} + """ + + +# Select best NER model based on validation set +rule find_best_ner: + input: + expand( + "{d}/{model}/checkpt.pt", + d=config["ner_train_outdir"], + model=model_df.index, + ), + output: + config["ner_train_outdir"] + "/best/best_checkpt.txt", + params: + out_dir=config["ner_train_outdir"] + "/best", + metric=config["ner_criteria_metric"], + shell: + """ + python3 src/model_picker.py \ + -o {params.out_dir} \ + -m {params.metric} \ + {input} + """ + + +# Evaluate NER models on test set +rule evaluate_ner_on_test_set: + input: + infile=config["ner_splits_dir"] + "/test_ner.pkl", + model=config["ner_train_outdir"] + "/{model}/checkpt.pt", + output: + config["ner_train_outdir"] + "/{model}/test_set_evaluation/metrics.csv", + params: + outdir=config["ner_train_outdir"] + "/{model}/test_set_evaluation", + shell: + """ + python3 src/ner_final_eval.py \ + -o {params.outdir} \ + -t {input.infile} \ + -c {input.model} + """ + + +# Combine stats of all NER models on test set +rule combine_ner_test_stats: + input: + expand( + "{d}/{model}/test_set_evaluation/metrics.csv", + d=config["ner_train_outdir"], + model=model_df.index, + ), + output: + config["ner_train_outdir"] + "/combined_test_stats/combined_stats.csv", + params: + out_dir=config["ner_train_outdir"] + 
"/combined_test_stats", + shell: + """ + python3 src/combine_stats.py \ + -o {params.out_dir} \ + {input} + """ + + +# Perform deduplication on exact match names and URLs +rule initial_deduplication: + input: + config["processed_names_dir"] + "/predictions.csv", + output: + config["initial_dedupe_dir"] + "/predictions.csv", + params: + out_dir=config["initial_dedupe_dir"], + shell: + """ + python3 src/initial_deduplicate.py \ + -o {params.out_dir} \ + {input} + """ + + +# Create model metric plots and tables +rule analyze_performance_metrics: + input: + class_train=config["classification_train_stats"], + class_test=config["classification_test_stats"], + ner_train=config["ner_train_stats"], + ner_test=config["ner_test_stats"], + output: + config["figures_dir"] + "/class_val_set_performances.svg", + config["figures_dir"] + "/class_val_set_performances.png", + config["figures_dir"] + "/ner_val_set_performances.svg", + config["figures_dir"] + "/ner_val_set_performances.png", + config["figures_dir"] + "/combined_classification_table.docx", + config["figures_dir"] + "/combined_ner_table.docx", + params: + out_dir=config["figures_dir"], + shell: + """ + Rscript analysis/performance_metrics.R \ + -o {params.out_dir} \ + -cv {input.class_train} \ + -ct {input.class_test} \ + -nv {input.ner_train} \ + -nt {input.ner_test} + """ + + +# Create location data figures +rule process_location_data: + input: + config["final_inventory_file"], + output: + config["figures_dir"] + "/ip_coordinates.png", + config["figures_dir"] + "/ip_countries.png", + config["figures_dir"] + "/author_countries.png", + params: + out_dir=config["figures_dir"], + shell: + """ + Rscript analysis/location_information.R \ + -o {params.out_dir} \ + {input} + """ + + +# Analyse inventory metadata +rule process_metadata: + input: + config["final_inventory_file"], + output: + config["analysis_dir"] + "/analysed_metadata.txt", + shell: + """ + Rscript analysis/metadata_analysis.R \ + {input} \ + > {output} + """ + 
+ +# Compare against re3data and FAIRsharing +rule compare_repositories: + input: + inventory=config["final_inventory_file"], + output: + config["analysis_dir"] + "/inventory_re3data_fairsharing_summary.csv", + config["analysis_dir"] + "/venn_diagram_sets.csv", + params: + out_dir=config["analysis_dir"], + login=config["fair_login_file"], + shell: + """ + Rscript analysis/comparison.R \ + -o {params.out_dir} \ + -c {params.login} \ + {input.inventory} + """ + + +# Gather and analyze additional metadata from EuropePMC +rule analyze_text_mining_potential: + input: + inventory=config["final_inventory_file"], + output: + config["figures_dir"] + "/text_mining_potential.csv", + config["figures_dir"] + "/text_mining_potential_plot.png", + config["figures_dir"] + "/text_mining_potential_plot.svg", + params: + out_dir=config["figures_dir"], + query=config["query_string"], + shell: + """ + Rscript analysis/epmc_metadata.R \ + -o {params.out_dir} \ + -q {params.query} \ + {input.inventory} + """ + + +# Gather and analyze funder data +rule analyze_funding_agencies: + input: + inventory=config["final_inventory_file"], + output: + config["figures_dir"] + "/inventory_funders.csv", + params: + out_dir=config["figures_dir"], + shell: + """ + Rscript analysis/funders.R \ + -o {params.out_dir} \ + {input.inventory} + """ + + +# Analyze funder countries from the curated funders file +rule analyze_funding_countries: + input: + config["curated_funders"], + output: + config["figures_dir"] + "/funders_geo_counts.csv", + config["figures_dir"] + "/funder_countries.png", + params: + out_dir=config["figures_dir"], + shell: + """ + Rscript analysis/funders.R \ + -o {params.out_dir} \ + {input} + """ diff --git a/snakemake/update_inventory.smk b/snakemake/update_inventory.smk new file mode 100644 index 0000000..e5de36b --- /dev/null +++ b/snakemake/update_inventory.smk @@ -0,0 +1,47 @@ +include: "shared_rules.smk" + + +rule all: + input: + config["for_manual_review_dir"] + "/predictions.csv", + + +# Run EuropePMC query with new dates 
+rule query_epmc: + output: + query_results=config["query_out_dir"] + "/query_results.csv", + date_file1=config["query_out_dir"] + "/last_query_dates.txt", + date_file2=config["last_date_dir"] + "/last_query_dates.txt", + params: + out_dir=config["query_out_dir"], + query=config["query_string"], + from_date=config["query_from_date"], + to_date=config["query_to_date"], + shell: + """ + python3 src/query_epmc.py \ + -o {params.out_dir} \ + --from-date {params.from_date} \ + --to-date {params.to_date} \ + {params.query} + + cp {output.date_file1} {output.date_file2} + """ + + +# Perform deduplication on exact match names and URLs +rule initial_deduplication: + input: + new_file=config["processed_names_dir"] + "/predictions.csv", + previous_file=config["previous_inventory"], + output: + config["initial_dedupe_dir"] + "/predictions.csv", + params: + out_dir=config["initial_dedupe_dir"], + shell: + """ + python3 src/initial_deduplicate.py \ + -o {params.out_dir} \ + -p {input.previous_file} \ + {input.new_file} + """ diff --git a/src/README.md b/src/README.md new file mode 100644 index 0000000..c520011 --- /dev/null +++ b/src/README.md @@ -0,0 +1,636 @@ +# Overview + +This directory contains the source code used in this project. + +```sh +. 
+├── inventory_utils/ # Modules used in the project +├── check_urls.py # Gather information from URLs +├── class_data_generator.py # Prepare and split classification data +├── class_final_eval.py # Evaluate trained model on test set +├── class_predict.py # Use trained model to predict classification +├── class_train.py # Train classifier +├── combine_stats.py # Combine training/evaluation stats files +├── flag_for_review.py # Flag inventory for manual review +├── initial_deduplicate.py # Perform initial automated deduplication +├── model_picker.py # Select best trained model +├── ner_data_generator.py # Prepare and split NER data +├── ner_final_eval.py # Evaluate trained model on test set +├── ner_predict.py # Use trained model to perform NER +├── ner_train.py # Train NER model +├── process_countries.py # Process information about country codes +├── process_manual_review.py # Process manually reviewed inventory +├── process_names.py # Process predicted names to determine best +├── query_epmc.py # Query EuropePMC +└── url_extractor.py # Extract URLs from text +``` + +## Accessing Help + +Each of the executable scripts listed above will respond to the `-h` or `--help` flag by providing a usage statement. + +For example: +```sh +$ url_extractor.py --help +usage: url_extractor.py [-h] [-o DIR] FILE + +Extract URLs from "text" column of file. + +positional arguments: + FILE Input file (csv) + +optional arguments: + -h, --help show this help message and exit + -o, --out-dir DIR Output directory (default: out/) +``` + +# Running Query + +`query_epmc.py` + +EuropePMC is queried using the query provided. The query can be supplied directly on the command line (place quotes around it), or can be the name of a file whose only content is the query string. Such a file exists in [config/query.txt](../config/query.txt). + +The query should have the placeholders {0} and {1} for the start and end of the publication date range. 
This makes the query reusable, since the `-f|--from-date` and `-t|--to-date` values are provided at runtime. Again, these can be provided as literal strings, or as text files. + +Dates can be formatted as any of the following: + +* YYYY +* YYYY-MM +* YYYY-MM-DD + +If the query has no placeholders, the `--from-date` and `--to-date` arguments are ignored. + + +Once the query is completed, two files are created in `--out-dir`: + +* `last_query_dates.txt`: File with the date range used in the query for later reference (formatted as `from_date`-`to_date`) +* `query_results.csv`: Containing IDs, titles, abstracts, and first publication dates from query + +# Data Generation + +`class_data_generator.py` and `ner_data_generator.py` + +The first step for training is processing the manually curated files of labeled data. This includes splitting into training, validation, and testing splits. The proportions assigned to train, val, test splits can be specified with the `--splits` argument. To make the splits reproducible, the `-r|--seed` flag can be used to make the split non-random and consistent. + +Both scripts output 3 .csv files containing the split data. + +`ner_data_generator.py` outputs 3 additional files (.pkl), which are the inputs to `ner_train.py`. These files contain the tagged tokens for training. + + +# Model training + +`class_train.py` and `ner_train.py` + +These scripts load a pretrained `--model` from HuggingFace, and perform fine-tuning and classifier training. Training is done using the train and val splits from [Data Generation](#Data-Generation). `class_train.py` takes .csv files, while `ner_train.py` takes .pkl files. + +The `-m|--model-name` must be a valid HuggingFace model name, such as those in the "hf_name" column of [the model configuration file](../config/models_info.tsv). + +Several training parameters can be changed, such as learning rate, weight decay, batch size, and number of epochs. A learning rate scheduler can be optionally used. 
See [../config/README.md](../config/README.md#modelsinfotsv) for more information on these parameters. + +If it is desired to run training on only a certain number of samples, the `-nt|--num-training` argument can be used. + +Finally, to make training reproducible, the `-r|--seed` option is available. + +Training is run for the number of epochs specified by `-ne|--num-epochs`. The epoch with the most performant model is decided based on the `-c|--metric`, which can be *F*1, precision, or recall. + +Once training is complete, two outputs are created in `--out-dir`: +* `checkpt.pt`: The trained model checkpoint, which can be used for prediction +* `train_stats.csv`: File containing model performance statistics for each epoch of training. + +# Model selection + +`model_picker.py` + +Once all models that are to be compared have finished training, `model_picker.py` takes all the model checkpoint files as input in order to select the one with the highest validation score. Which metric to use for choosing the best model is passed in as `-m|--metric`. + +One output is created in `--out-dir`: +* `best_checkpt.txt`: Text file containing the location of the best model checkpoint. + +# Model evaluation + +`class_final_eval.py` and `ner_final_eval.py` + +Final evaluation of the chosen models is performed using `class_final_eval.py` and `ner_final_eval.py` on the withheld test sets. Precision, recall, *F*1 and loss are computed. + +One output file is created in `--out-dir`: +* `{out-dir}/metrics.csv` + +# Prediction + +`class_predict.py` and `ner_predict.py` + +The trained model checkpoint is used to perform prediction. + +## Classification + +`class_predict.py` outputs the same columns that are input (id, title, abstract) plus a `predicted_label` column. This column will contain the values specified using `-desc|--descriptive-labels` (default: 'not-bio-resource' and 'bio-resource').
+ +## NER + +NER prediction should only be performed on articles predicted to be (or manually classified as) biodata resources. + +The NER model predicts labels and assigns a probability score for the tokens in the title and abstract. Five labels are used: + +* `B-COM`: Beginning of common name +* `I-COM`: Non-first token of common name +* `B-FUL`: Beginning of full name +* `I-FUL`: Non-first token of full name +* `O`: Otherwise + +The predicted labels for each article are processed together. All named entities (anything that does not have an `O` label) are extracted. The probability of a named entity is taken as the average probability of the tokens composing that entity. + +Named entities are filtered on several conditions. All of the following are removed: + +* Entities that are a single character +* Entities greater than 100 characters long +* Entities that are actually a URL (contain "http") + +Both categories of names are deduplicated, such that for a given article, if the same name appears multiple times, it is only output once. For those deduplicated names, the highest probability score of the name's occurrences is reported. + +In the case that the name appears multiple times, but only differs in case (*e.g.* "Protein Data Bank" vs. "protein data bank"), it is also deduplicated. The version of the name that appears the most is reported, and in the case of a tie, the highest probability version is reported. + +Any records for which no names were predicted are dropped by the `ner_predict.py` script. + +`ner_predict.py` outputs the same input columns, but removes the predicted label column and adds four new columns: + +* `common_name`: Predicted "common name" +* `common_prob`: Probability score of common name label +* `full_name`: Predicted "full name" +* `full_name_prob`: Probability score of full name label + +# Downstream tasks + +Once classification and NER have been performed, other information can be gathered about the predicted resources.
These next steps take as input the output from `ner_predict.py`. + +## URL extraction + +`url_extractor.py` is used to extract all unique URLs from the "text" (title + abstract). This is done using a regular expression. + +Any records with either no detected URLs or more URLs than the limit `-x|--max-urls` (default: 2) are dropped from the inventory at this stage. + +## Name Selection + +The NER model may predict multiple common and full names. `process_names.py` selects the common name and full name with the highest probability, as well as the name with the overall highest probability. + +## Initial Deduplication + +Since many resources publish articles periodically to provide updates, many records may describe the same resource. To deduplicate the inventory, an initial automated deduplication is performed. + +`initial_deduplication.py` merges records that have the same `best_name` (name with highest probability) and same extracted URL (ignoring differences due to trailing slashes or difference between "http:" vs "https:"). + +Articles whose `best_name` probability is below the threshold (default: 0.978) are not merged, even if they appear to be duplicates. For this reason, it is best to use the same probability threshold for this step and the next step (flagging for manual review). + +## Flagging for Manual Review + +During this step, the inventory is marked for manual review. + +Records with a `best_name` probability below the threshold `-p|--min-prob` (default: 0.978) are marked for review in the `low_best_prob` column. + +Articles that have the same `best_name` are marked in the `duplicate_names` column. The values in the column are the IDs of the records that have the same name as the given record. + +Articles that have the same URL are marked in the `duplicate_urls` column. The values are given in the same way as in the `duplicate_names` column.
+ +## Processing Manually Reviewed Inventory + +Once the flagged inventory has been manually reviewed according to the instructions on Zenodo ([doi: 10.5281/zenodo.7768363](https://doi.org/10.5281/zenodo.7768363)), the determinations made during review are executed (*e.g.* removing certain rows, merging duplicates) by `process_manual_review.py`. + +There are quite a few validations to ensure that the manual review process was conducted in a way that it can be properly processed. If any errors are discovered during this validation, an error message with the ID values of bad rows will be given, as well as a description of the problem(s). + +The `text` column is dropped during this step. + +## Checking URLs + +`check_urls.py` checks each extracted URL by submitting a request. The status of the request (either a status code or the returned error message if an exception occurs) is recorded in a new column labeled `extracted_url_status`. Rows without URLs are removed, since the inventory requires a URL to identify the resource. + +The number of attempts to request the URL can be modified with the `-n|--num-tries` flag. To avoid exceeding the allowable number of attempts in a certain period of time, the `-b|--backoff` flag is used, where 0 adds no wait time and 1 adds the most wait time. + +Additionally, for URLs that return a status less than 400, various APIs are queried attempting to obtain the geolocation of the IP address which responded to the request. From this, the country and lat, lon coordinates are recorded. + +Then, each URL is submitted to [Internet Archive WaybackMachine](https://archive.org/help/wayback_api.php) to see if there exists an archived snapshot of the given URL. If so, this is marked as the checked URL. + +Since this process can take quite a while, it is implemented to allow for asynchronous parallelization. Each core supplied can submit a request at the same time, and as soon as one core finishes, it submits another.
By default all available cores are used, but the desired number of cores can be specified with the `-c|--cores` flag. + +Additionally, a `-v|--verbose` flag is available for debugging. + +*Note* Actual wait time is calculated as + +``` +{backoff factor} * (2 ** ({number of total retries} - 1)) +``` + +So with a backoff factor of 0.1, it will sleep for [0.0s, 0.2s, 0.4s, ...]. More information can be found in the [urllib3 documentation](https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#module-urllib3.util.retry). + +## Getting Metadata from EuropePMC + +`get_meta.py` queries EuropePMC to gather further metadata on the IDs that are now in the final inventory. + +In particular, the following columns are created: +* `affiliation`: Author affiliations +* `authors`: Author names +* `grant_ids`: Grant IDs +* `grant_agencies`: Unmodified grant agency list +* `num_citations`: Number of citations for the paper(s) describing that resource + +Since many resources are described in multiple articles, this information has to be aggregated. To do so, the IDs are separated and queried independently. The information for each resource is then re-joined. All information except for number of citations is just concatenated, using a comma and space to separate the information from each article. The number of citations is summed across the number of citations for each article associated with a given resource. + +The `-s|--chunk-size` parameter determines how many IDs to send to EuropePMC per request. 20 (default) is generally a good number. + +## Process Country Information + +`process_countries.py` processes and harmonizes the various information about countries in the inventory. + +During URL checking, the country of the IP address is queried using various APIs, which can give differently formatted country information (US vs USA). There are also mentions of countries in the author affiliations.
+ +The Python library `pycountry` is used to extract the country information, and harmonize any differences. + +The output country code can be of 4 types as defined by [ISO 3166](https://en.wikipedia.org/wiki/ISO_3166). The `-f|--format` option can be used to select the output country code type from `alpha-2`, `alpha-3`, `full`, and `numeric`. + +The `extracted_url_country` column gets overwritten, by replacing country codes with the desired format. + +A new column is added, `affiliation_countries`. This is created by searching for any mentions of countries in the `affiliation` column, which could be in `alpha-2`, `alpha-3`, or `full` format. All country mentions that are found are formatted in the desired country code format, and joined with a comma and a space. + +*Note*: Both of these country information columns may not be 100% accurate. The `extracted_url_country` may not be the actual host country. The `affiliation_countries` will miss any countries that are not in one of the 3 standard formats. Additionally, there may be false positives, such as the state of Georgia being labeled as the country Georgia. If the values in the two columns match, that may be a decent indicator that they are accurate. + +# Manual Workflow Examples + +Here, you can find an example of how to run the entire workflow(s) manually from the command-line. This should not be necessary, since there are Snakemake pipelines to automate, and notebooks to guide, the process (see [../README.md](../README.md)). What is shown here is essentially what is run by the Snakemake pipelines. This may be useful for debugging. + +*All commands shown here should be run from the root of the repository* (not from the `src/` folder). + +## Training and Prediction + +### Data Splitting + +First, split the manually curated datasets. We will split into 80% training, 10% validation, 10% test. A random seed is used to ensure that the splits are the same each time this step is run.
The choice of output directory is arbitrary. In these examples I will follow the schemes used in the Snakemake pipelines. +```sh +$ python3 src/class_data_generator.py \ + --out-dir out/classif_splits \ + --splits 0.8 0.1 0.1 \ + --seed \ + data/manual_classifications.csv +Done. Wrote 3 files to out/classif_splits. + +$ python3 src/ner_data_generator.py \ + --out-dir out/ner_splits \ + --splits 0.8 0.1 0.1 \ + --seed \ + data/manual_ner_extraction.csv +Done. Wrote 6 files to out/ner_splits. +``` + +3 files are created by `class_data_generator.py`, each is a .csv file of the corresponding dataset split. +```sh +$ ls out/classif_splits +test_paper_classif.csv train_paper_classif.csv val_paper_classif.csv +``` + +`ner_data_generator.py` creates 6 files. For each split, 2 files are created: a .csv file and .pkl file. The .pkl file is created because that is the input to the NER training. pkl is a Python Pickle file, which is essentially a way of directly storing a Python object. By storing the object directly, it simplifies reading in the tokenized and annotated data for training. +```sh +$ ls out/ner_splits +test_ner.csv test_ner.pkl train_ner.csv train_ner.pkl val_ner.csv val_ner.pkl +``` + +### Model Training + +Now, training can be performed. For the original project, 15 models were trained for each task (see [../config/models_info.tsv](../config/models_info.tsv) for all the models and their training parameters). For the sake of brevity, I will only demonstrate training two models for each task. + +During training, several messages will be output to the terminal. They are omitted here.
+ +First, training the paper classifier: +```sh +$ python3 src/class_train.py \ + --train-file out/classif_splits/train_paper_classif.csv \ + --val-file out/classif_splits/val_paper_classif.csv \ + --model-name bert-base-uncased \ + --out-dir out/classif_train_out/bert \ + --num-epochs 10 \ + --batch-size 16 \ + --learning-rate 3e-5 \ + --weight-decay 0 \ + --seed + +$ python3 src/class_train.py \ + --train-file out/classif_splits/train_paper_classif.csv \ + --val-file out/classif_splits/val_paper_classif.csv \ + --model-name allenai/biomed_roberta_base \ + --out-dir out/classif_train_out/biomed_roberta \ + --num-epochs 10 \ + --batch-size 16 \ + --learning-rate 2e-5 \ + --weight-decay 0 \ + --seed +``` + +After training, two files are created: the model checkpoint, which contains the trained model (along with training metrics), and a .csv file of the performance metrics on the training and validation sets for each epoch of training. The best performing model checkpoint is saved, even if at later epochs the performance drops. +```sh +$ ls out/classif_train_out/bert +checkpt.pt train_stats.csv +``` + +Then training the NER model: +```sh +$ python3 src/ner_train.py \ + --train-file out/ner_splits/train_ner.pkl \ + --val-file out/ner_splits/val_ner.pkl \ + --model-name bert-base-uncased \ + --out-dir out/ner_train_out/bert \ + --num-epochs 10 \ + --batch-size 16 \ + --learning-rate 3e-5 \ + --weight-decay 0 \ + --seed + +$ python3 src/ner_train.py \ + --train-file out/ner_splits/train_ner.pkl \ + --val-file out/ner_splits/val_ner.pkl \ + --model-name allenai/biomed_roberta_base \ + --out-dir out/ner_train_out/biomed_roberta \ + --num-epochs 10 \ + --batch-size 16 \ + --learning-rate 2e-5 \ + --weight-decay 0 \ + --seed +``` + +### Model Comparison + +The same program is used to choose both the best classification and NER model. It takes any number of model checkpoints as input.
+ +```sh +$ python3 src/model_picker.py \ + --out-dir out/classif_train_out/best \ + out/classif_train_out/*/checkpt.pt +Checkpoint of best model is out/classif_train_out/biomed_roberta/checkpt.pt +Done. Wrote output to out/classif_train_out/best/best_checkpt.txt + +$ python3 src/model_picker.py \ + --out-dir out/ner_train_out/best \ + out/ner_train_out/*/checkpt.pt +Checkpoint of best model is out/ner_train_out/biomed_roberta/checkpt.pt +Done. Wrote output to out/ner_train_out/best/best_checkpt.txt +``` + +This creates a text file in the output directory which contains the path of the best model checkpoint. +```sh +$ ls out/classif_train_out/best +best_checkpt.txt + +$ ls out/ner_train_out/best +best_checkpt.txt +``` + +### Model Evaluation + +To estimate how the model will perform on the full dataset and in future runs, the best model is evaluated on the held-out test set. Since the model has not yet seen these data at all, it acts as a representative of new incoming data. + +You can manually supply the path to the best model checkpoint as indicated in the above steps, or just `cat` the contents of the `best_checkpt.txt` file and pipe that into the evaluation command using `/dev/stdin` as shown below. + +```sh +$ cat out/classif_train_out/best/best_checkpt.txt | \ + python3 src/class_final_eval.py \ + --out-dir out/classif_train_out/best/test_set_evaluation \ + --test-file out/classif_splits/test_paper_classif.csv \ + --checkpoint /dev/stdin +Done. Wrote output to out/classif_train_out/best/test_set_evaluation/. + +$ cat out/ner_train_out/best/best_checkpt.txt | \ + python3 src/ner_final_eval.py \ + --out-dir out/ner_train_out/best/test_set_evaluation \ + --test-file out/ner_splits/test_ner.pkl \ + --checkpoint /dev/stdin +Done. Wrote output to out/ner_train_out/best/test_set_evaluation.
+ +$ ls out/ner_train_out/best/test_set_evaluation +metrics.csv +``` + +### Performing Query + +To get the full list of papers that we will assess, we can obtain the original query. Note that if papers are added retroactively, the yield of this query may change in the future, but should be largely the same. + +```sh +$ python3 src/query_epmc.py \ + --out-dir out/original_query \ + --from-date 2011 \ + --to-date 2021 \ + config/query.txt +Done. Wrote 2 files to out/original_query +``` + +2 files are written to the output directory. One is the results of the query, the other is a text file containing today's date. + +```sh +$ ls out/original_query +last_query_date.txt query_results.csv +``` + +In order to always have the last query date text file in a known place, copy it over. That way, we can always pass in the file `out/last_query_date/last_query_date.txt` when updating the inventory. +```sh +$ cp out/original_query/last_query_date.txt out/last_query_date/ +``` + +### Predicting on Full Query Results + +Now, we have the best trained models, and an indication of how they will perform on new data, so we can run them on the original full corpus. + +First, run classification +```sh +$ cat out/classif_train_out/best/best_checkpt.txt | \ + python3 src/class_predict.py \ + --out-dir out/original_query/classification \ + --input-file out/original_query/query_results.csv \ + --checkpoint /dev/stdin + +$ ls out/original_query/classification +predictions.csv +``` + +If this results in an "Illegal seek" error, you can check the contents of `out/classif_train_out/best/best_checkpt.txt`, and manually supply that as the `--checkpoint` argument.
For example: +```sh +$ cat out/classif_train_out/best/best_checkpt.txt +out/classif_train_out/biomedroberta_rct500/checkpt.pt + +$ python3 src/class_predict.py \ + --out-dir out/original_query/classification \ + --input-file out/original_query/query_results.csv \ + --checkpoint out/classif_train_out/biomedroberta_rct500/checkpt.pt + +$ ls out/original_query/classification +predictions.csv +``` + +Filter to include only those papers predicted to describe biodata resources. This can be done with `grep -v` to get lines not containing the negative label. +```sh +$ grep -v 'not-bio-resource' \ + out/original_query/classification/predictions.csv \ + > out/original_query/classification/predicted_positives.csv +``` + +Run NER on the predicted positives +```sh +$ cat out/ner_train_out/best/best_checkpt.txt | \ + python3 src/ner_predict.py \ + --out-dir out/original_query/ner \ + --input-file out/original_query/classification/predicted_positives.csv \ + --checkpoint /dev/stdin + +$ ls out/original_query/ner +predictions.csv +``` + +Extract URLs +```sh +$ python3 src/url_extractor.py \ + --out-dir out/original_query/url_predictions \ + out/original_query/ner/predictions.csv +``` + +Process names +```sh +$ python3 src/process_names.py \ + --out-dir out/original_query/processed_names \ + out/original_query/url_predictions/predictions.csv +``` + +Initial Deduplication +```sh +$ python3 src/initial_deduplication.py \ + --out-dir out/original_query/initial_deduplication \ + out/original_query/processed_names/predictions.csv +``` + +Flag for selective manual review +```sh +$ python3 src/flag_for_review.py \ + --out-dir out/original_query/manual_review \ + --min-prob 0.978 \ + out/original_query/initial_deduplication/predictions.csv +``` + +Make directory for manually reviewed inventory +```sh +$ mkdir -p out/original_query/manually_reviewed +``` + +At this point, the inventory must be manually reviewed following the instructions.
+ +Once it has been reviewed, place the file (`predictions.csv`) in the folder created above. + +Process manually reviewed inventory +```sh +$ python3 src/process_manual_review.py \ + --out-dir out/original_query/processed_manual_review \ + out/original_query/manually_reviewed/predictions.csv +``` + +Check URL HTTP statuses +```sh +$ python3 src/check_urls.py \ + --out-dir out/original_query/url_checks \ + --chunk-size 200 \ + --num-tries 3 \ + --backoff 0.5 \ + out/original_query/processed_manual_review/predictions.csv +``` + +If the above gets interrupted, the partially completed output can be supplied as input to resume where it left off +```sh +$ python3 src/check_urls.py \ + --out-dir out/original_query/url_checks \ + --chunk-size 200 \ + --num-tries 3 \ + --backoff 0.5 \ + --partial out/original_query/url_checks/predictions.csv \ + out/original_query/processed_manual_review/predictions.csv +``` + +Get additional metadata from EuropePMC +```sh +$ python3 src/get_meta.py \ + --out-dir out/original_query/epmc_meta \ + --chunk-size 20 \ + out/original_query/url_checks/predictions.csv +``` + +Process country information +```sh +$ python3 src/process_countries.py \ + --out-dir out/original_query/processed_countries \ + out/original_query/epmc_meta/predictions.csv +``` + +## Updating the Inventory + +These commands do not have to be run manually, since there are Snakemake pipelines and notebooks as described in [../README.md](../README.md). This example workflow is provided as additional documentation, and may be useful in debugging. + +### Query EuropePMC + +If this is the first time updating the inventory, the `--from-date` must be supplied manually. Here, I will use the last date from the original inventory. Otherwise, you can use the file resulting from the last run. +```sh +$ python3 src/query_epmc.py \ + --out-dir out/new_query \ + --from-date out/last_query_date/last_query_date.txt \ + config/query.txt +Done.
Wrote 2 files to out/new_query + +$ cp out/new_query/last_query_date.txt out/last_query_date/ +``` + +Two files are output to `--out-dir`: `last_query_date.txt` and `new_query_results.csv`. The former is then used in the next query. + +### Obtain models + +If the best trained models are not present in `out/classif_train_out/best/` and `out/ner_train_out/best/`, then they can be downloaded using the following command. + +```sh +# command to get models +``` + +### Perform predictions and get other information + +Classify the new results: +```sh +$ cat out/classif_train_out/best/best_checkpt.txt | \ + python3 src/class_predict.py \ + --out-dir out/new_query/classification \ + --input-file out/new_query/new_query_results.csv \ + --checkpoint /dev/stdin +``` + +Filter to include only those papers predicted to describe biodata resources. +```sh +$ grep -v 'not-bio-resource' \ + out/new_query/classification/predictions.csv \ + > out/new_query/classification/predicted_positives.csv +``` + +Run NER on the predicted positives +```sh +$ cat out/ner_train_out/best/best_checkpt.txt | \ + python3 src/ner_predict.py \ + --out-dir out/new_query/ner \ + --input-file out/new_query/classification/predicted_positives.csv \ + --checkpoint /dev/stdin +``` + +Extract URLs +```sh +$ python3 src/url_extractor.py \ + --out-dir out/new_query/urls \ + out/new_query/ner/predictions.csv +``` + +Check URLs +```sh +$ python3 src/check_urls.py \ + --out-dir out/new_query/check_urls \ + out/new_query/urls/predictions.csv +``` + +Get other metadata from EuropePMC query +```sh +$ python3 src/get_meta.py \ + --out-dir out/new_query/meta \ + out/new_query/check_urls/predictions.csv +``` + diff --git a/src/check_urls.py b/src/check_urls.py new file mode 100755 index 0000000..b98814d --- /dev/null +++ b/src/check_urls.py @@ -0,0 +1,900 @@ +#!/usr/bin/env python3 +""" +Purpose: Check URL statuses, attempt to Geolocate, and check WayBack +Authors: Kenneth Schackart +""" + +import argparse +import logging 
+import multiprocessing as mp +import os +import re +import socket +from concurrent.futures import ThreadPoolExecutor +from functools import partial +from multiprocessing.pool import Pool +from typing import List, NamedTuple, Optional, OrderedDict, TextIO, Union, cast + +import numpy as np +import pandas as pd +import pytest +import requests +from pandas.testing import assert_frame_equal +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +from inventory_utils.custom_classes import CustomHelpFormatter +from inventory_utils.wrangling import chunk_rows, join_commas + +# --------------------------------------------------------------------------- +API_REQ_DICT = { + 'ipinfo': 'https://ipinfo.io/{}/json', + 'ip-api': 'http://ip-api.com/json/{}' +} +""" +Dictionary of APIs that geolocate from IP, and their templates. +Fill in template with `API_REQ_DICT[api].format(ip)` + +`key`: API name +`value`: Template with `{}` placeholder for IP address +""" + + +# --------------------------------------------------------------------------- +class Args(NamedTuple): + """ Command-line arguments """ + file: TextIO + partial: Optional[TextIO] + out_dir: str + verbose: bool + cores: Optional[int] + chunk_size: Optional[int] + num_tries: int + backoff: float + + +# --------------------------------------------------------------------------- +class URLStatus(NamedTuple): + """ + URL and its returned status and location + + `url`: URL string + `status`: URL status or error message from request + `country`: Geolocated country from IP address + `coordinates`: Geolocated coordinates (lan, lon) from IP address + """ + url: str + status: Union[str, int] + country: str + coordinates: str + + +# --------------------------------------------------------------------------- +class IPLocation(NamedTuple): + """ + IP address location + + `country`: Geolocated country from IP address + `coordinates`: Geolocated coordinates (lan, lon) from IP address + """ + country: str + 
coordinates: str + + +# --------------------------------------------------------------------------- +@pytest.fixture(name='in_dataframe') +def fixture_in_dataframe() -> requests.Session: + """ A minimal example of input dataframe, not all columns present """ + + in_df = pd.DataFrame([[123, 'Some text', 'http://google.com'], + [456, 'More text', 'http://google.com'], + [789, 'Third text', 'http://foo.com'], + [147, 'Fourth text', 'http://baz.net/']], + columns=['ID', 'text', 'extracted_url']) + + return in_df + + +# --------------------------------------------------------------------------- +def get_args() -> Args: + """ Parse command-line arguments """ + + parser = argparse.ArgumentParser( + description=('Check extracted URL statuses and ' + 'see if snapshot is in WayBack Machine'), + formatter_class=CustomHelpFormatter) + + inputs = parser.add_argument_group('Inputs and Outputs') + runtime_params = parser.add_argument_group('Runtime Parameters') + url_checking = parser.add_argument_group('URL Requesting') + + inputs.add_argument('file', + metavar='FILE', + type=argparse.FileType('rt', encoding='ISO-8859-1'), + help='CSV File with extracted_url column') + inputs.add_argument('-p', + '--partial', + metavar='FILE', + type=argparse.FileType('rt', encoding='ISO-8859-1'), + help='Partially completed output file') + inputs.add_argument('-o', + '--out-dir', + metavar='DIR', + type=str, + default='out/', + help='Output directory') + + runtime_params.add_argument('-v', + '--verbose', + action='store_true', + help=('Run with debugging messages')) + runtime_params.add_argument('-c', + '--cores', + metavar='CORE', + type=int, + help=('Number of cores for multi-' + 'processing. -c <= 0 will use' + ' -c less than all available. ' + 'If not supplied, threading is used ' + 'instead of multiprocessing. ' + '(default: threading)')) + runtime_params.add_argument('-s', + '--chunk-size', + metavar='INT', + type=int, + help=('Number of rows ' + 'to process at a time. 
Output ' + 'is appended after each chunk. ' + '(default: all)')) + + url_checking.add_argument('-n', + '--num-tries', + metavar='INT', + type=int, + default=3, + help='Number of tries for checking URL') + url_checking.add_argument('-b', + '--backoff', + metavar='[0-1]', + type=float, + default=0.5, + help='Back-off Factor for retries') + + args = parser.parse_args() + + if not 0 <= args.backoff <= 1: + parser.error(f'--backoff ({args.backoff}) must ' + 'be between 0 and 1, inclusive.') + + if not args.num_tries >= 0: + parser.error(f'--num-tries ({args.num_tries}) must be at least 1') + + return Args(args.file, args.partial, args.out_dir, args.verbose, + args.cores, args.chunk_size, args.num_tries, args.backoff) + + +# --------------------------------------------------------------------------- +def remove_partial(all_df: pd.DataFrame, + partial_df: pd.DataFrame) -> pd.DataFrame: + """ + Remove rows in `all_df` that are in `partial_df`, since their URLs have + already been checked. + + Parameters: + `all_df`: Input dataframe containing all rows + `partial_df`: Dataframe of rows that have been checked + + Return: Dataframe without rows from `partial_df` + """ + + logging.debug('Removing articles present in the partially processed' + ' output file.') + out_df = all_df.copy() + processed_ids = partial_df['ID'] + + logging.debug('Removing %d articles.', len(processed_ids)) + out_df = out_df[~out_df['ID'].isin(processed_ids)] + out_df.reset_index(inplace=True, drop=True) + + return out_df + + +# --------------------------------------------------------------------------- +def test_remove_partial(in_dataframe) -> None: + """ Test remove_partial() """ + + part_df = pd.DataFrame( + [[456, 'More text', 'http://google.com', 200], + [789, 'Third text', 'http://foo.com', 404]], + columns=['ID', 'text', 'extracted_url', 'extracted_url_status']) + + out_df = pd.DataFrame([[123, 'Some text', 'http://google.com'], + [147, 'Fourth text', 'http://baz.net/']], + columns=['ID', 'text', 
'extracted_url']) + + assert_frame_equal(remove_partial(in_dataframe, part_df), out_df) + + +# --------------------------------------------------------------------------- +def get_session(tries: int, backoff: float = 0) -> requests.Session: + """ + Establish request `Session` applying tries and backoff + + Parameters: + `tries`: Number of request attempts + `backoff`: Backoff factor to prevent quota error + + Return: A `requests.Session` + """ + + session = requests.Session() + + # total arg is provided as number of retries, so subtract one + retry = Retry(total=tries - 1, backoff_factor=backoff) + adapter = HTTPAdapter(max_retries=retry) + session.mount('http://', adapter) + session.mount('https://', adapter) + + return session + + +# --------------------------------------------------------------------------- +def test_get_session() -> None: + """ Test get_session() """ + + session = get_session(3, 0.5) + + assert isinstance(session, requests.Session) + assert isinstance(session.adapters, OrderedDict) + assert isinstance(session.adapters['http://'], HTTPAdapter) + assert isinstance(session.adapters['http://'].max_retries, Retry) + assert session.adapters['http://'].max_retries.total == 2 + + +# --------------------------------------------------------------------------- +@pytest.fixture(name='testing_session') +def fixture_testing_session() -> requests.Session: + """ A basic session used for testing requests """ + + return get_session(1, 0) + + +# --------------------------------------------------------------------------- +def remove_missing_urls(df: pd.DataFrame) -> pd.DataFrame: + """ + Remove rows that do not have any URLs + + Parameters: + `df`: Raw dataframe + + Return: Dataframe with no missing URLs + """ + + return df.replace({ + 'extracted_url': '' + }, np.nan).dropna(subset=['extracted_url']) + + +# --------------------------------------------------------------------------- +def test_remove_missing_urls() -> None: + """ Test remove_missing_urls() """ + + 
def expand_url_col(df: pd.DataFrame) -> pd.DataFrame:
    """
    Expand the URL column, by creating a row per URL.

    The comma-separated `extracted_url` string of each row is split on
    ', ' and every URL gets its own row; all other columns are repeated.
    The input dataframe is not modified.

    Parameters:
    `df`: Dataframe with extracted_url column

    Return: New dataframe with one row per URL, re-indexed from 0
    """
    logging.debug('Expanding URL column. One row per URL')

    # assign() builds a new frame, so the caller's column is not overwritten
    # with lists (the caller may hold a slice of a larger frame).
    split_df = df.assign(extracted_url=df['extracted_url'].str.split(', '))

    return split_df.explode('extracted_url').reset_index(drop=True)
def make_filename(out_dir: str, infile_name: str) -> str:
    """
    Build the output path by placing the input file's basename
    inside the output directory.

    Parameters:
    `out_dir`: Output directory
    `infile_name`: Input file name (any leading directories are dropped)

    Return: Output filename
    """

    _, base_name = os.path.split(infile_name)

    return os.path.join(out_dir, base_name)
def extract_domain(url: str) -> str:
    """
    Extract domain name from URL

    The scheme (`http://` or `https://`, any letter case), path, query
    string, fragment, user-info (`user:pass@`) and port are removed so
    that only the host name remains. A URL with no scheme is accepted.

    Parameters:
    `url`: URL string

    Return: Domain string
    """

    # Only a leading scheme is removed; schemes are case-insensitive
    domain = re.sub(r'^https?://', '', url.strip(), flags=re.IGNORECASE)

    # The authority part ends at the first '/', '?' or '#'
    domain = re.split(r'[/?#]', domain, maxsplit=1)[0]

    # Drop optional user-info and a trailing port; neither is part of the
    # host name and both would make a host-name lookup fail
    domain = domain.rsplit('@', 1)[-1]
    domain = re.sub(r':\d*$', '', domain)

    return domain
def check_url(url: str, session: requests.Session) -> URLStatus:
    """
    Request a URL through `session` and, when the response status is
    below 400, look up the location of the host serving it.

    Parameters:
    `url`: URL string
    `session`: request `Session`

    Return: `URLStatus` object. Its status is whatever `request_url()`
    gave back (a status code or an error message); country and
    coordinates are empty strings when no location lookup was made.
    """

    logging.debug('Requesting %s', url)
    status = request_url(url, session)
    logging.debug('Returned status for %s: %s', url, str(status))

    # Error messages (str) and 4xx/5xx codes are not geolocated
    reachable = isinstance(status, int) and status < 400
    location = get_location(url) if reachable else IPLocation('', '')

    return URLStatus(url, status, location.country, location.coordinates)
" + "Perhaps you meant http://adflkbndijfbn?") + assert url_status.country == '' + + +# --------------------------------------------------------------------------- +def merge_url_statuses(df: pd.DataFrame, + url_statuses: List[URLStatus]) -> pd.DataFrame: + """ + Create column of URL statuses + + Parameters: + `df`: Dataframe containing extracted_url column + `url_statuses`: List of `URLStatus` objects + + Return: Same dataframe, with additional extracted_url_status column + """ + + url_dict = { + x.url: { + 'status': x.status, + 'country': x.country, + 'coordinates': x.coordinates + } + for x in url_statuses + } + + df['extracted_url_status'] = df['extracted_url'].map( + lambda x: url_dict[x]['status']) + df['extracted_url_country'] = df['extracted_url'].map( + lambda x: url_dict[x]['country']) + df['extracted_url_coordinates'] = df['extracted_url'].map( + lambda x: url_dict[x]['coordinates']) + + return df + + +# --------------------------------------------------------------------------- +def test_merge_url_statuses() -> None: + """ Test merge_url_statuses() """ + + in_df = pd.DataFrame([[123, 'Some text', 'https://www.google.com'], + [456, 'More text', 'http://google.com']], + columns=['ID', 'text', 'extracted_url']) + + statuses = [ + URLStatus('http://google.com', 301, '', ''), + URLStatus('https://www.google.com', 200, 'United States', + '(34.0522,-118.2437)') + ] + + out_df = pd.DataFrame([[ + 123, 'Some text', 'https://www.google.com', 200, 'United States', + '(34.0522,-118.2437)' + ], [456, 'More text', 'http://google.com', 301, '', '']], + columns=[ + 'ID', 'text', 'extracted_url', + 'extracted_url_status', 'extracted_url_country', + 'extracted_url_coordinates' + ]) + + assert_frame_equal(merge_url_statuses(in_df, statuses), out_df) + + +# --------------------------------------------------------------------------- +def check_wayback(url: str) -> str: + """ + Check the WayBack Machine for an archived version of requested URL + + Parameters: + `url`: URL to 
def check_urls(df: pd.DataFrame, cores: Optional[int],
               session: requests.Session) -> pd.DataFrame:
    """
    Check every URL in the extracted_url column and look each one up
    on the WayBack Machine.

    Parameters:
    `df`: Dataframe with extracted_url column (one URL per row)
    `cores`: `None` to check URLs with a thread pool; any integer to use
    a multiprocessing pool obtained from `get_pool(cores)`
    `session`: requests `Session`

    Return: Copy of `df` with the columns added by `merge_url_statuses()`
    plus a wayback_url column
    """

    check_one = partial(check_url, session=session)
    checked_df = df.copy()
    urls = checked_df['extracted_url']

    if cores is None:
        logging.debug('Starting thread pool.')
        with ThreadPoolExecutor() as executor:
            logging.debug('Checking extracted URL statuses. ')

            url_statuses = list(executor.map(check_one, urls))
    else:
        logging.debug('Starting multiple processes.')
        with get_pool(cores) as pool:
            logging.debug('Checking extracted URL statuses. ')

            url_statuses = pool.map(check_one, urls)

    checked_df = merge_url_statuses(checked_df, url_statuses)

    logging.debug('Finished checking extracted URLs.')
    logging.debug('Checking for snapshots of extracted URLs '
                  'on WayBack Machine.')

    # WayBack lookups run one after another in this process
    checked_df['wayback_url'] = checked_df['extracted_url'].map(check_wayback)

    logging.debug('Finished checking WayBack Machine.')

    return checked_df
One row per article') + df['extracted_url_status'] = df['extracted_url_status'].astype(str) + df['extracted_url'] = df['extracted_url'].astype(str) + df['wayback_url'] = df['wayback_url'].astype(str) + + out_df = (df.groupby(['ID']).agg({ + 'best_name': 'first', + 'best_name_prob': 'first', + 'best_common': 'first', + 'best_common_prob': 'first', + 'best_full': 'first', + 'best_full_prob': 'first', + 'article_count': 'first', + 'publication_date': 'first', + 'extracted_url': join_commas, + 'extracted_url_status': join_commas, + 'extracted_url_country': join_commas, + 'extracted_url_coordinates': join_commas, + 'wayback_url': join_commas, + }).reset_index()) + + return out_df + + +# --------------------------------------------------------------------------- +def test_regroup_df() -> None: + """ Test regroup_df() """ + + in_df = pd.DataFrame( + [[ + 123, 'Some text', 'google', 0.99, 'google', 0.99, '', '', + 'https://www.google.com', 200, 'US', '(12,10)', 'wayback_google', + '2012-01-01', '2' + ], + [ + 123, 'Some text', 'google', 0.99, 'google', 0.99, '', '', + 'http://google.com', 301, 'US', '(100,17)', 'no_wayback', + '2012-01-01', '2' + ], + [ + 789, 'Foo', 'amazon', 0.87, 'amazon', 0.87, 'The Amazon', 0.65, + 'https://www.amazon.com/afbadfbnvbadfbaefbnaegn', 404, '', '', + 'no_wayback', '2012-01-02', '1' + ]], + columns=[ + 'ID', 'text', 'best_name', 'best_name_prob', 'best_common', + 'best_common_prob', 'best_full', 'best_full_prob', 'extracted_url', + 'extracted_url_status', 'extracted_url_country', + 'extracted_url_coordinates', 'wayback_url', 'publication_date', + 'article_count' + ]) + + out_df = pd.DataFrame( + [[ + 123, 'google', 0.99, 'google', 0.99, '', '', '2', '2012-01-01', + 'https://www.google.com, http://google.com', '200, 301', 'US, US', + '(12,10), (100,17)', 'wayback_google, no_wayback' + ], + [ + 789, 'amazon', 0.87, 'amazon', 0.87, 'The Amazon', 0.65, '1', + '2012-01-02', 'https://www.amazon.com/afbadfbnvbadfbaefbnaegn', + '404', '', '', 
'no_wayback' + ]], + columns=[ + 'ID', 'best_name', 'best_name_prob', 'best_common', + 'best_common_prob', 'best_full', 'best_full_prob', 'article_count', + 'publication_date', 'extracted_url', 'extracted_url_status', + 'extracted_url_country', 'extracted_url_coordinates', 'wayback_url' + ]) + + assert_frame_equal(regroup_df(in_df), out_df) + + +# --------------------------------------------------------------------------- +def main() -> None: + """ Main function """ + + args = get_args() + out_dir = args.out_dir + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + if not os.path.isdir(out_dir): + os.makedirs(out_dir) + + outfile = make_filename(out_dir, args.file.name) + + session = get_session(args.num_tries, args.backoff) + + logging.debug('Reading input file: %s.', args.file.name) + df = remove_missing_urls(pd.read_csv(args.file, dtype=str)) + + if args.partial: + part_df = pd.read_csv(args.partial, dtype=str) + df = remove_partial(df, part_df) + part_df.to_csv(outfile, index=False) + + for i, chunk in enumerate(chunk_rows(df, args.chunk_size)): + logging.debug('Processing chunk %d (%d articles).', i + 1, len(chunk)) + df = expand_url_col(chunk) + df = check_urls(df, args.cores, session) + df = regroup_df(df) + + logging.debug('Writing intermediate output to %s.', outfile) + df.to_csv(outfile, + index=False, + mode='a', + header=not os.path.exists(outfile)) + + session.close() + + print(f'Done. 
def get_args() -> Args:
    """
    Parse command-line arguments

    Exits through `parser.error()` when the three `--splits` proportions
    do not add up to 1 (within floating-point rounding).

    Return: `Args` with the input filehandle, output directory,
    split proportions and seed flag
    """

    parser = argparse.ArgumentParser(
        description='Split curated classification data',
        formatter_class=CustomHelpFormatter)

    # NOTE: a positional argument without `nargs` is always required, so
    # this `default` is never applied by argparse.
    parser.add_argument('infile',
                        metavar='FILE',
                        type=argparse.FileType('rt', encoding='ISO-8859-1'),
                        default='data/manual_classifications.csv',
                        help='Manually classified input file')
    parser.add_argument('-o',
                        '--out-dir',
                        metavar='',
                        type=str,
                        default='data/classif_splits',
                        help='Output directory')
    parser.add_argument('--splits',
                        metavar='',
                        type=float,
                        nargs=3,
                        default=[0.7, 0.15, 0.15],
                        help='Proportions for train, val, test splits')
    parser.add_argument('-r',
                        '--seed',
                        action='store_true',
                        help='Set random seed')

    args = parser.parse_args()

    # Compare with a tolerance: exact equality rejects valid input such as
    # 0.7 0.2 0.1, whose float sum can come out as 0.9999999999999999.
    if abs(sum(args.splits) - 1.0) > 1e-9:
        parser.error(f'--splits {args.splits} must sum to 1')

    return Args(args.infile, args.out_dir, args.splits, args.seed)
def main() -> None:
    """
    Read the curated data, validate and filter it, split it into
    train/val/test sets and write one CSV per split to the output directory.
    """

    args = get_args()
    out_dir = args.outdir

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    curated_df = pd.read_csv(args.infile, dtype=str)
    check_input(curated_df)

    scored_df = filter_data(curated_df)
    check_data(scored_df)

    train_df, val_df, test_df = split_df(scored_df, args.seed, args.splits)

    # Each split is written under a fixed file name inside out_dir
    outputs = (
        (train_df, 'train_paper_classif.csv'),
        (val_df, 'val_paper_classif.csv'),
        (test_df, 'test_paper_classif.csv'),
    )
    for split, file_name in outputs:
        split.to_csv(os.path.join(out_dir, file_name), index=False)

    print(f'Done. Wrote 3 files to {out_dir}.')
def get_args() -> Args:
    """
    Parse command-line arguments

    Return: `Args` holding the test file and checkpoint filehandles, the
    output directory, the data-column settings, max sequence length and
    batch size
    """

    parser = argparse.ArgumentParser(
        description='Evaluate model on held-out test set',
        formatter_class=CustomHelpFormatter)

    inputs = parser.add_argument_group('Inputs and Outputs')
    data_info = parser.add_argument_group('Information on Data')
    model_params = parser.add_argument_group('Model Parameters')
    runtime_params = parser.add_argument_group('Runtime Parameters')

    inputs.add_argument('-t',
                        '--test-file',
                        metavar='FILE',
                        type=argparse.FileType('rt'),
                        required=True,
                        help='Test data file')
    inputs.add_argument('-c',
                        '--checkpoint',
                        metavar='PT',
                        type=argparse.FileType('rb'),
                        required=True,
                        help='Trained model checkpoint')
    # Not marked required: combined with a default, `required=True` made
    # the default unreachable. Passing -o explicitly still works as before.
    inputs.add_argument('-o',
                        '--out-dir',
                        metavar='DIR',
                        type=str,
                        default='out/',
                        help='Directory to output metrics')

    data_info.add_argument('-pred',
                           '--predictive-field',
                           metavar='PRED',
                           type=str,
                           default='title_abstract',
                           help='Data column to use for prediction',
                           choices=['title', 'abstract', 'title_abstract'])
    data_info.add_argument('-labs',
                           '--labels-field',
                           metavar='LABS',
                           type=str,
                           default='curation_score',
                           help='Data column with classification labels')
    data_info.add_argument('-desc',
                           '--descriptive-labels',
                           metavar='LAB',
                           type=str,
                           nargs=2,
                           default=['not-bio-resource', 'bio-resource'],
                           help='Descriptions of the classification labels')

    model_params.add_argument('-max',
                              '--max-len',
                              metavar='INT',
                              type=int,
                              default=256,
                              help='Max Sequence Length')

    runtime_params.add_argument('-batch',
                                '--batch-size',
                                metavar='INT',
                                type=int,
                                default=8,
                                help='Batch Size')

    args = parser.parse_args()

    return Args(args.test_file, args.checkpoint, args.out_dir,
                args.predictive_field, args.descriptive_labels,
                args.labels_field, args.max_len, args.batch_size)
class Args(NamedTuple):
    """
    Command-line arguments

    `checkpoint`: Trained model checkpoint, opened in binary mode
    `infile`: Input file for prediction, opened as ISO-8859-1 text
    `out_dir`: Directory to output predictions
    `predictive_field`: Data column to use for prediction
    `descriptive_labels`: Descriptions of the two classification labels
    `max_len`: Max sequence length
    `batch_size`: Batch size
    """
    checkpoint: BinaryIO
    infile: TextIO
    out_dir: str
    predictive_field: str
    descriptive_labels: List[str]
    max_len: int
    batch_size: int
def predict(model, dataloader: DataLoader, class_labels: ClassLabel,
            device: torch.device) -> List[str]:
    """
    Run the model over every batch and translate the highest-scoring
    class index of each row into its label string.

    Parameters:
    `model`: Pretrained predictive model; called with the batch as keyword
    arguments and expected to return an object with a `logits` attribute
    `dataloader`: `DataLoader` with preprocessed data
    `class_labels`: Class labels to apply in prediction
    `device`: The `torch.device` to use

    Return:
    List of predicted labels, in dataloader order
    """

    model.eval()
    label_ids = []

    for batch in dataloader:
        on_device = {name: tensor.to(device) for name, tensor in batch.items()}

        # Inference only, so no gradients are tracked for the forward pass
        with torch.no_grad():
            logits = model(**on_device).logits

        batch_ids = torch.argmax(logits, dim=-1).cpu().numpy()
        label_ids.extend(batch_ids)

    return [class_labels.int2str(int(label_id)) for label_id in label_ids]
Saved predictions to', out_file) + + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + main() diff --git a/src/class_train.py b/src/class_train.py new file mode 100755 index 0000000..c8e8a2f --- /dev/null +++ b/src/class_train.py @@ -0,0 +1,387 @@ +#!/usr/bin/env python3 +""" +Purpose: Train BERT model for article classification +Authors: Ana-Maria Istrate and Kenneth Schackart +""" + +import argparse +import copy +import os +from typing import Any, List, NamedTuple, TextIO, Tuple + +import pandas as pd +import torch +from torch.utils.data.dataloader import DataLoader +from tqdm.auto import tqdm +from transformers import (AdamW, AutoModelForSequenceClassification, + get_scheduler) + +from inventory_utils.class_data_handler import (DataFields, RunParams, + get_dataloader) +from inventory_utils.custom_classes import (CustomHelpFormatter, Metrics, + Settings) +from inventory_utils.filing import make_filenames, save_model, save_train_stats +from inventory_utils.metrics import get_classif_metrics +from inventory_utils.runtime import set_random_seed + + +# --------------------------------------------------------------------------- +class Args(NamedTuple): + """ Command-line arguments """ + train_file: TextIO + val_file: TextIO + out_dir: str + metric: str + predictive_field: str + labels_field: str + descriptive_labels: List[str] + model_name: str + max_len: int + learning_rate: float + weight_decay: float + num_training: int + num_epochs: int + batch_size: int + lr_scheduler: bool + seed: bool + + +# --------------------------------------------------------------------------- +def get_args(): + """ Parse command-line arguments """ + + parser = argparse.ArgumentParser( + description='Train BERT model for article classification', + formatter_class=CustomHelpFormatter) + + inputs = parser.add_argument_group('Inputs and Outputs') + data_info = parser.add_argument_group('Information on Data') + model_params = 
parser.add_argument_group('Model Parameters') + runtime_params = parser.add_argument_group('Runtime Parameters') + + inputs.add_argument('-t', + '--train-file', + metavar='FILE', + type=argparse.FileType('rt'), + required=True, + help='Training data file') + inputs.add_argument('-v', + '--val-file', + metavar='FILE', + type=argparse.FileType('rt'), + required=True, + help='Validation data file') + inputs.add_argument('-o', + '--out-dir', + metavar='DIR', + type=str, + default='out/', + help='Directory to output checkpt and loss plot') + + data_info.add_argument('-c', + '--metric', + metavar='METRIC', + choices=['f1', 'precision', 'recall'], + default='f1', + type=str, + help='Metric to use for choosing best model epoch') + data_info.add_argument('-pred', + '--predictive-field', + metavar='PRED', + type=str, + default='title_abstract', + help='Data column to use for prediction', + choices=['title', 'abstract', 'title_abstract']) + data_info.add_argument('-labs', + '--labels-field', + metavar='LABS', + type=str, + default='curation_score', + help='Data column with classification labels') + data_info.add_argument('-desc', + '--descriptive-labels', + metavar='LAB', + type=str, + nargs=2, + default=['not-bio-resource', 'bio-resource'], + help='Descriptions of the classification labels') + + model_params.add_argument('-m', + '--model-name', + metavar='MODEL', + type=str, + required=True, + help='Name of model') + model_params.add_argument('-max', + '--max-len', + metavar='INT', + type=int, + default=256, + help='Max Sequence Length') + model_params.add_argument('-rate', + '--learning-rate', + metavar='NUM', + type=float, + default=2e-5, + help='Learning Rate') + model_params.add_argument('-decay', + '--weight-decay', + metavar='NUM', + type=float, + default=0.0, + help='Weight Decay for Learning Rate') + + runtime_params.add_argument( + '-nt', + '--num-training', + metavar='INT', + type=int, + default=None, + help='Number of data points for training (default: all)') + 
def train(settings: Settings,
          crit_metric: str) -> Tuple[Any, pd.DataFrame, Metrics, Metrics]:
    """
    Train the classifier, keeping a copy of the model from the best epoch

    Parameters:
    `settings`: Model settings (NamedTuple)
    `crit_metric`: Metric used for selecting best epoch

    Return: Tuple of best model, training stats dataframe, train_metrics,
    and validation_metrics
    """

    model = settings.model
    progress_bar = tqdm(range(settings.num_training_steps))
    train_progress = pd.DataFrame(columns=[
        'epoch', 'train_precision', 'train_recall', 'train_f1', 'train_loss',
        'val_precision', 'val_recall', 'val_f1', 'val_loss'
    ])
    best_model = model
    best_val = Metrics(0, 0, 0, 0)
    best_train = Metrics(0, 0, 0, 0)

    for epoch in range(settings.num_epochs):

        # The evaluation step below switches the model to eval mode, so
        # training mode has to be re-enabled at the start of every epoch;
        # otherwise every epoch after the first trains with dropout off.
        model.train()

        train_loss = train_epoch(settings, progress_bar)

        model.eval()
        train_metrics = get_classif_metrics(model, settings.train_dataloader,
                                            settings.device)
        val_metrics = get_classif_metrics(model, settings.val_dataloader,
                                          settings.device)

        # Snapshot the model whenever the selection metric improves
        if getattr(val_metrics, crit_metric) > getattr(best_val, crit_metric):
            best_val = val_metrics
            best_train = train_metrics
            best_model = copy.deepcopy(model)

        epoch_row = pd.DataFrame(
            {
                'epoch': epoch,
                'train_precision': train_metrics.precision,
                'train_recall': train_metrics.recall,
                'train_f1': train_metrics.f1,
                'train_loss': train_metrics.loss,
                'val_precision': val_metrics.precision,
                'val_recall': val_metrics.recall,
                'val_f1': val_metrics.f1,
                'val_loss': val_metrics.loss
            },
            index=[0])
        train_progress = pd.concat([train_progress, epoch_row])

        print(f'Epoch {epoch + 1}:\n'
              f'Train Loss: {train_loss:.5f}\n'
              f'Val Loss: {val_metrics.loss:.5f}\n'
              f'Train Precision: {train_metrics.precision:.3f}\n'
              f'Train Recall: {train_metrics.recall:.3f}\n'
              f'Train F1: {train_metrics.f1:.3f}\n'
              f'Val Precision: {val_metrics.precision:.3f}\n'
              f'Val Recall: {val_metrics.recall:.3f}\n'
              f'Val F1: {val_metrics.f1:.3f}')

    print('Finished model training!')
    print('=' * 30)
    print(f'Best Train Precision: {best_train.precision:.3f}\n'
          f'Best Train Recall: {best_train.recall:.3f}\n'
          f'Best Train F1: {best_train.f1:.3f}\n'
          f'Best Val Precision: {best_val.precision:.3f}\n'
          f'Best Val Recall: {best_val.recall:.3f}\n'
          f'Best Val F1: {best_val.f1:.3f}\n')

    return best_model, train_progress, best_train, best_val
def initialize_model(model_name: str, args: Args, train_dataloader: DataLoader,
                     val_dataloader: DataLoader) -> Settings:
    """
    Instantiate the predictive model from HFHub and bundle it with the
    optimizer, optional scheduler, dataloaders and device

    Params:
    `model_name`: Pretrained model name
    `args`: Command-line arguments
    `train_dataloader`: Training `DataLoader`
    `val_dataloader`: Validation `DataLoader`

    Return:
    `Settings` including pretrained model
    """

    print(f'Initializing {model_name} model ...')
    print('=' * 30)

    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=2)
    adamw = AdamW(classifier.parameters(),
                  lr=args.learning_rate,
                  weight_decay=args.weight_decay)

    # One optimizer step per training batch per epoch
    total_steps = args.num_epochs * len(train_dataloader)

    scheduler = None
    if args.lr_scheduler:
        scheduler = get_scheduler('linear',
                                  optimizer=adamw,
                                  num_warmup_steps=0,
                                  num_training_steps=total_steps)

    target = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    classifier.to(target)

    return Settings(classifier, adamw, train_dataloader, val_dataloader,
                    scheduler, args.num_epochs, total_steps, target)
def main() -> None:
    """
    Main function

    Read every training-stats CSV given on the command line, stack them
    into one table, and write it to `combined_stats.csv` in the output
    directory.
    """

    args = get_args()

    # exist_ok avoids the check-then-create race of isdir() + makedirs()
    os.makedirs(args.out_dir, exist_ok=True)

    # Read everything first and concatenate once, instead of growing the
    # output frame (and re-copying it) on every iteration.
    frames = []
    for fh in args.files:
        # argparse opened these handles; close each one once it is read
        with fh:
            frames.append(pd.read_csv(fh))

    out_df = pd.concat(frames)

    out_file = os.path.join(args.out_dir, 'combined_stats.csv')

    out_df.to_csv(out_file, index=False)

    print(f'Done. Wrote output to {out_file}.')
directory') + parser.add_argument('-p', + '--min-prob', + metavar='PROB', + type=float, + default=0.95, + help=('Minimum probability of predicted resource name.' + ' Anything below will be flagged for review.')) + + args = parser.parse_args() + + return Args(args.file, args.out_dir, args.min_prob) + + +# --------------------------------------------------------------------------- +@pytest.fixture(name='raw_data') +def fixture_raw_data() -> pd.DataFrame: + """ DataFrame representative of the input data """ + + columns = [ + 'ID', 'text', 'common_name', 'common_prob', 'full_name', 'full_prob', + 'extracted_url', 'best_common', 'best_common_prob', 'best_full', + 'best_full_prob', 'best_name', 'best_name_prob' + ] + + df = pd.DataFrame( + [ + [ # Two common names, one full + '123', 'The text', 'mmCIF, PDB', '0.987, 0.775', + 'Protein Data Bank', '0.717', 'http://www.pdb.org/', '200', + 'US', '(34.22,-118.24)' + ], + [ # No common name, low probability full name + '456', 'More text.', '', '', 'SBASE', '0.648', + 'http://www.icgeb.trieste.it/sbase', '301', '', '' + ], + [ # No URL + '789', 'Stuff.', 'LDB2000', '0.997', '', '', '', '', '', '' + ], + [ # Two URLS + '147', 'Wow.', 'TwoURLS', '0.998', '', '', + 'http://website.com, http://db.org', '200, 302', 'JP, GB', + '(35.67,139.65), (52.20,0.13)' + ], + [ # Many URLs + '258', 'Sawasdee', 'LotsaURLS', '0.996', '', '', + 'http://db.com, http://site.org, http://res.net, http://db.io', + '404, Exception, 200, 301', ', , JP, GB', + ', , (35.67,139.65), (52.20,0.13)' + ], + [ # Same name as 123, but not highest prob + '369', 'The cat drank wine', 'PDB', '0.963', + 'Protein Data Bank', '0.964', 'http://www.pdb.org/', '200', + 'US', '(34.22,-118.24)' + ], + [ # Shared highest prob name with 369 + '741', 'Almost 7eleven', 'PDB', '0.983', 'Protein Data Bank', + '0.964', 'http://www.pdb.org/', '200', 'US', '(34.22,-118.24)' + ], + [ # Same common and full names, mismatched prob ranking with 741 + '852', 'Chihiro', 'PDB', '0.963', 
def flag_duplicates(ids: pd.Series, values: pd.Series) -> pd.Series:
    """
    Create column which indicates potential duplicates based on the given
    column. New column values are ID's of that row's potential duplicates

    A cell may hold several comma-separated values; two rows are potential
    duplicates when they share at least one of them. Values are compared
    after stripping surrounding whitespace (cells are joined with ', '),
    empty values never match, and each matching ID is listed once.

    Parameters:
    `ids`: Column of IDs
    `values`: Column of values which may have duplicates

    Return: Column with potential duplicate IDs (comma-joined), one entry
    per input row, with a fresh 0..n-1 index
    """

    def split_cell(cell):
        """ Ordered, unique, non-empty values of one cell """
        return list(
            dict.fromkeys(part.strip() for part in cell.split(',')
                          if part.strip()))

    row_values = [split_cell(cell) for cell in values]

    # Inverted index: value -> IDs of the rows containing it (row order).
    # One pass here replaces a scan of the whole column per value.
    ids_by_value = {}
    for id_n, parts in zip(ids, row_values):
        for part in parts:
            ids_by_value.setdefault(part, []).append(id_n)

    out = []
    for id_n, parts in zip(ids, row_values):
        # dict keys keep first-seen order while dropping repeated IDs
        matches = {}
        for part in parts:
            for other_id in ids_by_value[part]:
                if other_id != id_n:
                    matches[other_id] = None

        out.append(join_commas(list(matches)))

    return pd.Series(out)
""" + Flag rows with probability below `min_prob` + + Parameters: + `probs`: Column of Probabilities + `min_prob`: Minimum probability for flagging + + Return: Column of strings with flagged rows + """ + + probs = probs.astype(float) + + out_col = np.where(probs < min_prob, 'low_prob_best_name', '') + + return pd.Series(out_col) + + +# --------------------------------------------------------------------------- +def test_flag_probs() -> None: + """ Test flag_probs() """ + + min_prob = 0.6 + in_col = pd.Series(['0.00', '0.50', '0.75', '1.00']) + out_col = pd.Series(['low_prob_best_name', 'low_prob_best_name', '', '']) + + assert_series_equal(flag_probs(in_col, min_prob), out_col) + + +# --------------------------------------------------------------------------- +def flag_df(df: pd.DataFrame, min_prob: float): + """ + Flag dataframe for manual review. Two columns are added: + potential_duplicates, and check_out + + Parameters: + `df`: Input dataframe + `min_prob`: Minimum probability of best name, flag those below + + Return: + """ + + df['duplicate_urls'] = flag_duplicates(df['ID'], df['extracted_url']) + df['duplicate_names'] = flag_duplicates(df['ID'], df['best_name']) + + df['low_prob'] = flag_probs(df['best_name_prob'], min_prob) + + return df + + +# --------------------------------------------------------------------------- +def count_flags(url_flags: pd.Series, name_flags: pd.Series, + prob_flags: pd.Series) -> FlaggingStats: + """ + Count the number of rows that have been flagged for manual review + + Parameters: + `df`: Flagged dataframe + + Return: Number of flagged rows + """ + + num_url_flags = sum(url_flags != '') + num_name_flags = sum(name_flags != '') + num_prob_flags = sum(prob_flags != '') + + any_flags = [(url_flag == name_flag == prob_flag == '') + for url_flag, name_flag, prob_flag in zip( + url_flags, name_flags, prob_flags)] + num_any_flag = any_flags.count(False) + + return FlaggingStats(num_any_flag, num_url_flags, num_name_flags, + 
def main() -> None:
    """
    Main function

    Read the articles CSV, drop repeated IDs, flag rows for manual review,
    report flag counts, add the empty review columns, and write the result
    to the output directory under the input file's basename.
    """

    args = get_args()
    out_dir = args.out_dir

    # exist_ok avoids the check-then-create race of isdir() + makedirs()
    os.makedirs(out_dir, exist_ok=True)

    outfile = make_filename(out_dir, args.file.name)

    # drop_duplicates() leaves gaps in the index, while the flagging helpers
    # return Series with a fresh 0..n-1 index. Resetting the index keeps the
    # flag columns aligned with their rows when they are assigned.
    in_df = pd.read_csv(args.file, dtype=str).fillna('').drop_duplicates(
        ['ID']).reset_index(drop=True)

    flagged_df = flag_df(in_df, args.min_prob)

    num_flagged = count_flags(flagged_df['duplicate_urls'],
                              flagged_df['duplicate_names'],
                              flagged_df['low_prob'])

    print(f'Total number of flagged rows: {num_flagged.total_flags}')
    print(f'Rows with duplicate names: {num_flagged.duplicate_names}')
    print(f'Rows with duplicate URLs: {num_flagged.duplicate_urls}')
    print(f'Rows with low probability name: {num_flagged.low_probs}')

    out_df = add_review_columns(flagged_df)

    out_df.to_csv(outfile, index=False)

    print(f'Wrote output to {outfile}.')
def separate_ids(df: pd.DataFrame) -> pd.DataFrame:
    """
    Separate IDs into one row per ID. Assign a resource number to each row
    first so that they can be remerged after querying EuropePMC.

    The input dataframe is left untouched; a new dataframe is returned.

    Parameters:
    `df`: Deduplicated dataframe with an `ID` column of ', '-joined IDs

    Return: Dataframe with one row per ID, a `resource_num` column shared
    by rows that came from the same input row, and a fresh 0..n-1 index
    """

    # Work on a copy so the caller's frame does not gain `resource_num`
    # or have its `ID` strings replaced by lists.
    out_df = df.copy()

    out_df['resource_num'] = np.arange(len(out_df))

    out_df['ID'] = out_df['ID'].str.split(', ')

    return out_df.explode('ID').reset_index(drop=True)
def clean_results(results: dict) -> pd.DataFrame:
    """
    Retrieve the metadata from results of query

    Parameters:
    `results`: JSON-encoded response (nested dictionary) holding the
    papers under `resultList` -> `result`

    Return: Dataframe of results with one row per paper and columns
    `ID`, `affiliation`, `authors`, `grant_ids`, `grant_agencies`,
    `num_citations`
    """

    parsed_info = defaultdict(list)
    # A response without `resultList` still raises here, as before.
    for paper in results.get('resultList').get('result'):  # type: ignore
        parsed_info['ids'].append(paper.get('id'))
        parsed_info['affiliations'].append(paper.get('affiliation'))
        # A paper without a citation count is treated as having none,
        # rather than crashing on int(None).
        parsed_info['num_citations'].append(
            int(paper.get('citedByCount') or 0))

        # Empty author entries are kept as empty names
        authors = [
            author.get('fullName', '') if author else ''
            for author in paper.get('authorList', {}).get('author', [])
        ]
        parsed_info['authors'].append(', '.join(authors))

        grants = [
            grant for grant in paper.get('grantsList', {}).get('grant', [])
            if grant
        ]
        # Grants lacking an ID or agency are skipped in the joined strings
        grant_ids = [grant.get('grantID', '') for grant in grants]
        agencies = [grant.get('agency', '') for grant in grants]
        parsed_info['grant_ids'].append(', '.join(
            [grant_id for grant_id in grant_ids if grant_id]))
        parsed_info['agencies'].append(', '.join(
            [agency for agency in agencies if agency]))

    return pd.DataFrame({
        'ID': parsed_info['ids'],
        'affiliation': parsed_info['affiliations'],
        'authors': parsed_info['authors'],
        'grant_ids': parsed_info['grant_ids'],
        'grant_agencies': parsed_info['agencies'],
        'num_citations': parsed_info['num_citations']
    })
def remerge_resources(df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse the per-ID rows back into one row per resource, using the
    resource number that was assigned while separating IDs.

    Parameters:
    `df`: input dataframe with one row per ID

    Return: dataframe with one row per resource
    """

    join_nonempty = partial(join_commas, remove_empty=True)

    # Columns that are identical within a resource: keep the first value
    keep_first = [
        'best_name', 'best_name_prob', 'best_common', 'best_common_prob',
        'best_full', 'best_full_prob', 'article_count', 'extracted_url',
        'extracted_url_status', 'extracted_url_country',
        'extracted_url_coordinates', 'wayback_url', 'publication_date'
    ]
    # Per-article metadata: join across the resource's articles
    joined = ['affiliation', 'authors', 'grant_ids', 'grant_agencies']

    # Insertion order fixes the output column order
    aggregations = {'ID': join_commas}
    aggregations.update(dict.fromkeys(keep_first, 'first'))
    aggregations.update(dict.fromkeys(joined, join_nonempty))
    aggregations['num_citations'] = sum

    merged = df.groupby('resource_num').agg(aggregations).reset_index()

    return merged.drop('resource_num', axis='columns')
def get_args() -> Args:
    """ Build the command-line parser and return the parsed arguments """

    # Both input files are read as ISO-8859-1 text
    latin1_reader = argparse.FileType('rt', encoding='ISO-8859-1')

    parser = argparse.ArgumentParser(
        description=('Deduplicate rows with identical URL and name'),
        formatter_class=CustomHelpFormatter)

    parser.add_argument('file',
                        metavar='FILE',
                        type=latin1_reader,
                        help='CSV file of predictions and metadata')
    parser.add_argument('-p',
                        '--previous',
                        metavar='FILE',
                        type=latin1_reader,
                        help='Previously processed inventory')
    parser.add_argument('-o',
                        '--out-dir',
                        metavar='DIR',
                        type=str,
                        default='out/',
                        help='Output directory')

    parsed = parser.parse_args()

    return Args(parsed.file, parsed.previous, parsed.out_dir)
def clean_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Ready a dataframe of new predictions for merging and deduplication

    The raw per-article name columns are dropped, missing values become
    empty strings, every column is cast to `str`, and the URLs are
    normalized with `clean_url` so they can be compared.

    Parameters:
    `df`: Input dataframe

    Return: Cleaned dataframe
    """

    raw_name_columns = [
        'common_name', 'common_prob', 'full_name', 'full_prob'
    ]

    # `drop` returns a new frame, so the input is not modified
    cleaned = df.drop(columns=raw_name_columns).fillna('').astype(str)
    cleaned['extracted_url'] = cleaned['extracted_url'].map(clean_url)

    return cleaned
def prep_previous(df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare previous inventory for merging with new results

    An empty `text` column is added (any existing `text` values are
    blanked) and only the columns shared with newly cleaned results are
    kept, in the same order. The dataframe passed in is not modified.

    Parameters:
    `df`: Previous inventory results

    Return: Dataframe with same columns as new results
    """

    columns = [
        'ID', 'text', 'extracted_url', 'best_common', 'best_common_prob',
        'best_full', 'best_full_prob', 'best_name', 'best_name_prob',
        'publication_date'
    ]

    # `assign` builds a new frame, so the caller's dataframe does not
    # gain a `text` column as a side effect
    return df.assign(text='')[columns]
def clean_url(url: str) -> str:
    """
    For the sake of matching URLs, remove trailing slash, replace
    https with http, and lowercase all before first single slash

    A "single slash" is one that is neither preceded nor followed by
    another slash, so the `//` after the scheme is not a split point.
    If the URL has no single slash, the whole string is lowercased.

    Parameters:
    `url`: URL string

    Return: Cleaned URL
    """

    # Split at first single slash to lowercase the first half
    url_parts = re.search(
        r'''(?P<before_slash>.*?)  # Group everything before first slash
        (?<!/)/(?!/)               # Match a slash with no slash beside it
        (?P<after_slash>.*)        # Group everything after first slash
        ''', url, re.X)

    if url_parts:
        url = (url_parts['before_slash'].lower() + '/' +
               url_parts['after_slash'])
    else:
        url = url.lower()

    # Strip trailing slashes first so 'http://x.org/' and 'http://x.org'
    # compare equal
    return re.sub('https', 'http', url.rstrip('/'))
def make_filename(out_dir: str, infile_name: str) -> str:
    """
    Build the output path by placing the input file's basename
    inside the output directory

    Parameters:
    `out_dir`: Output directory
    `infile_name`: Input file name

    Return: Output filename
    """

    # Only the final path component of the input is reused
    _, basename = os.path.split(infile_name)

    return os.path.join(out_dir, basename)
+ +## `class_data_handler.py` + +Modules mostly used by [../class_data_generator.py](../class_data_generator.py), for labeling, creating dataloaders and tokenizing. + +## `constants.py` + +Constant values, such as dictionaries mapping text to integer labels for NER annotation. + +## `custom_classes.py` + +Custom data structures (classes). These are generally just classes which inherit from `typing.NamedTuple`. They do not have custom methods and are just for simplifying how data are passed and structured. + +## `filing.py` + +Functions for reading files, creating file names, and saving files. + +## `metrics.py` + +Functions for calculating and working with performance metrics. + +## `ner_data_handler.py` + +Modules mostly used by [../ner_data_generator.py](../ner_data_generator.py), for labeling, creating dataloaders and tokenizing. + +## `runtime.py` + +Functions for modifying the runtime environment (such as setting global random seeds) or detecting the environment (such as determining if CPU or CUDA should be used by `torch`). + +## `wrangling.py` + +Functions for wrangling and cleaning data, especially text. diff --git a/src/inventory_utils/__init__.py b/src/inventory_utils/__init__.py new file mode 100644 index 0000000..23808d7 --- /dev/null +++ b/src/inventory_utils/__init__.py @@ -0,0 +1,11 @@ +""" +Biodata Resource Inventory Modules +~~~ + +Modules for the Biodata Resource Inventory. + +Specific objects (classes, functions, etc.) can be imported. For example: +>>> from inventory_utils.wrangling import strip_xml + +None of the files here are meant to be executed, only imported. +""" diff --git a/src/inventory_utils/aliases.py b/src/inventory_utils/aliases.py new file mode 100644 index 0000000..259b3bd --- /dev/null +++ b/src/inventory_utils/aliases.py @@ -0,0 +1,17 @@ +""" +Aliases +~~~ + +Typing aliases for simplified type annotations. 
+ +Authors: Kenneth Schackart +""" + +import sys +from typing import List + +TaggedBatch = List[List[str]] + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + sys.exit('This file is a module, and is not meant to be run.') diff --git a/src/inventory_utils/class_data_handler.py b/src/inventory_utils/class_data_handler.py new file mode 100755 index 0000000..a718719 --- /dev/null +++ b/src/inventory_utils/class_data_handler.py @@ -0,0 +1,201 @@ +""" +Classification Data Handler +~~~ + +Functions for creating a dataloader for the classification task, +which includes preprocessing and tokenization. + +Authors: Ana-Maria Istrate and Kenneth Schackart +""" + +import random +import sys +from functools import partial +from typing import List, NamedTuple, Optional, TextIO, Tuple + +import pandas as pd +from datasets import ClassLabel, Dataset +from torch.utils.data import DataLoader +from transformers import AutoTokenizer, PreTrainedTokenizer + +from .wrangling import preprocess_data + + +# --------------------------------------------------------------------------- +class DataFields(NamedTuple): + """ + Fields in data used for training and classification + + `predictive`: Column used for prediction + `descriptive_labels`: Descriptions of the classification labels + `labels`: Column containing labels (optional) + """ + predictive: str + descriptive_labels: List[str] + labels: Optional[str] = None + + +# --------------------------------------------------------------------------- +class RunParams(NamedTuple): + """ + Model and run parameters + + `model_name`: Huggingface model name + `batch_size`: Tokenization batch size + `max_len`: Tokenization max length + `num_train`: Number of training datapoints (optional) + """ + model_name: str + batch_size: int + max_len: int + num_train: Optional[int] = None + + +# --------------------------------------------------------------------------- +def get_dataloader(file: TextIO, 
fields: DataFields, + run_params: RunParams) -> DataLoader: + """ + Preprocess data and create dataloader + + Parameters: + `file`: Input file handle + `fields`: Fields in data used for training and classification + `run_params`: Model and run parameters + + Return: + A `DataLoader` with preprocessed data + """ + + df = preprocess_data(file) + + data_loader = generate_dataloader(df, file.name, fields, run_params) + + return data_loader + + +# --------------------------------------------------------------------------- +def generate_dataloader(df: pd.DataFrame, filename: str, fields: DataFields, + params: RunParams) -> DataLoader: + """ + Generate dataloader from preprocessed data + + Parameters: + `df`: Dataframe to be converted to `DataLoader` + `filename`: Name of file from which `df` originates + `fields`: Fields in data used for training and classification + `params`: Model and run parameters + + Return: + A `DataLoader` of preprocessed data + """ + + if fields.predictive not in df.columns: + sys.exit(f'Predictive field column "{fields.predictive}" ' + f'not in file {filename}.') + + if fields.labels and fields.labels not in df.columns: + sys.exit(f'Labels field column "{fields.labels}" ' + f'not in file {filename}.') + + text, labels = get_text_labels(df, fields) + + class_labels = ClassLabel(num_classes=2, names=fields.descriptive_labels) + + tokenizer = AutoTokenizer.from_pretrained(params.model_name) + + dataset = tokenize_text(text, labels, class_labels, tokenizer, + params.max_len) + + if params.num_train: + dataset = dataset.select( + random.sample(range(dataset.num_rows), k=params.num_train)) + + return DataLoader(dataset, batch_size=params.batch_size) # type:ignore + + +# --------------------------------------------------------------------------- +def get_text_labels(df: pd.DataFrame, fields: DataFields) -> Tuple[List, List]: + """ + Get lists of predictive text and (optionally) labels + + Parameters: + `df`: Dataframe containing `fields.predictive` + 
`fields`: Specification of column names + + Return: + A tuple of lists: predictive text, labels + """ + + text = df[fields.predictive].tolist() + + labels = [] + if fields.labels: + labels = df[fields.labels].tolist() + + return text, labels + + +# --------------------------------------------------------------------------- +def test_get_text_labels() -> None: + """ Test get_text_labels() """ + + df = pd.DataFrame( + [['Title 1', 'Abstract 1', 0], ['Title 2', 'Abstract 2', 1], + ['Title 3', 'Abstract 3', 0]], + columns=['title', 'abstract', 'score']) + + fields = DataFields('title', ['yes', 'no']) + + assert get_text_labels(df, fields) == (['Title 1', 'Title 2', + 'Title 3'], []) + + fields = DataFields('title', ['yes', 'no'], 'score') + + assert get_text_labels(df, fields) == (['Title 1', 'Title 2', + 'Title 3'], [0, 1, 0]) + + +# --------------------------------------------------------------------------- +def tokenize_text(text: List, labels: List, class_labels: ClassLabel, + tokenizer: PreTrainedTokenizer, max_len: int) -> Dataset: + """ + Tokenize predictive text + + Parameters: + `text`: A list of predictive text + `labels`: A list of labels of `text` + `class_labels`: Descriptive labels of data in `text` + `tokenizer`: Pretrained tokenizer + `max_len`: Max length used in tokenization + + Return: + A tokenized and possibly labeled `Dataset` + """ + + data = {'text': text} + if labels: + data['labels'] = labels + dataset = Dataset.from_dict(data) + + # Partially apply arguments to the tokenizer so it is ready to tokenize + tokenize = partial(tokenizer, + padding='max_length', + max_length=max_len, + truncation=True) + + tokenized_dataset = dataset.map(lambda x: tokenize(x['text']), + batched=True) + + if labels: + tokenized_dataset = tokenized_dataset.cast_column( + 'labels', class_labels) + + tokenized_dataset = tokenized_dataset.remove_columns(['text']) + tokenized_dataset.set_format("torch") + + return tokenized_dataset + + +# 
NER_TAG2ID = {'O': 0, 'B-COM': 1, 'I-COM': 2, 'B-FUL': 3, 'I-FUL': 4}
"""
Mapping of NER tags to numerical labels

`key`: String NER label ("O", "B-COM", "I-COM", "B-FUL", "I-FUL")
`value`: Numerical label (0, 1, 2, 3, 4)
"""

# Built by inverting NER_TAG2ID so the two mappings cannot drift apart
ID2NER_TAG = {v: k for k, v in NER_TAG2ID.items()}
"""
Mapping of numerical labels to NER tags

`key`: Numerical label (0, 1, 2, 3, 4)
`value`: String NER label ("O", "B-COM", "I-COM", "B-FUL", "I-FUL")
"""
class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """ Argparse help formatter with tidier defaults and option listings """
    def _get_help_string(self, action):
        """ Leave the help text untouched when the default is None """
        if action.default is not None:
            return super()._get_help_string(action)
        return action.help

    def _format_action_invocation(self, action):
        """ List every option string, then the metavar a single time """
        flags = action.option_strings

        # Positional arguments are displayed by their metavar alone
        if not flags:
            metavar, = self._metavar_formatter(action, action.dest)(1)
            return metavar

        joined_flags = ', '.join(flags)

        # Options that consume no value have no metavar to show
        if action.nargs == 0:
            return joined_flags

        args_string = self._format_args(action, action.dest.upper())

        return joined_flags + ' ' + args_string
+ val_dataloader: DataLoader + lr_scheduler: Any + num_epochs: int + num_training_steps: int + device: torch.device + + +# --------------------------------------------------------------------------- +class Metrics(NamedTuple): + """ + Performance metrics + + `precision`: Model precision + `recall`: Model recall + `f1`: Model F1 score + `loss`: Model loss + """ + + precision: float + recall: float + f1: float + loss: float + + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + sys.exit('This file is a module, and is not meant to be run.') diff --git a/src/inventory_utils/filing.py b/src/inventory_utils/filing.py new file mode 100644 index 0000000..58ed34d --- /dev/null +++ b/src/inventory_utils/filing.py @@ -0,0 +1,158 @@ +""" +Filing +~~~ + +Functions related to reading and writing files. + +Authors: Kenneth Schackart +""" + +import os +import sys +from typing import Any, BinaryIO, Tuple + +import pandas as pd +import torch +from transformers import AutoModelForSequenceClassification as classifier +from transformers import AutoModelForTokenClassification as ner_classifier +from transformers import AutoTokenizer +from transformers.tokenization_utils import PreTrainedTokenizer + +from .constants import ID2NER_TAG, NER_TAG2ID +from .custom_classes import Metrics + + +# --------------------------------------------------------------------------- +def get_classif_model(checkpoint_fh: BinaryIO, + device: torch.device) -> Tuple[Any, str]: + """ + Instatiate predictive model from checkpoint + + Params: + `checkpoint_fh`: Model checkpoint filehandle + `device`: The `torch.device` to use + + Return: + Model instance from checkpoint, and model name + """ + + checkpoint = torch.load(checkpoint_fh, map_location=device) + model_name = checkpoint['model_name'] + model = classifier.from_pretrained(model_name, num_labels=2) + model.load_state_dict(checkpoint['model_state_dict']) + model.to(device) + model.eval() + + return 
def make_filenames(out_dir: str) -> Tuple[str, str]:
    """
    Build the checkpoint and training-stats file paths

    Parameters:
    `out_dir`: Output directory to be included in filename

    Return: Tuple['{out_dir}/checkpt.pt', '{out_dir}/train_stats.csv']
    """

    checkpoint_file, stats_file = (os.path.join(out_dir, basename)
                                   for basename in ('checkpt.pt',
                                                    'train_stats.csv'))

    return checkpoint_file, stats_file
def save_metrics(model_name: str, metrics: Metrics, filename: str) -> None:
    """
    Write test metrics to a csv file with a header and a single row

    Parameters:
    `model_name`: Name of model
    `metrics`: A `Metrics` NamedTuple
    `filename`: Output file name
    """

    with open(filename, 'wt') as fh:
        fh.write('model,precision,recall,f1,loss' + '\n')

        # Column order must match the header written above
        values = (model_name, metrics.precision, metrics.recall, metrics.f1,
                  metrics.loss)
        fh.write(','.join(f'{value}' for value in values) + '\n')
def get_ner_metrics(model: Any, dataloader: DataLoader,
                    device: torch.device) -> Metrics:
    """
    Compute model performance metrics for NER model

    Predictions and labels of each batch are converted to numpy arrays
    and passed through `convert_to_tags` before being scored with the
    `seqeval` metric.

    Parameters:
    `model`: Token classification (NER) model
    `dataloader`: DataLoader containing tokenized text entries and
    corresponding labels
    `device`: Torch device

    Return:
    A `Metrics` NamedTuple
    """
    # pylint: disable=too-many-locals
    calc_seq_metrics = load_metric('seqeval')
    total_loss = 0.
    num_seen_datapoints = 0
    for batch in dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        num_seen_datapoints += len(batch['input_ids'])
        predictions = torch.argmax(outputs.logits, dim=-1)

        # `cast` is a no-op at runtime, so it must wrap the converted
        # array; casting the tensor itself would discard the conversion
        predictions_array = cast(
            np.ndarray,
            predictions.detach().cpu().clone().numpy())

        labels = cast(Tensor, batch['labels'])
        labels_array = cast(np.ndarray,
                            labels.detach().cpu().clone().numpy())

        pred_labels, true_labels = convert_to_tags(predictions_array,
                                                   labels_array)

        calc_seq_metrics.add_batch(predictions=pred_labels,
                                   references=true_labels)

        total_loss += outputs.loss.item()
    # Summed batch losses divided by the number of datapoints seen
    total_loss /= num_seen_datapoints

    precision, recall, f1 = extract_metrics(calc_seq_metrics.compute())

    return Metrics(precision, recall, f1, total_loss)
Data Handler +~~~ + +Functions for creating a dataloader for the NER task, +which includes preprocessing and tokenization. + +Authors: Ana-Maria Istrate and Kenneth Schackart +""" + +import random +import sys +from functools import partial +from typing import List, NamedTuple, Optional, cast + +from datasets import load_dataset +from datasets.arrow_dataset import Batch +from datasets.dataset_dict import DatasetDict +from torch.utils.data import DataLoader +from transformers import (AutoTokenizer, DataCollatorForTokenClassification, + PreTrainedTokenizer) +from transformers.tokenization_utils_base import BatchEncoding + +from .constants import NER_TAG2ID + + +# --------------------------------------------------------------------------- +class RunParams(NamedTuple): + """ + Model and run parameters + + `model_name`: Huggingface model name + `batch_size`: Tokenization batch size + `num_train`: Number of training datapoints (optional) + """ + model_name: str + batch_size: int + num_train: Optional[int] = None + + +# --------------------------------------------------------------------------- +def get_dataloader(file: str, run_params: RunParams) -> DataLoader: + """ + Preprocess data and create dataloader + + Parameters: + `file`: Input file name + `run_params`: Model and run parameters + + Return: + A `DataLoader` with preprocessed data + """ + + dataset = load_dataset('pandas', data_files={'set': file}) + dataset = cast(DatasetDict, dataset) # Cast for type checker + + tokenizer = AutoTokenizer.from_pretrained(run_params.model_name, + add_prefix_space=True) + collator = DataCollatorForTokenClassification(tokenizer=tokenizer) + + tokenize_align_labels_with_tokenizer = partial(tokenize_align_labels, + tokenizer=tokenizer) + tokenized_dataset = dataset.map(tokenize_align_labels_with_tokenizer, + batched=True, + remove_columns=dataset['set'].column_names) + + if run_params.num_train: + tokenized_dataset['set'] = tokenized_dataset['set'].select( + 
def align_labels_with_tokens(labels: List[int],
                             word_ids: List[Optional[int]],
                             cls_token: int = -100) -> List[int]:
    """
    Apply labels to all word indices from the tokenized sequence

    Each token receives the label of the word it came from. When a word is
    split into several tokens and its label is odd (a `B-` tag), the
    tokens after the first receive `label + 1` (the matching `I-` tag).

    Parameters:
    `labels`: NER labels for the original words in sequence
    `word_ids`: Word indices of tokenized sequence; `None` marks special
    tokens
    `cls_token`: Value to assign for special (e.g. CLS) tokens

    Return: Labels for each word index
    """

    new_labels: List[int] = []
    prev_word_id: Optional[int] = None

    for word_id in word_ids:
        # Look labels up by word index directly. Pairing labels with the
        # iteration order of `set(word_ids)` is fragile: the position of
        # `None` in a set is arbitrary, and after truncation there are
        # fewer distinct word ids than labels.
        if word_id is None or word_id >= len(labels):
            label = cls_token
        else:
            label = labels[word_id]
            # Only continuation tokens of the *same word* turn B- into I-,
            # so two adjacent entities that each start with B- stay
            # separate.
            if word_id == prev_word_id and label % 2 == 1:
                label += 1

        new_labels.append(label)
        prev_word_id = word_id

    return new_labels
def set_random_seed(seed: int):
    """
    Set random seed for deterministic outcome of ML-trained models

    Seeds Python's `random` module as well as torch, because the NER data
    handler subsamples training rows with `random.sample()`.

    `seed`: Value to use for seed
    """
    # Imported here because this module's top-level imports are only
    # `sys` and `torch`
    import random  # pylint: disable=import-outside-toplevel

    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
def split_df(df: pd.DataFrame, rand_seed: bool, splits: List[float]) -> Splits:
    """
    Divide curated data into train, validation, and test portions

    Parameters:
    `df`: Manually curated classification data
    `rand_seed`: If true, use a fixed random state so splits are repeatable
    `splits`: Proportions of data for [train, validation, test]

    Return:
    `Splits` containing train, validation, and test dataframes
    """

    _train_frac, val_frac, test_frac = splits

    random_state = None
    if rand_seed:
        random_state = 241

    # First cut: training rows versus everything held out
    held_out_frac = val_frac + test_frac
    train, held_out = train_test_split(df,
                                       test_size=held_out_frac,
                                       train_size=1 - held_out_frac,
                                       random_state=random_state)

    # Second cut: divide the held-out rows into validation and test
    test_share = test_frac / held_out_frac
    val, test = train_test_split(held_out,
                                 test_size=test_share,
                                 train_size=1 - test_share,
                                 random_state=random_state)

    return Splits(train, val, test)
def strip_xml(text: str) -> str:
    """
    Strip XML tags from a string

    A header tag (`<h1>`, `</h4>`, ...) sitting directly between two pieces
    of text is replaced by a single space so the neighbouring words do not
    run together. Every other attribute-less tag is simply removed.

    Parameters:
    `text`: String possibly containing XML tags

    Return:
    String without XML tags
    """
    # If header tag between two adjacent strings, replace with a space.
    # NOTE(review): the header-tag group had been reduced to an empty `()`,
    # which matches between any two word characters and inserts spaces
    # everywhere. `</?h[\d]>` is reconstructed from the accompanying
    # comment ("letter h and number") -- confirm against project history.
    pattern = re.compile(
        r'''(?<=[\w.?!])  # Header tag must be preceded by word
            (</?h[\d]>)   # Header tag has letter h and number
            (?=[\w])      # Header tag must be followed by word''', re.X)
    text = re.sub(pattern, ' ', text)

    # Remove all other XML tags
    text = re.sub(r'<[\w/]+>', '', text)

    return text

Supplementary info

') == 'Supplementary info' + assert strip_xml('H2O2') == 'H2O2' + assert strip_xml( + 'the Bacillus pumilus group.') == 'the Bacillus pumilus group.' + + # If there are not spaces around header tags, add them + assert strip_xml( + 'MS/MS spectra.

Availability') == 'MS/MS spectra. Availability' + assert strip_xml('http://proteomics.ucsd.edu/Software.html

Contact' + ) == 'http://proteomics.ucsd.edu/Software.html Contact' + assert strip_xml( + '

Summary

Neuropeptides') == 'Summary Neuropeptides' + assert strip_xml('

Wow!

Go on') == 'Wow! Go on' + + +# --------------------------------------------------------------------------- +def strip_newlines(text: str) -> str: + """ + Remove all newline characters from string + + Parameters: + `text`: String + + Return: string without newlines + """ + + return re.sub('\n', '', text) + + +# --------------------------------------------------------------------------- +def test_strip_newlines() -> None: + """ Test strip_newlines() """ + + assert strip_newlines('Hello, \nworld!') == 'Hello, world!' + + +# --------------------------------------------------------------------------- +def concat_title_abstract(df: pd.DataFrame) -> pd.DataFrame: + """ + Concatenate abstract and title columns + + Parameters: + `df`: Dataframe with columns "title" and "abstract" + + Return: + A `pd.DataFrame` with new column "title_abstract" + """ + + df['title_abstract'] = df['title'].map(add_period) + ' ' + df['abstract'] + + return df + + +# --------------------------------------------------------------------------- +def test_concat_title_abstract() -> None: + """ Test concat_title_abstract() """ + + in_df = pd.DataFrame([['A Descriptive Title', 'A detailed abstract.']], + columns=['title', 'abstract']) + + out_df = pd.DataFrame([[ + 'A Descriptive Title', 'A detailed abstract.', + 'A Descriptive Title. A detailed abstract.' + ]], + columns=['title', 'abstract', 'title_abstract']) + + assert_frame_equal(concat_title_abstract(in_df), out_df) + + +# --------------------------------------------------------------------------- +def add_period(text: str) -> str: + """ + Add period to end of sentence if punctuation not present + + Parameters: + `text`: String that may be missing final puncturation + + Return: + `text` with final punctuation + """ + + if not text: + return '' + + return text if text[-1] in '.?!' else text + '.' 
def preprocess_data(file: TextIO) -> pd.DataFrame:
    """
    Strip XML tags and newlines and concatenate title and abstract columns

    Rows with a duplicated or empty `id` are dropped, and missing values
    are replaced by empty strings.

    Parameters:
    `file`: Input file handle

    Returns:
    a `pd.DataFrame` of preprocessed data

    Exits the program if any of the columns "id", "title", or "abstract"
    is missing.
    """

    required_cols = ['id', 'title', 'abstract']

    df = pd.read_csv(file, dtype=str)

    if not all(col in df.columns for col in required_cols):
        # In-memory handles such as io.StringIO have no `name` attribute
        file_name = getattr(file, 'name', '<input>')
        sys.exit(f'Data file {file_name} must contain columns '
                 'labeled "id", "title", and "abstract".')

    df = df.fillna('')
    df = df[~df.duplicated('id')]
    # Copy so the column assignments below act on an independent frame
    df = df[df['id'] != ''].copy()

    for col in ['title', 'abstract']:
        df[col] = df[col].apply(strip_xml)
        df[col] = df[col].apply(strip_newlines)

    df = concat_title_abstract(df)

    return df
def convert_to_tags(
        batch_predictions: np.ndarray,
        batch_labels: np.ndarray) -> Tuple[TaggedBatch, TaggedBatch]:
    """
    Translate numeric labels into string tags

    Positions whose true label is -100 are dropped from both the
    predictions and the true labels.

    Parameters:
    `batch_predictions`: Predicted numeric labels of batch of sequences
    `batch_labels`: True numeric labels of batch of sequences

    Return: Lists of tagged sequences of tokens
    from predictions and true labels
    """

    ignored = -100

    true_labels = []
    for seq_labels in batch_labels:
        kept_true = []
        for token_label in seq_labels:
            if token_label != ignored:
                kept_true.append(ID2NER_TAG[token_label])
        true_labels.append(kept_true)

    pred_labels = []
    for seq_preds, seq_labels in zip(batch_predictions, batch_labels):
        kept_pred = []
        for token_pred, token_label in zip(seq_preds, seq_labels):
            if token_label != ignored:
                kept_pred.append(ID2NER_TAG[token_pred])
        pred_labels.append(kept_pred)

    return pred_labels, true_labels
def chunk_rows(
        in_item: Union[pd.DataFrame, pd.Series],
        chunk_size: Optional[int]) -> List[Union[pd.DataFrame, pd.Series]]:
    """
    Separate input dataframe or series into a list of dataframes (or series),
    each with ~`chunk_size` rows.

    The number of chunks is `ceil(len(in_item) / chunk_size)` and rows are
    spread as evenly as possible, with earlier chunks taking the extra
    rows (the same sizes `np.array_split` would give).

    Parameters:
    `in_item`: Input dataframe or series
    `chunk_size`: Maximum number of rows per chunk; `None` or 0 disables
    chunking

    Return: List of dataframes or series. Empty input is returned as a
    single (empty) chunk.

    Raises:
    `ValueError` if `chunk_size` is negative
    """

    num_rows = len(in_item)

    # Empty input would otherwise ask for zero chunks
    if not chunk_size or num_rows == 0:
        return [in_item]

    if chunk_size < 0:
        raise ValueError('chunk_size must be a positive integer')

    logging.debug('Splitting data into ~%d-row chunks', chunk_size)

    num_chunks = math.ceil(num_rows / chunk_size)
    base_size, num_larger = divmod(num_rows, num_chunks)

    # Positional slicing keeps each chunk a DataFrame/Series without
    # routing pandas objects through numpy
    chunks = []
    start = 0
    for chunk_idx in range(num_chunks):
        stop = start + base_size + (1 if chunk_idx < num_larger else 0)
        chunks.append(in_item.iloc[start:stop])
        start = stop

    return chunks
def get_args() -> Args:
    """
    Read the command line: checkpoints to compare, metric, output directory

    Return: `Args` with the opened checkpoint handles, the metric name, and
    the output directory
    """

    cli = argparse.ArgumentParser(
        formatter_class=CustomHelpFormatter,
        description='Choose model with highest validation metric of choice')

    # One or more checkpoint files, opened in binary mode
    cli.add_argument('checkpoints',
                     help='Model checkpoints to be compared',
                     type=argparse.FileType('rb'),
                     metavar='FILE',
                     nargs='+')

    # Metric used to rank the checkpoints
    cli.add_argument('-m',
                     '--metric',
                     help='Metric to use for choosing best model',
                     type=str,
                     default='f1',
                     choices=['f1', 'precision', 'recall'],
                     metavar='METRIC')

    # Where the result file is written
    cli.add_argument('-o',
                     '--out-dir',
                     help='Output directory',
                     default='out/',
                     type=str,
                     metavar='DIR')

    parsed = cli.parse_args()

    return Args(checkpoints=parsed.checkpoints,
                metric=parsed.metric,
                out_dir=parsed.out_dir)
def get_best_model(df: pd.DataFrame, metric: str) -> str:
    """
    Pick the checkpoint with the highest value of the chosen metric

    Ties on the metric are broken in favour of the lower `loss`.

    Parameters:
    `df`: Chosen models dataframe, with a `loss` column, a `checkpt`
    column, and a column named after `metric`
    `metric`: Metric to use as criteria

    Return: Best model checkpoint
    """

    sort_columns = [metric, 'loss']
    # Metric descending (higher is better), loss ascending (lower is better)
    ranked = df.sort_values(by=sort_columns, ascending=[False, True])
    top_row = ranked.iloc[0]

    return top_row['checkpt']
def get_args() -> Args:
    """
    Parse command-line arguments

    Return:
    `Args` holding the input file handle, output directory, split
    proportions, and whether to use a fixed random seed

    Exits through `parser.error()` if the three `--splits` values do not
    sum to 1 (within floating-point tolerance).
    """

    parser = argparse.ArgumentParser(description='Split curated NER data.',
                                     formatter_class=CustomHelpFormatter)

    # NOTE(review): `default` presumably has no effect on this required
    # positional argument -- confirm whether `nargs='?'` was intended
    parser.add_argument('infile',
                        metavar='FILE',
                        type=argparse.FileType('rt', encoding='ISO-8859-1'),
                        default='data/manual_ner_extraction.csv',
                        help='Manually curated input file')
    parser.add_argument('-o',
                        '--out-dir',
                        metavar='',
                        type=str,
                        default='data/',
                        help='Output directory')
    parser.add_argument('--splits',
                        metavar='',
                        type=float,
                        nargs=3,
                        default=[0.7, 0.15, 0.15],
                        help='Proportions for train, val, test splits')
    parser.add_argument('-r',
                        '--seed',
                        action='store_true',
                        help='Set random seed')

    args = parser.parse_args()

    # Compare with a tolerance: exact float equality rejects valid
    # proportions such as 0.1 0.7 0.2, whose sum is 0.9999999999999999
    if abs(sum(args.splits) - 1.0) > 1e-9:
        parser.error(f'--splits {args.splits} must sum to 1')

    return Args(args.infile, args.out_dir, args.splits, args.seed)
def combine_rows(df: pd.DataFrame) -> pd.DataFrame:
    """
    Combine rows of same id into single row

    For each id the first title and abstract are kept, and the distinct
    `full_name` and `common_name` values are collected into sorted lists.

    Parameters:
    `df`: Dataframe with potentially multiple rows per id

    Return: Dataframe with single row per id, ordered by id, where
    `full_name` and `common_name` hold lists of names
    """

    columns = ['id', 'title', 'abstract', 'full_name', 'common_name']

    # Gather all rows first and build the frame once; growing a frame with
    # pd.concat inside the loop re-copies every earlier row each time
    rows = [[
        article_id,
        article.title.values[0],
        article.abstract.values[0],
        sorted(set(article.full_name.values)),
        sorted(set(article.common_name.values)),
    ] for article_id, article in df.groupby('id')]

    return pd.DataFrame(rows, columns=columns)
def assign_tags(words: pd.Series, full_names: List[str],
                common_names: List[str]) -> pd.Series:
    """
    Assign BIO tags to words in sequence

    A name is tagged wherever its space-separated words exactly match a
    run of `words`. Tokens cannot carry two tags, so a full name that
    contains any common name is not tagged. Empty names (`''`, used for
    missing values) are ignored.

    Parameters:
    `words`: Series of tokens stripped of punctuation
    `full_names`: Resource long names
    `common_names`: Resource common names

    Return:
    Series of tags (`O`, `B-COM`, `I-COM`, `B-FUL`, or `I-FUL`) corresponding
    to words in sequence
    """

    word_list = words.tolist()
    seq_len = len(word_list)

    # An empty common name must be dropped before the containment check:
    # '' is a substring of every string and would suppress all full names.
    # Empty names would also "match" tokens that are empty after
    # punctuation stripping.
    real_common_names = [name for name in common_names if name]
    common_splits = [name.split(' ') for name in real_common_names]
    full_splits = [
        name.split(' ') for name in full_names
        if name and not any(common in name for common in real_common_names)
    ]

    tags = ['O'] * seq_len
    for i in range(seq_len):
        # Common names are applied before full names at each position
        for name_splits, kind in ((common_splits, 'COM'), (full_splits,
                                                           'FUL')):
            for name_split in name_splits:
                end = i + len(name_split)
                if end <= seq_len and word_list[i:end] == name_split:
                    tags[i] = f'B-{kind}'
                    tags[i + 1:end] = [f'I-{kind}'] * (len(name_split) - 1)

    return pd.Series(tags)
def BIO_scheme_transform(df: pd.DataFrame) -> pd.DataFrame:
    """
    Perform BIO tagging for all articles in dataset

    Parameters:
    `df`: Dataframe with one row per article including extracted resource
    common name and full name

    Return: Dataframe with one row per word per article including indices
    and BIO tags
    """

    # Adds the `title_abstract` column that the per-article tagging reads
    df = concat_title_abstract(df)

    # Tag one article (one `id` group) at a time and stack the results.
    # NOTE(review): starting from an empty frame can influence the dtypes
    # of the concatenated result -- the accompanying test compares with
    # check_dtype=False; keep that in mind before restructuring this loop.
    out_df = pd.DataFrame()
    for _, article_df in df.groupby('id'):
        tagged_df = tag_article_tokens(article_df)
        out_df = pd.concat([out_df, tagged_df])

    # Each tagged frame carries its own index; renumber rows from 0
    out_df = out_df.reset_index(drop=True)

    return out_df
'common_name']) + + out_df = pd.DataFrame( + [[123, 0, 0, 'B-COM', 'MEGALEX:'], [123, 0, 1, 'O', 'A'], + [123, 0, 2, 'O', 'megastudy.'], [123, 1, 0, 'O', 'New'], + [123, 1, 1, 'O', 'database'], [123, 1, 2, 'B-COM', '(MEGALEX)'], + [123, 1, 3, 'O', 'of.'], [456, 0, 0, 'O', 'The'], + [456, 0, 1, 'B-FUL', 'Auditory'], [456, 0, 2, 'I-FUL', 'English'], + [456, 0, 3, 'I-FUL', 'Lexicon'], [456, 0, 4, 'I-FUL', 'Project:'], + [456, 0, 5, 'O', 'A'], [456, 0, 6, 'O', 'multi.'], + [456, 1, 0, 'B-COM', '(AELP)'], [456, 1, 1, 'O', 'is'], + [456, 1, 2, 'O', 'a.']], + columns=['pmid', 'sent_idx', 'word_idx', 'tag', 'word']) + + assert_frame_equal(BIO_scheme_transform(in_df), out_df, check_dtype=False) + + +# --------------------------------------------------------------------------- +def group_tagged_df(df: pd.DataFrame) -> pd.DataFrame: + """ + Group dataframe by pmid and sentence index + + Parameters: + `df`: Dataframe to be grouped + """ + df_grouped = df.groupby(['pmid', 'sent_idx']).agg(list).reset_index() + df_grouped = df_grouped.rename(columns={ + 'word': 'words', + 'tag': 'ner_tags' + }) + + return df_grouped + + +# --------------------------------------------------------------------------- +def test_group_tagged_df() -> None: + """ Test group_tagged_df() """ + + in_df = pd.DataFrame( + [[123, 0, 0, 'B-COM', 'MEGALEX:'], [123, 0, 1, 'O', 'A'], + [123, 0, 2, 'O', 'megastudy.'], [123, 1, 0, 'O', 'New'], + [123, 1, 1, 'O', 'database'], [123, 1, 2, 'B-COM', '(MEGALEX)'], + [123, 1, 3, 'O', 'of.'], [456, 0, 0, 'O', 'The'], + [456, 0, 1, 'B-FUL', 'Auditory'], [456, 0, 2, 'I-FUL', 'English'], + [456, 0, 3, 'I-FUL', 'Lexicon'], [456, 0, 4, 'I-FUL', 'Project:'], + [456, 0, 5, 'O', 'A'], [456, 0, 6, 'O', 'multi.'], + [456, 1, 0, 'B-COM', '(AELP)'], [456, 1, 1, 'O', 'is'], + [456, 1, 2, 'O', 'a.']], + columns=['pmid', 'sent_idx', 'word_idx', 'ner_tags', 'words']) + + out_df = pd.DataFrame( + [[ + 123, 0, [0, 1, 2], ['B-COM', 'O', 'O'], + ['MEGALEX:', 'A', 'megastudy.'] + ], + [ + 
123, 1, [0, 1, 2, 3], ['O', 'O', 'B-COM', 'O'], + ['New', 'database', '(MEGALEX)', 'of.'] + ], + [ + 456, 0, [0, 1, 2, 3, 4, 5, 6], + ['O', 'B-FUL', 'I-FUL', 'I-FUL', 'I-FUL', 'O', 'O'], + [ + 'The', 'Auditory', 'English', 'Lexicon', 'Project:', 'A', + 'multi.' + ] + ], [456, 1, [0, 1, 2], ['B-COM', 'O', 'O'], ['(AELP)', 'is', 'a.']]], + columns=['pmid', 'sent_idx', 'word_idx', 'ner_tags', 'words']) + + assert_frame_equal(group_tagged_df(in_df), out_df, check_dtype=False) + + +# --------------------------------------------------------------------------- +def save_df(df: pd.DataFrame, filename: str) -> None: + """ + Save dataframe to pickle + + Parameters: + `df`: Dataframe to be pickled + `filename`: Output filename + """ + + df.to_pickle(filename) + + +# --------------------------------------------------------------------------- +def main() -> None: + """ Main function """ + + args = get_args() + out_dir = args.outdir + + if not os.path.isdir(out_dir): + os.makedirs(out_dir) + + df = pd.read_csv(args.infile, dtype=str) + + check_input(df) + + df = combine_rows(clean_data(filter_data(df))) + + raw_train, raw_val, raw_test = split_df(df, args.seed, args.splits) + + train_df, val_df, test_df = map(BIO_scheme_transform, + [raw_train, raw_val, raw_test]) + + raw_train_out, raw_val_out, raw_test_out = map( + lambda f: os.path.join(out_dir, f), + ['train_ner.csv', 'val_ner.csv', 'test_ner.csv']) + + raw_train.to_csv(raw_train_out, index=False) + raw_val.to_csv(raw_val_out, index=False) + raw_test.to_csv(raw_test_out, index=False) + + train_out, val_out, test_out = map( + lambda f: os.path.join(out_dir, f), + ['train_ner.pkl', 'val_ner.pkl', 'test_ner.pkl']) + + save_df(group_tagged_df(train_df), train_out) + save_df(group_tagged_df(val_df), val_out) + save_df(group_tagged_df(test_df), test_out) + + print(f'Done. 
Wrote 6 files to {out_dir}.') + + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + main() diff --git a/src/ner_final_eval.py b/src/ner_final_eval.py new file mode 100755 index 0000000..083f6f9 --- /dev/null +++ b/src/ner_final_eval.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +""" +Purpose: Conduct evaluation on held-out test split +Authors: Kenneth Schackart +""" + +import argparse +import os +from typing import BinaryIO, NamedTuple + +from torch.utils.data.dataloader import DataLoader + +from inventory_utils.custom_classes import CustomHelpFormatter +from inventory_utils.filing import get_ner_model, save_metrics +from inventory_utils.metrics import get_ner_metrics +from inventory_utils.ner_data_handler import RunParams, get_dataloader +from inventory_utils.runtime import get_torch_device + + +# --------------------------------------------------------------------------- +class Args(NamedTuple): + """ Command-line arguments """ + test_file: str + checkpoint: BinaryIO + out_dir: str + + +# --------------------------------------------------------------------------- +def get_args(): + """ Parse command-line arguments """ + + parser = argparse.ArgumentParser( + description='Evaluate model on held-out test set', + formatter_class=CustomHelpFormatter) + + parser.add_argument('-t', + '--test-file', + metavar='PKL', + type=str, + required=True, + help='Test data file (.pkl)') + parser.add_argument('-c', + '--checkpoint', + metavar='PT', + type=argparse.FileType('rb'), + required=True, + help='Trained model checkpoint') + parser.add_argument('-o', + '--out-dir', + metavar='DIR', + type=str, + default='out/', + help='Directory to output metrics') + + args = parser.parse_args() + + if ".pkl" not in args.test_file: + parser.error(f'Invalid input file "{args.test_file}". 
Must be .pkl') + + return Args(args.test_file, args.checkpoint, args.out_dir) + + +# --------------------------------------------------------------------------- +def get_test_dataloader(args: Args, model_name: str) -> DataLoader: + """ + Generate the dataloaders + + Parameters: + `args`: Command-line arguments + `model_name`: HuggingFace model name + + Return: + Test `DataLoader` + """ + + params = RunParams(model_name, 8) + + dataloader = get_dataloader(args.test_file, params) + + return dataloader + + +# --------------------------------------------------------------------------- +def main() -> None: + """ Main function """ + + args = get_args() + out_dir = args.out_dir + + if not os.path.isdir(out_dir): + os.makedirs(out_dir) + + out_file = os.path.join(args.out_dir, 'metrics.csv') + + device = get_torch_device() + + model, model_name, _ = get_ner_model(args.checkpoint, device) + + dataloader = get_test_dataloader(args, model_name) + + test_metrics = get_ner_metrics(model, dataloader, device) + + save_metrics(model_name, test_metrics, out_file) + + print(f'Done. 
Wrote output to {out_dir}.') + + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + main() diff --git a/src/ner_predict.py b/src/ner_predict.py new file mode 100755 index 0000000..7ce463b --- /dev/null +++ b/src/ner_predict.py @@ -0,0 +1,541 @@ +#!/usr/bin/env python3 +""" +Purpose: Use trained BERT model for named entity recognition +Authors: Ana-Maria Istrate and Kenneth Schackart +""" + +import argparse +import os +import re +import string +from itertools import compress +from statistics import mean +from typing import BinaryIO, Dict, List, NamedTuple, TextIO, cast + +import pandas as pd +import torch +from pandas.testing import assert_frame_equal +from transformers.modeling_outputs import TokenClassifierOutput +from transformers.tokenization_utils import PreTrainedTokenizer +from transformers.tokenization_utils_base import CharSpan + +from inventory_utils.constants import ID2NER_TAG +from inventory_utils.custom_classes import CustomHelpFormatter +from inventory_utils.filing import get_ner_model +from inventory_utils.runtime import get_torch_device +from inventory_utils.wrangling import preprocess_data + +pd.options.mode.chained_assignment = None + + +# --------------------------------------------------------------------------- +class Args(NamedTuple): + """ Command-line arguments """ + checkpoint: BinaryIO + infile: TextIO + out_dir: str + + +# --------------------------------------------------------------------------- +class SeqPrediction(NamedTuple): + """ + Attributes of predicted sequence labels + + `seq`: Original sequence + `word_ids`: List of word indices + `word_locs`: Dictionary giving character spans for each word + `preds`: Predicted labels + `probs`: Predicted label probability + """ + seq: str + word_ids: List[int] + word_locs: Dict[int, CharSpan] + preds: List[str] + probs: List[float] + + +# --------------------------------------------------------------------------- +class 
NamedEntity(NamedTuple): + """ + Predicted named entity + + `string`: String predicted to be a named entity + `label`: Predicted label + `prob`: Probability of predicted label + """ + string: str + label: str + prob: float + + +# --------------------------------------------------------------------------- +def get_args() -> Args: + """ Parse command-line arguments """ + + parser = argparse.ArgumentParser( + description='Predict named entities using trained BERT model', + formatter_class=CustomHelpFormatter) + + parser.add_argument('-c', + '--checkpoint', + metavar='CHKPT', + type=argparse.FileType('rb'), + required=True, + help='Trained model checkpoint') + parser.add_argument('-i', + '--input-file', + metavar='FILE', + type=argparse.FileType('rt', encoding='ISO-8859-1'), + required=True, + help='Input file for prediction') + parser.add_argument('-o', + '--out-dir', + metavar='DIR', + type=str, + default='out/', + help='Directory to output predictions') + + args = parser.parse_args() + + return Args(args.checkpoint, args.input_file, args.out_dir) + + +# --------------------------------------------------------------------------- +def convert_predictions(seq_preds: SeqPrediction) -> List[NamedEntity]: + """ + Convert raw predictions to meaningful predictions + + Parameters: + seq_preds: `SeqPrediction` output from model + + Return: List of `NamedEntity`s + """ + + entities: List[NamedEntity] = [] + began_entity = False + + for loc_id, span in seq_preds.word_locs.items(): + mask = [word_id == loc_id for word_id in seq_preds.word_ids] + labels = set(compress(seq_preds.preds, mask)) + probs = list(compress(seq_preds.probs, mask)) + substring = seq_preds.seq[span.start:span.end + 1] + if loc_id > 0: + if seq_preds.word_locs[loc_id - 1].end == span.start: + substring = seq_preds.seq[span.start + 1:span.end + 1] + if any(label[0] == 'B' for label in labels): + began_entity = True + label = list( + compress(labels, [label[0] == 'B' for label in labels]))[0] + 
entities.append(NamedEntity(substring, label, mean(probs))) + prob_count = len(probs) + elif any(label[0] == 'I' for label in labels) and substring != ' ': + if not began_entity: + began_entity = True + label = list( + compress(labels, [label[0] == 'I' for label in labels]))[0] + entities.append(NamedEntity(substring, label, mean(probs))) + prob_count = len(probs) + else: + last_entity = entities[-1] + prob = (last_entity.prob * prob_count + + sum(probs)) / (prob_count + len(probs)) + prob_count += len(probs) + entities[-1] = NamedEntity(last_entity.string + substring, + last_entity.label, prob) + else: + began_entity = False + + out_entities = [] + + for entity in entities: + if len(entity.string.strip()) == 1 or re.findall( + 'http', entity.string) or len(entity.string.strip()) > 100: + continue + + out_entities.append( + NamedEntity(entity.string.strip(), entity.label, entity.prob)) + + return out_entities + + +# --------------------------------------------------------------------------- +def test_convert_predictions() -> None: + """ Test convert_predictions() """ + + seq = 'ALCOdb: Gene Coexpression Database for Microalgae.' 
+ word_ids = [0, 0, 0, 1, 2, 3, 3, 3, 4, 5, 6, 6, 7] + word_locs = { + 0: CharSpan(0, 6), + 1: CharSpan(6, 7), + 2: CharSpan(8, 12), + 3: CharSpan(13, 25), + 4: CharSpan(26, 34), + 5: CharSpan(35, 38), + 6: CharSpan(39, 49), + 7: CharSpan(49, 50) + } + preds = [ + 'B-COM', 'I-COM', 'I-COM', 'I-COM', 'B-FUL', 'I-FUL', 'I-FUL', 'I-FUL', + 'I-FUL', 'I-FUL', 'I-FUL', 'I-FUL', 'I-FUL' + ] + probs = [ + 0.9914268, 0.9947973, 0.9970761, 0.9951375, 0.98841196, 0.9884289, + 0.99392915, 0.9951815, 0.9865631, 0.99616784, 0.99818134, 0.9980192, + 0.90898293 + ] + + seq_preds = SeqPrediction(seq, word_ids, word_locs, preds, probs) + + expected = [ + NamedEntity('ALCOdb:', 'B-COM', 0.9944334), + NamedEntity('Gene Coexpression Database for Microalgae.', 'B-FUL', + 0.98376288) + ] + + assert convert_predictions(seq_preds) == expected + + seq = 'Inside outside inside inside (inside).' + word_ids = [0, 1, 2, 3, 4, 5, 6] + word_locs = { + 0: CharSpan(0, 6), + 1: CharSpan(7, 14), + 2: CharSpan(15, 21), + 3: CharSpan(22, 28), + 4: CharSpan(29, 30), + 5: CharSpan(30, 36), + 6: CharSpan(36, 37) + } + preds = ['I-COM', 'O', 'I-FUL', 'I-FUL', 'B-COM', 'I-COM', 'I-COM'] + probs = [0.996, 0.999, 0.998, 0.978, 0.99, 0.98, 0.97] + + seq_preds = SeqPrediction(seq, word_ids, word_locs, preds, probs) + + expected = [ + NamedEntity('Inside', 'I-COM', 0.996), + NamedEntity('inside inside', 'I-FUL', 0.988), + NamedEntity('(inside).', 'B-COM', 0.98) + ] + + assert convert_predictions(seq_preds) == expected + + # Check that single letter and URL entities are removed + seq = 'A (https://hello.py)' + word_ids = [0, 1] + word_locs = {0: CharSpan(0, 1), 1: CharSpan(2, 20)} + preds = ['B-COM', 'B-FUL'] + probs = [0.996, 0.999] + + seq_preds = SeqPrediction(seq, word_ids, word_locs, preds, probs) + + expected = [] + + assert convert_predictions(seq_preds) == expected + + +# --------------------------------------------------------------------------- +def predict_sequence(model, device: torch.device, seq: 
str, + tokenizer: PreTrainedTokenizer) -> List[NamedEntity]: + """ + Run token prediction on sequence + + Parameters: + `model`: Trained token classification model + `device`: Device to use + `seq`: Input string/sequence + `tokenizer`: Pretrained tokenizer + + Return: List of `NamedEntity`s + """ + + with torch.no_grad(): + tokenized_seq = tokenizer(seq, + return_tensors="pt", + padding=True, + truncation=True, + max_length=512).to(device) + outputs = cast(TokenClassifierOutput, model(**tokenized_seq)) + logits = outputs.logits + preds = logits.argmax(dim=-1).cpu().numpy()[0][1:-1] + all_probs = torch.nn.functional.softmax(logits, + dim=-1).cpu().numpy()[0][1:-1] + probs = [prob[pred] for pred, prob in zip(preds, all_probs)] + labels = [ID2NER_TAG[pred] for pred in preds] + word_ids = tokenized_seq.word_ids()[1:-1] + word_locs = { + id: tokenized_seq.word_to_chars(id) + for id in set(word_ids) + } + + seq_preds = SeqPrediction(seq, word_ids, word_locs, labels, probs) + + return convert_predictions(seq_preds) + + +# --------------------------------------------------------------------------- +def predict(model, tokenizer: PreTrainedTokenizer, inputs: pd.DataFrame, + device: torch.device) -> pd.DataFrame: + """ + Perform NER prediction on rows of input dataframe + + Parameters: + `model`: Trained token classification model + `tokenizer`: Pretrained tokenizer + `inputs`: Input dataframe + `device`: Device to use + + Return: Dataframe containing one row per named entity including id, text, + mention, label, and probability columns + """ + + pred_df = pd.DataFrame( + columns=['ID', 'text', 'publication_date', 'mention', 'label', 'prob']) + + for _, row in inputs.iterrows(): + seq = row['title_abstract'] + predicted_labels = predict_sequence(model, device, seq, tokenizer) + num_preds = len(predicted_labels) + mentions = [ + x.string.strip(string.punctuation) for x in predicted_labels + ] + labels = [x.label[2:] for x in predicted_labels] + probs = [x.prob for x in 
predicted_labels] + pred_df = pd.concat([ + pred_df, + pd.DataFrame({ + 'ID': [row['id']] * num_preds, + 'text': [seq] * num_preds, + 'publication_date': [row['publication_date']] * num_preds, + 'mention': mentions, + 'label': labels, + 'prob': probs + }) + ]) + + return pred_df + + +# --------------------------------------------------------------------------- +def deduplicate(df: pd.DataFrame) -> pd.DataFrame: + """ + Deduplicate predicted entities, keeping only highest probability for each + predicted named entity. Duplicates will still exist for distinct papers. + + Parameters: + df: Predicted entities dataframe + + Return: Deduplicated dataframe + """ + + unique_df = pd.DataFrame(columns=[*df.columns, 'count']) + out_df = pd.DataFrame(columns=df.columns) + + # First, remove exact duplicates of a named entity, assigning + # the highest probability found for that entity + for _, mention in df.groupby(['ID', 'mention']): + mention = mention.sort_values('prob', ascending=False) + mention['count'] = len(mention) + unique_df = pd.concat([unique_df, mention.head(1)]) + + unique_df['uncased_mention'] = unique_df['mention'].str.lower() + out_df = pd.DataFrame(columns=unique_df.columns) + + # Remove duplicates that differ only in case + # Choose which to keep by prioritizing number of occurrences then prob + for _, mention in unique_df.groupby(['ID', 'uncased_mention']): + mention = mention.sort_values(['count', 'prob'], ascending=False) + out_df = pd.concat([out_df, mention.head(1)]) + + out_df.drop(['count', 'uncased_mention'], axis='columns', inplace=True) + out_df.reset_index(drop=True, inplace=True) + + return out_df + + +# --------------------------------------------------------------------------- +def test_deduplicate() -> None: + """ Test deduplicate() """ + + in_df = pd.DataFrame( + [[ + 123, 'SAVI Synthetically Accessible Virtual Inventory', 'SAVI', + 'COM', 0.98 + ], + [ + 123, 'SAVI Synthetically Accessible Virtual Inventory', + 'Synthetically Accessible 
Virtual Inventory', 'FUL', 0.64 + ], [456, 'PANTHER PANTHER PANTHER', 'PANTHER', 'COM', 0.67], + [456, 'PANTHER PANTHER PANTHER', 'PANTHER', 'COM', 0.95], + [456, 'PANTHER PANTHER PANTHER', 'PANTHER', 'COM', 0.55], + [789, 'MicrobPad MD (MicrobPad)', 'MicrobPad', 'FUL', 0.54], + [789, 'MicrobPad MD (MicrobPad)', 'MicrobPad', 'COM', 0.96], + [147, 'Chewie-NS Chewie-NS chewie-NS', 'Chewie-NS', 'COM', 0.88], + [147, 'Chewie-NS Chewie-NS chewie-NS', 'Chewie-NS', 'COM', 0.72], + [147, 'Chewie-NS Chewie-NS chewie-NS', 'chewie-NS', 'COM', 0.92]], + columns=['ID', 'text', 'mention', 'label', 'prob']) + + out_df = pd.DataFrame( + [[ + 123, 'SAVI Synthetically Accessible Virtual Inventory', 'SAVI', + 'COM', 0.98 + ], + [ + 123, 'SAVI Synthetically Accessible Virtual Inventory', + 'Synthetically Accessible Virtual Inventory', 'FUL', 0.64 + ], [147, 'Chewie-NS Chewie-NS chewie-NS', 'Chewie-NS', 'COM', 0.88], + [456, 'PANTHER PANTHER PANTHER', 'PANTHER', 'COM', 0.95], + [789, 'MicrobPad MD (MicrobPad)', 'MicrobPad', 'COM', 0.96]], + columns=['ID', 'text', 'mention', 'label', 'prob']) + + assert_frame_equal(deduplicate(in_df), out_df, check_dtype=False) + + +# --------------------------------------------------------------------------- +def reformat_output(df: pd.DataFrame) -> pd.DataFrame: + """ + Reformat output dataframe to wide format + + Parameters: + `df`: Dataframe output by deduplicate() + + Return: + Wide-format dataframe + """ + + df['prob'] = df['prob'].astype(str) + + # Add two dummy rows so that both COM and FUL are present as labels + df.loc[len(df)] = ['-1', 'foo bar', 'date hold', 'foo', 'COM', '0'] + df.loc[len(df)] = ['-1', 'foo bar', 'date hold', 'bar', 'FUL', '0'] + df = df[df['mention'] != ''] + + # For each article, aggregate multiple occurrences + # of same label into single row + df2 = df['mention'].groupby( + [df.ID, df.text, df.publication_date, + df.label]).apply(list).reset_index() + df2['prob'] = df['prob'].groupby( + [df.ID, df.text, 
df.publication_date, + df.label]).apply(list).reset_index()['prob'] + + # Create combined column of mentions and their probs + df2['mention_prob'] = list(zip(df2['mention'], df2['prob'])) + + # Pivot to wide format, each label gets its own column + df2 = df2.pivot(index=['ID', 'text', 'publication_date'], + columns='label', + values='mention_prob') + + # Fill missing values + for col in [ + c for c in list(df2.columns) + if c not in ['ID', 'text', 'publication_date'] + ]: + isna = df2[col].isna() + df2.loc[isna, col] = pd.Series([[[''], ['']]] * isna.sum(), + dtype='object').values + + # Split mentions and probs to their own columns + # and drop the unsplit columns + df2[['common_name', 'common_prob']] = pd.DataFrame(df2['COM'].tolist(), + index=df2.index) + df2[['full_name', 'full_prob']] = pd.DataFrame(df2['FUL'].tolist(), + index=df2.index) + df2.drop(['COM', 'FUL'], inplace=True, axis='columns') + + # Convert lists of multiple mentions to string with commas between + for col in [ + c for c in list(df2.columns) + if c not in ['ID', 'text', 'publication_date'] + ]: + df2[col] = df2[col].fillna('') + df2[col] = df2[col].apply(', '.join) + + df2.reset_index(inplace=True) + + # Remove the dummy row + df2 = df2[df2['ID'] != '-1'] + + return df2 + + +# --------------------------------------------------------------------------- +def test_reformat_output() -> None: + """ Test reformat_output() """ + + in_df = pd.DataFrame( + [[ + 123, 'SAVI Synthetically Accessible Virtual Inventory', + '2011-01-01', 'SAVI', 'COM', 0.98 + ], + [ + 123, 'SAVI Synthetically Accessible Virtual Inventory', + '2011-01-01', 'Synthetically Accessible Virtual Inventory', 'FUL', + 0.64 + ], + [ + 147, 'Chewie-NS Chewie-NS chewie-NS', '2011-01-02', 'Chewie-NS', + 'COM', 0.88 + ], [ + 456, 'PANTHER PANTHER LION', '2011-01-03', 'PANTHER', 'COM', 0.95 + ], [456, 'PANTHER PANTHER LION', '2011-01-03', 'LION', 'COM', 0.92], + [ + 789, 'MicrobPad MD (MicrobPad)', '2011-01-04', 'MicrobPad', 'COM', + 0.96 
+ ]], + columns=['ID', 'text', 'publication_date', 'mention', 'label', 'prob']) + + out_df = pd.DataFrame([[ + 123, 'SAVI Synthetically Accessible Virtual Inventory', '2011-01-01', + 'SAVI', '0.98', 'Synthetically Accessible Virtual Inventory', '0.64' + ], + [ + 147, 'Chewie-NS Chewie-NS chewie-NS', + '2011-01-02', 'Chewie-NS', '0.88', '', '' + ], + [ + 456, 'PANTHER PANTHER LION', '2011-01-03', + 'PANTHER, LION', '0.95, 0.92', '', '' + ], + [ + 789, 'MicrobPad MD (MicrobPad)', '2011-01-04', + 'MicrobPad', '0.96', '', '' + ]], + columns=[ + 'ID', 'text', 'publication_date', 'common_name', + 'common_prob', 'full_name', 'full_prob' + ]) + + assert_frame_equal(reformat_output(in_df), + out_df, + check_names=False, + check_dtype=False) + + +# --------------------------------------------------------------------------- +def main() -> None: + """ Main function """ + + args = get_args() + + input_df = preprocess_data(args.infile) + + if not os.path.isdir(args.out_dir): + os.makedirs(args.out_dir) + + out_file = os.path.join(args.out_dir, 'predictions.csv') + + device = get_torch_device() + + model, _, tokenizer = get_ner_model(args.checkpoint, device) + + predictions = reformat_output( + deduplicate(predict(model, tokenizer, input_df, device))) + + predictions.to_csv(out_file, index=False) + + print(f'Done. 
Saved predictions to {out_file}.') + + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + main() diff --git a/src/ner_train.py b/src/ner_train.py new file mode 100755 index 0000000..65d6238 --- /dev/null +++ b/src/ner_train.py @@ -0,0 +1,348 @@ +#!/usr/bin/env python3 +""" +Purpose: Train NER model from pretrained BERT +Authors: Ana-Maria Istrate and Kenneth Schackart +""" + +import argparse +import copy +import os +from typing import Any, NamedTuple, Optional, Tuple, cast + +import pandas as pd +import torch +from torch.optim import AdamW +from torch.utils.data.dataloader import DataLoader +from tqdm.auto import tqdm +from transformers import (AutoModelForTokenClassification, get_scheduler, + optimization) + +from inventory_utils.constants import ID2NER_TAG, NER_TAG2ID +from inventory_utils.custom_classes import (CustomHelpFormatter, Metrics, + Settings) +from inventory_utils.filing import make_filenames, save_model, save_train_stats +from inventory_utils.metrics import get_ner_metrics +from inventory_utils.ner_data_handler import RunParams, get_dataloader +from inventory_utils.runtime import set_random_seed + + +# --------------------------------------------------------------------------- +class Args(NamedTuple): + """ Command-line arguments """ + train_file: str + val_file: str + out_dir: str + metric: str + model_name: str + learning_rate: float + weight_decay: float + num_training: int + num_epochs: int + batch_size: int + lr_scheduler: bool + model_checkpoint: Optional[str] + seed: bool + + +# --------------------------------------------------------------------------- +def get_args() -> Args: + """ Parse command-line arguments """ + + parser = argparse.ArgumentParser( + description='Train BERT model for named entity recognition', + formatter_class=CustomHelpFormatter) + + inputs = parser.add_argument_group('Inputs and Outputs') + model_params = parser.add_argument_group('Model Parameters') + 
runtime_params = parser.add_argument_group('Runtime Parameters') + + inputs.add_argument('-t', + '--train-file', + metavar='FILE', + type=str, + required=True, + help='Training data file (.pkl)') + inputs.add_argument('-v', + '--val-file', + metavar='FILE', + type=str, + required=True, + help='Validation data file (.pkl)') + inputs.add_argument('-o', + '--out-dir', + metavar='DIR', + type=str, + default='out/', + help='Directory to output checkpt and loss plot') + + model_params.add_argument( + '-c', + '--metric', + metavar='METRIC', + choices=['f1', 'precision', 'recall'], + default='f1', + type=str, + help='Metric to use for choosing best model epoch') + model_params.add_argument('-m', + '--model-name', + metavar='', + type=str, + required=True, + help='Name of HuggingFace model') + model_params.add_argument('-rate', + '--learning-rate', + metavar='NUM', + type=float, + default=3e-5, + help='Learning rate') + model_params.add_argument('-decay', + '--weight-decay', + metavar='NUM', + type=float, + default=0.01, + help='Weight decay for learning rate') + + runtime_params.add_argument( + '-nt', + '--num-training', + metavar='INT', + type=int, + default=None, + help='Number of data points for training (default: all)') + runtime_params.add_argument('-ne', + '--num_epochs', + metavar='INT', + type=int, + default=3, + help='Number of epochs') + runtime_params.add_argument('-batch', + '--batch-size', + metavar='INT', + type=int, + default=16, + help='Batch size') + runtime_params.add_argument('-lr', + '--lr_scheduler', + action='store_true', + help='Use a learning rate scheduler') + runtime_params.add_argument('-r', + '--seed', + action='store_true', + help='Set random seed') + + args = parser.parse_args() + + for infile in [args.train_file, args.val_file]: + if ".pkl" not in infile: + parser.error(f'Invalid input file "{infile}". 
Must be .pkl') + + return Args(args.train_file, args.val_file, args.out_dir, args.metric, + args.model_name, args.learning_rate, args.weight_decay, + args.num_training, args.num_epochs, args.batch_size, + args.lr_scheduler, None, args.seed) + + +# --------------------------------------------------------------------------- +def get_dataloaders(args) -> Tuple[DataLoader, DataLoader]: + """ + Generate training and validation dataloaders + + Parameters: + `args`: Command-line arguments + + Return: training dataloader, validation dataloader + """ + + print('Generating training and validation dataloaders ...') + print('=' * 30) + + params = RunParams(args.model_name, args.batch_size, args.num_training) + train_dataloader = get_dataloader(args.train_file, params) + val_dataloader = get_dataloader(args.val_file, params) + + print('Finished generating dataloaders!') + print('=' * 30) + + return train_dataloader, val_dataloader + + +# --------------------------------------------------------------------------- +def initialize_model(args: Args, train_dataloader: DataLoader, + val_dataloader: DataLoader) -> Settings: + """ + Initialize the model and get settings + + `args`: Command-line arguments + `train_dataloader`: Training dataloader + `val_dataloader`: Validation dataloader + + Return: Training settings including model + """ + + print('Initializing', args.model_name, 'model ...') + print('=' * 30) + + model = AutoModelForTokenClassification.from_pretrained( + args.model_name, id2label=ID2NER_TAG, label2id=NER_TAG2ID) + device = torch.device( + 'cuda') if torch.cuda.is_available() else torch.device('cpu') + model.to(device) + optimizer = cast( + optimization.AdamW, + AdamW(model.parameters(), + lr=args.learning_rate, + weight_decay=args.weight_decay)) + num_training_steps = args.num_epochs * len(train_dataloader) + + if args.lr_scheduler: + lr_scheduler = get_scheduler('linear', + optimizer=optimizer, + num_warmup_steps=0, + num_training_steps=num_training_steps) + else: + 
lr_scheduler = None + + return Settings(model, optimizer, train_dataloader, val_dataloader, + lr_scheduler, args.num_epochs, num_training_steps, device) + + +# --------------------------------------------------------------------------- +def train(settings: Settings, + crit_metric: str) -> Tuple[Any, pd.DataFrame, Metrics, Metrics]: + """ + Train the classifier + + Parameters: + `settings`: Model settings (NamedTuple) + `crit_metric`: Metric used for selecting best epoch + + Return: Tuple of best model, training stats dataframe, train_metrics, + and validation_metrics + """ + + model = settings.model + progress_bar = tqdm(range(settings.num_training_steps)) + train_progress = pd.DataFrame(columns=[ + 'epoch', 'train_precision', 'train_recall', 'train_f1', 'train_loss', + 'val_precision', 'val_recall', 'val_f1', 'val_loss' + ]) + best_model = model + best_val = Metrics(0, 0, 0, 0) + best_train = Metrics(0, 0, 0, 0) + model.train() + + for epoch in range(settings.num_epochs): + + train_loss = train_epoch(settings, progress_bar) + + model.eval() + train_metrics = get_ner_metrics(model, settings.train_dataloader, + settings.device) + val_metrics = get_ner_metrics(model, settings.val_dataloader, + settings.device) + + if getattr(val_metrics, crit_metric) > getattr(best_val, crit_metric): + best_val = val_metrics + best_train = train_metrics + best_model = copy.deepcopy(model) + + epoch_row = pd.DataFrame( + { + 'epoch': epoch, + 'train_precision': train_metrics.precision, + 'train_recall': train_metrics.recall, + 'train_f1': train_metrics.f1, + 'train_loss': train_metrics.loss, + 'val_precision': val_metrics.precision, + 'val_recall': val_metrics.recall, + 'val_f1': val_metrics.f1, + 'val_loss': val_metrics.loss + }, + index=[0]) + train_progress = pd.concat([train_progress, epoch_row]) + + print(f'Epoch {epoch + 1}:\n' + f'Train Loss: {train_loss:.5f}\n' + f'Val Loss: {val_metrics.loss:.5f}\n' + f'Train Precision: {train_metrics.precision:.3f}\n' + f'Train Recall: 
{train_metrics.recall:.3f}\n' + f'Train F1: {train_metrics.f1:.3f}\n' + f'Val Precision: {val_metrics.precision:.3f}\n' + f'Val Recall: {val_metrics.recall:.3f}\n' + f'Val F1: {val_metrics.f1:.3f}') + + print('Finished model training!') + print('=' * 30) + print(f'Best Train Precision: {best_train.precision:.3f}\n' + f'Best Train Recall: {best_train.recall:.3f}\n' + f'Best Train F1: {best_train.f1:.3f}\n' + f'Best Val Precision: {best_val.precision:.3f}\n' + f'Best Val Recall: {best_val.recall:.3f}\n' + f'Best Val F1: {best_val.f1:.3f}\n') + + return best_model, train_progress, best_train, best_val + + +# --------------------------------------------------------------------------- +def train_epoch(settings: Settings, progress_bar: tqdm) -> float: + """ + Perform one epoch of model training + + Parameters: + `settings`: Model settings (NamedTuple) + `progress_bar`: tqdm instance for tracking progress + + Return: Average train loss per observation + """ + train_loss = 0 + num_train = 0 + for batch in settings.train_dataloader: + batch = {k: v.to(settings.device) for k, v in batch.items()} + num_train += len(batch['input_ids']) + outputs = settings.model(**batch) + loss = outputs.loss + loss.backward() + train_loss += loss.item() + settings.optimizer.step() + if settings.lr_scheduler: + settings.lr_scheduler.step() + settings.optimizer.zero_grad() + progress_bar.update(1) + return train_loss / num_train + + +# --------------------------------------------------------------------------- +def main() -> None: + """ Main function """ + + args = get_args() + out_dir = args.out_dir + + if not os.path.isdir(out_dir): + os.mkdir(out_dir) + + model_name = args.model_name + train_dataloader, val_dataloader = get_dataloaders(args) + + if args.seed: + set_random_seed(45) + settings = initialize_model(args, train_dataloader, val_dataloader) + + print('Starting model training...') + print('=' * 30) + + model, train_stats_df, train_metrics, val_metrics = train( + settings, 
class Args(NamedTuple):
    """
    Command-line arguments

    Immutable container for the values parsed by get_args().
    """
    # Open text handle on the input CSV (created by argparse.FileType,
    # decoded as ISO-8859-1)
    file: TextIO
    # Directory the processed file is written to
    out_dir: str
    # ISO 3166-1 output format: 'alpha-2', 'alpha-3', 'full' or 'numeric'
    country_format: str
def extract_countries(strings: pd.Series, country_format: str) -> pd.Series:
    """
    Extract country names from column of strings

    Every country known to pycountry is searched for by its name and by its
    alpha-3 code, as whole words. A country is reported once per mention, so
    repeated mentions yield repeated entries. The entries found in one string
    are combined with join_commas().

    Parameters:
    `strings`: Column of strings that may contain country mentions
    `country_format`: Country code output format ('alpha-2', 'alpha-3',
    'numeric'; anything else yields the country name)

    Return: Column of extracted country names
    """

    # Attribute of the pycountry record reported for each output format
    output_attrs = {
        'alpha-2': 'alpha_2',
        'alpha-3': 'alpha_3',
        'numeric': 'numeric'
    }
    output_attr = output_attrs.get(country_format, 'name')

    # Compile the patterns once instead of once per input string. Names are
    # escaped so that characters such as '.' in a country name are matched
    # literally rather than interpreted as regex syntax.
    mention_patterns = [(re.compile(fr'\b{re.escape(mention)}\b'), country)
                        for country in pycountry.countries
                        for mention in (country.name, country.alpha_3)]

    countries = []
    for string in strings:
        found_countries = []
        for pattern, country in mention_patterns:
            n_matches = len(pattern.findall(string))
            if n_matches:
                found_country = getattr(country, output_attr)
                # Keep one entry per mention
                found_countries.extend([found_country] * n_matches)

        countries.append(join_commas(found_countries))

    return pd.Series(countries)
def make_filename(out_dir: str, infile_name: str) -> str:
    """
    Build the output path: the input file's basename placed in `out_dir`

    Parameters:
    `out_dir`: Output directory
    `infile_name`: Input file name (any leading directories are discarded)

    Return: Output filename
    """

    basename = os.path.basename(infile_name)

    return os.path.join(out_dir, basename)
def get_args() -> Args:
    """ Build the command-line parser and return the parsed arguments """

    parser = argparse.ArgumentParser(
        description=('Process data that has been manually reviewed '
                     'after flagging'),
        formatter_class=CustomHelpFormatter)

    # The input CSV is opened by argparse and decoded as ISO-8859-1
    csv_reader = argparse.FileType('rt', encoding='ISO-8859-1')
    parser.add_argument('file',
                        metavar='FILE',
                        type=csv_reader,
                        help='CSV file of articles')
    parser.add_argument('-o',
                        '--out-dir',
                        metavar='DIR',
                        type=str,
                        default='out/',
                        help='Output directory')

    parsed = parser.parse_args()

    return Args(file=parsed.file, out_dir=parsed.out_dir)
+@pytest.fixture(name='raw_data') +def fixture_raw_data() -> pd.DataFrame: + """ DataFrame representative of the input data """ + + columns = [ + 'ID', 'text', 'extracted_url', 'best_common', 'best_common_prob', + 'best_full', 'best_full_prob', 'best_name', 'best_name_prob', + 'article_count', 'duplicate_urls', 'duplicate_names', 'low_prob', + 'review_low_prob', 'review_dup_urls', 'review_dup_names', + 'review_notes_low_prob', 'review_notes_dup_urls', + 'review_notes_dup_names', 'publication_date' + ] + + df = pd.DataFrame( + [ + [ # Nothing to be done + '1', 'text1', 'url1', '', '', '', '', 'name1', '1.0', '1', '', + '', '', '', '', '', '', '', '', '1/1/2011' + ], + [ # Flagged low name probability, marked keep + '2', 'text2', 'url2', '', '', '', '', 'name2', '0.85', '1', '', + '', 'low_prob_best_name', 'do not remove', '', '', '', '', '', + '1/2/2011' + ], + [ # Flagged low name probability, marked remove + '3', 'text3', 'url3', '', '', '', '', 'name3', '0.85', '1', '', + '', 'low_prob_best_name', 'remove', '', '', '', '', '', + '1/3/2011' + ], + [ # Same URL as 5, marked do not merge + '4', 'text4', 'url4', '', '', '', '', 'name4', '0.96', '1', + '5.0', '', '', '', 'do not merge', '', '', '', '', '1/4/2011' + ], + [ # Same URL as 4, marked do not merge + '5', 'text5', 'url4', '', '', '', '', 'name5', '0.97', '1', + '4.0', '', '', '', 'do not merge', '', '', '', '', '1/5/2011' + ], + [ # Same URL as 7, marked merge + '6', 'text6', 'url6', 'name6', '0.96', '', '', 'name6', '0.96', + '1', '7.0', '', '', '', 'merge on record with best name prob', + '', '', '', '', '1/6/2011' + ], + [ # Same URL as 6, marked merge + '7', 'text7', 'url6', 'name7', '0.97', '', '', 'name7', '0.97', + '1', '6.0', '', '', '', 'merge on record with best name prob', + '', '', '', '', '1/7/2011' + ], + [ # Same name as 9, marked do not merge + '8', 'text8', 'url8', 'name8', '0.99', '', '', 'name8', '0.99', + '1', '', '9', '', '', '', 'do not merge', '', '', '', + '1/8/2011' + ], + [ # Same 
name as 8, marked do not merge + '9', 'text9', 'url9', 'name8', '0.99', '', '', 'name8', '0.99', + '1', '', '8', '', '', '', 'do not merge', '', '', '', + '1/9/2011' + ], + [ # Same name as 11, marked merge all "dup name" IDs + '10', 'text10', 'url10', 'name10', '0.99', '', '', 'name10', + '0.99', '1', '', '11', '', '', '', 'merge all "dup name" IDs', + '', '', '', '1/10/2011' + ], + [ # Same name as 10, marked merge all "dup name" IDs + '11', 'text11', 'url11', 'name10', '0.98', '', '', 'name10', + '0.98', '1', '', '10', '', '', '', 'merge all "dup name" IDs', + '', '', '', '1/11/2011' + ], + [ # Same name as 13 and 14, marked do not merge + '12', 'text12', 'url12', 'name12', '0.98', '', '', 'name12', + '0.98', '1', '', '13, 14', '', '', '', 'do not merge', '', '', + '', '1/12/2011' + ], + [ # Same name as 12 and 14 marked merge only: + '13', 'text13', 'url13', 'name12', '0.99', '', '', 'name12', + '0.99', '1', '', '12, 14', '', '', '', 'merge only:', '', '', + '13, 14', '1/13/2011' + ], + [ # Same name as 12 and 13, marked merge only: + '14', 'text14', 'url14', 'name12', '0.97', '', '', 'name12', + '0.98', '1', '', '12, 13', '', '', '', 'merge only:', '', '', + '13, 14', '1/14/2011' + ], + [ # Same name as 17 + '15', 'text15', 'url15', 'name15', '0.91', '', '', 'name15', + '0.91', '1', '', '17', 'low_prob_best_name', 'do not remove', + '', 'merge only:', '', '', '15, 17', '1/12/2011' + ], + [ # Same URL as 17, same name as 18 + '16', 'text17', 'url16', 'name16', '0.96', '', '', 'name16', + '0.96', '1', '17', '18', 'low_prob_best_name', 'do not remove', + 'merge on record with best name prob', 'do not merge', '', '', + '13, 14', '1/13/2011' + ], + [ # Same URL as 16, same name as 15 + '17', 'text17', 'url16', 'name15', '0.99', '', '', 'name15', + '0.99', '1', '16', '15', '', '', + 'merge on record with best name prob', 'merge only:', '', '', + '15, 17', '1/14/2011' + ], + [ # Same name as 16 + '18', 'text18', 'url18', 'name16', '0.98', '', '', 'name16', + '0.98', 
def ids_are_ok(string: str) -> bool:
    """
    Check that the value in a column which should only contain IDs or a list
    of IDs is valid

    A valid value consists solely of digits, decimal points, commas and
    spaces, and contains at least one digit.

    Parameters:
    `string`: Input string

    Return: `True` or `False` stating if the value is OK
    """

    digits = '0123456789'
    allowable = digits + '., '

    # A value without any digit names no ID at all. This rejects the empty
    # string as well as separator-only values such as ', ' or '.'.
    if not any(char in digits for char in string):
        return False

    return all(char in allowable for char in string)
def check_for_responses(df: pd.DataFrame) -> str:
    """
    Check that every flagged row has an answer in the matching manual
    review column.

    One error line is produced per flagged row whose review column is
    empty, naming the flag column and the row's ID. If nothing is missing,
    an empty string is returned.

    Parameters:
    `df`: Input dataframe

    Return: Error message string
    """

    # Flag column -> column in which the reviewer must respond to it
    response_for_flag = {
        'duplicate_urls': 'review_dup_urls',
        'duplicate_names': 'review_dup_names',
        'low_prob': 'review_low_prob'
    }

    errors = []
    for flag_col, response_col in response_for_flag.items():
        is_flagged = df[flag_col] != ''
        is_unanswered = df[response_col] == ''

        for row in df[is_flagged & is_unanswered].itertuples():
            errors.append('ERROR: Missing response to flagged column '
                          f'"{flag_col}" for ID {row.ID}\n')

    return ''.join(errors)
def check_note_column_values(df: pd.DataFrame) -> str:
    """
    Validate the notes that accompany a "merge only:" instruction.

    For every row whose `review_dup_names` value is "merge only:", the
    `review_notes_dup_names` value must pass ids_are_ok(). Each row that
    fails adds one error line; if all rows are fine, an empty string is
    returned.

    Parameters:
    `df`: Input dataframe

    Return: Error message string
    """

    problems = []
    for row in df.itertuples():
        # Only "merge only:" relies on the notes column
        if row.review_dup_names != 'merge only:':
            continue

        ids = row.review_notes_dup_names
        if ids_are_ok(ids):
            continue

        problems.append(f'ERROR: Invalid IDs for ID {row.ID} '
                        'in column "review_notes_dup_names": '
                        f'{ids}.\n')

    return ''.join(problems)
def reformat_date(date: str) -> str:
    """
    Reformat date from M/D/YYYY format to YYYY-MM-DD so that they can be
    sorted as strings properly

    An empty string is returned unchanged. Any other value that does not
    start with a M/D/YYYY date terminates the program through sys.exit().

    Parameters:
    `date`: Date in M/D/YYYY format

    Return: Date in YYYY-MM-DD format
    """

    if date == '':
        return date

    # The groups must be named: they are looked up by name below
    match = re.match(
        r'''
        (?P<month>\d{1,2})/  # 1 or 2 digit month
        (?P<day>\d{1,2})/    # 1 or 2 digit day
        (?P<year>\d{4})      # 4 digit year
        ''', date, re.X)

    if not match:
        sys.exit('ERROR: Dates must be in M/D/YYYY format')

    # Zero-pad month and day so that string order equals date order
    month = match['month'].zfill(2)
    day = match['day'].zfill(2)

    return match['year'] + '-' + month + '-' + day
def clean_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean dataframe before further processing.

    Applies remove_decimals() to the columns holding IDs, applies
    reformat_date() to `publication_date`, and drops the `text` column.
    The input dataframe is not modified; a cleaned copy is returned.

    Parameters:
    `df`: Input dataframe

    Return: Cleaned dataframe
    """

    # Work on a copy so the caller's dataframe keeps its columns and values
    df = df.copy()

    id_cols = [
        'ID', 'duplicate_urls', 'duplicate_names', 'review_notes_dup_names'
    ]
    for col in id_cols:
        df[col] = df[col].map(remove_decimals)

    df['publication_date'] = df['publication_date'].map(reformat_date)

    return df.drop('text', axis='columns')
def check_instructions(id_col: pd.Series,
                       review_col: pd.Series) -> Tuple[List[str], str]:
    """
    Check that the review instructions given for a set of rows agree.

    A cell may hold several instructions separated by ", "; all of them are
    taken into account. When exactly one distinct instruction is present
    the error string is empty, otherwise it lists each ID with its cell.

    Parameters:
    `id_col`: Column of problematic IDs
    `review_col`: Column of conflicting instructions

    Return: tuple(Unique set of instructions, error string or empty string)
    """

    # Flatten the comma-separated cells into one list of instructions
    flattened = [
        instruction for cell in review_col
        for instruction in cell.split(', ')
    ]
    instructions = pd.Series(flattened).unique()

    if len(instructions) == 1:
        return instructions, ''

    header = f'ERROR: Conflicting instructions in column {review_col.name}:\n'
    conflict_lines = '\n'.join(
        f'{id_i}: "{msg_i}"'
        for id_i, msg_i in zip(id_col.values, review_col.values))

    return instructions, header + conflict_lines + '\n'
def process_duplicate_urls(df: pd.DataFrame) -> pd.DataFrame:
    """
    Process rows flagged for duplicate URLs

    Rows without a duplicate-URL flag are passed through. Flagged rows are
    grouped by `extracted_url`; each group is either merged into a single
    record or kept as separate rows, depending on the reviewer's instruction.
    The duplicate-URL flag and review columns are absent from the output.

    If any group carries conflicting instructions, the problems of all groups
    are collected and the program terminates through sys.exit().

    Parameters:
    `df`: Input dataframe

    Return: Dataframe
    """

    out_df = df[df['duplicate_urls'] == '']
    duplicate_url_df = df[df['duplicate_urls'] != '']
    exit_message = ''
    cols_for_removal = [
        'duplicate_urls', 'review_dup_urls', 'review_notes_dup_urls'
    ]

    # Unflagged rows need no further work besides losing the review columns
    out_df.drop(cols_for_removal, axis='columns', inplace=True)

    for _, group_df in duplicate_url_df.groupby('extracted_url'):

        instructs, problem = check_instructions(group_df['ID'],
                                                group_df['review_dup_urls'])

        # Keep going so that every conflicting group is reported at once
        if problem:
            exit_message += problem
            continue

        if instructs[0] == 'merge on record with best name prob':
            # Sort so that 'first' below picks values from the record with
            # the highest name probability.
            # NOTE(review): main() reads the CSV with dtype=str, so this sort
            # compares probabilities as strings - confirm that is acceptable.
            group_df = group_df.sort_values(
                'best_name_prob', ascending=False).groupby('extracted_url')
            # IDs, names and probabilities of all records are joined;
            # the review columns are left out of the aggregation and so
            # do not appear in the merged record.
            group_df = group_df.agg({
                'ID': join_commas,
                'best_common': join_commas,
                'best_common_prob': join_commas,
                'best_full': join_commas,
                'best_full_prob': join_commas,
                'article_count': len,
                'duplicate_names': 'first',
                'review_dup_names': 'first',
                'review_notes_dup_names': 'first',
                'publication_date': 'first'
            }).reset_index()

            # presumably rebuilds the best name columns from the joined
            # name/probability lists - verify against process_names
            group_df = wrangle_names(group_df, 'best_common',
                                     'best_common_prob', 'best_full',
                                     'best_full_prob')
        else:
            # Every other instruction keeps the rows as they are.
            # NOTE(review): this includes 'conflicting record(s) to be
            # removed' - confirm those rows are meant to be kept here.
            group_df.drop(cols_for_removal, axis='columns', inplace=True)

        out_df = pd.concat([out_df, group_df])

    if exit_message:
        sys.exit(exit_message)

    return out_df
+ + Parameters: + `df`: Input dataframe + + Return: Dataframe + """ + + out_df = df[df['duplicate_names'] == ''] + duplicate_name_df = df[df['duplicate_names'] != ''] + exit_message = '' + cols_for_removal = [ + 'duplicate_names', 'review_dup_names', 'review_notes_dup_names' + ] + + for _, group_df in duplicate_name_df.groupby('best_name'): + + do_not_merge = group_df[group_df['review_dup_names'] == 'do not merge'] + do_merge = group_df[group_df['review_dup_names'] != 'do not merge'] + + if len(do_merge) > 0: + _, problem = check_instructions(do_merge['ID'], + do_merge['review_dup_names']) + + if problem: + exit_message += problem + continue + + do_merge = do_merge.sort_values( + 'publication_date', ascending=False).groupby('best_name') + do_merge = do_merge.agg({ + 'ID': join_commas, + 'best_common': join_commas, + 'best_common_prob': join_commas, + 'best_full': join_commas, + 'best_full_prob': join_commas, + 'article_count': len, + 'extracted_url': 'first', + 'publication_date': 'first' + }).reset_index() + + do_merge = wrangle_names(do_merge, 'best_common', + 'best_common_prob', 'best_full', + 'best_full_prob') + + out_df = pd.concat([out_df, do_merge, do_not_merge]) + + out_df.drop(cols_for_removal, axis='columns', inplace=True) + + if exit_message: + sys.exit(exit_message) + + return out_df + + +# --------------------------------------------------------------------------- +def test_process_duplicate_names(raw_data: pd.DataFrame) -> None: + """ Test process_duplicate_names """ + + out_df = process_duplicate_names( + process_duplicate_urls(drop_low_probs(clean_df(raw_data)))) + + # Previous rows not removed + assert '1' in out_df['ID'].values + assert '2' in out_df['ID'].values + assert '4' in out_df['ID'].values + + # Same name, marked do not merge + assert '8' in out_df['ID'].values + assert '9' in out_df['ID'].values + + # Same name, marked merge all "dup name" IDs + assert '11, 10' in out_df['ID'].values + assert 'url10' not in out_df['extracted_url'].values 
def count_articles(id_list: str) -> str:
    """
    Count the comma-separated entries of an ID list

    Parameters:
    `id_list`: String which is a list of IDs

    Return: Number of articles in list as string
    """

    # Splitting on commas yields one piece more than there are commas
    n_ids = len(id_list.split(','))

    return str(n_ids)
pd.DataFrame) -> pd.DataFrame: + """ + Process manually reviewed data. + + Parameters: + `df`: Manually reviewed dataframe + + Return: Processed dataframe + """ + + df = clean_df(df) + df = drop_low_probs(df) + df = process_duplicate_urls(df) + df = process_duplicate_names(df) + df = update_article_count(df) + + return df + + +# --------------------------------------------------------------------------- +def make_filename(out_dir: str, infile_name: str) -> str: + ''' + Make filename for output reusing input file's basename + + Parameters: + `out_dir`: Output directory + `infile_name`: Input file name + + Return: Output filename + ''' + + return os.path.join(out_dir, os.path.basename(infile_name)) + + +# --------------------------------------------------------------------------- +def test_make_filenames() -> None: + """ Test make_filenames() """ + + assert make_filename( + 'out/checked_urls', + 'out/urls/predictions.csv') == ('out/checked_urls/predictions.csv') + + +# --------------------------------------------------------------------------- +def main() -> None: + """ Main function """ + + args = get_args() + out_dir = args.out_dir + + if not os.path.isdir(out_dir): + os.makedirs(out_dir) + + outfile = make_filename(out_dir, args.file.name) + + in_df = pd.read_csv(args.file, dtype=str).fillna('') + + check_data(in_df) + + out_df = process_data(in_df) + + out_df.to_csv(outfile, index=False) + + print(f'Done. 
Wrote output to {outfile}.') + + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + main() diff --git a/src/process_names.py b/src/process_names.py new file mode 100755 index 0000000..5aeb579 --- /dev/null +++ b/src/process_names.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python3 +""" +Purpose: Process predicted names +Authors: Kenneth Schackart +""" + +import argparse +import os +from itertools import chain +from typing import Dict, Iterator, List, NamedTuple, TextIO, Tuple, Union + +import pandas as pd +import pytest +from pandas.testing import assert_series_equal + +from inventory_utils.custom_classes import CustomHelpFormatter + +pd.options.mode.chained_assignment = None + + +# --------------------------------------------------------------------------- +class Args(NamedTuple): + """ Command-line arguments """ + file: TextIO + out_dir: str + + +# --------------------------------------------------------------------------- +def get_args() -> Args: + """ Parse command-line arguments """ + + parser = argparse.ArgumentParser(description=('Process predicted names'), + formatter_class=CustomHelpFormatter) + + parser.add_argument('file', + metavar='FILE', + type=argparse.FileType('rt', encoding='ISO-8859-1'), + help='CSV file of predictions and metadata') + parser.add_argument('-o', + '--out-dir', + metavar='DIR', + type=str, + default='out/', + help='Output directory') + + args = parser.parse_args() + + return Args(args.file, args.out_dir) + + +# --------------------------------------------------------------------------- +@pytest.fixture(name='raw_data') +def fixture_raw_data() -> pd.DataFrame: + """ DataFrame representative of the input data """ + + columns = [ + 'ID', 'text', 'common_name', 'common_prob', 'full_name', 'full_prob', + 'extracted_url' + ] + + df = pd.DataFrame( + [ + [ # Only common name + '123', 'text', 'Sen', '0.99', '', '', 'url1' + ], + [ # Only full name + '456', 'text', '', '', 'Chihiro Ogino', 
'0.98', 'url2' + ], + [ # One of each + '789', 'text', 'Sen', '0.99', 'Chihiro Ogino', '0.98', 'url3' + ], + [ # Two common + '147', 'text', 'Sen, Kamaji', '0.97, 0.99', '', '', 'url4' + ], + [ # Two full + '258', 'text', '', '', 'Yubaba, Haku', '0.95, 0.98', 'url5' + ], + [ # Two of each + '369', 'text', 'Sen, Kamaji', '0.97, 0.99', 'Yubaba, Haku', + '0.95, 0.98', 'url6' + ], + [ # No name + '321', 'No Face', '', '', '', '', 'url7' + ] + ], + columns=columns) + + return df + + +# --------------------------------------------------------------------------- +def filter_names(df: pd.DataFrame) -> pd.DataFrame: + """ + Remove articles for which no names were predicted + + Parameters: + `df`: Input dataframe + + Return: Dataframe with rows without names are removed + """ + + return df[~((df['common_name'] == '') & (df['full_name'] == ''))] + + +# --------------------------------------------------------------------------- +def test_filter_names(raw_data: pd.DataFrame) -> None: + """ Test filter_names() """ + + # Article ID 321 is the only article without any predicted names + remaining_article_ids = pd.Series( + ['123', '456', '789', '147', '258', '369'], name='ID') + + return_df = filter_names(raw_data) + + assert (raw_data.columns == return_df.columns).all() + assert_series_equal(return_df['ID'], remaining_article_ids) + + +# --------------------------------------------------------------------------- +def make_dict(keys: List, values: Union[List, Iterator[float]]) -> Dict: + """ + Make a dictionary from lists of keys and values + + Parameters: + `keys`: list of keys + `values`: list of values + + Return: Dictionary + """ + + # Replace single character keys (names) with empty string + keys = [key if len(key) != 1 else '' for key in keys] + + # Assign zero probability (value) to empty strings + return {key: value if key != '' else 0 for key, value in zip(keys, values)} + + +# --------------------------------------------------------------------------- +def 
test_make_dict() -> None: + """ Test make_dict() """ + + names = ['mmCIF', 'PDB', 'A'] + probs = [0.987, 0.775, 0.95] + + assert make_dict(names, probs) == {'mmCIF': 0.987, 'PDB': 0.775, '': 0} + + +# --------------------------------------------------------------------------- +def concat_dicts(*args: Dict) -> Dict: + """ + Concatenate multiple dictionaries into one + + Parameters: + `*args`: Any number of dictionaries + + Return: Concatenated dictionary + """ + + return dict(chain.from_iterable(d.items() for d in args)) + + +# --------------------------------------------------------------------------- +def test_combine_dicts() -> None: + """ Test combine_dicts() """ + + comm = {'mmCIF': 0.987, 'PDB': 0.775} + full = {'Protein Data Bank': 0.717} + combined = {'mmCIF': 0.987, 'PDB': 0.775, 'Protein Data Bank': 0.717} + + assert concat_dicts(comm, full) == combined + + +# --------------------------------------------------------------------------- +def select_names(common_names: str, common_probs: str, full_names: str, + full_probs: str) -> pd.Series: + """ + Select common name with highest probability, full name with highest + probability, and name with overall highest probability + + Parameters: + `common_names`: Predicted common name(s) + `common_probs`: Probabilities of predicted common name(s) + `full_names`: Predicted full name(s) + `full_probs`: Probabilities of predicted full name(s) + + Return: Pandas Series with probable common name, probable full name, + best overall name, and probabilities of each + """ + def convert_number(s: str) -> float: + return float(s) if s else 0 + + common_dict = make_dict(common_names.split(', '), + map(convert_number, common_probs.split(', '))) + full_dict = make_dict(full_names.split(', '), + map(convert_number, full_probs.split(', '))) + combined_dict = concat_dicts(full_dict, common_dict) + + best_common = sorted( + common_dict, + key=common_dict.get, # type: ignore + reverse=True)[0] + best_common_prob = 
combined_dict[best_common] + best_full = sorted( + full_dict, + key=full_dict.get, # type: ignore + reverse=True)[0] + best_full_prob = combined_dict[best_full] + best_name = sorted( + combined_dict, + key=combined_dict.get, # type: ignore + reverse=True)[0] + best_prob = combined_dict[best_name] + + return pd.Series([ + best_common, best_common_prob, best_full, best_full_prob, best_name, + best_prob + ], + index=[ + 'best_common', 'best_common_prob', 'best_full', + 'best_full_prob', 'best_name', 'best_name_prob' + ]) + + +# --------------------------------------------------------------------------- +def test_select_names() -> None: + """ Test select_names() """ + + idx = [ + 'best_common', 'best_common_prob', 'best_full', 'best_full_prob', + 'best_name', 'best_name_prob' + ] + # Only one found + in_list = ['LBD2000', '0.997', '', ''] + output = pd.Series(['LBD2000', 0.997, '', 0, 'LBD2000', 0.997], index=idx) + assert_series_equal(select_names(*in_list), output) + + # Common name is better + in_list = ['PDB', '0.983', 'Protein Data Bank', '0.964'] + output = pd.Series( + ['PDB', 0.983, 'Protein Data Bank', 0.964, 'PDB', 0.983], index=idx) + assert_series_equal(select_names(*in_list), output) + + # Full name is better + in_list = ['PDB', '0.963', 'Protein Data Bank', '0.984'] + output = pd.Series( + ['PDB', 0.963, 'Protein Data Bank', 0.984, 'Protein Data Bank', 0.984], + index=idx) + assert_series_equal(select_names(*in_list), output) + + # Multiple to unpack + in_list = ['mmCIF, PDB', '0.987, 0.775', 'Protein Data Bank', '0.717'] + output = pd.Series( + ['mmCIF', 0.987, 'Protein Data Bank', 0.717, 'mmCIF', 0.987], + index=idx) + assert_series_equal(select_names(*in_list), output) + + # Equal probability, favor full name + in_list = ['PDB', '0.963', 'Protein Data Bank', '0.963'] + output = pd.Series( + ['PDB', 0.963, 'Protein Data Bank', 0.963, 'Protein Data Bank', 0.963], + index=idx) + assert_series_equal(select_names(*in_list), output) + + # Single letter name 
+ in_list = ['mmCIF, A', '0.987, 0.99', 'F, G', '0.717, 0.912'] + output = pd.Series(['mmCIF', 0.987, '', 0, 'mmCIF', 0.987], index=idx) + assert_series_equal(select_names(*in_list), output) + + +# --------------------------------------------------------------------------- +def wrangle_names(df: pd.DataFrame, + common_col: str = 'common_name', + common_prob_col: str = 'common_prob', + full_name_col: str = 'full_name', + full_prob_col: str = 'full_prob') -> pd.DataFrame: + """ + Place best common name, best full name, best overall name, and best name + probability in new columns + + Parameters: + `df`: Dataframe + + Return: Dataframe with 4 new columns + """ + + new_cols = [ + 'best_common', 'best_common_prob', 'best_full', 'best_full_prob', + 'best_name', 'best_name_prob' + ] + + df[new_cols] = df.apply(lambda x: list( + select_names(x[common_col], x[common_prob_col], x[full_name_col], x[ + full_prob_col])), + axis=1, + result_type='expand') + + return df.reset_index(drop=True) + + +# --------------------------------------------------------------------------- +def test_wrangle_names(raw_data: pd.DataFrame) -> None: + """ Test wrangle_names() """ + + out_df = wrangle_names(filter_names(raw_data)) + + best_common = pd.Series(['Sen', '', 'Sen', 'Kamaji', '', 'Kamaji'], + name='best_common') + best_full = pd.Series( + ['', 'Chihiro Ogino', 'Chihiro Ogino', '', 'Haku', 'Haku'], + name='best_full') + best_overall = pd.Series( + ['Sen', 'Chihiro Ogino', 'Sen', 'Kamaji', 'Haku', 'Kamaji'], + name='best_name') + best_prob = pd.Series([0.99, 0.98, 0.99, 0.99, 0.98, 0.99], + name='best_name_prob') + + assert_series_equal(out_df['best_common'], best_common) + assert_series_equal(out_df['best_full'], best_full) + assert_series_equal(out_df['best_name'], best_overall) + assert_series_equal(out_df['best_name_prob'], best_prob) + + +# --------------------------------------------------------------------------- +def process_df(df: pd.DataFrame) -> Tuple[pd.DataFrame, int]: + """ + 
Determine best short and full names, and remove rows with no names + + Parameters: + `df`: Input dataframe + + Return: Tuple of Dataframe, number of no-names + """ + + orig_rows = len(df) + + out_df = wrangle_names(filter_names(df)) + num_bad_names = orig_rows - len(out_df) + + return out_df, num_bad_names + + +# --------------------------------------------------------------------------- +def test_process_df(raw_data: pd.DataFrame) -> None: + """ Test filter_df() """ + + _, num_no_name = process_df(raw_data) + + assert num_no_name == 1 + + +# --------------------------------------------------------------------------- +def make_filename(out_dir: str, infile_name: str) -> str: + ''' + Make filename for output reusing input file's basename + + Parameters: + `out_dir`: Output directory + `infile_name`: Input file name + + Return: Output filename + ''' + + return os.path.join(out_dir, os.path.basename(infile_name)) + + +# --------------------------------------------------------------------------- +def test_make_filenames() -> None: + """ Test make_filenames() """ + + assert make_filename( + 'out/checked_urls', + 'out/urls/predictions.csv') == ('out/checked_urls/predictions.csv') + + +# --------------------------------------------------------------------------- +def main() -> None: + """ Main function """ + + args = get_args() + out_dir = args.out_dir + + if not os.path.isdir(out_dir): + os.makedirs(out_dir) + + outfile = make_filename(out_dir, args.file.name) + + in_df = pd.read_csv(args.file, + dtype=str).fillna('').drop_duplicates(['ID']) + + out_df, num_no_name = process_df(in_df) + + plu = 's' if num_no_name != 1 else '' + print(f'Done processing names.\n{num_no_name} ' + f'article{plu} with no names removed.') + + out_df.to_csv(outfile, index=False) + + print(f'Wrote output to {outfile}.') + + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + main() diff --git a/src/query_epmc.py b/src/query_epmc.py new 
file mode 100755 index 0000000..8084a11 --- /dev/null +++ b/src/query_epmc.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +""" +Purpose: Run query on EuropePMC +Authors: Ana Maria Istrate and Kenneth Schackart +""" + +import argparse +import os +import re +from datetime import datetime +from typing import List, NamedTuple, Tuple, cast + +import pandas as pd +import requests + +from inventory_utils.custom_classes import CustomHelpFormatter + + +# --------------------------------------------------------------------------- +class Args(NamedTuple): + """ Command-line arguments """ + query: str + from_date: str + to_date: str + out_dir: str + + +# --------------------------------------------------------------------------- +def get_args() -> Args: + """ Parse command-line arguments """ + + parser = argparse.ArgumentParser( + description=('Query EuropePMC to retrieve articles. ' + 'Saves csv of results and file of query dates'), + formatter_class=CustomHelpFormatter) + + parser.add_argument('query', + metavar='QUERY', + type=str, + help='EuropePMC query to run (file or string)') + parser.add_argument('-f', + '--from-date', + metavar='DATE', + type=str, + required=True, + help='Articles published after (file or string)') + parser.add_argument('-t', + '--to-date', + metavar='DATE', + type=str, + default=None, + help='Articles published before (default: today)') + parser.add_argument('-o', + '--out-dir', + metavar='DIR', + type=str, + default='out/', + help='Output directory') + + args = parser.parse_args() + + if os.path.isfile(args.query): + args.query = open(args.query).read() + if os.path.isfile(args.from_date): + args.from_date = open(args.from_date).read() + + date_pattern = re.compile( + r'''^ # Beginning of date string + [\d]{4} # Must start with 4 digit year + (-[\d]{2} # Optionally 2 digit month + (-[\d]{2})? # Optionally 2 digit day + )? 
# Finish making month optional + $ # Followed by nothing else + ''', re.X) + for date in [args.from_date, args.to_date]: + if not re.match(date_pattern, date): + parser.error(f'Date "{date}" must be one of:\n' + '\t\t\tYYYY\n' + '\t\t\tYYYY-MM\n' + '\t\t\tYYYY-MM-DD') + + return Args(args.query, args.from_date, args.to_date, args.out_dir) + + +# --------------------------------------------------------------------------- +def make_filenames(outdir: str) -> Tuple[str, str]: + ''' + Make filenames for output csv file and last date text file + + Parameters: + `outdir`: Output directory + + Return: Tuple of csv and txt filenames + ''' + + csv_out = os.path.join(outdir, 'query_results.csv') + txt_out = os.path.join(outdir, 'last_query_dates.txt') + + return csv_out, txt_out + + +# --------------------------------------------------------------------------- +def test_make_filenames() -> None: + """ Test make_filenames() """ + + assert make_filenames('data/new_query') == ( + 'data/new_query/query_results.csv', + 'data/new_query/last_query_dates.txt') + + +# --------------------------------------------------------------------------- +def clean_results(results: List[dict]) -> pd.DataFrame: + """ + Retrieve the PMIDs, titles, and abstracts from results of query + + Parameters: + `results`: JSON-encoded response (nested dictionary) + + Return: Dataframe of results + """ + + pmids = [] + titles = [] + abstracts = [] + dates = [] + for page in results: + for paper in page.get('resultList').get('result'): # type: ignore + pmids.append(paper.get('pmid')) + titles.append(paper.get('title')) + abstracts.append(paper.get('abstractText')) + dates.append(paper.get('firstPublicationDate')) + + return pd.DataFrame({ + 'id': pmids, + 'title': titles, + 'abstract': abstracts, + 'publication_date': dates + }) + + +# --------------------------------------------------------------------------- +def run_query(query: str, from_date: str, to_date: str) -> pd.DataFrame: + """ + Run query on 
EuropePMC API + + Parameters: + `query`: Query to use + `from_date`: Articles published after this date + `to_date`: Articles published before this date + + Return: `DataFrame` of returned titles and abstracts + """ + + query = query.format(from_date, to_date) + + prefix = 'https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=' + suffix = '&resultType=core&fromSearchPost=false&format=json' + url = prefix + query + suffix + + results = requests.get(url) + if results.status_code != requests.codes.ok: # pylint: disable=no-member + results.raise_for_status() + + results_json = cast(dict, results.json()) + + result_pages: List[dict] = [] + result_pages.append(results_json) + + while results_json.get('nextPageUrl') is not None: + results = requests.get(results_json['nextPageUrl']) + status = results.status_code + if status != requests.codes.ok: # pylint: disable=no-member + results.raise_for_status() + + results_json = cast(dict, results.json()) + + result_pages.append(results_json) + + return clean_results(result_pages) + + +# --------------------------------------------------------------------------- +def main() -> None: + """ Main function """ + + args = get_args() + out_dir = args.out_dir + + if not os.path.isdir(out_dir): + os.makedirs(out_dir) + + out_df, date_out = make_filenames(out_dir) + + if not args.to_date: + to_date = datetime.today().strftime(r'%Y-%m-%d') + else: + to_date = args.to_date + + from_date = args.from_date + + results = run_query(args.query, from_date, to_date) + + results.to_csv(out_df, index=False) + print(f"{from_date}-{to_date}", file=open(date_out, 'wt')) + + print(f'Done. 
Wrote 2 files to {out_dir}.') + + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + main() diff --git a/src/submit_to_wayback.py b/src/submit_to_wayback.py new file mode 100755 index 0000000..1abb020 --- /dev/null +++ b/src/submit_to_wayback.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +""" +Purpose: Submit URLs to WayBack Machine if they are missing +Authors: Kenneth Schackart +""" + +import argparse +import os +from subprocess import getstatusoutput +from typing import List, NamedTuple, TextIO + +import pandas as pd +from pandas.testing import assert_frame_equal + +from inventory_utils.custom_classes import CustomHelpFormatter + + +# --------------------------------------------------------------------------- +class Args(NamedTuple): + """ Command-line arguments """ + file: TextIO + key: str + + +# --------------------------------------------------------------------------- +def get_args() -> Args: + """ Parse command-line arguments """ + + parser = argparse.ArgumentParser( + description=('Submit request to capture any URLs' + ' not present in WayBack Machine'), + formatter_class=CustomHelpFormatter) + + parser.add_argument('file', + metavar='FILE', + type=argparse.FileType('rt', encoding='ISO-8859-1'), + help='Inventory file') + parser.add_argument('-k', + '--key', + metavar='KEY|FILE', + type=str, + required=True, + help='Internet Archive user secret key') + + args = parser.parse_args() + + if os.path.isfile(args.key): + args.key = open(args.key).read() + + return Args(args.file, args.key) + + +# --------------------------------------------------------------------------- +def expand_cols(df: pd.DataFrame) -> pd.DataFrame: + """ + Expand the URL and wayback columns, by creating a row per URL. 
+ + `df`: Dataframe with extracted_url and wayback_url columns + + Return: Dataframe with row per URL + """ + + df['extracted_url'] = df['extracted_url'].str.split(', ') + df['wayback_url'] = df['wayback_url'].str.split(', ') + + df = df.explode(['extracted_url', 'wayback_url']) + + df.reset_index(drop=True, inplace=True) + + return df + + +# --------------------------------------------------------------------------- +def test_expand_cols() -> None: + """ Test expand_cols() """ + + in_df = pd.DataFrame( + [['url_1, url_2', 'wb_1, no_wayback'], ['url_3', 'wb_3']], + columns=['extracted_url', 'wayback_url']) + + out_df = pd.DataFrame( + [['url_1', 'wb_1'], ['url_2', 'no_wayback'], ['url_3', 'wb_3']], + columns=['extracted_url', 'wayback_url']) + + assert_frame_equal(expand_cols(in_df), out_df) + + +# --------------------------------------------------------------------------- +def get_missing_urls(df: pd.DataFrame) -> List[str]: + """ + Find URLs that are not in the WayBack machine + + Parameters: + `df`: Dataframe with `wayback_url` column + + Return: List of urls not present in WayBack machine + """ + df = expand_cols(df) + df = df[df['wayback_url'] == 'no_wayback'] + + return list(df['extracted_url']) + + +# --------------------------------------------------------------------------- +def test_get_missing_urls() -> None: + """ Test get_missing_urls """ + + # Returns missing URLs + in_df = pd.DataFrame([['url_1', 'wb_1'], ['url_2', 'no_wayback']], + columns=['extracted_url', 'wayback_url']) + missing_urls = ['url_2'] + assert get_missing_urls(in_df) == missing_urls + + # Is okay with extra columns + in_df = pd.DataFrame( + [['123', 'url_1', 'wb_1'], ['456', 'url_2', 'no_wayback']], + columns=['ID', 'extracted_url', 'wayback_url']) + missing_urls = ['url_2'] + assert get_missing_urls(in_df) == missing_urls + + # Can return multiple URLs per resource + in_df = pd.DataFrame( + [['url_1', 'wb_1'], ['url_2, url_3', 'no_wayback, no_wayback'], + ['url_4, url_5', 'wb_4, 
no_wayback']], + columns=['extracted_url', 'wayback_url']) + missing_urls = ['url_2', 'url_3', 'url_5'] + assert get_missing_urls(in_df) == missing_urls + + +# --------------------------------------------------------------------------- +def get_command(url: str, key: str) -> str: + """ + Get submission command for a URL + + Parameters: + `url`: URL to submit + `key`: Internet Archive user secret key + + Return: shell command for submitting + """ + + command = ('curl -X POST ' + '-H "Accept: application/json" ' + f'-H "Authorization: LOW myaccesskey:{key}" ' + f'-d\'url={url}\' https://web.archive.org/save') + + return command + + +# --------------------------------------------------------------------------- +def test_get_command() -> None: + """ Test get_command() """ + + key = 'foo' + url = 'bar' + + expected = ('curl -X POST ' + '-H "Accept: application/json" ' + '-H "Authorization: LOW myaccesskey:foo" ' + '-d\'url=bar\' https://web.archive.org/save') + + assert get_command(url, key) == expected + + +# --------------------------------------------------------------------------- +def submit_urls(urls: List[str], key: str) -> None: + """ + Submit URLs for capture by WayBack Machine + + Parameters: + `urls`: List of URLs to submit + `key`: Internet Archive user secret key + """ + + print(f'Submitting {len(urls)} urls.') + + for url in urls: + print(f'Submitting {url}... 
', end='') + command = get_command(url, key) + retval, out = getstatusoutput(command) + if retval == 0: + print('Done.') + else: + print('Non-zero return value, see output:') + print(out) + + +# --------------------------------------------------------------------------- +def main() -> None: + """ Main function """ + + args = get_args() + + in_df = pd.read_csv(args.file, dtype=str) + + missing_urls = get_missing_urls(in_df) + + submit_urls(missing_urls, args.key) + + print('Done.') + + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + main() diff --git a/src/url_extractor.py b/src/url_extractor.py new file mode 100755 index 0000000..8c3f350 --- /dev/null +++ b/src/url_extractor.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +""" +Purpose: Extract URLs from the "text" column of a file +Authors: Kenneth Schackart +""" + +import argparse +import os +import re +import string +from typing import List, NamedTuple, Set, TextIO + +import pandas as pd +import pytest +from pandas.testing import assert_frame_equal + +from inventory_utils.custom_classes import CustomHelpFormatter +from inventory_utils.wrangling import preprocess_data + + +# --------------------------------------------------------------------------- +class Args(NamedTuple): + """ Command-line arguments """ + file: TextIO + out_dir: str + max_urls: int + + +# --------------------------------------------------------------------------- +def get_args() -> Args: + """ Parse command-line arguments """ + + parser = argparse.ArgumentParser( + description='Extract URLs from "text" column of file.', + formatter_class=CustomHelpFormatter) + + parser.add_argument('file', + metavar='FILE', + type=argparse.FileType('rt', encoding='ISO-8859-1'), + help='Input file (csv)') + parser.add_argument('-o', + '--out-dir', + metavar='DIR', + type=str, + default='out/', + help='Output directory') + parser.add_argument('-x', + '--max-urls', + metavar='INT', + type=int, + 
default=2, + help=('Maximum number of URLs, remove rows with ' + 'more than this number.')) + + args = parser.parse_args() + + return Args(args.file, args.out_dir, args.max_urls) + + +# --------------------------------------------------------------------------- +@pytest.fixture(name='raw_data') +def fixture_raw_data() -> pd.DataFrame: + """ Fake input dataframe """ + + df = pd.DataFrame( + [['123', 'ATAV (http://atavdb.org)', 'ATAV', '0.995', '', ''], + [ + '456', + 'https://pharos.nih.gov/ and http://juniper.health.unm.edu/tcrd/', + 'Pharos', '0.961', '', '' + ], ['789', 'no url', 'Anon', '0.97', '', '']], + columns=[ + 'ID', 'text', 'common_name', 'common_prob', 'full_name', + 'full_prob' + ]) + + return df + + +# --------------------------------------------------------------------------- +def extract_urls(text: str) -> List[str]: + """ + Extract URLs from a string + + Parameters: + `text`: String possibly containing one or more URLs + + Return: List of URLs + """ + + url_pattern = re.compile( + r'''http[s]? 
# http and optional s + :// # Literal :// + (?:[\w$-_@.&+!*\(\),] # Any word or number chars or these symbols + )+''', re.X) + + urls = re.findall(url_pattern, text) + bad_punct = re.sub('/', '', string.punctuation) # Do not remove trailing / + urls = list(map(lambda s: s.strip(bad_punct), urls)) + + # Remove duplicates + seen: Set[str] = set() + seen_add = seen.add + urls = [x for x in urls if not (x in seen or seen_add(x))] + + return urls + + +# --------------------------------------------------------------------------- +def test_extract_urls() -> None: + """ Test extract_urls() """ + + # Single URL + in_str = 'http://bacdb.org/BacWGSTdb/' + out = ['http://bacdb.org/BacWGSTdb/'] + assert extract_urls(in_str) == out + + # Multiple URLs + in_str = 'http://mirtrondb.cp.utfpr.edu.br/ http://bbcancer.renlab.org/' + out = ['http://mirtrondb.cp.utfpr.edu.br/', 'http://bbcancer.renlab.org/'] + assert extract_urls(in_str) == out + + # No extraneous words + in_str = 'Extraneous http://AciDB.cl words!' + out = ['http://AciDB.cl'] + assert extract_urls(in_str) == out + + # Various formats seen + in_str = ( + 'https://exobcd.liumwei.org ' # https + 'https://enset-project.org/EnMom@base.html ' # @ sign + 'http://oka.protres.ru:4200 ' # colon later in string + '(https://gitlab.pasteur.fr/hub/viralhostrangedb). 
' # Parens + 'http://evpedia.info http://evpedia.info ' # Duplicates + ) + out = [ + 'https://exobcd.liumwei.org', + 'https://enset-project.org/EnMom@base.html', + 'http://oka.protres.ru:4200', + 'https://gitlab.pasteur.fr/hub/viralhostrangedb', 'http://evpedia.info' + ] + assert extract_urls(in_str) == out + + +# --------------------------------------------------------------------------- +def add_url_column(df: pd.DataFrame) -> pd.DataFrame: + """ + Add column of extracted URLs + + Parameters: + `df`: Input dataframe that has text column + + Return: Dataframe with new column of URLs, with URLs separated by commas + """ + + df['extracted_url'] = df['text'].apply(extract_urls) + + df['extracted_url'] = df['extracted_url'].apply(', '.join) + + return df + + +# --------------------------------------------------------------------------- +def test_add_url_column(raw_data: pd.DataFrame) -> None: + """ Test add_url_column """ + + out_df = pd.DataFrame( + [[ + '123', 'ATAV (http://atavdb.org)', 'ATAV', '0.995', '', '', + 'http://atavdb.org' + ], + [ + '456', + 'https://pharos.nih.gov/ and http://juniper.health.unm.edu/tcrd/', + 'Pharos', '0.961', '', '', + 'https://pharos.nih.gov/, http://juniper.health.unm.edu/tcrd/' + ], ['789', 'no url', 'Anon', '0.97', '', '', '']], + columns=[ + 'ID', 'text', 'common_name', 'common_prob', 'full_name', + 'full_prob', 'extracted_url' + ]) + + assert_frame_equal(add_url_column(raw_data), out_df) + + +# --------------------------------------------------------------------------- +def filter_url_column(df: pd.DataFrame, max_urls: int) -> pd.DataFrame: + """ + Remove rows for which no URL could be found + + Parameters: + `df`: Input dataframe with predicted URLs + + Return: Dataframe + """ + + df = df.copy() + df = df[df['extracted_url'] != ''] + + df['url_count'] = df['extracted_url'].map(lambda x: len(x.split(', '))) + df = df[df['url_count'] <= max_urls] + + df.drop(['url_count'], axis='columns', inplace=True) + + return df + + +# 
--------------------------------------------------------------------------- +def test_filter_url_column(raw_data: pd.DataFrame) -> None: + """ Test filter_url_column() """ + + out_df = pd.DataFrame([[ + '123', 'ATAV (http://atavdb.org)', 'ATAV', '0.995', '', '', + 'http://atavdb.org' + ]], + columns=[ + 'ID', 'text', 'common_name', 'common_prob', + 'full_name', 'full_prob', 'extracted_url' + ]) + + assert_frame_equal(filter_url_column(add_url_column(raw_data), 1), out_df) + + +# --------------------------------------------------------------------------- +def get_outname(outdir: str, filename: str) -> str: + """ + Create output file name using output directory and basename of input + file + + Parameters: + `outdir`: Output directory + `filename`: Input filename + + Return: Output filepath + """ + + return os.path.join(outdir, os.path.basename(filename)) + + +# --------------------------------------------------------------------------- +def test_get_outname() -> None: + """ Test get_outname() """ + + assert get_outname( + 'out', 'data/ner_predict/predictions.csv') == 'out/predictions.csv' + + +# --------------------------------------------------------------------------- +def main() -> None: + """ Main function """ + + args = get_args() + out_dir = args.out_dir + + if not os.path.isdir(out_dir): + os.makedirs(out_dir) + + df = pd.read_csv(args.file) + + if 'text' not in df.columns: + df = preprocess_data(df) + df = df.rename(columns={'title_abstract': 'text'}) + + df = filter_url_column(add_url_column(df), args.max_urls) + + out_name = get_outname(out_dir, args.file.name) + + df.to_csv(out_name, index=False) + + print(f'Done. 
Wrote output to {out_name}.') + + +# --------------------------------------------------------------------------- +if __name__ == '__main__': + main() diff --git a/updating_inventory.ipynb b/updating_inventory.ipynb new file mode 100644 index 0000000..8476f92 --- /dev/null +++ b/updating_inventory.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","gpuClass":"standard"},"cells":[{"cell_type":"markdown","source":["# Updating the Inventory\n","---\n","This notebook will provide the code necessary to perform an update to the Biodata Resource Inventory, using trained models.\n","\n","The steps include:\n","* Run new query on EuropePMC\n","* Classify new articles\n","* Run NER to get resource names for predicted positives\n","* Get URLs for predicted positives\n","* Gather other metadata\n","\n","\n","\n","# Setup\n","---\n","### Mount Drive\n","\n","First, mount Google Drive to have access to files necessary for the run:\n"],"metadata":{"id":"x4whPVjZZa7x"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"BmwESzXcjXTb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1675109206770,"user_tz":420,"elapsed":1702,"user":{"displayName":"Kenneth Schackart","userId":"14619721059788161882"}},"outputId":"c938f5b9-9033-4ebd-c8a4-d7cc008390f0"},"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n","/content/drive/MyDrive/GitHub/inventory_2022\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')\n","%cd /content/drive/MyDrive/GitHub/inventory_2022/"]},{"cell_type":"markdown","source":["Run the make target to install Python dependencies.\n","\n","You may see the error: `ERROR: pip's dependency resolver does not 
currently take into account all the packages that are installed`, but the code should run regardless.\n","\n"],"metadata":{"id":"6a7pMnIVbKXE"}},{"cell_type":"code","source":["! make setup_for_updating"],"metadata":{"id":"iBMUW3C0YIz4"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["If you need to download the model checkpoints for the best classifier and NER models, run the cell below. If the `train_and_predict` pipeline was run, then the models are already present."],"metadata":{"id":"UleWUNAcqoL-"}},{"cell_type":"code","source":["# Get trained article classification model\n","# Create output directory\n","! mkdir -p out/classif_train_out/best\n","# Print name of model to the necessary file\n","! CLASSIFIER=\"out/classif_train_out/article_classifier.pt\"\n","! echo $CLASSIFIER > out/classif_train_out/best/best_checkpt.txt\n","# Download the model\n","! wget -O $CLASSIFIER https://huggingface.co/globalbiodata/inventory/resolve/main/article_classifier.pt\n","# Check that it downloaded properly\n","! echo \"5718a7f70becacb46d46501734c83aab81c86feec563594f6a25c116aa31b521 $CLASSIFIER\" | sha256sum -c\n","\n","# Get trained NER model\n","! mkdir -p out/ner_train_out/best\n","! NER=\"out/ner_train_out/named_entity_recognition.pt\"\n","! echo $NER > out/ner_train_out/best/best_checkpt.txt\n","! wget -O $NER https://huggingface.co/globalbiodata/inventory/resolve/main/named_entity_recognition.pt\n","! echo \"dc0bc8b4929e33da52bc92e12720260b392421883889e0a36c809cb0b5c40f5d $NER\" | sha256sum -c"],"metadata":{"id":"RcQ3mQhiqoi-"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Setting up Configurations\n","\n","Before running the automated pipelines, first update the configuration file `config/update_inventory.yml`. 
It can be accessed in Google Drive, though you may need to download it and edit it in a text editor such as Notepad, then reupload it.\n","\n","* **Europe PMC query publication date range**: These are stored as variables `query_from_date` and `query_to_date` in that file. Note that the dates are inclusive. For example, to get papers published in 2022, both of those variables should be 2022.\n","* **Previous inventory file**: During strict deduplication and flagging for manual review, the results of the previous inventory are taken into account. Specify the location of the most recent inventory output file in the variable `previous_inventory`."],"metadata":{"id":"CFB8BHYk8AwS"}},{"cell_type":"markdown","source":["# Running the pipeline\n","---\n","Now, we are ready to run the pipeline\n","\n","## Run it\n","\n","The following cell will run the pipeline described above. It may take a while, but GPU will speed it up a lot."],"metadata":{"id":"XG8imhT0bms7"}},{"cell_type":"code","source":["! make update_inventory"],"metadata":{"id":"zFSmOvuUnSPE"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Selective Manual Review\n","\n","After running the initial pipeline, the inventory has been flagged for selective manual review.\n","\n","The file to be reviewed is located at:\n","\n","`out/new_query/for_manual_review/predictions.csv`\n","\n","Review the flagged columns according to the instruction sheet ([doi: 10.5281/zenodo.7768363](https://doi.org/10.5281/zenodo.7768363)), then place the manually reviewed file in the following folder:\n","\n","`out/new_query/manually_reviewed/`\n","\n","The file must still be named `predictions.csv`\n","\n","# Processing Manual Review\n","\n","Next, further processing is performed on the manually reviewed inventory."],"metadata":{"id":"mdwb9NveMdP0"}},{"cell_type":"code","source":["! 
make process_manually_reviewed_update"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"z_c4ZbgONIoo","executionInfo":{"status":"ok","timestamp":1675109651846,"user_tz":420,"elapsed":42670,"user":{"displayName":"Kenneth Schackart","userId":"14619721059788161882"}},"outputId":"5c69eb01-f62f-4df6-cd20-cd7512d0bba6"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["snakemake \\\n","-s snakemake/update_inventory.smk \\\n","--configfile config/update_inventory.yml \\\n","-c 1 \\\n","--until process_countries\n","\u001b[33mBuilding DAG of jobs...\u001b[0m\n","\u001b[33mUsing shell: /usr/bin/bash\u001b[0m\n","\u001b[33mProvided cores: 1 (use --cores to define parallelism)\u001b[0m\n","\u001b[33mRules claiming more threads will be scaled down.\u001b[0m\n","\u001b[33mJob stats:\n","job count min threads max threads\n","----------------- ------- ------------- -------------\n","check_urls 1 1 1\n","get_epmc_meta 1 1 1\n","process_countries 1 1 1\n","total 3 1 1\n","\u001b[0m\n","\u001b[33mSelect jobs to execute...\u001b[0m\n","\u001b[32m\u001b[0m\n","\u001b[32m[Mon Jan 30 20:13:29 2023]\u001b[0m\n","\u001b[32mrule check_urls:\n"," input: out/new_query/processed_manual_review/predictions.csv\n"," output: out/new_query/url_checking/predictions.csv\n"," jobid: 11\n"," reason: Missing output files: out/new_query/url_checking/predictions.csv\n"," resources: tmpdir=/tmp\u001b[0m\n","\u001b[32m\u001b[0m\n","/usr/local/lib/python3.8/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.1\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","Done. 
Wrote output to out/new_query/url_checking/predictions.csv.\n","\u001b[32m[Mon Jan 30 20:13:59 2023]\u001b[0m\n","\u001b[32mFinished job 11.\u001b[0m\n","\u001b[32m1 of 3 steps (33%) done\u001b[0m\n","\u001b[33mSelect jobs to execute...\u001b[0m\n","\u001b[32m\u001b[0m\n","\u001b[32m[Mon Jan 30 20:13:59 2023]\u001b[0m\n","\u001b[32mrule get_epmc_meta:\n"," input: out/new_query/url_checking/predictions.csv\n"," output: out/new_query/epmc_meta/predictions.csv\n"," jobid: 10\n"," reason: Missing output files: out/new_query/epmc_meta/predictions.csv; Input files updated by another job: out/new_query/url_checking/predictions.csv\n"," resources: tmpdir=/tmp\u001b[0m\n","\u001b[32m\u001b[0m\n","/usr/local/lib/python3.8/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.1\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","Done. Wrote output to out/new_query/epmc_meta/predictions.csv.\n","\u001b[32m[Mon Jan 30 20:14:06 2023]\u001b[0m\n","\u001b[32mFinished job 10.\u001b[0m\n","\u001b[32m2 of 3 steps (67%) done\u001b[0m\n","\u001b[33mSelect jobs to execute...\u001b[0m\n","\u001b[32m\u001b[0m\n","\u001b[32m[Mon Jan 30 20:14:06 2023]\u001b[0m\n","\u001b[32mrule process_countries:\n"," input: out/new_query/epmc_meta/predictions.csv\n"," output: out/new_query/processed_countries/predictions.csv\n"," jobid: 9\n"," reason: Missing output files: out/new_query/processed_countries/predictions.csv; Input files updated by another job: out/new_query/epmc_meta/predictions.csv\n"," resources: tmpdir=/tmp\u001b[0m\n","\u001b[32m\u001b[0m\n","/usr/local/lib/python3.8/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.1\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","Done. 
Wrote output to out/new_query/processed_countries/predictions.csv.\n","\u001b[32m[Mon Jan 30 20:14:10 2023]\u001b[0m\n","\u001b[32mFinished job 9.\u001b[0m\n","\u001b[32m3 of 3 steps (100%) done\u001b[0m\n","\u001b[33mComplete log: .snakemake/log/2023-01-30T201328.761519.snakemake.log\u001b[0m\n"]}]},{"cell_type":"markdown","source":["## Final inventory\n","\n","The final inventory, including names, URLs, and metadata, is found in the file:\n","* `out/new_query/processed_countries/predictions.csv`"],"metadata":{"id":"AV4p2VA_NUfi"}}]} \ No newline at end of file